mirror of
https://github.com/koala73/worldmonitor.git
synced 2026-04-26 01:24:59 +02:00
* feat(config): add NSE and BSE (India) market support (#1102) * fix(india-markets): wire NSE/BSE symbols into stocks.json so seed fetches them - Add 20 India symbols (^NSEI, ^BSESN, 18x .NS equities) to shared/stocks.json - Mark all .NS symbols + indices as yahooOnly (Finnhub does not support NSE) - Remove orphan src/config/india-markets.ts; stocks.json is the seed source of truth * fix(india-markets): sync scripts/shared/stocks.json mirror * fix(ci): exclude scripts/data/ and scripts/node_modules/ from unicode safety scan --------- Co-authored-by: lspassos1 <lspassos@icloud.com>
224 lines
5.6 KiB
JavaScript
224 lines
5.6 KiB
JavaScript
#!/usr/bin/env node
|
||
/**
|
||
* Detect suspicious invisible Unicode in executable repository files.
|
||
*
|
||
* Threat model:
|
||
* - Trojan Source (bidi controls)
|
||
* - Zero-width/invisible control chars
|
||
* - Variation selector steganography / Unicode tags
|
||
* - Private Use Area payload hiding
|
||
*
|
||
* Usage:
|
||
* node scripts/check-unicode-safety.mjs
|
||
* node scripts/check-unicode-safety.mjs --staged
|
||
*/
|
||
|
||
import { readFileSync, readdirSync, statSync } from 'node:fs';
|
||
import { join, relative } from 'node:path';
|
||
import { execFileSync } from 'node:child_process';
|
||
|
||
const args = new Set(process.argv.slice(2));
|
||
const stagedOnly = args.has('--staged');
|
||
|
||
const ROOT = process.cwd();
|
||
|
||
const SCAN_ROOTS = [
|
||
'src',
|
||
'server',
|
||
'api',
|
||
'scripts',
|
||
'tests',
|
||
'e2e',
|
||
'.github',
|
||
'.husky',
|
||
];
|
||
|
||
const INCLUDED_EXTENSIONS = new Set([
|
||
'.ts', '.tsx', '.js', '.jsx', '.mjs', '.cjs',
|
||
'.json', '.yml', '.yaml', '.sh',
|
||
'', // extensionless scripts (e.g. .husky/pre-commit, .husky/pre-push)
|
||
]);
|
||
|
||
const EXCLUDED_PREFIXES = [
|
||
'.git/',
|
||
'node_modules/',
|
||
'src/locales/',
|
||
'src/generated/',
|
||
'docs/',
|
||
'blog-site/',
|
||
'public/blog/',
|
||
'scripts/data/',
|
||
'scripts/node_modules/',
|
||
];
|
||
|
||
const ZERO_WIDTH = new Set([0x200B, 0x200C, 0x200D, 0x2060, 0xFEFF]);
|
||
|
||
function isBidiControl(cp) {
|
||
return (cp >= 0x202A && cp <= 0x202E) || (cp >= 0x2066 && cp <= 0x2069);
|
||
}
|
||
|
||
function isVariationSelectorSupplement(cp) {
|
||
return cp >= 0xE0100 && cp <= 0xE01EF;
|
||
}
|
||
|
||
function isVariationSelectorSuspicious(cp) {
|
||
// FE0F (emoji presentation selector) is legitimately used after emoji base
|
||
// characters (including ASCII keycap sequences like #️⃣) — skip to avoid
|
||
// false positives. FE00..FE0E (text/emoji selectors) are rare in source and
|
||
// suspicious for steganography.
|
||
return cp >= 0xFE00 && cp <= 0xFE0E;
|
||
}
|
||
|
||
// PUA (E000–F8FF) is intentionally excluded: it doesn't affect parser
|
||
// semantics and is legitimately used by icon fonts in string literals.
|
||
|
||
function getExtension(path) {
|
||
const idx = path.lastIndexOf('.');
|
||
return idx === -1 ? '' : path.slice(idx);
|
||
}
|
||
|
||
function shouldScanFile(path) {
|
||
if (EXCLUDED_PREFIXES.some(prefix => path.startsWith(prefix))) return false;
|
||
const ext = getExtension(path);
|
||
if (!INCLUDED_EXTENSIONS.has(ext)) return false;
|
||
return true;
|
||
}
|
||
|
||
function walkDir(rootDir, out) {
|
||
let entries;
|
||
try {
|
||
entries = readdirSync(rootDir, { withFileTypes: true });
|
||
} catch {
|
||
return;
|
||
}
|
||
for (const entry of entries) {
|
||
const abs = join(rootDir, entry.name);
|
||
const rel = relative(ROOT, abs).replace(/\\/g, '/');
|
||
if (EXCLUDED_PREFIXES.some(prefix => rel.startsWith(prefix))) continue;
|
||
if (entry.isDirectory()) {
|
||
walkDir(abs, out);
|
||
continue;
|
||
}
|
||
if (!entry.isFile()) continue;
|
||
if (!shouldScanFile(rel)) continue;
|
||
out.push(rel);
|
||
}
|
||
}
|
||
|
||
function getRepoFiles() {
|
||
const files = [];
|
||
for (const root of SCAN_ROOTS) {
|
||
const abs = join(ROOT, root);
|
||
try {
|
||
if (statSync(abs).isDirectory()) walkDir(abs, files);
|
||
} catch {
|
||
// ignore missing roots
|
||
}
|
||
}
|
||
return files;
|
||
}
|
||
|
||
function getStagedFiles() {
|
||
let out = '';
|
||
try {
|
||
out = execFileSync('git', ['diff', '--cached', '--name-only', '--diff-filter=ACMR'], {
|
||
cwd: ROOT,
|
||
encoding: 'utf8',
|
||
stdio: ['ignore', 'pipe', 'pipe'],
|
||
});
|
||
} catch {
|
||
return [];
|
||
}
|
||
return out
|
||
.split('\n')
|
||
.map(s => s.trim().replace(/\\/g, '/'))
|
||
.filter(Boolean)
|
||
.filter(shouldScanFile);
|
||
}
|
||
|
||
function formatCodePoint(cp) {
|
||
return `U+${cp.toString(16).toUpperCase().padStart(cp > 0xFFFF ? 6 : 4, '0')}`;
|
||
}
|
||
|
||
function classify(cp) {
|
||
if (isBidiControl(cp)) return 'bidi-control';
|
||
if (ZERO_WIDTH.has(cp)) return 'zero-width';
|
||
if (isVariationSelectorSupplement(cp)) return 'variation-selector-supplement';
|
||
if (isVariationSelectorSuspicious(cp)) return 'variation-selector';
|
||
return null;
|
||
}
|
||
|
||
function scanFile(path) {
|
||
const abs = join(ROOT, path);
|
||
let text;
|
||
try {
|
||
text = readFileSync(abs, 'utf8');
|
||
} catch {
|
||
return [];
|
||
}
|
||
|
||
const findings = [];
|
||
const lines = text.split('\n');
|
||
let line = 1;
|
||
let col = 1;
|
||
|
||
for (const ch of text) {
|
||
const cp = ch.codePointAt(0);
|
||
const kind = classify(cp);
|
||
if (kind) {
|
||
const lineText = lines[line - 1] ?? '';
|
||
findings.push({
|
||
path,
|
||
line,
|
||
col,
|
||
kind,
|
||
cp: formatCodePoint(cp),
|
||
lineText,
|
||
});
|
||
}
|
||
|
||
if (ch === '\n') {
|
||
line += 1;
|
||
col = 1;
|
||
} else {
|
||
// Astral-plane characters (cp > 0xFFFF) occupy two UTF-16 code units.
|
||
// Increment by 2 so reported columns match editor column positions.
|
||
col += cp > 0xFFFF ? 2 : 1;
|
||
}
|
||
}
|
||
|
||
return findings;
|
||
}
|
||
|
||
function main() {
|
||
const files = stagedOnly ? getStagedFiles() : getRepoFiles();
|
||
if (files.length === 0) {
|
||
console.log(stagedOnly ? 'Unicode safety: no staged executable files to scan.' : 'Unicode safety: no files matched scan scope.');
|
||
return;
|
||
}
|
||
|
||
const findings = [];
|
||
for (const file of files) {
|
||
findings.push(...scanFile(file));
|
||
}
|
||
|
||
if (findings.length === 0) {
|
||
console.log(`Unicode safety: scanned ${files.length} file(s), no suspicious hidden Unicode found.`);
|
||
return;
|
||
}
|
||
|
||
console.error(`Unicode safety check failed: ${findings.length} suspicious character(s) found.`);
|
||
for (const f of findings.slice(0, 200)) {
|
||
console.error(`${f.path}:${f.line}:${f.col} ${f.cp} ${f.kind}`);
|
||
if (f.lineText) console.error(` ${f.lineText}`);
|
||
}
|
||
if (findings.length > 200) {
|
||
console.error(`... ${findings.length - 200} more finding(s) omitted.`);
|
||
}
|
||
console.error('');
|
||
console.error('If intentional, replace with visible escapes or remove from executable files.');
|
||
process.exit(1);
|
||
}
|
||
|
||
main();
|