Files
worldmonitor/scripts/check-unicode-safety.mjs
Elie Habib 0dae526a4b feat(markets): add NSE/BSE India market support (#1863)
* feat(config): add NSE and BSE (India) market support (#1102)

* fix(india-markets): wire NSE/BSE symbols into stocks.json so seed fetches them

- Add 20 India symbols (^NSEI, ^BSESN, 18x .NS equities) to shared/stocks.json
- Mark all .NS symbols + indices as yahooOnly (Finnhub does not support NSE)
- Remove orphan src/config/india-markets.ts; stocks.json is the seed source of truth

* fix(india-markets): sync scripts/shared/stocks.json mirror

* fix(ci): exclude scripts/data/ and scripts/node_modules/ from unicode safety scan

---------

Co-authored-by: lspassos1 <lspassos@icloud.com>
2026-03-19 10:31:37 +04:00

224 lines
5.6 KiB
JavaScript
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#!/usr/bin/env node
/**
* Detect suspicious invisible Unicode in executable repository files.
*
* Threat model:
* - Trojan Source (bidi controls)
* - Zero-width/invisible control chars
* - Variation selector steganography / Unicode tags
* - Private Use Area payload hiding
*
* Usage:
* node scripts/check-unicode-safety.mjs
* node scripts/check-unicode-safety.mjs --staged
*/
import { readFileSync, readdirSync, statSync } from 'node:fs';
import { join, relative } from 'node:path';
import { execFileSync } from 'node:child_process';
const args = new Set(process.argv.slice(2));
const stagedOnly = args.has('--staged');
const ROOT = process.cwd();
const SCAN_ROOTS = [
'src',
'server',
'api',
'scripts',
'tests',
'e2e',
'.github',
'.husky',
];
const INCLUDED_EXTENSIONS = new Set([
'.ts', '.tsx', '.js', '.jsx', '.mjs', '.cjs',
'.json', '.yml', '.yaml', '.sh',
'', // extensionless scripts (e.g. .husky/pre-commit, .husky/pre-push)
]);
const EXCLUDED_PREFIXES = [
'.git/',
'node_modules/',
'src/locales/',
'src/generated/',
'docs/',
'blog-site/',
'public/blog/',
'scripts/data/',
'scripts/node_modules/',
];
const ZERO_WIDTH = new Set([0x200B, 0x200C, 0x200D, 0x2060, 0xFEFF]);
function isBidiControl(cp) {
return (cp >= 0x202A && cp <= 0x202E) || (cp >= 0x2066 && cp <= 0x2069);
}
function isVariationSelectorSupplement(cp) {
return cp >= 0xE0100 && cp <= 0xE01EF;
}
function isVariationSelectorSuspicious(cp) {
// FE0F (emoji presentation selector) is legitimately used after emoji base
// characters (including ASCII keycap sequences like #️⃣) — skip to avoid
// false positives. FE00..FE0E (text/emoji selectors) are rare in source and
// suspicious for steganography.
return cp >= 0xFE00 && cp <= 0xFE0E;
}
// PUA (E000F8FF) is intentionally excluded: it doesn't affect parser
// semantics and is legitimately used by icon fonts in string literals.
function getExtension(path) {
const idx = path.lastIndexOf('.');
return idx === -1 ? '' : path.slice(idx);
}
function shouldScanFile(path) {
if (EXCLUDED_PREFIXES.some(prefix => path.startsWith(prefix))) return false;
const ext = getExtension(path);
if (!INCLUDED_EXTENSIONS.has(ext)) return false;
return true;
}
function walkDir(rootDir, out) {
let entries;
try {
entries = readdirSync(rootDir, { withFileTypes: true });
} catch {
return;
}
for (const entry of entries) {
const abs = join(rootDir, entry.name);
const rel = relative(ROOT, abs).replace(/\\/g, '/');
if (EXCLUDED_PREFIXES.some(prefix => rel.startsWith(prefix))) continue;
if (entry.isDirectory()) {
walkDir(abs, out);
continue;
}
if (!entry.isFile()) continue;
if (!shouldScanFile(rel)) continue;
out.push(rel);
}
}
function getRepoFiles() {
const files = [];
for (const root of SCAN_ROOTS) {
const abs = join(ROOT, root);
try {
if (statSync(abs).isDirectory()) walkDir(abs, files);
} catch {
// ignore missing roots
}
}
return files;
}
function getStagedFiles() {
let out = '';
try {
out = execFileSync('git', ['diff', '--cached', '--name-only', '--diff-filter=ACMR'], {
cwd: ROOT,
encoding: 'utf8',
stdio: ['ignore', 'pipe', 'pipe'],
});
} catch {
return [];
}
return out
.split('\n')
.map(s => s.trim().replace(/\\/g, '/'))
.filter(Boolean)
.filter(shouldScanFile);
}
function formatCodePoint(cp) {
return `U+${cp.toString(16).toUpperCase().padStart(cp > 0xFFFF ? 6 : 4, '0')}`;
}
function classify(cp) {
if (isBidiControl(cp)) return 'bidi-control';
if (ZERO_WIDTH.has(cp)) return 'zero-width';
if (isVariationSelectorSupplement(cp)) return 'variation-selector-supplement';
if (isVariationSelectorSuspicious(cp)) return 'variation-selector';
return null;
}
function scanFile(path) {
const abs = join(ROOT, path);
let text;
try {
text = readFileSync(abs, 'utf8');
} catch {
return [];
}
const findings = [];
const lines = text.split('\n');
let line = 1;
let col = 1;
for (const ch of text) {
const cp = ch.codePointAt(0);
const kind = classify(cp);
if (kind) {
const lineText = lines[line - 1] ?? '';
findings.push({
path,
line,
col,
kind,
cp: formatCodePoint(cp),
lineText,
});
}
if (ch === '\n') {
line += 1;
col = 1;
} else {
// Astral-plane characters (cp > 0xFFFF) occupy two UTF-16 code units.
// Increment by 2 so reported columns match editor column positions.
col += cp > 0xFFFF ? 2 : 1;
}
}
return findings;
}
function main() {
const files = stagedOnly ? getStagedFiles() : getRepoFiles();
if (files.length === 0) {
console.log(stagedOnly ? 'Unicode safety: no staged executable files to scan.' : 'Unicode safety: no files matched scan scope.');
return;
}
const findings = [];
for (const file of files) {
findings.push(...scanFile(file));
}
if (findings.length === 0) {
console.log(`Unicode safety: scanned ${files.length} file(s), no suspicious hidden Unicode found.`);
return;
}
console.error(`Unicode safety check failed: ${findings.length} suspicious character(s) found.`);
for (const f of findings.slice(0, 200)) {
console.error(`${f.path}:${f.line}:${f.col} ${f.cp} ${f.kind}`);
if (f.lineText) console.error(` ${f.lineText}`);
}
if (findings.length > 200) {
console.error(`... ${findings.length - 200} more finding(s) omitted.`);
}
console.error('');
console.error('If intentional, replace with visible escapes or remove from executable files.');
process.exit(1);
}
main();