feat(sanctions): entity lookup index + OpenSanctions search (#2042) (#2085)

* feat(sanctions): entity lookup index + OpenSanctions search (#2042)

* fix: guard tokens[0] access in sanctions lookup

* fix: use createIpRateLimiter pattern in sanctions-entity-search

* fix: add sanctions-entity-search to allowlist and cache tier

* fix: add LookupSanctionEntity RPC to service.proto, regenerate

* fix(sanctions): strip _entityIndex/_state from main key publish, guard limit NaN

P0: seed-sanctions-pressure was writing the full _entityIndex array and _state
snapshot into sanctions:pressure:v1 because afterPublish runs after atomicPublish.
Add publishTransform to strip both fields before the main key write so the
pressure payload stays compact; afterPublish and extraKeys still receive the full
data object and write the correct separate keys.

P1: limit param in sanctions-entity-search edge function passed NaN to OpenSanctions
when a non-numeric value was supplied. Fix with Number.isFinite guard.

P2: add 200-char max length on q param to prevent oversized upstream requests.

* fix(sanctions): maxStaleMin 2x interval, no-store on entity search

health.js: 720min (1x) → 1440min (2x) for both sanctionsPressure and
sanctionsEntities. A single missed 12h cron was immediately flagging stale.

sanctions-entity-search.js: Cache-Control public → no-store. Sanctions
lookups include compliance-sensitive names in the query string; public
caching would have logged/stored these at CDN/proxy layer.
This commit is contained in:
Elie Habib
2026-03-23 19:38:11 +04:00
committed by GitHub
parent d29fd4e03f
commit 3321069fb3
13 changed files with 449 additions and 3 deletions

View File

@@ -36,6 +36,7 @@ const BOOTSTRAP_KEYS = {
securityAdvisories: 'intelligence:advisories-bootstrap:v1',
customsRevenue: 'trade:customs-revenue:v1',
sanctionsPressure: 'sanctions:pressure:v1',
sanctionsEntities: 'sanctions:entities:v1',
radiationWatch: 'radiation:observations:v1',
consumerPricesOverview: 'consumer-prices:overview:ae',
consumerPricesCategories: 'consumer-prices:categories:ae:30d',
@@ -144,7 +145,8 @@ const SEED_META = {
usniFleet: { key: 'seed-meta:military:usni-fleet', maxStaleMin: 480 },
securityAdvisories: { key: 'seed-meta:intelligence:advisories', maxStaleMin: 120 },
customsRevenue: { key: 'seed-meta:trade:customs-revenue', maxStaleMin: 1440 },
sanctionsPressure: { key: 'seed-meta:sanctions:pressure', maxStaleMin: 720 },
sanctionsPressure: { key: 'seed-meta:sanctions:pressure', maxStaleMin: 1440 }, // 12h cron; 1440min = 24h = 2x interval
sanctionsEntities: { key: 'seed-meta:sanctions:entities', maxStaleMin: 1440 }, // 12h cron; 1440min = 24h = 2x interval
radiationWatch: { key: 'seed-meta:radiation:observations', maxStaleMin: 30 },
groceryBasket: { key: 'seed-meta:economic:grocery-basket', maxStaleMin: 10080 }, // weekly seed; 10080 = 7 days
bigmac: { key: 'seed-meta:economic:bigmac', maxStaleMin: 10080 }, // weekly seed; 10080 = 7 days

View File

@@ -0,0 +1,88 @@
// Edge function: on-demand OpenSanctions entity search (Phase 2 — issue #2042).
// Proxies to https://api.opensanctions.org — no auth required for basic search.
// Returns OpenSanctions hits only; OFAC matches are served separately by the LookupSanctionEntity RPC (no merging happens in this function).
// Route configuration export: declares the 'edge' runtime for this function.
export const config = { runtime: 'edge' };
import { createIpRateLimiter } from './_ip-rate-limit.js';
import { jsonResponse } from './_json-response.js';
import { getClientIp } from './_turnstile.js';
// Base URL of the upstream OpenSanctions API; the handler appends `/search/default`.
const OPENSANCTIONS_BASE = 'https://api.opensanctions.org';
// Upstream request budget in milliseconds, enforced through AbortSignal.timeout.
const OPENSANCTIONS_TIMEOUT_MS = 8_000;
// Hard cap on the `limit` value forwarded to the upstream search.
const MAX_RESULTS = 20;
// Module-level limiter consulted with the client IP before any other work is done.
// NOTE(review): limit/windowMs presumably mean 30 requests per 60 s per IP — confirm against _ip-rate-limit.js.
const rateLimiter = createIpRateLimiter({ limit: 30, windowMs: 60_000 });
/**
 * Converts one raw OpenSanctions search hit into the compact match shape
 * returned by this endpoint.
 *
 * Property values are read defensively: a missing, non-array, or empty
 * primary property falls through to its fallback (`name` -> `caption`,
 * `country` -> `nationality`, `program` -> `sanctions`) instead of yielding
 * an empty result or slicing a string.
 *
 * @param {object} hit Raw entry from the upstream `results` array.
 * @returns {{ id: string, name: string, entityType: string, countryCodes: string[], programs: string[], datasets: string[], score: number }}
 */
function normalizeEntity(hit) {
  const props = hit.properties ?? {};

  // Anything that is not an array is treated as "no values".
  const asList = (value) => (Array.isArray(value) ? value : []);
  // First candidate that actually contains values, or an empty list.
  const firstNonEmpty = (...candidates) =>
    candidates.map(asList).find((list) => list.length > 0) ?? [];

  // Prefer the first usable name property; otherwise use the hit's caption.
  const name = asList(props.name).find(Boolean) || hit.caption || '';
  const countries = firstNonEmpty(props.country, props.nationality);
  const programs = firstNonEmpty(props.program, props.sanctions);

  // Only these three schemas get a dedicated type; everything else is a generic entity.
  let entityType;
  switch (hit.schema) {
    case 'Vessel':
      entityType = 'vessel';
      break;
    case 'Aircraft':
      entityType = 'aircraft';
      break;
    case 'Person':
      entityType = 'individual';
      break;
    default:
      entityType = 'entity';
  }

  return {
    id: `opensanctions:${hit.id}`,
    name,
    entityType,
    // Keep the payload compact: at most three values per list.
    countryCodes: countries.slice(0, 3),
    programs: programs.slice(0, 3),
    datasets: hit.datasets ?? [],
    score: hit.score ?? 0,
  };
}
/**
 * Edge handler for on-demand OpenSanctions entity search.
 *
 * Query parameters:
 *   - `q`     required, 2 to 200 characters after trimming.
 *   - `limit` optional, defaults to 10, clamped to the range 1..MAX_RESULTS.
 *
 * Responses:
 *   - 429 when the client IP is rate limited.
 *   - 400 when `q` is too short or too long.
 *   - 200 with `{ results, total, source }` on success.
 *   - 200 with empty results plus an `error` string when the upstream call
 *     fails, so callers always receive the same response shape.
 *
 * Every response carries `Cache-Control: no-store`: the query string holds
 * compliance-sensitive names, so no variant of the response (including
 * errors) should be kept by shared caches.
 *
 * @param {Request} req Incoming request; only `req.url` is read here.
 */
export default async function handler(req) {
  const noStore = { 'Cache-Control': 'no-store' };

  const ip = getClientIp(req);
  if (rateLimiter.isRateLimited(ip)) {
    return jsonResponse({ error: 'Too many requests' }, 429, noStore);
  }

  const { searchParams } = new URL(req.url);
  const q = (searchParams.get('q') ?? '').trim();
  if (q.length < 2) {
    return jsonResponse({ error: 'q must be at least 2 characters' }, 400, noStore);
  }
  if (q.length > 200) {
    return jsonResponse({ error: 'q must be at most 200 characters' }, 400, noStore);
  }

  // Non-numeric or non-positive limits fall back to 10. A positive fraction
  // such as 0.5 truncates to 0, so the value is also floored at 1.
  const limitRaw = Number(searchParams.get('limit') ?? '10');
  const requested = Number.isFinite(limitRaw) && limitRaw > 0 ? Math.trunc(limitRaw) : 10;
  const limit = Math.min(Math.max(requested, 1), MAX_RESULTS);

  try {
    const url = new URL(`${OPENSANCTIONS_BASE}/search/default`);
    url.searchParams.set('q', q);
    url.searchParams.set('limit', String(limit));

    const resp = await fetch(url.toString(), {
      headers: {
        'User-Agent': 'WorldMonitor/1.0 sanctions-search',
        Accept: 'application/json',
      },
      signal: AbortSignal.timeout(OPENSANCTIONS_TIMEOUT_MS),
    });

    if (!resp.ok) {
      // Degrade to an empty result set rather than surfacing the upstream status code.
      return jsonResponse(
        { results: [], total: 0, source: 'opensanctions', error: `upstream HTTP ${resp.status}` },
        200,
        noStore,
      );
    }

    const data = await resp.json();
    const results = (data.results ?? []).map(normalizeEntity);

    return jsonResponse(
      {
        results,
        // Use the upstream total when present, otherwise the size of this page.
        total: data.total?.value ?? results.length,
        source: 'opensanctions',
      },
      200,
      noStore,
    );
  } catch (err) {
    // Timeouts, network failures and malformed JSON all end up here.
    const message = err instanceof Error ? err.message : String(err);
    return jsonResponse({ results: [], total: 0, source: 'opensanctions', error: message }, 200, noStore);
  }
}

File diff suppressed because one or more lines are too long

View File

@@ -36,6 +36,44 @@ paths:
application/json:
schema:
$ref: '#/components/schemas/Error'
/api/sanctions/v1/lookup-sanction-entity:
get:
tags:
- SanctionsService
summary: LookupSanctionEntity
description: LookupSanctionEntity searches the OFAC entity index by name, vessel, or aircraft.
operationId: LookupSanctionEntity
parameters:
- name: q
in: query
required: false
schema:
type: string
- name: max_results
in: query
required: false
schema:
type: integer
format: int32
responses:
"200":
description: Successful response
content:
application/json:
schema:
$ref: '#/components/schemas/LookupSanctionEntityResponse'
"400":
description: Validation error
content:
application/json:
schema:
$ref: '#/components/schemas/ValidationError'
default:
description: Error response
content:
application/json:
schema:
$ref: '#/components/schemas/Error'
components:
schemas:
Error:
@@ -188,3 +226,43 @@ components:
type: integer
format: int32
description: ProgramSanctionsPressure summarizes designation volume and recent additions by OFAC program.
LookupSanctionEntityRequest:
type: object
properties:
q:
type: string
maxResults:
type: integer
format: int32
description: LookupSanctionEntityRequest searches the OFAC entity index by name, vessel, or aircraft.
LookupSanctionEntityResponse:
type: object
properties:
results:
type: array
items:
$ref: '#/components/schemas/SanctionEntityMatch'
total:
type: integer
format: int32
source:
type: string
description: LookupSanctionEntityResponse contains matched entities from OFAC + OpenSanctions.
SanctionEntityMatch:
type: object
properties:
id:
type: string
name:
type: string
entityType:
type: string
countryCodes:
type: array
items:
type: string
programs:
type: array
items:
type: string
description: SanctionEntityMatch is a compact entity match from the lookup index.

View File

@@ -0,0 +1,27 @@
syntax = "proto3";
package worldmonitor.sanctions.v1;
import "sebuf/http/annotations.proto";
// LookupSanctionEntityRequest searches the OFAC entity index by name, vessel, or aircraft.
message LookupSanctionEntityRequest {
// Free-text search query, bound to the `q` query-string parameter.
string q = 1 [(sebuf.http.query) = { name: "q" }];
// Maximum number of matches to return, bound to the `max_results` query-string parameter.
// The server handler substitutes its default for non-positive values and caps large ones.
int32 max_results = 2 [(sebuf.http.query) = { name: "max_results" }];
}
// SanctionEntityMatch is a compact entity match from the lookup index.
message SanctionEntityMatch {
// Identifier of the matched entity as stored in the index.
string id = 1;
// Display name of the matched entity.
string name = 2;
// Compact entity type label; the seed script writes "vessel", "aircraft", "individual" or "entity".
string entity_type = 3;
// Country codes associated with the entity (the index keeps at most three).
repeated string country_codes = 4;
// Sanctions program codes associated with the entity (the index keeps at most three).
repeated string programs = 5;
}
// LookupSanctionEntityResponse contains matched entities from OFAC + OpenSanctions.
// NOTE(review): the lookup handler added alongside this message only searches the
// OFAC index and always sets `source` to "ofac" — confirm the intended wording.
message LookupSanctionEntityResponse {
// Best matches, ordered by descending score and truncated to the requested maximum.
repeated SanctionEntityMatch results = 1;
// Number of index records that matched before truncation.
int32 total = 2;
// Name of the data source that produced the results.
string source = 3;
}

View File

@@ -4,6 +4,7 @@ package worldmonitor.sanctions.v1;
import "sebuf/http/annotations.proto";
import "worldmonitor/sanctions/v1/list_sanctions_pressure.proto";
import "worldmonitor/sanctions/v1/lookup_entity.proto";
// SanctionsService provides structured OFAC sanctions pressure data.
service SanctionsService {
@@ -13,4 +14,9 @@ service SanctionsService {
rpc ListSanctionsPressure(ListSanctionsPressureRequest) returns (ListSanctionsPressureResponse) {
option (sebuf.http.config) = {path: "/list-sanctions-pressure", method: HTTP_METHOD_GET};
}
// LookupSanctionEntity searches the OFAC entity index by name, vessel, or aircraft.
rpc LookupSanctionEntity(LookupSanctionEntityRequest) returns (LookupSanctionEntityResponse) {
option (sebuf.http.config) = {path: "/lookup-sanction-entity", method: HTTP_METHOD_GET};
}
}

View File

@@ -6,13 +6,21 @@
// 120MB XML download against Railway's 512MB container limit.
import sax from 'sax';
import { CHROME_UA, loadEnvFile, runSeed, verifySeedKey } from './_seed-utils.mjs';
import { CHROME_UA, loadEnvFile, runSeed, verifySeedKey, writeExtraKeyWithMeta } from './_seed-utils.mjs';
loadEnvFile(import.meta.url);
const CANONICAL_KEY = 'sanctions:pressure:v1';
const STATE_KEY = 'sanctions:pressure:state:v1';
const ENTITY_INDEX_KEY = 'sanctions:entities:v1';
const CACHE_TTL = 15 * 60 * 60; // 15h — 3h buffer over 12h cron cadence (was 12h = 0 buffer)
// Compact entity type codes for the lookup index (saves space vs full enum strings).
// Keys are the SANCTIONS_ENTITY_TYPE_* strings found on each entry's `entityType`;
// values are the short labels written to the index record's `et` field.
// Types missing from this map fall back to 'entity' where the index is built.
const ET_CODE = {
SANCTIONS_ENTITY_TYPE_VESSEL: 'vessel',
SANCTIONS_ENTITY_TYPE_AIRCRAFT: 'aircraft',
SANCTIONS_ENTITY_TYPE_INDIVIDUAL: 'individual',
SANCTIONS_ENTITY_TYPE_ENTITY: 'entity',
};
const DEFAULT_RECENT_LIMIT = 60;
const OFAC_TIMEOUT_MS = 45_000;
const PROGRAM_CODE_RE = /^[A-Z0-9][A-Z0-9-]{1,24}$/;
@@ -488,6 +496,18 @@ async function fetchSanctionsPressure() {
const aircraftCount = entries.filter((entry) => entry.entityType === 'SANCTIONS_ENTITY_TYPE_AIRCRAFT').length;
console.log(` Merged: ${totalCount} total (${results[0]?.entries.length ?? 0} SDN + ${results[1]?.entries.length ?? 0} consolidated), ${newEntryCount} new, ${vesselCount} vessels, ${aircraftCount} aircraft`);
// Build compact entity index for name-based lookup (Phase 1 — issue #2042).
// Each record: { id, name, et (compact type), cc (country codes), pr (programs) }
// Stored as a flat array in a single Redis key for O(N) in-memory search.
const _entityIndex = entries.map((e) => ({
id: e.id,
name: e.name,
et: ET_CODE[e.entityType] ?? 'entity',
cc: e.countryCodes.slice(0, 3),
pr: e.programs.slice(0, 3),
}));
console.log(` Entity index: ${_entityIndex.length} records (~${Math.round(JSON.stringify(_entityIndex).length / 1024)}KB)`);
return {
fetchedAt: String(Date.now()),
datasetDate: String(datasetDate),
@@ -500,6 +520,7 @@ async function fetchSanctionsPressure() {
countries: buildCountryPressure(entries),
programs: buildProgramPressure(entries),
entries: sortedEntries.slice(0, DEFAULT_RECENT_LIMIT),
_entityIndex,
_state: {
entryIds: entries.map((entry) => entry.id),
},
@@ -515,6 +536,12 @@ runSeed('sanctions', 'pressure', CANONICAL_KEY, fetchSanctionsPressure, {
validateFn: validate,
sourceVersion: 'ofac-sls-advanced-xml-v1',
recordCount: (data) => data.totalCount ?? 0,
// Strip internal-only fields before writing the main key so the pressure payload
// does not include the entity index (~hundreds of KB) or state snapshot.
publishTransform: (data) => {
const { _entityIndex: _ei, _state: _s, ...rest } = data;
return rest;
},
extraKeys: [
{
key: STATE_KEY,
@@ -523,6 +550,18 @@ runSeed('sanctions', 'pressure', CANONICAL_KEY, fetchSanctionsPressure, {
},
],
afterPublish: async (data, _ctx) => {
// Write entity lookup index with seed-meta so health.js can monitor it.
// Uses writeExtraKeyWithMeta rather than extraKeys because runSeed's extraKeys
// calls writeExtraKey (no meta), and we need a seed-meta key for health tracking.
if (data._entityIndex) {
await writeExtraKeyWithMeta(
ENTITY_INDEX_KEY,
data._entityIndex,
CACHE_TTL,
data._entityIndex.length,
);
}
delete data._state;
delete data._entityIndex;
},
});

View File

@@ -103,6 +103,7 @@ const RPC_CACHE_TIER: Record<string, CacheTier> = {
'/api/intelligence/v1/get-country-intel-brief': 'static',
'/api/climate/v1/list-climate-anomalies': 'static',
'/api/sanctions/v1/list-sanctions-pressure': 'static',
'/api/sanctions/v1/lookup-sanction-entity': 'no-store',
'/api/radiation/v1/list-radiation-observations': 'slow',
'/api/thermal/v1/list-thermal-escalations': 'slow',
'/api/research/v1/list-tech-events': 'static',

View File

@@ -1,7 +1,9 @@
import type { SanctionsServiceHandler } from '../../../../src/generated/server/worldmonitor/sanctions/v1/service_server';
import { listSanctionsPressure } from './list-sanctions-pressure';
import { lookupSanctionEntity } from './lookup-entity';
/**
 * SanctionsService handler object: maps each method required by the generated
 * SanctionsServiceHandler interface to its implementation module.
 */
export const sanctionsHandler: SanctionsServiceHandler = {
listSanctionsPressure,
lookupSanctionEntity,
};

View File

@@ -0,0 +1,89 @@
import type {
LookupSanctionEntityRequest,
LookupSanctionEntityResponse,
SanctionEntityMatch,
SanctionsServiceHandler,
ServerContext,
} from '../../../../src/generated/server/worldmonitor/sanctions/v1/service_server';
import { getCachedJson } from '../../../_shared/redis';
// Cache key holding the flat entity index array; must stay in sync with the key the sanctions seed script writes.
const ENTITY_INDEX_KEY = 'sanctions:entities:v1';
// Result count used when the requested maximum is non-finite or not positive.
const DEFAULT_MAX = 10;
// Upper bound applied to any requested maximum.
const MAX_RESULTS_LIMIT = 50;
// Minimum query length; shorter queries return an empty result without reading the index.
const MIN_QUERY_LENGTH = 2;
/** One record of the compact entity index read from ENTITY_INDEX_KEY. */
interface EntityIndexRecord {
/** Identifier of the entity, returned to callers unchanged. */
id: string;
/** Display name; the only field the lookup searches. */
name: string;
/** Compact entity type label, returned to callers as `entityType`. */
et: string;
/** Country codes, returned to callers as `countryCodes`. */
cc: string[];
/** Sanctions program codes, returned to callers as `programs`. */
pr: string[];
}
/**
 * Produces the canonical search form of a name or query: diacritics folded to
 * their base letters, lower-cased, every run of characters outside `a-z0-9`
 * collapsed to a single space, and surrounding spaces removed.
 *
 * Diacritics are folded before the ASCII filter so that "José" and "Jose"
 * compare equal; without that step the accented letter would be replaced by a
 * space and the name split into fragments.
 *
 * @param s Raw name or query text.
 * @returns The normalized text; an empty string when nothing searchable remains.
 */
function normalize(s: string): string {
  return s
    // Decompose accented letters into base letter + combining mark...
    .normalize('NFKD')
    // ...then drop the combining marks (must happen before lower-casing so
    // that e.g. U+0130 does not leave a stray combining dot behind).
    .replace(/[\u0300-\u036f]/g, '')
    .toLowerCase()
    .replace(/[^a-z0-9]+/g, ' ')
    .trim();
}
/**
 * Resolves a caller-supplied result limit to a usable whole number.
 *
 * @param value Requested maximum; may be NaN, infinite, zero, negative or fractional.
 * @returns DEFAULT_MAX when the value is non-finite or not positive; otherwise
 *          the truncated value, raised to at least 1 and capped at MAX_RESULTS_LIMIT.
 */
function clampMax(value: number): number {
  const usable = Number.isFinite(value) && value > 0;
  if (!usable) {
    return DEFAULT_MAX;
  }

  // A positive fraction below 1 truncates to 0, so lift it back to 1.
  const whole = Math.trunc(value);
  const atLeastOne = whole < 1 ? 1 : whole;
  return atLeastOne > MAX_RESULTS_LIMIT ? MAX_RESULTS_LIMIT : atLeastOne;
}
/**
 * Searches the cached entity index for names matching `req.q`.
 *
 * Matching is done on the normalized form of both the query and each name,
 * and every record receives at most one score:
 *   - 100  normalized name equals the query
 *   - 80   normalized name starts with the query
 *   - 40 to 60  every query token occurs in the name (60 minus the position of
 *               the first token, with the deduction capped at 20)
 *   - 10 per token  only some query tokens occur in the name
 *
 * Results are ordered by descending score and truncated to the clamped
 * `req.maxResults`; `total` is the number of matching records before
 * truncation.
 *
 * The lookup is best-effort: a query that is too short, a missing or
 * malformed index, or a cache read failure all yield an empty response rather
 * than an error.
 *
 * @param _ctx Server context (unused).
 * @param req  Lookup request carrying the query text and desired result count.
 * @returns Matches with `source` set to 'ofac'.
 */
export const lookupSanctionEntity: SanctionsServiceHandler['lookupSanctionEntity'] = async (
  _ctx: ServerContext,
  req: LookupSanctionEntityRequest,
): Promise<LookupSanctionEntityResponse> => {
  const emptyResponse = (): LookupSanctionEntityResponse => ({ results: [], total: 0, source: 'ofac' });

  // Validate the normalized query, not the raw one. Input such as "!!" or "--"
  // is long enough as typed but normalizes to an empty needle, and
  // `startsWith('')` would then match every record in the index.
  const needle = normalize((req.q ?? '').trim());
  if (needle.length < MIN_QUERY_LENGTH) {
    return emptyResponse();
  }

  const maxResults = clampMax(req.maxResults);
  const tokens = needle.split(' ').filter(Boolean);

  try {
    const raw = await getCachedJson(ENTITY_INDEX_KEY, true);
    if (!Array.isArray(raw)) return emptyResponse();

    const index = raw as EntityIndexRecord[];
    const scored: Array<{ score: number; entry: EntityIndexRecord }> = [];

    for (const entry of index) {
      // The cache payload is untyped at runtime; skip malformed records
      // instead of letting one bad entry blank the whole lookup.
      if (typeof entry?.name !== 'string') continue;

      const haystack = normalize(entry.name);

      if (haystack === needle) {
        scored.push({ score: 100, entry });
        continue;
      }
      if (haystack.startsWith(needle)) {
        scored.push({ score: 80, entry });
        continue;
      }
      if (tokens.length > 0 && tokens.every((t) => haystack.includes(t))) {
        // Earlier occurrence of the first token ranks higher.
        const pos = haystack.indexOf(tokens[0] ?? '');
        scored.push({ score: 60 - Math.min(pos, 20), entry });
        continue;
      }
      const matchCount = tokens.filter((t) => haystack.includes(t)).length;
      if (matchCount > 0) {
        scored.push({ score: matchCount * 10, entry });
      }
    }

    scored.sort((a, b) => b.score - a.score);

    const results: SanctionEntityMatch[] = scored.slice(0, maxResults).map(({ entry }) => ({
      id: entry.id,
      name: entry.name,
      entityType: entry.et,
      countryCodes: entry.cc,
      programs: entry.pr,
    }));

    return { results, total: scored.length, source: 'ofac' };
  } catch {
    // Deliberate best-effort: cache failures degrade to "no matches".
    return emptyResponse();
  }
};

View File

@@ -48,6 +48,25 @@ export interface ProgramSanctionsPressure {
newEntryCount: number;
}
export interface LookupSanctionEntityRequest {
q: string;
maxResults: number;
}
export interface LookupSanctionEntityResponse {
results: SanctionEntityMatch[];
total: number;
source: string;
}
export interface SanctionEntityMatch {
id: string;
name: string;
entityType: string;
countryCodes: string[];
programs: string[];
}
export type SanctionsEntityType = "SANCTIONS_ENTITY_TYPE_UNSPECIFIED" | "SANCTIONS_ENTITY_TYPE_ENTITY" | "SANCTIONS_ENTITY_TYPE_INDIVIDUAL" | "SANCTIONS_ENTITY_TYPE_VESSEL" | "SANCTIONS_ENTITY_TYPE_AIRCRAFT";
export interface FieldViolation {
@@ -123,6 +142,32 @@ export class SanctionsServiceClient {
return await resp.json() as ListSanctionsPressureResponse;
}
async lookupSanctionEntity(req: LookupSanctionEntityRequest, options?: SanctionsServiceCallOptions): Promise<LookupSanctionEntityResponse> {
let path = "/api/sanctions/v1/lookup-sanction-entity";
const params = new URLSearchParams();
if (req.q != null && req.q !== "") params.set("q", String(req.q));
if (req.maxResults != null && req.maxResults !== 0) params.set("max_results", String(req.maxResults));
const url = this.baseURL + path + (params.toString() ? "?" + params.toString() : "");
const headers: Record<string, string> = {
"Content-Type": "application/json",
...this.defaultHeaders,
...options?.headers,
};
const resp = await this.fetchFn(url, {
method: "GET",
headers,
signal: options?.signal,
});
if (!resp.ok) {
return this.handleError(resp);
}
return await resp.json() as LookupSanctionEntityResponse;
}
private async handleError(resp: Response): Promise<never> {
const body = await resp.text();
if (resp.status === 400) {

View File

@@ -48,6 +48,25 @@ export interface ProgramSanctionsPressure {
newEntryCount: number;
}
export interface LookupSanctionEntityRequest {
q: string;
maxResults: number;
}
export interface LookupSanctionEntityResponse {
results: SanctionEntityMatch[];
total: number;
source: string;
}
export interface SanctionEntityMatch {
id: string;
name: string;
entityType: string;
countryCodes: string[];
programs: string[];
}
export type SanctionsEntityType = "SANCTIONS_ENTITY_TYPE_UNSPECIFIED" | "SANCTIONS_ENTITY_TYPE_ENTITY" | "SANCTIONS_ENTITY_TYPE_INDIVIDUAL" | "SANCTIONS_ENTITY_TYPE_VESSEL" | "SANCTIONS_ENTITY_TYPE_AIRCRAFT";
export interface FieldViolation {
@@ -96,6 +115,7 @@ export interface RouteDescriptor {
export interface SanctionsServiceHandler {
listSanctionsPressure(ctx: ServerContext, req: ListSanctionsPressureRequest): Promise<ListSanctionsPressureResponse>;
lookupSanctionEntity(ctx: ServerContext, req: LookupSanctionEntityRequest): Promise<LookupSanctionEntityResponse>;
}
export function createSanctionsServiceRoutes(
@@ -150,6 +170,54 @@ export function createSanctionsServiceRoutes(
}
},
},
{
method: "GET",
path: "/api/sanctions/v1/lookup-sanction-entity",
handler: async (req: Request): Promise<Response> => {
try {
const pathParams: Record<string, string> = {};
const url = new URL(req.url, "http://localhost");
const params = url.searchParams;
const body: LookupSanctionEntityRequest = {
q: params.get("q") ?? "",
maxResults: Number(params.get("max_results") ?? "0"),
};
if (options?.validateRequest) {
const bodyViolations = options.validateRequest("lookupSanctionEntity", body);
if (bodyViolations) {
throw new ValidationError(bodyViolations);
}
}
const ctx: ServerContext = {
request: req,
pathParams,
headers: Object.fromEntries(req.headers.entries()),
};
const result = await handler.lookupSanctionEntity(ctx, body);
return new Response(JSON.stringify(result as LookupSanctionEntityResponse), {
status: 200,
headers: { "Content-Type": "application/json" },
});
} catch (err: unknown) {
if (err instanceof ValidationError) {
return new Response(JSON.stringify({ violations: err.violations }), {
status: 400,
headers: { "Content-Type": "application/json" },
});
}
if (options?.onError) {
return options.onError(err, req);
}
const message = err instanceof Error ? err.message : String(err);
return new Response(JSON.stringify({ message }), {
status: 500,
headers: { "Content-Type": "application/json" },
});
}
},
},
];
}

View File

@@ -80,6 +80,7 @@ describe('Legacy api/*.js endpoint allowlist', () => {
'seed-health.js',
'story.js',
'telegram-feed.js',
'sanctions-entity-search.js',
'version.js',
]);