fix: use null-byte delimiter in observation content hash to prevent collisions

Fields concatenated without separators allowed different tuples to produce
identical hashes (e.g. session="ab", title="cd" vs session="abc", title="d").
This could cause legitimate observations to be silently deduplicated.

Join with \x00 so field boundaries are unambiguous.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
79475432@qq.com
2026-03-26 17:16:17 +08:00
parent e2a230286d
commit 9cfa57d498
2 changed files with 11 additions and 1 deletions

View File

@@ -22,7 +22,7 @@ export function computeObservationContentHash(
narrative: string | null
): string {
return createHash('sha256')
.update((memorySessionId || '') + (title || '') + (narrative || ''))
.update([memorySessionId || '', title || '', narrative || ''].join('\x00'))
.digest('hex')
.slice(0, 16);
}

View File

@@ -69,6 +69,16 @@ describe('TRIAGE-03: Data Integrity', () => {
expect(hash.length).toBe(16);
});
it('computeObservationContentHash avoids collision from field boundary ambiguity', () => {
  // Each [sessionId, title] pair below concatenates to the same raw string
  // ('session-abcdebug log'); only the position of the field boundary differs.
  // A hash that ignores field boundaries would map all of them to one value.
  const boundarySplits: Array<[string, string]> = [
    ['session-abc', 'debug log'],
    ['session-ab', 'cdebug log'],
    ['session-', 'abcdebug log'],
    ['', 'session-abcdebug log'],
  ];

  const distinctHashes = new Set(
    boundarySplits.map(([sessionId, title]) =>
      computeObservationContentHash(sessionId, title, '')
    )
  );

  // Four different tuples must yield four different hashes.
  expect(distinctHashes.size).toBe(4);
});
it('storeObservation deduplicates identical observations within 30s window', () => {
const memId = createSessionWithMemoryId(db, 'content-dedup-1', 'mem-dedup-1');
const obs = createObservationInput({ title: 'Same Title', narrative: 'Same Narrative' });