mirror of
https://github.com/thedotmack/claude-mem
synced 2026-04-25 17:15:04 +02:00
fix: use null-byte delimiter in observation content hash to prevent collisions
Fields were concatenated without separators, so different tuples could produce identical hashes (e.g. session="ab", title="cd" vs. session="abc", title="d" — both feed the string "abcd" to the hash). This could cause legitimate observations to be silently deduplicated. Join the fields with \x00 so field boundaries are unambiguous.
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -22,7 +22,7 @@ export function computeObservationContentHash(
|
||||
narrative: string | null
|
||||
): string {
|
||||
return createHash('sha256')
|
||||
.update((memorySessionId || '') + (title || '') + (narrative || ''))
|
||||
.update([memorySessionId || '', title || '', narrative || ''].join('\x00'))
|
||||
.digest('hex')
|
||||
.slice(0, 16);
|
||||
}
|
||||
|
||||
@@ -69,6 +69,16 @@ describe('TRIAGE-03: Data Integrity', () => {
|
||||
expect(hash.length).toBe(16);
|
||||
});
|
||||
|
||||
it('computeObservationContentHash avoids collision from field boundary ambiguity', () => {
|
||||
// These tuples would collide without a delimiter between fields
|
||||
const hash1 = computeObservationContentHash('session-abc', 'debug log', '');
|
||||
const hash2 = computeObservationContentHash('session-ab', 'cdebug log', '');
|
||||
const hash3 = computeObservationContentHash('session-', 'abcdebug log', '');
|
||||
const hash4 = computeObservationContentHash('', 'session-abcdebug log', '');
|
||||
const hashes = new Set([hash1, hash2, hash3, hash4]);
|
||||
expect(hashes.size).toBe(4);
|
||||
});
|
||||
|
||||
it('storeObservation deduplicates identical observations within 30s window', () => {
|
||||
const memId = createSessionWithMemoryId(db, 'content-dedup-1', 'mem-dedup-1');
|
||||
const obs = createObservationInput({ title: 'Same Title', narrative: 'Same Narrative' });
|
||||
|
||||
Reference in New Issue
Block a user