Add a normalized `similarity` score to MemorySearch and SynapseSearch results.

Every record yielded by the underlying search keeps its original fields
(including `$distance`) and gains `similarity = 1 - $distance / 2`, clamped to
the range [0, 1]. A record without `$distance` is scored as distance 0.

NOTE(review): indentation in this patch was reconstructed (2 spaces assumed),
and the `index` lines still carry the blob IDs from before the upper-bound
clamp and its test were added. Regenerate the patch from a working tree if
exact whitespace or blob IDs matter.

diff --git a/resources.js b/resources.js
index d0927f7..54eb44e 100644
--- a/resources.js
+++ b/resources.js
@@ -369,7 +369,15 @@ export class MemorySearch extends Resource {
 
     const results = [];
     for await (const record of Memory.search(searchParams)) {
-      results.push(record);
+      // Map Harper's cosine distance (0-2 range) onto a 0-1 similarity score:
+      // similarity = 1 - distance / 2. A missing $distance counts as 0, and the
+      // result is clamped at both ends in case the reported distance falls
+      // slightly outside 0-2 (e.g. through floating-point error).
+      const similarity = Math.min(1, Math.max(0, 1 - (record.$distance || 0) / 2));
+      results.push({
+        ...record,
+        similarity,
+      });
     }
 
     return { results, count: results.length };
@@ -810,7 +818,15 @@ export class SynapseSearch extends Resource {
 
     const results = [];
     for await (const record of SynapseEntryBase.search(searchParams)) {
-      results.push(record);
+      // Map Harper's cosine distance (0-2 range) onto a 0-1 similarity score:
+      // similarity = 1 - distance / 2. A missing $distance counts as 0, and the
+      // result is clamped at both ends in case the reported distance falls
+      // slightly outside 0-2 (e.g. through floating-point error).
+      const similarity = Math.min(1, Math.max(0, 1 - (record.$distance || 0) / 2));
+      results.push({
+        ...record,
+        similarity,
+      });
     }
 
     return { results, count: results.length };
diff --git a/test/score-normalization.test.js b/test/score-normalization.test.js
new file mode 100644
index 0000000..504c937
--- /dev/null
+++ b/test/score-normalization.test.js
@@ -0,0 +1,224 @@
+import assert from 'node:assert/strict';
+import { describe, it, mock } from 'node:test';
+
+const mockSearch = mock.fn(function*() {});
+
+class MockMemory {
+  static put = mock.fn();
+  static search = mockSearch;
+  static get = mock.fn();
+}
+
+class MockSynapseEntry {
+  static put = mock.fn();
+  static search = mockSearch;
+  static get = mock.fn();
+}
+
+mock.module('harperdb', {
+  namedExports: {
+    Resource: class Resource {},
+    tables: { Memory: MockMemory, SynapseEntry: MockSynapseEntry },
+  },
+});
+
+mock.module('@anthropic-ai/sdk', {
+  defaultExport: class Anthropic {
+    constructor() {
+      this.messages = { create: mock.fn() };
+    }
+  },
+});
+
+const mockExtractor = mock.fn();
+mock.module('@xenova/transformers', {
+  namedExports: {
+    pipeline: mock.fn(async () => mockExtractor),
+  },
+});
+
+process.env.ANTHROPIC_API_KEY = 'test-key';
+
+const { MemorySearch, SynapseSearch } = await import('../resources.js');
+
+describe('Score Normalization', () => {
+  describe('MemorySearch', () => {
+    it('normalizes distance 0 to similarity 1 (perfect match)', async () => {
+      mockExtractor.mock.mockImplementation(async () => ({
+        data: new Float32Array(384).fill(0.5),
+      }));
+
+      mockSearch.mock.mockImplementation(function*() {
+        yield {
+          id: 'test-id',
+          rawText: 'test message',
+          classification: 'decision',
+          $distance: 0,
+        };
+      });
+
+      const search = new MemorySearch();
+      const result = await search.post({ query: 'test' });
+
+      assert.equal(result.results[0].similarity, 1);
+    });
+
+    it('normalizes distance 1 to similarity 0.5 (moderate match)', async () => {
+      mockExtractor.mock.mockImplementation(async () => ({
+        data: new Float32Array(384).fill(0.5),
+      }));
+
+      mockSearch.mock.mockImplementation(function*() {
+        yield {
+          id: 'test-id',
+          rawText: 'test message',
+          classification: 'decision',
+          $distance: 1,
+        };
+      });
+
+      const search = new MemorySearch();
+      const result = await search.post({ query: 'test' });
+
+      assert.equal(result.results[0].similarity, 0.5);
+    });
+
+    it('normalizes distance 2 to similarity 0 (no match)', async () => {
+      mockExtractor.mock.mockImplementation(async () => ({
+        data: new Float32Array(384).fill(0.5),
+      }));
+
+      mockSearch.mock.mockImplementation(function*() {
+        yield {
+          id: 'test-id',
+          rawText: 'test message',
+          classification: 'decision',
+          $distance: 2,
+        };
+      });
+
+      const search = new MemorySearch();
+      const result = await search.post({ query: 'test' });
+
+      assert.equal(result.results[0].similarity, 0);
+    });
+
+    it('clamps negative similarity to 0', async () => {
+      mockExtractor.mock.mockImplementation(async () => ({
+        data: new Float32Array(384).fill(0.5),
+      }));
+
+      mockSearch.mock.mockImplementation(function*() {
+        yield {
+          id: 'test-id',
+          rawText: 'test message',
+          classification: 'decision',
+          $distance: 2.5, // edge case beyond 2
+        };
+      });
+
+      const search = new MemorySearch();
+      const result = await search.post({ query: 'test' });
+
+      assert.equal(result.results[0].similarity, 0);
+    });
+
+    it('clamps similarity to 1 when distance is slightly negative', async () => {
+      mockExtractor.mock.mockImplementation(async () => ({
+        data: new Float32Array(384).fill(0.5),
+      }));
+
+      mockSearch.mock.mockImplementation(function*() {
+        yield {
+          id: 'test-id',
+          rawText: 'test message',
+          classification: 'decision',
+          $distance: -1e-7, // edge case just below 0
+        };
+      });
+
+      const search = new MemorySearch();
+      const result = await search.post({ query: 'test' });
+
+      assert.equal(result.results[0].similarity, 1);
+    });
+
+    it('includes similarity alongside $distance in results', async () => {
+      mockExtractor.mock.mockImplementation(async () => ({
+        data: new Float32Array(384).fill(0.5),
+      }));
+
+      mockSearch.mock.mockImplementation(function*() {
+        yield {
+          id: 'test-id',
+          rawText: 'test message',
+          classification: 'decision',
+          $distance: 0.3,
+        };
+      });
+
+      const search = new MemorySearch();
+      const result = await search.post({ query: 'test' });
+
+      assert.ok(result.results[0].$distance !== undefined);
+      assert.ok(result.results[0].similarity !== undefined);
+      assert.equal(result.results[0].similarity, 1 - 0.3 / 2);
+    });
+  });
+
+  describe('SynapseSearch', () => {
+    it('normalizes distance for Synapse entries', async () => {
+      mockExtractor.mock.mockImplementation(async () => ({
+        data: new Float32Array(384).fill(0.5),
+      }));
+
+      mockSearch.mock.mockImplementation(function*() {
+        yield {
+          id: 'synapse-1',
+          type: 'intent',
+          content: 'design pattern',
+          $distance: 0.5,
+        };
+      });
+
+      const search = new SynapseSearch();
+      const result = await search.post({
+        query: 'architecture',
+        projectId: 'proj-1',
+      });
+
+      assert.equal(result.results[0].similarity, 1 - 0.5 / 2);
+    });
+
+    it('returns normalized scores for multiple Synapse results', async () => {
+      mockExtractor.mock.mockImplementation(async () => ({
+        data: new Float32Array(384).fill(0.5),
+      }));
+
+      mockSearch.mock.mockImplementation(function*() {
+        yield {
+          id: 'synapse-1',
+          type: 'intent',
+          content: 'design pattern',
+          $distance: 0.2,
+        };
+        yield {
+          id: 'synapse-2',
+          type: 'constraint',
+          content: 'must use REST',
+          $distance: 0.8,
+        };
+      });
+
+      const search = new SynapseSearch();
+      const result = await search.post({
+        query: 'architecture',
+        projectId: 'proj-1',
+      });
+
+      assert.equal(result.count, 2);
+      assert.equal(result.results[0].similarity, 1 - 0.2 / 2);
+      assert.equal(result.results[1].similarity, 1 - 0.8 / 2);
+    });
+  });
+});