Files
adiuva/src/main/db/vectordb.ts
Roberto Musso 77b94e2b27 US-023
2026-02-24 22:02:46 +01:00

148 lines
4.3 KiB
TypeScript

import * as lancedb from 'vectordb';
import { app } from 'electron';
import path from 'node:path';
import { getDb } from './index';
import { notes } from './schema';
import { embedText } from '../ai/embeddings';
/**
 * Shape of one row written to the LanceDB 'notes' table by this module.
 */
interface NoteRecord {
  /** Identifier of the note this row belongs to; also used to find the row on update. */
  id: string;

  /**
   * Owning project, or the empty string when the note has none
   * (Arrow string fields don't cleanly handle null).
   */
  projectId: string;

  /** The text that was passed to the embedding function for this row. */
  content: string;

  /** Embedding produced for `content`. */
  vector: number[];
}
/**
 * One hit returned by searchNotes(): the stored note fields plus the
 * distance value reported by the vector search.
 */
export interface SearchResult {
  /** Identifier of the matching note. */
  id: string;

  /** Owning project as stored in the table (empty string when the note has none). */
  projectId: string;

  /** The stored text of the matching row. */
  content: string;

  /** Value of the `_distance` field on the row returned by the search. */
  _distance: number;
}
/** Module-wide LanceDB connection; stays null until initVectorDb() has assigned it. */
let conn: lancedb.Connection | null = null;
/**
 * Open the LanceDB connection that every other function in this module
 * relies on, and keep it in module state. Call this before anything else here.
 *
 * The database lives in the `vectors` directory under Electron's
 * `userData` path.
 */
export async function initVectorDb(): Promise<void> {
  const userDataDir = app.getPath('userData');
  const storeDir = path.join(userDataDir, 'vectors');

  conn = await lancedb.connect(storeDir);
  console.log('[VectorDB] Connected at:', storeDir);
}
/**
 * Return the connection stored by initVectorDb().
 *
 * @throws Error when no connection has been stored yet.
 */
function getConn(): lancedb.Connection {
  if (conn) {
    return conn;
  }
  throw new Error('[VectorDB] Not initialized. Call initVectorDb() first.');
}
/**
 * Embed note content and upsert the record into the LanceDB 'notes' table.
 *
 * Upsert strategy: delete-then-add. The two steps are separate calls, not a
 * single atomic operation. The delete is expected to be a no-op when no row
 * matches, so the same path serves first-time inserts and updates.
 *
 * On the very first call, when the table does not yet exist, the table is
 * created from this single record and its schema is inferred from it.
 *
 * @param noteId    Identifier of the note; used as the row key.
 * @param projectId Owning project, or null (stored as the empty string).
 * @param content   Text to embed and store alongside the vector.
 * @throws Propagates any error from the embedding call or from LanceDB —
 *         callers fire-and-forget via .catch().
 */
export async function upsertNoteEmbedding(
  noteId: string,
  projectId: string | null,
  content: string,
): Promise<void> {
  const c = getConn();
  const vector = await embedText(content);
  const record: NoteRecord = {
    id: noteId,
    projectId: projectId ?? '',
    content,
    vector,
  };

  const tableNames = await c.tableNames();
  if (!tableNames.includes('notes')) {
    // First embedding: createTable infers the Arrow schema from this record,
    // so the length of this first vector is baked into the table.
    // NOTE(review): the original comment gives 1536 (text-embedding-3-small);
    // confirm against the embedText implementation.
    await c.createTable('notes', [record]);
    console.log('[VectorDB] Created notes table');
    return;
  }

  const table = await c.openTable<NoteRecord>('notes');

  // Note IDs are expected to be UUID v4, but the parameter is a plain string.
  // Double any single quote so the id always stays inside the SQL string
  // literal and cannot alter the delete predicate.
  const escapedId = noteId.replace(/'/g, "''");
  await table.delete(`id = '${escapedId}'`);
  await table.add([record]);
}
/**
 * One-time backfill: when the LanceDB 'notes' table is missing, embed every
 * note currently in SQLite (title and content joined by a blank line) and
 * insert it through upsertNoteEmbedding().
 *
 * A failure on one note is logged and the loop moves on to the next note.
 *
 * NOTE: the only gate is whether the 'notes' table exists. Once any note has
 * been embedded the table exists, so notes that failed during this run are
 * not retried by later calls.
 */
export async function migrateNotesIfNeeded(): Promise<void> {
  const existingTables = await getConn().tableNames();
  if (existingTables.includes('notes')) {
    console.log('[VectorDB] Notes table exists, skipping migration');
    return;
  }

  const storedNotes = getDb().select().from(notes).all();
  const total = storedNotes.length;
  if (total === 0) {
    console.log('[VectorDB] No existing notes to migrate');
    return;
  }

  console.log(`[VectorDB] Migrating ${total} notes...`);

  let embedded = 0;
  // Sequential on purpose: each note is awaited before the next one starts.
  for (const row of storedNotes) {
    try {
      await upsertNoteEmbedding(
        row.id,
        row.projectId ?? null,
        `${row.title}\n\n${row.content}`,
      );
      embedded += 1;
    } catch (err) {
      console.error(`[VectorDB] Failed to embed note ${row.id} during migration:`, err);
    }
  }

  console.log(`[VectorDB] Migration complete: ${embedded}/${total} notes embedded`);
}
/**
 * Similarity search over the LanceDB 'notes' table.
 *
 * The query string is embedded with embedText() and the resulting vector is
 * used to search the table. Rows are returned in the order the search yields
 * them (documented in this module as closest first).
 *
 * @param query Text to search for.
 * @param limit Maximum number of rows requested from the search (default 5).
 * @returns The matching rows, or an empty array when the 'notes' table does
 *          not exist yet.
 */
export async function searchNotes(query: string, limit = 5): Promise<SearchResult[]> {
  const db = getConn();

  const hasNotesTable = (await db.tableNames()).includes('notes');
  if (!hasNotesTable) {
    return [];
  }

  const queryVector = await embedText(query);
  const notesTable = await db.openTable('notes');
  const rows = await notesTable.search(queryVector).limit(limit).execute();

  // Copy only the fields exposed through SearchResult; the stored vector is dropped.
  const hits: SearchResult[] = [];
  for (const row of rows) {
    hits.push({
      id: row.id as string,
      projectId: row.projectId as string,
      content: row.content as string,
      _distance: row._distance as number,
    });
  }
  return hits;
}