import * as lancedb from 'vectordb';
import { app } from 'electron';
import path from 'node:path';
import { getDb } from './index';
import { notes } from './schema';
import { embedText } from '../ai/embeddings';

/** Name of the LanceDB table that holds one embedded row per note. */
const NOTES_TABLE = 'notes';

/** Row shape written to the LanceDB notes table. */
interface NoteRecord {
  id: string;
  /** Empty string when the note has no project (Arrow string fields don't cleanly handle null) */
  projectId: string;
  content: string;
  vector: number[];
}

/** A single similarity-search hit returned by searchNotes(). */
export interface SearchResult {
  id: string;
  /** Empty string when the note has no project (mirrors how rows are stored). */
  projectId: string;
  content: string;
  /** Distance between the query vector and this row, as reported by LanceDB. */
  _distance: number;
}

/** Module-wide LanceDB connection; null until initVectorDb() has resolved. */
let conn: lancedb.Connection | null = null;

/**
 * Initialize the LanceDB connection. Must be called before any other
 * function in this module. Vector data is stored at userData/vectors/.
 */
export async function initVectorDb(): Promise<void> {
  const vectorPath = path.join(app.getPath('userData'), 'vectors');
  conn = await lancedb.connect(vectorPath);
  console.log('[VectorDB] Connected at:', vectorPath);
}

/**
 * Return the active connection.
 *
 * @throws Error if initVectorDb() has not completed yet.
 */
function getConn(): lancedb.Connection {
  if (!conn) throw new Error('[VectorDB] Not initialized. Call initVectorDb() first.');
  return conn;
}

/**
 * Render a value as a single-quoted SQL string literal for a LanceDB filter,
 * doubling embedded single quotes so the value cannot terminate the literal.
 */
function sqlStringLiteral(value: string): string {
  return `'${value.replace(/'/g, "''")}'`;
}

/**
 * Embed note content and upsert the record into the LanceDB 'notes' table.
 *
 * Upsert strategy: delete-then-add.
 * table.delete(where) is a no-op when no rows match, so this is safe for
 * both first-time inserts and subsequent updates.
 *
 * On the very first call when the table does not yet exist, createTable
 * infers the Arrow schema from the initial record.
 *
 * Throws on error — callers fire-and-forget via .catch().
 *
 * @param noteId - ID of the note; used as the row key in the vector table.
 * @param projectId - Owning project, or null; null is stored as an empty string.
 * @param content - Text that is embedded and stored alongside the vector.
 */
export async function upsertNoteEmbedding(
  noteId: string,
  projectId: string | null,
  content: string,
): Promise<void> {
  const c = getConn();
  const vector = await embedText(content);

  const record: NoteRecord = {
    id: noteId,
    projectId: projectId ?? '',
    content,
    vector,
  };

  const tableNames = await c.tableNames();
  if (!tableNames.includes(NOTES_TABLE)) {
    // First embedding: createTable infers the Arrow schema from this record.
    // The vector dimension (1536 for text-embedding-3-small) is baked in here.
    await c.createTable(NOTES_TABLE, [record]);
    console.log('[VectorDB] Created notes table');
    return;
  }

  const table = await c.openTable(NOTES_TABLE);
  // Note IDs are expected to be UUID v4, but the filter is still built through
  // sqlStringLiteral so an unexpected quote can never break out of the literal.
  await table.delete(`id = ${sqlStringLiteral(noteId)}`);
  await table.add([record]);
}

/**
 * On first startup, check if the LanceDB 'notes' table exists.
 * If not, embed all existing SQLite notes and populate LanceDB.
 *
 * Per-note errors are caught and logged; a single failure does not
 * abort the remaining notes.
 *
 * NOTE: the table is created by the first note that embeds successfully, and
 * an existing table short-circuits this function. Notes that fail during a
 * partially successful migration are therefore not retried on later startups.
 */
export async function migrateNotesIfNeeded(): Promise<void> {
  const c = getConn();

  const tableNames = await c.tableNames();
  if (tableNames.includes(NOTES_TABLE)) {
    console.log('[VectorDB] Notes table exists, skipping migration');
    return;
  }

  const allNotes = getDb().select().from(notes).all();
  if (allNotes.length === 0) {
    console.log('[VectorDB] No existing notes to migrate');
    return;
  }

  console.log(`[VectorDB] Migrating ${allNotes.length} notes...`);
  let successCount = 0;

  // Sequential on purpose: the first successful upsert creates the table and
  // every later one takes the delete-then-add path against it.
  for (const note of allNotes) {
    try {
      const embeddingText = `${note.title}\n\n${note.content}`;
      await upsertNoteEmbedding(note.id, note.projectId ?? null, embeddingText);
      successCount++;
    } catch (err) {
      console.error(`[VectorDB] Failed to embed note ${note.id} during migration:`, err);
    }
  }

  console.log(`[VectorDB] Migration complete: ${successCount}/${allNotes.length} notes embedded`);
}

/**
 * Embed the query string and perform a similarity search across all notes
 * in the LanceDB 'notes' table. Returns up to `limit` results sorted by
 * distance (closest first).
 *
 * Returns an empty array if the notes table does not exist yet.
 *
 * @param query - Free-text query to embed and search with.
 * @param limit - Maximum number of hits to return (default 5).
 */
export async function searchNotes(query: string, limit = 5): Promise<SearchResult[]> {
  const c = getConn();

  const tableNames = await c.tableNames();
  if (!tableNames.includes(NOTES_TABLE)) {
    // Nothing has been embedded yet; skip the embedding call entirely.
    return [];
  }

  const queryVector = await embedText(query);
  const table = await c.openTable(NOTES_TABLE);
  const results = await table.search(queryVector).limit(limit).execute();

  // Project each raw row onto the public SearchResult shape (drops the vector).
  return results.map((r) => ({
    id: r.id as string,
    projectId: r.projectId as string,
    content: r.content as string,
    _distance: r._distance as number,
  }));
}