148 lines
4.3 KiB
TypeScript
148 lines
4.3 KiB
TypeScript
import * as lancedb from 'vectordb';
|
|
import { app } from 'electron';
|
|
import path from 'node:path';
|
|
import { getDb } from './index';
|
|
import { notes } from './schema';
|
|
import { embedText } from '../ai/embeddings';
|
|
|
|
/**
 * Shape of one row written to the LanceDB 'notes' table: a note's identity
 * and text together with the embedding vector computed for that text.
 */
interface NoteRecord {
  /** Note identifier; the upsert path matches existing rows on this value. */
  id: string;

  /** Empty string when the note has no project (Arrow string fields don't cleanly handle null). */
  projectId: string;

  /** The text that was passed to the embedder. */
  content: string;

  /** Embedding produced by embedText for `content`. */
  vector: number[];
}
|
|
|
|
/** A single hit returned by searchNotes. */
export interface SearchResult {
  /** Identifier of the matching note. */
  id: string;

  /** Project the note belongs to; '' when the note was stored without one. */
  projectId: string;

  /** The stored text that was embedded for this note. */
  content: string;

  /** Distance reported by LanceDB for this row; smaller means closer to the query. */
  _distance: number;
}
|
|
|
|
// Module-wide LanceDB connection. Stays null until initVectorDb() assigns it;
// read it through getConn() so an uninitialized state fails loudly.
let conn: lancedb.Connection | null = null;
|
|
|
|
/**
 * Open the LanceDB connection shared by every other function in this module.
 * This has to run before any of them is called. The database files live in
 * the 'vectors' directory under Electron's userData path.
 */
export async function initVectorDb(): Promise<void> {
  const userDataDir = app.getPath('userData');
  const storageDir = path.join(userDataDir, 'vectors');

  conn = await lancedb.connect(storageDir);
  console.log('[VectorDB] Connected at:', storageDir);
}
|
|
|
|
/**
 * Return the live LanceDB connection.
 *
 * @throws Error when initVectorDb() has not established a connection yet.
 */
function getConn(): lancedb.Connection {
  if (conn === null) {
    throw new Error('[VectorDB] Not initialized. Call initVectorDb() first.');
  }

  return conn;
}
|
|
|
|
/**
 * Embed note content and upsert the record into the LanceDB 'notes' table.
 *
 * Upsert strategy: delete-then-add. table.delete(where) is a no-op when no
 * rows match, so the same path serves first-time inserts and later updates.
 * The two steps are not atomic: if add() fails after delete() succeeded, the
 * note has no embedding until the next successful upsert.
 *
 * On the very first call, when the table does not exist yet, createTable
 * infers the Arrow schema from the initial record.
 *
 * @param noteId    Note identifier, used as the row key.
 * @param projectId Owning project, or null; stored as '' when null.
 * @param content   Text to embed and store.
 * @throws When the module is not initialized, or when embedding or any
 *         LanceDB call fails — callers fire-and-forget via .catch().
 */
export async function upsertNoteEmbedding(
  noteId: string,
  projectId: string | null,
  content: string,
): Promise<void> {
  const c = getConn();
  const vector = await embedText(content);

  const record: NoteRecord = {
    id: noteId,
    projectId: projectId ?? '',
    content,
    vector,
  };

  const tableNames = await c.tableNames();

  if (!tableNames.includes('notes')) {
    try {
      // First embedding: createTable infers the Arrow schema from this record.
      // The vector dimension is baked in here (1536 if embedText uses
      // text-embedding-3-small — confirm against ../ai/embeddings).
      await c.createTable('notes', [record]);
      console.log('[VectorDB] Created notes table');
      return;
    } catch (err) {
      // Callers are fire-and-forget, so another upsert may have created the
      // table between our tableNames() check and createTable(). If the table
      // exists now, fall through to the normal delete-then-add path instead
      // of losing this embedding; otherwise the failure is real.
      const tableNamesNow = await c.tableNames();
      if (!tableNamesNow.includes('notes')) throw err;
    }
  }

  const table = await c.openTable<NoteRecord>('notes');

  // Note IDs are expected to be UUID v4, but the signature accepts any string.
  // Double embedded single quotes (standard SQL escaping) so the predicate
  // stays a single well-formed string literal whatever the ID contains.
  const idLiteral = noteId.replace(/'/g, "''");
  await table.delete(`id = '${idLiteral}'`);
  await table.add([record]);
}
|
|
|
|
/**
 * One-time backfill of the LanceDB 'notes' table from the SQLite notes.
 *
 * If the table already exists, nothing is done. Otherwise every SQLite note
 * is embedded (title, blank line, content) and written one at a time; the
 * first successful upsert is what creates the table.
 *
 * A failure on one note is logged and skipped so the remaining notes are
 * still processed.
 *
 * NOTE(review): because the existence of the table is the only "already
 * migrated" marker, notes that failed here are not retried on later startups.
 */
export async function migrateNotesIfNeeded(): Promise<void> {
  const connection = getConn();
  const existingTables = await connection.tableNames();
  const alreadyPopulated = existingTables.includes('notes');

  if (alreadyPopulated) {
    console.log('[VectorDB] Notes table exists, skipping migration');
    return;
  }

  const storedNotes = getDb().select().from(notes).all();
  const total = storedNotes.length;

  if (total === 0) {
    console.log('[VectorDB] No existing notes to migrate');
    return;
  }

  console.log(`[VectorDB] Migrating ${total} notes...`);

  let embedded = 0;

  // Deliberately sequential: the first upsert creates the table, and the
  // later ones rely on it existing.
  for (const { id, projectId, title, content } of storedNotes) {
    try {
      await upsertNoteEmbedding(id, projectId ?? null, `${title}\n\n${content}`);
      embedded += 1;
    } catch (err) {
      console.error(`[VectorDB] Failed to embed note ${id} during migration:`, err);
    }
  }

  console.log(`[VectorDB] Migration complete: ${embedded}/${total} notes embedded`);
}
|
|
|
|
/**
 * Embed the query string and run a similarity search over the LanceDB
 * 'notes' table.
 *
 * @param query Free-text search query.
 * @param limit Maximum number of hits to return (default 5). Fractional
 *              values are rounded down.
 * @returns Up to `limit` results, closest first. An empty array when the
 *          notes table does not exist yet, when `query` is blank, or when
 *          `limit` is not a finite number of at least 1.
 * @throws When the module is not initialized, or when embedding or the
 *         LanceDB query fails.
 */
export async function searchNotes(query: string, limit = 5): Promise<SearchResult[]> {
  // Resolve the connection first so an uninitialized module still fails
  // loudly, even for inputs that short-circuit below.
  const c = getConn();

  // Boundary inputs cannot produce hits: a blank query carries nothing to
  // match on, and a non-positive or non-finite limit asks for no rows.
  // Return early rather than spending an embedding call on them.
  if (!Number.isFinite(limit) || limit < 1 || query.trim() === '') {
    return [];
  }
  const maxResults = Math.floor(limit);

  const tableNames = await c.tableNames();

  if (!tableNames.includes('notes')) {
    return [];
  }

  const queryVector = await embedText(query);
  const table = await c.openTable('notes');
  const results = await table.search(queryVector).limit(maxResults).execute();

  // Rows come back loosely typed; the casts mirror the fields written by
  // upsertNoteEmbedding plus the _distance column added by the search.
  return results.map((r) => ({
    id: r.id as string,
    projectId: r.projectId as string,
    content: r.content as string,
    _distance: r._distance as number,
  }));
}
|