feat: integrate vectordb for note embeddings
- Added `vectordb` as a dependency in `package.json`. - Implemented `embedText` function in `src/main/ai/embeddings.ts` to handle text embeddings using GitHub Copilot OAuth token or OpenAI token. - Created `vectordb.ts` for managing LanceDB connection and embedding notes with upsert strategy. - Updated `index.ts` to initialize vector database and migrate existing notes on app ready. - Modified `router/index.ts` to fire-and-forget embedding calls on note creation and updates. - Enhanced `progress.txt` with detailed implementation notes and learnings regarding the integration.
This commit is contained in:
113
src/main/db/vectordb.ts
Normal file
113
src/main/db/vectordb.ts
Normal file
@@ -0,0 +1,113 @@
|
||||
import * as lancedb from 'vectordb';
|
||||
import { app } from 'electron';
|
||||
import path from 'node:path';
|
||||
import { getDb } from './index';
|
||||
import { notes } from './schema';
|
||||
import { embedText } from '../ai/embeddings';
|
||||
|
||||
/**
 * Shape of one row in the LanceDB 'notes' table as written by this module.
 */
interface NoteRecord {
  /** Identifier of the note this row belongs to; used as the upsert key. */
  id: string;
  /**
   * Owning project's id. Notes without a project are stored with an empty
   * string rather than null (Arrow string fields don't cleanly handle null).
   */
  projectId: string;
  /** The text that was passed to the embedder for this note. */
  content: string;
  /** Embedding returned by embedText for `content`. */
  vector: Array<number>;
}
|
||||
|
||||
/** Shared LanceDB connection; stays null until initVectorDb() has resolved. */
let conn: lancedb.Connection | null = null;

/**
 * Open the LanceDB connection used by every other function in this module.
 *
 * The database lives in a `vectors` directory inside Electron's `userData`
 * path. Call this once before getConn(), upsertNoteEmbedding() or
 * migrateNotesIfNeeded(); those throw while the connection is still null.
 *
 * @returns Resolves once the connection is stored and the path was logged.
 */
export async function initVectorDb(): Promise<void> {
  const userDataDir = app.getPath('userData');
  const vectorPath = path.join(userDataDir, 'vectors');

  const connection = await lancedb.connect(vectorPath);
  conn = connection;

  console.log('[VectorDB] Connected at:', vectorPath);
}
|
||||
|
||||
/**
 * Return the live LanceDB connection.
 *
 * @throws Error when initVectorDb() has not yet stored a connection.
 */
function getConn(): lancedb.Connection {
  if (conn === null) {
    throw new Error('[VectorDB] Not initialized. Call initVectorDb() first.');
  }

  return conn;
}
|
||||
|
||||
/**
 * Tail of the promise chain that serializes the table-write phase of
 * upsertNoteEmbedding. It never rejects, so one failed write cannot block
 * the writes queued behind it.
 */
let pendingNoteWrite: Promise<void> = Promise.resolve();

/**
 * Embed note content and upsert the record into the LanceDB 'notes' table.
 *
 * Upsert strategy: delete-then-add. Deleting with a filter that matches no
 * rows leaves the table unchanged, so the same path serves first-time
 * inserts and later updates.
 *
 * When the 'notes' table does not exist yet, it is created from this record
 * and createTable infers the Arrow schema (including the vector length)
 * from it.
 *
 * Callers fire-and-forget this function, so several calls can be in flight
 * at once. The embedding request runs concurrently, but the table-write
 * phase (existence check, create, delete, add) is queued so that only one
 * runs at a time. Without that, two calls could both try to create the
 * table, or interleave their delete/add pairs and leave duplicate rows for
 * one note.
 *
 * @param noteId    Id of the note; used as the upsert key.
 * @param projectId Owning project id, or null; null is stored as ''.
 * @param content   Text to embed and store alongside the vector.
 * @throws Rejects if the module is not initialized, or if embedding or any
 *         LanceDB call fails — callers fire-and-forget via .catch().
 */
export async function upsertNoteEmbedding(
  noteId: string,
  projectId: string | null,
  content: string,
): Promise<void> {
  const c = getConn();
  const vector = await embedText(content);

  const record: NoteRecord = {
    id: noteId,
    projectId: projectId ?? '',
    content,
    vector,
  };

  const writeRecord = async (): Promise<void> => {
    const tableNames = await c.tableNames();

    if (!tableNames.includes('notes')) {
      // First embedding: createTable infers the Arrow schema from this record,
      // so the length of this vector becomes the table's vector dimension.
      await c.createTable('notes', [record]);
      console.log('[VectorDB] Created notes table');
      return;
    }

    const table = await c.openTable<NoteRecord>('notes');
    // The filter is a SQL expression built from a string. Note ids are
    // expected to be UUIDs, but double any single quote so an unexpected id
    // can never terminate the literal early.
    const idLiteral = noteId.replace(/'/g, "''");
    await table.delete(`id = '${idLiteral}'`);
    await table.add([record]);
  };

  // Start this write only after every previously queued write has settled.
  const write = pendingNoteWrite.then(writeRecord);
  // Swallow the failure on the shared chain only; this call still rejects
  // through `write` below.
  pendingNoteWrite = write.catch(() => undefined);

  await write;
}
|
||||
|
||||
/**
 * One-time backfill of embeddings for notes that already exist in SQLite.
 *
 * Does nothing when the LanceDB 'notes' table is already present or when
 * SQLite holds no notes. Otherwise every note is embedded one after another
 * as "<title>\n\n<content>" through upsertNoteEmbedding.
 *
 * A failure on one note is logged and the loop moves on to the next note.
 * Because the skip check is only "does the table exist", notes that failed
 * here are not retried by this function once any note has created the table.
 *
 * @throws Error if the vector database has not been initialized.
 */
export async function migrateNotesIfNeeded(): Promise<void> {
  const connection = getConn();
  const existingTables = await connection.tableNames();

  if (existingTables.includes('notes')) {
    console.log('[VectorDB] Notes table exists, skipping migration');
    return;
  }

  const rows = getDb().select().from(notes).all();
  const total = rows.length;

  if (total === 0) {
    console.log('[VectorDB] No existing notes to migrate');
    return;
  }

  console.log(`[VectorDB] Migrating ${total} notes...`);

  let embedded = 0;
  for (const { id, title, content, projectId } of rows) {
    try {
      // Title and body are embedded together, separated by a blank line.
      await upsertNoteEmbedding(id, projectId ?? null, `${title}\n\n${content}`);
      embedded += 1;
    } catch (err) {
      console.error(`[VectorDB] Failed to embed note ${id} during migration:`, err);
    }
  }

  console.log(`[VectorDB] Migration complete: ${embedded}/${total} notes embedded`);
}
|
||||
Reference in New Issue
Block a user