feat: integrate vectordb for note embeddings
- Added `vectordb` as a dependency in `package.json`.
- Implemented the `embedText` function in `src/main/ai/embeddings.ts` to generate text embeddings using a GitHub Copilot OAuth token or, when none is found, a stored OpenAI token.
- Created `vectordb.ts` to manage the LanceDB connection and embed notes with a delete-then-add upsert strategy.
- Updated `index.ts` to initialize the vector database and migrate existing notes on app ready.
- Modified `router/index.ts` to make fire-and-forget embedding calls on note creation and update.
- Extended `progress.txt` with detailed implementation notes and learnings from the integration.
This commit is contained in:
73
src/main/ai/embeddings.ts
Normal file
73
src/main/ai/embeddings.ts
Normal file
@@ -0,0 +1,73 @@
|
||||
import fs from 'node:fs';
|
||||
import os from 'node:os';
|
||||
import path from 'node:path';
|
||||
import { getToken } from './token';
|
||||
|
||||
/**
 * Shape of the part of the Copilot CLI config file this module reads.
 * The file is external input, so the field is typed as `unknown` and
 * narrowed at runtime instead of being trusted.
 */
interface CopilotConfig {
  copilot_tokens?: unknown;
}

/**
 * Read a GitHub Copilot OAuth token from the CLI config file at
 * ~/.copilot/config.json, under the `copilot_tokens` map
 * (keyed by "{host}:{login}" according to the original note).
 *
 * @returns The first entry of `copilot_tokens` that is a non-empty string,
 *   or null when the file is missing, unreadable, not valid JSON, or holds
 *   no usable token.
 */
function readCopilotToken(): string | null {
  let parsed: unknown;
  try {
    const configPath = path.join(os.homedir(), '.copilot', 'config.json');
    parsed = JSON.parse(fs.readFileSync(configPath, 'utf-8'));
  } catch {
    // Missing file, permission error, or malformed JSON: treat as "no token".
    return null;
  }

  // JSON.parse may legally yield null or a primitive; only objects can hold tokens.
  if (typeof parsed !== 'object' || parsed === null) {
    return null;
  }

  const tokens = (parsed as CopilotConfig).copilot_tokens;
  if (typeof tokens !== 'object' || tokens === null) {
    return null;
  }

  // Skip entries that are not usable credentials (wrong type or empty) so the
  // declared `string | null` return type is actually honoured at runtime.
  for (const candidate of Object.values(tokens as Record<string, unknown>)) {
    if (typeof candidate === 'string' && candidate.length > 0) {
      return candidate;
    }
  }
  return null;
}
|
||||
|
||||
/**
 * Produce an embedding vector for a single piece of text.
 *
 * Credential lookup order:
 *   1. A token from the GitHub Copilot CLI config, sent to the
 *      OpenAI-compatible endpoint at https://api.githubcopilot.com.
 *   2. Otherwise the stored OpenAI token, sent to the default OpenAI API.
 *
 * @param text - The text to embed.
 * @returns The embedding vector for `text`.
 * @throws If neither credential is available, if the embeddings request
 *   rejects, or if the response contains no vector. Callers that
 *   fire-and-forget must attach a `.catch()` so the rejection never fails
 *   the surrounding tRPC mutation.
 */
export async function embedText(text: string): Promise<number[]> {
  // Dynamic import: the module is resolved when this function runs, not at file load.
  const { OpenAIEmbeddings } = await import('@langchain/openai');
  const model = 'text-embedding-3-small';

  // Shared tail of both credential paths: request one vector and validate it.
  const requestVector = async (
    client: InstanceType<typeof OpenAIEmbeddings>,
  ): Promise<number[]> => {
    // embedDocuments returns number[][]; the cast mirrors the strict-TS workaround.
    const vectors = (await client.embedDocuments([text])) as number[][];
    const first: number[] | undefined = vectors[0];
    if (!first || first.length === 0) {
      throw new Error('[Embeddings] Empty vector returned from embedding API');
    }
    return first;
  };

  // Preferred credentials: Copilot CLI token against the Copilot endpoint.
  const copilotToken = readCopilotToken();
  if (copilotToken) {
    return requestVector(
      new OpenAIEmbeddings({
        apiKey: copilotToken,
        model,
        configuration: { baseURL: 'https://api.githubcopilot.com' },
      }),
    );
  }

  // Fallback credentials: the OpenAI token kept in the app's token store.
  const openaiToken = await getToken('openai');
  if (!openaiToken) {
    throw new Error(
      '[Embeddings] No credentials available. Authenticate with Copilot CLI or add an OpenAI token in Settings.',
    );
  }
  return requestVector(new OpenAIEmbeddings({ apiKey: openaiToken, model }));
}
|
||||
113
src/main/db/vectordb.ts
Normal file
113
src/main/db/vectordb.ts
Normal file
@@ -0,0 +1,113 @@
|
||||
import * as lancedb from 'vectordb';
|
||||
import { app } from 'electron';
|
||||
import path from 'node:path';
|
||||
import { getDb } from './index';
|
||||
import { notes } from './schema';
|
||||
import { embedText } from '../ai/embeddings';
|
||||
|
||||
/** One row of the LanceDB 'notes' table. */
interface NoteRecord {
  /** Note identifier; also used as the match key when replacing a row. */
  id: string;
  /**
   * Owning project id. Stored as an empty string when the note has no
   * project, because Arrow string fields don't cleanly handle null.
   */
  projectId: string;
  /** The text that was embedded. */
  content: string;
  /** Embedding vector computed from `content`. */
  vector: number[];
}
|
||||
|
||||
/** Module-level LanceDB connection handle; stays null until initVectorDb() assigns it. */
let conn: lancedb.Connection | null = null;
|
||||
|
||||
/**
 * Open the LanceDB connection used by every other function in this module.
 * Call this once before anything else here; the vector data lives in a
 * `vectors` directory under Electron's userData path.
 *
 * @throws If `lancedb.connect` rejects.
 */
export async function initVectorDb(): Promise<void> {
  const userDataDir = app.getPath('userData');
  const vectorPath = path.join(userDataDir, 'vectors');

  // Publish the handle only after the connection has been established.
  const connection = await lancedb.connect(vectorPath);
  conn = connection;

  console.log('[VectorDB] Connected at:', vectorPath);
}
|
||||
|
||||
/**
 * Return the active LanceDB connection.
 *
 * @throws If no connection has been assigned yet (initVectorDb() not run).
 */
function getConn(): lancedb.Connection {
  if (conn === null) {
    throw new Error('[VectorDB] Not initialized. Call initVectorDb() first.');
  }
  return conn;
}
|
||||
|
||||
/**
 * Embed a note's text and store it in the LanceDB 'notes' table, replacing
 * any row already stored under the same id.
 *
 * Upsert strategy: delete-then-add. Deleting with a filter that matches no
 * rows is a no-op, so the same path serves first-time inserts and updates.
 *
 * When the table does not exist yet it is created from this record, which
 * lets createTable infer the Arrow schema (including the vector dimension)
 * from the first row. If creation fails because a concurrent upsert created
 * the table first, the call falls through to the normal delete-then-add path.
 *
 * @param noteId - Id of the note; used as the row key.
 * @param projectId - Owning project id, or null (stored as an empty string).
 * @param content - Text to embed and store alongside the vector.
 * @throws If the vector DB is not initialized, embedding fails, or a LanceDB
 *   call rejects. Callers fire-and-forget via `.catch()`.
 */
export async function upsertNoteEmbedding(
  noteId: string,
  projectId: string | null,
  content: string,
): Promise<void> {
  const c = getConn();
  const vector = await embedText(content);

  const record: NoteRecord = {
    id: noteId,
    projectId: projectId ?? '',
    content,
    vector,
  };

  const tableNames = await c.tableNames();

  if (!tableNames.includes('notes')) {
    try {
      // First embedding: createTable infers the Arrow schema from this record.
      // The vector dimension (1536 for text-embedding-3-small) is baked in here.
      await c.createTable('notes', [record]);
      console.log('[VectorDB] Created notes table');
      return;
    } catch (err) {
      // Another in-flight upsert may have created the table between the
      // existence check and createTable. Only swallow the error when the
      // table now exists; otherwise it is a genuine failure.
      const refreshed = await c.tableNames();
      if (!refreshed.includes('notes')) {
        throw err;
      }
    }
  }

  const table = await c.openTable<NoteRecord>('notes');

  // The filter is a SQL expression built from a caller-supplied string, so
  // double any single quotes to keep the id inside the string literal.
  const escapedId = noteId.replace(/'/g, "''");
  await table.delete(`id = '${escapedId}'`);
  await table.add([record]);
}
|
||||
|
||||
/**
 * One-time backfill: when the LanceDB 'notes' table does not exist, embed
 * every note currently in SQLite and store it in LanceDB.
 *
 * Notes are processed one at a time. A failure on an individual note is
 * logged and skipped so the remaining notes are still attempted.
 *
 * NOTE(review): the only trigger is the absence of the 'notes' table. Once
 * any embedding has created the table, later runs skip the backfill, so
 * notes that failed (or were never reached) are not retried here.
 *
 * @throws If the vector DB is not initialized or listing tables / reading
 *   notes fails; per-note embedding errors are caught.
 */
export async function migrateNotesIfNeeded(): Promise<void> {
  const existingTables = await getConn().tableNames();
  if (existingTables.includes('notes')) {
    console.log('[VectorDB] Notes table exists, skipping migration');
    return;
  }

  const pending = getDb().select().from(notes).all();
  const total = pending.length;
  if (total === 0) {
    console.log('[VectorDB] No existing notes to migrate');
    return;
  }

  console.log(`[VectorDB] Migrating ${total} notes...`);

  let embedded = 0;
  for (const note of pending) {
    try {
      // Title and body are embedded together, separated by a blank line.
      const { title, content } = note;
      await upsertNoteEmbedding(note.id, note.projectId ?? null, `${title}\n\n${content}`);
      embedded += 1;
    } catch (err) {
      console.error(`[VectorDB] Failed to embed note ${note.id} during migration:`, err);
    }
  }

  console.log(`[VectorDB] Migration complete: ${embedded}/${total} notes embedded`);
}
|
||||
@@ -5,6 +5,7 @@ import { initDb } from './db';
|
||||
import { appRouter } from './router';
|
||||
import { createIPCHandler } from './ipc';
|
||||
import { initAI } from './ai/provider';
|
||||
import { initVectorDb, migrateNotesIfNeeded } from './db/vectordb';
|
||||
// Import to trigger provider registration before initAI() runs
|
||||
import './ai/copilot';
|
||||
|
||||
@@ -54,6 +55,10 @@ app.on('ready', () => {
|
||||
createIPCHandler({ router: appRouter, windows: [win] });
|
||||
// AI init is best-effort — never block window creation
|
||||
initAI().catch((err) => console.error('[AI] Init failed:', err));
|
||||
// Vector DB init + migration is best-effort — runs after window is shown
|
||||
initVectorDb()
|
||||
.then(() => migrateNotesIfNeeded())
|
||||
.catch((err) => console.error('[VectorDB] Init or migration failed:', err));
|
||||
});
|
||||
|
||||
// Quit when all windows are closed, except on macOS. There, it's common
|
||||
|
||||
@@ -7,6 +7,7 @@ import { clients, projects, tasks, checkpoints, notes, taskComments } from '../d
|
||||
import { getStore } from '../store';
|
||||
import { saveTokenAndInit, hasActiveToken } from '../ai/provider';
|
||||
import { orchestrate } from '../ai/orchestrator';
|
||||
import { upsertNoteEmbedding } from '../db/vectordb';
|
||||
import type { TRPCContext } from '../ipc';
|
||||
|
||||
const t = initTRPC.context<TRPCContext>().create();
|
||||
@@ -406,7 +407,7 @@ const notesRouter = router({
|
||||
|
||||
create: publicProcedure
|
||||
.input(z.object({ title: z.string(), content: z.string(), projectId: z.string().optional() }))
|
||||
.mutation(({ input }) => {
|
||||
.mutation(async ({ input }) => {
|
||||
const id = crypto.randomUUID();
|
||||
const now = Date.now();
|
||||
getDb().insert(notes).values({
|
||||
@@ -417,18 +418,37 @@ const notesRouter = router({
|
||||
createdAt: now,
|
||||
updatedAt: now,
|
||||
}).run();
|
||||
// Fire-and-forget: embed the note. Errors are logged, never thrown.
|
||||
upsertNoteEmbedding(id, input.projectId ?? null, `${input.title}\n\n${input.content}`)
|
||||
.catch((err) => console.error('[VectorDB] Failed to embed note on create:', err));
|
||||
return { id };
|
||||
}),
|
||||
|
||||
update: publicProcedure
|
||||
.input(z.object({ id: z.string(), title: z.string().optional(), content: z.string().optional() }))
|
||||
.mutation(({ input }) => {
|
||||
.mutation(async ({ input }) => {
|
||||
const set: Partial<{ title: string; content: string; updatedAt: number }> = {};
|
||||
if (input.title !== undefined) set.title = input.title;
|
||||
if (input.content !== undefined) set.content = input.content;
|
||||
// Always update updatedAt
|
||||
set.updatedAt = Date.now();
|
||||
getDb().update(notes).set(set).where(eq(notes.id, input.id)).run();
|
||||
|
||||
// Re-embed if searchable text fields changed.
|
||||
// Re-fetch from SQLite so the embedding reflects the full current note
|
||||
// (the update may have changed only one of title or content).
|
||||
if (input.title !== undefined || input.content !== undefined) {
|
||||
const updated = getDb()
|
||||
.select({ id: notes.id, projectId: notes.projectId, title: notes.title, content: notes.content })
|
||||
.from(notes)
|
||||
.where(eq(notes.id, input.id))
|
||||
.all()[0];
|
||||
if (updated) {
|
||||
upsertNoteEmbedding(updated.id, updated.projectId ?? null, `${updated.title}\n\n${updated.content}`)
|
||||
.catch((err) => console.error('[VectorDB] Failed to embed note on update:', err));
|
||||
}
|
||||
}
|
||||
|
||||
return null;
|
||||
}),
|
||||
|
||||
|
||||
Reference in New Issue
Block a user