diff --git a/prd.json b/prd.json index 9ba5929..76e7f76 100644 --- a/prd.json +++ b/prd.json @@ -416,8 +416,8 @@ "Typecheck passes" ], "priority": 23, - "passes": false, - "notes": "" + "passes": true, + "notes": "Completed: searchNotes() in vectordb.ts performs LanceDB similarity search (embedText query → table.search().limit(5).execute()). buildKnowledgeTools() in orchestrator.ts defines vector_search_all tool with Zod schema, joins SQLite for note title + project name, returns formatted results with 'From: [Project Name] — [Note Title]' citations. knowledgeAgent() rewritten as full tool-calling agent loop (mirrors projectAgent pattern: TOOL_CALLING_PROVIDERS check, bindTools, 5-iteration loop with ToolMessage accumulation). makeKnowledgeAgentPrompt() updated with tool docs and citation format instructions. Typecheck passes." }, { "id": "US-024", diff --git a/progress.txt b/progress.txt index a6a6ddd..1f43a9f 100644 --- a/progress.txt +++ b/progress.txt @@ -519,3 +519,17 @@ - `notes.update` allows partial field updates (title or content can be omitted). Always re-fetch the full note from SQLite after the update write to get the correct combined text for embedding - `vectordb`'s `table.delete(where)` accepts a raw SQL WHERE clause string. 
UUID v4 IDs are safe to interpolate directly (only `[0-9a-f-]` characters) --- + +## 2026-02-24 - US-023 +- Implemented @KnowledgeAgent semantic search across all projects +- Added `searchNotes()` to `src/main/db/vectordb.ts`: embeds query via `embedText()`, performs `table.search(vector).limit(k).execute()` on LanceDB notes table, returns `SearchResult[]` with id, projectId, content, _distance +- Added `buildKnowledgeTools()` to `src/main/ai/orchestrator.ts`: defines `vector_search_all` tool that calls `searchNotes()`, joins SQLite for note title and project name, returns formatted results with `From: [Project Name] — [Note Title]` citation headers +- Rewrote `knowledgeAgent()` from simple LLM stub to full tool-calling agent loop (mirrors projectAgent/generalAgent pattern: TOOL_CALLING_PROVIDERS check, bindTools, 5-iteration MAX_ITERATIONS loop with ToolMessage accumulation, fallback for providers without tool support) +- Updated `makeKnowledgeAgentPrompt()` with `withTools` parameter, tool documentation for `vector_search_all`, and citation format instructions +- Files changed: `src/main/db/vectordb.ts`, `src/main/ai/orchestrator.ts` +- **Learnings for future iterations:** + - `openTable<T>('name')` in vectordb makes `search()` expect `T` as input type — omit the generic when using `search()` with a raw vector array + - The agent loop pattern (check TOOL_CALLING_PROVIDERS → buildTools → bindTools → iterate with ToolMessage) is now used consistently across all three agents (project, knowledge, general) — follow this pattern for any new agent + - LanceDB `table.search(vector).limit(k).execute()` returns objects with all stored fields plus `_distance` (L2 distance, lower = more similar) + - The `SearchResult` type is exported from `vectordb.ts` for reuse in the orchestrator — keep vector DB types in the DB module, not the AI module +--- diff --git a/src/main/ai/orchestrator.ts b/src/main/ai/orchestrator.ts index ada3e2b..4b60de0 100644 --- a/src/main/ai/orchestrator.ts 
+++ b/src/main/ai/orchestrator.ts
@@ -13,6 +13,7 @@ import { getDb } from '../db';
 import { projects, tasks, checkpoints, notes, clients } from '../db/schema';
 import { getLLM } from './llm';
 import { getActiveProviderName } from './provider';
+import { searchNotes, type SearchResult } from '../db/vectordb';
 
 /**
  * Providers with tool calling support.
@@ -307,6 +308,74 @@ function buildGlobalTools(): StructuredTool[] {
   return [addTaskTool] as StructuredTool[];
 }
 
+// ---------------------------------------------------------------------------
+// Knowledge tools (cross-project vector search)
+// ---------------------------------------------------------------------------
+
+/**
+ * Builds the tool set bound to @KnowledgeAgent.
+ *
+ * Exposes a single tool, `vector_search_all`, which runs `searchNotes()` over
+ * every note and looks up each hit's note title and project name in SQLite so
+ * the model can cite its sources.
+ */
+function buildKnowledgeTools(): StructuredTool[] {
+  const db = getDb();
+
+  const vectorSearchAllTool = tool(
+    async (input: { query: string }) => {
+      const results: SearchResult[] = await searchNotes(input.query, 5);
+
+      if (results.length === 0) {
+        return 'No matching notes found across projects.';
+      }
+
+      const enriched = results.map((r) => {
+        const noteRow = db
+          .select({ title: notes.title })
+          .from(notes)
+          .where(eq(notes.id, r.id))
+          .all()[0];
+
+        let projectName = 'No project';
+        if (r.projectId) {
+          const projectRow = db
+            .select({ name: projects.name })
+            .from(projects)
+            .where(eq(projects.id, r.projectId))
+            .all()[0];
+          if (projectRow) projectName = projectRow.name;
+        }
+
+        const title = noteRow?.title ?? 'Untitled';
+        const excerpt =
+          r.content.length > 300 ? r.content.slice(0, 300) + '…' : r.content;
+
+        return [
+          `**From: ${projectName} — ${title}**`,
+          // Notes without a project would otherwise render as "null"/"undefined".
+          `Note ID: ${r.id} | Project ID: ${r.projectId || 'none'}`,
+          excerpt,
+        ].join('\n');
+      });
+
+      return enriched.join('\n\n---\n\n');
+    },
+    {
+      name: 'vector_search_all',
+      description:
+        'Performs a semantic search across ALL project notes in the workspace. ' +
+        'Returns the top 5 most relevant notes with their project name, note title, and a text excerpt. ' +
+        'Use this tool whenever the user asks a cross-project knowledge question.',
+      schema: z.object({
+        query: z.string().describe('The search query to find relevant notes across all projects'),
+      }),
+    },
+  );
+
+  return [vectorSearchAllTool] as StructuredTool[];
+}
+
 // ---------------------------------------------------------------------------
 // System prompts
 // ---------------------------------------------------------------------------
@@ -349,15 +418,46 @@ Help the user with their question based on this workspace context. Provide conci
 When discussing tasks or projects, reference them by name.`;
 }
 
-function makeKnowledgeAgentPrompt(contextData: string): string {
+/**
+ * Builds the @KnowledgeAgent system prompt.
+ *
+ * @param contextData - Workspace summary text embedded in the prompt.
+ * @param withTools - Include the `vector_search_all` tool documentation and
+ *   citation instructions.
+ * @param searchAvailable - Whether the agent can actually call
+ *   `vector_search_all`. When false the prompt must not tell the model to use
+ *   a tool it does not have.
+ */
+function makeKnowledgeAgentPrompt(
+  contextData: string,
+  withTools = true,
+  searchAvailable = true,
+): string {
+  const toolsSection = withTools ? `
+You have access to the following tools — use them proactively:
+- vector_search_all: Performs semantic search across ALL project notes. Always use this tool when the user asks a knowledge question. Pass the user's question (or a refined version) as the query.
+
+IMPORTANT: After receiving search results, format your response with inline citations.
+For each piece of information you reference, include the citation in this exact format:
+From: [Project Name] — [Note Title]
+
+Example:
+"The team decided to use React for the frontend. (From: Website Redesign — Tech Stack Decision)"` : '';
+
+  const searchGuidance = searchAvailable
+    ? `Always use the vector_search_all tool to search for relevant notes before answering.
+If no results are found, say so clearly.`
+    : `Semantic search is not available with the current AI provider. Answer from the workspace data above.
+If it does not contain the answer, say so clearly.`;
+
   return `You are @KnowledgeAgent, an AI assistant that searches across all project knowledge in Adiuva.
 
 You have access to the following workspace data:
 
 ${contextData}
-
-Note: Semantic vector search is not yet available. Answer based on the workspace summary data above.
-If the user asks about specific note contents that aren't included here, let them know that full cross-project search will be available soon.`;
+${toolsSection}
+Your primary job is to find and synthesize information from notes across all projects.
+${searchGuidance}`;
 }
 
 // ---------------------------------------------------------------------------
@@ -515,24 +615,108 @@ async function projectAgent(state: State): Promise<Partial<State>> {
   return { messages: messageHistory, response: fallbackContent };
 }
 
-/** Node 2b: Knowledge agent — cross-project search */
+/**
+ * Node 2b: Knowledge agent — cross-project semantic search.
+ *
+ * Providers without tool calling get a single plain LLM call. Otherwise the
+ * model runs in a bounded loop: each requested tool call is executed and fed
+ * back as a ToolMessage until the model answers without requesting tools or
+ * MAX_ITERATIONS is reached.
+ *
+ * @throws Error if no AI provider is configured.
+ */
 async function knowledgeAgent(state: State): Promise<Partial<State>> {
   const llm = await getLLM();
   if (!llm) throw new Error('AI provider not configured.');
 
   const contextData = buildGlobalContext();
-  const systemPrompt = makeKnowledgeAgentPrompt(contextData);
 
-  const response = await llm.invoke([
+  const providerName = getActiveProviderName();
+  const supportsTools = TOOL_CALLING_PROVIDERS.has(providerName);
+  const includeToolsInPrompt = supportsTools && providerName !== 'copilot';
+  const systemPrompt = makeKnowledgeAgentPrompt(contextData, includeToolsInPrompt, supportsTools);
+
+  console.log(`[Orchestrator] knowledgeAgent: provider="${providerName}", supportsTools=${supportsTools}`);
+
+  if (!supportsTools) {
+    const response = await llm.invoke([
+      new SystemMessage(systemPrompt),
+      new HumanMessage(state.userMessage),
+    ]);
+    const content = typeof response.content === 'string' ? response.content : '';
+    return { messages: [response], response: content };
+  }
+
+  const knowledgeTools = buildKnowledgeTools();
+
+  console.log(`[Orchestrator] knowledgeAgent: binding ${knowledgeTools.length} tools: [${knowledgeTools.map((t) => t.name).join(', ')}]`);
+
+  // eslint-disable-next-line @typescript-eslint/no-non-null-assertion
+  const llmWithTools = llm.bindTools!(knowledgeTools);
+
+  const MAX_ITERATIONS = 5;
+  const messageHistory: BaseMessage[] = [
     new SystemMessage(systemPrompt),
     new HumanMessage(state.userMessage),
-  ]);
+  ];
 
-  const content = typeof response.content === 'string' ? response.content : '';
-  return {
-    messages: [response],
-    response: content,
-  };
+  for (let iteration = 0; iteration < MAX_ITERATIONS; iteration++) {
+    const response = await llmWithTools.invoke(messageHistory);
+    messageHistory.push(response);
+
+    const toolCalls: ToolCall[] = AIMessage.isInstance(response) ? (response.tool_calls ?? []) : [];
+
+    console.log(`[Orchestrator] knowledgeAgent loop iteration=${iteration}: tool_calls=[${toolCalls.map((c) => c.name).join(', ')}], content="${String(typeof response.content === 'string' ? response.content : '').slice(0, 100)}"`);
+
+    if (toolCalls.length === 0) {
+      const content = typeof response.content === 'string' ? response.content : '';
+      return { messages: messageHistory, response: content };
+    }
+
+    for (const toolCall of toolCalls) {
+      // One id per call so every reply path answers the same tool_call_id.
+      const toolCallId = toolCall.id ?? crypto.randomUUID();
+      const matched = knowledgeTools.find((t) => t.name === toolCall.name);
+      if (!matched) {
+        messageHistory.push(
+          new ToolMessage({
+            content: `Error: tool "${toolCall.name}" is not available.`,
+            tool_call_id: toolCallId,
+          }),
+        );
+        continue;
+      }
+
+      let resultContent: string;
+      try {
+        const output: unknown = await matched.invoke({ ...toolCall, type: 'tool_call' as const });
+        if (output instanceof ToolMessage) {
+          // Invoking a tool with a ToolCall can already yield a ToolMessage;
+          // forward it rather than JSON-serialising the message object.
+          messageHistory.push(output);
+          continue;
+        }
+        resultContent = typeof output === 'string' ? output : JSON.stringify(output);
+      } catch (err: unknown) {
+        // Surface the failure to the model instead of aborting the whole turn.
+        const reason = err instanceof Error ? err.message : String(err);
+        console.error(`[Orchestrator] knowledgeAgent: tool "${toolCall.name}" failed:`, err);
+        resultContent = `Error: tool "${toolCall.name}" failed: ${reason}`;
+      }
+
+      messageHistory.push(
+        new ToolMessage({
+          content: resultContent,
+          tool_call_id: toolCallId,
+        }),
+      );
+    }
+  }
+
+  const lastAiMsg = [...messageHistory].reverse().find((m) => AIMessage.isInstance(m));
+  const fallbackContent =
+    lastAiMsg && typeof lastAiMsg.content === 'string' ? lastAiMsg.content : '';
+  return { messages: messageHistory, response: fallbackContent };
 }
 
 /** Node 2c: General agent — workspace-wide questions and global task actions */
diff --git a/src/main/db/vectordb.ts b/src/main/db/vectordb.ts
index 4b483d3..00de448 100644
--- a/src/main/db/vectordb.ts
+++ b/src/main/db/vectordb.ts
@@ -13,6 +13,18 @@ interface NoteRecord {
   vector: number[];
 }
 
+/** One hit returned by `searchNotes()`. */
+export interface SearchResult {
+  /** Note ID; the orchestrator uses it to look up the note title in SQLite. */
+  id: string;
+  /** ID of the owning project; callers treat a falsy value as "no project". */
+  projectId: string;
+  /** Text stored for the note in the LanceDB table. */
+  content: string;
+  /** L2 distance from the query vector — lower means more similar. */
+  _distance: number;
+}
+
 let conn: lancedb.Connection | null = null;
 
 /**
@@ -111,3 +123,38 @@ export async function migrateNotesIfNeeded(): Promise<void> {
 
   console.log(`[VectorDB] Migration complete: ${successCount}/${allNotes.length} notes embedded`);
 }
+
+/**
+ * Embed the query string and perform a similarity search across all notes
+ * in the LanceDB 'notes' table.
+ *
+ * @param query - Free-text search query; a blank query yields no results.
+ * @param limit - Maximum number of results (default 5); values below 1 yield
+ *   no results.
+ * @returns Up to `limit` hits, closest first. Empty if the notes table does
+ *   not exist yet.
+ */
+export async function searchNotes(query: string, limit = 5): Promise<SearchResult[]> {
+  // Nothing meaningful to embed or fetch — skip the embedding call entirely.
+  if (query.trim().length === 0 || !(limit >= 1)) {
+    return [];
+  }
+
+  const c = getConn();
+  const tableNames = await c.tableNames();
+
+  if (!tableNames.includes('notes')) {
+    return [];
+  }
+
+  const queryVector = await embedText(query);
+  const table = await c.openTable('notes');
+  const results = await table.search(queryVector).limit(Math.floor(limit)).execute();
+
+  return results.map((r) => ({
+    id: r.id as string,
+    projectId: r.projectId as string,
+    content: r.content as string,
+    _distance: r._distance as number,
+  }));
+}