Refactor tests for execution plan and add comprehensive storage tests

- Updated `TestModuleSingletons` in `test_execution_plan.py` to reflect new agent templates and playbook names.
- Changed assertions in playbook tests to match updated templates and agents.
- Introduced `test_storage.py` to cover the storage layer, including encryption, BlobStore, and VectorStore functionality.
- Added tests for S3 interactions, ensuring upload, download, delete, and list operations work as expected.
- Implemented mock tests for Pinecone and Qdrant vector stores to validate upsert, search, and delete operations.
2026-03-02 15:36:09 +01:00
parent 35dd9ac86f
commit c8ef7b119b
21 changed files with 1980 additions and 469 deletions
--- a/app/storage/vector_store.py
+++ b/app/storage/vector_store.py
@@ -0,0 +1,205 @@
+"""Cloud vector store — wraps Pinecone (default) or Qdrant.
+
+Vectors are pre-encrypted blobs from the client.  The backend stores them
+alongside a deterministic 32-dim float representation derived from the blob's
+SHA-256 hash.  Semantic ANN search is not meaningful on encrypted data — this
+is a known trade-off documented in the backend plan.
+
+Isolation: Pinecone uses ``namespace=user_id``; Qdrant filters by
+``user_id`` payload field on a shared collection.
+"""
+
+from __future__ import annotations
+
+import base64
+import hashlib
+from typing import Any
+
+from pinecone import Pinecone
+from qdrant_client import QdrantClient
+from qdrant_client.models import FieldCondition, Filter, MatchValue, PointIdsList, PointStruct
+
+from app.config.settings import settings
+from app.schemas import VectorItem, VectorSearchResult
+
+# Name of the single Qdrant collection shared by all users.  Per-user
+# isolation is done with the ``user_id`` payload field, not with separate
+# collections.
+_QDRANT_COLLECTION = "adiuva_vectors"
+
+
+def _blob_to_vector(blob: bytes) -> list[float]:
+    """Map *blob* to a 32-element float vector, for storage purposes only.
+
+    The SHA-256 digest of *blob* supplies 32 bytes; each byte ``b`` becomes
+    ``(b - 128) / 128.0``, so every component lies in ``[-1.0, 1.0)``.  The
+    mapping is deterministic: equal blobs always yield equal vectors.  On
+    encrypted data the resulting vector carries no semantic meaning.
+    """
+    digest = hashlib.sha256(blob).digest()
+    return [(byte - 128) / 128.0 for byte in digest]
+
+
+class VectorStore:
+    """Thin wrapper around Pinecone or Qdrant.
+
+    The backend to use is selected at runtime:
+    - Pinecone: when ``settings.PINECONE_API_KEY`` is non-empty.
+    - Qdrant: otherwise (requires ``settings.QDRANT_URL``).
+
+    Tenant isolation: every Pinecone call passes ``namespace=user_id``; every
+    Qdrant search and delete filters on the ``user_id`` payload field of the
+    shared collection.
+
+    NOTE(review): the public methods are ``async`` but none of the client
+    calls below is awaited, so they appear to run synchronously on the event
+    loop — confirm whether they should be moved to a worker thread.
+    """
+
+    def _use_pinecone(self) -> bool:
+        """Return ``True`` when a Pinecone API key is configured."""
+        return bool(settings.PINECONE_API_KEY)
+
+    # ── Pinecone helpers ──────────────────────────────────────────────
+
+    def _pinecone_index(self) -> Any:
+        """Return a handle to the index named by ``settings.PINECONE_INDEX``.
+
+        A new ``Pinecone`` client is built on every call.
+        """
+        pc = Pinecone(api_key=settings.PINECONE_API_KEY)
+        return pc.Index(settings.PINECONE_INDEX)
+
+    # ── Qdrant helpers ────────────────────────────────────────────────
+
+    def _qdrant_client(self) -> Any:
+        """Return a new ``QdrantClient`` for ``settings.QDRANT_URL``.
+
+        An empty ``settings.QDRANT_API_KEY`` is passed on as ``None``.
+        """
+        return QdrantClient(
+            url=settings.QDRANT_URL,
+            api_key=settings.QDRANT_API_KEY or None,
+        )
+
+    # ── Public API ────────────────────────────────────────────────────
+
+    async def upsert(self, user_id: str, vectors: list[VectorItem]) -> None:
+        """Store encrypted vectors in the backend.
+
+        Each ``VectorItem.blob`` is base64-encoded and kept in metadata/payload
+        so it can be returned verbatim during search.  An empty *vectors* list
+        is a no-op and never reaches the backend.
+
+        Args:
+            user_id: Used as Pinecone namespace or Qdrant payload field.
+            vectors: List of encrypted vector items from the client.
+        """
+        if not vectors:
+            # Nothing to write; avoid sending an empty request to the backend.
+            return
+        if self._use_pinecone():
+            await self._pinecone_upsert(user_id, vectors)
+        else:
+            await self._qdrant_upsert(user_id, vectors)
+
+    async def search(
+        self,
+        user_id: str,
+        query_blob: bytes,
+        top_k: int,
+    ) -> list[VectorSearchResult]:
+        """Query the vector store and return encrypted result blobs.
+
+        The query vector is derived from *query_blob* using the same
+        deterministic mapping as upsert.
+
+        Args:
+            user_id:    Scopes the search to this user's namespace.
+            query_blob: Encrypted query from the client.
+            top_k:      Maximum number of results to return.
+
+        Returns:
+            List of ``VectorSearchResult`` with ``id``, ``score``, and ``blob``.
+        """
+        if self._use_pinecone():
+            return await self._pinecone_search(user_id, query_blob, top_k)
+        return await self._qdrant_search(user_id, query_blob, top_k)
+
+    async def delete(self, user_id: str, vector_ids: list[str]) -> None:
+        """Remove vectors by ID, scoped to *user_id*.
+
+        An empty *vector_ids* list is a no-op and never reaches the backend.
+
+        Args:
+            user_id:    Namespace / payload filter to prevent cross-user deletion.
+            vector_ids: List of vector IDs to remove.
+        """
+        if not vector_ids:
+            # Nothing to remove; avoid sending an empty request to the backend.
+            return
+        if self._use_pinecone():
+            await self._pinecone_delete(user_id, vector_ids)
+        else:
+            await self._qdrant_delete(user_id, vector_ids)
+
+    # ── Pinecone implementation ───────────────────────────────────────
+
+    async def _pinecone_upsert(self, user_id: str, vectors: list[VectorItem]) -> None:
+        """Upsert *vectors* into the Pinecone namespace named after *user_id*.
+
+        The encrypted blob travels base64-encoded in the record metadata,
+        together with the item's checksum and the owning user ID.
+        """
+        index = self._pinecone_index()
+        records = [
+            {
+                "id": v.id,
+                "values": _blob_to_vector(v.blob),
+                "metadata": {
+                    "blob": base64.b64encode(v.blob).decode(),
+                    "checksum": v.checksum,
+                    "user_id": user_id,
+                },
+            }
+            for v in vectors
+        ]
+        index.upsert(vectors=records, namespace=user_id)
+
+    async def _pinecone_search(
+        self, user_id: str, query_blob: bytes, top_k: int
+    ) -> list[VectorSearchResult]:
+        """Query the *user_id* namespace and decode each match's stored blob."""
+        index = self._pinecone_index()
+        query_vector = _blob_to_vector(query_blob)
+        response = index.query(
+            vector=query_vector,
+            top_k=top_k,
+            namespace=user_id,
+            # Metadata is required: it is where the encrypted blob is stored.
+            include_metadata=True,
+        )
+        results: list[VectorSearchResult] = []
+        for match in response.get("matches", []):
+            blob_bytes = base64.b64decode(match["metadata"]["blob"])
+            results.append(
+                VectorSearchResult(
+                    id=match["id"],
+                    score=match["score"],
+                    blob=blob_bytes,
+                )
+            )
+        return results
+
+    async def _pinecone_delete(self, user_id: str, vector_ids: list[str]) -> None:
+        """Delete *vector_ids* from the Pinecone namespace named after *user_id*."""
+        index = self._pinecone_index()
+        index.delete(ids=vector_ids, namespace=user_id)
+
+    # ── Qdrant implementation ─────────────────────────────────────────
+
+    async def _qdrant_upsert(self, user_id: str, vectors: list[VectorItem]) -> None:
+        """Upsert *vectors* into the shared Qdrant collection.
+
+        Each point's payload carries the base64-encoded blob, the checksum and
+        the owning ``user_id`` (the field search and delete filter on).
+
+        NOTE(review): point IDs are the client-supplied ``v.id`` values in a
+        collection shared by all users, and this write is not scoped by
+        ``user_id`` — presumably two users sending the same ID would overwrite
+        each other's point; confirm and consider namespacing the IDs.
+        """
+        client = self._qdrant_client()
+        points = [
+            PointStruct(
+                id=v.id,
+                vector=_blob_to_vector(v.blob),
+                payload={
+                    "blob": base64.b64encode(v.blob).decode(),
+                    "checksum": v.checksum,
+                    "user_id": user_id,
+                },
+            )
+            for v in vectors
+        ]
+        client.upsert(collection_name=_QDRANT_COLLECTION, points=points)
+
+    async def _qdrant_search(
+        self, user_id: str, query_blob: bytes, top_k: int
+    ) -> list[VectorSearchResult]:
+        """Search the shared collection, restricted to points owned by *user_id*."""
+        client = self._qdrant_client()
+        query_vector = _blob_to_vector(query_blob)
+        hits = client.search(
+            collection_name=_QDRANT_COLLECTION,
+            query_vector=query_vector,
+            # The collection is shared, so the payload filter is what keeps
+            # other users' points out of the results.
+            query_filter=Filter(
+                must=[FieldCondition(key="user_id", match=MatchValue(value=user_id))]
+            ),
+            limit=top_k,
+        )
+        return [
+            VectorSearchResult(
+                id=str(hit.id),
+                score=hit.score,
+                blob=base64.b64decode(hit.payload["blob"]),
+            )
+            for hit in hits
+        ]
+
+    async def _qdrant_delete(self, user_id: str, vector_ids: list[str]) -> None:
+        """Delete *vector_ids* from the shared collection, scoped to *user_id*.
+
+        The collection is shared by all users, so deleting by bare point ID
+        would let one user remove another user's vectors.  The delete is
+        therefore expressed as a filter that requires BOTH an ID match and a
+        matching ``user_id`` payload; IDs owned by someone else are ignored.
+        """
+        # Imported here because only this method needs these two models.
+        from qdrant_client.models import FilterSelector, HasIdCondition
+
+        client = self._qdrant_client()
+        owned_by_user = Filter(
+            must=[
+                HasIdCondition(has_id=vector_ids),
+                FieldCondition(key="user_id", match=MatchValue(value=user_id)),
+            ]
+        )
+        client.delete(
+            collection_name=_QDRANT_COLLECTION,
+            points_selector=FilterSelector(filter=owned_by_user),
+        )