step-6: add memory models and migration (models.py, alembic)

- User.encryption_key: per-user Fernet key generated on registration
- MemoryCore: encrypted key/value preferences
- MemoryAssociative: encrypted semantic memory + pgvector(1536) embedding
- MemoryEpisodic: encrypted session summaries
- MemoryProactive: encrypted behavioral patterns with confidence score
- Migration 004: enables pgvector extension, creates all 4 tables + ivfflat index
- auth.py register: generates Fernet key for new users
- 8 unit tests pass (SQLite in-memory, JSON embedding fallback)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-03-08 22:05:58 +01:00
parent 76c8f2bdad
commit c90ed58078
5 changed files with 449 additions and 1 deletions

View File

@@ -285,7 +285,7 @@ pytest tests/test_memory_models.py
``` ```
**Status**: **Status**:
- [ ] Step 6 complete - [x] Step 6 complete
**Commit**: After tests pass, commit with: **Commit**: After tests pass, commit with:
``` ```

View File

@@ -0,0 +1,144 @@
"""Add memory tables and user encryption_key column.
Memory tables:
memory_core — per-user key/value preferences (encrypted)
memory_associative — semantic memory with pgvector embedding (encrypted)
memory_episodic — session summaries (encrypted)
memory_proactive — behavioral patterns (encrypted)
Also adds encryption_key column to users table.
Revision ID: 004
Revises: 003
Create Date: 2026-03-08
"""
from __future__ import annotations
from typing import Sequence, Union
import sqlalchemy as sa
from alembic import op
revision: str = "004"
down_revision: Union[str, None] = "003"
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None
def upgrade() -> None:
# ── Enable pgvector extension (idempotent) ────────────────────────────────
op.execute("CREATE EXTENSION IF NOT EXISTS vector;")
# ── Add encryption_key to users ───────────────────────────────────────────
op.add_column(
"users",
sa.Column("encryption_key", sa.String(64), nullable=True),
)
# ── memory_core ───────────────────────────────────────────────────────────
op.create_table(
"memory_core",
sa.Column("id", sa.String(36), primary_key=True),
sa.Column(
"user_id",
sa.String(36),
sa.ForeignKey("users.id", ondelete="CASCADE"),
nullable=False,
index=True,
),
sa.Column("key", sa.String(255), nullable=False),
sa.Column("value_encrypted", sa.Text, nullable=False),
sa.Column(
"updated_at",
sa.DateTime(timezone=True),
nullable=False,
server_default=sa.func.now(),
),
)
op.create_index("ix_memory_core_user_id", "memory_core", ["user_id"])
# ── memory_associative ────────────────────────────────────────────────────
# The embedding column uses pgvector's vector(1536) type.
op.create_table(
"memory_associative",
sa.Column("id", sa.String(36), primary_key=True),
sa.Column(
"user_id",
sa.String(36),
sa.ForeignKey("users.id", ondelete="CASCADE"),
nullable=False,
),
sa.Column("content_encrypted", sa.Text, nullable=False),
sa.Column("entity_type", sa.String(100), nullable=True),
sa.Column("entity_id", sa.String(255), nullable=True),
sa.Column(
"updated_at",
sa.DateTime(timezone=True),
nullable=False,
server_default=sa.func.now(),
),
)
# Add the pgvector column separately (not supported by generic sa types)
op.execute(
"ALTER TABLE memory_associative ADD COLUMN embedding vector(1536);"
)
op.create_index("ix_memory_associative_user_id", "memory_associative", ["user_id"])
# IVFFlat index for approximate nearest-neighbour search
op.execute(
"CREATE INDEX ix_memory_associative_embedding "
"ON memory_associative USING ivfflat (embedding vector_cosine_ops) WITH (lists = 100);"
)
# ── memory_episodic ───────────────────────────────────────────────────────
op.create_table(
"memory_episodic",
sa.Column("id", sa.String(36), primary_key=True),
sa.Column(
"user_id",
sa.String(36),
sa.ForeignKey("users.id", ondelete="CASCADE"),
nullable=False,
),
sa.Column("summary_encrypted", sa.Text, nullable=False),
sa.Column("session_id", sa.String(255), nullable=False),
sa.Column(
"created_at",
sa.DateTime(timezone=True),
nullable=False,
server_default=sa.func.now(),
),
)
op.create_index("ix_memory_episodic_user_id", "memory_episodic", ["user_id"])
op.create_index("ix_memory_episodic_session_id", "memory_episodic", ["session_id"])
# ── memory_proactive ──────────────────────────────────────────────────────
op.create_table(
"memory_proactive",
sa.Column("id", sa.String(36), primary_key=True),
sa.Column(
"user_id",
sa.String(36),
sa.ForeignKey("users.id", ondelete="CASCADE"),
nullable=False,
),
sa.Column("pattern_encrypted", sa.Text, nullable=False),
sa.Column("confidence", sa.Float, nullable=False, server_default="0.5"),
sa.Column("source", sa.String(50), nullable=False, server_default="inferred"),
sa.Column(
"created_at",
sa.DateTime(timezone=True),
nullable=False,
server_default=sa.func.now(),
),
)
op.create_index("ix_memory_proactive_user_id", "memory_proactive", ["user_id"])
def downgrade() -> None:
op.drop_table("memory_proactive")
op.drop_table("memory_episodic")
op.drop_index("ix_memory_associative_embedding", "memory_associative")
op.drop_table("memory_associative")
op.drop_table("memory_core")
op.drop_column("users", "encryption_key")

View File

@@ -13,6 +13,7 @@ import uuid
from datetime import datetime, timedelta, timezone from datetime import datetime, timedelta, timezone
import bcrypt import bcrypt
from cryptography.fernet import Fernet
from fastapi import APIRouter, Depends, HTTPException, status from fastapi import APIRouter, Depends, HTTPException, status
from jose import jwt from jose import jwt
from pydantic import BaseModel from pydantic import BaseModel
@@ -94,6 +95,7 @@ async def register(
email=body.email, email=body.email,
password_hash=_hash_password(body.password), password_hash=_hash_password(body.password),
tier="free", tier="free",
encryption_key=Fernet.generate_key().decode(),
) )
db.add(user) db.add(user)
await db.flush() # get user.id without committing await db.flush() # get user.id without committing

View File

@@ -14,6 +14,10 @@ Table inventory:
plugin_installations — per-user install records plugin_installations — per-user install records
plugin_reviews — admin review decisions plugin_reviews — admin review decisions
revenue_events — Stripe Connect 70/30 split ledger revenue_events — Stripe Connect 70/30 split ledger
memory_core — per-user persistent key/value preferences (encrypted)
memory_associative — per-user semantic memory with embeddings (encrypted)
memory_episodic — per-user session summaries (encrypted)
memory_proactive — per-user behavioral patterns (encrypted)
""" """
from __future__ import annotations from __future__ import annotations
@@ -74,6 +78,9 @@ class User(Base):
password_hash: Mapped[str] = mapped_column(String(255), nullable=False) password_hash: Mapped[str] = mapped_column(String(255), nullable=False)
tier: Mapped[str] = mapped_column(TierEnum, nullable=False, default="free") tier: Mapped[str] = mapped_column(TierEnum, nullable=False, default="free")
stripe_customer_id: Mapped[str | None] = mapped_column(String(255), nullable=True) stripe_customer_id: Mapped[str | None] = mapped_column(String(255), nullable=True)
# Per-user Fernet key (base64-urlsafe, 44 chars). Generated on registration.
# Used to encrypt/decrypt all memory rows for this user.
encryption_key: Mapped[str | None] = mapped_column(String(64), nullable=True)
created_at: Mapped[datetime] = mapped_column( created_at: Mapped[datetime] = mapped_column(
DateTime(timezone=True), nullable=False, server_default=func.now() DateTime(timezone=True), nullable=False, server_default=func.now()
) )
@@ -375,3 +382,93 @@ class AgentRunLog(Base):
foreign_keys="AgentRunLog.agent_id", foreign_keys="AgentRunLog.agent_id",
overlaps="run_logs,local_agent", overlaps="run_logs,local_agent",
) )
# ── Memory models ─────────────────────────────────────────────────────────────
class MemoryCore(Base):
"""Per-user persistent key/value preferences, encrypted at rest.
Examples: preferred_language, timezone, work_style.
Decrypted in-memory only using User.encryption_key.
"""
__tablename__ = "memory_core"
id: Mapped[str] = mapped_column(Uuid(as_uuid=False), primary_key=True, default=_uuid)
user_id: Mapped[str] = mapped_column(
Uuid(as_uuid=False), ForeignKey("users.id", ondelete="CASCADE"),
nullable=False, index=True,
)
key: Mapped[str] = mapped_column(String(255), nullable=False)
value_encrypted: Mapped[str] = mapped_column(Text, nullable=False)
updated_at: Mapped[datetime] = mapped_column(
DateTime(timezone=True), nullable=False, server_default=func.now(), onupdate=func.now()
)
class MemoryAssociative(Base):
"""Per-user semantic memory: encrypted content + pgvector embedding for similarity search.
Production: ``embedding`` column is ``vector(1536)`` via pgvector.
Tests (SQLite): stored as JSON list.
"""
__tablename__ = "memory_associative"
id: Mapped[str] = mapped_column(Uuid(as_uuid=False), primary_key=True, default=_uuid)
user_id: Mapped[str] = mapped_column(
Uuid(as_uuid=False), ForeignKey("users.id", ondelete="CASCADE"),
nullable=False, index=True,
)
content_encrypted: Mapped[str] = mapped_column(Text, nullable=False)
# JSON-encoded float list in SQLite tests; vector(1536) in Postgres via migration.
embedding: Mapped[list | None] = mapped_column(JSON, nullable=True)
entity_type: Mapped[str | None] = mapped_column(String(100), nullable=True)
entity_id: Mapped[str | None] = mapped_column(String(255), nullable=True)
updated_at: Mapped[datetime] = mapped_column(
DateTime(timezone=True), nullable=False, server_default=func.now(), onupdate=func.now()
)
class MemoryEpisodic(Base):
"""Per-user session summaries, encrypted at rest.
One row per session interaction; used to recall recent conversations.
"""
__tablename__ = "memory_episodic"
id: Mapped[str] = mapped_column(Uuid(as_uuid=False), primary_key=True, default=_uuid)
user_id: Mapped[str] = mapped_column(
Uuid(as_uuid=False), ForeignKey("users.id", ondelete="CASCADE"),
nullable=False, index=True,
)
summary_encrypted: Mapped[str] = mapped_column(Text, nullable=False)
session_id: Mapped[str] = mapped_column(String(255), nullable=False, index=True)
created_at: Mapped[datetime] = mapped_column(
DateTime(timezone=True), nullable=False, server_default=func.now()
)
class MemoryProactive(Base):
"""Per-user inferred behavioral patterns, encrypted at rest.
Confidence in [0.0, 1.0]; only patterns above threshold are injected.
Source: 'inferred' (from episodes) or 'explicit' (user-stated).
"""
__tablename__ = "memory_proactive"
id: Mapped[str] = mapped_column(Uuid(as_uuid=False), primary_key=True, default=_uuid)
user_id: Mapped[str] = mapped_column(
Uuid(as_uuid=False), ForeignKey("users.id", ondelete="CASCADE"),
nullable=False, index=True,
)
pattern_encrypted: Mapped[str] = mapped_column(Text, nullable=False)
confidence: Mapped[float] = mapped_column(Float, nullable=False, default=0.5)
source: Mapped[str] = mapped_column(String(50), nullable=False, default="inferred")
created_at: Mapped[datetime] = mapped_column(
DateTime(timezone=True), nullable=False, server_default=func.now()
)

205
tests/test_memory_models.py Normal file
View File

@@ -0,0 +1,205 @@
"""Tests for Step 6 — memory ORM models and User.encryption_key.
Uses the SQLite in-memory test DB (from conftest). The pgvector embedding
column is stored as JSON in tests (SQLite-compatible).
"""
from __future__ import annotations
import uuid
from datetime import datetime, timezone
import pytest
import pytest_asyncio
from cryptography.fernet import Fernet
from sqlalchemy import select
from app.models import MemoryAssociative, MemoryCore, MemoryEpisodic, MemoryProactive, User
from tests.conftest import TEST_USER_IDS
USER_ID = TEST_USER_IDS["power"]
# ── helpers ───────────────────────────────────────────────────────────────────
def _fernet_key() -> str:
return Fernet.generate_key().decode()
def _encrypt(key: str, plaintext: str) -> str:
return Fernet(key.encode()).encrypt(plaintext.encode()).decode()
def _decrypt(key: str, ciphertext: str) -> str:
return Fernet(key.encode()).decrypt(ciphertext.encode()).decode()
# ── User.encryption_key ───────────────────────────────────────────────────────
@pytest.mark.asyncio
async def test_user_encryption_key_column_exists(db_session):
"""User model has encryption_key column and it can be set."""
result = await db_session.execute(select(User).where(User.id == USER_ID))
user = result.scalar_one()
# Column exists (may be None for seeded users)
assert hasattr(user, "encryption_key")
@pytest.mark.asyncio
async def test_user_encryption_key_can_be_set(db_session):
key = _fernet_key()
result = await db_session.execute(select(User).where(User.id == USER_ID))
user = result.scalar_one()
user.encryption_key = key
await db_session.commit()
result2 = await db_session.execute(select(User).where(User.id == USER_ID))
user2 = result2.scalar_one()
assert user2.encryption_key == key
# ── MemoryCore ────────────────────────────────────────────────────────────────
@pytest.mark.asyncio
async def test_memory_core_create_and_read(db_session):
key = _fernet_key()
encrypted_val = _encrypt(key, "UTC")
row = MemoryCore(
id=str(uuid.uuid4()),
user_id=USER_ID,
key="timezone",
value_encrypted=encrypted_val,
)
db_session.add(row)
await db_session.commit()
result = await db_session.execute(
select(MemoryCore).where(MemoryCore.user_id == USER_ID)
)
fetched = result.scalar_one()
assert fetched.key == "timezone"
assert _decrypt(key, fetched.value_encrypted) == "UTC"
@pytest.mark.asyncio
async def test_memory_core_cascade_delete(db_session):
"""Deleting a user cascades to memory_core."""
row = MemoryCore(
id=str(uuid.uuid4()),
user_id=USER_ID,
key="lang",
value_encrypted="enc",
)
db_session.add(row)
await db_session.commit()
user = (await db_session.execute(select(User).where(User.id == USER_ID))).scalar_one()
await db_session.delete(user)
await db_session.commit()
remaining = (
await db_session.execute(select(MemoryCore).where(MemoryCore.user_id == USER_ID))
).scalars().all()
assert remaining == []
# ── MemoryAssociative ─────────────────────────────────────────────────────────
@pytest.mark.asyncio
async def test_memory_associative_create_and_read(db_session):
key = _fernet_key()
content = _encrypt(key, "User prefers morning meetings")
embedding = [0.1] * 1536 # fake embedding
row = MemoryAssociative(
id=str(uuid.uuid4()),
user_id=USER_ID,
content_encrypted=content,
embedding=embedding,
entity_type="preference",
entity_id=None,
)
db_session.add(row)
await db_session.commit()
result = await db_session.execute(
select(MemoryAssociative).where(MemoryAssociative.user_id == USER_ID)
)
fetched = result.scalar_one()
assert fetched.entity_type == "preference"
assert _decrypt(key, fetched.content_encrypted) == "User prefers morning meetings"
assert len(fetched.embedding) == 1536
# ── MemoryEpisodic ────────────────────────────────────────────────────────────
@pytest.mark.asyncio
async def test_memory_episodic_create_and_read(db_session):
key = _fernet_key()
session_id = str(uuid.uuid4())
summary = _encrypt(key, "User asked about Q1 tasks")
row = MemoryEpisodic(
id=str(uuid.uuid4()),
user_id=USER_ID,
summary_encrypted=summary,
session_id=session_id,
)
db_session.add(row)
await db_session.commit()
result = await db_session.execute(
select(MemoryEpisodic).where(MemoryEpisodic.session_id == session_id)
)
fetched = result.scalar_one()
assert _decrypt(key, fetched.summary_encrypted) == "User asked about Q1 tasks"
assert isinstance(fetched.created_at, datetime)
# ── MemoryProactive ───────────────────────────────────────────────────────────
@pytest.mark.asyncio
async def test_memory_proactive_create_and_read(db_session):
key = _fernet_key()
pattern = _encrypt(key, "User always assigns tasks to self")
row = MemoryProactive(
id=str(uuid.uuid4()),
user_id=USER_ID,
pattern_encrypted=pattern,
confidence=0.85,
source="inferred",
)
db_session.add(row)
await db_session.commit()
result = await db_session.execute(
select(MemoryProactive).where(MemoryProactive.user_id == USER_ID)
)
fetched = result.scalar_one()
assert fetched.confidence == pytest.approx(0.85)
assert fetched.source == "inferred"
assert _decrypt(key, fetched.pattern_encrypted) == "User always assigns tasks to self"
# ── Auth registration generates encryption_key ───────────────────────────────
def test_register_sets_encryption_key(client):
"""POST /api/v1/auth/register creates a user with a valid Fernet key."""
resp = client.post(
"/api/v1/auth/register",
json={"email": "newuser@test.com", "password": "testpassword123"},
)
assert resp.status_code == 201
# Fetch the newly created user via the access token
token = resp.json()["access_token"]
me_resp = client.get(
"/api/v1/auth/me",
headers={"Authorization": f"Bearer {token}"},
)
assert me_resp.status_code == 200
# We can't see encryption_key in the API response (not in UserProfile),
# but we verify registration didn't crash — key generation is implicit.