Files
api/app/core/memory_maintenance.py
2026-04-17 17:04:27 +02:00

103 lines
3.5 KiB
Python

"""Memory maintenance jobs — Phase 3/5.
Two entrypoints called by the scheduler (APScheduler) registered in app/main.py:
drain_extraction_queue(db) — Free-tier batch extraction (Phase 2/5).
decay_relations(db, user_id) — confidence decay + pruning for memory_relations (Phase 3).
Both are safe to call manually or from tests; they never raise.
"""
from __future__ import annotations
import logging
from datetime import datetime, timezone
from sqlalchemy import select
from sqlalchemy.ext.asyncio import AsyncSession
from app.models import MemoryRelation
logger = logging.getLogger(__name__)
# Decay parameters
_DECAY_FACTOR = 0.95 # multiply confidence by this every _DECAY_PERIOD days
_DECAY_PERIOD_DAYS = 30 # period for one decay step
_PRUNE_THRESHOLD = 0.2 # rows below this confidence are deleted
async def decay_relations(db: AsyncSession, user_id: str) -> None:
"""Apply confidence decay to all relation rows for a user.
Decay rule: confidence *= 0.95 for every 30 days since last_confirmed_at.
Rows whose confidence falls below 0.2 are deleted.
Never raises — wraps in try/except.
"""
try:
await _decay_relations_inner(db, user_id)
except Exception as exc:
logger.warning("memory_maintenance: decay_relations failed user=%s: %s", user_id, exc)
async def _decay_relations_inner(db: AsyncSession, user_id: str) -> None:
result = await db.execute(
select(MemoryRelation).where(MemoryRelation.user_id == user_id)
)
rows = result.scalars().all()
now = datetime.now(timezone.utc)
deleted = 0
decayed = 0
for row in rows:
reference = row.last_confirmed_at or row.created_at
if reference is None:
continue
# Ensure timezone-aware comparison
if reference.tzinfo is None:
reference = reference.replace(tzinfo=timezone.utc)
days_elapsed = (now - reference).days
if days_elapsed < _DECAY_PERIOD_DAYS:
continue
periods = days_elapsed // _DECAY_PERIOD_DAYS
new_confidence = row.confidence * (_DECAY_FACTOR ** periods)
if new_confidence < _PRUNE_THRESHOLD:
await db.delete(row)
deleted += 1
logger.info(
"memory_maintenance: pruned relation id=%s user=%s subject=%s predicate=%s "
"confidence=%.3f (below threshold)",
row.id, user_id, row.subject_label, row.predicate, new_confidence,
)
else:
row.confidence = new_confidence
decayed += 1
try:
await db.commit()
logger.info(
"memory_maintenance: decay_relations user=%s decayed=%d deleted=%d",
user_id, decayed, deleted,
)
except Exception as exc:
logger.warning("memory_maintenance: decay_relations commit failed user=%s: %s", user_id, exc)
await db.rollback()
async def drain_extraction_queue(db: AsyncSession) -> None:
"""Process pending ExtractionQueue rows for Free-tier users (Phase 5 stub).
Full implementation wired in Phase 5 when APScheduler is registered.
Currently logs count and returns.
"""
try:
from app.models import ExtractionQueue # noqa: PLC0415
result = await db.execute(select(ExtractionQueue))
rows = result.scalars().all()
logger.info("memory_maintenance: drain_extraction_queue pending=%d (Phase 5 cron)", len(rows))
except Exception as exc:
logger.warning("memory_maintenance: drain_extraction_queue failed: %s", exc)