Add skip tracking, compressed heatmap, listening log, docs, tests, and OpenAI support

Major changes:
- Add skip tracking: poll currently-playing every 15s, detect skips (<30s listened)
- Add listening-log and sessions API endpoints
- Fix ReccoBeats client to extract spotify_id from href response
- Compress heatmap from 24 hours to 6 x 4-hour blocks
- Add OpenAI support in narrative service (use max_completion_tokens for new models)
- Add ListeningLog component with timeline and list views
- Update all frontend components to use real data (album art, play counts)
- Add docker-compose external network (dockernet) support
- Add comprehensive documentation (API, DATA_MODEL, ARCHITECTURE, FRONTEND)
- Add unit tests for ingest and API endpoints
This commit is contained in:
bnair123
2025-12-30 00:15:01 +04:00
parent faee830545
commit 887e78bf47
26 changed files with 1942 additions and 662 deletions

View File

@@ -1,6 +1,6 @@
import asyncio
import os
from datetime import datetime
from datetime import datetime, timedelta
from sqlalchemy.orm import Session
from .models import Track, PlayHistory, Artist
from .database import SessionLocal
@@ -9,6 +9,17 @@ from .services.reccobeats_client import ReccoBeatsClient
from .services.genius_client import GeniusClient
from dateutil import parser
class PlaybackTracker:
    """Mutable state for the currently-playing poll loop.

    One instance is shared across polls; `poll_currently_playing` mutates it
    and `finalize_track` flushes it to PlayHistory and zeroes it again.
    """

    def __init__(self):
        # Identity of the track being followed (None => nothing tracked yet).
        self.current_track_id = None
        self.track_start_time = None
        self.last_poll_time = None
        # Milliseconds actually listened, accumulated only while playing.
        self.accumulated_listen_ms = 0
        self.last_progress_ms = 0
        self.is_paused = False
# Initialize Clients
def get_spotify_client():
return SpotifyClient(
@@ -17,12 +28,15 @@ def get_spotify_client():
refresh_token=os.getenv("SPOTIFY_REFRESH_TOKEN"),
)
def get_reccobeats_client():
return ReccoBeatsClient()
def get_genius_client():
return GeniusClient()
async def ensure_artists_exist(db: Session, artists_data: list):
"""
Ensures that all artists in the list exist in the Artist table.
@@ -36,18 +50,19 @@ async def ensure_artists_exist(db: Session, artists_data: list):
img = None
if "images" in a_data and a_data["images"]:
img = a_data["images"][0]["url"]
artist = Artist(
id=artist_id,
name=a_data["name"],
genres=[],
image_url=img
)
artist = Artist(id=artist_id, name=a_data["name"], genres=[], image_url=img)
db.add(artist)
artist_objects.append(artist)
return artist_objects
async def enrich_tracks(db: Session, spotify_client: SpotifyClient, recco_client: ReccoBeatsClient, genius_client: GeniusClient):
async def enrich_tracks(
db: Session,
spotify_client: SpotifyClient,
recco_client: ReccoBeatsClient,
genius_client: GeniusClient,
):
"""
Enrichment Pipeline:
1. Audio Features (ReccoBeats)
@@ -56,18 +71,19 @@ async def enrich_tracks(db: Session, spotify_client: SpotifyClient, recco_client
"""
# 1. Enrich Audio Features
tracks_missing_features = db.query(Track).filter(Track.danceability == None).limit(50).all()
tracks_missing_features = (
db.query(Track).filter(Track.danceability == None).limit(50).all()
)
if tracks_missing_features:
print(f"Enriching {len(tracks_missing_features)} tracks with audio features...")
ids = [t.id for t in tracks_missing_features]
features_list = await recco_client.get_audio_features(ids)
# Map features by ID
features_map = {}
for f in features_list:
# Handle potential ID mismatch or URI format
tid = f.get("id")
if tid: features_map[tid] = f
tid = f.get("spotify_id") or f.get("id")
if tid:
features_map[tid] = f
for track in tracks_missing_features:
data = features_map.get(track.id)
@@ -83,61 +99,74 @@ async def enrich_tracks(db: Session, spotify_client: SpotifyClient, recco_client
track.liveness = data.get("liveness")
track.valence = data.get("valence")
track.tempo = data.get("tempo")
db.commit()
# 2. Enrich Artist Genres & Images (Spotify)
artists_missing_data = db.query(Artist).filter((Artist.genres == None) | (Artist.image_url == None)).limit(50).all()
artists_missing_data = (
db.query(Artist)
.filter((Artist.genres == None) | (Artist.image_url == None))
.limit(50)
.all()
)
if artists_missing_data:
print(f"Enriching {len(artists_missing_data)} artists with genres/images...")
artist_ids_list = [a.id for a in artists_missing_data]
artist_data_map = {}
for i in range(0, len(artist_ids_list), 50):
chunk = artist_ids_list[i:i+50]
chunk = artist_ids_list[i : i + 50]
artists_data = await spotify_client.get_artists(chunk)
for a_data in artists_data:
if a_data:
img = a_data["images"][0]["url"] if a_data.get("images") else None
artist_data_map[a_data["id"]] = {
"genres": a_data.get("genres", []),
"image_url": img
"image_url": img,
}
for artist in artists_missing_data:
data = artist_data_map.get(artist.id)
if data:
if artist.genres is None: artist.genres = data["genres"]
if artist.image_url is None: artist.image_url = data["image_url"]
if artist.genres is None:
artist.genres = data["genres"]
if artist.image_url is None:
artist.image_url = data["image_url"]
elif artist.genres is None:
artist.genres = [] # Prevent retry loop
artist.genres = [] # Prevent retry loop
db.commit()
# 3. Enrich Lyrics (Genius)
# Only fetch for tracks that have been played recently to avoid spamming Genius API
tracks_missing_lyrics = db.query(Track).filter(Track.lyrics == None).order_by(Track.updated_at.desc()).limit(10).all()
tracks_missing_lyrics = (
db.query(Track)
.filter(Track.lyrics == None)
.order_by(Track.updated_at.desc())
.limit(10)
.all()
)
if tracks_missing_lyrics and genius_client.genius:
print(f"Enriching {len(tracks_missing_lyrics)} tracks with lyrics (Genius)...")
for track in tracks_missing_lyrics:
# We need the primary artist name
artist_name = track.artist.split(",")[0] # Heuristic: take first artist
artist_name = track.artist.split(",")[0] # Heuristic: take first artist
print(f"Searching Genius for: {track.name} by {artist_name}")
data = genius_client.search_song(track.name, artist_name)
if data:
track.lyrics = data["lyrics"]
# Fallback: if we didn't get high-res art from Spotify, use Genius
if not track.image_url and data.get("image_url"):
track.image_url = data["image_url"]
else:
track.lyrics = "" # Mark as empty to prevent retry loop
track.lyrics = "" # Mark as empty to prevent retry loop
# Small sleep to be nice to API? GeniusClient is synchronous.
# We are in async function but GeniusClient is blocking. It's fine for worker.
db.commit()
@@ -164,7 +193,7 @@ async def ingest_recently_played(db: Session):
if not track:
print(f"New track found: {track_data['name']}")
# Extract Album Art
image_url = None
if track_data.get("album") and track_data["album"].get("images"):
@@ -173,12 +202,12 @@ async def ingest_recently_played(db: Session):
track = Track(
id=track_id,
name=track_data["name"],
artist=", ".join([a["name"] for a in track_data["artists"]]),
artist=", ".join([a["name"] for a in track_data["artists"]]),
album=track_data["album"]["name"],
image_url=image_url,
duration_ms=track_data["duration_ms"],
popularity=track_data["popularity"],
raw_data=track_data
raw_data=track_data,
)
# Handle Artists Relation
@@ -191,21 +220,27 @@ async def ingest_recently_played(db: Session):
# Ensure relationships exist logic...
if not track.artists and track.raw_data and "artists" in track.raw_data:
artist_objects = await ensure_artists_exist(db, track.raw_data["artists"])
track.artists = artist_objects
db.commit()
artist_objects = await ensure_artists_exist(db, track.raw_data["artists"])
track.artists = artist_objects
db.commit()
exists = db.query(PlayHistory).filter(
PlayHistory.track_id == track_id,
PlayHistory.played_at == played_at
).first()
exists = (
db.query(PlayHistory)
.filter(
PlayHistory.track_id == track_id, PlayHistory.played_at == played_at
)
.first()
)
if not exists:
print(f" recording play: {track_data['name']} at {played_at}")
play = PlayHistory(
track_id=track_id,
played_at=played_at,
context_uri=item.get("context", {}).get("uri") if item.get("context") else None
context_uri=item.get("context", {}).get("uri")
if item.get("context")
else None,
source="recently_played",
)
db.add(play)
@@ -214,17 +249,145 @@ async def ingest_recently_played(db: Session):
# Enrich
await enrich_tracks(db, spotify_client, recco_client, genius_client)
async def run_worker():
"""Simulates a background worker loop."""
db = SessionLocal()
tracker = PlaybackTracker()
spotify_client = get_spotify_client()
poll_count = 0
try:
while True:
print("Worker: Polling Spotify...")
await ingest_recently_played(db)
print("Worker: Sleeping for 60 seconds...")
await asyncio.sleep(60)
poll_count += 1
await poll_currently_playing(db, spotify_client, tracker)
if poll_count % 4 == 0:
print("Worker: Polling recently-played...")
await ingest_recently_played(db)
await asyncio.sleep(15)
except Exception as e:
print(f"Worker crashed: {e}")
finally:
db.close()
async def poll_currently_playing(
    db: Session, spotify_client: SpotifyClient, tracker: PlaybackTracker
):
    """Poll Spotify's currently-playing endpoint once and update `tracker`.

    Finalizes the previously tracked play on track change (or when playback
    stops), accumulates listened wall-clock time while playback is active,
    and records pause state. Network errors are logged and swallowed so the
    worker loop keeps running.
    """
    try:
        response = await spotify_client.get_currently_playing()
    except Exception as e:
        print(f"Error polling currently-playing: {e}")
        return

    now = datetime.utcnow()

    # Nothing playing, or an ad/podcast episode: close out whatever we were
    # tracking and wait for the next poll.
    if not response or response.get("currently_playing_type") != "track":
        if tracker.current_track_id and tracker.last_poll_time:
            finalize_track(db, tracker)
        return

    item = response.get("item")
    if not item:
        return

    # Local files can appear with a missing/null id; skip them rather than
    # crash or pollute the tracker with a None key.
    current_track_id = item.get("id")
    if not current_track_id:
        return

    # progress_ms may be present but null in the payload; normalize to 0 so
    # the timedelta arithmetic below never sees None.
    current_progress_ms = response.get("progress_ms") or 0
    is_playing = response.get("is_playing", False)

    if current_track_id != tracker.current_track_id:
        # Track changed: finalize the old one, then start tracking the new one.
        if tracker.current_track_id and tracker.last_poll_time:
            finalize_track(db, tracker)
        tracker.current_track_id = current_track_id
        # Back-date the start so played_at reflects when the track began.
        tracker.track_start_time = now - timedelta(milliseconds=current_progress_ms)
        tracker.accumulated_listen_ms = current_progress_ms if is_playing else 0
        tracker.last_progress_ms = current_progress_ms
        tracker.last_poll_time = now
        tracker.is_paused = not is_playing
        await ensure_track_exists(db, item, spotify_client)
    else:
        # Same track: credit elapsed wall-clock time only while playback was
        # active across the whole interval (not paused at either end).
        if tracker.last_poll_time:
            time_delta_ms = (now - tracker.last_poll_time).total_seconds() * 1000
            if is_playing and not tracker.is_paused:
                tracker.accumulated_listen_ms += time_delta_ms
        tracker.last_progress_ms = current_progress_ms
        tracker.last_poll_time = now
        tracker.is_paused = not is_playing
def finalize_track(db: Session, tracker: PlaybackTracker):
    """Persist the play the tracker was following, then reset the tracker.

    If a recently-played row for the same track already exists within +-5s of
    the tracked start time, it is upgraded in place with precise listen data
    (only when it has none yet); otherwise a new PlayHistory row is inserted.
    """
    listened_ms = int(tracker.accumulated_listen_ms)
    skipped = listened_ms < 30000  # under 30 seconds of listening counts as a skip

    window = timedelta(seconds=5)
    existing = (
        db.query(PlayHistory)
        .filter(
            PlayHistory.track_id == tracker.current_track_id,
            PlayHistory.played_at >= tracker.track_start_time - window,
            PlayHistory.played_at <= tracker.track_start_time + window,
        )
        .first()
    )

    if existing is None:
        db.add(
            PlayHistory(
                track_id=tracker.current_track_id,
                played_at=tracker.track_start_time,
                listened_ms=listened_ms,
                skipped=skipped,
                source="currently_playing",
            )
        )
        db.commit()
    elif existing.listened_ms is None:
        # Upgrade the coarse recently-played row with the measured data.
        existing.listened_ms = listened_ms
        existing.skipped = skipped
        existing.source = "currently_playing"
        db.commit()

    print(
        f"Finalized: {tracker.current_track_id} listened={listened_ms}ms skipped={skipped}"
    )

    # Zero out the tracker so the next poll starts fresh.
    tracker.current_track_id = None
    tracker.track_start_time = None
    tracker.accumulated_listen_ms = 0
    tracker.last_progress_ms = 0
    tracker.last_poll_time = None
    tracker.is_paused = False
async def ensure_track_exists(
    db: Session, track_data: dict, spotify_client: SpotifyClient
):
    """Insert a Track row (and its artists) for `track_data` if not stored yet.

    No-op when the track already exists. `spotify_client` is accepted for
    signature parity with the rest of the ingest pipeline.
    """
    track_id = track_data["id"]
    if db.query(Track).filter(Track.id == track_id).first():
        return

    # `album` can be missing OR explicitly null (e.g. local files); the old
    # `track_data.get("album", {})` form crashed on an explicit null because
    # `.get()` was then called on None. Normalize both cases to {} once.
    album = track_data.get("album") or {}
    images = album.get("images") or []
    image_url = images[0]["url"] if images else None

    track = Track(
        id=track_id,
        name=track_data["name"],
        artist=", ".join(a["name"] for a in track_data.get("artists", [])),
        album=album.get("name", "Unknown"),
        image_url=image_url,
        duration_ms=track_data.get("duration_ms"),
        popularity=track_data.get("popularity"),
        raw_data=track_data,
    )
    # Create/attach Artist rows before committing the new track.
    artist_objects = await ensure_artists_exist(db, track_data.get("artists", []))
    track.artists = artist_objects
    db.add(track)
    db.commit()

View File

@@ -1,11 +1,15 @@
from fastapi import FastAPI, Depends, HTTPException, BackgroundTasks
from sqlalchemy.orm import Session
from fastapi import FastAPI, Depends, HTTPException, BackgroundTasks, Query
from sqlalchemy.orm import Session, joinedload
from datetime import datetime, timedelta
from typing import List, Optional
from dotenv import load_dotenv
from .database import engine, Base, get_db
from .models import PlayHistory as PlayHistoryModel, Track as TrackModel, AnalysisSnapshot
from .models import (
PlayHistory as PlayHistoryModel,
Track as TrackModel,
AnalysisSnapshot,
)
from . import schemas
from .ingest import ingest_recently_played
from .services.stats_service import StatsService
@@ -13,7 +17,6 @@ from .services.narrative_service import NarrativeService
load_dotenv()
# Create tables
Base.metadata.create_all(bind=engine)
from fastapi.middleware.cors import CORSMiddleware
@@ -22,37 +25,49 @@ app = FastAPI(title="Music Analyser Backend")
app.add_middleware(
CORSMiddleware,
allow_origins=["http://localhost:5173"],
allow_origins=["http://localhost:5173", "http://localhost:8991"],
allow_credentials=True,
allow_methods=["*"],
allow_headers=["*"],
)
@app.get("/")
def read_root():
    """Health-check endpoint confirming the API is up."""
    payload = {"status": "ok", "message": "Music Analyser API is running"}
    return payload
@app.get("/history", response_model=List[schemas.PlayHistory])
def get_history(limit: int = 50, db: Session = Depends(get_db)):
history = db.query(PlayHistoryModel).order_by(PlayHistoryModel.played_at.desc()).limit(limit).all()
history = (
db.query(PlayHistoryModel)
.order_by(PlayHistoryModel.played_at.desc())
.limit(limit)
.all()
)
return history
@app.get("/tracks", response_model=List[schemas.Track])
def get_tracks(limit: int = 50, db: Session = Depends(get_db)):
    """Return up to `limit` Track rows (no particular ordering)."""
    query = db.query(TrackModel).limit(limit)
    return query.all()
@app.post("/trigger-ingest")
async def trigger_ingest(background_tasks: BackgroundTasks, db: Session = Depends(get_db)):
async def trigger_ingest(
background_tasks: BackgroundTasks, db: Session = Depends(get_db)
):
"""Triggers Spotify ingestion in the background."""
background_tasks.add_task(ingest_recently_played, db)
return {"status": "Ingestion started in background"}
@app.post("/trigger-analysis")
def trigger_analysis(
days: int = 30,
model_name: str = "gemini-2.5-flash",
db: Session = Depends(get_db)
model_name: str = "gpt-5-mini-2025-08-07",
db: Session = Depends(get_db),
):
"""
Runs the full analysis pipeline (Stats + LLM) for the last X days.
@@ -67,7 +82,9 @@ def trigger_analysis(
stats_json = stats_service.generate_full_report(start_date, end_date)
if stats_json["volume"]["total_plays"] == 0:
raise HTTPException(status_code=404, detail="No plays found in the specified period.")
raise HTTPException(
status_code=404, detail="No plays found in the specified period."
)
narrative_service = NarrativeService(model_name=model_name)
narrative_json = narrative_service.generate_full_narrative(stats_json)
@@ -79,7 +96,7 @@ def trigger_analysis(
period_label=f"last_{days}_days",
metrics_payload=stats_json,
narrative_report=narrative_json,
model_used=model_name
model_used=model_name,
)
db.add(snapshot)
db.commit()
@@ -90,7 +107,7 @@ def trigger_analysis(
"snapshot_id": snapshot.id,
"period": {"start": start_date, "end": end_date},
"metrics": stats_json,
"narrative": narrative_json
"narrative": narrative_json,
}
except HTTPException:
@@ -99,7 +116,91 @@ def trigger_analysis(
print(f"Analysis Failed: {e}")
raise HTTPException(status_code=500, detail=str(e))
@app.get("/snapshots")
def get_snapshots(limit: int = 10, db: Session = Depends(get_db)):
"""Retrieve past analysis snapshots."""
return db.query(AnalysisSnapshot).order_by(AnalysisSnapshot.date.desc()).limit(limit).all()
return (
db.query(AnalysisSnapshot)
.order_by(AnalysisSnapshot.date.desc())
.limit(limit)
.all()
)
@app.get("/listening-log")
def get_listening_log(
    days: int = Query(default=7, ge=1, le=365),
    limit: int = Query(default=200, ge=1, le=1000),
    db: Session = Depends(get_db),
):
    """Return the most recent plays (newest first) for the last `days` days.

    Each entry joins in track metadata and carries listen/skip data. Rows
    ingested via recently-played have no measured `listened_ms`, so it is
    estimated from the gap to the adjacent play history row.
    """
    end_date = datetime.utcnow()
    start_date = end_date - timedelta(days=days)
    plays = (
        db.query(PlayHistoryModel)
        # Eager-load the Track relation to avoid N+1 queries in the loop below.
        .options(joinedload(PlayHistoryModel.track))
        .filter(
            PlayHistoryModel.played_at >= start_date,
            PlayHistoryModel.played_at <= end_date,
        )
        .order_by(PlayHistoryModel.played_at.desc())
        .limit(limit)
        .all()
    )
    result = []
    for i, play in enumerate(plays):
        track = play.track
        listened_ms = play.listened_ms
        skipped = play.skipped
        # No measured listen data: estimate from the gap to the next-older row.
        # NOTE(review): this assumes `played_at` marks when playback *finished*
        # for recently_played rows (so the gap belongs to this row) — TODO
        # confirm against the ingest path; currently_playing rows store the
        # start time, and if start-time semantics apply here the gap actually
        # belongs to plays[i + 1], not this row.
        if listened_ms is None and i < len(plays) - 1:
            next_play = plays[i + 1]
            diff_seconds = (play.played_at - next_play.played_at).total_seconds()
            if track and track.duration_ms:
                duration_sec = track.duration_ms / 1000.0
                # Clamp the estimate to the track's full duration.
                listened_ms = int(min(diff_seconds, duration_sec) * 1000)
                skipped = diff_seconds < 30
        result.append(
            {
                "id": play.id,
                "track_id": play.track_id,
                "track_name": track.name if track else "Unknown",
                "artist": track.artist if track else "Unknown",
                "album": track.album if track else "Unknown",
                "image": track.image_url if track else None,
                "played_at": play.played_at.isoformat(),
                "duration_ms": track.duration_ms if track else 0,
                "listened_ms": listened_ms,
                "skipped": skipped,
                "context_uri": play.context_uri,
                "source": play.source,
            }
        )
    return {
        "plays": result,
        "period": {"start": start_date.isoformat(), "end": end_date.isoformat()},
    }
@app.get("/sessions")
def get_sessions(
    days: int = Query(default=7, ge=1, le=365), db: Session = Depends(get_db)
):
    """Return listening sessions plus summary stats for the last `days` days."""
    end_date = datetime.utcnow()
    start_date = end_date - timedelta(days=days)

    session_stats = StatsService(db).compute_session_stats(start_date, end_date)

    summary = {
        "count": session_stats.get("count", 0),
        "avg_minutes": session_stats.get("avg_minutes", 0),
        "micro_rate": session_stats.get("micro_session_rate", 0),
        "marathon_rate": session_stats.get("marathon_session_rate", 0),
    }
    return {
        "sessions": session_stats.get("session_list", []),
        "summary": summary,
    }

View File

@@ -1,35 +1,50 @@
from sqlalchemy import Column, Integer, String, DateTime, JSON, ForeignKey, Float, Table, Text
from sqlalchemy import (
Boolean,
Column,
Integer,
String,
DateTime,
JSON,
ForeignKey,
Float,
Table,
Text,
)
from sqlalchemy.orm import relationship
from datetime import datetime
from .database import Base
# Association Table for Many-to-Many Relationship between Track and Artist
track_artists = Table(
'track_artists',
"track_artists",
Base.metadata,
Column('track_id', String, ForeignKey('tracks.id'), primary_key=True),
Column('artist_id', String, ForeignKey('artists.id'), primary_key=True)
Column("track_id", String, ForeignKey("tracks.id"), primary_key=True),
Column("artist_id", String, ForeignKey("artists.id"), primary_key=True),
)
class Artist(Base):
__tablename__ = "artists"
id = Column(String, primary_key=True, index=True) # Spotify ID
id = Column(String, primary_key=True, index=True) # Spotify ID
name = Column(String)
genres = Column(JSON, nullable=True) # List of genre strings
image_url = Column(String, nullable=True) # Artist profile image
genres = Column(JSON, nullable=True) # List of genre strings
image_url = Column(String, nullable=True) # Artist profile image
# Relationships
tracks = relationship("Track", secondary=track_artists, back_populates="artists")
class Track(Base):
__tablename__ = "tracks"
id = Column(String, primary_key=True, index=True) # Spotify ID
id = Column(String, primary_key=True, index=True) # Spotify ID
name = Column(String)
artist = Column(String) # Display string (e.g. "Drake, Future") - kept for convenience
artist = Column(
String
) # Display string (e.g. "Drake, Future") - kept for convenience
album = Column(String)
image_url = Column(String, nullable=True) # Album art
image_url = Column(String, nullable=True) # Album art
duration_ms = Column(Integer)
popularity = Column(Integer, nullable=True)
@@ -55,7 +70,7 @@ class Track(Base):
genres = Column(JSON, nullable=True)
# AI Analysis fields
lyrics = Column(Text, nullable=True) # Full lyrics from Genius
lyrics = Column(Text, nullable=True) # Full lyrics from Genius
lyrics_summary = Column(String, nullable=True)
genre_tags = Column(String, nullable=True)
@@ -71,11 +86,13 @@ class PlayHistory(Base):
id = Column(Integer, primary_key=True, index=True)
track_id = Column(String, ForeignKey("tracks.id"))
played_at = Column(DateTime, index=True) # The timestamp from Spotify
# Context (album, playlist, etc.)
played_at = Column(DateTime, index=True)
context_uri = Column(String, nullable=True)
listened_ms = Column(Integer, nullable=True)
skipped = Column(Boolean, nullable=True)
source = Column(String, nullable=True)
track = relationship("Track", back_populates="plays")
@@ -84,16 +101,19 @@ class AnalysisSnapshot(Base):
Stores the computed statistics and LLM analysis for a given period.
Allows for trend analysis over time.
"""
__tablename__ = "analysis_snapshots"
id = Column(Integer, primary_key=True, index=True)
date = Column(DateTime, default=datetime.utcnow, index=True) # When the analysis was run
date = Column(
DateTime, default=datetime.utcnow, index=True
) # When the analysis was run
period_start = Column(DateTime)
period_end = Column(DateTime)
period_label = Column(String) # e.g., "last_30_days", "monthly_nov_2023"
period_label = Column(String) # e.g., "last_30_days", "monthly_nov_2023"
# The heavy lifting: stored as JSON blobs
metrics_payload = Column(JSON) # The input to the LLM (StatsService output)
narrative_report = Column(JSON) # The output from the LLM (NarrativeService output)
metrics_payload = Column(JSON) # The input to the LLM (StatsService output)
narrative_report = Column(JSON) # The output from the LLM (NarrativeService output)
model_used = Column(String, nullable=True) # e.g. "gemini-1.5-flash"
model_used = Column(String, nullable=True) # e.g. "gemini-1.5-flash"

View File

@@ -1,101 +1,154 @@
import os
import json
import re
from google import genai
from typing import Dict, Any, List, Optional
from typing import Dict, Any
try:
from openai import OpenAI
except ImportError:
OpenAI = None
try:
from google import genai
except ImportError:
genai = None
class NarrativeService:
def __init__(self, model_name: str = "gemini-2.0-flash-exp"):
self.api_key = os.getenv("GEMINI_API_KEY")
self.client = genai.Client(api_key=self.api_key) if self.api_key else None
if not self.api_key:
print("WARNING: GEMINI_API_KEY not found. LLM features will fail.")
def __init__(self, model_name: str = "gpt-5-mini-2025-08-07"):
self.model_name = model_name
self.provider = self._detect_provider()
self.client = self._init_client()
def _detect_provider(self) -> str:
openai_key = os.getenv("OPENAI_API_KEY") or os.getenv("OPENAI_APIKEY")
gemini_key = os.getenv("GEMINI_API_KEY")
if self.model_name.startswith("gpt") and openai_key and OpenAI:
return "openai"
elif gemini_key and genai:
return "gemini"
elif openai_key and OpenAI:
return "openai"
elif gemini_key and genai:
return "gemini"
return "none"
def _init_client(self):
if self.provider == "openai":
api_key = os.getenv("OPENAI_API_KEY") or os.getenv("OPENAI_APIKEY")
return OpenAI(api_key=api_key)
elif self.provider == "gemini":
api_key = os.getenv("GEMINI_API_KEY")
return genai.Client(api_key=api_key)
return None
def generate_full_narrative(self, stats_json: Dict[str, Any]) -> Dict[str, Any]:
"""
Orchestrates the generation of the full narrative report.
Currently uses a single call for consistency and speed.
"""
if not self.api_key:
if not self.client:
print("WARNING: No LLM client available")
return self._get_fallback_narrative()
clean_stats = self._shape_payload(stats_json)
prompt = f"""
You are a witty, insightful, and slightly snarky music critic analyzing a user's Spotify listening data.
Your goal is to generate a JSON report that acts as a deeper, more honest "Spotify Wrapped".
prompt = self._build_prompt(clean_stats)
**CORE RULES:**
1. **NO Mental Health Diagnoses:** Do not mention depression, anxiety, or therapy. Stick to behavioral descriptors (e.g., "introspective", "high-energy").
2. **Be Specific:** Use the provided metrics. Don't say "You like pop," say "Your Mainstream Score of 85% suggests..."
3. **Roast Gently:** Be playful but not cruel.
4. **JSON Output Only:** Return strictly valid JSON.
**DATA TO ANALYZE:**
{json.dumps(clean_stats, indent=2)}
**REQUIRED JSON STRUCTURE:**
{{
"vibe_check": "2-3 paragraphs describing their overall listening personality this period.",
"patterns": ["Observation 1", "Observation 2", "Observation 3 (Look for specific habits like skipping or late-night sessions)"],
"persona": "A creative label (e.g., 'The Genre Chameleon', 'Nostalgic Dad-Rocker').",
"era_insight": "A specific comment on their Musical Age ({clean_stats.get('era', {}).get('musical_age', 'N/A')}) and Nostalgia Gap.",
"roast": "A 1-2 sentence playful roast about their taste.",
"comparison": "A short comment comparing this period to the previous one (if data exists)."
}}
"""
try:
response = self.client.models.generate_content(
model=self.model_name,
contents=prompt,
config=genai.types.GenerateContentConfig(response_mime_type="application/json")
)
return self._clean_and_parse_json(response.text)
if self.provider == "openai":
return self._call_openai(prompt)
elif self.provider == "gemini":
return self._call_gemini(prompt)
except Exception as e:
print(f"LLM Generation Error: {e}")
return self._get_fallback_narrative()
return self._get_fallback_narrative()
def _call_openai(self, prompt: str) -> Dict[str, Any]:
response = self.client.chat.completions.create(
model=self.model_name,
messages=[
{
"role": "system",
"content": "You are a witty music critic. Output only valid JSON.",
},
{"role": "user", "content": prompt},
],
response_format={"type": "json_object"},
max_completion_tokens=1500,
temperature=0.8,
)
return self._clean_and_parse_json(response.choices[0].message.content)
def _call_gemini(self, prompt: str) -> Dict[str, Any]:
response = self.client.models.generate_content(
model=self.model_name,
contents=prompt,
config=genai.types.GenerateContentConfig(
response_mime_type="application/json"
),
)
return self._clean_and_parse_json(response.text)
def _build_prompt(self, clean_stats: Dict[str, Any]) -> str:
return f"""Analyze this Spotify listening data and generate a personalized report.
**RULES:**
1. NO mental health diagnoses. Use behavioral descriptors only.
2. Be specific - reference actual metrics from the data.
3. Be playful but not cruel.
4. Return ONLY valid JSON.
**DATA:**
{json.dumps(clean_stats, indent=2)}
**REQUIRED JSON:**
{{
"vibe_check_short": "1-2 sentence hook for the hero banner.",
"vibe_check": "2-3 paragraphs describing their overall listening personality.",
"patterns": ["Observation 1", "Observation 2", "Observation 3"],
"persona": "A creative label (e.g., 'The Genre Chameleon').",
"era_insight": "Comment on Musical Age ({clean_stats.get("era", {}).get("musical_age", "N/A")}).",
"roast": "1-2 sentence playful roast.",
"comparison": "Compare to previous period if data exists."
}}"""
def _shape_payload(self, stats: Dict[str, Any]) -> Dict[str, Any]:
"""
Compresses the stats JSON to save tokens and focus the LLM.
Removes raw lists beyond top 5/10.
"""
s = stats.copy()
# Simplify Volume
if "volume" in s:
s["volume"] = {
k: v for k, v in s["volume"].items()
volume_copy = {
k: v
for k, v in s["volume"].items()
if k not in ["top_tracks", "top_artists", "top_albums", "top_genres"]
}
# Add back condensed top lists (just names)
s["volume"]["top_tracks"] = [t["name"] for t in stats["volume"].get("top_tracks", [])[:5]]
s["volume"]["top_artists"] = [a["name"] for a in stats["volume"].get("top_artists", [])[:5]]
s["volume"]["top_genres"] = [g["name"] for g in stats["volume"].get("top_genres", [])[:5]]
volume_copy["top_tracks"] = [
t["name"] for t in stats["volume"].get("top_tracks", [])[:5]
]
volume_copy["top_artists"] = [
a["name"] for a in stats["volume"].get("top_artists", [])[:5]
]
volume_copy["top_genres"] = [
g["name"] for g in stats["volume"].get("top_genres", [])[:5]
]
s["volume"] = volume_copy
if "time_habits" in s:
s["time_habits"] = {
k: v for k, v in s["time_habits"].items() if k != "heatmap"
}
if "sessions" in s:
s["sessions"] = {
k: v for k, v in s["sessions"].items() if k != "session_list"
}
# Simplify Time (Keep distributions but maybe round them?)
# Keeping hourly/daily is fine, they are small arrays.
# Simplify Vibe (Remove huge transition arrays if they accidentally leaked, though stats service handles this)
# Remove period details if verbose
return s
def _clean_and_parse_json(self, raw_text: str) -> Dict[str, Any]:
"""
Robust JSON extractor.
"""
try:
# 1. Try direct parse
return json.loads(raw_text)
except json.JSONDecodeError:
pass
# 2. Extract between first { and last }
try:
match = re.search(r"\{.*\}", raw_text, re.DOTALL)
if match:
@@ -107,16 +160,11 @@ Your goal is to generate a JSON report that acts as a deeper, more honest "Spoti
def _get_fallback_narrative(self) -> Dict[str, Any]:
return {
"vibe_check": "Data processing error. You're too mysterious for us to analyze right now.",
"vibe_check_short": "Your taste is... interesting.",
"vibe_check": "Data processing error. You're too mysterious to analyze right now.",
"patterns": [],
"persona": "The Enigma",
"era_insight": "Time is a flat circle.",
"roast": "You broke the machine. Congratulations.",
"comparison": "N/A"
"comparison": "N/A",
}
# Individual accessors if needed by frontend, though full_narrative is preferred
def generate_vibe_check(self, stats): return self.generate_full_narrative(stats).get("vibe_check")
def identify_patterns(self, stats): return self.generate_full_narrative(stats).get("patterns")
def generate_persona(self, stats): return self.generate_full_narrative(stats).get("persona")
def generate_roast(self, stats): return self.generate_full_narrative(stats).get("roast")

View File

@@ -3,16 +3,30 @@ from typing import List, Dict, Any
RECCOBEATS_API_URL = "https://api.reccobeats.com/v1/audio-features"
class ReccoBeatsClient:
async def get_audio_features(self, spotify_ids: List[str]) -> List[Dict[str, Any]]:
if not spotify_ids:
return []
ids_param = ",".join(spotify_ids)
async with httpx.AsyncClient() as client:
async with httpx.AsyncClient(timeout=30.0) as client:
try:
response = await client.get(RECCOBEATS_API_URL, params={"ids": ids_param})
response = await client.get(
RECCOBEATS_API_URL, params={"ids": ids_param}
)
if response.status_code != 200:
print(f"ReccoBeats API returned status {response.status_code}")
return []
return response.json().get("content", [])
except Exception:
content = response.json().get("content", [])
for item in content:
href = item.get("href", "")
if "spotify.com/track/" in href:
spotify_id = href.split("/track/")[-1].split("?")[0]
item["spotify_id"] = spotify_id
return content
except Exception as e:
print(f"ReccoBeats API error: {e}")
return []

View File

@@ -8,6 +8,7 @@ from typing import List, Dict, Any
SPOTIFY_TOKEN_URL = "https://accounts.spotify.com/api/token"
SPOTIFY_API_BASE = "https://api.spotify.com/v1"
class SpotifyClient:
def __init__(self, client_id: str, client_secret: str, refresh_token: str):
self.client_id = client_id
@@ -92,3 +93,17 @@ class SpotifyClient:
return []
return response.json().get("artists", [])
async def get_currently_playing(self) -> Dict[str, Any] | None:
token = await self.get_access_token()
async with httpx.AsyncClient() as client:
response = await client.get(
f"{SPOTIFY_API_BASE}/me/player/currently-playing",
headers={"Authorization": f"Bearer {token}"},
)
if response.status_code == 204:
return None
if response.status_code != 200:
print(f"Error fetching currently playing: {response.text}")
return None
return response.json()

File diff suppressed because it is too large Load Diff