From 887e78bf478647c39d1d3ebc89cfa9177a573357 Mon Sep 17 00:00:00 2001 From: bnair123 Date: Tue, 30 Dec 2025 00:15:01 +0400 Subject: [PATCH] Add skip tracking, compressed heatmap, listening log, docs, tests, and OpenAI support Major changes: - Add skip tracking: poll currently-playing every 15s, detect skips (<30s listened) - Add listening-log and sessions API endpoints - Fix ReccoBeats client to extract spotify_id from href response - Compress heatmap from 24 hours to 6 x 4-hour blocks - Add OpenAI support in narrative service (use max_completion_tokens for new models) - Add ListeningLog component with timeline and list views - Update all frontend components to use real data (album art, play counts) - Add docker-compose external network (dockernet) support - Add comprehensive documentation (API, DATA_MODEL, ARCHITECTURE, FRONTEND) - Add unit tests for ingest and API endpoints --- .../a1b2c3d4e5f6_add_skip_tracking_columns.py | 34 + backend/app/ingest.py | 259 ++++++-- backend/app/main.py | 129 +++- backend/app/models.py | 58 +- backend/app/services/narrative_service.py | 198 +++--- backend/app/services/reccobeats_client.py | 22 +- backend/app/services/spotify_client.py | 15 + backend/app/services/stats_service.py | 607 ++++++++++++------ backend/requirements.txt | 1 + backend/tests/conftest.py | 5 + backend/tests/test_ingest.py | 113 ++++ backend/tests/test_main.py | 49 ++ backend/tests/test_stats_full.py | 155 ----- docker-compose.template.yml | 64 ++ docker-compose.yml | 14 +- docs/API.md | 125 ++++ docs/ARCHITECTURE.md | 43 ++ docs/DATA_MODEL.md | 89 +++ docs/FRONTEND.md | 61 ++ frontend/src/components/Dashboard.jsx | 48 +- frontend/src/components/HeatMap.jsx | 171 ++--- frontend/src/components/ListeningLog.jsx | 202 ++++++ frontend/src/components/NarrativeSection.jsx | 31 +- frontend/src/components/StatsGrid.jsx | 27 +- frontend/src/components/TopRotation.jsx | 17 +- frontend/src/components/VibeRadar.jsx | 67 +- 26 files changed, 1942 insertions(+), 662 
deletions(-) create mode 100644 backend/alembic/versions/a1b2c3d4e5f6_add_skip_tracking_columns.py create mode 100644 backend/tests/conftest.py create mode 100644 backend/tests/test_ingest.py create mode 100644 backend/tests/test_main.py delete mode 100644 backend/tests/test_stats_full.py create mode 100644 docker-compose.template.yml create mode 100644 docs/API.md create mode 100644 docs/ARCHITECTURE.md create mode 100644 docs/DATA_MODEL.md create mode 100644 docs/FRONTEND.md create mode 100644 frontend/src/components/ListeningLog.jsx diff --git a/backend/alembic/versions/a1b2c3d4e5f6_add_skip_tracking_columns.py b/backend/alembic/versions/a1b2c3d4e5f6_add_skip_tracking_columns.py new file mode 100644 index 0000000..9ea3ce1 --- /dev/null +++ b/backend/alembic/versions/a1b2c3d4e5f6_add_skip_tracking_columns.py @@ -0,0 +1,34 @@ +"""Add skip tracking columns to play_history + +Revision ID: a1b2c3d4e5f6 +Revises: f92d8a9264d3 +Create Date: 2025-12-29 22:30:00.000000 + +""" + +from typing import Sequence, Union + +from alembic import op +import sqlalchemy as sa + + +# revision identifiers, used by Alembic. 
+revision: str = "a1b2c3d4e5f6" +down_revision: Union[str, Sequence[str], None] = "f92d8a9264d3" +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + """Add listened_ms, skipped, and source columns to play_history.""" + op.add_column("play_history", sa.Column("listened_ms", sa.Integer(), nullable=True)) + op.add_column("play_history", sa.Column("skipped", sa.Boolean(), nullable=True)) + op.add_column("play_history", sa.Column("source", sa.String(), nullable=True)) + # source can be: 'recently_played', 'currently_playing', 'inferred' + + +def downgrade() -> None: + """Remove skip tracking columns.""" + op.drop_column("play_history", "source") + op.drop_column("play_history", "skipped") + op.drop_column("play_history", "listened_ms") diff --git a/backend/app/ingest.py b/backend/app/ingest.py index b305196..a85a647 100644 --- a/backend/app/ingest.py +++ b/backend/app/ingest.py @@ -1,6 +1,6 @@ import asyncio import os -from datetime import datetime +from datetime import datetime, timedelta from sqlalchemy.orm import Session from .models import Track, PlayHistory, Artist from .database import SessionLocal @@ -9,6 +9,17 @@ from .services.reccobeats_client import ReccoBeatsClient from .services.genius_client import GeniusClient from dateutil import parser + +class PlaybackTracker: + def __init__(self): + self.current_track_id = None + self.track_start_time = None + self.accumulated_listen_ms = 0 + self.last_progress_ms = 0 + self.last_poll_time = None + self.is_paused = False + + # Initialize Clients def get_spotify_client(): return SpotifyClient( @@ -17,12 +28,15 @@ def get_spotify_client(): refresh_token=os.getenv("SPOTIFY_REFRESH_TOKEN"), ) + def get_reccobeats_client(): return ReccoBeatsClient() + def get_genius_client(): return GeniusClient() + async def ensure_artists_exist(db: Session, artists_data: list): """ Ensures that all artists in the list exist in the Artist table. 
@@ -36,18 +50,19 @@ async def ensure_artists_exist(db: Session, artists_data: list): img = None if "images" in a_data and a_data["images"]: img = a_data["images"][0]["url"] - - artist = Artist( - id=artist_id, - name=a_data["name"], - genres=[], - image_url=img - ) + + artist = Artist(id=artist_id, name=a_data["name"], genres=[], image_url=img) db.add(artist) artist_objects.append(artist) return artist_objects -async def enrich_tracks(db: Session, spotify_client: SpotifyClient, recco_client: ReccoBeatsClient, genius_client: GeniusClient): + +async def enrich_tracks( + db: Session, + spotify_client: SpotifyClient, + recco_client: ReccoBeatsClient, + genius_client: GeniusClient, +): """ Enrichment Pipeline: 1. Audio Features (ReccoBeats) @@ -56,18 +71,19 @@ async def enrich_tracks(db: Session, spotify_client: SpotifyClient, recco_client """ # 1. Enrich Audio Features - tracks_missing_features = db.query(Track).filter(Track.danceability == None).limit(50).all() + tracks_missing_features = ( + db.query(Track).filter(Track.danceability == None).limit(50).all() + ) if tracks_missing_features: print(f"Enriching {len(tracks_missing_features)} tracks with audio features...") ids = [t.id for t in tracks_missing_features] features_list = await recco_client.get_audio_features(ids) - - # Map features by ID + features_map = {} for f in features_list: - # Handle potential ID mismatch or URI format - tid = f.get("id") - if tid: features_map[tid] = f + tid = f.get("spotify_id") or f.get("id") + if tid: + features_map[tid] = f for track in tracks_missing_features: data = features_map.get(track.id) @@ -83,61 +99,74 @@ async def enrich_tracks(db: Session, spotify_client: SpotifyClient, recco_client track.liveness = data.get("liveness") track.valence = data.get("valence") track.tempo = data.get("tempo") - + db.commit() # 2. 
Enrich Artist Genres & Images (Spotify) - artists_missing_data = db.query(Artist).filter((Artist.genres == None) | (Artist.image_url == None)).limit(50).all() + artists_missing_data = ( + db.query(Artist) + .filter((Artist.genres == None) | (Artist.image_url == None)) + .limit(50) + .all() + ) if artists_missing_data: print(f"Enriching {len(artists_missing_data)} artists with genres/images...") artist_ids_list = [a.id for a in artists_missing_data] - + artist_data_map = {} for i in range(0, len(artist_ids_list), 50): - chunk = artist_ids_list[i:i+50] + chunk = artist_ids_list[i : i + 50] artists_data = await spotify_client.get_artists(chunk) for a_data in artists_data: if a_data: img = a_data["images"][0]["url"] if a_data.get("images") else None artist_data_map[a_data["id"]] = { "genres": a_data.get("genres", []), - "image_url": img + "image_url": img, } for artist in artists_missing_data: data = artist_data_map.get(artist.id) if data: - if artist.genres is None: artist.genres = data["genres"] - if artist.image_url is None: artist.image_url = data["image_url"] + if artist.genres is None: + artist.genres = data["genres"] + if artist.image_url is None: + artist.image_url = data["image_url"] elif artist.genres is None: - artist.genres = [] # Prevent retry loop - + artist.genres = [] # Prevent retry loop + db.commit() # 3. 
Enrich Lyrics (Genius) # Only fetch for tracks that have been played recently to avoid spamming Genius API - tracks_missing_lyrics = db.query(Track).filter(Track.lyrics == None).order_by(Track.updated_at.desc()).limit(10).all() - + tracks_missing_lyrics = ( + db.query(Track) + .filter(Track.lyrics == None) + .order_by(Track.updated_at.desc()) + .limit(10) + .all() + ) + if tracks_missing_lyrics and genius_client.genius: print(f"Enriching {len(tracks_missing_lyrics)} tracks with lyrics (Genius)...") for track in tracks_missing_lyrics: # We need the primary artist name - artist_name = track.artist.split(",")[0] # Heuristic: take first artist - + artist_name = track.artist.split(",")[0] # Heuristic: take first artist + print(f"Searching Genius for: {track.name} by {artist_name}") data = genius_client.search_song(track.name, artist_name) - + if data: track.lyrics = data["lyrics"] # Fallback: if we didn't get high-res art from Spotify, use Genius if not track.image_url and data.get("image_url"): track.image_url = data["image_url"] else: - track.lyrics = "" # Mark as empty to prevent retry loop - + track.lyrics = "" # Mark as empty to prevent retry loop + # Small sleep to be nice to API? GeniusClient is synchronous. # We are in async function but GeniusClient is blocking. It's fine for worker. 
- + db.commit() @@ -164,7 +193,7 @@ async def ingest_recently_played(db: Session): if not track: print(f"New track found: {track_data['name']}") - + # Extract Album Art image_url = None if track_data.get("album") and track_data["album"].get("images"): @@ -173,12 +202,12 @@ async def ingest_recently_played(db: Session): track = Track( id=track_id, name=track_data["name"], - artist=", ".join([a["name"] for a in track_data["artists"]]), + artist=", ".join([a["name"] for a in track_data["artists"]]), album=track_data["album"]["name"], image_url=image_url, duration_ms=track_data["duration_ms"], popularity=track_data["popularity"], - raw_data=track_data + raw_data=track_data, ) # Handle Artists Relation @@ -191,21 +220,27 @@ async def ingest_recently_played(db: Session): # Ensure relationships exist logic... if not track.artists and track.raw_data and "artists" in track.raw_data: - artist_objects = await ensure_artists_exist(db, track.raw_data["artists"]) - track.artists = artist_objects - db.commit() + artist_objects = await ensure_artists_exist(db, track.raw_data["artists"]) + track.artists = artist_objects + db.commit() - exists = db.query(PlayHistory).filter( - PlayHistory.track_id == track_id, - PlayHistory.played_at == played_at - ).first() + exists = ( + db.query(PlayHistory) + .filter( + PlayHistory.track_id == track_id, PlayHistory.played_at == played_at + ) + .first() + ) if not exists: print(f" recording play: {track_data['name']} at {played_at}") play = PlayHistory( track_id=track_id, played_at=played_at, - context_uri=item.get("context", {}).get("uri") if item.get("context") else None + context_uri=item.get("context", {}).get("uri") + if item.get("context") + else None, + source="recently_played", ) db.add(play) @@ -214,17 +249,145 @@ async def ingest_recently_played(db: Session): # Enrich await enrich_tracks(db, spotify_client, recco_client, genius_client) + async def run_worker(): - """Simulates a background worker loop.""" db = SessionLocal() + tracker = 
PlaybackTracker() + spotify_client = get_spotify_client() + poll_count = 0 try: while True: - print("Worker: Polling Spotify...") - await ingest_recently_played(db) - print("Worker: Sleeping for 60 seconds...") - await asyncio.sleep(60) + poll_count += 1 + + await poll_currently_playing(db, spotify_client, tracker) + + if poll_count % 4 == 0: + print("Worker: Polling recently-played...") + await ingest_recently_played(db) + + await asyncio.sleep(15) except Exception as e: print(f"Worker crashed: {e}") finally: db.close() + + +async def poll_currently_playing( + db: Session, spotify_client: SpotifyClient, tracker: PlaybackTracker +): + try: + response = await spotify_client.get_currently_playing() + except Exception as e: + print(f"Error polling currently-playing: {e}") + return + + now = datetime.utcnow() + + if not response or response.get("currently_playing_type") != "track": + if tracker.current_track_id and tracker.last_poll_time: + finalize_track(db, tracker) + return + + item = response.get("item") + if not item: + return + + current_track_id = item["id"] + current_progress_ms = response.get("progress_ms", 0) + is_playing = response.get("is_playing", False) + + if current_track_id != tracker.current_track_id: + if tracker.current_track_id and tracker.last_poll_time: + finalize_track(db, tracker) + + tracker.current_track_id = current_track_id + tracker.track_start_time = now - timedelta(milliseconds=current_progress_ms) + tracker.accumulated_listen_ms = current_progress_ms if is_playing else 0 + tracker.last_progress_ms = current_progress_ms + tracker.last_poll_time = now + tracker.is_paused = not is_playing + + await ensure_track_exists(db, item, spotify_client) + else: + if tracker.last_poll_time: + time_delta_ms = (now - tracker.last_poll_time).total_seconds() * 1000 + if is_playing and not tracker.is_paused: + tracker.accumulated_listen_ms += time_delta_ms + + tracker.last_progress_ms = current_progress_ms + tracker.last_poll_time = now + 
tracker.is_paused = not is_playing + + +def finalize_track(db: Session, tracker: PlaybackTracker): + listened_ms = int(tracker.accumulated_listen_ms) + skipped = listened_ms < 30000 + + existing = ( + db.query(PlayHistory) + .filter( + PlayHistory.track_id == tracker.current_track_id, + PlayHistory.played_at >= tracker.track_start_time - timedelta(seconds=5), + PlayHistory.played_at <= tracker.track_start_time + timedelta(seconds=5), + ) + .first() + ) + + if existing: + if existing.listened_ms is None: + existing.listened_ms = listened_ms + existing.skipped = skipped + existing.source = "currently_playing" + db.commit() + else: + play = PlayHistory( + track_id=tracker.current_track_id, + played_at=tracker.track_start_time, + listened_ms=listened_ms, + skipped=skipped, + source="currently_playing", + ) + db.add(play) + db.commit() + + print( + f"Finalized: {tracker.current_track_id} listened={listened_ms}ms skipped={skipped}" + ) + + tracker.current_track_id = None + tracker.track_start_time = None + tracker.accumulated_listen_ms = 0 + tracker.last_progress_ms = 0 + tracker.last_poll_time = None + tracker.is_paused = False + + +async def ensure_track_exists( + db: Session, track_data: dict, spotify_client: SpotifyClient +): + track_id = track_data["id"] + track = db.query(Track).filter(Track.id == track_id).first() + + if not track: + image_url = None + if track_data.get("album") and track_data["album"].get("images"): + image_url = track_data["album"]["images"][0]["url"] + + track = Track( + id=track_id, + name=track_data["name"], + artist=", ".join([a["name"] for a in track_data.get("artists", [])]), + album=track_data.get("album", {}).get("name", "Unknown"), + image_url=image_url, + duration_ms=track_data.get("duration_ms"), + popularity=track_data.get("popularity"), + raw_data=track_data, + ) + + artists_data = track_data.get("artists", []) + artist_objects = await ensure_artists_exist(db, artists_data) + track.artists = artist_objects + + db.add(track) + 
db.commit() diff --git a/backend/app/main.py b/backend/app/main.py index ea624f8..2da654d 100644 --- a/backend/app/main.py +++ b/backend/app/main.py @@ -1,11 +1,15 @@ -from fastapi import FastAPI, Depends, HTTPException, BackgroundTasks -from sqlalchemy.orm import Session +from fastapi import FastAPI, Depends, HTTPException, BackgroundTasks, Query +from sqlalchemy.orm import Session, joinedload from datetime import datetime, timedelta from typing import List, Optional from dotenv import load_dotenv from .database import engine, Base, get_db -from .models import PlayHistory as PlayHistoryModel, Track as TrackModel, AnalysisSnapshot +from .models import ( + PlayHistory as PlayHistoryModel, + Track as TrackModel, + AnalysisSnapshot, +) from . import schemas from .ingest import ingest_recently_played from .services.stats_service import StatsService @@ -13,7 +17,6 @@ from .services.narrative_service import NarrativeService load_dotenv() -# Create tables Base.metadata.create_all(bind=engine) from fastapi.middleware.cors import CORSMiddleware @@ -22,37 +25,49 @@ app = FastAPI(title="Music Analyser Backend") app.add_middleware( CORSMiddleware, - allow_origins=["http://localhost:5173"], + allow_origins=["http://localhost:5173", "http://localhost:8991"], allow_credentials=True, allow_methods=["*"], allow_headers=["*"], ) + @app.get("/") def read_root(): return {"status": "ok", "message": "Music Analyser API is running"} + @app.get("/history", response_model=List[schemas.PlayHistory]) def get_history(limit: int = 50, db: Session = Depends(get_db)): - history = db.query(PlayHistoryModel).order_by(PlayHistoryModel.played_at.desc()).limit(limit).all() + history = ( + db.query(PlayHistoryModel) + .order_by(PlayHistoryModel.played_at.desc()) + .limit(limit) + .all() + ) return history + @app.get("/tracks", response_model=List[schemas.Track]) def get_tracks(limit: int = 50, db: Session = Depends(get_db)): tracks = db.query(TrackModel).limit(limit).all() return tracks + 
@app.post("/trigger-ingest") -async def trigger_ingest(background_tasks: BackgroundTasks, db: Session = Depends(get_db)): +async def trigger_ingest( + background_tasks: BackgroundTasks, db: Session = Depends(get_db) +): """Triggers Spotify ingestion in the background.""" background_tasks.add_task(ingest_recently_played, db) return {"status": "Ingestion started in background"} + @app.post("/trigger-analysis") def trigger_analysis( days: int = 30, - model_name: str = "gemini-2.5-flash", - db: Session = Depends(get_db) + model_name: str = "gpt-5-mini-2025-08-07", + db: Session = Depends(get_db), ): """ Runs the full analysis pipeline (Stats + LLM) for the last X days. @@ -67,7 +82,9 @@ def trigger_analysis( stats_json = stats_service.generate_full_report(start_date, end_date) if stats_json["volume"]["total_plays"] == 0: - raise HTTPException(status_code=404, detail="No plays found in the specified period.") + raise HTTPException( + status_code=404, detail="No plays found in the specified period." 
+ ) narrative_service = NarrativeService(model_name=model_name) narrative_json = narrative_service.generate_full_narrative(stats_json) @@ -79,7 +96,7 @@ def trigger_analysis( period_label=f"last_{days}_days", metrics_payload=stats_json, narrative_report=narrative_json, - model_used=model_name + model_used=model_name, ) db.add(snapshot) db.commit() @@ -90,7 +107,7 @@ def trigger_analysis( "snapshot_id": snapshot.id, "period": {"start": start_date, "end": end_date}, "metrics": stats_json, - "narrative": narrative_json + "narrative": narrative_json, } except HTTPException: @@ -99,7 +116,91 @@ def trigger_analysis( print(f"Analysis Failed: {e}") raise HTTPException(status_code=500, detail=str(e)) + @app.get("/snapshots") def get_snapshots(limit: int = 10, db: Session = Depends(get_db)): - """Retrieve past analysis snapshots.""" - return db.query(AnalysisSnapshot).order_by(AnalysisSnapshot.date.desc()).limit(limit).all() \ No newline at end of file + return ( + db.query(AnalysisSnapshot) + .order_by(AnalysisSnapshot.date.desc()) + .limit(limit) + .all() + ) + + +@app.get("/listening-log") +def get_listening_log( + days: int = Query(default=7, ge=1, le=365), + limit: int = Query(default=200, ge=1, le=1000), + db: Session = Depends(get_db), +): + end_date = datetime.utcnow() + start_date = end_date - timedelta(days=days) + + plays = ( + db.query(PlayHistoryModel) + .options(joinedload(PlayHistoryModel.track)) + .filter( + PlayHistoryModel.played_at >= start_date, + PlayHistoryModel.played_at <= end_date, + ) + .order_by(PlayHistoryModel.played_at.desc()) + .limit(limit) + .all() + ) + + result = [] + for i, play in enumerate(plays): + track = play.track + listened_ms = play.listened_ms + skipped = play.skipped + + if listened_ms is None and i < len(plays) - 1: + next_play = plays[i + 1] + diff_seconds = (play.played_at - next_play.played_at).total_seconds() + if track and track.duration_ms: + duration_sec = track.duration_ms / 1000.0 + listened_ms = int(min(diff_seconds, 
duration_sec) * 1000) + skipped = diff_seconds < 30 + + result.append( + { + "id": play.id, + "track_id": play.track_id, + "track_name": track.name if track else "Unknown", + "artist": track.artist if track else "Unknown", + "album": track.album if track else "Unknown", + "image": track.image_url if track else None, + "played_at": play.played_at.isoformat(), + "duration_ms": track.duration_ms if track else 0, + "listened_ms": listened_ms, + "skipped": skipped, + "context_uri": play.context_uri, + "source": play.source, + } + ) + + return { + "plays": result, + "period": {"start": start_date.isoformat(), "end": end_date.isoformat()}, + } + + +@app.get("/sessions") +def get_sessions( + days: int = Query(default=7, ge=1, le=365), db: Session = Depends(get_db) +): + end_date = datetime.utcnow() + start_date = end_date - timedelta(days=days) + + stats_service = StatsService(db) + session_stats = stats_service.compute_session_stats(start_date, end_date) + + return { + "sessions": session_stats.get("session_list", []), + "summary": { + "count": session_stats.get("count", 0), + "avg_minutes": session_stats.get("avg_minutes", 0), + "micro_rate": session_stats.get("micro_session_rate", 0), + "marathon_rate": session_stats.get("marathon_session_rate", 0), + }, + } diff --git a/backend/app/models.py b/backend/app/models.py index 3751adf..d79b0c5 100644 --- a/backend/app/models.py +++ b/backend/app/models.py @@ -1,35 +1,50 @@ -from sqlalchemy import Column, Integer, String, DateTime, JSON, ForeignKey, Float, Table, Text +from sqlalchemy import ( + Boolean, + Column, + Integer, + String, + DateTime, + JSON, + ForeignKey, + Float, + Table, + Text, +) from sqlalchemy.orm import relationship from datetime import datetime from .database import Base # Association Table for Many-to-Many Relationship between Track and Artist track_artists = Table( - 'track_artists', + "track_artists", Base.metadata, - Column('track_id', String, ForeignKey('tracks.id'), primary_key=True), - 
Column('artist_id', String, ForeignKey('artists.id'), primary_key=True) + Column("track_id", String, ForeignKey("tracks.id"), primary_key=True), + Column("artist_id", String, ForeignKey("artists.id"), primary_key=True), ) + class Artist(Base): __tablename__ = "artists" - id = Column(String, primary_key=True, index=True) # Spotify ID + id = Column(String, primary_key=True, index=True) # Spotify ID name = Column(String) - genres = Column(JSON, nullable=True) # List of genre strings - image_url = Column(String, nullable=True) # Artist profile image + genres = Column(JSON, nullable=True) # List of genre strings + image_url = Column(String, nullable=True) # Artist profile image # Relationships tracks = relationship("Track", secondary=track_artists, back_populates="artists") + class Track(Base): __tablename__ = "tracks" - id = Column(String, primary_key=True, index=True) # Spotify ID + id = Column(String, primary_key=True, index=True) # Spotify ID name = Column(String) - artist = Column(String) # Display string (e.g. "Drake, Future") - kept for convenience + artist = Column( + String + ) # Display string (e.g. "Drake, Future") - kept for convenience album = Column(String) - image_url = Column(String, nullable=True) # Album art + image_url = Column(String, nullable=True) # Album art duration_ms = Column(Integer) popularity = Column(Integer, nullable=True) @@ -55,7 +70,7 @@ class Track(Base): genres = Column(JSON, nullable=True) # AI Analysis fields - lyrics = Column(Text, nullable=True) # Full lyrics from Genius + lyrics = Column(Text, nullable=True) # Full lyrics from Genius lyrics_summary = Column(String, nullable=True) genre_tags = Column(String, nullable=True) @@ -71,11 +86,13 @@ class PlayHistory(Base): id = Column(Integer, primary_key=True, index=True) track_id = Column(String, ForeignKey("tracks.id")) - played_at = Column(DateTime, index=True) # The timestamp from Spotify - - # Context (album, playlist, etc.) 
+ played_at = Column(DateTime, index=True) context_uri = Column(String, nullable=True) + listened_ms = Column(Integer, nullable=True) + skipped = Column(Boolean, nullable=True) + source = Column(String, nullable=True) + track = relationship("Track", back_populates="plays") @@ -84,16 +101,19 @@ class AnalysisSnapshot(Base): Stores the computed statistics and LLM analysis for a given period. Allows for trend analysis over time. """ + __tablename__ = "analysis_snapshots" id = Column(Integer, primary_key=True, index=True) - date = Column(DateTime, default=datetime.utcnow, index=True) # When the analysis was run + date = Column( + DateTime, default=datetime.utcnow, index=True + ) # When the analysis was run period_start = Column(DateTime) period_end = Column(DateTime) - period_label = Column(String) # e.g., "last_30_days", "monthly_nov_2023" + period_label = Column(String) # e.g., "last_30_days", "monthly_nov_2023" # The heavy lifting: stored as JSON blobs - metrics_payload = Column(JSON) # The input to the LLM (StatsService output) - narrative_report = Column(JSON) # The output from the LLM (NarrativeService output) + metrics_payload = Column(JSON) # The input to the LLM (StatsService output) + narrative_report = Column(JSON) # The output from the LLM (NarrativeService output) - model_used = Column(String, nullable=True) # e.g. "gemini-1.5-flash" + model_used = Column(String, nullable=True) # e.g. 
"gemini-1.5-flash" diff --git a/backend/app/services/narrative_service.py b/backend/app/services/narrative_service.py index d80245e..c61d0a4 100644 --- a/backend/app/services/narrative_service.py +++ b/backend/app/services/narrative_service.py @@ -1,101 +1,154 @@ import os import json import re -from google import genai -from typing import Dict, Any, List, Optional +from typing import Dict, Any + +try: + from openai import OpenAI +except ImportError: + OpenAI = None + +try: + from google import genai +except ImportError: + genai = None + class NarrativeService: - def __init__(self, model_name: str = "gemini-2.0-flash-exp"): - self.api_key = os.getenv("GEMINI_API_KEY") - self.client = genai.Client(api_key=self.api_key) if self.api_key else None - if not self.api_key: - print("WARNING: GEMINI_API_KEY not found. LLM features will fail.") - + def __init__(self, model_name: str = "gpt-5-mini-2025-08-07"): self.model_name = model_name + self.provider = self._detect_provider() + self.client = self._init_client() + + def _detect_provider(self) -> str: + openai_key = os.getenv("OPENAI_API_KEY") or os.getenv("OPENAI_APIKEY") + gemini_key = os.getenv("GEMINI_API_KEY") + + if self.model_name.startswith("gpt") and openai_key and OpenAI: + return "openai" + elif gemini_key and genai: + return "gemini" + elif openai_key and OpenAI: + return "openai" + elif gemini_key and genai: + return "gemini" + return "none" + + def _init_client(self): + if self.provider == "openai": + api_key = os.getenv("OPENAI_API_KEY") or os.getenv("OPENAI_APIKEY") + return OpenAI(api_key=api_key) + elif self.provider == "gemini": + api_key = os.getenv("GEMINI_API_KEY") + return genai.Client(api_key=api_key) + return None def generate_full_narrative(self, stats_json: Dict[str, Any]) -> Dict[str, Any]: - """ - Orchestrates the generation of the full narrative report. - Currently uses a single call for consistency and speed. 
- """ - if not self.api_key: + if not self.client: + print("WARNING: No LLM client available") return self._get_fallback_narrative() clean_stats = self._shape_payload(stats_json) - - prompt = f""" -You are a witty, insightful, and slightly snarky music critic analyzing a user's Spotify listening data. -Your goal is to generate a JSON report that acts as a deeper, more honest "Spotify Wrapped". + prompt = self._build_prompt(clean_stats) -**CORE RULES:** -1. **NO Mental Health Diagnoses:** Do not mention depression, anxiety, or therapy. Stick to behavioral descriptors (e.g., "introspective", "high-energy"). -2. **Be Specific:** Use the provided metrics. Don't say "You like pop," say "Your Mainstream Score of 85% suggests..." -3. **Roast Gently:** Be playful but not cruel. -4. **JSON Output Only:** Return strictly valid JSON. - -**DATA TO ANALYZE:** -{json.dumps(clean_stats, indent=2)} - -**REQUIRED JSON STRUCTURE:** -{{ - "vibe_check": "2-3 paragraphs describing their overall listening personality this period.", - "patterns": ["Observation 1", "Observation 2", "Observation 3 (Look for specific habits like skipping or late-night sessions)"], - "persona": "A creative label (e.g., 'The Genre Chameleon', 'Nostalgic Dad-Rocker').", - "era_insight": "A specific comment on their Musical Age ({clean_stats.get('era', {}).get('musical_age', 'N/A')}) and Nostalgia Gap.", - "roast": "A 1-2 sentence playful roast about their taste.", - "comparison": "A short comment comparing this period to the previous one (if data exists)." 
-}} -""" try: - response = self.client.models.generate_content( - model=self.model_name, - contents=prompt, - config=genai.types.GenerateContentConfig(response_mime_type="application/json") - ) - - return self._clean_and_parse_json(response.text) - + if self.provider == "openai": + return self._call_openai(prompt) + elif self.provider == "gemini": + return self._call_gemini(prompt) except Exception as e: print(f"LLM Generation Error: {e}") return self._get_fallback_narrative() + return self._get_fallback_narrative() + + def _call_openai(self, prompt: str) -> Dict[str, Any]: + response = self.client.chat.completions.create( + model=self.model_name, + messages=[ + { + "role": "system", + "content": "You are a witty music critic. Output only valid JSON.", + }, + {"role": "user", "content": prompt}, + ], + response_format={"type": "json_object"}, + max_completion_tokens=1500, + temperature=0.8, + ) + return self._clean_and_parse_json(response.choices[0].message.content) + + def _call_gemini(self, prompt: str) -> Dict[str, Any]: + response = self.client.models.generate_content( + model=self.model_name, + contents=prompt, + config=genai.types.GenerateContentConfig( + response_mime_type="application/json" + ), + ) + return self._clean_and_parse_json(response.text) + + def _build_prompt(self, clean_stats: Dict[str, Any]) -> str: + return f"""Analyze this Spotify listening data and generate a personalized report. + +**RULES:** +1. NO mental health diagnoses. Use behavioral descriptors only. +2. Be specific - reference actual metrics from the data. +3. Be playful but not cruel. +4. Return ONLY valid JSON. 
+ +**DATA:** +{json.dumps(clean_stats, indent=2)} + +**REQUIRED JSON:** +{{ + "vibe_check_short": "1-2 sentence hook for the hero banner.", + "vibe_check": "2-3 paragraphs describing their overall listening personality.", + "patterns": ["Observation 1", "Observation 2", "Observation 3"], + "persona": "A creative label (e.g., 'The Genre Chameleon').", + "era_insight": "Comment on Musical Age ({clean_stats.get("era", {}).get("musical_age", "N/A")}).", + "roast": "1-2 sentence playful roast.", + "comparison": "Compare to previous period if data exists." +}}""" + def _shape_payload(self, stats: Dict[str, Any]) -> Dict[str, Any]: - """ - Compresses the stats JSON to save tokens and focus the LLM. - Removes raw lists beyond top 5/10. - """ s = stats.copy() - - # Simplify Volume + if "volume" in s: - s["volume"] = { - k: v for k, v in s["volume"].items() + volume_copy = { + k: v + for k, v in s["volume"].items() if k not in ["top_tracks", "top_artists", "top_albums", "top_genres"] } - # Add back condensed top lists (just names) - s["volume"]["top_tracks"] = [t["name"] for t in stats["volume"].get("top_tracks", [])[:5]] - s["volume"]["top_artists"] = [a["name"] for a in stats["volume"].get("top_artists", [])[:5]] - s["volume"]["top_genres"] = [g["name"] for g in stats["volume"].get("top_genres", [])[:5]] + volume_copy["top_tracks"] = [ + t["name"] for t in stats["volume"].get("top_tracks", [])[:5] + ] + volume_copy["top_artists"] = [ + a["name"] for a in stats["volume"].get("top_artists", [])[:5] + ] + volume_copy["top_genres"] = [ + g["name"] for g in stats["volume"].get("top_genres", [])[:5] + ] + s["volume"] = volume_copy + + if "time_habits" in s: + s["time_habits"] = { + k: v for k, v in s["time_habits"].items() if k != "heatmap" + } + + if "sessions" in s: + s["sessions"] = { + k: v for k, v in s["sessions"].items() if k != "session_list" + } - # Simplify Time (Keep distributions but maybe round them?) - # Keeping hourly/daily is fine, they are small arrays. 
- - # Simplify Vibe (Remove huge transition arrays if they accidentally leaked, though stats service handles this) - - # Remove period details if verbose return s def _clean_and_parse_json(self, raw_text: str) -> Dict[str, Any]: - """ - Robust JSON extractor. - """ try: - # 1. Try direct parse return json.loads(raw_text) except json.JSONDecodeError: pass - # 2. Extract between first { and last } try: match = re.search(r"\{.*\}", raw_text, re.DOTALL) if match: @@ -107,16 +160,11 @@ Your goal is to generate a JSON report that acts as a deeper, more honest "Spoti def _get_fallback_narrative(self) -> Dict[str, Any]: return { - "vibe_check": "Data processing error. You're too mysterious for us to analyze right now.", + "vibe_check_short": "Your taste is... interesting.", + "vibe_check": "Data processing error. You're too mysterious to analyze right now.", "patterns": [], "persona": "The Enigma", "era_insight": "Time is a flat circle.", "roast": "You broke the machine. Congratulations.", - "comparison": "N/A" + "comparison": "N/A", } - - # Individual accessors if needed by frontend, though full_narrative is preferred - def generate_vibe_check(self, stats): return self.generate_full_narrative(stats).get("vibe_check") - def identify_patterns(self, stats): return self.generate_full_narrative(stats).get("patterns") - def generate_persona(self, stats): return self.generate_full_narrative(stats).get("persona") - def generate_roast(self, stats): return self.generate_full_narrative(stats).get("roast") \ No newline at end of file diff --git a/backend/app/services/reccobeats_client.py b/backend/app/services/reccobeats_client.py index ed4c2d5..361b91a 100644 --- a/backend/app/services/reccobeats_client.py +++ b/backend/app/services/reccobeats_client.py @@ -3,16 +3,30 @@ from typing import List, Dict, Any RECCOBEATS_API_URL = "https://api.reccobeats.com/v1/audio-features" + class ReccoBeatsClient: async def get_audio_features(self, spotify_ids: List[str]) -> List[Dict[str, Any]]: if 
not spotify_ids: return [] ids_param = ",".join(spotify_ids) - async with httpx.AsyncClient() as client: + async with httpx.AsyncClient(timeout=30.0) as client: try: - response = await client.get(RECCOBEATS_API_URL, params={"ids": ids_param}) + response = await client.get( + RECCOBEATS_API_URL, params={"ids": ids_param} + ) if response.status_code != 200: + print(f"ReccoBeats API returned status {response.status_code}") return [] - return response.json().get("content", []) - except Exception: + + content = response.json().get("content", []) + + for item in content: + href = item.get("href", "") + if "spotify.com/track/" in href: + spotify_id = href.split("/track/")[-1].split("?")[0] + item["spotify_id"] = spotify_id + + return content + except Exception as e: + print(f"ReccoBeats API error: {e}") return [] diff --git a/backend/app/services/spotify_client.py b/backend/app/services/spotify_client.py index 06bd697..5b5978f 100644 --- a/backend/app/services/spotify_client.py +++ b/backend/app/services/spotify_client.py @@ -8,6 +8,7 @@ from typing import List, Dict, Any SPOTIFY_TOKEN_URL = "https://accounts.spotify.com/api/token" SPOTIFY_API_BASE = "https://api.spotify.com/v1" + class SpotifyClient: def __init__(self, client_id: str, client_secret: str, refresh_token: str): self.client_id = client_id @@ -92,3 +93,17 @@ class SpotifyClient: return [] return response.json().get("artists", []) + + async def get_currently_playing(self) -> Dict[str, Any] | None: + token = await self.get_access_token() + async with httpx.AsyncClient() as client: + response = await client.get( + f"{SPOTIFY_API_BASE}/me/player/currently-playing", + headers={"Authorization": f"Bearer {token}"}, + ) + if response.status_code == 204: + return None + if response.status_code != 200: + print(f"Error fetching currently playing: {response.text}") + return None + return response.json() diff --git a/backend/app/services/stats_service.py b/backend/app/services/stats_service.py index d2a5eda..2ffcc65 
100644 --- a/backend/app/services/stats_service.py +++ b/backend/app/services/stats_service.py @@ -8,11 +8,17 @@ from sklearn.cluster import KMeans from ..models import PlayHistory, Track, Artist + class StatsService: def __init__(self, db: Session): self.db = db - def compute_comparison(self, current_stats: Dict[str, Any], period_start: datetime, period_end: datetime) -> Dict[str, Any]: + def compute_comparison( + self, + current_stats: Dict[str, Any], + period_start: datetime, + period_end: datetime, + ) -> Dict[str, Any]: """ Calculates deltas vs the previous period of the same length. """ @@ -44,28 +50,38 @@ class StatsService: deltas["valence_delta"] = round(curr_v - prev_v, 2) # Popularity - if "avg_popularity" in current_stats["taste"] and "avg_popularity" in prev_taste: - deltas["popularity_delta"] = round(current_stats["taste"]["avg_popularity"] - prev_taste["avg_popularity"], 1) + if ( + "avg_popularity" in current_stats["taste"] + and "avg_popularity" in prev_taste + ): + deltas["popularity_delta"] = round( + current_stats["taste"]["avg_popularity"] - prev_taste["avg_popularity"], + 1, + ) return { "previous_period": { "start": prev_start.isoformat(), - "end": prev_end.isoformat() + "end": prev_end.isoformat(), }, - "deltas": deltas + "deltas": deltas, } - def compute_volume_stats(self, period_start: datetime, period_end: datetime) -> Dict[str, Any]: + def compute_volume_stats( + self, period_start: datetime, period_end: datetime + ) -> Dict[str, Any]: """ Calculates volume metrics including Concentration (HHI, Gini, Entropy) and Top Lists. 
""" # Eager load tracks AND artists to fix the "Artist String Problem" and performance # Use < period_end for half-open interval to avoid double counting boundaries - query = self.db.query(PlayHistory).options( - joinedload(PlayHistory.track).joinedload(Track.artists) - ).filter( - PlayHistory.played_at >= period_start, - PlayHistory.played_at < period_end + query = ( + self.db.query(PlayHistory) + .options(joinedload(PlayHistory.track).joinedload(Track.artists)) + .filter( + PlayHistory.played_at >= period_start, + PlayHistory.played_at < period_end, + ) ) plays = query.all() total_plays = len(plays) @@ -78,38 +94,41 @@ class StatsService: artist_counts = {} genre_counts = {} album_counts = {} - + # Maps for resolving names/images later without DB hits - track_map = {} + track_map = {} artist_map = {} album_map = {} - + # Helper to safely get image def get_track_image(t): - if t.image_url: return t.image_url + if t.image_url: + return t.image_url if t.raw_data and "album" in t.raw_data and "images" in t.raw_data["album"]: imgs = t.raw_data["album"]["images"] - if imgs: return imgs[0].get("url") + if imgs: + return imgs[0].get("url") return None for p in plays: t = p.track - if not t: continue + if not t: + continue total_ms += t.duration_ms if t.duration_ms else 0 - + # Track Aggregation track_counts[t.id] = track_counts.get(t.id, 0) + 1 track_map[t.id] = t # Album Aggregation # Prefer ID from raw_data, fallback to name - album_id = t.album + album_id = t.album album_name = t.album if t.raw_data and "album" in t.raw_data: album_id = t.raw_data["album"].get("id", t.album) album_name = t.raw_data["album"].get("name", t.album) - + album_counts[album_id] = album_counts.get(album_id, 0) + 1 # Store tuple of (name, image_url) if album_id not in album_map: @@ -119,8 +138,11 @@ class StatsService: for artist in t.artists: artist_counts[artist.id] = artist_counts.get(artist.id, 0) + 1 if artist.id not in artist_map: - artist_map[artist.id] = {"name": artist.name, "image": 
artist.image_url} - + artist_map[artist.id] = { + "name": artist.name, + "image": artist.image_url, + } + # Genre Aggregation if artist.genres: # artist.genres is a JSON list of strings @@ -136,41 +158,65 @@ class StatsService: top_tracks = [ { "name": track_map[tid].name, - "artist": ", ".join([a.name for a in track_map[tid].artists]), + "artist": ", ".join([a.name for a in track_map[tid].artists]), "image": get_track_image(track_map[tid]), - "count": c + "count": c, } - for tid, c in sorted(track_counts.items(), key=lambda x: x[1], reverse=True)[:5] + for tid, c in sorted( + track_counts.items(), key=lambda x: x[1], reverse=True + )[:5] ] top_artists = [ - {"name": artist_map[aid]["name"], "id": aid, "image": artist_map[aid]["image"], "count": c} - for aid, c in sorted(artist_counts.items(), key=lambda x: x[1], reverse=True)[:5] - ] - - top_albums = [ - {"name": album_map[aid]["name"], "image": album_map[aid]["image"], "count": c} - for aid, c in sorted(album_counts.items(), key=lambda x: x[1], reverse=True)[:5] + { + "name": artist_map[aid]["name"], + "id": aid, + "image": artist_map[aid]["image"], + "count": c, + } + for aid, c in sorted( + artist_counts.items(), key=lambda x: x[1], reverse=True + )[:5] ] - top_genres = [{"name": k, "count": v} for k, v in sorted(genre_counts.items(), key=lambda x: x[1], reverse=True)[:5]] + top_albums = [ + { + "name": album_map[aid]["name"], + "image": album_map[aid]["image"], + "count": c, + } + for aid, c in sorted( + album_counts.items(), key=lambda x: x[1], reverse=True + )[:5] + ] + + top_genres = [ + {"name": k, "count": v} + for k, v in sorted(genre_counts.items(), key=lambda x: x[1], reverse=True)[ + :5 + ] + ] # Concentration Metrics # HHI: Sum of (share)^2 - hhi = sum([s ** 2 for s in shares]) + hhi = sum([s**2 for s in shares]) # Gini Coefficient sorted_shares = sorted(shares) n = len(shares) gini = 0 if n > 0: - gini = (2 * sum((i + 1) * x for i, x in enumerate(sorted_shares))) / (n * sum(sorted_shares)) - (n + 1) 
/ n + gini = (2 * sum((i + 1) * x for i, x in enumerate(sorted_shares))) / ( + n * sum(sorted_shares) + ) - (n + 1) / n # Genre Entropy: -SUM(p * log(p)) total_genre_occurrences = sum(genre_counts.values()) genre_entropy = 0 if total_genre_occurrences > 0: - genre_probs = [count / total_genre_occurrences for count in genre_counts.values()] + genre_probs = [ + count / total_genre_occurrences for count in genre_counts.values() + ] genre_entropy = -sum([p * math.log(p) for p in genre_probs if p > 0]) # Top 5 Share @@ -188,46 +234,73 @@ class StatsService: "top_artists": top_artists, "top_albums": top_albums, "top_genres": top_genres, - "repeat_rate": round((total_plays - unique_tracks) / total_plays, 3) if total_plays else 0, - "one_and_done_rate": round(one_and_done / unique_tracks, 3) if unique_tracks else 0, + "repeat_rate": round((total_plays - unique_tracks) / total_plays, 3) + if total_plays + else 0, + "one_and_done_rate": round(one_and_done / unique_tracks, 3) + if unique_tracks + else 0, "concentration": { "hhi": round(hhi, 4), "gini": round(gini, 4), "top_1_share": round(max(shares), 3) if shares else 0, "top_5_share": round(top_5_share, 3), - "genre_entropy": round(genre_entropy, 2) - } + "genre_entropy": round(genre_entropy, 2), + }, } - def compute_time_stats(self, period_start: datetime, period_end: datetime) -> Dict[str, Any]: + def compute_time_stats( + self, period_start: datetime, period_end: datetime + ) -> Dict[str, Any]: """ Includes Part-of-Day buckets, Listening Streaks, Active Days, and 2D Heatmap. 
""" - query = self.db.query(PlayHistory).filter( - PlayHistory.played_at >= period_start, - PlayHistory.played_at < period_end - ).order_by(PlayHistory.played_at.asc()) + query = ( + self.db.query(PlayHistory) + .filter( + PlayHistory.played_at >= period_start, + PlayHistory.played_at < period_end, + ) + .order_by(PlayHistory.played_at.asc()) + ) plays = query.all() if not plays: return {} - # Heatmap: 7 days x 24 hours + # Heatmap: 7 days x 24 hours (granular) and 7 days x 6 blocks (compressed) heatmap = [[0 for _ in range(24)] for _ in range(7)] - + # Compressed heatmap: 6 x 4-hour blocks per day + # Blocks: 0-4 (Night), 4-8 (Early Morning), 8-12 (Morning), 12-16 (Afternoon), 16-20 (Evening), 20-24 (Night) + heatmap_compressed = [[0 for _ in range(6)] for _ in range(7)] + block_labels = [ + "12am-4am", + "4am-8am", + "8am-12pm", + "12pm-4pm", + "4pm-8pm", + "8pm-12am", + ] + hourly_counts = [0] * 24 weekday_counts = [0] * 7 - + part_of_day = {"morning": 0, "afternoon": 0, "evening": 0, "night": 0} active_dates = set() for p in plays: h = p.played_at.hour d = p.played_at.weekday() - - # Populate Heatmap + + # Populate Heatmap (granular) heatmap[d][h] += 1 - + + # Populate compressed heatmap (4-hour blocks) + block_idx = ( + h // 4 + ) # 0-3 -> 0, 4-7 -> 1, 8-11 -> 2, 12-15 -> 3, 16-19 -> 4, 20-23 -> 5 + heatmap_compressed[d][block_idx] += 1 + hourly_counts[h] += 1 weekday_counts[d] += 1 active_dates.add(p.played_at.date()) @@ -261,26 +334,38 @@ class StatsService: active_days_count = len(active_dates) return { - "heatmap": heatmap, # 7x24 Matrix + "heatmap": heatmap, + "heatmap_compressed": heatmap_compressed, + "block_labels": block_labels, "hourly_distribution": hourly_counts, "peak_hour": hourly_counts.index(max(hourly_counts)), "weekday_distribution": weekday_counts, + "daily_distribution": weekday_counts, "weekend_share": round(weekend_plays / len(plays), 2), "part_of_day": part_of_day, "listening_streak": current_streak, "longest_streak": longest_streak, 
"active_days": active_days_count, - "avg_plays_per_active_day": round(len(plays) / active_days_count, 1) if active_days_count else 0 + "avg_plays_per_active_day": round(len(plays) / active_days_count, 1) + if active_days_count + else 0, } - def compute_session_stats(self, period_start: datetime, period_end: datetime) -> Dict[str, Any]: + def compute_session_stats( + self, period_start: datetime, period_end: datetime + ) -> Dict[str, Any]: """ Includes Micro-sessions, Marathon sessions, Energy Arcs, Median metrics, and Session List. """ - query = self.db.query(PlayHistory).options(joinedload(PlayHistory.track)).filter( - PlayHistory.played_at >= period_start, - PlayHistory.played_at < period_end - ).order_by(PlayHistory.played_at.asc()) + query = ( + self.db.query(PlayHistory) + .options(joinedload(PlayHistory.track)) + .filter( + PlayHistory.played_at >= period_start, + PlayHistory.played_at < period_end, + ) + .order_by(PlayHistory.played_at.asc()) + ) plays = query.all() if not plays: @@ -291,7 +376,7 @@ class StatsService: # 1. 
Sessionization (Gap > 20 mins) for i in range(1, len(plays)): - diff = (plays[i].played_at - plays[i-1].played_at).total_seconds() / 60 + diff = (plays[i].played_at - plays[i - 1].played_at).total_seconds() / 60 if diff > 20: sessions.append(current_session) current_session = [] @@ -304,13 +389,13 @@ class StatsService: marathon_sessions = 0 energy_arcs = {"rising": 0, "falling": 0, "flat": 0, "unknown": 0} start_hour_dist = [0] * 24 - - session_list = [] # Metadata for timeline + + session_list = [] # Metadata for timeline for sess in sessions: start_t = sess[0].played_at end_t = sess[-1].played_at - + # Start time distribution start_hour_dist[start_t.hour] += 1 @@ -319,41 +404,51 @@ class StatsService: duration = (end_t - start_t).total_seconds() / 60 lengths_min.append(duration) else: - duration = 3.0 # Approx single song + duration = 3.0 # Approx single song lengths_min.append(duration) # Types sess_type = "Standard" - if len(sess) <= 3: + if len(sess) <= 3: micro_sessions += 1 sess_type = "Micro" - elif len(sess) >= 20: + elif len(sess) >= 20: marathon_sessions += 1 sess_type = "Marathon" - + # Store Session Metadata - session_list.append({ - "start_time": start_t.isoformat(), - "end_time": end_t.isoformat(), - "duration_minutes": round(duration, 1), - "track_count": len(sess), - "type": sess_type - }) + session_list.append( + { + "start_time": start_t.isoformat(), + "end_time": end_t.isoformat(), + "duration_minutes": round(duration, 1), + "track_count": len(sess), + "type": sess_type, + } + ) # Energy Arc first_t = sess[0].track last_t = sess[-1].track - if first_t and last_t and first_t.energy is not None and last_t.energy is not None: + if ( + first_t + and last_t + and first_t.energy is not None + and last_t.energy is not None + ): diff = last_t.energy - first_t.energy - if diff > 0.1: energy_arcs["rising"] += 1 - elif diff < -0.1: energy_arcs["falling"] += 1 - else: energy_arcs["flat"] += 1 + if diff > 0.1: + energy_arcs["rising"] += 1 + elif diff < 
-0.1: + energy_arcs["falling"] += 1 + else: + energy_arcs["flat"] += 1 else: energy_arcs["unknown"] += 1 avg_min = np.mean(lengths_min) if lengths_min else 0 median_min = np.median(lengths_min) if lengths_min else 0 - + # Sessions per day active_days = len(set(p.played_at.date() for p in plays)) sessions_per_day = len(sessions) / active_days if active_days else 0 @@ -369,17 +464,24 @@ class StatsService: "micro_session_rate": round(micro_sessions / len(sessions), 2), "marathon_session_rate": round(marathon_sessions / len(sessions), 2), "energy_arcs": energy_arcs, - "session_list": session_list + "session_list": session_list, } - def compute_vibe_stats(self, period_start: datetime, period_end: datetime) -> Dict[str, Any]: + def compute_vibe_stats( + self, period_start: datetime, period_end: datetime + ) -> Dict[str, Any]: """ Aggregates Audio Features + Calculates Whiplash + Clustering + Harmonic Profile. """ - plays = self.db.query(PlayHistory).filter( - PlayHistory.played_at >= period_start, - PlayHistory.played_at < period_end - ).order_by(PlayHistory.played_at.asc()).all() + plays = ( + self.db.query(PlayHistory) + .filter( + PlayHistory.played_at >= period_start, + PlayHistory.played_at < period_end, + ) + .order_by(PlayHistory.played_at.asc()) + .all() + ) if not plays: return {} @@ -389,10 +491,19 @@ class StatsService: track_map = {t.id: t for t in tracks} # 1. 
Aggregates - feature_keys = ["energy", "valence", "danceability", "tempo", "acousticness", - "instrumentalness", "liveness", "speechiness", "loudness"] + feature_keys = [ + "energy", + "valence", + "danceability", + "tempo", + "acousticness", + "instrumentalness", + "liveness", + "speechiness", + "loudness", + ] features = {k: [] for k in feature_keys} - + # For Clustering: List of [energy, valence, danceability, acousticness] cluster_data = [] @@ -408,38 +519,53 @@ class StatsService: for i, p in enumerate(plays): t = track_map.get(p.track_id) - if not t: continue + if not t: + continue # Robust Null Check: Append separately for key in feature_keys: val = getattr(t, key, None) if val is not None: features[key].append(val) - + # Cluster Data (only if all 4 exist) - if all(getattr(t, k) is not None for k in ["energy", "valence", "danceability", "acousticness"]): - cluster_data.append([t.energy, t.valence, t.danceability, t.acousticness]) + if all( + getattr(t, k) is not None + for k in ["energy", "valence", "danceability", "acousticness"] + ): + cluster_data.append( + [t.energy, t.valence, t.danceability, t.acousticness] + ) # Harmonic - if t.key is not None: keys.append(t.key) - if t.mode is not None: modes.append(t.mode) - + if t.key is not None: + keys.append(t.key) + if t.mode is not None: + modes.append(t.mode) + # Tempo Zones if t.tempo is not None: - if t.tempo < 100: tempo_zones["chill"] += 1 - elif t.tempo < 130: tempo_zones["groove"] += 1 - else: tempo_zones["hype"] += 1 + if t.tempo < 100: + tempo_zones["chill"] += 1 + elif t.tempo < 130: + tempo_zones["groove"] += 1 + else: + tempo_zones["hype"] += 1 # Calculate Transitions (Whiplash) if i > 0 and previous_track: time_diff = (p.played_at - plays[i - 1].played_at).total_seconds() - if time_diff < 300: # 5 min gap max + if time_diff < 300: # 5 min gap max if t.tempo is not None and previous_track.tempo is not None: transitions["tempo"].append(abs(t.tempo - previous_track.tempo)) if t.energy is not None and 
previous_track.energy is not None: - transitions["energy"].append(abs(t.energy - previous_track.energy)) + transitions["energy"].append( + abs(t.energy - previous_track.energy) + ) if t.valence is not None and previous_track.valence is not None: - transitions["valence"].append(abs(t.valence - previous_track.valence)) + transitions["valence"].append( + abs(t.valence - previous_track.valence) + ) previous_track = t @@ -448,33 +574,42 @@ class StatsService: for key, values in features.items(): valid = [v for v in values if v is not None] if valid: - stats[f"avg_{key}"] = float(np.mean(valid)) + avg_val = float(np.mean(valid)) + stats[key] = round(avg_val, 3) + stats[f"avg_{key}"] = avg_val stats[f"std_{key}"] = float(np.std(valid)) stats[f"p10_{key}"] = float(np.percentile(valid, 10)) - stats[f"p50_{key}"] = float(np.percentile(valid, 50)) # Median + stats[f"p50_{key}"] = float(np.percentile(valid, 50)) stats[f"p90_{key}"] = float(np.percentile(valid, 90)) else: + stats[key] = 0.0 stats[f"avg_{key}"] = None # Derived Metrics if stats.get("avg_energy") is not None and stats.get("avg_valence") is not None: stats["mood_quadrant"] = { "x": round(stats["avg_valence"], 2), - "y": round(stats["avg_energy"], 2) + "y": round(stats["avg_energy"], 2), } avg_std = (stats.get("std_energy", 0) + stats.get("std_valence", 0)) / 2 stats["consistency_score"] = round(1.0 - avg_std, 2) - - if stats.get("avg_tempo") is not None and stats.get("avg_danceability") is not None: + + if ( + stats.get("avg_tempo") is not None + and stats.get("avg_danceability") is not None + ): stats["rhythm_profile"] = { "avg_tempo": round(stats["avg_tempo"], 1), - "avg_danceability": round(stats["avg_danceability"], 2) + "avg_danceability": round(stats["avg_danceability"], 2), } - - if stats.get("avg_acousticness") is not None and stats.get("avg_instrumentalness") is not None: + + if ( + stats.get("avg_acousticness") is not None + and stats.get("avg_instrumentalness") is not None + ): stats["texture_profile"] 
= { "acousticness": round(stats["avg_acousticness"], 2), - "instrumentalness": round(stats["avg_instrumentalness"], 2) + "instrumentalness": round(stats["avg_instrumentalness"], 2), } # Whiplash @@ -484,11 +619,13 @@ class StatsService: stats["whiplash"][k] = round(float(np.mean(transitions[k])), 2) else: stats["whiplash"][k] = 0 - + # Tempo Zones total_tempo = sum(tempo_zones.values()) if total_tempo > 0: - stats["tempo_zones"] = {k: round(v / total_tempo, 2) for k, v in tempo_zones.items()} + stats["tempo_zones"] = { + k: round(v / total_tempo, 2) for k, v in tempo_zones.items() + } else: stats["tempo_zones"] = {} @@ -497,57 +634,84 @@ class StatsService: major_count = len([m for m in modes if m == 1]) stats["harmonic_profile"] = { "major_pct": round(major_count / len(modes), 2), - "minor_pct": round((len(modes) - major_count) / len(modes), 2) + "minor_pct": round((len(modes) - major_count) / len(modes), 2), } - + if keys: # Map integers to pitch class notation - pitch_class = ["C", "C#", "D", "D#", "E", "F", "F#", "G", "G#", "A", "A#", "B"] + pitch_class = [ + "C", + "C#", + "D", + "D#", + "E", + "F", + "F#", + "G", + "G#", + "A", + "A#", + "B", + ] key_counts = {} for k in keys: if 0 <= k < 12: label = pitch_class[k] key_counts[label] = key_counts.get(label, 0) + 1 - stats["top_keys"] = [{"key": k, "count": v} for k, v in sorted(key_counts.items(), key=lambda x: x[1], reverse=True)[:3]] + stats["top_keys"] = [ + {"key": k, "count": v} + for k, v in sorted( + key_counts.items(), key=lambda x: x[1], reverse=True + )[:3] + ] # CLUSTERING (K-Means) - if len(cluster_data) >= 5: # Need enough data points + if len(cluster_data) >= 5: # Need enough data points try: # Features: energy, valence, danceability, acousticness kmeans = KMeans(n_clusters=3, random_state=42, n_init=10) labels = kmeans.fit_predict(cluster_data) - + # Analyze clusters clusters = [] for i in range(3): - mask = (labels == i) + mask = labels == i count = np.sum(mask) - if count == 0: continue - + if 
count == 0: + continue + centroid = kmeans.cluster_centers_[i] share = count / len(cluster_data) - + # Heuristic Naming c_energy, c_valence, c_dance, c_acoustic = centroid name = "Mixed Vibe" - if c_energy > 0.7: name = "High Energy" - elif c_acoustic > 0.7: name = "Acoustic / Chill" - elif c_valence < 0.3: name = "Melancholy" - elif c_dance > 0.7: name = "Dance / Groove" - - clusters.append({ - "name": name, - "share": round(share, 2), - "features": { - "energy": round(c_energy, 2), - "valence": round(c_valence, 2), - "danceability": round(c_dance, 2), - "acousticness": round(c_acoustic, 2) + if c_energy > 0.7: + name = "High Energy" + elif c_acoustic > 0.7: + name = "Acoustic / Chill" + elif c_valence < 0.3: + name = "Melancholy" + elif c_dance > 0.7: + name = "Dance / Groove" + + clusters.append( + { + "name": name, + "share": round(share, 2), + "features": { + "energy": round(c_energy, 2), + "valence": round(c_valence, 2), + "danceability": round(c_dance, 2), + "acousticness": round(c_acoustic, 2), + }, } - }) - + ) + # Sort by share - stats["clusters"] = sorted(clusters, key=lambda x: x["share"], reverse=True) + stats["clusters"] = sorted( + clusters, key=lambda x: x["share"], reverse=True + ) except Exception as e: print(f"Clustering failed: {e}") stats["clusters"] = [] @@ -556,13 +720,19 @@ class StatsService: return stats - def compute_era_stats(self, period_start: datetime, period_end: datetime) -> Dict[str, Any]: + def compute_era_stats( + self, period_start: datetime, period_end: datetime + ) -> Dict[str, Any]: """ Includes Nostalgia Gap and granular decade breakdown. 
""" - query = self.db.query(PlayHistory).options(joinedload(PlayHistory.track)).filter( - PlayHistory.played_at >= period_start, - PlayHistory.played_at < period_end + query = ( + self.db.query(PlayHistory) + .options(joinedload(PlayHistory.track)) + .filter( + PlayHistory.played_at >= period_start, + PlayHistory.played_at < period_end, + ) ) plays = query.all() @@ -597,19 +767,27 @@ class StatsService: return { "musical_age": int(avg_year), "nostalgia_gap": int(current_year - avg_year), - "freshness_score": dist.get(f"{int(current_year / 10) * 10}s", 0), # Share of current decade - "decade_distribution": dist + "freshness_score": dist.get( + f"{int(current_year / 10) * 10}s", 0 + ), # Share of current decade + "decade_distribution": dist, } - def compute_skip_stats(self, period_start: datetime, period_end: datetime) -> Dict[str, Any]: + def compute_skip_stats( + self, period_start: datetime, period_end: datetime + ) -> Dict[str, Any]: """ Implements boredom skip detection: (next_track.played_at - current_track.played_at) < (current_track.duration_ms / 1000 - 10s) """ - query = self.db.query(PlayHistory).filter( - PlayHistory.played_at >= period_start, - PlayHistory.played_at <= period_end - ).order_by(PlayHistory.played_at.asc()) + query = ( + self.db.query(PlayHistory) + .filter( + PlayHistory.played_at >= period_start, + PlayHistory.played_at <= period_end, + ) + .order_by(PlayHistory.played_at.asc()) + ) plays = query.all() if len(plays) < 2: @@ -622,13 +800,15 @@ class StatsService: for i in range(len(plays) - 1): current_play = plays[i] - next_play = plays[i+1] + next_play = plays[i + 1] track = track_map.get(current_play.track_id) if not track or not track.duration_ms: continue - diff_seconds = (next_play.played_at - current_play.played_at).total_seconds() + diff_seconds = ( + next_play.played_at - current_play.played_at + ).total_seconds() # Logic: If diff < (duration - 10s), it's a skip. 
# Convert duration to seconds @@ -641,25 +821,29 @@ class StatsService: if diff_seconds < (duration_sec - 10): skips += 1 - return { - "total_skips": skips, - "skip_rate": round(skips / len(plays), 3) - } + return {"total_skips": skips, "skip_rate": round(skips / len(plays), 3)} - def compute_context_stats(self, period_start: datetime, period_end: datetime) -> Dict[str, Any]: + def compute_context_stats( + self, period_start: datetime, period_end: datetime + ) -> Dict[str, Any]: """ Analyzes context_uri to determine if user listens to Playlists, Albums, or Artists. """ query = self.db.query(PlayHistory).filter( - PlayHistory.played_at >= period_start, - PlayHistory.played_at <= period_end + PlayHistory.played_at >= period_start, PlayHistory.played_at <= period_end ) plays = query.all() if not plays: return {} - context_counts = {"playlist": 0, "album": 0, "artist": 0, "collection": 0, "unknown": 0} + context_counts = { + "playlist": 0, + "album": 0, + "artist": 0, + "collection": 0, + "unknown": 0, + } unique_contexts = {} for p in plays: @@ -686,26 +870,32 @@ class StatsService: breakdown = {k: round(v / total, 2) for k, v in context_counts.items()} # Top 5 Contexts (Requires resolving URI to name, possibly missing metadata here) - sorted_contexts = sorted(unique_contexts.items(), key=lambda x: x[1], reverse=True)[:5] + sorted_contexts = sorted( + unique_contexts.items(), key=lambda x: x[1], reverse=True + )[:5] return { "type_breakdown": breakdown, "album_purist_score": breakdown.get("album", 0), "playlist_dependency": breakdown.get("playlist", 0), - "context_loyalty": round(len(plays) / len(unique_contexts), 2) if unique_contexts else 0, - "top_context_uris": [{"uri": k, "count": v} for k, v in sorted_contexts] + "context_loyalty": round(len(plays) / len(unique_contexts), 2) + if unique_contexts + else 0, + "top_context_uris": [{"uri": k, "count": v} for k, v in sorted_contexts], } - def compute_taste_stats(self, period_start: datetime, period_end: datetime) -> 
Dict[str, Any]: + def compute_taste_stats( + self, period_start: datetime, period_end: datetime + ) -> Dict[str, Any]: """ Mainstream vs. Hipster analysis based on Track.popularity (0-100). """ query = self.db.query(PlayHistory).filter( - PlayHistory.played_at >= period_start, - PlayHistory.played_at <= period_end + PlayHistory.played_at >= period_start, PlayHistory.played_at <= period_end ) plays = query.all() - if not plays: return {} + if not plays: + return {} track_ids = list(set([p.track_id for p in plays])) tracks = self.db.query(Track).filter(Track.id.in_(track_ids)).all() @@ -730,20 +920,27 @@ class StatsService: "avg_popularity": round(avg_pop, 1), "hipster_score": round((underground_plays / len(pop_values)) * 100, 1), "mainstream_score": round((mainstream_plays / len(pop_values)) * 100, 1), - "obscurity_rating": round(100 - avg_pop, 1) + "obscurity_rating": round(100 - avg_pop, 1), } - def compute_lifecycle_stats(self, period_start: datetime, period_end: datetime) -> Dict[str, Any]: + def compute_lifecycle_stats( + self, period_start: datetime, period_end: datetime + ) -> Dict[str, Any]: """ Determines if tracks are 'New Discoveries' or 'Old Favorites'. """ # 1. 
Get tracks played in this period - current_plays = self.db.query(PlayHistory).filter( - PlayHistory.played_at >= period_start, - PlayHistory.played_at <= period_end - ).all() + current_plays = ( + self.db.query(PlayHistory) + .filter( + PlayHistory.played_at >= period_start, + PlayHistory.played_at <= period_end, + ) + .all() + ) - if not current_plays: return {} + if not current_plays: + return {} current_track_ids = set([p.track_id for p in current_plays]) @@ -751,7 +948,7 @@ class StatsService: # We find which of the current_track_ids exist in history < period_start old_tracks_query = self.db.query(distinct(PlayHistory.track_id)).filter( PlayHistory.track_id.in_(current_track_ids), - PlayHistory.played_at < period_start + PlayHistory.played_at < period_start, ) old_track_ids = set([r[0] for r in old_tracks_query.all()]) @@ -765,21 +962,32 @@ class StatsService: return { "discovery_count": discovery_count, - "discovery_rate": round(plays_on_new / total_plays, 3) if total_plays > 0 else 0, - "recurrence_rate": round((total_plays - plays_on_new) / total_plays, 3) if total_plays > 0 else 0 + "discovery_rate": round(plays_on_new / total_plays, 3) + if total_plays > 0 + else 0, + "recurrence_rate": round((total_plays - plays_on_new) / total_plays, 3) + if total_plays > 0 + else 0, } - def compute_explicit_stats(self, period_start: datetime, period_end: datetime) -> Dict[str, Any]: + def compute_explicit_stats( + self, period_start: datetime, period_end: datetime + ) -> Dict[str, Any]: """ Analyzes explicit content consumption. 
""" - query = self.db.query(PlayHistory).options(joinedload(PlayHistory.track)).filter( - PlayHistory.played_at >= period_start, - PlayHistory.played_at <= period_end + query = ( + self.db.query(PlayHistory) + .options(joinedload(PlayHistory.track)) + .filter( + PlayHistory.played_at >= period_start, + PlayHistory.played_at <= period_end, + ) ) plays = query.all() - if not plays: return {"explicit_rate": 0, "hourly_explicit_rate": []} + if not plays: + return {"explicit_rate": 0, "hourly_explicit_rate": []} total_plays = len(plays) explicit_count = 0 @@ -811,13 +1019,18 @@ class StatsService: return { "explicit_rate": round(explicit_count / total_plays, 3), "total_explicit_plays": explicit_count, - "hourly_explicit_distribution": hourly_rates + "hourly_explicit_distribution": hourly_rates, } - def generate_full_report(self, period_start: datetime, period_end: datetime) -> Dict[str, Any]: + def generate_full_report( + self, period_start: datetime, period_end: datetime + ) -> Dict[str, Any]: # 1. Calculate all current stats current_stats = { - "period": {"start": period_start.isoformat(), "end": period_end.isoformat()}, + "period": { + "start": period_start.isoformat(), + "end": period_end.isoformat(), + }, "volume": self.compute_volume_stats(period_start, period_end), "time_habits": self.compute_time_stats(period_start, period_end), "sessions": self.compute_session_stats(period_start, period_end), @@ -827,24 +1040,34 @@ class StatsService: "taste": self.compute_taste_stats(period_start, period_end), "lifecycle": self.compute_lifecycle_stats(period_start, period_end), "flags": self.compute_explicit_stats(period_start, period_end), - "skips": self.compute_skip_stats(period_start, period_end) + "skips": self.compute_skip_stats(period_start, period_end), } # 2. 
Calculate Comparison - current_stats["comparison"] = self.compute_comparison(current_stats, period_start, period_end) + current_stats["comparison"] = self.compute_comparison( + current_stats, period_start, period_end + ) return current_stats def _empty_volume_stats(self): return { - "total_plays": 0, "estimated_minutes": 0, "unique_tracks": 0, - "unique_artists": 0, "unique_albums": 0, "unique_genres": 0, - "top_tracks": [], "top_artists": [], "top_albums": [], "top_genres": [], - "repeat_rate": 0, "one_and_done_rate": 0, - "concentration": {} + "total_plays": 0, + "estimated_minutes": 0, + "unique_tracks": 0, + "unique_artists": 0, + "unique_albums": 0, + "unique_genres": 0, + "top_tracks": [], + "top_artists": [], + "top_albums": [], + "top_genres": [], + "repeat_rate": 0, + "one_and_done_rate": 0, + "concentration": {}, } - + def _pct_change(self, curr, prev): if prev == 0: return 100.0 if curr > 0 else 0.0 - return round(((curr - prev) / prev) * 100, 1) \ No newline at end of file + return round(((curr - prev) / prev) * 100, 1) diff --git a/backend/requirements.txt b/backend/requirements.txt index 3b52a98..b71daa5 100644 --- a/backend/requirements.txt +++ b/backend/requirements.txt @@ -13,3 +13,4 @@ alembic==1.13.1 scikit-learn==1.4.0 lyricsgenius==3.0.1 google-genai==1.56.0 +openai>=1.0.0 diff --git a/backend/tests/conftest.py b/backend/tests/conftest.py new file mode 100644 index 0000000..d045788 --- /dev/null +++ b/backend/tests/conftest.py @@ -0,0 +1,5 @@ +import pytest +import sys +from pathlib import Path + +sys.path.insert(0, str(Path(__file__).parent.parent)) diff --git a/backend/tests/test_ingest.py b/backend/tests/test_ingest.py new file mode 100644 index 0000000..c890187 --- /dev/null +++ b/backend/tests/test_ingest.py @@ -0,0 +1,113 @@ +import pytest +from unittest.mock import MagicMock, AsyncMock, patch +from datetime import datetime, timedelta + +from app.ingest import PlaybackTracker, finalize_track + + +class TestPlaybackTracker: + def 
test_initial_state(self): + tracker = PlaybackTracker() + assert tracker.current_track_id is None + assert tracker.track_start_time is None + assert tracker.accumulated_listen_ms == 0 + assert tracker.last_progress_ms == 0 + assert tracker.is_paused is False + + +class TestFinalizeTrack: + def test_finalize_creates_play_history_when_not_exists(self): + mock_db = MagicMock() + mock_db.query.return_value.filter.return_value.first.return_value = None + + tracker = PlaybackTracker() + tracker.current_track_id = "track123" + tracker.track_start_time = datetime(2024, 1, 1, 10, 0, 0) + tracker.accumulated_listen_ms = 60000 + + finalize_track(mock_db, tracker) + + mock_db.add.assert_called_once() + mock_db.commit.assert_called_once() + + assert tracker.current_track_id is None + assert tracker.accumulated_listen_ms == 0 + + def test_finalize_marks_skip_when_under_30s(self): + mock_db = MagicMock() + mock_db.query.return_value.filter.return_value.first.return_value = None + + tracker = PlaybackTracker() + tracker.current_track_id = "track123" + tracker.track_start_time = datetime(2024, 1, 1, 10, 0, 0) + tracker.accumulated_listen_ms = 15000 + + finalize_track(mock_db, tracker) + + call_args = mock_db.add.call_args[0][0] + assert call_args.skipped is True + + def test_finalize_updates_existing_play(self): + mock_existing = MagicMock() + mock_existing.listened_ms = None + + mock_db = MagicMock() + mock_db.query.return_value.filter.return_value.first.return_value = ( + mock_existing + ) + + tracker = PlaybackTracker() + tracker.current_track_id = "track123" + tracker.track_start_time = datetime(2024, 1, 1, 10, 0, 0) + tracker.accumulated_listen_ms = 120000 + + finalize_track(mock_db, tracker) + + assert mock_existing.listened_ms == 120000 + assert mock_existing.skipped is False + mock_db.commit.assert_called_once() + + +class TestReccoBeatsClient: + @pytest.mark.asyncio + async def test_extracts_spotify_id_from_href(self): + from app.services.reccobeats_client import 
ReccoBeatsClient + + with patch("httpx.AsyncClient") as mock_client: + mock_response = MagicMock() + mock_response.status_code = 200 + mock_response.json.return_value = { + "content": [ + { + "id": "uuid-here", + "href": "https://open.spotify.com/track/abc123xyz", + "energy": 0.8, + "valence": 0.6, + } + ] + } + + mock_client.return_value.__aenter__.return_value.get = AsyncMock( + return_value=mock_response + ) + + client = ReccoBeatsClient() + result = await client.get_audio_features(["abc123xyz"]) + + assert len(result) == 1 + assert result[0]["spotify_id"] == "abc123xyz" + assert result[0]["energy"] == 0.8 + + @pytest.mark.asyncio + async def test_returns_empty_on_error(self): + from app.services.reccobeats_client import ReccoBeatsClient + + with patch("httpx.AsyncClient") as mock_client: + mock_client.return_value.__aenter__.return_value.get = AsyncMock( + side_effect=Exception("Network error") + ) + + client = ReccoBeatsClient() + result = await client.get_audio_features(["test123"]) + + assert result == [] diff --git a/backend/tests/test_main.py b/backend/tests/test_main.py new file mode 100644 index 0000000..d93a604 --- /dev/null +++ b/backend/tests/test_main.py @@ -0,0 +1,49 @@ +import pytest +from unittest.mock import MagicMock, patch, AsyncMock +from datetime import datetime + + +@pytest.fixture +def mock_db(): + return MagicMock() + + +class TestSnapshotsEndpoint: + def test_snapshots_endpoint_exists(self, mock_db): + with patch("app.main.SessionLocal", return_value=mock_db): + from fastapi.testclient import TestClient + from app.main import app + + mock_db.query.return_value.order_by.return_value.limit.return_value.all.return_value = [] + + with TestClient(app) as client: + response = client.get("/snapshots?limit=1") + assert response.status_code == 200 + + +class TestListeningLogEndpoint: + def test_listening_log_endpoint_exists(self, mock_db): + with patch("app.main.SessionLocal", return_value=mock_db): + from fastapi.testclient import TestClient + 
from app.main import app + + mock_db.query.return_value.options.return_value.filter.return_value.order_by.return_value.limit.return_value.all.return_value = [] + + with TestClient(app) as client: + response = client.get("/listening-log?days=7&limit=100") + assert response.status_code == 200 + + +class TestSessionsEndpoint: + def test_sessions_endpoint_exists(self, mock_db): + with patch("app.main.SessionLocal", return_value=mock_db): + from fastapi.testclient import TestClient + from app.main import app + + mock_db.query.return_value.options.return_value.filter.return_value.order_by.return_value.all.return_value = [] + + with TestClient(app) as client: + response = client.get("/sessions?days=7") + assert response.status_code == 200 + data = response.json() + assert "session_list" in data diff --git a/backend/tests/test_stats_full.py b/backend/tests/test_stats_full.py deleted file mode 100644 index db00779..0000000 --- a/backend/tests/test_stats_full.py +++ /dev/null @@ -1,155 +0,0 @@ -import os -import json -# import pytest <-- Removed -from datetime import datetime, timedelta -from sqlalchemy import create_engine -from sqlalchemy.orm import sessionmaker -from backend.app.models import Base, PlayHistory, Track, Artist -from backend.app.services.stats_service import StatsService - -# Setup Test Database -# @pytest.fixture <-- Removed -def db_session(): - engine = create_engine("sqlite:///:memory:") - Base.metadata.create_all(engine) - Session = sessionmaker(bind=engine) - session = Session() - yield session - session.close() - -def seed_data(db): - """ - Seeds the database with specific patterns to verify metrics. - Pattern: - - High Energy/Happy Session (Morning) - - Low Energy/Sad Session (Night) - - Skips - - Repeats - """ - - # 1. 
Create Artists - a1 = Artist(id="a1", name="The Hype Men", genres=["pop", "dance"]) - a2 = Artist(id="a2", name="Sad Bois", genres=["indie", "folk"]) - a3 = Artist(id="a3", name="Mozart", genres=["classical"]) - db.add_all([a1, a2, a3]) - - # 2. Create Tracks - # High Energy, High Valence, Fast - t1 = Track( - id="t1", name="Party Anthem", album="Hype Vol 1", duration_ms=180000, - popularity=80, energy=0.9, valence=0.9, danceability=0.8, tempo=140.0, acousticness=0.1, instrumentalness=0.0, - key=0, mode=1 # C Major - ) - t1.artists.append(a1) - - # Low Energy, Low Valence, Slow - t2 = Track( - id="t2", name="Rainy Day", album="Sad Vol 1", duration_ms=240000, - popularity=20, energy=0.2, valence=0.1, danceability=0.3, tempo=80.0, acousticness=0.9, instrumentalness=0.0, - key=9, mode=0 # A Minor - ) - t2.artists.append(a2) - - # Classical (Instrumental) - t3 = Track( - id="t3", name="Symphony 40", album="Classics", duration_ms=300000, - popularity=50, energy=0.4, valence=0.5, danceability=0.1, tempo=110.0, acousticness=0.8, instrumentalness=0.9, - key=5, mode=0 - ) - t3.artists.append(a3) - - db.add_all([t1, t2, t3]) - db.commit() - - # 3. Create History - base_time = datetime(2023, 11, 1, 8, 0, 0) # Morning - - plays = [] - - # SESSION 1: Morning Hype (3 plays of t1) - # 08:00 - plays.append(PlayHistory(track_id="t1", played_at=base_time, context_uri="spotify:playlist:morning")) - # 08:04 (4 min gap) - plays.append(PlayHistory(track_id="t1", played_at=base_time + timedelta(minutes=4), context_uri="spotify:playlist:morning")) - # 08:08 - plays.append(PlayHistory(track_id="t1", played_at=base_time + timedelta(minutes=8), context_uri="spotify:playlist:morning")) - - # GAP > 20 mins -> New Session - - # SESSION 2: Night Sadness (t2, t2, t3) - # 22:00 - night_time = datetime(2023, 11, 1, 22, 0, 0) - plays.append(PlayHistory(track_id="t2", played_at=night_time, context_uri="spotify:album:sad")) - - # SKIP SIMULATION: t2 played at 22:00, next play at 22:00:20 (20s later). 
- # Duration is 240s. 20s < 230s. This is a skip. - # But wait, logic says "boredom skip". - # If I play t2 at 22:00. - # And play t3 at 22:00:40. - # Diff = 40s. 40 < (240 - 10). Yes, Skip. - plays.append(PlayHistory(track_id="t3", played_at=night_time + timedelta(seconds=40), context_uri="spotify:album:sad")) - - # Finish t3 (5 mins) - plays.append(PlayHistory(track_id="t3", played_at=night_time + timedelta(seconds=40) + timedelta(minutes=5, seconds=10), context_uri="spotify:album:sad")) - - db.add_all(plays) - db.commit() - -def test_stats_generation(db_session): - seed_data(db_session) - stats_service = StatsService(db_session) - - start = datetime(2023, 11, 1, 0, 0, 0) - end = datetime(2023, 11, 2, 0, 0, 0) - - report = stats_service.generate_full_report(start, end) - - print("\n--- GENERATED REPORT ---") - print(json.dumps(report, indent=2, default=str)) - print("------------------------\n") - - # Assertions - - # 1. Volume - assert report["volume"]["total_plays"] == 6 - assert report["volume"]["unique_tracks"] == 3 - # Top track should be t1 (3 plays) - assert report["volume"]["top_tracks"][0]["name"] == "Party Anthem" - - # 2. Time - # 3 plays in morning (8am), 3 plays at night (22pm) - assert report["time_habits"]["part_of_day"]["morning"] == 3 - assert report["time_habits"]["part_of_day"]["night"] == 0 # 22:00 is "evening" in buckets (18-23) - assert report["time_habits"]["part_of_day"]["evening"] == 3 - - # 3. Sessions - # Should be 2 sessions (gap between 08:08 and 22:00) - assert report["sessions"]["count"] == 2 - - # 4. Skips - # 1 skip detected (t2 -> t3 gap was 40s vs 240s duration) - assert report["skips"]["total_skips"] == 1 - - # 5. Vibe & Clustering - # Should have cluster info - assert "clusters" in report["vibe"] - # Check harmonic - assert report["vibe"]["harmonic_profile"]["major_pct"] > 0 - # Check tempo zones (t1=140=Hype, t2=80=Chill, t3=110=Groove) - # 3x t1 (Hype), 1x t2 (Chill), 2x t3 (Groove) - # Total 6. 
Hype=0.5, Chill=0.17, Groove=0.33 - zones = report["vibe"]["tempo_zones"] - assert zones["hype"] == 0.5 - - # 6. Context - # Morning = Playlist (3), Night = Album (3) -> 50/50 - assert report["context"]["type_breakdown"]["playlist"] == 0.5 - assert report["context"]["type_breakdown"]["album"] == 0.5 - -if __name__ == "__main__": - # Manually run if executed as script - engine = create_engine("sqlite:///:memory:") - Base.metadata.create_all(engine) - Session = sessionmaker(bind=engine) - session = Session() - test_stats_generation(session) diff --git a/docker-compose.template.yml b/docker-compose.template.yml new file mode 100644 index 0000000..941c535 --- /dev/null +++ b/docker-compose.template.yml @@ -0,0 +1,64 @@ +# MusicAnalyser Docker Compose Template +# Copy this file to docker-compose.yml and fill in your values +# Or use environment variables / .env file + +version: '3.8' + +services: + backend: + build: + context: ./backend + image: ghcr.io/bnair123/musicanalyser:latest + container_name: music-analyser-backend + restart: unless-stopped + volumes: + - music_data:/app/data + environment: + - DATABASE_URL=sqlite:////app/data/music.db + # Required: Spotify API credentials + - SPOTIFY_CLIENT_ID=your_spotify_client_id_here + - SPOTIFY_CLIENT_SECRET=your_spotify_client_secret_here + - SPOTIFY_REFRESH_TOKEN=your_spotify_refresh_token_here + # Required: AI API key (choose one) + - OPENAI_API_KEY=your_openai_api_key_here + # OR + - GEMINI_API_KEY=your_gemini_api_key_here + # Optional: Genius for lyrics + - GENIUS_ACCESS_TOKEN=your_genius_token_here + ports: + - '8000:8000' + networks: + - dockernet + healthcheck: + test: ["CMD", "curl", "-f", "http://localhost:8000/snapshots?limit=1"] + interval: 30s + timeout: 10s + retries: 3 + start_period: 60s + + frontend: + build: + context: ./frontend + image: ghcr.io/bnair123/musicanalyser-frontend:latest + container_name: music-analyser-frontend + restart: unless-stopped + ports: + - '8991:80' + networks: + - dockernet + 
depends_on: + backend: + condition: service_healthy + +volumes: + music_data: + driver: local + +networks: + dockernet: + external: true + # If you don't have an external dockernet, create it with: + # docker network create dockernet + # Or change to: + # dockernet: + # driver: bridge diff --git a/docker-compose.yml b/docker-compose.yml index 223771e..ceb43a1 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -16,14 +16,18 @@ services: - SPOTIFY_REFRESH_TOKEN=${SPOTIFY_REFRESH_TOKEN} - GEMINI_API_KEY=${GEMINI_API_KEY} - GENIUS_ACCESS_TOKEN=${GENIUS_ACCESS_TOKEN} + - OPENAI_API_KEY=${OPENAI_API_KEY} + - OPENAI_APIKEY=${OPENAI_APIKEY} ports: - '8000:8000' + networks: + - dockernet healthcheck: - test: ["CMD", "curl", "-f", "http://localhost:8000/"] + test: ["CMD", "curl", "-f", "http://localhost:8000/snapshots?limit=1"] interval: 30s timeout: 10s retries: 3 - start_period: 40s + start_period: 60s frontend: build: @@ -33,6 +37,8 @@ services: restart: unless-stopped ports: - '8991:80' + networks: + - dockernet depends_on: backend: condition: service_healthy @@ -40,3 +46,7 @@ services: volumes: music_data: driver: local + +networks: + dockernet: + external: true diff --git a/docs/API.md b/docs/API.md new file mode 100644 index 0000000..ff2ff0f --- /dev/null +++ b/docs/API.md @@ -0,0 +1,125 @@ +# API Documentation + +The MusicAnalyser Backend is built with FastAPI. It provides endpoints for data ingestion, listening history retrieval, and AI-powered analysis. + +## Base URL +Default local development: `http://localhost:8000` +Docker environment: Proxied via Nginx at `http://localhost:8991/api` + +--- + +## Endpoints + +### 1. Root / Health Check +- **URL**: `/` +- **Method**: `GET` +- **Response**: + ```json + { + "status": "ok", + "message": "Music Analyser API is running" + } + ``` + +### 2. Get Recent History +Returns a flat list of recently played tracks. 
+- **URL**: `/history` +- **Method**: `GET` +- **Query Parameters**: + - `limit` (int, default=50): Number of items to return. +- **Response**: List of PlayHistory objects with nested Track data. + +### 3. Get Tracks +Returns a list of unique tracks in the database. +- **URL**: `/tracks` +- **Method**: `GET` +- **Query Parameters**: + - `limit` (int, default=50): Number of tracks to return. + +### 4. Trigger Spotify Ingestion +Manually triggers a background task to poll Spotify for recently played tracks. +- **URL**: `/trigger-ingest` +- **Method**: `POST` +- **Response**: + ```json + { + "status": "Ingestion started in background" + } + ``` + +### 5. Trigger Analysis Pipeline +Runs the full stats calculation and AI narrative generation for a specific timeframe. +- **URL**: `/trigger-analysis` +- **Method**: `POST` +- **Query Parameters**: + - `days` (int, default=30): Number of past days to analyze. + - `model_name` (str): LLM model to use. +- **Response**: + ```json + { + "status": "success", + "snapshot_id": 1, + "period": { "start": "...", "end": "..." }, + "metrics": { ... }, + "narrative": { ... } + } + ``` + +### 6. Get Analysis Snapshots +Retrieves previously saved analysis reports. +- **URL**: `/snapshots` +- **Method**: `GET` +- **Query Parameters**: + - `limit` (int, default=10): Number of snapshots to return. + +### 7. Detailed Listening Log +Returns a refined listening log with skip detection and listening duration calculations. +- **URL**: `/listening-log` +- **Method**: `GET` +- **Query Parameters**: + - `days` (int, 1-365, default=7): Timeframe. + - `limit` (int, 1-1000, default=200): Max plays to return. +- **Response**: + ```json + { + "plays": [ + { + "id": 123, + "track_name": "Song Name", + "artist": "Artist Name", + "played_at": "ISO-TIMESTAMP", + "listened_ms": 180000, + "skipped": false, + "image": "..." + } + ], + "period": { "start": "...", "end": "..." } + } + ``` + +### 8. 
Session Statistics +Groups plays into listening sessions (Marathon, Standard, Micro). +- **URL**: `/sessions` +- **Method**: `GET` +- **Query Parameters**: + - `days` (int, 1-365, default=7): Timeframe. +- **Response**: + ```json + { + "session_list": [ + { + "start_time": "...", + "end_time": "...", + "duration_minutes": 45, + "track_count": 12, + "type": "Standard" + } + ], + "summary": { + "count": 10, + "avg_minutes": 35, + "micro_rate": 0.1, + "marathon_rate": 0.05 + } + } + ``` diff --git a/docs/ARCHITECTURE.md b/docs/ARCHITECTURE.md new file mode 100644 index 0000000..9e9691c --- /dev/null +++ b/docs/ARCHITECTURE.md @@ -0,0 +1,43 @@ +# Architecture Overview + +MusicAnalyser is a full-stack personal analytics platform designed to collect, store, and analyze music listening habits using the Spotify API and Google Gemini AI. + +## System Components + +### 1. Backend (FastAPI) +- **API Layer**: Handles requests from the frontend, manages the database, and triggers analysis. +- **Database**: SQLite used for local storage of listening history, track metadata, and AI snapshots. +- **ORM**: SQLAlchemy manages the data models and relationships. +- **Services**: + - `SpotifyClient`: Handles OAuth2 flow and API requests. + - `StatsService`: Computes complex metrics (heatmaps, sessions, top tracks, hipster scores). + - `NarrativeService`: Interfaces with Google Gemini to generate text-based insights. + - `IngestService`: Manages the logic of fetching and deduplicating Spotify "recently played" data. + +### 2. Background Worker +- A standalone Python script (`run_worker.py`) that polls the Spotify API every 60 seconds. +- Ensures a continuous record of listening history even when the dashboard is not open. + +### 3. Frontend (React) +- **Framework**: Vite + React. +- **Styling**: Tailwind CSS for a modern, dark-themed dashboard. +- **Visualizations**: Recharts for radar and heatmaps; Framer Motion for animations. 
+- **State**: Managed via standard React hooks (`useState`, `useEffect`) and local storage for caching. + +### 4. External Integrations +- **Spotify API**: Primary data source for tracks, artists, and listening history. +- **ReccoBeats API**: Used for fetching audio features (BPM, Energy, Mood) for tracks. +- **Genius API**: Used for fetching song lyrics to provide deep content analysis. +- **Google Gemini**: Large Language Model used to "roast" the user's taste and generate personas. + +## Data Flow + +1. **Ingestion**: `Background Worker` → `Spotify API` → `Database (PlayHistory)`. +2. **Enrichment**: `Ingest Logic` → `ReccoBeats/Genius/Spotify` → `Database (Track/Artist)`. +3. **Analysis**: `Frontend` → `Backend API` → `StatsService` → `NarrativeService (Gemini)` → `Database (Snapshot)`. +4. **Visualization**: `Frontend` ← `Backend API` ← `Database (Snapshot/Log)`. + +## Deployment +- **Containerization**: Both Backend and Frontend are containerized using Docker. +- **Docker Compose**: Orchestrates the backend (including worker) and frontend (Nginx proxy) services. +- **CI/CD**: GitHub Actions builds multi-arch images (amd64/arm64) and pushes to GHCR. diff --git a/docs/DATA_MODEL.md b/docs/DATA_MODEL.md new file mode 100644 index 0000000..7a0d4d3 --- /dev/null +++ b/docs/DATA_MODEL.md @@ -0,0 +1,89 @@ +# Data Model Documentation + +This document describes the database schema for the MusicAnalyser project. The project uses SQLite with SQLAlchemy as the ORM. + +## Entity Relationship Diagram Overview + +- **Artist** (Many-to-Many) **Track** +- **Track** (One-to-Many) **PlayHistory** +- **AnalysisSnapshot** (Independent) + +--- + +## Tables + +### `artists` +Stores unique artists retrieved from Spotify. 
+ +| Field | Type | Description | +|-------|------|-------------| +| `id` | String | Spotify ID (Primary Key) | +| `name` | String | Artist name | +| `genres` | JSON | List of genre strings | +| `image_url` | String | URL to artist profile image | + +### `tracks` +Stores unique tracks retrieved from Spotify, enriched with audio features and lyrics. + +| Field | Type | Description | +|-------|------|-------------| +| `id` | String | Spotify ID (Primary Key) | +| `name` | String | Track name | +| `artist` | String | Display string for artists (e.g., "Artist A, Artist B") | +| `album` | String | Album name | +| `image_url` | String | URL to album art | +| `duration_ms` | Integer | Track duration in milliseconds | +| `popularity` | Integer | Spotify popularity score (0-100) | +| `raw_data` | JSON | Full raw response from Spotify API for future-proofing | +| `danceability` | Float | Audio feature: Danceability (0.0 to 1.0) | +| `energy` | Float | Audio feature: Energy (0.0 to 1.0) | +| `key` | Integer | Audio feature: Key | +| `loudness` | Float | Audio feature: Loudness in dB | +| `mode` | Integer | Audio feature: Mode (0 for Minor, 1 for Major) | +| `speechiness` | Float | Audio feature: Speechiness (0.0 to 1.0) | +| `acousticness` | Float | Audio feature: Acousticness (0.0 to 1.0) | +| `instrumentalness` | Float | Audio feature: Instrumentalness (0.0 to 1.0) | +| `liveness` | Float | Audio feature: Liveness (0.0 to 1.0) | +| `valence` | Float | Audio feature: Valence (0.0 to 1.0) | +| `tempo` | Float | Audio feature: Tempo in BPM | +| `time_signature` | Integer | Audio feature: Time signature | +| `lyrics` | Text | Full lyrics retrieved from Genius | +| `lyrics_summary` | String | AI-generated summary of lyrics | +| `genre_tags` | String | Combined genre tags for the track | +| `created_at` | DateTime | Timestamp of record creation | +| `updated_at` | DateTime | Timestamp of last update | + +### `play_history` +Stores individual listening instances. 
+ +| Field | Type | Description | +|-------|------|-------------| +| `id` | Integer | Primary Key (Auto-increment) | +| `track_id` | String | Foreign Key to `tracks.id` | +| `played_at` | DateTime | Timestamp when the track was played | +| `context_uri` | String | Spotify context URI (e.g., playlist or album URI) | +| `listened_ms` | Integer | Computed duration the track was actually heard | +| `skipped` | Boolean | Whether the track was likely skipped | +| `source` | String | Ingestion source (e.g., "spotify_recently_played") | + +### `analysis_snapshots` +Stores periodic analysis results generated by the AI service. + +| Field | Type | Description | +|-------|------|-------------| +| `id` | Integer | Primary Key | +| `date` | DateTime | When the analysis was performed | +| `period_start` | DateTime | Start of the analyzed period | +| `period_end` | DateTime | End of the analyzed period | +| `period_label` | String | Label for the period (e.g., "last_30_days") | +| `metrics_payload` | JSON | Computed statistics used as input for the AI | +| `narrative_report` | JSON | AI-generated narrative and persona | +| `model_used` | String | LLM model identifier (e.g., "gemini-1.5-flash") | + +### `track_artists` (Association Table) +Facilitates the many-to-many relationship between tracks and artists. + +| Field | Type | Description | +|-------|------|-------------| +| `track_id` | String | Foreign Key to `tracks.id` | +| `artist_id` | String | Foreign Key to `artists.id` | diff --git a/docs/FRONTEND.md b/docs/FRONTEND.md new file mode 100644 index 0000000..631ceea --- /dev/null +++ b/docs/FRONTEND.md @@ -0,0 +1,61 @@ +# Frontend Documentation + +The frontend is a React application built with Vite and Tailwind CSS. It uses Ant Design for some UI components and Recharts for data visualization. + +## Main Components + +### `Dashboard.jsx` +The primary layout component that manages data fetching and state. 
+- **Features**: + - Handles API calls to `/snapshots` and `/trigger-analysis`. + - Implements local storage caching to reduce API load. + - Displays a global loading state during analysis. + - Contains the main header with a refresh trigger. + +### `NarrativeSection.jsx` +Displays the AI-generated qualitative analysis. +- **Props**: + - `narrative`: Object containing `persona`, `vibe_check_short`, and `roast`. + - `vibe`: Object containing audio features used to generate dynamic tags. +- **Purpose**: Gives the user an "identity" based on their music taste (e.g., "THE MELANCHOLIC ARCHITECT"). + +### `StatsGrid.jsx` +A grid of high-level metric cards. +- **Props**: + - `metrics`: The `metrics_payload` from a snapshot. +- **Displays**: + - **Minutes Listened**: Total listening time converted to days. + - **Obsession**: The #1 most played track with album art background. + - **Unique Artists**: Count of different artists encountered. + - **Hipster Score**: A percentage indicating how obscure the user's taste is. + +### `VibeRadar.jsx` +Visualizes the "Sonic DNA" of the user. +- **Props**: + - `vibe`: Audio feature averages (acousticness, danceability, energy, etc.). +- **Visuals**: + - **Radar Chart**: Shows the balance of audio features. + - **Mood Clusters**: Floating bubbles representing "Party", "Focus", and "Chill" percentages. + - **Whiplash Meter**: Shows volatility in tempo, energy, and valence between consecutive tracks. + +### `TopRotation.jsx` +A horizontal scrolling list of the most played tracks. +- **Props**: + - `volume`: Object containing `top_tracks` array. +- **Purpose**: Quick view of recent favorites. + +### `HeatMap.jsx` +Visualizes when the user listens to music. +- **Props**: + - `timeHabits`: Compressed heatmap data (7x6 grid for days/time blocks). + - `sessions`: List of recent listening sessions. +- **Visuals**: + - **Grid**: Days of the week vs. Time blocks (12am, 4am, etc.). 
+ - **Session Timeline**: Vertical list of recent listening bouts with session type (Marathon vs. Micro). + +### `ListeningLog.jsx` +A detailed view of individual plays. +- **Features**: + - **Timeline View**: Visualizes listening sessions across the day for the last 7 days. + - **List View**: A table of individual plays with skip status detection. + - **Timeframe Filter**: Toggle between 24h, 7d, 14d, and 30d views. diff --git a/frontend/src/components/Dashboard.jsx b/frontend/src/components/Dashboard.jsx index b44d79c..38e6d10 100644 --- a/frontend/src/components/Dashboard.jsx +++ b/frontend/src/components/Dashboard.jsx @@ -5,7 +5,8 @@ import StatsGrid from './StatsGrid'; import VibeRadar from './VibeRadar'; import HeatMap from './HeatMap'; import TopRotation from './TopRotation'; -import { Spin } from 'antd'; // Keeping Spin for loading state +import ListeningLog from './ListeningLog'; +import { Spin } from 'antd'; const API_BASE_URL = '/api'; @@ -13,7 +14,7 @@ const Dashboard = () => { const [data, setData] = useState(null); const [loading, setLoading] = useState(true); - const getTodayKey = () => `sonicstats_v1_${new Date().toISOString().split('T')[0]}`; + const getTodayKey = () => `sonicstats_v2_${new Date().toISOString().split('T')[0]}`; const fetchData = async (forceRefresh = false) => { setLoading(true); @@ -73,9 +74,11 @@ const Dashboard = () => { ); } + const vibeCheckFull = data?.narrative?.vibe_check || ""; + const patterns = data?.narrative?.patterns || []; + return ( <> - {/* Navbar */}
@@ -98,27 +101,52 @@ const Dashboard = () => {
- {/* Hero */} - {/* Stats Bento Grid */} - {/* Sonic DNA & Chronobiology Split */}
- {/* Left Col: Sonic DNA (2/3 width) */}
- {/* Right Col: Chronobiology (1/3 width) */}
- +
- {/* Footer: The Roast */} + + + {(vibeCheckFull || patterns.length > 0) && ( +
+

+ psychology + Full Analysis +

+ + {vibeCheckFull && ( +
+

{vibeCheckFull}

+
+ )} + + {patterns.length > 0 && ( +
+

Patterns Detected

+
    + {patterns.map((pattern, idx) => ( +
  • + insights + {pattern} +
  • + ))} +
+
+ )} +
+ )} + {data?.narrative?.roast && (