from fastapi import FastAPI, Depends, HTTPException, BackgroundTasks
from sqlalchemy.orm import Session
from datetime import datetime, timedelta
from typing import List, Optional
from dotenv import load_dotenv

from .database import engine, Base, get_db
from .models import PlayHistory as PlayHistoryModel, Track as TrackModel, AnalysisSnapshot
from . import schemas
from .ingest import ingest_recently_played
from .services.stats_service import StatsService
from .services.narrative_service import NarrativeService

load_dotenv()

# NOTE(review): `app = FastAPI()`, Base.metadata.create_all(...), and the
# unchanged /history and /tracks endpoints live in the untouched region of
# this file and are not repeated here.


@app.post("/trigger-ingest")
async def trigger_ingest(background_tasks: BackgroundTasks, db: Session = Depends(get_db)):
    """Kick off Spotify ingestion in the background without blocking the request."""
    background_tasks.add_task(ingest_recently_played, db)
    return {"status": "Ingestion started in background"}


@app.post("/trigger-analysis")
def trigger_analysis(
    days: int = 30,
    model_name: str = "gemini-2.5-flash",
    db: Session = Depends(get_db),
):
    """
    Run the full analysis pipeline (stats + LLM narrative) for the last
    `days` days, persist an AnalysisSnapshot, and return the results.
    """
    try:
        end_date = datetime.utcnow()
        start_date = end_date - timedelta(days=days)

        # 1. Compute stats
        stats_service = StatsService(db)
        stats_json = stats_service.generate_full_report(start_date, end_date)

        if stats_json["volume"]["total_plays"] == 0:
            raise HTTPException(status_code=404, detail="No plays found in the specified period.")

        # 2. Generate narrative
        narrative_service = NarrativeService(model_name=model_name)
        narrative_json = narrative_service.generate_narrative(stats_json)

        # 3. Save snapshot
        snapshot = AnalysisSnapshot(
            period_start=start_date,
            period_end=end_date,
            period_label=f"last_{days}_days",
            metrics_payload=stats_json,
            narrative_report=narrative_json,
            model_used=model_name,
        )
        db.add(snapshot)
        db.commit()
        db.refresh(snapshot)

        return {
            "status": "success",
            "snapshot_id": snapshot.id,
            "period": {"start": start_date, "end": end_date},
            "metrics": stats_json,
            "narrative": narrative_json,
        }

    except HTTPException:
        # BUG FIX: the 404 raised above was previously caught by the broad
        # handler below and converted into a 500. Let HTTP errors through.
        raise
    except Exception as e:
        print(f"Analysis Failed: {e}")
        raise HTTPException(status_code=500, detail=str(e))


@app.get("/snapshots")
def get_snapshots(limit: int = 10, db: Session = Depends(get_db)):
    """Retrieve the most recent analysis snapshots."""
    # BUG FIX: snapshots are created with period_start/period_end/... — no
    # `date` column is visible, so ordering by `.date` would raise.
    # Ordering by period_end instead; TODO confirm against the model.
    return (
        db.query(AnalysisSnapshot)
        .order_by(AnalysisSnapshot.period_end.desc())
        .limit(limit)
        .all()
    )
Save Snapshot + snapshot = AnalysisSnapshot( + period_start=start_date, + period_end=end_date, + period_label=f"last_{days}_days", + metrics_payload=stats_json, + narrative_report=narrative_json, + model_used=model_name + ) + db.add(snapshot) + db.commit() + db.refresh(snapshot) + + return { + "status": "success", + "snapshot_id": snapshot.id, + "period": {"start": start_date, "end": end_date}, + "metrics": stats_json, + "narrative": narrative_json + } + + except Exception as e: + print(f"Analysis Failed: {e}") + raise HTTPException(status_code=500, detail=str(e)) + +@app.get("/snapshots") +def get_snapshots(limit: int = 10, db: Session = Depends(get_db)): + """Retrieve past analysis snapshots.""" + return db.query(AnalysisSnapshot).order_by(AnalysisSnapshot.date.desc()).limit(limit).all() \ No newline at end of file diff --git a/backend/app/services/narrative_service.py b/backend/app/services/narrative_service.py index f1359ff..72c8f0f 100644 --- a/backend/app/services/narrative_service.py +++ b/backend/app/services/narrative_service.py @@ -18,43 +18,35 @@ class NarrativeService: return {"error": "Missing API Key"} prompt = f""" -You are analyzing a user's Spotify listening data. Below is a JSON summary of metrics I've computed. Your job is to: +You are a witty, insightful, and slightly snarky music critic analyzing a user's listening history. +Below is a JSON summary of their listening data. -1. Write a narrative "Vibe Check" (2-3 paragraphs) describing their overall listening personality this period. -2. Identify 3-5 notable patterns or anomalies. -3. Provide a "Musical Persona" label (e.g., "Late-Night Binge Listener", "Genre Chameleon", "Album Purist"). -4. Write a brief, playful "roast" (1-2 sentences) based on the data. +Your goal is to generate a report that feels like a 'Spotify Wrapped' but deeper and more honest. -Guidelines: -- Do NOT recalculate any numbers. 
-- Use specific metrics to support observations (e.g., "Your whiplash score of 18.3 BPM suggests..."). -- Keep tone conversational but insightful. -- Avoid mental health claims; stick to behavioral descriptors. -- Highlight both positive patterns and quirks. +Please output your response in strict JSON format with the following keys: +1. "vibe_check": (String) 2-3 paragraphs describing their overall listening personality. +2. "patterns": (List of Strings) 3-5 specific observations based on the data (e.g., "You listen to sad music on Tuesdays", "Your Whiplash Score is high"). +3. "persona": (String) A creative label for the user (e.g., "The Genre Chameleon", "Nostalgic Dad-Rocker", "Algorithm Victim"). +4. "roast": (String) A playful, harmlessly mean roast about their taste (1-2 sentences). +5. "era_insight": (String) A specific comment on their 'Musical Age' and 'Nostalgia Gap'. -Data: +GUIDELINES: +- **Use the Metrics:** Do not just say "You like pop." Say "Your Mainstream Score of 85% suggests you live on the Top 40." +- **Whiplash Score:** If 'whiplash' > 20, comment on their chaotic transitions. +- **Hipster Score:** If 'hipster_score' > 50, call them pretentious; if < 10, call them basic. +- **Comparison:** Use the 'comparison' block to mention if they are listening more/less or if their mood (valence/energy) has shifted. +- **Tone:** Conversational, fun, slightly judgmental but good-natured. + +DATA: {json.dumps(stats_json, indent=2)} -Output Format (return valid JSON): -{{ - "vibe_check": "...", - "patterns": ["...", "..."], - "persona": "...", - "roast": "..." -}} +OUTPUT (JSON): """ try: - # Handle full model path if passed or default short name - # The library often accepts 'gemini-2.5-flash' but list_models returns 'models/gemini-2.5-flash' - model_id = self.model_name - if not model_id.startswith("models/") and "/" not in model_id: - # Try simple name, if it fails user might need to pass 'models/...' 
from sqlalchemy.orm import Session, joinedload
from sqlalchemy import func, distinct, desc
from datetime import datetime, timedelta
from typing import Dict, Any, List
import math

import numpy as np

# NOTE(review): the model imports (PlayHistory, Track, Artist) from the
# unchanged header region of this module are preserved and not repeated here.


class StatsService:
    """Computes listening-history metrics over a [start, end] window."""

    def __init__(self, db: Session):
        self.db = db

    # BUG FIX: `joinedload` must be imported from sqlalchemy.orm, not the
    # top-level sqlalchemy package (that import raises ImportError). The
    # stray `from sqlalchemy.orm import joinedload` that sat inside the
    # class body has been folded into the module imports above.

    def compute_comparison(self, current_stats: Dict[str, Any], period_start: datetime,
                           period_end: datetime) -> Dict[str, Any]:
        """
        Deltas vs the immediately preceding window of the same length.

        Only the cheap sub-reports (volume, vibe, taste) are recomputed for
        the previous window rather than the full report.
        """
        duration = period_end - period_start
        prev_end = period_start
        prev_start = prev_end - duration

        prev_volume = self.compute_volume_stats(prev_start, prev_end)
        prev_vibe = self.compute_vibe_stats(prev_start, prev_end)
        prev_taste = self.compute_taste_stats(prev_start, prev_end)

        deltas = {}

        # Play-count deltas; guard against an empty previous window.
        curr_plays = current_stats["volume"]["total_plays"]
        prev_plays_count = prev_volume["total_plays"]
        deltas["plays_delta"] = curr_plays - prev_plays_count
        deltas["plays_pct_change"] = (
            round(((curr_plays - prev_plays_count) / prev_plays_count) * 100, 1)
            if prev_plays_count else 0
        )

        # Mood shift: mood_quadrant stores valence on x, energy on y.
        if "mood_quadrant" in current_stats["vibe"] and "mood_quadrant" in prev_vibe:
            deltas["energy_delta"] = round(
                current_stats["vibe"]["mood_quadrant"]["y"] - prev_vibe["mood_quadrant"]["y"], 2)
            deltas["valence_delta"] = round(
                current_stats["vibe"]["mood_quadrant"]["x"] - prev_vibe["mood_quadrant"]["x"], 2)

        # Popularity shift
        if "avg_popularity" in current_stats["taste"] and "avg_popularity" in prev_taste:
            deltas["popularity_delta"] = round(
                current_stats["taste"]["avg_popularity"] - prev_taste["avg_popularity"], 1)

        return {
            "previous_period": {
                "start": prev_start.isoformat(),
                "end": prev_end.isoformat()
            },
            "deltas": deltas
        }
""" - query = self.db.query(PlayHistory).filter( + # Eager load tracks AND artists to fix the "Artist String Problem" and performance + query = self.db.query(PlayHistory).options( + joinedload(PlayHistory.track).joinedload(Track.artists) + ).filter( PlayHistory.played_at >= period_start, PlayHistory.played_at <= period_end ) @@ -24,167 +81,94 @@ class StatsService: if total_plays == 0: return { - "total_plays": 0, - "estimated_minutes": 0, - "unique_tracks": 0, - "unique_artists": 0, - "unique_albums": 0, - "unique_genres": 0, - "top_tracks": [], - "top_artists": [], - "repeat_rate": 0, - "concentration": {} + "total_plays": 0, "estimated_minutes": 0, "unique_tracks": 0, + "unique_artists": 0, "unique_albums": 0, "unique_genres": 0, + "top_tracks": [], "top_artists": [], "top_genres": [], + "repeat_rate": 0, "concentration": {} } - # Calculate Duration (Estimated) - # Note: We query tracks to get duration. - # Ideally we join, but eager loading might be heavy. Let's do a join or simple loop. - # Efficient approach: Get all track IDs from plays, fetch Track objects in bulk map. - - track_ids = [p.track_id for p in plays] - tracks = self.db.query(Track).filter(Track.id.in_(set(track_ids))).all() - track_map = {t.id: t for t in tracks} - total_ms = 0 - unique_track_ids = set() - unique_artist_ids = set() - unique_album_names = set() # Spotify doesn't give album ID in PlayHistory directly unless joined, track has album name string. - # Ideally track has raw_data['album']['id']. 
- unique_album_ids = set() - + track_counts = {} + artist_counts = {} genre_counts = {} - - # For Top Lists - track_play_counts = {} - artist_play_counts = {} + album_ids = set() for p in plays: - t = track_map.get(p.track_id) - if t: - total_ms += t.duration_ms - unique_track_ids.add(t.id) + t = p.track + if not t: continue - # Top Tracks - track_play_counts[t.id] = track_play_counts.get(t.id, 0) + 1 + total_ms += t.duration_ms if t.duration_ms else 0 - # Artists (using relation) - # Note: This might cause N+1 query if not eager loaded. - # For strictly calculation, accessing t.artists (lazy load) loop might be slow for 1000s of plays. - # Optimization: Join PlayHistory -> Track -> Artist in query. + # Track Counts + track_counts[t.id] = track_counts.get(t.id, 0) + 1 - # Let's rely on raw_data for speed if relation loading is slow, - # OR Assume we accept some latency. - # Better: Pre-fetch artist connections or use the new tables properly. - # Let's use the object relation for correctness as per plan. 
- for artist in t.artists: - unique_artist_ids.add(artist.id) - artist_play_counts[artist.id] = artist_play_counts.get(artist.id, 0) + 1 + # Album Counts (using raw_data ID if available, else name) + if t.raw_data and "album" in t.raw_data and "id" in t.raw_data["album"]: + album_ids.add(t.raw_data["album"]["id"]) + else: + album_ids.add(t.album) - if artist.genres: - for g in artist.genres: - genre_counts[g] = genre_counts.get(g, 0) + 1 + # Artist Counts (Iterate objects, not string) + for artist in t.artists: + artist_counts[artist.id] = artist_counts.get(artist.id, 0) + 1 + if artist.genres: + for g in artist.genres: + genre_counts[g] = genre_counts.get(g, 0) + 1 - if t.raw_data and "album" in t.raw_data: - unique_album_ids.add(t.raw_data["album"]["id"]) - else: - unique_album_ids.add(t.album) # Fallback + # Derived Metrics + unique_tracks = len(track_counts) + one_and_done = len([c for c in track_counts.values() if c == 1]) - estimated_minutes = total_ms / 60000 + # Top Lists + top_tracks = [ + {"name": self.db.query(Track).get(tid).name, "artist": self.db.query(Track).get(tid).artist, "count": c} + for tid, c in sorted(track_counts.items(), key=lambda x: x[1], reverse=True)[:5] + ] - # Top 5 Tracks - sorted_tracks = sorted(track_play_counts.items(), key=lambda x: x[1], reverse=True)[:5] - top_tracks = [] - for tid, count in sorted_tracks: - t = track_map.get(tid) - top_tracks.append({ - "name": t.name, - "artist": t.artist, # Display string - "count": count - }) - - # Top 5 Artists - # Need to fetch Artist names - top_artist_ids = sorted(artist_play_counts.items(), key=lambda x: x[1], reverse=True)[:5] + top_artist_ids = sorted(artist_counts.items(), key=lambda x: x[1], reverse=True)[:5] + # Fetch artist names efficiently top_artists_objs = self.db.query(Artist).filter(Artist.id.in_([x[0] for x in top_artist_ids])).all() - artist_name_map = {a.id: a.name for a in top_artists_objs} + artist_map = {a.id: a.name for a in top_artists_objs} + top_artists = 
[{"name": artist_map.get(aid, "Unknown"), "count": c} for aid, c in top_artist_ids] - top_artists = [] - for aid, count in top_artist_ids: - top_artists.append({ - "name": artist_name_map.get(aid, "Unknown"), - "count": count - }) + top_genres = [{"name": k, "count": v} for k, v in + sorted(genre_counts.items(), key=lambda x: x[1], reverse=True)[:5]] - # Top Genres - sorted_genres = sorted(genre_counts.items(), key=lambda x: x[1], reverse=True)[:5] - top_genres = [{"name": g, "count": c} for g, c in sorted_genres] + # Concentration (HHI & Gini) + # HHI: Sum of (share)^2 + shares = [c / total_plays for c in track_counts.values()] + hhi = sum([s ** 2 for s in shares]) - # Concentration - unique_tracks_count = len(unique_track_ids) - repeat_rate = (total_plays - unique_tracks_count) / total_plays if total_plays > 0 else 0 - - # HHI (Herfindahl–Hirschman Index) - # Sum of (share)^2. Share = track_plays / total_plays - hhi = sum([(c/total_plays)**2 for c in track_play_counts.values()]) + # Gini Coefficient (Inequality of play distribution) + sorted_shares = sorted(shares) + n = len(shares) + if n > 0: + gini = (2 * sum((i + 1) * x for i, x in enumerate(sorted_shares))) / (n * sum(sorted_shares)) - (n + 1) / n + else: + gini = 0 return { "total_plays": total_plays, - "estimated_minutes": int(estimated_minutes), - "unique_tracks": unique_tracks_count, - "unique_artists": len(unique_artist_ids), - "unique_albums": len(unique_album_ids), + "estimated_minutes": int(total_ms / 60000), + "unique_tracks": unique_tracks, + "unique_artists": len(artist_counts), + "unique_albums": len(album_ids), "unique_genres": len(genre_counts), "top_tracks": top_tracks, "top_artists": top_artists, "top_genres": top_genres, - "repeat_rate": round(repeat_rate, 3), + "repeat_rate": round((total_plays - unique_tracks) / total_plays, 3) if total_plays else 0, + "one_and_done_rate": round(one_and_done / unique_tracks, 3) if unique_tracks else 0, "concentration": { "hhi": round(hhi, 4), - # "gini": 
    def compute_time_stats(self, period_start: datetime, period_end: datetime) -> Dict[str, Any]:
        """
        Time-of-day habits: hourly/weekday distributions, part-of-day buckets
        (morning/afternoon/evening/night), weekend share, and day streaks.

        Note: played_at is used as stored — presumably UTC; verify before
        interpreting peak_hour as local time.
        """
        query = self.db.query(PlayHistory).filter(
            PlayHistory.played_at >= period_start,
            PlayHistory.played_at <= period_end
        )
        plays = query.all()

        if not plays:
            return {}

        hourly_counts = [0] * 24
        weekday_counts = [0] * 7  # 0=Mon .. 6=Sun (datetime.weekday convention)
        part_of_day = {"morning": 0, "afternoon": 0, "evening": 0, "night": 0}

        # For Streaks
        active_dates = set()

        for p in plays:
            h = p.played_at.hour
            hourly_counts[h] += 1
            weekday_counts[p.played_at.weekday()] += 1
            active_dates.add(p.played_at.date())

            # Bucket boundaries: 05-11 morning, 12-16 afternoon,
            # 17-21 evening, everything else (22-04) night.
            if 5 <= h < 12:
                part_of_day["morning"] += 1
            elif 12 <= h < 17:
                part_of_day["afternoon"] += 1
            elif 17 <= h < 22:
                part_of_day["evening"] += 1
            else:
                part_of_day["night"] += 1

        # Calculate Streak
        sorted_dates = sorted(list(active_dates))
        current_streak = 0
        longest_streak = 0
        if sorted_dates:
            current_streak = 1
            longest_streak = 1
            # Check strictly consecutive days; a gap resets the run.
            for i in range(1, len(sorted_dates)):
                delta = (sorted_dates[i] - sorted_dates[i - 1]).days
                if delta == 1:
                    current_streak += 1
                else:
                    longest_streak = max(longest_streak, current_streak)
                    current_streak = 1
            # Account for a run that reaches the final date.
            longest_streak = max(longest_streak, current_streak)

        weekend_plays = weekday_counts[5] + weekday_counts[6]

        return {
            "hourly_distribution": hourly_counts,
            "peak_hour": hourly_counts.index(max(hourly_counts)),
            "weekday_distribution": weekday_counts,
            "weekend_share": round(weekend_plays / len(plays), 2),
            "part_of_day": part_of_day,
            # NOTE: "listening_streak" is the length of the most recent
            # consecutive run of active days, not necessarily ongoing today.
            "listening_streak": current_streak,
            "longest_streak": longest_streak,
            "active_days": len(active_dates)
        }
+ """ + # Need to join Track to get Energy features for Arc analysis + query = self.db.query(PlayHistory).options(joinedload(PlayHistory.track)).filter( + PlayHistory.played_at >= period_start, + PlayHistory.played_at <= period_end + ).order_by(PlayHistory.played_at.asc()) + plays = query.all() + + if not plays: + return {"count": 0} sessions = [] current_session = [plays[0]] + # 1. Sessionization (Gap > 20 mins) for i in range(1, len(plays)): - prev = plays[i-1] - curr = plays[i] - diff = (curr.played_at - prev.played_at).total_seconds() / 60 - + diff = (plays[i].played_at - plays[i-1].played_at).total_seconds() / 60 if diff > 20: sessions.append(current_session) current_session = [] - - current_session.append(curr) - + current_session.append(plays[i]) sessions.append(current_session) - session_lengths_min = [] - for sess in sessions: - if len(sess) > 1: - start = sess[0].played_at - end = sess[-1].played_at - # Add duration of last track? - # Let's just do (end - start) for simplicity + avg track duration - duration = (end - start).total_seconds() / 60 - session_lengths_min.append(duration) - else: - session_lengths_min.append(3.0) # Approx 1 track + # 2. 
Analyze Sessions + lengths_min = [] + micro_sessions = 0 + marathon_sessions = 0 + energy_arcs = {"rising": 0, "falling": 0, "flat": 0, "unknown": 0} - avg_min = sum(session_lengths_min) / len(session_lengths_min) if session_lengths_min else 0 + for sess in sessions: + # Durations + if len(sess) > 1: + duration = (sess[-1].played_at - sess[0].played_at).total_seconds() / 60 + lengths_min.append(duration) + else: + lengths_min.append(3.0) # Approx + + # Types + if len(sess) <= 3: micro_sessions += 1 + if len(sess) >= 20: marathon_sessions += 1 + + # Energy Arc (First vs Last track) + first_t = sess[0].track + last_t = sess[-1].track + if first_t and last_t and first_t.energy is not None and last_t.energy is not None: + diff = last_t.energy - first_t.energy + if diff > 0.1: energy_arcs["rising"] += 1 + elif diff < -0.1: energy_arcs["falling"] += 1 + else: energy_arcs["flat"] += 1 + else: + energy_arcs["unknown"] += 1 + + avg_min = sum(lengths_min) / len(lengths_min) if lengths_min else 0 return { "count": len(sessions), - "avg_tracks": len(plays) / len(sessions), + "avg_tracks": round(len(plays) / len(sessions), 1), "avg_minutes": round(avg_min, 1), - "longest_session_minutes": round(max(session_lengths_min), 1) if session_lengths_min else 0 + "longest_session_minutes": round(max(lengths_min), 1) if lengths_min else 0, + "micro_session_rate": round(micro_sessions / len(sessions), 2), + "marathon_session_rate": round(marathon_sessions / len(sessions), 2), + "energy_arcs": energy_arcs } def compute_vibe_stats(self, period_start: datetime, period_end: datetime) -> Dict[str, Any]: """ - Aggregates Audio Features (Energy, Valence, etc.) 
+ Aggregates Audio Features + Calculates Whiplash (Transitions) """ - query = self.db.query(PlayHistory).filter( + # Fetch plays strictly ordered by time for transition analysis + plays = self.db.query(PlayHistory).filter( PlayHistory.played_at >= period_start, PlayHistory.played_at <= period_end - ) - plays = query.all() - track_ids = list(set([p.track_id for p in plays])) + ).order_by(PlayHistory.played_at.asc()).all() - if not track_ids: + if not plays: return {} + track_ids = list(set([p.track_id for p in plays])) tracks = self.db.query(Track).filter(Track.id.in_(track_ids)).all() + track_map = {t.id: t for t in tracks} - # Collect features - features = { - "energy": [], "valence": [], "danceability": [], - "tempo": [], "acousticness": [], "instrumentalness": [], - "liveness": [], "speechiness": [] - } + # 1. Aggregates + features = {k: [] for k in + ["energy", "valence", "danceability", "tempo", "acousticness", "instrumentalness", "liveness", + "speechiness", "loudness"]} - for t in tracks: - # Weight by plays? The spec implies "Per-Period Aggregates". - # Usually weighted by play count is better representation of what was HEARD. - # Let's weight by play count in this period. - play_count = len([p for p in plays if p.track_id == t.id]) + # 2. 
Transition Arrays (for Whiplash) + transitions = {"tempo": [], "energy": [], "valence": []} + previous_track = None + + for i, p in enumerate(plays): + t = track_map.get(p.track_id) + if not t: continue + + # Populate aggregations if t.energy is not None: - for _ in range(play_count): - features["energy"].append(t.energy) - features["valence"].append(t.valence) - features["danceability"].append(t.danceability) - features["tempo"].append(t.tempo) - features["acousticness"].append(t.acousticness) - features["instrumentalness"].append(t.instrumentalness) - features["liveness"].append(t.liveness) - features["speechiness"].append(t.speechiness) + features["energy"].append(t.energy) + features["valence"].append(t.valence) + features["danceability"].append(t.danceability) + features["tempo"].append(t.tempo) + features["acousticness"].append(t.acousticness) + features["instrumentalness"].append(t.instrumentalness) + features["liveness"].append(t.liveness) + features["speechiness"].append(t.speechiness) + features["loudness"].append(t.loudness) + # Calculate Transitions (Whiplash) + if i > 0 and previous_track: + # Only count transition if within reasonable time (e.g. 
< 5 mins gap) + # assuming continuous listening + time_diff = (p.played_at - plays[i - 1].played_at).total_seconds() + if time_diff < 300: + if t.tempo and previous_track.tempo: + transitions["tempo"].append(abs(t.tempo - previous_track.tempo)) + if t.energy and previous_track.energy: + transitions["energy"].append(abs(t.energy - previous_track.energy)) + + previous_track = t + + # Calculate Stats stats = {} for key, values in features.items(): valid = [v for v in values if v is not None] @@ -282,46 +365,55 @@ class StatsService: stats[f"avg_{key}"] = None # Derived Metrics - if stats.get("avg_energy") and stats.get("avg_valence"): + if stats.get("avg_energy") is not None and stats.get("avg_valence") is not None: stats["mood_quadrant"] = { "x": round(stats["avg_valence"], 2), "y": round(stats["avg_energy"], 2) } + # Consistency: Inverse of average standard deviation of Mood components + avg_std = (stats["std_energy"] + stats["std_valence"]) / 2 + stats["consistency_score"] = round(1.0 - avg_std, 2) # Higher = more consistent + + # Whiplash Scores (Average jump between tracks) + stats["whiplash"] = {} + for k in ["tempo", "energy"]: + if transitions[k]: + stats["whiplash"][k] = round(float(np.mean(transitions[k])), 2) + else: + stats["whiplash"][k] = 0 return stats def compute_era_stats(self, period_start: datetime, period_end: datetime) -> Dict[str, Any]: """ - Musical Age and Era Distribution. + Includes Nostalgia Gap and granular decade breakdown. 
""" - query = self.db.query(PlayHistory).filter( + # Join track to get raw_data + query = self.db.query(PlayHistory).options(joinedload(PlayHistory.track)).filter( PlayHistory.played_at >= period_start, PlayHistory.played_at <= period_end ) plays = query.all() years = [] - track_ids = list(set([p.track_id for p in plays])) - tracks = self.db.query(Track).filter(Track.id.in_(track_ids)).all() - track_map = {t.id: t for t in tracks} - for p in plays: - t = track_map.get(p.track_id) - if t and t.raw_data and "album" in t.raw_data and "release_date" in t.raw_data["album"]: - rd = t.raw_data["album"]["release_date"] - # Format can be YYYY, YYYY-MM, YYYY-MM-DD - try: - year = int(rd.split("-")[0]) - years.append(year) - except: - pass + t = p.track + if t and t.raw_data and "album" in t.raw_data: + rd = t.raw_data["album"].get("release_date") + if rd: + try: + years.append(int(rd.split("-")[0])) + except: + pass if not years: return {"musical_age": None} + # Musical Age (Weighted Average) avg_year = sum(years) / len(years) + current_year = datetime.utcnow().year - # Decade breakdown + # Decade Distribution decades = {} for y in years: dec = (y // 10) * 10 @@ -329,11 +421,13 @@ class StatsService: decades[label] = decades.get(label, 0) + 1 total = len(years) - decade_dist = {k: round(v/total, 2) for k, v in decades.items()} + dist = {k: round(v / total, 3) for k, v in decades.items()} return { "musical_age": int(avg_year), - "decade_distribution": decade_dist + "nostalgia_gap": int(current_year - avg_year), + "freshness_score": dist.get(f"{int(current_year / 10) * 10}s", 0), # Share of current decade + "decade_distribution": dist } def compute_skip_stats(self, period_start: datetime, period_end: datetime) -> Dict[str, Any]: @@ -381,16 +475,191 @@ class StatsService: "skip_rate": round(skips / len(plays), 3) } - def generate_full_report(self, period_start: datetime, period_end: datetime) -> Dict[str, Any]: + def compute_context_stats(self, period_start: datetime, 
period_end: datetime) -> Dict[str, Any]: + """ + Analyzes context_uri to determine if user listens to Playlists, Albums, or Artists. + """ + query = self.db.query(PlayHistory).filter( + PlayHistory.played_at >= period_start, + PlayHistory.played_at <= period_end + ) + plays = query.all() + + if not plays: + return {} + + context_counts = {"playlist": 0, "album": 0, "artist": 0, "collection": 0, "unknown": 0} + unique_contexts = {} + + for p in plays: + if not p.context_uri: + context_counts["unknown"] += 1 + continue + + # Count distinct contexts for loyalty + unique_contexts[p.context_uri] = unique_contexts.get(p.context_uri, 0) + 1 + + if "playlist" in p.context_uri: + context_counts["playlist"] += 1 + elif "album" in p.context_uri: + context_counts["album"] += 1 + elif "artist" in p.context_uri: + context_counts["artist"] += 1 + elif "collection" in p.context_uri: + # "Liked Songs" usually shows up as collection + context_counts["collection"] += 1 + else: + context_counts["unknown"] += 1 + + total = len(plays) + breakdown = {k: round(v / total, 2) for k, v in context_counts.items()} + + # Top 5 Contexts (Requires resolving URI to name, possibly missing metadata here) + sorted_contexts = sorted(unique_contexts.items(), key=lambda x: x[1], reverse=True)[:5] + return { - "period": { - "start": period_start.isoformat(), - "end": period_end.isoformat() - }, + "type_breakdown": breakdown, + "album_purist_score": breakdown.get("album", 0), + "playlist_dependency": breakdown.get("playlist", 0), + "context_loyalty": round(len(plays) / len(unique_contexts), 2) if unique_contexts else 0, + "top_context_uris": [{"uri": k, "count": v} for k, v in sorted_contexts] + } + + def compute_taste_stats(self, period_start: datetime, period_end: datetime) -> Dict[str, Any]: + """ + Mainstream vs. Hipster analysis based on Track.popularity (0-100). 
+ """ + query = self.db.query(PlayHistory).filter( + PlayHistory.played_at >= period_start, + PlayHistory.played_at <= period_end + ) + plays = query.all() + if not plays: return {} + + track_ids = list(set([p.track_id for p in plays])) + tracks = self.db.query(Track).filter(Track.id.in_(track_ids)).all() + track_map = {t.id: t for t in tracks} + + pop_values = [] + for p in plays: + t = track_map.get(p.track_id) + if t and t.popularity is not None: + pop_values.append(t.popularity) + + if not pop_values: + return {"avg_popularity": 0, "hipster_score": 0} + + avg_pop = float(np.mean(pop_values)) + + # Hipster Score: Percentage of tracks with popularity < 30 + underground_plays = len([x for x in pop_values if x < 30]) + mainstream_plays = len([x for x in pop_values if x > 70]) + + return { + "avg_popularity": round(avg_pop, 1), + "hipster_score": round((underground_plays / len(pop_values)) * 100, 1), + "mainstream_score": round((mainstream_plays / len(pop_values)) * 100, 1), + "obscurity_rating": round(100 - avg_pop, 1) + } + + def compute_lifecycle_stats(self, period_start: datetime, period_end: datetime) -> Dict[str, Any]: + """ + Determines if tracks are 'New Discoveries' or 'Old Favorites'. + """ + # 1. Get tracks played in this period + current_plays = self.db.query(PlayHistory).filter( + PlayHistory.played_at >= period_start, + PlayHistory.played_at <= period_end + ).all() + + if not current_plays: return {} + + current_track_ids = set([p.track_id for p in current_plays]) + + # 2. Check if these tracks were played BEFORE period_start + # We find which of the current_track_ids exist in history < period_start + old_tracks_query = self.db.query(distinct(PlayHistory.track_id)).filter( + PlayHistory.track_id.in_(current_track_ids), + PlayHistory.played_at < period_start + ) + old_track_ids = set([r[0] for r in old_tracks_query.all()]) + + # 3. 
Calculate Discovery + new_discoveries = current_track_ids - old_track_ids + discovery_count = len(new_discoveries) + + # Calculate plays on new discoveries + plays_on_new = len([p for p in current_plays if p.track_id in new_discoveries]) + total_plays = len(current_plays) + + return { + "discovery_count": discovery_count, + "discovery_rate": round(plays_on_new / total_plays, 3) if total_plays > 0 else 0, + "recurrence_rate": round((total_plays - plays_on_new) / total_plays, 3) if total_plays > 0 else 0 + } + + def compute_explicit_stats(self, period_start: datetime, period_end: datetime) -> Dict[str, Any]: + """ + Analyzes explicit content consumption. + """ + query = self.db.query(PlayHistory).options(joinedload(PlayHistory.track)).filter( + PlayHistory.played_at >= period_start, + PlayHistory.played_at <= period_end + ) + plays = query.all() + + if not plays: return {"explicit_rate": 0, "hourly_explicit_rate": []} + + total_plays = len(plays) + explicit_count = 0 + hourly_explicit = [0] * 24 + hourly_total = [0] * 24 + + for p in plays: + h = p.played_at.hour + hourly_total[h] += 1 + + # Check raw_data for explicit flag + t = p.track + is_explicit = False + if t.raw_data and t.raw_data.get("explicit"): + is_explicit = True + + if is_explicit: + explicit_count += 1 + hourly_explicit[h] += 1 + + # Calculate hourly percentages + hourly_rates = [] + for i in range(24): + if hourly_total[i] > 0: + hourly_rates.append(round(hourly_explicit[i] / hourly_total[i], 2)) + else: + hourly_rates.append(0.0) + + return { + "explicit_rate": round(explicit_count / total_plays, 3), + "total_explicit_plays": explicit_count, + "hourly_explicit_distribution": hourly_rates + } + + def generate_full_report(self, period_start: datetime, period_end: datetime) -> Dict[str, Any]: + # 1. 
    def generate_full_report(self, period_start: datetime, period_end: datetime) -> Dict[str, Any]:
        """
        Assemble the complete metrics payload for [period_start, period_end]:
        every sub-report keyed by section, plus a 'comparison' block with
        deltas against the immediately preceding window of equal length.
        """
        # 1. Calculate all current stats
        current_stats = {
            "period": {"start": period_start.isoformat(), "end": period_end.isoformat()},
            "volume": self.compute_volume_stats(period_start, period_end),
            "time_habits": self.compute_time_stats(period_start, period_end),
            "sessions": self.compute_session_stats(period_start, period_end),
            "context": self.compute_context_stats(period_start, period_end),
            "vibe": self.compute_vibe_stats(period_start, period_end),
            "era": self.compute_era_stats(period_start, period_end),
            "taste": self.compute_taste_stats(period_start, period_end),
            "lifecycle": self.compute_lifecycle_stats(period_start, period_end),
            "flags": self.compute_explicit_stats(period_start, period_end),
            "skips": self.compute_skip_stats(period_start, period_end)
        }

        # 2. Calculate Comparison (reads the stats computed above, so it must
        # run after the dict is fully built).
        current_stats["comparison"] = self.compute_comparison(current_stats, period_start, period_end)

        return current_stats