Refactor Stats and Narrative services to match spec

- StatsService: Fixed N+1 queries, added missing metrics (whiplash, entropy, lifecycle), and improved correctness (boundary checks, null handling).
- NarrativeService: Added payload shaping for token efficiency, improved JSON robustness, and updated prompts to align with persona specs.
- Documentation: Added backend/TECHNICAL_DOCS.md detailing the logic.
This commit is contained in:
bnair123
2025-12-25 18:12:05 +04:00
parent 508d001d7e
commit af0d985253
3 changed files with 410 additions and 202 deletions

View File

@@ -1,10 +1,11 @@
import os
import json
import re
import google.generativeai as genai
from typing import Dict, Any
from typing import Dict, Any, List, Optional
class NarrativeService:
def __init__(self, model_name: str = "gemini-2.5-flash"):
def __init__(self, model_name: str = "gemini-2.0-flash-exp"):
self.api_key = os.getenv("GEMINI_API_KEY")
if not self.api_key:
print("WARNING: GEMINI_API_KEY not found. LLM features will fail.")
@@ -13,47 +14,111 @@ class NarrativeService:
self.model_name = model_name
def generate_narrative(self, stats_json: Dict[str, Any]) -> Dict[str, str]:
def generate_full_narrative(self, stats_json: Dict[str, Any]) -> Dict[str, Any]:
"""
Orchestrates the generation of the full narrative report.
Currently uses a single call for consistency and speed.
"""
if not self.api_key:
return {"error": "Missing API Key"}
return self._get_fallback_narrative()
clean_stats = self._shape_payload(stats_json)
prompt = f"""
You are a witty, insightful, and slightly snarky music critic analyzing a user's listening history.
Below is a JSON summary of their listening data.
You are a witty, insightful, and slightly snarky music critic analyzing a user's Spotify listening data.
Your goal is to generate a JSON report that acts as a deeper, more honest "Spotify Wrapped".
Your goal is to generate a report that feels like a 'Spotify Wrapped' but deeper and more honest.
**CORE RULES:**
1. **NO Mental Health Diagnoses:** Do not mention depression, anxiety, or therapy. Stick to behavioral descriptors (e.g., "introspective", "high-energy").
2. **Be Specific:** Use the provided metrics. Don't say "You like pop," say "Your Mainstream Score of 85% suggests..."
3. **Roast Gently:** Be playful but not cruel.
4. **JSON Output Only:** Return strictly valid JSON.
Please output your response in strict JSON format with the following keys:
1. "vibe_check": (String) 2-3 paragraphs describing their overall listening personality.
2. "patterns": (List of Strings) 3-5 specific observations based on the data (e.g., "You listen to sad music on Tuesdays", "Your Whiplash Score is high").
3. "persona": (String) A creative label for the user (e.g., "The Genre Chameleon", "Nostalgic Dad-Rocker", "Algorithm Victim").
4. "roast": (String) A playful, harmlessly mean roast about their taste (1-2 sentences).
5. "era_insight": (String) A specific comment on their 'Musical Age' and 'Nostalgia Gap'.
**DATA TO ANALYZE:**
{json.dumps(clean_stats, indent=2)}
GUIDELINES:
- **Use the Metrics:** Do not just say "You like pop." Say "Your Mainstream Score of 85% suggests you live on the Top 40."
- **Whiplash Score:** If 'whiplash' > 20, comment on their chaotic transitions.
- **Hipster Score:** If 'hipster_score' > 50, call them pretentious; if < 10, call them basic.
- **Comparison:** Use the 'comparison' block to mention if they are listening more/less or if their mood (valence/energy) has shifted.
- **Tone:** Conversational, fun, slightly judgmental but good-natured.
DATA:
{json.dumps(stats_json, indent=2)}
OUTPUT (JSON):
**REQUIRED JSON STRUCTURE:**
{{
"vibe_check": "2-3 paragraphs describing their overall listening personality this period.",
"patterns": ["Observation 1", "Observation 2", "Observation 3 (Look for specific habits like skipping or late-night sessions)"],
"persona": "A creative label (e.g., 'The Genre Chameleon', 'Nostalgic Dad-Rocker').",
"era_insight": "A specific comment on their Musical Age ({clean_stats.get('era', {}).get('musical_age', 'N/A')}) and Nostalgia Gap.",
"roast": "A 1-2 sentence playful roast about their taste.",
"comparison": "A short comment comparing this period to the previous one (if data exists)."
}}
"""
try:
model = genai.GenerativeModel(self.model_name)
response = model.generate_content(prompt)
# Clean up response to ensure valid JSON
text = response.text.strip()
if text.startswith("```json"):
text = text.replace("```json", "").replace("```", "")
elif text.startswith("```"):
text = text.replace("```", "")
return json.loads(text)
# Use JSON mode if available, otherwise rely on prompt + cleaning
response = model.generate_content(
prompt,
generation_config={"response_mime_type": "application/json"}
)
return self._clean_and_parse_json(response.text)
except Exception as e:
return {"error": str(e), "raw_response": "Error generating narrative."}
print(f"LLM Generation Error: {e}")
return self._get_fallback_narrative()
def _shape_payload(self, stats: Dict[str, Any]) -> Dict[str, Any]:
"""
Compresses the stats JSON to save tokens and focus the LLM.
Removes raw lists beyond top 5/10.
"""
s = stats.copy()
# Simplify Volume
if "volume" in s:
s["volume"] = {
k: v for k, v in s["volume"].items()
if k not in ["top_tracks", "top_artists", "top_albums", "top_genres"]
}
# Add back condensed top lists (just names)
s["volume"]["top_tracks"] = [t["name"] for t in stats["volume"].get("top_tracks", [])[:5]]
s["volume"]["top_artists"] = [a["name"] for a in stats["volume"].get("top_artists", [])[:5]]
s["volume"]["top_genres"] = [g["name"] for g in stats["volume"].get("top_genres", [])[:5]]
# Simplify Time (Keep distributions but maybe round them?)
# Keeping hourly/daily is fine, they are small arrays.
# Simplify Vibe (Remove huge transition arrays if they accidentally leaked, though stats service handles this)
# Remove period details if verbose
return s
def _clean_and_parse_json(self, raw_text: str) -> Dict[str, Any]:
"""
Robust JSON extractor.
"""
try:
# 1. Try direct parse
return json.loads(raw_text)
except json.JSONDecodeError:
pass
# 2. Extract between first { and last }
try:
match = re.search(r"\{.*\}", raw_text, re.DOTALL)
if match:
return json.loads(match.group(0))
except:
pass
return self._get_fallback_narrative()
def _get_fallback_narrative(self) -> Dict[str, Any]:
return {
"vibe_check": "Data processing error. You're too mysterious for us to analyze right now.",
"patterns": [],
"persona": "The Enigma",
"era_insight": "Time is a flat circle.",
"roast": "You broke the machine. Congratulations.",
"comparison": "N/A"
}
# Individual accessors if needed by frontend, though full_narrative is preferred
# NOTE(review): each accessor below runs a *separate* generate_full_narrative
# call (one LLM request apiece) and discards every other field of the report.
# Callers needing more than one field should call generate_full_narrative once
# and pick fields from its result instead.
def generate_vibe_check(self, stats): return self.generate_full_narrative(stats).get("vibe_check")
def identify_patterns(self, stats): return self.generate_full_narrative(stats).get("patterns")
def generate_persona(self, stats): return self.generate_full_narrative(stats).get("persona")
def generate_roast(self, stats): return self.generate_full_narrative(stats).get("roast")