# MusicAnalyser/backend/app/services/narrative_service.py

import os
import json
import re
import google.generativeai as genai
from typing import Dict, Any, List, Optional

class NarrativeService:
    """Generate an LLM-written narrative report from listening statistics.

    The service reads ``GEMINI_API_KEY`` from the environment. When the key is
    missing, or when anything goes wrong while building the prompt, calling
    the model, or parsing its reply, a static fallback narrative is returned
    instead of raising.
    """

    # Keys of ``stats["volume"]`` that hold raw ranked lists and are stripped
    # from the payload before it is sent to the model.
    _RAW_TOP_LIST_KEYS = ("top_tracks", "top_artists", "top_albums", "top_genres")
    # Subset of the above that is added back in condensed (names only) form.
    _CONDENSED_TOP_LIST_KEYS = ("top_tracks", "top_artists", "top_genres")
    # Number of leading entries kept from each condensed list.
    _TOP_LIST_LIMIT = 5

    def __init__(self, model_name: str = "gemini-2.0-flash-exp"):
        """Configure the Gemini client if an API key is available.

        Args:
            model_name: Name of the generative model to instantiate per call.
        """
        self.api_key = os.getenv("GEMINI_API_KEY")
        if not self.api_key:
            print("WARNING: GEMINI_API_KEY not found. LLM features will fail.")
        else:
            genai.configure(api_key=self.api_key)

        self.model_name = model_name

    def generate_full_narrative(self, stats_json: Dict[str, Any]) -> Dict[str, Any]:
        """Generate the full narrative report with a single model call.

        Args:
            stats_json: Listening statistics to summarise.

        Returns:
            The parsed JSON object produced by the model, or the fallback
            narrative when no API key is configured or any step fails.
        """
        if not self.api_key:
            return self._get_fallback_narrative()

        # Payload shaping and prompt construction live inside the ``try`` so
        # that malformed or non-serialisable stats degrade to the fallback
        # narrative exactly like a model/API failure does.
        try:
            clean_stats = self._shape_payload(stats_json)
            prompt = self._build_prompt(clean_stats)

            model = genai.GenerativeModel(self.model_name)
            # Ask for a JSON response; _clean_and_parse_json still tolerates
            # replies that wrap the object in extra text.
            response = model.generate_content(
                prompt,
                generation_config={"response_mime_type": "application/json"},
            )
            return self._clean_and_parse_json(response.text)
        except Exception as e:
            print(f"LLM Generation Error: {e}")
            return self._get_fallback_narrative()

    def _build_prompt(self, clean_stats: Dict[str, Any]) -> str:
        """Render the critic prompt for the already-shaped stats payload.

        Raises:
            TypeError / ValueError: If ``clean_stats`` is not JSON-serialisable.
        """
        era = clean_stats.get("era")
        # ``era`` may be absent or not a mapping; fall back to the placeholder.
        musical_age = era.get("musical_age", "N/A") if isinstance(era, dict) else "N/A"
        stats_block = json.dumps(clean_stats, indent=2)

        return f"""
You are a witty, insightful, and slightly snarky music critic analyzing a user's Spotify listening data.
Your goal is to generate a JSON report that acts as a deeper, more honest "Spotify Wrapped".

**CORE RULES:**
1. **NO Mental Health Diagnoses:** Do not mention depression, anxiety, or therapy. Stick to behavioral descriptors (e.g., "introspective", "high-energy").
2. **Be Specific:** Use the provided metrics. Don't say "You like pop," say "Your Mainstream Score of 85% suggests..."
3. **Roast Gently:** Be playful but not cruel.
4. **JSON Output Only:** Return strictly valid JSON.

**DATA TO ANALYZE:**
{stats_block}

**REQUIRED JSON STRUCTURE:**
{{
  "vibe_check": "2-3 paragraphs describing their overall listening personality this period.",
  "patterns": ["Observation 1", "Observation 2", "Observation 3 (Look for specific habits like skipping or late-night sessions)"],
  "persona": "A creative label (e.g., 'The Genre Chameleon', 'Nostalgic Dad-Rocker').",
  "era_insight": "A specific comment on their Musical Age ({musical_age}) and Nostalgia Gap.",
  "roast": "A 1-2 sentence playful roast about their taste.",
  "comparison": "A short comment comparing this period to the previous one (if data exists)."
}}
"""

    def _shape_payload(self, stats: Dict[str, Any]) -> Dict[str, Any]:
        """Return a trimmed copy of ``stats`` to save tokens and focus the LLM.

        Within ``stats["volume"]`` the raw top tracks/artists/albums/genres
        lists are dropped; tracks, artists and genres are added back as lists
        of the names of their first five entries. Everything else is passed
        through unchanged. The input mapping is not mutated.

        A non-dict ``stats`` yields ``{}``; a non-dict ``volume`` entry is
        left as-is.
        """
        if not isinstance(stats, dict):
            return {}

        shaped = stats.copy()  # shallow copy: only "volume" is replaced below

        volume = stats.get("volume")
        if isinstance(volume, dict):
            condensed = {
                key: value
                for key, value in volume.items()
                if key not in self._RAW_TOP_LIST_KEYS
            }
            for key in self._CONDENSED_TOP_LIST_KEYS:
                condensed[key] = self._top_names(volume.get(key), self._TOP_LIST_LIMIT)
            shaped["volume"] = condensed

        return shaped

    @staticmethod
    def _top_names(items: Any, limit: int = 5) -> List[Any]:
        """Extract the ``"name"`` of each of the first ``limit`` entries.

        Entries that are plain strings are kept as-is; entries that are
        neither a dict with a ``"name"`` key nor a string are skipped. A
        missing or non-list ``items`` yields an empty list.
        """
        if not isinstance(items, (list, tuple)):
            return []

        names: List[Any] = []
        for item in items[:limit]:
            if isinstance(item, dict) and "name" in item:
                names.append(item["name"])
            elif isinstance(item, str):
                names.append(item)
        return names

    def _clean_and_parse_json(self, raw_text: str) -> Dict[str, Any]:
        """Extract a JSON object from the model's reply.

        Tries a direct parse first, then the span between the first ``{`` and
        the last ``}`` (covers replies wrapped in prose or code fences, or an
        object wrapped in an array).

        Returns:
            The parsed object, or the fallback narrative when ``raw_text`` is
            not a string or no JSON *object* can be recovered from it.
        """
        if not isinstance(raw_text, str):
            return self._get_fallback_narrative()

        # 1. Try direct parse
        try:
            parsed = json.loads(raw_text)
        except json.JSONDecodeError:
            parsed = None
        if isinstance(parsed, dict):
            return parsed

        # 2. Extract between first { and last }
        match = re.search(r"\{.*\}", raw_text, re.DOTALL)
        if match:
            try:
                parsed = json.loads(match.group(0))
            except json.JSONDecodeError:
                parsed = None
            if isinstance(parsed, dict):
                return parsed

        return self._get_fallback_narrative()

    def _get_fallback_narrative(self) -> Dict[str, Any]:
        """Return the static narrative used whenever generation is unavailable."""
        return {
            "vibe_check": "Data processing error. You're too mysterious for us to analyze right now.",
            "patterns": [],
            "persona": "The Enigma",
            "era_insight": "Time is a flat circle.",
            "roast": "You broke the machine. Congratulations.",
            "comparison": "N/A"
        }

    # Individual accessors if needed by frontend, though full_narrative is preferred.
    # NOTE: each accessor runs a complete generate_full_narrative() call, so
    # requesting several fields separately issues one model request per field.
    def generate_vibe_check(self, stats):
        """Return only the ``vibe_check`` field of a freshly generated narrative."""
        return self.generate_full_narrative(stats).get("vibe_check")

    def identify_patterns(self, stats):
        """Return only the ``patterns`` field of a freshly generated narrative."""
        return self.generate_full_narrative(stats).get("patterns")

    def generate_persona(self, stats):
        """Return only the ``persona`` field of a freshly generated narrative."""
        return self.generate_full_narrative(stats).get("persona")

    def generate_roast(self, stats):
        """Return only the ``roast`` field of a freshly generated narrative."""
        return self.generate_full_narrative(stats).get("roast")