Add skip tracking, compressed heatmap, listening log, docs, tests, and OpenAI support

Major changes:
- Add skip tracking: poll currently-playing every 15s, detect skips (<30s listened)
- Add listening-log and sessions API endpoints
- Fix ReccoBeats client to extract spotify_id from href response
- Compress heatmap from 24 hours to 6 x 4-hour blocks
- Add OpenAI support in narrative service (use max_completion_tokens for new models)
- Add ListeningLog component with timeline and list views
- Update all frontend components to use real data (album art, play counts)
- Add docker-compose external network (dockernet) support
- Add comprehensive documentation (API, DATA_MODEL, ARCHITECTURE, FRONTEND)
- Add unit tests for ingest and API endpoints
This commit is contained in:
bnair123
2025-12-30 00:15:01 +04:00
parent faee830545
commit 887e78bf47
26 changed files with 1942 additions and 662 deletions

View File

@@ -1,6 +1,6 @@
import asyncio
import os
from datetime import datetime
from datetime import datetime, timedelta
from sqlalchemy.orm import Session
from .models import Track, PlayHistory, Artist
from .database import SessionLocal
@@ -9,6 +9,17 @@ from .services.reccobeats_client import ReccoBeatsClient
from .services.genius_client import GeniusClient
from dateutil import parser
class PlaybackTracker:
    """Mutable state for the currently-playing poll loop.

    One instance is shared across polls; `poll_currently_playing` mutates it
    and `finalize_track` flushes it to PlayHistory and zeroes it again.
    """

    def __init__(self):
        # Identity of the track being followed (None => nothing tracked yet).
        self.current_track_id = None
        self.track_start_time = None
        self.last_poll_time = None
        # Milliseconds actually listened, accumulated only while playing.
        self.accumulated_listen_ms = 0
        self.last_progress_ms = 0
        self.is_paused = False
# Initialize Clients
def get_spotify_client():
return SpotifyClient(
@@ -17,12 +28,15 @@ def get_spotify_client():
refresh_token=os.getenv("SPOTIFY_REFRESH_TOKEN"),
)
def get_reccobeats_client():
return ReccoBeatsClient()
def get_genius_client():
return GeniusClient()
async def ensure_artists_exist(db: Session, artists_data: list):
"""
Ensures that all artists in the list exist in the Artist table.
@@ -36,18 +50,19 @@ async def ensure_artists_exist(db: Session, artists_data: list):
img = None
if "images" in a_data and a_data["images"]:
img = a_data["images"][0]["url"]
artist = Artist(
id=artist_id,
name=a_data["name"],
genres=[],
image_url=img
)
artist = Artist(id=artist_id, name=a_data["name"], genres=[], image_url=img)
db.add(artist)
artist_objects.append(artist)
return artist_objects
async def enrich_tracks(db: Session, spotify_client: SpotifyClient, recco_client: ReccoBeatsClient, genius_client: GeniusClient):
async def enrich_tracks(
db: Session,
spotify_client: SpotifyClient,
recco_client: ReccoBeatsClient,
genius_client: GeniusClient,
):
"""
Enrichment Pipeline:
1. Audio Features (ReccoBeats)
@@ -56,18 +71,19 @@ async def enrich_tracks(db: Session, spotify_client: SpotifyClient, recco_client
"""
# 1. Enrich Audio Features
tracks_missing_features = db.query(Track).filter(Track.danceability == None).limit(50).all()
tracks_missing_features = (
db.query(Track).filter(Track.danceability == None).limit(50).all()
)
if tracks_missing_features:
print(f"Enriching {len(tracks_missing_features)} tracks with audio features...")
ids = [t.id for t in tracks_missing_features]
features_list = await recco_client.get_audio_features(ids)
# Map features by ID
features_map = {}
for f in features_list:
# Handle potential ID mismatch or URI format
tid = f.get("id")
if tid: features_map[tid] = f
tid = f.get("spotify_id") or f.get("id")
if tid:
features_map[tid] = f
for track in tracks_missing_features:
data = features_map.get(track.id)
@@ -83,61 +99,74 @@ async def enrich_tracks(db: Session, spotify_client: SpotifyClient, recco_client
track.liveness = data.get("liveness")
track.valence = data.get("valence")
track.tempo = data.get("tempo")
db.commit()
# 2. Enrich Artist Genres & Images (Spotify)
artists_missing_data = db.query(Artist).filter((Artist.genres == None) | (Artist.image_url == None)).limit(50).all()
artists_missing_data = (
db.query(Artist)
.filter((Artist.genres == None) | (Artist.image_url == None))
.limit(50)
.all()
)
if artists_missing_data:
print(f"Enriching {len(artists_missing_data)} artists with genres/images...")
artist_ids_list = [a.id for a in artists_missing_data]
artist_data_map = {}
for i in range(0, len(artist_ids_list), 50):
chunk = artist_ids_list[i:i+50]
chunk = artist_ids_list[i : i + 50]
artists_data = await spotify_client.get_artists(chunk)
for a_data in artists_data:
if a_data:
img = a_data["images"][0]["url"] if a_data.get("images") else None
artist_data_map[a_data["id"]] = {
"genres": a_data.get("genres", []),
"image_url": img
"image_url": img,
}
for artist in artists_missing_data:
data = artist_data_map.get(artist.id)
if data:
if artist.genres is None: artist.genres = data["genres"]
if artist.image_url is None: artist.image_url = data["image_url"]
if artist.genres is None:
artist.genres = data["genres"]
if artist.image_url is None:
artist.image_url = data["image_url"]
elif artist.genres is None:
artist.genres = [] # Prevent retry loop
artist.genres = [] # Prevent retry loop
db.commit()
# 3. Enrich Lyrics (Genius)
# Only fetch for tracks that have been played recently to avoid spamming Genius API
tracks_missing_lyrics = db.query(Track).filter(Track.lyrics == None).order_by(Track.updated_at.desc()).limit(10).all()
tracks_missing_lyrics = (
db.query(Track)
.filter(Track.lyrics == None)
.order_by(Track.updated_at.desc())
.limit(10)
.all()
)
if tracks_missing_lyrics and genius_client.genius:
print(f"Enriching {len(tracks_missing_lyrics)} tracks with lyrics (Genius)...")
for track in tracks_missing_lyrics:
# We need the primary artist name
artist_name = track.artist.split(",")[0] # Heuristic: take first artist
artist_name = track.artist.split(",")[0] # Heuristic: take first artist
print(f"Searching Genius for: {track.name} by {artist_name}")
data = genius_client.search_song(track.name, artist_name)
if data:
track.lyrics = data["lyrics"]
# Fallback: if we didn't get high-res art from Spotify, use Genius
if not track.image_url and data.get("image_url"):
track.image_url = data["image_url"]
else:
track.lyrics = "" # Mark as empty to prevent retry loop
track.lyrics = "" # Mark as empty to prevent retry loop
# Small sleep to be nice to API? GeniusClient is synchronous.
# We are in async function but GeniusClient is blocking. It's fine for worker.
db.commit()
@@ -164,7 +193,7 @@ async def ingest_recently_played(db: Session):
if not track:
print(f"New track found: {track_data['name']}")
# Extract Album Art
image_url = None
if track_data.get("album") and track_data["album"].get("images"):
@@ -173,12 +202,12 @@ async def ingest_recently_played(db: Session):
track = Track(
id=track_id,
name=track_data["name"],
artist=", ".join([a["name"] for a in track_data["artists"]]),
artist=", ".join([a["name"] for a in track_data["artists"]]),
album=track_data["album"]["name"],
image_url=image_url,
duration_ms=track_data["duration_ms"],
popularity=track_data["popularity"],
raw_data=track_data
raw_data=track_data,
)
# Handle Artists Relation
@@ -191,21 +220,27 @@ async def ingest_recently_played(db: Session):
# Ensure relationships exist logic...
if not track.artists and track.raw_data and "artists" in track.raw_data:
artist_objects = await ensure_artists_exist(db, track.raw_data["artists"])
track.artists = artist_objects
db.commit()
artist_objects = await ensure_artists_exist(db, track.raw_data["artists"])
track.artists = artist_objects
db.commit()
exists = db.query(PlayHistory).filter(
PlayHistory.track_id == track_id,
PlayHistory.played_at == played_at
).first()
exists = (
db.query(PlayHistory)
.filter(
PlayHistory.track_id == track_id, PlayHistory.played_at == played_at
)
.first()
)
if not exists:
print(f" recording play: {track_data['name']} at {played_at}")
play = PlayHistory(
track_id=track_id,
played_at=played_at,
context_uri=item.get("context", {}).get("uri") if item.get("context") else None
context_uri=item.get("context", {}).get("uri")
if item.get("context")
else None,
source="recently_played",
)
db.add(play)
@@ -214,17 +249,145 @@ async def ingest_recently_played(db: Session):
# Enrich
await enrich_tracks(db, spotify_client, recco_client, genius_client)
async def run_worker():
"""Simulates a background worker loop."""
db = SessionLocal()
tracker = PlaybackTracker()
spotify_client = get_spotify_client()
poll_count = 0
try:
while True:
print("Worker: Polling Spotify...")
await ingest_recently_played(db)
print("Worker: Sleeping for 60 seconds...")
await asyncio.sleep(60)
poll_count += 1
await poll_currently_playing(db, spotify_client, tracker)
if poll_count % 4 == 0:
print("Worker: Polling recently-played...")
await ingest_recently_played(db)
await asyncio.sleep(15)
except Exception as e:
print(f"Worker crashed: {e}")
finally:
db.close()
async def poll_currently_playing(
    db: Session, spotify_client: SpotifyClient, tracker: PlaybackTracker
):
    """Poll Spotify's currently-playing endpoint once and update `tracker`.

    Finalizes the previously tracked play on track change (or when playback
    stops), accumulates listened wall-clock time while playback is active,
    and records pause state. Network errors are logged and swallowed so the
    worker loop keeps running.
    """
    try:
        response = await spotify_client.get_currently_playing()
    except Exception as e:
        print(f"Error polling currently-playing: {e}")
        return

    now = datetime.utcnow()

    # Nothing playing, or an ad/podcast episode: close out whatever we were
    # tracking and wait for the next poll.
    if not response or response.get("currently_playing_type") != "track":
        if tracker.current_track_id and tracker.last_poll_time:
            finalize_track(db, tracker)
        return

    item = response.get("item")
    if not item:
        return

    # Local files can appear with a missing/null id; skip them rather than
    # crash or pollute the tracker with a None key.
    current_track_id = item.get("id")
    if not current_track_id:
        return

    # progress_ms may be present but null in the payload; normalize to 0 so
    # the timedelta arithmetic below never sees None.
    current_progress_ms = response.get("progress_ms") or 0
    is_playing = response.get("is_playing", False)

    if current_track_id != tracker.current_track_id:
        # Track changed: finalize the old one, then start tracking the new one.
        if tracker.current_track_id and tracker.last_poll_time:
            finalize_track(db, tracker)
        tracker.current_track_id = current_track_id
        # Back-date the start so played_at reflects when the track began.
        tracker.track_start_time = now - timedelta(milliseconds=current_progress_ms)
        tracker.accumulated_listen_ms = current_progress_ms if is_playing else 0
        tracker.last_progress_ms = current_progress_ms
        tracker.last_poll_time = now
        tracker.is_paused = not is_playing
        await ensure_track_exists(db, item, spotify_client)
    else:
        # Same track: credit elapsed wall-clock time only while playback was
        # active across the whole interval (not paused at either end).
        if tracker.last_poll_time:
            time_delta_ms = (now - tracker.last_poll_time).total_seconds() * 1000
            if is_playing and not tracker.is_paused:
                tracker.accumulated_listen_ms += time_delta_ms
        tracker.last_progress_ms = current_progress_ms
        tracker.last_poll_time = now
        tracker.is_paused = not is_playing
def finalize_track(db: Session, tracker: PlaybackTracker):
    """Persist the play the tracker was following, then reset the tracker.

    If a recently-played row for the same track already exists within +-5s of
    the tracked start time, it is upgraded in place with precise listen data
    (only when it has none yet); otherwise a new PlayHistory row is inserted.
    """
    listened_ms = int(tracker.accumulated_listen_ms)
    skipped = listened_ms < 30000  # under 30 seconds of listening counts as a skip

    window = timedelta(seconds=5)
    existing = (
        db.query(PlayHistory)
        .filter(
            PlayHistory.track_id == tracker.current_track_id,
            PlayHistory.played_at >= tracker.track_start_time - window,
            PlayHistory.played_at <= tracker.track_start_time + window,
        )
        .first()
    )

    if existing is None:
        db.add(
            PlayHistory(
                track_id=tracker.current_track_id,
                played_at=tracker.track_start_time,
                listened_ms=listened_ms,
                skipped=skipped,
                source="currently_playing",
            )
        )
        db.commit()
    elif existing.listened_ms is None:
        # Upgrade the coarse recently-played row with the measured data.
        existing.listened_ms = listened_ms
        existing.skipped = skipped
        existing.source = "currently_playing"
        db.commit()

    print(
        f"Finalized: {tracker.current_track_id} listened={listened_ms}ms skipped={skipped}"
    )

    # Zero out the tracker so the next poll starts fresh.
    tracker.current_track_id = None
    tracker.track_start_time = None
    tracker.accumulated_listen_ms = 0
    tracker.last_progress_ms = 0
    tracker.last_poll_time = None
    tracker.is_paused = False
async def ensure_track_exists(
    db: Session, track_data: dict, spotify_client: SpotifyClient
):
    """Insert a Track row (and its artists) for `track_data` if not stored yet.

    No-op when the track already exists. `spotify_client` is accepted for
    signature parity with the rest of the ingest pipeline.
    """
    track_id = track_data["id"]
    if db.query(Track).filter(Track.id == track_id).first():
        return

    # `album` can be missing OR explicitly null (e.g. local files); the old
    # `track_data.get("album", {})` form crashed on an explicit null because
    # `.get()` was then called on None. Normalize both cases to {} once.
    album = track_data.get("album") or {}
    images = album.get("images") or []
    image_url = images[0]["url"] if images else None

    track = Track(
        id=track_id,
        name=track_data["name"],
        artist=", ".join(a["name"] for a in track_data.get("artists", [])),
        album=album.get("name", "Unknown"),
        image_url=image_url,
        duration_ms=track_data.get("duration_ms"),
        popularity=track_data.get("popularity"),
        raw_data=track_data,
    )
    # Create/attach Artist rows before committing the new track.
    artist_objects = await ensure_artists_exist(db, track_data.get("artists", []))
    track.artists = artist_objects
    db.add(track)
    db.commit()

View File

@@ -1,11 +1,15 @@
from fastapi import FastAPI, Depends, HTTPException, BackgroundTasks
from sqlalchemy.orm import Session
from fastapi import FastAPI, Depends, HTTPException, BackgroundTasks, Query
from sqlalchemy.orm import Session, joinedload
from datetime import datetime, timedelta
from typing import List, Optional
from dotenv import load_dotenv
from .database import engine, Base, get_db
from .models import PlayHistory as PlayHistoryModel, Track as TrackModel, AnalysisSnapshot
from .models import (
PlayHistory as PlayHistoryModel,
Track as TrackModel,
AnalysisSnapshot,
)
from . import schemas
from .ingest import ingest_recently_played
from .services.stats_service import StatsService
@@ -13,7 +17,6 @@ from .services.narrative_service import NarrativeService
load_dotenv()
# Create tables
Base.metadata.create_all(bind=engine)
from fastapi.middleware.cors import CORSMiddleware
@@ -22,37 +25,49 @@ app = FastAPI(title="Music Analyser Backend")
app.add_middleware(
CORSMiddleware,
allow_origins=["http://localhost:5173"],
allow_origins=["http://localhost:5173", "http://localhost:8991"],
allow_credentials=True,
allow_methods=["*"],
allow_headers=["*"],
)
@app.get("/")
def read_root():
    """Health-check endpoint confirming the API is up."""
    payload = {"status": "ok", "message": "Music Analyser API is running"}
    return payload
@app.get("/history", response_model=List[schemas.PlayHistory])
def get_history(limit: int = 50, db: Session = Depends(get_db)):
history = db.query(PlayHistoryModel).order_by(PlayHistoryModel.played_at.desc()).limit(limit).all()
history = (
db.query(PlayHistoryModel)
.order_by(PlayHistoryModel.played_at.desc())
.limit(limit)
.all()
)
return history
@app.get("/tracks", response_model=List[schemas.Track])
def get_tracks(limit: int = 50, db: Session = Depends(get_db)):
    """Return up to `limit` Track rows (no particular ordering)."""
    query = db.query(TrackModel).limit(limit)
    return query.all()
@app.post("/trigger-ingest")
async def trigger_ingest(background_tasks: BackgroundTasks, db: Session = Depends(get_db)):
async def trigger_ingest(
background_tasks: BackgroundTasks, db: Session = Depends(get_db)
):
"""Triggers Spotify ingestion in the background."""
background_tasks.add_task(ingest_recently_played, db)
return {"status": "Ingestion started in background"}
@app.post("/trigger-analysis")
def trigger_analysis(
days: int = 30,
model_name: str = "gemini-2.5-flash",
db: Session = Depends(get_db)
model_name: str = "gpt-5-mini-2025-08-07",
db: Session = Depends(get_db),
):
"""
Runs the full analysis pipeline (Stats + LLM) for the last X days.
@@ -67,7 +82,9 @@ def trigger_analysis(
stats_json = stats_service.generate_full_report(start_date, end_date)
if stats_json["volume"]["total_plays"] == 0:
raise HTTPException(status_code=404, detail="No plays found in the specified period.")
raise HTTPException(
status_code=404, detail="No plays found in the specified period."
)
narrative_service = NarrativeService(model_name=model_name)
narrative_json = narrative_service.generate_full_narrative(stats_json)
@@ -79,7 +96,7 @@ def trigger_analysis(
period_label=f"last_{days}_days",
metrics_payload=stats_json,
narrative_report=narrative_json,
model_used=model_name
model_used=model_name,
)
db.add(snapshot)
db.commit()
@@ -90,7 +107,7 @@ def trigger_analysis(
"snapshot_id": snapshot.id,
"period": {"start": start_date, "end": end_date},
"metrics": stats_json,
"narrative": narrative_json
"narrative": narrative_json,
}
except HTTPException:
@@ -99,7 +116,91 @@ def trigger_analysis(
print(f"Analysis Failed: {e}")
raise HTTPException(status_code=500, detail=str(e))
@app.get("/snapshots")
def get_snapshots(limit: int = 10, db: Session = Depends(get_db)):
"""Retrieve past analysis snapshots."""
return db.query(AnalysisSnapshot).order_by(AnalysisSnapshot.date.desc()).limit(limit).all()
return (
db.query(AnalysisSnapshot)
.order_by(AnalysisSnapshot.date.desc())
.limit(limit)
.all()
)
@app.get("/listening-log")
def get_listening_log(
    days: int = Query(default=7, ge=1, le=365),
    limit: int = Query(default=200, ge=1, le=1000),
    db: Session = Depends(get_db),
):
    """Return the most recent plays (newest first) for the last `days` days.

    Each entry joins in track metadata and carries listen/skip data. Rows
    ingested via recently-played have no measured `listened_ms`, so it is
    estimated from the gap to the adjacent play history row.
    """
    end_date = datetime.utcnow()
    start_date = end_date - timedelta(days=days)
    plays = (
        db.query(PlayHistoryModel)
        # Eager-load the Track relation to avoid N+1 queries in the loop below.
        .options(joinedload(PlayHistoryModel.track))
        .filter(
            PlayHistoryModel.played_at >= start_date,
            PlayHistoryModel.played_at <= end_date,
        )
        .order_by(PlayHistoryModel.played_at.desc())
        .limit(limit)
        .all()
    )
    result = []
    for i, play in enumerate(plays):
        track = play.track
        listened_ms = play.listened_ms
        skipped = play.skipped
        # No measured listen data: estimate from the gap to the next-older row.
        # NOTE(review): this assumes `played_at` marks when playback *finished*
        # for recently_played rows (so the gap belongs to this row) — TODO
        # confirm against the ingest path; currently_playing rows store the
        # start time, and if start-time semantics apply here the gap actually
        # belongs to plays[i + 1], not this row.
        if listened_ms is None and i < len(plays) - 1:
            next_play = plays[i + 1]
            diff_seconds = (play.played_at - next_play.played_at).total_seconds()
            if track and track.duration_ms:
                duration_sec = track.duration_ms / 1000.0
                # Clamp the estimate to the track's full duration.
                listened_ms = int(min(diff_seconds, duration_sec) * 1000)
                skipped = diff_seconds < 30
        result.append(
            {
                "id": play.id,
                "track_id": play.track_id,
                "track_name": track.name if track else "Unknown",
                "artist": track.artist if track else "Unknown",
                "album": track.album if track else "Unknown",
                "image": track.image_url if track else None,
                "played_at": play.played_at.isoformat(),
                "duration_ms": track.duration_ms if track else 0,
                "listened_ms": listened_ms,
                "skipped": skipped,
                "context_uri": play.context_uri,
                "source": play.source,
            }
        )
    return {
        "plays": result,
        "period": {"start": start_date.isoformat(), "end": end_date.isoformat()},
    }
@app.get("/sessions")
def get_sessions(
    days: int = Query(default=7, ge=1, le=365), db: Session = Depends(get_db)
):
    """Return listening sessions plus summary stats for the last `days` days."""
    end_date = datetime.utcnow()
    start_date = end_date - timedelta(days=days)

    session_stats = StatsService(db).compute_session_stats(start_date, end_date)

    summary = {
        "count": session_stats.get("count", 0),
        "avg_minutes": session_stats.get("avg_minutes", 0),
        "micro_rate": session_stats.get("micro_session_rate", 0),
        "marathon_rate": session_stats.get("marathon_session_rate", 0),
    }
    return {
        "sessions": session_stats.get("session_list", []),
        "summary": summary,
    }

View File

@@ -1,35 +1,50 @@
from sqlalchemy import Column, Integer, String, DateTime, JSON, ForeignKey, Float, Table, Text
from sqlalchemy import (
Boolean,
Column,
Integer,
String,
DateTime,
JSON,
ForeignKey,
Float,
Table,
Text,
)
from sqlalchemy.orm import relationship
from datetime import datetime
from .database import Base
# Association Table for Many-to-Many Relationship between Track and Artist
track_artists = Table(
'track_artists',
"track_artists",
Base.metadata,
Column('track_id', String, ForeignKey('tracks.id'), primary_key=True),
Column('artist_id', String, ForeignKey('artists.id'), primary_key=True)
Column("track_id", String, ForeignKey("tracks.id"), primary_key=True),
Column("artist_id", String, ForeignKey("artists.id"), primary_key=True),
)
class Artist(Base):
__tablename__ = "artists"
id = Column(String, primary_key=True, index=True) # Spotify ID
id = Column(String, primary_key=True, index=True) # Spotify ID
name = Column(String)
genres = Column(JSON, nullable=True) # List of genre strings
image_url = Column(String, nullable=True) # Artist profile image
genres = Column(JSON, nullable=True) # List of genre strings
image_url = Column(String, nullable=True) # Artist profile image
# Relationships
tracks = relationship("Track", secondary=track_artists, back_populates="artists")
class Track(Base):
__tablename__ = "tracks"
id = Column(String, primary_key=True, index=True) # Spotify ID
id = Column(String, primary_key=True, index=True) # Spotify ID
name = Column(String)
artist = Column(String) # Display string (e.g. "Drake, Future") - kept for convenience
artist = Column(
String
) # Display string (e.g. "Drake, Future") - kept for convenience
album = Column(String)
image_url = Column(String, nullable=True) # Album art
image_url = Column(String, nullable=True) # Album art
duration_ms = Column(Integer)
popularity = Column(Integer, nullable=True)
@@ -55,7 +70,7 @@ class Track(Base):
genres = Column(JSON, nullable=True)
# AI Analysis fields
lyrics = Column(Text, nullable=True) # Full lyrics from Genius
lyrics = Column(Text, nullable=True) # Full lyrics from Genius
lyrics_summary = Column(String, nullable=True)
genre_tags = Column(String, nullable=True)
@@ -71,11 +86,13 @@ class PlayHistory(Base):
id = Column(Integer, primary_key=True, index=True)
track_id = Column(String, ForeignKey("tracks.id"))
played_at = Column(DateTime, index=True) # The timestamp from Spotify
# Context (album, playlist, etc.)
played_at = Column(DateTime, index=True)
context_uri = Column(String, nullable=True)
listened_ms = Column(Integer, nullable=True)
skipped = Column(Boolean, nullable=True)
source = Column(String, nullable=True)
track = relationship("Track", back_populates="plays")
@@ -84,16 +101,19 @@ class AnalysisSnapshot(Base):
Stores the computed statistics and LLM analysis for a given period.
Allows for trend analysis over time.
"""
__tablename__ = "analysis_snapshots"
id = Column(Integer, primary_key=True, index=True)
date = Column(DateTime, default=datetime.utcnow, index=True) # When the analysis was run
date = Column(
DateTime, default=datetime.utcnow, index=True
) # When the analysis was run
period_start = Column(DateTime)
period_end = Column(DateTime)
period_label = Column(String) # e.g., "last_30_days", "monthly_nov_2023"
period_label = Column(String) # e.g., "last_30_days", "monthly_nov_2023"
# The heavy lifting: stored as JSON blobs
metrics_payload = Column(JSON) # The input to the LLM (StatsService output)
narrative_report = Column(JSON) # The output from the LLM (NarrativeService output)
metrics_payload = Column(JSON) # The input to the LLM (StatsService output)
narrative_report = Column(JSON) # The output from the LLM (NarrativeService output)
model_used = Column(String, nullable=True) # e.g. "gemini-1.5-flash"
model_used = Column(String, nullable=True) # e.g. "gemini-1.5-flash"

View File

@@ -1,101 +1,154 @@
import os
import json
import re
from google import genai
from typing import Dict, Any, List, Optional
from typing import Dict, Any
try:
from openai import OpenAI
except ImportError:
OpenAI = None
try:
from google import genai
except ImportError:
genai = None
class NarrativeService:
def __init__(self, model_name: str = "gemini-2.0-flash-exp"):
self.api_key = os.getenv("GEMINI_API_KEY")
self.client = genai.Client(api_key=self.api_key) if self.api_key else None
if not self.api_key:
print("WARNING: GEMINI_API_KEY not found. LLM features will fail.")
def __init__(self, model_name: str = "gpt-5-mini-2025-08-07"):
self.model_name = model_name
self.provider = self._detect_provider()
self.client = self._init_client()
def _detect_provider(self) -> str:
openai_key = os.getenv("OPENAI_API_KEY") or os.getenv("OPENAI_APIKEY")
gemini_key = os.getenv("GEMINI_API_KEY")
if self.model_name.startswith("gpt") and openai_key and OpenAI:
return "openai"
elif gemini_key and genai:
return "gemini"
elif openai_key and OpenAI:
return "openai"
elif gemini_key and genai:
return "gemini"
return "none"
def _init_client(self):
if self.provider == "openai":
api_key = os.getenv("OPENAI_API_KEY") or os.getenv("OPENAI_APIKEY")
return OpenAI(api_key=api_key)
elif self.provider == "gemini":
api_key = os.getenv("GEMINI_API_KEY")
return genai.Client(api_key=api_key)
return None
def generate_full_narrative(self, stats_json: Dict[str, Any]) -> Dict[str, Any]:
"""
Orchestrates the generation of the full narrative report.
Currently uses a single call for consistency and speed.
"""
if not self.api_key:
if not self.client:
print("WARNING: No LLM client available")
return self._get_fallback_narrative()
clean_stats = self._shape_payload(stats_json)
prompt = f"""
You are a witty, insightful, and slightly snarky music critic analyzing a user's Spotify listening data.
Your goal is to generate a JSON report that acts as a deeper, more honest "Spotify Wrapped".
prompt = self._build_prompt(clean_stats)
**CORE RULES:**
1. **NO Mental Health Diagnoses:** Do not mention depression, anxiety, or therapy. Stick to behavioral descriptors (e.g., "introspective", "high-energy").
2. **Be Specific:** Use the provided metrics. Don't say "You like pop," say "Your Mainstream Score of 85% suggests..."
3. **Roast Gently:** Be playful but not cruel.
4. **JSON Output Only:** Return strictly valid JSON.
**DATA TO ANALYZE:**
{json.dumps(clean_stats, indent=2)}
**REQUIRED JSON STRUCTURE:**
{{
"vibe_check": "2-3 paragraphs describing their overall listening personality this period.",
"patterns": ["Observation 1", "Observation 2", "Observation 3 (Look for specific habits like skipping or late-night sessions)"],
"persona": "A creative label (e.g., 'The Genre Chameleon', 'Nostalgic Dad-Rocker').",
"era_insight": "A specific comment on their Musical Age ({clean_stats.get('era', {}).get('musical_age', 'N/A')}) and Nostalgia Gap.",
"roast": "A 1-2 sentence playful roast about their taste.",
"comparison": "A short comment comparing this period to the previous one (if data exists)."
}}
"""
try:
response = self.client.models.generate_content(
model=self.model_name,
contents=prompt,
config=genai.types.GenerateContentConfig(response_mime_type="application/json")
)
return self._clean_and_parse_json(response.text)
if self.provider == "openai":
return self._call_openai(prompt)
elif self.provider == "gemini":
return self._call_gemini(prompt)
except Exception as e:
print(f"LLM Generation Error: {e}")
return self._get_fallback_narrative()
return self._get_fallback_narrative()
def _call_openai(self, prompt: str) -> Dict[str, Any]:
response = self.client.chat.completions.create(
model=self.model_name,
messages=[
{
"role": "system",
"content": "You are a witty music critic. Output only valid JSON.",
},
{"role": "user", "content": prompt},
],
response_format={"type": "json_object"},
max_completion_tokens=1500,
temperature=0.8,
)
return self._clean_and_parse_json(response.choices[0].message.content)
def _call_gemini(self, prompt: str) -> Dict[str, Any]:
response = self.client.models.generate_content(
model=self.model_name,
contents=prompt,
config=genai.types.GenerateContentConfig(
response_mime_type="application/json"
),
)
return self._clean_and_parse_json(response.text)
def _build_prompt(self, clean_stats: Dict[str, Any]) -> str:
return f"""Analyze this Spotify listening data and generate a personalized report.
**RULES:**
1. NO mental health diagnoses. Use behavioral descriptors only.
2. Be specific - reference actual metrics from the data.
3. Be playful but not cruel.
4. Return ONLY valid JSON.
**DATA:**
{json.dumps(clean_stats, indent=2)}
**REQUIRED JSON:**
{{
"vibe_check_short": "1-2 sentence hook for the hero banner.",
"vibe_check": "2-3 paragraphs describing their overall listening personality.",
"patterns": ["Observation 1", "Observation 2", "Observation 3"],
"persona": "A creative label (e.g., 'The Genre Chameleon').",
"era_insight": "Comment on Musical Age ({clean_stats.get("era", {}).get("musical_age", "N/A")}).",
"roast": "1-2 sentence playful roast.",
"comparison": "Compare to previous period if data exists."
}}"""
def _shape_payload(self, stats: Dict[str, Any]) -> Dict[str, Any]:
"""
Compresses the stats JSON to save tokens and focus the LLM.
Removes raw lists beyond top 5/10.
"""
s = stats.copy()
# Simplify Volume
if "volume" in s:
s["volume"] = {
k: v for k, v in s["volume"].items()
volume_copy = {
k: v
for k, v in s["volume"].items()
if k not in ["top_tracks", "top_artists", "top_albums", "top_genres"]
}
# Add back condensed top lists (just names)
s["volume"]["top_tracks"] = [t["name"] for t in stats["volume"].get("top_tracks", [])[:5]]
s["volume"]["top_artists"] = [a["name"] for a in stats["volume"].get("top_artists", [])[:5]]
s["volume"]["top_genres"] = [g["name"] for g in stats["volume"].get("top_genres", [])[:5]]
volume_copy["top_tracks"] = [
t["name"] for t in stats["volume"].get("top_tracks", [])[:5]
]
volume_copy["top_artists"] = [
a["name"] for a in stats["volume"].get("top_artists", [])[:5]
]
volume_copy["top_genres"] = [
g["name"] for g in stats["volume"].get("top_genres", [])[:5]
]
s["volume"] = volume_copy
if "time_habits" in s:
s["time_habits"] = {
k: v for k, v in s["time_habits"].items() if k != "heatmap"
}
if "sessions" in s:
s["sessions"] = {
k: v for k, v in s["sessions"].items() if k != "session_list"
}
# Simplify Time (Keep distributions but maybe round them?)
# Keeping hourly/daily is fine, they are small arrays.
# Simplify Vibe (Remove huge transition arrays if they accidentally leaked, though stats service handles this)
# Remove period details if verbose
return s
def _clean_and_parse_json(self, raw_text: str) -> Dict[str, Any]:
"""
Robust JSON extractor.
"""
try:
# 1. Try direct parse
return json.loads(raw_text)
except json.JSONDecodeError:
pass
# 2. Extract between first { and last }
try:
match = re.search(r"\{.*\}", raw_text, re.DOTALL)
if match:
@@ -107,16 +160,11 @@ Your goal is to generate a JSON report that acts as a deeper, more honest "Spoti
def _get_fallback_narrative(self) -> Dict[str, Any]:
return {
"vibe_check": "Data processing error. You're too mysterious for us to analyze right now.",
"vibe_check_short": "Your taste is... interesting.",
"vibe_check": "Data processing error. You're too mysterious to analyze right now.",
"patterns": [],
"persona": "The Enigma",
"era_insight": "Time is a flat circle.",
"roast": "You broke the machine. Congratulations.",
"comparison": "N/A"
"comparison": "N/A",
}
# Individual accessors if needed by frontend, though full_narrative is preferred
def generate_vibe_check(self, stats): return self.generate_full_narrative(stats).get("vibe_check")
def identify_patterns(self, stats): return self.generate_full_narrative(stats).get("patterns")
def generate_persona(self, stats): return self.generate_full_narrative(stats).get("persona")
def generate_roast(self, stats): return self.generate_full_narrative(stats).get("roast")

View File

@@ -3,16 +3,30 @@ from typing import List, Dict, Any
RECCOBEATS_API_URL = "https://api.reccobeats.com/v1/audio-features"
class ReccoBeatsClient:
async def get_audio_features(self, spotify_ids: List[str]) -> List[Dict[str, Any]]:
if not spotify_ids:
return []
ids_param = ",".join(spotify_ids)
async with httpx.AsyncClient() as client:
async with httpx.AsyncClient(timeout=30.0) as client:
try:
response = await client.get(RECCOBEATS_API_URL, params={"ids": ids_param})
response = await client.get(
RECCOBEATS_API_URL, params={"ids": ids_param}
)
if response.status_code != 200:
print(f"ReccoBeats API returned status {response.status_code}")
return []
return response.json().get("content", [])
except Exception:
content = response.json().get("content", [])
for item in content:
href = item.get("href", "")
if "spotify.com/track/" in href:
spotify_id = href.split("/track/")[-1].split("?")[0]
item["spotify_id"] = spotify_id
return content
except Exception as e:
print(f"ReccoBeats API error: {e}")
return []

View File

@@ -8,6 +8,7 @@ from typing import List, Dict, Any
SPOTIFY_TOKEN_URL = "https://accounts.spotify.com/api/token"
SPOTIFY_API_BASE = "https://api.spotify.com/v1"
class SpotifyClient:
def __init__(self, client_id: str, client_secret: str, refresh_token: str):
self.client_id = client_id
@@ -92,3 +93,17 @@ class SpotifyClient:
return []
return response.json().get("artists", [])
async def get_currently_playing(self) -> Dict[str, Any] | None:
token = await self.get_access_token()
async with httpx.AsyncClient() as client:
response = await client.get(
f"{SPOTIFY_API_BASE}/me/player/currently-playing",
headers={"Authorization": f"Bearer {token}"},
)
if response.status_code == 204:
return None
if response.status_code != 200:
print(f"Error fetching currently playing: {response.text}")
return None
return response.json()

File diff suppressed because it is too large Load Diff