mirror of
https://github.com/bnair123/MusicAnalyser.git
synced 2026-02-25 11:46:07 +00:00
Add skip tracking, compressed heatmap, listening log, docs, tests, and OpenAI support
Major changes:
- Add skip tracking: poll currently-playing every 15s, detect skips (<30s listened)
- Add listening-log and sessions API endpoints
- Fix ReccoBeats client to extract spotify_id from href response
- Compress heatmap from 24 hours to 6 x 4-hour blocks
- Add OpenAI support in narrative service (use max_completion_tokens for new models)
- Add ListeningLog component with timeline and list views
- Update all frontend components to use real data (album art, play counts)
- Add docker-compose external network (dockernet) support
- Add comprehensive documentation (API, DATA_MODEL, ARCHITECTURE, FRONTEND)
- Add unit tests for ingest and API endpoints
This commit is contained in:
@@ -1,6 +1,6 @@
|
||||
import asyncio
|
||||
import os
|
||||
from datetime import datetime
|
||||
from datetime import datetime, timedelta
|
||||
from sqlalchemy.orm import Session
|
||||
from .models import Track, PlayHistory, Artist
|
||||
from .database import SessionLocal
|
||||
@@ -9,6 +9,17 @@ from .services.reccobeats_client import ReccoBeatsClient
|
||||
from .services.genius_client import GeniusClient
|
||||
from dateutil import parser
|
||||
|
||||
|
||||
class PlaybackTracker:
    """Mutable in-memory record of the track currently being followed.

    Every field starts at its "nothing is being tracked" value. The polling
    code in this module fills the fields in and clears them again.
    """

    def __init__(self):
        # Identity of the tracked track and whether it was last seen paused.
        self.current_track_id = None
        self.is_paused = False

        # Timestamps: estimated start of the track and time of the last poll.
        self.track_start_time = None
        self.last_poll_time = None

        # Millisecond counters: listening time so far and last seen progress.
        self.accumulated_listen_ms = 0
        self.last_progress_ms = 0
|
||||
|
||||
|
||||
# Initialize Clients
|
||||
def get_spotify_client():
|
||||
return SpotifyClient(
|
||||
@@ -17,12 +28,15 @@ def get_spotify_client():
|
||||
refresh_token=os.getenv("SPOTIFY_REFRESH_TOKEN"),
|
||||
)
|
||||
|
||||
|
||||
def get_reccobeats_client():
    """Create a new ``ReccoBeatsClient`` with its default arguments and return it."""
    client = ReccoBeatsClient()
    return client
|
||||
|
||||
|
||||
def get_genius_client():
    """Create a new ``GeniusClient`` with its default arguments and return it."""
    client = GeniusClient()
    return client
|
||||
|
||||
|
||||
async def ensure_artists_exist(db: Session, artists_data: list):
|
||||
"""
|
||||
Ensures that all artists in the list exist in the Artist table.
|
||||
@@ -36,18 +50,19 @@ async def ensure_artists_exist(db: Session, artists_data: list):
|
||||
img = None
|
||||
if "images" in a_data and a_data["images"]:
|
||||
img = a_data["images"][0]["url"]
|
||||
|
||||
artist = Artist(
|
||||
id=artist_id,
|
||||
name=a_data["name"],
|
||||
genres=[],
|
||||
image_url=img
|
||||
)
|
||||
|
||||
artist = Artist(id=artist_id, name=a_data["name"], genres=[], image_url=img)
|
||||
db.add(artist)
|
||||
artist_objects.append(artist)
|
||||
return artist_objects
|
||||
|
||||
async def enrich_tracks(db: Session, spotify_client: SpotifyClient, recco_client: ReccoBeatsClient, genius_client: GeniusClient):
|
||||
|
||||
async def enrich_tracks(
|
||||
db: Session,
|
||||
spotify_client: SpotifyClient,
|
||||
recco_client: ReccoBeatsClient,
|
||||
genius_client: GeniusClient,
|
||||
):
|
||||
"""
|
||||
Enrichment Pipeline:
|
||||
1. Audio Features (ReccoBeats)
|
||||
@@ -56,18 +71,19 @@ async def enrich_tracks(db: Session, spotify_client: SpotifyClient, recco_client
|
||||
"""
|
||||
|
||||
# 1. Enrich Audio Features
|
||||
tracks_missing_features = db.query(Track).filter(Track.danceability == None).limit(50).all()
|
||||
tracks_missing_features = (
|
||||
db.query(Track).filter(Track.danceability == None).limit(50).all()
|
||||
)
|
||||
if tracks_missing_features:
|
||||
print(f"Enriching {len(tracks_missing_features)} tracks with audio features...")
|
||||
ids = [t.id for t in tracks_missing_features]
|
||||
features_list = await recco_client.get_audio_features(ids)
|
||||
|
||||
# Map features by ID
|
||||
|
||||
features_map = {}
|
||||
for f in features_list:
|
||||
# Handle potential ID mismatch or URI format
|
||||
tid = f.get("id")
|
||||
if tid: features_map[tid] = f
|
||||
tid = f.get("spotify_id") or f.get("id")
|
||||
if tid:
|
||||
features_map[tid] = f
|
||||
|
||||
for track in tracks_missing_features:
|
||||
data = features_map.get(track.id)
|
||||
@@ -83,61 +99,74 @@ async def enrich_tracks(db: Session, spotify_client: SpotifyClient, recco_client
|
||||
track.liveness = data.get("liveness")
|
||||
track.valence = data.get("valence")
|
||||
track.tempo = data.get("tempo")
|
||||
|
||||
|
||||
db.commit()
|
||||
|
||||
# 2. Enrich Artist Genres & Images (Spotify)
|
||||
artists_missing_data = db.query(Artist).filter((Artist.genres == None) | (Artist.image_url == None)).limit(50).all()
|
||||
artists_missing_data = (
|
||||
db.query(Artist)
|
||||
.filter((Artist.genres == None) | (Artist.image_url == None))
|
||||
.limit(50)
|
||||
.all()
|
||||
)
|
||||
if artists_missing_data:
|
||||
print(f"Enriching {len(artists_missing_data)} artists with genres/images...")
|
||||
artist_ids_list = [a.id for a in artists_missing_data]
|
||||
|
||||
|
||||
artist_data_map = {}
|
||||
for i in range(0, len(artist_ids_list), 50):
|
||||
chunk = artist_ids_list[i:i+50]
|
||||
chunk = artist_ids_list[i : i + 50]
|
||||
artists_data = await spotify_client.get_artists(chunk)
|
||||
for a_data in artists_data:
|
||||
if a_data:
|
||||
img = a_data["images"][0]["url"] if a_data.get("images") else None
|
||||
artist_data_map[a_data["id"]] = {
|
||||
"genres": a_data.get("genres", []),
|
||||
"image_url": img
|
||||
"image_url": img,
|
||||
}
|
||||
|
||||
for artist in artists_missing_data:
|
||||
data = artist_data_map.get(artist.id)
|
||||
if data:
|
||||
if artist.genres is None: artist.genres = data["genres"]
|
||||
if artist.image_url is None: artist.image_url = data["image_url"]
|
||||
if artist.genres is None:
|
||||
artist.genres = data["genres"]
|
||||
if artist.image_url is None:
|
||||
artist.image_url = data["image_url"]
|
||||
elif artist.genres is None:
|
||||
artist.genres = [] # Prevent retry loop
|
||||
|
||||
artist.genres = [] # Prevent retry loop
|
||||
|
||||
db.commit()
|
||||
|
||||
# 3. Enrich Lyrics (Genius)
|
||||
# Only fetch for tracks that have been played recently to avoid spamming Genius API
|
||||
tracks_missing_lyrics = db.query(Track).filter(Track.lyrics == None).order_by(Track.updated_at.desc()).limit(10).all()
|
||||
|
||||
tracks_missing_lyrics = (
|
||||
db.query(Track)
|
||||
.filter(Track.lyrics == None)
|
||||
.order_by(Track.updated_at.desc())
|
||||
.limit(10)
|
||||
.all()
|
||||
)
|
||||
|
||||
if tracks_missing_lyrics and genius_client.genius:
|
||||
print(f"Enriching {len(tracks_missing_lyrics)} tracks with lyrics (Genius)...")
|
||||
for track in tracks_missing_lyrics:
|
||||
# We need the primary artist name
|
||||
artist_name = track.artist.split(",")[0] # Heuristic: take first artist
|
||||
|
||||
artist_name = track.artist.split(",")[0] # Heuristic: take first artist
|
||||
|
||||
print(f"Searching Genius for: {track.name} by {artist_name}")
|
||||
data = genius_client.search_song(track.name, artist_name)
|
||||
|
||||
|
||||
if data:
|
||||
track.lyrics = data["lyrics"]
|
||||
# Fallback: if we didn't get high-res art from Spotify, use Genius
|
||||
if not track.image_url and data.get("image_url"):
|
||||
track.image_url = data["image_url"]
|
||||
else:
|
||||
track.lyrics = "" # Mark as empty to prevent retry loop
|
||||
|
||||
track.lyrics = "" # Mark as empty to prevent retry loop
|
||||
|
||||
# Small sleep to be nice to API? GeniusClient is synchronous.
|
||||
# We are in async function but GeniusClient is blocking. It's fine for worker.
|
||||
|
||||
|
||||
db.commit()
|
||||
|
||||
|
||||
@@ -164,7 +193,7 @@ async def ingest_recently_played(db: Session):
|
||||
|
||||
if not track:
|
||||
print(f"New track found: {track_data['name']}")
|
||||
|
||||
|
||||
# Extract Album Art
|
||||
image_url = None
|
||||
if track_data.get("album") and track_data["album"].get("images"):
|
||||
@@ -173,12 +202,12 @@ async def ingest_recently_played(db: Session):
|
||||
track = Track(
|
||||
id=track_id,
|
||||
name=track_data["name"],
|
||||
artist=", ".join([a["name"] for a in track_data["artists"]]),
|
||||
artist=", ".join([a["name"] for a in track_data["artists"]]),
|
||||
album=track_data["album"]["name"],
|
||||
image_url=image_url,
|
||||
duration_ms=track_data["duration_ms"],
|
||||
popularity=track_data["popularity"],
|
||||
raw_data=track_data
|
||||
raw_data=track_data,
|
||||
)
|
||||
|
||||
# Handle Artists Relation
|
||||
@@ -191,21 +220,27 @@ async def ingest_recently_played(db: Session):
|
||||
|
||||
# Ensure relationships exist logic...
|
||||
if not track.artists and track.raw_data and "artists" in track.raw_data:
|
||||
artist_objects = await ensure_artists_exist(db, track.raw_data["artists"])
|
||||
track.artists = artist_objects
|
||||
db.commit()
|
||||
artist_objects = await ensure_artists_exist(db, track.raw_data["artists"])
|
||||
track.artists = artist_objects
|
||||
db.commit()
|
||||
|
||||
exists = db.query(PlayHistory).filter(
|
||||
PlayHistory.track_id == track_id,
|
||||
PlayHistory.played_at == played_at
|
||||
).first()
|
||||
exists = (
|
||||
db.query(PlayHistory)
|
||||
.filter(
|
||||
PlayHistory.track_id == track_id, PlayHistory.played_at == played_at
|
||||
)
|
||||
.first()
|
||||
)
|
||||
|
||||
if not exists:
|
||||
print(f" recording play: {track_data['name']} at {played_at}")
|
||||
play = PlayHistory(
|
||||
track_id=track_id,
|
||||
played_at=played_at,
|
||||
context_uri=item.get("context", {}).get("uri") if item.get("context") else None
|
||||
context_uri=item.get("context", {}).get("uri")
|
||||
if item.get("context")
|
||||
else None,
|
||||
source="recently_played",
|
||||
)
|
||||
db.add(play)
|
||||
|
||||
@@ -214,17 +249,145 @@ async def ingest_recently_played(db: Session):
|
||||
# Enrich
|
||||
await enrich_tracks(db, spotify_client, recco_client, genius_client)
|
||||
|
||||
|
||||
async def run_worker(poll_interval_seconds: float = 15, history_every: int = 4):
    """Run the background polling loop.

    Each cycle polls the currently-playing endpoint; every ``history_every``-th
    cycle also ingests the recently-played history. The loop then sleeps for
    ``poll_interval_seconds``.

    A failure inside one cycle is logged and the session is rolled back, so a
    transient error no longer stops tracking. Previously the first exception
    ended the loop permanently.

    Args:
        poll_interval_seconds: Pause between cycles (default 15, as before).
        history_every: Ingest recently-played every N-th cycle (default 4).
    """
    db = SessionLocal()
    tracker = PlaybackTracker()
    spotify_client = get_spotify_client()
    poll_count = 0

    try:
        while True:
            poll_count += 1

            try:
                await poll_currently_playing(db, spotify_client, tracker)

                if poll_count % history_every == 0:
                    print("Worker: Polling recently-played...")
                    await ingest_recently_played(db)
            except Exception as e:
                # asyncio.CancelledError derives from BaseException, so task
                # cancellation is not swallowed here and still ends the loop.
                print(f"Worker iteration failed: {e}")
                # Leave the session usable for the next cycle.
                db.rollback()

            await asyncio.sleep(poll_interval_seconds)
    except Exception as e:
        # Only reached when recovery itself fails (e.g. rollback raises).
        print(f"Worker crashed: {e}")
    finally:
        db.close()
|
||||
|
||||
|
||||
async def poll_currently_playing(
    db: Session, spotify_client: SpotifyClient, tracker: PlaybackTracker
):
    """Poll the currently-playing endpoint once and update ``tracker``.

    Behaviour per poll:
    - Request failure: log and return, leaving ``tracker`` untouched.
    - Nothing playing, a non-track item, or an item without an id: finalize
      the tracked track (if any) and return.
    - Track changed: finalize the previous track, start tracking the new one,
      and make sure it has a ``Track`` row.
    - Same track: add the wall-clock time since the last poll to the
      accumulated listening time, but only if playback was running at both
      the previous and the current poll.

    Args:
        db: Session used for finalizing plays and creating tracks.
        spotify_client: Client exposing ``get_currently_playing()``.
        tracker: State carried between polls; mutated in place.
    """
    try:
        response = await spotify_client.get_currently_playing()
    except Exception as e:
        print(f"Error polling currently-playing: {e}")
        return

    now = datetime.utcnow()

    def finalize_if_tracking():
        if tracker.current_track_id and tracker.last_poll_time:
            finalize_track(db, tracker)

    if not response or response.get("currently_playing_type") != "track":
        finalize_if_tracking()
        return

    item = response.get("item")
    if not item:
        return

    current_track_id = item.get("id")
    if not current_track_id:
        # NOTE(review): Spotify appears to report some items (e.g. local
        # files) with a null id. Such an item cannot be stored as a Track row,
        # so treat it like "no trackable track" instead of crashing.
        finalize_if_tracking()
        return

    # progress_ms may be present but null; .get(key, 0) would pass None on
    # to timedelta() and raise.
    current_progress_ms = response.get("progress_ms") or 0
    is_playing = response.get("is_playing", False)

    if current_track_id != tracker.current_track_id:
        finalize_if_tracking()

        tracker.current_track_id = current_track_id
        # Back-date the start by the progress already made when first seen.
        tracker.track_start_time = now - timedelta(milliseconds=current_progress_ms)
        tracker.accumulated_listen_ms = current_progress_ms if is_playing else 0
        tracker.last_progress_ms = current_progress_ms
        tracker.last_poll_time = now
        tracker.is_paused = not is_playing

        await ensure_track_exists(db, item, spotify_client)
    else:
        if tracker.last_poll_time:
            time_delta_ms = (now - tracker.last_poll_time).total_seconds() * 1000
            if is_playing and not tracker.is_paused:
                tracker.accumulated_listen_ms += time_delta_ms

        tracker.last_progress_ms = current_progress_ms
        tracker.last_poll_time = now
        tracker.is_paused = not is_playing
|
||||
|
||||
|
||||
def finalize_track(
    db: Session,
    tracker: PlaybackTracker,
    skip_threshold_ms: int = 30000,
    match_window: timedelta = timedelta(seconds=5),
):
    """Persist the tracked play and reset ``tracker``.

    If a ``PlayHistory`` row for the same track exists with ``played_at``
    within ``match_window`` of the tracked start time, its listening fields
    are filled in, but only when ``listened_ms`` is still unset. Otherwise a
    new row is inserted with source ``"currently_playing"``.

    Args:
        db: Session used for the lookup and commit.
        tracker: State of the play to finalize; cleared on success.
        skip_threshold_ms: Plays shorter than this are flagged as skipped
            (default 30000, as before).
        match_window: Tolerance around the start time when looking for an
            existing row (default 5 seconds, as before).

    Raises:
        Exception: Any database error is re-raised after the session has been
            rolled back. ``tracker`` is left untouched in that case, so a
            later call can retry.
    """
    started_at = tracker.track_start_time
    if tracker.current_track_id is None or started_at is None:
        # Nothing is being tracked; the date arithmetic below would raise.
        return

    listened_ms = int(tracker.accumulated_listen_ms)
    skipped = listened_ms < skip_threshold_ms

    try:
        existing = (
            db.query(PlayHistory)
            .filter(
                PlayHistory.track_id == tracker.current_track_id,
                PlayHistory.played_at >= started_at - match_window,
                PlayHistory.played_at <= started_at + match_window,
            )
            .first()
        )

        if existing:
            # Never overwrite listening data that was already recorded.
            if existing.listened_ms is None:
                existing.listened_ms = listened_ms
                existing.skipped = skipped
                existing.source = "currently_playing"
        else:
            db.add(
                PlayHistory(
                    track_id=tracker.current_track_id,
                    played_at=started_at,
                    listened_ms=listened_ms,
                    skipped=skipped,
                    source="currently_playing",
                )
            )
        db.commit()
    except Exception:
        # A failed flush/commit leaves the session unusable until rolled back.
        db.rollback()
        raise

    print(
        f"Finalized: {tracker.current_track_id} listened={listened_ms}ms skipped={skipped}"
    )

    tracker.current_track_id = None
    tracker.track_start_time = None
    tracker.accumulated_listen_ms = 0
    tracker.last_progress_ms = 0
    tracker.last_poll_time = None
    tracker.is_paused = False
|
||||
|
||||
|
||||
async def ensure_track_exists(
    db: Session, track_data: dict, spotify_client: SpotifyClient
):
    """Insert a ``Track`` row for ``track_data`` unless one already exists.

    Args:
        db: Session used for the lookup and insert.
        track_data: Track payload; ``id`` and ``name`` are required, while
            ``album``, ``artists``, ``duration_ms`` and ``popularity`` are
            optional.
        spotify_client: Unused here; kept so existing callers keep working.

    Raises:
        Exception: A failed commit is re-raised after rolling the session back.
    """
    track_id = track_data["id"]
    if db.query(Track).filter(Track.id == track_id).first():
        return

    # "album" / "artists" may be present but None; `.get(key, {})` would then
    # return None and the chained lookup would fail.
    album = track_data.get("album") or {}
    artists_data = track_data.get("artists") or []

    images = album.get("images")
    image_url = images[0]["url"] if images else None

    track = Track(
        id=track_id,
        name=track_data["name"],
        artist=", ".join(a["name"] for a in artists_data),
        album=album.get("name") or "Unknown",
        image_url=image_url,
        duration_ms=track_data.get("duration_ms"),
        popularity=track_data.get("popularity"),
        raw_data=track_data,
    )
    track.artists = await ensure_artists_exist(db, artists_data)

    db.add(track)
    try:
        db.commit()
    except Exception:
        # Keep the shared session usable for the caller's next operation.
        db.rollback()
        raise
|
||||
|
||||
@@ -1,11 +1,15 @@
|
||||
from fastapi import FastAPI, Depends, HTTPException, BackgroundTasks
|
||||
from sqlalchemy.orm import Session
|
||||
from fastapi import FastAPI, Depends, HTTPException, BackgroundTasks, Query
|
||||
from sqlalchemy.orm import Session, joinedload
|
||||
from datetime import datetime, timedelta
|
||||
from typing import List, Optional
|
||||
from dotenv import load_dotenv
|
||||
|
||||
from .database import engine, Base, get_db
|
||||
from .models import PlayHistory as PlayHistoryModel, Track as TrackModel, AnalysisSnapshot
|
||||
from .models import (
|
||||
PlayHistory as PlayHistoryModel,
|
||||
Track as TrackModel,
|
||||
AnalysisSnapshot,
|
||||
)
|
||||
from . import schemas
|
||||
from .ingest import ingest_recently_played
|
||||
from .services.stats_service import StatsService
|
||||
@@ -13,7 +17,6 @@ from .services.narrative_service import NarrativeService
|
||||
|
||||
load_dotenv()
|
||||
|
||||
# Create tables
|
||||
Base.metadata.create_all(bind=engine)
|
||||
|
||||
from fastapi.middleware.cors import CORSMiddleware
|
||||
@@ -22,37 +25,49 @@ app = FastAPI(title="Music Analyser Backend")
|
||||
|
||||
app.add_middleware(
|
||||
CORSMiddleware,
|
||||
allow_origins=["http://localhost:5173"],
|
||||
allow_origins=["http://localhost:5173", "http://localhost:8991"],
|
||||
allow_credentials=True,
|
||||
allow_methods=["*"],
|
||||
allow_headers=["*"],
|
||||
)
|
||||
|
||||
|
||||
@app.get("/")
def read_root():
    """Return a static payload showing that the API process is up."""
    payload = {"status": "ok", "message": "Music Analyser API is running"}
    return payload
|
||||
|
||||
|
||||
@app.get("/history", response_model=List[schemas.PlayHistory])
def get_history(limit: int = 50, db: Session = Depends(get_db)):
    """Return up to ``limit`` play-history rows, most recently played first."""
    newest_first = db.query(PlayHistoryModel).order_by(
        PlayHistoryModel.played_at.desc()
    )
    return newest_first.limit(limit).all()
|
||||
|
||||
|
||||
@app.get("/tracks", response_model=List[schemas.Track])
def get_tracks(limit: int = 50, db: Session = Depends(get_db)):
    """Return up to ``limit`` tracks; no ordering is applied to the query."""
    capped_query = db.query(TrackModel).limit(limit)
    return capped_query.all()
|
||||
|
||||
|
||||
async def _ingest_with_own_session():
    """Run one ingestion pass on a session owned by the background task."""
    from .database import SessionLocal

    session = SessionLocal()
    try:
        await ingest_recently_played(session)
    finally:
        session.close()


@app.post("/trigger-ingest")
async def trigger_ingest(
    background_tasks: BackgroundTasks, db: Session = Depends(get_db)
):
    """Schedule a Spotify ingestion pass to run after the response is sent.

    The background task opens and closes its own session instead of reusing
    ``db``.
    NOTE(review): ``get_db`` is not visible here. If it is a ``yield``
    dependency that closes the session, recent FastAPI versions run that
    cleanup before background tasks start, so the task would receive an
    already-closed session. Confirm against ``get_db``.

    ``db`` stays in the signature so existing callers are unaffected.
    """
    background_tasks.add_task(_ingest_with_own_session)
    return {"status": "Ingestion started in background"}
|
||||
|
||||
|
||||
@app.post("/trigger-analysis")
|
||||
def trigger_analysis(
|
||||
days: int = 30,
|
||||
model_name: str = "gemini-2.5-flash",
|
||||
db: Session = Depends(get_db)
|
||||
model_name: str = "gpt-5-mini-2025-08-07",
|
||||
db: Session = Depends(get_db),
|
||||
):
|
||||
"""
|
||||
Runs the full analysis pipeline (Stats + LLM) for the last X days.
|
||||
@@ -67,7 +82,9 @@ def trigger_analysis(
|
||||
stats_json = stats_service.generate_full_report(start_date, end_date)
|
||||
|
||||
if stats_json["volume"]["total_plays"] == 0:
|
||||
raise HTTPException(status_code=404, detail="No plays found in the specified period.")
|
||||
raise HTTPException(
|
||||
status_code=404, detail="No plays found in the specified period."
|
||||
)
|
||||
|
||||
narrative_service = NarrativeService(model_name=model_name)
|
||||
narrative_json = narrative_service.generate_full_narrative(stats_json)
|
||||
@@ -79,7 +96,7 @@ def trigger_analysis(
|
||||
period_label=f"last_{days}_days",
|
||||
metrics_payload=stats_json,
|
||||
narrative_report=narrative_json,
|
||||
model_used=model_name
|
||||
model_used=model_name,
|
||||
)
|
||||
db.add(snapshot)
|
||||
db.commit()
|
||||
@@ -90,7 +107,7 @@ def trigger_analysis(
|
||||
"snapshot_id": snapshot.id,
|
||||
"period": {"start": start_date, "end": end_date},
|
||||
"metrics": stats_json,
|
||||
"narrative": narrative_json
|
||||
"narrative": narrative_json,
|
||||
}
|
||||
|
||||
except HTTPException:
|
||||
@@ -99,7 +116,91 @@ def trigger_analysis(
|
||||
print(f"Analysis Failed: {e}")
|
||||
raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
|
||||
@app.get("/snapshots")
def get_snapshots(limit: int = 10, db: Session = Depends(get_db)):
    """Return up to ``limit`` stored analysis snapshots, newest first."""
    newest_first = db.query(AnalysisSnapshot).order_by(AnalysisSnapshot.date.desc())
    snapshots = newest_first.limit(limit).all()
    return snapshots
|
||||
|
||||
|
||||
@app.get("/listening-log")
def get_listening_log(
    days: int = Query(default=7, ge=1, le=365),
    limit: int = Query(default=200, ge=1, le=1000),
    db: Session = Depends(get_db),
):
    """Return the plays of the last ``days`` days, newest first.

    For a play with no stored ``listened_ms``, the value is estimated from the
    gap to the next row in the (newest-first) result, capped at the track
    duration; a gap under 30 seconds marks the play as skipped. The last row
    of the result, and rows whose track has no duration, are never estimated.

    Returns:
        ``{"plays": [...], "period": {"start": ..., "end": ...}}`` with ISO
        formatted timestamps.
    """
    end_date = datetime.utcnow()
    start_date = end_date - timedelta(days=days)

    in_window = (
        db.query(PlayHistoryModel)
        .options(joinedload(PlayHistoryModel.track))
        .filter(
            PlayHistoryModel.played_at >= start_date,
            PlayHistoryModel.played_at <= end_date,
        )
    )
    plays = in_window.order_by(PlayHistoryModel.played_at.desc()).limit(limit).all()

    last_index = len(plays) - 1
    result = []
    for i, play in enumerate(plays):
        track = play.track
        listened_ms = play.listened_ms
        skipped = play.skipped

        if listened_ms is None and i < last_index:
            # Rows are newest-first, so index i + 1 is the chronologically
            # earlier play.
            earlier_play = plays[i + 1]
            diff_seconds = (play.played_at - earlier_play.played_at).total_seconds()
            if track and track.duration_ms:
                duration_sec = track.duration_ms / 1000.0
                listened_ms = int(min(diff_seconds, duration_sec) * 1000)
                skipped = diff_seconds < 30

        if track:
            track_name, artist, album = track.name, track.artist, track.album
            image, duration_ms = track.image_url, track.duration_ms
        else:
            track_name = artist = album = "Unknown"
            image, duration_ms = None, 0

        entry = {
            "id": play.id,
            "track_id": play.track_id,
            "track_name": track_name,
            "artist": artist,
            "album": album,
            "image": image,
            "played_at": play.played_at.isoformat(),
            "duration_ms": duration_ms,
            "listened_ms": listened_ms,
            "skipped": skipped,
            "context_uri": play.context_uri,
            "source": play.source,
        }
        result.append(entry)

    period = {"start": start_date.isoformat(), "end": end_date.isoformat()}
    return {"plays": result, "period": period}
|
||||
|
||||
|
||||
@app.get("/sessions")
def get_sessions(
    days: int = Query(default=7, ge=1, le=365), db: Session = Depends(get_db)
):
    """Return session statistics for the last ``days`` days.

    The response holds the session list plus a summary; any figure missing
    from the computed stats defaults to 0 (or an empty list for sessions).
    """
    end_date = datetime.utcnow()
    start_date = end_date - timedelta(days=days)

    session_stats = StatsService(db).compute_session_stats(start_date, end_date)

    # Response key -> key in the computed stats.
    summary_fields = {
        "count": "count",
        "avg_minutes": "avg_minutes",
        "micro_rate": "micro_session_rate",
        "marathon_rate": "marathon_session_rate",
    }
    summary = {
        out_key: session_stats.get(stats_key, 0)
        for out_key, stats_key in summary_fields.items()
    }

    return {"sessions": session_stats.get("session_list", []), "summary": summary}
|
||||
|
||||
@@ -1,35 +1,50 @@
|
||||
from sqlalchemy import Column, Integer, String, DateTime, JSON, ForeignKey, Float, Table, Text
|
||||
from sqlalchemy import (
|
||||
Boolean,
|
||||
Column,
|
||||
Integer,
|
||||
String,
|
||||
DateTime,
|
||||
JSON,
|
||||
ForeignKey,
|
||||
Float,
|
||||
Table,
|
||||
Text,
|
||||
)
|
||||
from sqlalchemy.orm import relationship
|
||||
from datetime import datetime
|
||||
from .database import Base
|
||||
|
||||
# Association Table for Many-to-Many Relationship between Track and Artist
|
||||
# One row per (track, artist) pair; both columns form the composite primary key.
track_artists = Table(
    "track_artists",
    Base.metadata,
    Column("track_id", String, ForeignKey("tracks.id"), primary_key=True),
    Column("artist_id", String, ForeignKey("artists.id"), primary_key=True),
)
|
||||
|
||||
|
||||
class Artist(Base):
    """ORM model for the ``artists`` table, linked to tracks via ``track_artists``."""

    __tablename__ = "artists"

    # Spotify ID
    id = Column(String, primary_key=True, index=True)
    name = Column(String)
    # List of genre strings
    genres = Column(JSON, nullable=True)
    # Artist profile image
    image_url = Column(String, nullable=True)

    # Relationships
    tracks = relationship("Track", secondary=track_artists, back_populates="artists")
|
||||
|
||||
|
||||
class Track(Base):
|
||||
__tablename__ = "tracks"
|
||||
|
||||
id = Column(String, primary_key=True, index=True) # Spotify ID
|
||||
id = Column(String, primary_key=True, index=True) # Spotify ID
|
||||
name = Column(String)
|
||||
artist = Column(String) # Display string (e.g. "Drake, Future") - kept for convenience
|
||||
artist = Column(
|
||||
String
|
||||
) # Display string (e.g. "Drake, Future") - kept for convenience
|
||||
album = Column(String)
|
||||
image_url = Column(String, nullable=True) # Album art
|
||||
image_url = Column(String, nullable=True) # Album art
|
||||
duration_ms = Column(Integer)
|
||||
popularity = Column(Integer, nullable=True)
|
||||
|
||||
@@ -55,7 +70,7 @@ class Track(Base):
|
||||
genres = Column(JSON, nullable=True)
|
||||
|
||||
# AI Analysis fields
|
||||
lyrics = Column(Text, nullable=True) # Full lyrics from Genius
|
||||
lyrics = Column(Text, nullable=True) # Full lyrics from Genius
|
||||
lyrics_summary = Column(String, nullable=True)
|
||||
genre_tags = Column(String, nullable=True)
|
||||
|
||||
@@ -71,11 +86,13 @@ class PlayHistory(Base):
|
||||
|
||||
id = Column(Integer, primary_key=True, index=True)
|
||||
track_id = Column(String, ForeignKey("tracks.id"))
|
||||
played_at = Column(DateTime, index=True) # The timestamp from Spotify
|
||||
|
||||
# Context (album, playlist, etc.)
|
||||
played_at = Column(DateTime, index=True)
|
||||
context_uri = Column(String, nullable=True)
|
||||
|
||||
listened_ms = Column(Integer, nullable=True)
|
||||
skipped = Column(Boolean, nullable=True)
|
||||
source = Column(String, nullable=True)
|
||||
|
||||
track = relationship("Track", back_populates="plays")
|
||||
|
||||
|
||||
@@ -84,16 +101,19 @@ class AnalysisSnapshot(Base):
|
||||
Stores the computed statistics and LLM analysis for a given period.
|
||||
Allows for trend analysis over time.
|
||||
"""
|
||||
|
||||
__tablename__ = "analysis_snapshots"
|
||||
|
||||
id = Column(Integer, primary_key=True, index=True)
|
||||
date = Column(DateTime, default=datetime.utcnow, index=True) # When the analysis was run
|
||||
date = Column(
|
||||
DateTime, default=datetime.utcnow, index=True
|
||||
) # When the analysis was run
|
||||
period_start = Column(DateTime)
|
||||
period_end = Column(DateTime)
|
||||
period_label = Column(String) # e.g., "last_30_days", "monthly_nov_2023"
|
||||
period_label = Column(String) # e.g., "last_30_days", "monthly_nov_2023"
|
||||
|
||||
# The heavy lifting: stored as JSON blobs
|
||||
metrics_payload = Column(JSON) # The input to the LLM (StatsService output)
|
||||
narrative_report = Column(JSON) # The output from the LLM (NarrativeService output)
|
||||
metrics_payload = Column(JSON) # The input to the LLM (StatsService output)
|
||||
narrative_report = Column(JSON) # The output from the LLM (NarrativeService output)
|
||||
|
||||
model_used = Column(String, nullable=True) # e.g. "gemini-1.5-flash"
|
||||
model_used = Column(String, nullable=True) # e.g. "gemini-1.5-flash"
|
||||
|
||||
@@ -1,101 +1,154 @@
|
||||
import os
|
||||
import json
|
||||
import re
|
||||
from google import genai
|
||||
from typing import Dict, Any, List, Optional
|
||||
from typing import Dict, Any
|
||||
|
||||
try:
|
||||
from openai import OpenAI
|
||||
except ImportError:
|
||||
OpenAI = None
|
||||
|
||||
try:
|
||||
from google import genai
|
||||
except ImportError:
|
||||
genai = None
|
||||
|
||||
|
||||
class NarrativeService:
|
||||
def __init__(self, model_name: str = "gpt-5-mini-2025-08-07"):
    """Store the model name, then resolve the provider and build its client.

    The order matters: provider detection reads ``self.model_name``, and
    client creation reads ``self.provider``.
    """
    self.model_name = model_name

    provider = self._detect_provider()
    self.provider = provider

    self.client = self._init_client()
|
||||
|
||||
def _detect_provider(self) -> str:
    """Choose the LLM provider from the model name and available credentials.

    A provider counts as available when its API key is set in the environment
    and its SDK was importable. Selection order:
    1. ``"openai"`` if the model name starts with ``"gpt"`` and OpenAI is
       available;
    2. ``"gemini"`` if Gemini is available;
    3. ``"openai"`` if OpenAI is available;
    4. ``"none"`` otherwise.

    The original chain ended with a second, unreachable Gemini check; it has
    been removed without changing the result.
    """
    openai_key = os.getenv("OPENAI_API_KEY") or os.getenv("OPENAI_APIKEY")
    gemini_key = os.getenv("GEMINI_API_KEY")

    openai_available = bool(openai_key and OpenAI)
    gemini_available = bool(gemini_key and genai)

    if self.model_name.startswith("gpt") and openai_available:
        return "openai"
    if gemini_available:
        return "gemini"
    if openai_available:
        return "openai"
    return "none"
|
||||
|
||||
def _init_client(self):
    """Build the SDK client for ``self.provider``, or return ``None`` if there is none."""
    if self.provider == "gemini":
        return genai.Client(api_key=os.getenv("GEMINI_API_KEY"))

    if self.provider != "openai":
        return None

    # Either spelling of the OpenAI key variable is accepted.
    key = os.getenv("OPENAI_API_KEY") or os.getenv("OPENAI_APIKEY")
    return OpenAI(api_key=key)
|
||||
|
||||
def generate_full_narrative(self, stats_json: Dict[str, Any]) -> Dict[str, Any]:
    """
    Produce the narrative report for ``stats_json`` with a single LLM call.

    Returns the fallback narrative when no client is configured, when the
    provider is not recognised, or when the provider call raises.
    """
    if not self.client:
        print("WARNING: No LLM client available")
        return self._get_fallback_narrative()

    # Payload shaping and prompt building run outside the try block, so their
    # errors propagate to the caller.
    prompt = self._build_prompt(self._shape_payload(stats_json))

    if self.provider == "openai":
        call_provider = self._call_openai
    elif self.provider == "gemini":
        call_provider = self._call_gemini
    else:
        return self._get_fallback_narrative()

    try:
        return call_provider(prompt)
    except Exception as e:
        print(f"LLM Generation Error: {e}")
        return self._get_fallback_narrative()
|
||||
|
||||
def _call_openai(self, prompt: str) -> Dict[str, Any]:
|
||||
response = self.client.chat.completions.create(
|
||||
model=self.model_name,
|
||||
messages=[
|
||||
{
|
||||
"role": "system",
|
||||
"content": "You are a witty music critic. Output only valid JSON.",
|
||||
},
|
||||
{"role": "user", "content": prompt},
|
||||
],
|
||||
response_format={"type": "json_object"},
|
||||
max_completion_tokens=1500,
|
||||
temperature=0.8,
|
||||
)
|
||||
return self._clean_and_parse_json(response.choices[0].message.content)
|
||||
|
||||
def _call_gemini(self, prompt: str) -> Dict[str, Any]:
    """Send ``prompt`` to Gemini, requesting a JSON response, and parse the reply text."""
    json_only = genai.types.GenerateContentConfig(
        response_mime_type="application/json"
    )
    reply = self.client.models.generate_content(
        model=self.model_name,
        contents=prompt,
        config=json_only,
    )
    return self._clean_and_parse_json(reply.text)
|
||||
|
||||
def _build_prompt(self, clean_stats: Dict[str, Any]) -> str:
|
||||
return f"""Analyze this Spotify listening data and generate a personalized report.
|
||||
|
||||
**RULES:**
|
||||
1. NO mental health diagnoses. Use behavioral descriptors only.
|
||||
2. Be specific - reference actual metrics from the data.
|
||||
3. Be playful but not cruel.
|
||||
4. Return ONLY valid JSON.
|
||||
|
||||
**DATA:**
|
||||
{json.dumps(clean_stats, indent=2)}
|
||||
|
||||
**REQUIRED JSON:**
|
||||
{{
|
||||
"vibe_check_short": "1-2 sentence hook for the hero banner.",
|
||||
"vibe_check": "2-3 paragraphs describing their overall listening personality.",
|
||||
"patterns": ["Observation 1", "Observation 2", "Observation 3"],
|
||||
"persona": "A creative label (e.g., 'The Genre Chameleon').",
|
||||
"era_insight": "Comment on Musical Age ({clean_stats.get("era", {}).get("musical_age", "N/A")}).",
|
||||
"roast": "1-2 sentence playful roast.",
|
||||
"comparison": "Compare to previous period if data exists."
|
||||
}}"""
|
||||
|
||||
def _shape_payload(self, stats: Dict[str, Any]) -> Dict[str, Any]:
|
||||
"""
|
||||
Compresses the stats JSON to save tokens and focus the LLM.
|
||||
Removes raw lists beyond top 5/10.
|
||||
"""
|
||||
s = stats.copy()
|
||||
|
||||
# Simplify Volume
|
||||
|
||||
if "volume" in s:
|
||||
s["volume"] = {
|
||||
k: v for k, v in s["volume"].items()
|
||||
volume_copy = {
|
||||
k: v
|
||||
for k, v in s["volume"].items()
|
||||
if k not in ["top_tracks", "top_artists", "top_albums", "top_genres"]
|
||||
}
|
||||
# Add back condensed top lists (just names)
|
||||
s["volume"]["top_tracks"] = [t["name"] for t in stats["volume"].get("top_tracks", [])[:5]]
|
||||
s["volume"]["top_artists"] = [a["name"] for a in stats["volume"].get("top_artists", [])[:5]]
|
||||
s["volume"]["top_genres"] = [g["name"] for g in stats["volume"].get("top_genres", [])[:5]]
|
||||
volume_copy["top_tracks"] = [
|
||||
t["name"] for t in stats["volume"].get("top_tracks", [])[:5]
|
||||
]
|
||||
volume_copy["top_artists"] = [
|
||||
a["name"] for a in stats["volume"].get("top_artists", [])[:5]
|
||||
]
|
||||
volume_copy["top_genres"] = [
|
||||
g["name"] for g in stats["volume"].get("top_genres", [])[:5]
|
||||
]
|
||||
s["volume"] = volume_copy
|
||||
|
||||
if "time_habits" in s:
|
||||
s["time_habits"] = {
|
||||
k: v for k, v in s["time_habits"].items() if k != "heatmap"
|
||||
}
|
||||
|
||||
if "sessions" in s:
|
||||
s["sessions"] = {
|
||||
k: v for k, v in s["sessions"].items() if k != "session_list"
|
||||
}
|
||||
|
||||
# Simplify Time (Keep distributions but maybe round them?)
|
||||
# Keeping hourly/daily is fine, they are small arrays.
|
||||
|
||||
# Simplify Vibe (Remove huge transition arrays if they accidentally leaked, though stats service handles this)
|
||||
|
||||
# Remove period details if verbose
|
||||
return s
|
||||
|
||||
def _clean_and_parse_json(self, raw_text: str) -> Dict[str, Any]:
|
||||
"""
|
||||
Robust JSON extractor.
|
||||
"""
|
||||
try:
|
||||
# 1. Try direct parse
|
||||
return json.loads(raw_text)
|
||||
except json.JSONDecodeError:
|
||||
pass
|
||||
|
||||
# 2. Extract between first { and last }
|
||||
try:
|
||||
match = re.search(r"\{.*\}", raw_text, re.DOTALL)
|
||||
if match:
|
||||
@@ -107,16 +160,11 @@ Your goal is to generate a JSON report that acts as a deeper, more honest "Spoti
|
||||
|
||||
def _get_fallback_narrative(self) -> Dict[str, Any]:
|
||||
return {
|
||||
"vibe_check": "Data processing error. You're too mysterious for us to analyze right now.",
|
||||
"vibe_check_short": "Your taste is... interesting.",
|
||||
"vibe_check": "Data processing error. You're too mysterious to analyze right now.",
|
||||
"patterns": [],
|
||||
"persona": "The Enigma",
|
||||
"era_insight": "Time is a flat circle.",
|
||||
"roast": "You broke the machine. Congratulations.",
|
||||
"comparison": "N/A"
|
||||
"comparison": "N/A",
|
||||
}
|
||||
|
||||
# Individual accessors if needed by frontend, though full_narrative is preferred
|
||||
def generate_vibe_check(self, stats):
    """Return the ``"vibe_check"`` field of a freshly generated narrative.

    Delegates to ``generate_full_narrative``; yields ``None`` when the
    narrative has no such key.
    """
    narrative = self.generate_full_narrative(stats)
    return narrative.get("vibe_check")
|
||||
def identify_patterns(self, stats):
    """Return the ``"patterns"`` field of a freshly generated narrative.

    Delegates to ``generate_full_narrative``; yields ``None`` when the
    narrative has no such key.
    """
    narrative = self.generate_full_narrative(stats)
    return narrative.get("patterns")
|
||||
def generate_persona(self, stats):
    """Return the ``"persona"`` field of a freshly generated narrative.

    Delegates to ``generate_full_narrative``; yields ``None`` when the
    narrative has no such key.
    """
    narrative = self.generate_full_narrative(stats)
    return narrative.get("persona")
|
||||
def generate_roast(self, stats):
    """Return the ``"roast"`` field of a freshly generated narrative.

    Delegates to ``generate_full_narrative``; yields ``None`` when the
    narrative has no such key.
    """
    narrative = self.generate_full_narrative(stats)
    return narrative.get("roast")
|
||||
@@ -3,16 +3,30 @@ from typing import List, Dict, Any
|
||||
|
||||
RECCOBEATS_API_URL = "https://api.reccobeats.com/v1/audio-features"
|
||||
|
||||
|
||||
class ReccoBeatsClient:
    """Async client for the ReccoBeats audio-features endpoint."""

    async def get_audio_features(self, spotify_ids: List[str]) -> List[Dict[str, Any]]:
        """Fetch audio features for the given Spotify track ids.

        Each returned item whose ``href`` points at a Spotify track URL gets
        an extra ``"spotify_id"`` key extracted from that URL.

        Args:
            spotify_ids: Track ids, sent comma-joined as the ``ids`` query
                parameter.

        Returns:
            The ``content`` list of the response. An empty list is returned
            for empty input, a non-200 status, or any error while requesting
            or decoding (errors are printed, never raised).
        """
        if not spotify_ids:
            return []

        ids_param = ",".join(spotify_ids)
        async with httpx.AsyncClient(timeout=30.0) as client:
            try:
                response = await client.get(
                    RECCOBEATS_API_URL, params={"ids": ids_param}
                )
                if response.status_code != 200:
                    print(f"ReccoBeats API returned status {response.status_code}")
                    return []

                content = response.json().get("content", [])

                for item in content:
                    # `or ""` covers an explicit JSON null href, which would
                    # otherwise raise and discard the whole batch.
                    href = item.get("href") or ""
                    if "spotify.com/track/" in href:
                        # Last path segment, with any query string removed.
                        spotify_id = href.split("/track/")[-1].split("?")[0]
                        item["spotify_id"] = spotify_id

                return content
            except Exception as e:
                # Deliberate best-effort: report the failure and return nothing.
                print(f"ReccoBeats API error: {e}")
                return []
|
||||
|
||||
@@ -8,6 +8,7 @@ from typing import List, Dict, Any
|
||||
SPOTIFY_TOKEN_URL = "https://accounts.spotify.com/api/token"
|
||||
SPOTIFY_API_BASE = "https://api.spotify.com/v1"
|
||||
|
||||
|
||||
class SpotifyClient:
|
||||
def __init__(self, client_id: str, client_secret: str, refresh_token: str):
|
||||
self.client_id = client_id
|
||||
@@ -92,3 +93,17 @@ class SpotifyClient:
|
||||
return []
|
||||
|
||||
return response.json().get("artists", [])
|
||||
|
||||
async def get_currently_playing(self) -> Dict[str, Any] | None:
    """Fetch the user's currently playing item from the Spotify Web API.

    Returns:
        The decoded JSON body on HTTP 200. ``None`` on HTTP 204 or on any
        other non-200 status (the response text is printed in that case).

    Raises:
        Whatever ``get_access_token``, the HTTP request, or JSON decoding
        raises; nothing is caught here.
    """
    token = await self.get_access_token()
    async with httpx.AsyncClient() as client:
        response = await client.get(
            f"{SPOTIFY_API_BASE}/me/player/currently-playing",
            headers={"Authorization": f"Bearer {token}"},
        )
        if response.status_code == 204:
            return None
        if response.status_code != 200:
            print(f"Error fetching currently playing: {response.text}")
            return None
        return response.json()
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user