mirror of
https://github.com/bnair123/MusicAnalyser.git
synced 2026-02-25 11:46:07 +00:00
Add skip tracking, compressed heatmap, listening log, docs, tests, and OpenAI support
Major changes: - Add skip tracking: poll currently-playing every 15s, detect skips (<30s listened) - Add listening-log and sessions API endpoints - Fix ReccoBeats client to extract spotify_id from href response - Compress heatmap from 24 hours to 6 x 4-hour blocks - Add OpenAI support in narrative service (use max_completion_tokens for new models) - Add ListeningLog component with timeline and list views - Update all frontend components to use real data (album art, play counts) - Add docker-compose external network (dockernet) support - Add comprehensive documentation (API, DATA_MODEL, ARCHITECTURE, FRONTEND) - Add unit tests for ingest and API endpoints
This commit is contained in:
@@ -1,6 +1,6 @@
|
||||
import asyncio
|
||||
import os
|
||||
from datetime import datetime
|
||||
from datetime import datetime, timedelta
|
||||
from sqlalchemy.orm import Session
|
||||
from .models import Track, PlayHistory, Artist
|
||||
from .database import SessionLocal
|
||||
@@ -9,6 +9,17 @@ from .services.reccobeats_client import ReccoBeatsClient
|
||||
from .services.genius_client import GeniusClient
|
||||
from dateutil import parser
|
||||
|
||||
|
||||
class PlaybackTracker:
|
||||
def __init__(self):
|
||||
self.current_track_id = None
|
||||
self.track_start_time = None
|
||||
self.accumulated_listen_ms = 0
|
||||
self.last_progress_ms = 0
|
||||
self.last_poll_time = None
|
||||
self.is_paused = False
|
||||
|
||||
|
||||
# Initialize Clients
|
||||
def get_spotify_client():
|
||||
return SpotifyClient(
|
||||
@@ -17,12 +28,15 @@ def get_spotify_client():
|
||||
refresh_token=os.getenv("SPOTIFY_REFRESH_TOKEN"),
|
||||
)
|
||||
|
||||
|
||||
def get_reccobeats_client():
|
||||
return ReccoBeatsClient()
|
||||
|
||||
|
||||
def get_genius_client():
|
||||
return GeniusClient()
|
||||
|
||||
|
||||
async def ensure_artists_exist(db: Session, artists_data: list):
|
||||
"""
|
||||
Ensures that all artists in the list exist in the Artist table.
|
||||
@@ -36,18 +50,19 @@ async def ensure_artists_exist(db: Session, artists_data: list):
|
||||
img = None
|
||||
if "images" in a_data and a_data["images"]:
|
||||
img = a_data["images"][0]["url"]
|
||||
|
||||
artist = Artist(
|
||||
id=artist_id,
|
||||
name=a_data["name"],
|
||||
genres=[],
|
||||
image_url=img
|
||||
)
|
||||
|
||||
artist = Artist(id=artist_id, name=a_data["name"], genres=[], image_url=img)
|
||||
db.add(artist)
|
||||
artist_objects.append(artist)
|
||||
return artist_objects
|
||||
|
||||
async def enrich_tracks(db: Session, spotify_client: SpotifyClient, recco_client: ReccoBeatsClient, genius_client: GeniusClient):
|
||||
|
||||
async def enrich_tracks(
|
||||
db: Session,
|
||||
spotify_client: SpotifyClient,
|
||||
recco_client: ReccoBeatsClient,
|
||||
genius_client: GeniusClient,
|
||||
):
|
||||
"""
|
||||
Enrichment Pipeline:
|
||||
1. Audio Features (ReccoBeats)
|
||||
@@ -56,18 +71,19 @@ async def enrich_tracks(db: Session, spotify_client: SpotifyClient, recco_client
|
||||
"""
|
||||
|
||||
# 1. Enrich Audio Features
|
||||
tracks_missing_features = db.query(Track).filter(Track.danceability == None).limit(50).all()
|
||||
tracks_missing_features = (
|
||||
db.query(Track).filter(Track.danceability == None).limit(50).all()
|
||||
)
|
||||
if tracks_missing_features:
|
||||
print(f"Enriching {len(tracks_missing_features)} tracks with audio features...")
|
||||
ids = [t.id for t in tracks_missing_features]
|
||||
features_list = await recco_client.get_audio_features(ids)
|
||||
|
||||
# Map features by ID
|
||||
|
||||
features_map = {}
|
||||
for f in features_list:
|
||||
# Handle potential ID mismatch or URI format
|
||||
tid = f.get("id")
|
||||
if tid: features_map[tid] = f
|
||||
tid = f.get("spotify_id") or f.get("id")
|
||||
if tid:
|
||||
features_map[tid] = f
|
||||
|
||||
for track in tracks_missing_features:
|
||||
data = features_map.get(track.id)
|
||||
@@ -83,61 +99,74 @@ async def enrich_tracks(db: Session, spotify_client: SpotifyClient, recco_client
|
||||
track.liveness = data.get("liveness")
|
||||
track.valence = data.get("valence")
|
||||
track.tempo = data.get("tempo")
|
||||
|
||||
|
||||
db.commit()
|
||||
|
||||
# 2. Enrich Artist Genres & Images (Spotify)
|
||||
artists_missing_data = db.query(Artist).filter((Artist.genres == None) | (Artist.image_url == None)).limit(50).all()
|
||||
artists_missing_data = (
|
||||
db.query(Artist)
|
||||
.filter((Artist.genres == None) | (Artist.image_url == None))
|
||||
.limit(50)
|
||||
.all()
|
||||
)
|
||||
if artists_missing_data:
|
||||
print(f"Enriching {len(artists_missing_data)} artists with genres/images...")
|
||||
artist_ids_list = [a.id for a in artists_missing_data]
|
||||
|
||||
|
||||
artist_data_map = {}
|
||||
for i in range(0, len(artist_ids_list), 50):
|
||||
chunk = artist_ids_list[i:i+50]
|
||||
chunk = artist_ids_list[i : i + 50]
|
||||
artists_data = await spotify_client.get_artists(chunk)
|
||||
for a_data in artists_data:
|
||||
if a_data:
|
||||
img = a_data["images"][0]["url"] if a_data.get("images") else None
|
||||
artist_data_map[a_data["id"]] = {
|
||||
"genres": a_data.get("genres", []),
|
||||
"image_url": img
|
||||
"image_url": img,
|
||||
}
|
||||
|
||||
for artist in artists_missing_data:
|
||||
data = artist_data_map.get(artist.id)
|
||||
if data:
|
||||
if artist.genres is None: artist.genres = data["genres"]
|
||||
if artist.image_url is None: artist.image_url = data["image_url"]
|
||||
if artist.genres is None:
|
||||
artist.genres = data["genres"]
|
||||
if artist.image_url is None:
|
||||
artist.image_url = data["image_url"]
|
||||
elif artist.genres is None:
|
||||
artist.genres = [] # Prevent retry loop
|
||||
|
||||
artist.genres = [] # Prevent retry loop
|
||||
|
||||
db.commit()
|
||||
|
||||
# 3. Enrich Lyrics (Genius)
|
||||
# Only fetch for tracks that have been played recently to avoid spamming Genius API
|
||||
tracks_missing_lyrics = db.query(Track).filter(Track.lyrics == None).order_by(Track.updated_at.desc()).limit(10).all()
|
||||
|
||||
tracks_missing_lyrics = (
|
||||
db.query(Track)
|
||||
.filter(Track.lyrics == None)
|
||||
.order_by(Track.updated_at.desc())
|
||||
.limit(10)
|
||||
.all()
|
||||
)
|
||||
|
||||
if tracks_missing_lyrics and genius_client.genius:
|
||||
print(f"Enriching {len(tracks_missing_lyrics)} tracks with lyrics (Genius)...")
|
||||
for track in tracks_missing_lyrics:
|
||||
# We need the primary artist name
|
||||
artist_name = track.artist.split(",")[0] # Heuristic: take first artist
|
||||
|
||||
artist_name = track.artist.split(",")[0] # Heuristic: take first artist
|
||||
|
||||
print(f"Searching Genius for: {track.name} by {artist_name}")
|
||||
data = genius_client.search_song(track.name, artist_name)
|
||||
|
||||
|
||||
if data:
|
||||
track.lyrics = data["lyrics"]
|
||||
# Fallback: if we didn't get high-res art from Spotify, use Genius
|
||||
if not track.image_url and data.get("image_url"):
|
||||
track.image_url = data["image_url"]
|
||||
else:
|
||||
track.lyrics = "" # Mark as empty to prevent retry loop
|
||||
|
||||
track.lyrics = "" # Mark as empty to prevent retry loop
|
||||
|
||||
# Small sleep to be nice to API? GeniusClient is synchronous.
|
||||
# We are in async function but GeniusClient is blocking. It's fine for worker.
|
||||
|
||||
|
||||
db.commit()
|
||||
|
||||
|
||||
@@ -164,7 +193,7 @@ async def ingest_recently_played(db: Session):
|
||||
|
||||
if not track:
|
||||
print(f"New track found: {track_data['name']}")
|
||||
|
||||
|
||||
# Extract Album Art
|
||||
image_url = None
|
||||
if track_data.get("album") and track_data["album"].get("images"):
|
||||
@@ -173,12 +202,12 @@ async def ingest_recently_played(db: Session):
|
||||
track = Track(
|
||||
id=track_id,
|
||||
name=track_data["name"],
|
||||
artist=", ".join([a["name"] for a in track_data["artists"]]),
|
||||
artist=", ".join([a["name"] for a in track_data["artists"]]),
|
||||
album=track_data["album"]["name"],
|
||||
image_url=image_url,
|
||||
duration_ms=track_data["duration_ms"],
|
||||
popularity=track_data["popularity"],
|
||||
raw_data=track_data
|
||||
raw_data=track_data,
|
||||
)
|
||||
|
||||
# Handle Artists Relation
|
||||
@@ -191,21 +220,27 @@ async def ingest_recently_played(db: Session):
|
||||
|
||||
# Ensure relationships exist logic...
|
||||
if not track.artists and track.raw_data and "artists" in track.raw_data:
|
||||
artist_objects = await ensure_artists_exist(db, track.raw_data["artists"])
|
||||
track.artists = artist_objects
|
||||
db.commit()
|
||||
artist_objects = await ensure_artists_exist(db, track.raw_data["artists"])
|
||||
track.artists = artist_objects
|
||||
db.commit()
|
||||
|
||||
exists = db.query(PlayHistory).filter(
|
||||
PlayHistory.track_id == track_id,
|
||||
PlayHistory.played_at == played_at
|
||||
).first()
|
||||
exists = (
|
||||
db.query(PlayHistory)
|
||||
.filter(
|
||||
PlayHistory.track_id == track_id, PlayHistory.played_at == played_at
|
||||
)
|
||||
.first()
|
||||
)
|
||||
|
||||
if not exists:
|
||||
print(f" recording play: {track_data['name']} at {played_at}")
|
||||
play = PlayHistory(
|
||||
track_id=track_id,
|
||||
played_at=played_at,
|
||||
context_uri=item.get("context", {}).get("uri") if item.get("context") else None
|
||||
context_uri=item.get("context", {}).get("uri")
|
||||
if item.get("context")
|
||||
else None,
|
||||
source="recently_played",
|
||||
)
|
||||
db.add(play)
|
||||
|
||||
@@ -214,17 +249,145 @@ async def ingest_recently_played(db: Session):
|
||||
# Enrich
|
||||
await enrich_tracks(db, spotify_client, recco_client, genius_client)
|
||||
|
||||
|
||||
async def run_worker():
|
||||
"""Simulates a background worker loop."""
|
||||
db = SessionLocal()
|
||||
tracker = PlaybackTracker()
|
||||
spotify_client = get_spotify_client()
|
||||
poll_count = 0
|
||||
|
||||
try:
|
||||
while True:
|
||||
print("Worker: Polling Spotify...")
|
||||
await ingest_recently_played(db)
|
||||
print("Worker: Sleeping for 60 seconds...")
|
||||
await asyncio.sleep(60)
|
||||
poll_count += 1
|
||||
|
||||
await poll_currently_playing(db, spotify_client, tracker)
|
||||
|
||||
if poll_count % 4 == 0:
|
||||
print("Worker: Polling recently-played...")
|
||||
await ingest_recently_played(db)
|
||||
|
||||
await asyncio.sleep(15)
|
||||
except Exception as e:
|
||||
print(f"Worker crashed: {e}")
|
||||
finally:
|
||||
db.close()
|
||||
|
||||
|
||||
async def poll_currently_playing(
|
||||
db: Session, spotify_client: SpotifyClient, tracker: PlaybackTracker
|
||||
):
|
||||
try:
|
||||
response = await spotify_client.get_currently_playing()
|
||||
except Exception as e:
|
||||
print(f"Error polling currently-playing: {e}")
|
||||
return
|
||||
|
||||
now = datetime.utcnow()
|
||||
|
||||
if not response or response.get("currently_playing_type") != "track":
|
||||
if tracker.current_track_id and tracker.last_poll_time:
|
||||
finalize_track(db, tracker)
|
||||
return
|
||||
|
||||
item = response.get("item")
|
||||
if not item:
|
||||
return
|
||||
|
||||
current_track_id = item["id"]
|
||||
current_progress_ms = response.get("progress_ms", 0)
|
||||
is_playing = response.get("is_playing", False)
|
||||
|
||||
if current_track_id != tracker.current_track_id:
|
||||
if tracker.current_track_id and tracker.last_poll_time:
|
||||
finalize_track(db, tracker)
|
||||
|
||||
tracker.current_track_id = current_track_id
|
||||
tracker.track_start_time = now - timedelta(milliseconds=current_progress_ms)
|
||||
tracker.accumulated_listen_ms = current_progress_ms if is_playing else 0
|
||||
tracker.last_progress_ms = current_progress_ms
|
||||
tracker.last_poll_time = now
|
||||
tracker.is_paused = not is_playing
|
||||
|
||||
await ensure_track_exists(db, item, spotify_client)
|
||||
else:
|
||||
if tracker.last_poll_time:
|
||||
time_delta_ms = (now - tracker.last_poll_time).total_seconds() * 1000
|
||||
if is_playing and not tracker.is_paused:
|
||||
tracker.accumulated_listen_ms += time_delta_ms
|
||||
|
||||
tracker.last_progress_ms = current_progress_ms
|
||||
tracker.last_poll_time = now
|
||||
tracker.is_paused = not is_playing
|
||||
|
||||
|
||||
def finalize_track(db: Session, tracker: PlaybackTracker):
|
||||
listened_ms = int(tracker.accumulated_listen_ms)
|
||||
skipped = listened_ms < 30000
|
||||
|
||||
existing = (
|
||||
db.query(PlayHistory)
|
||||
.filter(
|
||||
PlayHistory.track_id == tracker.current_track_id,
|
||||
PlayHistory.played_at >= tracker.track_start_time - timedelta(seconds=5),
|
||||
PlayHistory.played_at <= tracker.track_start_time + timedelta(seconds=5),
|
||||
)
|
||||
.first()
|
||||
)
|
||||
|
||||
if existing:
|
||||
if existing.listened_ms is None:
|
||||
existing.listened_ms = listened_ms
|
||||
existing.skipped = skipped
|
||||
existing.source = "currently_playing"
|
||||
db.commit()
|
||||
else:
|
||||
play = PlayHistory(
|
||||
track_id=tracker.current_track_id,
|
||||
played_at=tracker.track_start_time,
|
||||
listened_ms=listened_ms,
|
||||
skipped=skipped,
|
||||
source="currently_playing",
|
||||
)
|
||||
db.add(play)
|
||||
db.commit()
|
||||
|
||||
print(
|
||||
f"Finalized: {tracker.current_track_id} listened={listened_ms}ms skipped={skipped}"
|
||||
)
|
||||
|
||||
tracker.current_track_id = None
|
||||
tracker.track_start_time = None
|
||||
tracker.accumulated_listen_ms = 0
|
||||
tracker.last_progress_ms = 0
|
||||
tracker.last_poll_time = None
|
||||
tracker.is_paused = False
|
||||
|
||||
|
||||
async def ensure_track_exists(
|
||||
db: Session, track_data: dict, spotify_client: SpotifyClient
|
||||
):
|
||||
track_id = track_data["id"]
|
||||
track = db.query(Track).filter(Track.id == track_id).first()
|
||||
|
||||
if not track:
|
||||
image_url = None
|
||||
if track_data.get("album") and track_data["album"].get("images"):
|
||||
image_url = track_data["album"]["images"][0]["url"]
|
||||
|
||||
track = Track(
|
||||
id=track_id,
|
||||
name=track_data["name"],
|
||||
artist=", ".join([a["name"] for a in track_data.get("artists", [])]),
|
||||
album=track_data.get("album", {}).get("name", "Unknown"),
|
||||
image_url=image_url,
|
||||
duration_ms=track_data.get("duration_ms"),
|
||||
popularity=track_data.get("popularity"),
|
||||
raw_data=track_data,
|
||||
)
|
||||
|
||||
artists_data = track_data.get("artists", [])
|
||||
artist_objects = await ensure_artists_exist(db, artists_data)
|
||||
track.artists = artist_objects
|
||||
|
||||
db.add(track)
|
||||
db.commit()
|
||||
|
||||
Reference in New Issue
Block a user