Fixed and added all the stats_service.py methods

This commit is contained in:
bnair123
2025-12-25 22:17:21 +04:00
parent e7980cc706
commit 9b8f7355fb
9 changed files with 412 additions and 146 deletions

View File

@@ -6,9 +6,10 @@ from .models import Track, PlayHistory, Artist
from .database import SessionLocal
from .services.spotify_client import SpotifyClient
from .services.reccobeats_client import ReccoBeatsClient
from .services.genius_client import GeniusClient
from dateutil import parser
# Initialize Spotify Client (env vars will be populated later)
# Initialize Clients
def get_spotify_client():
return SpotifyClient(
client_id=os.getenv("SPOTIFY_CLIENT_ID"),
@@ -19,57 +20,55 @@ def get_spotify_client():
def get_reccobeats_client():
    """Build and return a new ReccoBeatsClient using its default constructor."""
    client = ReccoBeatsClient()
    return client
def get_genius_client():
    """Build and return a new GeniusClient using its default constructor."""
    client = GeniusClient()
    return client
async def ensure_artists_exist(db: Session, artists_data: list):
    """
    Ensures that all artists in the list exist in the Artist table.

    Args:
        db: Session used to look up existing artists and to stage new ones.
        artists_data: Artist payload dicts. Each must carry "id"; "name" is
            read when the artist has to be created. An optional non-empty
            "images" list supplies the image URL from its first entry.

    Returns:
        A list of Artist objects, one per entry of artists_data, in order.

    Raises:
        KeyError: If a payload lacks "id", lacks "name" when a new artist
            must be created, or its first image entry lacks "url".
    """
    artist_objects = []
    # Artists already resolved during this call, keyed by id. Without this, a
    # payload that repeats an id could stage two new Artist rows with the same
    # primary key when the session does not autoflush before the second lookup.
    # NOTE(review): SessionLocal's autoflush setting is not visible here.
    resolved = {}
    for a_data in artists_data:
        artist_id = a_data["id"]
        artist = resolved.get(artist_id)
        if artist is None:
            artist = db.query(Artist).filter(Artist.id == artist_id).first()
            if artist is None:
                # Image is rarely present on track-linked artist payloads,
                # so fall back to None when it is missing or empty.
                images = a_data.get("images")
                artist = Artist(
                    id=artist_id,
                    name=a_data["name"],
                    genres=[],
                    image_url=images[0]["url"] if images else None,
                )
                # Only staged in the session; no flush or commit happens
                # here, so persisting is left to the caller's commit.
                db.add(artist)
            resolved[artist_id] = artist
        artist_objects.append(artist)
    return artist_objects
async def enrich_tracks(db: Session, spotify_client: SpotifyClient, recco_client: ReccoBeatsClient):
async def enrich_tracks(db: Session, spotify_client: SpotifyClient, recco_client: ReccoBeatsClient, genius_client: GeniusClient):
"""
Finds tracks missing genres (Spotify) or audio features (ReccoBeats) and enriches them.
Also enriches Artists with genres.
Enrichment Pipeline:
1. Audio Features (ReccoBeats)
2. Artist Metadata: Genres & Images (Spotify)
3. Lyrics & Fallback Images (Genius)
"""
# 1. Enrich Audio Features (via ReccoBeats)
# 1. Enrich Audio Features
tracks_missing_features = db.query(Track).filter(Track.danceability == None).limit(50).all()
print(f"DEBUG: Found {len(tracks_missing_features)} tracks missing audio features.")
if tracks_missing_features:
print(f"Enriching {len(tracks_missing_features)} tracks with audio features (ReccoBeats)...")
print(f"Enriching {len(tracks_missing_features)} tracks with audio features...")
ids = [t.id for t in tracks_missing_features]
features_list = await recco_client.get_audio_features(ids)
# Map features by ID
features_map = {}
for f in features_list:
# Handle potential ID mismatch or URI format
tid = f.get("id")
if not tid and "href" in f:
if "tracks/" in f["href"]:
tid = f["href"].split("tracks/")[1].split("?")[0]
elif "track/" in f["href"]:
tid = f["href"].split("track/")[1].split("?")[0]
if tid: features_map[tid] = f
if tid:
features_map[tid] = f
updated_count = 0
for track in tracks_missing_features:
data = features_map.get(track.id)
if data:
@@ -84,47 +83,68 @@ async def enrich_tracks(db: Session, spotify_client: SpotifyClient, recco_client
track.liveness = data.get("liveness")
track.valence = data.get("valence")
track.tempo = data.get("tempo")
updated_count += 1
print(f"Updated {updated_count} tracks with audio features.")
db.commit()
# 2. Enrich Artist Genres (via Spotify Artists)
# We look for artists who have no genres. Note: an artist might genuinely have no genres,
# so we might need a flag "genres_checked" in the future, but for now checking empty list is okay.
# However, newly created artists have genres=[] (empty list) or None?
# My model definition: genres = Column(JSON, nullable=True)
# So if it is None, we haven't fetched it.
artists_missing_genres = db.query(Artist).filter(Artist.genres == None).limit(50).all()
if artists_missing_genres:
print(f"Enriching {len(artists_missing_genres)} artists with genres (Spotify)...")
artist_ids_list = [a.id for a in artists_missing_genres]
# 2. Enrich Artist Genres & Images (Spotify)
artists_missing_data = db.query(Artist).filter((Artist.genres == None) | (Artist.image_url == None)).limit(50).all()
if artists_missing_data:
print(f"Enriching {len(artists_missing_data)} artists with genres/images...")
artist_ids_list = [a.id for a in artists_missing_data]
artist_data_map = {}
# Spotify allows fetching 50 artists at a time
for i in range(0, len(artist_ids_list), 50):
chunk = artist_ids_list[i:i+50]
artists_data = await spotify_client.get_artists(chunk)
for a_data in artists_data:
if a_data:
artist_data_map[a_data["id"]] = a_data.get("genres", [])
img = a_data["images"][0]["url"] if a_data.get("images") else None
artist_data_map[a_data["id"]] = {
"genres": a_data.get("genres", []),
"image_url": img
}
for artist in artists_missing_genres:
genres = artist_data_map.get(artist.id)
if genres is not None:
artist.genres = genres
for artist in artists_missing_data:
data = artist_data_map.get(artist.id)
if data:
if artist.genres is None: artist.genres = data["genres"]
if artist.image_url is None: artist.image_url = data["image_url"]
elif artist.genres is None:
artist.genres = [] # Prevent retry loop
db.commit()
# 3. Enrich Lyrics (Genius)
# Only fetch for tracks that have been played recently to avoid spamming Genius API
tracks_missing_lyrics = db.query(Track).filter(Track.lyrics == None).order_by(Track.updated_at.desc()).limit(10).all()
if tracks_missing_lyrics and genius_client.genius:
print(f"Enriching {len(tracks_missing_lyrics)} tracks with lyrics (Genius)...")
for track in tracks_missing_lyrics:
# We need the primary artist name
artist_name = track.artist.split(",")[0] # Heuristic: take first artist
print(f"Searching Genius for: {track.name} by {artist_name}")
data = genius_client.search_song(track.name, artist_name)
if data:
track.lyrics = data["lyrics"]
# Fallback: if we didn't get high-res art from Spotify, use Genius
if not track.image_url and data.get("image_url"):
track.image_url = data["image_url"]
else:
# If we couldn't fetch, set to empty list so we don't keep retrying forever (or handle errors better)
artist.genres = []
track.lyrics = "" # Mark as empty to prevent retry loop
# Small sleep to be nice to API? GeniusClient is synchronous.
# We are in async function but GeniusClient is blocking. It's fine for worker.
db.commit()
async def ingest_recently_played(db: Session):
spotify_client = get_spotify_client()
recco_client = get_reccobeats_client()
genius_client = get_genius_client()
try:
items = await spotify_client.get_recently_played(limit=50)
@@ -144,11 +164,18 @@ async def ingest_recently_played(db: Session):
if not track:
print(f"New track found: {track_data['name']}")
# Extract Album Art
image_url = None
if track_data.get("album") and track_data["album"].get("images"):
image_url = track_data["album"]["images"][0]["url"]
track = Track(
id=track_id,
name=track_data["name"],
artist=", ".join([a["name"] for a in track_data["artists"]]), # Legacy string
artist=", ".join([a["name"] for a in track_data["artists"]]),
album=track_data["album"]["name"],
image_url=image_url,
duration_ms=track_data["duration_ms"],
popularity=track_data["popularity"],
raw_data=track_data
@@ -162,11 +189,8 @@ async def ingest_recently_played(db: Session):
db.add(track)
db.commit()
# Ensure relationships exist even if track existed (e.g. migration)
# Check if track has artists linked. If not (and raw_data has them), link them.
# FIX: Logic was previously indented improperly inside `if not track`.
# Ensure relationships exist logic...
if not track.artists and track.raw_data and "artists" in track.raw_data:
print(f"Backfilling artists for track {track.name}")
artist_objects = await ensure_artists_exist(db, track.raw_data["artists"])
track.artists = artist_objects
db.commit()
@@ -188,7 +212,7 @@ async def ingest_recently_played(db: Session):
db.commit()
# Enrich
await enrich_tracks(db, spotify_client, recco_client)
await enrich_tracks(db, spotify_client, recco_client, genius_client)
async def run_worker():
"""Simulates a background worker loop."""