"""Background ingestion of Spotify listening history.

Polls Spotify for recently played tracks, stores tracks / artists / plays via
SQLAlchemy, then enriches stored rows with audio features (ReccoBeats) and
artist genres (Spotify).
"""
import asyncio
import os
from datetime import datetime

from dateutil import parser
from sqlalchemy.orm import Session

from .database import SessionLocal
from .models import Artist, PlayHistory, Track
from .services.reccobeats_client import ReccoBeatsClient
from .services.spotify_client import SpotifyClient

# Maximum number of rows enriched per pass, for tracks and artists alike.
_ENRICH_BATCH_SIZE = 50
# Spotify allows fetching 50 artists at a time.
_SPOTIFY_ARTISTS_CHUNK = 50
# Number of recently played items requested per poll.
_RECENTLY_PLAYED_LIMIT = 50
# Pause between two polls of the worker loop.
_POLL_INTERVAL_SECONDS = 60

# Audio-feature keys copied verbatim from a ReccoBeats entry onto a Track.
_AUDIO_FEATURE_FIELDS = (
    "danceability",
    "energy",
    "key",
    "loudness",
    "mode",
    "speechiness",
    "acousticness",
    "instrumentalness",
    "liveness",
    "valence",
    "tempo",
)


def get_spotify_client() -> SpotifyClient:
    """Build a SpotifyClient from the SPOTIFY_* environment variables."""
    # Credentials are read at call time, so the environment may be populated
    # after this module has been imported.
    return SpotifyClient(
        client_id=os.getenv("SPOTIFY_CLIENT_ID"),
        client_secret=os.getenv("SPOTIFY_CLIENT_SECRET"),
        refresh_token=os.getenv("SPOTIFY_REFRESH_TOKEN"),
    )


def get_reccobeats_client() -> ReccoBeatsClient:
    """Build a ReccoBeatsClient with its default configuration."""
    return ReccoBeatsClient()


async def ensure_artists_exist(db: Session, artists_data: list) -> list:
    """Ensure every artist in ``artists_data`` exists in the Artist table.

    Args:
        db: Open SQLAlchemy session. New artists are added to it but not
            committed; the caller's commit persists them.
        artists_data: Artist dicts carrying at least ``"id"`` and ``"name"``.

    Returns:
        One Artist object per distinct artist id, in first-seen order.
        Entries without an id are skipped.
    """
    artists_by_id = {}
    for a_data in artists_data:
        artist_id = a_data.get("id")
        # An artist without an id cannot be stored under the Artist key, and a
        # repeated id must not be added or linked a second time.
        if not artist_id or artist_id in artists_by_id:
            continue

        artist = db.query(Artist).filter(Artist.id == artist_id).first()
        if not artist:
            # `genres` is deliberately left unset: enrich_tracks() selects
            # artists whose genres column IS NULL, so a new artist must not
            # start out with an empty list or it would never be enriched.
            # NOTE(review): passing genres=None explicitly may be stored as
            # JSON null rather than SQL NULL depending on the column's
            # none_as_null setting -- confirm against the model.
            artist = Artist(id=artist_id, name=a_data["name"])
            db.add(artist)
        artists_by_id[artist_id] = artist
    return list(artists_by_id.values())


def _track_id_from_href(href):
    """Return the id following ``tracks/`` or ``track/`` in ``href``, else None."""
    if not isinstance(href, str):
        return None
    for marker in ("tracks/", "track/"):
        if marker in href:
            # Drop any query string that follows the id.
            return href.split(marker)[1].split("?")[0]
    return None


def _index_features(features_list) -> dict:
    """Index audio-feature entries by every id they can be looked up under."""
    features_map = {}
    for entry in features_list or []:
        if not entry:
            continue
        # NOTE(review): ReccoBeats' own `id` may differ from the Spotify track
        # id, which is then only recoverable from `href` -- confirm against
        # the client. Indexing under both keeps the lookup working either way.
        for candidate in (entry.get("id"), _track_id_from_href(entry.get("href"))):
            if candidate:
                features_map[candidate] = entry
    return features_map


async def _enrich_audio_features(db: Session, recco_client: ReccoBeatsClient) -> None:
    """Fill audio features on up to one batch of tracks that lack them."""
    # NOTE: tracks the service never returns features for keep danceability
    # NULL and are selected again on every pass; fixing that needs a
    # "features checked" marker on the model.
    tracks_missing_features = (
        db.query(Track)
        .filter(Track.danceability.is_(None))
        .limit(_ENRICH_BATCH_SIZE)
        .all()
    )
    print(f"DEBUG: Found {len(tracks_missing_features)} tracks missing audio features.")
    if not tracks_missing_features:
        return

    print(f"Enriching {len(tracks_missing_features)} tracks with audio features (ReccoBeats)...")
    ids = [t.id for t in tracks_missing_features]
    features_map = _index_features(await recco_client.get_audio_features(ids))

    updated_count = 0
    for track in tracks_missing_features:
        data = features_map.get(track.id)
        if not data:
            continue
        for field in _AUDIO_FEATURE_FIELDS:
            setattr(track, field, data.get(field))
        updated_count += 1

    print(f"Updated {updated_count} tracks with audio features.")
    db.commit()


async def _enrich_artist_genres(db: Session, spotify_client: SpotifyClient) -> None:
    """Fetch genres for up to one batch of artists whose genres are NULL."""
    # NULL means "never fetched"; an empty list means "fetched, none known".
    artists_missing_genres = (
        db.query(Artist)
        .filter(Artist.genres.is_(None))
        .limit(_ENRICH_BATCH_SIZE)
        .all()
    )
    if not artists_missing_genres:
        return

    print(f"Enriching {len(artists_missing_genres)} artists with genres (Spotify)...")
    artist_ids_list = [a.id for a in artists_missing_genres]
    artist_data_map = {}

    for start in range(0, len(artist_ids_list), _SPOTIFY_ARTISTS_CHUNK):
        chunk = artist_ids_list[start:start + _SPOTIFY_ARTISTS_CHUNK]
        artists_data = await spotify_client.get_artists(chunk)
        for a_data in artists_data or []:
            if a_data:
                artist_data_map[a_data["id"]] = a_data.get("genres", [])

    for artist in artists_missing_genres:
        # Artists that could not be fetched get an empty list so they are not
        # retried forever.
        artist.genres = artist_data_map.get(artist.id) or []

    db.commit()


async def enrich_tracks(db: Session, spotify_client: SpotifyClient, recco_client: ReccoBeatsClient) -> None:
    """Enrich stored tracks with audio features and stored artists with genres.

    Args:
        db: Open SQLAlchemy session; each enrichment step commits its changes.
        spotify_client: Client used to fetch artist genres.
        recco_client: Client used to fetch audio features.
    """
    # 1. Enrich Audio Features (via ReccoBeats)
    await _enrich_audio_features(db, recco_client)
    # 2. Enrich Artist Genres (via Spotify Artists)
    await _enrich_artist_genres(db, spotify_client)


async def _get_or_create_track(db: Session, track_data: dict):
    """Return the stored Track for ``track_data``, creating and committing it if new."""
    track_id = track_data["id"]
    track = db.query(Track).filter(Track.id == track_id).first()
    if track:
        return track

    print(f"New track found: {track_data['name']}")
    artists_data = track_data.get("artists") or []
    track = Track(
        id=track_id,
        name=track_data["name"],
        artist=", ".join(a["name"] for a in artists_data),  # Legacy string
        album=track_data["album"]["name"],
        duration_ms=track_data["duration_ms"],
        popularity=track_data["popularity"],
        raw_data=track_data,
    )
    # Handle Artists Relation
    track.artists = await ensure_artists_exist(db, artists_data)
    db.add(track)
    db.commit()
    return track


async def _backfill_track_artists(db: Session, track) -> None:
    """Link artists from ``raw_data`` onto a track that has none linked."""
    # Runs for pre-existing tracks too, e.g. rows stored before the artists
    # relation was introduced.
    if track.artists or not track.raw_data or "artists" not in track.raw_data:
        return
    print(f"Backfilling artists for track {track.name}")
    track.artists = await ensure_artists_exist(db, track.raw_data["artists"])
    db.commit()


def _record_play(db: Session, item: dict, track_data: dict, played_at) -> None:
    """Store a PlayHistory row unless this (track, played_at) pair already exists."""
    track_id = track_data["id"]
    exists = db.query(PlayHistory).filter(
        PlayHistory.track_id == track_id,
        PlayHistory.played_at == played_at,
    ).first()
    if exists:
        return

    print(f" recording play: {track_data['name']} at {played_at}")
    play = PlayHistory(
        track_id=track_id,
        played_at=played_at,
        # `context` may be missing or None.
        context_uri=(item.get("context") or {}).get("uri"),
    )
    db.add(play)
    db.commit()


async def ingest_recently_played(db: Session) -> None:
    """Fetch recently played items from Spotify and persist them.

    For each item the track (and its artists) is created if unknown, missing
    artist links are backfilled, and the play is recorded once. Afterwards one
    enrichment pass runs. If Spotify cannot be reached, the error is printed
    and nothing is ingested or enriched.

    Args:
        db: Open SQLAlchemy session; changes are committed as they are made.
    """
    spotify_client = get_spotify_client()
    recco_client = get_reccobeats_client()

    try:
        items = await spotify_client.get_recently_played(limit=_RECENTLY_PLAYED_LIMIT)
    except Exception as e:
        print(f"Error connecting to Spotify: {e}")
        return

    items = items or []
    print(f"Fetched {len(items)} items from Spotify.")

    for item in items:
        track_data = item.get("track") or {}
        if not track_data.get("id"):
            # Without an id the track cannot be stored under the Track key
            # (presumably local files -- TODO confirm).
            print("Skipping recently played item without a track id.")
            continue

        played_at = parser.isoparse(item["played_at"])
        track = await _get_or_create_track(db, track_data)
        await _backfill_track_artists(db, track)
        _record_play(db, item, track_data, played_at)

    # Enrich
    await enrich_tracks(db, spotify_client, recco_client)


async def run_worker() -> None:
    """Simulate a background worker: poll Spotify, then sleep, forever.

    A single session is used for the whole run. Any exception escaping a poll
    is printed and ends the loop; the session is closed in every case.
    """
    db = SessionLocal()
    try:
        while True:
            print("Worker: Polling Spotify...")
            await ingest_recently_played(db)
            print(f"Worker: Sleeping for {_POLL_INTERVAL_SECONDS} seconds...")
            await asyncio.sleep(_POLL_INTERVAL_SECONDS)
    except Exception as e:
        print(f"Worker crashed: {e}")
    finally:
        db.close()