Implement Phase 2 Frontend and Phase 3 Data Enrichment

- Initialize React+Vite Frontend with Ant Design Dashboard.
- Implement Data Enrichment: ReccoBeats (Audio Features) and Spotify (Genres).
- Update Database Schema via Alembic Migrations.
- Add Docker support (Dockerfile, docker-compose.yml).
- Update README with hosting instructions.
This commit is contained in:
google-labs-jules[bot]
2025-12-24 21:34:36 +00:00
parent 3a424d15a5
commit 0ca9893c68
15 changed files with 607 additions and 60 deletions

View File

@@ -5,6 +5,7 @@ from sqlalchemy.orm import Session
from .models import Track, PlayHistory
from .database import SessionLocal
from .services.spotify_client import SpotifyClient
from .services.reccobeats_client import ReccoBeatsClient
from dateutil import parser
# Initialize Spotify Client (env vars will be populated later)
@@ -15,10 +16,93 @@ def get_spotify_client():
refresh_token=os.getenv("SPOTIFY_REFRESH_TOKEN"),
)
def get_reccobeats_client():
return ReccoBeatsClient()
async def enrich_tracks(db: Session, spotify_client: SpotifyClient, recco_client: ReccoBeatsClient):
"""
Finds tracks missing genres (Spotify) or audio features (ReccoBeats) and enriches them.
"""
# 1. Enrich Audio Features (via ReccoBeats)
tracks_missing_features = db.query(Track).filter(Track.danceability == None).limit(50).all()
print(f"DEBUG: Found {len(tracks_missing_features)} tracks missing audio features.")
if tracks_missing_features:
print(f"Enriching {len(tracks_missing_features)} tracks with audio features (ReccoBeats)...")
ids = [t.id for t in tracks_missing_features]
features_list = await recco_client.get_audio_features(ids)
features_map = {}
for f in features_list:
if "href" in f and "track/" in f["href"]:
tid = f["href"].split("track/")[1].split("?")[0]
features_map[tid] = f
updated_count = 0
for track in tracks_missing_features:
data = features_map.get(track.id)
if data:
track.danceability = data.get("danceability")
track.energy = data.get("energy")
track.key = data.get("key")
track.loudness = data.get("loudness")
track.mode = data.get("mode")
track.speechiness = data.get("speechiness")
track.acousticness = data.get("acousticness")
track.instrumentalness = data.get("instrumentalness")
track.liveness = data.get("liveness")
track.valence = data.get("valence")
track.tempo = data.get("tempo")
updated_count += 1
print(f"Updated {updated_count} tracks with audio features.")
db.commit()
# 2. Enrich Genres (via Spotify Artists)
tracks_missing_genres = db.query(Track).filter(Track.genres == None).limit(50).all()
if tracks_missing_genres:
print(f"Enriching {len(tracks_missing_genres)} tracks with genres (Spotify)...")
artist_ids = set()
track_artist_map = {}
for t in tracks_missing_genres:
if t.raw_data and "artists" in t.raw_data:
a_ids = [a["id"] for a in t.raw_data["artists"]]
artist_ids.update(a_ids)
track_artist_map[t.id] = a_ids
artist_ids_list = list(artist_ids)
artist_genre_map = {}
for i in range(0, len(artist_ids_list), 50):
chunk = artist_ids_list[i:i+50]
artists_data = await spotify_client.get_artists(chunk)
for a_data in artists_data:
if a_data:
artist_genre_map[a_data["id"]] = a_data.get("genres", [])
for t in tracks_missing_genres:
a_ids = track_artist_map.get(t.id, [])
combined_genres = set()
for a_id in a_ids:
genres = artist_genre_map.get(a_id, [])
combined_genres.update(genres)
t.genres = list(combined_genres)
db.commit()
async def ingest_recently_played(db: Session):
client = get_spotify_client()
spotify_client = get_spotify_client()
recco_client = get_reccobeats_client()
try:
items = await client.get_recently_played(limit=50)
items = await spotify_client.get_recently_played(limit=50)
except Exception as e:
print(f"Error connecting to Spotify: {e}")
return
@@ -30,7 +114,6 @@ async def ingest_recently_played(db: Session):
played_at_str = item["played_at"]
played_at = parser.isoparse(played_at_str)
# 1. Check if track exists, if not create it
track_id = track_data["id"]
track = db.query(Track).filter(Track.id == track_id).first()
@@ -46,10 +129,8 @@ async def ingest_recently_played(db: Session):
raw_data=track_data
)
db.add(track)
db.commit() # Commit immediately so ID exists for foreign key
db.commit()
# 2. Check if this specific play instance exists
# We assume (track_id, played_at) is unique enough
exists = db.query(PlayHistory).filter(
PlayHistory.track_id == track_id,
PlayHistory.played_at == played_at
@@ -66,9 +147,13 @@ async def ingest_recently_played(db: Session):
db.commit()
# Enrich
await enrich_tracks(db, spotify_client, recco_client)
async def run_worker():
"""Simulates a background worker loop."""
db = SessionLocal()
try:
while True:
print("Worker: Polling Spotify...")