Implement Phase 2 Frontend and Phase 3 Data Enrichment

- Initialize React+Vite Frontend with Ant Design Dashboard.
- Implement Data Enrichment: ReccoBeats (Audio Features) and Spotify (Genres).
- Update Database Schema via Alembic Migrations.
- Add Docker support (Dockerfile, docker-compose.yml).
- Update README with hosting instructions.
This commit is contained in:
google-labs-jules[bot]
2025-12-24 21:34:36 +00:00
parent 3a424d15a5
commit 0ca9893c68
15 changed files with 607 additions and 60 deletions

View File

@@ -5,6 +5,7 @@ from sqlalchemy.orm import Session
from .models import Track, PlayHistory
from .database import SessionLocal
from .services.spotify_client import SpotifyClient
from .services.reccobeats_client import ReccoBeatsClient
from dateutil import parser
# Initialize Spotify Client (env vars will be populated later)
@@ -15,10 +16,93 @@ def get_spotify_client():
refresh_token=os.getenv("SPOTIFY_REFRESH_TOKEN"),
)
def get_reccobeats_client():
    """Return a newly constructed ``ReccoBeatsClient``."""
    recco_client = ReccoBeatsClient()
    return recco_client
# Attributes copied one-to-one from a ReccoBeats audio-features record onto a Track.
_AUDIO_FEATURE_FIELDS = (
    "danceability",
    "energy",
    "key",
    "loudness",
    "mode",
    "speechiness",
    "acousticness",
    "instrumentalness",
    "liveness",
    "valence",
    "tempo",
)

# Number of tracks examined per enrichment pass, and artist ids per Spotify call.
_ENRICH_BATCH_SIZE = 50


def _spotify_id_from_href(href):
    """Return the track id embedded in a ReccoBeats ``href``, or None if absent."""
    if not isinstance(href, str) or "track/" not in href:
        return None
    # Keep what follows "track/" and drop any query string.
    return href.split("track/")[1].split("?")[0]


def _track_artist_ids(track):
    """Return the non-empty artist ids found in ``track.raw_data["artists"]``."""
    artists = (track.raw_data or {}).get("artists") or []
    # Entries without an id are skipped: a None id would break the
    # comma-joined ``ids`` parameter built by the Spotify client.
    # NOTE(review): Spotify appears to report null ids for local files — confirm.
    return [artist["id"] for artist in artists if artist and artist.get("id")]


async def _enrich_audio_features(db: Session, recco_client: ReccoBeatsClient):
    """Fill audio-feature columns for up to one batch of tracks lacking them."""
    # `== None` is intentional: SQLAlchemy renders it as `IS NULL`.
    pending = (
        db.query(Track)
        .filter(Track.danceability == None)  # noqa: E711
        .limit(_ENRICH_BATCH_SIZE)
        .all()
    )
    print(f"DEBUG: Found {len(pending)} tracks missing audio features.")
    if not pending:
        return

    print(f"Enriching {len(pending)} tracks with audio features (ReccoBeats)...")
    features_list = await recco_client.get_audio_features([t.id for t in pending])

    # Index the returned records by the Spotify id parsed out of their href.
    features_by_id = {}
    for record in features_list or []:
        if not record:
            continue
        spotify_id = _spotify_id_from_href(record.get("href"))
        if spotify_id:
            features_by_id[spotify_id] = record

    updated_count = 0
    for track in pending:
        record = features_by_id.get(track.id)
        if not record:
            # No data returned for this track; its columns stay NULL, so it
            # will be selected again on a later pass.
            continue
        for field in _AUDIO_FEATURE_FIELDS:
            setattr(track, field, record.get(field))
        updated_count += 1

    print(f"Updated {updated_count} tracks with audio features.")
    db.commit()


async def _enrich_genres(db: Session, spotify_client: SpotifyClient):
    """Fill ``genres`` for up to one batch of tracks from their artists' genres."""
    pending = (
        db.query(Track)
        .filter(Track.genres == None)  # noqa: E711
        .limit(_ENRICH_BATCH_SIZE)
        .all()
    )
    if not pending:
        return

    print(f"Enriching {len(pending)} tracks with genres (Spotify)...")

    artist_ids_by_track = {t.id: _track_artist_ids(t) for t in pending}
    unique_artist_ids = list(
        {a_id for ids in artist_ids_by_track.values() for a_id in ids}
    )

    # Resolve artists in batches and remember each artist's genres.
    genres_by_artist = {}
    for start in range(0, len(unique_artist_ids), _ENRICH_BATCH_SIZE):
        chunk = unique_artist_ids[start:start + _ENRICH_BATCH_SIZE]
        for artist in await spotify_client.get_artists(chunk) or []:
            if artist and artist.get("id"):
                genres_by_artist[artist["id"]] = artist.get("genres") or []

    for track in pending:
        artist_ids = artist_ids_by_track[track.id]
        if artist_ids and not any(a_id in genres_by_artist for a_id in artist_ids):
            # None of this track's artists were resolved (for example the
            # lookup request failed). Leave genres NULL so the track is
            # retried instead of being permanently recorded as genre-less.
            continue
        combined_genres = set()
        for a_id in artist_ids:
            combined_genres.update(genres_by_artist.get(a_id, []))
        # Sorted so the stored list is deterministic across runs.
        track.genres = sorted(combined_genres)

    db.commit()


async def enrich_tracks(db: Session, spotify_client: SpotifyClient, recco_client: ReccoBeatsClient):
    """
    Finds tracks missing genres (Spotify) or audio features (ReccoBeats) and enriches them.

    Each call handles at most one batch per phase: audio features first,
    then genres. Each phase commits its own changes on ``db``.

    Args:
        db: Session used to query and update ``Track`` rows.
        spotify_client: Client whose ``get_artists`` returns artist records
            carrying ``id`` and ``genres``.
        recco_client: Client whose ``get_audio_features`` returns records
            carrying an ``href`` and the audio-feature values.
    """
    # 1. Enrich Audio Features (via ReccoBeats)
    await _enrich_audio_features(db, recco_client)
    # 2. Enrich Genres (via Spotify Artists)
    await _enrich_genres(db, spotify_client)
async def ingest_recently_played(db: Session):
client = get_spotify_client()
spotify_client = get_spotify_client()
recco_client = get_reccobeats_client()
try:
items = await client.get_recently_played(limit=50)
items = await spotify_client.get_recently_played(limit=50)
except Exception as e:
print(f"Error connecting to Spotify: {e}")
return
@@ -30,7 +114,6 @@ async def ingest_recently_played(db: Session):
played_at_str = item["played_at"]
played_at = parser.isoparse(played_at_str)
# 1. Check if track exists, if not create it
track_id = track_data["id"]
track = db.query(Track).filter(Track.id == track_id).first()
@@ -46,10 +129,8 @@ async def ingest_recently_played(db: Session):
raw_data=track_data
)
db.add(track)
db.commit() # Commit immediately so ID exists for foreign key
db.commit()
# 2. Check if this specific play instance exists
# We assume (track_id, played_at) is unique enough
exists = db.query(PlayHistory).filter(
PlayHistory.track_id == track_id,
PlayHistory.played_at == played_at
@@ -66,9 +147,13 @@ async def ingest_recently_played(db: Session):
db.commit()
# Enrich
await enrich_tracks(db, spotify_client, recco_client)
async def run_worker():
"""Simulates a background worker loop."""
db = SessionLocal()
try:
while True:
print("Worker: Polling Spotify...")

View File

@@ -1,4 +1,4 @@
from sqlalchemy import Column, Integer, String, DateTime, JSON, ForeignKey, Boolean
from sqlalchemy import Column, Integer, String, DateTime, JSON, ForeignKey, Float
from sqlalchemy.orm import relationship
from datetime import datetime
from .database import Base
@@ -16,6 +16,24 @@ class Track(Base):
# Store raw full JSON response for future-proofing analysis
raw_data = Column(JSON, nullable=True)
# Enriched Data (Phase 3 Prep)
# Audio Features
danceability = Column(Float, nullable=True)
energy = Column(Float, nullable=True)
key = Column(Integer, nullable=True)
loudness = Column(Float, nullable=True)
mode = Column(Integer, nullable=True)
speechiness = Column(Float, nullable=True)
acousticness = Column(Float, nullable=True)
instrumentalness = Column(Float, nullable=True)
liveness = Column(Float, nullable=True)
valence = Column(Float, nullable=True)
tempo = Column(Float, nullable=True)
time_signature = Column(Integer, nullable=True)
# Genres (stored as JSON list of strings)
genres = Column(JSON, nullable=True)
# AI Analysis fields
lyrics_summary = Column(String, nullable=True)
genre_tags = Column(String, nullable=True) # JSON list stored as string or just raw JSON

View File

@@ -0,0 +1,18 @@
import httpx
from typing import List, Dict, Any
RECCOBEATS_API_URL = "https://api.reccobeats.com/v1/audio-features"
class ReccoBeatsClient:
    """Small async client for the ReccoBeats audio-features endpoint."""

    # Largest number of ids sent in a single request.
    # NOTE(review): ReccoBeats documents a per-request cap on ids (40 at the
    # time of writing) — confirm against the current API reference.
    MAX_IDS_PER_REQUEST = 40

    @staticmethod
    def _batches(ids: List[str], size: int) -> List[List[str]]:
        """Split ``ids`` into consecutive lists of at most ``size`` items."""
        return [ids[start:start + size] for start in range(0, len(ids), size)]

    async def get_audio_features(self, spotify_ids: List[str]) -> List[Dict[str, Any]]:
        """
        Fetch audio-feature records for the given Spotify track ids.

        The ids are requested in batches of ``MAX_IDS_PER_REQUEST``. This is
        best-effort: a batch that fails (transport error, non-200 status or
        undecodable body) is logged and skipped, so the result may cover only
        part of the input.

        Args:
            spotify_ids: Spotify track ids to look up.

        Returns:
            The records found under the ``content`` key of each successful
            response, concatenated; an empty list when nothing was fetched.
        """
        if not spotify_ids:
            return []

        features: List[Dict[str, Any]] = []
        async with httpx.AsyncClient() as client:
            for batch in self._batches(spotify_ids, self.MAX_IDS_PER_REQUEST):
                try:
                    response = await client.get(
                        RECCOBEATS_API_URL, params={"ids": ",".join(batch)}
                    )
                    if response.status_code != 200:
                        print(f"ReccoBeats returned HTTP {response.status_code}")
                        continue
                    payload = response.json()
                except (httpx.HTTPError, ValueError) as exc:
                    # Transport failure or a body that is not valid JSON.
                    print(f"Error fetching audio features from ReccoBeats: {exc}")
                    continue
                if isinstance(payload, dict):
                    # `or []` guards against an explicit `"content": null`.
                    features.extend(payload.get("content") or [])
        return features

View File

@@ -3,6 +3,7 @@ import base64
import time
import httpx
from fastapi import HTTPException
from typing import List, Dict, Any
SPOTIFY_TOKEN_URL = "https://accounts.spotify.com/api/token"
SPOTIFY_API_BASE = "https://api.spotify.com/v1"
@@ -68,3 +69,26 @@ class SpotifyClient:
if response.status_code != 200:
return None
return response.json()
async def get_artists(self, artist_ids: List[str]) -> List[Dict[str, Any]]:
    """
    Fetches artist details (including genres) for a list of artist IDs.

    Spotify allows up to 50 IDs per request, so longer lists are sent as
    several requests and the results concatenated. A request that does not
    return HTTP 200 is logged and contributes nothing to the result.

    Args:
        artist_ids: Spotify artist IDs to look up.

    Returns:
        The entries of each successful response's ``artists`` array, in
        request order; an empty list when ``artist_ids`` is empty or every
        request failed.
    """
    if not artist_ids:
        return []

    max_ids_per_request = 50
    token = await self.get_access_token()
    headers = {"Authorization": f"Bearer {token}"}

    artists: List[Dict[str, Any]] = []
    async with httpx.AsyncClient() as client:
        for start in range(0, len(artist_ids), max_ids_per_request):
            batch = artist_ids[start:start + max_ids_per_request]
            response = await client.get(
                f"{SPOTIFY_API_BASE}/artists",
                params={"ids": ",".join(batch)},
                headers=headers,
            )
            if response.status_code != 200:
                print(f"Error fetching artists: {response.text}")
                continue
            # `or []` guards against an explicit `"artists": null`.
            artists.extend(response.json().get("artists") or [])
    return artists