Implement Phase 2 Frontend and Phase 3 Data Enrichment

- Initialize React+Vite Frontend with Ant Design Dashboard.
- Implement Data Enrichment: ReccoBeats (Audio Features) and Spotify (Genres).
- Update Database Schema via Alembic Migrations.
- Add Docker support (Dockerfile, docker-compose.yml).
- Update README with hosting instructions.
2026-04-26 13:25:04 +00:00 · 2025-12-24 21:34:36 +00:00
parent 3a424d15a5
commit 0ca9893c68
15 changed files with 607 additions and 60 deletions
--- a/backend/app/ingest.py
+++ b/backend/app/ingest.py
@@ -5,6 +5,7 @@ from sqlalchemy.orm import Session
 from .models import Track, PlayHistory
 from .database import SessionLocal
 from .services.spotify_client import SpotifyClient
+from .services.reccobeats_client import ReccoBeatsClient
 from dateutil import parser

 # Initialize Spotify Client (env vars will be populated later)
@@ -15,10 +16,93 @@ def get_spotify_client():
        refresh_token=os.getenv("SPOTIFY_REFRESH_TOKEN"),
    )

+def get_reccobeats_client():  # factory: returns a new ReccoBeatsClient constructed with no arguments
+    return ReccoBeatsClient()
+
+async def enrich_tracks(db: Session, spotify_client: SpotifyClient, recco_client: ReccoBeatsClient):  # no return value; results are written onto Track rows and committed
+    """
+    Finds tracks missing genres (Spotify) or audio features (ReccoBeats) and enriches them.
+    """
+
+    # 1. Enrich Audio Features (via ReccoBeats)
+    tracks_missing_features = db.query(Track).filter(Track.danceability == None).limit(50).all()  # at most 50 rows per call; NULL danceability is the only "missing features" test
+    print(f"DEBUG: Found {len(tracks_missing_features)} tracks missing audio features.")
+
+    if tracks_missing_features:
+        print(f"Enriching {len(tracks_missing_features)} tracks with audio features (ReccoBeats)...")
+        ids = [t.id for t in tracks_missing_features]  # presumably Spotify track ids (get_audio_features names its parameter spotify_ids) - confirm
+
+        features_list = await recco_client.get_audio_features(ids)  # the client returns [] on any non-200 status or exception, in which case nothing below is updated
+
+        features_map = {}  # track id -> feature dict, keyed by the id parsed out of each item's "href"
+        for f in features_list:
+            if "href" in f and "track/" in f["href"]:  # items without a "track/<id>" href are skipped
+                tid = f["href"].split("track/")[1].split("?")[0]  # segment following the first "track/", cut at the first "?"
+                features_map[tid] = f
+
+        updated_count = 0
+        for track in tracks_missing_features:
+            data = features_map.get(track.id)  # None when no returned item maps to this track
+            if data:  # NOTE(review): unmatched tracks keep danceability NULL, so they still match the filter above on later calls
+                track.danceability = data.get("danceability")  # .get() leaves a column None when its key is absent
+                track.energy = data.get("energy")
+                track.key = data.get("key")
+                track.loudness = data.get("loudness")
+                track.mode = data.get("mode")
+                track.speechiness = data.get("speechiness")
+                track.acousticness = data.get("acousticness")
+                track.instrumentalness = data.get("instrumentalness")
+                track.liveness = data.get("liveness")
+                track.valence = data.get("valence")
+                track.tempo = data.get("tempo")  # NOTE(review): Track.time_signature is never assigned in this function
+                updated_count += 1
+
+        print(f"Updated {updated_count} tracks with audio features.")
+        db.commit()  # runs whenever the batch was non-empty, even if updated_count is 0
+
+    # 2. Enrich Genres (via Spotify Artists)
+    tracks_missing_genres = db.query(Track).filter(Track.genres == None).limit(50).all()  # same batching: at most 50 tracks whose genres column is NULL
+
+    if tracks_missing_genres:
+        print(f"Enriching {len(tracks_missing_genres)} tracks with genres (Spotify)...")
+
+        artist_ids = set()  # de-duplicated artist ids across the whole batch
+        track_artist_map = {}  # track id -> list of that track's artist ids
+
+        for t in tracks_missing_genres:
+            if t.raw_data and "artists" in t.raw_data:  # tracks without an "artists" entry in raw_data get no map entry
+                a_ids = [a["id"] for a in t.raw_data["artists"]]  # assumes every artist entry has an "id" key (KeyError otherwise)
+                artist_ids.update(a_ids)
+                track_artist_map[t.id] = a_ids
+
+        artist_ids_list = list(artist_ids)
+        artist_genre_map = {}  # artist id -> list of genres reported for that artist
+
+        for i in range(0, len(artist_ids_list), 50):  # one get_artists call per chunk of up to 50 ids
+            chunk = artist_ids_list[i:i+50]
+            artists_data = await spotify_client.get_artists(chunk)  # get_artists returns [] when the response status is not 200
+            for a_data in artists_data:
+                if a_data:  # skip falsy entries in the returned list
+                    artist_genre_map[a_data["id"]] = a_data.get("genres", [])
+
+        for t in tracks_missing_genres:
+            a_ids = track_artist_map.get(t.id, [])
+            combined_genres = set()  # union of the genres of all of this track's artists
+            for a_id in a_ids:
+                genres = artist_genre_map.get(a_id, [])
+                combined_genres.update(genres)
+
+            t.genres = list(combined_genres)  # NOTE(review): stores [] when nothing was found (also after a failed lookup); [] no longer matches the genres == None filter
+
+        db.commit()
+
+
 async def ingest_recently_played(db: Session):
-    client = get_spotify_client()
+    spotify_client = get_spotify_client()
+    recco_client = get_reccobeats_client()
+
    try:
-        items = await client.get_recently_played(limit=50)
+        items = await spotify_client.get_recently_played(limit=50)
    except Exception as e:
        print(f"Error connecting to Spotify: {e}")
        return
@@ -30,7 +114,6 @@ async def ingest_recently_played(db: Session):
        played_at_str = item["played_at"]
        played_at = parser.isoparse(played_at_str)

-        # 1. Check if track exists, if not create it
        track_id = track_data["id"]
        track = db.query(Track).filter(Track.id == track_id).first()

@@ -46,10 +129,8 @@ async def ingest_recently_played(db: Session):
                raw_data=track_data
            )
            db.add(track)
-            db.commit() # Commit immediately so ID exists for foreign key
+            db.commit()

-        # 2. Check if this specific play instance exists
-        # We assume (track_id, played_at) is unique enough
        exists = db.query(PlayHistory).filter(
            PlayHistory.track_id == track_id,
            PlayHistory.played_at == played_at
@@ -66,9 +147,13 @@ async def ingest_recently_played(db: Session):

    db.commit()

+    # Enrich
+    await enrich_tracks(db, spotify_client, recco_client)
+
 async def run_worker():
    """Simulates a background worker loop."""
    db = SessionLocal()
+
    try:
        while True:
            print("Worker: Polling Spotify...")
--- a/backend/app/models.py
+++ b/backend/app/models.py
@@ -1,4 +1,4 @@
-from sqlalchemy import Column, Integer, String, DateTime, JSON, ForeignKey, Boolean
+from sqlalchemy import Column, Integer, String, DateTime, JSON, ForeignKey, Float
 from sqlalchemy.orm import relationship
 from datetime import datetime
 from .database import Base
@@ -16,6 +16,24 @@ class Track(Base):
    # Store raw full JSON response for future-proofing analysis
    raw_data = Column(JSON, nullable=True)

+    # Enriched Data (Phase 3 Prep)
+    # Audio Features
+    danceability = Column(Float, nullable=True)
+    energy = Column(Float, nullable=True)
+    key = Column(Integer, nullable=True)
+    loudness = Column(Float, nullable=True)
+    mode = Column(Integer, nullable=True)
+    speechiness = Column(Float, nullable=True)
+    acousticness = Column(Float, nullable=True)
+    instrumentalness = Column(Float, nullable=True)
+    liveness = Column(Float, nullable=True)
+    valence = Column(Float, nullable=True)
+    tempo = Column(Float, nullable=True)
+    time_signature = Column(Integer, nullable=True)
+
+    # Genres (stored as JSON list of strings)
+    genres = Column(JSON, nullable=True)
+
    # AI Analysis fields
    lyrics_summary = Column(String, nullable=True)
    genre_tags = Column(String, nullable=True) # JSON list stored as string or just raw JSON
--- a/backend/app/services/reccobeats_client.py
+++ b/backend/app/services/reccobeats_client.py
@@ -0,0 +1,18 @@
+import httpx
+from typing import List, Dict, Any
+
+RECCOBEATS_API_URL = "https://api.reccobeats.com/v1/audio-features"  # endpoint queried by ReccoBeatsClient.get_audio_features
+
+class ReccoBeatsClient:  # client with a single method and no instance state
+    async def get_audio_features(self, spotify_ids: List[str]) -> List[Dict[str, Any]]:  # returns the response's "content" list, or [] on failure
+        if not spotify_ids:  # empty input: skip the HTTP request entirely
+            return []
+        ids_param = ",".join(spotify_ids)  # all ids go into one comma-separated "ids" query parameter
+        async with httpx.AsyncClient() as client:  # a fresh HTTP client per call, closed when the block exits
+            try:
+                response = await client.get(RECCOBEATS_API_URL, params={"ids": ids_param})
+                if response.status_code != 200:  # any non-200 status is reported as "no features"
+                    return []
+                return response.json().get("content", [])  # [] when the body has no "content" key
+            except Exception:  # NOTE(review): every error is swallowed without logging; callers cannot tell a failure from an empty result
+                return []
--- a/backend/app/services/spotify_client.py
+++ b/backend/app/services/spotify_client.py
@@ -3,6 +3,7 @@ import base64
 import time
 import httpx
 from fastapi import HTTPException
+from typing import List, Dict, Any

 SPOTIFY_TOKEN_URL = "https://accounts.spotify.com/api/token"
 SPOTIFY_API_BASE = "https://api.spotify.com/v1"
@@ -68,3 +69,26 @@ class SpotifyClient:
            if response.status_code != 200:
                return None
            return response.json()
+
+    async def get_artists(self, artist_ids: List[str]) -> List[Dict[str, Any]]:
+        """
+        Fetches artist details (including genres) for a list of artist IDs.
+        Spotify allows up to 50 IDs per request.
+        """
+        if not artist_ids:  # empty input: no token fetch and no HTTP request
+            return []
+
+        token = await self.get_access_token()
+        ids_param = ",".join(artist_ids)  # comma-separated ids; this method does not itself enforce the 50-id limit
+
+        async with httpx.AsyncClient() as client:  # a fresh HTTP client per call, closed when the block exits
+            response = await client.get(
+                f"{SPOTIFY_API_BASE}/artists",
+                params={"ids": ids_param},
+                headers={"Authorization": f"Bearer {token}"},
+            )
+            if response.status_code != 200:  # failure is printed and reported to the caller as an empty list
+                print(f"Error fetching artists: {response.text}")
+                return []
+
+            return response.json().get("artists", [])  # [] when the body has no "artists" key