Implement Phase 3 Music Analysis and LLM Engine

- Refactor Database: Add `Artist` model, M2M relationship, and `AnalysisSnapshot` model.
- Backend Services: Implement `StatsService` for computable metrics and `NarrativeService` for Gemini LLM integration.
- Fix Ingestion: Correctly handle multiple artists per track and backfill existing data.
- Testing: Add unit tests for statistics logic and live verification scripts.
- Documentation: Add `PHASE_4_FRONTEND_GUIDE.md`.
This commit is contained in:
google-labs-jules[bot]
2025-12-24 23:16:32 +00:00
parent ab47dd62ca
commit f4432154b6
9 changed files with 942 additions and 30 deletions

78
backend/seed_data.py Normal file
View File

@@ -0,0 +1,78 @@
from datetime import datetime, timedelta
import random
from app.database import SessionLocal
from app.models import Track, Artist, PlayHistory
from app.services.stats_service import StatsService
def seed_db():
    """Populate the database with synthetic artists, tracks and play history.

    Three stages, each committed separately:

    1. Ten ``Artist`` rows (``artist_0`` .. ``artist_9``), each with two
       randomly chosen genres (the two picks may coincide).
    2. Fifty ``Track`` rows (``track_0`` .. ``track_49``), each linked to one
       randomly chosen artist and given random audio-feature values.
    3. Up to 200 ``PlayHistory`` rows with increasing timestamps, starting
       25 days before now and never extending into the future.

    Artists and tracks go through ``Session.merge``, so re-running the script
    is expected to update rows with the same ids rather than duplicate them
    (assumes ``id`` is the primary key -- confirm against the models).
    Play history is inserted with ``Session.add``, so every run appends new
    rows.

    Progress is reported on stdout. The session is closed even when seeding
    fails part-way; the exception itself is propagated to the caller.
    """
    db = SessionLocal()
    try:
        # 1. Create Artists
        artists = []
        for i in range(10):
            a = Artist(
                id=f"artist_{i}",
                name=f"Artist {i}",
                genres=[random.choice(["pop", "rock", "jazz", "edm", "hip-hop"]) for _ in range(2)],
            )
            db.merge(a)  # merge handles insert/update
            # Keep the original (unattached) object: it is linked to tracks
            # below and reconciled with the database row when the track is
            # merged.
            artists.append(a)
        db.commit()
        print(f"Seeded {len(artists)} artists.")

        # 2. Create Tracks
        tracks = []
        for i in range(50):
            # Random artist
            artist = random.choice(artists)
            t = Track(
                id=f"track_{i}",
                name=f"Track {i}",
                artist=artist.name,  # Legacy
                album=f"Album {i % 10}",
                duration_ms=random.randint(180000, 300000),  # 3-5 mins
                popularity=random.randint(10, 90),
                danceability=random.uniform(0.3, 0.9),
                energy=random.uniform(0.3, 0.9),
                valence=random.uniform(0.1, 0.9),
                tempo=random.uniform(80, 160),
                raw_data={"album": {"id": f"album_{i%10}", "release_date": f"{random.randint(2000, 2023)}-01-01"}},
            )
            # Link artist
            t.artists.append(artist)
            db.merge(t)
            tracks.append(t)
        db.commit()
        print(f"Seeded {len(tracks)} tracks.")

        # 3. Create Play History (starting 25 days ago)
        # NOTE(review): datetime.utcnow() is deprecated since Python 3.12. It is
        # kept because played_at presumably stores naive UTC -- confirm before
        # switching to a timezone-aware value.
        now = datetime.utcnow()
        last_played = now - timedelta(days=25)
        for _ in range(200):
            # Create sessions
            # 80% chance next play is soon (2-6 mins), 20% chance gap (30-600 mins)
            gap = random.randint(2, 6) if random.random() > 0.2 else random.randint(30, 600)
            played_at = last_played + timedelta(minutes=gap)
            if played_at > now:
                # Never seed plays in the future.
                break
            track = random.choice(tracks)
            db.add(
                PlayHistory(
                    track_id=track.id,
                    played_at=played_at,
                    context_uri="spotify:playlist:fake",
                )
            )
            # Advance only after a play is recorded so the final message
            # reports a timestamp that was really written.
            last_played = played_at
        db.commit()
        print(f"Seeded play history until {last_played}.")
    finally:
        # Release the connection on both success and failure.
        db.close()
# Seed the database only when this file is executed as a script, so that
# importing the module (e.g. to reuse seed_db) has no side effects.
if __name__ == "__main__":
    seed_db()