Implement Phase 3 Music Analysis and LLM Engine

- Refactor Database: Add an `Artist` model, a many-to-many (M2M) relationship between tracks and artists via the `track_artists` association table, and an `AnalysisSnapshot` model.
- Backend Services: Implement `StatsService` for computable metrics and `NarrativeService` for Gemini LLM integration.
- Fix Ingestion: Correctly handle multiple artists per track and backfill existing data.
- Testing: Add unit tests for statistics logic and live verification scripts.
- Documentation: Add `PHASE_4_FRONTEND_GUIDE.md`.
This commit is contained in:
google-labs-jules[bot]
2025-12-24 23:16:32 +00:00
parent ab47dd62ca
commit f4432154b6
9 changed files with 942 additions and 30 deletions

View File

@@ -1,14 +1,32 @@
from sqlalchemy import Column, Integer, String, DateTime, JSON, ForeignKey, Float
from sqlalchemy import Column, Integer, String, DateTime, JSON, ForeignKey, Float, Table, Text
from sqlalchemy.orm import relationship
from datetime import datetime
from .database import Base
# Link table backing the many-to-many Track <-> Artist relationship.
# Each row pairs one track with one artist; the two foreign keys together
# form the composite primary key.
track_artists = Table(
    "track_artists",
    Base.metadata,
    Column("track_id", String, ForeignKey("tracks.id"), primary_key=True),
    Column("artist_id", String, ForeignKey("artists.id"), primary_key=True),
)
class Artist(Base):
    """ORM model for a row of the ``artists`` table.

    Linked to ``Track`` through the ``track_artists`` association table.
    """

    __tablename__ = "artists"

    # Primary key: the artist's Spotify ID.
    id = Column(String, index=True, primary_key=True)
    name = Column(String)
    # Optional list of genre strings, stored as JSON.
    genres = Column(JSON, nullable=True)

    # Many-to-many link; mirrored by ``Track.artists``.
    tracks = relationship(
        "Track",
        back_populates="artists",
        secondary=track_artists,
    )
class Track(Base):
__tablename__ = "tracks"
id = Column(String, primary_key=True, index=True) # Spotify ID
name = Column(String)
artist = Column(String)
artist = Column(String) # Display string (e.g. "Drake, Future") - kept for convenience
album = Column(String)
duration_ms = Column(Integer)
popularity = Column(Integer, nullable=True)
@@ -31,17 +49,18 @@ class Track(Base):
tempo = Column(Float, nullable=True)
time_signature = Column(Integer, nullable=True)
# Genres (stored as JSON list of strings)
# Genres (stored as JSON list of strings) - DEPRECATED in favor of Artist.genres but kept for now
genres = Column(JSON, nullable=True)
# AI Analysis fields
lyrics_summary = Column(String, nullable=True)
genre_tags = Column(String, nullable=True) # JSON list stored as string or just raw JSON
genre_tags = Column(String, nullable=True)
created_at = Column(DateTime, default=datetime.utcnow)
updated_at = Column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow)
plays = relationship("PlayHistory", back_populates="track")
artists = relationship("Artist", secondary=track_artists, back_populates="tracks")
class PlayHistory(Base):
@@ -55,3 +74,23 @@ class PlayHistory(Base):
context_uri = Column(String, nullable=True)
track = relationship("Track", back_populates="plays")
class AnalysisSnapshot(Base):
    """Persisted result of one analysis run over a listening period.

    Holds both the computed statistics and the LLM-written narrative for
    that period, so that successive snapshots can be compared over time.
    """

    __tablename__ = "analysis_snapshots"

    id = Column(Integer, index=True, primary_key=True)

    # Timestamp of the analysis run itself; defaults to datetime.utcnow.
    date = Column(DateTime, index=True, default=datetime.utcnow)

    # Bounds and label of the analysed period.
    period_start = Column(DateTime)
    period_end = Column(DateTime)
    period_label = Column(String)  # e.g. "last_30_days", "monthly_nov_2023"

    # Bulk payloads, kept as JSON blobs.
    metrics_payload = Column(JSON)  # LLM input (StatsService output)
    narrative_report = Column(JSON)  # LLM output (NarrativeService output)

    # Name of the model that produced the narrative, e.g. "gemini-1.5-flash".
    model_used = Column(String, nullable=True)