Files
MusicAnalyser/backend/app/models.py
google-labs-jules[bot] f4432154b6 Implement Phase 3 Music Analysis and LLM Engine
- Refactor Database: Add `Artist` model, M2M relationship, and `AnalysisSnapshot` model.
- Backend Services: Implement `StatsService` for computable metrics and `NarrativeService` for Gemini LLM integration.
- Fix Ingestion: Correctly handle multiple artists per track and backfill existing data.
- Testing: Add unit tests for statistics logic and live verification scripts.
- Documentation: Add `PHASE_4_FRONTEND_GUIDE.md`.
2025-12-24 23:16:32 +00:00

97 lines
3.5 KiB
Python

from datetime import datetime, timezone

from sqlalchemy import Column, Integer, String, DateTime, JSON, ForeignKey, Float, Table, Text
from sqlalchemy.orm import relationship

from .database import Base
# Many-to-many link between tracks and artists. Each row pairs one track with
# one artist; the two foreign keys together form the composite primary key.
track_artists = Table(
    "track_artists",
    Base.metadata,
    Column("track_id", String, ForeignKey("tracks.id"), primary_key=True),
    Column("artist_id", String, ForeignKey("artists.id"), primary_key=True),
)
class Artist(Base):
    """An artist row, linked to its tracks through ``track_artists``."""

    __tablename__ = "artists"

    id = Column(String, primary_key=True, index=True)  # Spotify ID
    name = Column(String)
    genres = Column(JSON, nullable=True)  # List of genre strings

    # Many-to-many side of the association; paired with ``Track.artists``.
    tracks = relationship(
        "Track",
        secondary=track_artists,
        back_populates="artists",
    )
def _utc_now_naive() -> datetime:
    """Return the current UTC time as a naive ``datetime``.

    Yields the same kind of value as ``datetime.utcnow()`` (UTC wall-clock
    time with ``tzinfo`` unset) without calling that function, which is
    deprecated since Python 3.12. Keeping the value naive means timestamps
    written by the column defaults below look the same as before.
    """
    return datetime.now(timezone.utc).replace(tzinfo=None)


class Track(Base):
    """A track row: display metadata, optional audio features, AI fields.

    Linked to ``PlayHistory`` rows via ``plays`` and to ``Artist`` rows via
    the ``track_artists`` association table.
    """

    __tablename__ = "tracks"

    id = Column(String, primary_key=True, index=True)  # Spotify ID
    name = Column(String)
    # Display string (e.g. "Drake, Future") - kept for convenience
    artist = Column(String)
    album = Column(String)
    duration_ms = Column(Integer)
    popularity = Column(Integer, nullable=True)

    # Store raw full JSON response for future-proofing analysis
    raw_data = Column(JSON, nullable=True)

    # Enriched Data (Phase 3 Prep)
    # Audio Features
    danceability = Column(Float, nullable=True)
    energy = Column(Float, nullable=True)
    key = Column(Integer, nullable=True)
    loudness = Column(Float, nullable=True)
    mode = Column(Integer, nullable=True)
    speechiness = Column(Float, nullable=True)
    acousticness = Column(Float, nullable=True)
    instrumentalness = Column(Float, nullable=True)
    liveness = Column(Float, nullable=True)
    valence = Column(Float, nullable=True)
    tempo = Column(Float, nullable=True)
    time_signature = Column(Integer, nullable=True)

    # Genres (stored as JSON list of strings) - DEPRECATED in favor of
    # Artist.genres but kept for now
    genres = Column(JSON, nullable=True)

    # AI Analysis fields
    lyrics_summary = Column(String, nullable=True)
    genre_tags = Column(String, nullable=True)

    # Row bookkeeping: both default to "now" on insert, and ``updated_at`` is
    # refreshed on every UPDATE. Values are naive UTC.
    created_at = Column(DateTime, default=_utc_now_naive)
    updated_at = Column(
        DateTime,
        default=_utc_now_naive,
        onupdate=_utc_now_naive,
    )

    # Relationships
    plays = relationship("PlayHistory", back_populates="track")
    artists = relationship(
        "Artist",
        secondary=track_artists,
        back_populates="tracks",
    )
class PlayHistory(Base):
    """One play event: which track was played, when, and in what context."""

    __tablename__ = "play_history"

    id = Column(Integer, primary_key=True, index=True)
    track_id = Column(String, ForeignKey("tracks.id"))
    played_at = Column(DateTime, index=True)  # The timestamp from Spotify

    # Context (album, playlist, etc.)
    context_uri = Column(String, nullable=True)

    # Many-to-one back to the played track; paired with ``Track.plays``.
    track = relationship("Track", back_populates="plays")
class AnalysisSnapshot(Base):
    """
    Stores the computed statistics and LLM analysis for a given period.
    Allows for trend analysis over time.
    """

    __tablename__ = "analysis_snapshots"

    id = Column(Integer, primary_key=True, index=True)

    # When the analysis was run. The default produces naive UTC, the same kind
    # of value ``datetime.utcnow()`` returned, but avoids that function because
    # it is deprecated since Python 3.12.
    date = Column(
        DateTime,
        default=lambda: datetime.now(timezone.utc).replace(tzinfo=None),
        index=True,
    )

    period_start = Column(DateTime)
    period_end = Column(DateTime)
    period_label = Column(String)  # e.g., "last_30_days", "monthly_nov_2023"

    # The heavy lifting: stored as JSON blobs
    metrics_payload = Column(JSON)  # The input to the LLM (StatsService output)
    narrative_report = Column(JSON)  # The output from the LLM (NarrativeService output)

    model_used = Column(String, nullable=True)  # e.g. "gemini-1.5-flash"