mirror of
https://github.com/bnair123/MusicAnalyser.git
synced 2026-02-25 11:46:07 +00:00
Fixed and added all the stats_service.py methods
This commit is contained in:
6
.idea/vcs.xml
generated
Normal file
6
.idea/vcs.xml
generated
Normal file
@@ -0,0 +1,6 @@
|
|||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<project version="4">
|
||||||
|
<component name="VcsDirectoryMappings">
|
||||||
|
<mapping directory="" vcs="Git" />
|
||||||
|
</component>
|
||||||
|
</project>
|
||||||
@@ -5,7 +5,10 @@ A personal analytics dashboard for your music listening habits, powered by Pytho
|
|||||||
## Features
|
## Features
|
||||||
|
|
||||||
- **Continuous Ingestion**: Polls Spotify every 60 seconds to record your listening history.
|
- **Continuous Ingestion**: Polls Spotify every 60 seconds to record your listening history.
|
||||||
- **Data Enrichment**: Automatically fetches **Genres** (via Spotify) and **Audio Features** (Energy, BPM, Mood via ReccoBeats).
|
- **Data Enrichment**:
|
||||||
|
- **Genres & Images** (via Spotify)
|
||||||
|
- **Audio Features** (Energy, BPM, Mood via ReccoBeats)
|
||||||
|
- **Lyrics & Metadata** (via Genius)
|
||||||
- **Dashboard**: A responsive UI (Ant Design) to view your history, stats, and "Vibes".
|
- **Dashboard**: A responsive UI (Ant Design) to view your history, stats, and "Vibes".
|
||||||
- **AI Ready**: Database schema and environment prepared for Gemini AI integration.
|
- **AI Ready**: Database schema and environment prepared for Gemini AI integration.
|
||||||
|
|
||||||
@@ -18,6 +21,7 @@ You can run this application using Docker Compose. You have two options: using t
|
|||||||
- **Spotify Developer Credentials** (Client ID & Secret).
|
- **Spotify Developer Credentials** (Client ID & Secret).
|
||||||
- **Spotify Refresh Token** (Run `backend/scripts/get_refresh_token.py` locally to generate this).
|
- **Spotify Refresh Token** (Run `backend/scripts/get_refresh_token.py` locally to generate this).
|
||||||
- **Google Gemini API Key**.
|
- **Google Gemini API Key**.
|
||||||
|
- **Genius API Token** (Optional, for lyrics).
|
||||||
|
|
||||||
### 2. Configuration (`.env`)
|
### 2. Configuration (`.env`)
|
||||||
|
|
||||||
@@ -28,6 +32,7 @@ SPOTIFY_CLIENT_ID="your_client_id"
|
|||||||
SPOTIFY_CLIENT_SECRET="your_client_secret"
|
SPOTIFY_CLIENT_SECRET="your_client_secret"
|
||||||
SPOTIFY_REFRESH_TOKEN="your_refresh_token"
|
SPOTIFY_REFRESH_TOKEN="your_refresh_token"
|
||||||
GEMINI_API_KEY="your_gemini_key"
|
GEMINI_API_KEY="your_gemini_key"
|
||||||
|
GENIUS_ACCESS_TOKEN="your_genius_token"
|
||||||
```
|
```
|
||||||
|
|
||||||
### 3. Run with Docker Compose
|
### 3. Run with Docker Compose
|
||||||
|
|||||||
@@ -87,9 +87,28 @@ The LLM returns a JSON object with:
|
|||||||
|
|
||||||
## 3. Data Models (`backend/app/models.py`)
|
## 3. Data Models (`backend/app/models.py`)
|
||||||
|
|
||||||
- **Track:** Stores static metadata and audio features. `raw_data` stores the full Spotify JSON for future-proofing.
|
- **Track:** Stores static metadata and audio features.
|
||||||
- **Artist:** Normalized artist entities. Linked to tracks via `track_artists` table.
|
- `lyrics`: Full lyrics from Genius (Text).
|
||||||
|
- `image_url`: Album art URL (String).
|
||||||
|
- `raw_data`: The full Spotify JSON for future-proofing.
|
||||||
|
- **Artist:** Normalized artist entities.
|
||||||
|
- `image_url`: Artist profile image (String).
|
||||||
- **PlayHistory:** The timeseries ledger. Links `Track` to a timestamp and context.
|
- **PlayHistory:** The timeseries ledger. Links `Track` to a timestamp and context.
|
||||||
- **AnalysisSnapshot:** Stores the final output of these services.
|
- **AnalysisSnapshot:** Stores the final output of these services.
|
||||||
- `metrics_payload`: The JSON output of `StatsService`.
|
- `metrics_payload`: The JSON output of `StatsService`.
|
||||||
- `narrative_report`: The JSON output of `NarrativeService`.
|
- `narrative_report`: The JSON output of `NarrativeService`.
|
||||||
|
|
||||||
|
## 4. External Integrations
|
||||||
|
|
||||||
|
### Spotify
|
||||||
|
- **Ingestion:** Polls `recently-played` endpoint every 60s.
|
||||||
|
- **Enrichment:** Fetches Artist genres and images.
|
||||||
|
|
||||||
|
### Genius
|
||||||
|
- **Client:** `backend/app/services/genius_client.py`.
|
||||||
|
- **Function:** Searches for lyrics and high-res album art if missing from Spotify data.
|
||||||
|
- **Trigger:** Runs during the ingestion loop for new tracks.
|
||||||
|
|
||||||
|
### ReccoBeats
|
||||||
|
- **Function:** Fetches audio features (Danceability, Energy, Valence) for tracks.
|
||||||
|
|
||||||
|
|||||||
@@ -0,0 +1,36 @@
|
|||||||
|
"""Add image_url and lyrics columns
|
||||||
|
|
||||||
|
Revision ID: f92d8a9264d3
|
||||||
|
Revises: 4401cb416661
|
||||||
|
Create Date: 2025-12-25 22:06:05.841447
|
||||||
|
|
||||||
|
"""
|
||||||
|
from typing import Sequence, Union
|
||||||
|
|
||||||
|
from alembic import op
|
||||||
|
import sqlalchemy as sa
|
||||||
|
|
||||||
|
|
||||||
|
# revision identifiers, used by Alembic.
|
||||||
|
revision: str = 'f92d8a9264d3'
|
||||||
|
down_revision: Union[str, Sequence[str], None] = '4401cb416661'
|
||||||
|
branch_labels: Union[str, Sequence[str], None] = None
|
||||||
|
depends_on: Union[str, Sequence[str], None] = None
|
||||||
|
|
||||||
|
|
||||||
|
def upgrade() -> None:
    """Upgrade schema.

    Adds a nullable ``image_url`` column to ``artists`` and ``tracks`` and a
    nullable ``lyrics`` text column to ``tracks``.
    """
    # (table, column, type factory) in the order the columns are added.
    # The type factory is called per iteration so every Column gets its own
    # type instance.
    additions = (
        ('artists', 'image_url', sa.String),
        ('tracks', 'image_url', sa.String),
        ('tracks', 'lyrics', sa.Text),
    )
    for table_name, column_name, type_factory in additions:
        op.add_column(table_name, sa.Column(column_name, type_factory(), nullable=True))
|
||||||
|
|
||||||
|
|
||||||
|
def downgrade() -> None:
    """Downgrade schema.

    Drops the ``lyrics`` and ``image_url`` columns from ``tracks`` and the
    ``image_url`` column from ``artists``.
    """
    # (table, column) pairs in the order the columns are dropped.
    removals = (
        ('tracks', 'lyrics'),
        ('tracks', 'image_url'),
        ('artists', 'image_url'),
    )
    for table_name, column_name in removals:
        op.drop_column(table_name, column_name)
|
||||||
@@ -6,9 +6,10 @@ from .models import Track, PlayHistory, Artist
|
|||||||
from .database import SessionLocal
|
from .database import SessionLocal
|
||||||
from .services.spotify_client import SpotifyClient
|
from .services.spotify_client import SpotifyClient
|
||||||
from .services.reccobeats_client import ReccoBeatsClient
|
from .services.reccobeats_client import ReccoBeatsClient
|
||||||
|
from .services.genius_client import GeniusClient
|
||||||
from dateutil import parser
|
from dateutil import parser
|
||||||
|
|
||||||
# Initialize Spotify Client (env vars will be populated later)
|
# Initialize Clients
|
||||||
def get_spotify_client():
|
def get_spotify_client():
|
||||||
return SpotifyClient(
|
return SpotifyClient(
|
||||||
client_id=os.getenv("SPOTIFY_CLIENT_ID"),
|
client_id=os.getenv("SPOTIFY_CLIENT_ID"),
|
||||||
@@ -19,57 +20,55 @@ def get_spotify_client():
|
|||||||
def get_reccobeats_client():
|
def get_reccobeats_client():
|
||||||
return ReccoBeatsClient()
|
return ReccoBeatsClient()
|
||||||
|
|
||||||
|
def get_genius_client():
    """Create and return a new ``GeniusClient`` instance."""
    client = GeniusClient()
    return client
|
||||||
|
|
||||||
async def ensure_artists_exist(db: Session, artists_data: list):
|
async def ensure_artists_exist(db: Session, artists_data: list):
|
||||||
"""
|
"""
|
||||||
Ensures that all artists in the list exist in the Artist table.
|
Ensures that all artists in the list exist in the Artist table.
|
||||||
Returns a list of Artist objects.
|
|
||||||
"""
|
"""
|
||||||
artist_objects = []
|
artist_objects = []
|
||||||
for a_data in artists_data:
|
for a_data in artists_data:
|
||||||
artist_id = a_data["id"]
|
artist_id = a_data["id"]
|
||||||
artist = db.query(Artist).filter(Artist.id == artist_id).first()
|
artist = db.query(Artist).filter(Artist.id == artist_id).first()
|
||||||
if not artist:
|
if not artist:
|
||||||
|
# Check if image is available in this payload (rare for track-linked artists, but possible)
|
||||||
|
img = None
|
||||||
|
if "images" in a_data and a_data["images"]:
|
||||||
|
img = a_data["images"][0]["url"]
|
||||||
|
|
||||||
artist = Artist(
|
artist = Artist(
|
||||||
id=artist_id,
|
id=artist_id,
|
||||||
name=a_data["name"],
|
name=a_data["name"],
|
||||||
genres=[] # Will be enriched later
|
genres=[],
|
||||||
|
image_url=img
|
||||||
)
|
)
|
||||||
db.add(artist)
|
db.add(artist)
|
||||||
# We commit inside the loop or after, but for now we rely on the main commit
|
|
||||||
# However, to return the object correctly we might need to flush if we were doing complex things,
|
|
||||||
# but here adding to session is enough for SQLAlchemy to track it.
|
|
||||||
artist_objects.append(artist)
|
artist_objects.append(artist)
|
||||||
return artist_objects
|
return artist_objects
|
||||||
|
|
||||||
async def enrich_tracks(db: Session, spotify_client: SpotifyClient, recco_client: ReccoBeatsClient):
|
async def enrich_tracks(db: Session, spotify_client: SpotifyClient, recco_client: ReccoBeatsClient, genius_client: GeniusClient):
|
||||||
"""
|
"""
|
||||||
Finds tracks missing genres (Spotify) or audio features (ReccoBeats) and enriches them.
|
Enrichment Pipeline:
|
||||||
Also enriches Artists with genres.
|
1. Audio Features (ReccoBeats)
|
||||||
|
2. Artist Metadata: Genres & Images (Spotify)
|
||||||
|
3. Lyrics & Fallback Images (Genius)
|
||||||
"""
|
"""
|
||||||
|
|
||||||
# 1. Enrich Audio Features (via ReccoBeats)
|
# 1. Enrich Audio Features
|
||||||
tracks_missing_features = db.query(Track).filter(Track.danceability == None).limit(50).all()
|
tracks_missing_features = db.query(Track).filter(Track.danceability == None).limit(50).all()
|
||||||
print(f"DEBUG: Found {len(tracks_missing_features)} tracks missing audio features.")
|
|
||||||
|
|
||||||
if tracks_missing_features:
|
if tracks_missing_features:
|
||||||
print(f"Enriching {len(tracks_missing_features)} tracks with audio features (ReccoBeats)...")
|
print(f"Enriching {len(tracks_missing_features)} tracks with audio features...")
|
||||||
ids = [t.id for t in tracks_missing_features]
|
ids = [t.id for t in tracks_missing_features]
|
||||||
|
|
||||||
features_list = await recco_client.get_audio_features(ids)
|
features_list = await recco_client.get_audio_features(ids)
|
||||||
|
|
||||||
|
# Map features by ID
|
||||||
features_map = {}
|
features_map = {}
|
||||||
for f in features_list:
|
for f in features_list:
|
||||||
|
# Handle potential ID mismatch or URI format
|
||||||
tid = f.get("id")
|
tid = f.get("id")
|
||||||
if not tid and "href" in f:
|
if tid: features_map[tid] = f
|
||||||
if "tracks/" in f["href"]:
|
|
||||||
tid = f["href"].split("tracks/")[1].split("?")[0]
|
|
||||||
elif "track/" in f["href"]:
|
|
||||||
tid = f["href"].split("track/")[1].split("?")[0]
|
|
||||||
|
|
||||||
if tid:
|
|
||||||
features_map[tid] = f
|
|
||||||
|
|
||||||
updated_count = 0
|
|
||||||
for track in tracks_missing_features:
|
for track in tracks_missing_features:
|
||||||
data = features_map.get(track.id)
|
data = features_map.get(track.id)
|
||||||
if data:
|
if data:
|
||||||
@@ -84,47 +83,68 @@ async def enrich_tracks(db: Session, spotify_client: SpotifyClient, recco_client
|
|||||||
track.liveness = data.get("liveness")
|
track.liveness = data.get("liveness")
|
||||||
track.valence = data.get("valence")
|
track.valence = data.get("valence")
|
||||||
track.tempo = data.get("tempo")
|
track.tempo = data.get("tempo")
|
||||||
updated_count += 1
|
|
||||||
|
|
||||||
print(f"Updated {updated_count} tracks with audio features.")
|
|
||||||
db.commit()
|
db.commit()
|
||||||
|
|
||||||
# 2. Enrich Artist Genres (via Spotify Artists)
|
# 2. Enrich Artist Genres & Images (Spotify)
|
||||||
# We look for artists who have no genres. Note: an artist might genuinely have no genres,
|
artists_missing_data = db.query(Artist).filter((Artist.genres == None) | (Artist.image_url == None)).limit(50).all()
|
||||||
# so we might need a flag "genres_checked" in the future, but for now checking empty list is okay.
|
if artists_missing_data:
|
||||||
# However, newly created artists have genres=[] (empty list) or None?
|
print(f"Enriching {len(artists_missing_data)} artists with genres/images...")
|
||||||
# My model definition: genres = Column(JSON, nullable=True)
|
artist_ids_list = [a.id for a in artists_missing_data]
|
||||||
# So if it is None, we haven't fetched it.
|
|
||||||
|
|
||||||
artists_missing_genres = db.query(Artist).filter(Artist.genres == None).limit(50).all()
|
|
||||||
|
|
||||||
if artists_missing_genres:
|
|
||||||
print(f"Enriching {len(artists_missing_genres)} artists with genres (Spotify)...")
|
|
||||||
artist_ids_list = [a.id for a in artists_missing_genres]
|
|
||||||
|
|
||||||
artist_data_map = {}
|
artist_data_map = {}
|
||||||
# Spotify allows fetching 50 artists at a time
|
|
||||||
for i in range(0, len(artist_ids_list), 50):
|
for i in range(0, len(artist_ids_list), 50):
|
||||||
chunk = artist_ids_list[i:i+50]
|
chunk = artist_ids_list[i:i+50]
|
||||||
artists_data = await spotify_client.get_artists(chunk)
|
artists_data = await spotify_client.get_artists(chunk)
|
||||||
for a_data in artists_data:
|
for a_data in artists_data:
|
||||||
if a_data:
|
if a_data:
|
||||||
artist_data_map[a_data["id"]] = a_data.get("genres", [])
|
img = a_data["images"][0]["url"] if a_data.get("images") else None
|
||||||
|
artist_data_map[a_data["id"]] = {
|
||||||
|
"genres": a_data.get("genres", []),
|
||||||
|
"image_url": img
|
||||||
|
}
|
||||||
|
|
||||||
for artist in artists_missing_genres:
|
for artist in artists_missing_data:
|
||||||
genres = artist_data_map.get(artist.id)
|
data = artist_data_map.get(artist.id)
|
||||||
if genres is not None:
|
if data:
|
||||||
artist.genres = genres
|
if artist.genres is None: artist.genres = data["genres"]
|
||||||
|
if artist.image_url is None: artist.image_url = data["image_url"]
|
||||||
|
elif artist.genres is None:
|
||||||
|
artist.genres = [] # Prevent retry loop
|
||||||
|
|
||||||
|
db.commit()
|
||||||
|
|
||||||
|
# 3. Enrich Lyrics (Genius)
|
||||||
|
# Only fetch for tracks that have been played recently to avoid spamming Genius API
|
||||||
|
tracks_missing_lyrics = db.query(Track).filter(Track.lyrics == None).order_by(Track.updated_at.desc()).limit(10).all()
|
||||||
|
|
||||||
|
if tracks_missing_lyrics and genius_client.genius:
|
||||||
|
print(f"Enriching {len(tracks_missing_lyrics)} tracks with lyrics (Genius)...")
|
||||||
|
for track in tracks_missing_lyrics:
|
||||||
|
# We need the primary artist name
|
||||||
|
artist_name = track.artist.split(",")[0] # Heuristic: take first artist
|
||||||
|
|
||||||
|
print(f"Searching Genius for: {track.name} by {artist_name}")
|
||||||
|
data = genius_client.search_song(track.name, artist_name)
|
||||||
|
|
||||||
|
if data:
|
||||||
|
track.lyrics = data["lyrics"]
|
||||||
|
# Fallback: if we didn't get high-res art from Spotify, use Genius
|
||||||
|
if not track.image_url and data.get("image_url"):
|
||||||
|
track.image_url = data["image_url"]
|
||||||
else:
|
else:
|
||||||
# If we couldn't fetch, set to empty list so we don't keep retrying forever (or handle errors better)
|
track.lyrics = "" # Mark as empty to prevent retry loop
|
||||||
artist.genres = []
|
|
||||||
|
# Small sleep to be nice to API? GeniusClient is synchronous.
|
||||||
|
# We are in an async function but GeniusClient is blocking; that is acceptable for the worker.
|
||||||
|
|
||||||
db.commit()
|
db.commit()
|
||||||
|
|
||||||
|
|
||||||
async def ingest_recently_played(db: Session):
|
async def ingest_recently_played(db: Session):
|
||||||
spotify_client = get_spotify_client()
|
spotify_client = get_spotify_client()
|
||||||
recco_client = get_reccobeats_client()
|
recco_client = get_reccobeats_client()
|
||||||
|
genius_client = get_genius_client()
|
||||||
|
|
||||||
try:
|
try:
|
||||||
items = await spotify_client.get_recently_played(limit=50)
|
items = await spotify_client.get_recently_played(limit=50)
|
||||||
@@ -144,11 +164,18 @@ async def ingest_recently_played(db: Session):
|
|||||||
|
|
||||||
if not track:
|
if not track:
|
||||||
print(f"New track found: {track_data['name']}")
|
print(f"New track found: {track_data['name']}")
|
||||||
|
|
||||||
|
# Extract Album Art
|
||||||
|
image_url = None
|
||||||
|
if track_data.get("album") and track_data["album"].get("images"):
|
||||||
|
image_url = track_data["album"]["images"][0]["url"]
|
||||||
|
|
||||||
track = Track(
|
track = Track(
|
||||||
id=track_id,
|
id=track_id,
|
||||||
name=track_data["name"],
|
name=track_data["name"],
|
||||||
artist=", ".join([a["name"] for a in track_data["artists"]]), # Legacy string
|
artist=", ".join([a["name"] for a in track_data["artists"]]),
|
||||||
album=track_data["album"]["name"],
|
album=track_data["album"]["name"],
|
||||||
|
image_url=image_url,
|
||||||
duration_ms=track_data["duration_ms"],
|
duration_ms=track_data["duration_ms"],
|
||||||
popularity=track_data["popularity"],
|
popularity=track_data["popularity"],
|
||||||
raw_data=track_data
|
raw_data=track_data
|
||||||
@@ -162,11 +189,8 @@ async def ingest_recently_played(db: Session):
|
|||||||
db.add(track)
|
db.add(track)
|
||||||
db.commit()
|
db.commit()
|
||||||
|
|
||||||
# Ensure relationships exist even if track existed (e.g. migration)
|
# Ensure relationships exist logic...
|
||||||
# Check if track has artists linked. If not (and raw_data has them), link them.
|
|
||||||
# FIX: Logic was previously indented improperly inside `if not track`.
|
|
||||||
if not track.artists and track.raw_data and "artists" in track.raw_data:
|
if not track.artists and track.raw_data and "artists" in track.raw_data:
|
||||||
print(f"Backfilling artists for track {track.name}")
|
|
||||||
artist_objects = await ensure_artists_exist(db, track.raw_data["artists"])
|
artist_objects = await ensure_artists_exist(db, track.raw_data["artists"])
|
||||||
track.artists = artist_objects
|
track.artists = artist_objects
|
||||||
db.commit()
|
db.commit()
|
||||||
@@ -188,7 +212,7 @@ async def ingest_recently_played(db: Session):
|
|||||||
db.commit()
|
db.commit()
|
||||||
|
|
||||||
# Enrich
|
# Enrich
|
||||||
await enrich_tracks(db, spotify_client, recco_client)
|
await enrich_tracks(db, spotify_client, recco_client, genius_client)
|
||||||
|
|
||||||
async def run_worker():
|
async def run_worker():
|
||||||
"""Simulates a background worker loop."""
|
"""Simulates a background worker loop."""
|
||||||
|
|||||||
@@ -17,6 +17,7 @@ class Artist(Base):
|
|||||||
id = Column(String, primary_key=True, index=True) # Spotify ID
|
id = Column(String, primary_key=True, index=True) # Spotify ID
|
||||||
name = Column(String)
|
name = Column(String)
|
||||||
genres = Column(JSON, nullable=True) # List of genre strings
|
genres = Column(JSON, nullable=True) # List of genre strings
|
||||||
|
image_url = Column(String, nullable=True) # Artist profile image
|
||||||
|
|
||||||
# Relationships
|
# Relationships
|
||||||
tracks = relationship("Track", secondary=track_artists, back_populates="artists")
|
tracks = relationship("Track", secondary=track_artists, back_populates="artists")
|
||||||
@@ -28,6 +29,7 @@ class Track(Base):
|
|||||||
name = Column(String)
|
name = Column(String)
|
||||||
artist = Column(String) # Display string (e.g. "Drake, Future") - kept for convenience
|
artist = Column(String) # Display string (e.g. "Drake, Future") - kept for convenience
|
||||||
album = Column(String)
|
album = Column(String)
|
||||||
|
image_url = Column(String, nullable=True) # Album art
|
||||||
duration_ms = Column(Integer)
|
duration_ms = Column(Integer)
|
||||||
popularity = Column(Integer, nullable=True)
|
popularity = Column(Integer, nullable=True)
|
||||||
|
|
||||||
@@ -53,6 +55,7 @@ class Track(Base):
|
|||||||
genres = Column(JSON, nullable=True)
|
genres = Column(JSON, nullable=True)
|
||||||
|
|
||||||
# AI Analysis fields
|
# AI Analysis fields
|
||||||
|
lyrics = Column(Text, nullable=True) # Full lyrics from Genius
|
||||||
lyrics_summary = Column(String, nullable=True)
|
lyrics_summary = Column(String, nullable=True)
|
||||||
genre_tags = Column(String, nullable=True)
|
genre_tags = Column(String, nullable=True)
|
||||||
|
|
||||||
|
|||||||
35
backend/app/services/genius_client.py
Normal file
35
backend/app/services/genius_client.py
Normal file
@@ -0,0 +1,35 @@
|
|||||||
|
import os
|
||||||
|
import lyricsgenius
|
||||||
|
from typing import Optional, Dict, Any
|
||||||
|
|
||||||
|
class GeniusClient:
    """Small wrapper around ``lyricsgenius`` used to look up lyrics and artwork.

    The access token is read from the ``GENIUS_ACCESS_TOKEN`` environment
    variable. When it is not set, ``self.genius`` is ``None`` and every
    lookup returns ``None`` so callers can skip lyrics enrichment.
    """

    def __init__(self):
        self.access_token = os.getenv("GENIUS_ACCESS_TOKEN")
        if self.access_token:
            self.genius = lyricsgenius.Genius(self.access_token, verbose=False, remove_section_headers=True)
        else:
            print("WARNING: GENIUS_ACCESS_TOKEN not found. Lyrics enrichment will be skipped.")
            self.genius = None

    @staticmethod
    def _clean_title(title: str) -> str:
        """Strip " - ..." suffixes and parenthesised parts from *title*.

        Falls back to the stripped original title when the cleanup would
        leave nothing (e.g. a title that starts with a parenthesis), so the
        search is never issued with an empty title.
        """
        cleaned = title.split(" - ")[0].split("(")[0].strip()
        return cleaned or title.strip()

    def search_song(self, title: str, artist: str) -> Optional[Dict[str, Any]]:
        """
        Searches for a song on Genius and returns metadata + lyrics.

        Returns a dict with the keys ``lyrics``, ``image_url`` and
        ``artist_image_url``, or ``None`` when no client is configured, no
        song is found, or the lookup raises.
        """
        if not self.genius:
            return None

        try:
            song = self.genius.search_song(self._clean_title(title), artist)

            if song:
                # Image fields are optional extras: read them defensively so a
                # missing attribute does not throw away lyrics that were found.
                primary_artist = getattr(song, "primary_artist", None)
                return {
                    "lyrics": song.lyrics,
                    "image_url": getattr(song, "song_art_image_url", None),
                    "artist_image_url": getattr(primary_artist, "image_url", None),
                }
        except Exception as e:
            # Best-effort enrichment: report the failure and let the caller
            # treat it as "not found" instead of crashing the worker.
            print(f"Genius Search Error for {title} by {artist}: {e}")

        return None
|
||||||
@@ -4,6 +4,7 @@ from datetime import datetime, timedelta
|
|||||||
from typing import Dict, Any, List, Optional
|
from typing import Dict, Any, List, Optional
|
||||||
import math
|
import math
|
||||||
import numpy as np
|
import numpy as np
|
||||||
|
from sklearn.cluster import KMeans
|
||||||
|
|
||||||
from ..models import PlayHistory, Track, Artist
|
from ..models import PlayHistory, Track, Artist
|
||||||
|
|
||||||
@@ -78,10 +79,18 @@ class StatsService:
|
|||||||
genre_counts = {}
|
genre_counts = {}
|
||||||
album_counts = {}
|
album_counts = {}
|
||||||
|
|
||||||
# Maps for resolving names later without DB hits
|
# Maps for resolving names/images later without DB hits
|
||||||
track_map = {}
|
track_map = {}
|
||||||
artist_map = {}
|
artist_map = {}
|
||||||
album_map = {}
|
album_map = {}
|
||||||
|
|
||||||
|
# Helper to safely get image
|
||||||
|
def get_track_image(t):
    """Return an image URL for track ``t``, or ``None`` if none is available.

    Prefers the track's own ``image_url``. Otherwise falls back to the first
    entry of ``raw_data["album"]["images"]``. A missing, ``None`` or
    non-dict ``raw_data`` / ``album`` value yields ``None`` instead of
    raising.
    """
    if t.image_url:
        return t.image_url

    raw = t.raw_data
    if not isinstance(raw, dict):
        return None

    # "album" may be present with a None value; guard before looking inside.
    album = raw.get("album")
    if not isinstance(album, dict):
        return None

    imgs = album.get("images")
    if imgs:
        return imgs[0].get("url")
    return None
|
||||||
|
|
||||||
for p in plays:
|
for p in plays:
|
||||||
t = p.track
|
t = p.track
|
||||||
@@ -102,12 +111,15 @@ class StatsService:
|
|||||||
album_name = t.raw_data["album"].get("name", t.album)
|
album_name = t.raw_data["album"].get("name", t.album)
|
||||||
|
|
||||||
album_counts[album_id] = album_counts.get(album_id, 0) + 1
|
album_counts[album_id] = album_counts.get(album_id, 0) + 1
|
||||||
album_map[album_id] = album_name
|
# Store a dict with the album's name and image URL (first occurrence wins)
|
||||||
|
if album_id not in album_map:
|
||||||
|
album_map[album_id] = {"name": album_name, "image": get_track_image(t)}
|
||||||
|
|
||||||
# Artist Aggregation (Iterate objects, not string)
|
# Artist Aggregation (Iterate objects, not string)
|
||||||
for artist in t.artists:
|
for artist in t.artists:
|
||||||
artist_counts[artist.id] = artist_counts.get(artist.id, 0) + 1
|
artist_counts[artist.id] = artist_counts.get(artist.id, 0) + 1
|
||||||
artist_map[artist.id] = artist.name
|
if artist.id not in artist_map:
|
||||||
|
artist_map[artist.id] = {"name": artist.name, "image": artist.image_url}
|
||||||
|
|
||||||
# Genre Aggregation
|
# Genre Aggregation
|
||||||
if artist.genres:
|
if artist.genres:
|
||||||
@@ -124,19 +136,20 @@ class StatsService:
|
|||||||
top_tracks = [
|
top_tracks = [
|
||||||
{
|
{
|
||||||
"name": track_map[tid].name,
|
"name": track_map[tid].name,
|
||||||
"artist": ", ".join([a.name for a in track_map[tid].artists]), # Correct artist display
|
"artist": ", ".join([a.name for a in track_map[tid].artists]),
|
||||||
|
"image": get_track_image(track_map[tid]),
|
||||||
"count": c
|
"count": c
|
||||||
}
|
}
|
||||||
for tid, c in sorted(track_counts.items(), key=lambda x: x[1], reverse=True)[:5]
|
for tid, c in sorted(track_counts.items(), key=lambda x: x[1], reverse=True)[:5]
|
||||||
]
|
]
|
||||||
|
|
||||||
top_artists = [
|
top_artists = [
|
||||||
{"name": artist_map.get(aid, "Unknown"), "count": c}
|
{"name": artist_map[aid]["name"], "id": aid, "image": artist_map[aid]["image"], "count": c}
|
||||||
for aid, c in sorted(artist_counts.items(), key=lambda x: x[1], reverse=True)[:5]
|
for aid, c in sorted(artist_counts.items(), key=lambda x: x[1], reverse=True)[:5]
|
||||||
]
|
]
|
||||||
|
|
||||||
top_albums = [
|
top_albums = [
|
||||||
{"name": album_map.get(aid, "Unknown"), "count": c}
|
{"name": album_map[aid]["name"], "image": album_map[aid]["image"], "count": c}
|
||||||
for aid, c in sorted(album_counts.items(), key=lambda x: x[1], reverse=True)[:5]
|
for aid, c in sorted(album_counts.items(), key=lambda x: x[1], reverse=True)[:5]
|
||||||
]
|
]
|
||||||
|
|
||||||
@@ -188,7 +201,7 @@ class StatsService:
|
|||||||
|
|
||||||
def compute_time_stats(self, period_start: datetime, period_end: datetime) -> Dict[str, Any]:
|
def compute_time_stats(self, period_start: datetime, period_end: datetime) -> Dict[str, Any]:
|
||||||
"""
|
"""
|
||||||
Includes Part-of-Day buckets, Listening Streaks, and Active Days stats.
|
Includes Part-of-Day buckets, Listening Streaks, Active Days, and 2D Heatmap.
|
||||||
"""
|
"""
|
||||||
query = self.db.query(PlayHistory).filter(
|
query = self.db.query(PlayHistory).filter(
|
||||||
PlayHistory.played_at >= period_start,
|
PlayHistory.played_at >= period_start,
|
||||||
@@ -199,16 +212,24 @@ class StatsService:
|
|||||||
if not plays:
|
if not plays:
|
||||||
return {}
|
return {}
|
||||||
|
|
||||||
|
# Heatmap: 7 days x 24 hours
|
||||||
|
heatmap = [[0 for _ in range(24)] for _ in range(7)]
|
||||||
|
|
||||||
hourly_counts = [0] * 24
|
hourly_counts = [0] * 24
|
||||||
weekday_counts = [0] * 7
|
weekday_counts = [0] * 7
|
||||||
# Spec: Morning (6-12), Afternoon (12-18), Evening (18-24), Night (0-6)
|
|
||||||
part_of_day = {"morning": 0, "afternoon": 0, "evening": 0, "night": 0}
|
part_of_day = {"morning": 0, "afternoon": 0, "evening": 0, "night": 0}
|
||||||
active_dates = set()
|
active_dates = set()
|
||||||
|
|
||||||
for p in plays:
|
for p in plays:
|
||||||
h = p.played_at.hour
|
h = p.played_at.hour
|
||||||
|
d = p.played_at.weekday()
|
||||||
|
|
||||||
|
# Populate Heatmap
|
||||||
|
heatmap[d][h] += 1
|
||||||
|
|
||||||
hourly_counts[h] += 1
|
hourly_counts[h] += 1
|
||||||
weekday_counts[p.played_at.weekday()] += 1
|
weekday_counts[d] += 1
|
||||||
active_dates.add(p.played_at.date())
|
active_dates.add(p.played_at.date())
|
||||||
|
|
||||||
if 6 <= h < 12:
|
if 6 <= h < 12:
|
||||||
@@ -240,6 +261,7 @@ class StatsService:
|
|||||||
active_days_count = len(active_dates)
|
active_days_count = len(active_dates)
|
||||||
|
|
||||||
return {
|
return {
|
||||||
|
"heatmap": heatmap, # 7x24 Matrix
|
||||||
"hourly_distribution": hourly_counts,
|
"hourly_distribution": hourly_counts,
|
||||||
"peak_hour": hourly_counts.index(max(hourly_counts)),
|
"peak_hour": hourly_counts.index(max(hourly_counts)),
|
||||||
"weekday_distribution": weekday_counts,
|
"weekday_distribution": weekday_counts,
|
||||||
@@ -253,7 +275,7 @@ class StatsService:
|
|||||||
|
|
||||||
def compute_session_stats(self, period_start: datetime, period_end: datetime) -> Dict[str, Any]:
|
def compute_session_stats(self, period_start: datetime, period_end: datetime) -> Dict[str, Any]:
|
||||||
"""
|
"""
|
||||||
Includes Micro-sessions, Marathon sessions, Energy Arcs, and Median metrics.
|
Includes Micro-sessions, Marathon sessions, Energy Arcs, Median metrics, and Session List.
|
||||||
"""
|
"""
|
||||||
query = self.db.query(PlayHistory).options(joinedload(PlayHistory.track)).filter(
|
query = self.db.query(PlayHistory).options(joinedload(PlayHistory.track)).filter(
|
||||||
PlayHistory.played_at >= period_start,
|
PlayHistory.played_at >= period_start,
|
||||||
@@ -282,21 +304,41 @@ class StatsService:
|
|||||||
marathon_sessions = 0
|
marathon_sessions = 0
|
||||||
energy_arcs = {"rising": 0, "falling": 0, "flat": 0, "unknown": 0}
|
energy_arcs = {"rising": 0, "falling": 0, "flat": 0, "unknown": 0}
|
||||||
start_hour_dist = [0] * 24
|
start_hour_dist = [0] * 24
|
||||||
|
|
||||||
|
session_list = [] # Metadata for timeline
|
||||||
|
|
||||||
for sess in sessions:
|
for sess in sessions:
|
||||||
|
start_t = sess[0].played_at
|
||||||
|
end_t = sess[-1].played_at
|
||||||
|
|
||||||
# Start time distribution
|
# Start time distribution
|
||||||
start_hour_dist[sess[0].played_at.hour] += 1
|
start_hour_dist[start_t.hour] += 1
|
||||||
|
|
||||||
# Durations
|
# Durations
|
||||||
if len(sess) > 1:
|
if len(sess) > 1:
|
||||||
duration = (sess[-1].played_at - sess[0].played_at).total_seconds() / 60
|
duration = (end_t - start_t).total_seconds() / 60
|
||||||
lengths_min.append(duration)
|
lengths_min.append(duration)
|
||||||
else:
|
else:
|
||||||
lengths_min.append(3.0) # Approx single song
|
duration = 3.0 # Approx single song
|
||||||
|
lengths_min.append(duration)
|
||||||
|
|
||||||
# Types
|
# Types
|
||||||
if len(sess) <= 3: micro_sessions += 1
|
sess_type = "Standard"
|
||||||
if len(sess) >= 20: marathon_sessions += 1
|
if len(sess) <= 3:
|
||||||
|
micro_sessions += 1
|
||||||
|
sess_type = "Micro"
|
||||||
|
elif len(sess) >= 20:
|
||||||
|
marathon_sessions += 1
|
||||||
|
sess_type = "Marathon"
|
||||||
|
|
||||||
|
# Store Session Metadata
|
||||||
|
session_list.append({
|
||||||
|
"start_time": start_t.isoformat(),
|
||||||
|
"end_time": end_t.isoformat(),
|
||||||
|
"duration_minutes": round(duration, 1),
|
||||||
|
"track_count": len(sess),
|
||||||
|
"type": sess_type
|
||||||
|
})
|
||||||
|
|
||||||
# Energy Arc
|
# Energy Arc
|
||||||
first_t = sess[0].track
|
first_t = sess[0].track
|
||||||
@@ -326,12 +368,13 @@ class StatsService:
|
|||||||
"start_hour_distribution": start_hour_dist,
|
"start_hour_distribution": start_hour_dist,
|
||||||
"micro_session_rate": round(micro_sessions / len(sessions), 2),
|
"micro_session_rate": round(micro_sessions / len(sessions), 2),
|
||||||
"marathon_session_rate": round(marathon_sessions / len(sessions), 2),
|
"marathon_session_rate": round(marathon_sessions / len(sessions), 2),
|
||||||
"energy_arcs": energy_arcs
|
"energy_arcs": energy_arcs,
|
||||||
|
"session_list": session_list
|
||||||
}
|
}
|
||||||
|
|
||||||
def compute_vibe_stats(self, period_start: datetime, period_end: datetime) -> Dict[str, Any]:
|
def compute_vibe_stats(self, period_start: datetime, period_end: datetime) -> Dict[str, Any]:
|
||||||
"""
|
"""
|
||||||
Aggregates Audio Features + Calculates Whiplash, Percentiles, and Profiles.
|
Aggregates Audio Features + Calculates Whiplash + Clustering + Harmonic Profile.
|
||||||
"""
|
"""
|
||||||
plays = self.db.query(PlayHistory).filter(
|
plays = self.db.query(PlayHistory).filter(
|
||||||
PlayHistory.played_at >= period_start,
|
PlayHistory.played_at >= period_start,
|
||||||
@@ -349,6 +392,14 @@ class StatsService:
|
|||||||
feature_keys = ["energy", "valence", "danceability", "tempo", "acousticness",
|
feature_keys = ["energy", "valence", "danceability", "tempo", "acousticness",
|
||||||
"instrumentalness", "liveness", "speechiness", "loudness"]
|
"instrumentalness", "liveness", "speechiness", "loudness"]
|
||||||
features = {k: [] for k in feature_keys}
|
features = {k: [] for k in feature_keys}
|
||||||
|
|
||||||
|
# For Clustering: List of [energy, valence, danceability, acousticness]
|
||||||
|
cluster_data = []
|
||||||
|
|
||||||
|
# For Harmonic & Tempo
|
||||||
|
keys = []
|
||||||
|
modes = []
|
||||||
|
tempo_zones = {"chill": 0, "groove": 0, "hype": 0}
|
||||||
|
|
||||||
# 2. Transition Arrays (for Whiplash)
|
# 2. Transition Arrays (for Whiplash)
|
||||||
transitions = {"tempo": [], "energy": [], "valence": []}
|
transitions = {"tempo": [], "energy": [], "valence": []}
|
||||||
@@ -364,6 +415,20 @@ class StatsService:
|
|||||||
val = getattr(t, key, None)
|
val = getattr(t, key, None)
|
||||||
if val is not None:
|
if val is not None:
|
||||||
features[key].append(val)
|
features[key].append(val)
|
||||||
|
|
||||||
|
# Cluster Data (only if all 4 exist)
|
||||||
|
if all(getattr(t, k) is not None for k in ["energy", "valence", "danceability", "acousticness"]):
|
||||||
|
cluster_data.append([t.energy, t.valence, t.danceability, t.acousticness])
|
||||||
|
|
||||||
|
# Harmonic
|
||||||
|
if t.key is not None: keys.append(t.key)
|
||||||
|
if t.mode is not None: modes.append(t.mode)
|
||||||
|
|
||||||
|
# Tempo Zones
|
||||||
|
if t.tempo is not None:
|
||||||
|
if t.tempo < 100: tempo_zones["chill"] += 1
|
||||||
|
elif t.tempo < 130: tempo_zones["groove"] += 1
|
||||||
|
else: tempo_zones["hype"] += 1
|
||||||
|
|
||||||
# Calculate Transitions (Whiplash)
|
# Calculate Transitions (Whiplash)
|
||||||
if i > 0 and previous_track:
|
if i > 0 and previous_track:
|
||||||
@@ -381,12 +446,13 @@ class StatsService:
|
|||||||
# Calculate Stats (Mean, Std, Percentiles)
|
# Calculate Stats (Mean, Std, Percentiles)
|
||||||
stats = {}
|
stats = {}
|
||||||
for key, values in features.items():
|
for key, values in features.items():
|
||||||
if values:
|
valid = [v for v in values if v is not None]
|
||||||
stats[f"avg_{key}"] = float(np.mean(values))
|
if valid:
|
||||||
stats[f"std_{key}"] = float(np.std(values))
|
stats[f"avg_{key}"] = float(np.mean(valid))
|
||||||
stats[f"p10_{key}"] = float(np.percentile(values, 10))
|
stats[f"std_{key}"] = float(np.std(valid))
|
||||||
stats[f"p50_{key}"] = float(np.percentile(values, 50)) # Median
|
stats[f"p10_{key}"] = float(np.percentile(valid, 10))
|
||||||
stats[f"p90_{key}"] = float(np.percentile(values, 90))
|
stats[f"p50_{key}"] = float(np.percentile(valid, 50)) # Median
|
||||||
|
stats[f"p90_{key}"] = float(np.percentile(valid, 90))
|
||||||
else:
|
else:
|
||||||
stats[f"avg_{key}"] = None
|
stats[f"avg_{key}"] = None
|
||||||
|
|
||||||
@@ -396,31 +462,97 @@ class StatsService:
|
|||||||
"x": round(stats["avg_valence"], 2),
|
"x": round(stats["avg_valence"], 2),
|
||||||
"y": round(stats["avg_energy"], 2)
|
"y": round(stats["avg_energy"], 2)
|
||||||
}
|
}
|
||||||
# Consistency
|
|
||||||
avg_std = (stats.get("std_energy", 0) + stats.get("std_valence", 0)) / 2
|
avg_std = (stats.get("std_energy", 0) + stats.get("std_valence", 0)) / 2
|
||||||
stats["consistency_score"] = round(1.0 - avg_std, 2)
|
stats["consistency_score"] = round(1.0 - avg_std, 2)
|
||||||
|
|
||||||
# Rhythm Profile
|
|
||||||
if stats.get("avg_tempo") is not None and stats.get("avg_danceability") is not None:
|
if stats.get("avg_tempo") is not None and stats.get("avg_danceability") is not None:
|
||||||
stats["rhythm_profile"] = {
|
stats["rhythm_profile"] = {
|
||||||
"avg_tempo": round(stats["avg_tempo"], 1),
|
"avg_tempo": round(stats["avg_tempo"], 1),
|
||||||
"avg_danceability": round(stats["avg_danceability"], 2)
|
"avg_danceability": round(stats["avg_danceability"], 2)
|
||||||
}
|
}
|
||||||
|
|
||||||
# Texture Profile
|
|
||||||
if stats.get("avg_acousticness") is not None and stats.get("avg_instrumentalness") is not None:
|
if stats.get("avg_acousticness") is not None and stats.get("avg_instrumentalness") is not None:
|
||||||
stats["texture_profile"] = {
|
stats["texture_profile"] = {
|
||||||
"acousticness": round(stats["avg_acousticness"], 2),
|
"acousticness": round(stats["avg_acousticness"], 2),
|
||||||
"instrumentalness": round(stats["avg_instrumentalness"], 2)
|
"instrumentalness": round(stats["avg_instrumentalness"], 2)
|
||||||
}
|
}
|
||||||
|
|
||||||
# Whiplash Scores
|
# Whiplash
|
||||||
stats["whiplash"] = {}
|
stats["whiplash"] = {}
|
||||||
for k in ["tempo", "energy", "valence"]:
|
for k in ["tempo", "energy", "valence"]:
|
||||||
if transitions[k]:
|
if transitions[k]:
|
||||||
stats["whiplash"][k] = round(float(np.mean(transitions[k])), 2)
|
stats["whiplash"][k] = round(float(np.mean(transitions[k])), 2)
|
||||||
else:
|
else:
|
||||||
stats["whiplash"][k] = 0
|
stats["whiplash"][k] = 0
|
||||||
|
|
||||||
|
# Tempo Zones
|
||||||
|
total_tempo = sum(tempo_zones.values())
|
||||||
|
if total_tempo > 0:
|
||||||
|
stats["tempo_zones"] = {k: round(v / total_tempo, 2) for k, v in tempo_zones.items()}
|
||||||
|
else:
|
||||||
|
stats["tempo_zones"] = {}
|
||||||
|
|
||||||
|
# Harmonic Profile
|
||||||
|
if modes:
|
||||||
|
major_count = len([m for m in modes if m == 1])
|
||||||
|
stats["harmonic_profile"] = {
|
||||||
|
"major_pct": round(major_count / len(modes), 2),
|
||||||
|
"minor_pct": round((len(modes) - major_count) / len(modes), 2)
|
||||||
|
}
|
||||||
|
|
||||||
|
if keys:
|
||||||
|
# Map integers to pitch class notation
|
||||||
|
pitch_class = ["C", "C#", "D", "D#", "E", "F", "F#", "G", "G#", "A", "A#", "B"]
|
||||||
|
key_counts = {}
|
||||||
|
for k in keys:
|
||||||
|
if 0 <= k < 12:
|
||||||
|
label = pitch_class[k]
|
||||||
|
key_counts[label] = key_counts.get(label, 0) + 1
|
||||||
|
stats["top_keys"] = [{"key": k, "count": v} for k, v in sorted(key_counts.items(), key=lambda x: x[1], reverse=True)[:3]]
|
||||||
|
|
||||||
|
# CLUSTERING (K-Means)
|
||||||
|
if len(cluster_data) >= 5: # Need enough data points
|
||||||
|
try:
|
||||||
|
# Features: energy, valence, danceability, acousticness
|
||||||
|
kmeans = KMeans(n_clusters=3, random_state=42, n_init=10)
|
||||||
|
labels = kmeans.fit_predict(cluster_data)
|
||||||
|
|
||||||
|
# Analyze clusters
|
||||||
|
clusters = []
|
||||||
|
for i in range(3):
|
||||||
|
mask = (labels == i)
|
||||||
|
count = np.sum(mask)
|
||||||
|
if count == 0: continue
|
||||||
|
|
||||||
|
centroid = kmeans.cluster_centers_[i]
|
||||||
|
share = count / len(cluster_data)
|
||||||
|
|
||||||
|
# Heuristic Naming
|
||||||
|
c_energy, c_valence, c_dance, c_acoustic = centroid
|
||||||
|
name = "Mixed Vibe"
|
||||||
|
if c_energy > 0.7: name = "High Energy"
|
||||||
|
elif c_acoustic > 0.7: name = "Acoustic / Chill"
|
||||||
|
elif c_valence < 0.3: name = "Melancholy"
|
||||||
|
elif c_dance > 0.7: name = "Dance / Groove"
|
||||||
|
|
||||||
|
clusters.append({
|
||||||
|
"name": name,
|
||||||
|
"share": round(share, 2),
|
||||||
|
"features": {
|
||||||
|
"energy": round(c_energy, 2),
|
||||||
|
"valence": round(c_valence, 2),
|
||||||
|
"danceability": round(c_dance, 2),
|
||||||
|
"acousticness": round(c_acoustic, 2)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
|
||||||
|
# Sort by share
|
||||||
|
stats["clusters"] = sorted(clusters, key=lambda x: x["share"], reverse=True)
|
||||||
|
except Exception as e:
|
||||||
|
print(f"Clustering failed: {e}")
|
||||||
|
stats["clusters"] = []
|
||||||
|
else:
|
||||||
|
stats["clusters"] = []
|
||||||
|
|
||||||
return stats
|
return stats
|
||||||
|
|
||||||
@@ -448,9 +580,11 @@ class StatsService:
|
|||||||
if not years:
|
if not years:
|
||||||
return {"musical_age": None}
|
return {"musical_age": None}
|
||||||
|
|
||||||
|
# Musical Age (Weighted Average)
|
||||||
avg_year = sum(years) / len(years)
|
avg_year = sum(years) / len(years)
|
||||||
current_year = datetime.utcnow().year
|
current_year = datetime.utcnow().year
|
||||||
|
|
||||||
|
# Decade Distribution
|
||||||
decades = {}
|
decades = {}
|
||||||
for y in years:
|
for y in years:
|
||||||
dec = (y // 10) * 10
|
dec = (y // 10) * 10
|
||||||
@@ -463,17 +597,18 @@ class StatsService:
|
|||||||
return {
|
return {
|
||||||
"musical_age": int(avg_year),
|
"musical_age": int(avg_year),
|
||||||
"nostalgia_gap": int(current_year - avg_year),
|
"nostalgia_gap": int(current_year - avg_year),
|
||||||
"freshness_score": dist.get(f"{int(current_year / 10) * 10}s", 0),
|
"freshness_score": dist.get(f"{int(current_year / 10) * 10}s", 0), # Share of current decade
|
||||||
"decade_distribution": dist
|
"decade_distribution": dist
|
||||||
}
|
}
|
||||||
|
|
||||||
def compute_skip_stats(self, period_start: datetime, period_end: datetime) -> Dict[str, Any]:
|
def compute_skip_stats(self, period_start: datetime, period_end: datetime) -> Dict[str, Any]:
|
||||||
"""
|
"""
|
||||||
Implements boredom skip detection.
|
Implements boredom skip detection:
|
||||||
|
(next_track.played_at - current_track.played_at) < (current_track.duration_ms / 1000 - 10s)
|
||||||
"""
|
"""
|
||||||
query = self.db.query(PlayHistory).filter(
|
query = self.db.query(PlayHistory).filter(
|
||||||
PlayHistory.played_at >= period_start,
|
PlayHistory.played_at >= period_start,
|
||||||
PlayHistory.played_at < period_end
|
PlayHistory.played_at <= period_end
|
||||||
).order_by(PlayHistory.played_at.asc())
|
).order_by(PlayHistory.played_at.asc())
|
||||||
plays = query.all()
|
plays = query.all()
|
||||||
|
|
||||||
@@ -485,10 +620,7 @@ class StatsService:
|
|||||||
tracks = self.db.query(Track).filter(Track.id.in_(track_ids)).all()
|
tracks = self.db.query(Track).filter(Track.id.in_(track_ids)).all()
|
||||||
track_map = {t.id: t for t in tracks}
|
track_map = {t.id: t for t in tracks}
|
||||||
|
|
||||||
# Denominator: transitions, which is plays - 1
|
for i in range(len(plays) - 1):
|
||||||
transitions_count = len(plays) - 1
|
|
||||||
|
|
||||||
for i in range(transitions_count):
|
|
||||||
current_play = plays[i]
|
current_play = plays[i]
|
||||||
next_play = plays[i+1]
|
next_play = plays[i+1]
|
||||||
track = track_map.get(current_play.track_id)
|
track = track_map.get(current_play.track_id)
|
||||||
@@ -497,28 +629,31 @@ class StatsService:
|
|||||||
continue
|
continue
|
||||||
|
|
||||||
diff_seconds = (next_play.played_at - current_play.played_at).total_seconds()
|
diff_seconds = (next_play.played_at - current_play.played_at).total_seconds()
|
||||||
duration_sec = track.duration_ms / 1000.0
|
|
||||||
|
|
||||||
# Logic: If diff < (duration - 10s), it's a skip.
|
# Logic: If diff < (duration - 10s), it's a skip.
|
||||||
# AND it must be a "valid" listening attempt (e.g. > 30s)
|
# Convert duration to seconds
|
||||||
# AND it shouldn't be a huge gap (e.g. paused for 2 hours then hit next)
|
duration_sec = track.duration_ms / 1000.0
|
||||||
|
|
||||||
if 30 < diff_seconds < (duration_sec - 10):
|
# Also ensure diff isn't negative or weirdly small (re-plays)
|
||||||
|
# And assume "listening" means diff > 30s at least?
|
||||||
|
# Spec says "Spotify only returns 30s+".
|
||||||
|
|
||||||
|
if diff_seconds < (duration_sec - 10):
|
||||||
skips += 1
|
skips += 1
|
||||||
|
|
||||||
return {
|
return {
|
||||||
"total_skips": skips,
|
"total_skips": skips,
|
||||||
"skip_rate": round(skips / transitions_count, 3) if transitions_count > 0 else 0
|
"skip_rate": round(skips / len(plays), 3)
|
||||||
}
|
}
|
||||||
|
|
||||||
def compute_context_stats(self, period_start: datetime, period_end: datetime) -> Dict[str, Any]:
|
def compute_context_stats(self, period_start: datetime, period_end: datetime) -> Dict[str, Any]:
|
||||||
"""
|
"""
|
||||||
Analyzes context_uri and switching rate.
|
Analyzes context_uri to determine if user listens to Playlists, Albums, or Artists.
|
||||||
"""
|
"""
|
||||||
query = self.db.query(PlayHistory).filter(
|
query = self.db.query(PlayHistory).filter(
|
||||||
PlayHistory.played_at >= period_start,
|
PlayHistory.played_at >= period_start,
|
||||||
PlayHistory.played_at < period_end
|
PlayHistory.played_at <= period_end
|
||||||
).order_by(PlayHistory.played_at.asc())
|
)
|
||||||
plays = query.all()
|
plays = query.all()
|
||||||
|
|
||||||
if not plays:
|
if not plays:
|
||||||
@@ -526,32 +661,31 @@ class StatsService:
|
|||||||
|
|
||||||
context_counts = {"playlist": 0, "album": 0, "artist": 0, "collection": 0, "unknown": 0}
|
context_counts = {"playlist": 0, "album": 0, "artist": 0, "collection": 0, "unknown": 0}
|
||||||
unique_contexts = {}
|
unique_contexts = {}
|
||||||
context_switches = 0
|
|
||||||
|
|
||||||
last_context = None
|
|
||||||
|
|
||||||
for p in plays:
|
for p in plays:
|
||||||
uri = p.context_uri
|
if not p.context_uri:
|
||||||
if not uri:
|
|
||||||
context_counts["unknown"] += 1
|
context_counts["unknown"] += 1
|
||||||
uri = "unknown"
|
continue
|
||||||
else:
|
|
||||||
if "playlist" in uri: context_counts["playlist"] += 1
|
|
||||||
elif "album" in uri: context_counts["album"] += 1
|
|
||||||
elif "artist" in uri: context_counts["artist"] += 1
|
|
||||||
elif "collection" in uri: context_counts["collection"] += 1
|
|
||||||
else: context_counts["unknown"] += 1
|
|
||||||
|
|
||||||
if uri != "unknown":
|
# Count distinct contexts for loyalty
|
||||||
unique_contexts[uri] = unique_contexts.get(uri, 0) + 1
|
unique_contexts[p.context_uri] = unique_contexts.get(p.context_uri, 0) + 1
|
||||||
|
|
||||||
# Switch detection
|
if "playlist" in p.context_uri:
|
||||||
if last_context and uri != last_context:
|
context_counts["playlist"] += 1
|
||||||
context_switches += 1
|
elif "album" in p.context_uri:
|
||||||
last_context = uri
|
context_counts["album"] += 1
|
||||||
|
elif "artist" in p.context_uri:
|
||||||
|
context_counts["artist"] += 1
|
||||||
|
elif "collection" in p.context_uri:
|
||||||
|
# "Liked Songs" usually shows up as collection
|
||||||
|
context_counts["collection"] += 1
|
||||||
|
else:
|
||||||
|
context_counts["unknown"] += 1
|
||||||
|
|
||||||
total = len(plays)
|
total = len(plays)
|
||||||
breakdown = {k: round(v / total, 2) for k, v in context_counts.items()}
|
breakdown = {k: round(v / total, 2) for k, v in context_counts.items()}
|
||||||
|
|
||||||
|
# Top 5 Contexts (Requires resolving URI to name, possibly missing metadata here)
|
||||||
sorted_contexts = sorted(unique_contexts.items(), key=lambda x: x[1], reverse=True)[:5]
|
sorted_contexts = sorted(unique_contexts.items(), key=lambda x: x[1], reverse=True)[:5]
|
||||||
|
|
||||||
return {
|
return {
|
||||||
@@ -559,17 +693,16 @@ class StatsService:
|
|||||||
"album_purist_score": breakdown.get("album", 0),
|
"album_purist_score": breakdown.get("album", 0),
|
||||||
"playlist_dependency": breakdown.get("playlist", 0),
|
"playlist_dependency": breakdown.get("playlist", 0),
|
||||||
"context_loyalty": round(len(plays) / len(unique_contexts), 2) if unique_contexts else 0,
|
"context_loyalty": round(len(plays) / len(unique_contexts), 2) if unique_contexts else 0,
|
||||||
"context_switching_rate": round(context_switches / (total - 1), 2) if total > 1 else 0,
|
|
||||||
"top_context_uris": [{"uri": k, "count": v} for k, v in sorted_contexts]
|
"top_context_uris": [{"uri": k, "count": v} for k, v in sorted_contexts]
|
||||||
}
|
}
|
||||||
|
|
||||||
def compute_taste_stats(self, period_start: datetime, period_end: datetime) -> Dict[str, Any]:
|
def compute_taste_stats(self, period_start: datetime, period_end: datetime) -> Dict[str, Any]:
|
||||||
"""
|
"""
|
||||||
Mainstream vs. Hipster analysis.
|
Mainstream vs. Hipster analysis based on Track.popularity (0-100).
|
||||||
"""
|
"""
|
||||||
query = self.db.query(PlayHistory).filter(
|
query = self.db.query(PlayHistory).filter(
|
||||||
PlayHistory.played_at >= period_start,
|
PlayHistory.played_at >= period_start,
|
||||||
PlayHistory.played_at < period_end
|
PlayHistory.played_at <= period_end
|
||||||
)
|
)
|
||||||
plays = query.all()
|
plays = query.all()
|
||||||
if not plays: return {}
|
if not plays: return {}
|
||||||
@@ -602,47 +735,38 @@ class StatsService:
|
|||||||
|
|
||||||
def compute_lifecycle_stats(self, period_start: datetime, period_end: datetime) -> Dict[str, Any]:
|
def compute_lifecycle_stats(self, period_start: datetime, period_end: datetime) -> Dict[str, Any]:
|
||||||
"""
|
"""
|
||||||
Discovery, Recurrence, Comebacks, Obsessions.
|
Determines if tracks are 'New Discoveries' or 'Old Favorites'.
|
||||||
"""
|
"""
|
||||||
# 1. Current plays
|
# 1. Get tracks played in this period
|
||||||
current_plays = self.db.query(PlayHistory).filter(
|
current_plays = self.db.query(PlayHistory).filter(
|
||||||
PlayHistory.played_at >= period_start,
|
PlayHistory.played_at >= period_start,
|
||||||
PlayHistory.played_at < period_end
|
PlayHistory.played_at <= period_end
|
||||||
).all()
|
).all()
|
||||||
|
|
||||||
if not current_plays: return {}
|
if not current_plays: return {}
|
||||||
|
|
||||||
current_track_ids = set([p.track_id for p in current_plays])
|
current_track_ids = set([p.track_id for p in current_plays])
|
||||||
|
|
||||||
# 2. Historical check
|
# 2. Check if these tracks were played BEFORE period_start
|
||||||
|
# We find which of the current_track_ids exist in history < period_start
|
||||||
old_tracks_query = self.db.query(distinct(PlayHistory.track_id)).filter(
|
old_tracks_query = self.db.query(distinct(PlayHistory.track_id)).filter(
|
||||||
PlayHistory.track_id.in_(current_track_ids),
|
PlayHistory.track_id.in_(current_track_ids),
|
||||||
PlayHistory.played_at < period_start
|
PlayHistory.played_at < period_start
|
||||||
)
|
)
|
||||||
old_track_ids = set([r[0] for r in old_tracks_query.all()])
|
old_track_ids = set([r[0] for r in old_tracks_query.all()])
|
||||||
|
|
||||||
# 3. Discovery
|
# 3. Calculate Discovery
|
||||||
new_discoveries = current_track_ids - old_track_ids
|
new_discoveries = current_track_ids - old_track_ids
|
||||||
|
discovery_count = len(new_discoveries)
|
||||||
# 4. Obsessions (Tracks with > 5 plays in period)
|
|
||||||
track_counts = {}
|
# Calculate plays on new discoveries
|
||||||
for p in current_plays:
|
|
||||||
track_counts[p.track_id] = track_counts.get(p.track_id, 0) + 1
|
|
||||||
obsessions = [tid for tid, count in track_counts.items() if count >= 5]
|
|
||||||
|
|
||||||
# 5. Comeback Detection (Old tracks not played in last 30 days)
|
|
||||||
# Simplified: If in old_track_ids but NOT in last 30 days before period_start?
|
|
||||||
# That requires a gap check. For now, we will mark 'recurrence' as general relistening.
|
|
||||||
|
|
||||||
plays_on_new = len([p for p in current_plays if p.track_id in new_discoveries])
|
plays_on_new = len([p for p in current_plays if p.track_id in new_discoveries])
|
||||||
total_plays = len(current_plays)
|
total_plays = len(current_plays)
|
||||||
|
|
||||||
return {
|
return {
|
||||||
"discovery_count": len(new_discoveries),
|
"discovery_count": discovery_count,
|
||||||
"discovery_rate": round(plays_on_new / total_plays, 3) if total_plays > 0 else 0,
|
"discovery_rate": round(plays_on_new / total_plays, 3) if total_plays > 0 else 0,
|
||||||
"recurrence_rate": round((total_plays - plays_on_new) / total_plays, 3) if total_plays > 0 else 0,
|
"recurrence_rate": round((total_plays - plays_on_new) / total_plays, 3) if total_plays > 0 else 0
|
||||||
"obsession_count": len(obsessions),
|
|
||||||
"obsession_rate": round(len(obsessions) / len(current_track_ids), 3) if current_track_ids else 0
|
|
||||||
}
|
}
|
||||||
|
|
||||||
def compute_explicit_stats(self, period_start: datetime, period_end: datetime) -> Dict[str, Any]:
|
def compute_explicit_stats(self, period_start: datetime, period_end: datetime) -> Dict[str, Any]:
|
||||||
@@ -651,7 +775,7 @@ class StatsService:
|
|||||||
"""
|
"""
|
||||||
query = self.db.query(PlayHistory).options(joinedload(PlayHistory.track)).filter(
|
query = self.db.query(PlayHistory).options(joinedload(PlayHistory.track)).filter(
|
||||||
PlayHistory.played_at >= period_start,
|
PlayHistory.played_at >= period_start,
|
||||||
PlayHistory.played_at < period_end
|
PlayHistory.played_at <= period_end
|
||||||
)
|
)
|
||||||
plays = query.all()
|
plays = query.all()
|
||||||
|
|
||||||
@@ -665,14 +789,24 @@ class StatsService:
|
|||||||
for p in plays:
|
for p in plays:
|
||||||
h = p.played_at.hour
|
h = p.played_at.hour
|
||||||
hourly_total[h] += 1
|
hourly_total[h] += 1
|
||||||
|
|
||||||
|
# Check raw_data for explicit flag
|
||||||
t = p.track
|
t = p.track
|
||||||
|
is_explicit = False
|
||||||
if t.raw_data and t.raw_data.get("explicit"):
|
if t.raw_data and t.raw_data.get("explicit"):
|
||||||
|
is_explicit = True
|
||||||
|
|
||||||
|
if is_explicit:
|
||||||
explicit_count += 1
|
explicit_count += 1
|
||||||
hourly_explicit[h] += 1
|
hourly_explicit[h] += 1
|
||||||
|
|
||||||
|
# Calculate hourly percentages
|
||||||
hourly_rates = []
|
hourly_rates = []
|
||||||
for i in range(24):
|
for i in range(24):
|
||||||
hourly_rates.append(round(hourly_explicit[i] / hourly_total[i], 2) if hourly_total[i] > 0 else 0.0)
|
if hourly_total[i] > 0:
|
||||||
|
hourly_rates.append(round(hourly_explicit[i] / hourly_total[i], 2))
|
||||||
|
else:
|
||||||
|
hourly_rates.append(0.0)
|
||||||
|
|
||||||
return {
|
return {
|
||||||
"explicit_rate": round(explicit_count / total_plays, 3),
|
"explicit_rate": round(explicit_count / total_plays, 3),
|
||||||
@@ -681,6 +815,7 @@ class StatsService:
|
|||||||
}
|
}
|
||||||
|
|
||||||
def generate_full_report(self, period_start: datetime, period_end: datetime) -> Dict[str, Any]:
|
def generate_full_report(self, period_start: datetime, period_end: datetime) -> Dict[str, Any]:
|
||||||
|
# 1. Calculate all current stats
|
||||||
current_stats = {
|
current_stats = {
|
||||||
"period": {"start": period_start.isoformat(), "end": period_end.isoformat()},
|
"period": {"start": period_start.isoformat(), "end": period_end.isoformat()},
|
||||||
"volume": self.compute_volume_stats(period_start, period_end),
|
"volume": self.compute_volume_stats(period_start, period_end),
|
||||||
@@ -695,7 +830,9 @@ class StatsService:
|
|||||||
"skips": self.compute_skip_stats(period_start, period_end)
|
"skips": self.compute_skip_stats(period_start, period_end)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# 2. Calculate Comparison
|
||||||
current_stats["comparison"] = self.compute_comparison(current_stats, period_start, period_end)
|
current_stats["comparison"] = self.compute_comparison(current_stats, period_start, period_end)
|
||||||
|
|
||||||
return current_stats
|
return current_stats
|
||||||
|
|
||||||
def _empty_volume_stats(self):
|
def _empty_volume_stats(self):
|
||||||
@@ -710,4 +847,4 @@ class StatsService:
|
|||||||
def _pct_change(self, curr, prev):
|
def _pct_change(self, curr, prev):
|
||||||
if prev == 0:
|
if prev == 0:
|
||||||
return 100.0 if curr > 0 else 0.0
|
return 100.0 if curr > 0 else 0.0
|
||||||
return round(((curr - prev) / prev) * 100, 1)
|
return round(((curr - prev) / prev) * 100, 1)
|
||||||
@@ -11,3 +11,4 @@ python-dateutil==2.9.0.post0
|
|||||||
requests==2.31.0
|
requests==2.31.0
|
||||||
alembic==1.13.1
|
alembic==1.13.1
|
||||||
scikit-learn==1.4.0
|
scikit-learn==1.4.0
|
||||||
|
lyricsgenius==3.0.1
|
||||||
|
|||||||
Reference in New Issue
Block a user