mirror of
https://github.com/bnair123/MusicAnalyser.git
synced 2026-02-25 11:46:07 +00:00
Fixed and added all the stats_service.py methods
This commit is contained in:
@@ -1,12 +1,15 @@
|
|||||||
from fastapi import FastAPI, Depends
|
from fastapi import FastAPI, Depends, HTTPException, BackgroundTasks
|
||||||
from sqlalchemy.orm import Session
|
from sqlalchemy.orm import Session
|
||||||
|
from datetime import datetime, timedelta
|
||||||
|
from typing import List, Optional
|
||||||
|
from dotenv import load_dotenv
|
||||||
|
|
||||||
from .database import engine, Base, get_db
|
from .database import engine, Base, get_db
|
||||||
from .models import PlayHistory as PlayHistoryModel, Track as TrackModel
|
from .models import PlayHistory as PlayHistoryModel, Track as TrackModel, AnalysisSnapshot
|
||||||
from . import schemas
|
from . import schemas
|
||||||
from .ingest import ingest_recently_played
|
from .ingest import ingest_recently_played
|
||||||
import asyncio
|
from .services.stats_service import StatsService
|
||||||
from typing import List
|
from .services.narrative_service import NarrativeService
|
||||||
from dotenv import load_dotenv
|
|
||||||
|
|
||||||
load_dotenv()
|
load_dotenv()
|
||||||
|
|
||||||
@@ -24,13 +27,68 @@ def get_history(limit: int = 50, db: Session = Depends(get_db)):
|
|||||||
history = db.query(PlayHistoryModel).order_by(PlayHistoryModel.played_at.desc()).limit(limit).all()
|
history = db.query(PlayHistoryModel).order_by(PlayHistoryModel.played_at.desc()).limit(limit).all()
|
||||||
return history
|
return history
|
||||||
|
|
||||||
@app.post("/trigger-ingest")
|
|
||||||
async def trigger_ingest(db: Session = Depends(get_db)):
|
|
||||||
"""Manually trigger the ingestion process (useful for testing)"""
|
|
||||||
await ingest_recently_played(db)
|
|
||||||
return {"status": "Ingestion triggered"}
|
|
||||||
|
|
||||||
@app.get("/tracks", response_model=List[schemas.Track])
|
@app.get("/tracks", response_model=List[schemas.Track])
|
||||||
def get_tracks(limit: int = 50, db: Session = Depends(get_db)):
|
def get_tracks(limit: int = 50, db: Session = Depends(get_db)):
|
||||||
tracks = db.query(TrackModel).limit(limit).all()
|
tracks = db.query(TrackModel).limit(limit).all()
|
||||||
return tracks
|
return tracks
|
||||||
|
|
||||||
|
@app.post("/trigger-ingest")
|
||||||
|
async def trigger_ingest(background_tasks: BackgroundTasks, db: Session = Depends(get_db)):
|
||||||
|
"""Triggers Spotify ingestion in the background."""
|
||||||
|
background_tasks.add_task(ingest_recently_played, db)
|
||||||
|
return {"status": "Ingestion started in background"}
|
||||||
|
|
||||||
|
@app.post("/trigger-analysis")
|
||||||
|
def trigger_analysis(
|
||||||
|
days: int = 30,
|
||||||
|
model_name: str = "gemini-2.5-flash",
|
||||||
|
db: Session = Depends(get_db)
|
||||||
|
):
|
||||||
|
"""
|
||||||
|
Runs the full analysis pipeline (Stats + LLM) for the last X days.
|
||||||
|
Returns the computed metrics and narrative immediately.
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
end_date = datetime.utcnow()
|
||||||
|
start_date = end_date - timedelta(days=days)
|
||||||
|
|
||||||
|
# 1. Compute Stats
|
||||||
|
stats_service = StatsService(db)
|
||||||
|
stats_json = stats_service.generate_full_report(start_date, end_date)
|
||||||
|
|
||||||
|
if stats_json["volume"]["total_plays"] == 0:
|
||||||
|
raise HTTPException(status_code=404, detail="No plays found in the specified period.")
|
||||||
|
|
||||||
|
# 2. Generate Narrative
|
||||||
|
narrative_service = NarrativeService(model_name=model_name)
|
||||||
|
narrative_json = narrative_service.generate_narrative(stats_json)
|
||||||
|
|
||||||
|
# 3. Save Snapshot
|
||||||
|
snapshot = AnalysisSnapshot(
|
||||||
|
period_start=start_date,
|
||||||
|
period_end=end_date,
|
||||||
|
period_label=f"last_{days}_days",
|
||||||
|
metrics_payload=stats_json,
|
||||||
|
narrative_report=narrative_json,
|
||||||
|
model_used=model_name
|
||||||
|
)
|
||||||
|
db.add(snapshot)
|
||||||
|
db.commit()
|
||||||
|
db.refresh(snapshot)
|
||||||
|
|
||||||
|
return {
|
||||||
|
"status": "success",
|
||||||
|
"snapshot_id": snapshot.id,
|
||||||
|
"period": {"start": start_date, "end": end_date},
|
||||||
|
"metrics": stats_json,
|
||||||
|
"narrative": narrative_json
|
||||||
|
}
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
print(f"Analysis Failed: {e}")
|
||||||
|
raise HTTPException(status_code=500, detail=str(e))
|
||||||
|
|
||||||
|
@app.get("/snapshots")
|
||||||
|
def get_snapshots(limit: int = 10, db: Session = Depends(get_db)):
|
||||||
|
"""Retrieve past analysis snapshots."""
|
||||||
|
return db.query(AnalysisSnapshot).order_by(AnalysisSnapshot.date.desc()).limit(limit).all()
|
||||||
@@ -18,43 +18,35 @@ class NarrativeService:
|
|||||||
return {"error": "Missing API Key"}
|
return {"error": "Missing API Key"}
|
||||||
|
|
||||||
prompt = f"""
|
prompt = f"""
|
||||||
You are analyzing a user's Spotify listening data. Below is a JSON summary of metrics I've computed. Your job is to:
|
You are a witty, insightful, and slightly snarky music critic analyzing a user's listening history.
|
||||||
|
Below is a JSON summary of their listening data.
|
||||||
|
|
||||||
1. Write a narrative "Vibe Check" (2-3 paragraphs) describing their overall listening personality this period.
|
Your goal is to generate a report that feels like a 'Spotify Wrapped' but deeper and more honest.
|
||||||
2. Identify 3-5 notable patterns or anomalies.
|
|
||||||
3. Provide a "Musical Persona" label (e.g., "Late-Night Binge Listener", "Genre Chameleon", "Album Purist").
|
|
||||||
4. Write a brief, playful "roast" (1-2 sentences) based on the data.
|
|
||||||
|
|
||||||
Guidelines:
|
Please output your response in strict JSON format with the following keys:
|
||||||
- Do NOT recalculate any numbers.
|
1. "vibe_check": (String) 2-3 paragraphs describing their overall listening personality.
|
||||||
- Use specific metrics to support observations (e.g., "Your whiplash score of 18.3 BPM suggests...").
|
2. "patterns": (List of Strings) 3-5 specific observations based on the data (e.g., "You listen to sad music on Tuesdays", "Your Whiplash Score is high").
|
||||||
- Keep tone conversational but insightful.
|
3. "persona": (String) A creative label for the user (e.g., "The Genre Chameleon", "Nostalgic Dad-Rocker", "Algorithm Victim").
|
||||||
- Avoid mental health claims; stick to behavioral descriptors.
|
4. "roast": (String) A playful, harmlessly mean roast about their taste (1-2 sentences).
|
||||||
- Highlight both positive patterns and quirks.
|
5. "era_insight": (String) A specific comment on their 'Musical Age' and 'Nostalgia Gap'.
|
||||||
|
|
||||||
Data:
|
GUIDELINES:
|
||||||
|
- **Use the Metrics:** Do not just say "You like pop." Say "Your Mainstream Score of 85% suggests you live on the Top 40."
|
||||||
|
- **Whiplash Score:** If 'whiplash' > 20, comment on their chaotic transitions.
|
||||||
|
- **Hipster Score:** If 'hipster_score' > 50, call them pretentious; if < 10, call them basic.
|
||||||
|
- **Comparison:** Use the 'comparison' block to mention if they are listening more/less or if their mood (valence/energy) has shifted.
|
||||||
|
- **Tone:** Conversational, fun, slightly judgmental but good-natured.
|
||||||
|
|
||||||
|
DATA:
|
||||||
{json.dumps(stats_json, indent=2)}
|
{json.dumps(stats_json, indent=2)}
|
||||||
|
|
||||||
Output Format (return valid JSON):
|
OUTPUT (JSON):
|
||||||
{{
|
|
||||||
"vibe_check": "...",
|
|
||||||
"patterns": ["...", "..."],
|
|
||||||
"persona": "...",
|
|
||||||
"roast": "..."
|
|
||||||
}}
|
|
||||||
"""
|
"""
|
||||||
try:
|
try:
|
||||||
# Handle full model path if passed or default short name
|
model = genai.GenerativeModel(self.model_name)
|
||||||
# The library often accepts 'gemini-2.5-flash' but list_models returns 'models/gemini-2.5-flash'
|
|
||||||
model_id = self.model_name
|
|
||||||
if not model_id.startswith("models/") and "/" not in model_id:
|
|
||||||
# Try simple name, if it fails user might need to pass 'models/...'
|
|
||||||
pass
|
|
||||||
|
|
||||||
model = genai.GenerativeModel(model_id)
|
|
||||||
response = model.generate_content(prompt)
|
response = model.generate_content(prompt)
|
||||||
|
|
||||||
# Clean up response to ensure valid JSON (sometimes LLMs add markdown blocks)
|
# Clean up response to ensure valid JSON
|
||||||
text = response.text.strip()
|
text = response.text.strip()
|
||||||
if text.startswith("```json"):
|
if text.startswith("```json"):
|
||||||
text = text.replace("```json", "").replace("```", "")
|
text = text.replace("```json", "").replace("```", "")
|
||||||
@@ -64,4 +56,4 @@ Output Format (return valid JSON):
|
|||||||
return json.loads(text)
|
return json.loads(text)
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
return {"error": str(e), "raw_response": response.text if 'response' in locals() else "No response"}
|
return {"error": str(e), "raw_response": "Error generating narrative."}
|
||||||
@@ -1,5 +1,5 @@
|
|||||||
from sqlalchemy.orm import Session
|
from sqlalchemy.orm import Session
|
||||||
from sqlalchemy import func, distinct, desc
|
from sqlalchemy import func, distinct, desc, joinedload
|
||||||
from datetime import datetime, timedelta
|
from datetime import datetime, timedelta
|
||||||
from typing import Dict, Any, List
|
from typing import Dict, Any, List
|
||||||
import math
|
import math
|
||||||
@@ -11,11 +11,68 @@ class StatsService:
|
|||||||
def __init__(self, db: Session):
|
def __init__(self, db: Session):
|
||||||
self.db = db
|
self.db = db
|
||||||
|
|
||||||
|
from sqlalchemy.orm import joinedload # Add this to imports
|
||||||
|
|
||||||
|
def compute_comparison(self, current_stats: Dict[str, Any], period_start: datetime, period_end: datetime) -> Dict[
|
||||||
|
str, Any]:
|
||||||
|
"""
|
||||||
|
Calculates deltas vs the previous period of the same length.
|
||||||
|
"""
|
||||||
|
duration = period_end - period_start
|
||||||
|
prev_end = period_start
|
||||||
|
prev_start = prev_end - duration
|
||||||
|
|
||||||
|
# We only need key metrics for comparison, not the full heavy report
|
||||||
|
# Let's re-use existing methods but strictly for the previous window
|
||||||
|
|
||||||
|
# 1. Volume Comparison
|
||||||
|
prev_volume = self.compute_volume_stats(prev_start, prev_end)
|
||||||
|
|
||||||
|
# 2. Vibe Comparison (Just energy/valence/popularity)
|
||||||
|
prev_vibe = self.compute_vibe_stats(prev_start, prev_end)
|
||||||
|
prev_taste = self.compute_taste_stats(prev_start, prev_end)
|
||||||
|
|
||||||
|
# Calculate Deltas
|
||||||
|
deltas = {}
|
||||||
|
|
||||||
|
# Plays
|
||||||
|
curr_plays = current_stats["volume"]["total_plays"]
|
||||||
|
prev_plays_count = prev_volume["total_plays"]
|
||||||
|
deltas["plays_delta"] = curr_plays - prev_plays_count
|
||||||
|
deltas["plays_pct_change"] = round(((curr_plays - prev_plays_count) / prev_plays_count) * 100,
|
||||||
|
1) if prev_plays_count else 0
|
||||||
|
|
||||||
|
# Energy & Valence
|
||||||
|
if "mood_quadrant" in current_stats["vibe"] and "mood_quadrant" in prev_vibe:
|
||||||
|
curr_e = current_stats["vibe"]["mood_quadrant"]["y"]
|
||||||
|
prev_e = prev_vibe["mood_quadrant"]["y"]
|
||||||
|
deltas["energy_delta"] = round(curr_e - prev_e, 2)
|
||||||
|
|
||||||
|
curr_v = current_stats["vibe"]["mood_quadrant"]["x"]
|
||||||
|
prev_v = prev_vibe["mood_quadrant"]["x"]
|
||||||
|
deltas["valence_delta"] = round(curr_v - prev_v, 2)
|
||||||
|
|
||||||
|
# Popularity
|
||||||
|
if "avg_popularity" in current_stats["taste"] and "avg_popularity" in prev_taste:
|
||||||
|
deltas["popularity_delta"] = round(current_stats["taste"]["avg_popularity"] - prev_taste["avg_popularity"],
|
||||||
|
1)
|
||||||
|
|
||||||
|
return {
|
||||||
|
"previous_period": {
|
||||||
|
"start": prev_start.isoformat(),
|
||||||
|
"end": prev_end.isoformat()
|
||||||
|
},
|
||||||
|
"deltas": deltas
|
||||||
|
}
|
||||||
|
|
||||||
def compute_volume_stats(self, period_start: datetime, period_end: datetime) -> Dict[str, Any]:
|
def compute_volume_stats(self, period_start: datetime, period_end: datetime) -> Dict[str, Any]:
|
||||||
"""
|
"""
|
||||||
Calculates volume metrics: Total Plays, Unique Tracks, Artists, etc.
|
Calculates volume metrics including Concentration (HHI, Gini) and One-and-Done rates.
|
||||||
"""
|
"""
|
||||||
query = self.db.query(PlayHistory).filter(
|
# Eager load tracks AND artists to fix the "Artist String Problem" and performance
|
||||||
|
query = self.db.query(PlayHistory).options(
|
||||||
|
joinedload(PlayHistory.track).joinedload(Track.artists)
|
||||||
|
).filter(
|
||||||
PlayHistory.played_at >= period_start,
|
PlayHistory.played_at >= period_start,
|
||||||
PlayHistory.played_at <= period_end
|
PlayHistory.played_at <= period_end
|
||||||
)
|
)
|
||||||
@@ -24,167 +81,94 @@ class StatsService:
|
|||||||
|
|
||||||
if total_plays == 0:
|
if total_plays == 0:
|
||||||
return {
|
return {
|
||||||
"total_plays": 0,
|
"total_plays": 0, "estimated_minutes": 0, "unique_tracks": 0,
|
||||||
"estimated_minutes": 0,
|
"unique_artists": 0, "unique_albums": 0, "unique_genres": 0,
|
||||||
"unique_tracks": 0,
|
"top_tracks": [], "top_artists": [], "top_genres": [],
|
||||||
"unique_artists": 0,
|
"repeat_rate": 0, "concentration": {}
|
||||||
"unique_albums": 0,
|
|
||||||
"unique_genres": 0,
|
|
||||||
"top_tracks": [],
|
|
||||||
"top_artists": [],
|
|
||||||
"repeat_rate": 0,
|
|
||||||
"concentration": {}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
# Calculate Duration (Estimated)
|
|
||||||
# Note: We query tracks to get duration.
|
|
||||||
# Ideally we join, but eager loading might be heavy. Let's do a join or simple loop.
|
|
||||||
# Efficient approach: Get all track IDs from plays, fetch Track objects in bulk map.
|
|
||||||
|
|
||||||
track_ids = [p.track_id for p in plays]
|
|
||||||
tracks = self.db.query(Track).filter(Track.id.in_(set(track_ids))).all()
|
|
||||||
track_map = {t.id: t for t in tracks}
|
|
||||||
|
|
||||||
total_ms = 0
|
total_ms = 0
|
||||||
unique_track_ids = set()
|
track_counts = {}
|
||||||
unique_artist_ids = set()
|
artist_counts = {}
|
||||||
unique_album_names = set() # Spotify doesn't give album ID in PlayHistory directly unless joined, track has album name string.
|
|
||||||
# Ideally track has raw_data['album']['id'].
|
|
||||||
unique_album_ids = set()
|
|
||||||
|
|
||||||
genre_counts = {}
|
genre_counts = {}
|
||||||
|
album_ids = set()
|
||||||
# For Top Lists
|
|
||||||
track_play_counts = {}
|
|
||||||
artist_play_counts = {}
|
|
||||||
|
|
||||||
for p in plays:
|
for p in plays:
|
||||||
t = track_map.get(p.track_id)
|
t = p.track
|
||||||
if t:
|
if not t: continue
|
||||||
total_ms += t.duration_ms
|
|
||||||
unique_track_ids.add(t.id)
|
|
||||||
|
|
||||||
# Top Tracks
|
total_ms += t.duration_ms if t.duration_ms else 0
|
||||||
track_play_counts[t.id] = track_play_counts.get(t.id, 0) + 1
|
|
||||||
|
|
||||||
# Artists (using relation)
|
# Track Counts
|
||||||
# Note: This might cause N+1 query if not eager loaded.
|
track_counts[t.id] = track_counts.get(t.id, 0) + 1
|
||||||
# For strictly calculation, accessing t.artists (lazy load) loop might be slow for 1000s of plays.
|
|
||||||
# Optimization: Join PlayHistory -> Track -> Artist in query.
|
|
||||||
|
|
||||||
# Let's rely on raw_data for speed if relation loading is slow,
|
# Album Counts (using raw_data ID if available, else name)
|
||||||
# OR Assume we accept some latency.
|
if t.raw_data and "album" in t.raw_data and "id" in t.raw_data["album"]:
|
||||||
# Better: Pre-fetch artist connections or use the new tables properly.
|
album_ids.add(t.raw_data["album"]["id"])
|
||||||
# Let's use the object relation for correctness as per plan.
|
else:
|
||||||
for artist in t.artists:
|
album_ids.add(t.album)
|
||||||
unique_artist_ids.add(artist.id)
|
|
||||||
artist_play_counts[artist.id] = artist_play_counts.get(artist.id, 0) + 1
|
|
||||||
|
|
||||||
if artist.genres:
|
# Artist Counts (Iterate objects, not string)
|
||||||
for g in artist.genres:
|
for artist in t.artists:
|
||||||
genre_counts[g] = genre_counts.get(g, 0) + 1
|
artist_counts[artist.id] = artist_counts.get(artist.id, 0) + 1
|
||||||
|
if artist.genres:
|
||||||
|
for g in artist.genres:
|
||||||
|
genre_counts[g] = genre_counts.get(g, 0) + 1
|
||||||
|
|
||||||
if t.raw_data and "album" in t.raw_data:
|
# Derived Metrics
|
||||||
unique_album_ids.add(t.raw_data["album"]["id"])
|
unique_tracks = len(track_counts)
|
||||||
else:
|
one_and_done = len([c for c in track_counts.values() if c == 1])
|
||||||
unique_album_ids.add(t.album) # Fallback
|
|
||||||
|
|
||||||
estimated_minutes = total_ms / 60000
|
# Top Lists
|
||||||
|
top_tracks = [
|
||||||
|
{"name": self.db.query(Track).get(tid).name, "artist": self.db.query(Track).get(tid).artist, "count": c}
|
||||||
|
for tid, c in sorted(track_counts.items(), key=lambda x: x[1], reverse=True)[:5]
|
||||||
|
]
|
||||||
|
|
||||||
# Top 5 Tracks
|
top_artist_ids = sorted(artist_counts.items(), key=lambda x: x[1], reverse=True)[:5]
|
||||||
sorted_tracks = sorted(track_play_counts.items(), key=lambda x: x[1], reverse=True)[:5]
|
# Fetch artist names efficiently
|
||||||
top_tracks = []
|
|
||||||
for tid, count in sorted_tracks:
|
|
||||||
t = track_map.get(tid)
|
|
||||||
top_tracks.append({
|
|
||||||
"name": t.name,
|
|
||||||
"artist": t.artist, # Display string
|
|
||||||
"count": count
|
|
||||||
})
|
|
||||||
|
|
||||||
# Top 5 Artists
|
|
||||||
# Need to fetch Artist names
|
|
||||||
top_artist_ids = sorted(artist_play_counts.items(), key=lambda x: x[1], reverse=True)[:5]
|
|
||||||
top_artists_objs = self.db.query(Artist).filter(Artist.id.in_([x[0] for x in top_artist_ids])).all()
|
top_artists_objs = self.db.query(Artist).filter(Artist.id.in_([x[0] for x in top_artist_ids])).all()
|
||||||
artist_name_map = {a.id: a.name for a in top_artists_objs}
|
artist_map = {a.id: a.name for a in top_artists_objs}
|
||||||
|
top_artists = [{"name": artist_map.get(aid, "Unknown"), "count": c} for aid, c in top_artist_ids]
|
||||||
|
|
||||||
top_artists = []
|
top_genres = [{"name": k, "count": v} for k, v in
|
||||||
for aid, count in top_artist_ids:
|
sorted(genre_counts.items(), key=lambda x: x[1], reverse=True)[:5]]
|
||||||
top_artists.append({
|
|
||||||
"name": artist_name_map.get(aid, "Unknown"),
|
|
||||||
"count": count
|
|
||||||
})
|
|
||||||
|
|
||||||
# Top Genres
|
# Concentration (HHI & Gini)
|
||||||
sorted_genres = sorted(genre_counts.items(), key=lambda x: x[1], reverse=True)[:5]
|
# HHI: Sum of (share)^2
|
||||||
top_genres = [{"name": g, "count": c} for g, c in sorted_genres]
|
shares = [c / total_plays for c in track_counts.values()]
|
||||||
|
hhi = sum([s ** 2 for s in shares])
|
||||||
|
|
||||||
# Concentration
|
# Gini Coefficient (Inequality of play distribution)
|
||||||
unique_tracks_count = len(unique_track_ids)
|
sorted_shares = sorted(shares)
|
||||||
repeat_rate = (total_plays - unique_tracks_count) / total_plays if total_plays > 0 else 0
|
n = len(shares)
|
||||||
|
if n > 0:
|
||||||
# HHI (Herfindahl–Hirschman Index)
|
gini = (2 * sum((i + 1) * x for i, x in enumerate(sorted_shares))) / (n * sum(sorted_shares)) - (n + 1) / n
|
||||||
# Sum of (share)^2. Share = track_plays / total_plays
|
else:
|
||||||
hhi = sum([(c/total_plays)**2 for c in track_play_counts.values()])
|
gini = 0
|
||||||
|
|
||||||
return {
|
return {
|
||||||
"total_plays": total_plays,
|
"total_plays": total_plays,
|
||||||
"estimated_minutes": int(estimated_minutes),
|
"estimated_minutes": int(total_ms / 60000),
|
||||||
"unique_tracks": unique_tracks_count,
|
"unique_tracks": unique_tracks,
|
||||||
"unique_artists": len(unique_artist_ids),
|
"unique_artists": len(artist_counts),
|
||||||
"unique_albums": len(unique_album_ids),
|
"unique_albums": len(album_ids),
|
||||||
"unique_genres": len(genre_counts),
|
"unique_genres": len(genre_counts),
|
||||||
"top_tracks": top_tracks,
|
"top_tracks": top_tracks,
|
||||||
"top_artists": top_artists,
|
"top_artists": top_artists,
|
||||||
"top_genres": top_genres,
|
"top_genres": top_genres,
|
||||||
"repeat_rate": round(repeat_rate, 3),
|
"repeat_rate": round((total_plays - unique_tracks) / total_plays, 3) if total_plays else 0,
|
||||||
|
"one_and_done_rate": round(one_and_done / unique_tracks, 3) if unique_tracks else 0,
|
||||||
"concentration": {
|
"concentration": {
|
||||||
"hhi": round(hhi, 4),
|
"hhi": round(hhi, 4),
|
||||||
# "gini": ... (skip for now to keep it simple)
|
"gini": round(gini, 4),
|
||||||
|
"top_1_share": round(max(shares), 3) if shares else 0
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
def compute_time_stats(self, period_start: datetime, period_end: datetime) -> Dict[str, Any]:
|
def compute_time_stats(self, period_start: datetime, period_end: datetime) -> Dict[str, Any]:
|
||||||
"""
|
"""
|
||||||
Hourly, Daily distribution, etc.
|
Includes Part-of-Day buckets and Listening Streaks.
|
||||||
"""
|
|
||||||
query = self.db.query(PlayHistory).filter(
|
|
||||||
PlayHistory.played_at >= period_start,
|
|
||||||
PlayHistory.played_at <= period_end
|
|
||||||
)
|
|
||||||
plays = query.all()
|
|
||||||
|
|
||||||
hourly_counts = [0] * 24
|
|
||||||
weekday_counts = [0] * 7 # 0=Mon, 6=Sun
|
|
||||||
|
|
||||||
if not plays:
|
|
||||||
return {"hourly_distribution": hourly_counts}
|
|
||||||
|
|
||||||
for p in plays:
|
|
||||||
# played_at is UTC in DB usually. Ensure we handle timezone if user wants local.
|
|
||||||
# For now, assuming UTC or system time.
|
|
||||||
h = p.played_at.hour
|
|
||||||
d = p.played_at.weekday()
|
|
||||||
|
|
||||||
hourly_counts[h] += 1
|
|
||||||
weekday_counts[d] += 1
|
|
||||||
|
|
||||||
peak_hour = hourly_counts.index(max(hourly_counts))
|
|
||||||
|
|
||||||
# Weekend Share
|
|
||||||
weekend_plays = weekday_counts[5] + weekday_counts[6]
|
|
||||||
weekend_share = weekend_plays / len(plays) if len(plays) > 0 else 0
|
|
||||||
|
|
||||||
return {
|
|
||||||
"hourly_distribution": hourly_counts,
|
|
||||||
"peak_hour": peak_hour,
|
|
||||||
"weekday_distribution": weekday_counts,
|
|
||||||
"weekend_share": round(weekend_share, 2)
|
|
||||||
}
|
|
||||||
|
|
||||||
def compute_session_stats(self, period_start: datetime, period_end: datetime) -> Dict[str, Any]:
|
|
||||||
"""
|
|
||||||
Session logic: Gap > 20 mins = new session.
|
|
||||||
"""
|
"""
|
||||||
query = self.db.query(PlayHistory).filter(
|
query = self.db.query(PlayHistory).filter(
|
||||||
PlayHistory.played_at >= period_start,
|
PlayHistory.played_at >= period_start,
|
||||||
@@ -193,85 +177,184 @@ class StatsService:
|
|||||||
plays = query.all()
|
plays = query.all()
|
||||||
|
|
||||||
if not plays:
|
if not plays:
|
||||||
return {"count": 0, "avg_length_minutes": 0}
|
return {}
|
||||||
|
|
||||||
|
hourly_counts = [0] * 24
|
||||||
|
weekday_counts = [0] * 7
|
||||||
|
part_of_day = {"morning": 0, "afternoon": 0, "evening": 0, "night": 0}
|
||||||
|
|
||||||
|
# For Streaks
|
||||||
|
active_dates = set()
|
||||||
|
|
||||||
|
for p in plays:
|
||||||
|
h = p.played_at.hour
|
||||||
|
hourly_counts[h] += 1
|
||||||
|
weekday_counts[p.played_at.weekday()] += 1
|
||||||
|
active_dates.add(p.played_at.date())
|
||||||
|
|
||||||
|
if 5 <= h < 12:
|
||||||
|
part_of_day["morning"] += 1
|
||||||
|
elif 12 <= h < 17:
|
||||||
|
part_of_day["afternoon"] += 1
|
||||||
|
elif 17 <= h < 22:
|
||||||
|
part_of_day["evening"] += 1
|
||||||
|
else:
|
||||||
|
part_of_day["night"] += 1
|
||||||
|
|
||||||
|
# Calculate Streak
|
||||||
|
sorted_dates = sorted(list(active_dates))
|
||||||
|
current_streak = 0
|
||||||
|
longest_streak = 0
|
||||||
|
if sorted_dates:
|
||||||
|
current_streak = 1
|
||||||
|
longest_streak = 1
|
||||||
|
# Check strictly consecutive days
|
||||||
|
for i in range(1, len(sorted_dates)):
|
||||||
|
delta = (sorted_dates[i] - sorted_dates[i - 1]).days
|
||||||
|
if delta == 1:
|
||||||
|
current_streak += 1
|
||||||
|
else:
|
||||||
|
longest_streak = max(longest_streak, current_streak)
|
||||||
|
current_streak = 1
|
||||||
|
longest_streak = max(longest_streak, current_streak)
|
||||||
|
|
||||||
|
weekend_plays = weekday_counts[5] + weekday_counts[6]
|
||||||
|
|
||||||
|
return {
|
||||||
|
"hourly_distribution": hourly_counts,
|
||||||
|
"peak_hour": hourly_counts.index(max(hourly_counts)),
|
||||||
|
"weekday_distribution": weekday_counts,
|
||||||
|
"weekend_share": round(weekend_plays / len(plays), 2),
|
||||||
|
"part_of_day": part_of_day,
|
||||||
|
"listening_streak": current_streak,
|
||||||
|
"longest_streak": longest_streak,
|
||||||
|
"active_days": len(active_dates)
|
||||||
|
}
|
||||||
|
|
||||||
|
def compute_session_stats(self, period_start: datetime, period_end: datetime) -> Dict[str, Any]:
|
||||||
|
"""
|
||||||
|
Includes Micro-sessions, Marathon sessions, and Energy Arcs.
|
||||||
|
"""
|
||||||
|
# Need to join Track to get Energy features for Arc analysis
|
||||||
|
query = self.db.query(PlayHistory).options(joinedload(PlayHistory.track)).filter(
|
||||||
|
PlayHistory.played_at >= period_start,
|
||||||
|
PlayHistory.played_at <= period_end
|
||||||
|
).order_by(PlayHistory.played_at.asc())
|
||||||
|
plays = query.all()
|
||||||
|
|
||||||
|
if not plays:
|
||||||
|
return {"count": 0}
|
||||||
|
|
||||||
sessions = []
|
sessions = []
|
||||||
current_session = [plays[0]]
|
current_session = [plays[0]]
|
||||||
|
|
||||||
|
# 1. Sessionization (Gap > 20 mins)
|
||||||
for i in range(1, len(plays)):
|
for i in range(1, len(plays)):
|
||||||
prev = plays[i-1]
|
diff = (plays[i].played_at - plays[i-1].played_at).total_seconds() / 60
|
||||||
curr = plays[i]
|
|
||||||
diff = (curr.played_at - prev.played_at).total_seconds() / 60
|
|
||||||
|
|
||||||
if diff > 20:
|
if diff > 20:
|
||||||
sessions.append(current_session)
|
sessions.append(current_session)
|
||||||
current_session = []
|
current_session = []
|
||||||
|
current_session.append(plays[i])
|
||||||
current_session.append(curr)
|
|
||||||
|
|
||||||
sessions.append(current_session)
|
sessions.append(current_session)
|
||||||
|
|
||||||
session_lengths_min = []
|
# 2. Analyze Sessions
|
||||||
for sess in sessions:
|
lengths_min = []
|
||||||
if len(sess) > 1:
|
micro_sessions = 0
|
||||||
start = sess[0].played_at
|
marathon_sessions = 0
|
||||||
end = sess[-1].played_at
|
energy_arcs = {"rising": 0, "falling": 0, "flat": 0, "unknown": 0}
|
||||||
# Add duration of last track?
|
|
||||||
# Let's just do (end - start) for simplicity + avg track duration
|
|
||||||
duration = (end - start).total_seconds() / 60
|
|
||||||
session_lengths_min.append(duration)
|
|
||||||
else:
|
|
||||||
session_lengths_min.append(3.0) # Approx 1 track
|
|
||||||
|
|
||||||
avg_min = sum(session_lengths_min) / len(session_lengths_min) if session_lengths_min else 0
|
for sess in sessions:
|
||||||
|
# Durations
|
||||||
|
if len(sess) > 1:
|
||||||
|
duration = (sess[-1].played_at - sess[0].played_at).total_seconds() / 60
|
||||||
|
lengths_min.append(duration)
|
||||||
|
else:
|
||||||
|
lengths_min.append(3.0) # Approx
|
||||||
|
|
||||||
|
# Types
|
||||||
|
if len(sess) <= 3: micro_sessions += 1
|
||||||
|
if len(sess) >= 20: marathon_sessions += 1
|
||||||
|
|
||||||
|
# Energy Arc (First vs Last track)
|
||||||
|
first_t = sess[0].track
|
||||||
|
last_t = sess[-1].track
|
||||||
|
if first_t and last_t and first_t.energy is not None and last_t.energy is not None:
|
||||||
|
diff = last_t.energy - first_t.energy
|
||||||
|
if diff > 0.1: energy_arcs["rising"] += 1
|
||||||
|
elif diff < -0.1: energy_arcs["falling"] += 1
|
||||||
|
else: energy_arcs["flat"] += 1
|
||||||
|
else:
|
||||||
|
energy_arcs["unknown"] += 1
|
||||||
|
|
||||||
|
avg_min = sum(lengths_min) / len(lengths_min) if lengths_min else 0
|
||||||
|
|
||||||
return {
|
return {
|
||||||
"count": len(sessions),
|
"count": len(sessions),
|
||||||
"avg_tracks": len(plays) / len(sessions),
|
"avg_tracks": round(len(plays) / len(sessions), 1),
|
||||||
"avg_minutes": round(avg_min, 1),
|
"avg_minutes": round(avg_min, 1),
|
||||||
"longest_session_minutes": round(max(session_lengths_min), 1) if session_lengths_min else 0
|
"longest_session_minutes": round(max(lengths_min), 1) if lengths_min else 0,
|
||||||
|
"micro_session_rate": round(micro_sessions / len(sessions), 2),
|
||||||
|
"marathon_session_rate": round(marathon_sessions / len(sessions), 2),
|
||||||
|
"energy_arcs": energy_arcs
|
||||||
}
|
}
|
||||||
|
|
||||||
def compute_vibe_stats(self, period_start: datetime, period_end: datetime) -> Dict[str, Any]:
|
def compute_vibe_stats(self, period_start: datetime, period_end: datetime) -> Dict[str, Any]:
|
||||||
"""
|
"""
|
||||||
Aggregates Audio Features (Energy, Valence, etc.)
|
Aggregates Audio Features + Calculates Whiplash (Transitions)
|
||||||
"""
|
"""
|
||||||
query = self.db.query(PlayHistory).filter(
|
# Fetch plays strictly ordered by time for transition analysis
|
||||||
|
plays = self.db.query(PlayHistory).filter(
|
||||||
PlayHistory.played_at >= period_start,
|
PlayHistory.played_at >= period_start,
|
||||||
PlayHistory.played_at <= period_end
|
PlayHistory.played_at <= period_end
|
||||||
)
|
).order_by(PlayHistory.played_at.asc()).all()
|
||||||
plays = query.all()
|
|
||||||
track_ids = list(set([p.track_id for p in plays]))
|
|
||||||
|
|
||||||
if not track_ids:
|
if not plays:
|
||||||
return {}
|
return {}
|
||||||
|
|
||||||
|
track_ids = list(set([p.track_id for p in plays]))
|
||||||
tracks = self.db.query(Track).filter(Track.id.in_(track_ids)).all()
|
tracks = self.db.query(Track).filter(Track.id.in_(track_ids)).all()
|
||||||
|
track_map = {t.id: t for t in tracks}
|
||||||
|
|
||||||
# Collect features
|
# 1. Aggregates
|
||||||
features = {
|
features = {k: [] for k in
|
||||||
"energy": [], "valence": [], "danceability": [],
|
["energy", "valence", "danceability", "tempo", "acousticness", "instrumentalness", "liveness",
|
||||||
"tempo": [], "acousticness": [], "instrumentalness": [],
|
"speechiness", "loudness"]}
|
||||||
"liveness": [], "speechiness": []
|
|
||||||
}
|
|
||||||
|
|
||||||
for t in tracks:
|
# 2. Transition Arrays (for Whiplash)
|
||||||
# Weight by plays? The spec implies "Per-Period Aggregates".
|
transitions = {"tempo": [], "energy": [], "valence": []}
|
||||||
# Usually weighted by play count is better representation of what was HEARD.
|
|
||||||
# Let's weight by play count in this period.
|
|
||||||
play_count = len([p for p in plays if p.track_id == t.id])
|
|
||||||
|
|
||||||
|
previous_track = None
|
||||||
|
|
||||||
|
for i, p in enumerate(plays):
|
||||||
|
t = track_map.get(p.track_id)
|
||||||
|
if not t: continue
|
||||||
|
|
||||||
|
# Populate aggregations
|
||||||
if t.energy is not None:
|
if t.energy is not None:
|
||||||
for _ in range(play_count):
|
features["energy"].append(t.energy)
|
||||||
features["energy"].append(t.energy)
|
features["valence"].append(t.valence)
|
||||||
features["valence"].append(t.valence)
|
features["danceability"].append(t.danceability)
|
||||||
features["danceability"].append(t.danceability)
|
features["tempo"].append(t.tempo)
|
||||||
features["tempo"].append(t.tempo)
|
features["acousticness"].append(t.acousticness)
|
||||||
features["acousticness"].append(t.acousticness)
|
features["instrumentalness"].append(t.instrumentalness)
|
||||||
features["instrumentalness"].append(t.instrumentalness)
|
features["liveness"].append(t.liveness)
|
||||||
features["liveness"].append(t.liveness)
|
features["speechiness"].append(t.speechiness)
|
||||||
features["speechiness"].append(t.speechiness)
|
features["loudness"].append(t.loudness)
|
||||||
|
|
||||||
|
# Calculate Transitions (Whiplash)
|
||||||
|
if i > 0 and previous_track:
|
||||||
|
# Only count transition if within reasonable time (e.g. < 5 mins gap)
|
||||||
|
# assuming continuous listening
|
||||||
|
time_diff = (p.played_at - plays[i - 1].played_at).total_seconds()
|
||||||
|
if time_diff < 300:
|
||||||
|
if t.tempo and previous_track.tempo:
|
||||||
|
transitions["tempo"].append(abs(t.tempo - previous_track.tempo))
|
||||||
|
if t.energy and previous_track.energy:
|
||||||
|
transitions["energy"].append(abs(t.energy - previous_track.energy))
|
||||||
|
|
||||||
|
previous_track = t
|
||||||
|
|
||||||
|
# Calculate Stats
|
||||||
stats = {}
|
stats = {}
|
||||||
for key, values in features.items():
|
for key, values in features.items():
|
||||||
valid = [v for v in values if v is not None]
|
valid = [v for v in values if v is not None]
|
||||||
@@ -282,46 +365,55 @@ class StatsService:
|
|||||||
stats[f"avg_{key}"] = None
|
stats[f"avg_{key}"] = None
|
||||||
|
|
||||||
# Derived Metrics
|
# Derived Metrics
|
||||||
if stats.get("avg_energy") and stats.get("avg_valence"):
|
if stats.get("avg_energy") is not None and stats.get("avg_valence") is not None:
|
||||||
stats["mood_quadrant"] = {
|
stats["mood_quadrant"] = {
|
||||||
"x": round(stats["avg_valence"], 2),
|
"x": round(stats["avg_valence"], 2),
|
||||||
"y": round(stats["avg_energy"], 2)
|
"y": round(stats["avg_energy"], 2)
|
||||||
}
|
}
|
||||||
|
# Consistency: Inverse of average standard deviation of Mood components
|
||||||
|
avg_std = (stats["std_energy"] + stats["std_valence"]) / 2
|
||||||
|
stats["consistency_score"] = round(1.0 - avg_std, 2) # Higher = more consistent
|
||||||
|
|
||||||
|
# Whiplash Scores (Average jump between tracks)
|
||||||
|
stats["whiplash"] = {}
|
||||||
|
for k in ["tempo", "energy"]:
|
||||||
|
if transitions[k]:
|
||||||
|
stats["whiplash"][k] = round(float(np.mean(transitions[k])), 2)
|
||||||
|
else:
|
||||||
|
stats["whiplash"][k] = 0
|
||||||
|
|
||||||
return stats
|
return stats
|
||||||
|
|
||||||
def compute_era_stats(self, period_start: datetime, period_end: datetime) -> Dict[str, Any]:
|
def compute_era_stats(self, period_start: datetime, period_end: datetime) -> Dict[str, Any]:
|
||||||
"""
|
"""
|
||||||
Musical Age and Era Distribution.
|
Includes Nostalgia Gap and granular decade breakdown.
|
||||||
"""
|
"""
|
||||||
query = self.db.query(PlayHistory).filter(
|
# Join track to get raw_data
|
||||||
|
query = self.db.query(PlayHistory).options(joinedload(PlayHistory.track)).filter(
|
||||||
PlayHistory.played_at >= period_start,
|
PlayHistory.played_at >= period_start,
|
||||||
PlayHistory.played_at <= period_end
|
PlayHistory.played_at <= period_end
|
||||||
)
|
)
|
||||||
plays = query.all()
|
plays = query.all()
|
||||||
|
|
||||||
years = []
|
years = []
|
||||||
track_ids = list(set([p.track_id for p in plays]))
|
|
||||||
tracks = self.db.query(Track).filter(Track.id.in_(track_ids)).all()
|
|
||||||
track_map = {t.id: t for t in tracks}
|
|
||||||
|
|
||||||
for p in plays:
|
for p in plays:
|
||||||
t = track_map.get(p.track_id)
|
t = p.track
|
||||||
if t and t.raw_data and "album" in t.raw_data and "release_date" in t.raw_data["album"]:
|
if t and t.raw_data and "album" in t.raw_data:
|
||||||
rd = t.raw_data["album"]["release_date"]
|
rd = t.raw_data["album"].get("release_date")
|
||||||
# Format can be YYYY, YYYY-MM, YYYY-MM-DD
|
if rd:
|
||||||
try:
|
try:
|
||||||
year = int(rd.split("-")[0])
|
years.append(int(rd.split("-")[0]))
|
||||||
years.append(year)
|
except:
|
||||||
except:
|
pass
|
||||||
pass
|
|
||||||
|
|
||||||
if not years:
|
if not years:
|
||||||
return {"musical_age": None}
|
return {"musical_age": None}
|
||||||
|
|
||||||
|
# Musical Age (Weighted Average)
|
||||||
avg_year = sum(years) / len(years)
|
avg_year = sum(years) / len(years)
|
||||||
|
current_year = datetime.utcnow().year
|
||||||
|
|
||||||
# Decade breakdown
|
# Decade Distribution
|
||||||
decades = {}
|
decades = {}
|
||||||
for y in years:
|
for y in years:
|
||||||
dec = (y // 10) * 10
|
dec = (y // 10) * 10
|
||||||
@@ -329,11 +421,13 @@ class StatsService:
|
|||||||
decades[label] = decades.get(label, 0) + 1
|
decades[label] = decades.get(label, 0) + 1
|
||||||
|
|
||||||
total = len(years)
|
total = len(years)
|
||||||
decade_dist = {k: round(v/total, 2) for k, v in decades.items()}
|
dist = {k: round(v / total, 3) for k, v in decades.items()}
|
||||||
|
|
||||||
return {
|
return {
|
||||||
"musical_age": int(avg_year),
|
"musical_age": int(avg_year),
|
||||||
"decade_distribution": decade_dist
|
"nostalgia_gap": int(current_year - avg_year),
|
||||||
|
"freshness_score": dist.get(f"{int(current_year / 10) * 10}s", 0), # Share of current decade
|
||||||
|
"decade_distribution": dist
|
||||||
}
|
}
|
||||||
|
|
||||||
def compute_skip_stats(self, period_start: datetime, period_end: datetime) -> Dict[str, Any]:
|
def compute_skip_stats(self, period_start: datetime, period_end: datetime) -> Dict[str, Any]:
|
||||||
@@ -381,16 +475,191 @@ class StatsService:
|
|||||||
"skip_rate": round(skips / len(plays), 3)
|
"skip_rate": round(skips / len(plays), 3)
|
||||||
}
|
}
|
||||||
|
|
||||||
def generate_full_report(self, period_start: datetime, period_end: datetime) -> Dict[str, Any]:
|
def compute_context_stats(self, period_start: datetime, period_end: datetime) -> Dict[str, Any]:
|
||||||
|
"""
|
||||||
|
Analyzes context_uri to determine if user listens to Playlists, Albums, or Artists.
|
||||||
|
"""
|
||||||
|
query = self.db.query(PlayHistory).filter(
|
||||||
|
PlayHistory.played_at >= period_start,
|
||||||
|
PlayHistory.played_at <= period_end
|
||||||
|
)
|
||||||
|
plays = query.all()
|
||||||
|
|
||||||
|
if not plays:
|
||||||
|
return {}
|
||||||
|
|
||||||
|
context_counts = {"playlist": 0, "album": 0, "artist": 0, "collection": 0, "unknown": 0}
|
||||||
|
unique_contexts = {}
|
||||||
|
|
||||||
|
for p in plays:
|
||||||
|
if not p.context_uri:
|
||||||
|
context_counts["unknown"] += 1
|
||||||
|
continue
|
||||||
|
|
||||||
|
# Count distinct contexts for loyalty
|
||||||
|
unique_contexts[p.context_uri] = unique_contexts.get(p.context_uri, 0) + 1
|
||||||
|
|
||||||
|
if "playlist" in p.context_uri:
|
||||||
|
context_counts["playlist"] += 1
|
||||||
|
elif "album" in p.context_uri:
|
||||||
|
context_counts["album"] += 1
|
||||||
|
elif "artist" in p.context_uri:
|
||||||
|
context_counts["artist"] += 1
|
||||||
|
elif "collection" in p.context_uri:
|
||||||
|
# "Liked Songs" usually shows up as collection
|
||||||
|
context_counts["collection"] += 1
|
||||||
|
else:
|
||||||
|
context_counts["unknown"] += 1
|
||||||
|
|
||||||
|
total = len(plays)
|
||||||
|
breakdown = {k: round(v / total, 2) for k, v in context_counts.items()}
|
||||||
|
|
||||||
|
# Top 5 Contexts (Requires resolving URI to name, possibly missing metadata here)
|
||||||
|
sorted_contexts = sorted(unique_contexts.items(), key=lambda x: x[1], reverse=True)[:5]
|
||||||
|
|
||||||
return {
|
return {
|
||||||
"period": {
|
"type_breakdown": breakdown,
|
||||||
"start": period_start.isoformat(),
|
"album_purist_score": breakdown.get("album", 0),
|
||||||
"end": period_end.isoformat()
|
"playlist_dependency": breakdown.get("playlist", 0),
|
||||||
},
|
"context_loyalty": round(len(plays) / len(unique_contexts), 2) if unique_contexts else 0,
|
||||||
|
"top_context_uris": [{"uri": k, "count": v} for k, v in sorted_contexts]
|
||||||
|
}
|
||||||
|
|
||||||
|
def compute_taste_stats(self, period_start: datetime, period_end: datetime) -> Dict[str, Any]:
|
||||||
|
"""
|
||||||
|
Mainstream vs. Hipster analysis based on Track.popularity (0-100).
|
||||||
|
"""
|
||||||
|
query = self.db.query(PlayHistory).filter(
|
||||||
|
PlayHistory.played_at >= period_start,
|
||||||
|
PlayHistory.played_at <= period_end
|
||||||
|
)
|
||||||
|
plays = query.all()
|
||||||
|
if not plays: return {}
|
||||||
|
|
||||||
|
track_ids = list(set([p.track_id for p in plays]))
|
||||||
|
tracks = self.db.query(Track).filter(Track.id.in_(track_ids)).all()
|
||||||
|
track_map = {t.id: t for t in tracks}
|
||||||
|
|
||||||
|
pop_values = []
|
||||||
|
for p in plays:
|
||||||
|
t = track_map.get(p.track_id)
|
||||||
|
if t and t.popularity is not None:
|
||||||
|
pop_values.append(t.popularity)
|
||||||
|
|
||||||
|
if not pop_values:
|
||||||
|
return {"avg_popularity": 0, "hipster_score": 0}
|
||||||
|
|
||||||
|
avg_pop = float(np.mean(pop_values))
|
||||||
|
|
||||||
|
# Hipster Score: Percentage of tracks with popularity < 30
|
||||||
|
underground_plays = len([x for x in pop_values if x < 30])
|
||||||
|
mainstream_plays = len([x for x in pop_values if x > 70])
|
||||||
|
|
||||||
|
return {
|
||||||
|
"avg_popularity": round(avg_pop, 1),
|
||||||
|
"hipster_score": round((underground_plays / len(pop_values)) * 100, 1),
|
||||||
|
"mainstream_score": round((mainstream_plays / len(pop_values)) * 100, 1),
|
||||||
|
"obscurity_rating": round(100 - avg_pop, 1)
|
||||||
|
}
|
||||||
|
|
||||||
|
def compute_lifecycle_stats(self, period_start: datetime, period_end: datetime) -> Dict[str, Any]:
|
||||||
|
"""
|
||||||
|
Determines if tracks are 'New Discoveries' or 'Old Favorites'.
|
||||||
|
"""
|
||||||
|
# 1. Get tracks played in this period
|
||||||
|
current_plays = self.db.query(PlayHistory).filter(
|
||||||
|
PlayHistory.played_at >= period_start,
|
||||||
|
PlayHistory.played_at <= period_end
|
||||||
|
).all()
|
||||||
|
|
||||||
|
if not current_plays: return {}
|
||||||
|
|
||||||
|
current_track_ids = set([p.track_id for p in current_plays])
|
||||||
|
|
||||||
|
# 2. Check if these tracks were played BEFORE period_start
|
||||||
|
# We find which of the current_track_ids exist in history < period_start
|
||||||
|
old_tracks_query = self.db.query(distinct(PlayHistory.track_id)).filter(
|
||||||
|
PlayHistory.track_id.in_(current_track_ids),
|
||||||
|
PlayHistory.played_at < period_start
|
||||||
|
)
|
||||||
|
old_track_ids = set([r[0] for r in old_tracks_query.all()])
|
||||||
|
|
||||||
|
# 3. Calculate Discovery
|
||||||
|
new_discoveries = current_track_ids - old_track_ids
|
||||||
|
discovery_count = len(new_discoveries)
|
||||||
|
|
||||||
|
# Calculate plays on new discoveries
|
||||||
|
plays_on_new = len([p for p in current_plays if p.track_id in new_discoveries])
|
||||||
|
total_plays = len(current_plays)
|
||||||
|
|
||||||
|
return {
|
||||||
|
"discovery_count": discovery_count,
|
||||||
|
"discovery_rate": round(plays_on_new / total_plays, 3) if total_plays > 0 else 0,
|
||||||
|
"recurrence_rate": round((total_plays - plays_on_new) / total_plays, 3) if total_plays > 0 else 0
|
||||||
|
}
|
||||||
|
|
||||||
|
def compute_explicit_stats(self, period_start: datetime, period_end: datetime) -> Dict[str, Any]:
|
||||||
|
"""
|
||||||
|
Analyzes explicit content consumption.
|
||||||
|
"""
|
||||||
|
query = self.db.query(PlayHistory).options(joinedload(PlayHistory.track)).filter(
|
||||||
|
PlayHistory.played_at >= period_start,
|
||||||
|
PlayHistory.played_at <= period_end
|
||||||
|
)
|
||||||
|
plays = query.all()
|
||||||
|
|
||||||
|
if not plays: return {"explicit_rate": 0, "hourly_explicit_rate": []}
|
||||||
|
|
||||||
|
total_plays = len(plays)
|
||||||
|
explicit_count = 0
|
||||||
|
hourly_explicit = [0] * 24
|
||||||
|
hourly_total = [0] * 24
|
||||||
|
|
||||||
|
for p in plays:
|
||||||
|
h = p.played_at.hour
|
||||||
|
hourly_total[h] += 1
|
||||||
|
|
||||||
|
# Check raw_data for explicit flag
|
||||||
|
t = p.track
|
||||||
|
is_explicit = False
|
||||||
|
if t.raw_data and t.raw_data.get("explicit"):
|
||||||
|
is_explicit = True
|
||||||
|
|
||||||
|
if is_explicit:
|
||||||
|
explicit_count += 1
|
||||||
|
hourly_explicit[h] += 1
|
||||||
|
|
||||||
|
# Calculate hourly percentages
|
||||||
|
hourly_rates = []
|
||||||
|
for i in range(24):
|
||||||
|
if hourly_total[i] > 0:
|
||||||
|
hourly_rates.append(round(hourly_explicit[i] / hourly_total[i], 2))
|
||||||
|
else:
|
||||||
|
hourly_rates.append(0.0)
|
||||||
|
|
||||||
|
return {
|
||||||
|
"explicit_rate": round(explicit_count / total_plays, 3),
|
||||||
|
"total_explicit_plays": explicit_count,
|
||||||
|
"hourly_explicit_distribution": hourly_rates
|
||||||
|
}
|
||||||
|
|
||||||
|
def generate_full_report(self, period_start: datetime, period_end: datetime) -> Dict[str, Any]:
|
||||||
|
# 1. Calculate all current stats
|
||||||
|
current_stats = {
|
||||||
|
"period": {"start": period_start.isoformat(), "end": period_end.isoformat()},
|
||||||
"volume": self.compute_volume_stats(period_start, period_end),
|
"volume": self.compute_volume_stats(period_start, period_end),
|
||||||
"time_habits": self.compute_time_stats(period_start, period_end),
|
"time_habits": self.compute_time_stats(period_start, period_end),
|
||||||
"sessions": self.compute_session_stats(period_start, period_end),
|
"sessions": self.compute_session_stats(period_start, period_end),
|
||||||
|
"context": self.compute_context_stats(period_start, period_end),
|
||||||
"vibe": self.compute_vibe_stats(period_start, period_end),
|
"vibe": self.compute_vibe_stats(period_start, period_end),
|
||||||
"era": self.compute_era_stats(period_start, period_end),
|
"era": self.compute_era_stats(period_start, period_end),
|
||||||
|
"taste": self.compute_taste_stats(period_start, period_end),
|
||||||
|
"lifecycle": self.compute_lifecycle_stats(period_start, period_end),
|
||||||
|
"flags": self.compute_explicit_stats(period_start, period_end),
|
||||||
"skips": self.compute_skip_stats(period_start, period_end)
|
"skips": self.compute_skip_stats(period_start, period_end)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# 2. Calculate Comparison
|
||||||
|
current_stats["comparison"] = self.compute_comparison(current_stats, period_start, period_end)
|
||||||
|
|
||||||
|
return current_stats
|
||||||
|
|||||||
16
backend/run_scheduler.py
Normal file
16
backend/run_scheduler.py
Normal file
@@ -0,0 +1,16 @@
|
|||||||
|
import schedule
|
||||||
|
import time
|
||||||
|
from run_analysis import run_analysis_pipeline
|
||||||
|
|
||||||
|
def job():
|
||||||
|
print("Running daily analysis...")
|
||||||
|
# Analyze last 24 hours
|
||||||
|
run_analysis_pipeline(days=1)
|
||||||
|
|
||||||
|
# Schedule for 03:00 AM
|
||||||
|
schedule.every().day.at("03:00").do(job)
|
||||||
|
|
||||||
|
print("Scheduler started...")
|
||||||
|
while True:
|
||||||
|
schedule.run_pending()
|
||||||
|
time.sleep(60)
|
||||||
Reference in New Issue
Block a user