diff --git a/backend/TESTING.md b/backend/TESTING.md new file mode 100644 index 0000000..e3c1e3f --- /dev/null +++ b/backend/TESTING.md @@ -0,0 +1,76 @@ +# Testing Guide + +This project includes a comprehensive test suite to verify the calculation engine (`StatsService`) and the AI narrative generation (`NarrativeService`). + +## 1. Quick Start (Standalone Test) + +You can run the full stats verification script without installing `pytest`. This script uses an in-memory SQLite database, seeds it with synthetic listening history (including skips, sessions, and specific genres), and prints the computed analysis JSON. + +```bash +# Ensure you are in the repository root directory +# If you are using the virtual environment: +source backend/venv/bin/activate + +# Run the test as a module so the `backend` package resolves from the repository root +python -m backend.tests.test_stats_full +``` + +### What does this verify? +- **Volume Metrics:** Total plays, unique tracks/artists. +- **Session Logic:** Correctly groups plays into sessions based on 20-minute gaps. +- **Skip Detection:** Identifies "boredom skips" based on timestamp deltas. +- **Vibe Analysis:** Verifies K-Means clustering, tempo zones, and harmonic profiles. +- **Context Analysis:** Checks if plays are correctly attributed to Playlists/Albums. + +## 2. Generating a Spotify Refresh Token + +To run the actual application, you need a Spotify Refresh Token. We provide a script to automate the OAuth flow. + +1. **Prerequisites:** + * Go to [Spotify Developer Dashboard](https://developer.spotify.com/dashboard/). + * Create an App. + * In settings, add `http://localhost:8888/callback` to "Redirect URIs". + * Get your **Client ID** and **Client Secret**. + +2. **Run the Script:** + ```bash + python backend/scripts/get_refresh_token.py + ``` + +3. **Follow Instructions:** + * Enter your Client ID/Secret when prompted. + * The script will open your browser. + * Log in to Spotify and authorize the app. + * The script will print your `SPOTIFY_REFRESH_TOKEN` in the terminal. + +4. 
**Save to .env:** + Copy the output into your `.env` file. + +## 3. Full Test Suite (Pytest) + +If you wish to run the full suite using `pytest` (recommended for CI/CD), install the dev dependencies: + +```bash +pip install pytest +``` + +Then run: + +```bash +pytest backend/tests +``` + +## 4. Manual Verification + +To verify the system end-to-end with real data: + +1. Start the backend: + ```bash + python backend/run_worker.py + ``` +2. Wait for a few minutes for data to ingest (check logs). +3. Run the analysis manually: + ```bash + python backend/run_analysis.py + ``` +4. Check the database or logs for the generated `AnalysisSnapshot`. diff --git a/backend/requirements.txt b/backend/requirements.txt index 806229e..3bd8239 100644 --- a/backend/requirements.txt +++ b/backend/requirements.txt @@ -10,3 +10,4 @@ tenacity==8.2.3 python-dateutil==2.9.0.post0 requests==2.31.0 alembic==1.13.1 +scikit-learn==1.4.0 diff --git a/backend/run_analysis.py b/backend/run_analysis.py index 1936a33..c4daa7c 100644 --- a/backend/run_analysis.py +++ b/backend/run_analysis.py @@ -31,7 +31,7 @@ def run_analysis_pipeline(days: int = 30, model_name: str = "gemini-2.5-flash"): # 2. 
Generate Narrative
     print(f"Generating Narrative with {model_name}...")
     narrative_service = NarrativeService(model_name=model_name)
-    narrative_json = narrative_service.generate_narrative(stats_json)
+    narrative_json = narrative_service.generate_full_narrative(stats_json)
 
     if "error" in narrative_json:
         print(f"LLM Error: {narrative_json['error']}")
diff --git a/backend/tests/test_stats_full.py b/backend/tests/test_stats_full.py
new file mode 100644
index 0000000..db00779
--- /dev/null
+++ b/backend/tests/test_stats_full.py
@@ -0,0 +1,166 @@
+import os
+import json
+# pytest is deliberately not imported so this file also runs as a plain script.
+from contextlib import contextmanager
+from datetime import datetime, timedelta
+from sqlalchemy import create_engine
+from sqlalchemy.orm import sessionmaker
+from backend.app.models import Base, PlayHistory, Track, Artist
+from backend.app.services.stats_service import StatsService
+
+
+# Setup Test Database
+def db_session():
+    """Yield a session bound to a fresh in-memory SQLite database.
+
+    Kept as a plain generator (no pytest decorator) so the module imports
+    without pytest installed. The session is closed when the generator
+    finishes or is closed.
+    """
+    engine = create_engine("sqlite:///:memory:")
+    Base.metadata.create_all(engine)
+    session = sessionmaker(bind=engine)()
+    try:
+        yield session
+    finally:
+        # Close the session even if the consumer raises (e.g. a failed assert).
+        session.close()
+
+
+# Context-manager view of db_session(), used when no session is passed in.
+_session_scope = contextmanager(db_session)
+
+
+def seed_data(db):
+    """
+    Seeds the database with specific patterns to verify metrics.
+    Pattern:
+    - High Energy/Happy Session (Morning)
+    - Low Energy/Sad Session (Night)
+    - Skips
+    - Repeats
+    """
+
+    # 1. Create Artists
+    a1 = Artist(id="a1", name="The Hype Men", genres=["pop", "dance"])
+    a2 = Artist(id="a2", name="Sad Bois", genres=["indie", "folk"])
+    a3 = Artist(id="a3", name="Mozart", genres=["classical"])
+    db.add_all([a1, a2, a3])
+
+    # 2. Create Tracks
+    # High Energy, High Valence, Fast
+    t1 = Track(
+        id="t1", name="Party Anthem", album="Hype Vol 1", duration_ms=180000,
+        popularity=80, energy=0.9, valence=0.9, danceability=0.8, tempo=140.0, acousticness=0.1, instrumentalness=0.0,
+        key=0, mode=1  # C Major
+    )
+    t1.artists.append(a1)
+
+    # Low Energy, Low Valence, Slow
+    t2 = Track(
+        id="t2", name="Rainy Day", album="Sad Vol 1", duration_ms=240000,
+        popularity=20, energy=0.2, valence=0.1, danceability=0.3, tempo=80.0, acousticness=0.9, instrumentalness=0.0,
+        key=9, mode=0  # A Minor
+    )
+    t2.artists.append(a2)
+
+    # Classical (Instrumental)
+    t3 = Track(
+        id="t3", name="Symphony 40", album="Classics", duration_ms=300000,
+        popularity=50, energy=0.4, valence=0.5, danceability=0.1, tempo=110.0, acousticness=0.8, instrumentalness=0.9,
+        key=5, mode=0
+    )
+    t3.artists.append(a3)
+
+    db.add_all([t1, t2, t3])
+    db.commit()
+
+    # 3. Create History
+    base_time = datetime(2023, 11, 1, 8, 0, 0)  # Morning
+    night_time = datetime(2023, 11, 1, 22, 0, 0)
+
+    plays = [
+        # SESSION 1: Morning Hype (3 plays of t1 at 08:00, 08:04, 08:08)
+        PlayHistory(track_id="t1", played_at=base_time, context_uri="spotify:playlist:morning"),
+        PlayHistory(track_id="t1", played_at=base_time + timedelta(minutes=4), context_uri="spotify:playlist:morning"),
+        PlayHistory(track_id="t1", played_at=base_time + timedelta(minutes=8), context_uri="spotify:playlist:morning"),
+        # GAP > 20 mins -> New Session
+        # SESSION 2: Night Sadness (t2, t3, t3), starting at 22:00
+        PlayHistory(track_id="t2", played_at=night_time, context_uri="spotify:album:sad"),
+        # SKIP SIMULATION: t2 lasts 240s but t3 starts only 40s later.
+        # 40s < (240s - 10s), so t2 is expected to count as a skip.
+        PlayHistory(track_id="t3", played_at=night_time + timedelta(seconds=40), context_uri="spotify:album:sad"),
+        # Finish t3 (5 mins), then play it again
+        PlayHistory(track_id="t3", played_at=night_time + timedelta(seconds=40) + timedelta(minutes=5, seconds=10), context_uri="spotify:album:sad"),
+    ]
+
+    db.add_all(plays)
+    db.commit()
+
+
+def test_stats_generation(db_session=None):
+    """Seed a database and check every section of the generated report.
+
+    `db_session` may be passed explicitly; when omitted (pytest collection or
+    a bare call) the test opens and closes its own in-memory session.
+    """
+    if db_session is None:
+        # The default keeps pytest from looking for a `db_session` fixture.
+        with _session_scope() as session:
+            test_stats_generation(session)
+        return
+
+    seed_data(db_session)
+    stats_service = StatsService(db_session)
+
+    start = datetime(2023, 11, 1, 0, 0, 0)
+    end = datetime(2023, 11, 2, 0, 0, 0)
+
+    report = stats_service.generate_full_report(start, end)
+
+    print("\n--- GENERATED REPORT ---")
+    print(json.dumps(report, indent=2, default=str))
+    print("------------------------\n")
+
+    # Assertions
+
+    # 1. Volume
+    assert report["volume"]["total_plays"] == 6
+    assert report["volume"]["unique_tracks"] == 3
+    # Top track should be t1 (3 plays)
+    assert report["volume"]["top_tracks"][0]["name"] == "Party Anthem"
+
+    # 2. Time
+    # 3 plays in the morning (08:00), 3 plays late in the day (22:00)
+    assert report["time_habits"]["part_of_day"]["morning"] == 3
+    assert report["time_habits"]["part_of_day"]["night"] == 0  # 22:00 is "evening" in buckets (18-23)
+    assert report["time_habits"]["part_of_day"]["evening"] == 3
+
+    # 3. Sessions
+    # Should be 2 sessions (gap between 08:08 and 22:00)
+    assert report["sessions"]["count"] == 2
+
+    # 4. Skips
+    # 1 skip detected (t2 -> t3 gap was 40s vs 240s duration)
+    assert report["skips"]["total_skips"] == 1
+
+    # 5. Vibe & Clustering
+    # Should have cluster info
+    assert "clusters" in report["vibe"]
+    # Check harmonic
+    assert report["vibe"]["harmonic_profile"]["major_pct"] > 0
+    # Check tempo zones (t1=140=Hype, t2=80=Chill, t3=110=Groove)
+    # 3x t1 (Hype), 1x t2 (Chill), 2x t3 (Groove)
+    # Total 6. Hype=0.5, Chill=0.17, Groove=0.33
+    zones = report["vibe"]["tempo_zones"]
+    assert zones["hype"] == 0.5
+
+    # 6. Context
+    # Morning = Playlist (3), Night = Album (3) -> 50/50
+    assert report["context"]["type_breakdown"]["playlist"] == 0.5
+    assert report["context"]["type_breakdown"]["album"] == 0.5
+
+
+if __name__ == "__main__":
+    # Manually run if executed as script
+    test_stats_generation()