feat: Initial backend setup for Music Analyser

- Created FastAPI backend structure.
- Implemented Spotify Recently Played ingestion logic.
- Set up SQLite database with SQLAlchemy models.
- Added AI Service using Google Gemini.
- Created helper scripts for auth and background worker.
- Added Dockerfile and GitHub Actions workflow.
This commit is contained in:
google-labs-jules[bot]
2025-12-24 17:26:01 +00:00
parent a458eb00db
commit a97997a17a
16 changed files with 579 additions and 2 deletions

18
backend/app/database.py Normal file
View File

@@ -0,0 +1,18 @@
from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker, declarative_base
# SQLite database stored in "music.db"; the "./" path is relative, so the file
# location depends on the working directory the process is started from.
SQLALCHEMY_DATABASE_URL = "sqlite:///./music.db"
# check_same_thread=False is passed through to the SQLite driver so that a
# connection may be used from a thread other than the one that created it.
engine = create_engine(
    SQLALCHEMY_DATABASE_URL, connect_args={"check_same_thread": False}
)
# Session factory bound to the engine. Sessions do not autoflush, so pending
# objects are not visible to queries until they are flushed or committed.
SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)
# Declarative base class that the ORM models inherit from.
Base = declarative_base()
def get_db():
    """Yield a database session and close it once the consumer is done.

    Written as a generator so it can be used as a FastAPI dependency: the
    session is handed to the caller at the ``yield`` and closed in the
    ``finally`` clause whether or not the caller raised.
    """
    session = SessionLocal()
    try:
        yield session
    finally:
        session.close()

81
backend/app/ingest.py Normal file
View File

@@ -0,0 +1,81 @@
import asyncio
import os
from datetime import datetime
from sqlalchemy.orm import Session
from .models import Track, PlayHistory
from .database import SessionLocal
from .services.spotify_client import SpotifyClient
from dateutil import parser
# Initialize Spotify Client (env vars will be populated later)
def get_spotify_client():
    """Build a SpotifyClient from credentials found in the environment.

    SPOTIFY_CLIENT_ID, SPOTIFY_CLIENT_SECRET and SPOTIFY_REFRESH_TOKEN are
    read at call time; a variable that is not set is passed through as None.
    """
    credentials = {
        "client_id": os.getenv("SPOTIFY_CLIENT_ID"),
        "client_secret": os.getenv("SPOTIFY_CLIENT_SECRET"),
        "refresh_token": os.getenv("SPOTIFY_REFRESH_TOKEN"),
    }
    return SpotifyClient(**credentials)
async def ingest_recently_played(db: Session):
    """Fetch the recently played tracks from Spotify and persist them.

    For every item returned by Spotify, the track is inserted into ``tracks``
    if it is not stored yet, and a ``play_history`` row is added unless a play
    with the same ``(track_id, played_at)`` already exists.

    Args:
        db: Open SQLAlchemy session. This function commits on it.

    Returns:
        None. If Spotify cannot be reached the error is printed and the
        function returns without touching the database.
    """
    client = get_spotify_client()
    try:
        items = await client.get_recently_played(limit=50)
    except Exception as e:
        print(f"Error connecting to Spotify: {e}")
        return

    print(f"Fetched {len(items)} items from Spotify.")

    for item in items:
        track_data = item.get("track") or {}
        track_id = track_data.get("id")
        played_at_str = item.get("played_at")
        if not track_id or not played_at_str:
            # Without a Spotify ID there is no primary key for the track row,
            # and without a timestamp the play cannot be de-duplicated. Skip
            # the item instead of aborting the whole batch.
            print("Skipping item without a track id or played_at timestamp.")
            continue
        played_at = parser.isoparse(played_at_str)

        # 1. Check if track exists, if not create it
        track = db.query(Track).filter(Track.id == track_id).first()
        if not track:
            print(f"New track found: {track_data['name']}")
            track = Track(
                id=track_id,
                name=track_data["name"],
                artist=", ".join(a["name"] for a in track_data["artists"]),
                album=track_data["album"]["name"],
                duration_ms=track_data["duration_ms"],
                # The column is nullable, so a missing value is stored as NULL.
                popularity=track_data.get("popularity"),
                raw_data=track_data,
            )
            db.add(track)
            db.commit()  # Commit immediately so ID exists for foreign key

        # 2. Check if this specific play instance exists
        # We assume (track_id, played_at) is unique enough
        already_recorded = (
            db.query(PlayHistory)
            .filter(
                PlayHistory.track_id == track_id,
                PlayHistory.played_at == played_at,
            )
            .first()
        )
        if not already_recorded:
            print(f" recording play: {track_data['name']} at {played_at}")
            # "context" may be absent or null; either way there is no URI.
            context = item.get("context") or {}
            play = PlayHistory(
                track_id=track_id,
                played_at=played_at,
                context_uri=context.get("uri"),
            )
            db.add(play)

    # Persist all plays queued during this run.
    db.commit()
async def run_worker():
    """Simulates a background worker loop.

    Polls Spotify once a minute using a single long-lived session. A failure
    inside one ingestion pass is reported and the session is rolled back, so
    the next poll starts from a clean transaction instead of the worker
    stopping for good. The session is always closed on exit.
    """
    db = SessionLocal()
    try:
        while True:
            print("Worker: Polling Spotify...")
            try:
                await ingest_recently_played(db)
            except Exception as e:
                # A failed flush/commit leaves the session unusable until it
                # is rolled back; do that before the next iteration.
                db.rollback()
                print(f"Worker: Ingestion failed, will retry next cycle: {e}")
            print("Worker: Sleeping for 60 seconds...")
            await asyncio.sleep(60)
    except Exception as e:
        # Only reached if recovery itself fails (e.g. the rollback raises).
        print(f"Worker crashed: {e}")
    finally:
        db.close()

36
backend/app/main.py Normal file
View File

@@ -0,0 +1,36 @@
from fastapi import FastAPI, Depends
from sqlalchemy.orm import Session
from .database import engine, Base, get_db
from .models import PlayHistory as PlayHistoryModel, Track as TrackModel
from . import schemas
from .ingest import ingest_recently_played
import asyncio
from typing import List
from dotenv import load_dotenv
# Load environment variables via python-dotenv before the app is built; the
# Spotify credentials are read from the environment lazily, at request time.
load_dotenv()
# Create tables for every model registered on Base (the models module is
# imported above so its tables are known here).
Base.metadata.create_all(bind=engine)
# The ASGI application object that the route decorators below attach to.
app = FastAPI(title="Music Analyser Backend")
@app.get("/")
def read_root():
    """Report that the API process is up."""
    return dict(status="ok", message="Music Analyser API is running")
@app.get("/history", response_model=List[schemas.PlayHistory])
def get_history(limit: int = 50, db: Session = Depends(get_db)):
    """Return stored plays ordered by ``played_at``, newest first.

    ``limit`` caps the number of rows returned and is passed to the query
    unchanged.
    """
    newest_first = db.query(PlayHistoryModel).order_by(
        PlayHistoryModel.played_at.desc()
    )
    return newest_first.limit(limit).all()
@app.post("/trigger-ingest")
async def trigger_ingest(db: Session = Depends(get_db)):
    """Manually trigger the ingestion process (useful for testing).

    Awaits one full ingestion pass on the request's session before replying.
    """
    await ingest_recently_played(db)
    outcome = {"status": "Ingestion triggered"}
    return outcome
@app.get("/tracks", response_model=List[schemas.Track])
def get_tracks(limit: int = 50, db: Session = Depends(get_db)):
    """Return up to ``limit`` stored tracks.

    The query specifies no ordering.
    """
    track_query = db.query(TrackModel).limit(limit)
    return track_query.all()

39
backend/app/models.py Normal file
View File

@@ -0,0 +1,39 @@
from sqlalchemy import Column, Integer, String, DateTime, JSON, ForeignKey, Boolean
from sqlalchemy.orm import relationship
from datetime import datetime
from .database import Base
class Track(Base):
    """ORM model for a Spotify track, stored in the ``tracks`` table."""

    __tablename__ = "tracks"
    id = Column(String, primary_key=True, index=True)  # Spotify ID
    name = Column(String)
    artist = Column(String)
    album = Column(String)
    duration_ms = Column(Integer)
    popularity = Column(Integer, nullable=True)
    # Store raw full JSON response for future-proofing analysis
    raw_data = Column(JSON, nullable=True)
    # AI Analysis fields
    lyrics_summary = Column(String, nullable=True)
    genre_tags = Column(String, nullable=True)  # JSON list stored as string or just raw JSON
    # Timestamps are filled in by SQLAlchemy via datetime.utcnow, i.e. naive
    # UTC values. NOTE(review): datetime.utcnow is deprecated as of
    # Python 3.12 - confirm the target Python version.
    created_at = Column(DateTime, default=datetime.utcnow)
    updated_at = Column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow)
    # All recorded plays of this track; mirrored by PlayHistory.track.
    plays = relationship("PlayHistory", back_populates="track")
class PlayHistory(Base):
    """ORM model for one play of a track, stored in ``play_history``."""

    __tablename__ = "play_history"
    id = Column(Integer, primary_key=True, index=True)
    # References tracks.id. The column is not declared NOT NULL, and no
    # uniqueness constraint on (track_id, played_at) is declared here.
    track_id = Column(String, ForeignKey("tracks.id"))
    played_at = Column(DateTime, index=True)  # The timestamp from Spotify
    # Context (album, playlist, etc.)
    context_uri = Column(String, nullable=True)
    # The played track; mirrored by Track.plays.
    track = relationship("Track", back_populates="plays")

32
backend/app/schemas.py Normal file
View File

@@ -0,0 +1,32 @@
from pydantic import BaseModel
from typing import List, Optional
from datetime import datetime
class TrackBase(BaseModel):
    """Fields shared by the API representations of a track."""

    id: str  # Spotify track ID
    name: str
    artist: str
    album: str
    duration_ms: int
    popularity: Optional[int] = None
    # AI analysis fields; optional and None by default.
    lyrics_summary: Optional[str] = None
    genre_tags: Optional[str] = None
class Track(TrackBase):
    """Track as returned by the API, including its persistence timestamps."""

    created_at: datetime
    updated_at: datetime

    # Pydantic v2 configuration. ``from_attributes`` lets the model be built
    # from objects by attribute access (e.g. ORM rows) rather than only from
    # dicts. ``model_config`` replaces the class-based ``Config``, which
    # Pydantic v2 deprecates.
    model_config = {"from_attributes": True}
class PlayHistoryBase(BaseModel):
    """Fields shared by the API representations of a single play."""

    track_id: str
    played_at: datetime
    # Optional; None by default.
    context_uri: Optional[str] = None
class PlayHistory(PlayHistoryBase):
    """A stored play as returned by the API, with its track embedded."""

    id: int
    track: Track

    # Pydantic v2 configuration. ``from_attributes`` lets the model be built
    # from objects by attribute access (e.g. ORM rows) rather than only from
    # dicts. ``model_config`` replaces the class-based ``Config``, which
    # Pydantic v2 deprecates.
    model_config = {"from_attributes": True}

View File

@@ -0,0 +1,40 @@
import os
import google.generativeai as genai
from typing import List
from ..models import PlayHistory, Track
class AIService:
    """Wrapper around a Gemini model that summarises listening history."""

    def __init__(self, api_key: str):
        """Configure the Gemini SDK and select the model to use.

        Args:
            api_key: API key passed to ``genai.configure``.
        """
        genai.configure(api_key=api_key)
        self.model = genai.GenerativeModel('models/gemini-2.0-flash')

    def generate_analysis(self, plays: "List[PlayHistory]") -> str:
        """
        Generates a summary analysis of the provided play history.

        Args:
            plays: Play records; each is expected to expose ``track`` (with
                ``name`` and ``artist``) and ``played_at``. Plays whose
                ``track`` is None are ignored.

        Returns:
            The model's text response, a fixed message when there is nothing
            to analyze, or an "AI Analysis failed: ..." message if the model
            call raises.
        """
        if not plays:
            return "No listening history available to analyze."

        # Prepare a simple text representation of the history. A play can
        # lack a track (the foreign key is nullable), so skip those instead
        # of failing on attribute access.
        history_lines = [
            f"- {play.track.name} by {play.track.artist} (Played at {play.played_at})\n"
            for play in plays
            if play.track is not None
        ]
        if not history_lines:
            return "No listening history available to analyze."
        history_text = "Here is my recent listening history:\n" + "".join(history_lines)

        prompt = (
            "\n"
            "You are a music taste analyst.\n"
            "Analyze the following listening history and provide a short, fun, and insightful summary.\n"
            'Identify the vibe, top artists, and any interesting patterns (e.g. "You started with high energy and chilled out").\n'
            "Keep it under 200 words.\n"
            f"{history_text}\n"
        )

        try:
            response = self.model.generate_content(prompt)
            return response.text
        except Exception as e:
            # Best effort: report the failure as text rather than raising.
            return f"AI Analysis failed: {str(e)}"
# Singleton accessor: the service is built on first use and then reused, so
# the Gemini SDK is configured once per process instead of on every call.
_ai_service = None


def get_ai_service():
    """Return the process-wide AIService, creating it on the first call.

    The API key is read from the GEMINI_API_KEY environment variable at the
    moment the instance is first created.
    """
    global _ai_service
    if _ai_service is None:
        _ai_service = AIService(api_key=os.getenv("GEMINI_API_KEY"))
    return _ai_service

View File

@@ -0,0 +1,70 @@
import os
import base64
import time
import httpx
from fastapi import HTTPException
# Endpoint the client POSTs to when exchanging the refresh token for an
# access token.
SPOTIFY_TOKEN_URL = "https://accounts.spotify.com/api/token"
# Prefix for the Web API resource URLs built by the client.
SPOTIFY_API_BASE = "https://api.spotify.com/v1"
class SpotifyClient:
    """Small async Spotify Web API client authenticated via a refresh token."""

    def __init__(self, client_id: str, client_secret: str, refresh_token: str):
        """Store the credentials; no network request is made here."""
        self.client_id = client_id
        self.client_secret = client_secret
        self.refresh_token = refresh_token
        self.access_token = None
        # time.time() value after which the cached token counts as expired.
        self.token_expires_at = 0

    async def get_access_token(self):
        """Returns a valid access token, refreshing if necessary.

        Raises:
            Exception: if the token endpoint does not answer with HTTP 200.
        """
        if self.access_token and time.time() < self.token_expires_at:
            return self.access_token

        print("Refreshing Spotify Access Token...")
        auth_str = f"{self.client_id}:{self.client_secret}"
        b64_auth = base64.b64encode(auth_str.encode()).decode()
        async with httpx.AsyncClient() as client:
            response = await client.post(
                SPOTIFY_TOKEN_URL,
                data={
                    "grant_type": "refresh_token",
                    "refresh_token": self.refresh_token,
                },
                headers={"Authorization": f"Basic {b64_auth}"},
            )

        if response.status_code != 200:
            print(f"Failed to refresh token: {response.text}")
            raise Exception("Could not refresh Spotify token")

        data = response.json()
        self.access_token = data["access_token"]
        # The token response may carry a new refresh token; keep it so later
        # refreshes do not reuse a superseded one. Otherwise keep the old one.
        self.refresh_token = data.get("refresh_token") or self.refresh_token
        # expires_in is usually 3600 seconds. buffer by 60s
        self.token_expires_at = time.time() + data.get("expires_in", 3600) - 60
        return self.access_token

    async def _get(self, path, params=None):
        """Send an authenticated GET to ``SPOTIFY_API_BASE + path``."""
        token = await self.get_access_token()
        async with httpx.AsyncClient() as client:
            response = await client.get(
                f"{SPOTIFY_API_BASE}{path}",
                params=params,
                headers={"Authorization": f"Bearer {token}"},
            )
        if response.status_code == 401:
            # The cached token was rejected; drop it so the next call
            # refreshes instead of reusing it until its nominal expiry.
            self.access_token = None
        return response

    async def get_recently_played(self, limit=50):
        """Return the ``items`` list of recently played tracks.

        Returns an empty list (after printing the response body) when the
        API does not answer with HTTP 200.
        """
        response = await self._get(
            "/me/player/recently-played", params={"limit": limit}
        )
        if response.status_code != 200:
            print(f"Error fetching recently played: {response.text}")
            return []
        return response.json().get("items", [])

    async def get_track(self, track_id: str):
        """Return the JSON for one track, or None on a non-200 response."""
        response = await self._get(f"/tracks/{track_id}")
        if response.status_code != 200:
            return None
        return response.json()