mirror of
https://github.com/bnair123/MusicAnalyser.git
synced 2026-02-25 11:46:07 +00:00
- Migrate database from SQLite to PostgreSQL (100.91.248.114:5433) - Fix playlist curation to use actual top tracks instead of AI name matching - Add /playlists/history endpoint for historical playlist viewing - Add Playlist Archives section to frontend with expandable history - Add playlist-modify-* scopes to Spotify OAuth for playlist creation - Rewrite Genius client to use official API (fixes 403 scraping blocks) - Ensure playlists are created on Spotify before curation attempts - Add DATABASE.md documentation for PostgreSQL schema - Add migrations for PlaylistConfig and composition storage
104 lines
3.2 KiB
Python
import html
import os
import re
from typing import Any, Dict, Optional

import requests
class GeniusClient:
    """Client for the Genius API.

    Uses the official ``/search`` endpoint (authenticated via the
    ``GENIUS_ACCESS_TOKEN`` environment variable) to locate songs, then
    best-effort scrapes the public song page for the lyrics text.
    Every public method degrades to ``None`` instead of raising, so
    lyric enrichment failures never break the caller's pipeline.
    """

    def __init__(self):
        # Token for the official API; without it all lookups are skipped.
        self.access_token = os.getenv("GENIUS_ACCESS_TOKEN")
        self.base_url = "https://api.genius.com"
        self.headers = (
            {"Authorization": f"Bearer {self.access_token}"}
            if self.access_token
            else {}
        )

        if not self.access_token:
            print(
                "WARNING: GENIUS_ACCESS_TOKEN not found. Lyrics enrichment will be skipped."
            )
            self.genius = None
        else:
            # Truthy sentinel kept for backward compatibility: callers (and
            # search_song below) only ever check `if not self.genius`.
            self.genius = True

    def search_song(self, title: str, artist: str) -> Optional[Dict[str, Any]]:
        """Search Genius for a track and return lyric/artwork metadata.

        Returns a dict with keys ``"lyrics"``, ``"image_url"`` and
        ``"artist_image_url"``, or ``None`` when the client is
        unconfigured, the request fails, or there are no hits.
        """
        if not self.genius:
            return None

        try:
            # Strip Spotify-style suffixes like " - Remastered" or "(Live)"
            # that would hurt Genius search relevance.
            clean_title = title.split(" - ")[0].split("(")[0].strip()
            query = f"{clean_title} {artist}"

            response = requests.get(
                f"{self.base_url}/search",
                headers=self.headers,
                params={"q": query},
                timeout=10,
            )

            if response.status_code != 200:
                print(f"Genius API Error: {response.status_code}")
                return None

            data = response.json()
            hits = data.get("response", {}).get("hits", [])

            if not hits:
                return None

            # Take the top hit; Genius orders results by relevance.
            song = hits[0]["result"]

            lyrics = self._scrape_lyrics(song.get("url")) if song.get("url") else None

            return {
                "lyrics": lyrics,
                "image_url": song.get("song_art_image_url")
                or song.get("header_image_url"),
                "artist_image_url": song.get("primary_artist", {}).get("image_url"),
            }
        except Exception as e:
            # Best-effort enrichment: log and report "no metadata" rather
            # than propagate network/JSON errors to the caller.
            print(f"Genius Search Error for {title} by {artist}: {e}")
            return None

    def _scrape_lyrics(self, url: str) -> Optional[str]:
        """Fetch a Genius song page and extract its lyrics.

        Returns the lyrics text, or ``None`` on any HTTP or parsing
        failure (errors are logged, never raised).
        """
        try:
            # Browser-like headers: Genius blocks the default
            # python-requests User-Agent with 403s.
            headers = {
                "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
                "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
                "Accept-Language": "en-US,en;q=0.5",
            }
            response = requests.get(url, headers=headers, timeout=10)

            if response.status_code != 200:
                return None

            return self._extract_lyrics_from_html(response.text)

        except Exception as e:
            print(f"Lyrics scrape error: {e}")
            return None

    @staticmethod
    def _extract_lyrics_from_html(page: str) -> Optional[str]:
        """Pull lyrics text out of Genius page HTML (pure, no I/O).

        Returns the concatenated lyrics from every
        ``data-lyrics-container`` div, or ``None`` when the page holds
        no lyrics.
        """
        # NOTE(review): the non-greedy </div> match truncates at the first
        # closing div, so deeply nested markup inside a container would be
        # cut short — acceptable for Genius's flat lyric containers.
        lyrics_divs = re.findall(
            r'<div[^>]*data-lyrics-container="true"[^>]*>(.*?)</div>',
            page,
            re.DOTALL,
        )

        if not lyrics_divs:
            return None

        parts = []
        for div in lyrics_divs:
            text = re.sub(r"<br\s*/?>", "\n", div)  # preserve line breaks
            text = re.sub(r"<[^>]+>", "", text)  # drop remaining tags
            # Bug fix: the previous replace() chain was corrupted (the HTML
            # entities had been decoded in the source itself, leaving no-op
            # replacements and a broken string literal). html.unescape
            # decodes &amp;, &quot;, &#x27; and all other entities correctly.
            parts.append(html.unescape(text))

        lyrics = "\n".join(parts)
        return lyrics.strip() if lyrics.strip() else None
|