feat: migrate to PostgreSQL and enhance playlist curation

- Migrate database from SQLite to PostgreSQL (100.91.248.114:5433)
- Fix playlist curation to use actual top tracks instead of AI name matching
- Add /playlists/history endpoint for historical playlist viewing
- Add Playlist Archives section to frontend with expandable history
- Add playlist-modify-* scopes to Spotify OAuth for playlist creation
- Rewrite Genius client to use official API (fixes 403 scraping blocks)
- Ensure playlists are created on Spotify before curation attempts
- Add DATABASE.md documentation for PostgreSQL schema
- Add migrations for PlaylistConfig and composition storage
2026-02-25 11:46:07 +00:00 · 2025-12-30 22:24:56 +04:00
parent 26b4895695
commit 272148c5bf
19 changed files with 1130 additions and 145 deletions
--- a/backend/app/services/genius_client.py
+++ b/backend/app/services/genius_client.py
@@ -1,35 +1,103 @@
 import os
-import lyricsgenius
+import requests
 from typing import Optional, Dict, Any
+import re
+

 class GeniusClient:
    def __init__(self):
        self.access_token = os.getenv("GENIUS_ACCESS_TOKEN")
-        if self.access_token:
-            self.genius = lyricsgenius.Genius(self.access_token, verbose=False, remove_section_headers=True)
-        else:
-            print("WARNING: GENIUS_ACCESS_TOKEN not found. Lyrics enrichment will be skipped.")
+        self.base_url = "https://api.genius.com"
+        self.headers = (
+            {"Authorization": f"Bearer {self.access_token}"}
+            if self.access_token
+            else {}
+        )
+
+        if not self.access_token:
+            print(
+                "WARNING: GENIUS_ACCESS_TOKEN not found. Lyrics enrichment will be skipped."
+            )
            self.genius = None
+        else:
+            self.genius = True

    def search_song(self, title: str, artist: str) -> Optional[Dict[str, Any]]:
-        """
-        Searches for a song on Genius and returns metadata + lyrics.
-        """
        if not self.genius:
            return None

        try:
-            # Clean up title (remove "Feat.", "Remastered", etc for better search match)
            clean_title = title.split(" - ")[0].split("(")[0].strip()
-            song = self.genius.search_song(clean_title, artist)
-            
-            if song:
-                return {
-                    "lyrics": song.lyrics,
-                    "image_url": song.song_art_image_url,
-                    "artist_image_url": song.primary_artist.image_url
-                }
+            query = f"{clean_title} {artist}"
+
+            response = requests.get(
+                f"{self.base_url}/search",
+                headers=self.headers,
+                params={"q": query},
+                timeout=10,
+            )
+
+            if response.status_code != 200:
+                print(f"Genius API Error: {response.status_code}")
+                return None
+
+            data = response.json()
+            hits = data.get("response", {}).get("hits", [])
+
+            if not hits:
+                return None
+
+            song = hits[0]["result"]
+
+            lyrics = self._scrape_lyrics(song.get("url")) if song.get("url") else None
+
+            return {
+                "lyrics": lyrics,
+                "image_url": song.get("song_art_image_url")
+                or song.get("header_image_url"),
+                "artist_image_url": song.get("primary_artist", {}).get("image_url"),
+            }
        except Exception as e:
            print(f"Genius Search Error for {title} by {artist}: {e}")
-        
+
        return None
+
+    def _scrape_lyrics(self, url: str) -> Optional[str]:
+        try:
+            headers = {
+                "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
+                "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
+                "Accept-Language": "en-US,en;q=0.5",
+            }
+            response = requests.get(url, headers=headers, timeout=10)
+
+            if response.status_code != 200:
+                return None
+
+            html = response.text
+
+            lyrics_divs = re.findall(
+                r'<div[^>]*data-lyrics-container="true"[^>]*>(.*?)</div>',
+                html,
+                re.DOTALL,
+            )
+
+            if not lyrics_divs:
+                return None
+
+            lyrics = ""
+            for div in lyrics_divs:
+                text = re.sub(r"<br\s*/?>", "\n", div)
+                text = re.sub(r"<[^>]+>", "", text)
+                text = (
+                    text.replace("&amp;", "&")
+                    .replace("&quot;", '"')
+                    .replace("&#x27;", "'")
+                )
+                lyrics += text + "\n"
+
+            return lyrics.strip() if lyrics.strip() else None
+
+        except Exception as e:
+            print(f"Lyrics scrape error: {e}")
+            return None