mirror of
https://github.com/bnair123/MusicAnalyser.git
synced 2026-02-25 11:46:07 +00:00
- Migrate database from SQLite to PostgreSQL (100.91.248.114:5433) - Fix playlist curation to use actual top tracks instead of AI name matching - Add /playlists/history endpoint for historical playlist viewing - Add Playlist Archives section to frontend with expandable history - Add playlist-modify-* scopes to Spotify OAuth for playlist creation - Rewrite Genius client to use official API (fixes 403 scraping blocks) - Ensure playlists are created on Spotify before curation attempts - Add DATABASE.md documentation for PostgreSQL schema - Add migrations for PlaylistConfig and composition storage
104 lines
3.2 KiB
Python
import html
import os
import re
from typing import Any, Dict, Optional

import requests
class GeniusClient:
    """Client for the Genius API.

    Uses the official ``/search`` endpoint (authenticated via the
    ``GENIUS_ACCESS_TOKEN`` environment variable) to locate songs, then
    best-effort scrapes the public song page for the lyrics text.
    Every public method degrades to ``None`` instead of raising, so
    lyric enrichment failures never break the caller's pipeline.
    """

    def __init__(self):
        # Token for the official API; without it all lookups are skipped.
        self.access_token = os.getenv("GENIUS_ACCESS_TOKEN")
        self.base_url = "https://api.genius.com"
        self.headers = (
            {"Authorization": f"Bearer {self.access_token}"}
            if self.access_token
            else {}
        )

        if not self.access_token:
            print(
                "WARNING: GENIUS_ACCESS_TOKEN not found. Lyrics enrichment will be skipped."
            )
            self.genius = None
        else:
            # Truthy sentinel kept for backward compatibility: callers (and
            # search_song below) only ever check `if not self.genius`.
            self.genius = True

    def search_song(self, title: str, artist: str) -> Optional[Dict[str, Any]]:
        """Search Genius for a track and return lyric/artwork metadata.

        Returns a dict with keys ``"lyrics"``, ``"image_url"`` and
        ``"artist_image_url"``, or ``None`` when the client is
        unconfigured, the request fails, or there are no hits.
        """
        if not self.genius:
            return None

        try:
            # Strip Spotify-style suffixes like " - Remastered" or "(Live)"
            # that would hurt Genius search relevance.
            clean_title = title.split(" - ")[0].split("(")[0].strip()
            query = f"{clean_title} {artist}"

            response = requests.get(
                f"{self.base_url}/search",
                headers=self.headers,
                params={"q": query},
                timeout=10,
            )

            if response.status_code != 200:
                print(f"Genius API Error: {response.status_code}")
                return None

            data = response.json()
            hits = data.get("response", {}).get("hits", [])

            if not hits:
                return None

            # Take the top hit; Genius orders results by relevance.
            song = hits[0]["result"]

            lyrics = self._scrape_lyrics(song.get("url")) if song.get("url") else None

            return {
                "lyrics": lyrics,
                "image_url": song.get("song_art_image_url")
                or song.get("header_image_url"),
                "artist_image_url": song.get("primary_artist", {}).get("image_url"),
            }
        except Exception as e:
            # Best-effort enrichment: log and report "no metadata" rather
            # than propagate network/JSON errors to the caller.
            print(f"Genius Search Error for {title} by {artist}: {e}")
            return None

    def _scrape_lyrics(self, url: str) -> Optional[str]:
        """Fetch a Genius song page and extract its lyrics.

        Returns the lyrics text, or ``None`` on any HTTP or parsing
        failure (errors are logged, never raised).
        """
        try:
            # Browser-like headers: Genius blocks the default
            # python-requests User-Agent with 403s.
            headers = {
                "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
                "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
                "Accept-Language": "en-US,en;q=0.5",
            }
            response = requests.get(url, headers=headers, timeout=10)

            if response.status_code != 200:
                return None

            return self._extract_lyrics_from_html(response.text)

        except Exception as e:
            print(f"Lyrics scrape error: {e}")
            return None

    @staticmethod
    def _extract_lyrics_from_html(page: str) -> Optional[str]:
        """Pull lyrics text out of Genius page HTML (pure, no I/O).

        Returns the concatenated lyrics from every
        ``data-lyrics-container`` div, or ``None`` when the page holds
        no lyrics.
        """
        # NOTE(review): the non-greedy </div> match truncates at the first
        # closing div, so deeply nested markup inside a container would be
        # cut short — acceptable for Genius's flat lyric containers.
        lyrics_divs = re.findall(
            r'<div[^>]*data-lyrics-container="true"[^>]*>(.*?)</div>',
            page,
            re.DOTALL,
        )

        if not lyrics_divs:
            return None

        parts = []
        for div in lyrics_divs:
            text = re.sub(r"<br\s*/?>", "\n", div)  # preserve line breaks
            text = re.sub(r"<[^>]+>", "", text)  # drop remaining tags
            # Bug fix: the previous replace() chain was corrupted (the HTML
            # entities had been decoded in the source itself, leaving no-op
            # replacements and a broken string literal). html.unescape
            # decodes &amp;, &quot;, &#x27; and all other entities correctly.
            parts.append(html.unescape(text))

        lyrics = "\n".join(parts)
        return lyrics.strip() if lyrics.strip() else None
|