Skip to content

Commit 7f597fe

Browse files
committed
Added Musixmatch lyrics provider via RapidAPI
1 parent b311947 commit 7f597fe

File tree

9 files changed

+891
-14
lines changed

9 files changed

+891
-14
lines changed

lyrics_transcriber/core/controller.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
from lyrics_transcriber.lyrics.base_lyrics_provider import BaseLyricsProvider, LyricsProviderConfig
1111
from lyrics_transcriber.lyrics.genius import GeniusProvider
1212
from lyrics_transcriber.lyrics.spotify import SpotifyProvider
13+
from lyrics_transcriber.lyrics.musixmatch import MusixmatchProvider
1314
from lyrics_transcriber.output.generator import OutputGenerator
1415
from lyrics_transcriber.correction.corrector import LyricsCorrector
1516
from lyrics_transcriber.core.config import TranscriberConfig, LyricsConfig, OutputConfig
@@ -215,6 +216,12 @@ def _initialize_lyrics_providers(self) -> Dict[str, BaseLyricsProvider]:
215216
else:
216217
self.logger.debug("Skipping Spotify provider - no cookie provided")
217218

219+
if provider_config.rapidapi_key:
220+
self.logger.debug("Initializing Musixmatch lyrics provider")
221+
providers["musixmatch"] = MusixmatchProvider(config=provider_config, logger=self.logger)
222+
else:
223+
self.logger.debug("Skipping Musixmatch provider - no RapidAPI key provided")
224+
218225
return providers
219226

220227
def _initialize_output_generator(self) -> OutputGenerator:

lyrics_transcriber/lyrics/genius.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,8 @@ def __init__(self, config: LyricsProviderConfig, logger: Optional[logging.Logger
1515
self.api_token = config.genius_api_token
1616
self.rapidapi_key = config.rapidapi_key
1717
self.client = None
18-
if self.api_token:
18+
# Only initialize lyricsgenius client if rapidapi_key is not set
19+
if self.api_token and not self.rapidapi_key:
1920
self.client = lyricsgenius.Genius(
2021
self.api_token,
2122
verbose=(logger.getEffectiveLevel() == logging.DEBUG if logger else False),
Lines changed: 156 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,156 @@
1+
import logging
2+
from typing import Optional, Dict, Any
3+
import requests
4+
from lyrics_transcriber.types import LyricsData, LyricsMetadata
5+
from lyrics_transcriber.lyrics.base_lyrics_provider import BaseLyricsProvider, LyricsProviderConfig
6+
7+
8+
class MusixmatchProvider(BaseLyricsProvider):
    """Handles fetching lyrics from Musixmatch via RapidAPI.

    The provider issues a single GET request against the RapidAPI-hosted
    Musixmatch endpoint and converts the nested ``macro_calls`` payload into
    the project's ``LyricsData`` / ``LyricsMetadata`` types.
    """

    # Host of the RapidAPI-hosted Musixmatch endpoint; used for both the
    # request URL and the ``x-rapidapi-host`` header so they cannot drift apart.
    _API_HOST = "musixmatch-song-lyrics-api.p.rapidapi.com"

    # Seconds to wait for the HTTP request before giving up.
    _REQUEST_TIMEOUT_SECONDS = 10

    def __init__(self, config: LyricsProviderConfig, logger: Optional[logging.Logger] = None):
        """Store the RapidAPI key from ``config`` after base-class initialisation.

        Args:
            config: Provider configuration; only ``rapidapi_key`` is read here.
            logger: Optional logger forwarded to the base class.
        """
        super().__init__(config, logger)
        self.rapidapi_key = config.rapidapi_key

    @staticmethod
    def _dig(data: Any, *keys: str) -> Any:
        """Walk ``keys`` through nested dicts, returning ``None`` on any miss.

        Unlike chained ``.get(key, {})`` calls, this does not raise when an
        intermediate node is present but is not a dict (for example a list or
        a string), so malformed payloads can be reported as such instead of
        surfacing as an ``AttributeError``.
        """
        node = data
        for key in keys:
            if not isinstance(node, dict):
                return None
            node = node.get(key)
        return node

    def _fetch_data_from_source(self, artist: str, title: str) -> Optional[Dict[str, Any]]:
        """Fetch raw song data from Musixmatch via RapidAPI.

        Args:
            artist: Artist name; percent-encoded before being placed in the URL path.
            title: Track title; percent-encoded before being placed in the URL path.

        Returns:
            The decoded JSON response when it contains a non-empty
            ``track.lyrics.get`` lyrics entry, otherwise ``None`` (missing key,
            request failure, unexpected payload shape, or no lyrics).
        """
        # Function-scope import keeps this block self-contained.
        from urllib.parse import quote

        if not self.rapidapi_key:
            self.logger.warning("No RapidAPI key provided for Musixmatch")
            return None

        self.logger.info(f"Fetching lyrics from Musixmatch for {artist} - {title}")

        try:
            # safe="" also escapes "/", so names such as "AC/DC" or titles
            # containing "?" or "#" stay inside their own path segment instead
            # of adding segments or truncating the path into a query/fragment.
            artist_segment = quote(artist, safe="")
            title_segment = quote(title, safe="")
            url = f"https://{self._API_HOST}/lyrics/{artist_segment}/{title_segment}/"

            headers = {
                "x-rapidapi-key": self.rapidapi_key,
                "x-rapidapi-host": self._API_HOST,
            }

            self.logger.debug(f"Making Musixmatch API request to: {url}")
            response = requests.get(url, headers=headers, timeout=self._REQUEST_TIMEOUT_SECONDS)
            response.raise_for_status()

            data = response.json()

            # Check if we got a valid response
            macro_calls = self._dig(data, "message", "body", "macro_calls")
            if not macro_calls:
                self.logger.warning("Invalid response structure from Musixmatch API")
                return None

            # Check if lyrics are available
            if not self._dig(macro_calls, "track.lyrics.get", "message", "body", "lyrics"):
                self.logger.warning("No lyrics found in Musixmatch response")
                return None

            self.logger.info("Successfully fetched lyrics from Musixmatch")
            return data

        except requests.exceptions.RequestException as e:
            self.logger.error(f"Musixmatch API request failed: {str(e)}")
            return None
        except Exception as e:
            # Provider boundary: a failing provider is reported as "no data"
            # rather than propagating the exception to the caller.
            self.logger.error(f"Error fetching from Musixmatch: {str(e)}")
            return None

    def _convert_result_format(self, raw_data: Dict[str, Any]) -> LyricsData:
        """Convert Musixmatch's raw API response to standardized format.

        Args:
            raw_data: Decoded JSON as returned by ``_fetch_data_from_source``.

        Returns:
            A ``LyricsData`` built from the ``matcher.track.get`` and
            ``track.lyrics.get`` macro calls. If anything raises during
            conversion, an empty ``LyricsData`` is returned whose
            ``provider_metadata`` carries the error text under
            ``conversion_error``.
        """
        try:
            # Extract macro calls from the nested response
            macro_calls = raw_data.get("message", {}).get("body", {}).get("macro_calls", {})

            # Extract track information
            track_data = macro_calls.get("matcher.track.get", {}).get("message", {}).get("body", {}).get("track", {})

            # Extract lyrics information
            lyrics_data = macro_calls.get("track.lyrics.get", {}).get("message", {}).get("body", {}).get("lyrics", {})

            # Get the actual lyrics text and clean it
            lyrics_text = self._clean_lyrics(lyrics_data.get("lyrics_body", ""))

            # The value is multiplied by 1000 to fill duration_ms; a missing
            # or zero length is reported as None.
            track_length = track_data.get("track_length")
            duration_ms = track_length * 1000 if track_length else None

            # Create metadata object
            metadata = LyricsMetadata(
                source="musixmatch",
                track_name=track_data.get("track_name", ""),
                artist_names=track_data.get("artist_name", ""),
                album_name=track_data.get("album_name", ""),
                duration_ms=duration_ms,
                explicit=bool(track_data.get("explicit", 0)),
                language=lyrics_data.get("lyrics_language", ""),
                is_synced=False,  # Musixmatch API doesn't provide sync data in this format
                lyrics_provider="musixmatch",
                lyrics_provider_id=str(lyrics_data.get("lyrics_id", "")),
                provider_metadata={
                    "musixmatch_track_id": track_data.get("track_id"),
                    "musixmatch_lyrics_id": lyrics_data.get("lyrics_id"),
                    "album_id": track_data.get("album_id"),
                    "artist_id": track_data.get("artist_id"),
                    "track_share_url": track_data.get("track_share_url"),
                    "track_edit_url": track_data.get("track_edit_url"),
                    "lyrics_language": lyrics_data.get("lyrics_language"),
                    "lyrics_language_description": lyrics_data.get("lyrics_language_description"),
                    "lyrics_copyright": lyrics_data.get("lyrics_copyright"),
                    "track_rating": track_data.get("track_rating"),
                    "num_favourite": track_data.get("num_favourite"),
                    "first_release_date": track_data.get("first_release_date"),
                    "spotify_id": track_data.get("track_spotify_id"),
                    "isrc": track_data.get("track_isrc"),
                    "api_source": "rapidapi_musixmatch",
                },
            )

            # Create segments with words from lyrics
            segments = self._create_segments_with_words(lyrics_text, is_synced=False)

            # Create result object with segments
            return LyricsData(source="musixmatch", segments=segments, metadata=metadata)

        except Exception as e:
            self.logger.error(f"Error converting Musixmatch response format: {str(e)}")
            # Return empty lyrics data if conversion fails
            return LyricsData(
                source="musixmatch",
                segments=[],
                metadata=LyricsMetadata(
                    source="musixmatch",
                    track_name="",
                    artist_names="",
                    lyrics_provider="musixmatch",
                    is_synced=False,
                    provider_metadata={"api_source": "rapidapi_musixmatch", "conversion_error": str(e)},
                ),
            )

    def _clean_lyrics(self, lyrics: str) -> str:
        """Clean and process lyrics from Musixmatch to remove unwanted content.

        Args:
            lyrics: Raw lyrics text. Non-string values are tolerated: ``None``
                yields ``""`` and anything else is passed through ``str()``.

        Returns:
            The lyrics with literal ``\\n`` escape sequences turned into real
            newlines, HTML-like tags removed, runs of three or more line breaks
            collapsed to a single blank line, and surrounding whitespace
            stripped.
        """
        import re

        if not isinstance(lyrics, str):
            self.logger.warning(f"Expected string for lyrics, got {type(lyrics)}: {repr(lyrics)}")
            if lyrics is None:
                return ""
            try:
                lyrics = str(lyrics)
            except Exception as e:
                self.logger.error(f"Failed to convert lyrics to string: {e}")
                return ""

        # Replace escaped newlines with actual newlines, handling whitespace
        lyrics = re.sub(r'\s*\\n\s*', '\n', lyrics)

        # Remove any HTML tags that might be present
        lyrics = re.sub(r'<[^>]+>', '', lyrics)

        # Clean up multiple consecutive newlines
        lyrics = re.sub(r'\n\s*\n\s*\n+', '\n\n', lyrics)

        # Clean up leading/trailing whitespace
        lyrics = lyrics.strip()

        self.logger.debug("Completed Musixmatch lyrics cleaning process")
        return lyrics

lyrics_transcriber/lyrics/spotify.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,8 @@ def __init__(self, config: LyricsProviderConfig, logger: Optional[logging.Logger
1818
self.rapidapi_key = config.rapidapi_key
1919
self.client = None
2020

21-
if self.cookie:
21+
# Only initialize syrics client if rapidapi_key is not set
22+
if self.cookie and not self.rapidapi_key:
2223
max_retries = 5
2324
retry_delay = 5 # seconds
2425

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[tool.poetry]
22
name = "lyrics-transcriber"
3-
version = "0.59.0"
3+
version = "0.60.0"
44
description = "Automatically create synchronised lyrics files in ASS and MidiCo LRC formats with word-level timestamps, using Whisper and lyrics from Genius and Spotify"
55
authors = ["Andrew Beveridge <[email protected]>"]
66
license = "MIT"

0 commit comments

Comments
 (0)