1
import logging
import re
from typing import Any, Dict, Optional
from urllib.parse import quote

import requests

from lyrics_transcriber.types import LyricsData, LyricsMetadata
from lyrics_transcriber.lyrics.base_lyrics_provider import BaseLyricsProvider, LyricsProviderConfig
6
+
7
+
8
class MusixmatchProvider(BaseLyricsProvider):
    """Handles fetching lyrics from Musixmatch via RapidAPI.

    The provider issues a single GET request per song to the RapidAPI-hosted
    Musixmatch endpoint, validates that the nested response contains a lyrics
    object, and converts it into the project's ``LyricsData`` structure.
    """

    # Host name used both in the request URL and the ``x-rapidapi-host`` header.
    _API_HOST = "musixmatch-song-lyrics-api.p.rapidapi.com"
    # Seconds to wait for the HTTP response before ``requests`` gives up.
    _REQUEST_TIMEOUT_SECONDS = 10

    # Literal backslash-n sequences (plus surrounding whitespace) in the lyrics text.
    _ESCAPED_NEWLINE_RE = re.compile(r"\s*\\n\s*")
    # Anything that looks like an HTML/XML tag.
    _HTML_TAG_RE = re.compile(r"<[^>]+>")
    # Three or more newlines, optionally separated by whitespace.
    _EXCESS_BLANK_LINES_RE = re.compile(r"\n\s*\n\s*\n+")

    def __init__(self, config: LyricsProviderConfig, logger: Optional[logging.Logger] = None):
        """Initialise the provider and remember the RapidAPI key from *config*."""
        super().__init__(config, logger)
        self.rapidapi_key = config.rapidapi_key

    @staticmethod
    def _get_nested_dict(data: Any, *keys: str) -> Dict[str, Any]:
        """Walk *keys* through nested dicts and return the dict found at the end.

        Returns an empty dict as soon as any level is missing or is not a
        dict, so callers can chain lookups on untrusted JSON without risking
        ``AttributeError``.
        """
        current = data
        for key in keys:
            if not isinstance(current, dict):
                return {}
            current = current.get(key)
        return current if isinstance(current, dict) else {}

    def _fetch_data_from_source(self, artist: str, title: str) -> Optional[Dict[str, Any]]:
        """Fetch raw song data from Musixmatch via RapidAPI.

        Args:
            artist: Artist name; percent-encoded before being placed in the URL path.
            title: Song title; percent-encoded before being placed in the URL path.

        Returns:
            The full decoded JSON payload when it contains a lyrics object,
            otherwise ``None`` (missing API key, request failure, or a
            response without the expected structure). Failures are logged,
            never raised.
        """
        if not self.rapidapi_key:
            self.logger.warning("No RapidAPI key provided for Musixmatch")
            return None

        self.logger.info(f"Fetching lyrics from Musixmatch for {artist} - {title}")

        try:
            # Encode each path segment on its own with safe="" so that characters
            # such as "/", "?", "#" or spaces in a name (e.g. "AC/DC") cannot
            # change the shape of the request path.
            artist_segment = quote(artist, safe="")
            title_segment = quote(title, safe="")
            url = f"https://{self._API_HOST}/lyrics/{artist_segment}/{title_segment}/"

            headers = {
                "x-rapidapi-key": self.rapidapi_key,
                "x-rapidapi-host": self._API_HOST,
            }

            self.logger.debug(f"Making Musixmatch API request to: {url}")
            response = requests.get(url, headers=headers, timeout=self._REQUEST_TIMEOUT_SECONDS)
            response.raise_for_status()

            data = response.json()

            # Check if we got a valid response
            macro_calls = self._get_nested_dict(data, "message", "body", "macro_calls")
            if not macro_calls:
                self.logger.warning("Invalid response structure from Musixmatch API")
                return None

            # Check if lyrics are available
            lyrics = self._get_nested_dict(macro_calls, "track.lyrics.get", "message", "body", "lyrics")
            if not lyrics:
                self.logger.warning("No lyrics found in Musixmatch response")
                return None

            self.logger.info("Successfully fetched lyrics from Musixmatch")
            return data

        except requests.exceptions.RequestException as e:
            self.logger.error(f"Musixmatch API request failed: {str(e)}")
            return None
        except Exception as e:
            # Boundary handler: a provider failure must not abort the caller,
            # so anything unexpected is logged and reported as "no data".
            self.logger.error(f"Error fetching from Musixmatch: {str(e)}")
            return None

    def _convert_result_format(self, raw_data: Dict[str, Any]) -> LyricsData:
        """Convert Musixmatch's raw API response to standardized format.

        Args:
            raw_data: Payload as returned by ``_fetch_data_from_source``.

        Returns:
            A ``LyricsData`` with segments built from the cleaned lyrics text.
            If conversion raises, an empty ``LyricsData`` is returned whose
            ``provider_metadata`` carries the error text under
            ``"conversion_error"``.
        """
        try:
            # Extract macro calls from the nested response
            macro_calls = self._get_nested_dict(raw_data, "message", "body", "macro_calls")

            # Extract track and lyrics information
            track_data = self._get_nested_dict(macro_calls, "matcher.track.get", "message", "body", "track")
            lyrics_data = self._get_nested_dict(macro_calls, "track.lyrics.get", "message", "body", "lyrics")

            # Get the actual lyrics text and clean it
            lyrics_text = self._clean_lyrics(lyrics_data.get("lyrics_body", ""))

            # track_length is multiplied by 1000 to produce duration_ms;
            # a missing or zero length is reported as unknown (None).
            track_length = track_data.get("track_length")
            duration_ms = track_length * 1000 if track_length else None

            # Avoid turning a null id into the literal string "None".
            lyrics_id = lyrics_data.get("lyrics_id")
            lyrics_provider_id = str(lyrics_id) if lyrics_id is not None else ""

            # Create metadata object
            metadata = LyricsMetadata(
                source="musixmatch",
                track_name=track_data.get("track_name", ""),
                artist_names=track_data.get("artist_name", ""),
                album_name=track_data.get("album_name", ""),
                duration_ms=duration_ms,
                explicit=bool(track_data.get("explicit", 0)),
                language=lyrics_data.get("lyrics_language", ""),
                is_synced=False,  # Musixmatch API doesn't provide sync data in this format
                lyrics_provider="musixmatch",
                lyrics_provider_id=lyrics_provider_id,
                provider_metadata={
                    "musixmatch_track_id": track_data.get("track_id"),
                    "musixmatch_lyrics_id": lyrics_id,
                    "album_id": track_data.get("album_id"),
                    "artist_id": track_data.get("artist_id"),
                    "track_share_url": track_data.get("track_share_url"),
                    "track_edit_url": track_data.get("track_edit_url"),
                    "lyrics_language": lyrics_data.get("lyrics_language"),
                    "lyrics_language_description": lyrics_data.get("lyrics_language_description"),
                    "lyrics_copyright": lyrics_data.get("lyrics_copyright"),
                    "track_rating": track_data.get("track_rating"),
                    "num_favourite": track_data.get("num_favourite"),
                    "first_release_date": track_data.get("first_release_date"),
                    "spotify_id": track_data.get("track_spotify_id"),
                    "isrc": track_data.get("track_isrc"),
                    "api_source": "rapidapi_musixmatch",
                },
            )

            # Create segments with words from lyrics
            segments = self._create_segments_with_words(lyrics_text, is_synced=False)

            # Create result object with segments
            return LyricsData(source="musixmatch", segments=segments, metadata=metadata)

        except Exception as e:
            self.logger.error(f"Error converting Musixmatch response format: {str(e)}")
            # Return empty lyrics data if conversion fails
            return LyricsData(
                source="musixmatch",
                segments=[],
                metadata=LyricsMetadata(
                    source="musixmatch",
                    track_name="",
                    artist_names="",
                    lyrics_provider="musixmatch",
                    is_synced=False,
                    provider_metadata={"api_source": "rapidapi_musixmatch", "conversion_error": str(e)},
                ),
            )

    def _clean_lyrics(self, lyrics: str) -> str:
        """Clean and process lyrics from Musixmatch to remove unwanted content.

        Non-string input is logged and coerced: ``None`` becomes ``""`` and
        anything else goes through ``str()``. The text then has literal
        ``\\n`` escape sequences turned into real newlines, tag-like markup
        removed, runs of blank lines collapsed to a single blank line, and
        surrounding whitespace stripped.
        """
        if not isinstance(lyrics, str):
            self.logger.warning(f"Expected string for lyrics, got {type(lyrics)}: {repr(lyrics)}")
            if lyrics is None:
                return ""
            try:
                lyrics = str(lyrics)
            except Exception as e:
                self.logger.error(f"Failed to convert lyrics to string: {e}")
                return ""

        # Replace escaped newlines with actual newlines, handling whitespace
        lyrics = self._ESCAPED_NEWLINE_RE.sub("\n", lyrics)

        # Remove any HTML tags that might be present
        lyrics = self._HTML_TAG_RE.sub("", lyrics)

        # Clean up multiple consecutive newlines
        lyrics = self._EXCESS_BLANK_LINES_RE.sub("\n\n", lyrics)

        # Clean up leading/trailing whitespace
        lyrics = lyrics.strip()

        self.logger.debug("Completed Musixmatch lyrics cleaning process")
        return lyrics
0 commit comments