Skip to content

Commit d55d1f4

Browse files
committed
[YouTube] Always extract using MWEB API client
* temporary fix-up for 403 on download * MWEB parameters from yt-dlp 2024-12-06
1 parent eeafbbc commit d55d1f4

File tree

1 file changed

+77
-7
lines changed

1 file changed

+77
-7
lines changed

youtube_dl/extractor/youtube.py

Lines changed: 77 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -3,11 +3,13 @@
33
from __future__ import unicode_literals
44

55
import collections
6+
import hashlib
67
import itertools
78
import json
89
import os.path
910
import random
1011
import re
12+
import time
1113
import traceback
1214

1315
from .common import InfoExtractor, SearchInfoExtractor
@@ -290,6 +292,33 @@ def _real_initialize(self):
290292
_YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*=\s*({.+?})\s*;'
291293
_YT_INITIAL_BOUNDARY_RE = r'(?:var\s+meta|</script|\n)'
292294

295+
_SAPISID = None
296+
297+
def _generate_sapisidhash_header(self, origin='https://www.youtube.com'):
    """Build a 'SAPISIDHASH ...' authorization value from the SAPISID cookie.

    The SAPISID value is looked up at most once in the cookies for
    https://www.youtube.com and cached in ``self._SAPISID``; ``False`` is
    stored when no usable cookie was found so the lookup is not repeated.

    @param origin   origin string hashed together with the timestamp and
                    the SAPISID value
    @returns        'SAPISIDHASH <timestamp>_<sha1 hex digest>', or None
                    if no SAPISID value is available
    """
    # round() returns a float on Python 2, which would be rendered as
    # e.g. '1733500000.0' in the hash input and the header below (and be
    # passed on as a float cookie expiry); force an integer timestamp
    time_now = int(round(time.time()))
    if self._SAPISID is None:
        yt_cookies = self._get_cookies('https://www.youtube.com')
        # Sometimes SAPISID cookie isn't present but __Secure-3PAPISID is.
        # See: https://github.com/yt-dlp/yt-dlp/issues/393
        sapisid_cookie = dict_get(
            yt_cookies, ('__Secure-3PAPISID', 'SAPISID'))
        if sapisid_cookie and sapisid_cookie.value:
            self._SAPISID = sapisid_cookie.value
            self.write_debug('Extracted SAPISID cookie')
            # SAPISID cookie is required if not already present
            if not yt_cookies.get('SAPISID'):
                self.write_debug('Copying __Secure-3PAPISID cookie to SAPISID cookie')
                # the copied cookie is set to expire one hour from now
                self._set_cookie(
                    '.youtube.com', 'SAPISID', self._SAPISID, secure=True, expire_time=time_now + 3600)
        else:
            # remember the failed lookup so later calls return early
            self._SAPISID = False
    if not self._SAPISID:
        return None
    # SAPISIDHASH algorithm from https://stackoverflow.com/a/32065323
    sapisidhash = hashlib.sha1(
        '{0} {1} {2}'.format(time_now, self._SAPISID, origin).encode('utf-8')).hexdigest()
    return 'SAPISIDHASH {0}_{1}'.format(time_now, sapisidhash)
321+
293322
def _call_api(self, ep, query, video_id, fatal=True, headers=None):
294323
data = self._DEFAULT_API_DATA.copy()
295324
data.update(query)
@@ -1914,9 +1943,50 @@ def _real_extract(self, url):
19141943
player_response = self._extract_yt_initial_variable(
19151944
webpage, self._YT_INITIAL_PLAYER_RESPONSE_RE,
19161945
video_id, 'initial player response')
1917-
if not player_response:
1946+
if False and not player_response:
19181947
player_response = self._call_api(
19191948
'player', {'videoId': video_id}, video_id)
1949+
if True or not player_response:
1950+
origin = 'https://www.youtube.com'
1951+
pb_context = {'html5Preference': 'HTML5_PREF_WANTS'}
1952+
1953+
player_url = self._extract_player_url(webpage)
1954+
ytcfg = self._extract_ytcfg(video_id, webpage)
1955+
sts = self._extract_signature_timestamp(video_id, player_url, ytcfg)
1956+
if sts:
1957+
pb_context['signatureTimestamp'] = sts
1958+
1959+
query = {
1960+
'playbackContext': {
1961+
'contentPlaybackContext': pb_context,
1962+
'contentCheckOk': True,
1963+
'racyCheckOk': True,
1964+
},
1965+
'context': {
1966+
'client': {
1967+
'clientName': 'MWEB',
1968+
'clientVersion': '2.20241202.07.00',
1969+
'hl': 'en',
1970+
'userAgent': 'Mozilla/5.0 (iPad; CPU OS 16_7_10 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.6 Mobile/15E148 Safari/604.1,gzip(gfe)',
1971+
'timeZone': 'UTC',
1972+
'utcOffsetMinutes': 0,
1973+
},
1974+
},
1975+
'videoId': video_id,
1976+
}
1977+
headers = {
1978+
'X-YouTube-Client-Name': '2',
1979+
'X-YouTube-Client-Version': '2.20241202.07.00',
1980+
'Origin': origin,
1981+
'Sec-Fetch-Mode': 'navigate',
1982+
'User-Agent': query['context']['client']['userAgent'],
1983+
}
1984+
auth = self._generate_sapisidhash_header(origin)
1985+
if auth is not None:
1986+
headers['Authorization'] = auth
1987+
headers['X-Origin'] = origin
1988+
1989+
player_response = self._call_api('player', query, video_id, fatal=False, headers=headers)
19201990

19211991
def is_agegated(playability):
19221992
if not isinstance(playability, dict):
@@ -2223,12 +2293,12 @@ def process_manifest_format(f, proto, client_name, itag, all_formats=False):
22232293
formats.append(f)
22242294

22252295
playable_formats = [f for f in formats if not f.get('has_drm')]
2226-
if formats and not playable_formats:
2227-
# If there are no formats that definitely don't have DRM, all have DRM
2228-
self.report_drm(video_id)
2229-
formats[:] = playable_formats
2230-
2231-
if not formats:
2296+
if formats:
2297+
if not playable_formats:
2298+
# If there are no formats that definitely don't have DRM, all have DRM
2299+
self.report_drm(video_id)
2300+
formats[:] = playable_formats
2301+
else:
22322302
if streaming_data.get('licenseInfos'):
22332303
raise ExtractorError(
22342304
'This video is DRM protected.', expected=True)

0 commit comments

Comments
 (0)