forked from jengelh/yt-dlp
4121 lines
176 KiB
Diff
4121 lines
176 KiB
Diff
From cbf832cee291ab88ba32b345b9784c5ef028d521 Mon Sep 17 00:00:00 2001
|
||
From: coletdjnz <coletdjnz@protonmail.com>
|
||
Date: Sat, 23 Aug 2025 20:05:53 +1200
|
||
Subject: [PATCH 01/43] [youtube] POC JS Challenge Provider framework
|
||
|
||
---
|
||
yt_dlp/extractor/youtube/_video.py | 399 +++---------------
|
||
yt_dlp/extractor/youtube/js/README.md | 115 +++++
|
||
yt_dlp/extractor/youtube/js/__init__.py | 2 +
|
||
.../extractor/youtube/js/_builtin/__init__.py | 0
|
||
README.md | 21
|
||
test/test_jsc/conftest.py | 43
|
||
test/test_jsc/test_deno.py | 252 ++-
|
||
test/test_jsc/test_runtime.py | 85 +
|
||
yt_dlp/YoutubeDL.py | 44
|
||
yt_dlp/__init__.py | 17
|
||
yt_dlp/extractor/youtube/_video.py | 935 +++++--------
|
||
yt_dlp/extractor/youtube/js/README.md | 242 +--
|
||
yt_dlp/extractor/youtube/js/__init__.py | 7
|
||
yt_dlp/extractor/youtube/js/_builtin/jsinterp.py | 703 ++++-----
|
||
yt_dlp/extractor/youtube/js/_director.py | 336 ++--
|
||
yt_dlp/extractor/youtube/js/_registry.py | 11
|
||
yt_dlp/extractor/youtube/js/provider.py | 275 +--
|
||
yt_dlp/extractor/youtube/js/utils.py | 5
|
||
yt_dlp/extractor/youtube/jsc/README.md | 129 +
|
||
yt_dlp/extractor/youtube/jsc/__init__.py | 5
|
||
yt_dlp/extractor/youtube/jsc/_builtin/bun.py | 79 +
|
||
yt_dlp/extractor/youtube/jsc/_builtin/bundle/__init__.py | 27
|
||
yt_dlp/extractor/youtube/jsc/_builtin/bundle/bun.lib.js | 9
|
||
yt_dlp/extractor/youtube/jsc/_builtin/bundle/core.js | 501 ------
|
||
yt_dlp/extractor/youtube/jsc/_builtin/bundle/deno.lib.js | 9
|
||
yt_dlp/extractor/youtube/jsc/_builtin/bundle/jsc.js | 1005 +++++++-------
|
||
yt_dlp/extractor/youtube/jsc/_builtin/deno.py | 82 +
|
||
yt_dlp/extractor/youtube/jsc/_builtin/jsinterp.py | 288 ++++
|
||
yt_dlp/extractor/youtube/jsc/_builtin/node.py | 47
|
||
yt_dlp/extractor/youtube/jsc/_builtin/runtime.py | 283 +++
|
||
yt_dlp/extractor/youtube/jsc/_builtin/scripts/__init__.py | 12
|
||
yt_dlp/extractor/youtube/jsc/_builtin/scripts/bun.lib.js | 3
|
||
yt_dlp/extractor/youtube/jsc/_builtin/scripts/core.js | 501 ++++++
|
||
yt_dlp/extractor/youtube/jsc/_builtin/scripts/deno.lib.js | 3
|
||
yt_dlp/extractor/youtube/jsc/_director.py | 234 +++
|
||
yt_dlp/extractor/youtube/jsc/_registry.py | 4
|
||
yt_dlp/extractor/youtube/jsc/provider.py | 157 ++
|
||
yt_dlp/extractor/youtube/jsc/utils.py | 1
|
||
yt_dlp/extractor/youtube/pot/_director.py | 11
|
||
yt_dlp/extractor/youtube/pot/_provider.py | 6
|
||
yt_dlp/globals.py | 6
|
||
yt_dlp/options.py | 35
|
||
yt_dlp/utils/_jsruntime.py | 57
|
||
README.md | 21
|
||
test/test_jsc/conftest.py | 43
|
||
test/test_jsc/test_runtime.py | 85 +
|
||
yt_dlp/YoutubeDL.py | 44
|
||
yt_dlp/__init__.py | 18
|
||
yt_dlp/extractor/youtube/_video.py | 935 +++++---------
|
||
yt_dlp/extractor/youtube/jsc/README.md | 129 +
|
||
yt_dlp/extractor/youtube/jsc/__init__.py | 5
|
||
yt_dlp/extractor/youtube/jsc/_builtin/bun.py | 79 +
|
||
yt_dlp/extractor/youtube/jsc/_builtin/bundle/core.js | 504 +++++++
|
||
yt_dlp/extractor/youtube/jsc/_builtin/deno.py | 82 +
|
||
yt_dlp/extractor/youtube/jsc/_builtin/jsinterp.py | 288 ++++
|
||
yt_dlp/extractor/youtube/jsc/_builtin/node.py | 47
|
||
yt_dlp/extractor/youtube/jsc/_builtin/runtime.py | 283 ++++
|
||
yt_dlp/extractor/youtube/jsc/_builtin/scripts/__init__.py | 12
|
||
yt_dlp/extractor/youtube/jsc/_builtin/scripts/bun.lib.js | 3
|
||
yt_dlp/extractor/youtube/jsc/_builtin/scripts/core.js | 501 +++++++
|
||
yt_dlp/extractor/youtube/jsc/_builtin/scripts/deno.lib.js | 3
|
||
yt_dlp/extractor/youtube/jsc/_director.py | 234 +++
|
||
yt_dlp/extractor/youtube/jsc/_registry.py | 4
|
||
yt_dlp/extractor/youtube/jsc/provider.py | 157 ++
|
||
yt_dlp/extractor/youtube/jsc/utils.py | 1
|
||
yt_dlp/extractor/youtube/pot/_director.py | 11
|
||
yt_dlp/extractor/youtube/pot/_provider.py | 6
|
||
yt_dlp/globals.py | 6
|
||
yt_dlp/options.py | 35
|
||
yt_dlp/utils/_jsruntime.py | 57
|
||
27 files changed, 3036 insertions(+), 557 deletions(-)
|
||
create mode 100644 yt_dlp/extractor/youtube/js/README.md
|
||
create mode 100644 yt_dlp/extractor/youtube/js/__init__.py
|
||
create mode 100644 yt_dlp/extractor/youtube/js/_builtin/__init__.py
|
||
create mode 100644 yt_dlp/extractor/youtube/js/_builtin/jsinterp.py
|
||
create mode 100644 yt_dlp/extractor/youtube/js/_director.py
|
||
create mode 100644 yt_dlp/extractor/youtube/js/_registry.py
|
||
create mode 100644 yt_dlp/extractor/youtube/js/provider.py
|
||
create mode 100644 yt_dlp/extractor/youtube/js/utils.py
|
||
|
||
Index: yt-dlp/README.md
|
||
===================================================================
|
||
--- yt-dlp.orig/README.md 2025-09-23 08:47:15.000000000 +0200
|
||
+++ yt-dlp/README.md 2025-09-25 21:17:17.679575923 +0200
|
||
@@ -362,6 +362,27 @@
|
||
--no-plugin-dirs Clear plugin directories to search,
|
||
including defaults and those provided by
|
||
previous --plugin-dirs
|
||
+ --js-runtimes RUNTIME[:PATH] Additional JavaScript runtime to enable,
|
||
+ with an optional path to the runtime
|
||
+ location. This option can be used multiple
|
||
+ times to enable multiple runtimes. Supported
|
||
+ runtimes: deno, node, bun. By default, only
|
||
+ "deno" runtime is enabled.
|
||
+ --no-js-runtimes Clear JavaScript runtimes to enable,
|
||
+ including defaults and those provided by
|
||
+ previous --js-runtimes
|
||
+ --download-ext-components COMPONENT
|
||
+ Specify external components that yt-dlp is
|
||
+ allowed to download when needed. You can use
|
||
+ this option multiple times to allow multiple
|
||
+ components. Supported values: npm
|
||
+ (JavaScript dependencies from npm), ejs-
|
||
+ github (official JS scripts from yt-dlp-ejs
|
||
+ GitHub). By default, no external components
|
||
+ are allowed.
|
||
+ --no-download-ext-components Disallow downloading of all external
|
||
+ components, including any previously allowed
|
||
+ by --download-ext-components or defaults.
|
||
--flat-playlist Do not extract a playlist's URL result
|
||
entries; some entry metadata may be missing
|
||
and downloading may be bypassed
|
||
Index: yt-dlp/test/test_jsc/conftest.py
|
||
===================================================================
|
||
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
|
||
+++ yt-dlp/test/test_jsc/conftest.py 2025-09-25 21:17:17.681033945 +0200
|
||
@@ -0,0 +1,43 @@
|
||
+import collections
|
||
+
|
||
+import pytest
|
||
+
|
||
+import yt_dlp.globals
|
||
+from yt_dlp import YoutubeDL
|
||
+from yt_dlp.extractor.common import InfoExtractor
|
||
+from yt_dlp.extractor.youtube.pot._provider import IEContentProviderLogger
|
||
+
|
||
+
|
||
+class MockLogger(IEContentProviderLogger):
|
||
+ log_level = IEContentProviderLogger.LogLevel.TRACE
|
||
+
|
||
+ def __init__(self, *args, **kwargs):
|
||
+ super().__init__(*args, **kwargs)
|
||
+ self.messages = collections.defaultdict(list)
|
||
+
|
||
+ def trace(self, message: str):
|
||
+ self.messages['trace'].append(message)
|
||
+
|
||
+ def debug(self, message: str, *, once=False):
|
||
+ self.messages['debug'].append(message)
|
||
+
|
||
+ def info(self, message: str):
|
||
+ self.messages['info'].append(message)
|
||
+
|
||
+ def warning(self, message: str, *, once=False):
|
||
+ self.messages['warning'].append(message)
|
||
+
|
||
+ def error(self, message: str):
|
||
+ self.messages['error'].append(message)
|
||
+
|
||
+
|
||
+@pytest.fixture
|
||
+def ie() -> InfoExtractor:
|
||
+ runtime_names = yt_dlp.globals.supported_js_runtimes.value
|
||
+ ydl = YoutubeDL({'js_runtimes': {key: {} for key in runtime_names}})
|
||
+ return ydl.get_info_extractor('Youtube')
|
||
+
|
||
+
|
||
+@pytest.fixture
|
||
+def logger() -> MockLogger:
|
||
+ return MockLogger()
|
||
Index: yt-dlp/test/test_jsc/test_runtime.py
|
||
===================================================================
|
||
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
|
||
+++ yt-dlp/test/test_jsc/test_runtime.py 2025-09-25 21:17:17.681178518 +0200
|
||
@@ -0,0 +1,85 @@
|
||
+from __future__ import annotations
|
||
+
|
||
+import json
|
||
+
|
||
+import pytest
|
||
+try:
|
||
+ import yt_dlp_jsc
|
||
+except ImportError:
|
||
+ yt_dlp_jsc = None
|
||
+
|
||
+from yt_dlp.extractor.youtube.jsc.provider import (
|
||
+ JsChallengeRequest,
|
||
+ JsChallengeType,
|
||
+ JsChallengeProviderResponse,
|
||
+ JsChallengeResponse,
|
||
+ NChallengeInput,
|
||
+ NChallengeOutput,
|
||
+ SigChallengeInput,
|
||
+ SigChallengeOutput,
|
||
+)
|
||
+from yt_dlp.extractor.youtube.jsc._builtin.bun import BunJCP
|
||
+from yt_dlp.extractor.youtube.jsc._builtin.deno import DenoJCP
|
||
+from yt_dlp.extractor.youtube.jsc._builtin.node import NodeJCP
|
||
+
|
||
+
|
||
+pytestmark = pytest.mark.skipif(not yt_dlp_jsc, reason='yt-dlp-jsc not available')
|
||
+
|
||
+TESTS = [
|
||
+ JsChallengeRequest(JsChallengeType.N, NChallengeInput('https://www.youtube.com/s/player/3d3ba064/player_ias_tce.vflset/en_US/base.js', [
|
||
+ 'ZdZIqFPQK-Ty8wId',
|
||
+ '4GMrWHyKI5cEvhDO',
|
||
+ ])),
|
||
+ JsChallengeRequest(JsChallengeType.SIG, SigChallengeInput('https://www.youtube.com/s/player/3d3ba064/player_ias_tce.vflset/en_US/base.js', [
|
||
+ 'gN7a-hudCuAuPH6fByOk1_GNXN0yNMHShjZXS2VOgsEItAJz0tipeavEOmNdYN-wUtcEqD3bCXjc0iyKfAyZxCBGgIARwsSdQfJ2CJtt',
|
||
+ ])),
|
||
+ JsChallengeRequest(JsChallengeType.N, NChallengeInput('https://www.youtube.com/s/player/5ec65609/player_ias_tce.vflset/en_US/base.js', [
|
||
+ '0eRGgQWJGfT5rFHFj',
|
||
+ ])),
|
||
+ JsChallengeRequest(JsChallengeType.SIG, SigChallengeInput('https://www.youtube.com/s/player/5ec65609/player_ias_tce.vflset/en_US/base.js', [
|
||
+ 'AAJAJfQdSswRQIhAMG5SN7-cAFChdrE7tLA6grH0rTMICA1mmDc0HoXgW3CAiAQQ4=CspfaF_vt82XH5yewvqcuEkvzeTsbRuHssRMyJQ=I',
|
||
+ ])),
|
||
+ JsChallengeRequest(JsChallengeType.N, NChallengeInput('https://www.youtube.com/s/player/6742b2b9/player_ias_tce.vflset/en_US/base.js', [
|
||
+ '_HPB-7GFg1VTkn9u',
|
||
+ 'K1t_fcB6phzuq2SF',
|
||
+ ])),
|
||
+ JsChallengeRequest(JsChallengeType.SIG, SigChallengeInput('https://www.youtube.com/s/player/6742b2b9/player_ias_tce.vflset/en_US/base.js', [
|
||
+ 'MMGZJMUucirzS_SnrSPYsc85CJNnTUi6GgR5NKn-znQEICACojE8MHS6S7uYq4TGjQX_D4aPk99hNU6wbTvorvVVMgIARwsSdQfJAA',
|
||
+ ])),
|
||
+]
|
||
+
|
||
+RESPONSES = [
|
||
+ JsChallengeProviderResponse(test, JsChallengeResponse(test.type, (
|
||
+ NChallengeOutput if test.type is JsChallengeType.N else SigChallengeOutput
|
||
+ )(dict(zip(test.input.challenges, results)))))
|
||
+ for test, results in zip(TESTS, [
|
||
+ ['qmtUsIz04xxiNW', 'N9gmEX7YhKTSmw'],
|
||
+ ['ttJC2JfQdSswRAIgGBCxZyAfKyi0cjXCb3gqEctUw-NYdNmOEvaepit0zJAtIEsgOV2SXZjhSHMNy0NXNG_1kNyBf6HPuAuCduh-a7O'],
|
||
+ ['4SvMpDQH-vBJCw'],
|
||
+ ['AJfQdSswRQIhAMG5SN7-cAFChdrE7tLA6grI0rTMICA1mmDc0HoXgW3CAiAQQ4HCspfaF_vt82XH5yewvqcuEkvzeTsbRuHssRMyJQ=='],
|
||
+ ['qUAsPryAO_ByYg', 'Y7PcOt3VE62mog'],
|
||
+ ['AJfQdSswRAIgMVVvrovTbw6UNh99kPa4D_XQjGT4qYu7S6SHM8EjoCACIEQnz-nKN5RgG6iUTnNJC58csYPSrnS_SzricuUMJZGM'],
|
||
+ ])
|
||
+]
|
||
+
|
||
+
|
||
+@pytest.fixture(params=[BunJCP, DenoJCP, NodeJCP])
|
||
+def jcp(request, ie, logger):
|
||
+ obj = request.param(ie, logger, settings={'debug': ['true']})
|
||
+ if not obj.is_available():
|
||
+ pytest.skip(f'{obj.PROVIDER_NAME} is not available')
|
||
+ return obj
|
||
+
|
||
+
|
||
+def test_bulk_requests(jcp):
|
||
+ assert list(jcp.bulk_solve(TESTS)) == RESPONSES
|
||
+
|
||
+
|
||
+def test_using_cached_player(jcp):
|
||
+ requests = TESTS[:3]
|
||
+ player = jcp._get_player(requests[0].video_id, requests[0].input.player_url)
|
||
+ initial = json.loads(jcp._run_js_runtime(jcp._construct_stdin(player, False, requests)))
|
||
+ preprocessed = initial.pop('preprocessed_player')
|
||
+ result = json.loads(jcp._run_js_runtime(jcp._construct_stdin(preprocessed, True, requests)))
|
||
+
|
||
+ assert initial == result
|
||
Index: yt-dlp/yt_dlp/YoutubeDL.py
|
||
===================================================================
|
||
--- yt-dlp.orig/yt_dlp/YoutubeDL.py 2025-09-23 08:45:39.000000000 +0200
|
||
+++ yt-dlp/yt_dlp/YoutubeDL.py 2025-09-25 21:17:17.681593799 +0200
|
||
@@ -42,6 +42,7 @@
|
||
plugin_pps,
|
||
all_plugins_loaded,
|
||
plugin_dirs,
|
||
+ supported_js_runtimes,
|
||
)
|
||
from .minicurses import format_text
|
||
from .networking import HEADRequest, Request, RequestDirector
|
||
@@ -533,6 +534,17 @@
|
||
See "EXTRACTOR ARGUMENTS" for details.
|
||
Argument values must always be a list of string(s).
|
||
E.g. {'youtube': {'skip': ['dash', 'hls']}}
|
||
+ js_runtimes: A dictionary of JavaScript runtime keys (in lower case) to enable
|
||
+ and a dictionary of additional configuration for the runtime.
|
||
+ If not specified, the default runtime of "deno" will be enabled.
|
||
+ The runtime configuration dictionary can have the following keys:
|
||
+ - path: Path to the executable (optional)
|
||
+ E.g. {'deno': {'path': '/path/to/deno'}}
|
||
+ download_ext_components: A list of external components that are allowed to be downloaded when required.
|
||
+ Supported components:
|
||
+ - `npm` (JS Dependencies from npm)
|
||
+ - `ejs-github` (Official JS Scripts from yt-dlp-ejs GitHub).
|
||
+ By default, no external components are allowed to be downloaded.
|
||
mark_watched: Mark videos watched (even with --simulate). Only for YouTube
|
||
|
||
The following options are deprecated and may be removed in the future:
|
||
@@ -717,6 +729,10 @@
|
||
else:
|
||
raise
|
||
|
||
+ # Note: this must be after plugins are loaded
|
||
+ self.params['js_runtimes'] = self.params.get('js_runtimes', {'deno': {}})
|
||
+ self._validate_js_runtimes(self.params['js_runtimes'])
|
||
+
|
||
self.params['compat_opts'] = set(self.params.get('compat_opts', ()))
|
||
self.params['http_headers'] = HTTPHeaderDict(std_headers, self.params.get('http_headers'))
|
||
self._load_cookies(self.params['http_headers'].get('Cookie')) # compat
|
||
@@ -829,6 +845,26 @@
|
||
|
||
self.archive = preload_download_archive(self.params.get('download_archive'))
|
||
|
||
+ def _validate_js_runtimes(self, runtimes):
|
||
+ if not (
|
||
+ isinstance(runtimes, dict)
|
||
+ and all(isinstance(k, str) and (v is None or isinstance(v, dict)) for k, v in runtimes.items())
|
||
+ ):
|
||
+ raise ValueError('Invalid js_runtimes format, expected a dict of {runtime: {config}}')
|
||
+
|
||
+ if unsupported_runtimes := runtimes.keys() - supported_js_runtimes.value.keys():
|
||
+ raise ValueError(
|
||
+ f'Unsupported JavaScript runtimes specified: {", ".join(unsupported_runtimes)}.'
|
||
+ f' Supported runtimes are: {", ".join(supported_js_runtimes.value.keys())}')
|
||
+
|
||
+ @functools.cached_property
|
||
+ def _js_runtimes(self):
|
||
+ runtimes = {}
|
||
+ for name, config in self.params.get('js_runtimes', {}).items():
|
||
+ runtime_cls = supported_js_runtimes.value.get(name)
|
||
+ runtimes[name] = runtime_cls(path=config.get('path')) if runtime_cls else None
|
||
+ return runtimes
|
||
+
|
||
def warn_if_short_id(self, argv):
|
||
# short YouTube ID starting with dash?
|
||
idxs = [
|
||
@@ -4064,6 +4100,14 @@
|
||
join_nonempty(*get_package_info(m)) for m in available_dependencies.values()
|
||
})) or 'none'))
|
||
|
||
+ if not self.params.get('js_runtimes'):
|
||
+ write_debug('JS runtimes: none (disabled)')
|
||
+ else:
|
||
+ write_debug('JS runtimes: %s' % (', '.join(sorted(
|
||
+ f'{name} (unknown)' if runtime is None else join_nonempty(runtime.info.name, runtime.info.version)
|
||
+ for name, runtime in self._js_runtimes.items() if runtime is None or runtime.info is not None
|
||
+ )) or 'none'))
|
||
+
|
||
write_debug(f'Proxy map: {self.proxies}')
|
||
write_debug(f'Request Handlers: {", ".join(rh.RH_NAME for rh in self._request_director.handlers.values())}')
|
||
|
||
Index: yt-dlp/yt_dlp/__init__.py
|
||
===================================================================
|
||
--- yt-dlp.orig/yt_dlp/__init__.py 2025-09-23 08:45:39.000000000 +0200
|
||
+++ yt-dlp/yt_dlp/__init__.py 2025-09-25 21:17:42.034536471 +0200
|
||
@@ -59,10 +59,16 @@
|
||
render_table,
|
||
setproctitle,
|
||
shell_quote,
|
||
+ traverse_obj,
|
||
variadic,
|
||
write_string,
|
||
)
|
||
from .utils._utils import _UnsafeExtensionError
|
||
+from .utils._jsruntime import (
|
||
+ BunJsRuntime as _BunJsRuntime,
|
||
+ DenoJsRuntime as _DenoJsRuntime,
|
||
+ NodeJsRuntime as _NodeJsRuntime,
|
||
+)
|
||
from .YoutubeDL import YoutubeDL
|
||
|
||
|
||
@@ -773,6 +779,10 @@
|
||
else opts.audioformat if (opts.extractaudio and opts.audioformat in FFmpegExtractAudioPP.SUPPORTED_EXTS)
|
||
else None)
|
||
|
||
+ js_runtimes = {
|
||
+ runtime.lower(): {'path': path} for runtime, path in (
|
||
+ [*arg.split(':', 1), None][:2] for arg in opts.js_runtimes)}
|
||
+
|
||
return ParsedOptions(parser, opts, urls, {
|
||
'usenetrc': opts.usenetrc,
|
||
'netrc_location': opts.netrc_location,
|
||
@@ -944,6 +954,8 @@
|
||
'_warnings': warnings,
|
||
'_deprecation_warnings': deprecation_warnings,
|
||
'compat_opts': opts.compat_opts,
|
||
+ 'js_runtimes': js_runtimes,
|
||
+ 'download_ext_components': opts.download_ext_components,
|
||
})
|
||
|
||
|
||
@@ -1086,6 +1098,12 @@
|
||
|
||
from .extractor import gen_extractors, list_extractors
|
||
|
||
+# Register JS runtimes
|
||
+from .globals import supported_js_runtimes
|
||
+supported_js_runtimes.value['deno'] = _DenoJsRuntime
|
||
+supported_js_runtimes.value['node'] = _NodeJsRuntime
|
||
+supported_js_runtimes.value['bun'] = _BunJsRuntime
|
||
+
|
||
__all__ = [
|
||
'YoutubeDL',
|
||
'gen_extractors',
|
||
Index: yt-dlp/yt_dlp/extractor/youtube/_video.py
|
||
===================================================================
|
||
--- yt-dlp.orig/yt_dlp/extractor/youtube/_video.py 2025-09-23 08:45:40.000000000 +0200
|
||
+++ yt-dlp/yt_dlp/extractor/youtube/_video.py 2025-09-25 21:17:17.682484914 +0200
|
||
@@ -4,9 +4,7 @@
|
||
import datetime as dt
|
||
import functools
|
||
import itertools
|
||
-import json
|
||
import math
|
||
-import os.path
|
||
import random
|
||
import re
|
||
import sys
|
||
@@ -26,10 +24,10 @@
|
||
_split_innertube_client,
|
||
short_client_name,
|
||
)
|
||
+from .jsc._director import initialize_jsc_director
|
||
+from .jsc.provider import JsChallengeRequest, JsChallengeType, NChallengeInput, SigChallengeInput
|
||
from .pot._director import initialize_pot_director
|
||
from .pot.provider import PoTokenContext, PoTokenRequest
|
||
-from ..openload import PhantomJSwrapper
|
||
-from ...jsinterp import JSInterpreter, LocalNameSpace
|
||
from ...networking.exceptions import HTTPError
|
||
from ...utils import (
|
||
NO_DEFAULT,
|
||
@@ -39,13 +37,11 @@
|
||
clean_html,
|
||
datetime_from_str,
|
||
filesize_from_tbr,
|
||
- filter_dict,
|
||
float_or_none,
|
||
format_field,
|
||
get_first,
|
||
int_or_none,
|
||
join_nonempty,
|
||
- js_to_json,
|
||
mimetype2ext,
|
||
orderedSet,
|
||
parse_codecs,
|
||
@@ -1827,8 +1823,6 @@
|
||
'tablet': 'player-plasma-ias-tablet-en_US.vflset/base.js',
|
||
}
|
||
_INVERSE_PLAYER_JS_VARIANT_MAP = {v: k for k, v in _PLAYER_JS_VARIANT_MAP.items()}
|
||
- _NSIG_FUNC_CACHE_ID = 'nsig func'
|
||
- _DUMMY_STRING = 'dlp_wins'
|
||
|
||
@classmethod
|
||
def suitable(cls, url):
|
||
@@ -1848,6 +1842,7 @@
|
||
def _real_initialize(self):
|
||
super()._real_initialize()
|
||
self._pot_director = initialize_pot_director(self)
|
||
+ self._jsc_director = initialize_jsc_director(self)
|
||
|
||
def _prepare_live_from_start_formats(self, formats, video_id, live_start_time, url, webpage_url, smuggled_data, is_live):
|
||
lock = threading.Lock()
|
||
@@ -1865,7 +1860,7 @@
|
||
microformats = traverse_obj(
|
||
prs, (..., 'microformat', 'playerMicroformatRenderer'),
|
||
expected_type=dict)
|
||
- _, live_status, _, formats, _ = self._list_formats(video_id, microformats, video_details, prs, player_url)
|
||
+ _, live_status, formats, _ = self._list_formats(video_id, microformats, video_details, prs, player_url)
|
||
is_live = live_status == 'is_live'
|
||
start_time = time.time()
|
||
|
||
@@ -2075,10 +2070,6 @@
|
||
variant = re.sub(r'[^a-zA-Z0-9]', '_', remove_end(player_path, '.js'))
|
||
return join_nonempty(player_id, variant)
|
||
|
||
- def _signature_cache_id(self, example_sig):
|
||
- """ Return a string representation of a signature """
|
||
- return '.'.join(str(len(part)) for part in example_sig.split('.'))
|
||
-
|
||
@classmethod
|
||
def _extract_player_info(cls, player_url):
|
||
for player_re in cls._PLAYER_INFO_RE:
|
||
@@ -2100,53 +2091,17 @@
|
||
self._code_cache[player_js_key] = code
|
||
return self._code_cache.get(player_js_key)
|
||
|
||
- def _extract_signature_function(self, video_id, player_url, example_sig):
|
||
- # Read from filesystem cache
|
||
- func_id = join_nonempty(
|
||
- self._player_js_cache_key(player_url), self._signature_cache_id(example_sig))
|
||
- assert os.path.basename(func_id) == func_id
|
||
-
|
||
- self.write_debug(f'Extracting signature function {func_id}')
|
||
- cache_spec, code = self.cache.load('youtube-sigfuncs', func_id, min_ver='2025.07.21'), None
|
||
-
|
||
- if not cache_spec:
|
||
- code = self._load_player(video_id, player_url)
|
||
- if code:
|
||
- res = self._parse_sig_js(code, player_url)
|
||
- test_string = ''.join(map(chr, range(len(example_sig))))
|
||
- cache_spec = [ord(c) for c in res(test_string)]
|
||
- self.cache.store('youtube-sigfuncs', func_id, cache_spec)
|
||
-
|
||
- return lambda s: ''.join(s[i] for i in cache_spec)
|
||
-
|
||
- def _parse_sig_js(self, jscode, player_url):
|
||
- # Examples where `sig` is funcname:
|
||
- # sig=function(a){a=a.split(""); ... ;return a.join("")};
|
||
- # ;c&&(c=sig(decodeURIComponent(c)),a.set(b,encodeURIComponent(c)));return a};
|
||
- # {var l=f,m=h.sp,n=sig(decodeURIComponent(h.s));l.set(m,encodeURIComponent(n))}
|
||
- # sig=function(J){J=J.split(""); ... ;return J.join("")};
|
||
- # ;N&&(N=sig(decodeURIComponent(N)),J.set(R,encodeURIComponent(N)));return J};
|
||
- # {var H=u,k=f.sp,v=sig(decodeURIComponent(f.s));H.set(k,encodeURIComponent(v))}
|
||
- funcname = self._search_regex(
|
||
- (r'\b(?P<var>[a-zA-Z0-9_$]+)&&\((?P=var)=(?P<sig>[a-zA-Z0-9_$]{2,})\(decodeURIComponent\((?P=var)\)\)',
|
||
- r'(?P<sig>[a-zA-Z0-9_$]+)\s*=\s*function\(\s*(?P<arg>[a-zA-Z0-9_$]+)\s*\)\s*{\s*(?P=arg)\s*=\s*(?P=arg)\.split\(\s*""\s*\)\s*;\s*[^}]+;\s*return\s+(?P=arg)\.join\(\s*""\s*\)',
|
||
- r'(?:\b|[^a-zA-Z0-9_$])(?P<sig>[a-zA-Z0-9_$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)(?:;[a-zA-Z0-9_$]{2}\.[a-zA-Z0-9_$]{2}\(a,\d+\))?',
|
||
- # Old patterns
|
||
- r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
|
||
- r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
|
||
- r'\bm=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(h\.s\)\)',
|
||
- # Obsolete patterns
|
||
- r'("|\')signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
|
||
- r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
|
||
- r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
|
||
- r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
|
||
- r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\('),
|
||
- jscode, 'Initial JS player signature function name', group='sig')
|
||
-
|
||
- varname, global_list = self._interpret_player_js_global_var(jscode, player_url)
|
||
- jsi = JSInterpreter(jscode)
|
||
- initial_function = jsi.extract_function(funcname, filter_dict({varname: global_list}))
|
||
- return lambda s: initial_function([s])
|
||
+ def _load_player_data_from_cache(self, name, player_url):
|
||
+ cache_id = (f'youtube-{name}', self._player_js_cache_key(player_url))
|
||
+
|
||
+ if data := self._player_cache.get(cache_id):
|
||
+ return data
|
||
+
|
||
+ data = self.cache.load(*cache_id, min_ver='2025.07.21')
|
||
+ if data:
|
||
+ self._player_cache[cache_id] = data
|
||
+
|
||
+ return data
|
||
|
||
def _cached(self, func, *cache_id):
|
||
def inner(*args, **kwargs):
|
||
@@ -2164,17 +2119,23 @@
|
||
return ret
|
||
return inner
|
||
|
||
- def _load_player_data_from_cache(self, name, player_url):
|
||
- cache_id = (f'youtube-{name}', self._player_js_cache_key(player_url))
|
||
-
|
||
- if data := self._player_cache.get(cache_id):
|
||
- return data
|
||
+ def _sig_spec_cache_id(self, player_url, spec_id):
|
||
+ return join_nonempty(self._player_js_cache_key(player_url), str(spec_id))
|
||
|
||
- data = self.cache.load(*cache_id, min_ver='2025.07.21')
|
||
- if data:
|
||
- self._player_cache[cache_id] = data
|
||
-
|
||
- return data
|
||
+ def _load_sig_spec_from_cache(self, spec_cache_id):
|
||
+ # This is almost identical to _load_player_data_from_cache
|
||
+ # TODO: consider merging the two helpers to remove the duplication
|
||
+ if spec_cache_id in self._player_cache:
|
||
+ return self._player_cache[spec_cache_id]
|
||
+ spec = self.cache.load('youtube-sigfuncs', spec_cache_id, min_ver='2025.07.21')
|
||
+ if spec:
|
||
+ self._player_cache[spec_cache_id] = spec
|
||
+ return spec
|
||
+
|
||
+ def _store_sig_spec_to_cache(self, spec_cache_id, spec):
|
||
+ if spec_cache_id not in self._player_cache:
|
||
+ self._player_cache[spec_cache_id] = spec
|
||
+ self.cache.store('youtube-sigfuncs', spec_cache_id, spec)
|
||
|
||
def _store_player_data_to_cache(self, name, player_url, data):
|
||
cache_id = (f'youtube-{name}', self._player_js_cache_key(player_url))
|
||
@@ -2182,218 +2143,6 @@
|
||
self.cache.store(*cache_id, data)
|
||
self._player_cache[cache_id] = data
|
||
|
||
- def _decrypt_signature(self, s, video_id, player_url):
|
||
- """Turn the encrypted s field into a working signature"""
|
||
- extract_sig = self._cached(
|
||
- self._extract_signature_function, 'sig', player_url, self._signature_cache_id(s))
|
||
- func = extract_sig(video_id, player_url, s)
|
||
- return func(s)
|
||
-
|
||
- def _decrypt_nsig(self, s, video_id, player_url):
|
||
- """Turn the encrypted n field into a working signature"""
|
||
- if player_url is None:
|
||
- raise ExtractorError('Cannot decrypt nsig without player_url')
|
||
- player_url = urljoin('https://www.youtube.com', player_url)
|
||
-
|
||
- try:
|
||
- jsi, _, func_code = self._extract_n_function_code(video_id, player_url)
|
||
- except ExtractorError as e:
|
||
- raise ExtractorError('Unable to extract nsig function code', cause=e)
|
||
-
|
||
- try:
|
||
- extract_nsig = self._cached(self._extract_n_function_from_code, self._NSIG_FUNC_CACHE_ID, player_url)
|
||
- ret = extract_nsig(jsi, func_code)(s)
|
||
- except JSInterpreter.Exception as e:
|
||
- try:
|
||
- jsi = PhantomJSwrapper(self, timeout=5000)
|
||
- except ExtractorError:
|
||
- raise e
|
||
- self.report_warning(
|
||
- f'Native nsig extraction failed: Trying with PhantomJS\n'
|
||
- f' n = {s} ; player = {player_url}', video_id)
|
||
- self.write_debug(e, only_once=True)
|
||
-
|
||
- args, func_body = func_code
|
||
- ret = jsi.execute(
|
||
- f'console.log(function({", ".join(args)}) {{ {func_body} }}({s!r}));',
|
||
- video_id=video_id, note='Executing signature code').strip()
|
||
-
|
||
- self.write_debug(f'Decrypted nsig {s} => {ret}')
|
||
- # Only cache nsig func JS code to disk if successful, and only once
|
||
- self._store_player_data_to_cache('nsig', player_url, func_code)
|
||
- return ret
|
||
-
|
||
- def _extract_n_function_name(self, jscode, player_url=None):
|
||
- varname, global_list = self._interpret_player_js_global_var(jscode, player_url)
|
||
- if debug_str := traverse_obj(global_list, (lambda _, v: v.endswith('-_w8_'), any)):
|
||
- pattern = r'''(?x)
|
||
- \{\s*return\s+%s\[%d\]\s*\+\s*(?P<argname>[a-zA-Z0-9_$]+)\s*\}
|
||
- ''' % (re.escape(varname), global_list.index(debug_str))
|
||
- if match := re.search(pattern, jscode):
|
||
- pattern = r'''(?x)
|
||
- \{\s*\)%s\(\s*
|
||
- (?:
|
||
- (?P<funcname_a>[a-zA-Z0-9_$]+)\s*noitcnuf\s*
|
||
- |noitcnuf\s*=\s*(?P<funcname_b>[a-zA-Z0-9_$]+)(?:\s+rav)?
|
||
- )[;\n]
|
||
- ''' % re.escape(match.group('argname')[::-1])
|
||
- if match := re.search(pattern, jscode[match.start()::-1]):
|
||
- a, b = match.group('funcname_a', 'funcname_b')
|
||
- return (a or b)[::-1]
|
||
- self.write_debug(join_nonempty(
|
||
- 'Initial search was unable to find nsig function name',
|
||
- player_url and f' player = {player_url}', delim='\n'), only_once=True)
|
||
-
|
||
- # Examples (with placeholders nfunc, narray, idx):
|
||
- # * .get("n"))&&(b=nfunc(b)
|
||
- # * .get("n"))&&(b=narray[idx](b)
|
||
- # * b=String.fromCharCode(110),c=a.get(b))&&c=narray[idx](c)
|
||
- # * a.D&&(b="nn"[+a.D],c=a.get(b))&&(c=narray[idx](c),a.set(b,c),narray.length||nfunc("")
|
||
- # * a.D&&(PL(a),b=a.j.n||null)&&(b=narray[0](b),a.set("n",b),narray.length||nfunc("")
|
||
- # * a.D&&(b="nn"[+a.D],vL(a),c=a.j[b]||null)&&(c=narray[idx](c),a.set(b,c),narray.length||nfunc("")
|
||
- # * J.J="";J.url="";J.Z&&(R="nn"[+J.Z],mW(J),N=J.K[R]||null)&&(N=narray[idx](N),J.set(R,N))}};
|
||
- funcname, idx = self._search_regex(
|
||
- r'''(?x)
|
||
- (?:
|
||
- \.get\("n"\)\)&&\(b=|
|
||
- (?:
|
||
- b=String\.fromCharCode\(110\)|
|
||
- (?P<str_idx>[a-zA-Z0-9_$.]+)&&\(b="nn"\[\+(?P=str_idx)\]
|
||
- )
|
||
- (?:
|
||
- ,[a-zA-Z0-9_$]+\(a\))?,c=a\.
|
||
- (?:
|
||
- get\(b\)|
|
||
- [a-zA-Z0-9_$]+\[b\]\|\|null
|
||
- )\)&&\(c=|
|
||
- \b(?P<var>[a-zA-Z0-9_$]+)=
|
||
- )(?P<nfunc>[a-zA-Z0-9_$]+)(?:\[(?P<idx>\d+)\])?\([a-zA-Z]\)
|
||
- (?(var),[a-zA-Z0-9_$]+\.set\((?:"n+"|[a-zA-Z0-9_$]+)\,(?P=var)\))''',
|
||
- jscode, 'n function name', group=('nfunc', 'idx'), default=(None, None))
|
||
- if not funcname:
|
||
- self.report_warning(join_nonempty(
|
||
- 'Falling back to generic n function search',
|
||
- player_url and f' player = {player_url}', delim='\n'), only_once=True)
|
||
- return self._search_regex(
|
||
- r'''(?xs)
|
||
- ;\s*(?P<name>[a-zA-Z0-9_$]+)\s*=\s*function\([a-zA-Z0-9_$]+\)
|
||
- \s*\{(?:(?!};).)+?return\s*(?P<q>["'])[\w-]+_w8_(?P=q)\s*\+\s*[a-zA-Z0-9_$]+''',
|
||
- jscode, 'Initial JS player n function name', group='name')
|
||
- elif not idx:
|
||
- return funcname
|
||
-
|
||
- return json.loads(js_to_json(self._search_regex(
|
||
- rf'var {re.escape(funcname)}\s*=\s*(\[.+?\])\s*[,;]', jscode,
|
||
- f'Initial JS player n function list ({funcname}.{idx})')))[int(idx)]
|
||
-
|
||
- def _interpret_player_js_global_var(self, jscode, player_url):
|
||
- """Returns tuple of: variable name string, variable value list"""
|
||
- extract_global_var = self._cached(self._search_regex, 'js global array', player_url)
|
||
- varcode, varname, varvalue = extract_global_var(
|
||
- r'''(?x)
|
||
- (?P<q1>["\'])use\s+strict(?P=q1);\s*
|
||
- (?P<code>
|
||
- var\s+(?P<name>[a-zA-Z0-9_$]+)\s*=\s*
|
||
- (?P<value>
|
||
- (?P<q2>["\'])(?:(?!(?P=q2)).|\\.)+(?P=q2)
|
||
- \.split\((?P<q3>["\'])(?:(?!(?P=q3)).)+(?P=q3)\)
|
||
- |\[\s*(?:(?P<q4>["\'])(?:(?!(?P=q4)).|\\.)*(?P=q4)\s*,?\s*)+\]
|
||
- )
|
||
- )[;,]
|
||
- ''', jscode, 'global variable', group=('code', 'name', 'value'), default=(None, None, None))
|
||
- if not varcode:
|
||
- self.write_debug(join_nonempty(
|
||
- 'No global array variable found in player JS',
|
||
- player_url and f' player = {player_url}', delim='\n'), only_once=True)
|
||
- return None, None
|
||
-
|
||
- jsi = JSInterpreter(varcode)
|
||
- interpret_global_var = self._cached(jsi.interpret_expression, 'js global list', player_url)
|
||
- return varname, interpret_global_var(varvalue, LocalNameSpace(), allow_recursion=10)
|
||
-
|
||
- def _fixup_n_function_code(self, argnames, nsig_code, jscode, player_url):
|
||
- # Fixup global array
|
||
- varname, global_list = self._interpret_player_js_global_var(jscode, player_url)
|
||
- if varname and global_list:
|
||
- nsig_code = f'var {varname}={json.dumps(global_list)}; {nsig_code}'
|
||
- else:
|
||
- varname = self._DUMMY_STRING
|
||
- global_list = []
|
||
-
|
||
- # Fixup typeof check
|
||
- undefined_idx = global_list.index('undefined') if 'undefined' in global_list else r'\d+'
|
||
- fixed_code = re.sub(
|
||
- fr'''(?x)
|
||
- ;\s*if\s*\(\s*typeof\s+[a-zA-Z0-9_$]+\s*===?\s*(?:
|
||
- (["\'])undefined\1|
|
||
- {re.escape(varname)}\[{undefined_idx}\]
|
||
- )\s*\)\s*return\s+{re.escape(argnames[0])};
|
||
- ''', ';', nsig_code)
|
||
- if fixed_code == nsig_code:
|
||
- self.write_debug(join_nonempty(
|
||
- 'No typeof statement found in nsig function code',
|
||
- player_url and f' player = {player_url}', delim='\n'), only_once=True)
|
||
-
|
||
- # Fixup global funcs
|
||
- jsi = JSInterpreter(fixed_code)
|
||
- cache_id = (self._NSIG_FUNC_CACHE_ID, player_url)
|
||
- try:
|
||
- self._cached(
|
||
- self._extract_n_function_from_code, *cache_id)(jsi, (argnames, fixed_code))(self._DUMMY_STRING)
|
||
- except JSInterpreter.Exception:
|
||
- self._player_cache.pop(cache_id, None)
|
||
-
|
||
- global_funcnames = jsi._undefined_varnames
|
||
- debug_names = []
|
||
- jsi = JSInterpreter(jscode)
|
||
- for func_name in global_funcnames:
|
||
- try:
|
||
- func_args, func_code = jsi.extract_function_code(func_name)
|
||
- fixed_code = f'var {func_name} = function({", ".join(func_args)}) {{ {func_code} }}; {fixed_code}'
|
||
- debug_names.append(func_name)
|
||
- except Exception:
|
||
- self.report_warning(join_nonempty(
|
||
- f'Unable to extract global nsig function {func_name} from player JS',
|
||
- player_url and f' player = {player_url}', delim='\n'), only_once=True)
|
||
-
|
||
- if debug_names:
|
||
- self.write_debug(f'Extracted global nsig functions: {", ".join(debug_names)}')
|
||
-
|
||
- return argnames, fixed_code
|
||
-
|
||
- def _extract_n_function_code(self, video_id, player_url):
|
||
- player_id = self._extract_player_info(player_url)
|
||
- func_code = self._load_player_data_from_cache('nsig', player_url)
|
||
- jscode = func_code or self._load_player(video_id, player_url)
|
||
- jsi = JSInterpreter(jscode)
|
||
-
|
||
- if func_code:
|
||
- return jsi, player_id, func_code
|
||
-
|
||
- func_name = self._extract_n_function_name(jscode, player_url=player_url)
|
||
-
|
||
- # XXX: Work around (a) global array variable, (b) `typeof` short-circuit, (c) global functions
|
||
- func_code = self._fixup_n_function_code(*jsi.extract_function_code(func_name), jscode, player_url)
|
||
-
|
||
- return jsi, player_id, func_code
|
||
-
|
||
- def _extract_n_function_from_code(self, jsi, func_code):
|
||
- func = jsi.extract_function_from_code(*func_code)
|
||
-
|
||
- def extract_nsig(s):
|
||
- try:
|
||
- ret = func([s])
|
||
- except JSInterpreter.Exception:
|
||
- raise
|
||
- except Exception as e:
|
||
- raise JSInterpreter.Exception(traceback.format_exc(), cause=e)
|
||
-
|
||
- if ret.startswith('enhanced_except_') or ret.endswith(s):
|
||
- raise JSInterpreter.Exception('Signature function returned an exception')
|
||
- return ret
|
||
-
|
||
- return extract_nsig
|
||
-
|
||
def _extract_signature_timestamp(self, video_id, player_url, ytcfg=None, fatal=False):
|
||
"""
|
||
Extract signatureTimestamp (sts)
|
||
@@ -3226,12 +2975,12 @@
|
||
sd[STREAMING_DATA_INNERTUBE_CONTEXT] = innertube_context
|
||
sd[STREAMING_DATA_FETCH_SUBS_PO_TOKEN] = fetch_subs_po_token_func
|
||
sd[STREAMING_DATA_IS_PREMIUM_SUBSCRIBER] = is_premium_subscriber
|
||
+ sd[STREAMING_DATA_FETCHED_TIMESTAMP] = fetched_timestamp
|
||
for f in traverse_obj(sd, (('formats', 'adaptiveFormats'), ..., {dict})):
|
||
f[STREAMING_DATA_CLIENT_NAME] = client
|
||
f[STREAMING_DATA_FETCH_GVS_PO_TOKEN] = fetch_gvs_po_token_func
|
||
f[STREAMING_DATA_IS_PREMIUM_SUBSCRIBER] = is_premium_subscriber
|
||
f[STREAMING_DATA_PLAYER_TOKEN_PROVIDED] = bool(player_po_token)
|
||
- f[STREAMING_DATA_FETCHED_TIMESTAMP] = fetched_timestamp
|
||
if deprioritize_pr:
|
||
deprioritized_prs.append(pr)
|
||
else:
|
||
@@ -3311,12 +3060,13 @@
|
||
else:
|
||
self.report_warning(msg, only_once=True)
|
||
|
||
- def _extract_formats_and_subtitles(self, streaming_data, video_id, player_url, live_status, duration):
|
||
+ def _extract_formats_and_subtitles(self, video_id, player_responses, player_url, live_status, duration):
|
||
CHUNK_SIZE = 10 << 20
|
||
PREFERRED_LANG_VALUE = 10
|
||
original_language = None
|
||
itags, stream_ids = collections.defaultdict(set), []
|
||
itag_qualities, res_qualities = {}, {0: None}
|
||
+ subtitles = {}
|
||
q = qualities([
|
||
# Normally tiny is the smallest video-only formats. But
|
||
# audio-only formats with unknown quality may get tagged as tiny
|
||
@@ -3324,7 +3074,6 @@
|
||
'audio_quality_ultralow', 'audio_quality_low', 'audio_quality_medium', 'audio_quality_high', # Audio only formats
|
||
'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres',
|
||
])
|
||
- streaming_formats = traverse_obj(streaming_data, (..., ('formats', 'adaptiveFormats'), ...))
|
||
format_types = self._configuration_arg('formats')
|
||
all_formats = 'duplicate' in format_types
|
||
if self._configuration_arg('include_duplicate_formats'):
|
||
@@ -3332,6 +3081,9 @@
|
||
self._downloader.deprecated_feature('[youtube] include_duplicate_formats extractor argument is deprecated. '
|
||
'Use formats=duplicate extractor argument instead')
|
||
|
||
+ def solve_sig(s, spec):
|
||
+ return ''.join(s[i] for i in spec)
|
||
+
|
||
def build_fragments(f):
|
||
return LazyList({
|
||
'url': update_url_query(f['url'], {
|
||
@@ -3351,279 +3103,363 @@
|
||
# For handling potential pre-playback required waiting period
|
||
playback_wait = int_or_none(self._configuration_arg('playback_wait', [None])[0], default=6)
|
||
|
||
- for fmt in streaming_formats:
|
||
- client_name = fmt[STREAMING_DATA_CLIENT_NAME]
|
||
- available_at = fmt[STREAMING_DATA_FETCHED_TIMESTAMP] + playback_wait
|
||
- if fmt.get('targetDurationSec'):
|
||
+ for pr in player_responses:
|
||
+ streaming_data = traverse_obj(pr, 'streamingData')
|
||
+ if not streaming_data:
|
||
continue
|
||
|
||
- itag = str_or_none(fmt.get('itag'))
|
||
- audio_track = fmt.get('audioTrack') or {}
|
||
- stream_id = (itag, audio_track.get('id'), fmt.get('isDrc'))
|
||
- if not all_formats:
|
||
- if stream_id in stream_ids:
|
||
- continue
|
||
-
|
||
- quality = fmt.get('quality')
|
||
- height = int_or_none(fmt.get('height'))
|
||
- if quality == 'tiny' or not quality:
|
||
- quality = fmt.get('audioQuality', '').lower() or quality
|
||
- # The 3gp format (17) in android client has a quality of "small",
|
||
- # but is actually worse than other formats
|
||
- if itag == '17':
|
||
- quality = 'tiny'
|
||
- if quality:
|
||
- if itag:
|
||
- itag_qualities[itag] = quality
|
||
- if height:
|
||
- res_qualities[height] = quality
|
||
-
|
||
- display_name = audio_track.get('displayName') or ''
|
||
- is_original = 'original' in display_name.lower()
|
||
- is_descriptive = 'descriptive' in display_name.lower()
|
||
- is_default = audio_track.get('audioIsDefault')
|
||
- language_code = audio_track.get('id', '').split('.')[0]
|
||
- if language_code and (is_original or (is_default and not original_language)):
|
||
- original_language = language_code
|
||
-
|
||
- has_drm = bool(fmt.get('drmFamilies'))
|
||
-
|
||
- # FORMAT_STREAM_TYPE_OTF(otf=1) requires downloading the init fragment
|
||
- # (adding `&sq=0` to the URL) and parsing emsg box to determine the
|
||
- # number of fragment that would subsequently requested with (`&sq=N`)
|
||
- if fmt.get('type') == 'FORMAT_STREAM_TYPE_OTF' and not has_drm:
|
||
- continue
|
||
-
|
||
- if has_drm:
|
||
- msg = f'Some {client_name} client https formats have been skipped as they are DRM protected. '
|
||
- if client_name == 'tv':
|
||
- msg += (
|
||
- f'{"Your account" if self.is_authenticated else "The current session"} may have '
|
||
- f'an experiment that applies DRM to all videos on the tv client. '
|
||
- f'See https://github.com/yt-dlp/yt-dlp/issues/12563 for more details.'
|
||
- )
|
||
- self.report_warning(msg, video_id, only_once=True)
|
||
-
|
||
- fmt_url = fmt.get('url')
|
||
- if not fmt_url:
|
||
- sc = urllib.parse.parse_qs(fmt.get('signatureCipher'))
|
||
- fmt_url = url_or_none(try_get(sc, lambda x: x['url'][0]))
|
||
- encrypted_sig = try_get(sc, lambda x: x['s'][0])
|
||
- if not all((sc, fmt_url, player_url, encrypted_sig)):
|
||
- msg = f'Some {client_name} client https formats have been skipped as they are missing a url. '
|
||
- if client_name in ('web', 'web_safari'):
|
||
- msg += 'YouTube is forcing SABR streaming for this client. '
|
||
- else:
|
||
+ fetch_po_token_func = streaming_data[STREAMING_DATA_FETCH_GVS_PO_TOKEN]
|
||
+ is_premium_subscriber = streaming_data[STREAMING_DATA_IS_PREMIUM_SUBSCRIBER]
|
||
+ player_token_provided = streaming_data[STREAMING_DATA_PLAYER_TOKEN_PROVIDED]
|
||
+ client_name = streaming_data.get(STREAMING_DATA_CLIENT_NAME)
|
||
+ available_at = streaming_data[STREAMING_DATA_FETCHED_TIMESTAMP] + playback_wait
|
||
+ streaming_formats = traverse_obj(streaming_data, (('formats', 'adaptiveFormats'), ...))
|
||
+
|
||
+ def get_stream_id(fmt_stream):
|
||
+ return str_or_none(fmt_stream.get('itag')), traverse_obj(fmt_stream, 'audioTrack', 'id'), fmt_stream.get('isDrc')
|
||
+
|
||
+ def process_format_stream(fmt_stream, proto, missing_pot):
|
||
+ nonlocal original_language
|
||
+ itag = str_or_none(fmt_stream.get('itag'))
|
||
+ audio_track = fmt_stream.get('audioTrack') or {}
|
||
+ quality = fmt_stream.get('quality')
|
||
+ height = int_or_none(fmt_stream.get('height'))
|
||
+ if quality == 'tiny' or not quality:
|
||
+ quality = fmt_stream.get('audioQuality', '').lower() or quality
|
||
+ # The 3gp format (17) in android client has a quality of "small",
|
||
+ # but is actually worse than other formats
|
||
+ if itag == '17':
|
||
+ quality = 'tiny'
|
||
+ if quality:
|
||
+ if itag:
|
||
+ itag_qualities[itag] = quality
|
||
+ if height:
|
||
+ res_qualities[height] = quality
|
||
+
|
||
+ display_name = audio_track.get('displayName') or ''
|
||
+ is_original = 'original' in display_name.lower()
|
||
+ is_descriptive = 'descriptive' in display_name.lower()
|
||
+ is_default = audio_track.get('audioIsDefault')
|
||
+ language_code = audio_track.get('id', '').split('.')[0]
|
||
+ if language_code and (is_original or (is_default and not original_language)):
|
||
+ original_language = language_code
|
||
+
|
||
+ has_drm = bool(fmt_stream.get('drmFamilies'))
|
||
+
|
||
+ if has_drm:
|
||
+ msg = f'Some {client_name} client {proto} formats have been skipped as they are DRM protected. '
|
||
+ if client_name == 'tv':
|
||
msg += (
|
||
- f'YouTube may have enabled the SABR-only or Server-Side Ad Placement experiment for '
|
||
- f'{"your account" if self.is_authenticated else "the current session"}. '
|
||
+ f'{"Your account" if self.is_authenticated else "The current session"} may have '
|
||
+ f'an experiment that applies DRM to all videos on the tv client. '
|
||
+ f'See https://github.com/yt-dlp/yt-dlp/issues/12563 for more details.'
|
||
)
|
||
- msg += 'See https://github.com/yt-dlp/yt-dlp/issues/12482 for more details'
|
||
self.report_warning(msg, video_id, only_once=True)
|
||
- continue
|
||
- try:
|
||
- fmt_url += '&{}={}'.format(
|
||
- traverse_obj(sc, ('sp', -1)) or 'signature',
|
||
- self._decrypt_signature(encrypted_sig, video_id, player_url),
|
||
- )
|
||
- except ExtractorError as e:
|
||
- self.report_warning(
|
||
- f'Signature extraction failed: Some formats may be missing\n'
|
||
- f' player = {player_url}\n'
|
||
- f' {bug_reports_message(before="")}',
|
||
- video_id=video_id, only_once=True)
|
||
- self.write_debug(
|
||
- f'{video_id}: Signature extraction failure info:\n'
|
||
- f' encrypted sig = {encrypted_sig}\n'
|
||
- f' player = {player_url}')
|
||
- self.write_debug(e, only_once=True)
|
||
- continue
|
||
|
||
- query = parse_qs(fmt_url)
|
||
- if query.get('n'):
|
||
- try:
|
||
- decrypt_nsig = self._cached(self._decrypt_nsig, 'nsig', query['n'][0])
|
||
- fmt_url = update_url_query(fmt_url, {
|
||
- 'n': decrypt_nsig(query['n'][0], video_id, player_url),
|
||
- })
|
||
- except ExtractorError as e:
|
||
- if player_url:
|
||
- self.report_warning(
|
||
- f'nsig extraction failed: Some formats may be missing\n'
|
||
- f' n = {query["n"][0]} ; player = {player_url}\n'
|
||
- f' {bug_reports_message(before="")}',
|
||
- video_id=video_id, only_once=True)
|
||
- self.write_debug(e, only_once=True)
|
||
- else:
|
||
- self.report_warning(
|
||
- 'Cannot decrypt nsig without player_url: Some formats may be missing',
|
||
- video_id=video_id, only_once=True)
|
||
- continue
|
||
+ tbr = float_or_none(fmt_stream.get('averageBitrate') or fmt_stream.get('bitrate'), 1000)
|
||
+ format_duration = traverse_obj(fmt_stream, ('approxDurationMs', {float_or_none(scale=1000)}))
|
||
+ # Some formats may have much smaller duration than others (possibly damaged during encoding)
|
||
+ # E.g. 2-nOtRESiUc Ref: https://github.com/yt-dlp/yt-dlp/issues/2823
|
||
+ # Make sure to avoid false positives with small duration differences.
|
||
+ # E.g. __2ABJjxzNo, ySuUZEjARPY
|
||
+ is_damaged = try_call(lambda: format_duration < duration // 2)
|
||
+ if is_damaged:
|
||
+ self.report_warning(
|
||
+ f'Some {client_name} client {proto} formats are possibly damaged. They will be deprioritized', video_id, only_once=True)
|
||
|
||
- tbr = float_or_none(fmt.get('averageBitrate') or fmt.get('bitrate'), 1000)
|
||
- format_duration = traverse_obj(fmt, ('approxDurationMs', {float_or_none(scale=1000)}))
|
||
- # Some formats may have much smaller duration than others (possibly damaged during encoding)
|
||
- # E.g. 2-nOtRESiUc Ref: https://github.com/yt-dlp/yt-dlp/issues/2823
|
||
- # Make sure to avoid false positives with small duration differences.
|
||
- # E.g. __2ABJjxzNo, ySuUZEjARPY
|
||
- is_damaged = try_call(lambda: format_duration < duration // 2)
|
||
- if is_damaged:
|
||
- self.report_warning(
|
||
- 'Some formats are possibly damaged. They will be deprioritized', video_id, only_once=True)
|
||
+ if missing_pot and 'missing_pot' not in self._configuration_arg('formats'):
|
||
+ self._report_pot_format_skipped(video_id, client_name, proto)
|
||
+ return None
|
||
+
|
||
+ name = fmt_stream.get('qualityLabel') or quality.replace('audio_quality_', '') or ''
|
||
+ fps = int_or_none(fmt_stream.get('fps')) or 0
|
||
+ dct = {
|
||
+ 'asr': int_or_none(fmt_stream.get('audioSampleRate')),
|
||
+ 'filesize': int_or_none(fmt_stream.get('contentLength')),
|
||
+ 'format_id': f'{itag}{"-drc" if fmt_stream.get("isDrc") else ""}',
|
||
+ 'format_note': join_nonempty(
|
||
+ join_nonempty(display_name, is_default and ' (default)', delim=''),
|
||
+ name, fmt_stream.get('isDrc') and 'DRC',
|
||
+ try_get(fmt_stream, lambda x: x['projectionType'].replace('RECTANGULAR', '').lower()),
|
||
+ try_get(fmt_stream, lambda x: x['spatialAudioType'].replace('SPATIAL_AUDIO_TYPE_', '').lower()),
|
||
+ is_damaged and 'DAMAGED', missing_pot and 'MISSING POT',
|
||
+ (self.get_param('verbose') or all_formats) and short_client_name(client_name),
|
||
+ delim=', '),
|
||
+ # Format 22 is likely to be damaged. See https://github.com/yt-dlp/yt-dlp/issues/3372
|
||
+ 'source_preference': (-5 if itag == '22' else -1) + (100 if 'Premium' in name else 0),
|
||
+ 'fps': fps if fps > 1 else None, # For some formats, fps is wrongly returned as 1
|
||
+ 'audio_channels': fmt_stream.get('audioChannels'),
|
||
+ 'height': height,
|
||
+ 'quality': q(quality) - bool(fmt_stream.get('isDrc')) / 2,
|
||
+ 'has_drm': has_drm,
|
||
+ 'tbr': tbr,
|
||
+ 'filesize_approx': filesize_from_tbr(tbr, format_duration),
|
||
+ 'width': int_or_none(fmt_stream.get('width')),
|
||
+ 'language': join_nonempty(language_code, 'desc' if is_descriptive else '') or None,
|
||
+ 'language_preference': PREFERRED_LANG_VALUE if is_original else 5 if is_default else -10 if is_descriptive else -1,
|
||
+ # Strictly de-prioritize damaged and 3gp formats
|
||
+ 'preference': -10 if is_damaged else -2 if itag == '17' else None,
|
||
+ }
|
||
+ mime_mobj = re.match(
|
||
+ r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', fmt_stream.get('mimeType') or '')
|
||
+ if mime_mobj:
|
||
+ dct['ext'] = mimetype2ext(mime_mobj.group(1))
|
||
+ dct.update(parse_codecs(mime_mobj.group(2)))
|
||
+
|
||
+ single_stream = 'none' in (dct.get('acodec'), dct.get('vcodec'))
|
||
+ if single_stream and dct.get('ext'):
|
||
+ dct['container'] = dct['ext'] + '_dash'
|
||
+
|
||
+ return dct
|
||
+
|
||
+ def process_https_formats():
|
||
+ proto = 'https'
|
||
+ https_fmts = []
|
||
+ for fmt_stream in streaming_formats:
|
||
+ if fmt_stream.get('targetDurationSec'):
|
||
+ continue
|
||
|
||
- fetch_po_token_func = fmt[STREAMING_DATA_FETCH_GVS_PO_TOKEN]
|
||
- pot_policy: GvsPoTokenPolicy = self._get_default_ytcfg(client_name)['GVS_PO_TOKEN_POLICY'][StreamingProtocol.HTTPS]
|
||
+ # FORMAT_STREAM_TYPE_OTF(otf=1) requires downloading the init fragment
|
||
+ # (adding `&sq=0` to the URL) and parsing emsg box to determine the
|
||
+ # number of fragment that would subsequently requested with (`&sq=N`)
|
||
+ if fmt_stream.get('type') == 'FORMAT_STREAM_TYPE_OTF' and not bool(fmt_stream.get('drmFamilies')):
|
||
+ continue
|
||
+ stream_id = get_stream_id(fmt_stream)
|
||
+ if not all_formats:
|
||
+ if stream_id in stream_ids:
|
||
+ continue
|
||
+
|
||
+ pot_policy: GvsPoTokenPolicy = self._get_default_ytcfg(client_name)['GVS_PO_TOKEN_POLICY'][StreamingProtocol.HTTPS]
|
||
+
|
||
+ require_po_token = (
|
||
+ stream_id[0] not in ['18']
|
||
+ and gvs_pot_required(pot_policy, is_premium_subscriber, player_token_provided))
|
||
+
|
||
+ po_token = (
|
||
+ gvs_pots.get(client_name)
|
||
+ or fetch_po_token_func(required=require_po_token or pot_policy.recommended))
|
||
+ if po_token:
|
||
+ if client_name not in gvs_pots:
|
||
+ gvs_pots[client_name] = po_token
|
||
+
|
||
+ fmt_url = fmt_stream.get('url')
|
||
+ encrypted_sig, sc = None, None
|
||
+ if not fmt_url:
|
||
+ sc = urllib.parse.parse_qs(fmt_stream.get('signatureCipher'))
|
||
+ fmt_url = url_or_none(try_get(sc, lambda x: x['url'][0]))
|
||
+ encrypted_sig = try_get(sc, lambda x: x['s'][0])
|
||
+ if not all((sc, fmt_url, player_url, encrypted_sig)):
|
||
+ msg = f'Some {client_name} client https formats have been skipped as they are missing a url. '
|
||
+ if client_name in ('web', 'web_safari'):
|
||
+ msg += 'YouTube is forcing SABR streaming for this client. '
|
||
+ else:
|
||
+ msg += (
|
||
+ f'YouTube may have enabled the SABR-only or Server-Side Ad Placement experiment for '
|
||
+ f'{"your account" if self.is_authenticated else "the current session"}. '
|
||
+ )
|
||
+ msg += 'See https://github.com/yt-dlp/yt-dlp/issues/12482 for more details'
|
||
+ self.report_warning(msg, video_id, only_once=True)
|
||
+ continue
|
||
|
||
- require_po_token = (
|
||
- itag not in ['18']
|
||
- and gvs_pot_required(
|
||
- pot_policy, fmt[STREAMING_DATA_IS_PREMIUM_SUBSCRIBER],
|
||
- fmt[STREAMING_DATA_PLAYER_TOKEN_PROVIDED]))
|
||
-
|
||
- po_token = (
|
||
- gvs_pots.get(client_name)
|
||
- or fetch_po_token_func(required=require_po_token or pot_policy.recommended))
|
||
-
|
||
- if po_token:
|
||
- fmt_url = update_url_query(fmt_url, {'pot': po_token})
|
||
- if client_name not in gvs_pots:
|
||
- gvs_pots[client_name] = po_token
|
||
+ fmt = process_format_stream(fmt_stream, proto, missing_pot=require_po_token and not po_token)
|
||
+ if not fmt:
|
||
+ continue
|
||
|
||
- if not po_token and require_po_token and 'missing_pot' not in self._configuration_arg('formats'):
|
||
- self._report_pot_format_skipped(video_id, client_name, 'https')
|
||
- continue
|
||
+ # signature
|
||
+ # Attempt to load sig spec from cache
|
||
+ if encrypted_sig:
|
||
+ spec_cache_id = self._sig_spec_cache_id(player_url, len(encrypted_sig))
|
||
+ spec = self._load_sig_spec_from_cache(spec_cache_id)
|
||
+ if spec:
|
||
+ self.write_debug(f'Using cached signature function {spec_cache_id}', only_once=True)
|
||
+ fmt_url += '&{}={}'.format(traverse_obj(sc, ('sp', -1)) or 'signature',
|
||
+ solve_sig(encrypted_sig, spec))
|
||
+ else:
|
||
+ fmt['_jsc_s_challenge'] = encrypted_sig
|
||
+ fmt['_jsc_s_sc'] = sc
|
||
+
|
||
+ # nsig
|
||
+ query = parse_qs(fmt_url)
|
||
+ if query.get('n'):
|
||
+ n_challenge = query['n'][0]
|
||
+
|
||
+ if n_challenge in self._player_cache:
|
||
+ fmt_url = update_url_query(fmt_url, {'n': self._player_cache[n_challenge]})
|
||
+ else:
|
||
+ fmt['_jsc_n_challenge'] = n_challenge
|
||
+
|
||
+ if po_token:
|
||
+ fmt_url = update_url_query(fmt_url, {'pot': po_token})
|
||
+
|
||
+ fmt['url'] = fmt_url
|
||
+
|
||
+ if stream_id[0]:
|
||
+ itags[stream_id[0]].add((proto, fmt.get('language')))
|
||
+ stream_ids.append(stream_id)
|
||
+
|
||
+ # For handling potential pre-playback required waiting period
|
||
+ if live_status not in ('is_live', 'post_live'):
|
||
+ fmt['available_at'] = available_at
|
||
+
|
||
+ if (all_formats or 'dashy' in format_types) and fmt['filesize']:
|
||
+ https_fmts.append({
|
||
+ **fmt,
|
||
+ 'format_id': f'{fmt["format_id"]}-dashy' if all_formats else fmt['format_id'],
|
||
+ 'protocol': 'http_dash_segments',
|
||
+ 'fragments': build_fragments(fmt),
|
||
+ })
|
||
+ if all_formats or 'dashy' not in format_types:
|
||
+ fmt['downloader_options'] = {'http_chunk_size': CHUNK_SIZE}
|
||
+ https_fmts.append(fmt)
|
||
+
|
||
+ # Bulk process sig/nsig handling
|
||
+ # Retrieve all JSC Sig and Nsig requests for this player response in one go
|
||
+ n_challenges = {}
|
||
+ s_challenges = {}
|
||
+ for fmt in https_fmts:
|
||
+ # This will de-duplicate requests
|
||
+ n_challenge = fmt.pop('_jsc_n_challenge', None)
|
||
+ if n_challenge is not None:
|
||
+ n_challenges.setdefault(n_challenge, []).append(fmt)
|
||
+
|
||
+ s_challenge = fmt.pop('_jsc_s_challenge', None)
|
||
+ if s_challenge is not None:
|
||
+ s_challenges.setdefault(len(s_challenge), {}).setdefault(s_challenge, []).append(fmt)
|
||
+
|
||
+ challenge_requests = []
|
||
+ if n_challenges:
|
||
+ challenge_requests.append(JsChallengeRequest(
|
||
+ type=JsChallengeType.N,
|
||
+ video_id=video_id,
|
||
+ input=NChallengeInput(challenges=list(n_challenges.keys()), player_url=player_url)))
|
||
+ if s_challenges:
|
||
+ challenge_requests.append(JsChallengeRequest(
|
||
+ type=JsChallengeType.SIG,
|
||
+ video_id=video_id,
|
||
+ input=SigChallengeInput(challenges=[''.join(map(chr, range(spec_id))) for spec_id in s_challenges], player_url=player_url)))
|
||
+
|
||
+ if challenge_requests:
|
||
+ for _challenge_request, challenge_response in self._jsc_director.bulk_solve(challenge_requests):
|
||
+ if challenge_response.type == JsChallengeType.SIG:
|
||
+ for challenge, result in challenge_response.output.results.items():
|
||
+ spec_id = len(challenge)
|
||
+ spec = [ord(c) for c in result]
|
||
+ self._store_sig_spec_to_cache(self._sig_spec_cache_id(player_url, spec_id), spec)
|
||
+ s_challenge_data = s_challenges.pop(spec_id, {})
|
||
+ if not s_challenge_data:
|
||
+ continue
|
||
+ for s_challenge, fmts in s_challenge_data.items():
|
||
+ solved_challenge = solve_sig(s_challenge, spec)
|
||
+ for fmt in fmts:
|
||
+ sc = fmt.pop('_jsc_s_sc')
|
||
+ fmt['url'] += '&{}={}'.format(
|
||
+ traverse_obj(sc, ('sp', -1)) or 'signature',
|
||
+ solved_challenge)
|
||
+
|
||
+ elif challenge_response.type == JsChallengeType.N:
|
||
+ for challenge, result in challenge_response.output.results.items():
|
||
+ fmts = n_challenges.pop(challenge, [])
|
||
+ for fmt in fmts:
|
||
+ self._player_cache[challenge] = result
|
||
+ fmt['url'] = update_url_query(fmt['url'], {'n': result})
|
||
+
|
||
+ # Raise warning if any challenge requests remain
|
||
+ # Depending on type of challenge request
|
||
+ # TODO: this could happen as there are no supported JSC Providers
|
||
+ # TODO: cleanup
|
||
|
||
- name = fmt.get('qualityLabel') or quality.replace('audio_quality_', '') or ''
|
||
- fps = int_or_none(fmt.get('fps')) or 0
|
||
- dct = {
|
||
- 'asr': int_or_none(fmt.get('audioSampleRate')),
|
||
- 'filesize': int_or_none(fmt.get('contentLength')),
|
||
- 'format_id': f'{itag}{"-drc" if fmt.get("isDrc") else ""}',
|
||
- 'format_note': join_nonempty(
|
||
- join_nonempty(display_name, is_default and ' (default)', delim=''),
|
||
- name, fmt.get('isDrc') and 'DRC',
|
||
- try_get(fmt, lambda x: x['projectionType'].replace('RECTANGULAR', '').lower()),
|
||
- try_get(fmt, lambda x: x['spatialAudioType'].replace('SPATIAL_AUDIO_TYPE_', '').lower()),
|
||
- is_damaged and 'DAMAGED', require_po_token and not po_token and 'MISSING POT',
|
||
- (self.get_param('verbose') or all_formats) and short_client_name(client_name),
|
||
- delim=', '),
|
||
- # Format 22 is likely to be damaged. See https://github.com/yt-dlp/yt-dlp/issues/3372
|
||
- 'source_preference': (-5 if itag == '22' else -1) + (100 if 'Premium' in name else 0),
|
||
- 'fps': fps if fps > 1 else None, # For some formats, fps is wrongly returned as 1
|
||
- 'audio_channels': fmt.get('audioChannels'),
|
||
- 'height': height,
|
||
- 'quality': q(quality) - bool(fmt.get('isDrc')) / 2,
|
||
- 'has_drm': has_drm,
|
||
- 'tbr': tbr,
|
||
- 'filesize_approx': filesize_from_tbr(tbr, format_duration),
|
||
- 'url': fmt_url,
|
||
- 'width': int_or_none(fmt.get('width')),
|
||
- 'language': join_nonempty(language_code, 'desc' if is_descriptive else '') or None,
|
||
- 'language_preference': PREFERRED_LANG_VALUE if is_original else 5 if is_default else -10 if is_descriptive else -1,
|
||
- # Strictly de-prioritize damaged and 3gp formats
|
||
- 'preference': -10 if is_damaged else -2 if itag == '17' else None,
|
||
- }
|
||
- mime_mobj = re.match(
|
||
- r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', fmt.get('mimeType') or '')
|
||
- if mime_mobj:
|
||
- dct['ext'] = mimetype2ext(mime_mobj.group(1))
|
||
- dct.update(parse_codecs(mime_mobj.group(2)))
|
||
- if itag:
|
||
- itags[itag].add(('https', dct.get('language')))
|
||
- stream_ids.append(stream_id)
|
||
- single_stream = 'none' in (dct.get('acodec'), dct.get('vcodec'))
|
||
- if single_stream and dct.get('ext'):
|
||
- dct['container'] = dct['ext'] + '_dash'
|
||
-
|
||
- # For handling potential pre-playback required waiting period
|
||
- if live_status not in ('is_live', 'post_live'):
|
||
- dct['available_at'] = available_at
|
||
-
|
||
- if (all_formats or 'dashy' in format_types) and dct['filesize']:
|
||
- yield {
|
||
- **dct,
|
||
- 'format_id': f'{dct["format_id"]}-dashy' if all_formats else dct['format_id'],
|
||
- 'protocol': 'http_dash_segments',
|
||
- 'fragments': build_fragments(dct),
|
||
- }
|
||
- if all_formats or 'dashy' not in format_types:
|
||
- dct['downloader_options'] = {'http_chunk_size': CHUNK_SIZE}
|
||
- yield dct
|
||
-
|
||
- needs_live_processing = self._needs_live_processing(live_status, duration)
|
||
- skip_bad_formats = 'incomplete' not in format_types
|
||
-
|
||
- skip_manifests = set(self._configuration_arg('skip'))
|
||
- if (needs_live_processing == 'is_live' # These will be filtered out by YoutubeDL anyway
|
||
- or (needs_live_processing and skip_bad_formats)):
|
||
- skip_manifests.add('hls')
|
||
- if skip_bad_formats and live_status == 'is_live' and needs_live_processing != 'is_live':
|
||
- skip_manifests.add('dash')
|
||
-
|
||
- def process_manifest_format(f, proto, client_name, itag, missing_pot):
|
||
- key = (proto, f.get('language'))
|
||
- if not all_formats and key in itags[itag]:
|
||
- return False
|
||
-
|
||
- # For handling potential pre-playback required waiting period
|
||
- if live_status not in ('is_live', 'post_live'):
|
||
- f['available_at'] = available_at
|
||
-
|
||
- if f.get('source_preference') is None:
|
||
- f['source_preference'] = -1
|
||
-
|
||
- # Deprioritize since its pre-merged m3u8 formats may have lower quality audio streams
|
||
- if client_name == 'web_safari' and proto == 'hls' and live_status != 'is_live':
|
||
- f['source_preference'] -= 1
|
||
-
|
||
- if missing_pot:
|
||
- f['format_note'] = join_nonempty(f.get('format_note'), 'MISSING POT', delim=' ')
|
||
- f['source_preference'] -= 20
|
||
-
|
||
- itags[itag].add(key)
|
||
-
|
||
- if itag and all_formats:
|
||
- f['format_id'] = f'{itag}-{proto}'
|
||
- elif any(p != proto for p, _ in itags[itag]):
|
||
- f['format_id'] = f'{itag}-{proto}'
|
||
- elif itag:
|
||
- f['format_id'] = itag
|
||
-
|
||
- if original_language and f.get('language') == original_language:
|
||
- f['format_note'] = join_nonempty(f.get('format_note'), '(default)', delim=' ')
|
||
- f['language_preference'] = PREFERRED_LANG_VALUE
|
||
-
|
||
- if itag in ('616', '235'):
|
||
- f['format_note'] = join_nonempty(f.get('format_note'), 'Premium', delim=' ')
|
||
- f['source_preference'] += 100
|
||
-
|
||
- f['quality'] = q(itag_qualities.get(try_get(f, lambda f: f['format_id'].split('-')[0]), -1))
|
||
- if f['quality'] == -1 and f.get('height'):
|
||
- f['quality'] = q(res_qualities[min(res_qualities, key=lambda x: abs(x - f['height']))])
|
||
- if self.get_param('verbose') or all_formats:
|
||
- f['format_note'] = join_nonempty(
|
||
- f.get('format_note'), short_client_name(client_name), delim=', ')
|
||
- if f.get('fps') and f['fps'] <= 1:
|
||
- del f['fps']
|
||
-
|
||
- if proto == 'hls' and f.get('has_drm'):
|
||
- f['has_drm'] = 'maybe'
|
||
- f['source_preference'] -= 5
|
||
- return True
|
||
+ if s_challenges:
|
||
+ self.report_warning(
|
||
+ 'Signature extraction failed: Some formats may be missing',
|
||
+ video_id=video_id, only_once=True)
|
||
+ if n_challenges:
|
||
+ self.report_warning(
|
||
+ 'nsig extraction failed: Some formats may be missing',
|
||
+ video_id=video_id, only_once=True)
|
||
|
||
- subtitles = {}
|
||
- for sd in streaming_data:
|
||
- client_name = sd[STREAMING_DATA_CLIENT_NAME]
|
||
- fetch_pot_func = sd[STREAMING_DATA_FETCH_GVS_PO_TOKEN]
|
||
- is_premium_subscriber = sd[STREAMING_DATA_IS_PREMIUM_SUBSCRIBER]
|
||
- has_player_token = sd[STREAMING_DATA_PLAYER_TOKEN_PROVIDED]
|
||
+ for cfmts in list(s_challenges.values()) + list(n_challenges.values()):
|
||
+ for fmt in cfmts:
|
||
+ if fmt in https_fmts:
|
||
+ https_fmts.remove(fmt)
|
||
+
|
||
+ yield from https_fmts
|
||
+
|
||
+ yield from process_https_formats()
|
||
+
|
||
+ needs_live_processing = self._needs_live_processing(live_status, duration)
|
||
+ skip_bad_formats = 'incomplete' not in format_types
|
||
+ if self._configuration_arg('include_incomplete_formats'):
|
||
+ skip_bad_formats = False
|
||
+ self._downloader.deprecated_feature('[youtube] include_incomplete_formats extractor argument is deprecated. '
|
||
+ 'Use formats=incomplete extractor argument instead')
|
||
+
|
||
+ skip_manifests = set(self._configuration_arg('skip'))
|
||
+ if (not self.get_param('youtube_include_hls_manifest', True)
|
||
+ or needs_live_processing == 'is_live' # These will be filtered out by YoutubeDL anyway
|
||
+ or (needs_live_processing and skip_bad_formats)):
|
||
+ skip_manifests.add('hls')
|
||
+
|
||
+ if not self.get_param('youtube_include_dash_manifest', True):
|
||
+ skip_manifests.add('dash')
|
||
+ if self._configuration_arg('include_live_dash'):
|
||
+ self._downloader.deprecated_feature('[youtube] include_live_dash extractor argument is deprecated. '
|
||
+ 'Use formats=incomplete extractor argument instead')
|
||
+ elif skip_bad_formats and live_status == 'is_live' and needs_live_processing != 'is_live':
|
||
+ skip_manifests.add('dash')
|
||
+
|
||
+ def process_manifest_format(f, proto, client_name, itag, missing_pot):
|
||
+ key = (proto, f.get('language'))
|
||
+ if not all_formats and key in itags[itag]:
|
||
+ return False
|
||
+
|
||
+ # For handling potential pre-playback required waiting period
|
||
+ if live_status not in ('is_live', 'post_live'):
|
||
+ f['available_at'] = available_at
|
||
+
|
||
+ if f.get('source_preference') is None:
|
||
+ f['source_preference'] = -1
|
||
+
|
||
+ if missing_pot:
|
||
+ f['format_note'] = join_nonempty(f.get('format_note'), 'MISSING POT', delim=' ')
|
||
+ f['source_preference'] -= 20
|
||
+
|
||
+ itags[itag].add(key)
|
||
+
|
||
+ if itag and all_formats:
|
||
+ f['format_id'] = f'{itag}-{proto}'
|
||
+ elif any(p != proto for p, _ in itags[itag]):
|
||
+ f['format_id'] = f'{itag}-{proto}'
|
||
+ elif itag:
|
||
+ f['format_id'] = itag
|
||
+
|
||
+ if original_language and f.get('language') == original_language:
|
||
+ f['format_note'] = join_nonempty(f.get('format_note'), '(default)', delim=' ')
|
||
+ f['language_preference'] = PREFERRED_LANG_VALUE
|
||
+
|
||
+ if itag in ('616', '235'):
|
||
+ f['format_note'] = join_nonempty(f.get('format_note'), 'Premium', delim=' ')
|
||
+ f['source_preference'] += 100
|
||
+
|
||
+ f['quality'] = q(itag_qualities.get(try_get(f, lambda f: f['format_id'].split('-')[0]), -1))
|
||
+ if f['quality'] == -1 and f.get('height'):
|
||
+ f['quality'] = q(res_qualities[min(res_qualities, key=lambda x: abs(x - f['height']))])
|
||
+ if self.get_param('verbose') or all_formats:
|
||
+ f['format_note'] = join_nonempty(
|
||
+ f.get('format_note'), short_client_name(client_name), delim=', ')
|
||
+ if f.get('fps') and f['fps'] <= 1:
|
||
+ del f['fps']
|
||
+
|
||
+ if proto == 'hls' and f.get('has_drm'):
|
||
+ f['has_drm'] = 'maybe'
|
||
+ f['source_preference'] -= 5
|
||
+ return True
|
||
|
||
- hls_manifest_url = 'hls' not in skip_manifests and sd.get('hlsManifestUrl')
|
||
+ hls_manifest_url = 'hls' not in skip_manifests and streaming_data.get('hlsManifestUrl')
|
||
if hls_manifest_url:
|
||
pot_policy: GvsPoTokenPolicy = self._get_default_ytcfg(
|
||
client_name)['GVS_PO_TOKEN_POLICY'][StreamingProtocol.HLS]
|
||
- require_po_token = gvs_pot_required(pot_policy, is_premium_subscriber, has_player_token)
|
||
- po_token = gvs_pots.get(client_name, fetch_pot_func(required=require_po_token or pot_policy.recommended))
|
||
+ require_po_token = gvs_pot_required(pot_policy, is_premium_subscriber, player_token_provided)
|
||
+ po_token = gvs_pots.get(client_name, fetch_po_token_func(required=require_po_token or pot_policy.recommended))
|
||
if po_token:
|
||
hls_manifest_url = hls_manifest_url.rstrip('/') + f'/pot/{po_token}'
|
||
if client_name not in gvs_pots:
|
||
@@ -3643,12 +3479,12 @@
|
||
r'/itag/(\d+)', f['url'], 'itag', default=None), require_po_token and not po_token):
|
||
yield f
|
||
|
||
- dash_manifest_url = 'dash' not in skip_manifests and sd.get('dashManifestUrl')
|
||
+ dash_manifest_url = 'dash' not in skip_manifests and streaming_data.get('dashManifestUrl')
|
||
if dash_manifest_url:
|
||
pot_policy: GvsPoTokenPolicy = self._get_default_ytcfg(
|
||
client_name)['GVS_PO_TOKEN_POLICY'][StreamingProtocol.DASH]
|
||
- require_po_token = gvs_pot_required(pot_policy, is_premium_subscriber, has_player_token)
|
||
- po_token = gvs_pots.get(client_name, fetch_pot_func(required=require_po_token or pot_policy.recommended))
|
||
+ require_po_token = gvs_pot_required(pot_policy, is_premium_subscriber, player_token_provided)
|
||
+ po_token = gvs_pots.get(client_name, fetch_po_token_func(required=require_po_token or pot_policy.recommended))
|
||
if po_token:
|
||
dash_manifest_url = dash_manifest_url.rstrip('/') + f'/pot/{po_token}'
|
||
if client_name not in gvs_pots:
|
||
@@ -3668,7 +3504,6 @@
|
||
r'/clen/(\d+)', f.get('fragment_base_url') or f['url'], 'file size', default=None))
|
||
if needs_live_processing:
|
||
f['is_from_start'] = True
|
||
-
|
||
yield f
|
||
yield subtitles
|
||
|
||
@@ -3741,14 +3576,13 @@
|
||
else 'was_live' if live_content
|
||
else 'not_live' if False in (is_live, live_content)
|
||
else None)
|
||
- streaming_data = traverse_obj(player_responses, (..., 'streamingData'))
|
||
- *formats, subtitles = self._extract_formats_and_subtitles(streaming_data, video_id, player_url, live_status, duration)
|
||
+ *formats, subtitles = self._extract_formats_and_subtitles(video_id, player_responses, player_url, live_status, duration)
|
||
if all(f.get('has_drm') for f in formats):
|
||
# If there are no formats that definitely don't have DRM, all have DRM
|
||
for f in formats:
|
||
f['has_drm'] = True
|
||
|
||
- return live_broadcast_details, live_status, streaming_data, formats, subtitles
|
||
+ return live_broadcast_details, live_status, formats, subtitles
|
||
|
||
def _download_initial_data(self, video_id, webpage, webpage_client, webpage_ytcfg):
|
||
initial_data = None
|
||
@@ -3908,8 +3742,9 @@
|
||
or int_or_none(get_first(microformats, 'lengthSeconds'))
|
||
or parse_duration(search_meta('duration')) or None)
|
||
|
||
- live_broadcast_details, live_status, streaming_data, formats, automatic_captions = \
|
||
+ live_broadcast_details, live_status, formats, automatic_captions = \
|
||
self._list_formats(video_id, microformats, video_details, player_responses, player_url, duration)
|
||
+ streaming_data = traverse_obj(player_responses, (..., 'streamingData'))
|
||
if live_status == 'post_live':
|
||
self.write_debug(f'{video_id}: Video is in Post-Live Manifestless mode')
|
||
|
||
Index: yt-dlp/yt_dlp/extractor/youtube/jsc/README.md
|
||
===================================================================
|
||
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
|
||
+++ yt-dlp/yt_dlp/extractor/youtube/jsc/README.md 2025-09-25 21:17:17.683261906 +0200
|
||
@@ -0,0 +1,129 @@
|
||
+# YoutubeIE JS Challenge Provider Framework
|
||
+
|
||
+As part of the YouTube extractor, we have a framework for solving JS Challenges programmatically (sig, nsig). This can be used by plugins.
|
||
+
|
||
+> [!TIP]
|
||
+> If publishing a JS Challenge Provider plugin to GitHub, add the [yt-dlp-jsc-provider](https://github.com/topics/yt-dlp-jsc-provider) topic to your repository to help users find it.
|
||
+
|
||
+
|
||
+## Public APIs
|
||
+
|
||
+- `yt_dlp.extractor.youtube.jsc.provider`
|
||
+- `yt_dlp.extractor.youtube.jsc.utils`
|
||
+
|
||
+Everything else is internal-only and no guarantees are made about the API stability.
|
||
+
|
||
+> [!WARNING]
|
||
+> We will try our best to maintain stability with the public APIs.
|
||
+> However, due to the nature of extractors and YouTube, we may need to remove or change APIs in the future.
|
||
+> If you are using these APIs outside yt-dlp plugins, please account for this by importing them safely.
|
||
+
|
||
+## JS Challenge Provider
|
||
+
|
||
+`yt_dlp.extractor.youtube.jsc.provider`
|
||
+
|
||
+```python
|
||
+from yt_dlp.extractor.youtube.jsc.provider import (
|
||
+ register_provider,
|
||
+ register_preference,
|
||
+ JsChallengeProvider,
|
||
+ JsChallengeRequest,
|
||
+ JsChallengeResponse,
|
||
+ JsChallengeProviderError,
|
||
+ JsChallengeProviderRejectedRequest,
|
||
+ JsChallengeType,
|
||
+ JsChallengeProviderResponse,
|
||
+ NChallengeOutput,
|
||
+)
|
||
+from yt_dlp.utils import traverse_obj, Popen
|
||
+import json
|
||
+import subprocess
|
||
+import typing
|
||
+
|
||
+@register_provider
|
||
+class MyJsChallengeProviderJSP(JsChallengeProvider): # Provider class name must end with "JSP"
|
||
+ PROVIDER_VERSION = '0.2.1'
|
||
+ # Define a unique display name for the provider
|
||
+ PROVIDER_NAME = 'my-provider'
|
||
+ BUG_REPORT_LOCATION = 'https://issues.example.com/report'
|
||
+
|
||
+ # Set supported challenge types.
|
||
+ # If None, the provider will handle all types.
|
||
+ _SUPPORTED_TYPES = [JsChallengeType.N]
|
||
+
|
||
+ def is_available(self) -> bool:
|
||
+ """
|
||
+ Check if the provider is available (e.g. all required dependencies are available)
|
||
+ This is used to determine if the provider should be used and to provide debug information.
|
||
+
|
||
+ IMPORTANT: This method SHOULD NOT make any network requests or perform any expensive operations.
|
||
+
|
||
+ Since this is called multiple times, we recommend caching the result.
|
||
+ """
|
||
+ return True
|
||
+
|
||
+ def close(self):
|
||
+ # Optional close hook, called when YoutubeDL is closed.
|
||
+ pass
|
||
+
|
||
+ def _real_bulk_solve(self, requests: list[JsChallengeRequest]) -> typing.Generator[JsChallengeProviderResponse, None, None]:
|
||
+        # ℹ️ If you need to do additional validation on the requests, do it here.
|
||
+ # Raise yt_dlp.extractor.youtube.jsc.provider.JsChallengeProviderRejectedRequest if the request is not supported.
|
||
+ if len("something") > 255:
|
||
+ raise JsChallengeProviderRejectedRequest('Challenges longer than 255 are not supported', expected=True)
|
||
+
|
||
+
|
||
+ # ℹ️ Settings are pulled from extractor args passed to yt-dlp with the key `youtubejs-<PROVIDER_KEY>`.
|
||
+ # For this example, the extractor arg would be:
|
||
+ # `--extractor-args "youtubejs-myjschallengeprovider:bin_path=/path/to/bin"`
|
||
+ bin_path = self._configuration_arg(
|
||
+ 'bin_path', default=['/path/to/bin'])[0]
|
||
+
|
||
+ # See below for logging guidelines
|
||
+ self.logger.trace(f'Using bin path: {bin_path}')
|
||
+
|
||
+ for request in requests:
|
||
+ # You can use the _get_player method to get the player JS code if needed.
|
||
+ # This shares the same caching as the YouTube extractor, so it will not make unnecessary requests.
|
||
+ player_js = self._get_player(request.video_id, request.input.player_url)
|
||
+ cmd = f'{bin_path} {request.input.challenges} {player_js}'
|
||
+ self.logger.info(f'Executing command: {cmd}')
|
||
+ stdout, _, ret = Popen.run(cmd, text=True, shell=True, stdout=subprocess.PIPE)
|
||
+ if ret != 0:
|
||
+ # ℹ️ If there is an error, raise JsChallengeProviderError.
|
||
+ # The request will be sent to the next provider if there is one.
|
||
+ # You can specify whether it is expected or not. If it is unexpected,
|
||
+ # the log will include a link to the bug report location (BUG_REPORT_LOCATION).
|
||
+
|
||
+ # raise JsChallengeProviderError(f'Command returned error code {ret}', expected=False)
|
||
+
|
||
+                # Alternatively, you can fail only this specific request by yielding a JsChallengeProviderResponse with the error.
|
||
+ # This will allow other requests to be processed by this provider.
|
||
+ yield JsChallengeProviderResponse(
|
||
+ request=request,
|
||
+ error=JsChallengeProviderError(f'Command returned error code {ret}', expected=False)
|
||
+ )
|
||
+                continue  # do not also yield a success response for this failed request
|
||
+ yield JsChallengeProviderResponse(
|
||
+ request=request,
|
||
+ response=JsChallengeResponse(
|
||
+ type=JsChallengeType.N,
|
||
+ output=NChallengeOutput(results=traverse_obj(json.loads(stdout))),
|
||
+ ))
|
||
+
|
||
+
|
||
+# If there are multiple JS Challenge Providers that can handle the same JsChallengeRequest(s),
|
||
+# you can define a preference function to increase/decrease the priority of providers.
|
||
+
|
||
+@register_preference(MyJsChallengeProviderJSP)
|
||
+def my_provider_preference(provider: JsChallengeProvider, requests: list[JsChallengeRequest]) -> int:
|
||
+ return 50
|
||
+```
|
||
+
|
||
+## Logging Guidelines
|
||
+
|
||
+_TODO: the logging guidelines have not been written yet._
|
||
+
|
||
+## Debugging
|
||
+
|
||
+- Use `-v --extractor-args "youtube:jsc_trace=true"` to enable JS Challenge debug output.
|
||
Index: yt-dlp/yt_dlp/extractor/youtube/jsc/__init__.py
|
||
===================================================================
|
||
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
|
||
+++ yt-dlp/yt_dlp/extractor/youtube/jsc/__init__.py 2025-09-25 21:17:17.683390485 +0200
|
||
@@ -0,0 +1,5 @@
|
||
+# Trigger import of built-in providers
|
||
+from ._builtin.bun import BunJCP as _BunJCP # noqa: F401
|
||
+from ._builtin.deno import DenoJCP as _DenoJCP # noqa: F401
|
||
+from ._builtin.jsinterp import JsInterpJCP as _JsInterpJCP # noqa: F401
|
||
+from ._builtin.node import NodeJCP as _NodeJCP # noqa: F401
|
||
Index: yt-dlp/yt_dlp/extractor/youtube/jsc/_builtin/bun.py
|
||
===================================================================
|
||
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
|
||
+++ yt-dlp/yt_dlp/extractor/youtube/jsc/_builtin/bun.py 2025-09-25 21:17:17.683542881 +0200
|
||
@@ -0,0 +1,79 @@
|
||
+from __future__ import annotations
|
||
+
|
||
+import shlex
|
||
+import subprocess
|
||
+
|
||
+from yt_dlp.extractor.youtube.jsc._builtin.runtime import (
|
||
+ JsRuntimeChalBaseJCP,
|
||
+ Script,
|
||
+ ScriptSource,
|
||
+ ScriptType,
|
||
+ ScriptVariant,
|
||
+)
|
||
+from yt_dlp.extractor.youtube.jsc._builtin.scripts import load_script
|
||
+from yt_dlp.extractor.youtube.jsc.provider import (
|
||
+ JsChallengeProvider,
|
||
+ JsChallengeProviderError,
|
||
+ JsChallengeRequest,
|
||
+ register_preference,
|
||
+ register_provider,
|
||
+)
|
||
+from yt_dlp.extractor.youtube.pot._provider import BuiltinIEContentProvider
|
||
+from yt_dlp.extractor.youtube.pot.provider import provider_bug_report_message
|
||
+from yt_dlp.utils import Popen
|
||
+
|
||
+
|
||
+@register_provider
|
||
+class BunJCP(JsRuntimeChalBaseJCP, BuiltinIEContentProvider):
|
||
+ PROVIDER_NAME = 'bun'
|
||
+ JS_RUNTIME_NAME = 'bun'
|
||
+
|
||
+ _ARGS = ['--bun', 'run', '-']
|
||
+ BUN_NPM_LIB_FILENAME = 'bun.lib.js'
|
||
+
|
||
+ def _iter_script_sources(self):
|
||
+ for source, func in super()._iter_script_sources():
|
||
+ if source == ScriptSource.WEB:
|
||
+ yield ScriptSource.BUILTIN, self._bun_npm_source
|
||
+ yield source, func
|
||
+
|
||
+ def _bun_npm_source(self, script_type: ScriptType, /) -> Script | None:
|
||
+ if script_type != ScriptType.LIB:
|
||
+ return None
|
||
+ if 'npm' not in self.ie.get_param('download_ext_components', []):
|
||
+ self._report_ext_component_skipped('npm', 'NPM package')
|
||
+ return None
|
||
+
|
||
+        # Bun-specific lib script that uses Bun autoimport
|
||
+ # https://bun.com/docs/runtime/autoimport
|
||
+ error_hook = lambda e: self.logger.warning(
|
||
+ f'Failed to read bun challenge solver lib script: {e}{provider_bug_report_message(self)}')
|
||
+ code = load_script(
|
||
+ self.BUN_NPM_LIB_FILENAME, error_hook=error_hook)
|
||
+ if code:
|
||
+ return Script(script_type, ScriptVariant.BUN_NPM, ScriptSource.BUILTIN, self._SUPPORTED_VERSION, code)
|
||
+ return None
|
||
+
|
||
+ def _run_js_runtime(self, stdin: str, /) -> str:
|
||
+ cmd = [self.runtime_info.path, *self._ARGS]
|
||
+ self.logger.debug(f'Running bun: {shlex.join(cmd)}')
|
||
+ with Popen(
|
||
+ cmd,
|
||
+ text=True,
|
||
+ stdin=subprocess.PIPE,
|
||
+ stdout=subprocess.PIPE,
|
||
+ stderr=subprocess.PIPE,
|
||
+ ) as proc:
|
||
+ stdout, stderr = proc.communicate_or_kill(stdin)
|
||
+ if proc.returncode or stderr:
|
||
+ msg = 'Error running bun process'
|
||
+ if stderr:
|
||
+ msg = f'{msg}: {stderr}'
|
||
+ raise JsChallengeProviderError(msg)
|
||
+
|
||
+ return stdout
|
||
+
|
||
+
|
||
+@register_preference(BunJCP)
|
||
+def preference(provider: JsChallengeProvider, requests: list[JsChallengeRequest]) -> int:
|
||
+ return 800
|
||
Index: yt-dlp/yt_dlp/extractor/youtube/jsc/_builtin/bundle/core.js
|
||
===================================================================
|
||
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
|
||
+++ yt-dlp/yt_dlp/extractor/youtube/jsc/_builtin/bundle/core.js 2025-09-25 21:17:17.684050702 +0200
|
||
@@ -0,0 +1,504 @@
|
||
+--- yt_dlp/extractor/youtube/jsc/_builtin/bundle/core.js 2025-09-25 20:23:11.518734893 +0200
|
||
++++ /dev/null
|
||
+@@ -1,501 +0,0 @@
|
||
+-// This file is auto-generated from https://github.com/Grub4K/yt-dlp-jsc-deno
|
||
+-// Do not edit, changes will be overwritten.
|
||
+-// TODO: make this automatically updated
|
||
+-var jsc = (function (meriyah, astring) {
|
||
+- 'use strict';
|
||
+-
|
||
+- function matchesStructure(
|
||
+- obj,
|
||
+- structure,
|
||
+- ) {
|
||
+- if (Array.isArray(structure)) {
|
||
+- if (!Array.isArray(obj)) {
|
||
+- return false;
|
||
+- }
|
||
+- return (
|
||
+- structure.length === obj.length &&
|
||
+- structure.every((value, index) => matchesStructure(obj[index], value))
|
||
+- );
|
||
+- }
|
||
+- if (typeof structure === "object") {
|
||
+- if (!obj) {
|
||
+- return !structure;
|
||
+- }
|
||
+- if ("or" in structure) {
|
||
+- // Handle `{ or: [a, b] }`
|
||
+- return structure.or.some((node) => matchesStructure(obj, node));
|
||
+- }
|
||
+- for (const [key, value] of Object.entries(structure)) {
|
||
+- if (!matchesStructure(obj[key ], value)) {
|
||
+- return false;
|
||
+- }
|
||
+- }
|
||
+- return true;
|
||
+- }
|
||
+- return structure === obj;
|
||
+- }
|
||
+-
|
||
+- function isOneOf(value, ...of) {
|
||
+- return of.includes(value );
|
||
+- }
|
||
+-
|
||
+- function _optionalChain$2(ops) { let lastAccessLHS = undefined; let value = ops[0]; let i = 1; while (i < ops.length) { const op = ops[i]; const fn = ops[i + 1]; i += 2; if ((op === 'optionalAccess' || op === 'optionalCall') && value == null) { return undefined; } if (op === 'access' || op === 'optionalAccess') { lastAccessLHS = value; value = fn(value); } else if (op === 'call' || op === 'optionalCall') { value = fn((...args) => value.call(lastAccessLHS, ...args)); lastAccessLHS = undefined; } } return value; }
|
||
+-
|
||
+-
|
||
+- const logicalExpression = {
|
||
+- type: "ExpressionStatement",
|
||
+- expression: {
|
||
+- type: "LogicalExpression",
|
||
+- left: {
|
||
+- type: "Identifier",
|
||
+- },
|
||
+- right: {
|
||
+- type: "SequenceExpression",
|
||
+- expressions: [
|
||
+- {
|
||
+- type: "AssignmentExpression",
|
||
+- left: {
|
||
+- type: "Identifier",
|
||
+- },
|
||
+- operator: "=",
|
||
+- right: {
|
||
+- type: "CallExpression",
|
||
+- callee: {
|
||
+- type: "Identifier",
|
||
+- },
|
||
+- arguments: {
|
||
+- or: [
|
||
+- [
|
||
+- { type: "Literal" },
|
||
+- {
|
||
+- type: "CallExpression",
|
||
+- callee: {
|
||
+- type: "Identifier",
|
||
+- name: "decodeURIComponent",
|
||
+- },
|
||
+- arguments: [{ type: "Identifier" }],
|
||
+- optional: false,
|
||
+- },
|
||
+- ],
|
||
+- [
|
||
+- {
|
||
+- type: "CallExpression",
|
||
+- callee: {
|
||
+- type: "Identifier",
|
||
+- name: "decodeURIComponent",
|
||
+- },
|
||
+- arguments: [{ type: "Identifier" }],
|
||
+- optional: false,
|
||
+- },
|
||
+- ],
|
||
+- ],
|
||
+- },
|
||
+- optional: false,
|
||
+- },
|
||
+- },
|
||
+- {
|
||
+- type: "CallExpression",
|
||
+- },
|
||
+- ],
|
||
+- },
|
||
+- operator: "&&",
|
||
+- },
|
||
+- };
|
||
+-
|
||
+- const identifier$1 = {
|
||
+- or: [{
|
||
+- type: "ExpressionStatement",
|
||
+- expression: {
|
||
+- type: "AssignmentExpression",
|
||
+- operator: "=",
|
||
+- left: {
|
||
+- type: "Identifier",
|
||
+- },
|
||
+- right: {
|
||
+- type: "FunctionExpression",
|
||
+- params: [{}, {}, {}],
|
||
+- },
|
||
+- },
|
||
+- }, {
|
||
+- type: "FunctionDeclaration",
|
||
+- params: [{}, {}, {}],
|
||
+- }],
|
||
+- } ;
|
||
+-
|
||
+- function extract$1(
|
||
+- node,
|
||
+- ) {
|
||
+- if (
|
||
+- !matchesStructure(node, identifier$1 )
|
||
+- ) {
|
||
+- return null;
|
||
+- }
|
||
+- const block = (node.type === "ExpressionStatement" &&
|
||
+- node.expression.type === "AssignmentExpression" &&
|
||
+- node.expression.right.type === "FunctionExpression")
|
||
+- ? node.expression.right.body
|
||
+- : node.type === "FunctionDeclaration"
|
||
+- ? node.body
|
||
+- : null;
|
||
+- const relevantExpression = _optionalChain$2([block, 'optionalAccess', _ => _.body, 'access', _2 => _2.at, 'call', _3 => _3(-2)]);
|
||
+- if (!matchesStructure(relevantExpression, logicalExpression)) {
|
||
+- return null;
|
||
+- }
|
||
+- if (
|
||
+- _optionalChain$2([relevantExpression, 'optionalAccess', _4 => _4.type]) !== "ExpressionStatement" ||
|
||
+- relevantExpression.expression.type !==
|
||
+- "LogicalExpression" ||
|
||
+- relevantExpression.expression.right.type !==
|
||
+- "SequenceExpression" ||
|
||
+- relevantExpression.expression.right.expressions[0].type !==
|
||
+- "AssignmentExpression"
|
||
+- ) {
|
||
+- return null;
|
||
+- }
|
||
+- const call = relevantExpression.expression.right.expressions[0].right;
|
||
+- if (call.type !== "CallExpression" || call.callee.type !== "Identifier") {
|
||
+- return null;
|
||
+- }
|
||
+- // TODO: verify identifiers here
|
||
+- return {
|
||
+- type: "ArrowFunctionExpression",
|
||
+- params: [
|
||
+- {
|
||
+- type: "Identifier",
|
||
+- name: "sig",
|
||
+- },
|
||
+- ],
|
||
+- body: {
|
||
+- type: "CallExpression",
|
||
+- callee: {
|
||
+- type: "Identifier",
|
||
+- name: call.callee.name,
|
||
+- },
|
||
+- arguments: call.arguments.length === 1
|
||
+- ? [
|
||
+- {
|
||
+- type: "Identifier",
|
||
+- name: "sig",
|
||
+- },
|
||
+- ]
|
||
+- : [
|
||
+- call.arguments[0],
|
||
+- {
|
||
+- type: "Identifier",
|
||
+- name: "sig",
|
||
+- },
|
||
+- ],
|
||
+- optional: false,
|
||
+- },
|
||
+- async: false,
|
||
+- expression: false,
|
||
+- generator: false,
|
||
+- };
|
||
+- }
|
||
+-
|
||
+- function _optionalChain$1(ops) { let lastAccessLHS = undefined; let value = ops[0]; let i = 1; while (i < ops.length) { const op = ops[i]; const fn = ops[i + 1]; i += 2; if ((op === 'optionalAccess' || op === 'optionalCall') && value == null) { return undefined; } if (op === 'access' || op === 'optionalAccess') { lastAccessLHS = value; value = fn(value); } else if (op === 'call' || op === 'optionalCall') { value = fn((...args) => value.call(lastAccessLHS, ...args)); lastAccessLHS = undefined; } } return value; }
|
||
+-
|
||
+-
|
||
+- const identifier = {
|
||
+- type: "VariableDeclaration",
|
||
+- kind: "var",
|
||
+- declarations: [
|
||
+- {
|
||
+- type: "VariableDeclarator",
|
||
+- id: {
|
||
+- type: "Identifier",
|
||
+- },
|
||
+- init: {
|
||
+- type: "ArrayExpression",
|
||
+- elements: [
|
||
+- {
|
||
+- type: "Identifier",
|
||
+- },
|
||
+- ],
|
||
+- },
|
||
+- },
|
||
+- ],
|
||
+- };
|
||
+-
|
||
+- const catchBlockBody = [
|
||
+- {
|
||
+- type: "ReturnStatement",
|
||
+- argument: {
|
||
+- type: "BinaryExpression",
|
||
+- left: {
|
||
+- type: "MemberExpression",
|
||
+- object: {
|
||
+- type: "Identifier",
|
||
+- },
|
||
+- computed: true,
|
||
+- property: {
|
||
+- type: "Literal",
|
||
+- },
|
||
+- optional: false,
|
||
+- },
|
||
+- right: {
|
||
+- type: "Identifier",
|
||
+- },
|
||
+- operator: "+",
|
||
+- },
|
||
+- },
|
||
+- ] ;
|
||
+-
|
||
+- function extract(
|
||
+- node,
|
||
+- ) {
|
||
+- if (!matchesStructure(node, identifier)) {
|
||
+- // Fallback search for try { } catch { return X[12] + Y }
|
||
+- let name = null;
|
||
+- let block = null;
|
||
+- switch (node.type) {
|
||
+- case "ExpressionStatement": {
|
||
+- if (
|
||
+- node.expression.type === "AssignmentExpression" &&
|
||
+- node.expression.left.type === "Identifier" &&
|
||
+- node.expression.right.type === "FunctionExpression" &&
|
||
+- node.expression.right.params.length === 1
|
||
+- ) {
|
||
+- name = node.expression.left.name;
|
||
+- block = node.expression.right.body;
|
||
+- }
|
||
+- break;
|
||
+- }
|
||
+- case "FunctionDeclaration": {
|
||
+- if (node.params.length === 1) {
|
||
+- name = _optionalChain$1([node, 'access', _ => _.id, 'optionalAccess', _2 => _2.name]);
|
||
+- block = node.body;
|
||
+- }
|
||
+- break;
|
||
+- }
|
||
+- }
|
||
+- if (!block || !name) {
|
||
+- return null;
|
||
+- }
|
||
+- const tryNode = block.body.at(-2);
|
||
+- if (
|
||
+- _optionalChain$1([tryNode, 'optionalAccess', _3 => _3.type]) !== "TryStatement" ||
|
||
+- _optionalChain$1([tryNode, 'access', _4 => _4.handler, 'optionalAccess', _5 => _5.type]) !== "CatchClause"
|
||
+- ) {
|
||
+- return null;
|
||
+- }
|
||
+- const catchBody = tryNode.handler.body.body;
|
||
+- if (matchesStructure(catchBody, catchBlockBody)) {
|
||
+- return makeSolverFuncFromName(name);
|
||
+- }
|
||
+- return null;
|
||
+- }
|
||
+-
|
||
+- if (node.type !== "VariableDeclaration") {
|
||
+- return null;
|
||
+- }
|
||
+- const declaration = node.declarations[0];
|
||
+- if (
|
||
+- declaration.type !== "VariableDeclarator" || !declaration.init ||
|
||
+- declaration.init.type !== "ArrayExpression" ||
|
||
+- declaration.init.elements.length !== 1
|
||
+- ) {
|
||
+- return null;
|
||
+- }
|
||
+- const [firstElement] = declaration.init.elements;
|
||
+- if (!firstElement || firstElement.type !== "Identifier") {
|
||
+- return null;
|
||
+- }
|
||
+- return makeSolverFuncFromName(firstElement.name);
|
||
+- }
|
||
+-
|
||
+- function makeSolverFuncFromName(name) {
|
||
+- return {
|
||
+- type: "ArrowFunctionExpression",
|
||
+- params: [
|
||
+- {
|
||
+- type: "Identifier",
|
||
+- name: "nsig",
|
||
+- },
|
||
+- ],
|
||
+- body: {
|
||
+- type: "CallExpression",
|
||
+- callee: {
|
||
+- type: "Identifier",
|
||
+- name: name,
|
||
+- },
|
||
+- arguments: [
|
||
+- {
|
||
+- type: "Identifier",
|
||
+- name: "nsig",
|
||
+- },
|
||
+- ],
|
||
+- optional: false,
|
||
+- },
|
||
+- async: false,
|
||
+- expression: false,
|
||
+- generator: false,
|
||
+- };
|
||
+- }
|
||
+-
|
||
+- const setupNodes = meriyah.parse(`
|
||
+-globalThis.XMLHttpRequest = { prototype: {} };
|
||
+-const window = Object.assign(Object.create(null), globalThis);
|
||
+-window.location = new URL("https://www.youtube.com/watch?v=yt-dlp-wins");
|
||
+-const document = {};
|
||
+-let self = globalThis;
|
||
+-`).body;
|
||
+-
|
||
+- function _optionalChain(ops) { let lastAccessLHS = undefined; let value = ops[0]; let i = 1; while (i < ops.length) { const op = ops[i]; const fn = ops[i + 1]; i += 2; if ((op === 'optionalAccess' || op === 'optionalCall') && value == null) { return undefined; } if (op === 'access' || op === 'optionalAccess') { lastAccessLHS = value; value = fn(value); } else if (op === 'call' || op === 'optionalCall') { value = fn((...args) => value.call(lastAccessLHS, ...args)); lastAccessLHS = undefined; } } return value; }
|
||
+- function preprocessPlayer(data) {
|
||
+- const ast = meriyah.parse(data);
|
||
+- const body = ast.body;
|
||
+-
|
||
+- const block = (() => {
|
||
+- switch (body.length) {
|
||
+- case 1: {
|
||
+- const func = body[0];
|
||
+- if (
|
||
+- _optionalChain([func, 'optionalAccess', _ => _.type]) === "ExpressionStatement" &&
|
||
+- func.expression.type === "CallExpression" &&
|
||
+- func.expression.callee.type === "MemberExpression" &&
|
||
+- func.expression.callee.object.type === "FunctionExpression"
|
||
+- ) {
|
||
+- return func.expression.callee.object.body;
|
||
+- }
|
||
+- break;
|
||
+- }
|
||
+- case 2: {
|
||
+- const func = body[1];
|
||
+- if (
|
||
+- _optionalChain([func, 'optionalAccess', _2 => _2.type]) === "ExpressionStatement" &&
|
||
+- func.expression.type === "CallExpression" &&
|
||
+- func.expression.callee.type === "FunctionExpression"
|
||
+- ) {
|
||
+- const block = func.expression.callee.body;
|
||
+- // Skip `var window = this;`
|
||
+- block.body.splice(0, 1);
|
||
+- return block;
|
||
+- }
|
||
+- break;
|
||
+- }
|
||
+- }
|
||
+- throw "unexpected structure";
|
||
+- })();
|
||
+-
|
||
+- const found = {
|
||
+- nsig: [] ,
|
||
+- sig: [] ,
|
||
+- };
|
||
+- const plainExpressions = block.body.filter((node) => {
|
||
+- const nsig = extract(node);
|
||
+- if (nsig) {
|
||
+- found.nsig.push(nsig);
|
||
+- }
|
||
+- const sig = extract$1(node);
|
||
+- if (sig) {
|
||
+- found.sig.push(sig);
|
||
+- }
|
||
+- if (node.type === "ExpressionStatement") {
|
||
+- if (node.expression.type === "AssignmentExpression") {
|
||
+- return true;
|
||
+- }
|
||
+- return node.expression.type === "Literal";
|
||
+- }
|
||
+- return true;
|
||
+- });
|
||
+- block.body = plainExpressions;
|
||
+-
|
||
+- for (const [name, options] of Object.entries(found)) {
|
||
+- // TODO: this is cringe fix plz
|
||
+- const unique = new Set(options.map((x) => JSON.stringify(x)));
|
||
+- if (unique.size !== 1) {
|
||
+- const message = `found ${unique.size} ${name} function possibilities`;
|
||
+- throw (
|
||
+- message +
|
||
+- (unique.size ? `: ${options.map((x) => astring.generate(x)).join(", ")}` : "")
|
||
+- );
|
||
+- }
|
||
+- plainExpressions.push({
|
||
+- type: "ExpressionStatement",
|
||
+- expression: {
|
||
+- type: "AssignmentExpression",
|
||
+- operator: "=",
|
||
+- left: {
|
||
+- type: "MemberExpression",
|
||
+- computed: false,
|
||
+- object: {
|
||
+- type: "Identifier",
|
||
+- name: "_result",
|
||
+- },
|
||
+- property: {
|
||
+- type: "Identifier",
|
||
+- name: name,
|
||
+- },
|
||
+- },
|
||
+- right: options[0],
|
||
+- },
|
||
+- });
|
||
+- }
|
||
+-
|
||
+- ast.body.splice(0, 0, ...setupNodes);
|
||
+-
|
||
+- return astring.generate(ast);
|
||
+- }
|
||
+-
|
||
+- function getFromPrepared(code)
|
||
+-
|
||
+-
|
||
+- {
|
||
+- const resultObj = { nsig: null, sig: null };
|
||
+- Function("_result", code)(resultObj);
|
||
+- return resultObj;
|
||
+- }
|
||
+-
|
||
+- function main(input) {
|
||
+- const preprocessedPlayer = input.type === "player"
|
||
+- ? preprocessPlayer(input.player)
|
||
+- : input.preprocessed_player;
|
||
+- const solvers = getFromPrepared(preprocessedPlayer);
|
||
+-
|
||
+- const responses = input.requests.map(
|
||
+- (input) => {
|
||
+- if (!isOneOf(input.type, "nsig", "sig")) {
|
||
+- return {
|
||
+- type: "error",
|
||
+- error: `Unknown request type: ${input.type}`,
|
||
+- };
|
||
+- }
|
||
+- const solver = solvers[input.type];
|
||
+- if (!solver) {
|
||
+- return {
|
||
+- type: "error",
|
||
+- error: `Failed to extract ${input.type} function`,
|
||
+- };
|
||
+- }
|
||
+- try {
|
||
+- return {
|
||
+- type: "result",
|
||
+- data: Object.fromEntries(
|
||
+- input.challenges.map((challenge) => [challenge, solver(challenge)]),
|
||
+- ),
|
||
+- };
|
||
+- } catch (error) {
|
||
+- return {
|
||
+- type: "error",
|
||
+- error: error instanceof Error
|
||
+- ? `${error.message}\n${error.stack}`
|
||
+- : `${error}`,
|
||
+- };
|
||
+- }
|
||
+- },
|
||
+- );
|
||
+-
|
||
+- const output = {
|
||
+- type: "result",
|
||
+- responses,
|
||
+- };
|
||
+- if (input.type === "player" && input.output_preprocessed) {
|
||
+- output.preprocessed_player = preprocessedPlayer;
|
||
+- }
|
||
+- return output;
|
||
+- }
|
||
+-
|
||
+- return main;
|
||
+-
|
||
+-})(meriyah, astring);
|
||
\ No newline at end of file
|
||
Index: yt-dlp/yt_dlp/extractor/youtube/jsc/_builtin/deno.py
|
||
===================================================================
|
||
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
|
||
+++ yt-dlp/yt_dlp/extractor/youtube/jsc/_builtin/deno.py 2025-09-25 21:17:17.684342223 +0200
|
||
@@ -0,0 +1,82 @@
|
||
+from __future__ import annotations
|
||
+
|
||
+import contextlib
|
||
+import shlex
|
||
+import subprocess
|
||
+
|
||
+from yt_dlp.extractor.youtube.jsc._builtin.runtime import (
|
||
+ JsRuntimeChalBaseJCP,
|
||
+ Script,
|
||
+ ScriptSource,
|
||
+ ScriptType,
|
||
+ ScriptVariant,
|
||
+)
|
||
+from yt_dlp.extractor.youtube.jsc._builtin.scripts import load_script
|
||
+from yt_dlp.extractor.youtube.jsc.provider import (
|
||
+ JsChallengeProvider,
|
||
+ JsChallengeProviderError,
|
||
+ JsChallengeRequest,
|
||
+ register_preference,
|
||
+ register_provider,
|
||
+)
|
||
+from yt_dlp.extractor.youtube.pot._provider import BuiltinIEContentProvider
|
||
+from yt_dlp.extractor.youtube.pot.provider import provider_bug_report_message
|
||
+from yt_dlp.utils import Popen
|
||
+
|
||
+
|
||
+@register_provider
|
||
+class DenoJCP(JsRuntimeChalBaseJCP, BuiltinIEContentProvider):
|
||
+ PROVIDER_NAME = 'deno'
|
||
+ JS_RUNTIME_NAME = 'deno'
|
||
+
|
||
+ _DENO_OPTIONS = ['--no-prompt', '--no-remote']
|
||
+ DENO_NPM_LIB_FILENAME = 'deno.lib.js'
|
||
+
|
||
+ def _iter_script_sources(self):
|
||
+ for source, func in super()._iter_script_sources():
|
||
+ if source == ScriptSource.WEB:
|
||
+ yield ScriptSource.BUILTIN, self._deno_npm_source
|
||
+ yield source, func
|
||
+
|
||
+ def _deno_npm_source(self, script_type: ScriptType, /) -> Script | None:
|
||
+ if script_type != ScriptType.LIB:
|
||
+ return None
|
||
+ if 'npm' not in self.ie.get_param('download_ext_components', []):
|
||
+ self._report_ext_component_skipped('npm', 'NPM package')
|
||
+ return None
|
||
+        # Deno-specific lib script that uses Deno NPM imports
|
||
+ error_hook = lambda e: self.logger.warning(
|
||
+ f'Failed to read deno challenge solver lib script: {e}{provider_bug_report_message(self)}')
|
||
+ code = load_script(
|
||
+ self.DENO_NPM_LIB_FILENAME, error_hook=error_hook)
|
||
+ if code:
|
||
+ # TODO: any other permissions we want when not using --no-remote?
|
||
+ with contextlib.suppress(ValueError):
|
||
+ self._DENO_OPTIONS.remove('--no-remote')
|
||
+ return Script(script_type, ScriptVariant.DENO_NPM, ScriptSource.BUILTIN, self._SUPPORTED_VERSION, code)
|
||
+ return None
|
||
+
|
||
+ def _run_js_runtime(self, stdin: str, /) -> str:
|
||
+ cmd = [self.runtime_info.path, 'run', *self._DENO_OPTIONS, '-']
|
||
+ self.logger.debug(f'Running deno: {shlex.join(cmd)}')
|
||
+ with Popen(
|
||
+ cmd,
|
||
+ text=True,
|
||
+ stdin=subprocess.PIPE,
|
||
+ stdout=subprocess.PIPE,
|
||
+ stderr=subprocess.PIPE,
|
||
+ ) as proc:
|
||
+ stdout, stderr = proc.communicate_or_kill(stdin)
|
||
+ # TODO: fails when deno needs to download dependencies?
|
||
+ if proc.returncode or stderr:
|
||
+ msg = 'Error running deno process'
|
||
+ if stderr:
|
||
+ msg = f'{msg}: {stderr}'
|
||
+ raise JsChallengeProviderError(msg)
|
||
+
|
||
+ return stdout
|
||
+
|
||
+
|
||
+@register_preference(DenoJCP)
|
||
+def preference(provider: JsChallengeProvider, requests: list[JsChallengeRequest]) -> int:
|
||
+ return 1000
|
||
Index: yt-dlp/yt_dlp/extractor/youtube/jsc/_builtin/jsinterp.py
|
||
===================================================================
|
||
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
|
||
+++ yt-dlp/yt_dlp/extractor/youtube/jsc/_builtin/jsinterp.py 2025-09-25 21:17:17.684435951 +0200
|
||
@@ -0,0 +1,288 @@
|
||
+import json
|
||
+import re
|
||
+import traceback
|
||
+from collections.abc import Generator
|
||
+
|
||
+from yt_dlp import join_nonempty, traverse_obj
|
||
+from yt_dlp.extractor.youtube.jsc.provider import (
|
||
+ JsChallengeProvider,
|
||
+ JsChallengeProviderError,
|
||
+ JsChallengeProviderResponse,
|
||
+ JsChallengeRequest,
|
||
+ JsChallengeResponse,
|
||
+ JsChallengeType,
|
||
+ NChallengeInput,
|
||
+ NChallengeOutput,
|
||
+ SigChallengeInput,
|
||
+ SigChallengeOutput,
|
||
+ register_provider,
|
||
+)
|
||
+from yt_dlp.extractor.youtube.pot._provider import BuiltinIEContentProvider
|
||
+from yt_dlp.jsinterp import JSInterpreter, LocalNameSpace
|
||
+from yt_dlp.utils import ExtractorError, filter_dict, js_to_json
|
||
+
|
||
+
|
||
+@register_provider
|
||
+class JsInterpJCP(JsChallengeProvider, BuiltinIEContentProvider):
|
||
+ PROVIDER_NAME = 'jsinterp'
|
||
+ _SUPPORTED_TYPES = [JsChallengeType.SIG, JsChallengeType.N]
|
||
+
|
||
+ _NSIG_FUNC_CACHE_ID = 'nsig func'
|
||
+ _DUMMY_STRING = 'dlp_wins'
|
||
+
|
||
+ def is_available(self) -> bool:
|
||
+ """Report this provider as available; unconditionally returns True."""
+ return True
|
||
+
|
||
+    def _real_bulk_solve(self, requests: list[JsChallengeRequest]) -> Generator[JsChallengeProviderResponse, None, None]:
+        """Solve the requests one at a time, yielding exactly one response per request.
+
+        SIG requests go to ``_solve_sig_challenges``, everything else to
+        ``_solve_nsig_challenges``. Any exception raised for a request is
+        yielded as that request's ``error`` instead of propagating.
+        """
+        for request in requests:
+            try:
+                solve = (
+                    self._solve_sig_challenges if request.type == JsChallengeType.SIG
+                    else self._solve_nsig_challenges)
+                solved = solve(request.video_id, request.input)
+                yield JsChallengeProviderResponse(
+                    request=request, response=JsChallengeResponse(type=request.type, output=solved))
+            except Exception as e:
+                yield JsChallengeProviderResponse(request=request, error=e)
|
||
+
|
||
+ # region sig
|
||
+    def _solve_sig_challenges(self, video_id, sig_input: SigChallengeInput) -> SigChallengeOutput:
+        """Solve every sig challenge in ``sig_input``; results map each challenge to its solution"""
+        self.logger.trace(f'Solving {len(sig_input.challenges)} sig challenges using player {sig_input.player_url}')
+        return SigChallengeOutput(results={
+            challenge: self._solve_sig_challenge(challenge, video_id, sig_input.player_url)
+            for challenge in sig_input.challenges
+        })
|
||
+
|
||
+    def _solve_sig_challenge(self, challenge, video_id, player_url) -> str:
+        """Fetch the player code, build its signature function and apply it to ``challenge``"""
+        player_code = self._get_player(video_id, player_url)
+        decipher = self._parse_sig_js(player_code, player_url)
+        return decipher(challenge)
|
||
+
|
||
+ def _parse_sig_js(self, jscode, player_url):
|
||
+ """Locate the signature function in the player JS and return a callable for it.
+
+ The function name is taken from the `sig` group of the patterns below; the
+ returned callable takes one string and passes it as the only argument to
+ the extracted function.
+ """
+ # Examples where `sig` is funcname:
|
||
+ # sig=function(a){a=a.split(""); ... ;return a.join("")};
|
||
+ # ;c&&(c=sig(decodeURIComponent(c)),a.set(b,encodeURIComponent(c)));return a};
|
||
+ # {var l=f,m=h.sp,n=sig(decodeURIComponent(h.s));l.set(m,encodeURIComponent(n))}
|
||
+ # sig=function(J){J=J.split(""); ... ;return J.join("")};
|
||
+ # ;N&&(N=sig(decodeURIComponent(N)),J.set(R,encodeURIComponent(N)));return J};
|
||
+ # {var H=u,k=f.sp,v=sig(decodeURIComponent(f.s));H.set(k,encodeURIComponent(v))}
|
||
+ # NOTE(review): the patterns are presumably tried in the order listed - confirm against _search_regex
+ funcname = self.ie._search_regex(
|
||
+ (r'\b(?P<var>[a-zA-Z0-9_$]+)&&\((?P=var)=(?P<sig>[a-zA-Z0-9_$]{2,})\(decodeURIComponent\((?P=var)\)\)',
|
||
+ r'(?P<sig>[a-zA-Z0-9_$]+)\s*=\s*function\(\s*(?P<arg>[a-zA-Z0-9_$]+)\s*\)\s*{\s*(?P=arg)\s*=\s*(?P=arg)\.split\(\s*""\s*\)\s*;\s*[^}]+;\s*return\s+(?P=arg)\.join\(\s*""\s*\)',
|
||
+ r'(?:\b|[^a-zA-Z0-9_$])(?P<sig>[a-zA-Z0-9_$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)(?:;[a-zA-Z0-9_$]{2}\.[a-zA-Z0-9_$]{2}\(a,\d+\))?',
|
||
+ # Old patterns
|
||
+ r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
|
||
+ r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
|
||
+ r'\bm=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(h\.s\)\)',
|
||
+ # Obsolete patterns
|
||
+ r'("|\')signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
|
||
+ r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
|
||
+ r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
|
||
+ r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
|
||
+ r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\('),
|
||
+ jscode, 'Initial JS player signature function name', group='sig')
|
||
+
|
||
+ # The player's global array (if one was found) is handed to the extracted function.
+ # NOTE(review): filter_dict presumably drops the entry when no global array exists
+ # (varname/global_list are then None) - confirm against yt_dlp.utils.filter_dict
+ varname, global_list = self._interpret_player_js_global_var(jscode, player_url)
|
||
+ jsi = JSInterpreter(jscode)
|
||
+ initial_function = jsi.extract_function(funcname, filter_dict({varname: global_list}))
|
||
+ return lambda s: initial_function([s])
|
||
+ # endregion sig
|
||
+
|
||
+ # region nsig
|
||
+    def _solve_nsig_challenges(self, video_id, nsig_input: NChallengeInput) -> NChallengeOutput:
+        """Solve every n challenge in ``nsig_input``; results map each challenge to its solution"""
+        self.logger.trace(f'Solving {len(nsig_input.challenges)} nsig challenges using player {nsig_input.player_url}')
+        return NChallengeOutput(results={
+            challenge: self._solve_nsig_challenge(challenge, video_id, nsig_input.player_url)
+            for challenge in nsig_input.challenges
+        })
|
||
+
|
||
+    def _solve_nsig_challenge(self, challenge, video_id, player_url) -> str:
+        """Transform a single n value using the player's n function.
+
+        Raises JsChallengeProviderError when the n function code cannot be
+        extracted or when interpreting it fails. After a successful run the
+        function code is handed to the extractor's player data cache.
+        """
+        try:
+            jsi, _, func_code = self._extract_n_function_code(video_id, player_url)
+        except ExtractorError as err:
+            raise JsChallengeProviderError(f'Unable to extract nsig function code: {err}') from err
+
+        try:
+            build_nsig_func = self.ie._cached(
+                self._extract_n_function_from_code, self._NSIG_FUNC_CACHE_ID, player_url)
+            nsig_func = build_nsig_func(jsi, func_code)
+            solved = nsig_func(challenge)
+        except JSInterpreter.Exception as err:
+            self.logger.debug(str(err), once=True)
+            raise JsChallengeProviderError('Native nsig extraction failed', expected=False) from err
+
+        self.logger.debug(f'Transformed nsig {challenge} => {solved}')
+        # Only cache nsig func JS code to disk if successful, and only once
+        self.ie._store_player_data_to_cache('nsig', player_url, func_code)
+        return solved
|
||
+
|
||
+ def _extract_n_function_name(self, jscode, player_url=None):
|
||
+ """Find the name of the n-parameter transform function in the player JS.
+
+ Three strategies are attempted:
+ 1. If the global array holds a string ending in '-_w8_', find the statement
+ returning that entry plus an argument, then search backwards for the
+ enclosing function's name.
+ 2. Match known call-site shapes (see examples below). If the call site
+ indexes an array (`narray[idx]`), the array literal is parsed and its
+ idx-th element is returned.
+ 3. Fall back to a generic search for a function body returning a
+ '..._w8_' string literal plus an identifier.
+ """
+ varname, global_list = self._interpret_player_js_global_var(jscode, player_url)
|
||
+ if debug_str := traverse_obj(global_list, (lambda _, v: v.endswith('-_w8_'), any)):
|
||
+ pattern = r'''(?x)
|
||
+ \{\s*return\s+%s\[%d\]\s*\+\s*(?P<argname>[a-zA-Z0-9_$]+)\s*\}
|
||
+ ''' % (re.escape(varname), global_list.index(debug_str))
|
||
+ if match := re.search(pattern, jscode):
|
||
+ # The next pattern is written backwards ('noitcnuf' = 'function', 'rav' = 'var')
+ # because it is run over the source reversed from the match position
+ pattern = r'''(?x)
|
||
+ \{\s*\)%s\(\s*
|
||
+ (?:
|
||
+ (?P<funcname_a>[a-zA-Z0-9_$]+)\s*noitcnuf\s*
|
||
+ |noitcnuf\s*=\s*(?P<funcname_b>[a-zA-Z0-9_$]+)(?:\s+rav)?
|
||
+ )[;\n]
|
||
+ ''' % re.escape(match.group('argname')[::-1])
|
||
+ if match := re.search(pattern, jscode[match.start()::-1]):
|
||
+ a, b = match.group('funcname_a', 'funcname_b')
|
||
+ # The captured name is reversed as well, so flip it back
+ return (a or b)[::-1]
|
||
+ self.logger.debug(join_nonempty(
|
||
+ 'Initial search was unable to find nsig function name',
|
||
+ player_url and f' player = {player_url}', delim='\n'), once=True)
|
||
+
|
||
+ # Examples (with placeholders nfunc, narray, idx):
|
||
+ # * .get("n"))&&(b=nfunc(b)
|
||
+ # * .get("n"))&&(b=narray[idx](b)
|
||
+ # * b=String.fromCharCode(110),c=a.get(b))&&c=narray[idx](c)
|
||
+ # * a.D&&(b="nn"[+a.D],c=a.get(b))&&(c=narray[idx](c),a.set(b,c),narray.length||nfunc("")
|
||
+ # * a.D&&(PL(a),b=a.j.n||null)&&(b=narray[0](b),a.set("n",b),narray.length||nfunc("")
|
||
+ # * a.D&&(b="nn"[+a.D],vL(a),c=a.j[b]||null)&&(c=narray[idx](c),a.set(b,c),narray.length||nfunc("")
|
||
+ # * J.J="";J.url="";J.Z&&(R="nn"[+J.Z],mW(J),N=J.K[R]||null)&&(N=narray[idx](N),J.set(R,N))}};
|
||
+ funcname, idx = self.ie._search_regex(
|
||
+ r'''(?x)
|
||
+ (?:
|
||
+ \.get\("n"\)\)&&\(b=|
|
||
+ (?:
|
||
+ b=String\.fromCharCode\(110\)|
|
||
+ (?P<str_idx>[a-zA-Z0-9_$.]+)&&\(b="nn"\[\+(?P=str_idx)\]
|
||
+ )
|
||
+ (?:
|
||
+ ,[a-zA-Z0-9_$]+\(a\))?,c=a\.
|
||
+ (?:
|
||
+ get\(b\)|
|
||
+ [a-zA-Z0-9_$]+\[b\]\|\|null
|
||
+ )\)&&\(c=|
|
||
+ \b(?P<var>[a-zA-Z0-9_$]+)=
|
||
+ )(?P<nfunc>[a-zA-Z0-9_$]+)(?:\[(?P<idx>\d+)\])?\([a-zA-Z]\)
|
||
+ (?(var),[a-zA-Z0-9_$]+\.set\((?:"n+"|[a-zA-Z0-9_$]+)\,(?P=var)\))''',
|
||
+ jscode, 'n function name', group=('nfunc', 'idx'), default=(None, None))
|
||
+ if not funcname:
|
||
+ self.logger.warning(join_nonempty(
|
||
+ 'Falling back to generic n function search',
|
||
+ player_url and f' player = {player_url}', delim='\n'), once=True)
|
||
+ return self.ie._search_regex(
|
||
+ r'''(?xs)
|
||
+ ;\s*(?P<name>[a-zA-Z0-9_$]+)\s*=\s*function\([a-zA-Z0-9_$]+\)
|
||
+ \s*\{(?:(?!};).)+?return\s*(?P<q>["'])[\w-]+_w8_(?P=q)\s*\+\s*[a-zA-Z0-9_$]+''',
|
||
+ jscode, 'Initial JS player n function name', group='name')
|
||
+ elif not idx:
|
||
+ return funcname
|
||
+
|
||
+ # `funcname` names an array here: parse its literal and pick element `idx`
+ return json.loads(js_to_json(self.ie._search_regex(
|
||
+ rf'var {re.escape(funcname)}\s*=\s*(\[.+?\])\s*[,;]', jscode,
|
||
+ f'Initial JS player n function list ({funcname}.{idx})')))[int(idx)]
|
||
+
|
||
+ def _fixup_n_function_code(self, argnames, nsig_code, jscode, player_url):
|
||
+ """Patch the extracted n function code and return (argnames, fixed_code).
+
+ Three fixups are applied in order:
+ (a) the player's global array, when found, is declared in front of the code;
+ (b) an `if (typeof X === "undefined") return <first arg>;` statement is removed;
+ (c) functions the code references but does not define are copied from the
+ full player JS and declared in front of it.
+ """
+ # Fixup global array
|
||
+ varname, global_list = self._interpret_player_js_global_var(jscode, player_url)
|
||
+ if varname and global_list:
|
||
+ nsig_code = f'var {varname}={json.dumps(global_list)}; {nsig_code}'
|
||
+ else:
|
||
+ varname = self._DUMMY_STRING
|
||
+ global_list = []
|
||
+
|
||
+ # Fixup typeof check
|
||
+ # 'undefined' may be spelled via the global array; without a known index any digits are accepted
+ undefined_idx = global_list.index('undefined') if 'undefined' in global_list else r'\d+'
|
||
+ fixed_code = re.sub(
|
||
+ fr'''(?x)
|
||
+ ;\s*if\s*\(\s*typeof\s+[a-zA-Z0-9_$]+\s*===?\s*(?:
|
||
+ (["\'])undefined\1|
|
||
+ {re.escape(varname)}\[{undefined_idx}\]
|
||
+ )\s*\)\s*return\s+{re.escape(argnames[0])};
|
||
+ ''', ';', nsig_code)
|
||
+ if fixed_code == nsig_code:
|
||
+ self.logger.debug(join_nonempty(
|
||
+ 'No typeof statement found in nsig function code',
|
||
+ player_url and f' player = {player_url}', delim='\n'), once=True)
|
||
+
|
||
+ # Fixup global funcs
|
||
+ # Run the code once on a dummy value; the interpreter's `_undefined_varnames` is read afterwards
+ jsi = JSInterpreter(fixed_code)
|
||
+ cache_id = (self._NSIG_FUNC_CACHE_ID, player_url)
|
||
+ try:
|
||
+ self.ie._cached(
|
||
+ self._extract_n_function_from_code, *cache_id)(jsi, (argnames, fixed_code))(self._DUMMY_STRING)
|
||
+ except JSInterpreter.Exception:
|
||
+ # Drop the entry cached for this trial run
+ self.ie._player_cache.pop(cache_id, None)
|
||
+
|
||
+ global_funcnames = jsi._undefined_varnames
|
||
+ debug_names = []
|
||
+ jsi = JSInterpreter(jscode)
|
||
+ for func_name in global_funcnames:
|
||
+ try:
|
||
+ func_args, func_code = jsi.extract_function_code(func_name)
|
||
+ fixed_code = f'var {func_name} = function({", ".join(func_args)}) {{ {func_code} }}; {fixed_code}'
|
||
+ debug_names.append(func_name)
|
||
+ except Exception:
|
||
+ self.logger.warning(join_nonempty(
|
||
+ f'Unable to extract global nsig function {func_name} from player JS',
|
||
+ player_url and f' player = {player_url}', delim='\n'), once=True)
|
||
+
|
||
+ if debug_names:
|
||
+ self.logger.debug(f'Extracted global nsig functions: {", ".join(debug_names)}')
|
||
+
|
||
+ return argnames, fixed_code
|
||
+
|
||
+    def _extract_n_function_code(self, video_id, player_url):
+        """Return ``(jsi, player_id, func_code)`` for the player's n function.
+
+        Function code found in the extractor's 'nsig' player data cache is
+        returned as-is; otherwise the player is loaded, the n function located
+        and its code passed through ``_fixup_n_function_code``.
+        """
+        player_id = self.ie._extract_player_info(player_url)
+        cached_code = self.ie._load_player_data_from_cache('nsig', player_url)
+        jscode = cached_code or self.ie._load_player(video_id, player_url)
+        jsi = JSInterpreter(jscode)
+
+        if cached_code:
+            return jsi, player_id, cached_code
+
+        func_name = self._extract_n_function_name(jscode, player_url=player_url)
+
+        # XXX: Work around (a) global array variable, (b) `typeof` short-circuit, (c) global functions
+        raw_code = jsi.extract_function_code(func_name)
+        fixed = self._fixup_n_function_code(*raw_code, jscode, player_url)
+
+        return jsi, player_id, fixed
|
||
+
|
||
+    def _extract_n_function_from_code(self, jsi, func_code):
+        """Build a one-argument callable from ``func_code`` using the interpreter ``jsi``.
+
+        The callable passes its argument as the sole element of the argument
+        list. ``JSInterpreter.Exception`` is re-raised unchanged; any other
+        exception is wrapped in a ``JSInterpreter.Exception`` that carries the
+        formatted traceback.
+        """
+        n_function = jsi.extract_function_from_code(*func_code)
+
+        def extract_nsig(s):
+            try:
+                return n_function([s])
+            except JSInterpreter.Exception:
+                raise
+            except Exception as e:
+                raise JSInterpreter.Exception(traceback.format_exc(), cause=e)
+
+        return extract_nsig
|
||
+ # endregion nsig
|
||
+
|
||
+ def _interpret_player_js_global_var(self, jscode, player_url):
|
||
+ """Returns tuple of: variable name string, variable value list
+
+ Looks for a `var NAME = ...` declaration directly after the "use strict"
+ directive whose value is either a quoted string followed by `.split(...)`
+ or an array literal of quoted strings, and interprets that value.
+ Returns (None, None) when no such declaration is found.
+ """
|
||
+ extract_global_var = self.ie._cached(self.ie._search_regex, 'jsc global array', player_url)
|
||
+ varcode, varname, varvalue = extract_global_var(
|
||
+ r'''(?x)
|
||
+ (?P<q1>["\'])use\s+strict(?P=q1);\s*
|
||
+ (?P<code>
|
||
+ var\s+(?P<name>[a-zA-Z0-9_$]+)\s*=\s*
|
||
+ (?P<value>
|
||
+ (?P<q2>["\'])(?:(?!(?P=q2)).|\\.)+(?P=q2)
|
||
+ \.split\((?P<q3>["\'])(?:(?!(?P=q3)).)+(?P=q3)\)
|
||
+ |\[\s*(?:(?P<q4>["\'])(?:(?!(?P=q4)).|\\.)*(?P=q4)\s*,?\s*)+\]
|
||
+ )
|
||
+ )[;,]
|
||
+ ''', jscode, 'global variable', group=('code', 'name', 'value'), default=(None, None, None))
|
||
+ if not varcode:
|
||
+ self.logger.debug(join_nonempty(
|
||
+ 'No global array variable found in player JS',
|
||
+ player_url and f' player = {player_url}', delim='\n'), once=True)
|
||
+ return None, None
|
||
+
|
||
+ # Evaluate only the matched value expression, in a fresh namespace
+ jsi = JSInterpreter(varcode)
|
||
+ interpret_global_var = self.ie._cached(jsi.interpret_expression, 'jsc global list', player_url)
|
||
+ return varname, interpret_global_var(varvalue, LocalNameSpace(), allow_recursion=10)
|
||
Index: yt-dlp/yt_dlp/extractor/youtube/jsc/_builtin/node.py
|
||
===================================================================
|
||
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
|
||
+++ yt-dlp/yt_dlp/extractor/youtube/jsc/_builtin/node.py 2025-09-25 21:17:17.684750310 +0200
|
||
@@ -0,0 +1,47 @@
|
||
+from __future__ import annotations
|
||
+
|
||
+import shlex
|
||
+import subprocess
|
||
+
|
||
+from yt_dlp.extractor.youtube.jsc._builtin.runtime import JsRuntimeChalBaseJCP
|
||
+from yt_dlp.extractor.youtube.jsc.provider import (
|
||
+ JsChallengeProvider,
|
||
+ JsChallengeProviderError,
|
||
+ JsChallengeRequest,
|
||
+ register_preference,
|
||
+ register_provider,
|
||
+)
|
||
+from yt_dlp.extractor.youtube.pot._provider import BuiltinIEContentProvider
|
||
+from yt_dlp.utils import Popen
|
||
+
|
||
+
|
||
+@register_provider
+class NodeJCP(JsRuntimeChalBaseJCP, BuiltinIEContentProvider):
+    """JS challenge provider that runs the solver script with the ``node`` runtime."""
+
+    PROVIDER_NAME = 'node'
+    JS_RUNTIME_NAME = 'node'
+
+    # Arguments appended after the node executable path
+    _ARGS = ['--permission', '-']
+
+    def _run_js_runtime(self, stdin: str, /) -> str:
+        """Pipe ``stdin`` into a node process and return what it wrote to stdout.
+
+        Raises JsChallengeProviderError when the process exits with a non-zero
+        return code or writes anything to stderr.
+        """
+        command = [self.runtime_info.path, *self._ARGS]
+        self.logger.debug(f'Running node: {shlex.join(command)}')
+        pipes = {'stdin': subprocess.PIPE, 'stdout': subprocess.PIPE, 'stderr': subprocess.PIPE}
+        with Popen(command, text=True, **pipes) as proc:
+            stdout, stderr = proc.communicate_or_kill(stdin)
+            if proc.returncode or stderr:
+                msg = 'Error running node process'
+                raise JsChallengeProviderError(f'{msg}: {stderr}' if stderr else msg)
+
+        return stdout
|
||
+
|
||
+
|
||
+@register_preference(NodeJCP)
|
||
+def preference(provider: JsChallengeProvider, requests: list[JsChallengeRequest]) -> int:
|
||
+ """Return the fixed preference score (900) registered for NodeJCP; both arguments are ignored."""
+ return 900
|
||
Index: yt-dlp/yt_dlp/extractor/youtube/jsc/_builtin/runtime.py
|
||
===================================================================
|
||
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
|
||
+++ yt-dlp/yt_dlp/extractor/youtube/jsc/_builtin/runtime.py 2025-09-25 21:17:17.684970592 +0200
|
||
@@ -0,0 +1,283 @@
|
||
+from __future__ import annotations
|
||
+
|
||
+import collections
|
||
+import dataclasses
|
||
+import enum
|
||
+import functools
|
||
+import hashlib
|
||
+import importlib.resources
|
||
+import json
|
||
+import sys
|
||
+
|
||
+import yt_dlp
|
||
+from yt_dlp.extractor.youtube.jsc._builtin.scripts import load_script
|
||
+from yt_dlp.extractor.youtube.jsc.provider import (
|
||
+ JsChallengeProvider,
|
||
+ JsChallengeProviderError,
|
||
+ JsChallengeProviderRejectedRequest,
|
||
+ JsChallengeProviderResponse,
|
||
+ JsChallengeResponse,
|
||
+ JsChallengeType,
|
||
+ NChallengeOutput,
|
||
+ SigChallengeOutput,
|
||
+)
|
||
+from yt_dlp.extractor.youtube.pot.provider import provider_bug_report_message
|
||
+from yt_dlp.utils._jsruntime import JsRuntimeInfo
|
||
+
|
||
+TYPE_CHECKING = False
|
||
+if TYPE_CHECKING:
|
||
+ from collections.abc import Generator
|
||
+
|
||
+ from yt_dlp.extractor.youtube.jsc.provider import JsChallengeRequest
|
||
+
|
||
+
|
||
+class ScriptType(enum.Enum):
|
||
+ """The two solver scripts that are concatenated into the runtime's stdin."""
+ # Script that provides `lib` (destructured into `astring` and `meriyah`)
+ LIB = 'lib'
|
||
+ # Script that provides the `jsc` entry point
+ CORE = 'core'
|
||
+
|
||
+
|
||
+class ScriptVariant(enum.Enum):
|
||
+ """Build flavour of a solver script; used as the key when looking up allowed hashes."""
+ UNKNOWN = 'unknown'
|
||
+ MINIFIED = 'minified'
|
||
+ UNMINIFIED = 'unminified'
|
||
+ # Runtime-specific lib scripts
+ DENO_NPM = 'deno_npm'
|
||
+ BUN_NPM = 'bun_npm'
|
||
+
|
||
+
|
||
+class ScriptSource(enum.Enum):
|
||
+ """Where a solver script was obtained from; the values appear in log messages."""
+ PYPACKAGE = 'python package'
|
||
+ BINARY = 'binary'
|
||
+ CACHE = 'cache'
|
||
+ WEB = 'web'
|
||
+ BUILTIN = 'builtin'
|
||
+
|
||
+
|
||
+@dataclasses.dataclass
+class Script:
+    """A challenge solver script plus the metadata describing its kind and origin."""
+
+    type: ScriptType
+    variant: ScriptVariant
+    source: ScriptSource
+    version: str
+    code: str
+
+    @functools.cached_property
+    def hash(self, /) -> str:
+        """SHA3-512 hex digest of ``code``; computed on first access and then reused."""
+        digest = hashlib.sha3_512(self.code.encode())
+        return digest.hexdigest()
+
+    def __str__(self, /):
+        short_hash = self.hash[:7]
+        return (
+            f'<Script {self.type.value!r} v{self.version} (source: {self.source.value}) '
+            f'variant={self.variant.value!r} size={len(self.code)} hash={short_hash}...>')
|
||
+
|
||
+
|
||
+class JsRuntimeChalBaseJCP(JsChallengeProvider):
|
||
+ JS_RUNTIME_NAME: str
|
||
+ _CACHE_SECTION = 'challenge-solver'
|
||
+
|
||
+ _JCP_GUIDE_URL = 'https://github.com/yt-dlp/yt-dlp/wiki/YouTube-JS-Challenges'
|
||
+ _REPOSITORY = 'yt-dlp/yt-dlp-jsc-deno'
|
||
+ _SUPPORTED_TYPES = [JsChallengeType.N, JsChallengeType.SIG]
|
||
+ _SUPPORTED_VERSION = '0.0.1'
|
||
+ # TODO: insert correct hashes here
|
||
+ # TODO: Integration tests for each kind of scripts source
|
||
+ _ALLOWED_HASHES = {
|
||
+ ScriptType.LIB: {
|
||
+ ScriptVariant.MINIFIED: '488c1903d8beb24ee9788400b2a91e724751b04988ba4de398320de0e36b4a9e3a8db58849189bf1d48df3fc4b0972d96b4aabfd80fea25d7c43988b437062fd',
|
||
+ ScriptVariant.DENO_NPM: 'cbd33afbfa778e436aef774f3983f0b1234ad7f737ea9dbd9783ee26dce195f4b3242d1e202b2038e748044960bc2f976372e883c76157b24acdea939dba7603',
|
||
+ ScriptVariant.BUN_NPM: '2065c7584b39d4e3fe62f147ff0572c051629a00b1bdb3dbd21d61db172a42ad0fac210e923e080a58ca21d1cbf7c6a22a727a726654bae83af045e12958a5a0',
|
||
+ },
|
||
+ ScriptType.CORE: {
|
||
+ ScriptVariant.MINIFIED: 'df0c08c152911dedd35a98bbbb6a1786718c11e4233c52abda3d19fd11d97c3ba09745dfbca913ddeed72fead18819f62139220420c41a04d5a66ed629fbde4e',
|
||
+ ScriptVariant.UNMINIFIED: '8abfd4818573b6cf397cfae227661e3449fb5ac737a272ac0cf8268d94447b04b1c9a15f459b336175bf0605678a376e962df99b2c8d5498f16db801735f771c',
|
||
+ },
|
||
+ }
|
||
+
|
||
+ _SCRIPT_FILENAMES = {
|
||
+ ScriptType.LIB: 'lib.js',
|
||
+ ScriptType.CORE: 'core.js',
|
||
+ }
|
||
+
|
||
+ _MIN_SCRIPT_FILENAMES = {
|
||
+ ScriptType.LIB: 'lib.min.js',
|
||
+ ScriptType.CORE: 'core.min.js',
|
||
+ }
|
||
+
|
||
+ def __init__(self, *args, **kwargs):
|
||
+ """Initialise the provider and read the undocumented `dev` setting.
+
+ All arguments are forwarded unchanged to the parent initialiser.
+ """
+ super().__init__(*args, **kwargs)
|
||
+ # Set to False by _get_script once no usable solver script could be found
+ self._available = True
|
||
+ # Note: developer use only, intentionally not documented.
|
||
+ # This bypasses verification of script hashes and versions.
|
||
+ # --extractor-args youtubejsc-{provider key}:dev=true
|
||
+ self.is_dev = self.settings.get('dev', []) == ['true']
|
||
+ if self.is_dev:
|
||
+ self.logger.warning(
|
||
+ f'You have enabled dev mode for {self.PROVIDER_KEY}JCP. '
|
||
+ f'This is a developer option intended for debugging. \n'
|
||
+ ' If you experience any issues while using this option, '
|
||
+ f'{self.ie._downloader._format_err("DO NOT", self.ie._downloader.Styles.ERROR)} open a bug report')
|
||
+
|
||
+ def _run_js_runtime(self, stdin: str, /) -> str:
|
||
+ """To be implemented by subclasses
+
+ Receives the program built by _construct_stdin and must return the
+ runtime's stdout, which _real_bulk_solve parses as JSON.
+ """
|
||
+ raise NotImplementedError
|
||
+
|
||
+    def _real_bulk_solve(self, /, requests: list[JsChallengeRequest]):
+        """Solve requests in batches, running the JS runtime once per distinct player URL.
+
+        For each player URL the preprocessed player is loaded from the cache
+        when present; otherwise the player is fetched and the solver is asked
+        to emit a preprocessed player, which is then stored in the cache.
+
+        Yields one JsChallengeProviderResponse per request, pairing requests
+        with the solver's ``responses`` in order.
+
+        Raises JsChallengeProviderError when the runtime output is not valid
+        JSON or when the solver reports a top-level error.
+        """
+        grouped: dict[str, list[JsChallengeRequest]] = collections.defaultdict(list)
+        for request in requests:
+            grouped[request.input.player_url].append(request)
+
+        for player_url, grouped_requests in grouped.items():
+            cache_key = f'player:{player_url}'
+            player = self.ie.cache.load(self._CACHE_SECTION, cache_key)
+            cached = bool(player)
+            if not cached:
+                video_id = next((request.video_id for request in grouped_requests), None)
+                player = self._get_player(video_id, player_url)
+
+            stdin = self._construct_stdin(player, cached, grouped_requests)
+            stdout = self._run_js_runtime(stdin)
+            try:
+                output = json.loads(stdout)
+            except json.JSONDecodeError as e:
+                # Report unparsable runtime output as a provider error rather than a bare ValueError
+                raise JsChallengeProviderError(
+                    f'Unable to parse {self.JS_RUNTIME_NAME} challenge solver output as JSON: {e}') from e
+            if output['type'] == 'error':
+                raise JsChallengeProviderError(output['error'])
+
+            if preprocessed := output.get('preprocessed_player'):
+                self.ie.cache.store(self._CACHE_SECTION, cache_key, preprocessed)
+
+            for request, response_data in zip(grouped_requests, output['responses']):
+                if response_data['type'] == 'error':
+                    yield JsChallengeProviderResponse(request, None, response_data['error'])
+                else:
+                    yield JsChallengeProviderResponse(request, JsChallengeResponse(request.type, (
+                        NChallengeOutput(response_data['data']) if request.type is JsChallengeType.N
+                        else SigChallengeOutput(response_data['data']))))
|
||
+
|
||
+ def _construct_stdin(self, player: str, preprocessed: bool, requests: list[JsChallengeRequest], /) -> str:
|
||
+ """Build the JS program that is piped to the runtime.
+
+ The program is the lib script, the core script and a final statement that
+ calls `jsc` with the JSON-encoded input and prints the JSON-encoded result.
+ When `preprocessed` is true, `player` is passed as an already preprocessed
+ player; otherwise it is passed as the raw player and a preprocessed copy is
+ requested in the output.
+ """
+ json_requests = [{
|
||
+ # TODO: N challenges are sent to the solver under the type name 'nsig'; revisit this naming
|
||
+ 'type': 'nsig' if request.type.value == 'n' else request.type.value,
|
||
+ 'challenges': request.input.challenges,
|
||
+ } for request in requests]
|
||
+ data = {
|
||
+ 'type': 'preprocessed',
|
||
+ 'preprocessed_player': player,
|
||
+ 'requests': json_requests,
|
||
+ } if preprocessed else {
|
||
+ 'type': 'player',
|
||
+ 'player': player,
|
||
+ 'requests': json_requests,
|
||
+ 'output_preprocessed': True,
|
||
+ }
|
||
+ return f'''\
|
||
+ {self._lib_script.code}
|
||
+ const {{ astring, meriyah }} = lib;
|
||
+ {self._core_script.code}
|
||
+ console.log(JSON.stringify(jsc({json.dumps(data)})));
|
||
+ '''
|
||
+
|
||
+ # region: challenge solver script
|
||
+
|
||
+ @functools.cached_property
|
||
+ def _lib_script(self, /):
|
||
+ """The LIB solver script, resolved through _get_script on first access and then reused."""
+ return self._get_script(ScriptType.LIB)
|
||
+
|
||
+ @functools.cached_property
|
||
+ def _core_script(self, /):
|
||
+ """The CORE solver script, resolved through _get_script on first access and then reused."""
+ return self._get_script(ScriptType.CORE)
|
||
+
|
||
+    def _get_script(self, script_type: ScriptType, /) -> Script:
+        """Return the first usable solver script of ``script_type``.
+
+        Sources are tried in the order given by ``_iter_script_sources``. Unless
+        dev mode is enabled, a script is skipped (with a warning) when its
+        version differs from ``_SUPPORTED_VERSION`` or when a hash is registered
+        for its type and variant and the script's hash does not match it.
+
+        Raises JsChallengeProviderRejectedRequest, and marks the provider as
+        unavailable, when no source yields a usable script.
+        """
+        for _, from_source in self._iter_script_sources():
+            script = from_source(script_type)
+            if not script:
+                continue
+
+            if not self.is_dev:
+                if script.version != self._SUPPORTED_VERSION:
+                    self.logger.warning(
+                        f'Challenge solver {script_type.value} script version {script.version} '
+                        f'is not supported (source: {script.source.value}, supported version: {self._SUPPORTED_VERSION})')
+                    # An unsupported version must not be used; try the next source
+                    continue
+
+                # A variant may register one hash (str) or several; normalise so that
+                # membership is an exact comparison instead of a substring test
+                allowed_hashes = self._ALLOWED_HASHES[script.type].get(script.variant) or ()
+                if isinstance(allowed_hashes, str):
+                    allowed_hashes = (allowed_hashes,)
+                if allowed_hashes and script.hash not in allowed_hashes:
+                    self.logger.warning(
+                        f'Hash mismatch on challenge solver {script.type.value} script '
+                        f'(source: {script.source.value}, hash: {script.hash})!{provider_bug_report_message(self)}')
+                    continue
+
+            self.logger.debug(f'Using challenge solver {script.type.value} script v{script.version} (source: {script.source.value}, variant: {script.variant.value})')
+            return script
+
+        self._available = False
+        raise JsChallengeProviderRejectedRequest(f'No usable challenge solver {script_type.value} script available')
|
||
+
|
||
+    def _iter_script_sources(self) -> Generator[tuple[ScriptSource, callable]]:
+        """Yield ``(source, loader)`` pairs in the order in which sources are tried."""
+        yield ScriptSource.PYPACKAGE, self._pypackage_source
+        yield ScriptSource.BINARY, self._binary_source
+        yield ScriptSource.CACHE, self._cached_source
+        yield ScriptSource.BUILTIN, self._builtin_source
+        yield ScriptSource.WEB, self._web_release_source
|
||
+
|
||
+    def _pypackage_source(self, script_type: ScriptType, /) -> Script | None:
+        """Load the script from the optional ``yt_dlp_jsc`` package; ``None`` if it cannot be imported."""
+        try:
+            import yt_dlp_jsc as yt_dlp_ejs
+        except ImportError as e:
+            self.logger.trace(f'yt_dlp_ejs python package unavailable, reason: {e}')
+            return None
+
+        # TODO: fix API naming
+        if script_type is ScriptType.CORE:
+            code = yt_dlp_ejs.jsc()
+        else:
+            code = yt_dlp_ejs.lib()
+        return Script(script_type, ScriptVariant.MINIFIED, ScriptSource.PYPACKAGE, yt_dlp_ejs.version, code)
|
||
+
|
||
+    def _binary_source(self, script_type: ScriptType, /) -> Script | None:
+        """Load the minified script bundled with a frozen build.
+
+        Returns ``None`` unless ``sys.frozen`` is truthy, ``sys._MEIPASS``
+        exists and the script is a resource of the ``yt_dlp`` package.
+        """
+        if not (getattr(sys, 'frozen', False) and hasattr(sys, '_MEIPASS')):
+            return None
+        if not importlib.resources.is_resource(yt_dlp, self._MIN_SCRIPT_FILENAMES[script_type]):
+            return None
+
+        code = importlib.resources.read_text(yt_dlp, self._MIN_SCRIPT_FILENAMES[script_type])
+        return Script(script_type, ScriptVariant.MINIFIED, ScriptSource.BINARY, self._SUPPORTED_VERSION, code)
|
||
+
|
||
+    def _cached_source(self, script_type: ScriptType, /) -> Script | None:
+        """Load the script stored in the cache under the script type's value; ``None`` on a miss."""
+        data = self.ie.cache.load(self._CACHE_SECTION, script_type.value)
+        if not data:
+            return None
+        return Script(script_type, ScriptVariant.MINIFIED, ScriptSource.CACHE, data['version'], data['code'])
|
||
+
|
||
+    def _builtin_source(self, script_type: ScriptType, /) -> Script | None:
+        """Load the unminified script shipped in the scripts package; ``None`` if it cannot be read."""
+        def report_failure(_):
+            self.logger.warning(
+                f'Failed to read builtin challenge solver {script_type.value} script{provider_bug_report_message(self)}')
+
+        code = load_script(self._SCRIPT_FILENAMES[script_type], error_hook=report_failure)
+        if not code:
+            return None
+        # TODO: strip internal header comments as to match published version
+        return Script(script_type, ScriptVariant.UNMINIFIED, ScriptSource.BUILTIN, self._SUPPORTED_VERSION, code)
|
||
+
|
||
+    def _web_release_source(self, script_type: ScriptType, /) -> Script | None:
+        """Download the minified script from the GitHub release matching ``_SUPPORTED_VERSION``.
+
+        Returns ``None`` (after reporting the skip) when 'ejs-github' is not
+        listed in the ``download_ext_components`` param, or when the download
+        yields nothing. A downloaded script is stored in the cache together
+        with its version before being returned.
+        """
+        if 'ejs-github' not in self.ie.get_param('download_ext_components', []):
+            self._report_ext_component_skipped('ejs-github', 'challenge solver script')
+            return None
+
+        url = f'https://github.com/{self._REPOSITORY}/releases/download/{self._SUPPORTED_VERSION}/{self._MIN_SCRIPT_FILENAMES[script_type]}'
+        note = f'[{self.logger.prefix}] Downloading challenge solver {script_type.value} script from {url}'
+        errnote = f'[{self.logger.prefix}] Failed to download challenge solver {script_type.value} script'
+        code = self.ie._download_webpage_with_retries(url, None, note, errnote, fatal=False)
+        if not code:
+            return None
+
+        self.ie.cache.store(self._CACHE_SECTION, script_type.value, {
+            'version': self._SUPPORTED_VERSION,
+            'code': code,
+        })
+        return Script(script_type, ScriptVariant.MINIFIED, ScriptSource.WEB, self._SUPPORTED_VERSION, code)
|
||
+
|
||
+ # endregion: challenge solver script
|
||
+
|
||
+    @property
+    def runtime_info(self) -> JsRuntimeInfo | bool:
+        """Info of the runtime named ``JS_RUNTIME_NAME``, or ``False`` if it is missing or unsupported."""
+        runtime = self.ie._downloader._js_runtimes.get(self.JS_RUNTIME_NAME)
+        if runtime and runtime.info and runtime.info.supported:
+            return runtime.info
+        return False
|
||
+
|
||
+    def is_available(self, /) -> bool:
+        """Available only when the runtime is usable and no script lookup has failed so far."""
+        return self._available if self.runtime_info else False
|
||
+
|
||
+    def _report_ext_component_skipped(self, component: str, component_description: str):
+        """Warn that downloads of an external component are disabled and how to enable them."""
+        message = (
+            f'External {component_description} downloads are disabled. '
+            f'This may be required to solve JS challenges using {self.JS_RUNTIME_NAME} JS runtime. '
+            f'You can enable {component_description} downloads with "--download-ext-components {component}". '
+            f'For more information and alternatives, refer to {self._JCP_GUIDE_URL}')
+        self.logger.warning(message)
|
||
Index: yt-dlp/yt_dlp/extractor/youtube/jsc/_builtin/scripts/__init__.py
|
||
===================================================================
|
||
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
|
||
+++ yt-dlp/yt_dlp/extractor/youtube/jsc/_builtin/scripts/__init__.py 2025-09-25 21:17:17.685386781 +0200
|
||
@@ -0,0 +1,12 @@
|
||
+import importlib.resources
|
||
+
|
||
+
|
||
+def load_script(filename, error_hook=None):
+    """Return the text of the resource ``filename`` bundled in this package, or ``None``.
+
+    ``None`` is returned silently when the resource does not exist. When the
+    lookup or the read raises ``OSError`` (which includes
+    ``FileNotFoundError``) or ``ModuleNotFoundError``, ``error_hook`` - if
+    given - is called with the exception and ``None`` is returned.
+    """
+    try:
+        # The existence check sits inside the try so that its failures reach error_hook too
+        if not importlib.resources.is_resource(__package__, filename):
+            return None
+        return importlib.resources.read_text(__package__, filename)
+    except (OSError, ModuleNotFoundError) as e:
+        if error_hook:
+            error_hook(e)
+        return None
|
||
Index: yt-dlp/yt_dlp/extractor/youtube/jsc/_builtin/scripts/bun.lib.js
|
||
===================================================================
|
||
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
|
||
+++ yt-dlp/yt_dlp/extractor/youtube/jsc/_builtin/scripts/bun.lib.js 2025-09-25 21:17:17.685553703 +0200
|
||
@@ -0,0 +1,3 @@
|
||
+// TODO: Generate this file automatically from bundle repo
|
||
+// Loads both parser/generator dependencies in parallel via dynamic import and
+// exposes them as `lib.meriyah` and `lib.astring`.
+// NOTE(review): the `name@version` specifiers presumably rely on Bun resolving
+// versioned imports itself - confirm.
+const [m, a] = await Promise.all([ import("meriyah@6.1.4"), import("astring@1.9.0") ]);
|
||
+export const lib = { meriyah: m, astring: a };
|
||
\ No newline at end of file
|
||
Index: yt-dlp/yt_dlp/extractor/youtube/jsc/_builtin/scripts/core.js
|
||
===================================================================
|
||
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
|
||
+++ yt-dlp/yt_dlp/extractor/youtube/jsc/_builtin/scripts/core.js 2025-09-25 21:17:17.685656860 +0200
|
||
@@ -0,0 +1,501 @@
|
||
+// This file is auto-generated from https://github.com/Grub4K/yt-dlp-jsc-deno
|
||
+// Do not edit, changes will be overwritten.
|
||
+// TODO: make this automatically updated
|
||
+var jsc = (function (meriyah, astring) {
|
||
+ 'use strict';
|
||
+
|
||
+ function matchesStructure(
|
||
+ obj,
|
||
+ structure,
|
||
+ ) {
|
||
+ if (Array.isArray(structure)) {
|
||
+ if (!Array.isArray(obj)) {
|
||
+ return false;
|
||
+ }
|
||
+ return (
|
||
+ structure.length === obj.length &&
|
||
+ structure.every((value, index) => matchesStructure(obj[index], value))
|
||
+ );
|
||
+ }
|
||
+ if (typeof structure === "object") {
|
||
+ if (!obj) {
|
||
+ return !structure;
|
||
+ }
|
||
+ if ("or" in structure) {
|
||
+ // Handle `{ or: [a, b] }`
|
||
+ return structure.or.some((node) => matchesStructure(obj, node));
|
||
+ }
|
||
+ for (const [key, value] of Object.entries(structure)) {
|
||
+ if (!matchesStructure(obj[key ], value)) {
|
||
+ return false;
|
||
+ }
|
||
+ }
|
||
+ return true;
|
||
+ }
|
||
+ return structure === obj;
|
||
+ }
|
||
+
|
||
+ function isOneOf(value, ...of) {
|
||
+ return of.includes(value );
|
||
+ }
|
||
+
|
||
+ function _optionalChain$2(ops) { let lastAccessLHS = undefined; let value = ops[0]; let i = 1; while (i < ops.length) { const op = ops[i]; const fn = ops[i + 1]; i += 2; if ((op === 'optionalAccess' || op === 'optionalCall') && value == null) { return undefined; } if (op === 'access' || op === 'optionalAccess') { lastAccessLHS = value; value = fn(value); } else if (op === 'call' || op === 'optionalCall') { value = fn((...args) => value.call(lastAccessLHS, ...args)); lastAccessLHS = undefined; } } return value; }
|
||
+
|
||
+
|
||
+ const logicalExpression = {
|
||
+ type: "ExpressionStatement",
|
||
+ expression: {
|
||
+ type: "LogicalExpression",
|
||
+ left: {
|
||
+ type: "Identifier",
|
||
+ },
|
||
+ right: {
|
||
+ type: "SequenceExpression",
|
||
+ expressions: [
|
||
+ {
|
||
+ type: "AssignmentExpression",
|
||
+ left: {
|
||
+ type: "Identifier",
|
||
+ },
|
||
+ operator: "=",
|
||
+ right: {
|
||
+ type: "CallExpression",
|
||
+ callee: {
|
||
+ type: "Identifier",
|
||
+ },
|
||
+ arguments: {
|
||
+ or: [
|
||
+ [
|
||
+ { type: "Literal" },
|
||
+ {
|
||
+ type: "CallExpression",
|
||
+ callee: {
|
||
+ type: "Identifier",
|
||
+ name: "decodeURIComponent",
|
||
+ },
|
||
+ arguments: [{ type: "Identifier" }],
|
||
+ optional: false,
|
||
+ },
|
||
+ ],
|
||
+ [
|
||
+ {
|
||
+ type: "CallExpression",
|
||
+ callee: {
|
||
+ type: "Identifier",
|
||
+ name: "decodeURIComponent",
|
||
+ },
|
||
+ arguments: [{ type: "Identifier" }],
|
||
+ optional: false,
|
||
+ },
|
||
+ ],
|
||
+ ],
|
||
+ },
|
||
+ optional: false,
|
||
+ },
|
||
+ },
|
||
+ {
|
||
+ type: "CallExpression",
|
||
+ },
|
||
+ ],
|
||
+ },
|
||
+ operator: "&&",
|
||
+ },
|
||
+ };
|
||
+
|
||
+ const identifier$1 = {
|
||
+ or: [{
|
||
+ type: "ExpressionStatement",
|
||
+ expression: {
|
||
+ type: "AssignmentExpression",
|
||
+ operator: "=",
|
||
+ left: {
|
||
+ type: "Identifier",
|
||
+ },
|
||
+ right: {
|
||
+ type: "FunctionExpression",
|
||
+ params: [{}, {}, {}],
|
||
+ },
|
||
+ },
|
||
+ }, {
|
||
+ type: "FunctionDeclaration",
|
||
+ params: [{}, {}, {}],
|
||
+ }],
|
||
+ } ;
|
||
+
|
||
+ function extract$1(
|
||
+ node,
|
||
+ ) {
|
||
+ if (
|
||
+ !matchesStructure(node, identifier$1 )
|
||
+ ) {
|
||
+ return null;
|
||
+ }
|
||
+ const block = (node.type === "ExpressionStatement" &&
|
||
+ node.expression.type === "AssignmentExpression" &&
|
||
+ node.expression.right.type === "FunctionExpression")
|
||
+ ? node.expression.right.body
|
||
+ : node.type === "FunctionDeclaration"
|
||
+ ? node.body
|
||
+ : null;
|
||
+ const relevantExpression = _optionalChain$2([block, 'optionalAccess', _ => _.body, 'access', _2 => _2.at, 'call', _3 => _3(-2)]);
|
||
+ if (!matchesStructure(relevantExpression, logicalExpression)) {
|
||
+ return null;
|
||
+ }
|
||
+ if (
|
||
+ _optionalChain$2([relevantExpression, 'optionalAccess', _4 => _4.type]) !== "ExpressionStatement" ||
|
||
+ relevantExpression.expression.type !==
|
||
+ "LogicalExpression" ||
|
||
+ relevantExpression.expression.right.type !==
|
||
+ "SequenceExpression" ||
|
||
+ relevantExpression.expression.right.expressions[0].type !==
|
||
+ "AssignmentExpression"
|
||
+ ) {
|
||
+ return null;
|
||
+ }
|
||
+ const call = relevantExpression.expression.right.expressions[0].right;
|
||
+ if (call.type !== "CallExpression" || call.callee.type !== "Identifier") {
|
||
+ return null;
|
||
+ }
|
||
+ // TODO: verify identifiers here
|
||
+ return {
|
||
+ type: "ArrowFunctionExpression",
|
||
+ params: [
|
||
+ {
|
||
+ type: "Identifier",
|
||
+ name: "sig",
|
||
+ },
|
||
+ ],
|
||
+ body: {
|
||
+ type: "CallExpression",
|
||
+ callee: {
|
||
+ type: "Identifier",
|
||
+ name: call.callee.name,
|
||
+ },
|
||
+ arguments: call.arguments.length === 1
|
||
+ ? [
|
||
+ {
|
||
+ type: "Identifier",
|
||
+ name: "sig",
|
||
+ },
|
||
+ ]
|
||
+ : [
|
||
+ call.arguments[0],
|
||
+ {
|
||
+ type: "Identifier",
|
||
+ name: "sig",
|
||
+ },
|
||
+ ],
|
||
+ optional: false,
|
||
+ },
|
||
+ async: false,
|
||
+ expression: false,
|
||
+ generator: false,
|
||
+ };
|
||
+ }
|
||
+
|
||
+ function _optionalChain$1(ops) { let lastAccessLHS = undefined; let value = ops[0]; let i = 1; while (i < ops.length) { const op = ops[i]; const fn = ops[i + 1]; i += 2; if ((op === 'optionalAccess' || op === 'optionalCall') && value == null) { return undefined; } if (op === 'access' || op === 'optionalAccess') { lastAccessLHS = value; value = fn(value); } else if (op === 'call' || op === 'optionalCall') { value = fn((...args) => value.call(lastAccessLHS, ...args)); lastAccessLHS = undefined; } } return value; }
|
||
+
|
||
+
|
||
+ const identifier = {
|
||
+ type: "VariableDeclaration",
|
||
+ kind: "var",
|
||
+ declarations: [
|
||
+ {
|
||
+ type: "VariableDeclarator",
|
||
+ id: {
|
||
+ type: "Identifier",
|
||
+ },
|
||
+ init: {
|
||
+ type: "ArrayExpression",
|
||
+ elements: [
|
||
+ {
|
||
+ type: "Identifier",
|
||
+ },
|
||
+ ],
|
||
+ },
|
||
+ },
|
||
+ ],
|
||
+ };
|
||
+
|
||
+ const catchBlockBody = [
|
||
+ {
|
||
+ type: "ReturnStatement",
|
||
+ argument: {
|
||
+ type: "BinaryExpression",
|
||
+ left: {
|
||
+ type: "MemberExpression",
|
||
+ object: {
|
||
+ type: "Identifier",
|
||
+ },
|
||
+ computed: true,
|
||
+ property: {
|
||
+ type: "Literal",
|
||
+ },
|
||
+ optional: false,
|
||
+ },
|
||
+ right: {
|
||
+ type: "Identifier",
|
||
+ },
|
||
+ operator: "+",
|
||
+ },
|
||
+ },
|
||
+ ] ;
|
||
+
|
||
+ function extract(
|
||
+ node,
|
||
+ ) {
|
||
+ if (!matchesStructure(node, identifier)) {
|
||
+ // Fallback search for try { } catch { return X[12] + Y }
|
||
+ let name = null;
|
||
+ let block = null;
|
||
+ switch (node.type) {
|
||
+ case "ExpressionStatement": {
|
||
+ if (
|
||
+ node.expression.type === "AssignmentExpression" &&
|
||
+ node.expression.left.type === "Identifier" &&
|
||
+ node.expression.right.type === "FunctionExpression" &&
|
||
+ node.expression.right.params.length === 1
|
||
+ ) {
|
||
+ name = node.expression.left.name;
|
||
+ block = node.expression.right.body;
|
||
+ }
|
||
+ break;
|
||
+ }
|
||
+ case "FunctionDeclaration": {
|
||
+ if (node.params.length === 1) {
|
||
+ name = _optionalChain$1([node, 'access', _ => _.id, 'optionalAccess', _2 => _2.name]);
|
||
+ block = node.body;
|
||
+ }
|
||
+ break;
|
||
+ }
|
||
+ }
|
||
+ if (!block || !name) {
|
||
+ return null;
|
||
+ }
|
||
+ const tryNode = block.body.at(-2);
|
||
+ if (
|
||
+ _optionalChain$1([tryNode, 'optionalAccess', _3 => _3.type]) !== "TryStatement" ||
|
||
+ _optionalChain$1([tryNode, 'access', _4 => _4.handler, 'optionalAccess', _5 => _5.type]) !== "CatchClause"
|
||
+ ) {
|
||
+ return null;
|
||
+ }
|
||
+ const catchBody = tryNode.handler.body.body;
|
||
+ if (matchesStructure(catchBody, catchBlockBody)) {
|
||
+ return makeSolverFuncFromName(name);
|
||
+ }
|
||
+ return null;
|
||
+ }
|
||
+
|
||
+ if (node.type !== "VariableDeclaration") {
|
||
+ return null;
|
||
+ }
|
||
+ const declaration = node.declarations[0];
|
||
+ if (
|
||
+ declaration.type !== "VariableDeclarator" || !declaration.init ||
|
||
+ declaration.init.type !== "ArrayExpression" ||
|
||
+ declaration.init.elements.length !== 1
|
||
+ ) {
|
||
+ return null;
|
||
+ }
|
||
+ const [firstElement] = declaration.init.elements;
|
||
+ if (!firstElement || firstElement.type !== "Identifier") {
|
||
+ return null;
|
||
+ }
|
||
+ return makeSolverFuncFromName(firstElement.name);
|
||
+ }
|
||
+
|
||
+ function makeSolverFuncFromName(name) {
|
||
+ return {
|
||
+ type: "ArrowFunctionExpression",
|
||
+ params: [
|
||
+ {
|
||
+ type: "Identifier",
|
||
+ name: "nsig",
|
||
+ },
|
||
+ ],
|
||
+ body: {
|
||
+ type: "CallExpression",
|
||
+ callee: {
|
||
+ type: "Identifier",
|
||
+ name: name,
|
||
+ },
|
||
+ arguments: [
|
||
+ {
|
||
+ type: "Identifier",
|
||
+ name: "nsig",
|
||
+ },
|
||
+ ],
|
||
+ optional: false,
|
||
+ },
|
||
+ async: false,
|
||
+ expression: false,
|
||
+ generator: false,
|
||
+ };
|
||
+ }
|
||
+
|
||
+ const setupNodes = meriyah.parse(`
|
||
+globalThis.XMLHttpRequest = { prototype: {} };
|
||
+const window = Object.assign(Object.create(null), globalThis);
|
||
+window.location = new URL("https://www.youtube.com/watch?v=yt-dlp-wins");
|
||
+const document = {};
|
||
+let self = globalThis;
|
||
+`).body;
|
||
+
|
||
+ function _optionalChain(ops) { let lastAccessLHS = undefined; let value = ops[0]; let i = 1; while (i < ops.length) { const op = ops[i]; const fn = ops[i + 1]; i += 2; if ((op === 'optionalAccess' || op === 'optionalCall') && value == null) { return undefined; } if (op === 'access' || op === 'optionalAccess') { lastAccessLHS = value; value = fn(value); } else if (op === 'call' || op === 'optionalCall') { value = fn((...args) => value.call(lastAccessLHS, ...args)); lastAccessLHS = undefined; } } return value; }
|
||
+ function preprocessPlayer(data) {
|
||
+ const ast = meriyah.parse(data);
|
||
+ const body = ast.body;
|
||
+
|
||
+ const block = (() => {
|
||
+ switch (body.length) {
|
||
+ case 1: {
|
||
+ const func = body[0];
|
||
+ if (
|
||
+ _optionalChain([func, 'optionalAccess', _ => _.type]) === "ExpressionStatement" &&
|
||
+ func.expression.type === "CallExpression" &&
|
||
+ func.expression.callee.type === "MemberExpression" &&
|
||
+ func.expression.callee.object.type === "FunctionExpression"
|
||
+ ) {
|
||
+ return func.expression.callee.object.body;
|
||
+ }
|
||
+ break;
|
||
+ }
|
||
+ case 2: {
|
||
+ const func = body[1];
|
||
+ if (
|
||
+ _optionalChain([func, 'optionalAccess', _2 => _2.type]) === "ExpressionStatement" &&
|
||
+ func.expression.type === "CallExpression" &&
|
||
+ func.expression.callee.type === "FunctionExpression"
|
||
+ ) {
|
||
+ const block = func.expression.callee.body;
|
||
+ // Skip `var window = this;`
|
||
+ block.body.splice(0, 1);
|
||
+ return block;
|
||
+ }
|
||
+ break;
|
||
+ }
|
||
+ }
|
||
+ throw "unexpected structure";
|
||
+ })();
|
||
+
|
||
+ const found = {
|
||
+ nsig: [] ,
|
||
+ sig: [] ,
|
||
+ };
|
||
+ const plainExpressions = block.body.filter((node) => {
|
||
+ const nsig = extract(node);
|
||
+ if (nsig) {
|
||
+ found.nsig.push(nsig);
|
||
+ }
|
||
+ const sig = extract$1(node);
|
||
+ if (sig) {
|
||
+ found.sig.push(sig);
|
||
+ }
|
||
+ if (node.type === "ExpressionStatement") {
|
||
+ if (node.expression.type === "AssignmentExpression") {
|
||
+ return true;
|
||
+ }
|
||
+ return node.expression.type === "Literal";
|
||
+ }
|
||
+ return true;
|
||
+ });
|
||
+ block.body = plainExpressions;
|
||
+
|
||
+ for (const [name, options] of Object.entries(found)) {
|
||
+ // TODO: this is cringe fix plz
|
||
+ const unique = new Set(options.map((x) => JSON.stringify(x)));
|
||
+ if (unique.size !== 1) {
|
||
+ const message = `found ${unique.size} ${name} function possibilities`;
|
||
+ throw (
|
||
+ message +
|
||
+ (unique.size ? `: ${options.map((x) => astring.generate(x)).join(", ")}` : "")
|
||
+ );
|
||
+ }
|
||
+ plainExpressions.push({
|
||
+ type: "ExpressionStatement",
|
||
+ expression: {
|
||
+ type: "AssignmentExpression",
|
||
+ operator: "=",
|
||
+ left: {
|
||
+ type: "MemberExpression",
|
||
+ computed: false,
|
||
+ object: {
|
||
+ type: "Identifier",
|
||
+ name: "_result",
|
||
+ },
|
||
+ property: {
|
||
+ type: "Identifier",
|
||
+ name: name,
|
||
+ },
|
||
+ },
|
||
+ right: options[0],
|
||
+ },
|
||
+ });
|
||
+ }
|
||
+
|
||
+ ast.body.splice(0, 0, ...setupNodes);
|
||
+
|
||
+ return astring.generate(ast);
|
||
+ }
|
||
+
|
||
+ function getFromPrepared(code)
|
||
+
|
||
+
|
||
+ {
|
||
+ const resultObj = { nsig: null, sig: null };
|
||
+ Function("_result", code)(resultObj);
|
||
+ return resultObj;
|
||
+ }
|
||
+
|
||
+ function main(input) {
|
||
+ const preprocessedPlayer = input.type === "player"
|
||
+ ? preprocessPlayer(input.player)
|
||
+ : input.preprocessed_player;
|
||
+ const solvers = getFromPrepared(preprocessedPlayer);
|
||
+
|
||
+ const responses = input.requests.map(
|
||
+ (input) => {
|
||
+ if (!isOneOf(input.type, "nsig", "sig")) {
|
||
+ return {
|
||
+ type: "error",
|
||
+ error: `Unknown request type: ${input.type}`,
|
||
+ };
|
||
+ }
|
||
+ const solver = solvers[input.type];
|
||
+ if (!solver) {
|
||
+ return {
|
||
+ type: "error",
|
||
+ error: `Failed to extract ${input.type} function`,
|
||
+ };
|
||
+ }
|
||
+ try {
|
||
+ return {
|
||
+ type: "result",
|
||
+ data: Object.fromEntries(
|
||
+ input.challenges.map((challenge) => [challenge, solver(challenge)]),
|
||
+ ),
|
||
+ };
|
||
+ } catch (error) {
|
||
+ return {
|
||
+ type: "error",
|
||
+ error: error instanceof Error
|
||
+ ? `${error.message}\n${error.stack}`
|
||
+ : `${error}`,
|
||
+ };
|
||
+ }
|
||
+ },
|
||
+ );
|
||
+
|
||
+ const output = {
|
||
+ type: "result",
|
||
+ responses,
|
||
+ };
|
||
+ if (input.type === "player" && input.output_preprocessed) {
|
||
+ output.preprocessed_player = preprocessedPlayer;
|
||
+ }
|
||
+ return output;
|
||
+ }
|
||
+
|
||
+ return main;
|
||
+
|
||
+})(meriyah, astring);
|
||
\ No newline at end of file
|
||
Index: yt-dlp/yt_dlp/extractor/youtube/jsc/_builtin/scripts/deno.lib.js
|
||
===================================================================
|
||
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
|
||
+++ yt-dlp/yt_dlp/extractor/youtube/jsc/_builtin/scripts/deno.lib.js 2025-09-25 21:17:17.685849344 +0200
|
||
@@ -0,0 +1,3 @@
|
||
+// TODO: Generate this file automatically from bundle repo
|
||
+const [m, a] = await Promise.all([ import("npm:meriyah@6.1.4"), import("npm:astring@1.9.0") ]);
|
||
+export const lib = { meriyah: m, astring: a };
|
||
\ No newline at end of file
|
||
Index: yt-dlp/yt_dlp/extractor/youtube/jsc/_director.py
|
||
===================================================================
|
||
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
|
||
+++ yt-dlp/yt_dlp/extractor/youtube/jsc/_director.py 2025-09-25 21:17:17.686014381 +0200
|
||
@@ -0,0 +1,234 @@
|
||
+from __future__ import annotations
|
||
+
|
||
+import dataclasses
|
||
+import typing
|
||
+from collections.abc import Iterable
|
||
+
|
||
+from yt_dlp.extractor.youtube.jsc._registry import (
|
||
+ _jsc_preferences,
|
||
+ _jsc_providers,
|
||
+)
|
||
+from yt_dlp.extractor.youtube.jsc.provider import (
|
||
+ JsChallengeProvider,
|
||
+ JsChallengeProviderError,
|
||
+ JsChallengeProviderRejectedRequest,
|
||
+ JsChallengeProviderResponse,
|
||
+ JsChallengeRequest,
|
||
+ JsChallengeResponse,
|
||
+ JsChallengeType,
|
||
+ NChallengeInput,
|
||
+ NChallengeOutput,
|
||
+ SigChallengeInput,
|
||
+ SigChallengeOutput,
|
||
+)
|
||
+from yt_dlp.extractor.youtube.pot._director import YoutubeIEContentProviderLogger, provider_display_list
|
||
+from yt_dlp.extractor.youtube.pot._provider import (
|
||
+ IEContentProviderLogger,
|
||
+)
|
||
+from yt_dlp.extractor.youtube.pot.provider import (
|
||
+ provider_bug_report_message,
|
||
+)
|
||
+
|
||
+if typing.TYPE_CHECKING:
|
||
+ from yt_dlp.extractor.youtube.jsc.provider import Preference as JsChallengePreference
|
||
+
|
||
+
|
||
+class JsChallengeRequestDirector:
|
||
+
|
||
+ def __init__(self, logger: IEContentProviderLogger):
|
||
+ self.providers: dict[str, JsChallengeProvider] = {}
|
||
+ self.preferences: list[JsChallengePreference] = []
|
||
+ self.logger = logger
|
||
+
|
||
+ def register_provider(self, provider: JsChallengeProvider):
|
||
+ self.providers[provider.PROVIDER_KEY] = provider
|
||
+
|
||
+ def register_preference(self, preference: JsChallengePreference):
|
||
+ self.preferences.append(preference)
|
||
+
|
||
+ def _get_providers(self, requests: list[JsChallengeRequest]) -> Iterable[JsChallengeProvider]:
|
||
+ """Sorts available providers by preference, given a request"""
|
||
+ preferences = {
|
||
+ provider: sum(pref(provider, requests) for pref in self.preferences)
|
||
+ for provider in self.providers.values()
|
||
+ }
|
||
+ if self.logger.log_level <= self.logger.LogLevel.TRACE:
|
||
+ # calling is_available() for every JS Challenge provider upfront may have some overhead
|
||
+ self.logger.trace(f'JS Challenge Providers: {provider_display_list(self.providers.values())}')
|
||
+ self.logger.trace('JS Challenge Provider preferences for this request: {}'.format(', '.join(
|
||
+ f'{provider.PROVIDER_NAME}={pref}' for provider, pref in preferences.items())))
|
||
+
|
||
+ return (
|
||
+ provider for provider in sorted(
|
||
+ self.providers.values(), key=preferences.get, reverse=True)
|
||
+ if provider.is_available()
|
||
+ )
|
||
+
|
||
+ def _handle_error(self, e: Exception, provider: JsChallengeProvider, requests: list[JsChallengeRequest]):
|
||
+ if isinstance(e, JsChallengeProviderRejectedRequest):
|
||
+ self.logger.trace(
|
||
+ f'JS Challenge Provider "{provider.PROVIDER_NAME}" rejected '
|
||
+ f'{"this request" if len(requests) == 1 else f"{len(requests)} requests"}, '
|
||
+ f'trying next available provider. Reason: {e}',
|
||
+ )
|
||
+ elif isinstance(e, JsChallengeProviderError):
|
||
+ if len(requests) == 1:
|
||
+ self.logger.warning(
|
||
+ f'Error solving {requests[0].type.value} challenge request using "{provider.PROVIDER_NAME}" provider: {e}.\n'
|
||
+ f' input = {requests[0].input}\n'
|
||
+ f' {(provider_bug_report_message(provider, before="") if not e.expected else "")}')
|
||
+ else:
|
||
+ self.logger.warning(
|
||
+ f'Error solving {len(requests)} challenge requests using "{provider.PROVIDER_NAME}" provider: {e}.\n'
|
||
+ f' requests = {requests}\n'
|
||
+ f' {(provider_bug_report_message(provider, before="") if not e.expected else "")}')
|
||
+ else:
|
||
+ self.logger.error(
|
||
+ f'Unexpected error solving {len(requests)} challenge request(s) using "{provider.PROVIDER_NAME}" provider: {e!r}\n'
|
||
+ f' requests = {requests}\n'
|
||
+ f' {provider_bug_report_message(provider, before="")}', cause=e)
|
||
+
|
||
+ def bulk_solve(self, requests: list[JsChallengeRequest]) -> list[tuple[JsChallengeRequest, JsChallengeResponse]]:
|
||
+ """Solves multiple JS Challenges in bulk, returning a list of responses"""
|
||
+ if not self.providers:
|
||
+ self.logger.trace('No JS Challenge providers registered')
|
||
+ return []
|
||
+
|
||
+ results = []
|
||
+ next_requests = requests[:]
|
||
+
|
||
+ for provider in self._get_providers(next_requests):
|
||
+ if not next_requests:
|
||
+ break
|
||
+ self.logger.trace(
|
||
+ f'Attempting to solve {len(next_requests)} challenges using "{provider.PROVIDER_NAME}" provider')
|
||
+ try:
|
||
+ for response in provider.bulk_solve([dataclasses.replace(request) for request in next_requests]):
|
||
+ if not validate_provider_response(response):
|
||
+ self.logger.warning(
|
||
+ f'JS Challenge Provider "{provider.PROVIDER_NAME}" returned an invalid response:'
|
||
+ f' response = {response!r}\n'
|
||
+ f' {provider_bug_report_message(provider, before="")}')
|
||
+ continue
|
||
+ if response.error:
|
||
+ self._handle_error(response.error, provider, [response.request])
|
||
+ continue
|
||
+ if (vr_msg := validate_response(response.response, response.request)) is not True:
|
||
+ self.logger.warning(
|
||
+ f'Invalid JS Challenge response received from "{provider.PROVIDER_NAME}" provider: {vr_msg or ""}\n'
|
||
+ f' response = {response.response}\n'
|
||
+ f' request = {response.request}\n'
|
||
+ f' {provider_bug_report_message(provider, before="")}')
|
||
+ continue
|
||
+ try:
|
||
+ next_requests.remove(response.request)
|
||
+ except ValueError:
|
||
+ self.logger.warning(
|
||
+ f'JS Challenge Provider "{provider.PROVIDER_NAME}" returned a response for an unknown request:\n'
|
||
+ f' request = {response.request}\n'
|
||
+ f' {provider_bug_report_message(provider, before="")}')
|
||
+ continue
|
||
+ results.append((response.request, response.response))
|
||
+ except Exception as e:
|
||
+ self._handle_error(e, provider, next_requests)
|
||
+ continue
|
||
+
|
||
+ if len(results) != len(requests):
|
||
+ self.logger.trace(
|
||
+ f'Not all JS Challenges were solved, expected {len(requests)} responses, got {len(results)}')
|
||
+ self.logger.trace(f'Unsolved requests: {next_requests}')
|
||
+ else:
|
||
+ self.logger.trace(f'Solved all {len(requests)} requested JS Challenges')
|
||
+ return results
|
||
+
|
||
+ def close(self):
|
||
+ for provider in self.providers.values():
|
||
+ provider.close()
|
||
+
|
||
+
|
||
+EXTRACTOR_ARG_PREFIX = 'youtubejsc'
|
||
+
|
||
+
|
||
+def initialize_jsc_director(ie):
|
||
+ assert ie._downloader is not None, 'Downloader not set'
|
||
+
|
||
+ enable_trace = ie._configuration_arg(
|
||
+ 'jsc_trace', ['false'], ie_key='youtube', casesense=False)[0] == 'true'
|
||
+
|
||
+ if enable_trace:
|
||
+ log_level = IEContentProviderLogger.LogLevel.TRACE
|
||
+ elif ie.get_param('verbose', False):
|
||
+ log_level = IEContentProviderLogger.LogLevel.DEBUG
|
||
+ else:
|
||
+ log_level = IEContentProviderLogger.LogLevel.INFO
|
||
+
|
||
+ def get_provider_logger_and_settings(provider, logger_key):
|
||
+ logger_prefix = f'{logger_key}:{provider.PROVIDER_NAME}'
|
||
+ extractor_key = f'{EXTRACTOR_ARG_PREFIX}-{provider.PROVIDER_KEY.lower()}'
|
||
+ return (
|
||
+ YoutubeIEContentProviderLogger(ie, logger_prefix, log_level=log_level),
|
||
+ ie.get_param('extractor_args', {}).get(extractor_key, {}))
|
||
+
|
||
+ director = JsChallengeRequestDirector(
|
||
+ logger=YoutubeIEContentProviderLogger(ie, 'jsc', log_level=log_level),
|
||
+ )
|
||
+
|
||
+ ie._downloader.add_close_hook(director.close)
|
||
+
|
||
+ for provider in _jsc_providers.value.values():
|
||
+ logger, settings = get_provider_logger_and_settings(provider, 'jsc')
|
||
+ director.register_provider(provider(ie, logger, settings))
|
||
+
|
||
+ for preference in _jsc_preferences.value:
|
||
+ director.register_preference(preference)
|
||
+
|
||
+ if director.logger.log_level <= director.logger.LogLevel.DEBUG:
|
||
+ # calling is_available() for every JS Challenge provider upfront may have some overhead
|
||
+ director.logger.debug(f'JS Challenge Providers: {provider_display_list(director.providers.values())}')
|
||
+ director.logger.trace(f'Registered {len(director.preferences)} JS Challenge provider preferences')
|
||
+
|
||
+ return director
|
||
+
|
||
+
|
||
+def validate_provider_response(response: JsChallengeProviderResponse) -> bool:
|
||
+ return (
|
||
+ isinstance(response, JsChallengeProviderResponse)
|
||
+ and isinstance(response.request, JsChallengeRequest)
|
||
+ and (
|
||
+ isinstance(response.response, JsChallengeResponse)
|
||
+ or (response.error is not None and isinstance(response.error, Exception)))
|
||
+ )
|
||
+
|
||
+
|
||
+def validate_response(response: JsChallengeResponse, request: JsChallengeRequest) -> bool | str:
|
||
+ if not isinstance(response, JsChallengeResponse):
|
||
+ return 'Response is not a JsChallengeResponse'
|
||
+ if request.type == JsChallengeType.N:
|
||
+ return validate_nsig_challenge_output(response.output, request.input)
|
||
+ else:
|
||
+ return validate_sig_challenge_output(response.output, request.input)
|
||
+
|
||
+
|
||
+def validate_nsig_challenge_output(challenge_output: NChallengeOutput, challenge_input: NChallengeInput) -> bool | str:
|
||
+ if not (
|
||
+ isinstance(challenge_output, NChallengeOutput)
|
||
+ and len(challenge_output.results) == len(challenge_input.challenges)
|
||
+ and all(isinstance(k, str) and isinstance(v, str) for k, v in challenge_output.results.items())
|
||
+ and all(challenge in challenge_output.results for challenge in challenge_input.challenges)
|
||
+ ):
|
||
+ return 'Invalid NChallengeOutput'
|
||
+
|
||
+ # Validate nsig results are valid - if they end with the input challenge then the js function returned with an exception.
|
||
+ for challenge, result in challenge_output.results.items():
|
||
+ if result.endswith(challenge):
|
||
+ return f'nsig result is invalid for {challenge!r}: {result!r}'
|
||
+ return True
|
||
+
|
||
+
|
||
+def validate_sig_challenge_output(challenge_output: SigChallengeOutput, challenge_input: SigChallengeInput) -> bool:
|
||
+ return (
|
||
+ isinstance(challenge_output, SigChallengeOutput)
|
||
+ and len(challenge_output.results) == len(challenge_input.challenges)
|
||
+ and all(isinstance(k, str) and isinstance(v, str) for k, v in challenge_output.results.items())
|
||
+ and all(challenge in challenge_output.results for challenge in challenge_input.challenges)
|
||
+ ) or 'Invalid SigChallengeOutput'
|
||
Index: yt-dlp/yt_dlp/extractor/youtube/jsc/_registry.py
|
||
===================================================================
|
||
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
|
||
+++ yt-dlp/yt_dlp/extractor/youtube/jsc/_registry.py 2025-09-25 21:17:17.686129760 +0200
|
||
@@ -0,0 +1,4 @@
|
||
+from yt_dlp.globals import Indirect
|
||
+
|
||
+_jsc_providers = Indirect({})
|
||
+_jsc_preferences = Indirect(set())
|
||
Index: yt-dlp/yt_dlp/extractor/youtube/jsc/provider.py
|
||
===================================================================
|
||
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
|
||
+++ yt-dlp/yt_dlp/extractor/youtube/jsc/provider.py 2025-09-25 21:17:17.686241927 +0200
|
||
@@ -0,0 +1,157 @@
|
||
+"""PUBLIC API"""
|
||
+
|
||
+from __future__ import annotations
|
||
+
|
||
+import abc
|
||
+import dataclasses
|
||
+import enum
|
||
+import typing
|
||
+
|
||
+from yt_dlp.extractor.youtube.jsc._registry import _jsc_preferences, _jsc_providers
|
||
+from yt_dlp.extractor.youtube.pot._provider import (
|
||
+ IEContentProvider,
|
||
+ IEContentProviderError,
|
||
+ register_preference_generic,
|
||
+ register_provider_generic,
|
||
+)
|
||
+from yt_dlp.utils import ExtractorError
|
||
+
|
||
+__all__ = [
|
||
+ 'JsChallengeProvider',
|
||
+ 'JsChallengeProviderError',
|
||
+ 'JsChallengeProviderRejectedRequest',
|
||
+ 'JsChallengeProviderResponse',
|
||
+ 'JsChallengeRequest',
|
||
+ 'JsChallengeResponse',
|
||
+ 'JsChallengeType',
|
||
+ 'NChallengeInput',
|
||
+ 'NChallengeOutput',
|
||
+ 'SigChallengeInput',
|
||
+ 'SigChallengeOutput',
|
||
+ 'register_preference',
|
||
+ 'register_provider',
|
||
+]
|
||
+
|
||
+
|
||
+class JsChallengeType(enum.Enum):
|
||
+ N = 'n'
|
||
+ SIG = 'sig'
|
||
+
|
||
+
|
||
+@dataclasses.dataclass(frozen=True)
|
||
+class JsChallengeRequest:
|
||
+ type: JsChallengeType
|
||
+ input: NChallengeInput | SigChallengeInput
|
||
+ video_id: str | None = None
|
||
+
|
||
+
|
||
+@dataclasses.dataclass(frozen=True)
|
||
+class NChallengeInput:
|
||
+ player_url: str
|
||
+ challenges: list[str] = dataclasses.field(default_factory=list)
|
||
+
|
||
+
|
||
+@dataclasses.dataclass(frozen=True)
|
||
+class SigChallengeInput:
|
||
+ player_url: str
|
||
+ challenges: list[str] = dataclasses.field(default_factory=list)
|
||
+
|
||
+
|
||
+@dataclasses.dataclass(frozen=True)
|
||
+class NChallengeOutput:
|
||
+ results: dict[str, str] = dataclasses.field(default_factory=dict)
|
||
+
|
||
+
|
||
+@dataclasses.dataclass(frozen=True)
|
||
+class SigChallengeOutput:
|
||
+ results: dict[str, str] = dataclasses.field(default_factory=dict)
|
||
+
|
||
+
|
||
+@dataclasses.dataclass
|
||
+class JsChallengeProviderResponse:
|
||
+ request: JsChallengeRequest
|
||
+ response: JsChallengeResponse | None = None
|
||
+ error: Exception | None = None
|
||
+
|
||
+
|
||
+@dataclasses.dataclass
|
||
+class JsChallengeResponse:
|
||
+ type: JsChallengeType
|
||
+ output: NChallengeOutput | SigChallengeOutput
|
||
+
|
||
+
|
||
+class JsChallengeProviderRejectedRequest(IEContentProviderError):
|
||
+ """Reject the JsChallengeRequest (cannot handle the request)"""
|
||
+
|
||
+
|
||
+class JsChallengeProviderError(IEContentProviderError):
|
||
+ """An error occurred while solving the challenge"""
|
||
+
|
||
+
|
||
+class JsChallengeProvider(IEContentProvider, abc.ABC, suffix='JCP'):
|
||
+
|
||
+ # Set to None to disable the check
|
||
+ _SUPPORTED_TYPES: tuple[JsChallengeType] | None = ()
|
||
+
|
||
+ def __validate_request(self, request: JsChallengeRequest):
|
||
+ if not self.is_available():
|
||
+ raise JsChallengeProviderRejectedRequest(f'{self.PROVIDER_NAME} is not available')
|
||
+
|
||
+ # Validate request using built-in settings
|
||
+ if (
|
||
+ self._SUPPORTED_TYPES is not None
|
||
+ and request.type not in self._SUPPORTED_TYPES
|
||
+ ):
|
||
+ raise JsChallengeProviderRejectedRequest(
|
||
+ f'JS Challenge type "{request.type}" is not supported by {self.PROVIDER_NAME}')
|
||
+
|
||
+ def bulk_solve(self, requests: list[JsChallengeRequest]) -> typing.Generator[JsChallengeProviderResponse, None, None]:
|
||
+ """Solve multiple JS challenges and return the results"""
|
||
+ validated_requests = []
|
||
+ for request in requests:
|
||
+ try:
|
||
+ self.__validate_request(request)
|
||
+ validated_requests.append(request)
|
||
+ except JsChallengeProviderRejectedRequest as e:
|
||
+ yield JsChallengeProviderResponse(request=request, error=e)
|
||
+ continue
|
||
+ yield from self._real_bulk_solve(validated_requests)
|
||
+
|
||
+ @abc.abstractmethod
|
||
+ def _real_bulk_solve(self, requests: list[JsChallengeRequest]) -> typing.Generator[JsChallengeProviderResponse, None, None]:
|
||
+ """Subclasses can override this method to handle bulk solving"""
|
||
+ raise NotImplementedError(f'{self.PROVIDER_NAME} does not implement bulk solving')
|
||
+
|
||
+ def _get_player(self, video_id, player_url):
|
||
+ try:
|
||
+ return self.ie._load_player(
|
||
+ video_id=video_id,
|
||
+ player_url=player_url,
|
||
+ fatal=True,
|
||
+ )
|
||
+ except ExtractorError as e:
|
||
+ raise JsChallengeProviderError(
|
||
+ f'Failed to load player for JS challenge: {e}') from e
|
||
+
|
||
+
|
||
+def register_provider(provider: type[JsChallengeProvider]):
|
||
+ """Register a JsChallengeProvider class"""
|
||
+ return register_provider_generic(
|
||
+ provider=provider,
|
||
+ base_class=JsChallengeProvider,
|
||
+ registry=_jsc_providers.value,
|
||
+ )
|
||
+
|
||
+
|
||
+def register_preference(*providers: type[JsChallengeProvider]) -> typing.Callable[[Preference], Preference]:
|
||
+ """Register a preference for a JsChallengeProvider class."""
|
||
+ return register_preference_generic(
|
||
+ JsChallengeProvider,
|
||
+ _jsc_preferences.value,
|
||
+ *providers,
|
||
+ )
|
||
+
|
||
+
|
||
+if typing.TYPE_CHECKING:
|
||
+ Preference = typing.Callable[[JsChallengeProvider, list[JsChallengeRequest]], int]
|
||
+ __all__.append('Preference')
|
||
Index: yt-dlp/yt_dlp/extractor/youtube/jsc/utils.py
|
||
===================================================================
|
||
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
|
||
+++ yt-dlp/yt_dlp/extractor/youtube/jsc/utils.py 2025-09-25 21:17:17.686384544 +0200
|
||
@@ -0,0 +1 @@
|
||
+"""PUBLIC API"""
|
||
Index: yt-dlp/yt_dlp/extractor/youtube/pot/_director.py
|
||
===================================================================
|
||
--- yt-dlp.orig/yt_dlp/extractor/youtube/pot/_director.py 2025-09-23 08:45:40.000000000 +0200
|
||
+++ yt-dlp/yt_dlp/extractor/youtube/pot/_director.py 2025-09-25 21:17:17.686535682 +0200
|
||
@@ -6,6 +6,7 @@
|
||
import datetime as dt
|
||
import hashlib
|
||
import json
|
||
+import traceback
|
||
import typing
|
||
import urllib.parse
|
||
from collections.abc import Iterable
|
||
@@ -58,9 +59,9 @@
|
||
if self.log_level <= self.LogLevel.TRACE:
|
||
self.__ie.write_debug(self._format_msg('TRACE: ' + message))
|
||
|
||
- def debug(self, message: str):
|
||
+ def debug(self, message: str, *, once=False):
|
||
if self.log_level <= self.LogLevel.DEBUG:
|
||
- self.__ie.write_debug(self._format_msg(message))
|
||
+ self.__ie.write_debug(self._format_msg(message), only_once=once)
|
||
|
||
def info(self, message: str):
|
||
if self.log_level <= self.LogLevel.INFO:
|
||
@@ -70,9 +71,11 @@
|
||
if self.log_level <= self.LogLevel.WARNING:
|
||
self.__ie.report_warning(self._format_msg(message), only_once=once)
|
||
|
||
- def error(self, message: str):
|
||
+ def error(self, message: str, cause=None):
|
||
if self.log_level <= self.LogLevel.ERROR:
|
||
- self.__ie._downloader.report_error(self._format_msg(message), is_error=False)
|
||
+ self.__ie._downloader.report_error(
|
||
+ self._format_msg(message), is_error=False,
|
||
+ tb=''.join(traceback.format_exception(None, cause, cause.__traceback__)) if cause else None)
|
||
|
||
|
||
class PoTokenCache:
|
||
Index: yt-dlp/yt_dlp/extractor/youtube/pot/_provider.py
|
||
===================================================================
|
||
--- yt-dlp.orig/yt_dlp/extractor/youtube/pot/_provider.py 2025-09-23 08:45:40.000000000 +0200
|
||
+++ yt-dlp/yt_dlp/extractor/youtube/pot/_provider.py 2025-09-25 21:17:17.686801920 +0200
|
||
@@ -36,7 +36,7 @@
|
||
pass
|
||
|
||
@abc.abstractmethod
|
||
- def debug(self, message: str):
|
||
+ def debug(self, message: str, *, once=False):
|
||
pass
|
||
|
||
@abc.abstractmethod
|
||
@@ -48,7 +48,7 @@
|
||
pass
|
||
|
||
@abc.abstractmethod
|
||
- def error(self, message: str):
|
||
+ def error(self, message: str, cause=None):
|
||
pass
|
||
|
||
|
||
@@ -90,7 +90,7 @@
|
||
@classproperty
|
||
def PROVIDER_KEY(cls) -> str:
|
||
assert hasattr(cls, '_PROVIDER_KEY_SUFFIX'), 'Content Provider implementation must define a suffix for the provider key'
|
||
- assert cls.__name__.endswith(cls._PROVIDER_KEY_SUFFIX), f'PoTokenProvider class names must end with "{cls._PROVIDER_KEY_SUFFIX}"'
|
||
+ assert cls.__name__.endswith(cls._PROVIDER_KEY_SUFFIX), f'Class name must end with "{cls._PROVIDER_KEY_SUFFIX}"'
|
||
return cls.__name__[:-len(cls._PROVIDER_KEY_SUFFIX)]
|
||
|
||
@abc.abstractmethod
|
||
Index: yt-dlp/yt_dlp/globals.py
|
||
===================================================================
|
||
--- yt-dlp.orig/yt_dlp/globals.py 2025-09-23 08:45:40.000000000 +0200
|
||
+++ yt-dlp/yt_dlp/globals.py 2025-09-25 21:17:17.686934410 +0200
|
||
@@ -1,3 +1,4 @@
|
||
+from __future__ import annotations
|
||
import os
|
||
from collections import defaultdict
|
||
|
||
@@ -30,3 +31,8 @@
|
||
IN_CLI = Indirect(False)
|
||
LAZY_EXTRACTORS = Indirect(None) # `False`=force, `None`=disabled, `True`=enabled
|
||
WINDOWS_VT_MODE = Indirect(False if os.name == 'nt' else None)
|
||
+
|
||
+# JS Runtimes
|
||
+# If adding support for another runtime, register it here to allow `js_runtimes` option to accept it.
|
||
+# key is the runtime name, value is None or a JsRuntime subclass (internal-only)
|
||
+supported_js_runtimes = Indirect({})
|
||
Index: yt-dlp/yt_dlp/options.py
|
||
===================================================================
|
||
--- yt-dlp.orig/yt_dlp/options.py 2025-09-23 08:45:40.000000000 +0200
|
||
+++ yt-dlp/yt_dlp/options.py 2025-09-25 21:17:17.687118095 +0200
|
||
@@ -457,6 +457,41 @@
|
||
dest='plugin_dirs', action='store_const', const=[],
|
||
help='Clear plugin directories to search, including defaults and those provided by previous --plugin-dirs')
|
||
general.add_option(
|
||
+ '--js-runtimes',
|
||
+ metavar='RUNTIME[:PATH]',
|
||
+ dest='js_runtimes',
|
||
+ action='callback',
|
||
+ callback=_list_from_options_callback,
|
||
+ type='str',
|
||
+ callback_kwargs={'delim': None},
|
||
+ default=['deno'],
|
||
+ help=(
|
||
+ 'Additional JavaScript runtime to enable, with an optional path to the runtime location. '
|
||
+ 'This option can be used multiple times to enable multiple runtimes. '
|
||
+ 'Supported runtimes: deno, node, bun. By default, only "deno" runtime is enabled.'))
|
||
+ general.add_option(
|
||
+ '--no-js-runtimes',
|
||
+ dest='js_runtimes', action='store_const', const=[],
|
||
+ help='Clear JavaScript runtimes to enable, including defaults and those provided by previous --js-runtimes')
|
||
+ general.add_option(
|
||
+ '--download-ext-components',
|
||
+ metavar='COMPONENT',
|
||
+ dest='download_ext_components',
|
||
+ action='callback',
|
||
+ callback=_list_from_options_callback,
|
||
+ type='str',
|
||
+ callback_kwargs={'delim': None},
|
||
+ default=[],
|
||
+ help=(
|
||
+ 'Specify external components that yt-dlp is allowed to download when needed. '
|
||
+ 'You can use this option multiple times to allow multiple components. '
|
||
+ 'Supported values: npm (JavaScript dependencies from npm), ejs-github (official JS scripts from yt-dlp-ejs GitHub). '
|
||
+ 'By default, no external components are allowed.'))
|
||
+ general.add_option(
|
||
+ '--no-download-ext-components',
|
||
+ dest='download_ext_components', action='store_const', const=[],
|
||
+ help='Disallow downloading of all external components, including any previously allowed by --download-ext-components or defaults.')
|
||
+ general.add_option(
|
||
'--flat-playlist',
|
||
action='store_const', dest='extract_flat', const='in_playlist', default=False,
|
||
help=(
|
||
Index: yt-dlp/yt_dlp/utils/_jsruntime.py
|
||
===================================================================
|
||
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
|
||
+++ yt-dlp/yt_dlp/utils/_jsruntime.py 2025-09-25 21:17:17.687370993 +0200
|
||
@@ -0,0 +1,57 @@
|
||
+from __future__ import annotations
|
||
+import abc
|
||
+import dataclasses
|
||
+import functools
|
||
+
|
||
+from ._utils import _get_exe_version_output, detect_exe_version
|
||
+
|
||
+
|
||
+@dataclasses.dataclass(frozen=True)
|
||
+class JsRuntimeInfo:  # immutable (frozen) record describing one JavaScript runtime
|
||
+ name: str  # runtime name; the detectors in this file use 'deno', 'bun' and 'node'
|
||
+ path: str  # the configured path, or the bare command name when no path was configured
|
||
+ version: str  # value returned by detect_exe_version for the runtime's version output
|
||
+ supported: bool = True  # defaults to True; none of the detectors in this file set it explicitly
|
||
+
|
||
+
|
||
+class JsRuntime(abc.ABC):  # abstract base for JS runtime detectors; concrete subclasses implement _info()
|
||
+ def __init__(self, path=None):  # path: optional executable location; the subclasses in this file fall back to a bare command name when it is falsy
|
||
+ self._path = path
|
||
+
|
||
+ @functools.cached_property  # _info() is evaluated on first access and the result (including None) is cached on the instance
|
||
+ def info(self) -> JsRuntimeInfo | None:
|
||
+ return self._info()
|
||
+
|
||
+ @abc.abstractmethod
|
||
+ def _info(self) -> JsRuntimeInfo | None:  # return the runtime's details, or None (the subclasses in this file return None when the version probe yields no output)
|
||
+ raise NotImplementedError
|
||
+
|
||
+
|
||
+class DenoJsRuntime(JsRuntime):  # JsRuntime implementation reporting name='deno'
|
||
+ def _info(self):
|
||
+ deno_path = self._path or 'deno'  # use the configured path, else the bare command name
|
||
+ out = _get_exe_version_output(deno_path, ['--version'])  # presumably runs the executable with --version and returns its output - confirm in ._utils
|
||
+ if not out:  # empty/falsy output: report no runtime info
|
||
+ return None
|
||
+ version = detect_exe_version(out, r'^deno (\S+)')  # pattern captures the token that follows a leading "deno "
|
||
+ return JsRuntimeInfo(name='deno', path=deno_path, version=version)
|
||
+
|
||
+
|
||
+class BunJsRuntime(JsRuntime):  # JsRuntime implementation reporting name='bun'
|
||
+ def _info(self):
|
||
+ path = self._path or 'bun'  # use the configured path, else the bare command name
|
||
+ out = _get_exe_version_output(path, ['--version'])  # presumably runs the executable with --version and returns its output - confirm in ._utils
|
||
+ if not out:  # empty/falsy output: report no runtime info
|
||
+ return None
|
||
+ version = detect_exe_version(out, r'^(\S+)')  # pattern captures the first whitespace-delimited token of the output
|
||
+ return JsRuntimeInfo(name='bun', path=path, version=version)
|
||
+
|
||
+
|
||
+class NodeJsRuntime(JsRuntime):  # JsRuntime implementation reporting name='node'
|
||
+ def _info(self):
|
||
+ node_path = self._path or 'node'  # use the configured path, else the bare command name
|
||
+ out = _get_exe_version_output(node_path, ['--version'])  # presumably runs the executable with --version and returns its output - confirm in ._utils
|
||
+ if not out:  # empty/falsy output: report no runtime info
|
||
+ return None
|
||
+ version = detect_exe_version(out, r'^v(\S+)')  # pattern captures the token that follows a leading "v"
|
||
+ return JsRuntimeInfo(name='node', path=node_path, version=version)
|