SHA256
1
0
forked from jengelh/yt-dlp
Files
yt-dlp/add-external-jsint.patch
Matěj Cepl b626204e4f Add add-external-jsint.patch to add dependency on the external
JavaScript interpreter (gh#yt-dlp/yt-dlp!14157).
2025-09-25 21:21:40 +02:00

4121 lines
176 KiB
Diff
Raw Permalink Blame History

This file contains invisible Unicode characters
This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
From cbf832cee291ab88ba32b345b9784c5ef028d521 Mon Sep 17 00:00:00 2001
From: coletdjnz <coletdjnz@protonmail.com>
Date: Sat, 23 Aug 2025 20:05:53 +1200
Subject: [PATCH 01/43] [youtube] POC JS Challenge Provider framework
---
yt_dlp/extractor/youtube/_video.py | 399 +++---------------
yt_dlp/extractor/youtube/js/README.md | 115 +++++
yt_dlp/extractor/youtube/js/__init__.py | 2 +
.../extractor/youtube/js/_builtin/__init__.py | 0
README.md | 21
test/test_jsc/conftest.py | 43
test/test_jsc/test_deno.py | 252 ++-
test/test_jsc/test_runtime.py | 85 +
yt_dlp/YoutubeDL.py | 44
yt_dlp/__init__.py | 17
yt_dlp/extractor/youtube/_video.py | 935 +++++--------
yt_dlp/extractor/youtube/js/README.md | 242 +--
yt_dlp/extractor/youtube/js/__init__.py | 7
yt_dlp/extractor/youtube/js/_builtin/jsinterp.py | 703 ++++-----
yt_dlp/extractor/youtube/js/_director.py | 336 ++--
yt_dlp/extractor/youtube/js/_registry.py | 11
yt_dlp/extractor/youtube/js/provider.py | 275 +--
yt_dlp/extractor/youtube/js/utils.py | 5
yt_dlp/extractor/youtube/jsc/README.md | 129 +
yt_dlp/extractor/youtube/jsc/__init__.py | 5
yt_dlp/extractor/youtube/jsc/_builtin/bun.py | 79 +
yt_dlp/extractor/youtube/jsc/_builtin/bundle/__init__.py | 27
yt_dlp/extractor/youtube/jsc/_builtin/bundle/bun.lib.js | 9
yt_dlp/extractor/youtube/jsc/_builtin/bundle/core.js | 501 ------
yt_dlp/extractor/youtube/jsc/_builtin/bundle/deno.lib.js | 9
yt_dlp/extractor/youtube/jsc/_builtin/bundle/jsc.js | 1005 +++++++-------
yt_dlp/extractor/youtube/jsc/_builtin/deno.py | 82 +
yt_dlp/extractor/youtube/jsc/_builtin/jsinterp.py | 288 ++++
yt_dlp/extractor/youtube/jsc/_builtin/node.py | 47
yt_dlp/extractor/youtube/jsc/_builtin/runtime.py | 283 +++
yt_dlp/extractor/youtube/jsc/_builtin/scripts/__init__.py | 12
yt_dlp/extractor/youtube/jsc/_builtin/scripts/bun.lib.js | 3
yt_dlp/extractor/youtube/jsc/_builtin/scripts/core.js | 501 ++++++
yt_dlp/extractor/youtube/jsc/_builtin/scripts/deno.lib.js | 3
yt_dlp/extractor/youtube/jsc/_director.py | 234 +++
yt_dlp/extractor/youtube/jsc/_registry.py | 4
yt_dlp/extractor/youtube/jsc/provider.py | 157 ++
yt_dlp/extractor/youtube/jsc/utils.py | 1
yt_dlp/extractor/youtube/pot/_director.py | 11
yt_dlp/extractor/youtube/pot/_provider.py | 6
yt_dlp/globals.py | 6
yt_dlp/options.py | 35
yt_dlp/utils/_jsruntime.py | 57
README.md | 21
test/test_jsc/conftest.py | 43
test/test_jsc/test_runtime.py | 85 +
yt_dlp/YoutubeDL.py | 44
yt_dlp/__init__.py | 18
yt_dlp/extractor/youtube/_video.py | 935 +++++---------
yt_dlp/extractor/youtube/jsc/README.md | 129 +
yt_dlp/extractor/youtube/jsc/__init__.py | 5
yt_dlp/extractor/youtube/jsc/_builtin/bun.py | 79 +
yt_dlp/extractor/youtube/jsc/_builtin/bundle/core.js | 504 +++++++
yt_dlp/extractor/youtube/jsc/_builtin/deno.py | 82 +
yt_dlp/extractor/youtube/jsc/_builtin/jsinterp.py | 288 ++++
yt_dlp/extractor/youtube/jsc/_builtin/node.py | 47
yt_dlp/extractor/youtube/jsc/_builtin/runtime.py | 283 ++++
yt_dlp/extractor/youtube/jsc/_builtin/scripts/__init__.py | 12
yt_dlp/extractor/youtube/jsc/_builtin/scripts/bun.lib.js | 3
yt_dlp/extractor/youtube/jsc/_builtin/scripts/core.js | 501 +++++++
yt_dlp/extractor/youtube/jsc/_builtin/scripts/deno.lib.js | 3
yt_dlp/extractor/youtube/jsc/_director.py | 234 +++
yt_dlp/extractor/youtube/jsc/_registry.py | 4
yt_dlp/extractor/youtube/jsc/provider.py | 157 ++
yt_dlp/extractor/youtube/jsc/utils.py | 1
yt_dlp/extractor/youtube/pot/_director.py | 11
yt_dlp/extractor/youtube/pot/_provider.py | 6
yt_dlp/globals.py | 6
yt_dlp/options.py | 35
yt_dlp/utils/_jsruntime.py | 57
27 files changed, 3036 insertions(+), 557 deletions(-)
create mode 100644 yt_dlp/extractor/youtube/js/README.md
create mode 100644 yt_dlp/extractor/youtube/js/__init__.py
create mode 100644 yt_dlp/extractor/youtube/js/_builtin/__init__.py
create mode 100644 yt_dlp/extractor/youtube/js/_builtin/jsinterp.py
create mode 100644 yt_dlp/extractor/youtube/js/_director.py
create mode 100644 yt_dlp/extractor/youtube/js/_registry.py
create mode 100644 yt_dlp/extractor/youtube/js/provider.py
create mode 100644 yt_dlp/extractor/youtube/js/utils.py
Index: yt-dlp/README.md
===================================================================
--- yt-dlp.orig/README.md 2025-09-23 08:47:15.000000000 +0200
+++ yt-dlp/README.md 2025-09-25 21:17:17.679575923 +0200
@@ -362,6 +362,27 @@
--no-plugin-dirs Clear plugin directories to search,
including defaults and those provided by
previous --plugin-dirs
+ --js-runtimes RUNTIME[:PATH] Additional JavaScript runtime to enable,
+ with an optional path to the runtime
+ location. This option can be used multiple
+ times to enable multiple runtimes. Supported
+ runtimes: deno, node, bun. By default, only
+ "deno" runtime is enabled.
+ --no-js-runtimes Clear JavaScript runtimes to enable,
+ including defaults and those provided by
+ previous --js-runtimes
+ --download-ext-components COMPONENT
+ Specify external components that yt-dlp is
+ allowed to download when needed. You can use
+ this option multiple times to allow multiple
+ components. Supported values: npm
+ (JavaScript dependencies from npm), ejs-
+ github (official JS scripts from yt-dlp-ejs
+ GitHub). By default, no external components
+ are allowed.
+ --no-download-ext-components Disallow downloading of all external
+ components, including any previously allowed
+ by --download-ext-components or defaults.
--flat-playlist Do not extract a playlist's URL result
entries; some entry metadata may be missing
and downloading may be bypassed
Index: yt-dlp/test/test_jsc/conftest.py
===================================================================
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
+++ yt-dlp/test/test_jsc/conftest.py 2025-09-25 21:17:17.681033945 +0200
@@ -0,0 +1,43 @@
+import collections
+
+import pytest
+
+import yt_dlp.globals
+from yt_dlp import YoutubeDL
+from yt_dlp.extractor.common import InfoExtractor
+from yt_dlp.extractor.youtube.pot._provider import IEContentProviderLogger
+
+
+class MockLogger(IEContentProviderLogger):
+ log_level = IEContentProviderLogger.LogLevel.TRACE
+
+ def __init__(self, *args, **kwargs):
+ super().__init__(*args, **kwargs)
+ self.messages = collections.defaultdict(list)
+
+ def trace(self, message: str):
+ self.messages['trace'].append(message)
+
+ def debug(self, message: str, *, once=False):
+ self.messages['debug'].append(message)
+
+ def info(self, message: str):
+ self.messages['info'].append(message)
+
+ def warning(self, message: str, *, once=False):
+ self.messages['warning'].append(message)
+
+ def error(self, message: str):
+ self.messages['error'].append(message)
+
+
+@pytest.fixture
+def ie() -> InfoExtractor:
+ runtime_names = yt_dlp.globals.supported_js_runtimes.value
+ ydl = YoutubeDL({'js_runtimes': {key: {} for key in runtime_names}})
+ return ydl.get_info_extractor('Youtube')
+
+
+@pytest.fixture
+def logger() -> MockLogger:
+ return MockLogger()
Index: yt-dlp/test/test_jsc/test_runtime.py
===================================================================
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
+++ yt-dlp/test/test_jsc/test_runtime.py 2025-09-25 21:17:17.681178518 +0200
@@ -0,0 +1,85 @@
+from __future__ import annotations
+
+import json
+
+import pytest
+try:
+ import yt_dlp_jsc
+except ImportError:
+ yt_dlp_jsc = None
+
+from yt_dlp.extractor.youtube.jsc.provider import (
+ JsChallengeRequest,
+ JsChallengeType,
+ JsChallengeProviderResponse,
+ JsChallengeResponse,
+ NChallengeInput,
+ NChallengeOutput,
+ SigChallengeInput,
+ SigChallengeOutput,
+)
+from yt_dlp.extractor.youtube.jsc._builtin.bun import BunJCP
+from yt_dlp.extractor.youtube.jsc._builtin.deno import DenoJCP
+from yt_dlp.extractor.youtube.jsc._builtin.node import NodeJCP
+
+
+pytestmark = pytest.mark.skipif(not yt_dlp_jsc, reason='yt-dlp-jsc not available')
+
+TESTS = [
+ JsChallengeRequest(JsChallengeType.N, NChallengeInput('https://www.youtube.com/s/player/3d3ba064/player_ias_tce.vflset/en_US/base.js', [
+ 'ZdZIqFPQK-Ty8wId',
+ '4GMrWHyKI5cEvhDO',
+ ])),
+ JsChallengeRequest(JsChallengeType.SIG, SigChallengeInput('https://www.youtube.com/s/player/3d3ba064/player_ias_tce.vflset/en_US/base.js', [
+ 'gN7a-hudCuAuPH6fByOk1_GNXN0yNMHShjZXS2VOgsEItAJz0tipeavEOmNdYN-wUtcEqD3bCXjc0iyKfAyZxCBGgIARwsSdQfJ2CJtt',
+ ])),
+ JsChallengeRequest(JsChallengeType.N, NChallengeInput('https://www.youtube.com/s/player/5ec65609/player_ias_tce.vflset/en_US/base.js', [
+ '0eRGgQWJGfT5rFHFj',
+ ])),
+ JsChallengeRequest(JsChallengeType.SIG, SigChallengeInput('https://www.youtube.com/s/player/5ec65609/player_ias_tce.vflset/en_US/base.js', [
+ 'AAJAJfQdSswRQIhAMG5SN7-cAFChdrE7tLA6grH0rTMICA1mmDc0HoXgW3CAiAQQ4=CspfaF_vt82XH5yewvqcuEkvzeTsbRuHssRMyJQ=I',
+ ])),
+ JsChallengeRequest(JsChallengeType.N, NChallengeInput('https://www.youtube.com/s/player/6742b2b9/player_ias_tce.vflset/en_US/base.js', [
+ '_HPB-7GFg1VTkn9u',
+ 'K1t_fcB6phzuq2SF',
+ ])),
+ JsChallengeRequest(JsChallengeType.SIG, SigChallengeInput('https://www.youtube.com/s/player/6742b2b9/player_ias_tce.vflset/en_US/base.js', [
+ 'MMGZJMUucirzS_SnrSPYsc85CJNnTUi6GgR5NKn-znQEICACojE8MHS6S7uYq4TGjQX_D4aPk99hNU6wbTvorvVVMgIARwsSdQfJAA',
+ ])),
+]
+
+RESPONSES = [
+ JsChallengeProviderResponse(test, JsChallengeResponse(test.type, (
+ NChallengeOutput if test.type is JsChallengeType.N else SigChallengeOutput
+ )(dict(zip(test.input.challenges, results)))))
+ for test, results in zip(TESTS, [
+ ['qmtUsIz04xxiNW', 'N9gmEX7YhKTSmw'],
+ ['ttJC2JfQdSswRAIgGBCxZyAfKyi0cjXCb3gqEctUw-NYdNmOEvaepit0zJAtIEsgOV2SXZjhSHMNy0NXNG_1kNyBf6HPuAuCduh-a7O'],
+ ['4SvMpDQH-vBJCw'],
+ ['AJfQdSswRQIhAMG5SN7-cAFChdrE7tLA6grI0rTMICA1mmDc0HoXgW3CAiAQQ4HCspfaF_vt82XH5yewvqcuEkvzeTsbRuHssRMyJQ=='],
+ ['qUAsPryAO_ByYg', 'Y7PcOt3VE62mog'],
+ ['AJfQdSswRAIgMVVvrovTbw6UNh99kPa4D_XQjGT4qYu7S6SHM8EjoCACIEQnz-nKN5RgG6iUTnNJC58csYPSrnS_SzricuUMJZGM'],
+ ])
+]
+
+
+@pytest.fixture(params=[BunJCP, DenoJCP, NodeJCP])
+def jcp(request, ie, logger):
+ obj = request.param(ie, logger, settings={'debug': ['true']})
+ if not obj.is_available():
+ pytest.skip(f'{obj.PROVIDER_NAME} is not available')
+ return obj
+
+
+def test_bulk_requests(jcp):
+ assert list(jcp.bulk_solve(TESTS)) == RESPONSES
+
+
+def test_using_cached_player(jcp):
+ requests = TESTS[:3]
+ player = jcp._get_player(requests[0].video_id, requests[0].input.player_url)
+ initial = json.loads(jcp._run_js_runtime(jcp._construct_stdin(player, False, requests)))
+ preprocessed = initial.pop('preprocessed_player')
+ result = json.loads(jcp._run_js_runtime(jcp._construct_stdin(preprocessed, True, requests)))
+
+ assert initial == result
Index: yt-dlp/yt_dlp/YoutubeDL.py
===================================================================
--- yt-dlp.orig/yt_dlp/YoutubeDL.py 2025-09-23 08:45:39.000000000 +0200
+++ yt-dlp/yt_dlp/YoutubeDL.py 2025-09-25 21:17:17.681593799 +0200
@@ -42,6 +42,7 @@
plugin_pps,
all_plugins_loaded,
plugin_dirs,
+ supported_js_runtimes,
)
from .minicurses import format_text
from .networking import HEADRequest, Request, RequestDirector
@@ -533,6 +534,17 @@
See "EXTRACTOR ARGUMENTS" for details.
Argument values must always be a list of string(s).
E.g. {'youtube': {'skip': ['dash', 'hls']}}
+ js_runtimes: A dictionary of JavaScript runtime keys (in lower case) to enable
+ and a dictionary of additional configuration for the runtime.
+ If not specified, the default runtime of "deno" will be enabled.
+ The runtime configuration dictionary can have the following keys:
+ - path: Path to the executable (optional)
+ E.g. {'deno': {'path': '/path/to/deno'}}
+ download_ext_components: A list of external components that are allowed to be downloaded when required.
+ Supported components:
+ - `npm` (JS Dependencies from npm)
+ - `ejs-github` (Official JS Scripts from yt-dlp-ejs GitHub).
+ By default, no external components are allowed to be downloaded.
mark_watched: Mark videos watched (even with --simulate). Only for YouTube
The following options are deprecated and may be removed in the future:
@@ -717,6 +729,10 @@
else:
raise
+ # Note: this must be after plugins are loaded
+ self.params['js_runtimes'] = self.params.get('js_runtimes', {'deno': {}})
+ self._validate_js_runtimes(self.params['js_runtimes'])
+
self.params['compat_opts'] = set(self.params.get('compat_opts', ()))
self.params['http_headers'] = HTTPHeaderDict(std_headers, self.params.get('http_headers'))
self._load_cookies(self.params['http_headers'].get('Cookie')) # compat
@@ -829,6 +845,26 @@
self.archive = preload_download_archive(self.params.get('download_archive'))
+ def _validate_js_runtimes(self, runtimes):
+ if not (
+ isinstance(runtimes, dict)
+ and all(isinstance(k, str) and (v is None or isinstance(v, dict)) for k, v in runtimes.items())
+ ):
+ raise ValueError('Invalid js_runtimes format, expected a dict of {runtime: {config}}')
+
+ if unsupported_runtimes := runtimes.keys() - supported_js_runtimes.value.keys():
+ raise ValueError(
+ f'Unsupported JavaScript runtimes specified: {", ".join(unsupported_runtimes)}.'
+ f' Supported runtimes are: {", ".join(supported_js_runtimes.value.keys())}')
+
+ @functools.cached_property
+ def _js_runtimes(self):
+ runtimes = {}
+ for name, config in self.params.get('js_runtimes', {}).items():
+ runtime_cls = supported_js_runtimes.value.get(name)
+ runtimes[name] = runtime_cls(path=config.get('path')) if runtime_cls else None
+ return runtimes
+
def warn_if_short_id(self, argv):
# short YouTube ID starting with dash?
idxs = [
@@ -4064,6 +4100,14 @@
join_nonempty(*get_package_info(m)) for m in available_dependencies.values()
})) or 'none'))
+ if not self.params.get('js_runtimes'):
+ write_debug('JS runtimes: none (disabled)')
+ else:
+ write_debug('JS runtimes: %s' % (', '.join(sorted(
+ f'{name} (unknown)' if runtime is None else join_nonempty(runtime.info.name, runtime.info.version)
+ for name, runtime in self._js_runtimes.items() if runtime is None or runtime.info is not None
+ )) or 'none'))
+
write_debug(f'Proxy map: {self.proxies}')
write_debug(f'Request Handlers: {", ".join(rh.RH_NAME for rh in self._request_director.handlers.values())}')
Index: yt-dlp/yt_dlp/__init__.py
===================================================================
--- yt-dlp.orig/yt_dlp/__init__.py 2025-09-23 08:45:39.000000000 +0200
+++ yt-dlp/yt_dlp/__init__.py 2025-09-25 21:17:42.034536471 +0200
@@ -59,10 +59,16 @@
render_table,
setproctitle,
shell_quote,
+ traverse_obj,
variadic,
write_string,
)
from .utils._utils import _UnsafeExtensionError
+from .utils._jsruntime import (
+ BunJsRuntime as _BunJsRuntime,
+ DenoJsRuntime as _DenoJsRuntime,
+ NodeJsRuntime as _NodeJsRuntime,
+)
from .YoutubeDL import YoutubeDL
@@ -773,6 +779,10 @@
else opts.audioformat if (opts.extractaudio and opts.audioformat in FFmpegExtractAudioPP.SUPPORTED_EXTS)
else None)
+ js_runtimes = {
+ runtime.lower(): {'path': path} for runtime, path in (
+ [*arg.split(':', 1), None][:2] for arg in opts.js_runtimes)}
+
return ParsedOptions(parser, opts, urls, {
'usenetrc': opts.usenetrc,
'netrc_location': opts.netrc_location,
@@ -944,6 +954,8 @@
'_warnings': warnings,
'_deprecation_warnings': deprecation_warnings,
'compat_opts': opts.compat_opts,
+ 'js_runtimes': js_runtimes,
+ 'download_ext_components': opts.download_ext_components,
})
@@ -1086,6 +1098,12 @@
from .extractor import gen_extractors, list_extractors
+# Register JS runtimes
+from .globals import supported_js_runtimes
+supported_js_runtimes.value['deno'] = _DenoJsRuntime
+supported_js_runtimes.value['node'] = _NodeJsRuntime
+supported_js_runtimes.value['bun'] = _BunJsRuntime
+
__all__ = [
'YoutubeDL',
'gen_extractors',
Index: yt-dlp/yt_dlp/extractor/youtube/_video.py
===================================================================
--- yt-dlp.orig/yt_dlp/extractor/youtube/_video.py 2025-09-23 08:45:40.000000000 +0200
+++ yt-dlp/yt_dlp/extractor/youtube/_video.py 2025-09-25 21:17:17.682484914 +0200
@@ -4,9 +4,7 @@
import datetime as dt
import functools
import itertools
-import json
import math
-import os.path
import random
import re
import sys
@@ -26,10 +24,10 @@
_split_innertube_client,
short_client_name,
)
+from .jsc._director import initialize_jsc_director
+from .jsc.provider import JsChallengeRequest, JsChallengeType, NChallengeInput, SigChallengeInput
from .pot._director import initialize_pot_director
from .pot.provider import PoTokenContext, PoTokenRequest
-from ..openload import PhantomJSwrapper
-from ...jsinterp import JSInterpreter, LocalNameSpace
from ...networking.exceptions import HTTPError
from ...utils import (
NO_DEFAULT,
@@ -39,13 +37,11 @@
clean_html,
datetime_from_str,
filesize_from_tbr,
- filter_dict,
float_or_none,
format_field,
get_first,
int_or_none,
join_nonempty,
- js_to_json,
mimetype2ext,
orderedSet,
parse_codecs,
@@ -1827,8 +1823,6 @@
'tablet': 'player-plasma-ias-tablet-en_US.vflset/base.js',
}
_INVERSE_PLAYER_JS_VARIANT_MAP = {v: k for k, v in _PLAYER_JS_VARIANT_MAP.items()}
- _NSIG_FUNC_CACHE_ID = 'nsig func'
- _DUMMY_STRING = 'dlp_wins'
@classmethod
def suitable(cls, url):
@@ -1848,6 +1842,7 @@
def _real_initialize(self):
super()._real_initialize()
self._pot_director = initialize_pot_director(self)
+ self._jsc_director = initialize_jsc_director(self)
def _prepare_live_from_start_formats(self, formats, video_id, live_start_time, url, webpage_url, smuggled_data, is_live):
lock = threading.Lock()
@@ -1865,7 +1860,7 @@
microformats = traverse_obj(
prs, (..., 'microformat', 'playerMicroformatRenderer'),
expected_type=dict)
- _, live_status, _, formats, _ = self._list_formats(video_id, microformats, video_details, prs, player_url)
+ _, live_status, formats, _ = self._list_formats(video_id, microformats, video_details, prs, player_url)
is_live = live_status == 'is_live'
start_time = time.time()
@@ -2075,10 +2070,6 @@
variant = re.sub(r'[^a-zA-Z0-9]', '_', remove_end(player_path, '.js'))
return join_nonempty(player_id, variant)
- def _signature_cache_id(self, example_sig):
- """ Return a string representation of a signature """
- return '.'.join(str(len(part)) for part in example_sig.split('.'))
-
@classmethod
def _extract_player_info(cls, player_url):
for player_re in cls._PLAYER_INFO_RE:
@@ -2100,53 +2091,17 @@
self._code_cache[player_js_key] = code
return self._code_cache.get(player_js_key)
- def _extract_signature_function(self, video_id, player_url, example_sig):
- # Read from filesystem cache
- func_id = join_nonempty(
- self._player_js_cache_key(player_url), self._signature_cache_id(example_sig))
- assert os.path.basename(func_id) == func_id
-
- self.write_debug(f'Extracting signature function {func_id}')
- cache_spec, code = self.cache.load('youtube-sigfuncs', func_id, min_ver='2025.07.21'), None
-
- if not cache_spec:
- code = self._load_player(video_id, player_url)
- if code:
- res = self._parse_sig_js(code, player_url)
- test_string = ''.join(map(chr, range(len(example_sig))))
- cache_spec = [ord(c) for c in res(test_string)]
- self.cache.store('youtube-sigfuncs', func_id, cache_spec)
-
- return lambda s: ''.join(s[i] for i in cache_spec)
-
- def _parse_sig_js(self, jscode, player_url):
- # Examples where `sig` is funcname:
- # sig=function(a){a=a.split(""); ... ;return a.join("")};
- # ;c&&(c=sig(decodeURIComponent(c)),a.set(b,encodeURIComponent(c)));return a};
- # {var l=f,m=h.sp,n=sig(decodeURIComponent(h.s));l.set(m,encodeURIComponent(n))}
- # sig=function(J){J=J.split(""); ... ;return J.join("")};
- # ;N&&(N=sig(decodeURIComponent(N)),J.set(R,encodeURIComponent(N)));return J};
- # {var H=u,k=f.sp,v=sig(decodeURIComponent(f.s));H.set(k,encodeURIComponent(v))}
- funcname = self._search_regex(
- (r'\b(?P<var>[a-zA-Z0-9_$]+)&&\((?P=var)=(?P<sig>[a-zA-Z0-9_$]{2,})\(decodeURIComponent\((?P=var)\)\)',
- r'(?P<sig>[a-zA-Z0-9_$]+)\s*=\s*function\(\s*(?P<arg>[a-zA-Z0-9_$]+)\s*\)\s*{\s*(?P=arg)\s*=\s*(?P=arg)\.split\(\s*""\s*\)\s*;\s*[^}]+;\s*return\s+(?P=arg)\.join\(\s*""\s*\)',
- r'(?:\b|[^a-zA-Z0-9_$])(?P<sig>[a-zA-Z0-9_$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)(?:;[a-zA-Z0-9_$]{2}\.[a-zA-Z0-9_$]{2}\(a,\d+\))?',
- # Old patterns
- r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
- r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
- r'\bm=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(h\.s\)\)',
- # Obsolete patterns
- r'("|\')signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
- r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
- r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
- r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
- r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\('),
- jscode, 'Initial JS player signature function name', group='sig')
-
- varname, global_list = self._interpret_player_js_global_var(jscode, player_url)
- jsi = JSInterpreter(jscode)
- initial_function = jsi.extract_function(funcname, filter_dict({varname: global_list}))
- return lambda s: initial_function([s])
+ def _load_player_data_from_cache(self, name, player_url):
+ cache_id = (f'youtube-{name}', self._player_js_cache_key(player_url))
+
+ if data := self._player_cache.get(cache_id):
+ return data
+
+ data = self.cache.load(*cache_id, min_ver='2025.07.21')
+ if data:
+ self._player_cache[cache_id] = data
+
+ return data
def _cached(self, func, *cache_id):
def inner(*args, **kwargs):
@@ -2164,17 +2119,23 @@
return ret
return inner
- def _load_player_data_from_cache(self, name, player_url):
- cache_id = (f'youtube-{name}', self._player_js_cache_key(player_url))
-
- if data := self._player_cache.get(cache_id):
- return data
+ def _sig_spec_cache_id(self, player_url, spec_id):
+ return join_nonempty(self._player_js_cache_key(player_url), str(spec_id))
- data = self.cache.load(*cache_id, min_ver='2025.07.21')
- if data:
- self._player_cache[cache_id] = data
-
- return data
+ def _load_sig_spec_from_cache(self, spec_cache_id):
+ # NOTE: this is almost identical to _load_player_data_from_cache;
+ # the duplication is undesirable and the two should be unified
+ if spec_cache_id in self._player_cache:
+ return self._player_cache[spec_cache_id]
+ spec = self.cache.load('youtube-sigfuncs', spec_cache_id, min_ver='2025.07.21')
+ if spec:
+ self._player_cache[spec_cache_id] = spec
+ return spec
+
+ def _store_sig_spec_to_cache(self, spec_cache_id, spec):
+ if spec_cache_id not in self._player_cache:
+ self._player_cache[spec_cache_id] = spec
+ self.cache.store('youtube-sigfuncs', spec_cache_id, spec)
def _store_player_data_to_cache(self, name, player_url, data):
cache_id = (f'youtube-{name}', self._player_js_cache_key(player_url))
@@ -2182,218 +2143,6 @@
self.cache.store(*cache_id, data)
self._player_cache[cache_id] = data
- def _decrypt_signature(self, s, video_id, player_url):
- """Turn the encrypted s field into a working signature"""
- extract_sig = self._cached(
- self._extract_signature_function, 'sig', player_url, self._signature_cache_id(s))
- func = extract_sig(video_id, player_url, s)
- return func(s)
-
- def _decrypt_nsig(self, s, video_id, player_url):
- """Turn the encrypted n field into a working signature"""
- if player_url is None:
- raise ExtractorError('Cannot decrypt nsig without player_url')
- player_url = urljoin('https://www.youtube.com', player_url)
-
- try:
- jsi, _, func_code = self._extract_n_function_code(video_id, player_url)
- except ExtractorError as e:
- raise ExtractorError('Unable to extract nsig function code', cause=e)
-
- try:
- extract_nsig = self._cached(self._extract_n_function_from_code, self._NSIG_FUNC_CACHE_ID, player_url)
- ret = extract_nsig(jsi, func_code)(s)
- except JSInterpreter.Exception as e:
- try:
- jsi = PhantomJSwrapper(self, timeout=5000)
- except ExtractorError:
- raise e
- self.report_warning(
- f'Native nsig extraction failed: Trying with PhantomJS\n'
- f' n = {s} ; player = {player_url}', video_id)
- self.write_debug(e, only_once=True)
-
- args, func_body = func_code
- ret = jsi.execute(
- f'console.log(function({", ".join(args)}) {{ {func_body} }}({s!r}));',
- video_id=video_id, note='Executing signature code').strip()
-
- self.write_debug(f'Decrypted nsig {s} => {ret}')
- # Only cache nsig func JS code to disk if successful, and only once
- self._store_player_data_to_cache('nsig', player_url, func_code)
- return ret
-
- def _extract_n_function_name(self, jscode, player_url=None):
- varname, global_list = self._interpret_player_js_global_var(jscode, player_url)
- if debug_str := traverse_obj(global_list, (lambda _, v: v.endswith('-_w8_'), any)):
- pattern = r'''(?x)
- \{\s*return\s+%s\[%d\]\s*\+\s*(?P<argname>[a-zA-Z0-9_$]+)\s*\}
- ''' % (re.escape(varname), global_list.index(debug_str))
- if match := re.search(pattern, jscode):
- pattern = r'''(?x)
- \{\s*\)%s\(\s*
- (?:
- (?P<funcname_a>[a-zA-Z0-9_$]+)\s*noitcnuf\s*
- |noitcnuf\s*=\s*(?P<funcname_b>[a-zA-Z0-9_$]+)(?:\s+rav)?
- )[;\n]
- ''' % re.escape(match.group('argname')[::-1])
- if match := re.search(pattern, jscode[match.start()::-1]):
- a, b = match.group('funcname_a', 'funcname_b')
- return (a or b)[::-1]
- self.write_debug(join_nonempty(
- 'Initial search was unable to find nsig function name',
- player_url and f' player = {player_url}', delim='\n'), only_once=True)
-
- # Examples (with placeholders nfunc, narray, idx):
- # * .get("n"))&&(b=nfunc(b)
- # * .get("n"))&&(b=narray[idx](b)
- # * b=String.fromCharCode(110),c=a.get(b))&&c=narray[idx](c)
- # * a.D&&(b="nn"[+a.D],c=a.get(b))&&(c=narray[idx](c),a.set(b,c),narray.length||nfunc("")
- # * a.D&&(PL(a),b=a.j.n||null)&&(b=narray[0](b),a.set("n",b),narray.length||nfunc("")
- # * a.D&&(b="nn"[+a.D],vL(a),c=a.j[b]||null)&&(c=narray[idx](c),a.set(b,c),narray.length||nfunc("")
- # * J.J="";J.url="";J.Z&&(R="nn"[+J.Z],mW(J),N=J.K[R]||null)&&(N=narray[idx](N),J.set(R,N))}};
- funcname, idx = self._search_regex(
- r'''(?x)
- (?:
- \.get\("n"\)\)&&\(b=|
- (?:
- b=String\.fromCharCode\(110\)|
- (?P<str_idx>[a-zA-Z0-9_$.]+)&&\(b="nn"\[\+(?P=str_idx)\]
- )
- (?:
- ,[a-zA-Z0-9_$]+\(a\))?,c=a\.
- (?:
- get\(b\)|
- [a-zA-Z0-9_$]+\[b\]\|\|null
- )\)&&\(c=|
- \b(?P<var>[a-zA-Z0-9_$]+)=
- )(?P<nfunc>[a-zA-Z0-9_$]+)(?:\[(?P<idx>\d+)\])?\([a-zA-Z]\)
- (?(var),[a-zA-Z0-9_$]+\.set\((?:"n+"|[a-zA-Z0-9_$]+)\,(?P=var)\))''',
- jscode, 'n function name', group=('nfunc', 'idx'), default=(None, None))
- if not funcname:
- self.report_warning(join_nonempty(
- 'Falling back to generic n function search',
- player_url and f' player = {player_url}', delim='\n'), only_once=True)
- return self._search_regex(
- r'''(?xs)
- ;\s*(?P<name>[a-zA-Z0-9_$]+)\s*=\s*function\([a-zA-Z0-9_$]+\)
- \s*\{(?:(?!};).)+?return\s*(?P<q>["'])[\w-]+_w8_(?P=q)\s*\+\s*[a-zA-Z0-9_$]+''',
- jscode, 'Initial JS player n function name', group='name')
- elif not idx:
- return funcname
-
- return json.loads(js_to_json(self._search_regex(
- rf'var {re.escape(funcname)}\s*=\s*(\[.+?\])\s*[,;]', jscode,
- f'Initial JS player n function list ({funcname}.{idx})')))[int(idx)]
-
- def _interpret_player_js_global_var(self, jscode, player_url):
- """Returns tuple of: variable name string, variable value list"""
- extract_global_var = self._cached(self._search_regex, 'js global array', player_url)
- varcode, varname, varvalue = extract_global_var(
- r'''(?x)
- (?P<q1>["\'])use\s+strict(?P=q1);\s*
- (?P<code>
- var\s+(?P<name>[a-zA-Z0-9_$]+)\s*=\s*
- (?P<value>
- (?P<q2>["\'])(?:(?!(?P=q2)).|\\.)+(?P=q2)
- \.split\((?P<q3>["\'])(?:(?!(?P=q3)).)+(?P=q3)\)
- |\[\s*(?:(?P<q4>["\'])(?:(?!(?P=q4)).|\\.)*(?P=q4)\s*,?\s*)+\]
- )
- )[;,]
- ''', jscode, 'global variable', group=('code', 'name', 'value'), default=(None, None, None))
- if not varcode:
- self.write_debug(join_nonempty(
- 'No global array variable found in player JS',
- player_url and f' player = {player_url}', delim='\n'), only_once=True)
- return None, None
-
- jsi = JSInterpreter(varcode)
- interpret_global_var = self._cached(jsi.interpret_expression, 'js global list', player_url)
- return varname, interpret_global_var(varvalue, LocalNameSpace(), allow_recursion=10)
-
- def _fixup_n_function_code(self, argnames, nsig_code, jscode, player_url):
- # Fixup global array
- varname, global_list = self._interpret_player_js_global_var(jscode, player_url)
- if varname and global_list:
- nsig_code = f'var {varname}={json.dumps(global_list)}; {nsig_code}'
- else:
- varname = self._DUMMY_STRING
- global_list = []
-
- # Fixup typeof check
- undefined_idx = global_list.index('undefined') if 'undefined' in global_list else r'\d+'
- fixed_code = re.sub(
- fr'''(?x)
- ;\s*if\s*\(\s*typeof\s+[a-zA-Z0-9_$]+\s*===?\s*(?:
- (["\'])undefined\1|
- {re.escape(varname)}\[{undefined_idx}\]
- )\s*\)\s*return\s+{re.escape(argnames[0])};
- ''', ';', nsig_code)
- if fixed_code == nsig_code:
- self.write_debug(join_nonempty(
- 'No typeof statement found in nsig function code',
- player_url and f' player = {player_url}', delim='\n'), only_once=True)
-
- # Fixup global funcs
- jsi = JSInterpreter(fixed_code)
- cache_id = (self._NSIG_FUNC_CACHE_ID, player_url)
- try:
- self._cached(
- self._extract_n_function_from_code, *cache_id)(jsi, (argnames, fixed_code))(self._DUMMY_STRING)
- except JSInterpreter.Exception:
- self._player_cache.pop(cache_id, None)
-
- global_funcnames = jsi._undefined_varnames
- debug_names = []
- jsi = JSInterpreter(jscode)
- for func_name in global_funcnames:
- try:
- func_args, func_code = jsi.extract_function_code(func_name)
- fixed_code = f'var {func_name} = function({", ".join(func_args)}) {{ {func_code} }}; {fixed_code}'
- debug_names.append(func_name)
- except Exception:
- self.report_warning(join_nonempty(
- f'Unable to extract global nsig function {func_name} from player JS',
- player_url and f' player = {player_url}', delim='\n'), only_once=True)
-
- if debug_names:
- self.write_debug(f'Extracted global nsig functions: {", ".join(debug_names)}')
-
- return argnames, fixed_code
-
- def _extract_n_function_code(self, video_id, player_url):
- player_id = self._extract_player_info(player_url)
- func_code = self._load_player_data_from_cache('nsig', player_url)
- jscode = func_code or self._load_player(video_id, player_url)
- jsi = JSInterpreter(jscode)
-
- if func_code:
- return jsi, player_id, func_code
-
- func_name = self._extract_n_function_name(jscode, player_url=player_url)
-
- # XXX: Work around (a) global array variable, (b) `typeof` short-circuit, (c) global functions
- func_code = self._fixup_n_function_code(*jsi.extract_function_code(func_name), jscode, player_url)
-
- return jsi, player_id, func_code
-
- def _extract_n_function_from_code(self, jsi, func_code):
- func = jsi.extract_function_from_code(*func_code)
-
- def extract_nsig(s):
- try:
- ret = func([s])
- except JSInterpreter.Exception:
- raise
- except Exception as e:
- raise JSInterpreter.Exception(traceback.format_exc(), cause=e)
-
- if ret.startswith('enhanced_except_') or ret.endswith(s):
- raise JSInterpreter.Exception('Signature function returned an exception')
- return ret
-
- return extract_nsig
-
def _extract_signature_timestamp(self, video_id, player_url, ytcfg=None, fatal=False):
"""
Extract signatureTimestamp (sts)
@@ -3226,12 +2975,12 @@
sd[STREAMING_DATA_INNERTUBE_CONTEXT] = innertube_context
sd[STREAMING_DATA_FETCH_SUBS_PO_TOKEN] = fetch_subs_po_token_func
sd[STREAMING_DATA_IS_PREMIUM_SUBSCRIBER] = is_premium_subscriber
+ sd[STREAMING_DATA_FETCHED_TIMESTAMP] = fetched_timestamp
for f in traverse_obj(sd, (('formats', 'adaptiveFormats'), ..., {dict})):
f[STREAMING_DATA_CLIENT_NAME] = client
f[STREAMING_DATA_FETCH_GVS_PO_TOKEN] = fetch_gvs_po_token_func
f[STREAMING_DATA_IS_PREMIUM_SUBSCRIBER] = is_premium_subscriber
f[STREAMING_DATA_PLAYER_TOKEN_PROVIDED] = bool(player_po_token)
- f[STREAMING_DATA_FETCHED_TIMESTAMP] = fetched_timestamp
if deprioritize_pr:
deprioritized_prs.append(pr)
else:
@@ -3311,12 +3060,13 @@
else:
self.report_warning(msg, only_once=True)
- def _extract_formats_and_subtitles(self, streaming_data, video_id, player_url, live_status, duration):
+ def _extract_formats_and_subtitles(self, video_id, player_responses, player_url, live_status, duration):
CHUNK_SIZE = 10 << 20
PREFERRED_LANG_VALUE = 10
original_language = None
itags, stream_ids = collections.defaultdict(set), []
itag_qualities, res_qualities = {}, {0: None}
+ subtitles = {}
q = qualities([
# Normally tiny is the smallest video-only formats. But
# audio-only formats with unknown quality may get tagged as tiny
@@ -3324,7 +3074,6 @@
'audio_quality_ultralow', 'audio_quality_low', 'audio_quality_medium', 'audio_quality_high', # Audio only formats
'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres',
])
- streaming_formats = traverse_obj(streaming_data, (..., ('formats', 'adaptiveFormats'), ...))
format_types = self._configuration_arg('formats')
all_formats = 'duplicate' in format_types
if self._configuration_arg('include_duplicate_formats'):
@@ -3332,6 +3081,9 @@
self._downloader.deprecated_feature('[youtube] include_duplicate_formats extractor argument is deprecated. '
'Use formats=duplicate extractor argument instead')
+ def solve_sig(s, spec):
+ return ''.join(s[i] for i in spec)
+
def build_fragments(f):
return LazyList({
'url': update_url_query(f['url'], {
@@ -3351,279 +3103,363 @@
# For handling potential pre-playback required waiting period
playback_wait = int_or_none(self._configuration_arg('playback_wait', [None])[0], default=6)
- for fmt in streaming_formats:
- client_name = fmt[STREAMING_DATA_CLIENT_NAME]
- available_at = fmt[STREAMING_DATA_FETCHED_TIMESTAMP] + playback_wait
- if fmt.get('targetDurationSec'):
+ for pr in player_responses:
+ streaming_data = traverse_obj(pr, 'streamingData')
+ if not streaming_data:
continue
- itag = str_or_none(fmt.get('itag'))
- audio_track = fmt.get('audioTrack') or {}
- stream_id = (itag, audio_track.get('id'), fmt.get('isDrc'))
- if not all_formats:
- if stream_id in stream_ids:
- continue
-
- quality = fmt.get('quality')
- height = int_or_none(fmt.get('height'))
- if quality == 'tiny' or not quality:
- quality = fmt.get('audioQuality', '').lower() or quality
- # The 3gp format (17) in android client has a quality of "small",
- # but is actually worse than other formats
- if itag == '17':
- quality = 'tiny'
- if quality:
- if itag:
- itag_qualities[itag] = quality
- if height:
- res_qualities[height] = quality
-
- display_name = audio_track.get('displayName') or ''
- is_original = 'original' in display_name.lower()
- is_descriptive = 'descriptive' in display_name.lower()
- is_default = audio_track.get('audioIsDefault')
- language_code = audio_track.get('id', '').split('.')[0]
- if language_code and (is_original or (is_default and not original_language)):
- original_language = language_code
-
- has_drm = bool(fmt.get('drmFamilies'))
-
- # FORMAT_STREAM_TYPE_OTF(otf=1) requires downloading the init fragment
- # (adding `&sq=0` to the URL) and parsing emsg box to determine the
- # number of fragment that would subsequently requested with (`&sq=N`)
- if fmt.get('type') == 'FORMAT_STREAM_TYPE_OTF' and not has_drm:
- continue
-
- if has_drm:
- msg = f'Some {client_name} client https formats have been skipped as they are DRM protected. '
- if client_name == 'tv':
- msg += (
- f'{"Your account" if self.is_authenticated else "The current session"} may have '
- f'an experiment that applies DRM to all videos on the tv client. '
- f'See https://github.com/yt-dlp/yt-dlp/issues/12563 for more details.'
- )
- self.report_warning(msg, video_id, only_once=True)
-
- fmt_url = fmt.get('url')
- if not fmt_url:
- sc = urllib.parse.parse_qs(fmt.get('signatureCipher'))
- fmt_url = url_or_none(try_get(sc, lambda x: x['url'][0]))
- encrypted_sig = try_get(sc, lambda x: x['s'][0])
- if not all((sc, fmt_url, player_url, encrypted_sig)):
- msg = f'Some {client_name} client https formats have been skipped as they are missing a url. '
- if client_name in ('web', 'web_safari'):
- msg += 'YouTube is forcing SABR streaming for this client. '
- else:
+ fetch_po_token_func = streaming_data[STREAMING_DATA_FETCH_GVS_PO_TOKEN]
+ is_premium_subscriber = streaming_data[STREAMING_DATA_IS_PREMIUM_SUBSCRIBER]
+ player_token_provided = streaming_data[STREAMING_DATA_PLAYER_TOKEN_PROVIDED]
+ client_name = streaming_data.get(STREAMING_DATA_CLIENT_NAME)
+ available_at = streaming_data[STREAMING_DATA_FETCHED_TIMESTAMP] + playback_wait
+ streaming_formats = traverse_obj(streaming_data, (('formats', 'adaptiveFormats'), ...))
+
+ def get_stream_id(fmt_stream):
+ return str_or_none(fmt_stream.get('itag')), traverse_obj(fmt_stream, 'audioTrack', 'id'), fmt_stream.get('isDrc')
+
+ def process_format_stream(fmt_stream, proto, missing_pot):
+ nonlocal original_language
+ itag = str_or_none(fmt_stream.get('itag'))
+ audio_track = fmt_stream.get('audioTrack') or {}
+ quality = fmt_stream.get('quality')
+ height = int_or_none(fmt_stream.get('height'))
+ if quality == 'tiny' or not quality:
+ quality = fmt_stream.get('audioQuality', '').lower() or quality
+ # The 3gp format (17) in android client has a quality of "small",
+ # but is actually worse than other formats
+ if itag == '17':
+ quality = 'tiny'
+ if quality:
+ if itag:
+ itag_qualities[itag] = quality
+ if height:
+ res_qualities[height] = quality
+
+ display_name = audio_track.get('displayName') or ''
+ is_original = 'original' in display_name.lower()
+ is_descriptive = 'descriptive' in display_name.lower()
+ is_default = audio_track.get('audioIsDefault')
+ language_code = audio_track.get('id', '').split('.')[0]
+ if language_code and (is_original or (is_default and not original_language)):
+ original_language = language_code
+
+ has_drm = bool(fmt_stream.get('drmFamilies'))
+
+ if has_drm:
+ msg = f'Some {client_name} client {proto} formats have been skipped as they are DRM protected. '
+ if client_name == 'tv':
msg += (
- f'YouTube may have enabled the SABR-only or Server-Side Ad Placement experiment for '
- f'{"your account" if self.is_authenticated else "the current session"}. '
+ f'{"Your account" if self.is_authenticated else "The current session"} may have '
+ f'an experiment that applies DRM to all videos on the tv client. '
+ f'See https://github.com/yt-dlp/yt-dlp/issues/12563 for more details.'
)
- msg += 'See https://github.com/yt-dlp/yt-dlp/issues/12482 for more details'
self.report_warning(msg, video_id, only_once=True)
- continue
- try:
- fmt_url += '&{}={}'.format(
- traverse_obj(sc, ('sp', -1)) or 'signature',
- self._decrypt_signature(encrypted_sig, video_id, player_url),
- )
- except ExtractorError as e:
- self.report_warning(
- f'Signature extraction failed: Some formats may be missing\n'
- f' player = {player_url}\n'
- f' {bug_reports_message(before="")}',
- video_id=video_id, only_once=True)
- self.write_debug(
- f'{video_id}: Signature extraction failure info:\n'
- f' encrypted sig = {encrypted_sig}\n'
- f' player = {player_url}')
- self.write_debug(e, only_once=True)
- continue
- query = parse_qs(fmt_url)
- if query.get('n'):
- try:
- decrypt_nsig = self._cached(self._decrypt_nsig, 'nsig', query['n'][0])
- fmt_url = update_url_query(fmt_url, {
- 'n': decrypt_nsig(query['n'][0], video_id, player_url),
- })
- except ExtractorError as e:
- if player_url:
- self.report_warning(
- f'nsig extraction failed: Some formats may be missing\n'
- f' n = {query["n"][0]} ; player = {player_url}\n'
- f' {bug_reports_message(before="")}',
- video_id=video_id, only_once=True)
- self.write_debug(e, only_once=True)
- else:
- self.report_warning(
- 'Cannot decrypt nsig without player_url: Some formats may be missing',
- video_id=video_id, only_once=True)
- continue
+ tbr = float_or_none(fmt_stream.get('averageBitrate') or fmt_stream.get('bitrate'), 1000)
+ format_duration = traverse_obj(fmt_stream, ('approxDurationMs', {float_or_none(scale=1000)}))
+ # Some formats may have much smaller duration than others (possibly damaged during encoding)
+ # E.g. 2-nOtRESiUc Ref: https://github.com/yt-dlp/yt-dlp/issues/2823
+ # Make sure to avoid false positives with small duration differences.
+ # E.g. __2ABJjxzNo, ySuUZEjARPY
+ is_damaged = try_call(lambda: format_duration < duration // 2)
+ if is_damaged:
+ self.report_warning(
+ f'Some {client_name} client {proto} formats are possibly damaged. They will be deprioritized', video_id, only_once=True)
- tbr = float_or_none(fmt.get('averageBitrate') or fmt.get('bitrate'), 1000)
- format_duration = traverse_obj(fmt, ('approxDurationMs', {float_or_none(scale=1000)}))
- # Some formats may have much smaller duration than others (possibly damaged during encoding)
- # E.g. 2-nOtRESiUc Ref: https://github.com/yt-dlp/yt-dlp/issues/2823
- # Make sure to avoid false positives with small duration differences.
- # E.g. __2ABJjxzNo, ySuUZEjARPY
- is_damaged = try_call(lambda: format_duration < duration // 2)
- if is_damaged:
- self.report_warning(
- 'Some formats are possibly damaged. They will be deprioritized', video_id, only_once=True)
+ if missing_pot and 'missing_pot' not in self._configuration_arg('formats'):
+ self._report_pot_format_skipped(video_id, client_name, proto)
+ return None
+
+ name = fmt_stream.get('qualityLabel') or quality.replace('audio_quality_', '') or ''
+ fps = int_or_none(fmt_stream.get('fps')) or 0
+ dct = {
+ 'asr': int_or_none(fmt_stream.get('audioSampleRate')),
+ 'filesize': int_or_none(fmt_stream.get('contentLength')),
+ 'format_id': f'{itag}{"-drc" if fmt_stream.get("isDrc") else ""}',
+ 'format_note': join_nonempty(
+ join_nonempty(display_name, is_default and ' (default)', delim=''),
+ name, fmt_stream.get('isDrc') and 'DRC',
+ try_get(fmt_stream, lambda x: x['projectionType'].replace('RECTANGULAR', '').lower()),
+ try_get(fmt_stream, lambda x: x['spatialAudioType'].replace('SPATIAL_AUDIO_TYPE_', '').lower()),
+ is_damaged and 'DAMAGED', missing_pot and 'MISSING POT',
+ (self.get_param('verbose') or all_formats) and short_client_name(client_name),
+ delim=', '),
+ # Format 22 is likely to be damaged. See https://github.com/yt-dlp/yt-dlp/issues/3372
+ 'source_preference': (-5 if itag == '22' else -1) + (100 if 'Premium' in name else 0),
+ 'fps': fps if fps > 1 else None, # For some formats, fps is wrongly returned as 1
+ 'audio_channels': fmt_stream.get('audioChannels'),
+ 'height': height,
+ 'quality': q(quality) - bool(fmt_stream.get('isDrc')) / 2,
+ 'has_drm': has_drm,
+ 'tbr': tbr,
+ 'filesize_approx': filesize_from_tbr(tbr, format_duration),
+ 'width': int_or_none(fmt_stream.get('width')),
+ 'language': join_nonempty(language_code, 'desc' if is_descriptive else '') or None,
+ 'language_preference': PREFERRED_LANG_VALUE if is_original else 5 if is_default else -10 if is_descriptive else -1,
+ # Strictly de-prioritize damaged and 3gp formats
+ 'preference': -10 if is_damaged else -2 if itag == '17' else None,
+ }
+ mime_mobj = re.match(
+ r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', fmt_stream.get('mimeType') or '')
+ if mime_mobj:
+ dct['ext'] = mimetype2ext(mime_mobj.group(1))
+ dct.update(parse_codecs(mime_mobj.group(2)))
+
+ single_stream = 'none' in (dct.get('acodec'), dct.get('vcodec'))
+ if single_stream and dct.get('ext'):
+ dct['container'] = dct['ext'] + '_dash'
+
+ return dct
+
+ def process_https_formats():
+ proto = 'https'
+ https_fmts = []
+ for fmt_stream in streaming_formats:
+ if fmt_stream.get('targetDurationSec'):
+ continue
- fetch_po_token_func = fmt[STREAMING_DATA_FETCH_GVS_PO_TOKEN]
- pot_policy: GvsPoTokenPolicy = self._get_default_ytcfg(client_name)['GVS_PO_TOKEN_POLICY'][StreamingProtocol.HTTPS]
+ # FORMAT_STREAM_TYPE_OTF(otf=1) requires downloading the init fragment
+ # (adding `&sq=0` to the URL) and parsing emsg box to determine the
+ # number of fragments that would subsequently be requested with (`&sq=N`)
+ if fmt_stream.get('type') == 'FORMAT_STREAM_TYPE_OTF' and not bool(fmt_stream.get('drmFamilies')):
+ continue
+ stream_id = get_stream_id(fmt_stream)
+ if not all_formats:
+ if stream_id in stream_ids:
+ continue
+
+ pot_policy: GvsPoTokenPolicy = self._get_default_ytcfg(client_name)['GVS_PO_TOKEN_POLICY'][StreamingProtocol.HTTPS]
+
+ require_po_token = (
+ stream_id[0] not in ['18']
+ and gvs_pot_required(pot_policy, is_premium_subscriber, player_token_provided))
+
+ po_token = (
+ gvs_pots.get(client_name)
+ or fetch_po_token_func(required=require_po_token or pot_policy.recommended))
+ if po_token:
+ if client_name not in gvs_pots:
+ gvs_pots[client_name] = po_token
+
+ fmt_url = fmt_stream.get('url')
+ encrypted_sig, sc = None, None
+ if not fmt_url:
+ sc = urllib.parse.parse_qs(fmt_stream.get('signatureCipher'))
+ fmt_url = url_or_none(try_get(sc, lambda x: x['url'][0]))
+ encrypted_sig = try_get(sc, lambda x: x['s'][0])
+ if not all((sc, fmt_url, player_url, encrypted_sig)):
+ msg = f'Some {client_name} client https formats have been skipped as they are missing a url. '
+ if client_name in ('web', 'web_safari'):
+ msg += 'YouTube is forcing SABR streaming for this client. '
+ else:
+ msg += (
+ f'YouTube may have enabled the SABR-only or Server-Side Ad Placement experiment for '
+ f'{"your account" if self.is_authenticated else "the current session"}. '
+ )
+ msg += 'See https://github.com/yt-dlp/yt-dlp/issues/12482 for more details'
+ self.report_warning(msg, video_id, only_once=True)
+ continue
- require_po_token = (
- itag not in ['18']
- and gvs_pot_required(
- pot_policy, fmt[STREAMING_DATA_IS_PREMIUM_SUBSCRIBER],
- fmt[STREAMING_DATA_PLAYER_TOKEN_PROVIDED]))
-
- po_token = (
- gvs_pots.get(client_name)
- or fetch_po_token_func(required=require_po_token or pot_policy.recommended))
-
- if po_token:
- fmt_url = update_url_query(fmt_url, {'pot': po_token})
- if client_name not in gvs_pots:
- gvs_pots[client_name] = po_token
+ fmt = process_format_stream(fmt_stream, proto, missing_pot=require_po_token and not po_token)
+ if not fmt:
+ continue
- if not po_token and require_po_token and 'missing_pot' not in self._configuration_arg('formats'):
- self._report_pot_format_skipped(video_id, client_name, 'https')
- continue
+ # signature
+ # Attempt to load sig spec from cache
+ if encrypted_sig:
+ spec_cache_id = self._sig_spec_cache_id(player_url, len(encrypted_sig))
+ spec = self._load_sig_spec_from_cache(spec_cache_id)
+ if spec:
+ self.write_debug(f'Using cached signature function {spec_cache_id}', only_once=True)
+ fmt_url += '&{}={}'.format(traverse_obj(sc, ('sp', -1)) or 'signature',
+ solve_sig(encrypted_sig, spec))
+ else:
+ fmt['_jsc_s_challenge'] = encrypted_sig
+ fmt['_jsc_s_sc'] = sc
+
+ # nsig
+ query = parse_qs(fmt_url)
+ if query.get('n'):
+ n_challenge = query['n'][0]
+
+ if n_challenge in self._player_cache:
+ fmt_url = update_url_query(fmt_url, {'n': self._player_cache[n_challenge]})
+ else:
+ fmt['_jsc_n_challenge'] = n_challenge
+
+ if po_token:
+ fmt_url = update_url_query(fmt_url, {'pot': po_token})
+
+ fmt['url'] = fmt_url
+
+ if stream_id[0]:
+ itags[stream_id[0]].add((proto, fmt.get('language')))
+ stream_ids.append(stream_id)
+
+ # For handling potential pre-playback required waiting period
+ if live_status not in ('is_live', 'post_live'):
+ fmt['available_at'] = available_at
+
+ if (all_formats or 'dashy' in format_types) and fmt['filesize']:
+ https_fmts.append({
+ **fmt,
+ 'format_id': f'{fmt["format_id"]}-dashy' if all_formats else fmt['format_id'],
+ 'protocol': 'http_dash_segments',
+ 'fragments': build_fragments(fmt),
+ })
+ if all_formats or 'dashy' not in format_types:
+ fmt['downloader_options'] = {'http_chunk_size': CHUNK_SIZE}
+ https_fmts.append(fmt)
+
+ # Bulk process sig/nsig handling
+ # Retrieve all JSC Sig and Nsig requests for this player response in one go
+ n_challenges = {}
+ s_challenges = {}
+ for fmt in https_fmts:
+ # This will de-duplicate requests
+ n_challenge = fmt.pop('_jsc_n_challenge', None)
+ if n_challenge is not None:
+ n_challenges.setdefault(n_challenge, []).append(fmt)
+
+ s_challenge = fmt.pop('_jsc_s_challenge', None)
+ if s_challenge is not None:
+ s_challenges.setdefault(len(s_challenge), {}).setdefault(s_challenge, []).append(fmt)
+
+ challenge_requests = []
+ if n_challenges:
+ challenge_requests.append(JsChallengeRequest(
+ type=JsChallengeType.N,
+ video_id=video_id,
+ input=NChallengeInput(challenges=list(n_challenges.keys()), player_url=player_url)))
+ if s_challenges:
+ challenge_requests.append(JsChallengeRequest(
+ type=JsChallengeType.SIG,
+ video_id=video_id,
+ input=SigChallengeInput(challenges=[''.join(map(chr, range(spec_id))) for spec_id in s_challenges], player_url=player_url)))
+
+ if challenge_requests:
+ for _challenge_request, challenge_response in self._jsc_director.bulk_solve(challenge_requests):
+ if challenge_response.type == JsChallengeType.SIG:
+ for challenge, result in challenge_response.output.results.items():
+ spec_id = len(challenge)
+ spec = [ord(c) for c in result]
+ self._store_sig_spec_to_cache(self._sig_spec_cache_id(player_url, spec_id), spec)
+ s_challenge_data = s_challenges.pop(spec_id, {})
+ if not s_challenge_data:
+ continue
+ for s_challenge, fmts in s_challenge_data.items():
+ solved_challenge = solve_sig(s_challenge, spec)
+ for fmt in fmts:
+ sc = fmt.pop('_jsc_s_sc')
+ fmt['url'] += '&{}={}'.format(
+ traverse_obj(sc, ('sp', -1)) or 'signature',
+ solved_challenge)
+
+ elif challenge_response.type == JsChallengeType.N:
+ for challenge, result in challenge_response.output.results.items():
+ fmts = n_challenges.pop(challenge, [])
+ for fmt in fmts:
+ self._player_cache[challenge] = result
+ fmt['url'] = update_url_query(fmt['url'], {'n': result})
+
+ # Report a warning if any challenge requests remain unsolved,
+ # with a message depending on the type of challenge request
+ # TODO: this can also happen when there are no supported JSC Providers
+ # TODO: cleanup
- name = fmt.get('qualityLabel') or quality.replace('audio_quality_', '') or ''
- fps = int_or_none(fmt.get('fps')) or 0
- dct = {
- 'asr': int_or_none(fmt.get('audioSampleRate')),
- 'filesize': int_or_none(fmt.get('contentLength')),
- 'format_id': f'{itag}{"-drc" if fmt.get("isDrc") else ""}',
- 'format_note': join_nonempty(
- join_nonempty(display_name, is_default and ' (default)', delim=''),
- name, fmt.get('isDrc') and 'DRC',
- try_get(fmt, lambda x: x['projectionType'].replace('RECTANGULAR', '').lower()),
- try_get(fmt, lambda x: x['spatialAudioType'].replace('SPATIAL_AUDIO_TYPE_', '').lower()),
- is_damaged and 'DAMAGED', require_po_token and not po_token and 'MISSING POT',
- (self.get_param('verbose') or all_formats) and short_client_name(client_name),
- delim=', '),
- # Format 22 is likely to be damaged. See https://github.com/yt-dlp/yt-dlp/issues/3372
- 'source_preference': (-5 if itag == '22' else -1) + (100 if 'Premium' in name else 0),
- 'fps': fps if fps > 1 else None, # For some formats, fps is wrongly returned as 1
- 'audio_channels': fmt.get('audioChannels'),
- 'height': height,
- 'quality': q(quality) - bool(fmt.get('isDrc')) / 2,
- 'has_drm': has_drm,
- 'tbr': tbr,
- 'filesize_approx': filesize_from_tbr(tbr, format_duration),
- 'url': fmt_url,
- 'width': int_or_none(fmt.get('width')),
- 'language': join_nonempty(language_code, 'desc' if is_descriptive else '') or None,
- 'language_preference': PREFERRED_LANG_VALUE if is_original else 5 if is_default else -10 if is_descriptive else -1,
- # Strictly de-prioritize damaged and 3gp formats
- 'preference': -10 if is_damaged else -2 if itag == '17' else None,
- }
- mime_mobj = re.match(
- r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', fmt.get('mimeType') or '')
- if mime_mobj:
- dct['ext'] = mimetype2ext(mime_mobj.group(1))
- dct.update(parse_codecs(mime_mobj.group(2)))
- if itag:
- itags[itag].add(('https', dct.get('language')))
- stream_ids.append(stream_id)
- single_stream = 'none' in (dct.get('acodec'), dct.get('vcodec'))
- if single_stream and dct.get('ext'):
- dct['container'] = dct['ext'] + '_dash'
-
- # For handling potential pre-playback required waiting period
- if live_status not in ('is_live', 'post_live'):
- dct['available_at'] = available_at
-
- if (all_formats or 'dashy' in format_types) and dct['filesize']:
- yield {
- **dct,
- 'format_id': f'{dct["format_id"]}-dashy' if all_formats else dct['format_id'],
- 'protocol': 'http_dash_segments',
- 'fragments': build_fragments(dct),
- }
- if all_formats or 'dashy' not in format_types:
- dct['downloader_options'] = {'http_chunk_size': CHUNK_SIZE}
- yield dct
-
- needs_live_processing = self._needs_live_processing(live_status, duration)
- skip_bad_formats = 'incomplete' not in format_types
-
- skip_manifests = set(self._configuration_arg('skip'))
- if (needs_live_processing == 'is_live' # These will be filtered out by YoutubeDL anyway
- or (needs_live_processing and skip_bad_formats)):
- skip_manifests.add('hls')
- if skip_bad_formats and live_status == 'is_live' and needs_live_processing != 'is_live':
- skip_manifests.add('dash')
-
- def process_manifest_format(f, proto, client_name, itag, missing_pot):
- key = (proto, f.get('language'))
- if not all_formats and key in itags[itag]:
- return False
-
- # For handling potential pre-playback required waiting period
- if live_status not in ('is_live', 'post_live'):
- f['available_at'] = available_at
-
- if f.get('source_preference') is None:
- f['source_preference'] = -1
-
- # Deprioritize since its pre-merged m3u8 formats may have lower quality audio streams
- if client_name == 'web_safari' and proto == 'hls' and live_status != 'is_live':
- f['source_preference'] -= 1
-
- if missing_pot:
- f['format_note'] = join_nonempty(f.get('format_note'), 'MISSING POT', delim=' ')
- f['source_preference'] -= 20
-
- itags[itag].add(key)
-
- if itag and all_formats:
- f['format_id'] = f'{itag}-{proto}'
- elif any(p != proto for p, _ in itags[itag]):
- f['format_id'] = f'{itag}-{proto}'
- elif itag:
- f['format_id'] = itag
-
- if original_language and f.get('language') == original_language:
- f['format_note'] = join_nonempty(f.get('format_note'), '(default)', delim=' ')
- f['language_preference'] = PREFERRED_LANG_VALUE
-
- if itag in ('616', '235'):
- f['format_note'] = join_nonempty(f.get('format_note'), 'Premium', delim=' ')
- f['source_preference'] += 100
-
- f['quality'] = q(itag_qualities.get(try_get(f, lambda f: f['format_id'].split('-')[0]), -1))
- if f['quality'] == -1 and f.get('height'):
- f['quality'] = q(res_qualities[min(res_qualities, key=lambda x: abs(x - f['height']))])
- if self.get_param('verbose') or all_formats:
- f['format_note'] = join_nonempty(
- f.get('format_note'), short_client_name(client_name), delim=', ')
- if f.get('fps') and f['fps'] <= 1:
- del f['fps']
-
- if proto == 'hls' and f.get('has_drm'):
- f['has_drm'] = 'maybe'
- f['source_preference'] -= 5
- return True
+ if s_challenges:
+ self.report_warning(
+ 'Signature extraction failed: Some formats may be missing',
+ video_id=video_id, only_once=True)
+ if n_challenges:
+ self.report_warning(
+ 'nsig extraction failed: Some formats may be missing',
+ video_id=video_id, only_once=True)
- subtitles = {}
- for sd in streaming_data:
- client_name = sd[STREAMING_DATA_CLIENT_NAME]
- fetch_pot_func = sd[STREAMING_DATA_FETCH_GVS_PO_TOKEN]
- is_premium_subscriber = sd[STREAMING_DATA_IS_PREMIUM_SUBSCRIBER]
- has_player_token = sd[STREAMING_DATA_PLAYER_TOKEN_PROVIDED]
+ for cfmts in list(s_challenges.values()) + list(n_challenges.values()):
+ for fmt in cfmts:
+ if fmt in https_fmts:
+ https_fmts.remove(fmt)
+
+ yield from https_fmts
+
+ yield from process_https_formats()
+
+ needs_live_processing = self._needs_live_processing(live_status, duration)
+ skip_bad_formats = 'incomplete' not in format_types
+ if self._configuration_arg('include_incomplete_formats'):
+ skip_bad_formats = False
+ self._downloader.deprecated_feature('[youtube] include_incomplete_formats extractor argument is deprecated. '
+ 'Use formats=incomplete extractor argument instead')
+
+ skip_manifests = set(self._configuration_arg('skip'))
+ if (not self.get_param('youtube_include_hls_manifest', True)
+ or needs_live_processing == 'is_live' # These will be filtered out by YoutubeDL anyway
+ or (needs_live_processing and skip_bad_formats)):
+ skip_manifests.add('hls')
+
+ if not self.get_param('youtube_include_dash_manifest', True):
+ skip_manifests.add('dash')
+ if self._configuration_arg('include_live_dash'):
+ self._downloader.deprecated_feature('[youtube] include_live_dash extractor argument is deprecated. '
+ 'Use formats=incomplete extractor argument instead')
+ elif skip_bad_formats and live_status == 'is_live' and needs_live_processing != 'is_live':
+ skip_manifests.add('dash')
+
+ def process_manifest_format(f, proto, client_name, itag, missing_pot):
+ key = (proto, f.get('language'))
+ if not all_formats and key in itags[itag]:
+ return False
+
+ # For handling potential pre-playback required waiting period
+ if live_status not in ('is_live', 'post_live'):
+ f['available_at'] = available_at
+
+ if f.get('source_preference') is None:
+ f['source_preference'] = -1
+
+ if missing_pot:
+ f['format_note'] = join_nonempty(f.get('format_note'), 'MISSING POT', delim=' ')
+ f['source_preference'] -= 20
+
+ itags[itag].add(key)
+
+ if itag and all_formats:
+ f['format_id'] = f'{itag}-{proto}'
+ elif any(p != proto for p, _ in itags[itag]):
+ f['format_id'] = f'{itag}-{proto}'
+ elif itag:
+ f['format_id'] = itag
+
+ if original_language and f.get('language') == original_language:
+ f['format_note'] = join_nonempty(f.get('format_note'), '(default)', delim=' ')
+ f['language_preference'] = PREFERRED_LANG_VALUE
+
+ if itag in ('616', '235'):
+ f['format_note'] = join_nonempty(f.get('format_note'), 'Premium', delim=' ')
+ f['source_preference'] += 100
+
+ f['quality'] = q(itag_qualities.get(try_get(f, lambda f: f['format_id'].split('-')[0]), -1))
+ if f['quality'] == -1 and f.get('height'):
+ f['quality'] = q(res_qualities[min(res_qualities, key=lambda x: abs(x - f['height']))])
+ if self.get_param('verbose') or all_formats:
+ f['format_note'] = join_nonempty(
+ f.get('format_note'), short_client_name(client_name), delim=', ')
+ if f.get('fps') and f['fps'] <= 1:
+ del f['fps']
+
+ if proto == 'hls' and f.get('has_drm'):
+ f['has_drm'] = 'maybe'
+ f['source_preference'] -= 5
+ return True
- hls_manifest_url = 'hls' not in skip_manifests and sd.get('hlsManifestUrl')
+ hls_manifest_url = 'hls' not in skip_manifests and streaming_data.get('hlsManifestUrl')
if hls_manifest_url:
pot_policy: GvsPoTokenPolicy = self._get_default_ytcfg(
client_name)['GVS_PO_TOKEN_POLICY'][StreamingProtocol.HLS]
- require_po_token = gvs_pot_required(pot_policy, is_premium_subscriber, has_player_token)
- po_token = gvs_pots.get(client_name, fetch_pot_func(required=require_po_token or pot_policy.recommended))
+ require_po_token = gvs_pot_required(pot_policy, is_premium_subscriber, player_token_provided)
+ po_token = gvs_pots.get(client_name, fetch_po_token_func(required=require_po_token or pot_policy.recommended))
if po_token:
hls_manifest_url = hls_manifest_url.rstrip('/') + f'/pot/{po_token}'
if client_name not in gvs_pots:
@@ -3643,12 +3479,12 @@
r'/itag/(\d+)', f['url'], 'itag', default=None), require_po_token and not po_token):
yield f
- dash_manifest_url = 'dash' not in skip_manifests and sd.get('dashManifestUrl')
+ dash_manifest_url = 'dash' not in skip_manifests and streaming_data.get('dashManifestUrl')
if dash_manifest_url:
pot_policy: GvsPoTokenPolicy = self._get_default_ytcfg(
client_name)['GVS_PO_TOKEN_POLICY'][StreamingProtocol.DASH]
- require_po_token = gvs_pot_required(pot_policy, is_premium_subscriber, has_player_token)
- po_token = gvs_pots.get(client_name, fetch_pot_func(required=require_po_token or pot_policy.recommended))
+ require_po_token = gvs_pot_required(pot_policy, is_premium_subscriber, player_token_provided)
+ po_token = gvs_pots.get(client_name, fetch_po_token_func(required=require_po_token or pot_policy.recommended))
if po_token:
dash_manifest_url = dash_manifest_url.rstrip('/') + f'/pot/{po_token}'
if client_name not in gvs_pots:
@@ -3668,7 +3504,6 @@
r'/clen/(\d+)', f.get('fragment_base_url') or f['url'], 'file size', default=None))
if needs_live_processing:
f['is_from_start'] = True
-
yield f
yield subtitles
@@ -3741,14 +3576,13 @@
else 'was_live' if live_content
else 'not_live' if False in (is_live, live_content)
else None)
- streaming_data = traverse_obj(player_responses, (..., 'streamingData'))
- *formats, subtitles = self._extract_formats_and_subtitles(streaming_data, video_id, player_url, live_status, duration)
+ *formats, subtitles = self._extract_formats_and_subtitles(video_id, player_responses, player_url, live_status, duration)
if all(f.get('has_drm') for f in formats):
# If there are no formats that definitely don't have DRM, all have DRM
for f in formats:
f['has_drm'] = True
- return live_broadcast_details, live_status, streaming_data, formats, subtitles
+ return live_broadcast_details, live_status, formats, subtitles
def _download_initial_data(self, video_id, webpage, webpage_client, webpage_ytcfg):
initial_data = None
@@ -3908,8 +3742,9 @@
or int_or_none(get_first(microformats, 'lengthSeconds'))
or parse_duration(search_meta('duration')) or None)
- live_broadcast_details, live_status, streaming_data, formats, automatic_captions = \
+ live_broadcast_details, live_status, formats, automatic_captions = \
self._list_formats(video_id, microformats, video_details, player_responses, player_url, duration)
+ streaming_data = traverse_obj(player_responses, (..., 'streamingData'))
if live_status == 'post_live':
self.write_debug(f'{video_id}: Video is in Post-Live Manifestless mode')
Index: yt-dlp/yt_dlp/extractor/youtube/jsc/README.md
===================================================================
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
+++ yt-dlp/yt_dlp/extractor/youtube/jsc/README.md 2025-09-25 21:17:17.683261906 +0200
@@ -0,0 +1,129 @@
+# YoutubeIE JS Challenge Provider Framework
+
+As part of the YouTube extractor, we have a framework for solving JS Challenges programmatically (sig, nsig). This can be used by plugins.
+
+> [!TIP]
+> If publishing a JS Challenge Provider plugin to GitHub, add the [yt-dlp-jsc-provider](https://github.com/topics/yt-dlp-jsc-provider) topic to your repository to help users find it.
+
+
+## Public APIs
+
+- `yt_dlp.extractor.youtube.jsc.provider`
+- `yt_dlp.extractor.youtube.jsc.utils`
+
+Everything else is internal-only and no guarantees are made about the API stability.
+
+> [!WARNING]
+> We will try our best to maintain stability with the public APIs.
+> However, due to the nature of extractors and YouTube, we may need to remove or change APIs in the future.
+> If you are using these APIs outside yt-dlp plugins, please account for this by importing them safely.
+
+## JS Challenge Provider
+
+`yt_dlp.extractor.youtube.jsc.provider`
+
+```python
+from yt_dlp.extractor.youtube.jsc.provider import (
+ register_provider,
+ register_preference,
+ JsChallengeProvider,
+ JsChallengeRequest,
+ JsChallengeResponse,
+ JsChallengeProviderError,
+ JsChallengeProviderRejectedRequest,
+ JsChallengeType,
+ JsChallengeProviderResponse,
+ NChallengeOutput,
+)
+from yt_dlp.utils import traverse_obj, Popen
+import json
+import subprocess
+import typing
+
+@register_provider
+class MyJsChallengeProviderJCP(JsChallengeProvider):  # Provider class name must end with "JCP"
+    PROVIDER_VERSION = '0.2.1'
+    # Define a unique display name for the provider
+    PROVIDER_NAME = 'my-provider'
+    BUG_REPORT_LOCATION = 'https://issues.example.com/report'
+
+    # Set supported challenge types.
+    # If None, the provider will handle all types.
+    _SUPPORTED_TYPES = [JsChallengeType.N]
+
+    def is_available(self) -> bool:
+        """
+        Check if the provider is available (e.g. all required dependencies are available)
+        This is used to determine if the provider should be used and to provide debug information.
+
+        IMPORTANT: This method SHOULD NOT make any network requests or perform any expensive operations.
+
+        Since this is called multiple times, we recommend caching the result.
+        """
+        return True
+
+    def close(self):
+        # Optional close hook, called when YoutubeDL is closed.
+        pass
+
+    def _real_bulk_solve(self, requests: list[JsChallengeRequest]) -> typing.Generator[JsChallengeProviderResponse, None, None]:
+        # If you need to do additional validation on the requests.
+        # Raise yt_dlp.extractor.youtube.jsc.provider.JsChallengeProviderRejectedRequest if the request is not supported.
+        if len("something") > 255:
+            raise JsChallengeProviderRejectedRequest('Challenges longer than 255 are not supported', expected=True)
+
+        # Settings are pulled from extractor args passed to yt-dlp with the key `youtubejs-<PROVIDER_KEY>`.
+        # For this example, the extractor arg would be:
+        # `--extractor-args "youtubejs-myjschallengeprovider:bin_path=/path/to/bin"`
+        bin_path = self._configuration_arg(
+            'bin_path', default=['/path/to/bin'])[0]
+
+        # See below for logging guidelines
+        self.logger.trace(f'Using bin path: {bin_path}')
+
+        for request in requests:
+            # You can use the _get_player method to get the player JS code if needed.
+            # This shares the same caching as the YouTube extractor, so it will not make unnecessary requests.
+            player_js = self._get_player(request.video_id, request.input.player_url)
+            # Pass an argument list and never use shell=True: the challenges and player JS are remote data.
+            cmd = [bin_path, json.dumps(request.input.challenges), player_js]
+            self.logger.info(f'Executing command: {cmd}')
+            stdout, _, ret = Popen.run(cmd, text=True, stdout=subprocess.PIPE)
+            if ret != 0:
+                # If there is an error, raise JsChallengeProviderError.
+                # The request will be sent to the next provider if there is one.
+                # You can specify whether it is expected or not. If it is unexpected,
+                # the log will include a link to the bug report location (BUG_REPORT_LOCATION).
+                # raise JsChallengeProviderError(f'Command returned error code {ret}', expected=False)
+
+                # You can also only fail this specific request by returning a JsChallengeProviderResponse with the error.
+                # This will allow other requests to be processed by this provider.
+                yield JsChallengeProviderResponse(
+                    request=request,
+                    error=JsChallengeProviderError(f'Command returned error code {ret}', expected=False)
+                )
+                continue  # a failed request must not also yield the success response below
+
+            yield JsChallengeProviderResponse(
+                request=request,
+                response=JsChallengeResponse(
+                    type=JsChallengeType.N,
+                    output=NChallengeOutput(results=traverse_obj(json.loads(stdout))),
+                ))
+
+
+# If there are multiple JS Challenge Providers that can handle the same JsChallengeRequest(s),
+# you can define a preference function to increase/decrease the priority of providers.
+
+@register_preference(MyJsChallengeProviderJCP)
+def my_provider_preference(provider: JsChallengeProvider, requests: list[JsChallengeRequest]) -> int:
+    return 50
+```
+
+## Logging Guidelines
+
+TODO: logging guidelines have not been written yet.
+
+## Debugging
+
+- Use `-v --extractor-args "youtube:jsc_trace=true"` to enable JS Challenge debug output.
Index: yt-dlp/yt_dlp/extractor/youtube/jsc/__init__.py
===================================================================
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
+++ yt-dlp/yt_dlp/extractor/youtube/jsc/__init__.py 2025-09-25 21:17:17.683390485 +0200
@@ -0,0 +1,5 @@
+# Trigger import of built-in providers
+from ._builtin.bun import BunJCP as _BunJCP # noqa: F401
+from ._builtin.deno import DenoJCP as _DenoJCP # noqa: F401
+from ._builtin.jsinterp import JsInterpJCP as _JsInterpJCP # noqa: F401
+from ._builtin.node import NodeJCP as _NodeJCP # noqa: F401
Index: yt-dlp/yt_dlp/extractor/youtube/jsc/_builtin/bun.py
===================================================================
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
+++ yt-dlp/yt_dlp/extractor/youtube/jsc/_builtin/bun.py 2025-09-25 21:17:17.683542881 +0200
@@ -0,0 +1,79 @@
+from __future__ import annotations
+
+import shlex
+import subprocess
+
+from yt_dlp.extractor.youtube.jsc._builtin.runtime import (
+ JsRuntimeChalBaseJCP,
+ Script,
+ ScriptSource,
+ ScriptType,
+ ScriptVariant,
+)
+from yt_dlp.extractor.youtube.jsc._builtin.scripts import load_script
+from yt_dlp.extractor.youtube.jsc.provider import (
+ JsChallengeProvider,
+ JsChallengeProviderError,
+ JsChallengeRequest,
+ register_preference,
+ register_provider,
+)
+from yt_dlp.extractor.youtube.pot._provider import BuiltinIEContentProvider
+from yt_dlp.extractor.youtube.pot.provider import provider_bug_report_message
+from yt_dlp.utils import Popen
+
+
+@register_provider
+class BunJCP(JsRuntimeChalBaseJCP, BuiltinIEContentProvider):
+    # JS challenge provider backed by the `bun` runtime; input is handed to the process on stdin
+    PROVIDER_NAME = 'bun'
+    JS_RUNTIME_NAME = 'bun'
+
+    _ARGS = ['--bun', 'run', '-']
+    BUN_NPM_LIB_FILENAME = 'bun.lib.js'
+
+    def _iter_script_sources(self):
+        """Yield the parent's (source, loader) pairs, inserting the Bun NPM lib loader right before WEB"""
+        for origin, loader in super()._iter_script_sources():
+            if origin == ScriptSource.WEB:
+                yield ScriptSource.BUILTIN, self._bun_npm_source
+            yield origin, loader
+
+    def _bun_npm_source(self, script_type: ScriptType, /) -> Script | None:
+        """Return the bundled Bun lib script, or None if it does not apply, is not enabled or cannot be read"""
+        if script_type != ScriptType.LIB:
+            return None
+        if 'npm' not in self.ie.get_param('download_ext_components', []):
+            self._report_ext_component_skipped('npm', 'NPM package')
+            return None
+
+        def warn_unreadable(e):
+            return self.logger.warning(
+                f'Failed to read bun challenge solver lib script: {e}{provider_bug_report_message(self)}')
+
+        # The Bun-specific lib script relies on Bun auto-import
+        # https://bun.com/docs/runtime/autoimport
+        lib_code = load_script(self.BUN_NPM_LIB_FILENAME, error_hook=warn_unreadable)
+        if not lib_code:
+            return None
+        return Script(script_type, ScriptVariant.BUN_NPM, ScriptSource.BUILTIN, self._SUPPORTED_VERSION, lib_code)
+
+    def _run_js_runtime(self, stdin: str, /) -> str:
+        """Feed `stdin` to a bun process and return its stdout; raise if it exits non-zero or writes to stderr"""
+        argv = [self.runtime_info.path, *self._ARGS]
+        self.logger.debug(f'Running bun: {shlex.join(argv)}')
+        pipes = {'stdin': subprocess.PIPE, 'stdout': subprocess.PIPE, 'stderr': subprocess.PIPE}
+        with Popen(argv, text=True, **pipes) as proc:
+            stdout, stderr = proc.communicate_or_kill(stdin)
+            # Any stderr output counts as a failure, even when the exit code is zero
+            if stderr:
+                raise JsChallengeProviderError(f'Error running bun process: {stderr}')
+            if proc.returncode:
+                raise JsChallengeProviderError('Error running bun process')
+
+        return stdout
+
+
+@register_preference(BunJCP)
+def preference(provider: JsChallengeProvider, requests: list[JsChallengeRequest]) -> int:
+ return 800  # constant score: neither `provider` nor `requests` is inspected (the deno provider registers 1000)
Index: yt-dlp/yt_dlp/extractor/youtube/jsc/_builtin/bundle/core.js
===================================================================
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
+++ yt-dlp/yt_dlp/extractor/youtube/jsc/_builtin/bundle/core.js 2025-09-25 21:17:17.684050702 +0200
@@ -0,0 +1,504 @@
+--- yt_dlp/extractor/youtube/jsc/_builtin/bundle/core.js 2025-09-25 20:23:11.518734893 +0200
++++ /dev/null
+@@ -1,501 +0,0 @@
+-// This file is auto-generated from https://github.com/Grub4K/yt-dlp-jsc-deno
+-// Do not edit, changes will be overwritten.
+-// TODO: make this automatically updated
+-var jsc = (function (meriyah, astring) {
+- 'use strict';
+-
+- function matchesStructure(
+- obj,
+- structure,
+- ) {
+- if (Array.isArray(structure)) {
+- if (!Array.isArray(obj)) {
+- return false;
+- }
+- return (
+- structure.length === obj.length &&
+- structure.every((value, index) => matchesStructure(obj[index], value))
+- );
+- }
+- if (typeof structure === "object") {
+- if (!obj) {
+- return !structure;
+- }
+- if ("or" in structure) {
+- // Handle `{ or: [a, b] }`
+- return structure.or.some((node) => matchesStructure(obj, node));
+- }
+- for (const [key, value] of Object.entries(structure)) {
+- if (!matchesStructure(obj[key ], value)) {
+- return false;
+- }
+- }
+- return true;
+- }
+- return structure === obj;
+- }
+-
+- function isOneOf(value, ...of) {
+- return of.includes(value );
+- }
+-
+- function _optionalChain$2(ops) { let lastAccessLHS = undefined; let value = ops[0]; let i = 1; while (i < ops.length) { const op = ops[i]; const fn = ops[i + 1]; i += 2; if ((op === 'optionalAccess' || op === 'optionalCall') && value == null) { return undefined; } if (op === 'access' || op === 'optionalAccess') { lastAccessLHS = value; value = fn(value); } else if (op === 'call' || op === 'optionalCall') { value = fn((...args) => value.call(lastAccessLHS, ...args)); lastAccessLHS = undefined; } } return value; }
+-
+-
+- const logicalExpression = {
+- type: "ExpressionStatement",
+- expression: {
+- type: "LogicalExpression",
+- left: {
+- type: "Identifier",
+- },
+- right: {
+- type: "SequenceExpression",
+- expressions: [
+- {
+- type: "AssignmentExpression",
+- left: {
+- type: "Identifier",
+- },
+- operator: "=",
+- right: {
+- type: "CallExpression",
+- callee: {
+- type: "Identifier",
+- },
+- arguments: {
+- or: [
+- [
+- { type: "Literal" },
+- {
+- type: "CallExpression",
+- callee: {
+- type: "Identifier",
+- name: "decodeURIComponent",
+- },
+- arguments: [{ type: "Identifier" }],
+- optional: false,
+- },
+- ],
+- [
+- {
+- type: "CallExpression",
+- callee: {
+- type: "Identifier",
+- name: "decodeURIComponent",
+- },
+- arguments: [{ type: "Identifier" }],
+- optional: false,
+- },
+- ],
+- ],
+- },
+- optional: false,
+- },
+- },
+- {
+- type: "CallExpression",
+- },
+- ],
+- },
+- operator: "&&",
+- },
+- };
+-
+- const identifier$1 = {
+- or: [{
+- type: "ExpressionStatement",
+- expression: {
+- type: "AssignmentExpression",
+- operator: "=",
+- left: {
+- type: "Identifier",
+- },
+- right: {
+- type: "FunctionExpression",
+- params: [{}, {}, {}],
+- },
+- },
+- }, {
+- type: "FunctionDeclaration",
+- params: [{}, {}, {}],
+- }],
+- } ;
+-
+- function extract$1(
+- node,
+- ) {
+- if (
+- !matchesStructure(node, identifier$1 )
+- ) {
+- return null;
+- }
+- const block = (node.type === "ExpressionStatement" &&
+- node.expression.type === "AssignmentExpression" &&
+- node.expression.right.type === "FunctionExpression")
+- ? node.expression.right.body
+- : node.type === "FunctionDeclaration"
+- ? node.body
+- : null;
+- const relevantExpression = _optionalChain$2([block, 'optionalAccess', _ => _.body, 'access', _2 => _2.at, 'call', _3 => _3(-2)]);
+- if (!matchesStructure(relevantExpression, logicalExpression)) {
+- return null;
+- }
+- if (
+- _optionalChain$2([relevantExpression, 'optionalAccess', _4 => _4.type]) !== "ExpressionStatement" ||
+- relevantExpression.expression.type !==
+- "LogicalExpression" ||
+- relevantExpression.expression.right.type !==
+- "SequenceExpression" ||
+- relevantExpression.expression.right.expressions[0].type !==
+- "AssignmentExpression"
+- ) {
+- return null;
+- }
+- const call = relevantExpression.expression.right.expressions[0].right;
+- if (call.type !== "CallExpression" || call.callee.type !== "Identifier") {
+- return null;
+- }
+- // TODO: verify identifiers here
+- return {
+- type: "ArrowFunctionExpression",
+- params: [
+- {
+- type: "Identifier",
+- name: "sig",
+- },
+- ],
+- body: {
+- type: "CallExpression",
+- callee: {
+- type: "Identifier",
+- name: call.callee.name,
+- },
+- arguments: call.arguments.length === 1
+- ? [
+- {
+- type: "Identifier",
+- name: "sig",
+- },
+- ]
+- : [
+- call.arguments[0],
+- {
+- type: "Identifier",
+- name: "sig",
+- },
+- ],
+- optional: false,
+- },
+- async: false,
+- expression: false,
+- generator: false,
+- };
+- }
+-
+- function _optionalChain$1(ops) { let lastAccessLHS = undefined; let value = ops[0]; let i = 1; while (i < ops.length) { const op = ops[i]; const fn = ops[i + 1]; i += 2; if ((op === 'optionalAccess' || op === 'optionalCall') && value == null) { return undefined; } if (op === 'access' || op === 'optionalAccess') { lastAccessLHS = value; value = fn(value); } else if (op === 'call' || op === 'optionalCall') { value = fn((...args) => value.call(lastAccessLHS, ...args)); lastAccessLHS = undefined; } } return value; }
+-
+-
+- const identifier = {
+- type: "VariableDeclaration",
+- kind: "var",
+- declarations: [
+- {
+- type: "VariableDeclarator",
+- id: {
+- type: "Identifier",
+- },
+- init: {
+- type: "ArrayExpression",
+- elements: [
+- {
+- type: "Identifier",
+- },
+- ],
+- },
+- },
+- ],
+- };
+-
+- const catchBlockBody = [
+- {
+- type: "ReturnStatement",
+- argument: {
+- type: "BinaryExpression",
+- left: {
+- type: "MemberExpression",
+- object: {
+- type: "Identifier",
+- },
+- computed: true,
+- property: {
+- type: "Literal",
+- },
+- optional: false,
+- },
+- right: {
+- type: "Identifier",
+- },
+- operator: "+",
+- },
+- },
+- ] ;
+-
+- function extract(
+- node,
+- ) {
+- if (!matchesStructure(node, identifier)) {
+- // Fallback search for try { } catch { return X[12] + Y }
+- let name = null;
+- let block = null;
+- switch (node.type) {
+- case "ExpressionStatement": {
+- if (
+- node.expression.type === "AssignmentExpression" &&
+- node.expression.left.type === "Identifier" &&
+- node.expression.right.type === "FunctionExpression" &&
+- node.expression.right.params.length === 1
+- ) {
+- name = node.expression.left.name;
+- block = node.expression.right.body;
+- }
+- break;
+- }
+- case "FunctionDeclaration": {
+- if (node.params.length === 1) {
+- name = _optionalChain$1([node, 'access', _ => _.id, 'optionalAccess', _2 => _2.name]);
+- block = node.body;
+- }
+- break;
+- }
+- }
+- if (!block || !name) {
+- return null;
+- }
+- const tryNode = block.body.at(-2);
+- if (
+- _optionalChain$1([tryNode, 'optionalAccess', _3 => _3.type]) !== "TryStatement" ||
+- _optionalChain$1([tryNode, 'access', _4 => _4.handler, 'optionalAccess', _5 => _5.type]) !== "CatchClause"
+- ) {
+- return null;
+- }
+- const catchBody = tryNode.handler.body.body;
+- if (matchesStructure(catchBody, catchBlockBody)) {
+- return makeSolverFuncFromName(name);
+- }
+- return null;
+- }
+-
+- if (node.type !== "VariableDeclaration") {
+- return null;
+- }
+- const declaration = node.declarations[0];
+- if (
+- declaration.type !== "VariableDeclarator" || !declaration.init ||
+- declaration.init.type !== "ArrayExpression" ||
+- declaration.init.elements.length !== 1
+- ) {
+- return null;
+- }
+- const [firstElement] = declaration.init.elements;
+- if (!firstElement || firstElement.type !== "Identifier") {
+- return null;
+- }
+- return makeSolverFuncFromName(firstElement.name);
+- }
+-
+- function makeSolverFuncFromName(name) {
+- return {
+- type: "ArrowFunctionExpression",
+- params: [
+- {
+- type: "Identifier",
+- name: "nsig",
+- },
+- ],
+- body: {
+- type: "CallExpression",
+- callee: {
+- type: "Identifier",
+- name: name,
+- },
+- arguments: [
+- {
+- type: "Identifier",
+- name: "nsig",
+- },
+- ],
+- optional: false,
+- },
+- async: false,
+- expression: false,
+- generator: false,
+- };
+- }
+-
+- const setupNodes = meriyah.parse(`
+-globalThis.XMLHttpRequest = { prototype: {} };
+-const window = Object.assign(Object.create(null), globalThis);
+-window.location = new URL("https://www.youtube.com/watch?v=yt-dlp-wins");
+-const document = {};
+-let self = globalThis;
+-`).body;
+-
+- function _optionalChain(ops) { let lastAccessLHS = undefined; let value = ops[0]; let i = 1; while (i < ops.length) { const op = ops[i]; const fn = ops[i + 1]; i += 2; if ((op === 'optionalAccess' || op === 'optionalCall') && value == null) { return undefined; } if (op === 'access' || op === 'optionalAccess') { lastAccessLHS = value; value = fn(value); } else if (op === 'call' || op === 'optionalCall') { value = fn((...args) => value.call(lastAccessLHS, ...args)); lastAccessLHS = undefined; } } return value; }
+- function preprocessPlayer(data) {
+- const ast = meriyah.parse(data);
+- const body = ast.body;
+-
+- const block = (() => {
+- switch (body.length) {
+- case 1: {
+- const func = body[0];
+- if (
+- _optionalChain([func, 'optionalAccess', _ => _.type]) === "ExpressionStatement" &&
+- func.expression.type === "CallExpression" &&
+- func.expression.callee.type === "MemberExpression" &&
+- func.expression.callee.object.type === "FunctionExpression"
+- ) {
+- return func.expression.callee.object.body;
+- }
+- break;
+- }
+- case 2: {
+- const func = body[1];
+- if (
+- _optionalChain([func, 'optionalAccess', _2 => _2.type]) === "ExpressionStatement" &&
+- func.expression.type === "CallExpression" &&
+- func.expression.callee.type === "FunctionExpression"
+- ) {
+- const block = func.expression.callee.body;
+- // Skip `var window = this;`
+- block.body.splice(0, 1);
+- return block;
+- }
+- break;
+- }
+- }
+- throw "unexpected structure";
+- })();
+-
+- const found = {
+- nsig: [] ,
+- sig: [] ,
+- };
+- const plainExpressions = block.body.filter((node) => {
+- const nsig = extract(node);
+- if (nsig) {
+- found.nsig.push(nsig);
+- }
+- const sig = extract$1(node);
+- if (sig) {
+- found.sig.push(sig);
+- }
+- if (node.type === "ExpressionStatement") {
+- if (node.expression.type === "AssignmentExpression") {
+- return true;
+- }
+- return node.expression.type === "Literal";
+- }
+- return true;
+- });
+- block.body = plainExpressions;
+-
+- for (const [name, options] of Object.entries(found)) {
+- // TODO: this is cringe fix plz
+- const unique = new Set(options.map((x) => JSON.stringify(x)));
+- if (unique.size !== 1) {
+- const message = `found ${unique.size} ${name} function possibilities`;
+- throw (
+- message +
+- (unique.size ? `: ${options.map((x) => astring.generate(x)).join(", ")}` : "")
+- );
+- }
+- plainExpressions.push({
+- type: "ExpressionStatement",
+- expression: {
+- type: "AssignmentExpression",
+- operator: "=",
+- left: {
+- type: "MemberExpression",
+- computed: false,
+- object: {
+- type: "Identifier",
+- name: "_result",
+- },
+- property: {
+- type: "Identifier",
+- name: name,
+- },
+- },
+- right: options[0],
+- },
+- });
+- }
+-
+- ast.body.splice(0, 0, ...setupNodes);
+-
+- return astring.generate(ast);
+- }
+-
+- function getFromPrepared(code)
+-
+-
+- {
+- const resultObj = { nsig: null, sig: null };
+- Function("_result", code)(resultObj);
+- return resultObj;
+- }
+-
+- function main(input) {
+- const preprocessedPlayer = input.type === "player"
+- ? preprocessPlayer(input.player)
+- : input.preprocessed_player;
+- const solvers = getFromPrepared(preprocessedPlayer);
+-
+- const responses = input.requests.map(
+- (input) => {
+- if (!isOneOf(input.type, "nsig", "sig")) {
+- return {
+- type: "error",
+- error: `Unknown request type: ${input.type}`,
+- };
+- }
+- const solver = solvers[input.type];
+- if (!solver) {
+- return {
+- type: "error",
+- error: `Failed to extract ${input.type} function`,
+- };
+- }
+- try {
+- return {
+- type: "result",
+- data: Object.fromEntries(
+- input.challenges.map((challenge) => [challenge, solver(challenge)]),
+- ),
+- };
+- } catch (error) {
+- return {
+- type: "error",
+- error: error instanceof Error
+- ? `${error.message}\n${error.stack}`
+- : `${error}`,
+- };
+- }
+- },
+- );
+-
+- const output = {
+- type: "result",
+- responses,
+- };
+- if (input.type === "player" && input.output_preprocessed) {
+- output.preprocessed_player = preprocessedPlayer;
+- }
+- return output;
+- }
+-
+- return main;
+-
+-})(meriyah, astring);
\ No newline at end of file
Index: yt-dlp/yt_dlp/extractor/youtube/jsc/_builtin/deno.py
===================================================================
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
+++ yt-dlp/yt_dlp/extractor/youtube/jsc/_builtin/deno.py 2025-09-25 21:17:17.684342223 +0200
@@ -0,0 +1,82 @@
+from __future__ import annotations
+
+import contextlib
+import shlex
+import subprocess
+
+from yt_dlp.extractor.youtube.jsc._builtin.runtime import (
+ JsRuntimeChalBaseJCP,
+ Script,
+ ScriptSource,
+ ScriptType,
+ ScriptVariant,
+)
+from yt_dlp.extractor.youtube.jsc._builtin.scripts import load_script
+from yt_dlp.extractor.youtube.jsc.provider import (
+ JsChallengeProvider,
+ JsChallengeProviderError,
+ JsChallengeRequest,
+ register_preference,
+ register_provider,
+)
+from yt_dlp.extractor.youtube.pot._provider import BuiltinIEContentProvider
+from yt_dlp.extractor.youtube.pot.provider import provider_bug_report_message
+from yt_dlp.utils import Popen
+
+
+@register_provider
+class DenoJCP(JsRuntimeChalBaseJCP, BuiltinIEContentProvider):
+    PROVIDER_NAME = 'deno'
+    JS_RUNTIME_NAME = 'deno'
+
+    _DENO_OPTIONS = ['--no-prompt', '--no-remote']
+    DENO_NPM_LIB_FILENAME = 'deno.lib.js'
+
+    def _iter_script_sources(self):
+        """Yield the parent's (source, loader) pairs, inserting the Deno NPM lib loader right before WEB"""
+        for source, func in super()._iter_script_sources():
+            if source == ScriptSource.WEB:
+                yield ScriptSource.BUILTIN, self._deno_npm_source
+            yield source, func
+
+    def _deno_npm_source(self, script_type: ScriptType, /) -> Script | None:
+        """Return the bundled Deno lib script, or None if it does not apply, is not enabled or cannot be read"""
+        if script_type != ScriptType.LIB:
+            return None
+        if 'npm' not in self.ie.get_param('download_ext_components', []):
+            self._report_ext_component_skipped('npm', 'NPM package')
+            return None
+        # Deno-specific lib script that uses Deno NPM imports
+        error_hook = lambda e: self.logger.warning(
+            f'Failed to read deno challenge solver lib script: {e}{provider_bug_report_message(self)}')
+        code = load_script(self.DENO_NPM_LIB_FILENAME, error_hook=error_hook)
+        if code:
+            # TODO: any other permissions we want when not using --no-remote?
+            # Drop --no-remote on a per-instance copy; mutating the class-level list would affect every instance
+            self._DENO_OPTIONS = list(self._DENO_OPTIONS)
+            with contextlib.suppress(ValueError):
+                self._DENO_OPTIONS.remove('--no-remote')
+            return Script(script_type, ScriptVariant.DENO_NPM, ScriptSource.BUILTIN, self._SUPPORTED_VERSION, code)
+        return None
+
+    def _run_js_runtime(self, stdin: str, /) -> str:
+        """Feed `stdin` to `deno run` and return its stdout; raise if it exits non-zero or writes to stderr"""
+        cmd = [self.runtime_info.path, 'run', *self._DENO_OPTIONS, '-']
+        self.logger.debug(f'Running deno: {shlex.join(cmd)}')
+        with Popen(
+            cmd, text=True, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE,
+        ) as proc:
+            stdout, stderr = proc.communicate_or_kill(stdin)
+            # TODO: fails when deno needs to download dependencies?
+            if proc.returncode or stderr:
+                msg = 'Error running deno process'
+                if stderr:
+                    msg = f'{msg}: {stderr}'
+                raise JsChallengeProviderError(msg)
+
+        return stdout
+
+
+@register_preference(DenoJCP)
+def preference(provider: JsChallengeProvider, requests: list[JsChallengeRequest]) -> int:
+ return 1000  # constant score: neither `provider` nor `requests` is inspected (the bun provider registers 800)
Index: yt-dlp/yt_dlp/extractor/youtube/jsc/_builtin/jsinterp.py
===================================================================
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
+++ yt-dlp/yt_dlp/extractor/youtube/jsc/_builtin/jsinterp.py 2025-09-25 21:17:17.684435951 +0200
@@ -0,0 +1,288 @@
+import json
+import re
+import traceback
+from collections.abc import Generator
+
+from yt_dlp import join_nonempty, traverse_obj
+from yt_dlp.extractor.youtube.jsc.provider import (
+ JsChallengeProvider,
+ JsChallengeProviderError,
+ JsChallengeProviderResponse,
+ JsChallengeRequest,
+ JsChallengeResponse,
+ JsChallengeType,
+ NChallengeInput,
+ NChallengeOutput,
+ SigChallengeInput,
+ SigChallengeOutput,
+ register_provider,
+)
+from yt_dlp.extractor.youtube.pot._provider import BuiltinIEContentProvider
+from yt_dlp.jsinterp import JSInterpreter, LocalNameSpace
+from yt_dlp.utils import ExtractorError, filter_dict, js_to_json
+
+
+@register_provider
+class JsInterpJCP(JsChallengeProvider, BuiltinIEContentProvider):
+ PROVIDER_NAME = 'jsinterp'
+ _SUPPORTED_TYPES = [JsChallengeType.SIG, JsChallengeType.N]
+
+ _NSIG_FUNC_CACHE_ID = 'nsig func'
+ _DUMMY_STRING = 'dlp_wins'
+
+ def is_available(self) -> bool:
+ return True  # unconditional: no dependency or runtime check is performed for this provider
+
+    def _real_bulk_solve(self, requests: list[JsChallengeRequest]) -> Generator[JsChallengeProviderResponse, None, None]:
+        """Yield one response per request, carrying either the solved output or the exception raised"""
+        for req in requests:
+            try:
+                # Every request type other than SIG is handled by the nsig solver
+                is_sig = req.type == JsChallengeType.SIG
+                solve = self._solve_sig_challenges if is_sig else self._solve_nsig_challenges
+                solved = JsChallengeResponse(type=req.type, output=solve(req.video_id, req.input))
+                yield JsChallengeProviderResponse(request=req, response=solved)
+            except Exception as err:
+                yield JsChallengeProviderResponse(request=req, error=err)
+
+ # region sig
+    def _solve_sig_challenges(self, video_id, sig_input: SigChallengeInput) -> SigChallengeOutput:
+        """Solve every sig challenge of the input, mapping each challenge to its solved value"""
+        player_url = sig_input.player_url
+        self.logger.trace(f'Solving {len(sig_input.challenges)} sig challenges using player {sig_input.player_url}')
+        return SigChallengeOutput(results={
+            challenge: self._solve_sig_challenge(challenge, video_id, player_url)
+            for challenge in sig_input.challenges})
+
+    def _solve_sig_challenge(self, challenge, video_id, player_url) -> str:
+        solve = self._parse_sig_js(self._get_player(video_id, player_url), player_url)  # player JS -> sig callable
+        return solve(challenge)
+
+ def _parse_sig_js(self, jscode, player_url):  # returns a callable that applies the player's sig function to one string
+ # Find the signature function name in the player JS. Examples where `sig` is funcname:
+ # sig=function(a){a=a.split(""); ... ;return a.join("")};
+ # ;c&&(c=sig(decodeURIComponent(c)),a.set(b,encodeURIComponent(c)));return a};
+ # {var l=f,m=h.sp,n=sig(decodeURIComponent(h.s));l.set(m,encodeURIComponent(n))}
+ # sig=function(J){J=J.split(""); ... ;return J.join("")};
+ # ;N&&(N=sig(decodeURIComponent(N)),J.set(R,encodeURIComponent(N)));return J};
+ # {var H=u,k=f.sp,v=sig(decodeURIComponent(f.s));H.set(k,encodeURIComponent(v))}
+ funcname = self.ie._search_regex(
+ (r'\b(?P<var>[a-zA-Z0-9_$]+)&&\((?P=var)=(?P<sig>[a-zA-Z0-9_$]{2,})\(decodeURIComponent\((?P=var)\)\)',
+ r'(?P<sig>[a-zA-Z0-9_$]+)\s*=\s*function\(\s*(?P<arg>[a-zA-Z0-9_$]+)\s*\)\s*{\s*(?P=arg)\s*=\s*(?P=arg)\.split\(\s*""\s*\)\s*;\s*[^}]+;\s*return\s+(?P=arg)\.join\(\s*""\s*\)',
+ r'(?:\b|[^a-zA-Z0-9_$])(?P<sig>[a-zA-Z0-9_$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)(?:;[a-zA-Z0-9_$]{2}\.[a-zA-Z0-9_$]{2}\(a,\d+\))?',
+ # Old patterns
+ r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
+ r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
+ r'\bm=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(h\.s\)\)',
+ # Obsolete patterns
+ r'("|\')signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
+ r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
+ r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
+ r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
+ r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\('),
+ jscode, 'Initial JS player signature function name', group='sig')
+
+ varname, global_list = self._interpret_player_js_global_var(jscode, player_url)
+ jsi = JSInterpreter(jscode)
+ initial_function = jsi.extract_function(funcname, filter_dict({varname: global_list}))  # NOTE(review): filter_dict presumably drops the entry when global_list is None - confirm
+ return lambda s: initial_function([s])  # the extracted function receives its arguments as a list
+ # endregion sig
+
+ # region nsig
+    def _solve_nsig_challenges(self, video_id, nsig_input: NChallengeInput) -> NChallengeOutput:
+        """Solve every n challenge of the input, mapping each challenge to its solved value"""
+        player_url = nsig_input.player_url
+        self.logger.trace(f'Solving {len(nsig_input.challenges)} nsig challenges using player {nsig_input.player_url}')
+        return NChallengeOutput(results={
+            challenge: self._solve_nsig_challenge(challenge, video_id, player_url)
+            for challenge in nsig_input.challenges})
+
+    def _solve_nsig_challenge(self, challenge, video_id, player_url) -> str:
+        """Run the player's n function on a single challenge and return the transformed value"""
+        try:
+            interpreter, _, n_code = self._extract_n_function_code(video_id, player_url)
+        except ExtractorError as exc:
+            raise JsChallengeProviderError(f'Unable to extract nsig function code: {exc}') from exc
+
+        try:
+            build_n_func = self.ie._cached(
+                self._extract_n_function_from_code, self._NSIG_FUNC_CACHE_ID, player_url)
+            solved = build_n_func(interpreter, n_code)(challenge)
+        except JSInterpreter.Exception as exc:
+            self.logger.debug(str(exc), once=True)
+            raise JsChallengeProviderError('Native nsig extraction failed', expected=False) from exc
+
+        # Reached only on success, so the nsig function code is stored in the player cache only if it worked
+        self.logger.debug(f'Transformed nsig {challenge} => {solved}')
+        self.ie._store_player_data_to_cache('nsig', player_url, n_code)
+        return solved
+
+ def _extract_n_function_name(self, jscode, player_url=None):  # returns the name of the player's n (nsig) function
+ varname, global_list = self._interpret_player_js_global_var(jscode, player_url)
+ if debug_str := traverse_obj(global_list, (lambda _, v: v.endswith('-_w8_'), any)):  # NOTE(review): presumably the first global-array string ending in '-_w8_' - confirm traverse_obj semantics
+ pattern = r'''(?x)
+ \{\s*return\s+%s\[%d\]\s*\+\s*(?P<argname>[a-zA-Z0-9_$]+)\s*\}
+ ''' % (re.escape(varname), global_list.index(debug_str))
+ if match := re.search(pattern, jscode):  # matches `{ return <varname>[<index>] + <argname> }`
+ pattern = r'''(?x)
+ \{\s*\)%s\(\s*
+ (?:
+ (?P<funcname_a>[a-zA-Z0-9_$]+)\s*noitcnuf\s*
+ |noitcnuf\s*=\s*(?P<funcname_b>[a-zA-Z0-9_$]+)(?:\s+rav)?
+ )[;\n]
+ ''' % re.escape(match.group('argname')[::-1])  # pattern is written back-to-front ('noitcnuf' is 'function' reversed)
+ if match := re.search(pattern, jscode[match.start()::-1]):  # search the reversed text of jscode up to the match start
+ a, b = match.group('funcname_a', 'funcname_b')
+ return (a or b)[::-1]  # un-reverse the captured name
+ self.logger.debug(join_nonempty(
+ 'Initial search was unable to find nsig function name',
+ player_url and f' player = {player_url}', delim='\n'), once=True)
+
+ # Examples (with placeholders nfunc, narray, idx):
+ # * .get("n"))&&(b=nfunc(b)
+ # * .get("n"))&&(b=narray[idx](b)
+ # * b=String.fromCharCode(110),c=a.get(b))&&c=narray[idx](c)
+ # * a.D&&(b="nn"[+a.D],c=a.get(b))&&(c=narray[idx](c),a.set(b,c),narray.length||nfunc("")
+ # * a.D&&(PL(a),b=a.j.n||null)&&(b=narray[0](b),a.set("n",b),narray.length||nfunc("")
+ # * a.D&&(b="nn"[+a.D],vL(a),c=a.j[b]||null)&&(c=narray[idx](c),a.set(b,c),narray.length||nfunc("")
+ # * J.J="";J.url="";J.Z&&(R="nn"[+J.Z],mW(J),N=J.K[R]||null)&&(N=narray[idx](N),J.set(R,N))}};
+ funcname, idx = self.ie._search_regex(
+ r'''(?x)
+ (?:
+ \.get\("n"\)\)&&\(b=|
+ (?:
+ b=String\.fromCharCode\(110\)|
+ (?P<str_idx>[a-zA-Z0-9_$.]+)&&\(b="nn"\[\+(?P=str_idx)\]
+ )
+ (?:
+ ,[a-zA-Z0-9_$]+\(a\))?,c=a\.
+ (?:
+ get\(b\)|
+ [a-zA-Z0-9_$]+\[b\]\|\|null
+ )\)&&\(c=|
+ \b(?P<var>[a-zA-Z0-9_$]+)=
+ )(?P<nfunc>[a-zA-Z0-9_$]+)(?:\[(?P<idx>\d+)\])?\([a-zA-Z]\)
+ (?(var),[a-zA-Z0-9_$]+\.set\((?:"n+"|[a-zA-Z0-9_$]+)\,(?P=var)\))''',
+ jscode, 'n function name', group=('nfunc', 'idx'), default=(None, None))
+ if not funcname:
+ self.logger.warning(join_nonempty(
+ 'Falling back to generic n function search',
+ player_url and f' player = {player_url}', delim='\n'), once=True)
+ return self.ie._search_regex(
+ r'''(?xs)
+ ;\s*(?P<name>[a-zA-Z0-9_$]+)\s*=\s*function\([a-zA-Z0-9_$]+\)
+ \s*\{(?:(?!};).)+?return\s*(?P<q>["'])[\w-]+_w8_(?P=q)\s*\+\s*[a-zA-Z0-9_$]+''',
+ jscode, 'Initial JS player n function name', group='name')
+ elif not idx:  # no array index captured: nfunc is the function name itself
+ return funcname
+
+ return json.loads(js_to_json(self.ie._search_regex(  # nfunc is an array: parse its `var nfunc = [...]` literal and pick element idx
+ rf'var {re.escape(funcname)}\s*=\s*(\[.+?\])\s*[,;]', jscode,
+ f'Initial JS player n function list ({funcname}.{idx})')))[int(idx)]
+
+ def _fixup_n_function_code(self, argnames, nsig_code, jscode, player_url):
+ # Fixup global array
+ varname, global_list = self._interpret_player_js_global_var(jscode, player_url)
+ if varname and global_list:
+ nsig_code = f'var {varname}={json.dumps(global_list)}; {nsig_code}'
+ else:
+ varname = self._DUMMY_STRING
+ global_list = []
+
+ # Fixup typeof check
+ undefined_idx = global_list.index('undefined') if 'undefined' in global_list else r'\d+'
+ fixed_code = re.sub(
+ fr'''(?x)
+ ;\s*if\s*\(\s*typeof\s+[a-zA-Z0-9_$]+\s*===?\s*(?:
+ (["\'])undefined\1|
+ {re.escape(varname)}\[{undefined_idx}\]
+ )\s*\)\s*return\s+{re.escape(argnames[0])};
+ ''', ';', nsig_code)
+ if fixed_code == nsig_code:
+ self.logger.debug(join_nonempty(
+ 'No typeof statement found in nsig function code',
+ player_url and f' player = {player_url}', delim='\n'), once=True)
+
+ # Fixup global funcs
+ jsi = JSInterpreter(fixed_code)
+ cache_id = (self._NSIG_FUNC_CACHE_ID, player_url)
+ try:
+ self.ie._cached(
+ self._extract_n_function_from_code, *cache_id)(jsi, (argnames, fixed_code))(self._DUMMY_STRING)
+ except JSInterpreter.Exception:
+ self.ie._player_cache.pop(cache_id, None)
+
+ global_funcnames = jsi._undefined_varnames
+ debug_names = []
+ jsi = JSInterpreter(jscode)
+ for func_name in global_funcnames:
+ try:
+ func_args, func_code = jsi.extract_function_code(func_name)
+ fixed_code = f'var {func_name} = function({", ".join(func_args)}) {{ {func_code} }}; {fixed_code}'
+ debug_names.append(func_name)
+ except Exception:
+ self.logger.warning(join_nonempty(
+ f'Unable to extract global nsig function {func_name} from player JS',
+ player_url and f' player = {player_url}', delim='\n'), once=True)
+
+ if debug_names:
+ self.logger.debug(f'Extracted global nsig functions: {", ".join(debug_names)}')
+
+ return argnames, fixed_code
+
+ def _extract_n_function_code(self, video_id, player_url):  # returns (jsi, player_id, func_code)
+ player_id = self.ie._extract_player_info(player_url)
+ func_code = self.ie._load_player_data_from_cache('nsig', player_url)  # falsy when nothing is cached
+ jscode = func_code or self.ie._load_player(video_id, player_url)  # the player is only loaded on a cache miss
+ jsi = JSInterpreter(jscode)  # NOTE(review): on a cache hit this is given the cached func_code, not player JS - confirm intended
+
+ if func_code:  # cache hit: return without extracting anything
+ return jsi, player_id, func_code
+
+ func_name = self._extract_n_function_name(jscode, player_url=player_url)
+
+ # XXX: Work around (a) global array variable, (b) `typeof` short-circuit, (c) global functions
+ func_code = self._fixup_n_function_code(*jsi.extract_function_code(func_name), jscode, player_url)
+
+ return jsi, player_id, func_code
+
+    def _extract_n_function_from_code(self, jsi, func_code):
+        """Wrap the interpreted n function so that any failure surfaces as JSInterpreter.Exception."""
+        n_func = jsi.extract_function_from_code(*func_code)
+
+        def extract_nsig(s):
+            try:
+                return n_func([s])
+            except JSInterpreter.Exception:
+                raise
+            except Exception as e:
+                # Anything else is re-raised as an interpreter error carrying the full traceback text
+                raise JSInterpreter.Exception(traceback.format_exc(), cause=e)
+
+        return extract_nsig
+ # endregion nsig
+
+ def _interpret_player_js_global_var(self, jscode, player_url):
+ """Returns tuple of: variable name string, variable value list"""
+ extract_global_var = self.ie._cached(self.ie._search_regex, 'jsc global array', player_url)
+ varcode, varname, varvalue = extract_global_var(
+ r'''(?x)
+ (?P<q1>["\'])use\s+strict(?P=q1);\s*
+ (?P<code>
+ var\s+(?P<name>[a-zA-Z0-9_$]+)\s*=\s*
+ (?P<value>
+ (?P<q2>["\'])(?:(?!(?P=q2)).|\\.)+(?P=q2)
+ \.split\((?P<q3>["\'])(?:(?!(?P=q3)).)+(?P=q3)\)
+ |\[\s*(?:(?P<q4>["\'])(?:(?!(?P=q4)).|\\.)*(?P=q4)\s*,?\s*)+\]
+ )
+ )[;,]
+ ''', jscode, 'global variable', group=('code', 'name', 'value'), default=(None, None, None))
+ if not varcode:
+ self.logger.debug(join_nonempty(
+ 'No global array variable found in player JS',
+ player_url and f' player = {player_url}', delim='\n'), once=True)
+ return None, None
+
+ jsi = JSInterpreter(varcode)
+ interpret_global_var = self.ie._cached(jsi.interpret_expression, 'jsc global list', player_url)
+ return varname, interpret_global_var(varvalue, LocalNameSpace(), allow_recursion=10)
Index: yt-dlp/yt_dlp/extractor/youtube/jsc/_builtin/node.py
===================================================================
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
+++ yt-dlp/yt_dlp/extractor/youtube/jsc/_builtin/node.py 2025-09-25 21:17:17.684750310 +0200
@@ -0,0 +1,47 @@
+from __future__ import annotations
+
+import shlex
+import subprocess
+
+from yt_dlp.extractor.youtube.jsc._builtin.runtime import JsRuntimeChalBaseJCP
+from yt_dlp.extractor.youtube.jsc.provider import (
+ JsChallengeProvider,
+ JsChallengeProviderError,
+ JsChallengeRequest,
+ register_preference,
+ register_provider,
+)
+from yt_dlp.extractor.youtube.pot._provider import BuiltinIEContentProvider
+from yt_dlp.utils import Popen
+
+
+@register_provider
+class NodeJCP(JsRuntimeChalBaseJCP, BuiltinIEContentProvider):
+    """JS challenge provider that feeds the solver program to a `node` subprocess over stdin."""
+
+    PROVIDER_NAME = 'node'
+    JS_RUNTIME_NAME = 'node'
+    _ARGS = ['--permission', '-']
+
+    def _run_js_runtime(self, stdin: str, /) -> str:
+        """Run node with `stdin` as its standard input and return what it printed to stdout.
+
+        Raises JsChallengeProviderError on a non-zero exit status or on any stderr output.
+        """
+        command = [self.runtime_info.path, *self._ARGS]
+        self.logger.debug(f'Running node: {shlex.join(command)}')
+        with Popen(
+            command, text=True,
+            stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE,
+        ) as proc:
+            stdout, stderr = proc.communicate_or_kill(stdin)
+            if proc.returncode or stderr:
+                detail = f': {stderr}' if stderr else ''
+                raise JsChallengeProviderError(f'Error running node process{detail}')
+
+        return stdout
+
+
+@register_preference(NodeJCP)
+def preference(provider: JsChallengeProvider, requests: list[JsChallengeRequest]) -> int:  # preference score registered for NodeJCP
+ return 900  # constant: neither the provider nor the requests are inspected
Index: yt-dlp/yt_dlp/extractor/youtube/jsc/_builtin/runtime.py
===================================================================
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
+++ yt-dlp/yt_dlp/extractor/youtube/jsc/_builtin/runtime.py 2025-09-25 21:17:17.684970592 +0200
@@ -0,0 +1,283 @@
+from __future__ import annotations
+
+import collections
+import dataclasses
+import enum
+import functools
+import hashlib
+import importlib.resources
+import json
+import sys
+
+import yt_dlp
+from yt_dlp.extractor.youtube.jsc._builtin.scripts import load_script
+from yt_dlp.extractor.youtube.jsc.provider import (
+ JsChallengeProvider,
+ JsChallengeProviderError,
+ JsChallengeProviderRejectedRequest,
+ JsChallengeProviderResponse,
+ JsChallengeResponse,
+ JsChallengeType,
+ NChallengeOutput,
+ SigChallengeOutput,
+)
+from yt_dlp.extractor.youtube.pot.provider import provider_bug_report_message
+from yt_dlp.utils._jsruntime import JsRuntimeInfo
+
+TYPE_CHECKING = False
+if TYPE_CHECKING:
+ from collections.abc import Generator
+
+ from yt_dlp.extractor.youtube.jsc.provider import JsChallengeRequest
+
+
+class ScriptType(enum.Enum):
+ LIB = 'lib'
+ CORE = 'core'
+
+
+class ScriptVariant(enum.Enum):
+ UNKNOWN = 'unknown'
+ MINIFIED = 'minified'
+ UNMINIFIED = 'unminified'
+ DENO_NPM = 'deno_npm'
+ BUN_NPM = 'bun_npm'
+
+
+class ScriptSource(enum.Enum):
+ PYPACKAGE = 'python package'
+ BINARY = 'binary'
+ CACHE = 'cache'
+ WEB = 'web'
+ BUILTIN = 'builtin'
+
+
+@dataclasses.dataclass
+class Script:  # one challenge solver script plus the metadata describing where it came from
+ type: ScriptType  # which part of the solver this is (lib or core)
+ variant: ScriptVariant  # build flavour; used to look up the pinned hash
+ source: ScriptSource  # where the code was loaded from
+ version: str  # compared against the provider's supported version
+ code: str  # the script text itself; `hash` is derived from it
+
+ @functools.cached_property
+ def hash(self, /) -> str:  # SHA3-512 hex digest of `code`, computed once per instance
+ return hashlib.sha3_512(self.code.encode()).hexdigest()
+
+ def __str__(self, /):  # summary for log output; only the first 7 hash characters are shown
+ return f'<Script {self.type.value!r} v{self.version} (source: {self.source.value}) variant={self.variant.value!r} size={len(self.code)} hash={self.hash[:7]}...>'
+
+
+class JsRuntimeChalBaseJCP(JsChallengeProvider):
+ JS_RUNTIME_NAME: str
+ _CACHE_SECTION = 'challenge-solver'
+
+ _JCP_GUIDE_URL = 'https://github.com/yt-dlp/yt-dlp/wiki/YouTube-JS-Challenges'
+ _REPOSITORY = 'yt-dlp/yt-dlp-jsc-deno'
+ _SUPPORTED_TYPES = [JsChallengeType.N, JsChallengeType.SIG]
+ _SUPPORTED_VERSION = '0.0.1'
+ # TODO: insert correct hashes here
+ # TODO: Integration tests for each kind of scripts source
+ _ALLOWED_HASHES = {
+ ScriptType.LIB: {
+ ScriptVariant.MINIFIED: '488c1903d8beb24ee9788400b2a91e724751b04988ba4de398320de0e36b4a9e3a8db58849189bf1d48df3fc4b0972d96b4aabfd80fea25d7c43988b437062fd',
+ ScriptVariant.DENO_NPM: 'cbd33afbfa778e436aef774f3983f0b1234ad7f737ea9dbd9783ee26dce195f4b3242d1e202b2038e748044960bc2f976372e883c76157b24acdea939dba7603',
+ ScriptVariant.BUN_NPM: '2065c7584b39d4e3fe62f147ff0572c051629a00b1bdb3dbd21d61db172a42ad0fac210e923e080a58ca21d1cbf7c6a22a727a726654bae83af045e12958a5a0',
+ },
+ ScriptType.CORE: {
+ ScriptVariant.MINIFIED: 'df0c08c152911dedd35a98bbbb6a1786718c11e4233c52abda3d19fd11d97c3ba09745dfbca913ddeed72fead18819f62139220420c41a04d5a66ed629fbde4e',
+ ScriptVariant.UNMINIFIED: '8abfd4818573b6cf397cfae227661e3449fb5ac737a272ac0cf8268d94447b04b1c9a15f459b336175bf0605678a376e962df99b2c8d5498f16db801735f771c',
+ },
+ }
+
+ _SCRIPT_FILENAMES = {
+ ScriptType.LIB: 'lib.js',
+ ScriptType.CORE: 'core.js',
+ }
+
+ _MIN_SCRIPT_FILENAMES = {
+ ScriptType.LIB: 'lib.min.js',
+ ScriptType.CORE: 'core.min.js',
+ }
+
+ def __init__(self, *args, **kwargs):
+ super().__init__(*args, **kwargs)
+ self._available = True
+ # Note: developer use only, intentionally not documented.
+ # This bypasses verification of script hashes and versions.
+ # --extractor-args youtubejsc-{provider key}:dev=true
+ self.is_dev = self.settings.get('dev', []) == ['true']
+ if self.is_dev:
+ self.logger.warning(
+ f'You have enabled dev mode for {self.PROVIDER_KEY}JCP. '
+ f'This is a developer option intended for debugging. \n'
+ ' If you experience any issues while using this option, '
+ f'{self.ie._downloader._format_err("DO NOT", self.ie._downloader.Styles.ERROR)} open a bug report')
+
+ def _run_js_runtime(self, stdin: str, /) -> str:
+ """To be implemented by subclasses"""
+ raise NotImplementedError
+
+    def _real_bulk_solve(self, /, requests: list[JsChallengeRequest]):
+        """Solve the requests one player URL at a time, yielding one provider response per request."""
+        by_player: dict[str, list[JsChallengeRequest]] = collections.defaultdict(list)
+        for req in requests:
+            by_player[req.input.player_url].append(req)
+
+        for player_url, player_requests in by_player.items():
+            cache_key = f'player:{player_url}'
+            player = self.ie.cache.load(self._CACHE_SECTION, cache_key)
+            cached = bool(player)
+            if not cached:
+                # No preprocessed player in the cache: fall back to the raw player
+                player = self._get_player(player_requests[0].video_id, player_url)
+
+            stdout = self._run_js_runtime(self._construct_stdin(player, cached, player_requests))
+            result = json.loads(stdout)
+            if result['type'] == 'error':
+                raise JsChallengeProviderError(result['error'])
+
+            # Keep the preprocessed player, when the output includes one, for later runs
+            if preprocessed := result.get('preprocessed_player'):
+                self.ie.cache.store(self._CACHE_SECTION, cache_key, preprocessed)
+
+            for req, item in zip(player_requests, result['responses']):
+                if item['type'] == 'error':
+                    yield JsChallengeProviderResponse(req, None, item['error'])
+                    continue
+                make_output = NChallengeOutput if req.type is JsChallengeType.N else SigChallengeOutput
+                yield JsChallengeProviderResponse(
+                    req, JsChallengeResponse(req.type, make_output(item['data'])))
+
+ def _construct_stdin(self, player: str, preprocessed: bool, requests: list[JsChallengeRequest], /) -> str:  # builds the JS program text passed to the runtime
+ json_requests = [{
+ # TODO: i despise nsig name
+ 'type': 'nsig' if request.type.value == 'n' else request.type.value,  # the "n" challenge type is sent as "nsig"
+ 'challenges': request.input.challenges,
+ } for request in requests]
+ data = {  # payload shape depends on whether `player` is already preprocessed
+ 'type': 'preprocessed',
+ 'preprocessed_player': player,
+ 'requests': json_requests,
+ } if preprocessed else {
+ 'type': 'player',
+ 'player': player,
+ 'requests': json_requests,
+ 'output_preprocessed': True,  # asks for the preprocessed player back in the output
+ }
+ return f'''\
+ {self._lib_script.code}
+ const {{ astring, meriyah }} = lib;
+ {self._core_script.code}
+ console.log(JSON.stringify(jsc({json.dumps(data)})));
+ '''
+
+ # region: challenge solver script
+
+ @functools.cached_property
+ def _lib_script(self, /):
+ return self._get_script(ScriptType.LIB)
+
+ @functools.cached_property
+ def _core_script(self, /):
+ return self._get_script(ScriptType.CORE)
+
+    def _get_script(self, script_type: ScriptType, /) -> Script:
+        """Return the first script, in source order, that passes hash verification; reject the request if none does."""
+        for _, from_source in self._iter_script_sources():
+            if not (script := from_source(script_type)):
+                continue
+            if script.version != self._SUPPORTED_VERSION and not self.is_dev:
+                self.logger.warning(
+                    f'Challenge solver {script_type.value} script version {script.version} '
+                    f'is not supported (source: {script.source.value}, supported version: {self._SUPPORTED_VERSION})')
+            allowed_hash = self._ALLOWED_HASHES[script.type].get(script.variant)  # one hex digest, or None if unpinned
+            if allowed_hash and script.hash != allowed_hash and not self.is_dev:
+                self.logger.warning(
+                    f'Hash mismatch on challenge solver {script.type.value} script '
+                    f'(source: {script.source.value}, hash: {script.hash})!{provider_bug_report_message(self)}')
+                continue
+            self.logger.debug(f'Using challenge solver {script.type.value} script v{script.version} (source: {script.source.value}, variant: {script.variant.value})')
+            return script
+
+        self._available = False
+        raise JsChallengeProviderRejectedRequest(f'No usable challenge solver {script_type.value} script available')
+
+ def _iter_script_sources(self) -> Generator[tuple[ScriptSource, callable]]:  # (source, loader) pairs in the order `_get_script` tries them
+ yield from [
+ (ScriptSource.PYPACKAGE, self._pypackage_source),
+ (ScriptSource.BINARY, self._binary_source),
+ (ScriptSource.CACHE, self._cached_source),
+ (ScriptSource.BUILTIN, self._builtin_source),
+ (ScriptSource.WEB, self._web_release_source)]  # the download is attempted last
+
+ def _pypackage_source(self, script_type: ScriptType, /) -> Script | None:
+ try:
+ import yt_dlp_jsc as yt_dlp_ejs
+ except ImportError as e:
+ self.logger.trace(f'yt_dlp_ejs python package unavailable, reason: {e}')
+ return None
+ # TODO: fix API naming
+ code = yt_dlp_ejs.jsc() if script_type is ScriptType.CORE else yt_dlp_ejs.lib()
+ return Script(script_type, ScriptVariant.MINIFIED, ScriptSource.PYPACKAGE, yt_dlp_ejs.version, code)
+
+    def _binary_source(self, script_type: ScriptType, /) -> Script | None:
+        """Load the minified script shipped as a `yt_dlp` package resource in frozen builds exposing `sys._MEIPASS`."""
+        if not (getattr(sys, 'frozen', False) and hasattr(sys, '_MEIPASS')):
+            return None
+        filename = self._MIN_SCRIPT_FILENAMES[script_type]
+        if not importlib.resources.is_resource(yt_dlp, filename):
+            return None
+        return Script(script_type, ScriptVariant.MINIFIED, ScriptSource.BINARY, self._SUPPORTED_VERSION, importlib.resources.read_text(yt_dlp, filename))
+
+    def _cached_source(self, script_type: ScriptType, /) -> Script | None:
+        """Rebuild a script from the `{'version', 'code'}` entry cached for this script type, if there is one."""
+        data = self.ie.cache.load(self._CACHE_SECTION, script_type.value)
+        return Script(script_type, ScriptVariant.MINIFIED, ScriptSource.CACHE, data['version'], data['code']) if data else None
+
+ def _builtin_source(self, script_type: ScriptType, /) -> Script | None:
+ error_hook = lambda _: self.logger.warning(
+ f'Failed to read builtin challenge solver {script_type.value} script{provider_bug_report_message(self)}')
+ code = load_script(
+ self._SCRIPT_FILENAMES[script_type], error_hook=error_hook)
+ if code:
+ # TODO: strip internal header comments as to match published version
+ return Script(script_type, ScriptVariant.UNMINIFIED, ScriptSource.BUILTIN, self._SUPPORTED_VERSION, code)
+ return None
+
+ def _web_release_source(self, script_type: ScriptType, /) -> Script | None:  # downloads the minified script from the GitHub release
+ if 'ejs-github' not in self.ie.get_param('download_ext_components', []):  # downloads need the 'ejs-github' component enabled
+ self._report_ext_component_skipped('ejs-github', 'challenge solver script')
+ return None
+ url = f'https://github.com/{self._REPOSITORY}/releases/download/{self._SUPPORTED_VERSION}/{self._MIN_SCRIPT_FILENAMES[script_type]}'
+ if code := self.ie._download_webpage_with_retries(
+ url, None, f'[{self.logger.prefix}] Downloading challenge solver {script_type.value} script from {url}',
+ f'[{self.logger.prefix}] Failed to download challenge solver {script_type.value} script', fatal=False,
+ ):
+ self.ie.cache.store(self._CACHE_SECTION, script_type.value, {  # same section and key that `_cached_source` loads
+ 'version': self._SUPPORTED_VERSION,
+ 'code': code,
+ })
+ return Script(script_type, ScriptVariant.MINIFIED, ScriptSource.WEB, self._SUPPORTED_VERSION, code)
+ return None  # download failed (fatal=False) or returned nothing
+
+ # endregion: challenge solver script
+
+    @property
+    def runtime_info(self) -> JsRuntimeInfo | bool:
+        """Info for this provider's JS runtime, or False when it is missing or not supported."""
+        runtime = self.ie._downloader._js_runtimes.get(self.JS_RUNTIME_NAME)
+        usable = runtime and runtime.info and runtime.info.supported
+        return runtime.info if usable else False
+
+    def is_available(self, /) -> bool:
+        """Report whether a supported runtime exists and no script lookup has failed so far."""
+        # `_available` is cleared by `_get_script` once every script source has been exhausted
+        return bool(self.runtime_info) and self._available
+
+ def _report_ext_component_skipped(self, component: str, component_description: str):
+ self.logger.warning(
+ f'External {component_description} downloads are disabled. '
+ f'This may be required to solve JS challenges using {self.JS_RUNTIME_NAME} JS runtime. '
+ f'You can enable {component_description} downloads with "--download-ext-components {component}". '
+ f'For more information and alternatives, refer to {self._JCP_GUIDE_URL}')
Index: yt-dlp/yt_dlp/extractor/youtube/jsc/_builtin/scripts/__init__.py
===================================================================
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
+++ yt-dlp/yt_dlp/extractor/youtube/jsc/_builtin/scripts/__init__.py 2025-09-25 21:17:17.685386781 +0200
@@ -0,0 +1,12 @@
+import importlib.resources
+
+
+def load_script(filename, error_hook=None):
+    """Return the text of `filename` from this package, or None if missing; errors are passed to `error_hook`."""
+    try:
+        if importlib.resources.is_resource(__package__, filename):
+            return importlib.resources.read_text(__package__, filename)
+    except (OSError, ModuleNotFoundError) as e:  # FileNotFoundError is already an OSError
+        if error_hook:
+            error_hook(e)
+    return None
Index: yt-dlp/yt_dlp/extractor/youtube/jsc/_builtin/scripts/bun.lib.js
===================================================================
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
+++ yt-dlp/yt_dlp/extractor/youtube/jsc/_builtin/scripts/bun.lib.js 2025-09-25 21:17:17.685553703 +0200
@@ -0,0 +1,3 @@
+// TODO: Generate this file automatically from bundle repo
+const [m, a] = await Promise.all([ import("meriyah@6.1.4"), import("astring@1.9.0") ]);
+export const lib = { meriyah: m, astring: a };
\ No newline at end of file
Index: yt-dlp/yt_dlp/extractor/youtube/jsc/_builtin/scripts/core.js
===================================================================
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
+++ yt-dlp/yt_dlp/extractor/youtube/jsc/_builtin/scripts/core.js 2025-09-25 21:17:17.685656860 +0200
@@ -0,0 +1,501 @@
+// This file is auto-generated from https://github.com/Grub4K/yt-dlp-jsc-deno
+// Do not edit, changes will be overwritten.
+// TODO: make this automatically updated
+var jsc = (function (meriyah, astring) {
+ 'use strict';
+
+ function matchesStructure(
+ obj,
+ structure,
+ ) {
+ if (Array.isArray(structure)) {
+ if (!Array.isArray(obj)) {
+ return false;
+ }
+ return (
+ structure.length === obj.length &&
+ structure.every((value, index) => matchesStructure(obj[index], value))
+ );
+ }
+ if (typeof structure === "object") {
+ if (!obj) {
+ return !structure;
+ }
+ if ("or" in structure) {
+ // Handle `{ or: [a, b] }`
+ return structure.or.some((node) => matchesStructure(obj, node));
+ }
+ for (const [key, value] of Object.entries(structure)) {
+ if (!matchesStructure(obj[key ], value)) {
+ return false;
+ }
+ }
+ return true;
+ }
+ return structure === obj;
+ }
+
+ function isOneOf(value, ...of) {
+ return of.includes(value );
+ }
+
+ function _optionalChain$2(ops) { let lastAccessLHS = undefined; let value = ops[0]; let i = 1; while (i < ops.length) { const op = ops[i]; const fn = ops[i + 1]; i += 2; if ((op === 'optionalAccess' || op === 'optionalCall') && value == null) { return undefined; } if (op === 'access' || op === 'optionalAccess') { lastAccessLHS = value; value = fn(value); } else if (op === 'call' || op === 'optionalCall') { value = fn((...args) => value.call(lastAccessLHS, ...args)); lastAccessLHS = undefined; } } return value; }
+
+
+ const logicalExpression = {
+ type: "ExpressionStatement",
+ expression: {
+ type: "LogicalExpression",
+ left: {
+ type: "Identifier",
+ },
+ right: {
+ type: "SequenceExpression",
+ expressions: [
+ {
+ type: "AssignmentExpression",
+ left: {
+ type: "Identifier",
+ },
+ operator: "=",
+ right: {
+ type: "CallExpression",
+ callee: {
+ type: "Identifier",
+ },
+ arguments: {
+ or: [
+ [
+ { type: "Literal" },
+ {
+ type: "CallExpression",
+ callee: {
+ type: "Identifier",
+ name: "decodeURIComponent",
+ },
+ arguments: [{ type: "Identifier" }],
+ optional: false,
+ },
+ ],
+ [
+ {
+ type: "CallExpression",
+ callee: {
+ type: "Identifier",
+ name: "decodeURIComponent",
+ },
+ arguments: [{ type: "Identifier" }],
+ optional: false,
+ },
+ ],
+ ],
+ },
+ optional: false,
+ },
+ },
+ {
+ type: "CallExpression",
+ },
+ ],
+ },
+ operator: "&&",
+ },
+ };
+
+ const identifier$1 = {
+ or: [{
+ type: "ExpressionStatement",
+ expression: {
+ type: "AssignmentExpression",
+ operator: "=",
+ left: {
+ type: "Identifier",
+ },
+ right: {
+ type: "FunctionExpression",
+ params: [{}, {}, {}],
+ },
+ },
+ }, {
+ type: "FunctionDeclaration",
+ params: [{}, {}, {}],
+ }],
+ } ;
+
+ function extract$1(
+ node,
+ ) {
+ if (
+ !matchesStructure(node, identifier$1 )
+ ) {
+ return null;
+ }
+ const block = (node.type === "ExpressionStatement" &&
+ node.expression.type === "AssignmentExpression" &&
+ node.expression.right.type === "FunctionExpression")
+ ? node.expression.right.body
+ : node.type === "FunctionDeclaration"
+ ? node.body
+ : null;
+ const relevantExpression = _optionalChain$2([block, 'optionalAccess', _ => _.body, 'access', _2 => _2.at, 'call', _3 => _3(-2)]);
+ if (!matchesStructure(relevantExpression, logicalExpression)) {
+ return null;
+ }
+ if (
+ _optionalChain$2([relevantExpression, 'optionalAccess', _4 => _4.type]) !== "ExpressionStatement" ||
+ relevantExpression.expression.type !==
+ "LogicalExpression" ||
+ relevantExpression.expression.right.type !==
+ "SequenceExpression" ||
+ relevantExpression.expression.right.expressions[0].type !==
+ "AssignmentExpression"
+ ) {
+ return null;
+ }
+ const call = relevantExpression.expression.right.expressions[0].right;
+ if (call.type !== "CallExpression" || call.callee.type !== "Identifier") {
+ return null;
+ }
+ // TODO: verify identifiers here
+ return {
+ type: "ArrowFunctionExpression",
+ params: [
+ {
+ type: "Identifier",
+ name: "sig",
+ },
+ ],
+ body: {
+ type: "CallExpression",
+ callee: {
+ type: "Identifier",
+ name: call.callee.name,
+ },
+ arguments: call.arguments.length === 1
+ ? [
+ {
+ type: "Identifier",
+ name: "sig",
+ },
+ ]
+ : [
+ call.arguments[0],
+ {
+ type: "Identifier",
+ name: "sig",
+ },
+ ],
+ optional: false,
+ },
+ async: false,
+ expression: false,
+ generator: false,
+ };
+ }
+
+ function _optionalChain$1(ops) { let lastAccessLHS = undefined; let value = ops[0]; let i = 1; while (i < ops.length) { const op = ops[i]; const fn = ops[i + 1]; i += 2; if ((op === 'optionalAccess' || op === 'optionalCall') && value == null) { return undefined; } if (op === 'access' || op === 'optionalAccess') { lastAccessLHS = value; value = fn(value); } else if (op === 'call' || op === 'optionalCall') { value = fn((...args) => value.call(lastAccessLHS, ...args)); lastAccessLHS = undefined; } } return value; }
+
+
+ const identifier = {
+ type: "VariableDeclaration",
+ kind: "var",
+ declarations: [
+ {
+ type: "VariableDeclarator",
+ id: {
+ type: "Identifier",
+ },
+ init: {
+ type: "ArrayExpression",
+ elements: [
+ {
+ type: "Identifier",
+ },
+ ],
+ },
+ },
+ ],
+ };
+
+ const catchBlockBody = [
+ {
+ type: "ReturnStatement",
+ argument: {
+ type: "BinaryExpression",
+ left: {
+ type: "MemberExpression",
+ object: {
+ type: "Identifier",
+ },
+ computed: true,
+ property: {
+ type: "Literal",
+ },
+ optional: false,
+ },
+ right: {
+ type: "Identifier",
+ },
+ operator: "+",
+ },
+ },
+ ] ;
+
+ function extract(
+ node,
+ ) {
+ if (!matchesStructure(node, identifier)) {
+ // Fallback search for try { } catch { return X[12] + Y }
+ let name = null;
+ let block = null;
+ switch (node.type) {
+ case "ExpressionStatement": {
+ if (
+ node.expression.type === "AssignmentExpression" &&
+ node.expression.left.type === "Identifier" &&
+ node.expression.right.type === "FunctionExpression" &&
+ node.expression.right.params.length === 1
+ ) {
+ name = node.expression.left.name;
+ block = node.expression.right.body;
+ }
+ break;
+ }
+ case "FunctionDeclaration": {
+ if (node.params.length === 1) {
+ name = _optionalChain$1([node, 'access', _ => _.id, 'optionalAccess', _2 => _2.name]);
+ block = node.body;
+ }
+ break;
+ }
+ }
+ if (!block || !name) {
+ return null;
+ }
+ const tryNode = block.body.at(-2);
+ if (
+ _optionalChain$1([tryNode, 'optionalAccess', _3 => _3.type]) !== "TryStatement" ||
+ _optionalChain$1([tryNode, 'access', _4 => _4.handler, 'optionalAccess', _5 => _5.type]) !== "CatchClause"
+ ) {
+ return null;
+ }
+ const catchBody = tryNode.handler.body.body;
+ if (matchesStructure(catchBody, catchBlockBody)) {
+ return makeSolverFuncFromName(name);
+ }
+ return null;
+ }
+
+ if (node.type !== "VariableDeclaration") {
+ return null;
+ }
+ const declaration = node.declarations[0];
+ if (
+ declaration.type !== "VariableDeclarator" || !declaration.init ||
+ declaration.init.type !== "ArrayExpression" ||
+ declaration.init.elements.length !== 1
+ ) {
+ return null;
+ }
+ const [firstElement] = declaration.init.elements;
+ if (!firstElement || firstElement.type !== "Identifier") {
+ return null;
+ }
+ return makeSolverFuncFromName(firstElement.name);
+ }
+
+ function makeSolverFuncFromName(name) {
+ return {
+ type: "ArrowFunctionExpression",
+ params: [
+ {
+ type: "Identifier",
+ name: "nsig",
+ },
+ ],
+ body: {
+ type: "CallExpression",
+ callee: {
+ type: "Identifier",
+ name: name,
+ },
+ arguments: [
+ {
+ type: "Identifier",
+ name: "nsig",
+ },
+ ],
+ optional: false,
+ },
+ async: false,
+ expression: false,
+ generator: false,
+ };
+ }
+
+ const setupNodes = meriyah.parse(`
+globalThis.XMLHttpRequest = { prototype: {} };
+const window = Object.assign(Object.create(null), globalThis);
+window.location = new URL("https://www.youtube.com/watch?v=yt-dlp-wins");
+const document = {};
+let self = globalThis;
+`).body;
+
+ function _optionalChain(ops) { let lastAccessLHS = undefined; let value = ops[0]; let i = 1; while (i < ops.length) { const op = ops[i]; const fn = ops[i + 1]; i += 2; if ((op === 'optionalAccess' || op === 'optionalCall') && value == null) { return undefined; } if (op === 'access' || op === 'optionalAccess') { lastAccessLHS = value; value = fn(value); } else if (op === 'call' || op === 'optionalCall') { value = fn((...args) => value.call(lastAccessLHS, ...args)); lastAccessLHS = undefined; } } return value; }
+ function preprocessPlayer(data) {
+ const ast = meriyah.parse(data);
+ const body = ast.body;
+
+ const block = (() => {
+ switch (body.length) {
+ case 1: {
+ const func = body[0];
+ if (
+ _optionalChain([func, 'optionalAccess', _ => _.type]) === "ExpressionStatement" &&
+ func.expression.type === "CallExpression" &&
+ func.expression.callee.type === "MemberExpression" &&
+ func.expression.callee.object.type === "FunctionExpression"
+ ) {
+ return func.expression.callee.object.body;
+ }
+ break;
+ }
+ case 2: {
+ const func = body[1];
+ if (
+ _optionalChain([func, 'optionalAccess', _2 => _2.type]) === "ExpressionStatement" &&
+ func.expression.type === "CallExpression" &&
+ func.expression.callee.type === "FunctionExpression"
+ ) {
+ const block = func.expression.callee.body;
+ // Skip `var window = this;`
+ block.body.splice(0, 1);
+ return block;
+ }
+ break;
+ }
+ }
+ throw "unexpected structure";
+ })();
+
+ const found = {
+ nsig: [] ,
+ sig: [] ,
+ };
+ const plainExpressions = block.body.filter((node) => {
+ const nsig = extract(node);
+ if (nsig) {
+ found.nsig.push(nsig);
+ }
+ const sig = extract$1(node);
+ if (sig) {
+ found.sig.push(sig);
+ }
+ if (node.type === "ExpressionStatement") {
+ if (node.expression.type === "AssignmentExpression") {
+ return true;
+ }
+ return node.expression.type === "Literal";
+ }
+ return true;
+ });
+ block.body = plainExpressions;
+
+ for (const [name, options] of Object.entries(found)) {
+ // TODO: this is cringe fix plz
+ const unique = new Set(options.map((x) => JSON.stringify(x)));
+ if (unique.size !== 1) {
+ const message = `found ${unique.size} ${name} function possibilities`;
+ throw (
+ message +
+ (unique.size ? `: ${options.map((x) => astring.generate(x)).join(", ")}` : "")
+ );
+ }
+ plainExpressions.push({
+ type: "ExpressionStatement",
+ expression: {
+ type: "AssignmentExpression",
+ operator: "=",
+ left: {
+ type: "MemberExpression",
+ computed: false,
+ object: {
+ type: "Identifier",
+ name: "_result",
+ },
+ property: {
+ type: "Identifier",
+ name: name,
+ },
+ },
+ right: options[0],
+ },
+ });
+ }
+
+ ast.body.splice(0, 0, ...setupNodes);
+
+ return astring.generate(ast);
+ }
+
+ function getFromPrepared(code)
+
+
+ {
+ const resultObj = { nsig: null, sig: null };
+ Function("_result", code)(resultObj);
+ return resultObj;
+ }
+
+ function main(input) {
+ const preprocessedPlayer = input.type === "player"
+ ? preprocessPlayer(input.player)
+ : input.preprocessed_player;
+ const solvers = getFromPrepared(preprocessedPlayer);
+
+ const responses = input.requests.map(
+ (input) => {
+ if (!isOneOf(input.type, "nsig", "sig")) {
+ return {
+ type: "error",
+ error: `Unknown request type: ${input.type}`,
+ };
+ }
+ const solver = solvers[input.type];
+ if (!solver) {
+ return {
+ type: "error",
+ error: `Failed to extract ${input.type} function`,
+ };
+ }
+ try {
+ return {
+ type: "result",
+ data: Object.fromEntries(
+ input.challenges.map((challenge) => [challenge, solver(challenge)]),
+ ),
+ };
+ } catch (error) {
+ return {
+ type: "error",
+ error: error instanceof Error
+ ? `${error.message}\n${error.stack}`
+ : `${error}`,
+ };
+ }
+ },
+ );
+
+ const output = {
+ type: "result",
+ responses,
+ };
+ if (input.type === "player" && input.output_preprocessed) {
+ output.preprocessed_player = preprocessedPlayer;
+ }
+ return output;
+ }
+
+ return main;
+
+})(meriyah, astring);
\ No newline at end of file
Index: yt-dlp/yt_dlp/extractor/youtube/jsc/_builtin/scripts/deno.lib.js
===================================================================
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
+++ yt-dlp/yt_dlp/extractor/youtube/jsc/_builtin/scripts/deno.lib.js 2025-09-25 21:17:17.685849344 +0200
@@ -0,0 +1,3 @@
+// TODO: Generate this file automatically from bundle repo
+const [m, a] = await Promise.all([ import("npm:meriyah@6.1.4"), import("npm:astring@1.9.0") ]);
+export const lib = { meriyah: m, astring: a };
\ No newline at end of file
Index: yt-dlp/yt_dlp/extractor/youtube/jsc/_director.py
===================================================================
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
+++ yt-dlp/yt_dlp/extractor/youtube/jsc/_director.py 2025-09-25 21:17:17.686014381 +0200
@@ -0,0 +1,234 @@
+from __future__ import annotations
+
+import dataclasses
+import typing
+from collections.abc import Iterable
+
+from yt_dlp.extractor.youtube.jsc._registry import (
+ _jsc_preferences,
+ _jsc_providers,
+)
+from yt_dlp.extractor.youtube.jsc.provider import (
+ JsChallengeProvider,
+ JsChallengeProviderError,
+ JsChallengeProviderRejectedRequest,
+ JsChallengeProviderResponse,
+ JsChallengeRequest,
+ JsChallengeResponse,
+ JsChallengeType,
+ NChallengeInput,
+ NChallengeOutput,
+ SigChallengeInput,
+ SigChallengeOutput,
+)
+from yt_dlp.extractor.youtube.pot._director import YoutubeIEContentProviderLogger, provider_display_list
+from yt_dlp.extractor.youtube.pot._provider import (
+ IEContentProviderLogger,
+)
+from yt_dlp.extractor.youtube.pot.provider import (
+ provider_bug_report_message,
+)
+
+if typing.TYPE_CHECKING:
+ from yt_dlp.extractor.youtube.jsc.provider import Preference as JsChallengePreference
+
+
+class JsChallengeRequestDirector:
+
+ def __init__(self, logger: IEContentProviderLogger):
+ self.providers: dict[str, JsChallengeProvider] = {}
+ self.preferences: list[JsChallengePreference] = []
+ self.logger = logger
+
+ def register_provider(self, provider: JsChallengeProvider):
+ self.providers[provider.PROVIDER_KEY] = provider
+
+ def register_preference(self, preference: JsChallengePreference):
+ self.preferences.append(preference)
+
+ def _get_providers(self, requests: list[JsChallengeRequest]) -> Iterable[JsChallengeProvider]:
+ """Sorts available providers by preference, given a request"""
+ preferences = {
+ provider: sum(pref(provider, requests) for pref in self.preferences)
+ for provider in self.providers.values()
+ }
+ if self.logger.log_level <= self.logger.LogLevel.TRACE:
+ # calling is_available() for every JS Challenge provider upfront may have some overhead
+ self.logger.trace(f'JS Challenge Providers: {provider_display_list(self.providers.values())}')
+ self.logger.trace('JS Challenge Provider preferences for this request: {}'.format(', '.join(
+ f'{provider.PROVIDER_NAME}={pref}' for provider, pref in preferences.items())))
+
+ return (
+ provider for provider in sorted(
+ self.providers.values(), key=preferences.get, reverse=True)
+ if provider.is_available()
+ )
+
+ def _handle_error(self, e: Exception, provider: JsChallengeProvider, requests: list[JsChallengeRequest]):
+ if isinstance(e, JsChallengeProviderRejectedRequest):
+ self.logger.trace(
+ f'JS Challenge Provider "{provider.PROVIDER_NAME}" rejected '
+ f'{"this request" if len(requests) == 1 else f"{len(requests)} requests"}, '
+ f'trying next available provider. Reason: {e}',
+ )
+ elif isinstance(e, JsChallengeProviderError):
+ if len(requests) == 1:
+ self.logger.warning(
+ f'Error solving {requests[0].type.value} challenge request using "{provider.PROVIDER_NAME}" provider: {e}.\n'
+ f' input = {requests[0].input}\n'
+ f' {(provider_bug_report_message(provider, before="") if not e.expected else "")}')
+ else:
+ self.logger.warning(
+ f'Error solving {len(requests)} challenge requests using "{provider.PROVIDER_NAME}" provider: {e}.\n'
+ f' requests = {requests}\n'
+ f' {(provider_bug_report_message(provider, before="") if not e.expected else "")}')
+ else:
+ self.logger.error(
+ f'Unexpected error solving {len(requests)} challenge request(s) using "{provider.PROVIDER_NAME}" provider: {e!r}\n'
+ f' requests = {requests}\n'
+ f' {provider_bug_report_message(provider, before="")}', cause=e)
+
+    def bulk_solve(self, requests: list[JsChallengeRequest]) -> list[tuple[JsChallengeRequest, JsChallengeResponse]]:
+        """Try each available provider in preference order; return (request, response) pairs for solved requests"""
+        if not self.providers:
+            self.logger.trace('No JS Challenge providers registered')
+            return []
+
+        results = []
+        next_requests = requests[:]  # requests still unsolved; shrinks as providers answer them
+
+        for provider in self._get_providers(next_requests):
+            if not next_requests:
+                break
+            self.logger.trace(
+                f'Attempting to solve {len(next_requests)} challenges using "{provider.PROVIDER_NAME}" provider')
+            try:
+                for response in provider.bulk_solve([dataclasses.replace(request) for request in next_requests]):
+                    if not validate_provider_response(response):
+                        self.logger.warning(
+                            f'JS Challenge Provider "{provider.PROVIDER_NAME}" returned an invalid response:\n'
+                            f'    response = {response!r}\n'
+                            f'    {provider_bug_report_message(provider, before="")}')
+                        continue
+                    if response.error:
+                        self._handle_error(response.error, provider, [response.request])
+                        continue
+                    if (vr_msg := validate_response(response.response, response.request)) is not True:
+                        self.logger.warning(
+                            f'Invalid JS Challenge response received from "{provider.PROVIDER_NAME}" provider: {vr_msg or ""}\n'
+                            f'    response = {response.response}\n'
+                            f'    request = {response.request}\n'
+                            f'    {provider_bug_report_message(provider, before="")}')
+                        continue
+                    try:
+                        next_requests.remove(response.request)  # matched by dataclass equality, not identity
+                    except ValueError:
+                        self.logger.warning(
+                            f'JS Challenge Provider "{provider.PROVIDER_NAME}" returned a response for an unknown request:\n'
+                            f'    request = {response.request}\n'
+                            f'    {provider_bug_report_message(provider, before="")}')
+                        continue
+                    results.append((response.request, response.response))
+            except Exception as e:
+                self._handle_error(e, provider, next_requests)
+                continue
+
+        if len(results) != len(requests):
+            self.logger.trace(
+                f'Not all JS Challenges were solved, expected {len(requests)} responses, got {len(results)}')
+            self.logger.trace(f'Unsolved requests: {next_requests}')
+        else:
+            self.logger.trace(f'Solved all {len(requests)} requested JS Challenges')
+        return results
+
+ def close(self):
+ for provider in self.providers.values():
+ provider.close()
+
+
+EXTRACTOR_ARG_PREFIX = 'youtubejsc'
+
+
+def initialize_jsc_director(ie):
+ assert ie._downloader is not None, 'Downloader not set'
+
+ enable_trace = ie._configuration_arg(
+ 'jsc_trace', ['false'], ie_key='youtube', casesense=False)[0] == 'true'
+
+ if enable_trace:
+ log_level = IEContentProviderLogger.LogLevel.TRACE
+ elif ie.get_param('verbose', False):
+ log_level = IEContentProviderLogger.LogLevel.DEBUG
+ else:
+ log_level = IEContentProviderLogger.LogLevel.INFO
+
+ def get_provider_logger_and_settings(provider, logger_key):
+ logger_prefix = f'{logger_key}:{provider.PROVIDER_NAME}'
+ extractor_key = f'{EXTRACTOR_ARG_PREFIX}-{provider.PROVIDER_KEY.lower()}'
+ return (
+ YoutubeIEContentProviderLogger(ie, logger_prefix, log_level=log_level),
+ ie.get_param('extractor_args', {}).get(extractor_key, {}))
+
+ director = JsChallengeRequestDirector(
+ logger=YoutubeIEContentProviderLogger(ie, 'jsc', log_level=log_level),
+ )
+
+ ie._downloader.add_close_hook(director.close)
+
+ for provider in _jsc_providers.value.values():
+ logger, settings = get_provider_logger_and_settings(provider, 'jsc')
+ director.register_provider(provider(ie, logger, settings))
+
+ for preference in _jsc_preferences.value:
+ director.register_preference(preference)
+
+ if director.logger.log_level <= director.logger.LogLevel.DEBUG:
+ # calling is_available() for every JS Challenge provider upfront may have some overhead
+ director.logger.debug(f'JS Challenge Providers: {provider_display_list(director.providers.values())}')
+ director.logger.trace(f'Registered {len(director.preferences)} JS Challenge provider preferences')
+
+ return director
+
+
+def validate_provider_response(response: JsChallengeProviderResponse) -> bool:
+ return (
+ isinstance(response, JsChallengeProviderResponse)
+ and isinstance(response.request, JsChallengeRequest)
+ and (
+ isinstance(response.response, JsChallengeResponse)
+ or (response.error is not None and isinstance(response.error, Exception)))
+ )
+
+
+def validate_response(response: JsChallengeResponse, request: JsChallengeRequest) -> bool | str:
+ if not isinstance(response, JsChallengeResponse):
+ return 'Response is not a JsChallengeResponse'
+ if request.type == JsChallengeType.N:
+ return validate_nsig_challenge_output(response.output, request.input)
+ else:
+ return validate_sig_challenge_output(response.output, request.input)
+
+
+def validate_nsig_challenge_output(challenge_output: NChallengeOutput, challenge_input: NChallengeInput) -> bool | str:
+    if not (
+        isinstance(challenge_output, NChallengeOutput)
+        and len(challenge_output.results) == len(challenge_input.challenges)
+        and all(isinstance(k, str) and isinstance(v, str) for k, v in challenge_output.results.items())
+        and all(challenge in challenge_output.results for challenge in challenge_input.challenges)
+    ):
+        return 'Invalid NChallengeOutput'
+
+    # A result that still ends with its input challenge means the JS function bailed out with an exception.
+    for challenge, result in challenge_output.results.items():
+        if result.endswith(challenge):
+            return f'nsig result is invalid for {challenge!r}: {result!r}'
+    return True
+
+
+def validate_sig_challenge_output(challenge_output: SigChallengeOutput, challenge_input: SigChallengeInput) -> bool | str:
+    return (  # True when the output is well-formed, otherwise the error message string below
+        isinstance(challenge_output, SigChallengeOutput)
+        and len(challenge_output.results) == len(challenge_input.challenges)
+        and all(isinstance(k, str) and isinstance(v, str) for k, v in challenge_output.results.items())
+        and all(challenge in challenge_output.results for challenge in challenge_input.challenges)
+    ) or 'Invalid SigChallengeOutput'
Index: yt-dlp/yt_dlp/extractor/youtube/jsc/_registry.py
===================================================================
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
+++ yt-dlp/yt_dlp/extractor/youtube/jsc/_registry.py 2025-09-25 21:17:17.686129760 +0200
@@ -0,0 +1,4 @@
+from yt_dlp.globals import Indirect
+
+_jsc_providers = Indirect({})
+_jsc_preferences = Indirect(set())
Index: yt-dlp/yt_dlp/extractor/youtube/jsc/provider.py
===================================================================
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
+++ yt-dlp/yt_dlp/extractor/youtube/jsc/provider.py 2025-09-25 21:17:17.686241927 +0200
@@ -0,0 +1,157 @@
+"""PUBLIC API"""
+
+from __future__ import annotations
+
+import abc
+import dataclasses
+import enum
+import typing
+
+from yt_dlp.extractor.youtube.jsc._registry import _jsc_preferences, _jsc_providers
+from yt_dlp.extractor.youtube.pot._provider import (
+ IEContentProvider,
+ IEContentProviderError,
+ register_preference_generic,
+ register_provider_generic,
+)
+from yt_dlp.utils import ExtractorError
+
+__all__ = [
+ 'JsChallengeProvider',
+ 'JsChallengeProviderError',
+ 'JsChallengeProviderRejectedRequest',
+ 'JsChallengeProviderResponse',
+ 'JsChallengeRequest',
+ 'JsChallengeResponse',
+ 'JsChallengeType',
+ 'NChallengeInput',
+ 'NChallengeOutput',
+ 'SigChallengeInput',
+ 'SigChallengeOutput',
+ 'register_preference',
+ 'register_provider',
+]
+
+
+class JsChallengeType(enum.Enum):
+ N = 'n'
+ SIG = 'sig'
+
+
+@dataclasses.dataclass(frozen=True)
+class JsChallengeRequest:
+ type: JsChallengeType
+ input: NChallengeInput | SigChallengeInput
+ video_id: str | None = None
+
+
+@dataclasses.dataclass(frozen=True)
+class NChallengeInput:
+ player_url: str
+ challenges: list[str] = dataclasses.field(default_factory=list)
+
+
+@dataclasses.dataclass(frozen=True)
+class SigChallengeInput:
+ player_url: str
+ challenges: list[str] = dataclasses.field(default_factory=list)
+
+
+@dataclasses.dataclass(frozen=True)
+class NChallengeOutput:
+ results: dict[str, str] = dataclasses.field(default_factory=dict)
+
+
+@dataclasses.dataclass(frozen=True)
+class SigChallengeOutput:
+ results: dict[str, str] = dataclasses.field(default_factory=dict)
+
+
+@dataclasses.dataclass
+class JsChallengeProviderResponse:
+ request: JsChallengeRequest
+ response: JsChallengeResponse | None = None
+ error: Exception | None = None
+
+
+@dataclasses.dataclass
+class JsChallengeResponse:
+ type: JsChallengeType
+ output: NChallengeOutput | SigChallengeOutput
+
+
+class JsChallengeProviderRejectedRequest(IEContentProviderError):
+ """Reject the JsChallengeRequest (cannot handle the request)"""
+
+
+class JsChallengeProviderError(IEContentProviderError):
+ """An error occurred while solving the challenge"""
+
+
+class JsChallengeProvider(IEContentProvider, abc.ABC, suffix='JCP'):
+
+    # Challenge types this provider can solve; set to None to disable the check
+    _SUPPORTED_TYPES: tuple[JsChallengeType, ...] | None = ()
+
+    def __validate_request(self, request: JsChallengeRequest):
+        if not self.is_available():
+            raise JsChallengeProviderRejectedRequest(f'{self.PROVIDER_NAME} is not available')
+
+        # Validate request using built-in settings
+        if (
+            self._SUPPORTED_TYPES is not None
+            and request.type not in self._SUPPORTED_TYPES
+        ):
+            raise JsChallengeProviderRejectedRequest(
+                f'JS Challenge type "{request.type}" is not supported by {self.PROVIDER_NAME}')
+
+    def bulk_solve(self, requests: list[JsChallengeRequest]) -> typing.Generator[JsChallengeProviderResponse, None, None]:
+        """Yield a rejection for each request that fails validation, then the results of solving the rest"""
+        validated_requests = []
+        for request in requests:
+            try:
+                self.__validate_request(request)
+                validated_requests.append(request)
+            except JsChallengeProviderRejectedRequest as e:
+                yield JsChallengeProviderResponse(request=request, error=e)
+                continue
+        yield from self._real_bulk_solve(validated_requests)
+
+    @abc.abstractmethod
+    def _real_bulk_solve(self, requests: list[JsChallengeRequest]) -> typing.Generator[JsChallengeProviderResponse, None, None]:
+        """Subclasses must implement this method to handle bulk solving"""
+        raise NotImplementedError(f'{self.PROVIDER_NAME} does not implement bulk solving')
+
+    def _get_player(self, video_id, player_url):
+        try:
+            return self.ie._load_player(
+                video_id=video_id,
+                player_url=player_url,
+                fatal=True,
+            )
+        except ExtractorError as e:
+            raise JsChallengeProviderError(
+                f'Failed to load player for JS challenge: {e}') from e
+
+
+def register_provider(provider: type[JsChallengeProvider]):
+ """Register a JsChallengeProvider class"""
+ return register_provider_generic(
+ provider=provider,
+ base_class=JsChallengeProvider,
+ registry=_jsc_providers.value,
+ )
+
+
+def register_preference(*providers: type[JsChallengeProvider]) -> typing.Callable[[Preference], Preference]:
+ """Register a preference for a JsChallengeProvider class."""
+ return register_preference_generic(
+ JsChallengeProvider,
+ _jsc_preferences.value,
+ *providers,
+ )
+
+
+if typing.TYPE_CHECKING:
+ Preference = typing.Callable[[JsChallengeProvider, list[JsChallengeRequest]], int]
+ __all__.append('Preference')
Index: yt-dlp/yt_dlp/extractor/youtube/jsc/utils.py
===================================================================
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
+++ yt-dlp/yt_dlp/extractor/youtube/jsc/utils.py 2025-09-25 21:17:17.686384544 +0200
@@ -0,0 +1 @@
+"""PUBLIC API"""
Index: yt-dlp/yt_dlp/extractor/youtube/pot/_director.py
===================================================================
--- yt-dlp.orig/yt_dlp/extractor/youtube/pot/_director.py 2025-09-23 08:45:40.000000000 +0200
+++ yt-dlp/yt_dlp/extractor/youtube/pot/_director.py 2025-09-25 21:17:17.686535682 +0200
@@ -6,6 +6,7 @@
import datetime as dt
import hashlib
import json
+import traceback
import typing
import urllib.parse
from collections.abc import Iterable
@@ -58,9 +59,9 @@
if self.log_level <= self.LogLevel.TRACE:
self.__ie.write_debug(self._format_msg('TRACE: ' + message))
- def debug(self, message: str):
+ def debug(self, message: str, *, once=False):
if self.log_level <= self.LogLevel.DEBUG:
- self.__ie.write_debug(self._format_msg(message))
+ self.__ie.write_debug(self._format_msg(message), only_once=once)
def info(self, message: str):
if self.log_level <= self.LogLevel.INFO:
@@ -70,9 +71,11 @@
if self.log_level <= self.LogLevel.WARNING:
self.__ie.report_warning(self._format_msg(message), only_once=once)
- def error(self, message: str):
+ def error(self, message: str, cause=None):
if self.log_level <= self.LogLevel.ERROR:
- self.__ie._downloader.report_error(self._format_msg(message), is_error=False)
+ self.__ie._downloader.report_error(
+ self._format_msg(message), is_error=False,
+ tb=''.join(traceback.format_exception(None, cause, cause.__traceback__)) if cause else None)
class PoTokenCache:
Index: yt-dlp/yt_dlp/extractor/youtube/pot/_provider.py
===================================================================
--- yt-dlp.orig/yt_dlp/extractor/youtube/pot/_provider.py 2025-09-23 08:45:40.000000000 +0200
+++ yt-dlp/yt_dlp/extractor/youtube/pot/_provider.py 2025-09-25 21:17:17.686801920 +0200
@@ -36,7 +36,7 @@
pass
@abc.abstractmethod
- def debug(self, message: str):
+ def debug(self, message: str, *, once=False):
pass
@abc.abstractmethod
@@ -48,7 +48,7 @@
pass
@abc.abstractmethod
- def error(self, message: str):
+ def error(self, message: str, cause=None):
pass
@@ -90,7 +90,7 @@
@classproperty
def PROVIDER_KEY(cls) -> str:
assert hasattr(cls, '_PROVIDER_KEY_SUFFIX'), 'Content Provider implementation must define a suffix for the provider key'
- assert cls.__name__.endswith(cls._PROVIDER_KEY_SUFFIX), f'PoTokenProvider class names must end with "{cls._PROVIDER_KEY_SUFFIX}"'
+ assert cls.__name__.endswith(cls._PROVIDER_KEY_SUFFIX), f'Class name must end with "{cls._PROVIDER_KEY_SUFFIX}"'
return cls.__name__[:-len(cls._PROVIDER_KEY_SUFFIX)]
@abc.abstractmethod
Index: yt-dlp/yt_dlp/globals.py
===================================================================
--- yt-dlp.orig/yt_dlp/globals.py 2025-09-23 08:45:40.000000000 +0200
+++ yt-dlp/yt_dlp/globals.py 2025-09-25 21:17:17.686934410 +0200
@@ -1,3 +1,4 @@
+from __future__ import annotations
import os
from collections import defaultdict
@@ -30,3 +31,8 @@
IN_CLI = Indirect(False)
LAZY_EXTRACTORS = Indirect(None) # `False`=force, `None`=disabled, `True`=enabled
WINDOWS_VT_MODE = Indirect(False if os.name == 'nt' else None)
+
+# JS Runtimes
+# If adding support for another runtime, register it here to allow `js_runtimes` option to accept it.
+# key is the runtime name, value is None or a JsRuntime subclass (internal-only)
+supported_js_runtimes = Indirect({})
Index: yt-dlp/yt_dlp/options.py
===================================================================
--- yt-dlp.orig/yt_dlp/options.py 2025-09-23 08:45:40.000000000 +0200
+++ yt-dlp/yt_dlp/options.py 2025-09-25 21:17:17.687118095 +0200
@@ -457,6 +457,41 @@
dest='plugin_dirs', action='store_const', const=[],
help='Clear plugin directories to search, including defaults and those provided by previous --plugin-dirs')
general.add_option(
+ '--js-runtimes',
+ metavar='RUNTIME[:PATH]',
+ dest='js_runtimes',
+ action='callback',
+ callback=_list_from_options_callback,
+ type='str',
+ callback_kwargs={'delim': None},
+ default=['deno'],
+ help=(
+ 'Additional JavaScript runtime to enable, with an optional path to the runtime location. '
+ 'This option can be used multiple times to enable multiple runtimes. '
+ 'Supported runtimes: deno, node, bun. By default, only "deno" runtime is enabled.'))
+ general.add_option(
+ '--no-js-runtimes',
+ dest='js_runtimes', action='store_const', const=[],
+ help='Clear JavaScript runtimes to enable, including defaults and those provided by previous --js-runtimes')
+ general.add_option(
+ '--download-ext-components',
+ metavar='COMPONENT',
+ dest='download_ext_components',
+ action='callback',
+ callback=_list_from_options_callback,
+ type='str',
+ callback_kwargs={'delim': None},
+ default=[],
+ help=(
+ 'Specify external components that yt-dlp is allowed to download when needed. '
+ 'You can use this option multiple times to allow multiple components. '
+ 'Supported values: npm (JavaScript dependencies from npm), ejs-github (official JS scripts from yt-dlp-ejs GitHub). '
+ 'By default, no external components are allowed.'))
+ general.add_option(
+ '--no-download-ext-components',
+ dest='download_ext_components', action='store_const', const=[],
+ help='Disallow downloading of all external components, including any previously allowed by --download-ext-components or defaults.')
+ general.add_option(
'--flat-playlist',
action='store_const', dest='extract_flat', const='in_playlist', default=False,
help=(
Index: yt-dlp/yt_dlp/utils/_jsruntime.py
===================================================================
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
+++ yt-dlp/yt_dlp/utils/_jsruntime.py 2025-09-25 21:17:17.687370993 +0200
@@ -0,0 +1,57 @@
+from __future__ import annotations
+import abc
+import dataclasses
+import functools
+
+from ._utils import _get_exe_version_output, detect_exe_version
+
+
+@dataclasses.dataclass(frozen=True)
+class JsRuntimeInfo:
+ name: str
+ path: str
+ version: str
+ supported: bool = True
+
+
+class JsRuntime(abc.ABC):
+ def __init__(self, path=None):
+ self._path = path
+
+ @functools.cached_property
+ def info(self) -> JsRuntimeInfo | None:
+ return self._info()
+
+ @abc.abstractmethod
+ def _info(self) -> JsRuntimeInfo | None:
+ raise NotImplementedError
+
+
+class DenoJsRuntime(JsRuntime):
+ def _info(self):
+ deno_path = self._path or 'deno'
+ out = _get_exe_version_output(deno_path, ['--version'])
+ if not out:
+ return None
+ version = detect_exe_version(out, r'^deno (\S+)')
+ return JsRuntimeInfo(name='deno', path=deno_path, version=version)
+
+
+class BunJsRuntime(JsRuntime):
+ def _info(self):
+ path = self._path or 'bun'
+ out = _get_exe_version_output(path, ['--version'])
+ if not out:
+ return None
+ version = detect_exe_version(out, r'^(\S+)')
+ return JsRuntimeInfo(name='bun', path=path, version=version)
+
+
+class NodeJsRuntime(JsRuntime):
+ def _info(self):
+ node_path = self._path or 'node'
+ out = _get_exe_version_output(node_path, ['--version'])
+ if not out:
+ return None
+ version = detect_exe_version(out, r'^v(\S+)')
+ return JsRuntimeInfo(name='node', path=node_path, version=version)