From 046ce54956a0c30120038561e53b40994d29de2c Mon Sep 17 00:00:00 2001 From: Isaac Muse Date: Wed, 17 Dec 2025 19:33:55 -0700 Subject: [PATCH] Adjustments for changes in HTML parser (#285) Fixes #284 --- docs/src/markdown/about/changelog.md | 4 ++++ soupsieve/__meta__.py | 2 +- soupsieve/css_match.py | 4 ++-- tests/test_extra/test_soup_contains.py | 5 ++++- tests/test_level2/test_lang.py | 5 ++++- tests/test_level3/test_root.py | 12 ++++++++---- tests/test_level4/test_default.py | 5 ++++- tests/test_level4/test_dir.py | 5 ++++- tests/test_level4/test_indeterminate.py | 5 ++++- 9 files changed, 35 insertions(+), 12 deletions(-) Index: soupsieve-2.6/soupsieve/css_match.py =================================================================== --- soupsieve-2.6.orig/soupsieve/css_match.py +++ soupsieve-2.6/soupsieve/css_match.py @@ -1190,7 +1190,7 @@ class CSSMatch(_DocumentNav): # Use cached meta language. if found_lang is None and self.cached_meta_lang: for cache in self.cached_meta_lang: - if root is cache[0]: + if root is not None and cast(str, root) is cache[0]: found_lang = cache[1] # If we couldn't find a language, and the document is HTML, look to meta to determine language. Index: soupsieve-2.6/tests/test_extra/test_soup_contains.py =================================================================== --- soupsieve-2.6.orig/tests/test_extra/test_soup_contains.py +++ soupsieve-2.6/tests/test_extra/test_soup_contains.py @@ -2,6 +2,9 @@ from .. import util import warnings import soupsieve as sv +from bs4 import BeautifulSoup + +IFRAME_TEXT = BeautifulSoup('<iframe><div></div></iframe>', 'html.parser').iframe.text == '<div></div>'
class TestSoupContains(util.TestCase): @@ -250,7 +253,7 @@ class TestSoupContains(util.TestCase): self.assert_selector( markup, 'span:-soup-contains("iframe")', - ['2'], + [] if IFRAME_TEXT else ['2'], flags=util.PYHTML ) Index: soupsieve-2.6/tests/test_level2/test_lang.py =================================================================== --- soupsieve-2.6.orig/tests/test_level2/test_lang.py +++ soupsieve-2.6/tests/test_level2/test_lang.py @@ -1,5 +1,8 @@ """Test language selector.""" from .. import util +from bs4 import BeautifulSoup + +IFRAME_TEXT = BeautifulSoup('<iframe><div></div></iframe>', 'html.parser').iframe.text == '<div></div>'
class TestLang(util.TestCase): @@ -58,7 +61,7 @@ class TestLang(util.TestCase): self.assert_selector( markup, "p:lang(en)", - ['3'], + [] if IFRAME_TEXT else ['3'], flags=util.PYHTML ) Index: soupsieve-2.6/tests/test_level3/test_root.py =================================================================== --- soupsieve-2.6.orig/tests/test_level3/test_root.py +++ soupsieve-2.6/tests/test_level3/test_root.py @@ -1,6 +1,10 @@ """Test root selectors.""" from .. import util import soupsieve as sv +from bs4 import BeautifulSoup +import pytest + +IFRAME_TEXT = BeautifulSoup('<iframe><div></div></iframe>', 'html.parser').iframe.text == '<div></div>'
class TestRoot(util.TestCase): @@ -65,7 +69,7 @@ class TestRoot(util.TestCase): self.assert_selector( self.MARKUP_IFRAME, ":root", - ["root", "root2"], + ["root"] if IFRAME_TEXT else ["root", "root2"], flags=util.PYHTML ) @@ -85,17 +89,18 @@ class TestRoot(util.TestCase): self.assert_selector( self.MARKUP_IFRAME, ":root div", - ["div", "div2", "other-div"], + ["div", "other-div"] if IFRAME_TEXT else ["div", "div2", "other-div"], flags=util.PYHTML ) self.assert_selector( self.MARKUP_IFRAME, ":root > body > div", - ["div", "div2", "other-div"], + ["div", "other-div"] if IFRAME_TEXT else ["div", "div2", "other-div"], flags=util.PYHTML ) + @pytest.mark.skipif(IFRAME_TEXT, reason="Requires old Python HTML handling") def test_iframe(self): """ Test that we only count `iframe` as root since the scoped element is the root. @@ -112,7 +117,6 @@ class TestRoot(util.TestCase): ids = [el['id'] for el in sv.select(':root > body > div', soup.iframe.html)] self.assertEqual(sorted(ids), sorted(['div2'])) - def test_no_root_double_tag(self): """Test when there is no root due to double root tags.""" Index: soupsieve-2.6/tests/test_level4/test_default.py =================================================================== --- soupsieve-2.6.orig/tests/test_level4/test_default.py +++ soupsieve-2.6/tests/test_level4/test_default.py @@ -1,5 +1,8 @@ """Test default selectors.""" from .. import util +from bs4 import BeautifulSoup + +IFRAME_TEXT = BeautifulSoup('<iframe><div></div></iframe>', 'html.parser').iframe.text == '<div></div>'
class TestDefault(util.TestCase): @@ -113,7 +116,7 @@ class TestDefault(util.TestCase): self.assert_selector( markup, ":default", - ['d1', 'd3', 'd4'], + ['d1', 'd3'] if IFRAME_TEXT else ['d1', 'd3', 'd4'], flags=util.PYHTML ) Index: soupsieve-2.6/tests/test_level4/test_dir.py =================================================================== --- soupsieve-2.6.orig/tests/test_level4/test_dir.py +++ soupsieve-2.6/tests/test_level4/test_dir.py @@ -1,6 +1,9 @@ """Test direction selectors.""" from .. import util import soupsieve as sv +from bs4 import BeautifulSoup + +IFRAME_TEXT = BeautifulSoup('<iframe><div></div></iframe>', 'html.parser').iframe.text == '<div></div>'
class TestDir(util.TestCase): @@ -169,7 +172,7 @@ class TestDir(util.TestCase): self.assert_selector( markup, "div:dir(rtl)", - ['2'], + [] if IFRAME_TEXT else ['2'], flags=util.PYHTML ) Index: soupsieve-2.6/tests/test_level4/test_indeterminate.py =================================================================== --- soupsieve-2.6.orig/tests/test_level4/test_indeterminate.py +++ soupsieve-2.6/tests/test_level4/test_indeterminate.py @@ -1,5 +1,8 @@ """Test indeterminate selectors.""" from .. import util +from bs4 import BeautifulSoup + +IFRAME_TEXT = BeautifulSoup('<iframe><div></div></iframe>', 'html.parser').iframe.text == '<div></div>'
class TestIndeterminate(util.TestCase): @@ -68,6 +71,6 @@ class TestIndeterminate(util.TestCase): self.assert_selector( markup, ":indeterminate", - ['radio1', 'radio3'], + ['radio1'] if IFRAME_TEXT else ['radio1', 'radio3'], flags=util.PYHTML )