|
|
|
|
@@ -0,0 +1,195 @@
|
|
|
|
|
From 046ce54956a0c30120038561e53b40994d29de2c Mon Sep 17 00:00:00 2001
|
|
|
|
|
From: Isaac Muse <faceless.shop@gmail.com>
|
|
|
|
|
Date: Wed, 17 Dec 2025 19:33:55 -0700
|
|
|
|
|
Subject: [PATCH] Adjustments for changes in HTML parser (#285)
|
|
|
|
|
|
|
|
|
|
Fixes #284
|
|
|
|
|
---
|
|
|
|
|
docs/src/markdown/about/changelog.md | 4 ++++
|
|
|
|
|
soupsieve/__meta__.py | 2 +-
|
|
|
|
|
soupsieve/css_match.py | 4 ++--
|
|
|
|
|
tests/test_extra/test_soup_contains.py | 5 ++++-
|
|
|
|
|
tests/test_level2/test_lang.py | 5 ++++-
|
|
|
|
|
tests/test_level3/test_root.py | 12 ++++++++----
|
|
|
|
|
tests/test_level4/test_default.py | 5 ++++-
|
|
|
|
|
tests/test_level4/test_dir.py | 5 ++++-
|
|
|
|
|
tests/test_level4/test_indeterminate.py | 5 ++++-
|
|
|
|
|
9 files changed, 35 insertions(+), 12 deletions(-)
|
|
|
|
|
|
|
|
|
|
Index: soupsieve-2.6/soupsieve/css_match.py
|
|
|
|
|
===================================================================
|
|
|
|
|
--- soupsieve-2.6.orig/soupsieve/css_match.py
|
|
|
|
|
+++ soupsieve-2.6/soupsieve/css_match.py
|
|
|
|
|
@@ -1190,7 +1190,7 @@ class CSSMatch(_DocumentNav):
|
|
|
|
|
# Use cached meta language.
|
|
|
|
|
if found_lang is None and self.cached_meta_lang:
|
|
|
|
|
for cache in self.cached_meta_lang:
|
|
|
|
|
- if root is cache[0]:
|
|
|
|
|
+ if root is not None and cast(str, root) is cache[0]:
|
|
|
|
|
found_lang = cache[1]
|
|
|
|
|
|
|
|
|
|
# If we couldn't find a language, and the document is HTML, look to meta to determine language.
|
|
|
|
|
Index: soupsieve-2.6/tests/test_extra/test_soup_contains.py
|
|
|
|
|
===================================================================
|
|
|
|
|
--- soupsieve-2.6.orig/tests/test_extra/test_soup_contains.py
|
|
|
|
|
+++ soupsieve-2.6/tests/test_extra/test_soup_contains.py
|
|
|
|
|
@@ -2,6 +2,9 @@
|
|
|
|
|
from .. import util
|
|
|
|
|
import warnings
|
|
|
|
|
import soupsieve as sv
|
|
|
|
|
+from bs4 import BeautifulSoup
|
|
|
|
|
+
|
|
|
|
|
+IFRAME_TEXT = BeautifulSoup('<iframe><div></div></iframe>', 'html.parser').iframe.text == '<div></div>'
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class TestSoupContains(util.TestCase):
|
|
|
|
|
@@ -250,7 +253,7 @@ class TestSoupContains(util.TestCase):
|
|
|
|
|
self.assert_selector(
|
|
|
|
|
markup,
|
|
|
|
|
'span:-soup-contains("iframe")',
|
|
|
|
|
- ['2'],
|
|
|
|
|
+ [] if IFRAME_TEXT else ['2'],
|
|
|
|
|
flags=util.PYHTML
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
Index: soupsieve-2.6/tests/test_level2/test_lang.py
|
|
|
|
|
===================================================================
|
|
|
|
|
--- soupsieve-2.6.orig/tests/test_level2/test_lang.py
|
|
|
|
|
+++ soupsieve-2.6/tests/test_level2/test_lang.py
|
|
|
|
|
@@ -1,5 +1,8 @@
|
|
|
|
|
"""Test language selector."""
|
|
|
|
|
from .. import util
|
|
|
|
|
+from bs4 import BeautifulSoup
|
|
|
|
|
+
|
|
|
|
|
+IFRAME_TEXT = BeautifulSoup('<iframe><div></div></iframe>', 'html.parser').iframe.text == '<div></div>'
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class TestLang(util.TestCase):
|
|
|
|
|
@@ -58,7 +61,7 @@ class TestLang(util.TestCase):
|
|
|
|
|
self.assert_selector(
|
|
|
|
|
markup,
|
|
|
|
|
"p:lang(en)",
|
|
|
|
|
- ['3'],
|
|
|
|
|
+ [] if IFRAME_TEXT else ['3'],
|
|
|
|
|
flags=util.PYHTML
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
Index: soupsieve-2.6/tests/test_level3/test_root.py
|
|
|
|
|
===================================================================
|
|
|
|
|
--- soupsieve-2.6.orig/tests/test_level3/test_root.py
|
|
|
|
|
+++ soupsieve-2.6/tests/test_level3/test_root.py
|
|
|
|
|
@@ -1,6 +1,10 @@
|
|
|
|
|
"""Test root selectors."""
|
|
|
|
|
from .. import util
|
|
|
|
|
import soupsieve as sv
|
|
|
|
|
+from bs4 import BeautifulSoup
|
|
|
|
|
+import pytest
|
|
|
|
|
+
|
|
|
|
|
+IFRAME_TEXT = BeautifulSoup('<iframe><div></div></iframe>', 'html.parser').iframe.text == '<div></div>'
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class TestRoot(util.TestCase):
|
|
|
|
|
@@ -65,7 +69,7 @@ class TestRoot(util.TestCase):
|
|
|
|
|
self.assert_selector(
|
|
|
|
|
self.MARKUP_IFRAME,
|
|
|
|
|
":root",
|
|
|
|
|
- ["root", "root2"],
|
|
|
|
|
+ ["root"] if IFRAME_TEXT else ["root", "root2"],
|
|
|
|
|
flags=util.PYHTML
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
@@ -85,17 +89,18 @@ class TestRoot(util.TestCase):
|
|
|
|
|
self.assert_selector(
|
|
|
|
|
self.MARKUP_IFRAME,
|
|
|
|
|
":root div",
|
|
|
|
|
- ["div", "div2", "other-div"],
|
|
|
|
|
+ ["div", "other-div"] if IFRAME_TEXT else ["div", "div2", "other-div"],
|
|
|
|
|
flags=util.PYHTML
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
self.assert_selector(
|
|
|
|
|
self.MARKUP_IFRAME,
|
|
|
|
|
":root > body > div",
|
|
|
|
|
- ["div", "div2", "other-div"],
|
|
|
|
|
+ ["div", "other-div"] if IFRAME_TEXT else ["div", "div2", "other-div"],
|
|
|
|
|
flags=util.PYHTML
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
+ @pytest.mark.skipif(IFRAME_TEXT, reason="Requires old Python HTML handling")
|
|
|
|
|
def test_iframe(self):
|
|
|
|
|
"""
|
|
|
|
|
Test that we only count `iframe` as root since the scoped element is the root.
|
|
|
|
|
@@ -112,7 +117,6 @@ class TestRoot(util.TestCase):
|
|
|
|
|
|
|
|
|
|
ids = [el['id'] for el in sv.select(':root > body > div', soup.iframe.html)]
|
|
|
|
|
self.assertEqual(sorted(ids), sorted(['div2']))
|
|
|
|
|
-
|
|
|
|
|
def test_no_root_double_tag(self):
|
|
|
|
|
"""Test when there is no root due to double root tags."""
|
|
|
|
|
|
|
|
|
|
Index: soupsieve-2.6/tests/test_level4/test_default.py
|
|
|
|
|
===================================================================
|
|
|
|
|
--- soupsieve-2.6.orig/tests/test_level4/test_default.py
|
|
|
|
|
+++ soupsieve-2.6/tests/test_level4/test_default.py
|
|
|
|
|
@@ -1,5 +1,8 @@
|
|
|
|
|
"""Test default selectors."""
|
|
|
|
|
from .. import util
|
|
|
|
|
+from bs4 import BeautifulSoup
|
|
|
|
|
+
|
|
|
|
|
+IFRAME_TEXT = BeautifulSoup('<iframe><div></div></iframe>', 'html.parser').iframe.text == '<div></div>'
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class TestDefault(util.TestCase):
|
|
|
|
|
@@ -113,7 +116,7 @@ class TestDefault(util.TestCase):
|
|
|
|
|
self.assert_selector(
|
|
|
|
|
markup,
|
|
|
|
|
":default",
|
|
|
|
|
- ['d1', 'd3', 'd4'],
|
|
|
|
|
+ ['d1', 'd3'] if IFRAME_TEXT else ['d1', 'd3', 'd4'],
|
|
|
|
|
flags=util.PYHTML
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
Index: soupsieve-2.6/tests/test_level4/test_dir.py
|
|
|
|
|
===================================================================
|
|
|
|
|
--- soupsieve-2.6.orig/tests/test_level4/test_dir.py
|
|
|
|
|
+++ soupsieve-2.6/tests/test_level4/test_dir.py
|
|
|
|
|
@@ -1,6 +1,9 @@
|
|
|
|
|
"""Test direction selectors."""
|
|
|
|
|
from .. import util
|
|
|
|
|
import soupsieve as sv
|
|
|
|
|
+from bs4 import BeautifulSoup
|
|
|
|
|
+
|
|
|
|
|
+IFRAME_TEXT = BeautifulSoup('<iframe><div></div></iframe>', 'html.parser').iframe.text == '<div></div>'
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class TestDir(util.TestCase):
|
|
|
|
|
@@ -169,7 +172,7 @@ class TestDir(util.TestCase):
|
|
|
|
|
self.assert_selector(
|
|
|
|
|
markup,
|
|
|
|
|
"div:dir(rtl)",
|
|
|
|
|
- ['2'],
|
|
|
|
|
+ [] if IFRAME_TEXT else ['2'],
|
|
|
|
|
flags=util.PYHTML
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
Index: soupsieve-2.6/tests/test_level4/test_indeterminate.py
|
|
|
|
|
===================================================================
|
|
|
|
|
--- soupsieve-2.6.orig/tests/test_level4/test_indeterminate.py
|
|
|
|
|
+++ soupsieve-2.6/tests/test_level4/test_indeterminate.py
|
|
|
|
|
@@ -1,5 +1,8 @@
|
|
|
|
|
"""Test indeterminate selectors."""
|
|
|
|
|
from .. import util
|
|
|
|
|
+from bs4 import BeautifulSoup
|
|
|
|
|
+
|
|
|
|
|
+IFRAME_TEXT = BeautifulSoup('<iframe><div></div></iframe>', 'html.parser').iframe.text == '<div></div>'
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class TestIndeterminate(util.TestCase):
|
|
|
|
|
@@ -68,6 +71,6 @@ class TestIndeterminate(util.TestCase):
|
|
|
|
|
self.assert_selector(
|
|
|
|
|
markup,
|
|
|
|
|
":indeterminate",
|
|
|
|
|
- ['radio1', 'radio3'],
|
|
|
|
|
+ ['radio1'] if IFRAME_TEXT else ['radio1', 'radio3'],
|
|
|
|
|
flags=util.PYHTML
|
|
|
|
|
)
|