forked from pool/python-nltk
Accepting request 1185062 from devel:languages:python
- Use tarball from GitHub instead of the Zip archive from PyPI, the latter has a very messy combination of CRLF and LF EOLs, which are hard to patch. - Refresh all patches from the original locations. - Add CVE-2024-39705-disable-download.patch as a crude workaround for CVE-2024-39705 (gh#nltk/nltk#3266, bsc#1227174). OBS-URL: https://build.opensuse.org/request/show/1185062 OBS-URL: https://build.opensuse.org/package/show/openSUSE:Factory/python-nltk?expand=0&rev=16
This commit is contained in:
commit
a331f038f7
104
CVE-2024-39705-disable-download.patch
Normal file
104
CVE-2024-39705-disable-download.patch
Normal file
@ -0,0 +1,104 @@
|
|||||||
|
---
|
||||||
|
nltk/app/chartparser_app.py | 13 +++++++++++++
|
||||||
|
nltk/corpus/reader/util.py | 2 ++
|
||||||
|
nltk/data.py | 2 ++
|
||||||
|
nltk/parse/transitionparser.py | 2 ++
|
||||||
|
nltk/tbl/demo.py | 4 +++-
|
||||||
|
5 files changed, 22 insertions(+), 1 deletion(-)
|
||||||
|
|
||||||
|
--- a/nltk/app/chartparser_app.py
|
||||||
|
+++ b/nltk/app/chartparser_app.py
|
||||||
|
@@ -800,6 +800,10 @@ class ChartComparer:
|
||||||
|
showerror("Error Saving Chart", f"Unable to open file: {filename!r}\n{e}")
|
||||||
|
|
||||||
|
def load_chart_dialog(self, *args):
|
||||||
|
+ showerror("Security Error",
|
||||||
|
+ "Due to gh#nltk/nltk#3266, deserializing from " +
|
||||||
|
+ "a pickle is forbidden.")
|
||||||
|
+ return
|
||||||
|
filename = askopenfilename(
|
||||||
|
filetypes=self.CHART_FILE_TYPES, defaultextension=".pickle"
|
||||||
|
)
|
||||||
|
@@ -811,6 +815,8 @@ class ChartComparer:
|
||||||
|
showerror("Error Loading Chart", f"Unable to open file: {filename!r}\n{e}")
|
||||||
|
|
||||||
|
def load_chart(self, filename):
|
||||||
|
+ raise RuntimeError("Due to gh#nltk/nltk#3266, deserializing from " +
|
||||||
|
+ "a pickle is forbidden.")
|
||||||
|
with open(filename, "rb") as infile:
|
||||||
|
chart = pickle.load(infile)
|
||||||
|
name = os.path.basename(filename)
|
||||||
|
@@ -2268,6 +2274,10 @@ class ChartParserApp:
|
||||||
|
if not filename:
|
||||||
|
return
|
||||||
|
try:
|
||||||
|
+ showerror("Security Error",
|
||||||
|
+ "Due to gh#nltk/nltk#3266, deserializing from " +
|
||||||
|
+ "a pickle is forbidden.")
|
||||||
|
+ return
|
||||||
|
with open(filename, "rb") as infile:
|
||||||
|
chart = pickle.load(infile)
|
||||||
|
self._chart = chart
|
||||||
|
@@ -2306,6 +2316,9 @@ class ChartParserApp:
|
||||||
|
return
|
||||||
|
try:
|
||||||
|
if filename.endswith(".pickle"):
|
||||||
|
+ showerror("Due to gh#nltk/nltk#3266, deserializing from " +
|
||||||
|
+ "a pickle is forbidden.")
|
||||||
|
+ return
|
||||||
|
with open(filename, "rb") as infile:
|
||||||
|
grammar = pickle.load(infile)
|
||||||
|
else:
|
||||||
|
--- a/nltk/corpus/reader/util.py
|
||||||
|
+++ b/nltk/corpus/reader/util.py
|
||||||
|
@@ -521,6 +521,8 @@ class PickleCorpusView(StreamBackedCorpu
|
||||||
|
|
||||||
|
def read_block(self, stream):
|
||||||
|
result = []
|
||||||
|
+ raise RuntimeError("Due to gh#nltk/nltk#3266, deserializing from " +
|
||||||
|
+ "a pickle is forbidden.")
|
||||||
|
for i in range(self.BLOCK_SIZE):
|
||||||
|
try:
|
||||||
|
result.append(pickle.load(stream))
|
||||||
|
--- a/nltk/data.py
|
||||||
|
+++ b/nltk/data.py
|
||||||
|
@@ -752,6 +752,8 @@ def load(
|
||||||
|
if format == "raw":
|
||||||
|
resource_val = opened_resource.read()
|
||||||
|
elif format == "pickle":
|
||||||
|
+ raise RuntimeError("Due to gh#nltk/nltk#3266, deserializing from " +
|
||||||
|
+ "a pickle is forbidden.")
|
||||||
|
resource_val = pickle.load(opened_resource)
|
||||||
|
elif format == "json":
|
||||||
|
import json
|
||||||
|
--- a/nltk/parse/transitionparser.py
|
||||||
|
+++ b/nltk/parse/transitionparser.py
|
||||||
|
@@ -553,6 +553,8 @@ class TransitionParser(ParserI):
|
||||||
|
"""
|
||||||
|
result = []
|
||||||
|
# First load the model
|
||||||
|
+ raise RuntimeError("Due to gh#nltk/nltk#3266, deserializing from " +
|
||||||
|
+ "a pickle is forbidden.")
|
||||||
|
model = pickle.load(open(modelFile, "rb"))
|
||||||
|
operation = Transition(self._algorithm)
|
||||||
|
|
||||||
|
--- a/nltk/tbl/demo.py
|
||||||
|
+++ b/nltk/tbl/demo.py
|
||||||
|
@@ -253,6 +253,8 @@ def postag(
|
||||||
|
)
|
||||||
|
)
|
||||||
|
with open(cache_baseline_tagger) as print_rules:
|
||||||
|
+ raise RuntimeError("Due to gh#nltk/nltk#3266, deserializing from " +
|
||||||
|
+ "a pickle is forbidden.")
|
||||||
|
baseline_tagger = pickle.load(print_rules)
|
||||||
|
print(f"Reloaded pickled tagger from {cache_baseline_tagger}")
|
||||||
|
else:
|
||||||
|
@@ -327,7 +329,7 @@ def postag(
|
||||||
|
with open(serialize_output) as print_rules:
|
||||||
|
brill_tagger_reloaded = pickle.load(print_rules)
|
||||||
|
print(f"Reloaded pickled tagger from {serialize_output}")
|
||||||
|
- taggedtest_reloaded = brill_tagger.tag_sents(testing_data)
|
||||||
|
+ taggedtest_reloaded = brill_tagger_reloaded.tag_sents(testing_data)
|
||||||
|
if taggedtest == taggedtest_reloaded:
|
||||||
|
print("Reloaded tagger tried on test set, results identical")
|
||||||
|
else:
|
3
nltk-3.8.1.tar.gz
Normal file
3
nltk-3.8.1.tar.gz
Normal file
@ -0,0 +1,3 @@
|
|||||||
|
version https://git-lfs.github.com/spec/v1
|
||||||
|
oid sha256:676970e2b7aa0a7184e68f76e0c4f2756fd1b82559a509d5656a23117faeb658
|
||||||
|
size 2867926
|
@ -1,3 +0,0 @@
|
|||||||
version https://git-lfs.github.com/spec/v1
|
|
||||||
oid sha256:1834da3d0682cba4f2cede2f9aad6b0fafb6461ba451db0efb6f9c39798d64d3
|
|
||||||
size 4620388
|
|
File diff suppressed because it is too large
Load Diff
@ -1,3 +1,14 @@
|
|||||||
|
-------------------------------------------------------------------
|
||||||
|
Mon Jul 1 21:02:45 UTC 2024 - Matej Cepl <mcepl@cepl.eu>
|
||||||
|
|
||||||
|
- Use tarball from GitHub instead of the Zip archive from PyPI,
|
||||||
|
the latter has a very messy combination of CRLF and LF EOLs,
|
||||||
|
which are hard to patch.
|
||||||
|
- Refresh all patches from the original locations.
|
||||||
|
- Add CVE-2024-39705-disable-download.patch to make a crude
|
||||||
|
workaround for CVE-2024-39705 (gh#nltk/nltk#3266,
|
||||||
|
bsc#1227174).
|
||||||
|
|
||||||
-------------------------------------------------------------------
|
-------------------------------------------------------------------
|
||||||
Thu Mar 21 17:41:52 UTC 2024 - Ben Greiner <code@bnavigator.de>
|
Thu Mar 21 17:41:52 UTC 2024 - Ben Greiner <code@bnavigator.de>
|
||||||
|
|
||||||
|
@ -16,6 +16,7 @@
|
|||||||
#
|
#
|
||||||
|
|
||||||
|
|
||||||
|
%define modname nltk
|
||||||
Name: python-nltk
|
Name: python-nltk
|
||||||
Version: 3.8.1
|
Version: 3.8.1
|
||||||
Release: 0
|
Release: 0
|
||||||
@ -23,7 +24,7 @@ Summary: Natural Language Toolkit
|
|||||||
License: Apache-2.0
|
License: Apache-2.0
|
||||||
URL: http://nltk.org/
|
URL: http://nltk.org/
|
||||||
# SourceRepository: https://github.com/nltk/nltk
|
# SourceRepository: https://github.com/nltk/nltk
|
||||||
Source0: https://files.pythonhosted.org/packages/source/n/nltk/nltk-%{version}.zip
|
Source0: https://github.com/nltk/%{modname}/archive/refs/tags/%{version}.tar.gz#/%{modname}-%{version}.tar.gz
|
||||||
# Download/Update NLTK data:
|
# Download/Update NLTK data:
|
||||||
# quilt setup python-nltk.spec
|
# quilt setup python-nltk.spec
|
||||||
# pushd nltk-?.?.?
|
# pushd nltk-?.?.?
|
||||||
@ -62,6 +63,9 @@ Source99: python-nltk.rpmlintrc
|
|||||||
Patch0: skip-networked-test.patch
|
Patch0: skip-networked-test.patch
|
||||||
# PATCH-FIX-UPSTREAM nltk-pr3207-py312.patch gh#nltk/nltk#3207
|
# PATCH-FIX-UPSTREAM nltk-pr3207-py312.patch gh#nltk/nltk#3207
|
||||||
Patch1: nltk-pr3207-py312.patch
|
Patch1: nltk-pr3207-py312.patch
|
||||||
|
# PATCH-FIX-OPENSUSE CVE-2024-39705-disable-download.patch bsc#1227174 mcepl@suse.com
|
||||||
|
# Crude workaround for CVE-2024-39705 (gh#nltk/nltk#3266): refuse to deserialize pickles
|
||||||
|
Patch2: CVE-2024-39705-disable-download.patch
|
||||||
BuildRequires: %{python_module base >= 3.7}
|
BuildRequires: %{python_module base >= 3.7}
|
||||||
BuildRequires: %{python_module pip}
|
BuildRequires: %{python_module pip}
|
||||||
BuildRequires: %{python_module setuptools}
|
BuildRequires: %{python_module setuptools}
|
||||||
@ -118,7 +122,7 @@ Python modules, data sets and tutorials supporting research and
|
|||||||
development in Natural Language Processing.
|
development in Natural Language Processing.
|
||||||
|
|
||||||
%prep
|
%prep
|
||||||
%autosetup -p1 -a1 -n nltk-%{version}
|
%setup -q -a1 -n %{modname}-%{version}
|
||||||
|
|
||||||
# Fix EOL
|
# Fix EOL
|
||||||
sed -i 's/\r/\n/g; s/\n$//' \
|
sed -i 's/\r/\n/g; s/\n$//' \
|
||||||
@ -150,6 +154,8 @@ sed -E -i "s|#![[:space:]]*%{_bindir}/env python|#!%{_bindir}/python3|" \
|
|||||||
nltk_data/corpora/pl196x/splitter.py \
|
nltk_data/corpora/pl196x/splitter.py \
|
||||||
tools/find_deprecated.py
|
tools/find_deprecated.py
|
||||||
|
|
||||||
|
%autopatch -p1
|
||||||
|
|
||||||
%build
|
%build
|
||||||
%pyproject_wheel
|
%pyproject_wheel
|
||||||
|
|
||||||
@ -164,7 +170,12 @@ chmod -x %{buildroot}%{$python_sitelib}/nltk/test/dependency.doctest
|
|||||||
%check
|
%check
|
||||||
export NLTK_DATA=$(readlink -f ./nltk_data/)
|
export NLTK_DATA=$(readlink -f ./nltk_data/)
|
||||||
# export PYTEST_ADDOPTS="--doctest-modules"
|
# export PYTEST_ADDOPTS="--doctest-modules"
|
||||||
%pytest -k 'not network'
|
# Skip tests requiring pickle.load gh#nltk/nltk#3266 (CVE-2024-39705)
|
||||||
|
skip_tests=" or test_basic or test_increment or test_pad_asterisk or test_pad_dotdot"
|
||||||
|
skip_tests+=" or test_pos_tag_eng or test_pos_tag_eng_universal or test_pos_tag_rus"
|
||||||
|
skip_tests+=" or test_pos_tag_rus_universal or test_pos_tag_unknown_lang"
|
||||||
|
skip_tests+=" or test_sent_tokenize or test_unspecified_lang or test_word_tokenize"
|
||||||
|
%pytest -k "not (network ${skip_tests})"
|
||||||
|
|
||||||
%post
|
%post
|
||||||
%python_install_alternative nltk
|
%python_install_alternative nltk
|
||||||
|
@ -6,30 +6,30 @@
|
|||||||
--- a/nltk/test/unit/test_downloader.py
|
--- a/nltk/test/unit/test_downloader.py
|
||||||
+++ b/nltk/test/unit/test_downloader.py
|
+++ b/nltk/test/unit/test_downloader.py
|
||||||
@@ -1,6 +1,9 @@
|
@@ -1,6 +1,9 @@
|
||||||
from nltk import download
|
from nltk import download
|
||||||
|
|
||||||
+import pytest
|
+import pytest
|
||||||
|
|
||||||
+
|
+
|
||||||
+@pytest.mark.network
|
+@pytest.mark.network
|
||||||
def test_downloader_using_existing_parent_download_dir(tmp_path):
|
def test_downloader_using_existing_parent_download_dir(tmp_path):
|
||||||
"""Test that download works properly when the parent folder of the download_dir exists"""
|
"""Test that download works properly when the parent folder of the download_dir exists"""
|
||||||
|
|
||||||
@@ -9,6 +12,7 @@ def test_downloader_using_existing_paren
|
@@ -9,6 +12,7 @@ def test_downloader_using_existing_paren
|
||||||
assert download_status is True
|
assert download_status is True
|
||||||
|
|
||||||
|
|
||||||
+@pytest.mark.network
|
+@pytest.mark.network
|
||||||
def test_downloader_using_non_existing_parent_download_dir(tmp_path):
|
def test_downloader_using_non_existing_parent_download_dir(tmp_path):
|
||||||
"""Test that download works properly when the parent folder of the download_dir does not exist"""
|
"""Test that download works properly when the parent folder of the download_dir does not exist"""
|
||||||
|
|
||||||
--- a/setup.cfg
|
--- a/setup.cfg
|
||||||
+++ b/setup.cfg
|
+++ b/setup.cfg
|
||||||
@@ -1,3 +1,7 @@
|
@@ -1,3 +1,7 @@
|
||||||
+[tool:pytest]
|
+[tool:pytest]
|
||||||
+markers =
|
+markers =
|
||||||
+ network: test case requires network connection
|
+ network: test case requires network connection
|
||||||
+
|
+
|
||||||
[metadata]
|
[metadata]
|
||||||
license_files =
|
license_files =
|
||||||
LICENSE.txt
|
LICENSE.txt
|
||||||
|
Loading…
Reference in New Issue
Block a user