forked from pool/python-nltk
Accepting request 1185062 from devel:languages:python
- Use tarball from GitHub instead of the Zip archive from PyPI; the latter has a very messy combination of CRLF and LF EOLs, which is hard to patch. - Refresh all patches from the original locations. - Add CVE-2024-39705-disable-download.patch as a crude workaround for CVE-2024-39705 (gh#nltk/nltk#3266, bsc#1227174). OBS-URL: https://build.opensuse.org/request/show/1185062 OBS-URL: https://build.opensuse.org/package/show/openSUSE:Factory/python-nltk?expand=0&rev=16
This commit is contained in:
commit
a331f038f7
104
CVE-2024-39705-disable-download.patch
Normal file
104
CVE-2024-39705-disable-download.patch
Normal file
@ -0,0 +1,104 @@
|
||||
---
|
||||
nltk/app/chartparser_app.py | 13 +++++++++++++
|
||||
nltk/corpus/reader/util.py | 2 ++
|
||||
nltk/data.py | 2 ++
|
||||
nltk/parse/transitionparser.py | 2 ++
|
||||
nltk/tbl/demo.py | 4 +++-
|
||||
5 files changed, 22 insertions(+), 1 deletion(-)
|
||||
|
||||
--- a/nltk/app/chartparser_app.py
|
||||
+++ b/nltk/app/chartparser_app.py
|
||||
@@ -800,6 +800,10 @@ class ChartComparer:
|
||||
showerror("Error Saving Chart", f"Unable to open file: {filename!r}\n{e}")
|
||||
|
||||
def load_chart_dialog(self, *args):
|
||||
+ showerror("Security Error",
|
||||
+ "Due to gh#nltk/nltk#3266, deserializing from " +
|
||||
+ "a pickle is forbidden.")
|
||||
+ return
|
||||
filename = askopenfilename(
|
||||
filetypes=self.CHART_FILE_TYPES, defaultextension=".pickle"
|
||||
)
|
||||
@@ -811,6 +815,8 @@ class ChartComparer:
|
||||
showerror("Error Loading Chart", f"Unable to open file: {filename!r}\n{e}")
|
||||
|
||||
def load_chart(self, filename):
|
||||
+ raise RuntimeError("Due to gh#nltk/nltk#3266, deserializing from " +
|
||||
+ "a pickle is forbidden.")
|
||||
with open(filename, "rb") as infile:
|
||||
chart = pickle.load(infile)
|
||||
name = os.path.basename(filename)
|
||||
@@ -2268,6 +2274,10 @@ class ChartParserApp:
|
||||
if not filename:
|
||||
return
|
||||
try:
|
||||
+ showerror("Security Error",
|
||||
+ "Due to gh#nltk/nltk#3266, deserializing from " +
|
||||
+ "a pickle is forbidden.")
|
||||
+ return
|
||||
with open(filename, "rb") as infile:
|
||||
chart = pickle.load(infile)
|
||||
self._chart = chart
|
||||
@@ -2306,6 +2316,9 @@ class ChartParserApp:
|
||||
return
|
||||
try:
|
||||
if filename.endswith(".pickle"):
|
||||
+ showerror("Due to gh#nltk/nltk#3266, deserializing from " +
|
||||
+ "a pickle is forbidden.")
|
||||
+ return
|
||||
with open(filename, "rb") as infile:
|
||||
grammar = pickle.load(infile)
|
||||
else:
|
||||
--- a/nltk/corpus/reader/util.py
|
||||
+++ b/nltk/corpus/reader/util.py
|
||||
@@ -521,6 +521,8 @@ class PickleCorpusView(StreamBackedCorpu
|
||||
|
||||
def read_block(self, stream):
|
||||
result = []
|
||||
+ raise RuntimeError("Due to gh#nltk/nltk#3266, deserializing from " +
|
||||
+ "a pickle is forbidden.")
|
||||
for i in range(self.BLOCK_SIZE):
|
||||
try:
|
||||
result.append(pickle.load(stream))
|
||||
--- a/nltk/data.py
|
||||
+++ b/nltk/data.py
|
||||
@@ -752,6 +752,8 @@ def load(
|
||||
if format == "raw":
|
||||
resource_val = opened_resource.read()
|
||||
elif format == "pickle":
|
||||
+ raise RuntimeError("Due to gh#nltk/nltk#3266, deserializing from " +
|
||||
+ "a pickle is forbidden.")
|
||||
resource_val = pickle.load(opened_resource)
|
||||
elif format == "json":
|
||||
import json
|
||||
--- a/nltk/parse/transitionparser.py
|
||||
+++ b/nltk/parse/transitionparser.py
|
||||
@@ -553,6 +553,8 @@ class TransitionParser(ParserI):
|
||||
"""
|
||||
result = []
|
||||
# First load the model
|
||||
+ raise RuntimeError("Due to gh#nltk/nltk#3266, deserializing from " +
|
||||
+ "a pickle is forbidden.")
|
||||
model = pickle.load(open(modelFile, "rb"))
|
||||
operation = Transition(self._algorithm)
|
||||
|
||||
--- a/nltk/tbl/demo.py
|
||||
+++ b/nltk/tbl/demo.py
|
||||
@@ -253,6 +253,8 @@ def postag(
|
||||
)
|
||||
)
|
||||
with open(cache_baseline_tagger) as print_rules:
|
||||
+ raise RuntimeError("Due to gh#nltk/nltk#3266, deserializing from " +
|
||||
+ "a pickle is forbidden.")
|
||||
baseline_tagger = pickle.load(print_rules)
|
||||
print(f"Reloaded pickled tagger from {cache_baseline_tagger}")
|
||||
else:
|
||||
@@ -327,7 +329,7 @@ def postag(
|
||||
with open(serialize_output) as print_rules:
|
||||
brill_tagger_reloaded = pickle.load(print_rules)
|
||||
print(f"Reloaded pickled tagger from {serialize_output}")
|
||||
- taggedtest_reloaded = brill_tagger.tag_sents(testing_data)
|
||||
+ taggedtest_reloaded = brill_tagger_reloaded.tag_sents(testing_data)
|
||||
if taggedtest == taggedtest_reloaded:
|
||||
print("Reloaded tagger tried on test set, results identical")
|
||||
else:
|
3
nltk-3.8.1.tar.gz
Normal file
3
nltk-3.8.1.tar.gz
Normal file
@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:676970e2b7aa0a7184e68f76e0c4f2756fd1b82559a509d5656a23117faeb658
|
||||
size 2867926
|
@ -1,3 +0,0 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:1834da3d0682cba4f2cede2f9aad6b0fafb6461ba451db0efb6f9c39798d64d3
|
||||
size 4620388
|
File diff suppressed because it is too large
Load Diff
@ -1,3 +1,14 @@
|
||||
-------------------------------------------------------------------
|
||||
Mon Jul 1 21:02:45 UTC 2024 - Matej Cepl <mcepl@cepl.eu>
|
||||
|
||||
- Use tarball from GitHub instead of the Zip archive from PyPI;
|
||||
the latter has a very messy combination of CRLF and LF EOLs,
|
||||
which is hard to patch.
|
||||
- Refresh all patches from the original locations.
|
||||
- Add CVE-2024-39705-disable-download.patch as a crude
|
||||
workaround for CVE-2024-39705 (gh#nltk/nltk#3266,
|
||||
bsc#1227174).
|
||||
|
||||
-------------------------------------------------------------------
|
||||
Thu Mar 21 17:41:52 UTC 2024 - Ben Greiner <code@bnavigator.de>
|
||||
|
||||
|
@ -16,6 +16,7 @@
|
||||
#
|
||||
|
||||
|
||||
%define modname nltk
|
||||
Name: python-nltk
|
||||
Version: 3.8.1
|
||||
Release: 0
|
||||
@ -23,7 +24,7 @@ Summary: Natural Language Toolkit
|
||||
License: Apache-2.0
|
||||
URL: http://nltk.org/
|
||||
# SourceRepository: https://github.com/nltk/nltk
|
||||
Source0: https://files.pythonhosted.org/packages/source/n/nltk/nltk-%{version}.zip
|
||||
Source0: https://github.com/nltk/%{modname}/archive/refs/tags/%{version}.tar.gz#/%{modname}-%{version}.tar.gz
|
||||
# Download/Update NLTK data:
|
||||
# quilt setup python-nltk.spec
|
||||
# pushd nltk-?.?.?
|
||||
@ -62,6 +63,9 @@ Source99: python-nltk.rpmlintrc
|
||||
Patch0: skip-networked-test.patch
|
||||
# PATCH-FIX-UPSTREAM nltk-pr3207-py312.patch gh#nltk/nltk#3207
|
||||
Patch1: nltk-pr3207-py312.patch
|
||||
# PATCH-FIX-UPSTREAM CVE-2024-39705-disable-download.patch bsc#1227174 mcepl@suse.com
|
||||
# Makes nltk refuse to deserialize pickles (pickle.load) as a crude workaround for CVE-2024-39705 (gh#nltk/nltk#3266)
|
||||
Patch2: CVE-2024-39705-disable-download.patch
|
||||
BuildRequires: %{python_module base >= 3.7}
|
||||
BuildRequires: %{python_module pip}
|
||||
BuildRequires: %{python_module setuptools}
|
||||
@ -118,7 +122,7 @@ Python modules, data sets and tutorials supporting research and
|
||||
development in Natural Language Processing.
|
||||
|
||||
%prep
|
||||
%autosetup -p1 -a1 -n nltk-%{version}
|
||||
%setup -q -a1 -n %{modname}-%{version}
|
||||
|
||||
# Fix EOL
|
||||
sed -i 's/\r/\n/g; s/\n$//' \
|
||||
@ -150,6 +154,8 @@ sed -E -i "s|#![[:space:]]*%{_bindir}/env python|#!%{_bindir}/python3|" \
|
||||
nltk_data/corpora/pl196x/splitter.py \
|
||||
tools/find_deprecated.py
|
||||
|
||||
%autopatch -p1
|
||||
|
||||
%build
|
||||
%pyproject_wheel
|
||||
|
||||
@ -164,7 +170,12 @@ chmod -x %{buildroot}%{$python_sitelib}/nltk/test/dependency.doctest
|
||||
%check
|
||||
export NLTK_DATA=$(readlink -f ./nltk_data/)
|
||||
# export PYTEST_ADDOPTS="--doctest-modules"
|
||||
%pytest -k 'not network'
|
||||
# Skip tests requiring pickle.load gh#nltk/nltk#3266 (CVE-2024-39705)
|
||||
skip_tests=" or test_basic or test_increment or test_pad_asterisk or test_pad_dotdot"
|
||||
skip_tests+=" or test_pos_tag_eng or test_pos_tag_eng_universal or test_pos_tag_rus"
|
||||
skip_tests+=" or test_pos_tag_rus_universal or test_pos_tag_unknown_lang"
|
||||
skip_tests+=" or test_sent_tokenize or test_unspecified_lang or test_word_tokenize"
|
||||
%pytest -k "not (network ${skip_tests})"
|
||||
|
||||
%post
|
||||
%python_install_alternative nltk
|
||||
|
@ -6,30 +6,30 @@
|
||||
--- a/nltk/test/unit/test_downloader.py
|
||||
+++ b/nltk/test/unit/test_downloader.py
|
||||
@@ -1,6 +1,9 @@
|
||||
from nltk import download
|
||||
|
||||
+import pytest
|
||||
|
||||
+
|
||||
+@pytest.mark.network
|
||||
def test_downloader_using_existing_parent_download_dir(tmp_path):
|
||||
"""Test that download works properly when the parent folder of the download_dir exists"""
|
||||
|
||||
from nltk import download
|
||||
|
||||
+import pytest
|
||||
|
||||
+
|
||||
+@pytest.mark.network
|
||||
def test_downloader_using_existing_parent_download_dir(tmp_path):
|
||||
"""Test that download works properly when the parent folder of the download_dir exists"""
|
||||
|
||||
@@ -9,6 +12,7 @@ def test_downloader_using_existing_paren
|
||||
assert download_status is True
|
||||
|
||||
|
||||
+@pytest.mark.network
|
||||
def test_downloader_using_non_existing_parent_download_dir(tmp_path):
|
||||
"""Test that download works properly when the parent folder of the download_dir does not exist"""
|
||||
|
||||
assert download_status is True
|
||||
|
||||
|
||||
+@pytest.mark.network
|
||||
def test_downloader_using_non_existing_parent_download_dir(tmp_path):
|
||||
"""Test that download works properly when the parent folder of the download_dir does not exist"""
|
||||
|
||||
--- a/setup.cfg
|
||||
+++ b/setup.cfg
|
||||
@@ -1,3 +1,7 @@
|
||||
+[tool:pytest]
|
||||
+markers =
|
||||
+ network: test case requires network connection
|
||||
+
|
||||
[metadata]
|
||||
license_files =
|
||||
LICENSE.txt
|
||||
+[tool:pytest]
|
||||
+markers =
|
||||
+ network: test case requires network connection
|
||||
+
|
||||
[metadata]
|
||||
license_files =
|
||||
LICENSE.txt
|
||||
|
Loading…
Reference in New Issue
Block a user