SHA256
1
0
forked from pool/python-nltk

Accepting request 1185062 from devel:languages:python

- Use tarball from GitHub instead of the Zip archive from PyPI;
  the latter has a very messy combination of CRLF and LF EOLs,
  which is hard to patch.
- Refresh all patches from the original locations.
- Add CVE-2024-39705-disable-download.patch as a crude
  workaround for CVE-2024-39705 (gh#nltk/nltk#3266,
  bsc#1227174).

OBS-URL: https://build.opensuse.org/request/show/1185062
OBS-URL: https://build.opensuse.org/package/show/openSUSE:Factory/python-nltk?expand=0&rev=16
This commit is contained in:
Ana Guerrero 2024-07-04 14:24:00 +00:00 committed by Git OBS Bridge
commit a331f038f7
7 changed files with 228 additions and 1774 deletions

View File

@ -0,0 +1,104 @@
---
nltk/app/chartparser_app.py | 13 +++++++++++++
nltk/corpus/reader/util.py | 2 ++
nltk/data.py | 2 ++
nltk/parse/transitionparser.py | 2 ++
nltk/tbl/demo.py | 4 +++-
5 files changed, 22 insertions(+), 1 deletion(-)
--- a/nltk/app/chartparser_app.py
+++ b/nltk/app/chartparser_app.py
@@ -800,6 +800,10 @@ class ChartComparer:
showerror("Error Saving Chart", f"Unable to open file: {filename!r}\n{e}")
def load_chart_dialog(self, *args):
+ showerror("Security Error",
+ "Due to gh#nltk/nltk#3266, deserializing from " +
+ "a pickle is forbidden.")
+ return
filename = askopenfilename(
filetypes=self.CHART_FILE_TYPES, defaultextension=".pickle"
)
@@ -811,6 +815,8 @@ class ChartComparer:
showerror("Error Loading Chart", f"Unable to open file: {filename!r}\n{e}")
def load_chart(self, filename):
+ raise RuntimeError("Due to gh#nltk/nltk#3266, deserializing from " +
+ "a pickle is forbidden.")
with open(filename, "rb") as infile:
chart = pickle.load(infile)
name = os.path.basename(filename)
@@ -2268,6 +2274,10 @@ class ChartParserApp:
if not filename:
return
try:
+ showerror("Security Error",
+ "Due to gh#nltk/nltk#3266, deserializing from " +
+ "a pickle is forbidden.")
+ return
with open(filename, "rb") as infile:
chart = pickle.load(infile)
self._chart = chart
@@ -2306,6 +2316,9 @@ class ChartParserApp:
return
try:
if filename.endswith(".pickle"):
+ showerror("Due to gh#nltk/nltk#3266, deserializing from " +
+ "a pickle is forbidden.")
+ return
with open(filename, "rb") as infile:
grammar = pickle.load(infile)
else:
--- a/nltk/corpus/reader/util.py
+++ b/nltk/corpus/reader/util.py
@@ -521,6 +521,8 @@ class PickleCorpusView(StreamBackedCorpu
def read_block(self, stream):
result = []
+ raise RuntimeError("Due to gh#nltk/nltk#3266, deserializing from " +
+ "a pickle is forbidden.")
for i in range(self.BLOCK_SIZE):
try:
result.append(pickle.load(stream))
--- a/nltk/data.py
+++ b/nltk/data.py
@@ -752,6 +752,8 @@ def load(
if format == "raw":
resource_val = opened_resource.read()
elif format == "pickle":
+ raise RuntimeError("Due to gh#nltk/nltk#3266, deserializing from " +
+ "a pickle is forbidden.")
resource_val = pickle.load(opened_resource)
elif format == "json":
import json
--- a/nltk/parse/transitionparser.py
+++ b/nltk/parse/transitionparser.py
@@ -553,6 +553,8 @@ class TransitionParser(ParserI):
"""
result = []
# First load the model
+ raise RuntimeError("Due to gh#nltk/nltk#3266, deserializing from " +
+ "a pickle is forbidden.")
model = pickle.load(open(modelFile, "rb"))
operation = Transition(self._algorithm)
--- a/nltk/tbl/demo.py
+++ b/nltk/tbl/demo.py
@@ -253,6 +253,8 @@ def postag(
)
)
with open(cache_baseline_tagger) as print_rules:
+ raise RuntimeError("Due to gh#nltk/nltk#3266, deserializing from " +
+ "a pickle is forbidden.")
baseline_tagger = pickle.load(print_rules)
print(f"Reloaded pickled tagger from {cache_baseline_tagger}")
else:
@@ -327,7 +329,7 @@ def postag(
with open(serialize_output) as print_rules:
brill_tagger_reloaded = pickle.load(print_rules)
print(f"Reloaded pickled tagger from {serialize_output}")
- taggedtest_reloaded = brill_tagger.tag_sents(testing_data)
+ taggedtest_reloaded = brill_tagger_reloaded.tag_sents(testing_data)
if taggedtest == taggedtest_reloaded:
print("Reloaded tagger tried on test set, results identical")
else:

3
nltk-3.8.1.tar.gz Normal file
View File

@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:676970e2b7aa0a7184e68f76e0c4f2756fd1b82559a509d5656a23117faeb658
size 2867926

View File

@ -1,3 +0,0 @@
version https://git-lfs.github.com/spec/v1
oid sha256:1834da3d0682cba4f2cede2f9aad6b0fafb6461ba451db0efb6f9c39798d64d3
size 4620388

File diff suppressed because it is too large Load Diff

View File

@ -1,3 +1,14 @@
-------------------------------------------------------------------
Mon Jul 1 21:02:45 UTC 2024 - Matej Cepl <mcepl@cepl.eu>
- Use tarball from GitHub instead of the Zip archive from PyPI;
the latter has a very messy combination of CRLF and LF EOLs,
which is hard to patch.
- Refresh all patches from the original locations.
- Add CVE-2024-39705-disable-download.patch as a crude
workaround for CVE-2024-39705 (gh#nltk/nltk#3266,
bsc#1227174).
------------------------------------------------------------------- -------------------------------------------------------------------
Thu Mar 21 17:41:52 UTC 2024 - Ben Greiner <code@bnavigator.de> Thu Mar 21 17:41:52 UTC 2024 - Ben Greiner <code@bnavigator.de>

View File

@ -16,6 +16,7 @@
# #
%define modname nltk
Name: python-nltk Name: python-nltk
Version: 3.8.1 Version: 3.8.1
Release: 0 Release: 0
@ -23,7 +24,7 @@ Summary: Natural Language Toolkit
License: Apache-2.0 License: Apache-2.0
URL: http://nltk.org/ URL: http://nltk.org/
# SourceRepository: https://github.com/nltk/nltk # SourceRepository: https://github.com/nltk/nltk
Source0: https://files.pythonhosted.org/packages/source/n/nltk/nltk-%{version}.zip Source0: https://github.com/nltk/%{modname}/archive/refs/tags/%{version}.tar.gz#/%{modname}-%{version}.tar.gz
# Download/Update NLTK data: # Download/Update NLTK data:
# quilt setup python-nltk.spec # quilt setup python-nltk.spec
# pushd nltk-?.?.? # pushd nltk-?.?.?
@ -62,6 +63,9 @@ Source99: python-nltk.rpmlintrc
Patch0: skip-networked-test.patch Patch0: skip-networked-test.patch
# PATCH-FIX-UPSTREAM nltk-pr3207-py312.patch gh#nltk/nltk#3207 # PATCH-FIX-UPSTREAM nltk-pr3207-py312.patch gh#nltk/nltk#3207
Patch1: nltk-pr3207-py312.patch Patch1: nltk-pr3207-py312.patch
# PATCH-FIX-UPSTREAM CVE-2024-39705-disable-download.patch bsc#1227174 mcepl@suse.com
# Crude workaround for CVE-2024-39705 (gh#nltk/nltk#3266): refuse to deserialize pickles (pickle.load) in nltk
Patch2: CVE-2024-39705-disable-download.patch
BuildRequires: %{python_module base >= 3.7} BuildRequires: %{python_module base >= 3.7}
BuildRequires: %{python_module pip} BuildRequires: %{python_module pip}
BuildRequires: %{python_module setuptools} BuildRequires: %{python_module setuptools}
@ -118,7 +122,7 @@ Python modules, data sets and tutorials supporting research and
development in Natural Language Processing. development in Natural Language Processing.
%prep %prep
%autosetup -p1 -a1 -n nltk-%{version} %setup -q -a1 -n %{modname}-%{version}
# Fix EOL # Fix EOL
sed -i 's/\r/\n/g; s/\n$//' \ sed -i 's/\r/\n/g; s/\n$//' \
@ -150,6 +154,8 @@ sed -E -i "s|#![[:space:]]*%{_bindir}/env python|#!%{_bindir}/python3|" \
nltk_data/corpora/pl196x/splitter.py \ nltk_data/corpora/pl196x/splitter.py \
tools/find_deprecated.py tools/find_deprecated.py
%autopatch -p1
%build %build
%pyproject_wheel %pyproject_wheel
@ -164,7 +170,12 @@ chmod -x %{buildroot}%{$python_sitelib}/nltk/test/dependency.doctest
%check %check
export NLTK_DATA=$(readlink -f ./nltk_data/) export NLTK_DATA=$(readlink -f ./nltk_data/)
# export PYTEST_ADDOPTS="--doctest-modules" # export PYTEST_ADDOPTS="--doctest-modules"
%pytest -k 'not network' # Skip tests requiring pickle.load gh#nltk/nltk#3266 (CVE-2024-39705)
skip_tests=" or test_basic or test_increment or test_pad_asterisk or test_pad_dotdot"
skip_tests+=" or test_pos_tag_eng or test_pos_tag_eng_universal or test_pos_tag_rus"
skip_tests+=" or test_pos_tag_rus_universal or test_pos_tag_unknown_lang"
skip_tests+=" or test_sent_tokenize or test_unspecified_lang or test_word_tokenize"
%pytest -k "not (network ${skip_tests})"
%post %post
%python_install_alternative nltk %python_install_alternative nltk

View File

@ -6,30 +6,30 @@
--- a/nltk/test/unit/test_downloader.py --- a/nltk/test/unit/test_downloader.py
+++ b/nltk/test/unit/test_downloader.py +++ b/nltk/test/unit/test_downloader.py
@@ -1,6 +1,9 @@ @@ -1,6 +1,9 @@
from nltk import download from nltk import download
+import pytest +import pytest
+ +
+@pytest.mark.network +@pytest.mark.network
def test_downloader_using_existing_parent_download_dir(tmp_path): def test_downloader_using_existing_parent_download_dir(tmp_path):
"""Test that download works properly when the parent folder of the download_dir exists""" """Test that download works properly when the parent folder of the download_dir exists"""
@@ -9,6 +12,7 @@ def test_downloader_using_existing_paren @@ -9,6 +12,7 @@ def test_downloader_using_existing_paren
assert download_status is True assert download_status is True
+@pytest.mark.network +@pytest.mark.network
def test_downloader_using_non_existing_parent_download_dir(tmp_path): def test_downloader_using_non_existing_parent_download_dir(tmp_path):
"""Test that download works properly when the parent folder of the download_dir does not exist""" """Test that download works properly when the parent folder of the download_dir does not exist"""
--- a/setup.cfg --- a/setup.cfg
+++ b/setup.cfg +++ b/setup.cfg
@@ -1,3 +1,7 @@ @@ -1,3 +1,7 @@
+[tool:pytest] +[tool:pytest]
+markers = +markers =
+ network: test case requires network connection + network: test case requires network connection
+ +
[metadata] [metadata]
license_files = license_files =
LICENSE.txt LICENSE.txt