forked from pool/python-nltk
		
	Accepting request 1185062 from devel:languages:python
- Use tarball from GitHub instead of the Zip archive from PyPI, the latter has a very messy combination of CRLF and LF EOLs, which are hard to patch. - Refresh all patches from the original locations. - Add CVE-2024-39705-disable-download.patch to provide a crude workaround for CVE-2024-39705 (gh#nltk/nltk#3266, bsc#1227174). OBS-URL: https://build.opensuse.org/request/show/1185062 OBS-URL: https://build.opensuse.org/package/show/openSUSE:Factory/python-nltk?expand=0&rev=16
This commit is contained in:
		
							
								
								
									
										104
									
								
								CVE-2024-39705-disable-download.patch
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										104
									
								
								CVE-2024-39705-disable-download.patch
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,104 @@
 | 
			
		||||
---
 | 
			
		||||
 nltk/app/chartparser_app.py    |   13 +++++++++++++
 | 
			
		||||
 nltk/corpus/reader/util.py     |    2 ++
 | 
			
		||||
 nltk/data.py                   |    2 ++
 | 
			
		||||
 nltk/parse/transitionparser.py |    2 ++
 | 
			
		||||
 nltk/tbl/demo.py               |    4 +++-
 | 
			
		||||
 5 files changed, 22 insertions(+), 1 deletion(-)
 | 
			
		||||
 | 
			
		||||
--- a/nltk/app/chartparser_app.py
 | 
			
		||||
+++ b/nltk/app/chartparser_app.py
 | 
			
		||||
@@ -800,6 +800,10 @@ class ChartComparer:
 | 
			
		||||
             showerror("Error Saving Chart", f"Unable to open file: {filename!r}\n{e}")
 | 
			
		||||
 
 | 
			
		||||
     def load_chart_dialog(self, *args):
 | 
			
		||||
+        showerror("Security Error",
 | 
			
		||||
+                  "Due to gh#nltk/nltk#3266, deserializing from " +
 | 
			
		||||
+                  "a pickle is forbidden.")
 | 
			
		||||
+        return
 | 
			
		||||
         filename = askopenfilename(
 | 
			
		||||
             filetypes=self.CHART_FILE_TYPES, defaultextension=".pickle"
 | 
			
		||||
         )
 | 
			
		||||
@@ -811,6 +815,8 @@ class ChartComparer:
 | 
			
		||||
             showerror("Error Loading Chart", f"Unable to open file: {filename!r}\n{e}")
 | 
			
		||||
 
 | 
			
		||||
     def load_chart(self, filename):
 | 
			
		||||
+        raise RuntimeError("Due to gh#nltk/nltk#3266, deserializing from " +
 | 
			
		||||
+                           "a pickle is forbidden.")
 | 
			
		||||
         with open(filename, "rb") as infile:
 | 
			
		||||
             chart = pickle.load(infile)
 | 
			
		||||
         name = os.path.basename(filename)
 | 
			
		||||
@@ -2268,6 +2274,10 @@ class ChartParserApp:
 | 
			
		||||
         if not filename:
 | 
			
		||||
             return
 | 
			
		||||
         try:
 | 
			
		||||
+            showerror("Security Error",
 | 
			
		||||
+                      "Due to gh#nltk/nltk#3266, deserializing from " +
 | 
			
		||||
+                      "a pickle is forbidden.")
 | 
			
		||||
+            return
 | 
			
		||||
             with open(filename, "rb") as infile:
 | 
			
		||||
                 chart = pickle.load(infile)
 | 
			
		||||
             self._chart = chart
 | 
			
		||||
@@ -2306,6 +2316,9 @@ class ChartParserApp:
 | 
			
		||||
             return
 | 
			
		||||
         try:
 | 
			
		||||
             if filename.endswith(".pickle"):
 | 
			
		||||
+                showerror("Due to gh#nltk/nltk#3266, deserializing from " +
 | 
			
		||||
+                          "a pickle is forbidden.")
 | 
			
		||||
+                return
 | 
			
		||||
                 with open(filename, "rb") as infile:
 | 
			
		||||
                     grammar = pickle.load(infile)
 | 
			
		||||
             else:
 | 
			
		||||
--- a/nltk/corpus/reader/util.py
 | 
			
		||||
+++ b/nltk/corpus/reader/util.py
 | 
			
		||||
@@ -521,6 +521,8 @@ class PickleCorpusView(StreamBackedCorpu
 | 
			
		||||
 
 | 
			
		||||
     def read_block(self, stream):
 | 
			
		||||
         result = []
 | 
			
		||||
+        raise RuntimeError("Due to gh#nltk/nltk#3266, deserializing from " +
 | 
			
		||||
+                           "a pickle is forbidden.")
 | 
			
		||||
         for i in range(self.BLOCK_SIZE):
 | 
			
		||||
             try:
 | 
			
		||||
                 result.append(pickle.load(stream))
 | 
			
		||||
--- a/nltk/data.py
 | 
			
		||||
+++ b/nltk/data.py
 | 
			
		||||
@@ -752,6 +752,8 @@ def load(
 | 
			
		||||
     if format == "raw":
 | 
			
		||||
         resource_val = opened_resource.read()
 | 
			
		||||
     elif format == "pickle":
 | 
			
		||||
+        raise RuntimeError("Due to gh#nltk/nltk#3266, deserializing from " +
 | 
			
		||||
+                           "a pickle is forbidden.")
 | 
			
		||||
         resource_val = pickle.load(opened_resource)
 | 
			
		||||
     elif format == "json":
 | 
			
		||||
         import json
 | 
			
		||||
--- a/nltk/parse/transitionparser.py
 | 
			
		||||
+++ b/nltk/parse/transitionparser.py
 | 
			
		||||
@@ -553,6 +553,8 @@ class TransitionParser(ParserI):
 | 
			
		||||
         """
 | 
			
		||||
         result = []
 | 
			
		||||
         # First load the model
 | 
			
		||||
+        raise RuntimeError("Due to gh#nltk/nltk#3266, deserializing from " +
 | 
			
		||||
+                           "a pickle is forbidden.")
 | 
			
		||||
         model = pickle.load(open(modelFile, "rb"))
 | 
			
		||||
         operation = Transition(self._algorithm)
 | 
			
		||||
 
 | 
			
		||||
--- a/nltk/tbl/demo.py
 | 
			
		||||
+++ b/nltk/tbl/demo.py
 | 
			
		||||
@@ -253,6 +253,8 @@ def postag(
 | 
			
		||||
                 )
 | 
			
		||||
             )
 | 
			
		||||
         with open(cache_baseline_tagger) as print_rules:
 | 
			
		||||
+            raise RuntimeError("Due to gh#nltk/nltk#3266, deserializing from " +
 | 
			
		||||
+                               "a pickle is forbidden.")
 | 
			
		||||
             baseline_tagger = pickle.load(print_rules)
 | 
			
		||||
             print(f"Reloaded pickled tagger from {cache_baseline_tagger}")
 | 
			
		||||
     else:
 | 
			
		||||
@@ -327,7 +329,7 @@ def postag(
 | 
			
		||||
         with open(serialize_output) as print_rules:
 | 
			
		||||
             brill_tagger_reloaded = pickle.load(print_rules)
 | 
			
		||||
         print(f"Reloaded pickled tagger from {serialize_output}")
 | 
			
		||||
-        taggedtest_reloaded = brill_tagger.tag_sents(testing_data)
 | 
			
		||||
+        taggedtest_reloaded = brill_tagger_reloaded.tag_sents(testing_data)
 | 
			
		||||
         if taggedtest == taggedtest_reloaded:
 | 
			
		||||
             print("Reloaded tagger tried on test set, results identical")
 | 
			
		||||
         else:
 | 
			
		||||
							
								
								
									
										3
									
								
								nltk-3.8.1.tar.gz
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										3
									
								
								nltk-3.8.1.tar.gz
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,3 @@
 | 
			
		||||
version https://git-lfs.github.com/spec/v1
 | 
			
		||||
oid sha256:676970e2b7aa0a7184e68f76e0c4f2756fd1b82559a509d5656a23117faeb658
 | 
			
		||||
size 2867926
 | 
			
		||||
@@ -1,3 +0,0 @@
 | 
			
		||||
version https://git-lfs.github.com/spec/v1
 | 
			
		||||
oid sha256:1834da3d0682cba4f2cede2f9aad6b0fafb6461ba451db0efb6f9c39798d64d3
 | 
			
		||||
size 4620388
 | 
			
		||||
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							@@ -1,3 +1,14 @@
 | 
			
		||||
-------------------------------------------------------------------
 | 
			
		||||
Mon Jul  1 21:02:45 UTC 2024 - Matej Cepl <mcepl@cepl.eu>
 | 
			
		||||
 | 
			
		||||
- Use tarball from GitHub instead of the Zip archive from PyPI,
 | 
			
		||||
  the latter has a very messy combination of CRLF and LF EOLs,
 | 
			
		||||
  which are hard to patch.
 | 
			
		||||
- Refresh all patches from the original locations.
 | 
			
		||||
- Add CVE-2024-39705-disable-download.patch to make a crude
 | 
			
		||||
  workaround for CVE-2024-39705 (gh#nltk/nltk#3266,
 | 
			
		||||
  bsc#1227174).
 | 
			
		||||
 | 
			
		||||
-------------------------------------------------------------------
 | 
			
		||||
Thu Mar 21 17:41:52 UTC 2024 - Ben Greiner <code@bnavigator.de>
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -16,6 +16,7 @@
 | 
			
		||||
#
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
%define modname nltk
 | 
			
		||||
Name:           python-nltk
 | 
			
		||||
Version:        3.8.1
 | 
			
		||||
Release:        0
 | 
			
		||||
@@ -23,7 +24,7 @@ Summary:        Natural Language Toolkit
 | 
			
		||||
License:        Apache-2.0
 | 
			
		||||
URL:            http://nltk.org/
 | 
			
		||||
# SourceRepository: https://github.com/nltk/nltk
 | 
			
		||||
Source0:        https://files.pythonhosted.org/packages/source/n/nltk/nltk-%{version}.zip
 | 
			
		||||
Source0:        https://github.com/nltk/%{modname}/archive/refs/tags/%{version}.tar.gz#/%{modname}-%{version}.tar.gz
 | 
			
		||||
# Download/Update NLTK data:
 | 
			
		||||
#     quilt setup python-nltk.spec
 | 
			
		||||
#     pushd nltk-?.?.?
 | 
			
		||||
@@ -62,6 +63,9 @@ Source99:       python-nltk.rpmlintrc
 | 
			
		||||
Patch0:         skip-networked-test.patch
 | 
			
		||||
# PATCH-FIX-UPSTREAM nltk-pr3207-py312.patch gh#nltk/nltk#3207
 | 
			
		||||
Patch1:         nltk-pr3207-py312.patch
 | 
			
		||||
# PATCH-FIX-UPSTREAM CVE-2024-39705-disable-download.patch bsc#1227174 mcepl@suse.com
 | 
			
		||||
# Forbid deserializing from pickle files as a crude workaround for CVE-2024-39705 (gh#nltk/nltk#3266)
 | 
			
		||||
Patch2:         CVE-2024-39705-disable-download.patch
 | 
			
		||||
BuildRequires:  %{python_module base >= 3.7}
 | 
			
		||||
BuildRequires:  %{python_module pip}
 | 
			
		||||
BuildRequires:  %{python_module setuptools}
 | 
			
		||||
@@ -118,7 +122,7 @@ Python modules, data sets and tutorials supporting research and
 | 
			
		||||
development in Natural Language Processing.
 | 
			
		||||
 | 
			
		||||
%prep
 | 
			
		||||
%autosetup -p1 -a1 -n nltk-%{version}
 | 
			
		||||
%setup -q -a1 -n %{modname}-%{version}
 | 
			
		||||
 | 
			
		||||
# Fix EOL
 | 
			
		||||
sed -i 's/\r/\n/g; s/\n$//' \
 | 
			
		||||
@@ -150,6 +154,8 @@ sed -E -i "s|#![[:space:]]*%{_bindir}/env python|#!%{_bindir}/python3|" \
 | 
			
		||||
    nltk_data/corpora/pl196x/splitter.py \
 | 
			
		||||
    tools/find_deprecated.py
 | 
			
		||||
 | 
			
		||||
%autopatch -p1
 | 
			
		||||
 | 
			
		||||
%build
 | 
			
		||||
%pyproject_wheel
 | 
			
		||||
 | 
			
		||||
@@ -164,7 +170,12 @@ chmod -x %{buildroot}%{$python_sitelib}/nltk/test/dependency.doctest
 | 
			
		||||
%check
 | 
			
		||||
export NLTK_DATA=$(readlink -f ./nltk_data/)
 | 
			
		||||
# export PYTEST_ADDOPTS="--doctest-modules"
 | 
			
		||||
%pytest -k 'not network'
 | 
			
		||||
# Skip tests requiring pickle.load gh#nltk/nltk#3266 (CVE-2024-39705)
 | 
			
		||||
skip_tests=" or test_basic or test_increment or test_pad_asterisk or test_pad_dotdot"
 | 
			
		||||
skip_tests+=" or test_pos_tag_eng or test_pos_tag_eng_universal or test_pos_tag_rus"
 | 
			
		||||
skip_tests+=" or test_pos_tag_rus_universal or test_pos_tag_unknown_lang"
 | 
			
		||||
skip_tests+=" or test_sent_tokenize or test_unspecified_lang or test_word_tokenize"
 | 
			
		||||
%pytest -k "not (network ${skip_tests})"
 | 
			
		||||
 | 
			
		||||
%post
 | 
			
		||||
%python_install_alternative nltk
 | 
			
		||||
 
 | 
			
		||||
@@ -32,4 +32,4 @@
 | 
			
		||||
+
 | 
			
		||||
 [metadata]
 | 
			
		||||
 license_files =
 | 
			
		||||
 	LICENSE.txt
 | 
			
		||||
     LICENSE.txt
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user