forked from pool/python-nltk
		
	- Use tarball from GitHub instead of the Zip archive from PyPI,
the latter has a very messy combination of CRLF and LF EOLs, which is hard to patch. - Refresh all patches from the original locations. - Add CVE-2024-39705-disable-download.patch as a crude workaround for CVE-2024-39705 (gh#nltk/nltk#3266, bsc#1227174). OBS-URL: https://build.opensuse.org/package/show/devel:languages:python/python-nltk?expand=0&rev=47
This commit is contained in:
		
							
								
								
									
										104
									
								
								CVE-2024-39705-disable-download.patch
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										104
									
								
								CVE-2024-39705-disable-download.patch
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,104 @@ | |||||||
|  | --- | ||||||
|  |  nltk/app/chartparser_app.py    |   13 +++++++++++++ | ||||||
|  |  nltk/corpus/reader/util.py     |    2 ++ | ||||||
|  |  nltk/data.py                   |    2 ++ | ||||||
|  |  nltk/parse/transitionparser.py |    2 ++ | ||||||
|  |  nltk/tbl/demo.py               |    4 +++- | ||||||
|  |  5 files changed, 22 insertions(+), 1 deletion(-) | ||||||
|  |  | ||||||
|  | --- a/nltk/app/chartparser_app.py | ||||||
|  | +++ b/nltk/app/chartparser_app.py | ||||||
|  | @@ -800,6 +800,10 @@ class ChartComparer: | ||||||
|  |              showerror("Error Saving Chart", f"Unable to open file: {filename!r}\n{e}") | ||||||
|  |   | ||||||
|  |      def load_chart_dialog(self, *args): | ||||||
|  | +        showerror("Security Error", | ||||||
|  | +                  "Due to gh#nltk/nltk#3266, deserializing from " + | ||||||
|  | +                  "a pickle is forbidden.") | ||||||
|  | +        return | ||||||
|  |          filename = askopenfilename( | ||||||
|  |              filetypes=self.CHART_FILE_TYPES, defaultextension=".pickle" | ||||||
|  |          ) | ||||||
|  | @@ -811,6 +815,8 @@ class ChartComparer: | ||||||
|  |              showerror("Error Loading Chart", f"Unable to open file: {filename!r}\n{e}") | ||||||
|  |   | ||||||
|  |      def load_chart(self, filename): | ||||||
|  | +        raise RuntimeError("Due to gh#nltk/nltk#3266, deserializing from " + | ||||||
|  | +                           "a pickle is forbidden.") | ||||||
|  |          with open(filename, "rb") as infile: | ||||||
|  |              chart = pickle.load(infile) | ||||||
|  |          name = os.path.basename(filename) | ||||||
|  | @@ -2268,6 +2274,10 @@ class ChartParserApp: | ||||||
|  |          if not filename: | ||||||
|  |              return | ||||||
|  |          try: | ||||||
|  | +            showerror("Security Error", | ||||||
|  | +                      "Due to gh#nltk/nltk#3266, deserializing from " + | ||||||
|  | +                      "a pickle is forbidden.") | ||||||
|  | +            return | ||||||
|  |              with open(filename, "rb") as infile: | ||||||
|  |                  chart = pickle.load(infile) | ||||||
|  |              self._chart = chart | ||||||
|  | @@ -2306,6 +2316,9 @@ class ChartParserApp: | ||||||
|  |              return | ||||||
|  |          try: | ||||||
|  |              if filename.endswith(".pickle"): | ||||||
|  | +                showerror("Due to gh#nltk/nltk#3266, deserializing from " + | ||||||
|  | +                          "a pickle is forbidden.") | ||||||
|  | +                return | ||||||
|  |                  with open(filename, "rb") as infile: | ||||||
|  |                      grammar = pickle.load(infile) | ||||||
|  |              else: | ||||||
|  | --- a/nltk/corpus/reader/util.py | ||||||
|  | +++ b/nltk/corpus/reader/util.py | ||||||
|  | @@ -521,6 +521,8 @@ class PickleCorpusView(StreamBackedCorpu | ||||||
|  |   | ||||||
|  |      def read_block(self, stream): | ||||||
|  |          result = [] | ||||||
|  | +        raise RuntimeError("Due to gh#nltk/nltk#3266, deserializing from " + | ||||||
|  | +                           "a pickle is forbidden.") | ||||||
|  |          for i in range(self.BLOCK_SIZE): | ||||||
|  |              try: | ||||||
|  |                  result.append(pickle.load(stream)) | ||||||
|  | --- a/nltk/data.py | ||||||
|  | +++ b/nltk/data.py | ||||||
|  | @@ -752,6 +752,8 @@ def load( | ||||||
|  |      if format == "raw": | ||||||
|  |          resource_val = opened_resource.read() | ||||||
|  |      elif format == "pickle": | ||||||
|  | +        raise RuntimeError("Due to gh#nltk/nltk#3266, deserializing from " + | ||||||
|  | +                           "a pickle is forbidden.") | ||||||
|  |          resource_val = pickle.load(opened_resource) | ||||||
|  |      elif format == "json": | ||||||
|  |          import json | ||||||
|  | --- a/nltk/parse/transitionparser.py | ||||||
|  | +++ b/nltk/parse/transitionparser.py | ||||||
|  | @@ -553,6 +553,8 @@ class TransitionParser(ParserI): | ||||||
|  |          """ | ||||||
|  |          result = [] | ||||||
|  |          # First load the model | ||||||
|  | +        raise RuntimeError("Due to gh#nltk/nltk#3266, deserializing from " + | ||||||
|  | +                           "a pickle is forbidden.") | ||||||
|  |          model = pickle.load(open(modelFile, "rb")) | ||||||
|  |          operation = Transition(self._algorithm) | ||||||
|  |   | ||||||
|  | --- a/nltk/tbl/demo.py | ||||||
|  | +++ b/nltk/tbl/demo.py | ||||||
|  | @@ -253,6 +253,8 @@ def postag( | ||||||
|  |                  ) | ||||||
|  |              ) | ||||||
|  |          with open(cache_baseline_tagger) as print_rules: | ||||||
|  | +            raise RuntimeError("Due to gh#nltk/nltk#3266, deserializing from " + | ||||||
|  | +                               "a pickle is forbidden.") | ||||||
|  |              baseline_tagger = pickle.load(print_rules) | ||||||
|  |              print(f"Reloaded pickled tagger from {cache_baseline_tagger}") | ||||||
|  |      else: | ||||||
|  | @@ -327,7 +329,7 @@ def postag( | ||||||
|  |          with open(serialize_output) as print_rules: | ||||||
|  |              brill_tagger_reloaded = pickle.load(print_rules) | ||||||
|  |          print(f"Reloaded pickled tagger from {serialize_output}") | ||||||
|  | -        taggedtest_reloaded = brill_tagger.tag_sents(testing_data) | ||||||
|  | +        taggedtest_reloaded = brill_tagger_reloaded.tag_sents(testing_data) | ||||||
|  |          if taggedtest == taggedtest_reloaded: | ||||||
|  |              print("Reloaded tagger tried on test set, results identical") | ||||||
|  |          else: | ||||||
							
								
								
									
										3
									
								
								nltk-3.8.1.tar.gz
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										3
									
								
								nltk-3.8.1.tar.gz
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,3 @@ | |||||||
|  | version https://git-lfs.github.com/spec/v1 | ||||||
|  | oid sha256:676970e2b7aa0a7184e68f76e0c4f2756fd1b82559a509d5656a23117faeb658 | ||||||
|  | size 2867926 | ||||||
| @@ -1,3 +0,0 @@ | |||||||
| version https://git-lfs.github.com/spec/v1 |  | ||||||
| oid sha256:1834da3d0682cba4f2cede2f9aad6b0fafb6461ba451db0efb6f9c39798d64d3 |  | ||||||
| size 4620388 |  | ||||||
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							| @@ -1,3 +1,14 @@ | |||||||
|  | ------------------------------------------------------------------- | ||||||
|  | Mon Jul  1 21:02:45 UTC 2024 - Matej Cepl <mcepl@cepl.eu> | ||||||
|  |  | ||||||
|  | - Use tarball from GitHub instead of the Zip archive from PyPI, | ||||||
|  |   the latter has a very messy combination of CRLF and LF EOLs, | ||||||
|  |   which is hard to patch. | ||||||
|  | - Refresh all patches from the original locations. | ||||||
|  | - Add CVE-2024-39705-disable-download.patch as a crude | ||||||
|  |   workaround for CVE-2024-39705 that forbids unpickling | ||||||
|  |   (gh#nltk/nltk#3266, bsc#1227174). | ||||||
|  |  | ||||||
| ------------------------------------------------------------------- | ------------------------------------------------------------------- | ||||||
| Thu Mar 21 17:41:52 UTC 2024 - Ben Greiner <code@bnavigator.de> | Thu Mar 21 17:41:52 UTC 2024 - Ben Greiner <code@bnavigator.de> | ||||||
|  |  | ||||||
|   | |||||||
| @@ -16,6 +16,7 @@ | |||||||
| # | # | ||||||
|  |  | ||||||
|  |  | ||||||
|  | %define modname nltk | ||||||
| Name:           python-nltk | Name:           python-nltk | ||||||
| Version:        3.8.1 | Version:        3.8.1 | ||||||
| Release:        0 | Release:        0 | ||||||
| @@ -23,7 +24,7 @@ Summary:        Natural Language Toolkit | |||||||
| License:        Apache-2.0 | License:        Apache-2.0 | ||||||
| URL:            http://nltk.org/ | URL:            http://nltk.org/ | ||||||
| # SourceRepository: https://github.com/nltk/nltk | # SourceRepository: https://github.com/nltk/nltk | ||||||
| Source0:        https://files.pythonhosted.org/packages/source/n/nltk/nltk-%{version}.zip | Source0:        https://github.com/nltk/%{modname}/archive/refs/tags/%{version}.tar.gz#/%{modname}-%{version}.tar.gz | ||||||
| # Download/Update NLTK data: | # Download/Update NLTK data: | ||||||
| #     quilt setup python-nltk.spec | #     quilt setup python-nltk.spec | ||||||
| #     pushd nltk-?.?.? | #     pushd nltk-?.?.? | ||||||
| @@ -62,6 +63,9 @@ Source99:       python-nltk.rpmlintrc | |||||||
| Patch0:         skip-networked-test.patch | Patch0:         skip-networked-test.patch | ||||||
| # PATCH-FIX-UPSTREAM nltk-pr3207-py312.patch gh#nltk/nltk#3207 | # PATCH-FIX-UPSTREAM nltk-pr3207-py312.patch gh#nltk/nltk#3207 | ||||||
| Patch1:         nltk-pr3207-py312.patch | Patch1:         nltk-pr3207-py312.patch | ||||||
|  | # PATCH-FIX-UPSTREAM CVE-2024-39705-disable-download.patch bsc#1227174 mcepl@suse.com | ||||||
|  | # Make pickle deserialization fail with an error, as a crude workaround for gh#nltk/nltk#3266 | ||||||
|  | Patch2:         CVE-2024-39705-disable-download.patch | ||||||
| BuildRequires:  %{python_module base >= 3.7} | BuildRequires:  %{python_module base >= 3.7} | ||||||
| BuildRequires:  %{python_module pip} | BuildRequires:  %{python_module pip} | ||||||
| BuildRequires:  %{python_module setuptools} | BuildRequires:  %{python_module setuptools} | ||||||
| @@ -118,7 +122,7 @@ Python modules, data sets and tutorials supporting research and | |||||||
| development in Natural Language Processing. | development in Natural Language Processing. | ||||||
|  |  | ||||||
| %prep | %prep | ||||||
| %autosetup -p1 -a1 -n nltk-%{version} | %setup -q -a1 -n %{modname}-%{version} | ||||||
|  |  | ||||||
| # Fix EOL | # Fix EOL | ||||||
| sed -i 's/\r/\n/g; s/\n$//' \ | sed -i 's/\r/\n/g; s/\n$//' \ | ||||||
| @@ -150,6 +154,8 @@ sed -E -i "s|#![[:space:]]*%{_bindir}/env python|#!%{_bindir}/python3|" \ | |||||||
|     nltk_data/corpora/pl196x/splitter.py \ |     nltk_data/corpora/pl196x/splitter.py \ | ||||||
|     tools/find_deprecated.py |     tools/find_deprecated.py | ||||||
|  |  | ||||||
|  | %autopatch -p1 | ||||||
|  |  | ||||||
| %build | %build | ||||||
| %pyproject_wheel | %pyproject_wheel | ||||||
|  |  | ||||||
| @@ -164,7 +170,12 @@ chmod -x %{buildroot}%{$python_sitelib}/nltk/test/dependency.doctest | |||||||
| %check | %check | ||||||
| export NLTK_DATA=$(readlink -f ./nltk_data/) | export NLTK_DATA=$(readlink -f ./nltk_data/) | ||||||
| # export PYTEST_ADDOPTS="--doctest-modules" | # export PYTEST_ADDOPTS="--doctest-modules" | ||||||
| %pytest -k 'not network' | # Skip tests requiring pickle.load gh#nltk/nltk#3266 (CVE-2024-39705) | ||||||
|  | skip_tests=" or test_basic or test_increment or test_pad_asterisk or test_pad_dotdot" | ||||||
|  | skip_tests+=" or test_pos_tag_eng or test_pos_tag_eng_universal or test_pos_tag_rus" | ||||||
|  | skip_tests+=" or test_pos_tag_rus_universal or test_pos_tag_unknown_lang" | ||||||
|  | skip_tests+=" or test_sent_tokenize or test_unspecified_lang or test_word_tokenize" | ||||||
|  | %pytest -k "not (network ${skip_tests})" | ||||||
|  |  | ||||||
| %post | %post | ||||||
| %python_install_alternative nltk | %python_install_alternative nltk | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user