Compare commits
No commits in common. "factory" and "factory" have entirely different histories.
38
CVE-2024-39705.patch
Normal file
38
CVE-2024-39705.patch
Normal file
@ -0,0 +1,38 @@
|
||||
From a12d0a6a8cdba58d5e4e5f92ac62bb80fc26c624 Mon Sep 17 00:00:00 2001
|
||||
From: Eric Kafe <kafe.eric@gmail.com>
|
||||
Date: Tue, 23 Jul 2024 09:09:09 +0200
|
||||
Subject: [PATCH] Prevent data.load from unpickling classes or functions
|
||||
|
||||
---
|
||||
nltk/data.py | 11 ++++++++++-
|
||||
1 file changed, 10 insertions(+), 1 deletion(-)
|
||||
|
||||
diff --git a/nltk/data.py b/nltk/data.py
|
||||
index cc9229b0a2..fb242721c5 100644
|
||||
--- a/nltk/data.py
|
||||
+++ b/nltk/data.py
|
||||
@@ -658,6 +658,15 @@ def retrieve(resource_url, filename=None, verbose=True):
|
||||
}
|
||||
|
||||
|
||||
+def restricted_pickle_load(string):
|
||||
+ """
|
||||
+ Prevents any class or function from loading.
|
||||
+ """
|
||||
+ from nltk.app.wordnet_app import RestrictedUnpickler
|
||||
+
|
||||
+ return RestrictedUnpickler(BytesIO(string)).load()
|
||||
+
|
||||
+
|
||||
def load(
|
||||
resource_url,
|
||||
format="auto",
|
||||
@@ -751,7 +760,7 @@ def load(
|
||||
if format == "raw":
|
||||
resource_val = opened_resource.read()
|
||||
elif format == "pickle":
|
||||
- resource_val = pickle.load(opened_resource)
|
||||
+ resource_val = restricted_pickle_load(opened_resource.read())
|
||||
elif format == "json":
|
||||
import json
|
||||
|
15
_service
15
_service
@ -1,15 +0,0 @@
|
||||
<services>
|
||||
<service name="obs_scm" mode="manual">
|
||||
<param name="url">https://github.com/nltk/nltk</param>
|
||||
<param name="scm">git</param>
|
||||
<param name="exclude">web/*</param>
|
||||
<param name="version">3.9.1</param>
|
||||
<param name="revision">3.9.1</param>
|
||||
</service>
|
||||
<service mode="manual" name="set_version" />
|
||||
<service mode="buildtime" name="tar" />
|
||||
<service mode="buildtime" name="recompress">
|
||||
<param name="file">*.tar</param>
|
||||
<param name="compression">xz</param>
|
||||
</service>
|
||||
</services>
|
3
nltk-3.8.1.tar.gz
Normal file
3
nltk-3.8.1.tar.gz
Normal file
@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:676970e2b7aa0a7184e68f76e0c4f2756fd1b82559a509d5656a23117faeb658
|
||||
size 2867926
|
@ -1,3 +0,0 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:e1b59894ed0a2eebbb64fc4e0975216a5a7e1a949e61b9a4f22693e15536ff05
|
||||
size 8269325
|
110
nltk-pr3207-py312.patch
Normal file
110
nltk-pr3207-py312.patch
Normal file
@ -0,0 +1,110 @@
|
||||
From 25d35fc4283dedd2053ec6d821f4b707fff8d72c Mon Sep 17 00:00:00 2001
|
||||
From: Konstantin Chernyshev <k4black@ya.ru>
|
||||
Date: Thu, 16 Nov 2023 19:00:15 +0100
|
||||
Subject: [PATCH 1/8] ci: enable 3.12 in ci tests
|
||||
|
||||
---
|
||||
.github/workflows/ci.yaml | 2 +-
|
||||
README.md | 2 +-
|
||||
nltk/test/unit/translate/test_bleu.py | 1 -
|
||||
nltk/translate/bleu_score.py | 29 +++++++++++++++++++++++++++--
|
||||
setup.py | 3 ++-
|
||||
5 files changed, 31 insertions(+), 6 deletions(-)
|
||||
|
||||
--- a/.github/workflows/ci.yaml
|
||||
+++ b/.github/workflows/ci.yaml
|
||||
@@ -76,7 +76,7 @@ jobs:
|
||||
needs: [cache_nltk_data, cache_third_party]
|
||||
strategy:
|
||||
matrix:
|
||||
- python-version: ['3.7', '3.8', '3.9', '3.10', '3.11']
|
||||
+ python-version: ['3.7', '3.8', '3.9', '3.10', '3.11', '3.12']
|
||||
os: [ubuntu-latest, macos-latest, windows-latest]
|
||||
fail-fast: false
|
||||
runs-on: ${{ matrix.os }}
|
||||
--- a/README.md
|
||||
+++ b/README.md
|
||||
@@ -4,7 +4,7 @@
|
||||
|
||||
NLTK -- the Natural Language Toolkit -- is a suite of open source Python
|
||||
modules, data sets, and tutorials supporting research and development in Natural
|
||||
-Language Processing. NLTK requires Python version 3.7, 3.8, 3.9, 3.10 or 3.11.
|
||||
+Language Processing. NLTK requires Python version 3.7, 3.8, 3.9, 3.10, 3.11 or 3.12.
|
||||
|
||||
For documentation, please visit [nltk.org](https://www.nltk.org/).
|
||||
|
||||
--- a/nltk/test/unit/translate/test_bleu.py
|
||||
+++ b/nltk/test/unit/translate/test_bleu.py
|
||||
@@ -2,7 +2,6 @@
|
||||
Tests for BLEU translation evaluation metric
|
||||
"""
|
||||
|
||||
-import io
|
||||
import unittest
|
||||
|
||||
from nltk.data import find
|
||||
--- a/nltk/translate/bleu_score.py
|
||||
+++ b/nltk/translate/bleu_score.py
|
||||
@@ -7,16 +7,41 @@
|
||||
# For license information, see LICENSE.TXT
|
||||
|
||||
"""BLEU score implementation."""
|
||||
-
|
||||
import math
|
||||
import sys
|
||||
import warnings
|
||||
from collections import Counter
|
||||
-from fractions import Fraction
|
||||
+from fractions import Fraction as _Fraction
|
||||
|
||||
from nltk.util import ngrams
|
||||
|
||||
|
||||
+class Fraction(_Fraction):
|
||||
+ """Fraction with _normalize=False support for 3.12"""
|
||||
+
|
||||
+ def __new__(cls, numerator=0, denominator=None, _normalize=False):
|
||||
+ if sys.version_info >= (3, 12):
|
||||
+ self = super().__new__(cls, numerator, denominator)
|
||||
+ else:
|
||||
+ self = super().__new__(cls, numerator, denominator, _normalize=_normalize)
|
||||
+ self._normalize = _normalize
|
||||
+ self._original_numerator = numerator
|
||||
+ self._original_denominator = denominator
|
||||
+ return self
|
||||
+
|
||||
+ @property
|
||||
+ def numerator(self):
|
||||
+ if not self._normalize:
|
||||
+ return self._original_numerator
|
||||
+ return super().numerator
|
||||
+
|
||||
+ @property
|
||||
+ def denominator(self):
|
||||
+ if not self._normalize:
|
||||
+ return self._original_denominator
|
||||
+ return super().denominator
|
||||
+
|
||||
+
|
||||
def sentence_bleu(
|
||||
references,
|
||||
hypothesis,
|
||||
--- a/setup.py
|
||||
+++ b/setup.py
|
||||
@@ -67,7 +67,7 @@ setup(
|
||||
},
|
||||
long_description="""\
|
||||
The Natural Language Toolkit (NLTK) is a Python package for
|
||||
-natural language processing. NLTK requires Python 3.7, 3.8, 3.9, 3.10 or 3.11.""",
|
||||
+natural language processing. NLTK requires Python 3.7, 3.8, 3.9, 3.10, 3.11 or 3.12.""",
|
||||
license="Apache License, Version 2.0",
|
||||
keywords=[
|
||||
"NLP",
|
||||
@@ -100,6 +100,7 @@ natural language processing. NLTK requi
|
||||
"Programming Language :: Python :: 3.9",
|
||||
"Programming Language :: Python :: 3.10",
|
||||
"Programming Language :: Python :: 3.11",
|
||||
+ "Programming Language :: Python :: 3.12",
|
||||
"Topic :: Scientific/Engineering",
|
||||
"Topic :: Scientific/Engineering :: Artificial Intelligence",
|
||||
"Topic :: Scientific/Engineering :: Human Machine Interfaces",
|
@ -1,4 +0,0 @@
|
||||
name: nltk
|
||||
version: 3.9.1
|
||||
mtime: 1724010420
|
||||
commit: aca78cb2add4084f76b9eac921d8a73927d7a086
|
3
nltk_data.tar.xz
Normal file
3
nltk_data.tar.xz
Normal file
@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:f79462ac99f414b4850943720bed4a59c1bb15bfc8f1ce16b26165da6db07680
|
||||
size 393271816
|
@ -1,34 +1,3 @@
|
||||
-------------------------------------------------------------------
|
||||
Tue Oct 29 08:37:43 UTC 2024 - Daniel Garcia <daniel.garcia@suse.com>
|
||||
|
||||
- Use _service to download source and exclude documentation that has
|
||||
non-commercial license (boo#1232448)
|
||||
- Remove nltk_data to avoid redistribution of files with
|
||||
non-commercial license (boo#1232448):
|
||||
> NLTK corpora are provided under the terms given in the README file
|
||||
> for each corpus; all are redistributable and available for
|
||||
> non-commercial use.
|
||||
- Remove unneeded skip-networked-test.patch
|
||||
|
||||
-------------------------------------------------------------------
|
||||
Mon Sep 30 21:17:21 UTC 2024 - Thiago Bertoldi <thiago.bertoldi@suse.com>
|
||||
|
||||
- Update to 3.9.1 (changes since 3.8.1):
|
||||
* Fixed bug that prevented wordnet from loading
|
||||
* Fix security vulnerability CVE-2024-39705 (breaking change)
|
||||
* Replace pickled models (punkt, chunker, taggers) by new
|
||||
pickle-free "_tab" packages
|
||||
* No longer sort Wordnet synsets and relations (sort in calling
|
||||
function when required)
|
||||
* Only strip the last suffix in Wordnet Morphy, thus
|
||||
restricting synsets() results
|
||||
* Add Python 3.12 support
|
||||
* Many other minor fixes
|
||||
- Refresh nltk_data
|
||||
- Remove upstreamed patches:
|
||||
- CVE-2024-39705.patch
|
||||
- nltk-pr3207-py312.patch
|
||||
|
||||
-------------------------------------------------------------------
|
||||
Fri Jul 26 07:14:33 UTC 2024 - Daniel Garcia <daniel.garcia@suse.com>
|
||||
|
||||
@ -67,7 +36,8 @@ Tue Mar 28 08:36:04 UTC 2023 - pgajdos@suse.com
|
||||
-------------------------------------------------------------------
|
||||
Fri Jan 6 15:32:43 UTC 2023 - Yogalakshmi Arunachalam <yarunachalam@suse.com>
|
||||
|
||||
- Update to 3.8
|
||||
- Update to 3.8
|
||||
|
||||
* Refactor dispersion plot (#3082)
|
||||
* Provide type hints for LazyCorpusLoader variables (#3081)
|
||||
* Throw warning when LanguageModel is initialized with incorrect vocabulary (#3080)
|
||||
@ -102,7 +72,7 @@ Fri Jan 6 15:32:43 UTC 2023 - Yogalakshmi Arunachalam <yarunachalam@suse.com>
|
||||
* Fix LC cutoff policy of text tiling (#2936)
|
||||
* Optimize ConditionalFreqDist.__add__ performance (#2939)
|
||||
* Add Markdown corpus reader (#2902)
|
||||
|
||||
|
||||
-------------------------------------------------------------------
|
||||
Mon Dec 26 10:41:22 UTC 2022 - Matej Cepl <mcepl@suse.com>
|
||||
|
||||
@ -420,7 +390,7 @@ Fri Sep 23 12:29:05 UTC 2011 - saschpe@suse.de
|
||||
|
||||
-------------------------------------------------------------------
|
||||
Sun Feb 7 18:51:07 CST 2010 - oddrationale@gmail.com
|
||||
|
||||
|
||||
- fixed copyright and license statements
|
||||
- removed PyYAML, and added dependency to installers and download
|
||||
instructions
|
||||
@ -442,6 +412,6 @@ Thu Dec 10 17:23:51 CST 2009 - oddrationale@gmail.com
|
||||
- added Requires: python-yaml
|
||||
|
||||
-------------------------------------------------------------------
|
||||
Wed Dec 9 15:39:35 CST 2009 - oddrationale@gmail.com
|
||||
|
||||
Wed Dec 9 15:39:35 CST 2009 - oddrationale@gmail.com
|
||||
|
||||
- Initial Release (Version 2.0b7): Sun Feb 7 18:50:18 CST 2010
|
||||
|
@ -17,17 +17,14 @@
|
||||
|
||||
|
||||
%define modname nltk
|
||||
%{?sle15_python_module_pythons}
|
||||
Name: python-nltk
|
||||
Version: 3.9.1
|
||||
Version: 3.8.1
|
||||
Release: 0
|
||||
Summary: Natural Language Toolkit
|
||||
License: Apache-2.0
|
||||
URL: http://nltk.org/
|
||||
# SourceRepository: https://github.com/nltk/nltk
|
||||
# The _service downloads the source and repacks it without some doc files
|
||||
# that have a non-commercial license. boo#1232448
|
||||
Source0: nltk-%{version}.tar.xz
|
||||
Source0: https://github.com/nltk/%{modname}/archive/refs/tags/%{version}.tar.gz#/%{modname}-%{version}.tar.gz
|
||||
# Download/Update NLTK data:
|
||||
# quilt setup python-nltk.spec
|
||||
# pushd nltk-?.?.?
|
||||
@ -59,12 +56,15 @@ Source0: nltk-%{version}.tar.xz
|
||||
# tar -cJf ../nltk_data.tar.xz nltk_data
|
||||
# popd
|
||||
# see https://www.nltk.org/data.html for more details
|
||||
########### NOTICE #########
|
||||
# Do not distribute nltk_data.tar.xz because it's licensed under
|
||||
# a non-commercial license, boo#1232448
|
||||
############################
|
||||
# Source1: nltk_data.tar.xz
|
||||
Source1: nltk_data.tar.xz
|
||||
Source99: python-nltk.rpmlintrc
|
||||
# PATCH-FIX-UPSTREAM skip-networked-test.patch gh#nltk/nltk#2969 mcepl@suse.com
|
||||
# skip tests requiring network connection
|
||||
Patch0: skip-networked-test.patch
|
||||
# PATCH-FIX-UPSTREAM nltk-pr3207-py312.patch gh#nltk/nltk#3207
|
||||
Patch1: nltk-pr3207-py312.patch
|
||||
# PATCH-FIX-UPSTREAM CVE-2024-39705.patch bsc#1227174 gh#nltk/nltk#3290
|
||||
Patch2: CVE-2024-39705.patch
|
||||
BuildRequires: %{python_module base >= 3.7}
|
||||
BuildRequires: %{python_module pip}
|
||||
BuildRequires: %{python_module setuptools}
|
||||
@ -121,7 +121,7 @@ Python modules, data sets and tutorials supporting research and
|
||||
development in Natural Language Processing.
|
||||
|
||||
%prep
|
||||
%setup -q -n %{modname}-%{version}
|
||||
%setup -q -a1 -n %{modname}-%{version}
|
||||
|
||||
# Fix EOL
|
||||
sed -i 's/\r/\n/g; s/\n$//' \
|
||||
@ -150,6 +150,7 @@ sed -E -i "/#![[:space:]]*\/usr\/bin\/env python/d" \
|
||||
sed -E -i "s|#![[:space:]]*%{_bindir}/env python|#!%{_bindir}/python3|" \
|
||||
setup.py \
|
||||
tools/global_replace.py \
|
||||
nltk_data/corpora/pl196x/splitter.py \
|
||||
tools/find_deprecated.py
|
||||
|
||||
%autopatch -p1
|
||||
@ -165,16 +166,15 @@ sed -E -i "s|#![[:space:]]*%{_bindir}/env python|#!%{_bindir}/python3|" \
|
||||
chmod -x %{buildroot}%{$python_sitelib}/nltk/test/dependency.doctest
|
||||
}
|
||||
|
||||
# Do not test, there's no nltk_data, boo#1232448
|
||||
# %%check
|
||||
# export NLTK_DATA=$(readlink -f ./nltk_data/)
|
||||
# # export PYTEST_ADDOPTS="--doctest-modules"
|
||||
# # Skip tests requiring pickle.load gh#nltk/nltk#3266 (CVE-2024-39705)
|
||||
# skip_tests=" or test_basic or test_increment or test_pad_asterisk or test_pad_dotdot"
|
||||
# skip_tests+=" or test_pos_tag_eng or test_pos_tag_eng_universal or test_pos_tag_rus"
|
||||
# skip_tests+=" or test_pos_tag_rus_universal or test_pos_tag_unknown_lang"
|
||||
# skip_tests+=" or test_sent_tokenize or test_unspecified_lang or test_word_tokenize"
|
||||
# %%pytest -k "not (network ${skip_tests})"
|
||||
%check
|
||||
export NLTK_DATA=$(readlink -f ./nltk_data/)
|
||||
# export PYTEST_ADDOPTS="--doctest-modules"
|
||||
# Skip tests requiring pickle.load gh#nltk/nltk#3266 (CVE-2024-39705)
|
||||
skip_tests=" or test_basic or test_increment or test_pad_asterisk or test_pad_dotdot"
|
||||
skip_tests+=" or test_pos_tag_eng or test_pos_tag_eng_universal or test_pos_tag_rus"
|
||||
skip_tests+=" or test_pos_tag_rus_universal or test_pos_tag_unknown_lang"
|
||||
skip_tests+=" or test_sent_tokenize or test_unspecified_lang or test_word_tokenize"
|
||||
%pytest -k "not (network ${skip_tests})"
|
||||
|
||||
%post
|
||||
%python_install_alternative nltk
|
||||
|
35
skip-networked-test.patch
Normal file
35
skip-networked-test.patch
Normal file
@ -0,0 +1,35 @@
|
||||
---
|
||||
nltk/test/unit/test_downloader.py | 4 ++++
|
||||
setup.cfg | 4 ++++
|
||||
2 files changed, 8 insertions(+)
|
||||
|
||||
--- a/nltk/test/unit/test_downloader.py
|
||||
+++ b/nltk/test/unit/test_downloader.py
|
||||
@@ -1,6 +1,9 @@
|
||||
from nltk import download
|
||||
|
||||
+import pytest
|
||||
|
||||
+
|
||||
+@pytest.mark.network
|
||||
def test_downloader_using_existing_parent_download_dir(tmp_path):
|
||||
"""Test that download works properly when the parent folder of the download_dir exists"""
|
||||
|
||||
@@ -9,6 +12,7 @@ def test_downloader_using_existing_paren
|
||||
assert download_status is True
|
||||
|
||||
|
||||
+@pytest.mark.network
|
||||
def test_downloader_using_non_existing_parent_download_dir(tmp_path):
|
||||
"""Test that download works properly when the parent folder of the download_dir does not exist"""
|
||||
|
||||
--- a/setup.cfg
|
||||
+++ b/setup.cfg
|
||||
@@ -1,3 +1,7 @@
|
||||
+[tool:pytest]
|
||||
+markers =
|
||||
+ network: test case requires network connection
|
||||
+
|
||||
[metadata]
|
||||
license_files =
|
||||
LICENSE.txt
|
Loading…
x
Reference in New Issue
Block a user