forked from pool/python-nltk
Accepting request 1045543 from devel:languages:python
- Complete nltk_data.tar.xz for offline testing
- Fix failing tests (gh#nltk/nltk#2969) by adding patches:
  - port-2to3.patch
  - skip-networked-test.patch
- Clean up the SPEC to get rid of rpmlint warnings.

OBS-URL: https://build.opensuse.org/request/show/1045543
OBS-URL: https://build.opensuse.org/package/show/openSUSE:Factory/python-nltk?expand=0&rev=12
This commit is contained in:
commit
f9e9dcd3e8
3
nltk_data.tar.xz
Normal file
3
nltk_data.tar.xz
Normal file
@ -0,0 +1,3 @@
|
|||||||
|
version https://git-lfs.github.com/spec/v1
|
||||||
|
oid sha256:f41383a4774bf7227f4563f46543460ba07a6921f7bcc6185519e87ea9e4323f
|
||||||
|
size 453871052
|
48
port-2to3.patch
Normal file
48
port-2to3.patch
Normal file
@ -0,0 +1,48 @@
|
|||||||
|
---
|
||||||
|
nltk_data/corpora/pl196x/splitter.py | 4 ++--
|
||||||
|
nltk_data/taggers/universal_tagset/universal_tags.py | 5 -----
|
||||||
|
tools/find_deprecated.py | 2 +-
|
||||||
|
3 files changed, 3 insertions(+), 8 deletions(-)
|
||||||
|
|
||||||
|
--- a/nltk_data/corpora/pl196x/splitter.py
|
||||||
|
+++ b/nltk_data/corpora/pl196x/splitter.py
|
||||||
|
@@ -1,4 +1,4 @@
|
||||||
|
-#!/usr/bin/python
|
||||||
|
+#!/usr/bin/python3
|
||||||
|
|
||||||
|
import sys, re
|
||||||
|
|
||||||
|
@@ -7,7 +7,7 @@ TEXTID = re.compile(r'<text id="(.*)">')
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
if len(sys.argv) != 2:
|
||||||
|
- print 'One argument required: a pl196x corpus to split.'
|
||||||
|
+ print('One argument required: a pl196x corpus to split.')
|
||||||
|
sys.exit()
|
||||||
|
|
||||||
|
inputFileName = sys.argv[1]
|
||||||
|
--- a/nltk_data/taggers/universal_tagset/universal_tags.py
|
||||||
|
+++ b/nltk_data/taggers/universal_tagset/universal_tags.py
|
||||||
|
@@ -22,11 +22,6 @@ X - other: foreign words, typos, abbrevi
|
||||||
|
@author: Nathan Schneider (nschneid)
|
||||||
|
@since: 2011-05-06
|
||||||
|
'''
|
||||||
|
-
|
||||||
|
-# Strive towards Python 3 compatibility
|
||||||
|
-from __future__ import print_function, unicode_literals, division
|
||||||
|
-from future_builtins import map, filter
|
||||||
|
-
|
||||||
|
import re, glob
|
||||||
|
from collections import defaultdict
|
||||||
|
|
||||||
|
--- a/tools/find_deprecated.py
|
||||||
|
+++ b/tools/find_deprecated.py
|
||||||
|
@@ -29,7 +29,7 @@ import textwrap
|
||||||
|
import tokenize
|
||||||
|
from doctest import DocTestParser, register_optionflag
|
||||||
|
|
||||||
|
-from cStringIO import StringIO
|
||||||
|
+from io import StringIO
|
||||||
|
|
||||||
|
import nltk.corpus
|
||||||
|
from nltk import defaultdict
|
@ -1,3 +1,12 @@
|
|||||||
|
-------------------------------------------------------------------
|
||||||
|
Mon Dec 26 10:41:22 UTC 2022 - Matej Cepl <mcepl@suse.com>
|
||||||
|
|
||||||
|
- Complete nltk_data.tar.xz for offline testing
|
||||||
|
- Fix failing tests (gh#nltk/nltk#2969) by adding patches:
|
||||||
|
- port-2to3.patch
|
||||||
|
- skip-networked-test.patch
|
||||||
|
- Clean up the SPEC to get rid of rpmlint warnings.
|
||||||
|
|
||||||
-------------------------------------------------------------------
|
-------------------------------------------------------------------
|
||||||
Tue Mar 22 07:48:14 UTC 2022 - Matej Cepl <mcepl@suse.com>
|
Tue Mar 22 07:48:14 UTC 2022 - Matej Cepl <mcepl@suse.com>
|
||||||
|
|
||||||
|
1
python-nltk.rpmlintrc
Normal file
1
python-nltk.rpmlintrc
Normal file
@ -0,0 +1 @@
|
|||||||
|
# Silence the zero-length-file error reported for nltk/tbl/api.py.
# \d+ (not \d) so the filter also matches two-digit minor versions
# such as python3.10 and python3.11.
addFilter("E: zero-length /usr/lib/python3\.\d+/site-packages/nltk/tbl/api\.py")
|
@ -25,7 +25,18 @@ Release: 0
|
|||||||
Summary: Natural Language Toolkit
|
Summary: Natural Language Toolkit
|
||||||
License: Apache-2.0
|
License: Apache-2.0
|
||||||
URL: http://nltk.org/
|
URL: http://nltk.org/
|
||||||
Source: https://files.pythonhosted.org/packages/source/n/nltk/%{pyname}-%{version}.zip
|
Source0: https://files.pythonhosted.org/packages/source/n/nltk/%{pyname}-%{version}.zip
|
||||||
|
# Downloaded NLTK data via python3 -m nltk.downloader,
|
||||||
|
# then unzipped the downloaded zip archives.
|
||||||
|
# see https://www.nltk.org/data.html for more details
|
||||||
|
Source1: nltk_data.tar.xz
|
||||||
|
Source99: python-nltk.rpmlintrc
|
||||||
|
# PATCH-FIX-UPSTREAM skip-networked-test.patch gh#nltk/nltk#2969 mcepl@suse.com
|
||||||
|
# skip tests requiring network connection
|
||||||
|
Patch0: skip-networked-test.patch
|
||||||
|
# PATCH-FIX-UPSTREAM port-2to3.patch gh#nltk/nltk#2969 mcepl@suse.com
|
||||||
|
# port scripts in nltk_data to Python 3
|
||||||
|
Patch1: port-2to3.patch
|
||||||
BuildRequires: %{python_module regex}
|
BuildRequires: %{python_module regex}
|
||||||
BuildRequires: %{python_module setuptools}
|
BuildRequires: %{python_module setuptools}
|
||||||
BuildRequires: %{python_module six}
|
BuildRequires: %{python_module six}
|
||||||
@ -33,6 +44,27 @@ BuildRequires: %{pythons}
|
|||||||
BuildRequires: fdupes
|
BuildRequires: fdupes
|
||||||
BuildRequires: python-rpm-macros
|
BuildRequires: python-rpm-macros
|
||||||
BuildRequires: unzip
|
BuildRequires: unzip
|
||||||
|
# For testing
|
||||||
|
BuildRequires: %{python_module tk}
|
||||||
|
BuildRequires: %{python_module click}
|
||||||
|
BuildRequires: %{python_module pytest}
|
||||||
|
# BuildRequires: %%{python_module gensim}
|
||||||
|
BuildRequires: %{python_module joblib}
|
||||||
|
BuildRequires: %{python_module Jinja2}
|
||||||
|
BuildRequires: %{python_module matplotlib}
|
||||||
|
BuildRequires: %{python_module numpy}
|
||||||
|
BuildRequires: %{python_module pyparsing}
|
||||||
|
BuildRequires: %{python_module pytest-cov}
|
||||||
|
BuildRequires: %{python_module pytest-mock}
|
||||||
|
BuildRequires: %{python_module python-crfsuite}
|
||||||
|
BuildRequires: %{python_module regex}
|
||||||
|
BuildRequires: %{python_module requests}
|
||||||
|
BuildRequires: %{python_module scikit-learn}
|
||||||
|
BuildRequires: %{python_module scipy}
|
||||||
|
BuildRequires: %{python_module text-unidecode}
|
||||||
|
BuildRequires: %{python_module tqdm}
|
||||||
|
BuildRequires: %{python_module twython}
|
||||||
|
#
|
||||||
Requires: python-regex
|
Requires: python-regex
|
||||||
Requires: python-six
|
Requires: python-six
|
||||||
Recommends: python-gensim
|
Recommends: python-gensim
|
||||||
@ -49,19 +81,49 @@ Requires(postun):update-alternatives
|
|||||||
BuildArch: noarch
|
BuildArch: noarch
|
||||||
%python_subpackages
|
%python_subpackages
|
||||||
|
|
||||||
|
# changedir = nltk/test
|
||||||
|
|
||||||
%description
|
%description
|
||||||
NLTK -- the Natural Language Toolkit -- is a suite of
|
NLTK -- the Natural Language Toolkit -- is a suite of
|
||||||
Python modules, data sets and tutorials supporting research and
|
Python modules, data sets and tutorials supporting research and
|
||||||
development in Natural Language Processing.
|
development in Natural Language Processing.
|
||||||
|
|
||||||
%prep
|
%prep
|
||||||
%autosetup -p1 -n %{pyname}-%{version}
|
%autosetup -p1 -a1 -n %{pyname}-%{version}
|
||||||
|
|
||||||
# sed -i "1,4{/\/usr\/bin\/env/d}" nltk/corpus/reader/knbc.py
|
# Remove obsolete scripts
|
||||||
# sed -i "1,4{/\/usr\/bin\/env/d}" nltk/test/runtests.py
|
rm tools/nltk_term_index.py tools/run_doctests.py nltk_data/corpora/semcor/semcor.py
|
||||||
# sed -i "1,4{/\/usr\/bin\/env/d}" nltk/test/unit/test_tgrep.py
|
|
||||||
# sed -i "1,4{/\/usr\/bin\/env/d}" nltk/tgrep.py
|
# Fix EOL
|
||||||
# sed -i "1,4{/\/usr\/bin\/env/d}" nltk/tokenize/stanford_segmenter.py
|
sed -i 's/\r/\n/g; s/\n$//' \
|
||||||
|
README.md \
|
||||||
|
nltk/corpus/reader/knbc.py \
|
||||||
|
nltk/test/unit/test_tgrep.py \
|
||||||
|
nltk/tgrep.py \
|
||||||
|
nltk/tokenize/stanford_segmenter.py \
|
||||||
|
nltk/corpus/reader/knbc.py \
|
||||||
|
nltk/test/unit/test_tgrep.py \
|
||||||
|
nltk/tgrep.py \
|
||||||
|
nltk/tokenize/stanford_segmenter.py \
|
||||||
|
nltk/corpus/reader/knbc.py \
|
||||||
|
nltk/test/unit/test_tgrep.py \
|
||||||
|
nltk/tgrep.py \
|
||||||
|
nltk/tokenize/stanford_segmenter.py
|
||||||
|
|
||||||
|
# Remove unnecessary shebangs
|
||||||
|
sed -E -i "/#![[:space:]]*\/usr\/bin\/env python/d" \
|
||||||
|
nltk/tgrep.py \
|
||||||
|
nltk/tokenize/stanford_segmenter.py \
|
||||||
|
nltk/test/unit/test_tgrep.py \
|
||||||
|
nltk/corpus/reader/knbc.py
|
||||||
|
|
||||||
|
# Switch shebangs to the standard Python interpreter
|
||||||
|
sed -E -i "s|#![[:space:]]*%{_bindir}/env python|#!%{_bindir}/python3|" \
|
||||||
|
setup.py \
|
||||||
|
tools/global_replace.py \
|
||||||
|
nltk_data/corpora/pl196x/splitter.py \
|
||||||
|
tools/find_deprecated.py \
|
||||||
|
tools/svnmime.py
|
||||||
|
|
||||||
%build
|
%build
|
||||||
%python_build
|
%python_build
|
||||||
@ -75,8 +137,9 @@ chmod -x %{buildroot}%{$python_sitelib}/nltk/test/dependency.doctest
|
|||||||
}
|
}
|
||||||
|
|
||||||
%check
|
%check
|
||||||
# FOLLOWING http://www.nltk.org/install.html
|
export NLTK_DATA=$(readlink -f ./nltk_data/)
|
||||||
%python_exec -c "import nltk" || exit 1
|
# export PYTEST_ADDOPTS="--doctest-modules"
|
||||||
|
%pytest -k 'not network'
|
||||||
|
|
||||||
%post
|
%post
|
||||||
%python_install_alternative nltk
|
%python_install_alternative nltk
|
||||||
|
35
skip-networked-test.patch
Normal file
35
skip-networked-test.patch
Normal file
@ -0,0 +1,35 @@
|
|||||||
|
---
|
||||||
|
nltk/test/unit/test_downloader.py | 4 ++++
|
||||||
|
setup.cfg | 4 ++++
|
||||||
|
2 files changed, 8 insertions(+)
|
||||||
|
|
||||||
|
--- a/nltk/test/unit/test_downloader.py
|
||||||
|
+++ b/nltk/test/unit/test_downloader.py
|
||||||
|
@@ -1,6 +1,9 @@
|
||||||
|
from nltk import download
|
||||||
|
|
||||||
|
+import pytest
|
||||||
|
|
||||||
|
+
|
||||||
|
+@pytest.mark.network
|
||||||
|
def test_downloader_using_existing_parent_download_dir(tmp_path):
|
||||||
|
"""Test that download works properly when the parent folder of the download_dir exists"""
|
||||||
|
|
||||||
|
@@ -9,6 +12,7 @@ def test_downloader_using_existing_paren
|
||||||
|
assert download_status is True
|
||||||
|
|
||||||
|
|
||||||
|
+@pytest.mark.network
|
||||||
|
def test_downloader_using_non_existing_parent_download_dir(tmp_path):
|
||||||
|
"""Test that download works properly when the parent folder of the download_dir does not exist"""
|
||||||
|
|
||||||
|
--- a/setup.cfg
|
||||||
|
+++ b/setup.cfg
|
||||||
|
@@ -1,3 +1,7 @@
|
||||||
|
+[tool:pytest]
|
||||||
|
+markers =
|
||||||
|
+ network: test case requires network connection
|
||||||
|
+
|
||||||
|
[metadata]
|
||||||
|
license_files =
|
||||||
|
LICENSE.txt
|
Loading…
Reference in New Issue
Block a user