SHA256
1
0
forked from pool/python-nltk

- Complete nltk_data.tar.xz for offline testing

- Fix failing tests (gh#nltk/nltk#2969) by adding patches:
  - port-2to3.patch
  - skip-networked-test.patch

OBS-URL: https://build.opensuse.org/package/show/devel:languages:python/python-nltk?expand=0&rev=38
This commit is contained in:
Matej Cepl 2022-12-27 10:15:18 +00:00 committed by Git OBS Bridge
parent 31f5bb280c
commit 75f3e6db1b
5 changed files with 135 additions and 4 deletions

3
nltk_data.tar.xz Normal file
View File

@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:f41383a4774bf7227f4563f46543460ba07a6921f7bcc6185519e87ea9e4323f
size 453871052

48
port-2to3.patch Normal file
View File

@ -0,0 +1,48 @@
---
nltk_data/corpora/pl196x/splitter.py | 4 ++--
nltk_data/taggers/universal_tagset/universal_tags.py | 5 -----
tools/find_deprecated.py | 2 +-
3 files changed, 3 insertions(+), 8 deletions(-)
--- a/nltk_data/corpora/pl196x/splitter.py
+++ b/nltk_data/corpora/pl196x/splitter.py
@@ -1,4 +1,4 @@
-#!/usr/bin/python
+#!/usr/bin/python3
import sys, re
@@ -7,7 +7,7 @@ TEXTID = re.compile(r'<text id="(.*)">')
if __name__ == '__main__':
if len(sys.argv) != 2:
- print 'One argument required: a pl196x corpus to split.'
+ print('One argument required: a pl196x corpus to split.')
sys.exit()
inputFileName = sys.argv[1]
--- a/nltk_data/taggers/universal_tagset/universal_tags.py
+++ b/nltk_data/taggers/universal_tagset/universal_tags.py
@@ -22,11 +22,6 @@ X - other: foreign words, typos, abbrevi
@author: Nathan Schneider (nschneid)
@since: 2011-05-06
'''
-
-# Strive towards Python 3 compatibility
-from __future__ import print_function, unicode_literals, division
-from future_builtins import map, filter
-
import re, glob
from collections import defaultdict
--- a/tools/find_deprecated.py
+++ b/tools/find_deprecated.py
@@ -29,7 +29,7 @@ import textwrap
import tokenize
from doctest import DocTestParser, register_optionflag
-from cStringIO import StringIO
+from io import StringIO
import nltk.corpus
from nltk import defaultdict

View File

@ -1,3 +1,11 @@
-------------------------------------------------------------------
Mon Dec 26 10:41:22 UTC 2022 - Matej Cepl <mcepl@suse.com>
- Complete nltk_data.tar.xz for offline testing
- Fix failing tests (gh#nltk/nltk#2969) by adding patches:
- port-2to3.patch
- skip-networked-test.patch
-------------------------------------------------------------------
Tue Mar 22 07:48:14 UTC 2022 - Matej Cepl <mcepl@suse.com>

View File

@ -25,7 +25,17 @@ Release: 0
Summary: Natural Language Toolkit
License: Apache-2.0
URL: http://nltk.org/
Source: https://files.pythonhosted.org/packages/source/n/nltk/%{pyname}-%{version}.zip
Source0: https://files.pythonhosted.org/packages/source/n/nltk/%{pyname}-%{version}.zip
# NLTK data was downloaded via python3 -m nltk.downloader,
# and the downloaded zip archives were then unpacked.
# See https://www.nltk.org/data.html for more details.
Source1: nltk_data.tar.xz
# PATCH-FIX-UPSTREAM skip-networked-test.patch gh#nltk/nltk#2969 mcepl@suse.com
# skip tests requiring network connection
Patch0: skip-networked-test.patch
# PATCH-FIX-UPSTREAM port-2to3.patch gh#nltk/nltk#2969 mcepl@suse.com
# port scripts in nltk_data to Python 3
Patch1: port-2to3.patch
BuildRequires: %{python_module regex}
BuildRequires: %{python_module setuptools}
BuildRequires: %{python_module six}
@ -33,6 +43,27 @@ BuildRequires: %{pythons}
BuildRequires: fdupes
BuildRequires: python-rpm-macros
BuildRequires: unzip
# For testing
BuildRequires: %{python_module tk}
BuildRequires: %{python_module click}
BuildRequires: %{python_module pytest}
# BuildRequires: %%{python_module gensim}
BuildRequires: %{python_module joblib}
BuildRequires: %{python_module Jinja2}
BuildRequires: %{python_module matplotlib}
BuildRequires: %{python_module numpy}
BuildRequires: %{python_module pyparsing}
BuildRequires: %{python_module pytest-cov}
BuildRequires: %{python_module pytest-mock}
BuildRequires: %{python_module python-crfsuite}
BuildRequires: %{python_module regex}
BuildRequires: %{python_module requests}
BuildRequires: %{python_module scikit-learn}
BuildRequires: %{python_module scipy}
BuildRequires: %{python_module text-unidecode}
BuildRequires: %{python_module tqdm}
BuildRequires: %{python_module twython}
#
Requires: python-regex
Requires: python-six
Recommends: python-gensim
@ -49,13 +80,18 @@ Requires(postun):update-alternatives
BuildArch: noarch
%python_subpackages
# changedir = nltk/test
%description
NLTK -- the Natural Language Toolkit -- is a suite of
Python modules, data sets and tutorials supporting research and
development in Natural Language Processing.
%prep
%autosetup -p1 -n %{pyname}-%{version}
%autosetup -p1 -a1 -n %{pyname}-%{version}
# Remove obsolete scripts
rm tools/nltk_term_index.py tools/run_doctests.py nltk_data/corpora/semcor/semcor.py
# sed -i "1,4{/\/usr\/bin\/env/d}" nltk/corpus/reader/knbc.py
# sed -i "1,4{/\/usr\/bin\/env/d}" nltk/test/runtests.py
@ -75,8 +111,9 @@ chmod -x %{buildroot}%{$python_sitelib}/nltk/test/dependency.doctest
}
%check
# FOLLOWING http://www.nltk.org/install.html
%python_exec -c "import nltk" || exit 1
export NLTK_DATA=$(readlink -f ./nltk_data/)
# export PYTEST_ADDOPTS="--doctest-modules"
%pytest -k 'not network'
%post
%python_install_alternative nltk

35
skip-networked-test.patch Normal file
View File

@ -0,0 +1,35 @@
---
nltk/test/unit/test_downloader.py | 4 ++++
setup.cfg | 4 ++++
2 files changed, 8 insertions(+)
--- a/nltk/test/unit/test_downloader.py
+++ b/nltk/test/unit/test_downloader.py
@@ -1,6 +1,9 @@
from nltk import download
+import pytest
+
+@pytest.mark.network
def test_downloader_using_existing_parent_download_dir(tmp_path):
"""Test that download works properly when the parent folder of the download_dir exists"""
@@ -9,6 +12,7 @@ def test_downloader_using_existing_paren
assert download_status is True
+@pytest.mark.network
def test_downloader_using_non_existing_parent_download_dir(tmp_path):
"""Test that download works properly when the parent folder of the download_dir does not exist"""
--- a/setup.cfg
+++ b/setup.cfg
@@ -1,3 +1,7 @@
+[tool:pytest]
+markers =
+ network: test case requires network connection
+
[metadata]
license_files =
LICENSE.txt