From 31f5bb280cca1796cd3692dc8300d2f319b5255d7be3a4c0d5ce5e044eebd9eb Mon Sep 17 00:00:00 2001
From: Matej Cepl <mcepl@suse.com>
Date: Tue, 22 Mar 2022 07:58:51 +0000
Subject: [PATCH] - Update to 3.7   - Improve and update the NLTK team page on
 nltk.org (#2855,     #2941)   - Drop support for Python 3.6, support Python
 3.10 (#2920) - Update to 3.6.7   - Resolve IndexError in `sent_tokenize` and
 `word_tokenize`     (#2922) - Update to 3.6.6   - Refactor `gensim.doctest`
 to work for gensim 4.0.0 and up     (#2914)   - Add Precision, Recall,
 F-measure, Confusion Matrix to Taggers     (#2862)   - Added warnings if .zip
 files exist without any corresponding     .csv files. (#2908)   - Fix
 `FileNotFoundError` when the `download_dir` is     a non-existing nested
 folder (#2910)   - Rename omw to omw-1.4 (#2907)   - Resolve ReDoS
 opportunity by fixing incorrectly specified     regex (#2906, bsc#1191030,
 CVE-2021-3828).   - Support OMW 1.4 (#2899)   - Deprecate Tree get and set
 node methods (#2900)   - Fix broken inaugural test case (#2903)   - Use
 Multilingual Wordnet Data from OMW with newer Wordnet     versions (#2889)  
 - Keep NLTKs "tokenize" module working with pathlib (#2896)   - Make
 prettyprinter to be more readable (#2893)   - Update links to the nltk book
 (#2895)   - Add `CITATION.cff` to nltk (#2880)   - Resolve serious ReDoS in
 PunktSentenceTokenizer (#2869)   - Delete old CI config files (#2881)

OBS-URL: https://build.opensuse.org/package/show/devel:languages:python/python-nltk?expand=0&rev=36
---
 nltk-3.5.zip        |   3 --
 nltk-3.7.zip        |   3 ++
 python-nltk.changes | 112 ++++++++++++++++++++++++++++++++++++++++++++
 python-nltk.spec    |  20 ++++----
 4 files changed, 125 insertions(+), 13 deletions(-)
 delete mode 100644 nltk-3.5.zip
 create mode 100644 nltk-3.7.zip

diff --git a/nltk-3.5.zip b/nltk-3.5.zip
deleted file mode 100644
index 799be7a..0000000
--- a/nltk-3.5.zip
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:845365449cd8c5f9731f7cb9f8bd6fd0767553b9d53af9eb1b3abf7700936b35
-size 1433531
diff --git a/nltk-3.7.zip b/nltk-3.7.zip
new file mode 100644
index 0000000..5493b4d
--- /dev/null
+++ b/nltk-3.7.zip
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d6507d6460cec76d70afea4242a226a7542f85c669177b9c7f562b7cf1b05502
+size 3293449
diff --git a/python-nltk.changes b/python-nltk.changes
index 8ca07dd..974487d 100644
--- a/python-nltk.changes
+++ b/python-nltk.changes
@@ -1,3 +1,115 @@
+-------------------------------------------------------------------
+Tue Mar 22 07:48:14 UTC 2022 - Matej Cepl <mcepl@suse.com>
+
+- Update to 3.7
+  - Improve and update the NLTK team page on nltk.org (#2855,
+    #2941)
+  - Drop support for Python 3.6, support Python 3.10 (#2920)
+- Update to 3.6.7
+  - Resolve IndexError in `sent_tokenize` and `word_tokenize`
+    (#2922)
+- Update to 3.6.6
+  - Refactor `gensim.doctest` to work for gensim 4.0.0 and up
+    (#2914)
+  - Add Precision, Recall, F-measure, Confusion Matrix to Taggers
+    (#2862)
+  - Added warnings if .zip files exist without any corresponding
+    .csv files. (#2908)
+  - Fix `FileNotFoundError` when the `download_dir` is
+    a non-existing nested folder (#2910)
+  - Rename omw to omw-1.4 (#2907)
+  - Resolve ReDoS opportunity by fixing incorrectly specified
+    regex (#2906, bsc#1191030, CVE-2021-3828).
+  - Support OMW 1.4 (#2899)
+  - Deprecate Tree get and set node methods (#2900)
+  - Fix broken inaugural test case (#2903)
+  - Use Multilingual Wordnet Data from OMW with newer Wordnet
+    versions (#2889)
+  - Keep NLTK's "tokenize" module working with pathlib (#2896)
+  - Make prettyprinter more readable (#2893)
+  - Update links to the nltk book (#2895)
+  - Add `CITATION.cff` to nltk (#2880)
+  - Resolve serious ReDoS in PunktSentenceTokenizer (#2869)
+  - Delete old CI config files (#2881)
+  - Improve Tokenize documentation + add TokenizerI as superclass
+    for TweetTokenizer (#2878)
+  - Fix expected value for BLEU score doctest after changes from
+    #2572
+  - Add multi Bleu functionality and tests (#2793)
+  - Deprecate 'return_str' parameter in NLTKWordTokenizer and
+    TreebankWordTokenizer (#2883)
+  - Allow empty string in CFG's + more (#2888)
+  - Partition `tree.py` module into `tree` package + pickle fix
+    (#2863)
+  - Fix several TreebankWordTokenizer and NLTKWordTokenizer bugs
+    (#2877)
+  - Rewind Wordnet data file after each lookup (#2868)
+  - Correct __init__ call for SyntaxCorpusReader subclasses
+    (#2872)
+  - Documentation fixes (#2873)
+  - Fix Levenshtein distance for duplicated letters (#2849)
+  - Support alternative Wordnet versions (#2860)
+  - Remove hundreds of formatting warnings for nltk.org (#2859)
+  - Modernize `nltk.org/howto` pages (#2856)
+  - Fix Bleu Score smoothing function from taking log(0) (#2839)
+  - Update third party tools to newer versions and remove
+    MaltParser fixed version (#2832)
+  - Fix TypeError: _pretty() takes 1 positional argument but 2
+    were given in sem/drt.py (#2854)
+  - Replace `http` with `https` in most URLs (#2852)
+- Update to 3.6.5
+  - modernised nltk.org website
+  - addressed LGTM.com issues
+  - support ZWJ sequences emoji and skin tone modifier emoji in
+    TweetTokenizer
+  - METEOR evaluation now requires pre-tokenized input
+  - Code linting and type hinting
+  - implement get_refs function for DrtLambdaExpression
+  - Enable automated CoreNLP, Senna, Prover9/Mace4, Megam,
+    MaltParser CI tests
+  - specify minimum regex version that supports regex.Pattern
+  - avoid re.Pattern and regex.Pattern which fail for Python 3.6,
+    3.7
+- Update to 3.6.4
+  - deprecate `nltk.usage(obj)` in favor of `help(obj)`
+  - resolve ReDoS vulnerability in Corpus Reader
+  - solidify performance tests
+  - improve phone number recognition in tweet tokenizer
+  - refactored CISTEM stemmer for German
+  - identify NLTK Team as the author
+  - replace travis badge with github actions badge
+  - add SECURITY.md
+- Update to 3.6.3
+  - Dropped support for Python 3.5
+  - Run CI tests on Windows, too
+  - Moved from Travis CI to GitHub Actions
+  - Code and comment cleanups
+  - Visualize WordNet relation graphs using Graphviz
+  - Fixed large error in METEOR score
+  - Apply isort, pyupgrade, black, added as pre-commit hooks
+  - Prevent debug_decisions in Punkt from throwing IndexError
+  - Resolved ZeroDivisionError in RIBES with dissimilar sentences
+  - Initialize WordNet IC total counts with smoothing value
+  - Fixed AttributeError for Arabic ARLSTem2 stemmer
+  - Many fixes and improvements to lm language model package
+  - Fix bug in nltk.metrics.aline, C_skip = -10
+  - Improvements to TweetTokenizer
+  - Optional show arg for FreqDist.plot, ConditionalFreqDist.plot
+  - edit_distance now computes Damerau-Levenshtein edit-distance
+- Update to 3.6.2
+  - move test code to nltk/test
+  - fix bug in NgramAssocMeasures (order preserving fix)
+- Update to 3.6
+  - add support for Python 3.9
+  - add Tree.fromlist
+  - compute Minimum Spanning Tree of unweighted graph using BFS
+  - fix bug with infinite loop in Wordnet closure and tree
+  - fix bug in calculating BLEU using smoothing method 4
+  - Wordnet synset similarities work for all pos
+  - new Arabic light stemmer (ARLSTem2)
+  - new syllable tokenizer (LegalitySyllableTokenizer)
+  - remove nose in favor of pytest
+
 -------------------------------------------------------------------
 Thu Apr 23 13:54:08 UTC 2020 - John Vandenberg <jayvdb@gmail.com>
 
diff --git a/python-nltk.spec b/python-nltk.spec
index 1d34cb5..a90c1ac 100644
--- a/python-nltk.spec
+++ b/python-nltk.spec
@@ -1,7 +1,7 @@
 #
 # spec file for package python-nltk
 #
-# Copyright (c) 2020 SUSE LINUX GmbH, Nuernberg, Germany.
+# Copyright (c) 2022 SUSE LLC
 #
 # All modifications and additions to the file contributed by third parties
 # remain the property of their copyright owners, unless otherwise agreed
@@ -20,7 +20,7 @@
 %define pyname nltk
 %define skip_python2 1
 Name:           python-nltk
-Version:        3.5
+Version:        3.7
 Release:        0
 Summary:        Natural Language Toolkit
 License:        Apache-2.0
@@ -44,8 +44,8 @@ Recommends:     python-requests
 Recommends:     python-scikit-learn
 Recommends:     python-scipy
 Recommends:     python-twython
-Requires(post):   update-alternatives
-Requires(postun):  update-alternatives
+Requires(post): update-alternatives
+Requires(postun):update-alternatives
 BuildArch:      noarch
 %python_subpackages
 
@@ -55,13 +55,13 @@ Python modules, data sets and tutorials supporting research and
 development in Natural Language Processing.
 
 %prep
-%setup -q -n %{pyname}-%{version}
+%autosetup -p1 -n %{pyname}-%{version}
 
-sed -i "1,4{/\/usr\/bin\/env/d}" nltk/corpus/reader/knbc.py
-sed -i "1,4{/\/usr\/bin\/env/d}" nltk/test/runtests.py
-sed -i "1,4{/\/usr\/bin\/env/d}" nltk/test/unit/test_tgrep.py
-sed -i "1,4{/\/usr\/bin\/env/d}" nltk/tgrep.py
-sed -i "1,4{/\/usr\/bin\/env/d}" nltk/tokenize/stanford_segmenter.py
+# sed -i "1,4{/\/usr\/bin\/env/d}" nltk/corpus/reader/knbc.py
+# sed -i "1,4{/\/usr\/bin\/env/d}" nltk/test/runtests.py
+# sed -i "1,4{/\/usr\/bin\/env/d}" nltk/test/unit/test_tgrep.py
+# sed -i "1,4{/\/usr\/bin\/env/d}" nltk/tgrep.py
+# sed -i "1,4{/\/usr\/bin\/env/d}" nltk/tokenize/stanford_segmenter.py
 
 %build
 %python_build