diff --git a/CVE-2024-39705-disable-download.patch b/CVE-2024-39705-disable-download.patch
new file mode 100644
index 0000000..917d451
--- /dev/null
+++ b/CVE-2024-39705-disable-download.patch
@@ -0,0 +1,104 @@
+---
+ nltk/app/chartparser_app.py    |   13 +++++++++++++
+ nltk/corpus/reader/util.py     |    2 ++
+ nltk/data.py                   |    2 ++
+ nltk/parse/transitionparser.py |    2 ++
+ nltk/tbl/demo.py               |    4 +++-
+ 5 files changed, 22 insertions(+), 1 deletion(-)
+
+--- a/nltk/app/chartparser_app.py
++++ b/nltk/app/chartparser_app.py
+@@ -800,6 +800,10 @@ class ChartComparer:
+             showerror("Error Saving Chart", f"Unable to open file: {filename!r}\n{e}")
+ 
+     def load_chart_dialog(self, *args):
++        showerror("Security Error",
++                  "Due to gh#nltk/nltk#3266, deserializing from " +
++                  "a pickle is forbidden.")
++        return
+         filename = askopenfilename(
+             filetypes=self.CHART_FILE_TYPES, defaultextension=".pickle"
+         )
+@@ -811,6 +815,8 @@ class ChartComparer:
+             showerror("Error Loading Chart", f"Unable to open file: {filename!r}\n{e}")
+ 
+     def load_chart(self, filename):
++        raise RuntimeError("Due to gh#nltk/nltk#3266, deserializing from " +
++                           "a pickle is forbidden.")
+         with open(filename, "rb") as infile:
+             chart = pickle.load(infile)
+         name = os.path.basename(filename)
+@@ -2268,6 +2274,10 @@ class ChartParserApp:
+         if not filename:
+             return
+         try:
++            showerror("Security Error",
++                      "Due to gh#nltk/nltk#3266, deserializing from " +
++                      "a pickle is forbidden.")
++            return
+             with open(filename, "rb") as infile:
+                 chart = pickle.load(infile)
+             self._chart = chart
+@@ -2306,6 +2316,9 @@ class ChartParserApp:
+             return
+         try:
+             if filename.endswith(".pickle"):
++                showerror("Security Error", "Due to gh#nltk/nltk#3266, " +
++                          "deserializing from a pickle is forbidden.")
++                return
+                 with open(filename, "rb") as infile:
+                     grammar = pickle.load(infile)
+             else:
+--- a/nltk/corpus/reader/util.py
++++ b/nltk/corpus/reader/util.py
+@@ -521,6 +521,8 @@ class PickleCorpusView(StreamBackedCorpu
+ 
+     def read_block(self, stream):
+         result = []
++        raise RuntimeError("Due to gh#nltk/nltk#3266, deserializing from " +
++                           "a pickle is forbidden.")
+         for i in range(self.BLOCK_SIZE):
+             try:
+                 result.append(pickle.load(stream))
+--- a/nltk/data.py
++++ b/nltk/data.py
+@@ -752,6 +752,8 @@ def load(
+     if format == "raw":
+         resource_val = opened_resource.read()
+     elif format == "pickle":
++        raise RuntimeError("Due to gh#nltk/nltk#3266, deserializing from " +
++                           "a pickle is forbidden.")
+         resource_val = pickle.load(opened_resource)
+     elif format == "json":
+         import json
+--- a/nltk/parse/transitionparser.py
++++ b/nltk/parse/transitionparser.py
+@@ -553,6 +553,8 @@ class TransitionParser(ParserI):
+         """
+         result = []
+         # First load the model
++        raise RuntimeError("Due to gh#nltk/nltk#3266, deserializing from " +
++                           "a pickle is forbidden.")
+         model = pickle.load(open(modelFile, "rb"))
+         operation = Transition(self._algorithm)
+ 
+--- a/nltk/tbl/demo.py
++++ b/nltk/tbl/demo.py
+@@ -253,6 +253,8 @@ def postag(
+                 )
+             )
+         with open(cache_baseline_tagger) as print_rules:
++            raise RuntimeError("Due to gh#nltk/nltk#3266, deserializing from " +
++                               "a pickle is forbidden.")
+             baseline_tagger = pickle.load(print_rules)
+             print(f"Reloaded pickled tagger from {cache_baseline_tagger}")
+     else:
+@@ -327,7 +329,7 @@ def postag(
+         with open(serialize_output) as print_rules:
+             brill_tagger_reloaded = pickle.load(print_rules)
+         print(f"Reloaded pickled tagger from {serialize_output}")
+-        taggedtest_reloaded = brill_tagger.tag_sents(testing_data)
++        taggedtest_reloaded = brill_tagger_reloaded.tag_sents(testing_data)
+         if taggedtest == taggedtest_reloaded:
+             print("Reloaded tagger tried on test set, results identical")
+         else:
diff --git a/nltk-3.8.1.tar.gz b/nltk-3.8.1.tar.gz
new file mode 100644
index 0000000..47e3c7c
--- /dev/null
+++ b/nltk-3.8.1.tar.gz
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:676970e2b7aa0a7184e68f76e0c4f2756fd1b82559a509d5656a23117faeb658
+size 2867926
diff --git a/nltk-3.8.1.zip b/nltk-3.8.1.zip
deleted file mode 100644
index 79456a9..0000000
--- a/nltk-3.8.1.zip
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:1834da3d0682cba4f2cede2f9aad6b0fafb6461ba451db0efb6f9c39798d64d3
-size 4620388
diff --git a/nltk-pr3207-py312.patch b/nltk-pr3207-py312.patch
index 75f2bff..8524834 100644
--- a/nltk-pr3207-py312.patch
+++ b/nltk-pr3207-py312.patch
@@ -4,729 +4,62 @@ Date: Thu, 16 Nov 2023 19:00:15 +0100
 Subject: [PATCH 1/8] ci: enable 3.12 in ci tests
 
 ---
- .github/workflows/ci.yaml | 2 +-
- 1 file changed, 1 insertion(+), 1 deletion(-)
+ .github/workflows/ci.yaml             |    2 +-
+ README.md                             |    2 +-
+ nltk/test/unit/translate/test_bleu.py |    1 -
+ nltk/translate/bleu_score.py          |   29 +++++++++++++++++++++++++++--
+ setup.py                              |    3 ++-
+ 5 files changed, 31 insertions(+), 6 deletions(-)
 
-Index: nltk-3.8.1/nltk/test/unit/translate/test_bleu.py
-===================================================================
---- nltk-3.8.1.orig/nltk/test/unit/translate/test_bleu.py
-+++ nltk-3.8.1/nltk/test/unit/translate/test_bleu.py
+--- a/.github/workflows/ci.yaml
++++ b/.github/workflows/ci.yaml
+@@ -76,7 +76,7 @@ jobs:
+     needs: [cache_nltk_data, cache_third_party]
+     strategy:
+       matrix:
+-        python-version: ['3.7', '3.8', '3.9', '3.10', '3.11']
++        python-version: ['3.7', '3.8', '3.9', '3.10', '3.11', '3.12']
+         os: [ubuntu-latest, macos-latest, windows-latest]
+       fail-fast: false
+     runs-on: ${{ matrix.os }}
+--- a/README.md
++++ b/README.md
+@@ -4,7 +4,7 @@
+ 
+ NLTK -- the Natural Language Toolkit -- is a suite of open source Python
+ modules, data sets, and tutorials supporting research and development in Natural
+-Language Processing. NLTK requires Python version 3.7, 3.8, 3.9, 3.10 or 3.11.
++Language Processing. NLTK requires Python version 3.7, 3.8, 3.9, 3.10, 3.11 or 3.12.
+ 
+ For documentation, please visit [nltk.org](https://www.nltk.org/).
+ 
+--- a/nltk/test/unit/translate/test_bleu.py
++++ b/nltk/test/unit/translate/test_bleu.py
 @@ -2,7 +2,6 @@
- Tests for BLEU translation evaluation metric
- """
- 
--import io
- import unittest
- 
- from nltk.data import find
-Index: nltk-3.8.1/nltk/translate/bleu_score.py
-===================================================================
---- nltk-3.8.1.orig/nltk/translate/bleu_score.py
-+++ nltk-3.8.1/nltk/translate/bleu_score.py
-@@ -1,685 +1,710 @@
--# Natural Language Toolkit: BLEU Score
--#
--# Copyright (C) 2001-2023 NLTK Project
--# Authors: Chin Yee Lee, Hengfeng Li, Ruxin Hou, Calvin Tanujaya Lim
--# Contributors: Björn Mattsson, Dmitrijs Milajevs, Liling Tan
--# URL: <https://www.nltk.org/>
--# For license information, see LICENSE.TXT
--
--"""BLEU score implementation."""
--
--import math
--import sys
--import warnings
--from collections import Counter
--from fractions import Fraction
--
--from nltk.util import ngrams
--
--
--def sentence_bleu(
--    references,
--    hypothesis,
--    weights=(0.25, 0.25, 0.25, 0.25),
--    smoothing_function=None,
--    auto_reweigh=False,
--):
--    """
--    Calculate BLEU score (Bilingual Evaluation Understudy) from
--    Papineni, Kishore, Salim Roukos, Todd Ward, and Wei-Jing Zhu. 2002.
--    "BLEU: a method for automatic evaluation of machine translation."
--    In Proceedings of ACL. https://www.aclweb.org/anthology/P02-1040.pdf
--
--    >>> hypothesis1 = ['It', 'is', 'a', 'guide', 'to', 'action', 'which',
--    ...               'ensures', 'that', 'the', 'military', 'always',
--    ...               'obeys', 'the', 'commands', 'of', 'the', 'party']
--
--    >>> hypothesis2 = ['It', 'is', 'to', 'insure', 'the', 'troops',
--    ...               'forever', 'hearing', 'the', 'activity', 'guidebook',
--    ...               'that', 'party', 'direct']
--
--    >>> reference1 = ['It', 'is', 'a', 'guide', 'to', 'action', 'that',
--    ...               'ensures', 'that', 'the', 'military', 'will', 'forever',
--    ...               'heed', 'Party', 'commands']
--
--    >>> reference2 = ['It', 'is', 'the', 'guiding', 'principle', 'which',
--    ...               'guarantees', 'the', 'military', 'forces', 'always',
--    ...               'being', 'under', 'the', 'command', 'of', 'the',
--    ...               'Party']
--
--    >>> reference3 = ['It', 'is', 'the', 'practical', 'guide', 'for', 'the',
--    ...               'army', 'always', 'to', 'heed', 'the', 'directions',
--    ...               'of', 'the', 'party']
--
--    >>> sentence_bleu([reference1, reference2, reference3], hypothesis1) # doctest: +ELLIPSIS
--    0.5045...
--
--    If there is no ngrams overlap for any order of n-grams, BLEU returns the
--    value 0. This is because the precision for the order of n-grams without
--    overlap is 0, and the geometric mean in the final BLEU score computation
--    multiplies the 0 with the precision of other n-grams. This results in 0
--    (independently of the precision of the other n-gram orders). The following
--    example has zero 3-gram and 4-gram overlaps:
--
--    >>> round(sentence_bleu([reference1, reference2, reference3], hypothesis2),4) # doctest: +ELLIPSIS
--    0.0
--
--    To avoid this harsh behaviour when no ngram overlaps are found a smoothing
--    function can be used.
--
--    >>> chencherry = SmoothingFunction()
--    >>> sentence_bleu([reference1, reference2, reference3], hypothesis2,
--    ...     smoothing_function=chencherry.method1) # doctest: +ELLIPSIS
--    0.0370...
--
--    The default BLEU calculates a score for up to 4-grams using uniform
--    weights (this is called BLEU-4). To evaluate your translations with
--    higher/lower order ngrams, use customized weights. E.g. when accounting
--    for up to 5-grams with uniform weights (this is called BLEU-5) use:
--
--    >>> weights = (1./5., 1./5., 1./5., 1./5., 1./5.)
--    >>> sentence_bleu([reference1, reference2, reference3], hypothesis1, weights) # doctest: +ELLIPSIS
--    0.3920...
--
--    Multiple BLEU scores can be computed at once, by supplying a list of weights.
--    E.g. for computing BLEU-2, BLEU-3 *and* BLEU-4 in one computation, use:
--    >>> weights = [
--    ...     (1./2., 1./2.),
--    ...     (1./3., 1./3., 1./3.),
--    ...     (1./4., 1./4., 1./4., 1./4.)
--    ... ]
--    >>> sentence_bleu([reference1, reference2, reference3], hypothesis1, weights) # doctest: +ELLIPSIS
--    [0.7453..., 0.6240..., 0.5045...]
--
--    :param references: reference sentences
--    :type references: list(list(str))
--    :param hypothesis: a hypothesis sentence
--    :type hypothesis: list(str)
--    :param weights: weights for unigrams, bigrams, trigrams and so on (one or a list of weights)
--    :type weights: tuple(float) / list(tuple(float))
--    :param smoothing_function:
--    :type smoothing_function: SmoothingFunction
--    :param auto_reweigh: Option to re-normalize the weights uniformly.
--    :type auto_reweigh: bool
--    :return: The sentence-level BLEU score. Returns a list if multiple weights were supplied.
--    :rtype: float / list(float)
--    """
--    return corpus_bleu(
--        [references], [hypothesis], weights, smoothing_function, auto_reweigh
--    )
--
--
--def corpus_bleu(
--    list_of_references,
--    hypotheses,
--    weights=(0.25, 0.25, 0.25, 0.25),
--    smoothing_function=None,
--    auto_reweigh=False,
--):
--    """
--    Calculate a single corpus-level BLEU score (aka. system-level BLEU) for all
--    the hypotheses and their respective references.
--
--    Instead of averaging the sentence level BLEU scores (i.e. macro-average
--    precision), the original BLEU metric (Papineni et al. 2002) accounts for
--    the micro-average precision (i.e. summing the numerators and denominators
--    for each hypothesis-reference(s) pairs before the division).
--
--    >>> hyp1 = ['It', 'is', 'a', 'guide', 'to', 'action', 'which',
--    ...         'ensures', 'that', 'the', 'military', 'always',
--    ...         'obeys', 'the', 'commands', 'of', 'the', 'party']
--    >>> ref1a = ['It', 'is', 'a', 'guide', 'to', 'action', 'that',
--    ...          'ensures', 'that', 'the', 'military', 'will', 'forever',
--    ...          'heed', 'Party', 'commands']
--    >>> ref1b = ['It', 'is', 'the', 'guiding', 'principle', 'which',
--    ...          'guarantees', 'the', 'military', 'forces', 'always',
--    ...          'being', 'under', 'the', 'command', 'of', 'the', 'Party']
--    >>> ref1c = ['It', 'is', 'the', 'practical', 'guide', 'for', 'the',
--    ...          'army', 'always', 'to', 'heed', 'the', 'directions',
--    ...          'of', 'the', 'party']
--
--    >>> hyp2 = ['he', 'read', 'the', 'book', 'because', 'he', 'was',
--    ...         'interested', 'in', 'world', 'history']
--    >>> ref2a = ['he', 'was', 'interested', 'in', 'world', 'history',
--    ...          'because', 'he', 'read', 'the', 'book']
--
--    >>> list_of_references = [[ref1a, ref1b, ref1c], [ref2a]]
--    >>> hypotheses = [hyp1, hyp2]
--    >>> corpus_bleu(list_of_references, hypotheses) # doctest: +ELLIPSIS
--    0.5920...
--
--    The example below show that corpus_bleu() is different from averaging
--    sentence_bleu() for hypotheses
--
--    >>> score1 = sentence_bleu([ref1a, ref1b, ref1c], hyp1)
--    >>> score2 = sentence_bleu([ref2a], hyp2)
--    >>> (score1 + score2) / 2 # doctest: +ELLIPSIS
--    0.6223...
--
--    Custom weights may be supplied to fine-tune the BLEU score further.
--    A tuple of float weights for unigrams, bigrams, trigrams and so on can be given.
--    >>> weights = (0.1, 0.3, 0.5, 0.1)
--    >>> corpus_bleu(list_of_references, hypotheses, weights=weights) # doctest: +ELLIPSIS
--    0.5818...
--
--    This particular weight gave extra value to trigrams.
--    Furthermore, multiple weights can be given, resulting in multiple BLEU scores.
--    >>> weights = [
--    ...     (0.5, 0.5),
--    ...     (0.333, 0.333, 0.334),
--    ...     (0.25, 0.25, 0.25, 0.25),
--    ...     (0.2, 0.2, 0.2, 0.2, 0.2)
--    ... ]
--    >>> corpus_bleu(list_of_references, hypotheses, weights=weights) # doctest: +ELLIPSIS
--    [0.8242..., 0.7067..., 0.5920..., 0.4719...]
--
--    :param list_of_references: a corpus of lists of reference sentences, w.r.t. hypotheses
--    :type list_of_references: list(list(list(str)))
--    :param hypotheses: a list of hypothesis sentences
--    :type hypotheses: list(list(str))
--    :param weights: weights for unigrams, bigrams, trigrams and so on (one or a list of weights)
--    :type weights: tuple(float) / list(tuple(float))
--    :param smoothing_function:
--    :type smoothing_function: SmoothingFunction
--    :param auto_reweigh: Option to re-normalize the weights uniformly.
--    :type auto_reweigh: bool
--    :return: The corpus-level BLEU score.
--    :rtype: float
--    """
--    # Before proceeding to compute BLEU, perform sanity checks.
--
--    p_numerators = Counter()  # Key = ngram order, and value = no. of ngram matches.
--    p_denominators = Counter()  # Key = ngram order, and value = no. of ngram in ref.
--    hyp_lengths, ref_lengths = 0, 0
--
--    assert len(list_of_references) == len(hypotheses), (
--        "The number of hypotheses and their reference(s) should be the " "same "
--    )
--
--    try:
--        weights[0][0]
--    except TypeError:
--        weights = [weights]
--    max_weight_length = max(len(weight) for weight in weights)
--
--    # Iterate through each hypothesis and their corresponding references.
--    for references, hypothesis in zip(list_of_references, hypotheses):
--        # For each order of ngram, calculate the numerator and
--        # denominator for the corpus-level modified precision.
--        for i in range(1, max_weight_length + 1):
--            p_i = modified_precision(references, hypothesis, i)
--            p_numerators[i] += p_i.numerator
--            p_denominators[i] += p_i.denominator
--
--        # Calculate the hypothesis length and the closest reference length.
--        # Adds them to the corpus-level hypothesis and reference counts.
--        hyp_len = len(hypothesis)
--        hyp_lengths += hyp_len
--        ref_lengths += closest_ref_length(references, hyp_len)
--
--    # Calculate corpus-level brevity penalty.
--    bp = brevity_penalty(ref_lengths, hyp_lengths)
--
--    # Collects the various precision values for the different ngram orders.
--    p_n = [
--        Fraction(p_numerators[i], p_denominators[i], _normalize=False)
--        for i in range(1, max_weight_length + 1)
--    ]
--
--    # Returns 0 if there's no matching n-grams
--    # We only need to check for p_numerators[1] == 0, since if there's
--    # no unigrams, there won't be any higher order ngrams.
--    if p_numerators[1] == 0:
--        return 0 if len(weights) == 1 else [0] * len(weights)
--
--    # If there's no smoothing, set use method0 from SmoothinFunction class.
--    if not smoothing_function:
--        smoothing_function = SmoothingFunction().method0
--    # Smoothen the modified precision.
--    # Note: smoothing_function() may convert values into floats;
--    #       it tries to retain the Fraction object as much as the
--    #       smoothing method allows.
--    p_n = smoothing_function(
--        p_n, references=references, hypothesis=hypothesis, hyp_len=hyp_lengths
--    )
--
--    bleu_scores = []
--    for weight in weights:
--        # Uniformly re-weighting based on maximum hypothesis lengths if largest
--        # order of n-grams < 4 and weights is set at default.
--        if auto_reweigh:
--            if hyp_lengths < 4 and weight == (0.25, 0.25, 0.25, 0.25):
--                weight = (1 / hyp_lengths,) * hyp_lengths
--
--        s = (w_i * math.log(p_i) for w_i, p_i in zip(weight, p_n) if p_i > 0)
--        s = bp * math.exp(math.fsum(s))
--        bleu_scores.append(s)
--    return bleu_scores[0] if len(weights) == 1 else bleu_scores
--
--
--def modified_precision(references, hypothesis, n):
--    """
--    Calculate modified ngram precision.
--
--    The normal precision method may lead to some wrong translations with
--    high-precision, e.g., the translation, in which a word of reference
--    repeats several times, has very high precision.
--
--    This function only returns the Fraction object that contains the numerator
--    and denominator necessary to calculate the corpus-level precision.
--    To calculate the modified precision for a single pair of hypothesis and
--    references, cast the Fraction object into a float.
--
--    The famous "the the the ... " example shows that you can get BLEU precision
--    by duplicating high frequency words.
--
--        >>> reference1 = 'the cat is on the mat'.split()
--        >>> reference2 = 'there is a cat on the mat'.split()
--        >>> hypothesis1 = 'the the the the the the the'.split()
--        >>> references = [reference1, reference2]
--        >>> float(modified_precision(references, hypothesis1, n=1)) # doctest: +ELLIPSIS
--        0.2857...
--
--    In the modified n-gram precision, a reference word will be considered
--    exhausted after a matching hypothesis word is identified, e.g.
--
--        >>> reference1 = ['It', 'is', 'a', 'guide', 'to', 'action', 'that',
--        ...               'ensures', 'that', 'the', 'military', 'will',
--        ...               'forever', 'heed', 'Party', 'commands']
--        >>> reference2 = ['It', 'is', 'the', 'guiding', 'principle', 'which',
--        ...               'guarantees', 'the', 'military', 'forces', 'always',
--        ...               'being', 'under', 'the', 'command', 'of', 'the',
--        ...               'Party']
--        >>> reference3 = ['It', 'is', 'the', 'practical', 'guide', 'for', 'the',
--        ...               'army', 'always', 'to', 'heed', 'the', 'directions',
--        ...               'of', 'the', 'party']
--        >>> hypothesis = 'of the'.split()
--        >>> references = [reference1, reference2, reference3]
--        >>> float(modified_precision(references, hypothesis, n=1))
--        1.0
--        >>> float(modified_precision(references, hypothesis, n=2))
--        1.0
--
--    An example of a normal machine translation hypothesis:
--
--        >>> hypothesis1 = ['It', 'is', 'a', 'guide', 'to', 'action', 'which',
--        ...               'ensures', 'that', 'the', 'military', 'always',
--        ...               'obeys', 'the', 'commands', 'of', 'the', 'party']
--
--        >>> hypothesis2 = ['It', 'is', 'to', 'insure', 'the', 'troops',
--        ...               'forever', 'hearing', 'the', 'activity', 'guidebook',
--        ...               'that', 'party', 'direct']
--
--        >>> reference1 = ['It', 'is', 'a', 'guide', 'to', 'action', 'that',
--        ...               'ensures', 'that', 'the', 'military', 'will',
--        ...               'forever', 'heed', 'Party', 'commands']
--
--        >>> reference2 = ['It', 'is', 'the', 'guiding', 'principle', 'which',
--        ...               'guarantees', 'the', 'military', 'forces', 'always',
--        ...               'being', 'under', 'the', 'command', 'of', 'the',
--        ...               'Party']
--
--        >>> reference3 = ['It', 'is', 'the', 'practical', 'guide', 'for', 'the',
--        ...               'army', 'always', 'to', 'heed', 'the', 'directions',
--        ...               'of', 'the', 'party']
--        >>> references = [reference1, reference2, reference3]
--        >>> float(modified_precision(references, hypothesis1, n=1)) # doctest: +ELLIPSIS
--        0.9444...
--        >>> float(modified_precision(references, hypothesis2, n=1)) # doctest: +ELLIPSIS
--        0.5714...
--        >>> float(modified_precision(references, hypothesis1, n=2)) # doctest: +ELLIPSIS
--        0.5882352941176471
--        >>> float(modified_precision(references, hypothesis2, n=2)) # doctest: +ELLIPSIS
--        0.07692...
--
--
--    :param references: A list of reference translations.
--    :type references: list(list(str))
--    :param hypothesis: A hypothesis translation.
--    :type hypothesis: list(str)
--    :param n: The ngram order.
--    :type n: int
--    :return: BLEU's modified precision for the nth order ngram.
--    :rtype: Fraction
--    """
--    # Extracts all ngrams in hypothesis
--    # Set an empty Counter if hypothesis is empty.
--    counts = Counter(ngrams(hypothesis, n)) if len(hypothesis) >= n else Counter()
--    # Extract a union of references' counts.
--    # max_counts = reduce(or_, [Counter(ngrams(ref, n)) for ref in references])
--    max_counts = {}
--    for reference in references:
--        reference_counts = (
--            Counter(ngrams(reference, n)) if len(reference) >= n else Counter()
--        )
--        for ngram in counts:
--            max_counts[ngram] = max(max_counts.get(ngram, 0), reference_counts[ngram])
--
--    # Assigns the intersection between hypothesis and references' counts.
--    clipped_counts = {
--        ngram: min(count, max_counts[ngram]) for ngram, count in counts.items()
--    }
--
--    numerator = sum(clipped_counts.values())
--    # Ensures that denominator is minimum 1 to avoid ZeroDivisionError.
--    # Usually this happens when the ngram order is > len(reference).
--    denominator = max(1, sum(counts.values()))
--
--    return Fraction(numerator, denominator, _normalize=False)
--
--
--def closest_ref_length(references, hyp_len):
--    """
--    This function finds the reference that is the closest length to the
--    hypothesis. The closest reference length is referred to as *r* variable
--    from the brevity penalty formula in Papineni et. al. (2002)
--
--    :param references: A list of reference translations.
--    :type references: list(list(str))
--    :param hyp_len: The length of the hypothesis.
--    :type hyp_len: int
--    :return: The length of the reference that's closest to the hypothesis.
--    :rtype: int
--    """
--    ref_lens = (len(reference) for reference in references)
--    closest_ref_len = min(
--        ref_lens, key=lambda ref_len: (abs(ref_len - hyp_len), ref_len)
--    )
--    return closest_ref_len
--
--
--def brevity_penalty(closest_ref_len, hyp_len):
--    """
--    Calculate brevity penalty.
--
--    As the modified n-gram precision still has the problem from the short
--    length sentence, brevity penalty is used to modify the overall BLEU
--    score according to length.
--
--    An example from the paper. There are three references with length 12, 15
--    and 17. And a concise hypothesis of the length 12. The brevity penalty is 1.
--
--    >>> reference1 = list('aaaaaaaaaaaa')      # i.e. ['a'] * 12
--    >>> reference2 = list('aaaaaaaaaaaaaaa')   # i.e. ['a'] * 15
--    >>> reference3 = list('aaaaaaaaaaaaaaaaa') # i.e. ['a'] * 17
--    >>> hypothesis = list('aaaaaaaaaaaa')      # i.e. ['a'] * 12
--    >>> references = [reference1, reference2, reference3]
--    >>> hyp_len = len(hypothesis)
--    >>> closest_ref_len =  closest_ref_length(references, hyp_len)
--    >>> brevity_penalty(closest_ref_len, hyp_len)
--    1.0
--
--    In case a hypothesis translation is shorter than the references, penalty is
--    applied.
--
--    >>> references = [['a'] * 28, ['a'] * 28]
--    >>> hypothesis = ['a'] * 12
--    >>> hyp_len = len(hypothesis)
--    >>> closest_ref_len =  closest_ref_length(references, hyp_len)
--    >>> brevity_penalty(closest_ref_len, hyp_len)
--    0.2635971381157267
--
--    The length of the closest reference is used to compute the penalty. If the
--    length of a hypothesis is 12, and the reference lengths are 13 and 2, the
--    penalty is applied because the hypothesis length (12) is less then the
--    closest reference length (13).
--
--    >>> references = [['a'] * 13, ['a'] * 2]
--    >>> hypothesis = ['a'] * 12
--    >>> hyp_len = len(hypothesis)
--    >>> closest_ref_len =  closest_ref_length(references, hyp_len)
--    >>> brevity_penalty(closest_ref_len, hyp_len) # doctest: +ELLIPSIS
--    0.9200...
--
--    The brevity penalty doesn't depend on reference order. More importantly,
--    when two reference sentences are at the same distance, the shortest
--    reference sentence length is used.
--
--    >>> references = [['a'] * 13, ['a'] * 11]
--    >>> hypothesis = ['a'] * 12
--    >>> hyp_len = len(hypothesis)
--    >>> closest_ref_len =  closest_ref_length(references, hyp_len)
--    >>> bp1 = brevity_penalty(closest_ref_len, hyp_len)
--    >>> hyp_len = len(hypothesis)
--    >>> closest_ref_len =  closest_ref_length(reversed(references), hyp_len)
--    >>> bp2 = brevity_penalty(closest_ref_len, hyp_len)
--    >>> bp1 == bp2 == 1
--    True
--
--    A test example from mteval-v13a.pl (starting from the line 705):
--
--    >>> references = [['a'] * 11, ['a'] * 8]
--    >>> hypothesis = ['a'] * 7
--    >>> hyp_len = len(hypothesis)
--    >>> closest_ref_len =  closest_ref_length(references, hyp_len)
--    >>> brevity_penalty(closest_ref_len, hyp_len) # doctest: +ELLIPSIS
--    0.8668...
--
--    >>> references = [['a'] * 11, ['a'] * 8, ['a'] * 6, ['a'] * 7]
--    >>> hypothesis = ['a'] * 7
--    >>> hyp_len = len(hypothesis)
--    >>> closest_ref_len =  closest_ref_length(references, hyp_len)
--    >>> brevity_penalty(closest_ref_len, hyp_len)
--    1.0
--
--    :param hyp_len: The length of the hypothesis for a single sentence OR the
--        sum of all the hypotheses' lengths for a corpus
--    :type hyp_len: int
--    :param closest_ref_len: The length of the closest reference for a single
--        hypothesis OR the sum of all the closest references for every hypotheses.
--    :type closest_ref_len: int
--    :return: BLEU's brevity penalty.
--    :rtype: float
--    """
--    if hyp_len > closest_ref_len:
--        return 1
--    # If hypothesis is empty, brevity penalty = 0 should result in BLEU = 0.0
--    elif hyp_len == 0:
--        return 0
--    else:
--        return math.exp(1 - closest_ref_len / hyp_len)
--
--
--class SmoothingFunction:
--    """
--    This is an implementation of the smoothing techniques
--    for segment-level BLEU scores that was presented in
--    Boxing Chen and Collin Cherry (2014) A Systematic Comparison of
--    Smoothing Techniques for Sentence-Level BLEU. In WMT14.
--    http://acl2014.org/acl2014/W14-33/pdf/W14-3346.pdf
--    """
--
--    def __init__(self, epsilon=0.1, alpha=5, k=5):
--        """
--        This will initialize the parameters required for the various smoothing
--        techniques, the default values are set to the numbers used in the
--        experiments from Chen and Cherry (2014).
--
--        >>> hypothesis1 = ['It', 'is', 'a', 'guide', 'to', 'action', 'which', 'ensures',
--        ...                 'that', 'the', 'military', 'always', 'obeys', 'the',
--        ...                 'commands', 'of', 'the', 'party']
--        >>> reference1 = ['It', 'is', 'a', 'guide', 'to', 'action', 'that', 'ensures',
--        ...               'that', 'the', 'military', 'will', 'forever', 'heed',
--        ...               'Party', 'commands']
--
--        >>> chencherry = SmoothingFunction()
--        >>> print(sentence_bleu([reference1], hypothesis1)) # doctest: +ELLIPSIS
--        0.4118...
--        >>> print(sentence_bleu([reference1], hypothesis1, smoothing_function=chencherry.method0)) # doctest: +ELLIPSIS
--        0.4118...
--        >>> print(sentence_bleu([reference1], hypothesis1, smoothing_function=chencherry.method1)) # doctest: +ELLIPSIS
--        0.4118...
--        >>> print(sentence_bleu([reference1], hypothesis1, smoothing_function=chencherry.method2)) # doctest: +ELLIPSIS
--        0.4452...
--        >>> print(sentence_bleu([reference1], hypothesis1, smoothing_function=chencherry.method3)) # doctest: +ELLIPSIS
--        0.4118...
--        >>> print(sentence_bleu([reference1], hypothesis1, smoothing_function=chencherry.method4)) # doctest: +ELLIPSIS
--        0.4118...
--        >>> print(sentence_bleu([reference1], hypothesis1, smoothing_function=chencherry.method5)) # doctest: +ELLIPSIS
--        0.4905...
--        >>> print(sentence_bleu([reference1], hypothesis1, smoothing_function=chencherry.method6)) # doctest: +ELLIPSIS
--        0.4135...
--        >>> print(sentence_bleu([reference1], hypothesis1, smoothing_function=chencherry.method7)) # doctest: +ELLIPSIS
--        0.4905...
--
--        :param epsilon: the epsilon value use in method 1
--        :type epsilon: float
--        :param alpha: the alpha value use in method 6
--        :type alpha: int
--        :param k: the k value use in method 4
--        :type k: int
--        """
--        self.epsilon = epsilon
--        self.alpha = alpha
--        self.k = k
--
--    def method0(self, p_n, *args, **kwargs):
--        """
--        No smoothing.
--        """
--        p_n_new = []
--        for i, p_i in enumerate(p_n):
--            if p_i.numerator != 0:
--                p_n_new.append(p_i)
--            else:
--                _msg = str(
--                    "\nThe hypothesis contains 0 counts of {}-gram overlaps.\n"
--                    "Therefore the BLEU score evaluates to 0, independently of\n"
--                    "how many N-gram overlaps of lower order it contains.\n"
--                    "Consider using lower n-gram order or use "
--                    "SmoothingFunction()"
--                ).format(i + 1)
--                warnings.warn(_msg)
--                # When numerator==0 where denonminator==0 or !=0, the result
--                # for the precision score should be equal to 0 or undefined.
--                # Due to BLEU geometric mean computation in logarithm space,
--                # we we need to take the return sys.float_info.min such that
--                # math.log(sys.float_info.min) returns a 0 precision score.
--                p_n_new.append(sys.float_info.min)
--        return p_n_new
--
--    def method1(self, p_n, *args, **kwargs):
--        """
--        Smoothing method 1: Add *epsilon* counts to precision with 0 counts.
--        """
--        return [
--            (p_i.numerator + self.epsilon) / p_i.denominator
--            if p_i.numerator == 0
--            else p_i
--            for p_i in p_n
--        ]
--
--    def method2(self, p_n, *args, **kwargs):
--        """
--        Smoothing method 2: Add 1 to both numerator and denominator from
--        Chin-Yew Lin and Franz Josef Och (2004) ORANGE: a Method for
--        Evaluating Automatic Evaluation Metrics for Machine Translation.
--        In COLING 2004.
--        """
--        return [
--            Fraction(p_n[i].numerator + 1, p_n[i].denominator + 1, _normalize=False)
--            if i != 0
--            else p_n[0]
--            for i in range(len(p_n))
--        ]
--
--    def method3(self, p_n, *args, **kwargs):
--        """
--        Smoothing method 3: NIST geometric sequence smoothing
--        The smoothing is computed by taking 1 / ( 2^k ), instead of 0, for each
--        precision score whose matching n-gram count is null.
--        k is 1 for the first 'n' value for which the n-gram match count is null/
--
--        For example, if the text contains:
--
--        - one 2-gram match
--        - and (consequently) two 1-gram matches
--
--        the n-gram count for each individual precision score would be:
--
--        - n=1  =>  prec_count = 2     (two unigrams)
--        - n=2  =>  prec_count = 1     (one bigram)
--        - n=3  =>  prec_count = 1/2   (no trigram,  taking 'smoothed' value of 1 / ( 2^k ), with k=1)
--        - n=4  =>  prec_count = 1/4   (no fourgram, taking 'smoothed' value of 1 / ( 2^k ), with k=2)
--        """
--        incvnt = 1  # From the mteval-v13a.pl, it's referred to as k.
--        for i, p_i in enumerate(p_n):
--            if p_i.numerator == 0:
--                p_n[i] = 1 / (2**incvnt * p_i.denominator)
--                incvnt += 1
--        return p_n
--
--    def method4(self, p_n, references, hypothesis, hyp_len=None, *args, **kwargs):
--        """
--        Smoothing method 4:
--        Shorter translations may have inflated precision values due to having
--        smaller denominators; therefore, we give them proportionally
--        smaller smoothed counts. Instead of scaling to 1/(2^k), Chen and Cherry
--        suggests dividing by 1/ln(len(T)), where T is the length of the translation.
--        """
--        incvnt = 1
--        hyp_len = hyp_len if hyp_len else len(hypothesis)
--        for i, p_i in enumerate(p_n):
--            if p_i.numerator == 0 and hyp_len > 1:
--                # incvnt = i + 1 * self.k / math.log(
--                #     hyp_len
--                # )  # Note that this K is different from the K from NIST.
--                # p_n[i] = incvnt / p_i.denominator\
--                numerator = 1 / (2**incvnt * self.k / math.log(hyp_len))
--                p_n[i] = numerator / p_i.denominator
--                incvnt += 1
--        return p_n
--
--    def method5(self, p_n, references, hypothesis, hyp_len=None, *args, **kwargs):
--        """
--        Smoothing method 5:
--        The matched counts for similar values of n should be similar. To a
--        calculate the n-gram matched count, it averages the n−1, n and n+1 gram
--        matched counts.
--        """
--        hyp_len = hyp_len if hyp_len else len(hypothesis)
--        m = {}
--        # Requires an precision value for an addition ngram order.
--        p_n_plus1 = p_n + [modified_precision(references, hypothesis, 5)]
--        m[-1] = p_n[0] + 1
--        for i, p_i in enumerate(p_n):
--            p_n[i] = (m[i - 1] + p_i + p_n_plus1[i + 1]) / 3
--            m[i] = p_n[i]
--        return p_n
--
--    def method6(self, p_n, references, hypothesis, hyp_len=None, *args, **kwargs):
--        """
--        Smoothing method 6:
--        Interpolates the maximum likelihood estimate of the precision *p_n* with
--        a prior estimate *pi0*. The prior is estimated by assuming that the ratio
--        between pn and pn−1 will be the same as that between pn−1 and pn−2; from
--        Gao and He (2013) Training MRF-Based Phrase Translation Models using
--        Gradient Ascent. In NAACL.
--        """
--        hyp_len = hyp_len if hyp_len else len(hypothesis)
--        # This smoothing only works when p_1 and p_2 is non-zero.
--        # Raise an error with an appropriate message when the input is too short
--        # to use this smoothing technique.
--        assert p_n[2], "This smoothing method requires non-zero precision for bigrams."
--        for i, p_i in enumerate(p_n):
--            if i in [0, 1]:  # Skips the first 2 orders of ngrams.
--                continue
--            else:
--                pi0 = 0 if p_n[i - 2] == 0 else p_n[i - 1] ** 2 / p_n[i - 2]
--                # No. of ngrams in translation that matches the reference.
--                m = p_i.numerator
--                # No. of ngrams in translation.
--                l = sum(1 for _ in ngrams(hypothesis, i + 1))
--                # Calculates the interpolated precision.
--                p_n[i] = (m + self.alpha * pi0) / (l + self.alpha)
--        return p_n
--
--    def method7(self, p_n, references, hypothesis, hyp_len=None, *args, **kwargs):
--        """
--        Smoothing method 7:
--        Interpolates methods 4 and 5.
--        """
--        hyp_len = hyp_len if hyp_len else len(hypothesis)
--        p_n = self.method4(p_n, references, hypothesis, hyp_len)
--        p_n = self.method5(p_n, references, hypothesis, hyp_len)
--        return p_n
-+# Natural Language Toolkit: BLEU Score
-+#
-+# Copyright (C) 2001-2023 NLTK Project
-+# Authors: Chin Yee Lee, Hengfeng Li, Ruxin Hou, Calvin Tanujaya Lim
-+# Contributors: Björn Mattsson, Dmitrijs Milajevs, Liling Tan
-+# URL: <https://www.nltk.org/>
-+# For license information, see LICENSE.TXT
-+
-+"""BLEU score implementation."""
-+import math
-+import sys
-+import warnings
-+from collections import Counter
+ Tests for BLEU translation evaluation metric
+ """
+ 
+-import io
+ import unittest
+ 
+ from nltk.data import find
+--- a/nltk/translate/bleu_score.py
++++ b/nltk/translate/bleu_score.py
+@@ -7,16 +7,41 @@
+ # For license information, see LICENSE.TXT
+ 
+ """BLEU score implementation."""
+-
+ import math
+ import sys
+ import warnings
+ from collections import Counter
+-from fractions import Fraction
 +from fractions import Fraction as _Fraction
-+
-+from nltk.util import ngrams
-+
-+
+ 
+ from nltk.util import ngrams
+ 
+ 
 +class Fraction(_Fraction):
 +    """Fraction with _normalize=False support for 3.12"""
 +
@@ -753,1030 +86,25 @@ Index: nltk-3.8.1/nltk/translate/bleu_score.py
 +        return super().denominator
 +
 +
-+def sentence_bleu(
-+    references,
-+    hypothesis,
-+    weights=(0.25, 0.25, 0.25, 0.25),
-+    smoothing_function=None,
-+    auto_reweigh=False,
-+):
-+    """
-+    Calculate BLEU score (Bilingual Evaluation Understudy) from
-+    Papineni, Kishore, Salim Roukos, Todd Ward, and Wei-Jing Zhu. 2002.
-+    "BLEU: a method for automatic evaluation of machine translation."
-+    In Proceedings of ACL. https://www.aclweb.org/anthology/P02-1040.pdf
-+
-+    >>> hypothesis1 = ['It', 'is', 'a', 'guide', 'to', 'action', 'which',
-+    ...               'ensures', 'that', 'the', 'military', 'always',
-+    ...               'obeys', 'the', 'commands', 'of', 'the', 'party']
-+
-+    >>> hypothesis2 = ['It', 'is', 'to', 'insure', 'the', 'troops',
-+    ...               'forever', 'hearing', 'the', 'activity', 'guidebook',
-+    ...               'that', 'party', 'direct']
-+
-+    >>> reference1 = ['It', 'is', 'a', 'guide', 'to', 'action', 'that',
-+    ...               'ensures', 'that', 'the', 'military', 'will', 'forever',
-+    ...               'heed', 'Party', 'commands']
-+
-+    >>> reference2 = ['It', 'is', 'the', 'guiding', 'principle', 'which',
-+    ...               'guarantees', 'the', 'military', 'forces', 'always',
-+    ...               'being', 'under', 'the', 'command', 'of', 'the',
-+    ...               'Party']
-+
-+    >>> reference3 = ['It', 'is', 'the', 'practical', 'guide', 'for', 'the',
-+    ...               'army', 'always', 'to', 'heed', 'the', 'directions',
-+    ...               'of', 'the', 'party']
-+
-+    >>> sentence_bleu([reference1, reference2, reference3], hypothesis1) # doctest: +ELLIPSIS
-+    0.5045...
-+
-+    If there is no ngrams overlap for any order of n-grams, BLEU returns the
-+    value 0. This is because the precision for the order of n-grams without
-+    overlap is 0, and the geometric mean in the final BLEU score computation
-+    multiplies the 0 with the precision of other n-grams. This results in 0
-+    (independently of the precision of the other n-gram orders). The following
-+    example has zero 3-gram and 4-gram overlaps:
-+
-+    >>> round(sentence_bleu([reference1, reference2, reference3], hypothesis2),4) # doctest: +ELLIPSIS
-+    0.0
-+
-+    To avoid this harsh behaviour when no ngram overlaps are found a smoothing
-+    function can be used.
-+
-+    >>> chencherry = SmoothingFunction()
-+    >>> sentence_bleu([reference1, reference2, reference3], hypothesis2,
-+    ...     smoothing_function=chencherry.method1) # doctest: +ELLIPSIS
-+    0.0370...
-+
-+    The default BLEU calculates a score for up to 4-grams using uniform
-+    weights (this is called BLEU-4). To evaluate your translations with
-+    higher/lower order ngrams, use customized weights. E.g. when accounting
-+    for up to 5-grams with uniform weights (this is called BLEU-5) use:
-+
-+    >>> weights = (1./5., 1./5., 1./5., 1./5., 1./5.)
-+    >>> sentence_bleu([reference1, reference2, reference3], hypothesis1, weights) # doctest: +ELLIPSIS
-+    0.3920...
-+
-+    Multiple BLEU scores can be computed at once, by supplying a list of weights.
-+    E.g. for computing BLEU-2, BLEU-3 *and* BLEU-4 in one computation, use:
-+    >>> weights = [
-+    ...     (1./2., 1./2.),
-+    ...     (1./3., 1./3., 1./3.),
-+    ...     (1./4., 1./4., 1./4., 1./4.)
-+    ... ]
-+    >>> sentence_bleu([reference1, reference2, reference3], hypothesis1, weights) # doctest: +ELLIPSIS
-+    [0.7453..., 0.6240..., 0.5045...]
-+
-+    :param references: reference sentences
-+    :type references: list(list(str))
-+    :param hypothesis: a hypothesis sentence
-+    :type hypothesis: list(str)
-+    :param weights: weights for unigrams, bigrams, trigrams and so on (one or a list of weights)
-+    :type weights: tuple(float) / list(tuple(float))
-+    :param smoothing_function:
-+    :type smoothing_function: SmoothingFunction
-+    :param auto_reweigh: Option to re-normalize the weights uniformly.
-+    :type auto_reweigh: bool
-+    :return: The sentence-level BLEU score. Returns a list if multiple weights were supplied.
-+    :rtype: float / list(float)
-+    """
-+    return corpus_bleu(
-+        [references], [hypothesis], weights, smoothing_function, auto_reweigh
-+    )
-+
-+
-+def corpus_bleu(
-+    list_of_references,
-+    hypotheses,
-+    weights=(0.25, 0.25, 0.25, 0.25),
-+    smoothing_function=None,
-+    auto_reweigh=False,
-+):
-+    """
-+    Calculate a single corpus-level BLEU score (aka. system-level BLEU) for all
-+    the hypotheses and their respective references.
-+
-+    Instead of averaging the sentence level BLEU scores (i.e. macro-average
-+    precision), the original BLEU metric (Papineni et al. 2002) accounts for
-+    the micro-average precision (i.e. summing the numerators and denominators
-+    for each hypothesis-reference(s) pairs before the division).
-+
-+    >>> hyp1 = ['It', 'is', 'a', 'guide', 'to', 'action', 'which',
-+    ...         'ensures', 'that', 'the', 'military', 'always',
-+    ...         'obeys', 'the', 'commands', 'of', 'the', 'party']
-+    >>> ref1a = ['It', 'is', 'a', 'guide', 'to', 'action', 'that',
-+    ...          'ensures', 'that', 'the', 'military', 'will', 'forever',
-+    ...          'heed', 'Party', 'commands']
-+    >>> ref1b = ['It', 'is', 'the', 'guiding', 'principle', 'which',
-+    ...          'guarantees', 'the', 'military', 'forces', 'always',
-+    ...          'being', 'under', 'the', 'command', 'of', 'the', 'Party']
-+    >>> ref1c = ['It', 'is', 'the', 'practical', 'guide', 'for', 'the',
-+    ...          'army', 'always', 'to', 'heed', 'the', 'directions',
-+    ...          'of', 'the', 'party']
-+
-+    >>> hyp2 = ['he', 'read', 'the', 'book', 'because', 'he', 'was',
-+    ...         'interested', 'in', 'world', 'history']
-+    >>> ref2a = ['he', 'was', 'interested', 'in', 'world', 'history',
-+    ...          'because', 'he', 'read', 'the', 'book']
-+
-+    >>> list_of_references = [[ref1a, ref1b, ref1c], [ref2a]]
-+    >>> hypotheses = [hyp1, hyp2]
-+    >>> corpus_bleu(list_of_references, hypotheses) # doctest: +ELLIPSIS
-+    0.5920...
-+
-+    The example below show that corpus_bleu() is different from averaging
-+    sentence_bleu() for hypotheses
-+
-+    >>> score1 = sentence_bleu([ref1a, ref1b, ref1c], hyp1)
-+    >>> score2 = sentence_bleu([ref2a], hyp2)
-+    >>> (score1 + score2) / 2 # doctest: +ELLIPSIS
-+    0.6223...
-+
-+    Custom weights may be supplied to fine-tune the BLEU score further.
-+    A tuple of float weights for unigrams, bigrams, trigrams and so on can be given.
-+    >>> weights = (0.1, 0.3, 0.5, 0.1)
-+    >>> corpus_bleu(list_of_references, hypotheses, weights=weights) # doctest: +ELLIPSIS
-+    0.5818...
-+
-+    This particular weight gave extra value to trigrams.
-+    Furthermore, multiple weights can be given, resulting in multiple BLEU scores.
-+    >>> weights = [
-+    ...     (0.5, 0.5),
-+    ...     (0.333, 0.333, 0.334),
-+    ...     (0.25, 0.25, 0.25, 0.25),
-+    ...     (0.2, 0.2, 0.2, 0.2, 0.2)
-+    ... ]
-+    >>> corpus_bleu(list_of_references, hypotheses, weights=weights) # doctest: +ELLIPSIS
-+    [0.8242..., 0.7067..., 0.5920..., 0.4719...]
-+
-+    :param list_of_references: a corpus of lists of reference sentences, w.r.t. hypotheses
-+    :type list_of_references: list(list(list(str)))
-+    :param hypotheses: a list of hypothesis sentences
-+    :type hypotheses: list(list(str))
-+    :param weights: weights for unigrams, bigrams, trigrams and so on (one or a list of weights)
-+    :type weights: tuple(float) / list(tuple(float))
-+    :param smoothing_function:
-+    :type smoothing_function: SmoothingFunction
-+    :param auto_reweigh: Option to re-normalize the weights uniformly.
-+    :type auto_reweigh: bool
-+    :return: The corpus-level BLEU score.
-+    :rtype: float
-+    """
-+    # Before proceeding to compute BLEU, perform sanity checks.
-+
-+    p_numerators = Counter()  # Key = ngram order, and value = no. of ngram matches.
-+    p_denominators = Counter()  # Key = ngram order, and value = no. of ngram in ref.
-+    hyp_lengths, ref_lengths = 0, 0
-+
-+    assert len(list_of_references) == len(hypotheses), (
-+        "The number of hypotheses and their reference(s) should be the " "same "
-+    )
-+
-+    try:
-+        weights[0][0]
-+    except TypeError:
-+        weights = [weights]
-+    max_weight_length = max(len(weight) for weight in weights)
-+
-+    # Iterate through each hypothesis and their corresponding references.
-+    for references, hypothesis in zip(list_of_references, hypotheses):
-+        # For each order of ngram, calculate the numerator and
-+        # denominator for the corpus-level modified precision.
-+        for i in range(1, max_weight_length + 1):
-+            p_i = modified_precision(references, hypothesis, i)
-+            p_numerators[i] += p_i.numerator
-+            p_denominators[i] += p_i.denominator
-+
-+        # Calculate the hypothesis length and the closest reference length.
-+        # Adds them to the corpus-level hypothesis and reference counts.
-+        hyp_len = len(hypothesis)
-+        hyp_lengths += hyp_len
-+        ref_lengths += closest_ref_length(references, hyp_len)
-+
-+    # Calculate corpus-level brevity penalty.
-+    bp = brevity_penalty(ref_lengths, hyp_lengths)
-+
-+    # Collects the various precision values for the different ngram orders.
-+    p_n = [
-+        Fraction(p_numerators[i], p_denominators[i], _normalize=False)
-+        for i in range(1, max_weight_length + 1)
-+    ]
-+
-+    # Returns 0 if there's no matching n-grams
-+    # We only need to check for p_numerators[1] == 0, since if there's
-+    # no unigrams, there won't be any higher order ngrams.
-+    if p_numerators[1] == 0:
-+        return 0 if len(weights) == 1 else [0] * len(weights)
-+
-+    # If there's no smoothing, set use method0 from SmoothinFunction class.
-+    if not smoothing_function:
-+        smoothing_function = SmoothingFunction().method0
-+    # Smoothen the modified precision.
-+    # Note: smoothing_function() may convert values into floats;
-+    #       it tries to retain the Fraction object as much as the
-+    #       smoothing method allows.
-+    p_n = smoothing_function(
-+        p_n, references=references, hypothesis=hypothesis, hyp_len=hyp_lengths
-+    )
-+
-+    bleu_scores = []
-+    for weight in weights:
-+        # Uniformly re-weighting based on maximum hypothesis lengths if largest
-+        # order of n-grams < 4 and weights is set at default.
-+        if auto_reweigh:
-+            if hyp_lengths < 4 and weight == (0.25, 0.25, 0.25, 0.25):
-+                weight = (1 / hyp_lengths,) * hyp_lengths
-+
-+        s = (w_i * math.log(p_i) for w_i, p_i in zip(weight, p_n) if p_i > 0)
-+        s = bp * math.exp(math.fsum(s))
-+        bleu_scores.append(s)
-+    return bleu_scores[0] if len(weights) == 1 else bleu_scores
-+
-+
-+def modified_precision(references, hypothesis, n):
-+    """
-+    Calculate modified ngram precision.
-+
-+    The normal precision method may lead to some wrong translations with
-+    high-precision, e.g., the translation, in which a word of reference
-+    repeats several times, has very high precision.
-+
-+    This function only returns the Fraction object that contains the numerator
-+    and denominator necessary to calculate the corpus-level precision.
-+    To calculate the modified precision for a single pair of hypothesis and
-+    references, cast the Fraction object into a float.
-+
-+    The famous "the the the ... " example shows that you can get BLEU precision
-+    by duplicating high frequency words.
-+
-+        >>> reference1 = 'the cat is on the mat'.split()
-+        >>> reference2 = 'there is a cat on the mat'.split()
-+        >>> hypothesis1 = 'the the the the the the the'.split()
-+        >>> references = [reference1, reference2]
-+        >>> float(modified_precision(references, hypothesis1, n=1)) # doctest: +ELLIPSIS
-+        0.2857...
-+
-+    In the modified n-gram precision, a reference word will be considered
-+    exhausted after a matching hypothesis word is identified, e.g.
-+
-+        >>> reference1 = ['It', 'is', 'a', 'guide', 'to', 'action', 'that',
-+        ...               'ensures', 'that', 'the', 'military', 'will',
-+        ...               'forever', 'heed', 'Party', 'commands']
-+        >>> reference2 = ['It', 'is', 'the', 'guiding', 'principle', 'which',
-+        ...               'guarantees', 'the', 'military', 'forces', 'always',
-+        ...               'being', 'under', 'the', 'command', 'of', 'the',
-+        ...               'Party']
-+        >>> reference3 = ['It', 'is', 'the', 'practical', 'guide', 'for', 'the',
-+        ...               'army', 'always', 'to', 'heed', 'the', 'directions',
-+        ...               'of', 'the', 'party']
-+        >>> hypothesis = 'of the'.split()
-+        >>> references = [reference1, reference2, reference3]
-+        >>> float(modified_precision(references, hypothesis, n=1))
-+        1.0
-+        >>> float(modified_precision(references, hypothesis, n=2))
-+        1.0
-+
-+    An example of a normal machine translation hypothesis:
-+
-+        >>> hypothesis1 = ['It', 'is', 'a', 'guide', 'to', 'action', 'which',
-+        ...               'ensures', 'that', 'the', 'military', 'always',
-+        ...               'obeys', 'the', 'commands', 'of', 'the', 'party']
-+
-+        >>> hypothesis2 = ['It', 'is', 'to', 'insure', 'the', 'troops',
-+        ...               'forever', 'hearing', 'the', 'activity', 'guidebook',
-+        ...               'that', 'party', 'direct']
-+
-+        >>> reference1 = ['It', 'is', 'a', 'guide', 'to', 'action', 'that',
-+        ...               'ensures', 'that', 'the', 'military', 'will',
-+        ...               'forever', 'heed', 'Party', 'commands']
-+
-+        >>> reference2 = ['It', 'is', 'the', 'guiding', 'principle', 'which',
-+        ...               'guarantees', 'the', 'military', 'forces', 'always',
-+        ...               'being', 'under', 'the', 'command', 'of', 'the',
-+        ...               'Party']
-+
-+        >>> reference3 = ['It', 'is', 'the', 'practical', 'guide', 'for', 'the',
-+        ...               'army', 'always', 'to', 'heed', 'the', 'directions',
-+        ...               'of', 'the', 'party']
-+        >>> references = [reference1, reference2, reference3]
-+        >>> float(modified_precision(references, hypothesis1, n=1)) # doctest: +ELLIPSIS
-+        0.9444...
-+        >>> float(modified_precision(references, hypothesis2, n=1)) # doctest: +ELLIPSIS
-+        0.5714...
-+        >>> float(modified_precision(references, hypothesis1, n=2)) # doctest: +ELLIPSIS
-+        0.5882352941176471
-+        >>> float(modified_precision(references, hypothesis2, n=2)) # doctest: +ELLIPSIS
-+        0.07692...
-+
-+
-+    :param references: A list of reference translations.
-+    :type references: list(list(str))
-+    :param hypothesis: A hypothesis translation.
-+    :type hypothesis: list(str)
-+    :param n: The ngram order.
-+    :type n: int
-+    :return: BLEU's modified precision for the nth order ngram.
-+    :rtype: Fraction
-+    """
-+    # Extracts all ngrams in hypothesis
-+    # Set an empty Counter if hypothesis is empty.
-+    counts = Counter(ngrams(hypothesis, n)) if len(hypothesis) >= n else Counter()
-+    # Extract a union of references' counts.
-+    # max_counts = reduce(or_, [Counter(ngrams(ref, n)) for ref in references])
-+    max_counts = {}
-+    for reference in references:
-+        reference_counts = (
-+            Counter(ngrams(reference, n)) if len(reference) >= n else Counter()
-+        )
-+        for ngram in counts:
-+            max_counts[ngram] = max(max_counts.get(ngram, 0), reference_counts[ngram])
-+
-+    # Assigns the intersection between hypothesis and references' counts.
-+    clipped_counts = {
-+        ngram: min(count, max_counts[ngram]) for ngram, count in counts.items()
-+    }
-+
-+    numerator = sum(clipped_counts.values())
-+    # Ensures that denominator is minimum 1 to avoid ZeroDivisionError.
-+    # Usually this happens when the ngram order is > len(reference).
-+    denominator = max(1, sum(counts.values()))
-+
-+    return Fraction(numerator, denominator, _normalize=False)
-+
-+
-+def closest_ref_length(references, hyp_len):
-+    """
-+    This function finds the reference that is the closest length to the
-+    hypothesis. The closest reference length is referred to as *r* variable
-+    from the brevity penalty formula in Papineni et. al. (2002)
-+
-+    :param references: A list of reference translations.
-+    :type references: list(list(str))
-+    :param hyp_len: The length of the hypothesis.
-+    :type hyp_len: int
-+    :return: The length of the reference that's closest to the hypothesis.
-+    :rtype: int
-+    """
-+    ref_lens = (len(reference) for reference in references)
-+    closest_ref_len = min(
-+        ref_lens, key=lambda ref_len: (abs(ref_len - hyp_len), ref_len)
-+    )
-+    return closest_ref_len
-+
-+
-+def brevity_penalty(closest_ref_len, hyp_len):
-+    """
-+    Calculate brevity penalty.
-+
-+    As the modified n-gram precision still has the problem from the short
-+    length sentence, brevity penalty is used to modify the overall BLEU
-+    score according to length.
-+
-+    An example from the paper. There are three references with length 12, 15
-+    and 17. And a concise hypothesis of the length 12. The brevity penalty is 1.
-+
-+    >>> reference1 = list('aaaaaaaaaaaa')      # i.e. ['a'] * 12
-+    >>> reference2 = list('aaaaaaaaaaaaaaa')   # i.e. ['a'] * 15
-+    >>> reference3 = list('aaaaaaaaaaaaaaaaa') # i.e. ['a'] * 17
-+    >>> hypothesis = list('aaaaaaaaaaaa')      # i.e. ['a'] * 12
-+    >>> references = [reference1, reference2, reference3]
-+    >>> hyp_len = len(hypothesis)
-+    >>> closest_ref_len =  closest_ref_length(references, hyp_len)
-+    >>> brevity_penalty(closest_ref_len, hyp_len)
-+    1.0
-+
-+    In case a hypothesis translation is shorter than the references, penalty is
-+    applied.
-+
-+    >>> references = [['a'] * 28, ['a'] * 28]
-+    >>> hypothesis = ['a'] * 12
-+    >>> hyp_len = len(hypothesis)
-+    >>> closest_ref_len =  closest_ref_length(references, hyp_len)
-+    >>> brevity_penalty(closest_ref_len, hyp_len)
-+    0.2635971381157267
-+
-+    The length of the closest reference is used to compute the penalty. If the
-+    length of a hypothesis is 12, and the reference lengths are 13 and 2, the
-+    penalty is applied because the hypothesis length (12) is less then the
-+    closest reference length (13).
-+
-+    >>> references = [['a'] * 13, ['a'] * 2]
-+    >>> hypothesis = ['a'] * 12
-+    >>> hyp_len = len(hypothesis)
-+    >>> closest_ref_len =  closest_ref_length(references, hyp_len)
-+    >>> brevity_penalty(closest_ref_len, hyp_len) # doctest: +ELLIPSIS
-+    0.9200...
-+
-+    The brevity penalty doesn't depend on reference order. More importantly,
-+    when two reference sentences are at the same distance, the shortest
-+    reference sentence length is used.
-+
-+    >>> references = [['a'] * 13, ['a'] * 11]
-+    >>> hypothesis = ['a'] * 12
-+    >>> hyp_len = len(hypothesis)
-+    >>> closest_ref_len =  closest_ref_length(references, hyp_len)
-+    >>> bp1 = brevity_penalty(closest_ref_len, hyp_len)
-+    >>> hyp_len = len(hypothesis)
-+    >>> closest_ref_len =  closest_ref_length(reversed(references), hyp_len)
-+    >>> bp2 = brevity_penalty(closest_ref_len, hyp_len)
-+    >>> bp1 == bp2 == 1
-+    True
-+
-+    A test example from mteval-v13a.pl (starting from the line 705):
-+
-+    >>> references = [['a'] * 11, ['a'] * 8]
-+    >>> hypothesis = ['a'] * 7
-+    >>> hyp_len = len(hypothesis)
-+    >>> closest_ref_len =  closest_ref_length(references, hyp_len)
-+    >>> brevity_penalty(closest_ref_len, hyp_len) # doctest: +ELLIPSIS
-+    0.8668...
-+
-+    >>> references = [['a'] * 11, ['a'] * 8, ['a'] * 6, ['a'] * 7]
-+    >>> hypothesis = ['a'] * 7
-+    >>> hyp_len = len(hypothesis)
-+    >>> closest_ref_len =  closest_ref_length(references, hyp_len)
-+    >>> brevity_penalty(closest_ref_len, hyp_len)
-+    1.0
-+
-+    :param hyp_len: The length of the hypothesis for a single sentence OR the
-+        sum of all the hypotheses' lengths for a corpus
-+    :type hyp_len: int
-+    :param closest_ref_len: The length of the closest reference for a single
-+        hypothesis OR the sum of all the closest references for every hypotheses.
-+    :type closest_ref_len: int
-+    :return: BLEU's brevity penalty.
-+    :rtype: float
-+    """
-+    if hyp_len > closest_ref_len:
-+        return 1
-+    # If hypothesis is empty, brevity penalty = 0 should result in BLEU = 0.0
-+    elif hyp_len == 0:
-+        return 0
-+    else:
-+        return math.exp(1 - closest_ref_len / hyp_len)
-+
-+
-+class SmoothingFunction:
-+    """
-+    This is an implementation of the smoothing techniques
-+    for segment-level BLEU scores that was presented in
-+    Boxing Chen and Collin Cherry (2014) A Systematic Comparison of
-+    Smoothing Techniques for Sentence-Level BLEU. In WMT14.
-+    http://acl2014.org/acl2014/W14-33/pdf/W14-3346.pdf
-+    """
-+
-+    def __init__(self, epsilon=0.1, alpha=5, k=5):
-+        """
-+        This will initialize the parameters required for the various smoothing
-+        techniques, the default values are set to the numbers used in the
-+        experiments from Chen and Cherry (2014).
-+
-+        >>> hypothesis1 = ['It', 'is', 'a', 'guide', 'to', 'action', 'which', 'ensures',
-+        ...                 'that', 'the', 'military', 'always', 'obeys', 'the',
-+        ...                 'commands', 'of', 'the', 'party']
-+        >>> reference1 = ['It', 'is', 'a', 'guide', 'to', 'action', 'that', 'ensures',
-+        ...               'that', 'the', 'military', 'will', 'forever', 'heed',
-+        ...               'Party', 'commands']
-+
-+        >>> chencherry = SmoothingFunction()
-+        >>> print(sentence_bleu([reference1], hypothesis1)) # doctest: +ELLIPSIS
-+        0.4118...
-+        >>> print(sentence_bleu([reference1], hypothesis1, smoothing_function=chencherry.method0)) # doctest: +ELLIPSIS
-+        0.4118...
-+        >>> print(sentence_bleu([reference1], hypothesis1, smoothing_function=chencherry.method1)) # doctest: +ELLIPSIS
-+        0.4118...
-+        >>> print(sentence_bleu([reference1], hypothesis1, smoothing_function=chencherry.method2)) # doctest: +ELLIPSIS
-+        0.4452...
-+        >>> print(sentence_bleu([reference1], hypothesis1, smoothing_function=chencherry.method3)) # doctest: +ELLIPSIS
-+        0.4118...
-+        >>> print(sentence_bleu([reference1], hypothesis1, smoothing_function=chencherry.method4)) # doctest: +ELLIPSIS
-+        0.4118...
-+        >>> print(sentence_bleu([reference1], hypothesis1, smoothing_function=chencherry.method5)) # doctest: +ELLIPSIS
-+        0.4905...
-+        >>> print(sentence_bleu([reference1], hypothesis1, smoothing_function=chencherry.method6)) # doctest: +ELLIPSIS
-+        0.4135...
-+        >>> print(sentence_bleu([reference1], hypothesis1, smoothing_function=chencherry.method7)) # doctest: +ELLIPSIS
-+        0.4905...
-+
-+        :param epsilon: the epsilon value use in method 1
-+        :type epsilon: float
-+        :param alpha: the alpha value use in method 6
-+        :type alpha: int
-+        :param k: the k value use in method 4
-+        :type k: int
-+        """
-+        self.epsilon = epsilon
-+        self.alpha = alpha
-+        self.k = k
-+
-+    def method0(self, p_n, *args, **kwargs):
-+        """
-+        No smoothing.
-+        """
-+        p_n_new = []
-+        for i, p_i in enumerate(p_n):
-+            if p_i.numerator != 0:
-+                p_n_new.append(p_i)
-+            else:
-+                _msg = str(
-+                    "\nThe hypothesis contains 0 counts of {}-gram overlaps.\n"
-+                    "Therefore the BLEU score evaluates to 0, independently of\n"
-+                    "how many N-gram overlaps of lower order it contains.\n"
-+                    "Consider using lower n-gram order or use "
-+                    "SmoothingFunction()"
-+                ).format(i + 1)
-+                warnings.warn(_msg)
-+                # When numerator==0 where denonminator==0 or !=0, the result
-+                # for the precision score should be equal to 0 or undefined.
-+                # Due to BLEU geometric mean computation in logarithm space,
-+                # we we need to take the return sys.float_info.min such that
-+                # math.log(sys.float_info.min) returns a 0 precision score.
-+                p_n_new.append(sys.float_info.min)
-+        return p_n_new
-+
-+    def method1(self, p_n, *args, **kwargs):
-+        """
-+        Smoothing method 1: Add *epsilon* counts to precision with 0 counts.
-+        """
-+        return [
-+            (p_i.numerator + self.epsilon) / p_i.denominator
-+            if p_i.numerator == 0
-+            else p_i
-+            for p_i in p_n
-+        ]
-+
-+    def method2(self, p_n, *args, **kwargs):
-+        """
-+        Smoothing method 2: Add 1 to both numerator and denominator from
-+        Chin-Yew Lin and Franz Josef Och (2004) ORANGE: a Method for
-+        Evaluating Automatic Evaluation Metrics for Machine Translation.
-+        In COLING 2004.
-+        """
-+        return [
-+            Fraction(p_n[i].numerator + 1, p_n[i].denominator + 1, _normalize=False)
-+            if i != 0
-+            else p_n[0]
-+            for i in range(len(p_n))
-+        ]
-+
-+    def method3(self, p_n, *args, **kwargs):
-+        """
-+        Smoothing method 3: NIST geometric sequence smoothing
-+        The smoothing is computed by taking 1 / ( 2^k ), instead of 0, for each
-+        precision score whose matching n-gram count is null.
-+        k is 1 for the first 'n' value for which the n-gram match count is null/
-+
-+        For example, if the text contains:
-+
-+        - one 2-gram match
-+        - and (consequently) two 1-gram matches
-+
-+        the n-gram count for each individual precision score would be:
-+
-+        - n=1  =>  prec_count = 2     (two unigrams)
-+        - n=2  =>  prec_count = 1     (one bigram)
-+        - n=3  =>  prec_count = 1/2   (no trigram,  taking 'smoothed' value of 1 / ( 2^k ), with k=1)
-+        - n=4  =>  prec_count = 1/4   (no fourgram, taking 'smoothed' value of 1 / ( 2^k ), with k=2)
-+        """
-+        incvnt = 1  # From the mteval-v13a.pl, it's referred to as k.
-+        for i, p_i in enumerate(p_n):
-+            if p_i.numerator == 0:
-+                p_n[i] = 1 / (2**incvnt * p_i.denominator)
-+                incvnt += 1
-+        return p_n
-+
-+    def method4(self, p_n, references, hypothesis, hyp_len=None, *args, **kwargs):
-+        """
-+        Smoothing method 4:
-+        Shorter translations may have inflated precision values due to having
-+        smaller denominators; therefore, we give them proportionally
-+        smaller smoothed counts. Instead of scaling to 1/(2^k), Chen and Cherry
-+        suggests dividing by 1/ln(len(T)), where T is the length of the translation.
-+        """
-+        incvnt = 1
-+        hyp_len = hyp_len if hyp_len else len(hypothesis)
-+        for i, p_i in enumerate(p_n):
-+            if p_i.numerator == 0 and hyp_len > 1:
-+                # incvnt = i + 1 * self.k / math.log(
-+                #     hyp_len
-+                # )  # Note that this K is different from the K from NIST.
-+                # p_n[i] = incvnt / p_i.denominator\
-+                numerator = 1 / (2**incvnt * self.k / math.log(hyp_len))
-+                p_n[i] = numerator / p_i.denominator
-+                incvnt += 1
-+        return p_n
-+
-+    def method5(self, p_n, references, hypothesis, hyp_len=None, *args, **kwargs):
-+        """
-+        Smoothing method 5:
-+        The matched counts for similar values of n should be similar. To a
-+        calculate the n-gram matched count, it averages the n−1, n and n+1 gram
-+        matched counts.
-+        """
-+        hyp_len = hyp_len if hyp_len else len(hypothesis)
-+        m = {}
-+        # Requires an precision value for an addition ngram order.
-+        p_n_plus1 = p_n + [modified_precision(references, hypothesis, 5)]
-+        m[-1] = p_n[0] + 1
-+        for i, p_i in enumerate(p_n):
-+            p_n[i] = (m[i - 1] + p_i + p_n_plus1[i + 1]) / 3
-+            m[i] = p_n[i]
-+        return p_n
-+
-+    def method6(self, p_n, references, hypothesis, hyp_len=None, *args, **kwargs):
-+        """
-+        Smoothing method 6:
-+        Interpolates the maximum likelihood estimate of the precision *p_n* with
-+        a prior estimate *pi0*. The prior is estimated by assuming that the ratio
-+        between pn and pn−1 will be the same as that between pn−1 and pn−2; from
-+        Gao and He (2013) Training MRF-Based Phrase Translation Models using
-+        Gradient Ascent. In NAACL.
-+        """
-+        hyp_len = hyp_len if hyp_len else len(hypothesis)
-+        # This smoothing only works when p_1 and p_2 is non-zero.
-+        # Raise an error with an appropriate message when the input is too short
-+        # to use this smoothing technique.
-+        assert p_n[2], "This smoothing method requires non-zero precision for bigrams."
-+        for i, p_i in enumerate(p_n):
-+            if i in [0, 1]:  # Skips the first 2 orders of ngrams.
-+                continue
-+            else:
-+                pi0 = 0 if p_n[i - 2] == 0 else p_n[i - 1] ** 2 / p_n[i - 2]
-+                # No. of ngrams in translation that matches the reference.
-+                m = p_i.numerator
-+                # No. of ngrams in translation.
-+                l = sum(1 for _ in ngrams(hypothesis, i + 1))
-+                # Calculates the interpolated precision.
-+                p_n[i] = (m + self.alpha * pi0) / (l + self.alpha)
-+        return p_n
-+
-+    def method7(self, p_n, references, hypothesis, hyp_len=None, *args, **kwargs):
-+        """
-+        Smoothing method 7:
-+        Interpolates methods 4 and 5.
-+        """
-+        hyp_len = hyp_len if hyp_len else len(hypothesis)
-+        p_n = self.method4(p_n, references, hypothesis, hyp_len)
-+        p_n = self.method5(p_n, references, hypothesis, hyp_len)
-+        return p_n
-Index: nltk-3.8.1/README.md
-===================================================================
---- nltk-3.8.1.orig/README.md
-+++ nltk-3.8.1/README.md
-@@ -1,50 +1,50 @@
--# Natural Language Toolkit (NLTK)
--[](https://pypi.python.org/pypi/nltk)
--
--
--NLTK -- the Natural Language Toolkit -- is a suite of open source Python
--modules, data sets, and tutorials supporting research and development in Natural
--Language Processing. NLTK requires Python version 3.7, 3.8, 3.9, 3.10 or 3.11.
--
--For documentation, please visit [nltk.org](https://www.nltk.org/).
--
--
--## Contributing
--
--Do you want to contribute to NLTK development? Great!
--Please read [CONTRIBUTING.md](CONTRIBUTING.md) for more details.
--
--See also [how to contribute to NLTK](https://www.nltk.org/contribute.html).
--
--
--## Donate
--
--Have you found the toolkit helpful?  Please support NLTK development by donating
--to the project via PayPal, using the link on the NLTK homepage.
--
--
--## Citing
--
--If you publish work that uses NLTK, please cite the NLTK book, as follows:
--
--    Bird, Steven, Edward Loper and Ewan Klein (2009).
--    Natural Language Processing with Python.  O'Reilly Media Inc.
--
--
--## Copyright
--
--Copyright (C) 2001-2023 NLTK Project
--
--For license information, see [LICENSE.txt](LICENSE.txt).
--
--[AUTHORS.md](AUTHORS.md) contains a list of everyone who has contributed to NLTK.
--
--
--### Redistributing
--
--- NLTK source code is distributed under the Apache 2.0 License.
--- NLTK documentation is distributed under the Creative Commons
--  Attribution-Noncommercial-No Derivative Works 3.0 United States license.
--- NLTK corpora are provided under the terms given in the README file for each
--  corpus; all are redistributable and available for non-commercial use.
--- NLTK may be freely redistributed, subject to the provisions of these licenses.
-+# Natural Language Toolkit (NLTK)
-+[](https://pypi.python.org/pypi/nltk)
-+
-+
-+NLTK -- the Natural Language Toolkit -- is a suite of open source Python
-+modules, data sets, and tutorials supporting research and development in Natural
-+Language Processing. NLTK requires Python version 3.7, 3.8, 3.9, 3.10, 3.11 or 3.12.
-+
-+For documentation, please visit [nltk.org](https://www.nltk.org/).
-+
-+
-+## Contributing
-+
-+Do you want to contribute to NLTK development? Great!
-+Please read [CONTRIBUTING.md](CONTRIBUTING.md) for more details.
-+
-+See also [how to contribute to NLTK](https://www.nltk.org/contribute.html).
-+
-+
-+## Donate
-+
-+Have you found the toolkit helpful?  Please support NLTK development by donating
-+to the project via PayPal, using the link on the NLTK homepage.
-+
-+
-+## Citing
-+
-+If you publish work that uses NLTK, please cite the NLTK book, as follows:
-+
-+    Bird, Steven, Edward Loper and Ewan Klein (2009).
-+    Natural Language Processing with Python.  O'Reilly Media Inc.
-+
-+
-+## Copyright
-+
-+Copyright (C) 2001-2023 NLTK Project
-+
-+For license information, see [LICENSE.txt](LICENSE.txt).
-+
-+[AUTHORS.md](AUTHORS.md) contains a list of everyone who has contributed to NLTK.
-+
-+
-+### Redistributing
-+
-+- NLTK source code is distributed under the Apache 2.0 License.
-+- NLTK documentation is distributed under the Creative Commons
-+  Attribution-Noncommercial-No Derivative Works 3.0 United States license.
-+- NLTK corpora are provided under the terms given in the README file for each
-+  corpus; all are redistributable and available for non-commercial use.
-+- NLTK may be freely redistributed, subject to the provisions of these licenses.
-Index: nltk-3.8.1/setup.py
-===================================================================
---- nltk-3.8.1.orig/setup.py
-+++ nltk-3.8.1/setup.py
-@@ -1,125 +1,126 @@
--#!/usr/bin/env python
--#
--# Setup script for the Natural Language Toolkit
--#
--# Copyright (C) 2001-2023 NLTK Project
--# Author: NLTK Team 
--# URL: 
--# For license information, see LICENSE.TXT
--
--# Work around mbcs bug in distutils.
--# https://bugs.python.org/issue10945
--import codecs
--
--try:
--    codecs.lookup("mbcs")
--except LookupError:
--    ascii = codecs.lookup("ascii")
--    func = lambda name, enc=ascii: {True: enc}.get(name == "mbcs")
--    codecs.register(func)
--
--import os
--
--# Use the VERSION file to get NLTK version
--version_file = os.path.join(os.path.dirname(__file__), "nltk", "VERSION")
--with open(version_file) as fh:
--    nltk_version = fh.read().strip()
--
--# setuptools
--from setuptools import find_packages, setup
--
--# Specify groups of optional dependencies
--extras_require = {
--    "machine_learning": [
--        "numpy",
--        "python-crfsuite",
--        "scikit-learn",
--        "scipy",
--    ],
--    "plot": ["matplotlib"],
--    "tgrep": ["pyparsing"],
--    "twitter": ["twython"],
--    "corenlp": ["requests"],
--}
--
--# Add a group made up of all optional dependencies
--extras_require["all"] = {
--    package for group in extras_require.values() for package in group
--}
--
--# Adds CLI commands
--console_scripts = """
--[console_scripts]
--nltk=nltk.cli:cli
--"""
--
--_project_homepage = "https://www.nltk.org/"
--
--setup(
--    name="nltk",
--    description="Natural Language Toolkit",
--    version=nltk_version,
--    url=_project_homepage,
--    project_urls={
--        "Documentation": _project_homepage,
--        "Source Code": "https://github.com/nltk/nltk",
--        "Issue Tracker": "https://github.com/nltk/nltk/issues",
--    },
--    long_description="""\
--The Natural Language Toolkit (NLTK) is a Python package for
--natural language processing.  NLTK requires Python 3.7, 3.8, 3.9, 3.10 or 3.11.""",
--    license="Apache License, Version 2.0",
--    keywords=[
--        "NLP",
--        "CL",
--        "natural language processing",
--        "computational linguistics",
--        "parsing",
--        "tagging",
--        "tokenizing",
--        "syntax",
--        "linguistics",
--        "language",
--        "natural language",
--        "text analytics",
--    ],
--    maintainer="NLTK Team",
--    maintainer_email="nltk.team@gmail.com",
--    author="NLTK Team",
--    author_email="nltk.team@gmail.com",
--    classifiers=[
--        "Development Status :: 5 - Production/Stable",
--        "Intended Audience :: Developers",
--        "Intended Audience :: Education",
--        "Intended Audience :: Information Technology",
--        "Intended Audience :: Science/Research",
--        "License :: OSI Approved :: Apache Software License",
--        "Operating System :: OS Independent",
--        "Programming Language :: Python :: 3.7",
--        "Programming Language :: Python :: 3.8",
--        "Programming Language :: Python :: 3.9",
--        "Programming Language :: Python :: 3.10",
--        "Programming Language :: Python :: 3.11",
--        "Topic :: Scientific/Engineering",
--        "Topic :: Scientific/Engineering :: Artificial Intelligence",
--        "Topic :: Scientific/Engineering :: Human Machine Interfaces",
--        "Topic :: Scientific/Engineering :: Information Analysis",
--        "Topic :: Text Processing",
--        "Topic :: Text Processing :: Filters",
--        "Topic :: Text Processing :: General",
--        "Topic :: Text Processing :: Indexing",
--        "Topic :: Text Processing :: Linguistic",
--    ],
--    package_data={"nltk": ["test/*.doctest", "VERSION"]},
--    python_requires=">=3.7",
--    install_requires=[
--        "click",
--        "joblib",
--        "regex>=2021.8.3",
--        "tqdm",
--    ],
--    extras_require=extras_require,
--    packages=find_packages(),
--    zip_safe=False,  # since normal files will be present too?
--    entry_points=console_scripts,
--)
-+#!/usr/bin/env python
-+#
-+# Setup script for the Natural Language Toolkit
-+#
-+# Copyright (C) 2001-2023 NLTK Project
-+# Author: NLTK Team 
-+# URL: 
-+# For license information, see LICENSE.TXT
-+
-+# Work around mbcs bug in distutils.
-+# https://bugs.python.org/issue10945
-+import codecs
-+
-+try:
-+    codecs.lookup("mbcs")
-+except LookupError:
-+    ascii = codecs.lookup("ascii")
-+    func = lambda name, enc=ascii: {True: enc}.get(name == "mbcs")
-+    codecs.register(func)
-+
-+import os
-+
-+# Use the VERSION file to get NLTK version
-+version_file = os.path.join(os.path.dirname(__file__), "nltk", "VERSION")
-+with open(version_file) as fh:
-+    nltk_version = fh.read().strip()
-+
-+# setuptools
-+from setuptools import find_packages, setup
-+
-+# Specify groups of optional dependencies
-+extras_require = {
-+    "machine_learning": [
-+        "numpy",
-+        "python-crfsuite",
-+        "scikit-learn",
-+        "scipy",
-+    ],
-+    "plot": ["matplotlib"],
-+    "tgrep": ["pyparsing"],
-+    "twitter": ["twython"],
-+    "corenlp": ["requests"],
-+}
-+
-+# Add a group made up of all optional dependencies
-+extras_require["all"] = {
-+    package for group in extras_require.values() for package in group
-+}
-+
-+# Adds CLI commands
-+console_scripts = """
-+[console_scripts]
-+nltk=nltk.cli:cli
-+"""
-+
-+_project_homepage = "https://www.nltk.org/"
-+
-+setup(
-+    name="nltk",
-+    description="Natural Language Toolkit",
-+    version=nltk_version,
-+    url=_project_homepage,
-+    project_urls={
-+        "Documentation": _project_homepage,
-+        "Source Code": "https://github.com/nltk/nltk",
-+        "Issue Tracker": "https://github.com/nltk/nltk/issues",
-+    },
-+    long_description="""\
-+The Natural Language Toolkit (NLTK) is a Python package for
+ def sentence_bleu(
+     references,
+     hypothesis,
+--- a/setup.py
++++ b/setup.py
+@@ -67,7 +67,7 @@ setup(
+     },
+     long_description="""\
+ The Natural Language Toolkit (NLTK) is a Python package for
+-natural language processing.  NLTK requires Python 3.7, 3.8, 3.9, 3.10 or 3.11.""",
 +natural language processing.  NLTK requires Python 3.7, 3.8, 3.9, 3.10, 3.11 or 3.12.""",
-+    license="Apache License, Version 2.0",
-+    keywords=[
-+        "NLP",
-+        "CL",
-+        "natural language processing",
-+        "computational linguistics",
-+        "parsing",
-+        "tagging",
-+        "tokenizing",
-+        "syntax",
-+        "linguistics",
-+        "language",
-+        "natural language",
-+        "text analytics",
-+    ],
-+    maintainer="NLTK Team",
-+    maintainer_email="nltk.team@gmail.com",
-+    author="NLTK Team",
-+    author_email="nltk.team@gmail.com",
-+    classifiers=[
-+        "Development Status :: 5 - Production/Stable",
-+        "Intended Audience :: Developers",
-+        "Intended Audience :: Education",
-+        "Intended Audience :: Information Technology",
-+        "Intended Audience :: Science/Research",
-+        "License :: OSI Approved :: Apache Software License",
-+        "Operating System :: OS Independent",
-+        "Programming Language :: Python :: 3.7",
-+        "Programming Language :: Python :: 3.8",
-+        "Programming Language :: Python :: 3.9",
-+        "Programming Language :: Python :: 3.10",
-+        "Programming Language :: Python :: 3.11",
+     license="Apache License, Version 2.0",
+     keywords=[
+         "NLP",
+@@ -100,6 +100,7 @@ natural language processing.  NLTK requi
+         "Programming Language :: Python :: 3.9",
+         "Programming Language :: Python :: 3.10",
+         "Programming Language :: Python :: 3.11",
 +        "Programming Language :: Python :: 3.12",
-+        "Topic :: Scientific/Engineering",
-+        "Topic :: Scientific/Engineering :: Artificial Intelligence",
-+        "Topic :: Scientific/Engineering :: Human Machine Interfaces",
-+        "Topic :: Scientific/Engineering :: Information Analysis",
-+        "Topic :: Text Processing",
-+        "Topic :: Text Processing :: Filters",
-+        "Topic :: Text Processing :: General",
-+        "Topic :: Text Processing :: Indexing",
-+        "Topic :: Text Processing :: Linguistic",
-+    ],
-+    package_data={"nltk": ["test/*.doctest", "VERSION"]},
-+    python_requires=">=3.7",
-+    install_requires=[
-+        "click",
-+        "joblib",
-+        "regex>=2021.8.3",
-+        "tqdm",
-+    ],
-+    extras_require=extras_require,
-+    packages=find_packages(),
-+    zip_safe=False,  # since normal files will be present too?
-+    entry_points=console_scripts,
-+)
+         "Topic :: Scientific/Engineering",
+         "Topic :: Scientific/Engineering :: Artificial Intelligence",
+         "Topic :: Scientific/Engineering :: Human Machine Interfaces",
diff --git a/python-nltk.changes b/python-nltk.changes
index e4d945d..2b401ed 100644
--- a/python-nltk.changes
+++ b/python-nltk.changes
@@ -1,3 +1,14 @@
+-------------------------------------------------------------------
+Mon Jul  1 21:02:45 UTC 2024 - Matej Cepl 
+
+- Use tarball from GitHub instead of the Zip archive from PyPI,
+  the latter has a very messy combination of CRLF and LF EOLs,
+  which are hard to patch.
+- Refresh all patches from the original locations.
+- Add CVE-2024-39705-disable-download.patch as a crude
+  workaround for CVE-2024-39705 (gh#nltk/nltk#3266,
+  bsc#1227174).
+
 -------------------------------------------------------------------
 Thu Mar 21 17:41:52 UTC 2024 - Ben Greiner 
 
diff --git a/python-nltk.spec b/python-nltk.spec
index c5ecdeb..21b8297 100644
--- a/python-nltk.spec
+++ b/python-nltk.spec
@@ -16,6 +16,7 @@
 #
 
 
+%define modname nltk
 Name:           python-nltk
 Version:        3.8.1
 Release:        0
@@ -23,7 +24,7 @@ Summary:        Natural Language Toolkit
 License:        Apache-2.0
 URL:            http://nltk.org/
 # SourceRepository: https://github.com/nltk/nltk
-Source0:        https://files.pythonhosted.org/packages/source/n/nltk/nltk-%{version}.zip
+Source0:        https://github.com/nltk/%{modname}/archive/refs/tags/%{version}.tar.gz#/%{modname}-%{version}.tar.gz
 # Download/Update NLTK data:
 #     quilt setup python-nltk.spec
 #     pushd nltk-?.?.?
@@ -62,6 +63,9 @@ Source99:       python-nltk.rpmlintrc
 Patch0:         skip-networked-test.patch
 # PATCH-FIX-UPSTREAM nltk-pr3207-py312.patch gh#nltk/nltk#3207
 Patch1:         nltk-pr3207-py312.patch
+# PATCH-FIX-UPSTREAM CVE-2024-39705-disable-download.patch bsc#1227174 mcepl@suse.com
+# Crude workaround: forbid deserializing from pickles (see gh#nltk/nltk#3266)
+Patch2:         CVE-2024-39705-disable-download.patch
 BuildRequires:  %{python_module base >= 3.7}
 BuildRequires:  %{python_module pip}
 BuildRequires:  %{python_module setuptools}
@@ -118,7 +122,7 @@ Python modules, data sets and tutorials supporting research and
 development in Natural Language Processing.
 
 %prep
-%autosetup -p1 -a1 -n nltk-%{version}
+%setup -q -a1 -n %{modname}-%{version}
 
 # Fix EOL
 sed -i 's/\r/\n/g; s/\n$//' \
@@ -150,6 +154,8 @@ sed -E -i "s|#![[:space:]]*%{_bindir}/env python|#!%{_bindir}/python3|" \
     nltk_data/corpora/pl196x/splitter.py \
     tools/find_deprecated.py
 
+%autopatch -p1
+
 %build
 %pyproject_wheel
 
@@ -164,7 +170,12 @@ chmod -x %{buildroot}%{$python_sitelib}/nltk/test/dependency.doctest
 %check
 export NLTK_DATA=$(readlink -f ./nltk_data/)
 # export PYTEST_ADDOPTS="--doctest-modules"
-%pytest -k 'not network'
+# Skip tests that require pickle.load, see gh#nltk/nltk#3266 (CVE-2024-39705)
+skip_tests=" or test_basic or test_increment or test_pad_asterisk or test_pad_dotdot"
+skip_tests+=" or test_pos_tag_eng or test_pos_tag_eng_universal or test_pos_tag_rus"
+skip_tests+=" or test_pos_tag_rus_universal or test_pos_tag_unknown_lang"
+skip_tests+=" or test_sent_tokenize or test_unspecified_lang or test_word_tokenize"
+%pytest -k "not (network ${skip_tests})"
 
 %post
 %python_install_alternative nltk
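
For illustration only: a minimal smoke test (hypothetical, not shipped with the
package) that assumes the patched nltk is importable. It merely checks that
pickle deserialization is refused with a RuntimeError, the behaviour that
CVE-2024-39705-disable-download.patch introduces:

    # smoke_test_pickle_guard.py (hypothetical helper, not part of this change)
    import pickle
    import tempfile

    import pytest

    import nltk.data


    def test_pickle_loading_is_refused():
        # Write a harmless pickle; with the workaround applied, nltk must
        # refuse to deserialize it regardless of its contents.
        with tempfile.NamedTemporaryFile(suffix=".pickle", delete=False) as fh:
            pickle.dump({"ok": True}, fh)
            path = fh.name
        with pytest.raises(RuntimeError):
            nltk.data.load(f"file:{path}", format="pickle")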
diff --git a/skip-networked-test.patch b/skip-networked-test.patch
index 82d62af..f1cd8f7 100644
--- a/skip-networked-test.patch
+++ b/skip-networked-test.patch
@@ -6,30 +6,30 @@
 --- a/nltk/test/unit/test_downloader.py
 +++ b/nltk/test/unit/test_downloader.py
 @@ -1,6 +1,9 @@
- from nltk import download
- 
-+import pytest
- 
-+
-+@pytest.mark.network
- def test_downloader_using_existing_parent_download_dir(tmp_path):
-     """Test that download works properly when the parent folder of the download_dir exists"""
- 
+ from nltk import download
+ 
++import pytest
+ 
++
++@pytest.mark.network
+ def test_downloader_using_existing_parent_download_dir(tmp_path):
+     """Test that download works properly when the parent folder of the download_dir exists"""
+ 
 @@ -9,6 +12,7 @@ def test_downloader_using_existing_paren
-     assert download_status is True
- 
- 
-+@pytest.mark.network
- def test_downloader_using_non_existing_parent_download_dir(tmp_path):
-     """Test that download works properly when the parent folder of the download_dir does not exist"""
- 
+     assert download_status is True
+ 
+ 
++@pytest.mark.network
+ def test_downloader_using_non_existing_parent_download_dir(tmp_path):
+     """Test that download works properly when the parent folder of the download_dir does not exist"""
+ 
 --- a/setup.cfg
 +++ b/setup.cfg
 @@ -1,3 +1,7 @@
-+[tool:pytest]
-+markers =
-+   network: test case requires network connection
-+
- [metadata]
- license_files = 
- 	LICENSE.txt
++[tool:pytest]
++markers =
++   network: test case requires network connection
++
+ [metadata]
+ license_files =
+     LICENSE.txt
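
The refreshed skip-networked-test.patch keeps the network marker registered
under [tool:pytest], so further network-dependent tests can be deselected the
same way. A minimal sketch, using a hypothetical test module, of how such a
test would be marked:

    # test_example_network.py (hypothetical module, not part of this change)
    import pytest


    @pytest.mark.network  # marker registered in setup.cfg by skip-networked-test.patch
    def test_fetches_remote_resource():
        # Placeholder body; a real test would contact the network here.
        assert True

With the marker registered, either -k 'not network' (as in the spec file's
%check section) or -m 'not network' deselects such tests.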