From 25d35fc4283dedd2053ec6d821f4b707fff8d72c Mon Sep 17 00:00:00 2001 From: Konstantin Chernyshev Date: Thu, 16 Nov 2023 19:00:15 +0100 Subject: [PATCH 1/8] ci: enable 3.12 in ci tests --- .github/workflows/ci.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) Index: nltk-3.8.1/nltk/test/unit/translate/test_bleu.py =================================================================== --- nltk-3.8.1.orig/nltk/test/unit/translate/test_bleu.py +++ nltk-3.8.1/nltk/test/unit/translate/test_bleu.py @@ -2,7 +2,6 @@ Tests for BLEU translation evaluation metric """ -import io import unittest from nltk.data import find Index: nltk-3.8.1/nltk/translate/bleu_score.py =================================================================== --- nltk-3.8.1.orig/nltk/translate/bleu_score.py +++ nltk-3.8.1/nltk/translate/bleu_score.py @@ -1,685 +1,710 @@ -# Natural Language Toolkit: BLEU Score -# -# Copyright (C) 2001-2023 NLTK Project -# Authors: Chin Yee Lee, Hengfeng Li, Ruxin Hou, Calvin Tanujaya Lim -# Contributors: Björn Mattsson, Dmitrijs Milajevs, Liling Tan -# URL: -# For license information, see LICENSE.TXT - -"""BLEU score implementation.""" - -import math -import sys -import warnings -from collections import Counter -from fractions import Fraction - -from nltk.util import ngrams - - -def sentence_bleu( - references, - hypothesis, - weights=(0.25, 0.25, 0.25, 0.25), - smoothing_function=None, - auto_reweigh=False, -): - """ - Calculate BLEU score (Bilingual Evaluation Understudy) from - Papineni, Kishore, Salim Roukos, Todd Ward, and Wei-Jing Zhu. 2002. - "BLEU: a method for automatic evaluation of machine translation." - In Proceedings of ACL. https://www.aclweb.org/anthology/P02-1040.pdf - - >>> hypothesis1 = ['It', 'is', 'a', 'guide', 'to', 'action', 'which', - ... 'ensures', 'that', 'the', 'military', 'always', - ... 'obeys', 'the', 'commands', 'of', 'the', 'party'] - - >>> hypothesis2 = ['It', 'is', 'to', 'insure', 'the', 'troops', - ... 'forever', 'hearing', 'the', 'activity', 'guidebook', - ... 'that', 'party', 'direct'] - - >>> reference1 = ['It', 'is', 'a', 'guide', 'to', 'action', 'that', - ... 'ensures', 'that', 'the', 'military', 'will', 'forever', - ... 'heed', 'Party', 'commands'] - - >>> reference2 = ['It', 'is', 'the', 'guiding', 'principle', 'which', - ... 'guarantees', 'the', 'military', 'forces', 'always', - ... 'being', 'under', 'the', 'command', 'of', 'the', - ... 'Party'] - - >>> reference3 = ['It', 'is', 'the', 'practical', 'guide', 'for', 'the', - ... 'army', 'always', 'to', 'heed', 'the', 'directions', - ... 'of', 'the', 'party'] - - >>> sentence_bleu([reference1, reference2, reference3], hypothesis1) # doctest: +ELLIPSIS - 0.5045... - - If there is no ngrams overlap for any order of n-grams, BLEU returns the - value 0. This is because the precision for the order of n-grams without - overlap is 0, and the geometric mean in the final BLEU score computation - multiplies the 0 with the precision of other n-grams. This results in 0 - (independently of the precision of the other n-gram orders). The following - example has zero 3-gram and 4-gram overlaps: - - >>> round(sentence_bleu([reference1, reference2, reference3], hypothesis2),4) # doctest: +ELLIPSIS - 0.0 - - To avoid this harsh behaviour when no ngram overlaps are found a smoothing - function can be used. - - >>> chencherry = SmoothingFunction() - >>> sentence_bleu([reference1, reference2, reference3], hypothesis2, - ... smoothing_function=chencherry.method1) # doctest: +ELLIPSIS - 0.0370... 
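The zero-score behaviour described above is a direct consequence of the weighted geometric mean that both sentence_bleu and corpus_bleu use. A minimal standalone sketch of that combination step (the formula BLEU = BP * exp(sum(w_n * log(p_n))); not NLTK's internal code, and the precision values are illustrative):

import math

def combine(bp, precisions, weights=(0.25,) * 4):
    # A single zero precision makes log() undefined, hence unsmoothed BLEU = 0.
    if any(p == 0 for p in precisions):
        return 0.0
    return bp * math.exp(math.fsum(w * math.log(p) for w, p in zip(weights, precisions)))

print(combine(1.0, [0.75, 0.5, 0.25, 0.125]))  # ~0.3290, the weighted geometric mean
print(combine(1.0, [0.75, 0.5, 0.0, 0.0]))     # 0.0 once any order has no overlap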
- - The default BLEU calculates a score for up to 4-grams using uniform - weights (this is called BLEU-4). To evaluate your translations with - higher/lower order ngrams, use customized weights. E.g. when accounting - for up to 5-grams with uniform weights (this is called BLEU-5) use: - - >>> weights = (1./5., 1./5., 1./5., 1./5., 1./5.) - >>> sentence_bleu([reference1, reference2, reference3], hypothesis1, weights) # doctest: +ELLIPSIS - 0.3920... - - Multiple BLEU scores can be computed at once, by supplying a list of weights. - E.g. for computing BLEU-2, BLEU-3 *and* BLEU-4 in one computation, use: - >>> weights = [ - ... (1./2., 1./2.), - ... (1./3., 1./3., 1./3.), - ... (1./4., 1./4., 1./4., 1./4.) - ... ] - >>> sentence_bleu([reference1, reference2, reference3], hypothesis1, weights) # doctest: +ELLIPSIS - [0.7453..., 0.6240..., 0.5045...] - - :param references: reference sentences - :type references: list(list(str)) - :param hypothesis: a hypothesis sentence - :type hypothesis: list(str) - :param weights: weights for unigrams, bigrams, trigrams and so on (one or a list of weights) - :type weights: tuple(float) / list(tuple(float)) - :param smoothing_function: - :type smoothing_function: SmoothingFunction - :param auto_reweigh: Option to re-normalize the weights uniformly. - :type auto_reweigh: bool - :return: The sentence-level BLEU score. Returns a list if multiple weights were supplied. - :rtype: float / list(float) - """ - return corpus_bleu( - [references], [hypothesis], weights, smoothing_function, auto_reweigh - ) - - -def corpus_bleu( - list_of_references, - hypotheses, - weights=(0.25, 0.25, 0.25, 0.25), - smoothing_function=None, - auto_reweigh=False, -): - """ - Calculate a single corpus-level BLEU score (aka. system-level BLEU) for all - the hypotheses and their respective references. - - Instead of averaging the sentence level BLEU scores (i.e. macro-average - precision), the original BLEU metric (Papineni et al. 2002) accounts for - the micro-average precision (i.e. summing the numerators and denominators - for each hypothesis-reference(s) pairs before the division). - - >>> hyp1 = ['It', 'is', 'a', 'guide', 'to', 'action', 'which', - ... 'ensures', 'that', 'the', 'military', 'always', - ... 'obeys', 'the', 'commands', 'of', 'the', 'party'] - >>> ref1a = ['It', 'is', 'a', 'guide', 'to', 'action', 'that', - ... 'ensures', 'that', 'the', 'military', 'will', 'forever', - ... 'heed', 'Party', 'commands'] - >>> ref1b = ['It', 'is', 'the', 'guiding', 'principle', 'which', - ... 'guarantees', 'the', 'military', 'forces', 'always', - ... 'being', 'under', 'the', 'command', 'of', 'the', 'Party'] - >>> ref1c = ['It', 'is', 'the', 'practical', 'guide', 'for', 'the', - ... 'army', 'always', 'to', 'heed', 'the', 'directions', - ... 'of', 'the', 'party'] - - >>> hyp2 = ['he', 'read', 'the', 'book', 'because', 'he', 'was', - ... 'interested', 'in', 'world', 'history'] - >>> ref2a = ['he', 'was', 'interested', 'in', 'world', 'history', - ... 'because', 'he', 'read', 'the', 'book'] - - >>> list_of_references = [[ref1a, ref1b, ref1c], [ref2a]] - >>> hypotheses = [hyp1, hyp2] - >>> corpus_bleu(list_of_references, hypotheses) # doctest: +ELLIPSIS - 0.5920... - - The example below show that corpus_bleu() is different from averaging - sentence_bleu() for hypotheses - - >>> score1 = sentence_bleu([ref1a, ref1b, ref1c], hyp1) - >>> score2 = sentence_bleu([ref2a], hyp2) - >>> (score1 + score2) / 2 # doctest: +ELLIPSIS - 0.6223... 
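Pooling numerators and denominators (micro-average) and averaging per-sentence ratios (macro-average) only coincide when every sentence contributes equal counts; a two-sentence arithmetic sketch with illustrative counts, not the doctest's numbers:

# corpus_bleu style: sum matches and totals first, then divide once.
micro = (5 + 9) / (10 + 90)        # 14/100 = 0.14
# averaged sentence_bleu style: divide per sentence, then average.
macro = (5 / 10 + 9 / 90) / 2      # (0.5 + 0.1) / 2 = 0.3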
- - Custom weights may be supplied to fine-tune the BLEU score further. - A tuple of float weights for unigrams, bigrams, trigrams and so on can be given. - >>> weights = (0.1, 0.3, 0.5, 0.1) - >>> corpus_bleu(list_of_references, hypotheses, weights=weights) # doctest: +ELLIPSIS - 0.5818... - - This particular weight gave extra value to trigrams. - Furthermore, multiple weights can be given, resulting in multiple BLEU scores. - >>> weights = [ - ... (0.5, 0.5), - ... (0.333, 0.333, 0.334), - ... (0.25, 0.25, 0.25, 0.25), - ... (0.2, 0.2, 0.2, 0.2, 0.2) - ... ] - >>> corpus_bleu(list_of_references, hypotheses, weights=weights) # doctest: +ELLIPSIS - [0.8242..., 0.7067..., 0.5920..., 0.4719...] - - :param list_of_references: a corpus of lists of reference sentences, w.r.t. hypotheses - :type list_of_references: list(list(list(str))) - :param hypotheses: a list of hypothesis sentences - :type hypotheses: list(list(str)) - :param weights: weights for unigrams, bigrams, trigrams and so on (one or a list of weights) - :type weights: tuple(float) / list(tuple(float)) - :param smoothing_function: - :type smoothing_function: SmoothingFunction - :param auto_reweigh: Option to re-normalize the weights uniformly. - :type auto_reweigh: bool - :return: The corpus-level BLEU score. - :rtype: float - """ - # Before proceeding to compute BLEU, perform sanity checks. - - p_numerators = Counter() # Key = ngram order, and value = no. of ngram matches. - p_denominators = Counter() # Key = ngram order, and value = no. of ngram in ref. - hyp_lengths, ref_lengths = 0, 0 - - assert len(list_of_references) == len(hypotheses), ( - "The number of hypotheses and their reference(s) should be the " "same " - ) - - try: - weights[0][0] - except TypeError: - weights = [weights] - max_weight_length = max(len(weight) for weight in weights) - - # Iterate through each hypothesis and their corresponding references. - for references, hypothesis in zip(list_of_references, hypotheses): - # For each order of ngram, calculate the numerator and - # denominator for the corpus-level modified precision. - for i in range(1, max_weight_length + 1): - p_i = modified_precision(references, hypothesis, i) - p_numerators[i] += p_i.numerator - p_denominators[i] += p_i.denominator - - # Calculate the hypothesis length and the closest reference length. - # Adds them to the corpus-level hypothesis and reference counts. - hyp_len = len(hypothesis) - hyp_lengths += hyp_len - ref_lengths += closest_ref_length(references, hyp_len) - - # Calculate corpus-level brevity penalty. - bp = brevity_penalty(ref_lengths, hyp_lengths) - - # Collects the various precision values for the different ngram orders. - p_n = [ - Fraction(p_numerators[i], p_denominators[i], _normalize=False) - for i in range(1, max_weight_length + 1) - ] - - # Returns 0 if there's no matching n-grams - # We only need to check for p_numerators[1] == 0, since if there's - # no unigrams, there won't be any higher order ngrams. - if p_numerators[1] == 0: - return 0 if len(weights) == 1 else [0] * len(weights) - - # If there's no smoothing, set use method0 from SmoothinFunction class. - if not smoothing_function: - smoothing_function = SmoothingFunction().method0 - # Smoothen the modified precision. - # Note: smoothing_function() may convert values into floats; - # it tries to retain the Fraction object as much as the - # smoothing method allows. 
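The weights[0][0] probe in the function body above is how corpus_bleu decides whether it was handed a single weight tuple or a list of them; the same trick in isolation:

weights = (0.25, 0.25, 0.25, 0.25)
try:
    weights[0][0]            # indexing a float raises TypeError...
except TypeError:
    weights = [weights]      # ...so a lone tuple is wrapped into a list
max_weight_length = max(len(w) for w in weights)  # highest n-gram order needed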
- p_n = smoothing_function( - p_n, references=references, hypothesis=hypothesis, hyp_len=hyp_lengths - ) - - bleu_scores = [] - for weight in weights: - # Uniformly re-weighting based on maximum hypothesis lengths if largest - # order of n-grams < 4 and weights is set at default. - if auto_reweigh: - if hyp_lengths < 4 and weight == (0.25, 0.25, 0.25, 0.25): - weight = (1 / hyp_lengths,) * hyp_lengths - - s = (w_i * math.log(p_i) for w_i, p_i in zip(weight, p_n) if p_i > 0) - s = bp * math.exp(math.fsum(s)) - bleu_scores.append(s) - return bleu_scores[0] if len(weights) == 1 else bleu_scores - - -def modified_precision(references, hypothesis, n): - """ - Calculate modified ngram precision. - - The normal precision method may lead to some wrong translations with - high-precision, e.g., the translation, in which a word of reference - repeats several times, has very high precision. - - This function only returns the Fraction object that contains the numerator - and denominator necessary to calculate the corpus-level precision. - To calculate the modified precision for a single pair of hypothesis and - references, cast the Fraction object into a float. - - The famous "the the the ... " example shows that you can get BLEU precision - by duplicating high frequency words. - - >>> reference1 = 'the cat is on the mat'.split() - >>> reference2 = 'there is a cat on the mat'.split() - >>> hypothesis1 = 'the the the the the the the'.split() - >>> references = [reference1, reference2] - >>> float(modified_precision(references, hypothesis1, n=1)) # doctest: +ELLIPSIS - 0.2857... - - In the modified n-gram precision, a reference word will be considered - exhausted after a matching hypothesis word is identified, e.g. - - >>> reference1 = ['It', 'is', 'a', 'guide', 'to', 'action', 'that', - ... 'ensures', 'that', 'the', 'military', 'will', - ... 'forever', 'heed', 'Party', 'commands'] - >>> reference2 = ['It', 'is', 'the', 'guiding', 'principle', 'which', - ... 'guarantees', 'the', 'military', 'forces', 'always', - ... 'being', 'under', 'the', 'command', 'of', 'the', - ... 'Party'] - >>> reference3 = ['It', 'is', 'the', 'practical', 'guide', 'for', 'the', - ... 'army', 'always', 'to', 'heed', 'the', 'directions', - ... 'of', 'the', 'party'] - >>> hypothesis = 'of the'.split() - >>> references = [reference1, reference2, reference3] - >>> float(modified_precision(references, hypothesis, n=1)) - 1.0 - >>> float(modified_precision(references, hypothesis, n=2)) - 1.0 - - An example of a normal machine translation hypothesis: - - >>> hypothesis1 = ['It', 'is', 'a', 'guide', 'to', 'action', 'which', - ... 'ensures', 'that', 'the', 'military', 'always', - ... 'obeys', 'the', 'commands', 'of', 'the', 'party'] - - >>> hypothesis2 = ['It', 'is', 'to', 'insure', 'the', 'troops', - ... 'forever', 'hearing', 'the', 'activity', 'guidebook', - ... 'that', 'party', 'direct'] - - >>> reference1 = ['It', 'is', 'a', 'guide', 'to', 'action', 'that', - ... 'ensures', 'that', 'the', 'military', 'will', - ... 'forever', 'heed', 'Party', 'commands'] - - >>> reference2 = ['It', 'is', 'the', 'guiding', 'principle', 'which', - ... 'guarantees', 'the', 'military', 'forces', 'always', - ... 'being', 'under', 'the', 'command', 'of', 'the', - ... 'Party'] - - >>> reference3 = ['It', 'is', 'the', 'practical', 'guide', 'for', 'the', - ... 'army', 'always', 'to', 'heed', 'the', 'directions', - ... 
'of', 'the', 'party'] - >>> references = [reference1, reference2, reference3] - >>> float(modified_precision(references, hypothesis1, n=1)) # doctest: +ELLIPSIS - 0.9444... - >>> float(modified_precision(references, hypothesis2, n=1)) # doctest: +ELLIPSIS - 0.5714... - >>> float(modified_precision(references, hypothesis1, n=2)) # doctest: +ELLIPSIS - 0.5882352941176471 - >>> float(modified_precision(references, hypothesis2, n=2)) # doctest: +ELLIPSIS - 0.07692... - - - :param references: A list of reference translations. - :type references: list(list(str)) - :param hypothesis: A hypothesis translation. - :type hypothesis: list(str) - :param n: The ngram order. - :type n: int - :return: BLEU's modified precision for the nth order ngram. - :rtype: Fraction - """ - # Extracts all ngrams in hypothesis - # Set an empty Counter if hypothesis is empty. - counts = Counter(ngrams(hypothesis, n)) if len(hypothesis) >= n else Counter() - # Extract a union of references' counts. - # max_counts = reduce(or_, [Counter(ngrams(ref, n)) for ref in references]) - max_counts = {} - for reference in references: - reference_counts = ( - Counter(ngrams(reference, n)) if len(reference) >= n else Counter() - ) - for ngram in counts: - max_counts[ngram] = max(max_counts.get(ngram, 0), reference_counts[ngram]) - - # Assigns the intersection between hypothesis and references' counts. - clipped_counts = { - ngram: min(count, max_counts[ngram]) for ngram, count in counts.items() - } - - numerator = sum(clipped_counts.values()) - # Ensures that denominator is minimum 1 to avoid ZeroDivisionError. - # Usually this happens when the ngram order is > len(reference). - denominator = max(1, sum(counts.values())) - - return Fraction(numerator, denominator, _normalize=False) - - -def closest_ref_length(references, hyp_len): - """ - This function finds the reference that is the closest length to the - hypothesis. The closest reference length is referred to as *r* variable - from the brevity penalty formula in Papineni et. al. (2002) - - :param references: A list of reference translations. - :type references: list(list(str)) - :param hyp_len: The length of the hypothesis. - :type hyp_len: int - :return: The length of the reference that's closest to the hypothesis. - :rtype: int - """ - ref_lens = (len(reference) for reference in references) - closest_ref_len = min( - ref_lens, key=lambda ref_len: (abs(ref_len - hyp_len), ref_len) - ) - return closest_ref_len - - -def brevity_penalty(closest_ref_len, hyp_len): - """ - Calculate brevity penalty. - - As the modified n-gram precision still has the problem from the short - length sentence, brevity penalty is used to modify the overall BLEU - score according to length. - - An example from the paper. There are three references with length 12, 15 - and 17. And a concise hypothesis of the length 12. The brevity penalty is 1. - - >>> reference1 = list('aaaaaaaaaaaa') # i.e. ['a'] * 12 - >>> reference2 = list('aaaaaaaaaaaaaaa') # i.e. ['a'] * 15 - >>> reference3 = list('aaaaaaaaaaaaaaaaa') # i.e. ['a'] * 17 - >>> hypothesis = list('aaaaaaaaaaaa') # i.e. ['a'] * 12 - >>> references = [reference1, reference2, reference3] - >>> hyp_len = len(hypothesis) - >>> closest_ref_len = closest_ref_length(references, hyp_len) - >>> brevity_penalty(closest_ref_len, hyp_len) - 1.0 - - In case a hypothesis translation is shorter than the references, penalty is - applied. 
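brevity_penalty implements BP = 1 if c > r else exp(1 - r/c), with c the hypothesis length and r the closest reference length chosen by closest_ref_length above; a quick standalone check of the same formula against the 28-vs-12 example below:

import math

def bp(r, c):
    if c > r:
        return 1.0
    return 0.0 if c == 0 else math.exp(1 - r / c)

print(bp(28, 12))  # 0.2635971381157267, as in the doctest below
print(bp(12, 12))  # 1.0 -- equal lengths incur no penalty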
- - >>> references = [['a'] * 28, ['a'] * 28] - >>> hypothesis = ['a'] * 12 - >>> hyp_len = len(hypothesis) - >>> closest_ref_len = closest_ref_length(references, hyp_len) - >>> brevity_penalty(closest_ref_len, hyp_len) - 0.2635971381157267 - - The length of the closest reference is used to compute the penalty. If the - length of a hypothesis is 12, and the reference lengths are 13 and 2, the - penalty is applied because the hypothesis length (12) is less then the - closest reference length (13). - - >>> references = [['a'] * 13, ['a'] * 2] - >>> hypothesis = ['a'] * 12 - >>> hyp_len = len(hypothesis) - >>> closest_ref_len = closest_ref_length(references, hyp_len) - >>> brevity_penalty(closest_ref_len, hyp_len) # doctest: +ELLIPSIS - 0.9200... - - The brevity penalty doesn't depend on reference order. More importantly, - when two reference sentences are at the same distance, the shortest - reference sentence length is used. - - >>> references = [['a'] * 13, ['a'] * 11] - >>> hypothesis = ['a'] * 12 - >>> hyp_len = len(hypothesis) - >>> closest_ref_len = closest_ref_length(references, hyp_len) - >>> bp1 = brevity_penalty(closest_ref_len, hyp_len) - >>> hyp_len = len(hypothesis) - >>> closest_ref_len = closest_ref_length(reversed(references), hyp_len) - >>> bp2 = brevity_penalty(closest_ref_len, hyp_len) - >>> bp1 == bp2 == 1 - True - - A test example from mteval-v13a.pl (starting from the line 705): - - >>> references = [['a'] * 11, ['a'] * 8] - >>> hypothesis = ['a'] * 7 - >>> hyp_len = len(hypothesis) - >>> closest_ref_len = closest_ref_length(references, hyp_len) - >>> brevity_penalty(closest_ref_len, hyp_len) # doctest: +ELLIPSIS - 0.8668... - - >>> references = [['a'] * 11, ['a'] * 8, ['a'] * 6, ['a'] * 7] - >>> hypothesis = ['a'] * 7 - >>> hyp_len = len(hypothesis) - >>> closest_ref_len = closest_ref_length(references, hyp_len) - >>> brevity_penalty(closest_ref_len, hyp_len) - 1.0 - - :param hyp_len: The length of the hypothesis for a single sentence OR the - sum of all the hypotheses' lengths for a corpus - :type hyp_len: int - :param closest_ref_len: The length of the closest reference for a single - hypothesis OR the sum of all the closest references for every hypotheses. - :type closest_ref_len: int - :return: BLEU's brevity penalty. - :rtype: float - """ - if hyp_len > closest_ref_len: - return 1 - # If hypothesis is empty, brevity penalty = 0 should result in BLEU = 0.0 - elif hyp_len == 0: - return 0 - else: - return math.exp(1 - closest_ref_len / hyp_len) - - -class SmoothingFunction: - """ - This is an implementation of the smoothing techniques - for segment-level BLEU scores that was presented in - Boxing Chen and Collin Cherry (2014) A Systematic Comparison of - Smoothing Techniques for Sentence-Level BLEU. In WMT14. - http://acl2014.org/acl2014/W14-33/pdf/W14-3346.pdf - """ - - def __init__(self, epsilon=0.1, alpha=5, k=5): - """ - This will initialize the parameters required for the various smoothing - techniques, the default values are set to the numbers used in the - experiments from Chen and Cherry (2014). - - >>> hypothesis1 = ['It', 'is', 'a', 'guide', 'to', 'action', 'which', 'ensures', - ... 'that', 'the', 'military', 'always', 'obeys', 'the', - ... 'commands', 'of', 'the', 'party'] - >>> reference1 = ['It', 'is', 'a', 'guide', 'to', 'action', 'that', 'ensures', - ... 'that', 'the', 'military', 'will', 'forever', 'heed', - ... 
'Party', 'commands'] - - >>> chencherry = SmoothingFunction() - >>> print(sentence_bleu([reference1], hypothesis1)) # doctest: +ELLIPSIS - 0.4118... - >>> print(sentence_bleu([reference1], hypothesis1, smoothing_function=chencherry.method0)) # doctest: +ELLIPSIS - 0.4118... - >>> print(sentence_bleu([reference1], hypothesis1, smoothing_function=chencherry.method1)) # doctest: +ELLIPSIS - 0.4118... - >>> print(sentence_bleu([reference1], hypothesis1, smoothing_function=chencherry.method2)) # doctest: +ELLIPSIS - 0.4452... - >>> print(sentence_bleu([reference1], hypothesis1, smoothing_function=chencherry.method3)) # doctest: +ELLIPSIS - 0.4118... - >>> print(sentence_bleu([reference1], hypothesis1, smoothing_function=chencherry.method4)) # doctest: +ELLIPSIS - 0.4118... - >>> print(sentence_bleu([reference1], hypothesis1, smoothing_function=chencherry.method5)) # doctest: +ELLIPSIS - 0.4905... - >>> print(sentence_bleu([reference1], hypothesis1, smoothing_function=chencherry.method6)) # doctest: +ELLIPSIS - 0.4135... - >>> print(sentence_bleu([reference1], hypothesis1, smoothing_function=chencherry.method7)) # doctest: +ELLIPSIS - 0.4905... - - :param epsilon: the epsilon value use in method 1 - :type epsilon: float - :param alpha: the alpha value use in method 6 - :type alpha: int - :param k: the k value use in method 4 - :type k: int - """ - self.epsilon = epsilon - self.alpha = alpha - self.k = k - - def method0(self, p_n, *args, **kwargs): - """ - No smoothing. - """ - p_n_new = [] - for i, p_i in enumerate(p_n): - if p_i.numerator != 0: - p_n_new.append(p_i) - else: - _msg = str( - "\nThe hypothesis contains 0 counts of {}-gram overlaps.\n" - "Therefore the BLEU score evaluates to 0, independently of\n" - "how many N-gram overlaps of lower order it contains.\n" - "Consider using lower n-gram order or use " - "SmoothingFunction()" - ).format(i + 1) - warnings.warn(_msg) - # When numerator==0 where denonminator==0 or !=0, the result - # for the precision score should be equal to 0 or undefined. - # Due to BLEU geometric mean computation in logarithm space, - # we we need to take the return sys.float_info.min such that - # math.log(sys.float_info.min) returns a 0 precision score. - p_n_new.append(sys.float_info.min) - return p_n_new - - def method1(self, p_n, *args, **kwargs): - """ - Smoothing method 1: Add *epsilon* counts to precision with 0 counts. - """ - return [ - (p_i.numerator + self.epsilon) / p_i.denominator - if p_i.numerator == 0 - else p_i - for p_i in p_n - ] - - def method2(self, p_n, *args, **kwargs): - """ - Smoothing method 2: Add 1 to both numerator and denominator from - Chin-Yew Lin and Franz Josef Och (2004) ORANGE: a Method for - Evaluating Automatic Evaluation Metrics for Machine Translation. - In COLING 2004. - """ - return [ - Fraction(p_n[i].numerator + 1, p_n[i].denominator + 1, _normalize=False) - if i != 0 - else p_n[0] - for i in range(len(p_n)) - ] - - def method3(self, p_n, *args, **kwargs): - """ - Smoothing method 3: NIST geometric sequence smoothing - The smoothing is computed by taking 1 / ( 2^k ), instead of 0, for each - precision score whose matching n-gram count is null. 
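Methods 1 and 2 above amount to one-line adjustments of the raw counts; with illustrative counts (not values from the doctests), they behave as follows:

epsilon = 0.1
num3, den3 = 0, 8                    # no trigram matches out of 8
method1 = (num3 + epsilon) / den3    # 0.0125 instead of a hard 0
num2, den2 = 3, 9
method2 = (num2 + 1) / (den2 + 1)    # 0.4; add-one, applied to every order but the first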
- k is 1 for the first 'n' value for which the n-gram match count is null/ - - For example, if the text contains: - - - one 2-gram match - - and (consequently) two 1-gram matches - - the n-gram count for each individual precision score would be: - - - n=1 => prec_count = 2 (two unigrams) - - n=2 => prec_count = 1 (one bigram) - - n=3 => prec_count = 1/2 (no trigram, taking 'smoothed' value of 1 / ( 2^k ), with k=1) - - n=4 => prec_count = 1/4 (no fourgram, taking 'smoothed' value of 1 / ( 2^k ), with k=2) - """ - incvnt = 1 # From the mteval-v13a.pl, it's referred to as k. - for i, p_i in enumerate(p_n): - if p_i.numerator == 0: - p_n[i] = 1 / (2**incvnt * p_i.denominator) - incvnt += 1 - return p_n - - def method4(self, p_n, references, hypothesis, hyp_len=None, *args, **kwargs): - """ - Smoothing method 4: - Shorter translations may have inflated precision values due to having - smaller denominators; therefore, we give them proportionally - smaller smoothed counts. Instead of scaling to 1/(2^k), Chen and Cherry - suggests dividing by 1/ln(len(T)), where T is the length of the translation. - """ - incvnt = 1 - hyp_len = hyp_len if hyp_len else len(hypothesis) - for i, p_i in enumerate(p_n): - if p_i.numerator == 0 and hyp_len > 1: - # incvnt = i + 1 * self.k / math.log( - # hyp_len - # ) # Note that this K is different from the K from NIST. - # p_n[i] = incvnt / p_i.denominator\ - numerator = 1 / (2**incvnt * self.k / math.log(hyp_len)) - p_n[i] = numerator / p_i.denominator - incvnt += 1 - return p_n - - def method5(self, p_n, references, hypothesis, hyp_len=None, *args, **kwargs): - """ - Smoothing method 5: - The matched counts for similar values of n should be similar. To a - calculate the n-gram matched count, it averages the n−1, n and n+1 gram - matched counts. - """ - hyp_len = hyp_len if hyp_len else len(hypothesis) - m = {} - # Requires an precision value for an addition ngram order. - p_n_plus1 = p_n + [modified_precision(references, hypothesis, 5)] - m[-1] = p_n[0] + 1 - for i, p_i in enumerate(p_n): - p_n[i] = (m[i - 1] + p_i + p_n_plus1[i + 1]) / 3 - m[i] = p_n[i] - return p_n - - def method6(self, p_n, references, hypothesis, hyp_len=None, *args, **kwargs): - """ - Smoothing method 6: - Interpolates the maximum likelihood estimate of the precision *p_n* with - a prior estimate *pi0*. The prior is estimated by assuming that the ratio - between pn and pn−1 will be the same as that between pn−1 and pn−2; from - Gao and He (2013) Training MRF-Based Phrase Translation Models using - Gradient Ascent. In NAACL. - """ - hyp_len = hyp_len if hyp_len else len(hypothesis) - # This smoothing only works when p_1 and p_2 is non-zero. - # Raise an error with an appropriate message when the input is too short - # to use this smoothing technique. - assert p_n[2], "This smoothing method requires non-zero precision for bigrams." - for i, p_i in enumerate(p_n): - if i in [0, 1]: # Skips the first 2 orders of ngrams. - continue - else: - pi0 = 0 if p_n[i - 2] == 0 else p_n[i - 1] ** 2 / p_n[i - 2] - # No. of ngrams in translation that matches the reference. - m = p_i.numerator - # No. of ngrams in translation. - l = sum(1 for _ in ngrams(hypothesis, i + 1)) - # Calculates the interpolated precision. - p_n[i] = (m + self.alpha * pi0) / (l + self.alpha) - return p_n - - def method7(self, p_n, references, hypothesis, hyp_len=None, *args, **kwargs): - """ - Smoothing method 7: - Interpolates methods 4 and 5. 
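The rewrite that follows exists because Python 3.12 removed the private _normalize keyword from fractions.Fraction, which this module relied on to keep numerators and denominators un-reduced; the new file wraps Fraction with a small shim instead. A sketch of the behaviour the shim is meant to preserve (assuming the patched module is importable as nltk.translate.bleu_score):

import sys
from fractions import Fraction as StdFraction

try:
    StdFraction(2, 4, _normalize=False)  # accepted on Python <= 3.11 (private API)
except TypeError:
    assert sys.version_info >= (3, 12)   # 3.12 rejects the keyword entirely

from nltk.translate.bleu_score import Fraction  # the subclass defined below
p = Fraction(2, 4, _normalize=False)
assert (p.numerator, p.denominator) == (2, 4)   # raw counts preserved, not reduced to 1/2
assert float(p) == 0.5                          # numeric value is unaffected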
- """ - hyp_len = hyp_len if hyp_len else len(hypothesis) - p_n = self.method4(p_n, references, hypothesis, hyp_len) - p_n = self.method5(p_n, references, hypothesis, hyp_len) - return p_n +# Natural Language Toolkit: BLEU Score +# +# Copyright (C) 2001-2023 NLTK Project +# Authors: Chin Yee Lee, Hengfeng Li, Ruxin Hou, Calvin Tanujaya Lim +# Contributors: Björn Mattsson, Dmitrijs Milajevs, Liling Tan +# URL: +# For license information, see LICENSE.TXT + +"""BLEU score implementation.""" +import math +import sys +import warnings +from collections import Counter +from fractions import Fraction as _Fraction + +from nltk.util import ngrams + + +class Fraction(_Fraction): + """Fraction with _normalize=False support for 3.12""" + + def __new__(cls, numerator=0, denominator=None, _normalize=False): + if sys.version_info >= (3, 12): + self = super().__new__(cls, numerator, denominator) + else: + self = super().__new__(cls, numerator, denominator, _normalize=_normalize) + self._normalize = _normalize + self._original_numerator = numerator + self._original_denominator = denominator + return self + + @property + def numerator(self): + if not self._normalize: + return self._original_numerator + return super().numerator + + @property + def denominator(self): + if not self._normalize: + return self._original_denominator + return super().denominator + + +def sentence_bleu( + references, + hypothesis, + weights=(0.25, 0.25, 0.25, 0.25), + smoothing_function=None, + auto_reweigh=False, +): + """ + Calculate BLEU score (Bilingual Evaluation Understudy) from + Papineni, Kishore, Salim Roukos, Todd Ward, and Wei-Jing Zhu. 2002. + "BLEU: a method for automatic evaluation of machine translation." + In Proceedings of ACL. https://www.aclweb.org/anthology/P02-1040.pdf + + >>> hypothesis1 = ['It', 'is', 'a', 'guide', 'to', 'action', 'which', + ... 'ensures', 'that', 'the', 'military', 'always', + ... 'obeys', 'the', 'commands', 'of', 'the', 'party'] + + >>> hypothesis2 = ['It', 'is', 'to', 'insure', 'the', 'troops', + ... 'forever', 'hearing', 'the', 'activity', 'guidebook', + ... 'that', 'party', 'direct'] + + >>> reference1 = ['It', 'is', 'a', 'guide', 'to', 'action', 'that', + ... 'ensures', 'that', 'the', 'military', 'will', 'forever', + ... 'heed', 'Party', 'commands'] + + >>> reference2 = ['It', 'is', 'the', 'guiding', 'principle', 'which', + ... 'guarantees', 'the', 'military', 'forces', 'always', + ... 'being', 'under', 'the', 'command', 'of', 'the', + ... 'Party'] + + >>> reference3 = ['It', 'is', 'the', 'practical', 'guide', 'for', 'the', + ... 'army', 'always', 'to', 'heed', 'the', 'directions', + ... 'of', 'the', 'party'] + + >>> sentence_bleu([reference1, reference2, reference3], hypothesis1) # doctest: +ELLIPSIS + 0.5045... + + If there is no ngrams overlap for any order of n-grams, BLEU returns the + value 0. This is because the precision for the order of n-grams without + overlap is 0, and the geometric mean in the final BLEU score computation + multiplies the 0 with the precision of other n-grams. This results in 0 + (independently of the precision of the other n-gram orders). The following + example has zero 3-gram and 4-gram overlaps: + + >>> round(sentence_bleu([reference1, reference2, reference3], hypothesis2),4) # doctest: +ELLIPSIS + 0.0 + + To avoid this harsh behaviour when no ngram overlaps are found a smoothing + function can be used. + + >>> chencherry = SmoothingFunction() + >>> sentence_bleu([reference1, reference2, reference3], hypothesis2, + ... 
smoothing_function=chencherry.method1) # doctest: +ELLIPSIS + 0.0370... + + The default BLEU calculates a score for up to 4-grams using uniform + weights (this is called BLEU-4). To evaluate your translations with + higher/lower order ngrams, use customized weights. E.g. when accounting + for up to 5-grams with uniform weights (this is called BLEU-5) use: + + >>> weights = (1./5., 1./5., 1./5., 1./5., 1./5.) + >>> sentence_bleu([reference1, reference2, reference3], hypothesis1, weights) # doctest: +ELLIPSIS + 0.3920... + + Multiple BLEU scores can be computed at once, by supplying a list of weights. + E.g. for computing BLEU-2, BLEU-3 *and* BLEU-4 in one computation, use: + >>> weights = [ + ... (1./2., 1./2.), + ... (1./3., 1./3., 1./3.), + ... (1./4., 1./4., 1./4., 1./4.) + ... ] + >>> sentence_bleu([reference1, reference2, reference3], hypothesis1, weights) # doctest: +ELLIPSIS + [0.7453..., 0.6240..., 0.5045...] + + :param references: reference sentences + :type references: list(list(str)) + :param hypothesis: a hypothesis sentence + :type hypothesis: list(str) + :param weights: weights for unigrams, bigrams, trigrams and so on (one or a list of weights) + :type weights: tuple(float) / list(tuple(float)) + :param smoothing_function: + :type smoothing_function: SmoothingFunction + :param auto_reweigh: Option to re-normalize the weights uniformly. + :type auto_reweigh: bool + :return: The sentence-level BLEU score. Returns a list if multiple weights were supplied. + :rtype: float / list(float) + """ + return corpus_bleu( + [references], [hypothesis], weights, smoothing_function, auto_reweigh + ) + + +def corpus_bleu( + list_of_references, + hypotheses, + weights=(0.25, 0.25, 0.25, 0.25), + smoothing_function=None, + auto_reweigh=False, +): + """ + Calculate a single corpus-level BLEU score (aka. system-level BLEU) for all + the hypotheses and their respective references. + + Instead of averaging the sentence level BLEU scores (i.e. macro-average + precision), the original BLEU metric (Papineni et al. 2002) accounts for + the micro-average precision (i.e. summing the numerators and denominators + for each hypothesis-reference(s) pairs before the division). + + >>> hyp1 = ['It', 'is', 'a', 'guide', 'to', 'action', 'which', + ... 'ensures', 'that', 'the', 'military', 'always', + ... 'obeys', 'the', 'commands', 'of', 'the', 'party'] + >>> ref1a = ['It', 'is', 'a', 'guide', 'to', 'action', 'that', + ... 'ensures', 'that', 'the', 'military', 'will', 'forever', + ... 'heed', 'Party', 'commands'] + >>> ref1b = ['It', 'is', 'the', 'guiding', 'principle', 'which', + ... 'guarantees', 'the', 'military', 'forces', 'always', + ... 'being', 'under', 'the', 'command', 'of', 'the', 'Party'] + >>> ref1c = ['It', 'is', 'the', 'practical', 'guide', 'for', 'the', + ... 'army', 'always', 'to', 'heed', 'the', 'directions', + ... 'of', 'the', 'party'] + + >>> hyp2 = ['he', 'read', 'the', 'book', 'because', 'he', 'was', + ... 'interested', 'in', 'world', 'history'] + >>> ref2a = ['he', 'was', 'interested', 'in', 'world', 'history', + ... 'because', 'he', 'read', 'the', 'book'] + + >>> list_of_references = [[ref1a, ref1b, ref1c], [ref2a]] + >>> hypotheses = [hyp1, hyp2] + >>> corpus_bleu(list_of_references, hypotheses) # doctest: +ELLIPSIS + 0.5920... 
+
+    The example below shows that corpus_bleu() is different from averaging
+    sentence_bleu() for hypotheses.
+
+    >>> score1 = sentence_bleu([ref1a, ref1b, ref1c], hyp1)
+    >>> score2 = sentence_bleu([ref2a], hyp2)
+    >>> (score1 + score2) / 2 # doctest: +ELLIPSIS
+    0.6223...
+
+    Custom weights may be supplied to fine-tune the BLEU score further.
+    A tuple of float weights for unigrams, bigrams, trigrams and so on can be given.
+    >>> weights = (0.1, 0.3, 0.5, 0.1)
+    >>> corpus_bleu(list_of_references, hypotheses, weights=weights) # doctest: +ELLIPSIS
+    0.5818...
+
+    This particular weight gave extra value to trigrams.
+    Furthermore, multiple weights can be given, resulting in multiple BLEU scores.
+    >>> weights = [
+    ...     (0.5, 0.5),
+    ...     (0.333, 0.333, 0.334),
+    ...     (0.25, 0.25, 0.25, 0.25),
+    ...     (0.2, 0.2, 0.2, 0.2, 0.2)
+    ... ]
+    >>> corpus_bleu(list_of_references, hypotheses, weights=weights) # doctest: +ELLIPSIS
+    [0.8242..., 0.7067..., 0.5920..., 0.4719...]
+
+    :param list_of_references: a corpus of lists of reference sentences, w.r.t. hypotheses
+    :type list_of_references: list(list(list(str)))
+    :param hypotheses: a list of hypothesis sentences
+    :type hypotheses: list(list(str))
+    :param weights: weights for unigrams, bigrams, trigrams and so on (one or a list of weights)
+    :type weights: tuple(float) / list(tuple(float))
+    :param smoothing_function:
+    :type smoothing_function: SmoothingFunction
+    :param auto_reweigh: Option to re-normalize the weights uniformly.
+    :type auto_reweigh: bool
+    :return: The corpus-level BLEU score.
+    :rtype: float
+    """
+    # Before proceeding to compute BLEU, perform sanity checks.
+
+    p_numerators = Counter()  # Key = ngram order, and value = no. of ngram matches.
+    p_denominators = Counter()  # Key = ngram order, and value = no. of ngram in ref.
+    hyp_lengths, ref_lengths = 0, 0
+
+    assert len(list_of_references) == len(hypotheses), (
+        "The number of hypotheses and their reference(s) should be the same"
+    )
+
+    try:
+        weights[0][0]
+    except TypeError:
+        weights = [weights]
+    max_weight_length = max(len(weight) for weight in weights)
+
+    # Iterate through each hypothesis and their corresponding references.
+    for references, hypothesis in zip(list_of_references, hypotheses):
+        # For each order of ngram, calculate the numerator and
+        # denominator for the corpus-level modified precision.
+        for i in range(1, max_weight_length + 1):
+            p_i = modified_precision(references, hypothesis, i)
+            p_numerators[i] += p_i.numerator
+            p_denominators[i] += p_i.denominator
+
+        # Calculate the hypothesis length and the closest reference length.
+        # Adds them to the corpus-level hypothesis and reference counts.
+        hyp_len = len(hypothesis)
+        hyp_lengths += hyp_len
+        ref_lengths += closest_ref_length(references, hyp_len)
+
+    # Calculate corpus-level brevity penalty.
+    bp = brevity_penalty(ref_lengths, hyp_lengths)
+
+    # Collects the various precision values for the different ngram orders.
+    p_n = [
+        Fraction(p_numerators[i], p_denominators[i], _normalize=False)
+        for i in range(1, max_weight_length + 1)
+    ]
+
+    # Returns 0 if there's no matching n-grams
+    # We only need to check for p_numerators[1] == 0, since if there's
+    # no unigrams, there won't be any higher order ngrams.
+    if p_numerators[1] == 0:
+        return 0 if len(weights) == 1 else [0] * len(weights)
+
+    # If there's no smoothing, use method0 from the SmoothingFunction class.
+    if not smoothing_function:
+        smoothing_function = SmoothingFunction().method0
+    # Smoothen the modified precision.
+    # Note: smoothing_function() may convert values into floats;
+    # it tries to retain the Fraction object as much as the
+    # smoothing method allows.
+    p_n = smoothing_function(
+        p_n, references=references, hypothesis=hypothesis, hyp_len=hyp_lengths
+    )
+
+    bleu_scores = []
+    for weight in weights:
+        # Uniformly re-weighting based on maximum hypothesis lengths if largest
+        # order of n-grams < 4 and weights is set at default.
+        if auto_reweigh:
+            if hyp_lengths < 4 and weight == (0.25, 0.25, 0.25, 0.25):
+                weight = (1 / hyp_lengths,) * hyp_lengths
+
+        s = (w_i * math.log(p_i) for w_i, p_i in zip(weight, p_n) if p_i > 0)
+        s = bp * math.exp(math.fsum(s))
+        bleu_scores.append(s)
+    return bleu_scores[0] if len(weights) == 1 else bleu_scores
+
+
+def modified_precision(references, hypothesis, n):
+    """
+    Calculate modified ngram precision.
+
+    The normal precision method may lead to some wrong translations with
+    high-precision, e.g., the translation, in which a word of reference
+    repeats several times, has very high precision.
+
+    This function only returns the Fraction object that contains the numerator
+    and denominator necessary to calculate the corpus-level precision.
+    To calculate the modified precision for a single pair of hypothesis and
+    references, cast the Fraction object into a float.
+
+    The famous "the the the ... " example shows that you can get BLEU precision
+    by duplicating high frequency words.
+
+    >>> reference1 = 'the cat is on the mat'.split()
+    >>> reference2 = 'there is a cat on the mat'.split()
+    >>> hypothesis1 = 'the the the the the the the'.split()
+    >>> references = [reference1, reference2]
+    >>> float(modified_precision(references, hypothesis1, n=1)) # doctest: +ELLIPSIS
+    0.2857...
+
+    In the modified n-gram precision, a reference word will be considered
+    exhausted after a matching hypothesis word is identified, e.g.
+
+    >>> reference1 = ['It', 'is', 'a', 'guide', 'to', 'action', 'that',
+    ...               'ensures', 'that', 'the', 'military', 'will',
+    ...               'forever', 'heed', 'Party', 'commands']
+    >>> reference2 = ['It', 'is', 'the', 'guiding', 'principle', 'which',
+    ...               'guarantees', 'the', 'military', 'forces', 'always',
+    ...               'being', 'under', 'the', 'command', 'of', 'the',
+    ...               'Party']
+    >>> reference3 = ['It', 'is', 'the', 'practical', 'guide', 'for', 'the',
+    ...               'army', 'always', 'to', 'heed', 'the', 'directions',
+    ...               'of', 'the', 'party']
+    >>> hypothesis = 'of the'.split()
+    >>> references = [reference1, reference2, reference3]
+    >>> float(modified_precision(references, hypothesis, n=1))
+    1.0
+    >>> float(modified_precision(references, hypothesis, n=2))
+    1.0
+
+    An example of a normal machine translation hypothesis:
+
+    >>> hypothesis1 = ['It', 'is', 'a', 'guide', 'to', 'action', 'which',
+    ...                'ensures', 'that', 'the', 'military', 'always',
+    ...                'obeys', 'the', 'commands', 'of', 'the', 'party']
+
+    >>> hypothesis2 = ['It', 'is', 'to', 'insure', 'the', 'troops',
+    ...                'forever', 'hearing', 'the', 'activity', 'guidebook',
+    ...                'that', 'party', 'direct']
+
+    >>> reference1 = ['It', 'is', 'a', 'guide', 'to', 'action', 'that',
+    ...               'ensures', 'that', 'the', 'military', 'will',
+    ...               'forever', 'heed', 'Party', 'commands']
+
+    >>> reference2 = ['It', 'is', 'the', 'guiding', 'principle', 'which',
+    ...               'guarantees', 'the', 'military', 'forces', 'always',
+    ...               'being', 'under', 'the', 'command', 'of', 'the',
+    ...               'Party']
+
+    >>> reference3 = ['It', 'is', 'the', 'practical', 'guide', 'for', 'the',
+    ...               'army', 'always', 'to', 'heed', 'the', 'directions',
+    ...               'of', 'the', 'party']
+    >>> references = [reference1, reference2, reference3]
+    >>> float(modified_precision(references, hypothesis1, n=1)) # doctest: +ELLIPSIS
+    0.9444...
+    >>> float(modified_precision(references, hypothesis2, n=1)) # doctest: +ELLIPSIS
+    0.5714...
+    >>> float(modified_precision(references, hypothesis1, n=2)) # doctest: +ELLIPSIS
+    0.5882352941176471
+    >>> float(modified_precision(references, hypothesis2, n=2)) # doctest: +ELLIPSIS
+    0.07692...
+
+
+    :param references: A list of reference translations.
+    :type references: list(list(str))
+    :param hypothesis: A hypothesis translation.
+    :type hypothesis: list(str)
+    :param n: The ngram order.
+    :type n: int
+    :return: BLEU's modified precision for the nth order ngram.
+    :rtype: Fraction
+    """
+    # Extracts all ngrams in hypothesis
+    # Set an empty Counter if hypothesis is empty.
+    counts = Counter(ngrams(hypothesis, n)) if len(hypothesis) >= n else Counter()
+    # Extract a union of references' counts.
+    # max_counts = reduce(or_, [Counter(ngrams(ref, n)) for ref in references])
+    max_counts = {}
+    for reference in references:
+        reference_counts = (
+            Counter(ngrams(reference, n)) if len(reference) >= n else Counter()
+        )
+        for ngram in counts:
+            max_counts[ngram] = max(max_counts.get(ngram, 0), reference_counts[ngram])
+
+    # Assigns the intersection between hypothesis and references' counts.
+    clipped_counts = {
+        ngram: min(count, max_counts[ngram]) for ngram, count in counts.items()
+    }
+
+    numerator = sum(clipped_counts.values())
+    # Ensures that denominator is minimum 1 to avoid ZeroDivisionError.
+    # Usually this happens when the ngram order is > len(reference).
+    denominator = max(1, sum(counts.values()))
+
+    return Fraction(numerator, denominator, _normalize=False)
+
+
+def closest_ref_length(references, hyp_len):
+    """
+    This function finds the reference that is the closest length to the
+    hypothesis. The closest reference length is referred to as the *r* variable
+    from the brevity penalty formula in Papineni et al. (2002).
+
+    :param references: A list of reference translations.
+    :type references: list(list(str))
+    :param hyp_len: The length of the hypothesis.
+    :type hyp_len: int
+    :return: The length of the reference that's closest to the hypothesis.
+    :rtype: int
+    """
+    ref_lens = (len(reference) for reference in references)
+    closest_ref_len = min(
+        ref_lens, key=lambda ref_len: (abs(ref_len - hyp_len), ref_len)
+    )
+    return closest_ref_len
+
+
+def brevity_penalty(closest_ref_len, hyp_len):
+    """
+    Calculate brevity penalty.
+
+    As the modified n-gram precision still has the problem from the short
+    length sentence, brevity penalty is used to modify the overall BLEU
+    score according to length.
+
+    An example from the paper: there are three references with lengths 12, 15
+    and 17, and a concise hypothesis of length 12. The brevity penalty is 1.
+
+    >>> reference1 = list('aaaaaaaaaaaa')      # i.e. ['a'] * 12
+    >>> reference2 = list('aaaaaaaaaaaaaaa')   # i.e. ['a'] * 15
+    >>> reference3 = list('aaaaaaaaaaaaaaaaa') # i.e. ['a'] * 17
+    >>> hypothesis = list('aaaaaaaaaaaa')      # i.e. ['a'] * 12
+    >>> references = [reference1, reference2, reference3]
+    >>> hyp_len = len(hypothesis)
+    >>> closest_ref_len = closest_ref_length(references, hyp_len)
+    >>> brevity_penalty(closest_ref_len, hyp_len)
+    1.0
+
+    In case a hypothesis translation is shorter than the references, penalty is
+    applied.
+
+    >>> references = [['a'] * 28, ['a'] * 28]
+    >>> hypothesis = ['a'] * 12
+    >>> hyp_len = len(hypothesis)
+    >>> closest_ref_len = closest_ref_length(references, hyp_len)
+    >>> brevity_penalty(closest_ref_len, hyp_len)
+    0.2635971381157267
+
+    The length of the closest reference is used to compute the penalty. If the
+    length of a hypothesis is 12, and the reference lengths are 13 and 2, the
+    penalty is applied because the hypothesis length (12) is less than the
+    closest reference length (13).
+
+    >>> references = [['a'] * 13, ['a'] * 2]
+    >>> hypothesis = ['a'] * 12
+    >>> hyp_len = len(hypothesis)
+    >>> closest_ref_len = closest_ref_length(references, hyp_len)
+    >>> brevity_penalty(closest_ref_len, hyp_len) # doctest: +ELLIPSIS
+    0.9200...
+
+    The brevity penalty doesn't depend on reference order. More importantly,
+    when two reference sentences are at the same distance, the shortest
+    reference sentence length is used.
+
+    >>> references = [['a'] * 13, ['a'] * 11]
+    >>> hypothesis = ['a'] * 12
+    >>> hyp_len = len(hypothesis)
+    >>> closest_ref_len = closest_ref_length(references, hyp_len)
+    >>> bp1 = brevity_penalty(closest_ref_len, hyp_len)
+    >>> hyp_len = len(hypothesis)
+    >>> closest_ref_len = closest_ref_length(reversed(references), hyp_len)
+    >>> bp2 = brevity_penalty(closest_ref_len, hyp_len)
+    >>> bp1 == bp2 == 1
+    True
+
+    A test example from mteval-v13a.pl (starting from the line 705):
+
+    >>> references = [['a'] * 11, ['a'] * 8]
+    >>> hypothesis = ['a'] * 7
+    >>> hyp_len = len(hypothesis)
+    >>> closest_ref_len = closest_ref_length(references, hyp_len)
+    >>> brevity_penalty(closest_ref_len, hyp_len) # doctest: +ELLIPSIS
+    0.8668...
+
+    >>> references = [['a'] * 11, ['a'] * 8, ['a'] * 6, ['a'] * 7]
+    >>> hypothesis = ['a'] * 7
+    >>> hyp_len = len(hypothesis)
+    >>> closest_ref_len = closest_ref_length(references, hyp_len)
+    >>> brevity_penalty(closest_ref_len, hyp_len)
+    1.0
+
+    :param hyp_len: The length of the hypothesis for a single sentence OR the
+        sum of all the hypotheses' lengths for a corpus
+    :type hyp_len: int
+    :param closest_ref_len: The length of the closest reference for a single
+        hypothesis OR the sum of all the closest references for all hypotheses.
+    :type closest_ref_len: int
+    :return: BLEU's brevity penalty.
+    :rtype: float
+    """
+    if hyp_len > closest_ref_len:
+        return 1
+    # If hypothesis is empty, brevity penalty = 0 should result in BLEU = 0.0
+    elif hyp_len == 0:
+        return 0
+    else:
+        return math.exp(1 - closest_ref_len / hyp_len)
+
+
+class SmoothingFunction:
+    """
+    This is an implementation of the smoothing techniques
+    for segment-level BLEU scores that was presented in
+    Boxing Chen and Colin Cherry (2014) A Systematic Comparison of
+    Smoothing Techniques for Sentence-Level BLEU. In WMT14.
+    http://acl2014.org/acl2014/W14-33/pdf/W14-3346.pdf
+    """
+
+    def __init__(self, epsilon=0.1, alpha=5, k=5):
+        """
+        This will initialize the parameters required for the various smoothing
+        techniques, the default values are set to the numbers used in the
+        experiments from Chen and Cherry (2014).
+
+        >>> hypothesis1 = ['It', 'is', 'a', 'guide', 'to', 'action', 'which', 'ensures',
+        ...                'that', 'the', 'military', 'always', 'obeys', 'the',
+        ...                'commands', 'of', 'the', 'party']
+        >>> reference1 = ['It', 'is', 'a', 'guide', 'to', 'action', 'that', 'ensures',
+        ...               'that', 'the', 'military', 'will', 'forever', 'heed',
+        ...               'Party', 'commands']
+
+        >>> chencherry = SmoothingFunction()
+        >>> print(sentence_bleu([reference1], hypothesis1)) # doctest: +ELLIPSIS
+        0.4118...
+        >>> print(sentence_bleu([reference1], hypothesis1, smoothing_function=chencherry.method0)) # doctest: +ELLIPSIS
+        0.4118...
+        >>> print(sentence_bleu([reference1], hypothesis1, smoothing_function=chencherry.method1)) # doctest: +ELLIPSIS
+        0.4118...
+        >>> print(sentence_bleu([reference1], hypothesis1, smoothing_function=chencherry.method2)) # doctest: +ELLIPSIS
+        0.4452...
+        >>> print(sentence_bleu([reference1], hypothesis1, smoothing_function=chencherry.method3)) # doctest: +ELLIPSIS
+        0.4118...
+        >>> print(sentence_bleu([reference1], hypothesis1, smoothing_function=chencherry.method4)) # doctest: +ELLIPSIS
+        0.4118...
+        >>> print(sentence_bleu([reference1], hypothesis1, smoothing_function=chencherry.method5)) # doctest: +ELLIPSIS
+        0.4905...
+        >>> print(sentence_bleu([reference1], hypothesis1, smoothing_function=chencherry.method6)) # doctest: +ELLIPSIS
+        0.4135...
+        >>> print(sentence_bleu([reference1], hypothesis1, smoothing_function=chencherry.method7)) # doctest: +ELLIPSIS
+        0.4905...
+
+        :param epsilon: the epsilon value used in method 1
+        :type epsilon: float
+        :param alpha: the alpha value used in method 6
+        :type alpha: int
+        :param k: the k value used in method 4
+        :type k: int
+        """
+        self.epsilon = epsilon
+        self.alpha = alpha
+        self.k = k
+
+    def method0(self, p_n, *args, **kwargs):
+        """
+        No smoothing.
+        """
+        p_n_new = []
+        for i, p_i in enumerate(p_n):
+            if p_i.numerator != 0:
+                p_n_new.append(p_i)
+            else:
+                _msg = str(
+                    "\nThe hypothesis contains 0 counts of {}-gram overlaps.\n"
+                    "Therefore the BLEU score evaluates to 0, independently of\n"
+                    "how many N-gram overlaps of lower order it contains.\n"
+                    "Consider using lower n-gram order or use "
+                    "SmoothingFunction()"
+                ).format(i + 1)
+                warnings.warn(_msg)
+                # When numerator == 0, whether denominator == 0 or not, the
+                # precision score should be 0 or undefined. Due to the BLEU
+                # geometric mean computation in logarithm space, we return
+                # sys.float_info.min instead, so that math.log() stays defined
+                # and the resulting score is driven towards 0.
+                p_n_new.append(sys.float_info.min)
+        return p_n_new
+
+    def method1(self, p_n, *args, **kwargs):
+        """
+        Smoothing method 1: Add *epsilon* counts to precision with 0 counts.
+        """
+        return [
+            (p_i.numerator + self.epsilon) / p_i.denominator
+            if p_i.numerator == 0
+            else p_i
+            for p_i in p_n
+        ]
+
+    def method2(self, p_n, *args, **kwargs):
+        """
+        Smoothing method 2: Add 1 to both numerator and denominator from
+        Chin-Yew Lin and Franz Josef Och (2004) ORANGE: a Method for
+        Evaluating Automatic Evaluation Metrics for Machine Translation.
+        In COLING 2004.
+        """
+        return [
+            Fraction(p_n[i].numerator + 1, p_n[i].denominator + 1, _normalize=False)
+            if i != 0
+            else p_n[0]
+            for i in range(len(p_n))
+        ]
+
+    def method3(self, p_n, *args, **kwargs):
+        """
+        Smoothing method 3: NIST geometric sequence smoothing
+        The smoothing is computed by taking 1 / ( 2^k ), instead of 0, for each
+        precision score whose matching n-gram count is null.
+        k is 1 for the first 'n' value for which the n-gram match count is null.
+
+        For example, if the text contains:
+
+        - one 2-gram match
+        - and (consequently) two 1-gram matches
+
+        the n-gram count for each individual precision score would be:
+
+        - n=1  =>  prec_count = 2     (two unigrams)
+        - n=2  =>  prec_count = 1     (one bigram)
+        - n=3  =>  prec_count = 1/2   (no trigram, taking 'smoothed' value of 1 / ( 2^k ), with k=1)
+        - n=4  =>  prec_count = 1/4   (no fourgram, taking 'smoothed' value of 1 / ( 2^k ), with k=2)
+        """
+        incvnt = 1  # From the mteval-v13a.pl, it's referred to as k.
+        for i, p_i in enumerate(p_n):
+            if p_i.numerator == 0:
+                p_n[i] = 1 / (2**incvnt * p_i.denominator)
+                incvnt += 1
+        return p_n
+
+    def method4(self, p_n, references, hypothesis, hyp_len=None, *args, **kwargs):
+        """
+        Smoothing method 4:
+        Shorter translations may have inflated precision values due to having
+        smaller denominators; therefore, we give them proportionally
+        smaller smoothed counts. Instead of scaling to 1/(2^k), Chen and Cherry
+        suggest dividing by 1/ln(len(T)), where T is the length of the translation.
+        """
+        incvnt = 1
+        hyp_len = hyp_len if hyp_len else len(hypothesis)
+        for i, p_i in enumerate(p_n):
+            if p_i.numerator == 0 and hyp_len > 1:
+                # incvnt = i + 1 * self.k / math.log(
+                #     hyp_len
+                # )  # Note that this K is different from the K from NIST.
+                # p_n[i] = incvnt / p_i.denominator
+                numerator = 1 / (2**incvnt * self.k / math.log(hyp_len))
+                p_n[i] = numerator / p_i.denominator
+                incvnt += 1
+        return p_n
+
+    def method5(self, p_n, references, hypothesis, hyp_len=None, *args, **kwargs):
+        """
+        Smoothing method 5:
+        The matched counts for similar values of n should be similar. To
+        calculate the n-gram matched count, it averages the n−1, n and n+1 gram
+        matched counts.
+        """
+        hyp_len = hyp_len if hyp_len else len(hypothesis)
+        m = {}
+        # Requires a precision value for an additional ngram order.
+        p_n_plus1 = p_n + [modified_precision(references, hypothesis, 5)]
+        m[-1] = p_n[0] + 1
+        for i, p_i in enumerate(p_n):
+            p_n[i] = (m[i - 1] + p_i + p_n_plus1[i + 1]) / 3
+            m[i] = p_n[i]
+        return p_n
+
+    def method6(self, p_n, references, hypothesis, hyp_len=None, *args, **kwargs):
+        """
+        Smoothing method 6:
+        Interpolates the maximum likelihood estimate of the precision *p_n* with
+        a prior estimate *pi0*. The prior is estimated by assuming that the ratio
+        between pn and pn−1 will be the same as that between pn−1 and pn−2; from
+        Gao and He (2013) Training MRF-Based Phrase Translation Models using
+        Gradient Ascent. In NAACL.
+        """
+        hyp_len = hyp_len if hyp_len else len(hypothesis)
+        # This smoothing only works when p_1 and p_2 are non-zero.
+        # Raise an error with an appropriate message when the input is too short
+        # to use this smoothing technique.
+        assert p_n[2], "This smoothing method requires non-zero precision for bigrams."
+        for i, p_i in enumerate(p_n):
+            if i in [0, 1]:  # Skips the first 2 orders of ngrams.
+                continue
+            else:
+                pi0 = 0 if p_n[i - 2] == 0 else p_n[i - 1] ** 2 / p_n[i - 2]
+                # No. of ngrams in translation that matches the reference.
+                m = p_i.numerator
+                # No. of ngrams in translation.
+                l = sum(1 for _ in ngrams(hypothesis, i + 1))
+                # Calculates the interpolated precision.
+                p_n[i] = (m + self.alpha * pi0) / (l + self.alpha)
+        return p_n
+
+    def method7(self, p_n, references, hypothesis, hyp_len=None, *args, **kwargs):
+        """
+        Smoothing method 7:
+        Interpolates methods 4 and 5.
+ """ + hyp_len = hyp_len if hyp_len else len(hypothesis) + p_n = self.method4(p_n, references, hypothesis, hyp_len) + p_n = self.method5(p_n, references, hypothesis, hyp_len) + return p_n Index: nltk-3.8.1/README.md =================================================================== --- nltk-3.8.1.orig/README.md +++ nltk-3.8.1/README.md @@ -1,50 +1,50 @@ -# Natural Language Toolkit (NLTK) -[![PyPI](https://img.shields.io/pypi/v/nltk.svg)](https://pypi.python.org/pypi/nltk) -![CI](https://github.com/nltk/nltk/actions/workflows/ci.yaml/badge.svg?branch=develop) - -NLTK -- the Natural Language Toolkit -- is a suite of open source Python -modules, data sets, and tutorials supporting research and development in Natural -Language Processing. NLTK requires Python version 3.7, 3.8, 3.9, 3.10 or 3.11. - -For documentation, please visit [nltk.org](https://www.nltk.org/). - - -## Contributing - -Do you want to contribute to NLTK development? Great! -Please read [CONTRIBUTING.md](CONTRIBUTING.md) for more details. - -See also [how to contribute to NLTK](https://www.nltk.org/contribute.html). - - -## Donate - -Have you found the toolkit helpful? Please support NLTK development by donating -to the project via PayPal, using the link on the NLTK homepage. - - -## Citing - -If you publish work that uses NLTK, please cite the NLTK book, as follows: - - Bird, Steven, Edward Loper and Ewan Klein (2009). - Natural Language Processing with Python. O'Reilly Media Inc. - - -## Copyright - -Copyright (C) 2001-2023 NLTK Project - -For license information, see [LICENSE.txt](LICENSE.txt). - -[AUTHORS.md](AUTHORS.md) contains a list of everyone who has contributed to NLTK. - - -### Redistributing - -- NLTK source code is distributed under the Apache 2.0 License. -- NLTK documentation is distributed under the Creative Commons - Attribution-Noncommercial-No Derivative Works 3.0 United States license. -- NLTK corpora are provided under the terms given in the README file for each - corpus; all are redistributable and available for non-commercial use. -- NLTK may be freely redistributed, subject to the provisions of these licenses. +# Natural Language Toolkit (NLTK) +[![PyPI](https://img.shields.io/pypi/v/nltk.svg)](https://pypi.python.org/pypi/nltk) +![CI](https://github.com/nltk/nltk/actions/workflows/ci.yaml/badge.svg?branch=develop) + +NLTK -- the Natural Language Toolkit -- is a suite of open source Python +modules, data sets, and tutorials supporting research and development in Natural +Language Processing. NLTK requires Python version 3.7, 3.8, 3.9, 3.10, 3.11 or 3.12. + +For documentation, please visit [nltk.org](https://www.nltk.org/). + + +## Contributing + +Do you want to contribute to NLTK development? Great! +Please read [CONTRIBUTING.md](CONTRIBUTING.md) for more details. + +See also [how to contribute to NLTK](https://www.nltk.org/contribute.html). + + +## Donate + +Have you found the toolkit helpful? Please support NLTK development by donating +to the project via PayPal, using the link on the NLTK homepage. + + +## Citing + +If you publish work that uses NLTK, please cite the NLTK book, as follows: + + Bird, Steven, Edward Loper and Ewan Klein (2009). + Natural Language Processing with Python. O'Reilly Media Inc. + + +## Copyright + +Copyright (C) 2001-2023 NLTK Project + +For license information, see [LICENSE.txt](LICENSE.txt). + +[AUTHORS.md](AUTHORS.md) contains a list of everyone who has contributed to NLTK. + + +### Redistributing + +- NLTK source code is distributed under the Apache 2.0 License. 
+- NLTK documentation is distributed under the Creative Commons + Attribution-Noncommercial-No Derivative Works 3.0 United States license. +- NLTK corpora are provided under the terms given in the README file for each + corpus; all are redistributable and available for non-commercial use. +- NLTK may be freely redistributed, subject to the provisions of these licenses. Index: nltk-3.8.1/setup.py =================================================================== --- nltk-3.8.1.orig/setup.py +++ nltk-3.8.1/setup.py @@ -1,125 +1,126 @@ -#!/usr/bin/env python -# -# Setup script for the Natural Language Toolkit -# -# Copyright (C) 2001-2023 NLTK Project -# Author: NLTK Team -# URL: -# For license information, see LICENSE.TXT - -# Work around mbcs bug in distutils. -# https://bugs.python.org/issue10945 -import codecs - -try: - codecs.lookup("mbcs") -except LookupError: - ascii = codecs.lookup("ascii") - func = lambda name, enc=ascii: {True: enc}.get(name == "mbcs") - codecs.register(func) - -import os - -# Use the VERSION file to get NLTK version -version_file = os.path.join(os.path.dirname(__file__), "nltk", "VERSION") -with open(version_file) as fh: - nltk_version = fh.read().strip() - -# setuptools -from setuptools import find_packages, setup - -# Specify groups of optional dependencies -extras_require = { - "machine_learning": [ - "numpy", - "python-crfsuite", - "scikit-learn", - "scipy", - ], - "plot": ["matplotlib"], - "tgrep": ["pyparsing"], - "twitter": ["twython"], - "corenlp": ["requests"], -} - -# Add a group made up of all optional dependencies -extras_require["all"] = { - package for group in extras_require.values() for package in group -} - -# Adds CLI commands -console_scripts = """ -[console_scripts] -nltk=nltk.cli:cli -""" - -_project_homepage = "https://www.nltk.org/" - -setup( - name="nltk", - description="Natural Language Toolkit", - version=nltk_version, - url=_project_homepage, - project_urls={ - "Documentation": _project_homepage, - "Source Code": "https://github.com/nltk/nltk", - "Issue Tracker": "https://github.com/nltk/nltk/issues", - }, - long_description="""\ -The Natural Language Toolkit (NLTK) is a Python package for -natural language processing. 
NLTK requires Python 3.7, 3.8, 3.9, 3.10 or 3.11.""", - license="Apache License, Version 2.0", - keywords=[ - "NLP", - "CL", - "natural language processing", - "computational linguistics", - "parsing", - "tagging", - "tokenizing", - "syntax", - "linguistics", - "language", - "natural language", - "text analytics", - ], - maintainer="NLTK Team", - maintainer_email="nltk.team@gmail.com", - author="NLTK Team", - author_email="nltk.team@gmail.com", - classifiers=[ - "Development Status :: 5 - Production/Stable", - "Intended Audience :: Developers", - "Intended Audience :: Education", - "Intended Audience :: Information Technology", - "Intended Audience :: Science/Research", - "License :: OSI Approved :: Apache Software License", - "Operating System :: OS Independent", - "Programming Language :: Python :: 3.7", - "Programming Language :: Python :: 3.8", - "Programming Language :: Python :: 3.9", - "Programming Language :: Python :: 3.10", - "Programming Language :: Python :: 3.11", - "Topic :: Scientific/Engineering", - "Topic :: Scientific/Engineering :: Artificial Intelligence", - "Topic :: Scientific/Engineering :: Human Machine Interfaces", - "Topic :: Scientific/Engineering :: Information Analysis", - "Topic :: Text Processing", - "Topic :: Text Processing :: Filters", - "Topic :: Text Processing :: General", - "Topic :: Text Processing :: Indexing", - "Topic :: Text Processing :: Linguistic", - ], - package_data={"nltk": ["test/*.doctest", "VERSION"]}, - python_requires=">=3.7", - install_requires=[ - "click", - "joblib", - "regex>=2021.8.3", - "tqdm", - ], - extras_require=extras_require, - packages=find_packages(), - zip_safe=False, # since normal files will be present too? - entry_points=console_scripts, -) +#!/usr/bin/env python +# +# Setup script for the Natural Language Toolkit +# +# Copyright (C) 2001-2023 NLTK Project +# Author: NLTK Team +# URL: +# For license information, see LICENSE.TXT + +# Work around mbcs bug in distutils. +# https://bugs.python.org/issue10945 +import codecs + +try: + codecs.lookup("mbcs") +except LookupError: + ascii = codecs.lookup("ascii") + func = lambda name, enc=ascii: {True: enc}.get(name == "mbcs") + codecs.register(func) + +import os + +# Use the VERSION file to get NLTK version +version_file = os.path.join(os.path.dirname(__file__), "nltk", "VERSION") +with open(version_file) as fh: + nltk_version = fh.read().strip() + +# setuptools +from setuptools import find_packages, setup + +# Specify groups of optional dependencies +extras_require = { + "machine_learning": [ + "numpy", + "python-crfsuite", + "scikit-learn", + "scipy", + ], + "plot": ["matplotlib"], + "tgrep": ["pyparsing"], + "twitter": ["twython"], + "corenlp": ["requests"], +} + +# Add a group made up of all optional dependencies +extras_require["all"] = { + package for group in extras_require.values() for package in group +} + +# Adds CLI commands +console_scripts = """ +[console_scripts] +nltk=nltk.cli:cli +""" + +_project_homepage = "https://www.nltk.org/" + +setup( + name="nltk", + description="Natural Language Toolkit", + version=nltk_version, + url=_project_homepage, + project_urls={ + "Documentation": _project_homepage, + "Source Code": "https://github.com/nltk/nltk", + "Issue Tracker": "https://github.com/nltk/nltk/issues", + }, + long_description="""\ +The Natural Language Toolkit (NLTK) is a Python package for +natural language processing. 
NLTK requires Python 3.7, 3.8, 3.9, 3.10, 3.11 or 3.12.""", + license="Apache License, Version 2.0", + keywords=[ + "NLP", + "CL", + "natural language processing", + "computational linguistics", + "parsing", + "tagging", + "tokenizing", + "syntax", + "linguistics", + "language", + "natural language", + "text analytics", + ], + maintainer="NLTK Team", + maintainer_email="nltk.team@gmail.com", + author="NLTK Team", + author_email="nltk.team@gmail.com", + classifiers=[ + "Development Status :: 5 - Production/Stable", + "Intended Audience :: Developers", + "Intended Audience :: Education", + "Intended Audience :: Information Technology", + "Intended Audience :: Science/Research", + "License :: OSI Approved :: Apache Software License", + "Operating System :: OS Independent", + "Programming Language :: Python :: 3.7", + "Programming Language :: Python :: 3.8", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", + "Topic :: Scientific/Engineering", + "Topic :: Scientific/Engineering :: Artificial Intelligence", + "Topic :: Scientific/Engineering :: Human Machine Interfaces", + "Topic :: Scientific/Engineering :: Information Analysis", + "Topic :: Text Processing", + "Topic :: Text Processing :: Filters", + "Topic :: Text Processing :: General", + "Topic :: Text Processing :: Indexing", + "Topic :: Text Processing :: Linguistic", + ], + package_data={"nltk": ["test/*.doctest", "VERSION"]}, + python_requires=">=3.7", + install_requires=[ + "click", + "joblib", + "regex>=2021.8.3", + "tqdm", + ], + extras_require=extras_require, + packages=find_packages(), + zip_safe=False, # since normal files will be present too? + entry_points=console_scripts, +)
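
To sanity-check the patched module under Python 3.12, a minimal smoke test
such as the one below can be used (a sketch, assuming the patched nltk-3.8.1
tree is on the import path; the token lists are illustrative):

    from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction

    reference = ["the", "cat", "is", "on", "the", "mat"]
    hypothesis = ["the", "cat", "sat", "on", "the", "mat"]

    # method1 adds a small epsilon to zero n-gram match counts, so the
    # score stays non-zero even though no 4-gram of the hypothesis
    # matches the reference.
    score = sentence_bleu(
        [reference], hypothesis,
        smoothing_function=SmoothingFunction().method1,
    )
    print(score)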