python-nltk/nltk-pr3207-py312.patch

From 25d35fc4283dedd2053ec6d821f4b707fff8d72c Mon Sep 17 00:00:00 2001
From: Konstantin Chernyshev <k4black@ya.ru>
Date: Thu, 16 Nov 2023 19:00:15 +0100
Subject: [PATCH 1/8] ci: enable 3.12 in ci tests
---
.github/workflows/ci.yaml | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
Index: nltk-3.8.1/nltk/test/unit/translate/test_bleu.py
===================================================================
--- nltk-3.8.1.orig/nltk/test/unit/translate/test_bleu.py
+++ nltk-3.8.1/nltk/test/unit/translate/test_bleu.py
@@ -2,7 +2,6 @@
Tests for BLEU translation evaluation metric
"""
-import io
import unittest
from nltk.data import find
Index: nltk-3.8.1/nltk/translate/bleu_score.py
===================================================================
--- nltk-3.8.1.orig/nltk/translate/bleu_score.py
+++ nltk-3.8.1/nltk/translate/bleu_score.py
@@ -1,685 +1,710 @@
-# Natural Language Toolkit: BLEU Score
-#
-# Copyright (C) 2001-2023 NLTK Project
-# Authors: Chin Yee Lee, Hengfeng Li, Ruxin Hou, Calvin Tanujaya Lim
-# Contributors: Björn Mattsson, Dmitrijs Milajevs, Liling Tan
-# URL: <https://www.nltk.org/>
-# For license information, see LICENSE.TXT
-
-"""BLEU score implementation."""
-
-import math
-import sys
-import warnings
-from collections import Counter
-from fractions import Fraction
-
-from nltk.util import ngrams
-
-
-def sentence_bleu(
- references,
- hypothesis,
- weights=(0.25, 0.25, 0.25, 0.25),
- smoothing_function=None,
- auto_reweigh=False,
-):
- """
- Calculate BLEU score (Bilingual Evaluation Understudy) from
- Papineni, Kishore, Salim Roukos, Todd Ward, and Wei-Jing Zhu. 2002.
- "BLEU: a method for automatic evaluation of machine translation."
- In Proceedings of ACL. https://www.aclweb.org/anthology/P02-1040.pdf
-
- >>> hypothesis1 = ['It', 'is', 'a', 'guide', 'to', 'action', 'which',
- ... 'ensures', 'that', 'the', 'military', 'always',
- ... 'obeys', 'the', 'commands', 'of', 'the', 'party']
-
- >>> hypothesis2 = ['It', 'is', 'to', 'insure', 'the', 'troops',
- ... 'forever', 'hearing', 'the', 'activity', 'guidebook',
- ... 'that', 'party', 'direct']
-
- >>> reference1 = ['It', 'is', 'a', 'guide', 'to', 'action', 'that',
- ... 'ensures', 'that', 'the', 'military', 'will', 'forever',
- ... 'heed', 'Party', 'commands']
-
- >>> reference2 = ['It', 'is', 'the', 'guiding', 'principle', 'which',
- ... 'guarantees', 'the', 'military', 'forces', 'always',
- ... 'being', 'under', 'the', 'command', 'of', 'the',
- ... 'Party']
-
- >>> reference3 = ['It', 'is', 'the', 'practical', 'guide', 'for', 'the',
- ... 'army', 'always', 'to', 'heed', 'the', 'directions',
- ... 'of', 'the', 'party']
-
- >>> sentence_bleu([reference1, reference2, reference3], hypothesis1) # doctest: +ELLIPSIS
- 0.5045...
-
- If there is no ngrams overlap for any order of n-grams, BLEU returns the
- value 0. This is because the precision for the order of n-grams without
- overlap is 0, and the geometric mean in the final BLEU score computation
- multiplies the 0 with the precision of other n-grams. This results in 0
- (independently of the precision of the other n-gram orders). The following
- example has zero 3-gram and 4-gram overlaps:
-
- >>> round(sentence_bleu([reference1, reference2, reference3], hypothesis2),4) # doctest: +ELLIPSIS
- 0.0
-
- To avoid this harsh behaviour when no ngram overlaps are found a smoothing
- function can be used.
-
- >>> chencherry = SmoothingFunction()
- >>> sentence_bleu([reference1, reference2, reference3], hypothesis2,
- ... smoothing_function=chencherry.method1) # doctest: +ELLIPSIS
- 0.0370...
-
- The default BLEU calculates a score for up to 4-grams using uniform
- weights (this is called BLEU-4). To evaluate your translations with
- higher/lower order ngrams, use customized weights. E.g. when accounting
- for up to 5-grams with uniform weights (this is called BLEU-5) use:
-
- >>> weights = (1./5., 1./5., 1./5., 1./5., 1./5.)
- >>> sentence_bleu([reference1, reference2, reference3], hypothesis1, weights) # doctest: +ELLIPSIS
- 0.3920...
-
- Multiple BLEU scores can be computed at once, by supplying a list of weights.
- E.g. for computing BLEU-2, BLEU-3 *and* BLEU-4 in one computation, use:
- >>> weights = [
- ... (1./2., 1./2.),
- ... (1./3., 1./3., 1./3.),
- ... (1./4., 1./4., 1./4., 1./4.)
- ... ]
- >>> sentence_bleu([reference1, reference2, reference3], hypothesis1, weights) # doctest: +ELLIPSIS
- [0.7453..., 0.6240..., 0.5045...]
-
- :param references: reference sentences
- :type references: list(list(str))
- :param hypothesis: a hypothesis sentence
- :type hypothesis: list(str)
- :param weights: weights for unigrams, bigrams, trigrams and so on (one or a list of weights)
- :type weights: tuple(float) / list(tuple(float))
- :param smoothing_function:
- :type smoothing_function: SmoothingFunction
- :param auto_reweigh: Option to re-normalize the weights uniformly.
- :type auto_reweigh: bool
- :return: The sentence-level BLEU score. Returns a list if multiple weights were supplied.
- :rtype: float / list(float)
- """
- return corpus_bleu(
- [references], [hypothesis], weights, smoothing_function, auto_reweigh
- )
-
-
-def corpus_bleu(
- list_of_references,
- hypotheses,
- weights=(0.25, 0.25, 0.25, 0.25),
- smoothing_function=None,
- auto_reweigh=False,
-):
- """
- Calculate a single corpus-level BLEU score (aka. system-level BLEU) for all
- the hypotheses and their respective references.
-
- Instead of averaging the sentence level BLEU scores (i.e. macro-average
- precision), the original BLEU metric (Papineni et al. 2002) accounts for
- the micro-average precision (i.e. summing the numerators and denominators
- for each hypothesis-reference(s) pairs before the division).
-
- >>> hyp1 = ['It', 'is', 'a', 'guide', 'to', 'action', 'which',
- ... 'ensures', 'that', 'the', 'military', 'always',
- ... 'obeys', 'the', 'commands', 'of', 'the', 'party']
- >>> ref1a = ['It', 'is', 'a', 'guide', 'to', 'action', 'that',
- ... 'ensures', 'that', 'the', 'military', 'will', 'forever',
- ... 'heed', 'Party', 'commands']
- >>> ref1b = ['It', 'is', 'the', 'guiding', 'principle', 'which',
- ... 'guarantees', 'the', 'military', 'forces', 'always',
- ... 'being', 'under', 'the', 'command', 'of', 'the', 'Party']
- >>> ref1c = ['It', 'is', 'the', 'practical', 'guide', 'for', 'the',
- ... 'army', 'always', 'to', 'heed', 'the', 'directions',
- ... 'of', 'the', 'party']
-
- >>> hyp2 = ['he', 'read', 'the', 'book', 'because', 'he', 'was',
- ... 'interested', 'in', 'world', 'history']
- >>> ref2a = ['he', 'was', 'interested', 'in', 'world', 'history',
- ... 'because', 'he', 'read', 'the', 'book']
-
- >>> list_of_references = [[ref1a, ref1b, ref1c], [ref2a]]
- >>> hypotheses = [hyp1, hyp2]
- >>> corpus_bleu(list_of_references, hypotheses) # doctest: +ELLIPSIS
- 0.5920...
-
- The example below show that corpus_bleu() is different from averaging
- sentence_bleu() for hypotheses
-
- >>> score1 = sentence_bleu([ref1a, ref1b, ref1c], hyp1)
- >>> score2 = sentence_bleu([ref2a], hyp2)
- >>> (score1 + score2) / 2 # doctest: +ELLIPSIS
- 0.6223...
-
- Custom weights may be supplied to fine-tune the BLEU score further.
- A tuple of float weights for unigrams, bigrams, trigrams and so on can be given.
- >>> weights = (0.1, 0.3, 0.5, 0.1)
- >>> corpus_bleu(list_of_references, hypotheses, weights=weights) # doctest: +ELLIPSIS
- 0.5818...
-
- This particular weight gave extra value to trigrams.
- Furthermore, multiple weights can be given, resulting in multiple BLEU scores.
- >>> weights = [
- ... (0.5, 0.5),
- ... (0.333, 0.333, 0.334),
- ... (0.25, 0.25, 0.25, 0.25),
- ... (0.2, 0.2, 0.2, 0.2, 0.2)
- ... ]
- >>> corpus_bleu(list_of_references, hypotheses, weights=weights) # doctest: +ELLIPSIS
- [0.8242..., 0.7067..., 0.5920..., 0.4719...]
-
- :param list_of_references: a corpus of lists of reference sentences, w.r.t. hypotheses
- :type list_of_references: list(list(list(str)))
- :param hypotheses: a list of hypothesis sentences
- :type hypotheses: list(list(str))
- :param weights: weights for unigrams, bigrams, trigrams and so on (one or a list of weights)
- :type weights: tuple(float) / list(tuple(float))
- :param smoothing_function:
- :type smoothing_function: SmoothingFunction
- :param auto_reweigh: Option to re-normalize the weights uniformly.
- :type auto_reweigh: bool
- :return: The corpus-level BLEU score.
- :rtype: float
- """
- # Before proceeding to compute BLEU, perform sanity checks.
-
- p_numerators = Counter() # Key = ngram order, and value = no. of ngram matches.
- p_denominators = Counter() # Key = ngram order, and value = no. of ngram in ref.
- hyp_lengths, ref_lengths = 0, 0
-
- assert len(list_of_references) == len(hypotheses), (
- "The number of hypotheses and their reference(s) should be the " "same "
- )
-
- try:
- weights[0][0]
- except TypeError:
- weights = [weights]
- max_weight_length = max(len(weight) for weight in weights)
-
- # Iterate through each hypothesis and their corresponding references.
- for references, hypothesis in zip(list_of_references, hypotheses):
- # For each order of ngram, calculate the numerator and
- # denominator for the corpus-level modified precision.
- for i in range(1, max_weight_length + 1):
- p_i = modified_precision(references, hypothesis, i)
- p_numerators[i] += p_i.numerator
- p_denominators[i] += p_i.denominator
-
- # Calculate the hypothesis length and the closest reference length.
- # Adds them to the corpus-level hypothesis and reference counts.
- hyp_len = len(hypothesis)
- hyp_lengths += hyp_len
- ref_lengths += closest_ref_length(references, hyp_len)
-
- # Calculate corpus-level brevity penalty.
- bp = brevity_penalty(ref_lengths, hyp_lengths)
-
- # Collects the various precision values for the different ngram orders.
- p_n = [
- Fraction(p_numerators[i], p_denominators[i], _normalize=False)
- for i in range(1, max_weight_length + 1)
- ]
-
- # Returns 0 if there's no matching n-grams
- # We only need to check for p_numerators[1] == 0, since if there's
- # no unigrams, there won't be any higher order ngrams.
- if p_numerators[1] == 0:
- return 0 if len(weights) == 1 else [0] * len(weights)
-
- # If there's no smoothing, set use method0 from SmoothinFunction class.
- if not smoothing_function:
- smoothing_function = SmoothingFunction().method0
- # Smoothen the modified precision.
- # Note: smoothing_function() may convert values into floats;
- # it tries to retain the Fraction object as much as the
- # smoothing method allows.
- p_n = smoothing_function(
- p_n, references=references, hypothesis=hypothesis, hyp_len=hyp_lengths
- )
-
- bleu_scores = []
- for weight in weights:
- # Uniformly re-weighting based on maximum hypothesis lengths if largest
- # order of n-grams < 4 and weights is set at default.
- if auto_reweigh:
- if hyp_lengths < 4 and weight == (0.25, 0.25, 0.25, 0.25):
- weight = (1 / hyp_lengths,) * hyp_lengths
-
- s = (w_i * math.log(p_i) for w_i, p_i in zip(weight, p_n) if p_i > 0)
- s = bp * math.exp(math.fsum(s))
- bleu_scores.append(s)
- return bleu_scores[0] if len(weights) == 1 else bleu_scores
-
-
-def modified_precision(references, hypothesis, n):
- """
- Calculate modified ngram precision.
-
- The normal precision method may lead to some wrong translations with
- high-precision, e.g., the translation, in which a word of reference
- repeats several times, has very high precision.
-
- This function only returns the Fraction object that contains the numerator
- and denominator necessary to calculate the corpus-level precision.
- To calculate the modified precision for a single pair of hypothesis and
- references, cast the Fraction object into a float.
-
- The famous "the the the ... " example shows that you can get BLEU precision
- by duplicating high frequency words.
-
- >>> reference1 = 'the cat is on the mat'.split()
- >>> reference2 = 'there is a cat on the mat'.split()
- >>> hypothesis1 = 'the the the the the the the'.split()
- >>> references = [reference1, reference2]
- >>> float(modified_precision(references, hypothesis1, n=1)) # doctest: +ELLIPSIS
- 0.2857...
-
- In the modified n-gram precision, a reference word will be considered
- exhausted after a matching hypothesis word is identified, e.g.
-
- >>> reference1 = ['It', 'is', 'a', 'guide', 'to', 'action', 'that',
- ... 'ensures', 'that', 'the', 'military', 'will',
- ... 'forever', 'heed', 'Party', 'commands']
- >>> reference2 = ['It', 'is', 'the', 'guiding', 'principle', 'which',
- ... 'guarantees', 'the', 'military', 'forces', 'always',
- ... 'being', 'under', 'the', 'command', 'of', 'the',
- ... 'Party']
- >>> reference3 = ['It', 'is', 'the', 'practical', 'guide', 'for', 'the',
- ... 'army', 'always', 'to', 'heed', 'the', 'directions',
- ... 'of', 'the', 'party']
- >>> hypothesis = 'of the'.split()
- >>> references = [reference1, reference2, reference3]
- >>> float(modified_precision(references, hypothesis, n=1))
- 1.0
- >>> float(modified_precision(references, hypothesis, n=2))
- 1.0
-
- An example of a normal machine translation hypothesis:
-
- >>> hypothesis1 = ['It', 'is', 'a', 'guide', 'to', 'action', 'which',
- ... 'ensures', 'that', 'the', 'military', 'always',
- ... 'obeys', 'the', 'commands', 'of', 'the', 'party']
-
- >>> hypothesis2 = ['It', 'is', 'to', 'insure', 'the', 'troops',
- ... 'forever', 'hearing', 'the', 'activity', 'guidebook',
- ... 'that', 'party', 'direct']
-
- >>> reference1 = ['It', 'is', 'a', 'guide', 'to', 'action', 'that',
- ... 'ensures', 'that', 'the', 'military', 'will',
- ... 'forever', 'heed', 'Party', 'commands']
-
- >>> reference2 = ['It', 'is', 'the', 'guiding', 'principle', 'which',
- ... 'guarantees', 'the', 'military', 'forces', 'always',
- ... 'being', 'under', 'the', 'command', 'of', 'the',
- ... 'Party']
-
- >>> reference3 = ['It', 'is', 'the', 'practical', 'guide', 'for', 'the',
- ... 'army', 'always', 'to', 'heed', 'the', 'directions',
- ... 'of', 'the', 'party']
- >>> references = [reference1, reference2, reference3]
- >>> float(modified_precision(references, hypothesis1, n=1)) # doctest: +ELLIPSIS
- 0.9444...
- >>> float(modified_precision(references, hypothesis2, n=1)) # doctest: +ELLIPSIS
- 0.5714...
- >>> float(modified_precision(references, hypothesis1, n=2)) # doctest: +ELLIPSIS
- 0.5882352941176471
- >>> float(modified_precision(references, hypothesis2, n=2)) # doctest: +ELLIPSIS
- 0.07692...
-
-
- :param references: A list of reference translations.
- :type references: list(list(str))
- :param hypothesis: A hypothesis translation.
- :type hypothesis: list(str)
- :param n: The ngram order.
- :type n: int
- :return: BLEU's modified precision for the nth order ngram.
- :rtype: Fraction
- """
- # Extracts all ngrams in hypothesis
- # Set an empty Counter if hypothesis is empty.
- counts = Counter(ngrams(hypothesis, n)) if len(hypothesis) >= n else Counter()
- # Extract a union of references' counts.
- # max_counts = reduce(or_, [Counter(ngrams(ref, n)) for ref in references])
- max_counts = {}
- for reference in references:
- reference_counts = (
- Counter(ngrams(reference, n)) if len(reference) >= n else Counter()
- )
- for ngram in counts:
- max_counts[ngram] = max(max_counts.get(ngram, 0), reference_counts[ngram])
-
- # Assigns the intersection between hypothesis and references' counts.
- clipped_counts = {
- ngram: min(count, max_counts[ngram]) for ngram, count in counts.items()
- }
-
- numerator = sum(clipped_counts.values())
- # Ensures that denominator is minimum 1 to avoid ZeroDivisionError.
- # Usually this happens when the ngram order is > len(reference).
- denominator = max(1, sum(counts.values()))
-
- return Fraction(numerator, denominator, _normalize=False)
-
-
-def closest_ref_length(references, hyp_len):
- """
- This function finds the reference that is the closest length to the
- hypothesis. The closest reference length is referred to as *r* variable
- from the brevity penalty formula in Papineni et. al. (2002)
-
- :param references: A list of reference translations.
- :type references: list(list(str))
- :param hyp_len: The length of the hypothesis.
- :type hyp_len: int
- :return: The length of the reference that's closest to the hypothesis.
- :rtype: int
- """
- ref_lens = (len(reference) for reference in references)
- closest_ref_len = min(
- ref_lens, key=lambda ref_len: (abs(ref_len - hyp_len), ref_len)
- )
- return closest_ref_len
-
-
-def brevity_penalty(closest_ref_len, hyp_len):
- """
- Calculate brevity penalty.
-
- As the modified n-gram precision still has the problem from the short
- length sentence, brevity penalty is used to modify the overall BLEU
- score according to length.
-
- An example from the paper. There are three references with length 12, 15
- and 17. And a concise hypothesis of the length 12. The brevity penalty is 1.
-
- >>> reference1 = list('aaaaaaaaaaaa') # i.e. ['a'] * 12
- >>> reference2 = list('aaaaaaaaaaaaaaa') # i.e. ['a'] * 15
- >>> reference3 = list('aaaaaaaaaaaaaaaaa') # i.e. ['a'] * 17
- >>> hypothesis = list('aaaaaaaaaaaa') # i.e. ['a'] * 12
- >>> references = [reference1, reference2, reference3]
- >>> hyp_len = len(hypothesis)
- >>> closest_ref_len = closest_ref_length(references, hyp_len)
- >>> brevity_penalty(closest_ref_len, hyp_len)
- 1.0
-
- In case a hypothesis translation is shorter than the references, penalty is
- applied.
-
- >>> references = [['a'] * 28, ['a'] * 28]
- >>> hypothesis = ['a'] * 12
- >>> hyp_len = len(hypothesis)
- >>> closest_ref_len = closest_ref_length(references, hyp_len)
- >>> brevity_penalty(closest_ref_len, hyp_len)
- 0.2635971381157267
-
- The length of the closest reference is used to compute the penalty. If the
- length of a hypothesis is 12, and the reference lengths are 13 and 2, the
- penalty is applied because the hypothesis length (12) is less then the
- closest reference length (13).
-
- >>> references = [['a'] * 13, ['a'] * 2]
- >>> hypothesis = ['a'] * 12
- >>> hyp_len = len(hypothesis)
- >>> closest_ref_len = closest_ref_length(references, hyp_len)
- >>> brevity_penalty(closest_ref_len, hyp_len) # doctest: +ELLIPSIS
- 0.9200...
-
- The brevity penalty doesn't depend on reference order. More importantly,
- when two reference sentences are at the same distance, the shortest
- reference sentence length is used.
-
- >>> references = [['a'] * 13, ['a'] * 11]
- >>> hypothesis = ['a'] * 12
- >>> hyp_len = len(hypothesis)
- >>> closest_ref_len = closest_ref_length(references, hyp_len)
- >>> bp1 = brevity_penalty(closest_ref_len, hyp_len)
- >>> hyp_len = len(hypothesis)
- >>> closest_ref_len = closest_ref_length(reversed(references), hyp_len)
- >>> bp2 = brevity_penalty(closest_ref_len, hyp_len)
- >>> bp1 == bp2 == 1
- True
-
- A test example from mteval-v13a.pl (starting from the line 705):
-
- >>> references = [['a'] * 11, ['a'] * 8]
- >>> hypothesis = ['a'] * 7
- >>> hyp_len = len(hypothesis)
- >>> closest_ref_len = closest_ref_length(references, hyp_len)
- >>> brevity_penalty(closest_ref_len, hyp_len) # doctest: +ELLIPSIS
- 0.8668...
-
- >>> references = [['a'] * 11, ['a'] * 8, ['a'] * 6, ['a'] * 7]
- >>> hypothesis = ['a'] * 7
- >>> hyp_len = len(hypothesis)
- >>> closest_ref_len = closest_ref_length(references, hyp_len)
- >>> brevity_penalty(closest_ref_len, hyp_len)
- 1.0
-
- :param hyp_len: The length of the hypothesis for a single sentence OR the
- sum of all the hypotheses' lengths for a corpus
- :type hyp_len: int
- :param closest_ref_len: The length of the closest reference for a single
- hypothesis OR the sum of all the closest references for every hypotheses.
- :type closest_ref_len: int
- :return: BLEU's brevity penalty.
- :rtype: float
- """
- if hyp_len > closest_ref_len:
- return 1
- # If hypothesis is empty, brevity penalty = 0 should result in BLEU = 0.0
- elif hyp_len == 0:
- return 0
- else:
- return math.exp(1 - closest_ref_len / hyp_len)
-
-
-class SmoothingFunction:
- """
- This is an implementation of the smoothing techniques
- for segment-level BLEU scores that was presented in
- Boxing Chen and Collin Cherry (2014) A Systematic Comparison of
- Smoothing Techniques for Sentence-Level BLEU. In WMT14.
- http://acl2014.org/acl2014/W14-33/pdf/W14-3346.pdf
- """
-
- def __init__(self, epsilon=0.1, alpha=5, k=5):
- """
- This will initialize the parameters required for the various smoothing
- techniques, the default values are set to the numbers used in the
- experiments from Chen and Cherry (2014).
-
- >>> hypothesis1 = ['It', 'is', 'a', 'guide', 'to', 'action', 'which', 'ensures',
- ... 'that', 'the', 'military', 'always', 'obeys', 'the',
- ... 'commands', 'of', 'the', 'party']
- >>> reference1 = ['It', 'is', 'a', 'guide', 'to', 'action', 'that', 'ensures',
- ... 'that', 'the', 'military', 'will', 'forever', 'heed',
- ... 'Party', 'commands']
-
- >>> chencherry = SmoothingFunction()
- >>> print(sentence_bleu([reference1], hypothesis1)) # doctest: +ELLIPSIS
- 0.4118...
- >>> print(sentence_bleu([reference1], hypothesis1, smoothing_function=chencherry.method0)) # doctest: +ELLIPSIS
- 0.4118...
- >>> print(sentence_bleu([reference1], hypothesis1, smoothing_function=chencherry.method1)) # doctest: +ELLIPSIS
- 0.4118...
- >>> print(sentence_bleu([reference1], hypothesis1, smoothing_function=chencherry.method2)) # doctest: +ELLIPSIS
- 0.4452...
- >>> print(sentence_bleu([reference1], hypothesis1, smoothing_function=chencherry.method3)) # doctest: +ELLIPSIS
- 0.4118...
- >>> print(sentence_bleu([reference1], hypothesis1, smoothing_function=chencherry.method4)) # doctest: +ELLIPSIS
- 0.4118...
- >>> print(sentence_bleu([reference1], hypothesis1, smoothing_function=chencherry.method5)) # doctest: +ELLIPSIS
- 0.4905...
- >>> print(sentence_bleu([reference1], hypothesis1, smoothing_function=chencherry.method6)) # doctest: +ELLIPSIS
- 0.4135...
- >>> print(sentence_bleu([reference1], hypothesis1, smoothing_function=chencherry.method7)) # doctest: +ELLIPSIS
- 0.4905...
-
- :param epsilon: the epsilon value use in method 1
- :type epsilon: float
- :param alpha: the alpha value use in method 6
- :type alpha: int
- :param k: the k value use in method 4
- :type k: int
- """
- self.epsilon = epsilon
- self.alpha = alpha
- self.k = k
-
- def method0(self, p_n, *args, **kwargs):
- """
- No smoothing.
- """
- p_n_new = []
- for i, p_i in enumerate(p_n):
- if p_i.numerator != 0:
- p_n_new.append(p_i)
- else:
- _msg = str(
- "\nThe hypothesis contains 0 counts of {}-gram overlaps.\n"
- "Therefore the BLEU score evaluates to 0, independently of\n"
- "how many N-gram overlaps of lower order it contains.\n"
- "Consider using lower n-gram order or use "
- "SmoothingFunction()"
- ).format(i + 1)
- warnings.warn(_msg)
- # When numerator==0 where denonminator==0 or !=0, the result
- # for the precision score should be equal to 0 or undefined.
- # Due to BLEU geometric mean computation in logarithm space,
- # we we need to take the return sys.float_info.min such that
- # math.log(sys.float_info.min) returns a 0 precision score.
- p_n_new.append(sys.float_info.min)
- return p_n_new
-
- def method1(self, p_n, *args, **kwargs):
- """
- Smoothing method 1: Add *epsilon* counts to precision with 0 counts.
- """
- return [
- (p_i.numerator + self.epsilon) / p_i.denominator
- if p_i.numerator == 0
- else p_i
- for p_i in p_n
- ]
-
- def method2(self, p_n, *args, **kwargs):
- """
- Smoothing method 2: Add 1 to both numerator and denominator from
- Chin-Yew Lin and Franz Josef Och (2004) ORANGE: a Method for
- Evaluating Automatic Evaluation Metrics for Machine Translation.
- In COLING 2004.
- """
- return [
- Fraction(p_n[i].numerator + 1, p_n[i].denominator + 1, _normalize=False)
- if i != 0
- else p_n[0]
- for i in range(len(p_n))
- ]
-
- def method3(self, p_n, *args, **kwargs):
- """
- Smoothing method 3: NIST geometric sequence smoothing
- The smoothing is computed by taking 1 / ( 2^k ), instead of 0, for each
- precision score whose matching n-gram count is null.
- k is 1 for the first 'n' value for which the n-gram match count is null/
-
- For example, if the text contains:
-
- - one 2-gram match
- - and (consequently) two 1-gram matches
-
- the n-gram count for each individual precision score would be:
-
- - n=1 => prec_count = 2 (two unigrams)
- - n=2 => prec_count = 1 (one bigram)
- - n=3 => prec_count = 1/2 (no trigram, taking 'smoothed' value of 1 / ( 2^k ), with k=1)
- - n=4 => prec_count = 1/4 (no fourgram, taking 'smoothed' value of 1 / ( 2^k ), with k=2)
- """
- incvnt = 1 # From the mteval-v13a.pl, it's referred to as k.
- for i, p_i in enumerate(p_n):
- if p_i.numerator == 0:
- p_n[i] = 1 / (2**incvnt * p_i.denominator)
- incvnt += 1
- return p_n
-
- def method4(self, p_n, references, hypothesis, hyp_len=None, *args, **kwargs):
- """
- Smoothing method 4:
- Shorter translations may have inflated precision values due to having
- smaller denominators; therefore, we give them proportionally
- smaller smoothed counts. Instead of scaling to 1/(2^k), Chen and Cherry
- suggests dividing by 1/ln(len(T)), where T is the length of the translation.
- """
- incvnt = 1
- hyp_len = hyp_len if hyp_len else len(hypothesis)
- for i, p_i in enumerate(p_n):
- if p_i.numerator == 0 and hyp_len > 1:
- # incvnt = i + 1 * self.k / math.log(
- # hyp_len
- # ) # Note that this K is different from the K from NIST.
- # p_n[i] = incvnt / p_i.denominator\
- numerator = 1 / (2**incvnt * self.k / math.log(hyp_len))
- p_n[i] = numerator / p_i.denominator
- incvnt += 1
- return p_n
-
- def method5(self, p_n, references, hypothesis, hyp_len=None, *args, **kwargs):
- """
- Smoothing method 5:
- The matched counts for similar values of n should be similar. To a
- calculate the n-gram matched count, it averages the n1, n and n+1 gram
- matched counts.
- """
- hyp_len = hyp_len if hyp_len else len(hypothesis)
- m = {}
- # Requires an precision value for an addition ngram order.
- p_n_plus1 = p_n + [modified_precision(references, hypothesis, 5)]
- m[-1] = p_n[0] + 1
- for i, p_i in enumerate(p_n):
- p_n[i] = (m[i - 1] + p_i + p_n_plus1[i + 1]) / 3
- m[i] = p_n[i]
- return p_n
-
- def method6(self, p_n, references, hypothesis, hyp_len=None, *args, **kwargs):
- """
- Smoothing method 6:
- Interpolates the maximum likelihood estimate of the precision *p_n* with
- a prior estimate *pi0*. The prior is estimated by assuming that the ratio
- between pn and pn1 will be the same as that between pn1 and pn2; from
- Gao and He (2013) Training MRF-Based Phrase Translation Models using
- Gradient Ascent. In NAACL.
- """
- hyp_len = hyp_len if hyp_len else len(hypothesis)
- # This smoothing only works when p_1 and p_2 is non-zero.
- # Raise an error with an appropriate message when the input is too short
- # to use this smoothing technique.
- assert p_n[2], "This smoothing method requires non-zero precision for bigrams."
- for i, p_i in enumerate(p_n):
- if i in [0, 1]: # Skips the first 2 orders of ngrams.
- continue
- else:
- pi0 = 0 if p_n[i - 2] == 0 else p_n[i - 1] ** 2 / p_n[i - 2]
- # No. of ngrams in translation that matches the reference.
- m = p_i.numerator
- # No. of ngrams in translation.
- l = sum(1 for _ in ngrams(hypothesis, i + 1))
- # Calculates the interpolated precision.
- p_n[i] = (m + self.alpha * pi0) / (l + self.alpha)
- return p_n
-
- def method7(self, p_n, references, hypothesis, hyp_len=None, *args, **kwargs):
- """
- Smoothing method 7:
- Interpolates methods 4 and 5.
- """
- hyp_len = hyp_len if hyp_len else len(hypothesis)
- p_n = self.method4(p_n, references, hypothesis, hyp_len)
- p_n = self.method5(p_n, references, hypothesis, hyp_len)
- return p_n
+# Natural Language Toolkit: BLEU Score
+#
+# Copyright (C) 2001-2023 NLTK Project
+# Authors: Chin Yee Lee, Hengfeng Li, Ruxin Hou, Calvin Tanujaya Lim
+# Contributors: Björn Mattsson, Dmitrijs Milajevs, Liling Tan
+# URL: <https://www.nltk.org/>
+# For license information, see LICENSE.TXT
+
+"""BLEU score implementation."""
+import math
+import sys
+import warnings
+from collections import Counter
+from fractions import Fraction as _Fraction
+
+from nltk.util import ngrams
+
+
+class Fraction(_Fraction):
+ """Fraction with _normalize=False support for 3.12"""
+
+ def __new__(cls, numerator=0, denominator=None, _normalize=False):
+ if sys.version_info >= (3, 12):
+ self = super().__new__(cls, numerator, denominator)
+ else:
+ self = super().__new__(cls, numerator, denominator, _normalize=_normalize)
+ self._normalize = _normalize
+ self._original_numerator = numerator
+ self._original_denominator = denominator
+ return self
+
+ @property
+ def numerator(self):
+ if not self._normalize:
+ return self._original_numerator
+ return super().numerator
+
+ @property
+ def denominator(self):
+ if not self._normalize:
+ return self._original_denominator
+ return super().denominator
+
+
+def sentence_bleu(
+ references,
+ hypothesis,
+ weights=(0.25, 0.25, 0.25, 0.25),
+ smoothing_function=None,
+ auto_reweigh=False,
+):
+ """
+ Calculate BLEU score (Bilingual Evaluation Understudy) from
+ Papineni, Kishore, Salim Roukos, Todd Ward, and Wei-Jing Zhu. 2002.
+ "BLEU: a method for automatic evaluation of machine translation."
+ In Proceedings of ACL. https://www.aclweb.org/anthology/P02-1040.pdf
+
+ >>> hypothesis1 = ['It', 'is', 'a', 'guide', 'to', 'action', 'which',
+ ... 'ensures', 'that', 'the', 'military', 'always',
+ ... 'obeys', 'the', 'commands', 'of', 'the', 'party']
+
+ >>> hypothesis2 = ['It', 'is', 'to', 'insure', 'the', 'troops',
+ ... 'forever', 'hearing', 'the', 'activity', 'guidebook',
+ ... 'that', 'party', 'direct']
+
+ >>> reference1 = ['It', 'is', 'a', 'guide', 'to', 'action', 'that',
+ ... 'ensures', 'that', 'the', 'military', 'will', 'forever',
+ ... 'heed', 'Party', 'commands']
+
+ >>> reference2 = ['It', 'is', 'the', 'guiding', 'principle', 'which',
+ ... 'guarantees', 'the', 'military', 'forces', 'always',
+ ... 'being', 'under', 'the', 'command', 'of', 'the',
+ ... 'Party']
+
+ >>> reference3 = ['It', 'is', 'the', 'practical', 'guide', 'for', 'the',
+ ... 'army', 'always', 'to', 'heed', 'the', 'directions',
+ ... 'of', 'the', 'party']
+
+ >>> sentence_bleu([reference1, reference2, reference3], hypothesis1) # doctest: +ELLIPSIS
+ 0.5045...
+
+ If there is no ngrams overlap for any order of n-grams, BLEU returns the
+ value 0. This is because the precision for the order of n-grams without
+ overlap is 0, and the geometric mean in the final BLEU score computation
+ multiplies the 0 with the precision of other n-grams. This results in 0
+ (independently of the precision of the other n-gram orders). The following
+ example has zero 3-gram and 4-gram overlaps:
+
+ >>> round(sentence_bleu([reference1, reference2, reference3], hypothesis2),4) # doctest: +ELLIPSIS
+ 0.0
+
+ To avoid this harsh behaviour when no ngram overlaps are found, a smoothing
+ function can be used.
+
+ >>> chencherry = SmoothingFunction()
+ >>> sentence_bleu([reference1, reference2, reference3], hypothesis2,
+ ... smoothing_function=chencherry.method1) # doctest: +ELLIPSIS
+ 0.0370...
+
+ The default BLEU calculates a score for up to 4-grams using uniform
+ weights (this is called BLEU-4). To evaluate your translations with
+ higher/lower order ngrams, use customized weights. E.g. when accounting
+ for up to 5-grams with uniform weights (this is called BLEU-5) use:
+
+ >>> weights = (1./5., 1./5., 1./5., 1./5., 1./5.)
+ >>> sentence_bleu([reference1, reference2, reference3], hypothesis1, weights) # doctest: +ELLIPSIS
+ 0.3920...
+
+ Multiple BLEU scores can be computed at once, by supplying a list of weights.
+ E.g. for computing BLEU-2, BLEU-3 *and* BLEU-4 in one computation, use:
+ >>> weights = [
+ ... (1./2., 1./2.),
+ ... (1./3., 1./3., 1./3.),
+ ... (1./4., 1./4., 1./4., 1./4.)
+ ... ]
+ >>> sentence_bleu([reference1, reference2, reference3], hypothesis1, weights) # doctest: +ELLIPSIS
+ [0.7453..., 0.6240..., 0.5045...]
+
+ :param references: reference sentences
+ :type references: list(list(str))
+ :param hypothesis: a hypothesis sentence
+ :type hypothesis: list(str)
+ :param weights: weights for unigrams, bigrams, trigrams and so on (one or a list of weights)
+ :type weights: tuple(float) / list(tuple(float))
+ :param smoothing_function:
+ :type smoothing_function: SmoothingFunction
+ :param auto_reweigh: Option to re-normalize the weights uniformly.
+ :type auto_reweigh: bool
+ :return: The sentence-level BLEU score. Returns a list if multiple weights were supplied.
+ :rtype: float / list(float)
+ """
+ return corpus_bleu(
+ [references], [hypothesis], weights, smoothing_function, auto_reweigh
+ )
+
+
+def corpus_bleu(
+ list_of_references,
+ hypotheses,
+ weights=(0.25, 0.25, 0.25, 0.25),
+ smoothing_function=None,
+ auto_reweigh=False,
+):
+ """
+ Calculate a single corpus-level BLEU score (aka. system-level BLEU) for all
+ the hypotheses and their respective references.
+
+ Instead of averaging the sentence level BLEU scores (i.e. macro-average
+ precision), the original BLEU metric (Papineni et al. 2002) accounts for
+ the micro-average precision (i.e. summing the numerators and denominators
+ for each hypothesis-reference(s) pairs before the division).
+
+ >>> hyp1 = ['It', 'is', 'a', 'guide', 'to', 'action', 'which',
+ ... 'ensures', 'that', 'the', 'military', 'always',
+ ... 'obeys', 'the', 'commands', 'of', 'the', 'party']
+ >>> ref1a = ['It', 'is', 'a', 'guide', 'to', 'action', 'that',
+ ... 'ensures', 'that', 'the', 'military', 'will', 'forever',
+ ... 'heed', 'Party', 'commands']
+ >>> ref1b = ['It', 'is', 'the', 'guiding', 'principle', 'which',
+ ... 'guarantees', 'the', 'military', 'forces', 'always',
+ ... 'being', 'under', 'the', 'command', 'of', 'the', 'Party']
+ >>> ref1c = ['It', 'is', 'the', 'practical', 'guide', 'for', 'the',
+ ... 'army', 'always', 'to', 'heed', 'the', 'directions',
+ ... 'of', 'the', 'party']
+
+ >>> hyp2 = ['he', 'read', 'the', 'book', 'because', 'he', 'was',
+ ... 'interested', 'in', 'world', 'history']
+ >>> ref2a = ['he', 'was', 'interested', 'in', 'world', 'history',
+ ... 'because', 'he', 'read', 'the', 'book']
+
+ >>> list_of_references = [[ref1a, ref1b, ref1c], [ref2a]]
+ >>> hypotheses = [hyp1, hyp2]
+ >>> corpus_bleu(list_of_references, hypotheses) # doctest: +ELLIPSIS
+ 0.5920...
+
+ The example below shows that corpus_bleu() is different from averaging
+ sentence_bleu() for hypotheses.
+
+ >>> score1 = sentence_bleu([ref1a, ref1b, ref1c], hyp1)
+ >>> score2 = sentence_bleu([ref2a], hyp2)
+ >>> (score1 + score2) / 2 # doctest: +ELLIPSIS
+ 0.6223...
+
+ Custom weights may be supplied to fine-tune the BLEU score further.
+ A tuple of float weights for unigrams, bigrams, trigrams and so on can be given.
+ >>> weights = (0.1, 0.3, 0.5, 0.1)
+ >>> corpus_bleu(list_of_references, hypotheses, weights=weights) # doctest: +ELLIPSIS
+ 0.5818...
+
+ This particular weight gave extra value to trigrams.
+ Furthermore, multiple weights can be given, resulting in multiple BLEU scores.
+ >>> weights = [
+ ... (0.5, 0.5),
+ ... (0.333, 0.333, 0.334),
+ ... (0.25, 0.25, 0.25, 0.25),
+ ... (0.2, 0.2, 0.2, 0.2, 0.2)
+ ... ]
+ >>> corpus_bleu(list_of_references, hypotheses, weights=weights) # doctest: +ELLIPSIS
+ [0.8242..., 0.7067..., 0.5920..., 0.4719...]
+
+ :param list_of_references: a corpus of lists of reference sentences, w.r.t. hypotheses
+ :type list_of_references: list(list(list(str)))
+ :param hypotheses: a list of hypothesis sentences
+ :type hypotheses: list(list(str))
+ :param weights: weights for unigrams, bigrams, trigrams and so on (one or a list of weights)
+ :type weights: tuple(float) / list(tuple(float))
+ :param smoothing_function:
+ :type smoothing_function: SmoothingFunction
+ :param auto_reweigh: Option to re-normalize the weights uniformly.
+ :type auto_reweigh: bool
+ :return: The corpus-level BLEU score.
+ :rtype: float
+ """
+ # Before proceeding to compute BLEU, perform sanity checks.
+
+ p_numerators = Counter() # Key = ngram order, and value = no. of ngram matches.
+ p_denominators = Counter() # Key = ngram order, and value = no. of ngram in ref.
+ hyp_lengths, ref_lengths = 0, 0
+
+ assert len(list_of_references) == len(hypotheses), (
+ "The number of hypotheses and their reference(s) should be the " "same "
+ )
+
+ try:
+ weights[0][0]
+ except TypeError:
+ weights = [weights]
+ max_weight_length = max(len(weight) for weight in weights)
+
+ # Iterate through each hypothesis and their corresponding references.
+ for references, hypothesis in zip(list_of_references, hypotheses):
+ # For each order of ngram, calculate the numerator and
+ # denominator for the corpus-level modified precision.
+ for i in range(1, max_weight_length + 1):
+ p_i = modified_precision(references, hypothesis, i)
+ p_numerators[i] += p_i.numerator
+ p_denominators[i] += p_i.denominator
+
+ # Calculate the hypothesis length and the closest reference length.
+ # Adds them to the corpus-level hypothesis and reference counts.
+ hyp_len = len(hypothesis)
+ hyp_lengths += hyp_len
+ ref_lengths += closest_ref_length(references, hyp_len)
+
+ # Calculate corpus-level brevity penalty.
+ bp = brevity_penalty(ref_lengths, hyp_lengths)
+
+ # Collects the various precision values for the different ngram orders.
+ p_n = [
+ Fraction(p_numerators[i], p_denominators[i], _normalize=False)
+ for i in range(1, max_weight_length + 1)
+ ]
+
+ # Returns 0 if there's no matching n-grams
+ # We only need to check for p_numerators[1] == 0, since if there's
+ # no unigrams, there won't be any higher order ngrams.
+ if p_numerators[1] == 0:
+ return 0 if len(weights) == 1 else [0] * len(weights)
+
+ # If there's no smoothing, use method0 from the SmoothingFunction class.
+ if not smoothing_function:
+ smoothing_function = SmoothingFunction().method0
+ # Smoothen the modified precision.
+ # Note: smoothing_function() may convert values into floats;
+ # it tries to retain the Fraction object as much as the
+ # smoothing method allows.
+ p_n = smoothing_function(
+ p_n, references=references, hypothesis=hypothesis, hyp_len=hyp_lengths
+ )
+
+ bleu_scores = []
+ for weight in weights:
+ # Uniformly re-weighting based on maximum hypothesis lengths if largest
+ # order of n-grams < 4 and weights is set at default.
+ if auto_reweigh:
+ if hyp_lengths < 4 and weight == (0.25, 0.25, 0.25, 0.25):
+ weight = (1 / hyp_lengths,) * hyp_lengths
+
+ s = (w_i * math.log(p_i) for w_i, p_i in zip(weight, p_n) if p_i > 0)
+ s = bp * math.exp(math.fsum(s))
+ bleu_scores.append(s)
+ return bleu_scores[0] if len(weights) == 1 else bleu_scores
+
+
+def modified_precision(references, hypothesis, n):
+ """
+ Calculate modified ngram precision.
+
+ The normal precision method may lead to some wrong translations with
+ high-precision, e.g., the translation, in which a word of reference
+ repeats several times, has very high precision.
+
+ This function only returns the Fraction object that contains the numerator
+ and denominator necessary to calculate the corpus-level precision.
+ To calculate the modified precision for a single pair of hypothesis and
+ references, cast the Fraction object into a float.
+
+ The famous "the the the ... " example shows that you can get BLEU precision
+ by duplicating high frequency words.
+
+ >>> reference1 = 'the cat is on the mat'.split()
+ >>> reference2 = 'there is a cat on the mat'.split()
+ >>> hypothesis1 = 'the the the the the the the'.split()
+ >>> references = [reference1, reference2]
+ >>> float(modified_precision(references, hypothesis1, n=1)) # doctest: +ELLIPSIS
+ 0.2857...
+
+ In the modified n-gram precision, a reference word will be considered
+ exhausted after a matching hypothesis word is identified, e.g.
+
+ >>> reference1 = ['It', 'is', 'a', 'guide', 'to', 'action', 'that',
+ ... 'ensures', 'that', 'the', 'military', 'will',
+ ... 'forever', 'heed', 'Party', 'commands']
+ >>> reference2 = ['It', 'is', 'the', 'guiding', 'principle', 'which',
+ ... 'guarantees', 'the', 'military', 'forces', 'always',
+ ... 'being', 'under', 'the', 'command', 'of', 'the',
+ ... 'Party']
+ >>> reference3 = ['It', 'is', 'the', 'practical', 'guide', 'for', 'the',
+ ... 'army', 'always', 'to', 'heed', 'the', 'directions',
+ ... 'of', 'the', 'party']
+ >>> hypothesis = 'of the'.split()
+ >>> references = [reference1, reference2, reference3]
+ >>> float(modified_precision(references, hypothesis, n=1))
+ 1.0
+ >>> float(modified_precision(references, hypothesis, n=2))
+ 1.0
+
+ An example of a normal machine translation hypothesis:
+
+ >>> hypothesis1 = ['It', 'is', 'a', 'guide', 'to', 'action', 'which',
+ ... 'ensures', 'that', 'the', 'military', 'always',
+ ... 'obeys', 'the', 'commands', 'of', 'the', 'party']
+
+ >>> hypothesis2 = ['It', 'is', 'to', 'insure', 'the', 'troops',
+ ... 'forever', 'hearing', 'the', 'activity', 'guidebook',
+ ... 'that', 'party', 'direct']
+
+ >>> reference1 = ['It', 'is', 'a', 'guide', 'to', 'action', 'that',
+ ... 'ensures', 'that', 'the', 'military', 'will',
+ ... 'forever', 'heed', 'Party', 'commands']
+
+ >>> reference2 = ['It', 'is', 'the', 'guiding', 'principle', 'which',
+ ... 'guarantees', 'the', 'military', 'forces', 'always',
+ ... 'being', 'under', 'the', 'command', 'of', 'the',
+ ... 'Party']
+
+ >>> reference3 = ['It', 'is', 'the', 'practical', 'guide', 'for', 'the',
+ ... 'army', 'always', 'to', 'heed', 'the', 'directions',
+ ... 'of', 'the', 'party']
+ >>> references = [reference1, reference2, reference3]
+ >>> float(modified_precision(references, hypothesis1, n=1)) # doctest: +ELLIPSIS
+ 0.9444...
+ >>> float(modified_precision(references, hypothesis2, n=1)) # doctest: +ELLIPSIS
+ 0.5714...
+ >>> float(modified_precision(references, hypothesis1, n=2)) # doctest: +ELLIPSIS
+ 0.5882352941176471
+ >>> float(modified_precision(references, hypothesis2, n=2)) # doctest: +ELLIPSIS
+ 0.07692...
+
+
+ :param references: A list of reference translations.
+ :type references: list(list(str))
+ :param hypothesis: A hypothesis translation.
+ :type hypothesis: list(str)
+ :param n: The ngram order.
+ :type n: int
+ :return: BLEU's modified precision for the nth order ngram.
+ :rtype: Fraction
+ """
+ # Extracts all ngrams in hypothesis
+ # Set an empty Counter if hypothesis is empty.
+ counts = Counter(ngrams(hypothesis, n)) if len(hypothesis) >= n else Counter()
+ # Extract a union of references' counts.
+ # max_counts = reduce(or_, [Counter(ngrams(ref, n)) for ref in references])
+ max_counts = {}
+ for reference in references:
+ reference_counts = (
+ Counter(ngrams(reference, n)) if len(reference) >= n else Counter()
+ )
+ for ngram in counts:
+ max_counts[ngram] = max(max_counts.get(ngram, 0), reference_counts[ngram])
+
+ # Assigns the intersection between hypothesis and references' counts.
+ clipped_counts = {
+ ngram: min(count, max_counts[ngram]) for ngram, count in counts.items()
+ }
+
+ numerator = sum(clipped_counts.values())
+ # Ensures that denominator is minimum 1 to avoid ZeroDivisionError.
+ # Usually this happens when the ngram order is > len(reference).
+ denominator = max(1, sum(counts.values()))
+
+ return Fraction(numerator, denominator, _normalize=False)
+
+
+def closest_ref_length(references, hyp_len):
+ """
+ This function finds the reference that is the closest length to the
+ hypothesis. The closest reference length is referred to as *r* variable
+ from the brevity penalty formula in Papineni et. al. (2002)
+
+ :param references: A list of reference translations.
+ :type references: list(list(str))
+ :param hyp_len: The length of the hypothesis.
+ :type hyp_len: int
+ :return: The length of the reference that's closest to the hypothesis.
+ :rtype: int
+ """
+ ref_lens = (len(reference) for reference in references)
+ closest_ref_len = min(
+ ref_lens, key=lambda ref_len: (abs(ref_len - hyp_len), ref_len)
+ )
+ return closest_ref_len
+
+
+def brevity_penalty(closest_ref_len, hyp_len):
+ """
+ Calculate brevity penalty.
+
+ As the modified n-gram precision still has the problem from the short
+ length sentence, brevity penalty is used to modify the overall BLEU
+ score according to length.
+
+ An example from the paper. There are three references with length 12, 15
+ and 17. And a concise hypothesis of the length 12. The brevity penalty is 1.
+
+ >>> reference1 = list('aaaaaaaaaaaa') # i.e. ['a'] * 12
+ >>> reference2 = list('aaaaaaaaaaaaaaa') # i.e. ['a'] * 15
+ >>> reference3 = list('aaaaaaaaaaaaaaaaa') # i.e. ['a'] * 17
+ >>> hypothesis = list('aaaaaaaaaaaa') # i.e. ['a'] * 12
+ >>> references = [reference1, reference2, reference3]
+ >>> hyp_len = len(hypothesis)
+ >>> closest_ref_len = closest_ref_length(references, hyp_len)
+ >>> brevity_penalty(closest_ref_len, hyp_len)
+ 1.0
+
+ In case a hypothesis translation is shorter than the references, penalty is
+ applied.
+
+ >>> references = [['a'] * 28, ['a'] * 28]
+ >>> hypothesis = ['a'] * 12
+ >>> hyp_len = len(hypothesis)
+ >>> closest_ref_len = closest_ref_length(references, hyp_len)
+ >>> brevity_penalty(closest_ref_len, hyp_len)
+ 0.2635971381157267
+
+ The length of the closest reference is used to compute the penalty. If the
+ length of a hypothesis is 12, and the reference lengths are 13 and 2, the
+ penalty is applied because the hypothesis length (12) is less than the
+ closest reference length (13).
+
+ >>> references = [['a'] * 13, ['a'] * 2]
+ >>> hypothesis = ['a'] * 12
+ >>> hyp_len = len(hypothesis)
+ >>> closest_ref_len = closest_ref_length(references, hyp_len)
+ >>> brevity_penalty(closest_ref_len, hyp_len) # doctest: +ELLIPSIS
+ 0.9200...
+
+ The brevity penalty doesn't depend on reference order. More importantly,
+ when two reference sentences are at the same distance, the shortest
+ reference sentence length is used.
+
+ >>> references = [['a'] * 13, ['a'] * 11]
+ >>> hypothesis = ['a'] * 12
+ >>> hyp_len = len(hypothesis)
+ >>> closest_ref_len = closest_ref_length(references, hyp_len)
+ >>> bp1 = brevity_penalty(closest_ref_len, hyp_len)
+ >>> hyp_len = len(hypothesis)
+ >>> closest_ref_len = closest_ref_length(reversed(references), hyp_len)
+ >>> bp2 = brevity_penalty(closest_ref_len, hyp_len)
+ >>> bp1 == bp2 == 1
+ True
+
+ A test example from mteval-v13a.pl (starting from the line 705):
+
+ >>> references = [['a'] * 11, ['a'] * 8]
+ >>> hypothesis = ['a'] * 7
+ >>> hyp_len = len(hypothesis)
+ >>> closest_ref_len = closest_ref_length(references, hyp_len)
+ >>> brevity_penalty(closest_ref_len, hyp_len) # doctest: +ELLIPSIS
+ 0.8668...
+
+ >>> references = [['a'] * 11, ['a'] * 8, ['a'] * 6, ['a'] * 7]
+ >>> hypothesis = ['a'] * 7
+ >>> hyp_len = len(hypothesis)
+ >>> closest_ref_len = closest_ref_length(references, hyp_len)
+ >>> brevity_penalty(closest_ref_len, hyp_len)
+ 1.0
+
+ :param hyp_len: The length of the hypothesis for a single sentence OR the
+ sum of all the hypotheses' lengths for a corpus
+ :type hyp_len: int
+ :param closest_ref_len: The length of the closest reference for a single
+ hypothesis OR the sum of all the closest references for every hypotheses.
+ :type closest_ref_len: int
+ :return: BLEU's brevity penalty.
+ :rtype: float
+ """
+ if hyp_len > closest_ref_len:
+ return 1
+ # If hypothesis is empty, brevity penalty = 0 should result in BLEU = 0.0
+ elif hyp_len == 0:
+ return 0
+ else:
+ return math.exp(1 - closest_ref_len / hyp_len)
+
+
+class SmoothingFunction:
+ """
+ This is an implementation of the smoothing techniques
+ for segment-level BLEU scores that was presented in
+ Boxing Chen and Collin Cherry (2014) A Systematic Comparison of
+ Smoothing Techniques for Sentence-Level BLEU. In WMT14.
+ http://acl2014.org/acl2014/W14-33/pdf/W14-3346.pdf
+ """
+
+ def __init__(self, epsilon=0.1, alpha=5, k=5):
+ """
+ This will initialize the parameters required for the various smoothing
+ techniques, the default values are set to the numbers used in the
+ experiments from Chen and Cherry (2014).
+
+ >>> hypothesis1 = ['It', 'is', 'a', 'guide', 'to', 'action', 'which', 'ensures',
+ ... 'that', 'the', 'military', 'always', 'obeys', 'the',
+ ... 'commands', 'of', 'the', 'party']
+ >>> reference1 = ['It', 'is', 'a', 'guide', 'to', 'action', 'that', 'ensures',
+ ... 'that', 'the', 'military', 'will', 'forever', 'heed',
+ ... 'Party', 'commands']
+
+ >>> chencherry = SmoothingFunction()
+ >>> print(sentence_bleu([reference1], hypothesis1)) # doctest: +ELLIPSIS
+ 0.4118...
+ >>> print(sentence_bleu([reference1], hypothesis1, smoothing_function=chencherry.method0)) # doctest: +ELLIPSIS
+ 0.4118...
+ >>> print(sentence_bleu([reference1], hypothesis1, smoothing_function=chencherry.method1)) # doctest: +ELLIPSIS
+ 0.4118...
+ >>> print(sentence_bleu([reference1], hypothesis1, smoothing_function=chencherry.method2)) # doctest: +ELLIPSIS
+ 0.4452...
+ >>> print(sentence_bleu([reference1], hypothesis1, smoothing_function=chencherry.method3)) # doctest: +ELLIPSIS
+ 0.4118...
+ >>> print(sentence_bleu([reference1], hypothesis1, smoothing_function=chencherry.method4)) # doctest: +ELLIPSIS
+ 0.4118...
+ >>> print(sentence_bleu([reference1], hypothesis1, smoothing_function=chencherry.method5)) # doctest: +ELLIPSIS
+ 0.4905...
+ >>> print(sentence_bleu([reference1], hypothesis1, smoothing_function=chencherry.method6)) # doctest: +ELLIPSIS
+ 0.4135...
+ >>> print(sentence_bleu([reference1], hypothesis1, smoothing_function=chencherry.method7)) # doctest: +ELLIPSIS
+ 0.4905...
+
+ :param epsilon: the epsilon value used in method 1
+ :type epsilon: float
+ :param alpha: the alpha value used in method 6
+ :type alpha: int
+ :param k: the k value used in method 4
+ :type k: int
+ """
+ self.epsilon = epsilon
+ self.alpha = alpha
+ self.k = k
+
+ def method0(self, p_n, *args, **kwargs):
+ """
+ No smoothing.
+ """
+ p_n_new = []
+ for i, p_i in enumerate(p_n):
+ if p_i.numerator != 0:
+ p_n_new.append(p_i)
+ else:
+ _msg = str(
+ "\nThe hypothesis contains 0 counts of {}-gram overlaps.\n"
+ "Therefore the BLEU score evaluates to 0, independently of\n"
+ "how many N-gram overlaps of lower order it contains.\n"
+ "Consider using lower n-gram order or use "
+ "SmoothingFunction()"
+ ).format(i + 1)
+ warnings.warn(_msg)
+ # When numerator==0, whether denominator==0 or not, the result
+ # for the precision score should be equal to 0 or undefined.
+ # Due to BLEU geometric mean computation in logarithm space,
+ # we need to return sys.float_info.min so that
+ # math.log(sys.float_info.min) effectively gives a zero precision score.
+ p_n_new.append(sys.float_info.min)
+ return p_n_new
+
+ def method1(self, p_n, *args, **kwargs):
+ """
+ Smoothing method 1: Add *epsilon* counts to precision with 0 counts.
+ """
+ return [
+ (p_i.numerator + self.epsilon) / p_i.denominator
+ if p_i.numerator == 0
+ else p_i
+ for p_i in p_n
+ ]
+
+ def method2(self, p_n, *args, **kwargs):
+ """
+ Smoothing method 2: Add 1 to both numerator and denominator from
+ Chin-Yew Lin and Franz Josef Och (2004) ORANGE: a Method for
+ Evaluating Automatic Evaluation Metrics for Machine Translation.
+ In COLING 2004.
+ """
+ return [
+ Fraction(p_n[i].numerator + 1, p_n[i].denominator + 1, _normalize=False)
+ if i != 0
+ else p_n[0]
+ for i in range(len(p_n))
+ ]
+
+ def method3(self, p_n, *args, **kwargs):
+ """
+ Smoothing method 3: NIST geometric sequence smoothing
+ The smoothing is computed by taking 1 / ( 2^k ), instead of 0, for each
+ precision score whose matching n-gram count is null.
+ k is 1 for the first 'n' value for which the n-gram match count is null.
+
+ For example, if the text contains:
+
+ - one 2-gram match
+ - and (consequently) two 1-gram matches
+
+ the n-gram count for each individual precision score would be:
+
+ - n=1 => prec_count = 2 (two unigrams)
+ - n=2 => prec_count = 1 (one bigram)
+ - n=3 => prec_count = 1/2 (no trigram, taking 'smoothed' value of 1 / ( 2^k ), with k=1)
+ - n=4 => prec_count = 1/4 (no fourgram, taking 'smoothed' value of 1 / ( 2^k ), with k=2)
+ """
+ incvnt = 1 # From the mteval-v13a.pl, it's referred to as k.
+ for i, p_i in enumerate(p_n):
+ if p_i.numerator == 0:
+ p_n[i] = 1 / (2**incvnt * p_i.denominator)
+ incvnt += 1
+ return p_n
+
+ def method4(self, p_n, references, hypothesis, hyp_len=None, *args, **kwargs):
+ """
+ Smoothing method 4:
+ Shorter translations may have inflated precision values due to having
+ smaller denominators; therefore, we give them proportionally
+ smaller smoothed counts. Instead of scaling to 1/(2^k), Chen and Cherry
+ suggest dividing by 1/ln(len(T)), where T is the length of the translation.
+ """
+ incvnt = 1
+ hyp_len = hyp_len if hyp_len else len(hypothesis)
+ for i, p_i in enumerate(p_n):
+ if p_i.numerator == 0 and hyp_len > 1:
+ # incvnt = i + 1 * self.k / math.log(
+ # hyp_len
+ # ) # Note that this K is different from the K from NIST.
+ # p_n[i] = incvnt / p_i.denominator\
+ numerator = 1 / (2**incvnt * self.k / math.log(hyp_len))
+ p_n[i] = numerator / p_i.denominator
+ incvnt += 1
+ return p_n
+
+ def method5(self, p_n, references, hypothesis, hyp_len=None, *args, **kwargs):
+ """
+ Smoothing method 5:
+ The matched counts for similar values of n should be similar. To
+ calculate the n-gram matched count, it averages the n-1, n and n+1 gram
+ matched counts.
+ """
+ hyp_len = hyp_len if hyp_len else len(hypothesis)
+ m = {}
+ # Requires a precision value for an additional ngram order.
+ p_n_plus1 = p_n + [modified_precision(references, hypothesis, 5)]
+ m[-1] = p_n[0] + 1
+ for i, p_i in enumerate(p_n):
+ p_n[i] = (m[i - 1] + p_i + p_n_plus1[i + 1]) / 3
+ m[i] = p_n[i]
+ return p_n
+
+ def method6(self, p_n, references, hypothesis, hyp_len=None, *args, **kwargs):
+ """
+ Smoothing method 6:
+ Interpolates the maximum likelihood estimate of the precision *p_n* with
+ a prior estimate *pi0*. The prior is estimated by assuming that the ratio
+ between p_n and p_n-1 will be the same as that between p_n-1 and p_n-2; from
+ Gao and He (2013) Training MRF-Based Phrase Translation Models using
+ Gradient Ascent. In NAACL.
+ """
+ hyp_len = hyp_len if hyp_len else len(hypothesis)
+ # This smoothing only works when p_1 and p_2 are non-zero.
+ # Raise an error with an appropriate message when the input is too short
+ # to use this smoothing technique.
+ assert p_n[2], "This smoothing method requires non-zero precision for bigrams."
+ for i, p_i in enumerate(p_n):
+ if i in [0, 1]: # Skips the first 2 orders of ngrams.
+ continue
+ else:
+ pi0 = 0 if p_n[i - 2] == 0 else p_n[i - 1] ** 2 / p_n[i - 2]
+ # No. of ngrams in translation that matches the reference.
+ m = p_i.numerator
+ # No. of ngrams in translation.
+ l = sum(1 for _ in ngrams(hypothesis, i + 1))
+ # Calculates the interpolated precision.
+ p_n[i] = (m + self.alpha * pi0) / (l + self.alpha)
+ return p_n
+
+ def method7(self, p_n, references, hypothesis, hyp_len=None, *args, **kwargs):
+ """
+ Smoothing method 7:
+ Interpolates methods 4 and 5.
+ """
+ hyp_len = hyp_len if hyp_len else len(hypothesis)
+ p_n = self.method4(p_n, references, hypothesis, hyp_len)
+ p_n = self.method5(p_n, references, hypothesis, hyp_len)
+ return p_n
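
The substantive change in the bleu_score.py hunk above is the new Fraction subclass: Python 3.12 removed the private _normalize argument from fractions.Fraction, which corpus_bleu() and the SmoothingFunction methods relied on to keep unreduced numerators and denominators. Below is a minimal standalone sketch (not part of the patch, assumes Python 3.12) of the stdlib behaviour being worked around:

# Sketch (assumption: Python >= 3.12): the stdlib Fraction always reduces
# to lowest terms, so a zero-match precision such as 0/7 collapses to 0/1
# and the denominator needed by the smoothing methods is lost.
from fractions import Fraction

p = Fraction(0, 7)
print(p.numerator, p.denominator)  # -> 0 1 (the denominator 7 is gone)

# The patched nltk.translate.bleu_score.Fraction keeps the original counts
# when _normalize=False is requested, e.g.
#   Fraction(0, 7, _normalize=False).denominator == 7
# which is what corpus_bleu() expects when it sums numerators and
# denominators per n-gram order.

This is why the hunk constructs its per-order precision values with Fraction(..., _normalize=False) rather than using fractions.Fraction directly.
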
Index: nltk-3.8.1/README.md
===================================================================
--- nltk-3.8.1.orig/README.md
+++ nltk-3.8.1/README.md
@@ -1,50 +1,50 @@
-# Natural Language Toolkit (NLTK)
-[![PyPI](https://img.shields.io/pypi/v/nltk.svg)](https://pypi.python.org/pypi/nltk)
-![CI](https://github.com/nltk/nltk/actions/workflows/ci.yaml/badge.svg?branch=develop)
-
-NLTK -- the Natural Language Toolkit -- is a suite of open source Python
-modules, data sets, and tutorials supporting research and development in Natural
-Language Processing. NLTK requires Python version 3.7, 3.8, 3.9, 3.10 or 3.11.
-
-For documentation, please visit [nltk.org](https://www.nltk.org/).
-
-
-## Contributing
-
-Do you want to contribute to NLTK development? Great!
-Please read [CONTRIBUTING.md](CONTRIBUTING.md) for more details.
-
-See also [how to contribute to NLTK](https://www.nltk.org/contribute.html).
-
-
-## Donate
-
-Have you found the toolkit helpful? Please support NLTK development by donating
-to the project via PayPal, using the link on the NLTK homepage.
-
-
-## Citing
-
-If you publish work that uses NLTK, please cite the NLTK book, as follows:
-
- Bird, Steven, Edward Loper and Ewan Klein (2009).
- Natural Language Processing with Python. O'Reilly Media Inc.
-
-
-## Copyright
-
-Copyright (C) 2001-2023 NLTK Project
-
-For license information, see [LICENSE.txt](LICENSE.txt).
-
-[AUTHORS.md](AUTHORS.md) contains a list of everyone who has contributed to NLTK.
-
-
-### Redistributing
-
-- NLTK source code is distributed under the Apache 2.0 License.
-- NLTK documentation is distributed under the Creative Commons
- Attribution-Noncommercial-No Derivative Works 3.0 United States license.
-- NLTK corpora are provided under the terms given in the README file for each
- corpus; all are redistributable and available for non-commercial use.
-- NLTK may be freely redistributed, subject to the provisions of these licenses.
+# Natural Language Toolkit (NLTK)
+[![PyPI](https://img.shields.io/pypi/v/nltk.svg)](https://pypi.python.org/pypi/nltk)
+![CI](https://github.com/nltk/nltk/actions/workflows/ci.yaml/badge.svg?branch=develop)
+
+NLTK -- the Natural Language Toolkit -- is a suite of open source Python
+modules, data sets, and tutorials supporting research and development in Natural
+Language Processing. NLTK requires Python version 3.7, 3.8, 3.9, 3.10, 3.11 or 3.12.
+
+For documentation, please visit [nltk.org](https://www.nltk.org/).
+
+
+## Contributing
+
+Do you want to contribute to NLTK development? Great!
+Please read [CONTRIBUTING.md](CONTRIBUTING.md) for more details.
+
+See also [how to contribute to NLTK](https://www.nltk.org/contribute.html).
+
+
+## Donate
+
+Have you found the toolkit helpful? Please support NLTK development by donating
+to the project via PayPal, using the link on the NLTK homepage.
+
+
+## Citing
+
+If you publish work that uses NLTK, please cite the NLTK book, as follows:
+
+ Bird, Steven, Edward Loper and Ewan Klein (2009).
+ Natural Language Processing with Python. O'Reilly Media Inc.
+
+
+## Copyright
+
+Copyright (C) 2001-2023 NLTK Project
+
+For license information, see [LICENSE.txt](LICENSE.txt).
+
+[AUTHORS.md](AUTHORS.md) contains a list of everyone who has contributed to NLTK.
+
+
+### Redistributing
+
+- NLTK source code is distributed under the Apache 2.0 License.
+- NLTK documentation is distributed under the Creative Commons
+ Attribution-Noncommercial-No Derivative Works 3.0 United States license.
+- NLTK corpora are provided under the terms given in the README file for each
+ corpus; all are redistributable and available for non-commercial use.
+- NLTK may be freely redistributed, subject to the provisions of these licenses.
Index: nltk-3.8.1/setup.py
===================================================================
--- nltk-3.8.1.orig/setup.py
+++ nltk-3.8.1/setup.py
@@ -1,125 +1,126 @@
-#!/usr/bin/env python
-#
-# Setup script for the Natural Language Toolkit
-#
-# Copyright (C) 2001-2023 NLTK Project
-# Author: NLTK Team <nltk.team@gmail.com>
-# URL: <https://www.nltk.org/>
-# For license information, see LICENSE.TXT
-
-# Work around mbcs bug in distutils.
-# https://bugs.python.org/issue10945
-import codecs
-
-try:
- codecs.lookup("mbcs")
-except LookupError:
- ascii = codecs.lookup("ascii")
- func = lambda name, enc=ascii: {True: enc}.get(name == "mbcs")
- codecs.register(func)
-
-import os
-
-# Use the VERSION file to get NLTK version
-version_file = os.path.join(os.path.dirname(__file__), "nltk", "VERSION")
-with open(version_file) as fh:
- nltk_version = fh.read().strip()
-
-# setuptools
-from setuptools import find_packages, setup
-
-# Specify groups of optional dependencies
-extras_require = {
- "machine_learning": [
- "numpy",
- "python-crfsuite",
- "scikit-learn",
- "scipy",
- ],
- "plot": ["matplotlib"],
- "tgrep": ["pyparsing"],
- "twitter": ["twython"],
- "corenlp": ["requests"],
-}
-
-# Add a group made up of all optional dependencies
-extras_require["all"] = {
- package for group in extras_require.values() for package in group
-}
-
-# Adds CLI commands
-console_scripts = """
-[console_scripts]
-nltk=nltk.cli:cli
-"""
-
-_project_homepage = "https://www.nltk.org/"
-
-setup(
- name="nltk",
- description="Natural Language Toolkit",
- version=nltk_version,
- url=_project_homepage,
- project_urls={
- "Documentation": _project_homepage,
- "Source Code": "https://github.com/nltk/nltk",
- "Issue Tracker": "https://github.com/nltk/nltk/issues",
- },
- long_description="""\
-The Natural Language Toolkit (NLTK) is a Python package for
-natural language processing. NLTK requires Python 3.7, 3.8, 3.9, 3.10 or 3.11.""",
- license="Apache License, Version 2.0",
- keywords=[
- "NLP",
- "CL",
- "natural language processing",
- "computational linguistics",
- "parsing",
- "tagging",
- "tokenizing",
- "syntax",
- "linguistics",
- "language",
- "natural language",
- "text analytics",
- ],
- maintainer="NLTK Team",
- maintainer_email="nltk.team@gmail.com",
- author="NLTK Team",
- author_email="nltk.team@gmail.com",
- classifiers=[
- "Development Status :: 5 - Production/Stable",
- "Intended Audience :: Developers",
- "Intended Audience :: Education",
- "Intended Audience :: Information Technology",
- "Intended Audience :: Science/Research",
- "License :: OSI Approved :: Apache Software License",
- "Operating System :: OS Independent",
- "Programming Language :: Python :: 3.7",
- "Programming Language :: Python :: 3.8",
- "Programming Language :: Python :: 3.9",
- "Programming Language :: Python :: 3.10",
- "Programming Language :: Python :: 3.11",
- "Topic :: Scientific/Engineering",
- "Topic :: Scientific/Engineering :: Artificial Intelligence",
- "Topic :: Scientific/Engineering :: Human Machine Interfaces",
- "Topic :: Scientific/Engineering :: Information Analysis",
- "Topic :: Text Processing",
- "Topic :: Text Processing :: Filters",
- "Topic :: Text Processing :: General",
- "Topic :: Text Processing :: Indexing",
- "Topic :: Text Processing :: Linguistic",
- ],
- package_data={"nltk": ["test/*.doctest", "VERSION"]},
- python_requires=">=3.7",
- install_requires=[
- "click",
- "joblib",
- "regex>=2021.8.3",
- "tqdm",
- ],
- extras_require=extras_require,
- packages=find_packages(),
- zip_safe=False, # since normal files will be present too?
- entry_points=console_scripts,
-)
+#!/usr/bin/env python
+#
+# Setup script for the Natural Language Toolkit
+#
+# Copyright (C) 2001-2023 NLTK Project
+# Author: NLTK Team <nltk.team@gmail.com>
+# URL: <https://www.nltk.org/>
+# For license information, see LICENSE.TXT
+
+# Work around mbcs bug in distutils.
+# https://bugs.python.org/issue10945
+import codecs
+
+try:
+ codecs.lookup("mbcs")
+except LookupError:
+ ascii = codecs.lookup("ascii")
+ func = lambda name, enc=ascii: {True: enc}.get(name == "mbcs")
+ codecs.register(func)
+
+import os
+
+# Use the VERSION file to get NLTK version
+version_file = os.path.join(os.path.dirname(__file__), "nltk", "VERSION")
+with open(version_file) as fh:
+ nltk_version = fh.read().strip()
+
+# setuptools
+from setuptools import find_packages, setup
+
+# Specify groups of optional dependencies
+extras_require = {
+ "machine_learning": [
+ "numpy",
+ "python-crfsuite",
+ "scikit-learn",
+ "scipy",
+ ],
+ "plot": ["matplotlib"],
+ "tgrep": ["pyparsing"],
+ "twitter": ["twython"],
+ "corenlp": ["requests"],
+}
+
+# Add a group made up of all optional dependencies
+extras_require["all"] = {
+ package for group in extras_require.values() for package in group
+}
+
+# Adds CLI commands
+console_scripts = """
+[console_scripts]
+nltk=nltk.cli:cli
+"""
+
+_project_homepage = "https://www.nltk.org/"
+
+setup(
+ name="nltk",
+ description="Natural Language Toolkit",
+ version=nltk_version,
+ url=_project_homepage,
+ project_urls={
+ "Documentation": _project_homepage,
+ "Source Code": "https://github.com/nltk/nltk",
+ "Issue Tracker": "https://github.com/nltk/nltk/issues",
+ },
+ long_description="""\
+The Natural Language Toolkit (NLTK) is a Python package for
+natural language processing. NLTK requires Python 3.7, 3.8, 3.9, 3.10, 3.11 or 3.12.""",
+ license="Apache License, Version 2.0",
+ keywords=[
+ "NLP",
+ "CL",
+ "natural language processing",
+ "computational linguistics",
+ "parsing",
+ "tagging",
+ "tokenizing",
+ "syntax",
+ "linguistics",
+ "language",
+ "natural language",
+ "text analytics",
+ ],
+ maintainer="NLTK Team",
+ maintainer_email="nltk.team@gmail.com",
+ author="NLTK Team",
+ author_email="nltk.team@gmail.com",
+ classifiers=[
+ "Development Status :: 5 - Production/Stable",
+ "Intended Audience :: Developers",
+ "Intended Audience :: Education",
+ "Intended Audience :: Information Technology",
+ "Intended Audience :: Science/Research",
+ "License :: OSI Approved :: Apache Software License",
+ "Operating System :: OS Independent",
+ "Programming Language :: Python :: 3.7",
+ "Programming Language :: Python :: 3.8",
+ "Programming Language :: Python :: 3.9",
+ "Programming Language :: Python :: 3.10",
+ "Programming Language :: Python :: 3.11",
+ "Programming Language :: Python :: 3.12",
+ "Topic :: Scientific/Engineering",
+ "Topic :: Scientific/Engineering :: Artificial Intelligence",
+ "Topic :: Scientific/Engineering :: Human Machine Interfaces",
+ "Topic :: Scientific/Engineering :: Information Analysis",
+ "Topic :: Text Processing",
+ "Topic :: Text Processing :: Filters",
+ "Topic :: Text Processing :: General",
+ "Topic :: Text Processing :: Indexing",
+ "Topic :: Text Processing :: Linguistic",
+ ],
+ package_data={"nltk": ["test/*.doctest", "VERSION"]},
+ python_requires=">=3.7",
+ install_requires=[
+ "click",
+ "joblib",
+ "regex>=2021.8.3",
+ "tqdm",
+ ],
+ extras_require=extras_require,
+ packages=find_packages(),
+ zip_safe=False, # since normal files will be present too?
+ entry_points=console_scripts,
+)
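
As a usage note for the optional dependency groups declared in extras_require above: they map onto standard pip "extras". The exact commands below are an assumption about a typical install workflow, not part of the patch:

    pip install nltk                       # core install: click, joblib, regex, tqdm
    pip install "nltk[machine_learning]"   # adds numpy, python-crfsuite, scikit-learn, scipy
    pip install "nltk[all]"                # every optional group defined in extras_require

The console_scripts entry point additionally installs an nltk command that dispatches to nltk.cli:cli.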