1783 lines
73 KiB
Diff
1783 lines
73 KiB
Diff
|
From 25d35fc4283dedd2053ec6d821f4b707fff8d72c Mon Sep 17 00:00:00 2001
|
|||
|
From: Konstantin Chernyshev <k4black@ya.ru>
|
|||
|
Date: Thu, 16 Nov 2023 19:00:15 +0100
|
|||
|
Subject: [PATCH 1/8] ci: enable 3.12 in ci tests
|
|||
|
|
|||
|
---
|
|||
|
.github/workflows/ci.yaml | 2 +-
|
|||
|
1 file changed, 1 insertion(+), 1 deletion(-)
|
|||
|
|
|||
|
Index: nltk-3.8.1/nltk/test/unit/translate/test_bleu.py
|
|||
|
===================================================================
|
|||
|
--- nltk-3.8.1.orig/nltk/test/unit/translate/test_bleu.py
|
|||
|
+++ nltk-3.8.1/nltk/test/unit/translate/test_bleu.py
|
|||
|
@@ -2,7 +2,6 @@
|
|||
|
Tests for BLEU translation evaluation metric
|
|||
|
"""
|
|||
|
|
|||
|
-import io
|
|||
|
import unittest
|
|||
|
|
|||
|
from nltk.data import find
|
|||
|
Index: nltk-3.8.1/nltk/translate/bleu_score.py
|
|||
|
===================================================================
|
|||
|
--- nltk-3.8.1.orig/nltk/translate/bleu_score.py
|
|||
|
+++ nltk-3.8.1/nltk/translate/bleu_score.py
|
|||
|
@@ -1,685 +1,710 @@
|
|||
|
-# Natural Language Toolkit: BLEU Score
|
|||
|
-#
|
|||
|
-# Copyright (C) 2001-2023 NLTK Project
|
|||
|
-# Authors: Chin Yee Lee, Hengfeng Li, Ruxin Hou, Calvin Tanujaya Lim
|
|||
|
-# Contributors: Björn Mattsson, Dmitrijs Milajevs, Liling Tan
|
|||
|
-# URL: <https://www.nltk.org/>
|
|||
|
-# For license information, see LICENSE.TXT
|
|||
|
-
|
|||
|
-"""BLEU score implementation."""
|
|||
|
-
|
|||
|
-import math
|
|||
|
-import sys
|
|||
|
-import warnings
|
|||
|
-from collections import Counter
|
|||
|
-from fractions import Fraction
|
|||
|
-
|
|||
|
-from nltk.util import ngrams
|
|||
|
-
|
|||
|
-
|
|||
|
-def sentence_bleu(
|
|||
|
- references,
|
|||
|
- hypothesis,
|
|||
|
- weights=(0.25, 0.25, 0.25, 0.25),
|
|||
|
- smoothing_function=None,
|
|||
|
- auto_reweigh=False,
|
|||
|
-):
|
|||
|
- """
|
|||
|
- Calculate BLEU score (Bilingual Evaluation Understudy) from
|
|||
|
- Papineni, Kishore, Salim Roukos, Todd Ward, and Wei-Jing Zhu. 2002.
|
|||
|
- "BLEU: a method for automatic evaluation of machine translation."
|
|||
|
- In Proceedings of ACL. https://www.aclweb.org/anthology/P02-1040.pdf
|
|||
|
-
|
|||
|
- >>> hypothesis1 = ['It', 'is', 'a', 'guide', 'to', 'action', 'which',
|
|||
|
- ... 'ensures', 'that', 'the', 'military', 'always',
|
|||
|
- ... 'obeys', 'the', 'commands', 'of', 'the', 'party']
|
|||
|
-
|
|||
|
- >>> hypothesis2 = ['It', 'is', 'to', 'insure', 'the', 'troops',
|
|||
|
- ... 'forever', 'hearing', 'the', 'activity', 'guidebook',
|
|||
|
- ... 'that', 'party', 'direct']
|
|||
|
-
|
|||
|
- >>> reference1 = ['It', 'is', 'a', 'guide', 'to', 'action', 'that',
|
|||
|
- ... 'ensures', 'that', 'the', 'military', 'will', 'forever',
|
|||
|
- ... 'heed', 'Party', 'commands']
|
|||
|
-
|
|||
|
- >>> reference2 = ['It', 'is', 'the', 'guiding', 'principle', 'which',
|
|||
|
- ... 'guarantees', 'the', 'military', 'forces', 'always',
|
|||
|
- ... 'being', 'under', 'the', 'command', 'of', 'the',
|
|||
|
- ... 'Party']
|
|||
|
-
|
|||
|
- >>> reference3 = ['It', 'is', 'the', 'practical', 'guide', 'for', 'the',
|
|||
|
- ... 'army', 'always', 'to', 'heed', 'the', 'directions',
|
|||
|
- ... 'of', 'the', 'party']
|
|||
|
-
|
|||
|
- >>> sentence_bleu([reference1, reference2, reference3], hypothesis1) # doctest: +ELLIPSIS
|
|||
|
- 0.5045...
|
|||
|
-
|
|||
|
- If there is no ngrams overlap for any order of n-grams, BLEU returns the
|
|||
|
- value 0. This is because the precision for the order of n-grams without
|
|||
|
- overlap is 0, and the geometric mean in the final BLEU score computation
|
|||
|
- multiplies the 0 with the precision of other n-grams. This results in 0
|
|||
|
- (independently of the precision of the other n-gram orders). The following
|
|||
|
- example has zero 3-gram and 4-gram overlaps:
|
|||
|
-
|
|||
|
- >>> round(sentence_bleu([reference1, reference2, reference3], hypothesis2),4) # doctest: +ELLIPSIS
|
|||
|
- 0.0
|
|||
|
-
|
|||
|
- To avoid this harsh behaviour when no ngram overlaps are found a smoothing
|
|||
|
- function can be used.
|
|||
|
-
|
|||
|
- >>> chencherry = SmoothingFunction()
|
|||
|
- >>> sentence_bleu([reference1, reference2, reference3], hypothesis2,
|
|||
|
- ... smoothing_function=chencherry.method1) # doctest: +ELLIPSIS
|
|||
|
- 0.0370...
|
|||
|
-
|
|||
|
- The default BLEU calculates a score for up to 4-grams using uniform
|
|||
|
- weights (this is called BLEU-4). To evaluate your translations with
|
|||
|
- higher/lower order ngrams, use customized weights. E.g. when accounting
|
|||
|
- for up to 5-grams with uniform weights (this is called BLEU-5) use:
|
|||
|
-
|
|||
|
- >>> weights = (1./5., 1./5., 1./5., 1./5., 1./5.)
|
|||
|
- >>> sentence_bleu([reference1, reference2, reference3], hypothesis1, weights) # doctest: +ELLIPSIS
|
|||
|
- 0.3920...
|
|||
|
-
|
|||
|
- Multiple BLEU scores can be computed at once, by supplying a list of weights.
|
|||
|
- E.g. for computing BLEU-2, BLEU-3 *and* BLEU-4 in one computation, use:
|
|||
|
- >>> weights = [
|
|||
|
- ... (1./2., 1./2.),
|
|||
|
- ... (1./3., 1./3., 1./3.),
|
|||
|
- ... (1./4., 1./4., 1./4., 1./4.)
|
|||
|
- ... ]
|
|||
|
- >>> sentence_bleu([reference1, reference2, reference3], hypothesis1, weights) # doctest: +ELLIPSIS
|
|||
|
- [0.7453..., 0.6240..., 0.5045...]
|
|||
|
-
|
|||
|
- :param references: reference sentences
|
|||
|
- :type references: list(list(str))
|
|||
|
- :param hypothesis: a hypothesis sentence
|
|||
|
- :type hypothesis: list(str)
|
|||
|
- :param weights: weights for unigrams, bigrams, trigrams and so on (one or a list of weights)
|
|||
|
- :type weights: tuple(float) / list(tuple(float))
|
|||
|
- :param smoothing_function:
|
|||
|
- :type smoothing_function: SmoothingFunction
|
|||
|
- :param auto_reweigh: Option to re-normalize the weights uniformly.
|
|||
|
- :type auto_reweigh: bool
|
|||
|
- :return: The sentence-level BLEU score. Returns a list if multiple weights were supplied.
|
|||
|
- :rtype: float / list(float)
|
|||
|
- """
|
|||
|
- return corpus_bleu(
|
|||
|
- [references], [hypothesis], weights, smoothing_function, auto_reweigh
|
|||
|
- )
|
|||
|
-
|
|||
|
-
|
|||
|
-def corpus_bleu(
|
|||
|
- list_of_references,
|
|||
|
- hypotheses,
|
|||
|
- weights=(0.25, 0.25, 0.25, 0.25),
|
|||
|
- smoothing_function=None,
|
|||
|
- auto_reweigh=False,
|
|||
|
-):
|
|||
|
- """
|
|||
|
- Calculate a single corpus-level BLEU score (aka. system-level BLEU) for all
|
|||
|
- the hypotheses and their respective references.
|
|||
|
-
|
|||
|
- Instead of averaging the sentence level BLEU scores (i.e. macro-average
|
|||
|
- precision), the original BLEU metric (Papineni et al. 2002) accounts for
|
|||
|
- the micro-average precision (i.e. summing the numerators and denominators
|
|||
|
- for each hypothesis-reference(s) pairs before the division).
|
|||
|
-
|
|||
|
- >>> hyp1 = ['It', 'is', 'a', 'guide', 'to', 'action', 'which',
|
|||
|
- ... 'ensures', 'that', 'the', 'military', 'always',
|
|||
|
- ... 'obeys', 'the', 'commands', 'of', 'the', 'party']
|
|||
|
- >>> ref1a = ['It', 'is', 'a', 'guide', 'to', 'action', 'that',
|
|||
|
- ... 'ensures', 'that', 'the', 'military', 'will', 'forever',
|
|||
|
- ... 'heed', 'Party', 'commands']
|
|||
|
- >>> ref1b = ['It', 'is', 'the', 'guiding', 'principle', 'which',
|
|||
|
- ... 'guarantees', 'the', 'military', 'forces', 'always',
|
|||
|
- ... 'being', 'under', 'the', 'command', 'of', 'the', 'Party']
|
|||
|
- >>> ref1c = ['It', 'is', 'the', 'practical', 'guide', 'for', 'the',
|
|||
|
- ... 'army', 'always', 'to', 'heed', 'the', 'directions',
|
|||
|
- ... 'of', 'the', 'party']
|
|||
|
-
|
|||
|
- >>> hyp2 = ['he', 'read', 'the', 'book', 'because', 'he', 'was',
|
|||
|
- ... 'interested', 'in', 'world', 'history']
|
|||
|
- >>> ref2a = ['he', 'was', 'interested', 'in', 'world', 'history',
|
|||
|
- ... 'because', 'he', 'read', 'the', 'book']
|
|||
|
-
|
|||
|
- >>> list_of_references = [[ref1a, ref1b, ref1c], [ref2a]]
|
|||
|
- >>> hypotheses = [hyp1, hyp2]
|
|||
|
- >>> corpus_bleu(list_of_references, hypotheses) # doctest: +ELLIPSIS
|
|||
|
- 0.5920...
|
|||
|
-
|
|||
|
- The example below show that corpus_bleu() is different from averaging
|
|||
|
- sentence_bleu() for hypotheses
|
|||
|
-
|
|||
|
- >>> score1 = sentence_bleu([ref1a, ref1b, ref1c], hyp1)
|
|||
|
- >>> score2 = sentence_bleu([ref2a], hyp2)
|
|||
|
- >>> (score1 + score2) / 2 # doctest: +ELLIPSIS
|
|||
|
- 0.6223...
|
|||
|
-
|
|||
|
- Custom weights may be supplied to fine-tune the BLEU score further.
|
|||
|
- A tuple of float weights for unigrams, bigrams, trigrams and so on can be given.
|
|||
|
- >>> weights = (0.1, 0.3, 0.5, 0.1)
|
|||
|
- >>> corpus_bleu(list_of_references, hypotheses, weights=weights) # doctest: +ELLIPSIS
|
|||
|
- 0.5818...
|
|||
|
-
|
|||
|
- This particular weight gave extra value to trigrams.
|
|||
|
- Furthermore, multiple weights can be given, resulting in multiple BLEU scores.
|
|||
|
- >>> weights = [
|
|||
|
- ... (0.5, 0.5),
|
|||
|
- ... (0.333, 0.333, 0.334),
|
|||
|
- ... (0.25, 0.25, 0.25, 0.25),
|
|||
|
- ... (0.2, 0.2, 0.2, 0.2, 0.2)
|
|||
|
- ... ]
|
|||
|
- >>> corpus_bleu(list_of_references, hypotheses, weights=weights) # doctest: +ELLIPSIS
|
|||
|
- [0.8242..., 0.7067..., 0.5920..., 0.4719...]
|
|||
|
-
|
|||
|
- :param list_of_references: a corpus of lists of reference sentences, w.r.t. hypotheses
|
|||
|
- :type list_of_references: list(list(list(str)))
|
|||
|
- :param hypotheses: a list of hypothesis sentences
|
|||
|
- :type hypotheses: list(list(str))
|
|||
|
- :param weights: weights for unigrams, bigrams, trigrams and so on (one or a list of weights)
|
|||
|
- :type weights: tuple(float) / list(tuple(float))
|
|||
|
- :param smoothing_function:
|
|||
|
- :type smoothing_function: SmoothingFunction
|
|||
|
- :param auto_reweigh: Option to re-normalize the weights uniformly.
|
|||
|
- :type auto_reweigh: bool
|
|||
|
- :return: The corpus-level BLEU score.
|
|||
|
- :rtype: float
|
|||
|
- """
|
|||
|
- # Before proceeding to compute BLEU, perform sanity checks.
|
|||
|
-
|
|||
|
- p_numerators = Counter() # Key = ngram order, and value = no. of ngram matches.
|
|||
|
- p_denominators = Counter() # Key = ngram order, and value = no. of ngram in ref.
|
|||
|
- hyp_lengths, ref_lengths = 0, 0
|
|||
|
-
|
|||
|
- assert len(list_of_references) == len(hypotheses), (
|
|||
|
- "The number of hypotheses and their reference(s) should be the " "same "
|
|||
|
- )
|
|||
|
-
|
|||
|
- try:
|
|||
|
- weights[0][0]
|
|||
|
- except TypeError:
|
|||
|
- weights = [weights]
|
|||
|
- max_weight_length = max(len(weight) for weight in weights)
|
|||
|
-
|
|||
|
- # Iterate through each hypothesis and their corresponding references.
|
|||
|
- for references, hypothesis in zip(list_of_references, hypotheses):
|
|||
|
- # For each order of ngram, calculate the numerator and
|
|||
|
- # denominator for the corpus-level modified precision.
|
|||
|
- for i in range(1, max_weight_length + 1):
|
|||
|
- p_i = modified_precision(references, hypothesis, i)
|
|||
|
- p_numerators[i] += p_i.numerator
|
|||
|
- p_denominators[i] += p_i.denominator
|
|||
|
-
|
|||
|
- # Calculate the hypothesis length and the closest reference length.
|
|||
|
- # Adds them to the corpus-level hypothesis and reference counts.
|
|||
|
- hyp_len = len(hypothesis)
|
|||
|
- hyp_lengths += hyp_len
|
|||
|
- ref_lengths += closest_ref_length(references, hyp_len)
|
|||
|
-
|
|||
|
- # Calculate corpus-level brevity penalty.
|
|||
|
- bp = brevity_penalty(ref_lengths, hyp_lengths)
|
|||
|
-
|
|||
|
- # Collects the various precision values for the different ngram orders.
|
|||
|
- p_n = [
|
|||
|
- Fraction(p_numerators[i], p_denominators[i], _normalize=False)
|
|||
|
- for i in range(1, max_weight_length + 1)
|
|||
|
- ]
|
|||
|
-
|
|||
|
- # Returns 0 if there's no matching n-grams
|
|||
|
- # We only need to check for p_numerators[1] == 0, since if there's
|
|||
|
- # no unigrams, there won't be any higher order ngrams.
|
|||
|
- if p_numerators[1] == 0:
|
|||
|
- return 0 if len(weights) == 1 else [0] * len(weights)
|
|||
|
-
|
|||
|
- # If there's no smoothing, set use method0 from SmoothinFunction class.
|
|||
|
- if not smoothing_function:
|
|||
|
- smoothing_function = SmoothingFunction().method0
|
|||
|
- # Smoothen the modified precision.
|
|||
|
- # Note: smoothing_function() may convert values into floats;
|
|||
|
- # it tries to retain the Fraction object as much as the
|
|||
|
- # smoothing method allows.
|
|||
|
- p_n = smoothing_function(
|
|||
|
- p_n, references=references, hypothesis=hypothesis, hyp_len=hyp_lengths
|
|||
|
- )
|
|||
|
-
|
|||
|
- bleu_scores = []
|
|||
|
- for weight in weights:
|
|||
|
- # Uniformly re-weighting based on maximum hypothesis lengths if largest
|
|||
|
- # order of n-grams < 4 and weights is set at default.
|
|||
|
- if auto_reweigh:
|
|||
|
- if hyp_lengths < 4 and weight == (0.25, 0.25, 0.25, 0.25):
|
|||
|
- weight = (1 / hyp_lengths,) * hyp_lengths
|
|||
|
-
|
|||
|
- s = (w_i * math.log(p_i) for w_i, p_i in zip(weight, p_n) if p_i > 0)
|
|||
|
- s = bp * math.exp(math.fsum(s))
|
|||
|
- bleu_scores.append(s)
|
|||
|
- return bleu_scores[0] if len(weights) == 1 else bleu_scores
|
|||
|
-
|
|||
|
-
|
|||
|
-def modified_precision(references, hypothesis, n):
|
|||
|
- """
|
|||
|
- Calculate modified ngram precision.
|
|||
|
-
|
|||
|
- The normal precision method may lead to some wrong translations with
|
|||
|
- high-precision, e.g., the translation, in which a word of reference
|
|||
|
- repeats several times, has very high precision.
|
|||
|
-
|
|||
|
- This function only returns the Fraction object that contains the numerator
|
|||
|
- and denominator necessary to calculate the corpus-level precision.
|
|||
|
- To calculate the modified precision for a single pair of hypothesis and
|
|||
|
- references, cast the Fraction object into a float.
|
|||
|
-
|
|||
|
- The famous "the the the ... " example shows that you can get BLEU precision
|
|||
|
- by duplicating high frequency words.
|
|||
|
-
|
|||
|
- >>> reference1 = 'the cat is on the mat'.split()
|
|||
|
- >>> reference2 = 'there is a cat on the mat'.split()
|
|||
|
- >>> hypothesis1 = 'the the the the the the the'.split()
|
|||
|
- >>> references = [reference1, reference2]
|
|||
|
- >>> float(modified_precision(references, hypothesis1, n=1)) # doctest: +ELLIPSIS
|
|||
|
- 0.2857...
|
|||
|
-
|
|||
|
- In the modified n-gram precision, a reference word will be considered
|
|||
|
- exhausted after a matching hypothesis word is identified, e.g.
|
|||
|
-
|
|||
|
- >>> reference1 = ['It', 'is', 'a', 'guide', 'to', 'action', 'that',
|
|||
|
- ... 'ensures', 'that', 'the', 'military', 'will',
|
|||
|
- ... 'forever', 'heed', 'Party', 'commands']
|
|||
|
- >>> reference2 = ['It', 'is', 'the', 'guiding', 'principle', 'which',
|
|||
|
- ... 'guarantees', 'the', 'military', 'forces', 'always',
|
|||
|
- ... 'being', 'under', 'the', 'command', 'of', 'the',
|
|||
|
- ... 'Party']
|
|||
|
- >>> reference3 = ['It', 'is', 'the', 'practical', 'guide', 'for', 'the',
|
|||
|
- ... 'army', 'always', 'to', 'heed', 'the', 'directions',
|
|||
|
- ... 'of', 'the', 'party']
|
|||
|
- >>> hypothesis = 'of the'.split()
|
|||
|
- >>> references = [reference1, reference2, reference3]
|
|||
|
- >>> float(modified_precision(references, hypothesis, n=1))
|
|||
|
- 1.0
|
|||
|
- >>> float(modified_precision(references, hypothesis, n=2))
|
|||
|
- 1.0
|
|||
|
-
|
|||
|
- An example of a normal machine translation hypothesis:
|
|||
|
-
|
|||
|
- >>> hypothesis1 = ['It', 'is', 'a', 'guide', 'to', 'action', 'which',
|
|||
|
- ... 'ensures', 'that', 'the', 'military', 'always',
|
|||
|
- ... 'obeys', 'the', 'commands', 'of', 'the', 'party']
|
|||
|
-
|
|||
|
- >>> hypothesis2 = ['It', 'is', 'to', 'insure', 'the', 'troops',
|
|||
|
- ... 'forever', 'hearing', 'the', 'activity', 'guidebook',
|
|||
|
- ... 'that', 'party', 'direct']
|
|||
|
-
|
|||
|
- >>> reference1 = ['It', 'is', 'a', 'guide', 'to', 'action', 'that',
|
|||
|
- ... 'ensures', 'that', 'the', 'military', 'will',
|
|||
|
- ... 'forever', 'heed', 'Party', 'commands']
|
|||
|
-
|
|||
|
- >>> reference2 = ['It', 'is', 'the', 'guiding', 'principle', 'which',
|
|||
|
- ... 'guarantees', 'the', 'military', 'forces', 'always',
|
|||
|
- ... 'being', 'under', 'the', 'command', 'of', 'the',
|
|||
|
- ... 'Party']
|
|||
|
-
|
|||
|
- >>> reference3 = ['It', 'is', 'the', 'practical', 'guide', 'for', 'the',
|
|||
|
- ... 'army', 'always', 'to', 'heed', 'the', 'directions',
|
|||
|
- ... 'of', 'the', 'party']
|
|||
|
- >>> references = [reference1, reference2, reference3]
|
|||
|
- >>> float(modified_precision(references, hypothesis1, n=1)) # doctest: +ELLIPSIS
|
|||
|
- 0.9444...
|
|||
|
- >>> float(modified_precision(references, hypothesis2, n=1)) # doctest: +ELLIPSIS
|
|||
|
- 0.5714...
|
|||
|
- >>> float(modified_precision(references, hypothesis1, n=2)) # doctest: +ELLIPSIS
|
|||
|
- 0.5882352941176471
|
|||
|
- >>> float(modified_precision(references, hypothesis2, n=2)) # doctest: +ELLIPSIS
|
|||
|
- 0.07692...
|
|||
|
-
|
|||
|
-
|
|||
|
- :param references: A list of reference translations.
|
|||
|
- :type references: list(list(str))
|
|||
|
- :param hypothesis: A hypothesis translation.
|
|||
|
- :type hypothesis: list(str)
|
|||
|
- :param n: The ngram order.
|
|||
|
- :type n: int
|
|||
|
- :return: BLEU's modified precision for the nth order ngram.
|
|||
|
- :rtype: Fraction
|
|||
|
- """
|
|||
|
- # Extracts all ngrams in hypothesis
|
|||
|
- # Set an empty Counter if hypothesis is empty.
|
|||
|
- counts = Counter(ngrams(hypothesis, n)) if len(hypothesis) >= n else Counter()
|
|||
|
- # Extract a union of references' counts.
|
|||
|
- # max_counts = reduce(or_, [Counter(ngrams(ref, n)) for ref in references])
|
|||
|
- max_counts = {}
|
|||
|
- for reference in references:
|
|||
|
- reference_counts = (
|
|||
|
- Counter(ngrams(reference, n)) if len(reference) >= n else Counter()
|
|||
|
- )
|
|||
|
- for ngram in counts:
|
|||
|
- max_counts[ngram] = max(max_counts.get(ngram, 0), reference_counts[ngram])
|
|||
|
-
|
|||
|
- # Assigns the intersection between hypothesis and references' counts.
|
|||
|
- clipped_counts = {
|
|||
|
- ngram: min(count, max_counts[ngram]) for ngram, count in counts.items()
|
|||
|
- }
|
|||
|
-
|
|||
|
- numerator = sum(clipped_counts.values())
|
|||
|
- # Ensures that denominator is minimum 1 to avoid ZeroDivisionError.
|
|||
|
- # Usually this happens when the ngram order is > len(reference).
|
|||
|
- denominator = max(1, sum(counts.values()))
|
|||
|
-
|
|||
|
- return Fraction(numerator, denominator, _normalize=False)
|
|||
|
-
|
|||
|
-
|
|||
|
-def closest_ref_length(references, hyp_len):
|
|||
|
- """
|
|||
|
- This function finds the reference that is the closest length to the
|
|||
|
- hypothesis. The closest reference length is referred to as *r* variable
|
|||
|
- from the brevity penalty formula in Papineni et. al. (2002)
|
|||
|
-
|
|||
|
- :param references: A list of reference translations.
|
|||
|
- :type references: list(list(str))
|
|||
|
- :param hyp_len: The length of the hypothesis.
|
|||
|
- :type hyp_len: int
|
|||
|
- :return: The length of the reference that's closest to the hypothesis.
|
|||
|
- :rtype: int
|
|||
|
- """
|
|||
|
- ref_lens = (len(reference) for reference in references)
|
|||
|
- closest_ref_len = min(
|
|||
|
- ref_lens, key=lambda ref_len: (abs(ref_len - hyp_len), ref_len)
|
|||
|
- )
|
|||
|
- return closest_ref_len
|
|||
|
-
|
|||
|
-
|
|||
|
-def brevity_penalty(closest_ref_len, hyp_len):
|
|||
|
- """
|
|||
|
- Calculate brevity penalty.
|
|||
|
-
|
|||
|
- As the modified n-gram precision still has the problem from the short
|
|||
|
- length sentence, brevity penalty is used to modify the overall BLEU
|
|||
|
- score according to length.
|
|||
|
-
|
|||
|
- An example from the paper. There are three references with length 12, 15
|
|||
|
- and 17. And a concise hypothesis of the length 12. The brevity penalty is 1.
|
|||
|
-
|
|||
|
- >>> reference1 = list('aaaaaaaaaaaa') # i.e. ['a'] * 12
|
|||
|
- >>> reference2 = list('aaaaaaaaaaaaaaa') # i.e. ['a'] * 15
|
|||
|
- >>> reference3 = list('aaaaaaaaaaaaaaaaa') # i.e. ['a'] * 17
|
|||
|
- >>> hypothesis = list('aaaaaaaaaaaa') # i.e. ['a'] * 12
|
|||
|
- >>> references = [reference1, reference2, reference3]
|
|||
|
- >>> hyp_len = len(hypothesis)
|
|||
|
- >>> closest_ref_len = closest_ref_length(references, hyp_len)
|
|||
|
- >>> brevity_penalty(closest_ref_len, hyp_len)
|
|||
|
- 1.0
|
|||
|
-
|
|||
|
- In case a hypothesis translation is shorter than the references, penalty is
|
|||
|
- applied.
|
|||
|
-
|
|||
|
- >>> references = [['a'] * 28, ['a'] * 28]
|
|||
|
- >>> hypothesis = ['a'] * 12
|
|||
|
- >>> hyp_len = len(hypothesis)
|
|||
|
- >>> closest_ref_len = closest_ref_length(references, hyp_len)
|
|||
|
- >>> brevity_penalty(closest_ref_len, hyp_len)
|
|||
|
- 0.2635971381157267
|
|||
|
-
|
|||
|
- The length of the closest reference is used to compute the penalty. If the
|
|||
|
- length of a hypothesis is 12, and the reference lengths are 13 and 2, the
|
|||
|
- penalty is applied because the hypothesis length (12) is less then the
|
|||
|
- closest reference length (13).
|
|||
|
-
|
|||
|
- >>> references = [['a'] * 13, ['a'] * 2]
|
|||
|
- >>> hypothesis = ['a'] * 12
|
|||
|
- >>> hyp_len = len(hypothesis)
|
|||
|
- >>> closest_ref_len = closest_ref_length(references, hyp_len)
|
|||
|
- >>> brevity_penalty(closest_ref_len, hyp_len) # doctest: +ELLIPSIS
|
|||
|
- 0.9200...
|
|||
|
-
|
|||
|
- The brevity penalty doesn't depend on reference order. More importantly,
|
|||
|
- when two reference sentences are at the same distance, the shortest
|
|||
|
- reference sentence length is used.
|
|||
|
-
|
|||
|
- >>> references = [['a'] * 13, ['a'] * 11]
|
|||
|
- >>> hypothesis = ['a'] * 12
|
|||
|
- >>> hyp_len = len(hypothesis)
|
|||
|
- >>> closest_ref_len = closest_ref_length(references, hyp_len)
|
|||
|
- >>> bp1 = brevity_penalty(closest_ref_len, hyp_len)
|
|||
|
- >>> hyp_len = len(hypothesis)
|
|||
|
- >>> closest_ref_len = closest_ref_length(reversed(references), hyp_len)
|
|||
|
- >>> bp2 = brevity_penalty(closest_ref_len, hyp_len)
|
|||
|
- >>> bp1 == bp2 == 1
|
|||
|
- True
|
|||
|
-
|
|||
|
- A test example from mteval-v13a.pl (starting from the line 705):
|
|||
|
-
|
|||
|
- >>> references = [['a'] * 11, ['a'] * 8]
|
|||
|
- >>> hypothesis = ['a'] * 7
|
|||
|
- >>> hyp_len = len(hypothesis)
|
|||
|
- >>> closest_ref_len = closest_ref_length(references, hyp_len)
|
|||
|
- >>> brevity_penalty(closest_ref_len, hyp_len) # doctest: +ELLIPSIS
|
|||
|
- 0.8668...
|
|||
|
-
|
|||
|
- >>> references = [['a'] * 11, ['a'] * 8, ['a'] * 6, ['a'] * 7]
|
|||
|
- >>> hypothesis = ['a'] * 7
|
|||
|
- >>> hyp_len = len(hypothesis)
|
|||
|
- >>> closest_ref_len = closest_ref_length(references, hyp_len)
|
|||
|
- >>> brevity_penalty(closest_ref_len, hyp_len)
|
|||
|
- 1.0
|
|||
|
-
|
|||
|
- :param hyp_len: The length of the hypothesis for a single sentence OR the
|
|||
|
- sum of all the hypotheses' lengths for a corpus
|
|||
|
- :type hyp_len: int
|
|||
|
- :param closest_ref_len: The length of the closest reference for a single
|
|||
|
- hypothesis OR the sum of all the closest references for every hypotheses.
|
|||
|
- :type closest_ref_len: int
|
|||
|
- :return: BLEU's brevity penalty.
|
|||
|
- :rtype: float
|
|||
|
- """
|
|||
|
- if hyp_len > closest_ref_len:
|
|||
|
- return 1
|
|||
|
- # If hypothesis is empty, brevity penalty = 0 should result in BLEU = 0.0
|
|||
|
- elif hyp_len == 0:
|
|||
|
- return 0
|
|||
|
- else:
|
|||
|
- return math.exp(1 - closest_ref_len / hyp_len)
|
|||
|
-
|
|||
|
-
|
|||
|
-class SmoothingFunction:
|
|||
|
- """
|
|||
|
- This is an implementation of the smoothing techniques
|
|||
|
- for segment-level BLEU scores that was presented in
|
|||
|
- Boxing Chen and Collin Cherry (2014) A Systematic Comparison of
|
|||
|
- Smoothing Techniques for Sentence-Level BLEU. In WMT14.
|
|||
|
- http://acl2014.org/acl2014/W14-33/pdf/W14-3346.pdf
|
|||
|
- """
|
|||
|
-
|
|||
|
- def __init__(self, epsilon=0.1, alpha=5, k=5):
|
|||
|
- """
|
|||
|
- This will initialize the parameters required for the various smoothing
|
|||
|
- techniques, the default values are set to the numbers used in the
|
|||
|
- experiments from Chen and Cherry (2014).
|
|||
|
-
|
|||
|
- >>> hypothesis1 = ['It', 'is', 'a', 'guide', 'to', 'action', 'which', 'ensures',
|
|||
|
- ... 'that', 'the', 'military', 'always', 'obeys', 'the',
|
|||
|
- ... 'commands', 'of', 'the', 'party']
|
|||
|
- >>> reference1 = ['It', 'is', 'a', 'guide', 'to', 'action', 'that', 'ensures',
|
|||
|
- ... 'that', 'the', 'military', 'will', 'forever', 'heed',
|
|||
|
- ... 'Party', 'commands']
|
|||
|
-
|
|||
|
- >>> chencherry = SmoothingFunction()
|
|||
|
- >>> print(sentence_bleu([reference1], hypothesis1)) # doctest: +ELLIPSIS
|
|||
|
- 0.4118...
|
|||
|
- >>> print(sentence_bleu([reference1], hypothesis1, smoothing_function=chencherry.method0)) # doctest: +ELLIPSIS
|
|||
|
- 0.4118...
|
|||
|
- >>> print(sentence_bleu([reference1], hypothesis1, smoothing_function=chencherry.method1)) # doctest: +ELLIPSIS
|
|||
|
- 0.4118...
|
|||
|
- >>> print(sentence_bleu([reference1], hypothesis1, smoothing_function=chencherry.method2)) # doctest: +ELLIPSIS
|
|||
|
- 0.4452...
|
|||
|
- >>> print(sentence_bleu([reference1], hypothesis1, smoothing_function=chencherry.method3)) # doctest: +ELLIPSIS
|
|||
|
- 0.4118...
|
|||
|
- >>> print(sentence_bleu([reference1], hypothesis1, smoothing_function=chencherry.method4)) # doctest: +ELLIPSIS
|
|||
|
- 0.4118...
|
|||
|
- >>> print(sentence_bleu([reference1], hypothesis1, smoothing_function=chencherry.method5)) # doctest: +ELLIPSIS
|
|||
|
- 0.4905...
|
|||
|
- >>> print(sentence_bleu([reference1], hypothesis1, smoothing_function=chencherry.method6)) # doctest: +ELLIPSIS
|
|||
|
- 0.4135...
|
|||
|
- >>> print(sentence_bleu([reference1], hypothesis1, smoothing_function=chencherry.method7)) # doctest: +ELLIPSIS
|
|||
|
- 0.4905...
|
|||
|
-
|
|||
|
- :param epsilon: the epsilon value use in method 1
|
|||
|
- :type epsilon: float
|
|||
|
- :param alpha: the alpha value use in method 6
|
|||
|
- :type alpha: int
|
|||
|
- :param k: the k value use in method 4
|
|||
|
- :type k: int
|
|||
|
- """
|
|||
|
- self.epsilon = epsilon
|
|||
|
- self.alpha = alpha
|
|||
|
- self.k = k
|
|||
|
-
|
|||
|
- def method0(self, p_n, *args, **kwargs):
|
|||
|
- """
|
|||
|
- No smoothing.
|
|||
|
- """
|
|||
|
- p_n_new = []
|
|||
|
- for i, p_i in enumerate(p_n):
|
|||
|
- if p_i.numerator != 0:
|
|||
|
- p_n_new.append(p_i)
|
|||
|
- else:
|
|||
|
- _msg = str(
|
|||
|
- "\nThe hypothesis contains 0 counts of {}-gram overlaps.\n"
|
|||
|
- "Therefore the BLEU score evaluates to 0, independently of\n"
|
|||
|
- "how many N-gram overlaps of lower order it contains.\n"
|
|||
|
- "Consider using lower n-gram order or use "
|
|||
|
- "SmoothingFunction()"
|
|||
|
- ).format(i + 1)
|
|||
|
- warnings.warn(_msg)
|
|||
|
- # When numerator==0 where denonminator==0 or !=0, the result
|
|||
|
- # for the precision score should be equal to 0 or undefined.
|
|||
|
- # Due to BLEU geometric mean computation in logarithm space,
|
|||
|
- # we we need to take the return sys.float_info.min such that
|
|||
|
- # math.log(sys.float_info.min) returns a 0 precision score.
|
|||
|
- p_n_new.append(sys.float_info.min)
|
|||
|
- return p_n_new
|
|||
|
-
|
|||
|
- def method1(self, p_n, *args, **kwargs):
|
|||
|
- """
|
|||
|
- Smoothing method 1: Add *epsilon* counts to precision with 0 counts.
|
|||
|
- """
|
|||
|
- return [
|
|||
|
- (p_i.numerator + self.epsilon) / p_i.denominator
|
|||
|
- if p_i.numerator == 0
|
|||
|
- else p_i
|
|||
|
- for p_i in p_n
|
|||
|
- ]
|
|||
|
-
|
|||
|
- def method2(self, p_n, *args, **kwargs):
|
|||
|
- """
|
|||
|
- Smoothing method 2: Add 1 to both numerator and denominator from
|
|||
|
- Chin-Yew Lin and Franz Josef Och (2004) ORANGE: a Method for
|
|||
|
- Evaluating Automatic Evaluation Metrics for Machine Translation.
|
|||
|
- In COLING 2004.
|
|||
|
- """
|
|||
|
- return [
|
|||
|
- Fraction(p_n[i].numerator + 1, p_n[i].denominator + 1, _normalize=False)
|
|||
|
- if i != 0
|
|||
|
- else p_n[0]
|
|||
|
- for i in range(len(p_n))
|
|||
|
- ]
|
|||
|
-
|
|||
|
- def method3(self, p_n, *args, **kwargs):
|
|||
|
- """
|
|||
|
- Smoothing method 3: NIST geometric sequence smoothing
|
|||
|
- The smoothing is computed by taking 1 / ( 2^k ), instead of 0, for each
|
|||
|
- precision score whose matching n-gram count is null.
|
|||
|
- k is 1 for the first 'n' value for which the n-gram match count is null/
|
|||
|
-
|
|||
|
- For example, if the text contains:
|
|||
|
-
|
|||
|
- - one 2-gram match
|
|||
|
- - and (consequently) two 1-gram matches
|
|||
|
-
|
|||
|
- the n-gram count for each individual precision score would be:
|
|||
|
-
|
|||
|
- - n=1 => prec_count = 2 (two unigrams)
|
|||
|
- - n=2 => prec_count = 1 (one bigram)
|
|||
|
- - n=3 => prec_count = 1/2 (no trigram, taking 'smoothed' value of 1 / ( 2^k ), with k=1)
|
|||
|
- - n=4 => prec_count = 1/4 (no fourgram, taking 'smoothed' value of 1 / ( 2^k ), with k=2)
|
|||
|
- """
|
|||
|
- incvnt = 1 # From the mteval-v13a.pl, it's referred to as k.
|
|||
|
- for i, p_i in enumerate(p_n):
|
|||
|
- if p_i.numerator == 0:
|
|||
|
- p_n[i] = 1 / (2**incvnt * p_i.denominator)
|
|||
|
- incvnt += 1
|
|||
|
- return p_n
|
|||
|
-
|
|||
|
- def method4(self, p_n, references, hypothesis, hyp_len=None, *args, **kwargs):
|
|||
|
- """
|
|||
|
- Smoothing method 4:
|
|||
|
- Shorter translations may have inflated precision values due to having
|
|||
|
- smaller denominators; therefore, we give them proportionally
|
|||
|
- smaller smoothed counts. Instead of scaling to 1/(2^k), Chen and Cherry
|
|||
|
- suggests dividing by 1/ln(len(T)), where T is the length of the translation.
|
|||
|
- """
|
|||
|
- incvnt = 1
|
|||
|
- hyp_len = hyp_len if hyp_len else len(hypothesis)
|
|||
|
- for i, p_i in enumerate(p_n):
|
|||
|
- if p_i.numerator == 0 and hyp_len > 1:
|
|||
|
- # incvnt = i + 1 * self.k / math.log(
|
|||
|
- # hyp_len
|
|||
|
- # ) # Note that this K is different from the K from NIST.
|
|||
|
- # p_n[i] = incvnt / p_i.denominator\
|
|||
|
- numerator = 1 / (2**incvnt * self.k / math.log(hyp_len))
|
|||
|
- p_n[i] = numerator / p_i.denominator
|
|||
|
- incvnt += 1
|
|||
|
- return p_n
|
|||
|
-
|
|||
|
- def method5(self, p_n, references, hypothesis, hyp_len=None, *args, **kwargs):
|
|||
|
- """
|
|||
|
- Smoothing method 5:
|
|||
|
- The matched counts for similar values of n should be similar. To a
|
|||
|
- calculate the n-gram matched count, it averages the n−1, n and n+1 gram
|
|||
|
- matched counts.
|
|||
|
- """
|
|||
|
- hyp_len = hyp_len if hyp_len else len(hypothesis)
|
|||
|
- m = {}
|
|||
|
- # Requires an precision value for an addition ngram order.
|
|||
|
- p_n_plus1 = p_n + [modified_precision(references, hypothesis, 5)]
|
|||
|
- m[-1] = p_n[0] + 1
|
|||
|
- for i, p_i in enumerate(p_n):
|
|||
|
- p_n[i] = (m[i - 1] + p_i + p_n_plus1[i + 1]) / 3
|
|||
|
- m[i] = p_n[i]
|
|||
|
- return p_n
|
|||
|
-
|
|||
|
- def method6(self, p_n, references, hypothesis, hyp_len=None, *args, **kwargs):
|
|||
|
- """
|
|||
|
- Smoothing method 6:
|
|||
|
- Interpolates the maximum likelihood estimate of the precision *p_n* with
|
|||
|
- a prior estimate *pi0*. The prior is estimated by assuming that the ratio
|
|||
|
- between pn and pn−1 will be the same as that between pn−1 and pn−2; from
|
|||
|
- Gao and He (2013) Training MRF-Based Phrase Translation Models using
|
|||
|
- Gradient Ascent. In NAACL.
|
|||
|
- """
|
|||
|
- hyp_len = hyp_len if hyp_len else len(hypothesis)
|
|||
|
- # This smoothing only works when p_1 and p_2 is non-zero.
|
|||
|
- # Raise an error with an appropriate message when the input is too short
|
|||
|
- # to use this smoothing technique.
|
|||
|
- assert p_n[2], "This smoothing method requires non-zero precision for bigrams."
|
|||
|
- for i, p_i in enumerate(p_n):
|
|||
|
- if i in [0, 1]: # Skips the first 2 orders of ngrams.
|
|||
|
- continue
|
|||
|
- else:
|
|||
|
- pi0 = 0 if p_n[i - 2] == 0 else p_n[i - 1] ** 2 / p_n[i - 2]
|
|||
|
- # No. of ngrams in translation that matches the reference.
|
|||
|
- m = p_i.numerator
|
|||
|
- # No. of ngrams in translation.
|
|||
|
- l = sum(1 for _ in ngrams(hypothesis, i + 1))
|
|||
|
- # Calculates the interpolated precision.
|
|||
|
- p_n[i] = (m + self.alpha * pi0) / (l + self.alpha)
|
|||
|
- return p_n
|
|||
|
-
|
|||
|
- def method7(self, p_n, references, hypothesis, hyp_len=None, *args, **kwargs):
|
|||
|
- """
|
|||
|
- Smoothing method 7:
|
|||
|
- Interpolates methods 4 and 5.
|
|||
|
- """
|
|||
|
- hyp_len = hyp_len if hyp_len else len(hypothesis)
|
|||
|
- p_n = self.method4(p_n, references, hypothesis, hyp_len)
|
|||
|
- p_n = self.method5(p_n, references, hypothesis, hyp_len)
|
|||
|
- return p_n
|
|||
|
+# Natural Language Toolkit: BLEU Score
|
|||
|
+#
|
|||
|
+# Copyright (C) 2001-2023 NLTK Project
|
|||
|
+# Authors: Chin Yee Lee, Hengfeng Li, Ruxin Hou, Calvin Tanujaya Lim
|
|||
|
+# Contributors: Björn Mattsson, Dmitrijs Milajevs, Liling Tan
|
|||
|
+# URL: <https://www.nltk.org/>
|
|||
|
+# For license information, see LICENSE.TXT
|
|||
|
+
|
|||
|
+"""BLEU score implementation."""
|
|||
|
+import math
|
|||
|
+import sys
|
|||
|
+import warnings
|
|||
|
+from collections import Counter
|
|||
|
+from fractions import Fraction as _Fraction
|
|||
|
+
|
|||
|
+from nltk.util import ngrams
|
|||
|
+
|
|||
|
+
|
|||
|
+class Fraction(_Fraction):
|
|||
|
+ """Fraction with _normalize=False support for 3.12"""
|
|||
|
+
|
|||
|
+ def __new__(cls, numerator=0, denominator=None, _normalize=False):
|
|||
|
+ if sys.version_info >= (3, 12):
|
|||
|
+ self = super().__new__(cls, numerator, denominator)
|
|||
|
+ else:
|
|||
|
+ self = super().__new__(cls, numerator, denominator, _normalize=_normalize)
|
|||
|
+ self._normalize = _normalize
|
|||
|
+ self._original_numerator = numerator
|
|||
|
+ self._original_denominator = denominator
|
|||
|
+ return self
|
|||
|
+
|
|||
|
+ @property
|
|||
|
+ def numerator(self):
|
|||
|
+ if not self._normalize:
|
|||
|
+ return self._original_numerator
|
|||
|
+ return super().numerator
|
|||
|
+
|
|||
|
+ @property
|
|||
|
+ def denominator(self):
|
|||
|
+ if not self._normalize:
|
|||
|
+ return self._original_denominator
|
|||
|
+ return super().denominator
|
|||
|
+
|
|||
|
+
|
|||
|
+def sentence_bleu(
|
|||
|
+ references,
|
|||
|
+ hypothesis,
|
|||
|
+ weights=(0.25, 0.25, 0.25, 0.25),
|
|||
|
+ smoothing_function=None,
|
|||
|
+ auto_reweigh=False,
|
|||
|
+):
|
|||
|
+ """
|
|||
|
+ Calculate BLEU score (Bilingual Evaluation Understudy) from
|
|||
|
+ Papineni, Kishore, Salim Roukos, Todd Ward, and Wei-Jing Zhu. 2002.
|
|||
|
+ "BLEU: a method for automatic evaluation of machine translation."
|
|||
|
+ In Proceedings of ACL. https://www.aclweb.org/anthology/P02-1040.pdf
|
|||
|
+
|
|||
|
+ >>> hypothesis1 = ['It', 'is', 'a', 'guide', 'to', 'action', 'which',
|
|||
|
+ ... 'ensures', 'that', 'the', 'military', 'always',
|
|||
|
+ ... 'obeys', 'the', 'commands', 'of', 'the', 'party']
|
|||
|
+
|
|||
|
+ >>> hypothesis2 = ['It', 'is', 'to', 'insure', 'the', 'troops',
|
|||
|
+ ... 'forever', 'hearing', 'the', 'activity', 'guidebook',
|
|||
|
+ ... 'that', 'party', 'direct']
|
|||
|
+
|
|||
|
+ >>> reference1 = ['It', 'is', 'a', 'guide', 'to', 'action', 'that',
|
|||
|
+ ... 'ensures', 'that', 'the', 'military', 'will', 'forever',
|
|||
|
+ ... 'heed', 'Party', 'commands']
|
|||
|
+
|
|||
|
+ >>> reference2 = ['It', 'is', 'the', 'guiding', 'principle', 'which',
|
|||
|
+ ... 'guarantees', 'the', 'military', 'forces', 'always',
|
|||
|
+ ... 'being', 'under', 'the', 'command', 'of', 'the',
|
|||
|
+ ... 'Party']
|
|||
|
+
|
|||
|
+ >>> reference3 = ['It', 'is', 'the', 'practical', 'guide', 'for', 'the',
|
|||
|
+ ... 'army', 'always', 'to', 'heed', 'the', 'directions',
|
|||
|
+ ... 'of', 'the', 'party']
|
|||
|
+
|
|||
|
+ >>> sentence_bleu([reference1, reference2, reference3], hypothesis1) # doctest: +ELLIPSIS
|
|||
|
+ 0.5045...
|
|||
|
+
|
|||
|
+ If there is no ngrams overlap for any order of n-grams, BLEU returns the
|
|||
|
+ value 0. This is because the precision for the order of n-grams without
|
|||
|
+ overlap is 0, and the geometric mean in the final BLEU score computation
|
|||
|
+ multiplies the 0 with the precision of other n-grams. This results in 0
|
|||
|
+ (independently of the precision of the other n-gram orders). The following
|
|||
|
+ example has zero 3-gram and 4-gram overlaps:
|
|||
|
+
|
|||
|
+ >>> round(sentence_bleu([reference1, reference2, reference3], hypothesis2),4) # doctest: +ELLIPSIS
|
|||
|
+ 0.0
|
|||
|
+
|
|||
|
+ To avoid this harsh behaviour when no ngram overlaps are found a smoothing
|
|||
|
+ function can be used.
|
|||
|
+
|
|||
|
+ >>> chencherry = SmoothingFunction()
|
|||
|
+ >>> sentence_bleu([reference1, reference2, reference3], hypothesis2,
|
|||
|
+ ... smoothing_function=chencherry.method1) # doctest: +ELLIPSIS
|
|||
|
+ 0.0370...
|
|||
|
+
|
|||
|
+ The default BLEU calculates a score for up to 4-grams using uniform
|
|||
|
+ weights (this is called BLEU-4). To evaluate your translations with
|
|||
|
+ higher/lower order ngrams, use customized weights. E.g. when accounting
|
|||
|
+ for up to 5-grams with uniform weights (this is called BLEU-5) use:
|
|||
|
+
|
|||
|
+ >>> weights = (1./5., 1./5., 1./5., 1./5., 1./5.)
|
|||
|
+ >>> sentence_bleu([reference1, reference2, reference3], hypothesis1, weights) # doctest: +ELLIPSIS
|
|||
|
+ 0.3920...
|
|||
|
+
|
|||
|
+ Multiple BLEU scores can be computed at once, by supplying a list of weights.
|
|||
|
+ E.g. for computing BLEU-2, BLEU-3 *and* BLEU-4 in one computation, use:
|
|||
|
+ >>> weights = [
|
|||
|
+ ... (1./2., 1./2.),
|
|||
|
+ ... (1./3., 1./3., 1./3.),
|
|||
|
+ ... (1./4., 1./4., 1./4., 1./4.)
|
|||
|
+ ... ]
|
|||
|
+ >>> sentence_bleu([reference1, reference2, reference3], hypothesis1, weights) # doctest: +ELLIPSIS
|
|||
|
+ [0.7453..., 0.6240..., 0.5045...]
|
|||
|
+
|
|||
|
+ :param references: reference sentences
|
|||
|
+ :type references: list(list(str))
|
|||
|
+ :param hypothesis: a hypothesis sentence
|
|||
|
+ :type hypothesis: list(str)
|
|||
|
+ :param weights: weights for unigrams, bigrams, trigrams and so on (one or a list of weights)
|
|||
|
+ :type weights: tuple(float) / list(tuple(float))
|
|||
|
+ :param smoothing_function:
|
|||
|
+ :type smoothing_function: SmoothingFunction
|
|||
|
+ :param auto_reweigh: Option to re-normalize the weights uniformly.
|
|||
|
+ :type auto_reweigh: bool
|
|||
|
+ :return: The sentence-level BLEU score. Returns a list if multiple weights were supplied.
|
|||
|
+ :rtype: float / list(float)
|
|||
|
+ """
|
|||
|
+ return corpus_bleu(
|
|||
|
+ [references], [hypothesis], weights, smoothing_function, auto_reweigh
|
|||
|
+ )
|
|||
|
+
|
|||
|
+
|
|||
|
+def corpus_bleu(
|
|||
|
+ list_of_references,
|
|||
|
+ hypotheses,
|
|||
|
+ weights=(0.25, 0.25, 0.25, 0.25),
|
|||
|
+ smoothing_function=None,
|
|||
|
+ auto_reweigh=False,
|
|||
|
+):
|
|||
|
+ """
|
|||
|
+ Calculate a single corpus-level BLEU score (aka. system-level BLEU) for all
|
|||
|
+ the hypotheses and their respective references.
|
|||
|
+
|
|||
|
+ Instead of averaging the sentence level BLEU scores (i.e. macro-average
|
|||
|
+ precision), the original BLEU metric (Papineni et al. 2002) accounts for
|
|||
|
+ the micro-average precision (i.e. summing the numerators and denominators
|
|||
|
+ for each hypothesis-reference(s) pairs before the division).
|
|||
|
+
|
|||
|
+ >>> hyp1 = ['It', 'is', 'a', 'guide', 'to', 'action', 'which',
|
|||
|
+ ... 'ensures', 'that', 'the', 'military', 'always',
|
|||
|
+ ... 'obeys', 'the', 'commands', 'of', 'the', 'party']
|
|||
|
+ >>> ref1a = ['It', 'is', 'a', 'guide', 'to', 'action', 'that',
|
|||
|
+ ... 'ensures', 'that', 'the', 'military', 'will', 'forever',
|
|||
|
+ ... 'heed', 'Party', 'commands']
|
|||
|
+ >>> ref1b = ['It', 'is', 'the', 'guiding', 'principle', 'which',
|
|||
|
+ ... 'guarantees', 'the', 'military', 'forces', 'always',
|
|||
|
+ ... 'being', 'under', 'the', 'command', 'of', 'the', 'Party']
|
|||
|
+ >>> ref1c = ['It', 'is', 'the', 'practical', 'guide', 'for', 'the',
|
|||
|
+ ... 'army', 'always', 'to', 'heed', 'the', 'directions',
|
|||
|
+ ... 'of', 'the', 'party']
|
|||
|
+
|
|||
|
+ >>> hyp2 = ['he', 'read', 'the', 'book', 'because', 'he', 'was',
|
|||
|
+ ... 'interested', 'in', 'world', 'history']
|
|||
|
+ >>> ref2a = ['he', 'was', 'interested', 'in', 'world', 'history',
|
|||
|
+ ... 'because', 'he', 'read', 'the', 'book']
|
|||
|
+
|
|||
|
+ >>> list_of_references = [[ref1a, ref1b, ref1c], [ref2a]]
|
|||
|
+ >>> hypotheses = [hyp1, hyp2]
|
|||
|
+ >>> corpus_bleu(list_of_references, hypotheses) # doctest: +ELLIPSIS
|
|||
|
+ 0.5920...
|
|||
|
+
|
|||
|
+ The example below show that corpus_bleu() is different from averaging
|
|||
|
+ sentence_bleu() for hypotheses
|
|||
|
+
|
|||
|
+ >>> score1 = sentence_bleu([ref1a, ref1b, ref1c], hyp1)
|
|||
|
+ >>> score2 = sentence_bleu([ref2a], hyp2)
|
|||
|
+ >>> (score1 + score2) / 2 # doctest: +ELLIPSIS
|
|||
|
+ 0.6223...
|
|||
|
+
|
|||
|
+ Custom weights may be supplied to fine-tune the BLEU score further.
|
|||
|
+ A tuple of float weights for unigrams, bigrams, trigrams and so on can be given.
|
|||
|
+ >>> weights = (0.1, 0.3, 0.5, 0.1)
|
|||
|
+ >>> corpus_bleu(list_of_references, hypotheses, weights=weights) # doctest: +ELLIPSIS
|
|||
|
+ 0.5818...
|
|||
|
+
|
|||
|
+ This particular weight gave extra value to trigrams.
|
|||
|
+ Furthermore, multiple weights can be given, resulting in multiple BLEU scores.
|
|||
|
+ >>> weights = [
|
|||
|
+ ... (0.5, 0.5),
|
|||
|
+ ... (0.333, 0.333, 0.334),
|
|||
|
+ ... (0.25, 0.25, 0.25, 0.25),
|
|||
|
+ ... (0.2, 0.2, 0.2, 0.2, 0.2)
|
|||
|
+ ... ]
|
|||
|
+ >>> corpus_bleu(list_of_references, hypotheses, weights=weights) # doctest: +ELLIPSIS
|
|||
|
+ [0.8242..., 0.7067..., 0.5920..., 0.4719...]
|
|||
|
+
|
|||
|
+ :param list_of_references: a corpus of lists of reference sentences, w.r.t. hypotheses
|
|||
|
+ :type list_of_references: list(list(list(str)))
|
|||
|
+ :param hypotheses: a list of hypothesis sentences
|
|||
|
+ :type hypotheses: list(list(str))
|
|||
|
+ :param weights: weights for unigrams, bigrams, trigrams and so on (one or a list of weights)
|
|||
|
+ :type weights: tuple(float) / list(tuple(float))
|
|||
|
+ :param smoothing_function:
|
|||
|
+ :type smoothing_function: SmoothingFunction
|
|||
|
+ :param auto_reweigh: Option to re-normalize the weights uniformly.
|
|||
|
+ :type auto_reweigh: bool
|
|||
|
+ :return: The corpus-level BLEU score.
|
|||
|
+ :rtype: float
|
|||
|
+ """
|
|||
|
+ # Before proceeding to compute BLEU, perform sanity checks.
|
|||
|
+
|
|||
|
+ p_numerators = Counter() # Key = ngram order, and value = no. of ngram matches.
|
|||
|
+ p_denominators = Counter() # Key = ngram order, and value = no. of ngram in ref.
|
|||
|
+ hyp_lengths, ref_lengths = 0, 0
|
|||
|
+
|
|||
|
+ assert len(list_of_references) == len(hypotheses), (
|
|||
|
+ "The number of hypotheses and their reference(s) should be the " "same "
|
|||
|
+ )
|
|||
|
+
|
|||
|
+ try:
|
|||
|
+ weights[0][0]
|
|||
|
+ except TypeError:
|
|||
|
+ weights = [weights]
|
|||
|
+ max_weight_length = max(len(weight) for weight in weights)
|
|||
|
+
|
|||
|
+ # Iterate through each hypothesis and their corresponding references.
|
|||
|
+ for references, hypothesis in zip(list_of_references, hypotheses):
|
|||
|
+ # For each order of ngram, calculate the numerator and
|
|||
|
+ # denominator for the corpus-level modified precision.
|
|||
|
+ for i in range(1, max_weight_length + 1):
|
|||
|
+ p_i = modified_precision(references, hypothesis, i)
|
|||
|
+ p_numerators[i] += p_i.numerator
|
|||
|
+ p_denominators[i] += p_i.denominator
|
|||
|
+
|
|||
|
+ # Calculate the hypothesis length and the closest reference length.
|
|||
|
+ # Adds them to the corpus-level hypothesis and reference counts.
|
|||
|
+ hyp_len = len(hypothesis)
|
|||
|
+ hyp_lengths += hyp_len
|
|||
|
+ ref_lengths += closest_ref_length(references, hyp_len)
|
|||
|
+
|
|||
|
+ # Calculate corpus-level brevity penalty.
|
|||
|
+ bp = brevity_penalty(ref_lengths, hyp_lengths)
|
|||
|
+
|
|||
|
+ # Collects the various precision values for the different ngram orders.
|
|||
|
+ p_n = [
|
|||
|
+ Fraction(p_numerators[i], p_denominators[i], _normalize=False)
|
|||
|
+ for i in range(1, max_weight_length + 1)
|
|||
|
+ ]
|
|||
|
+
|
|||
|
+ # Returns 0 if there's no matching n-grams
|
|||
|
+ # We only need to check for p_numerators[1] == 0, since if there's
|
|||
|
+ # no unigrams, there won't be any higher order ngrams.
|
|||
|
+ if p_numerators[1] == 0:
|
|||
|
+ return 0 if len(weights) == 1 else [0] * len(weights)
|
|||
|
+
|
|||
|
+ # If there's no smoothing, set use method0 from SmoothinFunction class.
|
|||
|
+ if not smoothing_function:
|
|||
|
+ smoothing_function = SmoothingFunction().method0
|
|||
|
+ # Smoothen the modified precision.
|
|||
|
+ # Note: smoothing_function() may convert values into floats;
|
|||
|
+ # it tries to retain the Fraction object as much as the
|
|||
|
+ # smoothing method allows.
|
|||
|
+ p_n = smoothing_function(
|
|||
|
+ p_n, references=references, hypothesis=hypothesis, hyp_len=hyp_lengths
|
|||
|
+ )
|
|||
|
+
|
|||
|
+ bleu_scores = []
|
|||
|
+ for weight in weights:
|
|||
|
+ # Uniformly re-weighting based on maximum hypothesis lengths if largest
|
|||
|
+ # order of n-grams < 4 and weights is set at default.
|
|||
|
+ if auto_reweigh:
|
|||
|
+ if hyp_lengths < 4 and weight == (0.25, 0.25, 0.25, 0.25):
|
|||
|
+ weight = (1 / hyp_lengths,) * hyp_lengths
|
|||
|
+
|
|||
|
+ s = (w_i * math.log(p_i) for w_i, p_i in zip(weight, p_n) if p_i > 0)
|
|||
|
+ s = bp * math.exp(math.fsum(s))
|
|||
|
+ bleu_scores.append(s)
|
|||
|
+ return bleu_scores[0] if len(weights) == 1 else bleu_scores
|
|||
|
+
|
|||
|
+
|
|||
|
+def modified_precision(references, hypothesis, n):
|
|||
|
+ """
|
|||
|
+ Calculate modified ngram precision.
|
|||
|
+
|
|||
|
+ The normal precision method may lead to some wrong translations with
|
|||
|
+ high-precision, e.g., the translation, in which a word of reference
|
|||
|
+ repeats several times, has very high precision.
|
|||
|
+
|
|||
|
+ This function only returns the Fraction object that contains the numerator
|
|||
|
+ and denominator necessary to calculate the corpus-level precision.
|
|||
|
+ To calculate the modified precision for a single pair of hypothesis and
|
|||
|
+ references, cast the Fraction object into a float.
|
|||
|
+
|
|||
|
+ The famous "the the the ... " example shows that you can get BLEU precision
|
|||
|
+ by duplicating high frequency words.
|
|||
|
+
|
|||
|
+ >>> reference1 = 'the cat is on the mat'.split()
|
|||
|
+ >>> reference2 = 'there is a cat on the mat'.split()
|
|||
|
+ >>> hypothesis1 = 'the the the the the the the'.split()
|
|||
|
+ >>> references = [reference1, reference2]
|
|||
|
+ >>> float(modified_precision(references, hypothesis1, n=1)) # doctest: +ELLIPSIS
|
|||
|
+ 0.2857...
|
|||
|
+
|
|||
|
+ In the modified n-gram precision, a reference word will be considered
|
|||
|
+ exhausted after a matching hypothesis word is identified, e.g.
|
|||
|
+
|
|||
|
+ >>> reference1 = ['It', 'is', 'a', 'guide', 'to', 'action', 'that',
|
|||
|
+ ... 'ensures', 'that', 'the', 'military', 'will',
|
|||
|
+ ... 'forever', 'heed', 'Party', 'commands']
|
|||
|
+ >>> reference2 = ['It', 'is', 'the', 'guiding', 'principle', 'which',
|
|||
|
+ ... 'guarantees', 'the', 'military', 'forces', 'always',
|
|||
|
+ ... 'being', 'under', 'the', 'command', 'of', 'the',
|
|||
|
+ ... 'Party']
|
|||
|
+ >>> reference3 = ['It', 'is', 'the', 'practical', 'guide', 'for', 'the',
|
|||
|
+ ... 'army', 'always', 'to', 'heed', 'the', 'directions',
|
|||
|
+ ... 'of', 'the', 'party']
|
|||
|
+ >>> hypothesis = 'of the'.split()
|
|||
|
+ >>> references = [reference1, reference2, reference3]
|
|||
|
+ >>> float(modified_precision(references, hypothesis, n=1))
|
|||
|
+ 1.0
|
|||
|
+ >>> float(modified_precision(references, hypothesis, n=2))
|
|||
|
+ 1.0
|
|||
|
+
|
|||
|
+ An example of a normal machine translation hypothesis:
|
|||
|
+
|
|||
|
+ >>> hypothesis1 = ['It', 'is', 'a', 'guide', 'to', 'action', 'which',
|
|||
|
+ ... 'ensures', 'that', 'the', 'military', 'always',
|
|||
|
+ ... 'obeys', 'the', 'commands', 'of', 'the', 'party']
|
|||
|
+
|
|||
|
+ >>> hypothesis2 = ['It', 'is', 'to', 'insure', 'the', 'troops',
|
|||
|
+ ... 'forever', 'hearing', 'the', 'activity', 'guidebook',
|
|||
|
+ ... 'that', 'party', 'direct']
|
|||
|
+
|
|||
|
+ >>> reference1 = ['It', 'is', 'a', 'guide', 'to', 'action', 'that',
|
|||
|
+ ... 'ensures', 'that', 'the', 'military', 'will',
|
|||
|
+ ... 'forever', 'heed', 'Party', 'commands']
|
|||
|
+
|
|||
|
+ >>> reference2 = ['It', 'is', 'the', 'guiding', 'principle', 'which',
|
|||
|
+ ... 'guarantees', 'the', 'military', 'forces', 'always',
|
|||
|
+ ... 'being', 'under', 'the', 'command', 'of', 'the',
|
|||
|
+ ... 'Party']
|
|||
|
+
|
|||
|
+ >>> reference3 = ['It', 'is', 'the', 'practical', 'guide', 'for', 'the',
|
|||
|
+ ... 'army', 'always', 'to', 'heed', 'the', 'directions',
|
|||
|
+ ... 'of', 'the', 'party']
|
|||
|
+ >>> references = [reference1, reference2, reference3]
|
|||
|
+ >>> float(modified_precision(references, hypothesis1, n=1)) # doctest: +ELLIPSIS
|
|||
|
+ 0.9444...
|
|||
|
+ >>> float(modified_precision(references, hypothesis2, n=1)) # doctest: +ELLIPSIS
|
|||
|
+ 0.5714...
|
|||
|
+ >>> float(modified_precision(references, hypothesis1, n=2)) # doctest: +ELLIPSIS
|
|||
|
+ 0.5882352941176471
|
|||
|
+ >>> float(modified_precision(references, hypothesis2, n=2)) # doctest: +ELLIPSIS
|
|||
|
+ 0.07692...
|
|||
|
+
|
|||
|
+
|
|||
|
+ :param references: A list of reference translations.
|
|||
|
+ :type references: list(list(str))
|
|||
|
+ :param hypothesis: A hypothesis translation.
|
|||
|
+ :type hypothesis: list(str)
|
|||
|
+ :param n: The ngram order.
|
|||
|
+ :type n: int
|
|||
|
+ :return: BLEU's modified precision for the nth order ngram.
|
|||
|
+ :rtype: Fraction
|
|||
|
+ """
|
|||
|
+ # Extracts all ngrams in hypothesis
|
|||
|
+ # Set an empty Counter if hypothesis is empty.
|
|||
|
+ counts = Counter(ngrams(hypothesis, n)) if len(hypothesis) >= n else Counter()
|
|||
|
+ # Extract a union of references' counts.
|
|||
|
+ # max_counts = reduce(or_, [Counter(ngrams(ref, n)) for ref in references])
|
|||
|
+ max_counts = {}
|
|||
|
+ for reference in references:
|
|||
|
+ reference_counts = (
|
|||
|
+ Counter(ngrams(reference, n)) if len(reference) >= n else Counter()
|
|||
|
+ )
|
|||
|
+ for ngram in counts:
|
|||
|
+ max_counts[ngram] = max(max_counts.get(ngram, 0), reference_counts[ngram])
|
|||
|
+
|
|||
|
+ # Assigns the intersection between hypothesis and references' counts.
|
|||
|
+ clipped_counts = {
|
|||
|
+ ngram: min(count, max_counts[ngram]) for ngram, count in counts.items()
|
|||
|
+ }
|
|||
|
+
|
|||
|
+ numerator = sum(clipped_counts.values())
|
|||
|
+ # Ensures that denominator is minimum 1 to avoid ZeroDivisionError.
|
|||
|
+ # Usually this happens when the ngram order is > len(reference).
|
|||
|
+ denominator = max(1, sum(counts.values()))
|
|||
|
+
|
|||
|
+ return Fraction(numerator, denominator, _normalize=False)
|
|||
|
+
|
|||
|
+
|
|||
|
+def closest_ref_length(references, hyp_len):
|
|||
|
+ """
|
|||
|
+ This function finds the reference that is the closest length to the
|
|||
|
+ hypothesis. The closest reference length is referred to as *r* variable
|
|||
|
+ from the brevity penalty formula in Papineni et. al. (2002)
|
|||
|
+
|
|||
|
+ :param references: A list of reference translations.
|
|||
|
+ :type references: list(list(str))
|
|||
|
+ :param hyp_len: The length of the hypothesis.
|
|||
|
+ :type hyp_len: int
|
|||
|
+ :return: The length of the reference that's closest to the hypothesis.
|
|||
|
+ :rtype: int
|
|||
|
+ """
|
|||
|
+ ref_lens = (len(reference) for reference in references)
|
|||
|
+ closest_ref_len = min(
|
|||
|
+ ref_lens, key=lambda ref_len: (abs(ref_len - hyp_len), ref_len)
|
|||
|
+ )
|
|||
|
+ return closest_ref_len
|
|||
|
+
|
|||
|
+
|
|||
|
+def brevity_penalty(closest_ref_len, hyp_len):
|
|||
|
+ """
|
|||
|
+ Calculate brevity penalty.
|
|||
|
+
|
|||
|
+ As the modified n-gram precision still has the problem from the short
|
|||
|
+ length sentence, brevity penalty is used to modify the overall BLEU
|
|||
|
+ score according to length.
|
|||
|
+
|
|||
|
+ An example from the paper. There are three references with length 12, 15
|
|||
|
+ and 17. And a concise hypothesis of the length 12. The brevity penalty is 1.
|
|||
|
+
|
|||
|
+ >>> reference1 = list('aaaaaaaaaaaa') # i.e. ['a'] * 12
|
|||
|
+ >>> reference2 = list('aaaaaaaaaaaaaaa') # i.e. ['a'] * 15
|
|||
|
+ >>> reference3 = list('aaaaaaaaaaaaaaaaa') # i.e. ['a'] * 17
|
|||
|
+ >>> hypothesis = list('aaaaaaaaaaaa') # i.e. ['a'] * 12
|
|||
|
+ >>> references = [reference1, reference2, reference3]
|
|||
|
+ >>> hyp_len = len(hypothesis)
|
|||
|
+ >>> closest_ref_len = closest_ref_length(references, hyp_len)
|
|||
|
+ >>> brevity_penalty(closest_ref_len, hyp_len)
|
|||
|
+ 1.0
|
|||
|
+
|
|||
|
+ In case a hypothesis translation is shorter than the references, penalty is
|
|||
|
+ applied.
|
|||
|
+
|
|||
|
+ >>> references = [['a'] * 28, ['a'] * 28]
|
|||
|
+ >>> hypothesis = ['a'] * 12
|
|||
|
+ >>> hyp_len = len(hypothesis)
|
|||
|
+ >>> closest_ref_len = closest_ref_length(references, hyp_len)
|
|||
|
+ >>> brevity_penalty(closest_ref_len, hyp_len)
|
|||
|
+ 0.2635971381157267
|
|||
|
+
|
|||
|
+ The length of the closest reference is used to compute the penalty. If the
|
|||
|
+ length of a hypothesis is 12, and the reference lengths are 13 and 2, the
|
|||
|
+ penalty is applied because the hypothesis length (12) is less then the
|
|||
|
+ closest reference length (13).
|
|||
|
+
|
|||
|
+ >>> references = [['a'] * 13, ['a'] * 2]
|
|||
|
+ >>> hypothesis = ['a'] * 12
|
|||
|
+ >>> hyp_len = len(hypothesis)
|
|||
|
+ >>> closest_ref_len = closest_ref_length(references, hyp_len)
|
|||
|
+ >>> brevity_penalty(closest_ref_len, hyp_len) # doctest: +ELLIPSIS
|
|||
|
+ 0.9200...
|
|||
|
+
|
|||
|
+ The brevity penalty doesn't depend on reference order. More importantly,
|
|||
|
+ when two reference sentences are at the same distance, the shortest
|
|||
|
+ reference sentence length is used.
|
|||
|
+
|
|||
|
+ >>> references = [['a'] * 13, ['a'] * 11]
|
|||
|
+ >>> hypothesis = ['a'] * 12
|
|||
|
+ >>> hyp_len = len(hypothesis)
|
|||
|
+ >>> closest_ref_len = closest_ref_length(references, hyp_len)
|
|||
|
+ >>> bp1 = brevity_penalty(closest_ref_len, hyp_len)
|
|||
|
+ >>> hyp_len = len(hypothesis)
|
|||
|
+ >>> closest_ref_len = closest_ref_length(reversed(references), hyp_len)
|
|||
|
+ >>> bp2 = brevity_penalty(closest_ref_len, hyp_len)
|
|||
|
+ >>> bp1 == bp2 == 1
|
|||
|
+ True
|
|||
|
+
|
|||
|
+ A test example from mteval-v13a.pl (starting from the line 705):
|
|||
|
+
|
|||
|
+ >>> references = [['a'] * 11, ['a'] * 8]
|
|||
|
+ >>> hypothesis = ['a'] * 7
|
|||
|
+ >>> hyp_len = len(hypothesis)
|
|||
|
+ >>> closest_ref_len = closest_ref_length(references, hyp_len)
|
|||
|
+ >>> brevity_penalty(closest_ref_len, hyp_len) # doctest: +ELLIPSIS
|
|||
|
+ 0.8668...
|
|||
|
+
|
|||
|
+ >>> references = [['a'] * 11, ['a'] * 8, ['a'] * 6, ['a'] * 7]
|
|||
|
+ >>> hypothesis = ['a'] * 7
|
|||
|
+ >>> hyp_len = len(hypothesis)
|
|||
|
+ >>> closest_ref_len = closest_ref_length(references, hyp_len)
|
|||
|
+ >>> brevity_penalty(closest_ref_len, hyp_len)
|
|||
|
+ 1.0
|
|||
|
+
|
|||
|
+ :param hyp_len: The length of the hypothesis for a single sentence OR the
|
|||
|
+ sum of all the hypotheses' lengths for a corpus
|
|||
|
+ :type hyp_len: int
|
|||
|
+ :param closest_ref_len: The length of the closest reference for a single
|
|||
|
+ hypothesis OR the sum of all the closest references for every hypotheses.
|
|||
|
+ :type closest_ref_len: int
|
|||
|
+ :return: BLEU's brevity penalty.
|
|||
|
+ :rtype: float
|
|||
|
+ """
|
|||
|
+ if hyp_len > closest_ref_len:
|
|||
|
+ return 1
|
|||
|
+ # If hypothesis is empty, brevity penalty = 0 should result in BLEU = 0.0
|
|||
|
+ elif hyp_len == 0:
|
|||
|
+ return 0
|
|||
|
+ else:
|
|||
|
+ return math.exp(1 - closest_ref_len / hyp_len)
|
|||
|
+
|
|||
|
+
|
|||
|
+class SmoothingFunction:
|
|||
|
+ """
|
|||
|
+ This is an implementation of the smoothing techniques
|
|||
|
+ for segment-level BLEU scores that was presented in
|
|||
|
+ Boxing Chen and Collin Cherry (2014) A Systematic Comparison of
|
|||
|
+ Smoothing Techniques for Sentence-Level BLEU. In WMT14.
|
|||
|
+ http://acl2014.org/acl2014/W14-33/pdf/W14-3346.pdf
|
|||
|
+ """
|
|||
|
+
|
|||
|
+ def __init__(self, epsilon=0.1, alpha=5, k=5):
|
|||
|
+ """
|
|||
|
+ This will initialize the parameters required for the various smoothing
|
|||
|
+ techniques, the default values are set to the numbers used in the
|
|||
|
+ experiments from Chen and Cherry (2014).
|
|||
|
+
|
|||
|
+ >>> hypothesis1 = ['It', 'is', 'a', 'guide', 'to', 'action', 'which', 'ensures',
|
|||
|
+ ... 'that', 'the', 'military', 'always', 'obeys', 'the',
|
|||
|
+ ... 'commands', 'of', 'the', 'party']
|
|||
|
+ >>> reference1 = ['It', 'is', 'a', 'guide', 'to', 'action', 'that', 'ensures',
|
|||
|
+ ... 'that', 'the', 'military', 'will', 'forever', 'heed',
|
|||
|
+ ... 'Party', 'commands']
|
|||
|
+
|
|||
|
+ >>> chencherry = SmoothingFunction()
|
|||
|
+ >>> print(sentence_bleu([reference1], hypothesis1)) # doctest: +ELLIPSIS
|
|||
|
+ 0.4118...
|
|||
|
+ >>> print(sentence_bleu([reference1], hypothesis1, smoothing_function=chencherry.method0)) # doctest: +ELLIPSIS
|
|||
|
+ 0.4118...
|
|||
|
+ >>> print(sentence_bleu([reference1], hypothesis1, smoothing_function=chencherry.method1)) # doctest: +ELLIPSIS
|
|||
|
+ 0.4118...
|
|||
|
+ >>> print(sentence_bleu([reference1], hypothesis1, smoothing_function=chencherry.method2)) # doctest: +ELLIPSIS
|
|||
|
+ 0.4452...
|
|||
|
+ >>> print(sentence_bleu([reference1], hypothesis1, smoothing_function=chencherry.method3)) # doctest: +ELLIPSIS
|
|||
|
+ 0.4118...
|
|||
|
+ >>> print(sentence_bleu([reference1], hypothesis1, smoothing_function=chencherry.method4)) # doctest: +ELLIPSIS
|
|||
|
+ 0.4118...
|
|||
|
+ >>> print(sentence_bleu([reference1], hypothesis1, smoothing_function=chencherry.method5)) # doctest: +ELLIPSIS
|
|||
|
+ 0.4905...
|
|||
|
+ >>> print(sentence_bleu([reference1], hypothesis1, smoothing_function=chencherry.method6)) # doctest: +ELLIPSIS
|
|||
|
+ 0.4135...
|
|||
|
+ >>> print(sentence_bleu([reference1], hypothesis1, smoothing_function=chencherry.method7)) # doctest: +ELLIPSIS
|
|||
|
+ 0.4905...
|
|||
|
+
|
|||
|
+ :param epsilon: the epsilon value use in method 1
|
|||
|
+ :type epsilon: float
|
|||
|
+ :param alpha: the alpha value use in method 6
|
|||
|
+ :type alpha: int
|
|||
|
+ :param k: the k value use in method 4
|
|||
|
+ :type k: int
|
|||
|
+ """
|
|||
|
+ self.epsilon = epsilon
|
|||
|
+ self.alpha = alpha
|
|||
|
+ self.k = k
|
|||
|
+
|
|||
|
+ def method0(self, p_n, *args, **kwargs):
|
|||
|
+ """
|
|||
|
+ No smoothing.
|
|||
|
+ """
|
|||
|
+ p_n_new = []
|
|||
|
+ for i, p_i in enumerate(p_n):
|
|||
|
+ if p_i.numerator != 0:
|
|||
|
+ p_n_new.append(p_i)
|
|||
|
+ else:
|
|||
|
+ _msg = str(
|
|||
|
+ "\nThe hypothesis contains 0 counts of {}-gram overlaps.\n"
|
|||
|
+ "Therefore the BLEU score evaluates to 0, independently of\n"
|
|||
|
+ "how many N-gram overlaps of lower order it contains.\n"
|
|||
|
+ "Consider using lower n-gram order or use "
|
|||
|
+ "SmoothingFunction()"
|
|||
|
+ ).format(i + 1)
|
|||
|
+ warnings.warn(_msg)
|
|||
|
+ # When numerator==0 where denonminator==0 or !=0, the result
|
|||
|
+ # for the precision score should be equal to 0 or undefined.
|
|||
|
+ # Due to BLEU geometric mean computation in logarithm space,
|
|||
|
+ # we we need to take the return sys.float_info.min such that
|
|||
|
+ # math.log(sys.float_info.min) returns a 0 precision score.
|
|||
|
+ p_n_new.append(sys.float_info.min)
|
|||
|
+ return p_n_new
|
|||
|
+
|
|||
|
+ def method1(self, p_n, *args, **kwargs):
|
|||
|
+ """
|
|||
|
+ Smoothing method 1: Add *epsilon* counts to precision with 0 counts.
|
|||
|
+ """
|
|||
|
+ return [
|
|||
|
+ (p_i.numerator + self.epsilon) / p_i.denominator
|
|||
|
+ if p_i.numerator == 0
|
|||
|
+ else p_i
|
|||
|
+ for p_i in p_n
|
|||
|
+ ]
|
|||
|
+
|
|||
|
+ def method2(self, p_n, *args, **kwargs):
|
|||
|
+ """
|
|||
|
+ Smoothing method 2: Add 1 to both numerator and denominator from
|
|||
|
+ Chin-Yew Lin and Franz Josef Och (2004) ORANGE: a Method for
|
|||
|
+ Evaluating Automatic Evaluation Metrics for Machine Translation.
|
|||
|
+ In COLING 2004.
|
|||
|
+ """
|
|||
|
+ return [
|
|||
|
+ Fraction(p_n[i].numerator + 1, p_n[i].denominator + 1, _normalize=False)
|
|||
|
+ if i != 0
|
|||
|
+ else p_n[0]
|
|||
|
+ for i in range(len(p_n))
|
|||
|
+ ]
|
|||
|
+
|
|||
|
+ def method3(self, p_n, *args, **kwargs):
|
|||
|
+ """
|
|||
|
+ Smoothing method 3: NIST geometric sequence smoothing
|
|||
|
+ The smoothing is computed by taking 1 / ( 2^k ), instead of 0, for each
|
|||
|
+ precision score whose matching n-gram count is null.
|
|||
|
+ k is 1 for the first 'n' value for which the n-gram match count is null/
|
|||
|
+
|
|||
|
+ For example, if the text contains:
|
|||
|
+
|
|||
|
+ - one 2-gram match
|
|||
|
+ - and (consequently) two 1-gram matches
|
|||
|
+
|
|||
|
+ the n-gram count for each individual precision score would be:
|
|||
|
+
|
|||
|
+ - n=1 => prec_count = 2 (two unigrams)
|
|||
|
+ - n=2 => prec_count = 1 (one bigram)
|
|||
|
+ - n=3 => prec_count = 1/2 (no trigram, taking 'smoothed' value of 1 / ( 2^k ), with k=1)
|
|||
|
+ - n=4 => prec_count = 1/4 (no fourgram, taking 'smoothed' value of 1 / ( 2^k ), with k=2)
|
|||
|
+ """
|
|||
|
+ incvnt = 1 # From the mteval-v13a.pl, it's referred to as k.
|
|||
|
+ for i, p_i in enumerate(p_n):
|
|||
|
+ if p_i.numerator == 0:
|
|||
|
+ p_n[i] = 1 / (2**incvnt * p_i.denominator)
|
|||
|
+ incvnt += 1
|
|||
|
+ return p_n
|
|||
|
+
|
|||
|
+ def method4(self, p_n, references, hypothesis, hyp_len=None, *args, **kwargs):
|
|||
|
+ """
|
|||
|
+ Smoothing method 4:
|
|||
|
+ Shorter translations may have inflated precision values due to having
|
|||
|
+ smaller denominators; therefore, we give them proportionally
|
|||
|
+ smaller smoothed counts. Instead of scaling to 1/(2^k), Chen and Cherry
|
|||
|
+ suggests dividing by 1/ln(len(T)), where T is the length of the translation.
|
|||
|
+ """
|
|||
|
+ incvnt = 1
|
|||
|
+ hyp_len = hyp_len if hyp_len else len(hypothesis)
|
|||
|
+ for i, p_i in enumerate(p_n):
|
|||
|
+ if p_i.numerator == 0 and hyp_len > 1:
|
|||
|
+ # incvnt = i + 1 * self.k / math.log(
|
|||
|
+ # hyp_len
|
|||
|
+ # ) # Note that this K is different from the K from NIST.
|
|||
|
+ # p_n[i] = incvnt / p_i.denominator\
|
|||
|
+ numerator = 1 / (2**incvnt * self.k / math.log(hyp_len))
|
|||
|
+ p_n[i] = numerator / p_i.denominator
|
|||
|
+ incvnt += 1
|
|||
|
+ return p_n
|
|||
|
+
|
|||
|
+ def method5(self, p_n, references, hypothesis, hyp_len=None, *args, **kwargs):
|
|||
|
+ """
|
|||
|
+ Smoothing method 5:
|
|||
|
+ The matched counts for similar values of n should be similar. To a
|
|||
|
+ calculate the n-gram matched count, it averages the n−1, n and n+1 gram
|
|||
|
+ matched counts.
|
|||
|
+ """
|
|||
|
+ hyp_len = hyp_len if hyp_len else len(hypothesis)
|
|||
|
+ m = {}
|
|||
|
+ # Requires an precision value for an addition ngram order.
|
|||
|
+ p_n_plus1 = p_n + [modified_precision(references, hypothesis, 5)]
|
|||
|
+ m[-1] = p_n[0] + 1
|
|||
|
+ for i, p_i in enumerate(p_n):
|
|||
|
+ p_n[i] = (m[i - 1] + p_i + p_n_plus1[i + 1]) / 3
|
|||
|
+ m[i] = p_n[i]
|
|||
|
+ return p_n
|
|||
|
+
|
|||
|
+ def method6(self, p_n, references, hypothesis, hyp_len=None, *args, **kwargs):
|
|||
|
+ """
|
|||
|
+ Smoothing method 6:
|
|||
|
+ Interpolates the maximum likelihood estimate of the precision *p_n* with
|
|||
|
+ a prior estimate *pi0*. The prior is estimated by assuming that the ratio
|
|||
|
+ between pn and pn−1 will be the same as that between pn−1 and pn−2; from
|
|||
|
+ Gao and He (2013) Training MRF-Based Phrase Translation Models using
|
|||
|
+ Gradient Ascent. In NAACL.
|
|||
|
+ """
|
|||
|
+ hyp_len = hyp_len if hyp_len else len(hypothesis)
|
|||
|
+ # This smoothing only works when p_1 and p_2 is non-zero.
|
|||
|
+ # Raise an error with an appropriate message when the input is too short
|
|||
|
+ # to use this smoothing technique.
|
|||
|
+ assert p_n[2], "This smoothing method requires non-zero precision for bigrams."
|
|||
|
+ for i, p_i in enumerate(p_n):
|
|||
|
+ if i in [0, 1]: # Skips the first 2 orders of ngrams.
|
|||
|
+ continue
|
|||
|
+ else:
|
|||
|
+ pi0 = 0 if p_n[i - 2] == 0 else p_n[i - 1] ** 2 / p_n[i - 2]
|
|||
|
+ # No. of ngrams in translation that matches the reference.
|
|||
|
+ m = p_i.numerator
|
|||
|
+ # No. of ngrams in translation.
|
|||
|
+ l = sum(1 for _ in ngrams(hypothesis, i + 1))
|
|||
|
+ # Calculates the interpolated precision.
|
|||
|
+ p_n[i] = (m + self.alpha * pi0) / (l + self.alpha)
|
|||
|
+ return p_n
|
|||
|
+
|
|||
|
+ def method7(self, p_n, references, hypothesis, hyp_len=None, *args, **kwargs):
|
|||
|
+ """
|
|||
|
+ Smoothing method 7:
|
|||
|
+ Interpolates methods 4 and 5.
|
|||
|
+ """
|
|||
|
+ hyp_len = hyp_len if hyp_len else len(hypothesis)
|
|||
|
+ p_n = self.method4(p_n, references, hypothesis, hyp_len)
|
|||
|
+ p_n = self.method5(p_n, references, hypothesis, hyp_len)
|
|||
|
+ return p_n
|
|||
|
Index: nltk-3.8.1/README.md
|
|||
|
===================================================================
|
|||
|
--- nltk-3.8.1.orig/README.md
|
|||
|
+++ nltk-3.8.1/README.md
|
|||
|
@@ -1,50 +1,50 @@
|
|||
|
-# Natural Language Toolkit (NLTK)
|
|||
|
-[![PyPI](https://img.shields.io/pypi/v/nltk.svg)](https://pypi.python.org/pypi/nltk)
|
|||
|
-![CI](https://github.com/nltk/nltk/actions/workflows/ci.yaml/badge.svg?branch=develop)
|
|||
|
-
|
|||
|
-NLTK -- the Natural Language Toolkit -- is a suite of open source Python
|
|||
|
-modules, data sets, and tutorials supporting research and development in Natural
|
|||
|
-Language Processing. NLTK requires Python version 3.7, 3.8, 3.9, 3.10 or 3.11.
|
|||
|
-
|
|||
|
-For documentation, please visit [nltk.org](https://www.nltk.org/).
|
|||
|
-
|
|||
|
-
|
|||
|
-## Contributing
|
|||
|
-
|
|||
|
-Do you want to contribute to NLTK development? Great!
|
|||
|
-Please read [CONTRIBUTING.md](CONTRIBUTING.md) for more details.
|
|||
|
-
|
|||
|
-See also [how to contribute to NLTK](https://www.nltk.org/contribute.html).
|
|||
|
-
|
|||
|
-
|
|||
|
-## Donate
|
|||
|
-
|
|||
|
-Have you found the toolkit helpful? Please support NLTK development by donating
|
|||
|
-to the project via PayPal, using the link on the NLTK homepage.
|
|||
|
-
|
|||
|
-
|
|||
|
-## Citing
|
|||
|
-
|
|||
|
-If you publish work that uses NLTK, please cite the NLTK book, as follows:
|
|||
|
-
|
|||
|
- Bird, Steven, Edward Loper and Ewan Klein (2009).
|
|||
|
- Natural Language Processing with Python. O'Reilly Media Inc.
|
|||
|
-
|
|||
|
-
|
|||
|
-## Copyright
|
|||
|
-
|
|||
|
-Copyright (C) 2001-2023 NLTK Project
|
|||
|
-
|
|||
|
-For license information, see [LICENSE.txt](LICENSE.txt).
|
|||
|
-
|
|||
|
-[AUTHORS.md](AUTHORS.md) contains a list of everyone who has contributed to NLTK.
|
|||
|
-
|
|||
|
-
|
|||
|
-### Redistributing
|
|||
|
-
|
|||
|
-- NLTK source code is distributed under the Apache 2.0 License.
|
|||
|
-- NLTK documentation is distributed under the Creative Commons
|
|||
|
- Attribution-Noncommercial-No Derivative Works 3.0 United States license.
|
|||
|
-- NLTK corpora are provided under the terms given in the README file for each
|
|||
|
- corpus; all are redistributable and available for non-commercial use.
|
|||
|
-- NLTK may be freely redistributed, subject to the provisions of these licenses.
|
|||
|
+# Natural Language Toolkit (NLTK)
|
|||
|
+[![PyPI](https://img.shields.io/pypi/v/nltk.svg)](https://pypi.python.org/pypi/nltk)
|
|||
|
+![CI](https://github.com/nltk/nltk/actions/workflows/ci.yaml/badge.svg?branch=develop)
|
|||
|
+
|
|||
|
+NLTK -- the Natural Language Toolkit -- is a suite of open source Python
|
|||
|
+modules, data sets, and tutorials supporting research and development in Natural
|
|||
|
+Language Processing. NLTK requires Python version 3.7, 3.8, 3.9, 3.10, 3.11 or 3.12.
|
|||
|
+
|
|||
|
+For documentation, please visit [nltk.org](https://www.nltk.org/).
|
|||
|
+
|
|||
|
+
|
|||
|
+## Contributing
|
|||
|
+
|
|||
|
+Do you want to contribute to NLTK development? Great!
|
|||
|
+Please read [CONTRIBUTING.md](CONTRIBUTING.md) for more details.
|
|||
|
+
|
|||
|
+See also [how to contribute to NLTK](https://www.nltk.org/contribute.html).
|
|||
|
+
|
|||
|
+
|
|||
|
+## Donate
|
|||
|
+
|
|||
|
+Have you found the toolkit helpful? Please support NLTK development by donating
|
|||
|
+to the project via PayPal, using the link on the NLTK homepage.
|
|||
|
+
|
|||
|
+
|
|||
|
+## Citing
|
|||
|
+
|
|||
|
+If you publish work that uses NLTK, please cite the NLTK book, as follows:
|
|||
|
+
|
|||
|
+ Bird, Steven, Edward Loper and Ewan Klein (2009).
|
|||
|
+ Natural Language Processing with Python. O'Reilly Media Inc.
|
|||
|
+
|
|||
|
+
|
|||
|
+## Copyright
|
|||
|
+
|
|||
|
+Copyright (C) 2001-2023 NLTK Project
|
|||
|
+
|
|||
|
+For license information, see [LICENSE.txt](LICENSE.txt).
|
|||
|
+
|
|||
|
+[AUTHORS.md](AUTHORS.md) contains a list of everyone who has contributed to NLTK.
|
|||
|
+
|
|||
|
+
|
|||
|
+### Redistributing
|
|||
|
+
|
|||
|
+- NLTK source code is distributed under the Apache 2.0 License.
|
|||
|
+- NLTK documentation is distributed under the Creative Commons
|
|||
|
+ Attribution-Noncommercial-No Derivative Works 3.0 United States license.
|
|||
|
+- NLTK corpora are provided under the terms given in the README file for each
|
|||
|
+ corpus; all are redistributable and available for non-commercial use.
|
|||
|
+- NLTK may be freely redistributed, subject to the provisions of these licenses.
|
|||
|
Index: nltk-3.8.1/setup.py
|
|||
|
===================================================================
|
|||
|
--- nltk-3.8.1.orig/setup.py
|
|||
|
+++ nltk-3.8.1/setup.py
|
|||
|
@@ -1,125 +1,126 @@
|
|||
|
-#!/usr/bin/env python
|
|||
|
-#
|
|||
|
-# Setup script for the Natural Language Toolkit
|
|||
|
-#
|
|||
|
-# Copyright (C) 2001-2023 NLTK Project
|
|||
|
-# Author: NLTK Team <nltk.team@gmail.com>
|
|||
|
-# URL: <https://www.nltk.org/>
|
|||
|
-# For license information, see LICENSE.TXT
|
|||
|
-
|
|||
|
-# Work around mbcs bug in distutils.
|
|||
|
-# https://bugs.python.org/issue10945
|
|||
|
-import codecs
|
|||
|
-
|
|||
|
-try:
|
|||
|
- codecs.lookup("mbcs")
|
|||
|
-except LookupError:
|
|||
|
- ascii = codecs.lookup("ascii")
|
|||
|
- func = lambda name, enc=ascii: {True: enc}.get(name == "mbcs")
|
|||
|
- codecs.register(func)
|
|||
|
-
|
|||
|
-import os
|
|||
|
-
|
|||
|
-# Use the VERSION file to get NLTK version
|
|||
|
-version_file = os.path.join(os.path.dirname(__file__), "nltk", "VERSION")
|
|||
|
-with open(version_file) as fh:
|
|||
|
- nltk_version = fh.read().strip()
|
|||
|
-
|
|||
|
-# setuptools
|
|||
|
-from setuptools import find_packages, setup
|
|||
|
-
|
|||
|
-# Specify groups of optional dependencies
|
|||
|
-extras_require = {
|
|||
|
- "machine_learning": [
|
|||
|
- "numpy",
|
|||
|
- "python-crfsuite",
|
|||
|
- "scikit-learn",
|
|||
|
- "scipy",
|
|||
|
- ],
|
|||
|
- "plot": ["matplotlib"],
|
|||
|
- "tgrep": ["pyparsing"],
|
|||
|
- "twitter": ["twython"],
|
|||
|
- "corenlp": ["requests"],
|
|||
|
-}
|
|||
|
-
|
|||
|
-# Add a group made up of all optional dependencies
|
|||
|
-extras_require["all"] = {
|
|||
|
- package for group in extras_require.values() for package in group
|
|||
|
-}
|
|||
|
-
|
|||
|
-# Adds CLI commands
|
|||
|
-console_scripts = """
|
|||
|
-[console_scripts]
|
|||
|
-nltk=nltk.cli:cli
|
|||
|
-"""
|
|||
|
-
|
|||
|
-_project_homepage = "https://www.nltk.org/"
|
|||
|
-
|
|||
|
-setup(
|
|||
|
- name="nltk",
|
|||
|
- description="Natural Language Toolkit",
|
|||
|
- version=nltk_version,
|
|||
|
- url=_project_homepage,
|
|||
|
- project_urls={
|
|||
|
- "Documentation": _project_homepage,
|
|||
|
- "Source Code": "https://github.com/nltk/nltk",
|
|||
|
- "Issue Tracker": "https://github.com/nltk/nltk/issues",
|
|||
|
- },
|
|||
|
- long_description="""\
|
|||
|
-The Natural Language Toolkit (NLTK) is a Python package for
|
|||
|
-natural language processing. NLTK requires Python 3.7, 3.8, 3.9, 3.10 or 3.11.""",
|
|||
|
- license="Apache License, Version 2.0",
|
|||
|
- keywords=[
|
|||
|
- "NLP",
|
|||
|
- "CL",
|
|||
|
- "natural language processing",
|
|||
|
- "computational linguistics",
|
|||
|
- "parsing",
|
|||
|
- "tagging",
|
|||
|
- "tokenizing",
|
|||
|
- "syntax",
|
|||
|
- "linguistics",
|
|||
|
- "language",
|
|||
|
- "natural language",
|
|||
|
- "text analytics",
|
|||
|
- ],
|
|||
|
- maintainer="NLTK Team",
|
|||
|
- maintainer_email="nltk.team@gmail.com",
|
|||
|
- author="NLTK Team",
|
|||
|
- author_email="nltk.team@gmail.com",
|
|||
|
- classifiers=[
|
|||
|
- "Development Status :: 5 - Production/Stable",
|
|||
|
- "Intended Audience :: Developers",
|
|||
|
- "Intended Audience :: Education",
|
|||
|
- "Intended Audience :: Information Technology",
|
|||
|
- "Intended Audience :: Science/Research",
|
|||
|
- "License :: OSI Approved :: Apache Software License",
|
|||
|
- "Operating System :: OS Independent",
|
|||
|
- "Programming Language :: Python :: 3.7",
|
|||
|
- "Programming Language :: Python :: 3.8",
|
|||
|
- "Programming Language :: Python :: 3.9",
|
|||
|
- "Programming Language :: Python :: 3.10",
|
|||
|
- "Programming Language :: Python :: 3.11",
|
|||
|
- "Topic :: Scientific/Engineering",
|
|||
|
- "Topic :: Scientific/Engineering :: Artificial Intelligence",
|
|||
|
- "Topic :: Scientific/Engineering :: Human Machine Interfaces",
|
|||
|
- "Topic :: Scientific/Engineering :: Information Analysis",
|
|||
|
- "Topic :: Text Processing",
|
|||
|
- "Topic :: Text Processing :: Filters",
|
|||
|
- "Topic :: Text Processing :: General",
|
|||
|
- "Topic :: Text Processing :: Indexing",
|
|||
|
- "Topic :: Text Processing :: Linguistic",
|
|||
|
- ],
|
|||
|
- package_data={"nltk": ["test/*.doctest", "VERSION"]},
|
|||
|
- python_requires=">=3.7",
|
|||
|
- install_requires=[
|
|||
|
- "click",
|
|||
|
- "joblib",
|
|||
|
- "regex>=2021.8.3",
|
|||
|
- "tqdm",
|
|||
|
- ],
|
|||
|
- extras_require=extras_require,
|
|||
|
- packages=find_packages(),
|
|||
|
- zip_safe=False, # since normal files will be present too?
|
|||
|
- entry_points=console_scripts,
|
|||
|
-)
|
|||
|
+#!/usr/bin/env python
|
|||
|
+#
|
|||
|
+# Setup script for the Natural Language Toolkit
|
|||
|
+#
|
|||
|
+# Copyright (C) 2001-2023 NLTK Project
|
|||
|
+# Author: NLTK Team <nltk.team@gmail.com>
|
|||
|
+# URL: <https://www.nltk.org/>
|
|||
|
+# For license information, see LICENSE.TXT
|
|||
|
+
|
|||
|
+# Work around mbcs bug in distutils.
|
|||
|
+# https://bugs.python.org/issue10945
|
|||
|
+import codecs
|
|||
|
+
|
|||
|
+try:
|
|||
|
+ codecs.lookup("mbcs")
|
|||
|
+except LookupError:
|
|||
|
+ ascii = codecs.lookup("ascii")
|
|||
|
+ func = lambda name, enc=ascii: {True: enc}.get(name == "mbcs")
|
|||
|
+ codecs.register(func)
|
|||
|
+
|
|||
|
+import os
|
|||
|
+
|
|||
|
+# Use the VERSION file to get NLTK version
|
|||
|
+version_file = os.path.join(os.path.dirname(__file__), "nltk", "VERSION")
|
|||
|
+with open(version_file) as fh:
|
|||
|
+ nltk_version = fh.read().strip()
|
|||
|
+
|
|||
|
+# setuptools
|
|||
|
+from setuptools import find_packages, setup
|
|||
|
+
|
|||
|
+# Specify groups of optional dependencies
|
|||
|
+extras_require = {
|
|||
|
+ "machine_learning": [
|
|||
|
+ "numpy",
|
|||
|
+ "python-crfsuite",
|
|||
|
+ "scikit-learn",
|
|||
|
+ "scipy",
|
|||
|
+ ],
|
|||
|
+ "plot": ["matplotlib"],
|
|||
|
+ "tgrep": ["pyparsing"],
|
|||
|
+ "twitter": ["twython"],
|
|||
|
+ "corenlp": ["requests"],
|
|||
|
+}
|
|||
|
+
|
|||
|
+# Add a group made up of all optional dependencies
|
|||
|
+extras_require["all"] = {
|
|||
|
+ package for group in extras_require.values() for package in group
|
|||
|
+}
|
|||
|
+
|
|||
|
+# Adds CLI commands
|
|||
|
+console_scripts = """
|
|||
|
+[console_scripts]
|
|||
|
+nltk=nltk.cli:cli
|
|||
|
+"""
|
|||
|
+
|
|||
|
+_project_homepage = "https://www.nltk.org/"
|
|||
|
+
|
|||
|
+setup(
|
|||
|
+ name="nltk",
|
|||
|
+ description="Natural Language Toolkit",
|
|||
|
+ version=nltk_version,
|
|||
|
+ url=_project_homepage,
|
|||
|
+ project_urls={
|
|||
|
+ "Documentation": _project_homepage,
|
|||
|
+ "Source Code": "https://github.com/nltk/nltk",
|
|||
|
+ "Issue Tracker": "https://github.com/nltk/nltk/issues",
|
|||
|
+ },
|
|||
|
+ long_description="""\
|
|||
|
+The Natural Language Toolkit (NLTK) is a Python package for
|
|||
|
+natural language processing. NLTK requires Python 3.7, 3.8, 3.9, 3.10, 3.11 or 3.12.""",
|
|||
|
+ license="Apache License, Version 2.0",
|
|||
|
+ keywords=[
|
|||
|
+ "NLP",
|
|||
|
+ "CL",
|
|||
|
+ "natural language processing",
|
|||
|
+ "computational linguistics",
|
|||
|
+ "parsing",
|
|||
|
+ "tagging",
|
|||
|
+ "tokenizing",
|
|||
|
+ "syntax",
|
|||
|
+ "linguistics",
|
|||
|
+ "language",
|
|||
|
+ "natural language",
|
|||
|
+ "text analytics",
|
|||
|
+ ],
|
|||
|
+ maintainer="NLTK Team",
|
|||
|
+ maintainer_email="nltk.team@gmail.com",
|
|||
|
+ author="NLTK Team",
|
|||
|
+ author_email="nltk.team@gmail.com",
|
|||
|
+ classifiers=[
|
|||
|
+ "Development Status :: 5 - Production/Stable",
|
|||
|
+ "Intended Audience :: Developers",
|
|||
|
+ "Intended Audience :: Education",
|
|||
|
+ "Intended Audience :: Information Technology",
|
|||
|
+ "Intended Audience :: Science/Research",
|
|||
|
+ "License :: OSI Approved :: Apache Software License",
|
|||
|
+ "Operating System :: OS Independent",
|
|||
|
+ "Programming Language :: Python :: 3.7",
|
|||
|
+ "Programming Language :: Python :: 3.8",
|
|||
|
+ "Programming Language :: Python :: 3.9",
|
|||
|
+ "Programming Language :: Python :: 3.10",
|
|||
|
+ "Programming Language :: Python :: 3.11",
|
|||
|
+ "Programming Language :: Python :: 3.12",
|
|||
|
+ "Topic :: Scientific/Engineering",
|
|||
|
+ "Topic :: Scientific/Engineering :: Artificial Intelligence",
|
|||
|
+ "Topic :: Scientific/Engineering :: Human Machine Interfaces",
|
|||
|
+ "Topic :: Scientific/Engineering :: Information Analysis",
|
|||
|
+ "Topic :: Text Processing",
|
|||
|
+ "Topic :: Text Processing :: Filters",
|
|||
|
+ "Topic :: Text Processing :: General",
|
|||
|
+ "Topic :: Text Processing :: Indexing",
|
|||
|
+ "Topic :: Text Processing :: Linguistic",
|
|||
|
+ ],
|
|||
|
+ package_data={"nltk": ["test/*.doctest", "VERSION"]},
|
|||
|
+ python_requires=">=3.7",
|
|||
|
+ install_requires=[
|
|||
|
+ "click",
|
|||
|
+ "joblib",
|
|||
|
+ "regex>=2021.8.3",
|
|||
|
+ "tqdm",
|
|||
|
+ ],
|
|||
|
+ extras_require=extras_require,
|
|||
|
+ packages=find_packages(),
|
|||
|
+ zip_safe=False, # since normal files will be present too?
|
|||
|
+ entry_points=console_scripts,
|
|||
|
+)
|