forked from pool/python-LTTL
- Switch to pyproject and autosetup macros
- Remove unneeded skips
- Stop using greedy globs in %files

OBS-URL: https://build.opensuse.org/package/show/devel:languages:python/python-LTTL?expand=0&rev=11
513 lines
21 KiB
Diff
513 lines
21 KiB
Diff
Index: LTTL-2.0.12/LTTL/Segmentation.py
|
|
===================================================================
|
|
--- LTTL-2.0.12.orig/LTTL/Segmentation.py
|
|
+++ LTTL-2.0.12/LTTL/Segmentation.py
|
|
@@ -22,8 +22,6 @@ from __future__ import unicode_literals
|
|
|
|
import numpy as np
|
|
|
|
-from future.utils import iteritems
|
|
-
|
|
import re
|
|
import os
|
|
|
|
@@ -131,7 +129,7 @@ def cleanup_segmentation(segmentation):
|
|
def clone_chunks(source, dst):
|
|
"""Copy all chunks related to a given "source" segmentation
|
|
and associate them with a given "dst" segmentation."""
|
|
- for k, v in iteritems(segments_cache):
|
|
+ for k, v in segments_cache.items():
|
|
if k[0] == source:
|
|
s = get_chunk(k[0], k[1])
|
|
add_chunk(dst, k[1], s.copy())
|
|
@@ -339,7 +337,7 @@ class Segmentation(object):
|
|
|
|
def get_annotation_tab(self, segment):
|
|
annotations_id = list()
|
|
- for key, value in iteritems(segment.annotations):
|
|
+ for key, value in segment.annotations.items():
|
|
self.add_annotation_tuple((key, value))
|
|
annotations_id.append(
|
|
[
|
|
Index: LTTL-2.0.12/LTTL/Table.py
|
|
===================================================================
|
|
--- LTTL-2.0.12.orig/LTTL/Table.py
|
|
+++ LTTL-2.0.12/LTTL/Table.py
|
|
@@ -38,10 +38,6 @@ import os
|
|
import math
|
|
import sys
|
|
|
|
-from builtins import str as text
|
|
-from future.utils import iteritems
|
|
-from past.builtins import xrange
|
|
-
|
|
__version__ = "1.0.5"
|
|
|
|
|
|
@@ -140,7 +136,7 @@ class Table(object):
|
|
output_string = self.header_col_id + col_delimiter
|
|
|
|
# Convert col headers to unicode strings and output...
|
|
- output_string += col_delimiter.join(text(i) for i in self.col_ids)
|
|
+ output_string += col_delimiter.join(str(i) for i in self.col_ids)
|
|
|
|
# Add Orange 2 table headers if needed...
|
|
if output_orange_headers:
|
|
@@ -162,7 +158,7 @@ class Table(object):
|
|
if self.missing is None:
|
|
missing = ''
|
|
else:
|
|
- missing = text(self.missing)
|
|
+ missing = str(self.missing)
|
|
|
|
# Format row strings...
|
|
row_strings = (
|
|
@@ -172,7 +168,7 @@ class Table(object):
|
|
col_delimiter,
|
|
col_delimiter.join(
|
|
[
|
|
- text(self.values.get((row_id, col_id), missing))
|
|
+ str(self.values.get((row_id, col_id), missing))
|
|
for col_id in self.col_ids
|
|
]
|
|
)
|
|
@@ -225,7 +221,7 @@ class Table(object):
|
|
# For each col header...
|
|
for col_id in ordered_cols:
|
|
# Convert it to string
|
|
- str_col_id = text(col_id)
|
|
+ str_col_id = str(col_id)
|
|
col_type = col_type_for_id(col_id)
|
|
if col_type == 'string':
|
|
var = Orange.data.StringVariable(str_col_id)
|
|
@@ -247,13 +243,13 @@ class Table(object):
|
|
values = list()
|
|
if col_id == self.header_col_id:
|
|
for row_id in self.row_ids:
|
|
- value = text(row_id)
|
|
+ value = str(row_id)
|
|
if value not in values:
|
|
values.append(value)
|
|
else:
|
|
for row_id in self.row_ids:
|
|
if (row_id, col_id) in self.values:
|
|
- value = text(self.values[(row_id, col_id)])
|
|
+ value = str(self.values[(row_id, col_id)])
|
|
if value not in values:
|
|
values.append(value)
|
|
var = Orange.data.DiscreteVariable(
|
|
@@ -267,7 +263,7 @@ class Table(object):
|
|
# Create Orange 3 domain and table
|
|
domain = Orange.data.Domain(attr_vars, class_vars, meta_vars)
|
|
if self.missing is not None:
|
|
- missing = text(self.missing)
|
|
+ missing = str(self.missing)
|
|
else:
|
|
missing = None
|
|
rows = []
|
|
@@ -285,7 +281,7 @@ class Table(object):
|
|
if isinstance(value, int):
|
|
# Assume all column values are of the same type
|
|
col_var.number_of_decimals = 0
|
|
- value = text(value)
|
|
+ value = str(value)
|
|
else:
|
|
value = missing
|
|
row_data.append(value)
|
|
@@ -326,7 +322,7 @@ class Table(object):
|
|
for col_id in ordered_cols:
|
|
|
|
# Convert it to string and encode as specified...
|
|
- str_col_id = text(col_id)
|
|
+ str_col_id = str(col_id)
|
|
encoded_col_id = str_col_id.encode(
|
|
encoding,
|
|
errors='xmlcharrefreplace',
|
|
@@ -354,7 +350,7 @@ class Table(object):
|
|
else:
|
|
for row_id in self.row_ids:
|
|
if (row_id, col_id) in self.values:
|
|
- value = text(self.values[(row_id, col_id)])
|
|
+ value = str(self.values[(row_id, col_id)])
|
|
value = value.encode(
|
|
encoding,
|
|
errors='xmlcharrefreplace',
|
|
@@ -375,7 +371,7 @@ class Table(object):
|
|
if self.missing is None:
|
|
missing = '?'
|
|
if self.missing is not None:
|
|
- missing = text(self.missing)
|
|
+ missing = str(self.missing)
|
|
|
|
# Store values in each row...
|
|
for row_id in self.row_ids:
|
|
@@ -386,7 +382,7 @@ class Table(object):
|
|
else:
|
|
value = self.values.get((row_id, col_id), missing)
|
|
if value:
|
|
- value = text(value).encode(
|
|
+ value = str(value).encode(
|
|
encoding,
|
|
errors='xmlcharrefreplace',
|
|
)
|
|
@@ -579,7 +575,7 @@ class PivotCrosstab(Crosstab):
|
|
new_col_ids,
|
|
dict(
|
|
(tuple(reversed(key)), count)
|
|
- for key, count in iteritems(self.values)
|
|
+ for key, count in self.values.items()
|
|
),
|
|
self.header_col_id,
|
|
self.header_col_type,
|
|
@@ -665,8 +661,8 @@ class PivotCrosstab(Crosstab):
|
|
np_table.fill(self.missing or 0)
|
|
|
|
# Fill and return numpy table...
|
|
- for row_idx in xrange(len(self.row_ids)):
|
|
- for col_idx in xrange(len(self.col_ids)):
|
|
+ for row_idx in range(len(self.row_ids)):
|
|
+ for col_idx in range(len(self.col_ids)):
|
|
try:
|
|
np_table[row_idx][col_idx] = self.values[
|
|
(self.row_ids[row_idx], self.col_ids[col_idx])
|
|
@@ -896,7 +892,7 @@ class IntPivotCrosstab(PivotCrosstab):
|
|
for col_id in col_ids
|
|
]
|
|
row_total = sum(row_values)
|
|
- for col_idx in xrange(len(col_ids)):
|
|
+ for col_idx in range(len(col_ids)):
|
|
freq_under_indep = row_total * col_total[col_idx]
|
|
if freq_under_indep > 0:
|
|
new_values[(row_id, col_ids[col_idx])] = (
|
|
@@ -998,7 +994,7 @@ class IntPivotCrosstab(PivotCrosstab):
|
|
output_matrix = np.dot(pi_inv, np.dot(exchange, pi_inv))
|
|
col_ids = self.col_ids
|
|
values = dict()
|
|
- for col_id_idx1 in xrange(len(col_ids)):
|
|
+ for col_id_idx1 in range(len(col_ids)):
|
|
col_id1 = col_ids[col_id_idx1]
|
|
values.update(
|
|
dict(
|
|
@@ -1006,7 +1002,7 @@ class IntPivotCrosstab(PivotCrosstab):
|
|
(col_id1, col_ids[i]),
|
|
output_matrix[col_id_idx1, i]
|
|
)
|
|
- for i in xrange(len(col_ids))
|
|
+ for i in range(len(col_ids))
|
|
)
|
|
)
|
|
if progress_callback:
|
|
@@ -1050,8 +1046,8 @@ class IntPivotCrosstab(PivotCrosstab):
|
|
for row_id in self.row_ids:
|
|
for col_id in self.col_ids:
|
|
count = get_count((row_id, col_id), 0)
|
|
- for i in xrange(count):
|
|
- new_row_id = text(row_counter)
|
|
+ for i in range(count):
|
|
+ new_row_id = str(row_counter)
|
|
new_row_ids.append(new_row_id)
|
|
new_values[(new_row_id, first_col_id)] = col_id
|
|
if num_row_ids > 1:
|
|
@@ -1194,7 +1190,7 @@ class FlatCrosstab(Crosstab):
|
|
known_pair_row_id = row_id_for_pair[pair]
|
|
new_values[(known_pair_row_id, '__weight__')] += 1
|
|
else:
|
|
- new_row_id = text(row_counter)
|
|
+ new_row_id = str(row_counter)
|
|
new_row_ids.append(new_row_id)
|
|
row_id_for_pair[pair] = new_row_id
|
|
new_values[(new_row_id, first_col_id)] = first_col_value
|
|
@@ -1212,7 +1208,7 @@ class FlatCrosstab(Crosstab):
|
|
known_value_row_id = row_id_for_value[col_value]
|
|
new_values[(known_value_row_id, '__weight__')] += 1
|
|
else:
|
|
- new_row_id = text(row_counter)
|
|
+ new_row_id = str(row_counter)
|
|
new_row_ids.append(new_row_id)
|
|
row_id_for_value[col_value] = new_row_id
|
|
new_values[(new_row_id, col_id)] = col_value
|
|
@@ -1344,8 +1340,8 @@ class IntWeightedFlatCrosstab(WeightedFl
|
|
count = self.values[(row_id, '__weight__')]
|
|
first_col_value = self.values[row_id, first_col_id]
|
|
second_col_value = self.values[row_id, second_col_id]
|
|
- for i in xrange(count):
|
|
- new_row_id = text(row_counter)
|
|
+ for i in range(count):
|
|
+ new_row_id = str(row_counter)
|
|
new_row_ids.append(new_row_id)
|
|
new_values[(new_row_id, first_col_id)] = first_col_value
|
|
new_values[(new_row_id, second_col_id)] = second_col_value
|
|
@@ -1357,8 +1353,8 @@ class IntWeightedFlatCrosstab(WeightedFl
|
|
for row_id in self.row_ids:
|
|
count = self.values[(row_id, '__weight__')]
|
|
col_value = self.values[row_id, col_id]
|
|
- for i in xrange(count):
|
|
- new_row_id = text(row_counter)
|
|
+ for i in range(count):
|
|
+ new_row_id = str(row_counter)
|
|
new_row_ids.append(new_row_id)
|
|
new_values[(new_row_id, col_id)] = col_value
|
|
row_counter += 1
|
|
Index: LTTL-2.0.12/LTTL/Utils.py
|
|
===================================================================
|
|
--- LTTL-2.0.12.orig/LTTL/Utils.py
|
|
+++ LTTL-2.0.12/LTTL/Utils.py
|
|
@@ -34,15 +34,9 @@ from __future__ import division
|
|
from __future__ import absolute_import
|
|
from __future__ import unicode_literals
|
|
|
|
-from future.utils import iteritems, itervalues
|
|
-from builtins import range, chr
|
|
-
|
|
import random, math
|
|
|
|
-try:
|
|
- from functools import lru_cache
|
|
-except ImportError:
|
|
- from backports.functools_lru_cache import lru_cache
|
|
+from functools import lru_cache
|
|
|
|
try:
|
|
from scipy.special import comb as binom
|
|
@@ -69,8 +63,8 @@ def sample_dict(dictionary, sample_size)
|
|
"""Return a randomly sampled frequency dict"""
|
|
new_dict = dict()
|
|
num_to_sample = sample_size
|
|
- num_to_process = sum(itervalues(dictionary))
|
|
- for (k, v) in iteritems(dictionary):
|
|
+ num_to_process = sum(dictionary.values())
|
|
+ for (k, v) in dictionary.items():
|
|
for i in range(v):
|
|
if random.random() < num_to_sample / num_to_process:
|
|
new_dict[k] = new_dict.get(k, 0) + 1
|
|
@@ -101,14 +95,14 @@ def get_variety(
|
|
else:
|
|
category_dict = dict()
|
|
if not unit_weighting and not category_weighting:
|
|
- for (k, v) in iteritems(dictionary):
|
|
+ for (k, v) in dictionary.items():
|
|
(category, unit) = k.split(category_delimiter, 1)
|
|
category_dict[category] = category_dict.get(category, 0) + v
|
|
return (len(dictionary) / len(category_dict))
|
|
else:
|
|
units_in_category_dict = dict()
|
|
unit_dict = dict()
|
|
- for (k, v) in iteritems(dictionary):
|
|
+ for (k, v) in dictionary.items():
|
|
(category, unit) = k.split(category_delimiter, 1)
|
|
category_dict[category] = category_dict.get(category, 0) + v
|
|
units_in_category_dict[(category, unit,)] = (
|
|
@@ -179,7 +173,7 @@ def tuple_to_simple_dict(dictionary, key
|
|
"""
|
|
return dict(
|
|
(k[1], v)
|
|
- for (k, v) in iteritems(dictionary)
|
|
+ for (k, v) in dictionary.items()
|
|
if k[0] == key and v > 0
|
|
)
|
|
|
|
@@ -192,7 +186,7 @@ def tuple_to_simple_dict_transpose(dicti
|
|
"""
|
|
return dict(
|
|
(k[0], v)
|
|
- for (k, v) in iteritems(dictionary)
|
|
+ for (k, v) in dictionary.items()
|
|
if k[1] == key and v > 0
|
|
)
|
|
|
|
@@ -226,7 +220,7 @@ def get_perplexity(dictionary):
|
|
"""Compute the perplexity (=exp entropy) of a dictionary"""
|
|
my_sum = 0
|
|
weighted_sum_of_logs = 0
|
|
- for freq in itervalues(dictionary):
|
|
+ for freq in dictionary.values():
|
|
if freq:
|
|
my_sum += freq
|
|
weighted_sum_of_logs += freq * math.log(freq)
|
|
Index: LTTL-2.0.12/LTTL/Processor.py
|
|
===================================================================
|
|
--- LTTL-2.0.12.orig/LTTL/Processor.py
|
|
+++ LTTL-2.0.12/LTTL/Processor.py
|
|
@@ -24,8 +24,6 @@ from __future__ import absolute_import
|
|
from __future__ import unicode_literals
|
|
|
|
from math import sqrt
|
|
-from builtins import range
|
|
-from builtins import str as text
|
|
|
|
import numpy as np
|
|
|
|
@@ -385,7 +383,7 @@ def count_in_window(
|
|
|
|
# Update main counts...
|
|
freq = dict(
|
|
- [(('1', k), v) for (k, v) in iteritems(window_freq)]
|
|
+ [(('1', k), v) for (k, v) in window_freq.items()]
|
|
)
|
|
|
|
if progress_callback:
|
|
@@ -418,14 +416,14 @@ def count_in_window(
|
|
|
|
# Get window type...
|
|
window_type = window_index + 1
|
|
- window_str = text(window_type)
|
|
+ window_str = str(window_type)
|
|
|
|
# Update main counts...
|
|
freq.update(
|
|
dict(
|
|
[
|
|
((window_str, k), v)
|
|
- for (k, v) in iteritems(window_freq)
|
|
+ for (k, v) in window_freq.items()
|
|
]
|
|
)
|
|
)
|
|
@@ -447,7 +445,7 @@ def count_in_window(
|
|
|
|
# Update main counts...
|
|
freq = dict(
|
|
- [(('1', k), v) for (k, v) in iteritems(window_freq)]
|
|
+ [(('1', k), v) for (k, v) in window_freq.items()]
|
|
)
|
|
|
|
if progress_callback:
|
|
@@ -468,13 +466,13 @@ def count_in_window(
|
|
|
|
# Get window type...
|
|
window_type = window_index + 1
|
|
- window_str = text(window_type)
|
|
+ window_str = str(window_type)
|
|
# Update main counts...
|
|
freq.update(
|
|
dict(
|
|
[
|
|
((window_str, k), v)
|
|
- for (k, v) in iteritems(window_freq)
|
|
+ for (k, v) in window_freq.items()
|
|
]
|
|
)
|
|
)
|
|
@@ -485,7 +483,7 @@ def count_in_window(
|
|
# Create pivot crosstab...
|
|
return (
|
|
IntPivotCrosstab(
|
|
- [text(i) for i in range(1, window_type + 1)],
|
|
+ [str(i) for i in range(1, window_type + 1)],
|
|
unit_types,
|
|
freq,
|
|
'__unit__',
|
|
@@ -566,7 +564,7 @@ def count_in_chain(
|
|
# Get the list of units in final format (content or annotation)...
|
|
if unit_annotation_key is not None:
|
|
unit_list = [
|
|
- text(
|
|
+ str(
|
|
unit_token.annotations.get(
|
|
unit_annotation_key,
|
|
'__none__', # Default annotation
|
|
@@ -943,7 +941,7 @@ def length_in_context(
|
|
]
|
|
values = dict(
|
|
(key, value)
|
|
- for key, value in iteritems(values)
|
|
+ for key, value in values.items()
|
|
if key[0] in context_types
|
|
)
|
|
|
|
@@ -1063,7 +1061,7 @@ def length_in_window(
|
|
sum_squares / window_size -
|
|
average * average
|
|
)
|
|
- window_str = text(window_type)
|
|
+ window_str = str(window_type)
|
|
values[(window_str, '__length_average__')] = average
|
|
values[(window_str, '__length_std_deviation__')] = stdev
|
|
values[(window_str, '__length_count__')] = window_size
|
|
@@ -1115,7 +1113,7 @@ def length_in_window(
|
|
|
|
# Compute and store average and standard deviation...
|
|
average = sum_values / window_size
|
|
- window_str = text(window_type)
|
|
+ window_str = str(window_type)
|
|
values[(window_str, '__length_average__')] = average
|
|
values[(window_str, '__length_count__')] = window_size
|
|
|
|
@@ -1132,7 +1130,7 @@ def length_in_window(
|
|
# Create Table...
|
|
return (
|
|
Table(
|
|
- [text(i) for i in range(1, window_type + 1)],
|
|
+ [str(i) for i in range(1, window_type + 1)],
|
|
col_ids,
|
|
values,
|
|
'__col__',
|
|
@@ -1779,7 +1777,7 @@ def annotate_contexts(
|
|
else:
|
|
new_values[(row_id, '__annotation__')] = (
|
|
multiple_values['value_delimiter'].join(
|
|
- text(a) for a in annotations
|
|
+ str(a) for a in annotations
|
|
)
|
|
)
|
|
|
|
@@ -2073,7 +2071,7 @@ def neighbors(
|
|
)
|
|
else:
|
|
string_value = left_token.get_content()
|
|
- new_values[(row_id, text(pos) + 'L')] = \
|
|
+ new_values[(row_id, str(pos) + 'L')] = \
|
|
string_value
|
|
right_index = context_index + pos
|
|
if right_index < len(context_segmentation):
|
|
@@ -2089,7 +2087,7 @@ def neighbors(
|
|
)
|
|
else:
|
|
string_value = right_token.get_content()
|
|
- new_values[(row_id, text(pos) + 'R')] = \
|
|
+ new_values[(row_id, str(pos) + 'R')] = \
|
|
string_value
|
|
|
|
if progress_callback:
|
|
@@ -2097,9 +2095,9 @@ def neighbors(
|
|
|
|
# Create table...
|
|
col_ids = ['__pos__']
|
|
- col_ids.extend([text(p) + 'L' for p in reversed(adjacent_positions)])
|
|
+ col_ids.extend([str(p) + 'L' for p in reversed(adjacent_positions)])
|
|
col_ids.append('__key_segment__')
|
|
- col_ids.extend([text(p) + 'R' for p in adjacent_positions])
|
|
+ col_ids.extend([str(p) + 'R' for p in adjacent_positions])
|
|
if unit_annotation_key is not None and units['separate_annotation']:
|
|
col_ids.append(unit_annotation_key)
|
|
col_types = dict([(p, 'string') for p in col_ids])
|
|
@@ -2333,10 +2331,10 @@ def cooc_in_context(
|
|
row_labels = contingency.row_ids
|
|
row_labels2 = contingency2.row_ids
|
|
keep_from_contingency = [
|
|
- i for i in xrange(len(row_labels)) if row_labels[i] in row_labels2
|
|
+ i for i in range(len(row_labels)) if row_labels[i] in row_labels2
|
|
]
|
|
keep_from_contingency2 =[
|
|
- i for i in xrange(len(row_labels2)) if row_labels2[i] in row_labels
|
|
+ i for i in range(len(row_labels2)) if row_labels2[i] in row_labels
|
|
]
|
|
try:
|
|
np_contingency = np_contingency[keep_from_contingency].astype(int)
|
|
Index: LTTL-2.0.12/setup.py
|
|
===================================================================
|
|
--- LTTL-2.0.12.orig/setup.py
|
|
+++ LTTL-2.0.12/setup.py
|
|
@@ -73,11 +73,8 @@ setup(
|
|
packages=find_packages(exclude=['docs', 'tests', 'bugs']),
|
|
|
|
install_requires=[
|
|
- 'setuptools',
|
|
'numpy',
|
|
'scipy',
|
|
- 'future',
|
|
- 'backports.functools_lru_cache',
|
|
],
|
|
|
|
test_suite='nose.collector',
|