forked from pool/python-shijian
1872 lines
62 KiB
Python
1872 lines
62 KiB
Python
|
# -*- coding: utf-8 -*-
|
||
|
|
||
|
"""
|
||
|
################################################################################
|
||
|
# #
|
||
|
# shijian #
|
||
|
# #
|
||
|
################################################################################
|
||
|
# #
|
||
|
# LICENCE INFORMATION #
|
||
|
# #
|
||
|
# This program provides change, time, file, list, statistics, language and #
|
||
|
# other utilities. #
|
||
|
# #
|
||
|
# copyright (C) 2014 William Breaden Madden #
|
||
|
# #
|
||
|
# This software is released under the terms of the GNU General Public License #
|
||
|
# version 3 (GPLv3). #
|
||
|
# #
|
||
|
# This program is free software: you can redistribute it and/or modify it #
|
||
|
# under the terms of the GNU General Public License as published by the Free #
|
||
|
# Software Foundation, either version 3 of the License, or (at your option) #
|
||
|
# any later version. #
|
||
|
# #
|
||
|
# This program is distributed in the hope that it will be useful, but WITHOUT #
|
||
|
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or #
|
||
|
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for #
|
||
|
# more details. #
|
||
|
# #
|
||
|
# For a copy of the GNU General Public License, see #
|
||
|
# <http://www.gnu.org/licenses/>. #
|
||
|
# #
|
||
|
################################################################################
|
||
|
"""
|
||
|
|
||
|
from __future__ import division
|
||
|
import calendar
|
||
|
import collections
|
||
|
import datetime
|
||
|
import functools
|
||
|
import inspect
|
||
|
import logging
|
||
|
import math
|
||
|
import os
|
||
|
import pickle
|
||
|
import random
|
||
|
import re
|
||
|
import sys
|
||
|
import tempfile
|
||
|
import time
|
||
|
import unicodedata
|
||
|
import uuid
|
||
|
import warnings
|
||
|
if sys.version_info[0] < 3:
|
||
|
import subprocess32 as subprocess
|
||
|
else:
|
||
|
import subprocess
|
||
|
|
||
|
import dateutil.relativedelta
|
||
|
import matplotlib.pyplot as plt
|
||
|
import numpy
|
||
|
import pandas as pd
|
||
|
import scipy.interpolate
|
||
|
import scipy.io.wavfile
|
||
|
from sklearn.preprocessing import MinMaxScaler
|
||
|
import seaborn as sns
|
||
|
import technicolor
|
||
|
|
||
|
name = "shijian"
|
||
|
__version__ = "2023-10-19T0215Z"
|
||
|
|
||
|
log = logging.getLogger(name)
|
||
|
log.addHandler(technicolor.ColorisingStreamHandler())
|
||
|
log.setLevel(logging.INFO)
|
||
|
|
||
|
def _main():
    """Create the module-level ``clocks`` registry.

    Binds a new ``Clocks`` instance to the global name ``clocks``. ``Clock``
    instances look that name up in ``globals()`` when they are constructed
    and add themselves to it if it exists.
    """
    globals()["clocks"] = Clocks()
|
||
|
|
||
|
def time_UNIX(
    style="UNIX time S"
):
    """Return the current UTC time rendered by ``style_datetime_object``.

    style: style name handed through unchanged. The default, "UNIX time S",
    selects whole seconds since the UNIX epoch.
    """
    moment = datetime.datetime.utcnow()
    return style_datetime_object(datetime_object=moment, style=style)
|
||
|
|
||
|
def time_UTC(
    style=None
):
    """Return the current UTC time rendered by ``style_datetime_object``.

    style: style name handed through unchanged; ``None`` (the default) is
    not one of the named styles, so the formatter's fallback pattern applies.
    """
    moment = datetime.datetime.utcnow()
    return style_datetime_object(datetime_object=moment, style=style)
|
||
|
|
||
|
def filename_time_UNIX(
    style="UNIX time S.SSSSSS",
    extension=None
):
    """Propose a filename built from the current UNIX time.

    style:     style name passed to ``time_UNIX``.
    extension: optional suffix appended verbatim (include the dot yourself).

    The time value is converted with ``str`` and the result is passed through
    ``propose_filename``, whose return value is returned.
    """
    stem = str(time_UNIX(style=style))
    candidate = stem + extension if extension else stem
    return propose_filename(filename=candidate)
|
||
|
|
||
|
def filename_time_UTC(
    style="YYYY-MM-DDTHHMMSSZ",
    extension=None
):
    """Propose a filename built from the current UTC time.

    style:     style name passed to ``style_datetime_object``.
    extension: optional suffix appended verbatim (include the dot yourself).

    The formatted time is passed through ``propose_filename``, whose return
    value is returned.
    """
    stamp = style_datetime_object(
        datetime_object=datetime.datetime.utcnow(),
        style=style
    )
    candidate = stamp + extension if extension else stamp
    return propose_filename(filename=candidate)
|
||
|
|
||
|
def style_minimal_seconds(seconds):
    """Render a duration in seconds as e.g. "1 days 2 hours 3 minutes 4 seconds".

    The duration is wrapped in a ``dateutil`` ``relativedelta`` and its
    ``days``, ``hours``, ``minutes`` and ``seconds`` attributes are read back
    in that order. Units whose value is falsy (zero) are left out, so a zero
    duration yields an empty string.
    """
    delta = dateutil.relativedelta.relativedelta(seconds=seconds)
    parts = []
    for unit in ("days", "hours", "minutes", "seconds"):
        amount = getattr(delta, unit)
        if amount:
            parts.append("{} {}".format(int(amount), unit))
    return " ".join(parts)
|
||
|
|
||
|
def style_UNIX_timestamp(
    timestamp=None,
    style="YYYY-MM-DDTHHMMZ"
):
    """Render a UNIX timestamp (seconds since the epoch) in a named style.

    timestamp: seconds since the UNIX epoch, interpreted as UTC. ``None``
               (the default) means "now"; previously the default crashed
               inside ``utcfromtimestamp(None)``.
    style:     style name passed to ``style_datetime_object``.

    Returns whatever ``style_datetime_object`` returns for that style.
    """
    if timestamp is None:
        moment = datetime.datetime.utcnow()
    else:
        moment = datetime.datetime.utcfromtimestamp(timestamp)
    return style_datetime_object(datetime_object=moment, style=style)
|
||
|
|
||
|
# strftime patterns for the named datetime styles of style_datetime_object.
_STYLE_DATETIME_STRFTIME = {
    # filename safe
    "YYYY-MM-DDTHHMMZ": "%Y-%m-%dT%H%MZ",
    # filename safe with seconds
    "YYYY-MM-DDTHHMMSSZ": "%Y-%m-%dT%H%M%SZ",
    # filename safe with seconds and microseconds
    "YYYY-MM-DDTHHMMSSMMMMMMZ": "%Y-%m-%dT%H%M%S%fZ",
    # elegant
    "YYYY-MM-DD HH:MM:SS UTC": "%Y-%m-%d %H:%M:%S UTC",
    "YYYY-MM-DD HH:MM:SS Z": "%Y-%m-%d %H:%M:%S Z",
    # human-readable date
    "day DD month YYYY": "%A %d %B %Y",
    # human-readable time and date
    "HH:MM day DD month YYYY": "%H:%M %A %d %B %Y",
    # human-readable time with seconds and date
    "HH:MM:SS day DD month YYYY": "%H:%M:%S %A %d %B %Y",
    # human-readable date with time with seconds
    "day DD month YYYY HH:MM:SS": "%A %d %B %Y %H:%M:%S",
    # human-readable-audible time with seconds and date; the historical
    # misspelt key ("sounds") is kept so existing callers keep working
    "HH hours MM minutes SS sounds day DD month YYYY":
        "%H hours %M minutes %S seconds %A %d %B %Y",
    "HH hours MM minutes SS seconds day DD month YYYY":
        "%H hours %M minutes %S seconds %A %d %B %Y",
    # human-readable days, hours and minutes
    "DD:HH:MM": "%d:%H:%M",
    # human-readable days, hours, minutes and seconds
    "DD:HH:MM:SS": "%d:%H:%M:%S",
    # human-readable time with seconds
    "HH:MM:SS": "%H:%M:%S",
    # human-readable-audible time with seconds
    "HH hours MM minutes SS seconds": "%H hours %M minutes %S seconds",
}
# Pattern used for any style name that is not recognised (filename safe).
_STYLE_DATETIME_FALLBACK = "%Y-%m-%dT%H%MZ"
# Template used for timedeltas when the caller leaves the default style.
_STYLE_TIMEDELTA_DEFAULT = "{DD} days, {HH}:{MM}:{SS}"
# Naive UNIX epoch; the datetimes handled here are naive UTC values.
_STYLE_DATETIME_EPOCH = datetime.datetime(1970, 1, 1)


def style_datetime_object(
    datetime_object=None,
    style="YYYY-MM-DDTHHMMZ"
):
    """Render a ``datetime`` or ``timedelta`` according to a named style.

    datetime_object: a ``datetime.datetime`` or ``datetime.timedelta``
                     (subclasses are accepted).
    style:
        * for datetimes, one of the style names in the table above, or
          "UNIX time S.SSSSSS" (float seconds since the epoch) or
          "UNIX time S" (the same truncated to ``int``). Unknown names,
          including ``None``, fall back to "%Y-%m-%dT%H%MZ".
        * for timedeltas, a ``str.format`` template. Available fields are
          the totals ``{Y}``, ``{D}``, ``{H}``, ``{M}``, ``{S}`` and the
          zero-padded remainders ``{YYYY}``, ``{DD}``, ``{HH}``, ``{MM}``,
          ``{SS}``. The datetime default style name (or ``None``) selects
          "{DD} days, {HH}:{MM}:{SS}". A year is counted as 365 days and
          microseconds are ignored.

    Returns the formatted value, or ``None`` when ``datetime_object`` is
    neither a datetime nor a timedelta.
    """
    if isinstance(datetime_object, datetime.datetime):
        if style in ("UNIX time S.SSSSSS", "UNIX time S"):
            seconds = (datetime_object - _STYLE_DATETIME_EPOCH).total_seconds()
            return seconds if style == "UNIX time S.SSSSSS" else int(seconds)
        try:
            pattern = _STYLE_DATETIME_STRFTIME.get(
                style, _STYLE_DATETIME_FALLBACK
            )
        except TypeError:
            # Unhashable style objects cannot name a style; use the fallback.
            pattern = _STYLE_DATETIME_FALLBACK
        return datetime_object.strftime(pattern)
    if isinstance(datetime_object, datetime.timedelta):
        if style is None or style == "YYYY-MM-DDTHHMMZ":
            style = _STYLE_TIMEDELTA_DEFAULT
        seconds_total = (
            datetime_object.seconds + datetime_object.days * 24 * 3600
        )
        # Integer divmod keeps the breakdown exact for any magnitude.
        minutes_total, seconds = divmod(seconds_total, 60)
        hours_total, minutes = divmod(minutes_total, 60)
        days_total, hours = divmod(hours_total, 24)
        years_total, days = divmod(days_total, 365)
        return style.format(**{
            "Y": years_total,
            "D": days_total,
            "H": hours_total,
            "M": minutes_total,
            "S": seconds_total,
            "YYYY": str(years_total).zfill(4),
            "DD": str(days).zfill(2),
            "HH": str(hours).zfill(2),
            "MM": str(minutes).zfill(2),
            "SS": str(seconds).zfill(2)
        })
    return None
|
||
|
|
||
|
def HHMM_to_minutes(
    HHMM  # string "HHMM"
):
    """Convert a clock string "HHMM" to minutes since midnight.

    The first two characters are parsed as hours and the rest as minutes,
    both with ``int``, so e.g. "1700" gives 1020.
    """
    hours = int(HHMM[:2])
    minutes = int(HHMM[2:])
    return hours * 60 + minutes
|
||
|
|
||
|
def now_in_minutes():
    """Return the current UTC time of day as minutes since midnight."""
    moment = datetime.datetime.utcnow()
    hours, minutes = moment.hour, moment.minute
    return hours * 60 + minutes
|
||
|
|
||
|
def in_daily_time_range(
    time_range=None,  # string "HHMM--HHMM" e.g. "1700--1000"
    time_start=None,  # string "HHMM" e.g. "1700"
    time_stop=None    # string "HHMM" e.g. "1000"
):
    """Tell whether the current UTC time of day lies in a daily window.

    The window is given either as ``time_range`` ("HHMM--HHMM") or as the
    pair ``time_start``/``time_stop``; ``time_range`` wins when present.
    Windows that wrap past midnight (start later than stop) are handled by
    comparing offsets modulo one day. Both edges are inclusive.

    Returns ``None`` when no argument is given, otherwise a bool.
    """
    if all(value is None for value in (time_range, time_start, time_stop)):
        return None
    if time_range is not None:
        edges = time_range.split("--")
        time_start, time_stop = edges[0], edges[1]
    minutes_now = now_in_minutes()
    minutes_start = HHMM_to_minutes(time_start)
    minutes_stop = HHMM_to_minutes(time_stop)
    minutes_per_day = 1440
    offset_of_now = (minutes_now - minutes_start) % minutes_per_day
    window_length = (minutes_stop - minutes_start) % minutes_per_day
    return offset_of_now <= window_length
|
||
|
|
||
|
def timer(function):
    """Decorator that times every call of ``function`` with a ``Clock``.

    A ``Clock`` named after the function is created (and thereby started)
    before each call and stopped afterwards. The clock is stopped in a
    ``finally`` clause so a call that raises is still timed and does not
    leave a running clock behind. The wrapped function's return value or
    exception is passed through unchanged.
    """
    @functools.wraps(function)
    def decoration(
        *args,
        **kwargs
    ):
        clock = Clock(name=function.__name__)
        try:
            return function(*args, **kwargs)
        finally:
            clock.stop()
    return decoration
|
||
|
|
||
|
class Clock(object):
    """Stopwatch that accumulates elapsed UTC wall-clock time.

    Elapsed time is kept in ``accumulator`` (a ``datetime.timedelta``) and
    is brought up to date whenever ``update``, ``elapsed``, ``time`` or
    ``stop`` is called. On construction a clock adds itself to the
    module-level ``clocks`` registry if that name exists.
    """

    def __init__(
        self,
        name=None,
        start=True
    ):
        """
        name:  label for the clock; a fresh ``UID()`` is used when omitted.
        start: when true, start the clock immediately.
        """
        self._name = name
        self._start = start            # Boolean start clock on instantiation
        self._start_time = None        # internal (value to return)
        self._start_time_tmp = None    # internal (value for calculations)
        self._stop_time = None         # internal (value to return)
        self._update_time = None       # internal
        # If no name is specified, generate a unique one.
        if self._name is None:
            self._name = UID()
        # If a global clock list is detected, add a clock instance to it.
        if "clocks" in globals():
            clocks.add(self)
        self.reset()
        if self._start:
            self.start()

    def start(self):
        """Start timing from now and record the start time."""
        now = datetime.datetime.utcnow()
        self._start_time_tmp = now
        self._start_time = now

    def stop(self):
        """Bank the time elapsed so far, stop timing and record the stop time."""
        self.update()
        self._update_time = None
        self._start_time_tmp = None
        self._stop_time = datetime.datetime.utcnow()

    # Update the clock accumulator.
    def update(self):
        """Add the time since the last update (or since start) to the total.

        Does nothing when the clock is not running, so stopping an idle or
        already stopped clock is harmless.
        """
        reference = self._update_time or self._start_time_tmp
        if reference is None:
            return
        # One reading serves both the sum and the new reference point, so
        # no time falls between two separate clock reads.
        now = datetime.datetime.utcnow()
        self.accumulator += now - reference
        self._update_time = now

    def reset(self):
        """Zero the accumulated time and forget all reference points.

        The clock is left idle; call ``start`` to time again. Clearing
        ``_update_time`` as well keeps time from before the reset out of
        the next measurement.
        """
        self.accumulator = datetime.timedelta(0)
        self._start_time_tmp = None
        self._update_time = None

    # If the clock has a start time, add the difference between now and the
    # start time to the accumulator and return the accumulation. If the clock
    # does not have a start time, return the accumulation.
    def elapsed(self):
        """Return the accumulated time as a ``datetime.timedelta``."""
        if self._start_time_tmp:
            self.update()
        return self.accumulator

    def name(self):
        """Return the clock's name."""
        return self._name

    def time(self):
        """Return the accumulated time in seconds (float)."""
        return self.elapsed().total_seconds()

    def start_time(self):
        """Return the styled start time, or "none" if never started."""
        if self._start_time:
            return style_datetime_object(datetime_object=self._start_time)
        else:
            return "none"

    def stop_time(self):
        """Return the styled stop time, or "none" if never stopped."""
        if self._stop_time:
            return style_datetime_object(datetime_object=self._stop_time)
        else:
            return "none"

    def report(self):
        """Return a multi-line table of the clock's attributes."""
        string = "clock attribute".ljust(39) + "value"
        string += "\nname".ljust(40) + self.name()
        string += "\ntime start (s)".ljust(40) + self.start_time()
        string += "\ntime stop (s)".ljust(40) + self.stop_time()
        string += "\ntime elapsed (s)".ljust(40) + str(self.time())
        string += "\n"
        return string

    def printout(self):
        """Print ``report()``."""
        print(self.report())
|
||
|
|
||
|
class Clocks(object):
    """Registry of clocks that can summarise their times in a text table.

    Registered objects only need ``name()`` and ``time()`` methods.
    """

    def __init__(
        self
    ):
        self._list_of_clocks = []
        self._default_report_style = "statistics"

    def add(
        self,
        clock
    ):
        """Register ``clock``."""
        self._list_of_clocks.append(clock)

    def report(
        self,
        style=None
    ):
        """Return a text report on the registered clocks.

        style: "statistics" (mean time per clock name) or "full" (one row
               per clock); ``None`` selects the default, "statistics".

        Returns "no clocks" when nothing is registered.
        Raises ValueError for an unknown style (previously this surfaced as
        an UnboundLocalError).
        """
        if style is None:
            style = self._default_report_style
        if not self._list_of_clocks:
            return "no clocks"
        if style == "statistics":
            # Group the times of all clocks by clock name, keeping the order
            # in which names were first seen.
            times_by_name = {}
            for clock in self._list_of_clocks:
                times_by_name.setdefault(clock.name(), []).append(clock.time())
            rows = ["clock type".ljust(39) + "mean time (s)"]
            for name, values in times_by_name.items():
                rows.append(
                    str(name).ljust(39) + str(sum(values) / len(values))
                )
        elif style == "full":
            # List the value of every clock individually.
            rows = ["clock".ljust(39) + "time (s)"]
            for clock in self._list_of_clocks:
                rows.append(str(clock.name()).ljust(39) + str(clock.time()))
        else:
            raise ValueError(
                "unknown report style: {style}".format(style=repr(style))
            )
        return "\n".join(rows) + "\n"

    def printout(
        self,
        style=None
    ):
        """Print ``report(style)``."""
        if style is None:
            style = self._default_report_style
        print(self.report(style=style))
|
||
|
|
||
|
class Progress(object):
    """Track fractional progress and estimate the completion time.

    ``data`` is a list of ``(fraction, UNIX time)`` tuples. The completion
    time is obtained by evaluating the model returned by ``model_linear``
    (defined elsewhere in this module) at fraction 1.
    """

    def __init__(
        self
    ):
        self.data = []
        self.quick_calculation = False
        self.update_rate = 1  # s
        self.clock = Clock(name="progress update clock")

    def engage_quick_calculation_mode(
        self
    ):
        """Record at most one datum per ``update_rate`` seconds."""
        self.quick_calculation = True

    def disengage_quick_calculation_mode(
        self
    ):
        """Record every datum."""
        self.quick_calculation = False

    def add_datum(
        self,
        fraction=None,
        style=None
    ):
        """Record the current progress and return ``status(style)``.

        The first datum is always stored. In quick calculation mode later
        data are stored only once ``update_rate`` seconds have passed on the
        internal clock, which is then restarted.
        """
        if len(self.data) == 0:
            self.data.append((fraction, time_UNIX()))
        elif self.quick_calculation is True:
            time_duration_since_last_update = self.clock.time()
            if time_duration_since_last_update >= self.update_rate:
                self.data.append((fraction, time_UNIX()))
                self.clock.reset()
                self.clock.start()
        else:
            self.data.append((fraction, time_UNIX()))
        return self.status(style=style)

    def estimated_time_of_completion(
        self
    ):
        """Return the estimated completion time as a local ``datetime``.

        Returns the integer 0 when fewer than two data points exist. If the
        model cannot be evaluated, the estimate falls back to timestamp 0.
        """
        if len(self.data) <= 1:
            return 0
        try:
            model_values = model_linear(
                self.data,
                quick_calculation=self.quick_calculation
            )
            b0 = model_values[0]
            b1 = model_values[1]
            x = 1  # fraction at completion
            y = b0 + b1 * x
        except Exception:
            # Best effort: a failed fit must not abort progress reporting,
            # but KeyboardInterrupt and SystemExit are allowed to propagate.
            y = 0
        return datetime.datetime.fromtimestamp(int(y))

    # estimated time of arrival
    def ETA(
        self
    ):
        """Return the styled estimated completion time (now, if unknown)."""
        if len(self.data) <= 1:
            return style_datetime_object(
                datetime_object=datetime.datetime.now()
            )
        else:
            return style_datetime_object(
                datetime_object=self.estimated_time_of_completion()
            )

    # estimated time remaining
    def ETR(
        self
    ):
        """Return the estimated seconds remaining, never negative."""
        if len(self.data) <= 1:
            return 0
        delta_time = \
            self.estimated_time_of_completion() - datetime.datetime.now()
        if delta_time.total_seconds() >= 0:
            return delta_time.total_seconds()
        else:
            return 0

    def fraction(
        self
    ):
        """Return the most recently recorded fraction."""
        return self.data[-1][0]

    def percentage(
        self
    ):
        """Return the most recently recorded fraction as a percentage."""
        return 100 * self.fraction()

    def status(
        self,
        style=None
    ):
        """Return a one-line status message ending in a carriage return.

        Only ``style=None`` is implemented; any other style returns ``None``.
        """
        if style is None:
            message =\
                "{percentage:.2f}% complete; " +\
                "estimated completion time: {ETA} ({ETR:.2f} s)\r"
            return message.format(
                percentage=self.percentage(),
                ETA=self.ETA(),
                ETR=self.ETR()
            )
        return None
|
||
|
|
||
|
def UID():
    """Return a freshly generated version 4 UUID as a string."""
    identifier = uuid.uuid4()
    return str(identifier)
|
||
|
|
||
|
def unique_number(
    style=None
):
    """Return an integer not returned before by this function in this process.

    style: "integer 3 significant figures" counts 100, 101, ... 999 and
           raises ``Exception`` once 999 has been exceeded; any other value
           counts 1, 2, 3, ...

    The numbers issued so far are kept in the module-level lists
    ``unique_numbers_3_significant_figures`` and ``unique_numbers``, which
    are created on first use.
    """
    global unique_numbers
    global unique_numbers_3_significant_figures
    # mode: integer 3 significant figures
    if style == "integer 3 significant figures":
        if "unique_numbers_3_significant_figures" not in globals():
            unique_numbers_3_significant_figures = []
        issued = unique_numbers_3_significant_figures
        issued.append(issued[-1] + 1 if issued else 100)
        if issued[-1] > 999:
            raise Exception(
                "no unique integers of 3 significant figures remain "
                "(999 exceeded)"
            )
        return issued[-1]
    # mode: integer
    if "unique_numbers" not in globals():
        unique_numbers = []
    unique_numbers.append(unique_numbers[-1] + 1 if unique_numbers else 1)
    return unique_numbers[-1]
|
||
|
|
||
|
def unique_3_digit_number():
    """Return the next unique integer from the 100 to 999 counter.

    Shorthand for ``unique_number`` in its
    "integer 3 significant figures" mode.
    """
    style = "integer 3 significant figures"
    return unique_number(style=style)
|
||
|
|
||
|
## @brief make text filename or URL safe
def slugify(
    text=None,
    filename=True,
    URL=False,
    return_str=True
):
    """Reduce text to ASCII word characters, whitespace and hyphens.

    text:       the text to clean (required despite the ``None`` default).
    filename:   when true and ``URL`` is false, runs of whitespace become
                a single underscore.
    URL:        when true, the text is lower-cased and runs of whitespace
                and hyphens become a single hyphen; takes precedence over
                ``filename``.
    return_str: when true, the result is passed through ``str``.

    Characters are decomposed (NFKD) first so that accented letters keep
    their ASCII base letter; anything else outside ASCII is dropped.
    """
    if not sys.version_info >= (3, 0):
        # Python 2 only: byte strings must be decoded before normalising.
        text = unicode(text, "utf-8")  # noqa: F821
    text = unicodedata.normalize("NFKD", text)
    text = text.encode("ascii", "ignore").decode("utf-8")
    # Raw strings: "\w" and "\s" are regex escapes, not string escapes.
    text = re.sub(r"[^\w\s-]", "", text).strip()
    if URL:
        text = re.sub(r"[-\s]+", "-", text.lower())
    elif filename:
        text = re.sub(r"[\s]+", "_", text)
    if return_str:
        text = str(text)
    return text
|
||
|
|
||
|
## @brief propose a filename
# @detail This function returns a filename string. If a default filename is not
# specified, the function generates one based on the current time. If a default
# filename is specified, the function uses it as the default filename. By
# default, the function then checks to see if using the filename would cause
# overwriting of an existing file. If overwriting is possible, the function
# appends an integer to the filename in a loop in order to generate a filename
# that would not cause overwriting of an existing file. The function can be set
# to overwrite instead of using the default overwrite protection behaviour.
# @return filename string
def propose_filename(
    filename=None,
    overwrite=False,
    slugify_filename=True,
    exclude_extension_from_slugify=True
):
    """Propose a filename, by default one that does not exist yet.

    filename:         desired filename; when empty, ``time_UTC()`` is used.
    overwrite:        when false, "_1", "_2", ... is inserted before the
                      extension until the path does not exist.
    slugify_filename: pass the name through ``slugify``.
    exclude_extension_from_slugify:
                      slugify only the base name, keeping the directory
                      and the extension as given; otherwise the whole
                      string is slugified.

    Returns the proposed filename string.
    """
    # If no file name is specified, generate one.
    if not filename:
        filename = time_UTC()
    filename_proposed = filename
    if slugify_filename:
        if exclude_extension_from_slugify:
            # Keep the directory so the existence check below looks at the
            # location the caller asked for.
            directory, basename = os.path.split(filename)
            base, extension = os.path.splitext(basename)
            filename_proposed = os.path.join(
                directory, slugify(text=base) + extension
            )
        else:
            filename_proposed = slugify(text=filename)
    if not overwrite:
        # Number the candidates from the proposed (possibly slugified)
        # name, so every candidate follows the same naming.
        directory, basename = os.path.split(filename_proposed)
        base, extension = os.path.splitext(basename)
        count = 0
        while os.path.exists(filename_proposed):
            count += 1
            filename_proposed = os.path.join(
                directory, base + "_" + str(count) + extension
            )
    return filename_proposed
|
||
|
|
||
|
def tmp_filepath():
    """
    Return an extensionless filepath under /tmp. No file is created at the
    returned filepath.
    """
    # NOTE(review): _get_candidate_names is a private tempfile helper.
    candidate_names = tempfile._get_candidate_names()
    return "/tmp/" + next(candidate_names)
|
||
|
|
||
|
def tail(
    filepath="log.txt",
    lines=50
):
    """
    Return a specified number of last lines of a specified file, as produced
    by the external ``tail`` program (the raw output of
    ``subprocess.check_output``). If there is an error, the file does not
    exist or the output is empty, return False.

    ``~`` and environment variables in ``filepath`` are expanded.
    """
    try:
        filepath = os.path.expanduser(os.path.expandvars(filepath))
        if not os.path.isfile(filepath):
            return False
        text = subprocess.check_output(["tail", "-" + str(lines), filepath])
    except Exception:
        # Any failure is reported as False, as documented; unlike a bare
        # except this lets KeyboardInterrupt and SystemExit propagate.
        return False
    return text if text else False
|
||
|
|
||
|
def ensure_platform_release(
    keyphrase="el7",
    require=True,
    warn=False
):
    """Check that ``platform.release()`` contains ``keyphrase``.

    keyphrase: substring expected in the platform release string.
    require:   when true, a mismatch is logged as fatal and raises
               ``EnvironmentError`` carrying the same message.
    warn:      when true, a mismatch is logged as a warning.

    Returns ``None``; does nothing when the keyphrase is present.
    """
    import platform
    release = platform.release()
    if keyphrase in release:
        return
    message = (
        "inappropriate environment: " +
        "\"{keyphrase}\" required; \"{release}\" available".format(
            keyphrase=keyphrase,
            release=release
        )
    )
    if warn is True:
        log.warning(message)
    if require is True:
        log.fatal(message)
        raise EnvironmentError(message)
|
||
|
|
||
|
def ensure_program_available(
    program
):
    """Raise ``EnvironmentError`` unless ``which(program)`` finds the program.

    The check and its outcome are logged; the exception carries the same
    message as the error log entry.
    """
    log.debug("ensure program {program} available".format(
        program=program
    ))
    if which(program) is None:
        message = "program {program} not available".format(
            program=program
        )
        log.error(message)
        raise EnvironmentError(message)
    log.debug("program {program} available".format(
        program=program
    ))
|
||
|
|
||
|
def which(
    program
):
    """Locate an executable, in the manner of the shell's ``which``.

    program: a bare program name, looked up in each directory of ``PATH``,
             or a path containing a directory part, which is checked as is.

    Returns the path of the first match that is a regular file with execute
    permission, or ``None``. When ``PATH`` is not set, ``os.defpath`` is
    searched instead of raising ``KeyError``.
    """
    def is_executable(path):
        return os.path.isfile(path) and os.access(path, os.X_OK)

    directory_part = os.path.split(program)[0]
    if directory_part:
        return program if is_executable(program) else None
    search_path = os.environ.get("PATH", os.defpath)
    for directory in search_path.split(os.pathsep):
        # PATH entries are sometimes wrapped in double quotes.
        candidate = os.path.join(directory.strip('"'), program)
        if is_executable(candidate):
            return candidate
    return None
|
||
|
|
||
|
def running(
    program
):
    """Tell whether a process matching ``program`` appears in ``ps -A``.

    A line of the ``ps -A`` output counts as a match when it contains the
    encoded program name and does not contain "defunct".
    """
    needle = str.encode(program)
    listing, _ = subprocess.Popen(
        ["ps", "-A"],
        stdout=subprocess.PIPE
    ).communicate()
    for line in listing.split(b"\n"):
        if needle in line and b"defunct" not in line:
            return True
    return False
|
||
|
|
||
|
def ensure_file_existence(
    filename
):
    """Raise ``IOError`` unless ``filename`` is an existing regular file.

    Environment variables in ``filename`` are expanded for the check. The
    check and its outcome are logged; the exception carries the same message
    as the error log entry.
    """
    log.debug("ensure existence of file {filename}".format(
        filename=filename
    ))
    if not os.path.isfile(os.path.expandvars(filename)):
        message = "file {filename} does not exist".format(
            filename=filename
        )
        log.error(message)
        raise IOError(message)
    log.debug("file {filename} found".format(
        filename=filename
    ))
|
||
|
|
||
|
def rm_file(filename):
    """Delete the file at ``filename``; errors from the OS propagate."""
    os.unlink(filename)
|
||
|
|
||
|
## @brief return a naturally-sorted list of filenames that are in a sequence or
## a dictionary of lists of filenames that are in a sequence
def find_file_sequences(
    extension="png",
    directory=".",
    return_first_sequence_only=True,
):
    """Group the numbered files of a directory into sequences.

    extension: text expected after a dot in the filename; it is inserted
               into the regular expression unescaped.
    directory: directory whose entries are examined.
    return_first_sequence_only:
               when ``True``, return the naturally-sorted filenames of the
               first sequence found (an empty list when there is none);
               otherwise return the mapping of all sequences.

    Filenames belong to the same sequence when they are identical after
    every run of digits is replaced by "XXX"; that pattern is the key of
    the returned mapping.
    """
    numbered = re.compile(r".*\d+.*\." + extension)
    filename_sequences = collections.defaultdict(list)
    for filename in os.listdir(directory):
        if numbered.match(filename):
            pattern = re.sub(r"\d+", "XXX", filename)
            filename_sequences[pattern].append(filename)
    if return_first_sequence_only is not True:
        return filename_sequences
    if not filename_sequences:
        # Nothing matched: report an empty sequence rather than letting
        # next() raise StopIteration.
        return []
    first_key_identified = next(iter(filename_sequences))
    return natural_sort(filename_sequences[first_key_identified])
|
||
|
|
||
|
## @brief return a list of files at a specified directory
def ls_files(
    directory="."
):
    """Return the names of the regular files directly inside ``directory``.

    Names are returned without the directory prefix, in ``os.listdir``
    order; subdirectories are not descended into.
    """
    found = []
    for entry in os.listdir(directory):
        if os.path.isfile(os.path.join(directory, entry)):
            found.append(entry)
    return found
|
||
|
|
||
|
## @brief return a list of the files at a specified directory and in all of
## its subdirectories
def directory_listing(
    directory=".",
):
    """Return the paths of all files below ``directory``, recursively.

    Each path is the walked directory joined with the filename, so it
    starts with ``directory`` as given. Directories themselves are not
    listed.
    """
    return [
        os.path.join(root, filename)
        for root, _, filenames in os.walk(directory)
        for filename in filenames
    ]
|
||
|
|
||
|
## @brief return a list of filepaths at a directory, optionally filtered to
## contain a specified extension
def filepaths_at_directory(
    directory=None,
    extension_required=None
):
    """Return absolute paths of the regular files directly in ``directory``.

    directory:          directory to list (required despite the default).
    extension_required: when given, keep only paths whose extension (as
                        split off by ``os.path.splitext``, dot included)
                        contains this text.

    Raises ``IOError`` when ``directory`` is missing or not a directory.
    """
    if directory is None or not os.path.isdir(directory):
        message = "error -- directory {directory} not found".format(
            directory=directory
        )
        log.error(message)
        raise IOError(message)
    filepaths = [
        os.path.abspath(os.path.join(directory, filename))
        for filename in os.listdir(directory)
        if os.path.isfile(os.path.join(directory, filename))
    ]
    if extension_required:
        filepaths = [
            filepath for filepath in filepaths
            if extension_required in os.path.splitext(filepath)[1]
        ]
    return filepaths
|
||
|
|
||
|
def engage_command(
    command=None,
    background=True,
    timeout=None
):
    """Run a shell command line with /bin/bash.

    command:    the command line, interpreted by the shell.
    background: when true (the default), start the command and return
                ``None`` immediately; ``timeout`` is then ignored.
    timeout:    seconds to wait for a foreground command.

    For a foreground command, returns its captured standard output as
    delivered by ``Popen.communicate``, or ``False`` when waiting for it
    fails (for example on timeout), in which case the process is killed.
    """
    # NOTE(review): the command is handed to a shell as is; callers must not
    # build it from untrusted input.
    log.debug(command)
    if background:
        if timeout:
            log.warning("warning -- command set to run in background; ignoring timeout")
        subprocess.Popen(
            [command],
            shell=True,
            executable="/bin/bash"
        )
        return None
    process = subprocess.Popen(
        [command],
        shell=True,
        executable="/bin/bash",
        stdout=subprocess.PIPE
    )
    try:
        # communicate() alone both drains the pipe and waits. Calling wait()
        # first can deadlock once the child fills the stdout pipe.
        output, _ = process.communicate(timeout=timeout)
        return output
    except Exception:
        process.kill()
        return False
|
||
|
|
||
|
def percentage_power():
    """Return the battery charge reported by ``upower``, e.g. "85%".

    Returns "100%" when no battery device is listed but a line power device
    is, and ``None`` when neither is listed or anything goes wrong (for
    example ``upower`` is not installed).
    """
    def run(command):
        # Run in the foreground, because a background command returns no
        # output, and decode the captured bytes so they split as text.
        output = engage_command(command=command, background=False)
        if isinstance(output, bytes):
            output = output.decode("utf-8", "replace")
        return output

    try:
        devices = [line for line in run("upower -e").split("\n") if line]
        batteries = [device for device in devices if "battery" in device]
        line_supplies = [device for device in devices if "line" in device]
        if batteries:
            power_data = run(
                "upower -i {filename}".format(filename=batteries[0])
            )
            return [
                line for line in power_data.split("\n")
                if "percentage" in line
            ][0].split()[1]
        if line_supplies:
            return "100%"
        return None
    except Exception:
        return None
|
||
|
|
||
|
def convert_type_list_elements(
    list_object=None,
    element_type=str
):
    """
    Recursively convert all elements and all elements of all sublists of a list
    to a specified type and return the new list.

    element_type may be any callable taking one element (``str``, ``int``,
    ``float``, ...). Only ``list`` instances are descended into. The input
    list is not modified.
    """
    return [
        convert_type_list_elements(
            list_object=element,
            element_type=element_type
        ) if isinstance(element, list) else element_type(element)
        for element in list_object
    ]
|
||
|
|
||
|
class List_Consensus(list):
    """
    This class is designed to instantiate a list of elements. It features
    functionality that limits approximately the memory usage of the list. On
    estimating the size of the list as greater than the specified or default
    size limit, the list reduces the number of elements it contains. The list
    provides functionality to return its most frequent element, which can be
    used to determine its "consensus" element.

    The size estimate is ``sys.getsizeof`` of the list itself. Elements must
    be hashable for the frequency counts.
    """

    def __init__(
        self,
        *args
    ):
        # list initialisation (this form of super works on Python 2 and 3)
        super(List_Consensus, self).__init__(*args)
        self.size_constraint = 150  # bytes

    def set_size_constraint(
        self,
        size=None
    ):
        """Set the default size limit in bytes; ``None`` leaves it unchanged."""
        if size is not None:
            self.size_constraint = size

    def ensure_size(
        self,
        size=None
    ):
        """
        This function removes the least frequent elements until the size
        constraint is met or the list is empty.
        """
        if size is None:
            size = self.size_constraint
        # Stop at an empty list: nothing is left to remove even if the bare
        # list object is larger than the limit.
        while self and sys.getsizeof(self) > size:
            element_frequencies = collections.Counter(self)
            infrequent_element = element_frequencies.most_common()[-1][0]
            self.remove(infrequent_element)

    def append(
        self,
        element,
        ensure_size=True,
        size=None
    ):
        """Append ``element`` and, unless disabled, enforce the size limit."""
        if size is None:
            size = self.size_constraint
        list.append(self, element)
        if ensure_size:
            self.ensure_size(
                size=size
            )

    def consensus(
        self
    ):
        """Return the most frequent element, or ``None`` if there is none.

        ``None`` is also returned when the elements cannot be counted
        because they are unhashable.
        """
        if not self:
            return None
        try:
            return collections.Counter(self).most_common(1)[0][0]
        except TypeError:
            return None
|
||
|
|
||
|
## @brief return a naturally-sorted list
# @detail This function returns a naturally-sorted list from an input list.
def natural_sort(
    list_object
):
    """Return a new list sorted so that digit runs compare numerically.

    Each string is split into digit and non-digit chunks; digit chunks are
    compared as integers and the rest case-insensitively, so "a2" sorts
    before "a10". The input is not modified.
    """
    def chunk_value(chunk):
        return int(chunk) if chunk.isdigit() else chunk.lower()

    def sort_key(item):
        return [chunk_value(chunk) for chunk in re.split("([0-9]+)", item)]

    return sorted(list_object, key=sort_key)
|
||
|
|
||
|
def indices_of_list_element_duplicates(
    x
):
    """Yield the index of every element that repeats an earlier element.

    The first occurrence of a value is not reported. Lists are compared as
    tuples and dicts as tuples of their items, so that they can be stored
    in the set of values seen; other elements must be hashable.
    """
    encountered = set()
    for position, item in enumerate(x):
        if isinstance(item, list):
            item = tuple(item)
        if isinstance(item, dict):
            item = tuple(item.items())
        if item in encountered:
            yield position
            continue
        encountered.add(item)
|
||
|
|
||
|
def indices_of_greatest_values(
    x,
    number=5
):
    """Return the indices of the ``number`` greatest values of ``x``.

    The indices are ordered by ascending value, so the index of the
    greatest value comes last; equal values keep their original order.
    When ``number`` exceeds ``len(x)`` all indices are returned, and when
    it is zero or negative an empty list is returned.
    """
    if number <= 0:
        # A slice starting at -0 would select everything, not nothing.
        return []
    number = min(number, len(x))
    ranked = sorted(range(len(x)), key=lambda index: x[index])
    return ranked[-number:]
|
||
|
|
||
|
def unique_list_elements(x):
    """Return the distinct elements of ``x`` in order of first appearance.

    Elements are compared with ``in`` on a list, so unhashable elements
    (for example lists) are supported.
    """
    distinct = []
    for candidate in x:
        if candidate in distinct:
            continue
        distinct.append(candidate)
    return distinct
|
||
|
|
||
|
def select_spread(
    list_of_elements   = None,
    number_of_elements = None
    ):
    """
    Return the requested number of elements of a list, chosen so that they
    are spread approximately evenly over it. If the list has no more
    elements than requested, the list itself is returned.
    """
    available = len(list_of_elements)
    if available <= number_of_elements:
        return list_of_elements
    if number_of_elements == 0:
        return []
    last_index = available - 1
    if number_of_elements == 1:
        # A single selection is taken from the middle of the list.
        return [list_of_elements[int(round(last_index / 2))]]
    # Take one element from the middle of the first of the equal shares, then
    # select the remaining elements from what follows that share.
    pick = int(round(last_index / (2 * number_of_elements)))
    cut = int(round(last_index / (number_of_elements)))
    selected = [list_of_elements[pick]]
    remainder = select_spread(list_of_elements[cut:], number_of_elements - 1)
    return selected + remainder
|
||
|
|
||
|
def split_list(
    list_object = None,
    granularity = None
    ):
    """
    This function splits a list into a specified number of lists. It returns a
    list of lists that correspond to these parts. Negative numbers of parts are
    not accepted (an Exception is raised), zero parts raises ZeroDivisionError
    and numbers of parts greater than or equal to the number of elements in
    the list result in one single-element list per element being returned.
    """
    if granularity < 0:
        raise Exception("negative granularity")
    length = len(list_object)
    if granularity == 0:
        # Zero parts has always failed with ZeroDivisionError (from the mean
        # length computation); keep that failure mode explicit.
        raise ZeroDivisionError("zero granularity")
    if length <= granularity:
        return [[element] for element in list_object]
    # Each boundary is computed directly from its index. Repeatedly adding a
    # floating-point mean length accumulates rounding error and can fall just
    # short of the list length, producing one part too many.
    number_of_parts = int(math.ceil(granularity))
    boundaries = [
        int(index * length / float(granularity))
        for index in range(number_of_parts)
    ]
    boundaries.append(length)
    return [
        list_object[start:stop]
        for start, stop in zip(boundaries[:-1], boundaries[1:])
    ]
|
||
|
|
||
|
def ranges_edge_pairs(
    extent       = None,
    range_length = None
    ):
    """
    Return the edges of ranges within an extent of some length. For example, to
    separate 76 variables into groups, the ranges of the variables could be
    0 to 20, 21 to 41, 42 to 62 and 63 to 76. These range edges could be
    returned by this function as a list of tuples:

    >>> ranges_edge_pairs(
    ...     extent       = 76, # number of variables
    ...     range_length = 20  # distance between the edges of each range
    ... )
    [(0, 20), (21, 41), (42, 62), (63, 76)]

    Each range starts one past the end of the previous range and the final
    range is clipped to the extent. Ranges that would start beyond the extent
    are not returned.
    """
    number_of_ranges = int(math.ceil(extent / float(range_length)))
    return [
        (
            index * range_length + index,
            min((index + 1) * range_length + index, extent)
        )
        for index in range(0, number_of_ranges)
        # Consecutive ranges are range_length + 1 apart, so the count above
        # can overshoot; drop any range whose start lies past the extent.
        if index * range_length + index <= extent
    ]
|
||
|
|
||
|
def _Markdown_list_to_mapping(
    Markdown_list,
    mapping_type
    ):
    """
    Parse a nested Markdown list of `- name: value` items into nested mappings
    of the specified type. An item without a value opens a new branch that
    holds the items which follow it. Values are returned as strings.
    """
    line = re.compile(r"( *)- ([^:\n]+)(?:: ([^\n]*))?\n?")
    depth = 0
    stack = [mapping_type()]
    # levels[i] is the indentation of the items stored in stack[i]; it is
    # None until the first item is stored there.
    levels = [None]
    for indent, name, value in line.findall(Markdown_list):
        indent = len(indent)
        if indent > depth:
            assert not stack[-1], "unexpected indent"
        elif indent < depth:
            # Close every branch that is nested more deeply than this item.
            # Closing only one branch would attach the item to the wrong
            # parent after a dedent of more than one level.
            while len(stack) > 1 and (
                levels[-1] is None or levels[-1] > indent
            ):
                stack.pop()
                levels.pop()
        stack[-1][name] = value or mapping_type()
        if levels[-1] is None:
            levels[-1] = indent
        if not value:
            # new branch
            stack.append(stack[-1][name])
            levels.append(None)
        depth = indent
    return stack[0]

def Markdown_list_to_dictionary(
    Markdown_list = None
    ):
    """
    Convert a nested Markdown list of `- name: value` items to a dictionary.
    Items without a value become nested dictionaries.
    """
    return _Markdown_list_to_mapping(Markdown_list, dict)

def Markdown_list_to_OrderedDict(
    Markdown_list = None
    ):
    """
    Convert a nested Markdown list of `- name: value` items to an OrderedDict
    that preserves the order of the items. Items without a value become nested
    OrderedDicts.
    """
    return _Markdown_list_to_mapping(Markdown_list, collections.OrderedDict)
|
||
|
|
||
|
def open_configuration(
    filename = None
    ):
    """
    Read a configuration file written as a nested Markdown list and return
    its contents as an OrderedDict.
    """
    # The context manager closes the file as soon as it has been read.
    with open(filename, "r") as file_configuration:
        text_configuration = file_configuration.read()
    return Markdown_list_to_OrderedDict(text_configuration)
|
||
|
|
||
|
def change_list_resolution(
    values             = None,
    length             = None,
    interpolation_type = "linear",
    dimensions         = 1
    ):
    """
    Resample a list of values to a specified length by interpolating over the
    positions of the values. With dimensions 1 the resampled values are
    returned; with dimensions 2 a tuple of the resampled positions and the
    resampled values is returned. Any other dimensions value returns None.
    """
    positions = list(range(0, len(values)))
    interpolate = scipy.interpolate.interp1d(
        positions,
        values,
        kind = interpolation_type
    )
    resampled_positions = list(
        numpy.linspace(min(positions), max(positions), length)
    )
    resampled_values = [
        float(interpolate(position)) for position in resampled_positions
    ]
    if dimensions == 2:
        return (resampled_positions, resampled_values)
    if dimensions == 1:
        return resampled_values
|
||
|
|
||
|
def change_waveform_to_rectangle_waveform(
    values             = None,
    fraction_amplitude = 0.01
    ):
    """
    Change a waveform to a rectangle waveform in place and return it. All
    non-negative samples are set to one level derived from the maximum sample
    and all negative samples to one level derived from the minimum sample.
    The values object must support boolean-mask assignment (for example, a
    NumPy array).
    """
    # The levels are first written scaled down by the fraction and then scaled
    # back up; the order of these steps is significant.
    upper_level = fraction_amplitude * max(values)
    values[values >= 0] = upper_level
    lower_level = fraction_amplitude * min(values)
    values[values < 0] = lower_level
    gain = 1 / fraction_amplitude
    values[:] = [sample * gain for sample in values]
    return values
|
||
|
|
||
|
def change_sound_file_waveform_to_sound_file_rectangle_waveform(
    filename_waveform           = None,
    filename_rectangle_waveform = None,
    overwrite                   = False,
    fraction_amplitude          = 0.01
    ):
    """
    Read a sound file, change its waveform to a rectangle waveform and write
    the result to a sound file. If no output filename is specified, the input
    filename is used as the basis of the output filename, which is passed
    through propose_filename together with the overwrite option.
    """
    if filename_rectangle_waveform is None:
        filename_rectangle_waveform = filename_waveform
    filename_rectangle_waveform = propose_filename(
        filename  = filename_rectangle_waveform,
        overwrite = overwrite
    )
    rate, values = scipy.io.wavfile.read(filename_waveform)
    # The transformation is delegated entirely to the dedicated function;
    # repeating its steps here afterwards would only reapply the same levels.
    values = change_waveform_to_rectangle_waveform(
        values             = values,
        fraction_amplitude = fraction_amplitude
    )
    scipy.io.wavfile.write(filename_rectangle_waveform, rate, values)
|
||
|
|
||
|
def normalize(
    x,
    summation = None
    ):
    """
    Return the elements of x divided by a summation. When no summation is
    given, the sum of the elements is used, so the result sums to unity.
    """
    divisor = sum(x) if summation is None else summation
    return [element / divisor for element in x]
|
||
|
|
||
|
def rescale(
    x,
    minimum = 0,
    maximum = 1
    ):
    """
    Return the elements of x rescaled linearly such that the smallest element
    maps to `minimum` and the greatest element maps to `maximum`. An empty
    input returns an empty list. If all elements are equal, the scale is
    undefined and ZeroDivisionError is raised.
    """
    if len(x) == 0:
        return []
    # The extremes and the scale are the same for every element, so compute
    # them once rather than once per element.
    lowest = min(x)
    highest = max(x)
    scale = (maximum - minimum) / (highest - lowest)
    return [minimum + (element - lowest) * scale for element in x]
|
||
|
]
|
||
|
|
||
|
def composite_variable(
    x
    ):
    """
    Combine the elements of x into a single number by weighting each element
    with a power of k, where k is one more than the number of elements, and
    summing the weighted elements.
    """
    k = len(x) + 1
    # NOTE(review): the exponent is the zero-based position minus one, so the
    # first element is weighted by k ** -1 -- confirm that this is intended.
    return sum(
        k ** (position - 1) * element for position, element in enumerate(x)
    )
|
||
|
|
||
|
def model_linear(
    data              = None,
    quick_calculation = False
    ):
    """
    Fit a straight line to data consisting of (x, y) pairs using least
    squares and return the tuple (b0, b1) of its intercept and gradient. With
    quick_calculation, the fit uses only a spread of 10 of the data points.
    """
    if quick_calculation is True:
        data = select_spread(data, 10)
    n = len(data)
    x_values = [datum[0] for datum in data]
    y_values = [datum[1] for datum in data]
    sum_x = sum(x_values)
    sum_y = sum(y_values)
    sum_x_squared = sum(x ** 2 for x in x_values)
    sum_xy = sum(x * y for x, y in zip(x_values, y_values))
    b1 = (sum_xy - (sum_x * sum_y) / n) / (sum_x_squared - (sum_x ** 2) / n)
    b0 = (sum_y - b1 * sum_x) / n
    return (b0, b1)
|
||
|
|
||
|
def import_object(
    filename = None
    ):
    """
    Load and return the object stored in a pickle file.

    NOTE: unpickling can execute arbitrary code, so this function must only
    be used on files from trusted sources.
    """
    # The context manager closes the file even if unpickling fails.
    with open(filename, "rb") as file_object:
        return pickle.load(file_object)
|
||
|
|
||
|
def export_object(
    x,
    filename  = None,
    overwrite = False
    ):
    """
    Save an object to a pickle file. The filename is passed through
    propose_filename together with the overwrite option before writing.
    """
    filename = propose_filename(
        filename  = filename,
        overwrite = overwrite
    )
    # The context manager flushes and closes the file once the object has
    # been written, rather than leaving that to garbage collection.
    with open(filename, "wb") as file_object:
        pickle.dump(x, file_object)
|
||
|
|
||
|
def string_to_bool(x):
    """
    Return True if the string, compared case-insensitively, is one of the
    recognised affirmative values ("yes", "true", "t" or "1"), else False.
    """
    affirmatives = ("yes", "true", "t", "1")
    lowered = x.lower()
    return lowered in affirmatives
|
||
|
|
||
|
def ustr(text):
    """
    Convert text to the native text type of the Python version in use:
    unicode in Python 2 and str in Python 3. None is returned unchanged.
    """
    if text is None:
        return text
    if sys.version_info >= (3, 0):
        return str(text)
    return unicode(text)
|
||
|
|
||
|
|
||
|
def number_to_English_text(
    number = None
    ):
    """
    Return an English text representation of a non-negative integer, which
    can be specified as an integer or as a string of digits. For example, 123
    is returned as "one hundred twenty three". Zero is returned as "zero" and
    an empty string is returned as an empty string. Magnitudes up to
    vigintillions (66 digits) are supported; more significant digits are
    ignored.
    """
    ones = [
        "",
        "one ",
        "two ",
        "three ",
        "four ",
        "five ",
        "six ",
        "seven ",
        "eight ",
        "nine "
    ]
    teens = [
        "ten ",
        "eleven ",
        "twelve ",
        "thirteen ",
        "fourteen ",
        "fifteen ",
        "sixteen ",
        "seventeen ",
        "eighteen ",
        "nineteen "
    ]
    tens = [
        "",
        "",
        "twenty ",
        "thirty ",
        "forty ",
        "fifty ",
        "sixty ",
        "seventy ",
        "eighty ",
        "ninety "
    ]
    thousands = [
        "",
        "thousand ",
        "million ",
        "billion ",
        "trillion ",
        "quadrillion ",
        "quintillion ",
        "sextillion ",
        "septillion ",
        "octillion ",
        "nonillion ",
        "decillion ",
        "undecillion ",
        "duodecillion ",
        "tredecillion ",
        "quattuordecillion ",
        "quindecillion ",
        "sexdecillion ",
        "septendecillion ",
        "octodecillion ",
        "novemdecillion ",
        "vigintillion "
    ]
    # Split the number into 3-digit groups, least significant group first,
    # with each group representing hundreds, thousands etc.
    number_as_string = str(number)
    number_in_groups_of_3 = [
        int(number_as_string[max(0, end - 3):end])
        for end in range(len(number_as_string), 0, -3)
    ]
    if number_in_groups_of_3 and not any(number_in_groups_of_3):
        return "zero"
    # Only as many groups as there are magnitude names can be expressed.
    number_in_groups_of_3 = number_in_groups_of_3[:len(thousands)]
    # Split the number 3-digit groups into groups of ones, tens etc. and build
    # an English text representation of the number.
    number_words = ""
    for index, group in enumerate(number_in_groups_of_3):
        if group == 0:
            continue
        number_1 = group % 10
        number_2 = (group % 100) // 10
        number_3 = (group % 1000) // 100
        thousand = thousands[index]
        if number_2 == 0:
            number_words = ones[number_1] + thousand + number_words
        elif number_2 == 1:
            number_words = teens[number_1] + thousand + number_words
        elif number_2 > 1:
            number_words = tens[number_2] + ones[number_1] + thousand + number_words
        if number_3 > 0:
            number_words = ones[number_3] + "hundred " + number_words
    return number_words.strip(" ")
|
||
|
|
||
|
def replace_numbers_in_text_with_English_text(
    text = None
    ):
    """
    Return the text with every run of digits replaced by its English text
    representation, as produced by number_to_English_text.
    """
    # Split the text into text and numbers. The pattern is a raw string so
    # that \d reaches the regular expression engine without relying on an
    # invalid string escape sequence.
    text = re.split(r"(\d+)", text)
    if text[-1] == "":
        text = text[:-1]
    text_translated = []
    # Replace numbers with English text.
    for text_segment in text:
        if all(character.isdigit() for character in text_segment):
            text_translated.append(number_to_English_text(number = text_segment))
        else:
            text_translated.append(text_segment)
    return "".join(text_translated)
|
||
|
|
||
|
def replace_contractions_with_full_words_and_replace_numbers_with_digits(
    text            = None,
    remove_articles = True
    ):
    """
    This function replaces contractions with full words and replaces numbers
    (written as the words zero to twenty) with digits in specified text. There
    is the option to remove the articles "a", "an" and "the". The text is
    split on whitespace and the words of the result are joined by single
    spaces.
    """
    # The lookup tables are the same for every word, so they are built once
    # rather than once per word.
    contractions_expansions = {
        "ain't":       "is not",
        "aren't":      "are not",
        "can't":       "can not",
        "could've":    "could have",
        "couldn't":    "could not",
        "didn't":      "did not",
        "doesn't":     "does not",
        "don't":       "do not",
        "gonna":       "going to",
        "gotta":       "got to",
        "hadn't":      "had not",
        "hasn't":      "has not",
        "haven't":     "have not",
        "he'd":        "he would",
        "he'll":       "he will",
        "he's":        "he is",
        "how'd":       "how did",
        "how'll":      "how will",
        "how's":       "how is",
        "I'd":         "I would",
        "I'll":        "I will",
        "I'm":         "I am",
        "I've":        "I have",
        "isn't":       "is not",
        "it'd":        "it would",
        "it'll":       "it will",
        "it's":        "it is",
        "mightn't":    "might not",
        "might've":    "might have",
        "mustn't":     "must not",
        "must've":     "must have",
        "needn't":     "need not",
        "oughtn't":    "ought not",
        "shan't":      "shall not",
        "she'd":       "she would",
        "she'll":      "she will",
        "she's":       "she is",
        "shouldn't":   "should not",
        "should've":   "should have",
        "somebody's":  "somebody is",
        "someone'd":   "someone would",
        "someone'll":  "someone will",
        "someone's":   "someone is",
        "that'll":     "that will",
        "that's":      "that is",
        "that'd":      "that would",
        "there'd":     "there would",
        "there're":    "there are",
        "there's":     "there is",
        "they'd":      "they would",
        "they'll":     "they will",
        "they're":     "they are",
        "they've":     "they have",
        "wasn't":      "was not",
        "we'd":        "we would",
        "we'll":       "we will",
        "we're":       "we are",
        "we've":       "we have",
        "weren't":     "were not",
        "what'd":      "what did",
        "what'll":     "what will",
        "what're":     "what are",
        "what's":      "what is",
        "whats":       "what is",
        "what've":     "what have",
        "when's":      "when is",
        "when'd":      "when did",
        "where'd":     "where did",
        "where's":     "where is",
        "where've":    "where have",
        "who'd":       "who would",
        "who'd've":    "who would have",
        "who'll":      "who will",
        "who're":      "who are",
        "who's":       "who is",
        "who've":      "who have",
        "why'd":       "why did",
        "why're":      "why are",
        "why's":       "why is",
        "won't":       "will not",
        "won't've":    "will not have",
        "would've":    "would have",
        "wouldn't":    "would not",
        "wouldn't've": "would not have",
        "y'all":       "you all",
        "ya'll":       "you all",
        "you'd":       "you would",
        "you'd've":    "you would have",
        "you'll":      "you will",
        "y'aint":      "you are not",
        "y'ain't":     "you are not",
        "you're":      "you are",
        "you've":      "you have"
    }
    numbers_digits = {
        "zero":      "0",
        "one":       "1",
        "two":       "2",
        "three":     "3",
        "four":      "4",
        "five":      "5",
        "six":       "6",
        "seven":     "7",
        "eight":     "8",
        "nine":      "9",
        "ten":       "10",
        "eleven":    "11",
        "twelve":    "12",
        "thirteen":  "13",
        "fourteen":  "14",
        "fifteen":   "15",
        "sixteen":   "16",
        "seventeen": "17",
        "eighteen":  "18",
        "nineteen":  "19",
        "twenty":    "20"
    }
    articles = ("a", "an", "the")
    words_translated = []
    for word in text.split():
        if remove_articles and word in articles:
            continue
        word = contractions_expansions.get(word, word)
        word = numbers_digits.get(word, word)
        words_translated.append(word)
    return " ".join(words_translated)
|
||
|
|
||
|
def split_into_sentences(
    text = None
    ):
    """
    Split text into a list of sentences. Full stops that do not end a sentence
    (in titles, initials, acronyms, company suffixes and website addresses)
    are protected with a placeholder before the text is split at full stops,
    question marks and exclamation marks. Text that follows the final sentence
    terminator is not returned.
    """
    # Patterns containing \s are raw strings so that the backslash reaches
    # the regular expression engine without relying on an invalid string
    # escape sequence.
    capitals = "([A-Z])"
    prefixes = "(Dr|dr|Hon|hon|Mr|mr|Mrs|mrs|Ms|ms|St|st)[.]"
    suffixes = "(Co|co|Inc|inc|Jr|jr|Ltd|ltd|Sr|sr)"
    starters = r"(But\s|Dr|He\s|However\s|It\s|Mr|Mrs|Ms|Our\s|She\s|That\s|Their\s|They\s|This\s|We\s|Wherever)"
    acronyms = "([A-Z][.][A-Z][.](?:[A-Z][.])?)"
    websites = "[.](com|gov|io|net|org|pro)"
    text = " " + text + " "
    text = text.replace("\n", " ")
    # Protect full stops that are not sentence terminators with <prd> and mark
    # known sentence boundaries with <stop>. The order of these substitutions
    # is significant.
    text = re.sub(prefixes, "\\1<prd>", text)
    text = re.sub(websites, "<prd>\\1", text)
    if "Ph.D" in text: text = text.replace("Ph.D.", "Ph<prd>D<prd>")
    text = re.sub(r"\s" + capitals + "[.] ", " \\1<prd> ", text)
    text = re.sub(acronyms + " " + starters, "\\1<stop> \\2", text)
    text = re.sub(capitals + "[.]" + capitals + "[.]" + capitals + "[.]","\\1<prd>\\2<prd>\\3<prd>", text)
    text = re.sub(capitals + "[.]" + capitals + "[.]", "\\1<prd>\\2<prd>", text)
    text = re.sub(" " + suffixes + "[.] " + starters, " \\1<stop> \\2", text)
    text = re.sub(" " + suffixes + "[.]", " \\1<prd>", text)
    text = re.sub(" " + capitals + "[.]", " \\1<prd>", text)
    # Move terminators outside closing quotation marks so that the quotation
    # mark stays with its sentence.
    if "”" in text: text = text.replace(".”", "”.")
    if "\"" in text: text = text.replace(".\"", "\".")
    if "!" in text: text = text.replace("!\"", "\"!")
    if "?" in text: text = text.replace("?\"", "\"?")
    text = text.replace(".", ".<stop>")
    text = text.replace("?", "?<stop>")
    text = text.replace("!", "!<stop>")
    text = text.replace("<prd>", ".")
    sentences = text.split("<stop>")
    # The final element is whatever follows the last terminator.
    sentences = sentences[:-1]
    sentences = [sentence.strip() for sentence in sentences]

    return sentences
|
||
|
|
||
|
def trim_incomplete_sentences(
    text = None
    ):
    """
    Split the text into sentences using split_into_sentences, discard the
    first sentence and return the remaining sentences joined by single spaces.
    """
    sentences = split_into_sentences(text)
    return " ".join(sentences[1:])
|
||
|
|
||
|
def pseudorandom_MAC_address():
    """
    Return a pseudorandom MAC address as six colon-separated, two-digit,
    lowercase hexadecimal octets.
    """
    octets = [random.randint(0, 255) for _ in range(6)]
    return ":".join("{octet:02x}".format(octet = octet) for octet in octets)
|
||
|
|
||
|
def get_attribute(
    object_instance          = None,
    name                     = None,
    imputation_default_value = None
    ):
    """
    Return the value of a named attribute of an object. The name can include
    an integer index in square brackets, such as "values[2]", in which case
    the attribute is indexed. If the value cannot be retrieved for any
    reason, the imputation default value is returned.
    """
    try:
        if "[" in name and "]" in name:
            index = int(name.split("[")[1].split("]")[0])
            attribute = name.split("[")[0]
            value = getattr(object_instance, attribute)[index]
        else:
            value = getattr(object_instance, name)
    # Any ordinary failure results in the default value, but interpreter-level
    # signals such as KeyboardInterrupt and SystemExit are not swallowed.
    except Exception:
        value = imputation_default_value
    return value
|
||
|
|
||
|
def generate_Python_variable_names(
    number = 10
    ):
    """
    Return a list of the specified number of random names, each of which is
    32 hexadecimal characters that start with a letter.
    """
    names = []
    while number > len(names):
        candidate = uuid.uuid4().hex
        # Candidates that start with a digit are discarded.
        if not candidate[0].isalpha():
            continue
        names.append(candidate)
    return names
|
||
|
|
||
|
def add_time_variables(df, reindex = True):
    """
    Return a DataFrame with variables for month, month name, weekday index,
    weekday name, timedelta through day, fraction through day, hour, hours
    through day, days through week and days through year added, optionally
    (by default) with the index set to datetime. It is assumed that the
    variable `datetime` exists; if it does not, an error is logged and False
    is returned. The DataFrame is modified in place.
    """
    if "datetime" not in df.columns:
        log.error("field datetime not found in DataFrame")
        return False
    df["datetime"] = pd.to_datetime(df["datetime"])
    timestamps = df["datetime"].dt
    df["month"] = timestamps.month
    df["month_name"] = timestamps.strftime("%B")
    df["weekday"] = timestamps.weekday
    # The attribute weekday_name was removed in pandas 1.0 in favour of the
    # method day_name; fall back to the attribute only for old versions.
    if hasattr(timestamps, "day_name"):
        df["weekday_name"] = timestamps.day_name()
    else:
        df["weekday_name"] = timestamps.weekday_name
    # Time elapsed since the preceding midnight.
    df["time_through_day"] = df["datetime"] - timestamps.normalize()
    df["fraction_through_day"] = df["time_through_day"] / pd.Timedelta(hours = 24)
    df["hour"] = timestamps.hour
    df["hours_through_day"] = df["fraction_through_day"] * 24
    df["days_through_week"] = df["weekday"] + df["fraction_through_day"]
    df["days_through_year"] = timestamps.dayofyear
    if reindex:
        df.index = df["datetime"]
    return df
|
||
|
|
||
|
def daily_plots(
    df,
    variable,
    renormalize = True,
    plot        = True,
    scatter     = False,
    linestyle   = "-",
    linewidth   = 1,
    s           = 1
    ):
    """
    Create daily plots of a variable in a DataFrame, optionally renormalized,
    with one line and/or set of scatter points per calendar day plotted
    against the variable `hours_through_day`. It is assumed that the DataFrame
    index is datetime; if it is not, an error is logged and False is returned.
    """
    if df.index.dtype not in ["datetime64[ns]", "<M8[ns]", ">M8[ns]"]:
        log.error("index is not datetime")
        return False
    # Group by calendar date. Grouping by the day of the month would merge
    # the same day number of different months into a single plot.
    days = [day for _, day in df.groupby(df.index.normalize())]
    scaler = MinMaxScaler()
    plt.xlabel("hours")
    plt.ylabel(variable)
    for day in days:
        if renormalize:
            values = scaler.fit_transform(day[[variable]])
        else:
            values = day[variable]
        if plot:
            plt.plot(day["hours_through_day"], values, linestyle = linestyle, linewidth = linewidth)
        if scatter:
            plt.scatter(day["hours_through_day"], values, s = s)
|
||
|
|
||
|
def weekly_plots(
    df,
    variable,
    renormalize = True,
    plot        = True,
    scatter     = False,
    linestyle   = "-",
    linewidth   = 1,
    s           = 1
    ):
    """
    Create weekly plots of a variable in a DataFrame, optionally renormalized,
    with one line and/or set of scatter points per calendar week (Monday to
    Sunday) plotted against the variable `days_through_week`. It is assumed
    that the variable `days_through_week` exists (if it does not, an error is
    logged and False is returned) and that the DataFrame index is datetime.
    """
    if "days_through_week" not in df.columns:
        log.error("field days_through_week not found in DataFrame")
        return False
    # Group by weekly period. The week-number attribute of a datetime index
    # has been removed from pandas and would also merge the same week number
    # of different years into a single plot.
    weeks = [week for _, week in df.groupby(df.index.to_period("W"))]
    scaler = MinMaxScaler()
    plt.ylabel(variable)
    for week in weeks:
        if renormalize:
            values = scaler.fit_transform(week[[variable]])
        else:
            values = week[variable]
        if plot:
            plt.plot(week["days_through_week"], values, linestyle = linestyle, linewidth = linewidth)
        if scatter:
            plt.scatter(week["days_through_week"], values, s = s)
    # Label the middle of each day.
    plt.xticks(
        [     0.5,       1.5,         2.5,        3.5,      4.5,        5.5,      6.5],
        ["Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday"]
    )
|
||
|
|
||
|
def yearly_plots(
    df,
    variable,
    renormalize                   = True,
    horizontal_axis_labels_days   = False,
    horizontal_axis_labels_months = True,
    plot                          = True,
    scatter                       = False,
    linestyle                     = "-",
    linewidth                     = 1,
    s                             = 1
    ):
    """
    Create yearly plots of a variable in a DataFrame, optionally renormalized,
    with one labelled line and/or set of scatter points per year plotted
    against the variable `days_through_year`. It is assumed that the DataFrame
    index is datetime; if it is not, an error is logged and False is returned.
    """
    datetime_dtypes = ["datetime64[ns]", "<M8[ns]", ">M8[ns]"]
    if df.index.dtype not in datetime_dtypes:
        log.error("index is not datetime")
        return False
    years = [year for _, year in df.groupby(df.index.year)]
    scaler = MinMaxScaler()
    plt.xlabel("days")
    plt.ylabel(variable)
    for year in years:
        values = scaler.fit_transform(year[[variable]]) if renormalize else year[variable]
        if plot:
            plt.plot(year["days_through_year"], values, linestyle = linestyle, linewidth = linewidth, label = year.index.year.values[0])
        if scatter:
            plt.scatter(year["days_through_year"], values, s = s)
    if horizontal_axis_labels_months:
        # Tick positions are day-of-year values near the middle of each month.
        month_positions = [     15.5,         45,    74.5,     105, 135.5,    166,  196.5,    227.5,         258,     288.5,        319,      349.5]
        month_labels    = ["January", "February", "March", "April", "May", "June", "July", "August", "September", "October", "November", "December"]
        plt.xticks(month_positions, month_labels)
    plt.legend()
|
||
|
|
||
|
def add_rolling_statistics_variables(
    df           = None,
    variable     = None,
    window       = 20,
    upper_factor = 2,
    lower_factor = 2
    ):
    """
    Add rolling statistics variables derived from a specified variable in a
    DataFrame: the rolling mean, the rolling standard deviation and upper and
    lower bounds that lie the specified numbers of rolling standard deviations
    above and below the rolling mean. The DataFrame is modified in place and
    returned.
    """
    # The functions pd.stats.moments.rolling_mean and rolling_std have been
    # removed from pandas; the rolling window interface replaces them.
    rolling = df[variable].rolling(window)
    mean = variable + "_rolling_mean"
    standard_deviation = variable + "_rolling_standard_deviation"
    df[mean] = rolling.mean()
    df[standard_deviation] = rolling.std()
    df[variable + "_rolling_upper_bound"] = df[mean] + upper_factor * df[standard_deviation]
    df[variable + "_rolling_lower_bound"] = df[mean] - lower_factor * df[standard_deviation]
    return df
|
||
|
|
||
|
def rescale_variables(
    df,
    variables_include = [],
    variables_exclude = []
    ):
    """
    Rescale variables in a DataFrame, excluding variables with NaNs and
    variables of object, datetime and timedelta types, excluding specified
    variables, and including specified variables. The DataFrame is modified
    in place and returned. The specified lists are not modified.
    """
    # Work on a copy: extending the argument itself would modify the list of
    # the caller and, when the default is used, the shared default list, so
    # that exclusions would accumulate across calls.
    variables_not_rescale = list(variables_exclude)
    variables_not_rescale.extend(df.columns[df.isna().any()].tolist()) # variables with NaNs
    variables_not_rescale.extend(df.select_dtypes(include = ["object", "datetime", "timedelta"]).columns) # variables with strings
    variables_rescale = list(set(df.columns) - set(variables_not_rescale))
    for variable in variables_include:
        # Avoid selecting the same variable twice.
        if variable not in variables_rescale:
            variables_rescale.append(variable)
    if not variables_rescale:
        # There is nothing to rescale.
        return df
    scaler = MinMaxScaler()
    df[variables_rescale] = scaler.fit_transform(df[variables_rescale])
    return df
|
||
|
|
||
|
def histogram_hour_counts(
    df,
    variable
    ):
    """
    Create a day-long histogram (bar plot) of counts of the variable for each
    hour of the DataFrame index. It is assumed that the DataFrame index is
    datetime; if it is not, an error is logged and False is returned.
    """
    datetime_dtypes = ["datetime64[ns]", "<M8[ns]", ">M8[ns]"]
    if df.index.dtype not in datetime_dtypes:
        log.error("index is not datetime")
        return False
    grouped_by_hour = df.groupby(df.index.hour)[variable]
    grouped_by_hour.count().plot(kind = "bar", width = 1, rot = 0, alpha = 0.7)
|
||
|
|
||
|
def histogram_day_counts(
    df,
    variable
    ):
    """
    Create a week-long histogram (bar plot) of counts of the variable for each
    day of the week, ordered from Monday to Sunday. It is assumed that the
    DataFrame index is datetime; if it is not, an error is logged and False is
    returned.
    """
    if df.index.dtype not in ["datetime64[ns]", "<M8[ns]", ">M8[ns]"]:
        log.error("index is not datetime")
        return False
    # The attribute weekday_name was removed in pandas 1.0 in favour of the
    # method day_name; fall back to the attribute only for old versions.
    if hasattr(df.index, "day_name"):
        weekday_names = df.index.day_name()
    else:
        weekday_names = df.index.weekday_name
    counts = df.groupby(weekday_names)[variable].count().reindex(calendar.day_name[0:])
    counts.plot(kind = "bar", width = 1, rot = 0, alpha = 0.7)
|
||
|
|
||
|
def histogram_month_counts(
    df,
    variable
    ):
    """
    Create a year-long histogram (bar plot) of counts of the variable for each
    month, ordered from January to December. It is assumed that the DataFrame
    index is datetime; if it is not, an error is logged and False is returned.
    """
    datetime_dtypes = ["datetime64[ns]", "<M8[ns]", ">M8[ns]"]
    if df.index.dtype not in datetime_dtypes:
        log.error("index is not datetime")
        return False
    month_names = df.index.strftime("%B")
    counts_by_month = df.groupby(month_names)[variable].count()
    counts = counts_by_month.reindex(calendar.month_name[1:])
    counts.plot(kind = "bar", width = 1, rot = 0, alpha = 0.7)
|
||
|
|
||
|
def setup_Jupyter():
    """
    Set up a Jupyter notebook with a few defaults: the seaborn paper context
    with a monospace font, warnings ignored, up to 500 rows and 500 columns
    displayed for DataFrames and a default figure size of 17 by 10.
    """
    sns.set(context = "paper", font = "monospace")
    warnings.filterwarnings("ignore")
    for option in ("display.max_rows", "display.max_columns"):
        pd.set_option(option, 500)
    plt.rcParams["figure.figsize"] = (17, 10)
|
||
|
|
||
|
def log_progress(
    sequence,
    every = None,
    size = None,
    name = "items"
    ):
    """
    Display a progress bar widget in a Jupyter notebook while yielding the
    items of a sequence. Its dependencies must be enabled on launching
    Jupyter, such as in the following way:

    jupyter nbextension enable --py widgetsnbextension

    The progress bar can be used in a way like the following:

    for item in shijian.log_progress([1, 2, 3, 4, 5]):
        time.sleep(5)

    The widget is updated at the first item and then at every `every` items.
    If `size` is not specified it is taken from the length of the sequence;
    for a sequence without a length (an iterator), `every` must be specified.
    `name` is the text shown before the counts.
    """
    # The imports are local so that the widgets are needed only when this
    # function is used.
    from ipywidgets import IntProgress, HTML, VBox
    from IPython.display import display
    is_iterator = False
    if size is None:
        try:
            size = len(sequence)
        except TypeError:
            # The sequence has no length, so the total is unknown.
            is_iterator = True
    if size is not None:
        if every is None:
            if size <= 200:
                every = 1
            else:
                every = int(size / 200) # every 0.5 %
    else:
        assert every is not None, "sequence is iterator, set every"
    if is_iterator:
        # With an unknown total, show a full bar in the "info" style.
        progress = IntProgress(min = 0, max = 1, value = 1)
        progress.bar_style = "info"
    else:
        progress = IntProgress(min = 0, max = size, value = 0)
    label = HTML()
    box = VBox(children = [label, progress])
    display(box)
    # The index remains 0 if the sequence yields no items.
    index = 0
    try:
        for index, record in enumerate(sequence, 1):
            if index == 1 or index % every == 0:
                if is_iterator:
                    label.value = "{name}: {index} / ?".format(
                        name = name,
                        index = index
                    )
                else:
                    progress.value = index
                    label.value = u"{name}: {index} / {size}".format(
                        name = name,
                        index = index,
                        size = size
                    )
            yield record
    except:
        # Anything raised while iterating or yielding marks the bar as failed
        # and is then propagated unchanged.
        progress.bar_style = "danger"
        raise
    else:
        # The sequence was exhausted: show the final count.
        progress.bar_style = "success"
        progress.value = index
        label.value = "{name}: {index}".format(
            name = name,
            index = str(index or "?")
        )
|
||
|
|
||
|
# Call _main (defined outside this section of the file); as a top-level
# statement, this runs when the module is loaded.
_main()
|