#!/usr/bin/php
<?php
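// Aggregate download.opensuse.org access logs into InfluxDB: daily logs are
// fetched per source, reduced to JSON summaries by ingest.php, cached under
// $CACHE_DIR, and rolled up into day, week, month, and financial-quarter
// data points.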

use InfluxDB2\Client;
use InfluxDB2\Point;

$CACHE_DIR = $_SERVER['HOME'] . '/.cache/openSUSE-release-tools/metrics-access';

const PROTOCOLS = ['ipv4', 'ipv6'];
const DOWNLOAD_OPENSUSE_ORG = 'https://download.opensuse.org/logs';
const PONTIFEX = 'http://pontifex.infra.opensuse.org/logs';
const BACKUP = 'http://backup.infra.opensuse.org';
const LANGLEY = 'http://langley.suse.de/pub/pontifex%s-opensuse.suse.de';
const VHOST = 'download.opensuse.org';
const FILENAME = 'download.opensuse.org-%s-access_log.xz';
const IPV6_PREFIX = 'ipv6.';
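// Products worth counting: openSUSE point releases from 10.2 through 16.0
// plus the Tumbleweed and Slowroll rolling releases; everything else is
// dropped by product_filter().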
const PRODUCT_PATTERN = '/^(10\.[2-3]|11\.[0-4]|12\.[1-3]|13\.[1-2]|42\.[1-3]|15\.[0-6]|16\.0|tumbleweed|slowroll)$/';

$begin = new DateTime();
// Skip the current day since the logs are incomplete and not compressed yet.
$begin->sub(date_interval_create_from_date_string('1 day'));
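
// Log sources per protocol, keyed by date. Ingestion walks from newest day to
// oldest, so a date key sets the source for that day and all earlier days
// until an older key takes over; false means no logs are available from there
// back. 'filename' is the sprintf() pattern for daily log names, e.g.
// sprintf(FILENAME, '20231113') yields
// 'download.opensuse.org-20231113-access_log.xz'.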
$source_map = [
  'ipv4' => [
    // The first item defines the starting date for aggregation.
    '2023-01-01' => false,
    '2023-11-13' => DOWNLOAD_OPENSUSE_ORG . '/' . VHOST,
    'filename' => FILENAME,
  ],
  'ipv6' => [
    '2012-12-31' => false,
    '2023-11-13' => DOWNLOAD_OPENSUSE_ORG . '/' . IPV6_PREFIX . VHOST,
    'filename' => IPV6_PREFIX . FILENAME,
  ],
  'ipv4+6' => [
    '2023-11-13' => false,
    $begin->format('Y-m-d') => DOWNLOAD_OPENSUSE_ORG . '/' . VHOST,
    'filename' => FILENAME,
  ],
];

$end = new DateTime(key($source_map['ipv4'])); // decide about adding one day
$migration_date = new DateTime(key($source_map['ipv4+6']));
$period_reversed = date_period_reversed($end, '1 day', $begin);

error_log('begin: ' . $begin->format('Y-m-d'));
error_log('end: ' . $end->format('Y-m-d'));
error_log('count: ' . number_format(count($period_reversed)) . ' days');

cache_init();
ingest_all($period_reversed, $source_map);
aggregate_all(array_reverse($period_reversed));
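
// Create the cache directory layout on first run and pull in the InfluxDB
// client library via composer.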
function cache_init()
{
  global $CACHE_DIR;

  if (!file_exists($CACHE_DIR)) {
    foreach (PROTOCOLS as $protocol) {
      mkdir("$CACHE_DIR/$protocol", 0755, true);
    }
    mkdir("$CACHE_DIR/ipv4+6", 0755, true);

    // Avoid packaging mess while still automating, but not ideal.
    passthru('cd ' . escapeshellarg($CACHE_DIR) .
      ' && composer require influxdata/influxdb-client-php:~3.4 guzzlehttp/guzzle');
  }

  require "$CACHE_DIR/vendor/autoload.php";
}
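
// Walk the period from newest to oldest, ingesting any day whose cache file
// is missing, and stop once every active protocol has reached an
// already-cached day.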
function ingest_all($period_reversed, $source_map)
{
  global $CACHE_DIR;

  $source = [];
  $found = [];
  // Walk backwards until found in cache.
  foreach ($period_reversed as $date) {
    $date_string = print_date($date);
    $protocols_on_day = get_protocols($date);

    foreach ($protocols_on_day as $protocol) {
      if (!empty($found[$protocol])) continue;

      if (isset($source_map[$protocol][$date_string]))
        $source[$protocol] = $source_map[$protocol][$date_string];

      // Skip date+protocol if no source is available.
      if (empty($source[$protocol])) continue;

      $cache_file = get_cache_file($protocol, $date);
      if (file_exists($cache_file)) {
        error_log("[$date_string] [$protocol] found");
        $found[$protocol] = true;
      } else {
        error_log("[$date_string] [$protocol] ingest");
        ingest($date, $source[$protocol], $source_map[$protocol]['filename'], $cache_file);
      }
    }

    // Stop once all cache files have been found.
    if (count($found) == count($protocols_on_day)) {
      error_log('ingest initialization complete');
      break;
    }
  }

  // Wait for all ingest processes to complete before proceeding.
  subprocess_wait(1, 1);
}

function print_date($date)
{
  return $date->format('Y-m-d');
}

// Logs from before the migration date were kept in separate files for IPv4
// and IPv6 addresses.
function has_separate_protocol_logs($date)
{
  global $migration_date;
  return $date <= $migration_date;
}

function get_cache_file($protocol, $date)
{
  global $CACHE_DIR;

  if (has_separate_protocol_logs($date))
    return "$CACHE_DIR/$protocol/" . print_date($date) . ".json";
  else
    return "$CACHE_DIR/ipv4+6/" . print_date($date) . ".json";
}

function get_cache_files($date)
{
  $files = [];
  foreach (get_protocols($date) as $protocol)
    array_push($files, get_cache_file($protocol, $date));

  return $files;
}

function get_protocols($date)
{
  if (has_separate_protocol_logs($date))
    return PROTOCOLS;
  else
    return ['ipv4+6'];
}
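
// Fetch one day's compressed log and reduce it to a JSON summary via
// ingest.php, as a single backgrounded shell pipeline.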
function ingest($date, $source, $filename, $destination)
{
  $url = implode('/', [
    $source,
    $date->format('Y'),
    $date->format('m'),
    sprintf($filename, $date->format('Ymd')),
  ]);
  $command = implode(' ', [
    'curl -s --digest --netrc',
    escapeshellarg($url),
    '| xzcat',
    '| ' . __DIR__ . '/ingest.php',
    '> ' . escapeshellarg($destination),
    '&',
  ]);
  error_log($command);
  passthru_block($command);
}
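
// Launch a command in the background, first blocking until enough CPU cores
// are free for every process in the pipeline.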
function passthru_block($command)
{
  static $cpu_count = null;

  if (!$cpu_count) {
    $cpuinfo = file_get_contents('/proc/cpuinfo');
    preg_match_all('/^processor/m', $cpuinfo, $matches);
    $cpu_count = max(count($matches[0]), 1);
    error_log("detected $cpu_count cores");
  }

  $group_size = substr_count($command, '|') + 1;
  subprocess_wait($group_size, $cpu_count);

  passthru($command, $exit_code);
  if ($exit_code != 0) {
    error_log('failed to start process');
    exit(1);
  }
}

function subprocess_wait($group_size, $cap)
{
  while (subprocess_count() / $group_size >= $cap) {
    usleep(250000);
  }
}
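
// Count the processes in this script's process group, excluding the script
// itself. This assumes the script is its own process group leader, so that
// getmypid() can double as the group ID for pgrep -g.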
function subprocess_count()
{
  return substr_count(shell_exec('pgrep -g ' . getmypid()), "\n") - 1;
}
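
// Load the cached daily summaries from oldest to newest and roll them up into
// the day/week/month/FQ intervals, both per protocol and combined. Note that
// 'Y-W' pairs the ISO week number with the calendar year; around New Year this
// can differ from the ISO week-numbering year ('o').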
function aggregate_all($period)
{
  global $CACHE_DIR;

  $intervals = ['day' => 'Y-m-d', 'week' => 'Y-W', 'month' => 'Y-m', 'FQ' => null];
  $merged = [];
  $merged_protocol = [];
  $date_previous = null;
  foreach ($period as $date) {
    $date_string = print_date($date);

    $data = null;
    foreach (PROTOCOLS as $protocol) {
      $cache_file = get_cache_file($protocol, $date);
      if (!file_exists($cache_file) or !filesize($cache_file)) continue;

      error_log("[$date_string]" . (has_separate_protocol_logs($date) ? " [$protocol]" : "") . " load cache");
      $data_new = json_decode(file_get_contents($cache_file), true);
      if (!$data_new) {
        error_log('ERROR: failed to load ' . $cache_file);
        unlink($cache_file); // Trigger it to be re-ingested next run.
        exit(1);
      }

      if (isset($data_new[$protocol])) {
        // Newer cache files have 'ipv4' and 'ipv6' array keys.
        $data_protocol = $data_new[$protocol];
        // Avoid counting the file-level 'total_invalid' and 'bytes' twice.
        if ($data) {
          $data_protocol['total_invalid'] = 0;
          $data_protocol['bytes'] = 0;
        } else {
          $data_protocol['total_invalid'] = $data_new['total_invalid'];
          $data_protocol['bytes'] = $data_new['bytes'];
        }
      } else {
        $data_protocol = $data_new;
      }

      if (!isset($merged_protocol[$protocol])) $merged_protocol[$protocol] = [];
      $data_protocol['days'] = 1;
      normalize($data_protocol);
      aggregate($intervals, $merged_protocol[$protocol], $date, $date_previous, $data_protocol,
        ['protocol' => $protocol], 'protocol');

      if ($data) {
        merge($data, $data_protocol);
        $data['days'] = 1;
      } else {
        $data = $data_protocol;
      }
    }

    if (!$data) {
      error_log("[$date_string] skipping due to lack of data");
      continue;
    }

    aggregate($intervals, $merged, $date, $date_previous, $data);

    $date_previous = $date;
  }

  // Write out any remaining data by simulating a date beyond all intervals.
  /*error_log('write remaining data');
  $date = clone $date;
  $date->add(date_interval_create_from_date_string('1 year'));

  foreach (PROTOCOLS as $protocol) {
    aggregate($intervals, $merged_protocol[$protocol], $date, $date_previous, null,
      ['protocol' => $protocol], 'protocol');
  }
  aggregate($intervals, $merged, $date, $date_previous, null);*/
}
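
// Fold one day of data into each interval's running merge. Whenever the
// interval value (e.g. the week number) changes, summarize the previous
// interval, write it out, and restart the merge from the current day.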
function aggregate($intervals, &$merged, $date, $date_previous, $data, $tags = [], $prefix = 'access')
{
  foreach ($intervals as $interval => $format) {
    if ($interval === 'FQ') {
      $value = format_FQ($date);
      if (isset($date_previous))
        $value_previous = format_FQ($date_previous);
    } elseif ($interval === 'FY') {
      $value = format_FY($date);
      if (isset($date_previous))
        $value_previous = format_FY($date_previous);
    } else {
      $value = $date->format($format);
      if (isset($date_previous))
        $value_previous = $date_previous->format($format);
    }

    if (!isset($merged[$interval]) || $value != $merged[$interval]['value']) {
      if (!empty($merged[$interval]['data'])) {
        $summary = summarize($merged[$interval]['data']);
        if ($prefix === 'protocol') {
          $summary = ['-' => $summary['-']];
        }

        $flavors = [];
        foreach ($summary as $product => $details) {
          if (isset($details['flavors'])) {
            $flavors[$product] = $details['flavors'];
            unset($summary[$product]['flavors']);
          }
        }

        if (isset($value_previous) and $value != $value_previous) {
          $count = write_summary($interval, $date_previous, $summary, $tags, $prefix);
          if (isset($flavors)) {
            $count += write_flavors($interval, $date_previous, $flavors);
          }

          if ($prefix === 'access') {
            $summary = summarize_product_plus_key($merged[$interval]['data']['total_image_product']);
            $count += write_summary_product_plus_key($interval, $date_previous, $summary, 'image');
          }

          error_log("[$prefix] [$interval] [{$merged[$interval]['value']}] wrote $count points at " .
            $date_previous->format('Y-m-d') . " spanning " . $merged[$interval]['data']['days'] . ' day(s)');
        }
      }

      // Reset merge data to the current day's data.
      $merged[$interval] = [
        'data' => $data,
        'value' => $value,
      ];
    } else {
      // Merge the day onto the existing data for this interval. Building
      // higher-order intervals out of lower ones would avoid re-merging each
      // day, but weeks do not nest cleanly into months or quarters.
      merge($merged[$interval]['data'], $data);
    }
  }
}
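
// Financial periods are shifted two months ahead of the calendar, which
// appears to correspond to a financial year starting in November: for example
// 2023-11-13 shifts to 2024-01-13, giving FQ '2024-1'.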
function format_FQ($date)
{
  $financial_date = clone $date;
  date_add($financial_date, date_interval_create_from_date_string('2 months'));
  $quarter = ceil($financial_date->format('n') / 3);

  return $financial_date->format('Y') . '-' . $quarter;
}

function format_FY($date)
{
  $financial_date = clone $date;
  date_add($financial_date, date_interval_create_from_date_string('2 months'));

  return $financial_date->format('Y');
}
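
// Upgrade older cache files in place: add fields that were introduced later
// and wrap plain unique counts into the newer ['count' => N] form.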
function normalize(&$data)
{
  // Ensure fields added later, that are not present in all data, are available.
  if (!isset($data['total_image_product'])) {
    $data['total_image_product'] = [];
  }

  $first_product = reset($data['unique_product']);
  $first_key = reset($first_product);
  if (is_int($first_key)) {
    foreach ($data['unique_product'] as $product => $pairs) {
      foreach ($pairs as $key => $count) {
        $data['unique_product'][$product][$key] = ['count' => $count];
      }
    }
  }
}

function merge(&$data1, $data2)
{
  $data1['days'] += $data2['days'];
  $data1['total'] += $data2['total'];
  foreach ($data2['total_product'] as $product => $total) {
    if (empty($data1['total_product'][$product]))
      $data1['total_product'][$product] = 0;

    $data1['total_product'][$product] += $total;
  }

  merge_unique_products($data1['unique_product'], $data2['unique_product']);
  merge_product_plus_key($data1['total_image_product'], $data2['total_image_product']);

  $data1['total_invalid'] += $data2['total_invalid'];
  $data1['bytes'] += $data2['bytes'];
}

function merge_product_plus_key(&$data1, $data2)
{
  foreach ($data2 as $product => $pairs) {
    if (empty($data1[$product]))
      $data1[$product] = [];

    foreach ($pairs as $key => $value) {
      if (empty($data1[$product][$key]))
        $data1[$product][$key] = 0;

      $data1[$product][$key] += $data2[$product][$key];
    }
  }
}

function merge_unique_products(&$data1, $data2)
{
  foreach ($data2 as $product => $arrays) {
    if (empty($data1[$product]))
      $data1[$product] = [];

    foreach ($arrays as $key => $array) {
      if (empty($data1[$product][$key]))
        $data1[$product][$key] = ['count' => 0];

      $data1[$product][$key]['count'] += $array['count'];
      if (isset($array['flavor'])) $data1[$product][$key]['flavor'] = $array['flavor'];
      if (isset($array['ip'])) $data1[$product][$key]['ip'] = $array['ip'];
    }
  }
}
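
// Reduce merged interval data to per-product totals, unique counts, and
// flavor breakdowns, with the '-' pseudo-product carrying the overall totals.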
function summarize($data)
{
  static $products = [];

  $summary = [];

  $summary['-'] = [
    'total' => $data['total'],
    'total_invalid' => $data['total_invalid'],
    'bytes' => $data['bytes'],
    'unique' => 0,
  ];

  foreach ($data['total_product'] as $product => $total) {
    if (!product_filter($product)) continue;
    $summary_product = [
      'total' => $total,
    ];
    if (isset($data['unique_product'][$product])) {
      $unique_product = $data['unique_product'][$product];
      $summary_product += ['unique' => count($unique_product)];
      // A UUID should be unique to a product, so this should provide an
      // accurate count of total uniques across all products.
      $summary['-']['unique'] += $summary_product['unique'];

      $first_key = reset($data['unique_product'][$product]);
      if (isset($first_key['flavor'])) {
        $unique_flavors = array_column($data['unique_product'][$product], 'flavor');
        $flavors = array_unique($unique_flavors);
        $summary_product['flavors'] = [];
        foreach ($flavors as $flavor) {
          $summary_product['flavors'][$flavor] = count(array_keys($unique_flavors, $flavor));
        }
      }
    } else {
      $summary_product += ['unique' => 0];
    }
    $summary[$product] = $summary_product;

    // Keep track of which products have been included in a previous summary.
    if (!isset($products[$product])) $products[$product] = true;
  }

  // Fill empty data with zeros to achieve the appropriate result in graphs.
  $missing = array_diff(array_keys($products), array_keys($summary));
  foreach ($missing as $product) {
    $summary[$product] = [
      'total' => 0,
      'unique' => 0,
    ];
  }

  return $summary;
}

function summarize_product_plus_key($data)
{
  static $keys = [];

  $summary = [];
  $products = array_merge(array_keys($keys), array_keys($data));
  foreach ($products as $product) {
    if (!product_filter($product)) continue;

    $keys_keys = isset($keys[$product]) ? array_keys($keys[$product]) : [];
    $data_keys = isset($data[$product]) ? array_keys($data[$product]) : [];
    $product_keys = array_merge($keys_keys, $data_keys);

    if (!isset($keys[$product])) $keys[$product] = [];
    $summary[$product] = [];
    foreach ($product_keys as $key) {
      // Fill empty data with zeros to achieve the appropriate result in graphs.
      $keys[$product][$key] = true;
      $summary[$product][$key] = isset($data[$product][$key]) ? $data[$product][$key] : 0;
    }
  }

  return $summary;
}

function product_filter($product)
{
  return (bool) preg_match(PRODUCT_PATTERN, $product);
}

function date_period_reversed($begin, $interval, $end)
{
  $interval = DateInterval::createFromDateString($interval);
  $period = new DatePeriod($begin, $interval, $end);
  return array_reverse(iterator_to_array($period));
}
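
// Emit one point per product for the flushed interval, timestamped with the
// last day that fell within it.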
function write_summary($interval, DateTime $value, $summary, $tags = [], $prefix = 'access')
{
  $measurement = $prefix . '_' . $interval;
  $points = [];
  foreach ($summary as $product => $fields) {
    $points[] = new Point($measurement, ['product' => $product] + $tags, $fields, $value->getTimestamp());
  }
  write($points);
  return count($points);
}

function write_flavors($interval, DateTime $value, $flavors)
{
  $measurement = 'access_' . $interval;
  $points = [];
  foreach ($flavors as $product => $unique_flavors) {
    foreach ($unique_flavors as $flavor => $unique_count) {
      $tags = ['product' => $product, 'flavor' => $flavor];
      $fields = ['value' => $unique_count];
      $points[] = new Point($measurement, $tags, $fields, $value->getTimestamp());
    }
  }
  write($points);
  return count($points);
}

function write_summary_product_plus_key($interval, DateTime $date, $summary, $prefix)
{
  $measurement = $prefix . '_' . $interval;
  $points = [];
  foreach ($summary as $product => $pairs) {
    foreach ($pairs as $key => $value) {
      $points[] = new Point($measurement,
        ['product' => $product, 'key' => $key], ['value' => $value], $date->getTimestamp());
    }
  }
  write($points);
  return count($points);
}
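
// Construct the InfluxDB client lazily and write the points to the local
// osrt_access bucket.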
function write($points)
{
  static $client;
  static $writeApi;

  if (!$client) {
    $client = new Client([
      "url" => "http://localhost:8086",
      "token" => "",
      "bucket" => "osrt_access/autogen",
      "org" => "-",
      "precision" => InfluxDB2\Model\WritePrecision::S
    ]);
    $writeApi = $client->createWriteApi();
  }

  if (!is_null($writeApi->write($points)))
    die('failed to write points');
}