Optimize searching access logs
* Use binary search to match accessed packages * Write package statistics to InfluxDB
This commit is contained in:
parent
adc3750f2b
commit
9d4a45e576
@ -1,6 +1,8 @@
|
|||||||
#!/usr/bin/php
|
#!/usr/bin/php
|
||||||
<?php
|
<?php
|
||||||
|
|
||||||
|
include 'utils.php';
|
||||||
|
|
||||||
use InfluxDB\Point;
|
use InfluxDB\Point;
|
||||||
use InfluxDB\Database;
|
use InfluxDB\Database;
|
||||||
|
|
||||||
@ -11,7 +13,6 @@ const LANGLEY = 'http://langley.suse.de/pub/pontifex%s-opensuse.suse.de';
|
|||||||
const VHOST = 'download.opensuse.org';
|
const VHOST = 'download.opensuse.org';
|
||||||
const FILENAME = 'download.opensuse.org-%s-access_log.xz';
|
const FILENAME = 'download.opensuse.org-%s-access_log.xz';
|
||||||
const IPV6_PREFIX = 'ipv6.';
|
const IPV6_PREFIX = 'ipv6.';
|
||||||
const PRODUCT_PATTERN = '/^(10\.[2-3]|11\.[0-4]|12\.[1-3]|13\.[1-2]|42\.[1-3]|15\.[0-1]|tumbleweed)$/';
|
|
||||||
|
|
||||||
$begin = new DateTime();
|
$begin = new DateTime();
|
||||||
// Skip the current day since the logs are incomplete and not compressed yet.
|
// Skip the current day since the logs are incomplete and not compressed yet.
|
||||||
@ -224,6 +225,9 @@ function aggregate($intervals, &$merged, $date, $date_previous, $data, $tags = [
|
|||||||
if ($prefix == 'access') {
|
if ($prefix == 'access') {
|
||||||
$summary = summarize_product_plus_key($merged[$interval]['data']['total_image_product']);
|
$summary = summarize_product_plus_key($merged[$interval]['data']['total_image_product']);
|
||||||
$count += write_summary_product_plus_key($interval, $date_previous, $summary, 'image');
|
$count += write_summary_product_plus_key($interval, $date_previous, $summary, 'image');
|
||||||
|
|
||||||
|
$summary = summarize_product_plus_key($merged[$interval]['data']['total_package_product']);
|
||||||
|
$count += write_summary_product_plus_key($interval, $date_previous, $summary, 'package');
|
||||||
}
|
}
|
||||||
|
|
||||||
error_log("[$prefix] [$interval] [{$merged[$interval]['value']}] wrote $count points at " .
|
error_log("[$prefix] [$interval] [{$merged[$interval]['value']}] wrote $count points at " .
|
||||||
@ -364,11 +368,6 @@ function summarize_product_plus_key($data)
|
|||||||
return $summary;
|
return $summary;
|
||||||
}
|
}
|
||||||
|
|
||||||
function product_filter($product)
|
|
||||||
{
|
|
||||||
return (bool) preg_match(PRODUCT_PATTERN, $product);
|
|
||||||
}
|
|
||||||
|
|
||||||
function date_period_reversed($begin, $interval, $end)
|
function date_period_reversed($begin, $interval, $end)
|
||||||
{
|
{
|
||||||
$interval = DateInterval::createFromDateString($interval);
|
$interval = DateInterval::createFromDateString($interval);
|
||||||
|
@ -1,6 +1,8 @@
|
|||||||
#!/usr/bin/php
|
#!/usr/bin/php
|
||||||
<?php
|
<?php
|
||||||
|
|
||||||
|
include 'utils.php';
|
||||||
|
|
||||||
const REGEX_LINE = '/\S+ \S+ \S+ \[([^:]+:\d+:\d+:\d+ [^\]]+)\] "(\S+)(?: (\S+) \S+)?" (\S+) (\S+) "[^"]*" "[^"]*" .* size:(\S+) \S+(?: +"?(\S+-\S+-\S+-\S+-[^\s"]+|-)"? "?(dvd|ftp|-)"?)?/';
|
const REGEX_LINE = '/\S+ \S+ \S+ \[([^:]+:\d+:\d+:\d+ [^\]]+)\] "(\S+)(?: (\S+) \S+)?" (\S+) (\S+) "[^"]*" "[^"]*" .* size:(\S+) \S+(?: +"?(\S+-\S+-\S+-\S+-[^\s"]+|-)"? "?(dvd|ftp|-)"?)?/';
|
||||||
const REGEX_PRODUCT = '#/(?:(tumbleweed)|distribution/(?:leap/)?(\d+\.\d+)|openSUSE(?:_|:/)(?:leap(?:_|:/))?(factory|tumbleweed|\d+\.\d+))#i';
|
const REGEX_PRODUCT = '#/(?:(tumbleweed)|distribution/(?:leap/)?(\d+\.\d+)|openSUSE(?:_|:/)(?:leap(?:_|:/))?(factory|tumbleweed|\d+\.\d+))#i';
|
||||||
const REGEX_IMAGE = '#(?:/(?:iso|live)/[^/]+-(DVD|NET|GNOME-Live|KDE-Live|Rescue-CD|Kubic-DVD)-[^/]+\.iso(?:\.torrent)?|/jeos/[^/]+-(JeOS)\.[^/]+\.(?:qcow2|vhdx|vmdk|vmx)$)#';
|
const REGEX_IMAGE = '#(?:/(?:iso|live)/[^/]+-(DVD|NET|GNOME-Live|KDE-Live|Rescue-CD|Kubic-DVD)-[^/]+\.iso(?:\.torrent)?|/jeos/[^/]+-(JeOS)\.[^/]+\.(?:qcow2|vhdx|vmdk|vmx)$)#';
|
||||||
@ -12,23 +14,68 @@ $total_product = [];
|
|||||||
$unique_product = [];
|
$unique_product = [];
|
||||||
$total_image_product = [];
|
$total_image_product = [];
|
||||||
$total_package_product = [];
|
$total_package_product = [];
|
||||||
|
$fallback_packages = get_packages_list('tumbleweed');
|
||||||
|
|
||||||
$packages_file = '15.2_packages';
|
// Error handler that turns reported PHP errors into ErrorException.
// $severity - level of the raised error (E_* constant)
// $message  - error message
// $file     - file in which the error was raised
// $line     - line at which the error was raised
// Throws ErrorException (code 0) when $severity is part of the current
// error_reporting() mask; otherwise returns without a value.
function exception_error_handler($severity, $message, $file, $line) {
    $is_reported = (error_reporting() & $severity) !== 0;

    if ($is_reported) {
        throw new ErrorException($message, 0, $severity, $file, $line);
    }

    // This error code is not included in error_reporting: nothing to do.
}
|
||||||
|
set_error_handler("exception_error_handler");
|
||||||
|
|
||||||
|
// Load the list of known package names for a product.
// $product - product identifier; the list is read from "packages/<product>"
//            relative to the current working directory
// Returns a byte-wise sorted, zero-indexed array of non-empty package names,
// or null when the file cannot be read (the caller is expected to fall back).
function get_packages_list($product) {
    $packages_file = "packages/" . $product;

    $packages = false;
    try {
        // Check readability explicitly so a missing file is detected even
        // when no error handler converts the file() warning to an exception.
        if (is_readable($packages_file)) {
            $packages = file($packages_file, FILE_IGNORE_NEW_LINES);
        }
    } catch (ErrorException $e) {
        $packages = false;
    }

    if ($packages === false) {
        // Diagnostics go to the error log rather than stdout so they cannot
        // mix with whatever the script prints as its result.
        error_log('Has not found packages file for ' . $product . '. Using fallback.');
        return null;
    }

    // Drop blank lines: an empty entry is a prefix of every string and would
    // therefore "match" any needle during the prefix search.
    $packages = array_filter(array_map('trim', $packages), function ($name) {
        return $name !== '';
    });

    // SORT_STRING keeps the order consistent with the strncmp()-based binary
    // search; the default flag compares numeric-looking names as numbers.
    sort($packages, SORT_STRING);
    return $packages;
}
|
||||||
|
|
||||||
// Find a substring at the beginning of a string from an array of substrings
|
// Find a substring at the beginning of a string from an array of substrings
|
||||||
// $substrings - array of possible substrings (needles)
|
// $substrings - array of possible substrings (needles)
|
||||||
// $string - examined string (haystack)
|
// $string - examined string (haystack)
|
||||||
// Returns the first match
|
// Returns the first match
|
||||||
function find_substring($substrings, $string) {
    // Nothing to search in (null or empty list).
    if (empty($substrings)) {
        return NULL;
    }

    $result_index = binary_string_search($substrings, 0, count($substrings) - 1, $string);

    // binary_string_search() reports "not found" as false. A loose ">= 0"
    // test would accept it (false >= 0 is true in PHP) and wrongly use
    // element 0, so the miss has to be checked strictly.
    if ($result_index === false || $result_index < 0) {
        return NULL;
    }

    // Normalise the index to int before using it as an array offset.
    $result_index = (int) $result_index;

    // The hit may be a shorter name than the best one; let
    // check_next_element() extend it to following entries that also prefix $string.
    return check_next_element($substrings, $string, $result_index, $substrings[$result_index]);
}
|
||||||
|
|
||||||
|
// Starting from a known match, walk forward through the sorted list looking
// for later entries that are also a prefix of the examined string.
// $substrings - sorted array of candidate substrings
// $string     - examined string (haystack)
// $index      - position of the entry to continue from
// $match      - best match found so far
// Returns the last entry found that prefixes $string (case-insensitively),
// or $match when no later entry qualifies.
function check_next_element($substrings, $string, $index, $match) {
    $last_index = count($substrings) - 1;
    $string_length = strlen($string);

    // Stop at the end of the list instead of reading past it.
    while ($index < $last_index) {
        $candidate = $substrings[$index + 1];

        if (stripos($string, $candidate) === 0) {
            // The next entry also prefixes $string: it becomes the new match.
            $match = $candidate;
        } elseif (!(stripos($candidate, $match) === 0 &&
                    strncmp($candidate, $string, $string_length) < 0)) {
            // The next entry neither extends the current match nor sorts
            // before $string, so the scan ends here.
            break;
        }

        $index++;
    }

    return $match;
}
|
||||||
|
|
||||||
|
// Binary search over a sorted list for an entry that is a prefix of $needle.
// $haystack - array of strings sorted in byte order
// $start    - first index of the range to search (inclusive)
// $end      - last index of the range to search (inclusive)
// $needle   - string whose beginning is compared against the entries
// Returns the integer index of a matching entry, or false when none is found.
// Index 0 is a valid hit, so callers must compare the result with === false.
//
// NOTE(review): each probe compares only strlen(entry) bytes, so an entry that
// prefixes $needle can be skipped when the probe lands on a longer entry that
// sorts between it and $needle (e.g. 'foo' in ['foo', 'foo-aaa', 'foo-abc']
// is not found for 'foo-bar').
function binary_string_search($haystack, $start, $end, $needle) {
    $low = (int) $start;
    $high = (int) $end;

    while ($low <= $high) {
        // Integer midpoint; avoids the float that floor() would produce.
        $mid_index = $low + (($high - $low) >> 1);
        $candidate = $haystack[$mid_index];
        $comparison = strncmp($candidate, $needle, strlen($candidate));

        if ($comparison == 0) {
            return $mid_index;
        }

        if ($comparison > 0) {
            $high = $mid_index - 1;
        } else {
            $low = $mid_index + 1;
        }
    }

    return false;
}
|
||||||
|
|
||||||
$file = $argc == 2 ? $argv[1] : 'php://stdin';
|
$file = $argc == 2 ? $argv[1] : 'php://stdin';
|
||||||
@ -42,6 +89,8 @@ while (($line = fgets($handle)) !== false) {
|
|||||||
|
|
||||||
// Only interested in GET or HEAD requests, others are invalid.
|
// Only interested in GET or HEAD requests, others are invalid.
|
||||||
if ($match[2] != 'GET' && $match[2] != 'HEAD') continue;
|
if ($match[2] != 'GET' && $match[2] != 'HEAD') continue;
|
||||||
|
// Not interested on errors.
|
||||||
|
if ($match[4] >= '400') continue;
|
||||||
$total++;
|
$total++;
|
||||||
|
|
||||||
// Attempt to determine for which product was the request.
|
// Attempt to determine for which product was the request.
|
||||||
@ -53,11 +102,19 @@ while (($line = fgets($handle)) !== false) {
|
|||||||
$values = array_filter($match_product);
|
$values = array_filter($match_product);
|
||||||
$product = str_replace('factory', 'tumbleweed', strtolower(next($values)));
|
$product = str_replace('factory', 'tumbleweed', strtolower(next($values)));
|
||||||
|
|
||||||
if (!isset($total_product[$product])) $total_product[$product] = 0;
|
if (!isset($total_product[$product])) {
|
||||||
|
$total_product[$product] = 0;
|
||||||
|
if (product_filter($product)) {
|
||||||
|
$packages[$product] = get_packages_list($product);
|
||||||
|
if (is_null($packages[$product])) {
|
||||||
|
$packages[$product] = &$fallback_packages;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
$total_product[$product] += 1;
|
$total_product[$product] += 1;
|
||||||
|
|
||||||
if (preg_match(REGEX_RPM_NAME, $match[3], $match_rpm_name)) {
|
if (product_filter($product) && preg_match(REGEX_RPM_NAME, $match[3], $match_rpm_name)) {
|
||||||
$package = find_substring($packages, $match_rpm_name[1]);
|
$package = find_substring($packages[$product], $match_rpm_name[1]);
|
||||||
if ($package) {
|
if ($package) {
|
||||||
if (!isset($total_package_product[$product])) $total_package_product[$product] = [];
|
if (!isset($total_package_product[$product])) $total_package_product[$product] = [];
|
||||||
if (!isset($total_package_product[$product][$package])) $total_package_product[$product][$package] = 0;
|
if (!isset($total_package_product[$product][$package])) $total_package_product[$product][$package] = 0;
|
||||||
|
5431
metrics/access/packages/10.2
Normal file
5431
metrics/access/packages/10.2
Normal file
File diff suppressed because it is too large
Load Diff
6770
metrics/access/packages/10.3
Normal file
6770
metrics/access/packages/10.3
Normal file
File diff suppressed because it is too large
Load Diff
7360
metrics/access/packages/11.0
Normal file
7360
metrics/access/packages/11.0
Normal file
File diff suppressed because it is too large
Load Diff
13500
metrics/access/packages/11.1
Normal file
13500
metrics/access/packages/11.1
Normal file
File diff suppressed because it is too large
Load Diff
15069
metrics/access/packages/11.2
Normal file
15069
metrics/access/packages/11.2
Normal file
File diff suppressed because it is too large
Load Diff
15573
metrics/access/packages/11.3
Normal file
15573
metrics/access/packages/11.3
Normal file
File diff suppressed because it is too large
Load Diff
17559
metrics/access/packages/11.4
Normal file
17559
metrics/access/packages/11.4
Normal file
File diff suppressed because it is too large
Load Diff
20157
metrics/access/packages/12.1
Normal file
20157
metrics/access/packages/12.1
Normal file
File diff suppressed because it is too large
Load Diff
23596
metrics/access/packages/12.2
Normal file
23596
metrics/access/packages/12.2
Normal file
File diff suppressed because it is too large
Load Diff
32531
metrics/access/packages/12.3
Normal file
32531
metrics/access/packages/12.3
Normal file
File diff suppressed because it is too large
Load Diff
35175
metrics/access/packages/13.1
Normal file
35175
metrics/access/packages/13.1
Normal file
File diff suppressed because it is too large
Load Diff
38377
metrics/access/packages/13.2
Normal file
38377
metrics/access/packages/13.2
Normal file
File diff suppressed because it is too large
Load Diff
53189
metrics/access/packages/15.0
Normal file
53189
metrics/access/packages/15.0
Normal file
File diff suppressed because it is too large
Load Diff
53396
metrics/access/packages/15.1
Normal file
53396
metrics/access/packages/15.1
Normal file
File diff suppressed because it is too large
Load Diff
58047
metrics/access/packages/15.2
Normal file
58047
metrics/access/packages/15.2
Normal file
File diff suppressed because it is too large
Load Diff
37901
metrics/access/packages/42.1
Normal file
37901
metrics/access/packages/42.1
Normal file
File diff suppressed because it is too large
Load Diff
42681
metrics/access/packages/42.2
Normal file
42681
metrics/access/packages/42.2
Normal file
File diff suppressed because it is too large
Load Diff
49369
metrics/access/packages/42.3
Normal file
49369
metrics/access/packages/42.3
Normal file
File diff suppressed because it is too large
Load Diff
60213
metrics/access/packages/tumbleweed
Normal file
60213
metrics/access/packages/tumbleweed
Normal file
File diff suppressed because it is too large
Load Diff
8
metrics/access/utils.php
Normal file
8
metrics/access/utils.php
Normal file
@ -0,0 +1,8 @@
|
|||||||
|
<?php
|
||||||
|
|
||||||
|
// Products for which statistics are kept: anchored match on the product
// identifier (a version number or 'tumbleweed').
// NOTE(review): 15.2 is not matched by this pattern although a packages/15.2
// list exists - confirm whether '15\.[0-1]' should be extended.
const PRODUCT_PATTERN = '/^(10\.[2-3]|11\.[0-4]|12\.[1-3]|13\.[1-2]|42\.[1-3]|15\.[0-1]|tumbleweed)$/';

// Tell whether a product identifier matches PRODUCT_PATTERN.
// $product - product identifier to test
// Returns true on a match, false otherwise.
function product_filter($product)
{
    $matched = preg_match(PRODUCT_PATTERN, $product);

    return $matched === 1;
}
|
Loading…
x
Reference in New Issue
Block a user