82 lines
2.9 KiB
PHP
Raw Normal View History

2018-06-14 22:45:58 -05:00
#!/usr/bin/php
<?php
2023-05-02 16:33:16 +02:00
// Pattern for a single access-log line. Capture groups, as used further down:
//   1: first field of the line (stored as the client 'ip')
//   2: bracketed timestamp
//   3: request method
//   4: request path (empty when the quoted request holds only one token)
//   5: response status
//   6: the field following the status (presumably the response size — confirm)
//   7: the token following "size:" or "want:- give:- <n> "
//   8: optional client UUID, or "-"
//   9: optional install medium/flavor (dvd, ftp, mini, usb-*, livecd-*, appliance*), or "-"
// Groups 8 and 9 belong to one optional trailing section, so they are either
// both present or both absent from the match array.
const REGEX_LINE = '/(\S+) \S+ \S+ \[([^:]+:\d+:\d+:\d+ [^\]]+)\] "(\S+)(?: (\S+) \S+)?" (\S+) (\S+) "[^"]*" "[^"]*" .* (?:size:|want:- give:- \d+ )(\S+) \S+(?: +"?(\S+-\S+-\S+-\S+-[^\s"]+|-)"? "?(dvd|ftp|mini|usb-[^"]*|livecd-[^"]*|appliance-?[^"]*|-)"?)?/';
2018-06-14 22:45:58 -05:00
// Extracts the product from a request path (case-insensitive). Exactly one of
// the three groups is filled, depending on which path layout matched:
//   1: "/tumbleweed"
//   2: "/distribution/<X.Y>" or "/distribution/leap/<X.Y>"
//   3: "/openSUSE_<name>" or "/openSUSE:/<name>", optionally with a "leap_" or
//      "leap:/" prefix before <name>, where <name> is factory, tumbleweed or X.Y
const REGEX_PRODUCT = '#/(?:(tumbleweed)|distribution/(?:leap/)?(\d+\.\d+)|openSUSE(?:_|:/)(?:leap(?:_|:/))?(factory|tumbleweed|\d+\.\d+))#i';
// Recognises image downloads in a request path (case-sensitive):
//   1: medium type of an .iso (optionally .iso.torrent) under /iso/ or /live/
//   2: "JeOS" for a qcow2/vhdx/vmdk/vmx image under /jeos/ (must end the path)
const REGEX_IMAGE = '#(?:/(?:iso|live)/[^/]+-(DVD|NET|GNOME-Live|KDE-Live|Rescue-CD|Kubic-DVD)-[^/]+\.iso(?:\.torrent)?|/jeos/[^/]+-(JeOS)\.[^/]+\.(?:qcow2|vhdx|vmdk|vmx)$)#';
2018-06-14 22:45:58 -05:00
// Scalar counters: accepted requests and lines that could not be parsed.
$total = $total_invalid = 0;
// Per-product tables: request totals, per-UUID details, and per-image totals.
$total_product = $unique_product = $total_image_product = [];
2018-06-14 22:45:58 -05:00
// Read from the file given as the sole command-line argument, otherwise from stdin.
$file = $argc == 2 ? $argv[1] : 'php://stdin';
$handle = fopen($file, 'r');
if ($handle === false) {
    // fopen() returns false on failure; none of the later stream calls can
    // work with that, so stop here with a clear message and a failing status.
    error_log('failed to open ' . $file);
    exit(1);
}
// Walk the log line by line, tallying requests per product, per client UUID
// and per downloaded image.
while (($line = fgets($handle)) !== false) {
    if (preg_match(REGEX_LINE, $line, $fields) !== 1) {
        $total_invalid++;
        error_log('[failed to parse] ' . rtrim($line));
        continue;
    }

    $method = $fields[3];
    $path = $fields[4];
    $status = $fields[5];

    // Only GET and HEAD requests are of interest, and only those that did not
    // end in an error status.
    if (!in_array($method, ['GET', 'HEAD'], true) || $status >= '400') {
        continue;
    }
    $total++;

    // Try to work out which product the request was for; requests that cannot
    // be attributed only count towards the overall total.
    if (!preg_match(REGEX_PRODUCT, $path, $product_groups)) {
        continue;
    }
    // After dropping empty groups, index 0 is the whole match and index 1 is
    // the single group that captured the product.
    $captured = array_values(array_filter($product_groups));
    $product = str_replace('factory', 'tumbleweed', strtolower($captured[1]));

    $total_product[$product] = isset($total_product[$product])
        ? $total_product[$product] + 1
        : 1;

    // The match array has ten entries only when the optional UUID/flavor
    // section was present on the line.
    if (count($fields) === 10 && $fields[8] !== '-') {
        $uuid = $fields[8];
        if (isset($unique_product[$product][$uuid])) {
            $unique_product[$product][$uuid]['count']++;
        } else {
            // First sighting of this UUID for the product: remember the
            // flavor and address it was first seen with.
            $unique_product[$product][$uuid] = [
                'count' => 1,
                'flavor' => $fields[9],
                'ip' => $fields[1],
            ];
        }
    }

    if (preg_match(REGEX_IMAGE, $path, $image_groups)) {
        // Same selection as for the product: index 1 is the one filled group.
        $captured = array_values(array_filter($image_groups));
        $image = $captured[1];
        if (!isset($total_image_product[$product][$image])) {
            $total_image_product[$product][$image] = 0;
        }
        $total_image_product[$product][$image]++;
    }
}
// Note how far into the stream we read, then release it.
$position = ftell($handle);
fclose($handle);

// Human-readable summary, written through error_log().
error_log(sprintf('processed %s bytes', number_format($position)));
error_log(sprintf(
    'found %s requests across %s products',
    number_format($total),
    number_format(count($total_product))
));

// Order both per-product tables by product key.
ksort($total_product);
ksort($unique_product);
2021-11-18 13:14:56 +01:00
// Emit the report only when some input was actually read.
if ($position) {
    // Add JSON_PRETTY_PRINT as the second argument for debugging.
    $json = json_encode([
        'total' => $total,
        'total_product' => $total_product,
        'unique_product' => $unique_product,
        'total_image_product' => $total_image_product,
        'total_invalid' => $total_invalid,
        'bytes' => $position,
    ]);
    if ($json === false) {
        // json_encode() returns false on failure, e.g. when a UUID or address
        // captured from the log contains malformed UTF-8. Printing that would
        // yield just an empty line, so report the reason and fail instead.
        error_log('failed to encode report: ' . json_last_error_msg());
        exit(1);
    }
    echo $json . "\n";
}