Merge pull request #1578 from jberry-suse/metrics-access
Metrics access log parsing tool for ingesting download.o.o logs.
This commit is contained in:
commit
d41cbda37e
2
Makefile
2
Makefile
@ -22,6 +22,8 @@ install:
|
|||||||
$(DESTDIR)$(pkgdatadir)/osc-staging.py
|
$(DESTDIR)$(pkgdatadir)/osc-staging.py
|
||||||
for i in $(pkgdata_BINS); do ln -s $(pkgdatadir)/$$i $(DESTDIR)$(bindir)/osrt-$${i%.*}; done
|
for i in $(pkgdata_BINS); do ln -s $(pkgdatadir)/$$i $(DESTDIR)$(bindir)/osrt-$${i%.*}; done
|
||||||
install -m 755 script/* $(DESTDIR)$(bindir)
|
install -m 755 script/* $(DESTDIR)$(bindir)
|
||||||
|
ln -s $(pkgdatadir)/metrics/access/aggregate.php $(DESTDIR)$(bindir)/osrt-metrics-access-aggregate
|
||||||
|
ln -s $(pkgdatadir)/metrics/access/ingest.php $(DESTDIR)$(bindir)/osrt-metrics-access-ingest
|
||||||
cp -R config/* $(DESTDIR)$(sysconfdir)/$(package_name)
|
cp -R config/* $(DESTDIR)$(sysconfdir)/$(package_name)
|
||||||
for dir in dashboards datasources ; do ln -s $(pkgdatadir)/metrics/grafana/provisioning/$$dir.yaml \
|
for dir in dashboards datasources ; do ln -s $(pkgdatadir)/metrics/grafana/provisioning/$$dir.yaml \
|
||||||
$(DESTDIR)$(grafana_provisioning_dir)/$$dir/$(package_name).yaml ; done
|
$(DESTDIR)$(grafana_provisioning_dir)/$$dir/$(package_name).yaml ; done
|
||||||
|
27
dist/package/openSUSE-release-tools.spec
vendored
27
dist/package/openSUSE-release-tools.spec
vendored
@ -167,6 +167,24 @@ Suggests: influxdb
|
|||||||
%description metrics
|
%description metrics
|
||||||
Ingest relevant OBS and annotation data to generate insightful metrics.
|
Ingest relevant OBS and annotation data to generate insightful metrics.
|
||||||
|
|
||||||
|
%package metrics-access
|
||||||
|
Summary: Ingest access logs to generate metrics
|
||||||
|
Group: Development/Tools/Other
|
||||||
|
BuildArch: noarch
|
||||||
|
# Used to stream log files.
|
||||||
|
Requires: curl
|
||||||
|
Requires: %{name}-metrics = %{version}
|
||||||
|
Requires: php > 7
|
||||||
|
# Used to install influxdb/influxdb-php.
|
||||||
|
Requires: php-composer
|
||||||
|
# pgrep used in aggregate.php
|
||||||
|
Requires: procps
|
||||||
|
# xzcat for decompressing log files.
|
||||||
|
Requires: xz
|
||||||
|
|
||||||
|
%description metrics-access
|
||||||
|
Ingest download.o.o Apache access logs and generate metrics.
|
||||||
|
|
||||||
%package repo-checker
|
%package repo-checker
|
||||||
Summary: Repository checker service
|
Summary: Repository checker service
|
||||||
Group: Development/Tools/Other
|
Group: Development/Tools/Other
|
||||||
@ -474,6 +492,7 @@ fi
|
|||||||
%defattr(-,root,root,-)
|
%defattr(-,root,root,-)
|
||||||
%{_bindir}/osrt-metrics
|
%{_bindir}/osrt-metrics
|
||||||
%{_datadir}/%{source_dir}/metrics
|
%{_datadir}/%{source_dir}/metrics
|
||||||
|
%exclude %{_datadir}/%{source_dir}/metrics/access
|
||||||
%{_datadir}/%{source_dir}/metrics.py
|
%{_datadir}/%{source_dir}/metrics.py
|
||||||
%{_datadir}/%{source_dir}/metrics_release.py
|
%{_datadir}/%{source_dir}/metrics_release.py
|
||||||
# To avoid adding grafana as BuildRequires since it does not live in same repo.
|
# To avoid adding grafana as BuildRequires since it does not live in same repo.
|
||||||
@ -488,6 +507,14 @@ fi
|
|||||||
%{_unitdir}/osrt-metrics-release@.service
|
%{_unitdir}/osrt-metrics-release@.service
|
||||||
%{_unitdir}/osrt-metrics-release@.timer
|
%{_unitdir}/osrt-metrics-release@.timer
|
||||||
|
|
||||||
|
%files metrics-access
|
||||||
|
%defattr(-,root,root,-)
|
||||||
|
%{_bindir}/osrt-metrics-access-aggregate
|
||||||
|
%{_bindir}/osrt-metrics-access-ingest
|
||||||
|
%{_datadir}/%{source_dir}/metrics/access
|
||||||
|
%{_unitdir}/osrt-metrics-access.service
|
||||||
|
%{_unitdir}/osrt-metrics-access.timer
|
||||||
|
|
||||||
%files repo-checker
|
%files repo-checker
|
||||||
%defattr(-,root,root,-)
|
%defattr(-,root,root,-)
|
||||||
%{_bindir}/osrt-repo_checker
|
%{_bindir}/osrt-repo_checker
|
||||||
|
48
metrics/access/README.md
Normal file
48
metrics/access/README.md
Normal file
@ -0,0 +1,48 @@
|
|||||||
|
# Metrics Access
|
||||||
|
|
||||||
|
Ingest [download.o.o](http://download.opensuse.org/) Apache access logs and generate metrics.
|
||||||
|
|
||||||
|
The basic flow:
|
||||||
|
- stream log file
|
||||||
|
- decompress
|
||||||
|
- run through `ingest.php` which creates summary _json_ files
|
||||||
|
|
||||||
|
This process runs multiple log ingests concurrently and will wait for all sub-processes to complete.
|
||||||
|
|
||||||
|
The cached data is then:
|
||||||
|
- loaded
|
||||||
|
- aggregated by intervals (day, week, month)
|
||||||
|
- summarized
|
||||||
|
- written to influxdb
|
||||||
|
|
||||||
|
A separate, minimal set of aggregation done for each IP protocol data.
|
||||||
|
|
||||||
|
## Usage
|
||||||
|
|
||||||
|
- `aggregate.php`: invoke to manage entire process including calling (`ingest.php`)
|
||||||
|
- `ingest.php`: used to parse a single log file / stream and dump summary JSON to stdout
|
||||||
|
|
||||||
|
See `~/.cache/openSUSE-release-tools-access` for cache data separated by IP protocol. A single JSON file corresponds to a single access log file.
|
||||||
|
|
||||||
|
## Future product versions
|
||||||
|
|
||||||
|
All `openSUSE` style product versions are parsed via `ingest.php` and included in summary JSON files. Any request path to either the main product repositories or any respository seemingly built against a product is included. There are many bogus products found on OBS like `openSUSE_Leap_42.22222` and such which are filtered out during the aggregation step. This allows for the products included in the final output to be independent of the parse-time determination. By filtering valid products last, new product patterns may be added after access to those products has begun and been parsed.
|
||||||
|
|
||||||
|
- See `REGEX_PRODUCT` in `ingest.php` for the generalized product path detection.
|
||||||
|
- See `PRODUCT_PATTERN` in `aggregate.php` for the final product filter (note only version number is included).
|
||||||
|
|
||||||
|
A possible improvement would be to automate the update of this pattern based on information in OBS.
|
||||||
|
|
||||||
|
A product specific annotation may be added to the Grafana dashboard by duplicating the query used for the other products assuming a schedule was added to the `metrics/annotation` directory.
|
||||||
|
|
||||||
|
## Factory vs Tumbleweed
|
||||||
|
|
||||||
|
Since many repositories that build against Tumbleweed are still named `openSUSE_Factory` and the transition between the names was not done automatically it is not fully possible to determine which "product" was the target. As such all `Factory` and `Tumbleweed` names are merged and counted under `Tumbleweed` including main repository access. This could be extended to show some sort of conversion from `Factory` to `Tumbleweed`, but the primary goal was to show total users.
|
||||||
|
|
||||||
|
## Considerations
|
||||||
|
|
||||||
|
Given the archival log data is located on a different network from the active data the tool must be run from a machine with access to both or in two steps. Once the summary data has been generated access to the original log files is no longer necessary.
|
||||||
|
|
||||||
|
Existing tools (like _telegraf_) were evaluated, but found to be far too slow to process the greater than `20TB` of raw access log data. _PHP_ was selected since it runs around an order of magnitude faster than python to simply open log file and run a "startswith" on against each line. Adding additional logic sees the performance gap widen significantly. After all is said and done ~500,000 entries/second is acheived on each core of development laptop. There is no comparison to the less than 1,000 entries/second processed by _telegraf_.
|
||||||
|
|
||||||
|
The original run on `tortuga.suse.de`, using 7 cores, took roughly _23 hours_ to process `22TB` of data into `12GB` of summary data. This data takes up less than `6MB` in _influxdb_ once aggregated.
|
333
metrics/access/aggregate.php
Executable file
333
metrics/access/aggregate.php
Executable file
@ -0,0 +1,333 @@
|
|||||||
|
#!/usr/bin/php
|
||||||
|
<?php
|
||||||
|
|
||||||
|
use InfluxDB\Point;
|
||||||
|
use InfluxDB\Database;
|
||||||
|
|
||||||
|
$CACHE_DIR = $_SERVER['HOME'] . '/.cache/openSUSE-release-tools-access';
|
||||||
|
const PROTOCOLS = ['ipv4', 'ipv6'];
|
||||||
|
const PONTIFEX = 'http://downloadlogs.infra.opensuse.org';
|
||||||
|
const LANGLEY = 'http://langley.suse.de/pub/pontifex%s-opensuse.suse.de';
|
||||||
|
const VHOST = 'download.opensuse.org';
|
||||||
|
const FILENAME = 'download.opensuse.org-%s-access_log.xz';
|
||||||
|
const IPV6_PREFIX = 'ipv6.';
|
||||||
|
const PRODUCT_PATTERN = '/^(10\.[2-3]|11\.[0-4]|12\.[1-3]|13\.[1-2]|42\.[1-3]|15\.[0]|tumbleweed)$/';
|
||||||
|
|
||||||
|
$begin = new DateTime();
|
||||||
|
$source_map = [
|
||||||
|
'ipv4' => [
|
||||||
|
'2010-01-03' => false,
|
||||||
|
'2014-04-14' => sprintf(LANGLEY, 2) . '/' . VHOST,
|
||||||
|
'2017-12-04' => sprintf(LANGLEY, 3) . '/' . VHOST,
|
||||||
|
// 2017-12-05 has bad permissions on langley and is still on origin.
|
||||||
|
$begin->format('Y-m-d') => sprintf(PONTIFEX) . '/' . VHOST,
|
||||||
|
'filename' => FILENAME,
|
||||||
|
],
|
||||||
|
'ipv6' => [
|
||||||
|
'2012-12-31' => false,
|
||||||
|
'2017-12-04' => sprintf(LANGLEY, 3) . '/' . IPV6_PREFIX . VHOST,
|
||||||
|
$begin->format('Y-m-d') => sprintf(PONTIFEX) . '/' . IPV6_PREFIX . VHOST,
|
||||||
|
'filename' => IPV6_PREFIX . FILENAME,
|
||||||
|
],
|
||||||
|
];
|
||||||
|
$end = new DateTime(key($source_map['ipv4'])); // decide about adding one day
|
||||||
|
$period_reversed = date_period_reversed($end, '1 day', $begin);
|
||||||
|
|
||||||
|
error_log('begin: ' . $begin->format('Y-m-d'));
|
||||||
|
error_log('end: ' . $end->format('Y-m-d'));
|
||||||
|
error_log('count: ' . number_format(count($period_reversed)) . ' days');
|
||||||
|
|
||||||
|
cache_init();
|
||||||
|
ingest_all($period_reversed, $source_map);
|
||||||
|
aggregate_all($period_reversed);
|
||||||
|
|
||||||
|
|
||||||
|
function cache_init()
|
||||||
|
{
|
||||||
|
global $CACHE_DIR;
|
||||||
|
if (!file_exists($CACHE_DIR)) {
|
||||||
|
foreach (PROTOCOLS as $protocol) {
|
||||||
|
mkdir("$CACHE_DIR/$protocol", 0755, true);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Avoid packaging mess while still automating, but not ideal.
|
||||||
|
passthru('cd ' . escapeshellarg($CACHE_DIR) . ' && composer require influxdb/influxdb-php ~1');
|
||||||
|
}
|
||||||
|
|
||||||
|
require "$CACHE_DIR/vendor/autoload.php";
|
||||||
|
}
|
||||||
|
|
||||||
|
function ingest_all($period_reversed, $source_map)
|
||||||
|
{
|
||||||
|
global $CACHE_DIR;
|
||||||
|
$source = [];
|
||||||
|
$found = [];
|
||||||
|
// Walk backwards until found in cache.
|
||||||
|
foreach ($period_reversed as $date) {
|
||||||
|
$date_string = $date->format('Y-m-d');
|
||||||
|
|
||||||
|
foreach (PROTOCOLS as $protocol) {
|
||||||
|
if (!empty($found[$protocol])) continue;
|
||||||
|
if (isset($source_map[$protocol][$date_string]))
|
||||||
|
$source[$protocol] = $source_map[$protocol][$date_string];
|
||||||
|
|
||||||
|
// Skip date+protocol if no source is available.
|
||||||
|
if (empty($source[$protocol])) continue;
|
||||||
|
|
||||||
|
$cache_file = "$CACHE_DIR/$protocol/$date_string.json";
|
||||||
|
if (file_exists($cache_file)) {
|
||||||
|
error_log("[$date_string] [$protocol] found");
|
||||||
|
$found[$protocol] = true;
|
||||||
|
} else {
|
||||||
|
error_log("[$date_string] [$protocol] ingest");
|
||||||
|
ingest($date, $source[$protocol], $source_map[$protocol]['filename'], $cache_file);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (count($found) == count(PROTOCOLS)) {
|
||||||
|
error_log('ingest initialization complete');
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Wait for all ingest processes to complete before proceeding.
|
||||||
|
subprocess_wait(1, 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
function ingest($date, $source, $filename, $destination)
|
||||||
|
{
|
||||||
|
$url = implode('/', [
|
||||||
|
$source,
|
||||||
|
$date->format('Y'),
|
||||||
|
$date->format('m'),
|
||||||
|
sprintf($filename, $date->format('Ymd')),
|
||||||
|
]);
|
||||||
|
$command = implode(' ', [
|
||||||
|
'curl -s',
|
||||||
|
escapeshellarg($url),
|
||||||
|
'| xzcat',
|
||||||
|
'| ' . __DIR__ . '/ingest.php',
|
||||||
|
'> ' . escapeshellarg($destination),
|
||||||
|
'&',
|
||||||
|
]);
|
||||||
|
error_log($command);
|
||||||
|
passthru_block($command);
|
||||||
|
}
|
||||||
|
|
||||||
|
function passthru_block($command)
|
||||||
|
{
|
||||||
|
static $cpu_count = null;
|
||||||
|
|
||||||
|
if (!$cpu_count) {
|
||||||
|
$cpuinfo = file_get_contents('/proc/cpuinfo');
|
||||||
|
preg_match_all('/^processor/m', $cpuinfo, $matches);
|
||||||
|
$cpu_count = max(count($matches[0]), 1);
|
||||||
|
error_log("detected $cpu_count cores");
|
||||||
|
}
|
||||||
|
|
||||||
|
$group_size = substr_count($command, '|') + 1;
|
||||||
|
subprocess_wait($group_size, $cpu_count);
|
||||||
|
|
||||||
|
passthru($command, $exit_code);
|
||||||
|
if ($exit_code != 0) {
|
||||||
|
error_log('failed to start process');
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
function subprocess_wait($group_size, $cap)
|
||||||
|
{
|
||||||
|
while (subprocess_count() / $group_size >= $cap) {
|
||||||
|
usleep(250000);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
function subprocess_count()
|
||||||
|
{
|
||||||
|
return substr_count(shell_exec('pgrep -g ' . getmypid()), "\n") - 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
function aggregate_all($period_reversed)
|
||||||
|
{
|
||||||
|
global $CACHE_DIR;
|
||||||
|
$intervals = ['day' => 'Y-m-d', 'week' => 'Y-W', 'month' => 'Y-m'];
|
||||||
|
$merged = [];
|
||||||
|
$merged_protocol = [];
|
||||||
|
$date_previous = null;
|
||||||
|
foreach ($period_reversed as $date) {
|
||||||
|
$date_string = $date->format('Y-m-d');
|
||||||
|
|
||||||
|
$data = null;
|
||||||
|
foreach (PROTOCOLS as $protocol) {
|
||||||
|
$cache_file = "$CACHE_DIR/$protocol/$date_string.json";
|
||||||
|
if (!file_exists($cache_file)) continue;
|
||||||
|
|
||||||
|
error_log("[$date_string] [$protocol] load cache");
|
||||||
|
$data_new = json_decode(file_get_contents($cache_file), true);
|
||||||
|
if (!$data_new) {
|
||||||
|
error_log('ERROR: failed to load ' . $cache_file);
|
||||||
|
unlink($cache_file); // Trigger it to be re-ingested next run.
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!isset($merged_protocol[$protocol])) $merged_protocol[$protocol] = [];
|
||||||
|
$data_new['days'] = 1;
|
||||||
|
aggregate($intervals, $merged_protocol[$protocol], $date, $date_previous, $data_new,
|
||||||
|
['protocol' => $protocol], 'protocol');
|
||||||
|
|
||||||
|
if ($data) {
|
||||||
|
merge($data, $data_new);
|
||||||
|
$data['days'] = 1;
|
||||||
|
} else {
|
||||||
|
$data = $data_new;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!$data) {
|
||||||
|
error_log("[$date_string] skipping due to lack of data");
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
aggregate($intervals, $merged, $date, $date_previous, $data);
|
||||||
|
|
||||||
|
$date_previous = $date;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Write out any remaining data by simulating a date beyond all intervals.
|
||||||
|
error_log('write remaining data');
|
||||||
|
$date = clone $date;
|
||||||
|
$date->sub(date_interval_create_from_date_string('1 year'));
|
||||||
|
|
||||||
|
foreach (PROTOCOLS as $protocol) {
|
||||||
|
aggregate($intervals, $merged_protocol[$protocol], $date, $date_previous, null,
|
||||||
|
['protocol' => $protocol], 'protocol');
|
||||||
|
}
|
||||||
|
aggregate($intervals, $merged, $date, $date_previous, null);
|
||||||
|
}
|
||||||
|
|
||||||
|
function aggregate($intervals, &$merged, $date, $date_previous, $data, $tags = [], $prefix = 'access')
|
||||||
|
{
|
||||||
|
foreach ($intervals as $interval => $format) {
|
||||||
|
$value = $date->format($format);
|
||||||
|
if (!isset($merged[$interval]) || $value != $merged[$interval]['value']) {
|
||||||
|
if (!empty($merged[$interval]['data'])) {
|
||||||
|
$summary = summarize($merged[$interval]['data']);
|
||||||
|
if ($prefix == 'protocol') {
|
||||||
|
$summary = ['-' => $summary['-']];
|
||||||
|
}
|
||||||
|
|
||||||
|
$count = write_summary($interval, $date_previous, $summary, $tags, $prefix);
|
||||||
|
|
||||||
|
error_log("[$prefix] [$interval] [{$merged[$interval]['value']}] wrote $count points at " .
|
||||||
|
$date_previous->format('Y-m-d') . " spanning " . $merged[$interval]['data']['days'] . ' day(s)');
|
||||||
|
}
|
||||||
|
|
||||||
|
// Reset merge data to current data.
|
||||||
|
$merged[$interval] = [
|
||||||
|
'data' => $data,
|
||||||
|
'value' => $value,
|
||||||
|
];
|
||||||
|
}
|
||||||
|
// Merge day onto existing data for interval. A more complex approach of
|
||||||
|
// merging higher order intervals is overly complex due to weeks.
|
||||||
|
else
|
||||||
|
merge($merged[$interval]['data'], $data);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
function merge(&$data1, $data2)
|
||||||
|
{
|
||||||
|
$data1['days'] += $data2['days'];
|
||||||
|
$data1['total'] += $data2['total'];
|
||||||
|
foreach ($data2['total_product'] as $product => $total) {
|
||||||
|
if (empty($data1['total_product'][$product]))
|
||||||
|
$data1['total_product'][$product] = 0;
|
||||||
|
|
||||||
|
$data1['total_product'][$product] += $data2['total_product'][$product];
|
||||||
|
}
|
||||||
|
|
||||||
|
foreach ($data2['unique_product'] as $product => $unqiue) {
|
||||||
|
if (empty($data1['unique_product'][$product]))
|
||||||
|
$data1['unique_product'][$product] = [];
|
||||||
|
|
||||||
|
foreach ($unqiue as $uuid => $count) {
|
||||||
|
if (empty($data1['unique_product'][$product][$uuid]))
|
||||||
|
$data1['unique_product'][$product][$uuid] = 0;
|
||||||
|
|
||||||
|
$data1['unique_product'][$product][$uuid] += $data2['unique_product'][$product][$uuid];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
$data1['total_invalid'] += $data2['total_invalid'];
|
||||||
|
$data1['bytes'] += $data2['bytes'];
|
||||||
|
}
|
||||||
|
|
||||||
|
function summarize($data)
|
||||||
|
{
|
||||||
|
$summary = [];
|
||||||
|
|
||||||
|
$summary['-'] = [
|
||||||
|
'total' => $data['total'],
|
||||||
|
'total_invalid' => $data['total_invalid'],
|
||||||
|
'bytes' => $data['bytes'],
|
||||||
|
'unique' => 0,
|
||||||
|
];
|
||||||
|
|
||||||
|
foreach ($data['total_product'] as $product => $total) {
|
||||||
|
if (!product_filter($product)) continue;
|
||||||
|
$summary_product = [
|
||||||
|
'total' => $total,
|
||||||
|
];
|
||||||
|
if (isset($data['unique_product'][$product])) {
|
||||||
|
$unique_product = $data['unique_product'][$product];
|
||||||
|
$summary_product += [
|
||||||
|
'unique' => count($unique_product),
|
||||||
|
'unqiue_average' => (float) (array_sum($unique_product) / count($unique_product)),
|
||||||
|
'unqiue_max' => max($unique_product),
|
||||||
|
];
|
||||||
|
// A UUID should be unique to a product, as such this should provide an
|
||||||
|
// accurate count of total unique across all products.
|
||||||
|
$summary['-']['unique'] += $summary_product['unique'];
|
||||||
|
}
|
||||||
|
$summary[$product] = $summary_product;
|
||||||
|
}
|
||||||
|
|
||||||
|
return $summary;
|
||||||
|
}
|
||||||
|
|
||||||
|
function product_filter($product)
|
||||||
|
{
|
||||||
|
return (bool) preg_match(PRODUCT_PATTERN, $product);
|
||||||
|
}
|
||||||
|
|
||||||
|
function date_period_reversed($begin, $interval, $end)
|
||||||
|
{
|
||||||
|
$interval = DateInterval::createFromDateString($interval);
|
||||||
|
$period = new DatePeriod($begin, $interval, $end);
|
||||||
|
return array_reverse(iterator_to_array($period));
|
||||||
|
}
|
||||||
|
|
||||||
|
function write_summary($interval, DateTime $value, $summary, $tags = [], $prefix = 'access')
|
||||||
|
{
|
||||||
|
$measurement = $prefix . '_' . $interval;
|
||||||
|
$points = [];
|
||||||
|
foreach ($summary as $product => $fields) {
|
||||||
|
$points[] = new Point($measurement, null,
|
||||||
|
['product' => $product] + $tags, $fields, $value->getTimestamp());
|
||||||
|
}
|
||||||
|
write($points);
|
||||||
|
return count($points);
|
||||||
|
}
|
||||||
|
|
||||||
|
function write($points)
|
||||||
|
{
|
||||||
|
static $database = null;
|
||||||
|
|
||||||
|
if (!$database) {
|
||||||
|
$database = InfluxDB\Client::fromDSN('influxdb://0.0.0.0:8086/osrt_access');
|
||||||
|
$database->drop();
|
||||||
|
$database->create();
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!$database->writePoints($points, Database::PRECISION_SECONDS)) die('failed to write points');
|
||||||
|
}
|
59
metrics/access/ingest.php
Executable file
59
metrics/access/ingest.php
Executable file
@ -0,0 +1,59 @@
|
|||||||
|
#!/usr/bin/php
|
||||||
|
<?php
|
||||||
|
|
||||||
|
const REGEX_LINE = '/\S+ \S+ \S+ \[([^:]+:\d+:\d+:\d+ [^\]]+)\] "(\S+)(?: (\S+) \S+)?" (\S+) (\S+) "[^"]*" "[^"]*" .* size:(\S+) \S+(?: "?(\S+-\S+-\S+-\S+-\S+|-)"? "?(dvd|ftp|-)"?)?/';
|
||||||
|
const REGEX_PRODUCT = '#/(?:(tumbleweed)|distribution/(?:leap/)?(\d+\.\d+)|openSUSE(?:_|:/)(?:leap(?:_|:/))?(factory|tumbleweed|\d+\.\d+))#i';
|
||||||
|
|
||||||
|
$total = 0;
|
||||||
|
$total_invalid = 0;
|
||||||
|
$total_product = [];
|
||||||
|
$unique_product = [];
|
||||||
|
|
||||||
|
$file = $argc == 2 ? $argv[1] : 'php://stdin';
|
||||||
|
$handle = fopen($file, 'r');
|
||||||
|
while (($line = fgets($handle)) !== false) {
|
||||||
|
if (!preg_match(REGEX_LINE, $line, $match)) {
|
||||||
|
error_log('[failed to parse] ' . rtrim($line));
|
||||||
|
$total_invalid++;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Only interested in GET or HEAD requests, others are invalid.
|
||||||
|
if ($match[2] != 'GET' && $match[2] != 'HEAD') continue;
|
||||||
|
$total++;
|
||||||
|
|
||||||
|
// Attempt to determine for which product was the request.
|
||||||
|
if (!preg_match(REGEX_PRODUCT, $match[3], $match_product)) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Remove empty match groups and select non-all match.
|
||||||
|
$values = array_filter($match_product);
|
||||||
|
$product = str_replace('factory', 'tumbleweed', strtolower(next($values)));
|
||||||
|
|
||||||
|
if (!isset($total_product[$product])) $total_product[$product] = 0;
|
||||||
|
$total_product[$product] += 1;
|
||||||
|
|
||||||
|
if (count($match) == 9 && $match[7] != '-') {
|
||||||
|
$uuid = $match[7];
|
||||||
|
if (!isset($unique_product[$product])) $unique_product[$product] = [];
|
||||||
|
if (!isset($unique_product[$product][$uuid])) $unique_product[$product][$uuid] = 0;
|
||||||
|
$unique_product[$product][$uuid] += 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
$position = ftell($handle);
|
||||||
|
fclose($handle);
|
||||||
|
|
||||||
|
error_log('processed ' . number_format($position) . ' bytes');
|
||||||
|
error_log('found ' . number_format($total) . ' requests across ' .
|
||||||
|
number_format(count($total_product)) . ' products');
|
||||||
|
|
||||||
|
ksort($total_product);
|
||||||
|
ksort($unique_product);
|
||||||
|
echo json_encode([
|
||||||
|
'total' => $total,
|
||||||
|
'total_product' => $total_product,
|
||||||
|
'unique_product' => $unique_product,
|
||||||
|
'total_invalid' => $total_invalid,
|
||||||
|
'bytes' => $position,
|
||||||
|
]) . "\n"; // JSON_PRETTY_PRINT for debugging.
|
1972
metrics/grafana/access.json
Normal file
1972
metrics/grafana/access.json
Normal file
File diff suppressed because it is too large
Load Diff
@ -36,3 +36,9 @@ datasources:
|
|||||||
url: http://localhost:8086
|
url: http://localhost:8086
|
||||||
access: proxy
|
access: proxy
|
||||||
database: openSUSE:13.2
|
database: openSUSE:13.2
|
||||||
|
|
||||||
|
- name: osrt_access
|
||||||
|
type: influxdb
|
||||||
|
url: http://localhost:8086
|
||||||
|
access: proxy
|
||||||
|
database: osrt_access
|
||||||
|
11
systemd/osrt-metrics-access.service
Normal file
11
systemd/osrt-metrics-access.service
Normal file
@ -0,0 +1,11 @@
|
|||||||
|
[Unit]
|
||||||
|
Description=openSUSE Release Tools: metrics - access logs
|
||||||
|
|
||||||
|
[Service]
|
||||||
|
User=osrt-metrics
|
||||||
|
SyslogIdentifier=osrt-metrics
|
||||||
|
ExecStart=/usr/bin/osrt-metrics-access-aggregate
|
||||||
|
RuntimeMaxSec=120 hour
|
||||||
|
|
||||||
|
[Install]
|
||||||
|
WantedBy=multi-user.target
|
11
systemd/osrt-metrics-access.timer
Normal file
11
systemd/osrt-metrics-access.timer
Normal file
@ -0,0 +1,11 @@
|
|||||||
|
[Unit]
|
||||||
|
Description=openSUSE Release Tools: metrics - access logs
|
||||||
|
|
||||||
|
[Timer]
|
||||||
|
OnBootSec=120
|
||||||
|
# Allow for log rotation to take place on download server.
|
||||||
|
OnCalendar=*-*-* 4:00:00
|
||||||
|
Unit=osrt-metrics-access.service
|
||||||
|
|
||||||
|
[Install]
|
||||||
|
WantedBy=timers.target
|
Loading…
x
Reference in New Issue
Block a user