From 12cebb1f325e7ef4adf338c8f2c81665ad48ec14f72e4170911fb70d89d067fc Mon Sep 17 00:00:00 2001 From: OBS User autobuild Date: Mon, 16 Nov 2009 17:01:52 +0000 Subject: [PATCH] Accepting request 24435 from Base:System Copy from Base:System/mcelog based on submit request 24435 from user msmeissn OBS-URL: https://build.opensuse.org/request/show/24435 OBS-URL: https://build.opensuse.org/package/show/openSUSE:Factory/mcelog?expand=0&rev=6 --- mcelog-0.7-newcpus-1.diff | 1022 ------------------------------------- mcelog-0.7.tar.gz | 3 - mcelog-0.9pre.tar.bz2 | 3 + mcelog-thermal.diff | 52 -- mcelog.changes | 7 + mcelog.spec | 54 +- 6 files changed, 14 insertions(+), 1127 deletions(-) delete mode 100644 mcelog-0.7-newcpus-1.diff delete mode 100644 mcelog-0.7.tar.gz create mode 100644 mcelog-0.9pre.tar.bz2 delete mode 100644 mcelog-thermal.diff diff --git a/mcelog-0.7-newcpus-1.diff b/mcelog-0.7-newcpus-1.diff deleted file mode 100644 index 07ac914..0000000 --- a/mcelog-0.7-newcpus-1.diff +++ /dev/null @@ -1,1022 +0,0 @@ -From: Andi Kleen -Subject: mcelog decoding support for Intel Tigerton - -Backport of the changes for Tigerton/Dunnington/Nehalem changes from mcelog git -git://git.kernel.org/pub/scm/utils/cpu/mce/mcelog.git - -The Tigerton support required adding Core2 support, they are all -lumped together. I also added "P6OLD" because that was in the mainline -mcelog git changes and would have been difficult to separate. -The differences to core2 are very minimal (just a few different events). -The actual decoder is all table driven. - -In the original git this was done as individual changes, but I lumped -it all together in the backport. - -While it adds quite a lot of new code there's not many changes to generic -code. Most of the new code is only used on the new CPUs. 
- -diff -x '*~' -urpN mcelog-0.7/bitfield.c mcelog-0.7-newcpus//bitfield.c ---- mcelog-0.7/bitfield.c 1970-01-01 01:00:00.000000000 +0100 -+++ mcelog-0.7-newcpus//bitfield.c 2008-09-26 20:28:29.000000000 +0200 -@@ -0,0 +1,61 @@ -+#include -+#include -+#include "mcelog.h" -+#include "bitfield.h" -+ -+char *reserved_3bits[8]; -+char *reserved_1bit[2]; -+char *reserved_2bits[4]; -+ -+static u64 bitmask(u64 i) -+{ -+ u64 mask = 1; -+ while (mask < i) -+ mask = (mask << 1) | 1; -+ return mask; -+} -+ -+void decode_bitfield(u64 status, struct field *fields) -+{ -+ struct field *f; -+ int linelen = 0; -+ char *delim = ""; -+ -+ for (f = fields; f->str; f++) { -+ u64 v = (status >> f->start_bit) & bitmask(f->stringlen - 1); -+ char *s = NULL; -+ if (v < f->stringlen) -+ s = f->str[v]; -+ if (!s) { -+ if (v == 0) -+ continue; -+ char buf[60]; -+ s = buf; -+ snprintf(buf, sizeof buf, "<%u:%Lx>", f->start_bit, v); -+ } -+ int len = strlen(s); -+ if (linelen + len > 75) { -+ delim = "\n"; -+ linelen = 0; -+ } -+ Wprintf("%s%s", delim, s); -+ delim = " "; -+ linelen += len + 1; -+ } -+ if (linelen > 0) -+ Wprintf("\n"); -+} -+ -+void decode_numfield(u64 status, struct numfield *fields) -+{ -+ struct numfield *f; -+ for (f = fields; f->name; f++) { -+ u64 mask = (1ULL << (f->end - f->start - 1)) - 1; -+ u64 v = (status >> f->start) & mask; -+ if (v > 0) { -+ char fmt[30]; -+ snprintf(fmt, 30, "%%s: %s\n", f->fmt ? 
f->fmt : "%Lu"); -+ Wprintf(fmt, f->name, v); -+ } -+ } -+} -diff -x '*~' -urpN mcelog-0.7/bitfield.h mcelog-0.7-newcpus//bitfield.h ---- mcelog-0.7/bitfield.h 1970-01-01 01:00:00.000000000 +0100 -+++ mcelog-0.7-newcpus//bitfield.h 2008-09-26 20:28:29.000000000 +0200 -@@ -0,0 +1,27 @@ -+/* Generic bitfield decoder */ -+ -+struct field { -+ int start_bit; -+ char **str; -+ int stringlen; -+}; -+ -+struct numfield { -+ int start, end; -+ char *name; -+ char *fmt; -+}; -+ -+#define FIELD(start_bit, name) { start_bit, name, NELE(name) } -+#define SBITFIELD(start_bit, string) { start_bit, ((char * [2]) { NULL, string }), 2 } -+ -+#define NUMBER(start, end, name) { start, end, name, "%Lu" } -+#define HEXNUMBER(start, end, name) { start, end, name, "%Lx" } -+ -+void decode_bitfield(u64 status, struct field *fields); -+void decode_numfield(u64 status, struct numfield *fields); -+ -+extern char *reserved_3bits[8]; -+extern char *reserved_1bit[2]; -+extern char *reserved_2bits[4]; -+ -diff -x '*~' -urpN mcelog-0.7/core2.c mcelog-0.7-newcpus//core2.c ---- mcelog-0.7/core2.c 1970-01-01 01:00:00.000000000 +0100 -+++ mcelog-0.7-newcpus//core2.c 2008-09-26 20:21:18.000000000 +0200 -@@ -0,0 +1,105 @@ -+#include -+#include -+#include -+#include "mcelog.h" -+#include "core2.h" -+#include "bitfield.h" -+ -+/* Decode P6 family (Core2) model specific errors. 
-+ The generic errors are decoded in p4.c */ -+ -+/* [19..24] */ -+static char *bus_queue_req_type[] = { -+ [0] = "BQ_DCU_READ_TYPE", -+ [2] = "BQ_IFU_DEMAND_TYPE", -+ [3] = "BQ_IFU_DEMAND_NC_TYPE", -+ [4] = "BQ_DCU_RFO_TYPE", -+ [5] = "BQ_DCU_RFO_LOCK_TYPE", -+ [6] = "BQ_DCU_ITOM_TYPE", -+ [8] = "BQ_DCU_WB_TYPE", -+ [10] = "BC_DCU_WCEVICT_TYPE", -+ [11] = "BQ_DCU_WCLINE_TYPE", -+ [12] = "BQ_DCU_BTM_TYPE", -+ [13] = "BQ_DCU_INTACK_TYPE", -+ [14] = "BQ_DCU_INVALL2_TYPE", -+ [15] = "BQ_DCU_FLUSHL2_TYPE", -+ [16] = "BQ_DCU_PART_RD_TYPE", -+ [18] = "BQ_DCU_PART_WR_TYPE", -+ [20] = "BQ_DCU_SPEC_CYC_TYPE", -+ [24] = "BQ_DCU_IO_RD_TYPE", -+ [25] = "BQ_DCU_IO_WR_TYPE", -+ [28] = "BQ_DCU_LOCK_RD_TYPE", -+ [30] = "BQ_DCU_SPLOCK_RD_TYPE", -+ [29] = "BQ_DCU_LOCK_WR_TYPE", -+}; -+ -+/* [25..27] */ -+static char *bus_queue_error_type[] = { -+ [0] = "BQ_ERR_HARD_TYPE", -+ [1] = "BQ_ERR_DOUBLE_TYPE", -+ [2] = "BQ_ERR_AERR2_TYPE", -+ [4] = "BQ_ERR_SINGLE_TYPE", -+ [5] = "BQ_ERR_AERR1_TYPE", -+}; -+ -+static struct field p6_shared_status[] = { -+ FIELD(16, reserved_3bits), -+ FIELD(19, bus_queue_req_type), -+ FIELD(25, bus_queue_error_type), -+ FIELD(25, bus_queue_error_type), -+ SBITFIELD(30, "internal BINIT"), -+ SBITFIELD(36, "received parity error on response transaction"), -+ SBITFIELD(38, "timeout BINIT (ROB timeout)." -+ " No micro-instruction retired for some time"), -+ FIELD(39, reserved_3bits), -+ SBITFIELD(42, "bus transaction received hard error response"), -+ SBITFIELD(43, "failure that caused IERR"), -+ /* The following are reserved for Core in the SDM. 
Let's keep them here anyways*/ -+ SBITFIELD(44, "two failing bus transactions with address parity error (AERR)"), -+ SBITFIELD(45, "uncorrectable ECC error"), -+ SBITFIELD(46, "correctable ECC error"), -+ /* [47..54]: ECC syndrome */ -+ FIELD(55, reserved_2bits), -+ {}, -+}; -+ -+static struct field p6old_status[] = { -+ SBITFIELD(28, "FRC error"), -+ SBITFIELD(29, "BERR on this CPU"), -+ FIELD(31, reserved_1bit), -+ FIELD(32, reserved_3bits), -+ SBITFIELD(35, "BINIT received from external bus"), -+ SBITFIELD(37, "Received hard error reponse on split transaction (Bus BINIT)"), -+ {} -+}; -+ -+static struct field core2_status[] = { -+ SBITFIELD(28, "MCE driven"), -+ SBITFIELD(29, "MCE is observed"), -+ SBITFIELD(31, "BINIT observed"), -+ FIELD(32, reserved_2bits), -+ SBITFIELD(34, "PIC or FSB data parity error"), -+ FIELD(35, reserved_1bit), -+ SBITFIELD(37, "FSB address parity error detected"), -+ {} -+}; -+ -+static struct numfield p6old_status_numbers[] = { -+ HEXNUMBER(47, 54, "ECC syndrome"), -+ {} -+}; -+ -+void core2_decode_model(u64 status) -+{ -+ decode_bitfield(status, p6_shared_status); -+ decode_bitfield(status, core2_status); -+ /* Normally reserved, but let's parse anyways: */ -+ decode_numfield(status, p6old_status_numbers); -+} -+ -+void p6old_decode_model(u64 status) -+{ -+ decode_bitfield(status, p6_shared_status); -+ decode_bitfield(status, p6old_status); -+ decode_numfield(status, p6old_status_numbers); -+} -diff -x '*~' -urpN mcelog-0.7/core2.h mcelog-0.7-newcpus//core2.h ---- mcelog-0.7/core2.h 1970-01-01 01:00:00.000000000 +0100 -+++ mcelog-0.7-newcpus//core2.h 2008-09-26 20:21:18.000000000 +0200 -@@ -0,0 +1,2 @@ -+void core2_decode_model(u64 status); -+void p6old_decode_model(u64 status); -diff -x '*~' -urpN mcelog-0.7/dunnington.c mcelog-0.7-newcpus//dunnington.c ---- mcelog-0.7/dunnington.c 1970-01-01 01:00:00.000000000 +0100 -+++ mcelog-0.7-newcpus//dunnington.c 2008-09-26 20:24:24.000000000 +0200 -@@ -0,0 +1,123 @@ -+/* Copyright (c) 2008 
by Intel Corp. -+ Decode Intel Xeon Processor 7400 Model (Dunnington) specific MCEs -+ -+ mcelog is free software; you can redistribute it and/or -+ modify it under the terms of the GNU General Public -+ License as published by the Free Software Foundation; version -+ 2. -+ -+ mcelog is distributed in the hope that it will be useful, -+ but WITHOUT ANY WARRANTY; without even the implied warranty of -+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ General Public License for more details. -+ -+ You should find a copy of v2 of the GNU General Public License somewhere -+ on your Linux system; if not, write to the Free Software Foundation, -+ Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -+ -+ Author: -+ Andi Kleen -+*/ -+ -+/* other files -+ -+mcelog.h CPU_DUNNINGTON -+mcelog.c: cputype name -+intel.h CASE_INTEL_CPUS -+intel.c model == 0x1d CPU_DUNNINGTON -+p4.c: if (cpu == CPU_DUNNINGTON) dunnington_decode_model(log->status); -+ add to CORE2 cases -+ -+*/ -+ -+#include -+#include "mcelog.h" -+#include "bitfield.h" -+#include "dunnington.h" -+ -+/* Follows Intel IA32 SDM 3b Appendix E.2.1 ++ */ -+ -+static struct field dunnington_bus_status[] = { -+ SBITFIELD(16, "Parity error detected during FSB request phase"), -+ FIELD(17, reserved_3bits), -+ SBITFIELD(20, "Hard Failure response received for a local transaction"), -+ SBITFIELD(21, "Parity error on FSB response field detected"), -+ SBITFIELD(22, "Parity data error on inbound data detected"), -+ FIELD(23, reserved_3bits), -+ FIELD(25, reserved_3bits), -+ FIELD(28, reserved_3bits), -+ FIELD(31, reserved_1bit), -+ {} -+}; -+ -+static char *dnt_front_error[0xf] = { -+ [0x1] = "Inclusion error from core 0", -+ [0x2] = "Inclusion error from core 1", -+ [0x3] = "Write Exclusive error from core 0", -+ [0x4] = "Write Exclusive error from core 1", -+ [0x5] = "Inclusion error from FSB", -+ [0x6] = "SNP stall error from FSB", -+ [0x7] = "Write stall error from FSB", -+ [0x8] = "FSB Arbiter 
Timeout error", -+ [0xA] = "Inclusion error from core 2", -+ [0xB] = "Write exclusive error from core 2", -+}; -+ -+static char *dnt_int_error[0xf] = { -+ [0x2] = "Internal timeout error", -+ [0x3] = "Internal timeout error", -+ [0x4] = "Intel Cache Safe Technology Queue full error\n" -+ "or disabled ways in a set overflow", -+ [0x5] = "Quiet cycle timeout error (correctable)", -+}; -+ -+struct field dnt_int_status[] = { -+ FIELD(8, dnt_int_error), -+ {} -+}; -+ -+struct field dnt_front_status[] = { -+ FIELD(0, dnt_front_error), -+ {} -+}; -+ -+struct field dnt_cecc[] = { -+ SBITFIELD(1, "Correctable ECC event on outgoing core 0 data"), -+ SBITFIELD(2, "Correctable ECC event on outgoing core 1 data"), -+ SBITFIELD(3, "Correctable ECC event on outgoing core 3 data"), -+ {} -+}; -+ -+struct field dnt_uecc[] = { -+ SBITFIELD(1, "Uncorrectable ECC event on outgoing core 0 data"), -+ SBITFIELD(2, "Uncorrectable ECC event on outgoing core 1 data"), -+ SBITFIELD(3, "Uncorrectable ECC event on outgoing core 3 data"), -+ {} -+}; -+ -+static void dunnington_decode_bus(u64 status) -+{ -+ decode_bitfield(status, dunnington_bus_status); -+} -+ -+static void dunnington_decode_internal(u64 status) -+{ -+ u32 mca = (status >> 16) & 0xffff; -+ if ((mca & 0xfff0) == 0) -+ decode_bitfield(status, dnt_front_status); -+ else if ((mca & 0xf0ff) == 0) -+ decode_bitfield(status, dnt_int_status); -+ else if ((mca & 0xfff0) == 0xc000) -+ decode_bitfield(status, dnt_cecc); -+ else if ((mca & 0xfff0) == 0xe000) -+ decode_bitfield(status, dnt_uecc); -+} -+ -+void dunnington_decode_model(u64 status) -+{ -+ if ((status & 0xffff) == 0xe0f) -+ dunnington_decode_bus(status); -+ else if ((status & 0xffff) == (1 << 10)) -+ dunnington_decode_internal(status); -+} -+ -diff -x '*~' -urpN mcelog-0.7/dunnington.h mcelog-0.7-newcpus//dunnington.h ---- mcelog-0.7/dunnington.h 1970-01-01 01:00:00.000000000 +0100 -+++ mcelog-0.7-newcpus//dunnington.h 2008-09-26 20:24:24.000000000 +0200 -@@ -0,0 +1,2 @@ -+void 
dunnington_decode_model(u64 status); -+ -diff -x '*~' -urpN mcelog-0.7/intel.c mcelog-0.7-newcpus//intel.c ---- mcelog-0.7/intel.c 1970-01-01 01:00:00.000000000 +0100 -+++ mcelog-0.7-newcpus//intel.c 2008-09-26 20:32:52.000000000 +0200 -@@ -0,0 +1,22 @@ -+#include "mcelog.h" -+#include "intel.h" -+#include -+ -+enum cputype select_intel_cputype(int family, int model) -+{ -+ if (family == 15) { -+ return CPU_P4; -+ } -+ if (family == 6) { -+ if (model < 0xf) -+ return CPU_P6OLD; -+ else if (model == 0xf || model == 0x17) /* Merom/Penryn */ -+ return CPU_CORE2; -+ else if (model == 0x1d) -+ return CPU_DUNNINGTON; -+ else if (model == 0x1a) -+ return CPU_NEHALEM; -+ } -+ fprintf(stderr, "Unknown Intel CPU type family %x model %x\n", family, model); -+ return family == 6 ? CPU_P6OLD : CPU_GENERIC; -+} -diff -x '*~' -urpN mcelog-0.7/intel.h mcelog-0.7-newcpus//intel.h ---- mcelog-0.7/intel.h 1970-01-01 01:00:00.000000000 +0100 -+++ mcelog-0.7-newcpus//intel.h 2008-09-26 20:32:00.000000000 +0200 -@@ -0,0 +1,9 @@ -+enum cputype select_intel_cputype(int family, int model); -+ -+#define CASE_INTEL_CPUS \ -+ case CPU_P6OLD: \ -+ case CPU_CORE2: \ -+ case CPU_NEHALEM: \ -+ case CPU_DUNNINGTON: \ -+ case CPU_P4 -+ -diff -x '*~' -urpN mcelog-0.7/Makefile mcelog-0.7-newcpus//Makefile ---- mcelog-0.7/Makefile 2006-05-03 08:55:54.000000000 +0200 -+++ mcelog-0.7-newcpus//Makefile 2008-09-26 21:07:21.000000000 +0200 -@@ -5,7 +5,8 @@ all: mcelog - - .PHONY: install clean - --mcelog: p4.o k8.o mcelog.o dmi.o -+mcelog: p4.o k8.o mcelog.o dmi.o core2.o dunnington.o nehalem.o \ -+ bitfield.o intel.o - - p4.o: p4.c mcelog.h p4.h - k8.o: k8.c mcelog.h k8.h -@@ -18,7 +19,8 @@ install: mcelog.c - echo "call mcelog regularly from your crontab" - - clean: -- rm -f mcelog mcelog.o k8.o p4.o dmi.o dmi -+ rm -f mcelog mcelog.o k8.o p4.o dmi.o dmi core2.o dunnington.o \ -+ nehalem.o bitfield.o intel.o - - dmi: dmi.c - gcc -o dmi ${CFLAGS} -DSTANDALONE dmi.c ${LDFLAGS} -diff -x '*~' -urpN 
mcelog-0.7/mcelog.8 mcelog-0.7-newcpus//mcelog.8 ---- mcelog-0.7/mcelog.8 2006-05-03 08:55:54.000000000 +0200 -+++ mcelog-0.7-newcpus//mcelog.8 2008-09-26 20:42:44.000000000 +0200 -@@ -2,9 +2,9 @@ - .SH NAME - mcelog \- Print machine check log from x86-64 kernel. - .SH SYNOPSIS --mcelog [\-\-syslog] [\-\-k8|\-\-p4|\-\-generic] [\-\-ignorenodev] [\-\-dmi] [\-\-filter] [device] -+mcelog [\-\-syslog] [\-\-k8|\-\-p4|\-\-generic|...] [\-\-ignorenodev] [\-\-dmi] [\-\-filter] [device] - .br --mcelog [\-\-k8|\-\-p4|\-\-generic] \-\-ascii -+mcelog [\-\-k8|\-\-p4|\-\-generic|...] \-\-ascii - .SH DESCRIPTION - Linux x86-64 kernels since 2.6.4 don't print recoverable machine check errors - to the kernel log anymore. Instead they are saved into a special -@@ -18,13 +18,21 @@ When the - .B \-\-syslog - option is specified redirect output to system log. - -+ - When - .B \-\-k8 - is specified assume the events are for a AMD Opteron or Athlon 64 or Athlon - FX CPU. - With - .B \-\-p4 --is specified assume the events are for a Intel Pentium 4 or Intel Xeon. -+is specified assume the events are for a Intel Pentium 4 or Intel (older) Xeon. -+With -+.B \-\-core2 -+assume the events are for a Intel Core2 CPU or Intel Xeon 3000, 3200, 5100, 5300, 7300 -+series. When -+.B \-\-intel-cpu=family,model -+are specified then the family number and model number of the Intel CPU -+to be decoded should be specified (can be found in /proc/cpuinfo). - When - .B \-\-generic - all the fields are dumped without CPU specific decoding. 
-diff -x '*~' -urpN mcelog-0.7/mcelog.c mcelog-0.7-newcpus//mcelog.c ---- mcelog-0.7/mcelog.c 2006-05-03 08:55:54.000000000 +0200 -+++ mcelog-0.7-newcpus//mcelog.c 2008-09-26 20:45:50.000000000 +0200 -@@ -31,12 +31,10 @@ - #include "k8.h" - #include "p4.h" - #include "dmi.h" -+#include "intel.h" - --enum { -- CPU_GENERIC, -- CPU_K8, -- CPU_P4 --} cpu = CPU_GENERIC; -+ -+enum cputype cpu = CPU_GENERIC; - - char *logfn = "/dev/mcelog"; - -@@ -62,8 +60,8 @@ char *bankname(unsigned bank) - switch (cpu) { - case CPU_K8: - return k8_bank_name(bank); -- case CPU_P4: -- return p4_bank_name(bank); -+ CASE_INTEL_CPUS: -+ return intel_bank_name(bank); - /* add banks of other cpu types here */ - default: - sprintf(numeric, "BANK %d", bank); -@@ -98,7 +96,7 @@ int mce_filter(struct mce *m) - case CPU_K8: - return mce_filter_k8(m); - /* add more buggy CPUs here */ -- case CPU_P4: -+ CASE_INTEL_CPUS: - /* No bugs known */ - return 1; - default: -@@ -134,8 +132,8 @@ void dump_mce(struct mce *m) - case CPU_K8: - decode_k8_mc(m); - break; -- case CPU_P4: -- decode_p4_mc(m); -+ CASE_INTEL_CPUS: -+ decode_intel_mc(m, cpu); - break; - /* add handlers for other CPUs here */ - default: -@@ -153,23 +151,27 @@ void check_cpu(void) - if (f != NULL) { - int found = 0; - int family; -+ int model; - char vendor[64]; - char *line = NULL; - size_t linelen = 0; -- while (getdelim(&line, &linelen, '\n', f) > 0 && found < 2) { -+ while (getdelim(&line, &linelen, '\n', f) > 0 && found < 3) { - if (sscanf(line, "vendor_id : %63[^\n]", vendor) == 1) - found++; - if (sscanf(line, "cpu family : %d", &family) == 1) - found++; -+ if (sscanf(line, "model : %d", &model) == 1) -+ found++; - } -- if (found == 2) { -+ if (found == 3) { - if (!strcmp(vendor,"AuthenticAMD") && family == 15) - cpu = CPU_K8; -- if (!strcmp(vendor,"GenuineIntel") && family == 15) -- cpu = CPU_P4; -+ if (!strcmp(vendor,"GenuineIntel")) -+ cpu = select_intel_cputype(family, model); - /* Add checks for other CPUs here */ - } else { -- 
fprintf(stderr, "mcelog: warning: Cannot parse /proc/cpuinfo\n"); -+ fprintf(stderr, -+ "mcelog: warning: Cannot parse /proc/cpuinfo\n"); - } - fclose(f); - free(line); -@@ -303,9 +305,11 @@ void usage(void) - { - fprintf(stderr, - "Usage:\n" -- " mcelog [--k8|--p4|--generic] [--ignorenodev] [--dmi] [--syslog] [--filter] [mcelogdevice]\n" -+ " mcelog options [--ignorenodev] [--dmi] [--syslog] [--filter] [mcelogdevice]\n" - "Decode machine check error records from kernel\n" -- " mcelog [--k8|--p4|--generic] [--dmi] --ascii < log\n" -+ " mcelog options [--dmi] --ascii < log\n" -+ "Options:\n" -+ "--p4|--k8|--core2|--generic|--intel-cpu=family,model Set CPU type to decode\n" - "Decode machine check ASCII output from kernel logs\n"); - exit(1); - } -@@ -318,6 +322,17 @@ int modifier(char *s) - cpu = CPU_P4; - } else if (!strcmp(s, "--generic")) { - cpu = CPU_GENERIC; -+ } else if (!strcmp(s, "--core2")) { -+ cpu = CPU_CORE2; -+ } else if (!strncmp(s, "--intel-cpu=", 12)) { -+ unsigned fam, mod; -+ if (sscanf(s + 12, "%i,%i", &fam, &mod) != 2) -+ usage(); -+ cpu = select_intel_cputype(fam, mod); -+ if (cpu == CPU_GENERIC) { -+ fprintf(stderr, "Unknown Intel CPU\n"); -+ usage(); -+ } - } else if (!strcmp(s, "--ignorenodev")) { - ignore_nodev = 1; - } else if (!strcmp(s,"--filter")) { -diff -x '*~' -urpN mcelog-0.7/mcelog.h mcelog-0.7-newcpus//mcelog.h ---- mcelog-0.7/mcelog.h 2006-05-03 08:55:54.000000000 +0200 -+++ mcelog-0.7-newcpus//mcelog.h 2008-09-26 20:28:19.000000000 +0200 -@@ -61,3 +61,13 @@ struct mce { - #endif - - void Wprintf(char *fmt, ...) 
PRINTFLIKE; -+ -+enum cputype { -+ CPU_GENERIC, -+ CPU_K8, -+ CPU_P4, -+ CPU_NEHALEM, -+ CPU_DUNNINGTON, -+ CPU_P6OLD, -+ CPU_CORE2, -+}; -diff -x '*~' -urpN mcelog-0.7/nehalem.c mcelog-0.7-newcpus//nehalem.c ---- mcelog-0.7/nehalem.c 1970-01-01 01:00:00.000000000 +0100 -+++ mcelog-0.7-newcpus//nehalem.c 2008-09-26 20:24:20.000000000 +0200 -@@ -0,0 +1,163 @@ -+/* Copyright (C) 2008 Intel Corporation -+ Decode Intel Nehalem specific machine check errors. -+ -+ mcelog is free software; you can redistribute it and/or -+ modify it under the terms of the GNU General Public -+ License as published by the Free Software Foundation; version -+ 2. -+ -+ mcelog is distributed in the hope that it will be useful, -+ but WITHOUT ANY WARRANTY; without even the implied warranty of -+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ General Public License for more details. -+ -+ You should find a copy of v2 of the GNU General Public License somewhere -+ on your Linux system; if not, write to the Free Software Foundation, -+ Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -+ -+ Author: Andi Kleen -+*/ -+ -+/* other files -+ -+mcelog.h CPU_NEHALEM -+intel.h CASE_INTEL_CPUS -+intel.c model == 0x1a CPU_NEHALEM -+p4.c: if (cpu == CPU_NEHALEM) nehalem_decode_model(log->status, log->misc); -+ if (test_prefix(status, 7)) decode_memory_controller(log->status); -+mcelog.c/p4.c: syslog/trigger for memory controller -+ cputype_name -+*/ -+ -+#include -+#include -+#include "mcelog.h" -+#include "nehalem.h" -+#include "core2.h" -+#include "bitfield.h" -+ -+/* See IA32 SDM Vol3B Appendix E.3.2 ff */ -+ -+/* MC1_STATUS error */ -+static struct field qpi_status[] = { -+ SBITFIELD(16, "QPI header had bad parity"), -+ SBITFIELD(17, "QPI Data packet had bad parity"), -+ SBITFIELD(18, "Number of QPI retries exceeded"), -+ SBITFIELD(19, "Received QPI data packet that was poisoned by sender"), -+ SBITFIELD(20, "QPI reserved 20"), -+ SBITFIELD(21, "QPI reserved 21"), -+ 
SBITFIELD(22, "QPI received unsupported message encoding"), -+ SBITFIELD(23, "QPI credit type is not supported"), -+ SBITFIELD(24, "Sender sent too many QPI flits to the receiver"), -+ SBITFIELD(25, "QPI Sender sent a failed response to receiver"), -+ SBITFIELD(26, "Clock jitter detected in internal QPI clocking"), -+ {} -+}; -+ -+static struct field qpi_misc[] = { -+ SBITFIELD(14, "QPI misc reserved 14"), -+ SBITFIELD(15, "QPI misc reserved 15"), -+ SBITFIELD(24, "QPI Interleave/Head Indication Bit (IIB)"), -+ {} -+}; -+ -+static struct numfield qpi_numbers[] = { -+ HEXNUMBER(0, 7, "QPI class and opcode of packet with error"), -+ HEXNUMBER(8, 13, "QPI Request Transaction ID"), -+ NUMBER(16, 18, "QPI Requestor/Home Node ID (RHNID)"), -+ HEXNUMBER(19, 23, "QPI miscreserved 19-23"), -+}; -+ -+static struct field memory_controller_status[] = { -+ SBITFIELD(16, "Memory read ECC error"), -+ SBITFIELD(17, "Memory ECC error occurred during scrub"), -+ SBITFIELD(18, "Memory write parity error"), -+ SBITFIELD(19, "Memory error in half of redundant memory"), -+ SBITFIELD(20, "Memory reserved 20"), -+ SBITFIELD(21, "Memory access out of range"), -+ SBITFIELD(22, "Memory internal RTID invalid"), -+ SBITFIELD(23, "Memory address parity error"), -+ SBITFIELD(24, "Memory byte enable parity error"), -+ {} -+}; -+ -+static struct numfield memory_controller_numbers[] = { -+ HEXNUMBER(0, 7, "Memory transaction Tracker ID (RTId)"), -+ HEXNUMBER(8, 15, "Memory MISC reserved 8..15"), -+ NUMBER(16, 17, "Memory DIMM ID of error"), -+ NUMBER(18, 19, "Memory channel ID of error"), -+ HEXNUMBER(32, 63, "Memory ECC syndrome"), -+ HEXNUMBER(25, 37, "Memory MISC reserved 25..37"), -+ NUMBER(38, 52, "Memory corrected error count (CORE_ERR_CNT)"), -+ HEXNUMBER(53, 56, "Memory MISC reserved 53..56"), -+ {} -+}; -+ -+static char *internal_errors[] = { -+ [0x0] = "No Error", -+ [0x3] = "Reset firmware did not complete", -+ [0x8] = "Received an invalid CMPD", -+ [0xa] = "Invalid Power Management 
Request", -+ [0xd] = "Invalid S-state transition", -+ [0x11] = "VID controller does not match POC controller selected", -+ [0x1a] = "MSID from POC does not match CPU MSID", -+}; -+ -+static struct field internal_error_status[] = { -+ FIELD(24, internal_errors), -+ {} -+}; -+ -+static struct numfield internal_error_numbers[] = { -+ HEXNUMBER(16, 23, "Internal machine check status reserved 16..23"), -+ HEXNUMBER(32, 56, "Internal machine check status reserved 32..56"), -+ {}, -+}; -+ -+/* Generic architectural memory controller encoding */ -+ -+static char *mmm_mnemonic[] = { -+ "GEN", "RD", "WR", "AC", "MS", "RES5", "RES6", "RES7" -+}; -+static char *mmm_desc[] = { -+ "Generic undefined request", -+ "Memory read error", -+ "Memory write error", -+ "Address/Command error", -+ "Memory scrubbing error", -+ "Reserved 5", -+ "Reserved 6", -+ "Reserved 7" -+}; -+ -+void decode_memory_controller(u32 status) -+{ -+ char channel[30]; -+ if ((status & 0xf) == 0xf) -+ strcpy(channel, "unspecified"); -+ else -+ sprintf(channel, "%u", status & 0xf); -+ Wprintf("MEMORY CONTROLLER %s_CHANNEL%s_ERR\n", -+ mmm_mnemonic[(status >> 4) & 7], -+ channel); -+ Wprintf("Transaction: %s\n", mmm_desc[(status >> 4) & 7]); -+ Wprintf("Channel: %s\n", channel); -+} -+ -+void nehalem_decode_model(u64 status, u64 misc) -+{ -+ u32 mca = status & 0xffff; -+ core2_decode_model(status); -+ if ((mca >> 11) == 1) { /* bus and interconnect QPI */ -+ decode_bitfield(status, qpi_status); -+ decode_numfield(status, qpi_numbers); -+ decode_bitfield(misc, qpi_misc); -+ } else if (mca == 0x0001) { /* internal unspecified */ -+ decode_bitfield(status, internal_error_status); -+ decode_numfield(status, internal_error_numbers); -+ } else if ((mca >> 8) == 1) { /* memory controller */ -+ decode_bitfield(status, memory_controller_status); -+ decode_numfield(status, memory_controller_numbers); -+ } -+} -+ -diff -x '*~' -urpN mcelog-0.7/nehalem.h mcelog-0.7-newcpus//nehalem.h ---- mcelog-0.7/nehalem.h 1970-01-01 
01:00:00.000000000 +0100 -+++ mcelog-0.7-newcpus//nehalem.h 2008-09-26 20:24:20.000000000 +0200 -@@ -0,0 +1,2 @@ -+void nehalem_decode_model(u64 status, u64 misc); -+void decode_memory_controller(u32 status); -diff -x '*~' -urpN mcelog-0.7/p4.c mcelog-0.7-newcpus//p4.c ---- mcelog-0.7/p4.c 2006-05-03 08:55:54.000000000 +0200 -+++ mcelog-0.7-newcpus//p4.c 2008-09-26 20:34:41.000000000 +0200 -@@ -1,7 +1,6 @@ - /* Copyright (c) 2005 by Intel Corp. - -- Decode IA32/x86-64 machine check for Pentium 4, Intel Xeon -- or EM64T. -+ Decode Intel machine check (generic and P4 specific) - - mcelog is free software; you can redistribute it and/or - modify it under the terms of the GNU General Public -@@ -19,12 +18,17 @@ - - Authors: - Racing Guo -+ Andi Kleen - */ -- -+ - #include - #include "mcelog.h" -+#include "p4.h" -+#include "core2.h" -+#include "nehalem.h" -+#include "dunnington.h" - --/* decode mce for P4/Xeon family */ -+/* decode mce for P4/Xeon and Core2 family */ - - static inline int test_prefix(int nr, __u32 value) - { -@@ -73,13 +77,12 @@ static char* get_RRRR_str(__u8 rrrr) - } - - return "UNKNOWN"; -- - } - - static char* get_PP_str(__u8 pp) - { - static char* PP[] = { -- "Originated-request", -+ "Local-CPU-originated-request", - "Responed-to-request", - "Observed-error-as-third-party", - "Generic" -@@ -112,7 +115,7 @@ static char* get_II_str(__u8 i) - return II[i]; - } - --static int decode_mca(__u32 mca, char *buf, int len) -+static void decode_mca(__u32 mca) - { - #define TLB_LL_MASK 0x3 /*bit 0, bit 1*/ - #define TLB_LL_SHIFT 0x0 -@@ -137,64 +140,59 @@ static int decode_mca(__u32 mca, char *b - #define BUS_PP_MASK 0x600 /*bit 9, bit 10*/ - #define BUS_PP_SHIFT 0x9 - -- mca = mca & 0xFFFF; -+ static char *msg[] = { -+ [0] = "No Error", -+ [1] = "Unclassified", -+ [2] = "Microcode ROM parity error", -+ [3] = "External error", -+ [4] = "FRC error", -+ }; -+ -+ if (mca & (1UL << 12)) { -+ Wprintf("corrected filtering (some unreported errors in same region)\n"); 
-+ mca &= ~(1UL << 12); -+ } - -- switch(mca) { -- case 0x0: -- return snprintf(buf, len, "%s", "No Error"); -- break; -- case 0x1: -- return snprintf(buf, len, "%s", "Unclassified"); -- break; -- case 0x2: -- return snprintf(buf, len, "%s", "Microcode ROM Parity Error"); -- break; -- case 0x3: -- return snprintf(buf, len, "%s", "External Error"); -- break; -- case 0x4: -- return snprintf(buf, len, "%s", "FRC Error"); -- break; -- default: -- break; -+ if (mca < NELE(msg)) { -+ Wprintf("%s\n", msg[mca]); -+ return; - } - -- if (test_prefix(4, mca)) { -- return snprintf(buf, len, "%s TLB %s Error", -+ if ((mca >> 2) == 3) { -+ Wprintf("%s Generic memory hierarchy error\n", get_LL_str(mca & 3)); -+ } else if (test_prefix(4, mca)) { -+ Wprintf("%s TLB %s Error\n", - get_TT_str((mca & TLB_TT_MASK) >> TLB_TT_SHIFT), - get_LL_str((mca & TLB_LL_MASK) >> - TLB_LL_SHIFT)); -- } -- if (test_prefix(8, mca)) { -- return snprintf(buf, len, "%s CACHE %s %s Error", -+ } else if (test_prefix(8, mca)) { -+ Wprintf("%s CACHE %s %s Error\n", - get_TT_str((mca & CACHE_TT_MASK) >> - CACHE_TT_SHIFT), - get_LL_str((mca & CACHE_LL_MASK) >> - CACHE_LL_SHIFT), - get_RRRR_str((mca & CACHE_RRRR_MASK) >> - CACHE_RRRR_SHIFT)); -- } -- if (test_prefix(10, mca)) { -+ } else if (test_prefix(10, mca)) { - if (mca == 0x400) -- return snprintf(buf, len, "Internal Timer error"); -+ Wprintf("Internal Timer error\n"); - else -- return snprintf(buf, len, -- "Internal unclassified errors"); -- } -- if (test_prefix(11, mca)) { -- -- return snprintf(buf, len, "BUS %s %s %s %s %s Error", -+ Wprintf("Internal unclassified error: %x\n", mca & 0xffff); -+ } else if (test_prefix(11, mca)) { -+ Wprintf("BUS %s %s %s %s %s Error\n", - get_LL_str((mca & BUS_LL_MASK) >> BUS_LL_SHIFT), - get_PP_str((mca & BUS_PP_MASK) >> BUS_PP_SHIFT), - get_RRRR_str((mca & BUS_RRRR_MASK) >> - BUS_RRRR_SHIFT), - get_II_str((mca & BUS_II_MASK) >> BUS_II_SHIFT), - get_T_str((mca & BUS_T_MASK) >> BUS_T_SHIFT)); -- } -- return 
snprintf(buf, len, "Unknown Error"); -+ } else if (test_prefix(7, mca)) { -+ decode_memory_controller(mca); -+ } else -+ Wprintf("Unknown Error %x\n", mca); - } - --static void decode_model(__u32 model) -+static void p4_decode_model(__u32 model) - { - static struct { - int value; -@@ -219,17 +217,27 @@ static void decode_model(__u32 model) - Wprintf("\n"); - } - --static void decode_mci(__u64 status) -+static void decode_tracking(u64 track, int cpu) - { --#define BUF_LEN 200 -- char buf[BUF_LEN]; -- __u32 mca; -+ static char *msg[] = { -+ [1] = "green", -+ [2] = "yellow\n" -+"Large number of corrected errors. System operating, but you should\n" -+"schedule it for service within a few weeks", -+ [3] ="res3" }; -+ if (track) { -+ Wprintf("Threshold based error status: %s\n", msg[track]); -+ if (track == 2) -+ Wprintf( -+ "CPU %d has large number of corrected errors. Consider replacement", cpu); -+ } -+} - -+static void decode_mci(__u64 status, int cpu) -+{ - Wprintf("MCi status:\n"); -- if (!(status & MCI_STATUS_VAL)) { -- Wprintf("Invalid log\n"); -- return; -- } -+ if (!(status & MCI_STATUS_VAL)) -+ Wprintf("Machine check not valid\n"); - - if (status & MCI_STATUS_OVER) - Wprintf("Error overflow\n"); -@@ -249,15 +257,9 @@ static void decode_mci(__u64 status) - if (status & MCI_STATUS_PCC) - Wprintf("Processor context corrupt\n"); - -- mca = status & 0xFFFFL; -- decode_mca(mca, buf, BUF_LEN); -- Wprintf("MCA:%s\n", buf); -- -- if (test_prefix(11, mca)) { -- __u32 model; -- model = (status & 0xFFFF0000L); -- decode_model(model); -- } -+ decode_tracking((status >> 54) & 3, cpu); -+ Wprintf("MCA: "); -+ decode_mca(status & 0xffffL); - } - - static void decode_mcg(__u64 mcgstatus) -@@ -272,13 +274,36 @@ static void decode_mcg(__u64 mcgstatus) - Wprintf("\n"); - } - --void decode_p4_mc(struct mce *log) -+void decode_intel_mc(struct mce *log, int cputype) - { -+ int cpu = log->cpu; -+ - decode_mcg(log->mcgstatus); -- decode_mci(log->status); -+ decode_mci(log->status, 
cpu); -+ -+ if (test_prefix(11, (log->status & 0xffffL))) { -+ switch (cputype) { -+ case CPU_P6OLD: -+ p6old_decode_model(log->status); -+ break; -+ case CPU_DUNNINGTON: -+ case CPU_CORE2: -+ core2_decode_model(log->status); -+ break; -+ case CPU_P4: -+ p4_decode_model(log->status & 0xffff0000L); -+ break; -+ case CPU_NEHALEM: -+ nehalem_decode_model(log->status, log->misc); -+ break; -+ } -+ } -+ -+ if (cputype == CPU_DUNNINGTON) -+ dunnington_decode_model(log->status); - } - --char *p4_bank_name(int num) -+char *intel_bank_name(int num) - { - static char bname[64]; - sprintf(bname, "BANK %d", num); -diff -x '*~' -urpN mcelog-0.7/p4.h mcelog-0.7-newcpus//p4.h ---- mcelog-0.7/p4.h 2006-05-03 08:55:54.000000000 +0200 -+++ mcelog-0.7-newcpus//p4.h 2008-09-26 20:35:46.000000000 +0200 -@@ -1,2 +1,2 @@ --char *p4_bank_name(int num); --void decode_p4_mc(struct mce* mce); -+char *intel_bank_name(int num); -+void decode_intel_mc(struct mce *log, int cpu); diff --git a/mcelog-0.7.tar.gz b/mcelog-0.7.tar.gz deleted file mode 100644 index 565efa2..0000000 --- a/mcelog-0.7.tar.gz +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:8eb6072d6ad947f21f3c56a9d20b48d1d9490a32a2375ed786a717944234dc1b -size 13094 diff --git a/mcelog-0.9pre.tar.bz2 b/mcelog-0.9pre.tar.bz2 new file mode 100644 index 0000000..9c8a8e1 --- /dev/null +++ b/mcelog-0.9pre.tar.bz2 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bc931a0bf0eb221e3b01727f2dd59b0d39e94dbc30f5dd7134f78d61ea439986 +size 567856 diff --git a/mcelog-thermal.diff b/mcelog-thermal.diff deleted file mode 100644 index ee7283d..0000000 --- a/mcelog-thermal.diff +++ /dev/null @@ -1,52 +0,0 @@ ---- - p4.c | 19 +++++++++++++++++++ - 1 file changed, 19 insertions(+) - -Index: mcelog-0.7/p4.c -=================================================================== ---- mcelog-0.7.orig/p4.c -+++ mcelog-0.7/p4.c -@@ -28,6 +28,8 @@ - #include "nehalem.h" - #include "dunnington.h" - -+#define 
BANK_THERMAL 128 -+ - /* decode mce for P4/Xeon and Core2 family */ - - static inline int test_prefix(int nr, __u32 value) -@@ -274,10 +276,25 @@ static void decode_mcg(__u64 mcgstatus) - Wprintf("\n"); - } - -+static void decode_thermal(struct mce *log) -+{ -+ if (log->status & 1) -+ Wprintf("Processor core is above trip temperature. " -+ "Throttling enabled.\n"); -+ else -+ Wprintf("Processor core below trip temperature. " -+ "Throttling disabled\n"); -+} -+ - void decode_intel_mc(struct mce *log, int cputype) - { - int cpu = log->cpu; - -+ if (log->bank == BANK_THERMAL) { -+ decode_thermal(log); -+ return; -+ } -+ - decode_mcg(log->mcgstatus); - decode_mci(log->status, cpu); - -@@ -306,6 +323,8 @@ void decode_intel_mc(struct mce *log, in - char *intel_bank_name(int num) - { - static char bname[64]; -+ if (num == BANK_THERMAL) -+ return "THERMAL EVENT"; - sprintf(bname, "BANK %d", num); - return bname; - } diff --git a/mcelog.changes b/mcelog.changes index 6730246..c16e78a 100644 --- a/mcelog.changes +++ b/mcelog.changes @@ -1,3 +1,10 @@ +------------------------------------------------------------------- +Fri Oct 2 17:06:03 CEST 2009 - trenn@suse.de + +- Update to latest git version (0.9pre) + Introduces mcelog daemon mode, service file will follow in an + extra commit. + ------------------------------------------------------------------- Fri Jan 9 08:41:58 CET 2009 - olh@suse.de diff --git a/mcelog.spec b/mcelog.spec index 307462f..295f27b 100644 --- a/mcelog.spec +++ b/mcelog.spec @@ -1,5 +1,5 @@ # -# spec file for package mcelog (Version 0.7) +# spec file for package mcelog (Version 0.9pre) # # Copyright (c) 2009 SUSE LINUX Products GmbH, Nuernberg, Germany. 
# @@ -21,13 +21,11 @@ Name: mcelog License: GPL v2 or later Summary: Log Machine Check Events -Version: 0.7 -Release: 112 +Version: 0.9pre +Release: 1 AutoReqProv: on ExclusiveArch: x86_64 -Source: mcelog-%{version}.tar.gz -Patch0: mcelog-0.7-newcpus-1.diff -Patch1: mcelog-thermal.diff +Source: mcelog-%{version}.tar.bz2 Group: System/Monitoring BuildRoot: %{_tmppath}/%{name}-%{version}-build @@ -49,8 +47,6 @@ Authors: %prep %setup -%patch0 -p1 -%patch1 -p1 %build make CFLAGS="$RPM_OPT_FLAGS" @@ -76,45 +72,3 @@ rm -rf $RPM_BUILD_ROOT /etc/logrotate.d/mcelog %changelog -* Fri Jan 09 2009 olh@suse.de -- use ExclusiveArch as in /SRC/arch/ -* Sat Sep 27 2008 trenn@suse.de -- fate #304279 mcelog support for Tigerton/Dunnington - Patch is from Andi himself with this statement: - While it looks large most of it is just new tables. -* Mon May 29 2006 ak@suse.de -- decode intel thermal events too (#179327) -* Fri May 05 2006 ak@suse.de -- Update to 0.7. This fixes - - Fix --dmi option (#166324) - - Incorporate old patches -* Fri Mar 03 2006 ak@suse.de -- Avoid cosmetic problem in --filter (#153347) -* Wed Feb 08 2006 ak@suse.de -- update to mcelog 0.6 - * Fixes bugs (#148869, #137985) - * Adds --dmi option to map addresses to DIMMs using SMBIOS - (default to off) -* Wed Jan 25 2006 mls@suse.de -- converted neededforbuild to BuildRequires -* Mon Dec 19 2005 sf@suse.de -- update to version 0.5 - * Clarify --ascii in the manpage - *Support for AMD K8 Revision F machine check DRAM error - thresholding -* Fri Feb 11 2005 ak@suse.de -- Use RPM_OPT_FLAGS -- Improve description again -* Thu Feb 10 2005 ak@suse.de -- mcelog-0.4: - * add support to decode AMD K8 (Opteron/Athlon64/AthlonFX) and - Intel P4 (Xeon and Pentium 4) events - * add --ascii option to decode machine check panic information -- Rewrite description in .spec file -* Wed Jun 09 2004 ak@suse.de -- memlog-0.2: - * fix mcelog looping (#41863) - * Add GPL notices -* Thu Mar 25 2004 sf@suse.de -- initial version -- fixes 
#36898