commit 466825ae5fc33000e90896d4fa12463353137619afd4df6904f77dbcdf2980c8 Author: Adrian Schröter Date: Fri May 3 16:43:37 2024 +0200 Sync from SUSE:SLFO:Main mcelog revision aeb019e3941bfe1678e679fdfa2df45f diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..9b03811 --- /dev/null +++ b/.gitattributes @@ -0,0 +1,23 @@ +## Default LFS +*.7z filter=lfs diff=lfs merge=lfs -text +*.bsp filter=lfs diff=lfs merge=lfs -text +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.gem filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.jar filter=lfs diff=lfs merge=lfs -text +*.lz filter=lfs diff=lfs merge=lfs -text +*.lzma filter=lfs diff=lfs merge=lfs -text +*.obscpio filter=lfs diff=lfs merge=lfs -text +*.oxt filter=lfs diff=lfs merge=lfs -text +*.pdf filter=lfs diff=lfs merge=lfs -text +*.png filter=lfs diff=lfs merge=lfs -text +*.rpm filter=lfs diff=lfs merge=lfs -text +*.tbz filter=lfs diff=lfs merge=lfs -text +*.tbz2 filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.ttf filter=lfs diff=lfs merge=lfs -text +*.txz filter=lfs diff=lfs merge=lfs -text +*.whl filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text diff --git a/README.email_setup b/README.email_setup new file mode 100644 index 0000000..5002cba --- /dev/null +++ b/README.email_setup @@ -0,0 +1,78 @@ +MACHINE CHECK EXCEPTION NOTIFICATION VIA EMAIL +============================================== + +(C)opyright by Thomas Renninger Novell Inc. 2010 + +The setup to send Machine Check Exceptions (MCEs) via email relies on a +working smtp server listening on localhost on port 25. + +How this can easily be configured can be read up here: +http://en.opensuse.org/Mail_server_HOWTO +in the "Outgoing" section. + +Test your setup by trying to send test mails via the "mail" shell command, +included in the mailx package. 
+ +Specify the email address where the MCEs should get mailed to here: +/etc/sysconfig/mcelog + +You can filter MCE mails by matching against these mail headers. +One of these headers is set: + - X-Mcelog-Uncorrectable + - X-Mcelog-Correctable + +and one of these is set: + - X-Mcelog-Memory + - X-Mcelog-CPU + - X-Mcelog-Misc + + +NOTE: If broken HW results in an MCE storm of dozens or hundreds of MCEs, +mcelog will not send them all, so as not to overload the machine and network +traffic. If in doubt, check the local mcelog log files. + + +Autoyast +-------- + +For people making use of autoyast to spread similar installations on multiple +machines, here are some hints how to set up the email notification through +autoyast. Please read the autoyast documentation first if you are not familiar +with how to create an autoyast.xml file. + +This simply sets the email address, notifications should get sent to: + + + + MCELOG_ADMIN_EMAIL + /etc/sysconfig/mcelog + trenn@suse.de + + + + +This is an example of how to set up postfix to listen on localhost and +send/forward all mails coming in there through the smtp server +relay.suse.de. +The alias at the beginning forwards local machine notifications sent to root, +to trenn@suse.de. Like that mails interesting for the administrator can easily +be collected and sent to one email address. But this is just one possible mail +set up example. + + + + + root + trenn@suse.de + + + permanent + false + + suse.de + + postfix + relay.suse.de + local + false + diff --git a/Start-consolidating-AMD-specific-stuff.patch b/Start-consolidating-AMD-specific-stuff.patch new file mode 100644 index 0000000..bd13015 --- /dev/null +++ b/Start-consolidating-AMD-specific-stuff.patch @@ -0,0 +1,731 @@ +From 4388981628ad9e2daba956210284017e1133cb99 Mon Sep 17 00:00:00 2001 +From: Borislav Petkov +Date: Wed, 7 May 2014 22:41:15 +0200 +Subject: [PATCH] Start consolidating AMD-specific stuff + +... 
in order to concentrate decoding for all families in amd.[ch]. Pass +down cpu type in decode_amd_mc. + +Signed-off-by: Borislav Petkov +--- + Makefile | 2 + amd.c | 282 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + amd.h | 14 +++ + k8.c | 281 -------------------------------------------------------------- + k8.h | 11 -- + mcelog.c | 8 - + 6 files changed, 301 insertions(+), 297 deletions(-) + rename k8.c => amd.c (97%) + rename k8.h => amd.h (79%) + +Index: mcelog-189/Makefile +=================================================================== +--- mcelog-189.orig/Makefile ++++ mcelog-189/Makefile +@@ -31,7 +31,7 @@ all: mcelog + + .PHONY: install install-nodoc clean depend FORCE + +-OBJ := p4.o k8.o mcelog.o dmi.o tsc.o core2.o bitfield.o intel.o \ ++OBJ := p4.o amd.o mcelog.o dmi.o tsc.o core2.o bitfield.o intel.o \ + nehalem.o dunnington.o tulsa.o config.o memutil.o msg.o \ + eventloop.o leaky-bucket.o memdb.o server.o trigger.o \ + client.o cache.o sysfs.o yellow.o page.o rbtree.o \ +Index: mcelog-189/amd.c +=================================================================== +--- /dev/null ++++ mcelog-189/amd.c +@@ -0,0 +1,282 @@ ++/* Based on K8 decoding code written for the 2.4 kernel by Andi Kleen and ++ * Eric Morton. Hacked and extended for mcelog by AK. ++ * Extended to support all AMD families by Borislav Petkov, SUSE Labs. ++ * ++ * Original copyright: ++ * K8 parts Copyright 2002,2003 Andi Kleen, SuSE Labs. ++ * Additional K8 decoding and simplification Copyright 2003 Eric Morton, Newisys Inc ++ * K8 threshold counters decoding Copyright 2005,2006 Jacob Shin, AMD Inc. 
++ * ++ * Subject to the GNU General Public License ++ */ ++ ++#include ++#include "mcelog.h" ++#include "amd.h" ++ ++static char *k8bank[] = { ++ "data cache", ++ "instruction cache", ++ "bus unit", ++ "load/store unit", ++ "northbridge", ++ "fixed-issue reoder" ++}; ++static char *transaction[] = { ++ "instruction", "data", "generic", "reserved" ++}; ++static char *cachelevel[] = { ++ "0", "1", "2", "generic" ++}; ++static char *memtrans[] = { ++ "generic error", "generic read", "generic write", "data read", ++ "data write", "instruction fetch", "prefetch", "evict", "snoop", ++ "?", "?", "?", "?", "?", "?", "?" ++}; ++static char *partproc[] = { ++ "local node origin", "local node response", ++ "local node observed", "generic participation" ++}; ++static char *timeout[] = { ++ "request didn't time out", ++ "request timed out" ++}; ++static char *memoryio[] = { ++ "memory", "res.", "i/o", "generic" ++}; ++static char *nbextendederr[] = { ++ "RAM ECC error", ++ "CRC error", ++ "Sync error", ++ "Master abort", ++ "Target abort", ++ "GART error", ++ "RMW error", ++ "Watchdog error", ++ "RAM Chipkill ECC error", ++ "DEV Error", ++ "Link Data Error", ++ "Link Protocol Error", ++ "NB Array Error", ++ "DRAM Parity Error", ++ "Link Retry", ++ "Tablew Walk Data Error", ++ "L3 Cache Data Error", ++ "L3 Cache Tag Error", ++ "L3 Cache LRU Error" ++}; ++static char *highbits[32] = { ++ [31] = "valid", ++ [30] = "error overflow (multiple errors)", ++ [29] = "error uncorrected", ++ [28] = "error enable", ++ [27] = "misc error valid", ++ [26] = "error address valid", ++ [25] = "processor context corrupt", ++ [24] = "res24", ++ [23] = "res23", ++ /* 22-15 ecc syndrome bits */ ++ [14] = "corrected ecc error", ++ [13] = "uncorrected ecc error", ++ [12] = "res12", ++ [11] = "L3 subcache in error bit 1", ++ [10] = "L3 subcache in error bit 0", ++ [9] = "sublink or DRAM channel", ++ [8] = "error found by scrub", ++ /* 7-4 ht link number of error */ ++ [3] = "err cpu3", ++ [2] = "err 
cpu2", ++ [1] = "err cpu1", ++ [0] = "err cpu0", ++}; ++static char *k8threshold[] = { ++ [0 ... K8_MCELOG_THRESHOLD_DRAM_ECC - 1] = "Unknow threshold counter", ++ [K8_MCELOG_THRESHOLD_DRAM_ECC] = "MC4_MISC0 DRAM threshold", ++ [K8_MCELOG_THRESHOLD_LINK] = "MC4_MISC1 Link threshold", ++ [K8_MCELOG_THRESHOLD_L3_CACHE] = "MC4_MISC2 L3 Cache threshold", ++ [K8_MCELOG_THRESHOLD_FBDIMM] = "MC4_MISC3 FBDIMM threshold", ++ [K8_MCELOG_THRESHOLD_FBDIMM + 1 ... ++ K8_MCE_THRESHOLD_TOP - K8_MCE_THRESHOLD_BASE - 1] = ++ "Unknown threshold counter", ++}; ++ ++ ++static void decode_k8_generic_errcode(u64 status) ++{ ++ unsigned short errcode = status & 0xffff; ++ int i; ++ ++ for (i=0; i<32; i++) { ++ if (i==31 || i==28 || i==26) ++ continue; ++ if (highbits[i] && (status & (1ULL<<(i+32)))) { ++ Wprintf( " bit%d = %s\n", i+32, highbits[i]); ++ } ++ } ++ ++ if ((errcode & 0xFFF0) == 0x0010) { ++ Wprintf( " TLB error '%s transaction, level %s'\n", ++ transaction[(errcode >> 2) & 3], ++ cachelevel[errcode & 3]); ++ } ++ else if ((errcode & 0xFF00) == 0x0100) { ++ Wprintf( " memory/cache error '%s mem transaction, %s transaction, level %s'\n", ++ memtrans[(errcode >> 4) & 0xf], ++ transaction[(errcode >> 2) & 3], ++ cachelevel[errcode & 3]); ++ } ++ else if ((errcode & 0xF800) == 0x0800) { ++ Wprintf( " bus error '%s, %s\n %s mem transaction\n %s access, level %s'\n", ++ partproc[(errcode >> 9) & 0x3], ++ timeout[(errcode >> 8) & 1], ++ memtrans[(errcode >> 4) & 0xf], ++ memoryio[(errcode >> 2) & 0x3], ++ cachelevel[(errcode & 0x3)]); ++ } ++} ++ ++static void decode_k8_dc_mc(u64 status, int *err) ++{ ++ unsigned short exterrcode = (status >> 16) & 0x0f; ++ unsigned short errcode = status & 0xffff; ++ ++ if(status&(3ULL<<45)) { ++ Wprintf( " Data cache ECC error (syndrome %x)", ++ (u32) (status >> 47) & 0xff); ++ if(status&(1ULL<<40)) { ++ Wprintf(" found by scrubber"); ++ } ++ Wprintf("\n"); ++ } ++ ++ if ((errcode & 0xFFF0) == 0x0010) { ++ Wprintf( " TLB parity error in %s 
array\n", ++ (exterrcode == 0) ? "physical" : "virtual"); ++ } ++ ++ decode_k8_generic_errcode(status); ++} ++ ++static void decode_k8_ic_mc(u64 status, int *err) ++{ ++ unsigned short exterrcode = (status >> 16) & 0x0f; ++ unsigned short errcode = status & 0xffff; ++ ++ if(status&(3ULL<<45)) { ++ Wprintf(" Instruction cache ECC error\n"); ++ } ++ ++ if ((errcode & 0xFFF0) == 0x0010) { ++ Wprintf(" TLB parity error in %s array\n", ++ (exterrcode == 0) ? "physical" : "virtual"); ++ } ++ ++ decode_k8_generic_errcode(status); ++} ++ ++static void decode_k8_bu_mc(u64 status, int *err) ++{ ++ unsigned short exterrcode = (status >> 16) & 0x0f; ++ ++ if(status&(3ULL<<45)) { ++ Wprintf(" L2 cache ECC error\n"); ++ } ++ ++ Wprintf(" %s array error\n", ++ (exterrcode == 0) ? "Bus or cache" : "Cache tag"); ++ ++ decode_k8_generic_errcode(status); ++} ++ ++static void decode_k8_ls_mc(u64 status, int *err) ++{ ++ decode_k8_generic_errcode(status); ++} ++ ++static void decode_k8_nb_mc(u64 status, int *memerr) ++{ ++ unsigned short exterrcode = (status >> 16) & 0x0f; ++ ++ Wprintf(" Northbridge %s\n", nbextendederr[exterrcode]); ++ ++ switch (exterrcode) { ++ case 0: ++ *memerr = 1; ++ Wprintf(" ECC syndrome = %x\n", ++ (u32) (status >> 47) & 0xff); ++ break; ++ case 8: ++ *memerr = 1; ++ Wprintf(" Chipkill ECC syndrome = %x\n", ++ (u32) ((((status >> 24) & 0xff) << 8) | ((status >> 47) & 0xff))); ++ break; ++ case 1: ++ case 2: ++ case 3: ++ case 4: ++ case 6: ++ Wprintf(" link number = %x\n", ++ (u32) (status >> 36) & 0xf); ++ break; ++ } ++ ++ decode_k8_generic_errcode(status); ++} ++ ++static void decode_k8_fr_mc(u64 status, int *err) ++{ ++ decode_k8_generic_errcode(status); ++} ++ ++static void decode_k8_threshold(u64 misc) ++{ ++ if (misc & MCI_THRESHOLD_OVER) ++ Wprintf(" Threshold error count overflow\n"); ++} ++ ++typedef void (*decoder_t)(u64, int *ismemerr); ++ ++static decoder_t decoders[] = { ++ [0] = decode_k8_dc_mc, ++ [1] = decode_k8_ic_mc, ++ [2] = 
decode_k8_bu_mc, ++ [3] = decode_k8_ls_mc, ++ [4] = decode_k8_nb_mc, ++ [5] = decode_k8_fr_mc, ++}; ++ ++void decode_amd_mc(enum cputype cpu, struct mce *mce, int *ismemerr) ++{ ++ if (mce->bank < NELE(decoders)) ++ decoders[mce->bank](mce->status, ismemerr); ++ else if (mce->bank >= K8_MCE_THRESHOLD_BASE && ++ mce->bank < K8_MCE_THRESHOLD_TOP) ++ decode_k8_threshold(mce->misc); ++ else ++ Wprintf(" no decoder for unknown bank %u\n", mce->bank); ++} ++ ++char *k8_bank_name(unsigned num) ++{ ++ static char buf[64]; ++ char *s = "unknown"; ++ if (num < NELE(k8bank)) ++ s = k8bank[num]; ++ else if (num >= K8_MCE_THRESHOLD_BASE && ++ num < K8_MCE_THRESHOLD_TOP) ++ s = k8threshold[num - K8_MCE_THRESHOLD_BASE]; ++ buf[sizeof(buf)-1] = 0; ++ snprintf(buf, sizeof(buf) - 1, "%u %s", num, s); ++ return buf; ++} ++ ++int mce_filter_k8(struct mce *m) ++{ ++ /* Filter out GART errors */ ++ if (m->bank == 4) { ++ unsigned short exterrcode = (m->status >> 16) & 0x0f; ++ if (exterrcode == 5 && (m->status & (1ULL<<61))) ++ return 0; ++ } ++ return 1; ++} +Index: mcelog-189/amd.h +=================================================================== +--- /dev/null ++++ mcelog-189/amd.h +@@ -0,0 +1,80 @@ ++char *k8_bank_name(unsigned num); ++void decode_amd_mc(enum cputype, struct mce *mce, int *ismemerr); ++int mce_filter_k8(struct mce *m); ++ ++#define K8_MCE_THRESHOLD_BASE (MCE_EXTENDED_BANK + 1) /* MCE_AMD */ ++#define K8_MCE_THRESHOLD_TOP (K8_MCE_THRESHOLD_BASE + 6 * 9) ++ ++#define K8_MCELOG_THRESHOLD_DRAM_ECC (4 * 9 + 0) ++#define K8_MCELOG_THRESHOLD_LINK (4 * 9 + 1) ++#define K8_MCELOG_THRESHOLD_L3_CACHE (4 * 9 + 2) ++#define K8_MCELOG_THRESHOLD_FBDIMM (4 * 9 + 3) ++ ++#define EC(x) ((x) & 0xffff) ++#define XEC(x, mask) (((x) >> 16) & mask) ++ ++#define LOW_SYNDROME(x) (((x) >> 15) & 0xff) ++#define HIGH_SYNDROME(x) (((x) >> 24) & 0xff) ++ ++#define TLB_ERROR(x) (((x) & 0xFFF0) == 0x0010) ++#define MEM_ERROR(x) (((x) & 0xFF00) == 0x0100) ++#define BUS_ERROR(x) (((x) & 0xF800) 
== 0x0800) ++#define INT_ERROR(x) (((x) & 0xF4FF) == 0x0400) ++ ++#define TT(x) (((x) >> 2) & 0x3) ++#define TT_MSG(x) tt_msgs[TT(x)] ++#define II(x) (((x) >> 2) & 0x3) ++#define II_MSG(x) ii_msgs[II(x)] ++#define LL(x) ((x) & 0x3) ++#define LL_MSG(x) ll_msgs[LL(x)] ++#define TO(x) (((x) >> 8) & 0x1) ++#define TO_MSG(x) to_msgs[TO(x)] ++#define PP(x) (((x) >> 9) & 0x3) ++#define PP_MSG(x) pp_msgs[PP(x)] ++#define UU(x) (((x) >> 8) & 0x3) ++#define UU_MSG(x) uu_msgs[UU(x)] ++ ++#define R4(x) (((x) >> 4) & 0xf) ++#define R4_MSG(x) ((R4(x) < 9) ? rrrr_msgs[R4(x)] : "Wrong R4!") ++ ++enum tt_ids { ++ TT_INSTR = 0, ++ TT_DATA, ++ TT_GEN, ++ TT_RESV, ++}; ++ ++enum ll_ids { ++ LL_RESV = 0, ++ LL_L1, ++ LL_L2, ++ LL_LG, ++}; ++ ++enum ii_ids { ++ II_MEM = 0, ++ II_RESV, ++ II_IO, ++ II_GEN, ++}; ++ ++enum rrrr_ids { ++ R4_GEN = 0, ++ R4_RD, ++ R4_WR, ++ R4_DRD, ++ R4_DWR, ++ R4_IRD, ++ R4_PREF, ++ R4_EVICT, ++ R4_SNOOP, ++}; ++ ++#define CASE_AMD_CPUS \ ++ (cputype == CPU_K8 || \ ++ cputype == CPU_F10H || \ ++ cputype == CPU_F11H || \ ++ cputype == CPU_F12H || \ ++ cputype == CPU_F14H || \ ++ cputype == CPU_F15H || \ ++ cputype == CPU_F16H) +Index: mcelog-189/k8.c +=================================================================== +--- mcelog-189.orig/k8.c ++++ /dev/null +@@ -1,281 +0,0 @@ +-/* Based on K8 decoding code written for the 2.4 kernel by Andi Kleen and +- * Eric Morton. Hacked and extended for mcelog by AK. +- * +- * Original copyright: +- * K8 parts Copyright 2002,2003 Andi Kleen, SuSE Labs. +- * Additional K8 decoding and simplification Copyright 2003 Eric Morton, Newisys Inc +- * K8 threshold counters decoding Copyright 2005,2006 Jacob Shin, AMD Inc. 
+- * +- * Subject to the GNU General Public License +- */ +- +-#include +-#include "mcelog.h" +-#include "k8.h" +- +-static char *k8bank[] = { +- "data cache", +- "instruction cache", +- "bus unit", +- "load/store unit", +- "northbridge", +- "fixed-issue reoder" +-}; +-static char *transaction[] = { +- "instruction", "data", "generic", "reserved" +-}; +-static char *cachelevel[] = { +- "0", "1", "2", "generic" +-}; +-static char *memtrans[] = { +- "generic error", "generic read", "generic write", "data read", +- "data write", "instruction fetch", "prefetch", "evict", "snoop", +- "?", "?", "?", "?", "?", "?", "?" +-}; +-static char *partproc[] = { +- "local node origin", "local node response", +- "local node observed", "generic participation" +-}; +-static char *timeout[] = { +- "request didn't time out", +- "request timed out" +-}; +-static char *memoryio[] = { +- "memory", "res.", "i/o", "generic" +-}; +-static char *nbextendederr[] = { +- "RAM ECC error", +- "CRC error", +- "Sync error", +- "Master abort", +- "Target abort", +- "GART error", +- "RMW error", +- "Watchdog error", +- "RAM Chipkill ECC error", +- "DEV Error", +- "Link Data Error", +- "Link Protocol Error", +- "NB Array Error", +- "DRAM Parity Error", +- "Link Retry", +- "Tablew Walk Data Error", +- "L3 Cache Data Error", +- "L3 Cache Tag Error", +- "L3 Cache LRU Error" +-}; +-static char *highbits[32] = { +- [31] = "valid", +- [30] = "error overflow (multiple errors)", +- [29] = "error uncorrected", +- [28] = "error enable", +- [27] = "misc error valid", +- [26] = "error address valid", +- [25] = "processor context corrupt", +- [24] = "res24", +- [23] = "res23", +- /* 22-15 ecc syndrome bits */ +- [14] = "corrected ecc error", +- [13] = "uncorrected ecc error", +- [12] = "res12", +- [11] = "L3 subcache in error bit 1", +- [10] = "L3 subcache in error bit 0", +- [9] = "sublink or DRAM channel", +- [8] = "error found by scrub", +- /* 7-4 ht link number of error */ +- [3] = "err cpu3", +- [2] = "err 
cpu2", +- [1] = "err cpu1", +- [0] = "err cpu0", +-}; +-static char *k8threshold[] = { +- [0 ... K8_MCELOG_THRESHOLD_DRAM_ECC - 1] = "Unknown threshold counter", +- [K8_MCELOG_THRESHOLD_DRAM_ECC] = "MC4_MISC0 DRAM threshold", +- [K8_MCELOG_THRESHOLD_LINK] = "MC4_MISC1 Link threshold", +- [K8_MCELOG_THRESHOLD_L3_CACHE] = "MC4_MISC2 L3 Cache threshold", +- [K8_MCELOG_THRESHOLD_FBDIMM] = "MC4_MISC3 FBDIMM threshold", +- [K8_MCELOG_THRESHOLD_FBDIMM + 1 ... +- K8_MCE_THRESHOLD_TOP - K8_MCE_THRESHOLD_BASE - 1] = +- "Unknown threshold counter", +-}; +- +- +-static void decode_k8_generic_errcode(u64 status) +-{ +- unsigned short errcode = status & 0xffff; +- int i; +- +- for (i=0; i<32; i++) { +- if (i==31 || i==28 || i==26) +- continue; +- if (highbits[i] && (status & (1ULL<<(i+32)))) { +- Wprintf( " bit%d = %s\n", i+32, highbits[i]); +- } +- } +- +- if ((errcode & 0xFFF0) == 0x0010) { +- Wprintf( " TLB error '%s transaction, level %s'\n", +- transaction[(errcode >> 2) & 3], +- cachelevel[errcode & 3]); +- } +- else if ((errcode & 0xFF00) == 0x0100) { +- Wprintf( " memory/cache error '%s mem transaction, %s transaction, level %s'\n", +- memtrans[(errcode >> 4) & 0xf], +- transaction[(errcode >> 2) & 3], +- cachelevel[errcode & 3]); +- } +- else if ((errcode & 0xF800) == 0x0800) { +- Wprintf( " bus error '%s, %s\n %s mem transaction\n %s access, level %s'\n", +- partproc[(errcode >> 9) & 0x3], +- timeout[(errcode >> 8) & 1], +- memtrans[(errcode >> 4) & 0xf], +- memoryio[(errcode >> 2) & 0x3], +- cachelevel[(errcode & 0x3)]); +- } +-} +- +-static void decode_k8_dc_mc(u64 status, int *err) +-{ +- unsigned short exterrcode = (status >> 16) & 0x0f; +- unsigned short errcode = status & 0xffff; +- +- if(status&(3ULL<<45)) { +- Wprintf( " Data cache ECC error (syndrome %x)", +- (u32) (status >> 47) & 0xff); +- if(status&(1ULL<<40)) { +- Wprintf(" found by scrubber"); +- } +- Wprintf("\n"); +- } +- +- if ((errcode & 0xFFF0) == 0x0010) { +- Wprintf( " TLB parity error in %s 
array\n", +- (exterrcode == 0) ? "physical" : "virtual"); +- } +- +- decode_k8_generic_errcode(status); +-} +- +-static void decode_k8_ic_mc(u64 status, int *err) +-{ +- unsigned short exterrcode = (status >> 16) & 0x0f; +- unsigned short errcode = status & 0xffff; +- +- if(status&(3ULL<<45)) { +- Wprintf(" Instruction cache ECC error\n"); +- } +- +- if ((errcode & 0xFFF0) == 0x0010) { +- Wprintf(" TLB parity error in %s array\n", +- (exterrcode == 0) ? "physical" : "virtual"); +- } +- +- decode_k8_generic_errcode(status); +-} +- +-static void decode_k8_bu_mc(u64 status, int *err) +-{ +- unsigned short exterrcode = (status >> 16) & 0x0f; +- +- if(status&(3ULL<<45)) { +- Wprintf(" L2 cache ECC error\n"); +- } +- +- Wprintf(" %s array error\n", +- (exterrcode == 0) ? "Bus or cache" : "Cache tag"); +- +- decode_k8_generic_errcode(status); +-} +- +-static void decode_k8_ls_mc(u64 status, int *err) +-{ +- decode_k8_generic_errcode(status); +-} +- +-static void decode_k8_nb_mc(u64 status, int *memerr) +-{ +- unsigned short exterrcode = (status >> 16) & 0x0f; +- +- Wprintf(" Northbridge %s\n", nbextendederr[exterrcode]); +- +- switch (exterrcode) { +- case 0: +- *memerr = 1; +- Wprintf(" ECC syndrome = %x\n", +- (u32) (status >> 47) & 0xff); +- break; +- case 8: +- *memerr = 1; +- Wprintf(" Chipkill ECC syndrome = %x\n", +- (u32) ((((status >> 24) & 0xff) << 8) | ((status >> 47) & 0xff))); +- break; +- case 1: +- case 2: +- case 3: +- case 4: +- case 6: +- Wprintf(" link number = %x\n", +- (u32) (status >> 36) & 0xf); +- break; +- } +- +- decode_k8_generic_errcode(status); +-} +- +-static void decode_k8_fr_mc(u64 status, int *err) +-{ +- decode_k8_generic_errcode(status); +-} +- +-static void decode_k8_threshold(u64 misc) +-{ +- if (misc & MCI_THRESHOLD_OVER) +- Wprintf(" Threshold error count overflow\n"); +-} +- +-typedef void (*decoder_t)(u64, int *ismemerr); +- +-static decoder_t decoders[] = { +- [0] = decode_k8_dc_mc, +- [1] = decode_k8_ic_mc, +- [2] = 
decode_k8_bu_mc, +- [3] = decode_k8_ls_mc, +- [4] = decode_k8_nb_mc, +- [5] = decode_k8_fr_mc, +-}; +- +-void decode_k8_mc(struct mce *mce, int *ismemerr) +-{ +- if (mce->bank < NELE(decoders)) +- decoders[mce->bank](mce->status, ismemerr); +- else if (mce->bank >= K8_MCE_THRESHOLD_BASE && +- mce->bank < K8_MCE_THRESHOLD_TOP) +- decode_k8_threshold(mce->misc); +- else +- Wprintf(" no decoder for unknown bank %u\n", mce->bank); +-} +- +-char *k8_bank_name(unsigned num) +-{ +- static char buf[64]; +- char *s = "unknown"; +- if (num < NELE(k8bank)) +- s = k8bank[num]; +- else if (num >= K8_MCE_THRESHOLD_BASE && +- num < K8_MCE_THRESHOLD_TOP) +- s = k8threshold[num - K8_MCE_THRESHOLD_BASE]; +- buf[sizeof(buf)-1] = 0; +- snprintf(buf, sizeof(buf) - 1, "%u %s", num, s); +- return buf; +-} +- +-int mce_filter_k8(struct mce *m) +-{ +- /* Filter out GART errors */ +- if (m->bank == 4) { +- unsigned short exterrcode = (m->status >> 16) & 0x0f; +- if (exterrcode == 5 && (m->status & (1ULL<<61))) +- return 0; +- } +- return 1; +-} +Index: mcelog-189/k8.h +=================================================================== +--- mcelog-189.orig/k8.h ++++ /dev/null +@@ -1,11 +0,0 @@ +-char *k8_bank_name(unsigned num); +-void decode_k8_mc(struct mce *mce, int *ismemerr); +-int mce_filter_k8(struct mce *m); +- +-#define K8_MCE_THRESHOLD_BASE (MCE_EXTENDED_BANK + 1) /* MCE_AMD */ +-#define K8_MCE_THRESHOLD_TOP (K8_MCE_THRESHOLD_BASE + 6 * 9) +- +-#define K8_MCELOG_THRESHOLD_DRAM_ECC (4 * 9 + 0) +-#define K8_MCELOG_THRESHOLD_LINK (4 * 9 + 1) +-#define K8_MCELOG_THRESHOLD_L3_CACHE (4 * 9 + 2) +-#define K8_MCELOG_THRESHOLD_FBDIMM (4 * 9 + 3) +Index: mcelog-189/mcelog.c +=================================================================== +--- mcelog-189.orig/mcelog.c ++++ mcelog-189/mcelog.c +@@ -41,7 +41,7 @@ + #include + #include "mcelog.h" + #include "paths.h" +-#include "k8.h" ++#include "amd.h" + #include "intel.h" + #include "p4.h" + #include "dmi.h" +@@ -346,8 +346,8 @@ static 
void dump_mce(struct mce *m, unsi + time_t t = m->time; + Wprintf("TIME %llu %s", m->time, ctime(&t)); + } +- if (cputype == CPU_K8) +- decode_k8_mc(m, &ismemerr); ++ if CASE_AMD_CPUS ++ decode_amd_mc(m, &ismemerr); + else if (cputype >= CPU_INTEL) + decode_intel_mc(m, cputype, &ismemerr, recordlen); + /* else add handlers for other CPUs here */ diff --git a/_service b/_service new file mode 100644 index 0000000..1235af1 --- /dev/null +++ b/_service @@ -0,0 +1,15 @@ + + + git + https://git.kernel.org/pub/scm/utils/cpu/mce/mcelog.git + enable + v(.*) + @PARENT_TAG@ + + + + + *.tar + gz + + diff --git a/_servicedata b/_servicedata new file mode 100644 index 0000000..0a174c1 --- /dev/null +++ b/_servicedata @@ -0,0 +1,10 @@ + + + https://github.com/andikleen/mcelog + ee90ff20ce6a4d5e016aa249ce8b37f359f9fda4 + git://git.kernel.org/pub/scm/utils/cpu/mce/mcelog.git + 04d51981e8805c4200f5a03b4216c8621bc52ace + https://github.com/andikleen/mcelog.git + 1f3a769c8fb736815a56ea104b7b751c5565cb88 + https://git.kernel.org/pub/scm/utils/cpu/mce/mcelog.git + edfe78a0dc54a940f4916a9bd681eab7b3f746d1 \ No newline at end of file diff --git a/add-f10h-support.patch b/add-f10h-support.patch new file mode 100644 index 0000000..c4cddbc --- /dev/null +++ b/add-f10h-support.patch @@ -0,0 +1,683 @@ +Add F10h decoding support + +Signed-off-by: Borislav Petkov +--- + amd.c | 488 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++---- + amd.h | 42 ++++- + mcelog.c | 26 +-- + mcelog.h | 1 + 4 files changed, 506 insertions(+), 51 deletions(-) + +Index: mcelog-189/amd.c +=================================================================== +--- mcelog-189.orig/amd.c ++++ mcelog-189/amd.c +@@ -14,7 +14,7 @@ + #include "mcelog.h" + #include "amd.h" + +-static char *k8bank[] = { ++static const char * const k8bank[] = { + "data cache", + "instruction cache", + "bus unit", +@@ -22,28 +22,34 @@ static char *k8bank[] = { + "northbridge", + "fixed-issue reoder" + }; +-static char *transaction[] = 
{ ++static const char * const transaction[] = { + "instruction", "data", "generic", "reserved" +-}; +-static char *cachelevel[] = { ++}; ++static const char * const cachelevel[] = { + "0", "1", "2", "generic" + }; +-static char *memtrans[] = { ++static const char * const memtrans[] = { + "generic error", "generic read", "generic write", "data read", + "data write", "instruction fetch", "prefetch", "evict", "snoop", + "?", "?", "?", "?", "?", "?", "?" + }; +-static char *partproc[] = { +- "local node origin", "local node response", +- "local node observed", "generic participation" ++static const char * const partproc[] = { ++ "local node origin", ++ "local node response", ++ "local node observed", ++ "generic participation" + }; +-static char *timeout[] = { ++static const char * const timeout[] = { + "request didn't time out", + "request timed out" + }; +-static char *memoryio[] = { ++static const char * const memoryio[] = { + "memory", "res.", "i/o", "generic" + }; ++ ++/* internal error type */ ++static const char * const uu_msgs[] = { "RESV", "RESV", "HWA", "RESV" }; ++ + static char *nbextendederr[] = { + "RAM ECC error", + "CRC error", +@@ -65,6 +71,46 @@ static char *nbextendederr[] = { + "L3 Cache Tag Error", + "L3 Cache LRU Error" + }; ++ ++static const char * const mc4_mce_desc[] = { ++ "DRAM ECC error detected on the NB", ++ "CRC error detected on HT link", ++ "Link-defined sync error packets detected on HT link", ++ "HT Master abort", ++ "HT Target abort", ++ "Invalid GART PTE entry during GART table walk", ++ "Unsupported atomic RMW received from an IO link", ++ "Watchdog timeout due to lack of progress", ++ "DRAM ECC error detected on the NB", ++ "SVM DMA Exclusion Vector error", ++ "HT data error detected on link", ++ "Protocol error (link, L3, probe filter)", ++ "NB internal arrays parity error", ++ "DRAM addr/ctl signals parity error", ++ "IO link transmission error", ++ "L3 data cache ECC error", /* xec = 0x1c */ ++ "L3 cache tag error", ++ "L3 LRU 
parity bits error", ++ "ECC Error in the Probe Filter directory" ++}; ++ ++static const char * const mc5_mce_desc[] = { ++ "CPU Watchdog timer expire", ++ "Wakeup array dest tag", ++ "AG payload array", ++ "EX payload array", ++ "IDRF array", ++ "Retire dispatch queue", ++ "Mapper checkpoint array", ++ "Physical register file EX0 port", ++ "Physical register file EX1 port", ++ "Physical register file AG0 port", ++ "Physical register file AG1 port", ++ "Flag register file", ++ "DE error occurred", ++ "Retire status queue" ++}; ++ + static char *highbits[32] = { + [31] = "valid", + [30] = "error overflow (multiple errors)", +@@ -100,6 +146,21 @@ static char *k8threshold[] = { + "Unknown threshold counter", + }; + ++static u8 xec_mask = 0xf; ++ ++enum cputype select_amd_cputype(u32 family) ++{ ++ switch (family) { ++ case 0xf: ++ return CPU_K8; ++ case 0x10: ++ return CPU_F10H; ++ default: ++ break; ++ } ++ ++ return CPU_GENERIC; ++} + + static void decode_k8_generic_errcode(u64 status) + { +@@ -245,21 +306,393 @@ static decoder_t decoders[] = { + [5] = decode_k8_fr_mc, + }; + +-void decode_amd_mc(enum cputype cpu, struct mce *mce, int *ismemerr) ++static bool k8_mc1_mce(u16 ec, u8 xec) ++{ ++ u8 ll = LL(ec); ++ bool ret = true; ++ ++ if (!MEM_ERROR(ec)) ++ return false; ++ ++ if (ll == 0x2) ++ Wprintf("during a linefill from L2.\n"); ++ else if (ll == 0x1) { ++ switch (R4(ec)) { ++ case R4_IRD: ++ Wprintf("Parity error during data load.\n"); ++ break; ++ ++ case R4_EVICT: ++ Wprintf("Copyback Parity/Victim error.\n"); ++ break; ++ ++ case R4_SNOOP: ++ Wprintf("Tag Snoop error.\n"); ++ break; ++ ++ default: ++ ret = false; ++ break; ++ } ++ } else ++ ret = false; ++ ++ return ret; ++} ++ ++static bool f12h_mc0_mce(u16 ec, u8 xec) ++{ ++ bool ret = false; ++ ++ if (MEM_ERROR(ec)) { ++ u8 ll = LL(ec); ++ ret = true; ++ ++ if (ll == LL_L2) ++ Wprintf("aduring L1 linefill from L2.\n"); ++ else if (ll == LL_L1) ++ Wprintf("Data/Tag %s error.\n", R4_MSG(ec)); ++ else ++ ret 
= false; ++ } ++ return ret; ++} ++ ++static bool f10h_mc0_mce(u16 ec, u8 xec) ++{ ++ if (R4(ec) == R4_GEN && LL(ec) == LL_L1) { ++ Wprintf("during data scrub.\n"); ++ return true; ++ } ++ return f12h_mc0_mce(ec, xec); ++} ++ ++static void decode_mc0_mce(struct amd_decoder_ops *ops, struct mce *m) ++{ ++ u16 ec = EC(m->status); ++ u8 xec = XEC(m->status, xec_mask); ++ ++ Wprintf(" MC0 Error: "); ++ ++ /* TLB error signatures are the same across families */ ++ if (TLB_ERROR(ec)) { ++ if (TT(ec) == TT_DATA) { ++ Wprintf("%s TLB %s.\n", LL_MSG(ec), ++ ((xec == 2) ? "locked miss" ++ : (xec ? "multimatch" : "parity"))); ++ return; ++ } ++ } else if (ops->mc0_mce(ec, xec)) ++ ; ++ else ++ Eprintf("Corrupted MC0 MCE info?\n"); ++} ++ ++static void decode_mc1_mce(struct amd_decoder_ops *ops, struct mce *m) + { +- if (mce->bank < NELE(decoders)) +- decoders[mce->bank](mce->status, ismemerr); +- else if (mce->bank >= K8_MCE_THRESHOLD_BASE && +- mce->bank < K8_MCE_THRESHOLD_TOP) +- decode_k8_threshold(mce->misc); ++ u16 ec = EC(m->status); ++ u8 xec = XEC(m->status, xec_mask); ++ ++ Wprintf(" MC1 Error: "); ++ ++ if (TLB_ERROR(ec)) ++ Wprintf("%s TLB %s.\n", LL_MSG(ec), ++ (xec ? "multimatch" : "parity error")); ++ else if (BUS_ERROR(ec)) { ++ bool k8 = ((ops->cpu == AMD_K8) && (m->status & BIT_64(58))); ++ ++ Wprintf("during %s.\n", (k8 ? 
"system linefill" : "NB data read")); ++ } else if (ops->mc1_mce(ec, xec)) ++ ; + else +- Wprintf(" no decoder for unknown bank %u\n", mce->bank); ++ Eprintf("Corrupted MC1 MCE info?\n"); ++} ++ ++static bool k8_mc2_mce(u16 ec, u8 xec) ++{ ++ bool ret = true; ++ ++ if (xec == 0x1) ++ Wprintf(" in the write data buffers.\n"); ++ else if (xec == 0x3) ++ Wprintf(" in the victim data buffers.\n"); ++ else if (xec == 0x2 && MEM_ERROR(ec)) ++ Wprintf(": %s error in the L2 cache tags.\n", R4_MSG(ec)); ++ else if (xec == 0x0) { ++ if (TLB_ERROR(ec)) ++ Wprintf(": %s error in a Page Descriptor Cache or " ++ "Guest TLB.\n", TT_MSG(ec)); ++ else if (BUS_ERROR(ec)) ++ Wprintf(": %s/ECC error in data read from NB: %s.\n", ++ R4_MSG(ec), PP_MSG(ec)); ++ else if (MEM_ERROR(ec)) { ++ u8 r4 = R4(ec); ++ ++ if (r4 >= 0x7) ++ Wprintf(": %s error during data copyback.\n", ++ R4_MSG(ec)); ++ else if (r4 <= 0x1) ++ Wprintf(": %s parity/ECC error during data " ++ "access from L2.\n", R4_MSG(ec)); ++ else ++ ret = false; ++ } else ++ ret = false; ++ } else ++ ret = false; ++ ++ return ret; ++} ++ ++static void decode_mc2_mce(struct amd_decoder_ops *ops, struct mce *m) ++{ ++ u16 ec = EC(m->status); ++ u8 xec = XEC(m->status, xec_mask); ++ ++ Wprintf(" MC2 Error: "); ++ ++ if (!ops->mc2_mce(ec, xec)) ++ Eprintf("Corrupted MC2 MCE info?\n"); ++} ++ ++static void decode_mc3_mce(struct amd_decoder_ops *ops, struct mce *m) ++{ ++ u16 ec = EC(m->status); ++ u8 xec = XEC(m->status, xec_mask); ++ ++ if (ops->cpu >= AMD_F14H) { ++ Eprintf("You shouldn't be seeing MC3 MCE on this cpu family," ++ " please report on LKML.\n"); ++ return; ++ } ++ ++ Wprintf(" MC3 Error"); ++ ++ if (xec == 0x0) { ++ u8 r4 = R4(ec); ++ ++ if (!BUS_ERROR(ec) || (r4 != R4_DRD && r4 != R4_DWR)) ++ goto wrong_mc3_mce; ++ ++ Wprintf(" during %s.\n", R4_MSG(ec)); ++ } else ++ goto wrong_mc3_mce; ++ ++ return; ++ ++wrong_mc3_mce: ++ Eprintf("Corrupted MC3 MCE info?\n"); ++} ++ ++static void decode_mc4_mce(struct 
amd_decoder_ops *ops, struct mce *m) ++{ ++ u16 ec = EC(m->status); ++ u8 xec = XEC(m->status, 0x1f); ++ u8 offset = 0; ++ ++ Wprintf(" MC4 Error: "); ++ ++ switch (xec) { ++ case 0x0 ... 0xe: ++ ++ /* special handling for DRAM ECCs */ ++ if (xec == 0x0 || xec == 0x8) { ++ /* no ECCs on F11h */ ++ if (ops->cpu == AMD_F11H) ++ goto wrong_mc4_mce; ++ ++ Wprintf("%s.\n", mc4_mce_desc[xec]); ++ return; ++ } ++ break; ++ ++ case 0xf: ++ if (TLB_ERROR(ec)) ++ Wprintf("GART Table Walk data error.\n"); ++ else if (BUS_ERROR(ec)) ++ Wprintf("DMA Exclusion Vector Table Walk error.\n"); ++ else ++ goto wrong_mc4_mce; ++ return; ++ ++ case 0x19: ++ if (ops->cpu >= AMD_F15H || ops->cpu <= AMD_F16H) ++ Wprintf("Compute Unit Data Error.\n"); ++ else ++ goto wrong_mc4_mce; ++ return; ++ ++ case 0x1c ... 0x1f: ++ offset = 13; ++ break; ++ ++ default: ++ goto wrong_mc4_mce; ++ } ++ ++ Wprintf("%s.\n", mc4_mce_desc[xec - offset]); ++ return; ++ ++ wrong_mc4_mce: ++ Eprintf("Corrupted MC4 MCE info?\n"); ++} ++ ++static void decode_mc5_mce(struct amd_decoder_ops *ops, struct mce *m) ++{ ++ u8 xec = XEC(m->status, xec_mask); ++ ++ if (ops->cpu == AMD_K8 || ops->cpu == AMD_F11H) ++ goto wrong_mc5_mce; ++ ++ Wprintf(" MC5 Error: "); ++ ++ if (xec == 0x0 || xec == 0xc) ++ Wprintf("%s.\n", mc5_mce_desc[xec]); ++ else if (xec <= 0xd) ++ Wprintf("%s parity error.\n", mc5_mce_desc[xec]); ++ else ++ goto wrong_mc5_mce; ++ ++ return; ++ ++ wrong_mc5_mce: ++ Eprintf("Corrupted MC5 MCE info?\n"); ++} ++ ++static void decode_mc6_mce(struct mce *m) ++{ ++ u8 xec = XEC(m->status, xec_mask); ++ ++ Wprintf(" MC6 Error: "); ++ ++ switch (xec) { ++ case 0x1: ++ Wprintf("Free List"); ++ break; ++ ++ case 0x2: ++ Wprintf("Physical Register File"); ++ break; ++ ++ case 0x3: ++ Wprintf("Retire Queue"); ++ break; ++ ++ case 0x4: ++ Wprintf("Scheduler table"); ++ break; ++ ++ case 0x5: ++ Wprintf("Status Register File"); ++ break; ++ ++ default: ++ goto wrong_mc6_mce; ++ break; ++ } ++ ++ Wprintf(" parity 
error.\n"); ++ ++ return; ++ ++ wrong_mc6_mce: ++ Eprintf("Corrupted MC6 MCE info?\n"); ++} ++ ++static inline void amd_decode_err_code(u16 ec) ++{ ++ if (INT_ERROR(ec)) { ++ Wprintf(" internal: %s\n", UU_MSG(ec)); ++ return; ++ } ++ ++ Wprintf(" cache level: %s", LL_MSG(ec)); ++ ++ if (BUS_ERROR(ec)) ++ Wprintf(", mem/io: %s", II_MSG(ec)); ++ else ++ Wprintf(", tx: %s", TT_MSG(ec)); ++ ++ if (MEM_ERROR(ec) || BUS_ERROR(ec)) { ++ Wprintf(", mem-tx: %s", R4_MSG(ec)); ++ ++ if (BUS_ERROR(ec)) ++ Wprintf(", part-proc: %s (%s)", PP_MSG(ec), TO_MSG(ec)); ++ } ++ ++ Wprintf("\n"); ++} ++ ++struct amd_decoder_ops fam_ops[] = { ++ [AMD_F10H] = { ++ .cpu = AMD_F10H, ++ .mc0_mce = f10h_mc0_mce, ++ .mc1_mce = k8_mc1_mce, ++ .mc2_mce = k8_mc2_mce, ++ }, ++}; ++ ++static void __decode_amd_mc(enum cputype cpu, struct mce *mce) ++{ ++ struct amd_decoder_ops *ops; ++ ++ switch (cpu) { ++ case CPU_F10H: ++ ops = &fam_ops[AMD_F10H]; ++ break; ++ default: ++ Eprintf("Huh? What family is it: 0x%x?!\n", cpu); ++ return; ++ break; ++ } ++ ++ switch (mce->bank) { ++ case 0: ++ decode_mc0_mce(ops, mce); ++ break; ++ case 1: ++ decode_mc1_mce(ops, mce); ++ break; ++ case 2: ++ decode_mc2_mce(ops, mce); ++ break; ++ case 3: ++ decode_mc3_mce(ops, mce); ++ break; ++ case 4: ++ decode_mc4_mce(ops, mce); ++ break; ++ case 5: ++ decode_mc5_mce(ops, mce); ++ break; ++ case 6: ++ decode_mc6_mce(mce); ++ break; ++ ++ default: ++ break; ++ } ++ amd_decode_err_code(mce->status & 0xffff); ++} ++ ++void decode_amd_mc(enum cputype cpu, struct mce *mce, int *ismemerr) ++{ ++ if (cpu == CPU_K8) { ++ if (mce->bank < NELE(decoders)) ++ decoders[mce->bank](mce->status, ismemerr); ++ else if (mce->bank >= K8_MCE_THRESHOLD_BASE && ++ mce->bank < K8_MCE_THRESHOLD_TOP) ++ decode_k8_threshold(mce->misc); ++ else ++ Wprintf(" no decoder for unknown bank %u\n", mce->bank); ++ } else ++ __decode_amd_mc(cpu, mce); + } + + char *k8_bank_name(unsigned num) + { + static char buf[64]; +- char *s = "unknown"; ++ const 
char *s = "unknown"; + if (num < NELE(k8bank)) + s = k8bank[num]; + else if (num >= K8_MCE_THRESHOLD_BASE && +@@ -270,13 +703,16 @@ char *k8_bank_name(unsigned num) + return buf; + } + +-int mce_filter_k8(struct mce *m) +-{ +- /* Filter out GART errors */ +- if (m->bank == 4) { +- unsigned short exterrcode = (m->status >> 16) & 0x0f; +- if (exterrcode == 5 && (m->status & (1ULL<<61))) ++int mce_filter_amd(struct mce *m) ++{ ++ /* ++ * NB GART TLB error reporting is disabled by default. ++ */ ++ if (m->bank == 4) { ++ u8 xec = (m->status >> 16) & 0x1f; ++ ++ if (xec == 0x5 && (m->status & BIT_64(61))) + return 0; +- } +- return 1; ++ } ++ return 1; + } +Index: mcelog-189/amd.h +=================================================================== +--- mcelog-189.orig/amd.h ++++ mcelog-189/amd.h +@@ -1,6 +1,25 @@ ++#include ++ + char *k8_bank_name(unsigned num); + void decode_amd_mc(enum cputype, struct mce *mce, int *ismemerr); +-int mce_filter_k8(struct mce *m); ++int mce_filter_amd(struct mce *m); ++enum cputype select_amd_cputype(u32 family); ++ ++enum amdcpu { ++ AMD_K8 = 0, ++ AMD_F10H, ++ AMD_F11H, ++ AMD_F14H, ++ AMD_F15H, ++ AMD_F16H, ++}; ++ ++struct amd_decoder_ops { ++ enum amdcpu cpu; ++ bool (*mc0_mce)(u16, u8); ++ bool (*mc1_mce)(u16, u8); ++ bool (*mc2_mce)(u16, u8); ++}; + + #define K8_MCE_THRESHOLD_BASE (MCE_EXTENDED_BANK + 1) /* MCE_AMD */ + #define K8_MCE_THRESHOLD_TOP (K8_MCE_THRESHOLD_BASE + 6 * 9) +@@ -10,6 +29,8 @@ int mce_filter_k8(struct mce *m); + #define K8_MCELOG_THRESHOLD_L3_CACHE (4 * 9 + 2) + #define K8_MCELOG_THRESHOLD_FBDIMM (4 * 9 + 3) + ++#define BIT_64(n) (1ULL << (n)) ++ + #define EC(x) ((x) & 0xffff) + #define XEC(x, mask) (((x) >> 16) & mask) + +@@ -22,20 +43,20 @@ int mce_filter_k8(struct mce *m); + #define INT_ERROR(x) (((x) & 0xF4FF) == 0x0400) + + #define TT(x) (((x) >> 2) & 0x3) +-#define TT_MSG(x) tt_msgs[TT(x)] ++#define TT_MSG(x) transaction[TT(x)] + #define II(x) (((x) >> 2) & 0x3) +-#define II_MSG(x) ii_msgs[II(x)] 
++#define II_MSG(x) memoryio[II(x)] + #define LL(x) ((x) & 0x3) +-#define LL_MSG(x) ll_msgs[LL(x)] ++#define LL_MSG(x) cachelevel[LL(x)] + #define TO(x) (((x) >> 8) & 0x1) +-#define TO_MSG(x) to_msgs[TO(x)] ++#define TO_MSG(x) timeout[TO(x)] + #define PP(x) (((x) >> 9) & 0x3) +-#define PP_MSG(x) pp_msgs[PP(x)] ++#define PP_MSG(x) partproc[PP(x)] + #define UU(x) (((x) >> 8) & 0x3) + #define UU_MSG(x) uu_msgs[UU(x)] + + #define R4(x) (((x) >> 4) & 0xf) +-#define R4_MSG(x) ((R4(x) < 9) ? rrrr_msgs[R4(x)] : "Wrong R4!") ++#define R4_MSG(x) ((R4(x) < 9) ? memtrans[R4(x)] : "Wrong R4!") + + enum tt_ids { + TT_INSTR = 0, +Index: mcelog-189/mcelog.c +=================================================================== +--- mcelog-189.orig/mcelog.c ++++ mcelog-189/mcelog.c +@@ -152,8 +152,8 @@ static int mce_filter(struct mce *m, uns + /* Filter out known broken MCEs */ + if (cputype >= CPU_INTEL) + return mce_filter_intel(m, recordlen); +- else if (cputype == CPU_K8) +- return mce_filter_k8(m); ++ else if CASE_AMD_CPUS ++ return mce_filter_amd(m); + + return 1; + } +@@ -283,9 +283,7 @@ static enum cputype setup_cpuid(u32 cpuv + case X86_VENDOR_INTEL: + return select_intel_cputype(family, model); + case X86_VENDOR_AMD: +- if (family >= 15 && family <= 17) +- return CPU_K8; +- /* FALL THROUGH */ ++ return select_amd_cputype(family); + default: + Eprintf("Unknown CPU type vendor %u family %u model %u", + cpuvendor, family, model); +@@ -347,7 +345,7 @@ static void dump_mce(struct mce *m, unsi + Wprintf("TIME %llu %s", m->time, ctime(&t)); + } + if CASE_AMD_CPUS +- decode_amd_mc(m, &ismemerr); ++ decode_amd_mc(cputype, m, &ismemerr); + else if (cputype >= CPU_INTEL) + decode_intel_mc(m, cputype, &ismemerr, recordlen); + /* else add handlers for other CPUs here */ +@@ -463,14 +461,9 @@ int is_cpu_supported(void) + + } + if (seen == ALL) { +- if (!strcmp(vendor,"AuthenticAMD")) { +- if (family == 15) { +- cputype = CPU_K8; +- } else if (family >= 16) { +- Eprintf("ERROR: AMD 
Processor family %d: mcelog does not support this processor. Please use the edac_mce_amd module instead.\n", family); +- return 0; +- } +- } else if (!strcmp(vendor,"HygonGenuine")) { ++ if (!strcmp(vendor,"AuthenticAMD")) ++ cputype = select_amd_cputype(family); ++ else if (!strcmp(vendor,"HygonGenuine")) { + Eprintf("ERROR: Hygon Processor family %d: mcelog does not support this processor. Please use the edac_mce_amd module instead.\n", family); + return 0; + } else if (!strcmp(vendor,"GenuineIntel")) diff --git a/add-f11h-support.patch b/add-f11h-support.patch new file mode 100644 index 0000000..f78a4c1 --- /dev/null +++ b/add-f11h-support.patch @@ -0,0 +1,63 @@ +Add F11h decoding support + +Signed-off-by: Borislav Petkov +--- + amd.c | 21 +++++++++++++++++++++ + amd.h | 3 ++- + mcelog.c | 2 ++ + mcelog.h | 1 + + 4 files changed, 26 insertions(+), 1 deletion(-) + +Index: mcelog-189/amd.c +=================================================================== +--- mcelog-189.orig/amd.c ++++ mcelog-189/amd.c +@@ -155,6 +155,8 @@ enum cputype select_amd_cputype(u32 fami + return CPU_K8; + case 0x10: + return CPU_F10H; ++ case 0x11: ++ return CPU_F11H; + default: + break; + } +@@ -367,6 +369,16 @@ static bool f10h_mc0_mce(u16 ec, u8 xec) + return f12h_mc0_mce(ec, xec); + } + ++static bool k8_mc0_mce(u16 ec, u8 xec) ++{ ++ if (BUS_ERROR(ec)) { ++ Wprintf("during system linefill.\n"); ++ return true; ++ } ++ ++ return f10h_mc0_mce(ec, xec); ++} ++ + static void decode_mc0_mce(struct amd_decoder_ops *ops, struct mce *m) + { + u16 ec = EC(m->status); +@@ -630,6 +642,12 @@ struct amd_decoder_ops fam_ops[] = { + .mc1_mce = k8_mc1_mce, + .mc2_mce = k8_mc2_mce, + }, ++ [AMD_F11H] = { ++ .cpu = AMD_F11H, ++ .mc0_mce = k8_mc0_mce, ++ .mc1_mce = k8_mc1_mce, ++ .mc2_mce = k8_mc2_mce, ++ }, + }; + + static void __decode_amd_mc(enum cputype cpu, struct mce *mce) +@@ -640,6 +658,9 @@ static void __decode_amd_mc(enum cputype + case CPU_F10H: + ops = &fam_ops[AMD_F10H]; + break; ++ 
case CPU_F11H: ++ ops = &fam_ops[AMD_F11H]; ++ break; + default: + Eprintf("Huh? What family is it: 0x%x?!\n", cpu); + return; diff --git a/add-f12h-support.patch b/add-f12h-support.patch new file mode 100644 index 0000000..9b914ab --- /dev/null +++ b/add-f12h-support.patch @@ -0,0 +1,58 @@ +Add F12h decoding support + +Signed-off-by: Borislav Petkov +--- + amd.c | 11 +++++++++++ + amd.h | 4 +++- + mcelog.c | 2 ++ + mcelog.h | 1 + + 4 files changed, 17 insertions(+), 1 deletion(-) + +Index: mcelog-189/amd.c +=================================================================== +--- mcelog-189.orig/amd.c ++++ mcelog-189/amd.c +@@ -157,6 +157,8 @@ enum cputype select_amd_cputype(u32 fami + return CPU_F10H; + case 0x11: + return CPU_F11H; ++ case 0x12: ++ return CPU_F12H; + default: + break; + } +@@ -648,6 +650,12 @@ struct amd_decoder_ops fam_ops[] = { + .mc1_mce = k8_mc1_mce, + .mc2_mce = k8_mc2_mce, + }, ++ [AMD_F12H] = { ++ .cpu = AMD_F12H, ++ .mc0_mce = f12h_mc0_mce, ++ .mc1_mce = k8_mc1_mce, ++ .mc2_mce = k8_mc2_mce, ++ }, + }; + + static void __decode_amd_mc(enum cputype cpu, struct mce *mce) +@@ -661,6 +669,9 @@ static void __decode_amd_mc(enum cputype + case CPU_F11H: + ops = &fam_ops[AMD_F11H]; + break; ++ case CPU_F12H: ++ ops = &fam_ops[AMD_F12H]; ++ break; + default: + Eprintf("Huh? 
What family is it: 0x%x?!\n", cpu); + return; +Index: mcelog-189/amd.h +=================================================================== +--- mcelog-189.orig/amd.h ++++ mcelog-189/amd.h +@@ -9,6 +9,7 @@ enum amdcpu { + AMD_K8 = 0, + AMD_F10H, + AMD_F11H, ++ AMD_F12H, + AMD_F14H, + AMD_F15H, + AMD_F16H, diff --git a/add-f14h-support.patch b/add-f14h-support.patch new file mode 100644 index 0000000..c6e57b2 --- /dev/null +++ b/add-f14h-support.patch @@ -0,0 +1,137 @@ +Add F14h decoding support + +Signed-off-by: Borislav Petkov +--- + amd.c | 88 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + amd.h | 3 +- + mcelog.c | 2 + + mcelog.h | 1 + 4 files changed, 93 insertions(+), 1 deletion(-) + +Index: mcelog-189/amd.c +=================================================================== +--- mcelog-189.orig/amd.c ++++ mcelog-189/amd.c +@@ -159,6 +159,8 @@ enum cputype select_amd_cputype(u32 fami + return CPU_F11H; + case 0x12: + return CPU_F12H; ++ case 0x14: ++ return CPU_F14H; + default: + break; + } +@@ -381,6 +383,58 @@ static bool k8_mc0_mce(u16 ec, u8 xec) + return f10h_mc0_mce(ec, xec); + } + ++static bool cat_mc0_mce(u16 ec, u8 xec) ++{ ++ u8 r4 = R4(ec); ++ bool ret = true; ++ ++ if (MEM_ERROR(ec)) { ++ ++ if (TT(ec) != TT_DATA || LL(ec) != LL_L1) ++ return false; ++ ++ switch (r4) { ++ case R4_DRD: ++ case R4_DWR: ++ Wprintf("Data/Tag parity error due to %s.\n", ++ (r4 == R4_DRD ? 
"load/hw prf" : "store")); ++ break; ++ case R4_EVICT: ++ Wprintf("Copyback parity error on a tag miss.\n"); ++ break; ++ case R4_SNOOP: ++ Wprintf("Tag parity error during snoop.\n"); ++ break; ++ default: ++ ret = false; ++ } ++ } else if (BUS_ERROR(ec)) { ++ ++ if ((II(ec) != II_MEM && II(ec) != II_IO) || LL(ec) != LL_LG) ++ return false; ++ ++ Wprintf("System read data error on a "); ++ ++ switch (r4) { ++ case R4_RD: ++ Wprintf("TLB reload.\n"); ++ break; ++ case R4_DWR: ++ Wprintf("store.\n"); ++ break; ++ case R4_DRD: ++ Wprintf("load.\n"); ++ break; ++ default: ++ ret = false; ++ } ++ } else { ++ ret = false; ++ } ++ ++ return ret; ++} ++ + static void decode_mc0_mce(struct amd_decoder_ops *ops, struct mce *m) + { + u16 ec = EC(m->status); +@@ -402,6 +456,31 @@ static void decode_mc0_mce(struct amd_de + Eprintf("Corrupted MC0 MCE info?\n"); + } + ++static bool cat_mc1_mce(u16 ec, u8 xec) ++{ ++ u8 r4 = R4(ec); ++ bool ret = true; ++ ++ if (!MEM_ERROR(ec)) ++ return false; ++ ++ if (TT(ec) != TT_INSTR) ++ return false; ++ ++ if (r4 == R4_IRD) ++ Wprintf("Data/tag array parity error for a tag hit.\n"); ++ else if (r4 == R4_SNOOP) ++ Wprintf("Tag error during snoop/victimization.\n"); ++ else if (xec == 0x0) ++ Wprintf("Tag parity error from victim castout.\n"); ++ else if (xec == 0x2) ++ Wprintf("Microcode patch RAM parity error.\n"); ++ else ++ ret = false; ++ ++ return ret; ++} ++ + static void decode_mc1_mce(struct amd_decoder_ops *ops, struct mce *m) + { + u16 ec = EC(m->status); +@@ -656,6 +735,12 @@ struct amd_decoder_ops fam_ops[] = { + .mc1_mce = k8_mc1_mce, + .mc2_mce = k8_mc2_mce, + }, ++ [AMD_F14H] = { ++ .cpu = AMD_F14H, ++ .mc0_mce = cat_mc0_mce, ++ .mc1_mce = cat_mc1_mce, ++ .mc2_mce = k8_mc2_mce, ++ }, + }; + + static void __decode_amd_mc(enum cputype cpu, struct mce *mce) +@@ -672,6 +757,9 @@ static void __decode_amd_mc(enum cputype + case CPU_F12H: + ops = &fam_ops[AMD_F12H]; + break; ++ case CPU_F14H: ++ ops = &fam_ops[AMD_F14H]; ++ break; + 
default: + Eprintf("Huh? What family is it: 0x%x?!\n", cpu); + return; diff --git a/add-f15h-support.patch b/add-f15h-support.patch new file mode 100644 index 0000000..3b9867d --- /dev/null +++ b/add-f15h-support.patch @@ -0,0 +1,223 @@ +Add F15h decoding support + +Signed-off-by: Borislav Petkov +--- + amd.c | 160 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + amd.h | 3 - + mcelog.c | 2 + mcelog.h | 1 + 4 files changed, 165 insertions(+), 1 deletion(-) + +Index: mcelog-189/amd.c +=================================================================== +--- mcelog-189.orig/amd.c ++++ mcelog-189/amd.c +@@ -72,6 +72,43 @@ static char *nbextendederr[] = { + "L3 Cache LRU Error" + }; + ++static const char * const f15h_mc1_mce_desc[] = { ++ "UC during a demand linefill from L2", ++ "Parity error during data load from IC", ++ "Parity error for IC valid bit", ++ "Main tag parity error", ++ "Parity error in prediction queue", ++ "PFB data/address parity error", ++ "Parity error in the branch status reg", ++ "PFB promotion address error", ++ "Tag error during probe/victimization", ++ "Parity error for IC probe tag valid bit", ++ "PFB non-cacheable bit parity error", ++ "PFB valid bit parity error", /* xec = 0xd */ ++ "Microcode Patch Buffer", /* xec = 010 */ ++ "uop queue", ++ "insn buffer", ++ "predecode buffer", ++ "fetch address FIFO" ++}; ++ ++static const char * const f15h_mc2_mce_desc[] = { ++ "Fill ECC error on data fills", /* xec = 0x4 */ ++ "Fill parity error on insn fills", ++ "Prefetcher request FIFO parity error", ++ "PRQ address parity error", ++ "PRQ data parity error", ++ "WCC Tag ECC error", ++ "WCC Data ECC error", ++ "WCB Data parity error", ++ "VB Data ECC or parity error", ++ "L2 Tag ECC error", /* xec = 0x10 */ ++ "Hard L2 Tag ECC error", ++ "Multiple hits on L2 tag", ++ "XAB parity error", ++ "PRB address parity error" ++}; ++ + static const char * const mc4_mce_desc[] = { + "DRAM ECC error detected on the NB", + "CRC error detected on HT 
link", +@@ -161,6 +198,8 @@ enum cputype select_amd_cputype(u32 fami + return CPU_F12H; + case 0x14: + return CPU_F14H; ++ case 0x15: ++ return CPU_F15H; + default: + break; + } +@@ -435,6 +474,53 @@ static bool cat_mc0_mce(u16 ec, u8 xec) + return ret; + } + ++static bool f15h_mc0_mce(u16 ec, u8 xec) ++{ ++ bool ret = true; ++ ++ if (MEM_ERROR(ec)) { ++ ++ switch (xec) { ++ case 0x0: ++ Wprintf("Data Array access error.\n"); ++ break; ++ ++ case 0x1: ++ Wprintf("UC error during a linefill from L2/NB.\n"); ++ break; ++ ++ case 0x2: ++ case 0x11: ++ Wprintf("STQ access error.\n"); ++ break; ++ ++ case 0x3: ++ Wprintf("SCB access error.\n"); ++ break; ++ ++ case 0x10: ++ Wprintf("Tag error.\n"); ++ break; ++ ++ case 0x12: ++ Wprintf("LDQ access error.\n"); ++ break; ++ ++ default: ++ ret = false; ++ } ++ } else if (BUS_ERROR(ec)) { ++ ++ if (!xec) ++ Wprintf("System Read Data Error.\n"); ++ else ++ Wprintf(" Internal error condition type %d.\n", xec); ++ } else ++ ret = false; ++ ++ return ret; ++} ++ + static void decode_mc0_mce(struct amd_decoder_ops *ops, struct mce *m) + { + u16 ec = EC(m->status); +@@ -481,6 +567,36 @@ static bool cat_mc1_mce(u16 ec, u8 xec) + return ret; + } + ++static bool f15h_mc1_mce(u16 ec, u8 xec) ++{ ++ bool ret = true; ++ ++ if (!MEM_ERROR(ec)) ++ return false; ++ ++ switch (xec) { ++ case 0x0 ... 0xa: ++ Wprintf("%s.\n", f15h_mc1_mce_desc[xec]); ++ break; ++ ++ case 0xd: ++ Wprintf("%s.\n", f15h_mc1_mce_desc[xec-2]); ++ break; ++ ++ case 0x10: ++ Wprintf("%s.\n", f15h_mc1_mce_desc[xec-4]); ++ break; ++ ++ case 0x11 ... 
0x14: ++ Wprintf("Decoder %s parity error.\n", f15h_mc1_mce_desc[xec-4]); ++ break; ++ ++ default: ++ ret = false; ++ } ++ return ret; ++} ++ + static void decode_mc1_mce(struct amd_decoder_ops *ops, struct mce *m) + { + u16 ec = EC(m->status); +@@ -537,6 +653,40 @@ static bool k8_mc2_mce(u16 ec, u8 xec) + return ret; + } + ++static bool f15h_mc2_mce(u16 ec, u8 xec) ++{ ++ bool ret = true; ++ ++ if (TLB_ERROR(ec)) { ++ if (xec == 0x0) ++ Wprintf("Data parity TLB read error.\n"); ++ else if (xec == 0x1) ++ Wprintf("Poison data provided for TLB fill.\n"); ++ else ++ ret = false; ++ } else if (BUS_ERROR(ec)) { ++ if (xec > 2) ++ ret = false; ++ ++ Wprintf("Error during attempted NB data read.\n"); ++ } else if (MEM_ERROR(ec)) { ++ switch (xec) { ++ case 0x4 ... 0xc: ++ Wprintf("%s.\n", f15h_mc2_mce_desc[xec - 0x4]); ++ break; ++ ++ case 0x10 ... 0x14: ++ Wprintf("%s.\n", f15h_mc2_mce_desc[xec - 0x7]); ++ break; ++ ++ default: ++ ret = false; ++ } ++ } ++ ++ return ret; ++} ++ + static void decode_mc2_mce(struct amd_decoder_ops *ops, struct mce *m) + { + u16 ec = EC(m->status); +@@ -741,6 +891,12 @@ struct amd_decoder_ops fam_ops[] = { + .mc1_mce = cat_mc1_mce, + .mc2_mce = k8_mc2_mce, + }, ++ [AMD_F15H] = { ++ .cpu = AMD_F15H, ++ .mc0_mce = f15h_mc0_mce, ++ .mc1_mce = f15h_mc1_mce, ++ .mc2_mce = f15h_mc2_mce, ++ }, + }; + + static void __decode_amd_mc(enum cputype cpu, struct mce *mce) +@@ -760,6 +916,10 @@ static void __decode_amd_mc(enum cputype + case CPU_F14H: + ops = &fam_ops[AMD_F14H]; + break; ++ case CPU_F15H: ++ xec_mask = 0x1f; ++ ops = &fam_ops[AMD_F15H]; ++ break; + default: + Eprintf("Huh? 
What family is it: 0x%x?!\n", cpu); + return; diff --git a/add-f16h-support.patch b/add-f16h-support.patch new file mode 100644 index 0000000..e373c83 --- /dev/null +++ b/add-f16h-support.patch @@ -0,0 +1,95 @@ +Add F16h decoding support + +Signed-off-by: Borislav Petkov +--- + amd.c | 53 +++++++++++++++++++++++++++++++++++++++++++++++++++++ + amd.h | 3 ++- + mcelog.c | 2 ++ + mcelog.h | 1 + + 4 files changed, 58 insertions(+), 1 deletion(-) + +Index: mcelog-189/amd.c +=================================================================== +--- mcelog-189.orig/amd.c ++++ mcelog-189/amd.c +@@ -200,6 +200,8 @@ enum cputype select_amd_cputype(u32 fami + return CPU_F14H; + case 0x15: + return CPU_F15H; ++ case 0x16: ++ return CPU_F16H; + default: + break; + } +@@ -687,6 +689,47 @@ static bool f15h_mc2_mce(u16 ec, u8 xec) + return ret; + } + ++static bool f16h_mc2_mce(u16 ec, u8 xec) ++{ ++ u8 r4 = R4(ec); ++ ++ if (!MEM_ERROR(ec)) ++ return false; ++ ++ switch (xec) { ++ case 0x04 ... 0x05: ++ Wprintf("%cBUFF parity error.\n", (r4 == R4_RD) ? 'I' : 'O'); ++ break; ++ ++ case 0x09 ... 0x0b: ++ case 0x0d ... 0x0f: ++ Wprintf("ECC error in L2 tag (%s).\n", ++ ((r4 == R4_GEN) ? "BankReq" : ++ ((r4 == R4_SNOOP) ? "Prb" : "Fill"))); ++ break; ++ ++ case 0x10 ... 0x19: ++ case 0x1b: ++ Wprintf("ECC error in L2 data array (%s).\n", ++ (((r4 == R4_RD) && !(xec & 0x3)) ? "Hit" : ++ ((r4 == R4_GEN) ? "Attr" : ++ ((r4 == R4_EVICT) ? "Vict" : "Fill")))); ++ break; ++ ++ case 0x1c ... 0x1d: ++ case 0x1f: ++ Wprintf("Parity error in L2 attribute bits (%s).\n", ++ ((r4 == R4_RD) ? "Hit" : ++ ((r4 == R4_GEN) ? 
"Attr" : "Fill"))); ++ break; ++ ++ default: ++ return false; ++ } ++ ++ return true; ++} ++ + static void decode_mc2_mce(struct amd_decoder_ops *ops, struct mce *m) + { + u16 ec = EC(m->status); +@@ -897,6 +940,12 @@ struct amd_decoder_ops fam_ops[] = { + .mc1_mce = f15h_mc1_mce, + .mc2_mce = f15h_mc2_mce, + }, ++ [AMD_F16H] = { ++ .cpu = AMD_F16H, ++ .mc0_mce = cat_mc0_mce, ++ .mc1_mce = cat_mc1_mce, ++ .mc2_mce = f16h_mc2_mce, ++ }, + }; + + static void __decode_amd_mc(enum cputype cpu, struct mce *mce) +@@ -920,6 +969,10 @@ static void __decode_amd_mc(enum cputype + xec_mask = 0x1f; + ops = &fam_ops[AMD_F15H]; + break; ++ case CPU_F16H: ++ xec_mask = 0x1f; ++ ops = &fam_ops[AMD_F16H]; ++ break; + default: + Eprintf("Huh? What family is it: 0x%x?!\n", cpu); + return; diff --git a/add_new_amd_cpu_defines b/add_new_amd_cpu_defines new file mode 100644 index 0000000..c9551b7 --- /dev/null +++ b/add_new_amd_cpu_defines @@ -0,0 +1,30 @@ +Index: mcelog-189/mkcputype +=================================================================== +--- mcelog-189.orig/mkcputype ++++ mcelog-189/mkcputype +@@ -5,6 +5,12 @@ awk -F\| 'BEGIN { + print "enum cputype {" > "cputype.tmp" + print "\tCPU_GENERIC," > "cputype.tmp" + print "\tCPU_K8," > "cputype.tmp" ++ print "\tCPU_F10H," > "cputype.tmp" ++ print "\tCPU_F11H," > "cputype.tmp" ++ print "\tCPU_F12H," > "cputype.tmp" ++ print "\tCPU_F14H," > "cputype.tmp" ++ print "\tCPU_F15H," > "cputype.tmp" ++ print "\tCPU_F16H," > "cputype.tmp" + + print "\n\n/* Insert any new non-intel CPU models before this line */\n\n" > "cputype.tmp" + print "\tCPU_INTEL," > "cputype.tmp" +@@ -44,6 +50,12 @@ END { + print "char *cputype_name[] = {" > "lookup_intel_cputype.tmp" + print "\t[CPU_GENERIC] = \"generic CPU\"," > "lookup_intel_cputype.tmp" + print "\t[CPU_K8] = \"AMD K8 and derivates\"," > "lookup_intel_cputype.tmp" ++ print "\t[CPU_F10H] = \"AMD Greyhound\"," > "lookup_intel_cputype.tmp" ++ print "\t[CPU_F11H] = \"AMD Griffin\"," > 
"lookup_intel_cputype.tmp" ++ print "\t[CPU_F12H] = \"AMD Llano\"," > "lookup_intel_cputype.tmp" ++ print "\t[CPU_F14H] = \"AMD Bobcat\"," > "lookup_intel_cputype.tmp" ++ print "\t[CPU_F15H] = \"AMD Bulldozer\"," > "lookup_intel_cputype.tmp" ++ print "\t[CPU_F16H] = \"AMD Jaguar\"," > "lookup_intel_cputype.tmp" + print "\t[CPU_INTEL] = \"Intel generic architectural MCA\"," > "lookup_intel_cputype.tmp" + print "\t[CPU_P4] = \"Intel P4\"," > "lookup_intel_cputype.tmp" + print "\t[CPU_TULSA] = \"Intel Xeon 7100 series\"," > "lookup_intel_cputype.tmp" diff --git a/email.patch b/email.patch new file mode 100644 index 0000000..22f5fc2 --- /dev/null +++ b/email.patch @@ -0,0 +1,510 @@ +--- + Makefile | 13 +++- + email.c | 200 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + email.h | 34 ++++++++++ + mcelog.c | 93 ++++++++++++++++++++++++++++- + mcelog.h | 1 + msg.c | 8 ++ + 6 files changed, 346 insertions(+), 3 deletions(-) + +Index: mcelog-195/Makefile +=================================================================== +--- mcelog-195.orig/Makefile ++++ mcelog-195/Makefile +@@ -1,3 +1,4 @@ ++CONFIG_EMAIL := 1 + CFLAGS := -g -Os + prefix := /usr + etcprefix := +@@ -38,16 +39,24 @@ OBJ := p4.o k8.o mcelog.o dmi.o tsc.o co + broadwell_de.o broadwell_epex.o skylake_xeon.o \ + denverton.o i10nm.o sapphire.o granite.o \ + msr.o bus.o unknown.o lookup_intel_cputype.o ++EMAIL_OBJ := email.o + CLEAN := mcelog dmi tsc dbquery .depend .depend.X dbquery.o \ + version.o version.c version.tmp cputype.h cputype.tmp \ +- lookup_intel_cputype.c lookup_intel_cputype.tmp ++ lookup_intel_cputype.c lookup_intel_cputype.tmp ${EMAIL_OBJ} + DOC := mce.pdf + + ADD_DEFINES := + ++ifdef CONFIG_EMAIL ++ADD_DEFINES := -DCONFIG_EMAIL=1 ++LIBS := -lesmtp ++OBJ += ${EMAIL_OBJ} ++endif ++ + SRC := $(OBJ:.o=.c) + + mcelog: ${OBJ} version.o ++ $(CC) $(LDFLAGS) $^ ${LIBS} -o $@ + + # dbquery intentionally not installed by default + install: install-nodoc mcelog.conf.5 mcelog.triggers.5 
+@@ -85,7 +94,7 @@ dbquery: db.o dbquery.o memutil.o + depend: .depend + + %.o: %.c +- $(CC) -c $(CFLAGS) $(CPPFLAGS) $(WARNINGS) $(ADD_DEFINES) -o $@ $< ++ $(CC) -c $(CFLAGS) $(CPPFLAGS) $(WARNINGS) $(ADD_DEFINES) $< -o $@ + + version.tmp: FORCE + ( printf "char version[] = \"" ; \ +Index: mcelog-195/email.c +=================================================================== +--- /dev/null ++++ mcelog-195/email.c +@@ -0,0 +1,200 @@ ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#define __USE_GNU ++/* To fetch the dnsname */ ++#include ++#include ++#include ++ ++#include ++#include "mcelog.h" ++#include "email.h" ++ ++#define MAX_STRING_LEN 512 ++char c_recipient[MAX_STRING_LEN] = ""; ++static int debug; ++static char dnsname[MAX_STRING_LEN]; ++ ++static char buf[128]; ++#define ERROR() { fprintf (stderr, "SMTP problem [%d] %s\n", __LINE__, \ ++ smtp_strerror (smtp_errno (), buf, sizeof buf)); \ ++ return -1; } ++ ++ ++void email_usage(void) { ++ fprintf(stderr, ++ "--email address Requires daemon mode\n"); ++} ++ ++int email_cmd(int opt, int ac, char **av) ++{ ++ char *arg = optarg; ++ ++ switch (opt) { ++ case O_EMAIL_ADDRESS: ++ if (arg) { ++ if (strlen(arg) >= MAX_STRING_LEN) { ++ Eprintf("email address too long" ++ " [max:%d]\n", MAX_STRING_LEN); ++ return 0; ++ } ++ strcpy(c_recipient, arg); ++ return 1; ++ } ++ case O_EMAIL_DEBUG: ++ debug = 1; ++ return 0; ++ } ++ return 0; ++} ++ ++int email_env(void) ++{ ++ char *email_env = getenv("MCELOG_EMAIL_DEBUG"); ++ ++ if (email_env) ++ debug=0; ++ ++ email_env = getenv("MCELOG_ADMIN_EMAIL"); ++ /* No email validation, but at least check for not being empty... 
*/ ++ if (email_env && strlen(email_env) > 1) { ++ strncpy(c_recipient, email_env, MAX_STRING_LEN - 1); ++ return 1; ++ } ++ return 0; ++} ++ ++/* Callback to prnt the recipient status */ ++static void ++print_recipient_status (smtp_recipient_t recipient, ++ const char *mailbox, void *arg) ++{ ++ const smtp_status_t *status; ++ ++ status = smtp_recipient_status (recipient); ++ if (debug) ++ printf ("%s: %d %s", mailbox, status->code, status->text); ++} ++ ++void setup_mail_header(FILE *fp, struct mce *m) ++{ ++ char host[MAX_STRING_LEN]; ++ struct addrinfo hints; ++ struct addrinfo *res=NULL; ++ int ret, retry=3; ++ ++ /* Taken from net-tools hostname.c showhname() */ ++ memset(&hints, 0, sizeof(struct addrinfo)); ++ hints.ai_family = AF_UNSPEC; ++ hints.ai_flags = AI_CANONNAME | AI_CANONIDN; ++ hints.ai_socktype = SOCK_STREAM; ++ hints.ai_protocol = 0; ++ ++ if (gethostname(host, MAX_STRING_LEN)) { ++ fprintf(stderr, "Cannot get host name\n"); ++ return; ++ } ++ ++ do { ++ ret = getaddrinfo(host, NULL, &hints, &res); ++ } while(ret == EAI_AGAIN && retry-- > 0 ++ && usleep(50000) == 0); ++ ++ if (ret != 0 || res == NULL) { ++ fprintf(stderr, "Could not retrieve hostname\n"); ++ return; ++ } ++ ++ memset(dnsname, '\0', MAX_STRING_LEN); ++ strncpy(dnsname, res->ai_canonname, MAX_STRING_LEN - 1); ++ ++ fprintf(fp, "Return-Path: \r\n" ++ "Subject: Machine Check Exception on %s detected\r\n" ++ "MIME-Version: 1.0\r\n" ++ "Content-Type: text/plain;\r\n" ++ " charset=iso-8859-1\r\n" ++ "Content-Transfer-Encoding: 7bit\r\n\r\n", dnsname); ++ freeaddrinfo(res); ++} ++ ++ ++int send_mail(FILE *fp) ++{ ++ char smtp_host[MAX_STRING_LEN] = "localhost:25"; ++ char from[MAX_STRING_LEN]; ++ ++ const smtp_status_t *status; ++ smtp_session_t session; ++ smtp_message_t message; ++ smtp_recipient_t recipient; ++ struct sigaction sa; ++ ++ session = smtp_create_session (); ++ message = smtp_add_message (session); ++ ++ snprintf(from, MAX_STRING_LEN, "root@%s", dnsname); ++ ++ /* NB. 
libESMTP sets timeouts as it progresses through the protocol. ++ In addition the remote server might close its socket on a timeout. ++ Consequently libESMTP may sometimes try to write to a socket with ++ no reader. Ignore SIGPIPE, then the program doesn't get killed ++ if/when this happens. */ ++ sa.sa_handler = SIG_IGN; ++ sigemptyset (&sa.sa_mask); ++ sa.sa_flags = 0; ++ sigaction (SIGPIPE, &sa, NULL); ++ ++ /* Set the host running the SMTP server. LibESMTP has a default port ++ number of 587, however this is not widely deployed so the port ++ is specified as 25 along with the default MTA host. */ ++ if (!smtp_set_server (session, smtp_host)) ++ ERROR(); ++ ++ smtp_set_reverse_path (message, from); ++ ++ /* RFC 2822 doesn't require recipient headers but a To: header would ++ be nice to have if not present. */ ++ smtp_set_header (message, "To", NULL, NULL); ++ ++ /* RFC 2822 doesn't require recipient headers but a To: header would ++ be nice to have if not present. */ ++ if (!smtp_set_header (message, "From", "mcelog", from)) ++ ERROR(); ++ ++ smtp_set_message_fp (message, fp); ++ ++ recipient = smtp_add_recipient (message, c_recipient); ++ if (!recipient) ++ ERROR(); ++ if (!smtp_dsn_set_notify (recipient, Notify_NEVER)) ++ ERROR(); ++ ++ /* Initiate a connection to the SMTP server and transfer the ++ message. */ ++ if (!smtp_start_session (session)) ++ Eprintf("SMTP server problem %s\n", ++ smtp_strerror (smtp_errno (), buf, sizeof buf)); ++ else { ++ /* Report on the success or otherwise of the mail transfer. ++ */ ++ if (debug) { ++ status = smtp_message_transfer_status (message); ++ printf ("%d %s", status->code, ++ (status->text != NULL) ? status->text : "\n"); ++ } ++ smtp_enumerate_recipients (message, print_recipient_status, NULL); ++ } ++ ++ if (debug) ++ fprintf(stderr, "Email sent successfully!\n"); ++ ++ /* Free resources consumed by the program. 
++ */ ++ smtp_destroy_session (session); ++ return 0; ++} +Index: mcelog-195/email.h +=================================================================== +--- /dev/null ++++ mcelog-195/email.h +@@ -0,0 +1,34 @@ ++#ifndef _MCELOG_EMAIL_H_ ++#define _MCELOG_EMAIL_H_ ++ ++extern FILE *email_fd; ++extern int email_mode; ++ ++#ifdef CONFIG_EMAIL ++extern int send_mail(FILE *email_fd); ++extern void setup_mail_header(FILE *email_fd, struct mce *m); ++extern void email_usage(void); ++extern int email_cmd(int opt, int ac, char **av); ++extern int email_env(void); ++ ++#define EMAIL_OPTIONS \ ++ { "email", 1, NULL, O_EMAIL_ADDRESS }, \ ++ { "email-debug", 0, NULL, O_EMAIL_DEBUG }, ++ ++enum email_options { ++ O_EMAIL_ADDRESS = O_EMAIL, ++ O_EMAIL_DEBUG, ++}; ++ ++#else ++/* ++static int send_mail(FILE *email_fd) { return 0; } ++static void setup_mail_header(FILE *email_fd) { return; }; ++*/ ++static void email_usage(void) { return; } ++static int email_cmd(int opt, int ac, char **av) { return 0; } ++static int email_env(void) { return 0; } ++#define EMAIL_OPTIONS ++#endif ++ ++#endif +Index: mcelog-195/mcelog.c +=================================================================== +--- mcelog-195.orig/mcelog.c ++++ mcelog-195/mcelog.c +@@ -37,6 +37,7 @@ + #include + #include + #include ++#include + #include + #include "mcelog.h" + #include "paths.h" +@@ -60,6 +61,9 @@ + #include "bus.h" + #include "unknown.h" + ++#include "email.h" ++int email_mode; ++ + enum cputype cputype = CPU_GENERIC; + + char *logfn = LOG_DEV_FILENAME; +@@ -71,7 +75,7 @@ static double cpumhz; + static int cpumhz_forced; + int ascii_mode; + int dump_raw_ascii; +-int daemon_mode; ++int daemon_mode = 0; + static char *inputfile; + char *processor_flags; + static int foreground; +@@ -906,6 +910,7 @@ void usage(void) + "--max-corr-err-counters Max page correctable error counters\n" + "--help Display this message.\n" + ); ++ email_usage(); + printf("\n"); + print_cputypes(); + } +@@ -977,6 +982,7 @@ static 
struct option options[] = { + { "max-corr-err-counters", 1, NULL, O_MAX_CORR_ERR_COUNTERS }, + { "help", 0, NULL, O_HELP }, + { "is-cpu-supported", 0, NULL, O_IS_CPU_SUPPORTED }, ++ EMAIL_OPTIONS + {} + }; + +@@ -1171,11 +1177,86 @@ static void drop_cred(void) + } + } + ++#ifdef CONFIG_EMAIL ++pid_t c_pid; ++ ++/* Not more than 12 mails in 5 mins... */ ++#define LAST_LIMIT_COUNT (60 * 5) ++#define LIMIT_COUNT 12 ++static time_t last_limit_count; ++static int limit_count; ++static const char *mail_thread = "mcelog_mail_thread"; ++ ++ ++static int setup_email(struct mce *m) { ++ int pdes[2]; ++ static int suppressed; ++ int ret; ++ ++ if (time(NULL) - last_limit_count < LAST_LIMIT_COUNT) { ++ if (limit_count >= LIMIT_COUNT && !suppressed) { ++ Eprintf("email rate limit [%d mails per %d mins]" ++ " reached, mails supressed\n", ++ LIMIT_COUNT, LAST_LIMIT_COUNT / 60); ++ suppressed = 1; ++ } ++ if (suppressed) ++ return -1; ++ } else { ++ suppressed = 0; ++ limit_count = 0; ++ last_limit_count = time(NULL); ++ } ++ ++ limit_count++; ++ ++ ret = pipe(pdes); ++ if (ret) ++ return ret; ++ ++ c_pid = mcelog_fork(mail_thread); ++ if ( c_pid == 0 ) { /* child */ ++ FILE *x = fdopen(pdes[0], "r"); ++ close(pdes[1]); ++ send_mail(x); ++ exit(0); ++ } else { ++ close(pdes[0]); ++ /* something went wrong, better close... */ ++ if (email_fd) ++ fclose(email_fd); ++ /* Wprintf will now also write into this pipe */ ++ email_fd = fdopen(pdes[1], "w"); ++ setup_mail_header(email_fd, m); ++ } ++ return 0; ++} ++ ++static int finish_email(void) { ++ int status; ++ ++ fclose(email_fd); ++ fprintf(stderr, "Email set up for sending\n"); ++ /* Anything else we can make sure we do not get orphaned threads? 
*/ ++ waitpid (c_pid, &status, WUNTRACED); ++ if (WIFSTOPPED(status)){ ++ kill(c_pid, 9); ++ SYSERRprintf("Killed stopped email thread %d\n", ++ c_pid); ++ return -1; ++ } ++ email_fd = NULL; ++ return 0; ++} ++ ++#endif ++ + static void process(int fd, unsigned recordlen, unsigned loglen, char *buf) + { + int i; + int len, count; + int finish = 0, flags; ++ int mail_setup = 0; + + if (recordlen == 0) { + Wprintf("no data in mce record\n"); +@@ -1202,12 +1283,16 @@ static void process(int fd, unsigned rec + finish = 1; + if (!mce_filter(mce, recordlen)) + continue; ++ if (email_mode) ++ mail_setup = setup_email(mce); + if (!dump_raw_ascii) { + disclaimer(); + Wprintf("MCE %d\n", i); + dump_mce(mce, recordlen); + } else + dump_mce_raw_ascii(mce, recordlen); ++ if (email_mode && !mail_setup) ++ finish_email(); + flushlog(); + } + +@@ -1321,6 +1406,8 @@ int main(int ac, char **av) + noargs(ac, av); + fprintf(stderr, "mcelog %s\n", MCELOG_VERSION); + exit(0); ++ } else if (email_cmd(opt, ac, av)) { ++ email_mode = 1; + } else if (opt == 0) + break; + } +@@ -1355,6 +1442,10 @@ int main(int ac, char **av) + usage(); + exit(1); + } ++ if (email_mode == 0) ++ email_mode = email_env(); ++ /* email sending only in daemon mode */ ++ email_mode &= daemon_mode; + checkdmi(); + general_setup(); + +Index: mcelog-195/mcelog.h +=================================================================== +--- mcelog-195.orig/mcelog.h ++++ mcelog-195/mcelog.h +@@ -118,6 +118,7 @@ extern int open_logfile(char *fn); + enum option_ranges { + O_COMMON = 500, + O_DISKDB = 1000, ++ O_EMAIL = 1500, + }; + + enum syslog_opt { +Index: mcelog-195/msg.c +=================================================================== +--- mcelog-195.orig/msg.c ++++ mcelog-195/msg.c +@@ -8,10 +8,13 @@ + #include "mcelog.h" + #include "msg.h" + #include "memutil.h" ++#include "email.h" ++ + + enum syslog_opt syslog_opt = SYSLOG_REMARK; + int syslog_level = LOG_WARNING; + static FILE *output_fh; ++ FILE *email_fd; + 
static char *output_fn; + + int need_stdout(void) +@@ -135,6 +138,11 @@ int Wprintf(char *fmt, ...) + n = vfprintf(output_fh ? output_fh : stdout, fmt, ap); + va_end(ap); + } ++ if (email_fd) { ++ va_start(ap,fmt); ++ n = vfprintf(email_fd, fmt, ap); ++ va_end(ap); ++ } + return n; + } + diff --git a/fix_setgroups_missing_call.patch b/fix_setgroups_missing_call.patch new file mode 100644 index 0000000..cf890a3 --- /dev/null +++ b/fix_setgroups_missing_call.patch @@ -0,0 +1,31 @@ +--- + mcelog.c | 9 +++++++++ + 1 file changed, 9 insertions(+) + +Index: mcelog-189/mcelog.c +=================================================================== +--- mcelog-189.orig/mcelog.c ++++ mcelog-189/mcelog.c +@@ -37,6 +37,7 @@ + #include + #include + #include ++#include + #include + #include + #include "mcelog.h" +@@ -1155,6 +1156,14 @@ static void general_setup(void) + + static void drop_cred(void) + { ++ /* When dropping privileges from root, the `setgroups` call will ++ * remove any extraneous groups. If we don't call this, then ++ * even though our uid has dropped, we may still have groups ++ * that enable us to do super-user things. This will fail if we ++ * aren't root, so don't bother checking the return value, this ++ * is just done as an optimistic privilege dropping function. 
++ */ ++ setgroups(0, NULL); + if (runcred.uid != -1U && runcred.gid == -1U) { + struct passwd *pw = getpwuid(runcred.uid); + if (pw) diff --git a/mcelog-196.obscpio b/mcelog-196.obscpio new file mode 100644 index 0000000..baeed03 --- /dev/null +++ b/mcelog-196.obscpio @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8372405d024e2220df40dba4bffb4f6271ff7fe454958acfd16ec55f0d880bdd +size 651788 diff --git a/mcelog-socket-path.patch b/mcelog-socket-path.patch new file mode 100644 index 0000000..32145f5 --- /dev/null +++ b/mcelog-socket-path.patch @@ -0,0 +1,13 @@ +--- mcelog-1.0.1.orig/paths.h ++++ mcelog-1.0.1/paths.h +@@ -4,8 +4,8 @@ + #define DIMM_DB_FILENAME PREFIX "/var/lib/memory-errors" + #define CONFIG_FILENAME PREFIX "/etc/mcelog/mcelog.conf" + +-#define SOCKET_PATH "/var/run/mcelog-client" ++#define SOCKET_PATH "/run/mcelog/mcelog-client" + + #define LOG_FILE "/var/log/mcelog" + +-#define PID_FILE "/var/run/mcelog.pid" ++#define PID_FILE "/run/mcelog/mcelog.pid" diff --git a/mcelog.changes b/mcelog.changes new file mode 100644 index 0000000..669ab20 --- /dev/null +++ b/mcelog.changes @@ -0,0 +1,641 @@ +------------------------------------------------------------------- +Mon Nov 20 12:01:41 UTC 2023 - trenn@suse.de + +- Update to version 196: + * mcelog: Add second model number for Arrowlake + +------------------------------------------------------------------- +Tue Sep 12 14:08:37 UTC 2023 - trenn@suse.de + +- This contains following features: + PED-6122 + [GNR] RAS: mcelog Add support for Granite Rapids (ALP) + PED-6102 + [GNR] RAS: mcelog Add support for Granite Rapids (SLE 15 SP6) + PED-6021 + [SRF] RAS: mcelog support for Sierra Forest (SLE 15 SP6) + PED-6050 + [SRF] RAS: mcelog support for Sierra Forest (ALP) +- Change git repo in _service file from git to https url +- Update to version 195: + * mcelog: Wire up model-specific decoding for Sierra Forest + * mcelog: Add model-specific decoding for Granite Rapids + * client.c: fix 
build w/ musl libc + * mcelog: New model number for Arrowlake + * mcelog: Don't overwrite model number when lookup fails + * mcelog: Add Graniterapids, Grandridge and Sierraforest + * mcelog: New model number for Lunarlake + * mcelog: Add Emerald Rapids + * Update PFA_test_howto +- Adopt to mainline: +M email.patch + +------------------------------------------------------------------- +Wed Jun 14 14:58:43 UTC 2023 - trenn@suse.de + +- Update to version 194 (jsc#PED-4218): + * client.c: fix build w/ musl libc + * mcelog: New model number for Arrowlake + * mcelog: Don't overwrite model number when lookup fails + * mcelog: Add Graniterapids, Grandridge and Sierraforest + * mcelog: New model number for Lunarlake + * mcelog: Add Emerald Rapids + * mcelog: Add decode support for Sapphire Rapids + * Update PFA_test_howto + * mcelog: Add support for Meteor Lake + +------------------------------------------------------------------- +Thu Oct 06 14:56:44 UTC 2022 - trenn@suse.de + +- Includes following SLE 15 SP5 jira features: + * jsc#PED-671 mcelog: Update to latest release + * jsc#PED-686 [CPU Features] Update mcelog support for ADL-N + * jsc#PED-638 [CPU Features] Update mcelog support for MTL-P +- Update to version 189: + * mcelog: Add another Raptor Lake CPU model + * Fix generation of cputype files + * mcelog: Add missing model numbers for Broadwell and Raptorlake + * mcelog: Makefile: Only touch cputype.h if needed to create it + * Makefile: add install-nodoc target + * Use env as the shebang target + * Add missing dependencies for cputype include files + * mcelog: Reverse sens of check to call resolveaddr() + * mcelog: Reverse the sense of the check to set memory_error_support + * mcelog: Drop CASE_INTEL define + * mcelog: Generate cpu_choices[] from table + * mcelog: Generate the cputype_name[] array from the table + * mcelog: Add CPU model numbers to table and generate switch function + * mcelog: Generate CPU_* enums from a table + * mcelog: Add two more Alderlake 
model numbers + * mcelog: Reduce default threshold for corrected error page offline + * Make genconfig use python3 + * mcelog: Add support for Raptorlake + * Fix warnings in sysfs.c + * mcelog: Change "DDR4" string to "DDR" for i10nm platforms + * Fix logrotate syntax + * remove outdated mcelog.conf.5 manual file + * add furture print function for Python2 + * fix python errors in genconfig.py + * fix the buf not freed in read_field + * mcelog: Print warning for locked down kernel + * mcelog: Handle sysfs files without length +- Had to adopt to latest CPU identification model + mainline patch: + b54ee05056a76e mcelog: Drop CASE_INTEL define + and friends +A add_new_amd_cpu_defines +D add-defines.patch +M Start-consolidating-AMD-specific-stuff.patch +M add-f10h-support.patch +M add-f11h-support.patch +M add-f12h-support.patch +M add-f14h-support.patch +M add-f15h-support.patch +M add-f16h-support.patch +M email.patch +M fix_setgroups_missing_call.patch + +------------------------------------------------------------------- +Tue May 03 11:32:42 UTC 2022 - moritz.kodytek@suse.com + +- Update to version 181: + * mcelog: Add support for Raptorlake +- Adopt patches to latest git version +M Start-consolidating-AMD-specific-stuff.patch +M add-f10h-support.patch +M add-f11h-support.patch +M add-f12h-support.patch +M add-f14h-support.patch +M add-f15h-support.patch +M add-f16h-support.patch +M email.patch +M fix_setgroups_missing_call.patch +M mcelog_invert_prefill_db_warning.patch +- Use Python3 shebang instead of python +A python3_shebang +- Use Github URL + +------------------------------------------------------------------- +Wed Apr 13 12:44:57 UTC 2022 - moritz.kodytek@suse.com + +- Update to version 180: + * Fix warnings in sysfs.c + * mcelog: Change "DDR4" string to "DDR" for i10nm platforms + * Fix logrotate syntax + * remove outdated mcelog.conf.5 manual file + * add furture print function for Python2 + * fix python errors in genconfig.py + * fix the buf not freed in 
read_field + * mcelog: Print warning for locked down kernel + * mcelog: Handle sysfs files without length + * Fix make test fail + +------------------------------------------------------------------- +Wed Sep 01 14:30:27 UTC 2021 - trenn@suse.de + +- Update to version 178: + * mcelog: Fix typo/thinko in yellow cache change + +------------------------------------------------------------------- +Mon Jul 19 13:44:53 UTC 2021 - trenn@suse.de + +- Update to version 177: + * README: Mark up filename as code/monospace + * README: Correct filename of `.os_version` + +------------------------------------------------------------------- +Fri Jul 09 13:29:25 UTC 2021 - trenn@suse.de + +- Update to version 177 (jsc#SLE-18903): + * mcelog: Update MSCOD error bit descriptions to match SDM + * mcelog: Fix issues with "yellow" cache offlining + * Add reference to Linux::MCELog + * test: avoid the pfa test hang +- Add _service git magic + +------------------------------------------------------------------- +Thu Apr 1 16:25:59 UTC 2021 - Yaroslav Kurlaev + +- Remove deprecated "StandardOutput=syslog" option from the systemd + unit file to remove a warning from systemd. (bsc#1185151) + +------------------------------------------------------------------- +Tue Jan 26 17:43:06 UTC 2021 - trenn@suse.de + +- Update to version 175 (jsc#SLE-14450): + * mcelog: Add a test case to test page error counter replacement. + * mcelog: Use 'num-errors' to specify the number of mce records to be injected. + * mcelog: Report how often the replacement of page CE counter happened + * mcelog: Limit memory consumption for counting CEs per page + * mcelog: Add support for Sapphirerapids server. 
(jsc#SLE-14450) + * mcelog: i10nm: Fix mapping from bank number to functional unit + +- Only refreshing patches, due to tarball modifications: +M Start-consolidating-AMD-specific-stuff.patch +M add-f10h-support.patch +M add-f11h-support.patch +M add-f12h-support.patch +M add-f14h-support.patch +M add-f15h-support.patch +M add-f16h-support.patch +M email.patch +M fix_setgroups_missing_call.patch +M mcelog_invert_prefill_db_warning.patch + + +------------------------------------------------------------------- +Mon Sep 28 10:16:15 UTC 2020 - trenn@suse.de + +- jsc#SLE-13505, jsc#SLE-13494 +- Update to version 173: + * mcelog: Rebalance the red-black tree after inserting a new node + * mcelog: Add Tigerlake, Rocketlake, Alderlake, Lakefield + * mcelog.service: Check existence of `/dev/mcelog` in systemd + * mcelog.service: Remove DefaultStandardOutput configuration + * mcelog: Add decode for MCi_MISC from 10nm memory controller + * Add reporter tracking to trigger-invoking functions. + * mcelog: Add "kflags" field to "struct mce" + +------------------------------------------------------------------- +Sun Aug 16 19:01:17 UTC 2020 - Dirk Mueller + +- update to 170: + * mcelog: Add Cometlake client model numbers + * mcelog: Do not start mcelog service if edac_mce_amd module is loaded + * mcelog: Decode and print stepping from cpuid + * mcelog: Add "kflags" field to "struct mce" + * Add reporter tracking to trigger-invoking functions. 
+ * mcelog: Add decode for MCi_MISC from 10nm memory controller +- covers: + * jsc#SLE-12689 + +------------------------------------------------------------------- +Wed Nov 20 14:00:53 UTC 2019 - trenn@suse.de + +- Update to version 1.66 (jira SLE-10087, jira SLE-8853): + * mcelog: Add support for Icelake server, Icelake-D, and Snow Ridge +M email.patch +-> Patched with fuzz, refresh needed + +------------------------------------------------------------------- +Tue Oct 29 15:57:54 UTC 2019 - trenn@suse.de + +- Update to version 1.65: + * mcelog: Add Cascade Lake to supported models + +------------------------------------------------------------------- +Fri Sep 13 16:04:20 UTC 2019 - Jean Delvare + +- mcelog.systemd: Preload the dmi-sysfs kernel module. + When /dev/mem can't be read (which is the case when booting in + Secure Mode), mcelog can use the dmi-sysfs interface instead, + however for that the kernel module needs to be loaded first + (bsc#1149186). + +------------------------------------------------------------------- +Fri Sep 06 11:25:34 UTC 2019 - MMuschner@suse.com + +- Update to version 1.64: + * mcelog: Add Icelake client model numbers. 
+ * add Hygon Dhyana support to not use mcelog, as Hygon Dhyana(0x18h) share similiar arch with AMD Family 17h + +------------------------------------------------------------------- +Mon Mar 25 11:31:24 UTC 2019 - christian.voegl@suse.com + +- Update to version 1.62: + * mcelog: Fix memory controller bank channel mappings for Skylake + * mcelog: update tests for new error code + * mcelog: Add decoding for Optane DC persistent memory mode + * mcelog: Deduce channel number for Haswell/Broadwell/Skylake systems +- Change mcelog.spec to use autosetup + +------------------------------------------------------------------- +Fri Sep 21 15:52:28 UTC 2018 - opensuse-packaging@opensuse.org + +(by trenn@suse.de) +- Update to version 1.60 (fate#326221): + * Turn back rb_color field into unsigned long + * trigger: add a sync argument for waiting trigger child process exit + * page: trigger: add pre/post sync trigger when doing soft memory offline + * fixed build errors for some lose code when merging code + * transfer the page address to pre/post-sync-trigger scripts + * mcelog: Fix "--ascii" parsing to cope with change in kernel output since v4.10 + * Remove now unused local variable + * Add scripts file to do MCA error code validation for a selected CPU model + * Add license file + * mcelog: Improve decoding for APEI reported errors + +------------------------------------------------------------------- +Thu Nov 23 13:40:46 UTC 2017 - rbrown@suse.com + +- Replace references to /var/adm/fillup-templates with new + %_fillupdir macro (boo#1069468) + +------------------------------------------------------------------- +Fri Jul 07 13:59:28 UTC 2017 - fschnizlein@suse.com + +- Update to version 1.53: + * Add service file + * dmi: Handle NULL DMI string + * Compress some fields in mempage. + * Add coverity fixes + * Fix typo in man page + * mcelog: Check whether we successfully changed directory for trigger. 
+ * mcelog version: Add ability for OS to define version + * Document .os_release in README + * Set SO_PASSCRED on listen sockets + * memutil.h: add missing include for va_list + +------------------------------------------------------------------- +Mon Mar 20 14:28:54 UTC 2017 - trenn@suse.de + +- Package also includes fixes for (through previous version updates below): + * Add mcelog-skylake.patch patch to support Skylake Xeons (fate#319698) + * Add skylake support (bnc#946734) + * Avoid warnings at boot up (bsc#920197) + * Knights Landing (fate#319507) + * Broadwell Ex and Ep (fate#319697) + * Different Skylake models (fate#319696) + +------------------------------------------------------------------- +Fri Mar 3 09:26:33 UTC 2017 - mpluskal@suse.com + +- Update to version 1.48 + * Fix warning with gcc 6.x + * Remove obsolete TODO file + * Small fixes +- Use url for getting sources + +------------------------------------------------------------------- +Fri Jan 13 15:20:55 UTC 2017 - felix.gerling@suse.com + +- Version update to 1.47 (fate#321308, fate#320907, fate#321931): + * Fix PDF links + * Fix confusing error message + +------------------------------------------------------------------- +Sat Dec 17 00:02:34 UTC 2016 - tchvatal@suse.com + +- Version update to 1.46: + * Various cpu support for new machines +- Refresh patches: + * add-f10h-support.patch + * email.patch +- Force build with pic +- Use normal webpage as Url and do not point to git +- Fix build with --as-needed expanded Makefile patch for email.patch + +------------------------------------------------------------------- +Fri May 6 16:08:48 UTC 2016 - trenn@suse.de + +- Update to bugfix version 1.36 +- Do not start mcelog service based on an udev (/dev/mcelog) rule (bsc#976781) + +------------------------------------------------------------------- +Thu Jan 28 14:25:26 UTC 2016 - trenn@suse.de + +- Update to latest version 1.29. + Mostly little bug fixes. 
+ +------------------------------------------------------------------- +Mon Sep 28 13:26:21 UTC 2015 - trenn@suse.de + +- Update to version v124. Adds skylake CPU support and some bug fixes. + +------------------------------------------------------------------- +Mon Jun 15 16:18:55 UTC 2015 - trenn@suse.de + +- Update to latest v120 git tag and name the version 1.20: + New supported CPUs: + - Add model number for Broadwell-DE + - Added Knights Landing (Xeon Phi) + - Add all current Atom cpuids + - Support Broadwell-U + - New manpages: mcelog.conf.5 and mcelog.triggers.5 + And quite some undocumented bugfixes, see git log for details + +------------------------------------------------------------------- +Fri Jan 23 11:04:40 UTC 2015 - trenn@suse.de + +- Update to version 1.0.8 +- Remove patch which got integrated mainline: + 0001-Continue-without-dmi-when-no-SMBIOS-or-SMBIOS-0x0-in.patch +- Fix possible security issue, build service complained about: + missing-call-to-setgroups-before-setuid + Add fix_setgroups_missing_call.patch + +------------------------------------------------------------------- +Fri Nov 14 18:25:22 UTC 2014 - crrodriguez@opensuse.org + +- While not yet defined, the tmpfiles_create macro takes + an argument for it to actually work + +------------------------------------------------------------------- +Sat Sep 20 03:16:05 UTC 2014 - crrodriguez@opensuse.org + +- mcelog.tmpfiles, mcelog-socket-path.patch, move socket + and pid file to /run/mcelog directory. + This update may require reboot as the relevant rpm macro + tmpfiles_create is not yet in any product. 
+ +------------------------------------------------------------------- +Wed Sep 3 15:41:05 UTC 2014 - trenn@suse.de + +- Fixed the architecture tag to %{ix86} as suggested by: + Andreas Vetter + +------------------------------------------------------------------- +Wed Sep 3 14:41:21 UTC 2014 - meissner@suse.com + +- fixed the architecture tag to %ix86 + +------------------------------------------------------------------- +Fri Jul 11 08:17:28 UTC 2014 - juwolf@suse.com + +- Fixed license, GPL-2.0 + +------------------------------------------------------------------- +Fri Jun 27 13:54:52 UTC 2014 - juwolf@suse.com + +- Added: 0001-Continue-without-dmi-when-no-SMBIOS-or-SMBIOS-0x0-in.patch + Continue without dmi when no SMBIOS or SMBIOS=0x0 in /sys/firmware/efi/systab, bnc#829862 + +------------------------------------------------------------------- +Fri May 16 15:47:18 UTC 2014 - trenn@suse.de + +- Add mce decoding support for latest AMD CPUs (bnc#871881). +- Implementation done by Borislav Petkov + * Add patches/Start-consolidating-AMD-specific-stuff.patch + * Add add-defines.patch + * Add add-f10h-support.patch + * Add add-f11h-support.patch + * Add add-f12h-support.patch + * Add add-f14h-support.patch + * Add add-f15h-support.patch + * Add add-f16h-support.patch + +------------------------------------------------------------------- +Mon Apr 28 16:49:38 UTC 2014 - trenn@suse.de + +- Update to latest git tag v101. +- Mainline decided to finally do a version upgrade to v101 +- Remove v1.1 again, obsolete it and go for version v102 +- Some important fixes in the latest update: + - bnc#873159 + - bnc#873725 + +------------------------------------------------------------------- +Sun Oct 27 18:00:04 UTC 2013 - crrodriguez@opensuse.org + +- Cleanup spec file +- activate mcelog service via udev+systemd combo, if the kernel + registers a /dev/mcelog device the service will be automatically + started. 
+- drop sysvinit scripts, add appropriate %pre %post invocations + of the needed systemd macros. + +------------------------------------------------------------------- +Tue Oct 15 17:02:13 UTC 2013 - trenn@suse.de + +- Updated to latest git HEAD: + commit c7bf28088f056925c04d4fd5768504c59bbf19c4 + Author: Robin Holt + Date: Mon Sep 16 04:30:02 2013 -0500 + Because upstream does not use proper tags/revisions, I now + versioned this one mcelog-1.1 + +------------------------------------------------------------------- +Fri Feb 22 13:00:21 UTC 2013 - rmilasan@suse.com + +- Install mcelog.service accordingly (/usr/lib/systemd for 12.3 + and up or /lib/systemd for older versions). + +------------------------------------------------------------------- +Thu Aug 16 14:41:55 UTC 2012 - trenn@suse.de + +- bnc#774226 mcelog + systemd: won't start without MCELOG_ADMIN_EMAIL set +- fix uninitialized variable mail_setup + +------------------------------------------------------------------- +Wed Jul 18 12:49:43 UTC 2012 - trenn@suse.de + +- Also build mcelog packages for i386 (bnc#770726) + +------------------------------------------------------------------- +Tue Apr 24 23:43:56 YEKT 2012 - avm-xandry@yandex.ru + +- Fixed description in init-file. + +------------------------------------------------------------------- +Wed Nov 23 21:36:36 UTC 2011 - crrodriguez@opensuse.org + +- Add systemd unit. 
+ +------------------------------------------------------------------- +Thu Aug 18 00:09:50 CEST 2011 - ro@suse.de + +- update to GIT of today (6e4e2a000124f08f1a4e3791c2b02ec9ae6af393) +- many bugfixes +- Implement re-parsing of mcelog output in ASCII +- Add support for non-page aligned EFI Configuration Tables +- Add --debug-numerrors +- Add decoder for corrected XEN events to --ascii +- Correctly log kernel supplied time +- record the trigger info in the log +- mcelog: Implement dmi decoding for UEFI +- mcelog: Add usage information to mcelog for --ignorenodev +- Fix length calculation of SMBIOS mapping +- change disclaimer +- explictly spell out corrected errors + +------------------------------------------------------------------- +Sat Jul 2 21:50:53 UTC 2011 - trenn@suse.de + +- Update to latest git version (fate#311830) + Unfortunately versions have not been increased, latest tag + still is 1.0-pre3 (same as 1 year ago), therefore the date + is included in the version. I try to push maintainers to + increase the version number. +- Invert logic of db prefill messages -> info if it works, silent + if not + +------------------------------------------------------------------- +Tue Jun 7 09:51:57 UTC 2011 - trenn@suse.de + +- Remove test email address from config + +------------------------------------------------------------------- +Mon Oct 25 15:48:57 CEST 2010 - trenn@suse.de + +- Add Sandybridge/Westmere decode support +- Fix domainname for email notification +- Update to latest git version + +------------------------------------------------------------------- +Tue Apr 6 15:15:45 CEST 2010 - trenn@suse.de + +- Update to latest git version having quite some fixes (no features): + - Fixed some memleaks and made app valgrind conform + - Fixed theoretical DoS attack (bnc#586241) + - Added support of additional cpus + - Fixed a lot messages (in manpage, in triggers, in README, ...) 
+ +------------------------------------------------------------------- +Fri Feb 19 00:39:36 CET 2010 - ro@suse.de + +- Update to version 1.0pre3 + - Boxboro-EX enhancements + - Bugfixes + Minor pidfile handling adjusting in service file +- Added missing conf file and trigger scripts + +------------------------------------------------------------------- +Thu Dec 3 12:12:40 CET 2009 - trenn@suse.de + +- Minor .spec and init script fixes/cleanups + +------------------------------------------------------------------- +Thu Dec 3 11:28:05 CET 2009 - trenn@suse.de + +- Add service parts: + - let mcelog --daemon handle pid file in /var/run/mcelog.pid + - add insserv logic + - remove cron.daily script in update case + +------------------------------------------------------------------- +Fri Nov 27 22:01:40 CET 2009 - trenn@suse.de + +- Update to latest git version (called it 1.0pre1 myself, may differ + with a possible public 1.0pre1 version), this includes + following new featues: + - yellow bit support + - page predictive failure analysis support + - Initial memdb support + This allows to account memory errors in memory in daemon mode + And a lot more... + +------------------------------------------------------------------- +Fri Oct 2 17:06:03 CEST 2009 - trenn@suse.de + +- Update to latest git version (0.9pre) + Introduces mcelog damon mode, service file will follow in an + extra commit. + +------------------------------------------------------------------- +Fri Jan 9 08:41:58 CET 2009 - olh@suse.de + +- use ExclusiveArch as in /SRC/arch/ + +------------------------------------------------------------------- +Sat Sep 27 21:50:27 CEST 2008 - trenn@suse.de + +- fate #304279 mcelog support for Tigerton/Dunnington + Patch is from Andi himself with this statement: + While it looks large most of it is just new tables. 
+ +------------------------------------------------------------------- +Mon May 29 16:23:12 CEST 2006 - ak@suse.de + +- decode intel thermal events too (#179327) + +------------------------------------------------------------------- +Fri May 5 19:00:23 CEST 2006 - ak@suse.de + +- Update to 0.7. This fixes + - Fix --dmi option (#166324) + - Incorporate old patches + +------------------------------------------------------------------- +Fri Mar 3 20:21:01 CET 2006 - ak@suse.de + +- Avoid cosmetic problem in --filter (#153347) + +------------------------------------------------------------------- +Wed Feb 8 14:58:50 CET 2006 - ak@suse.de + +- update to mcelog 0.6 + * Fixes bugs (#148869, #137985) + * Adds --dmi option to map addresses to DIMMs using SMBIOS + (default to off) + +------------------------------------------------------------------- +Wed Jan 25 21:45:03 CET 2006 - mls@suse.de + +- converted neededforbuild to BuildRequires + +------------------------------------------------------------------- +Mon Dec 19 10:42:35 CET 2005 - sf@suse.de + +- update to version 0.5 + * Clarify --ascii in the manpage + *Support for AMD K8 Revision F machine check DRAM error + thresholding + +------------------------------------------------------------------- +Fri Feb 11 10:39:53 CET 2005 - ak@suse.de + +- Use RPM_OPT_FLAGS +- Improve description again + +------------------------------------------------------------------- +Thu Feb 10 19:21:39 CET 2005 - ak@suse.de + +- mcelog-0.4: + * add support to decode AMD K8 (Opteron/Athlon64/AthlonFX) and + Intel P4 (Xeon and Pentium 4) events + * add --ascii option to decode machine check panic information +- Rewrite description in .spec file + +------------------------------------------------------------------- +Wed Jun 9 21:51:14 CEST 2004 - ak@suse.de + +- memlog-0.2: + * fix mcelog looping (#41863) + * Add GPL notices + +------------------------------------------------------------------- +Thu Mar 25 17:55:05 CET 2004 - sf@suse.de + +- 
initial version +- fixes #36898 + diff --git a/mcelog.obsinfo b/mcelog.obsinfo new file mode 100644 index 0000000..80c6b17 --- /dev/null +++ b/mcelog.obsinfo @@ -0,0 +1,4 @@ +name: mcelog +version: 196 +mtime: 1698794375 +commit: edfe78a0dc54a940f4916a9bd681eab7b3f746d1 diff --git a/mcelog.spec b/mcelog.spec new file mode 100644 index 0000000..72fcc2f --- /dev/null +++ b/mcelog.spec @@ -0,0 +1,118 @@ +# +# spec file for package mcelog +# +# Copyright (c) 2023 SUSE LLC +# +# All modifications and additions to the file contributed by third parties +# remain the property of their copyright owners, unless otherwise agreed +# upon. The license for this file, and modifications and additions to the +# file, is the same license as for the pristine package itself (unless the +# license for the pristine package is not an Open Source License, in which +# case the license is the MIT License). An "Open Source License" is a +# license that conforms to the Open Source Definition (Version 1.9) +# published by the Open Source Initiative. + +# Please submit bugfixes or comments via https://bugs.opensuse.org/ +# + + +#Compat macro for new _fillupdir macro introduced in Nov 2017 +%if ! 
%{defined _fillupdir} + %define _fillupdir %{_localstatedir}/adm/fillup-templates +%endif +Name: mcelog +Version: 196 +Release: 0 +Summary: Log Machine Check Events +License: GPL-2.0-only +Group: System/Monitoring +URL: https://mcelog.org/ +Source: mcelog-%{version}.tar.gz +Source2: mcelog.sysconfig +Source3: mcelog.systemd +Source5: mcelog.tmpfiles +Source6: README.email_setup +Patch1: email.patch +Patch2: mcelog_invert_prefill_db_warning.patch +Patch3: Start-consolidating-AMD-specific-stuff.patch +Patch4: add_new_amd_cpu_defines +Patch5: patches/add-f10h-support.patch +Patch6: patches/add-f11h-support.patch +Patch7: patches/add-f12h-support.patch +Patch8: patches/add-f14h-support.patch +Patch9: patches/add-f15h-support.patch +Patch10: patches/add-f16h-support.patch +Patch11: mcelog-socket-path.patch +Patch12: fix_setgroups_missing_call.patch +BuildRequires: libesmtp-devel +BuildRequires: pkgconfig +BuildRequires: pkgconfig(systemd) +Requires: logrotate +Requires(pre): %fillup_prereq +ExclusiveArch: %{ix86} x86_64 +%{?systemd_requires} + +%description +mcelog retrieves machine check events from an x86-64 kernel in a cron +job, decodes them, and logs them to %{_localstatedir}/log/mcelog. + +A machine check event is a hardware error detected by the CPU. +It should run on any x86-64 system. + +In addition, it allows decoding machine check kernel panic messages. 
+ +%prep +%autosetup + +%build +echo "%{version}" > .os_version +%make_build CFLAGS="%{optflags} -fpie -pie" + +%install +export prefix=%{buildroot}%{_prefix} +export etcprefix=%{buildroot} +make -e install +mkdir -p %{buildroot}%{_sysconfdir}/logrotate.d/ +install -m644 mcelog.logrotate %{buildroot}%{_sysconfdir}/logrotate.d/mcelog + +mkdir -p %{buildroot}%{_fillupdir} +install -m 644 %{SOURCE2} %{buildroot}%{_fillupdir}/sysconfig.mcelog + +mkdir -p %{buildroot}/%{_docdir}/%{name} +install -m 644 %{SOURCE6} %{buildroot}/%{_docdir}/%{name}/README.email_setup +install -m 644 lk10-mcelog.pdf %{buildroot}/%{_docdir}/%{name}/lk10-mcelog.pdf +install -D -m 0644 %{SOURCE3} %{buildroot}%{_unitdir}/mcelog.service +install -D -m 0644 %{SOURCE5} %{buildroot}%{_tmpfilesdir}/mcelog.conf +ln -sf %{_sbindir}/service %{buildroot}%{_sbindir}/rcmcelog + +%pre +%service_add_pre %{name}.service + +%post +%fillup_only +%service_add_post %{name}.service +%{?tmpfiles_create:%tmpfiles_create %{_tmpfilesdir}/mcelog.conf} + +%preun +%service_del_preun %{name}.service + +%postun +%service_del_postun %{name}.service + +%files +%defattr (-,root,root,755) +%{_mandir}/man8/* +%{_mandir}/man5/* +%{_sbindir}/mcelog +%config %{_sysconfdir}/logrotate.d/mcelog +%dir %{_sysconfdir}/mcelog +%config %{_sysconfdir}/mcelog/mcelog.conf +%{_fillupdir}/sysconfig.mcelog +%{_sysconfdir}/mcelog/*trigger +%{_unitdir}/mcelog.service +%{_tmpfilesdir}/mcelog.conf +%{_docdir}/%{name} +%{_sbindir}/rcmcelog +%ghost /run/mcelog + +%changelog diff --git a/mcelog.sysconfig b/mcelog.sysconfig new file mode 100644 index 0000000..6168adb --- /dev/null +++ b/mcelog.sysconfig @@ -0,0 +1,10 @@ +## Path: Hardware/machine_check +## Description: email address machine check exceptions are sent to +## Type: string +## Default: "" +# +# Machine check exceptions (correctable or uncorrectable memory ECC +# errors, processor errors or other hardware errors) are sent with a detailed +# description to this address.
Also read README.email_setup for further details. +# +MCELOG_ADMIN_EMAIL="" diff --git a/mcelog.systemd b/mcelog.systemd new file mode 100644 index 0000000..e63ee1d --- /dev/null +++ b/mcelog.systemd @@ -0,0 +1,13 @@ +[Unit] +Description=Machine Check Exception Logging Daemon +ConditionVirtualization=false +ConditionPathExists=/dev/mcelog + +[Service] +EnvironmentFile=-/etc/sysconfig/mcelog +ExecStartPre=/sbin/modprobe msr +ExecStartPre=/sbin/modprobe dmi-sysfs +ExecStart=/usr/sbin/mcelog --ignorenodev --daemon --foreground + +[Install] +WantedBy=multi-user.target diff --git a/mcelog.tmpfiles b/mcelog.tmpfiles new file mode 100644 index 0000000..04e0554 --- /dev/null +++ b/mcelog.tmpfiles @@ -0,0 +1 @@ +d /run/mcelog 0755 root root - \ No newline at end of file diff --git a/mcelog_invert_prefill_db_warning.patch b/mcelog_invert_prefill_db_warning.patch new file mode 100644 index 0000000..47466d3 --- /dev/null +++ b/mcelog_invert_prefill_db_warning.patch @@ -0,0 +1,23 @@ +--- + memdb.c | 10 +++++----- + 1 file changed, 5 insertions(+), 5 deletions(-) + +--- a/memdb.c ++++ b/memdb.c +@@ -431,11 +431,11 @@ + md->location = xstrdup(bl); + md->name = xstrdup(dmi_getstring(&d->header, d->device_locator)); + } +- if (missed) { +- static int warned; +- if (!warned) { +- Eprintf("failed to prefill DIMM database from DMI data"); +- warned = 1; ++ if (!missed) { ++ static int db_rill_msg; ++ if (!db_rill_msg) { ++ Gprintf("Prefilled DIMM database from DMI data"); ++ db_rill_msg = 1; + } + } + }