Merge remote-tracking branches 'ras/edac-cxl', 'ras/edac-drivers' and 'ras/edac-misc' into edac-updates

* ras/edac-cxl:
  EDAC/device: Fix dev_set_name() format string
  EDAC: Update memory repair control interface for memory sparing feature
  EDAC: Add a memory repair control feature
  EDAC: Add an Error Check Scrub control feature
  EDAC: Add scrub control feature
  EDAC: Add support for EDAC device features control

* ras/edac-drivers:
  EDAC/ie31200: Switch Raptor Lake-S to interrupt mode
  EDAC/ie31200: Add Intel Raptor Lake-S SoCs support
  EDAC/ie31200: Break up ie31200_probe1()
  EDAC/ie31200: Fold the two channel loops into one loop
  EDAC/ie31200: Make struct dimm_data contain decoded information
  EDAC/ie31200: Make the memory controller resources configurable
  EDAC/ie31200: Simplify the pci_device_id table
  EDAC/ie31200: Fix the 3rd parameter name of *populate_dimm_info()
  EDAC/ie31200: Fix the error path order of ie31200_init()
  EDAC/ie31200: Fix the DIMM size mask for several SoCs
  EDAC/ie31200: Fix the size of EDAC_MC_LAYER_CHIP_SELECT layer
  EDAC/{skx_common,i10nm}: Fix some missing error reports on Emerald Rapids
  EDAC/igen6: Fix the flood of invalid error reports
  EDAC/ie31200: work around false positive build warning

* ras/edac-misc:
  MAINTAINERS: Add a secondary maintainer for bluefield_edac
  EDAC/pnd2: Make read-only const array intlv static
  EDAC/igen6: Constify struct res_config
  EDAC/amd64: Simplify return statement in dct_ecc_enabled()
  EDAC: Use string choice helper functions

Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
Borislav Petkov (AMD) 2025-03-25 14:53:27 +01:00
commit 298ffd5375
13 changed files with 508 additions and 317 deletions

MAINTAINERS

@ -8214,6 +8214,7 @@ F: drivers/edac/aspeed_edac.c
EDAC-BLUEFIELD
M: Shravan Kumar Ramani <shravankr@nvidia.com>
M: David Thompson <davthompson@nvidia.com>
S: Supported
F: drivers/edac/bluefield_edac.c

drivers/edac/Kconfig

@ -196,7 +196,7 @@ config EDAC_I3200
config EDAC_IE31200
tristate "Intel e312xx"
depends on PCI && X86
depends on PCI && X86 && X86_MCE_INTEL
help
Support for error detection and correction on the Intel
E3-1200 based DRAM controllers.

drivers/edac/amd64_edac.c

@ -1,5 +1,6 @@
// SPDX-License-Identifier: GPL-2.0-only
#include <linux/ras.h>
#include <linux/string_choices.h>
#include "amd64_edac.h"
#include <asm/amd_nb.h>
#include <asm/amd_node.h>
@ -1171,22 +1172,21 @@ static void debug_dump_dramcfg_low(struct amd64_pvt *pvt, u32 dclr, int chan)
edac_dbg(1, " LRDIMM %dx rank multiply\n", (dcsm & 0x3));
}
edac_dbg(1, "All DIMMs support ECC:%s\n",
(dclr & BIT(19)) ? "yes" : "no");
edac_dbg(1, "All DIMMs support ECC: %s\n", str_yes_no(dclr & BIT(19)));
edac_dbg(1, " PAR/ERR parity: %s\n",
(dclr & BIT(8)) ? "enabled" : "disabled");
str_enabled_disabled(dclr & BIT(8)));
if (pvt->fam == 0x10)
edac_dbg(1, " DCT 128bit mode width: %s\n",
(dclr & BIT(11)) ? "128b" : "64b");
edac_dbg(1, " x4 logical DIMMs present: L0: %s L1: %s L2: %s L3: %s\n",
(dclr & BIT(12)) ? "yes" : "no",
(dclr & BIT(13)) ? "yes" : "no",
(dclr & BIT(14)) ? "yes" : "no",
(dclr & BIT(15)) ? "yes" : "no");
str_yes_no(dclr & BIT(12)),
str_yes_no(dclr & BIT(13)),
str_yes_no(dclr & BIT(14)),
str_yes_no(dclr & BIT(15)));
}
#define CS_EVEN_PRIMARY BIT(0)
@ -1353,14 +1353,14 @@ static void umc_dump_misc_regs(struct amd64_pvt *pvt)
edac_dbg(1, "UMC%d UMC cap high: 0x%x\n", i, umc->umc_cap_hi);
edac_dbg(1, "UMC%d ECC capable: %s, ChipKill ECC capable: %s\n",
i, (umc->umc_cap_hi & BIT(30)) ? "yes" : "no",
(umc->umc_cap_hi & BIT(31)) ? "yes" : "no");
i, str_yes_no(umc->umc_cap_hi & BIT(30)),
str_yes_no(umc->umc_cap_hi & BIT(31)));
edac_dbg(1, "UMC%d All DIMMs support ECC: %s\n",
i, (umc->umc_cfg & BIT(12)) ? "yes" : "no");
i, str_yes_no(umc->umc_cfg & BIT(12)));
edac_dbg(1, "UMC%d x4 DIMMs present: %s\n",
i, (umc->dimm_cfg & BIT(6)) ? "yes" : "no");
i, str_yes_no(umc->dimm_cfg & BIT(6)));
edac_dbg(1, "UMC%d x16 DIMMs present: %s\n",
i, (umc->dimm_cfg & BIT(7)) ? "yes" : "no");
i, str_yes_no(umc->dimm_cfg & BIT(7)));
umc_debug_display_dimm_sizes(pvt, i);
}
@ -1371,11 +1371,11 @@ static void dct_dump_misc_regs(struct amd64_pvt *pvt)
edac_dbg(1, "F3xE8 (NB Cap): 0x%08x\n", pvt->nbcap);
edac_dbg(1, " NB two channel DRAM capable: %s\n",
(pvt->nbcap & NBCAP_DCT_DUAL) ? "yes" : "no");
str_yes_no(pvt->nbcap & NBCAP_DCT_DUAL));
edac_dbg(1, " ECC capable: %s, ChipKill ECC capable: %s\n",
(pvt->nbcap & NBCAP_SECDED) ? "yes" : "no",
(pvt->nbcap & NBCAP_CHIPKILL) ? "yes" : "no");
str_yes_no(pvt->nbcap & NBCAP_SECDED),
str_yes_no(pvt->nbcap & NBCAP_CHIPKILL));
debug_dump_dramcfg_low(pvt, pvt->dclr0, 0);
@ -1398,7 +1398,7 @@ static void dct_dump_misc_regs(struct amd64_pvt *pvt)
if (!dct_ganging_enabled(pvt))
debug_dump_dramcfg_low(pvt, pvt->dclr1, 1);
edac_dbg(1, " DramHoleValid: %s\n", dhar_valid(pvt) ? "yes" : "no");
edac_dbg(1, " DramHoleValid: %s\n", str_yes_no(dhar_valid(pvt)));
amd64_info("using x%u syndromes.\n", pvt->ecc_sym_sz);
}
@ -2027,15 +2027,15 @@ static void read_dram_ctl_register(struct amd64_pvt *pvt)
if (!dct_ganging_enabled(pvt))
edac_dbg(0, " Address range split per DCT: %s\n",
(dct_high_range_enabled(pvt) ? "yes" : "no"));
str_yes_no(dct_high_range_enabled(pvt)));
edac_dbg(0, " data interleave for ECC: %s, DRAM cleared since last warm reset: %s\n",
(dct_data_intlv_enabled(pvt) ? "enabled" : "disabled"),
(dct_memory_cleared(pvt) ? "yes" : "no"));
str_enabled_disabled(dct_data_intlv_enabled(pvt)),
str_yes_no(dct_memory_cleared(pvt)));
edac_dbg(0, " channel interleave: %s, "
"interleave bits selector: 0x%x\n",
(dct_interleave_enabled(pvt) ? "enabled" : "disabled"),
str_enabled_disabled(dct_interleave_enabled(pvt)),
dct_sel_interleave_addr(pvt));
}
@ -3208,8 +3208,7 @@ static bool nb_mce_bank_enabled_on_node(u16 nid)
nbe = reg->l & MSR_MCGCTL_NBE;
edac_dbg(0, "core: %u, MCG_CTL: 0x%llx, NB MSR is %s\n",
cpu, reg->q,
(nbe ? "enabled" : "disabled"));
cpu, reg->q, str_enabled_disabled(nbe));
if (!nbe)
goto out;
@ -3353,12 +3352,9 @@ static bool dct_ecc_enabled(struct amd64_pvt *pvt)
edac_dbg(0, "NB MCE bank disabled, set MSR 0x%08x[4] on node %d to enable.\n",
MSR_IA32_MCG_CTL, nid);
edac_dbg(3, "Node %d: DRAM ECC %s.\n", nid, (ecc_en ? "enabled" : "disabled"));
edac_dbg(3, "Node %d: DRAM ECC %s.\n", nid, str_enabled_disabled(ecc_en));
if (!ecc_en || !nb_mce_en)
return false;
else
return true;
return ecc_en && nb_mce_en;
}
static bool umc_ecc_enabled(struct amd64_pvt *pvt)
@ -3378,7 +3374,7 @@ static bool umc_ecc_enabled(struct amd64_pvt *pvt)
}
}
edac_dbg(3, "Node %d: DRAM ECC %s.\n", pvt->mc_node_id, (ecc_en ? "enabled" : "disabled"));
edac_dbg(3, "Node %d: DRAM ECC %s.\n", pvt->mc_node_id, str_enabled_disabled(ecc_en));
return ecc_en;
}
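
For reference, the string choice helpers that this series substitutes for the open-coded ternaries come from <linux/string_choices.h>. A minimal sketch of the ones used in the hunks above (the upstream header implements them as static inlines with these semantics):

static inline const char *str_yes_no(bool v)
{
	return v ? "yes" : "no";
}

static inline const char *str_enabled_disabled(bool v)
{
	return v ? "enabled" : "disabled";
}

static inline const char *str_plural(size_t num)
{
	return num == 1 ? "" : "s";	/* "1 error" vs. "2 errors" */
}

Besides shrinking the call sites, the helpers keep the wording of debug output consistent across drivers.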

drivers/edac/debugfs.c

@ -1,4 +1,7 @@
// SPDX-License-Identifier: GPL-2.0-only
#include <linux/string_choices.h>
#include "edac_module.h"
static struct dentry *edac_debugfs;
@ -22,7 +25,7 @@ static ssize_t edac_fake_inject_write(struct file *file,
"Generating %d %s fake error%s to %d.%d.%d to test core handling. NOTE: this won't test the driver-specific decoding logic.\n",
errcount,
(type == HW_EVENT_ERR_UNCORRECTED) ? "UE" : "CE",
errcount > 1 ? "s" : "",
str_plural(errcount),
mci->fake_inject_layer[0],
mci->fake_inject_layer[1],
mci->fake_inject_layer[2]

drivers/edac/i10nm_base.c

@ -751,6 +751,8 @@ static int i10nm_get_ddr_munits(void)
continue;
} else {
d->imc[lmc].mdev = mdev;
if (res_cfg->type == SPR)
skx_set_mc_mapping(d, i, lmc);
lmc++;
}
}

drivers/edac/i5000_edac.c

@ -31,6 +31,7 @@
#include <linux/slab.h>
#include <linux/edac.h>
#include <linux/mmzone.h>
#include <linux/string_choices.h>
#include "edac_module.h"
@ -899,7 +900,7 @@ static void decode_mtr(int slot_row, u16 mtr)
edac_dbg(2, "\t\tWIDTH: x%d\n", MTR_DRAM_WIDTH(mtr));
edac_dbg(2, "\t\tELECTRICAL THROTTLING is %s\n",
MTR_DIMMS_ETHROTTLE(mtr) ? "enabled" : "disabled");
str_enabled_disabled(MTR_DIMMS_ETHROTTLE(mtr)));
edac_dbg(2, "\t\tNUMBANK: %d bank(s)\n", MTR_DRAM_BANKS(mtr));
edac_dbg(2, "\t\tNUMRANK: %s\n",

drivers/edac/i7300_edac.c

@ -23,6 +23,7 @@
#include <linux/slab.h>
#include <linux/edac.h>
#include <linux/mmzone.h>
#include <linux/string_choices.h>
#include "edac_module.h"
@ -620,7 +621,7 @@ static int decode_mtr(struct i7300_pvt *pvt,
edac_dbg(2, "\t\tWIDTH: x%d\n", MTR_DRAM_WIDTH(mtr));
edac_dbg(2, "\t\tELECTRICAL THROTTLING is %s\n",
MTR_DIMMS_ETHROTTLE(mtr) ? "enabled" : "disabled");
str_enabled_disabled(MTR_DIMMS_ETHROTTLE(mtr)));
edac_dbg(2, "\t\tNUMBANK: %d bank(s)\n", MTR_DRAM_BANKS(mtr));
edac_dbg(2, "\t\tNUMRANK: %s\n",
@ -871,9 +872,9 @@ static int i7300_get_mc_regs(struct mem_ctl_info *mci)
IS_MIRRORED(pvt->mc_settings) ? "" : "non-");
edac_dbg(0, "Error detection is %s\n",
IS_ECC_ENABLED(pvt->mc_settings) ? "enabled" : "disabled");
str_enabled_disabled(IS_ECC_ENABLED(pvt->mc_settings)));
edac_dbg(0, "Retry is %s\n",
IS_RETRY_ENABLED(pvt->mc_settings) ? "enabled" : "disabled");
str_enabled_disabled(IS_RETRY_ENABLED(pvt->mc_settings)));
/* Get Memory Interleave Range registers */
pci_read_config_word(pvt->pci_dev_16_1_fsb_addr_map, MIR0,

drivers/edac/ie31200_edac.c

@ -51,6 +51,7 @@
#include <linux/edac.h>
#include <linux/io-64-nonatomic-lo-hi.h>
#include <asm/mce.h>
#include "edac_module.h"
#define EDAC_MOD_STR "ie31200_edac"
@ -84,44 +85,23 @@
#define PCI_DEVICE_ID_INTEL_IE31200_HB_CFL_9 0x3ec6
#define PCI_DEVICE_ID_INTEL_IE31200_HB_CFL_10 0x3eca
/* Test if HB is for Skylake or later. */
#define DEVICE_ID_SKYLAKE_OR_LATER(did) \
(((did) == PCI_DEVICE_ID_INTEL_IE31200_HB_8) || \
((did) == PCI_DEVICE_ID_INTEL_IE31200_HB_9) || \
((did) == PCI_DEVICE_ID_INTEL_IE31200_HB_10) || \
((did) == PCI_DEVICE_ID_INTEL_IE31200_HB_11) || \
((did) == PCI_DEVICE_ID_INTEL_IE31200_HB_12) || \
(((did) & PCI_DEVICE_ID_INTEL_IE31200_HB_CFL_MASK) == \
PCI_DEVICE_ID_INTEL_IE31200_HB_CFL_MASK))
/* Raptor Lake-S */
#define PCI_DEVICE_ID_INTEL_IE31200_RPL_S_1 0xa703
#define PCI_DEVICE_ID_INTEL_IE31200_RPL_S_2 0x4640
#define PCI_DEVICE_ID_INTEL_IE31200_RPL_S_3 0x4630
#define IE31200_DIMMS 4
#define IE31200_RANKS 8
#define IE31200_RANKS_PER_CHANNEL 4
#define IE31200_RANKS_PER_CHANNEL 8
#define IE31200_DIMMS_PER_CHANNEL 2
#define IE31200_CHANNELS 2
#define IE31200_IMC_NUM 2
/* Intel IE31200 register addresses - device 0 function 0 - DRAM Controller */
#define IE31200_MCHBAR_LOW 0x48
#define IE31200_MCHBAR_HIGH 0x4c
#define IE31200_MCHBAR_MASK GENMASK_ULL(38, 15)
#define IE31200_MMR_WINDOW_SIZE BIT(15)
/*
* Error Status Register (16b)
*
* 15 reserved
* 14 Isochronous TBWRR Run Behind FIFO Full
* (ITCV)
* 13 Isochronous TBWRR Run Behind FIFO Put
* (ITSTV)
* 12 reserved
* 11 MCH Thermal Sensor Event
* for SMI/SCI/SERR (GTSE)
* 10 reserved
* 9 LOCK to non-DRAM Memory Flag (LCKF)
* 8 reserved
* 7 DRAM Throttle Flag (DTF)
* 6:2 reserved
* 1 Multi-bit DRAM ECC Error Flag (DMERR)
* 0 Single-bit DRAM ECC Error Flag (DSERR)
*/
@ -130,68 +110,60 @@
#define IE31200_ERRSTS_CE BIT(0)
#define IE31200_ERRSTS_BITS (IE31200_ERRSTS_UE | IE31200_ERRSTS_CE)
/*
* Channel 0 ECC Error Log (64b)
*
* 63:48 Error Column Address (ERRCOL)
* 47:32 Error Row Address (ERRROW)
* 31:29 Error Bank Address (ERRBANK)
* 28:27 Error Rank Address (ERRRANK)
* 26:24 reserved
* 23:16 Error Syndrome (ERRSYND)
* 15: 2 reserved
* 1 Multiple Bit Error Status (MERRSTS)
* 0 Correctable Error Status (CERRSTS)
*/
#define IE31200_C0ECCERRLOG 0x40c8
#define IE31200_C1ECCERRLOG 0x44c8
#define IE31200_C0ECCERRLOG_SKL 0x4048
#define IE31200_C1ECCERRLOG_SKL 0x4448
#define IE31200_ECCERRLOG_CE BIT(0)
#define IE31200_ECCERRLOG_UE BIT(1)
#define IE31200_ECCERRLOG_RANK_BITS GENMASK_ULL(28, 27)
#define IE31200_ECCERRLOG_RANK_SHIFT 27
#define IE31200_ECCERRLOG_SYNDROME_BITS GENMASK_ULL(23, 16)
#define IE31200_ECCERRLOG_SYNDROME_SHIFT 16
#define IE31200_ECCERRLOG_SYNDROME(log) \
((log & IE31200_ECCERRLOG_SYNDROME_BITS) >> \
IE31200_ECCERRLOG_SYNDROME_SHIFT)
#define IE31200_CAPID0 0xe4
#define IE31200_CAPID0_PDCD BIT(4)
#define IE31200_CAPID0_DDPCD BIT(6)
#define IE31200_CAPID0_ECC BIT(1)
#define IE31200_MAD_DIMM_0_OFFSET 0x5004
#define IE31200_MAD_DIMM_0_OFFSET_SKL 0x500C
#define IE31200_MAD_DIMM_SIZE GENMASK_ULL(7, 0)
#define IE31200_MAD_DIMM_A_RANK BIT(17)
#define IE31200_MAD_DIMM_A_RANK_SHIFT 17
#define IE31200_MAD_DIMM_A_RANK_SKL BIT(10)
#define IE31200_MAD_DIMM_A_RANK_SKL_SHIFT 10
#define IE31200_MAD_DIMM_A_WIDTH BIT(19)
#define IE31200_MAD_DIMM_A_WIDTH_SHIFT 19
#define IE31200_MAD_DIMM_A_WIDTH_SKL GENMASK_ULL(9, 8)
#define IE31200_MAD_DIMM_A_WIDTH_SKL_SHIFT 8
/* Skylake reports 1GB increments, everything else is 256MB */
#define IE31200_PAGES(n, skl) \
(n << (28 + (2 * skl) - PAGE_SHIFT))
/* Non-constant mask variant of FIELD_GET() */
#define field_get(_mask, _reg) (((_reg) & (_mask)) >> (ffs(_mask) - 1))
static int nr_channels;
static struct pci_dev *mci_pdev;
static int ie31200_registered = 1;
struct res_config {
enum mem_type mtype;
bool cmci;
int imc_num;
/* Host MMIO configuration register */
u64 reg_mchbar_mask;
u64 reg_mchbar_window_size;
/* ECC error log register */
u64 reg_eccerrlog_offset[IE31200_CHANNELS];
u64 reg_eccerrlog_ce_mask;
u64 reg_eccerrlog_ce_ovfl_mask;
u64 reg_eccerrlog_ue_mask;
u64 reg_eccerrlog_ue_ovfl_mask;
u64 reg_eccerrlog_rank_mask;
u64 reg_eccerrlog_syndrome_mask;
/* MSR to clear ECC error log register */
u32 msr_clear_eccerrlog_offset;
/* DIMM characteristics register */
u64 reg_mad_dimm_size_granularity;
u64 reg_mad_dimm_offset[IE31200_CHANNELS];
u32 reg_mad_dimm_size_mask[IE31200_DIMMS_PER_CHANNEL];
u32 reg_mad_dimm_rank_mask[IE31200_DIMMS_PER_CHANNEL];
u32 reg_mad_dimm_width_mask[IE31200_DIMMS_PER_CHANNEL];
};
struct ie31200_priv {
void __iomem *window;
void __iomem *c0errlog;
void __iomem *c1errlog;
struct res_config *cfg;
struct mem_ctl_info *mci;
struct pci_dev *pdev;
struct device dev;
};
static struct ie31200_pvt {
struct ie31200_priv *priv[IE31200_IMC_NUM];
} ie31200_pvt;
enum ie31200_chips {
IE31200 = 0,
IE31200_1 = 1,
};
struct ie31200_dev_info {
@ -202,18 +174,22 @@ struct ie31200_error_info {
u16 errsts;
u16 errsts2;
u64 eccerrlog[IE31200_CHANNELS];
u64 erraddr;
};
static const struct ie31200_dev_info ie31200_devs[] = {
[IE31200] = {
.ctl_name = "IE31200"
},
[IE31200_1] = {
.ctl_name = "IE31200_1"
},
};
struct dimm_data {
u8 size; /* in multiples of 256MB, except Skylake is 1GB */
u8 dual_rank : 1,
x16_width : 2; /* 0 means x8 width */
u64 size; /* in bytes */
u8 ranks;
enum dev_type dtype;
};
static int how_many_channels(struct pci_dev *pdev)
@ -251,29 +227,54 @@ static bool ecc_capable(struct pci_dev *pdev)
return true;
}
static int eccerrlog_row(u64 log)
{
return ((log & IE31200_ECCERRLOG_RANK_BITS) >>
IE31200_ECCERRLOG_RANK_SHIFT);
}
#define mci_to_pci_dev(mci) (((struct ie31200_priv *)(mci)->pvt_info)->pdev)
static void ie31200_clear_error_info(struct mem_ctl_info *mci)
{
struct ie31200_priv *priv = mci->pvt_info;
struct res_config *cfg = priv->cfg;
/*
* The PCI ERRSTS register is deprecated. Write the MSR to clear
* the ECC error log registers in all memory controllers.
*/
if (cfg->msr_clear_eccerrlog_offset) {
if (wrmsr_safe(cfg->msr_clear_eccerrlog_offset,
cfg->reg_eccerrlog_ce_mask |
cfg->reg_eccerrlog_ce_ovfl_mask |
cfg->reg_eccerrlog_ue_mask |
cfg->reg_eccerrlog_ue_ovfl_mask, 0) < 0)
ie31200_printk(KERN_ERR, "Failed to wrmsr.\n");
return;
}
/*
* Clear any error bits.
* (Yes, we really clear bits by writing 1 to them.)
*/
pci_write_bits16(to_pci_dev(mci->pdev), IE31200_ERRSTS,
pci_write_bits16(mci_to_pci_dev(mci), IE31200_ERRSTS,
IE31200_ERRSTS_BITS, IE31200_ERRSTS_BITS);
}
static void ie31200_get_and_clear_error_info(struct mem_ctl_info *mci,
struct ie31200_error_info *info)
{
struct pci_dev *pdev;
struct pci_dev *pdev = mci_to_pci_dev(mci);
struct ie31200_priv *priv = mci->pvt_info;
pdev = to_pci_dev(mci->pdev);
/*
* The PCI ERRSTS register is deprecated, directly read the
* MMIO-mapped ECC error log registers.
*/
if (priv->cfg->msr_clear_eccerrlog_offset) {
info->eccerrlog[0] = lo_hi_readq(priv->c0errlog);
if (nr_channels == 2)
info->eccerrlog[1] = lo_hi_readq(priv->c1errlog);
ie31200_clear_error_info(mci);
return;
}
/*
* This is a mess because there is no atomic way to read all the
@ -309,46 +310,56 @@ static void ie31200_get_and_clear_error_info(struct mem_ctl_info *mci,
static void ie31200_process_error_info(struct mem_ctl_info *mci,
struct ie31200_error_info *info)
{
struct ie31200_priv *priv = mci->pvt_info;
struct res_config *cfg = priv->cfg;
int channel;
u64 log;
if (!(info->errsts & IE31200_ERRSTS_BITS))
return;
if (!cfg->msr_clear_eccerrlog_offset) {
if (!(info->errsts & IE31200_ERRSTS_BITS))
return;
if ((info->errsts ^ info->errsts2) & IE31200_ERRSTS_BITS) {
edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1, 0, 0, 0,
-1, -1, -1, "UE overwrote CE", "");
info->errsts = info->errsts2;
if ((info->errsts ^ info->errsts2) & IE31200_ERRSTS_BITS) {
edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1, 0, 0, 0,
-1, -1, -1, "UE overwrote CE", "");
info->errsts = info->errsts2;
}
}
for (channel = 0; channel < nr_channels; channel++) {
log = info->eccerrlog[channel];
if (log & IE31200_ECCERRLOG_UE) {
if (log & cfg->reg_eccerrlog_ue_mask) {
edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1,
0, 0, 0,
eccerrlog_row(log),
info->erraddr >> PAGE_SHIFT, 0, 0,
field_get(cfg->reg_eccerrlog_rank_mask, log),
channel, -1,
"ie31200 UE", "");
} else if (log & IE31200_ECCERRLOG_CE) {
} else if (log & cfg->reg_eccerrlog_ce_mask) {
edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1,
0, 0,
IE31200_ECCERRLOG_SYNDROME(log),
eccerrlog_row(log),
info->erraddr >> PAGE_SHIFT, 0,
field_get(cfg->reg_eccerrlog_syndrome_mask, log),
field_get(cfg->reg_eccerrlog_rank_mask, log),
channel, -1,
"ie31200 CE", "");
}
}
}
static void ie31200_check(struct mem_ctl_info *mci)
static void __ie31200_check(struct mem_ctl_info *mci, struct mce *mce)
{
struct ie31200_error_info info;
info.erraddr = mce ? mce->addr : 0;
ie31200_get_and_clear_error_info(mci, &info);
ie31200_process_error_info(mci, &info);
}
static void __iomem *ie31200_map_mchbar(struct pci_dev *pdev)
static void ie31200_check(struct mem_ctl_info *mci)
{
__ie31200_check(mci, NULL);
}
static void __iomem *ie31200_map_mchbar(struct pci_dev *pdev, struct res_config *cfg, int mc)
{
union {
u64 mchbar;
@ -361,7 +372,8 @@ static void __iomem *ie31200_map_mchbar(struct pci_dev *pdev)
pci_read_config_dword(pdev, IE31200_MCHBAR_LOW, &u.mchbar_low);
pci_read_config_dword(pdev, IE31200_MCHBAR_HIGH, &u.mchbar_high);
u.mchbar &= IE31200_MCHBAR_MASK;
u.mchbar &= cfg->reg_mchbar_mask;
u.mchbar += cfg->reg_mchbar_window_size * mc;
if (u.mchbar != (resource_size_t)u.mchbar) {
ie31200_printk(KERN_ERR, "mmio space beyond accessible range (0x%llx)\n",
@ -369,7 +381,7 @@ static void __iomem *ie31200_map_mchbar(struct pci_dev *pdev)
return NULL;
}
window = ioremap(u.mchbar, IE31200_MMR_WINDOW_SIZE);
window = ioremap(u.mchbar, cfg->reg_mchbar_window_size);
if (!window)
ie31200_printk(KERN_ERR, "Cannot map mmio space at 0x%llx\n",
(unsigned long long)u.mchbar);
@ -377,155 +389,108 @@ static void __iomem *ie31200_map_mchbar(struct pci_dev *pdev)
return window;
}
static void __skl_populate_dimm_info(struct dimm_data *dd, u32 addr_decode,
int chan)
static void populate_dimm_info(struct dimm_data *dd, u32 addr_decode, int dimm,
struct res_config *cfg)
{
dd->size = (addr_decode >> (chan << 4)) & IE31200_MAD_DIMM_SIZE;
dd->dual_rank = (addr_decode & (IE31200_MAD_DIMM_A_RANK_SKL << (chan << 4))) ? 1 : 0;
dd->x16_width = ((addr_decode & (IE31200_MAD_DIMM_A_WIDTH_SKL << (chan << 4))) >>
(IE31200_MAD_DIMM_A_WIDTH_SKL_SHIFT + (chan << 4)));
dd->size = field_get(cfg->reg_mad_dimm_size_mask[dimm], addr_decode) * cfg->reg_mad_dimm_size_granularity;
dd->ranks = field_get(cfg->reg_mad_dimm_rank_mask[dimm], addr_decode) + 1;
dd->dtype = field_get(cfg->reg_mad_dimm_width_mask[dimm], addr_decode) + DEV_X8;
}
static void __populate_dimm_info(struct dimm_data *dd, u32 addr_decode,
int chan)
static void ie31200_get_dimm_config(struct mem_ctl_info *mci, void __iomem *window,
struct res_config *cfg, int mc)
{
dd->size = (addr_decode >> (chan << 3)) & IE31200_MAD_DIMM_SIZE;
dd->dual_rank = (addr_decode & (IE31200_MAD_DIMM_A_RANK << chan)) ? 1 : 0;
dd->x16_width = (addr_decode & (IE31200_MAD_DIMM_A_WIDTH << chan)) ? 1 : 0;
}
struct dimm_data dimm_info;
struct dimm_info *dimm;
unsigned long nr_pages;
u32 addr_decode;
int i, j, k;
static void populate_dimm_info(struct dimm_data *dd, u32 addr_decode, int chan,
bool skl)
{
if (skl)
__skl_populate_dimm_info(dd, addr_decode, chan);
else
__populate_dimm_info(dd, addr_decode, chan);
}
for (i = 0; i < IE31200_CHANNELS; i++) {
addr_decode = readl(window + cfg->reg_mad_dimm_offset[i]);
edac_dbg(0, "addr_decode: 0x%x\n", addr_decode);
for (j = 0; j < IE31200_DIMMS_PER_CHANNEL; j++) {
populate_dimm_info(&dimm_info, addr_decode, j, cfg);
edac_dbg(0, "mc: %d, channel: %d, dimm: %d, size: %lld MiB, ranks: %d, DRAM chip type: %d\n",
mc, i, j, dimm_info.size >> 20,
dimm_info.ranks,
dimm_info.dtype);
static int ie31200_probe1(struct pci_dev *pdev, int dev_idx)
{
int i, j, ret;
struct mem_ctl_info *mci = NULL;
struct edac_mc_layer layers[2];
struct dimm_data dimm_info[IE31200_CHANNELS][IE31200_DIMMS_PER_CHANNEL];
void __iomem *window;
struct ie31200_priv *priv;
u32 addr_decode, mad_offset;
nr_pages = MiB_TO_PAGES(dimm_info.size >> 20);
if (nr_pages == 0)
continue;
/*
* Kaby Lake, Coffee Lake seem to work like Skylake. Please re-visit
* this logic when adding new CPU support.
*/
bool skl = DEVICE_ID_SKYLAKE_OR_LATER(pdev->device);
edac_dbg(0, "MC:\n");
if (!ecc_capable(pdev)) {
ie31200_printk(KERN_INFO, "No ECC support\n");
return -ENODEV;
nr_pages = nr_pages / dimm_info.ranks;
for (k = 0; k < dimm_info.ranks; k++) {
dimm = edac_get_dimm(mci, (j * dimm_info.ranks) + k, i, 0);
dimm->nr_pages = nr_pages;
edac_dbg(0, "set nr pages: 0x%lx\n", nr_pages);
dimm->grain = 8; /* just a guess */
dimm->mtype = cfg->mtype;
dimm->dtype = dimm_info.dtype;
dimm->edac_mode = EDAC_UNKNOWN;
}
}
}
}
static int ie31200_register_mci(struct pci_dev *pdev, struct res_config *cfg, int mc)
{
struct edac_mc_layer layers[2];
struct ie31200_priv *priv;
struct mem_ctl_info *mci;
void __iomem *window;
int ret;
nr_channels = how_many_channels(pdev);
layers[0].type = EDAC_MC_LAYER_CHIP_SELECT;
layers[0].size = IE31200_DIMMS;
layers[0].size = IE31200_RANKS_PER_CHANNEL;
layers[0].is_virt_csrow = true;
layers[1].type = EDAC_MC_LAYER_CHANNEL;
layers[1].size = nr_channels;
layers[1].is_virt_csrow = false;
mci = edac_mc_alloc(0, ARRAY_SIZE(layers), layers,
mci = edac_mc_alloc(mc, ARRAY_SIZE(layers), layers,
sizeof(struct ie31200_priv));
if (!mci)
return -ENOMEM;
window = ie31200_map_mchbar(pdev);
window = ie31200_map_mchbar(pdev, cfg, mc);
if (!window) {
ret = -ENODEV;
goto fail_free;
}
edac_dbg(3, "MC: init mci\n");
mci->pdev = &pdev->dev;
if (skl)
mci->mtype_cap = MEM_FLAG_DDR4;
else
mci->mtype_cap = MEM_FLAG_DDR3;
mci->mtype_cap = BIT(cfg->mtype);
mci->edac_ctl_cap = EDAC_FLAG_SECDED;
mci->edac_cap = EDAC_FLAG_SECDED;
mci->mod_name = EDAC_MOD_STR;
mci->ctl_name = ie31200_devs[dev_idx].ctl_name;
mci->ctl_name = ie31200_devs[mc].ctl_name;
mci->dev_name = pci_name(pdev);
mci->edac_check = ie31200_check;
mci->edac_check = cfg->cmci ? NULL : ie31200_check;
mci->ctl_page_to_phys = NULL;
priv = mci->pvt_info;
priv->window = window;
if (skl) {
priv->c0errlog = window + IE31200_C0ECCERRLOG_SKL;
priv->c1errlog = window + IE31200_C1ECCERRLOG_SKL;
mad_offset = IE31200_MAD_DIMM_0_OFFSET_SKL;
} else {
priv->c0errlog = window + IE31200_C0ECCERRLOG;
priv->c1errlog = window + IE31200_C1ECCERRLOG;
mad_offset = IE31200_MAD_DIMM_0_OFFSET;
}
/* populate DIMM info */
for (i = 0; i < IE31200_CHANNELS; i++) {
addr_decode = readl(window + mad_offset +
(i * 4));
edac_dbg(0, "addr_decode: 0x%x\n", addr_decode);
for (j = 0; j < IE31200_DIMMS_PER_CHANNEL; j++) {
populate_dimm_info(&dimm_info[i][j], addr_decode, j,
skl);
edac_dbg(0, "size: 0x%x, rank: %d, width: %d\n",
dimm_info[i][j].size,
dimm_info[i][j].dual_rank,
dimm_info[i][j].x16_width);
}
}
priv->c0errlog = window + cfg->reg_eccerrlog_offset[0];
priv->c1errlog = window + cfg->reg_eccerrlog_offset[1];
priv->cfg = cfg;
priv->mci = mci;
priv->pdev = pdev;
device_initialize(&priv->dev);
/*
* The dram rank boundary (DRB) reg values are boundary addresses
* for each DRAM rank with a granularity of 64MB. DRB regs are
* cumulative; the last one will contain the total memory
* contained in all ranks.
* The EDAC core uses mci->pdev (pointer to the struct device)
* as the memory controller ID. The SoCs attach one or more memory
* controllers to a single pci_dev (a single pci_dev->dev can
* correspond to multiple memory controllers).
*
* To make mci->pdev unique, assign pci_dev->dev to mci->pdev
* for the first memory controller and assign a unique priv->dev
* to mci->pdev for each additional memory controller.
*/
for (i = 0; i < IE31200_DIMMS_PER_CHANNEL; i++) {
for (j = 0; j < IE31200_CHANNELS; j++) {
struct dimm_info *dimm;
unsigned long nr_pages;
nr_pages = IE31200_PAGES(dimm_info[j][i].size, skl);
if (nr_pages == 0)
continue;
if (dimm_info[j][i].dual_rank) {
nr_pages = nr_pages / 2;
dimm = edac_get_dimm(mci, (i * 2) + 1, j, 0);
dimm->nr_pages = nr_pages;
edac_dbg(0, "set nr pages: 0x%lx\n", nr_pages);
dimm->grain = 8; /* just a guess */
if (skl)
dimm->mtype = MEM_DDR4;
else
dimm->mtype = MEM_DDR3;
dimm->dtype = DEV_UNKNOWN;
dimm->edac_mode = EDAC_UNKNOWN;
}
dimm = edac_get_dimm(mci, i * 2, j, 0);
dimm->nr_pages = nr_pages;
edac_dbg(0, "set nr pages: 0x%lx\n", nr_pages);
dimm->grain = 8; /* same guess */
if (skl)
dimm->mtype = MEM_DDR4;
else
dimm->mtype = MEM_DDR3;
dimm->dtype = DEV_UNKNOWN;
dimm->edac_mode = EDAC_UNKNOWN;
}
}
mci->pdev = mc ? &priv->dev : &pdev->dev;
ie31200_get_dimm_config(mci, window, cfg, mc);
ie31200_clear_error_info(mci);
if (edac_mc_add_mc(mci)) {
@ -534,16 +499,115 @@ static int ie31200_probe1(struct pci_dev *pdev, int dev_idx)
goto fail_unmap;
}
/* get this far and it's successful */
ie31200_pvt.priv[mc] = priv;
return 0;
fail_unmap:
iounmap(window);
fail_free:
edac_mc_free(mci);
return ret;
}
static void mce_check(struct mce *mce)
{
struct ie31200_priv *priv;
int i;
for (i = 0; i < IE31200_IMC_NUM; i++) {
priv = ie31200_pvt.priv[i];
if (!priv)
continue;
__ie31200_check(priv->mci, mce);
}
}
static int mce_handler(struct notifier_block *nb, unsigned long val, void *data)
{
struct mce *mce = (struct mce *)data;
char *type;
if (mce->kflags & MCE_HANDLED_CEC)
return NOTIFY_DONE;
/*
* Ignore unless this is a memory related error.
* Don't check MCI_STATUS_ADDRV since it's not set on some CPUs.
*/
if ((mce->status & 0xefff) >> 7 != 1)
return NOTIFY_DONE;
type = mce->mcgstatus & MCG_STATUS_MCIP ? "Exception" : "Event";
edac_dbg(0, "CPU %d: Machine Check %s: 0x%llx Bank %d: 0x%llx\n",
mce->extcpu, type, mce->mcgstatus,
mce->bank, mce->status);
edac_dbg(0, "TSC 0x%llx\n", mce->tsc);
edac_dbg(0, "ADDR 0x%llx\n", mce->addr);
edac_dbg(0, "MISC 0x%llx\n", mce->misc);
edac_dbg(0, "PROCESSOR %u:0x%x TIME %llu SOCKET %u APIC 0x%x\n",
mce->cpuvendor, mce->cpuid, mce->time,
mce->socketid, mce->apicid);
mce_check(mce);
mce->kflags |= MCE_HANDLED_EDAC;
return NOTIFY_DONE;
}
static struct notifier_block ie31200_mce_dec = {
.notifier_call = mce_handler,
.priority = MCE_PRIO_EDAC,
};
static void ie31200_unregister_mcis(void)
{
struct ie31200_priv *priv;
struct mem_ctl_info *mci;
int i;
for (i = 0; i < IE31200_IMC_NUM; i++) {
priv = ie31200_pvt.priv[i];
if (!priv)
continue;
mci = priv->mci;
edac_mc_del_mc(mci->pdev);
iounmap(priv->window);
edac_mc_free(mci);
}
}
static int ie31200_probe1(struct pci_dev *pdev, struct res_config *cfg)
{
int i, ret;
edac_dbg(0, "MC:\n");
if (!ecc_capable(pdev)) {
ie31200_printk(KERN_INFO, "No ECC support\n");
return -ENODEV;
}
for (i = 0; i < cfg->imc_num; i++) {
ret = ie31200_register_mci(pdev, cfg, i);
if (ret)
goto fail_register;
}
if (cfg->cmci) {
mce_register_decode_chain(&ie31200_mce_dec);
edac_op_state = EDAC_OPSTATE_INT;
} else {
edac_op_state = EDAC_OPSTATE_POLL;
}
/* get this far and it's successful. */
edac_dbg(3, "MC: success\n");
return 0;
fail_unmap:
iounmap(window);
fail_free:
edac_mc_free(mci);
fail_register:
ie31200_unregister_mcis();
return ret;
}
@ -555,7 +619,7 @@ static int ie31200_init_one(struct pci_dev *pdev,
edac_dbg(0, "MC:\n");
if (pci_enable_device(pdev) < 0)
return -EIO;
rc = ie31200_probe1(pdev, ent->driver_data);
rc = ie31200_probe1(pdev, (struct res_config *)ent->driver_data);
if (rc == 0 && !mci_pdev)
mci_pdev = pci_dev_get(pdev);
@ -564,43 +628,112 @@ static int ie31200_init_one(struct pci_dev *pdev,
static void ie31200_remove_one(struct pci_dev *pdev)
{
struct mem_ctl_info *mci;
struct ie31200_priv *priv;
struct ie31200_priv *priv = ie31200_pvt.priv[0];
edac_dbg(0, "\n");
pci_dev_put(mci_pdev);
mci_pdev = NULL;
mci = edac_mc_del_mc(&pdev->dev);
if (!mci)
return;
priv = mci->pvt_info;
iounmap(priv->window);
edac_mc_free(mci);
if (priv->cfg->cmci)
mce_unregister_decode_chain(&ie31200_mce_dec);
ie31200_unregister_mcis();
}
static struct res_config snb_cfg = {
.mtype = MEM_DDR3,
.imc_num = 1,
.reg_mchbar_mask = GENMASK_ULL(38, 15),
.reg_mchbar_window_size = BIT_ULL(15),
.reg_eccerrlog_offset[0] = 0x40c8,
.reg_eccerrlog_offset[1] = 0x44c8,
.reg_eccerrlog_ce_mask = BIT_ULL(0),
.reg_eccerrlog_ue_mask = BIT_ULL(1),
.reg_eccerrlog_rank_mask = GENMASK_ULL(28, 27),
.reg_eccerrlog_syndrome_mask = GENMASK_ULL(23, 16),
.reg_mad_dimm_size_granularity = BIT_ULL(28),
.reg_mad_dimm_offset[0] = 0x5004,
.reg_mad_dimm_offset[1] = 0x5008,
.reg_mad_dimm_size_mask[0] = GENMASK(7, 0),
.reg_mad_dimm_size_mask[1] = GENMASK(15, 8),
.reg_mad_dimm_rank_mask[0] = BIT(17),
.reg_mad_dimm_rank_mask[1] = BIT(18),
.reg_mad_dimm_width_mask[0] = BIT(19),
.reg_mad_dimm_width_mask[1] = BIT(20),
};
static struct res_config skl_cfg = {
.mtype = MEM_DDR4,
.imc_num = 1,
.reg_mchbar_mask = GENMASK_ULL(38, 15),
.reg_mchbar_window_size = BIT_ULL(15),
.reg_eccerrlog_offset[0] = 0x4048,
.reg_eccerrlog_offset[1] = 0x4448,
.reg_eccerrlog_ce_mask = BIT_ULL(0),
.reg_eccerrlog_ue_mask = BIT_ULL(1),
.reg_eccerrlog_rank_mask = GENMASK_ULL(28, 27),
.reg_eccerrlog_syndrome_mask = GENMASK_ULL(23, 16),
.reg_mad_dimm_size_granularity = BIT_ULL(30),
.reg_mad_dimm_offset[0] = 0x500c,
.reg_mad_dimm_offset[1] = 0x5010,
.reg_mad_dimm_size_mask[0] = GENMASK(5, 0),
.reg_mad_dimm_size_mask[1] = GENMASK(21, 16),
.reg_mad_dimm_rank_mask[0] = BIT(10),
.reg_mad_dimm_rank_mask[1] = BIT(26),
.reg_mad_dimm_width_mask[0] = GENMASK(9, 8),
.reg_mad_dimm_width_mask[1] = GENMASK(25, 24),
};
struct res_config rpl_s_cfg = {
.mtype = MEM_DDR5,
.cmci = true,
.imc_num = 2,
.reg_mchbar_mask = GENMASK_ULL(41, 17),
.reg_mchbar_window_size = BIT_ULL(16),
.reg_eccerrlog_offset[0] = 0xe048,
.reg_eccerrlog_offset[1] = 0xe848,
.reg_eccerrlog_ce_mask = BIT_ULL(0),
.reg_eccerrlog_ce_ovfl_mask = BIT_ULL(1),
.reg_eccerrlog_ue_mask = BIT_ULL(2),
.reg_eccerrlog_ue_ovfl_mask = BIT_ULL(3),
.reg_eccerrlog_rank_mask = GENMASK_ULL(28, 27),
.reg_eccerrlog_syndrome_mask = GENMASK_ULL(23, 16),
.msr_clear_eccerrlog_offset = 0x791,
.reg_mad_dimm_offset[0] = 0xd80c,
.reg_mad_dimm_offset[1] = 0xd810,
.reg_mad_dimm_size_granularity = BIT_ULL(29),
.reg_mad_dimm_size_mask[0] = GENMASK(6, 0),
.reg_mad_dimm_size_mask[1] = GENMASK(22, 16),
.reg_mad_dimm_rank_mask[0] = GENMASK(10, 9),
.reg_mad_dimm_rank_mask[1] = GENMASK(27, 26),
.reg_mad_dimm_width_mask[0] = GENMASK(8, 7),
.reg_mad_dimm_width_mask[1] = GENMASK(25, 24),
};
static const struct pci_device_id ie31200_pci_tbl[] = {
{ PCI_VEND_DEV(INTEL, IE31200_HB_1), PCI_ANY_ID, PCI_ANY_ID, 0, 0, IE31200 },
{ PCI_VEND_DEV(INTEL, IE31200_HB_2), PCI_ANY_ID, PCI_ANY_ID, 0, 0, IE31200 },
{ PCI_VEND_DEV(INTEL, IE31200_HB_3), PCI_ANY_ID, PCI_ANY_ID, 0, 0, IE31200 },
{ PCI_VEND_DEV(INTEL, IE31200_HB_4), PCI_ANY_ID, PCI_ANY_ID, 0, 0, IE31200 },
{ PCI_VEND_DEV(INTEL, IE31200_HB_5), PCI_ANY_ID, PCI_ANY_ID, 0, 0, IE31200 },
{ PCI_VEND_DEV(INTEL, IE31200_HB_6), PCI_ANY_ID, PCI_ANY_ID, 0, 0, IE31200 },
{ PCI_VEND_DEV(INTEL, IE31200_HB_7), PCI_ANY_ID, PCI_ANY_ID, 0, 0, IE31200 },
{ PCI_VEND_DEV(INTEL, IE31200_HB_8), PCI_ANY_ID, PCI_ANY_ID, 0, 0, IE31200 },
{ PCI_VEND_DEV(INTEL, IE31200_HB_9), PCI_ANY_ID, PCI_ANY_ID, 0, 0, IE31200 },
{ PCI_VEND_DEV(INTEL, IE31200_HB_10), PCI_ANY_ID, PCI_ANY_ID, 0, 0, IE31200 },
{ PCI_VEND_DEV(INTEL, IE31200_HB_11), PCI_ANY_ID, PCI_ANY_ID, 0, 0, IE31200 },
{ PCI_VEND_DEV(INTEL, IE31200_HB_12), PCI_ANY_ID, PCI_ANY_ID, 0, 0, IE31200 },
{ PCI_VEND_DEV(INTEL, IE31200_HB_CFL_1), PCI_ANY_ID, PCI_ANY_ID, 0, 0, IE31200 },
{ PCI_VEND_DEV(INTEL, IE31200_HB_CFL_2), PCI_ANY_ID, PCI_ANY_ID, 0, 0, IE31200 },
{ PCI_VEND_DEV(INTEL, IE31200_HB_CFL_3), PCI_ANY_ID, PCI_ANY_ID, 0, 0, IE31200 },
{ PCI_VEND_DEV(INTEL, IE31200_HB_CFL_4), PCI_ANY_ID, PCI_ANY_ID, 0, 0, IE31200 },
{ PCI_VEND_DEV(INTEL, IE31200_HB_CFL_5), PCI_ANY_ID, PCI_ANY_ID, 0, 0, IE31200 },
{ PCI_VEND_DEV(INTEL, IE31200_HB_CFL_6), PCI_ANY_ID, PCI_ANY_ID, 0, 0, IE31200 },
{ PCI_VEND_DEV(INTEL, IE31200_HB_CFL_7), PCI_ANY_ID, PCI_ANY_ID, 0, 0, IE31200 },
{ PCI_VEND_DEV(INTEL, IE31200_HB_CFL_8), PCI_ANY_ID, PCI_ANY_ID, 0, 0, IE31200 },
{ PCI_VEND_DEV(INTEL, IE31200_HB_CFL_9), PCI_ANY_ID, PCI_ANY_ID, 0, 0, IE31200 },
{ PCI_VEND_DEV(INTEL, IE31200_HB_CFL_10), PCI_ANY_ID, PCI_ANY_ID, 0, 0, IE31200 },
{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_1), (kernel_ulong_t)&snb_cfg },
{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_2), (kernel_ulong_t)&snb_cfg },
{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_3), (kernel_ulong_t)&snb_cfg },
{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_4), (kernel_ulong_t)&snb_cfg },
{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_5), (kernel_ulong_t)&snb_cfg },
{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_6), (kernel_ulong_t)&snb_cfg },
{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_7), (kernel_ulong_t)&snb_cfg },
{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_8), (kernel_ulong_t)&skl_cfg },
{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_9), (kernel_ulong_t)&skl_cfg },
{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_10), (kernel_ulong_t)&skl_cfg },
{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_11), (kernel_ulong_t)&skl_cfg },
{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_12), (kernel_ulong_t)&skl_cfg },
{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_CFL_1), (kernel_ulong_t)&skl_cfg },
{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_CFL_2), (kernel_ulong_t)&skl_cfg },
{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_CFL_3), (kernel_ulong_t)&skl_cfg },
{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_CFL_4), (kernel_ulong_t)&skl_cfg },
{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_CFL_5), (kernel_ulong_t)&skl_cfg },
{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_CFL_6), (kernel_ulong_t)&skl_cfg },
{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_CFL_7), (kernel_ulong_t)&skl_cfg },
{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_CFL_8), (kernel_ulong_t)&skl_cfg },
{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_CFL_9), (kernel_ulong_t)&skl_cfg },
{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_CFL_10), (kernel_ulong_t)&skl_cfg },
{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_RPL_S_1), (kernel_ulong_t)&rpl_s_cfg},
{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_RPL_S_2), (kernel_ulong_t)&rpl_s_cfg},
{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_RPL_S_3), (kernel_ulong_t)&rpl_s_cfg},
{ 0, } /* 0 terminated list. */
};
MODULE_DEVICE_TABLE(pci, ie31200_pci_tbl);
@ -617,12 +750,10 @@ static int __init ie31200_init(void)
int pci_rc, i;
edac_dbg(3, "MC:\n");
/* Ensure that the OPSTATE is set correctly for POLL or NMI */
opstate_init();
pci_rc = pci_register_driver(&ie31200_driver);
if (pci_rc < 0)
goto fail0;
return pci_rc;
if (!mci_pdev) {
ie31200_registered = 0;
@ -633,11 +764,13 @@ static int __init ie31200_init(void)
if (mci_pdev)
break;
}
if (!mci_pdev) {
edac_dbg(0, "ie31200 pci_get_device fail\n");
pci_rc = -ENODEV;
goto fail1;
goto fail0;
}
pci_rc = ie31200_init_one(mci_pdev, &ie31200_pci_tbl[i]);
if (pci_rc < 0) {
edac_dbg(0, "ie31200 init fail\n");
@ -645,12 +778,12 @@ static int __init ie31200_init(void)
goto fail1;
}
}
return 0;
return 0;
fail1:
pci_unregister_driver(&ie31200_driver);
fail0:
pci_dev_put(mci_pdev);
fail0:
pci_unregister_driver(&ie31200_driver);
return pci_rc;
}
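
The field_get() macro introduced above behaves like FIELD_GET() but accepts masks that are not compile-time constants, which is what lets one code path serve every res_config table. A userspace-checkable sketch of the arithmetic (GENMASK_ULL re-derived here only for illustration):

#include <stdio.h>
#include <stdint.h>
#include <strings.h>	/* ffs() */

#define GENMASK_ULL(h, l)	((~0ULL >> (63 - (h))) & (~0ULL << (l)))
#define field_get(_mask, _reg)	(((_reg) & (_mask)) >> (ffs(_mask) - 1))

int main(void)
{
	/* rank 3 logged in ECCERRLOG bits 28:27 (snb_cfg's rank mask) */
	uint64_t log = 0x18000000ULL;

	printf("rank %llu\n",
	       (unsigned long long)field_get(GENMASK_ULL(28, 27), log));	/* prints "rank 3" */
	return 0;
}

Since ffs() takes an int, this variant only works for masks confined to the low 31 bits, which holds for every mask that field_get() is applied to in this driver.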

drivers/edac/igen6_edac.c

@ -125,7 +125,7 @@
#define MEM_SLICE_HASH_MASK(v) (GET_BITFIELD(v, 6, 19) << 6)
#define MEM_SLICE_HASH_LSB_MASK_BIT(v) GET_BITFIELD(v, 24, 26)
static struct res_config {
static const struct res_config {
bool machine_check;
int num_imc;
u32 imc_base;
@ -472,7 +472,7 @@ static u64 rpl_p_err_addr(u64 ecclog)
return ECC_ERROR_LOG_ADDR45(ecclog);
}
static struct res_config ehl_cfg = {
static const struct res_config ehl_cfg = {
.num_imc = 1,
.imc_base = 0x5000,
.ibecc_base = 0xdc00,
@ -482,7 +482,7 @@ static struct res_config ehl_cfg = {
.err_addr_to_imc_addr = ehl_err_addr_to_imc_addr,
};
static struct res_config icl_cfg = {
static const struct res_config icl_cfg = {
.num_imc = 1,
.imc_base = 0x5000,
.ibecc_base = 0xd800,
@ -492,7 +492,7 @@ static struct res_config icl_cfg = {
.err_addr_to_imc_addr = ehl_err_addr_to_imc_addr,
};
static struct res_config tgl_cfg = {
static const struct res_config tgl_cfg = {
.machine_check = true,
.num_imc = 2,
.imc_base = 0x5000,
@ -506,7 +506,7 @@ static struct res_config tgl_cfg = {
.err_addr_to_imc_addr = tgl_err_addr_to_imc_addr,
};
static struct res_config adl_cfg = {
static const struct res_config adl_cfg = {
.machine_check = true,
.num_imc = 2,
.imc_base = 0xd800,
@ -517,7 +517,7 @@ static struct res_config adl_cfg = {
.err_addr_to_imc_addr = adl_err_addr_to_imc_addr,
};
static struct res_config adl_n_cfg = {
static const struct res_config adl_n_cfg = {
.machine_check = true,
.num_imc = 1,
.imc_base = 0xd800,
@ -528,7 +528,7 @@ static struct res_config adl_n_cfg = {
.err_addr_to_imc_addr = adl_err_addr_to_imc_addr,
};
static struct res_config rpl_p_cfg = {
static const struct res_config rpl_p_cfg = {
.machine_check = true,
.num_imc = 2,
.imc_base = 0xd800,
@ -540,7 +540,7 @@ static struct res_config rpl_p_cfg = {
.err_addr_to_imc_addr = adl_err_addr_to_imc_addr,
};
static struct res_config mtl_ps_cfg = {
static const struct res_config mtl_ps_cfg = {
.machine_check = true,
.num_imc = 2,
.imc_base = 0xd800,
@ -551,7 +551,7 @@ static struct res_config mtl_ps_cfg = {
.err_addr_to_imc_addr = adl_err_addr_to_imc_addr,
};
static struct res_config mtl_p_cfg = {
static const struct res_config mtl_p_cfg = {
.machine_check = true,
.num_imc = 2,
.imc_base = 0xd800,
@ -785,13 +785,22 @@ static u64 ecclog_read_and_clear(struct igen6_imc *imc)
{
u64 ecclog = readq(imc->window + ECC_ERROR_LOG_OFFSET);
if (ecclog & (ECC_ERROR_LOG_CE | ECC_ERROR_LOG_UE)) {
/* Clear CE/UE bits by writing 1s */
writeq(ecclog, imc->window + ECC_ERROR_LOG_OFFSET);
return ecclog;
}
/*
* Quirk: The ECC_ERROR_LOG register of certain SoCs may contain
* the invalid value ~0. This will result in a flood of invalid
* error reports in polling mode. Skip it.
*/
if (ecclog == ~0)
return 0;
return 0;
/* Neither a CE nor a UE. Skip it. */
if (!(ecclog & (ECC_ERROR_LOG_CE | ECC_ERROR_LOG_UE)))
return 0;
/* Clear CE/UE bits by writing 1s */
writeq(ecclog, imc->window + ECC_ERROR_LOG_OFFSET);
return ecclog;
}
static void errsts_clear(struct igen6_imc *imc)
@ -1374,7 +1383,7 @@ static void unregister_err_handler(void)
unregister_nmi_handler(NMI_SERR, IGEN6_NMI_NAME);
}
static void opstate_set(struct res_config *cfg, const struct pci_device_id *ent)
static void opstate_set(const struct res_config *cfg, const struct pci_device_id *ent)
{
/*
* Quirk: Certain SoCs' error reporting interrupts don't work.

drivers/edac/pnd2_edac.c

@ -372,7 +372,7 @@ static int gen_asym_mask(struct b_cr_slice_channel_hash *p,
struct b_cr_asym_mem_region1_mchbar *as1,
struct b_cr_asym_2way_mem_region_mchbar *as2way)
{
const int intlv[] = { 0x5, 0xA, 0x3, 0xC };
static const int intlv[] = { 0x5, 0xA, 0x3, 0xC };
int mask = 0;
if (as2way->asym_2way_interleave_enable)
@ -489,7 +489,7 @@ static int dnv_get_registers(void)
*/
static int get_registers(void)
{
const int intlv[] = { 10, 11, 12, 12 };
static const int intlv[] = { 10, 11, 12, 12 };
if (RD_REG(&tolud, b_cr_tolud_pci) ||
RD_REG(&touud_lo, b_cr_touud_lo_pci) ||
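
The 'static' additions here are not cosmetic: a non-static const array local to a function may be rebuilt on the stack on every call, whereas 'static const' places it in .rodata once. A minimal illustration (hypothetical function, not from the driver):

static int pick_interleave(int i)
{
	static const int intlv[] = { 0x5, 0xA, 0x3, 0xC };

	return intlv[i & 3];	/* no per-call initialization of intlv[] */
}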

drivers/edac/skx_common.c

@ -121,6 +121,35 @@ void skx_adxl_put(void)
}
EXPORT_SYMBOL_GPL(skx_adxl_put);
static void skx_init_mc_mapping(struct skx_dev *d)
{
/*
* By default, the BIOS presents all memory controllers within each
* socket to the EDAC driver. The physical indices are the same as
* the logical indices of the memory controllers enumerated by the
* EDAC driver.
*/
for (int i = 0; i < NUM_IMC; i++)
d->mc_mapping[i] = i;
}
void skx_set_mc_mapping(struct skx_dev *d, u8 pmc, u8 lmc)
{
edac_dbg(0, "Set the mapping of mc phy idx to logical idx: %02d -> %02d\n",
pmc, lmc);
d->mc_mapping[pmc] = lmc;
}
EXPORT_SYMBOL_GPL(skx_set_mc_mapping);
static u8 skx_get_mc_mapping(struct skx_dev *d, u8 pmc)
{
edac_dbg(0, "Get the mapping of mc phy idx to logical idx: %02d -> %02d\n",
pmc, d->mc_mapping[pmc]);
return d->mc_mapping[pmc];
}
static bool skx_adxl_decode(struct decoded_addr *res, enum error_source err_src)
{
struct skx_dev *d;
@ -188,6 +217,8 @@ static bool skx_adxl_decode(struct decoded_addr *res, enum error_source err_src)
return false;
}
res->imc = skx_get_mc_mapping(d, res->imc);
for (i = 0; i < adxl_component_count; i++) {
if (adxl_values[i] == ~0x0ull)
continue;
@ -326,6 +357,8 @@ int skx_get_all_bus_mappings(struct res_config *cfg, struct list_head **list)
d->bus[0], d->bus[1], d->bus[2], d->bus[3]);
list_add_tail(&d->list, &dev_edac_list);
prev = pdev;
skx_init_mc_mapping(d);
}
if (list)

drivers/edac/skx_common.h

@ -93,6 +93,16 @@ struct skx_dev {
struct pci_dev *uracu; /* for i10nm CPU */
struct pci_dev *pcu_cr3; /* for HBM memory detection */
u32 mcroute;
/*
* Some server BIOS may hide certain memory controllers, and the
* EDAC driver skips those hidden memory controllers. However, the
* ADXL still decodes memory error address using physical memory
* controller indices. The mapping table is used to convert the
* physical indices (reported by ADXL) to the logical indices
* (used by the EDAC driver) of present memory controllers during the
* error handling process.
*/
u8 mc_mapping[NUM_IMC];
struct skx_imc {
struct mem_ctl_info *mci;
struct pci_dev *mdev; /* for i10nm CPU */
@ -242,6 +252,7 @@ void skx_adxl_put(void);
void skx_set_decode(skx_decode_f decode, skx_show_retry_log_f show_retry_log);
void skx_set_mem_cfg(bool mem_cfg_2lm);
void skx_set_res_cfg(struct res_config *cfg);
void skx_set_mc_mapping(struct skx_dev *d, u8 pmc, u8 lmc);
int skx_get_src_id(struct skx_dev *d, int off, u8 *id);
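
Putting the two halves together, the expected flow on a system whose BIOS hides a memory controller looks roughly like this (hypothetical indices for illustration; skx_get_mc_mapping() is the static lookup shown in skx_common.c above):

/* enumeration: physical controller 1 is hidden by the BIOS */
skx_init_mc_mapping(d);		/* start from identity: mc_mapping[i] = i */
skx_set_mc_mapping(d, 0, 0);	/* pmc 0 -> logical mc 0 */
skx_set_mc_mapping(d, 2, 1);	/* pmc 2 -> logical mc 1 */

/* error handling: ADXL reports the physical index */
res->imc = skx_get_mc_mapping(d, res->imc);	/* 2 -> 1 */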

drivers/edac/xgene_edac.c

@ -15,6 +15,7 @@
#include <linux/of.h>
#include <linux/of_address.h>
#include <linux/regmap.h>
#include <linux/string_choices.h>
#include "edac_module.h"
@ -1407,7 +1408,7 @@ static void xgene_edac_iob_gic_report(struct edac_device_ctl_info *edac_dev)
dev_err(edac_dev->dev, "Multiple XGIC write size error\n");
info = readl(ctx->dev_csr + XGICTRANSERRREQINFO);
dev_err(edac_dev->dev, "XGIC %s access @ 0x%08X (0x%08X)\n",
info & REQTYPE_MASK ? "read" : "write", ERRADDR_RD(info),
str_read_write(info & REQTYPE_MASK), ERRADDR_RD(info),
info);
writel(reg, ctx->dev_csr + XGICTRANSERRINTSTS);
@ -1489,19 +1490,19 @@ static void xgene_edac_rb_report(struct edac_device_ctl_info *edac_dev)
if (reg & AGENT_OFFLINE_ERR_MASK)
dev_err(edac_dev->dev,
"IOB bus %s access to offline agent error\n",
write ? "write" : "read");
str_write_read(write));
if (reg & UNIMPL_RBPAGE_ERR_MASK)
dev_err(edac_dev->dev,
"IOB bus %s access to unimplemented page error\n",
write ? "write" : "read");
str_write_read(write));
if (reg & WORD_ALIGNED_ERR_MASK)
dev_err(edac_dev->dev,
"IOB bus %s word aligned access error\n",
write ? "write" : "read");
str_write_read(write));
if (reg & PAGE_ACCESS_ERR_MASK)
dev_err(edac_dev->dev,
"IOB bus %s to page out of range access error\n",
write ? "write" : "read");
str_write_read(write));
if (regmap_write(ctx->edac->rb_map, RBEIR, 0))
return;
if (regmap_write(ctx->edac->rb_map, RBCSR, 0))
@ -1560,7 +1561,7 @@ rb_skip:
err_addr_lo = readl(ctx->dev_csr + IOBBATRANSERRREQINFOL);
err_addr_hi = readl(ctx->dev_csr + IOBBATRANSERRREQINFOH);
dev_err(edac_dev->dev, "IOB BA %s access at 0x%02X.%08X (0x%08X)\n",
REQTYPE_F2_RD(err_addr_hi) ? "read" : "write",
str_read_write(REQTYPE_F2_RD(err_addr_hi)),
ERRADDRH_F2_RD(err_addr_hi), err_addr_lo, err_addr_hi);
if (reg & WRERR_RESP_MASK)
dev_err(edac_dev->dev, "IOB BA requestor ID 0x%08X\n",
@ -1611,7 +1612,7 @@ chk_iob_axi0:
dev_err(edac_dev->dev,
"%sAXI slave 0 illegal %s access @ 0x%02X.%08X (0x%08X)\n",
reg & IOBAXIS0_M_ILLEGAL_ACCESS_MASK ? "Multiple " : "",
REQTYPE_RD(err_addr_hi) ? "read" : "write",
str_read_write(REQTYPE_RD(err_addr_hi)),
ERRADDRH_RD(err_addr_hi), err_addr_lo, err_addr_hi);
writel(reg, ctx->dev_csr + IOBAXIS0TRANSERRINTSTS);
@ -1625,7 +1626,7 @@ chk_iob_axi1:
dev_err(edac_dev->dev,
"%sAXI slave 1 illegal %s access @ 0x%02X.%08X (0x%08X)\n",
reg & IOBAXIS0_M_ILLEGAL_ACCESS_MASK ? "Multiple " : "",
REQTYPE_RD(err_addr_hi) ? "read" : "write",
str_read_write(REQTYPE_RD(err_addr_hi)),
ERRADDRH_RD(err_addr_hi), err_addr_lo, err_addr_hi);
writel(reg, ctx->dev_csr + IOBAXIS1TRANSERRINTSTS);
}
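
Note the two argument-order variants used in these hunks: str_read_write() is for flags where "set" means a read, str_write_read() for flags where "set" means a write. A sketch of their semantics (the upstream header expresses the second in terms of the first):

static inline const char *str_read_write(bool v)
{
	return v ? "read" : "write";
}

#define str_write_read(v)	str_read_write(!(v))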