x86/mce/AMD: Read MSRs on the CPU allocating the threshold blocks
authorYazen Ghannam <Yazen.Ghannam@amd.com>
Mon, 12 Sep 2016 07:59:31 +0000 (09:59 +0200)
committerThomas Gleixner <tglx@linutronix.de>
Tue, 13 Sep 2016 13:23:08 +0000 (15:23 +0200)
Scalable MCA systems allow non-core MCA banks to only be accessible by
certain CPUs. The MSRs for these banks are Read-as-Zero on other CPUs.

During allocate_threshold_blocks(), get_block_address() can be scheduled
on CPUs other than the one allocating the block. This causes the MSRs to
be read on the wrong CPU and results in incorrect behavior.

Add a @cpu parameter to get_block_address() and pass this in to ensure
that the MSRs are only read on the CPU that is allocating the block.

Signed-off-by: Yazen Ghannam <Yazen.Ghannam@amd.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Link: http://lkml.kernel.org/r/1472673994-12235-2-git-send-email-Yazen.Ghannam@amd.com
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
arch/x86/kernel/cpu/mcheck/mce_amd.c

index 419e0ee3b12f4cedf0edf18420bb01c3049f8daa..9da92fb2e07378291c266c441ada28ce2e89143c 100644 (file)
@@ -293,7 +293,7 @@ static void deferred_error_interrupt_enable(struct cpuinfo_x86 *c)
        wrmsr(MSR_CU_DEF_ERR, low, high);
 }
 
-static u32 get_block_address(u32 current_addr, u32 low, u32 high,
+static u32 get_block_address(unsigned int cpu, u32 current_addr, u32 low, u32 high,
                             unsigned int bank, unsigned int block)
 {
        u32 addr = 0, offset = 0;
@@ -309,13 +309,13 @@ static u32 get_block_address(u32 current_addr, u32 low, u32 high,
                         */
                        u32 low, high;
 
-                       if (rdmsr_safe(MSR_AMD64_SMCA_MCx_CONFIG(bank), &low, &high))
+                       if (rdmsr_safe_on_cpu(cpu, MSR_AMD64_SMCA_MCx_CONFIG(bank), &low, &high))
                                return addr;
 
                        if (!(low & MCI_CONFIG_MCAX))
                                return addr;
 
-                       if (!rdmsr_safe(MSR_AMD64_SMCA_MCx_MISC(bank), &low, &high) &&
+                       if (!rdmsr_safe_on_cpu(cpu, MSR_AMD64_SMCA_MCx_MISC(bank), &low, &high) &&
                            (low & MASK_BLKPTR_LO))
                                addr = MSR_AMD64_SMCA_MCx_MISCy(bank, block - 1);
                }
@@ -421,12 +421,12 @@ out:
 void mce_amd_feature_init(struct cpuinfo_x86 *c)
 {
        u32 low = 0, high = 0, address = 0;
-       unsigned int bank, block;
+       unsigned int bank, block, cpu = smp_processor_id();
        int offset = -1;
 
        for (bank = 0; bank < mca_cfg.banks; ++bank) {
                for (block = 0; block < NR_BLOCKS; ++block) {
-                       address = get_block_address(address, low, high, bank, block);
+                       address = get_block_address(cpu, address, low, high, bank, block);
                        if (!address)
                                break;
 
@@ -544,15 +544,14 @@ static void amd_deferred_error_interrupt(void)
 static void amd_threshold_interrupt(void)
 {
        u32 low = 0, high = 0, address = 0;
-       int cpu = smp_processor_id();
-       unsigned int bank, block;
+       unsigned int bank, block, cpu = smp_processor_id();
 
        /* assume first bank caused it */
        for (bank = 0; bank < mca_cfg.banks; ++bank) {
                if (!(per_cpu(bank_map, cpu) & (1 << bank)))
                        continue;
                for (block = 0; block < NR_BLOCKS; ++block) {
-                       address = get_block_address(address, low, high, bank, block);
+                       address = get_block_address(cpu, address, low, high, bank, block);
                        if (!address)
                                break;
 
@@ -774,7 +773,7 @@ static int allocate_threshold_blocks(unsigned int cpu, unsigned int bank,
        if (err)
                goto out_free;
 recurse:
-       address = get_block_address(address, low, high, bank, ++block);
+       address = get_block_address(cpu, address, low, high, bank, ++block);
        if (!address)
                return 0;