[POWERPC] kdump: Reserve the existing TCE mappings left by the first kernel
author Haren Myneni <haren@us.ibm.com>
Fri, 23 Jun 2006 06:35:10 +0000 (23:35 -0700)
committer Paul Mackerras <paulus@samba.org>
Wed, 28 Jun 2006 01:59:46 +0000 (11:59 +1000)
During kdump boot, some machines were observed to checkstop on a DMA
protection fault caused by ongoing DMA left over from the first kernel.
Instead of initializing the TCE entries in iommu_init() for the kdump
boot, this patch fixes the issue by walking through each TCE table and
checking whether each entry is in use by the first kernel. If so, the
entry is reserved by setting the corresponding bit in tbl->it_map, so
that it will not be available to the kdump boot.

However, it is possible that all TCE entries are used up, for example
because of a driver bug that maps continuously. In my observation,
around 1700 TCE entries are in use on some systems (e.g. P4) at some
point during the kdump boot and while saving the dump (either writing
it to disk or sending it to a remote machine). Hence, this patch makes
sure that a minimum of 2048 entries is available, so that the kdump
boot can succeed in some of these cases.

Signed-off-by: Haren Myneni <haren@us.ibm.com>
Acked-by: Olof Johansson <olof@lixom.net>
Signed-off-by: Paul Mackerras <paulus@samba.org>
arch/powerpc/kernel/iommu.c
arch/powerpc/platforms/pseries/iommu.c
include/asm-powerpc/kdump.h
include/asm-powerpc/machdep.h

index 7cb77c20fc5d0a5c979eade8dbe1ea86ecfa4bad..3d677ac996592d1cb4c15462499c41b6e0e1ecf0 100644 (file)
@@ -38,6 +38,7 @@
 #include <asm/iommu.h>
 #include <asm/pci-bridge.h>
 #include <asm/machdep.h>
+#include <asm/kdump.h>
 
 #define DBG(...)
 
@@ -440,8 +441,37 @@ struct iommu_table *iommu_init_table(struct iommu_table *tbl, int nid)
        tbl->it_largehint = tbl->it_halfpoint;
        spin_lock_init(&tbl->it_lock);
 
+#ifdef CONFIG_CRASH_DUMP
+       if (ppc_md.tce_get) {
+               unsigned long index, tceval;
+               unsigned long tcecount = 0;
+
+               /*
+                * Reserve the existing mappings left by the first kernel.
+                */
+               for (index = 0; index < tbl->it_size; index++) {
+                       tceval = ppc_md.tce_get(tbl, index + tbl->it_offset);
+                       /*
+                        * Freed TCE entry contains 0x7fffffffffffffff on JS20
+                        */
+                       if (tceval && (tceval != 0x7fffffffffffffffUL)) {
+                               __set_bit(index, tbl->it_map);
+                               tcecount++;
+                       }
+               }
+               if ((tbl->it_size - tcecount) < KDUMP_MIN_TCE_ENTRIES) {
+                       printk(KERN_WARNING "TCE table is full; ");
+                       printk(KERN_WARNING "freeing %d entries for the kdump boot\n",
+                               KDUMP_MIN_TCE_ENTRIES);
+                       for (index = tbl->it_size - KDUMP_MIN_TCE_ENTRIES;
+                               index < tbl->it_size; index++)
+                               __clear_bit(index, tbl->it_map);
+               }
+       }
+#else
        /* Clear the hardware table in case firmware left allocations in it */
        ppc_md.tce_free(tbl, tbl->it_offset, tbl->it_size);
+#endif
 
        if (!welcomed) {
                printk(KERN_INFO "IOMMU table initialized, virtual merging %s\n",
index d03a8b078f9db09ffa72cffbd01cba8aa4cc8413..8cfb5706790ecd395bd7039463a435a6a0574405 100644 (file)
@@ -92,6 +92,15 @@ static void tce_free_pSeries(struct iommu_table *tbl, long index, long npages)
                *(tcep++) = 0;
 }
 
+/*
+ * Read one entry back from the in-memory TCE table at tbl->it_base.
+ *
+ * tbl:   table to read; it_base is treated as the start of a u64 array
+ * index: entry number, scaled by TCE_PAGE_FACTOR before the lookup
+ *
+ * Returns the 64-bit word stored in that slot.
+ */
+static unsigned long tce_get_pseries(struct iommu_table *tbl, long index)
+{
+       u64 *table = (u64 *)tbl->it_base;
+
+       return table[index << TCE_PAGE_FACTOR];
+}
 
 static void tce_build_pSeriesLP(struct iommu_table *tbl, long tcenum,
                                long npages, unsigned long uaddr,
@@ -235,6 +244,25 @@ static void tce_freemulti_pSeriesLP(struct iommu_table *tbl, long tcenum, long n
        }
 }
 
+/*
+ * Read one TCE through plpar_tce_get().
+ *
+ * tbl:    table whose it_index is passed as the first plpar_tce_get()
+ *         argument
+ * tcenum: entry number; scaled by TCE_PAGE_FACTOR and then shifted left
+ *         by 12 to form the second plpar_tce_get() argument
+ *
+ * Returns the value plpar_tce_get() stored in tce_ret.  If the call
+ * reports a non-zero rc without storing anything, 0 is returned.  A
+ * rate-limited diagnostic with a stack dump is printed on any non-zero rc.
+ */
+static unsigned long tce_get_pSeriesLP(struct iommu_table *tbl, long tcenum)
+{
+       u64 rc;
+       /*
+        * Start from a defined value so that a failed call which never
+        * writes tce_ret cannot hand an indeterminate stack word back to
+        * the caller.
+        */
+       unsigned long tce_ret = 0;
+
+       tcenum <<= TCE_PAGE_FACTOR;
+       rc = plpar_tce_get((u64)tbl->it_index, (u64)tcenum << 12, &tce_ret);
+
+       if (rc && printk_ratelimit()) {
+               printk("tce_get_pSeriesLP: plpar_tce_get failed. rc=%ld\n",
+                       rc);
+               printk("\tindex   = 0x%lx\n", (u64)tbl->it_index);
+               printk("\ttcenum  = 0x%lx\n", (u64)tcenum);
+               show_stack(current, (unsigned long *)__get_SP());
+       }
+
+       return tce_ret;
+}
+
 static void iommu_table_setparms(struct pci_controller *phb,
                                 struct device_node *dn,
                                 struct iommu_table *tbl)
@@ -254,7 +282,10 @@ static void iommu_table_setparms(struct pci_controller *phb,
        }
 
        tbl->it_base = (unsigned long)__va(*basep);
+
+#ifndef CONFIG_CRASH_DUMP
        memset((void *)tbl->it_base, 0, *sizep);
+#endif
 
        tbl->it_busno = phb->bus->number;
 
@@ -560,11 +591,13 @@ void iommu_init_early_pSeries(void)
                        ppc_md.tce_build = tce_build_pSeriesLP;
                        ppc_md.tce_free  = tce_free_pSeriesLP;
                }
+               ppc_md.tce_get   = tce_get_pSeriesLP;
                ppc_md.iommu_bus_setup = iommu_bus_setup_pSeriesLP;
                ppc_md.iommu_dev_setup = iommu_dev_setup_pSeriesLP;
        } else {
                ppc_md.tce_build = tce_build_pSeries;
                ppc_md.tce_free  = tce_free_pSeries;
+               ppc_md.tce_get   = tce_get_pseries;
                ppc_md.iommu_bus_setup = iommu_bus_setup_pSeries;
                ppc_md.iommu_dev_setup = iommu_dev_setup_pSeries;
        }
index 5a5c3b5ab1e0236686975c0abb9d94bb9a0ef9fd..dc1574c945f8fb8a605413aa8b5b384ef2bba19a 100644 (file)
@@ -15,6 +15,8 @@
 #define KDUMP_TRAMPOLINE_START 0x0100
 #define KDUMP_TRAMPOLINE_END   0x3000
 
+#define KDUMP_MIN_TCE_ENTRIES  2048
+
 #else /* !CONFIG_CRASH_DUMP */
 
 #define PHYSICAL_START 0x0
index 73db1f71329db02702e323408338b272a3cd6cc5..eba133d149a793d3e7c2b05b25bb4036920a6a5b 100644 (file)
@@ -81,6 +81,8 @@ struct machdep_calls {
        void            (*tce_free)(struct iommu_table *tbl,
                                    long index,
                                    long npages);
+       unsigned long   (*tce_get)(struct iommu_table *tbl,
+                                   long index);
        void            (*tce_flush)(struct iommu_table *tbl);
        void            (*iommu_dev_setup)(struct pci_dev *dev);
        void            (*iommu_bus_setup)(struct pci_bus *bus);