[SPARC64]: First cut at SUN4V PCI IOMMU handling.
authorDavid S. Miller <davem@sunset.davemloft.net>
Fri, 10 Feb 2006 08:08:26 +0000 (00:08 -0800)
committerDavid S. Miller <davem@sunset.davemloft.net>
Mon, 20 Mar 2006 09:12:15 +0000 (01:12 -0800)
Signed-off-by: David S. Miller <davem@davemloft.net>
arch/sparc64/kernel/pci_iommu.c
arch/sparc64/kernel/pci_sun4v.c
arch/sparc64/kernel/pci_sun4v.h
arch/sparc64/kernel/pci_sun4v_asm.S

index 8e52232f6f318c9b8ecbead46e3fb9c22907ca79..c9320eac45d39710396d5213444194dd397ade40 100644 (file)
@@ -562,9 +562,9 @@ static int pci_4u_map_sg(struct pci_dev *pdev, struct scatterlist *sglist, int n
        /* Fast path single entry scatterlists. */
        if (nelems == 1) {
                sglist->dma_address =
-                       pci_map_single(pdev,
-                                      (page_address(sglist->page) + sglist->offset),
-                                      sglist->length, direction);
+                       pci_4u_map_single(pdev,
+                                         (page_address(sglist->page) + sglist->offset),
+                                         sglist->length, direction);
                if (unlikely(sglist->dma_address == PCI_DMA_ERROR_CODE))
                        return 0;
                sglist->dma_length = sglist->length;
index abd9bfb245cbc38a3b82e64ac9bc07b5be0ae194..3f0e3c09f4d3d910128a679555fa17978de6ad33 100644 (file)
@@ -9,6 +9,7 @@
 #include <linux/init.h>
 #include <linux/slab.h>
 #include <linux/interrupt.h>
+#include <linux/percpu.h>
 
 #include <asm/pbm.h>
 #include <asm/iommu.h>
 
 #include "pci_sun4v.h"
 
+#define PGLIST_NENTS   2048
+
+struct sun4v_pglist {
+       u64     pglist[PGLIST_NENTS];
+};
+
+static DEFINE_PER_CPU(struct sun4v_pglist, iommu_pglists);
+
+static long pci_arena_alloc(struct pci_iommu_arena *arena, unsigned long npages)
+{
+       unsigned long n, i, start, end, limit;
+       int pass;
+
+       limit = arena->limit;
+       start = arena->hint;
+       pass = 0;
+
+again:
+       n = find_next_zero_bit(arena->map, limit, start);
+       end = n + npages;
+       if (unlikely(end >= limit)) {
+               if (likely(pass < 1)) {
+                       limit = start;
+                       start = 0;
+                       pass++;
+                       goto again;
+               } else {
+                       /* Scanned the whole thing, give up. */
+                       return -1;
+               }
+       }
+
+       for (i = n; i < end; i++) {
+               if (test_bit(i, arena->map)) {
+                       start = i + 1;
+                       goto again;
+               }
+       }
+
+       for (i = n; i < end; i++)
+               __set_bit(i, arena->map);
+
+       arena->hint = end;
+
+       return n;
+}
+
+static void pci_arena_free(struct pci_iommu_arena *arena, unsigned long base, unsigned long npages)
+{
+       unsigned long i;
+
+       for (i = base; i < (base + npages); i++)
+               __clear_bit(i, arena->map);
+}
+
 static void *pci_4v_alloc_consistent(struct pci_dev *pdev, size_t size, dma_addr_t *dma_addrp)
 {
-       return NULL;
+       struct pcidev_cookie *pcp;
+       struct pci_iommu *iommu;
+       unsigned long devhandle, flags, order, first_page, npages, n;
+       void *ret;
+       long entry;
+       u64 *pglist;
+       int cpu;
+
+       size = IO_PAGE_ALIGN(size);
+       order = get_order(size);
+       if (order >= MAX_ORDER)
+               return NULL;
+
+       npages = size >> IO_PAGE_SHIFT;
+       if (npages > PGLIST_NENTS)
+               return NULL;
+
+       first_page = __get_free_pages(GFP_ATOMIC, order);
+       if (first_page == 0UL)
+               return NULL;
+       memset((char *)first_page, 0, PAGE_SIZE << order);
+
+       pcp = pdev->sysdata;
+       devhandle = pcp->pbm->devhandle;
+       iommu = pcp->pbm->iommu;
+
+       spin_lock_irqsave(&iommu->lock, flags);
+       entry = pci_arena_alloc(&iommu->arena, npages);
+       spin_unlock_irqrestore(&iommu->lock, flags);
+
+       if (unlikely(entry < 0L)) {
+               free_pages(first_page, order);
+               return NULL;
+       }
+
+       *dma_addrp = (iommu->page_table_map_base +
+                     (entry << IO_PAGE_SHIFT));
+       ret = (void *) first_page;
+       first_page = __pa(first_page);
+
+       cpu = get_cpu();
+
+       pglist = &__get_cpu_var(iommu_pglists).pglist[0];
+       for (n = 0; n < npages; n++)
+               pglist[n] = first_page + (n * PAGE_SIZE);
+
+       do {
+               unsigned long num;
+
+               num = pci_sun4v_iommu_map(devhandle, HV_PCI_TSBID(0, entry),
+                                         npages,
+                                         (HV_PCI_MAP_ATTR_READ |
+                                          HV_PCI_MAP_ATTR_WRITE),
+                                         __pa(pglist));
+               entry += num;
+               npages -= num;
+               pglist += num;
+       } while (npages != 0);
+
+       put_cpu();
+
+       return ret;
 }
 
 static void pci_4v_free_consistent(struct pci_dev *pdev, size_t size, void *cpu, dma_addr_t dvma)
 {
+       struct pcidev_cookie *pcp;
+       struct pci_iommu *iommu;
+       unsigned long flags, order, npages, entry, devhandle;
+
+       npages = IO_PAGE_ALIGN(size) >> IO_PAGE_SHIFT;
+       pcp = pdev->sysdata;
+       iommu = pcp->pbm->iommu;
+       devhandle = pcp->pbm->devhandle;
+       entry = ((dvma - iommu->page_table_map_base) >> IO_PAGE_SHIFT);
+
+       spin_lock_irqsave(&iommu->lock, flags);
+
+       pci_arena_free(&iommu->arena, entry, npages);
+
+       do {
+               unsigned long num;
+
+               num = pci_sun4v_iommu_demap(devhandle, HV_PCI_TSBID(0, entry),
+                                           npages);
+               entry += num;
+               npages -= num;
+       } while (npages != 0);
+
+       spin_unlock_irqrestore(&iommu->lock, flags);
+
+       order = get_order(size);
+       if (order < 10)
+               free_pages((unsigned long)cpu, order);
 }
 
 static dma_addr_t pci_4v_map_single(struct pci_dev *pdev, void *ptr, size_t sz, int direction)
 {
-       return 0;
+       struct pcidev_cookie *pcp;
+       struct pci_iommu *iommu;
+       unsigned long flags, npages, oaddr;
+       unsigned long i, base_paddr, devhandle;
+       u32 bus_addr, ret;
+       unsigned long prot;
+       long entry;
+       u64 *pglist;
+       int cpu;
+
+       pcp = pdev->sysdata;
+       iommu = pcp->pbm->iommu;
+       devhandle = pcp->pbm->devhandle;
+
+       if (unlikely(direction == PCI_DMA_NONE))
+               goto bad;
+
+       oaddr = (unsigned long)ptr;
+       npages = IO_PAGE_ALIGN(oaddr + sz) - (oaddr & IO_PAGE_MASK);
+       npages >>= IO_PAGE_SHIFT;
+       if (unlikely(npages > PGLIST_NENTS))
+               goto bad;
+
+       spin_lock_irqsave(&iommu->lock, flags);
+       entry = pci_arena_alloc(&iommu->arena, npages);
+       spin_unlock_irqrestore(&iommu->lock, flags);
+
+       if (unlikely(entry < 0L))
+               goto bad;
+
+       bus_addr = (iommu->page_table_map_base +
+                   (entry << IO_PAGE_SHIFT));
+       ret = bus_addr | (oaddr & ~IO_PAGE_MASK);
+       base_paddr = __pa(oaddr & IO_PAGE_MASK);
+       prot = HV_PCI_MAP_ATTR_READ;
+       if (direction != PCI_DMA_TODEVICE)
+               prot |= HV_PCI_MAP_ATTR_WRITE;
+
+       cpu = get_cpu();
+
+       pglist = &__get_cpu_var(iommu_pglists).pglist[0];
+       for (i = 0; i < npages; i++, base_paddr += IO_PAGE_SIZE)
+               pglist[i] = base_paddr;
+
+       do {
+               unsigned long num;
+
+               num = pci_sun4v_iommu_map(devhandle, HV_PCI_TSBID(0, entry),
+                                         npages, prot,
+                                         __pa(pglist));
+               entry += num;
+               npages -= num;
+               pglist += num;
+       } while (npages != 0);
+
+       put_cpu();
+
+       return ret;
+
+bad:
+       if (printk_ratelimit())
+               WARN_ON(1);
+       return PCI_DMA_ERROR_CODE;
 }
 
 static void pci_4v_unmap_single(struct pci_dev *pdev, dma_addr_t bus_addr, size_t sz, int direction)
 {
+       struct pcidev_cookie *pcp;
+       struct pci_iommu *iommu;
+       unsigned long flags, npages, devhandle;
+       long entry;
+
+       if (unlikely(direction == PCI_DMA_NONE)) {
+               if (printk_ratelimit())
+                       WARN_ON(1);
+               return;
+       }
+
+       pcp = pdev->sysdata;
+       iommu = pcp->pbm->iommu;
+       devhandle = pcp->pbm->devhandle;
+
+       npages = IO_PAGE_ALIGN(bus_addr + sz) - (bus_addr & IO_PAGE_MASK);
+       npages >>= IO_PAGE_SHIFT;
+       bus_addr &= IO_PAGE_MASK;
+
+       spin_lock_irqsave(&iommu->lock, flags);
+
+       entry = (bus_addr - iommu->page_table_map_base) >> IO_PAGE_SHIFT;
+       pci_arena_free(&iommu->arena, entry, npages);
+
+       do {
+               unsigned long num;
+
+               num = pci_sun4v_iommu_demap(devhandle, HV_PCI_TSBID(0, entry),
+                                           npages);
+               entry += num;
+               npages -= num;
+       } while (npages != 0);
+
+       spin_unlock_irqrestore(&iommu->lock, flags);
+}
+
+#define SG_ENT_PHYS_ADDRESS(SG)        \
+       (__pa(page_address((SG)->page)) + (SG)->offset)
+
+static inline void fill_sg(long entry, unsigned long devhandle,
+                          struct scatterlist *sg,
+                          int nused, int nelems, unsigned long prot)
+{
+       struct scatterlist *dma_sg = sg;
+       struct scatterlist *sg_end = sg + nelems;
+       int i, cpu, pglist_ent;
+       u64 *pglist;
+
+       cpu = get_cpu();
+       pglist = &__get_cpu_var(iommu_pglists).pglist[0];
+       pglist_ent = 0;
+       for (i = 0; i < nused; i++) {
+               unsigned long pteval = ~0UL;
+               u32 dma_npages;
+
+               dma_npages = ((dma_sg->dma_address & (IO_PAGE_SIZE - 1UL)) +
+                             dma_sg->dma_length +
+                             ((IO_PAGE_SIZE - 1UL))) >> IO_PAGE_SHIFT;
+               do {
+                       unsigned long offset;
+                       signed int len;
+
+                       /* If we are here, we know we have at least one
+                        * more page to map.  So walk forward until we
+                        * hit a page crossing, and begin creating new
+                        * mappings from that spot.
+                        */
+                       for (;;) {
+                               unsigned long tmp;
+
+                               tmp = SG_ENT_PHYS_ADDRESS(sg);
+                               len = sg->length;
+                               if (((tmp ^ pteval) >> IO_PAGE_SHIFT) != 0UL) {
+                                       pteval = tmp & IO_PAGE_MASK;
+                                       offset = tmp & (IO_PAGE_SIZE - 1UL);
+                                       break;
+                               }
+                               if (((tmp ^ (tmp + len - 1UL)) >> IO_PAGE_SHIFT) != 0UL) {
+                                       pteval = (tmp + IO_PAGE_SIZE) & IO_PAGE_MASK;
+                                       offset = 0UL;
+                                       len -= (IO_PAGE_SIZE - (tmp & (IO_PAGE_SIZE - 1UL)));
+                                       break;
+                               }
+                               sg++;
+                       }
+
+                       pteval = (pteval & IOPTE_PAGE);
+                       while (len > 0) {
+                               pglist[pglist_ent++] = pteval;
+                               pteval += IO_PAGE_SIZE;
+                               len -= (IO_PAGE_SIZE - offset);
+                               offset = 0;
+                               dma_npages--;
+                       }
+
+                       pteval = (pteval & IOPTE_PAGE) + len;
+                       sg++;
+
+                       /* Skip over any tail mappings we've fully mapped,
+                        * adjusting pteval along the way.  Stop when we
+                        * detect a page crossing event.
+                        */
+                       while (sg < sg_end &&
+                              (pteval << (64 - IO_PAGE_SHIFT)) != 0UL &&
+                              (pteval == SG_ENT_PHYS_ADDRESS(sg)) &&
+                              ((pteval ^
+                                (SG_ENT_PHYS_ADDRESS(sg) + sg->length - 1UL)) >> IO_PAGE_SHIFT) == 0UL) {
+                               pteval += sg->length;
+                               sg++;
+                       }
+                       if ((pteval << (64 - IO_PAGE_SHIFT)) == 0UL)
+                               pteval = ~0UL;
+               } while (dma_npages != 0);
+               dma_sg++;
+       }
+
+       BUG_ON(pglist_ent == 0);
+
+       do {
+               unsigned long num;
+
+               num = pci_sun4v_iommu_demap(devhandle, HV_PCI_TSBID(0, entry),
+                                           pglist_ent);
+               entry += num;
+               pglist_ent -= num;
+       } while (pglist_ent != 0);
+
+       put_cpu();
 }
 
 static int pci_4v_map_sg(struct pci_dev *pdev, struct scatterlist *sglist, int nelems, int direction)
 {
-       return nelems;
+       struct pcidev_cookie *pcp;
+       struct pci_iommu *iommu;
+       unsigned long flags, npages, prot, devhandle;
+       u32 dma_base;
+       struct scatterlist *sgtmp;
+       long entry;
+       int used;
+
+       /* Fast path single entry scatterlists. */
+       if (nelems == 1) {
+               sglist->dma_address =
+                       pci_4v_map_single(pdev,
+                                         (page_address(sglist->page) + sglist->offset),
+                                         sglist->length, direction);
+               if (unlikely(sglist->dma_address == PCI_DMA_ERROR_CODE))
+                       return 0;
+               sglist->dma_length = sglist->length;
+               return 1;
+       }
+
+       pcp = pdev->sysdata;
+       iommu = pcp->pbm->iommu;
+       devhandle = pcp->pbm->devhandle;
+       
+       if (unlikely(direction == PCI_DMA_NONE))
+               goto bad;
+
+       /* Step 1: Prepare scatter list. */
+       npages = prepare_sg(sglist, nelems);
+       if (unlikely(npages > PGLIST_NENTS))
+               goto bad;
+
+       /* Step 2: Allocate a cluster and context, if necessary. */
+       spin_lock_irqsave(&iommu->lock, flags);
+       entry = pci_arena_alloc(&iommu->arena, npages);
+       spin_unlock_irqrestore(&iommu->lock, flags);
+
+       if (unlikely(entry < 0L))
+               goto bad;
+
+       dma_base = iommu->page_table_map_base +
+               (entry << IO_PAGE_SHIFT);
+
+       /* Step 3: Normalize DMA addresses. */
+       used = nelems;
+
+       sgtmp = sglist;
+       while (used && sgtmp->dma_length) {
+               sgtmp->dma_address += dma_base;
+               sgtmp++;
+               used--;
+       }
+       used = nelems - used;
+
+       /* Step 4: Create the mappings. */
+       prot = HV_PCI_MAP_ATTR_READ;
+       if (direction != PCI_DMA_TODEVICE)
+               prot |= HV_PCI_MAP_ATTR_WRITE;
+
+       fill_sg(entry, devhandle, sglist, used, nelems, prot);
+
+       return used;
+
+bad:
+       if (printk_ratelimit())
+               WARN_ON(1);
+       return 0;
 }
 
 static void pci_4v_unmap_sg(struct pci_dev *pdev, struct scatterlist *sglist, int nelems, int direction)
 {
+       struct pcidev_cookie *pcp;
+       struct pci_iommu *iommu;
+       unsigned long flags, i, npages, devhandle;
+       long entry;
+       u32 bus_addr;
+
+       if (unlikely(direction == PCI_DMA_NONE)) {
+               if (printk_ratelimit())
+                       WARN_ON(1);
+       }
+
+       pcp = pdev->sysdata;
+       iommu = pcp->pbm->iommu;
+       devhandle = pcp->pbm->devhandle;
+       
+       bus_addr = sglist->dma_address & IO_PAGE_MASK;
+
+       for (i = 1; i < nelems; i++)
+               if (sglist[i].dma_length == 0)
+                       break;
+       i--;
+       npages = (IO_PAGE_ALIGN(sglist[i].dma_address + sglist[i].dma_length) -
+                 bus_addr) >> IO_PAGE_SHIFT;
+
+       entry = ((bus_addr - iommu->page_table_map_base) >> IO_PAGE_SHIFT);
+
+       spin_lock_irqsave(&iommu->lock, flags);
+
+       pci_arena_free(&iommu->arena, entry, npages);
+
+       do {
+               unsigned long num;
+
+               num = pci_sun4v_iommu_demap(devhandle, HV_PCI_TSBID(0, entry),
+                                           npages);
+               entry += num;
+               npages -= num;
+       } while (npages != 0);
+
+       spin_unlock_irqrestore(&iommu->lock, flags);
 }
 
 static void pci_4v_dma_sync_single_for_cpu(struct pci_dev *pdev, dma_addr_t bus_addr, size_t sz, int direction)
 {
+       /* Nothing to do... */
 }
 
 static void pci_4v_dma_sync_sg_for_cpu(struct pci_dev *pdev, struct scatterlist *sglist, int nelems, int direction)
 {
+       /* Nothing to do... */
 }
 
 struct pci_iommu_ops pci_sun4v_iommu_ops = {
@@ -264,9 +707,83 @@ static void pbm_register_toplevel_resources(struct pci_controller_info *p,
                                    &pbm->mem_space);
 }
 
+static void probe_existing_entries(struct pci_pbm_info *pbm,
+                                  struct pci_iommu *iommu)
+{
+       struct pci_iommu_arena *arena = &iommu->arena;
+       unsigned long i, devhandle;
+
+       devhandle = pbm->devhandle;
+       for (i = 0; i < arena->limit; i++) {
+               unsigned long ret, io_attrs, ra;
+
+               ret = pci_sun4v_iommu_getmap(devhandle,
+                                            HV_PCI_TSBID(0, i),
+                                            &io_attrs, &ra);
+               if (ret == HV_EOK)
+                       __set_bit(i, arena->map);
+       }
+}
+
 static void pci_sun4v_iommu_init(struct pci_pbm_info *pbm)
 {
-       /* XXX Implement me! XXX */
+       struct pci_iommu *iommu = pbm->iommu;
+       unsigned long num_tsb_entries, sz;
+       u32 vdma[2], dma_mask, dma_offset;
+       int err, tsbsize;
+
+       err = prom_getproperty(pbm->prom_node, "virtual-dma",
+                              (char *)&vdma[0], sizeof(vdma));
+       if (err == 0 || err == -1) {
+               /* No property, use default values. */
+               vdma[0] = 0x80000000;
+               vdma[1] = 0x80000000;
+       }
+
+       dma_mask = vdma[0];
+       switch (vdma[1]) {
+               case 0x20000000:
+                       dma_mask |= 0x1fffffff;
+                       tsbsize = 64;
+                       break;
+
+               case 0x40000000:
+                       dma_mask |= 0x3fffffff;
+                       tsbsize = 128;
+                       break;
+
+               case 0x80000000:
+                       dma_mask |= 0x7fffffff;
+                       tsbsize = 128;
+                       break;
+
+               default:
+                       prom_printf("PCI-SUN4V: strange virtual-dma size.\n");
+                       prom_halt();
+       };
+
+       num_tsb_entries = tsbsize / sizeof(iopte_t);
+
+       dma_offset = vdma[0];
+
+       /* Setup initial software IOMMU state. */
+       spin_lock_init(&iommu->lock);
+       iommu->ctx_lowest_free = 1;
+       iommu->page_table_map_base = dma_offset;
+       iommu->dma_addr_mask = dma_mask;
+
+       /* Allocate and initialize the free area map.  */
+       sz = num_tsb_entries / 8;
+       sz = (sz + 7UL) & ~7UL;
+       iommu->arena.map = kmalloc(sz, GFP_KERNEL);
+       if (!iommu->arena.map) {
+               prom_printf("PCI_IOMMU: Error, kmalloc(arena.map) failed.\n");
+               prom_halt();
+       }
+       memset(iommu->arena.map, 0, sz);
+       iommu->arena.limit = num_tsb_entries;
+
+       probe_existing_entries(pbm, iommu);
 }
 
 static void pci_sun4v_pbm_init(struct pci_controller_info *p, int prom_node)
index 5c7ed2ca1505427d741a65caa8984ab4ef442e39..00322ed0cf8a095cbc0fe0b392a948fdfe226be8 100644 (file)
@@ -16,6 +16,10 @@ extern unsigned long pci_sun4v_iommu_map(unsigned long devhandle,
 extern unsigned long pci_sun4v_iommu_demap(unsigned long devhandle,
                                           unsigned long tsbid,
                                           unsigned long num_ttes);
+extern unsigned long pci_sun4v_iommu_getmap(unsigned long devhandle,
+                                           unsigned long tsbid,
+                                           unsigned long *io_attributes,
+                                           unsigned long *real_address);
 extern unsigned long pci_sun4v_config_get(unsigned long devhandle,
                                          unsigned long pci_device,
                                          unsigned long config_offset,
index 2f1147146abe1ba9b031074892cdd051607e35dd..4a12341dd5d3395273a1b6aec6cc9070739945a3 100644 (file)
@@ -12,9 +12,7 @@
         */
        .globl  pci_sun4v_devino_to_sysino
 pci_sun4v_devino_to_sysino:
-       mov     %o1, %o2
-       mov     %o0, %o1
-       mov     HV_FAST_INTR_DEVINO2SYSINO, %o0
+       mov     HV_FAST_INTR_DEVINO2SYSINO, %o5
        ta      HV_FAST_TRAP
        retl
         mov    %o1, %o0
@@ -29,12 +27,7 @@ pci_sun4v_devino_to_sysino:
         */
        .globl  pci_sun4v_iommu_map
 pci_sun4v_iommu_map:
-       mov     %o4, %o5
-       mov     %o3, %o4
-       mov     %o2, %o3
-       mov     %o1, %o2
-       mov     %o0, %o1
-       mov     HV_FAST_PCI_IOMMU_MAP, %o0
+       mov     HV_FAST_PCI_IOMMU_MAP, %o5
        ta      HV_FAST_TRAP
        retl
         mov    %o1, %o0
@@ -47,14 +40,28 @@ pci_sun4v_iommu_map:
         */
        .globl  pci_sun4v_iommu_demap
 pci_sun4v_iommu_demap:
-       mov     %o2, %o3
-       mov     %o1, %o2
-       mov     %o0, %o1
-       mov     HV_FAST_PCI_IOMMU_DEMAP, %o0
+       mov     HV_FAST_PCI_IOMMU_DEMAP, %o5
        ta      HV_FAST_TRAP
        retl
         mov    %o1, %o0
 
+       /* %o0: devhandle
+        * %o1: tsbid
+        * %o2: &io_attributes
+        * %o3: &real_address
+        *
+        * returns %o0: status
+        */
+       .globl  pci_sun4v_iommu_getmap
+pci_sun4v_iommu_getmap:
+       mov     %o2, %o4
+       mov     HV_FAST_PCI_IOMMU_GETMAP, %o5
+       ta      HV_FAST_TRAP
+       stx     %o1, [%o4]
+       stx     %o2, [%o3]
+       retl
+        mov    %o0, %o0
+
        /* %o0: devhandle
         * %o1: pci_device
         * %o2: pci_config_offset
@@ -67,11 +74,7 @@ pci_sun4v_iommu_demap:
         */
        .globl  pci_sun4v_config_get
 pci_sun4v_config_get:
-       mov     %o3, %o4
-       mov     %o2, %o3
-       mov     %o1, %o2
-       mov     %o0, %o1
-       mov     HV_FAST_PCI_CONFIG_GET, %o0
+       mov     HV_FAST_PCI_CONFIG_GET, %o5
        ta      HV_FAST_TRAP
        brnz,a,pn %o1, 1f
         mov    -1, %o2
@@ -91,14 +94,9 @@ pci_sun4v_config_get:
         */
        .globl  pci_sun4v_config_put
 pci_sun4v_config_put:
-       mov     %o3, %o4
-       mov     %o2, %o3
-       mov     %o1, %o2
-       mov     %o0, %o1
-       mov     HV_FAST_PCI_CONFIG_PUT, %o0
+       mov     HV_FAST_PCI_CONFIG_PUT, %o5
        ta      HV_FAST_TRAP
        brnz,a,pn %o1, 1f
         mov    -1, %o1
 1:     retl
         mov    %o1, %o0
-