drm: add savage driver
authorDave Airlie <airlied@starflyer.(none)>
Sun, 7 Aug 2005 05:43:54 +0000 (15:43 +1000)
committerDave Airlie <airlied@linux.ie>
Sun, 7 Aug 2005 05:43:54 +0000 (15:43 +1000)
Add driver for savage chipsets.

From: Felix Kuehling
Signed-off-by: Dave Airlie <airlied@linux.ie>
drivers/char/drm/Kconfig
drivers/char/drm/Makefile
drivers/char/drm/savage_bci.c [new file with mode: 0644]
drivers/char/drm/savage_drm.h [new file with mode: 0644]
drivers/char/drm/savage_drv.c [new file with mode: 0644]
drivers/char/drm/savage_drv.h [new file with mode: 0644]
drivers/char/drm/savage_state.c [new file with mode: 0644]

index f31b9706ef650ff4d5d016b4b7be1e01b7d5b54b..56ace9d5e2aef75b4cd095b3424e4cbc00094b23 100644 (file)
@@ -96,3 +96,10 @@ config DRM_VIA
          Choose this option if you have a Via unichrome or compatible video
          chipset. If M is selected the module will be called via.
 
+config DRM_SAVAGE
+       tristate "Savage video cards"
+       depends on DRM
+       help
+         Choose this option if you have a Savage3D/4/SuperSavage/Pro/Twister
+         chipset. If M is selected the module will be called savage.
+
index 3f0cf8e9cc50ade9b3db6219b7c9e530b05c2900..1945138cb8fbe0109946cc7affe0f2108ff62e4d 100644 (file)
@@ -17,6 +17,7 @@ i915-objs   := i915_drv.o i915_dma.o i915_irq.o i915_mem.o
 radeon-objs := radeon_drv.o radeon_cp.o radeon_state.o radeon_mem.o radeon_irq.o
 ffb-objs    := ffb_drv.o ffb_context.o
 sis-objs    := sis_drv.o sis_ds.o sis_mm.o
+savage-objs := savage_drv.o savage_bci.o savage_state.o
 via-objs    := via_irq.o via_drv.o via_ds.o via_map.o via_mm.o via_dma.o via_verifier.o via_video.o
 
 ifeq ($(CONFIG_COMPAT),y)
@@ -37,5 +38,7 @@ obj-$(CONFIG_DRM_I830)        += i830.o
 obj-$(CONFIG_DRM_I915)  += i915.o
 obj-$(CONFIG_DRM_FFB)   += ffb.o
 obj-$(CONFIG_DRM_SIS)   += sis.o
+obj-$(CONFIG_DRM_SAVAGE)+= savage.o
 obj-$(CONFIG_DRM_VIA)  +=via.o
 
+
diff --git a/drivers/char/drm/savage_bci.c b/drivers/char/drm/savage_bci.c
new file mode 100644 (file)
index 0000000..2fd40ba
--- /dev/null
@@ -0,0 +1,1096 @@
+/* savage_bci.c -- BCI support for Savage
+ *
+ * Copyright 2004  Felix Kuehling
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sub license,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NON-INFRINGEMENT. IN NO EVENT SHALL FELIX KUEHLING BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
+ * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include "drmP.h"
+#include "savage_drm.h"
+#include "savage_drv.h"
+
+/* Need a long timeout, for shadow status updates can take a while,
+ * and so can waiting for events when the queue is full. */
+#define SAVAGE_DEFAULT_USEC_TIMEOUT    1000000 /* 1s */
+#define SAVAGE_EVENT_USEC_TIMEOUT      5000000 /* 5s */
+#define SAVAGE_FREELIST_DEBUG          0
+
+/*
+ * Wait for FIFO space by polling the shadow status page.
+ *
+ * Reads status_ptr[0] (after a memory barrier) up to
+ * SAVAGE_DEFAULT_USEC_TIMEOUT times, calling DRM_UDELAY(1) between
+ * reads, until the bits selected by status_used_mask drop below
+ * bci_threshold_hi.  Returns 0 on success and DRM_ERR(EBUSY) on
+ * timeout.  n is only looked at by the SAVAGE_BCI_DEBUG sanity check.
+ */
+static int
+savage_bci_wait_fifo_shadow(drm_savage_private_t *dev_priv, unsigned int n)
+{
+       const uint32_t used_mask = dev_priv->status_used_mask;
+       const uint32_t limit = dev_priv->bci_threshold_hi;
+       uint32_t status;
+       int tries = 0;
+
+#if SAVAGE_BCI_DEBUG
+       if (n > dev_priv->cob_size + SAVAGE_BCI_FIFO_SIZE - limit)
+               DRM_ERROR("Trying to emit %d words "
+                         "(more than guaranteed space in COB)\n", n);
+#endif
+
+       while (tries < SAVAGE_DEFAULT_USEC_TIMEOUT) {
+               DRM_MEMORYBARRIER();
+               status = dev_priv->status_ptr[0];
+               if ((status & used_mask) < limit)
+                       return 0;
+               DRM_UDELAY(1);
+               tries++;
+       }
+
+#if SAVAGE_BCI_DEBUG
+       DRM_ERROR("failed!\n");
+       DRM_INFO("   status=0x%08x, threshold=0x%08x\n", status, limit);
+#endif
+       return DRM_ERR(EBUSY);
+}
+
+static int
+savage_bci_wait_fifo_s3d(drm_savage_private_t *dev_priv, unsigned int n)
+{
+       uint32_t maxUsed = dev_priv->cob_size + SAVAGE_BCI_FIFO_SIZE - n;
+       uint32_t status;
+       int i;
+
+       for (i = 0; i < SAVAGE_DEFAULT_USEC_TIMEOUT; i++) {
+               status = SAVAGE_READ(SAVAGE_STATUS_WORD0);
+               if ((status & SAVAGE_FIFO_USED_MASK_S3D) <= maxUsed)
+                       return 0;
+               DRM_UDELAY(1);
+       }
+
+#if SAVAGE_BCI_DEBUG
+       DRM_ERROR("failed!\n");
+       DRM_INFO("   status=0x%08x\n", status);
+#endif
+       return DRM_ERR(EBUSY);
+}
+
+static int
+savage_bci_wait_fifo_s4(drm_savage_private_t *dev_priv, unsigned int n)
+{
+       uint32_t maxUsed = dev_priv->cob_size + SAVAGE_BCI_FIFO_SIZE - n;
+       uint32_t status;
+       int i;
+
+       for (i = 0; i < SAVAGE_DEFAULT_USEC_TIMEOUT; i++) {
+               status = SAVAGE_READ(SAVAGE_ALT_STATUS_WORD0);
+               if ((status & SAVAGE_FIFO_USED_MASK_S4) <= maxUsed)
+                       return 0;
+               DRM_UDELAY(1);
+       }
+
+#if SAVAGE_BCI_DEBUG
+       DRM_ERROR("failed!\n");
+       DRM_INFO("   status=0x%08x\n", status);
+#endif
+       return DRM_ERR(EBUSY);
+}
+
+/*
+ * Waiting for events.
+ *
+ * The BIOS resets the event tag to 0 on mode changes. Therefore we
+ * never emit 0 to the event tag. If we find a 0 event tag we know the
+ * BIOS stomped on it and return success assuming that the BIOS waited
+ * for engine idle.
+ *
+ * Note: if the Xserver uses the event tag it has to follow the same
+ * rule. Otherwise there may be glitches every 2^16 events.
+ */
+static int
+savage_bci_wait_event_shadow(drm_savage_private_t *dev_priv, uint16_t e)
+{
+       uint32_t status;
+       int i;
+
+       for (i = 0; i < SAVAGE_EVENT_USEC_TIMEOUT; i++) {
+               DRM_MEMORYBARRIER();
+               status = dev_priv->status_ptr[1];
+               if ((((status & 0xffff) - e) & 0xffff) <= 0x7fff ||
+                   (status & 0xffff) == 0)
+                       return 0;
+               DRM_UDELAY(1);
+       }
+
+#if SAVAGE_BCI_DEBUG
+       DRM_ERROR("failed!\n");
+       DRM_INFO("   status=0x%08x, e=0x%04x\n", status, e);
+#endif
+
+       return DRM_ERR(EBUSY);
+}
+
+static int
+savage_bci_wait_event_reg(drm_savage_private_t *dev_priv, uint16_t e)
+{
+       uint32_t status;
+       int i;
+
+       for (i = 0; i < SAVAGE_EVENT_USEC_TIMEOUT; i++) {
+               status = SAVAGE_READ(SAVAGE_STATUS_WORD1);
+               if ((((status & 0xffff) - e) & 0xffff) <= 0x7fff ||
+                   (status & 0xffff) == 0)
+                       return 0;
+               DRM_UDELAY(1);
+       }
+
+#if SAVAGE_BCI_DEBUG
+       DRM_ERROR("failed!\n");
+       DRM_INFO("   status=0x%08x, e=0x%04x\n", status, e);
+#endif
+
+       return DRM_ERR(EBUSY);
+}
+
+/*
+ * Emit a new event tag into the BCI stream and return it.
+ *
+ * The next tag is derived from status_ptr[1023] when a shadow status
+ * page exists, otherwise from dev_priv->event_counter.  The tag is
+ * incremented modulo 2^16, skipping 0, and event_wrap is bumped
+ * whenever the counter is seen to wrap.  If flags contains
+ * SAVAGE_WAIT_2D and/or SAVAGE_WAIT_3D, a matching BCI wait command is
+ * written before the tag update.
+ */
+uint16_t savage_bci_emit_event(drm_savage_private_t *dev_priv,
+                              unsigned int flags)
+{
+       uint16_t count;
+       BCI_LOCALS;
+
+       if (!dev_priv->status_ptr) {
+               count = dev_priv->event_counter;
+       } else {
+               /* coordinate with Xserver */
+               count = dev_priv->status_ptr[1023];
+               if (count < dev_priv->event_counter)
+                       dev_priv->event_wrap++;
+       }
+
+       count = (count + 1) & 0xffff;
+       if (count == 0) {
+               /* See the comment above savage_wait_event_*. */
+               count = 1;
+               dev_priv->event_wrap++;
+       }
+
+       dev_priv->event_counter = count;
+       if (dev_priv->status_ptr)
+               dev_priv->status_ptr[1023] = (uint32_t)count;
+
+       if (flags & (SAVAGE_WAIT_2D | SAVAGE_WAIT_3D)) {
+               unsigned int wait_cmd = BCI_CMD_WAIT;
+
+               wait_cmd |= (flags & SAVAGE_WAIT_2D) ? BCI_CMD_WAIT_2D : 0;
+               wait_cmd |= (flags & SAVAGE_WAIT_3D) ? BCI_CMD_WAIT_3D : 0;
+               BEGIN_BCI(2);
+               BCI_WRITE(wait_cmd);
+       } else {
+               BEGIN_BCI(1);
+       }
+       BCI_WRITE(BCI_CMD_UPDATE_EVENT_TAG | (uint32_t)count);
+
+       return count;
+}
+
+/*
+ * Freelist management
+ */
+/*
+ * Build the buffer freelist: reset the head and tail sentinels in
+ * dev_priv, then insert the private entry of every buffer in
+ * dma->buflist directly behind the head with its age set to (0, 0).
+ * Always returns 0.
+ */
+static int savage_freelist_init(drm_device_t *dev)
+{
+       drm_savage_private_t *dev_priv = dev->dev_private;
+       drm_device_dma_t *dma = dev->dma;
+       drm_savage_buf_priv_t *head = &dev_priv->head;
+       drm_savage_buf_priv_t *tail = &dev_priv->tail;
+       drm_savage_buf_priv_t *entry;
+       drm_buf_t *buf;
+       int idx;
+       DRM_DEBUG("count=%d\n", dma->buf_count);
+
+       /* empty list: the two sentinels point at each other */
+       head->next = tail;
+       head->prev = NULL;
+       head->buf = NULL;
+
+       tail->next = NULL;
+       tail->prev = head;
+       tail->buf = NULL;
+
+       for (idx = 0; idx < dma->buf_count; idx++) {
+               buf = dma->buflist[idx];
+               entry = buf->dev_private;
+
+               SET_AGE(&entry->age, 0, 0);
+               entry->buf = buf;
+
+               /* link in right behind the head sentinel */
+               entry->next = head->next;
+               entry->prev = head;
+               head->next->prev = entry;
+               head->next = entry;
+       }
+
+       return 0;
+}
+
+/*
+ * Take the entry in front of the tail sentinel off the freelist and
+ * return its buffer, provided the entry carries a buffer and either
+ * TEST_AGE() accepts its age against the current hardware event/wrap
+ * or the hardware event tag reads 0.  Returns NULL otherwise.
+ */
+static drm_buf_t *savage_freelist_get(drm_device_t *dev)
+{
+       drm_savage_private_t *dev_priv = dev->dev_private;
+       drm_savage_buf_priv_t *tail = dev_priv->tail.prev;
+       drm_savage_buf_priv_t *after, *before;
+       uint16_t event;
+       unsigned int wrap;
+       DRM_DEBUG("\n");
+
+       UPDATE_EVENT_COUNTER();
+       if (!dev_priv->status_ptr)
+               event = SAVAGE_READ(SAVAGE_STATUS_WORD1) & 0xffff;
+       else
+               event = dev_priv->status_ptr[1] & 0xffff;
+       wrap = dev_priv->event_wrap;
+       if (event > dev_priv->event_counter)
+               wrap--; /* hardware hasn't passed the last wrap yet */
+
+       DRM_DEBUG("   tail=0x%04x %d\n", tail->age.event, tail->age.wrap);
+       DRM_DEBUG("   head=0x%04x %d\n", event, wrap);
+
+       if (!tail->buf ||
+           !(TEST_AGE(&tail->age, event, wrap) || event == 0)) {
+               DRM_DEBUG("returning NULL, tail->buf=%p!\n", tail->buf);
+               return NULL;
+       }
+
+       /* unlink the entry and mark it as off-list */
+       after = tail->next;
+       before = tail->prev;
+       before->next = after;
+       after->prev = before;
+       tail->next = tail->prev = NULL;
+
+       return tail->buf;
+}
+
+/*
+ * Put a buffer back on the freelist, directly behind the head
+ * sentinel.  An entry whose next or prev pointer is still set is
+ * treated as already listed: an error is logged and nothing changes.
+ */
+void savage_freelist_put(drm_device_t *dev, drm_buf_t *buf)
+{
+       drm_savage_private_t *dev_priv = dev->dev_private;
+       drm_savage_buf_priv_t *entry = buf->dev_private;
+       drm_savage_buf_priv_t *head, *first;
+
+       DRM_DEBUG("age=0x%04x wrap=%d\n", entry->age.event, entry->age.wrap);
+
+       if (entry->next || entry->prev) {
+               DRM_ERROR("entry already on freelist.\n");
+               return;
+       }
+
+       head = &dev_priv->head;
+       first = head->next;
+
+       entry->prev = head;
+       entry->next = first;
+       first->prev = entry;
+       head->next = entry;
+}
+
+/*
+ * Command DMA
+ */
+/*
+ * Set up the per-page bookkeeping for the command DMA buffer.
+ *
+ * The page count is cmd_dma->size divided by SAVAGE_DMA_PAGE_SIZE*4.
+ * Every page starts with age (0, 0) and nothing used or flushed.
+ * Returns 0 on success or DRM_ERR(ENOMEM) if the page array cannot be
+ * allocated.
+ */
+static int savage_dma_init(drm_savage_private_t *dev_priv)
+{
+       drm_savage_dma_page_t *pg;
+       unsigned int idx;
+
+       dev_priv->nr_dma_pages =
+               dev_priv->cmd_dma->size / (SAVAGE_DMA_PAGE_SIZE*4);
+       dev_priv->dma_pages =
+               drm_alloc(sizeof(drm_savage_dma_page_t) *
+                         dev_priv->nr_dma_pages, DRM_MEM_DRIVER);
+       if (!dev_priv->dma_pages)
+               return DRM_ERR(ENOMEM);
+
+       for (idx = 0; idx < dev_priv->nr_dma_pages; ++idx) {
+               pg = &dev_priv->dma_pages[idx];
+               SET_AGE(&pg->age, 0, 0);
+               pg->used = 0;
+               pg->flushed = 0;
+       }
+       SET_AGE(&dev_priv->last_dma_age, 0, 0);
+
+       dev_priv->current_dma_page = 0;
+       dev_priv->first_dma_page = 0;
+
+       return 0;
+}
+
+/*
+ * Reset the command DMA bookkeeping: emit a fresh event, stamp every
+ * page and last_dma_age with it, clear all used/flushed counters and
+ * rewind to page 0.
+ */
+void savage_dma_reset(drm_savage_private_t *dev_priv)
+{
+       drm_savage_dma_page_t *pg;
+       uint16_t event;
+       unsigned int wrap, idx;
+
+       event = savage_bci_emit_event(dev_priv, 0);
+       wrap = dev_priv->event_wrap;
+
+       for (idx = 0; idx < dev_priv->nr_dma_pages; ++idx) {
+               pg = &dev_priv->dma_pages[idx];
+               SET_AGE(&pg->age, event, wrap);
+               pg->used = 0;
+               pg->flushed = 0;
+       }
+       SET_AGE(&dev_priv->last_dma_age, event, wrap);
+
+       dev_priv->current_dma_page = 0;
+       dev_priv->first_dma_page = 0;
+}
+
+/*
+ * Wait until the given command DMA page is no longer newer than the
+ * event tag currently reported by the hardware.  Does nothing when the
+ * faked DMA buffer is in use.  A failing wait_evnt is only logged.
+ */
+void savage_dma_wait(drm_savage_private_t *dev_priv, unsigned int page)
+{
+       drm_savage_dma_page_t *pg;
+       uint16_t event;
+       unsigned int wrap;
+
+       /* Faked DMA buffer pages don't age. */
+       if (dev_priv->cmd_dma == &dev_priv->fake_dma)
+               return;
+
+       UPDATE_EVENT_COUNTER();
+       if (!dev_priv->status_ptr)
+               event = SAVAGE_READ(SAVAGE_STATUS_WORD1) & 0xffff;
+       else
+               event = dev_priv->status_ptr[1] & 0xffff;
+       wrap = dev_priv->event_wrap;
+       if (event > dev_priv->event_counter)
+               wrap--; /* hardware hasn't passed the last wrap yet */
+
+       /* nothing to wait for if the page age is not ahead of the hardware */
+       pg = &dev_priv->dma_pages[page];
+       if (pg->age.wrap < wrap ||
+           (pg->age.wrap == wrap && pg->age.event <= event))
+               return;
+
+       if (dev_priv->wait_evnt(dev_priv, pg->age.event) < 0)
+               DRM_ERROR("wait_evnt failed!\n");
+}
+
+uint32_t *savage_dma_alloc(drm_savage_private_t *dev_priv, unsigned int n)
+{
+       unsigned int cur = dev_priv->current_dma_page;
+       unsigned int rest = SAVAGE_DMA_PAGE_SIZE -
+               dev_priv->dma_pages[cur].used; /* entries still free in the current page */
+       unsigned int nr_pages = (n - rest + SAVAGE_DMA_PAGE_SIZE-1) /
+               SAVAGE_DMA_PAGE_SIZE; /* pages needed beyond the current one, rounded up */
+       uint32_t *dma_ptr; /* start of the reserved space; this is the return value */
+       unsigned int i;
+
+       DRM_DEBUG("cur=%u, cur->used=%u, n=%u, rest=%u, nr_pages=%u\n",
+                 cur, dev_priv->dma_pages[cur].used, n, rest, nr_pages);
+
+       if (cur + nr_pages < dev_priv->nr_dma_pages) { /* request fits before the end of the buffer */
+               dma_ptr = (uint32_t *)dev_priv->cmd_dma->handle +
+                       cur*SAVAGE_DMA_PAGE_SIZE +
+                       dev_priv->dma_pages[cur].used;
+               if (n < rest)
+                       rest = n; /* take only what is needed from the current page */
+               dev_priv->dma_pages[cur].used += rest;
+               n -= rest; /* what is left goes into the following pages */
+               cur++;
+       } else { /* not enough room: flush and restart at the beginning of the buffer */
+               dev_priv->dma_flush(dev_priv);
+               nr_pages = (n + SAVAGE_DMA_PAGE_SIZE-1) / SAVAGE_DMA_PAGE_SIZE;
+               for (i = cur; i < dev_priv->nr_dma_pages; ++i) { /* retire the skipped pages at the end */
+                       dev_priv->dma_pages[i].age = dev_priv->last_dma_age;
+                       dev_priv->dma_pages[i].used = 0;
+                       dev_priv->dma_pages[i].flushed = 0;
+               }
+               dma_ptr = (uint32_t *)dev_priv->cmd_dma->handle;
+               dev_priv->first_dma_page = cur = 0;
+       }
+       for (i = cur; nr_pages > 0; ++i, --nr_pages) { /* mark the remaining pages as used */
+#if SAVAGE_DMA_DEBUG
+               if (dev_priv->dma_pages[i].used) {
+                       DRM_ERROR("unflushed page %u: used=%u\n",
+                                 i, dev_priv->dma_pages[i].used);
+               }
+#endif
+               if (n > SAVAGE_DMA_PAGE_SIZE)
+                       dev_priv->dma_pages[i].used = SAVAGE_DMA_PAGE_SIZE;
+               else
+                       dev_priv->dma_pages[i].used = n;
+               n -= SAVAGE_DMA_PAGE_SIZE; /* unsigned: may wrap on the last pass; n is only printed afterwards */
+       }
+       dev_priv->current_dma_page = --i; /* last page touched by this allocation */
+
+       DRM_DEBUG("cur=%u, cur->used=%u, n=%u\n",
+                 i, dev_priv->dma_pages[i].used, n);
+
+       savage_dma_wait(dev_priv, dev_priv->current_dma_page);
+
+       return dma_ptr;
+}
+
+static void savage_dma_flush(drm_savage_private_t *dev_priv)
+{
+       unsigned int first = dev_priv->first_dma_page;
+       unsigned int cur = dev_priv->current_dma_page;
+       uint16_t event;
+       unsigned int wrap, pad, align, len, i;
+       unsigned long phys_addr;
+       BCI_LOCALS;
+
+       if (first == cur &&
+           dev_priv->dma_pages[cur].used == dev_priv->dma_pages[cur].flushed)
+               return; /* nothing new since the last flush */
+
+       /* pad length to multiples of 2 entries
+        * align start of next DMA block to multiples of 8 entries */
+       pad = -dev_priv->dma_pages[cur].used & 1; /* 1 if used is odd, else 0 */
+       align = -(dev_priv->dma_pages[cur].used + pad) & 7; /* entries up to the next multiple of 8 */
+
+       DRM_DEBUG("first=%u, cur=%u, first->flushed=%u, cur->used=%u, "
+                 "pad=%u, align=%u\n",
+                 first, cur, dev_priv->dma_pages[first].flushed,
+                 dev_priv->dma_pages[cur].used, pad, align);
+
+       /* pad with noops */
+       if (pad) {
+               uint32_t *dma_ptr = (uint32_t *)dev_priv->cmd_dma->handle +
+                       cur * SAVAGE_DMA_PAGE_SIZE +
+                       dev_priv->dma_pages[cur].used;
+               dev_priv->dma_pages[cur].used += pad;
+               while(pad != 0) {
+                       *dma_ptr++ = BCI_CMD_WAIT; /* BCI_CMD_WAIT without wait flags serves as the noop */
+                       pad--;
+               }
+       }
+
+       DRM_MEMORYBARRIER();
+
+       /* do flush ... */
+       phys_addr = dev_priv->cmd_dma->offset +
+               (first * SAVAGE_DMA_PAGE_SIZE +
+                dev_priv->dma_pages[first].flushed) * 4; /* entry index scaled by 4 to a byte offset */
+       len = (cur - first) * SAVAGE_DMA_PAGE_SIZE +
+               dev_priv->dma_pages[cur].used -
+               dev_priv->dma_pages[first].flushed; /* entries from first unflushed to end of cur */
+
+       DRM_DEBUG("phys_addr=%lx, len=%u\n",
+                 phys_addr | dev_priv->dma_type, len);
+
+       BEGIN_BCI(3);
+       BCI_SET_REGISTERS(SAVAGE_DMABUFADDR, 1);
+       BCI_WRITE(phys_addr | dev_priv->dma_type);
+       BCI_DMA(len);
+
+       /* fix alignment of the start of the next block */
+       dev_priv->dma_pages[cur].used += align;
+
+       /* age DMA pages */
+       event = savage_bci_emit_event(dev_priv, 0);
+       wrap = dev_priv->event_wrap;
+       for (i = first; i < cur; ++i) {
+               SET_AGE(&dev_priv->dma_pages[i].age, event, wrap);
+               dev_priv->dma_pages[i].used = 0;
+               dev_priv->dma_pages[i].flushed = 0;
+       }
+       /* age the current page only when it's full */
+       if (dev_priv->dma_pages[cur].used == SAVAGE_DMA_PAGE_SIZE) {
+               SET_AGE(&dev_priv->dma_pages[cur].age, event, wrap);
+               dev_priv->dma_pages[cur].used = 0;
+               dev_priv->dma_pages[cur].flushed = 0;
+               /* advance to next page */
+               cur++;
+               if (cur == dev_priv->nr_dma_pages)
+                       cur = 0; /* wrap around to the first page */
+               dev_priv->first_dma_page = dev_priv->current_dma_page = cur;
+       } else {
+               dev_priv->first_dma_page = cur;
+               dev_priv->dma_pages[cur].flushed = dev_priv->dma_pages[i].used; /* NOTE(review): i == cur here provided first <= cur */
+       }
+       SET_AGE(&dev_priv->last_dma_age, event, wrap);
+
+       DRM_DEBUG("first=cur=%u, cur->used=%u, cur->flushed=%u\n", cur,
+                 dev_priv->dma_pages[cur].used,
+                 dev_priv->dma_pages[cur].flushed);
+}
+
+static void savage_fake_dma_flush(drm_savage_private_t *dev_priv)
+{
+       unsigned int i, j;
+       BCI_LOCALS;
+
+       if (dev_priv->first_dma_page == dev_priv->current_dma_page &&
+           dev_priv->dma_pages[dev_priv->current_dma_page].used == 0)
+               return;
+
+       DRM_DEBUG("first=%u, cur=%u, cur->used=%u\n",
+                 dev_priv->first_dma_page, dev_priv->current_dma_page,
+                 dev_priv->dma_pages[dev_priv->current_dma_page].used);
+
+       for (i = dev_priv->first_dma_page;
+            i <= dev_priv->current_dma_page && dev_priv->dma_pages[i].used;
+            ++i) {
+               uint32_t *dma_ptr = (uint32_t *)dev_priv->cmd_dma->handle +
+                       i * SAVAGE_DMA_PAGE_SIZE;
+#if SAVAGE_DMA_DEBUG
+               /* Sanity check: all pages except the last one must be full. */
+               if (i < dev_priv->current_dma_page &&
+                   dev_priv->dma_pages[i].used != SAVAGE_DMA_PAGE_SIZE) {
+                       DRM_ERROR("partial DMA page %u: used=%u",
+                                 i, dev_priv->dma_pages[i].used);
+               }
+#endif
+               BEGIN_BCI(dev_priv->dma_pages[i].used);
+               for (j = 0; j < dev_priv->dma_pages[i].used; ++j) {
+                       BCI_WRITE(dma_ptr[j]);
+               }
+               dev_priv->dma_pages[i].used = 0;
+       }
+
+       /* reset to first page */
+       dev_priv->first_dma_page = dev_priv->current_dma_page = 0;
+}
+
+/*
+ * Initialize mappings. On Savage4 and SavageIX the alignment
+ * and size of the aperture is not suitable for automatic MTRR setup
+ * in drm_addmap. Therefore we do it manually before the maps are
+ * initialized. We also need to take care of deleting the MTRRs in
+ * postcleanup.
+ *
+ * Allocates and zeroes the drm_savage_private_t, stores it in
+ * dev->dev_private, picks the MMIO, frame buffer and aperture base
+ * addresses according to the chipset, and registers the three maps
+ * with drm_addmap. Returns 0 on success, DRM_ERR(ENOMEM) if the
+ * private data cannot be allocated, or the error from drm_addmap.
+ *
+ * NOTE(review): the error returns after the allocation do not undo the
+ * allocation or the MTRRs added so far; presumably the caller runs
+ * savage_postcleanup in that case -- confirm.
+ */
+int savage_preinit(drm_device_t *dev, unsigned long chipset)
+{
+       drm_savage_private_t *dev_priv;
+       unsigned long mmio_base, fb_base, fb_size, aperture_base;
+       /* fb_rsrc and aper_rsrc aren't really used currently, but still exist
+        * in case we decide we need information on the BAR for BSD in the
+        * future.
+        */
+       unsigned int fb_rsrc, aper_rsrc;
+       int ret = 0;
+
+       dev_priv = drm_alloc(sizeof(drm_savage_private_t), DRM_MEM_DRIVER);
+       if (dev_priv == NULL)
+               return DRM_ERR(ENOMEM);
+
+       memset(dev_priv, 0, sizeof(drm_savage_private_t));
+       dev->dev_private = (void *)dev_priv;
+       dev_priv->chipset = (enum savage_family)chipset;
+
+       dev_priv->mtrr[0].handle = -1; /* negative handle: no MTRR to delete in postcleanup */
+       dev_priv->mtrr[1].handle = -1;
+       dev_priv->mtrr[2].handle = -1;
+       if (S3_SAVAGE3D_SERIES(dev_priv->chipset)) {
+               fb_rsrc = 0;
+               fb_base = drm_get_resource_start(dev, 0);
+               fb_size = SAVAGE_FB_SIZE_S3;
+               mmio_base = fb_base + SAVAGE_FB_SIZE_S3; /* MMIO follows the frame buffer in resource 0 */
+               aper_rsrc = 0;
+               aperture_base = fb_base + SAVAGE_APERTURE_OFFSET;
+               /* this should always be true */
+               if (drm_get_resource_len(dev, 0) == 0x08000000) {
+                       /* Don't make MMIO write-combining! We need 3
+                        * MTRRs. */
+                       dev_priv->mtrr[0].base = fb_base;
+                       dev_priv->mtrr[0].size = 0x01000000;
+                       dev_priv->mtrr[0].handle = mtrr_add(
+                               dev_priv->mtrr[0].base, dev_priv->mtrr[0].size,
+                               MTRR_TYPE_WRCOMB, 1);
+                       dev_priv->mtrr[1].base = fb_base+0x02000000;
+                       dev_priv->mtrr[1].size = 0x02000000;
+                       dev_priv->mtrr[1].handle = mtrr_add(
+                               dev_priv->mtrr[1].base, dev_priv->mtrr[1].size,
+                               MTRR_TYPE_WRCOMB, 1);
+                       dev_priv->mtrr[2].base = fb_base+0x04000000;
+                       dev_priv->mtrr[2].size = 0x04000000;
+                       dev_priv->mtrr[2].handle = mtrr_add(
+                               dev_priv->mtrr[2].base, dev_priv->mtrr[2].size,
+                               MTRR_TYPE_WRCOMB, 1);
+               } else {
+                       DRM_ERROR("strange pci_resource_len %08lx\n",
+                                 drm_get_resource_len(dev, 0));
+               }
+       } else if (chipset != S3_SUPERSAVAGE && chipset != S3_SAVAGE2000) {
+               mmio_base = drm_get_resource_start(dev, 0);
+               fb_rsrc = 1;
+               fb_base = drm_get_resource_start(dev, 1);
+               fb_size = SAVAGE_FB_SIZE_S4;
+               aper_rsrc = 1;
+               aperture_base = fb_base + SAVAGE_APERTURE_OFFSET;
+               /* this should always be true */
+               if (drm_get_resource_len(dev, 1) == 0x08000000) {
+                       /* Can use one MTRR to cover both fb and
+                        * aperture. */
+                       dev_priv->mtrr[0].base = fb_base;
+                       dev_priv->mtrr[0].size = 0x08000000;
+                       dev_priv->mtrr[0].handle = mtrr_add(
+                               dev_priv->mtrr[0].base, dev_priv->mtrr[0].size,
+                               MTRR_TYPE_WRCOMB, 1);
+               } else {
+                       DRM_ERROR("strange pci_resource_len %08lx\n",
+                                 drm_get_resource_len(dev, 1));
+               }
+       } else { /* S3_SUPERSAVAGE and S3_SAVAGE2000: separate resources for fb and aperture */
+               mmio_base = drm_get_resource_start(dev, 0);
+               fb_rsrc = 1;
+               fb_base = drm_get_resource_start(dev, 1);
+               fb_size = drm_get_resource_len(dev, 1);
+               aper_rsrc = 2;
+               aperture_base = drm_get_resource_start(dev, 2);
+               /* Automatic MTRR setup will do the right thing. */
+       }
+
+       ret = drm_addmap(dev, mmio_base, SAVAGE_MMIO_SIZE, _DRM_REGISTERS,
+                        _DRM_READ_ONLY, &dev_priv->mmio);
+       if (ret)
+               return ret;
+
+       ret = drm_addmap(dev, fb_base, fb_size, _DRM_FRAME_BUFFER,
+                        _DRM_WRITE_COMBINING, &dev_priv->fb);
+       if (ret)
+               return ret;
+
+       ret = drm_addmap(dev, aperture_base, SAVAGE_APERTURE_SIZE,
+                        _DRM_FRAME_BUFFER, _DRM_WRITE_COMBINING,
+                        &dev_priv->aperture);
+       if (ret)
+               return ret;
+
+       return ret;
+}
+
+/*
+ * Delete MTRRs and free device-private data.
+ *
+ * Only the MTRR slots whose handle is non-negative are deleted.
+ * Always returns 0.
+ */
+int savage_postcleanup(drm_device_t *dev)
+{
+       drm_savage_private_t *dev_priv = dev->dev_private;
+       int slot = 0;
+
+       while (slot < 3) {
+               if (dev_priv->mtrr[slot].handle >= 0) {
+                       mtrr_del(dev_priv->mtrr[slot].handle,
+                                dev_priv->mtrr[slot].base,
+                                dev_priv->mtrr[slot].size);
+               }
+               ++slot;
+       }
+
+       drm_free(dev_priv, sizeof(drm_savage_private_t), DRM_MEM_DRIVER);
+
+       return 0;
+}
+
+static int savage_do_init_bci(drm_device_t *dev, drm_savage_init_t *init)
+{
+       drm_savage_private_t *dev_priv = dev->dev_private;
+
+       if (init->fb_bpp != 16 && init->fb_bpp != 32) {
+               DRM_ERROR("invalid frame buffer bpp %d!\n", init->fb_bpp);
+               return DRM_ERR(EINVAL);
+       }
+       if (init->depth_bpp != 16 && init->depth_bpp != 32) {
+               DRM_ERROR("invalid depth buffer bpp %d!\n", init->fb_bpp);
+               return DRM_ERR(EINVAL);
+       }
+       if (init->dma_type != SAVAGE_DMA_AGP &&
+           init->dma_type != SAVAGE_DMA_PCI) {
+               DRM_ERROR("invalid dma memory type %d!\n", init->dma_type);
+               return DRM_ERR(EINVAL);
+       }
+
+       dev_priv->cob_size = init->cob_size;
+       dev_priv->bci_threshold_lo = init->bci_threshold_lo;
+       dev_priv->bci_threshold_hi = init->bci_threshold_hi;
+       dev_priv->dma_type = init->dma_type;
+
+       dev_priv->fb_bpp = init->fb_bpp;
+       dev_priv->front_offset = init->front_offset;
+       dev_priv->front_pitch = init->front_pitch;
+       dev_priv->back_offset = init->back_offset;
+       dev_priv->back_pitch = init->back_pitch;
+       dev_priv->depth_bpp = init->depth_bpp;
+       dev_priv->depth_offset = init->depth_offset;
+       dev_priv->depth_pitch = init->depth_pitch;
+
+       dev_priv->texture_offset = init->texture_offset;
+       dev_priv->texture_size = init->texture_size;
+
+       DRM_GETSAREA();
+       if (!dev_priv->sarea) {
+               DRM_ERROR("could not find sarea!\n");
+               savage_do_cleanup_bci(dev);
+               return DRM_ERR(EINVAL);
+       }
+       if (init->status_offset != 0) {
+               dev_priv->status = drm_core_findmap(dev, init->status_offset);
+               if (!dev_priv->status) {
+                       DRM_ERROR("could not find shadow status region!\n");
+                       savage_do_cleanup_bci(dev);
+                       return DRM_ERR(EINVAL);
+               }
+       } else {
+               dev_priv->status = NULL;
+       }
+       if (dev_priv->dma_type == SAVAGE_DMA_AGP && init->buffers_offset) {
+               dev->agp_buffer_map = drm_core_findmap(dev,
+                                                      init->buffers_offset);
+               if (!dev->agp_buffer_map) {
+                       DRM_ERROR("could not find DMA buffer region!\n");
+                       savage_do_cleanup_bci(dev);
+                       return DRM_ERR(EINVAL);
+               }
+               drm_core_ioremap(dev->agp_buffer_map, dev);
+               if (!dev->agp_buffer_map) {
+                       DRM_ERROR("failed to ioremap DMA buffer region!\n");
+                       savage_do_cleanup_bci(dev);
+                       return DRM_ERR(ENOMEM);
+               }
+       }
+       if (init->agp_textures_offset) {
+               dev_priv->agp_textures =
+                       drm_core_findmap(dev, init->agp_textures_offset);
+               if (!dev_priv->agp_textures) {
+                       DRM_ERROR("could not find agp texture region!\n");
+                       savage_do_cleanup_bci(dev);
+                       return DRM_ERR(EINVAL);
+               }
+       } else {
+               dev_priv->agp_textures = NULL;
+       }
+
+       if (init->cmd_dma_offset) {
+               if (S3_SAVAGE3D_SERIES(dev_priv->chipset)) {
+                       DRM_ERROR("command DMA not supported on "
+                                 "Savage3D/MX/IX.\n");
+                       savage_do_cleanup_bci(dev);
+                       return DRM_ERR(EINVAL);
+               }
+               if (dev->dma && dev->dma->buflist) {
+                       DRM_ERROR("command and vertex DMA not supported "
+                                 "at the same time.\n");
+                       savage_do_cleanup_bci(dev);
+                       return DRM_ERR(EINVAL);
+               }
+               dev_priv->cmd_dma = drm_core_findmap(dev, init->cmd_dma_offset);
+               if (!dev_priv->cmd_dma) {
+                       DRM_ERROR("could not find command DMA region!\n");
+                       savage_do_cleanup_bci(dev);
+                       return DRM_ERR(EINVAL);
+               }
+               if (dev_priv->dma_type == SAVAGE_DMA_AGP) {
+                       if (dev_priv->cmd_dma->type != _DRM_AGP) {
+                               DRM_ERROR("AGP command DMA region is not a "
+                                         "_DRM_AGP map!\n");
+                               savage_do_cleanup_bci(dev);
+                               return DRM_ERR(EINVAL);
+                       }
+                       drm_core_ioremap(dev_priv->cmd_dma, dev);
+                       if (!dev_priv->cmd_dma->handle) {
+                               DRM_ERROR("failed to ioremap command "
+                                         "DMA region!\n");
+                               savage_do_cleanup_bci(dev);
+                               return DRM_ERR(ENOMEM);
+                       }
+               } else if (dev_priv->cmd_dma->type != _DRM_CONSISTENT) {
+                       DRM_ERROR("PCI command DMA region is not a "
+                                 "_DRM_CONSISTENT map!\n");
+                       savage_do_cleanup_bci(dev);
+                       return DRM_ERR(EINVAL);
+               }
+       } else {
+               dev_priv->cmd_dma = NULL;
+       }
+
+       dev_priv->dma_flush = savage_dma_flush;
+       if (!dev_priv->cmd_dma) {
+               DRM_DEBUG("falling back to faked command DMA.\n");
+               dev_priv->fake_dma.offset = 0;
+               dev_priv->fake_dma.size = SAVAGE_FAKE_DMA_SIZE;
+               dev_priv->fake_dma.type = _DRM_SHM;
+               dev_priv->fake_dma.handle = drm_alloc(SAVAGE_FAKE_DMA_SIZE,
+                                                     DRM_MEM_DRIVER);
+               if (!dev_priv->fake_dma.handle) {
+                       DRM_ERROR("could not allocate faked DMA buffer!\n");
+                       savage_do_cleanup_bci(dev);
+                       return DRM_ERR(ENOMEM);
+               }
+               dev_priv->cmd_dma = &dev_priv->fake_dma;
+               dev_priv->dma_flush = savage_fake_dma_flush;
+       }
+
+       dev_priv->sarea_priv =
+               (drm_savage_sarea_t *)((uint8_t *)dev_priv->sarea->handle +
+                                      init->sarea_priv_offset);
+
+       /* setup bitmap descriptors */
+       {
+               unsigned int color_tile_format;
+               unsigned int depth_tile_format;
+               unsigned int front_stride, back_stride, depth_stride;
+               if (dev_priv->chipset <= S3_SAVAGE4) {
+                       color_tile_format = dev_priv->fb_bpp == 16 ?
+                               SAVAGE_BD_TILE_16BPP : SAVAGE_BD_TILE_32BPP;
+                       depth_tile_format = dev_priv->depth_bpp == 16 ?
+                               SAVAGE_BD_TILE_16BPP : SAVAGE_BD_TILE_32BPP;
+               } else {
+                       color_tile_format = SAVAGE_BD_TILE_DEST;
+                       depth_tile_format = SAVAGE_BD_TILE_DEST;
+               }
+               front_stride = dev_priv->front_pitch / (dev_priv->fb_bpp/8);
+               back_stride  = dev_priv-> back_pitch / (dev_priv->fb_bpp/8);
+               depth_stride = dev_priv->depth_pitch / (dev_priv->depth_bpp/8);
+
+               dev_priv->front_bd = front_stride | SAVAGE_BD_BW_DISABLE |
+                       (dev_priv->fb_bpp << SAVAGE_BD_BPP_SHIFT) |
+                       (color_tile_format << SAVAGE_BD_TILE_SHIFT);
+
+               dev_priv-> back_bd =  back_stride | SAVAGE_BD_BW_DISABLE |
+                       (dev_priv->fb_bpp << SAVAGE_BD_BPP_SHIFT) |
+                       (color_tile_format << SAVAGE_BD_TILE_SHIFT);
+
+               dev_priv->depth_bd = depth_stride | SAVAGE_BD_BW_DISABLE |
+                       (dev_priv->depth_bpp << SAVAGE_BD_BPP_SHIFT) |
+                       (depth_tile_format << SAVAGE_BD_TILE_SHIFT);
+       }
+
+       /* setup status and bci ptr */
+       dev_priv->event_counter = 0;
+       dev_priv->event_wrap = 0;
+       dev_priv->bci_ptr = (volatile uint32_t *)
+           ((uint8_t *)dev_priv->mmio->handle + SAVAGE_BCI_OFFSET);
+       if (S3_SAVAGE3D_SERIES(dev_priv->chipset)) {
+               dev_priv->status_used_mask = SAVAGE_FIFO_USED_MASK_S3D;
+       } else {
+               dev_priv->status_used_mask = SAVAGE_FIFO_USED_MASK_S4;
+       }
+       if (dev_priv->status != NULL) {
+               dev_priv->status_ptr =
+                       (volatile uint32_t *)dev_priv->status->handle;
+               dev_priv->wait_fifo = savage_bci_wait_fifo_shadow;
+               dev_priv->wait_evnt = savage_bci_wait_event_shadow;
+               dev_priv->status_ptr[1023] = dev_priv->event_counter;
+       } else {
+               dev_priv->status_ptr = NULL;
+               if (S3_SAVAGE3D_SERIES(dev_priv->chipset)) {
+                       dev_priv->wait_fifo = savage_bci_wait_fifo_s3d;
+               } else {
+                       dev_priv->wait_fifo = savage_bci_wait_fifo_s4;
+               }
+               dev_priv->wait_evnt = savage_bci_wait_event_reg;
+       }
+
+       /* cliprect functions */
+       if (S3_SAVAGE3D_SERIES(dev_priv->chipset))
+               dev_priv->emit_clip_rect = savage_emit_clip_rect_s3d;
+       else
+               dev_priv->emit_clip_rect = savage_emit_clip_rect_s4;
+
+       if (savage_freelist_init(dev) < 0) {
+               DRM_ERROR("could not initialize freelist\n");
+               savage_do_cleanup_bci(dev);
+               return DRM_ERR(ENOMEM);
+       }
+
+       if (savage_dma_init(dev_priv) <  0) {
+               DRM_ERROR("could not initialize command DMA\n");
+               savage_do_cleanup_bci(dev);
+               return DRM_ERR(ENOMEM);
+       }
+
+       return 0;
+}
+
+/*
+ * Release the command DMA and AGP buffer resources held in dev_priv.
+ *
+ * This is reached both from the error paths of savage_do_init_bci() and
+ * from the SAVAGE_CLEANUP_BCI ioctl, so it can run more than once for the
+ * same device.  Every pointer that is freed or unmapped here is cleared
+ * afterwards so that a repeated call cannot release the same resource
+ * twice.
+ *
+ * Always returns 0.
+ */
+int savage_do_cleanup_bci(drm_device_t *dev)
+{
+       drm_savage_private_t *dev_priv = dev->dev_private;
+
+       if (dev_priv->cmd_dma == &dev_priv->fake_dma) {
+               /* faked command DMA: the buffer was obtained with drm_alloc */
+               if (dev_priv->fake_dma.handle) {
+                       drm_free(dev_priv->fake_dma.handle,
+                                SAVAGE_FAKE_DMA_SIZE, DRM_MEM_DRIVER);
+                       dev_priv->fake_dma.handle = NULL;
+               }
+       } else if (dev_priv->cmd_dma && dev_priv->cmd_dma->handle &&
+                  dev_priv->cmd_dma->type == _DRM_AGP &&
+                  dev_priv->dma_type == SAVAGE_DMA_AGP) {
+               /* real AGP command DMA: undo the ioremap done at init */
+               drm_core_ioremapfree(dev_priv->cmd_dma, dev);
+       }
+       /* forget the region so a second cleanup does not touch it again */
+       dev_priv->cmd_dma = NULL;
+
+       if (dev_priv->dma_type == SAVAGE_DMA_AGP &&
+           dev->agp_buffer_map && dev->agp_buffer_map->handle) {
+               drm_core_ioremapfree(dev->agp_buffer_map, dev);
+               /* make sure the next instance (which may be running
+                * in PCI mode) doesn't try to use an old
+                * agp_buffer_map. */
+               dev->agp_buffer_map = NULL;
+       }
+
+       if (dev_priv->dma_pages) {
+               drm_free(dev_priv->dma_pages,
+                        sizeof(drm_savage_dma_page_t)*dev_priv->nr_dma_pages,
+                        DRM_MEM_DRIVER);
+               dev_priv->dma_pages = NULL;
+       }
+
+       return 0;
+}
+
+/*
+ * DRM_SAVAGE_BCI_INIT ioctl handler.
+ *
+ * Copies a drm_savage_init_t from user space and dispatches on its func
+ * field: SAVAGE_INIT_BCI sets the BCI up, SAVAGE_CLEANUP_BCI tears it
+ * down.  Any other value yields EINVAL.
+ */
+static int savage_bci_init(DRM_IOCTL_ARGS)
+{
+       DRM_DEVICE;
+       drm_savage_init_t init;
+
+       LOCK_TEST_WITH_RETURN(dev, filp);
+
+       DRM_COPY_FROM_USER_IOCTL(init, (drm_savage_init_t __user *)data,
+                                sizeof(init));
+
+       if (init.func == SAVAGE_INIT_BCI)
+               return savage_do_init_bci(dev, &init);
+       if (init.func == SAVAGE_CLEANUP_BCI)
+               return savage_do_cleanup_bci(dev);
+
+       /* unknown request */
+       return DRM_ERR(EINVAL);
+}
+
+/*
+ * DRM_SAVAGE_BCI_EVENT_EMIT ioctl handler.
+ *
+ * Emits an event with the flags supplied by the caller and writes the
+ * resulting counter back to the count field of the user's structure:
+ * the low 16 bits hold the event tag, the bits above hold the driver's
+ * current wrap count.
+ */
+static int savage_bci_event_emit(DRM_IOCTL_ARGS)
+{
+       DRM_DEVICE;
+       drm_savage_private_t *dev_priv = dev->dev_private;
+       drm_savage_event_emit_t __user *uevent =
+               (drm_savage_event_emit_t __user *)data;
+       drm_savage_event_emit_t event;
+       uint16_t tag;
+
+       DRM_DEBUG("\n");
+
+       LOCK_TEST_WITH_RETURN(dev, filp);
+
+       DRM_COPY_FROM_USER_IOCTL(event, uevent, sizeof(event));
+
+       /* emit first: event_wrap is only read after the event was emitted */
+       tag = savage_bci_emit_event(dev_priv, event.flags);
+       event.count = tag | (dev_priv->event_wrap << 16);
+
+       DRM_COPY_TO_USER_IOCTL(&uevent->count, event.count,
+                              sizeof(event.count));
+       return 0;
+}
+
+/*
+ * DRM_SAVAGE_BCI_EVENT_WAIT ioctl handler.
+ *
+ * event.count uses the encoding produced by savage_bci_event_emit(): the
+ * low 16 bits are the event tag, the bits above are the wrap count.
+ *
+ * Returns 0 without waiting if the hardware has already reached the
+ * event, otherwise the result of dev_priv->wait_evnt() for the event tag.
+ */
+static int savage_bci_event_wait(DRM_IOCTL_ARGS)
+{
+       DRM_DEVICE;
+       drm_savage_private_t *dev_priv = dev->dev_private;
+       drm_savage_event_wait_t event;
+       unsigned int event_e, hw_e;
+       unsigned int event_w, hw_w;
+
+       DRM_DEBUG("\n");
+
+       DRM_COPY_FROM_USER_IOCTL(event, (drm_savage_event_wait_t __user *)data,
+                                sizeof(event));
+
+       /* NOTE(review): macro defined outside this chunk; presumably it
+        * refreshes dev_priv->event_counter / event_wrap -- confirm. */
+       UPDATE_EVENT_COUNTER();
+       /* current hardware event tag: from the status page if one is
+        * mapped, otherwise from the status register */
+       if (dev_priv->status_ptr)
+               hw_e = dev_priv->status_ptr[1] & 0xffff;
+       else
+               hw_e = SAVAGE_READ(SAVAGE_STATUS_WORD1) & 0xffff;
+       hw_w = dev_priv->event_wrap;
+       if (hw_e > dev_priv->event_counter)
+               hw_w--; /* hardware hasn't passed the last wrap yet */
+
+       /* split the caller's counter into event tag and wrap count */
+       event_e = event.count & 0xffff;
+       event_w = event.count >> 16;
+
+       /* Don't need to wait if
+        * - event counter wrapped since the event was emitted or
+        * - the hardware has advanced up to or over the event to wait for.
+        */
+       if (event_w < hw_w || (event_w == hw_w && event_e <= hw_e) )
+               return 0;
+       else
+               return dev_priv->wait_evnt(dev_priv, event_e);
+}
+
+/*
+ * DMA buffer management
+ */
+
+/*
+ * Hand out DMA buffers from the freelist until d->request_count buffers
+ * have been granted.  For each buffer the owner is set to filp and its
+ * index and total size are copied to the user arrays in d.
+ *
+ * Returns 0 on success, EAGAIN if the freelist runs dry and EFAULT if a
+ * copy to user space fails; d->granted_count tells how many buffers were
+ * handed out before the failure.
+ */
+static int savage_bci_get_buffers(DRMFILE filp, drm_device_t *dev, drm_dma_t *d)
+{
+       int slot = d->granted_count;
+
+       while (slot < d->request_count) {
+               drm_buf_t *buf = savage_freelist_get(dev);
+
+               if (buf == NULL)
+                       return DRM_ERR(EAGAIN);
+
+               buf->filp = filp;
+
+               if (DRM_COPY_TO_USER(&d->request_indices[slot],
+                                    &buf->idx, sizeof(buf->idx)) ||
+                   DRM_COPY_TO_USER(&d->request_sizes[slot],
+                                    &buf->total, sizeof(buf->total)))
+                       return DRM_ERR(EFAULT);
+
+               d->granted_count++;
+               slot++;
+       }
+
+       return 0;
+}
+
+/*
+ * DMA ioctl handler: grants DMA buffers to the calling client.
+ *
+ * Sending buffers is not supported (send_count must be 0) and
+ * request_count must lie within 0..dma->buf_count; both violations yield
+ * EINVAL.  The updated drm_dma_t (including granted_count) is copied back
+ * to user space, and the result of savage_bci_get_buffers() is returned.
+ */
+int savage_bci_buffers(DRM_IOCTL_ARGS)
+{
+       DRM_DEVICE;
+       drm_device_dma_t *dma = dev->dma;
+       drm_dma_t __user *argp = (drm_dma_t __user *)data;
+       drm_dma_t d;
+       int ret;
+
+       LOCK_TEST_WITH_RETURN(dev, filp);
+
+       DRM_COPY_FROM_USER_IOCTL(d, argp, sizeof(d));
+
+       /* clients may only receive buffers, never send them */
+       if (d.send_count) {
+               DRM_ERROR("Process %d trying to send %d buffers via drmDMA\n",
+                         DRM_CURRENTPID, d.send_count);
+               return DRM_ERR(EINVAL);
+       }
+
+       /* the request must fit into the pool of existing buffers */
+       if (d.request_count < 0 || d.request_count > dma->buf_count) {
+               DRM_ERROR("Process %d trying to get %d buffers (of %d max)\n",
+                         DRM_CURRENTPID, d.request_count, dma->buf_count);
+               return DRM_ERR(EINVAL);
+       }
+
+       d.granted_count = 0;
+       ret = d.request_count ? savage_bci_get_buffers(filp, dev, &d) : 0;
+
+       DRM_COPY_TO_USER_IOCTL(argp, d, sizeof(d));
+
+       return ret;
+}
+
+/*
+ * Take back the DMA buffers owned by filp.
+ *
+ * Every buffer that belongs to filp and is not linked into a list (both
+ * next and prev are NULL) gets a fresh age from a newly emitted
+ * SAVAGE_WAIT_3D event and is put back on the freelist.  Afterwards the
+ * generic drm_core_reclaim_buffers() is run.
+ */
+void savage_reclaim_buffers(drm_device_t *dev, DRMFILE filp)
+{
+       drm_device_dma_t *dma = dev->dma;
+       drm_savage_private_t *dev_priv = dev->dev_private;
+       int i;
+
+       /* nothing to do without DMA state, private data or buffers */
+       if (!dma || !dev_priv || !dma->buflist)
+               return;
+
+       for (i = 0; i < dma->buf_count; i++) {
+               drm_buf_t *buf = dma->buflist[i];
+               drm_savage_buf_priv_t *buf_priv = buf->dev_private;
+               uint16_t event;
+
+               if (buf->filp != filp || !buf_priv)
+                       continue;
+               if (buf_priv->next != NULL || buf_priv->prev != NULL)
+                       continue;
+
+               DRM_DEBUG("reclaimed from client\n");
+               event = savage_bci_emit_event(dev_priv, SAVAGE_WAIT_3D);
+               SET_AGE(&buf_priv->age, event, dev_priv->event_wrap);
+               savage_freelist_put(dev, buf);
+       }
+
+       drm_core_reclaim_buffers(dev, filp);
+}
+
+
+/*
+ * Driver-private ioctl dispatch table, indexed by ioctl number.
+ * NOTE(review): the two integers after each handler are flags of
+ * drm_ioctl_desc_t, presumably "authentication needed" and "root only";
+ * confirm against the drm_ioctl_desc_t definition.
+ */
+drm_ioctl_desc_t savage_ioctls[] = {
+       [DRM_IOCTL_NR(DRM_SAVAGE_BCI_INIT)] = {savage_bci_init, 1, 1},
+       [DRM_IOCTL_NR(DRM_SAVAGE_BCI_CMDBUF)] = {savage_bci_cmdbuf, 1, 0},
+       [DRM_IOCTL_NR(DRM_SAVAGE_BCI_EVENT_EMIT)] = {savage_bci_event_emit, 1, 0},
+       [DRM_IOCTL_NR(DRM_SAVAGE_BCI_EVENT_WAIT)] = {savage_bci_event_wait, 1, 0},
+};
+
+/* number of entries in savage_ioctls[]; copied into driver.num_ioctls */
+int savage_max_ioctl = DRM_ARRAY_SIZE(savage_ioctls);
diff --git a/drivers/char/drm/savage_drm.h b/drivers/char/drm/savage_drm.h
new file mode 100644 (file)
index 0000000..6526c9a
--- /dev/null
@@ -0,0 +1,209 @@
+/* savage_drm.h -- Public header for the savage driver
+ *
+ * Copyright 2004  Felix Kuehling
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sub license,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NON-INFRINGEMENT. IN NO EVENT SHALL FELIX KUEHLING BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
+ * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef __SAVAGE_DRM_H__
+#define __SAVAGE_DRM_H__
+
+#ifndef __SAVAGE_SAREA_DEFINES__
+#define __SAVAGE_SAREA_DEFINES__
+
+/* 2 heaps (1 for card, 1 for agp), each divided into up to
+ * SAVAGE_NR_TEX_REGIONS (16) regions, subject to a minimum region size
+ * of (1<<SAVAGE_LOG_MIN_TEX_REGION_SIZE) == (1<<16) == 64k.
+ *
+ * Clients may subdivide regions internally, but when sharing between
+ * clients, the region size is the minimum granularity.
+ */
+
+#define SAVAGE_CARD_HEAP               0
+#define SAVAGE_AGP_HEAP                        1
+#define SAVAGE_NR_TEX_HEAPS            2
+#define SAVAGE_NR_TEX_REGIONS          16
+#define SAVAGE_LOG_MIN_TEX_REGION_SIZE 16
+
+#endif /* __SAVAGE_SAREA_DEFINES__ */
+
+/* Driver-private part of the shared area.  The kernel driver locates it
+ * at sarea_priv_offset bytes into the SAREA map (see drm_savage_init).
+ */
+typedef struct _drm_savage_sarea {
+       /* LRU lists for texture memory in agp space and on the card.
+        * One list per heap, with one extra entry beyond
+        * SAVAGE_NR_TEX_REGIONS.
+        */
+       drm_tex_region_t texList[SAVAGE_NR_TEX_HEAPS][SAVAGE_NR_TEX_REGIONS+1];
+       unsigned int texAge[SAVAGE_NR_TEX_HEAPS];
+
+       /* Mechanism to validate card state.
+        */
+       int ctxOwner;
+} drm_savage_sarea_t, *drm_savage_sarea_ptr;
+
+/* Savage-specific ioctls
+ *
+ * The DRM_SAVAGE_BCI_* values are offsets relative to DRM_COMMAND_BASE;
+ * the DRM_IOCTL_SAVAGE_* macros build the full request codes together
+ * with the direction and the argument type.
+ */
+#define DRM_SAVAGE_BCI_INIT            0x00
+#define DRM_SAVAGE_BCI_CMDBUF           0x01
+#define DRM_SAVAGE_BCI_EVENT_EMIT      0x02
+#define DRM_SAVAGE_BCI_EVENT_WAIT      0x03
+
+#define DRM_IOCTL_SAVAGE_INIT          DRM_IOW( DRM_COMMAND_BASE + DRM_SAVAGE_BCI_INIT, drm_savage_init_t)
+#define DRM_IOCTL_SAVAGE_CMDBUF                DRM_IOW( DRM_COMMAND_BASE + DRM_SAVAGE_BCI_CMDBUF, drm_savage_cmdbuf_t)
+#define DRM_IOCTL_SAVAGE_EVENT_EMIT    DRM_IOWR(DRM_COMMAND_BASE + DRM_SAVAGE_BCI_EVENT_EMIT, drm_savage_event_emit_t)
+#define DRM_IOCTL_SAVAGE_EVENT_WAIT    DRM_IOW( DRM_COMMAND_BASE + DRM_SAVAGE_BCI_EVENT_WAIT, drm_savage_event_wait_t)
+
+/* values for drm_savage_init.dma_type */
+#define SAVAGE_DMA_PCI 1
+#define SAVAGE_DMA_AGP 3
+/* Argument of the DRM_SAVAGE_BCI_INIT ioctl. */
+typedef struct drm_savage_init {
+       /* requested operation: set up or tear down the BCI */
+       enum {
+               SAVAGE_INIT_BCI = 1,
+               SAVAGE_CLEANUP_BCI = 2
+       } func;
+       /* byte offset of drm_savage_sarea_t inside the SAREA map */
+       unsigned int sarea_priv_offset;
+
+       /* some parameters */
+       unsigned int cob_size;
+       unsigned int bci_threshold_lo, bci_threshold_hi;
+       unsigned int dma_type;  /* SAVAGE_DMA_PCI or SAVAGE_DMA_AGP */
+
+       /* frame buffer layout; the driver divides each pitch by the
+        * respective bpp/8 to get the stride for the bitmap descriptors */
+       unsigned int fb_bpp;
+       unsigned int front_offset, front_pitch;
+       unsigned int back_offset, back_pitch;
+       unsigned int depth_bpp;
+       unsigned int depth_offset, depth_pitch;
+
+       /* local textures */
+       unsigned int texture_offset;
+       unsigned int texture_size;
+
+       /* physical locations of non-permanent maps */
+       unsigned long status_offset;
+       unsigned long buffers_offset;
+       unsigned long agp_textures_offset;
+       unsigned long cmd_dma_offset;
+} drm_savage_init_t;
+
+typedef union drm_savage_cmd_header drm_savage_cmd_header_t;
+/* Argument of the DRM_SAVAGE_BCI_CMDBUF ioctl.  All pointers refer to the
+ * client's address space.
+ */
+typedef struct drm_savage_cmdbuf {
+                               /* command buffer in client's address space */
+       drm_savage_cmd_header_t __user *cmd_addr;
+       unsigned int size;      /* size of the command buffer in 64bit units */
+
+       unsigned int dma_idx;   /* DMA buffer index to use */
+       int discard;            /* discard DMA buffer when done */
+                               /* vertex buffer in client's address space */
+       unsigned int __user *vb_addr;
+       unsigned int vb_size;   /* size of client vertex buffer in bytes */
+       unsigned int vb_stride; /* stride of vertices in 32bit words */
+                               /* boxes in client's address space */
+       drm_clip_rect_t __user *box_addr;
+       unsigned int nbox;      /* number of clipping boxes */
+} drm_savage_cmdbuf_t;
+
+/* flags for drm_savage_event.flags */
+#define SAVAGE_WAIT_2D  0x1 /* wait for 2D idle before updating event tag */
+#define SAVAGE_WAIT_3D  0x2 /* wait for 3D idle before updating event tag */
+#define SAVAGE_WAIT_IRQ 0x4 /* emit or wait for IRQ, not implemented yet */
+/* Argument of the BCI_EVENT_EMIT and BCI_EVENT_WAIT ioctls.
+ * count: low 16 bits are the event tag, the bits above are the wrap
+ * count; written by EVENT_EMIT and read by EVENT_WAIT.
+ */
+typedef struct drm_savage_event {
+       unsigned int count;
+       unsigned int flags;
+} drm_savage_event_emit_t, drm_savage_event_wait_t;
+
+/* Commands for the cmdbuf ioctl
+ */
+#define SAVAGE_CMD_STATE       0  /* a range of state registers */
+#define SAVAGE_CMD_DMA_PRIM    1  /* vertices from DMA buffer */
+#define SAVAGE_CMD_VB_PRIM     2  /* vertices from client vertex buffer */
+#define SAVAGE_CMD_DMA_IDX     3  /* indexed vertices from DMA buffer */
+#define SAVAGE_CMD_VB_IDX      4  /* indexed vertices from client vertex buffer */
+#define SAVAGE_CMD_CLEAR       5  /* clear buffers */
+#define SAVAGE_CMD_SWAP                6  /* swap buffers */
+
+/* Primitive types
+*/
+#define SAVAGE_PRIM_TRILIST    0  /* triangle list */
+#define SAVAGE_PRIM_TRISTRIP   1  /* triangle strip */
+#define SAVAGE_PRIM_TRIFAN     2  /* triangle fan */
+#define SAVAGE_PRIM_TRILIST_201        3  /* reorder verts for correct flat
+                                   * shading on s3d */
+
+/* Skip flags (vertex format)
+ * One bit per vertex component; the ST0/ST1 and ALL values are
+ * combinations of the single-component bits.
+ */
+#define SAVAGE_SKIP_Z          0x01
+#define SAVAGE_SKIP_W          0x02
+#define SAVAGE_SKIP_C0         0x04
+#define SAVAGE_SKIP_C1         0x08
+#define SAVAGE_SKIP_S0         0x10
+#define SAVAGE_SKIP_T0         0x20
+#define SAVAGE_SKIP_ST0                0x30
+#define SAVAGE_SKIP_S1         0x40
+#define SAVAGE_SKIP_T1         0x80
+#define SAVAGE_SKIP_ST1                0xc0
+#define SAVAGE_SKIP_ALL_S3D    0x3f
+#define SAVAGE_SKIP_ALL_S4     0xff
+
+/* Buffer names for clear command
+ */
+#define SAVAGE_FRONT           0x1
+#define SAVAGE_BACK            0x2
+#define SAVAGE_DEPTH           0x4
+
+/* 64-bit command header
+ *
+ * Every variant starts with the command byte (one of the SAVAGE_CMD_*
+ * values), except clear1, which is the data word that follows a clear0
+ * header.
+ */
+union drm_savage_cmd_header {
+       struct {
+               unsigned char cmd;      /* command */
+               unsigned char pad0;
+               unsigned short pad1;
+               unsigned short pad2;
+               unsigned short pad3;
+       } cmd; /* generic */
+       struct {
+               unsigned char cmd;
+               unsigned char global;   /* need idle engine? */
+               unsigned short count;   /* number of consecutive registers */
+               unsigned short start;   /* first register */
+               unsigned short pad3;
+       } state; /* SAVAGE_CMD_STATE */
+       struct {
+               unsigned char cmd;
+               unsigned char prim;     /* primitive type */
+               unsigned short skip;    /* vertex format (skip flags) */
+               unsigned short count;   /* number of vertices */
+               unsigned short start;   /* first vertex in DMA/vertex buffer */
+       } prim; /* SAVAGE_CMD_DMA_PRIM, SAVAGE_CMD_VB_PRIM */
+       struct {
+               unsigned char cmd;
+               unsigned char prim;     /* primitive type */
+               unsigned short skip;    /* vertex format (skip flags) */
+               unsigned short count;   /* number of indices that follow */
+               unsigned short pad3;
+       } idx; /* SAVAGE_CMD_DMA_IDX, SAVAGE_CMD_VB_IDX */
+       struct {
+               unsigned char cmd;
+               unsigned char pad0;
+               unsigned short pad1;
+               unsigned int flags;
+       } clear0; /* SAVAGE_CMD_CLEAR */
+       struct {
+               unsigned int mask;
+               unsigned int value;
+       } clear1; /* SAVAGE_CMD_CLEAR data */
+};
+
+#endif
diff --git a/drivers/char/drm/savage_drv.c b/drivers/char/drm/savage_drv.c
new file mode 100644 (file)
index 0000000..ac8d270
--- /dev/null
@@ -0,0 +1,112 @@
+/* savage_drv.c -- Savage driver for Linux
+ *
+ * Copyright 2004  Felix Kuehling
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sub license,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NON-INFRINGEMENT. IN NO EVENT SHALL FELIX KUEHLING BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
+ * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <linux/config.h>
+#include "drmP.h"
+#include "savage_drm.h"
+#include "savage_drv.h"
+
+#include "drm_pciids.h"
+
+/*
+ * Post-initialization hook: log the driver name, version and date together
+ * with the minor number and the PCI device it was bound to.  The flags
+ * argument is not used.  Always returns 0.
+ */
+static int postinit(struct drm_device *dev, unsigned long flags)
+{
+       DRM_INFO("Initialized %s %d.%d.%d %s on minor %d: %s\n",
+                DRIVER_NAME, DRIVER_MAJOR, DRIVER_MINOR, DRIVER_PATCHLEVEL,
+                DRIVER_DATE, dev->primary.minor,
+                pci_pretty_name(dev->pdev));
+
+       return 0;
+}
+
+/*
+ * Version hook: fill in the numeric driver version and copy the name,
+ * date and description strings into the caller's drm_version_t.
+ * Returns 0 when the end of the function is reached.
+ */
+static int version( drm_version_t *version )
+{
+       /* NOTE(review): not referenced directly in this function;
+        * presumably used inside the DRM_COPY macro -- confirm before
+        * removing. */
+       int len;
+
+       version->version_major = DRIVER_MAJOR;
+       version->version_minor = DRIVER_MINOR;
+       version->version_patchlevel = DRIVER_PATCHLEVEL;
+       DRM_COPY( version->name, DRIVER_NAME );
+       DRM_COPY( version->date, DRIVER_DATE );
+       DRM_COPY( version->desc, DRIVER_DESC );
+       return 0;
+}
+
+/* PCI IDs handled by this driver; the list itself comes from the
+ * savage_PCI_IDS macro (drm_pciids.h is included above). */
+static struct pci_device_id pciidlist[] = {
+       savage_PCI_IDS
+};
+
+extern drm_ioctl_desc_t savage_ioctls[];
+extern int savage_max_ioctl;
+
+/*
+ * DRM driver description: feature flags, driver hooks, ioctl table, file
+ * operations and the PCI driver record.  num_ioctls is not set here; it
+ * is filled in by savage_init() before the driver is registered.
+ */
+static struct drm_driver driver = {
+       .driver_features =
+           DRIVER_USE_AGP | DRIVER_USE_MTRR |
+           DRIVER_HAVE_DMA | DRIVER_PCI_DMA,
+       /* size of the per-buffer private data, not of drm_savage_private_t */
+       .dev_priv_size = sizeof(drm_savage_buf_priv_t),
+       .preinit = savage_preinit,
+       .postinit = postinit,
+       .postcleanup = savage_postcleanup,
+       .reclaim_buffers = savage_reclaim_buffers,
+       .get_map_ofs = drm_core_get_map_ofs,
+       .get_reg_ofs = drm_core_get_reg_ofs,
+       .version = version,
+       .ioctls = savage_ioctls,
+       .dma_ioctl = savage_bci_buffers,
+       .fops = {
+               .owner   = THIS_MODULE,
+               .open    = drm_open,
+               .release = drm_release,
+               .ioctl   = drm_ioctl,
+               .mmap    = drm_mmap,
+               .poll = drm_poll,
+               .fasync  = drm_fasync,
+       },
+       .pci_driver = {
+               .name          = DRIVER_NAME,
+               .id_table      = pciidlist,
+       }
+};
+
+/* Module entry point: record the size of the ioctl table in the driver
+ * description, then register the driver with the DRM core.  Returns the
+ * result of drm_init(). */
+static int __init savage_init(void)
+{
+       driver.num_ioctls = savage_max_ioctl;
+       return drm_init(&driver);
+}
+
+/* Module exit point: hand the driver description back to drm_exit(). */
+static void __exit savage_exit(void)
+{
+       drm_exit(&driver);
+}
+
+module_init(savage_init);
+module_exit(savage_exit);
+
+MODULE_AUTHOR( DRIVER_AUTHOR );
+MODULE_DESCRIPTION( DRIVER_DESC );
+MODULE_LICENSE("GPL and additional rights");
diff --git a/drivers/char/drm/savage_drv.h b/drivers/char/drm/savage_drv.h
new file mode 100644 (file)
index 0000000..a454349
--- /dev/null
@@ -0,0 +1,579 @@
+/* savage_drv.h -- Private header for the savage driver
+ *
+ * Copyright 2004  Felix Kuehling
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sub license,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NON-INFRINGEMENT. IN NO EVENT SHALL FELIX KUEHLING BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
+ * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef __SAVAGE_DRV_H__
+#define __SAVAGE_DRV_H__
+
+/* Driver identification.  These strings and the major.minor.patchlevel
+ * version are what savage_drv.c logs at initialization and reports through
+ * its version hook. */
+#define DRIVER_AUTHOR  "Felix Kuehling"
+
+#define DRIVER_NAME    "savage"
+#define DRIVER_DESC    "Savage3D/MX/IX, Savage4, SuperSavage, Twister, ProSavage[DDR]"
+#define DRIVER_DATE    "20050313"
+
+#define DRIVER_MAJOR           2
+#define DRIVER_MINOR           4
+#define DRIVER_PATCHLEVEL      1
+/* Interface history:
+ *
+ * 1.x   The DRM driver from the VIA/S3 code drop, basically a dummy
+ * 2.0   The first real DRM
+ * 2.1   Scissors registers managed by the DRM, 3D operations clipped by
+ *       cliprects of the cmdbuf ioctl
+ * 2.2   Implemented SAVAGE_CMD_DMA_IDX and SAVAGE_CMD_VB_IDX
+ * 2.3   Event counters used by BCI_EVENT_EMIT/WAIT ioctls are now 32 bits
+ *       wide and thus very long lived (unlikely to ever wrap). The size
+ *       in the struct was 32 bits before, but only 16 bits were used
+ * 2.4   Implemented command DMA. Now drm_savage_init_t.cmd_dma_offset is
+ *       actually used
+ */
+
+/* Age stamp: a 16 bit event tag plus the wrap count that was current when
+ * the tag was recorded. */
+typedef struct drm_savage_age {
+       uint16_t event;
+       unsigned int wrap;
+} drm_savage_age_t;
+
+/* Per-buffer private data (reached through drm_buf_t.dev_private).
+ * next/prev link the buffer into a doubly linked list; both are NULL
+ * while the buffer is not on a list. */
+typedef struct drm_savage_buf_priv {
+       struct drm_savage_buf_priv *next;
+       struct drm_savage_buf_priv *prev;
+       drm_savage_age_t age;
+       drm_buf_t *buf;
+} drm_savage_buf_priv_t;
+
+/* Bookkeeping for one page of the command DMA buffer.
+ * NOTE(review): used/flushed are presumably fill levels within the page;
+ * confirm against the users in savage_bci.c. */
+typedef struct drm_savage_dma_page {
+       drm_savage_age_t age;
+       unsigned int used, flushed;
+} drm_savage_dma_page_t;
+#define SAVAGE_DMA_PAGE_SIZE 1024 /* in dwords */
+/* Fake DMA buffer size in bytes. 4 pages. Allows a maximum command
+ * size of 16kbytes or 4k entries. Minimum requirement would be
+ * 10kbytes for 255 40-byte vertices in one drawing command. */
+#define SAVAGE_FAKE_DMA_SIZE (SAVAGE_DMA_PAGE_SIZE*4*4)
+
+/* interesting bits of hardware state that are saved in dev_priv
+ *
+ * The s3d and s4 variants each start with padding of the size of the
+ * common part, so their own fields are laid out after the common fields
+ * and do not overlap them. */
+typedef union {
+       struct drm_savage_common_state {
+               uint32_t vbaddr;
+       } common;
+       struct {
+               unsigned char pad[sizeof(struct drm_savage_common_state)];
+               uint32_t texctrl, texaddr;
+               uint32_t scstart, new_scstart;
+               uint32_t scend, new_scend;
+       } s3d;
+       struct {
+               unsigned char pad[sizeof(struct drm_savage_common_state)];
+               uint32_t texdescr, texaddr0, texaddr1;
+               uint32_t drawctrl0, new_drawctrl0;
+               uint32_t drawctrl1, new_drawctrl1;
+       } s4;
+} drm_savage_state_t;
+
+/* these chip tags should match the ones in the 2D driver in savage_regs.h.
+ * The order is significant: the driver and the S3_*_SERIES macros compare
+ * chip tags with <= and >=. */
+enum savage_family {
+       S3_UNKNOWN = 0,
+       S3_SAVAGE3D,
+       S3_SAVAGE_MX,
+       S3_SAVAGE4,
+       S3_PROSAVAGE,
+       S3_TWISTER,
+       S3_PROSAVAGEDDR,
+       S3_SUPERSAVAGE,
+       S3_SAVAGE2000,
+       S3_LAST
+};
+
+/*
+ * Chip family predicates.  Each takes an enum savage_family value and
+ * evaluates to non-zero if the chip belongs to the named group.  The
+ * argument is parenthesized at every use so that an expression argument
+ * (for example a conditional expression) is compared as a whole.  Note
+ * that the argument may be evaluated more than once, so it must not have
+ * side effects.
+ */
+#define S3_SAVAGE3D_SERIES(chip)  (((chip)>=S3_SAVAGE3D) && ((chip)<=S3_SAVAGE_MX))
+
+#define S3_SAVAGE4_SERIES(chip)  (((chip)==S3_SAVAGE4)            \
+                                  || ((chip)==S3_PROSAVAGE)       \
+                                  || ((chip)==S3_TWISTER)         \
+                                  || ((chip)==S3_PROSAVAGEDDR))
+
+#define        S3_SAVAGE_MOBILE_SERIES(chip)   (((chip)==S3_SAVAGE_MX) || ((chip)==S3_SUPERSAVAGE))
+
+#define S3_SAVAGE_SERIES(chip)    (((chip)>=S3_SAVAGE3D) && ((chip)<=S3_SAVAGE2000))
+
+#define S3_MOBILE_TWISTER_SERIES(chip)   (((chip)==S3_TWISTER)    \
+                                          ||((chip)==S3_PROSAVAGEDDR))
+
+/* flags */
+#define SAVAGE_IS_AGP 1
+
+/* Per-device driver state (dev->dev_private). */
+typedef struct drm_savage_private {
+       /* driver-private part of the SAREA: sarea->handle plus the
+        * sarea_priv_offset passed to the init ioctl */
+       drm_savage_sarea_t *sarea_priv;
+
+       drm_savage_buf_priv_t head, tail;
+
+       /* who am I? */
+       enum savage_family chipset;
+
+       unsigned int cob_size;
+       unsigned int bci_threshold_lo, bci_threshold_hi;
+       unsigned int dma_type;  /* SAVAGE_DMA_PCI or SAVAGE_DMA_AGP */
+
+       /* frame buffer layout */
+       unsigned int fb_bpp;
+       unsigned int front_offset, front_pitch;
+       unsigned int back_offset, back_pitch;
+       unsigned int depth_bpp;
+       unsigned int depth_offset, depth_pitch;
+
+       /* bitmap descriptors for swap and clear */
+       unsigned int front_bd, back_bd, depth_bd;
+
+       /* local textures */
+       unsigned int texture_offset;
+       unsigned int texture_size;
+
+       /* memory regions in physical memory */
+       drm_local_map_t *sarea;
+       drm_local_map_t *mmio;
+       drm_local_map_t *fb;
+       drm_local_map_t *aperture;
+       drm_local_map_t *status;
+       drm_local_map_t *agp_textures;
+       /* command DMA region; points at fake_dma below when the driver
+        * falls back to faked command DMA */
+       drm_local_map_t *cmd_dma;
+       drm_local_map_t fake_dma;
+
+       struct {
+               int handle;
+               unsigned long base, size;
+       } mtrr[3];
+
+       /* BCI and status-related stuff */
+       /* status_ptr is NULL when no status map is available; bci_ptr
+        * points SAVAGE_BCI_OFFSET bytes into the MMIO map */
+       volatile uint32_t *status_ptr, *bci_ptr;
+       uint32_t status_used_mask;
+       uint16_t event_counter;
+       unsigned int event_wrap;
+
+       /* Savage4 command DMA */
+       drm_savage_dma_page_t *dma_pages;
+       unsigned int nr_dma_pages, first_dma_page, current_dma_page;
+       drm_savage_age_t last_dma_age;
+
+       /* saved hw state for global/local check on S3D */
+       uint32_t hw_draw_ctrl, hw_zbuf_ctrl;
+       /* and for scissors (global, so don't emit if not changed) */
+       uint32_t hw_scissors_start, hw_scissors_end;
+
+       drm_savage_state_t state;
+
+       /* after emitting a wait cmd Savage3D needs 63 nops before next DMA */
+       unsigned int waiting;
+
+       /* config/hardware-dependent function pointers */
+       int (*wait_fifo)(struct drm_savage_private *dev_priv, unsigned int n);
+       int (*wait_evnt)(struct drm_savage_private *dev_priv, uint16_t e);
+       /* Err, there is a macro wait_event in include/linux/wait.h.
+        * Avoid unwanted macro expansion. */
+       void (*emit_clip_rect)(struct drm_savage_private *dev_priv,
+                              drm_clip_rect_t *pbox);
+       /* flushes real command DMA or, in the fallback case, faked DMA */
+       void (*dma_flush)(struct drm_savage_private *dev_priv);
+} drm_savage_private_t;
+
+/* ioctls */
+/* Both entry points take the generic DRM ioctl argument list
+ * (DRM_IOCTL_ARGS). */
+extern int savage_bci_cmdbuf(DRM_IOCTL_ARGS);
+extern int savage_bci_buffers(DRM_IOCTL_ARGS);
+
+/* BCI functions */
+extern uint16_t savage_bci_emit_event(drm_savage_private_t *dev_priv,
+                                     unsigned int flags);
+extern void savage_freelist_put(drm_device_t *dev, drm_buf_t *buf);
+/* Called by DMA_COMMIT() when the DMA write pointer does not match the
+ * amount that was reserved with BEGIN_DMA(). */
+extern void savage_dma_reset(drm_savage_private_t *dev_priv);
+/* Called by the BEGIN_DMA() fast path before a command DMA page whose
+ * "used" count is still 0 gets its first words. */
+extern void savage_dma_wait(drm_savage_private_t *dev_priv, unsigned int page);
+/* Slow path of BEGIN_DMA(): used when n words do not fit into the rest
+ * of the current command DMA page. Returns the pointer to write to. */
+extern uint32_t *savage_dma_alloc(drm_savage_private_t *dev_priv,
+                                 unsigned int n);
+extern int savage_preinit(drm_device_t *dev, unsigned long chipset);
+extern int savage_postcleanup(drm_device_t *dev);
+extern int savage_do_cleanup_bci(drm_device_t *dev);
+extern void savage_reclaim_buffers(drm_device_t *dev, DRMFILE filp);
+
+/* state functions */
+/* Scissor emitters for the two register layouts; their signature matches
+ * the emit_clip_rect function pointer in drm_savage_private_t. */
+extern void savage_emit_clip_rect_s3d(drm_savage_private_t *dev_priv,
+                                     drm_clip_rect_t *pbox);
+extern void savage_emit_clip_rect_s4(drm_savage_private_t *dev_priv,
+                                    drm_clip_rect_t *pbox);
+
+#define SAVAGE_FB_SIZE_S3      0x01000000      /*  16MB */
+#define SAVAGE_FB_SIZE_S4      0x02000000      /*  32MB */
+#define SAVAGE_MMIO_SIZE        0x00080000     /* 512kB */
+#define SAVAGE_APERTURE_OFFSET  0x02000000     /*  32MB */
+#define SAVAGE_APERTURE_SIZE    0x05000000     /* 5 tiled surfaces, 16MB each */
+
+#define SAVAGE_BCI_OFFSET       0x00010000      /* offset of the BCI region
+                                                * inside the MMIO region */
+#define SAVAGE_BCI_FIFO_SIZE   32              /* number of entries in on-chip
+                                                * BCI FIFO */
+
+/*
+ * MMIO registers
+ */
+#define SAVAGE_STATUS_WORD0            0x48C00
+#define SAVAGE_STATUS_WORD1            0x48C04
+#define SAVAGE_ALT_STATUS_WORD0        0x48C60
+
+#define SAVAGE_FIFO_USED_MASK_S3D      0x0001ffff
+#define SAVAGE_FIFO_USED_MASK_S4       0x001fffff
+
+/* Copied from savage_bci.h in the 2D driver with some renaming. */
+
+/* Bitmap descriptors */
+#define SAVAGE_BD_STRIDE_SHIFT 0
+#define SAVAGE_BD_BPP_SHIFT   16
+#define SAVAGE_BD_TILE_SHIFT  24
+#define SAVAGE_BD_BW_DISABLE  (1<<28)
+/* common: */
+#define        SAVAGE_BD_TILE_LINEAR           0
+/* savage4, MX, IX, 3D */
+#define        SAVAGE_BD_TILE_16BPP            2
+#define        SAVAGE_BD_TILE_32BPP            3
+/* twister, prosavage, DDR, supersavage, 2000 */
+#define        SAVAGE_BD_TILE_DEST             1
+#define        SAVAGE_BD_TILE_TEXTURE          2
+/* GBD - BCI enable */
+/* savage4, MX, IX, 3D */
+#define SAVAGE_GBD_BCI_ENABLE                    8
+/* twister, prosavage, DDR, supersavage, 2000 */
+#define SAVAGE_GBD_BCI_ENABLE_TWISTER            0
+
+#define SAVAGE_GBD_BIG_ENDIAN                    4
+#define SAVAGE_GBD_LITTLE_ENDIAN                 0
+#define SAVAGE_GBD_64                            1
+
+/*  Global Bitmap Descriptor */
+#define SAVAGE_BCI_GLB_BD_LOW             0x8168
+#define SAVAGE_BCI_GLB_BD_HIGH            0x816C
+
+/*
+ * BCI registers
+ */
+/* Savage4/Twister/ProSavage 3D registers */
+#define SAVAGE_DRAWLOCALCTRL_S4                0x1e
+#define SAVAGE_TEXPALADDR_S4           0x1f
+#define SAVAGE_TEXCTRL0_S4             0x20
+#define SAVAGE_TEXCTRL1_S4             0x21
+#define SAVAGE_TEXADDR0_S4             0x22
+#define SAVAGE_TEXADDR1_S4             0x23
+#define SAVAGE_TEXBLEND0_S4            0x24
+#define SAVAGE_TEXBLEND1_S4            0x25
+#define SAVAGE_TEXXPRCLR_S4            0x26 /* never used */
+#define SAVAGE_TEXDESCR_S4             0x27
+#define SAVAGE_FOGTABLE_S4             0x28
+#define SAVAGE_FOGCTRL_S4              0x30
+#define SAVAGE_STENCILCTRL_S4          0x31
+#define SAVAGE_ZBUFCTRL_S4             0x32
+#define SAVAGE_ZBUFOFF_S4              0x33
+#define SAVAGE_DESTCTRL_S4             0x34
+#define SAVAGE_DRAWCTRL0_S4            0x35
+#define SAVAGE_DRAWCTRL1_S4            0x36
+#define SAVAGE_ZWATERMARK_S4           0x37
+#define SAVAGE_DESTTEXRWWATERMARK_S4   0x38
+#define SAVAGE_TEXBLENDCOLOR_S4                0x39
+/* Savage3D/MX/IX 3D registers */
+#define SAVAGE_TEXPALADDR_S3D          0x18
+#define SAVAGE_TEXXPRCLR_S3D           0x19 /* never used */
+#define SAVAGE_TEXADDR_S3D             0x1A
+#define SAVAGE_TEXDESCR_S3D            0x1B
+#define SAVAGE_TEXCTRL_S3D             0x1C
+#define SAVAGE_FOGTABLE_S3D            0x20
+#define SAVAGE_FOGCTRL_S3D             0x30
+#define SAVAGE_DRAWCTRL_S3D            0x31
+#define SAVAGE_ZBUFCTRL_S3D            0x32
+#define SAVAGE_ZBUFOFF_S3D             0x33
+#define SAVAGE_DESTCTRL_S3D            0x34
+#define SAVAGE_SCSTART_S3D             0x35
+#define SAVAGE_SCEND_S3D               0x36
+#define SAVAGE_ZWATERMARK_S3D          0x37 
+#define SAVAGE_DESTTEXRWWATERMARK_S3D  0x38
+/* common stuff */
+#define SAVAGE_VERTBUFADDR             0x3e
+#define SAVAGE_BITPLANEWTMASK          0xd7
+#define SAVAGE_DMABUFADDR              0x51
+
+/* texture enable bits (needed for tex addr checking) */
+#define SAVAGE_TEXCTRL_TEXEN_MASK      0x00010000 /* S3D */
+#define SAVAGE_TEXDESCR_TEX0EN_MASK    0x02000000 /* S4 */
+#define SAVAGE_TEXDESCR_TEX1EN_MASK    0x04000000 /* S4 */
+
+/* Global fields in Savage4/Twister/ProSavage 3D registers:
+ *
+ * All texture registers and DrawLocalCtrl are local. All other
+ * registers are global. */
+
+/* Global fields in Savage3D/MX/IX 3D registers:
+ *
+ * All texture registers are local. DrawCtrl and ZBufCtrl are
+ * partially local. All other registers are global.
+ *
+ * DrawCtrl global fields: cullMode, alphaTestCmpFunc, alphaTestEn, alphaRefVal
+ * ZBufCtrl global fields: zCmpFunc, zBufEn
+ */
+#define SAVAGE_DRAWCTRL_S3D_GLOBAL     0x03f3c00c
+#define SAVAGE_ZBUFCTRL_S3D_GLOBAL     0x00000027
+
+/* Masks for scissor bits (drawCtrl[01] on s4, scissorStart/End on s3d)
+ */
+#define SAVAGE_SCISSOR_MASK_S4         0x00fff7ff
+#define SAVAGE_SCISSOR_MASK_S3D                0x07ff07ff
+
+/*
+ * BCI commands
+ */
+#define BCI_CMD_NOP                  0x40000000
+#define BCI_CMD_RECT                 0x48000000
+#define BCI_CMD_RECT_XP              0x01000000
+#define BCI_CMD_RECT_YP              0x02000000
+#define BCI_CMD_SCANLINE             0x50000000
+#define BCI_CMD_LINE                 0x5C000000
+#define BCI_CMD_LINE_LAST_PIXEL      0x58000000
+#define BCI_CMD_BYTE_TEXT            0x63000000
+#define BCI_CMD_NT_BYTE_TEXT         0x67000000
+#define BCI_CMD_BIT_TEXT             0x6C000000
+/* Extract the 8-bit raster operation stored in bits 16-23 of a command. */
+#define BCI_CMD_GET_ROP(cmd)         (((cmd) >> 16) & 0xFF)
+/* OR a raster operation into bits 16-23 of cmd (which must be an lvalue).
+ * rop is parenthesized so that an argument such as "a | b" is masked as a
+ * whole. ROP bits already set in cmd are not cleared first. */
+#define BCI_CMD_SET_ROP(cmd, rop)    ((cmd) |= (((rop) & 0xFF) << 16))
+#define BCI_CMD_SEND_COLOR           0x00008000
+
+#define BCI_CMD_CLIP_NONE            0x00000000
+#define BCI_CMD_CLIP_CURRENT         0x00002000
+#define BCI_CMD_CLIP_LR              0x00004000
+#define BCI_CMD_CLIP_NEW             0x00006000
+
+#define BCI_CMD_DEST_GBD             0x00000000
+#define BCI_CMD_DEST_PBD             0x00000800
+#define BCI_CMD_DEST_PBD_NEW         0x00000C00
+#define BCI_CMD_DEST_SBD             0x00001000
+#define BCI_CMD_DEST_SBD_NEW         0x00001400
+
+#define BCI_CMD_SRC_TRANSPARENT      0x00000200
+#define BCI_CMD_SRC_SOLID            0x00000000
+#define BCI_CMD_SRC_GBD              0x00000020
+#define BCI_CMD_SRC_COLOR            0x00000040
+#define BCI_CMD_SRC_MONO             0x00000060
+#define BCI_CMD_SRC_PBD_COLOR        0x00000080
+#define BCI_CMD_SRC_PBD_MONO         0x000000A0
+#define BCI_CMD_SRC_PBD_COLOR_NEW    0x000000C0
+#define BCI_CMD_SRC_PBD_MONO_NEW     0x000000E0
+#define BCI_CMD_SRC_SBD_COLOR        0x00000100
+#define BCI_CMD_SRC_SBD_MONO         0x00000120
+#define BCI_CMD_SRC_SBD_COLOR_NEW    0x00000140
+#define BCI_CMD_SRC_SBD_MONO_NEW     0x00000160
+
+#define BCI_CMD_PAT_TRANSPARENT      0x00000010
+#define BCI_CMD_PAT_NONE             0x00000000
+#define BCI_CMD_PAT_COLOR            0x00000002
+#define BCI_CMD_PAT_MONO             0x00000003
+#define BCI_CMD_PAT_PBD_COLOR        0x00000004
+#define BCI_CMD_PAT_PBD_MONO         0x00000005
+#define BCI_CMD_PAT_PBD_COLOR_NEW    0x00000006
+#define BCI_CMD_PAT_PBD_MONO_NEW     0x00000007
+#define BCI_CMD_PAT_SBD_COLOR        0x00000008
+#define BCI_CMD_PAT_SBD_MONO         0x00000009
+#define BCI_CMD_PAT_SBD_COLOR_NEW    0x0000000A
+#define BCI_CMD_PAT_SBD_MONO_NEW     0x0000000B
+
+#define BCI_BD_BW_DISABLE            0x10000000
+#define BCI_BD_TILE_MASK             0x03000000
+#define BCI_BD_TILE_NONE             0x00000000
+#define BCI_BD_TILE_16               0x02000000
+#define BCI_BD_TILE_32               0x03000000
+#define BCI_BD_GET_BPP(bd)           (((bd) >> 16) & 0xFF)
+#define BCI_BD_SET_BPP(bd, bpp)      ((bd) |= (((bpp) & 0xFF) << 16))
+#define BCI_BD_GET_STRIDE(bd)        ((bd) & 0xFFFF)
+#define BCI_BD_SET_STRIDE(bd, st)    ((bd) |= ((st) & 0xFFFF))
+
+#define BCI_CMD_SET_REGISTER            0x96000000
+
+#define BCI_CMD_WAIT                    0xC0000000
+#define BCI_CMD_WAIT_3D                 0x00010000
+#define BCI_CMD_WAIT_2D                 0x00020000
+
+#define BCI_CMD_UPDATE_EVENT_TAG        0x98000000
+
+#define BCI_CMD_DRAW_PRIM               0x80000000
+#define BCI_CMD_DRAW_INDEXED_PRIM       0x88000000
+#define BCI_CMD_DRAW_CONT               0x01000000
+#define BCI_CMD_DRAW_TRILIST            0x00000000
+#define BCI_CMD_DRAW_TRISTRIP           0x02000000
+#define BCI_CMD_DRAW_TRIFAN             0x04000000
+#define BCI_CMD_DRAW_SKIPFLAGS          0x000000ff
+#define BCI_CMD_DRAW_NO_Z              0x00000001
+#define BCI_CMD_DRAW_NO_W              0x00000002
+#define BCI_CMD_DRAW_NO_CD             0x00000004
+#define BCI_CMD_DRAW_NO_CS             0x00000008
+#define BCI_CMD_DRAW_NO_U0             0x00000010
+#define BCI_CMD_DRAW_NO_V0             0x00000020
+#define BCI_CMD_DRAW_NO_UV0            0x00000030
+#define BCI_CMD_DRAW_NO_U1             0x00000040
+#define BCI_CMD_DRAW_NO_V1             0x00000080
+#define BCI_CMD_DRAW_NO_UV1            0x000000c0
+
+#define BCI_CMD_DMA                    0xa8000000
+
+/* Pack two 12-bit values into one command word: the second-listed
+ * quantity of each pair goes into bits 16-27, the other into bits 0-11
+ * (see the individual expansions; BCI_CLIP_TL/BR put their first
+ * argument in the upper half). Bits outside 0x0FFF0FFF are cleared. */
+#define BCI_W_H(w, h)                ((((h) << 16) | (w)) & 0x0FFF0FFF)
+#define BCI_X_Y(x, y)                ((((y) << 16) | (x)) & 0x0FFF0FFF)
+/* The second parameter used to be named y while the expansion used w,
+ * so the macro silently picked up whatever w was in the caller's scope. */
+#define BCI_X_W(x, w)                ((((w) << 16) | (x)) & 0x0FFF0FFF)
+#define BCI_CLIP_LR(l, r)            ((((r) << 16) | (l)) & 0x0FFF0FFF)
+#define BCI_CLIP_TL(t, l)            ((((t) << 16) | (l)) & 0x0FFF0FFF)
+#define BCI_CLIP_BR(b, r)            ((((b) << 16) | (r)) & 0x0FFF0FFF)
+
+#define BCI_LINE_X_Y(x, y)           (((y) << 16) | ((x) & 0xFFFF))
+#define BCI_LINE_STEPS(diag, axi)    (((axi) << 16) | ((diag) & 0xFFFF))
+#define BCI_LINE_MISC(maj, ym, xp, yp, err) \
+       (((maj) & 0x1FFF) | \
+       ((ym) ? 1<<13 : 0) | \
+       ((xp) ? 1<<14 : 0) | \
+       ((yp) ? 1<<15 : 0) | \
+       ((err) << 16))
+
+/*
+ * common commands
+ */
+/* Emit a SET_REGISTER command word: register count n in bits 16-23
+ * (masked to 8 bits) and first register index in bits 0-15. The BCI_
+ * variant writes through bci_ptr, the DMA_ variant through dma_ptr; the
+ * register values themselves must be written by the caller afterwards. */
+#define BCI_SET_REGISTERS( first, n )                  \
+       BCI_WRITE(BCI_CMD_SET_REGISTER |                \
+                 ((uint32_t)(n) & 0xff) << 16 |        \
+                 ((uint32_t)(first) & 0xffff))
+#define DMA_SET_REGISTERS( first, n )                  \
+       DMA_WRITE(BCI_CMD_SET_REGISTER |                \
+                 ((uint32_t)(n) & 0xff) << 16 |        \
+                 ((uint32_t)(first) & 0xffff))
+
+/* Emit a DRAW_PRIM command word. type and skip are ORed in as given, n is
+ * shifted to bit 16; none of the arguments is masked here. */
+#define BCI_DRAW_PRIMITIVE(n, type, skip)         \
+        BCI_WRITE(BCI_CMD_DRAW_PRIM | (type) | (skip) | \
+                 ((n) << 16))
+#define DMA_DRAW_PRIMITIVE(n, type, skip)         \
+        DMA_WRITE(BCI_CMD_DRAW_PRIM | (type) | (skip) | \
+                 ((n) << 16))
+
+/* Emit a DRAW_INDEXED_PRIM command word for Savage3D: the low bits carry
+ * the first index i0 instead of skip flags. */
+#define BCI_DRAW_INDICES_S3D(n, type, i0)         \
+        BCI_WRITE(BCI_CMD_DRAW_INDEXED_PRIM | (type) |  \
+                 ((n) << 16) | (i0))
+
+/* Emit a DRAW_INDEXED_PRIM command word for Savage4: the low bits carry
+ * the skip flags; all indices follow in separate words. */
+#define BCI_DRAW_INDICES_S4(n, type, skip)        \
+        BCI_WRITE(BCI_CMD_DRAW_INDEXED_PRIM | (type) |  \
+                  (skip) | ((n) << 16))
+
+/* Emit a DMA command word whose operand is n/2 - 1.
+ * NOTE(review): presumably n is a dword count and the hardware counts in
+ * 64-bit units minus one -- confirm against the callers. */
+#define BCI_DMA(n)     \
+       BCI_WRITE(BCI_CMD_DMA | (((n) >> 1) - 1))
+
+/*
+ * access to MMIO
+ *
+ * Both macros expect a drm_savage_private_t pointer named dev_priv in
+ * the expanding scope and address registers by byte offset (reg) into
+ * dev_priv->mmio.
+ */
+#define SAVAGE_READ(reg)       DRM_READ32(  dev_priv->mmio, (reg) )
+/* DRM_WRITE32() takes (map, offset, value). The previous one-argument
+ * form of this macro passed no value and therefore could not be used. */
+#define SAVAGE_WRITE(reg, val) DRM_WRITE32( dev_priv->mmio, (reg), (val) )
+
+/*
+ * access to the burst command interface (BCI)
+ */
+#define SAVAGE_BCI_DEBUG 1
+
+/* Declares the write cursor used by BEGIN_BCI() and BCI_WRITE(). The
+ * expansion already ends in ';'. */
+#define BCI_LOCALS    volatile uint32_t *bci_ptr;
+
+/* Call the wait_fifo hook for n entries, then point the cursor at
+ * dev_priv->bci_ptr. The return value of wait_fifo is not checked. */
+#define BEGIN_BCI( n ) do {                    \
+       dev_priv->wait_fifo(dev_priv, (n));     \
+       bci_ptr = dev_priv->bci_ptr;            \
+} while(0)
+
+/* Store one 32-bit word through the cursor and advance it. */
+#define BCI_WRITE( val ) *bci_ptr++ = (uint32_t)(val)
+
+/* Copy n 32-bit words from the user pointer src to the BCI, one word at
+ * a time. The user range is not checked here. n is evaluated once, and
+ * the helper locals carry a bci_..._ name so that arguments mentioning
+ * the caller's own i or val are not captured by the macro. */
+#define BCI_COPY_FROM_USER(src,n) do {                         \
+    unsigned int bci_i_;                                       \
+    const unsigned int bci_n_ = (n);                           \
+    for (bci_i_ = 0; bci_i_ < bci_n_; ++bci_i_) {              \
+       uint32_t bci_val_;                                      \
+       DRM_GET_USER_UNCHECKED(bci_val_,                        \
+                              &((uint32_t*)(src))[bci_i_]);    \
+       BCI_WRITE(bci_val_);                                    \
+    }                                                          \
+} while(0)
+
+/*
+ * command DMA support
+ */
+#define SAVAGE_DMA_DEBUG 1
+
+/* Declares the write cursor used by BEGIN_DMA(), DMA_WRITE(),
+ * DMA_COPY_FROM_USER() and DMA_COMMIT(). The expansion already ends
+ * in ';'. */
+#define DMA_LOCALS   uint32_t *dma_ptr;
+
+/* Reserve n 32-bit words of command DMA space and set dma_ptr to the
+ * first of them. Expects dev_priv and dma_ptr in the expanding scope.
+ *
+ * If n does not fit into the remainder of the current page the request
+ * is handed to savage_dma_alloc(). Otherwise the words are taken from
+ * the current page directly; savage_dma_wait() is called first when the
+ * page has not been written to yet (used == 0). SAVAGE_DMA_PAGE_SIZE and
+ * the per-page "used" counter are added to a uint32_t pointer, i.e. both
+ * are counted in 32-bit words. */
+#define BEGIN_DMA( n ) do {                                            \
+       unsigned int cur = dev_priv->current_dma_page;                  \
+       unsigned int rest = SAVAGE_DMA_PAGE_SIZE -                      \
+               dev_priv->dma_pages[cur].used;                          \
+       if ((n) > rest) {                                               \
+               dma_ptr = savage_dma_alloc(dev_priv, (n));              \
+       } else { /* fast path for small allocations */                  \
+               dma_ptr = (uint32_t *)dev_priv->cmd_dma->handle +       \
+                       cur * SAVAGE_DMA_PAGE_SIZE +                    \
+                       dev_priv->dma_pages[cur].used;                  \
+               if (dev_priv->dma_pages[cur].used == 0)                 \
+                       savage_dma_wait(dev_priv, cur);                 \
+               dev_priv->dma_pages[cur].used += (n);                   \
+       }                                                               \
+} while(0)
+
+/* Store one 32-bit word through the cursor and advance it. */
+#define DMA_WRITE( val ) *dma_ptr++ = (uint32_t)(val)
+
+/* Copy n 32-bit words (n*4 bytes) from the user pointer src to the
+ * cursor and advance it by n. The user range is not checked here, and n
+ * is evaluated twice. */
+#define DMA_COPY_FROM_USER(src,n) do {                         \
+       DRM_COPY_FROM_USER_UNCHECKED(dma_ptr, (src), (n)*4);    \
+       dma_ptr += n;                                           \
+} while(0)
+
+/* DMA_COMMIT() closes a BEGIN_DMA() sequence. With SAVAGE_DMA_DEBUG set
+ * it checks that dma_ptr ended up exactly at the end of the used part of
+ * the current page, i.e. that as many words were written as had been
+ * reserved; on a mismatch it logs both pointers and calls
+ * savage_dma_reset(). Without SAVAGE_DMA_DEBUG it does nothing. */
+#if SAVAGE_DMA_DEBUG
+#define DMA_COMMIT() do {                                              \
+       unsigned int cur = dev_priv->current_dma_page;                  \
+       uint32_t *expected = (uint32_t *)dev_priv->cmd_dma->handle +    \
+                       cur * SAVAGE_DMA_PAGE_SIZE +                    \
+                       dev_priv->dma_pages[cur].used;                  \
+       if (dma_ptr != expected) {                                      \
+               DRM_ERROR("DMA allocation and use don't match: "        \
+                         "%p != %p\n", expected, dma_ptr);             \
+               savage_dma_reset(dev_priv);                             \
+       }                                                               \
+} while(0)
+#else
+#define DMA_COMMIT() do {/* nothing */} while(0)
+#endif
+
+/* Invoke the chip-specific dma_flush hook stored in dev_priv. */
+#define DMA_FLUSH() dev_priv->dma_flush(dev_priv)
+
+/* Buffer aging via event tag
+ */
+
+/* Refresh dev_priv->event_counter from entry 1023 of the status page,
+ * if a status page is mapped. A value smaller than the previously seen
+ * one is taken as a wrap-around of the 16-bit counter and increments
+ * dev_priv->event_wrap. */
+#define UPDATE_EVENT_COUNTER( ) do {                   \
+       if (dev_priv->status_ptr) {                     \
+               uint16_t count;                         \
+               /* coordinate with Xserver */           \
+               count = dev_priv->status_ptr[1023];     \
+               if (count < dev_priv->event_counter)    \
+                       dev_priv->event_wrap++;         \
+               dev_priv->event_counter = count;        \
+       }                                               \
+} while(0)
+
+/* Store the (event, wrap) pair e/w in the age structure pointed to by
+ * age. */
+#define SET_AGE( age, e, w ) do {      \
+       (age)->event = e;               \
+       (age)->wrap = w;                \
+} while(0)
+
+/* True if *age is not newer than (e, w): wrap counts are compared first,
+ * event numbers only when the wrap counts are equal. */
+#define TEST_AGE( age, e, w )                          \
+       ( (age)->wrap < (w) || ( (age)->wrap == (w) && (age)->event <= (e) ) )
+
+#endif /* __SAVAGE_DRV_H__ */
diff --git a/drivers/char/drm/savage_state.c b/drivers/char/drm/savage_state.c
new file mode 100644 (file)
index 0000000..475695a
--- /dev/null
@@ -0,0 +1,1146 @@
+/* savage_state.c -- State and drawing support for Savage
+ *
+ * Copyright 2004  Felix Kuehling
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sub license,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NON-INFRINGEMENT. IN NO EVENT SHALL FELIX KUEHLING BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
+ * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include "drmP.h"
+#include "savage_drm.h"
+#include "savage_drv.h"
+
+/* Emit the Savage3D scissor registers for one clip rectangle.
+ *
+ * The scissor bit fields of the saved new_scstart/new_scend values are
+ * replaced by the rectangle (x2/y2 are exclusive, hence the -1). The
+ * registers are only written, preceded by a 3D wait, if the resulting
+ * pair differs from what was emitted last.
+ */
+void savage_emit_clip_rect_s3d(drm_savage_private_t *dev_priv,
+                              drm_clip_rect_t *pbox)
+{
+       const uint32_t keep = ~SAVAGE_SCISSOR_MASK_S3D;
+       uint32_t top_left, bottom_right;
+       DMA_LOCALS;
+
+       top_left  = dev_priv->state.s3d.new_scstart & keep;
+       top_left |= (uint32_t)pbox->x1 & 0x000007ff;
+       top_left |= ((uint32_t)pbox->y1 << 16) & 0x07ff0000;
+
+       bottom_right  = dev_priv->state.s3d.new_scend & keep;
+       bottom_right |= ((uint32_t)pbox->x2 - 1) & 0x000007ff;
+       bottom_right |= (((uint32_t)pbox->y2 - 1) << 16) & 0x07ff0000;
+
+       /* scissors are global state: nothing to do if they are unchanged */
+       if (top_left == dev_priv->state.s3d.scstart &&
+           bottom_right == dev_priv->state.s3d.scend)
+               return;
+
+       BEGIN_DMA(4);
+       DMA_WRITE(BCI_CMD_WAIT|BCI_CMD_WAIT_3D);
+       DMA_SET_REGISTERS(SAVAGE_SCSTART_S3D, 2);
+       DMA_WRITE(top_left);
+       DMA_WRITE(bottom_right);
+       dev_priv->state.s3d.scstart = top_left;
+       dev_priv->state.s3d.scend   = bottom_right;
+       dev_priv->waiting = 1;
+       DMA_COMMIT();
+}
+
+/* Emit the Savage4 scissor fields (part of DrawCtrl0/1) for one clip
+ * rectangle.
+ *
+ * The scissor bits of the saved new_drawctrl0/new_drawctrl1 values are
+ * replaced by the rectangle (x in bits 0-10, y from bit 12 up; x2/y2 are
+ * exclusive, hence the -1). The registers are only written, preceded by
+ * a 3D wait, if the resulting pair differs from what was emitted last.
+ */
+void savage_emit_clip_rect_s4(drm_savage_private_t *dev_priv,
+                             drm_clip_rect_t *pbox)
+{
+       const uint32_t keep = ~SAVAGE_SCISSOR_MASK_S4;
+       uint32_t ctrl0, ctrl1;
+       DMA_LOCALS;
+
+       ctrl0  = dev_priv->state.s4.new_drawctrl0 & keep;
+       ctrl0 |= (uint32_t)pbox->x1 & 0x000007ff;
+       ctrl0 |= ((uint32_t)pbox->y1 << 12) & 0x00fff000;
+
+       ctrl1  = dev_priv->state.s4.new_drawctrl1 & keep;
+       ctrl1 |= ((uint32_t)pbox->x2 - 1) & 0x000007ff;
+       ctrl1 |= (((uint32_t)pbox->y2 - 1) << 12) & 0x00fff000;
+
+       /* scissors are global state: nothing to do if they are unchanged */
+       if (ctrl0 == dev_priv->state.s4.drawctrl0 &&
+           ctrl1 == dev_priv->state.s4.drawctrl1)
+               return;
+
+       BEGIN_DMA(4);
+       DMA_WRITE(BCI_CMD_WAIT|BCI_CMD_WAIT_3D);
+       DMA_SET_REGISTERS(SAVAGE_DRAWCTRL0_S4, 2);
+       DMA_WRITE(ctrl0);
+       DMA_WRITE(ctrl1);
+       dev_priv->state.s4.drawctrl0 = ctrl0;
+       dev_priv->state.s4.drawctrl1 = ctrl1;
+       dev_priv->waiting = 1;
+       DMA_COMMIT();
+}
+
+/* Validate a texture address register value supplied by user space.
+ *
+ * Bits 1-2 must read binary 01. Bit 0 selects the memory: clear means
+ * local memory, which must lie inside the texture region described by
+ * dev_priv->texture_offset/texture_size; set means AGP, which must lie
+ * inside the dev_priv->agp_textures map (and that map must exist). The
+ * low three bits are stripped before the range comparison.
+ *
+ * unit is only used in the error messages.
+ * Returns 0 if the address is acceptable, DRM_ERR(EINVAL) otherwise.
+ */
+static int savage_verify_texaddr(drm_savage_private_t *dev_priv, int unit,
+                                uint32_t addr)
+{
+       const int in_agp = (addr & 1) != 0;
+
+       if ((addr & 6) != 2) { /* reserved bits */
+               DRM_ERROR("bad texAddr%d %08x (reserved bits)\n", unit, addr);
+               return DRM_ERR(EINVAL);
+       }
+
+       if (in_agp) {
+               if (!dev_priv->agp_textures) {
+                       DRM_ERROR("bad texAddr%d %08x (AGP not available)\n",
+                                 unit, addr);
+                       return DRM_ERR(EINVAL);
+               }
+               addr &= ~7;
+               if (addr >= dev_priv->agp_textures->offset &&
+                   addr < (dev_priv->agp_textures->offset +
+                           dev_priv->agp_textures->size))
+                       return 0;
+               DRM_ERROR("bad texAddr%d %08x (AGP addr out of range)\n",
+                         unit, addr);
+               return DRM_ERR(EINVAL);
+       }
+
+       /* local memory */
+       addr &= ~7;
+       if (addr >= dev_priv->texture_offset &&
+           addr <  dev_priv->texture_offset+dev_priv->texture_size)
+               return 0;
+       DRM_ERROR("bad texAddr%d %08x (local addr out of range)\n",
+                 unit, addr);
+       return DRM_ERR(EINVAL);
+}
+
+/* Helpers for the savage_verify_state_*() functions. Both rely on the
+ * names dev_priv, start, count and regs of the expanding function: if
+ * register reg lies inside [start, start+count) its value is fetched
+ * from the user array regs (unchecked access; the caller must have
+ * verified the range) and stored in dev_priv->state.where.
+ *
+ * SAVE_STATE stores the whole value, SAVE_STATE_MASK only replaces the
+ * bits selected by mask. Both are wrapped in do { } while (0) so that
+ * they behave like a single statement, and reg is parenthesized.
+ */
+#define SAVE_STATE(reg,where) do {                             \
+       if(start <= (reg) && start+count > (reg))               \
+               DRM_GET_USER_UNCHECKED(dev_priv->state.where,   \
+                                      &regs[(reg)-start]);     \
+} while (0)
+#define SAVE_STATE_MASK(reg,where,mask) do {                   \
+       if(start <= (reg) && start+count > (reg)) {             \
+               uint32_t tmp;                                   \
+               DRM_GET_USER_UNCHECKED(tmp, &regs[(reg)-start]);\
+               dev_priv->state.where = (tmp & (mask)) |        \
+                       (dev_priv->state.where & ~(mask));      \
+       }                                                       \
+} while (0)
+/* Check a Savage3D register update [start, start+count) and record the
+ * parts of it the driver has to track.
+ *
+ * The range must lie within SAVAGE_TEXPALADDR_S3D ..
+ * SAVAGE_DESTTEXRWWATERMARK_S3D. The non-scissor bits of the scissor
+ * registers are saved in new_scstart/new_scend. If the range touches
+ * the texture registers, texctrl and texaddr are saved and, with
+ * texturing enabled, the texture address is verified.
+ *
+ * regs is the user array holding the count register values.
+ * Returns 0 on success or DRM_ERR(EINVAL).
+ */
+static int savage_verify_state_s3d(drm_savage_private_t *dev_priv,
+                                  unsigned int start, unsigned int count,
+                                  const uint32_t __user *regs)
+{
+       const unsigned int last = start + count - 1;
+       const unsigned int end = start + count;
+
+       if (start < SAVAGE_TEXPALADDR_S3D ||
+           last > SAVAGE_DESTTEXRWWATERMARK_S3D) {
+               DRM_ERROR("invalid register range (0x%04x-0x%04x)\n",
+                         start, last);
+               return DRM_ERR(EINVAL);
+       }
+
+       SAVE_STATE_MASK(SAVAGE_SCSTART_S3D, s3d.new_scstart,
+                       ~SAVAGE_SCISSOR_MASK_S3D);
+       SAVE_STATE_MASK(SAVAGE_SCEND_S3D, s3d.new_scend,
+                       ~SAVAGE_SCISSOR_MASK_S3D);
+
+       /* no texture register in the range: nothing left to check */
+       if (start > SAVAGE_TEXCTRL_S3D || end <= SAVAGE_TEXPALADDR_S3D)
+               return 0;
+
+       SAVE_STATE(SAVAGE_TEXCTRL_S3D, s3d.texctrl);
+       SAVE_STATE(SAVAGE_TEXADDR_S3D, s3d.texaddr);
+
+       /* the address only matters while texturing is enabled */
+       if (!(dev_priv->state.s3d.texctrl & SAVAGE_TEXCTRL_TEXEN_MASK))
+               return 0;
+
+       return savage_verify_texaddr(dev_priv, 0,
+                                    dev_priv->state.s3d.texaddr);
+}
+
+/* Check a Savage4 register update [start, start+count) and record the
+ * parts of it the driver has to track.
+ *
+ * The range must lie within SAVAGE_DRAWLOCALCTRL_S4 ..
+ * SAVAGE_TEXBLENDCOLOR_S4. The non-scissor bits of DrawCtrl0/1 are saved
+ * in new_drawctrl0/new_drawctrl1. If the range touches the texture
+ * registers, texdescr and both texture addresses are saved and the
+ * address of every enabled texture unit is verified (both units are
+ * always checked, so both may log an error).
+ *
+ * regs is the user array holding the count register values.
+ * Returns 0 on success, otherwise the ORed results of the address checks
+ * or DRM_ERR(EINVAL) for a bad range.
+ */
+static int savage_verify_state_s4(drm_savage_private_t *dev_priv,
+                                 unsigned int start, unsigned int count,
+                                 const uint32_t __user *regs)
+{
+       const unsigned int last = start + count - 1;
+       const unsigned int end = start + count;
+       int unit0_err = 0, unit1_err = 0;
+
+       if (start < SAVAGE_DRAWLOCALCTRL_S4 ||
+           last > SAVAGE_TEXBLENDCOLOR_S4) {
+               DRM_ERROR("invalid register range (0x%04x-0x%04x)\n",
+                         start, last);
+               return DRM_ERR(EINVAL);
+       }
+
+       SAVE_STATE_MASK(SAVAGE_DRAWCTRL0_S4, s4.new_drawctrl0,
+                       ~SAVAGE_SCISSOR_MASK_S4);
+       SAVE_STATE_MASK(SAVAGE_DRAWCTRL1_S4, s4.new_drawctrl1,
+                       ~SAVAGE_SCISSOR_MASK_S4);
+
+       /* no texture register in the range: nothing left to check */
+       if (start > SAVAGE_TEXDESCR_S4 || end <= SAVAGE_TEXPALADDR_S4)
+               return 0;
+
+       SAVE_STATE(SAVAGE_TEXDESCR_S4, s4.texdescr);
+       SAVE_STATE(SAVAGE_TEXADDR0_S4, s4.texaddr0);
+       SAVE_STATE(SAVAGE_TEXADDR1_S4, s4.texaddr1);
+
+       if (dev_priv->state.s4.texdescr & SAVAGE_TEXDESCR_TEX0EN_MASK)
+               unit0_err = savage_verify_texaddr(
+                       dev_priv, 0, dev_priv->state.s4.texaddr0);
+       if (dev_priv->state.s4.texdescr & SAVAGE_TEXDESCR_TEX1EN_MASK)
+               unit1_err = savage_verify_texaddr(
+                       dev_priv, 1, dev_priv->state.s4.texaddr1);
+
+       return unit0_err | unit1_err;
+}
+#undef SAVE_STATE
+#undef SAVE_STATE_MASK
+
+/* Emit a block of 3D state registers through command DMA.
+ *
+ * cmd_header->state gives the first register (start), the number of
+ * registers (count) and whether the update is global; regs is the user
+ * array with one 32-bit value per register.
+ *
+ * The values are verified first (chip-specific range and texture address
+ * checks). The two scissor registers are never emitted from here, so a
+ * range that covers them is split into the part before and the part
+ * after them. Registers are sent in SET_REGISTER groups of at most 255.
+ * For a global update a 3D wait is emitted in front and
+ * dev_priv->waiting is set.
+ *
+ * Returns 0 on success (including "nothing to emit"), DRM_ERR(EFAULT)
+ * if the user array is not readable, or the verifier's error code.
+ */
+static int savage_dispatch_state(drm_savage_private_t *dev_priv,
+                                const drm_savage_cmd_header_t *cmd_header,
+                                const uint32_t __user *regs)
+{
+       unsigned int count = cmd_header->state.count;
+       unsigned int start = cmd_header->state.start;
+       unsigned int count2 = 0;
+       unsigned int sc_first, sc_end; /* scissor regs: [sc_first, sc_end) */
+       unsigned int bci_size;
+       int ret;
+       DMA_LOCALS;
+
+       if (!count)
+               return 0;
+
+       if (DRM_VERIFYAREA_READ(regs, count*4))
+               return DRM_ERR(EFAULT);
+
+       if (S3_SAVAGE3D_SERIES(dev_priv->chipset)) {
+               ret = savage_verify_state_s3d(dev_priv, start, count, regs);
+               sc_first = SAVAGE_SCSTART_S3D;
+               sc_end = SAVAGE_SCEND_S3D+1;
+       } else {
+               ret = savage_verify_state_s4(dev_priv, start, count, regs);
+               sc_first = SAVAGE_DRAWCTRL0_S4;
+               sc_end = SAVAGE_DRAWCTRL1_S4+1;
+       }
+       if (ret != 0)
+               return ret;
+
+       /* scissor regs are emitted in savage_dispatch_draw */
+       if (start < sc_first) {
+               /* count2: registers behind the scissors, if any */
+               if (start+count > sc_end)
+                       count2 = count - (sc_end - start);
+               /* count: registers in front of the scissors */
+               if (start+count > sc_first)
+                       count = sc_first - start;
+       } else if (start < sc_end) {
+               /* The range begins inside the scissor registers. */
+               unsigned int skipped = sc_end - start;
+
+               if (count <= skipped)
+                       return 0; /* nothing but scissor registers */
+               count -= skipped;
+               start += skipped;
+               /* The user values of the skipped registers have to be
+                * skipped as well; otherwise they would be written to
+                * the registers following the scissors. */
+               regs += skipped;
+       }
+
+       /* one command word per group of up to 255 registers */
+       bci_size = count + (count+254)/255 + count2 + (count2+254)/255;
+
+       if (cmd_header->state.global) {
+               BEGIN_DMA(bci_size+1);
+               DMA_WRITE(BCI_CMD_WAIT | BCI_CMD_WAIT_3D);
+               dev_priv->waiting = 1;
+       } else {
+               BEGIN_DMA(bci_size);
+       }
+
+       do {
+               while (count > 0) {
+                       unsigned int n = count < 255 ? count : 255;
+                       DMA_SET_REGISTERS(start, n);
+                       DMA_COPY_FROM_USER(regs, n);
+                       count -= n;
+                       start += n;
+                       regs += n;
+               }
+               /* step over the scissor registers and their values; this
+                * only matters if a second part (count2) follows */
+               start += sc_end - sc_first;
+               regs += sc_end - sc_first;
+               count = count2;
+               count2 = 0;
+       } while (count);
+
+       DMA_COMMIT();
+
+       return 0;
+}
+
+/* Draw a primitive whose vertices live in a DMA buffer, using indexed
+ * drawing commands written directly to the BCI.
+ *
+ * cmd_header->prim supplies the primitive type, the skip flags, the
+ * number of vertices (count) and the index of the first vertex (start).
+ * dmabuf is the vertex buffer; it must not be NULL.
+ *
+ * Returns 0 on success (also when count is 0) and DRM_ERR(EINVAL) for a
+ * missing buffer, a bad primitive type or vertex count, unsupported skip
+ * flags, or a vertex range that exceeds the buffer.
+ */
+static int savage_dispatch_dma_prim(drm_savage_private_t *dev_priv,
+                                   const drm_savage_cmd_header_t *cmd_header,
+                                   const drm_buf_t *dmabuf)
+{
+       unsigned char reorder = 0;
+       unsigned int prim = cmd_header->prim.prim;
+       unsigned int skip = cmd_header->prim.skip;
+       unsigned int n = cmd_header->prim.count;
+       unsigned int start = cmd_header->prim.start;
+       unsigned int i;
+       BCI_LOCALS;
+
+       if (!dmabuf) {
+           DRM_ERROR("called without dma buffers!\n");
+           return DRM_ERR(EINVAL);
+       }
+
+       if (!n)
+               return 0;
+
+       switch (prim) {
+       case SAVAGE_PRIM_TRILIST_201:
+               /* a triangle list whose indices get reordered below */
+               reorder = 1;
+               prim = SAVAGE_PRIM_TRILIST;
+               /* fall through */
+       case SAVAGE_PRIM_TRILIST:
+               if (n % 3 != 0) {
+                       DRM_ERROR("wrong number of vertices %u in TRILIST\n",
+                                 n);
+                       return DRM_ERR(EINVAL);
+               }
+               break;
+       case SAVAGE_PRIM_TRISTRIP:
+       case SAVAGE_PRIM_TRIFAN:
+               if (n < 3) {
+                       DRM_ERROR("wrong number of vertices %u in TRIFAN/STRIP\n",
+                                 n);
+                       return DRM_ERR(EINVAL);
+               }
+               break;
+       default:
+               DRM_ERROR("invalid primitive type %u\n", prim);
+               return DRM_ERR(EINVAL);
+       }
+
+       if (S3_SAVAGE3D_SERIES(dev_priv->chipset)) {
+               if (skip != 0) {
+                       DRM_ERROR("invalid skip flags 0x%04x for DMA\n",
+                                 skip);
+                       return DRM_ERR(EINVAL);
+               }
+       } else {
+               /* size = 10 minus the number of skip bits set in bits
+                * 0-7; only combinations that give 8 are accepted.
+                * NOTE(review): presumably the vertex size in dwords,
+                * matching the 32 bytes per vertex assumed by the range
+                * check below -- confirm. */
+               unsigned int size = 10 - (skip & 1) - (skip >> 1 & 1) -
+                       (skip >> 2 & 1) - (skip >> 3 & 1) - (skip >> 4 & 1) -
+                       (skip >> 5 & 1) - (skip >> 6 & 1) - (skip >> 7 & 1);
+               if (skip > SAVAGE_SKIP_ALL_S4 || size != 8) {
+                       DRM_ERROR("invalid skip flags 0x%04x for DMA\n",
+                                 skip);
+                       return DRM_ERR(EINVAL);
+               }
+               if (reorder) {
+                       DRM_ERROR("TRILIST_201 used on Savage4 hardware\n");
+                       return DRM_ERR(EINVAL);
+               }
+       }
+
+       /* the buffer holds dmabuf->total/32 vertices */
+       if (start + n > dmabuf->total/32) {
+               DRM_ERROR("vertex indices (%u-%u) out of range (0-%u)\n",
+                         start, start + n - 1, dmabuf->total/32);
+               return DRM_ERR(EINVAL);
+       }
+
+       /* Vertex DMA doesn't work with command DMA at the same time,
+        * so we use BCI_... to submit commands here. Flush buffered
+        * faked DMA first. */
+       DMA_FLUSH();
+
+       /* only reprogram the vertex buffer address when it changes */
+       if (dmabuf->bus_address != dev_priv->state.common.vbaddr) {
+               BEGIN_BCI(2);
+               BCI_SET_REGISTERS(SAVAGE_VERTBUFADDR, 1);
+               BCI_WRITE(dmabuf->bus_address | dev_priv->dma_type);
+               dev_priv->state.common.vbaddr = dmabuf->bus_address;
+       }
+       if (S3_SAVAGE3D_SERIES(dev_priv->chipset) && dev_priv->waiting) {
+               /* Workaround for what looks like a hardware bug. If a
+                * WAIT_3D_IDLE was emitted some time before the
+                * indexed drawing command then the engine will lock
+                * up. There are two known workarounds:
+                * WAIT_IDLE_EMPTY or emit at least 63 NOPs. */
+               /* NOTE(review): the 63 words written here are
+                * BCI_CMD_WAIT, not BCI_CMD_NOP. */
+               BEGIN_BCI(63);
+               for (i = 0; i < 63; ++i)
+                       BCI_WRITE(BCI_CMD_WAIT);
+               dev_priv->waiting = 0;
+       }
+
+       /* move the primitive type to bit 25 and up, where the
+        * BCI_CMD_DRAW_TRISTRIP/TRIFAN bits are defined */
+       prim <<= 25;
+       while (n != 0) {
+               /* Can emit up to 255 indices (85 triangles) at once. */
+               unsigned int count = n > 255 ? 255 : n;
+               if (reorder) {
+                       /* Need to reorder indices for correct flat
+                        * shading while preserving the clock sense
+                        * for correct culling. Only on Savage3D. */
+                       /* This array shadows the outer reorder flag.
+                        * Each triangle (v0, v1, v2) is emitted as
+                        * (v2, v0, v1): the first index goes into the
+                        * command word, the rest are packed two per
+                        * word. */
+                       int reorder[3] = {-1, -1, -1};
+                       reorder[start%3] = 2;
+
+                       BEGIN_BCI((count+1+1)/2);
+                       BCI_DRAW_INDICES_S3D(count, prim, start+2);
+
+                       for (i = start+1; i+1 < start+count; i += 2)
+                               BCI_WRITE((i + reorder[i % 3]) |
+                                         ((i+1 + reorder[(i+1) % 3]) << 16));
+                       if (i < start+count)
+                               BCI_WRITE(i + reorder[i%3]);
+               } else if (S3_SAVAGE3D_SERIES(dev_priv->chipset)) {
+                       /* Savage3D: the first index travels in the
+                        * command word, the other count-1 follow two
+                        * per word. */
+                       BEGIN_BCI((count+1+1)/2);
+                       BCI_DRAW_INDICES_S3D(count, prim, start);
+
+                       for (i = start+1; i+1 < start+count; i += 2)
+                               BCI_WRITE(i | ((i+1) << 16));
+                       if (i < start+count)
+                               BCI_WRITE(i);
+               } else {
+                       /* Savage4: the command word carries the skip
+                        * flags, all count indices follow two per
+                        * word. */
+                       BEGIN_BCI((count+2+1)/2);
+                       BCI_DRAW_INDICES_S4(count, prim, skip);
+
+                       for (i = start; i+1 < start+count; i += 2)
+                               BCI_WRITE(i | ((i+1) << 16));
+                       if (i < start+count)
+                               BCI_WRITE(i);
+               }
+
+               start += count;
+               n -= count;
+
+               /* every batch after the first continues the primitive */
+               prim |= BCI_CMD_DRAW_CONT;
+       }
+
+       return 0;
+}
+
+/*
+ * Dispatch a non-indexed drawing command whose vertices are read from a
+ * vertex buffer in user space and copied into the command DMA stream.
+ *
+ * cmd_header supplies the primitive type, skip flags, vertex count and
+ * first vertex. vtxbuf is indexed in 32-bit words, so vb_stride is the
+ * distance between vertices in words; vb_size is divided by vb_stride*4
+ * to obtain the number of vertices available.
+ *
+ * Returns 0 on success (or when the count is 0) and DRM_ERR(EINVAL) if
+ * the primitive type, vertex count, skip flags, stride or vertex range
+ * is invalid.
+ */
+static int savage_dispatch_vb_prim(drm_savage_private_t *dev_priv,
+                                  const drm_savage_cmd_header_t *cmd_header,
+                                  const uint32_t __user *vtxbuf,
+                                  unsigned int vb_size,
+                                  unsigned int vb_stride)
+{
+       unsigned int prim = cmd_header->prim.prim;
+       unsigned int skip = cmd_header->prim.skip;
+       unsigned int n = cmd_header->prim.count;
+       unsigned int start = cmd_header->prim.start;
+       unsigned int skip_limit, vtx_size, max_vtx;
+       unsigned int bit, v;
+       int swizzle = 0;
+       DMA_LOCALS;
+
+       if (n == 0)
+               return 0;
+
+       /* TRILIST_201 is emitted as a plain TRILIST whose vertices are
+        * permuted while they are copied. */
+       if (prim == SAVAGE_PRIM_TRILIST_201) {
+               swizzle = 1;
+               prim = SAVAGE_PRIM_TRILIST;
+       }
+
+       switch (prim) {
+       case SAVAGE_PRIM_TRILIST:
+               if (n % 3 != 0) {
+                       DRM_ERROR("wrong number of vertices %u in TRILIST\n",
+                                 n);
+                       return DRM_ERR(EINVAL);
+               }
+               break;
+       case SAVAGE_PRIM_TRISTRIP:
+       case SAVAGE_PRIM_TRIFAN:
+               if (n < 3) {
+                       DRM_ERROR("wrong number of vertices %u in TRIFAN/STRIP\n",
+                                 n);
+                       return DRM_ERR(EINVAL);
+               }
+               break;
+       default:
+               DRM_ERROR("invalid primitive type %u\n", prim);
+               return DRM_ERR(EINVAL);
+       }
+
+       /* A full vertex is 8 words on Savage3D and 10 words otherwise. */
+       if (S3_SAVAGE3D_SERIES(dev_priv->chipset)) {
+               skip_limit = SAVAGE_SKIP_ALL_S3D;
+               vtx_size = 8;
+       } else {
+               skip_limit = SAVAGE_SKIP_ALL_S4;
+               vtx_size = 10;
+       }
+       if (skip > skip_limit) {
+               DRM_ERROR("invalid skip flags 0x%04x\n", skip);
+               return DRM_ERR(EINVAL);
+       }
+
+       /* Each of the low eight skip bits removes one word. */
+       for (bit = 0; bit < 8; ++bit)
+               vtx_size -= (skip >> bit) & 1;
+
+       if (vtx_size > vb_stride) {
+               DRM_ERROR("vertex size greater than vb stride (%u > %u)\n",
+                         vtx_size, vb_stride);
+               return DRM_ERR(EINVAL);
+       }
+
+       max_vtx = vb_size / (vb_stride*4);
+       if (start + n > max_vtx) {
+               DRM_ERROR("vertex indices (%u-%u) out of range (0-%u)\n",
+                         start, start + n - 1, max_vtx);
+               return DRM_ERR(EINVAL);
+       }
+
+       prim <<= 25;
+       while (n != 0) {
+               /* Can emit up to 255 vertices (85 triangles) at once. */
+               unsigned int chunk = n > 255 ? 255 : n;
+
+               BEGIN_DMA(chunk*vtx_size+1);
+               DMA_DRAW_PRIMITIVE(chunk, prim, skip);
+
+               if (swizzle) {
+                       /* Need to reorder vertices for correct flat
+                        * shading while preserving the clock sense
+                        * for correct culling. Only on Savage3D. */
+                       int shift[3] = {-1, -1, -1};
+                       shift[start%3] = 2;
+
+                       for (v = start; v < start+chunk; ++v) {
+                               unsigned int from = v + shift[v % 3];
+                               DMA_COPY_FROM_USER(&vtxbuf[vb_stride*from],
+                                                  vtx_size);
+                       }
+               } else if (vb_stride == vtx_size) {
+                       /* Tightly packed vertices: one bulk copy. */
+                       DMA_COPY_FROM_USER(&vtxbuf[vb_stride*start],
+                                          vtx_size*chunk);
+               } else {
+                       for (v = start; v < start+chunk; ++v) {
+                               DMA_COPY_FROM_USER(
+                                       &vtxbuf[vb_stride*v],
+                                       vtx_size);
+                       }
+               }
+
+               DMA_COMMIT();
+
+               start += chunk;
+               n -= chunk;
+
+               /* Later chunks continue the same primitive. */
+               prim |= BCI_CMD_DRAW_CONT;
+       }
+
+       return 0;
+}
+
+/*
+ * Dispatch an indexed drawing command whose vertices live in a DMA
+ * buffer.
+ *
+ * The 16-bit indices are read from user space at usr_idx in chunks of
+ * up to 255, range-checked against the DMA buffer (32 bytes per vertex)
+ * and written to the BCI together with the draw command. Buffered faked
+ * DMA is flushed first, and the vertex buffer address register is
+ * reprogrammed when it does not already point at dmabuf.
+ *
+ * Returns 0 on success (or when the count is 0) and DRM_ERR(EINVAL) for
+ * a missing DMA buffer or an invalid primitive type, index count, skip
+ * flags or index value.
+ */
+static int savage_dispatch_dma_idx(drm_savage_private_t *dev_priv,
+                                  const drm_savage_cmd_header_t *cmd_header,
+                                  const uint16_t __user *usr_idx,
+                                  const drm_buf_t *dmabuf)
+{
+       unsigned char reorder = 0;
+       unsigned int prim = cmd_header->idx.prim;
+       unsigned int skip = cmd_header->idx.skip;
+       unsigned int n = cmd_header->idx.count;
+       unsigned int i;
+       BCI_LOCALS;
+
+       if (!dmabuf) {
+               DRM_ERROR("called without dma buffers!\n");
+               return DRM_ERR(EINVAL);
+       }
+
+       if (!n)
+               return 0;
+
+       switch (prim) {
+       case SAVAGE_PRIM_TRILIST_201:
+               reorder = 1;
+               prim = SAVAGE_PRIM_TRILIST;
+               /* fall through */
+       case SAVAGE_PRIM_TRILIST:
+               if (n % 3 != 0) {
+                       DRM_ERROR("wrong number of indices %u in TRILIST\n",
+                                 n);
+                       return DRM_ERR(EINVAL);
+               }
+               break;
+       case SAVAGE_PRIM_TRISTRIP:
+       case SAVAGE_PRIM_TRIFAN:
+               if (n < 3) {
+                       DRM_ERROR("wrong number of indices %u in TRIFAN/STRIP\n",
+                                 n);
+                       return DRM_ERR(EINVAL);
+               }
+               break;
+       default:
+               DRM_ERROR("invalid primitive type %u\n", prim);
+               return DRM_ERR(EINVAL);
+       }
+
+       if (S3_SAVAGE3D_SERIES(dev_priv->chipset)) {
+               if (skip != 0) {
+                       DRM_ERROR("invalid skip flags 0x%04x for DMA\n",
+                                 skip);
+                       return DRM_ERR(EINVAL);
+               }
+       } else {
+               /* Vertex size in words after removing skipped ones;
+                * only 8-word vertices are accepted here. */
+               unsigned int size = 10 - (skip & 1) - (skip >> 1 & 1) -
+                       (skip >> 2 & 1) - (skip >> 3 & 1) - (skip >> 4 & 1) -
+                       (skip >> 5 & 1) - (skip >> 6 & 1) - (skip >> 7 & 1);
+               if (skip > SAVAGE_SKIP_ALL_S4 || size != 8) {
+                       DRM_ERROR("invalid skip flags 0x%04x for DMA\n",
+                                 skip);
+                       return DRM_ERR(EINVAL);
+               }
+               if (reorder) {
+                       DRM_ERROR("TRILIST_201 used on Savage4 hardware\n");
+                       return DRM_ERR(EINVAL);
+               }
+       }
+
+       /* Vertex DMA doesn't work with command DMA at the same time,
+        * so we use BCI_... to submit commands here. Flush buffered
+        * faked DMA first. */
+       DMA_FLUSH();
+
+       if (dmabuf->bus_address != dev_priv->state.common.vbaddr) {
+               BEGIN_BCI(2);
+               BCI_SET_REGISTERS(SAVAGE_VERTBUFADDR, 1);
+               BCI_WRITE(dmabuf->bus_address | dev_priv->dma_type);
+               dev_priv->state.common.vbaddr = dmabuf->bus_address;
+       }
+       if (S3_SAVAGE3D_SERIES(dev_priv->chipset) && dev_priv->waiting) {
+               /* Workaround for what looks like a hardware bug. If a
+                * WAIT_3D_IDLE was emitted some time before the
+                * indexed drawing command then the engine will lock
+                * up. There are two known workarounds:
+                * WAIT_IDLE_EMPTY or emit at least 63 NOPs. */
+               BEGIN_BCI(63);
+               for (i = 0; i < 63; ++i)
+                       BCI_WRITE(BCI_CMD_WAIT);
+               dev_priv->waiting = 0;
+       }
+
+       prim <<= 25;
+       while (n != 0) {
+               /* Can emit up to 255 indices (85 triangles) at once. */
+               unsigned int count = n > 255 ? 255 : n;
+               /* Is it ok to allocate 510 bytes on the stack in an ioctl? */
+               uint16_t idx[255];
+
+               /* Copy and check indices. Valid vertices are numbered
+                * 0 .. total/32 - 1, so total/32 itself is already one
+                * vertex past the end of the buffer. */
+               DRM_COPY_FROM_USER_UNCHECKED(idx, usr_idx, count*2);
+               for (i = 0; i < count; ++i) {
+                       if (idx[i] >= dmabuf->total/32) {
+                               DRM_ERROR("idx[%u]=%u out of range (0-%u)\n",
+                                         i, idx[i], dmabuf->total/32);
+                               return DRM_ERR(EINVAL);
+                       }
+               }
+
+               if (reorder) {
+                       /* Need to reorder indices for correct flat
+                        * shading while preserving the clock sense
+                        * for correct culling. Only on Savage3D. */
+                       int perm[3] = {2, -1, -1};
+
+                       BEGIN_BCI((count+1+1)/2);
+                       BCI_DRAW_INDICES_S3D(count, prim, idx[2]);
+
+                       for (i = 1; i+1 < count; i += 2)
+                               BCI_WRITE(idx[i + perm[i % 3]] |
+                                         (idx[i+1 + perm[(i+1) % 3]] << 16));
+                       if (i < count)
+                               BCI_WRITE(idx[i + perm[i%3]]);
+               } else if (S3_SAVAGE3D_SERIES(dev_priv->chipset)) {
+                       /* Savage3D: the first index travels in the
+                        * draw command itself. */
+                       BEGIN_BCI((count+1+1)/2);
+                       BCI_DRAW_INDICES_S3D(count, prim, idx[0]);
+
+                       for (i = 1; i+1 < count; i += 2)
+                               BCI_WRITE(idx[i] | (idx[i+1] << 16));
+                       if (i < count)
+                               BCI_WRITE(idx[i]);
+               } else {
+                       BEGIN_BCI((count+2+1)/2);
+                       BCI_DRAW_INDICES_S4(count, prim, skip);
+
+                       for (i = 0; i+1 < count; i += 2)
+                               BCI_WRITE(idx[i] | (idx[i+1] << 16));
+                       if (i < count)
+                               BCI_WRITE(idx[i]);
+               }
+
+               usr_idx += count;
+               n -= count;
+
+               /* Later chunks continue the same primitive. */
+               prim |= BCI_CMD_DRAW_CONT;
+       }
+
+       return 0;
+}
+
+/*
+ * Dispatch an indexed drawing command whose vertices are read from a
+ * vertex buffer in user space.
+ *
+ * The 16-bit indices are read from usr_idx in chunks of up to 255 and
+ * range-checked; the referenced vertices are then copied one by one
+ * from vtxbuf into the command DMA stream. vtxbuf is indexed in 32-bit
+ * words, so vb_stride is the distance between vertices in words, and
+ * vb_size / (vb_stride*4) is the number of vertices available.
+ *
+ * Returns 0 on success (or when the count is 0) and DRM_ERR(EINVAL) for
+ * an invalid primitive type, index count, skip flags, stride or index
+ * value.
+ */
+static int savage_dispatch_vb_idx(drm_savage_private_t *dev_priv,
+                                 const drm_savage_cmd_header_t *cmd_header,
+                                 const uint16_t __user *usr_idx,
+                                 const uint32_t __user *vtxbuf,
+                                 unsigned int vb_size,
+                                 unsigned int vb_stride)
+{
+       unsigned char reorder = 0;
+       unsigned int prim = cmd_header->idx.prim;
+       unsigned int skip = cmd_header->idx.skip;
+       unsigned int n = cmd_header->idx.count;
+       unsigned int vtx_size;
+       unsigned int i;
+       DMA_LOCALS;
+
+       if (!n)
+               return 0;
+
+       switch (prim) {
+       case SAVAGE_PRIM_TRILIST_201:
+               reorder = 1;
+               prim = SAVAGE_PRIM_TRILIST;
+               /* fall through */
+       case SAVAGE_PRIM_TRILIST:
+               if (n % 3 != 0) {
+                       DRM_ERROR("wrong number of indices %u in TRILIST\n",
+                                 n);
+                       return DRM_ERR(EINVAL);
+               }
+               break;
+       case SAVAGE_PRIM_TRISTRIP:
+       case SAVAGE_PRIM_TRIFAN:
+               if (n < 3) {
+                       DRM_ERROR("wrong number of indices %u in TRIFAN/STRIP\n",
+                                 n);
+                       return DRM_ERR(EINVAL);
+               }
+               break;
+       default:
+               DRM_ERROR("invalid primitive type %u\n", prim);
+               return DRM_ERR(EINVAL);
+       }
+
+       if (S3_SAVAGE3D_SERIES(dev_priv->chipset)) {
+               if (skip > SAVAGE_SKIP_ALL_S3D) {
+                       DRM_ERROR("invalid skip flags 0x%04x\n", skip);
+                       return DRM_ERR(EINVAL);
+               }
+               vtx_size = 8; /* full vertex */
+       } else {
+               if (skip > SAVAGE_SKIP_ALL_S4) {
+                       DRM_ERROR("invalid skip flags 0x%04x\n", skip);
+                       return DRM_ERR(EINVAL);
+               }
+               vtx_size = 10; /* full vertex */
+       }
+
+       /* Each of the low eight skip bits removes one word. */
+       vtx_size -= (skip & 1) + (skip >> 1 & 1) +
+               (skip >> 2 & 1) + (skip >> 3 & 1) + (skip >> 4 & 1) +
+               (skip >> 5 & 1) + (skip >> 6 & 1) + (skip >> 7 & 1);
+
+       if (vtx_size > vb_stride) {
+               DRM_ERROR("vertex size greater than vb stride (%u > %u)\n",
+                         vtx_size, vb_stride);
+               return DRM_ERR(EINVAL);
+       }
+
+       prim <<= 25;
+       while (n != 0) {
+               /* Can emit up to 255 vertices (85 triangles) at once. */
+               unsigned int count = n > 255 ? 255 : n;
+               /* Is it ok to allocate 510 bytes on the stack in an ioctl? */
+               uint16_t idx[255];
+
+               /* Copy and check indices. Valid vertices are numbered
+                * 0 .. vb_size/(vb_stride*4) - 1; accepting the limit
+                * itself would let the copies below read one vertex
+                * past the end of the vertex buffer. */
+               DRM_COPY_FROM_USER_UNCHECKED(idx, usr_idx, count*2);
+               for (i = 0; i < count; ++i) {
+                       if (idx[i] >= vb_size / (vb_stride*4)) {
+                               DRM_ERROR("idx[%u]=%u out of range (0-%u)\n",
+                                         i, idx[i],  vb_size / (vb_stride*4));
+                               return DRM_ERR(EINVAL);
+                       }
+               }
+
+               if (reorder) {
+                       /* Need to reorder vertices for correct flat
+                        * shading while preserving the clock sense
+                        * for correct culling. Only on Savage3D. */
+                       int perm[3] = {2, -1, -1};
+
+                       BEGIN_DMA(count*vtx_size+1);
+                       DMA_DRAW_PRIMITIVE(count, prim, skip);
+
+                       for (i = 0; i < count; ++i) {
+                               unsigned int j = idx[i + perm[i % 3]];
+                               DMA_COPY_FROM_USER(&vtxbuf[vb_stride*j],
+                                                  vtx_size);
+                       }
+
+                       DMA_COMMIT();
+               } else {
+                       BEGIN_DMA(count*vtx_size+1);
+                       DMA_DRAW_PRIMITIVE(count, prim, skip);
+
+                       for (i = 0; i < count; ++i) {
+                               unsigned int j = idx[i];
+                               DMA_COPY_FROM_USER(&vtxbuf[vb_stride*j],
+                                                  vtx_size);
+                       }
+
+                       DMA_COMMIT();
+               }
+
+               usr_idx += count;
+               n -= count;
+
+               /* Later chunks continue the same primitive. */
+               prim |= BCI_CMD_DRAW_CONT;
+       }
+
+       return 0;
+}
+
+/*
+ * Dispatch a clear command: fill every clip rectangle in each selected
+ * buffer (front, back, depth) with a constant value.
+ *
+ * cmd_header carries the buffer flags; the write mask and clear value
+ * are fetched from the user-space command word at data. A mask other
+ * than 0xffffffff is programmed before the fills and reset to
+ * 0xffffffff afterwards.
+ *
+ * Always returns 0. Nothing is emitted when nbox is 0 or no buffer flag
+ * is set.
+ */
+static int savage_dispatch_clear(drm_savage_private_t *dev_priv,
+                                const drm_savage_cmd_header_t *cmd_header,
+                                const drm_savage_cmd_header_t __user *data,
+                                unsigned int nbox,
+                                const drm_clip_rect_t __user *usr_boxes)
+{
+       unsigned int flags = cmd_header->clear0.flags;
+       unsigned int mask, value;
+       unsigned int clear_cmd;
+       unsigned int nbufs = 0;
+       unsigned int b;
+       DMA_LOCALS;
+
+       if (nbox == 0)
+               return 0;
+
+       DRM_GET_USER_UNCHECKED(mask, &data->clear1.mask);
+       DRM_GET_USER_UNCHECKED(value, &data->clear1.value);
+
+       clear_cmd = BCI_CMD_RECT | BCI_CMD_RECT_XP | BCI_CMD_RECT_YP |
+               BCI_CMD_SEND_COLOR | BCI_CMD_DEST_PBD_NEW;
+       BCI_CMD_SET_ROP(clear_cmd,0xCC);
+
+       /* Count the buffers to clear; each needs six words per box. */
+       if (flags & SAVAGE_FRONT)
+               ++nbufs;
+       if (flags & SAVAGE_BACK)
+               ++nbufs;
+       if (flags & SAVAGE_DEPTH)
+               ++nbufs;
+       if (nbufs == 0)
+               return 0;
+
+       if (mask != 0xffffffff) {
+               /* set mask */
+               BEGIN_DMA(2);
+               DMA_SET_REGISTERS(SAVAGE_BITPLANEWTMASK, 1);
+               DMA_WRITE(mask);
+               DMA_COMMIT();
+       }
+
+       for (b = 0; b < nbox; ++b) {
+               drm_clip_rect_t box;
+               unsigned int x, y, w, h;
+               unsigned int buf;
+
+               DRM_COPY_FROM_USER_UNCHECKED(&box, usr_boxes + b, sizeof(box));
+               x = box.x1;
+               y = box.y1;
+               w = box.x2 - box.x1;
+               h = box.y2 - box.y1;
+
+               BEGIN_DMA(nbufs*6);
+               for (buf = SAVAGE_FRONT; buf <= SAVAGE_DEPTH; buf <<= 1) {
+                       if (!(flags & buf))
+                               continue;
+
+                       DMA_WRITE(clear_cmd);
+                       /* Destination offset and bitmap descriptor. */
+                       if (buf == SAVAGE_FRONT) {
+                               DMA_WRITE(dev_priv->front_offset);
+                               DMA_WRITE(dev_priv->front_bd);
+                       } else if (buf == SAVAGE_BACK) {
+                               DMA_WRITE(dev_priv->back_offset);
+                               DMA_WRITE(dev_priv->back_bd);
+                       } else if (buf == SAVAGE_DEPTH) {
+                               DMA_WRITE(dev_priv->depth_offset);
+                               DMA_WRITE(dev_priv->depth_bd);
+                       }
+                       DMA_WRITE(value);
+                       DMA_WRITE(BCI_X_Y(x, y));
+                       DMA_WRITE(BCI_W_H(w, h));
+               }
+               DMA_COMMIT();
+       }
+
+       if (mask != 0xffffffff) {
+               /* reset mask */
+               BEGIN_DMA(2);
+               DMA_SET_REGISTERS(SAVAGE_BITPLANEWTMASK, 1);
+               DMA_WRITE(0xffffffff);
+               DMA_COMMIT();
+       }
+
+       return 0;
+}
+
+/*
+ * Dispatch a swap command: for every clip rectangle emit a rectangle
+ * command that uses the back buffer's offset and bitmap descriptor,
+ * with the same X/Y word written twice followed by the box size.
+ *
+ * Always returns 0; nothing is emitted when nbox is 0.
+ */
+static int savage_dispatch_swap(drm_savage_private_t *dev_priv,
+                               unsigned int nbox,
+                               const drm_clip_rect_t __user *usr_boxes)
+{
+       const drm_clip_rect_t __user *usr_box = usr_boxes;
+       unsigned int swap_cmd;
+       unsigned int left;
+       DMA_LOCALS;
+
+       if (nbox == 0)
+               return 0;
+
+       swap_cmd = BCI_CMD_RECT | BCI_CMD_RECT_XP | BCI_CMD_RECT_YP |
+               BCI_CMD_SRC_PBD_COLOR_NEW | BCI_CMD_DEST_GBD;
+       BCI_CMD_SET_ROP(swap_cmd,0xCC);
+
+       for (left = nbox; left != 0; --left, ++usr_box) {
+               drm_clip_rect_t box;
+               uint32_t xy;
+
+               DRM_COPY_FROM_USER_UNCHECKED(&box, usr_box, sizeof(box));
+               /* The same coordinates are written twice below. */
+               xy = BCI_X_Y(box.x1, box.y1);
+
+               BEGIN_DMA(6);
+               DMA_WRITE(swap_cmd);
+               DMA_WRITE(dev_priv->back_offset);
+               DMA_WRITE(dev_priv->back_bd);
+               DMA_WRITE(xy);
+               DMA_WRITE(xy);
+               DMA_WRITE(BCI_W_H(box.x2-box.x1, box.y2-box.y1));
+               DMA_COMMIT();
+       }
+
+       return 0;
+}
+
+/*
+ * Replay a run of drawing commands once for every clip rectangle.
+ *
+ * For each of the nbox rectangles the clip rect is emitted through
+ * dev_priv->emit_clip_rect and then every command header in the
+ * user-space range [start, end) is fetched and handed to the matching
+ * savage_dispatch_* routine. Indexed commands are followed in the
+ * command stream by their 16-bit indices, which are skipped after
+ * dispatch.
+ *
+ * Returns 0 on success, the first non-zero dispatch result, or
+ * DRM_ERR(EINVAL) if a non-drawing command is found in the range.
+ */
+static int savage_dispatch_draw(drm_savage_private_t *dev_priv,
+                               const drm_savage_cmd_header_t __user *start,
+                               const drm_savage_cmd_header_t __user *end,
+                               const drm_buf_t *dmabuf,
+                               const unsigned int __user *usr_vtxbuf,
+                               unsigned int vb_size, unsigned int vb_stride,
+                               unsigned int nbox,
+                               const drm_clip_rect_t __user *usr_boxes)
+{
+       const uint32_t __user *vtx = (const uint32_t __user *)usr_vtxbuf;
+       unsigned int b;
+
+       for (b = 0; b < nbox; ++b) {
+               const drm_savage_cmd_header_t __user *next_cmd = start;
+               drm_clip_rect_t box;
+
+               DRM_COPY_FROM_USER_UNCHECKED(&box, usr_boxes + b, sizeof(box));
+               dev_priv->emit_clip_rect(dev_priv, &box);
+
+               while (next_cmd < end) {
+                       drm_savage_cmd_header_t hdr;
+                       unsigned int idx_slots;
+                       int ret;
+
+                       DRM_COPY_FROM_USER_UNCHECKED(&hdr, next_cmd,
+                                                    sizeof(hdr));
+                       /* next_cmd now points just past the header. */
+                       ++next_cmd;
+
+                       switch (hdr.cmd.cmd) {
+                       case SAVAGE_CMD_DMA_PRIM:
+                               ret = savage_dispatch_dma_prim(
+                                       dev_priv, &hdr, dmabuf);
+                               break;
+                       case SAVAGE_CMD_VB_PRIM:
+                               ret = savage_dispatch_vb_prim(
+                                       dev_priv, &hdr, vtx,
+                                       vb_size, vb_stride);
+                               break;
+                       case SAVAGE_CMD_DMA_IDX:
+                               /* Four 16-bit indices per command slot;
+                                * the slot count was checked in
+                                * savage_bci_cmdbuf. */
+                               idx_slots = (hdr.idx.count + 3) / 4;
+                               ret = savage_dispatch_dma_idx(
+                                       dev_priv, &hdr,
+                                       (const uint16_t __user *)next_cmd,
+                                       dmabuf);
+                               next_cmd += idx_slots;
+                               break;
+                       case SAVAGE_CMD_VB_IDX:
+                               /* Four 16-bit indices per command slot;
+                                * the slot count was checked in
+                                * savage_bci_cmdbuf. */
+                               idx_slots = (hdr.idx.count + 3) / 4;
+                               ret = savage_dispatch_vb_idx(
+                                       dev_priv, &hdr,
+                                       (const uint16_t __user *)next_cmd,
+                                       vtx, vb_size, vb_stride);
+                               next_cmd += idx_slots;
+                               break;
+                       default:
+                               /* What's the best return code? EFAULT? */
+                               DRM_ERROR("IMPLEMENTATION ERROR: "
+                                         "non-drawing-command %d\n",
+                                         hdr.cmd.cmd);
+                               return DRM_ERR(EINVAL);
+                       }
+
+                       if (ret != 0)
+                               return ret;
+               }
+       }
+
+       return 0;
+}
+
+/*
+ * Command buffer ioctl: validate and execute a user-space command
+ * buffer.
+ *
+ * The drm_savage_cmdbuf_t argument is copied in, the optional DMA
+ * vertex buffer is looked up by index, and the command, vertex and
+ * clip-rect areas are checked for readability. The command stream is
+ * then walked one header at a time:
+ *  - consecutive drawing commands are only collected (first_draw_cmd)
+ *    and dispatched as one group via savage_dispatch_draw when a
+ *    non-drawing command or the end of the buffer is reached;
+ *  - state, clear and swap commands are dispatched immediately.
+ * Finally buffered DMA is flushed and, if requested via
+ * cmdbuf.discard, the DMA buffer is aged and put back on the freelist.
+ *
+ * Returns 0 on success, DRM_ERR(EINVAL) for a bad buffer index or a
+ * malformed command stream, DRM_ERR(EFAULT) if a user area is not
+ * readable, or the error returned by a dispatch routine.
+ */
+int savage_bci_cmdbuf(DRM_IOCTL_ARGS)
+{
+       DRM_DEVICE;
+       drm_savage_private_t *dev_priv = dev->dev_private;
+       drm_device_dma_t *dma = dev->dma;
+       drm_buf_t *dmabuf;
+       drm_savage_cmdbuf_t cmdbuf;
+       drm_savage_cmd_header_t __user *usr_cmdbuf;
+       drm_savage_cmd_header_t __user *first_draw_cmd;
+       unsigned int __user *usr_vtxbuf;
+       drm_clip_rect_t __user *usr_boxes;
+       unsigned int i, j;
+       int ret = 0;
+
+       DRM_DEBUG("\n");
+
+       LOCK_TEST_WITH_RETURN(dev, filp);
+
+       DRM_COPY_FROM_USER_IOCTL(cmdbuf, (drm_savage_cmdbuf_t __user *)data,
+                                sizeof(cmdbuf));
+
+       if (dma && dma->buflist) {
+               /* Valid indices are 0 .. buf_count-1; buf_count itself
+                * would read past the end of buflist. */
+               if (cmdbuf.dma_idx >= dma->buf_count) {
+                       DRM_ERROR("vertex buffer index %u out of range (0-%u)\n",
+                                 cmdbuf.dma_idx, dma->buf_count-1);
+                       return DRM_ERR(EINVAL);
+               }
+               dmabuf = dma->buflist[cmdbuf.dma_idx];
+       } else {
+               dmabuf = NULL;
+       }
+
+       usr_cmdbuf = (drm_savage_cmd_header_t __user *)cmdbuf.cmd_addr;
+       usr_vtxbuf = (unsigned int __user *)cmdbuf.vb_addr;
+       usr_boxes = (drm_clip_rect_t __user *)cmdbuf.box_addr;
+       /* NOTE(review): cmdbuf.size*8 and the nbox product could wrap
+        * if these fields are 32 bits wide -- confirm against
+        * drm_savage_cmdbuf_t before relying on this check. */
+       if ((cmdbuf.size && DRM_VERIFYAREA_READ(usr_cmdbuf, cmdbuf.size*8)) ||
+           (cmdbuf.vb_size && DRM_VERIFYAREA_READ(
+                   usr_vtxbuf, cmdbuf.vb_size)) ||
+           (cmdbuf.nbox && DRM_VERIFYAREA_READ(
+                   usr_boxes, cmdbuf.nbox*sizeof(drm_clip_rect_t))))
+               return DRM_ERR(EFAULT);
+
+       /* Make sure writes to DMA buffers are finished before sending
+        * DMA commands to the graphics hardware. */
+       DRM_MEMORYBARRIER();
+
+       /* Coming from user space. Don't know if the Xserver has
+        * emitted wait commands. Assuming the worst. */
+       dev_priv->waiting = 1;
+
+       /* i counts consumed command slots; usr_cmdbuf always points at
+        * slot i. */
+       i = 0;
+       first_draw_cmd = NULL;
+       while (i < cmdbuf.size) {
+               drm_savage_cmd_header_t cmd_header;
+               DRM_COPY_FROM_USER_UNCHECKED(&cmd_header, usr_cmdbuf,
+                                            sizeof(cmd_header));
+               usr_cmdbuf++;
+               i++;
+
+               /* Group drawing commands with same state to minimize
+                * iterations over clip rects. */
+               j = 0;
+               switch (cmd_header.cmd.cmd) {
+               case SAVAGE_CMD_DMA_IDX:
+               case SAVAGE_CMD_VB_IDX:
+                       /* Indices follow the header, four per slot. */
+                       j = (cmd_header.idx.count + 3) / 4;
+                       if (i + j > cmdbuf.size) {
+                               DRM_ERROR("indexed drawing command extends "
+                                         "beyond end of command buffer\n");
+                               DMA_FLUSH();
+                               return DRM_ERR(EINVAL);
+                       }
+                       /* fall through */
+               case SAVAGE_CMD_DMA_PRIM:
+               case SAVAGE_CMD_VB_PRIM:
+                       if (!first_draw_cmd)
+                               first_draw_cmd = usr_cmdbuf-1;
+                       usr_cmdbuf += j;
+                       i += j;
+                       break;
+               default:
+                       /* A non-drawing command ends the current group.
+                        * usr_cmdbuf-1 is this command's own header,
+                        * i.e. one past the last drawing command. */
+                       if (first_draw_cmd) {
+                               ret = savage_dispatch_draw (
+                                       dev_priv, first_draw_cmd, usr_cmdbuf-1,
+                                       dmabuf, usr_vtxbuf, cmdbuf.vb_size,
+                                       cmdbuf.vb_stride,
+                                       cmdbuf.nbox, usr_boxes);
+                               if (ret != 0) {
+                                       /* Flush like every other error
+                                        * path in this loop. */
+                                       DMA_FLUSH();
+                                       return ret;
+                               }
+                               first_draw_cmd = NULL;
+                       }
+               }
+               if (first_draw_cmd)
+                       continue;
+
+               switch (cmd_header.cmd.cmd) {
+               case SAVAGE_CMD_STATE:
+                       /* Register values follow, two per slot. */
+                       j = (cmd_header.state.count + 1) / 2;
+                       if (i + j > cmdbuf.size) {
+                               DRM_ERROR("command SAVAGE_CMD_STATE extends "
+                                         "beyond end of command buffer\n");
+                               DMA_FLUSH();
+                               return DRM_ERR(EINVAL);
+                       }
+                       ret = savage_dispatch_state(
+                               dev_priv, &cmd_header,
+                               (uint32_t __user *)usr_cmdbuf);
+                       usr_cmdbuf += j;
+                       i += j;
+                       break;
+               case SAVAGE_CMD_CLEAR:
+                       /* The clear command occupies one extra slot. */
+                       if (i + 1 > cmdbuf.size) {
+                               DRM_ERROR("command SAVAGE_CMD_CLEAR extends "
+                                         "beyond end of command buffer\n");
+                               DMA_FLUSH();
+                               return DRM_ERR(EINVAL);
+                       }
+                       ret = savage_dispatch_clear(dev_priv, &cmd_header,
+                                                   usr_cmdbuf,
+                                                   cmdbuf.nbox, usr_boxes);
+                       usr_cmdbuf++;
+                       i++;
+                       break;
+               case SAVAGE_CMD_SWAP:
+                       ret = savage_dispatch_swap(dev_priv,
+                                                  cmdbuf.nbox, usr_boxes);
+                       break;
+               default:
+                       DRM_ERROR("invalid command 0x%x\n", cmd_header.cmd.cmd);
+                       DMA_FLUSH();
+                       return DRM_ERR(EINVAL);
+               }
+
+               if (ret != 0) {
+                       DMA_FLUSH();
+                       return ret;
+               }
+       }
+
+       /* Dispatch a drawing group that runs to the end of the buffer. */
+       if (first_draw_cmd) {
+               ret = savage_dispatch_draw (
+                       dev_priv, first_draw_cmd, usr_cmdbuf, dmabuf,
+                       usr_vtxbuf, cmdbuf.vb_size, cmdbuf.vb_stride,
+                       cmdbuf.nbox, usr_boxes);
+               if (ret != 0) {
+                       DMA_FLUSH();
+                       return ret;
+               }
+       }
+
+       DMA_FLUSH();
+
+       if (dmabuf && cmdbuf.discard) {
+               /* Stamp the buffer with a new event and release it. */
+               drm_savage_buf_priv_t *buf_priv = dmabuf->dev_private;
+               uint16_t event;
+               event = savage_bci_emit_event(dev_priv, SAVAGE_WAIT_3D);
+               SET_AGE(&buf_priv->age, event, dev_priv->event_wrap);
+               savage_freelist_put(dev, dmabuf);
+       }
+
+       return 0;
+}