drm/nvc0-/gr: generate grctx template at init time, not first context ctor
author Ben Skeggs <bskeggs@redhat.com>
Sat, 4 Aug 2012 08:40:45 +0000 (18:40 +1000)
committer Ben Skeggs <bskeggs@redhat.com>
Wed, 3 Oct 2012 03:12:56 +0000 (13:12 +1000)
Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
drivers/gpu/drm/nouveau/core/engine/graph/ctxnvc0.c
drivers/gpu/drm/nouveau/core/engine/graph/ctxnve0.c
drivers/gpu/drm/nouveau/core/engine/graph/nvc0.c
drivers/gpu/drm/nouveau/core/engine/graph/nvc0.h
drivers/gpu/drm/nouveau/core/engine/graph/nve0.c

index 57353015329f0fbf19207d1b803a123364e04a05..b19a406e55d98bbe9e0a83fdf5b4a9e501fcf62a 100644 (file)
@@ -35,6 +35,156 @@ nv_icmd(struct drm_device *priv, u32 icmd, u32 data)
        while (nv_rd32(priv, 0x400700) & 2) {}
 }
 
+int
+nvc0_grctx_init(struct drm_device *priv, struct nvc0_graph_priv *oprv,
+               struct nvc0_grctx *info)
+{
+       struct nouveau_gpuobj *chan;
+       u32 size = (0x80000 + oprv->size + 4095) & ~4095; /* round up to 4KiB */
+       int ret, i;
+
+       /* Allocate memory for a scratch "channel", which we'll use to
+        * generate the default context values.
+        */
+       ret = nouveau_gpuobj_new(priv, NULL, size, 0x1000,
+                                NVOBJ_FLAG_ZERO_ALLOC, &info->chan);
+       chan = info->chan; /* local alias; unused when ret != 0 (we return) */
+       if (ret) {
+               NV_ERROR(priv, "failed to allocate channel memory, %d\n", ret);
+               return ret;
+       }
+
+       /* PGD pointer: the page directory is placed at chan + 0x1000 */
+       nv_wo32(chan, 0x0200, lower_32_bits(chan->addr + 0x1000));
+       nv_wo32(chan, 0x0204, upper_32_bits(chan->addr + 0x1000));
+       nv_wo32(chan, 0x0208, 0xffffffff);
+       nv_wo32(chan, 0x020c, 0x000000ff);
+
+       /* PGT[0] pointer: the page table is placed at chan + 0x2000 */
+       nv_wo32(chan, 0x1000, 0x00000000);
+       nv_wo32(chan, 0x1004, 0x00000001 | (chan->addr + 0x2000) >> 8);
+
+       /* identity-map the whole "channel" into its own vm */
+       for (i = 0; i < size / 4096; i++) { /* one 8-byte entry per 4KiB */
+               u64 addr = ((chan->addr + (i * 4096)) >> 8) | 1;
+               nv_wo32(chan, 0x2000 + (i * 8), lower_32_bits(addr));
+               nv_wo32(chan, 0x2004 + (i * 8), upper_32_bits(addr));
+       }
+
+       /* context pointer (virt): 0x80000 in the identity map, low bits 0x4 */
+       nv_wo32(chan, 0x0210, 0x00080004);
+       nv_wo32(chan, 0x0214, 0x00000000);
+
+       nvimem_flush(priv);
+
+       nv_wr32(priv, 0x100cb8, (chan->addr + 0x1000) >> 8); /* PGD address */
+       nv_wr32(priv, 0x100cbc, 0x80000001);
+       nv_wait(priv, 0x100c80, 0x00008000, 0x00008000); /* result not checked */
+
+       /* setup default state for mmio list construction */
+       info->dev  = priv;
+       info->data = oprv->mmio_data;
+       info->mmio = oprv->mmio_list;
+       info->addr = 0x2000 + (i * 8); /* first offset past the entries above */
+       info->priv = oprv;
+       info->buffer_nr = 0;
+
+       if (oprv->firmware) {
+               nv_wr32(priv, 0x409840, 0x00000030);
+               nv_wr32(priv, 0x409500, 0x80000000 | chan->addr >> 12);
+               nv_wr32(priv, 0x409504, 0x00000003);
+               if (!nv_wait(priv, 0x409800, 0x00000010, 0x00000010))
+                       NV_ERROR(priv, "load_ctx timeout\n"); /* logged only */
+
+               nv_wo32(chan, 0x8001c, 1); /* 0x80000 is the context base set above */
+               nv_wo32(chan, 0x80020, 0);
+               nv_wo32(chan, 0x80028, 0);
+               nv_wo32(chan, 0x8002c, 0);
+               nvimem_flush(priv);
+               return 0;
+       }
+
+       /* HUB_FUC(SET_CHAN) */
+       nv_wr32(priv, 0x409840, 0x80000000);
+       nv_wr32(priv, 0x409500, 0x80000000 | chan->addr >> 12);
+       nv_wr32(priv, 0x409504, 0x00000001);
+       if (!nv_wait(priv, 0x409800, 0x80000000, 0x80000000)) {
+               NV_ERROR(priv, "HUB_SET_CHAN timeout\n");
+               nvc0_graph_ctxctl_debug(priv);
+               nouveau_gpuobj_ref(NULL, &info->chan); /* drop the scratch channel */
+               return -EBUSY;
+       }
+
+       return 0;
+}
+
+void
+nvc0_grctx_data(struct nvc0_grctx *info, u32 size, u32 align, u32 access)
+{
+       info->buffer[info->buffer_nr]  = info->addr; /* start at the cursor... */
+       info->buffer[info->buffer_nr] +=  (align - 1); /* ...rounded up to align */
+       info->buffer[info->buffer_nr] &= ~(align - 1); /* assumes power-of-two align */
+       info->addr = info->buffer[info->buffer_nr++] + size; /* cursor past buffer */
+
+       info->data->size = size; /* record the request in the mmio_data table */
+       info->data->align = align;
+       info->data->access = access;
+       info->data++; /* next slot; no bounds check is done here */
+}
+
+void
+nvc0_grctx_mmio(struct nvc0_grctx *info, u32 addr, u32 data, u32 shift, u32 buf)
+{
+       info->mmio->addr = addr; /* record the entry in the mmio_list table */
+       info->mmio->data = data;
+       info->mmio->shift = shift; /* non-zero: OR in (buffer address >> shift) */
+       info->mmio->buffer = buf; /* index of the buffer supplying the address */
+       info->mmio++; /* next slot; no bounds check is done here */
+
+       if (shift) /* buf is only looked at when shift != 0 */
+               data |= info->buffer[buf] >> shift;
+       nv_wr32(info->dev, addr, data); /* also write the register right away */
+}
+
+int
+nvc0_grctx_fini(struct nvc0_grctx *info)
+{
+       struct nvc0_graph_priv *priv = info->priv;
+       int i;
+
+       if (priv->firmware) { /* external firmware: request a context unload */
+               nv_wr32(info->dev, 0x409840, 0x00000003);
+               nv_wr32(info->dev, 0x409500, 0x80000000 | info->chan->addr >> 12);
+               nv_wr32(info->dev, 0x409504, 0x00000009);
+               if (!nv_wait(info->dev, 0x409800, 0x00000001, 0x00000000)) {
+                       NV_ERROR(info->dev, "unload_ctx timeout\n");
+                       return -EBUSY; /* NOTE(review): info->chan not released */
+               }
+
+               goto save; /* skip the HUB_FUC request below */
+       }
+
+       /* HUB_FUC(CTX_SAVE) */
+       nv_wr32(info->dev, 0x409840, 0x80000000);
+       nv_wr32(info->dev, 0x409500, 0x80000000 | info->chan->addr >> 12);
+       nv_wr32(info->dev, 0x409504, 0x00000002);
+       if (!nv_wait(info->dev, 0x409800, 0x80000000, 0x80000000)) {
+               NV_ERROR(info->dev, "HUB_CTX_SAVE timeout\n");
+               nvc0_graph_ctxctl_debug(info->dev);
+               return -EBUSY; /* NOTE(review): info->chan not released */
+       }
+
+save:
+       priv->data = kmalloc(priv->size, GFP_KERNEL); /* host copy of the context */
+       if (priv->data) {
+               for (i = 0; i < priv->size; i += 4) /* image starts at 0x80000 */
+                       priv->data[i / 4] = nv_ro32(info->chan, 0x80000 + i);
+       }
+
+       nouveau_gpuobj_ref(NULL, &info->chan); /* done with the scratch channel */
+       return priv->data ? 0 : -ENOMEM;
+}
+
 static void
 nvc0_grctx_generate_9097(struct drm_device *priv)
 {
@@ -1779,16 +1929,19 @@ nvc0_grctx_generate_tp(struct drm_device *priv)
 }
 
 int
-nvc0_grctx_generate(struct nouveau_channel *chan)
+nvc0_grctx_generate(struct drm_device *priv)
 {
-       struct drm_nouveau_private *dev_priv = chan->dev->dev_private;
-       struct nvc0_graph_priv *oprv = nv_engine(chan->dev, NVOBJ_ENGINE_GR);
-       struct nvc0_graph_chan *grch = chan->engctx[NVOBJ_ENGINE_GR];
-       struct drm_device *priv = chan->dev;
-       int i, gpc, tp, id;
+       struct drm_nouveau_private *dev_priv = priv->dev_private;
+       struct nvc0_graph_priv *oprv = nv_engine(priv, NVOBJ_ENGINE_GR);
+       struct nvc0_grctx info;
+       int ret, i, gpc, tpc, id;
        u32 fermi = nvc0_graph_class(priv);
        u32 r000260, tmp;
 
+       ret = nvc0_grctx_init(priv, oprv, &info);
+       if (ret)
+               return ret;
+
        r000260 = nv_rd32(priv, 0x000260);
        nv_wr32(priv, 0x000260, r000260 & ~1);
        nv_wr32(priv, 0x400208, 0x00000000);
@@ -1808,19 +1961,55 @@ nvc0_grctx_generate(struct nouveau_channel *chan)
 
        nv_wr32(priv, 0x404154, 0x00000000);
 
-       /* fuc "mmio list" writes */
-       for (i = 0; i < grch->mmio_nr * 8; i += 8) {
-               u32 reg = nv_ro32(grch->mmio, i + 0);
-               nv_wr32(priv, reg, nv_ro32(grch->mmio, i + 4));
+       /* generate per-context mmio list data */
+       mmio_data(0x002000, 0x0100, NV_MEM_ACCESS_RW | NV_MEM_ACCESS_SYS);
+       mmio_data(0x008000, 0x0100, NV_MEM_ACCESS_RW | NV_MEM_ACCESS_SYS);
+       mmio_data(0x060000, 0x1000, NV_MEM_ACCESS_RW);
+       mmio_list(0x408004, 0x00000000,  8, 0);
+       mmio_list(0x408008, 0x80000018,  0, 0);
+       mmio_list(0x40800c, 0x00000000,  8, 1);
+       mmio_list(0x408010, 0x80000000,  0, 0);
+       mmio_list(0x418810, 0x80000000, 12, 2);
+       mmio_list(0x419848, 0x10000000, 12, 2);
+       mmio_list(0x419004, 0x00000000,  8, 1);
+       mmio_list(0x419008, 0x00000000,  0, 0);
+       mmio_list(0x418808, 0x00000000,  8, 0);
+       mmio_list(0x41880c, 0x80000018,  0, 0);
+       if (dev_priv->chipset != 0xc1) {
+               tmp = 0x02180000;
+               mmio_list(0x405830, tmp, 0, 0);
+               for (gpc = 0; gpc < oprv->gpc_nr; gpc++) {
+                       for (tpc = 0; tpc < oprv->tpc_nr[gpc]; tpc++) {
+                               u32 reg = TPC_UNIT(gpc, tpc, 0x0520);
+                               mmio_list(reg, tmp, 0, 0);
+                               tmp += 0x0324;
+                       }
+               }
+       } else {
+               tmp = 0x02180000;
+               mmio_list(0x405830, 0x00000218 | tmp, 0, 0);
+               mmio_list(0x4064c4, 0x0086ffff, 0, 0);
+               for (gpc = 0; gpc < oprv->gpc_nr; gpc++) {
+                       for (tpc = 0; tpc < oprv->tpc_nr[gpc]; tpc++) {
+                               u32 reg = TPC_UNIT(gpc, tpc, 0x0520);
+                               mmio_list(reg, 0x10000000 | tmp, 0, 0);
+                               tmp += 0x0324;
+                       }
+                       for (tpc = 0; tpc < oprv->tpc_nr[gpc]; tpc++) {
+                               u32 reg = TPC_UNIT(gpc, tpc, 0x0544);
+                               mmio_list(reg, tmp, 0, 0);
+                               tmp += 0x0324;
+                       }
+               }
        }
 
-       for (tp = 0, id = 0; tp < 4; tp++) {
+       for (tpc = 0, id = 0; tpc < 4; tpc++) {
                for (gpc = 0; gpc < oprv->gpc_nr; gpc++) {
-                       if (tp < oprv->tpc_nr[gpc]) {
-                               nv_wr32(priv, TPC_UNIT(gpc, tp, 0x698), id);
-                               nv_wr32(priv, TPC_UNIT(gpc, tp, 0x4e8), id);
-                               nv_wr32(priv, GPC_UNIT(gpc, 0x0c10 + tp * 4), id);
-                               nv_wr32(priv, TPC_UNIT(gpc, tp, 0x088), id);
+                       if (tpc < oprv->tpc_nr[gpc]) {
+                               nv_wr32(priv, TPC_UNIT(gpc, tpc, 0x698), id);
+                               nv_wr32(priv, TPC_UNIT(gpc, tpc, 0x4e8), id);
+                               nv_wr32(priv, GPC_UNIT(gpc, 0x0c10 + tpc * 4), id);
+                               nv_wr32(priv, TPC_UNIT(gpc, tpc, 0x088), id);
                                id++;
                        }
 
@@ -1843,18 +2032,18 @@ nvc0_grctx_generate(struct nouveau_channel *chan)
        nv_wr32(priv, 0x40587c, 0x00000000);
 
        if (1) {
-               u8 tpnr[GPC_MAX], data[TPC_MAX];
+               u8 tpcnr[GPC_MAX], data[TPC_MAX];
 
-               memcpy(tpnr, oprv->tpc_nr, sizeof(oprv->tpc_nr));
+               memcpy(tpcnr, oprv->tpc_nr, sizeof(oprv->tpc_nr));
                memset(data, 0x1f, sizeof(data));
 
                gpc = -1;
-               for (tp = 0; tp < oprv->tpc_total; tp++) {
+               for (tpc = 0; tpc < oprv->tpc_total; tpc++) {
                        do {
                                gpc = (gpc + 1) % oprv->gpc_nr;
-                       } while (!tpnr[gpc]);
-                       tpnr[gpc]--;
-                       data[tp] = gpc;
+                       } while (!tpcnr[gpc]);
+                       tpcnr[gpc]--;
+                       data[tpc] = gpc;
                }
 
                for (i = 0; i < 4; i++)
@@ -1863,24 +2052,24 @@ nvc0_grctx_generate(struct nouveau_channel *chan)
 
        if (1) {
                u32 data[6] = {}, data2[2] = {};
-               u8 tpnr[GPC_MAX];
+               u8 tpcnr[GPC_MAX];
                u8 shift, ntpcv;
 
                /* calculate first set of magics */
-               memcpy(tpnr, oprv->tpc_nr, sizeof(oprv->tpc_nr));
+               memcpy(tpcnr, oprv->tpc_nr, sizeof(oprv->tpc_nr));
 
                gpc = -1;
-               for (tp = 0; tp < oprv->tpc_total; tp++) {
+               for (tpc = 0; tpc < oprv->tpc_total; tpc++) {
                        do {
                                gpc = (gpc + 1) % oprv->gpc_nr;
-                       } while (!tpnr[gpc]);
-                       tpnr[gpc]--;
+                       } while (!tpcnr[gpc]);
+                       tpcnr[gpc]--;
 
-                       data[tp / 6] |= gpc << ((tp % 6) * 5);
+                       data[tpc / 6] |= gpc << ((tpc % 6) * 5);
                }
 
-               for (; tp < 32; tp++)
-                       data[tp / 6] |= 7 << ((tp % 6) * 5);
+               for (; tpc < 32; tpc++)
+                       data[tpc / 6] |= 7 << ((tpc % 6) * 5);
 
                /* and the second... */
                shift = 0;
@@ -1918,12 +2107,12 @@ nvc0_grctx_generate(struct nouveau_channel *chan)
        }
 
        if (1) {
-               u32 tp_mask = 0, tp_set = 0;
-               u8  tpnr[GPC_MAX], a, b;
+               u32 tpc_mask = 0, tpc_set = 0;
+               u8  tpcnr[GPC_MAX], a, b;
 
-               memcpy(tpnr, oprv->tpc_nr, sizeof(oprv->tpc_nr));
+               memcpy(tpcnr, oprv->tpc_nr, sizeof(oprv->tpc_nr));
                for (gpc = 0; gpc < oprv->gpc_nr; gpc++)
-                       tp_mask |= ((1 << oprv->tpc_nr[gpc]) - 1) << (gpc * 8);
+                       tpc_mask |= ((1 << oprv->tpc_nr[gpc]) - 1) << (gpc * 8);
 
                for (i = 0, gpc = -1, b = -1; i < 32; i++) {
                        a = (i * (oprv->tpc_total - 1)) / 32;
@@ -1931,14 +2120,14 @@ nvc0_grctx_generate(struct nouveau_channel *chan)
                                b = a;
                                do {
                                        gpc = (gpc + 1) % oprv->gpc_nr;
-                               } while (!tpnr[gpc]);
-                               tp = oprv->tpc_nr[gpc] - tpnr[gpc]--;
+                               } while (!tpcnr[gpc]);
+                               tpc = oprv->tpc_nr[gpc] - tpcnr[gpc]--;
 
-                               tp_set |= 1 << ((gpc * 8) + tp);
+                               tpc_set |= 1 << ((gpc * 8) + tpc);
                        }
 
-                       nv_wr32(priv, 0x406800 + (i * 0x20), tp_set);
-                       nv_wr32(priv, 0x406c00 + (i * 0x20), tp_set ^ tp_mask);
+                       nv_wr32(priv, 0x406800 + (i * 0x20), tpc_set);
+                       nv_wr32(priv, 0x406c00 + (i * 0x20), tpc_set ^ tpc_mask);
                }
        }
 
@@ -2867,5 +3056,5 @@ nvc0_grctx_generate(struct nouveau_channel *chan)
        nvc0_grctx_generate_90c0(priv);
 
        nv_wr32(priv, 0x000260, r000260);
-       return 0;
+       return nvc0_grctx_fini(&info);
 }
index 47bea8a7b2bc784ee508585149906d98f8f11406..e5503170d68c7100de320605c85a5bb45a5be25e 100644 (file)
@@ -2604,16 +2604,20 @@ nve0_graph_generate_tpcunk(struct drm_device *priv)
 }
 
 int
-nve0_grctx_generate(struct nouveau_channel *chan)
+nve0_grctx_generate(struct drm_device *priv)
 {
-       struct nvc0_graph_priv *oprv = nv_engine(chan->dev, NVOBJ_ENGINE_GR);
-       struct nvc0_graph_chan *grch = chan->engctx[NVOBJ_ENGINE_GR];
-       struct drm_device *priv = chan->dev;
+       struct nvc0_graph_priv *oprv = nv_engine(priv, NVOBJ_ENGINE_GR);
+       struct nvc0_grctx info;
+       int ret, i, gpc, tpc, id;
        u32 data[6] = {}, data2[2] = {}, tmp;
        u32 tpc_set = 0, tpc_mask = 0;
+       u32 magic[GPC_MAX][2], offset;
        u8 tpcnr[GPC_MAX], a, b;
        u8 shift, ntpcv;
-       int i, gpc, tpc, id;
+
+       ret = nvc0_grctx_init(priv, oprv, &info);
+       if (ret)
+               return ret;
 
        nv_mask(priv, 0x000260, 0x00000001, 0x00000000);
        nv_wr32(priv, 0x400204, 0x00000000);
@@ -2636,11 +2640,37 @@ nve0_grctx_generate(struct nouveau_channel *chan)
 
        nv_wr32(priv, 0x404154, 0x0);
 
-       for (i = 0; i < grch->mmio_nr * 8; i += 8) {
-               u32 reg = nv_ro32(grch->mmio, i + 0);
-               u32 val = nv_ro32(grch->mmio, i + 4);
-               nv_wr32(priv, reg, val);
+       mmio_data(0x003000, 0x0100, NV_MEM_ACCESS_RW | NV_MEM_ACCESS_SYS);
+       mmio_data(0x008000, 0x0100, NV_MEM_ACCESS_RW | NV_MEM_ACCESS_SYS);
+       mmio_data(0x060000, 0x1000, NV_MEM_ACCESS_RW);
+       mmio_list(0x40800c, 0x00000000,  8, 1);
+       mmio_list(0x408010, 0x80000000,  0, 0);
+       mmio_list(0x419004, 0x00000000,  8, 1);
+       mmio_list(0x419008, 0x00000000,  0, 0);
+       mmio_list(0x4064cc, 0x80000000,  0, 0);
+       mmio_list(0x408004, 0x00000000,  8, 0);
+       mmio_list(0x408008, 0x80000030,  0, 0);
+       mmio_list(0x418808, 0x00000000,  8, 0);
+       mmio_list(0x41880c, 0x80000030,  0, 0);
+       mmio_list(0x4064c8, 0x01800600,  0, 0);
+       mmio_list(0x418810, 0x80000000, 12, 2);
+       mmio_list(0x419848, 0x10000000, 12, 2);
+       mmio_list(0x405830, 0x02180648,  0, 0);
+       mmio_list(0x4064c4, 0x0192ffff,  0, 0);
+       for (gpc = 0, offset = 0; gpc < oprv->gpc_nr; gpc++) {
+               u16 magic0 = 0x0218 * oprv->tpc_nr[gpc];
+               u16 magic1 = 0x0648 * oprv->tpc_nr[gpc];
+               magic[gpc][0]  = 0x10000000 | (magic0 << 16) | offset;
+               magic[gpc][1]  = 0x00000000 | (magic1 << 16);
+               offset += 0x0324 * oprv->tpc_nr[gpc];
+       }
+       for (gpc = 0; gpc < oprv->gpc_nr; gpc++) {
+               mmio_list(GPC_UNIT(gpc, 0x30c0), magic[gpc][0], 0, 0);
+               mmio_list(GPC_UNIT(gpc, 0x30e4), magic[gpc][1] | offset, 0, 0);
+               offset += 0x07ff * oprv->tpc_nr[gpc];
        }
+       mmio_list(0x17e91c, 0x06060609, 0, 0);
+       mmio_list(0x17e920, 0x00090a05, 0, 0);
 
        nv_wr32(priv, 0x418c6c, 0x1);
        nv_wr32(priv, 0x41980c, 0x10);
@@ -2758,5 +2788,5 @@ nve0_grctx_generate(struct nouveau_channel *chan)
        nv_mask(priv, 0x000260, 0x00000001, 0x00000001);
        nv_wr32(priv, 0x418800, 0x7026860a); //XXX
        nv_wr32(priv, 0x41be10, 0x00bb8bc7); //XXX
-       return 0;
+       return nvc0_grctx_fini(&info);
 }
index 158d57619e5bf8b55e2b3cc94112d20aa5cf1a79..f994d2f7e8d55d0caae319f2309428a45608946e 100644 (file)
@@ -48,7 +48,7 @@ nvc0_graph_ctxctl_debug_unit(struct drm_device *dev, u32 base)
                nv_rd32(dev, base + 0x818), nv_rd32(dev, base + 0x81c));
 }
 
-static void
+void
 nvc0_graph_ctxctl_debug(struct drm_device *dev)
 {
        u32 gpcnr = nv_rd32(dev, 0x409604) & 0xffff;
@@ -59,259 +59,92 @@ nvc0_graph_ctxctl_debug(struct drm_device *dev)
                nvc0_graph_ctxctl_debug_unit(dev, 0x502000 + (gpc * 0x8000));
 }
 
-static int
-nvc0_graph_load_context(struct nouveau_channel *chan)
-{
-       struct drm_device *dev = chan->dev;
-
-       nv_wr32(dev, 0x409840, 0x00000030);
-       nv_wr32(dev, 0x409500, 0x80000000 | chan->ramin->addr >> 12);
-       nv_wr32(dev, 0x409504, 0x00000003);
-       if (!nv_wait(dev, 0x409800, 0x00000010, 0x00000010))
-               NV_ERROR(dev, "PGRAPH: load_ctx timeout\n");
-
-       return 0;
-}
-
-static int
-nvc0_graph_unload_context_to(struct drm_device *dev, u64 chan)
-{
-       nv_wr32(dev, 0x409840, 0x00000003);
-       nv_wr32(dev, 0x409500, 0x80000000 | chan >> 12);
-       nv_wr32(dev, 0x409504, 0x00000009);
-       if (!nv_wait(dev, 0x409800, 0x00000001, 0x00000000)) {
-               NV_ERROR(dev, "PGRAPH: unload_ctx timeout\n");
-               return -EBUSY;
-       }
-
-       return 0;
-}
-
-static int
-nvc0_graph_construct_context(struct nouveau_channel *chan)
+int
+nvc0_graph_context_new(struct nouveau_channel *chan, int engine)
 {
-       struct nvc0_graph_priv *priv = nv_engine(chan->dev, NVOBJ_ENGINE_GR);
-       struct nvc0_graph_chan *grch = chan->engctx[NVOBJ_ENGINE_GR];
        struct drm_device *dev = chan->dev;
+       struct nvc0_graph_priv *priv = nv_engine(dev, engine);
+       struct nvc0_graph_data *data = priv->mmio_data;
+       struct nvc0_graph_mmio *mmio = priv->mmio_list;
+       struct nvc0_graph_chan *grch;
+       struct nouveau_gpuobj *grctx;
        int ret, i;
-       u32 *ctx;
 
-       ctx = kmalloc(priv->grctx_size, GFP_KERNEL);
-       if (!ctx)
+       grch = kzalloc(sizeof(*grch), GFP_KERNEL);
+       if (!grch)
                return -ENOMEM;
+       chan->engctx[NVOBJ_ENGINE_GR] = grch;
 
-       if (!nouveau_ctxfw) {
-               nv_wr32(dev, 0x409840, 0x80000000);
-               nv_wr32(dev, 0x409500, 0x80000000 | chan->ramin->addr >> 12);
-               nv_wr32(dev, 0x409504, 0x00000001);
-               if (!nv_wait(dev, 0x409800, 0x80000000, 0x80000000)) {
-                       NV_ERROR(dev, "PGRAPH: HUB_SET_CHAN timeout\n");
-                       nvc0_graph_ctxctl_debug(dev);
-                       ret = -EBUSY;
-                       goto err;
-               }
-       } else {
-               nvc0_graph_load_context(chan);
-
-               nv_wo32(grch->grctx, 0x1c, 1);
-               nv_wo32(grch->grctx, 0x20, 0);
-               nv_wo32(grch->grctx, 0x28, 0);
-               nv_wo32(grch->grctx, 0x2c, 0);
-               nvimem_flush(dev);
-       }
-
-       ret = nvc0_grctx_generate(chan);
+       ret = nouveau_gpuobj_new(dev, NULL, priv->size, 256, 0, &grch->grctx);
        if (ret)
-               goto err;
-
-       if (!nouveau_ctxfw) {
-               nv_wr32(dev, 0x409840, 0x80000000);
-               nv_wr32(dev, 0x409500, 0x80000000 | chan->ramin->addr >> 12);
-               nv_wr32(dev, 0x409504, 0x00000002);
-               if (!nv_wait(dev, 0x409800, 0x80000000, 0x80000000)) {
-                       NV_ERROR(dev, "PGRAPH: HUB_CTX_SAVE timeout\n");
-                       nvc0_graph_ctxctl_debug(dev);
-                       ret = -EBUSY;
-                       goto err;
-               }
-       } else {
-               ret = nvc0_graph_unload_context_to(dev, chan->ramin->addr);
-               if (ret)
-                       goto err;
-       }
-
-       for (i = 0; i < priv->grctx_size; i += 4)
-               ctx[i / 4] = nv_ro32(grch->grctx, i);
-
-       priv->grctx_vals = ctx;
-       return 0;
-
-err:
-       kfree(ctx);
-       return ret;
-}
-
-static int
-nvc0_graph_create_context_mmio_list(struct nouveau_channel *chan)
-{
-       struct nvc0_graph_priv *priv = nv_engine(chan->dev, NVOBJ_ENGINE_GR);
-       struct nvc0_graph_chan *grch = chan->engctx[NVOBJ_ENGINE_GR];
-       struct drm_device *dev = chan->dev;
-       struct drm_nouveau_private *dev_priv = dev->dev_private;
-       int i = 0, gpc, tp, ret;
+               goto error;
 
-       ret = nouveau_gpuobj_new(dev, NULL, 0x2000, 256, 0, &grch->unk408004);
+       ret = nouveau_gpuobj_map_vm(grch->grctx, chan->vm, NV_MEM_ACCESS_RW |
+                                   NV_MEM_ACCESS_SYS, &grch->grctx_vma);
        if (ret)
                return ret;
 
-       ret = nouveau_gpuobj_map_vm(grch->unk408004, chan->vm,
-                                   NV_MEM_ACCESS_RW | NV_MEM_ACCESS_SYS,
-                                   &grch->unk408004_vma);
-       if (ret)
-               return ret;
+       grctx = grch->grctx;
 
-       ret = nouveau_gpuobj_new(dev, NULL, 0x8000, 256, 0, &grch->unk40800c);
+       /* allocate memory for a "mmio list" buffer that's used by the HUB
+        * fuc to modify some per-context register settings on first load
+        * of the context.
+        */
+       ret = nouveau_gpuobj_new(dev, NULL, 0x1000, 0x100, 0, &grch->mmio);
        if (ret)
                return ret;
 
-       ret = nouveau_gpuobj_map_vm(grch->unk40800c, chan->vm,
+       ret = nouveau_gpuobj_map_vm(grch->mmio, chan->vm,
                                    NV_MEM_ACCESS_RW | NV_MEM_ACCESS_SYS,
-                                   &grch->unk40800c_vma);
+                                   &grch->mmio_vma);
        if (ret)
                return ret;
 
-       ret = nouveau_gpuobj_new(dev, NULL, 384 * 1024, 4096, 0,
-                                &grch->unk418810);
-       if (ret)
-               return ret;
-
-       ret = nouveau_gpuobj_map_vm(grch->unk418810, chan->vm,
-                                   NV_MEM_ACCESS_RW, &grch->unk418810_vma);
-       if (ret)
-               return ret;
-
-       ret = nouveau_gpuobj_new(dev, NULL, 0x1000, 0, 0, &grch->mmio);
-       if (ret)
-               return ret;
+       /* allocate buffers referenced by mmio list */
+       for (i = 0; data->size && i < ARRAY_SIZE(priv->mmio_data); i++) {
+               ret = nouveau_gpuobj_new(dev, NULL, data->size, data->align,
+                                        0, &grch->data[i].mem);
+               if (ret)
+                       return ret;
 
-       ret = nouveau_gpuobj_map_vm(grch->mmio, chan->vm, NV_MEM_ACCESS_RW |
-                                   NV_MEM_ACCESS_SYS, &grch->mmio_vma);
-       if (ret)
-               return ret;
+               ret = nouveau_gpuobj_map_vm(grch->data[i].mem, chan->vm,
+                                           data->access,
+                                          &grch->data[i].vma);
+               if (ret)
+                       return ret;
 
-       nv_wo32(grch->mmio, i++ * 4, 0x00408004);
-       nv_wo32(grch->mmio, i++ * 4, grch->unk408004_vma.offset >> 8);
-       nv_wo32(grch->mmio, i++ * 4, 0x00408008);
-       nv_wo32(grch->mmio, i++ * 4, 0x80000018);
-
-       nv_wo32(grch->mmio, i++ * 4, 0x0040800c);
-       nv_wo32(grch->mmio, i++ * 4, grch->unk40800c_vma.offset >> 8);
-       nv_wo32(grch->mmio, i++ * 4, 0x00408010);
-       nv_wo32(grch->mmio, i++ * 4, 0x80000000);
-
-       nv_wo32(grch->mmio, i++ * 4, 0x00418810);
-       nv_wo32(grch->mmio, i++ * 4, 0x80000000 | grch->unk418810_vma.offset >> 12);
-       nv_wo32(grch->mmio, i++ * 4, 0x00419848);
-       nv_wo32(grch->mmio, i++ * 4, 0x10000000 | grch->unk418810_vma.offset >> 12);
-
-       nv_wo32(grch->mmio, i++ * 4, 0x00419004);
-       nv_wo32(grch->mmio, i++ * 4, grch->unk40800c_vma.offset >> 8);
-       nv_wo32(grch->mmio, i++ * 4, 0x00419008);
-       nv_wo32(grch->mmio, i++ * 4, 0x00000000);
-
-       nv_wo32(grch->mmio, i++ * 4, 0x00418808);
-       nv_wo32(grch->mmio, i++ * 4, grch->unk408004_vma.offset >> 8);
-       nv_wo32(grch->mmio, i++ * 4, 0x0041880c);
-       nv_wo32(grch->mmio, i++ * 4, 0x80000018);
-
-       if (dev_priv->chipset != 0xc1) {
-               u32 magic = 0x02180000;
-               nv_wo32(grch->mmio, i++ * 4, 0x00405830);
-               nv_wo32(grch->mmio, i++ * 4, magic);
-               for (gpc = 0; gpc < priv->gpc_nr; gpc++) {
-                       for (tp = 0; tp < priv->tpc_nr[gpc]; tp++) {
-                               u32 reg = TPC_UNIT(gpc, tp, 0x520);
-                               nv_wo32(grch->mmio, i++ * 4, reg);
-                               nv_wo32(grch->mmio, i++ * 4, magic);
-                               magic += 0x0324;
-                       }
-               }
-       } else {
-               u32 magic = 0x02180000;
-               nv_wo32(grch->mmio, i++ * 4, 0x00405830);
-               nv_wo32(grch->mmio, i++ * 4, magic | 0x0000218);
-               nv_wo32(grch->mmio, i++ * 4, 0x004064c4);
-               nv_wo32(grch->mmio, i++ * 4, 0x0086ffff);
-               for (gpc = 0; gpc < priv->gpc_nr; gpc++) {
-                       for (tp = 0; tp < priv->tpc_nr[gpc]; tp++) {
-                               u32 reg = TPC_UNIT(gpc, tp, 0x520);
-                               nv_wo32(grch->mmio, i++ * 4, reg);
-                               nv_wo32(grch->mmio, i++ * 4, (1 << 28) | magic);
-                               magic += 0x0324;
-                       }
-                       for (tp = 0; tp < priv->tpc_nr[gpc]; tp++) {
-                               u32 reg = TPC_UNIT(gpc, tp, 0x544);
-                               nv_wo32(grch->mmio, i++ * 4, reg);
-                               nv_wo32(grch->mmio, i++ * 4, magic);
-                               magic += 0x0324;
-                       }
-               }
+               data++;
        }
 
-       grch->mmio_nr = i / 2;
-       return 0;
-}
-
-static int
-nvc0_graph_context_new(struct nouveau_channel *chan, int engine)
-{
-       struct drm_device *dev = chan->dev;
-       struct nvc0_graph_priv *priv = nv_engine(dev, engine);
-       struct nvc0_graph_chan *grch;
-       struct nouveau_gpuobj *grctx;
-       int ret, i;
-
-       grch = kzalloc(sizeof(*grch), GFP_KERNEL);
-       if (!grch)
-               return -ENOMEM;
-       chan->engctx[NVOBJ_ENGINE_GR] = grch;
+       /* finally, fill in the mmio list and point the context at it */
+       for (i = 0; mmio->addr && i < ARRAY_SIZE(priv->mmio_list); i++) {
+               u32 addr = mmio->addr;
+               u32 data = mmio->data;
 
-       ret = nouveau_gpuobj_new(dev, NULL, priv->grctx_size, 256, 0,
-                                &grch->grctx);
-       if (ret)
-               goto error;
+               if (mmio->shift) {
+                       u64 info = grch->data[mmio->buffer].vma.offset;
+                       data |= info >> mmio->shift;
+               }
 
-       ret = nouveau_gpuobj_map_vm(grch->grctx, chan->vm, NV_MEM_ACCESS_RW |
-                                   NV_MEM_ACCESS_SYS, &grch->grctx_vma);
-       if (ret)
-               return ret;
+               nv_wo32(grch->mmio, grch->mmio_nr++ * 4, addr);
+               nv_wo32(grch->mmio, grch->mmio_nr++ * 4, data);
+               mmio++;
+       }
 
-       grctx = grch->grctx;
-
-       ret = nvc0_graph_create_context_mmio_list(chan);
-       if (ret)
-               goto error;
+       for (i = 0; i < priv->size; i += 4)
+               nv_wo32(grch->grctx, i, priv->data[i / 4]);
 
        nv_wo32(chan->ramin, 0x0210, lower_32_bits(grch->grctx_vma.offset) | 4);
        nv_wo32(chan->ramin, 0x0214, upper_32_bits(grch->grctx_vma.offset));
        nvimem_flush(dev);
 
-       if (!priv->grctx_vals) {
-               ret = nvc0_graph_construct_context(chan);
-               if (ret)
-                       goto error;
-       }
-
-       for (i = 0; i < priv->grctx_size; i += 4)
-               nv_wo32(grctx, i, priv->grctx_vals[i / 4]);
-
-       if (!nouveau_ctxfw) {
-               nv_wo32(grctx, 0x00, grch->mmio_nr);
+       if (!priv->firmware) {
+               nv_wo32(grctx, 0x00, grch->mmio_nr / 2);
                nv_wo32(grctx, 0x04, grch->mmio_vma.offset >> 8);
        } else {
                nv_wo32(grctx, 0xf4, 0);
                nv_wo32(grctx, 0xf8, 0);
-               nv_wo32(grctx, 0x10, grch->mmio_nr);
+               nv_wo32(grctx, 0x10, grch->mmio_nr / 2);
                nv_wo32(grctx, 0x14, lower_32_bits(grch->mmio_vma.offset));
                nv_wo32(grctx, 0x18, upper_32_bits(grch->mmio_vma.offset));
                nv_wo32(grctx, 0x1c, 1);
@@ -327,20 +160,21 @@ error:
        return ret;
 }
 
-static void
+void
 nvc0_graph_context_del(struct nouveau_channel *chan, int engine)
 {
        struct nvc0_graph_chan *grch = chan->engctx[engine];
+       int i;
+
+       for (i = 0; i < ARRAY_SIZE(grch->data); i++) {
+               nouveau_gpuobj_unmap(&grch->data[i].vma);
+               nouveau_gpuobj_ref(NULL, &grch->data[i].mem);
+       }
 
        nouveau_gpuobj_unmap(&grch->mmio_vma);
-       nouveau_gpuobj_unmap(&grch->unk418810_vma);
-       nouveau_gpuobj_unmap(&grch->unk40800c_vma);
-       nouveau_gpuobj_unmap(&grch->unk408004_vma);
-       nouveau_gpuobj_unmap(&grch->grctx_vma);
        nouveau_gpuobj_ref(NULL, &grch->mmio);
-       nouveau_gpuobj_ref(NULL, &grch->unk418810);
-       nouveau_gpuobj_ref(NULL, &grch->unk40800c);
-       nouveau_gpuobj_ref(NULL, &grch->unk408004);
+
+       nouveau_gpuobj_unmap(&grch->grctx_vma);
        nouveau_gpuobj_ref(NULL, &grch->grctx);
        chan->engctx[engine] = NULL;
 }
@@ -517,89 +351,102 @@ nvc0_graph_init_ctxctl(struct drm_device *dev)
        u32 r000260;
        int i;
 
-       if (!nouveau_ctxfw) {
-               /* load HUB microcode */
+       if (priv->firmware) {
+               /* load fuc microcode */
                r000260 = nv_mask(dev, 0x000260, 0x00000001, 0x00000000);
-               nv_wr32(dev, 0x4091c0, 0x01000000);
-               for (i = 0; i < sizeof(nvc0_grhub_data) / 4; i++)
-                       nv_wr32(dev, 0x4091c4, nvc0_grhub_data[i]);
-
-               nv_wr32(dev, 0x409180, 0x01000000);
-               for (i = 0; i < sizeof(nvc0_grhub_code) / 4; i++) {
-                       if ((i & 0x3f) == 0)
-                               nv_wr32(dev, 0x409188, i >> 6);
-                       nv_wr32(dev, 0x409184, nvc0_grhub_code[i]);
-               }
-
-               /* load GPC microcode */
-               nv_wr32(dev, 0x41a1c0, 0x01000000);
-               for (i = 0; i < sizeof(nvc0_grgpc_data) / 4; i++)
-                       nv_wr32(dev, 0x41a1c4, nvc0_grgpc_data[i]);
-
-               nv_wr32(dev, 0x41a180, 0x01000000);
-               for (i = 0; i < sizeof(nvc0_grgpc_code) / 4; i++) {
-                       if ((i & 0x3f) == 0)
-                               nv_wr32(dev, 0x41a188, i >> 6);
-                       nv_wr32(dev, 0x41a184, nvc0_grgpc_code[i]);
-               }
+               nvc0_graph_init_fuc(dev, 0x409000, &priv->fuc409c,
+                                                  &priv->fuc409d);
+               nvc0_graph_init_fuc(dev, 0x41a000, &priv->fuc41ac,
+                                                  &priv->fuc41ad);
                nv_wr32(dev, 0x000260, r000260);
 
-               /* start HUB ucode running, it'll init the GPCs */
-               nv_wr32(dev, 0x409800, dev_priv->chipset);
+               /* start both of them running */
+               nv_wr32(dev, 0x409840, 0xffffffff);
+               nv_wr32(dev, 0x41a10c, 0x00000000);
                nv_wr32(dev, 0x40910c, 0x00000000);
+               nv_wr32(dev, 0x41a100, 0x00000002);
                nv_wr32(dev, 0x409100, 0x00000002);
-               if (!nv_wait(dev, 0x409800, 0x80000000, 0x80000000)) {
-                       NV_ERROR(dev, "PGRAPH: HUB_INIT timed out\n");
-                       nvc0_graph_ctxctl_debug(dev);
+               if (!nv_wait(dev, 0x409800, 0x00000001, 0x00000001))
+                       NV_INFO(dev, "0x409800 wait failed\n");
+
+               nv_wr32(dev, 0x409840, 0xffffffff);
+               nv_wr32(dev, 0x409500, 0x7fffffff);
+               nv_wr32(dev, 0x409504, 0x00000021);
+
+               nv_wr32(dev, 0x409840, 0xffffffff);
+               nv_wr32(dev, 0x409500, 0x00000000);
+               nv_wr32(dev, 0x409504, 0x00000010);
+               if (!nv_wait_ne(dev, 0x409800, 0xffffffff, 0x00000000)) {
+                       NV_ERROR(dev, "fuc09 req 0x10 timeout\n");
                        return -EBUSY;
                }
+               priv->size = nv_rd32(dev, 0x409800);
 
-               priv->grctx_size = nv_rd32(dev, 0x409804);
-               return 0;
+               nv_wr32(dev, 0x409840, 0xffffffff);
+               nv_wr32(dev, 0x409500, 0x00000000);
+               nv_wr32(dev, 0x409504, 0x00000016);
+               if (!nv_wait_ne(dev, 0x409800, 0xffffffff, 0x00000000)) {
+                       NV_ERROR(dev, "fuc09 req 0x16 timeout\n");
+                       return -EBUSY;
+               }
+
+               nv_wr32(dev, 0x409840, 0xffffffff);
+               nv_wr32(dev, 0x409500, 0x00000000);
+               nv_wr32(dev, 0x409504, 0x00000025);
+               if (!nv_wait_ne(dev, 0x409800, 0xffffffff, 0x00000000)) {
+                       NV_ERROR(dev, "fuc09 req 0x25 timeout\n");
+                       return -EBUSY;
+               }
+
+               goto done;
        }
 
-       /* load fuc microcode */
+       /* load HUB microcode */
        r000260 = nv_mask(dev, 0x000260, 0x00000001, 0x00000000);
-       nvc0_graph_init_fuc(dev, 0x409000, &priv->fuc409c, &priv->fuc409d);
-       nvc0_graph_init_fuc(dev, 0x41a000, &priv->fuc41ac, &priv->fuc41ad);
+       nv_wr32(dev, 0x4091c0, 0x01000000);
+       for (i = 0; i < sizeof(nvc0_grhub_data) / 4; i++)
+               nv_wr32(dev, 0x4091c4, nvc0_grhub_data[i]);
+
+       nv_wr32(dev, 0x409180, 0x01000000);
+       for (i = 0; i < sizeof(nvc0_grhub_code) / 4; i++) {
+               if ((i & 0x3f) == 0)
+                       nv_wr32(dev, 0x409188, i >> 6);
+               nv_wr32(dev, 0x409184, nvc0_grhub_code[i]);
+       }
+
+       /* load GPC microcode */
+       nv_wr32(dev, 0x41a1c0, 0x01000000);
+       for (i = 0; i < sizeof(nvc0_grgpc_data) / 4; i++)
+               nv_wr32(dev, 0x41a1c4, nvc0_grgpc_data[i]);
+
+       nv_wr32(dev, 0x41a180, 0x01000000);
+       for (i = 0; i < sizeof(nvc0_grgpc_code) / 4; i++) {
+               if ((i & 0x3f) == 0)
+                       nv_wr32(dev, 0x41a188, i >> 6);
+               nv_wr32(dev, 0x41a184, nvc0_grgpc_code[i]);
+       }
        nv_wr32(dev, 0x000260, r000260);
 
-       /* start both of them running */
-       nv_wr32(dev, 0x409840, 0xffffffff);
-       nv_wr32(dev, 0x41a10c, 0x00000000);
+       /* start HUB ucode running, it'll init the GPCs */
+       nv_wr32(dev, 0x409800, dev_priv->chipset);
        nv_wr32(dev, 0x40910c, 0x00000000);
-       nv_wr32(dev, 0x41a100, 0x00000002);
        nv_wr32(dev, 0x409100, 0x00000002);
-       if (!nv_wait(dev, 0x409800, 0x00000001, 0x00000001))
-               NV_INFO(dev, "0x409800 wait failed\n");
-
-       nv_wr32(dev, 0x409840, 0xffffffff);
-       nv_wr32(dev, 0x409500, 0x7fffffff);
-       nv_wr32(dev, 0x409504, 0x00000021);
-
-       nv_wr32(dev, 0x409840, 0xffffffff);
-       nv_wr32(dev, 0x409500, 0x00000000);
-       nv_wr32(dev, 0x409504, 0x00000010);
-       if (!nv_wait_ne(dev, 0x409800, 0xffffffff, 0x00000000)) {
-               NV_ERROR(dev, "fuc09 req 0x10 timeout\n");
+       if (!nv_wait(dev, 0x409800, 0x80000000, 0x80000000)) {
+               NV_ERROR(dev, "PGRAPH: HUB_INIT timed out\n");
+               nvc0_graph_ctxctl_debug(dev);
                return -EBUSY;
        }
-       priv->grctx_size = nv_rd32(dev, 0x409800);
 
-       nv_wr32(dev, 0x409840, 0xffffffff);
-       nv_wr32(dev, 0x409500, 0x00000000);
-       nv_wr32(dev, 0x409504, 0x00000016);
-       if (!nv_wait_ne(dev, 0x409800, 0xffffffff, 0x00000000)) {
-               NV_ERROR(dev, "fuc09 req 0x16 timeout\n");
-               return -EBUSY;
-       }
+       priv->size = nv_rd32(dev, 0x409804);
+done:
+       if (priv->data == NULL) {
+               int ret = nvc0_grctx_generate(dev);
+               if (ret) {
+                       NV_ERROR(dev, "PGRAPH: failed to construct context\n");
+                       return ret;
+               }
 
-       nv_wr32(dev, 0x409840, 0xffffffff);
-       nv_wr32(dev, 0x409500, 0x00000000);
-       nv_wr32(dev, 0x409504, 0x00000025);
-       if (!nv_wait_ne(dev, 0x409800, 0xffffffff, 0x00000000)) {
-               NV_ERROR(dev, "fuc09 req 0x25 timeout\n");
-               return -EBUSY;
+               return 1;
        }
 
        return 0;
@@ -610,6 +457,7 @@ nvc0_graph_init(struct drm_device *dev, int engine)
 {
        int ret;
 
+reset:
        nv_mask(dev, 0x000200, 0x18001000, 0x00000000);
        nv_mask(dev, 0x000200, 0x18001000, 0x18001000);
 
@@ -636,8 +484,11 @@ nvc0_graph_init(struct drm_device *dev, int engine)
        nv_wr32(dev, 0x400054, 0x34ce3464);
 
        ret = nvc0_graph_init_ctxctl(dev);
-       if (ret)
+       if (ret) {
+               if (ret == 1)
+                       goto reset;
                return ret;
+       }
 
        return 0;
 }
@@ -784,20 +635,18 @@ nvc0_graph_destroy(struct drm_device *dev, int engine)
 {
        struct nvc0_graph_priv *priv = nv_engine(dev, engine);
 
-       if (nouveau_ctxfw) {
-               nvc0_graph_destroy_fw(&priv->fuc409c);
-               nvc0_graph_destroy_fw(&priv->fuc409d);
-               nvc0_graph_destroy_fw(&priv->fuc41ac);
-               nvc0_graph_destroy_fw(&priv->fuc41ad);
-       }
+       nvc0_graph_destroy_fw(&priv->fuc409c);
+       nvc0_graph_destroy_fw(&priv->fuc409d);
+       nvc0_graph_destroy_fw(&priv->fuc41ac);
+       nvc0_graph_destroy_fw(&priv->fuc41ad);
 
        nouveau_irq_unregister(dev, 12);
 
        nouveau_gpuobj_ref(NULL, &priv->unk4188b8);
        nouveau_gpuobj_ref(NULL, &priv->unk4188b4);
 
-       if (priv->grctx_vals)
-               kfree(priv->grctx_vals);
+       if (priv->data)
+               kfree(priv->data);
 
        NVOBJ_ENGINE_DEL(dev, GR);
        kfree(priv);
@@ -840,6 +689,7 @@ nvc0_graph_create(struct drm_device *dev)
                        ret = 0;
                        goto error;
                }
+               priv->firmware = true;
        }
 
        ret = nouveau_gpuobj_new(dev, NULL, 0x1000, 256, 0, &priv->unk4188b4);
index ba9b80fd89584351b8099bd36948543dc41d2663..30ea3ab135c6d708e7e9f34c3153ac0084e0d5cf 100644 (file)
@@ -66,27 +66,28 @@ struct nvc0_graph_priv {
        u8 tpc_nr[GPC_MAX];
        u8 tpc_total;
 
-       u32  grctx_size;
-       u32 *grctx_vals;
        struct nouveau_gpuobj *unk4188b4;
        struct nouveau_gpuobj *unk4188b8;
 
+       struct nvc0_graph_data mmio_data[4];
+       struct nvc0_graph_mmio mmio_list[4096/8];
+       u32  size;
+       u32 *data;
+
        u8 magic_not_rop_nr;
 };
 
 struct nvc0_graph_chan {
        struct nouveau_gpuobj *grctx;
        struct nouveau_vma     grctx_vma;
-       struct nouveau_gpuobj *unk408004; /* 0x418808 too */
-       struct nouveau_vma     unk408004_vma;
-       struct nouveau_gpuobj *unk40800c; /* 0x419004 too */
-       struct nouveau_vma     unk40800c_vma;
-       struct nouveau_gpuobj *unk418810; /* 0x419848 too */
-       struct nouveau_vma     unk418810_vma;
 
        struct nouveau_gpuobj *mmio;
        struct nouveau_vma     mmio_vma;
        int mmio_nr;
+       struct {
+               struct nouveau_gpuobj *mem;
+               struct nouveau_vma vma;
+       } data[4];
 };
 
 static inline u32
@@ -124,6 +125,7 @@ nv_mthd(struct drm_device *priv, u32 class, u32 mthd, u32 data)
 }
 
 struct nvc0_grctx {
+       struct drm_device *dev;
        struct nvc0_graph_priv *priv;
        struct nvc0_graph_data *data;
        struct nvc0_graph_mmio *mmio;
@@ -133,13 +135,14 @@ struct nvc0_grctx {
        u64 addr;
 };
 
-int  nvc0_grctx_generate(struct nouveau_channel *);
-int  nvc0_grctx_init(struct nvc0_graph_priv *, struct nvc0_grctx *);
+int  nvc0_grctx_generate(struct drm_device *);
+int  nvc0_grctx_init(struct drm_device *, struct nvc0_graph_priv *,
+                    struct nvc0_grctx *);
 void nvc0_grctx_data(struct nvc0_grctx *, u32, u32, u32);
 void nvc0_grctx_mmio(struct nvc0_grctx *, u32, u32, u32, u32);
 int  nvc0_grctx_fini(struct nvc0_grctx *);
 
-int  nve0_grctx_generate(struct nouveau_channel *);
+int  nve0_grctx_generate(struct drm_device *);
 
 #define mmio_data(s,a,p) nvc0_grctx_data(&info, (s), (a), (p))
 #define mmio_list(r,d,s,b) nvc0_grctx_mmio(&info, (r), (d), (s), (b))
@@ -154,4 +157,9 @@ int  nvc0_graph_context_ctor(struct nouveau_object *, struct nouveau_object *,
                             struct nouveau_object **);
 void nvc0_graph_context_dtor(struct nouveau_object *);
 
+void nvc0_graph_ctxctl_debug(struct drm_device *);
+
+int  nvc0_graph_context_new(struct nouveau_channel *, int);
+void nvc0_graph_context_del(struct nouveau_channel *, int);
+
 #endif
index 2bd55d8d10a73b73e535530852e4e1704eb6c853..a3a4ee7c0b2ed6b6396211affd98d793c77bc69f 100644 (file)
@@ -57,243 +57,6 @@ nve0_graph_ctxctl_debug(struct drm_device *dev)
                nve0_graph_ctxctl_debug_unit(dev, 0x502000 + (gpc * 0x8000));
 }
 
-static int
-nve0_graph_load_context(struct nouveau_channel *chan)
-{
-       struct drm_device *dev = chan->dev;
-
-       nv_wr32(dev, 0x409840, 0x00000030);
-       nv_wr32(dev, 0x409500, 0x80000000 | chan->ramin->addr >> 12);
-       nv_wr32(dev, 0x409504, 0x00000003);
-       if (!nv_wait(dev, 0x409800, 0x00000010, 0x00000010))
-               NV_ERROR(dev, "PGRAPH: load_ctx timeout\n");
-
-       return 0;
-}
-
-static int
-nve0_graph_unload_context_to(struct drm_device *dev, u64 chan)
-{
-       nv_wr32(dev, 0x409840, 0x00000003);
-       nv_wr32(dev, 0x409500, 0x80000000 | chan >> 12);
-       nv_wr32(dev, 0x409504, 0x00000009);
-       if (!nv_wait(dev, 0x409800, 0x00000001, 0x00000000)) {
-               NV_ERROR(dev, "PGRAPH: unload_ctx timeout\n");
-               return -EBUSY;
-       }
-
-       return 0;
-}
-
-static int
-nve0_graph_construct_context(struct nouveau_channel *chan)
-{
-       struct nvc0_graph_priv *priv = nv_engine(chan->dev, NVOBJ_ENGINE_GR);
-       struct nvc0_graph_chan *grch = chan->engctx[NVOBJ_ENGINE_GR];
-       struct drm_device *dev = chan->dev;
-       int ret, i;
-       u32 *ctx;
-
-       ctx = kmalloc(priv->grctx_size, GFP_KERNEL);
-       if (!ctx)
-               return -ENOMEM;
-
-       nve0_graph_load_context(chan);
-
-       nv_wo32(grch->grctx, 0x1c, 1);
-       nv_wo32(grch->grctx, 0x20, 0);
-       nv_wo32(grch->grctx, 0x28, 0);
-       nv_wo32(grch->grctx, 0x2c, 0);
-       nvimem_flush(dev);
-
-       ret = nve0_grctx_generate(chan);
-       if (ret)
-               goto err;
-
-       ret = nve0_graph_unload_context_to(dev, chan->ramin->addr);
-       if (ret)
-               goto err;
-
-       for (i = 0; i < priv->grctx_size; i += 4)
-               ctx[i / 4] = nv_ro32(grch->grctx, i);
-
-       priv->grctx_vals = ctx;
-       return 0;
-
-err:
-       kfree(ctx);
-       return ret;
-}
-
-static int
-nve0_graph_create_context_mmio_list(struct nouveau_channel *chan)
-{
-       struct nvc0_graph_priv *priv = nv_engine(chan->dev, NVOBJ_ENGINE_GR);
-       struct nvc0_graph_chan *grch = chan->engctx[NVOBJ_ENGINE_GR];
-       struct drm_device *dev = chan->dev;
-       u32 magic[GPC_MAX][2];
-       u16 offset = 0x0000;
-       int gpc;
-       int ret;
-
-       ret = nouveau_gpuobj_new(dev, NULL, 0x3000, 256, 0, &grch->unk408004);
-       if (ret)
-               return ret;
-
-       ret = nouveau_gpuobj_map_vm(grch->unk408004, chan->vm,
-                                   NV_MEM_ACCESS_RW | NV_MEM_ACCESS_SYS,
-                                   &grch->unk408004_vma);
-       if (ret)
-               return ret;
-
-       ret = nouveau_gpuobj_new(dev, NULL, 0x8000, 256, 0, &grch->unk40800c);
-       if (ret)
-               return ret;
-
-       ret = nouveau_gpuobj_map_vm(grch->unk40800c, chan->vm,
-                                   NV_MEM_ACCESS_RW | NV_MEM_ACCESS_SYS,
-                                   &grch->unk40800c_vma);
-       if (ret)
-               return ret;
-
-       ret = nouveau_gpuobj_new(dev, NULL, 384 * 1024, 4096, 0,
-                                &grch->unk418810);
-       if (ret)
-               return ret;
-
-       ret = nouveau_gpuobj_map_vm(grch->unk418810, chan->vm,
-                                   NV_MEM_ACCESS_RW, &grch->unk418810_vma);
-       if (ret)
-               return ret;
-
-       ret = nouveau_gpuobj_new(dev, NULL, 0x1000, 0, 0, &grch->mmio);
-       if (ret)
-               return ret;
-
-       ret = nouveau_gpuobj_map_vm(grch->mmio, chan->vm,
-                                   NV_MEM_ACCESS_RW | NV_MEM_ACCESS_SYS,
-                                   &grch->mmio_vma);
-       if (ret)
-               return ret;
-
-#define mmio(r,v) do {                                                         \
-       nv_wo32(grch->mmio, (grch->mmio_nr * 8) + 0, (r));                     \
-       nv_wo32(grch->mmio, (grch->mmio_nr * 8) + 4, (v));                     \
-       grch->mmio_nr++;                                                       \
-} while (0)
-       mmio(0x40800c, grch->unk40800c_vma.offset >> 8);
-       mmio(0x408010, 0x80000000);
-       mmio(0x419004, grch->unk40800c_vma.offset >> 8);
-       mmio(0x419008, 0x00000000);
-       mmio(0x4064cc, 0x80000000);
-       mmio(0x408004, grch->unk408004_vma.offset >> 8);
-       mmio(0x408008, 0x80000030);
-       mmio(0x418808, grch->unk408004_vma.offset >> 8);
-       mmio(0x41880c, 0x80000030);
-       mmio(0x4064c8, 0x01800600);
-       mmio(0x418810, 0x80000000 | grch->unk418810_vma.offset >> 12);
-       mmio(0x419848, 0x10000000 | grch->unk418810_vma.offset >> 12);
-       mmio(0x405830, 0x02180648);
-       mmio(0x4064c4, 0x0192ffff);
-
-       for (gpc = 0; gpc < priv->gpc_nr; gpc++) {
-               u16 magic0 = 0x0218 * priv->tpc_nr[gpc];
-               u16 magic1 = 0x0648 * priv->tpc_nr[gpc];
-               magic[gpc][0]  = 0x10000000 | (magic0 << 16) | offset;
-               magic[gpc][1]  = 0x00000000 | (magic1 << 16);
-               offset += 0x0324 * priv->tpc_nr[gpc];
-       }
-
-       for (gpc = 0; gpc < priv->gpc_nr; gpc++) {
-               mmio(GPC_UNIT(gpc, 0x30c0), magic[gpc][0]);
-               mmio(GPC_UNIT(gpc, 0x30e4), magic[gpc][1] | offset);
-               offset += 0x07ff * priv->tpc_nr[gpc];
-       }
-
-       mmio(0x17e91c, 0x06060609);
-       mmio(0x17e920, 0x00090a05);
-#undef mmio
-       return 0;
-}
-
-static int
-nve0_graph_context_new(struct nouveau_channel *chan, int engine)
-{
-       struct drm_device *dev = chan->dev;
-       struct nvc0_graph_priv *priv = nv_engine(dev, engine);
-       struct nvc0_graph_chan *grch;
-       struct nouveau_gpuobj *grctx;
-       int ret, i;
-
-       grch = kzalloc(sizeof(*grch), GFP_KERNEL);
-       if (!grch)
-               return -ENOMEM;
-       chan->engctx[NVOBJ_ENGINE_GR] = grch;
-
-       ret = nouveau_gpuobj_new(dev, NULL, priv->grctx_size, 256, 0,
-                                &grch->grctx);
-       if (ret)
-               goto error;
-
-       ret = nouveau_gpuobj_map_vm(grch->grctx, chan->vm, NV_MEM_ACCESS_RW |
-                                   NV_MEM_ACCESS_SYS, &grch->grctx_vma);
-       if (ret)
-               return ret;
-
-       grctx = grch->grctx;
-
-       ret = nve0_graph_create_context_mmio_list(chan);
-       if (ret)
-               goto error;
-
-       nv_wo32(chan->ramin, 0x0210, lower_32_bits(grch->grctx_vma.offset) | 4);
-       nv_wo32(chan->ramin, 0x0214, upper_32_bits(grch->grctx_vma.offset));
-       nvimem_flush(dev);
-
-       if (!priv->grctx_vals) {
-               ret = nve0_graph_construct_context(chan);
-               if (ret)
-                       goto error;
-       }
-
-       for (i = 0; i < priv->grctx_size; i += 4)
-               nv_wo32(grctx, i, priv->grctx_vals[i / 4]);
-       nv_wo32(grctx, 0xf4, 0);
-       nv_wo32(grctx, 0xf8, 0);
-       nv_wo32(grctx, 0x10, grch->mmio_nr);
-       nv_wo32(grctx, 0x14, lower_32_bits(grch->mmio_vma.offset));
-       nv_wo32(grctx, 0x18, upper_32_bits(grch->mmio_vma.offset));
-       nv_wo32(grctx, 0x1c, 1);
-       nv_wo32(grctx, 0x20, 0);
-       nv_wo32(grctx, 0x28, 0);
-       nv_wo32(grctx, 0x2c, 0);
-
-       nvimem_flush(dev);
-       return 0;
-
-error:
-       priv->base.context_del(chan, engine);
-       return ret;
-}
-
-static void
-nve0_graph_context_del(struct nouveau_channel *chan, int engine)
-{
-       struct nvc0_graph_chan *grch = chan->engctx[engine];
-
-       nouveau_gpuobj_unmap(&grch->mmio_vma);
-       nouveau_gpuobj_unmap(&grch->unk418810_vma);
-       nouveau_gpuobj_unmap(&grch->unk40800c_vma);
-       nouveau_gpuobj_unmap(&grch->unk408004_vma);
-       nouveau_gpuobj_unmap(&grch->grctx_vma);
-       nouveau_gpuobj_ref(NULL, &grch->mmio);
-       nouveau_gpuobj_ref(NULL, &grch->unk418810);
-       nouveau_gpuobj_ref(NULL, &grch->unk40800c);
-       nouveau_gpuobj_ref(NULL, &grch->unk408004);
-       nouveau_gpuobj_ref(NULL, &grch->grctx);
-       chan->engctx[engine] = NULL;
-}
-
 static int
 nve0_graph_object_new(struct nouveau_channel *chan, int engine,
                      u32 handle, u16 class)
@@ -487,7 +250,7 @@ nve0_graph_init_ctxctl(struct drm_device *dev)
                NV_ERROR(dev, "fuc09 req 0x10 timeout\n");
                return -EBUSY;
        }
-       priv->grctx_size = nv_rd32(dev, 0x409800);
+       priv->size = nv_rd32(dev, 0x409800);
 
        nv_wr32(dev, 0x409840, 0xffffffff);
        nv_wr32(dev, 0x409500, 0x00000000);
@@ -534,6 +297,17 @@ nve0_graph_init_ctxctl(struct drm_device *dev)
        nv_wr32(dev, 0x409614, 0x00000070);
        nv_wr32(dev, 0x409614, 0x00000770);
        nv_wr32(dev, 0x40802c, 0x00000001);
+
+       if (priv->data == NULL) {
+               int ret = nve0_grctx_generate(dev);
+               if (ret) {
+                       NV_ERROR(dev, "PGRAPH: failed to construct context\n");
+                       return ret;
+               }
+
+               return 1;
+       }
+
        return 0;
 }
 
@@ -542,6 +316,7 @@ nve0_graph_init(struct drm_device *dev, int engine)
 {
        int ret;
 
+reset:
        nv_mask(dev, 0x000200, 0x18001000, 0x00000000);
        nv_mask(dev, 0x000200, 0x18001000, 0x18001000);
 
@@ -566,8 +341,11 @@ nve0_graph_init(struct drm_device *dev, int engine)
        nv_wr32(dev, 0x400054, 0x34ce3464);
 
        ret = nve0_graph_init_ctxctl(dev);
-       if (ret)
+       if (ret) {
+               if (ret == 1)
+                       goto reset;
                return ret;
+       }
 
        return 0;
 }
@@ -758,8 +536,8 @@ nve0_graph_destroy(struct drm_device *dev, int engine)
        nouveau_gpuobj_ref(NULL, &priv->unk4188b8);
        nouveau_gpuobj_ref(NULL, &priv->unk4188b4);
 
-       if (priv->grctx_vals)
-               kfree(priv->grctx_vals);
+       if (priv->data)
+               kfree(priv->data);
 
        NVOBJ_ENGINE_DEL(dev, GR);
        kfree(priv);
@@ -786,8 +564,8 @@ nve0_graph_create(struct drm_device *dev)
        priv->base.destroy = nve0_graph_destroy;
        priv->base.init = nve0_graph_init;
        priv->base.fini = nve0_graph_fini;
-       priv->base.context_new = nve0_graph_context_new;
-       priv->base.context_del = nve0_graph_context_del;
+       priv->base.context_new = nvc0_graph_context_new;
+       priv->base.context_del = nvc0_graph_context_del;
        priv->base.object_new = nve0_graph_object_new;
 
        NVOBJ_ENGINE_ADD(dev, GR, &priv->base);
@@ -801,6 +579,7 @@ nve0_graph_create(struct drm_device *dev)
                ret = 0;
                goto error;
        }
+       priv->firmware = true;
 
        ret = nouveau_gpuobj_new(dev, NULL, 0x1000, 256, 0, &priv->unk4188b4);
        if (ret)