From ac1499d9573f4aadd1d2beac11fe23af8ce90c24 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Sat, 4 Aug 2012 18:40:45 +1000 Subject: [PATCH] drm/nvc0-/gr: generate grctx template at init time, not first context ctor Signed-off-by: Ben Skeggs --- .../drm/nouveau/core/engine/graph/ctxnvc0.c | 269 +++++++++-- .../drm/nouveau/core/engine/graph/ctxnve0.c | 50 +- .../gpu/drm/nouveau/core/engine/graph/nvc0.c | 450 ++++++------------ .../gpu/drm/nouveau/core/engine/graph/nvc0.h | 30 +- .../gpu/drm/nouveau/core/engine/graph/nve0.c | 265 +---------- 5 files changed, 460 insertions(+), 604 deletions(-) diff --git a/drivers/gpu/drm/nouveau/core/engine/graph/ctxnvc0.c b/drivers/gpu/drm/nouveau/core/engine/graph/ctxnvc0.c index 57353015329f..b19a406e55d9 100644 --- a/drivers/gpu/drm/nouveau/core/engine/graph/ctxnvc0.c +++ b/drivers/gpu/drm/nouveau/core/engine/graph/ctxnvc0.c @@ -35,6 +35,156 @@ nv_icmd(struct drm_device *priv, u32 icmd, u32 data) while (nv_rd32(priv, 0x400700) & 2) {} } +int +nvc0_grctx_init(struct drm_device *priv, struct nvc0_graph_priv *oprv, + struct nvc0_grctx *info) +{ + struct nouveau_gpuobj *chan; + u32 size = (0x80000 + oprv->size + 4095) & ~4095; + int ret, i; + + /* allocate memory to for a "channel", which we'll use to generate + * the default context values + */ + ret = nouveau_gpuobj_new(priv, NULL, size, 0x1000, + NVOBJ_FLAG_ZERO_ALLOC, &info->chan); + chan = info->chan; + if (ret) { + NV_ERROR(priv, "failed to allocate channel memory, %d\n", ret); + return ret; + } + + /* PGD pointer */ + nv_wo32(chan, 0x0200, lower_32_bits(chan->addr + 0x1000)); + nv_wo32(chan, 0x0204, upper_32_bits(chan->addr + 0x1000)); + nv_wo32(chan, 0x0208, 0xffffffff); + nv_wo32(chan, 0x020c, 0x000000ff); + + /* PGT[0] pointer */ + nv_wo32(chan, 0x1000, 0x00000000); + nv_wo32(chan, 0x1004, 0x00000001 | (chan->addr + 0x2000) >> 8); + + /* identity-map the whole "channel" into its own vm */ + for (i = 0; i < size / 4096; i++) { + u64 addr = ((chan->addr + (i * 4096)) >> 8) | 1; + nv_wo32(chan, 0x2000 + (i * 8), lower_32_bits(addr)); + nv_wo32(chan, 0x2004 + (i * 8), upper_32_bits(addr)); + } + + /* context pointer (virt) */ + nv_wo32(chan, 0x0210, 0x00080004); + nv_wo32(chan, 0x0214, 0x00000000); + + nvimem_flush(priv); + + nv_wr32(priv, 0x100cb8, (chan->addr + 0x1000) >> 8); + nv_wr32(priv, 0x100cbc, 0x80000001); + nv_wait(priv, 0x100c80, 0x00008000, 0x00008000); + + /* setup default state for mmio list construction */ + info->dev = priv; + info->data = oprv->mmio_data; + info->mmio = oprv->mmio_list; + info->addr = 0x2000 + (i * 8); + info->priv = oprv; + info->buffer_nr = 0; + + if (oprv->firmware) { + nv_wr32(priv, 0x409840, 0x00000030); + nv_wr32(priv, 0x409500, 0x80000000 | chan->addr >> 12); + nv_wr32(priv, 0x409504, 0x00000003); + if (!nv_wait(priv, 0x409800, 0x00000010, 0x00000010)) + NV_ERROR(priv, "load_ctx timeout\n"); + + nv_wo32(chan, 0x8001c, 1); + nv_wo32(chan, 0x80020, 0); + nv_wo32(chan, 0x80028, 0); + nv_wo32(chan, 0x8002c, 0); + nvimem_flush(priv); + return 0; + } + + /* HUB_FUC(SET_CHAN) */ + nv_wr32(priv, 0x409840, 0x80000000); + nv_wr32(priv, 0x409500, 0x80000000 | chan->addr >> 12); + nv_wr32(priv, 0x409504, 0x00000001); + if (!nv_wait(priv, 0x409800, 0x80000000, 0x80000000)) { + NV_ERROR(priv, "HUB_SET_CHAN timeout\n"); + nvc0_graph_ctxctl_debug(priv); + nouveau_gpuobj_ref(NULL, &info->chan); + return -EBUSY; + } + + return 0; +} + +void +nvc0_grctx_data(struct nvc0_grctx *info, u32 size, u32 align, u32 access) +{ + info->buffer[info->buffer_nr] = info->addr; + info->buffer[info->buffer_nr] += (align - 1); + info->buffer[info->buffer_nr] &= ~(align - 1); + info->addr = info->buffer[info->buffer_nr++] + size; + + info->data->size = size; + info->data->align = align; + info->data->access = access; + info->data++; +} + +void +nvc0_grctx_mmio(struct nvc0_grctx *info, u32 addr, u32 data, u32 shift, u32 buf) +{ + info->mmio->addr = addr; + info->mmio->data = data; + info->mmio->shift = shift; + info->mmio->buffer = buf; + info->mmio++; + + if (shift) + data |= info->buffer[buf] >> shift; + nv_wr32(info->dev, addr, data); +} + +int +nvc0_grctx_fini(struct nvc0_grctx *info) +{ + struct nvc0_graph_priv *priv = info->priv; + int i; + + if (priv->firmware) { + nv_wr32(info->dev, 0x409840, 0x00000003); + nv_wr32(info->dev, 0x409500, 0x80000000 | info->chan->addr >> 12); + nv_wr32(info->dev, 0x409504, 0x00000009); + if (!nv_wait(info->dev, 0x409800, 0x00000001, 0x00000000)) { + NV_ERROR(info->dev, "unload_ctx timeout\n"); + return -EBUSY; + } + + goto save; + } + + /* HUB_FUC(CTX_SAVE) */ + nv_wr32(info->dev, 0x409840, 0x80000000); + nv_wr32(info->dev, 0x409500, 0x80000000 | info->chan->addr >> 12); + nv_wr32(info->dev, 0x409504, 0x00000002); + if (!nv_wait(info->dev, 0x409800, 0x80000000, 0x80000000)) { + NV_ERROR(info->dev, "HUB_CTX_SAVE timeout\n"); + nvc0_graph_ctxctl_debug(info->dev); + return -EBUSY; + } + +save: + priv->data = kmalloc(priv->size, GFP_KERNEL); + if (priv->data) { + for (i = 0; i < priv->size; i += 4) + priv->data[i / 4] = nv_ro32(info->chan, 0x80000 + i); + } + + nouveau_gpuobj_ref(NULL, &info->chan); + return priv->data ? 0 : -ENOMEM; +} + static void nvc0_grctx_generate_9097(struct drm_device *priv) { @@ -1779,16 +1929,19 @@ nvc0_grctx_generate_tp(struct drm_device *priv) } int -nvc0_grctx_generate(struct nouveau_channel *chan) +nvc0_grctx_generate(struct drm_device *priv) { - struct drm_nouveau_private *dev_priv = chan->dev->dev_private; - struct nvc0_graph_priv *oprv = nv_engine(chan->dev, NVOBJ_ENGINE_GR); - struct nvc0_graph_chan *grch = chan->engctx[NVOBJ_ENGINE_GR]; - struct drm_device *priv = chan->dev; - int i, gpc, tp, id; + struct drm_nouveau_private *dev_priv = priv->dev_private; + struct nvc0_graph_priv *oprv = nv_engine(priv, NVOBJ_ENGINE_GR); + struct nvc0_grctx info; + int ret, i, gpc, tpc, id; u32 fermi = nvc0_graph_class(priv); u32 r000260, tmp; + ret = nvc0_grctx_init(priv, oprv, &info); + if (ret) + return ret; + r000260 = nv_rd32(priv, 0x000260); nv_wr32(priv, 0x000260, r000260 & ~1); nv_wr32(priv, 0x400208, 0x00000000); @@ -1808,19 +1961,55 @@ nvc0_grctx_generate(struct nouveau_channel *chan) nv_wr32(priv, 0x404154, 0x00000000); - /* fuc "mmio list" writes */ - for (i = 0; i < grch->mmio_nr * 8; i += 8) { - u32 reg = nv_ro32(grch->mmio, i + 0); - nv_wr32(priv, reg, nv_ro32(grch->mmio, i + 4)); + /* generate per-context mmio list data */ + mmio_data(0x002000, 0x0100, NV_MEM_ACCESS_RW | NV_MEM_ACCESS_SYS); + mmio_data(0x008000, 0x0100, NV_MEM_ACCESS_RW | NV_MEM_ACCESS_SYS); + mmio_data(0x060000, 0x1000, NV_MEM_ACCESS_RW); + mmio_list(0x408004, 0x00000000, 8, 0); + mmio_list(0x408008, 0x80000018, 0, 0); + mmio_list(0x40800c, 0x00000000, 8, 1); + mmio_list(0x408010, 0x80000000, 0, 0); + mmio_list(0x418810, 0x80000000, 12, 2); + mmio_list(0x419848, 0x10000000, 12, 2); + mmio_list(0x419004, 0x00000000, 8, 1); + mmio_list(0x419008, 0x00000000, 0, 0); + mmio_list(0x418808, 0x00000000, 8, 0); + mmio_list(0x41880c, 0x80000018, 0, 0); + if (dev_priv->chipset != 0xc1) { + tmp = 0x02180000; + mmio_list(0x405830, tmp, 0, 0); + for (gpc = 0; gpc < oprv->gpc_nr; gpc++) { + for (tpc = 0; tpc < oprv->tpc_nr[gpc]; tpc++) { + u32 reg = TPC_UNIT(gpc, tpc, 0x0520); + mmio_list(reg, tmp, 0, 0); + tmp += 0x0324; + } + } + } else { + tmp = 0x02180000; + mmio_list(0x405830, 0x00000218 | tmp, 0, 0); + mmio_list(0x4064c4, 0x0086ffff, 0, 0); + for (gpc = 0; gpc < oprv->gpc_nr; gpc++) { + for (tpc = 0; tpc < oprv->tpc_nr[gpc]; tpc++) { + u32 reg = TPC_UNIT(gpc, tpc, 0x0520); + mmio_list(reg, 0x10000000 | tmp, 0, 0); + tmp += 0x0324; + } + for (tpc = 0; tpc < oprv->tpc_nr[gpc]; tpc++) { + u32 reg = TPC_UNIT(gpc, tpc, 0x0544); + mmio_list(reg, tmp, 0, 0); + tmp += 0x0324; + } + } } - for (tp = 0, id = 0; tp < 4; tp++) { + for (tpc = 0, id = 0; tpc < 4; tpc++) { for (gpc = 0; gpc < oprv->gpc_nr; gpc++) { - if (tp < oprv->tpc_nr[gpc]) { - nv_wr32(priv, TPC_UNIT(gpc, tp, 0x698), id); - nv_wr32(priv, TPC_UNIT(gpc, tp, 0x4e8), id); - nv_wr32(priv, GPC_UNIT(gpc, 0x0c10 + tp * 4), id); - nv_wr32(priv, TPC_UNIT(gpc, tp, 0x088), id); + if (tpc < oprv->tpc_nr[gpc]) { + nv_wr32(priv, TPC_UNIT(gpc, tpc, 0x698), id); + nv_wr32(priv, TPC_UNIT(gpc, tpc, 0x4e8), id); + nv_wr32(priv, GPC_UNIT(gpc, 0x0c10 + tpc * 4), id); + nv_wr32(priv, TPC_UNIT(gpc, tpc, 0x088), id); id++; } @@ -1843,18 +2032,18 @@ nvc0_grctx_generate(struct nouveau_channel *chan) nv_wr32(priv, 0x40587c, 0x00000000); if (1) { - u8 tpnr[GPC_MAX], data[TPC_MAX]; + u8 tpcnr[GPC_MAX], data[TPC_MAX]; - memcpy(tpnr, oprv->tpc_nr, sizeof(oprv->tpc_nr)); + memcpy(tpcnr, oprv->tpc_nr, sizeof(oprv->tpc_nr)); memset(data, 0x1f, sizeof(data)); gpc = -1; - for (tp = 0; tp < oprv->tpc_total; tp++) { + for (tpc = 0; tpc < oprv->tpc_total; tpc++) { do { gpc = (gpc + 1) % oprv->gpc_nr; - } while (!tpnr[gpc]); - tpnr[gpc]--; - data[tp] = gpc; + } while (!tpcnr[gpc]); + tpcnr[gpc]--; + data[tpc] = gpc; } for (i = 0; i < 4; i++) @@ -1863,24 +2052,24 @@ nvc0_grctx_generate(struct nouveau_channel *chan) if (1) { u32 data[6] = {}, data2[2] = {}; - u8 tpnr[GPC_MAX]; + u8 tpcnr[GPC_MAX]; u8 shift, ntpcv; /* calculate first set of magics */ - memcpy(tpnr, oprv->tpc_nr, sizeof(oprv->tpc_nr)); + memcpy(tpcnr, oprv->tpc_nr, sizeof(oprv->tpc_nr)); gpc = -1; - for (tp = 0; tp < oprv->tpc_total; tp++) { + for (tpc = 0; tpc < oprv->tpc_total; tpc++) { do { gpc = (gpc + 1) % oprv->gpc_nr; - } while (!tpnr[gpc]); - tpnr[gpc]--; + } while (!tpcnr[gpc]); + tpcnr[gpc]--; - data[tp / 6] |= gpc << ((tp % 6) * 5); + data[tpc / 6] |= gpc << ((tpc % 6) * 5); } - for (; tp < 32; tp++) - data[tp / 6] |= 7 << ((tp % 6) * 5); + for (; tpc < 32; tpc++) + data[tpc / 6] |= 7 << ((tpc % 6) * 5); /* and the second... */ shift = 0; @@ -1918,12 +2107,12 @@ nvc0_grctx_generate(struct nouveau_channel *chan) } if (1) { - u32 tp_mask = 0, tp_set = 0; - u8 tpnr[GPC_MAX], a, b; + u32 tpc_mask = 0, tpc_set = 0; + u8 tpcnr[GPC_MAX], a, b; - memcpy(tpnr, oprv->tpc_nr, sizeof(oprv->tpc_nr)); + memcpy(tpcnr, oprv->tpc_nr, sizeof(oprv->tpc_nr)); for (gpc = 0; gpc < oprv->gpc_nr; gpc++) - tp_mask |= ((1 << oprv->tpc_nr[gpc]) - 1) << (gpc * 8); + tpc_mask |= ((1 << oprv->tpc_nr[gpc]) - 1) << (gpc * 8); for (i = 0, gpc = -1, b = -1; i < 32; i++) { a = (i * (oprv->tpc_total - 1)) / 32; @@ -1931,14 +2120,14 @@ nvc0_grctx_generate(struct nouveau_channel *chan) b = a; do { gpc = (gpc + 1) % oprv->gpc_nr; - } while (!tpnr[gpc]); - tp = oprv->tpc_nr[gpc] - tpnr[gpc]--; + } while (!tpcnr[gpc]); + tpc = oprv->tpc_nr[gpc] - tpcnr[gpc]--; - tp_set |= 1 << ((gpc * 8) + tp); + tpc_set |= 1 << ((gpc * 8) + tpc); } - nv_wr32(priv, 0x406800 + (i * 0x20), tp_set); - nv_wr32(priv, 0x406c00 + (i * 0x20), tp_set ^ tp_mask); + nv_wr32(priv, 0x406800 + (i * 0x20), tpc_set); + nv_wr32(priv, 0x406c00 + (i * 0x20), tpc_set ^ tpc_mask); } } @@ -2867,5 +3056,5 @@ nvc0_grctx_generate(struct nouveau_channel *chan) nvc0_grctx_generate_90c0(priv); nv_wr32(priv, 0x000260, r000260); - return 0; + return nvc0_grctx_fini(&info); } diff --git a/drivers/gpu/drm/nouveau/core/engine/graph/ctxnve0.c b/drivers/gpu/drm/nouveau/core/engine/graph/ctxnve0.c index 47bea8a7b2bc..e5503170d68c 100644 --- a/drivers/gpu/drm/nouveau/core/engine/graph/ctxnve0.c +++ b/drivers/gpu/drm/nouveau/core/engine/graph/ctxnve0.c @@ -2604,16 +2604,20 @@ nve0_graph_generate_tpcunk(struct drm_device *priv) } int -nve0_grctx_generate(struct nouveau_channel *chan) +nve0_grctx_generate(struct drm_device *priv) { - struct nvc0_graph_priv *oprv = nv_engine(chan->dev, NVOBJ_ENGINE_GR); - struct nvc0_graph_chan *grch = chan->engctx[NVOBJ_ENGINE_GR]; - struct drm_device *priv = chan->dev; + struct nvc0_graph_priv *oprv = nv_engine(priv, NVOBJ_ENGINE_GR); + struct nvc0_grctx info; + int ret, i, gpc, tpc, id; u32 data[6] = {}, data2[2] = {}, tmp; u32 tpc_set = 0, tpc_mask = 0; + u32 magic[GPC_MAX][2], offset; u8 tpcnr[GPC_MAX], a, b; u8 shift, ntpcv; - int i, gpc, tpc, id; + + ret = nvc0_grctx_init(priv, oprv, &info); + if (ret) + return ret; nv_mask(priv, 0x000260, 0x00000001, 0x00000000); nv_wr32(priv, 0x400204, 0x00000000); @@ -2636,11 +2640,37 @@ nve0_grctx_generate(struct nouveau_channel *chan) nv_wr32(priv, 0x404154, 0x0); - for (i = 0; i < grch->mmio_nr * 8; i += 8) { - u32 reg = nv_ro32(grch->mmio, i + 0); - u32 val = nv_ro32(grch->mmio, i + 4); - nv_wr32(priv, reg, val); + mmio_data(0x003000, 0x0100, NV_MEM_ACCESS_RW | NV_MEM_ACCESS_SYS); + mmio_data(0x008000, 0x0100, NV_MEM_ACCESS_RW | NV_MEM_ACCESS_SYS); + mmio_data(0x060000, 0x1000, NV_MEM_ACCESS_RW); + mmio_list(0x40800c, 0x00000000, 8, 1); + mmio_list(0x408010, 0x80000000, 0, 0); + mmio_list(0x419004, 0x00000000, 8, 1); + mmio_list(0x419008, 0x00000000, 0, 0); + mmio_list(0x4064cc, 0x80000000, 0, 0); + mmio_list(0x408004, 0x00000000, 8, 0); + mmio_list(0x408008, 0x80000030, 0, 0); + mmio_list(0x418808, 0x00000000, 8, 0); + mmio_list(0x41880c, 0x80000030, 0, 0); + mmio_list(0x4064c8, 0x01800600, 0, 0); + mmio_list(0x418810, 0x80000000, 12, 2); + mmio_list(0x419848, 0x10000000, 12, 2); + mmio_list(0x405830, 0x02180648, 0, 0); + mmio_list(0x4064c4, 0x0192ffff, 0, 0); + for (gpc = 0, offset = 0; gpc < oprv->gpc_nr; gpc++) { + u16 magic0 = 0x0218 * oprv->tpc_nr[gpc]; + u16 magic1 = 0x0648 * oprv->tpc_nr[gpc]; + magic[gpc][0] = 0x10000000 | (magic0 << 16) | offset; + magic[gpc][1] = 0x00000000 | (magic1 << 16); + offset += 0x0324 * oprv->tpc_nr[gpc]; + } + for (gpc = 0; gpc < oprv->gpc_nr; gpc++) { + mmio_list(GPC_UNIT(gpc, 0x30c0), magic[gpc][0], 0, 0); + mmio_list(GPC_UNIT(gpc, 0x30e4), magic[gpc][1] | offset, 0, 0); + offset += 0x07ff * oprv->tpc_nr[gpc]; } + mmio_list(0x17e91c, 0x06060609, 0, 0); + mmio_list(0x17e920, 0x00090a05, 0, 0); nv_wr32(priv, 0x418c6c, 0x1); nv_wr32(priv, 0x41980c, 0x10); @@ -2758,5 +2788,5 @@ nve0_grctx_generate(struct nouveau_channel *chan) nv_mask(priv, 0x000260, 0x00000001, 0x00000001); nv_wr32(priv, 0x418800, 0x7026860a); //XXX nv_wr32(priv, 0x41be10, 0x00bb8bc7); //XXX - return 0; + return nvc0_grctx_fini(&info); } diff --git a/drivers/gpu/drm/nouveau/core/engine/graph/nvc0.c b/drivers/gpu/drm/nouveau/core/engine/graph/nvc0.c index 158d57619e5b..f994d2f7e8d5 100644 --- a/drivers/gpu/drm/nouveau/core/engine/graph/nvc0.c +++ b/drivers/gpu/drm/nouveau/core/engine/graph/nvc0.c @@ -48,7 +48,7 @@ nvc0_graph_ctxctl_debug_unit(struct drm_device *dev, u32 base) nv_rd32(dev, base + 0x818), nv_rd32(dev, base + 0x81c)); } -static void +void nvc0_graph_ctxctl_debug(struct drm_device *dev) { u32 gpcnr = nv_rd32(dev, 0x409604) & 0xffff; @@ -59,259 +59,92 @@ nvc0_graph_ctxctl_debug(struct drm_device *dev) nvc0_graph_ctxctl_debug_unit(dev, 0x502000 + (gpc * 0x8000)); } -static int -nvc0_graph_load_context(struct nouveau_channel *chan) -{ - struct drm_device *dev = chan->dev; - - nv_wr32(dev, 0x409840, 0x00000030); - nv_wr32(dev, 0x409500, 0x80000000 | chan->ramin->addr >> 12); - nv_wr32(dev, 0x409504, 0x00000003); - if (!nv_wait(dev, 0x409800, 0x00000010, 0x00000010)) - NV_ERROR(dev, "PGRAPH: load_ctx timeout\n"); - - return 0; -} - -static int -nvc0_graph_unload_context_to(struct drm_device *dev, u64 chan) -{ - nv_wr32(dev, 0x409840, 0x00000003); - nv_wr32(dev, 0x409500, 0x80000000 | chan >> 12); - nv_wr32(dev, 0x409504, 0x00000009); - if (!nv_wait(dev, 0x409800, 0x00000001, 0x00000000)) { - NV_ERROR(dev, "PGRAPH: unload_ctx timeout\n"); - return -EBUSY; - } - - return 0; -} - -static int -nvc0_graph_construct_context(struct nouveau_channel *chan) +int +nvc0_graph_context_new(struct nouveau_channel *chan, int engine) { - struct nvc0_graph_priv *priv = nv_engine(chan->dev, NVOBJ_ENGINE_GR); - struct nvc0_graph_chan *grch = chan->engctx[NVOBJ_ENGINE_GR]; struct drm_device *dev = chan->dev; + struct nvc0_graph_priv *priv = nv_engine(dev, engine); + struct nvc0_graph_data *data = priv->mmio_data; + struct nvc0_graph_mmio *mmio = priv->mmio_list; + struct nvc0_graph_chan *grch; + struct nouveau_gpuobj *grctx; int ret, i; - u32 *ctx; - ctx = kmalloc(priv->grctx_size, GFP_KERNEL); - if (!ctx) + grch = kzalloc(sizeof(*grch), GFP_KERNEL); + if (!grch) return -ENOMEM; + chan->engctx[NVOBJ_ENGINE_GR] = grch; - if (!nouveau_ctxfw) { - nv_wr32(dev, 0x409840, 0x80000000); - nv_wr32(dev, 0x409500, 0x80000000 | chan->ramin->addr >> 12); - nv_wr32(dev, 0x409504, 0x00000001); - if (!nv_wait(dev, 0x409800, 0x80000000, 0x80000000)) { - NV_ERROR(dev, "PGRAPH: HUB_SET_CHAN timeout\n"); - nvc0_graph_ctxctl_debug(dev); - ret = -EBUSY; - goto err; - } - } else { - nvc0_graph_load_context(chan); - - nv_wo32(grch->grctx, 0x1c, 1); - nv_wo32(grch->grctx, 0x20, 0); - nv_wo32(grch->grctx, 0x28, 0); - nv_wo32(grch->grctx, 0x2c, 0); - nvimem_flush(dev); - } - - ret = nvc0_grctx_generate(chan); + ret = nouveau_gpuobj_new(dev, NULL, priv->size, 256, 0, &grch->grctx); if (ret) - goto err; - - if (!nouveau_ctxfw) { - nv_wr32(dev, 0x409840, 0x80000000); - nv_wr32(dev, 0x409500, 0x80000000 | chan->ramin->addr >> 12); - nv_wr32(dev, 0x409504, 0x00000002); - if (!nv_wait(dev, 0x409800, 0x80000000, 0x80000000)) { - NV_ERROR(dev, "PGRAPH: HUB_CTX_SAVE timeout\n"); - nvc0_graph_ctxctl_debug(dev); - ret = -EBUSY; - goto err; - } - } else { - ret = nvc0_graph_unload_context_to(dev, chan->ramin->addr); - if (ret) - goto err; - } - - for (i = 0; i < priv->grctx_size; i += 4) - ctx[i / 4] = nv_ro32(grch->grctx, i); - - priv->grctx_vals = ctx; - return 0; - -err: - kfree(ctx); - return ret; -} - -static int -nvc0_graph_create_context_mmio_list(struct nouveau_channel *chan) -{ - struct nvc0_graph_priv *priv = nv_engine(chan->dev, NVOBJ_ENGINE_GR); - struct nvc0_graph_chan *grch = chan->engctx[NVOBJ_ENGINE_GR]; - struct drm_device *dev = chan->dev; - struct drm_nouveau_private *dev_priv = dev->dev_private; - int i = 0, gpc, tp, ret; + goto error; - ret = nouveau_gpuobj_new(dev, NULL, 0x2000, 256, 0, &grch->unk408004); + ret = nouveau_gpuobj_map_vm(grch->grctx, chan->vm, NV_MEM_ACCESS_RW | + NV_MEM_ACCESS_SYS, &grch->grctx_vma); if (ret) return ret; - ret = nouveau_gpuobj_map_vm(grch->unk408004, chan->vm, - NV_MEM_ACCESS_RW | NV_MEM_ACCESS_SYS, - &grch->unk408004_vma); - if (ret) - return ret; + grctx = grch->grctx; - ret = nouveau_gpuobj_new(dev, NULL, 0x8000, 256, 0, &grch->unk40800c); + /* allocate memory for a "mmio list" buffer that's used by the HUB + * fuc to modify some per-context register settings on first load + * of the context. + */ + ret = nouveau_gpuobj_new(dev, NULL, 0x1000, 0x100, 0, &grch->mmio); if (ret) return ret; - ret = nouveau_gpuobj_map_vm(grch->unk40800c, chan->vm, + ret = nouveau_gpuobj_map_vm(grch->mmio, chan->vm, NV_MEM_ACCESS_RW | NV_MEM_ACCESS_SYS, - &grch->unk40800c_vma); + &grch->mmio_vma); if (ret) return ret; - ret = nouveau_gpuobj_new(dev, NULL, 384 * 1024, 4096, 0, - &grch->unk418810); - if (ret) - return ret; - - ret = nouveau_gpuobj_map_vm(grch->unk418810, chan->vm, - NV_MEM_ACCESS_RW, &grch->unk418810_vma); - if (ret) - return ret; - - ret = nouveau_gpuobj_new(dev, NULL, 0x1000, 0, 0, &grch->mmio); - if (ret) - return ret; + /* allocate buffers referenced by mmio list */ + for (i = 0; data->size && i < ARRAY_SIZE(priv->mmio_data); i++) { + ret = nouveau_gpuobj_new(dev, NULL, data->size, data->align, + 0, &grch->data[i].mem); + if (ret) + return ret; - ret = nouveau_gpuobj_map_vm(grch->mmio, chan->vm, NV_MEM_ACCESS_RW | - NV_MEM_ACCESS_SYS, &grch->mmio_vma); - if (ret) - return ret; + ret = nouveau_gpuobj_map_vm(grch->data[i].mem, chan->vm, + data->access, + &grch->data[i].vma); + if (ret) + return ret; - nv_wo32(grch->mmio, i++ * 4, 0x00408004); - nv_wo32(grch->mmio, i++ * 4, grch->unk408004_vma.offset >> 8); - nv_wo32(grch->mmio, i++ * 4, 0x00408008); - nv_wo32(grch->mmio, i++ * 4, 0x80000018); - - nv_wo32(grch->mmio, i++ * 4, 0x0040800c); - nv_wo32(grch->mmio, i++ * 4, grch->unk40800c_vma.offset >> 8); - nv_wo32(grch->mmio, i++ * 4, 0x00408010); - nv_wo32(grch->mmio, i++ * 4, 0x80000000); - - nv_wo32(grch->mmio, i++ * 4, 0x00418810); - nv_wo32(grch->mmio, i++ * 4, 0x80000000 | grch->unk418810_vma.offset >> 12); - nv_wo32(grch->mmio, i++ * 4, 0x00419848); - nv_wo32(grch->mmio, i++ * 4, 0x10000000 | grch->unk418810_vma.offset >> 12); - - nv_wo32(grch->mmio, i++ * 4, 0x00419004); - nv_wo32(grch->mmio, i++ * 4, grch->unk40800c_vma.offset >> 8); - nv_wo32(grch->mmio, i++ * 4, 0x00419008); - nv_wo32(grch->mmio, i++ * 4, 0x00000000); - - nv_wo32(grch->mmio, i++ * 4, 0x00418808); - nv_wo32(grch->mmio, i++ * 4, grch->unk408004_vma.offset >> 8); - nv_wo32(grch->mmio, i++ * 4, 0x0041880c); - nv_wo32(grch->mmio, i++ * 4, 0x80000018); - - if (dev_priv->chipset != 0xc1) { - u32 magic = 0x02180000; - nv_wo32(grch->mmio, i++ * 4, 0x00405830); - nv_wo32(grch->mmio, i++ * 4, magic); - for (gpc = 0; gpc < priv->gpc_nr; gpc++) { - for (tp = 0; tp < priv->tpc_nr[gpc]; tp++) { - u32 reg = TPC_UNIT(gpc, tp, 0x520); - nv_wo32(grch->mmio, i++ * 4, reg); - nv_wo32(grch->mmio, i++ * 4, magic); - magic += 0x0324; - } - } - } else { - u32 magic = 0x02180000; - nv_wo32(grch->mmio, i++ * 4, 0x00405830); - nv_wo32(grch->mmio, i++ * 4, magic | 0x0000218); - nv_wo32(grch->mmio, i++ * 4, 0x004064c4); - nv_wo32(grch->mmio, i++ * 4, 0x0086ffff); - for (gpc = 0; gpc < priv->gpc_nr; gpc++) { - for (tp = 0; tp < priv->tpc_nr[gpc]; tp++) { - u32 reg = TPC_UNIT(gpc, tp, 0x520); - nv_wo32(grch->mmio, i++ * 4, reg); - nv_wo32(grch->mmio, i++ * 4, (1 << 28) | magic); - magic += 0x0324; - } - for (tp = 0; tp < priv->tpc_nr[gpc]; tp++) { - u32 reg = TPC_UNIT(gpc, tp, 0x544); - nv_wo32(grch->mmio, i++ * 4, reg); - nv_wo32(grch->mmio, i++ * 4, magic); - magic += 0x0324; - } - } + data++; } - grch->mmio_nr = i / 2; - return 0; -} - -static int -nvc0_graph_context_new(struct nouveau_channel *chan, int engine) -{ - struct drm_device *dev = chan->dev; - struct nvc0_graph_priv *priv = nv_engine(dev, engine); - struct nvc0_graph_chan *grch; - struct nouveau_gpuobj *grctx; - int ret, i; - - grch = kzalloc(sizeof(*grch), GFP_KERNEL); - if (!grch) - return -ENOMEM; - chan->engctx[NVOBJ_ENGINE_GR] = grch; + /* finally, fill in the mmio list and point the context at it */ + for (i = 0; mmio->addr && i < ARRAY_SIZE(priv->mmio_list); i++) { + u32 addr = mmio->addr; + u32 data = mmio->data; - ret = nouveau_gpuobj_new(dev, NULL, priv->grctx_size, 256, 0, - &grch->grctx); - if (ret) - goto error; + if (mmio->shift) { + u64 info = grch->data[mmio->buffer].vma.offset; + data |= info >> mmio->shift; + } - ret = nouveau_gpuobj_map_vm(grch->grctx, chan->vm, NV_MEM_ACCESS_RW | - NV_MEM_ACCESS_SYS, &grch->grctx_vma); - if (ret) - return ret; + nv_wo32(grch->mmio, grch->mmio_nr++ * 4, addr); + nv_wo32(grch->mmio, grch->mmio_nr++ * 4, data); + mmio++; + } - grctx = grch->grctx; - - ret = nvc0_graph_create_context_mmio_list(chan); - if (ret) - goto error; + for (i = 0; i < priv->size; i += 4) + nv_wo32(grch->grctx, i, priv->data[i / 4]); nv_wo32(chan->ramin, 0x0210, lower_32_bits(grch->grctx_vma.offset) | 4); nv_wo32(chan->ramin, 0x0214, upper_32_bits(grch->grctx_vma.offset)); nvimem_flush(dev); - if (!priv->grctx_vals) { - ret = nvc0_graph_construct_context(chan); - if (ret) - goto error; - } - - for (i = 0; i < priv->grctx_size; i += 4) - nv_wo32(grctx, i, priv->grctx_vals[i / 4]); - - if (!nouveau_ctxfw) { - nv_wo32(grctx, 0x00, grch->mmio_nr); + if (!priv->firmware) { + nv_wo32(grctx, 0x00, grch->mmio_nr / 2); nv_wo32(grctx, 0x04, grch->mmio_vma.offset >> 8); } else { nv_wo32(grctx, 0xf4, 0); nv_wo32(grctx, 0xf8, 0); - nv_wo32(grctx, 0x10, grch->mmio_nr); + nv_wo32(grctx, 0x10, grch->mmio_nr / 2); nv_wo32(grctx, 0x14, lower_32_bits(grch->mmio_vma.offset)); nv_wo32(grctx, 0x18, upper_32_bits(grch->mmio_vma.offset)); nv_wo32(grctx, 0x1c, 1); @@ -327,20 +160,21 @@ error: return ret; } -static void +void nvc0_graph_context_del(struct nouveau_channel *chan, int engine) { struct nvc0_graph_chan *grch = chan->engctx[engine]; + int i; + + for (i = 0; i < ARRAY_SIZE(grch->data); i++) { + nouveau_gpuobj_unmap(&grch->data[i].vma); + nouveau_gpuobj_ref(NULL, &grch->data[i].mem); + } nouveau_gpuobj_unmap(&grch->mmio_vma); - nouveau_gpuobj_unmap(&grch->unk418810_vma); - nouveau_gpuobj_unmap(&grch->unk40800c_vma); - nouveau_gpuobj_unmap(&grch->unk408004_vma); - nouveau_gpuobj_unmap(&grch->grctx_vma); nouveau_gpuobj_ref(NULL, &grch->mmio); - nouveau_gpuobj_ref(NULL, &grch->unk418810); - nouveau_gpuobj_ref(NULL, &grch->unk40800c); - nouveau_gpuobj_ref(NULL, &grch->unk408004); + + nouveau_gpuobj_unmap(&grch->grctx_vma); nouveau_gpuobj_ref(NULL, &grch->grctx); chan->engctx[engine] = NULL; } @@ -517,89 +351,102 @@ nvc0_graph_init_ctxctl(struct drm_device *dev) u32 r000260; int i; - if (!nouveau_ctxfw) { - /* load HUB microcode */ + if (priv->firmware) { + /* load fuc microcode */ r000260 = nv_mask(dev, 0x000260, 0x00000001, 0x00000000); - nv_wr32(dev, 0x4091c0, 0x01000000); - for (i = 0; i < sizeof(nvc0_grhub_data) / 4; i++) - nv_wr32(dev, 0x4091c4, nvc0_grhub_data[i]); - - nv_wr32(dev, 0x409180, 0x01000000); - for (i = 0; i < sizeof(nvc0_grhub_code) / 4; i++) { - if ((i & 0x3f) == 0) - nv_wr32(dev, 0x409188, i >> 6); - nv_wr32(dev, 0x409184, nvc0_grhub_code[i]); - } - - /* load GPC microcode */ - nv_wr32(dev, 0x41a1c0, 0x01000000); - for (i = 0; i < sizeof(nvc0_grgpc_data) / 4; i++) - nv_wr32(dev, 0x41a1c4, nvc0_grgpc_data[i]); - - nv_wr32(dev, 0x41a180, 0x01000000); - for (i = 0; i < sizeof(nvc0_grgpc_code) / 4; i++) { - if ((i & 0x3f) == 0) - nv_wr32(dev, 0x41a188, i >> 6); - nv_wr32(dev, 0x41a184, nvc0_grgpc_code[i]); - } + nvc0_graph_init_fuc(dev, 0x409000, &priv->fuc409c, + &priv->fuc409d); + nvc0_graph_init_fuc(dev, 0x41a000, &priv->fuc41ac, + &priv->fuc41ad); nv_wr32(dev, 0x000260, r000260); - /* start HUB ucode running, it'll init the GPCs */ - nv_wr32(dev, 0x409800, dev_priv->chipset); + /* start both of them running */ + nv_wr32(dev, 0x409840, 0xffffffff); + nv_wr32(dev, 0x41a10c, 0x00000000); nv_wr32(dev, 0x40910c, 0x00000000); + nv_wr32(dev, 0x41a100, 0x00000002); nv_wr32(dev, 0x409100, 0x00000002); - if (!nv_wait(dev, 0x409800, 0x80000000, 0x80000000)) { - NV_ERROR(dev, "PGRAPH: HUB_INIT timed out\n"); - nvc0_graph_ctxctl_debug(dev); + if (!nv_wait(dev, 0x409800, 0x00000001, 0x00000001)) + NV_INFO(dev, "0x409800 wait failed\n"); + + nv_wr32(dev, 0x409840, 0xffffffff); + nv_wr32(dev, 0x409500, 0x7fffffff); + nv_wr32(dev, 0x409504, 0x00000021); + + nv_wr32(dev, 0x409840, 0xffffffff); + nv_wr32(dev, 0x409500, 0x00000000); + nv_wr32(dev, 0x409504, 0x00000010); + if (!nv_wait_ne(dev, 0x409800, 0xffffffff, 0x00000000)) { + NV_ERROR(dev, "fuc09 req 0x10 timeout\n"); return -EBUSY; } + priv->size = nv_rd32(dev, 0x409800); - priv->grctx_size = nv_rd32(dev, 0x409804); - return 0; + nv_wr32(dev, 0x409840, 0xffffffff); + nv_wr32(dev, 0x409500, 0x00000000); + nv_wr32(dev, 0x409504, 0x00000016); + if (!nv_wait_ne(dev, 0x409800, 0xffffffff, 0x00000000)) { + NV_ERROR(dev, "fuc09 req 0x16 timeout\n"); + return -EBUSY; + } + + nv_wr32(dev, 0x409840, 0xffffffff); + nv_wr32(dev, 0x409500, 0x00000000); + nv_wr32(dev, 0x409504, 0x00000025); + if (!nv_wait_ne(dev, 0x409800, 0xffffffff, 0x00000000)) { + NV_ERROR(dev, "fuc09 req 0x25 timeout\n"); + return -EBUSY; + } + + goto done; } - /* load fuc microcode */ + /* load HUB microcode */ r000260 = nv_mask(dev, 0x000260, 0x00000001, 0x00000000); - nvc0_graph_init_fuc(dev, 0x409000, &priv->fuc409c, &priv->fuc409d); - nvc0_graph_init_fuc(dev, 0x41a000, &priv->fuc41ac, &priv->fuc41ad); + nv_wr32(dev, 0x4091c0, 0x01000000); + for (i = 0; i < sizeof(nvc0_grhub_data) / 4; i++) + nv_wr32(dev, 0x4091c4, nvc0_grhub_data[i]); + + nv_wr32(dev, 0x409180, 0x01000000); + for (i = 0; i < sizeof(nvc0_grhub_code) / 4; i++) { + if ((i & 0x3f) == 0) + nv_wr32(dev, 0x409188, i >> 6); + nv_wr32(dev, 0x409184, nvc0_grhub_code[i]); + } + + /* load GPC microcode */ + nv_wr32(dev, 0x41a1c0, 0x01000000); + for (i = 0; i < sizeof(nvc0_grgpc_data) / 4; i++) + nv_wr32(dev, 0x41a1c4, nvc0_grgpc_data[i]); + + nv_wr32(dev, 0x41a180, 0x01000000); + for (i = 0; i < sizeof(nvc0_grgpc_code) / 4; i++) { + if ((i & 0x3f) == 0) + nv_wr32(dev, 0x41a188, i >> 6); + nv_wr32(dev, 0x41a184, nvc0_grgpc_code[i]); + } nv_wr32(dev, 0x000260, r000260); - /* start both of them running */ - nv_wr32(dev, 0x409840, 0xffffffff); - nv_wr32(dev, 0x41a10c, 0x00000000); + /* start HUB ucode running, it'll init the GPCs */ + nv_wr32(dev, 0x409800, dev_priv->chipset); nv_wr32(dev, 0x40910c, 0x00000000); - nv_wr32(dev, 0x41a100, 0x00000002); nv_wr32(dev, 0x409100, 0x00000002); - if (!nv_wait(dev, 0x409800, 0x00000001, 0x00000001)) - NV_INFO(dev, "0x409800 wait failed\n"); - - nv_wr32(dev, 0x409840, 0xffffffff); - nv_wr32(dev, 0x409500, 0x7fffffff); - nv_wr32(dev, 0x409504, 0x00000021); - - nv_wr32(dev, 0x409840, 0xffffffff); - nv_wr32(dev, 0x409500, 0x00000000); - nv_wr32(dev, 0x409504, 0x00000010); - if (!nv_wait_ne(dev, 0x409800, 0xffffffff, 0x00000000)) { - NV_ERROR(dev, "fuc09 req 0x10 timeout\n"); + if (!nv_wait(dev, 0x409800, 0x80000000, 0x80000000)) { + NV_ERROR(dev, "PGRAPH: HUB_INIT timed out\n"); + nvc0_graph_ctxctl_debug(dev); return -EBUSY; } - priv->grctx_size = nv_rd32(dev, 0x409800); - nv_wr32(dev, 0x409840, 0xffffffff); - nv_wr32(dev, 0x409500, 0x00000000); - nv_wr32(dev, 0x409504, 0x00000016); - if (!nv_wait_ne(dev, 0x409800, 0xffffffff, 0x00000000)) { - NV_ERROR(dev, "fuc09 req 0x16 timeout\n"); - return -EBUSY; - } + priv->size = nv_rd32(dev, 0x409804); +done: + if (priv->data == NULL) { + int ret = nvc0_grctx_generate(dev); + if (ret) { + NV_ERROR(dev, "PGRAPH: failed to construct context\n"); + return ret; + } - nv_wr32(dev, 0x409840, 0xffffffff); - nv_wr32(dev, 0x409500, 0x00000000); - nv_wr32(dev, 0x409504, 0x00000025); - if (!nv_wait_ne(dev, 0x409800, 0xffffffff, 0x00000000)) { - NV_ERROR(dev, "fuc09 req 0x25 timeout\n"); - return -EBUSY; + return 1; } return 0; @@ -610,6 +457,7 @@ nvc0_graph_init(struct drm_device *dev, int engine) { int ret; +reset: nv_mask(dev, 0x000200, 0x18001000, 0x00000000); nv_mask(dev, 0x000200, 0x18001000, 0x18001000); @@ -636,8 +484,11 @@ nvc0_graph_init(struct drm_device *dev, int engine) nv_wr32(dev, 0x400054, 0x34ce3464); ret = nvc0_graph_init_ctxctl(dev); - if (ret) + if (ret) { + if (ret == 1) + goto reset; return ret; + } return 0; } @@ -784,20 +635,18 @@ nvc0_graph_destroy(struct drm_device *dev, int engine) { struct nvc0_graph_priv *priv = nv_engine(dev, engine); - if (nouveau_ctxfw) { - nvc0_graph_destroy_fw(&priv->fuc409c); - nvc0_graph_destroy_fw(&priv->fuc409d); - nvc0_graph_destroy_fw(&priv->fuc41ac); - nvc0_graph_destroy_fw(&priv->fuc41ad); - } + nvc0_graph_destroy_fw(&priv->fuc409c); + nvc0_graph_destroy_fw(&priv->fuc409d); + nvc0_graph_destroy_fw(&priv->fuc41ac); + nvc0_graph_destroy_fw(&priv->fuc41ad); nouveau_irq_unregister(dev, 12); nouveau_gpuobj_ref(NULL, &priv->unk4188b8); nouveau_gpuobj_ref(NULL, &priv->unk4188b4); - if (priv->grctx_vals) - kfree(priv->grctx_vals); + if (priv->data) + kfree(priv->data); NVOBJ_ENGINE_DEL(dev, GR); kfree(priv); @@ -840,6 +689,7 @@ nvc0_graph_create(struct drm_device *dev) ret = 0; goto error; } + priv->firmware = true; } ret = nouveau_gpuobj_new(dev, NULL, 0x1000, 256, 0, &priv->unk4188b4); diff --git a/drivers/gpu/drm/nouveau/core/engine/graph/nvc0.h b/drivers/gpu/drm/nouveau/core/engine/graph/nvc0.h index ba9b80fd8958..30ea3ab135c6 100644 --- a/drivers/gpu/drm/nouveau/core/engine/graph/nvc0.h +++ b/drivers/gpu/drm/nouveau/core/engine/graph/nvc0.h @@ -66,27 +66,28 @@ struct nvc0_graph_priv { u8 tpc_nr[GPC_MAX]; u8 tpc_total; - u32 grctx_size; - u32 *grctx_vals; struct nouveau_gpuobj *unk4188b4; struct nouveau_gpuobj *unk4188b8; + struct nvc0_graph_data mmio_data[4]; + struct nvc0_graph_mmio mmio_list[4096/8]; + u32 size; + u32 *data; + u8 magic_not_rop_nr; }; struct nvc0_graph_chan { struct nouveau_gpuobj *grctx; struct nouveau_vma grctx_vma; - struct nouveau_gpuobj *unk408004; /* 0x418808 too */ - struct nouveau_vma unk408004_vma; - struct nouveau_gpuobj *unk40800c; /* 0x419004 too */ - struct nouveau_vma unk40800c_vma; - struct nouveau_gpuobj *unk418810; /* 0x419848 too */ - struct nouveau_vma unk418810_vma; struct nouveau_gpuobj *mmio; struct nouveau_vma mmio_vma; int mmio_nr; + struct { + struct nouveau_gpuobj *mem; + struct nouveau_vma vma; + } data[4]; }; static inline u32 @@ -124,6 +125,7 @@ nv_mthd(struct drm_device *priv, u32 class, u32 mthd, u32 data) } struct nvc0_grctx { + struct drm_device *dev; struct nvc0_graph_priv *priv; struct nvc0_graph_data *data; struct nvc0_graph_mmio *mmio; @@ -133,13 +135,14 @@ struct nvc0_grctx { u64 addr; }; -int nvc0_grctx_generate(struct nouveau_channel *); -int nvc0_grctx_init(struct nvc0_graph_priv *, struct nvc0_grctx *); +int nvc0_grctx_generate(struct drm_device *); +int nvc0_grctx_init(struct drm_device *, struct nvc0_graph_priv *, + struct nvc0_grctx *); void nvc0_grctx_data(struct nvc0_grctx *, u32, u32, u32); void nvc0_grctx_mmio(struct nvc0_grctx *, u32, u32, u32, u32); int nvc0_grctx_fini(struct nvc0_grctx *); -int nve0_grctx_generate(struct nouveau_channel *); +int nve0_grctx_generate(struct drm_device *); #define mmio_data(s,a,p) nvc0_grctx_data(&info, (s), (a), (p)) #define mmio_list(r,d,s,b) nvc0_grctx_mmio(&info, (r), (d), (s), (b)) @@ -154,4 +157,9 @@ int nvc0_graph_context_ctor(struct nouveau_object *, struct nouveau_object *, struct nouveau_object **); void nvc0_graph_context_dtor(struct nouveau_object *); +void nvc0_graph_ctxctl_debug(struct drm_device *); + +int nvc0_graph_context_new(struct nouveau_channel *, int); +void nvc0_graph_context_del(struct nouveau_channel *, int); + #endif diff --git a/drivers/gpu/drm/nouveau/core/engine/graph/nve0.c b/drivers/gpu/drm/nouveau/core/engine/graph/nve0.c index 2bd55d8d10a7..a3a4ee7c0b2e 100644 --- a/drivers/gpu/drm/nouveau/core/engine/graph/nve0.c +++ b/drivers/gpu/drm/nouveau/core/engine/graph/nve0.c @@ -57,243 +57,6 @@ nve0_graph_ctxctl_debug(struct drm_device *dev) nve0_graph_ctxctl_debug_unit(dev, 0x502000 + (gpc * 0x8000)); } -static int -nve0_graph_load_context(struct nouveau_channel *chan) -{ - struct drm_device *dev = chan->dev; - - nv_wr32(dev, 0x409840, 0x00000030); - nv_wr32(dev, 0x409500, 0x80000000 | chan->ramin->addr >> 12); - nv_wr32(dev, 0x409504, 0x00000003); - if (!nv_wait(dev, 0x409800, 0x00000010, 0x00000010)) - NV_ERROR(dev, "PGRAPH: load_ctx timeout\n"); - - return 0; -} - -static int -nve0_graph_unload_context_to(struct drm_device *dev, u64 chan) -{ - nv_wr32(dev, 0x409840, 0x00000003); - nv_wr32(dev, 0x409500, 0x80000000 | chan >> 12); - nv_wr32(dev, 0x409504, 0x00000009); - if (!nv_wait(dev, 0x409800, 0x00000001, 0x00000000)) { - NV_ERROR(dev, "PGRAPH: unload_ctx timeout\n"); - return -EBUSY; - } - - return 0; -} - -static int -nve0_graph_construct_context(struct nouveau_channel *chan) -{ - struct nvc0_graph_priv *priv = nv_engine(chan->dev, NVOBJ_ENGINE_GR); - struct nvc0_graph_chan *grch = chan->engctx[NVOBJ_ENGINE_GR]; - struct drm_device *dev = chan->dev; - int ret, i; - u32 *ctx; - - ctx = kmalloc(priv->grctx_size, GFP_KERNEL); - if (!ctx) - return -ENOMEM; - - nve0_graph_load_context(chan); - - nv_wo32(grch->grctx, 0x1c, 1); - nv_wo32(grch->grctx, 0x20, 0); - nv_wo32(grch->grctx, 0x28, 0); - nv_wo32(grch->grctx, 0x2c, 0); - nvimem_flush(dev); - - ret = nve0_grctx_generate(chan); - if (ret) - goto err; - - ret = nve0_graph_unload_context_to(dev, chan->ramin->addr); - if (ret) - goto err; - - for (i = 0; i < priv->grctx_size; i += 4) - ctx[i / 4] = nv_ro32(grch->grctx, i); - - priv->grctx_vals = ctx; - return 0; - -err: - kfree(ctx); - return ret; -} - -static int -nve0_graph_create_context_mmio_list(struct nouveau_channel *chan) -{ - struct nvc0_graph_priv *priv = nv_engine(chan->dev, NVOBJ_ENGINE_GR); - struct nvc0_graph_chan *grch = chan->engctx[NVOBJ_ENGINE_GR]; - struct drm_device *dev = chan->dev; - u32 magic[GPC_MAX][2]; - u16 offset = 0x0000; - int gpc; - int ret; - - ret = nouveau_gpuobj_new(dev, NULL, 0x3000, 256, 0, &grch->unk408004); - if (ret) - return ret; - - ret = nouveau_gpuobj_map_vm(grch->unk408004, chan->vm, - NV_MEM_ACCESS_RW | NV_MEM_ACCESS_SYS, - &grch->unk408004_vma); - if (ret) - return ret; - - ret = nouveau_gpuobj_new(dev, NULL, 0x8000, 256, 0, &grch->unk40800c); - if (ret) - return ret; - - ret = nouveau_gpuobj_map_vm(grch->unk40800c, chan->vm, - NV_MEM_ACCESS_RW | NV_MEM_ACCESS_SYS, - &grch->unk40800c_vma); - if (ret) - return ret; - - ret = nouveau_gpuobj_new(dev, NULL, 384 * 1024, 4096, 0, - &grch->unk418810); - if (ret) - return ret; - - ret = nouveau_gpuobj_map_vm(grch->unk418810, chan->vm, - NV_MEM_ACCESS_RW, &grch->unk418810_vma); - if (ret) - return ret; - - ret = nouveau_gpuobj_new(dev, NULL, 0x1000, 0, 0, &grch->mmio); - if (ret) - return ret; - - ret = nouveau_gpuobj_map_vm(grch->mmio, chan->vm, - NV_MEM_ACCESS_RW | NV_MEM_ACCESS_SYS, - &grch->mmio_vma); - if (ret) - return ret; - -#define mmio(r,v) do { \ - nv_wo32(grch->mmio, (grch->mmio_nr * 8) + 0, (r)); \ - nv_wo32(grch->mmio, (grch->mmio_nr * 8) + 4, (v)); \ - grch->mmio_nr++; \ -} while (0) - mmio(0x40800c, grch->unk40800c_vma.offset >> 8); - mmio(0x408010, 0x80000000); - mmio(0x419004, grch->unk40800c_vma.offset >> 8); - mmio(0x419008, 0x00000000); - mmio(0x4064cc, 0x80000000); - mmio(0x408004, grch->unk408004_vma.offset >> 8); - mmio(0x408008, 0x80000030); - mmio(0x418808, grch->unk408004_vma.offset >> 8); - mmio(0x41880c, 0x80000030); - mmio(0x4064c8, 0x01800600); - mmio(0x418810, 0x80000000 | grch->unk418810_vma.offset >> 12); - mmio(0x419848, 0x10000000 | grch->unk418810_vma.offset >> 12); - mmio(0x405830, 0x02180648); - mmio(0x4064c4, 0x0192ffff); - - for (gpc = 0; gpc < priv->gpc_nr; gpc++) { - u16 magic0 = 0x0218 * priv->tpc_nr[gpc]; - u16 magic1 = 0x0648 * priv->tpc_nr[gpc]; - magic[gpc][0] = 0x10000000 | (magic0 << 16) | offset; - magic[gpc][1] = 0x00000000 | (magic1 << 16); - offset += 0x0324 * priv->tpc_nr[gpc]; - } - - for (gpc = 0; gpc < priv->gpc_nr; gpc++) { - mmio(GPC_UNIT(gpc, 0x30c0), magic[gpc][0]); - mmio(GPC_UNIT(gpc, 0x30e4), magic[gpc][1] | offset); - offset += 0x07ff * priv->tpc_nr[gpc]; - } - - mmio(0x17e91c, 0x06060609); - mmio(0x17e920, 0x00090a05); -#undef mmio - return 0; -} - -static int -nve0_graph_context_new(struct nouveau_channel *chan, int engine) -{ - struct drm_device *dev = chan->dev; - struct nvc0_graph_priv *priv = nv_engine(dev, engine); - struct nvc0_graph_chan *grch; - struct nouveau_gpuobj *grctx; - int ret, i; - - grch = kzalloc(sizeof(*grch), GFP_KERNEL); - if (!grch) - return -ENOMEM; - chan->engctx[NVOBJ_ENGINE_GR] = grch; - - ret = nouveau_gpuobj_new(dev, NULL, priv->grctx_size, 256, 0, - &grch->grctx); - if (ret) - goto error; - - ret = nouveau_gpuobj_map_vm(grch->grctx, chan->vm, NV_MEM_ACCESS_RW | - NV_MEM_ACCESS_SYS, &grch->grctx_vma); - if (ret) - return ret; - - grctx = grch->grctx; - - ret = nve0_graph_create_context_mmio_list(chan); - if (ret) - goto error; - - nv_wo32(chan->ramin, 0x0210, lower_32_bits(grch->grctx_vma.offset) | 4); - nv_wo32(chan->ramin, 0x0214, upper_32_bits(grch->grctx_vma.offset)); - nvimem_flush(dev); - - if (!priv->grctx_vals) { - ret = nve0_graph_construct_context(chan); - if (ret) - goto error; - } - - for (i = 0; i < priv->grctx_size; i += 4) - nv_wo32(grctx, i, priv->grctx_vals[i / 4]); - nv_wo32(grctx, 0xf4, 0); - nv_wo32(grctx, 0xf8, 0); - nv_wo32(grctx, 0x10, grch->mmio_nr); - nv_wo32(grctx, 0x14, lower_32_bits(grch->mmio_vma.offset)); - nv_wo32(grctx, 0x18, upper_32_bits(grch->mmio_vma.offset)); - nv_wo32(grctx, 0x1c, 1); - nv_wo32(grctx, 0x20, 0); - nv_wo32(grctx, 0x28, 0); - nv_wo32(grctx, 0x2c, 0); - - nvimem_flush(dev); - return 0; - -error: - priv->base.context_del(chan, engine); - return ret; -} - -static void -nve0_graph_context_del(struct nouveau_channel *chan, int engine) -{ - struct nvc0_graph_chan *grch = chan->engctx[engine]; - - nouveau_gpuobj_unmap(&grch->mmio_vma); - nouveau_gpuobj_unmap(&grch->unk418810_vma); - nouveau_gpuobj_unmap(&grch->unk40800c_vma); - nouveau_gpuobj_unmap(&grch->unk408004_vma); - nouveau_gpuobj_unmap(&grch->grctx_vma); - nouveau_gpuobj_ref(NULL, &grch->mmio); - nouveau_gpuobj_ref(NULL, &grch->unk418810); - nouveau_gpuobj_ref(NULL, &grch->unk40800c); - nouveau_gpuobj_ref(NULL, &grch->unk408004); - nouveau_gpuobj_ref(NULL, &grch->grctx); - chan->engctx[engine] = NULL; -} - static int nve0_graph_object_new(struct nouveau_channel *chan, int engine, u32 handle, u16 class) @@ -487,7 +250,7 @@ nve0_graph_init_ctxctl(struct drm_device *dev) NV_ERROR(dev, "fuc09 req 0x10 timeout\n"); return -EBUSY; } - priv->grctx_size = nv_rd32(dev, 0x409800); + priv->size = nv_rd32(dev, 0x409800); nv_wr32(dev, 0x409840, 0xffffffff); nv_wr32(dev, 0x409500, 0x00000000); @@ -534,6 +297,17 @@ nve0_graph_init_ctxctl(struct drm_device *dev) nv_wr32(dev, 0x409614, 0x00000070); nv_wr32(dev, 0x409614, 0x00000770); nv_wr32(dev, 0x40802c, 0x00000001); + + if (priv->data == NULL) { + int ret = nve0_grctx_generate(dev); + if (ret) { + NV_ERROR(dev, "PGRAPH: failed to construct context\n"); + return ret; + } + + return 1; + } + return 0; } @@ -542,6 +316,7 @@ nve0_graph_init(struct drm_device *dev, int engine) { int ret; +reset: nv_mask(dev, 0x000200, 0x18001000, 0x00000000); nv_mask(dev, 0x000200, 0x18001000, 0x18001000); @@ -566,8 +341,11 @@ nve0_graph_init(struct drm_device *dev, int engine) nv_wr32(dev, 0x400054, 0x34ce3464); ret = nve0_graph_init_ctxctl(dev); - if (ret) + if (ret) { + if (ret == 1) + goto reset; return ret; + } return 0; } @@ -758,8 +536,8 @@ nve0_graph_destroy(struct drm_device *dev, int engine) nouveau_gpuobj_ref(NULL, &priv->unk4188b8); nouveau_gpuobj_ref(NULL, &priv->unk4188b4); - if (priv->grctx_vals) - kfree(priv->grctx_vals); + if (priv->data) + kfree(priv->data); NVOBJ_ENGINE_DEL(dev, GR); kfree(priv); @@ -786,8 +564,8 @@ nve0_graph_create(struct drm_device *dev) priv->base.destroy = nve0_graph_destroy; priv->base.init = nve0_graph_init; priv->base.fini = nve0_graph_fini; - priv->base.context_new = nve0_graph_context_new; - priv->base.context_del = nve0_graph_context_del; + priv->base.context_new = nvc0_graph_context_new; + priv->base.context_del = nvc0_graph_context_del; priv->base.object_new = nve0_graph_object_new; NVOBJ_ENGINE_ADD(dev, GR, &priv->base); @@ -801,6 +579,7 @@ nve0_graph_create(struct drm_device *dev) ret = 0; goto error; } + priv->firmware = true; ret = nouveau_gpuobj_new(dev, NULL, 0x1000, 256, 0, &priv->unk4188b4); if (ret) -- 2.20.1