Commit | Line | Data |
---|---|---|
1978a2f2 BS |
1 | /* fuc microcode for nve0 PGRAPH/HUB |
2 | * | |
3 | * Copyright 2011 Red Hat Inc. | |
4 | * | |
5 | * Permission is hereby granted, free of charge, to any person obtaining a | |
6 | * copy of this software and associated documentation files (the "Software"), | |
7 | * to deal in the Software without restriction, including without limitation | |
8 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, | |
9 | * and/or sell copies of the Software, and to permit persons to whom the | |
10 | * Software is furnished to do so, subject to the following conditions: | |
11 | * | |
12 | * The above copyright notice and this permission notice shall be included in | |
13 | * all copies or substantial portions of the Software. | |
14 | * | |
15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |
16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |
17 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | |
18 | * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR | |
19 | * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, | |
20 | * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR | |
21 | * OTHER DEALINGS IN THE SOFTWARE. | |
22 | * | |
23 | * Authors: Ben Skeggs | |
24 | */ | |
25 | ||
26 | /* To build: | |
27 | * m4 nve0_grhub.fuc | envyas -a -w -m fuc -V nva3 -o nve0_grhub.fuc.h | |
28 | */ | |
29 | ||
30 | .section #nve0_grhub_data | |
31 | include(`nve0.fuc') | |
32 | gpc_count: .b32 0 // set by init from bits 4:0 of the 0x409604 read | |
33 | rop_count: .b32 0 // set by init from bits 20:16 of the 0x409604 read | |
34 | cmd_queue: queue_init | |
35 | hub_mmio_list_head: .b32 0 // start of the matched chipset's mmio list, stored by init | |
36 | hub_mmio_list_tail: .b32 0 // end of the matched chipset's mmio list, stored by init | |
37 | ||
38 | ctx_current: .b32 0 // context pointer computed by ctx_load, later written back to MEM_BASE | |
39 | ||
40 | chipsets: // 8-byte entries walked by init: 32-bit chipset id, 16-bit list head, 16-bit list tail | |
41 | .b8 0xe4 0 0 0 | |
42 | .b16 #nve4_hub_mmio_head | |
43 | .b16 #nve4_hub_mmio_tail | |
44 | .b8 0xe7 0 0 0 | |
45 | .b16 #nve4_hub_mmio_head | |
46 | .b16 #nve4_hub_mmio_tail | |
eca15296 BS |
47 | .b8 0xe6 0 0 0 |
48 | .b16 #nve4_hub_mmio_head | |
49 | .b16 #nve4_hub_mmio_tail | |
1978a2f2 BS |
50 | .b8 0 0 0 0 // an id of 0 ends the search in init_find_chipset |
51 | ||
52 | nve4_hub_mmio_head: // list bounded by the head/tail labels; presumably (first register, register count) pairs - confirm in nve0.fuc | |
53 | mmctx_data(0x17e91c, 2) | |
54 | mmctx_data(0x400204, 2) | |
55 | mmctx_data(0x404010, 7) | |
56 | mmctx_data(0x4040a8, 9) | |
57 | mmctx_data(0x4040d0, 7) | |
58 | mmctx_data(0x4040f8, 1) | |
59 | mmctx_data(0x404130, 3) | |
60 | mmctx_data(0x404150, 3) | |
61 | mmctx_data(0x404164, 1) | |
62 | mmctx_data(0x4041a0, 4) | |
63 | mmctx_data(0x404200, 4) | |
64 | mmctx_data(0x404404, 14) | |
65 | mmctx_data(0x404460, 4) | |
66 | mmctx_data(0x404480, 1) | |
67 | mmctx_data(0x404498, 1) | |
68 | mmctx_data(0x404604, 4) | |
69 | mmctx_data(0x404618, 4) | |
70 | mmctx_data(0x40462c, 2) | |
71 | mmctx_data(0x404640, 1) | |
72 | mmctx_data(0x404654, 1) | |
73 | mmctx_data(0x404660, 1) | |
74 | mmctx_data(0x404678, 19) | |
75 | mmctx_data(0x4046c8, 3) | |
76 | mmctx_data(0x404700, 3) | |
77 | mmctx_data(0x404718, 10) | |
78 | mmctx_data(0x404744, 2) | |
79 | mmctx_data(0x404754, 1) | |
80 | mmctx_data(0x405800, 1) | |
81 | mmctx_data(0x405830, 3) | |
82 | mmctx_data(0x405854, 1) | |
83 | mmctx_data(0x405870, 4) | |
84 | mmctx_data(0x405a00, 2) | |
85 | mmctx_data(0x405a18, 1) | |
86 | mmctx_data(0x405b00, 1) | |
87 | mmctx_data(0x405b10, 1) | |
88 | mmctx_data(0x406020, 1) | |
89 | mmctx_data(0x406028, 4) | |
90 | mmctx_data(0x4064a8, 2) | |
91 | mmctx_data(0x4064b4, 2) | |
92 | mmctx_data(0x4064c0, 12) | |
93 | mmctx_data(0x4064fc, 1) | |
94 | mmctx_data(0x407040, 1) | |
95 | mmctx_data(0x407804, 1) | |
96 | mmctx_data(0x40780c, 6) | |
97 | mmctx_data(0x4078bc, 1) | |
98 | mmctx_data(0x408000, 7) | |
99 | mmctx_data(0x408064, 1) | |
100 | mmctx_data(0x408800, 3) | |
101 | mmctx_data(0x408840, 1) | |
102 | mmctx_data(0x408900, 3) | |
103 | mmctx_data(0x408980, 1) | |
104 | nve4_hub_mmio_tail: // end marker only, no data of its own | |
105 | ||
106 | .align 256 | |
107 | chan_data: // 256 bytes are loaded here by ctx_load and stored back from here by ctx_mmio_exec | |
108 | chan_mmio_count: .b32 0 // entry count tested by ctx_xfer after a load, zeroed once the list has run | |
109 | chan_mmio_address: .b32 0 // written to MEM_BASE by ctx_mmio_exec before fetching the list | |
110 | ||
111 | .align 256 | |
112 | xfer_data: .b32 0 // xdld target: 16 bytes in ctx_load, 256 bytes in ctx_mmio_exec. NOTE(review): only one word is reserved here - confirm the data segment has room beyond it | |
113 | ||
114 | .section #nve0_grhub_code | |
115 | bra #init // first instruction of the code section: jump over the routines pulled in below | |
116 | define(`include_code') | |
117 | include(`nve0.fuc') | |
118 | ||
119 | // reports an exception to the host: stores the code in CC_SCRATCH[5], then raises bit 0 of INTR_UP_SET | |
120 | // | |
121 | // In: $r15 error code (see nve0.fuc) | |
122 | // Out: $r15 is left holding 1, $r14 is saved and restored | |
123 | error: | |
124 | push $r14 | |
125 | mov $r14 0x814 | |
126 | shl b32 $r14 6 | |
127 | iowr I[$r14 + 0x000] $r15 // CC_SCRATCH[5] = error code | |
128 | mov $r14 0xc1c | |
129 | shl b32 $r14 6 | |
130 | mov $r15 1 | |
131 | iowr I[$r14 + 0x000] $r15 // INTR_UP_SET, bit 0 | |
132 | pop $r14 | |
133 | ret | |
134 | ||
135 | // HUB fuc initialisation, executed by triggering ucode start, will | |
136 | // fall through to main loop after completion. | |
137 | // | |
138 | // Input: | |
139 | // CC_SCRATCH[0]: chipset (PMC_BOOT_0 read returns 0x0bad0bad... sigh) | |
140 | // | |
141 | // Output: | |
142 | // CC_SCRATCH[0]: | |
143 | // 31:31: set to signal completion | |
144 | // CC_SCRATCH[1]: | |
145 | // 31:0: total PGRAPH context size | |
146 | // Data written: gpc_count, rop_count, hub_mmio_list_head, hub_mmio_list_tail | |
147 | init: | |
148 | clear b32 $r0 // nothing in this file writes $r0 again, it serves as the constant zero | |
149 | mov $sp $r0 | |
150 | mov $xdbase $r0 | |
151 | ||
152 | // enable fifo access | |
153 | mov $r1 0x1200 | |
154 | mov $r2 2 | |
155 | iowr I[$r1 + 0x000] $r2 // FIFO_ENABLE | |
156 | ||
157 | // setup i0 handler, and route all interrupts to it | |
158 | mov $r1 #ih | |
159 | mov $iv0 $r1 | |
160 | mov $r1 0x400 // kept in $r1 until the INTR_EN_SET write further down | |
161 | iowr I[$r1 + 0x300] $r0 // INTR_DISPATCH | |
162 | ||
163 | // route HUB_CHANNEL_SWITCH to fuc interrupt 8 | |
164 | mov $r3 0x404 | |
165 | shl b32 $r3 6 | |
166 | mov $r2 0x2003 // { HUB_CHANNEL_SWITCH, ZERO } -> intr 8 | |
167 | iowr I[$r3 + 0x000] $r2 | |
168 | ||
169 | // not sure what these are, route them because NVIDIA does, and | |
170 | // the IRQ handler will signal the host if we ever get one.. we | |
171 | // may find out if/why we need to handle these if so.. | |
172 | // | |
173 | mov $r2 0x2004 | |
174 | iowr I[$r3 + 0x004] $r2 // { 0x04, ZERO } -> intr 9 | |
175 | mov $r2 0x200b | |
176 | iowr I[$r3 + 0x008] $r2 // { 0x0b, ZERO } -> intr 10 | |
177 | mov $r2 0x200c | |
178 | iowr I[$r3 + 0x01c] $r2 // { 0x0c, ZERO } -> intr 15 | |
179 | ||
180 | // enable all INTR_UP interrupts | |
181 | mov $r2 0xc24 | |
182 | shl b32 $r2 6 | |
183 | not b32 $r3 $r0 // $r3 = 0xffffffff | |
184 | iowr I[$r2] $r3 | |
185 | ||
186 | // enable fifo, ctxsw, 9, 10, 15 interrupts | |
187 | mov $r2 -0x78fc // 0x8704 | |
188 | sethi $r2 0 | |
189 | iowr I[$r1 + 0x000] $r2 // INTR_EN_SET | |
190 | ||
191 | // fifo level triggered, rest edge | |
192 | sub b32 $r1 0x100 // 0x400 - 0x100 = 0x300 | |
193 | mov $r2 4 // bit 2, the fifo interrupt tested by ih | |
194 | iowr I[$r1] $r2 | |
195 | ||
196 | // enable interrupts | |
197 | bset $flags ie0 | |
198 | ||
199 | // fetch enabled GPC/ROP counts | |
200 | mov $r14 -0x69fc // 0x409604 | |
201 | sethi $r14 0x400000 | |
202 | call #nv_rd32 | |
203 | extr $r1 $r15 16:20 | |
204 | st b32 D[$r0 + #rop_count] $r1 | |
205 | and $r15 0x1f | |
206 | st b32 D[$r0 + #gpc_count] $r15 | |
207 | ||
208 | // set BAR_REQMASK to GPC mask | |
209 | mov $r1 1 | |
210 | shl b32 $r1 $r15 | |
211 | sub b32 $r1 1 // $r1 = (1 << gpc_count) - 1 | |
212 | mov $r2 0x40c | |
213 | shl b32 $r2 6 | |
214 | iowr I[$r2 + 0x000] $r1 | |
215 | iowr I[$r2 + 0x100] $r1 | |
216 | ||
217 | // find context data for this chipset | |
218 | mov $r2 0x800 | |
219 | shl b32 $r2 6 | |
220 | iord $r2 I[$r2 + 0x000] // CC_SCRATCH[0] | |
221 | mov $r15 #chipsets - 8 | |
222 | init_find_chipset: | |
223 | add b32 $r15 8 // step to the next 8-byte table entry | |
224 | ld b32 $r3 D[$r15 + 0x00] | |
225 | cmpu b32 $r3 $r2 | |
226 | bra e #init_context | |
227 | cmpu b32 $r3 0 // the table ends with a zero chipset id | |
228 | bra ne #init_find_chipset | |
229 | // unknown chipset. NOTE(review): init is reached by a branch with $sp set to 0, so no return address was pushed by this file - confirm what this ret does | |
230 | ret | |
231 | ||
232 | // context size calculation, reserve first 256 bytes for use by fuc | |
233 | init_context: | |
234 | mov $r1 256 // $r1 accumulates the context size from here on | |
235 | ||
236 | // calculate size of mmio context data | |
237 | ld b16 $r14 D[$r15 + 4] | |
238 | ld b16 $r15 D[$r15 + 6] | |
239 | sethi $r14 0 | |
240 | st b32 D[$r0 + #hub_mmio_list_head] $r14 | |
241 | st b32 D[$r0 + #hub_mmio_list_tail] $r15 | |
242 | call #mmctx_size // presumably returns the list's byte size in $r15, which is added to $r1 below - confirm in nve0.fuc | |
243 | ||
244 | // set mmctx base addresses now so we don't have to do it later, | |
245 | // they don't (currently) ever change | |
246 | mov $r3 0x700 | |
247 | shl b32 $r3 6 | |
248 | shr b32 $r4 $r1 8 // current offset divided by 256 | |
249 | iowr I[$r3 + 0x000] $r4 // MMCTX_SAVE_SWBASE | |
250 | iowr I[$r3 + 0x100] $r4 // MMCTX_LOAD_SWBASE | |
251 | add b32 $r3 0x1300 | |
252 | add b32 $r1 $r15 | |
253 | shr b32 $r15 2 | |
254 | iowr I[$r3 + 0x000] $r15 // MMCTX_LOAD_COUNT, wtf for?!? | |
255 | ||
256 | // strands, base offset needs to be aligned to 256 bytes. NOTE(review): this advances to the next boundary even when $r1 is already aligned | |
257 | shr b32 $r1 8 | |
258 | add b32 $r1 1 | |
259 | shl b32 $r1 8 | |
260 | mov b32 $r15 $r1 | |
261 | call #strand_ctx_init // presumably takes the offset in $r15 and returns the strand size in $r15 - confirm in nve0.fuc | |
262 | add b32 $r1 $r15 | |
263 | ||
264 | // initialise each GPC in sequence by passing in the offset of its | |
265 | // context data in GPCn_CC_SCRATCH[1], and starting its FUC (which | |
266 | // has previously been uploaded by the host) running. | |
267 | // | |
268 | // the GPC fuc init sequence will set GPCn_CC_SCRATCH[0] bit 31 | |
269 | // when it has completed, and return the size of its context data | |
270 | // in GPCn_CC_SCRATCH[1] | |
271 | // | |
272 | ld b32 $r3 D[$r0 + #gpc_count] // loop counter. NOTE(review): the body runs before the count is tested, so this assumes at least one GPC | |
273 | mov $r4 0x2000 | |
274 | sethi $r4 0x500000 // $r4 = 0x502000, advanced by 0x8000 per GPC | |
275 | init_gpc: | |
276 | // setup, and start GPC ucode running | |
277 | add b32 $r14 $r4 0x804 | |
278 | mov b32 $r15 $r1 | |
279 | call #nv_wr32 // CC_SCRATCH[1] = ctx offset | |
280 | add b32 $r14 $r4 0x800 | |
281 | mov b32 $r15 $r2 // $r2 still holds the chipset read from CC_SCRATCH[0] | |
282 | call #nv_wr32 // CC_SCRATCH[0] = chipset | |
283 | add b32 $r14 $r4 0x10c | |
284 | clear b32 $r15 | |
285 | call #nv_wr32 | |
286 | add b32 $r14 $r4 0x104 | |
287 | call #nv_wr32 // ENTRY | |
288 | add b32 $r14 $r4 0x100 | |
289 | mov $r15 2 // CTRL_START_TRIGGER | |
290 | call #nv_wr32 // CTRL | |
291 | ||
292 | // wait for it to complete, and adjust context size | |
293 | add b32 $r14 $r4 0x800 | |
294 | init_gpc_wait: | |
295 | call #nv_rd32 // re-reads the same address each pass, so this relies on $r14 surviving the call | |
296 | xbit $r15 $r15 31 | |
297 | bra e #init_gpc_wait | |
298 | add b32 $r14 $r4 0x804 | |
299 | call #nv_rd32 | |
300 | add b32 $r1 $r15 | |
301 | ||
302 | // next! | |
303 | add b32 $r4 0x8000 | |
304 | sub b32 $r3 1 | |
305 | bra ne #init_gpc | |
306 | ||
307 | // save context size, and tell host we're ready | |
308 | mov $r2 0x800 | |
309 | shl b32 $r2 6 | |
310 | iowr I[$r2 + 0x100] $r1 // CC_SCRATCH[1] = context size | |
311 | add b32 $r2 0x800 // (0x800 << 6) + 0x800 = 0x820 << 6, the same register main_done writes | |
312 | clear b32 $r1 | |
313 | bset $r1 31 | |
314 | iowr I[$r2 + 0x000] $r1 // CC_SCRATCH[0] |= 0x80000000 | |
315 | ||
316 | // Main program loop, very simple, sleeps until woken up by the interrupt | |
317 | // handler, pulls a command from the queue and executes its handler | |
318 | // Commands: 0x4001 context switch (queued by ih), 0x0001 set channel, 0x0002 save context; anything else is reported to the host | |
319 | main: | |
320 | // sleep until we have something to do | |
321 | bset $flags $p0 // ih clears $p0 on its way out, which is what ends the sleep | |
322 | sleep $p0 | |
323 | mov $r13 #cmd_queue | |
324 | call #queue_get | |
325 | bra $p1 #main // presumably $p1 is set when the queue was empty - confirm in nve0.fuc | |
326 | ||
327 | // context switch, requested by GPU? | |
328 | cmpu b32 $r14 0x4001 | |
329 | bra ne #main_not_ctx_switch | |
330 | trace_set(T_AUTO) | |
331 | mov $r1 0xb00 | |
332 | shl b32 $r1 6 | |
333 | iord $r2 I[$r1 + 0x100] // CHAN_NEXT | |
334 | iord $r1 I[$r1 + 0x000] // CHAN_CUR | |
335 | ||
336 | xbit $r3 $r1 31 // bit 31 clear in CHAN_CUR: handled as no previous channel | |
337 | bra e #chsw_no_prev | |
338 | xbit $r3 $r2 31 // bit 31 clear in CHAN_NEXT: handled as no next channel | |
339 | bra e #chsw_prev_no_next | |
340 | push $r2 // keep CHAN_NEXT across the save of the current channel | |
341 | mov b32 $r2 $r1 | |
342 | trace_set(T_SAVE) | |
343 | bclr $flags $p1 // save... | |
344 | bset $flags $p2 // ...with a load to follow | |
345 | call #ctx_xfer | |
346 | trace_clr(T_SAVE); | |
347 | pop $r2 | |
348 | trace_set(T_LOAD); | |
349 | bset $flags $p1 // load, $p2 is still set: a save came before it | |
350 | call #ctx_xfer | |
351 | trace_clr(T_LOAD); | |
352 | bra #chsw_done | |
353 | chsw_prev_no_next: | |
354 | push $r2 | |
355 | mov b32 $r2 $r1 | |
356 | bclr $flags $p1 // save only, no load follows | |
357 | bclr $flags $p2 | |
358 | call #ctx_xfer | |
359 | pop $r2 | |
360 | mov $r1 0xb00 | |
361 | shl b32 $r1 6 | |
362 | iowr I[$r1] $r2 // CHAN_CUR = the CHAN_NEXT value read above | |
363 | bra #chsw_done | |
364 | chsw_no_prev: | |
365 | xbit $r3 $r2 31 | |
366 | bra e #chsw_done | |
367 | bset $flags $p1 // load only, no save came before it | |
368 | bclr $flags $p2 | |
369 | call #ctx_xfer | |
370 | ||
371 | // ack the context switch request | |
372 | chsw_done: | |
373 | mov $r1 0xb0c | |
374 | shl b32 $r1 6 | |
375 | mov $r2 1 | |
376 | iowr I[$r1 + 0x000] $r2 // 0x409b0c | |
377 | trace_clr(T_AUTO) | |
378 | bra #main | |
379 | ||
380 | // request to set current channel? (*not* a context switch) | |
381 | main_not_ctx_switch: | |
382 | cmpu b32 $r14 0x0001 | |
383 | bra ne #main_not_ctx_chan | |
384 | mov b32 $r2 $r15 // the command's data word is the channel address | |
385 | call #ctx_chan | |
386 | bra #main_done | |
387 | ||
388 | // request to store current channel context? | |
389 | main_not_ctx_chan: | |
390 | cmpu b32 $r14 0x0002 | |
391 | bra ne #main_not_ctx_save | |
392 | trace_set(T_SAVE) | |
393 | bclr $flags $p1 | |
394 | bclr $flags $p2 | |
395 | call #ctx_xfer | |
396 | trace_clr(T_SAVE) | |
397 | bra #main_done | |
398 | ||
399 | main_not_ctx_save: | |
400 | shl b32 $r15 $r14 16 | |
401 | or $r15 E_BAD_COMMAND | |
402 | call #error | |
403 | bra #main | |
404 | ||
405 | main_done: | |
406 | mov $r1 0x820 | |
407 | shl b32 $r1 6 | |
408 | clear b32 $r2 | |
409 | bset $r2 31 | |
410 | iowr I[$r1 + 0x000] $r2 // CC_SCRATCH[0] |= 0x80000000 | |
411 | bra #main | |
412 | ||
413 | // interrupt handler: queues fifo commands and context switch requests for main, forwards anything else to the host, then acks what it read from INTR | |
414 | ih: | |
415 | push $r8 | |
416 | mov $r8 $flags | |
417 | push $r8 | |
418 | push $r9 | |
419 | push $r10 | |
420 | push $r11 | |
421 | push $r13 | |
422 | push $r14 | |
423 | push $r15 | |
424 | ||
425 | // incoming fifo command? | |
426 | iord $r10 I[$r0 + 0x200] // INTR, $r10 must keep this value until the ack at the end | |
427 | and $r11 $r10 0x00000004 | |
428 | bra e #ih_no_fifo | |
429 | // queue incoming fifo command for later processing | |
430 | mov $r11 0x1900 | |
431 | mov $r13 #cmd_queue | |
432 | iord $r14 I[$r11 + 0x100] // FIFO_CMD | |
433 | iord $r15 I[$r11 + 0x000] // FIFO_DATA | |
434 | call #queue_put | |
435 | add b32 $r11 0x400 | |
436 | mov $r14 1 | |
437 | iowr I[$r11 + 0x000] $r14 // FIFO_ACK | |
438 | ||
439 | // context switch request? | |
440 | ih_no_fifo: | |
441 | and $r11 $r10 0x00000100 | |
442 | bra e #ih_no_ctxsw | |
443 | // enqueue a context switch for later processing | |
444 | mov $r13 #cmd_queue | |
445 | mov $r14 0x4001 | |
446 | call #queue_put | |
447 | ||
448 | // anything we didn't handle, bring it to the host's attention | |
449 | ih_no_ctxsw: | |
450 | mov $r11 0x104 | |
451 | not b32 $r11 | |
452 | and $r11 $r10 $r11 // $r11 = pending bits other than fifo (0x004) and ctxsw (0x100) | |
453 | bra e #ih_no_other | |
454 | mov $r13 0xc1c // address goes in $r13 (saved above, dead after the last queue_put) so $r10 still holds INTR | |
455 | shl b32 $r13 6 | |
456 | iowr I[$r13] $r11 // INTR_UP_SET | |
457 | ||
458 | // ack, and wake up main() | |
459 | ih_no_other: | |
460 | iowr I[$r0 + 0x100] $r10 // INTR_ACK, acks exactly the bits read at entry | |
461 | ||
462 | pop $r15 | |
463 | pop $r14 | |
464 | pop $r13 | |
465 | pop $r11 | |
466 | pop $r10 | |
467 | pop $r9 | |
468 | pop $r8 | |
469 | mov $flags $r8 | |
470 | pop $r8 | |
471 | bclr $flags $p0 // done after restoring $flags so main's sleep on $p0 ends | |
472 | iret | |
473 | ||
1978a2f2 BS |
474 | // Again, not real sure. Writes ($r15 | 0x10) to 0x404170; ctx_4170w polls for the 0x10 bit to clear |
475 | // | |
476 | // In: $r15 value to set 0x404170 to | |
477 | // Out: $r14 and $r15 are overwritten | |
478 | ctx_4170s: | |
479 | mov $r14 0x4170 | |
480 | sethi $r14 0x400000 | |
481 | or $r15 0x10 | |
482 | call #nv_wr32 | |
483 | ret | |
484 | ||
485 | // Waits for a ctx_4170s() call to complete: re-reads 0x404170 until bit 4 (0x10) reads back clear | |
486 | // Out: $r14 and $r15 are overwritten; there is no timeout | |
487 | ctx_4170w: | |
488 | mov $r14 0x4170 | |
489 | sethi $r14 0x400000 | |
490 | call #nv_rd32 | |
491 | and $r15 0x10 | |
492 | bra ne #ctx_4170w | |
493 | ret | |
494 | ||
495 | // Disables various things, waits a bit, and re-enables them.. | |
496 | // | |
497 | // Not sure how exactly this helps, perhaps "ENABLE" is not such a | |
498 | // good description for the bits we turn off? Anyways, without this, | |
499 | // funny things happen. | |
500 | // Takes no arguments; $r14 and $r15 are overwritten | |
501 | ctx_redswitch: | |
502 | mov $r14 0x614 | |
503 | shl b32 $r14 6 | |
504 | mov $r15 0x270 | |
505 | iowr I[$r14] $r15 // HUB_RED_SWITCH = ENABLE_GPC, POWER_ALL | |
506 | mov $r15 8 // the wait is a plain 8-pass countdown | |
507 | ctx_redswitch_delay: | |
508 | sub b32 $r15 1 | |
509 | bra ne #ctx_redswitch_delay | |
510 | mov $r15 0x770 | |
511 | iowr I[$r14] $r15 // HUB_RED_SWITCH = ENABLE_ALL, POWER_ALL | |
512 | ret | |
513 | ||
514 | // Not a clue what this is for, except that unless the value is 0x10, the | |
515 | // strand context is saved (and presumably restored) incorrectly.. | |
516 | // | |
517 | // In: $r15 value to set to (0x00/0x10 are used) | |
518 | // Out: $r14 is overwritten; the same $r15 is passed to both register writes, so this relies on it surviving the first one | |
519 | ctx_86c: | |
520 | mov $r14 0x86c | |
521 | shl b32 $r14 6 | |
522 | iowr I[$r14] $r15 // HUB(0x86c) = val | |
523 | mov $r14 -0x75ec // 0x408a14 | |
524 | sethi $r14 0x400000 | |
525 | call #nv_wr32 // ROP(0xa14) = val | |
526 | mov $r14 -0x5794 // 0x41a86c | |
527 | sethi $r14 0x410000 | |
528 | call #nv_wr32 // GPC(0x86c) = val | |
529 | ret | |
530 | ||
531 | // ctx_load - loads a channel's ctxctl data, and selects its vm | |
532 | // | |
533 | // In: $r2 channel address | |
534 | // Out: ctx_current and the 256 bytes at chan_data are refreshed; $r1-$r4 and $r10 are overwritten | |
535 | ctx_load: | |
536 | trace_set(T_CHAN) | |
537 | ||
538 | // switch to channel, somewhat magic in parts.. | |
539 | mov $r10 12 // DONE_UNK12 | |
540 | call #wait_donez | |
541 | mov $r1 0xa24 | |
542 | shl b32 $r1 6 | |
543 | iowr I[$r1 + 0x000] $r0 // 0x409a24 | |
544 | mov $r3 0xb00 | |
545 | shl b32 $r3 6 | |
546 | iowr I[$r3 + 0x100] $r2 // CHAN_NEXT | |
547 | mov $r1 0xa0c | |
548 | shl b32 $r1 6 | |
549 | mov $r4 7 | |
550 | iowr I[$r1 + 0x000] $r2 // MEM_CHAN | |
551 | iowr I[$r1 + 0x100] $r4 // MEM_CMD | |
552 | ctx_chan_wait_0: | |
553 | iord $r4 I[$r1 + 0x100] // poll MEM_CMD until its low five bits read back as zero | |
554 | and $r4 0x1f | |
555 | bra ne #ctx_chan_wait_0 | |
556 | iowr I[$r3 + 0x000] $r2 // CHAN_CUR | |
557 | ||
558 | // load channel header, fetch PGRAPH context pointer | |
559 | mov $xtargets $r0 | |
560 | bclr $r2 31 | |
561 | shl b32 $r2 4 | |
562 | add b32 $r2 2 // $r2 = ((channel address without bit 31) << 4) + 2 | |
563 | ||
564 | trace_set(T_LCHAN) | |
565 | mov $r1 0xa04 | |
566 | shl b32 $r1 6 | |
567 | iowr I[$r1 + 0x000] $r2 // MEM_BASE | |
568 | mov $r1 0xa20 | |
569 | shl b32 $r1 6 | |
570 | mov $r2 0x0002 | |
571 | sethi $r2 0x80000000 | |
572 | iowr I[$r1 + 0x000] $r2 // MEM_TARGET = vram | |
573 | mov $r1 0x10 // chan + 0x0210 | |
574 | mov $r2 #xfer_data | |
575 | sethi $r2 0x00020000 // 16 bytes | |
576 | xdld $r1 $r2 | |
577 | xdwait | |
578 | trace_clr(T_LCHAN) | |
579 | ||
580 | // update current context: ctx_current = (second fetched word << 24) | (first fetched word >> 8) | |
581 | ld b32 $r1 D[$r0 + #xfer_data + 4] | |
582 | shl b32 $r1 24 | |
583 | ld b32 $r2 D[$r0 + #xfer_data + 0] | |
584 | shr b32 $r2 8 | |
585 | or $r1 $r2 | |
586 | st b32 D[$r0 + #ctx_current] $r1 | |
587 | ||
588 | // set transfer base to start of context, and fetch context header | |
589 | trace_set(T_LCTXH) | |
590 | mov $r2 0xa04 | |
591 | shl b32 $r2 6 | |
592 | iowr I[$r2 + 0x000] $r1 // MEM_BASE | |
593 | mov $r2 1 | |
594 | mov $r1 0xa20 | |
595 | shl b32 $r1 6 | |
596 | iowr I[$r1 + 0x000] $r2 // MEM_TARGET = vm | |
597 | mov $r1 #chan_data | |
598 | sethi $r1 0x00060000 // 256 bytes | |
599 | xdld $r0 $r1 | |
600 | xdwait | |
601 | trace_clr(T_LCTXH) | |
602 | ||
603 | trace_clr(T_CHAN) | |
604 | ret | |
605 | ||
606 | // ctx_chan - handler for HUB_SET_CHAN command, will set a channel as | |
607 | // the active channel for ctxctl, but not actually transfer | |
608 | // any context data. intended for use only during initial | |
609 | // context construction. | |
610 | // | |
611 | // In: $r2 channel address | |
612 | // Out: everything ctx_load overwrites, plus $r1 and $r2 are reused here | |
613 | ctx_chan: | |
1978a2f2 BS |
614 | call #ctx_load |
615 | mov $r10 12 // DONE_UNK12 | |
616 | call #wait_donez | |
617 | mov $r1 0xa10 | |
618 | shl b32 $r1 6 | |
619 | mov $r2 5 | |
620 | iowr I[$r1 + 0x000] $r2 // MEM_CMD = 5 (???) | |
621 | ctx_chan_wait: | |
622 | iord $r2 I[$r1 + 0x000] // poll MEM_CMD until the whole register reads back as zero | |
623 | or $r2 $r2 | |
624 | bra ne #ctx_chan_wait | |
1978a2f2 BS |
625 | ret |
626 | ||
627 | // Execute per-context state overrides list | |
628 | // | |
629 | // Only executed on the first load of a channel. Might want to look into | |
630 | // removing this and having the host directly modify the channel's context | |
631 | // to change this state... The nouveau DRM already builds this list as | |
632 | // it's definitely needed for NVIDIA's, so we may as well use it for now | |
633 | // | |
634 | // Input: $r1 mmio list length, counted in 8-byte entries; the only caller in this file skips the call when it is zero | |
635 | // Out: chan_mmio_count is zeroed and chan_data is written back to the context; $r1-$r5, $r14, $r15 are overwritten | |
636 | ctx_mmio_exec: | |
637 | // set transfer base to be the mmio list | |
638 | ld b32 $r3 D[$r0 + #chan_mmio_address] | |
639 | mov $r2 0xa04 | |
640 | shl b32 $r2 6 | |
641 | iowr I[$r2 + 0x000] $r3 // MEM_BASE | |
642 | ||
643 | clear b32 $r3 // $r3 = byte offset into the list | |
644 | ctx_mmio_loop: | |
645 | // fetch next 256 bytes of mmio list if necessary | |
646 | and $r4 $r3 0xff // $r4 = offset within the 256-byte buffer, zero means a refill is due | |
647 | bra ne #ctx_mmio_pull | |
648 | mov $r5 #xfer_data | |
649 | sethi $r5 0x00060000 // 256 bytes | |
650 | xdld $r3 $r5 | |
651 | xdwait | |
652 | ||
653 | // execute a single list entry: first word goes to nv_wr32 in $r14, second word in $r15 | |
654 | ctx_mmio_pull: | |
655 | ld b32 $r14 D[$r4 + #xfer_data + 0x00] | |
656 | ld b32 $r15 D[$r4 + #xfer_data + 0x04] | |
657 | call #nv_wr32 | |
658 | ||
659 | // next! | |
660 | add b32 $r3 8 | |
661 | sub b32 $r1 1 | |
662 | bra ne #ctx_mmio_loop | |
663 | ||
664 | // set transfer base back to the current context | |
665 | ctx_mmio_done: | |
666 | ld b32 $r3 D[$r0 + #ctx_current] | |
667 | iowr I[$r2 + 0x000] $r3 // MEM_BASE | |
668 | ||
669 | // disable the mmio list now, we don't need/want to execute it again | |
670 | st b32 D[$r0 + #chan_mmio_count] $r0 | |
671 | mov $r1 #chan_data | |
672 | sethi $r1 0x00060000 // 256 bytes | |
673 | xdst $r0 $r1 | |
674 | xdwait | |
675 | ret | |
676 | ||
677 | // Transfer HUB context data between GPU and storage area | |
678 | // | |
679 | // In: $r2 channel address | |
680 | // $p1 clear on save, set on load | |
681 | // $p2 set if opposite direction done/will be done, so: | |
682 | // on save it means: "a load will follow this save" | |
683 | // on load it means: "a save preceded this load" | |
684 | // | |
685 | ctx_xfer: | |
eca15296 BS |
686 | // according to mwk, some kind of wait for idle |
687 | mov $r15 0xc00 | |
688 | shl b32 $r15 6 | |
689 | mov $r14 4 | |
690 | iowr I[$r15 + 0x200] $r14 | |
691 | ctx_xfer_idle: | |
692 | iord $r14 I[$r15 + 0x000] // spin until bit 13 (0x2000) reads back clear | |
693 | and $r14 0x2000 | |
694 | bra ne #ctx_xfer_idle | |
695 | ||
1978a2f2 BS |
696 | bra not $p1 #ctx_xfer_pre |
697 | bra $p2 #ctx_xfer_pre_load | |
698 | ctx_xfer_pre: | |
699 | mov $r15 0x10 | |
700 | call #ctx_86c | |
1978a2f2 BS |
701 | bra not $p1 #ctx_xfer_exec |
702 | ||
703 | ctx_xfer_pre_load: | |
704 | mov $r15 2 | |
705 | call #ctx_4170s | |
706 | call #ctx_4170w | |
707 | call #ctx_redswitch | |
708 | clear b32 $r15 | |
709 | call #ctx_4170s | |
710 | call #ctx_load // consumes the channel address in $r2 and refreshes ctx_current | |
711 | ||
712 | // fetch context pointer, and initiate xfer on all GPCs | |
713 | ctx_xfer_exec: | |
714 | ld b32 $r1 D[$r0 + #ctx_current] | |
715 | mov $r2 0x414 | |
716 | shl b32 $r2 6 | |
717 | iowr I[$r2 + 0x000] $r0 // BAR_STATUS = reset | |
718 | mov $r14 -0x5b00 // 0x41a500 | |
719 | sethi $r14 0x410000 | |
720 | mov b32 $r15 $r1 | |
721 | call #nv_wr32 // GPC_BCAST_WRCMD_DATA = ctx pointer | |
722 | add b32 $r14 4 | |
723 | xbit $r15 $flags $p1 | |
724 | xbit $r2 $flags $p2 | |
725 | shl b32 $r2 1 | |
726 | or $r15 $r2 // type = $p1 in bit 0, $p2 in bit 1 | |
727 | call #nv_wr32 // GPC_BCAST_WRCMD_CMD = GPC_XFER(type) | |
728 | ||
729 | // strands | |
730 | mov $r1 0x4afc | |
731 | sethi $r1 0x20000 | |
732 | mov $r2 0xc | |
733 | iowr I[$r1] $r2 // STRAND_CMD(0x3f) = 0x0c | |
734 | call #strand_wait | |
735 | mov $r2 0x47fc | |
736 | sethi $r2 0x20000 | |
737 | iowr I[$r2] $r0 // STRAND_FIRST_GENE(0x3f) = 0x00 | |
738 | xbit $r2 $flags $p1 | |
739 | add b32 $r2 3 // 3 when saving, 4 when loading | |
740 | iowr I[$r1] $r2 // STRAND_CMD(0x3f) = 0x03/0x04 (SAVE/LOAD) | |
741 | ||
742 | // mmio context | |
743 | xbit $r10 $flags $p1 // direction | |
744 | or $r10 6 // first, last | |
745 | mov $r11 0 // base = 0 | |
746 | ld b32 $r12 D[$r0 + #hub_mmio_list_head] | |
747 | ld b32 $r13 D[$r0 + #hub_mmio_list_tail] | |
748 | mov $r14 0 // not multi | |
749 | call #mmctx_xfer | |
750 | ||
751 | // wait for GPCs to all complete | |
752 | mov $r10 8 // DONE_BAR | |
753 | call #wait_doneo | |
754 | ||
755 | // wait for strand xfer to complete | |
756 | call #strand_wait | |
757 | ||
758 | // post-op: the MEM_CMD step below runs on save only | |
759 | bra $p1 #ctx_xfer_post | |
760 | mov $r10 12 // DONE_UNK12 | |
761 | call #wait_donez | |
762 | mov $r1 0xa10 | |
763 | shl b32 $r1 6 | |
764 | mov $r2 5 | |
765 | iowr I[$r1] $r2 // MEM_CMD | |
766 | ctx_xfer_post_save_wait: | |
767 | iord $r2 I[$r1] // poll MEM_CMD until the whole register reads back as zero | |
768 | or $r2 $r2 | |
769 | bra ne #ctx_xfer_post_save_wait | |
770 | ||
771 | bra $p2 #ctx_xfer_done // a load follows this save, so the steps below are left to that load | |
772 | ctx_xfer_post: | |
773 | mov $r15 2 | |
774 | call #ctx_4170s | |
775 | clear b32 $r15 | |
776 | call #ctx_86c | |
777 | call #strand_post | |
778 | call #ctx_4170w | |
779 | clear b32 $r15 | |
780 | call #ctx_4170s | |
781 | ||
782 | bra not $p1 #ctx_xfer_no_post_mmio | |
783 | ld b32 $r1 D[$r0 + #chan_mmio_count] // after a load, run the channel's override list if it has one | |
784 | or $r1 $r1 | |
785 | bra e #ctx_xfer_no_post_mmio | |
786 | call #ctx_mmio_exec | |
787 | ||
788 | ctx_xfer_no_post_mmio: | |
1978a2f2 BS |
789 | |
790 | ctx_xfer_done: | |
791 | ret | |
792 | ||
793 | .align 256 |