Merge branch 'timer/cleanup' into late/mvebu2
[GitHub/mt8127/android_kernel_alcatel_ttab.git] / drivers / gpu / drm / nouveau / core / engine / graph / fuc / hubnve0.fuc
CommitLineData
1978a2f2
BS
1/* fuc microcode for nve0 PGRAPH/HUB
2 *
3 * Copyright 2011 Red Hat Inc.
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice shall be included in
13 * all copies or substantial portions of the Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
19 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
20 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
21 * OTHER DEALINGS IN THE SOFTWARE.
22 *
23 * Authors: Ben Skeggs
24 */
25
26/* To build:
27 * m4 hubnve0.fuc | envyas -a -w -m fuc -V nva3 -o hubnve0.fuc.h
28 */
29
30.section #nve0_grhub_data
31include(`nve0.fuc')
32gpc_count: .b32 0 // enabled GPC count, stored by init (low 5 bits of 0x409604)
33rop_count: .b32 0 // enabled ROP count, stored by init (bits 16:20 of 0x409604)
34cmd_queue: queue_init
35hub_mmio_list_head: .b32 0 // copied by init from the matching chipsets entry
36hub_mmio_list_tail: .b32 0 // copied by init from the matching chipsets entry
37
38ctx_current: .b32 0 // written by ctx_load, later reused as MEM_BASE / GPC ctx pointer
39
40chipsets: // 8-byte entries: b32 chipset id, b16 mmio list head, b16 mmio list tail
41.b8 0xe4 0 0 0
42.b16 #nve4_hub_mmio_head
43.b16 #nve4_hub_mmio_tail
44.b8 0xe7 0 0 0
45.b16 #nve4_hub_mmio_head
46.b16 #nve4_hub_mmio_tail
eca15296
BS
47.b8 0xe6 0 0 0
48.b16 #nve4_hub_mmio_head
49.b16 #nve4_hub_mmio_tail
1978a2f2
BS
50.b8 0 0 0 0 // id 0 terminates the table (init_find_chipset stops here)
52nve4_hub_mmio_head: // start of the HUB mmio context list; every chipsets entry points here. Entries presumably are (first register, count) - TODO confirm against nve0.fuc
53mmctx_data(0x17e91c, 2)
54mmctx_data(0x400204, 2)
55mmctx_data(0x404010, 7)
56mmctx_data(0x4040a8, 9)
57mmctx_data(0x4040d0, 7)
58mmctx_data(0x4040f8, 1)
59mmctx_data(0x404130, 3)
60mmctx_data(0x404150, 3)
61mmctx_data(0x404164, 1)
62mmctx_data(0x4041a0, 4)
63mmctx_data(0x404200, 4)
64mmctx_data(0x404404, 14)
65mmctx_data(0x404460, 4)
66mmctx_data(0x404480, 1)
67mmctx_data(0x404498, 1)
68mmctx_data(0x404604, 4)
69mmctx_data(0x404618, 4)
70mmctx_data(0x40462c, 2)
71mmctx_data(0x404640, 1)
72mmctx_data(0x404654, 1)
73mmctx_data(0x404660, 1)
74mmctx_data(0x404678, 19)
75mmctx_data(0x4046c8, 3)
76mmctx_data(0x404700, 3)
77mmctx_data(0x404718, 10)
78mmctx_data(0x404744, 2)
79mmctx_data(0x404754, 1)
80mmctx_data(0x405800, 1)
81mmctx_data(0x405830, 3)
82mmctx_data(0x405854, 1)
83mmctx_data(0x405870, 4)
84mmctx_data(0x405a00, 2)
85mmctx_data(0x405a18, 1)
86mmctx_data(0x405b00, 1)
87mmctx_data(0x405b10, 1)
88mmctx_data(0x406020, 1)
89mmctx_data(0x406028, 4)
90mmctx_data(0x4064a8, 2)
91mmctx_data(0x4064b4, 2)
92mmctx_data(0x4064c0, 12)
93mmctx_data(0x4064fc, 1)
94mmctx_data(0x407040, 1)
95mmctx_data(0x407804, 1)
96mmctx_data(0x40780c, 6)
97mmctx_data(0x4078bc, 1)
98mmctx_data(0x408000, 7)
99mmctx_data(0x408064, 1)
100mmctx_data(0x408800, 3)
101mmctx_data(0x408840, 1)
102mmctx_data(0x408900, 3)
103mmctx_data(0x408980, 1)
104nve4_hub_mmio_tail: // end of the list; init loads head/tail into $r14/$r15 before calling mmctx_size
105
106.align 256
107chan_data: // 256-byte buffer: ctx_load reads the context header into it, ctx_mmio_exec writes it back
108chan_mmio_count: .b32 0 // first header word; non-zero makes ctx_xfer run ctx_mmio_exec after a load
109chan_mmio_address: .b32 0 // second header word; used as MEM_BASE for the mmio list
110
111.align 256
112xfer_data: .b32 0 // transfer scratch: 16-byte and 256-byte xdld targets start here, so it presumably must stay last in this section - TODO confirm
113
114.section #nve0_grhub_code
115bra #init // ucode entry point: skip over the shared helper routines pulled in below
116define(`include_code')
117include(`nve0.fuc')
118
119// reports an exception to the host: stores the code in a scratch register, then raises INTR_UP bit 0
120//
121// In: $r15 error code (see nve0.fuc)
122// Out: $r15 is left holding 1; $r14 is preserved
123error:
124 push $r14
125 mov $r14 0x814
126 shl b32 $r14 6
127 iowr I[$r14 + 0x000] $r15 // CC_SCRATCH[5] = error code
128 mov $r14 0xc1c
129 shl b32 $r14 6
130 mov $r15 1
131 iowr I[$r14 + 0x000] $r15 // INTR_UP_SET
132 pop $r14
133 ret
134
135// HUB fuc initialisation, executed by triggering ucode start, will
136// fall through to main loop after completion.
137//
138// Input:
139// CC_SCRATCH[0]: chipset (PMC_BOOT_0 read returns 0x0bad0bad... sigh)
140//
141// Output:
142// CC_SCRATCH[0]:
143// 31:31: set to signal completion
144// CC_SCRATCH[1]:
145// 31:0: total PGRAPH context size
146// (256 reserved bytes + hub mmio list + strands + the size reported by each GPC)
147init:
148 clear b32 $r0 // $r0 stays zero and is used as the constant 0 throughout this file
149 mov $sp $r0
150 mov $xdbase $r0
151
152 // enable fifo access
153 mov $r1 0x1200
154 mov $r2 2
155 iowr I[$r1 + 0x000] $r2 // FIFO_ENABLE
156
157 // setup i0 handler, and route all interrupts to it
158 mov $r1 #ih
159 mov $iv0 $r1
160 mov $r1 0x400
161 iowr I[$r1 + 0x300] $r0 // INTR_DISPATCH
162
163 // route HUB_CHANNEL_SWITCH to fuc interrupt 8
164 mov $r3 0x404
165 shl b32 $r3 6
166 mov $r2 0x2003 // { HUB_CHANNEL_SWITCH, ZERO } -> intr 8
167 iowr I[$r3 + 0x000] $r2
168
169 // not sure what these are, route them because NVIDIA does, and
170 // the IRQ handler will signal the host if we ever get one.. we
171 // may find out if/why we need to handle these if so..
172 //
173 mov $r2 0x2004
174 iowr I[$r3 + 0x004] $r2 // { 0x04, ZERO } -> intr 9
175 mov $r2 0x200b
176 iowr I[$r3 + 0x008] $r2 // { 0x0b, ZERO } -> intr 10
177 mov $r2 0x200c
178 iowr I[$r3 + 0x01c] $r2 // { 0x0c, ZERO } -> intr 15
179
180 // enable all INTR_UP interrupts
181 mov $r2 0xc24
182 shl b32 $r2 6
183 not b32 $r3 $r0 // $r3 = ~0
184 iowr I[$r2] $r3
185
186 // enable fifo, ctxsw, 9, 10, 15 interrupts
187 mov $r2 -0x78fc // 0x8704
188 sethi $r2 0 // drop the sign-extended upper half, leaving 0x00008704
189 iowr I[$r1 + 0x000] $r2 // INTR_EN_SET
190
191 // fifo level triggered, rest edge
192 sub b32 $r1 0x100 // $r1 = 0x300
193 mov $r2 4 // bit 2 is the fifo interrupt tested by ih
194 iowr I[$r1] $r2
195
196 // enable interrupts
197 bset $flags ie0
198
199 // fetch enabled GPC/ROP counts
200 mov $r14 -0x69fc // 0x409604
201 sethi $r14 0x400000
202 call #nv_rd32
203 extr $r1 $r15 16:20
204 st b32 D[$r0 + #rop_count] $r1
205 and $r15 0x1f
206 st b32 D[$r0 + #gpc_count] $r15
207
208 // set BAR_REQMASK to GPC mask
209 mov $r1 1
210 shl b32 $r1 $r15
211 sub b32 $r1 1 // $r1 = (1 << gpc_count) - 1
212 mov $r2 0x40c
213 shl b32 $r2 6
214 iowr I[$r2 + 0x000] $r1
215 iowr I[$r2 + 0x100] $r1
216
217 // find context data for this chipset
218 mov $r2 0x800
219 shl b32 $r2 6
220 iord $r2 I[$r2 + 0x000] // CC_SCRATCH[0]
221 mov $r15 #chipsets - 8 // pre-decrement so the first pass lands on entry 0
222 init_find_chipset:
223 add b32 $r15 8 // advance to the next 8-byte table entry
224 ld b32 $r3 D[$r15 + 0x00]
225 cmpu b32 $r3 $r2
226 bra e #init_context
227 cmpu b32 $r3 0 // id 0 marks the end of the table
228 bra ne #init_find_chipset
229 // unknown chipset
230 ret // NOTE(review): init is entered by branch with $sp zeroed, and nothing is signalled to the host on this path - confirm this is intended
231
232 // context size calculation, reserve first 256 bytes for use by fuc
233 init_context:
234 mov $r1 256 // $r1 accumulates the total context size from here on
235
236 // calculate size of mmio context data
237 ld b16 $r14 D[$r15 + 4]
238 ld b16 $r15 D[$r15 + 6]
239 sethi $r14 0
240 st b32 D[$r0 + #hub_mmio_list_head] $r14
241 st b32 D[$r0 + #hub_mmio_list_tail] $r15
242 call #mmctx_size // result is consumed from $r15 below
243
244 // set mmctx base addresses now so we don't have to do it later,
245 // they don't (currently) ever change
246 mov $r3 0x700
247 shl b32 $r3 6
248 shr b32 $r4 $r1 8 // base is programmed in units of 256 bytes
249 iowr I[$r3 + 0x000] $r4 // MMCTX_SAVE_SWBASE
250 iowr I[$r3 + 0x100] $r4 // MMCTX_LOAD_SWBASE
251 add b32 $r3 0x1300
252 add b32 $r1 $r15
253 shr b32 $r15 2 // byte size -> 32-bit word count
254 iowr I[$r3 + 0x000] $r15 // MMCTX_LOAD_COUNT, wtf for?!?
255
256 // strands, base offset needs to be aligned to 256 bytes
257 shr b32 $r1 8
258 add b32 $r1 1 // always moves to the next 256-byte boundary, even if already aligned
259 shl b32 $r1 8
260 mov b32 $r15 $r1
261 call #strand_ctx_init
262 add b32 $r1 $r15
263
264 // initialise each GPC in sequence by passing in the offset of its
265 // context data in GPCn_CC_SCRATCH[1], and starting its FUC (which
266 // has previously been uploaded by the host) running.
267 //
268 // the GPC fuc init sequence will set GPCn_CC_SCRATCH[0] bit 31
269 // when it has completed, and return the size of its context data
270 // in GPCn_CC_SCRATCH[1]
271 //
272 ld b32 $r3 D[$r0 + #gpc_count] // $r3 = GPCs still to start
273 mov $r4 0x2000
274 sethi $r4 0x500000 // $r4 = 0x502000, register base used for the first GPC
275 init_gpc:
276 // setup, and start GPC ucode running
277 add b32 $r14 $r4 0x804
278 mov b32 $r15 $r1
279 call #nv_wr32 // CC_SCRATCH[1] = ctx offset
280 add b32 $r14 $r4 0x800
281 mov b32 $r15 $r2 // assumes $r2 still holds the chipset read above - TODO confirm helpers preserve it
282 call #nv_wr32 // CC_SCRATCH[0] = chipset
283 add b32 $r14 $r4 0x10c
284 clear b32 $r15
285 call #nv_wr32
286 add b32 $r14 $r4 0x104 // $r15 is not reloaded: assumes nv_wr32 preserved the zero
287 call #nv_wr32 // ENTRY
288 add b32 $r14 $r4 0x100
289 mov $r15 2 // CTRL_START_TRIGGER
290 call #nv_wr32 // CTRL
291
292 // wait for it to complete, and adjust context size
293 add b32 $r14 $r4 0x800
294 init_gpc_wait:
295 call #nv_rd32
296 xbit $r15 $r15 31
297 bra e #init_gpc_wait // spin until bit 31 of GPCn_CC_SCRATCH[0] is set
298 add b32 $r14 $r4 0x804
299 call #nv_rd32
300 add b32 $r1 $r15
301
302 // next!
303 add b32 $r4 0x8000 // each GPC register block is 0x8000 apart
304 sub b32 $r3 1
305 bra ne #init_gpc
306
307 // save context size, and tell host we're ready
308 mov $r2 0x800
309 shl b32 $r2 6
310 iowr I[$r2 + 0x100] $r1 // CC_SCRATCH[1] = context size
311 add b32 $r2 0x800 // now 0x820 << 6, the same address main_done writes
312 clear b32 $r1
313 bset $r1 31
314 iowr I[$r2 + 0x000] $r1 // CC_SCRATCH[0] |= 0x80000000
315
316// Main program loop, very simple, sleeps until woken up by the interrupt
317// handler, pulls a command from the queue and executes its handler
318// ($r14 = command, $r15 = data, in the order ih queued them). Never returns.
319main:
320 // sleep until we have something to do
321 bset $flags $p0
322 sleep $p0 // ih clears $p0 on exit, which ends the sleep
323 mov $r13 #cmd_queue
324 call #queue_get
325 bra $p1 #main // presumably $p1 is set by the queue helper when nothing was dequeued - TODO confirm
326
327 // context switch, requested by GPU?
328 cmpu b32 $r14 0x4001 // 0x4001 is the command ih queues for a ctxsw interrupt
329 bra ne #main_not_ctx_switch
330 trace_set(T_AUTO)
331 mov $r1 0xb00
332 shl b32 $r1 6
333 iord $r2 I[$r1 + 0x100] // CHAN_NEXT
334 iord $r1 I[$r1 + 0x000] // CHAN_CUR
335
336 xbit $r3 $r1 31 // bit 31 clear in CHAN_CUR: nothing to save
337 bra e #chsw_no_prev
338 xbit $r3 $r2 31 // bit 31 clear in CHAN_NEXT: nothing to load
339 bra e #chsw_prev_no_next
340 push $r2 // keep CHAN_NEXT while $r2 carries the channel being saved
341 mov b32 $r2 $r1
342 trace_set(T_SAVE)
343 bclr $flags $p1 // save ...
344 bset $flags $p2 // ... and a load follows
345 call #ctx_xfer
346 trace_clr(T_SAVE);
347 pop $r2
348 trace_set(T_LOAD);
349 bset $flags $p1 // load; $p2 is still set from the save above
350 call #ctx_xfer
351 trace_clr(T_LOAD);
352 bra #chsw_done
353 chsw_prev_no_next:
354 push $r2
355 mov b32 $r2 $r1
356 bclr $flags $p1 // save only
357 bclr $flags $p2
358 call #ctx_xfer
359 pop $r2
360 mov $r1 0xb00
361 shl b32 $r1 6
362 iowr I[$r1] $r2 // CHAN_CUR = CHAN_NEXT value read earlier
363 bra #chsw_done
364 chsw_no_prev:
365 xbit $r3 $r2 31
366 bra e #chsw_done // neither channel has bit 31 set: just ack
367 bset $flags $p1 // load only
368 bclr $flags $p2
369 call #ctx_xfer
370
371 // ack the context switch request
372 chsw_done:
373 mov $r1 0xb0c
374 shl b32 $r1 6
375 mov $r2 1
376 iowr I[$r1 + 0x000] $r2 // 0x409b0c
377 trace_clr(T_AUTO)
378 bra #main
379
380 // request to set current channel? (*not* a context switch)
381 main_not_ctx_switch:
382 cmpu b32 $r14 0x0001
383 bra ne #main_not_ctx_chan
384 mov b32 $r2 $r15 // command data is the channel address
385 call #ctx_chan
386 bra #main_done
387
388 // request to store current channel context?
389 main_not_ctx_chan:
390 cmpu b32 $r14 0x0002
391 bra ne #main_not_ctx_save
392 trace_set(T_SAVE)
393 bclr $flags $p1 // save, with no load following
394 bclr $flags $p2
395 call #ctx_xfer
396 trace_clr(T_SAVE)
397 bra #main_done
398
399 main_not_ctx_save:
400 shl b32 $r15 $r14 16 // report the unknown command number in the upper half of the error code
401 or $r15 E_BAD_COMMAND
402 call #error
403 bra #main // no completion is signalled for a bad command
404
405 main_done:
406 mov $r1 0x820
407 shl b32 $r1 6
408 clear b32 $r2
409 bset $r2 31
410 iowr I[$r1 + 0x000] $r2 // CC_SCRATCH[0] |= 0x80000000
411 bra #main
412
413// interrupt handler: queues fifo commands and ctxsw requests on cmd_queue, forwards anything else to the host, then acks and wakes main. All registers and $flags are restored, except that $p0 is cleared.
414ih:
415 push $r8
416 mov $r8 $flags
417 push $r8
418 push $r9
419 push $r10
420 push $r11
421 push $r13
422 push $r14
423 push $r15
424
425 // incoming fifo command?
426 iord $r10 I[$r0 + 0x200] // INTR; $r10 must survive until the INTR_ACK write below
427 and $r11 $r10 0x00000004
428 bra e #ih_no_fifo
429 // queue incoming fifo command for later processing
430 mov $r11 0x1900
431 mov $r13 #cmd_queue
432 iord $r14 I[$r11 + 0x100] // FIFO_CMD
433 iord $r15 I[$r11 + 0x000] // FIFO_DATA
434 call #queue_put
435 add b32 $r11 0x400
436 mov $r14 1
437 iowr I[$r11 + 0x000] $r14 // FIFO_ACK
438
439 // context switch request?
440 ih_no_fifo:
441 and $r11 $r10 0x00000100
442 bra e #ih_no_ctxsw
443 // enqueue a context switch for later processing
444 mov $r13 #cmd_queue
445 mov $r14 0x4001 // command code that main treats as a context switch; $r15 (data) is not meaningful here
446 call #queue_put
447
448 // anything we didn't handle, bring it to the host's attention
449 ih_no_ctxsw:
450 mov $r11 0x104
451 not b32 $r11
452 and $r11 $r10 $r11 // $r11 = pending bits other than fifo (0x004) and ctxsw (0x100)
453 bra e #ih_no_other
454 mov $r14 0xc1c // build the address in $r14 (saved/restored by this handler) so $r10 keeps the INTR value
455 shl b32 $r14 6
456 iowr I[$r14] $r11 // INTR_UP_SET
457
458 // ack, and wake up main()
459 ih_no_other:
460 iowr I[$r0 + 0x100] $r10 // INTR_ACK: ack exactly the bits read from INTR at entry
461
462 pop $r15
463 pop $r14
464 pop $r13
465 pop $r11
466 pop $r10
467 pop $r9
468 pop $r8
469 mov $flags $r8
470 pop $r8
471 bclr $flags $p0 // cleared after $flags is restored so main's sleep on $p0 ends
472 iret
473
1978a2f2
BS
474// Again, not real sure what this register does; writes ($r15 | 0x10) to 0x404170
475//
476// In: $r15 value to set 0x404170 to (bit 4 is always set in addition)
477// Clobbers: $r14, $r15 are overwritten here; anything else nv_wr32 touches is not visible from this file
478ctx_4170s:
479 mov $r14 0x4170
480 sethi $r14 0x400000
481 or $r15 0x10 // bit 4 is the one ctx_4170w polls for completion
482 call #nv_wr32
483 ret
484
485// Waits for a ctx_4170s() call to complete, by polling 0x404170 until bit 4 (0x10) reads back as zero
486// No timeout. Overwrites $r14 and $r15.
487ctx_4170w:
488 mov $r14 0x4170
489 sethi $r14 0x400000
490 call #nv_rd32
491 and $r15 0x10
492 bra ne #ctx_4170w // still set: reload the address and read again
493 ret
494
495// Disables various things, waits a bit, and re-enables them..
496//
497// Not sure how exactly this helps, perhaps "ENABLE" is not such a
498// good description for the bits we turn off? Anyways, without this,
499// funny things happen.
500// Overwrites $r14 and $r15; takes no inputs.
501ctx_redswitch:
502 mov $r14 0x614
503 shl b32 $r14 6
504 mov $r15 0x270
505 iowr I[$r14] $r15 // HUB_RED_SWITCH = ENABLE_GPC, POWER_ALL
506 mov $r15 8 // short busy-wait: 8 iterations of the loop below
507 ctx_redswitch_delay:
508 sub b32 $r15 1
509 bra ne #ctx_redswitch_delay
510 mov $r15 0x770
511 iowr I[$r14] $r15 // HUB_RED_SWITCH = ENABLE_ALL, POWER_ALL
512 ret
513
514// Not a clue what this is for, except that unless the value is 0x10, the
515// strand context is saved (and presumably restored) incorrectly..
516//
517// In: $r15 value to set to (0x00/0x10 are used)
518// Overwrites $r14. The same $r15 is written three times, so this assumes nv_wr32 preserves $r15 - TODO confirm
519ctx_86c:
520 mov $r14 0x86c
521 shl b32 $r14 6
522 iowr I[$r14] $r15 // HUB(0x86c) = val
523 mov $r14 -0x75ec // low half 0x8a14
524 sethi $r14 0x400000 // $r14 = 0x408a14
525 call #nv_wr32 // ROP(0xa14) = val
526 mov $r14 -0x5794 // low half 0xa86c
527 sethi $r14 0x410000 // $r14 = 0x41a86c
528 call #nv_wr32 // GPC(0x86c) = val
529 ret
530
531// ctx_load - loads a channel's ctxctl data, and selects its vm
532//
533// In: $r2 channel address
534// Out: ctx_current and the chan_data buffer are refreshed; $r1-$r4 and $r10 are overwritten
535ctx_load:
536 trace_set(T_CHAN)
537
538 // switch to channel, somewhat magic in parts..
539 mov $r10 12 // DONE_UNK12
540 call #wait_donez
541 mov $r1 0xa24
542 shl b32 $r1 6
543 iowr I[$r1 + 0x000] $r0 // 0x409a24
544 mov $r3 0xb00
545 shl b32 $r3 6
546 iowr I[$r3 + 0x100] $r2 // CHAN_NEXT
547 mov $r1 0xa0c
548 shl b32 $r1 6
549 mov $r4 7
550 iowr I[$r1 + 0x000] $r2 // MEM_CHAN
551 iowr I[$r1 + 0x100] $r4 // MEM_CMD
552 ctx_chan_wait_0:
553 iord $r4 I[$r1 + 0x100]
554 and $r4 0x1f
555 bra ne #ctx_chan_wait_0 // spin until the low 5 bits of MEM_CMD read back as zero
556 iowr I[$r3 + 0x000] $r2 // CHAN_CUR
557
558 // load channel header, fetch PGRAPH context pointer
559 mov $xtargets $r0
560 bclr $r2 31 // drop bit 31 of the channel value before using it as an address
561 shl b32 $r2 4
562 add b32 $r2 2
563
564 trace_set(T_LCHAN)
565 mov $r1 0xa04
566 shl b32 $r1 6
567 iowr I[$r1 + 0x000] $r2 // MEM_BASE
568 mov $r1 0xa20
569 shl b32 $r1 6
570 mov $r2 0x0002
571 sethi $r2 0x80000000
572 iowr I[$r1 + 0x000] $r2 // MEM_TARGET = vram
573 mov $r1 0x10 // chan + 0x0210
574 mov $r2 #xfer_data
575 sethi $r2 0x00020000 // 16 bytes
576 xdld $r1 $r2
577 xdwait
578 trace_clr(T_LCHAN)
579
580 // update current context: ctx_current = (word1 << 24) | (word0 >> 8)
581 ld b32 $r1 D[$r0 + #xfer_data + 4]
582 shl b32 $r1 24
583 ld b32 $r2 D[$r0 + #xfer_data + 0]
584 shr b32 $r2 8
585 or $r1 $r2
586 st b32 D[$r0 + #ctx_current] $r1
587
588 // set transfer base to start of context, and fetch context header
589 trace_set(T_LCTXH)
590 mov $r2 0xa04
591 shl b32 $r2 6
592 iowr I[$r2 + 0x000] $r1 // MEM_BASE
593 mov $r2 1
594 mov $r1 0xa20
595 shl b32 $r1 6
596 iowr I[$r1 + 0x000] $r2 // MEM_TARGET = vm
597 mov $r1 #chan_data
598 sethi $r1 0x00060000 // 256 bytes
599 xdld $r0 $r1 // offset 0 of the context lands in chan_data (mmio count and address first)
600 xdwait
601 trace_clr(T_LCTXH)
602
603 trace_clr(T_CHAN)
604 ret
605
606// ctx_chan - handler for HUB_SET_CHAN command, will set a channel as
607// the active channel for ctxctl, but not actually transfer
608// any context data. intended for use only during initial
609// context construction.
610//
611// In: $r2 channel address
612// Overwrites everything ctx_load does, and finishes with $r2 = 0
613ctx_chan:
1978a2f2
BS
614 call #ctx_load
615 mov $r10 12 // DONE_UNK12
616 call #wait_donez
617 mov $r1 0xa10
618 shl b32 $r1 6
619 mov $r2 5
620 iowr I[$r1 + 0x000] $r2 // MEM_CMD = 5 (???)
621 ctx_chan_wait:
622 iord $r2 I[$r1 + 0x000]
623 or $r2 $r2 // sets the flags from $r2 without changing it
624 bra ne #ctx_chan_wait // spin until the register reads back as zero
1978a2f2
BS
625 ret
626
627// Execute per-context state overrides list
628//
629// Only executed on the first load of a channel. Might want to look into
630// removing this and having the host directly modify the channel's context
631// to change this state... The nouveau DRM already builds this list as
632// it's definitely needed for NVIDIA's, so we may as well use it for now
633//
634// Input: $r1 mmio list length, in 8-byte (address, value) entries; must be non-zero
635// Overwrites $r1-$r5, $r14 and $r15; assumes nv_wr32 preserves $r1-$r4 - TODO confirm
636ctx_mmio_exec:
637 // set transfer base to be the mmio list
638 ld b32 $r3 D[$r0 + #chan_mmio_address]
639 mov $r2 0xa04
640 shl b32 $r2 6
641 iowr I[$r2 + 0x000] $r3 // MEM_BASE
642
643 clear b32 $r3 // $r3 = byte offset into the list
644 ctx_mmio_loop:
645 // fetch next 256 bytes of mmio list if necessary
646 and $r4 $r3 0xff // $r4 = offset within the current 256-byte window
647 bra ne #ctx_mmio_pull
648 mov $r5 #xfer_data
649 sethi $r5 0x00060000 // 256 bytes
650 xdld $r3 $r5
651 xdwait
652
653 // execute a single list entry
654 ctx_mmio_pull:
655 ld b32 $r14 D[$r4 + #xfer_data + 0x00]
656 ld b32 $r15 D[$r4 + #xfer_data + 0x04]
657 call #nv_wr32
658
659 // next!
660 add b32 $r3 8
661 sub b32 $r1 1
662 bra ne #ctx_mmio_loop
663
664 // set transfer base back to the current context
665 ctx_mmio_done:
666 ld b32 $r3 D[$r0 + #ctx_current]
667 iowr I[$r2 + 0x000] $r3 // MEM_BASE
668
669 // disable the mmio list now, we don't need/want to execute it again
670 st b32 D[$r0 + #chan_mmio_count] $r0 // zero the count in the local copy of the header ...
671 mov $r1 #chan_data
672 sethi $r1 0x00060000 // 256 bytes
673 xdst $r0 $r1 // ... and write the 256-byte header back to offset 0 of the context
674 xdwait
675 ret
676
677// Transfer HUB context data between GPU and storage area
678//
679// In: $r2 channel address
680// $p1 clear on save, set on load
681// $p2 set if opposite direction done/will be done, so:
682// on save it means: "a load will follow this save"
683// on load it means: "a save preceded this load"
684// $r2 is only consumed on the load path, where it is handed to ctx_load
685ctx_xfer:
eca15296
BS
686 // according to mwk, some kind of wait for idle
687 mov $r15 0xc00
688 shl b32 $r15 6
689 mov $r14 4
690 iowr I[$r15 + 0x200] $r14
691 ctx_xfer_idle:
692 iord $r14 I[$r15 + 0x000]
693 and $r14 0x2000
694 bra ne #ctx_xfer_idle // spin while bit 13 is still set
695
1978a2f2
BS
696 bra not $p1 #ctx_xfer_pre
697 bra $p2 #ctx_xfer_pre_load // load that follows a save: skip the 0x10 write below
698 ctx_xfer_pre:
699 mov $r15 0x10
700 call #ctx_86c
1978a2f2
BS
701 bra not $p1 #ctx_xfer_exec // saves go straight to the transfer
702
703 ctx_xfer_pre_load:
704 mov $r15 2
705 call #ctx_4170s
706 call #ctx_4170w
707 call #ctx_redswitch
708 clear b32 $r15
709 call #ctx_4170s
710 call #ctx_load // consumes $r2 (channel address) and refreshes ctx_current
711
712 // fetch context pointer, and initiate xfer on all GPCs
713 ctx_xfer_exec:
714 ld b32 $r1 D[$r0 + #ctx_current]
715 mov $r2 0x414
716 shl b32 $r2 6
717 iowr I[$r2 + 0x000] $r0 // BAR_STATUS = reset
718 mov $r14 -0x5b00 // low half 0xa500
719 sethi $r14 0x410000 // $r14 = 0x41a500
720 mov b32 $r15 $r1
721 call #nv_wr32 // GPC_BCAST_WRCMD_DATA = ctx pointer
722 add b32 $r14 4 // assumes nv_wr32 preserved $r14 - TODO confirm
723 xbit $r15 $flags $p1
724 xbit $r2 $flags $p2
725 shl b32 $r2 1
726 or $r15 $r2 // type = $p1 | ($p2 << 1)
727 call #nv_wr32 // GPC_BCAST_WRCMD_CMD = GPC_XFER(type)
728
729 // strands
730 mov $r1 0x4afc
731 sethi $r1 0x20000
732 mov $r2 0xc
733 iowr I[$r1] $r2 // STRAND_CMD(0x3f) = 0x0c
734 call #strand_wait
735 mov $r2 0x47fc
736 sethi $r2 0x20000
737 iowr I[$r2] $r0 // STRAND_FIRST_GENE(0x3f) = 0x00
738 xbit $r2 $flags $p1
739 add b32 $r2 3 // 3 when saving, 4 when loading
740 iowr I[$r1] $r2 // STRAND_CMD(0x3f) = 0x03/0x04 (SAVE/LOAD)
741
742 // mmio context
743 xbit $r10 $flags $p1 // direction
744 or $r10 6 // first, last
745 mov $r11 0 // base = 0
746 ld b32 $r12 D[$r0 + #hub_mmio_list_head]
747 ld b32 $r13 D[$r0 + #hub_mmio_list_tail]
748 mov $r14 0 // not multi
749 call #mmctx_xfer
750
751 // wait for GPCs to all complete
752 mov $r10 8 // DONE_BAR
753 call #wait_doneo
754
755 // wait for strand xfer to complete
756 call #strand_wait
757
758 // post-op
759 bra $p1 #ctx_xfer_post // loads skip the save-only MEM_CMD step
760 mov $r10 12 // DONE_UNK12
761 call #wait_donez
762 mov $r1 0xa10
763 shl b32 $r1 6
764 mov $r2 5
765 iowr I[$r1] $r2 // MEM_CMD
766 ctx_xfer_post_save_wait:
767 iord $r2 I[$r1]
768 or $r2 $r2 // sets the flags from $r2 without changing it
769 bra ne #ctx_xfer_post_save_wait // spin until MEM_CMD reads back as zero
770
771 bra $p2 #ctx_xfer_done // a load follows this save, so skip the post sequence here
772 ctx_xfer_post:
773 mov $r15 2
774 call #ctx_4170s
775 clear b32 $r15
776 call #ctx_86c
777 call #strand_post
778 call #ctx_4170w
779 clear b32 $r15
780 call #ctx_4170s
781
782 bra not $p1 #ctx_xfer_no_post_mmio
783 ld b32 $r1 D[$r0 + #chan_mmio_count]
784 or $r1 $r1
785 bra e #ctx_xfer_no_post_mmio // zero count: no override list to run
786 call #ctx_mmio_exec // $r1 = number of list entries
787
788 ctx_xfer_no_post_mmio:
1978a2f2
BS
789
790 ctx_xfer_done:
791 ret
792
793.align 256