/* AltiVec Registers (VPRs) */
-#define vr0 0
-#define vr1 1
-#define vr2 2
-#define vr3 3
-#define vr4 4
-#define vr5 5
-#define vr6 6
-#define vr7 7
-#define vr8 8
-#define vr9 9
-#define vr10 10
-#define vr11 11
-#define vr12 12
-#define vr13 13
-#define vr14 14
-#define vr15 15
-#define vr16 16
-#define vr17 17
-#define vr18 18
-#define vr19 19
-#define vr20 20
-#define vr21 21
-#define vr22 22
-#define vr23 23
-#define vr24 24
-#define vr25 25
-#define vr26 26
-#define vr27 27
-#define vr28 28
-#define vr29 29
-#define vr30 30
-#define vr31 31
+#define v0 0
+#define v1 1
+#define v2 2
+#define v3 3
+#define v4 4
+#define v5 5
+#define v6 6
+#define v7 7
+#define v8 8
+#define v9 9
+#define v10 10
+#define v11 11
+#define v12 12
+#define v13 13
+#define v14 14
+#define v15 15
+#define v16 16
+#define v17 17
+#define v18 18
+#define v19 19
+#define v20 20
+#define v21 21
+#define v22 22
+#define v23 23
+#define v24 24
+#define v25 25
+#define v26 26
+#define v27 27
+#define v28 28
+#define v29 29
+#define v30 30
+#define v31 31
/* VSX Registers (VSRs) */
#endif /* __powerpc64__ */
/*
- * Get/set all the altivec registers vr0..vr31, vscr, vrsave, in one go.
+ * Get/set all the altivec registers v0..v31, vscr, vrsave, in one go.
* The transfer totals 34 quadword. Quadwords 0-31 contain the
* corresponding vector registers. Quadword 32 contains the vscr as the
* last word (offset 12) within that quadword. Quadword 33 contains the
addi r7, r3, THREAD_TRANSACT_VRSTATE
SAVE_32VRS(0, r6, r7) /* r6 scratch, r7 transact vr state */
- mfvscr vr0
+ mfvscr v0
li r6, VRSTATE_VSCR
- stvx vr0, r7, r6
+ stvx v0, r7, r6
dont_backup_vec:
mfspr r0, SPRN_VRSAVE
std r0, THREAD_TRANSACT_VRSAVE(r3)
addi r8, r3, THREAD_VRSTATE
li r5, VRSTATE_VSCR
- lvx vr0, r8, r5
- mtvscr vr0
+ lvx v0, r8, r5
+ mtvscr v0
REST_32VRS(0, r5, r8) /* r5 scratch, r8 ptr */
dont_restore_vec:
ld r5, THREAD_VRSAVE(r3)
stw r4,THREAD_USED_VR(r3)
li r10,THREAD_TRANSACT_VRSTATE+VRSTATE_VSCR
- lvx vr0,r10,r3
- mtvscr vr0
+ lvx v0,r10,r3
+ mtvscr v0
addi r10,r3,THREAD_TRANSACT_VRSTATE
REST_32VRS(0,r4,r10)
*/
_GLOBAL(load_vr_state)
li r4,VRSTATE_VSCR
- lvx vr0,r4,r3
- mtvscr vr0
+ lvx v0,r4,r3
+ mtvscr v0
REST_32VRS(0,r4,r3)
blr
*/
_GLOBAL(store_vr_state)
SAVE_32VRS(0, r4, r3)
- mfvscr vr0
+ mfvscr v0
li r4, VRSTATE_VSCR
- stvx vr0, r4, r3
+ stvx v0, r4, r3
blr
/*
addi r4,r4,THREAD
addi r6,r4,THREAD_VRSTATE
SAVE_32VRS(0,r5,r6)
- mfvscr vr0
+ mfvscr v0
li r10,VRSTATE_VSCR
- stvx vr0,r10,r6
+ stvx v0,r10,r6
/* Disable VMX for last_task_used_altivec */
PPC_LL r5,PT_REGS(r4)
toreal(r5)
li r4,1
li r10,VRSTATE_VSCR
stw r4,THREAD_USED_VR(r5)
- lvx vr0,r10,r6
- mtvscr vr0
+ lvx v0,r10,r6
+ mtvscr v0
REST_32VRS(0,r4,r6)
#ifndef CONFIG_SMP
/* Update last_task_used_altivec to 'current' */
addi r7,r3,THREAD_VRSTATE
2: PPC_LCMPI 0,r5,0
SAVE_32VRS(0,r4,r7)
- mfvscr vr0
+ mfvscr v0
li r4,VRSTATE_VSCR
- stvx vr0,r4,r7
+ stvx v0,r4,r7
beq 1f
PPC_LL r4,_MSR-STACK_FRAME_OVERHEAD(r5)
#ifdef CONFIG_VSX
li r12,112
.align 5
-1: lvx vr7,r0,r4
- lvx vr6,r4,r6
- lvx vr5,r4,r7
- lvx vr4,r4,r8
- lvx vr3,r4,r9
- lvx vr2,r4,r10
- lvx vr1,r4,r11
- lvx vr0,r4,r12
+1: lvx v7,r0,r4
+ lvx v6,r4,r6
+ lvx v5,r4,r7
+ lvx v4,r4,r8
+ lvx v3,r4,r9
+ lvx v2,r4,r10
+ lvx v1,r4,r11
+ lvx v0,r4,r12
addi r4,r4,128
- stvx vr7,r0,r3
- stvx vr6,r3,r6
- stvx vr5,r3,r7
- stvx vr4,r3,r8
- stvx vr3,r3,r9
- stvx vr2,r3,r10
- stvx vr1,r3,r11
- stvx vr0,r3,r12
+ stvx v7,r0,r3
+ stvx v6,r3,r6
+ stvx v5,r3,r7
+ stvx v4,r3,r8
+ stvx v3,r3,r9
+ stvx v2,r3,r10
+ stvx v1,r3,r11
+ stvx v0,r3,r12
addi r3,r3,128
bdnz 1b
li r11,48
bf cr7*4+3,5f
-err3; lvx vr1,r0,r4
+err3; lvx v1,r0,r4
addi r4,r4,16
-err3; stvx vr1,r0,r3
+err3; stvx v1,r0,r3
addi r3,r3,16
5: bf cr7*4+2,6f
-err3; lvx vr1,r0,r4
-err3; lvx vr0,r4,r9
+err3; lvx v1,r0,r4
+err3; lvx v0,r4,r9
addi r4,r4,32
-err3; stvx vr1,r0,r3
-err3; stvx vr0,r3,r9
+err3; stvx v1,r0,r3
+err3; stvx v0,r3,r9
addi r3,r3,32
6: bf cr7*4+1,7f
-err3; lvx vr3,r0,r4
-err3; lvx vr2,r4,r9
-err3; lvx vr1,r4,r10
-err3; lvx vr0,r4,r11
+err3; lvx v3,r0,r4
+err3; lvx v2,r4,r9
+err3; lvx v1,r4,r10
+err3; lvx v0,r4,r11
addi r4,r4,64
-err3; stvx vr3,r0,r3
-err3; stvx vr2,r3,r9
-err3; stvx vr1,r3,r10
-err3; stvx vr0,r3,r11
+err3; stvx v3,r0,r3
+err3; stvx v2,r3,r9
+err3; stvx v1,r3,r10
+err3; stvx v0,r3,r11
addi r3,r3,64
7: sub r5,r5,r6
*/
.align 5
8:
-err4; lvx vr7,r0,r4
-err4; lvx vr6,r4,r9
-err4; lvx vr5,r4,r10
-err4; lvx vr4,r4,r11
-err4; lvx vr3,r4,r12
-err4; lvx vr2,r4,r14
-err4; lvx vr1,r4,r15
-err4; lvx vr0,r4,r16
+err4; lvx v7,r0,r4
+err4; lvx v6,r4,r9
+err4; lvx v5,r4,r10
+err4; lvx v4,r4,r11
+err4; lvx v3,r4,r12
+err4; lvx v2,r4,r14
+err4; lvx v1,r4,r15
+err4; lvx v0,r4,r16
addi r4,r4,128
-err4; stvx vr7,r0,r3
-err4; stvx vr6,r3,r9
-err4; stvx vr5,r3,r10
-err4; stvx vr4,r3,r11
-err4; stvx vr3,r3,r12
-err4; stvx vr2,r3,r14
-err4; stvx vr1,r3,r15
-err4; stvx vr0,r3,r16
+err4; stvx v7,r0,r3
+err4; stvx v6,r3,r9
+err4; stvx v5,r3,r10
+err4; stvx v4,r3,r11
+err4; stvx v3,r3,r12
+err4; stvx v2,r3,r14
+err4; stvx v1,r3,r15
+err4; stvx v0,r3,r16
addi r3,r3,128
bdnz 8b
mtocrf 0x01,r6
bf cr7*4+1,9f
-err3; lvx vr3,r0,r4
-err3; lvx vr2,r4,r9
-err3; lvx vr1,r4,r10
-err3; lvx vr0,r4,r11
+err3; lvx v3,r0,r4
+err3; lvx v2,r4,r9
+err3; lvx v1,r4,r10
+err3; lvx v0,r4,r11
addi r4,r4,64
-err3; stvx vr3,r0,r3
-err3; stvx vr2,r3,r9
-err3; stvx vr1,r3,r10
-err3; stvx vr0,r3,r11
+err3; stvx v3,r0,r3
+err3; stvx v2,r3,r9
+err3; stvx v1,r3,r10
+err3; stvx v0,r3,r11
addi r3,r3,64
9: bf cr7*4+2,10f
-err3; lvx vr1,r0,r4
-err3; lvx vr0,r4,r9
+err3; lvx v1,r0,r4
+err3; lvx v0,r4,r9
addi r4,r4,32
-err3; stvx vr1,r0,r3
-err3; stvx vr0,r3,r9
+err3; stvx v1,r0,r3
+err3; stvx v0,r3,r9
addi r3,r3,32
10: bf cr7*4+3,11f
-err3; lvx vr1,r0,r4
+err3; lvx v1,r0,r4
addi r4,r4,16
-err3; stvx vr1,r0,r3
+err3; stvx v1,r0,r3
addi r3,r3,16
/* Up to 15B to go */
li r10,32
li r11,48
- LVS(vr16,0,r4) /* Setup permute control vector */
-err3; lvx vr0,0,r4
+ LVS(v16,0,r4) /* Setup permute control vector */
+err3; lvx v0,0,r4
addi r4,r4,16
bf cr7*4+3,5f
-err3; lvx vr1,r0,r4
- VPERM(vr8,vr0,vr1,vr16)
+err3; lvx v1,r0,r4
+ VPERM(v8,v0,v1,v16)
addi r4,r4,16
-err3; stvx vr8,r0,r3
+err3; stvx v8,r0,r3
addi r3,r3,16
- vor vr0,vr1,vr1
+ vor v0,v1,v1
5: bf cr7*4+2,6f
-err3; lvx vr1,r0,r4
- VPERM(vr8,vr0,vr1,vr16)
-err3; lvx vr0,r4,r9
- VPERM(vr9,vr1,vr0,vr16)
+err3; lvx v1,r0,r4
+ VPERM(v8,v0,v1,v16)
+err3; lvx v0,r4,r9
+ VPERM(v9,v1,v0,v16)
addi r4,r4,32
-err3; stvx vr8,r0,r3
-err3; stvx vr9,r3,r9
+err3; stvx v8,r0,r3
+err3; stvx v9,r3,r9
addi r3,r3,32
6: bf cr7*4+1,7f
-err3; lvx vr3,r0,r4
- VPERM(vr8,vr0,vr3,vr16)
-err3; lvx vr2,r4,r9
- VPERM(vr9,vr3,vr2,vr16)
-err3; lvx vr1,r4,r10
- VPERM(vr10,vr2,vr1,vr16)
-err3; lvx vr0,r4,r11
- VPERM(vr11,vr1,vr0,vr16)
+err3; lvx v3,r0,r4
+ VPERM(v8,v0,v3,v16)
+err3; lvx v2,r4,r9
+ VPERM(v9,v3,v2,v16)
+err3; lvx v1,r4,r10
+ VPERM(v10,v2,v1,v16)
+err3; lvx v0,r4,r11
+ VPERM(v11,v1,v0,v16)
addi r4,r4,64
-err3; stvx vr8,r0,r3
-err3; stvx vr9,r3,r9
-err3; stvx vr10,r3,r10
-err3; stvx vr11,r3,r11
+err3; stvx v8,r0,r3
+err3; stvx v9,r3,r9
+err3; stvx v10,r3,r10
+err3; stvx v11,r3,r11
addi r3,r3,64
7: sub r5,r5,r6
*/
.align 5
8:
-err4; lvx vr7,r0,r4
- VPERM(vr8,vr0,vr7,vr16)
-err4; lvx vr6,r4,r9
- VPERM(vr9,vr7,vr6,vr16)
-err4; lvx vr5,r4,r10
- VPERM(vr10,vr6,vr5,vr16)
-err4; lvx vr4,r4,r11
- VPERM(vr11,vr5,vr4,vr16)
-err4; lvx vr3,r4,r12
- VPERM(vr12,vr4,vr3,vr16)
-err4; lvx vr2,r4,r14
- VPERM(vr13,vr3,vr2,vr16)
-err4; lvx vr1,r4,r15
- VPERM(vr14,vr2,vr1,vr16)
-err4; lvx vr0,r4,r16
- VPERM(vr15,vr1,vr0,vr16)
+err4; lvx v7,r0,r4
+ VPERM(v8,v0,v7,v16)
+err4; lvx v6,r4,r9
+ VPERM(v9,v7,v6,v16)
+err4; lvx v5,r4,r10
+ VPERM(v10,v6,v5,v16)
+err4; lvx v4,r4,r11
+ VPERM(v11,v5,v4,v16)
+err4; lvx v3,r4,r12
+ VPERM(v12,v4,v3,v16)
+err4; lvx v2,r4,r14
+ VPERM(v13,v3,v2,v16)
+err4; lvx v1,r4,r15
+ VPERM(v14,v2,v1,v16)
+err4; lvx v0,r4,r16
+ VPERM(v15,v1,v0,v16)
addi r4,r4,128
-err4; stvx vr8,r0,r3
-err4; stvx vr9,r3,r9
-err4; stvx vr10,r3,r10
-err4; stvx vr11,r3,r11
-err4; stvx vr12,r3,r12
-err4; stvx vr13,r3,r14
-err4; stvx vr14,r3,r15
-err4; stvx vr15,r3,r16
+err4; stvx v8,r0,r3
+err4; stvx v9,r3,r9
+err4; stvx v10,r3,r10
+err4; stvx v11,r3,r11
+err4; stvx v12,r3,r12
+err4; stvx v13,r3,r14
+err4; stvx v14,r3,r15
+err4; stvx v15,r3,r16
addi r3,r3,128
bdnz 8b
mtocrf 0x01,r6
bf cr7*4+1,9f
-err3; lvx vr3,r0,r4
- VPERM(vr8,vr0,vr3,vr16)
-err3; lvx vr2,r4,r9
- VPERM(vr9,vr3,vr2,vr16)
-err3; lvx vr1,r4,r10
- VPERM(vr10,vr2,vr1,vr16)
-err3; lvx vr0,r4,r11
- VPERM(vr11,vr1,vr0,vr16)
+err3; lvx v3,r0,r4
+ VPERM(v8,v0,v3,v16)
+err3; lvx v2,r4,r9
+ VPERM(v9,v3,v2,v16)
+err3; lvx v1,r4,r10
+ VPERM(v10,v2,v1,v16)
+err3; lvx v0,r4,r11
+ VPERM(v11,v1,v0,v16)
addi r4,r4,64
-err3; stvx vr8,r0,r3
-err3; stvx vr9,r3,r9
-err3; stvx vr10,r3,r10
-err3; stvx vr11,r3,r11
+err3; stvx v8,r0,r3
+err3; stvx v9,r3,r9
+err3; stvx v10,r3,r10
+err3; stvx v11,r3,r11
addi r3,r3,64
9: bf cr7*4+2,10f
-err3; lvx vr1,r0,r4
- VPERM(vr8,vr0,vr1,vr16)
-err3; lvx vr0,r4,r9
- VPERM(vr9,vr1,vr0,vr16)
+err3; lvx v1,r0,r4
+ VPERM(v8,v0,v1,v16)
+err3; lvx v0,r4,r9
+ VPERM(v9,v1,v0,v16)
addi r4,r4,32
-err3; stvx vr8,r0,r3
-err3; stvx vr9,r3,r9
+err3; stvx v8,r0,r3
+err3; stvx v9,r3,r9
addi r3,r3,32
10: bf cr7*4+3,11f
-err3; lvx vr1,r0,r4
- VPERM(vr8,vr0,vr1,vr16)
+err3; lvx v1,r0,r4
+ VPERM(v8,v0,v1,v16)
addi r4,r4,16
-err3; stvx vr8,r0,r3
+err3; stvx v8,r0,r3
addi r3,r3,16
/* Up to 15B to go */
_GLOBAL(_savevr_20)
li r11,-192
- stvx vr20,r11,r0
+ stvx v20,r11,r0
_GLOBAL(_savevr_21)
li r11,-176
- stvx vr21,r11,r0
+ stvx v21,r11,r0
_GLOBAL(_savevr_22)
li r11,-160
- stvx vr22,r11,r0
+ stvx v22,r11,r0
_GLOBAL(_savevr_23)
li r11,-144
- stvx vr23,r11,r0
+ stvx v23,r11,r0
_GLOBAL(_savevr_24)
li r11,-128
- stvx vr24,r11,r0
+ stvx v24,r11,r0
_GLOBAL(_savevr_25)
li r11,-112
- stvx vr25,r11,r0
+ stvx v25,r11,r0
_GLOBAL(_savevr_26)
li r11,-96
- stvx vr26,r11,r0
+ stvx v26,r11,r0
_GLOBAL(_savevr_27)
li r11,-80
- stvx vr27,r11,r0
+ stvx v27,r11,r0
_GLOBAL(_savevr_28)
li r11,-64
- stvx vr28,r11,r0
+ stvx v28,r11,r0
_GLOBAL(_savevr_29)
li r11,-48
- stvx vr29,r11,r0
+ stvx v29,r11,r0
_GLOBAL(_savevr_30)
li r11,-32
- stvx vr30,r11,r0
+ stvx v30,r11,r0
_GLOBAL(_savevr_31)
li r11,-16
- stvx vr31,r11,r0
+ stvx v31,r11,r0
blr
_GLOBAL(_restvr_20)
li r11,-192
- lvx vr20,r11,r0
+ lvx v20,r11,r0
_GLOBAL(_restvr_21)
li r11,-176
- lvx vr21,r11,r0
+ lvx v21,r11,r0
_GLOBAL(_restvr_22)
li r11,-160
- lvx vr22,r11,r0
+ lvx v22,r11,r0
_GLOBAL(_restvr_23)
li r11,-144
- lvx vr23,r11,r0
+ lvx v23,r11,r0
_GLOBAL(_restvr_24)
li r11,-128
- lvx vr24,r11,r0
+ lvx v24,r11,r0
_GLOBAL(_restvr_25)
li r11,-112
- lvx vr25,r11,r0
+ lvx v25,r11,r0
_GLOBAL(_restvr_26)
li r11,-96
- lvx vr26,r11,r0
+ lvx v26,r11,r0
_GLOBAL(_restvr_27)
li r11,-80
- lvx vr27,r11,r0
+ lvx v27,r11,r0
_GLOBAL(_restvr_28)
li r11,-64
- lvx vr28,r11,r0
+ lvx v28,r11,r0
_GLOBAL(_restvr_29)
li r11,-48
- lvx vr29,r11,r0
+ lvx v29,r11,r0
_GLOBAL(_restvr_30)
li r11,-32
- lvx vr30,r11,r0
+ lvx v30,r11,r0
_GLOBAL(_restvr_31)
li r11,-16
- lvx vr31,r11,r0
+ lvx v31,r11,r0
blr
#endif /* CONFIG_ALTIVEC */
.globl _savevr_20
_savevr_20:
li r12,-192
- stvx vr20,r12,r0
+ stvx v20,r12,r0
.globl _savevr_21
_savevr_21:
li r12,-176
- stvx vr21,r12,r0
+ stvx v21,r12,r0
.globl _savevr_22
_savevr_22:
li r12,-160
- stvx vr22,r12,r0
+ stvx v22,r12,r0
.globl _savevr_23
_savevr_23:
li r12,-144
- stvx vr23,r12,r0
+ stvx v23,r12,r0
.globl _savevr_24
_savevr_24:
li r12,-128
- stvx vr24,r12,r0
+ stvx v24,r12,r0
.globl _savevr_25
_savevr_25:
li r12,-112
- stvx vr25,r12,r0
+ stvx v25,r12,r0
.globl _savevr_26
_savevr_26:
li r12,-96
- stvx vr26,r12,r0
+ stvx v26,r12,r0
.globl _savevr_27
_savevr_27:
li r12,-80
- stvx vr27,r12,r0
+ stvx v27,r12,r0
.globl _savevr_28
_savevr_28:
li r12,-64
- stvx vr28,r12,r0
+ stvx v28,r12,r0
.globl _savevr_29
_savevr_29:
li r12,-48
- stvx vr29,r12,r0
+ stvx v29,r12,r0
.globl _savevr_30
_savevr_30:
li r12,-32
- stvx vr30,r12,r0
+ stvx v30,r12,r0
.globl _savevr_31
_savevr_31:
li r12,-16
- stvx vr31,r12,r0
+ stvx v31,r12,r0
blr
.globl _restvr_20
_restvr_20:
li r12,-192
- lvx vr20,r12,r0
+ lvx v20,r12,r0
.globl _restvr_21
_restvr_21:
li r12,-176
- lvx vr21,r12,r0
+ lvx v21,r12,r0
.globl _restvr_22
_restvr_22:
li r12,-160
- lvx vr22,r12,r0
+ lvx v22,r12,r0
.globl _restvr_23
_restvr_23:
li r12,-144
- lvx vr23,r12,r0
+ lvx v23,r12,r0
.globl _restvr_24
_restvr_24:
li r12,-128
- lvx vr24,r12,r0
+ lvx v24,r12,r0
.globl _restvr_25
_restvr_25:
li r12,-112
- lvx vr25,r12,r0
+ lvx v25,r12,r0
.globl _restvr_26
_restvr_26:
li r12,-96
- lvx vr26,r12,r0
+ lvx v26,r12,r0
.globl _restvr_27
_restvr_27:
li r12,-80
- lvx vr27,r12,r0
+ lvx v27,r12,r0
.globl _restvr_28
_restvr_28:
li r12,-64
- lvx vr28,r12,r0
+ lvx v28,r12,r0
.globl _restvr_29
_restvr_29:
li r12,-48
- lvx vr29,r12,r0
+ lvx v29,r12,r0
.globl _restvr_30
_restvr_30:
li r12,-32
- lvx vr30,r12,r0
+ lvx v30,r12,r0
.globl _restvr_31
_restvr_31:
li r12,-16
- lvx vr31,r12,r0
+ lvx v31,r12,r0
blr
#endif /* CONFIG_ALTIVEC */
extab 2b,3b
#ifdef CONFIG_ALTIVEC
-/* Get the contents of vrN into vr0; N is in r3. */
+/* Get the contents of vrN into v0; N is in r3. */
_GLOBAL(get_vr)
mflr r0
rlwinm r3,r3,3,0xf8
bcl 20,31,1f
- blr /* vr0 is already in vr0 */
+ blr /* v0 is already in v0 */
nop
reg = 1
.rept 31
- vor vr0,reg,reg /* assembler doesn't know vmr? */
+ vor v0,reg,reg /* assembler doesn't know vmr? */
blr
reg = reg + 1
.endr
mtlr r0
bctr
-/* Put the contents of vr0 into vrN; N is in r3. */
+/* Put the contents of v0 into vrN; N is in r3. */
_GLOBAL(put_vr)
mflr r0
rlwinm r3,r3,3,0xf8
bcl 20,31,1f
- blr /* vr0 is already in vr0 */
+ blr /* v0 is already in v0 */
nop
reg = 1
.rept 31
- vor reg,vr0,vr0
+ vor reg,v0,v0
blr
reg = reg + 1
.endr
MTMSRD(r7)
isync
beq cr7,1f
- stvx vr0,r1,r8
+ stvx v0,r1,r8
1: li r9,-EFAULT
-2: lvx vr0,0,r4
+2: lvx v0,0,r4
li r9,0
3: beq cr7,4f
bl put_vr
- lvx vr0,r1,r8
+ lvx v0,r1,r8
4: PPC_LL r0,STKFRM+PPC_LR_STKOFF(r1)
mtlr r0
MTMSRD(r6)
MTMSRD(r7)
isync
beq cr7,1f
- stvx vr0,r1,r8
+ stvx v0,r1,r8
bl get_vr
1: li r9,-EFAULT
-2: stvx vr0,0,r4
+2: stvx v0,0,r4
li r9,0
3: beq cr7,4f
- lvx vr0,r1,r8
+ lvx v0,r1,r8
4: PPC_LL r0,STKFRM+PPC_LR_STKOFF(r1)
mtlr r0
MTMSRD(r6)
mflr r0
rlwinm r3,r3,3,0x1f8
bcl 20,31,1f
- blr /* vr0 is already in vr0 */
+ blr /* v0 is already in v0 */
nop
reg = 1
.rept 63
li r11,48
bf cr7*4+3,5f
- lvx vr1,r0,r4
+ lvx v1,r0,r4
addi r4,r4,16
- stvx vr1,r0,r3
+ stvx v1,r0,r3
addi r3,r3,16
5: bf cr7*4+2,6f
- lvx vr1,r0,r4
- lvx vr0,r4,r9
+ lvx v1,r0,r4
+ lvx v0,r4,r9
addi r4,r4,32
- stvx vr1,r0,r3
- stvx vr0,r3,r9
+ stvx v1,r0,r3
+ stvx v0,r3,r9
addi r3,r3,32
6: bf cr7*4+1,7f
- lvx vr3,r0,r4
- lvx vr2,r4,r9
- lvx vr1,r4,r10
- lvx vr0,r4,r11
+ lvx v3,r0,r4
+ lvx v2,r4,r9
+ lvx v1,r4,r10
+ lvx v0,r4,r11
addi r4,r4,64
- stvx vr3,r0,r3
- stvx vr2,r3,r9
- stvx vr1,r3,r10
- stvx vr0,r3,r11
+ stvx v3,r0,r3
+ stvx v2,r3,r9
+ stvx v1,r3,r10
+ stvx v0,r3,r11
addi r3,r3,64
7: sub r5,r5,r6
*/
.align 5
8:
- lvx vr7,r0,r4
- lvx vr6,r4,r9
- lvx vr5,r4,r10
- lvx vr4,r4,r11
- lvx vr3,r4,r12
- lvx vr2,r4,r14
- lvx vr1,r4,r15
- lvx vr0,r4,r16
+ lvx v7,r0,r4
+ lvx v6,r4,r9
+ lvx v5,r4,r10
+ lvx v4,r4,r11
+ lvx v3,r4,r12
+ lvx v2,r4,r14
+ lvx v1,r4,r15
+ lvx v0,r4,r16
addi r4,r4,128
- stvx vr7,r0,r3
- stvx vr6,r3,r9
- stvx vr5,r3,r10
- stvx vr4,r3,r11
- stvx vr3,r3,r12
- stvx vr2,r3,r14
- stvx vr1,r3,r15
- stvx vr0,r3,r16
+ stvx v7,r0,r3
+ stvx v6,r3,r9
+ stvx v5,r3,r10
+ stvx v4,r3,r11
+ stvx v3,r3,r12
+ stvx v2,r3,r14
+ stvx v1,r3,r15
+ stvx v0,r3,r16
addi r3,r3,128
bdnz 8b
mtocrf 0x01,r6
bf cr7*4+1,9f
- lvx vr3,r0,r4
- lvx vr2,r4,r9
- lvx vr1,r4,r10
- lvx vr0,r4,r11
+ lvx v3,r0,r4
+ lvx v2,r4,r9
+ lvx v1,r4,r10
+ lvx v0,r4,r11
addi r4,r4,64
- stvx vr3,r0,r3
- stvx vr2,r3,r9
- stvx vr1,r3,r10
- stvx vr0,r3,r11
+ stvx v3,r0,r3
+ stvx v2,r3,r9
+ stvx v1,r3,r10
+ stvx v0,r3,r11
addi r3,r3,64
9: bf cr7*4+2,10f
- lvx vr1,r0,r4
- lvx vr0,r4,r9
+ lvx v1,r0,r4
+ lvx v0,r4,r9
addi r4,r4,32
- stvx vr1,r0,r3
- stvx vr0,r3,r9
+ stvx v1,r0,r3
+ stvx v0,r3,r9
addi r3,r3,32
10: bf cr7*4+3,11f
- lvx vr1,r0,r4
+ lvx v1,r0,r4
addi r4,r4,16
- stvx vr1,r0,r3
+ stvx v1,r0,r3
addi r3,r3,16
/* Up to 15B to go */
li r10,32
li r11,48
- LVS(vr16,0,r4) /* Setup permute control vector */
- lvx vr0,0,r4
+ LVS(v16,0,r4) /* Setup permute control vector */
+ lvx v0,0,r4
addi r4,r4,16
bf cr7*4+3,5f
- lvx vr1,r0,r4
- VPERM(vr8,vr0,vr1,vr16)
+ lvx v1,r0,r4
+ VPERM(v8,v0,v1,v16)
addi r4,r4,16
- stvx vr8,r0,r3
+ stvx v8,r0,r3
addi r3,r3,16
- vor vr0,vr1,vr1
+ vor v0,v1,v1
5: bf cr7*4+2,6f
- lvx vr1,r0,r4
- VPERM(vr8,vr0,vr1,vr16)
- lvx vr0,r4,r9
- VPERM(vr9,vr1,vr0,vr16)
+ lvx v1,r0,r4
+ VPERM(v8,v0,v1,v16)
+ lvx v0,r4,r9
+ VPERM(v9,v1,v0,v16)
addi r4,r4,32
- stvx vr8,r0,r3
- stvx vr9,r3,r9
+ stvx v8,r0,r3
+ stvx v9,r3,r9
addi r3,r3,32
6: bf cr7*4+1,7f
- lvx vr3,r0,r4
- VPERM(vr8,vr0,vr3,vr16)
- lvx vr2,r4,r9
- VPERM(vr9,vr3,vr2,vr16)
- lvx vr1,r4,r10
- VPERM(vr10,vr2,vr1,vr16)
- lvx vr0,r4,r11
- VPERM(vr11,vr1,vr0,vr16)
+ lvx v3,r0,r4
+ VPERM(v8,v0,v3,v16)
+ lvx v2,r4,r9
+ VPERM(v9,v3,v2,v16)
+ lvx v1,r4,r10
+ VPERM(v10,v2,v1,v16)
+ lvx v0,r4,r11
+ VPERM(v11,v1,v0,v16)
addi r4,r4,64
- stvx vr8,r0,r3
- stvx vr9,r3,r9
- stvx vr10,r3,r10
- stvx vr11,r3,r11
+ stvx v8,r0,r3
+ stvx v9,r3,r9
+ stvx v10,r3,r10
+ stvx v11,r3,r11
addi r3,r3,64
7: sub r5,r5,r6
*/
.align 5
8:
- lvx vr7,r0,r4
- VPERM(vr8,vr0,vr7,vr16)
- lvx vr6,r4,r9
- VPERM(vr9,vr7,vr6,vr16)
- lvx vr5,r4,r10
- VPERM(vr10,vr6,vr5,vr16)
- lvx vr4,r4,r11
- VPERM(vr11,vr5,vr4,vr16)
- lvx vr3,r4,r12
- VPERM(vr12,vr4,vr3,vr16)
- lvx vr2,r4,r14
- VPERM(vr13,vr3,vr2,vr16)
- lvx vr1,r4,r15
- VPERM(vr14,vr2,vr1,vr16)
- lvx vr0,r4,r16
- VPERM(vr15,vr1,vr0,vr16)
+ lvx v7,r0,r4
+ VPERM(v8,v0,v7,v16)
+ lvx v6,r4,r9
+ VPERM(v9,v7,v6,v16)
+ lvx v5,r4,r10
+ VPERM(v10,v6,v5,v16)
+ lvx v4,r4,r11
+ VPERM(v11,v5,v4,v16)
+ lvx v3,r4,r12
+ VPERM(v12,v4,v3,v16)
+ lvx v2,r4,r14
+ VPERM(v13,v3,v2,v16)
+ lvx v1,r4,r15
+ VPERM(v14,v2,v1,v16)
+ lvx v0,r4,r16
+ VPERM(v15,v1,v0,v16)
addi r4,r4,128
- stvx vr8,r0,r3
- stvx vr9,r3,r9
- stvx vr10,r3,r10
- stvx vr11,r3,r11
- stvx vr12,r3,r12
- stvx vr13,r3,r14
- stvx vr14,r3,r15
- stvx vr15,r3,r16
+ stvx v8,r0,r3
+ stvx v9,r3,r9
+ stvx v10,r3,r10
+ stvx v11,r3,r11
+ stvx v12,r3,r12
+ stvx v13,r3,r14
+ stvx v14,r3,r15
+ stvx v15,r3,r16
addi r3,r3,128
bdnz 8b
mtocrf 0x01,r6
bf cr7*4+1,9f
- lvx vr3,r0,r4
- VPERM(vr8,vr0,vr3,vr16)
- lvx vr2,r4,r9
- VPERM(vr9,vr3,vr2,vr16)
- lvx vr1,r4,r10
- VPERM(vr10,vr2,vr1,vr16)
- lvx vr0,r4,r11
- VPERM(vr11,vr1,vr0,vr16)
+ lvx v3,r0,r4
+ VPERM(v8,v0,v3,v16)
+ lvx v2,r4,r9
+ VPERM(v9,v3,v2,v16)
+ lvx v1,r4,r10
+ VPERM(v10,v2,v1,v16)
+ lvx v0,r4,r11
+ VPERM(v11,v1,v0,v16)
addi r4,r4,64
- stvx vr8,r0,r3
- stvx vr9,r3,r9
- stvx vr10,r3,r10
- stvx vr11,r3,r11
+ stvx v8,r0,r3
+ stvx v9,r3,r9
+ stvx v10,r3,r10
+ stvx v11,r3,r11
addi r3,r3,64
9: bf cr7*4+2,10f
- lvx vr1,r0,r4
- VPERM(vr8,vr0,vr1,vr16)
- lvx vr0,r4,r9
- VPERM(vr9,vr1,vr0,vr16)
+ lvx v1,r0,r4
+ VPERM(v8,v0,v1,v16)
+ lvx v0,r4,r9
+ VPERM(v9,v1,v0,v16)
addi r4,r4,32
- stvx vr8,r0,r3
- stvx vr9,r3,r9
+ stvx v8,r0,r3
+ stvx v9,r3,r9
addi r3,r3,32
10: bf cr7*4+3,11f
- lvx vr1,r0,r4
- VPERM(vr8,vr0,vr1,vr16)
+ lvx v1,r0,r4
+ VPERM(v8,v0,v1,v16)
addi r4,r4,16
- stvx vr8,r0,r3
+ stvx v8,r0,r3
addi r3,r3,16
/* Up to 15B to go */
#define r1 1
-#define vr0 0
-#define vr1 1
-#define vr2 2
-#define vr3 3
-#define vr4 4
-#define vr5 5
-#define vr6 6
-#define vr7 7
-#define vr8 8
-#define vr9 9
-#define vr10 10
-#define vr11 11
-#define vr12 12
-#define vr13 13
-#define vr14 14
-#define vr15 15
-#define vr16 16
-#define vr17 17
-#define vr18 18
-#define vr19 19
-#define vr20 20
-#define vr21 21
-#define vr22 22
-#define vr23 23
-#define vr24 24
-#define vr25 25
-#define vr26 26
-#define vr27 27
-#define vr28 28
-#define vr29 29
-#define vr30 30
-#define vr31 31
-
#define R14 r14
#define R15 r15
#define R16 r16