Merge branch 'nfsd-next' of git://linux-nfs.org/~bfields/linux
[GitHub/mt8127/android_kernel_alcatel_ttab.git] / arch / powerpc / lib / copypage_power7.S
1 /*
2 * This program is free software; you can redistribute it and/or modify
3 * it under the terms of the GNU General Public License as published by
4 * the Free Software Foundation; either version 2 of the License, or
5 * (at your option) any later version.
6 *
7 * This program is distributed in the hope that it will be useful,
8 * but WITHOUT ANY WARRANTY; without even the implied warranty of
9 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 * GNU General Public License for more details.
11 *
12 * You should have received a copy of the GNU General Public License
13 * along with this program; if not, write to the Free Software
14 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
15 *
16 * Copyright (C) IBM Corporation, 2012
17 *
18 * Author: Anton Blanchard <anton@au.ibm.com>
19 */
20 #include <asm/page.h>
21 #include <asm/ppc_asm.h>
22
23 _GLOBAL(copypage_power7)
/*
 * copypage_power7: copy one page (PAGE_SIZE bytes) in 128-byte chunks.
 *
 * In:  r3 = destination address (store side)
 *      r4 = source address (load side)
 * Both are expected to be page aligned (see the prefetch note below).
 *
 * With CONFIG_ALTIVEC the copy uses the VMX registers when
 * .enter_vmx_copy returns non-zero and otherwise falls back to the
 * GPR loop at .Lnonvmx_copy. Without CONFIG_ALTIVEC only the GPR loop
 * is built.
 *
 * Scratch registers used: r0, r5-r12, ctr, cr0; vr0-vr7 on the VMX
 * path. r14-r20 are used by the GPR loop but saved and restored here.
 */
24 /*
25 * We prefetch both the source and destination using enhanced touch
26 * instructions. We use a stream ID of 0 for the load side and
27 * 1 for the store side. Since source and destination are page
28 * aligned we don't need to clear the bottom 7 bits of either
29 * address.
30 */
31 ori r9,r3,1 /* r9 = destination address tagged with stream=1 */
32
/* r7 = stream control word for the load side (stream ID 0). */
33 #ifdef CONFIG_PPC_64K_PAGES
34 lis r7,0x0E01 /* depth=7, units=512 (one 64K page if a unit is 128 bytes) */
35 #else
36 lis r7,0x0E00 /* depth=7 */
37 ori r7,r7,0x1000 /* units=32 (one 4K page if a unit is 128 bytes) */
38 #endif
39 ori r10,r7,1 /* r10 = same control word tagged with stream=1 (store side) */
40
41 lis r8,0x8000 /* GO=1 */
42 clrldi r8,r8,32 /* clear the upper 32 bits (lis sign-extends), leaving 0x80000000 */
43
/*
 * Describe both streams, then start them. The assembler is switched to
 * "power4" only for this sequence (presumably so the three-operand
 * dcbt/dcbtst forms are accepted - confirm against the toolchain).
 */
44 .machine push
45 .machine "power4"
46 dcbt r0,r4,0b01000 /* load stream (ID 0): start address = source */
47 dcbt r0,r7,0b01010 /* load stream (ID 0): depth/units control word */
48 dcbtst r0,r9,0b01000 /* store stream (ID 1): start address = destination */
49 dcbtst r0,r10,0b01010 /* store stream (ID 1): depth/units control word */
50 eieio /* presumably orders the stream setup ahead of the GO below */
51 dcbt r0,r8,0b01010 /* GO */
52 .machine pop
53
54 #ifdef CONFIG_ALTIVEC
/*
 * Ask .enter_vmx_copy whether VMX may be used. LR, r3 and r4 are stored
 * at offsets 16/48/56 from the entry stack pointer (i.e. in the caller's
 * frame) before our own frame is pushed, and reloaded after the call.
 */
55 mflr r0
56 std r3,48(r1) /* save destination */
57 std r4,56(r1) /* save source */
58 std r0,16(r1) /* save return address */
59 stdu r1,-STACKFRAMESIZE(r1) /* push a frame for the call */
60 bl .enter_vmx_copy
61 cmpwi r3,0 /* cr0 = (return value == 0); tested by the beq below */
62 ld r0,STACKFRAMESIZE+16(r1) /* reload return address */
63 ld r3,STACKFRAMESIZE+48(r1) /* reload destination */
64 ld r4,STACKFRAMESIZE+56(r1) /* reload source */
65 mtlr r0
66
67 li r0,(PAGE_SIZE/128) /* number of 128-byte chunks per page */
68 mtctr r0 /* ctr = loop count for either copy loop */
69
70 beq .Lnonvmx_copy /* returned 0: use the GPR loop; our frame stays allocated for it */
71
72 addi r1,r1,STACKFRAMESIZE /* VMX path needs no stack: pop the frame */
73
/* r6-r12 = byte offsets of the 2nd..8th 16-byte vector in a chunk. */
74 li r6,16
75 li r7,32
76 li r8,48
77 li r9,64
78 li r10,80
79 li r11,96
80 li r12,112
81
/*
 * VMX loop: per iteration load 8 x 16 bytes from r4 into vr7..vr0,
 * then store them to r3. In lvx/stvx the "r0" base operand denotes a
 * literal zero, not the contents of r0.
 */
82 .align 5
83 1: lvx vr7,r0,r4 /* bytes 0-15 of the chunk */
84 lvx vr6,r4,r6
85 lvx vr5,r4,r7
86 lvx vr4,r4,r8
87 lvx vr3,r4,r9
88 lvx vr2,r4,r10
89 lvx vr1,r4,r11
90 lvx vr0,r4,r12
91 addi r4,r4,128 /* advance source to the next chunk */
92 stvx vr7,r0,r3 /* bytes 0-15 of the chunk */
93 stvx vr6,r3,r6
94 stvx vr5,r3,r7
95 stvx vr4,r3,r8
96 stvx vr3,r3,r9
97 stvx vr2,r3,r10
98 stvx vr1,r3,r11
99 stvx vr0,r3,r12
100 addi r3,r3,128 /* advance destination to the next chunk */
101 bdnz 1b /* decrement ctr, loop while non-zero */
102
/* LR already holds our caller's address, so .exit_vmx_copy returns there. */
103 b .exit_vmx_copy /* tail call optimise */
104
105 #else
/* No VMX support built in: set up ctr and the frame that .Lnonvmx_copy expects. */
106 li r0,(PAGE_SIZE/128) /* number of 128-byte chunks per page */
107 mtctr r0
108
109 stdu r1,-STACKFRAMESIZE(r1) /* push the frame used to save r14-r20 */
110 #endif
111
/*
 * GPR copy loop. Reached with our stack frame allocated and
 * ctr = PAGE_SIZE/128 on both configurations.
 */
112 .Lnonvmx_copy:
/* r14-r20 are used as copy scratch below; save them in our frame first. */
113 std r14,STK_REG(R14)(r1)
114 std r15,STK_REG(R15)(r1)
115 std r16,STK_REG(R16)(r1)
116 std r17,STK_REG(R17)(r1)
117 std r18,STK_REG(R18)(r1)
118 std r19,STK_REG(R19)(r1)
119 std r20,STK_REG(R20)(r1)
120
/*
 * Per iteration: load 16 doublewords (128 bytes) from r4 into
 * r0, r5-r12 and r14-r20, then store them to r3. r13 is not used
 * (presumably reserved - confirm).
 */
121 1: ld r0,0(r4)
122 ld r5,8(r4)
123 ld r6,16(r4)
124 ld r7,24(r4)
125 ld r8,32(r4)
126 ld r9,40(r4)
127 ld r10,48(r4)
128 ld r11,56(r4)
129 ld r12,64(r4)
130 ld r14,72(r4)
131 ld r15,80(r4)
132 ld r16,88(r4)
133 ld r17,96(r4)
134 ld r18,104(r4)
135 ld r19,112(r4)
136 ld r20,120(r4)
137 addi r4,r4,128 /* advance source to the next chunk */
138 std r0,0(r3)
139 std r5,8(r3)
140 std r6,16(r3)
141 std r7,24(r3)
142 std r8,32(r3)
143 std r9,40(r3)
144 std r10,48(r3)
145 std r11,56(r3)
146 std r12,64(r3)
147 std r14,72(r3)
148 std r15,80(r3)
149 std r16,88(r3)
150 std r17,96(r3)
151 std r18,104(r3)
152 std r19,112(r3)
153 std r20,120(r3)
154 addi r3,r3,128 /* advance destination to the next chunk */
155 bdnz 1b /* decrement ctr, loop while non-zero */
156
/* Restore the saved registers, pop our frame and return. */
157 ld r14,STK_REG(R14)(r1)
158 ld r15,STK_REG(R15)(r1)
159 ld r16,STK_REG(R16)(r1)
160 ld r17,STK_REG(R17)(r1)
161 ld r18,STK_REG(R18)(r1)
162 ld r19,STK_REG(R19)(r1)
163 ld r20,STK_REG(R20)(r1)
164 addi r1,r1,STACKFRAMESIZE
165 blr