Commit | Line | Data |
---|---|---|
3610cce8 MS |
1 | /* |
2 | * arch/s390/mm/pgtable.c | |
3 | * | |
4 | * Copyright IBM Corp. 2007 | |
5 | * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com> | |
6 | */ | |
7 | ||
8 | #include <linux/sched.h> | |
9 | #include <linux/kernel.h> | |
10 | #include <linux/errno.h> | |
11 | #include <linux/mm.h> | |
12 | #include <linux/swap.h> | |
13 | #include <linux/smp.h> | |
14 | #include <linux/highmem.h> | |
15 | #include <linux/slab.h> | |
16 | #include <linux/pagemap.h> | |
17 | #include <linux/spinlock.h> | |
18 | #include <linux/module.h> | |
19 | #include <linux/quicklist.h> | |
20 | ||
21 | #include <asm/system.h> | |
22 | #include <asm/pgtable.h> | |
23 | #include <asm/pgalloc.h> | |
24 | #include <asm/tlb.h> | |
25 | #include <asm/tlbflush.h> | |
6252d702 | 26 | #include <asm/mmu_context.h> |
3610cce8 MS |
27 | |
28 | #ifndef CONFIG_64BIT | |
29 | #define ALLOC_ORDER 1 | |
146e4b3c MS |
30 | #define TABLES_PER_PAGE 4 |
31 | #define FRAG_MASK 15UL | |
32 | #define SECOND_HALVES 10UL | |
402b0862 CO |
33 | |
34 | void clear_table_pgstes(unsigned long *table) | |
35 | { | |
36 | clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE/4); | |
37 | memset(table + 256, 0, PAGE_SIZE/4); | |
38 | clear_table(table + 512, _PAGE_TYPE_EMPTY, PAGE_SIZE/4); | |
39 | memset(table + 768, 0, PAGE_SIZE/4); | |
40 | } | |
41 | ||
3610cce8 MS |
42 | #else |
43 | #define ALLOC_ORDER 2 | |
146e4b3c MS |
44 | #define TABLES_PER_PAGE 2 |
45 | #define FRAG_MASK 3UL | |
46 | #define SECOND_HALVES 2UL | |
402b0862 CO |
47 | |
48 | void clear_table_pgstes(unsigned long *table) | |
49 | { | |
50 | clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE/2); | |
51 | memset(table + 256, 0, PAGE_SIZE/2); | |
52 | } | |
53 | ||
3610cce8 MS |
54 | #endif |
55 | ||
56 | unsigned long *crst_table_alloc(struct mm_struct *mm, int noexec) | |
57 | { | |
58 | struct page *page = alloc_pages(GFP_KERNEL, ALLOC_ORDER); | |
59 | ||
60 | if (!page) | |
61 | return NULL; | |
62 | page->index = 0; | |
63 | if (noexec) { | |
64 | struct page *shadow = alloc_pages(GFP_KERNEL, ALLOC_ORDER); | |
65 | if (!shadow) { | |
66 | __free_pages(page, ALLOC_ORDER); | |
67 | return NULL; | |
68 | } | |
69 | page->index = page_to_phys(shadow); | |
70 | } | |
146e4b3c MS |
71 | spin_lock(&mm->page_table_lock); |
72 | list_add(&page->lru, &mm->context.crst_list); | |
73 | spin_unlock(&mm->page_table_lock); | |
3610cce8 MS |
74 | return (unsigned long *) page_to_phys(page); |
75 | } | |
76 | ||
146e4b3c | 77 | void crst_table_free(struct mm_struct *mm, unsigned long *table) |
3610cce8 MS |
78 | { |
79 | unsigned long *shadow = get_shadow_table(table); | |
146e4b3c | 80 | struct page *page = virt_to_page(table); |
3610cce8 | 81 | |
146e4b3c MS |
82 | spin_lock(&mm->page_table_lock); |
83 | list_del(&page->lru); | |
84 | spin_unlock(&mm->page_table_lock); | |
3610cce8 MS |
85 | if (shadow) |
86 | free_pages((unsigned long) shadow, ALLOC_ORDER); | |
87 | free_pages((unsigned long) table, ALLOC_ORDER); | |
88 | } | |
89 | ||
6252d702 MS |
90 | #ifdef CONFIG_64BIT |
91 | int crst_table_upgrade(struct mm_struct *mm, unsigned long limit) | |
92 | { | |
93 | unsigned long *table, *pgd; | |
94 | unsigned long entry; | |
95 | ||
96 | BUG_ON(limit > (1UL << 53)); | |
97 | repeat: | |
98 | table = crst_table_alloc(mm, mm->context.noexec); | |
99 | if (!table) | |
100 | return -ENOMEM; | |
101 | spin_lock(&mm->page_table_lock); | |
102 | if (mm->context.asce_limit < limit) { | |
103 | pgd = (unsigned long *) mm->pgd; | |
104 | if (mm->context.asce_limit <= (1UL << 31)) { | |
105 | entry = _REGION3_ENTRY_EMPTY; | |
106 | mm->context.asce_limit = 1UL << 42; | |
107 | mm->context.asce_bits = _ASCE_TABLE_LENGTH | | |
108 | _ASCE_USER_BITS | | |
109 | _ASCE_TYPE_REGION3; | |
110 | } else { | |
111 | entry = _REGION2_ENTRY_EMPTY; | |
112 | mm->context.asce_limit = 1UL << 53; | |
113 | mm->context.asce_bits = _ASCE_TABLE_LENGTH | | |
114 | _ASCE_USER_BITS | | |
115 | _ASCE_TYPE_REGION2; | |
116 | } | |
117 | crst_table_init(table, entry); | |
118 | pgd_populate(mm, (pgd_t *) table, (pud_t *) pgd); | |
119 | mm->pgd = (pgd_t *) table; | |
120 | table = NULL; | |
121 | } | |
122 | spin_unlock(&mm->page_table_lock); | |
123 | if (table) | |
124 | crst_table_free(mm, table); | |
125 | if (mm->context.asce_limit < limit) | |
126 | goto repeat; | |
127 | update_mm(mm, current); | |
128 | return 0; | |
129 | } | |
130 | ||
131 | void crst_table_downgrade(struct mm_struct *mm, unsigned long limit) | |
132 | { | |
133 | pgd_t *pgd; | |
134 | ||
135 | if (mm->context.asce_limit <= limit) | |
136 | return; | |
137 | __tlb_flush_mm(mm); | |
138 | while (mm->context.asce_limit > limit) { | |
139 | pgd = mm->pgd; | |
140 | switch (pgd_val(*pgd) & _REGION_ENTRY_TYPE_MASK) { | |
141 | case _REGION_ENTRY_TYPE_R2: | |
142 | mm->context.asce_limit = 1UL << 42; | |
143 | mm->context.asce_bits = _ASCE_TABLE_LENGTH | | |
144 | _ASCE_USER_BITS | | |
145 | _ASCE_TYPE_REGION3; | |
146 | break; | |
147 | case _REGION_ENTRY_TYPE_R3: | |
148 | mm->context.asce_limit = 1UL << 31; | |
149 | mm->context.asce_bits = _ASCE_TABLE_LENGTH | | |
150 | _ASCE_USER_BITS | | |
151 | _ASCE_TYPE_SEGMENT; | |
152 | break; | |
153 | default: | |
154 | BUG(); | |
155 | } | |
156 | mm->pgd = (pgd_t *) (pgd_val(*pgd) & _REGION_ENTRY_ORIGIN); | |
157 | crst_table_free(mm, (unsigned long *) pgd); | |
158 | } | |
159 | update_mm(mm, current); | |
160 | } | |
161 | #endif | |
162 | ||
3610cce8 MS |
163 | /* |
164 | * page table entry allocation/free routines. | |
165 | */ | |
146e4b3c | 166 | unsigned long *page_table_alloc(struct mm_struct *mm) |
3610cce8 | 167 | { |
146e4b3c | 168 | struct page *page; |
3610cce8 | 169 | unsigned long *table; |
146e4b3c | 170 | unsigned long bits; |
3610cce8 | 171 | |
402b0862 | 172 | bits = (mm->context.noexec || mm->context.pgstes) ? 3UL : 1UL; |
146e4b3c MS |
173 | spin_lock(&mm->page_table_lock); |
174 | page = NULL; | |
175 | if (!list_empty(&mm->context.pgtable_list)) { | |
176 | page = list_first_entry(&mm->context.pgtable_list, | |
177 | struct page, lru); | |
178 | if ((page->flags & FRAG_MASK) == ((1UL << TABLES_PER_PAGE) - 1)) | |
179 | page = NULL; | |
180 | } | |
181 | if (!page) { | |
182 | spin_unlock(&mm->page_table_lock); | |
183 | page = alloc_page(GFP_KERNEL|__GFP_REPEAT); | |
184 | if (!page) | |
3610cce8 | 185 | return NULL; |
146e4b3c MS |
186 | pgtable_page_ctor(page); |
187 | page->flags &= ~FRAG_MASK; | |
188 | table = (unsigned long *) page_to_phys(page); | |
402b0862 CO |
189 | if (mm->context.pgstes) |
190 | clear_table_pgstes(table); | |
191 | else | |
192 | clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE); | |
146e4b3c MS |
193 | spin_lock(&mm->page_table_lock); |
194 | list_add(&page->lru, &mm->context.pgtable_list); | |
3610cce8 MS |
195 | } |
196 | table = (unsigned long *) page_to_phys(page); | |
146e4b3c MS |
197 | while (page->flags & bits) { |
198 | table += 256; | |
199 | bits <<= 1; | |
200 | } | |
201 | page->flags |= bits; | |
202 | if ((page->flags & FRAG_MASK) == ((1UL << TABLES_PER_PAGE) - 1)) | |
203 | list_move_tail(&page->lru, &mm->context.pgtable_list); | |
204 | spin_unlock(&mm->page_table_lock); | |
3610cce8 MS |
205 | return table; |
206 | } | |
207 | ||
146e4b3c | 208 | void page_table_free(struct mm_struct *mm, unsigned long *table) |
3610cce8 | 209 | { |
146e4b3c MS |
210 | struct page *page; |
211 | unsigned long bits; | |
3610cce8 | 212 | |
402b0862 | 213 | bits = (mm->context.noexec || mm->context.pgstes) ? 3UL : 1UL; |
146e4b3c MS |
214 | bits <<= (__pa(table) & (PAGE_SIZE - 1)) / 256 / sizeof(unsigned long); |
215 | page = pfn_to_page(__pa(table) >> PAGE_SHIFT); | |
216 | spin_lock(&mm->page_table_lock); | |
217 | page->flags ^= bits; | |
218 | if (page->flags & FRAG_MASK) { | |
219 | /* Page now has some free pgtable fragments. */ | |
220 | list_move(&page->lru, &mm->context.pgtable_list); | |
221 | page = NULL; | |
222 | } else | |
223 | /* All fragments of the 4K page have been freed. */ | |
224 | list_del(&page->lru); | |
225 | spin_unlock(&mm->page_table_lock); | |
226 | if (page) { | |
227 | pgtable_page_dtor(page); | |
228 | __free_page(page); | |
229 | } | |
230 | } | |
3610cce8 | 231 | |
146e4b3c MS |
232 | void disable_noexec(struct mm_struct *mm, struct task_struct *tsk) |
233 | { | |
234 | struct page *page; | |
235 | ||
236 | spin_lock(&mm->page_table_lock); | |
237 | /* Free shadow region and segment tables. */ | |
238 | list_for_each_entry(page, &mm->context.crst_list, lru) | |
239 | if (page->index) { | |
240 | free_pages((unsigned long) page->index, ALLOC_ORDER); | |
241 | page->index = 0; | |
242 | } | |
243 | /* "Free" second halves of page tables. */ | |
244 | list_for_each_entry(page, &mm->context.pgtable_list, lru) | |
245 | page->flags &= ~SECOND_HALVES; | |
246 | spin_unlock(&mm->page_table_lock); | |
247 | mm->context.noexec = 0; | |
248 | update_mm(mm, tsk); | |
3610cce8 | 249 | } |
402b0862 CO |
250 | |
251 | /* | |
252 | * switch on pgstes for its userspace process (for kvm) | |
253 | */ | |
254 | int s390_enable_sie(void) | |
255 | { | |
256 | struct task_struct *tsk = current; | |
257 | struct mm_struct *mm; | |
258 | int rc; | |
259 | ||
260 | task_lock(tsk); | |
261 | ||
262 | rc = 0; | |
263 | if (tsk->mm->context.pgstes) | |
264 | goto unlock; | |
265 | ||
266 | rc = -EINVAL; | |
267 | if (!tsk->mm || atomic_read(&tsk->mm->mm_users) > 1 || | |
268 | tsk->mm != tsk->active_mm || tsk->mm->ioctx_list) | |
269 | goto unlock; | |
270 | ||
271 | tsk->mm->context.pgstes = 1; /* dirty little tricks .. */ | |
272 | mm = dup_mm(tsk); | |
273 | tsk->mm->context.pgstes = 0; | |
274 | ||
275 | rc = -ENOMEM; | |
276 | if (!mm) | |
277 | goto unlock; | |
278 | mmput(tsk->mm); | |
279 | tsk->mm = tsk->active_mm = mm; | |
280 | preempt_disable(); | |
281 | update_mm(mm, tsk); | |
282 | cpu_set(smp_processor_id(), mm->cpu_vm_mask); | |
283 | preempt_enable(); | |
284 | rc = 0; | |
285 | unlock: | |
286 | task_unlock(tsk); | |
287 | return rc; | |
288 | } | |
289 | EXPORT_SYMBOL_GPL(s390_enable_sie); |