/* arch/sparc64/mm/tsb.c
 *
 * Copyright (C) 2006, 2008 David S. Miller <davem@davemloft.net>
 */

#include <linux/kernel.h>
#include <linux/preempt.h>
#include <linux/slab.h>
#include <asm/system.h>
#include <asm/page.h>
#include <asm/tlbflush.h>
#include <asm/tlb.h>
#include <asm/mmu_context.h>
#include <asm/pgtable.h>
#include <asm/tsb.h>
#include <asm/oplib.h>

extern struct tsb swapper_tsb[KERNEL_TSB_NENTRIES];

static inline unsigned long tsb_hash(unsigned long vaddr, unsigned long hash_shift, unsigned long nentries)
{
	vaddr >>= hash_shift;
	return vaddr & (nentries - 1);
}

static inline int tag_compare(unsigned long tag, unsigned long vaddr)
{
	return (tag == (vaddr >> 22));
}
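/* Worked example: with the default 8KB base pages (PAGE_SHIFT == 13)
 * and an 8KB TSB holding 512 16-byte entries, vaddr 0x40002000 hashes
 * to bucket (0x40002000 >> 13) & 511 == 1, and the tag stored for and
 * compared against it is 0x40002000 >> 22 == 0x100.
 */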
30 | |
31 | /* TSB flushes need only occur on the processor initiating the address |
32 | * space modification, not on each cpu the address space has run on. |
33 | * Only the TLB flush needs that treatment. |
34 | */ |
35 | |
36 | void flush_tsb_kernel_range(unsigned long start, unsigned long end) |
37 | { |
38 | unsigned long v; |
39 | |
40 | for (v = start; v < end; v += PAGE_SIZE) { |
41 | unsigned long hash = tsb_hash(v, PAGE_SHIFT, |
42 | KERNEL_TSB_NENTRIES); |
43 | struct tsb *ent = &swapper_tsb[hash]; |
44 | |
45 | if (tag_compare(ent->tag, v)) |
46 | ent->tag = (1UL << TSB_TAG_INVALID_BIT); |
47 | } |
48 | } |
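/* A matching kernel TSB entry is invalidated by rewriting only its tag
 * word: TSB_TAG_INVALID_BIT sits above the range of bits any real
 * (vaddr >> 22) tag can occupy, so later lookups for that page simply
 * miss in the TSB and fall back to the kernel page tables.
 */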

static void __flush_tsb_one(struct mmu_gather *mp, unsigned long hash_shift, unsigned long tsb, unsigned long nentries)
{
	unsigned long i;

	for (i = 0; i < mp->tlb_nr; i++) {
		unsigned long v = mp->vaddrs[i];
		unsigned long tag, ent, hash;

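		/* Bit 0 of each batched address is used as a flag by the
		 * sparc64 TLB batching code rather than as part of the
		 * page-aligned virtual address, so mask it off before
		 * hashing.
		 */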
		v &= ~0x1UL;

		hash = tsb_hash(v, hash_shift, nentries);
		ent = tsb + (hash * sizeof(struct tsb));
		tag = (v >> 22UL);

		tsb_flush(ent, tag);
	}
}

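/* On cheetah_plus and sun4v (tlb_type == hypervisor) the TSB is
 * referenced by its physical address with no locked TLB mapping (see
 * setup_tsb_params() below), which is why the base address handed to
 * __flush_tsb_one() is converted with __pa() for those chips.
 */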
void flush_tsb_user(struct mmu_gather *mp)
{
	struct mm_struct *mm = mp->mm;
	unsigned long nentries, base, flags;

	spin_lock_irqsave(&mm->context.lock, flags);

	base = (unsigned long) mm->context.tsb_block[MM_TSB_BASE].tsb;
	nentries = mm->context.tsb_block[MM_TSB_BASE].tsb_nentries;
	if (tlb_type == cheetah_plus || tlb_type == hypervisor)
		base = __pa(base);
	__flush_tsb_one(mp, PAGE_SHIFT, base, nentries);

#ifdef CONFIG_HUGETLB_PAGE
	if (mm->context.tsb_block[MM_TSB_HUGE].tsb) {
		base = (unsigned long) mm->context.tsb_block[MM_TSB_HUGE].tsb;
		nentries = mm->context.tsb_block[MM_TSB_HUGE].tsb_nentries;
		if (tlb_type == cheetah_plus || tlb_type == hypervisor)
			base = __pa(base);
		__flush_tsb_one(mp, HPAGE_SHIFT, base, nentries);
	}
#endif
	spin_unlock_irqrestore(&mm->context.lock, flags);
}

#if defined(CONFIG_SPARC64_PAGE_SIZE_8KB)
#define HV_PGSZ_IDX_BASE	HV_PGSZ_IDX_8K
#define HV_PGSZ_MASK_BASE	HV_PGSZ_MASK_8K
#elif defined(CONFIG_SPARC64_PAGE_SIZE_64KB)
#define HV_PGSZ_IDX_BASE	HV_PGSZ_IDX_64K
#define HV_PGSZ_MASK_BASE	HV_PGSZ_MASK_64K
#else
#error Broken base page size setting...
#endif

#ifdef CONFIG_HUGETLB_PAGE
#if defined(CONFIG_HUGETLB_PAGE_SIZE_64K)
#define HV_PGSZ_IDX_HUGE	HV_PGSZ_IDX_64K
#define HV_PGSZ_MASK_HUGE	HV_PGSZ_MASK_64K
#elif defined(CONFIG_HUGETLB_PAGE_SIZE_512K)
#define HV_PGSZ_IDX_HUGE	HV_PGSZ_IDX_512K
#define HV_PGSZ_MASK_HUGE	HV_PGSZ_MASK_512K
#elif defined(CONFIG_HUGETLB_PAGE_SIZE_4MB)
#define HV_PGSZ_IDX_HUGE	HV_PGSZ_IDX_4MB
#define HV_PGSZ_MASK_HUGE	HV_PGSZ_MASK_4MB
#else
#error Broken huge page size setting...
#endif
#endif

static void setup_tsb_params(struct mm_struct *mm, unsigned long tsb_idx, unsigned long tsb_bytes)
{
	unsigned long tsb_reg, base, tsb_paddr;
	unsigned long page_sz, tte;

	mm->context.tsb_block[tsb_idx].tsb_nentries =
		tsb_bytes / sizeof(struct tsb);

	base = TSBMAP_BASE;
	tte = pgprot_val(PAGE_KERNEL_LOCKED);
	tsb_paddr = __pa(mm->context.tsb_block[tsb_idx].tsb);
	BUG_ON(tsb_paddr & (tsb_bytes - 1UL));

	/* Use the smallest page size that can map the whole TSB
	 * in one TLB entry.
	 */
	switch (tsb_bytes) {
	case 8192 << 0:
		tsb_reg = 0x0UL;
#ifdef DCACHE_ALIASING_POSSIBLE
		base += (tsb_paddr & 8192);
#endif
		page_sz = 8192;
		break;

	case 8192 << 1:
		tsb_reg = 0x1UL;
		page_sz = 64 * 1024;
		break;

	case 8192 << 2:
		tsb_reg = 0x2UL;
		page_sz = 64 * 1024;
		break;

	case 8192 << 3:
		tsb_reg = 0x3UL;
		page_sz = 64 * 1024;
		break;

	case 8192 << 4:
		tsb_reg = 0x4UL;
		page_sz = 512 * 1024;
		break;

	case 8192 << 5:
		tsb_reg = 0x5UL;
		page_sz = 512 * 1024;
		break;

	case 8192 << 6:
		tsb_reg = 0x6UL;
		page_sz = 512 * 1024;
		break;

	case 8192 << 7:
		tsb_reg = 0x7UL;
		page_sz = 4 * 1024 * 1024;
		break;

	default:
		printk(KERN_ERR "TSB[%s:%d]: Impossible TSB size %lu, killing process.\n",
		       current->comm, current->pid, tsb_bytes);
		do_exit(SIGSEGV);
	}
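	/* Example: a 128KB TSB (8192 << 4) does not fit in one 8KB or
	 * 64KB TLB entry, so it is mapped by a single 512KB page and
	 * encodes size field 0x4 into the TSB register value.
	 */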
	tte |= pte_sz_bits(page_sz);

	if (tlb_type == cheetah_plus || tlb_type == hypervisor) {
		/* Physical mapping, no locked TLB entry for TSB. */
		tsb_reg |= tsb_paddr;

		mm->context.tsb_block[tsb_idx].tsb_reg_val = tsb_reg;
		mm->context.tsb_block[tsb_idx].tsb_map_vaddr = 0;
		mm->context.tsb_block[tsb_idx].tsb_map_pte = 0;
	} else {
		tsb_reg |= base;
		tsb_reg |= (tsb_paddr & (page_sz - 1UL));
		tte |= (tsb_paddr & ~(page_sz - 1UL));

		mm->context.tsb_block[tsb_idx].tsb_reg_val = tsb_reg;
		mm->context.tsb_block[tsb_idx].tsb_map_vaddr = base;
		mm->context.tsb_block[tsb_idx].tsb_map_pte = tte;
	}

	/* Setup the Hypervisor TSB descriptor. */
	if (tlb_type == hypervisor) {
		struct hv_tsb_descr *hp = &mm->context.tsb_descr[tsb_idx];

		switch (tsb_idx) {
		case MM_TSB_BASE:
			hp->pgsz_idx = HV_PGSZ_IDX_BASE;
			break;
#ifdef CONFIG_HUGETLB_PAGE
		case MM_TSB_HUGE:
			hp->pgsz_idx = HV_PGSZ_IDX_HUGE;
			break;
#endif
		default:
			BUG();
		}
		hp->assoc = 1;
		hp->num_ttes = tsb_bytes / 16;
		hp->ctx_idx = 0;
		switch (tsb_idx) {
		case MM_TSB_BASE:
			hp->pgsz_mask = HV_PGSZ_MASK_BASE;
			break;
#ifdef CONFIG_HUGETLB_PAGE
		case MM_TSB_HUGE:
			hp->pgsz_mask = HV_PGSZ_MASK_HUGE;
			break;
#endif
		default:
			BUG();
		}
		hp->tsb_base = tsb_paddr;
		hp->resv = 0;
	}
}

static struct kmem_cache *tsb_caches[8] __read_mostly;

static const char *tsb_cache_names[8] = {
	"tsb_8KB",
	"tsb_16KB",
	"tsb_32KB",
	"tsb_64KB",
	"tsb_128KB",
	"tsb_256KB",
	"tsb_512KB",
	"tsb_1MB",
};

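/* Each cache below is created with its alignment equal to the object
 * size (the second "size" argument to kmem_cache_create()), so every
 * TSB it hands out is naturally aligned to its own size, as the
 * single-TLB-entry mapping in setup_tsb_params() requires.
 */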
void __init pgtable_cache_init(void)
{
	unsigned long i;

	for (i = 0; i < 8; i++) {
		unsigned long size = 8192 << i;
		const char *name = tsb_cache_names[i];

		tsb_caches[i] = kmem_cache_create(name,
						  size, size,
						  0, NULL);
		if (!tsb_caches[i]) {
			prom_printf("Could not create %s cache\n", name);
			prom_halt();
		}
	}
}

int sysctl_tsb_ratio = -2;

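/* With the default sysctl_tsb_ratio of -2 this returns
 * num_ents - num_ents/4, i.e. 3/4 of the TSB's capacity.  For example,
 * an 8KB TSB holds 512 16-byte entries, giving an RSS limit of
 * 512 - 128 = 384 pages.
 */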
static unsigned long tsb_size_to_rss_limit(unsigned long new_size)
{
	unsigned long num_ents = (new_size / sizeof(struct tsb));

	if (sysctl_tsb_ratio < 0)
		return num_ents - (num_ents >> -sysctl_tsb_ratio);
	else
		return num_ents + (num_ents >> sysctl_tsb_ratio);
}

/* When the RSS of an address space exceeds tsb_rss_limit for a TSB,
 * do_sparc64_fault() invokes this routine to try to grow it.
 *
 * When we reach the maximum TSB size supported, we stick ~0UL into
 * tsb_rss_limit for that TSB so the grow checks in do_sparc64_fault()
 * will not trigger any longer.
 *
 * The TSB can be anywhere from 8K to 1MB in size, in increasing powers
 * of two.  The TSB must be aligned to its size, so for example a 512K
 * TSB must be 512K aligned.  It also must be physically contiguous, so
 * we cannot use vmalloc().
 *
 * The idea here is to grow the TSB when the RSS of the process approaches
 * the number of entries that the current TSB can hold at once.  Currently,
 * we trigger when the RSS hits 3/4 of the TSB capacity.
 */
void tsb_grow(struct mm_struct *mm, unsigned long tsb_index, unsigned long rss)
{
	unsigned long max_tsb_size = 1 * 1024 * 1024;
	unsigned long new_size, old_size, flags;
	struct tsb *old_tsb, *new_tsb;
	unsigned long new_cache_index, old_cache_index;
	unsigned long new_rss_limit;
	gfp_t gfp_flags;

	if (max_tsb_size > (PAGE_SIZE << MAX_ORDER))
		max_tsb_size = (PAGE_SIZE << MAX_ORDER);

	new_cache_index = 0;
	for (new_size = 8192; new_size < max_tsb_size; new_size <<= 1UL) {
		new_rss_limit = tsb_size_to_rss_limit(new_size);
		if (new_rss_limit > rss)
			break;
		new_cache_index++;
	}
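	/* Example of the search above: with rss == 1000 and the default
	 * ratio, an 8KB TSB allows 384 resident pages and a 16KB TSB
	 * allows 768, both too small; a 32KB TSB allows 1536 > 1000, so
	 * new_size ends up 32KB with new_cache_index == 2.
	 */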

	if (new_size == max_tsb_size)
		new_rss_limit = ~0UL;

retry_tsb_alloc:
	gfp_flags = GFP_KERNEL;
	if (new_size > (PAGE_SIZE * 2))
		gfp_flags = __GFP_NOWARN | __GFP_NORETRY;

	new_tsb = kmem_cache_alloc_node(tsb_caches[new_cache_index],
					gfp_flags, numa_node_id());
	if (unlikely(!new_tsb)) {
		/* Not being able to fork due to a high-order TSB
		 * allocation failure is very bad behavior.  Just back
		 * down to a 0-order allocation and force no TSB
		 * growing for this address space.
		 */
		if (mm->context.tsb_block[tsb_index].tsb == NULL &&
		    new_cache_index > 0) {
			new_cache_index = 0;
			new_size = 8192;
			new_rss_limit = ~0UL;
			goto retry_tsb_alloc;
		}

		/* If we failed on a TSB grow, we are under serious
		 * memory pressure so don't try to grow any more.
		 */
		if (mm->context.tsb_block[tsb_index].tsb != NULL)
			mm->context.tsb_block[tsb_index].tsb_rss_limit = ~0UL;
		return;
	}

	/* Mark all tags as invalid. */
	tsb_init(new_tsb, new_size);

	/* OK, we are about to commit the changes.  If we are
	 * growing an existing TSB the locking is very tricky,
	 * so WATCH OUT!
	 *
	 * We have to hold mm->context.lock while committing to the
	 * new TSB; this synchronizes us with processors in
	 * flush_tsb_user() and switch_mm() for this address space.
	 *
	 * But even with that lock held, processors run asynchronously
	 * accessing the old TSB via TLB miss handling.  This is OK
	 * because those actions are just propagating state from the
	 * Linux page tables into the TSB; page table mappings are not
	 * being changed.  If a real fault occurs, the processor will
	 * synchronize with us when it hits flush_tsb_user(); this is
	 * also true for the case where vmscan is modifying the page
	 * tables.  The only thing we need to be careful with is to
	 * skip any locked TSB entries during copy_tsb().
	 *
	 * When we finish committing to the new TSB, we have to drop
	 * the lock and ask all other cpus running this address space
	 * to run tsb_context_switch() to see the new TSB table.
	 */
	spin_lock_irqsave(&mm->context.lock, flags);

	old_tsb = mm->context.tsb_block[tsb_index].tsb;
	old_cache_index =
		(mm->context.tsb_block[tsb_index].tsb_reg_val & 0x7UL);
	old_size = (mm->context.tsb_block[tsb_index].tsb_nentries *
		    sizeof(struct tsb));

	/* Handle multiple threads trying to grow the TSB at the same time.
	 * One will get in here first, and bump the size and the RSS limit.
	 * The others will get in here next and hit this check.
	 */
	if (unlikely(old_tsb &&
		     (rss < mm->context.tsb_block[tsb_index].tsb_rss_limit))) {
		spin_unlock_irqrestore(&mm->context.lock, flags);

		kmem_cache_free(tsb_caches[new_cache_index], new_tsb);
		return;
	}

	mm->context.tsb_block[tsb_index].tsb_rss_limit = new_rss_limit;

	if (old_tsb) {
		extern void copy_tsb(unsigned long old_tsb_base,
				     unsigned long old_tsb_size,
				     unsigned long new_tsb_base,
				     unsigned long new_tsb_size);
		unsigned long old_tsb_base = (unsigned long) old_tsb;
		unsigned long new_tsb_base = (unsigned long) new_tsb;

		if (tlb_type == cheetah_plus || tlb_type == hypervisor) {
			old_tsb_base = __pa(old_tsb_base);
			new_tsb_base = __pa(new_tsb_base);
		}
		copy_tsb(old_tsb_base, old_size, new_tsb_base, new_size);
	}

	mm->context.tsb_block[tsb_index].tsb = new_tsb;
	setup_tsb_params(mm, tsb_index, new_size);

	spin_unlock_irqrestore(&mm->context.lock, flags);

	/* If old_tsb is NULL, we're being invoked for the first time
	 * from init_new_context().
	 */
	if (old_tsb) {
		/* Reload it on the local cpu. */
		tsb_context_switch(mm);

		/* Now force other processors to do the same. */
		preempt_disable();
		smp_tsb_sync(mm);
		preempt_enable();

		/* Now it is safe to free the old tsb. */
		kmem_cache_free(tsb_caches[old_cache_index], old_tsb);
	}
}

int init_new_context(struct task_struct *tsk, struct mm_struct *mm)
{
#ifdef CONFIG_HUGETLB_PAGE
	unsigned long huge_pte_count;
#endif
	unsigned int i;

	spin_lock_init(&mm->context.lock);

	mm->context.sparc64_ctx_val = 0UL;

#ifdef CONFIG_HUGETLB_PAGE
	/* We reset it to zero because the fork() page copying
	 * will re-increment the counters as the parent PTEs are
	 * copied into the child address space.
	 */
	huge_pte_count = mm->context.huge_pte_count;
	mm->context.huge_pte_count = 0;
#endif

	/* copy_mm() copies over the parent's mm_struct before calling
	 * us, so we need to zero out the TSB pointer or else tsb_grow()
	 * will be confused and think there is an older TSB to free up.
	 */
	for (i = 0; i < MM_NUM_TSBS; i++)
		mm->context.tsb_block[i].tsb = NULL;

	/* If this is a fork, inherit the parent's TSB size.  We would
	 * grow it to that size on the first page fault anyway.
	 */
	tsb_grow(mm, MM_TSB_BASE, get_mm_rss(mm));

#ifdef CONFIG_HUGETLB_PAGE
	if (unlikely(huge_pte_count))
		tsb_grow(mm, MM_TSB_HUGE, huge_pte_count);
#endif

	if (unlikely(!mm->context.tsb_block[MM_TSB_BASE].tsb))
		return -ENOMEM;

	return 0;
}

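/* The TSB size index (0x0 for 8KB through 0x7 for 1MB, as encoded by
 * setup_tsb_params()) lives in the low three bits of tsb_reg_val, which
 * is how the kmem cache to free a TSB back into is recovered here.
 */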
static void tsb_destroy_one(struct tsb_config *tp)
{
	unsigned long cache_index;

	if (!tp->tsb)
		return;
	cache_index = tp->tsb_reg_val & 0x7UL;
	kmem_cache_free(tsb_caches[cache_index], tp->tsb);
	tp->tsb = NULL;
	tp->tsb_reg_val = 0UL;
}

void destroy_context(struct mm_struct *mm)
{
	unsigned long flags, i;

	for (i = 0; i < MM_NUM_TSBS; i++)
		tsb_destroy_one(&mm->context.tsb_block[i]);

	spin_lock_irqsave(&ctx_alloc_lock, flags);

	if (CTX_VALID(mm->context)) {
		unsigned long nr = CTX_NRBITS(mm->context);
		mmu_context_bmap[nr>>6] &= ~(1UL << (nr & 63));
	}

	spin_unlock_irqrestore(&ctx_alloc_lock, flags);
}