/*
 * TLB support routines.
 *
 * Copyright (C) 1998-2001, 2003 Hewlett-Packard Co
 *	David Mosberger-Tang <davidm@hpl.hp.com>
 *
 * 08/02/00 A. Mallick <asit.k.mallick@intel.com>
 *		Modified RID allocation for SMP
 *          Goutham Rao <goutham.rao@intel.com>
 *              IPI based ptc implementation and A-step IPI implementation.
 * Rohit Seth <rohit.seth@intel.com>
 * Ken Chen <kenneth.w.chen@intel.com>
 * Christophe de Dinechin <ddd@hp.com>: Avoid ptc.e on memory allocation
 * Copyright (C) 2007 Intel Corp
 *	Fenghua Yu <fenghua.yu@intel.com>
 *	Add multiple ptc.g/ptc.ga instruction support in global tlb purge.
 */
#include <linux/module.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/smp.h>
#include <linux/mm.h>
#include <linux/bootmem.h>
#include <linux/slab.h>

#include <asm/delay.h>
#include <asm/mmu_context.h>
#include <asm/pgalloc.h>
#include <asm/pal.h>
#include <asm/tlbflush.h>
#include <asm/dma.h>
#include <asm/processor.h>
#include <asm/sal.h>
#include <asm/tlb.h>

static struct {
	u64 mask;		/* mask of supported purge page-sizes */
	unsigned long max_bits;	/* log2 of largest supported purge page-size */
} purge;

struct ia64_ctx ia64_ctx = {
	.lock = __SPIN_LOCK_UNLOCKED(ia64_ctx.lock),
	.next = 1,
	.max_ctx = ~0U
};
DEFINE_PER_CPU(u8, ia64_need_tlb_flush);
DEFINE_PER_CPU(u8, ia64_tr_num);  /* Number of TR slots on the current processor */
DEFINE_PER_CPU(u8, ia64_tr_used); /* Max slot number used by the kernel */

struct ia64_tr_entry *ia64_idtrs[NR_CPUS];
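
/*
 * Layout of ia64_idtrs[cpu] (allocated on demand in ia64_itr_entry()):
 * entries [0, IA64_TR_ALLOC_MAX) shadow the instruction TRs, entries
 * [IA64_TR_ALLOC_MAX, 2*IA64_TR_ALLOC_MAX) shadow the data TRs.
 */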

/*
 * Initializes the ia64_ctx.bitmap array based on max_ctx+1.
 * Called after cpu_init() has set up ia64_ctx.max_ctx based on the
 * maximum RID that is supported by the boot CPU.
 */
void __init
mmu_context_init (void)
{
	ia64_ctx.bitmap = alloc_bootmem((ia64_ctx.max_ctx+1)>>3);
	ia64_ctx.flushmap = alloc_bootmem((ia64_ctx.max_ctx+1)>>3);
}

/*
 * Acquire the ia64_ctx.lock before calling this function!
 */
void
wrap_mmu_context (struct mm_struct *mm)
{
	int i, cpu;
	unsigned long flush_bit;

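	/*
	 * Bits set in flushmap mark contexts that have been flushed
	 * (and retired) since the last wrap; XOR-ing them out of bitmap
	 * returns those RIDs to the free pool.  Any stale TLB entries
	 * are purged by the per-CPU flush requested below.
	 */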
	for (i=0; i <= ia64_ctx.max_ctx / BITS_PER_LONG; i++) {
		flush_bit = xchg(&ia64_ctx.flushmap[i], 0);
		ia64_ctx.bitmap[i] ^= flush_bit;
	}

	/* use offset at 300 to skip daemons */
	ia64_ctx.next = find_next_zero_bit(ia64_ctx.bitmap,
				ia64_ctx.max_ctx, 300);
	ia64_ctx.limit = find_next_bit(ia64_ctx.bitmap,
				ia64_ctx.max_ctx, ia64_ctx.next);

	/*
	 * can't call flush_tlb_all() here because of race condition
	 * with O(1) scheduler [EF]
	 */
	cpu = get_cpu(); /* prevent preemption/migration */
	for_each_online_cpu(i)
		if (i != cpu)
			per_cpu(ia64_need_tlb_flush, i) = 1;
	put_cpu();
	local_flush_tlb_all();
}

/*
 * Implement "spinaphores" ... like counting semaphores, but they
 * spin instead of sleeping.  If there are ever any other users for
 * this primitive it can be moved up to a spinaphore.h header.
 */
struct spinaphore {
	unsigned long ticket;
	unsigned long serve;
};

static inline void spinaphore_init(struct spinaphore *ss, int val)
{
	ss->ticket = 0;
	ss->serve = val;
}

static inline void down_spin(struct spinaphore *ss)
{
	unsigned long t = ia64_fetchadd(1, &ss->ticket, acq), serve;

	if (time_before(t, ss->serve))
		return;

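	/*
	 * Slow path: spin with ld8.c.nc check loads.  ia64_invala()
	 * flushes the ALAT so the first check load actually reads
	 * memory; after that, the load only goes back to memory when
	 * another CPU's store to ss->serve invalidates the local ALAT
	 * entry, which keeps the spin loop off the interconnect.
	 */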
	ia64_invala();

	for (;;) {
		asm volatile ("ld8.c.nc %0=[%1]" : "=r"(serve) : "r"(&ss->serve) : "memory");
		if (time_before(t, serve))
			return;
		cpu_relax();
	}
}

static inline void up_spin(struct spinaphore *ss)
{
	ia64_fetchadd(1, &ss->serve, rel);
}
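
/*
 * Usage sketch (illustrative only, not part of this file): a
 * spinaphore initialized to N admits at most N CPUs at a time.
 *
 *	static struct spinaphore sem;
 *
 *	spinaphore_init(&sem, 4);
 *	...
 *	down_spin(&sem);	// at most four CPUs in here at once
 *	<contended operation>
 *	up_spin(&sem);
 */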

static struct spinaphore ptcg_sem;
static u16 nptcg = 1;
static int need_ptcg_sem = 1;
static int toolatetochangeptcgsem = 0;

/*
 * Kernel parameter "nptcg=" overrides the maximum number of concurrent
 * global TLB purges, which is otherwise reported by either PAL or the
 * SAL PALO table.
 *
 * The nptcg value is not sanity checked; it is the user's responsibility
 * to supply a value that is valid for the platform.  Otherwise, the
 * kernel may hang in some cases.
 */
static int __init
set_nptcg(char *str)
{
	int value = 0;

	get_option(&str, &value);
	setup_ptcg_sem(value, NPTCG_FROM_KERNEL_PARAMETER);

	return 1;
}

__setup("nptcg=", set_nptcg);
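
/*
 * For example, booting with "nptcg=2" (an illustrative value) caps the
 * number of concurrent ptc.g/ptc.ga purges at two, regardless of what
 * PAL or the PAL override table report.
 */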

/*
 * The maximum number of simultaneous ptc.g purges in the system can
 * be defined by PAL_VM_SUMMARY (in which case we should take
 * the smallest value for any cpu in the system) or by the PAL
 * override table (in which case we should ignore the value from
 * PAL_VM_SUMMARY).
 *
 * The kernel parameter "nptcg=" overrides the maximum number of
 * simultaneous ptc.g purges defined in either PAL_VM_SUMMARY or the PAL
 * override table; in that case, the values from both sources are ignored.
 *
 * Complicating the logic here is the fact that num_possible_cpus()
 * isn't fully set up until we start bringing cpus online.
 */
void
setup_ptcg_sem(int max_purges, int nptcg_from)
{
	static int kp_override;
	static int palo_override;
	static int firstcpu = 1;

	if (toolatetochangeptcgsem) {
		if (nptcg_from == NPTCG_FROM_PAL && max_purges == 0)
			BUG_ON(1 < nptcg);
		else
			BUG_ON(max_purges < nptcg);
		return;
	}

	if (nptcg_from == NPTCG_FROM_KERNEL_PARAMETER) {
		kp_override = 1;
		nptcg = max_purges;
		goto resetsema;
	}
	if (kp_override) {
		need_ptcg_sem = num_possible_cpus() > nptcg;
		return;
	}

	if (nptcg_from == NPTCG_FROM_PALO) {
		palo_override = 1;

		/* In PALO max_purges == 0 really means it! */
		if (max_purges == 0)
			panic("Whoa! Platform does not support global TLB purges.\n");
		nptcg = max_purges;
		if (nptcg == PALO_MAX_TLB_PURGES) {
			need_ptcg_sem = 0;
			return;
		}
		goto resetsema;
	}
	if (palo_override) {
		if (nptcg != PALO_MAX_TLB_PURGES)
			need_ptcg_sem = (num_possible_cpus() > nptcg);
		return;
	}

	/* In PAL_VM_SUMMARY max_purges == 0 actually means 1 */
	if (max_purges == 0)
		max_purges = 1;

	if (firstcpu) {
		nptcg = max_purges;
		firstcpu = 0;
	}
	if (max_purges < nptcg)
		nptcg = max_purges;
	if (nptcg == PAL_MAX_PURGES) {
		need_ptcg_sem = 0;
		return;
	} else
		need_ptcg_sem = (num_possible_cpus() > nptcg);

resetsema:
	spinaphore_init(&ptcg_sem, max_purges);
}

void
ia64_global_tlb_purge (struct mm_struct *mm, unsigned long start,
		       unsigned long end, unsigned long nbits)
{
	struct mm_struct *active_mm = current->active_mm;

	toolatetochangeptcgsem = 1;

	if (mm != active_mm) {
		/* Restore region IDs for mm */
		if (mm && active_mm) {
			activate_context(mm);
		} else {
			flush_tlb_all();
			return;
		}
	}

	if (need_ptcg_sem)
		down_spin(&ptcg_sem);

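	/*
	 * ptc.ga purges the matching translations on every CPU in the
	 * coherence domain; its second operand carries the log2 purge
	 * page size in bits 7:2, hence the "nbits << 2".
	 */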
	do {
		/*
		 * Flush ALAT entries also.
		 */
		ia64_ptcga(start, (nbits << 2));
		ia64_srlz_i();
		start += (1UL << nbits);
	} while (start < end);

	if (need_ptcg_sem)
		up_spin(&ptcg_sem);

	if (mm != active_mm) {
		activate_context(active_mm);
	}
}

void
local_flush_tlb_all (void)
{
	unsigned long i, j, flags, count0, count1, stride0, stride1, addr;

	addr    = local_cpu_data->ptce_base;
	count0  = local_cpu_data->ptce_count[0];
	count1  = local_cpu_data->ptce_count[1];
	stride0 = local_cpu_data->ptce_stride[0];
	stride1 = local_cpu_data->ptce_stride[1];

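	/*
	 * Walk the purge grid described by PAL_PTCE_INFO: issuing ptc.e
	 * on each of the count0 x count1 addresses, starting at ptce_base
	 * and spaced by the two strides, purges the entire local TLB.
	 */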
	local_irq_save(flags);
	for (i = 0; i < count0; ++i) {
		for (j = 0; j < count1; ++j) {
			ia64_ptce(addr);
			addr += stride1;
		}
		addr += stride0;
	}
	local_irq_restore(flags);
	ia64_srlz_i();			/* srlz.i implies srlz.d */
}

void
flush_tlb_range (struct vm_area_struct *vma, unsigned long start,
		 unsigned long end)
{
	struct mm_struct *mm = vma->vm_mm;
	unsigned long size = end - start;
	unsigned long nbits;

#ifndef CONFIG_SMP
	if (mm != current->active_mm) {
		mm->context = 0;
		return;
	}
#endif

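	/*
	 * Round the purge size up to the smallest page size the hardware
	 * can purge (a set bit in purge.mask), capped at purge.max_bits,
	 * and align the start address to that size.
	 */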
	nbits = ia64_fls(size + 0xfff);
	while (unlikely (((1UL << nbits) & purge.mask) == 0) &&
			(nbits < purge.max_bits))
		++nbits;
	if (nbits > purge.max_bits)
		nbits = purge.max_bits;
	start &= ~((1UL << nbits) - 1);

	preempt_disable();
#ifdef CONFIG_SMP
	if (mm != current->active_mm || cpumask_weight(mm_cpumask(mm)) != 1) {
		platform_global_tlb_purge(mm, start, end, nbits);
		preempt_enable();
		return;
	}
#endif
	do {
		ia64_ptcl(start, (nbits<<2));
		start += (1UL << nbits);
	} while (start < end);
	preempt_enable();
	ia64_srlz_i();			/* srlz.i implies srlz.d */
}
EXPORT_SYMBOL(flush_tlb_range);

void __devinit
ia64_tlb_init (void)
{
	ia64_ptce_info_t uninitialized_var(ptce_info); /* GCC be quiet */
	u64 tr_pgbits;
	long status;
	pal_vm_info_1_u_t vm_info_1;
	pal_vm_info_2_u_t vm_info_2;
	int cpu = smp_processor_id();

	if ((status = ia64_pal_vm_page_size(&tr_pgbits, &purge.mask)) != 0) {
		printk(KERN_ERR "PAL_VM_PAGE_SIZE failed with status=%ld; "
		       "defaulting to architected purge page-sizes.\n", status);
		purge.mask = 0x115557000UL;
	}
	purge.max_bits = ia64_fls(purge.mask);

	ia64_get_ptce(&ptce_info);
	local_cpu_data->ptce_base = ptce_info.base;
	local_cpu_data->ptce_count[0] = ptce_info.count[0];
	local_cpu_data->ptce_count[1] = ptce_info.count[1];
	local_cpu_data->ptce_stride[0] = ptce_info.stride[0];
	local_cpu_data->ptce_stride[1] = ptce_info.stride[1];

	local_flush_tlb_all();	/* nuke leftovers from bootstrapping... */
	status = ia64_pal_vm_summary(&vm_info_1, &vm_info_2);

	if (status) {
		printk(KERN_ERR "ia64_pal_vm_summary=%ld\n", status);
		per_cpu(ia64_tr_num, cpu) = 8;
		return;
	}
	per_cpu(ia64_tr_num, cpu) = vm_info_1.pal_vm_info_1_s.max_itr_entry+1;
	if (per_cpu(ia64_tr_num, cpu) >
				(vm_info_1.pal_vm_info_1_s.max_dtr_entry+1))
		per_cpu(ia64_tr_num, cpu) =
				vm_info_1.pal_vm_info_1_s.max_dtr_entry+1;
	if (per_cpu(ia64_tr_num, cpu) > IA64_TR_ALLOC_MAX) {
		static int justonce = 1;
		per_cpu(ia64_tr_num, cpu) = IA64_TR_ALLOC_MAX;
		if (justonce) {
			justonce = 0;
			printk(KERN_DEBUG "TR register number exceeds "
			       "IA64_TR_ALLOC_MAX!\n");
		}
	}
}

/*
 * is_tr_overlap
 *
 * Check overlap with inserted TRs.
 */
static int is_tr_overlap(struct ia64_tr_entry *p, u64 va, u64 log_size)
{
	u64 tr_log_size;
	u64 tr_end;
	u64 va_rr = ia64_get_rr(va);
	u64 va_rid = RR_TO_RID(va_rr);
	u64 va_end = va + (1<<log_size) - 1;

	if (va_rid != RR_TO_RID(p->rr))
		return 0;
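
	/* ITIR bits 7:2 hold the log2 page size of the inserted mapping */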
	tr_log_size = (p->itir & 0xff) >> 2;
	tr_end = p->ifa + (1<<tr_log_size) - 1;

	if (va > tr_end || p->ifa > va_end)
		return 0;
	return 1;
}

/*
 * ia64_insert_tr in virtual mode.  Allocate a TR slot.
 *
 * target_mask : 0x1 : itr, 0x2 : dtr, 0x3 : idtr
 *
 * va	: virtual address.
 * pte	: pte entry to be inserted.
 * log_size: log2 of the size of the range to be covered.
 *
 * Return value:  < 0 : error number
 *		 >= 0 : slot number allocated for the TR.
 *
 * Must be called with preemption disabled.
 */
int ia64_itr_entry(u64 target_mask, u64 va, u64 pte, u64 log_size)
{
	int i, r;
	unsigned long psr;
	struct ia64_tr_entry *p;
	int cpu = smp_processor_id();

	if (!ia64_idtrs[cpu]) {
		ia64_idtrs[cpu] = kmalloc(2 * IA64_TR_ALLOC_MAX *
				sizeof (struct ia64_tr_entry), GFP_KERNEL);
		if (!ia64_idtrs[cpu])
			return -ENOMEM;
	}
	r = -EINVAL;
	/* Check overlap with existing TR entries */
	if (target_mask & 0x1) {
		p = ia64_idtrs[cpu];
		for (i = IA64_TR_ALLOC_BASE; i <= per_cpu(ia64_tr_used, cpu);
								i++, p++) {
			if (p->pte & 0x1)
				if (is_tr_overlap(p, va, log_size)) {
					printk(KERN_DEBUG "Overlapped entry "
					       "inserted for TR register!\n");
					goto out;
				}
		}
	}
	if (target_mask & 0x2) {
		p = ia64_idtrs[cpu] + IA64_TR_ALLOC_MAX;
		for (i = IA64_TR_ALLOC_BASE; i <= per_cpu(ia64_tr_used, cpu);
								i++, p++) {
			if (p->pte & 0x1)
				if (is_tr_overlap(p, va, log_size)) {
					printk(KERN_DEBUG "Overlapped entry "
					       "inserted for TR register!\n");
					goto out;
				}
		}
	}

	for (i = IA64_TR_ALLOC_BASE; i < per_cpu(ia64_tr_num, cpu); i++) {
		switch (target_mask & 0x3) {
		case 1:
			if (!((ia64_idtrs[cpu] + i)->pte & 0x1))
				goto found;
			continue;
		case 2:
			if (!((ia64_idtrs[cpu] + IA64_TR_ALLOC_MAX + i)->pte & 0x1))
				goto found;
			continue;
		case 3:
			if (!((ia64_idtrs[cpu] + i)->pte & 0x1) &&
			    !((ia64_idtrs[cpu] + IA64_TR_ALLOC_MAX + i)->pte & 0x1))
				goto found;
			continue;
		default:
			r = -EINVAL;
			goto out;
		}
	}
found:
	if (i >= per_cpu(ia64_tr_num, cpu))
		return -EBUSY;

	/* Record TR info for MCA handler use! */
	if (i > per_cpu(ia64_tr_used, cpu))
		per_cpu(ia64_tr_used, cpu) = i;

	psr = ia64_clear_ic();
	if (target_mask & 0x1) {
		ia64_itr(0x1, i, va, pte, log_size);
		ia64_srlz_i();
		p = ia64_idtrs[cpu] + i;
		p->ifa = va;
		p->pte = pte;
		p->itir = log_size << 2;
		p->rr = ia64_get_rr(va);
	}
	if (target_mask & 0x2) {
		ia64_itr(0x2, i, va, pte, log_size);
		ia64_srlz_i();
		p = ia64_idtrs[cpu] + IA64_TR_ALLOC_MAX + i;
		p->ifa = va;
		p->pte = pte;
		p->itir = log_size << 2;
		p->rr = ia64_get_rr(va);
	}
	ia64_set_psr(psr);
	r = i;
out:
	return r;
}
EXPORT_SYMBOL_GPL(ia64_itr_entry);
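
/*
 * Usage sketch (illustrative only; the address, pte, and page size are
 * hypothetical).  Pin a 16MB kernel mapping into a data TR and later
 * drop it again:
 *
 *	int slot;
 *
 *	preempt_disable();
 *	slot = ia64_itr_entry(0x2, va, pte_val(pfn_pte(pfn, PAGE_KERNEL)),
 *			      24);		// 0x2 = dtr, 2^24 = 16MB
 *	...
 *	if (slot >= 0)
 *		ia64_ptr_entry(0x2, slot);	// free the slot when done
 *	preempt_enable();
 */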

/*
 * ia64_purge_tr
 *
 * target_mask: 0x1: purge itr, 0x2 : purge dtr, 0x3 purge idtr.
 * slot: slot number to be freed.
 *
 * Must be called with preemption disabled.
 */
void ia64_ptr_entry(u64 target_mask, int slot)
{
	int cpu = smp_processor_id();
	int i;
	struct ia64_tr_entry *p;

	if (slot < IA64_TR_ALLOC_BASE || slot >= per_cpu(ia64_tr_num, cpu))
		return;

	if (target_mask & 0x1) {
		p = ia64_idtrs[cpu] + slot;
		if ((p->pte & 0x1) && is_tr_overlap(p, p->ifa, p->itir >> 2)) {
			p->pte = 0;
			ia64_ptr(0x1, p->ifa, p->itir >> 2);
			ia64_srlz_i();
		}
	}

	if (target_mask & 0x2) {
		p = ia64_idtrs[cpu] + IA64_TR_ALLOC_MAX + slot;
		if ((p->pte & 0x1) && is_tr_overlap(p, p->ifa, p->itir >> 2)) {
			p->pte = 0;
			ia64_ptr(0x2, p->ifa, p->itir >> 2);
			ia64_srlz_i();
		}
	}

	for (i = per_cpu(ia64_tr_used, cpu); i >= IA64_TR_ALLOC_BASE; i--) {
		if (((ia64_idtrs[cpu] + i)->pte & 0x1) ||
		    ((ia64_idtrs[cpu] + IA64_TR_ALLOC_MAX + i)->pte & 0x1))
			break;
	}
	per_cpu(ia64_tr_used, cpu) = i;
}
EXPORT_SYMBOL_GPL(ia64_ptr_entry);