arch/mips/mm/tlbex.c

Source at commit 694c7fbe86b8a9c91392e505afcb9fcfc91deccc.
1/*
2 * This file is subject to the terms and conditions of the GNU General Public
3 * License. See the file "COPYING" in the main directory of this archive
4 * for more details.
5 *
6 * Synthesize TLB refill handlers at runtime.
7 *
8 * Copyright (C) 2004, 2005, 2006, 2008 Thiemo Seufer
9 * Copyright (C) 2005, 2007, 2008, 2009 Maciej W. Rozycki
10 * Copyright (C) 2006 Ralf Baechle (ralf@linux-mips.org)
11 * Copyright (C) 2008, 2009 Cavium Networks, Inc.
12 * Copyright (C) 2011 MIPS Technologies, Inc.
13 *
14 * ... and the days got worse and worse and now you see
15 * I've gone completely out of my mind.
16 *
17 * They're coming to take me a away haha
18 * they're coming to take me a away hoho hihi haha
19 * to the funny farm where code is beautiful all the time ...
20 *
21 * (Condolences to Napoleon XIV)
22 */
23
24#include <linux/bug.h>
25#include <linux/kernel.h>
26#include <linux/types.h>
27#include <linux/smp.h>
28#include <linux/string.h>
29#include <linux/init.h>
30#include <linux/cache.h>
31
32#include <asm/cacheflush.h>
33#include <asm/cpu-type.h>
34#include <asm/pgtable.h>
35#include <asm/war.h>
36#include <asm/uasm.h>
37#include <asm/setup.h>
38
39/*
40 * TLB load/store/modify handlers.
41 *
42 * Only the fastpath gets synthesized at runtime; the slowpath for
43 * do_page_fault remains normal asm.
44 */
45extern void tlb_do_page_fault_0(void);
46extern void tlb_do_page_fault_1(void);
47
48struct work_registers {
49    int r1;
50    int r2;
51    int r3;
52};
53
54struct tlb_reg_save {
55    unsigned long a;
56    unsigned long b;
57} ____cacheline_aligned_in_smp;
58
59static struct tlb_reg_save handler_reg_save[NR_CPUS];
60
61static inline int r45k_bvahwbug(void)
62{
63    /* XXX: We should probe for the presence of this bug, but we don't. */
64    return 0;
65}
66
67static inline int r4k_250MHZhwbug(void)
68{
69    /* XXX: We should probe for the presence of this bug, but we don't. */
70    return 0;
71}
72
73static inline int __maybe_unused bcm1250_m3_war(void)
74{
75    return BCM1250_M3_WAR;
76}
77
78static inline int __maybe_unused r10000_llsc_war(void)
79{
80    return R10000_LLSC_WAR;
81}
82
83static int use_bbit_insns(void)
84{
85    switch (current_cpu_type()) {
86    case CPU_CAVIUM_OCTEON:
87    case CPU_CAVIUM_OCTEON_PLUS:
88    case CPU_CAVIUM_OCTEON2:
89    case CPU_CAVIUM_OCTEON3:
90        return 1;
91    default:
92        return 0;
93    }
94}
95
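/*
 * LWX does an indexed (base + register) load, so the generated walker
 * below can skip the explicit daddu when it indexes into the page tables.
 */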
96static int use_lwx_insns(void)
97{
98    switch (current_cpu_type()) {
99    case CPU_CAVIUM_OCTEON2:
100    case CPU_CAVIUM_OCTEON3:
101        return 1;
102    default:
103        return 0;
104    }
105}
106#if defined(CONFIG_CAVIUM_OCTEON_CVMSEG_SIZE) && \
107    CONFIG_CAVIUM_OCTEON_CVMSEG_SIZE > 0
108static bool scratchpad_available(void)
109{
110    return true;
111}
112static int scratchpad_offset(int i)
113{
114    /*
115     * CVMSEG starts at address -32768 and extends for
116     * CAVIUM_OCTEON_CVMSEG_SIZE 128 byte cache lines.
117     */
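    /*
     * For example (assuming CONFIG_CAVIUM_OCTEON_CVMSEG_SIZE == 2):
     * scratchpad_offset(0) == 2 * 128 - 8 * 1 - 32768 == -32520.
     */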
118    i += 1; /* Kernel use starts at the top and works down. */
119    return CONFIG_CAVIUM_OCTEON_CVMSEG_SIZE * 128 - (8 * i) - 32768;
120}
121#else
122static bool scratchpad_available(void)
123{
124    return false;
125}
126static int scratchpad_offset(int i)
127{
128    BUG();
129    /* Really unreachable, but evidently some GCC versions want this. */
130    return 0;
131}
132#endif
133/*
134 * Found by experiment: At least some revisions of the 4kc throw, under
135 * some circumstances, a machine check exception triggered by invalid
136 * values in the index register. Delaying the tlbp instruction until
137 * after the next branch, plus adding an additional nop in front of
138 * tlbwi/tlbwr avoids the invalid index register values. Nobody knows
139 * why; it's not an issue caused by the core RTL.
140 *
141 */
142static int m4kc_tlbp_war(void)
143{
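    /*
     * Only the company and processor ID fields are compared; the
     * revision byte is masked off so all 4Kc revisions match.
     */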
144    return (current_cpu_data.processor_id & 0xffff00) ==
145           (PRID_COMP_MIPS | PRID_IMP_4KC);
146}
147
148/* Handle labels (which must be positive integers). */
149enum label_id {
150    label_second_part = 1,
151    label_leave,
152    label_vmalloc,
153    label_vmalloc_done,
154    label_tlbw_hazard_0,
155    label_split = label_tlbw_hazard_0 + 8,
156    label_tlbl_goaround1,
157    label_tlbl_goaround2,
158    label_nopage_tlbl,
159    label_nopage_tlbs,
160    label_nopage_tlbm,
161    label_smp_pgtable_change,
162    label_r3000_write_probe_fail,
163    label_large_segbits_fault,
164#ifdef CONFIG_MIPS_HUGE_TLB_SUPPORT
165    label_tlb_huge_update,
166#endif
167};
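/*
 * Note that eight consecutive ids starting at label_tlbw_hazard_0 are
 * reserved, one per bgezl hazard-workaround instance emitted by
 * build_tlb_write_entry() below.
 */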
168
169UASM_L_LA(_second_part)
170UASM_L_LA(_leave)
171UASM_L_LA(_vmalloc)
172UASM_L_LA(_vmalloc_done)
173/* _tlbw_hazard_x is handled differently. */
174UASM_L_LA(_split)
175UASM_L_LA(_tlbl_goaround1)
176UASM_L_LA(_tlbl_goaround2)
177UASM_L_LA(_nopage_tlbl)
178UASM_L_LA(_nopage_tlbs)
179UASM_L_LA(_nopage_tlbm)
180UASM_L_LA(_smp_pgtable_change)
181UASM_L_LA(_r3000_write_probe_fail)
182UASM_L_LA(_large_segbits_fault)
183#ifdef CONFIG_MIPS_HUGE_TLB_SUPPORT
184UASM_L_LA(_tlb_huge_update)
185#endif
186
187static int hazard_instance;
188
189static void uasm_bgezl_hazard(u32 **p, struct uasm_reloc **r, int instance)
190{
191    switch (instance) {
192    case 0 ... 7:
193        uasm_il_bgezl(p, r, 0, label_tlbw_hazard_0 + instance);
194        return;
195    default:
196        BUG();
197    }
198}
199
200static void uasm_bgezl_label(struct uasm_label **l, u32 **p, int instance)
201{
202    switch (instance) {
203    case 0 ... 7:
204        uasm_build_label(l, *p, label_tlbw_hazard_0 + instance);
205        break;
206    default:
207        BUG();
208    }
209}
210
211/*
212 * pgtable bits are assigned dynamically depending on processor feature
213 * and statically based on kernel configuration. This spits out the actual
214 * values the kernel is using. Required to make sense of the disassembled
215 * TLB exception handlers.
216 */
217static void output_pgtable_bits_defines(void)
218{
219#define pr_define(fmt, ...) \
220    pr_debug("#define " fmt, ##__VA_ARGS__)
221
222    pr_debug("#include <asm/asm.h>\n");
223    pr_debug("#include <asm/regdef.h>\n");
224    pr_debug("\n");
225
226    pr_define("_PAGE_PRESENT_SHIFT %d\n", _PAGE_PRESENT_SHIFT);
227    pr_define("_PAGE_READ_SHIFT %d\n", _PAGE_READ_SHIFT);
228    pr_define("_PAGE_WRITE_SHIFT %d\n", _PAGE_WRITE_SHIFT);
229    pr_define("_PAGE_ACCESSED_SHIFT %d\n", _PAGE_ACCESSED_SHIFT);
230    pr_define("_PAGE_MODIFIED_SHIFT %d\n", _PAGE_MODIFIED_SHIFT);
231#ifdef CONFIG_MIPS_HUGE_TLB_SUPPORT
232    pr_define("_PAGE_HUGE_SHIFT %d\n", _PAGE_HUGE_SHIFT);
233    pr_define("_PAGE_SPLITTING_SHIFT %d\n", _PAGE_SPLITTING_SHIFT);
234#endif
235    if (cpu_has_rixi) {
236#ifdef _PAGE_NO_EXEC_SHIFT
237        pr_define("_PAGE_NO_EXEC_SHIFT %d\n", _PAGE_NO_EXEC_SHIFT);
238#endif
239#ifdef _PAGE_NO_READ_SHIFT
240        pr_define("_PAGE_NO_READ_SHIFT %d\n", _PAGE_NO_READ_SHIFT);
241#endif
242    }
243    pr_define("_PAGE_GLOBAL_SHIFT %d\n", _PAGE_GLOBAL_SHIFT);
244    pr_define("_PAGE_VALID_SHIFT %d\n", _PAGE_VALID_SHIFT);
245    pr_define("_PAGE_DIRTY_SHIFT %d\n", _PAGE_DIRTY_SHIFT);
246    pr_define("_PFN_SHIFT %d\n", _PFN_SHIFT);
247    pr_debug("\n");
248}
249
250static inline void dump_handler(const char *symbol, const u32 *handler, int count)
251{
252    int i;
253
254    pr_debug("LEAF(%s)\n", symbol);
255
256    pr_debug("\t.set push\n");
257    pr_debug("\t.set noreorder\n");
258
259    for (i = 0; i < count; i++)
260        pr_debug("\t.word\t0x%08x\t\t# %p\n", handler[i], &handler[i]);
261
262    pr_debug("\t.set\tpop\n");
263
264    pr_debug("\tEND(%s)\n", symbol);
265}
266
267/* The only general purpose registers allowed in TLB handlers. */
268#define K0 26
269#define K1 27
270
271/* Some CP0 registers (register number, select) */
272#define C0_INDEX 0, 0
273#define C0_ENTRYLO0 2, 0
274#define C0_TCBIND 2, 2
275#define C0_ENTRYLO1 3, 0
276#define C0_CONTEXT 4, 0
277#define C0_PAGEMASK 5, 0
278#define C0_BADVADDR 8, 0
279#define C0_ENTRYHI 10, 0
280#define C0_EPC 14, 0
281#define C0_XCONTEXT 20, 0
282
283#ifdef CONFIG_64BIT
284# define GET_CONTEXT(buf, reg) UASM_i_MFC0(buf, reg, C0_XCONTEXT)
285#else
286# define GET_CONTEXT(buf, reg) UASM_i_MFC0(buf, reg, C0_CONTEXT)
287#endif
288
289/* The worst case length of the handler is around 18 instructions for
290 * R3000-style TLBs and up to 63 instructions for R4000-style TLBs.
291 * Maximum space available is 32 instructions for R3000 and 64
292 * instructions for R4000.
293 *
294 * We deliberately chose a buffer size of 128, so we won't scribble
295 * over anything important on overflow before we panic.
296 */
297static u32 tlb_handler[128];
298
299/* simply assume worst case size for labels and relocs */
300static struct uasm_label labels[128];
301static struct uasm_reloc relocs[128];
302
303static int check_for_high_segbits;
304
305static unsigned int kscratch_used_mask;
306
307static inline int __maybe_unused c0_kscratch(void)
308{
309    switch (current_cpu_type()) {
310    case CPU_XLP:
311    case CPU_XLR:
312        return 22;
313    default:
314        return 31;
315    }
316}
317
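/*
 * Hand out the lowest-numbered KScratch register the CPU advertises that
 * is not already in use, or -1 if none is left.  For example (assuming
 * kscratch_mask == 0x3c and nothing allocated yet), ffs(0x3c) == 3, so
 * select 2 is returned first.
 */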
318static int allocate_kscratch(void)
319{
320    int r;
321    unsigned int a = cpu_data[0].kscratch_mask & ~kscratch_used_mask;
322
323    r = ffs(a);
324
325    if (r == 0)
326        return -1;
327
328    r--; /* make it zero based */
329
330    kscratch_used_mask |= (1 << r);
331
332    return r;
333}
334
335static int scratch_reg;
336static int pgd_reg;
337enum vmalloc64_mode {not_refill, refill_scratch, refill_noscratch};
338
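/*
 * Pick three working registers for the generated fastpath.  If a CP0
 * KScratch register is available, $1 is stashed there and k0/k1/$1 are
 * handed out; otherwise $1 and $2 are spilled to this CPU's
 * handler_reg_save slot (k0 is left pointing at it for the restore path)
 * and k1/$1/$2 are handed out.
 */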
339static struct work_registers build_get_work_registers(u32 **p)
340{
341    struct work_registers r;
342
343    if (scratch_reg >= 0) {
344        /* Save in CPU local C0_KScratch? */
345        UASM_i_MTC0(p, 1, c0_kscratch(), scratch_reg);
346        r.r1 = K0;
347        r.r2 = K1;
348        r.r3 = 1;
349        return r;
350    }
351
352    if (num_possible_cpus() > 1) {
353        /* Get smp_processor_id */
354        UASM_i_CPUID_MFC0(p, K0, SMP_CPUID_REG);
355        UASM_i_SRL_SAFE(p, K0, K0, SMP_CPUID_REGSHIFT);
356
357        /* handler_reg_save index in K0 */
358        UASM_i_SLL(p, K0, K0, ilog2(sizeof(struct tlb_reg_save)));
359
360        UASM_i_LA(p, K1, (long)&handler_reg_save);
361        UASM_i_ADDU(p, K0, K0, K1);
362    } else {
363        UASM_i_LA(p, K0, (long)&handler_reg_save);
364    }
365    /* K0 now points to save area, save $1 and $2 */
366    UASM_i_SW(p, 1, offsetof(struct tlb_reg_save, a), K0);
367    UASM_i_SW(p, 2, offsetof(struct tlb_reg_save, b), K0);
368
369    r.r1 = K1;
370    r.r2 = 1;
371    r.r3 = 2;
372    return r;
373}
374
375static void build_restore_work_registers(u32 **p)
376{
377    if (scratch_reg >= 0) {
378        UASM_i_MFC0(p, 1, c0_kscratch(), scratch_reg);
379        return;
380    }
381    /* K0 already points to save area, restore $1 and $2 */
382    UASM_i_LW(p, 1, offsetof(struct tlb_reg_save, a), K0);
383    UASM_i_LW(p, 2, offsetof(struct tlb_reg_save, b), K0);
384}
385
386#ifndef CONFIG_MIPS_PGD_C0_CONTEXT
387
388/*
389 * CONFIG_MIPS_PGD_C0_CONTEXT implies 64 bit and lack of pgd_current,
390 * so we cannot do R3000 under these circumstances.
391 *
392 * Declare pgd_current here instead of including mmu_context.h to avoid type
393 * conflicts for tlbmiss_handler_setup_pgd
394 */
395extern unsigned long pgd_current[];
396
397/*
398 * The R3000 TLB handler is simple.
399 */
400static void build_r3000_tlb_refill_handler(void)
401{
402    long pgdc = (long)pgd_current;
403    u32 *p;
404
405    memset(tlb_handler, 0, sizeof(tlb_handler));
406    p = tlb_handler;
407
408    uasm_i_mfc0(&p, K0, C0_BADVADDR);
409    uasm_i_lui(&p, K1, uasm_rel_hi(pgdc)); /* cp0 delay */
410    uasm_i_lw(&p, K1, uasm_rel_lo(pgdc), K1);
411    uasm_i_srl(&p, K0, K0, 22); /* load delay */
412    uasm_i_sll(&p, K0, K0, 2);
413    uasm_i_addu(&p, K1, K1, K0);
414    uasm_i_mfc0(&p, K0, C0_CONTEXT);
415    uasm_i_lw(&p, K1, 0, K1); /* cp0 delay */
416    uasm_i_andi(&p, K0, K0, 0xffc); /* load delay */
417    uasm_i_addu(&p, K1, K1, K0);
418    uasm_i_lw(&p, K0, 0, K1);
419    uasm_i_nop(&p); /* load delay */
420    uasm_i_mtc0(&p, K0, C0_ENTRYLO0);
421    uasm_i_mfc0(&p, K1, C0_EPC); /* cp0 delay */
422    uasm_i_tlbwr(&p); /* cp0 delay */
423    uasm_i_jr(&p, K1);
424    uasm_i_rfe(&p); /* branch delay */
425
426    if (p > tlb_handler + 32)
427        panic("TLB refill handler space exceeded");
428
429    pr_debug("Wrote TLB refill handler (%u instructions).\n",
430         (unsigned int)(p - tlb_handler));
431
432    memcpy((void *)ebase, tlb_handler, 0x80);
433
434    dump_handler("r3000_tlb_refill", (u32 *)ebase, 32);
435}
436#endif /* CONFIG_MIPS_PGD_C0_CONTEXT */
437
438/*
439 * The R4000 TLB handler is much more complicated. We have two
440 * consecutive handler areas with space for 32 instructions each.
441 * Since they aren't used at the same time, we can overflow into the
442 * other one. To keep things simple, we first assume linear space,
443 * then we relocate it to the final handler layout as needed.
444 */
445static u32 final_handler[64];
446
447/*
448 * Hazards
449 *
450 * From the IDT errata for the QED RM5230 (Nevada), processor revision 1.0:
451 * 2. A timing hazard exists for the TLBP instruction.
452 *
453 * stalling_instruction
454 * TLBP
455 *
456 * The JTLB is being read for the TLBP throughout the stall generated by the
457 * previous instruction. This is not really correct as the stalling instruction
458 * can modify the address used to access the JTLB. The failure symptom is that
459 * the TLBP instruction will use an address created for the stalling instruction
460 * and not the address held in C0_ENHI and thus report the wrong results.
461 *
462 * The software work-around is to not allow the instruction preceding the TLBP
463 * to stall - make it an NOP or some other instruction guaranteed not to stall.
464 *
465 * Errata 2 will not be fixed. This errata is also on the R5000.
466 *
467 * As if we MIPS hackers wouldn't know how to nop pipelines happy ...
468 */
469static void __maybe_unused build_tlb_probe_entry(u32 **p)
470{
471    switch (current_cpu_type()) {
472    /* Found by experiment: R4600 v2.0/R4700 needs this, too. */
473    case CPU_R4600:
474    case CPU_R4700:
475    case CPU_R5000:
476    case CPU_NEVADA:
477        uasm_i_nop(p);
478        uasm_i_tlbp(p);
479        break;
480
481    default:
482        uasm_i_tlbp(p);
483        break;
484    }
485}
486
487/*
488 * Write random or indexed TLB entry, and care about the hazards from
489 * the preceding mtc0 and for the following eret.
490 */
491enum tlb_write_entry { tlb_random, tlb_indexed };
492
493static void build_tlb_write_entry(u32 **p, struct uasm_label **l,
494                  struct uasm_reloc **r,
495                  enum tlb_write_entry wmode)
496{
497    void(*tlbw)(u32 **) = NULL;
498
499    switch (wmode) {
500    case tlb_random: tlbw = uasm_i_tlbwr; break;
501    case tlb_indexed: tlbw = uasm_i_tlbwi; break;
502    }
503
504    if (cpu_has_mips_r2) {
505        /*
506         * The architecture spec says an ehb is required here,
507         * but a number of cores do not have the hazard and
508         * using an ehb causes an expensive pipeline stall.
509         */
510        switch (current_cpu_type()) {
511        case CPU_M14KC:
512        case CPU_74K:
513            break;
514
515        default:
516            uasm_i_ehb(p);
517            break;
518        }
519        tlbw(p);
520        return;
521    }
522
523    switch (current_cpu_type()) {
524    case CPU_R4000PC:
525    case CPU_R4000SC:
526    case CPU_R4000MC:
527    case CPU_R4400PC:
528    case CPU_R4400SC:
529    case CPU_R4400MC:
530        /*
531         * This branch uses up a mtc0 hazard nop slot and saves
532         * two nops after the tlbw instruction.
533         */
534        uasm_bgezl_hazard(p, r, hazard_instance);
535        tlbw(p);
536        uasm_bgezl_label(l, p, hazard_instance);
537        hazard_instance++;
538        uasm_i_nop(p);
539        break;
540
541    case CPU_R4600:
542    case CPU_R4700:
543        uasm_i_nop(p);
544        tlbw(p);
545        uasm_i_nop(p);
546        break;
547
548    case CPU_R5000:
549    case CPU_NEVADA:
550        uasm_i_nop(p); /* QED specifies 2 nops hazard */
551        uasm_i_nop(p); /* QED specifies 2 nops hazard */
552        tlbw(p);
553        break;
554
555    case CPU_R4300:
556    case CPU_5KC:
557    case CPU_TX49XX:
558    case CPU_PR4450:
559    case CPU_XLR:
560        uasm_i_nop(p);
561        tlbw(p);
562        break;
563
564    case CPU_R10000:
565    case CPU_R12000:
566    case CPU_R14000:
567    case CPU_4KC:
568    case CPU_4KEC:
569    case CPU_M14KC:
570    case CPU_M14KEC:
571    case CPU_SB1:
572    case CPU_SB1A:
573    case CPU_4KSC:
574    case CPU_20KC:
575    case CPU_25KF:
576    case CPU_BMIPS32:
577    case CPU_BMIPS3300:
578    case CPU_BMIPS4350:
579    case CPU_BMIPS4380:
580    case CPU_BMIPS5000:
581    case CPU_LOONGSON2:
582    case CPU_R5500:
583        if (m4kc_tlbp_war())
584            uasm_i_nop(p);
585    case CPU_ALCHEMY:
586        tlbw(p);
587        break;
588
589    case CPU_RM7000:
590        uasm_i_nop(p);
591        uasm_i_nop(p);
592        uasm_i_nop(p);
593        uasm_i_nop(p);
594        tlbw(p);
595        break;
596
597    case CPU_VR4111:
598    case CPU_VR4121:
599    case CPU_VR4122:
600    case CPU_VR4181:
601    case CPU_VR4181A:
602        uasm_i_nop(p);
603        uasm_i_nop(p);
604        tlbw(p);
605        uasm_i_nop(p);
606        uasm_i_nop(p);
607        break;
608
609    case CPU_VR4131:
610    case CPU_VR4133:
611    case CPU_R5432:
612        uasm_i_nop(p);
613        uasm_i_nop(p);
614        tlbw(p);
615        break;
616
617    case CPU_JZRISC:
618        tlbw(p);
619        uasm_i_nop(p);
620        break;
621
622    default:
623        panic("No TLB refill handler yet (CPU type: %d)",
624              current_cpu_data.cputype);
625        break;
626    }
627}
628
629static __maybe_unused void build_convert_pte_to_entrylo(u32 **p,
630                            unsigned int reg)
631{
632    if (cpu_has_rixi) {
633        UASM_i_ROTR(p, reg, reg, ilog2(_PAGE_GLOBAL));
634    } else {
635#ifdef CONFIG_64BIT_PHYS_ADDR
636        uasm_i_dsrl_safe(p, reg, reg, ilog2(_PAGE_GLOBAL));
637#else
638        UASM_i_SRL(p, reg, reg, ilog2(_PAGE_GLOBAL));
639#endif
640    }
641}
642
643#ifdef CONFIG_MIPS_HUGE_TLB_SUPPORT
644
645static void build_restore_pagemask(u32 **p, struct uasm_reloc **r,
646                   unsigned int tmp, enum label_id lid,
647                   int restore_scratch)
648{
649    if (restore_scratch) {
650        /* Reset default page size */
651        if (PM_DEFAULT_MASK >> 16) {
652            uasm_i_lui(p, tmp, PM_DEFAULT_MASK >> 16);
653            uasm_i_ori(p, tmp, tmp, PM_DEFAULT_MASK & 0xffff);
654            uasm_i_mtc0(p, tmp, C0_PAGEMASK);
655            uasm_il_b(p, r, lid);
656        } else if (PM_DEFAULT_MASK) {
657            uasm_i_ori(p, tmp, 0, PM_DEFAULT_MASK);
658            uasm_i_mtc0(p, tmp, C0_PAGEMASK);
659            uasm_il_b(p, r, lid);
660        } else {
661            uasm_i_mtc0(p, 0, C0_PAGEMASK);
662            uasm_il_b(p, r, lid);
663        }
664        if (scratch_reg >= 0)
665            UASM_i_MFC0(p, 1, c0_kscratch(), scratch_reg);
666        else
667            UASM_i_LW(p, 1, scratchpad_offset(0), 0);
668    } else {
669        /* Reset default page size */
670        if (PM_DEFAULT_MASK >> 16) {
671            uasm_i_lui(p, tmp, PM_DEFAULT_MASK >> 16);
672            uasm_i_ori(p, tmp, tmp, PM_DEFAULT_MASK & 0xffff);
673            uasm_il_b(p, r, lid);
674            uasm_i_mtc0(p, tmp, C0_PAGEMASK);
675        } else if (PM_DEFAULT_MASK) {
676            uasm_i_ori(p, tmp, 0, PM_DEFAULT_MASK);
677            uasm_il_b(p, r, lid);
678            uasm_i_mtc0(p, tmp, C0_PAGEMASK);
679        } else {
680            uasm_il_b(p, r, lid);
681            uasm_i_mtc0(p, 0, C0_PAGEMASK);
682        }
683    }
684}
685
686static void build_huge_tlb_write_entry(u32 **p, struct uasm_label **l,
687                       struct uasm_reloc **r,
688                       unsigned int tmp,
689                       enum tlb_write_entry wmode,
690                       int restore_scratch)
691{
692    /* Set huge page tlb entry size */
693    uasm_i_lui(p, tmp, PM_HUGE_MASK >> 16);
694    uasm_i_ori(p, tmp, tmp, PM_HUGE_MASK & 0xffff);
695    uasm_i_mtc0(p, tmp, C0_PAGEMASK);
696
697    build_tlb_write_entry(p, l, r, wmode);
698
699    build_restore_pagemask(p, r, tmp, label_leave, restore_scratch);
700}
701
702/*
703 * Check if a huge PTE is present; if so, jump to LABEL.
704 */
705static void
706build_is_huge_pte(u32 **p, struct uasm_reloc **r, unsigned int tmp,
707          unsigned int pmd, int lid)
708{
709    UASM_i_LW(p, tmp, 0, pmd);
710    if (use_bbit_insns()) {
711        uasm_il_bbit1(p, r, tmp, ilog2(_PAGE_HUGE), lid);
712    } else {
713        uasm_i_andi(p, tmp, tmp, _PAGE_HUGE);
714        uasm_il_bnez(p, r, tmp, lid);
715    }
716}
717
718static void build_huge_update_entries(u32 **p, unsigned int pte,
719                      unsigned int tmp)
720{
721    int small_sequence;
722
723    /*
724     * A huge PTE describes an area the size of the
725     * configured huge page size. This is twice the size
726     * of the large TLB entry we intend to use.
727     * A TLB entry half the size of the configured
728     * huge page size is configured into entrylo0
729     * and entrylo1 to cover the contiguous huge PTE
730     * address space.
731     */
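    /*
     * For example (assuming 2 MB huge pages), HPAGE_SIZE >> 7 == 0x4000,
     * which fits in a 16-bit immediate, so the single ADDIU below is
     * enough to turn the entrylo0 value into the matching entrylo1 value.
     */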
732    small_sequence = (HPAGE_SIZE >> 7) < 0x10000;
733
734    /* We can clobber tmp. It isn't used after this. */
735    if (!small_sequence)
736        uasm_i_lui(p, tmp, HPAGE_SIZE >> (7 + 16));
737
738    build_convert_pte_to_entrylo(p, pte);
739    UASM_i_MTC0(p, pte, C0_ENTRYLO0); /* load it */
740    /* convert to entrylo1 */
741    if (small_sequence)
742        UASM_i_ADDIU(p, pte, pte, HPAGE_SIZE >> 7);
743    else
744        UASM_i_ADDU(p, pte, pte, tmp);
745
746    UASM_i_MTC0(p, pte, C0_ENTRYLO1); /* load it */
747}
748
749static void build_huge_handler_tail(u32 **p, struct uasm_reloc **r,
750                    struct uasm_label **l,
751                    unsigned int pte,
752                    unsigned int ptr)
753{
754#ifdef CONFIG_SMP
755    UASM_i_SC(p, pte, 0, ptr);
756    uasm_il_beqz(p, r, pte, label_tlb_huge_update);
757    UASM_i_LW(p, pte, 0, ptr); /* Needed because SC killed our PTE */
758#else
759    UASM_i_SW(p, pte, 0, ptr);
760#endif
761    build_huge_update_entries(p, pte, ptr);
762    build_huge_tlb_write_entry(p, l, r, pte, tlb_indexed, 0);
763}
764#endif /* CONFIG_MIPS_HUGE_TLB_SUPPORT */
765
766#ifdef CONFIG_64BIT
767/*
768 * TMP and PTR are scratch.
769 * TMP will be clobbered, PTR will hold the pmd entry.
770 */
771static void
772build_get_pmde64(u32 **p, struct uasm_label **l, struct uasm_reloc **r,
773         unsigned int tmp, unsigned int ptr)
774{
775#ifndef CONFIG_MIPS_PGD_C0_CONTEXT
776    long pgdc = (long)pgd_current;
777#endif
778    /*
779     * The vmalloc handling is not in the hotpath.
780     */
781    uasm_i_dmfc0(p, tmp, C0_BADVADDR);
782
783    if (check_for_high_segbits) {
784        /*
785         * The kernel currently implicitly assumes that the
786         * MIPS SEGBITS parameter for the processor is
787         * (PGDIR_SHIFT+PGDIR_BITS) or less, and will never
788         * allocate virtual addresses outside the maximum
789         * range for SEGBITS = (PGDIR_SHIFT+PGDIR_BITS). But
790         * that doesn't prevent user code from accessing the
791         * higher xuseg addresses. Here, we make sure that
792         * everything but the lower xuseg addresses goes down
793         * the module_alloc/vmalloc path.
794         */
795        uasm_i_dsrl_safe(p, ptr, tmp, PGDIR_SHIFT + PGD_ORDER + PAGE_SHIFT - 3);
796        uasm_il_bnez(p, r, ptr, label_vmalloc);
797    } else {
798        uasm_il_bltz(p, r, tmp, label_vmalloc);
799    }
800    /* No uasm_i_nop needed here, since the next insn doesn't touch TMP. */
801
802    if (pgd_reg != -1) {
803        /* pgd is in pgd_reg */
804        UASM_i_MFC0(p, ptr, c0_kscratch(), pgd_reg);
805    } else {
806#if defined(CONFIG_MIPS_PGD_C0_CONTEXT)
807        /*
808         * &pgd << 11 stored in CONTEXT [23..63].
809         */
810        UASM_i_MFC0(p, ptr, C0_CONTEXT);
811
812        /* Clear lower 23 bits of context. */
813        uasm_i_dins(p, ptr, 0, 0, 23);
814
815        /* 1 0 1 0 1 << 6 xkphys cached */
816        uasm_i_ori(p, ptr, ptr, 0x540);
817        uasm_i_drotr(p, ptr, ptr, 11);
818#elif defined(CONFIG_SMP)
819        UASM_i_CPUID_MFC0(p, ptr, SMP_CPUID_REG);
820        uasm_i_dsrl_safe(p, ptr, ptr, SMP_CPUID_PTRSHIFT);
821        UASM_i_LA_mostly(p, tmp, pgdc);
822        uasm_i_daddu(p, ptr, ptr, tmp);
823        uasm_i_dmfc0(p, tmp, C0_BADVADDR);
824        uasm_i_ld(p, ptr, uasm_rel_lo(pgdc), ptr);
825#else
826        UASM_i_LA_mostly(p, ptr, pgdc);
827        uasm_i_ld(p, ptr, uasm_rel_lo(pgdc), ptr);
828#endif
829    }
830
831    uasm_l_vmalloc_done(l, *p);
832
833    /* get pgd offset in bytes */
834    uasm_i_dsrl_safe(p, tmp, tmp, PGDIR_SHIFT - 3);
835
836    uasm_i_andi(p, tmp, tmp, (PTRS_PER_PGD - 1)<<3);
837    uasm_i_daddu(p, ptr, ptr, tmp); /* add in pgd offset */
838#ifndef __PAGETABLE_PMD_FOLDED
839    uasm_i_dmfc0(p, tmp, C0_BADVADDR); /* get faulting address */
840    uasm_i_ld(p, ptr, 0, ptr); /* get pmd pointer */
841    uasm_i_dsrl_safe(p, tmp, tmp, PMD_SHIFT-3); /* get pmd offset in bytes */
842    uasm_i_andi(p, tmp, tmp, (PTRS_PER_PMD - 1)<<3);
843    uasm_i_daddu(p, ptr, ptr, tmp); /* add in pmd offset */
844#endif
845}
846
847/*
848 * BVADDR is the faulting address, PTR is scratch.
849 * PTR will hold the pgd for vmalloc.
850 */
851static void
852build_get_pgd_vmalloc64(u32 **p, struct uasm_label **l, struct uasm_reloc **r,
853            unsigned int bvaddr, unsigned int ptr,
854            enum vmalloc64_mode mode)
855{
856    long swpd = (long)swapper_pg_dir;
857    int single_insn_swpd;
858    int did_vmalloc_branch = 0;
859
860    single_insn_swpd = uasm_in_compat_space_p(swpd) && !uasm_rel_lo(swpd);
861
862    uasm_l_vmalloc(l, *p);
863
864    if (mode != not_refill && check_for_high_segbits) {
865        if (single_insn_swpd) {
866            uasm_il_bltz(p, r, bvaddr, label_vmalloc_done);
867            uasm_i_lui(p, ptr, uasm_rel_hi(swpd));
868            did_vmalloc_branch = 1;
869            /* fall through */
870        } else {
871            uasm_il_bgez(p, r, bvaddr, label_large_segbits_fault);
872        }
873    }
874    if (!did_vmalloc_branch) {
875        if (uasm_in_compat_space_p(swpd) && !uasm_rel_lo(swpd)) {
876            uasm_il_b(p, r, label_vmalloc_done);
877            uasm_i_lui(p, ptr, uasm_rel_hi(swpd));
878        } else {
879            UASM_i_LA_mostly(p, ptr, swpd);
880            uasm_il_b(p, r, label_vmalloc_done);
881            if (uasm_in_compat_space_p(swpd))
882                uasm_i_addiu(p, ptr, ptr, uasm_rel_lo(swpd));
883            else
884                uasm_i_daddiu(p, ptr, ptr, uasm_rel_lo(swpd));
885        }
886    }
887    if (mode != not_refill && check_for_high_segbits) {
888        uasm_l_large_segbits_fault(l, *p);
889        /*
890         * We get here if we are an xsseg address, or if we are
891         * an xuseg address above (PGDIR_SHIFT+PGDIR_BITS) boundary.
892         *
893         * Ignoring xsseg (assumed disabled, so it would generate
894         * address errors?), the only remaining possibility
895         * is the upper xuseg addresses. On processors with
896         * TLB_SEGBITS <= PGDIR_SHIFT+PGDIR_BITS, these
897         * addresses would have taken an address error. We try
898         * to mimic that here by taking a load/istream page
899         * fault.
900         */
901        UASM_i_LA(p, ptr, (unsigned long)tlb_do_page_fault_0);
902        uasm_i_jr(p, ptr);
903
904        if (mode == refill_scratch) {
905            if (scratch_reg >= 0)
906                UASM_i_MFC0(p, 1, c0_kscratch(), scratch_reg);
907            else
908                UASM_i_LW(p, 1, scratchpad_offset(0), 0);
909        } else {
910            uasm_i_nop(p);
911        }
912    }
913}
914
915#else /* !CONFIG_64BIT */
916
917/*
918 * TMP and PTR are scratch.
919 * TMP will be clobbered, PTR will hold the pgd entry.
920 */
921static void __maybe_unused
922build_get_pgde32(u32 **p, unsigned int tmp, unsigned int ptr)
923{
924    if (pgd_reg != -1) {
925        /* pgd is in pgd_reg */
926        uasm_i_mfc0(p, ptr, c0_kscratch(), pgd_reg);
927        uasm_i_mfc0(p, tmp, C0_BADVADDR); /* get faulting address */
928    } else {
929        long pgdc = (long)pgd_current;
930
931        /* 32 bit SMP has smp_processor_id() stored in CONTEXT. */
932#ifdef CONFIG_SMP
933        uasm_i_mfc0(p, ptr, SMP_CPUID_REG);
934        UASM_i_LA_mostly(p, tmp, pgdc);
935        uasm_i_srl(p, ptr, ptr, SMP_CPUID_PTRSHIFT);
936        uasm_i_addu(p, ptr, tmp, ptr);
937#else
938        UASM_i_LA_mostly(p, ptr, pgdc);
939#endif
940        uasm_i_mfc0(p, tmp, C0_BADVADDR); /* get faulting address */
941        uasm_i_lw(p, ptr, uasm_rel_lo(pgdc), ptr);
942    }
943    uasm_i_srl(p, tmp, tmp, PGDIR_SHIFT); /* get pgd only bits */
944    uasm_i_sll(p, tmp, tmp, PGD_T_LOG2);
945    uasm_i_addu(p, ptr, ptr, tmp); /* add in pgd offset */
946}
947
948#endif /* !CONFIG_64BIT */
949
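/*
 * Convert the CP0 Context/XContext value into the byte offset of the
 * even/odd PTE pair inside the page table.  As a rough sanity check
 * (assuming a 32-bit kernel with 4K pages and 4-byte PTEs): BadVPN2 sits
 * at bit 4 and a PTE pair is 8 bytes, so shift == 4 - 3 == 1 and
 * mask == (1024 / 2 - 1) << 3 == 0xff8.
 */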
950static void build_adjust_context(u32 **p, unsigned int ctx)
951{
952    unsigned int shift = 4 - (PTE_T_LOG2 + 1) + PAGE_SHIFT - 12;
953    unsigned int mask = (PTRS_PER_PTE / 2 - 1) << (PTE_T_LOG2 + 1);
954
955    switch (current_cpu_type()) {
956    case CPU_VR41XX:
957    case CPU_VR4111:
958    case CPU_VR4121:
959    case CPU_VR4122:
960    case CPU_VR4131:
961    case CPU_VR4181:
962    case CPU_VR4181A:
963    case CPU_VR4133:
964        shift += 2;
965        break;
966
967    default:
968        break;
969    }
970
971    if (shift)
972        UASM_i_SRL(p, ctx, ctx, shift);
973    uasm_i_andi(p, ctx, ctx, mask);
974}
975
976static void build_get_ptep(u32 **p, unsigned int tmp, unsigned int ptr)
977{
978    /*
979     * Bug workaround for the Nevada. It seems as if under certain
980     * circumstances the move from cp0_context might produce a
981     * bogus result when the mfc0 instruction and its consumer are
982     * in different cachelines, or when a load instruction (probably
983     * any memory reference) sits between them.
984     */
985    switch (current_cpu_type()) {
986    case CPU_NEVADA:
987        UASM_i_LW(p, ptr, 0, ptr);
988        GET_CONTEXT(p, tmp); /* get context reg */
989        break;
990
991    default:
992        GET_CONTEXT(p, tmp); /* get context reg */
993        UASM_i_LW(p, ptr, 0, ptr);
994        break;
995    }
996
997    build_adjust_context(p, tmp);
998    UASM_i_ADDU(p, ptr, ptr, tmp); /* add in offset */
999}
1000
1001static void build_update_entries(u32 **p, unsigned int tmp, unsigned int ptep)
1002{
1003    /*
1004     * 64bit address support (36bit on a 32bit CPU) in a 32bit
1005     * Kernel is a special case. Only a few CPUs use it.
1006     */
1007#ifdef CONFIG_64BIT_PHYS_ADDR
1008    if (cpu_has_64bits) {
1009        uasm_i_ld(p, tmp, 0, ptep); /* get even pte */
1010        uasm_i_ld(p, ptep, sizeof(pte_t), ptep); /* get odd pte */
1011        if (cpu_has_rixi) {
1012            UASM_i_ROTR(p, tmp, tmp, ilog2(_PAGE_GLOBAL));
1013            UASM_i_MTC0(p, tmp, C0_ENTRYLO0); /* load it */
1014            UASM_i_ROTR(p, ptep, ptep, ilog2(_PAGE_GLOBAL));
1015        } else {
1016            uasm_i_dsrl_safe(p, tmp, tmp, ilog2(_PAGE_GLOBAL)); /* convert to entrylo0 */
1017            UASM_i_MTC0(p, tmp, C0_ENTRYLO0); /* load it */
1018            uasm_i_dsrl_safe(p, ptep, ptep, ilog2(_PAGE_GLOBAL)); /* convert to entrylo1 */
1019        }
1020        UASM_i_MTC0(p, ptep, C0_ENTRYLO1); /* load it */
1021    } else {
1022        int pte_off_even = sizeof(pte_t) / 2;
1023        int pte_off_odd = pte_off_even + sizeof(pte_t);
1024
1025        /* The pte entries are pre-shifted */
1026        uasm_i_lw(p, tmp, pte_off_even, ptep); /* get even pte */
1027        UASM_i_MTC0(p, tmp, C0_ENTRYLO0); /* load it */
1028        uasm_i_lw(p, ptep, pte_off_odd, ptep); /* get odd pte */
1029        UASM_i_MTC0(p, ptep, C0_ENTRYLO1); /* load it */
1030    }
1031#else
1032    UASM_i_LW(p, tmp, 0, ptep); /* get even pte */
1033    UASM_i_LW(p, ptep, sizeof(pte_t), ptep); /* get odd pte */
1034    if (r45k_bvahwbug())
1035        build_tlb_probe_entry(p);
1036    if (cpu_has_rixi) {
1037        UASM_i_ROTR(p, tmp, tmp, ilog2(_PAGE_GLOBAL));
1038        if (r4k_250MHZhwbug())
1039            UASM_i_MTC0(p, 0, C0_ENTRYLO0);
1040        UASM_i_MTC0(p, tmp, C0_ENTRYLO0); /* load it */
1041        UASM_i_ROTR(p, ptep, ptep, ilog2(_PAGE_GLOBAL));
1042    } else {
1043        UASM_i_SRL(p, tmp, tmp, ilog2(_PAGE_GLOBAL)); /* convert to entrylo0 */
1044        if (r4k_250MHZhwbug())
1045            UASM_i_MTC0(p, 0, C0_ENTRYLO0);
1046        UASM_i_MTC0(p, tmp, C0_ENTRYLO0); /* load it */
1047        UASM_i_SRL(p, ptep, ptep, ilog2(_PAGE_GLOBAL)); /* convert to entrylo1 */
1048        if (r45k_bvahwbug())
1049            uasm_i_mfc0(p, tmp, C0_INDEX);
1050    }
1051    if (r4k_250MHZhwbug())
1052        UASM_i_MTC0(p, 0, C0_ENTRYLO1);
1053    UASM_i_MTC0(p, ptep, C0_ENTRYLO1); /* load it */
1054#endif
1055}
1056
1057struct mips_huge_tlb_info {
1058    int huge_pte;
1059    int restore_scratch;
1060};
1061
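/*
 * Build an optimized refill fastpath that works in k0/k1 plus one extra
 * register ($1), preserved either in a CP0 KScratch register or in the
 * OCTEON CVMSEG scratchpad.  The return value tells the caller which
 * register ends up holding the (possibly huge) PTE and whether the tail
 * code still has to restore the scratch register.
 */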
1062static struct mips_huge_tlb_info
1063build_fast_tlb_refill_handler (u32 **p, struct uasm_label **l,
1064                   struct uasm_reloc **r, unsigned int tmp,
1065                   unsigned int ptr, int c0_scratch_reg)
1066{
1067    struct mips_huge_tlb_info rv;
1068    unsigned int even, odd;
1069    int vmalloc_branch_delay_filled = 0;
1070    const int scratch = 1; /* Our extra working register */
1071
1072    rv.huge_pte = scratch;
1073    rv.restore_scratch = 0;
1074
1075    if (check_for_high_segbits) {
1076        UASM_i_MFC0(p, tmp, C0_BADVADDR);
1077
1078        if (pgd_reg != -1)
1079            UASM_i_MFC0(p, ptr, c0_kscratch(), pgd_reg);
1080        else
1081            UASM_i_MFC0(p, ptr, C0_CONTEXT);
1082
1083        if (c0_scratch_reg >= 0)
1084            UASM_i_MTC0(p, scratch, c0_kscratch(), c0_scratch_reg);
1085        else
1086            UASM_i_SW(p, scratch, scratchpad_offset(0), 0);
1087
1088        uasm_i_dsrl_safe(p, scratch, tmp,
1089                 PGDIR_SHIFT + PGD_ORDER + PAGE_SHIFT - 3);
1090        uasm_il_bnez(p, r, scratch, label_vmalloc);
1091
1092        if (pgd_reg == -1) {
1093            vmalloc_branch_delay_filled = 1;
1094            /* Clear lower 23 bits of context. */
1095            uasm_i_dins(p, ptr, 0, 0, 23);
1096        }
1097    } else {
1098        if (pgd_reg != -1)
1099            UASM_i_MFC0(p, ptr, c0_kscratch(), pgd_reg);
1100        else
1101            UASM_i_MFC0(p, ptr, C0_CONTEXT);
1102
1103        UASM_i_MFC0(p, tmp, C0_BADVADDR);
1104
1105        if (c0_scratch_reg >= 0)
1106            UASM_i_MTC0(p, scratch, c0_kscratch(), c0_scratch_reg);
1107        else
1108            UASM_i_SW(p, scratch, scratchpad_offset(0), 0);
1109
1110        if (pgd_reg == -1)
1111            /* Clear lower 23 bits of context. */
1112            uasm_i_dins(p, ptr, 0, 0, 23);
1113
1114        uasm_il_bltz(p, r, tmp, label_vmalloc);
1115    }
1116
1117    if (pgd_reg == -1) {
1118        vmalloc_branch_delay_filled = 1;
1119        /* 1 0 1 0 1 << 6 xkphys cached */
1120        uasm_i_ori(p, ptr, ptr, 0x540);
1121        uasm_i_drotr(p, ptr, ptr, 11);
1122    }
1123
1124#ifdef __PAGETABLE_PMD_FOLDED
1125#define LOC_PTEP scratch
1126#else
1127#define LOC_PTEP ptr
1128#endif
1129
1130    if (!vmalloc_branch_delay_filled)
1131        /* get pgd offset in bytes */
1132        uasm_i_dsrl_safe(p, scratch, tmp, PGDIR_SHIFT - 3);
1133
1134    uasm_l_vmalloc_done(l, *p);
1135
1136    /*
1137     *                        tmp        ptr
1138     * fall-through case =    badvaddr   *pgd_current
1139     * vmalloc case      =    badvaddr   swapper_pg_dir
1140     */
1141
1142    if (vmalloc_branch_delay_filled)
1143        /* get pgd offset in bytes */
1144        uasm_i_dsrl_safe(p, scratch, tmp, PGDIR_SHIFT - 3);
1145
1146#ifdef __PAGETABLE_PMD_FOLDED
1147    GET_CONTEXT(p, tmp); /* get context reg */
1148#endif
1149    uasm_i_andi(p, scratch, scratch, (PTRS_PER_PGD - 1) << 3);
1150
1151    if (use_lwx_insns()) {
1152        UASM_i_LWX(p, LOC_PTEP, scratch, ptr);
1153    } else {
1154        uasm_i_daddu(p, ptr, ptr, scratch); /* add in pgd offset */
1155        uasm_i_ld(p, LOC_PTEP, 0, ptr); /* get pmd pointer */
1156    }
1157
1158#ifndef __PAGETABLE_PMD_FOLDED
1159    /* get pmd offset in bytes */
1160    uasm_i_dsrl_safe(p, scratch, tmp, PMD_SHIFT - 3);
1161    uasm_i_andi(p, scratch, scratch, (PTRS_PER_PMD - 1) << 3);
1162    GET_CONTEXT(p, tmp); /* get context reg */
1163
1164    if (use_lwx_insns()) {
1165        UASM_i_LWX(p, scratch, scratch, ptr);
1166    } else {
1167        uasm_i_daddu(p, ptr, ptr, scratch); /* add in pmd offset */
1168        UASM_i_LW(p, scratch, 0, ptr);
1169    }
1170#endif
1171    /* Adjust the context during the load latency. */
1172    build_adjust_context(p, tmp);
1173
1174#ifdef CONFIG_MIPS_HUGE_TLB_SUPPORT
1175    uasm_il_bbit1(p, r, scratch, ilog2(_PAGE_HUGE), label_tlb_huge_update);
1176    /*
1177     * In the LWX case we don't want to do the load in the
1178     * delay slot. It cannot issue in the same cycle and may be
1179     * speculative and unneeded.
1180     */
1181    if (use_lwx_insns())
1182        uasm_i_nop(p);
1183#endif /* CONFIG_MIPS_HUGE_TLB_SUPPORT */
1184
1185
1186    /* build_update_entries */
1187    if (use_lwx_insns()) {
1188        even = ptr;
1189        odd = tmp;
1190        UASM_i_LWX(p, even, scratch, tmp);
1191        UASM_i_ADDIU(p, tmp, tmp, sizeof(pte_t));
1192        UASM_i_LWX(p, odd, scratch, tmp);
1193    } else {
1194        UASM_i_ADDU(p, ptr, scratch, tmp); /* add in offset */
1195        even = tmp;
1196        odd = ptr;
1197        UASM_i_LW(p, even, 0, ptr); /* get even pte */
1198        UASM_i_LW(p, odd, sizeof(pte_t), ptr); /* get odd pte */
1199    }
1200    if (cpu_has_rixi) {
1201        uasm_i_drotr(p, even, even, ilog2(_PAGE_GLOBAL));
1202        UASM_i_MTC0(p, even, C0_ENTRYLO0); /* load it */
1203        uasm_i_drotr(p, odd, odd, ilog2(_PAGE_GLOBAL));
1204    } else {
1205        uasm_i_dsrl_safe(p, even, even, ilog2(_PAGE_GLOBAL));
1206        UASM_i_MTC0(p, even, C0_ENTRYLO0); /* load it */
1207        uasm_i_dsrl_safe(p, odd, odd, ilog2(_PAGE_GLOBAL));
1208    }
1209    UASM_i_MTC0(p, odd, C0_ENTRYLO1); /* load it */
1210
1211    if (c0_scratch_reg >= 0) {
1212        UASM_i_MFC0(p, scratch, c0_kscratch(), c0_scratch_reg);
1213        build_tlb_write_entry(p, l, r, tlb_random);
1214        uasm_l_leave(l, *p);
1215        rv.restore_scratch = 1;
1216    } else if (PAGE_SHIFT == 14 || PAGE_SHIFT == 13) {
1217        build_tlb_write_entry(p, l, r, tlb_random);
1218        uasm_l_leave(l, *p);
1219        UASM_i_LW(p, scratch, scratchpad_offset(0), 0);
1220    } else {
1221        UASM_i_LW(p, scratch, scratchpad_offset(0), 0);
1222        build_tlb_write_entry(p, l, r, tlb_random);
1223        uasm_l_leave(l, *p);
1224        rv.restore_scratch = 1;
1225    }
1226
1227    uasm_i_eret(p); /* return from trap */
1228
1229    return rv;
1230}
1231
1232/*
1233 * For a 64-bit kernel, we are using the 64-bit XTLB refill exception
1234 * because EXL == 0. If we wrap, we can also use the 32 instruction
1235 * slots before the XTLB refill exception handler which belong to the
1236 * unused TLB refill exception.
1237 */
1238#define MIPS64_REFILL_INSNS 32
1239
1240static void build_r4000_tlb_refill_handler(void)
1241{
1242    u32 *p = tlb_handler;
1243    struct uasm_label *l = labels;
1244    struct uasm_reloc *r = relocs;
1245    u32 *f;
1246    unsigned int final_len;
1247    struct mips_huge_tlb_info htlb_info __maybe_unused;
1248    enum vmalloc64_mode vmalloc_mode __maybe_unused;
1249
1250    memset(tlb_handler, 0, sizeof(tlb_handler));
1251    memset(labels, 0, sizeof(labels));
1252    memset(relocs, 0, sizeof(relocs));
1253    memset(final_handler, 0, sizeof(final_handler));
1254
1255    if ((scratch_reg >= 0 || scratchpad_available()) && use_bbit_insns()) {
1256        htlb_info = build_fast_tlb_refill_handler(&p, &l, &r, K0, K1,
1257                              scratch_reg);
1258        vmalloc_mode = refill_scratch;
1259    } else {
1260        htlb_info.huge_pte = K0;
1261        htlb_info.restore_scratch = 0;
1262        vmalloc_mode = refill_noscratch;
1263        /*
1264         * create the plain linear handler
1265         */
1266        if (bcm1250_m3_war()) {
1267            unsigned int segbits = 44;
1268
1269            uasm_i_dmfc0(&p, K0, C0_BADVADDR);
1270            uasm_i_dmfc0(&p, K1, C0_ENTRYHI);
1271            uasm_i_xor(&p, K0, K0, K1);
1272            uasm_i_dsrl_safe(&p, K1, K0, 62);
1273            uasm_i_dsrl_safe(&p, K0, K0, 12 + 1);
1274            uasm_i_dsll_safe(&p, K0, K0, 64 + 12 + 1 - segbits);
1275            uasm_i_or(&p, K0, K0, K1);
1276            uasm_il_bnez(&p, &r, K0, label_leave);
1277            /* No need for uasm_i_nop */
1278        }
1279
1280#ifdef CONFIG_64BIT
1281        build_get_pmde64(&p, &l, &r, K0, K1); /* get pmd in K1 */
1282#else
1283        build_get_pgde32(&p, K0, K1); /* get pgd in K1 */
1284#endif
1285
1286#ifdef CONFIG_MIPS_HUGE_TLB_SUPPORT
1287        build_is_huge_pte(&p, &r, K0, K1, label_tlb_huge_update);
1288#endif
1289
1290        build_get_ptep(&p, K0, K1);
1291        build_update_entries(&p, K0, K1);
1292        build_tlb_write_entry(&p, &l, &r, tlb_random);
1293        uasm_l_leave(&l, p);
1294        uasm_i_eret(&p); /* return from trap */
1295    }
1296#ifdef CONFIG_MIPS_HUGE_TLB_SUPPORT
1297    uasm_l_tlb_huge_update(&l, p);
1298    build_huge_update_entries(&p, htlb_info.huge_pte, K1);
1299    build_huge_tlb_write_entry(&p, &l, &r, K0, tlb_random,
1300                   htlb_info.restore_scratch);
1301#endif
1302
1303#ifdef CONFIG_64BIT
1304    build_get_pgd_vmalloc64(&p, &l, &r, K0, K1, vmalloc_mode);
1305#endif
1306
1307    /*
1308     * Overflow check: For the 64bit handler, we need at least one
1309     * free instruction slot for the wrap-around branch. In the worst
1310     * case, if the intended insertion point is a delay slot, we
1311     * need three, with the second nop'ed and the third being
1312     * unused.
1313     */
1314    switch (boot_cpu_type()) {
1315    default:
1316        if (sizeof(long) == 4) {
1317    case CPU_LOONGSON2:
1318        /* Loongson2 ebase is different from r4k; we have more space */
1319            if ((p - tlb_handler) > 64)
1320                panic("TLB refill handler space exceeded");
1321            /*
1322             * Now fold the handler in the TLB refill handler space.
1323             */
1324            f = final_handler;
1325            /* Simplest case, just copy the handler. */
1326            uasm_copy_handler(relocs, labels, tlb_handler, p, f);
1327            final_len = p - tlb_handler;
1328            break;
1329        } else {
1330            if (((p - tlb_handler) > (MIPS64_REFILL_INSNS * 2) - 1)
1331                || (((p - tlb_handler) > (MIPS64_REFILL_INSNS * 2) - 3)
1332                && uasm_insn_has_bdelay(relocs,
1333                            tlb_handler + MIPS64_REFILL_INSNS - 3)))
1334                panic("TLB refill handler space exceeded");
1335            /*
1336             * Now fold the handler in the TLB refill handler space.
1337             */
1338            f = final_handler + MIPS64_REFILL_INSNS;
1339            if ((p - tlb_handler) <= MIPS64_REFILL_INSNS) {
1340                /* Just copy the handler. */
1341                uasm_copy_handler(relocs, labels, tlb_handler, p, f);
1342                final_len = p - tlb_handler;
1343            } else {
1344#ifdef CONFIG_MIPS_HUGE_TLB_SUPPORT
1345                const enum label_id ls = label_tlb_huge_update;
1346#else
1347                const enum label_id ls = label_vmalloc;
1348#endif
1349                u32 *split;
1350                int ov = 0;
1351                int i;
1352
1353                for (i = 0; i < ARRAY_SIZE(labels) && labels[i].lab != ls; i++)
1354                    ;
1355                BUG_ON(i == ARRAY_SIZE(labels));
1356                split = labels[i].addr;
1357
1358                /*
1359                 * See if we have overflown one way or the other.
1360                 */
1361                if (split > tlb_handler + MIPS64_REFILL_INSNS ||
1362                    split < p - MIPS64_REFILL_INSNS)
1363                    ov = 1;
1364
1365                if (ov) {
1366                    /*
1367                     * Split two instructions before the end. One
1368                     * for the branch and one for the instruction
1369                     * in the delay slot.
1370                     */
1371                    split = tlb_handler + MIPS64_REFILL_INSNS - 2;
1372
1373                    /*
1374                     * If the branch would fall in a delay slot,
1375                     * we must back up an additional instruction
1376                     * so that it is no longer in a delay slot.
1377                     */
1378                    if (uasm_insn_has_bdelay(relocs, split - 1))
1379                        split--;
1380                }
1381                /* Copy first part of the handler. */
1382                uasm_copy_handler(relocs, labels, tlb_handler, split, f);
1383                f += split - tlb_handler;
1384
1385                if (ov) {
1386                    /* Insert branch. */
1387                    uasm_l_split(&l, final_handler);
1388                    uasm_il_b(&f, &r, label_split);
1389                    if (uasm_insn_has_bdelay(relocs, split))
1390                        uasm_i_nop(&f);
1391                    else {
1392                        uasm_copy_handler(relocs, labels,
1393                                  split, split + 1, f);
1394                        uasm_move_labels(labels, f, f + 1, -1);
1395                        f++;
1396                        split++;
1397                    }
1398                }
1399
1400                /* Copy the rest of the handler. */
1401                uasm_copy_handler(relocs, labels, split, p, final_handler);
1402                final_len = (f - (final_handler + MIPS64_REFILL_INSNS)) +
1403                        (p - split);
1404            }
1405        }
1406        break;
1407    }
1408
1409    uasm_resolve_relocs(relocs, labels);
1410    pr_debug("Wrote TLB refill handler (%u instructions).\n",
1411         final_len);
1412
1413    memcpy((void *)ebase, final_handler, 0x100);
1414
1415    dump_handler("r4000_tlb_refill", (u32 *)ebase, 64);
1416}
1417
1418extern u32 handle_tlbl[], handle_tlbl_end[];
1419extern u32 handle_tlbs[], handle_tlbs_end[];
1420extern u32 handle_tlbm[], handle_tlbm_end[];
1421extern u32 tlbmiss_handler_setup_pgd[], tlbmiss_handler_setup_pgd_end[];
1422
1423static void build_setup_pgd(void)
1424{
1425    const int a0 = 4;
1426    const int __maybe_unused a1 = 5;
1427    const int __maybe_unused a2 = 6;
1428    u32 *p = tlbmiss_handler_setup_pgd;
1429    const int tlbmiss_handler_setup_pgd_size =
1430        tlbmiss_handler_setup_pgd_end - tlbmiss_handler_setup_pgd;
1431#ifndef CONFIG_MIPS_PGD_C0_CONTEXT
1432    long pgdc = (long)pgd_current;
1433#endif
1434
1435    memset(tlbmiss_handler_setup_pgd, 0, tlbmiss_handler_setup_pgd_size *
1436                    sizeof(tlbmiss_handler_setup_pgd[0]));
1437    memset(labels, 0, sizeof(labels));
1438    memset(relocs, 0, sizeof(relocs));
1439    pgd_reg = allocate_kscratch();
1440#ifdef CONFIG_MIPS_PGD_C0_CONTEXT
1441    if (pgd_reg == -1) {
1442        struct uasm_label *l = labels;
1443        struct uasm_reloc *r = relocs;
1444
1445        /* PGD << 11 in c0_Context */
1446        /*
1447         * If it is a ckseg0 address, convert to a physical
1448         * address. Shifting right by 29 and adding 4 will
1449         * result in zero for these addresses.
1450         *
1451         */
1452        UASM_i_SRA(&p, a1, a0, 29);
1453        UASM_i_ADDIU(&p, a1, a1, 4);
1454        uasm_il_bnez(&p, &r, a1, label_tlbl_goaround1);
1455        uasm_i_nop(&p);
1456        uasm_i_dinsm(&p, a0, 0, 29, 64 - 29);
1457        uasm_l_tlbl_goaround1(&l, p);
1458        UASM_i_SLL(&p, a0, a0, 11);
1459        uasm_i_jr(&p, 31);
1460        UASM_i_MTC0(&p, a0, C0_CONTEXT);
1461    } else {
1462        /* PGD in c0_KScratch */
1463        uasm_i_jr(&p, 31);
1464        UASM_i_MTC0(&p, a0, c0_kscratch(), pgd_reg);
1465    }
1466#else
1467#ifdef CONFIG_SMP
1468    /* Save PGD to pgd_current[smp_processor_id()] */
1469    UASM_i_CPUID_MFC0(&p, a1, SMP_CPUID_REG);
1470    UASM_i_SRL_SAFE(&p, a1, a1, SMP_CPUID_PTRSHIFT);
1471    UASM_i_LA_mostly(&p, a2, pgdc);
1472    UASM_i_ADDU(&p, a2, a2, a1);
1473    UASM_i_SW(&p, a0, uasm_rel_lo(pgdc), a2);
1474#else
1475    UASM_i_LA_mostly(&p, a2, pgdc);
1476    UASM_i_SW(&p, a0, uasm_rel_lo(pgdc), a2);
1477#endif /* SMP */
1478    uasm_i_jr(&p, 31);
1479
1480    /* if pgd_reg is allocated, save PGD also to scratch register */
1481    if (pgd_reg != -1)
1482        UASM_i_MTC0(&p, a0, c0_kscratch(), pgd_reg);
1483    else
1484        uasm_i_nop(&p);
1485#endif
1486    if (p >= tlbmiss_handler_setup_pgd_end)
1487        panic("tlbmiss_handler_setup_pgd space exceeded");
1488
1489    uasm_resolve_relocs(relocs, labels);
1490    pr_debug("Wrote tlbmiss_handler_setup_pgd (%u instructions).\n",
1491         (unsigned int)(p - tlbmiss_handler_setup_pgd));
1492
1493    dump_handler("tlbmiss_handler", tlbmiss_handler_setup_pgd,
1494                    tlbmiss_handler_setup_pgd_size);
1495}
1496
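/*
 * Load a PTE; on SMP this is an LL/LLD so that the matching SC in
 * iPTE_SW() can detect a racing page table update and retry.
 */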
1497static void
1498iPTE_LW(u32 **p, unsigned int pte, unsigned int ptr)
1499{
1500#ifdef CONFIG_SMP
1501# ifdef CONFIG_64BIT_PHYS_ADDR
1502    if (cpu_has_64bits)
1503        uasm_i_lld(p, pte, 0, ptr);
1504    else
1505# endif
1506        UASM_i_LL(p, pte, 0, ptr);
1507#else
1508# ifdef CONFIG_64BIT_PHYS_ADDR
1509    if (cpu_has_64bits)
1510        uasm_i_ld(p, pte, 0, ptr);
1511    else
1512# endif
1513        UASM_i_LW(p, pte, 0, ptr);
1514#endif
1515}
1516
1517static void
1518iPTE_SW(u32 **p, struct uasm_reloc **r, unsigned int pte, unsigned int ptr,
1519    unsigned int mode)
1520{
1521#ifdef CONFIG_64BIT_PHYS_ADDR
1522    unsigned int hwmode = mode & (_PAGE_VALID | _PAGE_DIRTY);
1523#endif
1524
1525    uasm_i_ori(p, pte, pte, mode);
1526#ifdef CONFIG_SMP
1527# ifdef CONFIG_64BIT_PHYS_ADDR
1528    if (cpu_has_64bits)
1529        uasm_i_scd(p, pte, 0, ptr);
1530    else
1531# endif
1532        UASM_i_SC(p, pte, 0, ptr);
1533
1534    if (r10000_llsc_war())
1535        uasm_il_beqzl(p, r, pte, label_smp_pgtable_change);
1536    else
1537        uasm_il_beqz(p, r, pte, label_smp_pgtable_change);
1538
1539# ifdef CONFIG_64BIT_PHYS_ADDR
1540    if (!cpu_has_64bits) {
1541        /* no uasm_i_nop needed */
1542        uasm_i_ll(p, pte, sizeof(pte_t) / 2, ptr);
1543        uasm_i_ori(p, pte, pte, hwmode);
1544        uasm_i_sc(p, pte, sizeof(pte_t) / 2, ptr);
1545        uasm_il_beqz(p, r, pte, label_smp_pgtable_change);
1546        /* no uasm_i_nop needed */
1547        uasm_i_lw(p, pte, 0, ptr);
1548    } else
1549        uasm_i_nop(p);
1550# else
1551    uasm_i_nop(p);
1552# endif
1553#else
1554# ifdef CONFIG_64BIT_PHYS_ADDR
1555    if (cpu_has_64bits)
1556        uasm_i_sd(p, pte, 0, ptr);
1557    else
1558# endif
1559        UASM_i_SW(p, pte, 0, ptr);
1560
1561# ifdef CONFIG_64BIT_PHYS_ADDR
1562    if (!cpu_has_64bits) {
1563        uasm_i_lw(p, pte, sizeof(pte_t) / 2, ptr);
1564        uasm_i_ori(p, pte, pte, hwmode);
1565        uasm_i_sw(p, pte, sizeof(pte_t) / 2, ptr);
1566        uasm_i_lw(p, pte, 0, ptr);
1567    }
1568# endif
1569#endif
1570}
1571
1572/*
1573 * Check if PTE is present, if not then jump to LABEL. PTR points to
1574 * Check if the PTE is present; if not, jump to LABEL. PTR points to
1575 * the page table where this PTE is located, and the PTE will be re-loaded
1576 * with its original value.
1577static void
1578build_pte_present(u32 **p, struct uasm_reloc **r,
1579          int pte, int ptr, int scratch, enum label_id lid)
1580{
1581    int t = scratch >= 0 ? scratch : pte;
1582
1583    if (cpu_has_rixi) {
1584        if (use_bbit_insns()) {
1585            uasm_il_bbit0(p, r, pte, ilog2(_PAGE_PRESENT), lid);
1586            uasm_i_nop(p);
1587        } else {
1588            uasm_i_andi(p, t, pte, _PAGE_PRESENT);
1589            uasm_il_beqz(p, r, t, lid);
1590            if (pte == t)
1591                /* You lose the SMP race :-(*/
1592                iPTE_LW(p, pte, ptr);
1593        }
1594    } else {
1595        uasm_i_andi(p, t, pte, _PAGE_PRESENT | _PAGE_READ);
1596        uasm_i_xori(p, t, t, _PAGE_PRESENT | _PAGE_READ);
1597        uasm_il_bnez(p, r, t, lid);
1598        if (pte == t)
1599            /* You lose the SMP race :-(*/
1600            iPTE_LW(p, pte, ptr);
1601    }
1602}
1603
1604/* Make PTE valid, store result in PTR. */
1605static void
1606build_make_valid(u32 **p, struct uasm_reloc **r, unsigned int pte,
1607         unsigned int ptr)
1608{
1609    unsigned int mode = _PAGE_VALID | _PAGE_ACCESSED;
1610
1611    iPTE_SW(p, r, pte, ptr, mode);
1612}
1613
1614/*
1615 * Check if the PTE can be written to; if not, branch to LABEL. Regardless,
1616 * restore the PTE with the value from PTR when done.
1617 */
1618static void
1619build_pte_writable(u32 **p, struct uasm_reloc **r,
1620           unsigned int pte, unsigned int ptr, int scratch,
1621           enum label_id lid)
1622{
1623    int t = scratch >= 0 ? scratch : pte;
1624
1625    uasm_i_andi(p, t, pte, _PAGE_PRESENT | _PAGE_WRITE);
1626    uasm_i_xori(p, t, t, _PAGE_PRESENT | _PAGE_WRITE);
1627    uasm_il_bnez(p, r, t, lid);
1628    if (pte == t)
1629        /* You lose the SMP race :-(*/
1630        iPTE_LW(p, pte, ptr);
1631    else
1632        uasm_i_nop(p);
1633}
1634
1635/* Make PTE writable, update software status bits as well, then store
1636 * at PTR.
1637 */
1638static void
1639build_make_write(u32 **p, struct uasm_reloc **r, unsigned int pte,
1640         unsigned int ptr)
1641{
1642    unsigned int mode = (_PAGE_ACCESSED | _PAGE_MODIFIED | _PAGE_VALID
1643                 | _PAGE_DIRTY);
1644
1645    iPTE_SW(p, r, pte, ptr, mode);
1646}
1647
1648/*
1649 * Check if the PTE can be modified; if not, branch to LABEL. Regardless,
1650 * restore the PTE with the value from PTR when done.
1651 */
1652static void
1653build_pte_modifiable(u32 **p, struct uasm_reloc **r,
1654             unsigned int pte, unsigned int ptr, int scratch,
1655             enum label_id lid)
1656{
1657    if (use_bbit_insns()) {
1658        uasm_il_bbit0(p, r, pte, ilog2(_PAGE_WRITE), lid);
1659        uasm_i_nop(p);
1660    } else {
1661        int t = scratch >= 0 ? scratch : pte;
1662        uasm_i_andi(p, t, pte, _PAGE_WRITE);
1663        uasm_il_beqz(p, r, t, lid);
1664        if (pte == t)
1665            /* You lose the SMP race :-(*/
1666            iPTE_LW(p, pte, ptr);
1667    }
1668}
1669
1670#ifndef CONFIG_MIPS_PGD_C0_CONTEXT
1671
1672
1673/*
1674 * R3000 style TLB load/store/modify handlers.
1675 */
1676
1677/*
1678 * This places the pte into ENTRYLO0 and writes it with tlbwi.
1679 * Then it returns.
1680 */
1681static void
1682build_r3000_pte_reload_tlbwi(u32 **p, unsigned int pte, unsigned int tmp)
1683{
1684    uasm_i_mtc0(p, pte, C0_ENTRYLO0); /* cp0 delay */
1685    uasm_i_mfc0(p, tmp, C0_EPC); /* cp0 delay */
1686    uasm_i_tlbwi(p);
1687    uasm_i_jr(p, tmp);
1688    uasm_i_rfe(p); /* branch delay */
1689}
1690
1691/*
1692 * This places the pte into ENTRYLO0 and writes it with tlbwi
1693 * or tlbwr as appropriate. This is because the index register
1694 * may have the probe fail bit set as a result of a trap on a
1695 * kseg2 access, i.e. without refill. Then it returns.
1696 */
1697static void
1698build_r3000_tlb_reload_write(u32 **p, struct uasm_label **l,
1699                 struct uasm_reloc **r, unsigned int pte,
1700                 unsigned int tmp)
1701{
1702    uasm_i_mfc0(p, tmp, C0_INDEX);
1703    uasm_i_mtc0(p, pte, C0_ENTRYLO0); /* cp0 delay */
1704    uasm_il_bltz(p, r, tmp, label_r3000_write_probe_fail); /* cp0 delay */
1705    uasm_i_mfc0(p, tmp, C0_EPC); /* branch delay */
1706    uasm_i_tlbwi(p); /* cp0 delay */
1707    uasm_i_jr(p, tmp);
1708    uasm_i_rfe(p); /* branch delay */
1709    uasm_l_r3000_write_probe_fail(l, *p);
1710    uasm_i_tlbwr(p); /* cp0 delay */
1711    uasm_i_jr(p, tmp);
1712    uasm_i_rfe(p); /* branch delay */
1713}
1714
1715static void
1716build_r3000_tlbchange_handler_head(u32 **p, unsigned int pte,
1717                   unsigned int ptr)
1718{
1719    long pgdc = (long)pgd_current;
1720
1721    uasm_i_mfc0(p, pte, C0_BADVADDR);
1722    uasm_i_lui(p, ptr, uasm_rel_hi(pgdc)); /* cp0 delay */
1723    uasm_i_lw(p, ptr, uasm_rel_lo(pgdc), ptr);
1724    uasm_i_srl(p, pte, pte, 22); /* load delay */
1725    uasm_i_sll(p, pte, pte, 2);
1726    uasm_i_addu(p, ptr, ptr, pte);
1727    uasm_i_mfc0(p, pte, C0_CONTEXT);
1728    uasm_i_lw(p, ptr, 0, ptr); /* cp0 delay */
1729    uasm_i_andi(p, pte, pte, 0xffc); /* load delay */
1730    uasm_i_addu(p, ptr, ptr, pte);
1731    uasm_i_lw(p, pte, 0, ptr);
1732    uasm_i_tlbp(p); /* load delay */
1733}
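/*
 * For the K0/K1 register assignment used by the callers below, the head
 * emitted above comes out roughly as (illustrative):
 *
 *	mfc0	k0, c0_badvaddr
 *	lui	k1, %hi(pgd_current)
 *	lw	k1, %lo(pgd_current)(k1)
 *	srl	k0, k0, 22		# pgd index
 *	sll	k0, k0, 2
 *	addu	k1, k1, k0
 *	mfc0	k0, c0_context		# BadVPN << 2 in the low bits
 *	lw	k1, 0(k1)		# pgd entry = page table address
 *	andi	k0, k0, 0xffc		# pte offset within the table
 *	addu	k1, k1, k0
 *	lw	k0, 0(k1)		# the pte itself
 *	tlbp
 */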
1734
1735static void build_r3000_tlb_load_handler(void)
1736{
1737    u32 *p = handle_tlbl;
1738    const int handle_tlbl_size = handle_tlbl_end - handle_tlbl;
1739    struct uasm_label *l = labels;
1740    struct uasm_reloc *r = relocs;
1741
1742    memset(handle_tlbl, 0, handle_tlbl_size * sizeof(handle_tlbl[0]));
1743    memset(labels, 0, sizeof(labels));
1744    memset(relocs, 0, sizeof(relocs));
1745
1746    build_r3000_tlbchange_handler_head(&p, K0, K1);
1747    build_pte_present(&p, &r, K0, K1, -1, label_nopage_tlbl);
1748    uasm_i_nop(&p); /* load delay */
1749    build_make_valid(&p, &r, K0, K1);
1750    build_r3000_tlb_reload_write(&p, &l, &r, K0, K1);
1751
1752    uasm_l_nopage_tlbl(&l, p);
1753    uasm_i_j(&p, (unsigned long)tlb_do_page_fault_0 & 0x0fffffff);
1754    uasm_i_nop(&p);
1755
1756    if (p >= handle_tlbl_end)
1757        panic("TLB load handler fastpath space exceeded");
1758
1759    uasm_resolve_relocs(relocs, labels);
1760    pr_debug("Wrote TLB load handler fastpath (%u instructions).\n",
1761         (unsigned int)(p - handle_tlbl));
1762
1763    dump_handler("r3000_tlb_load", handle_tlbl, handle_tlbl_size);
1764}
1765
1766static void build_r3000_tlb_store_handler(void)
1767{
1768    u32 *p = handle_tlbs;
1769    const int handle_tlbs_size = handle_tlbs_end - handle_tlbs;
1770    struct uasm_label *l = labels;
1771    struct uasm_reloc *r = relocs;
1772
1773    memset(handle_tlbs, 0, handle_tlbs_size * sizeof(handle_tlbs[0]));
1774    memset(labels, 0, sizeof(labels));
1775    memset(relocs, 0, sizeof(relocs));
1776
1777    build_r3000_tlbchange_handler_head(&p, K0, K1);
1778    build_pte_writable(&p, &r, K0, K1, -1, label_nopage_tlbs);
1779    uasm_i_nop(&p); /* load delay */
1780    build_make_write(&p, &r, K0, K1);
1781    build_r3000_tlb_reload_write(&p, &l, &r, K0, K1);
1782
1783    uasm_l_nopage_tlbs(&l, p);
1784    uasm_i_j(&p, (unsigned long)tlb_do_page_fault_1 & 0x0fffffff);
1785    uasm_i_nop(&p);
1786
1787    if (p >= handle_tlbs_end)
1788        panic("TLB store handler fastpath space exceeded");
1789
1790    uasm_resolve_relocs(relocs, labels);
1791    pr_debug("Wrote TLB store handler fastpath (%u instructions).\n",
1792         (unsigned int)(p - handle_tlbs));
1793
1794    dump_handler("r3000_tlb_store", handle_tlbs, handle_tlbs_size);
1795}
1796
1797static void build_r3000_tlb_modify_handler(void)
1798{
1799    u32 *p = handle_tlbm;
1800    const int handle_tlbm_size = handle_tlbm_end - handle_tlbm;
1801    struct uasm_label *l = labels;
1802    struct uasm_reloc *r = relocs;
1803
1804    memset(handle_tlbm, 0, handle_tlbm_size * sizeof(handle_tlbm[0]));
1805    memset(labels, 0, sizeof(labels));
1806    memset(relocs, 0, sizeof(relocs));
1807
1808    build_r3000_tlbchange_handler_head(&p, K0, K1);
1809    build_pte_modifiable(&p, &r, K0, K1, -1, label_nopage_tlbm);
1810    uasm_i_nop(&p); /* load delay */
1811    build_make_write(&p, &r, K0, K1);
1812    build_r3000_pte_reload_tlbwi(&p, K0, K1);
1813
1814    uasm_l_nopage_tlbm(&l, p);
1815    uasm_i_j(&p, (unsigned long)tlb_do_page_fault_1 & 0x0fffffff);
1816    uasm_i_nop(&p);
1817
1818    if (p >= handle_tlbm_end)
1819        panic("TLB modify handler fastpath space exceeded");
1820
1821    uasm_resolve_relocs(relocs, labels);
1822    pr_debug("Wrote TLB modify handler fastpath (%u instructions).\n",
1823         (unsigned int)(p - handle_tlbm));
1824
1825    dump_handler("r3000_tlb_modify", handle_tlbm, handle_tlbm_size);
1826}
1827#endif /* CONFIG_MIPS_PGD_C0_CONTEXT */
1828
1829/*
1830 * R4000 style TLB load/store/modify handlers.
1831 */
1832static struct work_registers
1833build_r4000_tlbchange_handler_head(u32 **p, struct uasm_label **l,
1834                   struct uasm_reloc **r)
1835{
1836    struct work_registers wr = build_get_work_registers(p);
1837
1838#ifdef CONFIG_64BIT
1839    build_get_pmde64(p, l, r, wr.r1, wr.r2); /* get pmd in ptr */
1840#else
1841    build_get_pgde32(p, wr.r1, wr.r2); /* get pgd in ptr */
1842#endif
1843
1844#ifdef CONFIG_MIPS_HUGE_TLB_SUPPORT
1845    /*
1846     * For huge tlb entries, pmd doesn't contain an address but
1847     * instead contains the tlb pte. Check the PAGE_HUGE bit and
1848     * see if we need to jump to huge tlb processing.
1849     */
1850    build_is_huge_pte(p, r, wr.r1, wr.r2, label_tlb_huge_update);
1851#endif
1852
1853    UASM_i_MFC0(p, wr.r1, C0_BADVADDR);
1854    UASM_i_LW(p, wr.r2, 0, wr.r2);
1855    UASM_i_SRL(p, wr.r1, wr.r1, PAGE_SHIFT + PTE_ORDER - PTE_T_LOG2);
1856    uasm_i_andi(p, wr.r1, wr.r1, (PTRS_PER_PTE - 1) << PTE_T_LOG2);
1857    UASM_i_ADDU(p, wr.r2, wr.r2, wr.r1);
1858
1859#ifdef CONFIG_SMP
1860    uasm_l_smp_pgtable_change(l, *p);
1861#endif
1862    iPTE_LW(p, wr.r1, wr.r2); /* get even pte */
1863    if (!m4kc_tlbp_war())
1864        build_tlb_probe_entry(p);
1865    return wr;
1866}
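/*
 * On return wr.r2 points at the pte selected by BadVAddr and wr.r1
 * holds its current value; with huge TLB support configured, a huge
 * pmd has already branched off to label_tlb_huge_update before the
 * pte was dereferenced.
 */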
1867
1868static void
1869build_r4000_tlbchange_handler_tail(u32 **p, struct uasm_label **l,
1870                   struct uasm_reloc **r, unsigned int tmp,
1871                   unsigned int ptr)
1872{
1873    uasm_i_ori(p, ptr, ptr, sizeof(pte_t));  /* set the pte-pair bit ... */
1874    uasm_i_xori(p, ptr, ptr, sizeof(pte_t)); /* ... then clear it: point at the even pte */
1875    build_update_entries(p, tmp, ptr);
1876    build_tlb_write_entry(p, l, r, tlb_indexed);
1877    uasm_l_leave(l, *p);
1878    build_restore_work_registers(p);
1879    uasm_i_eret(p); /* return from trap */
1880
1881#ifdef CONFIG_64BIT
1882    build_get_pgd_vmalloc64(p, l, r, tmp, ptr, not_refill);
1883#endif
1884}
1885
1886static void build_r4000_tlb_load_handler(void)
1887{
1888    u32 *p = handle_tlbl;
1889    const int handle_tlbl_size = handle_tlbl_end - handle_tlbl;
1890    struct uasm_label *l = labels;
1891    struct uasm_reloc *r = relocs;
1892    struct work_registers wr;
1893
1894    memset(handle_tlbl, 0, handle_tlbl_size * sizeof(handle_tlbl[0]));
1895    memset(labels, 0, sizeof(labels));
1896    memset(relocs, 0, sizeof(relocs));
1897
1898    if (bcm1250_m3_war()) {
1899        unsigned int segbits = 44;
1900
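        /*
         * BCM1250 M3 workaround: compare BadVAddr and EntryHi on the
         * VPN2 field and the topmost region bits (the low 13 bits and
         * anything above the implemented segment bits are masked off);
         * if they disagree the exception is spurious, so just leave.
         */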
1901        uasm_i_dmfc0(&p, K0, C0_BADVADDR);
1902        uasm_i_dmfc0(&p, K1, C0_ENTRYHI);
1903        uasm_i_xor(&p, K0, K0, K1);
1904        uasm_i_dsrl_safe(&p, K1, K0, 62);
1905        uasm_i_dsrl_safe(&p, K0, K0, 12 + 1);
1906        uasm_i_dsll_safe(&p, K0, K0, 64 + 12 + 1 - segbits);
1907        uasm_i_or(&p, K0, K0, K1);
1908        uasm_il_bnez(&p, &r, K0, label_leave);
1909        /* No need for uasm_i_nop */
1910    }
1911
1912    wr = build_r4000_tlbchange_handler_head(&p, &l, &r);
1913    build_pte_present(&p, &r, wr.r1, wr.r2, wr.r3, label_nopage_tlbl);
1914    if (m4kc_tlbp_war())
1915        build_tlb_probe_entry(&p);
1916
1917    if (cpu_has_rixi) {
1918        /*
1919         * If the page is not _PAGE_VALID, RI or XI could not
1920         * have triggered it. Skip the expensive test.
1921         */
1922        if (use_bbit_insns()) {
1923            uasm_il_bbit0(&p, &r, wr.r1, ilog2(_PAGE_VALID),
1924                      label_tlbl_goaround1);
1925        } else {
1926            uasm_i_andi(&p, wr.r3, wr.r1, _PAGE_VALID);
1927            uasm_il_beqz(&p, &r, wr.r3, label_tlbl_goaround1);
1928        }
1929        uasm_i_nop(&p);
1930
1931        uasm_i_tlbr(&p);
1932
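        /*
         * Reading EntryLo after tlbr needs the CP0 hazard cleared:
         * MIPS R2 cores get an ehb here, while the Octeon cases in the
         * switch below branch straight past it.
         */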
1933        switch (current_cpu_type()) {
1934        default:
1935            if (cpu_has_mips_r2) {
1936                uasm_i_ehb(&p);
1937
1938        case CPU_CAVIUM_OCTEON:
1939        case CPU_CAVIUM_OCTEON_PLUS:
1940        case CPU_CAVIUM_OCTEON2:
1941                break;
1942            }
1943        }
1944
1945        /* Examine entrylo 0 or 1 based on ptr. */
1946        if (use_bbit_insns()) {
1947            uasm_i_bbit0(&p, wr.r2, ilog2(sizeof(pte_t)), 8);
1948        } else {
1949            uasm_i_andi(&p, wr.r3, wr.r2, sizeof(pte_t));
1950            uasm_i_beqz(&p, wr.r3, 8);
1951        }
1952        /* load it in the delay slot */
1953        UASM_i_MFC0(&p, wr.r3, C0_ENTRYLO0);
1954        /* load it if ptr is odd */
1955        UASM_i_MFC0(&p, wr.r3, C0_ENTRYLO1);
1956        /*
1957         * If the entryLo (now in wr.r3) is valid (bit 1), RI or
1958         * XI must have triggered it.
1959         */
1960        if (use_bbit_insns()) {
1961            uasm_il_bbit1(&p, &r, wr.r3, 1, label_nopage_tlbl);
1962            uasm_i_nop(&p);
1963            uasm_l_tlbl_goaround1(&l, p);
1964        } else {
1965            uasm_i_andi(&p, wr.r3, wr.r3, 2);
1966            uasm_il_bnez(&p, &r, wr.r3, label_nopage_tlbl);
1967            uasm_i_nop(&p);
1968        }
1969        uasm_l_tlbl_goaround1(&l, p);
1970    }
1971    build_make_valid(&p, &r, wr.r1, wr.r2);
1972    build_r4000_tlbchange_handler_tail(&p, &l, &r, wr.r1, wr.r2);
1973
1974#ifdef CONFIG_MIPS_HUGE_TLB_SUPPORT
1975    /*
1976     * This is the entry point when build_r4000_tlbchange_handler_head
1977     * spots a huge page.
1978     */
1979    uasm_l_tlb_huge_update(&l, p);
1980    iPTE_LW(&p, wr.r1, wr.r2);
1981    build_pte_present(&p, &r, wr.r1, wr.r2, wr.r3, label_nopage_tlbl);
1982    build_tlb_probe_entry(&p);
1983
1984    if (cpu_has_rixi) {
1985        /*
1986         * If the page is not _PAGE_VALID, RI or XI could not
1987         * have triggered it. Skip the expensive test.
1988         */
1989        if (use_bbit_insns()) {
1990            uasm_il_bbit0(&p, &r, wr.r1, ilog2(_PAGE_VALID),
1991                      label_tlbl_goaround2);
1992        } else {
1993            uasm_i_andi(&p, wr.r3, wr.r1, _PAGE_VALID);
1994            uasm_il_beqz(&p, &r, wr.r3, label_tlbl_goaround2);
1995        }
1996        uasm_i_nop(&p);
1997
1998        uasm_i_tlbr(&p);
1999
2000        switch (current_cpu_type()) {
2001        default:
2002            if (cpu_has_mips_r2) {
2003                uasm_i_ehb(&p);
2004
2005        case CPU_CAVIUM_OCTEON:
2006        case CPU_CAVIUM_OCTEON_PLUS:
2007        case CPU_CAVIUM_OCTEON2:
2008                break;
2009            }
2010        }
2011
2012        /* Examine entrylo 0 or 1 based on ptr. */
2013        if (use_bbit_insns()) {
2014            uasm_i_bbit0(&p, wr.r2, ilog2(sizeof(pte_t)), 8);
2015        } else {
2016            uasm_i_andi(&p, wr.r3, wr.r2, sizeof(pte_t));
2017            uasm_i_beqz(&p, wr.r3, 8);
2018        }
2019        /* load it in the delay slot */
2020        UASM_i_MFC0(&p, wr.r3, C0_ENTRYLO0);
2021        /* load it if ptr is odd */
2022        UASM_i_MFC0(&p, wr.r3, C0_ENTRYLO1);
2023        /*
2024         * If the entryLo (now in wr.r3) is valid (bit 1), RI or
2025         * XI must have triggered it.
2026         */
2027        if (use_bbit_insns()) {
2028            uasm_il_bbit0(&p, &r, wr.r3, 1, label_tlbl_goaround2);
2029        } else {
2030            uasm_i_andi(&p, wr.r3, wr.r3, 2);
2031            uasm_il_beqz(&p, &r, wr.r3, label_tlbl_goaround2);
2032        }
2033        if (PM_DEFAULT_MASK == 0)
2034            uasm_i_nop(&p);
2035        /*
2036         * We clobbered C0_PAGEMASK, restore it. On the other branch
2037         * it is restored in build_huge_tlb_write_entry.
2038         */
2039        build_restore_pagemask(&p, &r, wr.r3, label_nopage_tlbl, 0);
2040
2041        uasm_l_tlbl_goaround2(&l, p);
2042    }
2043    uasm_i_ori(&p, wr.r1, wr.r1, (_PAGE_ACCESSED | _PAGE_VALID));
2044    build_huge_handler_tail(&p, &r, &l, wr.r1, wr.r2);
2045#endif
2046
2047    uasm_l_nopage_tlbl(&l, p);
2048    build_restore_work_registers(&p);
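    /*
     * A microMIPS tlb_do_page_fault_0 has bit 0 of its address set; in
     * that case jump through a register so the ISA bit is preserved,
     * otherwise a plain j reaches the slow path directly.
     */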
2049#ifdef CONFIG_CPU_MICROMIPS
2050    if ((unsigned long)tlb_do_page_fault_0 & 1) {
2051        uasm_i_lui(&p, K0, uasm_rel_hi((long)tlb_do_page_fault_0));
2052        uasm_i_addiu(&p, K0, K0, uasm_rel_lo((long)tlb_do_page_fault_0));
2053        uasm_i_jr(&p, K0);
2054    } else
2055#endif
2056    uasm_i_j(&p, (unsigned long)tlb_do_page_fault_0 & 0x0fffffff);
2057    uasm_i_nop(&p);
2058
2059    if (p >= handle_tlbl_end)
2060        panic("TLB load handler fastpath space exceeded");
2061
2062    uasm_resolve_relocs(relocs, labels);
2063    pr_debug("Wrote TLB load handler fastpath (%u instructions).\n",
2064         (unsigned int)(p - handle_tlbl));
2065
2066    dump_handler("r4000_tlb_load", handle_tlbl, handle_tlbl_size);
2067}
2068
2069static void build_r4000_tlb_store_handler(void)
2070{
2071    u32 *p = handle_tlbs;
2072    const int handle_tlbs_size = handle_tlbs_end - handle_tlbs;
2073    struct uasm_label *l = labels;
2074    struct uasm_reloc *r = relocs;
2075    struct work_registers wr;
2076
2077    memset(handle_tlbs, 0, handle_tlbs_size * sizeof(handle_tlbs[0]));
2078    memset(labels, 0, sizeof(labels));
2079    memset(relocs, 0, sizeof(relocs));
2080
2081    wr = build_r4000_tlbchange_handler_head(&p, &l, &r);
2082    build_pte_writable(&p, &r, wr.r1, wr.r2, wr.r3, label_nopage_tlbs);
2083    if (m4kc_tlbp_war())
2084        build_tlb_probe_entry(&p);
2085    build_make_write(&p, &r, wr.r1, wr.r2);
2086    build_r4000_tlbchange_handler_tail(&p, &l, &r, wr.r1, wr.r2);
2087
2088#ifdef CONFIG_MIPS_HUGE_TLB_SUPPORT
2089    /*
2090     * This is the entry point when
2091     * build_r4000_tlbchange_handler_head spots a huge page.
2092     */
2093    uasm_l_tlb_huge_update(&l, p);
2094    iPTE_LW(&p, wr.r1, wr.r2);
2095    build_pte_writable(&p, &r, wr.r1, wr.r2, wr.r3, label_nopage_tlbs);
2096    build_tlb_probe_entry(&p);
2097    uasm_i_ori(&p, wr.r1, wr.r1,
2098           _PAGE_ACCESSED | _PAGE_MODIFIED | _PAGE_VALID | _PAGE_DIRTY);
2099    build_huge_handler_tail(&p, &r, &l, wr.r1, wr.r2);
2100#endif
2101
2102    uasm_l_nopage_tlbs(&l, p);
2103    build_restore_work_registers(&p);
2104#ifdef CONFIG_CPU_MICROMIPS
2105    if ((unsigned long)tlb_do_page_fault_1 & 1) {
2106        uasm_i_lui(&p, K0, uasm_rel_hi((long)tlb_do_page_fault_1));
2107        uasm_i_addiu(&p, K0, K0, uasm_rel_lo((long)tlb_do_page_fault_1));
2108        uasm_i_jr(&p, K0);
2109    } else
2110#endif
2111    uasm_i_j(&p, (unsigned long)tlb_do_page_fault_1 & 0x0fffffff);
2112    uasm_i_nop(&p);
2113
2114    if (p >= handle_tlbs_end)
2115        panic("TLB store handler fastpath space exceeded");
2116
2117    uasm_resolve_relocs(relocs, labels);
2118    pr_debug("Wrote TLB store handler fastpath (%u instructions).\n",
2119         (unsigned int)(p - handle_tlbs));
2120
2121    dump_handler("r4000_tlb_store", handle_tlbs, handle_tlbs_size);
2122}
2123
2124static void build_r4000_tlb_modify_handler(void)
2125{
2126    u32 *p = handle_tlbm;
2127    const int handle_tlbm_size = handle_tlbm_end - handle_tlbm;
2128    struct uasm_label *l = labels;
2129    struct uasm_reloc *r = relocs;
2130    struct work_registers wr;
2131
2132    memset(handle_tlbm, 0, handle_tlbm_size * sizeof(handle_tlbm[0]));
2133    memset(labels, 0, sizeof(labels));
2134    memset(relocs, 0, sizeof(relocs));
2135
2136    wr = build_r4000_tlbchange_handler_head(&p, &l, &r);
2137    build_pte_modifiable(&p, &r, wr.r1, wr.r2, wr.r3, label_nopage_tlbm);
2138    if (m4kc_tlbp_war())
2139        build_tlb_probe_entry(&p);
2140    /* Present and writable bits set, set accessed and dirty bits. */
2141    build_make_write(&p, &r, wr.r1, wr.r2);
2142    build_r4000_tlbchange_handler_tail(&p, &l, &r, wr.r1, wr.r2);
2143
2144#ifdef CONFIG_MIPS_HUGE_TLB_SUPPORT
2145    /*
2146     * This is the entry point when
2147     * build_r4000_tlbchange_handler_head spots a huge page.
2148     */
2149    uasm_l_tlb_huge_update(&l, p);
2150    iPTE_LW(&p, wr.r1, wr.r2);
2151    build_pte_modifiable(&p, &r, wr.r1, wr.r2, wr.r3, label_nopage_tlbm);
2152    build_tlb_probe_entry(&p);
2153    uasm_i_ori(&p, wr.r1, wr.r1,
2154           _PAGE_ACCESSED | _PAGE_MODIFIED | _PAGE_VALID | _PAGE_DIRTY);
2155    build_huge_handler_tail(&p, &r, &l, wr.r1, wr.r2);
2156#endif
2157
2158    uasm_l_nopage_tlbm(&l, p);
2159    build_restore_work_registers(&p);
2160#ifdef CONFIG_CPU_MICROMIPS
2161    if ((unsigned long)tlb_do_page_fault_1 & 1) {
2162        uasm_i_lui(&p, K0, uasm_rel_hi((long)tlb_do_page_fault_1));
2163        uasm_i_addiu(&p, K0, K0, uasm_rel_lo((long)tlb_do_page_fault_1));
2164        uasm_i_jr(&p, K0);
2165    } else
2166#endif
2167    uasm_i_j(&p, (unsigned long)tlb_do_page_fault_1 & 0x0fffffff);
2168    uasm_i_nop(&p);
2169
2170    if (p >= handle_tlbm_end)
2171        panic("TLB modify handler fastpath space exceeded");
2172
2173    uasm_resolve_relocs(relocs, labels);
2174    pr_debug("Wrote TLB modify handler fastpath (%u instructions).\n",
2175         (unsigned int)(p - handle_tlbm));
2176
2177    dump_handler("r4000_tlb_modify", handle_tlbm, handle_tlbm_size);
2178}
2179
2180static void flush_tlb_handlers(void)
2181{
2182    local_flush_icache_range((unsigned long)handle_tlbl,
2183               (unsigned long)handle_tlbl_end);
2184    local_flush_icache_range((unsigned long)handle_tlbs,
2185               (unsigned long)handle_tlbs_end);
2186    local_flush_icache_range((unsigned long)handle_tlbm,
2187               (unsigned long)handle_tlbm_end);
2188    local_flush_icache_range((unsigned long)tlbmiss_handler_setup_pgd,
2189               (unsigned long)tlbmiss_handler_setup_pgd_end);
2190}
2191
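/*
 * Entry point for TLB handler synthesis, normally reached from
 * tlb_init() on each CPU as it is brought up.
 */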
2192void build_tlb_refill_handler(void)
2193{
2194    /*
2195     * The refill handler is generated per-CPU; multi-node systems
2196     * may have local storage for it. The other handlers are only
2197     * needed once.
2198     */
2199    static int run_once = 0;
2200
2201    output_pgtable_bits_defines();
2202
2203#ifdef CONFIG_64BIT
2204    check_for_high_segbits = current_cpu_data.vmbits > (PGDIR_SHIFT + PGD_ORDER + PAGE_SHIFT - 3);
2205#endif
2206
2207    switch (current_cpu_type()) {
2208    case CPU_R2000:
2209    case CPU_R3000:
2210    case CPU_R3000A:
2211    case CPU_R3081E:
2212    case CPU_TX3912:
2213    case CPU_TX3922:
2214    case CPU_TX3927:
2215#ifndef CONFIG_MIPS_PGD_C0_CONTEXT
2216        if (cpu_has_local_ebase)
2217            build_r3000_tlb_refill_handler();
2218        if (!run_once) {
2219            if (!cpu_has_local_ebase)
2220                build_r3000_tlb_refill_handler();
2221            build_setup_pgd();
2222            build_r3000_tlb_load_handler();
2223            build_r3000_tlb_store_handler();
2224            build_r3000_tlb_modify_handler();
2225            flush_tlb_handlers();
2226            run_once++;
2227        }
2228#else
2229        panic("No R3000 TLB refill handler");
2230#endif
2231        break;
2232
2233    case CPU_R6000:
2234    case CPU_R6000A:
2235        panic("No R6000 TLB refill handler yet");
2236        break;
2237
2238    case CPU_R8000:
2239        panic("No R8000 TLB refill handler yet");
2240        break;
2241
2242    default:
2243        if (!run_once) {
2244            scratch_reg = allocate_kscratch();
2245            build_setup_pgd();
2246            build_r4000_tlb_load_handler();
2247            build_r4000_tlb_store_handler();
2248            build_r4000_tlb_modify_handler();
2249            if (!cpu_has_local_ebase)
2250                build_r4000_tlb_refill_handler();
2251            flush_tlb_handlers();
2252            run_once++;
2253        }
2254        if (cpu_has_local_ebase)
2255            build_r4000_tlb_refill_handler();
2256    }
2257}
2258
