Root/arch/s390/kernel/nmi.c

1/*
2 * Machine check handler
3 *
4 * Copyright IBM Corp. 2000,2009
5 * Author(s): Ingo Adlung <adlung@de.ibm.com>,
6 * Martin Schwidefsky <schwidefsky@de.ibm.com>,
7 * Cornelia Huck <cornelia.huck@de.ibm.com>,
8 * Heiko Carstens <heiko.carstens@de.ibm.com>,
9 */
10
11#include <linux/init.h>
12#include <linux/errno.h>
13#include <linux/hardirq.h>
14#include <linux/time.h>
15#include <linux/module.h>
16#include <asm/lowcore.h>
17#include <asm/smp.h>
18#include <asm/etr.h>
19#include <asm/cputime.h>
20#include <asm/nmi.h>
21#include <asm/crw.h>
22
23struct mcck_struct {
24    int kill_task;
25    int channel_report;
26    int warning;
27    unsigned long long mcck_code;
28};
29
30static DEFINE_PER_CPU(struct mcck_struct, cpu_mcck);
31
32static NORET_TYPE void s390_handle_damage(char *msg)
33{
34    smp_send_stop();
35    disabled_wait((unsigned long) __builtin_return_address(0));
36    while (1);
37}
38
39/*
40 * Main machine check handler function. Will be called with interrupts enabled
41 * or disabled and machine checks enabled or disabled.
42 */
43void s390_handle_mcck(void)
44{
45    unsigned long flags;
46    struct mcck_struct mcck;
47
48    /*
49     * Disable machine checks and get the current state of accumulated
50     * machine checks. Afterwards delete the old state and enable machine
51     * checks again.
52     */
53    local_irq_save(flags);
54    local_mcck_disable();
55    mcck = __get_cpu_var(cpu_mcck);
56    memset(&__get_cpu_var(cpu_mcck), 0, sizeof(struct mcck_struct));
57    clear_thread_flag(TIF_MCCK_PENDING);
58    local_mcck_enable();
59    local_irq_restore(flags);
60
61    if (mcck.channel_report)
62        crw_handle_channel_report();
63    /*
64     * A warning may remain for a prolonged period on the bare iron.
65     * (actually until the machine is powered off, or the problem is gone)
66     * So we just stop listening for the WARNING MCH and avoid continuously
67     * being interrupted. One caveat is however, that we must do this per
68     * processor and cannot use the smp version of ctl_clear_bit().
69     * On VM we only get one interrupt per virtally presented machinecheck.
70     * Though one suffices, we may get one interrupt per (virtual) cpu.
71     */
72    if (mcck.warning) { /* WARNING pending ? */
73        static int mchchk_wng_posted = 0;
74
75        /* Use single cpu clear, as we cannot handle smp here. */
76        __ctl_clear_bit(14, 24); /* Disable WARNING MCH */
77        if (xchg(&mchchk_wng_posted, 1) == 0)
78            kill_cad_pid(SIGPWR, 1);
79    }
80    if (mcck.kill_task) {
81        local_irq_enable();
82        printk(KERN_EMERG "mcck: Terminating task because of machine "
83               "malfunction (code 0x%016llx).\n", mcck.mcck_code);
84        printk(KERN_EMERG "mcck: task: %s, pid: %d.\n",
85               current->comm, current->pid);
86        do_exit(SIGSEGV);
87    }
88}
89EXPORT_SYMBOL_GPL(s390_handle_mcck);
90
91/*
92 * returns 0 if all registers could be validated
93 * returns 1 otherwise
94 */
95static int notrace s390_revalidate_registers(struct mci *mci)
96{
97    int kill_task;
98    u64 zero;
99    void *fpt_save_area, *fpt_creg_save_area;
100
101    kill_task = 0;
102    zero = 0;
103
104    if (!mci->gr) {
105        /*
106         * General purpose registers couldn't be restored and have
107         * unknown contents. Process needs to be terminated.
108         */
109        kill_task = 1;
110    }
111    if (!mci->fp) {
112        /*
113         * Floating point registers can't be restored and
114         * therefore the process needs to be terminated.
115         */
116        kill_task = 1;
117    }
118#ifndef CONFIG_64BIT
119    asm volatile(
120        " ld 0,0(%0)\n"
121        " ld 2,8(%0)\n"
122        " ld 4,16(%0)\n"
123        " ld 6,24(%0)"
124        : : "a" (&S390_lowcore.floating_pt_save_area));
125#endif
126
127    if (MACHINE_HAS_IEEE) {
128#ifdef CONFIG_64BIT
129        fpt_save_area = &S390_lowcore.floating_pt_save_area;
130        fpt_creg_save_area = &S390_lowcore.fpt_creg_save_area;
131#else
132        fpt_save_area = (void *) S390_lowcore.extended_save_area_addr;
133        fpt_creg_save_area = fpt_save_area + 128;
134#endif
135        if (!mci->fc) {
136            /*
137             * Floating point control register can't be restored.
138             * Task will be terminated.
139             */
140            asm volatile("lfpc 0(%0)" : : "a" (&zero), "m" (zero));
141            kill_task = 1;
142
143        } else
144            asm volatile("lfpc 0(%0)" : : "a" (fpt_creg_save_area));
145
146        asm volatile(
147            " ld 0,0(%0)\n"
148            " ld 1,8(%0)\n"
149            " ld 2,16(%0)\n"
150            " ld 3,24(%0)\n"
151            " ld 4,32(%0)\n"
152            " ld 5,40(%0)\n"
153            " ld 6,48(%0)\n"
154            " ld 7,56(%0)\n"
155            " ld 8,64(%0)\n"
156            " ld 9,72(%0)\n"
157            " ld 10,80(%0)\n"
158            " ld 11,88(%0)\n"
159            " ld 12,96(%0)\n"
160            " ld 13,104(%0)\n"
161            " ld 14,112(%0)\n"
162            " ld 15,120(%0)\n"
163            : : "a" (fpt_save_area));
164    }
165    /* Revalidate access registers */
166    asm volatile(
167        " lam 0,15,0(%0)"
168        : : "a" (&S390_lowcore.access_regs_save_area));
169    if (!mci->ar) {
170        /*
171         * Access registers have unknown contents.
172         * Terminating task.
173         */
174        kill_task = 1;
175    }
176    /* Revalidate control registers */
177    if (!mci->cr) {
178        /*
179         * Control registers have unknown contents.
180         * Can't recover and therefore stopping machine.
181         */
182        s390_handle_damage("invalid control registers.");
183    } else {
184#ifdef CONFIG_64BIT
185        asm volatile(
186            " lctlg 0,15,0(%0)"
187            : : "a" (&S390_lowcore.cregs_save_area));
188#else
189        asm volatile(
190            " lctl 0,15,0(%0)"
191            : : "a" (&S390_lowcore.cregs_save_area));
192#endif
193    }
194    /*
195     * We don't even try to revalidate the TOD register, since we simply
196     * can't write something sensible into that register.
197     */
198#ifdef CONFIG_64BIT
199    /*
200     * See if we can revalidate the TOD programmable register with its
201     * old contents (should be zero) otherwise set it to zero.
202     */
203    if (!mci->pr)
204        asm volatile(
205            " sr 0,0\n"
206            " sckpf"
207            : : : "0", "cc");
208    else
209        asm volatile(
210            " l 0,0(%0)\n"
211            " sckpf"
212            : : "a" (&S390_lowcore.tod_progreg_save_area)
213            : "0", "cc");
214#endif
215    /* Revalidate clock comparator register */
216    if (S390_lowcore.clock_comparator == -1)
217        set_clock_comparator(S390_lowcore.mcck_clock);
218    else
219        set_clock_comparator(S390_lowcore.clock_comparator);
220    /* Check if old PSW is valid */
221    if (!mci->wp)
222        /*
223         * Can't tell if we come from user or kernel mode
224         * -> stopping machine.
225         */
226        s390_handle_damage("old psw invalid.");
227
228    if (!mci->ms || !mci->pm || !mci->ia)
229        kill_task = 1;
230
231    return kill_task;
232}
233
/*
 * Limits for retried instruction processing damage (ipd) machine checks:
 * reaching MAX_IPD_COUNT of them, each within MAX_IPD_TIME of the previous
 * one, stops the machine.
 */
#define MAX_IPD_COUNT	29
#define MAX_IPD_TIME	(5 * 60 * USEC_PER_SEC) /* 5 minutes */

/* Bit numbers tested in the lowcore external damage code. */
#define ED_STP_ISLAND	6	/* External damage STP island check */
#define ED_STP_SYNC	7	/* External damage STP sync check */
#define ED_ETR_SYNC	12	/* External damage ETR sync check */
#define ED_ETR_SWITCH	13	/* External damage ETR switch to local */
241
242/*
243 * machine check handler.
244 */
245void notrace s390_do_machine_check(struct pt_regs *regs)
246{
247    static int ipd_count;
248    static DEFINE_SPINLOCK(ipd_lock);
249    static unsigned long long last_ipd;
250    struct mcck_struct *mcck;
251    unsigned long long tmp;
252    struct mci *mci;
253    int umode;
254
255    nmi_enter();
256    s390_idle_check(regs, S390_lowcore.mcck_clock,
257            S390_lowcore.mcck_enter_timer);
258
259    mci = (struct mci *) &S390_lowcore.mcck_interruption_code;
260    mcck = &__get_cpu_var(cpu_mcck);
261    umode = user_mode(regs);
262
263    if (mci->sd) {
264        /* System damage -> stopping machine */
265        s390_handle_damage("received system damage machine check.");
266    }
267    if (mci->pd) {
268        if (mci->b) {
269            /* Processing backup -> verify if we can survive this */
270            u64 z_mcic, o_mcic, t_mcic;
271#ifdef CONFIG_64BIT
272            z_mcic = (1ULL<<63 | 1ULL<<59 | 1ULL<<29);
273            o_mcic = (1ULL<<43 | 1ULL<<42 | 1ULL<<41 | 1ULL<<40 |
274                  1ULL<<36 | 1ULL<<35 | 1ULL<<34 | 1ULL<<32 |
275                  1ULL<<30 | 1ULL<<21 | 1ULL<<20 | 1ULL<<17 |
276                  1ULL<<16);
277#else
278            z_mcic = (1ULL<<63 | 1ULL<<59 | 1ULL<<57 | 1ULL<<50 |
279                  1ULL<<29);
280            o_mcic = (1ULL<<43 | 1ULL<<42 | 1ULL<<41 | 1ULL<<40 |
281                  1ULL<<36 | 1ULL<<35 | 1ULL<<34 | 1ULL<<32 |
282                  1ULL<<30 | 1ULL<<20 | 1ULL<<17 | 1ULL<<16);
283#endif
284            t_mcic = *(u64 *)mci;
285
286            if (((t_mcic & z_mcic) != 0) ||
287                ((t_mcic & o_mcic) != o_mcic)) {
288                s390_handle_damage("processing backup machine "
289                           "check with damage.");
290            }
291
292            /*
293             * Nullifying exigent condition, therefore we might
294             * retry this instruction.
295             */
296            spin_lock(&ipd_lock);
297            tmp = get_clock();
298            if (((tmp - last_ipd) >> 12) < MAX_IPD_TIME)
299                ipd_count++;
300            else
301                ipd_count = 1;
302            last_ipd = tmp;
303            if (ipd_count == MAX_IPD_COUNT)
304                s390_handle_damage("too many ipd retries.");
305            spin_unlock(&ipd_lock);
306        } else {
307            /* Processing damage -> stopping machine */
308            s390_handle_damage("received instruction processing "
309                       "damage machine check.");
310        }
311    }
312    if (s390_revalidate_registers(mci)) {
313        if (umode) {
314            /*
315             * Couldn't restore all register contents while in
316             * user mode -> mark task for termination.
317             */
318            mcck->kill_task = 1;
319            mcck->mcck_code = *(unsigned long long *) mci;
320            set_thread_flag(TIF_MCCK_PENDING);
321        } else {
322            /*
323             * Couldn't restore all register contents while in
324             * kernel mode -> stopping machine.
325             */
326            s390_handle_damage("unable to revalidate registers.");
327        }
328    }
329    if (mci->cd) {
330        /* Timing facility damage */
331        s390_handle_damage("TOD clock damaged");
332    }
333    if (mci->ed && mci->ec) {
334        /* External damage */
335        if (S390_lowcore.external_damage_code & (1U << ED_ETR_SYNC))
336            etr_sync_check();
337        if (S390_lowcore.external_damage_code & (1U << ED_ETR_SWITCH))
338            etr_switch_to_local();
339        if (S390_lowcore.external_damage_code & (1U << ED_STP_SYNC))
340            stp_sync_check();
341        if (S390_lowcore.external_damage_code & (1U << ED_STP_ISLAND))
342            stp_island_check();
343    }
344    if (mci->se)
345        /* Storage error uncorrected */
346        s390_handle_damage("received storage error uncorrected "
347                   "machine check.");
348    if (mci->ke)
349        /* Storage key-error uncorrected */
350        s390_handle_damage("received storage key-error uncorrected "
351                   "machine check.");
352    if (mci->ds && mci->fa)
353        /* Storage degradation */
354        s390_handle_damage("received storage degradation machine "
355                   "check.");
356    if (mci->cp) {
357        /* Channel report word pending */
358        mcck->channel_report = 1;
359        set_thread_flag(TIF_MCCK_PENDING);
360    }
361    if (mci->w) {
362        /* Warning pending */
363        mcck->warning = 1;
364        set_thread_flag(TIF_MCCK_PENDING);
365    }
366    nmi_exit();
367}
368
369static int __init machine_check_init(void)
370{
371    ctl_set_bit(14, 25); /* enable external damage MCH */
372    ctl_set_bit(14, 27); /* enable system recovery MCH */
373    ctl_set_bit(14, 24); /* enable warning MCH */
374    return 0;
375}
376arch_initcall(machine_check_init);
377

Archive Download this file



interactive