Root/Documentation/ia64/err_inject.txt

1
2IPF Machine Check (MC) error inject tool
3========================================
4
5IPF Machine Check (MC) error inject tool is used to inject MC
6errors from Linux. The tool is a test bed for IPF MC work flow including
7hardware correctable error handling, OS recoverable error handling, MC
8event logging, etc.
9
10The tool includes two parts: a kernel driver and a user application
11sample. The driver provides interface to PAL to inject error
12and query error injection capabilities. The driver code is in
13arch/ia64/kernel/err_inject.c. The application sample (shown below)
14provides a combination of various errors and calls the driver's interface
15(sysfs interface) to inject errors or query error injection capabilities.
16
The tool can be used to test Intel IPF machine MC handling capabilities.
It's especially useful for people who cannot access a hardware MC injection
tool to inject errors. It's also very useful to integrate with other
software test suites to do stress testing on IPF.
21
Below is a sample application as part of the whole tool. The sample
can be used as a working test tool, or it can be expanded to include
more features. It can also be integrated into a library or another user
application for more thorough testing.
26
27The sample application takes err.conf as error configuration input. GCC
28compiles the code. After you install err_inject driver, you can run
29this sample application to inject errors.
30
31Errata: Itanium 2 Processors Specification Update lists some errata against
32the pal_mc_error_inject PAL procedure. The following err.conf has been tested
33on latest Montecito PAL.
34
35err.conf:
36
#This is the configuration file for err_inject_tool.
#The format of each line is:
#cpu, loop, interval, err_type_info, err_struct_info[, err_data_buffer]
#where
# cpu: logical cpu number the error will be injected on.
# loop: number of times the error will be injected.
# interval: in seconds; one error is injected every interval.
# err_type_info, err_struct_info: PAL parameters.
# err_data_buffer: optional PAL parameter (three values); if omitted,
#                  the tool constructs it automatically.
#
#Note: All values are hex, with or without a 0x prefix.
47
48
49#On cpu2, inject only total 0x10 errors, interval 5 seconds
50#corrected, data cache, hier-2, physical addr(assigned by tool code).
51#working on Montecito latest PAL.
522, 10, 5, 4101, 95
53
54#On cpu4, inject and consume total 0x10 errors, interval 5 seconds
55#corrected, data cache, hier-2, physical addr(assigned by tool code).
56#working on Montecito latest PAL.
574, 10, 5, 4109, 95
58
59#On cpu15, inject and consume total 0x10 errors, interval 5 seconds
60#recoverable, DTR0, hier-2.
61#working on Montecito latest PAL.
620xf, 0x10, 5, 4249, 15
63
64The sample application source code:
65
66err_injection_tool.c:
67
68/*
69 * This program is free software; you can redistribute it and/or modify
70 * it under the terms of the GNU General Public License as published by
71 * the Free Software Foundation; either version 2 of the License, or
72 * (at your option) any later version.
73 *
74 * This program is distributed in the hope that it will be useful, but
75 * WITHOUT ANY WARRANTY; without even the implied warranty of
76 * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
77 * NON INFRINGEMENT. See the GNU General Public License for more
78 * details.
79 *
80 * You should have received a copy of the GNU General Public License
81 * along with this program; if not, write to the Free Software
82 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
83 *
84 * Copyright (C) 2006 Intel Co
85 * Fenghua Yu <fenghua.yu@intel.com>
86 *
87 */
88#include <sys/types.h>
89#include <sys/stat.h>
90#include <fcntl.h>
91#include <stdio.h>
92#include <sched.h>
93#include <unistd.h>
94#include <stdlib.h>
95#include <stdarg.h>
96#include <string.h>
97#include <errno.h>
98#include <time.h>
99#include <sys/ipc.h>
100#include <sys/sem.h>
101#include <sys/wait.h>
102#include <sys/mman.h>
103#include <sys/shm.h>
104
/* Buffer sizes, in bytes, for file names and formatted values. */
#define MAX_FN_SIZE 256
#define MAX_BUF_SIZE 256
#define DATA_BUF_SIZE 256
/* Upper bound on logical cpu numbers; sizes the per-cpu arrays and the shm segment. */
#define NR_CPUS 512
/* Maximum number of injection tasks accepted from err.conf. */
#define MAX_TASK_NUM 2048
#define MIN_INTERVAL 5 // seconds
#define ERR_DATA_BUFFER_SIZE 3 // Three 8-byte.
/* Number of mandatory fields in one configuration record. */
#define PARA_FIELD_NUM 5
/* Number of 64-bit words in a cpu affinity mask covering NR_CPUS. */
#define MASK_SIZE (NR_CPUS/64)
/* Per-cpu sysfs directory of the err_inject driver; takes the cpu number. */
#define PATH_FORMAT "/sys/devices/system/cpu/cpu%d/err_inject/"

/*
 * Declared by hand with a raw unsigned long mask instead of relying on the
 * declaration (if any) supplied by <sched.h>.
 */
int sched_setaffinity(pid_t pid, unsigned int len, unsigned long *mask);

/* Non-zero enables the diagnostic output produced through vbprintf(). */
int verbose;

/*
 * printf() that only prints in verbose mode.
 *
 * Wrapped in do { } while (0) so that the macro behaves as a single statement:
 * the previous "if (verbose) printf" form captured a following "else" when
 * used as the body of an unbraced if.
 */
#define vbprintf(...) do { if (verbose) printf(__VA_ARGS__); } while (0)
120
121int log_info(int cpu, const char *fmt, ...)
122{
123    FILE *log;
124    char fn[MAX_FN_SIZE];
125    char buf[MAX_BUF_SIZE];
126    va_list args;
127
128    sprintf(fn, "%d.log", cpu);
129    log=fopen(fn, "a+");
130    if (log==NULL) {
131        perror("Error open:");
132        return -1;
133    }
134
135    va_start(args, fmt);
136    vprintf(fmt, args);
137    memset(buf, 0, MAX_BUF_SIZE);
138    vsprintf(buf, fmt, args);
139    va_end(args);
140
141    fwrite(buf, sizeof(buf), 1, log);
142    fclose(log);
143
144    return 0;
145}
146
/* Fixed-width aliases used by the tool (64-bit long is assumed). */
typedef unsigned long u64;
typedef unsigned int u32;

/*
 * The err_type_info parameter, viewable either as a raw 64-bit word or as
 * its individual bit fields.
 */
typedef union err_type_info_u {
    struct {
        u64 mode : 3;        /* bits 0-2 */
        u64 err_inj : 3;     /* bits 3-5 */
        u64 err_sev : 2;     /* bits 6-7 */
        u64 err_struct : 5;  /* bits 8-12 */
        u64 struct_hier : 3; /* bits 13-15 */
        u64 reserved : 48;   /* bits 16-63 */
    } err_type_info_u;
    u64 err_type_info;       /* the whole word */
} err_type_info_t;
161
162typedef union err_struct_info_u {
163    struct {
164        u64 siv : 1, /* 0 */
165            c_t : 2, /* 1-2 */
166            cl_p : 3, /* 3-5 */
167            cl_id : 3, /* 6-8 */
168            cl_dp : 1, /* 9 */
169            reserved1 : 22, /* 10-31 */
170            tiv : 1, /* 32 */
171            trigger : 4, /* 33-36 */
172            trigger_pl : 3, /* 37-39 */
173            reserved2 : 24; /* 40-63 */
174    } err_struct_info_cache;
175    struct {
176        u64 siv : 1, /* 0 */
177            tt : 2, /* 1-2 */
178            tc_tr : 2, /* 3-4 */
179            tr_slot : 8, /* 5-12 */
180            reserved1 : 19, /* 13-31 */
181            tiv : 1, /* 32 */
182            trigger : 4, /* 33-36 */
183            trigger_pl : 3, /* 37-39 */
184            reserved2 : 24; /* 40-63 */
185    } err_struct_info_tlb;
186    struct {
187        u64 siv : 1, /* 0 */
188            regfile_id : 4, /* 1-4 */
189            reg_num : 7, /* 5-11 */
190            reserved1 : 20, /* 12-31 */
191            tiv : 1, /* 32 */
192            trigger : 4, /* 33-36 */
193            trigger_pl : 3, /* 37-39 */
194            reserved2 : 24; /* 40-63 */
195    } err_struct_info_register;
196    struct {
197        u64 reserved;
198    } err_struct_info_bus_processor_interconnect;
199    u64 err_struct_info;
200} err_struct_info_t;
201
202typedef union err_data_buffer_u {
203    struct {
204        u64 trigger_addr; /* 0-63 */
205        u64 inj_addr; /* 64-127 */
206        u64 way : 5, /* 128-132 */
207            index : 20, /* 133-152 */
208                    : 39; /* 153-191 */
209    } err_data_buffer_cache;
210    struct {
211        u64 trigger_addr; /* 0-63 */
212        u64 inj_addr; /* 64-127 */
213        u64 way : 5, /* 128-132 */
214            index : 20, /* 133-152 */
215            reserved : 39; /* 153-191 */
216    } err_data_buffer_tlb;
217    struct {
218        u64 trigger_addr; /* 0-63 */
219    } err_data_buffer_register;
220    struct {
221        u64 reserved; /* 0-63 */
222    } err_data_buffer_bus_processor_interconnect;
223    u64 err_data_buffer[ERR_DATA_BUFFER_SIZE];
224} err_data_buffer_t;
225
226typedef union capabilities_u {
227    struct {
228        u64 i : 1,
229            d : 1,
230            rv : 1,
231            tag : 1,
232            data : 1,
233            mesi : 1,
234            dp : 1,
235            reserved1 : 3,
236            pa : 1,
237            va : 1,
238            wi : 1,
239            reserved2 : 20,
240            trigger : 1,
241            trigger_pl : 1,
242            reserved3 : 30;
243    } capabilities_cache;
244    struct {
245        u64 d : 1,
246            i : 1,
247            rv : 1,
248            tc : 1,
249            tr : 1,
250            reserved1 : 27,
251            trigger : 1,
252            trigger_pl : 1,
253            reserved2 : 30;
254    } capabilities_tlb;
255    struct {
256        u64 gr_b0 : 1,
257            gr_b1 : 1,
258            fr : 1,
259            br : 1,
260            pr : 1,
261            ar : 1,
262            cr : 1,
263            rr : 1,
264            pkr : 1,
265            dbr : 1,
266            ibr : 1,
267            pmc : 1,
268            pmd : 1,
269            reserved1 : 3,
270            regnum : 1,
271            reserved2 : 15,
272            trigger : 1,
273            trigger_pl : 1,
274            reserved3 : 30;
275    } capabilities_register;
276    struct {
277        u64 reserved;
278    } capabilities_bus_processor_interconnect;
279} capabilities_t;
280
281typedef struct resources_s {
282    u64 ibr0 : 1,
283        ibr2 : 1,
284        ibr4 : 1,
285        ibr6 : 1,
286        dbr0 : 1,
287        dbr2 : 1,
288        dbr4 : 1,
289        dbr6 : 1,
290        reserved : 48;
291} resources_t;
292
293
/*
 * Return the system page size as reported by sysconf(_SC_PAGESIZE);
 * the value is -1 when sysconf fails.
 */
long get_page_size(void)
{
    return sysconf(_SC_PAGESIZE);
}

/* Page size in bytes; falls back to 0x4000 when it cannot be queried. */
#define PAGE_SIZE (get_page_size()==-1?0x4000:get_page_size())
/* Size of the shared memory segment: two pages for each possible cpu. */
#define SHM_SIZE (2*PAGE_SIZE*NR_CPUS)
/* Fixed virtual address at which the segment is attached. */
#define SHM_VA 0x2000000100000000

/* Identifier and attach address of the shared memory segment. */
int shmid;
void *shmaddr;
306
307int create_shm(void)
308{
309    key_t key;
310    char fn[MAX_FN_SIZE];
311
312    /* cpu0 is always existing */
313    sprintf(fn, PATH_FORMAT, 0);
314    if ((key = ftok(fn, 's')) == -1) {
315        perror("ftok");
316        return -1;
317    }
318
319    shmid = shmget(key, SHM_SIZE, 0644 | IPC_CREAT);
320    if (shmid == -1) {
321        if (errno==EEXIST) {
322            shmid = shmget(key, SHM_SIZE, 0);
323            if (shmid == -1) {
324                perror("shmget");
325                return -1;
326            }
327        }
328        else {
329            perror("shmget");
330            return -1;
331        }
332    }
333    vbprintf("shmid=%d", shmid);
334
335    /* connect to the segment: */
336    shmaddr = shmat(shmid, (void *)SHM_VA, 0);
337    if (shmaddr == (void*)-1) {
338        perror("shmat");
339        return -1;
340    }
341
342    memset(shmaddr, 0, SHM_SIZE);
343    mlock(shmaddr, SHM_SIZE);
344
345    return 0;
346}
347
348int free_shm()
349{
350    munlock(shmaddr, SHM_SIZE);
351        shmdt(shmaddr);
352    semctl(shmid, 0, IPC_RMID);
353
354    return 0;
355}
356
357#ifdef _SEM_SEMUN_UNDEFINED
358union semun
359{
360    int val;
361    struct semid_ds *buf;
362    unsigned short int *array;
363    struct seminfo *__buf;
364};
365#endif
366
367u32 mode=1; /* 1: physical mode; 2: virtual mode. */
368int one_lock=1;
369key_t key[NR_CPUS];
370int semid[NR_CPUS];
371
372int create_sem(int cpu)
373{
374    union semun arg;
375    char fn[MAX_FN_SIZE];
376    int sid;
377
378    sprintf(fn, PATH_FORMAT, cpu);
379    sprintf(fn, "%s/%s", fn, "err_type_info");
380    if ((key[cpu] = ftok(fn, 'e')) == -1) {
381        perror("ftok");
382        return -1;
383    }
384
385    if (semid[cpu]!=0)
386        return 0;
387
388    /* clear old semaphore */
389    if ((sid = semget(key[cpu], 1, 0)) != -1)
390        semctl(sid, 0, IPC_RMID);
391
392    /* get one semaphore */
393    if ((semid[cpu] = semget(key[cpu], 1, IPC_CREAT | IPC_EXCL)) == -1) {
394        perror("semget");
395        printf("Please remove semaphore with key=0x%lx, then run the tool.\n",
396            (u64)key[cpu]);
397        return -1;
398    }
399
400    vbprintf("semid[%d]=0x%lx, key[%d]=%lx\n",cpu,(u64)semid[cpu],cpu,
401        (u64)key[cpu]);
402    /* initialize the semaphore to 1: */
403    arg.val = 1;
404    if (semctl(semid[cpu], 0, SETVAL, arg) == -1) {
405        perror("semctl");
406        return -1;
407    }
408
409    return 0;
410}
411
412static int lock(int cpu)
413{
414    struct sembuf lock;
415
416    lock.sem_num = cpu;
417    lock.sem_op = 1;
418    semop(semid[cpu], &lock, 1);
419
420        return 0;
421}
422
423static int unlock(int cpu)
424{
425    struct sembuf unlock;
426
427    unlock.sem_num = cpu;
428    unlock.sem_op = -1;
429    semop(semid[cpu], &unlock, 1);
430
431        return 0;
432}
433
434void free_sem(int cpu)
435{
436    semctl(semid[cpu], 0, IPC_RMID);
437}
438
439int wr_multi(char *fn, unsigned long *data, int size)
440{
441    int fd;
442    char buf[MAX_BUF_SIZE];
443    int ret;
444
445    if (size==1)
446        sprintf(buf, "%lx", *data);
447    else if (size==3)
448        sprintf(buf, "%lx,%lx,%lx", data[0], data[1], data[2]);
449    else {
450        fprintf(stderr,"write to file with wrong size!\n");
451        return -1;
452    }
453
454    fd=open(fn, O_RDWR);
455    if (!fd) {
456        perror("Error:");
457        return -1;
458    }
459    ret=write(fd, buf, sizeof(buf));
460    close(fd);
461    return ret;
462}
463
/* Write a single hex value to the file fn; see wr_multi() for the result. */
int wr(char *fn, unsigned long data)
{
    unsigned long value = data;

    return wr_multi(fn, &value, 1);
}
468
469int rd(char *fn, unsigned long *data)
470{
471    int fd;
472    char buf[MAX_BUF_SIZE];
473
474    fd=open(fn, O_RDONLY);
475    if (fd<0) {
476        perror("Error:");
477        return -1;
478    }
479    read(fd, buf, MAX_BUF_SIZE);
480    *data=strtoul(buf, NULL, 16);
481    close(fd);
482    return 0;
483}
484
485int rd_status(char *path, int *status)
486{
487    char fn[MAX_FN_SIZE];
488    sprintf(fn, "%s/status", path);
489    if (rd(fn, (u64*)status)<0) {
490        perror("status reading error.\n");
491        return -1;
492    }
493
494    return 0;
495}
496
497int rd_capabilities(char *path, u64 *capabilities)
498{
499    char fn[MAX_FN_SIZE];
500    sprintf(fn, "%s/capabilities", path);
501    if (rd(fn, capabilities)<0) {
502        perror("capabilities reading error.\n");
503        return -1;
504    }
505
506    return 0;
507}
508
509int rd_all(char *path)
510{
511    unsigned long err_type_info, err_struct_info, err_data_buffer;
512    int status;
513    unsigned long capabilities, resources;
514    char fn[MAX_FN_SIZE];
515
516    sprintf(fn, "%s/err_type_info", path);
517    if (rd(fn, &err_type_info)<0) {
518        perror("err_type_info reading error.\n");
519        return -1;
520    }
521    printf("err_type_info=%lx\n", err_type_info);
522
523    sprintf(fn, "%s/err_struct_info", path);
524    if (rd(fn, &err_struct_info)<0) {
525        perror("err_struct_info reading error.\n");
526        return -1;
527    }
528    printf("err_struct_info=%lx\n", err_struct_info);
529
530    sprintf(fn, "%s/err_data_buffer", path);
531    if (rd(fn, &err_data_buffer)<0) {
532        perror("err_data_buffer reading error.\n");
533        return -1;
534    }
535    printf("err_data_buffer=%lx\n", err_data_buffer);
536
537    sprintf(fn, "%s/status", path);
538    if (rd("status", (u64*)&status)<0) {
539        perror("status reading error.\n");
540        return -1;
541    }
542    printf("status=%d\n", status);
543
544    sprintf(fn, "%s/capabilities", path);
545    if (rd(fn,&capabilities)<0) {
546        perror("capabilities reading error.\n");
547        return -1;
548    }
549    printf("capabilities=%lx\n", capabilities);
550
551    sprintf(fn, "%s/resources", path);
552    if (rd(fn, &resources)<0) {
553        perror("resources reading error.\n");
554        return -1;
555    }
556    printf("resources=%lx\n", resources);
557
558    return 0;
559}
560
561int query_capabilities(char *path, err_type_info_t err_type_info,
562            u64 *capabilities)
563{
564    char fn[MAX_FN_SIZE];
565    err_struct_info_t err_struct_info;
566    err_data_buffer_t err_data_buffer;
567
568    err_struct_info.err_struct_info=0;
569    memset(err_data_buffer.err_data_buffer, -1, ERR_DATA_BUFFER_SIZE*8);
570
571    sprintf(fn, "%s/err_type_info", path);
572    wr(fn, err_type_info.err_type_info);
573    sprintf(fn, "%s/err_struct_info", path);
574    wr(fn, 0x0);
575    sprintf(fn, "%s/err_data_buffer", path);
576    wr_multi(fn, err_data_buffer.err_data_buffer, ERR_DATA_BUFFER_SIZE);
577
578    // Fire pal_mc_error_inject procedure.
579    sprintf(fn, "%s/call_start", path);
580    wr(fn, mode);
581
582    if (rd_capabilities(path, capabilities)<0)
583        return -1;
584
585    return 0;
586}
587
588int query_all_capabilities()
589{
590    int status;
591    err_type_info_t err_type_info;
592    int err_sev, err_struct, struct_hier;
593    int cap=0;
594    u64 capabilities;
595    char path[MAX_FN_SIZE];
596
597    err_type_info.err_type_info=0; // Initial
598    err_type_info.err_type_info_u.mode=0; // Query mode;
599    err_type_info.err_type_info_u.err_inj=0;
600
601    printf("All capabilities implemented in pal_mc_error_inject:\n");
602    sprintf(path, PATH_FORMAT ,0);
603    for (err_sev=0;err_sev<3;err_sev++)
604        for (err_struct=0;err_struct<5;err_struct++)
605            for (struct_hier=0;struct_hier<5;struct_hier++)
606    {
607        status=-1;
608        capabilities=0;
609        err_type_info.err_type_info_u.err_sev=err_sev;
610        err_type_info.err_type_info_u.err_struct=err_struct;
611        err_type_info.err_type_info_u.struct_hier=struct_hier;
612
613        if (query_capabilities(path, err_type_info, &capabilities)<0)
614            continue;
615
616        if (rd_status(path, &status)<0)
617            continue;
618
619        if (status==0) {
620            cap=1;
621            printf("For err_sev=%d, err_struct=%d, struct_hier=%d: ",
622                err_sev, err_struct, struct_hier);
623            printf("capabilities 0x%lx\n", capabilities);
624        }
625    }
626    if (!cap) {
627        printf("No capabilities supported.\n");
628        return 0;
629    }
630
631    return 0;
632}
633
634int err_inject(int cpu, char *path, err_type_info_t err_type_info,
635        err_struct_info_t err_struct_info,
636        err_data_buffer_t err_data_buffer)
637{
638    int status;
639    char fn[MAX_FN_SIZE];
640
641    log_info(cpu, "err_type_info=%lx, err_struct_info=%lx, ",
642        err_type_info.err_type_info,
643        err_struct_info.err_struct_info);
644    log_info(cpu,"err_data_buffer=[%lx,%lx,%lx]\n",
645        err_data_buffer.err_data_buffer[0],
646        err_data_buffer.err_data_buffer[1],
647        err_data_buffer.err_data_buffer[2]);
648    sprintf(fn, "%s/err_type_info", path);
649    wr(fn, err_type_info.err_type_info);
650    sprintf(fn, "%s/err_struct_info", path);
651    wr(fn, err_struct_info.err_struct_info);
652    sprintf(fn, "%s/err_data_buffer", path);
653    wr_multi(fn, err_data_buffer.err_data_buffer, ERR_DATA_BUFFER_SIZE);
654
655    // Fire pal_mc_error_inject procedure.
656    sprintf(fn, "%s/call_start", path);
657    wr(fn,mode);
658
659    if (rd_status(path, &status)<0) {
660        vbprintf("fail: read status\n");
661        return -100;
662    }
663
664    if (status!=0) {
665        log_info(cpu, "fail: status=%d\n", status);
666        return status;
667    }
668
669    return status;
670}
671
672static int construct_data_buf(char *path, err_type_info_t err_type_info,
673        err_struct_info_t err_struct_info,
674        err_data_buffer_t *err_data_buffer,
675        void *va1)
676{
677    char fn[MAX_FN_SIZE];
678    u64 virt_addr=0, phys_addr=0;
679
680    vbprintf("va1=%lx\n", (u64)va1);
681    memset(&err_data_buffer->err_data_buffer_cache, 0, ERR_DATA_BUFFER_SIZE*8);
682
683    switch (err_type_info.err_type_info_u.err_struct) {
684        case 1: // Cache
685            switch (err_struct_info.err_struct_info_cache.cl_id) {
686                case 1: //Virtual addr
687                    err_data_buffer->err_data_buffer_cache.inj_addr=(u64)va1;
688                    break;
689                case 2: //Phys addr
690                    sprintf(fn, "%s/virtual_to_phys", path);
691                    virt_addr=(u64)va1;
692                    if (wr(fn,virt_addr)<0)
693                        return -1;
694                    rd(fn, &phys_addr);
695                    err_data_buffer->err_data_buffer_cache.inj_addr=phys_addr;
696                    break;
697                default:
698                    printf("Not supported cl_id\n");
699                    break;
700            }
701            break;
702        case 2: // TLB
703            break;
704        case 3: // Register file
705            break;
706        case 4: // Bus/system interconnect
707        default:
708            printf("Not supported err_struct\n");
709            break;
710    }
711
712    return 0;
713}
714
715typedef struct {
716    u64 cpu;
717    u64 loop;
718    u64 interval;
719    u64 err_type_info;
720    u64 err_struct_info;
721    u64 err_data_buffer[ERR_DATA_BUFFER_SIZE];
722} parameters_t;
723
724parameters_t line_para;
725int para;
726
727static int empty_data_buffer(u64 *err_data_buffer)
728{
729    int empty=1;
730    int i;
731
732    for (i=0;i<ERR_DATA_BUFFER_SIZE; i++)
733       if (err_data_buffer[i]!=-1)
734        empty=0;
735
736    return empty;
737}
738
739int err_inj()
740{
741    err_type_info_t err_type_info;
742    err_struct_info_t err_struct_info;
743    err_data_buffer_t err_data_buffer;
744    int count;
745    FILE *fp;
746    unsigned long cpu, loop, interval, err_type_info_conf, err_struct_info_conf;
747    u64 err_data_buffer_conf[ERR_DATA_BUFFER_SIZE];
748    int num;
749    int i;
750    char path[MAX_FN_SIZE];
751    parameters_t parameters[MAX_TASK_NUM]={};
752    pid_t child_pid[MAX_TASK_NUM];
753    time_t current_time;
754    int status;
755
756    if (!para) {
757        fp=fopen("err.conf", "r");
758        if (fp==NULL) {
759        perror("Error open err.conf");
760        return -1;
761        }
762
763        num=0;
764        while (!feof(fp)) {
765        char buf[256];
766        memset(buf,0,256);
767        fgets(buf, 256, fp);
768        count=sscanf(buf, "%lx, %lx, %lx, %lx, %lx, %lx, %lx, %lx\n",
769                &cpu, &loop, &interval,&err_type_info_conf,
770                &err_struct_info_conf,
771                &err_data_buffer_conf[0],
772                &err_data_buffer_conf[1],
773                &err_data_buffer_conf[2]);
774        if (count!=PARA_FIELD_NUM+3) {
775            err_data_buffer_conf[0]=-1;
776            err_data_buffer_conf[1]=-1;
777            err_data_buffer_conf[2]=-1;
778            count=sscanf(buf, "%lx, %lx, %lx, %lx, %lx\n",
779                &cpu, &loop, &interval,&err_type_info_conf,
780                &err_struct_info_conf);
781            if (count!=PARA_FIELD_NUM)
782                continue;
783        }
784
785        parameters[num].cpu=cpu;
786        parameters[num].loop=loop;
787        parameters[num].interval= interval>MIN_INTERVAL
788                      ?interval:MIN_INTERVAL;
789        parameters[num].err_type_info=err_type_info_conf;
790        parameters[num].err_struct_info=err_struct_info_conf;
791        memcpy(parameters[num++].err_data_buffer,
792            err_data_buffer_conf,ERR_DATA_BUFFER_SIZE*8) ;
793
794        if (num>=MAX_TASK_NUM)
795            break;
796        }
797    }
798    else {
799        parameters[0].cpu=line_para.cpu;
800        parameters[0].loop=line_para.loop;
801        parameters[0].interval= line_para.interval>MIN_INTERVAL
802                      ?line_para.interval:MIN_INTERVAL;
803        parameters[0].err_type_info=line_para.err_type_info;
804        parameters[0].err_struct_info=line_para.err_struct_info;
805        memcpy(parameters[0].err_data_buffer,
806            line_para.err_data_buffer,ERR_DATA_BUFFER_SIZE*8) ;
807
808        num=1;
809    }
810
811    /* Create semaphore: If one_lock, one semaphore for all processors.
812       Otherwise, one semaphore for each processor. */
813    if (one_lock) {
814        if (create_sem(0)) {
815            printf("Can not create semaphore...exit\n");
816            free_sem(0);
817            return -1;
818        }
819    }
820    else {
821        for (i=0;i<num;i++) {
822           if (create_sem(parameters[i].cpu)) {
823            printf("Can not create semaphore for cpu%d...exit\n",i);
824            free_sem(parameters[num].cpu);
825            return -1;
826           }
827        }
828    }
829
830    /* Create a shm segment which will be used to inject/consume errors on.*/
831    if (create_shm()==-1) {
832        printf("Error to create shm...exit\n");
833        return -1;
834    }
835
836    for (i=0;i<num;i++) {
837        pid_t pid;
838
839        current_time=time(NULL);
840        log_info(parameters[i].cpu, "\nBegine at %s", ctime(&current_time));
841        log_info(parameters[i].cpu, "Configurations:\n");
842        log_info(parameters[i].cpu,"On cpu%ld: loop=%lx, interval=%lx(s)",
843            parameters[i].cpu,
844            parameters[i].loop,
845            parameters[i].interval);
846        log_info(parameters[i].cpu," err_type_info=%lx,err_struct_info=%lx\n",
847            parameters[i].err_type_info,
848            parameters[i].err_struct_info);
849
850        sprintf(path, PATH_FORMAT, (int)parameters[i].cpu);
851        err_type_info.err_type_info=parameters[i].err_type_info;
852        err_struct_info.err_struct_info=parameters[i].err_struct_info;
853        memcpy(err_data_buffer.err_data_buffer,
854            parameters[i].err_data_buffer,
855            ERR_DATA_BUFFER_SIZE*8);
856
857        pid=fork();
858        if (pid==0) {
859            unsigned long mask[MASK_SIZE];
860            int j, k;
861
862            void *va1, *va2;
863
864            /* Allocate two memory areas va1 and va2 in shm */
865            va1=shmaddr+parameters[i].cpu*PAGE_SIZE;
866            va2=shmaddr+parameters[i].cpu*PAGE_SIZE+PAGE_SIZE;
867
868            vbprintf("va1=%lx, va2=%lx\n", (u64)va1, (u64)va2);
869            memset(va1, 0x1, PAGE_SIZE);
870            memset(va2, 0x2, PAGE_SIZE);
871
872            if (empty_data_buffer(err_data_buffer.err_data_buffer))
873                /* If not specified yet, construct data buffer
874                 * with va1
875                 */
876                construct_data_buf(path, err_type_info,
877                    err_struct_info, &err_data_buffer,va1);
878
879            for (j=0;j<MASK_SIZE;j++)
880                mask[j]=0;
881
882            cpu=parameters[i].cpu;
883            k = cpu%64;
884            j = cpu/64;
885            mask[j]=1<<k;
886
887            if (sched_setaffinity(0, MASK_SIZE*8, mask)==-1) {
888                perror("Error sched_setaffinity:");
889                return -1;
890            }
891
892            for (j=0; j<parameters[i].loop; j++) {
893                log_info(parameters[i].cpu,"Injection ");
894                log_info(parameters[i].cpu,"on cpu%ld: #%d/%ld ",
895
896                    parameters[i].cpu,j+1, parameters[i].loop);
897
898                /* Hold the lock */
899                if (one_lock)
900                    lock(0);
901                else
902                /* Hold lock on this cpu */
903                    lock(parameters[i].cpu);
904
905                if ((status=err_inject(parameters[i].cpu,
906                       path, err_type_info,
907                       err_struct_info, err_data_buffer))
908                       ==0) {
909                    /* consume the error for "inject only"*/
910                    memcpy(va2, va1, PAGE_SIZE);
911                    memcpy(va1, va2, PAGE_SIZE);
912                    log_info(parameters[i].cpu,
913                        "successful\n");
914                }
915                else {
916                    log_info(parameters[i].cpu,"fail:");
917                    log_info(parameters[i].cpu,
918                        "status=%d\n", status);
919                    unlock(parameters[i].cpu);
920                    break;
921                }
922                if (one_lock)
923                /* Release the lock */
924                    unlock(0);
925                /* Release lock on this cpu */
926                else
927                    unlock(parameters[i].cpu);
928
929                if (j < parameters[i].loop-1)
930                    sleep(parameters[i].interval);
931            }
932            current_time=time(NULL);
933            log_info(parameters[i].cpu, "Done at %s", ctime(&current_time));
934            return 0;
935        }
936        else if (pid<0) {
937            perror("Error fork:");
938            continue;
939        }
940        child_pid[i]=pid;
941    }
942    for (i=0;i<num;i++)
943        waitpid(child_pid[i], NULL, 0);
944
945    if (one_lock)
946        free_sem(0);
947    else
948        for (i=0;i<num;i++)
949            free_sem(parameters[i].cpu);
950
951    printf("All done.\n");
952
953    return 0;
954}
955
/* Print the usage text of the tool to stdout. */
void help()
{
    static const char *const usage[] = {
        "err_inject_tool:\n",
        "\t-q: query all capabilities. default: off\n",
        "\t-m: procedure mode. 1: physical 2: virtual. default: 1\n",
        "\t-i: inject errors. default: off\n",
        "\t-l: one lock per cpu. default: one lock for all\n",
        "\t-e: error parameters:\n",
        "\t\tcpu,loop,interval,err_type_info,err_struct_info[,err_data_buffer[0],err_data_buffer[1],err_data_buffer[2]]\n",
        "\t\t cpu: logical cpu number the error will be inject in.\n",
        "\t\t loop: times the error will be injected.\n",
        "\t\t interval: In second. every so often one error is injected.\n",
        "\t\t err_type_info, err_struct_info: PAL parameters.\n",
        "\t\t err_data_buffer: PAL parameter. Optional. If not present,\n",
        "\t\t it's constructed by tool automatically. Be\n",
        "\t\t careful to provide err_data_buffer and make\n",
        "\t\t sure it's working with the environment.\n",
        "\t Note:no space between error parameters.\n",
        "\t default: Take error parameters from err.conf instead of command line.\n",
        "\t-v: verbose. default: off\n",
        "\t-h: help\n\n",
        "The tool will take err.conf file as ",
        "input to inject single or multiple errors ",
        "on one or multiple cpus in parallel.\n",
    };
    size_t line;

    for (line = 0; line < sizeof(usage) / sizeof(usage[0]); line++)
        printf("%s", usage[line]);
}
981
982int main(int argc, char **argv)
983{
984    char c;
985    int do_err_inj=0;
986    int do_query_all=0;
987    int count;
988    u32 m;
989
990    /* Default one lock for all cpu's */
991    one_lock=1;
992    while ((c = getopt(argc, argv, "m:iqvhle:")) != EOF)
993        switch (c) {
994            case 'm': /* Procedure mode. 1: phys 2: virt */
995                count=sscanf(optarg, "%x", &m);
996                if (count!=1 || (m!=1 && m!=2)) {
997                    printf("Wrong mode number.\n");
998                    help();
999                    return -1;
1000                }
1001                mode=m;
1002                break;
1003            case 'i': /* Inject errors */
1004                do_err_inj=1;
1005                break;
1006            case 'q': /* Query */
1007                do_query_all=1;
1008                break;
1009            case 'v': /* Verbose */
1010                verbose=1;
1011                break;
1012            case 'l': /* One lock per cpu */
1013                one_lock=0;
1014                break;
1015            case 'e': /* error arguments */
1016                /* Take parameters:
1017                 * #cpu, loop, interval, err_type_info, err_struct_info[, err_data_buffer]
1018                 * err_data_buffer is optional. Recommend not to specify
1019                 * err_data_buffer. Better to use tool to generate it.
1020                 */
1021                count=sscanf(optarg,
1022                    "%lx, %lx, %lx, %lx, %lx, %lx, %lx, %lx\n",
1023                    &line_para.cpu,
1024                    &line_para.loop,
1025                    &line_para.interval,
1026                    &line_para.err_type_info,
1027                    &line_para.err_struct_info,
1028                    &line_para.err_data_buffer[0],
1029                    &line_para.err_data_buffer[1],
1030                    &line_para.err_data_buffer[2]);
1031                if (count!=PARA_FIELD_NUM+3) {
1032                    line_para.err_data_buffer[0]=-1,
1033                    line_para.err_data_buffer[1]=-1,
1034                     line_para.err_data_buffer[2]=-1;
1035                    count=sscanf(optarg, "%lx, %lx, %lx, %lx, %lx\n",
1036                        &line_para.cpu,
1037                        &line_para.loop,
1038                        &line_para.interval,
1039                        &line_para.err_type_info,
1040                        &line_para.err_struct_info);
1041                    if (count!=PARA_FIELD_NUM) {
1042                    printf("Wrong error arguments.\n");
1043                    help();
1044                    return -1;
1045                    }
1046                }
1047                para=1;
1048                break;
1049            continue;
1050                break;
1051            case 'h':
1052                help();
1053                return 0;
1054            default:
1055                break;
1056        }
1057
1058    if (do_query_all)
1059        query_all_capabilities();
1060    if (do_err_inj)
1061        err_inj();
1062
1063    if (!do_query_all && !do_err_inj)
1064        help();
1065
1066    return 0;
1067}
1068
1069

Archive Download this file



interactive