Date:2011-09-18 04:02:34 (7 years 8 months ago)
Author:Werner Almesberger
Commit:9a5a22eda5e74d79cb4b7b0ac1664cbb42691f9a
Message:m1/perf/: main.c was missing. Added on-going work.

Files: m1/perf/main.c (1 diff)
m1/perf/sched.c (1 diff)
m1/perf/try (1 diff)

Change Details

m1/perf/main.c
1#include <stdlib.h>
2#include <stdio.h>
3
4#include "compiler.h"
5
6
7#define BUF_SIZE 1000000
8
9
/*
 * Message callback passed to patch_compile: prints the message on stderr,
 * followed by a newline.
 */
static void report(const char *s)
{
    fputs(s, stderr);
    fputc('\n', stderr);
}
14
15
/*
 * Print the command-line synopsis on stderr and terminate the process with
 * exit status 1. "name" is the program name to show (argv[0]).
 * Does not return.
 */
static void usage(const char *name)
{
    fprintf(stderr, "usage: %s patch-file [loops]\n", name);

    exit(1);
}
21
22
23int main(int argc, char **argv)
24{
25    char buf[BUF_SIZE];
26    const char *name;
27    FILE *file;
28    size_t got;
29    int loops = 1;
30    int i;
31
32    switch (argc) {
33    case 2:
34        break;
35    case 3:
36        loops = atoi(argv[2]);
37        break;
38    default:
39        usage(*argv);
40    }
41
42    name = argv[1];
43    file = fopen(name, "r");
44    if (!file) {
45        perror(name);
46        exit(1);
47    }
48    got = fread(buf, sizeof(buf)-1, 1, file);
49    if (got < 0) {
50        perror(name);
51        exit(1);
52    }
53    buf[got] = 0;
54    fclose(file);
55
56    for (i = 0; i != loops; i++)
57        patch_compile(buf, report);
58
59    return 0;
60}
m1/perf/sched.c
1/*
2 * sched.c - O(n) ... O(n^2) scheduler
3 *
4 * Written 2011 by Werner Almesberger
5 *
6 * Based on gfpus.c
7 * Copyright (C) 2007, 2008, 2009, 2010 Sebastien Bourdeauducq
8 *
9 * This program is free software: you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License as published by
11 * the Free Software Foundation, version 3 of the License.
12 *
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
17 *
18 * You should have received a copy of the GNU General Public License
19 * along with this program. If not, see <http://www.gnu.org/licenses/>.
20 */
21
22#include <stdlib.h>
23#include <stdio.h>
24#include <string.h>
25
26#include <fpvm/is.h>
27#include <fpvm/fpvm.h>
28#include <fpvm/pfpu.h>
29#include <fpvm/gfpus.h>
30
31#include <hw/pfpu.h>
32
33
34#define MAX_LATENCY 8 /* maximum latency; okay to make this bigger */
35
36
37#define Dprintf(...)
38
39
/*
 * Link element of a doubly-linked list. It is used both as list head and,
 * embedded in other structures, as list member.
 */
struct list {
    struct list *next;
    struct list *prev;
};
43
44
45static struct insn {
46    struct list more; /* more insns on same schedule */
47    struct fpvm_instruction *vm_insn;
48    struct data_ref {
49        struct list more; /* more refs sharing the data */
50        struct insn *insn; /* insn this is part of */
51        struct insn *dep; /* insn we depend on */
52    } opa, opb, dest, cond;
53    int arity;
54    int latency;
55    int unresolved; /* number of data refs we need before we can sched */
56    int earliest; /* earliest cycle dependencies seen so far are met */
57    struct list dependants; /* list of dependencies (constant) */
58    int num_dependants; /* number of unresolved dependencies */
59} insns[FPVM_MAXCODELEN];
60
61
62/* ----- Register initialization ------------------------------------------- */
63
64
65/*
66 * Straight from gfpus.c, only with some whitespace changes.
67 */
68
69static void get_registers(struct fpvm_fragment *fragment,
70    unsigned int *registers)
71{
72    int i;
73    union {
74        float f;
75        unsigned int n;
76    } fconv;
77
78    for(i = 0; i < fragment->nbindings; i++)
79               if (fragment->bindings[i].isvar)
80            registers[i] = 0;
81        else {
82            fconv.f = fragment->bindings[i].b.c;
83            registers[i] = fconv.n;
84        }
85    for(; i < PFPU_REG_COUNT; i++)
86        registers[i] = 0;
87}
88
89
90/* ----- Doubly-linked list ------------------------------------------------ */
91
92
93/*
94 * Use naming conventions of include/linux/list.h
95 */
96
97static void list_init(struct list *list)
98{
99    list->next = list->prev = list;
100}
101
102
103static void list_del(struct list *item)
104{
105    item->prev->next = item->next;
106    item->next->prev = item->prev;
107}
108
109
110static void *list_pop(struct list *list)
111{
112    struct list *first;
113
114    first = list->next;
115    if (first == list)
116        return NULL;
117    list_del(first);
118    return first;
119}
120
121
122static void list_add_tail(struct list *list, struct list *item)
123{
124    item->next = list;
125    item->prev = list->prev;
126    list->prev->next = item;
127    list->prev = item;
128}
129
130
131static void list_add(struct list *list, struct list *item)
132{
133    item->next = list->next;
134    item->prev = list;
135    list->next->prev = item;
136    list->next = item;
137}
138
139
140static void list_concat(struct list *a, struct list *b)
141{
142    if (b->next != b) {
143        a->prev->next = b->next;
144        b->next->prev = a->prev;
145        b->prev->next = a;
146        a->prev = b->prev;
147    }
148    list_init(b);
149}
150
151
/*
 * Iterate over all elements of the list at "head", assigning each one to
 * "var" in turn. The casts allow "var" to be a pointer to any structure
 * that begins with a struct list.
 *
 * Do not delete elements from the list while traversing it with foreach !
 * (The next element is fetched through "var" after the loop body has run.)
 */

#define foreach(var, head) \
    for ((var) = (void *) ((struct list *) (head))->next; \
        (var) != (void *) (head); \
        (var) = (void *) ((struct list *) (var))->next)
160
161
162/* ----- Register management ----------------------------------------------- */
163
164
165static struct vm_reg {
166    struct insn *setter; /* instruction setting it; NULL if none */
167    int pfpu_reg; /* underlying PFPU register */
168    int refs; /* usage count */
169} *regs;
170
171static struct list pfpu_regs[PFPU_REG_COUNT];
172static struct list unallocated; /* unallocated registers */
173static int nbindings; /* "public" bindings */
174
175
176static int reg2idx(int reg)
177{
178    return reg >= 0 ? reg : nbindings-reg;
179}
180
181
182static int alloc_reg(struct insn *setter)
183{
184    struct list *reg;
185    int vm_reg, pfpu_reg, vm_idx;
186
187    vm_reg = setter->vm_insn->dest;
188    if (vm_reg >= 0)
189        return vm_reg;
190    reg = list_pop(&unallocated);
191    if (!reg)
192        abort();
193    pfpu_reg = reg-pfpu_regs;
194Dprintf(" alloc reg %d -> %d\n", vm_reg, pfpu_reg);
195    vm_idx = reg2idx(vm_reg);
196    regs[vm_idx].setter = setter;
197    regs[vm_idx].pfpu_reg = pfpu_reg;
198    regs[vm_idx].refs = setter->num_dependants+1;
199    return pfpu_reg;
200}
201
202
203static void put_reg(struct insn *setter)
204{
205    int vm_reg, vm_idx;
206
207    if (!setter)
208        return;
209
210    vm_reg = setter->vm_insn->dest;
211    if (vm_reg >= 0)
212        return;
213
214    vm_idx = reg2idx(vm_reg);
215    if (--regs[vm_idx].refs)
216        return;
217
218Dprintf(" free reg %d\n", regs[vm_idx].pfpu_reg);
219    /*
220     * Prepend so that register numbers stay small and bugs reveal
221     * themselves more rapidly.
222     */
223    list_add(&unallocated, pfpu_regs+regs[vm_idx].pfpu_reg);
224
225    /* clear it for style only */
226    regs[vm_idx].setter = NULL;
227    regs[vm_idx].pfpu_reg = 0;
228}
229
230
231static int lookup_pfpu_reg(int vm_reg)
232{
233    return vm_reg >= 0 ? vm_reg : regs[reg2idx(vm_reg)].pfpu_reg;
234}
235
236
237static void init_registers(struct fpvm_fragment *fragment,
238    unsigned int *registers)
239{
240    size_t regs_size;
241    int i;
242
243    get_registers(fragment, registers);
244    nbindings = fragment->nbindings;
245
246    regs_size = sizeof(struct vm_reg)*(nbindings-fragment->next_sur);
247    regs = malloc(regs_size);
248    memset(regs, 0, regs_size);
249
250    list_init(&unallocated);
251    for (i = fragment->nbindings; i != PFPU_REG_COUNT; i++)
252        list_add_tail(&unallocated, pfpu_regs+i);
253
254/*
255 * @@@ the rules are more complex, see use of dont_touch in
256 * init_scheduler_state
257 */
258}
259
260
261/* ----- Instruction scheduler --------------------------------------------- */
262
263
264static struct list unscheduled; /* unscheduled insns */
265static struct list waiting; /* insns waiting to be scheduled */
266static struct list ready[PFPU_PROGSIZE]; /* insns ready at nth cycle */
267static struct insn *exits[PFPU_PROGSIZE+MAX_LATENCY];
268                    /* insn writing at nth cycle */
269static struct insn dummy_insn; /* dummy, to signal occupancy */
270
271
272static struct vm_reg *add_data_ref(struct insn *insn, struct data_ref *ref,
273    int reg_num)
274{
275    struct vm_reg *reg;
276
277    reg = regs+reg2idx(reg_num);
278    ref->insn = insn;
279    ref->dep = reg->setter;
280    if (ref->dep) {
281        list_add_tail(&ref->dep->dependants, &ref->more);
282        ref->dep->num_dependants++;
283        insn->unresolved++;
284Dprintf("insn %lu: reg %d setter %lu unresolved %d\n",
285   insn-insns, reg_num, reg->setter-insns, insn->unresolved);
286    } else {
287        list_init(&ref->more);
288    }
289    return reg;
290}
291
292
293int catch = 0;
294static void init_scheduler(struct fpvm_fragment *frag)
295{
296    int i;
297    struct insn *insn;
298
299    list_init(&unscheduled);
300    list_init(&waiting);
301    for (i = 0; i != PFPU_PROGSIZE; i++)
302        list_init(&ready[i]);
303
304#if 0
305if (frag->ninstructions > 10) {
306  frag->ninstructions = 10;
307catch = 1;
308}
309#endif
310    for (i = 0; i != frag->ninstructions; i++) {
311        insn = insns+i;
312        memset(insn, 0, sizeof(struct insn));
313        insn->vm_insn = frag->code+i;
314        insn->arity = fpvm_get_arity(frag->code[i].opcode);
315        insn->latency = pfpu_get_latency(frag->code[i].opcode);
316        list_init(&insn->dependants);
317        switch (insn->arity) {
318        case 3:
319            add_data_ref(insn, &insn->opb, FPVM_REG_IFB);
320            /* fall through */
321        case 2:
322            add_data_ref(insn, &insn->opb, frag->code[i].opb);
323            /* fall through */
324        case 1:
325            add_data_ref(insn, &insn->opa, frag->code[i].opa);
326            /* fall through */
327        case 0:
328            add_data_ref(insn,
329                &insn->dest, frag->code[i].dest)->setter = insn;
330            break;
331        default:
332            abort();
333        }
334        if (insn->unresolved)
335            list_add_tail(&unscheduled, &insn->more);
336        else
337            list_add_tail(&ready[0], &insn->more);
338
339    }
340
341    /*
342     * We add a few dummy instructions at the end so that we don't need to
343     * check array boundaries for the unlikely case of overrunning the
344     * schedule.
345     */
346    for (i = 0; i != PFPU_PROGSIZE; i++)
347        exits[i] = NULL;
348    for (; i != PFPU_PROGSIZE+MAX_LATENCY; i++)
349        exits[i] = &dummy_insn;
350}
351
352
353static unsigned issue(struct insn *insn, int cycle)
354{
355    pfpu_instruction code;
356    struct data_ref *ref;
357    int end;
358int nada = 0;
359    end = cycle+insn->latency;
360    exits[end] = insn;
361
362Dprintf("cycle %d: insn %lu L %d (A %d B %d)\n",
363  cycle, insn-insns, insn->latency, insn->vm_insn->opa, insn->vm_insn->opb);
364    switch (insn->arity) {
365    case 3:
366        /* fall through */
367    case 2:
368        code.i.opb = lookup_pfpu_reg(insn->vm_insn->opb);
369        put_reg(insn->opb.dep);
370        /* fall through */
371    case 1:
372        code.i.opa = lookup_pfpu_reg(insn->vm_insn->opa);
373        put_reg(insn->opa.dep);
374        break;
375    case 0:
376        break;
377    default:
378        abort();
379    }
380
381    code.i.dest = alloc_reg(insn);
382    code.i.opcode = fpvm_to_pfpu(insn->vm_insn->opcode);
383
384    foreach (ref, &insn->dependants) {
385        if (ref->insn->earliest <= end)
386            ref->insn->earliest = end+1;
387        if (!--ref->insn->unresolved) {
388Dprintf(" unlocked %lu -> %u\n", ref->insn-insns, ref->insn->earliest);
389nada = 0;
390            list_del(&ref->insn->more);
391            list_add_tail(ready+ref->insn->earliest,
392                &ref->insn->more);
393        }
394    }
395if (nada && catch) *(volatile int *) 0 = 1;
396
397    return code.w;
398}
399
400
401static int count(const struct list *list)
402{
403    int n = 0;
404    const struct list *p;
405
406    for (p = list->next; p != list; p = p->next)
407        n++;
408    return n;
409}
410
411
412static int schedule(struct fpvm_fragment *frag, unsigned int *code)
413{
414    int remaining;
415    int i, last, end;
416    struct insn *insn;
417
418    remaining = frag->ninstructions;
419    for (i = 0; remaining; i++) {
420        if (i == PFPU_PROGSIZE)
421            return -1;
422Dprintf("@%d --- remaining %d, waiting %d + ready %d = ", i, remaining,
423  count(&waiting), count(&ready[i]));
424        list_concat(&waiting, &ready[i]);
425Dprintf("%d\n", count(&waiting));
426        foreach (insn, &waiting)
427            if (!exits[i+insn->latency]) {
428                code[i] = issue(insn, i);
429                list_del(&insn->more);
430                remaining--;
431                break;
432            }
433        if (exits[i])
434            put_reg(exits[i]);
435    }
436
437    /*
438     * Add NOPs to cover unfinished instructions.
439     */
440    last = i;
441    end = i+MAX_LATENCY;
442    if (end > PFPU_PROGSIZE)
443        end = PFPU_PROGSIZE;
444    while (i != end) {
445        if (exits[i])
446            last = i+1; /* @@@ ? */
447        i++;
448    }
449    return last;
450}
451
452
453int gfpus_schedule(struct fpvm_fragment *frag, unsigned int *code,
454    unsigned int *reg)
455{
456    pfpu_instruction vecout;
457    int res;
458
459    init_registers(frag, reg);
460    memset(code, 0, PFPU_PROGSIZE*sizeof(*code));
461    init_scheduler(frag);
462    res = schedule(frag, code);
463    free(regs);
464    if (res < 0)
465        return res;
466    if (frag->vector_mode)
467        return res;
468    if (res == PFPU_PROGSIZE)
469        return -1;
470
471    vecout.w = 0;
472    vecout.i.opcode = FPVM_OPCODE_VECTOUT;
473    code[res] = vecout.w;
474
475    return res+1;
476}
m1/perf/try
#!/bin/sh -x

# Build ./main with COMP_DEBUG and sched.o as scheduler, then run it under gdb
# on the "Godhead" patch(es).
# NOTE(review): "make path" presumably prints the location of the tree that
# contains flickernoise - confirm against the Makefile.

M1=$(make path)

# Don't debug a stale binary if the build failed.
make CFLAGS_EXTRA=-DCOMP_DEBUG SCHED=sched.o || exit 1

# Quote the variable (the path may contain blanks) but not the glob.
gdb --args ./main "$M1"/flickernoise/patches/*/*Godhead*.fnp

Archive Download the corresponding diff file

Branches:
master



interactive