Date:2012-02-14 01:04:14 (12 years 1 month ago)
Author:Werner Almesberger
Commit:2d4a88681da5bd5b395c0ca980554ecf9edd3a8e
Message:m1/perf: deleted. all this has long moved into flickernoise/src/compiler/ptest

Files: m1/perf/Makefile (1 diff)
m1/perf/TODO (1 diff)
m1/perf/all-runs (1 diff)
m1/perf/eval.pl (1 diff)
m1/perf/fakes/bsp (1 diff)
m1/perf/fakes/milkymist_pfpu.h (1 diff)
m1/perf/fakes/rtems.h (0 diffs)
m1/perf/favg (1 diff)
m1/perf/main.c (1 diff)
m1/perf/runs (1 diff)
m1/perf/sched.c (1 diff)
m1/perf/tabulate (1 diff)
m1/perf/try (1 diff)

Change Details

m1/perf/Makefile
# Build the stand-alone performance test ("main") for the Flickernoise patch
# compiler, linking it against a host build of libfpvm.
#
# Knobs intended to be overridden on the command line:
#   M1            root of the checked-out source trees
#   CFLAGS_EXTRA  additional compiler flags (e.g. -DCOMP_DEBUG)
#   CFLAGS_PROF   profiling flags; set empty to build without gprof support
#
# NOTE(review): SCHED is not referenced in this file. Callers pass
# SCHED=sched.o on the command line, which GNU make forwards to the sub-makes
# below; presumably the libfpvm Makefile consumes it - confirm.

M1 = /home/qi/m1
COMPILER_DIR = $(M1)/flickernoise/src
M1SWINC_DIR = $(M1)/milkymist/software/include
M1SWLIB_DIR = $(M1)/milkymist/software/libfpvm/x86-linux

# Compiler used here and handed to the sub-makes.
CC = gcc

#CFLAGS_EXTRA=-DCOMP_DEBUG
CFLAGS_EXTRA =
CFLAGS_PROF = -pg
CFLAGS_COMMON = -Wall -g $(CFLAGS_PROF) $(CFLAGS_EXTRA) -DPRINTF_FLOAT
# Alternative: -nostdinc -fno-builtin
# (kept on its own line so the value carries no trailing whitespace)
CFLAGS_M = -fno-builtin
CFLAGS = $(CFLAGS_COMMON) \
       -I$(COMPILER_DIR) \
       -Ifakes -I$(M1SWINC_DIR)
LDFLAGS = $(CFLAGS_PROF)
LDLIBS = -L$(M1SWLIB_DIR) -lfpvm


COMPILER_O = $(COMPILER_DIR)/compiler.o
LIBFPVM_A = $(M1SWLIB_DIR)/libfpvm.a
OBJS = main.o $(COMPILER_O)

.PHONY: all clean spotless path

all: main

# Linked by make's built-in rule.
main: $(OBJS) $(LIBFPVM_A)

# The two rules below have no prerequisites: an existing compiler.o or
# libfpvm.a is never rebuilt. Run "make spotless" first to force a rebuild
# with different flags.

$(COMPILER_O):
	$(MAKE) -C $(COMPILER_DIR) CC='$(CC)' \
	    CFLAGS='$(CFLAGS_COMMON) $(CFLAGS_M) -I$(M1SWINC_DIR) -I$(CURDIR)/fakes' \
	    compiler.o

$(LIBFPVM_A):
	$(MAKE) -C $(M1SWLIB_DIR) CC='$(CC)' \
	    CFLAGS='$(CFLAGS_COMMON) $(CFLAGS_M) -I$(M1SWINC_DIR)'

clean:
	$(MAKE) -C $(M1SWLIB_DIR) clean
	$(MAKE) -C $(COMPILER_DIR) clean
	$(RM) $(M1SWLIB_DIR)/sched.o
	$(RM) $(OBJS)

spotless: clean
	$(RM) main

# Print the source root so that helper scripts can pick it up with `make path`.
path:
	@echo $(M1)
m1/perf/TODO
1Done:
2- dynamically allocate scheduler context
3- see if preferring critical path can improve code efficiency (YES !)
4
5Pending:
6- see if dynamically adjusting the critical path leads to further improvements
7- test IF
8- run result comparison against full set of patches
9- check if result comparison actually compares meaningful data
10- compare run time and code size for all patches
11- see what optimization changes (may interfere with profiling)
12- build into Flickernoise (some things may need adapting, e.g., abort())
13- review code, see if things can be simplified
14- see if valgrind can do something useful
m1/perf/all-runs
#!/bin/sh -ex
#
# Rebuild the whole data/ tree from scratch: for each scheduler variant
# (ref, new, opt) invoke ./runs once per output mode (out, expr, prof) and
# store the results in data/<variant>/<mode>.

rm -rf data
mkdir data

for variant in ref new opt; do
    # Scheduler-selection options handed to ./runs.
    case $variant in
    ref)
        sched_opts=
        ;;
    new)
        sched_opts=-n
        ;;
    opt)
        sched_opts="-n -o"
        ;;
    esac

    mkdir data/$variant

    for mode in out expr prof; do
        # Output-mode option handed to ./runs.
        case $mode in
        out)
            mode_opt=-s
            ;;
        expr)
            mode_opt=-e
            ;;
        prof)
            mode_opt=-p
            ;;
        esac

        mkdir data/$variant/$mode
        ./runs $sched_opts $mode_opt data/$variant/$mode || exit
    done
done
26
m1/perf/eval.pl
#!/usr/bin/perl
#
# Read compiler debug output and, for every PFPU fragment, replay the listed
# instructions symbolically.
#
# Default mode: print the expression held by each register recorded from the
# preceding FPVM fragment, followed by the VECTOUT operands.
# With -r: print "<number of distinct registers>/<highest register name>".


# Emit the result for the fragment that has just been processed.
sub flush
{
    if ($nregs) {
        my $total = keys %reg;
        my ($highest) = sort { $b cmp $a } keys %reg;

        print $total, "/", $highest, "\n";
        return;
    }
    print "$_ = ".$reg{$_}."\n" for sort keys %use;
    print $res;
}


if ($ARGV[0] eq "-r") {
    shift @ARGV;
    $nregs = 1;
}


while (<>) {
    # A new FPVM fragment ends the previous PFPU fragment, if one was seen.
    if (/FPVM fragment:/) {
        flush() if $i;
        undef %tmp;
        undef $i;
    }
    # Start of a PFPU fragment: reset the replay state and remember the
    # registers collected from the FPVM listing.
    if (/PFPU fragment:/) {
        undef $res;
        undef %reg;
        undef @val;
        %use = %tmp;
        $i = 0;
    }

    $tmp{"R$1"} = 1 if /^\d+:.*-> R0(\d+)/;
    next unless defined $i;

    # Groups: 1 = cycle, 2 = opcode, 3 = operand A, 5 = operand B,
    # 7 = destination register
    next unless
        /^(\d+):\s+(\S+)\s+(R\d+)?(,(R\d+))?.*?(->\s+(R\d+))?\s*$/;
    my ($cycle, $op, $opa, $opb, $dst) = ($1, $2, $3, $5, $7);
    my $done = /E=(\d+)>/ ? $1 : undef;

    # Instruction lines must be numbered consecutively.
    die "($i) $_" if $cycle != $i;

    if ($nregs) {
        $reg{$opa} = 1 if defined $opa;
        $reg{$opb} = 1 if defined $opb;
    }

    if (defined $dst) {
        print STDERR "$i: concurrent read/write on $opa (A)\n"
            if $opa eq $dst;
        print STDERR "$i: concurrent read/write on $opb (B)\n"
            if $opb eq $dst;
    }

    # Replace register names by the expressions they currently hold.
    $opa = $reg{$opa} if defined $reg{$opa};
    $opb = $reg{$opb} if defined $reg{$opb};

    if ($op eq "IF<R2>") {
        $expr = "(IF ".$reg{"R002"}." $opa $opb)";
        $reg{"R002"} = 1 if $nregs;
    } elsif ($op eq "VECTOUT") {
        $res = "A = $opa\nB = $opb\n";
    } elsif (defined $opb) {
        $expr = "($op $opa $opb)";
    } elsif (defined $opa) {
        $expr = "($op $opa)";
    } else {
        $expr = "($op)";
    }

    # The expression becomes available at the cycle given by E=<n>; a
    # destination listed on this line receives whatever completes now.
    $val[$done] = $expr if defined $done;
    $reg{$dst} = $val[$i] if defined $dst;
    $i++;
}
flush();
m1/perf/fakes/bsp
1.
m1/perf/fakes/milkymist_pfpu.h
1#include "hw/pfpu.h"
m1/perf/fakes/rtems.h
m1/perf/favg
#!/usr/bin/perl
#
# usage: favg column file...
#
# For each file, average the numbers found in the selected column of its
# HTML table rows. Only <TD> cells whose content begins with a digit or "."
# are counted; the count restarts at every <TR>. Prints "sum/count = average"
# per file.
$sel = $ARGV[0];
shift @ARGV;
for my $name (@ARGV) {
    my $s = 0;    # sum of the selected cells
    my $n = 0;    # number of selected cells
    my $c = 0;    # numeric cells seen so far in the current row

    # Three-argument open: the file name is never interpreted as a mode.
    open(my $fh, "<", $name) || die "$name: $!";
    while (<$fh>) {
        $c = 0 if /<TR>/;
        if (/<TD[^>]*>([0-9.]+)/ ) {
            $c++;
            next unless $c == $sel;
            $s += $1;
            $n++;
        }
    }
    close $fh;

    # Without this guard an input with no matching cell aborts the whole
    # run with "Illegal division by zero".
    if (!$n) {
        warn "$name: no numeric cell in column $sel\n";
        next;
    }
    print "$s/$n = ", $s/$n, "\n";
}
m1/perf/main.c
1#include <stdlib.h>
2#include <stdio.h>
3
4#include "compiler.h"
5
6
7#define BUF_SIZE 1000000
8
9
/*
 * Message callback handed to patch_compile(): writes the message followed
 * by a newline to stderr.
 */
static void report(const char *s)
{
    fprintf(stderr, "%s%c", s, '\n');
}
14
15
/*
 * Print the command-line synopsis to stderr and terminate with exit
 * status 1. "name" is the program name shown in the synopsis.
 */
static void usage(const char *name)
{
    static const char synopsis[] = "usage: %s patch-file [loops]\n";

    fprintf(stderr, synopsis, name);
    exit(1);
}
21
22
23int main(int argc, char **argv)
24{
25    char buf[BUF_SIZE];
26    const char *name;
27    FILE *file;
28    size_t got;
29    int loops = 1;
30    int i;
31
32    switch (argc) {
33    case 2:
34        break;
35    case 3:
36        loops = atoi(argv[2]);
37        break;
38    default:
39        usage(*argv);
40    }
41
42    name = argv[1];
43    file = fopen(name, "r");
44    if (!file) {
45        perror(name);
46        exit(1);
47    }
48    got = fread(buf, 1, sizeof(buf)-1, file);
49    if (got < 0) {
50        perror(name);
51        exit(1);
52    }
53    buf[got] = 0;
54    fclose(file);
55
56    for (i = 0; i != loops; i++)
57        if (!patch_compile(buf, report))
58            return 1;
59
60    return 0;
61}
m1/perf/runs
1#!/bin/sh -x
2
3
# Turn a patch file name into a safe output file name: strip the directory
# and the .fnp suffix, turn spaces into underscores, drop every character
# other than letters, digits, "_" and "-", then collapse runs of underscores.
sanitize()
{
    basename "$1" .fnp |
        tr ' ' _ |
        tr -cd 'A-Za-z0-9_-' |
        tr -s _
}
8
9
# Print the command-line help on stderr and terminate the script with
# status 1, so that an invalid invocation can never fall through to the
# build and run steps.
usage()
{
cat <<EOF 1>&2
usage: $0 [-e|-p] [-n [-o] [-s]] dir

  -e generate the calculated expression (default: just dump debug output)
  -p profile 10000 runs (default: just dump debug output)
  -n use "new" scheduler
  -o enable LCPF optimizer
  -s enable register pressure statistics
EOF
exit 1
}
23
24
# Root of the source trees, as configured in the Makefile.
M1=`make path`
if [ -z "$M1" ]; then
    echo "cannot determine source root (\"make path\" failed)" 1>&2
    exit 1
fi

extra=              # additional -D flags passed via CFLAGS_EXTRA
sched=              # "SCHED=sched.o" when the new scheduler is selected
profile=false
evaluate=false

# Consume leading option arguments (anything starting with "-").
while [ "${1#-}" != "$1" ]; do
    case "$1" in
    -e) evaluate=true;;
    -p) profile=true;;
    -n) sched=SCHED=sched.o;;
    -o) extra="$extra -DLCPF";;
    -s) extra="$extra -DREG_STATS";;
    *)  usage
        exit 1;;
    esac
    shift
done

# Exactly one non-option argument is expected: the output directory.
# Exit explicitly so that a bad invocation never reaches the build.
if [ -z "$1" ] || [ -n "$2" ]; then
    usage
    exit 1
fi
if [ ! -d "$1" ]; then
    echo "$1: directory not found" 1>&2
    exit 1
fi

# Debug output is wanted in every mode except profiling.
$profile || extra="$extra -DCOMP_DEBUG"

# Rebuild from scratch with the selected flags; do not run a stale or
# missing binary if the build fails.
make spotless || exit 1
make CFLAGS_EXTRA="$extra" $sched all || exit 1

for n in "$M1"/flickernoise/patches/*/*.fnp; do
    # An unmatched glob is left as the literal pattern; skip it.
    [ -e "$n" ] || continue

    s=`sanitize "$n"`
    if $profile; then
        ./main "$n" 10000 || exit
        gprof main >"$1/$s"
    elif $evaluate; then
        ./main "$n" | ./eval.pl >"$1/$s" || exit
    else
        ./main "$n" >"$1/$s" || exit
    fi
done
m1/perf/sched.c
1/*
2 * lnfpus.c - O(n) ... O(n^2) scheduler
3 *
4 * Copyright (C) 2011 Werner Almesberger
5 *
6 * Based on gfpus.c
7 * Copyright (C) 2007, 2008, 2009, 2010 Sebastien Bourdeauducq
8 *
9 * This program is free software: you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License as published by
11 * the Free Software Foundation, version 3 of the License.
12 *
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
17 *
18 * You should have received a copy of the GNU General Public License
19 * along with this program. If not, see <http://www.gnu.org/licenses/>.
20 */
21
22#include <stdlib.h>
23#include <stdio.h>
24#include <string.h>
25#include <assert.h>
26
27#include <fpvm/is.h>
28#include <fpvm/fpvm.h>
29#include <fpvm/pfpu.h>
30#include <fpvm/gfpus.h>
31
32#include <hw/pfpu.h>
33
34
35//#define REG_STATS
36#define LCPF /* longest critical path first */
37
38//#define DEBUG
39#ifdef DEBUG
40#define Dprintf printf
41#else
42#define Dprintf(...)
43#endif
44
45
46#define MAX_LATENCY 8 /* maximum latency; okay to make this bigger */
47
48#define CODE(n) (((pfpu_instruction *) (code+(n)))->i)
49
50
/*
 * Link element of a circular doubly-linked list. The same structure serves
 * as list head and as the link embedded in each list member.
 */
struct list {
    struct list *next;
    struct list *prev;
};
54
55
56struct insn {
57    struct list more; /* more insns on same schedule */
58    struct fpvm_instruction *vm_insn;
59    struct data_ref {
60        struct list more; /* more refs sharing the data */
61        struct insn *insn; /* insn this is part of */
62        struct insn *dep; /* insn we depend on */
63    } opa, opb, dest, cond;
64    int arity;
65    int latency;
66    int rmw; /* non-zero if instruction is read-modify-write */
67    int unresolved; /* number of data refs we need before we can sched */
68    int earliest; /* earliest cycle dependencies seen so far are met */
69    struct list dependants; /* list of dependencies (constant) */
70    int num_dependants; /* number of dependencies */
71    struct insn *next_setter; /* next setter of the same register */
72#ifdef LCPF
73    int distance; /* minimum cycles on this path until the end */
74#endif
75};
76
77
/* Per-FPVM-register bookkeeping. */
struct vm_reg {
    /* most recent instruction writing the register; NULL if none */
    struct insn *setter;
    /* first instruction in the fragment that writes the register */
    struct insn *first_setter;
    /* PFPU register currently backing this FPVM register */
    int pfpu_reg;
    /* reference count; see put_reg for how it is dropped */
    int refs;
};
84
85
86struct pfpu_reg {
87    struct list more; /* list of unallocated PFPU registers */
88    int vm_reg; /* corresponding FPVM register if allocated */
89    int used; /* used somewhere in the program */
90};
91
92
93static struct sched_ctx {
94    struct fpvm_fragment *frag;
95    struct insn insns[FPVM_MAXCODELEN];
96    struct vm_reg *regs; /* dynamically allocated */
97    struct pfpu_reg pfpu_regs[PFPU_REG_COUNT];
98    struct list unallocated; /* unallocated registers */
99    struct list unscheduled; /* unscheduled insns */
100    struct list waiting; /* insns waiting to be scheduled */
101    struct list ready[PFPU_PROGSIZE]; /* insns ready at nth cycle */
102    int cycle; /* the current cycle */
103#ifdef REG_STATS
104    int max_regs, curr_regs; /* allocation statistics */
105#endif
106} *sc;
107
108
109/* ----- Register initialization ------------------------------------------- */
110
111
112/*
113 * Straight from gfpus.c, only with some whitespace changes.
114 */
115
116static void get_registers(struct fpvm_fragment *fragment,
117    unsigned int *registers)
118{
119    int i;
120    union {
121        float f;
122        unsigned int n;
123    } fconv;
124
125    for(i = 0; i < fragment->nbindings; i++)
126        if(fragment->bindings[i].isvar)
127            registers[i] = 0;
128        else {
129            fconv.f = fragment->bindings[i].b.c;
130            registers[i] = fconv.n;
131        }
132    for(; i < PFPU_REG_COUNT; i++)
133        registers[i] = 0;
134}
135
136
137/* ----- Doubly-linked list ------------------------------------------------ */
138
139
140/*
141 * Use the naming conventions of include/linux/list.h
142 */
143
144
#ifndef DEBUG

/* Non-debug builds: poisoning expands to nothing. */
#define list_poison(list)

#else /* !DEBUG */

/*
 * Debug builds: clear both links of an element that is no longer on a
 * list, so that any further use dereferences NULL.
 */
static void list_poison(struct list *list)
{
    list->prev = NULL;
    list->next = NULL;
}

#endif /* DEBUG */
157
158
159static void list_init(struct list *list)
160{
161    list->next = list->prev = list;
162}
163
164
165static void list_del(struct list *item)
166{
167    assert(item->next != item);
168    item->prev->next = item->next;
169    item->next->prev = item->prev;
170    list_poison(item);
171}
172
173
174static void *list_pop(struct list *list)
175{
176    struct list *first;
177
178    first = list->next;
179    if(first == list)
180        return NULL;
181    list_del(first);
182    return first;
183}
184
185
186static void list_add_tail(struct list *list, struct list *item)
187{
188    item->next = list;
189    item->prev = list->prev;
190    list->prev->next = item;
191    list->prev = item;
192}
193
194
195static void list_add(struct list *list, struct list *item)
196{
197    item->next = list->next;
198    item->prev = list;
199    list->next->prev = item;
200    list->next = item;
201}
202
203
204static void list_concat(struct list *a, struct list *b)
205{
206    if(b->next != b) {
207        a->prev->next = b->next;
208        b->next->prev = a->prev;
209        b->prev->next = a;
210        a->prev = b->prev;
211    }
212    list_poison(b);
213}
214
215
216/*
217 * Do not delete elements from the list while traversing it with foreach !
218 */
219
220#define foreach(var, head) \
221    for(var = (void *) ((head))->next; \
222        (var) != (void *) (head); \
223        var = (void *) ((struct list *) (var))->next)
224
225
226/* ----- Register management ----------------------------------------------- */
227
228
229static int vm_reg2idx(int reg)
230{
231    return reg >= 0 ? reg : sc->frag->nbindings-reg;
232}
233
234
235static int alloc_reg(struct insn *setter)
236{
237    struct pfpu_reg *reg;
238    int vm_reg, pfpu_reg, vm_idx;
239
240    vm_reg = setter->vm_insn->dest;
241    if(vm_reg >= 0) {
242        pfpu_reg = vm_reg;
243        sc->pfpu_regs[vm_reg].vm_reg = vm_reg; /* @@@ global init */
244    } else {
245        reg = list_pop(&sc->unallocated);
246        if(!reg)
247            return -1;
248
249        #ifdef REG_STATS
250        sc->curr_regs++;
251        if(sc->curr_regs > sc->max_regs)
252            sc->max_regs = sc->curr_regs;
253        #endif
254
255        reg->vm_reg = vm_reg;
256        pfpu_reg = reg-sc->pfpu_regs;
257    }
258
259    Dprintf(" alloc reg %d -> %d\n", vm_reg, pfpu_reg);
260
261    vm_idx = vm_reg2idx(vm_reg);
262    sc->regs[vm_idx].setter = setter;
263    sc->regs[vm_idx].pfpu_reg = pfpu_reg;
264    sc->regs[vm_idx].refs = setter->num_dependants+1;
265
266    return pfpu_reg;
267}
268
269
270static void put_reg(int vm_reg)
271{
272    int vm_idx;
273    struct vm_reg *reg;
274
275    if(vm_reg >= 0)
276        return;
277
278    vm_idx = vm_reg2idx(vm_reg);
279    reg = sc->regs+vm_idx;
280
281    assert(reg->refs);
282    if(--reg->refs)
283        return;
284
285    Dprintf(" free reg %d\n", reg->pfpu_reg);
286
287#ifdef REG_STATS
288    assert(sc->curr_regs);
289    sc->curr_regs--;
290#endif
291
292    /*
293     * Prepend so that register numbers stay small and bugs reveal
294     * themselves more rapidly.
295     */
296    list_add(&sc->unallocated, &sc->pfpu_regs[reg->pfpu_reg].more);
297
298    /* clear it for style only */
299    reg->setter = NULL;
300    reg->pfpu_reg = 0;
301}
302
303
304static int lookup_pfpu_reg(int vm_reg)
305{
306    return vm_reg >= 0 ? vm_reg : sc->regs[vm_reg2idx(vm_reg)].pfpu_reg;
307}
308
309
310static void mark(int vm_reg)
311{
312    if(vm_reg > 0)
313        sc->pfpu_regs[vm_reg].used = 1;
314}
315
316
317static int init_registers(struct fpvm_fragment *frag,
318    unsigned int *registers)
319{
320    int i;
321
322    get_registers(frag, registers);
323
324    for(i = 0; i != frag->ninstructions; i++) {
325        mark(frag->code[i].opa);
326        mark(frag->code[i].opb);
327        mark(frag->code[i].dest);
328    }
329
330    list_init(&sc->unallocated);
331    for(i = PFPU_SPREG_COUNT; i != PFPU_REG_COUNT; i++)
332        if(!sc->pfpu_regs[i].used)
333            list_add_tail(&sc->unallocated, &sc->pfpu_regs[i].more);
334
335    return 0;
336}
337
338
339/* ----- Instruction scheduler --------------------------------------------- */
340
341
342static struct vm_reg *add_data_ref(struct insn *insn, struct data_ref *ref,
343    int reg_num)
344{
345    struct vm_reg *reg;
346
347    reg = sc->regs+vm_reg2idx(reg_num);
348    ref->insn = insn;
349    ref->dep = reg->setter;
350    if(insn->vm_insn->dest == reg_num)
351        insn->rmw = 1;
352    if(!ref->dep)
353        reg->refs++;
354    else {
355        list_add_tail(&ref->dep->dependants, &ref->more);
356        ref->dep->num_dependants++;
357        insn->unresolved++;
358
359        Dprintf("insn %lu: reg %d setter %lu unresolved %d\n",
360            insn-sc->insns, reg_num, reg->setter-sc->insns,
361            insn->unresolved);
362    }
363    return reg;
364}
365
366
367static void init_scheduler(struct fpvm_fragment *frag)
368{
369    int i;
370    struct insn *insn;
371    struct vm_reg *reg;
372    struct data_ref *ref;
373
374    list_init(&sc->unscheduled);
375    list_init(&sc->waiting);
376    for(i = 0; i != PFPU_PROGSIZE; i++)
377        list_init(sc->ready+i);
378
379    for(i = 0; i != frag->ninstructions; i++) {
380        insn = sc->insns+i;
381        insn->vm_insn = frag->code+i;
382        insn->arity = fpvm_get_arity(frag->code[i].opcode);
383        insn->latency = pfpu_get_latency(frag->code[i].opcode);
384        list_init(&insn->dependants);
385        switch (insn->arity) {
386            case 3:
387                add_data_ref(insn, &insn->cond, FPVM_REG_IFB);
388                /* fall through */
389            case 2:
390                add_data_ref(insn, &insn->opb, frag->code[i].opb);
391                /* fall through */
392            case 1:
393                add_data_ref(insn, &insn->opa, frag->code[i].opa);
394                /* fall through */
395            case 0:
396                reg = sc->regs+vm_reg2idx(frag->code[i].dest);
397                if(reg->setter) {
398                    reg->setter->next_setter = insn;
399                    foreach(ref, &reg->setter->dependants)
400                        if(ref->insn != insn)
401                            insn->unresolved++;
402                    if(!insn->rmw)
403                        insn->unresolved++;
404                } else {
405                    if(!insn->rmw)
406                        insn->unresolved += reg->refs;
407                    reg->first_setter = insn;
408                }
409                reg->setter = insn;
410                break;
411            default:
412                abort();
413        }
414        if(insn->unresolved)
415            list_add_tail(&sc->unscheduled, &insn->more);
416        else
417            list_add_tail(&sc->ready[0], &insn->more);
418    }
419
420#ifdef LCPF
421    struct data_ref *dep;
422
423    for(i = frag->ninstructions-1; i >= 0; i--) {
424        insn = sc->insns+i;
425#if 0
426        /*
427         * Theoretically, we should consider the distance through
428         * write-write dependencies too. In practice, this would
429         * mainly matter if we had operations whose result is ignored.
430         * This is a degenerate case that's probably not worth
431         * spending much effort on.
432         */
433        if(insn->next_setter) {
434            insn->distance =
435                insn->next_setter->distance-insn->distance+1;
436            if(insn->distance < 1)
437                insn->distance = 1;
438        }
439#endif
440        foreach(dep, &insn->dependants)
441            if(dep->insn->distance > insn->distance)
442                insn->distance = dep->insn->distance;
443        /*
444         * While it would be more correct to add one for the cycle
445         * following the write cycle, this also has the effect of
446         * producing slighly worse results on the example set of
447         * patches. Let's thus keep this "bug" for now.
448         */
449// insn->distance += insn->latency+1;
450        insn->distance += insn->latency;
451    }
452#endif
453}
454
455
456static void unblock(struct insn *insn)
457{
458    int slot;
459
460    assert(insn->unresolved);
461    if(--insn->unresolved)
462        return;
463    Dprintf(" unblocked %lu -> %u\n", insn-sc->insns, insn->earliest);
464    list_del(&insn->more);
465    slot = insn->earliest;
466    if(slot <= sc->cycle)
467        slot = sc->cycle+1;
468    list_add_tail(sc->ready+slot, &insn->more);
469}
470
471
472static void put_reg_by_ref(struct data_ref *ref, int vm_reg)
473{
474    struct insn *setter = ref->dep;
475    struct vm_reg *reg;
476
477    if(setter) {
478        put_reg(setter->vm_insn->dest);
479        if(setter->next_setter && setter->next_setter != ref->insn)
480            unblock(setter->next_setter);
481    } else {
482        reg = sc->regs+vm_reg2idx(vm_reg);
483        if(reg->first_setter && !reg->first_setter->rmw)
484            unblock(reg->first_setter);
485    }
486}
487
488
489static void unblock_after(struct insn *insn, int cycle)
490{
491    if(insn->earliest <= cycle)
492        insn->earliest = cycle+1;
493    unblock(insn);
494}
495
496
497static int issue(struct insn *insn, unsigned *code)
498{
499    struct data_ref *ref;
500    int end, reg;
501
502    end = sc->cycle+insn->latency;
503
504    Dprintf("cycle %d: insn %lu L %d (A %d B %d)\n", sc->cycle,
505        insn-sc->insns, insn->latency, insn->vm_insn->opa,
506        insn->vm_insn->opb);
507
508    switch (insn->arity) {
509        case 3:
510            put_reg_by_ref(&insn->cond, FPVM_REG_IFB);
511            /* fall through */
512        case 2:
513            CODE(sc->cycle).opb = lookup_pfpu_reg(insn->vm_insn->opb);
514            put_reg_by_ref(&insn->opb, insn->vm_insn->opb);
515            /* fall through */
516        case 1:
517            CODE(sc->cycle).opa = lookup_pfpu_reg(insn->vm_insn->opa);
518            put_reg_by_ref(&insn->opa, insn->vm_insn->opa);
519            break;
520        case 0:
521            break;
522        default:
523            abort();
524    }
525
526    reg = alloc_reg(insn);
527    if(reg < 0)
528        return -1;
529    CODE(end).dest = reg;
530    CODE(sc->cycle).opcode = fpvm_to_pfpu(insn->vm_insn->opcode);
531
532    foreach(ref, &insn->dependants)
533        unblock_after(ref->insn, end);
534    if(insn->next_setter && !insn->next_setter->rmw)
535        unblock_after(insn->next_setter,
536            end-insn->next_setter->latency);
537
538    return 0;
539}
540
541
#ifdef DEBUG
/* Debug helper: return the number of elements on "list". */
static int count(const struct list *list)
{
    const struct list *node = list->next;
    int total = 0;

    while(node != list) {
        total++;
        node = node->next;
    }
    return total;
}
#endif
553
554
555static int schedule(unsigned int *code)
556{
557    int remaining;
558    int i, last, end;
559    struct insn *insn;
560    struct insn *best;
561
562    remaining = sc->frag->ninstructions;
563    for(i = 0; remaining; i++) {
564        if(i == PFPU_PROGSIZE)
565            return -1;
566
567        sc->cycle = i;
568        Dprintf("@%d --- remaining %d, waiting %d + ready %d\n",
569            i, remaining, count(&sc->waiting), count(&sc->ready[i]));
570
571        list_concat(&sc->waiting, sc->ready+i);
572        best = NULL;
573        foreach(insn, &sc->waiting) {
574            end = i+insn->latency;
575            if(end >= PFPU_PROGSIZE)
576                return -1;
577            if(!CODE(end).dest) {
578#ifdef LCPF
579                if(!best || best->distance < insn->distance)
580                    best = insn;
581#else
582                best = insn;
583                break;
584#endif
585            }
586        }
587        if(best) {
588            if(issue(best, code) < 0)
589                return -1;
590            list_del(&best->more);
591            remaining--;
592        }
593        if(CODE(i).dest)
594            put_reg(sc->pfpu_regs[CODE(i).dest].vm_reg);
595    }
596
597    /*
598     * Add NOPs to cover unfinished instructions.
599     */
600    last = i;
601    end = i+MAX_LATENCY;
602    if(end > PFPU_PROGSIZE)
603        end = PFPU_PROGSIZE;
604    while(i != end) {
605        if(CODE(i).dest)
606            last = i+1;
607        i++;
608    }
609    return last;
610}
611
612
613int gfpus_schedule(struct fpvm_fragment *frag, unsigned int *code,
614    unsigned int *reg)
615{
616    /*
617     * allocate context and registers on stack because standalone FN has no
618     * memory allocator
619     */
620    struct sched_ctx sc_alloc;
621    struct vm_reg regs[frag->nbindings-frag->next_sur];
622    pfpu_instruction vecout;
623    int res;
624
625printf("greetings %lu %lu\n", sizeof(*sc), sizeof(regs));
626    sc = &sc_alloc;
627    memset(sc, 0, sizeof(*sc));
628    sc->frag = frag;
629    sc->regs = regs;
630    memset(regs, 0, sizeof(regs));
631
632    if(init_registers(frag, reg) < 0)
633        return -1;
634    init_scheduler(frag);
635
636    memset(code, 0, PFPU_PROGSIZE*sizeof(*code));
637    res = schedule(code);
638
639#ifdef REG_STATS
640    printf("regs: %d/%d\n", sc->curr_regs, sc->max_regs);
641#endif
642
643    if(res < 0)
644        return res;
645    if(frag->vector_mode)
646        return res;
647    if(res == PFPU_PROGSIZE)
648        return -1;
649
650    vecout.w = 0;
651    vecout.i.opcode = FPVM_OPCODE_VECTOUT;
652    code[res] = vecout.w;
653
654    return res+1;
655}
m1/perf/tabulate
1#!/bin/sh
2
3M1=`make path`
4
5
6time()
7{
8    sed '/^.*of \([0-9.]*\) seconds.*/s//\1/p;d' <data/$1/prof/$2
9}
10
11
# size variant patch
# Print the position (leading zeroes removed) of the last VECTOUT line in
# data/<variant>/out/<patch>.
size()
{
    <data/$1/out/$2 \
        sed '/^0*\([1-9][0-9]*\): VECTOUT.*/s//\1/p;d' |
        tail -n 1
}
17
18
# eff variant patch
# Print the percentage of the last "Efficiency:" line in
# data/<variant>/out/<patch>.
eff()
{
    <data/$1/out/$2 \
        sed '/^Efficiency: \([0-9.]*\)%/s//\1/p;d' |
        tail -n 1
}
23
24
# regs variant patch
# Run ./eval.pl -r on data/<variant>/out/<patch>, discard its first output
# line, and keep only the part before the "/" on the remaining lines.
regs()
{
    ./eval.pl -r data/$1/out/$2 |
        sed '1d;s|/.*||'
    # Alternative, using the scheduler's own statistics:
    # sed '/^regs: 0\//s///p;d' <data/$1/out/$2 | tail -n 1
}
30
31
# sum variant patch
# Print the MD5 checksum of data/<variant>/expr/<patch>.
sum()
{
    <data/$1/expr/$2 md5sum |
        sed 's/ .*//'
}
36
37
# eq a b
# Print Y if the two strings are identical, N otherwise.
eq()
{
    case "$1" in
    "$2")
        echo Y;;
    *)
        echo N;;
    esac
}
46
47
# Filter: cut each input line at its first non-digit character, leaving
# only the leading run of digits.
trim()
{
    sed -e 's/[^0-9].*//'
}
52
53
# cfield color text
# Print an HTML table cell with the given background color and content.
cfield()
{
    echo '<TD bgcolor="'"$1"'">'"$2"
}
58
59
# rank op func this other1 other2 patch [suffix]
#
# Print a right-aligned HTML table cell containing `func this patch`
# (followed by the optional suffix), colored by comparison with the other
# two variants:
#   green  "this" compares true under test operator "op" (e.g. -lt, -gt)
#          against both others
#   red    both others compare true under "op" against "this"
#   white  anything else
# Values are reduced to their leading digits before they are compared.
rank()
{
    op=$1
    txt=`$2 $3 $6`
    a=`echo "$txt" | trim`
    b=`$2 $4 $6 | trim`
    c=`$2 $5 $6 | trim`

    # Operands are quoted so that a missing value makes the comparison
    # fail (white cell) rather than break the syntax of the test.
    if [ "$a" $op "$b" ] && [ "$a" $op "$c" ]; then
        col=$green
    elif [ "$b" $op "$a" ] && [ "$c" $op "$a" ]; then
        col=$red
    else
        col=white
    fi
    # The quotes around "right" are escaped; unescaped, they ended the shell
    # string and the attribute was emitted without quotes.
    echo "<TD bgcolor=\"$col\" align=\"right\">$txt$7"
}
77
78
# Cell background colors used by the HTML report.
red="#ffb0b0"
green="#a0ffa0"

# -h selects HTML output. "$1" is quoted so that running the script without
# arguments does not make the test fail with a syntax error.
html=false
if [ "$1" = -h ]; then
    html=true
    shift
fi

if $html; then
    cat <<EOF
<HTML>
<TITLE>Scheduler comparison</TITLE>
<BODY>
<TABLE bgcolor="#f0f0f0">
  <TR>
    <TH colspan="5" align="left">Original</TH>
    <TH colspan="5" align="left">New (no optimizer)</TH>
    <TH colspan="5" align="left">New (LCPF)</TH>
    <TH align="left">Equiv</TH>
    <TH align="left">Name</TH>
  <TR>
     <TH>Time</TH><TH>Size</TH><TH>Eff</TH><TH>Regs</TH>
     <TH>&nbsp;</TH>
     <TH>Time</TH><TH>Size</TH><TH>Eff</TH><TH>Regs</TH>
     <TH>&nbsp;</TH>
     <TH>Time</TH><TH>Size</TH><TH>Eff</TH><TH>Regs</TH>
     <TH>&nbsp;</TH>
EOF
    # One table row per patch found in the reference output.
    for n in `ls -1 data/ref/out`; do
        ref=`sum ref $n`
        new=`sum new $n`
        opt=`sum opt $n`
        # pairwise equality of the generated expressions
        same=`eq $ref $new`/`eq $ref $opt`/`eq $new $opt`

        echo "<TR>"
        rank -lt time ref new opt $n
        rank -lt size ref new opt $n
        rank -gt eff ref new opt $n %
        rank -lt regs ref new opt $n

        echo "<TD>"
        rank -lt time new ref opt $n
        rank -lt size new ref opt $n
        rank -gt eff new ref opt $n %
        rank -lt regs new ref opt $n

        echo "<TD>"
        rank -lt time opt ref new $n
        rank -lt size opt ref new $n
        rank -gt eff opt ref new $n %
        rank -lt regs opt ref new $n

        echo "<TD>"
        if [ "$same" = Y/Y/Y ]; then
            cfield $green $same
        else
            cfield $red $same
        fi
        cfield white "$n"
    done
    cat <<EOF
</TABLE>
</BODY>
</HTML>
EOF
    exit
fi


# Plain-text report.
echo "Original New sched (no opt) New sched (LCPF) Equiv Name"
echo "Time Size Eff Regs Time Size Eff Regs Time Size Eff Regs"

# Running totals, built up as dc (reverse Polish) expressions.
tref=0
tnew=0
topt=0

for n in `ls -1 data/ref/out`; do
    ref=`sum ref $n`
    new=`sum new $n`
    opt=`sum opt $n`
    printf "%5.1f %4d%3d%% %4d %5.1f %4d%3d%% %4d %5.1f %4d%3d%% %4d %s " \
      `time ref $n` `size ref $n` `eff ref $n` `regs ref $n` \
      `time new $n` `size new $n` `eff new $n` `regs new $n` \
      `time opt $n` `size opt $n` `eff opt $n` `regs opt $n` \
      `eq $ref $new`/`eq $ref $opt`/`eq $new $opt`
    echo $n
    tref="$tref `time ref $n` +"
    tnew="$tnew `time new $n` +"
    topt="$topt `time opt $n` +"
done

printf "Original time: %9.1f s\n" `dc -e "$tref p"`
printf "New (unopt) time: %6.1f s\n" `dc -e "$tnew p"`
printf "New (opt) time: %8.1f s\n" `dc -e "$topt p"`
m1/perf/try
#!/bin/sh -x
#
# Build a debug version (no profiling, SCHED=sched.o) and run it under gdb
# on the patch(es) whose file name contains "Godhead".

M1=`make path`

# Do not start the debugger on a stale or missing binary.
make CFLAGS_EXTRA=-DCOMP_DEBUG CFLAGS_PROF= SCHED=sched.o || exit 1
gdb --args ./main "$M1"/flickernoise/patches/*/*Godhead*.fnp

Archive Download the corresponding diff file

Branches:
master



interactive