Werner's Miscellanea
Sign in or create your account | Project List | Help
Werner's Miscellanea Commit Details
Date: | 2012-02-14 01:04:14 (12 years 1 month ago) |
---|---|
Author: | Werner Almesberger |
Commit: | 2d4a88681da5bd5b395c0ca980554ecf9edd3a8e |
Message: | m1/perf: deleted. all this has long moved into
flickernoise/src/compiler/ptest |
Files: |
m1/perf/Makefile (1 diff) m1/perf/TODO (1 diff) m1/perf/all-runs (1 diff) m1/perf/eval.pl (1 diff) m1/perf/fakes/bsp (1 diff) m1/perf/fakes/milkymist_pfpu.h (1 diff) m1/perf/fakes/rtems.h (0 diffs) m1/perf/favg (1 diff) m1/perf/main.c (1 diff) m1/perf/runs (1 diff) m1/perf/sched.c (1 diff) m1/perf/tabulate (1 diff) m1/perf/try (1 diff) |
Change Details
m1/perf/Makefile | ||
---|---|---|
#
# Makefile - build the scheduler test harness ("main") against the
# Flickernoise patch compiler and the x86-linux build of libfpvm.
#
# Variables commonly set on the command line (see "runs" and "try"):
#   CFLAGS_EXTRA  extra -D options
#   CFLAGS_PROF   profiling flags, empty to disable
#   SCHED         not used here; command-line variables are passed on to
#                 the sub-makes invoked below
#

# Root of the Milkymist One tree. "?=" lets the environment or the command
# line point to a different checkout; the default is unchanged.
M1 ?= /home/qi/m1
COMPILER_DIR=$(M1)/flickernoise/src
M1SWINC_DIR=$(M1)/milkymist/software/include
M1SWLIB_DIR=$(M1)/milkymist/software/libfpvm/x86-linux

#CFLAGS_EXTRA=-DCOMP_DEBUG
CFLAGS_EXTRA=
CFLAGS_PROF=-pg
CFLAGS_COMMON=-Wall -g $(CFLAGS_PROF) $(CFLAGS_EXTRA) -DPRINTF_FLOAT
CFLAGS_M=-fno-builtin #-nostdinc -fno-builtin
CFLAGS=$(CFLAGS_COMMON) \
	-I$(COMPILER_DIR) \
	-Ifakes -I$(M1SWINC_DIR)
LDFLAGS=$(CFLAGS_PROF)
LDLIBS=-L$(M1SWLIB_DIR) -lfpvm


COMPILER_O = $(COMPILER_DIR)/compiler.o
LIBFPVM_A = $(M1SWLIB_DIR)/libfpvm.a
OBJS = main.o $(COMPILER_O)

.PHONY:		all clean spotless path

all:		main

# Linked by make's built-in rule, using LDFLAGS and LDLIBS from above.
main:		$(OBJS) $(LIBFPVM_A)

# compiler.o is built in the Flickernoise tree, with our fake headers on the
# include path. $(CURDIR) is make's own notion of the current directory and
# avoids spawning a shell just to run "pwd".
$(COMPILER_O):
		$(MAKE) -C $(COMPILER_DIR) CC=gcc \
		  CFLAGS="$(CFLAGS_COMMON) $(CFLAGS_M) -I$(M1SWINC_DIR) \
		    -I$(CURDIR)/fakes" \
		  compiler.o

$(LIBFPVM_A):
		$(MAKE) -C $(M1SWLIB_DIR) CC=gcc \
		  CFLAGS='$(CFLAGS_COMMON) $(CFLAGS_M) -I$(M1SWINC_DIR)'

clean:
		$(MAKE) -C $(M1SWLIB_DIR) clean
		$(MAKE) -C $(COMPILER_DIR) clean
		rm -f $(M1SWLIB_DIR)/sched.o
		rm -f $(OBJS)

spotless:	clean
		rm -f main

# Print the tree root so that the helper scripts can pick it up ("make path").
path:
		@echo $(M1)
m1/perf/TODO | ||
---|---|---|
1 | Done: | |
2 | - dynamically allocate scheduler context | |
3 | - see if preferring critical path can improve code efficiency (YES !) | |
4 | ||
5 | Pending: | |
6 | - see if dynamically adjusting the critical path leads to further improvements | |
7 | - test IF | |
8 | - run result comparison against full set of patches | |
9 | - check if result comparison actually compares meaningful data | |
10 | - compare run time and code size for all patches | |
11 | - see what optimization changes (may interfere with profiling) | |
12 | - build into Flickernoise (some things may need adapting, e.g., abort()) | |
13 | - review code, see if things can be simplified | |
14 | - see if valgrind can do something useful |
m1/perf/all-runs | ||
---|---|---|
#!/bin/sh -ex
#
# all-runs - rebuild the whole data/ tree: for each scheduler variant
# (ref, new, opt) run "runs" once per output kind (out, expr, prof).
#
# Each list entry is "name:options-for-runs".
#

rm -rf data
mkdir data

for sched in "ref:" "new:-n" "opt:-n -o"; do
	n=${sched%%:*}
	flags=${sched#*:}

	mkdir data/$n
	for kind in "out:-s" "expr:-e" "prof:-p"; do
		m=${kind%%:*}
		more=${kind#*:}

		mkdir data/$n/$m
		./runs $flags $more data/$n/$m || exit
	done
done
26 |
m1/perf/eval.pl | ||
---|---|---|
1 | #!/usr/bin/perl | |
2 | ||
3 | ||
#
# flush - print what has been collected for the fragment just processed.
#
# With -r ($nregs set): "<number of registers seen>/<highest register name>".
# Otherwise: one "<reg> = <expression>" line per register in %use, followed
# by the VECTOUT result text in $res.
#
sub flush
{
	if ($nregs) {
		my @names = sort { $b cmp $a } keys %reg;

		print scalar(keys %reg), "/", $names[0], "\n";
		return;
	}
	print "$_ = $reg{$_}\n" for sort keys %use;
	print $res;
}
16 | ||
17 | ||
# -r: only report register usage (see flush)
if (@ARGV && $ARGV[0] eq "-r") {
	shift @ARGV;
	$nregs = 1;
}

#
# %seen   registers written in the FPVM fragment being read
# @val    expression that becomes visible at a given cycle
# $i      next expected PFPU cycle; undefined outside a PFPU fragment
# $expr   last expression built (deliberately kept across iterations)
#
my (%seen, @val, $i, $expr);

while (my $line = <>) {
	if ($line =~ /FPVM fragment:/) {
		flush() if $i;
		%seen = ();
		$i = undef;
	}
	if ($line =~ /PFPU fragment:/) {
		$res = undef;
		%reg = ();
		@val = ();
		%use = %seen;
		$i = 0;
	}

	$seen{"R$1"} = 1 if $line =~ /^\d+:.*-> R0(\d+)/;
	next unless defined $i;

	next unless $line =~
	    /^(\d+):\s+(\S+)\s+(R\d+)?(,(R\d+))?.*?(->\s+(R\d+))?\s*$/;
	#   1       2       3      4 5           6      7
	my ($cycle, $op, $in_a, $in_b, $out) = ($1, $2, $3, $5, $7);
	my $e;
	$e = $1 if $line =~ /E=(\d+)>/;
	die "($i) $line" if $cycle != $i;

	if ($nregs) {
		$reg{$in_a} = 1 if defined $in_a;
		$reg{$in_b} = 1 if defined $in_b;
	}

	if (defined $out) {
		print STDERR "$i: concurrent read/write on $in_a (A)\n"
		    if $in_a eq $out;
		print STDERR "$i: concurrent read/write on $in_b (B)\n"
		    if $in_b eq $out;
	}

	# replace register names by the expression they currently hold
	$in_a = $reg{$in_a} if defined $reg{$in_a};
	$in_b = $reg{$in_b} if defined $reg{$in_b};

	if ($op eq "IF<R2>") {
		$expr = "(IF ".$reg{"R002"}." $in_a $in_b)";
		$reg{"R002"} = 1 if $nregs;
	} elsif ($op eq "VECTOUT") {
		$res = "A = $in_a\nB = $in_b\n";
	} elsif (defined $in_b) {
		$expr = "($op $in_a $in_b)";
	} elsif (defined $in_a) {
		$expr = "($op $in_a)";
	} else {
		$expr = "($op)";
	}

	# the result appears at cycle E; a write-back in this cycle picks up
	# whatever was scheduled to arrive now
	$val[$e] = $expr if defined $e;
	$reg{$out} = $val[$i] if defined $out;
	$i++;
}
flush();
m1/perf/fakes/bsp | ||
---|---|---|
1 | . |
m1/perf/fakes/milkymist_pfpu.h | ||
---|---|---|
1 | #include "hw/pfpu.h" |
m1/perf/fakes/rtems.h |
---|
m1/perf/favg | ||
---|---|---|
#!/usr/bin/perl
#
# favg - average one numeric column of HTML table rows
#
# usage: favg column file...
#
# For each file, cells are counted per row: a line containing <TR> restarts
# the count, and every <TD ...> that is directly followed by a number
# advances it. The values of cell number "column" (1-based) are summed and
# printed as "sum/count = average".
#
use strict;
use warnings;

my $sel = shift @ARGV;
die "usage: $0 column file...\n" unless defined $sel;

# The cell counter is only reset by <TR>, so it carries over between files,
# as it always did.
my $col = 0;

for my $file (@ARGV) {
	my $sum = 0;
	my $count = 0;

	# three-argument open: the file name can never be taken for a mode or
	# a command
	open(my $fh, '<', $file) or die "$file: $!";
	while (my $line = <$fh>) {
		$col = 0 if $line =~ /<TR>/;
		if ($line =~ /<TD[^>]*>([0-9.]+)/) {
			$col++;
			next unless $col == $sel;
			$sum += $1;
			$count++;
		}
	}
	close $fh;

	# no matching cell: report it instead of dying on a division by zero
	if ($count) {
		print "$sum/$count = ", $sum/$count, "\n";
	} else {
		print "$sum/$count = n/a\n";
	}
}
m1/perf/main.c | ||
---|---|---|
1 | #include <stdlib.h> | |
2 | #include <stdio.h> | |
3 | ||
4 | #include "compiler.h" | |
5 | ||
6 | ||
7 | #define BUF_SIZE 1000000 | |
8 | ||
9 | ||
/*
 * Message callback handed to patch_compile: writes the message and a
 * newline to standard error.
 */
static void report(const char *s)
{
	fputs(s, stderr);
	fputc('\n', stderr);
}
14 | ||
15 | ||
/*
 * Print the command-line synopsis to standard error and exit with status 1.
 * "name" is the program name to show.
 */
static void usage(const char *name)
{
	static const char synopsis[] = "usage: %s patch-file [loops]\n";

	fprintf(stderr, synopsis, name);
	exit(1);
}
21 | ||
22 | ||
23 | int main(int argc, char **argv) | |
24 | { | |
25 | char buf[BUF_SIZE]; | |
26 | const char *name; | |
27 | FILE *file; | |
28 | size_t got; | |
29 | int loops = 1; | |
30 | int i; | |
31 | ||
32 | switch (argc) { | |
33 | case 2: | |
34 | break; | |
35 | case 3: | |
36 | loops = atoi(argv[2]); | |
37 | break; | |
38 | default: | |
39 | usage(*argv); | |
40 | } | |
41 | ||
42 | name = argv[1]; | |
43 | file = fopen(name, "r"); | |
44 | if (!file) { | |
45 | perror(name); | |
46 | exit(1); | |
47 | } | |
48 | got = fread(buf, 1, sizeof(buf)-1, file); | |
49 | if (got < 0) { | |
50 | perror(name); | |
51 | exit(1); | |
52 | } | |
53 | buf[got] = 0; | |
54 | fclose(file); | |
55 | ||
56 | for (i = 0; i != loops; i++) | |
57 | if (!patch_compile(buf, report)) | |
58 | return 1; | |
59 | ||
60 | return 0; | |
61 | } |
m1/perf/runs | ||
---|---|---|
1 | #!/bin/sh -x | |
2 | ||
3 | ||
# sanitize path
#
# Turn the name of a patch file into something usable as a plain file name:
# strip the directory and the .fnp suffix, turn spaces into underscores,
# drop everything that is not a letter, digit, underscore or dash, and
# collapse runs of underscores.
sanitize()
{
	basename "$1" .fnp |
	    tr ' ' '_' |
	    tr -cd 'A-Za-z0-9_-' |
	    tr -s '_'
}
8 | ||
9 | ||
# usage
#
# Print the synopsis to standard error and terminate the script with
# status 1. Without the exit, the script would carry on after an invalid
# invocation.
usage()
{
	cat <<EOF 1>&2
usage: $0 [-e|-p] [-n [-o] [-s]] dir

-e generate the calculated expression (default: just dump debug output)
-p profile 10000 runs (default: just dump debug output)
-n use "new" scheduler
-o enable LCPF optimizer
-s enable register pressure statistics
EOF
	exit 1
}
23 | ||
24 | ||
# The tree root comes from the Makefile, so there is only one place to set it.
M1=`make path`

extra=
sched=
profile=false
evaluate=false

# Consume leading options; the first argument not starting with "-" ends
# the loop.
while :; do
	case "$1" in
	-e)	evaluate=true;;
	-p)	profile=true;;
	-n)	sched=SCHED=sched.o;;
	-o)	extra="$extra -DLCPF";;
	-s)	extra="$extra -DREG_STATS";;
	-*)	usage;;
	*)	break;;
	esac
	shift
done

# exactly one argument, the output directory, must remain
if [ -z "$1" ] || [ -n "$2" ]; then
	usage
fi
if [ ! -d "$1" ]; then
	echo "$1: directory not found" 1>&2
	exit 1
fi

# profiling runs are built without the compiler's debug output
if ! $profile; then
	extra="$extra -DCOMP_DEBUG"
fi

make spotless
make CFLAGS_EXTRA="$extra" $sched all

if $profile; then
	mode=profile
elif $evaluate; then
	mode=evaluate
else
	mode=dump
fi

for n in $M1/flickernoise/patches/*/*.fnp; do
	s=`sanitize "$n"`
	case $mode in
	profile)
		./main "$n" 10000 || exit
		gprof main >"$1"/$s
		;;
	evaluate)
		./main "$n" | ./eval.pl >"$1"/$s || exit
		;;
	dump)
		./main "$n" >"$1"/$s || exit
		;;
	esac
done
m1/perf/sched.c | ||
---|---|---|
1 | /* | |
2 | * lnfpus.c - O(n) ... O(n^2) scheduler | |
3 | * | |
4 | * Copyright (C) 2011 Werner Almesberger | |
5 | * | |
6 | * Based on gfpus.c | |
7 | * Copyright (C) 2007, 2008, 2009, 2010 Sebastien Bourdeauducq | |
8 | * | |
9 | * This program is free software: you can redistribute it and/or modify | |
10 | * it under the terms of the GNU General Public License as published by | |
11 | * the Free Software Foundation, version 3 of the License. | |
12 | * | |
13 | * This program is distributed in the hope that it will be useful, | |
14 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
15 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
16 | * GNU General Public License for more details. | |
17 | * | |
18 | * You should have received a copy of the GNU General Public License | |
19 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | |
20 | */ | |
21 | ||
22 | #include <stdlib.h> | |
23 | #include <stdio.h> | |
24 | #include <string.h> | |
25 | #include <assert.h> | |
26 | ||
27 | #include <fpvm/is.h> | |
28 | #include <fpvm/fpvm.h> | |
29 | #include <fpvm/pfpu.h> | |
30 | #include <fpvm/gfpus.h> | |
31 | ||
32 | #include <hw/pfpu.h> | |
33 | ||
34 | ||
35 | //#define REG_STATS | |
36 | #define LCPF /* longest critical path first */ | |
37 | ||
38 | //#define DEBUG | |
39 | #ifdef DEBUG | |
40 | #define Dprintf printf | |
41 | #else | |
42 | #define Dprintf(...) | |
43 | #endif | |
44 | ||
45 | ||
46 | #define MAX_LATENCY 8 /* maximum latency; okay to make this bigger */ | |
47 | ||
48 | #define CODE(n) (((pfpu_instruction *) (code+(n)))->i) | |
49 | ||
50 | ||
51 | struct list { | |
52 | struct list *next, *prev; | |
53 | }; | |
54 | ||
55 | ||
56 | struct insn { | |
57 | struct list more; /* more insns on same schedule */ | |
58 | struct fpvm_instruction *vm_insn; | |
59 | struct data_ref { | |
60 | struct list more; /* more refs sharing the data */ | |
61 | struct insn *insn; /* insn this is part of */ | |
62 | struct insn *dep; /* insn we depend on */ | |
63 | } opa, opb, dest, cond; | |
64 | int arity; | |
65 | int latency; | |
66 | int rmw; /* non-zero if instruction is read-modify-write */ | |
67 | int unresolved; /* number of data refs we need before we can sched */ | |
68 | int earliest; /* earliest cycle dependencies seen so far are met */ | |
69 | struct list dependants; /* list of dependencies (constant) */ | |
70 | int num_dependants; /* number of dependencies */ | |
71 | struct insn *next_setter; /* next setter of the same register */ | |
72 | #ifdef LCPF | |
73 | int distance; /* minimum cycles on this path until the end */ | |
74 | #endif | |
75 | }; | |
76 | ||
77 | ||
78 | struct vm_reg { | |
79 | struct insn *setter; /* instruction setting it; NULL if none */ | |
80 | struct insn *first_setter; /* first setter */ | |
81 | int pfpu_reg; /* underlying PFPU register */ | |
82 | int refs; /* usage count */ | |
83 | }; | |
84 | ||
85 | ||
86 | struct pfpu_reg { | |
87 | struct list more; /* list of unallocated PFPU registers */ | |
88 | int vm_reg; /* corresponding FPVM register if allocated */ | |
89 | int used; /* used somewhere in the program */ | |
90 | }; | |
91 | ||
92 | ||
93 | static struct sched_ctx { | |
94 | struct fpvm_fragment *frag; | |
95 | struct insn insns[FPVM_MAXCODELEN]; | |
96 | struct vm_reg *regs; /* dynamically allocated */ | |
97 | struct pfpu_reg pfpu_regs[PFPU_REG_COUNT]; | |
98 | struct list unallocated; /* unallocated registers */ | |
99 | struct list unscheduled; /* unscheduled insns */ | |
100 | struct list waiting; /* insns waiting to be scheduled */ | |
101 | struct list ready[PFPU_PROGSIZE]; /* insns ready at nth cycle */ | |
102 | int cycle; /* the current cycle */ | |
103 | #ifdef REG_STATS | |
104 | int max_regs, curr_regs; /* allocation statistics */ | |
105 | #endif | |
106 | } *sc; | |
107 | ||
108 | ||
109 | /* ----- Register initialization ------------------------------------------- */ | |
110 | ||
111 | ||
112 | /* | |
113 | * Straight from gfpus.c, only with some whitespace changes. | |
114 | */ | |
115 | ||
116 | static void get_registers(struct fpvm_fragment *fragment, | |
117 | unsigned int *registers) | |
118 | { | |
119 | int i; | |
120 | union { | |
121 | float f; | |
122 | unsigned int n; | |
123 | } fconv; | |
124 | ||
125 | for(i = 0; i < fragment->nbindings; i++) | |
126 | if(fragment->bindings[i].isvar) | |
127 | registers[i] = 0; | |
128 | else { | |
129 | fconv.f = fragment->bindings[i].b.c; | |
130 | registers[i] = fconv.n; | |
131 | } | |
132 | for(; i < PFPU_REG_COUNT; i++) | |
133 | registers[i] = 0; | |
134 | } | |
135 | ||
136 | ||
137 | /* ----- Doubly-linked list ------------------------------------------------ */ | |
138 | ||
139 | ||
140 | /* | |
141 | * Use the naming conventions of include/linux/list.h | |
142 | */ | |
143 | ||
144 | ||
145 | #ifdef DEBUG | |
146 | ||
147 | static void list_poison(struct list *list) | |
148 | { | |
149 | list->next = list->prev = NULL; | |
150 | } | |
151 | ||
152 | #else /* DEBUG */ | |
153 | ||
154 | #define list_poison(list) | |
155 | ||
156 | #endif /* !DEBUG */ | |
157 | ||
158 | ||
159 | static void list_init(struct list *list) | |
160 | { | |
161 | list->next = list->prev = list; | |
162 | } | |
163 | ||
164 | ||
165 | static void list_del(struct list *item) | |
166 | { | |
167 | assert(item->next != item); | |
168 | item->prev->next = item->next; | |
169 | item->next->prev = item->prev; | |
170 | list_poison(item); | |
171 | } | |
172 | ||
173 | ||
174 | static void *list_pop(struct list *list) | |
175 | { | |
176 | struct list *first; | |
177 | ||
178 | first = list->next; | |
179 | if(first == list) | |
180 | return NULL; | |
181 | list_del(first); | |
182 | return first; | |
183 | } | |
184 | ||
185 | ||
186 | static void list_add_tail(struct list *list, struct list *item) | |
187 | { | |
188 | item->next = list; | |
189 | item->prev = list->prev; | |
190 | list->prev->next = item; | |
191 | list->prev = item; | |
192 | } | |
193 | ||
194 | ||
195 | static void list_add(struct list *list, struct list *item) | |
196 | { | |
197 | item->next = list->next; | |
198 | item->prev = list; | |
199 | list->next->prev = item; | |
200 | list->next = item; | |
201 | } | |
202 | ||
203 | ||
204 | static void list_concat(struct list *a, struct list *b) | |
205 | { | |
206 | if(b->next != b) { | |
207 | a->prev->next = b->next; | |
208 | b->next->prev = a->prev; | |
209 | b->prev->next = a; | |
210 | a->prev = b->prev; | |
211 | } | |
212 | list_poison(b); | |
213 | } | |
214 | ||
215 | ||
216 | /* | |
217 | * Do not delete elements from the list while traversing it with foreach ! | |
218 | */ | |
219 | ||
220 | #define foreach(var, head) \ | |
221 | for(var = (void *) ((head))->next; \ | |
222 | (var) != (void *) (head); \ | |
223 | var = (void *) ((struct list *) (var))->next) | |
224 | ||
225 | ||
226 | /* ----- Register management ----------------------------------------------- */ | |
227 | ||
228 | ||
229 | static int vm_reg2idx(int reg) | |
230 | { | |
231 | return reg >= 0 ? reg : sc->frag->nbindings-reg; | |
232 | } | |
233 | ||
234 | ||
235 | static int alloc_reg(struct insn *setter) | |
236 | { | |
237 | struct pfpu_reg *reg; | |
238 | int vm_reg, pfpu_reg, vm_idx; | |
239 | ||
240 | vm_reg = setter->vm_insn->dest; | |
241 | if(vm_reg >= 0) { | |
242 | pfpu_reg = vm_reg; | |
243 | sc->pfpu_regs[vm_reg].vm_reg = vm_reg; /* @@@ global init */ | |
244 | } else { | |
245 | reg = list_pop(&sc->unallocated); | |
246 | if(!reg) | |
247 | return -1; | |
248 | ||
249 | #ifdef REG_STATS | |
250 | sc->curr_regs++; | |
251 | if(sc->curr_regs > sc->max_regs) | |
252 | sc->max_regs = sc->curr_regs; | |
253 | #endif | |
254 | ||
255 | reg->vm_reg = vm_reg; | |
256 | pfpu_reg = reg-sc->pfpu_regs; | |
257 | } | |
258 | ||
259 | Dprintf(" alloc reg %d -> %d\n", vm_reg, pfpu_reg); | |
260 | ||
261 | vm_idx = vm_reg2idx(vm_reg); | |
262 | sc->regs[vm_idx].setter = setter; | |
263 | sc->regs[vm_idx].pfpu_reg = pfpu_reg; | |
264 | sc->regs[vm_idx].refs = setter->num_dependants+1; | |
265 | ||
266 | return pfpu_reg; | |
267 | } | |
268 | ||
269 | ||
270 | static void put_reg(int vm_reg) | |
271 | { | |
272 | int vm_idx; | |
273 | struct vm_reg *reg; | |
274 | ||
275 | if(vm_reg >= 0) | |
276 | return; | |
277 | ||
278 | vm_idx = vm_reg2idx(vm_reg); | |
279 | reg = sc->regs+vm_idx; | |
280 | ||
281 | assert(reg->refs); | |
282 | if(--reg->refs) | |
283 | return; | |
284 | ||
285 | Dprintf(" free reg %d\n", reg->pfpu_reg); | |
286 | ||
287 | #ifdef REG_STATS | |
288 | assert(sc->curr_regs); | |
289 | sc->curr_regs--; | |
290 | #endif | |
291 | ||
292 | /* | |
293 | * Prepend so that register numbers stay small and bugs reveal | |
294 | * themselves more rapidly. | |
295 | */ | |
296 | list_add(&sc->unallocated, &sc->pfpu_regs[reg->pfpu_reg].more); | |
297 | ||
298 | /* clear it for style only */ | |
299 | reg->setter = NULL; | |
300 | reg->pfpu_reg = 0; | |
301 | } | |
302 | ||
303 | ||
304 | static int lookup_pfpu_reg(int vm_reg) | |
305 | { | |
306 | return vm_reg >= 0 ? vm_reg : sc->regs[vm_reg2idx(vm_reg)].pfpu_reg; | |
307 | } | |
308 | ||
309 | ||
310 | static void mark(int vm_reg) | |
311 | { | |
312 | if(vm_reg > 0) | |
313 | sc->pfpu_regs[vm_reg].used = 1; | |
314 | } | |
315 | ||
316 | ||
317 | static int init_registers(struct fpvm_fragment *frag, | |
318 | unsigned int *registers) | |
319 | { | |
320 | int i; | |
321 | ||
322 | get_registers(frag, registers); | |
323 | ||
324 | for(i = 0; i != frag->ninstructions; i++) { | |
325 | mark(frag->code[i].opa); | |
326 | mark(frag->code[i].opb); | |
327 | mark(frag->code[i].dest); | |
328 | } | |
329 | ||
330 | list_init(&sc->unallocated); | |
331 | for(i = PFPU_SPREG_COUNT; i != PFPU_REG_COUNT; i++) | |
332 | if(!sc->pfpu_regs[i].used) | |
333 | list_add_tail(&sc->unallocated, &sc->pfpu_regs[i].more); | |
334 | ||
335 | return 0; | |
336 | } | |
337 | ||
338 | ||
339 | /* ----- Instruction scheduler --------------------------------------------- */ | |
340 | ||
341 | ||
342 | static struct vm_reg *add_data_ref(struct insn *insn, struct data_ref *ref, | |
343 | int reg_num) | |
344 | { | |
345 | struct vm_reg *reg; | |
346 | ||
347 | reg = sc->regs+vm_reg2idx(reg_num); | |
348 | ref->insn = insn; | |
349 | ref->dep = reg->setter; | |
350 | if(insn->vm_insn->dest == reg_num) | |
351 | insn->rmw = 1; | |
352 | if(!ref->dep) | |
353 | reg->refs++; | |
354 | else { | |
355 | list_add_tail(&ref->dep->dependants, &ref->more); | |
356 | ref->dep->num_dependants++; | |
357 | insn->unresolved++; | |
358 | ||
359 | Dprintf("insn %lu: reg %d setter %lu unresolved %d\n", | |
360 | insn-sc->insns, reg_num, reg->setter-sc->insns, | |
361 | insn->unresolved); | |
362 | } | |
363 | return reg; | |
364 | } | |
365 | ||
366 | ||
367 | static void init_scheduler(struct fpvm_fragment *frag) | |
368 | { | |
369 | int i; | |
370 | struct insn *insn; | |
371 | struct vm_reg *reg; | |
372 | struct data_ref *ref; | |
373 | ||
374 | list_init(&sc->unscheduled); | |
375 | list_init(&sc->waiting); | |
376 | for(i = 0; i != PFPU_PROGSIZE; i++) | |
377 | list_init(sc->ready+i); | |
378 | ||
379 | for(i = 0; i != frag->ninstructions; i++) { | |
380 | insn = sc->insns+i; | |
381 | insn->vm_insn = frag->code+i; | |
382 | insn->arity = fpvm_get_arity(frag->code[i].opcode); | |
383 | insn->latency = pfpu_get_latency(frag->code[i].opcode); | |
384 | list_init(&insn->dependants); | |
385 | switch (insn->arity) { | |
386 | case 3: | |
387 | add_data_ref(insn, &insn->cond, FPVM_REG_IFB); | |
388 | /* fall through */ | |
389 | case 2: | |
390 | add_data_ref(insn, &insn->opb, frag->code[i].opb); | |
391 | /* fall through */ | |
392 | case 1: | |
393 | add_data_ref(insn, &insn->opa, frag->code[i].opa); | |
394 | /* fall through */ | |
395 | case 0: | |
396 | reg = sc->regs+vm_reg2idx(frag->code[i].dest); | |
397 | if(reg->setter) { | |
398 | reg->setter->next_setter = insn; | |
399 | foreach(ref, ®->setter->dependants) | |
400 | if(ref->insn != insn) | |
401 | insn->unresolved++; | |
402 | if(!insn->rmw) | |
403 | insn->unresolved++; | |
404 | } else { | |
405 | if(!insn->rmw) | |
406 | insn->unresolved += reg->refs; | |
407 | reg->first_setter = insn; | |
408 | } | |
409 | reg->setter = insn; | |
410 | break; | |
411 | default: | |
412 | abort(); | |
413 | } | |
414 | if(insn->unresolved) | |
415 | list_add_tail(&sc->unscheduled, &insn->more); | |
416 | else | |
417 | list_add_tail(&sc->ready[0], &insn->more); | |
418 | } | |
419 | ||
420 | #ifdef LCPF | |
421 | struct data_ref *dep; | |
422 | ||
423 | for(i = frag->ninstructions-1; i >= 0; i--) { | |
424 | insn = sc->insns+i; | |
425 | #if 0 | |
426 | /* | |
427 | * Theoretically, we should consider the distance through | |
428 | * write-write dependencies too. In practice, this would | |
429 | * mainly matter if we had operations whose result is ignored. | |
430 | * This is a degenerate case that's probably not worth | |
431 | * spending much effort on. | |
432 | */ | |
433 | if(insn->next_setter) { | |
434 | insn->distance = | |
435 | insn->next_setter->distance-insn->distance+1; | |
436 | if(insn->distance < 1) | |
437 | insn->distance = 1; | |
438 | } | |
439 | #endif | |
440 | foreach(dep, &insn->dependants) | |
441 | if(dep->insn->distance > insn->distance) | |
442 | insn->distance = dep->insn->distance; | |
443 | /* | |
444 | * While it would be more correct to add one for the cycle | |
445 | * following the write cycle, this also has the effect of | |
446 | * producing slighly worse results on the example set of | |
447 | * patches. Let's thus keep this "bug" for now. | |
448 | */ | |
449 | // insn->distance += insn->latency+1; | |
450 | insn->distance += insn->latency; | |
451 | } | |
452 | #endif | |
453 | } | |
454 | ||
455 | ||
456 | static void unblock(struct insn *insn) | |
457 | { | |
458 | int slot; | |
459 | ||
460 | assert(insn->unresolved); | |
461 | if(--insn->unresolved) | |
462 | return; | |
463 | Dprintf(" unblocked %lu -> %u\n", insn-sc->insns, insn->earliest); | |
464 | list_del(&insn->more); | |
465 | slot = insn->earliest; | |
466 | if(slot <= sc->cycle) | |
467 | slot = sc->cycle+1; | |
468 | list_add_tail(sc->ready+slot, &insn->more); | |
469 | } | |
470 | ||
471 | ||
472 | static void put_reg_by_ref(struct data_ref *ref, int vm_reg) | |
473 | { | |
474 | struct insn *setter = ref->dep; | |
475 | struct vm_reg *reg; | |
476 | ||
477 | if(setter) { | |
478 | put_reg(setter->vm_insn->dest); | |
479 | if(setter->next_setter && setter->next_setter != ref->insn) | |
480 | unblock(setter->next_setter); | |
481 | } else { | |
482 | reg = sc->regs+vm_reg2idx(vm_reg); | |
483 | if(reg->first_setter && !reg->first_setter->rmw) | |
484 | unblock(reg->first_setter); | |
485 | } | |
486 | } | |
487 | ||
488 | ||
489 | static void unblock_after(struct insn *insn, int cycle) | |
490 | { | |
491 | if(insn->earliest <= cycle) | |
492 | insn->earliest = cycle+1; | |
493 | unblock(insn); | |
494 | } | |
495 | ||
496 | ||
497 | static int issue(struct insn *insn, unsigned *code) | |
498 | { | |
499 | struct data_ref *ref; | |
500 | int end, reg; | |
501 | ||
502 | end = sc->cycle+insn->latency; | |
503 | ||
504 | Dprintf("cycle %d: insn %lu L %d (A %d B %d)\n", sc->cycle, | |
505 | insn-sc->insns, insn->latency, insn->vm_insn->opa, | |
506 | insn->vm_insn->opb); | |
507 | ||
508 | switch (insn->arity) { | |
509 | case 3: | |
510 | put_reg_by_ref(&insn->cond, FPVM_REG_IFB); | |
511 | /* fall through */ | |
512 | case 2: | |
513 | CODE(sc->cycle).opb = lookup_pfpu_reg(insn->vm_insn->opb); | |
514 | put_reg_by_ref(&insn->opb, insn->vm_insn->opb); | |
515 | /* fall through */ | |
516 | case 1: | |
517 | CODE(sc->cycle).opa = lookup_pfpu_reg(insn->vm_insn->opa); | |
518 | put_reg_by_ref(&insn->opa, insn->vm_insn->opa); | |
519 | break; | |
520 | case 0: | |
521 | break; | |
522 | default: | |
523 | abort(); | |
524 | } | |
525 | ||
526 | reg = alloc_reg(insn); | |
527 | if(reg < 0) | |
528 | return -1; | |
529 | CODE(end).dest = reg; | |
530 | CODE(sc->cycle).opcode = fpvm_to_pfpu(insn->vm_insn->opcode); | |
531 | ||
532 | foreach(ref, &insn->dependants) | |
533 | unblock_after(ref->insn, end); | |
534 | if(insn->next_setter && !insn->next_setter->rmw) | |
535 | unblock_after(insn->next_setter, | |
536 | end-insn->next_setter->latency); | |
537 | ||
538 | return 0; | |
539 | } | |
540 | ||
541 | ||
#ifdef DEBUG
/* Return the number of items on "list" (debugging output only). */
static int count(const struct list *list)
{
	const struct list *item = list->next;
	int items = 0;

	while(item != list) {
		items++;
		item = item->next;
	}
	return items;
}
#endif
553 | ||
554 | ||
555 | static int schedule(unsigned int *code) | |
556 | { | |
557 | int remaining; | |
558 | int i, last, end; | |
559 | struct insn *insn; | |
560 | struct insn *best; | |
561 | ||
562 | remaining = sc->frag->ninstructions; | |
563 | for(i = 0; remaining; i++) { | |
564 | if(i == PFPU_PROGSIZE) | |
565 | return -1; | |
566 | ||
567 | sc->cycle = i; | |
568 | Dprintf("@%d --- remaining %d, waiting %d + ready %d\n", | |
569 | i, remaining, count(&sc->waiting), count(&sc->ready[i])); | |
570 | ||
571 | list_concat(&sc->waiting, sc->ready+i); | |
572 | best = NULL; | |
573 | foreach(insn, &sc->waiting) { | |
574 | end = i+insn->latency; | |
575 | if(end >= PFPU_PROGSIZE) | |
576 | return -1; | |
577 | if(!CODE(end).dest) { | |
578 | #ifdef LCPF | |
579 | if(!best || best->distance < insn->distance) | |
580 | best = insn; | |
581 | #else | |
582 | best = insn; | |
583 | break; | |
584 | #endif | |
585 | } | |
586 | } | |
587 | if(best) { | |
588 | if(issue(best, code) < 0) | |
589 | return -1; | |
590 | list_del(&best->more); | |
591 | remaining--; | |
592 | } | |
593 | if(CODE(i).dest) | |
594 | put_reg(sc->pfpu_regs[CODE(i).dest].vm_reg); | |
595 | } | |
596 | ||
597 | /* | |
598 | * Add NOPs to cover unfinished instructions. | |
599 | */ | |
600 | last = i; | |
601 | end = i+MAX_LATENCY; | |
602 | if(end > PFPU_PROGSIZE) | |
603 | end = PFPU_PROGSIZE; | |
604 | while(i != end) { | |
605 | if(CODE(i).dest) | |
606 | last = i+1; | |
607 | i++; | |
608 | } | |
609 | return last; | |
610 | } | |
611 | ||
612 | ||
613 | int gfpus_schedule(struct fpvm_fragment *frag, unsigned int *code, | |
614 | unsigned int *reg) | |
615 | { | |
616 | /* | |
617 | * allocate context and registers on stack because standalone FN has no | |
618 | * memory allocator | |
619 | */ | |
620 | struct sched_ctx sc_alloc; | |
621 | struct vm_reg regs[frag->nbindings-frag->next_sur]; | |
622 | pfpu_instruction vecout; | |
623 | int res; | |
624 | ||
625 | printf("greetings %lu %lu\n", sizeof(*sc), sizeof(regs)); | |
626 | sc = &sc_alloc; | |
627 | memset(sc, 0, sizeof(*sc)); | |
628 | sc->frag = frag; | |
629 | sc->regs = regs; | |
630 | memset(regs, 0, sizeof(regs)); | |
631 | ||
632 | if(init_registers(frag, reg) < 0) | |
633 | return -1; | |
634 | init_scheduler(frag); | |
635 | ||
636 | memset(code, 0, PFPU_PROGSIZE*sizeof(*code)); | |
637 | res = schedule(code); | |
638 | ||
639 | #ifdef REG_STATS | |
640 | printf("regs: %d/%d\n", sc->curr_regs, sc->max_regs); | |
641 | #endif | |
642 | ||
643 | if(res < 0) | |
644 | return res; | |
645 | if(frag->vector_mode) | |
646 | return res; | |
647 | if(res == PFPU_PROGSIZE) | |
648 | return -1; | |
649 | ||
650 | vecout.w = 0; | |
651 | vecout.i.opcode = FPVM_OPCODE_VECTOUT; | |
652 | code[res] = vecout.w; | |
653 | ||
654 | return res+1; | |
655 | } |
m1/perf/tabulate | ||
---|---|---|
1 | #!/bin/sh | |
2 | ||
3 | M1=`make path` | |
4 | ||
5 | ||
6 | time() | |
7 | { | |
8 | sed '/^.*of \([0-9.]*\) seconds.*/s//\1/p;d' <data/$1/prof/$2 | |
9 | } | |
10 | ||
11 | ||
12 | size() | |
13 | { | |
14 | sed '/^0*\([1-9][0-9]*\): VECTOUT.*/s//\1/p;d' <data/$1/out/$2 | | |
15 | tail -n 1 | |
16 | } | |
17 | ||
18 | ||
19 | eff() | |
20 | { | |
21 | sed '/^Efficiency: \([0-9.]*\)%/s//\1/p;d' <data/$1/out/$2 | tail -n 1 | |
22 | } | |
23 | ||
24 | ||
25 | regs() | |
26 | { | |
27 | ./eval.pl -r data/$1/out/$2 | sed '1d;s|/.*||' | |
28 | # sed '/^regs: 0\//s///p;d' <data/$1/out/$2 | tail -n 1 | |
29 | } | |
30 | ||
31 | ||
32 | sum() | |
33 | { | |
34 | md5sum <data/$1/expr/$2 | sed 's/ .*//' | |
35 | } | |
36 | ||
37 | ||
# eq a b - print Y if the two strings are identical, N if not
eq()
{
	case "$1" in
	"$2")	echo Y;;
	*)	echo N;;
	esac
}
46 | ||
47 | ||
# trim - filter: cut each line at the first character that is not a digit,
# leaving only a leading integer (or nothing)
trim()
{
	sed -e 's/[^0-9].*$//'
}
52 | ||
53 | ||
# cfield color text - print a table cell with the given background color
cfield()
{
	echo '<TD bgcolor="'"$1"'">'"$2"
}
58 | ||
59 | ||
# rank op func mine other1 other2 name [suffix]
#
# Print a right-aligned table cell with the value "func mine name" yields,
# followed by "suffix". The integer parts of the three variants' values are
# compared with the test operator "op" (-lt or -gt): the cell is green if
# "mine" beats both others, red if both others beat it, else white.
#
# The values are quoted so that a missing figure makes the comparison fail
# cleanly instead of changing the shape of the test expression, and the
# quotes around "right" are escaped so that they reach the output.
rank()
{
	op=$1
	txt=`$2 $3 $6`
	a=`echo "$txt" | trim`
	b=`$2 $4 $6 | trim`
	c=`$2 $5 $6 | trim`

	if [ "$a" $op "$b" ] && [ "$a" $op "$c" ]; then
		col=$green
	elif [ "$b" $op "$a" ] && [ "$c" $op "$a" ]; then
		col=$red
	else
		col=white
	fi
	echo "<TD bgcolor=\"$col\" align=\"right\">$txt$7"
}
77 | ||
78 | ||
# cell colors for the worst and the best of the three variants
red="#ffb0b0"
green="#a0ffa0"

# -h selects HTML output. "$1" is quoted so that running the script without
# arguments does not make "[" fail with a syntax error.
html=false
if [ "$1" = -h ]; then
	html=true
	shift
fi
87 | ||
# HTML output: one table row per patch, with four ranked cells per scheduler
# variant, an equivalence summary, and the patch name.
if $html; then
	cat <<EOF
<HTML>
<TITLE>Scheduler comparison</TITLE>
<BODY>
<TABLE bgcolor="#f0f0f0">
<TR>
<TH colspan="5" align="left">Original</TH>
<TH colspan="5" align="left">New (no optimizer)</TH>
<TH colspan="5" align="left">New (LCPF)</TH>
<TH align="left">Equiv</TH>
<TH align="left">Name</TH>
<TR>
<TH>Time</TH><TH>Size</TH><TH>Eff</TH><TH>Regs</TH>
<TH> </TH>
<TH>Time</TH><TH>Size</TH><TH>Eff</TH><TH>Regs</TH>
<TH> </TH>
<TH>Time</TH><TH>Size</TH><TH>Eff</TH><TH>Regs</TH>
<TH> </TH>
EOF
	for n in `ls -1 data/ref/out`; do
		ref=`sum ref $n`
		new=`sum new $n`
		opt=`sum opt $n`
		same=`eq $ref $new`/`eq $ref $opt`/`eq $new $opt`

		echo "<TR>"
		# the variant being shown comes first, then the other two
		for order in "ref new opt" "new ref opt" "opt ref new"; do
			rank -lt time $order $n
			rank -lt size $order $n
			rank -gt eff $order $n %
			rank -lt regs $order $n

			echo "<TD>"
		done

		case $same in
		Y/Y/Y)	col=$green;;
		*)	col=$red;;
		esac
		cfield $col $same
		cfield white "$n"
	done
	cat <<EOF
</TABLE>
</BODY>
</HTML>
EOF
	exit
fi
147 | ||
148 | ||
# Plain text output: one line per patch, then the total run time of each
# scheduler variant.

echo "Original New sched (no opt) New sched (LCPF) Equiv Name"
echo "Time Size Eff Regs Time Size Eff Regs Time Size Eff Regs"

# The totals are built as RPN expressions for dc: "0 t1 + t2 + ..."
tref=0
tnew=0
topt=0

for n in `ls -1 data/ref/out`; do
	ref=`sum ref $n`
	new=`sum new $n`
	opt=`sum opt $n`

	t_ref=`time ref $n`
	t_new=`time new $n`
	t_opt=`time opt $n`

	printf "%5.1f %4d%3d%% %4d %5.1f %4d%3d%% %4d %5.1f %4d%3d%% %4d %s " \
	    $t_ref `size ref $n` `eff ref $n` `regs ref $n` \
	    $t_new `size new $n` `eff new $n` `regs new $n` \
	    $t_opt `size opt $n` `eff opt $n` `regs opt $n` \
	    `eq $ref $new`/`eq $ref $opt`/`eq $new $opt`
	echo $n

	tref="$tref $t_ref +"
	tnew="$tnew $t_new +"
	topt="$topt $t_opt +"
done

printf "Original time: %9.1f s\n" `dc -e "$tref p"`
printf "New (unopt) time: %6.1f s\n" `dc -e "$tnew p"`
printf "New (opt) time: %8.1f s\n" `dc -e "$topt p"`
m1/perf/try | ||
---|---|---|
#!/bin/sh -x
#
# try - build with compiler debugging on, profiling off and SCHED=sched.o,
# then start gdb on the "Godhead" patch(es)
#

M1=$(make path)

make CFLAGS_EXTRA=-DCOMP_DEBUG CFLAGS_PROF= SCHED=sched.o
gdb --args ./main $M1/flickernoise/patches/*/*Godhead*.fnp
Branches:
master