Werner's Miscellanea
Sign in or create your account | Project List | Help
Werner's Miscellanea Git Source Tree
Root/
Source at commit abd7de72651ce9ef077215cfeec797e0ec89e377 created 12 years 6 months ago. By Werner Almesberger, m1/perf/: added instrumentation for monitoring register allocation | |
---|---|
1 | /* |
2 | * sched.c - O(n) ... O(n^2) scheduler |
3 | * |
4 | * Written 2011 by Werner Almesberger |
5 | * |
6 | * Based on gfpus.c |
7 | * Copyright (C) 2007, 2008, 2009, 2010 Sebastien Bourdeauducq |
8 | * |
9 | * This program is free software: you can redistribute it and/or modify |
10 | * it under the terms of the GNU General Public License as published by |
11 | * the Free Software Foundation, version 3 of the License. |
12 | * |
13 | * This program is distributed in the hope that it will be useful, |
14 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
15 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
16 | * GNU General Public License for more details. |
17 | * |
18 | * You should have received a copy of the GNU General Public License |
19 | * along with this program. If not, see <http://www.gnu.org/licenses/>. |
20 | */ |
21 | |
22 | #include <stdlib.h> |
23 | #include <stdio.h> |
24 | #include <string.h> |
25 | |
26 | #include <fpvm/is.h> |
27 | #include <fpvm/fpvm.h> |
28 | #include <fpvm/pfpu.h> |
29 | #include <fpvm/gfpus.h> |
30 | |
31 | #include <hw/pfpu.h> |
32 | |
33 | |
34 | #define REG_STATS |
35 | |
36 | #ifdef DEBUG |
37 | #define Dprintf printf |
38 | #else |
39 | #define Dprintf(...) |
40 | #endif |
41 | |
42 | |
43 | #define MAX_LATENCY 8 /* maximum latency; okay to make this bigger */ |
44 | |
45 | #define FIELD(w) (((pfpu_instruction *) &(w))->i) |
46 | |
47 | |
48 | struct list { |
49 | struct list *next, *prev; |
50 | }; |
51 | |
52 | |
53 | struct insn { |
54 | struct list more; /* more insns on same schedule */ |
55 | struct fpvm_instruction *vm_insn; |
56 | struct data_ref { |
57 | struct list more; /* more refs sharing the data */ |
58 | struct insn *insn; /* insn this is part of */ |
59 | struct insn *dep; /* insn we depend on */ |
60 | } opa, opb, dest, cond; |
61 | int arity; |
62 | int latency; |
63 | int unresolved; /* number of data refs we need before we can sched */ |
64 | int earliest; /* earliest cycle dependencies seen so far are met */ |
65 | struct list dependants; /* list of dependencies (constant) */ |
66 | int num_dependants; /* number of unresolved dependencies */ |
67 | }; |
68 | |
69 | |
70 | struct vm_reg { |
71 | struct insn *setter; /* instruction setting it; NULL if none */ |
72 | int pfpu_reg; /* underlying PFPU register */ |
73 | int refs; /* usage count */ |
74 | }; |
75 | |
76 | |
77 | struct pfpu_reg { |
78 | struct list more; /* list of unallocated PFPU registers */ |
79 | int vm_reg; /* corresponding FPVM register if allocated */ |
80 | int used; /* used somewhere in the program */ |
81 | }; |
82 | |
83 | |
84 | static struct sched_ctx { |
85 | struct fpvm_fragment *frag; |
86 | struct insn insns[FPVM_MAXCODELEN]; |
87 | struct vm_reg *regs; /* dynamically allocated */ |
88 | struct pfpu_reg pfpu_regs[PFPU_REG_COUNT]; |
89 | struct list unallocated; /* unallocated registers */ |
90 | struct list unscheduled; /* unscheduled insns */ |
91 | struct list waiting; /* insns waiting to be scheduled */ |
92 | struct list ready[PFPU_PROGSIZE]; /* insns ready at nth cycle */ |
93 | #ifdef REG_STATS |
94 | int max_regs, curr_regs; /* allocation statistics */ |
95 | #endif |
96 | } *sc; |
97 | |
98 | |
99 | /* ----- Register initialization ------------------------------------------- */ |
100 | |
101 | |
102 | /* |
103 | * Straight from gfpus.c, only with some whitespace changes. |
104 | */ |
105 | |
106 | static void get_registers(struct fpvm_fragment *fragment, |
107 | unsigned int *registers) |
108 | { |
109 | int i; |
110 | union { |
111 | float f; |
112 | unsigned int n; |
113 | } fconv; |
114 | |
115 | for(i = 0; i < fragment->nbindings; i++) |
116 | if (fragment->bindings[i].isvar) |
117 | registers[i] = 0; |
118 | else { |
119 | fconv.f = fragment->bindings[i].b.c; |
120 | registers[i] = fconv.n; |
121 | } |
122 | for(; i < PFPU_REG_COUNT; i++) |
123 | registers[i] = 0; |
124 | } |
125 | |
126 | |
127 | /* ----- Doubly-linked list ------------------------------------------------ */ |
128 | |
129 | |
130 | /* |
131 | * Use the naming conventions of include/linux/list.h |
132 | */ |
133 | |
134 | static void list_init(struct list *list) |
135 | { |
136 | list->next = list->prev = list; |
137 | } |
138 | |
139 | |
140 | static void list_del(struct list *item) |
141 | { |
142 | item->prev->next = item->next; |
143 | item->next->prev = item->prev; |
144 | } |
145 | |
146 | |
147 | static void *list_pop(struct list *list) |
148 | { |
149 | struct list *first; |
150 | |
151 | first = list->next; |
152 | if (first == list) |
153 | return NULL; |
154 | list_del(first); |
155 | return first; |
156 | } |
157 | |
158 | |
159 | static void list_add_tail(struct list *list, struct list *item) |
160 | { |
161 | item->next = list; |
162 | item->prev = list->prev; |
163 | list->prev->next = item; |
164 | list->prev = item; |
165 | } |
166 | |
167 | |
168 | static void list_add(struct list *list, struct list *item) |
169 | { |
170 | item->next = list->next; |
171 | item->prev = list; |
172 | list->next->prev = item; |
173 | list->next = item; |
174 | } |
175 | |
176 | |
177 | static void list_concat(struct list *a, struct list *b) |
178 | { |
179 | if (b->next != b) { |
180 | a->prev->next = b->next; |
181 | b->next->prev = a->prev; |
182 | b->prev->next = a; |
183 | a->prev = b->prev; |
184 | } |
185 | list_init(b); |
186 | } |
187 | |
188 | |
189 | /* |
190 | * Do not delete elements from the list while traversing it with foreach ! |
191 | */ |
192 | |
193 | #define foreach(var, head) \ |
194 | for (var = (void *) ((struct list *) (head))->next; \ |
195 | (var) != (void *) (head); \ |
196 | var = (void *) ((struct list *) (var))->next) |
197 | |
198 | |
199 | /* ----- Register management ----------------------------------------------- */ |
200 | |
201 | |
202 | static int reg2idx(int reg) |
203 | { |
204 | return reg >= 0 ? reg : sc->frag->nbindings-reg; |
205 | } |
206 | |
207 | |
208 | static int alloc_reg(struct insn *setter) |
209 | { |
210 | struct pfpu_reg *reg; |
211 | int vm_reg, pfpu_reg, vm_idx; |
212 | |
213 | vm_reg = setter->vm_insn->dest; |
214 | if (vm_reg >= 0) |
215 | return vm_reg; |
216 | reg = list_pop(&sc->unallocated); |
217 | if (!reg) |
218 | abort(); |
219 | |
220 | #ifdef REG_STATS |
221 | sc->curr_regs++; |
222 | if (sc->curr_regs > sc->max_regs) |
223 | sc->max_regs = sc->curr_regs; |
224 | #endif |
225 | |
226 | reg->vm_reg = vm_reg; |
227 | pfpu_reg = reg-sc->pfpu_regs; |
228 | |
229 | Dprintf(" alloc reg %d -> %d\n", vm_reg, pfpu_reg); |
230 | |
231 | vm_idx = reg2idx(vm_reg); |
232 | sc->regs[vm_idx].setter = setter; |
233 | sc->regs[vm_idx].pfpu_reg = pfpu_reg; |
234 | sc->regs[vm_idx].refs = setter->num_dependants+1; |
235 | |
236 | return pfpu_reg; |
237 | } |
238 | |
239 | |
240 | static void put_reg(int vm_reg) |
241 | { |
242 | int vm_idx; |
243 | |
244 | if (vm_reg >= 0) |
245 | return; |
246 | |
247 | vm_idx = reg2idx(vm_reg); |
248 | if (--sc->regs[vm_idx].refs) |
249 | return; |
250 | |
251 | Dprintf(" free reg %d\n", regs[vm_idx].pfpu_reg); |
252 | |
253 | #ifdef REG_STATS |
254 | sc->curr_regs--; |
255 | #endif |
256 | |
257 | /* |
258 | * Prepend so that register numbers stay small and bugs reveal |
259 | * themselves more rapidly. |
260 | */ |
261 | list_add(&sc->unallocated, |
262 | &sc->pfpu_regs[sc->regs[vm_idx].pfpu_reg].more); |
263 | |
264 | /* clear it for style only */ |
265 | sc->regs[vm_idx].setter = NULL; |
266 | sc->regs[vm_idx].pfpu_reg = 0; |
267 | } |
268 | |
269 | |
270 | static void put_reg_by_setter(struct insn *setter) |
271 | { |
272 | if (setter) |
273 | put_reg(setter->vm_insn->dest); |
274 | } |
275 | |
276 | |
277 | static int lookup_pfpu_reg(int vm_reg) |
278 | { |
279 | return vm_reg >= 0 ? vm_reg : sc->regs[reg2idx(vm_reg)].pfpu_reg; |
280 | } |
281 | |
282 | |
283 | static void mark(int vm_reg) |
284 | { |
285 | if (vm_reg > 0) |
286 | sc->pfpu_regs[vm_reg].used = 1; |
287 | } |
288 | |
289 | |
290 | static void init_registers(struct fpvm_fragment *frag, |
291 | unsigned int *registers) |
292 | { |
293 | size_t regs_size; |
294 | int i; |
295 | |
296 | get_registers(frag, registers); |
297 | |
298 | regs_size = sizeof(struct vm_reg)*(frag->nbindings-frag->next_sur); |
299 | sc->regs = malloc(regs_size); |
300 | memset(sc->regs, 0, regs_size); |
301 | |
302 | for (i = 0; i != frag->ninstructions; i++) { |
303 | mark(frag->code[i].opa); |
304 | mark(frag->code[i].opb); |
305 | mark(frag->code[i].dest); |
306 | } |
307 | |
308 | list_init(&sc->unallocated); |
309 | for (i = PFPU_SPREG_COUNT; i != PFPU_REG_COUNT; i++) |
310 | if (!sc->pfpu_regs[i].used) |
311 | list_add_tail(&sc->unallocated, &sc->pfpu_regs[i].more); |
312 | } |
313 | |
314 | |
315 | /* ----- Instruction scheduler --------------------------------------------- */ |
316 | |
317 | |
318 | static struct vm_reg *add_data_ref(struct insn *insn, struct data_ref *ref, |
319 | int reg_num) |
320 | { |
321 | struct vm_reg *reg; |
322 | |
323 | reg = sc->regs+reg2idx(reg_num); |
324 | ref->insn = insn; |
325 | ref->dep = reg->setter; |
326 | if (ref->dep) { |
327 | list_add_tail(&ref->dep->dependants, &ref->more); |
328 | ref->dep->num_dependants++; |
329 | insn->unresolved++; |
330 | |
331 | Dprintf("insn %lu: reg %d setter %lu unresolved %d\n", |
332 | insn-sc->insns, reg_num, reg->setter-sc->insns, |
333 | insn->unresolved); |
334 | } else { |
335 | list_init(&ref->more); |
336 | } |
337 | return reg; |
338 | } |
339 | |
340 | |
341 | static void init_scheduler(struct fpvm_fragment *frag) |
342 | { |
343 | int i; |
344 | struct insn *insn; |
345 | |
346 | list_init(&sc->unscheduled); |
347 | list_init(&sc->waiting); |
348 | for (i = 0; i != PFPU_PROGSIZE; i++) |
349 | list_init(&sc->ready[i]); |
350 | |
351 | for (i = 0; i != frag->ninstructions; i++) { |
352 | insn = sc->insns+i; |
353 | insn->vm_insn = frag->code+i; |
354 | insn->arity = fpvm_get_arity(frag->code[i].opcode); |
355 | insn->latency = pfpu_get_latency(frag->code[i].opcode); |
356 | list_init(&insn->dependants); |
357 | switch (insn->arity) { |
358 | case 3: |
359 | add_data_ref(insn, &insn->opb, FPVM_REG_IFB); |
360 | /* fall through */ |
361 | case 2: |
362 | add_data_ref(insn, &insn->opb, frag->code[i].opb); |
363 | /* fall through */ |
364 | case 1: |
365 | add_data_ref(insn, &insn->opa, frag->code[i].opa); |
366 | /* fall through */ |
367 | case 0: |
368 | add_data_ref(insn, |
369 | &insn->dest, frag->code[i].dest)->setter = insn; |
370 | break; |
371 | default: |
372 | abort(); |
373 | } |
374 | if (insn->unresolved) |
375 | list_add_tail(&sc->unscheduled, &insn->more); |
376 | else |
377 | list_add_tail(&sc->ready[0], &insn->more); |
378 | } |
379 | } |
380 | |
381 | |
382 | static void issue(struct insn *insn, int cycle, unsigned *code) |
383 | { |
384 | struct data_ref *ref; |
385 | int end; |
386 | end = cycle+insn->latency; |
387 | |
388 | Dprintf("cycle %d: insn %lu L %d (A %d B %d)\n", cycle, |
389 | insn-sc->insns, insn->latency, insn->vm_insn->opa, |
390 | insn->vm_insn->opb); |
391 | |
392 | switch (insn->arity) { |
393 | case 3: |
394 | /* fall through */ |
395 | case 2: |
396 | FIELD(code[cycle]).opb = lookup_pfpu_reg(insn->vm_insn->opb); |
397 | put_reg_by_setter(insn->opb.dep); |
398 | /* fall through */ |
399 | case 1: |
400 | FIELD(code[cycle]).opa = lookup_pfpu_reg(insn->vm_insn->opa); |
401 | put_reg_by_setter(insn->opa.dep); |
402 | break; |
403 | case 0: |
404 | break; |
405 | default: |
406 | abort(); |
407 | } |
408 | |
409 | FIELD(code[end]).dest = alloc_reg(insn); |
410 | FIELD(code[cycle]).opcode = fpvm_to_pfpu(insn->vm_insn->opcode); |
411 | |
412 | foreach (ref, &insn->dependants) { |
413 | if (ref->insn->earliest <= end) |
414 | ref->insn->earliest = end+1; |
415 | if (!--ref->insn->unresolved) { |
416 | Dprintf(" unlocked %lu -> %u\n", ref->insn-insns, |
417 | ref->insn->earliest); |
418 | list_del(&ref->insn->more); |
419 | list_add_tail(sc->ready+ref->insn->earliest, |
420 | &ref->insn->more); |
421 | } |
422 | } |
423 | } |
424 | |
425 | |
426 | #ifdef DEBUG |
427 | static int count(const struct list *list) |
428 | { |
429 | int n = 0; |
430 | const struct list *p; |
431 | |
432 | for (p = list->next; p != list; p = p->next) |
433 | n++; |
434 | return n; |
435 | } |
436 | #endif |
437 | |
438 | |
439 | static int schedule(unsigned int *code) |
440 | { |
441 | int remaining; |
442 | int i, last, end; |
443 | struct insn *insn; |
444 | |
445 | remaining = sc->frag->ninstructions; |
446 | for (i = 0; remaining; i++) { |
447 | if (i == PFPU_PROGSIZE) |
448 | return -1; |
449 | |
450 | Dprintf("@%d --- remaining %d, waiting %d + ready %d\n", |
451 | i, remaining, count(&sc->waiting), count(&sc->ready[i])); |
452 | |
453 | list_concat(&sc->waiting, &sc->ready[i]); |
454 | foreach (insn, &sc->waiting) { |
455 | end = i+insn->latency; |
456 | if (end >= PFPU_PROGSIZE) |
457 | return -1; |
458 | if (!FIELD(code[end]).dest) { |
459 | issue(insn, i, code); |
460 | list_del(&insn->more); |
461 | remaining--; |
462 | break; |
463 | } |
464 | } |
465 | if (FIELD(code[i]).dest) |
466 | put_reg(sc->pfpu_regs[FIELD(code[i]).dest].vm_reg); |
467 | } |
468 | |
469 | /* |
470 | * Add NOPs to cover unfinished instructions. |
471 | */ |
472 | last = i; |
473 | end = i+MAX_LATENCY; |
474 | if (end > PFPU_PROGSIZE) |
475 | end = PFPU_PROGSIZE; |
476 | while (i != end) { |
477 | if (FIELD(code[i]).dest) |
478 | last = i+1; /* @@@ ? */ |
479 | i++; |
480 | } |
481 | return last; |
482 | } |
483 | |
484 | |
485 | static void init_scheduler_context(struct fpvm_fragment *frag, |
486 | unsigned int *reg) |
487 | { |
488 | sc = malloc(sizeof(*sc)); |
489 | memset(sc, 0, sizeof(*sc)); |
490 | |
491 | sc->frag = frag; |
492 | |
493 | init_registers(frag, reg); |
494 | init_scheduler(frag); |
495 | } |
496 | |
497 | |
498 | int gfpus_schedule(struct fpvm_fragment *frag, unsigned int *code, |
499 | unsigned int *reg) |
500 | { |
501 | pfpu_instruction vecout; |
502 | int res; |
503 | |
504 | init_scheduler_context(frag, reg); |
505 | memset(code, 0, PFPU_PROGSIZE*sizeof(*code)); |
506 | res = schedule(code); |
507 | |
508 | #ifdef REG_STATS |
509 | printf("regs: %d/%d\n", sc->curr_regs, sc->max_regs); |
510 | #endif |
511 | |
512 | free(sc->regs); |
513 | free(sc); |
514 | if (res < 0) |
515 | return res; |
516 | if (frag->vector_mode) |
517 | return res; |
518 | if (res == PFPU_PROGSIZE) |
519 | return -1; |
520 | |
521 | vecout.w = 0; |
522 | vecout.i.opcode = FPVM_OPCODE_VECTOUT; |
523 | code[res] = vecout.w; |
524 | |
525 | return res+1; |
526 | } |
527 |
Branches:
master