Root/m1/perf/sched.c

Source at commit c02c02903d5c91d5bd7a1a7faf982b4ae60afec2 created 8 years 29 days ago.
By Werner Almesberger, m1/perf/: a bunch of bug fixes all over the place
1/*
2 * sched.c - O(n) ... O(n^2) scheduler
3 *
4 * Written 2011 by Werner Almesberger
5 *
6 * Based on gfpus.c
7 * Copyright (C) 2007, 2008, 2009, 2010 Sebastien Bourdeauducq
8 *
9 * This program is free software: you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License as published by
11 * the Free Software Foundation, version 3 of the License.
12 *
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
17 *
18 * You should have received a copy of the GNU General Public License
19 * along with this program. If not, see <http://www.gnu.org/licenses/>.
20 */
21
22#include <stdlib.h>
23#include <stdio.h>
24#include <string.h>
25
26#include <fpvm/is.h>
27#include <fpvm/fpvm.h>
28#include <fpvm/pfpu.h>
29#include <fpvm/gfpus.h>
30
31#include <hw/pfpu.h>
32
33
34//#define REG_STATS
35//#define LCPF /* longest critical path first */
36
37#ifdef DEBUG
38#define Dprintf printf
39#else
40#define Dprintf(...)
41#endif
42
43
44#define MAX_LATENCY 8 /* maximum latency; okay to make this bigger */
45
46#define FIELD(w) (((pfpu_instruction *) &(w))->i)
47
48
49struct list {
50    struct list *next, *prev;
51};
52
53
54struct insn {
55    struct list more; /* more insns on same schedule */
56    struct fpvm_instruction *vm_insn;
57    struct data_ref {
58        struct list more; /* more refs sharing the data */
59        struct insn *insn; /* insn this is part of */
60        struct insn *dep; /* insn we depend on */
61    } opa, opb, dest, cond;
62    int arity;
63    int latency;
64    int unresolved; /* number of data refs we need before we can sched */
65    int earliest; /* earliest cycle dependencies seen so far are met */
66    struct list dependants; /* list of dependencies (constant) */
67    int num_dependants; /* number of unresolved dependencies */
68#ifdef LCPF
69    int distance; /* minimum cycles on this path until the end */
70#endif
71};
72
73
74struct vm_reg {
75    struct insn *setter; /* instruction setting it; NULL if none */
76    int pfpu_reg; /* underlying PFPU register */
77    int refs; /* usage count */
78};
79
80
81struct pfpu_reg {
82    struct list more; /* list of unallocated PFPU registers */
83    int vm_reg; /* corresponding FPVM register if allocated */
84    int used; /* used somewhere in the program */
85};
86
87
88static struct sched_ctx {
89    struct fpvm_fragment *frag;
90    struct insn insns[FPVM_MAXCODELEN];
91    struct vm_reg *regs; /* dynamically allocated */
92    struct pfpu_reg pfpu_regs[PFPU_REG_COUNT];
93    struct list unallocated; /* unallocated registers */
94    struct list unscheduled; /* unscheduled insns */
95    struct list waiting; /* insns waiting to be scheduled */
96    struct list ready[PFPU_PROGSIZE]; /* insns ready at nth cycle */
97#ifdef REG_STATS
98    int max_regs, curr_regs; /* allocation statistics */
99#endif
100} *sc;
101
102
103/* ----- Register initialization ------------------------------------------- */
104
105
106/*
107 * Straight from gfpus.c, only with some whitespace changes.
108 */
109
110static void get_registers(struct fpvm_fragment *fragment,
111    unsigned int *registers)
112{
113    int i;
114    union {
115        float f;
116        unsigned int n;
117    } fconv;
118
119    for(i = 0; i < fragment->nbindings; i++)
120               if (fragment->bindings[i].isvar)
121            registers[i] = 0;
122        else {
123            fconv.f = fragment->bindings[i].b.c;
124            registers[i] = fconv.n;
125        }
126    for(; i < PFPU_REG_COUNT; i++)
127        registers[i] = 0;
128}
129
130
131/* ----- Doubly-linked list ------------------------------------------------ */
132
133
134/*
135 * Use the naming conventions of include/linux/list.h
136 */
137
138static void list_init(struct list *list)
139{
140    list->next = list->prev = list;
141}
142
143
144static void list_del(struct list *item)
145{
146    item->prev->next = item->next;
147    item->next->prev = item->prev;
148}
149
150
151static void *list_pop(struct list *list)
152{
153    struct list *first;
154
155    first = list->next;
156    if (first == list)
157        return NULL;
158    list_del(first);
159    return first;
160}
161
162
163static void list_add_tail(struct list *list, struct list *item)
164{
165    item->next = list;
166    item->prev = list->prev;
167    list->prev->next = item;
168    list->prev = item;
169}
170
171
172static void list_add(struct list *list, struct list *item)
173{
174    item->next = list->next;
175    item->prev = list;
176    list->next->prev = item;
177    list->next = item;
178}
179
180
181static void list_concat(struct list *a, struct list *b)
182{
183    if (b->next != b) {
184        a->prev->next = b->next;
185        b->next->prev = a->prev;
186        b->prev->next = a;
187        a->prev = b->prev;
188    }
189    list_init(b);
190}
191
192
193/*
194 * Do not delete elements from the list while traversing it with foreach !
195 */
196
197#define foreach(var, head) \
198    for (var = (void *) ((struct list *) (head))->next; \
199        (var) != (void *) (head); \
200        var = (void *) ((struct list *) (var))->next)
201
202
203/* ----- Register management ----------------------------------------------- */
204
205
206static int reg2idx(int reg)
207{
208    return reg >= 0 ? reg : sc->frag->nbindings-reg;
209}
210
211
212static int alloc_reg(struct insn *setter)
213{
214    struct pfpu_reg *reg;
215    int vm_reg, pfpu_reg, vm_idx;
216
217    vm_reg = setter->vm_insn->dest;
218    if (vm_reg >= 0)
219        return vm_reg;
220    reg = list_pop(&sc->unallocated);
221    if (!reg)
222        abort();
223
224#ifdef REG_STATS
225    sc->curr_regs++;
226    if (sc->curr_regs > sc->max_regs)
227        sc->max_regs = sc->curr_regs;
228#endif
229
230    reg->vm_reg = vm_reg;
231    pfpu_reg = reg-sc->pfpu_regs;
232
233    Dprintf(" alloc reg %d -> %d\n", vm_reg, pfpu_reg);
234
235    vm_idx = reg2idx(vm_reg);
236    sc->regs[vm_idx].setter = setter;
237    sc->regs[vm_idx].pfpu_reg = pfpu_reg;
238    sc->regs[vm_idx].refs = setter->num_dependants+1;
239
240    return pfpu_reg;
241}
242
243
244static void put_reg(int vm_reg)
245{
246    int vm_idx;
247
248    if (vm_reg >= 0)
249        return;
250
251    vm_idx = reg2idx(vm_reg);
252    if (--sc->regs[vm_idx].refs)
253        return;
254
255    Dprintf(" free reg %d\n", regs[vm_idx].pfpu_reg);
256
257#ifdef REG_STATS
258    sc->curr_regs--;
259#endif
260
261    /*
262     * Prepend so that register numbers stay small and bugs reveal
263     * themselves more rapidly.
264     */
265    list_add(&sc->unallocated,
266        &sc->pfpu_regs[sc->regs[vm_idx].pfpu_reg].more);
267
268    /* clear it for style only */
269    sc->regs[vm_idx].setter = NULL;
270    sc->regs[vm_idx].pfpu_reg = 0;
271}
272
273
274static void put_reg_by_setter(struct insn *setter)
275{
276    if (setter)
277        put_reg(setter->vm_insn->dest);
278}
279
280
281static int lookup_pfpu_reg(int vm_reg)
282{
283    return vm_reg >= 0 ? vm_reg : sc->regs[reg2idx(vm_reg)].pfpu_reg;
284}
285
286
287static void mark(int vm_reg)
288{
289    if (vm_reg > 0)
290        sc->pfpu_regs[vm_reg].used = 1;
291}
292
293
294static void init_registers(struct fpvm_fragment *frag,
295    unsigned int *registers)
296{
297    size_t regs_size;
298    int i;
299
300    get_registers(frag, registers);
301
302    regs_size = sizeof(struct vm_reg)*(frag->nbindings-frag->next_sur);
303    sc->regs = malloc(regs_size);
304    memset(sc->regs, 0, regs_size);
305
306    for (i = 0; i != frag->ninstructions; i++) {
307        mark(frag->code[i].opa);
308        mark(frag->code[i].opb);
309        mark(frag->code[i].dest);
310    }
311
312    list_init(&sc->unallocated);
313    for (i = PFPU_SPREG_COUNT; i != PFPU_REG_COUNT; i++)
314        if (!sc->pfpu_regs[i].used)
315            list_add_tail(&sc->unallocated, &sc->pfpu_regs[i].more);
316}
317
318
319/* ----- Instruction scheduler --------------------------------------------- */
320
321
322static struct vm_reg *add_data_ref(struct insn *insn, struct data_ref *ref,
323    int reg_num)
324{
325    struct vm_reg *reg;
326
327    reg = sc->regs+reg2idx(reg_num);
328    ref->insn = insn;
329    ref->dep = reg->setter;
330    if (ref->dep) {
331        list_add_tail(&ref->dep->dependants, &ref->more);
332        ref->dep->num_dependants++;
333        insn->unresolved++;
334
335        Dprintf("insn %lu: reg %d setter %lu unresolved %d\n",
336            insn-sc->insns, reg_num, reg->setter-sc->insns,
337            insn->unresolved);
338    } else {
339        list_init(&ref->more);
340    }
341    return reg;
342}
343
344
345static void init_scheduler(struct fpvm_fragment *frag)
346{
347    int i;
348    struct insn *insn;
349
350    list_init(&sc->unscheduled);
351    list_init(&sc->waiting);
352    for (i = 0; i != PFPU_PROGSIZE; i++)
353        list_init(&sc->ready[i]);
354
355    for (i = 0; i != frag->ninstructions; i++) {
356        insn = sc->insns+i;
357        insn->vm_insn = frag->code+i;
358        insn->arity = fpvm_get_arity(frag->code[i].opcode);
359        insn->latency = pfpu_get_latency(frag->code[i].opcode);
360        list_init(&insn->dependants);
361        switch (insn->arity) {
362        case 3:
363            add_data_ref(insn, &insn->cond, FPVM_REG_IFB);
364            /* fall through */
365        case 2:
366            add_data_ref(insn, &insn->opb, frag->code[i].opb);
367            /* fall through */
368        case 1:
369            add_data_ref(insn, &insn->opa, frag->code[i].opa);
370            /* fall through */
371        case 0:
372            add_data_ref(insn,
373                &insn->dest, frag->code[i].dest)->setter = insn;
374            break;
375        default:
376            abort();
377        }
378        if (insn->unresolved)
379            list_add_tail(&sc->unscheduled, &insn->more);
380        else
381            list_add_tail(&sc->ready[0], &insn->more);
382    }
383
384#ifdef LCPF
385    struct data_ref *dep;
386
387    for (i = frag->ninstructions-1; i >= 0; i--) {
388        insn = sc->insns+i;
389        foreach (dep, &insn->dependants)
390            if (dep->insn->distance > insn->distance)
391                insn->distance = dep->insn->distance;
392        insn->distance += insn->latency;
393    }
394#endif
395}
396
397
398static void issue(struct insn *insn, int cycle, unsigned *code)
399{
400    struct data_ref *ref;
401    int end;
402    end = cycle+insn->latency;
403
404    Dprintf("cycle %d: insn %lu L %d (A %d B %d)\n", cycle,
405        insn-sc->insns, insn->latency, insn->vm_insn->opa,
406        insn->vm_insn->opb);
407
408    switch (insn->arity) {
409    case 3:
410        /* fall through */
411    case 2:
412        FIELD(code[cycle]).opb = lookup_pfpu_reg(insn->vm_insn->opb);
413        put_reg_by_setter(insn->opb.dep);
414        /* fall through */
415    case 1:
416        FIELD(code[cycle]).opa = lookup_pfpu_reg(insn->vm_insn->opa);
417        put_reg_by_setter(insn->opa.dep);
418        break;
419    case 0:
420        break;
421    default:
422        abort();
423    }
424
425    FIELD(code[end]).dest = alloc_reg(insn);
426    FIELD(code[cycle]).opcode = fpvm_to_pfpu(insn->vm_insn->opcode);
427
428    foreach (ref, &insn->dependants) {
429        if (ref->insn->earliest <= end)
430            ref->insn->earliest = end+1;
431        if (!--ref->insn->unresolved) {
432            Dprintf(" unlocked %lu -> %u\n", ref->insn-insns,
433                ref->insn->earliest);
434            list_del(&ref->insn->more);
435            list_add_tail(sc->ready+ref->insn->earliest,
436                &ref->insn->more);
437        }
438    }
439}
440
441
442#ifdef DEBUG
443static int count(const struct list *list)
444{
445    int n = 0;
446    const struct list *p;
447
448    for (p = list->next; p != list; p = p->next)
449        n++;
450    return n;
451}
452#endif
453
454
455static int schedule(unsigned int *code)
456{
457    int remaining;
458    int i, last, end;
459    struct insn *insn;
460#ifdef LCPF
461    struct insn *best;
462#endif
463
464    remaining = sc->frag->ninstructions;
465    for (i = 0; remaining; i++) {
466        if (i == PFPU_PROGSIZE)
467            return -1;
468
469        Dprintf("@%d --- remaining %d, waiting %d + ready %d\n",
470            i, remaining, count(&sc->waiting), count(&sc->ready[i]));
471
472        list_concat(&sc->waiting, &sc->ready[i]);
473#ifdef LCPF
474        best = NULL;
475#endif
476        foreach (insn, &sc->waiting) {
477            end = i+insn->latency;
478            if (end >= PFPU_PROGSIZE)
479                return -1;
480            if (!FIELD(code[end]).dest) {
481#ifdef LCPF
482                if (!best || best->distance < insn->distance)
483                    best = insn;
484#else
485                issue(insn, i, code);
486                list_del(&insn->more);
487                remaining--;
488                break;
489#endif
490            }
491        }
492#ifdef LCPF
493        if (best) {
494            issue(best, i, code);
495            list_del(&best->more);
496            remaining--;
497        }
498#endif
499        if (FIELD(code[i]).dest)
500            put_reg(sc->pfpu_regs[FIELD(code[i]).dest].vm_reg);
501    }
502
503    /*
504     * Add NOPs to cover unfinished instructions.
505     */
506    last = i;
507    end = i+MAX_LATENCY;
508    if (end > PFPU_PROGSIZE)
509        end = PFPU_PROGSIZE;
510    while (i != end) {
511        if (FIELD(code[i]).dest)
512            last = i+1; /* @@@ ? */
513        i++;
514    }
515    return last;
516}
517
518
519static void init_scheduler_context(struct fpvm_fragment *frag,
520    unsigned int *reg)
521{
522    sc = malloc(sizeof(*sc));
523    memset(sc, 0, sizeof(*sc));
524
525    sc->frag = frag;
526
527    init_registers(frag, reg);
528    init_scheduler(frag);
529}
530
531
532int gfpus_schedule(struct fpvm_fragment *frag, unsigned int *code,
533    unsigned int *reg)
534{
535    pfpu_instruction vecout;
536    int res;
537
538    init_scheduler_context(frag, reg);
539    memset(code, 0, PFPU_PROGSIZE*sizeof(*code));
540    res = schedule(code);
541
542#ifdef REG_STATS
543    printf("regs: %d/%d\n", sc->curr_regs, sc->max_regs);
544#endif
545
546    free(sc->regs);
547    free(sc);
548    if (res < 0)
549        return res;
550    if (frag->vector_mode)
551        return res;
552    if (res == PFPU_PROGSIZE)
553        return -1;
554
555    vecout.w = 0;
556    vecout.i.opcode = FPVM_OPCODE_VECTOUT;
557    code[res] = vecout.w;
558
559    return res+1;
560}
561

Archive Download this file

Branches:
master



interactive