Root/m1/perf/sched.c

Source at commit abd7de72651ce9ef077215cfeec797e0ec89e377 created 8 years 6 months ago.
By Werner Almesberger, m1/perf/: added instrumentation for monitoring register allocation
1/*
2 * sched.c - O(n) ... O(n^2) scheduler
3 *
4 * Written 2011 by Werner Almesberger
5 *
6 * Based on gfpus.c
7 * Copyright (C) 2007, 2008, 2009, 2010 Sebastien Bourdeauducq
8 *
9 * This program is free software: you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License as published by
11 * the Free Software Foundation, version 3 of the License.
12 *
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
17 *
18 * You should have received a copy of the GNU General Public License
19 * along with this program. If not, see <http://www.gnu.org/licenses/>.
20 */
21
22#include <stdlib.h>
23#include <stdio.h>
24#include <string.h>
25
26#include <fpvm/is.h>
27#include <fpvm/fpvm.h>
28#include <fpvm/pfpu.h>
29#include <fpvm/gfpus.h>
30
31#include <hw/pfpu.h>
32
33
34#define REG_STATS
35
36#ifdef DEBUG
37#define Dprintf printf
38#else
39#define Dprintf(...)
40#endif
41
42
43#define MAX_LATENCY 8 /* maximum latency; okay to make this bigger */
44
45#define FIELD(w) (((pfpu_instruction *) &(w))->i)
46
47
48struct list {
49    struct list *next, *prev;
50};
51
52
53struct insn {
54    struct list more; /* more insns on same schedule */
55    struct fpvm_instruction *vm_insn;
56    struct data_ref {
57        struct list more; /* more refs sharing the data */
58        struct insn *insn; /* insn this is part of */
59        struct insn *dep; /* insn we depend on */
60    } opa, opb, dest, cond;
61    int arity;
62    int latency;
63    int unresolved; /* number of data refs we need before we can sched */
64    int earliest; /* earliest cycle dependencies seen so far are met */
65    struct list dependants; /* list of dependencies (constant) */
66    int num_dependants; /* number of unresolved dependencies */
67};
68
69
70struct vm_reg {
71    struct insn *setter; /* instruction setting it; NULL if none */
72    int pfpu_reg; /* underlying PFPU register */
73    int refs; /* usage count */
74};
75
76
77struct pfpu_reg {
78    struct list more; /* list of unallocated PFPU registers */
79    int vm_reg; /* corresponding FPVM register if allocated */
80    int used; /* used somewhere in the program */
81};
82
83
84static struct sched_ctx {
85    struct fpvm_fragment *frag;
86    struct insn insns[FPVM_MAXCODELEN];
87    struct vm_reg *regs; /* dynamically allocated */
88    struct pfpu_reg pfpu_regs[PFPU_REG_COUNT];
89    struct list unallocated; /* unallocated registers */
90    struct list unscheduled; /* unscheduled insns */
91    struct list waiting; /* insns waiting to be scheduled */
92    struct list ready[PFPU_PROGSIZE]; /* insns ready at nth cycle */
93#ifdef REG_STATS
94    int max_regs, curr_regs; /* allocation statistics */
95#endif
96} *sc;
97
98
99/* ----- Register initialization ------------------------------------------- */
100
101
102/*
103 * Straight from gfpus.c, only with some whitespace changes.
104 */
105
106static void get_registers(struct fpvm_fragment *fragment,
107    unsigned int *registers)
108{
109    int i;
110    union {
111        float f;
112        unsigned int n;
113    } fconv;
114
115    for(i = 0; i < fragment->nbindings; i++)
116               if (fragment->bindings[i].isvar)
117            registers[i] = 0;
118        else {
119            fconv.f = fragment->bindings[i].b.c;
120            registers[i] = fconv.n;
121        }
122    for(; i < PFPU_REG_COUNT; i++)
123        registers[i] = 0;
124}
125
126
127/* ----- Doubly-linked list ------------------------------------------------ */
128
129
130/*
131 * Use the naming conventions of include/linux/list.h
132 */
133
134static void list_init(struct list *list)
135{
136    list->next = list->prev = list;
137}
138
139
140static void list_del(struct list *item)
141{
142    item->prev->next = item->next;
143    item->next->prev = item->prev;
144}
145
146
147static void *list_pop(struct list *list)
148{
149    struct list *first;
150
151    first = list->next;
152    if (first == list)
153        return NULL;
154    list_del(first);
155    return first;
156}
157
158
159static void list_add_tail(struct list *list, struct list *item)
160{
161    item->next = list;
162    item->prev = list->prev;
163    list->prev->next = item;
164    list->prev = item;
165}
166
167
168static void list_add(struct list *list, struct list *item)
169{
170    item->next = list->next;
171    item->prev = list;
172    list->next->prev = item;
173    list->next = item;
174}
175
176
177static void list_concat(struct list *a, struct list *b)
178{
179    if (b->next != b) {
180        a->prev->next = b->next;
181        b->next->prev = a->prev;
182        b->prev->next = a;
183        a->prev = b->prev;
184    }
185    list_init(b);
186}
187
188
189/*
190 * Do not delete elements from the list while traversing it with foreach !
191 */
192
193#define foreach(var, head) \
194    for (var = (void *) ((struct list *) (head))->next; \
195        (var) != (void *) (head); \
196        var = (void *) ((struct list *) (var))->next)
197
198
199/* ----- Register management ----------------------------------------------- */
200
201
202static int reg2idx(int reg)
203{
204    return reg >= 0 ? reg : sc->frag->nbindings-reg;
205}
206
207
208static int alloc_reg(struct insn *setter)
209{
210    struct pfpu_reg *reg;
211    int vm_reg, pfpu_reg, vm_idx;
212
213    vm_reg = setter->vm_insn->dest;
214    if (vm_reg >= 0)
215        return vm_reg;
216    reg = list_pop(&sc->unallocated);
217    if (!reg)
218        abort();
219
220#ifdef REG_STATS
221    sc->curr_regs++;
222    if (sc->curr_regs > sc->max_regs)
223        sc->max_regs = sc->curr_regs;
224#endif
225
226    reg->vm_reg = vm_reg;
227    pfpu_reg = reg-sc->pfpu_regs;
228
229    Dprintf(" alloc reg %d -> %d\n", vm_reg, pfpu_reg);
230
231    vm_idx = reg2idx(vm_reg);
232    sc->regs[vm_idx].setter = setter;
233    sc->regs[vm_idx].pfpu_reg = pfpu_reg;
234    sc->regs[vm_idx].refs = setter->num_dependants+1;
235
236    return pfpu_reg;
237}
238
239
240static void put_reg(int vm_reg)
241{
242    int vm_idx;
243
244    if (vm_reg >= 0)
245        return;
246
247    vm_idx = reg2idx(vm_reg);
248    if (--sc->regs[vm_idx].refs)
249        return;
250
251    Dprintf(" free reg %d\n", regs[vm_idx].pfpu_reg);
252
253#ifdef REG_STATS
254    sc->curr_regs--;
255#endif
256
257    /*
258     * Prepend so that register numbers stay small and bugs reveal
259     * themselves more rapidly.
260     */
261    list_add(&sc->unallocated,
262        &sc->pfpu_regs[sc->regs[vm_idx].pfpu_reg].more);
263
264    /* clear it for style only */
265    sc->regs[vm_idx].setter = NULL;
266    sc->regs[vm_idx].pfpu_reg = 0;
267}
268
269
270static void put_reg_by_setter(struct insn *setter)
271{
272    if (setter)
273        put_reg(setter->vm_insn->dest);
274}
275
276
277static int lookup_pfpu_reg(int vm_reg)
278{
279    return vm_reg >= 0 ? vm_reg : sc->regs[reg2idx(vm_reg)].pfpu_reg;
280}
281
282
283static void mark(int vm_reg)
284{
285    if (vm_reg > 0)
286        sc->pfpu_regs[vm_reg].used = 1;
287}
288
289
290static void init_registers(struct fpvm_fragment *frag,
291    unsigned int *registers)
292{
293    size_t regs_size;
294    int i;
295
296    get_registers(frag, registers);
297
298    regs_size = sizeof(struct vm_reg)*(frag->nbindings-frag->next_sur);
299    sc->regs = malloc(regs_size);
300    memset(sc->regs, 0, regs_size);
301
302    for (i = 0; i != frag->ninstructions; i++) {
303        mark(frag->code[i].opa);
304        mark(frag->code[i].opb);
305        mark(frag->code[i].dest);
306    }
307
308    list_init(&sc->unallocated);
309    for (i = PFPU_SPREG_COUNT; i != PFPU_REG_COUNT; i++)
310        if (!sc->pfpu_regs[i].used)
311            list_add_tail(&sc->unallocated, &sc->pfpu_regs[i].more);
312}
313
314
315/* ----- Instruction scheduler --------------------------------------------- */
316
317
318static struct vm_reg *add_data_ref(struct insn *insn, struct data_ref *ref,
319    int reg_num)
320{
321    struct vm_reg *reg;
322
323    reg = sc->regs+reg2idx(reg_num);
324    ref->insn = insn;
325    ref->dep = reg->setter;
326    if (ref->dep) {
327        list_add_tail(&ref->dep->dependants, &ref->more);
328        ref->dep->num_dependants++;
329        insn->unresolved++;
330
331        Dprintf("insn %lu: reg %d setter %lu unresolved %d\n",
332            insn-sc->insns, reg_num, reg->setter-sc->insns,
333            insn->unresolved);
334    } else {
335        list_init(&ref->more);
336    }
337    return reg;
338}
339
340
341static void init_scheduler(struct fpvm_fragment *frag)
342{
343    int i;
344    struct insn *insn;
345
346    list_init(&sc->unscheduled);
347    list_init(&sc->waiting);
348    for (i = 0; i != PFPU_PROGSIZE; i++)
349        list_init(&sc->ready[i]);
350
351    for (i = 0; i != frag->ninstructions; i++) {
352        insn = sc->insns+i;
353        insn->vm_insn = frag->code+i;
354        insn->arity = fpvm_get_arity(frag->code[i].opcode);
355        insn->latency = pfpu_get_latency(frag->code[i].opcode);
356        list_init(&insn->dependants);
357        switch (insn->arity) {
358        case 3:
359            add_data_ref(insn, &insn->opb, FPVM_REG_IFB);
360            /* fall through */
361        case 2:
362            add_data_ref(insn, &insn->opb, frag->code[i].opb);
363            /* fall through */
364        case 1:
365            add_data_ref(insn, &insn->opa, frag->code[i].opa);
366            /* fall through */
367        case 0:
368            add_data_ref(insn,
369                &insn->dest, frag->code[i].dest)->setter = insn;
370            break;
371        default:
372            abort();
373        }
374        if (insn->unresolved)
375            list_add_tail(&sc->unscheduled, &insn->more);
376        else
377            list_add_tail(&sc->ready[0], &insn->more);
378    }
379}
380
381
382static void issue(struct insn *insn, int cycle, unsigned *code)
383{
384    struct data_ref *ref;
385    int end;
386    end = cycle+insn->latency;
387
388    Dprintf("cycle %d: insn %lu L %d (A %d B %d)\n", cycle,
389        insn-sc->insns, insn->latency, insn->vm_insn->opa,
390        insn->vm_insn->opb);
391
392    switch (insn->arity) {
393    case 3:
394        /* fall through */
395    case 2:
396        FIELD(code[cycle]).opb = lookup_pfpu_reg(insn->vm_insn->opb);
397        put_reg_by_setter(insn->opb.dep);
398        /* fall through */
399    case 1:
400        FIELD(code[cycle]).opa = lookup_pfpu_reg(insn->vm_insn->opa);
401        put_reg_by_setter(insn->opa.dep);
402        break;
403    case 0:
404        break;
405    default:
406        abort();
407    }
408
409    FIELD(code[end]).dest = alloc_reg(insn);
410    FIELD(code[cycle]).opcode = fpvm_to_pfpu(insn->vm_insn->opcode);
411
412    foreach (ref, &insn->dependants) {
413        if (ref->insn->earliest <= end)
414            ref->insn->earliest = end+1;
415        if (!--ref->insn->unresolved) {
416            Dprintf(" unlocked %lu -> %u\n", ref->insn-insns,
417                ref->insn->earliest);
418            list_del(&ref->insn->more);
419            list_add_tail(sc->ready+ref->insn->earliest,
420                &ref->insn->more);
421        }
422    }
423}
424
425
426#ifdef DEBUG
427static int count(const struct list *list)
428{
429    int n = 0;
430    const struct list *p;
431
432    for (p = list->next; p != list; p = p->next)
433        n++;
434    return n;
435}
436#endif
437
438
439static int schedule(unsigned int *code)
440{
441    int remaining;
442    int i, last, end;
443    struct insn *insn;
444
445    remaining = sc->frag->ninstructions;
446    for (i = 0; remaining; i++) {
447        if (i == PFPU_PROGSIZE)
448            return -1;
449
450        Dprintf("@%d --- remaining %d, waiting %d + ready %d\n",
451            i, remaining, count(&sc->waiting), count(&sc->ready[i]));
452
453        list_concat(&sc->waiting, &sc->ready[i]);
454        foreach (insn, &sc->waiting) {
455            end = i+insn->latency;
456            if (end >= PFPU_PROGSIZE)
457                return -1;
458            if (!FIELD(code[end]).dest) {
459                issue(insn, i, code);
460                list_del(&insn->more);
461                remaining--;
462                break;
463            }
464        }
465        if (FIELD(code[i]).dest)
466            put_reg(sc->pfpu_regs[FIELD(code[i]).dest].vm_reg);
467    }
468
469    /*
470     * Add NOPs to cover unfinished instructions.
471     */
472    last = i;
473    end = i+MAX_LATENCY;
474    if (end > PFPU_PROGSIZE)
475        end = PFPU_PROGSIZE;
476    while (i != end) {
477        if (FIELD(code[i]).dest)
478            last = i+1; /* @@@ ? */
479        i++;
480    }
481    return last;
482}
483
484
485static void init_scheduler_context(struct fpvm_fragment *frag,
486    unsigned int *reg)
487{
488    sc = malloc(sizeof(*sc));
489    memset(sc, 0, sizeof(*sc));
490
491    sc->frag = frag;
492
493    init_registers(frag, reg);
494    init_scheduler(frag);
495}
496
497
498int gfpus_schedule(struct fpvm_fragment *frag, unsigned int *code,
499    unsigned int *reg)
500{
501    pfpu_instruction vecout;
502    int res;
503
504    init_scheduler_context(frag, reg);
505    memset(code, 0, PFPU_PROGSIZE*sizeof(*code));
506    res = schedule(code);
507
508#ifdef REG_STATS
509    printf("regs: %d/%d\n", sc->curr_regs, sc->max_regs);
510#endif
511
512    free(sc->regs);
513    free(sc);
514    if (res < 0)
515        return res;
516    if (frag->vector_mode)
517        return res;
518    if (res == PFPU_PROGSIZE)
519        return -1;
520
521    vecout.w = 0;
522    vecout.i.opcode = FPVM_OPCODE_VECTOUT;
523    code[res] = vecout.w;
524
525    return res+1;
526}
527

Archive Download this file

Branches:
master



interactive