Date: 2011-09-18 11:12:00 (7 years 9 months ago)
Author: Werner Almesberger
Commit: b4abaffa5cd51c4b0d73c2c79a0095330dfa7b4e
Message: m1/perf/: allocate scheduler context (about 0.5 MB) dynamically

Files: m1/perf/TODO (2 diffs)
m1/perf/sched.c (14 diffs)

Change Details

m1/perf/TODO
1Done:
2- dynamically allocate scheduler context
3
4Pending:
15- see if preferring critical path can improve code efficiency
26- test IF
37- run result comparison against full set of patches
...... 
610- see what optimization changes (may interfere with profiling)
711- build into Flickernoise (some things may need adapting, e.g., abort())
812- review code, see if things can be simplified
13- see if valgrind can do something useful
m1/perf/sched.c
4848};
4949
5050
51static struct insn {
51struct insn {
5252    struct list more; /* more insns on same schedule */
5353    struct fpvm_instruction *vm_insn;
5454    struct data_ref {
...... 
6262    int earliest; /* earliest cycle dependencies seen so far are met */
6363    struct list dependants; /* list of dependencies (constant) */
6464    int num_dependants; /* number of unresolved dependencies */
65} insns[FPVM_MAXCODELEN];
65};
66
67
68struct vm_reg {
69    struct insn *setter; /* instruction setting it; NULL if none */
70    int pfpu_reg; /* underlying PFPU register */
71    int refs; /* usage count */
72};
73
74
75struct pfpu_reg {
76    struct list more; /* list of unallocated PFPU registers */
77    int vm_reg; /* corresponding FPVM register if allocated */
78    int used; /* used somewhere in the program */
79};
80
81
82static struct sched_ctx {
83    struct fpvm_fragment *frag;
84    struct insn insns[FPVM_MAXCODELEN];
85    struct vm_reg *regs; /* dynamically allocated */
86    struct pfpu_reg pfpu_regs[PFPU_REG_COUNT];
87    struct list unallocated; /* unallocated registers */
88    struct list unscheduled; /* unscheduled insns */
89    struct list waiting; /* insns waiting to be scheduled */
90    struct list ready[PFPU_PROGSIZE]; /* insns ready at nth cycle */
91} *sc;
6692
6793
6894/* ----- Register initialization ------------------------------------------- */
...... 
168194/* ----- Register management ----------------------------------------------- */
169195
170196
171static struct vm_reg {
172    struct insn *setter; /* instruction setting it; NULL if none */
173    int pfpu_reg; /* underlying PFPU register */
174    int refs; /* usage count */
175} *regs;
176
177static struct pfpu_reg {
178    struct list more; /* list of unallocated PFPU registers */
179    int vm_reg; /* corresponding FPVM register if allocated */
180    int used; /* used somewhere in the program */
181} pfpu_regs[PFPU_REG_COUNT];
182
183static struct list unallocated; /* unallocated registers */
184static int nbindings; /* "public" bindings */
185
186
187197static int reg2idx(int reg)
188198{
189    return reg >= 0 ? reg : nbindings-reg;
199    return reg >= 0 ? reg : sc->frag->nbindings-reg;
190200}
191201
192202
...... 
198208    vm_reg = setter->vm_insn->dest;
199209    if (vm_reg >= 0)
200210        return vm_reg;
201    reg = list_pop(&unallocated);
211    reg = list_pop(&sc->unallocated);
202212    if (!reg)
203213        abort();
204214    reg->vm_reg = vm_reg;
205    pfpu_reg = reg-pfpu_regs;
215    pfpu_reg = reg-sc->pfpu_regs;
206216
207217    Dprintf(" alloc reg %d -> %d\n", vm_reg, pfpu_reg);
208218
209219    vm_idx = reg2idx(vm_reg);
210    regs[vm_idx].setter = setter;
211    regs[vm_idx].pfpu_reg = pfpu_reg;
212    regs[vm_idx].refs = setter->num_dependants+1;
220    sc->regs[vm_idx].setter = setter;
221    sc->regs[vm_idx].pfpu_reg = pfpu_reg;
222    sc->regs[vm_idx].refs = setter->num_dependants+1;
223
213224    return pfpu_reg;
214225}
215226
...... 
222233        return;
223234
224235    vm_idx = reg2idx(vm_reg);
225    if (--regs[vm_idx].refs)
236    if (--sc->regs[vm_idx].refs)
226237        return;
227238
228Dprintf(" free reg %d\n", regs[vm_idx].pfpu_reg);
239    Dprintf(" free reg %d\n", regs[vm_idx].pfpu_reg);
240
229241    /*
230242     * Prepend so that register numbers stay small and bugs reveal
231243     * themselves more rapidly.
232244     */
233    list_add(&unallocated, &pfpu_regs[regs[vm_idx].pfpu_reg].more);
245    list_add(&sc->unallocated,
246        &sc->pfpu_regs[sc->regs[vm_idx].pfpu_reg].more);
234247
235248    /* clear it for style only */
236    regs[vm_idx].setter = NULL;
237    regs[vm_idx].pfpu_reg = 0;
249    sc->regs[vm_idx].setter = NULL;
250    sc->regs[vm_idx].pfpu_reg = 0;
238251}
239252
240253
...... 
247260
248261static int lookup_pfpu_reg(int vm_reg)
249262{
250    return vm_reg >= 0 ? vm_reg : regs[reg2idx(vm_reg)].pfpu_reg;
263    return vm_reg >= 0 ? vm_reg : sc->regs[reg2idx(vm_reg)].pfpu_reg;
251264}
252265
253266
254267static void mark(int vm_reg)
255268{
256269    if (vm_reg > 0)
257        pfpu_regs[vm_reg].used = 1;
270        sc->pfpu_regs[vm_reg].used = 1;
258271}
259272
260273
261static void init_registers(struct fpvm_fragment *fragment,
274static void init_registers(struct fpvm_fragment *frag,
262275    unsigned int *registers)
263276{
264277    size_t regs_size;
265278    int i;
266279
267    get_registers(fragment, registers);
268    nbindings = fragment->nbindings;
280    get_registers(frag, registers);
269281
270    regs_size = sizeof(struct vm_reg)*(nbindings-fragment->next_sur);
271    regs = malloc(regs_size);
272    memset(regs, 0, regs_size);
282    regs_size = sizeof(struct vm_reg)*(frag->nbindings-frag->next_sur);
283    sc->regs = malloc(regs_size);
284    memset(sc->regs, 0, regs_size);
273285
274    memset(pfpu_regs, 0, sizeof(pfpu_regs));
275    for (i = 0; i != fragment->ninstructions; i++) {
276        mark(fragment->code[i].opa);
277        mark(fragment->code[i].opb);
278        mark(fragment->code[i].dest);
286    for (i = 0; i != frag->ninstructions; i++) {
287        mark(frag->code[i].opa);
288        mark(frag->code[i].opb);
289        mark(frag->code[i].dest);
279290    }
280291
281    list_init(&unallocated);
292    list_init(&sc->unallocated);
282293    for (i = PFPU_SPREG_COUNT; i != PFPU_REG_COUNT; i++)
283        if (!pfpu_regs[i].used)
284            list_add_tail(&unallocated, &pfpu_regs[i].more);
294        if (!sc->pfpu_regs[i].used)
295            list_add_tail(&sc->unallocated, &sc->pfpu_regs[i].more);
285296}
286297
287298
288299/* ----- Instruction scheduler --------------------------------------------- */
289300
290301
291static struct list unscheduled; /* unscheduled insns */
292static struct list waiting; /* insns waiting to be scheduled */
293static struct list ready[PFPU_PROGSIZE]; /* insns ready at nth cycle */
294
295
296302static struct vm_reg *add_data_ref(struct insn *insn, struct data_ref *ref,
297303    int reg_num)
298304{
299305    struct vm_reg *reg;
300306
301    reg = regs+reg2idx(reg_num);
307    reg = sc->regs+reg2idx(reg_num);
302308    ref->insn = insn;
303309    ref->dep = reg->setter;
304310    if (ref->dep) {
...... 
307313        insn->unresolved++;
308314
309315        Dprintf("insn %lu: reg %d setter %lu unresolved %d\n",
310            insn-insns, reg_num, reg->setter-insns, insn->unresolved);
316            insn-sc->insns, reg_num, reg->setter-sc->insns,
317            insn->unresolved);
311318    } else {
312319        list_init(&ref->more);
313320    }
...... 
320327    int i;
321328    struct insn *insn;
322329
323    list_init(&unscheduled);
324    list_init(&waiting);
330    list_init(&sc->unscheduled);
331    list_init(&sc->waiting);
325332    for (i = 0; i != PFPU_PROGSIZE; i++)
326        list_init(&ready[i]);
333        list_init(&sc->ready[i]);
327334
328335    for (i = 0; i != frag->ninstructions; i++) {
329        insn = insns+i;
330        memset(insn, 0, sizeof(struct insn));
336        insn = sc->insns+i;
331337        insn->vm_insn = frag->code+i;
332338        insn->arity = fpvm_get_arity(frag->code[i].opcode);
333339        insn->latency = pfpu_get_latency(frag->code[i].opcode);
...... 
350356            abort();
351357        }
352358        if (insn->unresolved)
353            list_add_tail(&unscheduled, &insn->more);
359            list_add_tail(&sc->unscheduled, &insn->more);
354360        else
355            list_add_tail(&ready[0], &insn->more);
361            list_add_tail(&sc->ready[0], &insn->more);
356362    }
357363}
358364
...... 
364370    end = cycle+insn->latency;
365371
366372    Dprintf("cycle %d: insn %lu L %d (A %d B %d)\n", cycle,
367        insn-insns, insn->latency, insn->vm_insn->opa, insn->vm_insn->opb);
373        insn-sc->insns, insn->latency, insn->vm_insn->opa,
374        insn->vm_insn->opb);
368375
369376    switch (insn->arity) {
370377    case 3:
...... 
393400            Dprintf(" unlocked %lu -> %u\n", ref->insn-insns,
394401                ref->insn->earliest);
395402            list_del(&ref->insn->more);
396            list_add_tail(ready+ref->insn->earliest,
403            list_add_tail(sc->ready+ref->insn->earliest,
397404                &ref->insn->more);
398405        }
399406    }
...... 
413420#endif
414421
415422
416static int schedule(struct fpvm_fragment *frag, unsigned int *code)
423static int schedule(unsigned int *code)
417424{
418425    int remaining;
419426    int i, last, end;
420427    struct insn *insn;
421428
422    remaining = frag->ninstructions;
429    remaining = sc->frag->ninstructions;
423430    for (i = 0; remaining; i++) {
424431        if (i == PFPU_PROGSIZE)
425432            return -1;
426433
427434        Dprintf("@%d --- remaining %d, waiting %d + ready %d\n",
428            i, remaining, count(&waiting), count(&ready[i]));
435            i, remaining, count(&sc->waiting), count(&sc->ready[i]));
429436
430        list_concat(&waiting, &ready[i]);
431        foreach (insn, &waiting) {
437        list_concat(&sc->waiting, &sc->ready[i]);
438        foreach (insn, &sc->waiting) {
432439            end = i+insn->latency;
433440            if (end >= PFPU_PROGSIZE)
434441                return -1;
...... 
440447            }
441448        }
442449        if (FIELD(code[i]).dest)
443            put_reg(pfpu_regs[FIELD(code[i]).dest].vm_reg);
450            put_reg(sc->pfpu_regs[FIELD(code[i]).dest].vm_reg);
444451    }
445452
446453    /*
...... 
459466}
460467
461468
469static void init_scheduler_context(struct fpvm_fragment *frag,
470    unsigned int *reg)
471{
472    sc = malloc(sizeof(*sc));
473    memset(sc, 0, sizeof(*sc));
474
475    sc->frag = frag;
476
477    init_registers(frag, reg);
478    init_scheduler(frag);
479}
480
481
462482int gfpus_schedule(struct fpvm_fragment *frag, unsigned int *code,
463483    unsigned int *reg)
464484{
465485    pfpu_instruction vecout;
466486    int res;
467487
468    init_registers(frag, reg);
488    init_scheduler_context(frag, reg);
469489    memset(code, 0, PFPU_PROGSIZE*sizeof(*code));
470    init_scheduler(frag);
471    res = schedule(frag, code);
472    free(regs);
490    res = schedule(code);
491    free(sc->regs);
473492    if (res < 0)
474493        return res;
475494    if (frag->vector_mode)

Archive: Download the corresponding diff file

Branches:
master



interactive