/*
 * Add an element to the hash set.
 *
 * The element is duplicated with the dup function that kr_get_dup_func()
 * returns for krset->type; the duplicate is then inserted into krset->set
 * as both key and value.
 *
 * Returns 0 on success, -1 otherwise (no set given, no dup function for
 * the set's type, or the duplication produced no key).
 */
int kr_hashset_add(T_KRHashSet *krset, void *element)
{
    KRDupFunc dup_func;
    void *dup_key;

    if (!krset) {
        return -1;
    }

    dup_func = kr_get_dup_func(krset->type);
    if (!dup_func) {
        /* Calling through a null function pointer would crash. */
        return -1;
    }

    dup_key = dup_func(element);
    if (!dup_key) {
        /* Nothing to insert; report failure as the contract promises. */
        return -1;
    }

    kr_hashtable_insert(krset->set, dup_key, dup_key);
    return 0;
}
/* Generates a function, based on specified parameters.

   RF is the analysed reference function; it is duplicated and all
   transformations are applied to the duplicate.  For every basic block of
   RF with a selected timing (selected_tim >= 0) the duplicate is
   pre/unrolled with that timing's preroll/unroll factors.  Latches are then
   added, timings analysed and verilog is written via output_verilog () under
   the name CUT_FILENAME immediately followed by NAME.

   Returns the generated (duplicated) function.  */
cuc_func *
generate_function (cuc_func * rf, char *name, char *cut_filename)
{
  int b;
  char tmp[256];
  cuc_timings tt;
  cuc_func *f;

  /* Keep the call outside assert (): with NDEBUG the whole assert
     expression disappears and F would be left uninitialised.  */
  f = dup_func (rf);
  assert (f);

  if (cuc_debug >= 2)
    print_cuc_bb (f, "BEFORE_GENERATE");

  log ("Generating function %s.\n", name);
  PRINTF ("Generating function %s.\n", name);

  format_func_options (tmp, rf);
  if (tmp[0] != '\0')
    {
      PRINTF ("Applying options: %s\n", tmp);
    }
  else
    {
      PRINTF ("Using basic options.\n");
    }

  /* Generate function as specified by options */
  for (b = 0; b < f->num_bb; b++)
    {
      cuc_timings *st;

      if (rf->bb[b].selected_tim < 0)
        continue;

      st = &rf->bb[b].tim[rf->bb[b].selected_tim];

      /* Bounded formatting: an over-long NAME is truncated instead of
         overflowing TMP.  */
      snprintf (tmp, sizeof (tmp), "%s.bin.bb", name);
      preunroll_bb (&tmp[0], f, &tt, b, st->preroll, st->unroll);
      if (cuc_debug >= 1)
        print_cuc_bb (f, "AFTER_PREUNROLL");
    }

  for (b = 0; b < f->num_bb; b++)
    {
      cuc_timings *st;

      if (rf->bb[b].selected_tim < 0)
        continue;

      st = &rf->bb[b].tim[rf->bb[b].selected_tim];
      if (!st->nshared)
        continue;

      /* Timings with shared resources are not handled here; the
         csm_gen (f, rf, st->nshared, st->shared) call is disabled.  */
      assert (0);
    }

  add_latches (f);
  if (cuc_debug >= 1)
    print_cuc_bb (f, "AFTER_LATCHES");
  analyse_timings (f, &tt);

  snprintf (tmp, sizeof (tmp), "%s%s", cut_filename, name);
  output_verilog (f, tmp, name);
  return f;
}
/* Analyses function; done when cuc command is entered in (sim) prompt */ cuc_func * analyse_function (char *module_name, long orig_time, unsigned long start_addr, unsigned long end_addr, int memory_order, int num_runs) { cuc_timings timings; cuc_func *func = (cuc_func *) malloc (sizeof (cuc_func)); cuc_func *saved; int b, i, j; char tmp1[256]; char tmp2[256]; func->orig_time = orig_time; func->start_addr = start_addr; func->end_addr = end_addr; func->memory_order = memory_order; func->nfdeps = 0; func->fdeps = NULL; func->num_runs = num_runs; sprintf (tmp1, "%s.bin", module_name); cucdebug (2, "Loading %s.bin\n", module_name); if (cuc_load (tmp1)) { free (func); return NULL; } log ("Detecting basic blocks\n"); detect_bb (func); if (cuc_debug >= 2) print_cuc_insns ("WITH_BB_LIMITS", 0); //sprintf (tmp1, "%s.bin.mp", module_name); sprintf (tmp2, "%s.bin.bb", module_name); generate_bb_seq (func, config.sim.mprof_fn, tmp2); log ("Assuming %i clk cycle load (%i cyc burst)\n", runtime.cuc.mdelay[0], runtime.cuc.mdelay[2]); log ("Assuming %i clk cycle store (%i cyc burst)\n", runtime.cuc.mdelay[1], runtime.cuc.mdelay[3]); build_bb (func); if (cuc_debug >= 5) print_cuc_bb (func, "AFTER_BUILD_BB"); reg_dep (func); log ("Detecting dependencies\n"); if (cuc_debug >= 2) print_cuc_bb (func, "AFTER_REG_DEP"); cuc_optimize (func); #if 0 csm (func); #endif assert (saved = dup_func (func)); timings.preroll = timings.unroll = 1; timings.nshared = 0; add_latches (func); if (cuc_debug >= 1) print_cuc_bb (func, "AFTER_LATCHES"); analyse_timings (func, &timings); free_func (func); log ("Base option: pre%i,un%i,sha%i: %icyc %.1f\n", timings.preroll, timings.unroll, timings.nshared, timings.new_time, timings.size); saved->timings = timings; #if 1 /* detect and unroll simple loops */ for (b = 0; b < saved->num_bb; b++) { cuc_timings t[MAX_UNROLL * MAX_PREROLL]; cuc_timings *ut; cuc_timings *cut = &t[0]; int nt = 1; double csize; saved->bb[b].selected_tim = -1; /* Is it a loop? 
*/ if (saved->bb[b].next[0] != b && saved->bb[b].next[1] != b) continue; log ("Found loop at BB%x. Trying to unroll.\n", b); t[0] = timings; t[0].b = b; t[0].preroll = 1; t[0].unroll = 1; t[0].nshared = 0; sprintf (tmp1, "%s.bin.bb", module_name); i = 1; do { cuc_timings *pt; cuc_timings *cpt = cut; j = 1; do { pt = cpt; cpt = preunroll_bb (tmp1, saved, &t[nt++], b, ++j, i); } while (j <= MAX_PREROLL && pt->new_time > cpt->new_time); i++; ut = cut; cut = preunroll_bb (tmp1, saved, &t[nt++], b, 1, i); } while (i <= MAX_UNROLL && ut->new_time > cut->new_time); /* Sort the timings */ #if 0 if (cuc_debug >= 3) for (i = 0; i < nt; i++) PRINTF ("%i:%i,%i: %icyc\n", t[i].b, t[i].preroll, t[i].unroll, t[i].new_time); #endif #if HAVE___COMPAR_FN_T qsort (t, nt, sizeof (cuc_timings), (__compar_fn_t) tim_comp); #else qsort (t, nt, sizeof (cuc_timings), (int (*) (const void *, const void *)) tim_comp); #endif /* Delete timings, that have worst time and bigger size than other */ j = 1; csize = t[0].size; for (i = 1; i < nt; i++) if (t[i].size < csize) t[j++] = t[i]; nt = j; cucdebug (1, "Available options\n"); for (i = 0; i < nt; i++) cucdebug (1, "%i:%i,%i: %icyc %.1f\n", t[i].b, t[i].preroll, t[i].unroll, t[i].new_time, t[i].size); /* Add results from CSM */ j = nt; for (i = 0; i < saved->bb[b].ntim; i++) { int i1; for (i1 = 0; i1 < nt; i1++) { t[j] = t[i1]; t[j].size += saved->bb[b].tim[i].size - timings.size; t[j].new_time += saved->bb[b].tim[i].new_time - timings.new_time; t[j].nshared = saved->bb[b].tim[i].nshared; t[j].shared = saved->bb[b].tim[i].shared; if (++j >= MAX_UNROLL * MAX_PREROLL) goto full; } } full: nt = j; cucdebug (1, "Available options:\n"); for (i = 0; i < nt; i++) cucdebug (1, "%i:%i,%i: %icyc %.1f\n", t[i].b, t[i].preroll, t[i].unroll, t[i].new_time, t[i].size); /* Sort again with new timings added */ #if HAVE___COMPAR_FN_T qsort (t, nt, sizeof (cuc_timings), (__compar_fn_t) tim_comp); #else qsort (t, nt, sizeof (cuc_timings), (int (*)(const void *, 
const void *)) tim_comp); #endif /* Delete timings, that have worst time and bigger size than other */ j = 1; csize = t[0].size; for (i = 1; i < nt; i++) if (t[i].size < csize) t[j++] = t[i]; nt = j; cucdebug (1, "Available options:\n"); for (i = 0; i < nt; i++) cucdebug (1, "%i:%i,%i: %icyc %.1f\n", t[i].b, t[i].preroll, t[i].unroll, t[i].new_time, t[i].size); if (saved->bb[b].ntim) free (saved->bb[b].tim); saved->bb[b].ntim = nt; assert (saved->bb[b].tim = (cuc_timings *) malloc (sizeof (cuc_timings) * nt)); /* Copy options in reverse order -- smallest first */ for (i = 0; i < nt; i++) saved->bb[b].tim[i] = t[nt - 1 - i]; log ("Available options:\n"); for (i = 0; i < saved->bb[b].ntim; i++) { log ("%i:pre%i,un%i,sha%i: %icyc %.1f\n", saved->bb[b].tim[i].b, saved->bb[b].tim[i].preroll, saved->bb[b].tim[i].unroll, saved->bb[b].tim[i].nshared, saved->bb[b].tim[i].new_time, saved->bb[b].tim[i].size); } } #endif return saved; }