/*
 * Restore a saved automorphism description from the stream ifp.
 *
 * Two ints are read first: the number of entries saved per automorphism
 * and the length of the list. Fresh vectors are allocated for *head and
 * *list, and restore_auts() is then called once for each of the pcp->m
 * automorphisms. On return (*head)[0] holds the number of entries retained
 * per automorphism, MIN(pcp->lastg, saved count).
 */
void restore_automorphisms(FILE *ifp, int **head, int **list, struct pcp_vars *pcp)
{
   int saved_count;   /* entries per automorphism stored in the file */
   int entries;       /* length of the list stored in the file */
   int items_read;
   int kept;          /* entries per automorphism that are retained */
   int next_slot = 0; /* running index handed to restore_auts */
   int aut;

   items_read = fread(&saved_count, sizeof(int), 1, ifp);
   verify_read(items_read, 1);

   items_read = fread(&entries, sizeof(int), 1, ifp);
   verify_read(items_read, 1);

   *head = allocate_vector(saved_count * pcp->m + 1, 0, FALSE);
   (*head)[0] = saved_count;

   *list = allocate_vector(entries + 1, 0, FALSE);
   (*list)[0] = entries;

   kept = MIN(pcp->lastg, saved_count);

   /* automorphism number aut + 1 starts at offset aut * kept in head */
   for (aut = 0; aut < pcp->m; ++aut) {
      restore_auts(ifp, aut * kept, saved_count, kept, &next_slot, *head, *list);
   }

   (*head)[0] = kept;

   printf("Automorphisms read from file\n");
}
/* ===========================================================================
   get_spline

   i_x, i_y         - spline points (nwhisker_points entries, 0-based)
   nwhisker_points  - number of spline points
   min_x, max_x     - inclusive range of integer x values to evaluate
   yy               - output: *yy is allocated here with index range
                      [min_x, max_x] and receives the rounded spline
                      interpolation at every integer x in that range

   Raises "bad spline" through mexErrMsgTxt if splint() does not return 1.
   =========================================================================== */
void get_spline(int i_x[],int i_y[],int nwhisker_points, int min_x, int max_x, int **yy)
{
    int i, status, x_val;
    float *x, *y, *y2;
    float y_val;

    /* working copies use the index range 1..nwhisker_points */
    x  = allocate_vector( 1, nwhisker_points );
    y  = allocate_vector( 1, nwhisker_points );
    y2 = allocate_vector( 1, nwhisker_points );

    for (i = 0; i < nwhisker_points; i++) {
        x[i+1] = i_x[i] + 0.;
        y[i+1] = i_y[i] + 0.;
    }

    *yy = allocate_ivector( min_x, max_x );

    /* calculate the 2nd derivatives of y at spline points */
    spline( x, y, y2, nwhisker_points );

    for (x_val = min_x; x_val <= max_x; x_val++) {
        status = splint( x, y, y2, nwhisker_points, (float) x_val, &y_val );
        if (status != 1) {
            /* Release the temporaries before raising the error: the error
               call is not expected to return, so they would otherwise leak.
               NOTE(review): *yy is not released here because its
               deallocator is not visible in this block. */
            free_vector( x, 1, nwhisker_points );
            free_vector( y, 1, nwhisker_points );
            free_vector( y2, 1, nwhisker_points );
            mexErrMsgTxt("bad spline");
            return; /* never touch the freed buffers if the call returns */
        }
        (*yy)[x_val] = round ( y_val );
    }

    free_vector( x, 1, nwhisker_points );
    free_vector( y, 1, nwhisker_points );
    free_vector( y2, 1, nwhisker_points );
}
void evaluate_formula (int *queue, int *queue_length, struct pcp_vars *pcp) { register int *y = y_address; register int lastg = pcp->lastg; register int i; int nmr_entries; int *weight; int total; int nmr; total = 6 * lastg + 6; if (is_space_exhausted (total, pcp)) return; /* fudge the value of submlg because of possible call to power */ pcp->submlg -= total; read_value (TRUE, "Input number of components of formula: ", &nmr_entries, 1); weight = allocate_vector (nmr_entries, 1, FALSE); first = allocate_vector (nmr_entries + 1, 0, FALSE); last = allocate_vector (nmr_entries + 1, 0, FALSE); list = allocate_vector (nmr_entries + 1, 0, FALSE); printf ("Input weight of each component of formula: "); for (i = 1; i < nmr_entries; ++i) { read_value (FALSE, "", &weight[i], 1); } read_value (TRUE, "", &weight[i], 1); read_value (TRUE, "Input power of individual component: ", &power_of_entry, 1); read_value (TRUE, "Input power of word: ", &exponent, 1); for (i = 1; i <= nmr_entries; ++i) { first[i] = y[pcp->clend + weight[i] - 1] + 1; last[i] = y[pcp->clend + weight[i]]; } /* generate the list of words; evaluate each, echelonise it and build up the queue of redundant generators */ nmr = 0; loop (queue, queue_length, nmr_entries, list, &nmr, first[nmr_entries], last[nmr_entries], pcp); /* reset value of submlg */ pcp->submlg += total; free_vector (weight, 1); free_vector (first, 0); free_vector (last, 0); free_vector (list, 0); }
/*
 * Produce a 1-bit vector holding the reduction OR of cvec.
 *
 * Bases 0..3 are handled without emitting code: base 1 yields base 1,
 * bases 0, 2 and 3 yield base 0. For any other base the input vector is
 * released, one bit is allocated and a %or/r instruction is written to
 * vvp_out.
 */
static struct vector_info reduction_or(struct vector_info cvec)
{
      struct vector_info result;

      result.wid = 1;

      if (cvec.base == 1) {
	    result.base = 1;

      } else if (cvec.base == 0 || cvec.base == 2 || cvec.base == 3) {
	    result.base = 0;

      } else {
	    clr_vector(cvec);
	    result.base = allocate_vector(1);
	    fprintf(vvp_out, " %%or/r %u, %u, %u;\n", result.base, cvec.base, cvec.wid);
      }

      return result;
}
/* Difference Variance
 *
 * Builds the distribution of |i - j| from the Ng x Ng matrix P and returns
 * its second moment minus the square of its first moment.
 */
double f10_dvar (double **P, int Ng)
{
  double *diff_hist = allocate_vector (0, 2*Ng);
  double first_moment = 0, second_moment = 0;
  int row, col, k;

  for (k = 0; k <= 2 * Ng; ++k)
    diff_hist[k] = 0;

  /* diff_hist[d] accumulates every P[i][j] with |i - j| == d */
  for (row = 0; row < Ng; ++row) {
    const double *p_row = P[row];
    for (col = 0; col < Ng; ++col)
      diff_hist[abs (row - col)] += p_row[col];
  }

  /* Now calculate the variance of the difference distribution (Px-y) */
  for (k = 0; k < Ng; ++k) {
    first_moment += k * diff_hist[k];
    second_moment += k * k * diff_hist[k];
  }

  free (diff_hist);

  return second_moment - first_moment*first_moment;
}
/*
 * Allocate a vector as wide as the whole l-value of the statement and
 * fill it, l-value by l-value, through get_vec_from_lval_slice(). Entry
 * lidx of slices is passed along for l-value number lidx. Each l-value
 * contributes at most the bits that are still free in the result.
 */
static struct vector_info get_vec_from_lval(ivl_statement_t net, struct vec_slice_info*slices)
{
      struct vector_info res;
      unsigned filled = 0;   /* bits of res already assigned to l-values */
      unsigned lidx = 0;

      res.wid = ivl_stmt_lwidth(net);
      res.base = allocate_vector(res.wid);

      while (lidx < ivl_stmt_lvals(net)) {
	    const unsigned remaining = res.wid - filled;
	    ivl_lval_t lval = ivl_stmt_lval(net, lidx);
	    const unsigned lval_wid = ivl_lval_width(lval);
	    const unsigned use_wid = (remaining > lval_wid) ? lval_wid : remaining;

	    get_vec_from_lval_slice(lval, slices+lidx, res.base + filled, use_wid);

	    filled += use_wid;
	    lidx += 1;
      }

      return res;
}
/*
 * Return the elements of *previous that are no longer present in current,
 * then replace *previous with a copy of current.
 *
 * The returned vector and the strings it holds are heap allocated and are
 * owned by the caller. The old *previous is released with free_vector().
 */
struct String_vector* removed_and_set(const struct String_vector* current, struct String_vector** previous)
{
    struct String_vector* diff = malloc(sizeof(struct String_vector));
    int count = 0;
    int i;

    /* First pass: count the removed elements so diff can be sized. */
    for (i = 0; i < (*previous)->count; i++) {
        if (!contains((*previous)->data[i], current)) {
            count++;
        }
    }

    allocate_vector(diff, count);

    int prev_count = count;
    count = 0;

    /* Second pass: copy each removed element into diff. */
    for (i = 0; i < (*previous)->count; i++) {
        if (!contains((*previous)->data[i], current)) {
            /* +1 for the terminating NUL that strcpy writes; without it
               the copy overruns the buffer by one byte. */
            diff->data[count] = malloc(strlen((*previous)->data[i]) + 1);
            strcpy(diff->data[count++], (*previous)->data[i]);
        }
    }

    assert(prev_count == count);

    free_vector(*previous);
    (*previous) = make_copy(current);

    return diff;
}
void bootstrap() { if(!connected) { LOG_WARN(("Client not connected to ZooKeeper")); return; } create_parent("/workers", ""); create_parent("/assign", ""); create_parent("/tasks", ""); create_parent("/status", ""); // Initialize tasks tasks = malloc(sizeof(struct String_vector)); allocate_vector(tasks, 0); workers = malloc(sizeof(struct String_vector)); allocate_vector(workers, 0); }
/* Allocate a vector of LENGTH elements tagged with TYPE and headed by a
   TC_MANIFEST_VECTOR word, taking the storage from Free.  When
   GC_CHECK_P is true, Primitive_GC_If_Needed is first invoked for
   1 + LENGTH words.  */
SCHEME_OBJECT
allocate_marked_vector (unsigned int type,
			unsigned long length,
			bool gc_check_p)
{
  SCHEME_OBJECT vector;

  if (gc_check_p)
    {
      Primitive_GC_If_Needed (1 + length);
    }
  vector = (allocate_vector (type, TC_MANIFEST_VECTOR, length, (&Free)));
  return (vector);
}
/* Information Measures of Correlation
 *
 * Returns (HXY - HXY1) / max(HX, HY) for the Ng x Ng matrix P, with all
 * entropies in bits (log10(x)/log10(2.0)). Returns 1 when max(HX, HY)
 * is exactly zero, to avoid dividing by zero.
 */
double f12_icorr (double **P, int Ng)
{
  int i, j;
  double *px, *py;
  double hx = 0, hy = 0, hxy = 0, hxy1 = 0;
  double hmax;

  px = allocate_vector (0, Ng);
  py = allocate_vector (0, Ng);

  /*
   * The marginals below are accumulated with +=, so they must start from
   * zero. Clear them explicitly, as the sibling feature functions do,
   * instead of relying on what the allocator returns.
   */
  for (i = 0; i < Ng; ++i)
    px[i] = py[i] = 0;

  /* All /log10(2.0) added by M. Boland */

  /*
   * px[i] is the sum of row i of P and py[j] is the sum of column j of P
   * (the marginal probabilities).
   */
  for (i = 0; i < Ng; ++i) {
    for (j = 0; j < Ng; ++j) {
      px[i] += P[i][j];
      py[j] += P[i][j];
    }
  }

  for (i = 0; i < Ng; ++i)
    for (j = 0; j < Ng; ++j) {
      hxy1 -= P[i][j] * log10 (px[i] * py[j] + EPSILON)/log10(2.0);
      hxy -= P[i][j] * log10 (P[i][j] + EPSILON)/log10(2.0);
    }

  /* Calculate entropies of px and py */
  for (i = 0; i < Ng; ++i) {
    hx -= px[i] * log10 (px[i] + EPSILON)/log10(2.0);
    hy -= py[i] * log10 (py[i] + EPSILON)/log10(2.0);
  }

  free(px);
  free(py);

  hmax = (hx > hy ? hx : hy);
  if (hmax == 0)
    return (1);
  else
    return ((hxy - hxy1) / hmax);
}
/*
 * Second information measure of correlation: returns
 * sqrt(|1 - exp(-2 * (HXY2 - HXY))|) for the Ng x Ng matrix P, with the
 * entropies in bits (log10(x)/log10(2.0)).
 */
double f13_icorr (double **P, int Ng)
{
  int i, j;
  double *px, *py;
  double hxy = 0, hxy2 = 0;

  px = allocate_vector (0, Ng);
  py = allocate_vector (0, Ng);

  /*
   * The marginals below are accumulated with +=, so they must start from
   * zero. Clear them explicitly, as the sibling feature functions do,
   * instead of relying on what the allocator returns.
   */
  for (i = 0; i < Ng; ++i)
    px[i] = py[i] = 0;

  /* All /log10(2.0) added by M. Boland */

  /*
   * px[i] is the sum of row i of P and py[j] is the sum of column j of P
   * (the marginal probabilities).
   */
  for (i = 0; i < Ng; ++i) {
    for (j = 0; j < Ng; ++j) {
      px[i] += P[i][j];
      py[j] += P[i][j];
    }
  }

  /* Only HXY and HXY2 enter the result, so only those are accumulated. */
  for (i = 0; i < Ng; ++i)
    for (j = 0; j < Ng; ++j) {
      hxy2 -= px[i] * py[j] * log10 (px[i] * py[j] + EPSILON)/log10(2.0);
      hxy -= P[i][j] * log10 (P[i][j] + EPSILON)/log10(2.0);
    }

  free(px);
  free(py);

  return (sqrt (fabs (1 - exp (-2.0 * (hxy2 - hxy)))));
}
/*
 * Emit a %vpi_func call for the system function expression fnet.
 *
 * A result vector of wid bits is allocated, the start of the instruction
 * (function name, result base and width) is formatted into a local
 * buffer, and draw_vpi_taskfunc_args() is called with that text and the
 * expression. Returns the result vector.
 */
struct vector_info draw_vpi_func_call(ivl_expr_t fnet, unsigned wid)
{
      char call_string[1024];
      struct vector_info res;

      res.base = allocate_vector(wid);
      res.wid  = wid;

	/* Bounded formatting: the function name comes from the design, so
	   it must not be able to overrun the fixed-size buffer. */
      snprintf(call_string, sizeof call_string,
	       " %%vpi_func \"%s\", %u, %u",
	       ivl_expr_name(fnet), res.base, res.wid);

      draw_vpi_taskfunc_args(call_string, 0, fnet);

      return res;
}
/*
 * Emit the call of a user function and load its result into a newly
 * allocated thread vector of wid bits.
 *
 * If the vector cannot be allocated, an error is reported on stderr,
 * vvp_errors is incremented and the vector (with base 0) is returned
 * without emitting the load or the epilogue.
 */
struct vector_info draw_ufunc_expr(ivl_expr_t expr, unsigned wid)
{
      unsigned swid = ivl_expr_width(expr);
      ivl_signal_t retval = ivl_scope_port(ivl_expr_def(expr), 0);
      struct vector_info res;

	/* Take in arguments to function and call function code. */
      draw_ufunc_preamble(expr);

	/* Fresh basic block starts after the join. */
      clear_expression_lookaside();

	/* The return value is in a signal that has the name of the
	   expression. Load that into the thread and return the
	   vector result. */
      res.base = allocate_vector(wid);
      res.wid  = wid;

      if (res.base != 0) {
	    unsigned load_wid = swid;

	    assert(res.base != 0);

	      /* Never load more bits than the return signal has. */
	    if (load_wid > ivl_signal_width(retval))
		  load_wid = ivl_signal_width(retval);

	    assert(ivl_signal_dimensions(retval) == 0);
	    fprintf(vvp_out, " %%load/v %u, v%p_0, %u;\n", res.base, retval, load_wid);

	      /* Pad the signal value with zeros. */
	    if (load_wid < wid)
		  pad_expr_in_place(expr, res, swid);

	    draw_ufunc_epilogue(expr);

      } else {
	    fprintf(stderr, "%s:%u: vvp.tgt error: "
		    "Unable to allocate %u thread bits for function result.\n",
		    ivl_expr_file(expr), ivl_expr_lineno(expr), wid);
	    vvp_errors += 1;
      }

      return res;
}
/*
 * Convert the bit string K into a subset.
 *
 * The bits of K are scanned from the most significant of the
 * BITES_IN_INT bits downwards; for every bit that is set its position
 * (BITES_IN_INT - i on iteration i) is stored, filling the result from
 * index pga->s - 1 down to 0. Scanning stops once pga->s positions have
 * been stored. The returned vector is allocated here.
 */
int *bitstring_to_subset(int K, struct pga_vars *pga)
{
   int length = pga->s; /* number of elements of subset */
   int *subset;
   register int i;

   /* Work on unsigned copies: left-shifting a signed int into or through
      the sign bit is undefined behaviour, whereas unsigned shifts are
      well defined and give the same bit pattern. */
   unsigned int bits = (unsigned int) K;
   const unsigned int mask = 1u << (BITES_IN_INT - 1);

   subset = allocate_vector(pga->s, 0, 1);

   for (i = 1; i <= BITES_IN_INT && length > 0; ++i) {
      if ((bits & mask) != 0) {
         --length;
         subset[length] = BITES_IN_INT - i;
      }
      bits <<= 1;
   }

   return subset;
}
/*
 * Correlation feature of the Ng x Ng matrix P.
 *
 * Only the row marginal px is computed; the column statistics are taken
 * to be equal to the row statistics (meany = meanx, stddevy = stddevx).
 * Returns 1 when the product of the standard deviations is exactly zero.
 */
double correlac (double **P, int Ng)
{
  double *row_sum = allocate_vector (0, Ng);
  double mean = 0, second_moment = 0;
  double stddev, cross = 0, denom;
  int r, c;

  /* row_sum[r] is the sum of row r of P (marginal probability) */
  for (r = 0; r < Ng; ++r)
    row_sum[r] = 0;
  for (r = 0; r < Ng; ++r)
    for (c = 0; c < Ng; ++c)
      row_sum[r] += P[r][c];

  /* mean and second moment of the marginal distribution */
  for (r = 0; r < Ng; ++r) {
    mean += row_sum[r]*r;
    second_moment += row_sum[r]*r*r;
  }
  stddev = sqrt (second_moment - (mean * mean));

  /* Finally, the correlation ... */
  for (r = 0; r < Ng; ++r)
    for (c = 0; c < Ng; ++c)
      cross += r*c*P[r][c];

  free(row_sum);

  denom = stddev * stddev;
  if (denom == 0)
    return (1); /* protect from error */

  return (cross - mean * mean) / denom;
}
/* Difference Entropy
 *
 * Entropy, in bits, of the distribution of |i - j| built from the
 * Ng x Ng matrix P. EPSILON is added inside the logarithm.
 */
double f11_dentropy (double **P, int Ng)
{
  double *diff_hist = allocate_vector (0, 2*Ng);
  double acc = 0;
  int row, col, k;

  k = 0;
  while (k <= 2 * Ng)
    diff_hist[k++] = 0;

  /* diff_hist[d] accumulates every P[i][j] with |i - j| == d */
  for (row = 0; row < Ng; ++row) {
    const double *p_row = P[row];
    for (col = 0; col < Ng; ++col)
      diff_hist[abs (row - col)] += p_row[col];
  }

  for (k = 0; k < Ng; ++k)
    acc += diff_hist[k] * log10 (diff_hist[k] + EPSILON)/log10(2.0);

  free (diff_hist);

  return -acc;
}
/* Sum Entropy
 *
 * Entropy, in bits, of the distribution of i + j built from the Ng x Ng
 * matrix P. The bins are stored at index i + j + 2, so the occupied
 * indices run from 2 to 2*Ng. EPSILON is added inside the logarithm.
 */
double f8_sentropy (double **P, int Ng)
{
  double *sum_hist = allocate_vector (0, 2*Ng);
  double entropy = 0;
  int row, col, k;

  k = 0;
  while (k <= 2 * Ng)
    sum_hist[k++] = 0;

  for (row = 0; row < Ng; ++row) {
    const double *p_row = P[row];
    for (col = 0; col < Ng; ++col)
      sum_hist[row + col + 2] += p_row[col];
  }

  for (k = 2; k <= 2 * Ng; ++k)
    entropy -= sum_hist[k] * log10 (sum_hist[k] + EPSILON)/log10(2.0);

  free (sum_hist);

  return entropy;
}
/*
 * Sum Average: the mean of the distribution of i + j built from the
 * Ng x Ng matrix P. Bins are indexed from 0 (index i + j), so the
 * occupied indices run from 0 to 2*Ng - 2.
 */
double savg (double **P, int Ng)
{
  double *sum_hist = allocate_vector (0, 2*Ng);
  double mean = 0;
  int row, col, k;

  k = 0;
  while (k <= 2 * Ng)
    sum_hist[k++] = 0;

  for (row = 0; row < Ng; ++row) {
    const double *p_row = P[row];
    for (col = 0; col < Ng; ++col)
      sum_hist[row + col] += p_row[col];
  }

  for (k = 0; k <= (2 * Ng - 2); ++k)
    mean += k * sum_hist[k];

  free (sum_hist);

  return mean;
}
/* Sum Variance
 *
 * Second moment about S of the distribution of i + j built from the
 * Ng x Ng matrix P. Bins are indexed from 0 (index i + j), so the
 * occupied indices run from 0 to 2*Ng - 2.
 */
double f7_svar (double **P, int Ng, double S)
{
  double *sum_hist = allocate_vector (0, 2*Ng);
  double spread = 0;
  int row, col, k;

  k = 0;
  while (k <= 2 * Ng)
    sum_hist[k++] = 0;

  for (row = 0; row < Ng; ++row) {
    const double *p_row = P[row];
    for (col = 0; col < Ng; ++col)
      sum_hist[row + col] += p_row[col];
  }

  for (k = 0; k <= (2 * Ng - 2); ++k)
    spread += (k - S) * (k - S) * sum_hist[k];

  free (sum_hist);

  return spread;
}
/*
 * Emit a %vpi_func call for the system function expression fnet.
 *
 * A result vector of wid bits is allocated; if that fails an error is
 * reported on stderr and vvp_errors is incremented, but the call is still
 * emitted with the zero base. The start of the instruction (file table
 * index, line number, function name, result base and width) is formatted
 * into a local buffer and passed to draw_vpi_taskfunc_args(). Returns the
 * result vector.
 */
struct vector_info draw_vpi_func_call(ivl_expr_t fnet, unsigned wid)
{
      char call_string[1024];
      struct vector_info res;

      res.base = allocate_vector(wid);
      res.wid  = wid;
      if (res.base == 0) {
	    fprintf(stderr, "%s:%u: vvp.tgt error: "
		    "Unable to allocate %u thread bits for system function result.\n",
		    ivl_expr_file(fnet), ivl_expr_lineno(fnet), wid);
	    vvp_errors += 1;
      }

	/* Bounded formatting: the function name comes from the design, so
	   it must not be able to overrun the fixed-size buffer. */
      snprintf(call_string, sizeof call_string,
	       " %%vpi_func %u %u \"%s\", %u, %u",
	       ivl_file_table_index(ivl_expr_file(fnet)),
	       ivl_expr_lineno(fnet), ivl_expr_name(fnet),
	       res.base, res.wid);

      draw_vpi_taskfunc_args(call_string, 0, fnet);

      return res;
}
void stabiliser_option(int option, int ***auts, int **perms, int *a, int *b, char *c, int *orbit_length, struct pga_vars *pga, struct pcp_vars *pcp) { int t; int i; /*Logical soluble_group;*/ FILE *OutputFile; char *StartName; int *rep; int *length; rep = allocate_vector(1, 1, 0); length = allocate_vector(1, 1, 0); t = runTime(); query_solubility(pga); if (pga->soluble) query_space_efficiency(pga); else pga->space_efficient = FALSE; /*soluble_group = (pga->soluble || pga->Degree == 1 || pga->nmr_of_perms == 0);*/ query_terminal(pga); query_exponent_law(pga); query_metabelian_law(pga); query_group_information(pga->p, pga); query_aut_group_information(pga); StartName = GetString("Enter output file name: "); OutputFile = OpenFileOutput(StartName); pga->final_stage = (pga->q == pga->multiplicator_rank); pga->nmr_of_descendants = 0; pga->nmr_of_capables = 0; if (option == STABILISER) { read_value(TRUE, "Input the orbit representative: ", &rep[1], 1); /* find the length of the orbit having this representative */ for (i = 1; i <= pga->nmr_orbits && pga->rep[i] != rep[1]; ++i) ; if (pga->rep[i] == rep[1]) length[1] = orbit_length[i]; else { printf("%d is not an orbit representative\n", rep[1]); return; } } if (option == STABILISER) setup_reps(rep, 1, length, perms, a, b, c, auts, OutputFile, OutputFile, pga, pcp); else setup_reps(pga->rep, pga->nmr_orbits, orbit_length, perms, a, b, c, auts, OutputFile, OutputFile, pga, pcp); /* #if defined (GAP_LINK) if (!soluble_group) QuitGap (); #endif */ RESET(OutputFile); printf("Time to process representative is %.2f seconds\n", (runTime() - t) * CLK_SCALE); }
/*
 * Interactive menu for p-group generation.
 *
 * The menu is listed once, then options are read with read_option() and
 * dispatched by the switch below until the option read is 0 or
 * MAX_INTERACTIVE_OPTION. After the loop, when GAP_LINK is defined and
 * soluble_group is false, QuitGap() is called.
 *
 * State shared between options lives in the locals perms, a, b, c and
 * orbit_length, which all start as 0; PERMUTATIONS assigns perms and
 * passes &a, &b, &c to permute_subgroups(), and ORBITS passes &a, &b, &c
 * and &orbit_length to orbit_option().
 *
 * auts, StartFile and group_nmr are by-value parameters, so assignments
 * made to them here (SUPPLY_AUTS, RESTORE_GP) are not seen by the caller.
 *
 * NOTE(review): OutputFile is initialised to 0 and never assigned in this
 * function, so assert(OutputFile) under SINGLE_STAGE fails whenever
 * assertions are enabled -- confirm this is intended.
 * NOTE(review): ORBIT_REP reads a[label] without checking that a has been
 * set by an earlier option -- confirm callers always run PERMUTATIONS
 * first.
 * NOTE(review): MATRIX_TO_LABEL releases subset but not S, and
 * LABEL_TO_MATRIX releases neither S nor subset in this function.
 */
void interactive_pga(Logical group_present, FILE *StartFile, int group_nmr, int ***auts, struct pga_vars *pga, struct pcp_vars *pcp) { struct pga_vars flag; int option; Logical soluble_group = TRUE; FILE *OutputFile = 0; FILE *LINK_input = 0; char *StartName = 0; int t; int **perms = 0; int index; int **S = 0; int k; int K; int label; int *a = 0, *b = 0; char *c = 0; int *orbit_length = 0; int nmr_of_exponents; int *subset = 0; int alpha; int upper_step; int rep; int i; list_interactive_pga_menu(); do { option = read_option(MAX_INTERACTIVE_OPTION); switch (option) { case -1: list_interactive_pga_menu(); break; case SUPPLY_AUTS: auts = read_auts(PGA, &pga->m, &nmr_of_exponents, pcp); #ifdef HAVE_GMP autgp_order(pga, pcp); #endif pga->soluble = TRUE; start_group(&StartFile, auts, pga, pcp); break; case EXTEND_AUTS: extend_automorphisms(auts, pga->m, pcp); print_auts(pga->m, pcp->lastg, auts, pcp); break; case RESTORE_GP: StartName = GetString("Enter input file name: "); StartFile = OpenFileInput(StartName); if (StartFile != NULL) { read_value(TRUE, "Which group? 
", &group_nmr, 0); auts = restore_group(TRUE, StartFile, group_nmr, pga, pcp); RESET(StartFile); } break; case DISPLAY_GP: print_presentation(FALSE, pcp); print_structure(1, pcp->lastg, pcp); print_pcp_relations(pcp); break; case SINGLE_STAGE: t = runTime(); if (group_present && pga->m == 0) start_group(&StartFile, auts, pga, pcp); assert(OutputFile); construct(1, &flag, SINGLE_STAGE, OutputFile, StartFile, 0, ALL, group_nmr, pga, pcp); t = runTime() - t; printf("Time for intermediate stage is %.2f seconds\n", t * CLK_SCALE); break; case DEGREE: read_step_size(pga, pcp); read_subgroup_rank(&k); query_exponent_law(pga); enforce_laws(pga, pga, pcp); extend_automorphisms(auts, pga->m, pcp); step_range(k, &pga->s, &upper_step, auts, pga, pcp); if (pga->s > upper_step) printf("Desired step size is invalid for current group\n"); else { if (pga->s < upper_step) { printf("The permitted relative step sizes range from %d to %d\n", pga->s, upper_step); read_value( TRUE, "Input the chosen relative step size: ", &pga->s, 0); } store_definition_sets(pga->r, pga->s, pga->s, pga); get_definition_sets(pga); pga->print_degree = TRUE; compute_degree(pga); pga->print_degree = FALSE; } break; case PERMUTATIONS: if (pga->Degree != 0) { t = runTime(); query_solubility(pga); pga->trace = FALSE; if (pga->soluble) query_space_efficiency(pga); else pga->space_efficient = FALSE; query_perm_information(pga); strip_identities(auts, pga, pcp); soluble_group = (pga->soluble || pga->Degree == 1 || pga->nmr_of_perms == 0); if (!soluble_group) { #if defined(GAP_LINK) StartGapFile(pga); #else #if defined(GAP_LINK_VIA_FILE) start_GAP_file(&LINK_input, auts, pga, pcp); #endif #endif } perms = permute_subgroups(LINK_input, &a, &b, &c, auts, pga, pcp); #if defined(GAP_LINK_VIA_FILE) if (!soluble_group) CloseFile(LINK_input); #endif t = runTime() - t; printf("Time to compute permutations is %.2f seconds\n", t * CLK_SCALE); } else printf("You must first select option %d\n", DEGREE); break; case ORBITS: 
orbit_option(option, perms, &a, &b, &c, &orbit_length, pga); break; case STABILISERS: case STABILISER: assert(perms); stabiliser_option( option, auts, perms, a, b, c, orbit_length, pga, pcp); /* free_space (pga->soluble, perms, orbit_length, a, b, c, pga); */ break; case MATRIX_TO_LABEL: S = allocate_matrix(pga->s, pga->q, 0, FALSE); subset = allocate_vector(pga->s, 0, FALSE); printf("Input the %d x %d subgroup matrix:\n", pga->s, pga->q); read_matrix(S, pga->s, pga->q); K = echelonise_matrix(S, pga->s, pga->q, pga->p, subset, pga); printf("The standard matrix is:\n"); print_matrix(S, pga->s, pga->q); printf("The label is %d\n", subgroup_to_label(S, K, subset, pga)); free_vector(subset, 0); break; case LABEL_TO_MATRIX: read_value(TRUE, "Input allowable subgroup label: ", &label, 1); S = label_to_subgroup(&index, &subset, label, pga); printf("The corresponding standard matrix is\n"); print_matrix(S, pga->s, pga->q); break; case IMAGE: t = runTime(); /* invert_automorphisms (auts, pga, pcp); print_auts (pga->m, pcp->lastg, auts, pcp); */ printf("Input the subgroup label and automorphism number: "); read_value(TRUE, "", &label, 1); read_value(FALSE, "", &alpha, 1); printf("Image is %d\n", find_image(label, auts[alpha], pga, pcp)); t = runTime() - t; printf("Computation time in seconds is %.2f\n", t * CLK_SCALE); break; case SUBGROUP_RANK: read_subgroup_rank(&k); printf("Closure of initial segment subgroup has rank %d\n", close_subgroup(k, auts, pga, pcp)); break; case ORBIT_REP: printf("Input label for subgroup: "); read_value(TRUE, "", &label, 1); rep = abs(a[label]); for (i = 1; i <= pga->nmr_orbits && pga->rep[i] != rep; ++i) ; printf("Subgroup with label %d has representative %d and is in orbit " "%d\n", label, rep, i); break; case COMPACT_DESCRIPTION: Compact_Description = TRUE; read_value(TRUE, "Lower bound for order (0 for all groups generated)? 
", &Compact_Order, 0); break; case AUT_CLASSES: t = runTime(); permute_elements(); t = runTime() - t; printf("Time to compute orbits is %.2f seconds\n", t * CLK_SCALE); break; /* printf ("Input label: "); scanf ("%d", &l); process_complete_orbit (a, l, pga, pcp); break; case TEMP: printf ("Input label: "); scanf ("%d", &l); printf ("Input label: "); scanf ("%d", &u); for (i = l; i <= u; ++i) { x = IsValidAllowableSubgroup (i, pga); printf ("%d is %d\n", i, x); } StartName = GetString ("Enter output file name: "); OutputFile = OpenFileOutput (StartName); part_setup_reps (pga->rep, pga->nmr_orbits, orbit_length, perms, a, b, c, auts, OutputFile, OutputFile, pga, pcp); list_word (pga, pcp); read_value (TRUE, "Input the rank of the subgroup: ", &pga->q, 1); strip_identities (auts, pga, pcp); break; */ case EXIT: case MAX_INTERACTIVE_OPTION: printf("Exiting from interactive p-group generation menu\n"); break; } /* switch */ } while (option != 0 && option != MAX_INTERACTIVE_OPTION); #if defined(GAP_LINK) if (!soluble_group) QuitGap(); #endif }
// Make sure the vector holds at least minimum_size elements: storage is
// only (re)allocated when the current size is smaller than requested.
void FFT_NUC_VECTOR::resize(size_t minimum_size)
{
    const bool large_enough = !(vector_size < minimum_size);

    if (large_enough) {
        return;
    }

    allocate_vector(minimum_size);
}
int main() { vector x,b; vector r,p,Ap; matrix A; double one=1.0, zero=0.0; double normr, rtrans, oldtrans, p_ap_dot , alpha, beta; int iter=0; //create matrix allocate_3d_poission_matrix(A,N); printf("Rows: %d, nnz: %d\n", A.num_rows, A.row_offsets[A.num_rows]); allocate_vector(x,A.num_rows); allocate_vector(Ap,A.num_rows); allocate_vector(r,A.num_rows); allocate_vector(p,A.num_rows); allocate_vector(b,A.num_rows); initialize_vector(x,100000); initialize_vector(b,1); waxpby(one, x, zero, x, p); matvec(A,p,Ap); waxpby(one, b, -one, Ap, r); rtrans=dot(r,r); normr=sqrt(rtrans); double st = omp_get_wtime(); do { if(iter==0) { waxpby(one,r,zero,r,p); } else { oldtrans=rtrans; rtrans = dot(r,r); beta = rtrans/oldtrans; waxpby(one,r,beta,p,p); } normr=sqrt(rtrans); matvec(A,p,Ap); p_ap_dot = dot(Ap,p); alpha = rtrans/p_ap_dot; waxpby(one,x,alpha,p,x); waxpby(one,r,-alpha,Ap,r); if(iter%10==0) printf("Iteration: %d, Tolerance: %.4e\n", iter, normr); iter++; } while(iter<MAX_ITERS && normr>TOL); double et = omp_get_wtime(); printf("Total Iterations: %d\n", iter); printf("Total Time: %lf s\n", (et-st)); free_vector(x); free_vector(r); free_vector(p); free_vector(Ap); free_matrix(A); return 0; }
/*
 * Emit code for a blocking assignment of a vector value. Always returns 0.
 *
 * Three paths, tried in this order:
 *  - the r-value is real: it is evaluated with draw_eval_real(), converted
 *    with %cvt/vr into a freshly allocated vector as wide as the l-value,
 *    stored with set_vec_to_lval(), and the word and vector are released;
 *  - the r-value is a constant number: its bits are written directly to
 *    each l-value (memory or variable), consuming at most
 *    ivl_lval_pins() bits per l-value and filling any remaining pins of
 *    the l-value with 0; when the l-value has a mux or a memory index, a
 *    %jmp/1 around the set is emitted and the label is placed afterwards;
 *  - otherwise the expression is evaluated with draw_eval_expr() and
 *    stored with set_vec_to_lval(); the result is released only when its
 *    base is above 3.
 */
static int show_stmt_assign_vector(ivl_statement_t net) { ivl_lval_t lval; ivl_expr_t rval = ivl_stmt_rval(net); ivl_memory_t mem; /* Handle the special case that the expression is a real value. Evaluate the real expression, then convert the result to a vector. Then store that vector into the l-value. */ if (ivl_expr_value(rval) == IVL_VT_REAL) { int word = draw_eval_real(rval); /* This is the accumulated width of the l-value of the assignment. */ unsigned wid = ivl_stmt_lwidth(net); struct vector_info vec; vec.base = allocate_vector(wid); vec.wid = wid; fprintf(vvp_out, " %%cvt/vr %u, %d, %u;\n", vec.base, word, vec.wid); clr_word(word); set_vec_to_lval(net, vec); clr_vector(vec); return 0; } /* Handle the special case that the r-value is a constant. We can generate the %set statement directly, without any worry about generating code to evaluate the r-value expressions. */ if (ivl_expr_type(rval) == IVL_EX_NUMBER) { unsigned lidx; const char*bits = ivl_expr_bits(rval); unsigned wid = ivl_expr_width(rval); unsigned cur_rbit = 0; for (lidx = 0 ; lidx < ivl_stmt_lvals(net) ; lidx += 1) { unsigned skip_set = transient_id++; unsigned skip_set_flag = 0; unsigned idx; unsigned bit_limit = wid - cur_rbit; lval = ivl_stmt_lval(net, lidx); /* If there is a mux for the lval, calculate the value and write it into index0. */ if (ivl_lval_mux(lval)) { calculate_into_x0(ivl_lval_mux(lval)); /* Generate code to skip around the set if the index has X values. */ fprintf(vvp_out, " %%jmp/1 t_%u, 4;\n", skip_set); skip_set_flag = 1; } mem = ivl_lval_mem(lval); if (mem) { draw_memory_index_expr(mem, ivl_lval_idx(lval)); /* Generate code to skip around the set if the index has X values. 
*/ fprintf(vvp_out, " %%jmp/1 t_%u, 4;\n", skip_set); skip_set_flag = 1; } if (bit_limit > ivl_lval_pins(lval)) bit_limit = ivl_lval_pins(lval); if (mem) { for (idx = 0 ; idx < bit_limit ; idx += 1) { set_to_memory(mem, idx, bitchar_to_idx(bits[cur_rbit])); cur_rbit += 1; } for (idx = bit_limit ; idx < ivl_lval_pins(lval) ; idx += 1) set_to_memory(mem, idx, 0); } else { idx = 0; while (idx < bit_limit) { unsigned cnt = 1; while (((idx + cnt) < bit_limit) && (bits[cur_rbit] == bits[cur_rbit+cnt])) cnt += 1; set_to_lvariable(lval, idx, bitchar_to_idx(bits[cur_rbit]), cnt); cur_rbit += cnt; idx += cnt; } if (bit_limit < ivl_lval_pins(lval)) { unsigned cnt = ivl_lval_pins(lval) - bit_limit; set_to_lvariable(lval, bit_limit, 0, cnt); } } if (skip_set_flag) { fprintf(vvp_out, "t_%u ;\n", skip_set); clear_expression_lookaside(); } } return 0; } { struct vector_info res = draw_eval_expr(rval, 0); set_vec_to_lval(net, res); if (res.base > 3) clr_vector(res); } return 0; }
/*
 * Emit code that loads the current value of one l-value into the thread
 * bits starting at `bit`, `wid` bits wide, and record in *slice which kind
 * of access was used.
 *
 * A constant part offset or constant word index is folded into part_off /
 * use_word first; if the l-value has a mux, the mux expression is used as
 * the part offset expression. The access is then classified as one of:
 *   SLICE_SIMPLE_VECTOR       - whole non-array signal, plain %load/v
 *   SLICE_PART_SELECT_STATIC  - non-array signal, constant part offset
 *   SLICE_PART_SELECT_DYNAMIC - non-array signal, computed part offset
 *   SLICE_MEMORY_WORD_STATIC  - array word with constant index; an index
 *                               at or beyond ivl_signal_array_count()
 *                               emits a %mov from bit 2 instead of a load
 *   SLICE_MEMORY_WORD_DYNAMIC - array word with computed index
 * The two dynamic cases allocate a word register and a 1-bit x_flag,
 * store them in *slice, and emit a %jmp/1 on bit 4 to a path that moves
 * bit 2 into the destination instead of loading. Any other combination
 * hits assert(0).
 */
static void get_vec_from_lval_slice(ivl_lval_t lval, struct vec_slice_info*slice, unsigned bit, unsigned wid) { ivl_signal_t sig = ivl_lval_sig(lval); ivl_expr_t part_off_ex = ivl_lval_part_off(lval); unsigned long part_off = 0; /* Although Verilog doesn't support it, we'll handle here the case of an l-value part select of an array word if the address is constant. */ ivl_expr_t word_ix = ivl_lval_idx(lval); unsigned long use_word = 0; if (part_off_ex == 0) { part_off = 0; } else if (number_is_immediate(part_off_ex, IMM_WID, 0) && !number_is_unknown(part_off_ex)) { part_off = get_number_immediate(part_off_ex); part_off_ex = 0; } /* If the word index is a constant expression, then evaluate it to select the word, and pay no further heed to the expression itself. */ if (word_ix && number_is_immediate(word_ix, IMM_WID, 0)) { assert(! number_is_unknown(word_ix)); use_word = get_number_immediate(word_ix); word_ix = 0; } if (ivl_lval_mux(lval)) part_off_ex = ivl_lval_mux(lval); if (ivl_signal_dimensions(sig)==0 && part_off_ex==0 && word_ix==0 && part_off==0 && wid==ivl_signal_width(sig)) { slice->type = SLICE_SIMPLE_VECTOR; slice->u_.simple_vector.use_word = use_word; fprintf(vvp_out, " %%load/v %u, v%p_%lu, %u;\n", bit, sig, use_word, wid); } else if (ivl_signal_dimensions(sig)==0 && part_off_ex==0 && word_ix==0) { assert(use_word == 0); slice->type = SLICE_PART_SELECT_STATIC; slice->u_.part_select_static.part_off = part_off; fprintf(vvp_out, " %%ix/load 1, %lu, 0;\n", part_off); fprintf(vvp_out, " %%load/x1p %u, v%p_0, %u;\n", bit, sig, wid); } else if (ivl_signal_dimensions(sig)==0 && part_off_ex!=0 && word_ix==0) { unsigned skip_set = transient_id++; unsigned out_set = transient_id++; assert(use_word == 0); assert(part_off == 0); slice->type = SLICE_PART_SELECT_DYNAMIC; draw_eval_expr_into_integer(part_off_ex, 1); slice->u_.part_select_dynamic.word_idx_reg = allocate_word(); slice->u_.part_select_dynamic.x_flag = allocate_vector(1); fprintf(vvp_out, " %%mov %u, %u, 
1;\n", slice->u_.part_select_dynamic.x_flag, 4); fprintf(vvp_out, " %%mov/wu %d, %d;\n", slice->u_.part_select_dynamic.word_idx_reg, 1); fprintf(vvp_out, " %%jmp/1 t_%u, 4;\n", skip_set); fprintf(vvp_out, " %%load/x1p %u, v%p_0, %u;\n", bit, sig, wid); fprintf(vvp_out, " %%jmp t_%u;\n", out_set); fprintf(vvp_out, "t_%u ;\n", skip_set); fprintf(vvp_out, " %%mov %u, 2, %u;\n", bit, wid); fprintf(vvp_out, "t_%u ;\n", out_set); } else if (ivl_signal_dimensions(sig) > 0 && word_ix == 0) { slice->type = SLICE_MEMORY_WORD_STATIC; slice->u_.memory_word_static.use_word = use_word; if (use_word < ivl_signal_array_count(sig)) { fprintf(vvp_out, " %%ix/load 3, %lu, 0;\n", use_word); fprintf(vvp_out, " %%load/av %u, v%p, %u;\n", bit, sig, wid); } else { fprintf(vvp_out, " %%mov %u, 2, %u; OUT OF BOUNDS\n", bit, wid); } } else if (ivl_signal_dimensions(sig) > 0 && word_ix != 0) { unsigned skip_set = transient_id++; unsigned out_set = transient_id++; slice->type = SLICE_MEMORY_WORD_DYNAMIC; draw_eval_expr_into_integer(word_ix, 3); slice->u_.memory_word_dynamic.word_idx_reg = allocate_word(); slice->u_.memory_word_dynamic.x_flag = allocate_vector(1); fprintf(vvp_out, " %%mov/wu %d, 3;\n", slice->u_.memory_word_dynamic.word_idx_reg); fprintf(vvp_out, " %%mov %u, 4, 1;\n", slice->u_.memory_word_dynamic.x_flag); fprintf(vvp_out, " %%jmp/1 t_%u, 4;\n", skip_set); fprintf(vvp_out, " %%ix/load 1, 0, 0;\n"); fprintf(vvp_out, " %%load/av %u, v%p, %u;\n", bit, sig, wid); fprintf(vvp_out, " %%jmp t_%u;\n", out_set); fprintf(vvp_out, "t_%u ;\n", skip_set); fprintf(vvp_out, " %%mov %u, 2, %u;\n", bit, wid); fprintf(vvp_out, "t_%u ;\n", out_set); } else { assert(0); } }
/*
 * Emit code for a blocking assignment of a vector value, including
 * compressed assignments (non-zero ivl_stmt_opcode). Always returns 0.
 *
 * For a compressed assignment the current l-value contents are first read
 * into lres with get_vec_from_lval(), which also fills the calloc'ed
 * slices array (released at the end of the function). The r-value is then
 * evaluated into res: a real r-value is converted with %cvt/vr into a
 * newly allocated vector (an allocation failure is reported and counted
 * in vvp_errors, but code is still emitted), anything else goes through
 * draw_eval_expr().
 *
 * Opcode 0 stores res with set_vec_to_lval(). For + * & | ^ the operation
 * is emitted into res when res.base > 3 (and lres is released), otherwise
 * into lres, whose base then replaces res.base. For - / % the operation is
 * emitted into lres and the result moved into res. All of these then
 * store res with put_vec_to_lval(). An unknown opcode writes a comment to
 * vvp_out and hits assert(0). Finally res is released when its base is
 * above 3.
 *
 * NOTE(review): the shift cases 'l', 'r' and 'R' emit the shift and the
 * %mov into res but, unlike every other compressed opcode, neither call
 * put_vec_to_lval() nor clr_vector(lres) -- confirm whether the store and
 * the release are missing here.
 */
static int show_stmt_assign_vector(ivl_statement_t net) { ivl_expr_t rval = ivl_stmt_rval(net); struct vector_info res; struct vector_info lres = {0, 0}; struct vec_slice_info*slices = 0; /* If this is a compressed assignment, then get the contents of the l-value. We need these values as part of the r-value calculation. */ if (ivl_stmt_opcode(net) != 0) { slices = calloc(ivl_stmt_lvals(net), sizeof(struct vec_slice_info)); lres = get_vec_from_lval(net, slices); } /* Handle the special case that the expression is a real value. Evaluate the real expression, then convert the result to a vector. Then store that vector into the l-value. */ if (ivl_expr_value(rval) == IVL_VT_REAL) { draw_eval_real(rval); /* This is the accumulated width of the l-value of the assignment. */ unsigned wid = ivl_stmt_lwidth(net); res.base = allocate_vector(wid); res.wid = wid; if (res.base == 0) { fprintf(stderr, "%s:%u: vvp.tgt error: " "Unable to allocate %u thread bits for " "r-value expression.\n", ivl_expr_file(rval), ivl_expr_lineno(rval), wid); vvp_errors += 1; } fprintf(vvp_out, " %%cvt/vr %u, %u;\n", res.base, res.wid); } else { res = draw_eval_expr(rval, 0); } switch (ivl_stmt_opcode(net)) { case 0: set_vec_to_lval(net, res); break; case '+': if (res.base > 3) { fprintf(vvp_out, " %%add %u, %u, %u;\n", res.base, lres.base, res.wid); clr_vector(lres); } else { fprintf(vvp_out, " %%add %u, %u, %u;\n", lres.base, res.base, res.wid); res.base = lres.base; } put_vec_to_lval(net, slices, res); break; case '-': fprintf(vvp_out, " %%sub %u, %u, %u;\n", lres.base, res.base, res.wid); fprintf(vvp_out, " %%mov %u, %u, %u;\n", res.base, lres.base, res.wid); clr_vector(lres); put_vec_to_lval(net, slices, res); break; case '*': if (res.base > 3) { fprintf(vvp_out, " %%mul %u, %u, %u;\n", res.base, lres.base, res.wid); clr_vector(lres); } else { fprintf(vvp_out, " %%mul %u, %u, %u;\n", lres.base, res.base, res.wid); res.base = lres.base; } put_vec_to_lval(net, slices, res); break; case '/': 
fprintf(vvp_out, " %%div%s %u, %u, %u;\n", ivl_expr_signed(rval)? "/s" : "", lres.base, res.base, res.wid); fprintf(vvp_out, " %%mov %u, %u, %u;\n", res.base, lres.base, res.wid); clr_vector(lres); put_vec_to_lval(net, slices, res); break; case '%': fprintf(vvp_out, " %%mod%s %u, %u, %u;\n", ivl_expr_signed(rval)? "/s" : "", lres.base, res.base, res.wid); fprintf(vvp_out, " %%mov %u, %u, %u;\n", res.base, lres.base, res.wid); clr_vector(lres); put_vec_to_lval(net, slices, res); break; case '&': if (res.base > 3) { fprintf(vvp_out, " %%and %u, %u, %u;\n", res.base, lres.base, res.wid); clr_vector(lres); } else { fprintf(vvp_out, " %%and %u, %u, %u;\n", lres.base, res.base, res.wid); res.base = lres.base; } put_vec_to_lval(net, slices, res); break; case '|': if (res.base > 3) { fprintf(vvp_out, " %%or %u, %u, %u;\n", res.base, lres.base, res.wid); clr_vector(lres); } else { fprintf(vvp_out, " %%or %u, %u, %u;\n", lres.base, res.base, res.wid); res.base = lres.base; } put_vec_to_lval(net, slices, res); break; case '^': if (res.base > 3) { fprintf(vvp_out, " %%xor %u, %u, %u;\n", res.base, lres.base, res.wid); clr_vector(lres); } else { fprintf(vvp_out, " %%xor %u, %u, %u;\n", lres.base, res.base, res.wid); res.base = lres.base; } put_vec_to_lval(net, slices, res); break; case 'l': /* lres <<= res */ fprintf(vvp_out, " %%ix/get 0, %u, %u;\n", res.base, res.wid); fprintf(vvp_out, " %%shiftl/i0 %u, %u;\n", lres.base, res.wid); fprintf(vvp_out, " %%mov %u, %u, %u;\n", res.base, lres.base, res.wid); break; case 'r': /* lres >>= res */ fprintf(vvp_out, " %%ix/get 0, %u, %u;\n", res.base, res.wid); fprintf(vvp_out, " %%shiftr/i0 %u, %u;\n", lres.base, res.wid); fprintf(vvp_out, " %%mov %u, %u, %u;\n", res.base, lres.base, res.wid); break; case 'R': /* lres >>>= res */ fprintf(vvp_out, " %%ix/get 0, %u, %u;\n", res.base, res.wid); fprintf(vvp_out, " %%shiftr/s/i0 %u, %u;\n", lres.base, res.wid); fprintf(vvp_out, " %%mov %u, %u, %u;\n", res.base, lres.base, res.wid); break; 
default: fprintf(vvp_out, "; UNSUPPORTED ASSIGNMENT OPCODE: %c\n", ivl_stmt_opcode(net)); assert(0); break; } if (slices) free(slices); if (res.base > 3) clr_vector(res); return 0; }
int main(int argn, char** argc) { int err, i ,j; int numCPUs = 0; int gid; DATATYPE *a,*b,*c,*d; TimeData timer; double triad_time, copy_time, scale_time, stream_time; char estr[1024]; double result, scalar = 3.0; char* ptr; if (argn != 3) { printf("Usage: %s <cpustr> <events>\n", argc[0]); return 1; } strcpy(estr, argc[2]); allocate_vector(&a, SIZE); allocate_vector(&b, SIZE); allocate_vector(&c, SIZE); allocate_vector(&d, SIZE); err = topology_init(); if (err < 0) { printf("Failed to initialize LIKWID's topology module\n"); return 1; } CpuTopology_t topo = get_cpuTopology(); affinity_init(); int* cpus = (int*)malloc(topo->numHWThreads * sizeof(int)); if (!cpus) return 1; numCPUs = cpustr_to_cpulist(argc[1], cpus, topo->numHWThreads); omp_set_num_threads(numCPUs); err = perfmon_init(numCPUs, cpus); if (err < 0) { printf("Failed to initialize LIKWID's performance monitoring module\n"); affinity_finalize(); topology_finalize(); return 1; } gid = perfmon_addEventSet(estr); if (gid < 0) { printf("Failed to add event string %s to LIKWID's performance monitoring module\n", estr); perfmon_finalize(); affinity_finalize(); topology_finalize(); return 1; } err = perfmon_setupCounters(gid); if (err < 0) { printf("Failed to setup group %d in LIKWID's performance monitoring module\n", gid); perfmon_finalize(); affinity_finalize(); topology_finalize(); return 1; } #ifdef _OPENMP printf(HLINE); #pragma omp parallel { #pragma omp master { printf ("Number of Threads requested = %i\n",omp_get_num_threads()); } likwid_pinThread(cpus[omp_get_thread_num()]); printf ("Thread %d running on processor %d ....\n",omp_get_thread_num(),sched_getcpu()); } #endif #pragma omp parallel for for (int j=0; j<SIZE; j++) { a[j] = 1.0; b[j] = 2.0; c[j] = 0.0; d[j] = 1.0; } err = perfmon_startCounters(); if (err < 0) { printf("Failed to start counters for group %d for thread %d\n",gid, (-1*err)-1); perfmon_finalize(); topology_finalize(); return 1; } time_start(&timer); #pragma omp parallel { for (int 
k=0; k<ITER; k++) { LIKWID_MARKER_START("copy"); #pragma omp for for (int j=0; j<SIZE; j++) { c[j] = a[j]; } LIKWID_MARKER_STOP("copy"); } } time_stop(&timer); err = perfmon_stopCounters(); copy_time = time_print(&timer)/(double)ITER; if (err < 0) { printf("Failed to stop counters for group %d for thread %d\n",gid, (-1*err)-1); perfmon_finalize(); topology_finalize(); return 1; } printf("Processed %.1f Mbyte at copy benchmark in %.4f seconds: %.2f MByte/s\n", 1E-6*(2*SIZE*sizeof(DATATYPE)), copy_time, 1E-6*((2*SIZE*sizeof(DATATYPE))/copy_time)); ptr = strtok(estr,","); j = 0; while (ptr != NULL) { for (i = 0;i < numCPUs; i++) { result = perfmon_getResult(gid, j, cpus[i]); printf("Measurement result for event set %s at CPU %d: %f\n", ptr, cpus[i], result); } ptr = strtok(NULL,","); j++; } strcpy(estr, argc[2]); perfmon_setupCounters(gid); err = perfmon_startCounters(); if (err < 0) { printf("Failed to start counters for group %d for thread %d\n",gid, (-1*err)-1); perfmon_finalize(); topology_finalize(); return 1; } time_start(&timer); #pragma omp parallel { for (int k=0; k<ITER; k++) { LIKWID_MARKER_START("scale"); #pragma omp for for (int j=0; j<SIZE; j++) { b[j] = scalar*c[j]; } LIKWID_MARKER_STOP("scale"); } } time_stop(&timer); err = perfmon_stopCounters(); scale_time = time_print(&timer)/(double)ITER; if (err < 0) { printf("Failed to stop counters for group %d for thread %d\n",gid, (-1*err)-1); perfmon_finalize(); topology_finalize(); return 1; } printf("Processed %.1f Mbyte at scale benchmark in %.4f seconds: %.2f MByte/s\n", 1E-6*(2*SIZE*sizeof(DATATYPE)), copy_time, 1E-6*((2*SIZE*sizeof(DATATYPE))/copy_time)); ptr = strtok(estr,","); j = 0; while (ptr != NULL) { for (i = 0;i < numCPUs; i++) { result = perfmon_getResult(gid, j, cpus[i]); printf("Measurement result for event set %s at CPU %d: %f\n", ptr, cpus[i], result); } ptr = strtok(NULL,","); j++; } strcpy(estr, argc[2]); perfmon_setupCounters(gid); err = perfmon_startCounters(); if (err < 0) { 
printf("Failed to start counters for group %d for thread %d\n",gid, (-1*err)-1); perfmon_finalize(); topology_finalize(); return 1; } time_start(&timer); #pragma omp parallel { for (int k=0; k<ITER; k++) { LIKWID_MARKER_START("stream"); #pragma omp for for (int j=0; j<SIZE; j++) { c[j] = a[j] + b[j]; } LIKWID_MARKER_STOP("stream"); } } time_stop(&timer); err = perfmon_stopCounters(); stream_time = time_print(&timer)/(double)ITER; if (err < 0) { printf("Failed to stop counters for group %d for thread %d\n",gid, (-1*err)-1); perfmon_finalize(); topology_finalize(); return 1; } printf("Processed %.1f Mbyte at stream benchmark in %.4f seconds: %.2f MByte/s\n", 1E-6*(2*SIZE*sizeof(DATATYPE)), copy_time, 1E-6*((2*SIZE*sizeof(DATATYPE))/copy_time)); ptr = strtok(estr,","); j = 0; while (ptr != NULL) { for (i = 0;i < numCPUs; i++) { result = perfmon_getResult(gid, j, cpus[i]); printf("Measurement result for event set %s at CPU %d: %f\n", ptr, cpus[i], result); } ptr = strtok(NULL,","); j++; } strcpy(estr, argc[2]); perfmon_setupCounters(gid); err = perfmon_startCounters(); if (err < 0) { printf("Failed to start counters for group %d for thread %d\n",gid, (-1*err)-1); perfmon_finalize(); topology_finalize(); return 1; } time_start(&timer); #pragma omp parallel { for (int k=0; k<ITER; k++) { LIKWID_MARKER_START("triad"); #pragma omp for for (int j=0; j<SIZE; j++) { a[j] = b[j] + c[j] * scalar; } LIKWID_MARKER_STOP("triad"); } } time_stop(&timer); err = perfmon_stopCounters(); triad_time = time_print(&timer)/(double)ITER; if (err < 0) { printf("Failed to stop counters for group %d for thread %d\n",gid, (-1*err)-1); perfmon_finalize(); topology_finalize(); return 1; } printf("Processed %.1f Mbyte at triad benchmark in %.4f seconds: %.2f MByte/s\n", 1E-6*(4*SIZE*sizeof(DATATYPE)), triad_time, 1E-6*((4*SIZE*sizeof(DATATYPE))/triad_time)); ptr = strtok(estr,","); j = 0; while (ptr != NULL) { for (i = 0;i < numCPUs; i++) { result = perfmon_getResult(gid, j, cpus[i]); 
printf("Measurement result for event set %s at CPU %d: %f\n", ptr, cpus[i], result); } ptr = strtok(NULL,","); j++; } perfmon_finalize(); affinity_finalize(); topology_finalize(); return 0; }
void save_auts(FILE *ofp, int *head, int *list, struct pcp_vars *pcp) { register int alpha; register int offset; register int required_offset; register int prev = 0; register int m = pcp->m; register int required_ptr, stored_ptr; int required_length, stored_length; register int original, diff; register int j; int list_length; int retain; int nmr_items; int *copy_head; /* the action on more than lastg generators may be stored in list; if this is the case, establish how many entries from the array list must be stored in order to retain the description of the automorphisms on lastg generators */ original = head[0]; if (head[0] > pcp->lastg) { copy_head = allocate_vector(pcp->lastg * m + 1, 0, FALSE); list_length = 0; retain = pcp->lastg; diff = 0; for (alpha = 1; alpha <= m; ++alpha) { offset = (alpha - 1) * original; required_offset = (alpha - 1) * retain; for (j = 1; j <= retain; ++j) copy_head[required_offset + j] = head[offset + j] - diff; required_ptr = head[offset + retain]; stored_ptr = head[offset + original]; stored_length = stored_ptr + list[stored_ptr + 1] + 1 - prev; required_length = required_ptr + list[required_ptr + 1] + 1 - prev; diff += stored_length - required_length; list_length += required_length; prev += stored_length; } } else { copy_head = head; retain = head[0]; list_length = list[0]; } prev = 0; nmr_items = fwrite(&retain, sizeof(int), 1, ofp); verify_read(nmr_items, 1); nmr_items = fwrite(&list_length, sizeof(int), 1, ofp); verify_read(nmr_items, 1); for (alpha = 1; alpha <= m; ++alpha) { offset = (alpha - 1) * original; required_offset = (alpha - 1) * retain; nmr_items = fwrite(copy_head + required_offset + 1, sizeof(int), retain, ofp); verify_read(nmr_items, retain); required_ptr = head[offset + retain]; stored_ptr = head[offset + original]; stored_length = stored_ptr + list[stored_ptr + 1] + 1 - prev; required_length = required_ptr + list[required_ptr + 1] + 1 - prev; nmr_items = fwrite(&required_length, sizeof(int), 1, ofp); 
verify_read(nmr_items, 1); nmr_items = fwrite(list + prev + 1, sizeof(int), required_length, ofp); verify_read(nmr_items, required_length); prev += stored_length; } if (original != retain) free_vector(copy_head, 0); RESET(ofp); }
int main(){ int i, k; int nworkers, totalworkers; char cpuCount[20]; double *a, *b, *c, *d; double sums[2000]; cpu_set_t cpuset; TimeData timer; double triad_time, copy_time, total = 0; nprocessors = sysconf(_SC_NPROCESSORS_CONF); nworkers = cilk_spawn get_nworkers(); totalworkers = cilk_spawn get_totalworkers(); for (i=0;i<nworkers;i++) { sums[i] = 0; } LIKWID_MARKER_INIT; cilk_spawn allocate_vector(&a, SIZE); cilk_spawn allocate_vector(&b, SIZE); cilk_spawn allocate_vector(&c, SIZE); cilk_spawn allocate_vector(&d, SIZE); cilk_sync; for (i=0; i<SIZE; i++) { a[i] = 1.0; b[i] = 2.0; c[i] = 0.0; d[i] = 1.0; } time_start(&timer); for (k=0; k<ITER; k++) { for (i=0;i<nworkers;i++) { cilk_spawn LIKWID_MARKER_START("copy"); } cilk_sync; cilk_for(i=0;i<SIZE;i++) { c[i] = a[i]; } for (i=0;i<nworkers;i++) { cilk_spawn LIKWID_MARKER_STOP("copy"); } cilk_sync; } time_stop(&timer); copy_time = time_print(&timer)/(double)ITER; time_start(&timer); for (k=0; k<ITER; k++) { for (i=0;i<nworkers;i++) { cilk_spawn LIKWID_MARKER_START("triad"); } cilk_sync; cilk_for(i=0;i<SIZE;i++) { a[i] = b[i] + c[i] * d[i]; } for (i=0;i<nworkers;i++) { cilk_spawn LIKWID_MARKER_STOP("triad"); } cilk_sync; } time_stop(&timer); triad_time = time_print(&timer)/(double)ITER; printf("Processed %.1f Mbyte at copy benchmark in %.4f seconds: %.2f MByte/s\n", 1E-6*(2*SIZE*sizeof(double)), copy_time, 1E-6*((2*SIZE*sizeof(double))/copy_time)); printf("Processed %.1f Mbyte at triad benchmark in %.4f seconds: %.2f MByte/s\n", 1E-6*(4*SIZE*sizeof(double)), triad_time, 1E-6*((4*SIZE*sizeof(double))/triad_time)); printf("Main PID %d\n",getpid()); for (i=0;i<nworkers;i++) { cilk_spawn show_thread(); } cilk_sync; LIKWID_MARKER_CLOSE; }