static void Number_setat(storage_t *st, size_t off, at *x) { get_write_permit(st); ifn (NUMBERP(x)) error(NIL, "not a number", x); void (*set)(gptr,size_t,real) = storage_setd[st->type]; (*set)(st->data, off, Number(x)); }
void cg_grad_adaptor(double *g, double *x, int n) { static at *call = NIL; static int nx = -1; static storage_t *stx = NULL; static storage_t *stg = NULL; static at *(*listeval)(at *, at *) = NULL; if (n == -1) { /* initialize */ at *x0 = var_get(named("x0")); at *vargs = var_get(named("vargs")); at *g = var_get(named("g")); ifn (x0) error(NIL, "x0 not found", NIL); ifn (INDEXP(x0) && IND_STTYPE((index_t *)Mptr(x0))) error(NIL, "x0 not a double index", x0); ifn (g) error(NIL, "g not found", NIL); listeval = Class(g)->listeval; index_t *ind = Mptr(x0); nx = storage_nelems(IND_ST(ind)); stx = new_storage(ST_DOUBLE); stx->flags = STS_FOREIGN; stx->size = nx; stx->data = (char *)-1; stg = new_storage(ST_DOUBLE); stg->flags = STS_FOREIGN; stg->size = nx; stg->data = (char *)-1; call = new_cons(g, new_cons(NEW_INDEX(stg, IND_SHAPE(ind)), new_cons(NEW_INDEX(stx, IND_SHAPE(ind)), vargs))); } else { if (n != nx) error(NIL, "vector of different size expected", NEW_NUMBER(n)); stx->data = x; stg->data = g; listeval(Car(call), call); } }
static void clearconvert(void) { int i; ifn (netconvert) error(NIL,"you should perform (alloc-net xx xx)",NIL); for (i=1; i<neurnombre; i++) netconvert[i]= -1; netconvert[0]=0; }
object_t *object_from_cobject(struct CClass_object *cobj) { ifn (cobj->Vtbl && cobj->Vtbl->Cdoc && cobj->Vtbl->Cdoc->lispdata.atclass) error(NIL, "attempt to access instance of unlinked class", NIL); assert(cobj->Vtbl); object_t *obj = cobj->__lptr; ifn (obj) { class_t *cl = Mptr(cobj->Vtbl->Cdoc->lispdata.atclass); obj = _new_object(cl, cobj); } return obj; }
static at *storage_listeval(at *p, at *q) { storage_t *st = Mptr(p); if (!st->data) error(NIL, "unsized storage", p); q = eval_arglist(Cdr(q)); ifn (CONSP(q) && Car(q) && NUMBERP(Car(q))) error(NIL, "illegal subscript", q); ssize_t off = Number(Car(q)); if (off<0 || off>=st->size) error(NIL, "subscript out of range", q); if (Cdr(q)) { ifn (CONSP(Cdr(q)) && !Cddr(q)) error(NIL, "one or two arguments expected",q); storage_setat[st->type](st, off, Cadr(q)); return st->backptr; } else return storage_getat[st->type](st, off); }
object_t *new_object(class_t *cl) { int n = 0; while (unreachables && n++<2) { object_t *obj = unreachables; unreachables = obj->next_unreachable; object_class->dispose(obj); } ifn (cl->live) error(NIL, "class is obsolete", cl->classname); return _new_object(cl, NULL); }
double cg_value_adaptor(double *x, int n) { static at *call = NIL; static int nx = -1; static storage_t *st = NULL; static at *(*listeval)(at *, at *) = NULL; if (n == -1) { /* initialize */ at *x0 = var_get(named("x0")); at *vargs = var_get(named("vargs")); at *f = var_get(named("f")); ifn (x0) error(NIL, "x0 not found", NIL); ifn (INDEXP(x0) && IND_STTYPE((index_t *)Mptr(x0))) error(NIL, "x0 not a double index", x0); ifn (f) error(NIL, "f not found", NIL); listeval = Class(f)->listeval; index_t *ind = Mptr(x0); nx = storage_nelems(IND_ST(ind)); st = new_storage(ST_DOUBLE); st->flags = STS_FOREIGN; st->size = nx; st->data = (char *)-1; call = new_cons(f, new_cons(NEW_INDEX(st, IND_SHAPE(ind)), vargs)); return NAN; } else { if (n != nx) error(NIL, "vector of different size expected", NEW_NUMBER(n)); st->data = x; return Number(listeval(Car(call), call)); } }
/* interface calling into the fortran routine */ static int lbfgs(index_t *x0, at *f, at *g, double gtol, htable_t *p, at *vargs) { /* argument checking and setup */ extern void lbfgs_(int *n, int *m, double *x, double *fval, double *gval, \ int *diagco, double *diag, int iprint[2], double *gtol, \ double *xtol, double *w, int *iflag); ifn (IND_STTYPE(x0) == ST_DOUBLE) error(NIL, "not an array of doubles", x0->backptr); ifn (Class(f)->listeval) error(NIL, "not a function", f); ifn (Class(f)->listeval) error(NIL, "not a function", g); ifn (gtol > 0) error(NIL, "threshold value not positive", NEW_NUMBER(gtol)); at *gx = copy_array(x0)->backptr; at *(*listeval_f)(at *, at *) = Class(f)->listeval; at *(*listeval_g)(at *, at *) = Class(g)->listeval; at *callf = new_cons(f, new_cons(x0->backptr, vargs)); at *callg = new_cons(g, new_cons(gx, new_cons(x0->backptr, vargs))); htable_t *params = lbfgs_params(); if (p) htable_update(params, p); int iprint[2]; iprint[0] = (int)Number(htable_get(params, NEW_SYMBOL("iprint-1"))); iprint[1] = (int)Number(htable_get(params, NEW_SYMBOL("iprint-2"))); lb3_.gtol = Number(htable_get(params, NEW_SYMBOL("ls-gtol"))); lb3_.stpmin = Number(htable_get(params, NEW_SYMBOL("ls-stpmin"))); lb3_.stpmax = Number(htable_get(params, NEW_SYMBOL("ls-stpmax"))); int m = (int)Number(htable_get(params, NEW_SYMBOL("lbfgs-m"))); int n = index_nelems(x0); double *x = IND_ST(x0)->data; double fval; double *gval = IND_ST(Mptr(gx))->data; int diagco = false; double *diag = mm_blob(n*sizeof(double)); double *w = mm_blob((n*(m+m+1)+m+m)*sizeof(double)); double xtol = eps(1); /* machine precision */ int iflag = 0; ifn (n>0) error(NIL, "empty array", x0->backptr); ifn (m>0) error(NIL, "m-parameter must be positive", NEW_NUMBER(m)); /* reverse communication loop */ do { fval = Number(listeval_f(Car(callf), callf)); listeval_g(Car(callg), callg); lbfgs_(&n, &m, x, &fval, gval, &diagco, diag, iprint, >ol, &xtol, w, &iflag); assert(iflag<2); } while (iflag > 0); return iflag; }
at * new_string_bylen(int n) { at *q; char *buffer; struct string *st; st = allocate(&alloc_string); ifn (buffer = malloc(n+1)) error(NIL, "memory exhausted", NIL); buffer[0] = 0; buffer[n] = 0; st->flag = STRING_ALLOCATED; st->start = buffer; st->cptr = 0; q = new_extern(&string_class, st); q->flags |= X_STRING; return q; }
inline void InsertHash(const STRPTR a, struct Hash *h) { struct HashEntry **he, *newhe; if(!h->Index) { ifn((h->Index = calloc(HASH_SIZE, sizeof(struct HashEntry *)))) PrintPrgErr(pmWordListErr); } he = &h->Index[HashWord(a) % HASH_SIZE]; if((newhe = malloc(sizeof(struct HashEntry)))) { newhe->Next = *he; newhe->Str = a; *he = newhe; } else PrintPrgErr(pmWordListErr); }
at *oostruct_getslot(at *p, at *prop) { at *slot = Car(prop); ifn (SYMBOLP(slot)) error(NIL,"not a slot name", slot); prop = Cdr(prop); object_t *obj = Mptr(p); class_t *cl = Class(obj->backptr); for (int i=0; i<cl->num_slots; i++) if (slot == cl->slots[i]) { at *sloti = obj->slots[i]; if (i<cl->num_cslots) sloti = eval(sloti); if (prop) return getslot(sloti, prop); else return sloti; } error(NIL, "not a slot", slot); }
void oostruct_setslot(at *p, at *prop, at *val) { at *slot = Car(prop); ifn (SYMBOLP(slot)) error(NIL, "not a slot name", slot); prop = Cdr(prop); object_t *obj = Mptr(p); class_t *cl = Class(obj->backptr); for(int i=0; i<cl->num_slots; i++) if (slot == cl->slots[i]) { if (prop) setslot(&obj->slots[i], prop, val); else if (i<cl->num_cslots) { class_t *cl = classof(obj->slots[i]); cl->setslot(obj->slots[i], NIL, val); } else obj->slots[i] = val; return; } error(NIL, "not a slot", slot); }
QStringList fieldList_indirect(QSettings *cfg, const QString& filename, const QString& type, QString *typeSuggestion, bool *complete) { if ((!type.isEmpty() && !provides_indirect().contains(type)) || !understands_indirect(cfg, filename)) { return QStringList(); } QFile f(filename); if (!f.open(QIODevice::ReadOnly)) { return QStringList(); } char* data; if (0 >= f.readLine(data, 1000)) { return QStringList(); } QString ifn(data); KUrl url(ifn); if (url.isLocalFile() || url.protocol().isEmpty()) { if (QFileInfo(ifn).isRelative()) { ifn = QFileInfo(filename).absolutePath() + QDir::separator() + ifn; } } return KstDataSource::fieldListForSource(ifn.trimmed(), type, typeSuggestion, complete); }
int main(int argc, char *argv[]) { int taskId, totaltasks, i, j; int chunk; MPI_Init(&argc, &argv); MPI_Comm_rank(MPI_COMM_WORLD, &taskId); MPI_Comm_size(MPI_COMM_WORLD, &totaltasks); chunk = NGRID / totaltasks; FP_PREC xc[chunk + 2]; FP_PREC yc[chunk + 2]; FP_PREC dyc[chunk + 2]; FP_PREC derr[chunk + 2]; FP_PREC intg; FP_PREC dx; int prev_task = (taskId - 1) < 0 ? totaltasks - 1 : taskId - 1; int next_task = (taskId + 1) % totaltasks; MPI_Request reqs[4]; MPI_Status stats[4]; MPI_Irecv(&yc[0], 1, MPI_DOUBLE, prev_task, prev_task * 1000 + taskId, MPI_COMM_WORLD, &reqs[0]); MPI_Irecv(&yc[chunk + 1], 1, MPI_DOUBLE, next_task, next_task * 1000 + taskId, MPI_COMM_WORLD, &reqs[1]); for (i = 1; i <= chunk + 1; i++) { xc[i] = (XI + (XF - XI) * (FP_PREC) (i - 1) / (FP_PREC) (NGRID - 1)) + taskId * chunk; } //define the function for (i = 1; i <= chunk; i++) { yc[i] = fn(xc[i]); } MPI_Isend(&yc[chunk], 1, MPI_DOUBLE, next_task, taskId * 1000 + next_task, MPI_COMM_WORLD, &reqs[3]); MPI_Isend(&yc[1], 1, MPI_DOUBLE, prev_task, taskId * 1000 + prev_task, MPI_COMM_WORLD, &reqs[2]); MPI_Waitall(4, reqs, stats); dx = xc[2] - xc[1]; if (taskId == ROOT) { xc[0] = xc[1] - dx; yc[0] = fn(xc[0]); } if (taskId == totaltasks - 1) { xc[chunk + 1] = xc[chunk] + dx; yc[chunk + 1] = fn(xc[chunk + 1]); } //compute the derivative using first-order finite differencing for (i = 1; i <= chunk; i++) { dyc[i] = (yc[i + 1] - yc[i - 1]) / (2.0 * dx); } //compute the integral using Trapazoidal rule intg = 0.0; for (i = 1; i <= chunk; i++) { if (taskId == totaltasks - 1 && i == chunk) continue; intg += 0.5 * (xc[i + 1] - xc[i]) * (yc[i + 1] + yc[i]); } //compute the errors for (i = 1; i <= chunk; i++) { if (i - 1 != chunk - 1) derr[i] = fabs((dyc[i] - dfn(xc[i])) / dfn(xc[i])); } if (taskId != ROOT) { MPI_Request nreqs[2]; MPI_Status nstats[2]; MPI_Isend(derr + 1, chunk, MPI_DOUBLE, ROOT, taskId * 1000 + ROOT, MPI_COMM_WORLD, &nreqs[0]); MPI_Isend(&intg, 1, MPI_DOUBLE, ROOT, taskId * 1000 + ROOT, MPI_COMM_WORLD, &nreqs[1]); MPI_Waitall(2, nreqs, nstats); } else { FP_PREC allxc[NGRID]; FP_PREC allderr[NGRID]; FP_PREC allintg[totaltasks]; FP_PREC davg_err = 0.0; FP_PREC dstd_dev = 0.0; FP_PREC intg_err = 0.0; MPI_Request nreqs[2 * (totaltasks - 1)]; MPI_Status nstats[2 * (totaltasks - 1)]; for (i = 1; i < totaltasks; i++) { MPI_Irecv(allderr + (i * chunk), chunk, MPI_DOUBLE, i, i * 1000 + ROOT, MPI_COMM_WORLD, &nreqs[2 * (i - 1)]); MPI_Irecv(allintg + i, 1, MPI_DOUBLE, i, i * 1000 + ROOT, MPI_COMM_WORLD, &nreqs[2 * (i - 1) + 1]); } for (i = 0; i < chunk; i++) { allderr[i] = derr[i + 1]; } MPI_Waitall(2 * (totaltasks - 1), nreqs, nstats); //find the average error for (i = 0; i < NGRID; i++) davg_err += allderr[i]; for (i = 1; i < totaltasks; i++) { intg += allintg[i]; } davg_err /= (FP_PREC) NGRID; dstd_dev = 0.0; for (i = 0; i < NGRID; i++) { dstd_dev += pow(allderr[i] - davg_err, 2); } dstd_dev = sqrt(dstd_dev / (FP_PREC) NGRID); intg_err = fabs((ifn(XI, XF) - intg) / ifn(XI, XF)); for (i = 0; i < NGRID; i++) { allxc[i] = XI + (XF - XI) * (FP_PREC) i / (FP_PREC) (NGRID - 1); } //print_error_data(NGRID, davg_err, dstd_dev, &xc[1], derr, intg_err); print_error_data(NGRID, davg_err, dstd_dev, allxc, allderr, intg_err); } MPI_Finalize(); }
class_t *new_ooclass(at *classname, at *atsuper, at *new_slots, at *defaults) { ifn (SYMBOLP(classname)) error(NIL, "not a valid class name", classname); ifn (length(new_slots) == length(defaults)) error(NIL, "number of slots must match number of defaults", NIL); ifn (CLASSP(atsuper)) error(NIL, "not a class", atsuper); class_t *super = Mptr(atsuper); if (builtin_class_p(super)) error(NIL, "superclass not descendend of class <object>", atsuper); at *p = new_slots; at *q = defaults; int n = 0; while (CONSP(p) && CONSP(q)) { ifn (SYMBOLP(Car(p))) error(NIL, "not a symbol", Car(p)); for (int i=0; i<super->num_slots; i++) if (Car(p) == super->slots[i]) error(NIL, "a superclass has slot of this name", Car(p)); p = Cdr(p); q = Cdr(q); n++; } assert(!p && !q); /* builds the new class */ class_t *cl = mm_alloc(mt_class); *cl = *object_class; if (new_slots) { int num_slots = n + super->num_slots; cl->slots = mm_allocv(mt_refs, num_slots*sizeof(at *)); cl->defaults = mm_allocv(mt_refs, num_slots*sizeof(at *)); int i = 0; for (; i<super->num_slots; i++) { cl->slots[i] = super->slots[i]; cl->defaults[i] = super->defaults[i]; } for (at *s = new_slots, *d = defaults; CONSP(s); i++, s = Cdr(s), d = Cdr(d)) { cl->slots[i] = Car(s); cl->defaults[i] = Car(d); } assert(i == num_slots); cl->num_slots = num_slots; } else { cl->slots = super->slots; cl->defaults = super->defaults; cl->num_slots = super->num_slots; } /* Sets up classname */ cl->classname = classname; cl->priminame = classname; /* Sets up lists */ cl->super = super; cl->atsuper = atsuper; cl->nextclass = super->subclasses; cl->subclasses = 0L; super->subclasses = cl; /* Initialize the slots */ cl->myslots = new_slots; cl->mydefaults = defaults; /* Initialize the methods and hash table */ cl->methods = NIL; cl->hashtable = 0L; cl->hashsize = 0; cl->hashok = 0; /* Initialize DHCLASS stuff */ cl->has_compiled_part = super->has_compiled_part; cl->num_cslots = super->num_cslots; cl->classdoc = 0; cl->kname = 0; /* Create AT and returns it */ assert(class_class); cl->backptr = new_at(class_class, cl); return cl; }
void* _ksmem_mbat_alloc (client* c, memunit* unit, uvar32_64 size) { struct timeb tb; svar32_64 no, pf, pu, nf, nu, tmp; /* prevoius and next free and used block numbers corresopndevly */ isclientn (c); ismemunitn (unit, c); isvalidn (size, c, KSERR_MEMZEROSIZE); isownern (c, unit); /* no = index of free and suitable unit->mbat[] entry */ ifn ((no = _ksmem_mbat_find_free (c, unit, size)) == -1); _ksmem_mbat_neighboors (unit, no, unit->ffb, unit->lfb, &pf, &nf); _ksmem_mbat_neighboors (unit, no, unit->fub, unit->lub, &pu, &nu); /* Allocated new block */ atomic(unit) if (unit->maxfb == unit->mbat[no].size) { unit->maxfb = 0; for (tmp = unit->ffb; tmp != -1; tmp = unit->mbat[tmp].next) if (unit->maxfb < unit->mbat[tmp].size) unit->maxfb = unit->mbat[tmp].size; if (unit->maxfb < unit->psize - unit->pbottom) unit->maxfb = unit->psize - unit->pbottom; } if (unit->minfb == unit->mbat[no].size) { unit->minfb = (uvar32_64)-1; for (tmp = unit->ffb; tmp != -1; tmp = unit->mbat[tmp].next) if (unit->minfb > unit->mbat[tmp].size) unit->minfb = unit->mbat[tmp].size; if (unit->minfb > unit->psize - unit->pbottom) unit->minfb = unit->psize - unit->pbottom; } if (no == unit->ffb) unit->ffb = unit->mbat[no].next; if (no == (svar32_64)unit->tbottom) { unit->mbat[no].offset = unit->pbottom; unit->mbat[no].size = size; unit->pbottom += size; unit->tbottom++; unit->tfree--; } else if (unit->mbat[no].size != size && unit->tbottom < unit->tsize) { unit->mbat[unit->tbottom].size = unit->mbat[no].size - size; unit->mbat[unit->tbottom].offset = unit->mbat[no].offset + size; unit->mbat[unit->tbottom].next = -1; unit->mbat[no].size = size; if (unit->mbat[unit->tbottom].size < unit->minfb) unit->minfb = unit->mbat[unit->tbottom].size; if (unit->mbat[unit->tbottom].size > unit->maxfb) unit->maxfb = unit->mbat[unit->tbottom].size; if (unit->lfb != -1) { unit->mbat[unit->lfb].next = unit->tbottom; unit->lfb = unit->tbottom; } else unit->lfb = unit->tbottom; if (unit->ffb == -1) unit->ffb = unit->tbottom; if (nf == -1) nf = unit->tbottom; unit->tbottom++; } if (unit->lfb == no) unit->lfb = pf; if (unit->fub > no || unit->fub == -1) unit->fub = no; if (unit->lub < no || unit->lub == -1) unit->lub = no; if (pu != -1) unit->mbat[pu].next = no; if (pf != -1) unit->mbat[pf].next = nf; unit->mbat[no].next = nu; ftime (&tb); unit->modified = tb.time; unit->pfree -= size; unit->maxfb -= size; unit->minfb = (unit->minfb < unit->maxfb) ? unit->minfb : unit->maxfb; endatomic return ((void *)((uvar32_64)(unit->mpage) + unit->mbat[no].offset)); }
int main (int argc, char *argv[]) { int numproc, rank, len,i; char hostname[MPI_MAX_PROCESSOR_NAME]; MPI_Init(&argc, &argv); MPI_Comm_size(MPI_COMM_WORLD, &numproc); MPI_Comm_rank(MPI_COMM_WORLD, &rank); MPI_Get_processor_name(hostname, &len); FP_PREC *yc, *dyc, *derr, *fullerr; FP_PREC *xc, dx, intg, davg_err, dstd_dev, intg_err; FP_PREC globalSum = 0.0; // MPI vailables MPI_Request *requestList,request; MPI_Status *status; //"real" grid indices int imin, imax; imin = 1 + (rank * (NGRID/numproc)); if(rank == numproc - 1) imax = NGRID; else imax = (rank+1) * (NGRID/numproc); int range = imax - imin + 1; xc = (FP_PREC*) malloc((range + 2) * sizeof(FP_PREC)); yc = (FP_PREC*) malloc((range + 2) * sizeof(FP_PREC)); dyc = (FP_PREC*) malloc((range + 2) * sizeof(FP_PREC)); dx = (XF - XI)/(double)NGRID; for (i = 1; i <= range ; i++) { //xc[i] = imin + (XF - XI) * (FP_PREC)(i - 1)/(FP_PREC)(NGRID - 1); xc[i] = XI + dx * (imin + i - 2); } xc[0] = xc[1] - dx; xc[range + 1] = xc[range] + dx; for( i = 1; i <= range; i++ ) { yc[i] = fn(xc[i]); } yc[0] = fn(xc[0]); yc[range + 1] = fn(xc[range + 1]); for (i = 1; i <= range; i++) { dyc[i] = (yc[i + 1] - yc[i - 1])/(2.0 * dx); } intg = 0.0; for (i = 1; i <= range; i++) { intg += 0.5 * (xc[i + 1] - xc[i]) * (yc[i + 1] + yc[i]); } MPI_Reduce(&intg, &globalSum, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD); //compute the error, average error of the derivatives derr = (FP_PREC*)malloc((range + 2) * sizeof(FP_PREC)); //compute the errors for(i = 1; i <= range; i++) { derr[i] = fabs((dyc[i] - dfn(xc[i]))/dfn(xc[i])); } derr[0] = derr[range + 1] = 0.0; if(rank == 0) { fullerr = (FP_PREC *)malloc(sizeof(FP_PREC)*NGRID); requestList =(MPI_Request*)malloc((numproc-1)*sizeof(MPI_Request)); for(i = 0;i<range;i++) { fullerr[i] = derr[i+1]; } for(i = 1; i<numproc; i++) { int rmin, rmax, *indx; rmin = 1 + (i * (NGRID/numproc)); if(i == numproc - 1) rmax = NGRID; else rmax = (i+1) * (NGRID/numproc); MPI_Irecv(fullerr+rmin-1, rmax-rmin+1, MPI_DOUBLE, i, 1, MPI_COMM_WORLD, &(requestList[i-1])); } double sum = 0.0; for(i=0; i<NGRID; i++) { sum+=fullerr[i]; } davg_err = sum/(FP_PREC)NGRID; dstd_dev = 0.0; for(i = 0; i< NGRID; i++) { dstd_dev += pow(derr[i] - davg_err, 2); } dstd_dev = sqrt(dstd_dev/(FP_PREC)NGRID); intg_err = fabs((ifn(XI, XF) - globalSum)/ifn(XI, XF)); printf("%0.4e: %0.4e: %0.4e\n", davg_err, dstd_dev, intg_err); } else { MPI_Isend(derr+1, imax-imin+1, MPI_DOUBLE, 0, rank, MPI_COMM_WORLD, &request); fflush(stdout); } MPI_Finalize(); }
int main (int argc, char *argv[]) { int procid, num_procs; MPI_Status status; // derivative_time, integral_time, err_time is the local sum of runtime for each computation // tick is used to mark time double derivative_time = 0, integral_time = 0, err_time = 0, tick; MPI_Init(&argc, &argv); MPI_Comm_rank(MPI_COMM_WORLD, &procid); MPI_Comm_size(MPI_COMM_WORLD, &num_procs); // Calculate grid-points per process if(NGRID % num_procs > 0) { if(procid == 0) printf("NGRID should be divisible by the number of processes!"); MPI_Finalize(); return 1; } int points_per_node = NGRID / num_procs; //loop index int i; //domain array and step size FP_PREC xc[points_per_node], dx; //function array and derivative //the size will be dependent on the //number of processors used //to the program FP_PREC yc[points_per_node], dyc[points_per_node]; //integration values FP_PREC local_intg, intg; //error analysis array FP_PREC derr[points_per_node]; //error analysis values FP_PREC dlocal_sum_err, davg_err, dlocal_std_dev, dstd_dev, intg_err; //calculate dx dx = (FP_PREC)(XF - XI)/(FP_PREC)(NGRID - 1); // get start X for each process (my_XI) int bins_before_me = procid * points_per_node; FP_PREC my_XI = XI + bins_before_me * dx; //construct grid for (i = 0; i < points_per_node; ++i) { xc[i] = my_XI + i * dx; } //define the function for(i = 0; i < points_per_node; ++i) { yc[i] = fn(xc[i]); } //define holders for left and right bound value FP_PREC left_bound_yc, right_bound_yc; if(procid == 0) left_bound_yc = fn(XI-dx); if(procid == num_procs - 1) right_bound_yc = fn(XF+dx); tick = MPI_Wtime(); #if BLOCKING if(procid == 0) printf("Using blocking message! \n"); //Step 1: even nodes send to the right then receive back //Step 2: even nodes receive from the left then send back if(procid % 2 == 0) { if(procid < num_procs - 1) { MPI_Send(&yc[points_per_node-1], 1, MPI_DOUBLE, procid+1, 0, MPI_COMM_WORLD); MPI_Recv(&right_bound_yc, 1, MPI_DOUBLE, procid+1, 0, MPI_COMM_WORLD, &status); } if(procid > 0) { MPI_Recv(&left_bound_yc, 1, MPI_DOUBLE, procid-1, 0, MPI_COMM_WORLD, &status); MPI_Send(&yc[0], 1, MPI_DOUBLE, procid-1, 0, MPI_COMM_WORLD); } } else { MPI_Recv(&left_bound_yc, 1, MPI_DOUBLE, procid-1, 0, MPI_COMM_WORLD, &status); MPI_Send(&yc[0], 1, MPI_DOUBLE, procid-1, 0, MPI_COMM_WORLD); if(procid < num_procs - 1) { MPI_Send(&yc[points_per_node-1], 1, MPI_DOUBLE, procid+1, 0, MPI_COMM_WORLD); MPI_Recv(&right_bound_yc, 1, MPI_DOUBLE, procid+1, 0, MPI_COMM_WORLD, &status); } } #else if(procid == 0) printf("Using non-blocking message! \n"); MPI_Request request[4]; int current_request = 0; if(procid < num_procs - 1) { // receive right bound yc MPI_Irecv(&right_bound_yc, 1, MPI_DOUBLE, procid+1, 0, MPI_COMM_WORLD, &request[current_request]); ++current_request; } if(procid > 0) { // receive left bound yc MPI_Irecv(&left_bound_yc, 1, MPI_DOUBLE, procid-1, 0, MPI_COMM_WORLD, &request[current_request]); ++current_request; } if(procid < num_procs - 1) { // send right bound yc to right node MPI_Isend(&yc[points_per_node-1], 1, MPI_DOUBLE, procid+1, 0, MPI_COMM_WORLD, &request[current_request]); ++current_request; } if(procid > 0) { // send left bound yc to left node MPI_Isend(&yc[0], 1, MPI_DOUBLE, procid-1, 0, MPI_COMM_WORLD, &request[current_request]); ++current_request; } #endif derivative_time += MPI_Wtime() - tick; integral_time += MPI_Wtime() - tick; // Overlap computation and communication BEGIN //compute the derivative using first-order finite differencing tick = MPI_Wtime(); for (i = 1; i < points_per_node-1; ++i) { dyc[i] = (yc[i + 1] - yc[i - 1])/(2.0 * dx); } derivative_time += MPI_Wtime() - tick; //compute the integral using Trapazoidal rule tick = MPI_Wtime(); local_intg = 0.0; for (i = 0; i < points_per_node-1; ++i) { local_intg += 0.5 * (yc[i] + yc[i + 1]) * dx; } integral_time += MPI_Wtime() - tick; // Overlap computation and communication END // WAIT for non-blocking message complete before continue #if !BLOCKING tick = MPI_Wtime(); MPI_Waitall(current_request, request, MPI_STATUSES_IGNORE); derivative_time += MPI_Wtime() - tick; integral_time += MPI_Wtime() - tick; #endif // compute derivative of boundary points, runtime is not counted because it's quite small dyc[0] = (yc[1] - left_bound_yc)/(2.0 * dx); dyc[points_per_node-1] = (right_bound_yc - yc[points_per_node-2])/(2.0 * dx); // compute integral at right boundary point, runtime is not counted because it's quite small if(procid < num_procs-1) local_intg += 0.5 * (yc[points_per_node-1] + right_bound_yc) * dx; tick = MPI_Wtime(); //compute the error, average error of the derivatives for(i = 0; i < points_per_node; ++i) { if(dfn(xc[i]) == 0) { printf("WARNING: derivative at point %d on process %d is zero.\n", i, procid); derr[i] = 0; } else derr[i] = fabs((dyc[i] - dfn(xc[i]))/dfn(xc[i])); } //find the local average error dlocal_sum_err = 0.0; for(i = 0; i < points_per_node; ++i) { dlocal_sum_err += derr[i]; } //calculate and output errors #if SINGLE_CALL_REDUCTION if(procid == 0) printf("Using single call reduction! \n"); //all nodes collect sum err and convert it to the mean value MPI_Allreduce(&dlocal_sum_err, &davg_err, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); davg_err /= (FP_PREC)NGRID; // each process calculates global average #else if(procid == 0) printf("Using manual call reduction! \n"); //all nodes collect sum err and convert it to the mean value if(procid != 0) MPI_Send(&dlocal_sum_err, 1, MPI_DOUBLE, 0, 0, MPI_COMM_WORLD); else if(procid == 0) { davg_err = dlocal_sum_err; for(i = 1; i < num_procs; ++i) { MPI_Recv(&dlocal_sum_err, 1, MPI_DOUBLE, MPI_ANY_SOURCE, 0, MPI_COMM_WORLD, &status); davg_err += dlocal_sum_err; } davg_err /= (FP_PREC)NGRID; } MPI_Bcast(&davg_err, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD); #endif //now all nodes have davg_err, find sum squared differences of local derr dlocal_std_dev = 0.0; for(i = 0; i < points_per_node; ++i) { dlocal_std_dev += pow(derr[i] - davg_err, 2); } err_time += MPI_Wtime() - tick; #if SINGLE_CALL_REDUCTION //reduce local integral & local (sum squared differences of derr) to root tick = MPI_Wtime(); MPI_Reduce(&dlocal_std_dev, &dstd_dev, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD); err_time += MPI_Wtime() - tick; tick = MPI_Wtime(); MPI_Reduce(&local_intg, &intg, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD); integral_time += MPI_Wtime() - tick; #else //reduce local integral & local (sum squared differences of derr) to root if(procid != 0) { tick = MPI_Wtime(); MPI_Send(&dlocal_std_dev, 1, MPI_DOUBLE, 0, 0, MPI_COMM_WORLD); err_time += MPI_Wtime() - tick; tick = MPI_Wtime(); MPI_Send(&local_intg, 1, MPI_DOUBLE, 0, 1, MPI_COMM_WORLD); integral_time += MPI_Wtime() - tick; } else if(procid == 0) { dstd_dev = dlocal_std_dev; intg = local_intg; tick = MPI_Wtime(); for(i = 1; i < num_procs; ++i) { MPI_Recv(&dlocal_std_dev, 1, MPI_DOUBLE, MPI_ANY_SOURCE, 0, MPI_COMM_WORLD, &status); dstd_dev += dlocal_std_dev; } err_time += MPI_Wtime() - tick; tick = MPI_Wtime(); for(i = 1; i < num_procs; ++i) { MPI_Recv(&local_intg, 1, MPI_DOUBLE, MPI_ANY_SOURCE, 1, MPI_COMM_WORLD, &status); intg+= local_intg; } integral_time += MPI_Wtime() - tick; } #endif // print out the max runtime for each calculation double max_derivative_time, max_integral_time, max_err_time; MPI_Reduce(&derivative_time, &max_derivative_time, 1, MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD); MPI_Reduce(&integral_time, &max_integral_time, 1, MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD); MPI_Reduce(&err_time, &max_err_time, 1, MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD); if(procid == 0) { printf("Max runtime to calculate derivatives is %e\n", max_derivative_time); printf("Max runtime to calculate integral is %e\n", max_integral_time); printf("Max runtime to calculate derivative errors is %e\n", max_err_time); } //gather derivative results & errors for output //this part shouldn't be included in running time measurements FP_PREC *final_dyc = NULL; FP_PREC *final_derr = NULL; if(procid == 0) { final_dyc = (FP_PREC*)malloc(NGRID * sizeof(FP_PREC)); final_derr = (FP_PREC*)malloc(NGRID * sizeof(FP_PREC)); } MPI_Gather(dyc, points_per_node, MPI_DOUBLE, final_dyc, points_per_node, MPI_DOUBLE, 0, MPI_COMM_WORLD); MPI_Gather(derr, points_per_node, MPI_DOUBLE, final_derr, points_per_node, MPI_DOUBLE, 0, MPI_COMM_WORLD); //final output at root node (rank 0) if(procid == 0) { dstd_dev = sqrt(dstd_dev/(FP_PREC)NGRID); if(ifn(XI, XF) == 0) { printf("WARNING: true integral value from XI to XF is equal zero.\n"); intg_err = 0; } else { intg_err = fabs((ifn(XI, XF) - intg)/ifn(XI, XF)); } print_function_data(NGRID, dx, final_dyc); print_error_data(NGRID, davg_err, dstd_dev, intg_err, dx, final_derr); free(final_dyc); free(final_derr); } MPI_Finalize(); return 0; }
at *cdr(at *q) { ifn (LISTP(q)) RAISEF("not a list", q); return q ? Cdr(q) : q; }