static char solve2pt(int ind,int inda,int indb,float ha,float hb,char ch) { int kmin /* ,i,k ,i1,k1 */; float x1,x2; float xa,ta,xb,tb,x,t,tmin; float d,dp,dp2,df; char /* ch1='y', */ ch2='n'; int ntry=0; xa=*(x0+inda); ta=*(t0+inda); xb=*(x0+indb); tb=*(t0+indb); tmin=SF_MAX(ta,tb)-TOL; kmin=floor(tmin/ht); x1=SF_MIN(xa,xb)-TOL; x2=SF_MAX(xa,xb)+TOL; t=*(t0+ind); if( kmin<nt1 ) { while( ch2=='n' && ntry<NTRYMAX ) { x=lateral(t,xa,xb,ta,tb,ha,hb); /*i=SF_MAX(0,SF_MIN(nx1-1,floor(x/hx))); k=SF_MAX(0,SF_MIN(nt1-1,floor(t/ht))); */ d=fn(t,ta,tb,xa,xb,ha,hb); dp=2.0*d; dp2=dp; while( fabs(d)>TOL && fabs(dp2)>fabs(d) ) { df=dfn(t,ta,tb,xa,xb,ha,hb); t-=d/df; dp2=dp; dp=d; d=fn(t,ta,tb,xa,xb,ha,hb); } x=lateral(t,xa,xb,ta,tb,ha,hb); /* i1=SF_MAX(0,SF_MIN(nx1-1,floor(x/hx))); k1=SF_MAX(0,SF_MIN(nt1-1,floor(t/ht))); */ if( x>=x1 && x<=x2 && t>=tmin ) ch2='y'; ntry++; } if( fabs(d)<=TOL && t>=tmin ) { if( x>=x1 && x<=x2 ) { /* ch1='n'; */ if( *(pup+ind)<=1 || (*(pup+ind)==2 && t<(*(t0+ind))) ) { ch='s'; *(t0+ind)=t; *(x0+ind)=x; *(pup+ind)=2; *(v+ind)=1.0/linterp2(x,t); } ch=(ch=='s') ? 's' : 'n'; } } } else ch=(ch=='s') ? 's' : 'f'; return ch; }
/*
 *	Depth-first number the children of the given parent.
 *
 *	The traversal stack (dfn_stack) is allocated on first use with room
 *	for DFN_DEPTH entries; failure to allocate is fatal.
 */
void
dfn(nltype *parentp)
{
    arctype	*childarc;

#ifdef DEBUG
    if (debug & DFNDEBUG) {
	(void) printf("[dfn] dfn(");
	printname(parentp);
	(void) printf(")\n");
    }
#endif /* DEBUG */

    /* lazily create the traversal stack */
    if (!dfn_stack) {
	dfn_sz = DFN_DEPTH;
	dfn_stack = (dfntype *) malloc(dfn_sz * sizeof (dfntype));
	if (!dfn_stack) {
	    (void) fprintf(stderr, "fatal: can't malloc %d objects\n", dfn_sz);
	    exit(1);
	}
    }

    /* already numbered: nothing further to do */
    if (dfn_numbered(parentp)) {
	return;
    }

    /* already on the stack: we have walked into a cycle */
    if (dfn_busy(parentp)) {
	dfn_findcycle(parentp);
	return;
    }

    /* pre-order visit of the parent itself */
    dfn_pre_visit(parentp);

    /* recurse into every child arc */
    childarc = parentp->children;
    while (childarc) {
	dfn(childarc->arc_childp);
	childarc = childarc->arc_childlist;
    }

    /* post-order visit of the parent itself */
    dfn_post_visit(parentp);
}
/*
 *	Depth-first number the children of the given parent.
 *	Arcs flagged DEADARC are not followed.
 */
void
dfn(nltype *parentp)
{
    arctype	*childarc;

#   ifdef DEBUG
	if (debug & DFNDEBUG) {
	    printf("[dfn] dfn(");
	    printname(parentp);
	    printf(")\n");
	}
#   endif /* DEBUG */

    /* already numbered: nothing further to do */
    if (dfn_numbered(parentp))
	return;

    /* already on the stack: we have walked into a cycle */
    if (dfn_busy(parentp)) {
	dfn_findcycle(parentp);
	return;
    }

    /* pre-order visit of the parent itself */
    dfn_pre_visit(parentp);

    /* recurse into every live child arc */
    childarc = parentp->children;
    while (childarc) {
	if (!(childarc->arc_flags & DEADARC))
	    dfn(childarc->arc_childp);
	childarc = childarc->arc_childlist;
    }

    /* post-order visit of the parent itself */
    dfn_post_visit(parentp);
}
// Runs _dfs from every vertex of `g` that has not been visited yet, scanning
// vertex indices from highest to lowest.  Does nothing for a null or empty
// graph.  Resets _processed and _visited to all-zero before the scan.
void get_scc(graph_t * g) {
    if (!g) {
        return;
    }
    if (!g->vex_count()) {
        return;
    }

    int vertex_total = g->vex_count();
    list_t * adjacency = g->get_structure();

    _processed.assign(vertex_total, 0);
    _visited.assign(vertex_total, 0);

    // Working state shared by all _dfs calls.
    std::stack<int> pending;
    std::vector<int> in_stack(vertex_total, 0);
    std::vector<int> low(vertex_total, 0);
    std::vector<int> dfn(vertex_total, 0);

    int vertex = vertex_total;
    while (vertex-- > 0) {
        if (_visited[vertex]) {
            continue;
        }
        _dfs(g, adjacency, vertex, low, dfn, pending, in_stack);
    }
}
// Emits the code for a `new` expression of class type `tc`:
//   1. resolves the class,
//   2. obtains storage (stack alloca, user-supplied allocator, or the
//      `_d_newclass` runtime function),
//   3. runs DtoInitClass on the storage,
//   4. stores the outer `this` / nested context when applicable,
//   5. calls the constructor if the expression names one.
// Returns the constructor call's result, or a DImValue wrapping the storage
// when no constructor is called.
DValue* DtoNewClass(Loc& loc, TypeClass* tc, NewExp* newexp)
{
    // resolve type
    DtoResolveClass(tc->sym);

    // allocate
    LLValue* storage;
    if (newexp->onstack)
    {
        // FIXME align scope class to its largest member
        storage = DtoRawAlloca(DtoType(tc)->getContainedType(0), 0, ".newclass_alloca");
    }
    else if (newexp->allocator)
    {
        // custom allocator
        DtoResolveFunction(newexp->allocator);
        DFuncValue allocCallee(newexp->allocator, getIrFunc(newexp->allocator)->func);
        DValue* allocResult = DtoCallFunction(newexp->loc, NULL, &allocCallee, newexp->newargs);
        storage = DtoBitCast(allocResult->getRVal(), DtoType(tc), ".newclass_custom");
    }
    else
    {
        // default allocator
        llvm::Function* newClassFn = LLVM_D_GetRuntimeFunction(loc, gIR->module, "_d_newclass");
        LLConstant* classInfo = DtoBitCast(getIrAggr(tc->sym)->getClassInfoSymbol(),
                                           DtoType(Type::typeinfoclass->type));
        storage = gIR->CreateCallOrInvoke(newClassFn, classInfo, ".newclass_gc_alloc").getInstruction();
        storage = DtoBitCast(storage, DtoType(tc), ".newclass_gc");
    }

    // init
    DtoInitClass(tc, storage);

    if (newexp->thisexp)
    {
        // init inner-class outer reference
        Logger::println("Resolving outer class");
        LOG_SCOPE;
        DValue* outerThis = toElem(newexp->thisexp);
        unsigned vthisIndex = getFieldGEPIndex(tc->sym, tc->sym->vthis);
        LLValue* src = outerThis->getRVal();
        LLValue* dst = DtoGEPi(storage, 0, vthisIndex);
        IF_LOG Logger::cout() << "dst: " << *dst << "\nsrc: " << *src << '\n';
        DtoStore(src, DtoBitCast(dst, getPtrToType(src->getType())));
    }
    else if (tc->sym->isNested() && tc->sym->vthis)
    {
        // set the context for nested classes
        DtoResolveNestedContext(loc, tc->sym, storage);
    }

    // no constructor: return the default constructed class
    if (!newexp->member)
    {
        return new DImValue(tc, storage);
    }

    // call constructor
    Logger::println("Calling constructor");
    assert(newexp->arguments != NULL);
    DtoResolveFunction(newexp->member);
    DFuncValue ctorCallee(newexp->member, getIrFunc(newexp->member)->func, storage);
    return DtoCallFunction(newexp->loc, tc, &ctorCallee, newexp->arguments);
}
// Backward pass: returns a single-element ArrayVector holding the first
// incoming gradient multiplied by dfn() evaluated at the first input.
ArrayVector backward(const ArrayVector &input, const ArrayVector &gradOutput)
{
    const auto &x = input[0];
    const auto &upstream = gradOutput[0];
    return { upstream * dfn(x) };
}
int main (int argc, char *argv[]) { int procid, num_procs; MPI_Status status; // derivative_time, integral_time, err_time is the local sum of runtime for each computation // tick is used to mark time double derivative_time = 0, integral_time = 0, err_time = 0, tick; MPI_Init(&argc, &argv); MPI_Comm_rank(MPI_COMM_WORLD, &procid); MPI_Comm_size(MPI_COMM_WORLD, &num_procs); // Calculate grid-points per process if(NGRID % num_procs > 0) { if(procid == 0) printf("NGRID should be divisible by the number of processes!"); MPI_Finalize(); return 1; } int points_per_node = NGRID / num_procs; //loop index int i; //domain array and step size FP_PREC xc[points_per_node], dx; //function array and derivative //the size will be dependent on the //number of processors used //to the program FP_PREC yc[points_per_node], dyc[points_per_node]; //integration values FP_PREC local_intg, intg; //error analysis array FP_PREC derr[points_per_node]; //error analysis values FP_PREC dlocal_sum_err, davg_err, dlocal_std_dev, dstd_dev, intg_err; //calculate dx dx = (FP_PREC)(XF - XI)/(FP_PREC)(NGRID - 1); // get start X for each process (my_XI) int bins_before_me = procid * points_per_node; FP_PREC my_XI = XI + bins_before_me * dx; //construct grid for (i = 0; i < points_per_node; ++i) { xc[i] = my_XI + i * dx; } //define the function for(i = 0; i < points_per_node; ++i) { yc[i] = fn(xc[i]); } //define holders for left and right bound value FP_PREC left_bound_yc, right_bound_yc; if(procid == 0) left_bound_yc = fn(XI-dx); if(procid == num_procs - 1) right_bound_yc = fn(XF+dx); tick = MPI_Wtime(); #if BLOCKING if(procid == 0) printf("Using blocking message! 
\n"); //Step 1: even nodes send to the right then receive back //Step 2: even nodes receive from the left then send back if(procid % 2 == 0) { if(procid < num_procs - 1) { MPI_Send(&yc[points_per_node-1], 1, MPI_DOUBLE, procid+1, 0, MPI_COMM_WORLD); MPI_Recv(&right_bound_yc, 1, MPI_DOUBLE, procid+1, 0, MPI_COMM_WORLD, &status); } if(procid > 0) { MPI_Recv(&left_bound_yc, 1, MPI_DOUBLE, procid-1, 0, MPI_COMM_WORLD, &status); MPI_Send(&yc[0], 1, MPI_DOUBLE, procid-1, 0, MPI_COMM_WORLD); } } else { MPI_Recv(&left_bound_yc, 1, MPI_DOUBLE, procid-1, 0, MPI_COMM_WORLD, &status); MPI_Send(&yc[0], 1, MPI_DOUBLE, procid-1, 0, MPI_COMM_WORLD); if(procid < num_procs - 1) { MPI_Send(&yc[points_per_node-1], 1, MPI_DOUBLE, procid+1, 0, MPI_COMM_WORLD); MPI_Recv(&right_bound_yc, 1, MPI_DOUBLE, procid+1, 0, MPI_COMM_WORLD, &status); } } #else if(procid == 0) printf("Using non-blocking message! \n"); MPI_Request request[4]; int current_request = 0; if(procid < num_procs - 1) { // receive right bound yc MPI_Irecv(&right_bound_yc, 1, MPI_DOUBLE, procid+1, 0, MPI_COMM_WORLD, &request[current_request]); ++current_request; } if(procid > 0) { // receive left bound yc MPI_Irecv(&left_bound_yc, 1, MPI_DOUBLE, procid-1, 0, MPI_COMM_WORLD, &request[current_request]); ++current_request; } if(procid < num_procs - 1) { // send right bound yc to right node MPI_Isend(&yc[points_per_node-1], 1, MPI_DOUBLE, procid+1, 0, MPI_COMM_WORLD, &request[current_request]); ++current_request; } if(procid > 0) { // send left bound yc to left node MPI_Isend(&yc[0], 1, MPI_DOUBLE, procid-1, 0, MPI_COMM_WORLD, &request[current_request]); ++current_request; } #endif derivative_time += MPI_Wtime() - tick; integral_time += MPI_Wtime() - tick; // Overlap computation and communication BEGIN //compute the derivative using first-order finite differencing tick = MPI_Wtime(); for (i = 1; i < points_per_node-1; ++i) { dyc[i] = (yc[i + 1] - yc[i - 1])/(2.0 * dx); } derivative_time += MPI_Wtime() - tick; //compute the 
integral using Trapazoidal rule tick = MPI_Wtime(); local_intg = 0.0; for (i = 0; i < points_per_node-1; ++i) { local_intg += 0.5 * (yc[i] + yc[i + 1]) * dx; } integral_time += MPI_Wtime() - tick; // Overlap computation and communication END // WAIT for non-blocking message complete before continue #if !BLOCKING tick = MPI_Wtime(); MPI_Waitall(current_request, request, MPI_STATUSES_IGNORE); derivative_time += MPI_Wtime() - tick; integral_time += MPI_Wtime() - tick; #endif // compute derivative of boundary points, runtime is not counted because it's quite small dyc[0] = (yc[1] - left_bound_yc)/(2.0 * dx); dyc[points_per_node-1] = (right_bound_yc - yc[points_per_node-2])/(2.0 * dx); // compute integral at right boundary point, runtime is not counted because it's quite small if(procid < num_procs-1) local_intg += 0.5 * (yc[points_per_node-1] + right_bound_yc) * dx; tick = MPI_Wtime(); //compute the error, average error of the derivatives for(i = 0; i < points_per_node; ++i) { if(dfn(xc[i]) == 0) { printf("WARNING: derivative at point %d on process %d is zero.\n", i, procid); derr[i] = 0; } else derr[i] = fabs((dyc[i] - dfn(xc[i]))/dfn(xc[i])); } //find the local average error dlocal_sum_err = 0.0; for(i = 0; i < points_per_node; ++i) { dlocal_sum_err += derr[i]; } //calculate and output errors #if SINGLE_CALL_REDUCTION if(procid == 0) printf("Using single call reduction! \n"); //all nodes collect sum err and convert it to the mean value MPI_Allreduce(&dlocal_sum_err, &davg_err, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); davg_err /= (FP_PREC)NGRID; // each process calculates global average #else if(procid == 0) printf("Using manual call reduction! 
\n"); //all nodes collect sum err and convert it to the mean value if(procid != 0) MPI_Send(&dlocal_sum_err, 1, MPI_DOUBLE, 0, 0, MPI_COMM_WORLD); else if(procid == 0) { davg_err = dlocal_sum_err; for(i = 1; i < num_procs; ++i) { MPI_Recv(&dlocal_sum_err, 1, MPI_DOUBLE, MPI_ANY_SOURCE, 0, MPI_COMM_WORLD, &status); davg_err += dlocal_sum_err; } davg_err /= (FP_PREC)NGRID; } MPI_Bcast(&davg_err, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD); #endif //now all nodes have davg_err, find sum squared differences of local derr dlocal_std_dev = 0.0; for(i = 0; i < points_per_node; ++i) { dlocal_std_dev += pow(derr[i] - davg_err, 2); } err_time += MPI_Wtime() - tick; #if SINGLE_CALL_REDUCTION //reduce local integral & local (sum squared differences of derr) to root tick = MPI_Wtime(); MPI_Reduce(&dlocal_std_dev, &dstd_dev, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD); err_time += MPI_Wtime() - tick; tick = MPI_Wtime(); MPI_Reduce(&local_intg, &intg, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD); integral_time += MPI_Wtime() - tick; #else //reduce local integral & local (sum squared differences of derr) to root if(procid != 0) { tick = MPI_Wtime(); MPI_Send(&dlocal_std_dev, 1, MPI_DOUBLE, 0, 0, MPI_COMM_WORLD); err_time += MPI_Wtime() - tick; tick = MPI_Wtime(); MPI_Send(&local_intg, 1, MPI_DOUBLE, 0, 1, MPI_COMM_WORLD); integral_time += MPI_Wtime() - tick; } else if(procid == 0) { dstd_dev = dlocal_std_dev; intg = local_intg; tick = MPI_Wtime(); for(i = 1; i < num_procs; ++i) { MPI_Recv(&dlocal_std_dev, 1, MPI_DOUBLE, MPI_ANY_SOURCE, 0, MPI_COMM_WORLD, &status); dstd_dev += dlocal_std_dev; } err_time += MPI_Wtime() - tick; tick = MPI_Wtime(); for(i = 1; i < num_procs; ++i) { MPI_Recv(&local_intg, 1, MPI_DOUBLE, MPI_ANY_SOURCE, 1, MPI_COMM_WORLD, &status); intg+= local_intg; } integral_time += MPI_Wtime() - tick; } #endif // print out the max runtime for each calculation double max_derivative_time, max_integral_time, max_err_time; MPI_Reduce(&derivative_time, &max_derivative_time, 1, 
MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD); MPI_Reduce(&integral_time, &max_integral_time, 1, MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD); MPI_Reduce(&err_time, &max_err_time, 1, MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD); if(procid == 0) { printf("Max runtime to calculate derivatives is %e\n", max_derivative_time); printf("Max runtime to calculate integral is %e\n", max_integral_time); printf("Max runtime to calculate derivative errors is %e\n", max_err_time); } //gather derivative results & errors for output //this part shouldn't be included in running time measurements FP_PREC *final_dyc = NULL; FP_PREC *final_derr = NULL; if(procid == 0) { final_dyc = (FP_PREC*)malloc(NGRID * sizeof(FP_PREC)); final_derr = (FP_PREC*)malloc(NGRID * sizeof(FP_PREC)); } MPI_Gather(dyc, points_per_node, MPI_DOUBLE, final_dyc, points_per_node, MPI_DOUBLE, 0, MPI_COMM_WORLD); MPI_Gather(derr, points_per_node, MPI_DOUBLE, final_derr, points_per_node, MPI_DOUBLE, 0, MPI_COMM_WORLD); //final output at root node (rank 0) if(procid == 0) { dstd_dev = sqrt(dstd_dev/(FP_PREC)NGRID); if(ifn(XI, XF) == 0) { printf("WARNING: true integral value from XI to XF is equal zero.\n"); intg_err = 0; } else { intg_err = fabs((ifn(XI, XF) - intg)/ifn(XI, XF)); } print_function_data(NGRID, dx, final_dyc); print_error_data(NGRID, davg_err, dstd_dev, intg_err, dx, final_derr); free(final_dyc); free(final_derr); } MPI_Finalize(); return 0; }
int main (int argc, char *argv[]) { int numproc, rank, len,i; char hostname[MPI_MAX_PROCESSOR_NAME]; MPI_Init(&argc, &argv); MPI_Comm_size(MPI_COMM_WORLD, &numproc); MPI_Comm_rank(MPI_COMM_WORLD, &rank); MPI_Get_processor_name(hostname, &len); FP_PREC *yc, *dyc, *derr, *fullerr; FP_PREC *xc, dx, intg, davg_err, dstd_dev, intg_err; FP_PREC globalSum = 0.0; // MPI vailables MPI_Request *requestList,request; MPI_Status *status; //"real" grid indices int imin, imax; imin = 1 + (rank * (NGRID/numproc)); if(rank == numproc - 1) imax = NGRID; else imax = (rank+1) * (NGRID/numproc); int range = imax - imin + 1; xc = (FP_PREC*) malloc((range + 2) * sizeof(FP_PREC)); yc = (FP_PREC*) malloc((range + 2) * sizeof(FP_PREC)); dyc = (FP_PREC*) malloc((range + 2) * sizeof(FP_PREC)); dx = (XF - XI)/(double)NGRID; for (i = 1; i <= range ; i++) { //xc[i] = imin + (XF - XI) * (FP_PREC)(i - 1)/(FP_PREC)(NGRID - 1); xc[i] = XI + dx * (imin + i - 2); } xc[0] = xc[1] - dx; xc[range + 1] = xc[range] + dx; for( i = 1; i <= range; i++ ) { yc[i] = fn(xc[i]); } yc[0] = fn(xc[0]); yc[range + 1] = fn(xc[range + 1]); for (i = 1; i <= range; i++) { dyc[i] = (yc[i + 1] - yc[i - 1])/(2.0 * dx); } intg = 0.0; for (i = 1; i <= range; i++) { intg += 0.5 * (xc[i + 1] - xc[i]) * (yc[i + 1] + yc[i]); } MPI_Reduce(&intg, &globalSum, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD); //compute the error, average error of the derivatives derr = (FP_PREC*)malloc((range + 2) * sizeof(FP_PREC)); //compute the errors for(i = 1; i <= range; i++) { derr[i] = fabs((dyc[i] - dfn(xc[i]))/dfn(xc[i])); } derr[0] = derr[range + 1] = 0.0; if(rank == 0) { fullerr = (FP_PREC *)malloc(sizeof(FP_PREC)*NGRID); requestList =(MPI_Request*)malloc((numproc-1)*sizeof(MPI_Request)); for(i = 0;i<range;i++) { fullerr[i] = derr[i+1]; } for(i = 1; i<numproc; i++) { int rmin, rmax, *indx; rmin = 1 + (i * (NGRID/numproc)); if(i == numproc - 1) rmax = NGRID; else rmax = (i+1) * (NGRID/numproc); MPI_Irecv(fullerr+rmin-1, rmax-rmin+1, MPI_DOUBLE, 
i, 1, MPI_COMM_WORLD, &(requestList[i-1])); } double sum = 0.0; for(i=0; i<NGRID; i++) { sum+=fullerr[i]; } davg_err = sum/(FP_PREC)NGRID; dstd_dev = 0.0; for(i = 0; i< NGRID; i++) { dstd_dev += pow(derr[i] - davg_err, 2); } dstd_dev = sqrt(dstd_dev/(FP_PREC)NGRID); intg_err = fabs((ifn(XI, XF) - globalSum)/ifn(XI, XF)); printf("%0.4e: %0.4e: %0.4e\n", davg_err, dstd_dev, intg_err); } else { MPI_Isend(derr+1, imax-imin+1, MPI_DOUBLE, 0, rank, MPI_COMM_WORLD, &request); fflush(stdout); } MPI_Finalize(); }
int main(int argc, char *argv[]) { int taskId, totaltasks, i, j; int chunk; MPI_Init(&argc, &argv); MPI_Comm_rank(MPI_COMM_WORLD, &taskId); MPI_Comm_size(MPI_COMM_WORLD, &totaltasks); chunk = NGRID / totaltasks; FP_PREC xc[chunk + 2]; FP_PREC yc[chunk + 2]; FP_PREC dyc[chunk + 2]; FP_PREC derr[chunk + 2]; FP_PREC intg; FP_PREC dx; int prev_task = (taskId - 1) < 0 ? totaltasks - 1 : taskId - 1; int next_task = (taskId + 1) % totaltasks; MPI_Request reqs[4]; MPI_Status stats[4]; MPI_Irecv(&yc[0], 1, MPI_DOUBLE, prev_task, prev_task * 1000 + taskId, MPI_COMM_WORLD, &reqs[0]); MPI_Irecv(&yc[chunk + 1], 1, MPI_DOUBLE, next_task, next_task * 1000 + taskId, MPI_COMM_WORLD, &reqs[1]); for (i = 1; i <= chunk + 1; i++) { xc[i] = (XI + (XF - XI) * (FP_PREC) (i - 1) / (FP_PREC) (NGRID - 1)) + taskId * chunk; } //define the function for (i = 1; i <= chunk; i++) { yc[i] = fn(xc[i]); } MPI_Isend(&yc[chunk], 1, MPI_DOUBLE, next_task, taskId * 1000 + next_task, MPI_COMM_WORLD, &reqs[3]); MPI_Isend(&yc[1], 1, MPI_DOUBLE, prev_task, taskId * 1000 + prev_task, MPI_COMM_WORLD, &reqs[2]); MPI_Waitall(4, reqs, stats); dx = xc[2] - xc[1]; if (taskId == ROOT) { xc[0] = xc[1] - dx; yc[0] = fn(xc[0]); } if (taskId == totaltasks - 1) { xc[chunk + 1] = xc[chunk] + dx; yc[chunk + 1] = fn(xc[chunk + 1]); } //compute the derivative using first-order finite differencing for (i = 1; i <= chunk; i++) { dyc[i] = (yc[i + 1] - yc[i - 1]) / (2.0 * dx); } //compute the integral using Trapazoidal rule intg = 0.0; for (i = 1; i <= chunk; i++) { if (taskId == totaltasks - 1 && i == chunk) continue; intg += 0.5 * (xc[i + 1] - xc[i]) * (yc[i + 1] + yc[i]); } //compute the errors for (i = 1; i <= chunk; i++) { if (i - 1 != chunk - 1) derr[i] = fabs((dyc[i] - dfn(xc[i])) / dfn(xc[i])); } if (taskId != ROOT) { MPI_Request nreqs[2]; MPI_Status nstats[2]; MPI_Isend(derr + 1, chunk, MPI_DOUBLE, ROOT, taskId * 1000 + ROOT, MPI_COMM_WORLD, &nreqs[0]); MPI_Isend(&intg, 1, MPI_DOUBLE, ROOT, taskId * 1000 + ROOT, 
MPI_COMM_WORLD, &nreqs[1]); MPI_Waitall(2, nreqs, nstats); } else { FP_PREC allxc[NGRID]; FP_PREC allderr[NGRID]; FP_PREC allintg[totaltasks]; FP_PREC davg_err = 0.0; FP_PREC dstd_dev = 0.0; FP_PREC intg_err = 0.0; MPI_Request nreqs[2 * (totaltasks - 1)]; MPI_Status nstats[2 * (totaltasks - 1)]; for (i = 1; i < totaltasks; i++) { MPI_Irecv(allderr + (i * chunk), chunk, MPI_DOUBLE, i, i * 1000 + ROOT, MPI_COMM_WORLD, &nreqs[2 * (i - 1)]); MPI_Irecv(allintg + i, 1, MPI_DOUBLE, i, i * 1000 + ROOT, MPI_COMM_WORLD, &nreqs[2 * (i - 1) + 1]); } for (i = 0; i < chunk; i++) { allderr[i] = derr[i + 1]; } MPI_Waitall(2 * (totaltasks - 1), nreqs, nstats); //find the average error for (i = 0; i < NGRID; i++) davg_err += allderr[i]; for (i = 1; i < totaltasks; i++) { intg += allintg[i]; } davg_err /= (FP_PREC) NGRID; dstd_dev = 0.0; for (i = 0; i < NGRID; i++) { dstd_dev += pow(allderr[i] - davg_err, 2); } dstd_dev = sqrt(dstd_dev / (FP_PREC) NGRID); intg_err = fabs((ifn(XI, XF) - intg) / ifn(XI, XF)); for (i = 0; i < NGRID; i++) { allxc[i] = XI + (XF - XI) * (FP_PREC) i / (FP_PREC) (NGRID - 1); } //print_error_data(NGRID, davg_err, dstd_dev, &xc[1], derr, intg_err); print_error_data(NGRID, davg_err, dstd_dev, allxc, allderr, intg_err); } MPI_Finalize(); }
// Emits the code for a `new` expression of class type `tc`:
//   1. resolves the class,
//   2. obtains storage: a stack alloca, a call to the user-supplied
//      allocator, or a call to a runtime allocation function
//      (`_d_newThrowable` when global.params.ehnogc and newexp->thrownew
//      are both set, `_d_allocclass` otherwise),
//   3. runs DtoInitClass on the storage unless the `_d_newThrowable` path
//      was taken,
//   4. stores the outer `this` / nested context when applicable,
//   5. evaluates the argument prefix and calls the constructor if the
//      expression names one.
// Always returns a DImValue wrapping the storage; the constructor's own
// return value is discarded.
DValue *DtoNewClass(Loc &loc, TypeClass *tc, NewExp *newexp) {
  // resolve type
  DtoResolveClass(tc->sym);

  // allocate
  LLValue *storage;
  bool needsInit = true;
  if (newexp->onstack) {
    const unsigned declaredAlign = tc->sym->alignsize;
    const unsigned stackAlign =
        (declaredAlign == STRUCTALIGN_DEFAULT) ? 0 : declaredAlign;
    storage = DtoRawAlloca(DtoType(tc)->getContainedType(0), stackAlign,
                           ".newclass_alloca");
  } else if (newexp->allocator) {
    // custom allocator
    DtoResolveFunction(newexp->allocator);
    DFuncValue allocCallee(newexp->allocator, DtoCallee(newexp->allocator));
    DValue *allocResult =
        DtoCallFunction(newexp->loc, nullptr, &allocCallee, newexp->newargs);
    storage = DtoBitCast(DtoRVal(allocResult), DtoType(tc), ".newclass_custom");
  } else {
    // default allocator
    const bool useEHAlloc = global.params.ehnogc && newexp->thrownew;
    const char *runtimeFnName = useEHAlloc ? "_d_newThrowable" : "_d_allocclass";
    const char *allocName =
        useEHAlloc ? ".newthrowable_alloc" : ".newclass_gc_alloc";
    const char *castName = useEHAlloc ? ".newthrowable" : ".newclass_gc";

    llvm::Function *allocFn = getRuntimeFunction(loc, gIR->module, runtimeFnName);
    LLConstant *classInfo = DtoBitCast(getIrAggr(tc->sym)->getClassInfoSymbol(),
                                       DtoType(getClassInfoType()));
    storage = gIR->CreateCallOrInvoke(allocFn, classInfo, allocName)
                  .getInstruction();
    storage = DtoBitCast(storage, DtoType(tc), castName);
    needsInit = !useEHAlloc;
  }

  // init
  if (needsInit) {
    DtoInitClass(tc, storage);
  }

  if (newexp->thisexp) {
    // init inner-class outer reference
    Logger::println("Resolving outer class");
    LOG_SCOPE;
    unsigned vthisIndex = getFieldGEPIndex(tc->sym, tc->sym->vthis);
    LLValue *src = DtoRVal(newexp->thisexp);
    LLValue *dst = DtoGEPi(storage, 0, vthisIndex);
    IF_LOG Logger::cout() << "dst: " << *dst << "\nsrc: " << *src << '\n';
    DtoStore(src, DtoBitCast(dst, getPtrToType(src->getType())));
  } else if (tc->sym->isNested() && tc->sym->vthis) {
    // set the context for nested classes
    DtoResolveNestedContext(loc, tc->sym, storage);
  }

  // no constructor: return the default constructed class
  if (!newexp->member) {
    assert(newexp->argprefix == NULL);
    return new DImValue(tc, storage);
  }

  // evaluate argprefix
  if (newexp->argprefix) {
    toElemDtor(newexp->argprefix);
  }

  // call constructor
  Logger::println("Calling constructor");
  assert(newexp->arguments != NULL);
  DtoResolveFunction(newexp->member);
  DFuncValue ctorCallee(newexp->member, DtoCallee(newexp->member), storage);
  // ignore ctor return value (C++ ctors on Posix may not return `this`)
  DtoCallFunction(newexp->loc, tc, &ctorCallee, newexp->arguments);
  return new DImValue(tc, storage);
}
/*
 * Finds a root of fn(x, z1, z2, z3, z4) known to lie in [x1, x2], using
 * Newton-Raphson steps safeguarded by bisection.
 *
 *   fn   - the function whose root is sought; called as fn(x, z1, z2, z3, z4)
 *   dfn  - its derivative with respect to x; called with the same arguments
 *   z1..z4 - extra arguments passed through unchanged to fn and dfn
 *   x1, x2 - bracketing interval; fn must not have the same strict sign at
 *            both ends, otherwise nrerror() is called
 *   xacc - the iteration stops once the last step is smaller than xacc in
 *          magnitude
 *
 * Returns the root estimate.  If JMAX iterations pass without convergence,
 * nrerror() is called and, should it return, 0.0 is returned.
 *
 * All magnitude tests use fabsl so that the long double operands are not
 * truncated to double.
 */
long double rtsafe(long double (fn)(long double, long double, long double, long double, int),
                   long double (dfn)(long double, long double, long double, long double, int),
                   long double z1, long double z2, long double z3, int z4,
                   long double x1, long double x2, long double xacc)
{
    void nrerror(char error_text[]);
    int j;
    long double df, dx, dxold, f, fh, fl;
    long double temp, xh, xl, rts;

    fl = fn(x1,z1,z2,z3,z4);
    fh = fn(x2,z1,z2,z3,z4);

    if ((fl > 0.0 && fh > 0.0) || (fl < 0.0 && fh < 0.0))
        nrerror("Root must be bracketed in rtsafe");
    if (fl == 0.0) return x1;
    if (fh == 0.0) return x2;

    /* orient the bracket so that fn(xl) < 0 < fn(xh) */
    if (fl < 0.0) {
        xl = x1;
        xh = x2;
    } else {
        xh = x1;
        xl = x2;
    }

    rts = 0.5*(x1+x2);          /* initial guess: midpoint */
    dxold = fabsl(x2-x1);       /* step size before last */
    dx = dxold;                 /* last step size */
    f = fn(rts,z1,z2,z3,z4);
    df = dfn(rts,z1,z2,z3,z4);

    for (j = 1; j <= JMAX; j++) {
        if ((((rts-xh)*df-f)*((rts-xl)*df-f) >= 0.0)
            || (fabsl(2.0*f) > fabsl(dxold*df))) {
            /* Newton would leave the bracket or is converging too slowly:
               bisect instead */
            dxold = dx;
            dx = 0.5*(xh-xl);
            rts = xl+dx;
            if (xl == rts) return rts;      /* change is negligible */
        } else {
            /* Newton step */
            dxold = dx;
            dx = f/df;
            temp = rts;
            rts -= dx;
            if (temp == rts) return rts;    /* change is negligible */
        }

        if (fabsl(dx) < xacc) return rts;   /* converged */

        f = fn(rts,z1,z2,z3,z4);
        df = dfn(rts,z1,z2,z3,z4);

        /* keep the root bracketed */
        if (f < 0.0)
            xl = rts;
        else
            xh = rts;
    }

    nrerror("Maximum number of iterations exceeded in rtsafe");
    return 0.0;
}