Beispiel #1
0
/*
 * solve2pt - attempt to update node `ind` from the pair of nodes `inda`, `indb`.
 *
 * The positions and times of the two source nodes are read from the
 * file-level arrays x0[] and t0[].  A time t, started at t0[ind], is refined
 * with the update t -= fn(...)/dfn(...) and the matching lateral position x
 * is obtained from lateral().  When the residual is within TOL, t is not
 * earlier than tmin and x lies between the two source positions (padded by
 * TOL), node `ind` may be overwritten in t0[], x0[], pup[] and v[].
 *
 *   ind        index of the node to update
 *   inda,indb  indices of the two source nodes
 *   ha,hb      passed through unchanged to fn(), dfn() and lateral()
 *   ch         incoming status flag; an incoming 's' is never downgraded
 *
 * Returns 's' if the node was updated here or ch was already 's'; 'n' if an
 * acceptable solution was found but not stored; 'f' if kmin >= nt1 (and ch
 * was not 's'); otherwise ch is returned unchanged.
 */
static char solve2pt(int ind,int inda,int indb,float ha,float hb,char ch) 
{
    int kmin /* ,i,k  ,i1,k1 */;
  float x1,x2;
  float xa,ta,xb,tb,x,t,tmin;
  float d,dp,dp2,df;
  char /* ch1='y', */ ch2='n';
  int ntry=0;

  /* position and time of the two source nodes */
  xa=*(x0+inda);
  ta=*(t0+inda);
  xb=*(x0+indb);
  tb=*(t0+indb);
  /* earliest admissible time (later of the two source times, less TOL)
     and its index on a grid of spacing ht */
  tmin=SF_MAX(ta,tb)-TOL;
  kmin=floor(tmin/ht);
  /* admissible lateral interval: between the two sources, padded by TOL */
  x1=SF_MIN(xa,xb)-TOL;
  x2=SF_MAX(xa,xb)+TOL;
  /* initial guess: the time currently stored for the target node */
  t=*(t0+ind);
  if( kmin<nt1 ) {
    /* NOTE(review): d and x are first assigned inside this loop; if NTRYMAX
       were <= 0 they would be read uninitialised below - confirm NTRYMAX > 0 */
    while( ch2=='n' && ntry<NTRYMAX ) {
      x=lateral(t,xa,xb,ta,tb,ha,hb);
      /*i=SF_MAX(0,SF_MIN(nx1-1,floor(x/hx)));
	k=SF_MAX(0,SF_MIN(nt1-1,floor(t/ht))); */
      d=fn(t,ta,tb,xa,xb,ha,hb);
      /* seed the residual history so the first loop test passes for d != 0 */
      dp=2.0*d;
      dp2=dp;
      /* iterate while the residual is above TOL and still smaller in
         magnitude than the lagged residual dp2 */
      while( fabs(d)>TOL && fabs(dp2)>fabs(d) ) {
        df=dfn(t,ta,tb,xa,xb,ha,hb);
        /* NOTE(review): no guard against df == 0 here */
        t-=d/df;
        dp2=dp;
        dp=d;
        d=fn(t,ta,tb,xa,xb,ha,hb);
      } 
      x=lateral(t,xa,xb,ta,tb,ha,hb);
/*      i1=SF_MAX(0,SF_MIN(nx1-1,floor(x/hx)));
	k1=SF_MAX(0,SF_MIN(nt1-1,floor(t/ht))); */
      /* stop retrying once the candidate is inside the admissible window;
         t is not reset between attempts, so a retry resumes from the last t */
      if( x>=x1 && x<=x2 && t>=tmin ) ch2='y';
      ntry++;
    }
    /* accept only a converged residual inside the admissible window */
    if( fabs(d)<=TOL  && t>=tmin  ) {
      if( x>=x1 && x<=x2 ) {
/*	ch1='n'; */
	/* store when pup[ind] is at most 1, or when it is already 2 and the
	   new time is earlier than the stored one */
	if( *(pup+ind)<=1 || (*(pup+ind)==2 && t<(*(t0+ind))) ) {
	  ch='s';
	  *(t0+ind)=t;
	  *(x0+ind)=x;
	  *(pup+ind)=2;
	  *(v+ind)=1.0/linterp2(x,t);
	}
	/* keep 's' if set (now or on entry); otherwise report 'n' */
	ch=(ch=='s') ? 's' : 'n';
      }
    }
  }
  else
    /* kmin is at or beyond nt1: report 'f' unless ch was already 's' */
    ch=(ch=='s') ? 's' : 'f';
  return ch;
}
Beispiel #2
0
/*
 *	Depth-first numbering of the nodes reachable from parentp.
 *
 *	A node that is already numbered is left alone.  A node that is
 *	still busy means the walk has closed a cycle, which is handed to
 *	dfn_findcycle().  Otherwise the node is pre-visited, every child
 *	arc is followed recursively, and the node is post-visited.
 *	dfn_stack is allocated with DFN_DEPTH entries on the first call;
 *	failure to allocate it is fatal.
 */
void
dfn(nltype *parentp)
{
	arctype	*childarc;

#ifdef DEBUG
	if (debug & DFNDEBUG) {
		(void) printf("[dfn] dfn(");
		printname(parentp);
		(void) printf(")\n");
	}
#endif /* DEBUG */

	/* first call: create dfn_stack with its initial capacity */
	if (!dfn_stack) {
		dfntype	*fresh;

		dfn_sz = DFN_DEPTH;
		fresh = (dfntype *) malloc(dfn_sz * sizeof (dfntype));
		if (!fresh) {
			(void) fprintf(stderr,
			    "fatal: can't malloc %d objects\n", dfn_sz);
			exit(1);
		}
		dfn_stack = fresh;
	}

	if (!dfn_numbered(parentp)) {
		if (dfn_busy(parentp)) {
			/* reached a node that is still in progress: a cycle */
			dfn_findcycle(parentp);
		} else {
			/* parent first ... */
			dfn_pre_visit(parentp);

			/* ... then each child in list order ... */
			childarc = parentp->children;
			while (childarc) {
				dfn(childarc->arc_childp);
				childarc = childarc->arc_childlist;
			}

			/* ... and the parent again once the children are done */
			dfn_post_visit(parentp);
		}
	}
}
Beispiel #3
0
    /*
     *	Depth-first numbering of the nodes reachable from parentp.
     *	Numbered nodes are skipped, busy nodes are reported as a cycle
     *	through dfn_findcycle(), and arcs flagged DEADARC are not followed.
     */
void
dfn(nltype *parentp)
{
    arctype	*arc;

#   ifdef DEBUG
	if ( debug & DFNDEBUG ) {
	    printf( "[dfn] dfn(" );
	    printname( parentp );
	    printf( ")\n" );
	}
#   endif /* DEBUG */

    if ( ! dfn_numbered( parentp ) ) {
	if ( dfn_busy( parentp ) ) {
		/*
		 *	still in progress: we have walked into a cycle
		 */
	    dfn_findcycle( parentp );
	} else {
		/*
		 *	parent before its children
		 */
	    dfn_pre_visit( parentp );
		/*
		 *	follow every live child arc, in list order
		 */
	    arc = parentp -> children;
	    while ( arc ) {
		if ( ! ( arc -> arc_flags & DEADARC ) ) {
		    dfn( arc -> arc_childp );
		}
		arc = arc -> arc_childlist;
	    }
		/*
		 *	parent again after its children
		 */
	    dfn_post_visit( parentp );
	}
    }
}
Beispiel #4
0
// Runs _dfs from every vertex that has not been visited yet, scanning vertex
// ids from the highest down to 0. _processed and _visited are reset to zero
// for all vertices first; the stack and the three per-vertex arrays are
// scratch state handed to _dfs. A null or empty graph is ignored.
void get_scc(graph_t * g)
{
    if (!g) {
        return;
    }
    if (!g->vex_count()) {
        return;
    }

    int vertex_total = g->vex_count();
    list_t * adjacency = g->get_structure();

    _processed.assign(vertex_total, 0);
    _visited.assign(vertex_total, 0);

    std::stack<int> pending;
    std::vector<int> on_stack(vertex_total, 0);
    std::vector<int> low_link(vertex_total, 0);
    std::vector<int> visit_order(vertex_total, 0);

    for (int v = vertex_total; v-- > 0; ) {
        if (_visited[v]) {
            continue;
        }
        _dfs(g, adjacency, v, low_link, visit_order, pending, on_stack);
    }
}
Beispiel #5
0
// Builds the value for a `new` expression of class type `tc`.
//
// Steps, in order: resolve the class, obtain storage (stack alloca, the
// user-supplied allocator, or the "_d_newclass" runtime function), run
// DtoInitClass on it, store the outer-object reference or nested context if
// there is one, and finally call the constructor when `newexp->member` is set.
//
// Returns the constructor call's result when a constructor is called,
// otherwise a new DImValue wrapping the initialised memory.
DValue* DtoNewClass(Loc& loc, TypeClass* tc, NewExp* newexp)
{
    // resolve type
    DtoResolveClass(tc->sym);

    // allocate
    LLValue* mem;
    if (newexp->onstack)
    {
        // FIXME align scope class to its largest member
        // (the second argument, 0, is passed where an alignment would go - TODO confirm)
        mem = DtoRawAlloca(DtoType(tc)->getContainedType(0), 0, ".newclass_alloca");
    }
    // custom allocator
    else if (newexp->allocator)
    {
        // call the allocator with newexp->newargs and cast its result to the class type
        DtoResolveFunction(newexp->allocator);
        DFuncValue dfn(newexp->allocator, getIrFunc(newexp->allocator)->func);
        DValue* res = DtoCallFunction(newexp->loc, NULL, &dfn, newexp->newargs);
        mem = DtoBitCast(res->getRVal(), DtoType(tc), ".newclass_custom");
    }
    // default allocator
    else
    {
        // call _d_newclass with the class-info symbol, then cast to the class type
        llvm::Function* fn = LLVM_D_GetRuntimeFunction(loc, gIR->module, "_d_newclass");
        LLConstant* ci = DtoBitCast(getIrAggr(tc->sym)->getClassInfoSymbol(), DtoType(Type::typeinfoclass->type));
        mem = gIR->CreateCallOrInvoke(fn, ci, ".newclass_gc_alloc").getInstruction();
        mem = DtoBitCast(mem, DtoType(tc), ".newclass_gc");
    }

    // init
    DtoInitClass(tc, mem);

    // init inner-class outer reference
    if (newexp->thisexp)
    {
        Logger::println("Resolving outer class");
        LOG_SCOPE;
        // store the evaluated `thisexp` into the object's vthis field
        DValue* thisval = toElem(newexp->thisexp);
        unsigned idx = getFieldGEPIndex(tc->sym, tc->sym->vthis);
        LLValue* src = thisval->getRVal();
        LLValue* dst = DtoGEPi(mem, 0, idx);
        IF_LOG Logger::cout() << "dst: " << *dst << "\nsrc: " << *src << '\n';
        // the destination is cast to a pointer to src's type before the store
        DtoStore(src, DtoBitCast(dst, getPtrToType(src->getType())));
    }
    // set the context for nested classes
    else if (tc->sym->isNested() && tc->sym->vthis)
    {
        DtoResolveNestedContext(loc, tc->sym, mem);
    }

    // call constructor
    if (newexp->member)
    {
        Logger::println("Calling constructor");
        assert(newexp->arguments != NULL);
        DtoResolveFunction(newexp->member);
        // `mem` is passed as the third DFuncValue argument
        // (presumably the `this` pointer for the call - TODO confirm)
        DFuncValue dfn(newexp->member, getIrFunc(newexp->member)->func, mem);
        return DtoCallFunction(newexp->loc, tc, &dfn, newexp->arguments);
    }

    // return default constructed class
    return new DImValue(tc, mem);
}
Beispiel #6
0
 // Backward pass: returns a one-element ArrayVector holding
 // gradOutput[0] * dfn(input[0]). Only the first entry of each argument is
 // read. NOTE(review): dfn is presumably the derivative of the forward
 // function and `*` presumably element-wise - confirm against their definitions.
 ArrayVector backward(const ArrayVector &input,
                      const ArrayVector &gradOutput)
 {
     return { gradOutput[0] * dfn(input[0]) };
 }
Beispiel #7
0
/*
 * MPI driver: every rank owns NGRID / num_procs consecutive grid points on
 * [XI, XF], evaluates fn() there, and computes
 *   - the derivative by central differences (neighbouring ranks exchange one
 *     boundary value each, blocking or non-blocking depending on BLOCKING),
 *   - the integral by the trapezoidal rule,
 *   - the relative derivative error against dfn(), its mean and its
 *     standard deviation (reduced with MPI collectives or with manual
 *     send/receive depending on SINGLE_CALL_REDUCTION).
 * Rank 0 prints the maximum per-phase run times, gathers the derivative and
 * error arrays, and passes them to print_function_data()/print_error_data().
 *
 * Returns 1 (after MPI_Finalize) when NGRID is not divisible by the number
 * of processes, 0 otherwise.
 *
 * NOTE(review): every message uses MPI_DOUBLE for FP_PREC buffers - this
 * assumes FP_PREC is double; confirm.
 */
int main (int argc, char *argv[])
{
  int procid, num_procs;
  MPI_Status status;
  // derivative_time, integral_time, err_time is the local sum of runtime for each computation
  // tick is used to mark time
  double derivative_time = 0, integral_time = 0, err_time = 0, tick;

  MPI_Init(&argc, &argv);
  MPI_Comm_rank(MPI_COMM_WORLD, &procid);
  MPI_Comm_size(MPI_COMM_WORLD, &num_procs);

  // Calculate grid-points per process
  if(NGRID % num_procs > 0)
  {
	// NOTE(review): this message has no trailing newline
	if(procid == 0) printf("NGRID should be divisible by the number of processes!");
	MPI_Finalize();
	return 1;
  }
  int points_per_node = NGRID / num_procs;

  //loop index
  int i;

  //domain array and step size
  FP_PREC xc[points_per_node], dx;

  //function array and derivative
  //the size will be dependent on the
  //number of processors used
  //to the program
  FP_PREC yc[points_per_node], dyc[points_per_node];
  
  //integration values
  FP_PREC local_intg, intg;

  //error analysis array
  FP_PREC derr[points_per_node];

  //error analysis values
  FP_PREC dlocal_sum_err, davg_err, dlocal_std_dev, dstd_dev, intg_err;

  //calculate dx
  dx = (FP_PREC)(XF - XI)/(FP_PREC)(NGRID - 1);

  // get start X for each process (my_XI)
  int bins_before_me = procid * points_per_node;
  FP_PREC my_XI = XI + bins_before_me * dx;

  //construct grid
  for (i = 0; i < points_per_node; ++i)
  {
    xc[i] = my_XI + i * dx;
  }

  //define the function
  for(i = 0; i < points_per_node; ++i)
  {
    yc[i] = fn(xc[i]);
  }

  //define holders for left and right bound value
  //the outermost ranks have no neighbour on one side, so they evaluate fn()
  //one step outside the domain instead of receiving a value
  FP_PREC left_bound_yc, right_bound_yc;
  if(procid == 0) left_bound_yc = fn(XI-dx);
  if(procid == num_procs - 1) right_bound_yc = fn(XF+dx);

  tick = MPI_Wtime();
#if BLOCKING
  if(procid == 0) printf("Using blocking message! \n");
  //Step 1: even nodes send to the right then receive back
  //Step 2: even nodes receive from the left then send back
  //odd nodes do the mirror image (receive/send left first, then send/receive
  //right), so every blocking send is paired with a posted receive
  if(procid % 2 == 0)
  {
    if(procid < num_procs - 1)
    {
	MPI_Send(&yc[points_per_node-1], 1, MPI_DOUBLE, procid+1, 0, MPI_COMM_WORLD);
	MPI_Recv(&right_bound_yc, 1, MPI_DOUBLE, procid+1, 0, MPI_COMM_WORLD, &status);
    }
    if(procid > 0)
    {
	MPI_Recv(&left_bound_yc, 1, MPI_DOUBLE, procid-1, 0, MPI_COMM_WORLD, &status);
	MPI_Send(&yc[0], 1, MPI_DOUBLE, procid-1, 0, MPI_COMM_WORLD);
    }
  } else
  {
    MPI_Recv(&left_bound_yc, 1, MPI_DOUBLE, procid-1, 0, MPI_COMM_WORLD, &status);
    MPI_Send(&yc[0], 1, MPI_DOUBLE, procid-1, 0, MPI_COMM_WORLD);
    if(procid < num_procs - 1)
    {
    	MPI_Send(&yc[points_per_node-1], 1, MPI_DOUBLE, procid+1, 0, MPI_COMM_WORLD);
    	MPI_Recv(&right_bound_yc, 1, MPI_DOUBLE, procid+1, 0, MPI_COMM_WORLD, &status);
    }
  }
#else
  if(procid == 0) printf("Using non-blocking message! \n");
  //at most two receives and two sends per rank; current_request counts how
  //many were actually posted so MPI_Waitall below waits on exactly those
  MPI_Request request[4];
  int current_request = 0;
  if(procid < num_procs - 1)
  { // receive right bound yc
      MPI_Irecv(&right_bound_yc, 1, MPI_DOUBLE, procid+1, 0, MPI_COMM_WORLD, &request[current_request]);
      ++current_request;
  }
  if(procid > 0)
  { // receive left bound yc
      MPI_Irecv(&left_bound_yc, 1, MPI_DOUBLE, procid-1, 0, MPI_COMM_WORLD, &request[current_request]);
      ++current_request;
  }
  if(procid < num_procs - 1)
  { // send right bound yc to right node
      MPI_Isend(&yc[points_per_node-1], 1, MPI_DOUBLE, procid+1, 0, MPI_COMM_WORLD, &request[current_request]);
      ++current_request;
  }
  if(procid > 0)
  { // send left bound yc to left node
      MPI_Isend(&yc[0], 1, MPI_DOUBLE, procid-1, 0, MPI_COMM_WORLD, &request[current_request]);
      ++current_request;
  }
#endif
  //the exchange time is charged to both the derivative and the integral
  //totals, since both use the received boundary values
  derivative_time += MPI_Wtime() - tick;
  integral_time += MPI_Wtime() - tick;

  // Overlap computation and communication BEGIN
  //compute the derivative at interior points using central finite differencing
  tick = MPI_Wtime();
  for (i = 1; i < points_per_node-1; ++i)
  {
    dyc[i] = (yc[i + 1] - yc[i - 1])/(2.0 * dx);
  }
  derivative_time += MPI_Wtime() - tick;

  //compute the integral over the local intervals using the trapezoidal rule
  tick = MPI_Wtime();
  local_intg = 0.0;
  for (i = 0; i < points_per_node-1; ++i)
  {
    local_intg += 0.5 * (yc[i] + yc[i + 1]) * dx;
  }
  integral_time += MPI_Wtime() - tick;
  // Overlap computation and communication END

  // WAIT for non-blocking message complete before continue
#if !BLOCKING
  tick = MPI_Wtime();
  MPI_Waitall(current_request, request, MPI_STATUSES_IGNORE);
  derivative_time += MPI_Wtime() - tick;
  integral_time += MPI_Wtime() - tick;
#endif

  // compute derivative of boundary points, runtime is not counted because it's quite small
  // NOTE(review): with points_per_node == 1 these lines index yc[1] and
  // yc[-1], which are outside the array - confirm that case cannot occur
  dyc[0] = (yc[1] - left_bound_yc)/(2.0 * dx);
  dyc[points_per_node-1] = (right_bound_yc - yc[points_per_node-2])/(2.0 * dx);

  // compute integral at right boundary point, runtime is not counted because it's quite small
  // (the interval between this rank's last point and the next rank's first point;
  //  the last rank has no such interval)
  if(procid < num_procs-1) local_intg += 0.5 * (yc[points_per_node-1] + right_bound_yc) * dx;

  tick = MPI_Wtime();
  //compute the error, average error of the derivatives
  //(relative error against dfn(); points where dfn() is exactly zero get error 0
  // and a warning, to avoid dividing by zero)
  for(i = 0; i < points_per_node; ++i)
  {
    if(dfn(xc[i]) == 0)
    {
      printf("WARNING: derivative at point %d on process %d is zero.\n", i, procid);
      derr[i] = 0;
    }
    else derr[i] = fabs((dyc[i] - dfn(xc[i]))/dfn(xc[i]));
  }

  //find the local sum of errors
  dlocal_sum_err = 0.0;
  for(i = 0; i < points_per_node; ++i)
  {
    dlocal_sum_err += derr[i];
  }

  //calculate and output errors
#if SINGLE_CALL_REDUCTION
  if(procid == 0) printf("Using single call reduction! \n");
  //all nodes collect sum err and convert it to the mean value
  MPI_Allreduce(&dlocal_sum_err, &davg_err, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
  davg_err /= (FP_PREC)NGRID; // each process calculates global average
#else
  if(procid == 0) printf("Using manual call reduction! \n");
  //all nodes collect sum err and convert it to the mean value
  //(non-root ranks send their sum to rank 0; rank 0 adds them up, divides by
  // NGRID and broadcasts the mean back to everyone)
  if(procid != 0) MPI_Send(&dlocal_sum_err, 1, MPI_DOUBLE, 0, 0, MPI_COMM_WORLD);
  else if(procid == 0)
  {
    davg_err = dlocal_sum_err;
    for(i = 1; i < num_procs; ++i)
    {
      //rank 0's own sum is already in davg_err, so the variable is reused as the receive buffer
      MPI_Recv(&dlocal_sum_err, 1, MPI_DOUBLE, MPI_ANY_SOURCE, 0, MPI_COMM_WORLD, &status);
      davg_err += dlocal_sum_err;
    }
    davg_err /= (FP_PREC)NGRID;
  }
  MPI_Bcast(&davg_err, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
#endif

  //now all nodes have davg_err, find sum squared differences of local derr
  dlocal_std_dev = 0.0;
  for(i = 0; i < points_per_node; ++i)
  {
    dlocal_std_dev += pow(derr[i] - davg_err, 2);
  }
  err_time += MPI_Wtime() - tick;

#if SINGLE_CALL_REDUCTION
  //reduce local integral & local (sum squared differences of derr) to root
  tick = MPI_Wtime();
  MPI_Reduce(&dlocal_std_dev, &dstd_dev, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
  err_time += MPI_Wtime() - tick;
  tick = MPI_Wtime();
  MPI_Reduce(&local_intg, &intg, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
  integral_time += MPI_Wtime() - tick;
#else
  //reduce local integral & local (sum squared differences of derr) to root
  //tag 0 carries the squared-difference sums and tag 1 the integrals, so the
  //two streams cannot be confused at the root
  if(procid != 0)
  {
    tick = MPI_Wtime();
    MPI_Send(&dlocal_std_dev, 1, MPI_DOUBLE, 0, 0, MPI_COMM_WORLD);
    err_time += MPI_Wtime() - tick;
    tick = MPI_Wtime();
    MPI_Send(&local_intg, 1, MPI_DOUBLE, 0, 1, MPI_COMM_WORLD);
    integral_time += MPI_Wtime() - tick;
  } else if(procid == 0)
  {
    dstd_dev = dlocal_std_dev;
    intg = local_intg;
    tick = MPI_Wtime();
    for(i = 1; i < num_procs; ++i)
    {
      MPI_Recv(&dlocal_std_dev, 1, MPI_DOUBLE, MPI_ANY_SOURCE, 0, MPI_COMM_WORLD, &status);
      dstd_dev += dlocal_std_dev;
    }
    err_time += MPI_Wtime() - tick;
    tick = MPI_Wtime();
    for(i = 1; i < num_procs; ++i)
    {
      MPI_Recv(&local_intg, 1, MPI_DOUBLE, MPI_ANY_SOURCE, 1, MPI_COMM_WORLD, &status);
      intg+= local_intg;
    }
    integral_time += MPI_Wtime() - tick;
  }
#endif

  // print out the max runtime for each calculation
  double max_derivative_time, max_integral_time, max_err_time;
  MPI_Reduce(&derivative_time, &max_derivative_time, 1, MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD);
  MPI_Reduce(&integral_time, &max_integral_time, 1, MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD);
  MPI_Reduce(&err_time, &max_err_time, 1, MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD);
  if(procid == 0)
  {
    printf("Max runtime to calculate derivatives is %e\n", max_derivative_time);
    printf("Max runtime to calculate integral is %e\n", max_integral_time);
    printf("Max runtime to calculate derivative errors is %e\n", max_err_time);
  }

  //gather derivative results & errors for output
  //this part shouldn't be included in running time measurements
  //only the root allocates receive buffers; the other ranks pass NULL
  FP_PREC *final_dyc = NULL;
  FP_PREC *final_derr = NULL;
  if(procid == 0)
  {
    // NOTE(review): the malloc results are not checked before use
    final_dyc = (FP_PREC*)malloc(NGRID * sizeof(FP_PREC));
    final_derr = (FP_PREC*)malloc(NGRID * sizeof(FP_PREC));
  }
  MPI_Gather(dyc, points_per_node, MPI_DOUBLE, final_dyc, points_per_node, MPI_DOUBLE, 0, MPI_COMM_WORLD);
  MPI_Gather(derr, points_per_node, MPI_DOUBLE, final_derr, points_per_node, MPI_DOUBLE, 0, MPI_COMM_WORLD);

  //final output at root node (rank 0)
  if(procid == 0)
  {
    //dstd_dev holds the global sum of squared differences up to here
    dstd_dev = sqrt(dstd_dev/(FP_PREC)NGRID);
    if(ifn(XI, XF) == 0) {
      printf("WARNING: true integral value from XI to XF is equal zero.\n");
      intg_err = 0;
    } else {
      intg_err = fabs((ifn(XI, XF) - intg)/ifn(XI, XF));
    }

    print_function_data(NGRID, dx, final_dyc);
    print_error_data(NGRID, davg_err, dstd_dev, intg_err, dx, final_derr);

    free(final_dyc);
    free(final_derr);
  }

  MPI_Finalize();
  return 0;
}
Beispiel #8
0
int main (int argc, char *argv[])
{
    int  numproc, rank, len,i;
    char hostname[MPI_MAX_PROCESSOR_NAME];
    MPI_Init(&argc, &argv);
    MPI_Comm_size(MPI_COMM_WORLD, &numproc);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Get_processor_name(hostname, &len);
    
    FP_PREC *yc, *dyc, *derr, *fullerr;
    FP_PREC *xc, dx, intg, davg_err, dstd_dev, intg_err;
    FP_PREC globalSum = 0.0;

    // MPI vailables 
    MPI_Request *requestList,request;
    MPI_Status  *status;

    //"real" grid indices
    int imin, imax;
    
    imin = 1 + (rank * (NGRID/numproc));
    
    if(rank == numproc - 1)
    imax = NGRID;
    
    else
    imax = (rank+1) * (NGRID/numproc);
       
    int range = imax - imin + 1;
    
    xc =  (FP_PREC*) malloc((range + 2) * sizeof(FP_PREC));
    yc =  (FP_PREC*) malloc((range + 2) * sizeof(FP_PREC));
    dyc =  (FP_PREC*) malloc((range + 2) * sizeof(FP_PREC));
    dx = (XF - XI)/(double)NGRID;
    for (i = 1; i <= range ; i++)
    {
        //xc[i] = imin + (XF - XI) * (FP_PREC)(i - 1)/(FP_PREC)(NGRID - 1);
        xc[i] = XI + dx * (imin + i - 2);
    }
    
    xc[0] = xc[1] - dx;
    xc[range + 1] = xc[range] + dx;
    
    for( i = 1; i <= range; i++ )
    {
        yc[i] = fn(xc[i]);
    }
    
    yc[0] = fn(xc[0]);
    yc[range + 1] = fn(xc[range + 1]);
    
    for (i = 1; i <= range; i++)
    {
        dyc[i] = (yc[i + 1] - yc[i - 1])/(2.0 * dx);
    }
    
    intg = 0.0;
    for (i = 1; i <= range; i++)
    {
        intg += 0.5 * (xc[i + 1] - xc[i]) * (yc[i + 1] + yc[i]);
    }
    
    MPI_Reduce(&intg, &globalSum, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
    
    
    //compute the error, average error of the derivatives
    derr = (FP_PREC*)malloc((range + 2) * sizeof(FP_PREC));
    
    //compute the errors
    for(i = 1; i <= range; i++)
    {
        derr[i] = fabs((dyc[i] - dfn(xc[i]))/dfn(xc[i]));
    }
    
    derr[0] = derr[range + 1] = 0.0;
    
    if(rank == 0)
    {
        fullerr = (FP_PREC *)malloc(sizeof(FP_PREC)*NGRID);
        requestList =(MPI_Request*)malloc((numproc-1)*sizeof(MPI_Request));
        for(i = 0;i<range;i++)
        {
            fullerr[i] = derr[i+1];
        }
        for(i = 1; i<numproc; i++)
        {
            int rmin, rmax, *indx;
            rmin = 1 + (i * (NGRID/numproc));
            if(i == numproc - 1)
                rmax = NGRID;
            else
                rmax = (i+1) * (NGRID/numproc);
            MPI_Irecv(fullerr+rmin-1, rmax-rmin+1, MPI_DOUBLE, i, 1, MPI_COMM_WORLD, &(requestList[i-1]));
        }
        double sum = 0.0;
        for(i=0; i<NGRID; i++)
        {
            sum+=fullerr[i];
        }
        davg_err = sum/(FP_PREC)NGRID;
        dstd_dev = 0.0;
        for(i = 0; i< NGRID; i++)
        {
            dstd_dev += pow(derr[i] - davg_err, 2);
        }
        dstd_dev = sqrt(dstd_dev/(FP_PREC)NGRID);
        
        intg_err = fabs((ifn(XI, XF) - globalSum)/ifn(XI, XF));
        printf("%0.4e: %0.4e: %0.4e\n", davg_err, dstd_dev, intg_err);

    }
    else
    {
        MPI_Isend(derr+1, imax-imin+1, MPI_DOUBLE, 0, rank, MPI_COMM_WORLD, &request);
        fflush(stdout);
    }
    
    MPI_Finalize();
}
/*
 * MPI driver: every rank owns `chunk` = NGRID / totaltasks consecutive grid
 * points on [XI, XF].  Neighbouring ranks exchange one boundary value of
 * fn() each (ring topology; the two end ranks overwrite the wrapped-around
 * value with fn() evaluated one step outside the domain).  Each rank then
 * computes central-difference derivatives, a trapezoidal integral and the
 * relative derivative error against dfn().  ROOT collects the error slices
 * and partial integrals and passes the statistics to print_error_data().
 *
 * Local arrays are 1-based with ghost cells at [0] and [chunk + 1].
 *
 * NOTE(review): the code assumes NGRID is divisible by totaltasks; otherwise
 * the tail of allderr is never filled.  It also assumes FP_PREC is double,
 * since all messages use MPI_DOUBLE.  Confirm both.
 */
int main(int argc, char *argv[]) {
	int taskId, totaltasks, i;
	int chunk;
	MPI_Init(&argc, &argv);
	MPI_Comm_rank(MPI_COMM_WORLD, &taskId);
	MPI_Comm_size(MPI_COMM_WORLD, &totaltasks);

	chunk = NGRID / totaltasks;

	FP_PREC xc[chunk + 2];
	FP_PREC yc[chunk + 2];
	FP_PREC dyc[chunk + 2];
	FP_PREC derr[chunk + 2];

	FP_PREC intg;
	FP_PREC dx;

	/* ring neighbours */
	int prev_task = (taskId - 1) < 0 ? totaltasks - 1 : taskId - 1;
	int next_task = (taskId + 1) % totaltasks;

	MPI_Request reqs[4];
	MPI_Status stats[4];

	/* post the halo receives first; tags encode sender * 1000 + receiver */
	MPI_Irecv(&yc[0], 1, MPI_DOUBLE, prev_task, prev_task * 1000 + taskId,
	MPI_COMM_WORLD, &reqs[0]);

	MPI_Irecv(&yc[chunk + 1], 1, MPI_DOUBLE, next_task,
			next_task * 1000 + taskId, MPI_COMM_WORLD, &reqs[1]);

	/*
	 * Local point i is global point taskId * chunk + (i - 1), zero-based.
	 * The rank offset belongs in the grid index, not added to the
	 * coordinate itself, so that these values agree with allxc[] on ROOT.
	 */
	for (i = 1; i <= chunk + 1; i++) {
		xc[i] = XI + (XF - XI) * (FP_PREC) (taskId * chunk + i - 1)
				/ (FP_PREC) (NGRID - 1);
	}

	//define the function
	for (i = 1; i <= chunk; i++) {
		yc[i] = fn(xc[i]);
	}

	MPI_Isend(&yc[chunk], 1, MPI_DOUBLE, next_task, taskId * 1000 + next_task,
			MPI_COMM_WORLD, &reqs[3]);

	MPI_Isend(&yc[1], 1, MPI_DOUBLE, prev_task, taskId * 1000 + prev_task,
	MPI_COMM_WORLD, &reqs[2]);

	MPI_Waitall(4, reqs, stats);
	dx = xc[2] - xc[1];

	/* domain ends: replace the wrapped-around halo by fn() just outside */
	if (taskId == ROOT) {
		xc[0] = xc[1] - dx;
		yc[0] = fn(xc[0]);
	}

	if (taskId == totaltasks - 1) {
		xc[chunk + 1] = xc[chunk] + dx;
		yc[chunk + 1] = fn(xc[chunk + 1]);
	}

	//compute the derivative using central finite differencing
	for (i = 1; i <= chunk; i++) {
		dyc[i] = (yc[i + 1] - yc[i - 1]) / (2.0 * dx);
	}

	//compute the integral using the trapezoidal rule
	intg = 0.0;
	for (i = 1; i <= chunk; i++) {
		/* the last rank's final interval lies beyond XF: skip it */
		if (taskId == totaltasks - 1 && i == chunk)
			continue;
		intg += 0.5 * (xc[i + 1] - xc[i]) * (yc[i + 1] + yc[i]);
	}

	/*
	 * compute the errors for every owned point; derr[chunk] used to be
	 * left unwritten and was then sent and averaged as garbage
	 */
	for (i = 1; i <= chunk; i++) {
		derr[i] = fabs((dyc[i] - dfn(xc[i])) / dfn(xc[i]));
	}

	if (taskId != ROOT) {

		MPI_Request nreqs[2];
		MPI_Status nstats[2];

		/* same tag for both messages: ROOT relies on their send order */
		MPI_Isend(derr + 1, chunk, MPI_DOUBLE, ROOT, taskId * 1000 + ROOT,
		MPI_COMM_WORLD, &nreqs[0]);
		MPI_Isend(&intg, 1, MPI_DOUBLE, ROOT, taskId * 1000 + ROOT,
		MPI_COMM_WORLD, &nreqs[1]);
		MPI_Waitall(2, nreqs, nstats);
	}

	else {

		FP_PREC allxc[NGRID];
		FP_PREC allderr[NGRID];
		FP_PREC allintg[totaltasks];
		FP_PREC davg_err = 0.0;
		FP_PREC dstd_dev = 0.0;
		FP_PREC intg_err = 0.0;
		MPI_Request nreqs[2 * (totaltasks - 1)];
		MPI_Status nstats[2 * (totaltasks - 1)];

		/* per remote rank: first its error slice, then its integral */
		for (i = 1; i < totaltasks; i++) {
			MPI_Irecv(allderr + (i * chunk), chunk,
			MPI_DOUBLE, i, i * 1000 + ROOT, MPI_COMM_WORLD,
					&nreqs[2 * (i - 1)]);

			MPI_Irecv(allintg + i, 1, MPI_DOUBLE, i, i * 1000 + ROOT,
			MPI_COMM_WORLD, &nreqs[2 * (i - 1) + 1]);
		}

		/* ROOT's own slice (drop the ghost-cell offset) */
		for (i = 0; i < chunk; i++) {
			allderr[i] = derr[i + 1];
		}

		MPI_Waitall(2 * (totaltasks - 1), nreqs, nstats);

		//find the average error
		for (i = 0; i < NGRID; i++)
			davg_err += allderr[i];

		/* intg already holds ROOT's part; allintg[0] is unused */
		for (i = 1; i < totaltasks; i++) {
			intg += allintg[i];
		}

		davg_err /= (FP_PREC) NGRID;

		dstd_dev = 0.0;
		for (i = 0; i < NGRID; i++) {
			dstd_dev += pow(allderr[i] - davg_err, 2);
		}
		dstd_dev = sqrt(dstd_dev / (FP_PREC) NGRID);

		intg_err = fabs((ifn(XI, XF) - intg) / ifn(XI, XF));

		for (i = 0; i < NGRID; i++) {
			allxc[i] = XI + (XF - XI) * (FP_PREC) i / (FP_PREC) (NGRID - 1);
		}

		print_error_data(NGRID, davg_err, dstd_dev, allxc, allderr, intg_err);
	}

	MPI_Finalize();
	return 0;
}
Beispiel #10
0
// Builds the value for a `new` expression of class type `tc`.
//
// Steps, in order: resolve the class, obtain storage (stack alloca, the
// user-supplied allocator, or a runtime allocation function), run
// DtoInitClass unless the "_d_newThrowable" path was taken, store the
// outer-object reference or nested context if there is one, and call the
// constructor when `newexp->member` is set.
//
// Always returns a new DImValue wrapping the object memory; the
// constructor's own return value is discarded.
DValue *DtoNewClass(Loc &loc, TypeClass *tc, NewExp *newexp) {
  // resolve type
  DtoResolveClass(tc->sym);

  // allocate
  LLValue *mem;
  // cleared only on the _d_newThrowable path below
  bool doInit = true;
  if (newexp->onstack) {
    // use the class's alignsize, mapping STRUCTALIGN_DEFAULT to 0
    unsigned alignment = tc->sym->alignsize;
    if (alignment == STRUCTALIGN_DEFAULT)
      alignment = 0;
    mem = DtoRawAlloca(DtoType(tc)->getContainedType(0), alignment,
                       ".newclass_alloca");
  }
  // custom allocator
  else if (newexp->allocator) {
    // call the allocator with newexp->newargs and cast its result to the class type
    DtoResolveFunction(newexp->allocator);
    DFuncValue dfn(newexp->allocator, DtoCallee(newexp->allocator));
    DValue *res = DtoCallFunction(newexp->loc, nullptr, &dfn, newexp->newargs);
    mem = DtoBitCast(DtoRVal(res), DtoType(tc), ".newclass_custom");
  }
  // default allocator
  else {
    // pick _d_newThrowable when ehnogc is enabled and this is a thrown `new`,
    // otherwise _d_allocclass
    const bool useEHAlloc = global.params.ehnogc && newexp->thrownew;
    llvm::Function *fn = getRuntimeFunction(
        loc, gIR->module, useEHAlloc ? "_d_newThrowable" : "_d_allocclass");
    LLConstant *ci = DtoBitCast(getIrAggr(tc->sym)->getClassInfoSymbol(),
                                DtoType(getClassInfoType()));
    mem = gIR->CreateCallOrInvoke(fn, ci,
                                  useEHAlloc ? ".newthrowable_alloc"
                                             : ".newclass_gc_alloc")
              .getInstruction();
    mem = DtoBitCast(mem, DtoType(tc),
                     useEHAlloc ? ".newthrowable" : ".newclass_gc");
    // NOTE(review): presumably _d_newThrowable already initialises the
    // object, which is why DtoInitClass is skipped for it - confirm
    doInit = !useEHAlloc;
  }

  // init
  if (doInit)
    DtoInitClass(tc, mem);

  // init inner-class outer reference
  if (newexp->thisexp) {
    Logger::println("Resolving outer class");
    LOG_SCOPE;
    // store the evaluated `thisexp` into the object's vthis field
    unsigned idx = getFieldGEPIndex(tc->sym, tc->sym->vthis);
    LLValue *src = DtoRVal(newexp->thisexp);
    LLValue *dst = DtoGEPi(mem, 0, idx);
    IF_LOG Logger::cout() << "dst: " << *dst << "\nsrc: " << *src << '\n';
    // the destination is cast to a pointer to src's type before the store
    DtoStore(src, DtoBitCast(dst, getPtrToType(src->getType())));
  }
  // set the context for nested classes
  else if (tc->sym->isNested() && tc->sym->vthis) {
    DtoResolveNestedContext(loc, tc->sym, mem);
  }

  // call constructor
  if (newexp->member) {
    // evaluate argprefix
    if (newexp->argprefix) {
      toElemDtor(newexp->argprefix);
    }

    Logger::println("Calling constructor");
    assert(newexp->arguments != NULL);
    DtoResolveFunction(newexp->member);
    // `mem` is passed as the third DFuncValue argument
    // (presumably the `this` pointer for the call - TODO confirm)
    DFuncValue dfn(newexp->member, DtoCallee(newexp->member), mem);
    // ignore ctor return value (C++ ctors on Posix may not return `this`)
    DtoCallFunction(newexp->loc, tc, &dfn, newexp->arguments);
    return new DImValue(tc, mem);
  }

  // an argprefix is only expected together with a constructor call
  assert(newexp->argprefix == NULL);

  // return default constructed class
  return new DImValue(tc, mem);
}
Beispiel #11
0
/*
 * rtsafe - find a root of fn(x, z1, z2, z3, z4) known to lie in [x1, x2].
 *
 * Newton-Raphson steps (dfn supplies the derivative of fn with respect to
 * its first argument) are combined with bisection: the current bracket is
 * halved instead whenever the Newton step would land outside it or the
 * residual is not shrinking fast enough.
 *
 *   fn, dfn      function and its derivative; both are called as
 *                f(x, z1, z2, z3, z4)
 *   z1..z4       extra arguments passed through unchanged
 *   x1, x2       interval that must bracket the root
 *   xacc         the iteration stops once the last step is smaller than this
 *
 * Returns x1 or x2 if either is an exact root; otherwise the root estimate.
 * nrerror() is called if the root is not bracketed or if JMAX iterations are
 * exceeded; should nrerror() return in the latter case, 0.0 is returned.
 *
 * fabsl() is used throughout so that the long double operands are not
 * narrowed to double in the step-size and convergence tests.
 */
long double rtsafe(long double (fn)(long double, long double, long double, long double, int), long double (dfn)(long double, long double, long double, long double, int), long double z1, long double z2, long double z3, int z4, long double x1, long double x2, long double xacc)
{
  void nrerror(char error_text[]);
  int j;
  long double df,dx,dxold,f,fh,fl;
  long double temp,xh,xl,rts;

  fl = fn(x1,z1,z2,z3,z4);
  fh = fn(x2,z1,z2,z3,z4);

  if ((fl > 0.0 && fh > 0.0) || (fl < 0.0 && fh < 0.0))
    nrerror("Root must be bracketed in rtsafe");

  if (fl == 0.0) return x1;
  if (fh == 0.0) return x2;

  /* orient the bracket so that fn(xl) < 0 < fn(xh) */
  if (fl < 0.0) {
    xl=x1;
    xh=x2;
  }
  else {
    xh=x1;
    xl=x2;
  }

  rts=0.5*(x1+x2);        /* first guess: midpoint of the interval */
  dxold=fabsl(x2-x1);     /* step size before last */
  dx=dxold;               /* last step size */

  f = fn(rts,z1,z2,z3,z4);
  df = dfn(rts,z1,z2,z3,z4);

  for (j=1;j<=JMAX;j++) {

    /* bisect if the Newton step would leave [xl, xh] or is converging too
       slowly; a zero derivative also ends up in this branch */
    if ((((rts-xh)*df-f)*((rts-xl)*df-f) >= 0.0) || (fabsl(2.0*f)>fabsl(dxold*df))) {
      dxold=dx;
      dx=0.5*(xh-xl);
      rts=xl+dx;

      /* the change in the root is negligible */
      if (xl==rts) return rts;
    }
    else {
      /* Newton step is acceptable: take it */
      dxold=dx;
      dx=f/df;
      temp=rts;
      rts -= dx;
      if (temp==rts) return rts;
    }

    if (fabsl(dx) < xacc) return rts;

    /* one new function/derivative evaluation per iteration */
    f = fn(rts,z1,z2,z3,z4);
    df = dfn(rts,z1,z2,z3,z4);

    /* keep the root bracketed */
    if (f < 0.0)
      xl=rts;
    else xh=rts;
  }
  nrerror("Maximum number of iterations exceeded in rtsafe");
  return 0.0;
}