// Calls `fn` twice -- once with low(...) of `x` and of every extra argument,
// once with high(...) of the same values -- and returns concat() of the two
// results.  Both calls are operands of one function call, so the order in
// which they run is not fixed by this code.
// NOTE(review): the template parameter list (T, N, Fn, Args) is declared
// outside this fragment; presumably low()/high() split a vec into halves --
// confirm against the vec helpers.
KFR_INLINE auto handle_all_f(Fn&& fn, vec<T, N> x, Args&&... args) { return concat(fn(low(x), low(args)...), fn(high(x), high(args)...)); }
void SqrtRep::computeExactFlags() { if (!child->flagsComputed()) child->computeExactFlags(); if (rationalReduceFlag) ratFlag() = -1; sign() = child->sign(); if (sign() < 0) core_error("squareroot is called with negative operand.", __FILE__, __LINE__, true); uMSB() = child->uMSB() / EXTLONG_TWO; lMSB() = child->lMSB() / EXTLONG_TWO; // length() = child->length(); measure() = child->measure(); // BFMSS[2,5] bound. if (child->v2p() + ceilLg5(child->v5p()) + child->u25() >= child->v2m() + ceilLg5(child->v5m()) + child->l25()) { extLong vtilda2 = child->v2p() + child->v2m(); v2p() = vtilda2 / EXTLONG_TWO; v2m() = child->v2m(); extLong vmod2; if (v2p().isInfty()) vmod2 = CORE_INFTY; else vmod2 = vtilda2 - EXTLONG_TWO*v2p(); // == vtilda2 % 2 extLong vtilda5 = child->v5p() + child->v5m(); v5p() = vtilda5 / EXTLONG_TWO; v5m() = child->v5m(); extLong vmod5; if (v5p().isInfty()) vmod5 = CORE_INFTY; else vmod5 = vtilda5 - EXTLONG_TWO*v5p(); // == vtilda5 % 2 u25() = (child->u25() + child->l25() + vmod2 + ceilLg5(vmod5) + EXTLONG_ONE) / EXTLONG_TWO; l25() = child->l25(); } else { extLong vtilda2 = child->v2p() + child->v2m(); v2p() = child->v2p(); v2m() = vtilda2 / EXTLONG_TWO; extLong vmod2; if (v2m().isInfty()) vmod2 = CORE_INFTY; else vmod2 = vtilda2 - EXTLONG_TWO*v2m(); // == vtilda2 % 2 extLong vtilda5 = child->v5p() + child->v5m(); v5p() = child->v5p(); v5m() = vtilda5 / EXTLONG_TWO; u25() = child->u25(); extLong vmod5; if (v5m().isInfty()) vmod5 = CORE_INFTY; else vmod5 = vtilda5 - EXTLONG_TWO*v5m(); // == vtilda5 % 2 l25() = (child->u25() + child->l25() + vmod2 + ceilLg5(vmod5) + EXTLONG_ONE) / EXTLONG_TWO; } high() = (child->high() +EXTLONG_ONE)/EXTLONG_TWO; low() = child->low() / EXTLONG_TWO; lc() = child->lc(); tc() = child->tc(); flagsComputed() = true; }// SqrtRep::computeExactFlags
// Calls `fn` twice -- once with low(...) of `x` and of every extra argument,
// once with high(...) of the same values -- and returns `redfn` applied to the
// two results.  Both `fn` calls are operands of the `redfn` call, so the order
// in which they run is not fixed by this code.
// NOTE(review): the template parameter list (T, N, RedFn, Fn, Args) is
// declared outside this fragment; presumably low()/high() split a vec into
// halves -- confirm against the vec helpers.
KFR_INLINE auto handle_all_reduce_f(RedFn&& redfn, Fn&& fn, vec<T, N> x, Args&&... args) { return redfn(fn(low(x), low(args)...), fn(high(x), high(args)...)); }
// This only copies the current information of the argument e to // *this ExprRep. void ExprRep::reduceTo(const ExprRep *e) { if (e->appComputed()) { appValue() = e->appValue(); appComputed() = true; flagsComputed() = true; knownPrecision() = e->knownPrecision(); #ifdef CORE_DEBUG relPrecision() = e->relPrecision(); absPrecision() = e->absPrecision(); numNodes() = e->numNodes(); #endif } d_e() = e->d_e(); //visited() = e->visited(); sign() = e->sign(); uMSB() = e->uMSB(); lMSB() = e->lMSB(); // length() = e->length(); // fixed? original = 1 measure() = e->measure(); // BFMSS[2,5] bound. u25() = e->u25(); l25() = e->l25(); v2p() = e->v2p(); v2m() = e->v2m(); v5p() = e->v5p(); v5m() = e->v5m(); high() = e->high(); low() = e->low(); // fixed? original = 0 lc() = e->lc(); tc() = e->tc(); // Chee (Mar 23, 2004), Notes on ratFlag(): // =============================================================== // For more information on the use of this flag, see progs/pentagon. // This is an integer valued member of the NodeInfo class. // Its value is used to determine whether // we can ``reduce'' an Expression to a single node containing // a BigRat value. This reduction is done if the global variable // rationalReduceFlag=true. The default value is false. // This is the intepretation of ratFlag: // ratFlag < 0 means irrational // ratFlag = 0 means not initialized // ratFlag > 0 means rational // Currently, ratFlag>0 is an upper bound on the size of the expression, // since we recursively compute // ratFlag(v) = ratFlag(v.lchild)+ratFlag(v.rchild) + 1. // PROPOSAL: if ratFlag() > RAT_REDUCE_THRESHHOLD // then we automatically do a reduction. We must determine // an empirical value for RAT_REDUCE_THRESHOLD if (rationalReduceFlag) { ratFlag() = e->ratFlag(); if (e->ratFlag() > 0 && e->ratValue() != NULL) { ratFlag() ++; if (ratValue() == NULL) ratValue() = new BigRat(*(e->ratValue())); else *(ratValue()) = *(e->ratValue()); } else ratFlag() = -1; } }
// Computes the root bit bound of the expression. // In effect, computeBound() returns the current value of low. extLong ExprRep::computeBound() { extLong measureBd = measure(); // extLong cauchyBd = length(); extLong ourBd = (d_e() - EXTLONG_ONE) * high() + lc(); // BFMSS[2,5] bound. extLong bfmsskBd; if (v2p().isInfty() || v2m().isInfty()) bfmsskBd = CORE_INFTY; else bfmsskBd = l25() + u25() * (d_e() - EXTLONG_ONE) - v2() - ceilLg5(v5()); // since we might compute \infty - \infty for this bound if (bfmsskBd.isNaN()) bfmsskBd = CORE_INFTY; extLong bd = core_min(measureBd, // core_min(cauchyBd, core_min(bfmsskBd, ourBd)); #ifdef CORE_SHOW_BOUNDS std::cout << "Bounds (" << measureBd << "," << bfmsskBd << ", " << ourBd << "), "; std::cout << "MIN = " << bd << std::endl; std::cout << "d_e= " << d_e() << std::endl; #endif #ifdef CORE_DEBUG_BOUND // Some statistics about which one is/are the winner[s]. computeBoundCallsCounter++; int number_of_winners = 0; std::cerr << " New contest " << std::endl; if (bd == bfmsskBd) { BFMSS_counter++; number_of_winners++; std::cerr << " BFMSS is the winner " << std::endl; } if (bd == measureBd) { Measure_counter++; number_of_winners++; std::cerr << " measureBd is the winner " << std::endl; } /* if (bd == cauchyBd) { Cauchy_counter++; number_of_winners++; std::cerr << " cauchyBd is the winner " << std::endl; } */ if (bd == ourBd) { LiYap_counter++; number_of_winners++; std::cerr << " ourBd is the winner " << std::endl; } assert(number_of_winners >= 1); if (number_of_winners == 1) { if (bd == bfmsskBd) { BFMSS_only_counter++; std::cerr << " BFMSSBd is the only winner " << std::endl; } else if (bd == measureBd) { Measure_only_counter++; std::cerr << " measureBd is the only winner " << std::endl; } /* else if (bd == cauchyBd) { Cauchy_only_counter++; std::cerr << " cauchyBd is the only winner " << std::endl; } */ else if (bd == ourBd) { LiYap_only_counter++; std::cerr << " ourBd is the only winner " << std::endl; } } #endif return bd; 
}//computeBound()
// Draws the first passage time from the propensity function.
//
// rnd is the random number the time is drawn for; THROW_UNLESS is invoked
// with std::invalid_argument for the condition 0.0 <= rnd < 1.0.
//
// Returns
//   - INFINITY when D == 0 or L == INFINITY,
//   - 0.0 when rnd <= EPSILON, L < 0 or r0 lies within EPSILON*L of L,
//   - otherwise the root in t of drawT_f located by findRoot() inside a
//     bracket grown around the guess (L-r0)^2 / (2 D).
// If the lower end of the bracket cannot be adjusted, a diagnostic is
// written to std::cerr and the current lower end is returned.  If the upper
// end cannot be adjusted, a diagnostic is written and std::exception is
// thrown.
const Real
FirstPassageGreensFunction1DRad::drawTime (const Real rnd) const
{
    const Real L(this->getL());
    const Real k(this->getk());
    const Real D(this->getD());
    const Real r0(this->getr0());

    THROW_UNLESS( std::invalid_argument, 0.0 <= rnd && rnd < 1.0 );

    if ( D == 0.0 || L == INFINITY )
    {
        return INFINITY;
    }

    if ( rnd <= EPSILON || L < 0.0 || fabs(r0 - L) < EPSILON*L )
    {
        return 0.0;
    }

    const Real h(k/D);

    // the structure to store the numbers to calculate the numbers for 1-S
    struct drawT_params parameters;
    double An = 0;
    double tmp0, tmp1, tmp2, tmp3;
    double Xn, exponent;

    // produce the coefficients and the terms in the exponent and put them
    // in the params structure. This is not very efficient at this point,
    // coefficients should be calculated on demand->TODO
    for (int n=0; n<MAX_TERMEN; n++)
    {
        An = a_n (n+1); // get the n-th root of tan(alfa*L)=alfa/-k
        tmp0 = An * An; // An^2
        tmp1 = An * r0; // An * z'
        tmp2 = An * L;  // An * L
        tmp3 = h / An;  // h / An
        Xn = (An*cos(tmp1) + h*sin(tmp1)) *
             (sin(tmp2)-tmp3*cos(tmp2)+tmp3) / (L*(tmp0+h*h)+h);
        exponent = -D*tmp0;

        // store the coefficients in the structure
        parameters.Xn[n] = Xn;
        // also store the values for the exponent
        parameters.exponent[n]=exponent;
    }

    // store the random number for the probability
    parameters.rnd = rnd;
    // store the number of terms used
    parameters.terms = MAX_TERMEN;
    parameters.tscale = this->t_scale;

    // Define the function for the rootfinder
    gsl_function F;
    F.function = &FirstPassageGreensFunction1DRad::drawT_f;
    // The callback receives the coefficient table through this pointer
    // (the text previously read "¶meters", an encoding-damaged
    // "&parameters").
    F.params = &parameters;

    // Find a good interval to determine the first passage time in
    // get the distance to absorbing boundary (disregard rad BC)
    const Real dist(L-r0);

    // construct a guess: msd = sqrt (2*d*D*t)
    const Real t_guess( dist * dist / ( 2. * D ) );
    Real value( GSL_FN_EVAL( &F, t_guess ) );
    Real low( t_guess );
    Real high( t_guess );

    // scale the interval around the guess such that the function straddles
    if( value < 0.0 )
    {
        // if the guess was too low
        do
        {
            // keep increasing the upper boundary until the
            // function straddles
            high *= 10;
            value = GSL_FN_EVAL( &F, high );

            // give up once the upper end is a factor 1e6 above the guess
            if( fabs( high ) >= t_guess * 1e6 )
            {
                std::cerr << "GF1DRad: Couldn't adjust high. F(" << high << ") = " << value << std::endl;
                throw std::exception();
            }
        }
        while ( value <= 0.0 );
    }
    else
    {
        // if the guess was too high
        // initialize with 2 so the test below survives the first
        // iteration
        Real value_prev( 2 );
        do
        {
            // give up once the lower end is a factor 1e6 below the guess
            // or the function value stopped changing
            if( fabs( low ) <= t_guess * 1e-6 ||
                fabs(value-value_prev) < EPSILON*1.0 )
            {
                std::cerr << "GF1DRad: Couldn't adjust low. F(" << low << ") = " << value << " t_guess: " << t_guess << " diff: " << (value - value_prev) << " value: " << value << " value_prev: " << value_prev << " rnd: " << rnd << std::endl;
                return low;
            }
            value_prev = value;
            // keep decreasing the lower boundary until the function straddles
            low *= .1;
            // get the accompanying value
            value = GSL_FN_EVAL( &F, low );
        }
        while ( value >= 0.0 );
    }

    // find the intersection on the y-axis between the random number and
    // the function

    // define a new solver type brent
    const gsl_root_fsolver_type* solverType( gsl_root_fsolver_brent );
    // make a new solver instance
    gsl_root_fsolver* solver( gsl_root_fsolver_alloc( solverType ) );
    const Real t( findRoot( F, solver, low, high, t_scale*EPSILON, EPSILON,
                            "FirstPassageGreensFunction1DRad::drawTime" ) );
    // Release the solver allocated above; without this every call leaked
    // one solver instance.
    // NOTE(review): assumes findRoot() leaves the solver allocated when it
    // returns normally -- confirm against findRoot().
    gsl_root_fsolver_free( solver );

    // return the drawn time
    return t;
}
/*
 * Iterative solver for a linear program given by the arrays A/iA/kA, b, c.
 *
 * Parameters
 *   m, n     : lengths of the "row" vectors (b, y, w) and of the "column"
 *              vectors (c, x, z) respectively, as used by every loop below.
 *   nz       : number of entries allocated for the transposed matrix
 *              arrays At / iAt.
 *   iA,kA,A  : matrix arrays handed to atnum(), smx() and ldltfac();
 *              presumably index / column-start / value arrays of a sparse
 *              matrix -- TODO confirm against those helpers.
 *   b, c     : right-hand side (length m) and cost vector (length n).
 *   f        : constant added to the objective values in the printed
 *              statistics only.
 *   x,y,w,z  : caller-supplied work/result vectors.  They are initialised
 *              to 1.0 here, updated every iteration and divided by phi
 *              before returning.
 *
 * Returns the status code: 0 (OPTIMAL), 2 (PRIMAL INFEASIBLE),
 * 4 (DUAL INFEASIBLE), 7 (NUMERICAL PROBLEM), or the initial value 5 when
 * MAX_ITER iterations pass without the stopping rule firing.
 *
 * NOTE(review): w and z are arguments supplied by the caller, yet they are
 * passed to FREE at the end together with the local work arrays, so the
 * caller cannot read them after the call -- confirm this ownership
 * transfer is intended.
 * NOTE(review): the phi/psi pair looks like the extra variables of a
 * homogeneous self-dual interior-point formulation -- confirm.
 */
int solver(int m,int n,int nz,int *iA, int *kA, double *A, double *b, double *c, double f, double *x, double *y, double *w, double *z)
{
    double *dx, *dw, *dy, *dz; /* step directions */
    double *fx, *fy, *gx, *gy;
    double phi, psi, dphi, dpsi;
    double *rho, *sigma, normr, norms; /* infeasibilities */
    double *D, *E; /* diagonal matrices */
    double gamma, beta, delta, mu, theta; /* parameters */
    double *At; /* arrays for A^T */
    int *iAt, *kAt;
    int i,j,iter,v=1,status=5; /* status stays 5 if the loop runs out */
    double primal_obj, dual_obj;

    /*******************************************************************
    * Allocate memory for arrays.
    *******************************************************************/
    MALLOC( dx, n, double );
    MALLOC( dw, m, double );
    MALLOC( dy, m, double );
    MALLOC( dz, n, double );
    MALLOC( rho, m, double );
    MALLOC( sigma, n, double );
    MALLOC( D, n, double );
    MALLOC( E, m, double );
    MALLOC( fx, n, double );
    MALLOC( fy, m, double );
    MALLOC( gx, n, double );
    MALLOC( gy, m, double );
    MALLOC( At, nz, double );
    MALLOC( iAt, nz, int );
    MALLOC( kAt, m+1, int );

    /****************************************************************
    * Initialization: every iterate starts at 1.
    ****************************************************************/
    for (j=0; j<n; j++) { x[j] = 1.0; z[j] = 1.0; }
    for (i=0; i<m; i++) { w[i] = 1.0; y[i] = 1.0; }
    phi = 1.0;
    psi = 1.0;
    /* fills kAt/iAt/At from kA/iA/A (the "A^T" arrays declared above) */
    atnum(m,n,kA,iA,A,kAt,iAt,At);

    /****************************************************************
    * Display Banner.
    ****************************************************************/
    printf ("m = %d,n = %d,nz = %d\n",m,n,nz);
    printf( "--------------------------------------------------------------------------\n" " | Primal | Dual | |\n" " Iter | Obj Value Infeas | Obj Value Infeas | mu |\n" "- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - \n" );
    fflush(stdout);

    /****************************************************************
    * Iteration.
    ****************************************************************/
    beta = 0.80;
    delta = 2*(1-beta);
    for (iter=0; iter<MAX_ITER; iter++) {
        /*************************************************************
        * STEP 1: Compute mu.
        *************************************************************/
        mu = (dotprod(z,x,n)+dotprod(w,y,m)+phi*psi) / (n+m+1);

        /*************************************************************
        * STEP 1 (cont.): Compute primal and dual objective function
        * values.
        *************************************************************/
        primal_obj = dotprod(c,x,n);
        dual_obj = dotprod(b,y,m);

        /*************************************************************
        * STEP 2: Check stopping rule.  Once mu < EPS the status is
        * chosen from phi and the signs of the objective values.
        *************************************************************/
        if ( mu < EPS ) {
            if ( phi > EPS ) {
                status = 0;
                break; /* OPTIMAL */
            } else if ( dual_obj < 0.0) {
                status = 2;
                break; /* PRIMAL INFEASIBLE */
            } else if ( primal_obj > 0.0) {
                status = 4;
                break; /* DUAL INFEASIBLE */
            } else {
                status = 7; /* NUMERICAL PROBLEM */
                break;
            }
        }

        /*************************************************************
        * STEP 3: Compute infeasibilities.  rho and sigma first hold
        * the residuals (used for normr / norms) and are then
        * overwritten with the right-hand sides used in STEP 4.
        *************************************************************/
        smx(m,n,A,kA,iA,x,rho);
        for (i=0; i<m; i++) { rho[i] = rho[i] - b[i]*phi + w[i]; }
        normr = sqrt( dotprod(rho,rho,m) )/phi;
        for (i=0; i<m; i++) { rho[i] = -(1-delta)*rho[i] + w[i] - delta*mu/y[i]; }

        smx(n,m,At,kAt,iAt,y,sigma);
        for (j=0; j<n; j++) { sigma[j] = -sigma[j] + c[j]*phi + z[j]; }
        norms = sqrt( dotprod(sigma,sigma,n) )/phi;
        for (j=0; j<n; j++) { sigma[j] = -(1-delta)*sigma[j] + z[j] - delta*mu/x[j]; }

        gamma = -(1-delta)*(dual_obj - primal_obj + psi) + psi - delta*mu/phi;

        /*************************************************************
        * Print statistics.
        *************************************************************/
        printf("%8d %14.7e %8.1e %14.7e %8.1e %8.1e \n", iter, high(primal_obj/phi+f), high(normr), high(dual_obj/phi+f), high(norms), high(mu) );
        fflush(stdout);

        /*************************************************************
        * STEP 4: Compute step directions.  Two solves with the same
        * factorisation (right-hand sides f* and g*) are combined
        * through dphi.
        *************************************************************/
        for (j=0; j<n; j++) { D[j] = z[j]/x[j]; }
        for (i=0; i<m; i++) { E[i] = w[i]/y[i]; }

        ldltfac(n, m, kAt, iAt, At, E, D, kA, iA, A, v);

        for (j=0; j<n; j++) { fx[j] = -sigma[j]; }
        for (i=0; i<m; i++) { fy[i] = rho[i]; }

        forwardbackward(E, D, fy, fx);

        for (j=0; j<n; j++) { gx[j] = -c[j]; }
        for (i=0; i<m; i++) { gy[i] = -b[i]; }

        forwardbackward(E, D, gy, gx);

        dphi = (dotprod(c,fx,n)-dotprod(b,fy,m)+gamma)/ (dotprod(c,gx,n)-dotprod(b,gy,m)-psi/phi);

        for (j=0; j<n; j++) { dx[j] = fx[j] - gx[j]*dphi; }
        for (i=0; i<m; i++) { dy[i] = fy[i] - gy[i]*dphi; }

        for (j=0; j<n; j++) { dz[j] = delta*mu/x[j] - z[j] - D[j]*dx[j]; }
        for (i=0; i<m; i++) { dw[i] = delta*mu/y[i] - w[i] - E[i]*dy[i]; }
        dpsi = delta*mu/phi - psi - (psi/phi)*dphi;

        /*************************************************************
        * STEP 5: Compute step length: the smallest value returned by
        * linesearch() over all pairs, capped at 1.
        *************************************************************/
        theta = 1.0;
        for (j=0; j<n; j++) { theta = MIN(theta, linesearch(x[j],z[j],dx[j],dz[j],beta,delta,mu)); }
        for (i=0; i<m; i++) { theta = MIN(theta,linesearch(y[i],w[i],dy[i],dw[i],beta,delta,mu)); }
        theta = MIN(theta,linesearch(phi,psi,dphi,dpsi,beta,delta,mu));
        /* if (theta < 4*beta/(n+m+1)) { printf("ratio = %10.3e \n", theta*(n+m+1)/(4*beta)); status = 7; break; } */
        /* a step shorter than 1 is shortened slightly further */
        if (theta < 1.0) theta *= 0.9999;

        /*************************************************************
        * STEP 6: Step to new point
        *************************************************************/
        for (j=0; j<n; j++) { x[j] = x[j] + theta*dx[j]; z[j] = z[j] + theta*dz[j]; }
        for (i=0; i<m; i++) { y[i] = y[i] + theta*dy[i]; w[i] = w[i] + theta*dw[i]; }
        phi = phi + theta*dphi;
        psi = psi + theta*dpsi;
    }

    /* Scale the final iterates by 1/phi. */
    for (j=0; j<n; j++) { x[j] /= phi; z[j] /= phi; }
    for (i=0; i<m; i++) { y[i] /= phi; w[i] /= phi; }

    /****************************************************************
    * Free work space (note: this includes the arguments w and z,
    * see the NOTE(review) in the header comment).
    ****************************************************************/
    FREE( w );
    FREE( z );
    FREE( dx );
    FREE( dw );
    FREE( dy );
    FREE( dz );
    FREE( rho );
    FREE( sigma );
    FREE( D );
    FREE( E );
    FREE( fx );
    FREE( fy );
    FREE( gx );
    FREE( gy );
    FREE( At );
    FREE( iAt );
    FREE( kAt );

    return status;
} /* End of solver */
// Writes the first print_len elements of the long array ta to st, one line
// per element: the element index followed by high(v) and low(v) of the
// value, both formatted as hexadecimal.
static void print_long_array(typeArrayOop ta, int print_len, outputStream* st) {
  int position = 0;
  while (position < print_len) {
    jlong element = ta->long_at(position);
    st->print_cr(" - %3d: 0x%x 0x%x", position, high(element), low(element));
    ++position;
  }
}