/**
 * Evaluates  tr^2 * I() + (1 - z()) * tr  with tr = T / Tcr.
 *
 * NOTE(review): presumably this is the dimensionless enthalpy departure of
 * the Lee-Kesler model, with tr the reduced temperature and z() the
 * compressibility factor -- confirm against the class documentation.
 *
 * @return the value of the expression above.
 */
double leekesler::hdep()
{
    const double reducedT = T / Tcr;

    return reducedT * reducedT * I()
         + (1.0 - z()) * reducedT;
}

/**
 * Scalar (dot) product of this vector with @p v.
 *
 * @param v right-hand operand.
 * @return x*v.x + y*v.y + z*v.z.
 */
GLdouble GLVector::operator*(const GLVector& v) const
{
    const GLVector& lhs = *this;

    return lhs.x() * v.x()
         + lhs.y() * v.y()
         + lhs.z() * v.z();
}

/**
 * Component-wise exact equality.
 *
 * The three comparisons are combined with logical AND (the previous code
 * used bitwise '&' on the boolean results, which yields the same value but
 * never short-circuits and trips -Wbitwise-instead-of-logical).
 *
 * @param v vector to compare against.
 * @return true when x, y and z all compare equal with operator==.
 */
bool GLVector::operator==(const GLVector& v) const
{
    return (v.x() == x())
        && (v.y() == y())
        && (v.z() == z());
}

bool testSymbolSpace() { bool passed = true; symbol::Space<int> point; symbol::Symbol x("x"); symbol::Symbol y("y"); symbol::Symbol z("z"); // reserved for "misses" // nothing to get from empty space passed &= (point.get(x) == NULL); passed &= (point.get(y) == NULL); passed &= (point.get(z) == NULL); point.del(x); point.del(y); point.del(z); // one key point.set(x, 42); passed &= (*point.get(x) == 42); passed &= (point.get(z) == NULL); point.del(z); point.set(x, 99); passed &= (*point.get(x) == 99); point.del(x); passed &= (point.get(x) == NULL); point.del(z); // two keys point.set(x, 1); point.set(y, 2); // append passed &= (*point.get(x) == 1); passed &= (*point.get(y) == 2); passed &= (point.get(z) == NULL); point.del(z); point.del(x); passed &= (point.get(x) == NULL); passed &= (*point.get(y) == 2); passed &= (point.get(z) == NULL); point.del(z); point.del(y); passed &= (point.get(x) == NULL); passed &= (point.get(y) == NULL); passed &= (point.get(z) == NULL); // two keys, again, different order point.set(y, 2); point.set(x, 1); // insert before head passed &= (*point.get(x) == 1); passed &= (*point.get(y) == 2); passed &= (point.get(z) == NULL); point.del(z); point.del(x); passed &= (point.get(x) == NULL); passed &= (*point.get(y) == 2); passed &= (point.get(z) == NULL); point.del(z); point.del(y); passed &= (point.get(x) == NULL); passed &= (point.get(y) == NULL); passed &= (point.get(z) == NULL); // two keys, with overwritting this time point.set(y, 2); point.set(x, 1); passed &= (*point.get(x) == 1); passed &= (*point.get(y) == 2); point.set(x, 3); passed &= (*point.get(x) == 3); passed &= (*point.get(y) == 2); point.set(y, 4); passed &= (*point.get(x) == 3); passed &= (*point.get(y) == 4); point.del(x); point.del(y); passed &= (point.get(x) == NULL); passed &= (point.get(y) == NULL); passed &= (point.get(z) == NULL); // three keys point.set(x, 1); point.set(z, 3); point.set(y, 2); // insert in the middle! 
passed &= (*point.get(x) == 1); passed &= (*point.get(y) == 2); passed &= (*point.get(z) == 3); // overwrite at head, middle, and end point.set(x, 4); point.set(y, 5); point.set(z, 6); passed &= (*point.get(x) == 4); passed &= (*point.get(y) == 5); passed &= (*point.get(z) == 6); // del from middle point.del(y); passed &= (*point.get(x) == 4); passed &= (point.get(y) == NULL); passed &= (*point.get(z) == 6); symbol::Space<int> letters; std::string test_data = "Templates are a way of making your classes more abstract by letting you" "define the behavior of the class without actually knowing what datatype" "will be handled by the operations of the class. In essence, this is what is" "known as generic programming; this term is a useful way to think about" "templates because it helps remind the programmer that a templated class" "does not depend on the datatype (or types) it deals with. To a large" "degree, a templated class is more focused on the algorithmic thought rather" "than the specific nuances of a single datatype. Templates can be used in" "conjunction with abstract datatypes in order to allow them to handle any" "type of data. For example, you could make a templated stack class that can" "handle a stack of any datatype, rather than having to create a stack class" "for every different datatype for which you want the stack to function. The" "ability to have a single class that can handle several different datatypes" "means the code is easier to maintain, and it makes classes more reusable."; for ( std::string::iterator it = test_data.begin(); it != test_data.end(); ++it) { std::string letter(1, *it); if ( symbol::validate(letter) ) { int count = 0; if ( letters.get(letter) == NULL ) { count = 0; } else { count = *letters.get(letter); } letters.set(letter, count+1); } } passed &= (*letters.get(symbol::Symbol("e")) == 100); // count them yourself! passed &= (*letters.get(symbol::Symbol("T")) == 4); if ( !passed ) { std::cout << "failed symbol::Space tests." 
<< std::endl; } return passed; }

/**
 * Vector (cross) product  this x v.
 *
 * @param v right-hand operand.
 * @return a new vector with components
 *         (y*v.z - z*v.y,  z*v.x - x*v.z,  x*v.y - y*v.x).
 */
const GLVector GLVector::vectorMult(const GLVector& v) const
{
    const GLVector& a = *this;

    return GLVector(a.y() * v.z() - a.z() * v.y(),
                    a.z() * v.x() - a.x() * v.z(),
                    a.x() * v.y() - a.y() * v.x());
}

void MVertex::writePLY2(FILE *fp) { if(_index < 0) return; // negative index vertices are never saved fprintf(fp, "%.16g %.16g %.16g\n", x(), y(), z()); }

void CollocationIntegrator::setupFG() { // Interpolation order deg_ = getOption("interpolation_order"); // All collocation time points std::vector<long double> tau_root = collocationPointsL(deg_, getOption("collocation_scheme")); // Coefficients of the collocation equation vector<vector<double> > C(deg_+1, vector<double>(deg_+1, 0)); // Coefficients of the continuity equation vector<double> D(deg_+1, 0); // Coefficients of the quadratures vector<double> B(deg_+1, 0); // For all collocation points for (int j=0; j<deg_+1; ++j) { // Construct Lagrange polynomials to get the polynomial basis at the collocation point Polynomial p = 1; for (int r=0; r<deg_+1; ++r) { if (r!=j) { p *= Polynomial(-tau_root[r], 1)/(tau_root[j]-tau_root[r]); } } // Evaluate the polynomial at the final time to get the // coefficients of the continuity equation D[j] = zeroIfSmall(p(1.0L)); // Evaluate the time derivative of the polynomial at all collocation points to // get the coefficients of the continuity equation Polynomial dp = p.derivative(); for (int r=0; r<deg_+1; ++r) { C[j][r] = zeroIfSmall(dp(tau_root[r])); } // Integrate polynomial to get the coefficients of the quadratures Polynomial ip = p.anti_derivative(); B[j] = zeroIfSmall(ip(1.0L)); } // Symbolic inputs MX x0 = MX::sym("x0", f_.input(DAE_X).sparsity()); MX p = MX::sym("p", f_.input(DAE_P).sparsity()); MX t = MX::sym("t", f_.input(DAE_T).sparsity()); // Implicitly defined variables (z and x) MX v = MX::sym("v", deg_*(nx_+nz_)); vector<int> v_offset(1, 0); for (int d=0; d<deg_; ++d) { v_offset.push_back(v_offset.back()+nx_); v_offset.push_back(v_offset.back()+nz_); } vector<MX> vv = vertsplit(v, v_offset); vector<MX>::const_iterator vv_it = vv.begin(); // Collocated states vector<MX> x(deg_+1), z(deg_+1); for (int d=1; d<=deg_; ++d) { x[d] = reshape(*vv_it++, this->x0().shape()); z[d] = reshape(*vv_it++, this->z0().shape()); } casadi_assert(vv_it==vv.end()); // Collocation time points vector<MX> tt(deg_+1); for (int d=0; 
d<=deg_; ++d) { tt[d] = t + h_*tau_root[d]; } // Equations that implicitly define v vector<MX> eq; // Quadratures MX qf = MX::zeros(f_.output(DAE_QUAD).sparsity()); // End state MX xf = D[0]*x0; // For all collocation points for (int j=1; j<deg_+1; ++j) { //for (int j=deg_; j>=1; --j) { // Evaluate the DAE vector<MX> f_arg(DAE_NUM_IN); f_arg[DAE_T] = tt[j]; f_arg[DAE_P] = p; f_arg[DAE_X] = x[j]; f_arg[DAE_Z] = z[j]; vector<MX> f_res = f_.call(f_arg); // Get an expression for the state derivative at the collocation point MX xp_j = C[0][j] * x0; for (int r=1; r<deg_+1; ++r) { xp_j += C[r][j] * x[r]; } // Add collocation equation eq.push_back(vec(h_*f_res[DAE_ODE] - xp_j)); // Add the algebraic conditions eq.push_back(vec(f_res[DAE_ALG])); // Add contribution to the final state xf += D[j]*x[j]; // Add contribution to quadratures qf += (B[j]*h_)*f_res[DAE_QUAD]; } // Form forward discrete time dynamics vector<MX> F_in(DAE_NUM_IN); F_in[DAE_T] = t; F_in[DAE_X] = x0; F_in[DAE_P] = p; F_in[DAE_Z] = v; vector<MX> F_out(DAE_NUM_OUT); F_out[DAE_ODE] = xf; F_out[DAE_ALG] = vertcat(eq); F_out[DAE_QUAD] = qf; F_ = MXFunction(F_in, F_out); F_.init(); // Backwards dynamics // NOTE: The following is derived so that it will give the exact adjoint // sensitivities whenever g is the reverse mode derivative of f. 
if (!g_.isNull()) { // Symbolic inputs MX rx0 = MX::sym("x0", g_.input(RDAE_RX).sparsity()); MX rp = MX::sym("p", g_.input(RDAE_RP).sparsity()); // Implicitly defined variables (rz and rx) MX rv = MX::sym("v", deg_*(nrx_+nrz_)); vector<int> rv_offset(1, 0); for (int d=0; d<deg_; ++d) { rv_offset.push_back(rv_offset.back()+nrx_); rv_offset.push_back(rv_offset.back()+nrz_); } vector<MX> rvv = vertsplit(rv, rv_offset); vector<MX>::const_iterator rvv_it = rvv.begin(); // Collocated states vector<MX> rx(deg_+1), rz(deg_+1); for (int d=1; d<=deg_; ++d) { rx[d] = reshape(*rvv_it++, this->rx0().shape()); rz[d] = reshape(*rvv_it++, this->rz0().shape()); } casadi_assert(rvv_it==rvv.end()); // Equations that implicitly define v eq.clear(); // Quadratures MX rqf = MX::zeros(g_.output(RDAE_QUAD).sparsity()); // End state MX rxf = D[0]*rx0; // For all collocation points for (int j=1; j<deg_+1; ++j) { // Evaluate the backward DAE vector<MX> g_arg(RDAE_NUM_IN); g_arg[RDAE_T] = tt[j]; g_arg[RDAE_P] = p; g_arg[RDAE_X] = x[j]; g_arg[RDAE_Z] = z[j]; g_arg[RDAE_RX] = rx[j]; g_arg[RDAE_RZ] = rz[j]; g_arg[RDAE_RP] = rp; vector<MX> g_res = g_.call(g_arg); // Get an expression for the state derivative at the collocation point MX rxp_j = -D[j]*rx0; for (int r=1; r<deg_+1; ++r) { rxp_j += (B[r]*C[j][r]) * rx[r]; } // Add collocation equation eq.push_back(vec(h_*B[j]*g_res[RDAE_ODE] - rxp_j)); // Add the algebraic conditions eq.push_back(vec(g_res[RDAE_ALG])); // Add contribution to the final state rxf += -B[j]*C[0][j]*rx[j]; // Add contribution to quadratures rqf += h_*B[j]*g_res[RDAE_QUAD]; } // Form backward discrete time dynamics vector<MX> G_in(RDAE_NUM_IN); G_in[RDAE_T] = t; G_in[RDAE_X] = x0; G_in[RDAE_P] = p; G_in[RDAE_Z] = v; G_in[RDAE_RX] = rx0; G_in[RDAE_RP] = rp; G_in[RDAE_RZ] = rv; vector<MX> G_out(RDAE_NUM_OUT); G_out[RDAE_ODE] = rxf; G_out[RDAE_ALG] = vertcat(eq); G_out[RDAE_QUAD] = rqf; G_ = MXFunction(G_in, G_out); G_.init(); } }

LOCAL_C void testFix(CArrayFix<TBuf<0x10> >& aFix)
//
// Test all methods
//
// Drives the array passed in through its public operations and checks each
// observable result with test(). The array is expected to be empty on entry
// (the first check asserts Count()==0). test(TRUE) after a call only records
// that the call returned.
//
	{

	test.Next(_L("Test all methods"));
	test(aFix.Count()==0);
	test(aFix.Length()==sizeof(TBuf<0x10>));
	aFix.Compress();
	test(TRUE);
	aFix.Reset();
	test(TRUE);
	TKeyArrayFix kk(0,ECmpNormal,0x10);
	test(TRUE);
	aFix.Sort(kk);
	test(TRUE);
	TBuf<0x10> aa(_L("aaaaa"));
	aFix.InsertL(0,aa);
	test(TRUE);
	aFix[0].Fill(' ');
	test(TRUE);
	TBuf<0x10> z(aFix[0]);
	z.Length();
	test(TRUE);
	aFix[0].Fill('a');
	test(TRUE);
	TInt pp;
	test(aFix.Find(aa,kk,pp)==0);
	test(pp==0);
	aFix.Delete(0);
	TBuf<0x10> bb(_L("bbbbb"));
	aFix.AppendL(bb);
	test(aFix.Count()==1);
	test(aFix.InsertIsqAllowDuplicatesL(aa,kk)==0);
	test(aFix.InsertIsqAllowDuplicatesL(bb,kk)==2);
	test(aFix.FindIsq(aa,kk,pp)==0);
	test(pp==0);
	aFix.Reset();

	// Fill 3.5 granules' worth of elements, then walk the array forwards
	// checking that End() delimits runs of contiguous elements.
	for(TInt index=0;index<KTestGranularity*7/2;index++)
		aFix.AppendL(aa);
	const TBuf<0x10> *end=NULL;
	const TBuf<0x10> *ptr=NULL;
	for(TInt index2=0;index2<KTestGranularity*7/2;index2++)
		{
		if (end==ptr)
			{
			// start of a new contiguous run
			end=aFix.End(index2);
			ptr=&aFix[index2];
			TInt seglen=end-ptr;
			test(seglen==KTestGranularity || seglen==(aFix.Count()-index2));
			}
		test(&aFix[index2]==ptr++);
		}

	// Same walk backwards, using Back()
	const TBuf<0x10> *bak=NULL;
	ptr=NULL;
	for(TInt index3=KTestGranularity*7/2;index3>0;index3--)
		{
		if (bak==ptr)
			{
			bak=aFix.Back(index3);
			ptr=&aFix[index3-1]+1;
			TInt seglen=ptr-bak;
			test(seglen==KTestGranularity || seglen==index3 || seglen==index3%KTestGranularity);
			}
		test(&aFix[index3-1]==--ptr);
		}

	//Test ExpandL
	//Expand array in slot 1
	TBuf16<0x10> exp;
	exp=_L("abc AbC");
	aFix.InsertL(0,exp);
	aFix.InsertL(1,exp);
	aFix.InsertL(2,exp);
	exp=aFix.ExpandL(1);
	test(aFix[0]==_L("abc AbC"));
	test(aFix[1]==_L(""));
	test(aFix[2]==_L("abc AbC"));
	test(aFix[3]==_L("abc AbC"));

	//Test ResizeL
	//Resize the array to contain 20 records,
	//copying a record into any new slots.
	TBuf<0x10> res(_L("bbbbb"));
	aFix.Reset();
	aFix.ResizeL(20,res);
	for(TInt i=0;i<20;i++)
		{
		// check every slot (the index was previously fixed at 1, so only
		// one record was ever verified)
		test(aFix[i]==_L("bbbbb"));
		}
	}

void dninst_(int *n, int *nev, double *sigmar, double *sigmai,
             int *colptr, int *rowind, double *nzvals,
             double *dr, double *di, double *z, int *ldz, int *info,
             double *ptol)
/*  Argument list:

    n       (int*)    Dimension of the problem.  (INPUT)

    nev     (int*)    Number of eigenvalues requested.  (INPUT/OUTPUT)
                      This routine is used to compute NEV eigenvalues
                      nearest to a shift (sigmar, sigmai).
                      On return, it gives the number of converged
                      eigenvalues.

    sigmar  (double*) Real part of the shift. (INPUT)

    sigmai  (double*) Imaginary part of the shift. (INPUT)
                      A non-zero value makes the routine print a message
                      and call exit(1).

    colptr  (int*)    dimension n+1. (INPUT)
                      Column pointers for the sparse matrix (1-based;
                      temporarily rewritten to 0-based and restored).

    rowind  (int*)    dimension colptr[*n]-1. (INPUT)
                      Row indices for the sparse matrix (1-based;
                      temporarily rewritten to 0-based and restored).

    nzvals  (double*) dimension colptr[*n]-1. (INPUT)
                      Nonzero values of the sparse matrix.
                      The sparse matrix is represented by
                      the above three arrays colptr, rowind, nzvals.
                      The diagonal is temporarily shifted by -sigmar and
                      shifted back after factorisation.

    dr      (double*) dimension nev+1.  (OUTPUT)
                      Real part of the eigenvalue.

    di      (double*) dimension nev+1.  (OUTPUT)
                      Imaginary part of the eigenvalue.

    z       (double*) dimension ldz by nev+1. (OUTPUT)
                      Eigenvector matrix.
                      If the j-th eigenvalue is real, the j-th column
                      of z contains the corresponding eigenvector.
                      If the j-th and j+1st eigenvalues form a complex
                      conjugate pair, then the j-th column of z contains
                      the real part of the eigenvector, and the j+1st column
                      of z contains the imaginary part of the eigenvector.

    ldz     (int*)    The leading dimension of z. (INPUT)

    info    (int*)    Error flag to indicate whether the eigenvalues
                      calculation is successful. (OUTPUT)
                      *info = 0,     successful exit
                      *info = 1,     Maximum number of iteration is reached
                                     before all requested eigenvalues
                                     have converged.
                      *info = -1000, n - nev < 2.
                      *info = -1001, a work array could not be allocated.
                      otherwise      the non-zero error code returned by
                                     _naupd, or by _neupd when _naupd
                                     reported none.

    ptol    (double*) Requested convergence tolerance. (INPUT)
                      The value is clamped to [1.0e-10, 1.0e-1].

    All work arrays and the sparse factorisation are released on every
    return path, including the error paths.
*/
{
    int    i, j, ibegin, iend, ncv, neqns, token, order=2;
    int    lworkl, ldv, nnz, ione = 1;
    double tol=1.0e-10;
    /* heap pointers start as NULL so the common cleanup may free them
       unconditionally */
    double *workl = NULL, *workd = NULL, *resid = NULL, *workev = NULL;
    double *v = NULL, *ax = NULL;
    double numr, numi, denr, deni;
    int    *select = NULL, first;
    /* ierr1 is read at Error_handle even when ARPACK was never called */
    int    ido, ishfts, maxitr, mode, rvec, ierr1 = 0, ierr2;
    int    iparam[11], ipntr[14];
    char   *which="LM", bmat[2], *all="A";
    int    factored = 0;   /* set once a factorisation exists for token */
#ifdef USE_COMPLEX
    doublecomplex *cvals = NULL, *cx = NULL, *crhs = NULL;
#endif

    neqns = *n;
    *info = 0;

    tol = *ptol;
    if ( tol < 1.0e-10 ) tol = 1.0e-10;
    if ( tol > 1.0e-1 )  tol = 1.0e-1;

    if (*n - *nev < 2) {
        *info = -1000;
        fprintf(stderr, " NEV must be less than N-2!\n");
        goto Error_handle;
    }

    /* set parameters and allocate temp space for ARPACK*/
    ncv = max(*nev+20, 2*(*nev));
    if (ncv > neqns) ncv = neqns;

    /* Convert from 1-based index to 0-based index */
    nnz = colptr[neqns]-1;
    for (j=0;j<=neqns;j++) colptr[j]--;
    for (i=0;i<nnz;i++) rowind[i]--;

    /* Subtract shift from the matrix */
    if ( *sigmai == 0.0) {
        /* real shift */
        for (j = 0; j<neqns; j++) {
            ibegin = colptr[j];
            iend   = colptr[j+1]-1;
            for (i=ibegin;i<=iend;i++)
                if (j == rowind[i]) nzvals[i] = nzvals[i] - *sigmar;
        }
    }
    else {
        printf("Arpack/SuperLU : complex sigma not supported.\n");
        exit(1);
#ifdef USE_COMPLEX
        /* complex shift need additional storage for the shifted matrix
           (not reached: exit(1) above terminates the process first) */
        cvals = (doublecomplex*)malloc(nnz*sizeof(doublecomplex));
        if (!cvals) {
            fprintf(stderr, " Fail to allocate cvals!\n");
            *info = -1001;
            goto Error_handle;
        }
        for (i = 0; i<nnz; i++) {
            cvals[i].r = nzvals[i];
            cvals[i].i = 0.0;
        }
        for (j = 0; j<neqns; j++) {
            ibegin = colptr[j];
            iend   = colptr[j+1]-1;
            for (i=ibegin;i<=iend;i++)
                if (j == rowind[i]) {
                    cvals[i].r = cvals[i].r - *sigmar;
                    cvals[i].i = -(*sigmai);
                }
        }
#endif
    }

    /* order and factor the shifted matrix */
    token = 0;
    if (*sigmai == 0.0) {
        dsparse_preprocess_(&token, &neqns, colptr, rowind, nzvals, &order);
        dsparse_factor_(&token);
        factored = 1;
    }
#ifdef USE_COMPLEX
    else {
        zsparse_preprocess_(&token, &neqns, colptr, rowind, cvals, &order);
        zsparse_factor_(&token);
        factored = 1;
    }
#endif

    /* add the shift back if shift is real */
    if (*sigmai == 0) {
        for (j = 0; j<neqns; j++) {
            ibegin = colptr[j];
            iend   = colptr[j+1]-1;
            for (i=ibegin;i<=iend;i++)
                if (j == rowind[i]) nzvals[i] = nzvals[i] + *sigmar;
        }
    }

    /* change from 0-based index to 1-based index */
    for (j=0;j<=neqns;j++) colptr[j]++;
    for (i=0;i<nnz;i++) rowind[i]++;

    /* set parameters and allocate temp space for ARPACK*/
    lworkl = 3*ncv*ncv+6*ncv;
    ido    = 0;
    ierr1  = 0;
    ishfts = 1;
    maxitr = 300;
    mode   = 3;
    ldv    = neqns;

    iparam[0] = ishfts;
    iparam[2] = maxitr;
    iparam[6] = mode;

    resid = (double*) malloc(neqns*sizeof(double));
    if (!resid) {
        fprintf(stderr," Fail to allocate resid\n");
        *info = -1001;
        goto Error_handle;
    }
    workl = (double*) malloc(lworkl*sizeof(double));
    if (!workl) {
        fprintf(stderr," Fail to allocate workl\n");
        *info = -1001;
        goto Error_handle;
    }
    v = (double*) malloc(ldv*ncv*sizeof(double));
    if (!v) {
        fprintf(stderr," Fail to allocate v\n");
        *info = -1001;
        goto Error_handle;
    }
    workd = (double*) malloc(neqns*3*sizeof(double));
    if (!workd) {
        fprintf(stderr, " Fail to allocate workd\n");
        *info = -1001;
        goto Error_handle;
    }
    workev= (double*) malloc(ncv*3*sizeof(double));
    if (!workev) {
        fprintf(stderr, " Fail to allocate workev\n");
        *info = -1001;
        goto Error_handle;
    }
    select= (int*) malloc(ncv*sizeof(int));
    if (!select) {
        fprintf(stderr, " Fail to allocate select\n");
        *info = -1001;
        goto Error_handle;
    }

#ifdef USE_COMPLEX
    if (*sigmai != 0.0) {
        cx = (doublecomplex*)malloc(neqns*sizeof(doublecomplex));
        if (!cx) {
            fprintf(stderr, " Fail to allocate cx\n");
            *info = -1001;
            goto Error_handle;
        }
        crhs = (doublecomplex*)malloc(neqns*sizeof(doublecomplex));
        if (!crhs) {
            fprintf(stderr, " Fail to allocate crhs\n");
            *info = -1001;
            goto Error_handle;
        }
    }
#endif

    /* intialize all work arrays */
    for (i=0;i<neqns;i++) resid[i] = 0.0;
    for (i=0;i<lworkl;i++) workl[i]=0.0;
    for (i=0;i<ldv*ncv;i++) v[i]=0.0;
    for (i=0;i<3*neqns;i++) workd[i]=0.0;
    for (i=0;i<3*ncv;i++) workev[i]=0.0;
    for (i=0;i<ncv;i++) select[i] = 0;

    if (*sigmai == 0.0) {
        bmat[0] = 'I';
    }
    else {
        bmat[0] = 'G';
    }

    /* ARPACK reverse comm to compute eigenvalues and eigenvectors */
    if (*sigmai == 0.0) {
        while (ido != 99 ) {
            dnaupd_(&ido, bmat, n, which, nev, &tol, resid,
                    &ncv, v, &ldv, iparam, ipntr, workd, workl,
                    &lworkl, &ierr1);
            if (ido == -1 || ido == 1) {
                dsparse_solve_(&token, &workd[ipntr[1]-1],&workd[ipntr[0]-1]);
            }
        }
    }
#ifdef USE_COMPLEX
    else {
        while (ido != 99 ) {
            dnaupd_(&ido, bmat, n, which, nev, &tol, resid,
                    &ncv, v, &ldv, iparam, ipntr, workd, workl,
                    &lworkl, &ierr1);
            if (ido == -1) {
                dcopy_(n, &workd[ipntr[0]-1], &ione, &workd[ipntr[1]-1], &ione);
                for (i=0;i<neqns;i++) {
                    crhs[i].r = workd[ipntr[1]-1+i];
                    crhs[i].i = 0.0;
                }
                zsparse_solve_(&token, cx, crhs);
                for (i=0;i<neqns;i++) {
                    workd[ipntr[1]-1+i] = cx[i].r;
                }
            }
            else if (ido == 1) {
                for (i=0;i<neqns;i++) {
                    crhs[i].r = workd[ipntr[2]-1+i];
                    crhs[i].i = 0.0;
                }
                zsparse_solve_(&token, cx, crhs);
                for (i=0;i<neqns;i++) {
                    workd[ipntr[1]-1+i] = cx[i].r;
                }
            }
            else if (ido == 2) {
                dcopy_(n, &workd[ipntr[0]-1], &ione, &workd[ipntr[1]-1], &ione);
            }
        }
    }
#endif

    /* ARPACK postprocessing */
    if (ierr1 < 0) {
        fprintf(stderr, " Error with _naupd, ierr = %d\n", ierr1);
    }
    else {
        rvec = 1;
        dneupd_(&rvec, all, select, dr, di, z, ldz, sigmar, sigmai,workev,
                bmat, n, which, nev, &tol, resid, &ncv, v, &ldv, iparam,
                ipntr, workd, workl, &lworkl, &ierr2);
        *nev = iparam[4];
        if (ierr2 != 0) {
            fprintf(stderr," Error with _neupd, ierr = %d\n",ierr2);
            /* report the failure; a non-zero ierr1 still takes precedence
               in the common exit code below */
            *info = ierr2;
            goto Error_handle;
        }
    }

#ifdef USE_COMPLEX
    if (*sigmai != 0) {
        /* Use Rayleigh quotient to recover Ritz values */
        ax = (double*)malloc(neqns*sizeof(double));
        if (!ax) {
            fprintf(stderr, " Fail to allocate AX!\n");
            *info = -1001;
            goto Error_handle;
        }
        for (i=0;i<neqns;i++) ax[i] = 0.0;
        first = 1;
        for (j = 1; j<=*nev; j++) {
            if (di(j) == 0.0) {
                dmvm_(n, nzvals, rowind, colptr, &z(1,j), ax, &ione);
                numr = ddot_(n, &z(1,j), &ione, ax, &ione);
                dcopy_(n, &z(1,j), &ione, ax, &ione);
                denr = ddot_(n, &z(1,j), &ione, ax, &ione);
                dr(j) = numr/denr;
            }
            else if (first) {
                /* compute trans(x) A x */
                dmvm_(n, nzvals, rowind, colptr, &z(1,j), ax, &ione);
                numr = ddot_(n, &z(1,j), &ione, ax, &ione);
                numi = ddot_(n, &z(1,j+1), &ione, ax, &ione);
                dmvm_(n, nzvals, rowind, colptr, &z(1,j+1), ax, &ione);
                numr = numr + ddot_(n, &z(1,j+1), &ione, ax, &ione);
                numi = -numi + ddot_(n, &z(1,j), &ione, ax, &ione);

                /* compute trans(x) M x */
                dcopy_(n, &z(1,j), &ione, ax, &ione);
                denr = ddot_(n, &z(1,j), &ione, ax, &ione);
                deni = ddot_(n, &z(1,j+1), &ione, ax, &ione);
                dcopy_(n, &z(1,j+1), &ione, ax, &ione);
                denr = denr + ddot_(n, &z(1,j+1), &ione, ax, &ione);
                deni = -deni + ddot_(n, &z(1,j), &ione, ax, &ione);

                dr(j) = (numr*denr+numi*deni)/dlapy2_(&denr, &deni);
                di(j) = (numi*denr-numr*deni)/dlapy2_(&denr, &deni);
                first = 0;
            }
            else {
                /* second member of a conjugate pair */
                dr(j) = dr(j-1);
                di(j) = -di(j-1);
                first = 1;
            }
        }
    }
#endif

Error_handle:
    /* Common exit: release whatever was acquired. free(NULL) is a no-op,
       so pointers that were never allocated are safe here. */
    free(resid);
    free(workl);
    free(v);
    free(workd);
    free(workev);
    free(select);
#ifdef USE_COMPLEX
    free(crhs);
    free(cx);
    free(cvals);
    free(ax);
#endif
    if (factored) {
#ifdef USE_COMPLEX
        if (*sigmai != 0.0) {
            zsparse_destroy_(&token);
        }
        else {
#endif
            dsparse_destroy_(&token);
#ifdef USE_COMPLEX
        }
#endif
    }

    if (ierr1 != 0) *info = ierr1;
    if (ierr1 == 1) fprintf(stderr, " Maxiumum number of iteration reached.\n");
}

int main(int argc, char* argv[]) { #ifdef _DIST_ CnC::dist_cnc_init< my_context > dc_init; #endif bool verbose = false; int max_row = 100; int max_col = 100; int max_depth = 10000; int ai = 1; if (argc > ai && 0 == strcmp("-v", argv[ai])) { verbose = true; ai++; } if (argc == ai+3) { max_row = atoi(argv[ai]); max_col = atoi(argv[ai+1]); // you can't change a global variable when using distributed memory, // so this is in the context and must be set before the first tag/item is put max_depth = atoi(argv[ai+2]); } else { fprintf(stderr,"Usage: mandel [-v] rows columns max_depth\n"); return -1; } complex z(1.0,1.5); std::cout << mandel(z, max_depth) << std::endl; double r_origin = -2; double r_scale = 4.0/max_row; double c_origin = -2.0; double c_scale = 4.0/max_col; int *pixels = new int[max_row*max_col]; // set max-depth in the constructor my_context c( max_depth ); tbb::tick_count t0 = tbb::tick_count::now(); for (int i = 0; i < max_row; i++) { for (int j = 0; j < max_col; j++ ) { complex z = complex(r_scale*j +r_origin,c_scale*i + c_origin); c.m_data.put(pair(i,j),z); c.m_position.put(pair(i,j)); } } c.wait(); c.m_pixel.begin(); //in distCnC case, this gathers all items in one go for (int i = 0; i < max_row; i++) { for (int j = 0; j < max_col; j++ ) { c.m_pixel.get(pair(i,j), pixels[i*max_col + j]); } } tbb::tick_count t1 = tbb::tick_count::now(); printf("Mandel %d %d %d in %g seconds\n", max_row, max_col, max_depth, (t1-t0).seconds()); int check = 0; for (int i = 0; i < max_row; i++) { for (int j = 0; j < max_col; j++ ) { if (pixels[i*max_col + j ] == max_depth) check += (i*max_col +j ); } } printf("Mandel check %d \n", check); if (verbose) { for (int i = 0; i < max_row; i++) { for (int j = 0; j < max_col; j++ ) { if (pixels[i*max_col + j] == max_depth) { std::cout << " "; } else { std::cout << "."; } } std::cout << std::endl; } } return 0; }

void TurretShape::getCameraTransform(F32* pos,MatrixF* mat) { // Returns camera to world space transform // Handles first person / third person camera position if (isServerObject() && mShapeInstance) mShapeInstance->animateNodeSubtrees(true); if (*pos == 0) { getRenderEyeTransform(mat); return; } // Get the shape's camera parameters. F32 min,max; MatrixF rot; Point3F offset; getCameraParameters(&min,&max,&offset,&rot); // Start with the current eye position MatrixF eye; getRenderEyeTransform(&eye); // Build a transform that points along the eye axis // but where the Z axis is always up. { MatrixF cam(1); VectorF x,y,z(0,0,1); eye.getColumn(1, &y); mCross(y, z, &x); x.normalize(); mCross(x, y, &z); z.normalize(); cam.setColumn(0,x); cam.setColumn(1,y); cam.setColumn(2,z); mat->mul(cam,rot); } // Camera is positioned straight back along the eye's -Y axis. // A ray is cast to make sure the camera doesn't go through // anything solid. VectorF vp,vec; vp.x = vp.z = 0; vp.y = -(max - min) * *pos; eye.mulV(vp,&vec); // Use the camera node as the starting position if it exists. Point3F osp,sp; if (mDataBlock->cameraNode != -1) { mShapeInstance->mNodeTransforms[mDataBlock->cameraNode].getColumn(3,&osp); getRenderTransform().mulP(osp,&sp); } else eye.getColumn(3,&sp); // Make sure we don't hit ourself... disableCollision(); if (isMounted()) getObjectMount()->disableCollision(); // Cast the ray into the container database to see if we're going // to hit anything. RayInfo collision; Point3F ep = sp + vec + offset; if (mContainer->castRay(sp, ep, ~(WaterObjectType | GameBaseObjectType | DefaultObjectType | sTriggerMask), &collision) == true) { // Shift the collision point back a little to try and // avoid clipping against the front camera plane. F32 t = collision.t - (-mDot(vec, collision.normal) / vec.len()) * 0.1; if (t > 0.0f) ep = sp + offset + (vec * t); else eye.getColumn(3,&ep); } mat->setColumn(3,ep); // Re-enable our collision. 
if (isMounted()) getObjectMount()->enableCollision(); enableCollision(); // Apply Camera FX. mat->mul( gCamFXMgr.getTrans() ); }

void SdMsgBaseCanvas::save(QTextStream & st, QString & warning) const { nl_indent(st); st << "to "; dest->save(st, TRUE, warning); nl_indent(st); #ifdef FORCE_INT_COORD // note : << float bugged in Qt 3.3.3 st << "yz " << (int) y() << " " << (int) z(); #else QString sy, sz; st << "yz " << sy.setNum(y()) << " " << sz.setNum(z()); #endif if (msg != 0) { // not a lost, dest is duration if (msg->deletedp()) { warning += QString("<b>") + the_canvas()->browser_diagram()->full_name() + "</b> reference the class <b>" + ((SdDurationCanvas *) dest)->get_line()->get_obj()->get_class()->full_name() + "</b> deleted operation <b>" + msg->definition(TRUE, FALSE) + "</b><br>\n<br>\n"; if (warning[0] == '!') { st << " msg "; msg->save(st, TRUE, warning); } else { st << " explicitmsg "; save_string(msg->get_browser_node()->get_name(), st); } } else { st << " msg "; msg->save(st, TRUE, warning); } } else if (! explicit_msg.isEmpty()) { st << " explicitmsg "; save_string(explicit_msg, st); } else st << " unspecifiedmsg"; if (stereotype != 0) { nl_indent(st); st << "stereotype "; save_string(stereotype->get_name(), st); save_xyz(st, stereotype, " xyz"); } nl_indent(st); st << "show_full_operations_definition " << stringify(show_full_oper) << " drawing_language " << stringify(drawing_language) << " show_context_mode " << stringify(show_context_mode); if (label != 0) { if (! args.isEmpty()) { nl_indent(st); st << "args "; save_string(args, st); } nl_indent(st); save_xy(st, label, "label_xy"); } }

int main(int argc, char *argv[]) { int ierr = 0, forierr = 0; bool debug = false; #ifdef EPETRA_MPI // Initialize MPI MPI_Init(&argc,&argv); int rank; // My process ID MPI_Comm_rank(MPI_COMM_WORLD, &rank); Epetra_MpiComm Comm( MPI_COMM_WORLD ); #else int rank = 0; Epetra_SerialComm Comm; #endif bool verbose = false; // Check if we should print results to standard out if (argc>1) if (argv[1][0]=='-' && argv[1][1]=='v') verbose = true; int verbose_int = verbose ? 1 : 0; Comm.Broadcast(&verbose_int, 1, 0); verbose = verbose_int==1 ? true : false; // char tmp; // if (rank==0) cout << "Press any key to continue..."<< std::endl; // if (rank==0) cin >> tmp; // Comm.Barrier(); Comm.SetTracebackMode(0); // This should shut down any error traceback reporting int MyPID = Comm.MyPID(); int NumProc = Comm.NumProc(); if(verbose && MyPID==0) cout << Epetra_Version() << std::endl << std::endl; if (verbose) cout << "Processor "<<MyPID<<" of "<< NumProc << " is alive."<<endl; bool verbose1 = verbose; // Redefine verbose to only print on PE 0 if(verbose && rank!=0) verbose = false; int NumMyEquations = 10000; int NumGlobalEquations = (NumMyEquations * NumProc) + EPETRA_MIN(NumProc,3); if(MyPID < 3) NumMyEquations++; // Construct a Map that puts approximately the same Number of equations on each processor Epetra_Map Map(NumGlobalEquations, NumMyEquations, 0, Comm); // Get update list and number of local equations from newly created Map int* MyGlobalElements = new int[Map.NumMyElements()]; Map.MyGlobalElements(MyGlobalElements); // Create an integer vector NumNz that is used to build the Petra Matrix. 
// NumNz[i] is the Number of OFF-DIAGONAL term for the ith global equation on this processor int* NumNz = new int[NumMyEquations]; // We are building a tridiagonal matrix where each row has (-1 2 -1) // So we need 2 off-diagonal terms (except for the first and last equation) for (int i = 0; i < NumMyEquations; i++) if((MyGlobalElements[i] == 0) || (MyGlobalElements[i] == NumGlobalEquations - 1)) NumNz[i] = 1; else NumNz[i] = 2; // Create a Epetra_Matrix Epetra_CrsMatrix A(Copy, Map, NumNz); EPETRA_TEST_ERR(A.IndicesAreGlobal(),ierr); EPETRA_TEST_ERR(A.IndicesAreLocal(),ierr); // Add rows one-at-a-time // Need some vectors to help // Off diagonal Values will always be -1 double* Values = new double[2]; Values[0] = -1.0; Values[1] = -1.0; int* Indices = new int[2]; double two = 2.0; int NumEntries; forierr = 0; for (int i = 0; i < NumMyEquations; i++) { if(MyGlobalElements[i] == 0) { Indices[0] = 1; NumEntries = 1; } else if (MyGlobalElements[i] == NumGlobalEquations-1) { Indices[0] = NumGlobalEquations-2; NumEntries = 1; } else { Indices[0] = MyGlobalElements[i]-1; Indices[1] = MyGlobalElements[i]+1; NumEntries = 2; } forierr += !(A.InsertGlobalValues(MyGlobalElements[i], NumEntries, Values, Indices)==0); forierr += !(A.InsertGlobalValues(MyGlobalElements[i], 1, &two, MyGlobalElements+i)>0); // Put in the diagonal entry } EPETRA_TEST_ERR(forierr,ierr); int * indexOffsetTmp; int * indicesTmp; double * valuesTmp; // Finish up EPETRA_TEST_ERR(!(A.IndicesAreGlobal()),ierr); EPETRA_TEST_ERR(!(A.ExtractCrsDataPointers(indexOffsetTmp, indicesTmp, valuesTmp)==-1),ierr); // Should fail EPETRA_TEST_ERR(!(A.FillComplete(false)==0),ierr); EPETRA_TEST_ERR(!(A.ExtractCrsDataPointers(indexOffsetTmp, indicesTmp, valuesTmp)==-1),ierr); // Should fail EPETRA_TEST_ERR(!(A.IndicesAreLocal()),ierr); EPETRA_TEST_ERR(A.StorageOptimized(),ierr); A.OptimizeStorage(); EPETRA_TEST_ERR(!(A.StorageOptimized()),ierr); EPETRA_TEST_ERR(!(A.ExtractCrsDataPointers(indexOffsetTmp, indicesTmp, 
valuesTmp)==0),ierr); // Should succeed const Epetra_CrsGraph & GofA = A.Graph(); EPETRA_TEST_ERR((indicesTmp!=GofA[0] || valuesTmp!=A[0]),ierr); // Extra check to see if operator[] is consistent EPETRA_TEST_ERR(A.UpperTriangular(),ierr); EPETRA_TEST_ERR(A.LowerTriangular(),ierr); int NumMyNonzeros = 3 * NumMyEquations; if(A.LRID(0) >= 0) NumMyNonzeros--; // If I own first global row, then there is one less nonzero if(A.LRID(NumGlobalEquations-1) >= 0) NumMyNonzeros--; // If I own last global row, then there is one less nonzero EPETRA_TEST_ERR(check(A, NumMyEquations, NumGlobalEquations, NumMyNonzeros, 3*NumGlobalEquations-2, MyGlobalElements, verbose),ierr); forierr = 0; for (int i = 0; i < NumMyEquations; i++) forierr += !(A.NumGlobalEntries(MyGlobalElements[i])==NumNz[i]+1); EPETRA_TEST_ERR(forierr,ierr); forierr = 0; for (int i = 0; i < NumMyEquations; i++) forierr += !(A.NumMyEntries(i)==NumNz[i]+1); EPETRA_TEST_ERR(forierr,ierr); if (verbose) cout << "\n\nNumEntries function check OK" << std::endl<< std::endl; EPETRA_TEST_ERR(check_graph_sharing(Comm),ierr); // Create vectors for Power method Epetra_Vector q(Map); Epetra_Vector z(Map); Epetra_Vector resid(Map); // variable needed for iteration double lambda = 0.0; // int niters = 10000; int niters = 200; double tolerance = 1.0e-1; ///////////////////////////////////////////////////////////////////////////////////////////////// // Iterate Epetra_Flops flopcounter; A.SetFlopCounter(flopcounter); q.SetFlopCounter(A); z.SetFlopCounter(A); resid.SetFlopCounter(A); Epetra_Time timer(Comm); EPETRA_TEST_ERR(power_method(false, A, q, z, resid, &lambda, niters, tolerance, verbose),ierr); double elapsed_time = timer.ElapsedTime(); double total_flops = A.Flops() + q.Flops() + z.Flops() + resid.Flops(); double MFLOPs = total_flops/elapsed_time/1000000.0; if (verbose) cout << "\n\nTotal MFLOPs for first solve = " << MFLOPs << std::endl<< std::endl; 
///////////////////////////////////////////////////////////////////////////////////////////////// // Solve transpose problem if (verbose) cout << "\n\nUsing transpose of matrix and solving again (should give same result).\n\n" << std::endl; // Iterate lambda = 0.0; flopcounter.ResetFlops(); timer.ResetStartTime(); EPETRA_TEST_ERR(power_method(true, A, q, z, resid, &lambda, niters, tolerance, verbose),ierr); elapsed_time = timer.ElapsedTime(); total_flops = A.Flops() + q.Flops() + z.Flops() + resid.Flops(); MFLOPs = total_flops/elapsed_time/1000000.0; if (verbose) cout << "\n\nTotal MFLOPs for transpose solve = " << MFLOPs << std::endl<< endl; ///////////////////////////////////////////////////////////////////////////////////////////////// // Increase diagonal dominance if (verbose) cout << "\n\nIncreasing the magnitude of first diagonal term and solving again\n\n" << endl; if (A.MyGlobalRow(0)) { int numvals = A.NumGlobalEntries(0); double * Rowvals = new double [numvals]; int * Rowinds = new int [numvals]; A.ExtractGlobalRowCopy(0, numvals, numvals, Rowvals, Rowinds); // Get A[0,0] for (int i=0; i<numvals; i++) if (Rowinds[i] == 0) Rowvals[i] *= 10.0; A.ReplaceGlobalValues(0, numvals, Rowvals, Rowinds); delete [] Rowvals; delete [] Rowinds; } // Iterate (again) lambda = 0.0; flopcounter.ResetFlops(); timer.ResetStartTime(); EPETRA_TEST_ERR(power_method(false, A, q, z, resid, &lambda, niters, tolerance, verbose),ierr); elapsed_time = timer.ElapsedTime(); total_flops = A.Flops() + q.Flops() + z.Flops() + resid.Flops(); MFLOPs = total_flops/elapsed_time/1000000.0; if (verbose) cout << "\n\nTotal MFLOPs for second solve = " << MFLOPs << endl<< endl; ///////////////////////////////////////////////////////////////////////////////////////////////// // Solve transpose problem if (verbose) cout << "\n\nUsing transpose of matrix and solving again (should give same result).\n\n" << endl; // Iterate (again) lambda = 0.0; flopcounter.ResetFlops(); timer.ResetStartTime(); 
EPETRA_TEST_ERR(power_method(true, A, q, z, resid, &lambda, niters, tolerance, verbose),ierr); elapsed_time = timer.ElapsedTime(); total_flops = A.Flops() + q.Flops() + z.Flops() + resid.Flops(); MFLOPs = total_flops/elapsed_time/1000000.0; if (verbose) cout << "\n\nTotal MFLOPs for tranpose of second solve = " << MFLOPs << endl<< endl; if (verbose) cout << "\n\n*****Testing constant entry constructor" << endl<< endl; Epetra_CrsMatrix AA(Copy, Map, 5); if (debug) Comm.Barrier(); double dble_one = 1.0; for (int i=0; i< NumMyEquations; i++) AA.InsertGlobalValues(MyGlobalElements[i], 1, &dble_one, MyGlobalElements+i); // Note: All processors will call the following Insert routines, but only the processor // that owns it will actually do anything int One = 1; if (AA.MyGlobalRow(0)) { EPETRA_TEST_ERR(!(AA.InsertGlobalValues(0, 0, &dble_one, &One)==0),ierr); } else EPETRA_TEST_ERR(!(AA.InsertGlobalValues(0, 1, &dble_one, &One)==-1),ierr); EPETRA_TEST_ERR(!(AA.FillComplete(false)==0),ierr); EPETRA_TEST_ERR(AA.StorageOptimized(),ierr); EPETRA_TEST_ERR(!(AA.UpperTriangular()),ierr); EPETRA_TEST_ERR(!(AA.LowerTriangular()),ierr); if (debug) Comm.Barrier(); EPETRA_TEST_ERR(check(AA, NumMyEquations, NumGlobalEquations, NumMyEquations, NumGlobalEquations, MyGlobalElements, verbose),ierr); if (debug) Comm.Barrier(); forierr = 0; for (int i=0; i<NumMyEquations; i++) forierr += !(AA.NumGlobalEntries(MyGlobalElements[i])==1); EPETRA_TEST_ERR(forierr,ierr); if (verbose) cout << "\n\nNumEntries function check OK" << endl<< endl; if (debug) Comm.Barrier(); if (verbose) cout << "\n\n*****Testing copy constructor" << endl<< endl; Epetra_CrsMatrix B(AA); EPETRA_TEST_ERR(check(B, NumMyEquations, NumGlobalEquations, NumMyEquations, NumGlobalEquations, MyGlobalElements, verbose),ierr); forierr = 0; for (int i=0; i<NumMyEquations; i++) forierr += !(B.NumGlobalEntries(MyGlobalElements[i])==1); EPETRA_TEST_ERR(forierr,ierr); if (verbose) cout << "\n\nNumEntries function check OK" << endl<< 
endl; if (debug) Comm.Barrier(); if (verbose) cout << "\n\n*****Testing local view constructor" << endl<< endl; Epetra_CrsMatrix BV(View, AA.RowMap(), AA.ColMap(), 0); forierr = 0; int* Inds; double* Vals; for (int i = 0; i < NumMyEquations; i++) { forierr += !(AA.ExtractMyRowView(i, NumEntries, Vals, Inds)==0); forierr += !(BV.InsertMyValues(i, NumEntries, Vals, Inds)==0); } BV.FillComplete(false); EPETRA_TEST_ERR(check(BV, NumMyEquations, NumGlobalEquations, NumMyEquations, NumGlobalEquations, MyGlobalElements, verbose),ierr); forierr = 0; for (int i=0; i<NumMyEquations; i++) forierr += !(BV.NumGlobalEntries(MyGlobalElements[i])==1); EPETRA_TEST_ERR(forierr,ierr); if (verbose) cout << "\n\nNumEntries function check OK" << endl<< endl; if (debug) Comm.Barrier(); if (verbose) cout << "\n\n*****Testing post construction modifications" << endl<< endl; EPETRA_TEST_ERR(!(B.InsertGlobalValues(0, 1, &dble_one, &One)==-2),ierr); // Release all objects delete [] NumNz; delete [] Values; delete [] Indices; delete [] MyGlobalElements; if (verbose1) { // Test ostream << operator (if verbose1) // Construct a Map that puts 2 equations on each PE int NumMyElements1 = 2; int NumMyEquations1 = NumMyElements1; int NumGlobalEquations1 = NumMyEquations1*NumProc; Epetra_Map Map1(-1, NumMyElements1, 0, Comm); // Get update list and number of local equations from newly created Map int * MyGlobalElements1 = new int[Map1.NumMyElements()]; Map1.MyGlobalElements(MyGlobalElements1); // Create an integer vector NumNz that is used to build the Petra Matrix. 
// NumNz[i] is the Number of OFF-DIAGONAL term for the ith global equation on this processor int * NumNz1 = new int[NumMyEquations1]; // We are building a tridiagonal matrix where each row has (-1 2 -1) // So we need 2 off-diagonal terms (except for the first and last equation) for (int i=0; i<NumMyEquations1; i++) if (MyGlobalElements1[i]==0 || MyGlobalElements1[i] == NumGlobalEquations1-1) NumNz1[i] = 1; else NumNz1[i] = 2; // Create a Epetra_Matrix Epetra_CrsMatrix A1(Copy, Map1, NumNz1); // Add rows one-at-a-time // Need some vectors to help // Off diagonal Values will always be -1 double *Values1 = new double[2]; Values1[0] = -1.0; Values1[1] = -1.0; int *Indices1 = new int[2]; double two1 = 2.0; int NumEntries1; forierr = 0; for (int i=0; i<NumMyEquations1; i++) { if (MyGlobalElements1[i]==0) { Indices1[0] = 1; NumEntries1 = 1; } else if (MyGlobalElements1[i] == NumGlobalEquations1-1) { Indices1[0] = NumGlobalEquations1-2; NumEntries1 = 1; } else { Indices1[0] = MyGlobalElements1[i]-1; Indices1[1] = MyGlobalElements1[i]+1; NumEntries1 = 2; } forierr += !(A1.InsertGlobalValues(MyGlobalElements1[i], NumEntries1, Values1, Indices1)==0); forierr += !(A1.InsertGlobalValues(MyGlobalElements1[i], 1, &two1, MyGlobalElements1+i)>0); // Put in the diagonal entry } EPETRA_TEST_ERR(forierr,ierr); delete [] Indices1; delete [] Values1; // Finish up EPETRA_TEST_ERR(!(A1.FillComplete(false)==0),ierr); // Test diagonal extraction function Epetra_Vector checkDiag(Map1); EPETRA_TEST_ERR(!(A1.ExtractDiagonalCopy(checkDiag)==0),ierr); forierr = 0; for (int i=0; i<NumMyEquations1; i++) forierr += !(checkDiag[i]==two1); EPETRA_TEST_ERR(forierr,ierr); // Test diagonal replacement method forierr = 0; for (int i=0; i<NumMyEquations1; i++) checkDiag[i]=two1*two1; EPETRA_TEST_ERR(forierr,ierr); EPETRA_TEST_ERR(!(A1.ReplaceDiagonalValues(checkDiag)==0),ierr); Epetra_Vector checkDiag1(Map1); EPETRA_TEST_ERR(!(A1.ExtractDiagonalCopy(checkDiag1)==0),ierr); forierr = 0; for (int i=0; 
i<NumMyEquations1; i++) forierr += !(checkDiag[i]==checkDiag1[i]); EPETRA_TEST_ERR(forierr,ierr); if (verbose) cout << "\n\nDiagonal extraction and replacement OK.\n\n" << endl; double orignorm = A1.NormOne(); EPETRA_TEST_ERR(!(A1.Scale(4.0)==0),ierr); EPETRA_TEST_ERR(!(A1.NormOne()!=orignorm),ierr); if (verbose) cout << "\n\nMatrix scale OK.\n\n" << endl; if (verbose) cout << "\n\nPrint out tridiagonal matrix, each part on each processor.\n\n" << endl; cout << A1 << endl; // Release all objects delete [] NumNz1; delete [] MyGlobalElements1; } if (verbose) cout << "\n\n*****Testing LeftScale and RightScale" << endl << endl; int NumMyElements2 = 7; int NumMyRows2 = 1;//This value should not be changed without editing the // code below. Epetra_Map RowMap(-1,NumMyRows2,0,Comm); Epetra_Map ColMap(NumMyElements2,NumMyElements2,0,Comm); // The DomainMap needs to be different from the ColMap for the test to // be meaningful. Epetra_Map DomainMap(NumMyElements2,0,Comm); int NumMyRangeElements2 = 0; // We need to distribute the elements differently for the range map also. 
if (MyPID % 2 == 0) NumMyRangeElements2 = NumMyRows2*2; //put elements on even number procs if (NumProc % 2 == 1 && MyPID == NumProc-1) NumMyRangeElements2 = NumMyRows2; //If number of procs is odd, put // the last NumMyElements2 elements on the last proc Epetra_Map RangeMap(-1,NumMyRangeElements2,0,Comm); Epetra_CrsMatrix A2(Copy,RowMap,ColMap,NumMyElements2); double * Values2 = new double[NumMyElements2]; int * Indices2 = new int[NumMyElements2]; for (int i=0; i<NumMyElements2; i++) { Values2[i] = i+MyPID; Indices2[i]=i; } A2.InsertMyValues(0,NumMyElements2,Values2,Indices2); A2.FillComplete(DomainMap,RangeMap,false); Epetra_CrsMatrix A2copy(A2); double * RowLeftScaleValues = new double[NumMyRows2]; double * ColRightScaleValues = new double[NumMyElements2]; int RowLoopLength = RowMap.MaxMyGID()-RowMap.MinMyGID()+1; for (int i=0; i<RowLoopLength; i++) RowLeftScaleValues[i] = (i + RowMap.MinMyGID() ) % 2 + 1; // For the column map, all procs own all elements for (int i=0; i<NumMyElements2;i++) ColRightScaleValues[i] = i % 2 + 1; int RangeLoopLength = RangeMap.MaxMyGID()-RangeMap.MinMyGID()+1; double * RangeLeftScaleValues = new double[RangeLoopLength]; int DomainLoopLength = DomainMap.MaxMyGID()-DomainMap.MinMyGID()+1; double * DomainRightScaleValues = new double[DomainLoopLength]; for (int i=0; i<RangeLoopLength; i++) RangeLeftScaleValues[i] = 1.0/((i + RangeMap.MinMyGID() ) % 2 + 1); for (int i=0; i<DomainLoopLength;i++) DomainRightScaleValues[i] = 1.0/((i + DomainMap.MinMyGID() ) % 2 + 1); Epetra_Vector xRow(View,RowMap,RowLeftScaleValues); Epetra_Vector xCol(View,ColMap,ColRightScaleValues); Epetra_Vector xRange(View,RangeMap,RangeLeftScaleValues); Epetra_Vector xDomain(View,DomainMap,DomainRightScaleValues); double A2infNorm = A2.NormInf(); double A2oneNorm = A2.NormOne(); if (verbose1) cout << A2; EPETRA_TEST_ERR(A2.LeftScale(xRow),ierr); double A2infNorm1 = A2.NormInf(); double A2oneNorm1 = A2.NormOne(); bool ScalingBroke = false; if 
(A2infNorm1>2*A2infNorm||A2infNorm1<A2infNorm) { EPETRA_TEST_ERR(-31,ierr); ScalingBroke = true; } if (A2oneNorm1>2*A2oneNorm||A2oneNorm1<A2oneNorm) { EPETRA_TEST_ERR(-32,ierr); ScalingBroke = true; } if (verbose1) cout << A2; EPETRA_TEST_ERR(A2.RightScale(xCol),ierr); double A2infNorm2 = A2.NormInf(); double A2oneNorm2 = A2.NormOne(); if (A2infNorm2>=2*A2infNorm1||A2infNorm2<=A2infNorm1) { EPETRA_TEST_ERR(-33,ierr); ScalingBroke = true; } if (A2oneNorm2>2*A2oneNorm1||A2oneNorm2<=A2oneNorm1) { EPETRA_TEST_ERR(-34,ierr); ScalingBroke = true; } if (verbose1) cout << A2; EPETRA_TEST_ERR(A2.RightScale(xDomain),ierr); double A2infNorm3 = A2.NormInf(); double A2oneNorm3 = A2.NormOne(); // The last two scaling ops cancel each other out if (A2infNorm3!=A2infNorm1) { EPETRA_TEST_ERR(-35,ierr) ScalingBroke = true; } if (A2oneNorm3!=A2oneNorm1) { EPETRA_TEST_ERR(-36,ierr) ScalingBroke = true; } if (verbose1) cout << A2; EPETRA_TEST_ERR(A2.LeftScale(xRange),ierr); double A2infNorm4 = A2.NormInf(); double A2oneNorm4 = A2.NormOne(); // The 4 scaling ops all cancel out if (A2infNorm4!=A2infNorm) { EPETRA_TEST_ERR(-37,ierr) ScalingBroke = true; } if (A2oneNorm4!=A2oneNorm) { EPETRA_TEST_ERR(-38,ierr) ScalingBroke = true; } // // Now try changing the values underneath and make sure that // telling one process about the change causes NormInf() and // NormOne() to recompute the norm on all processes. 
// double *values; int num_my_rows = A2.NumMyRows() ; int num_entries; for ( int i=0 ; i< num_my_rows; i++ ) { EPETRA_TEST_ERR( A2.ExtractMyRowView( i, num_entries, values ), ierr ); for ( int j = 0 ; j <num_entries; j++ ) { values[j] *= 2.0; } } if ( MyPID == 0 ) A2.SumIntoGlobalValues( 0, 0, 0, 0 ) ; double A2infNorm5 = A2.NormInf(); double A2oneNorm5 = A2.NormOne(); if (A2infNorm5!=2.0 * A2infNorm4) { EPETRA_TEST_ERR(-39,ierr) ScalingBroke = true; } if (A2oneNorm5!= 2.0 * A2oneNorm4) { EPETRA_TEST_ERR(-40,ierr) ScalingBroke = true; } // // Restore the values underneath // for ( int i=0 ; i< num_my_rows; i++ ) { EPETRA_TEST_ERR( A2.ExtractMyRowView( i, num_entries, values ), ierr ); for ( int j = 0 ; j <num_entries; j++ ) { values[j] /= 2.0; } } if (verbose1) cout << A2; if (ScalingBroke) { if (verbose) cout << endl << "LeftScale and RightScale tests FAILED" << endl << endl; } else { if (verbose) cout << endl << "LeftScale and RightScale tests PASSED" << endl << endl; } Comm.Barrier(); if (verbose) cout << "\n\n*****Testing InvRowMaxs and InvColMaxs" << endl << endl; if (verbose1) cout << A2 << endl; EPETRA_TEST_ERR(A2.InvRowMaxs(xRow),ierr); EPETRA_TEST_ERR(A2.InvRowMaxs(xRange),ierr); if (verbose1) cout << xRow << endl << xRange << endl; if (verbose) cout << "\n\n*****Testing InvRowSums and InvColSums" << endl << endl; bool InvSumsBroke = false; // Works! EPETRA_TEST_ERR(A2.InvRowSums(xRow),ierr); if (verbose1) cout << xRow; EPETRA_TEST_ERR(A2.LeftScale(xRow),ierr); float A2infNormFloat = A2.NormInf(); if (verbose1) cout << A2 << endl; if (fabs(1.0-A2infNormFloat) > 1.e-5) { EPETRA_TEST_ERR(-41,ierr); InvSumsBroke = true; } // Works int expectedcode = 1; if (Comm.NumProc()>1) expectedcode = 0; EPETRA_TEST_ERR(!(A2.InvColSums(xDomain)==expectedcode),ierr); // This matrix has a single row, the first column has a zero, so a warning is issued. 
if (verbose1) cout << xDomain << endl; EPETRA_TEST_ERR(A2.RightScale(xDomain),ierr); float A2oneNormFloat2 = A2.NormOne(); if (verbose1) cout << A2; if (fabs(1.0-A2oneNormFloat2)>1.e-5) { EPETRA_TEST_ERR(-42,ierr) InvSumsBroke = true; } // Works! EPETRA_TEST_ERR(A2.InvRowSums(xRange),ierr); if (verbose1) cout << xRange; EPETRA_TEST_ERR(A2.LeftScale(xRange),ierr); float A2infNormFloat2 = A2.NormInf(); // We use a float so that rounding error // will not prevent the sum from being 1.0. if (verbose1) cout << A2; if (fabs(1.0-A2infNormFloat2)>1.e-5) { cout << "InfNorm should be = 1, but InfNorm = " << A2infNormFloat2 << endl; EPETRA_TEST_ERR(-43,ierr); InvSumsBroke = true; } // Doesn't work - may not need this test because column ownership is not unique /* EPETRA_TEST_ERR(A2.InvColSums(xCol),ierr); cout << xCol; EPETRA_TEST_ERR(A2.RightScale(xCol),ierr); float A2oneNormFloat = A2.NormOne(); cout << A2; if (fabs(1.0-A2oneNormFloat)>1.e-5) { EPETRA_TEST_ERR(-44,ierr); InvSumsBroke = true; } */ delete [] ColRightScaleValues; delete [] DomainRightScaleValues; if (verbose) cout << "Begin partial sum testing." << endl; // Test with a matrix that has partial sums for a subset of the rows // on multiple processors. (Except for the serial case, of course.) 
int NumMyRows3 = 2; // Changing this requires further changes below int * myGlobalElements = new int[NumMyRows3]; for (int i=0; i<NumMyRows3; i++) myGlobalElements[i] = MyPID+i; Epetra_Map RowMap3(NumProc*2, NumMyRows3, myGlobalElements, 0, Comm); int NumMyElements3 = 5; Epetra_CrsMatrix A3(Copy, RowMap3, NumMyElements3); double * Values3 = new double[NumMyElements3]; int * Indices3 = new int[NumMyElements3]; for (int i=0; i < NumMyElements3; i++) { Values3[i] = (int) (MyPID + (i+1)); Indices3[i]=i; } for (int i=0; i<NumMyRows3; i++) { A3.InsertGlobalValues(myGlobalElements[i],NumMyElements3,Values3,Indices3); } Epetra_Map RangeMap3(NumProc+1, 0, Comm); Epetra_Map DomainMap3(NumMyElements3, 0, Comm); EPETRA_TEST_ERR(A3.FillComplete(DomainMap3, RangeMap3,false),ierr); if (verbose1) cout << A3; Epetra_Vector xRange3(RangeMap3,false); Epetra_Vector xDomain3(DomainMap3,false); EPETRA_TEST_ERR(A3.InvRowSums(xRange3),ierr); if (verbose1) cout << xRange3; EPETRA_TEST_ERR(A3.LeftScale(xRange3),ierr); float A3infNormFloat = A3.NormInf(); if (verbose1) cout << A3; if (1.0!=A3infNormFloat) { cout << "InfNorm should be = 1, but InfNorm = " << A3infNormFloat <<endl; EPETRA_TEST_ERR(-61,ierr); InvSumsBroke = true; } // we want to take the transpose of our matrix and fill in different values. int NumMyColumns3 = NumMyRows3; Epetra_Map ColMap3cm(RowMap3); Epetra_Map RowMap3cm(A3.ColMap()); Epetra_CrsMatrix A3cm(Copy,RowMap3cm,ColMap3cm,NumProc+1); double *Values3cm = new double[NumMyColumns3]; int * Indices3cm = new int[NumMyColumns3]; for (int i=0; i<NumMyColumns3; i++) { Values3cm[i] = MyPID + i + 1; Indices3cm[i]= i + MyPID; } for (int ii=0; ii<NumMyElements3; ii++) { A3cm.InsertGlobalValues(ii, NumMyColumns3, Values3cm, Indices3cm); } // The DomainMap and the RangeMap from the last test will work fine for // the RangeMap and DomainMap, respectively, but I will make copies to // avaoid confusion when passing what looks like a DomainMap where we // need a RangeMap and vice vera. 
Epetra_Map RangeMap3cm(DomainMap3); Epetra_Map DomainMap3cm(RangeMap3); EPETRA_TEST_ERR(A3cm.FillComplete(DomainMap3cm,RangeMap3cm),ierr); if (verbose1) cout << A3cm << endl; // Again, we can copy objects from the last example. //Epetra_Vector xRange3cm(xDomain3); //Don't use at this time Epetra_Vector xDomain3cm(DomainMap3cm,false); EPETRA_TEST_ERR(A3cm.InvColSums(xDomain3cm),ierr); if (verbose1) cout << xDomain3cm << endl; EPETRA_TEST_ERR(A3cm.RightScale(xDomain3cm),ierr); float A3cmOneNormFloat = A3cm.NormOne(); if (verbose1) cout << A3cm << endl; if (1.0!=A3cmOneNormFloat) { cout << "OneNorm should be = 1, but OneNorm = " << A3cmOneNormFloat << endl; EPETRA_TEST_ERR(-62,ierr); InvSumsBroke = true; } if (verbose) cout << "End partial sum testing" << endl; if (verbose) cout << "Begin replicated testing" << endl; // We will now view the shared row as a repliated row, rather than one // that has partial sums of its entries on mulitple processors. // We will reuse much of the data used for the partial sum tesitng. Epetra_Vector xRow3(RowMap3,false); Epetra_CrsMatrix A4(Copy, RowMap3, NumMyElements3); for (int ii=0; ii < NumMyElements3; ii++) { Values3[ii] = (int)((ii*.6)+1.0); } for (int ii=0; ii<NumMyRows3; ii++) { A4.InsertGlobalValues(myGlobalElements[ii],NumMyElements3,Values3,Indices3); } EPETRA_TEST_ERR(A4.FillComplete(DomainMap3, RangeMap3,false),ierr); if (verbose1) cout << A4 << endl; // The next two lines should be expanded into a verifiable test. 
EPETRA_TEST_ERR(A4.InvRowMaxs(xRow3),ierr); EPETRA_TEST_ERR(A4.InvRowMaxs(xRange3),ierr); if (verbose1) cout << xRow3 << xRange3; EPETRA_TEST_ERR(A4.InvRowSums(xRow3),ierr); if (verbose1) cout << xRow3; EPETRA_TEST_ERR(A4.LeftScale(xRow3),ierr); float A4infNormFloat = A4.NormInf(); if (verbose1) cout << A4; if (2.0!=A4infNormFloat && NumProc != 1) { if (verbose1) cout << "InfNorm should be = 2 (because one column is replicated on two processors and NormOne() does not handle replication), but InfNorm = " << A4infNormFloat <<endl; EPETRA_TEST_ERR(-63,ierr); InvSumsBroke = true; } else if (1.0!=A4infNormFloat && NumProc == 1) { if (verbose1) cout << "InfNorm should be = 1, but InfNorm = " << A4infNormFloat <<endl; EPETRA_TEST_ERR(-63,ierr); InvSumsBroke = true; } Epetra_Vector xCol3cm(ColMap3cm,false); Epetra_CrsMatrix A4cm(Copy, RowMap3cm, ColMap3cm, NumProc+1); //Use values from A3cm for (int ii=0; ii<NumMyElements3; ii++) { A4cm.InsertGlobalValues(ii,NumMyColumns3,Values3cm,Indices3cm); } EPETRA_TEST_ERR(A4cm.FillComplete(DomainMap3cm, RangeMap3cm,false),ierr); if (verbose1) cout << A4cm << endl; // The next two lines should be expanded into a verifiable test. 
EPETRA_TEST_ERR(A4cm.InvColMaxs(xCol3cm),ierr); EPETRA_TEST_ERR(A4cm.InvColMaxs(xDomain3cm),ierr); if (verbose1) cout << xCol3cm << xDomain3cm; EPETRA_TEST_ERR(A4cm.InvColSums(xCol3cm),ierr); if (verbose1) cout << xCol3cm << endl; EPETRA_TEST_ERR(A4cm.RightScale(xCol3cm),ierr); float A4cmOneNormFloat = A4cm.NormOne(); if (verbose1) cout << A4cm << endl; if (2.0!=A4cmOneNormFloat && NumProc != 1) { if (verbose1) cout << "OneNorm should be = 2 (because one column is replicated on two processors and NormOne() does not handle replication), but OneNorm = " << A4cmOneNormFloat << endl; EPETRA_TEST_ERR(-64,ierr); InvSumsBroke = true; } else if (1.0!=A4cmOneNormFloat && NumProc == 1) { if (verbose1) cout << "OneNorm should be = 1, but OneNorm = " << A4infNormFloat <<endl; EPETRA_TEST_ERR(-64,ierr); InvSumsBroke = true; } if (verbose) cout << "End replicated testing" << endl; if (InvSumsBroke) { if (verbose) cout << endl << "InvRowSums tests FAILED" << endl << endl; } else if (verbose) cout << endl << "InvRowSums tests PASSED" << endl << endl; A3cm.PutScalar(2.0); int nnz_A3cm = A3cm.Graph().NumGlobalNonzeros(); double check_frobnorm = sqrt(nnz_A3cm*4.0); double frobnorm = A3cm.NormFrobenius(); bool frobnorm_test_failed = false; if (fabs(check_frobnorm-frobnorm) > 5.e-5) { frobnorm_test_failed = true; } if (frobnorm_test_failed) { if (verbose) std::cout << "Frobenius-norm test FAILED."<<std::endl; EPETRA_TEST_ERR(-65, ierr); } // Subcommunicator test - only processor 1 has unknowns { int rv=0; int NumMyRows = (MyPID==0) ? 
NumGlobalEquations : 0; Epetra_Map Map1(-1,NumMyRows, 0, Comm); Epetra_CrsMatrix *A1 = new Epetra_CrsMatrix(Copy, Map1,0); double value = 1.0; for(int i=0; i<NumMyRows; i++) { int GID = Map1.GID(i); EPETRA_TEST_ERR(A1->InsertGlobalValues(GID, 1,&value,&GID),ierr); } EPETRA_TEST_ERR(A1->FillComplete(),ierr); Epetra_BlockMap *Map2 = Map1.RemoveEmptyProcesses(); rv=A1->RemoveEmptyProcessesInPlace(Map2); if(rv!=0) { if (verbose) std::cout << "Subcommunicator test FAILED."<<std::endl; EPETRA_TEST_ERR(-66, ierr); } delete Map2; } delete [] Values2; delete [] Indices2; delete [] myGlobalElements; delete [] Values3; delete [] Indices3; delete [] Values3cm; delete [] Indices3cm; delete [] RangeLeftScaleValues; delete [] RowLeftScaleValues; #ifdef EPETRA_MPI MPI_Finalize() ; #endif /* end main */ return ierr ; }

// Returns tr*I() + J() - log(z()), where tr = T/Tcr.
// NOTE(review): judging by the name this is presumably the entropy-departure
// term of the Lee-Kesler model -- confirm sign/units against the callers.
double leekesler::sdep()
{
    const double reducedTemp = T / Tcr;
    // Same left-to-right association as the one-line form: (tr*I() + J()) - log(z()).
    const double integralTerms = reducedTemp * I() + J();
    return integralTerms - log(z());
}

// Compares the gradients reported by dense_e_metric (dtau_dq, dtau_dp and
// dphi_dq) against central finite differences of tau()/phi() with step
// `epsilon`, one coordinate at a time, on the funnel model.
TEST(McmcDenseEMetric, gradients) {
  rng_t base_rng(0);

  Eigen::VectorXd q = Eigen::VectorXd::Ones(11);

  stan::mcmc::dense_e_point z(q.size());
  z.q = q;
  z.p.setOnes();

  // The model is built from a dump reader over a stream opened on an empty
  // file name.
  std::fstream data_stream(std::string("").c_str(), std::fstream::in);
  stan::io::dump data_var_context(data_stream);
  data_stream.close();

  funnel_namespace::funnel model(data_var_context, &std::cout);

  stan::mcmc::dense_e_metric<funnel_namespace::funnel, rng_t> metric(model, &std::cout);

  const double epsilon = 1e-6;

  metric.update(z);

  // --- d tau / d q -------------------------------------------------------
  Eigen::VectorXd g1 = metric.dtau_dq(z);

  for (int i = 0; i < z.q.size(); ++i) {
    // Forward sample at q_i + epsilon.
    z.q(i) += epsilon;
    metric.update(z);
    const double tau_forward = metric.tau(z);

    // Backward sample at q_i - epsilon.
    z.q(i) -= 2 * epsilon;
    metric.update(z);
    const double tau_backward = metric.tau(z);

    // Put q_i back and refresh the metric before the next coordinate.
    z.q(i) += epsilon;
    metric.update(z);

    const double delta = (tau_forward - tau_backward) / (2 * epsilon);
    EXPECT_NEAR(delta, g1(i), epsilon);
  }

  // --- d tau / d p -------------------------------------------------------
  // Only p is perturbed here, and metric.update() is not called in between.
  Eigen::VectorXd g2 = metric.dtau_dp(z);

  for (int i = 0; i < z.q.size(); ++i) {
    z.p(i) += epsilon;
    const double tau_forward = metric.tau(z);

    z.p(i) -= 2 * epsilon;
    const double tau_backward = metric.tau(z);

    z.p(i) += epsilon;

    const double delta = (tau_forward - tau_backward) / (2 * epsilon);
    EXPECT_NEAR(delta, g2(i), epsilon);
  }

  // --- d phi / d q -------------------------------------------------------
  Eigen::VectorXd g3 = metric.dphi_dq(z);

  for (int i = 0; i < z.q.size(); ++i) {
    z.q(i) += epsilon;
    metric.update(z);
    const double phi_forward = metric.phi(z);

    z.q(i) -= 2 * epsilon;
    metric.update(z);
    const double phi_backward = metric.phi(z);

    z.q(i) += epsilon;
    metric.update(z);

    const double delta = (phi_forward - phi_backward) / (2 * epsilon);
    EXPECT_NEAR(delta, g3(i), epsilon);
  }
}

int main (int argc, char **argv) { #if defined(_DIST_) CnC::dist_cnc_init< HeatEquation_bl_context > AAA; #endif // CnC::debug::set_num_threads(1); tbb::tick_count t0 = tbb::tick_count::now(); if( argc < 3 ) { std::cerr << "expecting 2 arguments: <file> <blocksize>\n"; exit( 1 ); } global_data d; int& block_size = d.block_size; int& Nx = d.Nx; int& Nt = d.Nt; int& N_bl = d.N_bl; double& Xa = d.Xa; double& Xb = d.Xb; double& T0 = d.T0; double& T1 = d.T1; double& k = d.k; double& hx = d.hx; double& ht = d.ht; block_size = atoi(argv[2]); if(block_size < 1) { std::cerr<<"Bad block size\n"; return 0; } { std::ifstream from(argv[1]); if( ! from ) { std::cerr << "couldn't open " << argv[1] << std::endl; exit( 2 ); } from >> d.Xa >> d.Xb >> d.Nx >> d.T0 >> d.T1 >> d.Nt >> d.k; from.close(); } if( block_size > Nx+1 ) { block_size = Nx+1; } else { Nx = ((Nx+1+block_size-1)/block_size)*block_size-1; } HeatEquation_bl_context c; hx = ( Xb - Xa ) / Nx; ht = ( T1 - T0 ) / Nt; // for ( int i = 0; i < Nx + 1; i++ ) { c.X.put(i,Xa+i*hx); } // for ( int i = 0; i < Nt + 1; i++ ) { c.T.put(i,T0+i*ht); } N_bl = (Nx+1) / block_size; c.gd.put(0,d); for ( int i = 0; i < N_bl; i++ ) { my_array<double> z(block_size); for(int j = 0; j < block_size; j++ ){ double x = calc_x(block_size*i+j,Xa,Xb,Nx); z.data[j] = Analitical( x, T0 ); } c.H.put(Pair(0,i),z); } Pair p; p.It = 1; for ( int j = 0; j < N_bl; j++ ) { p.Jx = j; c.Tag.put( p ); } // Wait for all steps to finish c.wait(); tbb::tick_count t1 = tbb::tick_count::now(); std::cout<<"Time taken: "<< (t1-t0).seconds()<<" \n"; if (argc >= 4){ for (int i = 0; i <= Nt; i++) { for (int j = 0; j < N_bl; j++){ my_array<double> z; c.H.get(Pair(i,j),z); for (int k = j*block_size; k < j*block_size+block_size && k <= Nx; k++){ printf("%.6lf ",double(z.data[k-j*block_size])); } } puts(""); } } Eo(N_bl); Eo(Nt); return 0; }

int main(int argc, char *argv[]) { Teuchos::GlobalMPISession mpiSession(&argc, &argv); // This little trick lets us print to std::cout only if a (dummy) command-line argument is provided. int iprint = argc - 1; Teuchos::RCP<std::ostream> outStream; Teuchos::oblackholestream bhs; // outputs nothing if (iprint > 0) outStream = Teuchos::rcp(&std::cout, false); else outStream = Teuchos::rcp(&bhs, false); int errorFlag = 0; // *** Test body. try { std::string filename = "input.xml"; Teuchos::RCP<Teuchos::ParameterList> parlist = Teuchos::rcp( new Teuchos::ParameterList() ); Teuchos::updateParametersFromXmlFile( filename, parlist.ptr() ); parlist->sublist("General").set("Inexact Hessian-Times-A-Vector",true); #if USE_HESSVEC parlist->sublist("General").set("Inexact Hessian-Times-A-Vector",false); #endif // Define Status Test Teuchos::RCP<ROL::StatusTest<RealT> > status = Teuchos::rcp(new ROL::StatusTest<RealT>(*parlist)); *outStream << "\n\n" << ROL::ETestObjectivesToString(ROL::TESTOBJECTIVES_ROSENBROCK) << "\n\n"; // Initial Guess Vector Teuchos::RCP<std::vector<RealT> > x0_rcp = Teuchos::rcp( new std::vector<RealT> ); ROL::StdVector<RealT> x0(x0_rcp); // Exact Solution Vector Teuchos::RCP<std::vector<RealT> > z_rcp = Teuchos::rcp( new std::vector<RealT> ); ROL::StdVector<RealT> z(z_rcp); // Get Objective Function Teuchos::RCP<ROL::Objective<RealT> > obj = Teuchos::null; ROL::getTestObjectives<RealT>(obj,x0,z,ROL::TESTOBJECTIVES_ROSENBROCK); // Get Dimension of Problem int dim = Teuchos::rcp_const_cast<std::vector<RealT> >( (Teuchos::dyn_cast<ROL::StdVector<RealT> >(x0)).getVector())->size(); parlist->sublist("General").sublist("Krylov").set("Iteration Limit", 2*dim); // Iteration Vector Teuchos::RCP<std::vector<RealT> > x_rcp = Teuchos::rcp( new std::vector<RealT> (dim, 0.0) ); ROL::StdVector<RealT> x(x_rcp); x.set(x0); // Error Vector Teuchos::RCP<std::vector<RealT> > e_rcp = Teuchos::rcp( new std::vector<RealT> (dim, 0.0) ); ROL::StdVector<RealT> e(e_rcp); e.zero(); 
for ( ROL::EDescent desc = ROL::DESCENT_STEEPEST; desc < ROL::DESCENT_LAST; desc++ ) { parlist->sublist("Step").sublist("Line Search").sublist("Descent Method").set("Type", ROL::EDescentToString(desc)); *outStream << "\n\n" << ROL::EDescentToString(desc) << "\n\n"; for (ROL::ELineSearch ls = ROL::LINESEARCH_BACKTRACKING; ls < ROL::LINESEARCH_USERDEFINED; ls++) { // Define Step parlist->sublist("Step").sublist("Line Search").sublist("Line-Search Method").set("Type",ROL::ELineSearchToString(ls)); Teuchos::RCP<ROL::LineSearchStep<RealT> > step = Teuchos::rcp(new ROL::LineSearchStep<RealT>(*parlist)); // Define Algorithm ROL::Algorithm<RealT> algo(step,status,false); // Run Algorithm x.set(x0); algo.run(x, *obj, true, *outStream); // Compute Error e.set(x); e.axpy(-1.0,z); *outStream << "\nNorm of Error: " << e.norm() << "\n"; //errorFlag += (int)(e.norm() < std::sqrt(ROL::ROL_EPSILON)); } } } catch (std::logic_error err) { *outStream << err.what() << "\n"; errorFlag = -1000; }; // end try if (errorFlag != 0) std::cout << "End Result: TEST FAILED\n"; else std::cout << "End Result: TEST PASSED\n"; return 0; }

// ------------------------------------------------------------ // SFCPartitioner implementation void SFCPartitioner::_do_partition (MeshBase& mesh, const unsigned int n) { libmesh_assert_greater (n, 0); // Check for an easy return if (n == 1) { this->single_partition (mesh); return; } // What to do if the sfcurves library IS NOT present #ifndef LIBMESH_HAVE_SFCURVES libmesh_here(); libMesh::err << "ERROR: The library has been built without" << std::endl << "Space Filling Curve support. Using a linear" << std::endl << "partitioner instead!" << std::endl; LinearPartitioner lp; lp.partition (mesh, n); // What to do if the sfcurves library IS present #else START_LOG("sfc_partition()", "SFCPartitioner"); const unsigned int n_active_elem = mesh.n_active_elem(); const unsigned int n_elem = mesh.n_elem(); // the forward_map maps the active element id // into a contiguous block of indices std::vector<unsigned int> forward_map (n_elem, libMesh::invalid_uint); // the reverse_map maps the contiguous ids back // to active elements std::vector<Elem*> reverse_map (n_active_elem, NULL); int size = static_cast<int>(n_active_elem); std::vector<double> x (size); std::vector<double> y (size); std::vector<double> z (size); std::vector<int> table (size); // We need to map the active element ids into a // contiguous range. 
{ // active_elem_iterator elem_it (mesh.elements_begin()); // const active_elem_iterator elem_end(mesh.elements_end()); MeshBase::element_iterator elem_it = mesh.active_elements_begin(); const MeshBase::element_iterator elem_end = mesh.active_elements_end(); unsigned int el_num = 0; for (; elem_it != elem_end; ++elem_it) { libmesh_assert_less ((*elem_it)->id(), forward_map.size()); libmesh_assert_less (el_num, reverse_map.size()); forward_map[(*elem_it)->id()] = el_num; reverse_map[el_num] = *elem_it; el_num++; } libmesh_assert_equal_to (el_num, n_active_elem); } // Get the centroid for each active element { // const_active_elem_iterator elem_it (mesh.const_elements_begin()); // const const_active_elem_iterator elem_end(mesh.const_elements_end()); MeshBase::element_iterator elem_it = mesh.active_elements_begin(); const MeshBase::element_iterator elem_end = mesh.active_elements_end(); for (; elem_it != elem_end; ++elem_it) { const Elem* elem = *elem_it; libmesh_assert_less (elem->id(), forward_map.size()); const Point p = elem->centroid(); x[forward_map[elem->id()]] = p(0); y[forward_map[elem->id()]] = p(1); z[forward_map[elem->id()]] = p(2); } } // build the space-filling curve if (_sfc_type == "Hilbert") Sfc::hilbert (&x[0], &y[0], &z[0], &size, &table[0]); else if (_sfc_type == "Morton") Sfc::morton (&x[0], &y[0], &z[0], &size, &table[0]); else { libmesh_here(); libMesh::err << "ERROR: Unknown type: " << _sfc_type << std::endl << " Valid types are" << std::endl << " \"Hilbert\"" << std::endl << " \"Morton\"" << std::endl << " " << std::endl << "Proceeding with a Hilbert curve." 
<< std::endl; Sfc::hilbert (&x[0], &y[0], &z[0], &size, &table[0]); } // Assign the partitioning to the active elements { // { // std::ofstream out ("sfc.dat"); // out << "variables=x,y,z" << std::endl; // out << "zone f=point" << std::endl; // for (unsigned int i=0; i<n_active_elem; i++) // out << x[i] << " " // << y[i] << " " // << z[i] << std::endl; // } const unsigned int blksize = (n_active_elem+n-1)/n; for (unsigned int i=0; i<n_active_elem; i++) { libmesh_assert_less (static_cast<unsigned int>(table[i]-1), reverse_map.size()); Elem* elem = reverse_map[table[i]-1]; elem->processor_id() = i/blksize; } } STOP_LOG("sfc_partition()", "SFCPartitioner"); #endif }

void MVertex::writeMSH(FILE *fp, bool binary, bool saveParametric, double scalingFactor) { if(_index < 0) return; // negative index vertices are never saved if(!binary){ fprintf(fp, "%d %.16g %.16g %.16g ", _index, x() * scalingFactor, y() * scalingFactor, z() * scalingFactor); } else{ fwrite(&_index, sizeof(int), 1, fp); double data[3] = {x() * scalingFactor, y() * scalingFactor, z() * scalingFactor}; fwrite(data, sizeof(double), 3, fp); } int zero = 0; if(!onWhat() || !saveParametric){ if(!binary) fprintf(fp, "0\n"); else fwrite(&zero, sizeof(int), 1, fp); } else{ int entity = onWhat()->tag(); int dim = onWhat()->dim(); if(!binary) fprintf(fp, "%d %d ", entity, dim); else{ fwrite(&entity, sizeof(int), 1, fp); fwrite(&dim, sizeof(int), 1, fp); } switch(dim){ case 0: if(!binary) fprintf(fp, "\n"); break; case 1: { double _u; getParameter(0, _u); if(!binary) fprintf(fp, "%.16g\n", _u); else fwrite(&_u, sizeof(double), 1, fp); } break; case 2: { double _u, _v; getParameter(0, _u); getParameter(1, _v); if(!binary) fprintf(fp, "%.16g %.16g\n", _u, _v); else{ fwrite(&_u, sizeof(double), 1, fp); fwrite(&_v, sizeof(double), 1, fp); } } break; default: if(!binary) fprintf(fp, "0 0 0\n"); else{ fwrite(&zero, sizeof(int), 1, fp); fwrite(&zero, sizeof(int), 1, fp); fwrite(&zero, sizeof(int), 1, fp); } break; } } }

int main(int argc, char** argv) { char port[] = "27015"; char ip[] = "10.0.0.67"; int numbytes; char buf[MAXDATASIZE]; // Get input parameters for(int a=0; a < argc; a++) { if( strcmp( argv[a], "--ip" ) == 0 ) { strcpy(ip, argv[a+1]); } if( strcmp( argv[a], "--port" ) == 0 ) { strcpy(port, argv[a+1]); } } TCP myClient = TCP(port, ip); ros::init(argc, argv, "joints"); ros::NodeHandle n; std::string receivedString; std::string attribute; std::stringstream ss; std::istringstream iss; tf::TransformBroadcaster br; tf::Transform transform; ros::Rate loop_rate(30); if( myClient.Connect() == 0 ) { // Receive data numbytes = recv(myClient.s,buf,MAXDATASIZE-1,0); while (numbytes != 0 && ros::ok()) { numbytes = recv(myClient.s,buf,MAXDATASIZE-1,0); buf[numbytes]='\0'; receivedString.assign(buf); // debug // std::cout << "Ahoy! Received " << numbytes << " bytes. " << receivedString << std::endl; std::istringstream iss(receivedString); std::vector<double> x (25, 0.0); std::vector<double> y (25, 0.0); std::vector<double> z (25, 0.0); std::vector<std::vector<double> > X (6, x); std::vector<std::vector<double> > Y (6, y); std::vector<std::vector<double> > Z (6, z); std::vector<double> qx (25, 0.0); std::vector<double> qy (25, 0.0); std::vector<double> qz (25, 0.0); std::vector<double> qw (25, 1.0); std::vector<std::vector<double> > QX (6, qx); std::vector<std::vector<double> > QY (6, qy); std::vector<std::vector<double> > QZ (6, qz); std::vector<std::vector<double> > QW (6, qw); int j; int count = 6; int id; std::vector<int> foundIdx; std::string frame; std::string joint; /* code */ while(!iss.eof()) { // joint = "joint_"; iss >> attribute; frame = "person_"; if( strcmp(attribute.c_str(), "count:") == 0 ) { iss >> count; // ROS_INFO("count %d",count); } else if( strcmp(attribute.c_str(), "id:") == 0 ) { iss >> id; foundIdx.push_back(id); frame.append(boost::lexical_cast<std::string>(id)); // ROS_INFO("id %s",frame); // std::cout << id << " " << frame << " "; }

// Solves A*X = B for a single right-hand-side column with GMRES, using
// Arnoldi (Gram-Schmidt) orthogonalisation and Givens rotations for the
// Hessenberg least-squares problem.
//
//   A          system matrix (n x n, n = A.numRows())
//   X          in: initial guess; out: initial guess plus computed correction
//   B          right-hand side
//   max_iter   the loop runs while k < max_iter with k starting at 1, i.e. at
//              most max_iter-1 Arnoldi steps are performed
//   tolerance  iteration stops once the residual estimate is <= tolerance
//   prec_iter  iteration count handed to the Diag (1) and Schur (4)
//              preconditioners
//   order,dim,diag  forwarded to the Schur preconditioner only
//   PrecNum    1 = Diag, 2 = Jacobi, 3 = Gauss-Seidel, 4 = Schur; any other
//              value leaves the vectors unpreconditioned
//   M          matrix the preconditioners are built from
//
// The preconditioner is applied to every basis vector before the product
// with A, and once more to the assembled correction before it is added to X.
// Prints the iteration count to std::cout and always returns 0.
int
Stokhos::GMRESDivisionExpansionStrategy<ordinal_type,value_type,node_type>::
GMRES(const Teuchos::SerialDenseMatrix<int, double> & A,
      Teuchos::SerialDenseMatrix<int,double> & X,
      const Teuchos::SerialDenseMatrix<int,double> & B,
      int max_iter,
      double tolerance,
      int prec_iter,
      int order,
      int dim,
      int PrecNum,
      const Teuchos::SerialDenseMatrix<int, double> & M,
      int diag)
{
  int n = A.numRows();
  int k = 1;
  double resid;

  // Initial residual r0 = B - A*X and its norm
  Teuchos::SerialDenseMatrix<int, double> Ax(n,1);
  Ax.multiply(Teuchos::NO_TRANS,Teuchos::NO_TRANS,1.0, A, X, 0.0);
  Teuchos::SerialDenseMatrix<int, double> r0(B);
  r0-=Ax;
  resid=r0.normFrobenius();

  // Normalise: r0 now holds r/norm(r) where r=b-Ax
  r0.scale(1/resid);

  // Hessenberg matrix, grown by one row and one column per iteration
  Teuchos::SerialDenseMatrix<int, double> h(1,1);

  //Matrix of orthog basis vectors V
  Teuchos::SerialDenseMatrix<int, double> V(n,1);
  //Set v=r0/norm(r0) to be 1st col of V
  for (int i=0; i<n; i++) {
    V(i,0)=r0(i,0);
  }

  //right hand side of the least-squares problem
  Teuchos::SerialDenseMatrix<int, double> bb(1,1);
  bb(0,0)=resid;

  Teuchos::SerialDenseMatrix<int, double> w(n,1);
  // cosines / sines of the accumulated Givens rotations
  Teuchos::SerialDenseMatrix<int, double> c;
  Teuchos::SerialDenseMatrix<int, double> s;

  while (resid > tolerance && k < max_iter) {
    h.reshape(k+1,k);
    //Arnoldi iteration(Gram-Schmidt )
    V.reshape(n,k+1);
    //set vk to be kth col of V
    Teuchos::SerialDenseMatrix<int, double> vk(Teuchos::Copy, V, n,1,0,k-1);

    //Preconditioning step: solve Mz=vk (z stays a copy of vk when PrecNum
    //selects no preconditioner)
    Teuchos::SerialDenseMatrix<int, double> z(vk);
    if (PrecNum == 1) {
      Stokhos::DiagPreconditioner precond(M);
      precond.ApplyInverse(vk,z,prec_iter);
    }
    else if (PrecNum == 2) {
      Stokhos::JacobiPreconditioner precond(M);
      precond.ApplyInverse(vk,z,2);
    }
    else if (PrecNum == 3) {
      Stokhos::GSPreconditioner precond(M,1);
      precond.ApplyInverse(vk,z,1);
    }
    else if (PrecNum == 4) {
      Stokhos::SchurPreconditioner precond(M, order, dim, diag);
      precond.ApplyInverse(vk,z,prec_iter);
    }

    w.multiply(Teuchos::NO_TRANS, Teuchos::NO_TRANS, 1, A, z, 0.0);

    // Orthogonalise w against the existing basis vectors
    Teuchos::SerialDenseMatrix<int, double> ip(1,1);
    for (int i=0; i<k; i++) {
      //set vi to be ith col of V
      Teuchos::SerialDenseMatrix<int, double> vi(Teuchos::Copy, V, n,1,0,i);
      //Calculate inner product
      ip.multiply(Teuchos::TRANS, Teuchos::NO_TRANS, 1.0, vi, w, 0.0);
      h(i,k-1)= ip(0,0);
      //scale vi by h(i,k-1)
      vi.scale(ip(0,0));
      w-=vi;
    }
    h(k,k-1)=w.normFrobenius();
    w.scale(1.0/h(k,k-1));
    //add column vk+1=w to V
    for (int i=0; i<n; i++) {
      V(i,k)=w(i,0);
    }

    //Solve upper hessenberg least squares problem via Givens rotations
    //Compute previous Givens rotations
    for (int i=0; i<k-1; i++) {
      double q=c(i,0)*h(i,k-1)+s(i,0)*h(i+1,k-1);
      h(i+1,k-1)=-1*s(i,0)*h(i,k-1)+c(i,0)*h(i+1,k-1);
      h(i,k-1)=q;
    }
    //Compute next Givens rotations
    c.reshape(k,1);
    s.reshape(k,1);
    bb.reshape(k+1,1);
    double l = sqrt(h(k-1,k-1)*h(k-1,k-1)+h(k,k-1)*h(k,k-1));
    c(k-1,0)=h(k-1,k-1)/l;
    s(k-1,0)=h(k,k-1)/l;

    // Givens rotation on h and bb
    h(k-1,k-1)=l;
    h(k,k-1)=0;
    bb(k,0)=-s(k-1,0)*bb(k-1,0);
    bb(k-1,0)=c(k-1,0)*bb(k-1,0);

    //Determine residual
    resid = fabs(bb(k,0));
    k++;
  }

  //Extract upper triangular square matrix
  bb.reshape(h.numRows()-1 ,1);

  //Solve linear system (NOTE: the LAPACK status in info is not inspected)
  int info;
  Teuchos::LAPACK<int, double> lapack;
  lapack.TRTRS('U', 'N', 'N', h.numRows()-1, 1, h.values(), h.stride(), bb.values(), bb.stride(),&info);

  // Correction = V * y, with V trimmed to the k-1 columns actually used
  Teuchos::SerialDenseMatrix<int, double> ans(X);
  V.reshape(n,k-1);
  ans.multiply(Teuchos::NO_TRANS, Teuchos::NO_TRANS, 1.0, V, bb, 0.0);

  // Apply the preconditioner to the assembled correction
  if (PrecNum == 1) {
    Stokhos::DiagPreconditioner precond(M);
    precond.ApplyInverse(ans,ans,prec_iter);
  }
  else if (PrecNum == 2) {
    Stokhos::JacobiPreconditioner precond(M);
    precond.ApplyInverse(ans,ans,2);
  }
  else if (PrecNum == 3) {
    Stokhos::GSPreconditioner precond(M,1);
    precond.ApplyInverse(ans,ans,1);
  }
  else if (PrecNum == 4) {
    Stokhos::SchurPreconditioner precond(M, order, dim, diag);
    precond.ApplyInverse(ans,ans,prec_iter);
  }

  X+=ans;

  std::cout << "iteration count= " << k-1 << std::endl;

  return 0;
}

int main(int argc, char *argv[]) { int ierr = 0, i, forierr = 0; #ifdef EPETRA_MPI // Initialize MPI MPI_Init(&argc,&argv); int rank; // My process ID MPI_Comm_rank(MPI_COMM_WORLD, &rank); Epetra_MpiComm Comm( MPI_COMM_WORLD ); #else int rank = 0; Epetra_SerialComm Comm; #endif bool verbose = false; // Check if we should print results to standard out if (argc>1) if (argv[1][0]=='-' && argv[1][1]=='v') verbose = true; int verbose_int = verbose ? 1 : 0; Comm.Broadcast(&verbose_int, 1, 0); verbose = verbose_int==1 ? true : false; // char tmp; // if (rank==0) cout << "Press any key to continue..."<< endl; // if (rank==0) cin >> tmp; // Comm.Barrier(); Comm.SetTracebackMode(0); // This should shut down any error traceback reporting int MyPID = Comm.MyPID(); int NumProc = Comm.NumProc(); if(verbose && MyPID==0) cout << Epetra_Version() << endl << endl; if (verbose) cout << "Processor "<<MyPID<<" of "<< NumProc << " is alive."<<endl; // Redefine verbose to only print on PE 0 if(verbose && rank!=0) verbose = false; int NumMyEquations = 10000; long long NumGlobalEquations = (NumMyEquations * NumProc) + EPETRA_MIN(NumProc,3); if(MyPID < 3) NumMyEquations++; // Construct a Map that puts approximately the same Number of equations on each processor Epetra_Map Map(NumGlobalEquations, NumMyEquations, 0LL, Comm); // Get update list and number of local equations from newly created Map vector<long long> MyGlobalElements(Map.NumMyElements()); Map.MyGlobalElements(&MyGlobalElements[0]); // Create an integer vector NumNz that is used to build the Petra Matrix. 
// NumNz[i] is the Number of OFF-DIAGONAL term for the ith global equation on this processor vector<int> NumNz(NumMyEquations); // We are building a tridiagonal matrix where each row has (-1 2 -1) // So we need 2 off-diagonal terms (except for the first and last equation) for(i = 0; i < NumMyEquations; i++) if((MyGlobalElements[i] == 0) || (MyGlobalElements[i] == NumGlobalEquations - 1)) NumNz[i] = 1; else NumNz[i] = 2; // Create a Epetra_Matrix Epetra_CrsMatrix A(Copy, Map, &NumNz[0]); EPETRA_TEST_ERR(A.IndicesAreGlobal(),ierr); EPETRA_TEST_ERR(A.IndicesAreLocal(),ierr); // Add rows one-at-a-time // Need some vectors to help // Off diagonal Values will always be -1 vector<double> Values(2); Values[0] = -1.0; Values[1] = -1.0; vector<long long> Indices(2); double two = 2.0; int NumEntries; forierr = 0; for(i = 0; i < NumMyEquations; i++) { if(MyGlobalElements[i] == 0) { Indices[0] = 1; NumEntries = 1; } else if (MyGlobalElements[i] == NumGlobalEquations-1) { Indices[0] = NumGlobalEquations-2; NumEntries = 1; } else { Indices[0] = MyGlobalElements[i]-1; Indices[1] = MyGlobalElements[i]+1; NumEntries = 2; } forierr += !(A.InsertGlobalValues(MyGlobalElements[i], NumEntries, &Values[0], &Indices[0])==0); forierr += !(A.InsertGlobalValues(MyGlobalElements[i], 1, &two, &MyGlobalElements[i])>0); // Put in the diagonal entry } EPETRA_TEST_ERR(forierr,ierr); // Finish up A.FillComplete(); A.OptimizeStorage(); Epetra_JadMatrix JadA(A); Epetra_JadMatrix JadA1(A); Epetra_JadMatrix JadA2(A); // Create vectors for Power method Epetra_Vector q(Map); Epetra_Vector z(Map); z.Random(); Epetra_Vector resid(Map); Epetra_Flops flopcounter; A.SetFlopCounter(flopcounter); q.SetFlopCounter(A); z.SetFlopCounter(A); resid.SetFlopCounter(A); JadA.SetFlopCounter(A); JadA1.SetFlopCounter(A); JadA2.SetFlopCounter(A); if (verbose) cout << "=======================================" << endl << "Testing Jad using CrsMatrix as input..." 
<< endl << "=======================================" << endl; A.ResetFlops(); powerMethodTests(A, JadA, Map, q, z, resid, verbose); // Increase diagonal dominance if (verbose) cout << "\n\nIncreasing the magnitude of first diagonal term and solving again\n\n" << endl; if (A.MyGlobalRow(0)) { int numvals = A.NumGlobalEntries(0); vector<double> Rowvals(numvals); vector<long long> Rowinds(numvals); A.ExtractGlobalRowCopy(0, numvals, numvals, &Rowvals[0], &Rowinds[0]); // Get A[0,0] for (i=0; i<numvals; i++) if (Rowinds[i] == 0) Rowvals[i] *= 10.0; A.ReplaceGlobalValues(0, numvals, &Rowvals[0], &Rowinds[0]); } JadA.UpdateValues(A); A.ResetFlops(); powerMethodTests(A, JadA, Map, q, z, resid, verbose); if (verbose) cout << "================================================================" << endl << "Testing Jad using Jad matrix as input matrix for construction..." << endl << "================================================================" << endl; JadA1.ResetFlops(); powerMethodTests(JadA1, JadA2, Map, q, z, resid, verbose); #ifdef EPETRA_MPI MPI_Finalize() ; #endif return ierr ; }

int main(int argc, char *argv[]) { #ifdef HAVE_MPI MPI_Init(&argc, &argv); Epetra_MpiComm Comm(MPI_COMM_WORLD); #else Epetra_SerialComm Comm; #endif bool verbose = (Comm.MyPID() == 0); // set global dimension to 5, could be any number int NumGlobalElements = 5; // create a map Epetra_Map Map(NumGlobalElements,0,Comm); // local number of rows int NumMyElements = Map.NumMyElements(); // get update list int * MyGlobalElements = Map.MyGlobalElements( ); // ============= CONSTRUCTION OF THE MATRIX =========================== // Create a Epetra_Matrix Epetra_CrsMatrix A(Copy,Map,3); // Add rows one-at-a-time double *Values = new double[2]; Values[0] = -1.0; Values[1] = -1.0; int *Indices = new int[2]; double two = 2.0; int NumEntries; for( int i=0 ; i<NumMyElements; ++i ) { if (MyGlobalElements[i]==0) { Indices[0] = 1; NumEntries = 1; } else if (MyGlobalElements[i] == NumGlobalElements-1) { Indices[0] = NumGlobalElements-2; NumEntries = 1; } else { Indices[0] = MyGlobalElements[i]-1; Indices[1] = MyGlobalElements[i]+1; NumEntries = 2; } A.InsertGlobalValues(MyGlobalElements[i], NumEntries, Values, Indices); // Put in the diagonal entry A.InsertGlobalValues(MyGlobalElements[i], 1, &two, MyGlobalElements+i); } // Finish up A.FillComplete(); // ================ CONSTRUCTION OF VECTORS ======================= // build up two distributed vectors q and z, and compute // q = A * z Epetra_Vector q(A.RowMap()); Epetra_Vector z(A.RowMap()); // Fill z with 1's z.PutScalar( 1.0 ); // ================ USE OF TIME AND FLOPS ========================= Epetra_Flops counter; A.SetFlopCounter(counter); Epetra_Time timer(Comm); A.Multiply(false, z, q); // Compute q = A*z double elapsed_time = timer.ElapsedTime(); double total_flops =counter.Flops(); if (verbose) cout << "Total ops: " << total_flops << endl; double MFLOPs = total_flops/elapsed_time/1000000.0; if (verbose) cout << "Total MFLOPs for mat-vec = " << MFLOPs << endl<< endl; double dotProduct; z.SetFlopCounter(counter); 
timer.ResetStartTime(); z.Dot(q, &dotProduct); total_flops =counter.Flops(); if (verbose) cout << "Total ops: " << total_flops << endl; elapsed_time = timer.ElapsedTime(); if (elapsed_time != 0.0) MFLOPs = (total_flops / elapsed_time) / 1000000.0; else MFLOPs = 0; if (verbose) { cout << "Total MFLOPs for vec-vec = " << MFLOPs << endl<< endl; cout << "q dot z = " << dotProduct << endl; } #ifdef HAVE_MPI MPI_Finalize(); #endif return( 0 ); } /* main */

// Checks that arg() of the complex number (1, 0) compares equal to zero.
void test()
{
    const std::complex<T> unit(1, 0);
    assert(arg(unit) == 0);
}

// Component-wise sum of this vector and v, returned as a new vector.
const GLVector GLVector::operator +(const GLVector& v) const
{
    GLVector sum(x() + v.x(),
                 y() + v.y(),
                 z() + v.z());
    return sum;
}

unsigned long CSysSolve::CG_LinSolver(const CSysVector & b, CSysVector & x, CMatrixVectorProduct & mat_vec, CPreconditioner & precond, su2double tol, unsigned long m, bool monitoring) { int rank = 0; #ifdef HAVE_MPI MPI_Comm_rank(MPI_COMM_WORLD, &rank); #endif /*--- Check the subspace size ---*/ if (m < 1) { if (rank == MASTER_NODE) cerr << "CSysSolve::ConjugateGradient: illegal value for subspace size, m = " << m << endl; #ifndef HAVE_MPI exit(EXIT_FAILURE); #else MPI_Barrier(MPI_COMM_WORLD); MPI_Abort(MPI_COMM_WORLD,1); MPI_Finalize(); #endif } CSysVector r(b); CSysVector A_p(b); /*--- Calculate the initial residual, compute norm, and check if system is already solved ---*/ mat_vec(x, A_p); r -= A_p; // recall, r holds b initially su2double norm_r = r.norm(); su2double norm0 = b.norm(); if ( (norm_r < tol*norm0) || (norm_r < eps) ) { if (rank == MASTER_NODE) cout << "CSysSolve::ConjugateGradient(): system solved by initial guess." << endl; return 0; } su2double alpha, beta, r_dot_z; CSysVector z(r); precond(r, z); CSysVector p(z); /*--- Set the norm to the initial initial residual value ---*/ norm0 = norm_r; /*--- Output header information including initial residual ---*/ int i = 0; if ((monitoring) && (rank == MASTER_NODE)) { WriteHeader("CG", tol, norm_r); WriteHistory(i, norm_r, norm0); } /*--- Loop over all search directions ---*/ for (i = 0; i < (int)m; i++) { /*--- Apply matrix to p to build Krylov subspace ---*/ mat_vec(p, A_p); /*--- Calculate step-length alpha ---*/ r_dot_z = dotProd(r, z); alpha = dotProd(A_p, p); alpha = r_dot_z / alpha; /*--- Update solution and residual: ---*/ x.Plus_AX(alpha, p); r.Plus_AX(-alpha, A_p); /*--- Check if solution has converged, else output the relative residual if necessary ---*/ norm_r = r.norm(); if (norm_r < tol*norm0) break; if (((monitoring) && (rank == MASTER_NODE)) && ((i+1) % 5 == 0)) WriteHistory(i+1, norm_r, norm0); precond(r, z); /*--- Calculate Gram-Schmidt coefficient beta, beta = dotProd(r_{i+1}, z_{i+1}) 
/ dotProd(r_{i}, z_{i}) ---*/ beta = 1.0 / r_dot_z; r_dot_z = dotProd(r, z); beta *= r_dot_z; /*--- Gram-Schmidt orthogonalization; p = beta *p + z ---*/ p.Equals_AX_Plus_BY(beta, p, 1.0, z); } if ((monitoring) && (rank == MASTER_NODE)) { cout << "# Conjugate Gradient final (true) residual:" << endl; cout << "# Iteration = " << i << ": |res|/|res0| = " << norm_r/norm0 << ".\n" << endl; } // /*--- Recalculate final residual (this should be optional) ---*/ // mat_vec(x, A_p); // r = b; // r -= A_p; // su2double true_res = r.norm(); // // if (fabs(true_res - norm_r) > tol*10.0) { // if (rank == MASTER_NODE) { // cout << "# WARNING in CSysSolve::ConjugateGradient(): " << endl; // cout << "# true residual norm and calculated residual norm do not agree." << endl; // cout << "# true_res - calc_res = " << true_res - norm_r << endl; // } // } return (unsigned long) i; }

// Returns a new vector whose components are this vector's components
// multiplied by the scalar d.
GLVector GLVector::operator *(const GLdouble d) const
{
    GLVector scaled(x() * d,
                    y() * d,
                    z() * d);
    return scaled;
}

unsigned long CSysSolve::FGMRES_LinSolver(const CSysVector & b, CSysVector & x, CMatrixVectorProduct & mat_vec, CPreconditioner & precond, su2double tol, unsigned long m, su2double *residual, bool monitoring) { int rank = 0; #ifdef HAVE_MPI MPI_Comm_rank(MPI_COMM_WORLD, &rank); #endif /*--- Check the subspace size ---*/ if (m < 1) { if (rank == MASTER_NODE) cerr << "CSysSolve::FGMRES: illegal value for subspace size, m = " << m << endl; #ifndef HAVE_MPI exit(EXIT_FAILURE); #else MPI_Barrier(MPI_COMM_WORLD); MPI_Abort(MPI_COMM_WORLD,1); MPI_Finalize(); #endif } /*--- Check the subspace size ---*/ if (m > 1000) { if (rank == MASTER_NODE) cerr << "CSysSolve::FGMRES: illegal value for subspace size (too high), m = " << m << endl; #ifndef HAVE_MPI exit(EXIT_FAILURE); #else MPI_Abort(MPI_COMM_WORLD,1); MPI_Finalize(); #endif } /*--- Define various arrays Note: elements in w and z are initialized to x to avoid creating a temporary CSysVector object for the copy constructor ---*/ vector<CSysVector> w(m+1, x); vector<CSysVector> z(m+1, x); vector<su2double> g(m+1, 0.0); vector<su2double> sn(m+1, 0.0); vector<su2double> cs(m+1, 0.0); vector<su2double> y(m, 0.0); vector<vector<su2double> > H(m+1, vector<su2double>(m, 0.0)); /*--- Calculate the norm of the rhs vector ---*/ su2double norm0 = b.norm(); /*--- Calculate the initial residual (actually the negative residual) and compute its norm ---*/ mat_vec(x, w[0]); w[0] -= b; su2double beta = w[0].norm(); if ( (beta < tol*norm0) || (beta < eps) ) { /*--- System is already solved ---*/ if (rank == MASTER_NODE) cout << "CSysSolve::FGMRES(): system solved by initial guess." 
<< endl; return 0; } /*--- Normalize residual to get w_{0} (the negative sign is because w[0] holds the negative residual, as mentioned above) ---*/ w[0] /= -beta; /*--- Initialize the RHS of the reduced system ---*/ g[0] = beta; /*--- Set the norm to the initial residual value ---*/ norm0 = beta; /*--- Output header information including initial residual ---*/ int i = 0; if ((monitoring) && (rank == MASTER_NODE)) { WriteHeader("FGMRES", tol, beta); WriteHistory(i, beta, norm0); } /*--- Loop over all search directions ---*/ for (i = 0; i < (int)m; i++) { /*--- Check if solution has converged ---*/ if (beta < tol*norm0) break; /*--- Precondition the CSysVector w[i] and store result in z[i] ---*/ precond(w[i], z[i]); /*--- Add to Krylov subspace ---*/ mat_vec(z[i], w[i+1]); /*--- Modified Gram-Schmidt orthogonalization ---*/ ModGramSchmidt(i, H, w); /*--- Apply old Givens rotations to new column of the Hessenberg matrix then generate the new Givens rotation matrix and apply it to the last two elements of H[:][i] and g ---*/ for (int k = 0; k < i; k++) ApplyGivens(sn[k], cs[k], H[k][i], H[k+1][i]); GenerateGivens(H[i][i], H[i+1][i], sn[i], cs[i]); ApplyGivens(sn[i], cs[i], g[i], g[i+1]); /*--- Set L2 norm of residual and check if solution has converged ---*/ beta = fabs(g[i+1]); /*--- Output the relative residual if necessary ---*/ if ((((monitoring) && (rank == MASTER_NODE)) && ((i+1) % 50 == 0)) && (rank == MASTER_NODE)) WriteHistory(i+1, beta, norm0); } /*--- Solve the least-squares system and update solution ---*/ SolveReduced(i, H, g, y); for (int k = 0; k < i; k++) { x.Plus_AX(y[k], z[k]); } if ((monitoring) && (rank == MASTER_NODE)) { cout << "# FGMRES final (true) residual:" << endl; cout << "# Iteration = " << i << ": |res|/|res0| = " << beta/norm0 << ".\n" << endl; } // /*--- Recalculate final (neg.) 
residual (this should be optional) ---*/ // mat_vec(x, w[0]); // w[0] -= b; // su2double res = w[0].norm(); // // if (fabs(res - beta) > tol*10) { // if (rank == MASTER_NODE) { // cout << "# WARNING in CSysSolve::FGMRES(): " << endl; // cout << "# true residual norm and calculated residual norm do not agree." << endl; // cout << "# res - beta = " << res - beta << endl; // } // } (*residual) = beta; return (unsigned long) i; }

// Euclidean length: square root of the sum of the squared components.
GLdouble GLVector::length() const
{
    const GLdouble squaredLength = x()*x() + y()*y() + z()*z();
    return sqrt(squaredLength);
}

// Evaluates 8314.3 * z() * Rho * T / Mw from the current state members.
// NOTE(review): 8314.3 is presumably the universal gas constant in
// J/(kmol K), which would make this the pressure from the compressibility
// factor z() -- confirm against the class documentation.
double leekesler::Pp()
{
    const double numerator = 8314.3 * z() * Rho * T;
    return numerator / Mw;
}