/*
 * Routine:  Vgreen_coulomb_direct
 *
 * Purpose:  Direct (all-pairs) Coulomb sum.  For each of the npos points
 *           (x[i], y[i], z[i]) accumulate charge/distance over every atom
 *           in thee->alist and store the scaled total in val[i].
 *
 * Notes:    Atom/point pairs whose separation is not greater than VSMALL
 *           are skipped, so a point sitting on an atom contributes nothing
 *           instead of dividing by zero.
 *           The sums are multiplied by
 *           Vunit_ec/(4*Vunit_pi*Vunit_eps0*1.0e-10) before returning
 *           (the 1.0e-10 presumably converts Angstroms to meters --
 *           confirm against the unit conventions of the callers).
 *
 * Returns:  1 on success; 0 (with val untouched) if thee is VNULL.
 */
VPUBLIC int Vgreen_coulomb_direct(Vgreen *thee, int npos, double *x,
        double *y, double *z, double *val) {

    Vatom *atom;
    double *center, q, r, rx, ry, rz, prefactor;
    int ia, ip;

    if (thee == VNULL) {
        Vnm_print(2, "Vgreen_coulomb: Got NULL thee!\n");
        return 0;
    }

    /* Start every accumulator from zero */
    for (ip = 0; ip < npos; ip++) {
        val[ip] = 0.0;
    }

    /* Outer loop over atoms, inner loop over evaluation points */
    for (ia = 0; ia < Valist_getNumberAtoms(thee->alist); ia++) {

        atom = Valist_getAtom(thee->alist, ia);
        center = Vatom_getPosition(atom);
        q = Vatom_getCharge(atom);

        for (ip = 0; ip < npos; ip++) {
            rx = center[0] - x[ip];
            ry = center[1] - y[ip];
            rz = center[2] - z[ip];
            r = VSQRT(VSQR(rx) + VSQR(ry) + VSQR(rz));
            if (r > VSMALL) {
                val[ip] += (q/r);
            }
        }
    }

    /* Apply the physical prefactor to every point */
    prefactor = Vunit_ec/(4*Vunit_pi*Vunit_eps0*1.0e-10);
    for (ip = 0; ip < npos; ip++) {
        val[ip] *= prefactor;
    }

    return 1;
}
/*
 * Routine:  Vgreen_coulombD_direct
 *
 * Purpose:  Direct (all-pairs) Coulomb sum with derivative terms.  For each
 *           of the npos points (x[i], y[i], z[i]) accumulate over every
 *           atom in thee->alist:
 *               pot[i]   += charge/dist
 *               gradx[i] -= charge*dx/dist^3   (likewise grady, gradz)
 *           where (dx, dy, dz) = atom position - point.
 *
 * Notes:    Pairs whose separation is not greater than VSMALL are skipped.
 *           All four output arrays are multiplied by
 *           Vunit_ec/(4*VPI*Vunit_eps0*(1.0e-10)) before returning.
 *           NOTE(review): with dx defined as (atom - point), the "-="
 *           accumulation fixes a particular sign convention for the
 *           returned vector; confirm it matches what callers expect.
 *
 * Returns:  1 on success; 0 (outputs untouched) if thee is VNULL.
 */
VPUBLIC int Vgreen_coulombD_direct(Vgreen *thee, int npos, double *x,
        double *y, double *z, double *pot, double *gradx, double *grady,
        double *gradz) {

    Vatom *atom;
    double *center, q, r, r2, inv_r3, rx, ry, rz, prefactor;
    int ia, ip;

    if (thee == VNULL) {
        Vnm_print(2, "Vgreen_coulombD: Got VNULL thee!\n");
        return 0;
    }

    /* Clear the accumulators */
    for (ip = 0; ip < npos; ip++) {
        pot[ip] = 0.0;
        gradx[ip] = 0.0;
        grady[ip] = 0.0;
        gradz[ip] = 0.0;
    }

    for (ia = 0; ia < Valist_getNumberAtoms(thee->alist); ia++) {

        atom = Valist_getAtom(thee->alist, ia);
        center = Vatom_getPosition(atom);
        q = Vatom_getCharge(atom);

        for (ip = 0; ip < npos; ip++) {
            rx = center[0] - x[ip];
            ry = center[1] - y[ip];
            rz = center[2] - z[ip];
            r2 = VSQR(rx) + VSQR(ry) + VSQR(rz);
            r = VSQRT(r2);
            if (r > VSMALL) {
                inv_r3 = 1.0/(r*r2);
                gradx[ip] -= (q*rx*inv_r3);
                grady[ip] -= (q*ry*inv_r3);
                gradz[ip] -= (q*rz*inv_r3);
                pot[ip] += (q/r);
            }
        }
    }

    /* Apply the physical prefactor to every output */
    prefactor = Vunit_ec/(4*VPI*Vunit_eps0*(1.0e-10));
    for (ip = 0; ip < npos; ip++) {
        gradx[ip] *= prefactor;
        grady[ip] *= prefactor;
        gradz[ip] *= prefactor;
        pot[ip] *= prefactor;
    }

    return 1;
}
/*
 * Routine:  Vxnrm2
 *
 * Purpose:  Return the square root of the sum of squares of the entries
 *           x(i,j,k) for i = 2..nx-1, j = 2..ny-1, k = 2..nz-1, where x is
 *           accessed through the MAT3/VAT3 macros with extents
 *           (*nx, *ny, *nz).
 *
 * Notes:    The loop bounds skip the first and last index in every
 *           direction (presumably the boundary layer of a 1-based grid --
 *           confirm against the MAT3/VAT3 definitions).
 */
VPUBLIC double Vxnrm2(int *nx, int *ny, int *nz, double *x) {

    double sumsq = 0.0;  ///< Running sum of squared entries

    MAT3(x, *nx, *ny, *nz);

    int i, j, k;         // Grid indices
    double entry;        // Current grid value

    /// @todo parallel optimization
    for (k = 2; k < *nz; k++) {
        for (j = 2; j < *ny; j++) {
            for (i = 2; i < *nx; i++) {
                entry = VAT3(x, i, j, k);
                sumsq += entry * entry;
            }
        }
    }

    return VSQRT(sumsq);
}
/*
 * Routine:  Vdpbfa
 *
 * Purpose:  In-place factorization of the band-stored matrix abd, column
 *           by column.  For column j the entries in rows mu..m are reduced
 *           with dot products against earlier columns and divided by the
 *           stored diagonal (row m+1) of those columns; the diagonal of
 *           column j is then replaced by the square root of what remains.
 *           (The structure appears to follow LINPACK's DPBFA banded
 *           Cholesky routine -- confirm before relying on that.)
 *
 * Args:     abd   band storage, overwritten with the factor
 *           lda   leading dimension of abd
 *           n     number of columns processed
 *           m     number of off-diagonal bands (diagonal lives in row m+1)
 *           info  set to 0 on success, or to the 1-based column index j at
 *                 which the remaining diagonal value was <= 0 (the
 *                 factorization stops there)
 */
VPUBLIC void Vdpbfa(double *abd, int *lda, int *n, int *m, int *info) {

    double entry, diag;
    int col, row, srcRow, srcCol, firstRow;

    MAT2(abd, *lda, 1);

    *info = 0;

    for (col = 1; col <= *n; col++) {

        diag = 0.0;
        srcRow = *m + 1;
        srcCol = VMAX2(col - *m, 1);
        firstRow = VMAX2(*m + 2 - col, 1);

        /* The loop is skipped entirely when firstRow > *m */
        for (row = firstRow; row <= *m; row++, srcRow--, srcCol++) {
            entry = VAT2(abd, row, col)
                  - Vddot(row - firstRow,
                          RAT2(abd, srcRow, srcCol), 1,
                          RAT2(abd, firstRow, col), 1);
            entry = entry / VAT2(abd, *m + 1, srcCol);
            VAT2(abd, row, col) = entry;
            diag += entry * entry;
        }

        diag = VAT2(abd, *m + 1, col) - diag;

        /* Non-positive remainder: report the offending column and stop */
        if (diag <= 0.0) {
            *info = col;
            return;
        }

        VAT2(abd, *m + 1, col) = VSQRT(diag);
    }
}
/* ///////////////////////////////////////////////////////////////////////////
// Routine:  Vpee_ourSimp
//
// Purpose:  Reduce code bloat by wrapping up the common steps for determining
//           whether the given simplex can be marked (i.e., belongs to our
//           partition or overlap region)
//
// Args:     thee  Object supplying killFlag, killParam, localPartID,
//                 localPartCenter and localPartRadius
//           sm    Simplex to test
//           rcol  Partition color being refined; a negative value accepts
//                 every simplex for killFlag 1 and 2
//
// Notes:    killFlag == 0  every simplex is markable
//           killFlag == 1  markable if SS_chart(sm) == rcol or rcol < 0
//           killFlag == 2  markable if rcol < 0, or if the vertex of sm
//                          closest to the local partition center lies within
//                          localPartRadius*killParam of that center
//           anything else  VASSERT(0)
//
//           The killFlag == 2 test uses the MINIMUM vertex distance.  The
//           previous code overwrote the distance on every loop pass and so
//           only ever tested the last vertex of the simplex.
//
// Returns:  1 if could be marked, 0 otherwise
//
// Author:   Nathan Baker
/////////////////////////////////////////////////////////////////////////// */
VPRIVATE int Vpee_ourSimp(Vpee *thee, SS *sm, int rcol) {

    int ivert;
    double dist, minDist, dx, dy, dz;

    if (thee->killFlag == 0) return 1;
    else if (thee->killFlag == 1) {
        if ((SS_chart(sm) == rcol) || (rcol < 0)) return 1;
    } else if (thee->killFlag == 2) {
        if (rcol < 0) return 1;
        else {
            /* We can only do distance-based searches on the local
             * partition */
            VASSERT(rcol == thee->localPartID);

            /* Find the closest distance between this simplex (sampled at
             * its vertices) and the center of the local partition and
             * check it against (thee->localPartRadius*thee->killParam).
             * minDist stays 0 if the simplex reports no vertices, which
             * matches the previous behavior for that degenerate case. */
            minDist = 0;
            for (ivert=0; ivert<SS_dimVV(sm); ivert++) {
                dx = VV_coord(SS_vertex(sm, ivert), 0) -
                  thee->localPartCenter[0];
                dy = VV_coord(SS_vertex(sm, ivert), 1) -
                  thee->localPartCenter[1];
                dz = VV_coord(SS_vertex(sm, ivert), 2) -
                  thee->localPartCenter[2];
                dist = VSQRT((dx*dx + dy*dy + dz*dz));
                if ((ivert == 0) || (dist < minDist)) minDist = dist;
            }
            if (minDist < thee->localPartRadius*thee->killParam) return 1;
        }
    } else {
        /* killFlag == 3 (neighbor-based) and any other value are not
         * supported here */
        VASSERT(0);
    }

    return 0;
}
/*
 * ***************************************************************************
 * Routine:  markSimplex_default
 *
 * Purpose:  DEFAULT: Simplex marking routine for refinement.
 *
 * Notes:    Tests the first dim+1 vertices in vx against a sphere of fixed
 *           radius 0.1 centered at the origin.  A vertex counts as "inside"
 *           when its Euclidean norm is <= radius+VSMALL, otherwise as
 *           "outside".  The simplex is marked (nonzero return) only when it
 *           has at least one vertex of each kind, i.e. it touches or
 *           straddles the sphere surface.
 *
 *           dimII, simplexType, faceType, vertexType, chart and data are
 *           not used by this default implementation.
 *
 * Author:   Michael Holst
 * ***************************************************************************
 */
VPRIVATE int markSimplex_default(int dim, int dimII,
    int simplexType, int faceType[4], int vertexType[4],
    int chart[], double vx[][3], void *data)
{
    int iv, ic, inside, outside;
    double radius, cutoff, norm;

    /* radius of the refinement sphere used for testing; must be > 0 */
    radius = 0.1;
    cutoff = radius + VSMALL;

    inside = 0;
    outside = 0;

    for (iv = 0; iv <= dim; iv++) {

        /* Euclidean norm of this vertex */
        norm = 0.0;
        for (ic = 0; ic < 3; ic++) {
            norm += VSQR( vx[iv][ic] );
        }
        norm = VSQRT( norm );

        if (norm <= cutoff) {
            inside = 1;
        } else {
            outside = 1;
        }
    }

    /* true if the simplex touches or straddles the surface of the sphere */
    return ( inside && outside );
}
/* ///////////////////////////////////////////////////////////////////////////
// Routine:  Vopot_gradient
//
// Purpose:  Fill grad[0..2] for the point pt.  The grid object
//           (Vmgrid_gradient) is tried first; if it reports failure the
//           result is built from the boundary-condition flag thee->bcfl:
//             BCFL_ZERO  grad is set to zero
//             BCFL_SDH   one term, using the solute center, radius and
//                        charge from thee->pbe
//             BCFL_MDH   the same term summed over every atom in the atom
//                        list (atom position, radius and charge)
//             BCFL_UNUSED, BCFL_FOCUS, anything else: error message, return 0
//
// Notes:    NOTE(review): in the SDH/MDH terms the displacement components
//           are used as stored while the distance is multiplied by 1.0e-10,
//           and the radial factor is (-1/dist^2 + xkappa/dist).  Confirm the
//           intended units and sign before relying on these values.
//           No guard exists for dist == 0.
//
// Returns:  1 on success, 0 for an unsupported bcfl value
//
// Authors:  Nathan Baker
/////////////////////////////////////////////////////////////////////////// */
VPUBLIC int Vopot_gradient(Vopot *thee, double pt[3], double grad[3]) {

    Vatom *atom;
    Valist *alist;
    int iatom, c;
    double temperature, q, eps_w, xkappa, radius, pref, radial, r;
    double delta[3], *center;

    VASSERT(thee != VNULL);

    eps_w = Vpbe_getSolventDiel(thee->pbe);
    xkappa = (1.0e10)*Vpbe_getXkappa(thee->pbe);
    temperature = Vpbe_getTemperature(thee->pbe);
    alist = Vpbe_getValist(thee->pbe);

    /* On the mesh: the grid already produced the answer */
    if (Vmgrid_gradient(thee->mgrid, pt, grad)) return 1;

    /* Off the mesh: fall back on the boundary condition */
    switch (thee->bcfl) {

        case BCFL_ZERO:
            for (c = 0; c < 3; c++) grad[c] = 0.0;
            break;

        case BCFL_SDH:
            for (c = 0; c < 3; c++) grad[c] = 0.0;
            radius = (1.0e-10)*Vpbe_getSoluteRadius(thee->pbe);
            center = Vpbe_getSoluteCenter(thee->pbe);
            q = Vunit_ec*Vpbe_getSoluteCharge(thee->pbe);
            for (c = 0; c < 3; c++) delta[c] = center[c] - pt[c];
            r = VSQR(delta[0]) + VSQR(delta[1]) + VSQR(delta[2]);
            r = (1.0e-10)*VSQRT(r);
            pref = (q)/(4*VPI*Vunit_eps0*eps_w);
            if (xkappa != 0.0) {
                pref = pref*(exp(-xkappa*(r-radius))/(1+xkappa*radius));
            }
            pref = pref*Vunit_ec/(Vunit_kb*temperature);
            radial = (-1.0/r/r + xkappa/r);
            for (c = 0; c < 3; c++) {
                grad[c] = pref*delta[c]/r*radial;
            }
            break;

        case BCFL_MDH:
            for (c = 0; c < 3; c++) grad[c] = 0.0;
            for (iatom = 0; iatom < Valist_getNumberAtoms(alist); iatom++) {
                atom = Valist_getAtom(alist, iatom);
                center = Vatom_getPosition(atom);
                q = Vunit_ec*Vatom_getCharge(atom);
                radius = (1e-10)*Vatom_getRadius(atom);
                for (c = 0; c < 3; c++) delta[c] = center[c] - pt[c];
                r = VSQR(delta[0]) + VSQR(delta[1]) + VSQR(delta[2]);
                r = (1.0e-10)*VSQRT(r);
                pref = (q)/(4*VPI*Vunit_eps0*eps_w);
                if (xkappa != 0.0) {
                    pref = pref*(exp(-xkappa*(r-radius))/(1+xkappa*radius));
                }
                pref = pref*Vunit_ec/(Vunit_kb*temperature);
                radial = (-1.0/r/r + xkappa/r);
                for (c = 0; c < 3; c++) {
                    grad[c] += (pref*delta[c]/r*radial);
                }
            }
            break;

        case BCFL_UNUSED:
        case BCFL_FOCUS:
            /* Both flags are rejected with the same message */
            Vnm_print(2, "Vopot: Invalid bcfl (%d)!\n", thee->bcfl);
            return 0;

        default:
            Vnm_print(2, "Vopot_pot: Bogus thee->bcfl flag (%d)!\n", thee->bcfl);
            return 0;
    }

    return 1;
}
/* ///////////////////////////////////////////////////////////////////////////
// Routine:  Vopot_curvature
//
// Purpose:  Evaluate a curvature measure at the point pt.  The grid object
//           (Vmgrid_curvature) is tried first.  If that fails and
//           cflag == 1, the value is built from the boundary-condition flag
//           thee->bcfl instead.
//
// Args:     thee   Object supplying mgrid, pbe and bcfl
//           pt     Point of evaluation
//           cflag  Curvature type (see Notes)
//           value  Output; written on grid success or after a successful
//                  cflag == 1 fallback
//
// Notes:    cflag=0 ==> Reduced Maximal Curvature
//           cflag=1 ==> Mean Curvature (Laplace)
//           cflag=2 ==> Gauss Curvature
//           cflag=3 ==> True Maximal Curvature
//           If we are off the grid, we can still evaluate the Laplacian;
//           assuming we are away from the molecular surface, it is simply
//           equal to the DH factor.
//
//           Fallback values by bcfl:
//             BCFL_ZERO  0
//             BCFL_SDH   zkappa2*exp(-xkappa*(dist-size))/(1+xkappa*size)
//                        for the solute sphere, or 0 when xkappa == 0
//             BCFL_MDH   the same factor summed over all atoms, or 0 when
//                        xkappa == 0.  (The previous code added an
//                        uninitialized temporary in the xkappa == 0 case.)
//
//           NOTE(review): when the point is off the grid and cflag != 1 the
//           routine prints a message and still returns 1 without writing
//           *value; callers cannot distinguish this from success.  Left
//           unchanged because callers may depend on it.
//
// Returns:  1 in all cases except an unsupported bcfl during the fallback,
//           which returns 0
//
// Authors:  Nathan Baker
/////////////////////////////////////////////////////////////////////////// */
VPUBLIC int Vopot_curvature(Vopot *thee, double pt[3], int cflag,
  double *value) {

    Vatom *atom;
    int i, iatom;
    double u, xkappa, dist, size, *position, zkappa2;
    Valist *alist;

    VASSERT(thee != VNULL);

    xkappa = (1.0e10)*Vpbe_getXkappa(thee->pbe);
    zkappa2 = Vpbe_getZkappa2(thee->pbe);
    alist = Vpbe_getValist(thee->pbe);

    u = 0;

    if (Vmgrid_curvature(thee->mgrid, pt, cflag, value)) return 1;
    else if (cflag != 1) {
        Vnm_print(2, "Vopot_curvature: Off mesh!\n");
        return 1;
    } else {

        switch (thee->bcfl) {

            case BCFL_ZERO:
                u = 0;
                break;

            case BCFL_SDH:
                size = (1.0e-10)*Vpbe_getSoluteRadius(thee->pbe);
                position = Vpbe_getSoluteCenter(thee->pbe);
                dist = 0;
                for (i=0; i<3; i++)
                  dist += VSQR(position[i] - pt[i]);
                dist = (1.0e-10)*VSQRT(dist);
                if (xkappa != 0.0)
                  u = zkappa2*(exp(-xkappa*(dist-size))/(1+xkappa*size));
                break;

            case BCFL_MDH:
                u = 0;
                for (iatom=0; iatom<Valist_getNumberAtoms(alist); iatom++) {
                    atom = Valist_getAtom(alist, iatom);
                    position = Vatom_getPosition(atom);
                    size = (1e-10)*Vatom_getRadius(atom);
                    dist = 0;
                    for (i=0; i<3; i++)
                      dist += VSQR(position[i] - pt[i]);
                    dist = (1.0e-10)*VSQRT(dist);
                    /* Only accumulate when the factor is defined; with
                     * xkappa == 0 the atom contributes nothing */
                    if (xkappa != 0.0)
                      u = u + zkappa2*(exp(-xkappa*(dist-size))/(1+xkappa*size));
                }
                break;

            case BCFL_UNUSED:
                Vnm_print(2, "Vopot_pot: Invlid bcfl (%d)!\n", thee->bcfl);
                return 0;

            case BCFL_FOCUS:
                Vnm_print(2, "Vopot_pot: Invlid bcfl (%d)!\n", thee->bcfl);
                return 0;

            default:
                Vnm_print(2, "Vopot_pot: Bogus thee->bcfl flag (%d)!\n", thee->bcfl);
                return 0;
        }

        *value = u;
    }

    return 1;
}
/* ///////////////////////////////////////////////////////////////////////////
// Routine:  Vopot_pot
//
// Purpose:  Evaluate the potential at the point pt and store it in *value.
//           The grid object (Vmgrid_value) is tried first; if it reports
//           failure the value is built from thee->bcfl:
//             BCFL_ZERO  0
//             BCFL_SDH   one screened-Coulomb term for the solute sphere
//                        (center, radius and charge taken from thee->pbe)
//             BCFL_MDH   the same term summed over every atom in the atom
//                        list
//             BCFL_UNUSED, BCFL_FOCUS, anything else: error message, return 0
//
// Notes:    Each term is charge/(4*VPI*Vunit_eps0*eps_w*dist), multiplied by
//           exp(-xkappa*(dist-size))/(1+xkappa*size) when xkappa != 0, and
//           then by Vunit_ec/(Vunit_kb*T).  No guard exists for dist == 0.
//
// Returns:  1 on success, 0 for an unsupported bcfl value (*value untouched)
/////////////////////////////////////////////////////////////////////////// */
VPUBLIC int Vopot_pot(Vopot *thee, double pt[3], double *value) {

    Vatom *atom;
    Valist *alist;
    int c, iatom;
    double pot, temperature, q, eps_w, xkappa, r, radius, term, *center;

    VASSERT(thee != VNULL);

    eps_w = Vpbe_getSolventDiel(thee->pbe);
    xkappa = (1.0e10)*Vpbe_getXkappa(thee->pbe);
    temperature = Vpbe_getTemperature(thee->pbe);
    alist = Vpbe_getValist(thee->pbe);

    pot = 0;

    /* See if we're on the mesh */
    if (Vmgrid_value(thee->mgrid, pt, &pot)) {
        *value = pot;
        return 1;
    }

    /* Off the mesh: fall back on the boundary condition */
    switch (thee->bcfl) {

        case BCFL_ZERO:
            pot = 0;
            break;

        case BCFL_SDH:
            radius = (1.0e-10)*Vpbe_getSoluteRadius(thee->pbe);
            center = Vpbe_getSoluteCenter(thee->pbe);
            q = Vunit_ec*Vpbe_getSoluteCharge(thee->pbe);
            r = 0;
            for (c = 0; c < 3; c++) {
                r += VSQR(center[c] - pt[c]);
            }
            r = (1.0e-10)*VSQRT(r);
            term = (q)/(4*VPI*Vunit_eps0*eps_w*r);
            if (xkappa != 0.0) {
                term = term*(exp(-xkappa*(r-radius))/(1+xkappa*radius));
            }
            term = term*Vunit_ec/(Vunit_kb*temperature);
            pot = term;
            break;

        case BCFL_MDH:
            pot = 0;
            for (iatom = 0; iatom < Valist_getNumberAtoms(alist); iatom++) {
                atom = Valist_getAtom(alist, iatom);
                center = Vatom_getPosition(atom);
                q = Vunit_ec*Vatom_getCharge(atom);
                radius = (1e-10)*Vatom_getRadius(atom);
                r = 0;
                for (c = 0; c < 3; c++) {
                    r += VSQR(center[c] - pt[c]);
                }
                r = (1.0e-10)*VSQRT(r);
                term = (q)/(4*VPI*Vunit_eps0*eps_w*r);
                if (xkappa != 0.0) {
                    term = term*(exp(-xkappa*(r-radius))/(1+xkappa*radius));
                }
                term = term*Vunit_ec/(Vunit_kb*temperature);
                pot = pot + term;
            }
            break;

        case BCFL_UNUSED:
        case BCFL_FOCUS:
            /* Both flags are rejected with the same message */
            Vnm_print(2, "Vopot_pot: Invalid bcfl flag (%d)!\n", thee->bcfl);
            return 0;

        default:
            Vnm_print(2, "Vopot_pot: Bogus thee->bcfl flag (%d)!\n", thee->bcfl);
            return 0;
    }

    *value = pot;
    return 1;
}
/*
 * ***************************************************************************
 * Routine:  Gem_formFix
 *
 * Purpose:  Make some specified hacked fix to a given mesh.
 *
 * Notes:    key==0 --> ?
 *
 *           key is only range-checked (0..2); the boundary type written to
 *           the mesh is recomputed per face from hard-coded geometry.
 *
 *           Pass 1 resets every face type and vertex type to 0 and zeroes
 *           the boundary face/vertex counters.
 *
 *           Pass 2 visits every face "j" of every simplex (the face
 *           opposite local vertex j).  If no OTHER simplex shares all the
 *           vertices of that face, the face is a boundary face and gets:
 *               type 1  all face vertices have |z - 0|        < 1e-2
 *               type 3  all face vertices have |z - 68.03512| < 1e-2
 *               type 2  all face vertices have |sqrt(x^2+y^2) - 1.5| < 1e-2
 *               type 4  all face vertices have |sqrt(x^2+y^2) - 2.0| < 1e-2
 *               type 0  otherwise
 *           The face counter is bumped, and each face vertex that is still
 *           VINTERIOR takes the face's type and bumps the vertex counter.
 *
 * Author:   Michael Holst
 * ***************************************************************************
 */
VPUBLIC void Gem_formFix(Gem *thee, int key)
{
    int isimp, j, k, l, m, nverts, nabors, btype;
    double zk, zl, zm, radk, radl, radm, myTol;
    VV *v[4];
    SS *sm, *sm0, *sm1, *sm2;

    /* input check and some i/o */
    btype = key;
    VASSERT( (0 <= btype) && (btype <= 2) );

    /* ---- pass 1: zero all boundary faces and vertices ---- */
    Vnm_print(0,"Gem_makeBnd: zeroing boundary faces/vertices..");
    Gem_setNumBF(thee, 0);
    Gem_setNumBV(thee, 0);
    for (isimp = 0; isimp < Gem_numSS(thee); isimp++) {
        sm = Gem_SS(thee, isimp);
        if ( (isimp>0) && (isimp % VPRTKEY) == 0 ) Vnm_print(0,"[BS:%d]",isimp);

        /* get local vertices */
        nverts = Gem_dimVV(thee);
        for (j = 0; j < nverts; j++) {
            v[j] = SS_vertex(sm, j);
        }

        /* reset all vertices and faces to interior type */
        for (j = 0; j < nverts; j++) {

            /* the other three local vertex/face numbers besides "j" */
            k = (j+1) % nverts;
            l = (k+1) % nverts;
            m = (l+1) % nverts;

            SS_setFaceType(sm, j, 0);
            VV_setType(v[k], 0);
            VV_setType(v[l], 0);
            if (Gem_dim(thee) == 3) {
                VV_setType(v[m], 0);
            }
        }
    }
    Vnm_print(0,"..done.\n");

    /* are we done */
    /* if (btype == 0) return; */

    /* ---- pass 2: rebuild the boundary ---- */
    Vnm_print(0,"Gem_makeBnd: rebuilding boundary faces/vertices..");
    for (isimp = 0; isimp < Gem_numSS(thee); isimp++) {
        sm = Gem_SS(thee, isimp);
        if ( (isimp>0) && (isimp % VPRTKEY) == 0 ) Vnm_print(0,"[BS:%d]",isimp);

        /* get local vertices */
        nverts = Gem_dimVV(thee);
        for (j = 0; j < nverts; j++) {
            v[j] = SS_vertex(sm, j);
        }

        /* rebuild everything */
        for (j = 0; j < nverts; j++) {

            /* the other three local vertex/face numbers besides "j" */
            k = (j+1) % nverts;
            l = (k+1) % nverts;
            m = (l+1) % nverts;

            /* look for a face nabor sharing face "j" (opposite vertex "j"):
             * a simplex other than sm that appears in the simplex ring of
             * every vertex of the face */
            nabors = 0;
            for (sm0 = VV_firstSS(v[k]); sm0 != VNULL;
                 sm0 = SS_link(sm0, v[k])) {
                if (sm0 == sm) continue;
                for (sm1 = VV_firstSS(v[l]); sm1 != VNULL;
                     sm1 = SS_link(sm1, v[l])) {
                    if (sm1 != sm0) continue;
                    if (Gem_dim(thee) == 2) {
                        nabors++;
                    } else {
                        for (sm2 = VV_firstSS(v[m]); sm2 != VNULL;
                             sm2 = SS_link(sm2, v[m])) {
                            if (sm2 == sm0) nabors++;
                        }
                    }
                }
            }

            /* someone is there, so face "j" is interior */
            if (nabors != 0) continue;

            /* no one there: face "j" is actually a boundary face;
             * classify it from the hard-coded geometry */
            myTol = 1.0e-2;
            zk = VV_coord(v[k],2);
            zl = VV_coord(v[l],2);
            zm = VV_coord(v[m],2);
            if ( ( VABS(zk - 0.0) < myTol)
              && ( VABS(zl - 0.0) < myTol)
              && ( VABS(zm - 0.0) < myTol) ) {
                btype = 1;
            } else if ( ( VABS(zk - 68.03512) < myTol)
                     && ( VABS(zl - 68.03512) < myTol)
                     && ( VABS(zm - 68.03512) < myTol) ) {
                btype = 3;
            } else {
                radk = VSQRT( VSQR( VV_coord(v[k],0) )
                            + VSQR( VV_coord(v[k],1) ) );
                radl = VSQRT( VSQR( VV_coord(v[l],0) )
                            + VSQR( VV_coord(v[l],1) ) );
                radm = VSQRT( VSQR( VV_coord(v[m],0) )
                            + VSQR( VV_coord(v[m],1) ) );
                if ( ( VABS(radk - 1.5) < myTol)
                  && ( VABS(radl - 1.5) < myTol)
                  && ( VABS(radm - 1.5) < myTol) ) {
                    btype = 2;
                } else if ( ( VABS(radk - 2.0) < myTol)
                         && ( VABS(radl - 2.0) < myTol)
                         && ( VABS(radm - 2.0) < myTol) ) {
                    btype = 4;
                } else {
                    btype = 0;
                }
            }

            /* record the face, then promote still-interior vertices */
            SS_setFaceType(sm, j, btype);
            Gem_numBFpp(thee);
            if (VINTERIOR( VV_type(v[k])) ) {
                VV_setType(v[k], btype);
                Gem_numBVpp(thee);
            }
            if (VINTERIOR( VV_type(v[l])) ) {
                VV_setType(v[l], btype);
                Gem_numBVpp(thee);
            }
            if (Gem_dim(thee) == 3) {
                if (VINTERIOR( VV_type(v[m])) ) {
                    VV_setType(v[m], btype);
                    Gem_numBVpp(thee);
                }
            }
        }
    }
    Vnm_print(0,"..done.\n");
}
// Compiles a per-element "vector <- f(vector)" operation.  The function is
// selected by bits 16..20 of op (vmov, vabs, vneg, vrcp, vrsq, vsqrt, vnrcp
// are emitted here; the remaining listed sub-ops go through DISABLE).
//
// NOTE(review): DISABLE is invoked unconditionally right after
// CONDITIONAL_DISABLE.  If that macro returns from the function (its
// definition is not visible here), everything below is currently dead code.
void Jit::Comp_VV2Op(u32 op) {
	CONDITIONAL_DISABLE;
	DISABLE;

	if (js.HasUnknownPrefix())
		DISABLE;

	VectorSize sz = GetVecSize(op);
	int n = GetNumVectorElements(sz);

	u8 sregs[4], dregs[4];
	GetVectorRegsPrefixS(sregs, sz, _VS);
	GetVectorRegsPrefixD(dregs, sz, _VD);

	// Pick the register each lane's result is written to: the destination
	// itself when that cannot clobber a source lane that is still needed,
	// otherwise a fresh temporary.
	ARMReg outRegs[4];
	for (int lane = 0; lane < n; ++lane) {
		if (IsOverlapSafeAllowS(dregs[lane], lane, n, sregs)) {
			fpr.MapRegV(dregs[lane], (dregs[lane] == sregs[lane] ? 0 : MAP_NOINIT) | MAP_DIRTY);
			fpr.SpillLockV(dregs[lane]);
			outRegs[lane] = fpr.V(dregs[lane]);
		} else {
			int reg = fpr.GetTempV();
			fpr.MapRegV(reg, MAP_NOINIT | MAP_DIRTY);
			fpr.SpillLockV(reg);
			outRegs[lane] = fpr.V(reg);
		}
	}

	// The sub-opcode is the same for every lane.
	const u32 subop = (op >> 16) & 0x1f;

	// Warning: sregs[lane] and outRegs[lane] may be the same reg.
	// Helps for vmov, hurts for vrcp, etc.
	for (int lane = 0; lane < n; ++lane) {
		switch (subop) {
		case 0:  // d[i] = s[i]; (vmov)
			// Probably for swizzle.
			VMOV(outRegs[lane], fpr.V(sregs[lane]));
			break;

		case 1:  // d[i] = fabsf(s[i]); (vabs)
			VABS(outRegs[lane], fpr.V(sregs[lane]));
			break;

		case 2:  // d[i] = -s[i]; (vneg)
			VNEG(outRegs[lane], fpr.V(sregs[lane]));
			break;

		case 16:  // d[i] = 1.0f / s[i]; (vrcp)
			MOVI2F(S0, 1.0f, R0);
			VDIV(outRegs[lane], S0, fpr.V(sregs[lane]));
			break;

		case 17:  // d[i] = 1.0f / sqrtf(s[i]); (vrsq)
			MOVI2F(S0, 1.0f, R0);
			VSQRT(S1, fpr.V(sregs[lane]));
			VDIV(outRegs[lane], S0, S1);
			break;

		case 22:  // d[i] = sqrtf(s[i]); (vsqrt)
			VSQRT(outRegs[lane], fpr.V(sregs[lane]));
			VABS(outRegs[lane], outRegs[lane]);
			break;

		case 24:  // d[i] = -1.0f / s[i]; (vnrcp)
			MOVI2F(S0, -1.0f, R0);
			VDIV(outRegs[lane], S0, fpr.V(sregs[lane]));
			break;

		// Sub-ops without an emitted implementation:
		case 4:   // vsat0:  clamp s[i] to [0, 1]
		case 5:   // vsat1:  clamp s[i] to [-1, 1]
		case 18:  // vsin:   sinf((float)M_PI_2 * s[i])
		case 19:  // vcos:   cosf((float)M_PI_2 * s[i])
		case 20:  // vexp2:  powf(2.0f, s[i])
		case 21:  // vlog2:  logf(s[i])/log(2.0f)
		case 23:  // vasin:  asinf(s[i] * (float)M_2_PI)
		case 26:  // vnsin:  -sinf((float)M_PI_2 * s[i])
		case 28:  // vrexp2: 1.0f / expf(s[i] * (float)M_LOG2E)
			DISABLE;
			break;
		}
	}

	// Copy the per-lane results into the real destination registers.
	fpr.MapRegsV(dregs, sz, MAP_NOINIT | MAP_DIRTY);
	for (int lane = 0; lane < n; ++lane) {
		VMOV(fpr.V(dregs[lane]), outRegs[lane]);
	}

	ApplyPrefixD(dregs, sz);

	fpr.ReleaseSpillLocks();
}
/*
 * ***************************************************************************
 * Routine:  Aprx_partSpect
 *
 * Purpose:  Partition the domain using spectral bisection.
 *
 * Notes:    We solve the generalized eigenvalue problem
 *
 *               A x = lambda B x
 *
 *           for the second smallest eigenpair and return its eigenvector.
 *           The problem is turned into a regular eigenvalue problem
 *
 *               C y = lambda y,   C = B^{-1/2} A B^{-1/2},   y = B^{1/2} x.
 *
 *           "B" is diagonal, holding the (positive) error estimate of each
 *           element, so B^{-1/2} is a well-defined positive diagonal matrix.
 *           An element with zero error gets the entry 1 in B^{-1/2}; when
 *           "general" is zero every entry is 1.
 *
 *           The matrix C is formed explicitly (diagonal = number of
 *           same-color face neighbors, off-diagonal = -1 per shared face,
 *           both scaled by B^{-1/2}) and handed to Bvec_eig.  On return the
 *           eigenvector y is scaled by B^{-1/2} to recover x, which is then
 *           normalized to unit 2-norm and written back into evec.
 *
 *           evec is also the initial guess.  simH[i].diag and
 *           simH[i].faceId[] are updated while the element graph is built.
 *           ford/rord map between local element numbers and simplex ids.
 *
 * Returns:  0 always.
 *
 * Author:   Michael Holst
 * ***************************************************************************
 */
VPUBLIC int Aprx_partSpect(Aprx *thee, int pcolor,
    int numC, double *evec, simHelper *simH,
    int *ford, int *rord, int general)
{
    int el, face, iv, idx, nbr, dim, itmax, litmax, key, flag;
    int numF, *IA, *JA;
    int numB, numR[MAXV];
    MATsym psym[MAXV][MAXV];
    MATmirror pmirror[MAXV][MAXV];
    MATformat pfrmt[MAXV][MAXV];
    int numO[MAXV][MAXV], *IJA[MAXV][MAXV];
    double lambda, normal, etol, letol, value, scaleEl;
    SS *sm, *sm0;
    VV *v[4];
    Bmat *A;
    Bvec *u, *B2, *B2inv;

    Vnm_print(0,"Aprx_partSpect: [pc=%d] partitioning:\n", pcolor);

    /* dimensions */
    dim = Gem_dim(thee->gm);

    /* go through the elements and enumerate elements and faces */
    numF = 0;
    for (el = 0; el < numC; el++) {
        sm = Gem_SS(thee->gm, ford[el]);
        for (face = 0; face < (int)SS_dimVV(sm); face++) {

            /* get the vertex pointers for this face */
            for (iv = 0; iv < dim; iv++) {
                v[iv] = SS_vertex( sm, vmapF[face][iv] );
            }

            /* find the unique face nabor (if not on boundary) */
            if (dim == 2) {
                sm0 = VV_commonSimplex2(v[0],v[1],sm);
            } else { /* (dim == 3) */
                sm0 = VV_commonSimplex3(v[0],v[1],v[2],sm);
            }

            /* only nabors of our own color take part in the graph */
            if ((sm0 != VNULL) && ((int)SS_chart(sm0) == pcolor)) {
                simH[el].diag++;
                nbr = rord[ SS_id(sm0) ];
                /* store each edge once, on the lower-numbered element */
                if (nbr > el) {
                    simH[el].faceId[face] = nbr;
                    numF++;
                }
            }
        }
    } /* el; loop over elements */

    /* sort the rows and build matrix integer structures */
    IJA[0][0] = Vmem_malloc( thee->vmem, numC+1+numF, sizeof(int) );
    IA = IJA[0][0];
    JA = IA + numC + 1;
    idx = 0;
    IA[0] = 0;
    for (el = 0; el < numC; el++) {
        Vnm_qsort(simH[el].faceId, 4);
        for (face = 0; face < 4; face++) {
            if (simH[el].faceId[face] > el) {
                JA[idx] = simH[el].faceId[face];
                idx++;
            }
        }
        IA[el+1] = idx;
    }
    VASSERT( idx == numF );

    /* create the real matrix object (creating space for matrix entries) */
    numB          = 1;
    numR[0]       = numC;
    numO[0][0]    = numF;
    psym[0][0]    = IS_SYM;       /* symmetric */
    pmirror[0][0] = ISNOT_MIRROR; /* really exists */
    pfrmt[0][0]   = DRC_FORMAT;   /* YSMP-bank */
    A = Bmat_ctor( thee->vmem, "A", numB, numR, numR, pmirror );
    Bmat_initStructure( A, pfrmt, psym, numO, IJA );

    /* create the eigenvector */
    u = Bvec_ctor( thee->vmem, "u", numB, numR );

    /* create the scaling matrix */
    B2    = Bvec_ctor( thee->vmem, "B2", numB, numR );
    B2inv = Bvec_ctor( thee->vmem, "B2inv", numB, numR );
    for (el = 0; el < numC; el++) {
        if ( general && (simH[el].error > 0.) ) {
            Bvec_setB( B2,    0, el, VSQRT(simH[el].error) );
            Bvec_setB( B2inv, 0, el, 1./VSQRT(simH[el].error) );
        } else if ( !general || (simH[el].error == 0.) ) {
            /* unscaled problem, or zero error: identity scaling */
            Bvec_setB( B2,    0, el, 1. );
            Bvec_setB( B2inv, 0, el, 1. );
        } else {
            /* negative (or NaN) error estimate */
            VASSERT(0);
        }
    }

    /* now build the scaled adjacency matrix entries */
    idx = 0;
    for (el = 0; el < numC; el++) {
        scaleEl = Bvec_valB( B2inv, 0, el );
        value = (double)simH[el].diag * scaleEl * scaleEl;
        Bmat_set( A, 0, 0, el, el, value );
        for (face = 0; face < 4; face++) {
            nbr = simH[el].faceId[face];
            if (nbr > el) {
                value = -1. * scaleEl * Bvec_valB( B2inv, 0, nbr );
                Bmat_set( A, 0, 0, el, nbr, value );
                idx++;
            }
        }
    }
    VASSERT( idx == numO[0][0] );

    /* the initial approximation */
    for (el = 0; el < numC; el++) {
        Bvec_setB( u, 0, el, evec[el] );
    }

    /* print out matrix for testing */
    /* Bmat_printSp(A, "lap.m"); */

    /* finally, get eigenvector #2 (this is the costly part...) */
    litmax = 500;
    letol  = 1.0e-3;
    lambda = 0.;
    key    = 0;
    flag   = 0;
    itmax  = 50;
    etol   = 1.0e-4;
    Bvec_eig(u, A, litmax, letol, &lambda, key, flag, itmax, etol);

    /* re-scale the final approximation */
    normal = 0;
    for (el = 0; el < numC; el++) {
        evec[el] = Bvec_valB( B2inv, 0, el ) * Bvec_valB( u, 0, el );
        normal += (evec[el]*evec[el]);
    }
    normal = VSQRT( normal );

    /* normalize the final result */
    for (el = 0; el < numC; el++) {
        evec[el] = evec[el] / normal;
    }

    /* destroy the adjacency matrix and eigenvector */
    Bmat_dtor( &A ); /* this frees our earlier IJA malloc! */
    Bvec_dtor( &B2 );
    Bvec_dtor( &B2inv );
    Bvec_dtor( &u );

    return 0;
}
/*
 * ***************************************************************************
 * Routine:  Aprx_partInert
 *
 * Purpose:  Partition the domain using inertial bisection.
 *           Partition sets of points in R^d (d=2 or d=3) by viewing them
 *           as point masses of a rigid body, and by then employing the
 *           classical mechanics ideas of inertia and Euler axes.
 *
 * Notes:    The (symmetric) 3x3 inertia tensor is accumulated from the
 *           element masses simH[i].mass and positions simH[i].bc:
 *
 *               II[j][k] += mass * ( delta_jk * |bc|^2 - bc[j]*bc[k] )
 *
 *           It is scaled by 1/Mat3_nrm8(II) and passed to Mat3_qri, which
 *           yields eigenvectors (columns of V) and eigenvalues (diagonal
 *           of D).  The principal axis with the SMALLEST |eigenvalue| is
 *           the direction along which the body is most "line-like"; it is
 *           normalized and used as the normal of the cutting line/plane.
 *
 *           evec[i] receives the inner product of simH[i].bc with that
 *           axis; its sign tells which side of the cut element i lies on.
 *           evec is finally normalized to unit 2-norm.
 *
 *           NOTE(review): nothing in this routine shifts the positions to
 *           the center of mass; simH[i].bc is presumably already relative
 *           to it -- confirm against the caller.
 *
 * Returns:  0 always.
 *
 * Author:   Michael Holst
 * ***************************************************************************
 */
VPUBLIC int Aprx_partInert(Aprx *thee, int pcolor,
    int numC, double *evec, simHelper *simH)
{
    int el, r, c, axisIdx;
    double rad, sca, smallest, axisLen, evecLen, caxis[3];
    Mat3 ident, inertia, V, D;

    Vnm_print(0,"Aprx_partInert: WARNING: assuming single-chart manifold.\n");
    Vnm_print(0,"Aprx_partInert: [pc=%d] partitioning:\n", pcolor);

    /* form the inertia tensors */
    Mat3_eye(ident);
    Mat3_init(inertia, 0.);
    for (el = 0; el < numC; el++) {

        /* get vector length (squared!) */
        rad = 0.;
        for (r = 0; r < 3; r++) {
            rad += ( simH[el].bc[r] * simH[el].bc[r] );
        }

        /* add contribution to the inertia tensor */
        for (r = 0; r < 3; r++) {
            for (c = 0; c < 3; c++) {
                inertia[r][c] += ( simH[el].mass *
                    (ident[r][c]*rad - simH[el].bc[r]*simH[el].bc[c]) );
            }
        }
    }

    /* find the d-principle axes, and isolate the single axis we need */
    /* (the principle axis we want is the one with SMALLEST moment) */
    sca = Mat3_nrm8(inertia);
    Mat3_scal(inertia, 1./sca);
    (void)Mat3_qri(V, D, inertia);
    smallest = VLARGE;
    axisIdx = -1;
    for (r = 0; r < 3; r++) {
        if ( VABS(D[r][r]) < smallest ) {
            smallest = VABS(D[r][r]);
            axisIdx = r;
        }
    }
    VASSERT( smallest > 0. );
    VASSERT( smallest != VLARGE );
    VASSERT( axisIdx >= 0 );

    /* pull out the matching eigenvector (column of V) and normalize it */
    for (r = 0; r < 3; r++) {
        caxis[r] = V[r][axisIdx];
    }
    axisLen = Vec3_nrm2(caxis);
    VASSERT( axisLen > 0. );
    Vec3_scal(caxis, 1./axisLen);

    /* decompose points based on bisecting principle axis with a line or */
    /* plane; we do this using an inner-product test with normal vec "caxis" */
    evecLen = 0;
    for (el = 0; el < numC; el++) {
        evec[el] = Vec3_dot( simH[el].bc, caxis );
        evecLen += (evec[el]*evec[el]);
    }
    evecLen = VSQRT( evecLen );

    /* normalize the final result */
    for (el = 0; el < numC; el++) {
        evec[el] = evec[el] / evecLen;
    }

    return 0;
}
/* ///////////////////////////////////////////////////////////////////////////
// Routine:  Vpee_markRefine
//
// Purpose:  Estimate the error on every simplex that Vpee_ourSimp accepts
//           for this partition, queue those whose estimate exceeds the
//           barrier for refinement, and store the estimates in aprx->wev
//           (zero for simplices outside the set) and their sum in
//           aprx->gerror.
//
// Args:     thee   Object supplying gm, killFlag and killParam
//           am     Supplies the Aprx object and the u, ud, f vectors passed
//                  to the estimators
//           level  Not used by this routine
//           akey   -1, 0: handed straight to Gem_markRefine
//                  2: Aprx_estNonlinResid   3: Aprx_estLocalProblem
//                  4: Aprx_estDualProblem   anything else is rejected
//           rcol   Partition color, passed to Vpee_ourSimp/Gem_markRefine
//           etol   Error tolerance
//           bkey   0: barrier = etol^2      1: barrier = etol^2/numSimplices
//
// Notes:    killFlag == 3 (neighbor-based attenuation) is not implemented.
//           The branch reporting this used to test killFlag == 2 a second
//           time and could never run, so killFlag == 3 was reported as
//           "bogus".  It now prints the NOT IMPLEMENTED message, asserts,
//           and (if assertions are disabled) returns 0 as before.
//
//           NOTE(review): akey == 1 is rejected by the "bad key given"
//           branch and akey <= 0 returns early, so the block labelled
//           NON-ERROR-BASED METHODS below cannot currently be reached.  It
//           is kept in case user-defined marking is meant to be enabled.
//
//           NOTE(review): the final test compares aprx->gerror, a sum of
//           already-squared estimates, against etol itself.
//
// Returns:  The number of simplices marked; 0 on bad input or when the
//           global error criterion (bkey == 1) is already met
//
// Author:   Nathan Baker (and Michael Holst: the author of AM_markRefine, on
//           which this is based)
/////////////////////////////////////////////////////////////////////////// */
VPUBLIC int Vpee_markRefine(Vpee *thee, AM *am, int level, int akey,
  int rcol, double etol, int bkey ) {

    Aprx *aprx;
    int marked = 0, markMe, i, smid, count, currentQ;
    double maxError = 0.0, errEst = 0.0, mlevel, barrier;
    SS *sm;

    VASSERT(thee != VNULL);

    /* Get the Aprx object from AM */
    aprx = am->aprx;

    /* input check and some i/o */
    if ( ! ((-1 <= akey) && (akey <= 4)) ) {
        Vnm_print(0,"Vpee_markRefine: bad refine key; simplices marked = %d\n", marked);
        return marked;
    }

    /* For uniform markings, we have no effect */
    if ((-1 <= akey) && (akey <= 0)) {
        marked = Gem_markRefine(thee->gm, akey, rcol);
        return marked;
    }

    /* Informative I/O */
    if (akey == 2) {
        Vnm_print(0,"Vpee_estRefine: using Aprx_estNonlinResid().\n");
    } else if (akey == 3) {
        Vnm_print(0,"Vpee_estRefine: using Aprx_estLocalProblem().\n");
    } else if (akey == 4) {
        Vnm_print(0,"Vpee_estRefine: using Aprx_estDualProblem().\n");
    } else {
        Vnm_print(0,"Vpee_estRefine: bad key given; simplices marked = %d\n", marked);
        return marked;
    }
    if (thee->killFlag == 0) {
        Vnm_print(0, "Vpee_markRefine: No error attenuation -- simplices in all partitions will be marked.\n");
    } else if (thee->killFlag == 1) {
        Vnm_print(0, "Vpee_markRefine: Maximum error attenuation -- only simplices in local partition will be marked.\n");
    } else if (thee->killFlag == 2) {
        Vnm_print(0, "Vpee_markRefine: Spherical error attenutation -- simplices within a sphere of %4.3f times the size of the partition will be marked\n", thee->killParam);
    } else if (thee->killFlag == 3) {
        Vnm_print(0, "Vpee_markRefine: Neighbor-based error attenuation -- simplices in the local and neighboring partitions will be marked [NOT IMPLEMENTED]!\n");
        VASSERT(0);
        /* Only reached when assertions are compiled out */
        return marked;
    } else {
        Vnm_print(2,"Vpee_markRefine: bogus killFlag given; simplices marked = %d\n", marked);
        return marked;
    }

    /* set the barrier type */
    mlevel = (etol*etol) / Gem_numSS(thee->gm);
    if (bkey == 0) {
        barrier = (etol*etol);
        Vnm_print(0,"Vpee_estRefine: forcing [err per S] < [TOL] = %g\n", barrier);
    } else if (bkey == 1) {
        barrier = mlevel;
        Vnm_print(0,"Vpee_estRefine: forcing [err per S] < [(TOL^2/numS)^{1/2}] = %g\n", VSQRT(barrier));
    } else {
        Vnm_print(0,"Vpee_estRefine: bad bkey given; simplices marked = %d\n", marked);
        return marked;
    }

    /* timer */
    Vnm_tstart(30, "error estimation");

    /* count = num generations to produce from marked simplices (minimally) */
    count = 1; /* must be >= 1 */

    /* check the refinement Q for emptyness */
    currentQ = 0;
    if (Gem_numSQ(thee->gm,currentQ) > 0) {
        Vnm_print(0,"Vpee_markRefine: non-empty refinement Q%d....clearing..", currentQ);
        Gem_resetSQ(thee->gm,currentQ);
        Vnm_print(0,"..done.\n");
    }
    if (Gem_numSQ(thee->gm,!currentQ) > 0) {
        Vnm_print(0,"Vpee_markRefine: non-empty refinement Q%d....clearing..", !currentQ);
        Gem_resetSQ(thee->gm,!currentQ);
        Vnm_print(0,"..done.\n");
    }
    VASSERT( Gem_numSQ(thee->gm,currentQ) == 0 );
    VASSERT( Gem_numSQ(thee->gm,!currentQ) == 0 );

    /* clear everyone's refinement flags */
    Vnm_print(0,"Vpee_markRefine: clearing all simplex refinement flags..");
    for (i=0; i<Gem_numSS(thee->gm); i++) {
        if ( (i>0) && (i % VPRTKEY) == 0 ) Vnm_print(0,"[MS:%d]",i);
        sm = Gem_SS(thee->gm,i);
        SS_setRefineKey(sm,currentQ,0);
        SS_setRefineKey(sm,!currentQ,0);
        SS_setRefinementCount(sm,0);
    }
    Vnm_print(0,"..done.\n");

    /* NON-ERROR-BASED METHODS */
    /* Simplex flag clearing */
    if (akey == -1) return marked;
    /* Uniform & user-defined refinement*/
    if ((akey == 0) || (akey == 1)) {
        smid = 0;
        while ( smid < Gem_numSS(thee->gm)) {
            /* Get the simplex and find out if it's markable */
            sm = Gem_SS(thee->gm,smid);
            markMe = Vpee_ourSimp(thee, sm, rcol);
            if (markMe) {
                if (akey == 0) {
                    marked++;
                    Gem_appendSQ(thee->gm,currentQ, sm);
                    SS_setRefineKey(sm,currentQ,1);
                    SS_setRefinementCount(sm,count);
                } else if (Vpee_userDefined(thee, sm)) {
                    marked++;
                    Gem_appendSQ(thee->gm,currentQ, sm);
                    SS_setRefineKey(sm,currentQ,1);
                    SS_setRefinementCount(sm,count);
                }
            }
            smid++;
        }
    }

    /* ERROR-BASED METHODS */
    /* gerror = global error accumulation */
    aprx->gerror = 0.;

    /* traverse the simplices and process the error estimates */
    Vnm_print(0,"Vpee_markRefine: estimating error..");
    smid = 0;
    while ( smid < Gem_numSS(thee->gm)) {

        /* Get the simplex and find out if it's markable */
        sm = Gem_SS(thee->gm,smid);
        markMe = Vpee_ourSimp(thee, sm, rcol);

        if ( (smid>0) && (smid % VPRTKEY) == 0 ) Vnm_print(0,"[MS:%d]",smid);

        /* Produce an error estimate for this element if it is in the set */
        if (markMe) {
            if (akey == 2) {
                errEst = Aprx_estNonlinResid(aprx, sm, am->u,am->ud,am->f);
            } else if (akey == 3) {
                errEst = Aprx_estLocalProblem(aprx, sm, am->u,am->ud,am->f);
            } else if (akey == 4) {
                errEst = Aprx_estDualProblem(aprx, sm, am->u,am->ud,am->f);
            }
            VASSERT( errEst >= 0. );

            /* if error estimate above tol, mark element for refinement */
            if ( errEst > barrier ) {
                marked++;
                Gem_appendSQ(thee->gm,currentQ, sm); /*add to refinement Q*/
                SS_setRefineKey(sm,currentQ,1);      /* note now on refine Q */
                SS_setRefinementCount(sm,count);     /* refine X many times? */
            }

            /* keep track of the max error over the mesh */
            maxError = VMAX2( VSQRT(VABS(errEst)), maxError );

            /* store the estimate */
            Bvec_set( aprx->wev, smid, errEst );

            /* accumlate into global error (errEst is SQUAREd already) */
            aprx->gerror += errEst;

        /* otherwise store a zero for the estimate */
        } else {
            Bvec_set( aprx->wev, smid, 0. );
        }

        smid++;
    }

    /* do some i/o */
    Vnm_print(0,"..done. [marked=<%d/%d>]\n",marked,Gem_numSS(thee->gm));
    Vnm_print(0,"Vpee_estRefine: TOL=<%g> Global_Error=<%g>\n", etol, aprx->gerror);
    Vnm_print(0,"Vpee_estRefine: (TOL^2/numS)^{1/2}=<%g> Max_Ele_Error=<%g>\n", VSQRT(mlevel),maxError);
    Vnm_tstop(30, "error estimation");

    /* check for making the error tolerance */
    if ((bkey == 1) && (aprx->gerror <= etol)) {
        Vnm_print(0, "Vpee_estRefine: *********************************************\n");
        Vnm_print(0, "Vpee_estRefine: Global Error criterion met; setting marked=0.\n");
        Vnm_print(0, "Vpee_estRefine: *********************************************\n");
        marked = 0;
    }

    /* return */
    return marked;
}
/* ///////////////////////////////////////////////////////////////////////////
// Routine:  Vpee_ctor2
//
// Purpose:  Fill in an already-allocated Vpee object: validate and record the
//           error-attenuation settings, create the object's Vmem handle, and
//           compute the centre of geometry and circumscribing-sphere radius
//           of the local partition.
//
// Args:     thee         object to initialise (must not be VNULL)
//           gm           mesh manager stored in thee->gm and traversed here
//           localPartID  chart value identifying the local partition
//           killFlag     0 = no attenuation outside the partition
//                        1 = error outside the local partition ignored
//                        2 = error ignored outside a sphere of radius
//                            killParam * (circumscribing-sphere radius)
//                        3 = local partition plus immediate neighbours
//                            (reported as NOT IMPLEMENTED)
//           killParam    sphere scale factor, reported for killFlag == 2
//
// Returns:  1 on success; 0 if killFlag is unrecognised and VASSERT did not
//           terminate the program.
//
// Author:   Nathan Baker
/////////////////////////////////////////////////////////////////////////// */
VPUBLIC int Vpee_ctor2(Vpee *thee, Gem *gm, int localPartID, int killFlag,
                       double killParam ) {

    int ivert, nLocalVerts;
    SS *simp;
    VV *vert;
    double radius, dx, dy, dz;

    VASSERT(thee != VNULL);

    /* Sanity check on input values */
    if (killFlag == 0) {
        Vnm_print(0, "Vpee_ctor2: No error attenuation outside partition.\n");
    } else if (killFlag == 1) {
        Vnm_print(0, "Vpee_ctor2: Error outside local partition ignored.\n");
    } else if (killFlag == 2) {
        Vnm_print(0, "Vpee_ctor2: Error ignored outside sphere with radius %4.3f times the radius of the circumscribing sphere\n", killParam);
        if (killParam < 1.0) {
            /* The conversion previously lacked its 'f' ("%4.3 <"), which is
             * an invalid printf-style directive. */
            Vnm_print(2, "Vpee_ctor2: Warning! Parameter killParam = %4.3f < 1.0!\n", killParam);
            Vnm_print(2, "Vpee_ctor2: This may result in non-optimal marking and refinement!\n");
        }
    } else if (killFlag == 3) {
        Vnm_print(0, "Vpee_ctor2: Error outside local partition and immediate neighbors ignored [NOT IMPLEMENTED].\n");
    } else {
        Vnm_print(2, "Vpee_ctor2: UNRECOGNIZED killFlag PARAMETER! BAILING!.\n");
        VASSERT(0);
        /* Only reached if VASSERT does not terminate: report failure rather
         * than building an object with an invalid killFlag. */
        return 0;
    }

    thee->gm = gm;
    thee->localPartID = localPartID;
    thee->killFlag = killFlag;
    thee->killParam = killParam;
    thee->mem = Vmem_ctor("APBS::VPEE");

    /* Now, figure out the center of geometry for the local partition.  The
     * general plan is to loop through the vertices, loop through the
     * vertices' simplex lists and find the vertices with simplices containing
     * chart values we're interested in. */
    thee->localPartCenter[0] = 0.0;
    thee->localPartCenter[1] = 0.0;
    thee->localPartCenter[2] = 0.0;
    nLocalVerts = 0;
    for (ivert=0; ivert<Gem_numVV(thee->gm); ivert++) {
        vert = Gem_VV(thee->gm, ivert);
        simp = VV_firstSS(vert);
        VASSERT(simp != VNULL);
        for ( ; simp != VNULL; simp = SS_link(simp, vert)) {
            if (SS_chart(simp) == thee->localPartID) {
                thee->localPartCenter[0] += VV_coord(vert, 0);
                thee->localPartCenter[1] += VV_coord(vert, 1);
                thee->localPartCenter[2] += VV_coord(vert, 2);
                nLocalVerts++;
                /* Leave after the first matching simplex so that each vertex
                 * contributes exactly once. */
                break;
            }
        }
    }
    VASSERT(nLocalVerts > 0);
    thee->localPartCenter[0] =
        thee->localPartCenter[0]/((double)(nLocalVerts));
    thee->localPartCenter[1] =
        thee->localPartCenter[1]/((double)(nLocalVerts));
    thee->localPartCenter[2] =
        thee->localPartCenter[2]/((double)(nLocalVerts));
    Vnm_print(0, "Vpee_ctor2: Part %d centered at (%4.3f, %4.3f, %4.3f)\n",
              thee->localPartID, thee->localPartCenter[0],
              thee->localPartCenter[1], thee->localPartCenter[2]);

    /* Now, figure out the radius of the sphere circumscribing the local
     * partition.  localPartRadius holds the largest SQUARED distance seen
     * until the square root is taken after the loop.  As above, the break
     * keeps a vertex from being examined more than once. */
    thee->localPartRadius = 0.0;
    for (ivert=0; ivert<Gem_numVV(thee->gm); ivert++) {
        vert = Gem_VV(thee->gm, ivert);
        simp = VV_firstSS(vert);
        VASSERT(simp != VNULL);
        for ( ; simp != VNULL; simp = SS_link(simp, vert)) {
            if (SS_chart(simp) == thee->localPartID) {
                dx = thee->localPartCenter[0] - VV_coord(vert, 0);
                dy = thee->localPartCenter[1] - VV_coord(vert, 1);
                dz = thee->localPartCenter[2] - VV_coord(vert, 2);
                radius = dx*dx + dy*dy + dz*dz;
                if (radius > thee->localPartRadius) {
                    thee->localPartRadius = radius;
                }
                break;
            }
        }
    }
    thee->localPartRadius = VSQRT(thee->localPartRadius);
    Vnm_print(0, "Vpee_ctor2: Part %d has circumscribing sphere of radius %4.3f\n",
              thee->localPartID, thee->localPartRadius);

    return 1;
}
/*
 * Vnewton -- damped inexact Newton iteration; each Newton step is solved
 * approximately with the linear multigrid solver Vmvcs.
 *
 * Parameters (all passed by pointer):
 *   nx, ny, nz   grid dimensions handed to every kernel; their product is the
 *                point count used for istop == 2
 *   x            current iterate, addressed at offset VAT2(iz, 1, lev);
 *                updated in place
 *   iz           index array used through VAT2 to locate per-level data
 *   w0..w3       work vectors (overwritten)
 *   istop        stopping-criterion selector, 0..5 (see the note below)
 *   itmax        maximum number of Newton iterations
 *   iters        output: number of Newton iterations performed
 *   ierror       not referenced by this routine
 *   nlev, ilev, nlev_real, mgsolv, epsiln, omega, nu1, nu2, mgsmoo
 *                forwarded unchanged to Vmvcs / Vipower (ilev also selects
 *                the level index lev used here)
 *   iok          non-zero: run the stopping test and report through Vprtstp;
 *                zero: stop on itmax only
 *   iinfo        verbosity; > 1 prints a start message, > 2 additionally runs
 *                the Jacobian eigenvalue/condition analysis at the end
 *   errtol       tolerance compared against rsnrm / rsden
 *   cprime, rhs  filled by Vgetjac each iteration; rhs is reused as scratch
 *                by the residual evaluations after the linear solve
 *   xtmp         receives the Newton correction from the linear solve
 *   ipc, rpc, pc, ac, cc, fc
 *                operator / coefficient / source arrays passed to the kernels
 *   tru          reference solution for istop 3..5; for istop == 2 it is used
 *                as storage for the previous iterate
 */
VPUBLIC void Vnewton(int *nx, int *ny, int *nz,
        double *x, int *iz,
        double *w0, double *w1, double *w2, double *w3,
        int *istop, int *itmax, int *iters, int *ierror,
        int *nlev, int *ilev, int *nlev_real,
        int *mgsolv, int *iok, int *iinfo,
        double *epsiln, double *errtol, double *omega,
        int *nu1, int *nu2, int *mgsmoo,
        double *cprime, double *rhs, double *xtmp,
        int *ipc, double *rpc,
        double *pc, double *ac, double *cc, double *fc, double *tru) {

    int level, lev;
    int itmax_s, iters_s, ierror_s, iok_s, iinfo_s, istop_s;
    double errtol_s, ord, bigc;
    double rsden, rsnrm, orsnrm;
    double xnorm_old, xnorm_new, damp, xnorm_med, xnorm_den;
    double rho_max, rho_min, rho_max_mod, rho_min_mod, errtol_p;
    int iter_d, itmax_d, mode, idamp, ipkey;
    int itmax_p, iters_p, iok_p, iinfo_p;

    // Utility and temporary parameters
    double alpha;

    MAT2(iz, 50, 1);

    // Recover level information
    level = 1;
    lev = (*ilev - 1) + level;

    // Do some i/o if requested
    if (*iinfo > 1) {
        VMESSAGE3("Starting: (%d, %d, %d)", *nx, *ny, *nz);
    }

    if (*iok != 0) {
        Vprtstp(*iok, -1, 0.0, 0.0, 0.0);
    }

    /**************************************************************
     *** note: if (iok!=0) then:  use a stopping test.          ***
     ***       else:  use just the itmax to stop iteration.     ***
     **************************************************************
     *** istop=0 most efficient (whatever it is)                ***
     *** istop=1 relative residual                              ***
     *** istop=2 rms difference of successive iterates          ***
     *** istop=3 relative true error (provided for testing)     ***
     **************************************************************/

    // Compute denominator for stopping criterion
    if (*istop == 0) {
        rsden = 1.0;
    } else if (*istop == 1) {

        // Compute initial residual with zero initial guess
        // this is analogous to the linear case where one can
        // simply take norm of rhs for a zero initial guess
        Vazeros(nx, ny, nz, w1);

        Vnmresid(nx, ny, nz,
                 RAT(ipc, VAT2(iz, 5, lev)), RAT(rpc, VAT2(iz, 6, lev)),
                 RAT( ac, VAT2(iz, 7, lev)), RAT( cc, VAT2(iz, 1, lev)),
                 RAT( fc, VAT2(iz, 1, lev)),
                 w1, w2, w3);
        rsden = Vxnrm1(nx, ny, nz, w2);
    } else if (*istop == 2) {
        // Multiply in double so that a large grid cannot overflow int
        rsden = VSQRT((double)(*nx) * (double)(*ny) * (double)(*nz));
    } else if (*istop == 3) {
        rsden = Vxnrm2(nx, ny, nz, RAT(tru, VAT2(iz, 1, lev)));
    } else if (*istop == 4) {
        rsden = Vxnrm2(nx, ny, nz, RAT(tru, VAT2(iz, 1, lev)));
    } else if (*istop == 5) {
        Vnmatvec(nx, ny, nz,
                 RAT(ipc, VAT2(iz, 5, lev)), RAT(rpc, VAT2(iz, 6, lev)),
                 RAT( ac, VAT2(iz, 7, lev)), RAT( cc, VAT2(iz, 1, lev)),
                 RAT(tru, VAT2(iz, 1, lev)),
                 w1, w2);
        rsden = VSQRT(Vxdot(nx, ny, nz, RAT(tru, VAT2(iz, 1, lev)), w1));
    } else {
        VABORT_MSG1("Bad istop value: %d\n", *istop);
    }

    if (rsden == 0.0) {
        rsden = 1.0;
        /* NOTE(review): rsden has just been reset to 1.0, so the condition
         * passed here is always true.  If VWARN_MSG0 warns when its
         * condition is false, this warning can never be emitted -- confirm
         * against the macro definition. */
        VWARN_MSG0(rsden != 0, "rhs is zero");
    }
    rsnrm = rsden;
    orsnrm = rsnrm;

    if (*iok != 0) {
        Vprtstp(*iok, 0, rsnrm, rsden, orsnrm);
    }

    /*********************************************************************
     *** begin newton iteration
     *********************************************************************/

    // Now compute residual with the initial guess
    Vnmresid(nx, ny, nz,
             RAT(ipc, VAT2(iz, 5, lev)), RAT(rpc, VAT2(iz, 6, lev)),
             RAT( ac, VAT2(iz, 7, lev)), RAT( cc, VAT2(iz, 1, lev)),
             RAT( fc, VAT2(iz, 1, lev)),
             RAT(  x, VAT2(iz, 1, lev)), w0, w2);
    xnorm_old = Vxnrm1(nx, ny, nz, w0);
    if (*iok != 0) {
        xnorm_den = rsden;
    } else {
        xnorm_den = xnorm_old;
    }

    /*********************************************************************
     *** begin the loop
     *********************************************************************/

    // Setup for the looping
    VMESSAGE0("Damping enabled");
    idamp = 1;
    *iters = 0;

    while (1) {

        (*iters)++;

        // Save iterate if stop test will use it on next iter
        if (*istop == 2) {
            Vxcopy(nx, ny, nz,
                   RAT(x, VAT2(iz, 1, lev)), RAT(tru, VAT2(iz, 1, lev)));
        }

        // Compute the current jacobian system and rhs
        ipkey = VAT(ipc, 10);
        Vgetjac(nx, ny, nz, nlev_real, iz, ilev, &ipkey,
                x, w0, cprime, rhs, cc, pc);

        // Determine number of correct digits in current residual
        // Algorithm 5.3 in the thesis, test version (1')
        // Global-superlinear convergence
        bigc = 1.0;
        ord  = 2.0;

        /* NAB 06-18-01:  If complex problems are not converging, set this to
         * machine epsilon.  This makes it use the exact jacobian rather than
         * the appropriate form (as here) */
        errtol_s = VMIN2((0.9 * xnorm_old), (bigc * VPOW(xnorm_old, ord)));
        VMESSAGE1("Using errtol_s: %f", errtol_s);

        // Do a linear multigrid solve of the newton equations
        Vazeros(nx, ny, nz, RAT(xtmp, VAT2(iz, 1, lev)));

        itmax_s  = 1000;
        istop_s  = 0;
        iters_s  = 0;
        ierror_s = 0;

        // NAB 06-18-01 hack: the inner solver always runs with iok_s = 2,
        // and inherits the caller's verbosity only when *iinfo >= 2.
        // iinfo_s is given an explicit default so that it is never passed
        // to Vmvcs uninitialized.
        iinfo_s = 0;
        if (*iinfo >= 2) {
            iinfo_s = *iinfo;
        }
        iok_s = 2;
        // End of NAB hack.

        Vmvcs(nx, ny, nz,
              xtmp, iz,
              w0, w1, w2, w3,
              &istop_s, &itmax_s, &iters_s, &ierror_s,
              nlev, ilev, nlev_real, mgsolv,
              &iok_s, &iinfo_s,
              epsiln, &errtol_s, omega,
              nu1, nu2, mgsmoo,
              ipc, rpc, pc, ac, cprime, rhs, tru);

        /**************************************************************
         *** note: rhs and cprime are now available as temp vectors ***
         **************************************************************/

        // If damping is still enabled -- doit
        if (idamp == 1) {

            // Try the correction
            Vxcopy(nx, ny, nz, RAT(x, VAT2(iz, 1, lev)), w1);
            damp = 1.0;
            Vxaxpy(nx, ny, nz, &damp, RAT(xtmp, VAT2(iz, 1, lev)), w1);

            Vnmresid(nx, ny, nz,
                     RAT(ipc, VAT2(iz, 5, lev)), RAT(rpc, VAT2(iz, 6, lev)),
                     RAT( ac, VAT2(iz, 7, lev)), RAT( cc, VAT2(iz, 1, lev)),
                     RAT( fc, VAT2(iz, 1, lev)),
                     w1, w0,
                     RAT(rhs, VAT2(iz, 1, lev)));
            xnorm_new = Vxnrm1(nx, ny, nz, w0);

            // Damping is still enabled -- doit
            damp    = 1.0;
            iter_d  = 0;
            itmax_d = 10;
            mode    = 0;

            VMESSAGE1("Attempting damping, relres = %f",
                      xnorm_new / xnorm_den);

            // mode 0: still looking for the first step that reduces the
            // residual norm; mode 1: keep halving until the norm rises again.
            // The first pass always runs in mode 0, so xnorm_med, w2 and w3
            // are assigned before they are read.
            while (iter_d < itmax_d) {

                if (mode == 0) {
                    if (xnorm_new < xnorm_old) {
                        mode = 1;
                    }
                } else if (xnorm_new > xnorm_med) {
                    break;
                }

                // Keep old soln and residual around, and its norm
                Vxcopy(nx, ny, nz, w1, w2);
                Vxcopy(nx, ny, nz, w0, w3);
                xnorm_med = xnorm_new;

                // New damped correction, residual, and its norm
                Vxcopy(nx, ny, nz, RAT(x, VAT2(iz, 1, lev)), w1);
                damp = damp / 2.0;
                Vxaxpy(nx, ny, nz, &damp, RAT(xtmp, VAT2(iz, 1, lev)), w1);

                Vnmresid(nx, ny, nz,
                         RAT(ipc, VAT2(iz, 5, lev)), RAT(rpc, VAT2(iz, 6, lev)),
                         RAT( ac, VAT2(iz, 7, lev)), RAT( cc, VAT2(iz, 1, lev)),
                         RAT( fc, VAT2(iz, 1, lev)),
                         w1, w0,
                         RAT(rhs, VAT2(iz, 1, lev)));
                xnorm_new = Vxnrm1(nx, ny, nz, w0);

                // Next iter...
                iter_d = iter_d + 1;
                VMESSAGE1("Attempting damping, relres = %f",
                          xnorm_new / xnorm_den);
            }

            // Accept the saved (previous) candidate and its residual
            Vxcopy(nx, ny, nz, w2, RAT(x, VAT2(iz, 1, lev)));
            Vxcopy(nx, ny, nz, w3, w0);
            xnorm_new = xnorm_med;
            xnorm_old = xnorm_new;

            VMESSAGE1("Damping accepted, relres = %f", xnorm_new / xnorm_den);

            // Determine whether or not to disable damping
            if ((iter_d - 1) == 0) {
                VMESSAGE0("Damping disabled");
                idamp = 0;
            }
        } else {

            // Damping is disabled -- accept the newton step
            damp = 1.0;

            Vxaxpy(nx, ny, nz, &damp,
                   RAT(xtmp, VAT2(iz, 1, lev)), RAT(x, VAT2(iz, 1, lev)));

            Vnmresid(nx, ny, nz,
                     RAT(ipc, VAT2(iz, 5, lev)), RAT(rpc, VAT2(iz, 6, lev)),
                     RAT( ac, VAT2(iz, 7, lev)), RAT( cc, VAT2(iz, 1, lev)),
                     RAT( fc, VAT2(iz, 1, lev)),
                     RAT(  x, VAT2(iz, 1, lev)), w0,
                     RAT(rhs, VAT2(iz, 1, lev)));

            xnorm_new = Vxnrm1(nx, ny, nz, w0);
            xnorm_old = xnorm_new;
        }

        // Compute/check the current stopping test ***
        // The flag VALUE is tested here.  The previous code compared the
        // pointer itself with 0, which made the test unconditional.
        if (*iok != 0) {

            orsnrm = rsnrm;

            if (*istop == 0) {
                rsnrm = xnorm_new;
            } else if (*istop == 1) {
                rsnrm = xnorm_new;
            } else if (*istop == 2) {
                // tru holds the iterate saved at the top of this pass
                Vxcopy(nx, ny, nz, RAT(tru, VAT2(iz, 1, lev)), w1);
                alpha = -1.0;
                Vxaxpy(nx, ny, nz, &alpha, RAT(x, VAT2(iz, 1, lev)), w1);
                rsnrm = Vxnrm1(nx, ny, nz, w1);
            } else if (*istop == 3) {
                Vxcopy(nx, ny, nz, RAT(tru, VAT2(iz, 1, lev)), w1);
                alpha = -1.0;
                Vxaxpy(nx, ny, nz, &alpha, RAT(x, VAT2(iz, 1, lev)), w1);
                rsnrm = Vxnrm2(nx, ny, nz, w1);
            } else if (*istop == 4) {
                Vxcopy(nx, ny, nz, RAT(tru, VAT2(iz, 1, lev)), w1);
                alpha = -1.0;
                Vxaxpy(nx, ny, nz, &alpha, RAT(x, VAT2(iz, 1, lev)), w1);
                rsnrm = Vxnrm2(nx, ny, nz, w1);
            } else if (*istop == 5) {
                Vxcopy(nx, ny, nz, RAT(tru, VAT2(iz, 1, lev)), w1);
                alpha = -1.0;
                Vxaxpy(nx, ny, nz, &alpha, RAT(x, VAT2(iz, 1, lev)), w1);
                Vnmatvec(nx, ny, nz,
                         RAT(ipc, VAT2(iz, 5, lev)), RAT(rpc, VAT2(iz, 6, lev)),
                         RAT( ac, VAT2(iz, 7, lev)), RAT( cc, VAT2(iz, 1, lev)),
                         w1, w2, w3);
                rsnrm = VSQRT(Vxdot(nx, ny, nz, w1, w2));
            } else {
                VABORT_MSG1("Bad istop value: %d", *istop);
            }

            Vprtstp(*iok, *iters, rsnrm, rsden, orsnrm);

            if ((rsnrm/rsden) <= *errtol) {
                break;
            }
        }

        // Check iteration count ***
        if (*iters >= *itmax) {
            break;
        }
    }

    // Condition estimate of final jacobian
    if (*iinfo > 2) {

        // In the format strings below a literal percent sign is written as
        // "%%" and the (double) eigenvalue estimates use a floating-point
        // conversion; the previous "% V..." and "%d" forms were invalid.
        Vnm_print(2, "%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%\n");
        Vnm_print(2, "%% Vnewton: JACOBIAN ANALYSIS ==> (%d, %d, %d)\n",
                  *nx, *ny, *nz );

        // Largest eigenvalue of the jacobian matrix
        Vnm_print(2, "%% Vnewton: Power calculating rho(JAC)\n");
        itmax_p  = 1000;
        errtol_p = 1.0e-4;
        iters_p  = 0;
        iinfo_p  = *iinfo;

        Vpower(nx, ny, nz,
               iz, ilev,
               ipc, rpc, ac, cprime,
               w0, w1, w2, w3,
               &rho_max, &rho_max_mod,
               &errtol_p, &itmax_p, &iters_p, &iinfo_p);

        Vnm_print(2, "%% Vnewton: power iters   = %d\n", iters_p);
        Vnm_print(2, "%% Vnewton: power eigmax  = %g\n", rho_max);
        Vnm_print(2, "%% Vnewton: power (MODEL) = %g\n", rho_max_mod);

        // Smallest eigenvalue of the system matrix A ***
        Vnm_print(2, "%% Vnewton: ipower calculating lambda_min(JAC)...\n");
        itmax_p  = 1000;
        errtol_p = 1.0e-4;
        iters_p  = 0;
        iinfo_p  = *iinfo;
        // iok_p used to be handed to Vipower without ever being assigned;
        // 0 is chosen here as a defined default.
        iok_p    = 0;

        Vazeros(nx, ny, nz, xtmp);

        Vipower(nx, ny, nz,
                xtmp, iz,
                w0, w1, w2, w3,
                rhs, &rho_min, &rho_min_mod,
                &errtol_p, &itmax_p, &iters_p,
                nlev, ilev, nlev_real, mgsolv,
                &iok_p, &iinfo_p,
                epsiln, errtol, omega,
                nu1, nu2, mgsmoo,
                ipc, rpc,
                pc, ac, cprime, tru);

        Vnm_print(2, "%% Vnewton: ipower iters   = %d\n", iters_p);
        Vnm_print(2, "%% Vnewton: ipower eigmin  = %g\n", rho_min);
        Vnm_print(2, "%% Vnewton: ipower (MODEL) = %g\n", rho_min_mod);

        // Condition number estimate
        Vnm_print(2, "%% Vnewton: condition number  = %f\n",
                  (double)rho_max / rho_min);
        Vnm_print(2, "%% Vnewton: condition (MODEL) = %f\n",
                  (double)rho_max_mod / rho_min_mod);
        Vnm_print(2, "%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%\n");
    }
}
// Emits code for the MIPS one-operand FPU instructions. The low six bits of
// `op` select the operation; the source and destination FPU register numbers
// are taken from the opcode through the _FS and _FD macros. Any function code
// not listed below goes through DISABLE.
void Jit::Comp_FPU2op(u32 op)
{
    CONDITIONAL_DISABLE;

    int srcReg = _FS;
    int dstReg = _FD;

    switch (op & 0x3f)
    {
    case 4:
    {
        // sqrt:  F(fd) = sqrtf(F(fs))
        fpr.MapDirtyIn(dstReg, srcReg);
        VSQRT(fpr.R(dstReg), fpr.R(srcReg));
        break;
    }

    case 5:
    {
        // abs:   F(fd) = fabsf(F(fs))
        fpr.MapDirtyIn(dstReg, srcReg);
        VABS(fpr.R(dstReg), fpr.R(srcReg));
        break;
    }

    case 6:
    {
        // mov:   F(fd) = F(fs)
        fpr.MapDirtyIn(dstReg, srcReg);
        VMOV(fpr.R(dstReg), fpr.R(srcReg));
        break;
    }

    case 7:
    {
        // neg:   F(fd) = -F(fs)
        fpr.MapDirtyIn(dstReg, srcReg);
        VNEG(fpr.R(dstReg), fpr.R(srcReg));
        break;
    }

    case 12:
    {
        // round.w.s:  FsI(fd) = (int)floorf(F(fs)+0.5f)
        fpr.MapDirtyIn(dstReg, srcReg);
        VCVT(fpr.R(dstReg), fpr.R(srcReg), TO_INT | IS_SIGNED);
        break;
    }

    case 13:
    {
        // trunc.w.s:  FsI(fd) = Rto0(F(fs))
        fpr.MapDirtyIn(dstReg, srcReg);
        VCVT(fpr.R(dstReg), fpr.R(srcReg), TO_INT | IS_SIGNED | ROUND_TO_ZERO);
        break;
    }

    case 14:
    {
        // ceil.w.s:   FsI(fd) = (int)ceilf(F(fs))
        // Done as a signed conversion of (F(fs) + 0.5), staged in S0.
        fpr.MapDirtyIn(dstReg, srcReg);
        MOVI2F(S0, 0.5f, R0);
        VADD(S0, fpr.R(srcReg), S0);
        VCVT(fpr.R(dstReg), S0, TO_INT | IS_SIGNED);
        break;
    }

    case 15:
    {
        // floor.w.s:  FsI(fd) = (int)floorf(F(fs))
        // Done as a signed conversion of (F(fs) - 0.5), staged in S0.
        fpr.MapDirtyIn(dstReg, srcReg);
        MOVI2F(S0, 0.5f, R0);
        VSUB(S0, fpr.R(srcReg), S0);
        VCVT(fpr.R(dstReg), S0, TO_INT | IS_SIGNED);
        break;
    }

    case 32:
    {
        // cvt.s.w:    F(fd) = (float)FsI(fs)
        fpr.MapDirtyIn(dstReg, srcReg);
        VCVT(fpr.R(dstReg), fpr.R(srcReg), TO_FLOAT | IS_SIGNED);
        break;
    }

    case 36:
    {
        // cvt.w.s:    FsI(fd) = (int)F(fs), using the rounding mode held in
        // the low two bits of fcr31:
        //   0: Round nearest
        //   1: Round to zero
        //   2: Round up (ceil)
        //   3: Round down (floor)
        fpr.MapDirtyIn(dstReg, srcReg);
        LDR(R0, CTXREG, offsetof(MIPSState, fcr31));
        AND(R0, R0, Operand2(3));

        // Modes 2 and 3: bias by +/- 0.5 in S0, then convert.
        CMP(R0, Operand2(2));
        SetCC(CC_GE);
        MOVI2F(S0, 0.5f, R1);
        SetCC(CC_GT);
        VSUB(S0, fpr.R(srcReg), S0);                   /* 3 */
        SetCC(CC_EQ);
        VADD(S0, fpr.R(srcReg), S0);                   /* 2 */
        SetCC(CC_GE);
        VCVT(fpr.R(dstReg), S0, TO_INT | IS_SIGNED);   /* 2,3 */
        SetCC(CC_AL);

        // Modes 0 and 1: convert straight from the source register.
        CMP(R0, Operand2(1));
        SetCC(CC_EQ);
        VCVT(fpr.R(dstReg), fpr.R(srcReg), TO_INT | IS_SIGNED | ROUND_TO_ZERO); /* 1 */
        SetCC(CC_LT);
        VCVT(fpr.R(dstReg), fpr.R(srcReg), TO_INT | IS_SIGNED);                 /* 0 */
        SetCC(CC_AL);
        break;
    }

    default:
        DISABLE;
    }
}
/*
 * Vcghs -- conjugate-gradient iteration on the linear system described by
 * (ipc, rpc, ac, cc, fc).
 *
 *   nx, ny, nz     grid dimensions, forwarded to every kernel
 *   ipc, rpc       integer / real parameter arrays forwarded to the
 *                  residual and mat-vec kernels
 *   ac, cc, fc     operator, coefficient and source arrays
 *   x              solution vector, updated in place
 *   p, ap, r       work vectors: search direction, result of the mat-vec
 *                  applied to it, and residual
 *   itmax          maximum number of iterations
 *   iters          output: number of iterations performed
 *   errtol         tolerance on ||r|| / ||r_initial||
 *   iresid         when zero and no iterations are allowed, the routine
 *                  returns before the residual is even formed
 *   omega, iadjoint  not referenced by this routine
 */
VPUBLIC void Vcghs(int *nx, int *ny, int *nz,
        int *ipc, double *rpc,
        double *ac, double *cc, double *fc,
        double *x, double *p, double *ap, double *r,
        int *itmax, int *iters,
        double *errtol, double *omega,
        int *iresid, int *iadjoint) {

    double resNorm0;          /* norm of the initial residual            */
    double resNorm;           /* norm of the current residual            */
    double rhoNow, rhoPrev;   /* <r,r> for this and the previous pass    */
    double dirScale, invScale;
    double curvature;         /* <p, A p>                                */
    double stepLen, negStep;

    // Setup for the looping
    *iters = 0;

    if (*iters >= *itmax && *iresid == 0) {
        return;
    }

    Vmresid(nx, ny, nz, ipc, rpc, ac, cc, fc, x, r);
    resNorm0 = Vxnrm2(nx, ny, nz, r);

    // Nothing to do for a zero residual or an exhausted iteration budget
    if (resNorm0 == 0.0 || *iters >= *itmax) {
        return;
    }

    for (;;) {

        // Compute/check the current stopping test
        rhoNow  = Vxdot(nx, ny, nz, r, r);
        resNorm = VSQRT(rhoNow);
        if (resNorm / resNorm0 <= *errtol || *iters >= *itmax) {
            break;
        }

        // Form new direction vector from old one and residual
        if (*iters != 0) {
            // p <- r + dirScale * p, done as p <- dirScale * (p + r/dirScale)
            dirScale = rhoNow / rhoPrev;
            invScale = 1.0 / dirScale;
            Vxaxpy(nx, ny, nz, &invScale, r, p);
            Vxscal(nx, ny, nz, &dirScale, p);
        } else {
            // First pass: the direction is the residual itself
            Vxcopy(nx, ny, nz, r, p);
        }

        // Linear case: step length which minimizes energy norm of error
        Vmatvec(nx, ny, nz, ipc, rpc, ac, cc, p, ap);
        curvature = Vxdot(nx, ny, nz, p, ap);
        stepLen   = rhoNow / curvature;

        // Save <r,r> for next iteration
        rhoPrev = rhoNow;

        // Update solution in direction p of length stepLen
        Vxaxpy(nx, ny, nz, &stepLen, p, x);

        // Update residual
        negStep = -stepLen;
        Vxaxpy(nx, ny, nz, &negStep, ap, r);

        // some bookkeeping
        (*iters)++;
    }
}