void Jit::ApplyPrefixD(const u8 *vregs, VectorSize sz) { _assert_(js.prefixDFlag & ArmJitState::PREFIX_KNOWN); if (!js.prefixD) return; int n = GetNumVectorElements(sz); for (int i = 0; i < n; i++) { if (js.VfpuWriteMask(i)) continue; int sat = (js.prefixD >> (i * 2)) & 3; if (sat == 1) { // clamped = fabs(x) - fabs(x-0.5f) + 0.5f; // [ 0, 1] fpr.MapRegV(vregs[i], MAP_DIRTY); MOVI2F(S0, 0.5, R0); VABS(S1, fpr.V(vregs[i])); // S1 = fabs(x) VSUB(S2, fpr.V(vregs[i]), S0); // S2 = fabs(x-0.5f) {VABD} VABS(S2, S2); VSUB(fpr.V(vregs[i]), S1, S2); // v[i] = S1 - S2 + 0.5f VADD(fpr.V(vregs[i]), fpr.V(vregs[i]), S0); } else if (sat == 3) { // clamped = fabs(x) - fabs(x-1.0f); // [-1, 1] fpr.MapRegV(vregs[i], MAP_DIRTY); MOVI2F(S0, 1.0, R0); VABS(S1, fpr.V(vregs[i])); // S1 = fabs(x) VSUB(S2, fpr.V(vregs[i]), S0); // S2 = fabs(x-1.0f) {VABD} VABS(S2, S2); VSUB(fpr.V(vregs[i]), S1, S2); // v[i] = S1 - S2 } } }
// Emits code for the paired-single absolute value: each of the two slots of
// FD receives the absolute value of the matching slot of FB.
// The record form (inst.Rc set) is handed to FALLBACK_IF and not emitted here.
void JitArm::ps_abs(UGeckoInstruction inst)
{
	INSTRUCTION_START
	JITDISABLE(bJITPairedOff);
	FALLBACK_IF(inst.Rc);

	const u32 srcIndex = inst.FB;
	const u32 dstIndex = inst.FD;

	// Register lookups stay in the original order: both source halves first,
	// then both destination halves.
	// NOTE(review): the 'false' argument presumably tells the cache not to
	// preload the destination - confirm against the register cache.
	ARMReg srcLow  = fpr.R0(srcIndex);
	ARMReg srcHigh = fpr.R1(srcIndex);
	ARMReg dstLow  = fpr.R0(dstIndex, false);
	ARMReg dstHigh = fpr.R1(dstIndex, false);

	VABS(dstLow, srcLow);
	VABS(dstHigh, srcHigh);
}
/*
 * ///////////////////////////////////////////////////////////////////////////
 * // Routine:  Ju
 * //
 * // Purpose:  Evaluate the integrand J_k(u) of the energy functional J(u)
 * //           at the single point x.  This is the nonlinear energy
 * //           functional for which the weak form PDE in Fu_v() is the
 * //           Euler condition.  (Such a J(u) need not exist in all cases.)
 * //
 * //               J(u) = Int_m J_0(u) dx  +  Int_dm J_1(u) ds
 * //
 * // Input:    PDE = pointer to the PDE object
 * //           key = integrand to evaluate (0=J_0, 1=J_1)
 * //
 * // Output:   Value of the integrand is returned.  The energy key stored in
 * //           the PDE object selects what is computed:
 * //             energy key 0: key 0 gives |my_US(...) - U[0]|, key 1 gives
 * //                           0.0, any other key trips VASSERT(0)
 * //             energy key 1: 1.0
 * //             otherwise:    0.0
 * //
 * // Speed:    This function is called by MC once for every single
 * //           quadrature point during assembly, and needs to be fast.
 * //
 * // Author:   Michael Holst
 * ///////////////////////////////////////////////////////////////////////////
 */
VPUBLIC double Ju(PDE *thee, int key)
{
    double mytime = PDE_getTime( thee );
    int ekey = PDE_getEnergyKey( thee );
    double value = 0.0;

    if (ekey == 0) {
        if (key == 0) {
            /* interior form case: distance to the reference value */
            value = my_US(thee->dim, thee->vec, xq, mytime);
            value = VABS(value - U[0]);
        } else if (key == 1) {
            /* boundary form case: no contribution */
            value = 0.0;
        } else {
            VASSERT(0);
        }
    } else if (ekey == 1) {
        value = 1.0;
    } else {
        value = 0.0;
    }

    return value;
}
// Emits code for fabs: FD = |FB|.
// When the record bit (inst.Rc) is set, Helper_UpdateCR1 is invoked on the
// destination register afterwards.
void JitArm::fabsx(UGeckoInstruction inst)
{
	INSTRUCTION_START
	JITDISABLE(FloatingPoint)

	// Destination is looked up before the source, as in the original sequence.
	ARMReg dest = fpr.R0(inst.FD);
	ARMReg source = fpr.R0(inst.FB);

	VABS(dest, source);

	if (inst.Rc)
	{
		Helper_UpdateCR1(dest);
	}
}
// Applies a VFPU source prefix (swizzle / abs / negate / constant) to the
// source lanes listed in vregs.
//
//   vregs   in: register numbers of the source lanes; out: lanes that need a
//           modified value are redirected to freshly allocated temp registers.
//   prefix  encoded prefix; 0xE4 is treated as "nothing to do".
//   sz      vector size, determines the lane count.
//
// Prefix layout as decoded below, for lane i:
//   bits [2i+1:2i]  source lane (or constant index)
//   bit  8+i        absolute value (or upper half of the constant table)
//   bit  12+i       use a constant instead of a register
//   bit  16+i       negate
void Jit::ApplyPrefixST(u8 *vregs, u32 prefix, VectorSize sz) {
	if (prefix == 0xE4)
		return;

	static const float constantArray[8] = {0.f, 1.f, 2.f, 0.5f, 3.f, 1.f/3.f, 0.25f, 1.f/6.f};

	const int laneCount = GetNumVectorElements(sz);

	// Remember the untouched register numbers; vregs[] is rewritten as we go.
	u8 origV[4];
	for (int lane = 0; lane < laneCount; lane++)
		origV[lane] = vregs[lane];

	for (int lane = 0; lane < laneCount; lane++) {
		int srcLane = (prefix >> (lane * 2)) & 3;
		const bool takeAbs = ((prefix >> (8 + lane)) & 1) != 0;
		const bool flipSign = ((prefix >> (16 + lane)) & 1) != 0;
		const bool useConstant = ((prefix >> (12 + lane)) & 1) != 0;

		// Unchanged, hurray.
		const bool identity = !useConstant && srcLane == lane && !takeAbs && !flipSign;
		if (identity)
			continue;

		// This puts the value into a temp reg, so we won't write the modified value back.
		vregs[lane] = fpr.GetTempV();
		fpr.MapRegV(vregs[lane], MAP_NOINIT | MAP_DIRTY);

		if (useConstant) {
			// TODO: There is VMOV s, imm on ARM, that can generate some of these constants. Not 1/3 or 1/6 though.
			MOVI2F(fpr.V(vregs[lane]), constantArray[srcLane + (takeAbs ? 4 : 0)], R0);
		} else {
			// Prefix may say "z, z, z, z" but if this is a pair, we force to x.
			// TODO: But some ops seem to use const 0 instead?
			if (srcLane >= laneCount) {
				ERROR_LOG_REPORT(CPU, "Invalid VFPU swizzle: %08x / %d", prefix, sz);
				srcLane = 0;
			}

			if (takeAbs)
				VABS(fpr.V(vregs[lane]), fpr.V(origV[srcLane]));
			else
				VMOV(fpr.V(vregs[lane]), fpr.V(origV[srcLane]));
		}

		// TODO: This can be integrated into the VABS / VMOV above, and also the constants.
		if (flipSign)
			VNEG(fpr.V(vregs[lane]), fpr.V(vregs[lane]));

		// TODO: This probably means it will swap out soon, inefficiently...
		fpr.ReleaseSpillLockV(vregs[lane]);
	}
}
// Applies a VFPU source prefix (swizzle / abs / negate / constant) to the
// source lanes listed in vregs.
//
//   vregs   in: register numbers of the source lanes; out: lanes that need a
//           modified value are redirected to freshly allocated temp registers
//           (which are spill-locked here and left locked for the caller).
//   prefix  encoded prefix; 0xE4 is treated as "nothing to do".
//   sz      vector size, determines the lane count.
//
// Prefix layout as decoded below, for lane i:
//   bits [2i+1:2i]  source lane (or constant index)
//   bit  8+i        absolute value (or upper half of the constant table)
//   bit  12+i       use a constant instead of a register
//   bit  16+i       negate
void Jit::ApplyPrefixST(u8 *vregs, u32 prefix, VectorSize sz) {
	if (prefix == 0xE4)
		return;

	int n = GetNumVectorElements(sz);
	u8 origV[4];
	static const float constantArray[8] = {0.f, 1.f, 2.f, 0.5f, 3.f, 1.f/3.f, 0.25f, 1.f/6.f};

	// Only the first n entries of origV are ever initialized.
	for (int i = 0; i < n; i++)
		origV[i] = vregs[i];

	for (int i = 0; i < n; i++) {
		int regnum = (prefix >> (i*2)) & 3;
		int absolute = (prefix >> (8+i)) & 1;
		int negate = (prefix >> (16+i)) & 1;
		int constants = (prefix >> (12+i)) & 1;

		// Unchanged, hurray.
		if (!constants && regnum == i && !absolute && !negate)
			continue;

		// This puts the value into a temp reg, so we won't write the modified value back.
		vregs[i] = fpr.GetTempV();
		if (!constants) {
			// Prefix may say "z, z, z, z" but if this is a pair, we force to x.
			// TODO: But some ops seem to use const 0 instead?
			//
			// This must be validated BEFORE origV[regnum] is used: entries at
			// index >= n were never written, so mapping first would hand an
			// uninitialized register number to the register cache.
			if (regnum >= n) {
				WARN_LOG(CPU, "JIT: Invalid VFPU swizzle: %08x : %d / %d at PC = %08x (%s)", prefix, regnum, n, js.compilerPC, currentMIPS->DisasmAt(js.compilerPC));
				regnum = 0;
			}

			fpr.MapDirtyInV(vregs[i], origV[regnum]);
			fpr.SpillLockV(vregs[i]);

			if (absolute) {
				VABS(fpr.V(vregs[i]), fpr.V(origV[regnum]));
				if (negate)
					VNEG(fpr.V(vregs[i]), fpr.V(vregs[i]));
			} else {
				if (negate)
					VNEG(fpr.V(vregs[i]), fpr.V(origV[regnum]));
				else
					VMOV(fpr.V(vregs[i]), fpr.V(origV[regnum]));
			}
		} else {
			fpr.MapRegV(vregs[i], MAP_DIRTY | MAP_NOINIT);
			fpr.SpillLockV(vregs[i]);
			// The abs bit selects the upper four table entries; negate is
			// folded into the immediate load.
			MOVI2F(fpr.V(vregs[i]), constantArray[regnum + (absolute<<2)], R0, negate);
		}
	}
}
/// Sums the absolute values of x over the grid points with indices
/// 2..(*nx-1), 2..(*ny-1), 2..(*nz-1) (as addressed through VAT3), i.e. the
/// outermost index layer in every direction is left out.
///
/// @param nx  Pointer to the grid size in x
/// @param ny  Pointer to the grid size in y
/// @param nz  Pointer to the grid size in z
/// @param x   Grid data, accessed via MAT3/VAT3
/// @return    The accumulated sum of |x(i,j,k)|
VPUBLIC double Vxnrm1(int *nx, int *ny, int *nz, double *x) {

    int ix, iy, iz;       // Grid indices used to walk the array
    double total = 0.0;   ///< Running sum of absolute values

    MAT3(x, *nx, *ny, *nz);

    /// @todo parallel optimization
    for (iz = 2; iz < *nz; iz++) {
        for (iy = 2; iy < *ny; iy++) {
            for (ix = 2; ix < *nx; ix++) {
                total += VABS(VAT3(x, ix, iy, iz));
            }
        }
    }

    return total;
}
/*
 * ***************************************************************************
 * Routine:  Slu_lnDet
 *
 * Purpose:  Calculate the log of the determinant of a factored matrix.
 *
 * Notes:    UMFPACK has a built-in routine to compute the determinant,
 *           which returns both the mantissa and the exponent separately.
 *           This avoids most overflow and underflow problems.
 *
 *           The object is validated before any of its fields are read, and
 *           the logarithm is only formed when UMFPACK reports success, so
 *           the mantissa/exponent are never used uninitialized.
 *
 * Input:    thee = factored Slu object (must be non-null with statLU set)
 *
 * Output:   ln|det(A)| on success; 1.0 if UMFPACK reports a failure.
 *
 * Author:   Stephen Bond
 * ***************************************************************************
 */
VPUBLIC double Slu_lnDet(Slu *thee)
{
    int status;
    double Mx, Ex, lndet;
    void *Numeric;

    VASSERT( thee != VNULL );
    VASSERT( thee->statLU );

    /* only touch the object once it has been validated */
    Numeric = thee->work;

    /* Determinant = Mx * 10^Ex */
    status = umfpack_di_get_determinant( &Mx, &Ex, Numeric, VNULL );

    if (UMFPACK_OK != status) {
        Vnm_print(0, "Slu_lnDet: Failed! Returning 1.0\n");
        return 1.0;
    }

    /* LOG(DET) = LOG(Mantissa) + Exponent*LOG(10) */
    lndet = VLOG(VABS(Mx)) + Ex*VLOG(10);
    Vnm_print(0, "Slu_lnDet: ln(det(A)) = %g\n", lndet);
    return lndet;
}
/*
 * ***************************************************************************
 * Routine:  Gem_formFix
 *
 * Purpose:  Make some specified hacked fix to a given mesh.
 *
 * Notes:    key==0 --> ?
 *
 *           What the code does: every simplex face and its vertices are
 *           first reset to type 0, then every face that has no neighboring
 *           simplex is re-typed from the coordinates of its vertices:
 *             type 1: all vertices have coordinate 2 within myTol of 0.0
 *             type 3: all vertices have coordinate 2 within myTol of 68.03512
 *             type 2: all vertices have sqrt(c0^2 + c1^2) within myTol of 1.5
 *             type 4: all vertices have sqrt(c0^2 + c1^2) within myTol of 2.0
 *             type 0: anything else
 *           The key is only range-checked (0..2); the face type actually
 *           assigned comes from the geometric tests above.
 *           NOTE(review): the progress messages say "Gem_makeBnd" although
 *           this is Gem_formFix; the constants look specific to one
 *           particular mesh -- confirm before reuse.
 *
 * Author:   Michael Holst
 * ***************************************************************************
 */
VPUBLIC void Gem_formFix(Gem *thee, int key)
{
    int i, j, k, l, m, nabors, btype;
    double radk, radl, radm, myTol;
    VV *v[4];
    SS *sm, *sm0, *sm1, *sm2;

    /* input check and some i/o */
    btype = key;
    VASSERT( (0 <= btype) && (btype <= 2) );

    /* go through all simplices and zero all boundary faces */
    Vnm_print(0,"Gem_makeBnd: zeroing boundary faces/vertices..");
    Gem_setNumBF(thee, 0);
    Gem_setNumBV(thee, 0);
    for (i=0; i<Gem_numSS(thee); i++) {
        sm = Gem_SS(thee,i);
        /* periodic progress output */
        if ( (i>0) && (i % VPRTKEY) == 0 ) Vnm_print(0,"[BS:%d]",i);

        /* get local vertices */
        for (j=0; j<Gem_dimVV(thee); j++)
            v[j] = SS_vertex(sm,j);

        /* reset all vertices and faces to interior type */
        for (j=0; j<Gem_dimVV(thee); j++) {

            /* the other three local vertex/face numbers besides "j" */
            k=(j+1) % Gem_dimVV(thee);
            l=(k+1) % Gem_dimVV(thee);
            m=(l+1) % Gem_dimVV(thee);
            SS_setFaceType(sm, j, 0);
            VV_setType(v[k], 0);
            VV_setType(v[l], 0);
            if (Gem_dim(thee) == 3) VV_setType(v[m], 0);
        }
    }
    Vnm_print(0,"..done.\n");

    /* are we done */
    /* if (btype == 0) return; */

    /* okay now make a boundary */
    Vnm_print(0,"Gem_makeBnd: rebuilding boundary faces/vertices..");
    for (i=0; i<Gem_numSS(thee); i++) {
        sm = Gem_SS(thee,i);
        /* periodic progress output */
        if ( (i>0) && (i % VPRTKEY) == 0 ) Vnm_print(0,"[BS:%d]",i);

        /* get local vertices */
        for (j=0; j<Gem_dimVV(thee); j++)
            v[j] = SS_vertex(sm,j);

        /* rebuild everything */
        for (j=0; j<Gem_dimVV(thee); j++) {

            /* the other three local vertex/face numbers besides "j" */
            k=(j+1) % Gem_dimVV(thee);
            l=(k+1) % Gem_dimVV(thee);
            m=(l+1) % Gem_dimVV(thee);

            /* look for a face nabor sharing face "j" (opposite vertex "j") */
            /* a nabor is another simplex found in the simplex rings of all */
            /* vertices of the face (two rings in 2D, three rings in 3D)    */
            nabors = 0;
            for (sm0=VV_firstSS(v[k]); sm0!=VNULL;sm0=SS_link(sm0,v[k])) {
                for (sm1=VV_firstSS(v[l]); sm1!=VNULL;
                  sm1=SS_link(sm1,v[l])) {
                    if (Gem_dim(thee) == 2) {
                        if ((sm0!=sm) && (sm0==sm1)) nabors++;
                    } else {
                        for (sm2=VV_firstSS(v[m]); sm2!=VNULL;
                          sm2=SS_link(sm2,v[m])) {
                            if ((sm0!=sm) && (sm0==sm1) && (sm0==sm2)) {
                                nabors++;
                            }
                        }
                    }
                }
            }

            /* if no one there, then face "j" is actually a boundary face */
            if (nabors == 0) {

                /* classify the face from the position of its vertices */
                myTol = 1.0e-2;
                if ( ( VABS(VV_coord(v[k],2) -  0.0) < myTol)
                  && ( VABS(VV_coord(v[l],2) -  0.0) < myTol)
                  && ( VABS(VV_coord(v[m],2) -  0.0) < myTol) ) {
                    btype = 1;
                } else if ( ( VABS(VV_coord(v[k],2) - 68.03512) < myTol)
                         && ( VABS(VV_coord(v[l],2) - 68.03512) < myTol)
                         && ( VABS(VV_coord(v[m],2) - 68.03512) < myTol) ) {
                    btype = 3;
                } else {
                    /* distance of each vertex from the axis through */
                    /* coordinates 0 and 1                            */
                    radk = VSQRT( VSQR( VV_coord(v[k],0) )
                                + VSQR( VV_coord(v[k],1) ) );
                    radl = VSQRT( VSQR( VV_coord(v[l],0) )
                                + VSQR( VV_coord(v[l],1) ) );
                    radm = VSQRT( VSQR( VV_coord(v[m],0) )
                                + VSQR( VV_coord(v[m],1) ) );
                    if ( ( VABS(radk - 1.5) < myTol)
                      && ( VABS(radl - 1.5) < myTol)
                      && ( VABS(radm - 1.5) < myTol) ) {
                        btype = 2;
                    } else if ( ( VABS(radk - 2.0) < myTol)
                             && ( VABS(radl - 2.0) < myTol)
                             && ( VABS(radm - 2.0) < myTol) ){
                        btype = 4;
                    } else {
                        btype = 0;
                    }
                }

                /* record the face and count it */
                SS_setFaceType(sm, j, btype);
                Gem_numBFpp(thee);

                /* vertices still typed interior inherit the face type */
                /* and are counted once                                */
                if (VINTERIOR( VV_type(v[k])) ) {
                    VV_setType(v[k], btype);
                    Gem_numBVpp(thee);
                }
                if (VINTERIOR( VV_type(v[l])) ) {
                    VV_setType(v[l], btype);
                    Gem_numBVpp(thee);
                }
                if (Gem_dim(thee) == 3) {
                    if (VINTERIOR( VV_type(v[m])) ) {
                        VV_setType(v[m], btype);
                        Gem_numBVpp(thee);
                    }
                }
            }
        }
    }
    Vnm_print(0,"..done.\n");
}
/*
 * ***************************************************************************
 * Routine:  Mat_printLN
 *
 * Purpose:  Print an LN format matrix as a DENSE matrix in MATLAB format.
 *
 * Notes:    The matrix is first expanded into a local 30x30 dense buffer;
 *           anything with more than 30 rows or columns is skipped with a
 *           message.  Entries with magnitude below 0.0001 are printed
 *           as 0.0.  A matrix in NULL_STATE is printed as all zeros.
 *
 * Author:   Stephen Bond and Michael Holst
 * ***************************************************************************
 */
VPUBLIC void Mat_printLN(Mat *thee)
{
    static const char rn[] = "Mat_printLN:";
    const int MaxRows = 30;
    const int MaxCols = 30;
    double dense[30][30];
    int row, col;
    int numR = thee->numR;
    int numC = thee->numC;
    LinkA *lnk;
    LinkRC *xlnk;

    /* some i/o */
    Vnm_print(0, "%s printing <%s>"
        " [dim=(%dx%d),sym=%d,numA=%d]\n",
        rn, thee->name, numR, numC, thee->sym, thee->numA);

    /* size check */
    if ((numR > MaxRows) || (numC > MaxCols)) {
        Vnm_print(0, "%smatrix too large to view....skipping.\n", rn);
        return;
    }

    /* start from an all-zero dense matrix */
    for (row=0; row<numR; row++) {
        for (col=0; col<numC; col++) {
            dense[row][col] = 0.0;
        }
    }

    /* scatter the stored entries into the dense matrix */
    if (thee->state != NULL_STATE) {
        switch (thee->format) {

          case RLN_FORMAT:
            /* one linked list per row */
            for (row=0; row<numR; row++) {
                lnk = (LinkA*)Vset_access(thee->lnkU,row);
                while (lnk != VNULL) {
                    if (lnk->idx >= 0) {
                        col = lnk->idx;
                        dense[row][col] = lnk->val;
                    }
                    lnk = lnk->next;
                }
            }
            break;

          case CLN_FORMAT:
            /* one linked list per column */
            for (col=0; col<numC; col++) {
                lnk = (LinkA*)Vset_access(thee->lnkL,col);
                while (lnk != VNULL) {
                    if (lnk->idx >= 0) {
                        row = lnk->idx;
                        dense[row][col] = lnk->val;
                    }
                    lnk = lnk->next;
                }
            }
            break;

          case XLN_FORMAT:
            for (row=0; row<numR; row++) {
                if ( thee->sym == ISNOT_SYM ) {
                    xlnk = ((LinkRC**) thee->xln)[row];
                } else {
                    /* the head link carries the diagonal entry */
                    xlnk = (LinkRC*) &( ((LinkRCS*) thee->xln)[row] );
                    dense[row][row] = ((LinkRCS*) xlnk)->val;
                    xlnk = xlnk->next;
                }
                while (xlnk != VNULL) {
                    col = xlnk->idx;
                    if (col < numC) {
                        if ( thee->sym == ISNOT_SYM ) {
                            dense[row][col] = ((LinkRCS*) xlnk)->val;
                        } else if ( thee->sym == IS_SYM ) {
                            /* mirror the value across the diagonal */
                            dense[row][col] = ((LinkRCS*) xlnk)->val;
                            dense[col][row] = ((LinkRCS*) xlnk)->val;
                        } else {
                            /* separate value for the transposed position */
                            dense[row][col] = ((LinkRCN*) xlnk)->val;
                            dense[col][row] = ((LinkRCN*) xlnk)->valT;
                        }
                    }
                    xlnk = xlnk->next;
                }
            }
            break;

          default:
            Vnm_print(0,
                "%smatrix not in correct format to print....skipping.\n", rn);
            break;
        }
    }

    /* print the matrix */
    Vnm_print(0, "%s = [\n", thee->name);
    for (row=0; row<numR; row++) {
        for (col=0; col<numC; col++) {
            if (VABS(dense[row][col]) >= 0.0001) {
                Vnm_print(0, "%7.3f", dense[row][col]);
            } else {
                Vnm_print(0, " 0.0 ");
            }
        }
        Vnm_print(0, "\n");
    }
    Vnm_print(0, "];\n");
}
VPUBLIC void Vpower(int *nx, int *ny, int *nz, int *iz, int *ilev, int *ipc, double *rpc, double *ac, double *cc, double *w1, double *w2, double *w3, double *w4, double *eigmax, double *eigmax_model, double *tol, int *itmax, int *iters, int *iinfo) { int lev, level; double denom, fac, rho, oldrho, error, relerr; /// @todo Just use a constant definition of PI here double pi = 4.0 * atan( 1.0 ); // Utility variables int skipIters = 0; double alpha; MAT2(iz, 50, 1); WARN_UNTESTED; // Recover level information level = 1; lev = (*ilev - 1) + level; // Seed vector: random to contain all components Vaxrand(nx, ny, nz, w1); Vazeros(nx, ny, nz, w2); Vazeros(nx, ny, nz, w3); Vazeros(nx, ny, nz, w4); // Compute raleigh quotient with the seed vector denom = Vxnrm2(nx, ny, nz, w1); fac = 1.0 / denom; Vxscal(nx, ny, nz, &fac, w1); Vmatvec(nx, ny, nz, RAT(ipc, VAT2(iz, 5, lev)), RAT(rpc, VAT2(iz, 6,lev)), RAT(ac, VAT2(iz, 7, lev)), RAT(cc, VAT2(iz, 1,lev)), w1, w2); oldrho = Vxdot(nx, ny, nz, w1, w2); // I/O if (oldrho == 0.0) { if (*iinfo > 3) { Vnm_print(2, "POWER: iter: estimate = %d %g\n", *iters, oldrho); } rho = oldrho; } else { // Main iteration *iters = 0; while(1) { (*iters)++; // Apply the matrix A Vmatvec(nx, ny, nz, RAT(ipc, VAT2(iz, 5, lev)), RAT(rpc, VAT2(iz, 6, lev)), RAT(ac, VAT2(iz, 7, lev)), RAT(cc, VAT2(iz, 1, lev)), w1, w2); Vxcopy(nx, ny, nz, w2, w1); // Normalize the new vector denom = Vxnrm2(nx, ny, nz, w1); fac = 1.0 / denom; Vxscal(nx, ny, nz, &fac, w1); // Compute the new raleigh quotient Vmatvec(nx, ny, nz, RAT(ipc, VAT2(iz, 5, lev)), RAT(rpc, VAT2(iz, 6, lev)), RAT( ac, VAT2(iz, 7, lev)), RAT( cc, VAT2(iz, 1, lev)), w1, w2); rho = Vxdot(nx, ny, nz, w1, w2); // Stopping test *** // w2=A*x, w1=x, stop = 2-norm(A*x-lamda*x) Vxcopy(nx, ny, nz, w1, w3); Vxcopy(nx, ny, nz, w2, w4); Vxscal(nx, ny, nz, &rho, w3); alpha = -1.0; Vxaxpy(nx, ny, nz, &alpha, w3, w4); error = Vxnrm2(nx, ny, nz, w4); relerr = VABS(rho - oldrho ) / VABS( rho ); // I/O if (*iinfo > 3) 
{ Vnm_print(2, "POWER: iters =%d\n", *iters); Vnm_print(2, " error =%g\n", error); Vnm_print(2, " relerr =%g\n", relerr); Vnm_print(2, " rho =%g\n", rho); } if( relerr < *tol || *iters == *itmax) break; oldrho = rho; } } // Return some stuff *** *eigmax = rho; fac = VPOW(2.0, *ilev - 1); *eigmax_model = fac * (6.0 - 2.0 * VCOS((*nx - 2) * pi / (*nx - 1)) - 2.0 * VCOS((*ny - 2) * pi / (*ny - 1))); }
/// Inverse power iteration: estimates an eigenvalue by repeatedly applying
/// A^{-1} (through the multigrid solver Vmvcs) to a normalized vector and
/// forming the Rayleigh quotient rho = <x, A x> with Vmatvec.
///
/// Iteration stops when the relative change of rho drops below *tol or when
/// *iters reaches *itmax.  If the Rayleigh quotient of the seed vector is
/// exactly zero no iteration is performed and that value is returned.
///
/// The inner multigrid solve uses its own local control values (itmax 100,
/// one pre/post smoothing sweep, tolerance *epsiln); the caller's errtol,
/// nu1, nu2, mgsmoo and iok arguments are not read by this routine.
///
/// @param eigmin        Out: eigenvalue estimate
/// @param eigmin_model  Out: model-problem value
///                      2^(ilev-1) * (6 - 2cos(pi/(nx-1)) - 2cos(pi/(ny-1))
///                                      - 2cos(pi/(nz-1)))
/// @param iters         Out: iterations performed (left untouched when the
///                      seed Rayleigh quotient is zero)
/// @param w0..w4, u     Work/solution vectors; all are overwritten
/// @param iinfo         Verbosity; values above 3 enable progress output
VPUBLIC void Vipower(int *nx,int *ny,int *nz, double *u, int *iz, double *w0, double *w1, double *w2, double *w3, double *w4, double *eigmin, double *eigmin_model, double *tol, int *itmax, int *iters, int *nlev, int *ilev, int *nlev_real, int *mgsolv, int *iok, int *iinfo, double *epsiln, double *errtol, double *omega, int *nu1, int *nu2, int *mgsmoo, int *ipc, double *rpc, double *pc, double *ac, double *cc, double *tru) {

    int level, lev;
    double denom, fac, rho, oldrho;
    double error, relerr, errtol_s;
    int itmax_s, iters_s, ierror_s, iok_s, iinfo_s, istop_s;
    int nu1_s, nu2_s, mgsmoo_s;

    /// @todo Just use a constant definition of PI here
    double pi = 4.0 * atan( 1.0 );

    // Utility variables
    double alpha;

    MAT2(iz, 50, 1);

    WARN_UNTESTED;

    // Recover level information
    level = 1;
    lev = (*ilev - 1) + level;

    // Seed vector: random to contain all components
    Vaxrand(nx, ny, nz, w1);
    Vazeros(nx, ny, nz, w2);
    Vazeros(nx, ny, nz, w3);
    Vazeros(nx, ny, nz, w4);
    Vazeros(nx, ny, nz, RAT(w0, VAT2(iz, 1, lev)));
    Vazeros(nx, ny, nz, RAT( u, VAT2(iz, 1, lev)));

    // Compute raleigh quotient with the seed vector ***
    // (the seed is scaled to unit 2-norm first)
    denom = Vxnrm2(nx, ny, nz, w1);
    fac = 1.0 / denom;
    Vxscal(nx, ny, nz, &fac, w1);
    Vmatvec(nx, ny, nz, RAT(ipc, VAT2(iz, 5, lev)), RAT(rpc, VAT2(iz, 6, lev)), RAT( ac, VAT2(iz, 7, lev)), RAT( cc, VAT2(iz, 1, lev)), w1, w2);
    oldrho = Vxdot(nx, ny, nz, w1, w2);

    // I/O
    if (oldrho == 0.0) {
        // Degenerate seed: report and return the zero estimate
        if (*iinfo > 3) {
            Vnm_print(2, "Vipower: iters=%d\n", *iters);
            Vnm_print(2, " estimate=%f\n", oldrho);
        }
        rho = oldrho;
    } else {

        //main iteration
        *iters = 0;
        while (1) {
            (*iters)++;

            // Apply the matrix A^{-1} (using MG solver)
            // Local solver controls; the caller's values are not used here
            itmax_s = 100;
            iters_s = 0;
            ierror_s = 0;
            iok_s = 0;
            iinfo_s = 0;
            istop_s = 0;
            mgsmoo_s = 1;
            nu1_s = 1;
            nu2_s = 1;
            errtol_s = *epsiln;
            // The current vector is copied into w0, which is passed to the solver
            Vxcopy(nx, ny, nz, w1, RAT(w0, VAT2(iz, 1,lev)));
            Vmvcs(nx, ny, nz, u, iz, w1, w2, w3, w4, &istop_s, &itmax_s, &iters_s, &ierror_s, nlev, ilev, nlev_real, mgsolv, &iok_s, &iinfo_s, epsiln, &errtol_s, omega, &nu1_s, &nu2_s, &mgsmoo_s, ipc, rpc, pc, ac, cc, w0, tru);
            // The solver result in u becomes the new iterate
            Vxcopy(nx, ny, nz, RAT(u, VAT2(iz, 1, lev)), w1);

            // Normalize the new vector
            denom = Vxnrm2(nx, ny, nz, w1);
            fac = 1.0 / denom;
            Vxscal(nx, ny, nz, &fac, w1);

            // Compute the new raleigh quotient
            Vmatvec(nx, ny, nz, RAT(ipc, VAT2(iz, 5, lev)), RAT(rpc, VAT2(iz, 6, lev)), RAT(ac, VAT2(iz, 7,lev)), RAT(cc, VAT2(iz, 1, lev)), w1, w2);
            rho = Vxdot(nx, ny, nz, w1, w2);

            // Stopping test
            // w2=A*x, w1=x, stop = 2-norm(A*x-lamda*x) ***
            // 'error' (the residual norm) is only reported; the loop exit
            // below is decided by the relative change of rho.
            Vxcopy(nx, ny, nz, w1, w3);
            Vxcopy(nx, ny, nz, w2, w4);
            Vxscal(nx, ny, nz, &rho, w3);
            alpha = -1.0;
            Vxaxpy(nx, ny, nz, &alpha, w3, w4);
            error = Vxnrm2(nx, ny, nz, w4);
            relerr = VABS(rho - oldrho ) / VABS( rho );

            // I/O
            if (*iinfo > 3) {
                Vnm_print(2, "POWER: iters =%d\n", *iters);
                Vnm_print(2, " error =%g\n", error);
                Vnm_print(2, " relerr =%g\n", relerr);
                Vnm_print(2, " rho =%g\n", rho);
            }

            if (relerr < *tol || *iters == *itmax)
                break;

            oldrho = rho;
        }
    }

    // Return some stuff
    *eigmin = rho;
    fac = VPOW(2.0, *ilev - 1);
    *eigmin_model = fac * (6.0 - 2.0 * VCOS(pi / (*nx - 1)) - 2.0 * VCOS(pi / (*ny - 1)) - 2.0 * VCOS(pi / (*nz - 1)));
}
/// Power iteration on the operator realized by a single call to the
/// multigrid routine Vmvcs (itmax_s = 1, istop_s = 1): the current vector is
/// copied into u, Vmvcs is run once, and u is read back as the result.
///
/// The Rayleigh quotient rho = <x, M x> is the estimate returned in *eigmax.
/// Iteration stops when the relative change of rho drops below *tol or when
/// *iters reaches *itmax.  If the Rayleigh quotient of the seed vector is
/// exactly zero no iteration is performed and that value is returned.
///
/// @param eigmax  Out: eigenvalue estimate
/// @param iters   Out: iterations performed (left untouched when the seed
///                Rayleigh quotient is zero)
/// @param fc      Overwritten with zeros on the working level (see below)
/// @param w0..w4, u  Work/solution vectors; w1-w4 and u are overwritten
/// @param iinfo   Verbosity; values above 3 enable progress output
VEXTERNC void Vmpower(int *nx, int *ny, int *nz, double *u, int *iz, double *w0, double *w1, double *w2, double *w3, double *w4, double *eigmax, double *tol, int *itmax, int *iters, int *nlev, int *ilev, int *nlev_real, int *mgsolv, int *iok, int *iinfo, double *epsiln, double *errtol, double *omega, int *nu1, int *nu2, int *mgsmoo, int *ipc, double *rpc, double *pc, double *ac, double *cc, double *fc, double *tru) {

    // Local variables
    int lev, level;
    double denom, fac, rho, oldrho, error;
    double relerr;
    int itmax_s, iters_s, ierror_s, iok_s, iinfo_s, istop_s;
    double alpha;

    MAT2(iz, 50, 1);

    // Recover level information
    level = 1;
    lev = (*ilev - 1) + level;

    // Seed vector: random to contain all components
    Vaxrand(nx, ny, nz, w1);
    Vazeros(nx, ny, nz, w2);
    Vazeros(nx, ny, nz, w3);
    Vazeros(nx, ny, nz, w4);
    Vazeros(nx, ny, nz, RAT(u, VAT2(iz, 1, lev)));

    // NOTE: we destroy "fc" on this level due to lack of vectors... ***
    Vazeros(nx,ny,nz,RAT(fc, VAT2(iz, 1, lev)));

    // Normalize the seed vector
    denom = Vxnrm2(nx, ny, nz, w1);
    fac = 1.0 / denom;
    Vxscal(nx, ny, nz, &fac, w1);

    // Compute raleigh quotient with the seed vector
    // (one Vmvcs pass starting from u = w1; the result is left in u)
    Vxcopy(nx, ny, nz, w1, RAT(u, VAT2(iz, 1, lev)));
    itmax_s = 1;
    iters_s = 0;
    ierror_s = 0;
    iok_s = 0;
    iinfo_s = 0;
    istop_s = 1;
    Vmvcs(nx, ny, nz, u, iz, w0, w2, w3, w4, &istop_s, &itmax_s, &iters_s, &ierror_s, nlev, ilev, nlev_real, mgsolv, &iok_s, &iinfo_s, epsiln, errtol, omega, nu1, nu2, mgsmoo, ipc, rpc, pc, ac, cc, fc, tru);
    oldrho = Vxdot(nx, ny, nz, w1, RAT(u, VAT2(iz, 1, lev)));

    // I/O
    if (oldrho == 0.0) {
        // Degenerate seed: report and return the zero estimate
        if (*iinfo > 3) {
            Vnm_print(2, "Vmp0ower: iter=%d, estimate=%f", *iters, oldrho);
        }
        rho = oldrho;
    } else {

        // Main iteration
        *iters = 0;
        while (1) {
            (*iters)++;

            // Apply the matrix M
            // NOTE(review): this call passes w1 where the two quotient
            // computations pass w0 as the first work vector - confirm that
            // the difference is intended.
            Vxcopy(nx, ny, nz, w1, RAT(u, VAT2(iz, 1, lev)));
            itmax_s = 1;
            iters_s = 0;
            ierror_s = 0;
            iok_s = 0;
            iinfo_s = 0;
            istop_s = 1;
            Vmvcs(nx, ny, nz, u, iz, w1, w2, w3, w4, &istop_s, &itmax_s, &iters_s, &ierror_s, nlev, ilev, nlev_real, mgsolv, &iok_s, &iinfo_s, epsiln, errtol, omega, nu1, nu2, mgsmoo, ipc, rpc, pc, ac, cc, fc, tru);
            Vxcopy(nx, ny, nz, RAT(u, VAT2(iz, 1, lev)), w1);

            // Normalize the new vector
            denom = Vxnrm2(nx, ny, nz, w1);
            fac = 1.0 / denom;
            Vxscal(nx, ny, nz, &fac, w1);

            // Compute the new raleigh quotient
            // (w2 receives M applied to the normalized iterate)
            Vxcopy(nx, ny, nz, w1, RAT(u, VAT2(iz, 1, lev)));
            itmax_s = 1;
            iters_s = 0;
            ierror_s = 0;
            iok_s = 0;
            iinfo_s = 0;
            istop_s = 1;
            Vmvcs(nx, ny, nz, u, iz, w0, w2, w3, w4, &istop_s, &itmax_s, &iters_s, &ierror_s, nlev, ilev, nlev_real, mgsolv, &iok_s, &iinfo_s, epsiln, errtol, omega, nu1, nu2, mgsmoo, ipc, rpc, pc, ac, cc, fc, tru);
            Vxcopy(nx, ny, nz, RAT(u, VAT2(iz, 1, lev)), w2);
            rho = Vxdot(nx, ny, nz, w1, w2);

            // Stopping test
            // w2=A*x, w1=x, stop = 2-norm(A*x-lamda*x)
            // 'error' (the residual norm) is only reported; the loop exit
            // below is decided by the relative change of rho.
            alpha = -1.0;
            Vxcopy(nx, ny, nz, w1, w3);
            Vxcopy(nx, ny, nz, w2, w4);
            Vxscal(nx, ny, nz, &rho, w3);
            Vxaxpy(nx, ny, nz, &alpha, w3, w4);
            error = Vxnrm2(nx, ny, nz, w4);
            relerr = VABS( rho - oldrho ) / VABS( rho );

            // I/O
            if (*iinfo > 3) {
                Vnm_print(2, "Vmpower: iter=%d; error=%f; relerr=%f; estimate=%f", *iters, error, relerr, rho);
            }

            if ((relerr < *tol) || (*iters == *itmax)) {
                break;
            }

            oldrho = rho;
        }
    }

    *eigmax = rho;
}
/*
 * Guess what kind of input device this is from the capabilities reported in
 * `info`, and configure `tun` accordingly.  Three profiles are tried in
 * order and the first match wins; when none matches, nothing is changed.
 *
 *   1. relative X/Y plus a left button      -> treated as a mouse
 *   2. absolute X/Y plus a touch button     -> treated as a tablet
 *   3. absolute X/Y/Z/RX/RY/RZ, each with
 *      a range symmetric around zero        -> treated as a 6-axis device
 *
 * NOTE(review): VREL, VABS, VABSASREL, BUTTON, BUTTON_TEST_AND_ASSIGN,
 * RVALUATOR and AVALUATOR are macros defined elsewhere; judging from their
 * use here they presumably bind a kernel axis/button code to an X valuator
 * index / button number using `tun` and `info` - confirm against their
 * definitions.
 */
void tunAutoconfigDeviceRec (LocalDevicePtr local, TunDevicePtr tun, TunDeviceInfo info)
{
  /* Profile 1: mouse */
  if (TUN_DEVICE_TEST_VAL_REL (info, REL_X) &&
      TUN_DEVICE_TEST_VAL_REL (info, REL_Y) &&
      TUN_DEVICE_TEST_KEY (info, BTN_LEFT))
    {
      OTLOG (local, "I found Rel(X,Y), Button(Left)");
      OTLOG (local, "I think it is a mouse! :)");
      VREL (REL_X, 0);
      VREL (REL_Y, 1);

      /* An optional wheel is flagged for the mouse-wheel hack. */
      if (TUN_DEVICE_TEST_VAL_REL (info, REL_WHEEL))
        {
          OTLOG (local, "I found mouse wheel - mapping to BUTTON 4 & 5");
          RVALUATOR (REL_WHEEL). mouse_wheel_hack = TRUE;
        }

      BUTTON_TEST_AND_ASSIGN (BTN_LEFT, 1);
      BUTTON_TEST_AND_ASSIGN (BTN_RIGHT, 2);
      BUTTON_TEST_AND_ASSIGN (BTN_MIDDLE, 3);

      tun->is_absolute = FALSE;
      return;
    }

  /* Profile 2: tablet */
  if (TUN_DEVICE_TEST_VAL_ABS (info, ABS_X) &&
      TUN_DEVICE_TEST_VAL_ABS (info, ABS_Y) &&
      TUN_DEVICE_TEST_KEY (info, BTN_TOUCH))
    {
      OTLOG (local, "I found Abs (X,Y), Button(Touch)");
      OTLOG (local, "I think it is a Tablet! :)");
      VABS (ABS_X,0);
      VABS (ABS_Y,1);

      TLOG ("Reverse Y coordinate (tablets have 0,0 in left lower corner)");
      AVALUATOR (ABS_Y).upsidedown = TRUE;

      /* Optional axes take the following indices, in this order. */
      if (TUN_DEVICE_TEST_VAL_ABS (info, ABS_PRESSURE))
        VABS (ABS_PRESSURE, 2);
      if (TUN_DEVICE_TEST_VAL_ABS (info, ABS_TILT_X))
        VABS (ABS_TILT_X, 3);
      if (TUN_DEVICE_TEST_VAL_ABS (info, ABS_TILT_Y))
        VABS (ABS_TILT_Y, 4);

      BUTTON (BTN_TOUCH, 1);
      BUTTON_TEST_AND_ASSIGN (BTN_STYLUS, 2);

      /* The pen-tool key, when present, is reported as proximity. */
      if (TUN_DEVICE_TEST_KEY (info, BTN_TOOL_PEN))
        {
          OTLOG (local, "Proximity event using ToolPen button");
          tun->lbut_to_xbut_tbl [BTN_TOOL_PEN - tun->first_lbutton] = TUN_BUTTON_PROXIMITY;
        }

      tun->is_absolute = TRUE;
      return;
    }

  /* Profile 3: six absolute axes, each with min == -max */
  if (TUN_DEVICE_TEST_VAL_ABS (info, ABS_X) &&
      TUN_DEVICE_TEST_VAL_ABS (info, ABS_Y) &&
      TUN_DEVICE_TEST_VAL_ABS (info, ABS_Z) &&
      TUN_DEVICE_TEST_VAL_ABS (info, ABS_RX) &&
      TUN_DEVICE_TEST_VAL_ABS (info, ABS_RY) &&
      TUN_DEVICE_TEST_VAL_ABS (info, ABS_RZ) &&
      AVALUATOR(ABS_X).min == - AVALUATOR(ABS_X).max &&
      AVALUATOR(ABS_Y).min == - AVALUATOR(ABS_Y).max &&
      AVALUATOR(ABS_Z).min == - AVALUATOR(ABS_Z).max &&
      AVALUATOR(ABS_RX).min == - AVALUATOR(ABS_RX).max &&
      AVALUATOR(ABS_RY).min == - AVALUATOR(ABS_RY).max &&
      AVALUATOR(ABS_RZ).min == - AVALUATOR(ABS_RZ).max)
    {
      OTLOG (local, "found Abs (X,Y,Z,RX,RY,RZ)");
      OTLOG (local, "I think it is some sort of 6DO device! :)");

      /* The axes are absolute in hardware but the device is flagged */
      /* as non-absolute below.                                      */
      VABSASREL (ABS_X, 0);
      VABSASREL (ABS_Y, 1);
      VABSASREL (ABS_Z, 2);
      VABSASREL (ABS_RX, 3);
      VABSASREL (ABS_RY, 4);
      VABSASREL (ABS_RZ, 5);

      tun->is_absolute = FALSE;
      return;
    }
}
void pbdirectpolforce_(double uind[maxatm][3], double uinp[maxatm][3], double rff[maxatm][3], double rft[maxatm][3]) { Vpmg *pmg[NOSH_MAXCALC]; Vpmgp *pmgp[NOSH_MAXCALC]; Vpbe *pbe[NOSH_MAXCALC]; MGparm *mgparm = VNULL; PBEparm *pbeparm = VNULL; Vatom *atom = VNULL; double kT, force[3], torque[3]; double sign, zkappa2, epsp, epsw; int i,j; for (i=0; i<NOSH_MAXCALC; i++) { pmg[i] = VNULL; pmgp[i] = VNULL; pbe[i] = VNULL; } // Read the converged induced dipole data into APBS Vatom structures. for (i=0; i < alist[0]->number; i++){ atom = Valist_getAtom(alist[0],i); Vatom_setInducedDipole(atom, uind[i]); Vatom_setNLInducedDipole(atom, uinp[i]); for (j=0;j<3;j++){ rff[i][j] = 0.0; rft[i][j] = 0.0; } } for (i=0; i<2; i++) { VASSERT(permU[i] != VNULL); VASSERT(indU[i] != VNULL); VASSERT(nlIndU[i] != VNULL); pmg[i] = VNULL; pmgp[i] = VNULL; pbe[i] = VNULL; /* Useful local variables */ mgparm = nosh->calc[i]->mgparm; pbeparm = nosh->calc[i]->pbeparm; /* Set up problem */ if (!initMG(i, nosh, mgparm, pbeparm, realCenter, pbe, alist, dielXMap, dielYMap, dielZMap, kappaMap, chargeMap, pmgp, pmg, potMap)) { Vnm_tprint( 2, "Error setting up MG calculation!\n"); return; } if (i == 0) { sign = -1.0; } else { sign = 1.0; } // Q-Phi Force & Torque if (!pmg[i]->pmgp->nonlin && (pmg[i]->surfMeth == VSM_SPLINE || pmg[i]->surfMeth == VSM_SPLINE3 || pmg[i]->surfMeth == VSM_SPLINE4)) { for (j=0; j < alist[0]->number; j++){ Vpmg_qfDirectPolForce(pmg[i], permU[i], indU[i], j, force, torque); rff[j][0] += sign * force[0]; rff[j][1] += sign * force[1]; rff[j][2] += sign * force[2]; rft[j][0] += sign * torque[0]; rft[j][1] += sign * torque[1]; rft[j][2] += sign * torque[2]; Vpmg_qfNLDirectPolForce(pmg[i], permU[i], nlIndU[i], j,force,torque); rff[j][0] += sign * force[0]; rff[j][1] += sign * force[1]; rff[j][2] += sign * force[2]; rft[j][0] += sign * torque[0]; rft[j][1] += sign * torque[1]; rft[j][2] += sign * torque[2]; } // Dieletric Boundary Force epsp = Vpbe_getSoluteDiel(pmg[i]->pbe); 
epsw = Vpbe_getSolventDiel(pmg[i]->pbe); if (VABS(epsp-epsw) > VPMGSMALL) { for (j=0; j < alist[0]->number; j++){ Vpmg_dbDirectPolForce(pmg[i], permU[i], indU[i], j, force); rff[j][0] += sign * force[0]; rff[j][1] += sign * force[1]; rff[j][2] += sign * force[2]; Vpmg_dbNLDirectPolForce(pmg[i], permU[i], nlIndU[i], j, force); rff[j][0] += sign * force[0]; rff[j][1] += sign * force[1]; rff[j][2] += sign * force[2]; } } // Ionic Boundary Force zkappa2 = Vpbe_getZkappa2(pmg[i]->pbe); if (zkappa2 > VPMGSMALL) { for (j=0; j < alist[0]->number; j++){ Vpmg_ibDirectPolForce(pmg[i], permU[i], indU[i], j, force); rff[j][0] += sign * force[0]; rff[j][1] += sign * force[1]; rff[j][2] += sign * force[2]; Vpmg_ibNLDirectPolForce(pmg[i], permU[i], nlIndU[i], j, force); rff[j][0] += sign * force[0]; rff[j][1] += sign * force[1]; rff[j][2] += sign * force[2]; } } } } // kT in kcal/mol kT = Vunit_kb * (1e-3) * Vunit_Na * 298.15 / 4.184; for (i=0; i<alist[0]->number; i++){ rff[i][0] *= kT; rff[i][1] *= kT; rff[i][2] *= kT; rft[i][0] *= kT; rft[i][1] *= kT; rft[i][2] *= kT; } killMG(nosh, pbe, pmgp, pmg); }
// Emits code for fctiw: convert the value in FB to a signed integer and
// store the result in FD, updating the emulated FPSCR along the way.
//
// Emitted sequence, in order:
//   1. compare FB against the bounds stored in minmaxFloat (offset 8 = max,
//      offset 0 = min); when out of range, FD is set to the bound, VXCVI is
//      raised and control jumps to the common tail;
//   2. otherwise the host rounding mode is set from the low two FPSCR bits,
//      the conversion is performed and the inexact/rounded flags are derived
//      by comparing the converted value with the source;
//   3. common tail: OR doublenum into FD, optionally update CR1, store FPSCR
//      and release all temporaries.
//
// NOTE(review): the exact meaning of FRFIMask/FIMask/FRMask, of doublenum and
// of the Operand2(x, 5) encodings is defined outside this function - the
// comments below describe only what the visible calls do.
void JitArm::fctiwx(UGeckoInstruction inst)
{
	INSTRUCTION_START
	JITDISABLE(bJITFloatingPointOff)
	u32 b = inst.FB;
	u32 d = inst.FD;

	ARMReg vB = fpr.R0(b);
	ARMReg vD = fpr.R0(d);
	// Scratch FP registers and GPRs; all are unlocked at the end.
	ARMReg V0 = fpr.GetReg();
	ARMReg V1 = fpr.GetReg();
	ARMReg V2 = fpr.GetReg();

	ARMReg rA = gpr.GetReg();
	ARMReg fpscrReg = gpr.GetReg();

	FixupBranch DoneMax, DoneMin;
	// fpscrReg holds a working copy of the emulated FPSCR, rA the table address.
	LDR(fpscrReg, R9, PPCSTATE_OFF(fpscr));
	MOVI2R(rA, (u32)minmaxFloat);

	// Check if greater than max float
	{
		VLDR(V0, rA, 8); // Load Max
		VCMPE(vB, V0);
		VMRS(_PC); // Loads in to APSR
		FixupBranch noException = B_CC(CC_LE);
		VMOV(vD, V0); // Set to max
		SetFPException(fpscrReg, FPSCR_VXCVI);
		DoneMax = B();
		SetJumpTarget(noException);
	}
	// Check if less than min float
	{
		VLDR(V0, rA, 0);
		VCMPE(vB, V0);
		VMRS(_PC);
		FixupBranch noException = B_CC(CC_GE);
		VMOV(vD, V0);
		SetFPException(fpscrReg, FPSCR_VXCVI);
		DoneMin = B();
		SetJumpTarget(noException);
	}
	// Within ranges, convert to integer
	// Set rounding mode first
	// PPC <-> ARM rounding modes
	// 0, 1, 2, 3 <-> 0, 3, 1, 2
	ARMReg rB = gpr.GetReg();
	VMRS(rA);
	// Bits 22-23
	BIC(rA, rA, Operand2(3, 5));

	LDR(rB, R9, PPCSTATE_OFF(fpscr));
	AND(rB, rB, 0x3); // Get the FPSCR rounding bits
	// Each ORR below is emitted under the condition set just before it, so
	// at most one of them takes effect at run time.
	CMP(rB, 1);
	SetCC(CC_EQ); // zero
		ORR(rA, rA, Operand2(3, 5));
	SetCC(CC_NEQ);
		CMP(rB, 2); // +inf
	SetCC(CC_EQ);
		ORR(rA, rA, Operand2(1, 5));
	SetCC(CC_NEQ);
		CMP(rB, 3); // -inf
	SetCC(CC_EQ);
		ORR(rA, rA, Operand2(2, 5));
	SetCC();
	// Install the selected mode, convert, then write rA back once more.
	// NOTE(review): the second VMSR writes rA with both mode bits set -
	// confirm that this is the intended "restore".
	VMSR(rA);
	ORR(rA, rA, Operand2(3, 5));
	VCVT(vD, vB, TO_INT | IS_SIGNED);
	VMSR(rA);
	gpr.Unlock(rB);
	// Compare the converted value with the source: equal takes the
	// conditional branch to DoneEqual after clearing FRFIMask.
	VCMPE(vD, vB);
	VMRS(_PC);

	SetCC(CC_EQ);
		BIC(fpscrReg, fpscrReg, FRFIMask);
		FixupBranch DoneEqual = B();
	SetCC();
	// Not equal: raise XX and set FI; FR is set as well when the magnitude
	// of the result exceeds that of the source.
	SetFPException(fpscrReg, FPSCR_XX);
	ORR(fpscrReg, fpscrReg, FIMask);
	VABS(V1, vB);
	VABS(V2, vD);
	VCMPE(V2, V1);
	VMRS(_PC);
	SetCC(CC_GT);
		ORR(fpscrReg, fpscrReg, FRMask);
	SetCC();
	SetJumpTarget(DoneEqual);

	// Common tail, also reached from the two out-of-range paths.
	SetJumpTarget(DoneMax);
	SetJumpTarget(DoneMin);

	MOVI2R(rA, (u32)&doublenum);
	VLDR(V0, rA, 0);
	NEONXEmitter nemit(this);
	nemit.VORR(vD, vD, V0);

	if (inst.Rc) Helper_UpdateCR1(fpscrReg, rA);

	// Write the updated FPSCR back and release every temporary.
	STR(fpscrReg, R9, PPCSTATE_OFF(fpscr));
	gpr.Unlock(rA);
	gpr.Unlock(fpscrReg);
	fpr.Unlock(V0);
	fpr.Unlock(V1);
	fpr.Unlock(V2);
}
// Inline replacement for a guest fabsf(): emits a single VABS from FPR 12
// into FPR 0.
// NOTE(review): presumably f12 carries the argument and f0 the result under
// the guest calling convention - confirm.
// Returns 4, the number of instructions in the replaced MIPS function.
int ArmJit::Replace_fabsf() {
	const int resultReg = 0;
	const int argumentReg = 12;
	const int replacedInstructionCount = 4;  // Number of instructions in the MIPS function

	fpr.MapDirtyIn(resultReg, argumentReg);
	VABS(fpr.R(resultReg), fpr.R(argumentReg));

	return replacedInstructionCount;
}
/*
 * ///////////////////////////////////////////////////////////////////////////
 * // Routine:  Vpee_markRefine
 * //
 * // Purpose:  Mark simplices for refinement, either uniformly (delegated to
 * //           Gem_markRefine) or from a per-simplex error estimate that is
 * //           compared against a barrier derived from etol.
 * //
 * // Input:    thee  = the Vpee object (partition / attenuation settings)
 * //           am    = algebra manager supplying aprx, u, ud and f
 * //           level = not used by this routine
 * //           akey  = marking method:
 * //                     -1, 0 : handed to Gem_markRefine
 * //                     2     : Aprx_estNonlinResid
 * //                     3     : Aprx_estLocalProblem
 * //                     4     : Aprx_estDualProblem
 * //                   anything else is rejected with a message
 * //           rcol  = color passed on to Vpee_ourSimp / Gem_markRefine
 * //           etol  = error tolerance
 * //           bkey  = barrier type: 0 -> etol^2, 1 -> etol^2 / numSS
 * //
 * // Output:   Number of simplices marked (0 on bad input, or when bkey==1
 * //           and the accumulated global error is already <= etol).
 * //
 * // Notes:    killFlag selects the error attenuation that is reported:
 * //             0 none, 1 maximum, 2 spherical, 3 neighbor-based.
 * //           The neighbor-based case is not implemented and asserts.  It
 * //           used to be tested with "== 2" a second time, which made the
 * //           branch unreachable and sent killFlag 3 to the "bogus
 * //           killFlag" path instead.
 * //
 * //           NOTE(review): because akey -1/0 return early and akey 1 is
 * //           rejected by the informative i/o block, the "non-error-based
 * //           methods" section further down can no longer be reached; it is
 * //           kept unchanged.
 * //
 * // Author:   Nathan Baker (and Michael Holst: the author of AM_markRefine,
 * //           on which this is based)
 * ///////////////////////////////////////////////////////////////////////////
 */
VPUBLIC int Vpee_markRefine(Vpee *thee, AM *am, int level, int akey, int rcol, double etol, int bkey
  ) {

    Aprx *aprx;
    int marked = 0, markMe, i, smid, count, currentQ;
    double minError = 0.0, maxError = 0.0, errEst = 0.0, mlevel, barrier;
    SS *sm;

    VASSERT(thee != VNULL);

    /* Get the Aprx object from AM */
    aprx = am->aprx;

    /* input check and some i/o */
    if ( ! ((-1 <= akey) && (akey <= 4)) ) {
        Vnm_print(0,"Vpee_markRefine: bad refine key; simplices marked = %d\n",
            marked);
        return marked;
    }

    /* For uniform markings, we have no effect */
    if ((-1 <= akey) && (akey <= 0)) {
        marked = Gem_markRefine(thee->gm, akey, rcol);
        return marked;
    }

    /* Informative I/O */
    if (akey == 2) {
        Vnm_print(0,"Vpee_estRefine: using Aprx_estNonlinResid().\n");
    } else if (akey == 3) {
        Vnm_print(0,"Vpee_estRefine: using Aprx_estLocalProblem().\n");
    } else if (akey == 4) {
        Vnm_print(0,"Vpee_estRefine: using Aprx_estDualProblem().\n");
    } else {
        Vnm_print(0,"Vpee_estRefine: bad key given; simplices marked = %d\n",
            marked);
        return marked;
    }
    if (thee->killFlag == 0) {
        Vnm_print(0, "Vpee_markRefine: No error attenuation -- simplices in all partitions will be marked.\n");
    } else if (thee->killFlag == 1) {
        Vnm_print(0, "Vpee_markRefine: Maximum error attenuation -- only simplices in local partition will be marked.\n");
    } else if (thee->killFlag == 2) {
        Vnm_print(0, "Vpee_markRefine: Spherical error attenutation -- simplices within a sphere of %4.3f times the size of the partition will be marked\n",
          thee->killParam);
    } else if (thee->killFlag == 3) {
        /* neighbor-based attenuation: announced but not available */
        Vnm_print(0, "Vpee_markRefine: Neighbor-based error attenuation -- simplices in the local and neighboring partitions will be marked [NOT IMPLEMENTED]!\n");
        VASSERT(0);
    } else {
        Vnm_print(2,"Vpee_markRefine: bogus killFlag given; simplices marked = %d\n",
            marked);
        return marked;
    }

    /* set the barrier type */
    mlevel = (etol*etol) / Gem_numSS(thee->gm);
    if (bkey == 0) {
        barrier = (etol*etol);
        Vnm_print(0,"Vpee_estRefine: forcing [err per S] < [TOL] = %g\n",
            barrier);
    } else if (bkey == 1) {
        barrier = mlevel;
        Vnm_print(0,"Vpee_estRefine: forcing [err per S] < [(TOL^2/numS)^{1/2}] = %g\n",
            VSQRT(barrier));
    } else {
        Vnm_print(0,"Vpee_estRefine: bad bkey given; simplices marked = %d\n",
            marked);
        return marked;
    }

    /* timer */
    Vnm_tstart(30, "error estimation");

    /* count = num generations to produce from marked simplices (minimally) */
    count = 1; /* must be >= 1 */

    /* check the refinement Q for emptyness */
    currentQ = 0;
    if (Gem_numSQ(thee->gm,currentQ) > 0) {
        Vnm_print(0,"Vpee_markRefine: non-empty refinement Q%d....clearing..",
            currentQ);
        Gem_resetSQ(thee->gm,currentQ);
        Vnm_print(0,"..done.\n");
    }
    if (Gem_numSQ(thee->gm,!currentQ) > 0) {
        Vnm_print(0,"Vpee_markRefine: non-empty refinement Q%d....clearing..",
            !currentQ);
        Gem_resetSQ(thee->gm,!currentQ);
        Vnm_print(0,"..done.\n");
    }
    VASSERT( Gem_numSQ(thee->gm,currentQ) == 0 );
    VASSERT( Gem_numSQ(thee->gm,!currentQ) == 0 );

    /* clear everyone's refinement flags */
    Vnm_print(0,"Vpee_markRefine: clearing all simplex refinement flags..");
    for (i=0; i<Gem_numSS(thee->gm); i++) {
        if ( (i>0) && (i % VPRTKEY) == 0 ) Vnm_print(0,"[MS:%d]",i);
        sm = Gem_SS(thee->gm,i);
        SS_setRefineKey(sm,currentQ,0);
        SS_setRefineKey(sm,!currentQ,0);
        SS_setRefinementCount(sm,0);
    }
    Vnm_print(0,"..done.\n");

    /* NON-ERROR-BASED METHODS */
    /* Simplex flag clearing */
    if (akey == -1) return marked;
    /* Uniform & user-defined refinement*/
    if ((akey == 0) || (akey == 1)) {
        smid = 0;
        while ( smid < Gem_numSS(thee->gm)) {
            /* Get the simplex and find out if it's markable */
            sm = Gem_SS(thee->gm,smid);
            markMe = Vpee_ourSimp(thee, sm, rcol);
            if (markMe) {
                if (akey == 0) {
                    marked++;
                    Gem_appendSQ(thee->gm,currentQ, sm);
                    SS_setRefineKey(sm,currentQ,1);
                    SS_setRefinementCount(sm,count);
                } else if (Vpee_userDefined(thee, sm)) {
                    marked++;
                    Gem_appendSQ(thee->gm,currentQ, sm);
                    SS_setRefineKey(sm,currentQ,1);
                    SS_setRefinementCount(sm,count);
                }
            }
            smid++;
        }
    }

    /* ERROR-BASED METHODS */
    /* gerror = global error accumulation */
    aprx->gerror = 0.;

    /* traverse the simplices and process the error estimates */
    Vnm_print(0,"Vpee_markRefine: estimating error..");
    smid = 0;
    while ( smid < Gem_numSS(thee->gm)) {

        /* Get the simplex and find out if it's markable */
        sm = Gem_SS(thee->gm,smid);
        markMe = Vpee_ourSimp(thee, sm, rcol);

        if ( (smid>0) && (smid % VPRTKEY) == 0 ) Vnm_print(0,"[MS:%d]",smid);

        /* Produce an error estimate for this element if it is in the set */
        if (markMe) {
            if (akey == 2) {
                errEst = Aprx_estNonlinResid(aprx, sm, am->u,am->ud,am->f);
            } else if (akey == 3) {
                errEst = Aprx_estLocalProblem(aprx, sm, am->u,am->ud,am->f);
            } else if (akey == 4) {
                errEst = Aprx_estDualProblem(aprx, sm, am->u,am->ud,am->f);
            }
            VASSERT( errEst >= 0. );

            /* if error estimate above tol, mark element for refinement */
            if ( errEst > barrier ) {
                marked++;
                Gem_appendSQ(thee->gm,currentQ, sm); /*add to refinement Q*/
                SS_setRefineKey(sm,currentQ,1);      /* note now on refine Q */
                SS_setRefinementCount(sm,count);     /* refine X many times? */
            }

            /* keep track of min/max errors over the mesh */
            /* (minError starts at 0.0, so it can never rise above that) */
            minError = VMIN2( VSQRT(VABS(errEst)), minError );
            maxError = VMAX2( VSQRT(VABS(errEst)), maxError );

            /* store the estimate */
            Bvec_set( aprx->wev, smid, errEst );

            /* accumlate into global error (errEst is SQUAREd already) */
            aprx->gerror += errEst;

        /* otherwise store a zero for the estimate */
        } else {
            Bvec_set( aprx->wev, smid, 0. );
        }

        smid++;
    }

    /* do some i/o */
    Vnm_print(0,"..done. [marked=<%d/%d>]\n",marked,Gem_numSS(thee->gm));
    Vnm_print(0,"Vpee_estRefine: TOL=<%g> Global_Error=<%g>\n",
        etol, aprx->gerror);
    Vnm_print(0,"Vpee_estRefine: (TOL^2/numS)^{1/2}=<%g> Max_Ele_Error=<%g>\n",
        VSQRT(mlevel),maxError);
    Vnm_tstop(30, "error estimation");

    /* check for making the error tolerance */
    if ((bkey == 1) && (aprx->gerror <= etol)) {
        Vnm_print(0,
            "Vpee_estRefine: *********************************************\n");
        Vnm_print(0,
            "Vpee_estRefine: Global Error criterion met; setting marked=0.\n");
        Vnm_print(0,
            "Vpee_estRefine: *********************************************\n");
        marked = 0;
    }

    /* return */
    return marked;
}
// JIT compiler for the VFPU one-source vector ops. The sub-operation is taken
// from bits 16..20 of the opcode; vmov, vabs, vneg, vrcp, vrsq, vsqrt and
// vnrcp are emitted inline, every other listed sub-operation goes to DISABLE.
//
// NOTE(review): the unconditional DISABLE right after CONDITIONAL_DISABLE
// presumably leaves the function before any code below is reached (the macro
// is defined outside this chunk) - confirm before relying on this path.
void Jit::Comp_VV2Op(u32 op) {
	CONDITIONAL_DISABLE;

	DISABLE;

	if (js.HasUnknownPrefix())
		DISABLE;

	VectorSize sz = GetVecSize(op);
	const int laneCount = GetNumVectorElements(sz);

	u8 srcRegs[4], dstRegs[4];
	GetVectorRegsPrefixS(srcRegs, sz, _VS);
	GetVectorRegsPrefixD(dstRegs, sz, _VD);

	// Choose where each lane's result is written first: straight into the
	// destination when the overlap check allows it, otherwise into a temp.
	ARMReg outRegs[4];
	for (int lane = 0; lane < laneCount; ++lane) {
		if (IsOverlapSafeAllowS(dstRegs[lane], lane, laneCount, srcRegs)) {
			// Only request the old contents when destination and source
			// are the very same register.
			int mapFlags = MAP_DIRTY;
			if (dstRegs[lane] != srcRegs[lane])
				mapFlags |= MAP_NOINIT;
			fpr.MapRegV(dstRegs[lane], mapFlags);
			fpr.SpillLockV(dstRegs[lane]);
			outRegs[lane] = fpr.V(dstRegs[lane]);
		} else {
			int tempReg = fpr.GetTempV();
			fpr.MapRegV(tempReg, MAP_NOINIT | MAP_DIRTY);
			fpr.SpillLockV(tempReg);
			outRegs[lane] = fpr.V(tempReg);
		}
	}

	// The sub-operation does not depend on the lane, so decode it once.
	const u32 subOp = (op >> 16) & 0x1f;

	// Warning: srcRegs[lane] and outRegs[lane] may be the same reg.
	// Helps for vmov, hurts for vrcp, etc.
	for (int lane = 0; lane < laneCount; ++lane) {
		switch (subOp) {
		case 0:  // vmov: d[i] = s[i] (probably for swizzle)
			VMOV(outRegs[lane], fpr.V(srcRegs[lane]));
			break;

		case 1:  // vabs: d[i] = fabsf(s[i])
			VABS(outRegs[lane], fpr.V(srcRegs[lane]));
			break;

		case 2:  // vneg: d[i] = -s[i]
			VNEG(outRegs[lane], fpr.V(srcRegs[lane]));
			break;

		case 16:  // vrcp: d[i] = 1.0f / s[i]
			MOVI2F(S0, 1.0f, R0);
			VDIV(outRegs[lane], S0, fpr.V(srcRegs[lane]));
			break;

		case 17:  // vrsq: d[i] = 1.0f / sqrtf(s[i])
			MOVI2F(S0, 1.0f, R0);
			VSQRT(S1, fpr.V(srcRegs[lane]));
			VDIV(outRegs[lane], S0, S1);
			break;

		case 22:  // vsqrt: d[i] = sqrtf(s[i]), followed by an absolute value
			VSQRT(outRegs[lane], fpr.V(srcRegs[lane]));
			VABS(outRegs[lane], outRegs[lane]);
			break;

		case 24:  // vnrcp: d[i] = -1.0f / s[i]
			MOVI2F(S0, -1.0f, R0);
			VDIV(outRegs[lane], S0, fpr.V(srcRegs[lane]));
			break;

		// Not emitted inline:
		case 4:   // vsat0:  clamp to [0, 1]
		case 5:   // vsat1:  clamp to [-1, 1]
		case 18:  // vsin:   sinf((float)M_PI_2 * s[i])
		case 19:  // vcos:   cosf((float)M_PI_2 * s[i])
		case 20:  // vexp2:  powf(2.0f, s[i])
		case 21:  // vlog2:  logf(s[i]) / log(2.0f)
		case 23:  // vasin:  asinf(s[i] * (float)M_2_PI)
		case 26:  // vnsin:  -sinf((float)M_PI_2 * s[i])
		case 28:  // vrexp2: 1.0f / expf(s[i] * (float)M_LOG2E)
			DISABLE;
			break;
		}
	}

	// Copy every lane from its staging register into the real destination.
	fpr.MapRegsV(dstRegs, sz, MAP_NOINIT | MAP_DIRTY);
	for (int lane = 0; lane < laneCount; ++lane) {
		VMOV(fpr.V(dstRegs[lane]), outRegs[lane]);
	}

	ApplyPrefixD(dstRegs, sz);

	fpr.ReleaseSpillLocks();
}
/*
 * ***************************************************************************
 * Routine:  Aprx_partInert
 *
 * Purpose:  Partition the domain using inertial bisection.
 *
 *           The numC points simH[].bc (with weights simH[].mass) are treated
 *           as point masses of a rigid body, and the classical mechanics
 *           notions of inertia tensor and principal axes are used to split
 *           them.
 *
 * Notes:    The symmetric 3x3 inertia tensor is accumulated from the point
 *           coordinates as given; the description this routine came with
 *           assumes the coordinate system has already been shifted so that
 *           the center of mass sits at the origin.
 *
 *           The tensor is scaled by its Mat3_nrm8 norm and decomposed with
 *           Mat3_qri.  The columns of the resulting V are used as the
 *           principal axes and the diagonal of D as the corresponding
 *           moments.  The axis with the SMALLEST moment (in absolute value)
 *           is the direction along which the body is most "line-like"
 *           (assuming equal masses), so it is used as the normal of the
 *           cutting line (d=2) or plane (d=3).
 *
 *           Each point is then reduced to its inner product with that unit
 *           normal.  A positive value lies on one side of the cut, a
 *           negative value on the other; a zero value is exactly on the cut
 *           and may be assigned to either side by the caller.
 *
 * Input:    thee   = the Aprx object (not referenced in this routine)
 *           pcolor = partition color, used only in the progress message
 *           numC   = number of points in simH[] and entries in evec[]
 *           simH   = per-point data; only .bc[0..2] and .mass are read
 *
 * Output:   evec[0..numC-1] = inner products with the cutting normal,
 *           scaled so that the vector evec has unit 2-norm.
 *           The return value is always 0.
 *
 * Author:   Michael Holst
 * ***************************************************************************
 */
VPUBLIC int Aprx_partInert(Aprx *thee, int pcolor, int numC,
    double *evec, simHelper *simH)
{
    int pt, row, col, axis, minAxis;
    double radSq, tensorNorm, moment, minMoment, axisLen, evecNorm;
    double cutDir[3];
    Mat3 eye, inertia, eigVec, eigVal;

    Vnm_print(0,"Aprx_partInert: WARNING: assuming single-chart manifold.\n");
    Vnm_print(0,"Aprx_partInert: [pc=%d] partitioning:\n", pcolor);

    /* accumulate the inertia tensor, one point at a time */
    Mat3_eye(eye);
    Mat3_init(inertia, 0.);
    for (pt=0; pt<numC; pt++) {

        /* squared distance of this point from the origin */
        radSq = 0.;
        for (row=0; row<3; row++) {
            radSq += ( simH[pt].bc[row] * simH[pt].bc[row] );
        }

        /* mass * (|r|^2 * identity - r r^T) */
        for (row=0; row<3; row++) {
            for (col=0; col<3; col++) {
                inertia[row][col] += ( simH[pt].mass *
                    (eye[row][col]*radSq
                     - simH[pt].bc[row]*simH[pt].bc[col]) );
            }
        }
    }

    /* scale the tensor, then compute axes (eigVec) and moments (eigVal) */
    tensorNorm = Mat3_nrm8(inertia);
    Mat3_scal(inertia, 1./tensorNorm);
    (void)Mat3_qri(eigVec, eigVal, inertia);

    /* pick the axis whose moment has the smallest magnitude */
    minMoment = VLARGE;
    minAxis = -1;
    for (axis=0; axis<3; axis++) {
        moment = VABS(eigVal[axis][axis]);
        if ( moment < minMoment ) {
            minMoment = moment;
            minAxis = axis;
        }
    }
    VASSERT( minMoment > 0. );
    VASSERT( minMoment != VLARGE );
    VASSERT( minAxis >= 0 );

    /* extract that axis (a column of eigVec) and make it a unit vector */
    for (row=0; row<3; row++) {
        cutDir[row] = eigVec[row][minAxis];
    }
    axisLen = Vec3_nrm2(cutDir);
    VASSERT( axisLen > 0. );
    Vec3_scal(cutDir,1./axisLen);

    /* signed position of every point relative to the cutting line/plane */
    evecNorm = 0;
    for (pt=0; pt<numC; pt++) {
        evec[pt] = Vec3_dot( simH[pt].bc, cutDir );
        evecNorm += (evec[pt]*evec[pt]);
    }
    evecNorm = VSQRT( evecNorm );

    /* scale the result to unit 2-norm */
    for (pt=0; pt<numC; pt++) {
        evec[pt] /= evecNorm;
    }

    return 0;
}
// Emits ARM code for fctiwzx: converts the first slot of FB to a signed
// integer with round-toward-zero, stores it in the first slot of FD, and
// keeps the FPSCR image in the PPC state up to date (VXCVI on out-of-range
// input, XX/FI/FR on an inexact conversion).
void JitArm::fctiwzx(UGeckoInstruction inst)
{
	INSTRUCTION_START
	JITDISABLE(bJITFloatingPointOff)

	u32 srcIdx = inst.FB;
	u32 dstIdx = inst.FD;

	// Register acquisition order is kept exactly as in the original.
	ARMReg srcReg = fpr.R0(srcIdx);
	ARMReg dstReg = fpr.R0(dstIdx);
	ARMReg constReg = fpr.GetReg();
	ARMReg absSrc = fpr.GetReg();
	ARMReg absDst = fpr.GetReg();

	ARMReg addrReg = gpr.GetReg();
	ARMReg statusReg = gpr.GetReg();

	LDR(statusReg, R9, PPCSTATE_OFF(fpscr));
	MOVI2R(addrReg, (u32)minmaxFloat);

	// Above the upper bound (stored at offset 8 of minmaxFloat)?
	VLDR(constReg, addrReg, 8);
	VCMPE(srcReg, constReg);
	VMRS(_PC); // Copies the compare flags into APSR
	FixupBranch notAboveMax = B_CC(CC_LE);
	VMOV(dstReg, constReg); // Saturate to the upper bound
	SetFPException(statusReg, FPSCR_VXCVI);
	FixupBranch doneMax = B();
	SetJumpTarget(notAboveMax);

	// Below the lower bound (stored at offset 0 of minmaxFloat)?
	VLDR(constReg, addrReg, 0);
	VCMPE(srcReg, constReg);
	VMRS(_PC);
	FixupBranch notBelowMin = B_CC(CC_GE);
	VMOV(dstReg, constReg); // Saturate to the lower bound
	SetFPException(statusReg, FPSCR_VXCVI);
	FixupBranch doneMin = B();
	SetJumpTarget(notBelowMin);

	// In range: do the actual conversion, then compare result and input.
	VCVT(dstReg, srcReg, TO_INT | IS_SIGNED | ROUND_TO_ZERO);
	VCMPE(dstReg, srcReg);
	VMRS(_PC);

	// Equal: clear the FR/FI bits and skip the inexact handling.
	SetCC(CC_EQ);
	BIC(statusReg, statusReg, FRFIMask);
	FixupBranch doneEqual = B();
	SetCC();

	// Not equal: raise XX, set FI, and set FR when |result| > |input|.
	SetFPException(statusReg, FPSCR_XX);
	ORR(statusReg, statusReg, FIMask);
	VABS(absSrc, srcReg);
	VABS(absDst, dstReg);
	VCMPE(absDst, absSrc);
	VMRS(_PC);
	SetCC(CC_GT);
	ORR(statusReg, statusReg, FRMask);
	SetCC();
	SetJumpTarget(doneEqual);

	SetJumpTarget(doneMax);
	SetJumpTarget(doneMin);

	// OR the constant at &doublenum into the result register.
	MOVI2R(addrReg, (u32)&doublenum);
	VLDR(constReg, addrReg, 0);
	NEONXEmitter neon(this);
	neon.VORR(dstReg, dstReg, constReg);

	if (inst.Rc)
		Helper_UpdateCR1(statusReg, addrReg);

	// Write the FPSCR image back and release every scratch register.
	STR(statusReg, R9, PPCSTATE_OFF(fpscr));
	gpr.Unlock(addrReg);
	gpr.Unlock(statusReg);
	fpr.Unlock(constReg);
	fpr.Unlock(absSrc);
	fpr.Unlock(absDst);
}
void apbsempole_(int *natom, double x[maxatm][3], double rad[maxatm], double rpole[maxatm][13], double *total, double energy[maxatm], double fld[maxatm][3], double rff[maxatm][3], double rft[maxatm][3]) { /* Misc. pointers to APBS data structures */ Vpmg *pmg[NOSH_MAXCALC]; Vpmgp *pmgp[NOSH_MAXCALC]; Vpbe *pbe[NOSH_MAXCALC]; MGparm *mgparm = VNULL; PBEparm *pbeparm = VNULL; Vatom *atom = VNULL; /* Vgrid configuration for the kappa and dielectric maps */ double nx,ny,nz,hx,hy,hzed,xmin,ymin,zmin; double *data; double zkappa2, epsp, epsw; /* Loop indeces */ int i,j; /* Observables and unit conversion */ double sign, force[3], torque[3], field[3]; double kT,electric,debye; double charge, dipole[3], quad[9]; debye = 4.8033324; for (i=0; i<NOSH_MAXCALC; i++) { pmg[i] = VNULL; pmgp[i] = VNULL; pbe[i] = VNULL; } /* Kill the saved potential Vgrids */ for (i=0; i<2; i++){ if (permU[i] != VNULL) Vgrid_dtor(&permU[i]); if (indU[i] != VNULL) Vgrid_dtor(&indU[i]); if (nlIndU[i] != VNULL) Vgrid_dtor(&nlIndU[i]); } /* Kill the old atom list */ if (alist[0] != VNULL) { Valist_dtor(&alist[0]); } /* Create a new atom list (mol == 1) */ if (alist[0] == VNULL) { alist[0] = Valist_ctor(); alist[0]->atoms = Vmem_malloc(alist[0]->vmem, *natom, (sizeof(Vatom))); alist[0]->number = *natom; } /* Read TINKER input data into Vatom instances. 
*/ for (i=0; i < alist[0]->number; i++){ atom = Valist_getAtom(alist[0],i); Vatom_setAtomID(atom, i); Vatom_setPosition(atom, x[i]); Vatom_setRadius(atom, rad[i]); charge = rpole[i][0]; Vatom_setCharge(atom, charge); dipole[0] = rpole[i][1]; dipole[1] = rpole[i][2]; dipole[2] = rpole[i][3]; Vatom_setDipole(atom, dipole); quad[0] = rpole[i][4]; quad[1] = rpole[i][5]; quad[2] = rpole[i][6]; quad[3] = rpole[i][7]; quad[4] = rpole[i][8]; quad[5] = rpole[i][9]; quad[6] = rpole[i][10]; quad[7] = rpole[i][11]; quad[8] = rpole[i][12]; Vatom_setQuadrupole(atom, quad); /* Useful check printf(" %i %f (%f,%f,%f)\n",i,rad[i], x[i][0], x[i][1], x[i][2]); printf(" %f\n %f,%f,%f\n", charge, dipole[0], dipole[1], dipole[2]); printf(" %f\n", quad[0]); printf(" %f %f\n", quad[3], quad[4]); printf(" %f %f %f\n", quad[6], quad[7], quad[8]); */ energy[i] = 0.0; for (j=0;j<3;j++){ fld[i][j] = 0.0; rff[i][j] = 0.0; rft[i][j] = 0.0; } } nosh->nmol = 1; Valist_getStatistics(alist[0]); /* Only call the setupCalc routine once, so that we can reuse this nosh object */ if (nosh->ncalc < 2) { if (NOsh_setupElecCalc(nosh, alist) != 1) { printf("Error setting up calculations\n"); exit(-1); } } /* Solve the LPBE for the homogeneous and then solvated states */ for (i=0; i<2; i++) { /* Useful local variables */ mgparm = nosh->calc[i]->mgparm; pbeparm = nosh->calc[i]->pbeparm; /* Just to be robust */ if (!MGparm_check(mgparm)){ printf("MGparm Check failed\n"); printMGPARM(mgparm, realCenter); exit(-1); } if (!PBEparm_check(pbeparm)){ printf("PBEparm Check failed\n"); printPBEPARM(pbeparm); exit(-1); } /* Set up the problem */ mgparm->chgs = VCM_PERMANENT; if (!initMG(i, nosh, mgparm, pbeparm, realCenter, pbe, alist, dielXMap, dielYMap, dielZMap, kappaMap, chargeMap, pmgp, pmg, potMap)) { Vnm_tprint( 2, "Error setting up MG calculation!\n"); return; } /* Solve the PDE */ if (solveMG(nosh, pmg[i], mgparm->type) != 1) { Vnm_tprint(2, "Error solving PDE!\n"); return; } /* Set partition information for 
observables and I/O */ /* Note - parallel operation has NOT been tested. */ if (setPartMG(nosh, mgparm, pmg[i]) != 1) { Vnm_tprint(2, "Error setting partition info!\n"); return; } nx = pmg[i]->pmgp->nx; ny = pmg[i]->pmgp->ny; nz = pmg[i]->pmgp->nz; hx = pmg[i]->pmgp->hx; hy = pmg[i]->pmgp->hy; hzed = pmg[i]->pmgp->hzed; xmin = pmg[i]->pmgp->xmin; ymin = pmg[i]->pmgp->ymin; zmin = pmg[i]->pmgp->zmin; /* Save dielectric/kappa maps into Vgrids, then change the nosh * data structure to think it read these maps in from a file. * The goal is to save setup time during convergence of the * induced dipoles. This is under consideration... * */ /* // X (shifted) data = Vmem_malloc(mem, nx*ny*nz, sizeof(double)); Vpmg_fillArray(pmg[i], data, VDT_DIELX, 0.0, pbeparm->pbetype); dielXMap[i] = Vgrid_ctor(nx,ny,nz,hx,hy,hzed, xmin + 0.5*hx,ymin,zmin,data); dielXMap[i]->readdata = 1; // Y (shifted) data = Vmem_malloc(mem, nx*ny*nz, sizeof(double)); Vpmg_fillArray(pmg[i], data, VDT_DIELY, 0.0, pbeparm->pbetype); dielYMap[i] = Vgrid_ctor(nx,ny,nz,hx,hy,hzed, xmin,ymin + 0.5*hy,zmin,data); dielYMap[i]->readdata = 1; // Z (shifted) data = Vmem_malloc(mem, nx*ny*nz, sizeof(double)); Vpmg_fillArray(pmg[i], data, VDT_DIELZ, 0.0, pbeparm->pbetype); dielZMap[i] = Vgrid_ctor(nx,ny,nz,hx,hy,hzed, xmin,ymin,zmin + 0.5*hzed,data); dielZMap[i]->readdata = 1; // Kappa data = Vmem_malloc(mem, nx*ny*nz, sizeof(double)); Vpmg_fillArray(pmg[i], data, VDT_KAPPA, 0.0, pbeparm->pbetype); kappaMap[i] = Vgrid_ctor(nx,ny,nz,hx,hy,hzed,xmin,ymin,zmin,data); kappaMap[i]->readdata = 1; // Update the pbeparam structure, since we now have // dielectric and kappap maps pbeparm->useDielMap = 1; pbeparm->dielMapID = i + 1; pbeparm->useKappaMap = 1; pbeparm->kappaMapID = i + 1; */ data = Vmem_malloc(mem, nx*ny*nz, sizeof(double)); Vpmg_fillArray(pmg[i], data, VDT_POT, 0.0, pbeparm->pbetype, pbeparm); permU[i] = Vgrid_ctor(nx,ny,nz,hx,hy,hzed,xmin,ymin,zmin,data); permU[i]->readdata = 1; // set readdata flag to have 
the dtor to free data if (i == 0){ sign = -1.0; } else { sign = 1.0; } /* Calculate observables */ for (j=0; j < alist[0]->number; j++){ energy[j] += sign * Vpmg_qfPermanentMultipoleEnergy(pmg[i], j); Vpmg_fieldSpline4(pmg[i], j, field); fld[j][0] += sign * field[0]; fld[j][1] += sign * field[1]; fld[j][2] += sign * field[2]; } if (!pmg[i]->pmgp->nonlin && (pmg[i]->surfMeth == VSM_SPLINE || pmg[i]->surfMeth == VSM_SPLINE3 || pmg[i]->surfMeth == VSM_SPLINE4)) { for (j=0; j < alist[0]->number; j++){ Vpmg_qfPermanentMultipoleForce(pmg[i], j, force, torque); rff[j][0] += sign * force[0]; rff[j][1] += sign * force[1]; rff[j][2] += sign * force[2]; rft[j][0] += sign * torque[0]; rft[j][1] += sign * torque[1]; rft[j][2] += sign * torque[2]; } kT = Vunit_kb * (1e-3) * Vunit_Na * 298.15 * 1.0/4.184; epsp = Vpbe_getSoluteDiel(pmg[i]->pbe); epsw = Vpbe_getSolventDiel(pmg[i]->pbe); if (VABS(epsp-epsw) > VPMGSMALL) { for (j=0; j < alist[0]->number; j++){ Vpmg_dbPermanentMultipoleForce(pmg[i], j, force); rff[j][0] += sign * force[0]; rff[j][1] += sign * force[1]; rff[j][2] += sign * force[2]; } } zkappa2 = Vpbe_getZkappa2(pmg[i]->pbe); if (zkappa2 > VPMGSMALL) { for (j=0; j < alist[0]->number; j++) { Vpmg_ibPermanentMultipoleForce(pmg[i], j, force); rff[j][0] += sign * force[0]; rff[j][1] += sign * force[1]; rff[j][2] += sign * force[2]; } } } } //nosh->ndiel = 2; //nosh->nkappa = 2; /* printf("Energy (multipole) %f Kcal/mol\n", *energy); printf("Energy (volume) %f Kcal/mol\n", evol * 0.5 * kT); */ // Convert results into kcal/mol units kT = Vunit_kb * (1e-3) * Vunit_Na * 298.15 * 1.0/4.184; // Electric converts from electron**2/Angstrom to kcal/mol electric = 332.063709; *total = 0.0; for (i=0; i<alist[0]->number; i++){ /* starting with the field in KT/e/Ang^2 multiply by kcal/mol/KT the field is then divided by "electric" to convert to e/Ang^2 */ energy[i] *= 0.5 * kT; *total += energy[i]; fld[i][0] *= kT / electric; fld[i][1] *= kT / electric; fld[i][2] *= kT / electric; 
rff[i][0] *= kT; rff[i][1] *= kT; rff[i][2] *= kT; rft[i][0] *= kT; rft[i][1] *= kT; rft[i][2] *= kT; } killMG(nosh, pbe, pmgp, pmg); }
// JIT compiler for the two-operand FPU instructions. The low six bits of the
// opcode select the operation; anything not listed below goes to DISABLE.
void Jit::Comp_FPU2op(u32 op)
{
	CONDITIONAL_DISABLE;

	int srcFpr = _FS;
	int dstFpr = _FD;
	// logBlocks = 1;

	const u32 funct = op & 0x3f;
	switch (funct)
	{
	case 4:   // sqrt: F(fd) = sqrtf(F(fs))
		fpr.MapDirtyIn(dstFpr, srcFpr);
		VSQRT(fpr.R(dstFpr), fpr.R(srcFpr));
		break;

	case 5:   // abs: F(fd) = fabsf(F(fs))
		fpr.MapDirtyIn(dstFpr, srcFpr);
		VABS(fpr.R(dstFpr), fpr.R(srcFpr));
		break;

	case 6:   // mov: F(fd) = F(fs)
		fpr.MapDirtyIn(dstFpr, srcFpr);
		VMOV(fpr.R(dstFpr), fpr.R(srcFpr));
		break;

	case 7:   // neg: F(fd) = -F(fs)
		fpr.MapDirtyIn(dstFpr, srcFpr);
		VNEG(fpr.R(dstFpr), fpr.R(srcFpr));
		break;

	case 12:  // round.w.s: FsI(fd) = (int)floorf(F(fs)+0.5f)
		fpr.MapDirtyIn(dstFpr, srcFpr);
		VCVT(fpr.R(dstFpr), fpr.R(srcFpr), TO_INT | IS_SIGNED);
		break;

	case 13:  // trunc.w.s: FsI(fd) = Rto0(F(fs))
		fpr.MapDirtyIn(dstFpr, srcFpr);
		VCVT(fpr.R(dstFpr), fpr.R(srcFpr), TO_INT | IS_SIGNED | ROUND_TO_ZERO);
		break;

	case 14:  // ceil.w.s: FsI(fd) = (int)ceilf(F(fs))
		// Implemented as convert(F(fs) + 0.5f).
		// NOTE(review): whether this matches ceilf for inputs that are
		// already whole numbers depends on how the plain VCVT rounds - confirm.
		fpr.MapDirtyIn(dstFpr, srcFpr);
		MOVI2F(S0, 0.5f, R0);
		VADD(S0, fpr.R(srcFpr), S0);
		VCVT(fpr.R(dstFpr), S0, TO_INT | IS_SIGNED);
		break;

	case 15:  // floor.w.s: FsI(fd) = (int)floorf(F(fs))
		// Implemented as convert(F(fs) - 0.5f); same caveat as ceil.w.s.
		fpr.MapDirtyIn(dstFpr, srcFpr);
		MOVI2F(S0, 0.5f, R0);
		VSUB(S0, fpr.R(srcFpr), S0);
		VCVT(fpr.R(dstFpr), S0, TO_INT | IS_SIGNED);
		break;

	case 32:  // cvt.s.w: F(fd) = (float)FsI(fs)
		fpr.MapDirtyIn(dstFpr, srcFpr);
		VCVT(fpr.R(dstFpr), fpr.R(srcFpr), TO_FLOAT | IS_SIGNED);
		break;

	case 36:  // cvt.w.s: FsI(fd) = (int)F(fs), honouring the fcr31 mode
		fpr.MapDirtyIn(dstFpr, srcFpr);

		// R0 = low two bits of fcr31, the MIPS rounding mode:
		//   0: Round nearest
		//   1: Round to zero
		//   2: Round up (ceil)
		//   3: Round down (floor)
		LDR(R0, CTXREG, offsetof(MIPSState, fcr31));
		AND(R0, R0, Operand2(3));

		// Modes 2 and 3: bias by +0.5 (mode 2) or -0.5 (mode 3), convert.
		CMP(R0, Operand2(2));
		SetCC(CC_GE);
		MOVI2F(S0, 0.5f, R1);
		SetCC(CC_GT);
		VSUB(S0, fpr.R(srcFpr), S0);
		SetCC(CC_EQ);
		VADD(S0, fpr.R(srcFpr), S0);
		SetCC(CC_GE);
		VCVT(fpr.R(dstFpr), S0, TO_INT | IS_SIGNED);  /* 2,3 */
		SetCC(CC_AL);

		// Mode 1 converts with round-to-zero, mode 0 with the plain form.
		CMP(R0, Operand2(1));
		SetCC(CC_EQ);
		VCVT(fpr.R(dstFpr), fpr.R(srcFpr), TO_INT | IS_SIGNED | ROUND_TO_ZERO);  /* 1 */
		SetCC(CC_LT);
		VCVT(fpr.R(dstFpr), fpr.R(srcFpr), TO_INT | IS_SIGNED);  /* 0 */
		SetCC(CC_AL);
		break;

	default:
		DISABLE;
	}
}
/* * *************************************************************************** * Routine: Gem_makeBndExt * * Purpose: Mark selected boundary faces in a special way. * * Author: Michael Holst * *************************************************************************** */ VPUBLIC void Gem_makeBndExt(Gem *thee, int key) { int i, j, k, l, m, p, q, nabors, btype, done, btypeGeneric; VV *v[4]; SS *sm, *sm0, *sm1, *sm2; double x[4][3], xchk; /* go through all simplices and zero all boundary faces */ Vnm_print(0,"Gem_makeBnd: zeroing boundary faces/vertices.."); Gem_setNumBF(thee, 0); Gem_setNumBV(thee, 0); for (i=0; i<Gem_numSS(thee); i++) { sm = Gem_SS(thee,i); if ( (i>0) && (i % VPRTKEY) == 0 ) Vnm_print(0,"[BS:%d]",i); /* get local vertices */ for (j=0; j<Gem_dimVV(thee); j++) v[j] = SS_vertex(sm,j); /* reset all vertices and faces to interior type */ for (j=0; j<Gem_dimVV(thee); j++) { /* the other three local vertex/face numbers besides "j" */ k=(j+1) % Gem_dimVV(thee); l=(k+1) % Gem_dimVV(thee); m=(l+1) % Gem_dimVV(thee); SS_setFaceType(sm, j, 0); VV_setType(v[k], 0); VV_setType(v[l], 0); if (Gem_dim(thee) == 3) VV_setType(v[m], 0); } } Vnm_print(0,"..done.\n"); /* okay now make a boundary */ Vnm_print(0,"Gem_makeBnd: rebuilding boundary faces/vertices.."); for (i=0; i<Gem_numSS(thee); i++) { sm = Gem_SS(thee,i); if ( (i>0) && (i % VPRTKEY) == 0 ) Vnm_print(0,"[BS:%d]",i); /* get local vertices */ for (j=0; j<Gem_dimVV(thee); j++) v[j] = SS_vertex(sm,j); /* rebuild everything */ for (j=0; j<Gem_dimVV(thee); j++) { /* the other three local vertex/face numbers besides "j" */ k=(j+1) % Gem_dimVV(thee); l=(k+1) % Gem_dimVV(thee); m=(l+1) % Gem_dimVV(thee); /* look for a face nabor sharing face "j" (opposite vertex "j") */ nabors = 0; for (sm0=VV_firstSS(v[k]); sm0!=VNULL;sm0=SS_link(sm0,v[k])) { for (sm1=VV_firstSS(v[l]); sm1!=VNULL; sm1=SS_link(sm1,v[l])) { if (Gem_dim(thee) == 2) { if ((sm0!=sm) && (sm0==sm1)) nabors++; } else { for (sm2=VV_firstSS(v[m]); 
sm2!=VNULL; sm2=SS_link(sm2,v[m])) { if ((sm0!=sm) && (sm0==sm1) && (sm0==sm2)) { nabors++; } } } } } /* if no one there, then face "j" is actually a boundary face */ if (nabors == 0) { /* grab coordinates of the vertices of this face */ for (q=0; q<Gem_dim(thee); q++) { x[0][q] = VV_coord(v[k],q); } for (q=0; q<Gem_dim(thee); q++) { x[1][q] = VV_coord(v[l],q); } if (Gem_dim(thee) == 3) { for (q=0; q<Gem_dim(thee); q++) { x[2][q] = VV_coord(v[m],q); } } /* default is interior; should not occur! */ btypeGeneric = 18; done = 0; btype = btypeGeneric; /* ---------- check for base marking ---------- */ xchk = 0.0; for (p=0; p<Gem_dim(thee); p++) { xchk += VABS( x[p][1] - (-1.0) ); } if (xchk < VSMALL) { done = 1; btype = 1; } /* ---------- check for base marking again ---------- */ xchk = 0.0; for (p=0; p<Gem_dim(thee); p++) { xchk += VABS( x[p][1] - ( 0.0) ); } if (xchk < VSMALL) { done = 1; btype = 18; } /* ---------- check for first section ---------- */ if (!done) { done = 1; btype = 2; for (p=0; p<Gem_dim(thee); p++) { if (! ( ( 1.9 <= x[p][0]) && ( 6.1 >= x[p][0]) && (-VSMALL <= x[p][1]) && (-1.1 <= x[p][2]) && ( 1.1 >= x[p][2]) )) { done = 0; btype = btypeGeneric; } } if (done) { xchk = 0.0; for (p=0; p<Gem_dim(thee); p++) { xchk += VABS( x[p][1] - 10.0 ); } if (xchk < VSMALL) { btype = 10; } } } /* ---------- check for second section ---------- */ if (!done) { done = 1; btype = 4; for (p=0; p<Gem_dim(thee); p++) { if (! ( ( 7.9 <= x[p][0]) && (12.1 >= x[p][0]) && (-VSMALL <= x[p][1]) && (-1.1 <= x[p][2]) && ( 1.1 >= x[p][2]) )) { done = 0; btype = btypeGeneric; } } if (done) { xchk = 0.0; for (p=0; p<Gem_dim(thee); p++) { xchk += VABS( x[p][1] - 10.0 ); } if (xchk < VSMALL) { btype = 12; } } } /* ---------- check for third section ---------- */ if (!done) { done = 1; btype = 6; for (p=0; p<Gem_dim(thee); p++) { if (! 
( (13.9 <= x[p][0]) && (18.1 >= x[p][0]) && (-VSMALL <= x[p][1]) && (-1.1 <= x[p][2]) && ( 1.1 >= x[p][2]) )) { done = 0; btype = btypeGeneric; } } if (done) { xchk = 0.0; for (p=0; p<Gem_dim(thee); p++) { xchk += VABS( x[p][1] - 10.0 ); } if (xchk < VSMALL) { btype = 14; } } } /* ---------- check for fourth section ---------- */ if (!done) { done = 1; btype = 8; for (p=0; p<Gem_dim(thee); p++) { if (! ( (19.9 <= x[p][0]) && (24.1 >= x[p][0]) && (-VSMALL <= x[p][1]) && (-1.1 <= x[p][2]) && ( 1.1 >= x[p][2]) )) { done = 0; btype = btypeGeneric; } } if (done) { xchk = 0.0; for (p=0; p<Gem_dim(thee); p++) { xchk += VABS( x[p][1] - 10.0 ); } if (xchk < VSMALL) { btype = 16; } } } /* should have been marked with SOME boundary type */ VASSERT( 0 != btype ); /* set the facetype */ SS_setFaceType(sm, j, btype); Gem_numBFpp(thee); /* set the vertex types (dirichlet overrides robin) */ if (!VDIRICHLET( VV_type(v[k])) ) { if (VINTERIOR( VV_type(v[k])) ) { Gem_numBVpp(thee); } VV_setType(v[k], btype); } if (!VDIRICHLET( VV_type(v[l])) ) { if (VINTERIOR( VV_type(v[l])) ) { Gem_numBVpp(thee); } VV_setType(v[l], btype); } if (Gem_dim(thee) == 3) { if (!VDIRICHLET( VV_type(v[m])) ) { if (VINTERIOR( VV_type(v[m])) ) { Gem_numBVpp(thee); } VV_setType(v[m], btype); } } } } } Vnm_print(0,"..done.\n"); }