Exemplo n.º 1
	// Applies the destination ("D") prefix stored in js.prefixD to each lane
	// of the vector whose registers are listed in vregs.
	//   vregs - one register index per lane
	//   sz    - vector size; GetNumVectorElements(sz) yields the lane count
	// Each lane owns two bits of js.prefixD: the value 1 clamps the lane to
	// [0, 1] and the value 3 clamps it to [-1, 1] (see the formulas below).
	// NOTE(review): this excerpt stops mid-function and seems to have lost
	// lines (closing braces, the statement guarded by the write-mask test);
	// confirm against the upstream source before relying on it.
	void Jit::ApplyPrefixD(const u8 *vregs, VectorSize sz) {
		_assert_(js.prefixDFlag & ArmJitState::PREFIX_KNOWN);
		// A zero prefix means there is nothing to apply.
		if (!js.prefixD) return;

		int n = GetNumVectorElements(sz);
		for (int i = 0; i < n; i++) 	{
			// NOTE(review): no statement follows this `if` in the excerpt, so as
			// written it would govern the declaration of `sat` below. A line
			// (presumably one that skips write-masked lanes) looks missing.
			if (js.VfpuWriteMask(i))

			// Two prefix bits per lane select the saturation mode.
			int sat = (js.prefixD >> (i * 2)) & 3;
			if (sat == 1) {
				// clamped = fabs(x) - fabs(x-0.5f) + 0.5f; // [ 0, 1]
				fpr.MapRegV(vregs[i], MAP_DIRTY);
				MOVI2F(S0, 0.5, R0);           // S0 = 0.5f (R0 presumably a scratch register — confirm)
				VABS(S1, fpr.V(vregs[i]));     // S1 = fabs(x)
				VSUB(S2, fpr.V(vregs[i]), S0); // S2 = fabs(x-0.5f) {VABD}
				VABS(S2, S2);
				VSUB(fpr.V(vregs[i]), S1, S2); // v[i] = S1 - S2 + 0.5f
				VADD(fpr.V(vregs[i]), fpr.V(vregs[i]), S0);
			} else if (sat == 3) {
				// clamped = fabs(x) - fabs(x-1.0f);        // [-1, 1]
				fpr.MapRegV(vregs[i], MAP_DIRTY);
				MOVI2F(S0, 1.0, R0);           // S0 = 1.0f
				VABS(S1, fpr.V(vregs[i]));     // S1 = fabs(x)
				VSUB(S2, fpr.V(vregs[i]), S0); // S2 = fabs(x-1.0f) {VABD}
				VABS(S2, S2);
				VSUB(fpr.V(vregs[i]), S1, S2); // v[i] = S1 - S2
Exemplo n.º 2
// Applies VABS to both registers obtained for FB (fpr.R0 and fpr.R1) and
// writes the results to the corresponding registers obtained for FD.
// NOTE(review): the meaning of the `false` argument passed to fpr.R0/R1 for
// the destination is not visible here (presumably "do not preload the old
// value") — confirm against the register cache.
// NOTE(review): the body braces are absent from this excerpt.
void JitArm::ps_abs(UGeckoInstruction inst)

	u32 b = inst.FB, d = inst.FD;

	// Source pair.
	ARMReg vB0 = fpr.R0(b);
	ARMReg vB1 = fpr.R1(b);
	// Destination pair.
	ARMReg vD0 = fpr.R0(d, false);
	ARMReg vD1 = fpr.R1(d, false);
	VABS(vD0, vB0);
	VABS(vD1, vB1);
Exemplo n.º 3
/* ///////////////////////////////////////////////////////////////////////////
// Routine:  Ju
// Purpose:  Evaluate the integrand J_k(u) of the energy functional J(u)
//           at the single point x.  This is your nonlinear energy
//           functional for which your weak form PDE below in Fu_v() is the
//           Euler condition.  (There may not be such a J(u) in all cases.)
//            /\              /\
//     J(u) = \  J_0(u) dx +  \  J_1(u) ds
//           \/m             \/dm
// Input:    PDE   = pointer to the PDE object
//           key   = integrand to evaluate (0=J_0, 1=J_1)
// Output:   Value of the integrand is returned
// Speed:    This function is called by MC once for a single
//           quadrature point during assembly, and needs to be fast.
// Author:   Michael Holst
/////////////////////////////////////////////////////////////////////////// */
/* Returns the value of the energy integrand selected by key (0 or 1),
 * dispatching first on the energy key read from the PDE object.
 * NOTE(review): this excerpt has no braces around the body, no "break"
 * between the switch cases and no end of the switch; xq and U are not
 * declared in the visible code (presumably file-scope state -- confirm
 * against the upstream source). */
VPUBLIC double Ju(PDE *thee, int key)
    double value = 0.0;
    double mytime = PDE_getTime( thee );
    int ekey = PDE_getEnergyKey( thee );

    switch( ekey ) {
    case 0:
        /* interior form case */
        if (key == 0) {
            /* |my_US(...) - U[0]|: absolute difference between my_US
             * evaluated at xq/mytime and the first component of U */
            value = my_US(thee->dim, thee->vec, xq, mytime);
            value = VABS(value - U[0]);
        /* boundary form case */
        } else if (key == 1) {
            value = 0.0;
        } else { VASSERT(0); }
     /* NOTE(review): the two assignments below look like they belonged to
      * separate cases (e.g. "case 1" and a default); lines seem to be
      * missing between them. */
     case 1:
        value = 1.0;
        value = 0.0;

    return value;
Exemplo n.º 4
// Applies VABS to the R0 register obtained for FB, writing the R0 register
// obtained for FD, then calls Helper_UpdateCR1 when the instruction's Rc
// field is set.
// NOTE(review): the body braces are absent from this excerpt.
void JitArm::fabsx(UGeckoInstruction inst)

	ARMReg vD = fpr.R0(inst.FD);
	ARMReg vB = fpr.R0(inst.FB);

	VABS(vD, vB);

	// Record form: update CR1 from the result.
	if (inst.Rc) Helper_UpdateCR1(vD);
Exemplo n.º 5
	// Applies a source (S/T) prefix to the lanes listed in vregs, replacing
	// entries of vregs with temporaries that hold the transformed values.
	//   vregs  - in/out: one register index per lane; modified lanes are
	//            redirected to a temp register from fpr.GetTempV()
	//   prefix - packed per-lane fields, decoded below:
	//              bits 2i..2i+1 : source lane (swizzle) or constant index
	//              bit  8+i      : abs (or, for constants, upper table half)
	//              bit  12+i     : constant select
	//              bit  16+i     : negate
	//   sz     - vector size; GetNumVectorElements(sz) yields the lane count
	// NOTE(review): this excerpt stops mid-function and seems to have lost
	// lines (loop/branch braces, the statement after the "unchanged" test);
	// confirm against the upstream source.
	void Jit::ApplyPrefixST(u8 *vregs, u32 prefix, VectorSize sz) {
		// 0xE4 = 0b11100100: lane i reads lane i and no abs/constant/negate bit
		// is set, i.e. the identity prefix.
		if (prefix == 0xE4) return;

		int n = GetNumVectorElements(sz);
		u8 origV[4];
		static const float constantArray[8] = {0.f, 1.f, 2.f, 0.5f, 3.f, 1.f/3.f, 0.25f, 1.f/6.f};

		// Remember the original registers; vregs is overwritten below.
		for (int i = 0; i < n; i++)
			origV[i] = vregs[i];

		for (int i = 0; i < n; i++)
			int regnum = (prefix >> (i*2)) & 3;
			int abs    = (prefix >> (8+i)) & 1;
			int negate = (prefix >> (16+i)) & 1;
			int constants = (prefix >> (12+i)) & 1;

			// Unchanged, hurray.
			// NOTE(review): no statement follows this `if` in the excerpt
			// (presumably a `continue;` was lost).
			if (!constants && regnum == i && !abs && !negate)

			// This puts the value into a temp reg, so we won't write the modified value back.
			vregs[i] = fpr.GetTempV();
			fpr.MapRegV(vregs[i], MAP_NOINIT | MAP_DIRTY);

			if (!constants) {
				// Prefix may say "z, z, z, z" but if this is a pair, we force to x.
				// TODO: But some ops seem to use const 0 instead?
				if (regnum >= n) {
					ERROR_LOG_REPORT(CPU, "Invalid VFPU swizzle: %08x / %d", prefix, sz);
					regnum = 0;
				if (abs) {
					VABS(fpr.V(vregs[i]), fpr.V(origV[regnum]));
				} else {
					VMOV(fpr.V(vregs[i]), fpr.V(origV[regnum]));
			} else {
				// TODO: There is VMOV s, imm on ARM, that can generate some of these constants. Not 1/3 or 1/6 though.
				// The abs bit selects the upper four table entries.
				MOVI2F(fpr.V(vregs[i]), constantArray[regnum + (abs<<2)], R0);

			// TODO: This can be integrated into the VABS / VMOV above, and also the constants.
			if (negate)
				VNEG(fpr.V(vregs[i]), fpr.V(vregs[i]));

			// TODO: This probably means it will swap out soon, inefficiently...
Exemplo n.º 6
// Applies a source (S/T) prefix to the lanes listed in vregs, replacing
// entries of vregs with temporaries that hold the transformed values.
//   vregs  - in/out: one register index per lane; modified lanes are
//            redirected to a temp register from fpr.GetTempV()
//   prefix - packed per-lane fields, decoded below:
//              bits 2i..2i+1 : source lane (swizzle) or constant index
//              bit  8+i      : abs (or, for constants, upper table half)
//              bit  12+i     : constant select
//              bit  16+i     : negate
//   sz     - vector size; GetNumVectorElements(sz) yields the lane count
// NOTE(review): this excerpt stops mid-function and seems to have lost lines
// (loop/branch braces, the statement after the "unchanged" test, an `else`
// before the final VMOV); confirm against the upstream source.
void Jit::ApplyPrefixST(u8 *vregs, u32 prefix, VectorSize sz) {
    // 0xE4 = 0b11100100: lane i reads lane i and no abs/constant/negate bit
    // is set, i.e. the identity prefix.
    if (prefix == 0xE4) return;

    int n = GetNumVectorElements(sz);
    u8 origV[4];
    static const float constantArray[8] = {0.f, 1.f, 2.f, 0.5f, 3.f, 1.f/3.f, 0.25f, 1.f/6.f};

    // Remember the original registers; vregs is overwritten below.
    for (int i = 0; i < n; i++)
        origV[i] = vregs[i];

    for (int i = 0; i < n; i++)
        int regnum = (prefix >> (i*2)) & 3;
        int abs    = (prefix >> (8+i)) & 1;
        int negate = (prefix >> (16+i)) & 1;
        int constants = (prefix >> (12+i)) & 1;

        // Unchanged, hurray.
        // NOTE(review): no statement follows this `if` in the excerpt
        // (presumably a `continue;` was lost).
        if (!constants && regnum == i && !abs && !negate)

        // This puts the value into a temp reg, so we won't write the modified value back.
        vregs[i] = fpr.GetTempV();
        if (!constants) {
            // NOTE(review): origV[regnum] is used here before the range check
            // on regnum below — confirm this ordering is intended.
            fpr.MapDirtyInV(vregs[i], origV[regnum]);

            // Prefix may say "z, z, z, z" but if this is a pair, we force to x.
            // TODO: But some ops seem to use const 0 instead?
            if (regnum >= n) {
                WARN_LOG(CPU, "JIT: Invalid VFPU swizzle: %08x : %d / %d at PC = %08x (%s)", prefix, regnum, n, js.compilerPC, currentMIPS->DisasmAt(js.compilerPC));
                regnum = 0;

            if (abs) {
                VABS(fpr.V(vregs[i]), fpr.V(origV[regnum]));
                if (negate)
                    VNEG(fpr.V(vregs[i]), fpr.V(vregs[i]));
            } else {
                // NOTE(review): as shown, the VMOV below is not guarded by an
                // `else`, so it would overwrite the VNEG result; an `else`
                // line was probably dropped from this excerpt.
                if (negate)
                    VNEG(fpr.V(vregs[i]), fpr.V(origV[regnum]));
                    VMOV(fpr.V(vregs[i]), fpr.V(origV[regnum]));
        } else {
            fpr.MapRegV(vregs[i], MAP_DIRTY | MAP_NOINIT);
            // The abs bit selects the upper four table entries; the trailing
            // `negate` argument presumably asks MOVI2F to load the negated
            // constant — confirm against the emitter.
            MOVI2F(fpr.V(vregs[i]), constantArray[regnum + (abs<<2)], R0, negate);
Exemplo n.º 7
/// Returns the sum of |x(i,j,k)| over i in [2, *nx-1], j in [2, *ny-1] and
/// k in [2, *nz-1], i.e. a 1-norm that leaves out the first and last index
/// in each direction (presumably the boundary layer of a 1-based grid —
/// confirm against the MAT3/VAT3 macro definitions).
/// @param nx,ny,nz  Pointers to the grid dimensions
/// @param x         Grid data, accessed through MAT3/VAT3
/// @note The closing brace is absent from this excerpt.
VPUBLIC double Vxnrm1(int *nx, int *ny, int *nz,
                      double *x) {

    double xnrm1 = 0.0;  ///< Accumulates the sum of absolute values (the 1-norm)

    MAT3(x, *nx, *ny, *nz);

    // The indices used to traverse the matrices
    int i, j, k;

    /// @todo parallel optimization
    for(k=2; k<=*nz-1; k++)
        for(j=2; j<=*ny-1; j++)
            for(i=2; i<=*nx-1; i++)
                xnrm1 += VABS(VAT3(x, i, j, k));

    return xnrm1;
Exemplo n.º 8
/*
 * ***************************************************************************
 * Routine:  Slu_lnDet
 * Purpose:  Calculate the log of the determinant of a factored matrix.
 * Notes:    UMFPACK has a built-in routine to compute the determinant,
 *           which returns both the mantissa and the exponent separately.
 *           This avoids most overflow and underflow problems.
 * Author:   Stephen Bond
 * ***************************************************************************
 */
VPUBLIC double Slu_lnDet(Slu *thee)
    /* Returns ln|det(A)| when UMFPACK reports success; otherwise prints a
     * failure message and returns 1.0.  The sign of the determinant is
     * discarded by the VABS below.
     * NOTE(review): the body braces are absent from this excerpt. */
    int status;
    double Mx, Ex, lndet;
    /* NOTE(review): thee is dereferenced here, before the VASSERT below
     * checks it against VNULL. */
    void *Numeric = thee->work;

    VASSERT( thee != VNULL );
    VASSERT( thee->statLU );

    /* Determinant = Mx * 10^Ex */
    status = umfpack_di_get_determinant( &Mx, &Ex, Numeric, VNULL );

    /* LOG(DET) = LOG(Mantissa) + Exponent*LOG(10) */
    /* Computed before status is inspected; only used on the success path. */
    lndet = VLOG(VABS(Mx)) + Ex*VLOG(10);

    if (UMFPACK_OK == status) {
        Vnm_print(0, "Slu_lnDet:  ln(det(A)) = %g\n", lndet);
        return lndet;
    } else {
        Vnm_print(0, "Slu_lnDet:  Failed!  Returning 1.0\n");
        return 1.0;
Exemplo n.º 9
/*
 * ***************************************************************************
 * Routine:  Gem_formFix
 * Purpose:  Make some specified hacked fix to a given mesh.
 * Notes:    key==0 --> ?
 * Author:   Michael Holst
 * ***************************************************************************
 */
VPUBLIC void Gem_formFix(Gem *thee, int key)
    /* Two passes over all simplices:
     *   1. reset every face type and vertex type to 0 and zero the
     *      boundary face/vertex counters;
     *   2. for every face that has no neighbouring simplex across it, pick
     *      a boundary type from hard-coded geometric tests and store it on
     *      the face and on those of its vertices that are still interior.
     * key is only range-checked (0..2) in the visible code; btype is
     * overwritten per face in pass 2 with values 0..4.
     * NOTE(review): many closing braces and some statements are absent from
     * this excerpt; the printed messages say "Gem_makeBnd" although this is
     * Gem_formFix. */
    int i, j, k, l, m, nabors, btype;
    double radk, radl, radm, myTol;
    VV *v[4];
    SS *sm, *sm0, *sm1, *sm2;

    /* input check and some i/o */
    btype = key;
    VASSERT( (0 <= btype) && (btype <= 2) );

    /* go through all simplices and zero all boundary faces */
    Vnm_print(0,"Gem_makeBnd: zeroing boundary faces/vertices..");
    Gem_setNumBF(thee, 0);
    Gem_setNumBV(thee, 0);
    for (i=0; i<Gem_numSS(thee); i++) {
        sm = Gem_SS(thee,i);
        /* progress marker every VPRTKEY simplices */
        if ( (i>0) && (i % VPRTKEY) == 0 ) Vnm_print(0,"[BS:%d]",i);

        /* get local vertices */
        for (j=0; j<Gem_dimVV(thee); j++)
            v[j] = SS_vertex(sm,j);

        /* reset all vertices and faces to interior type */
        for (j=0; j<Gem_dimVV(thee); j++) {
            /* the other three local vertex/face numbers besides "j" */
            k=(j+1) % Gem_dimVV(thee);
            l=(k+1) % Gem_dimVV(thee);
            m=(l+1) % Gem_dimVV(thee);
            SS_setFaceType(sm, j, 0);
            VV_setType(v[k], 0);
            VV_setType(v[l], 0);
            if (Gem_dim(thee) == 3) VV_setType(v[m], 0);

    /* are we done */
    /* if (btype == 0) return; */

    /* okay now make a boundary */
    Vnm_print(0,"Gem_makeBnd: rebuilding boundary faces/vertices..");
    for (i=0; i<Gem_numSS(thee); i++) {
        sm = Gem_SS(thee,i);
        if ( (i>0) && (i % VPRTKEY) == 0 ) Vnm_print(0,"[BS:%d]",i);

        /* get local vertices */
        for (j=0; j<Gem_dimVV(thee); j++)
            v[j] = SS_vertex(sm,j);

        /* rebuild everything */
        for (j=0; j<Gem_dimVV(thee); j++) {

            /* the other three local vertex/face numbers besides "j" */
            k=(j+1) % Gem_dimVV(thee);
            l=(k+1) % Gem_dimVV(thee);
            m=(l+1) % Gem_dimVV(thee);

            /* look for a face nabor sharing face "j" (opposite vertex "j") */
            /* A neighbour is a simplex other than sm that appears in the
             * simplex ring of every vertex of face "j". */
            nabors = 0;
            for (sm0=VV_firstSS(v[k]); sm0!=VNULL;sm0=SS_link(sm0,v[k])) {
                for (sm1=VV_firstSS(v[l]); sm1!=VNULL; sm1=SS_link(sm1,v[l])) {
                    if (Gem_dim(thee) == 2) {
                        if ((sm0!=sm) && (sm0==sm1)) nabors++;
                    } else {
                        for (sm2=VV_firstSS(v[m]); sm2!=VNULL; 
                          sm2=SS_link(sm2,v[m])) {
                            /* NOTE(review): the body of this test is missing
                             * from the excerpt (presumably nabors++). */
                            if ((sm0!=sm) && (sm0==sm1) && (sm0==sm2)) {

            /* if no one there, then face "j" is actually a boundary face */
            if (nabors == 0) {

                /* tolerance for the hard-coded geometric tests below */
                myTol = 1.0e-2;

                /* all three vertices have coordinate 2 near 0.0: type 1 */
                if ( ( VABS(VV_coord(v[k],2) - 0.0) < myTol) 
                  && ( VABS(VV_coord(v[l],2) - 0.0) < myTol)
                  && ( VABS(VV_coord(v[m],2) - 0.0) < myTol) ) {
                    btype = 1;
                /* all three vertices have coordinate 2 near 68.03512: type 3 */
                } else if ( ( VABS(VV_coord(v[k],2) - 68.03512) < myTol)
                         && ( VABS(VV_coord(v[l],2) - 68.03512) < myTol) 
                         && ( VABS(VV_coord(v[m],2) - 68.03512) < myTol) ) {
                    btype = 3;
                } else {
                    /* distance of each vertex from the origin in the plane
                     * of coordinates 0 and 1 */
                    radk = VSQRT( VSQR( VV_coord(v[k],0) )
                                + VSQR( VV_coord(v[k],1) ) );
                    radl = VSQRT( VSQR( VV_coord(v[l],0) )
                                + VSQR( VV_coord(v[l],1) ) );
                    radm = VSQRT( VSQR( VV_coord(v[m],0) )
                                + VSQR( VV_coord(v[m],1) ) );
                    /* radius near 1.5: type 2; near 2.0: type 4; else 0 */
                    if ( ( VABS(radk - 1.5) < myTol)
                      && ( VABS(radl - 1.5) < myTol)
                      && ( VABS(radm - 1.5) < myTol) ) {
                        btype = 2;
                    } else if ( ( VABS(radk - 2.0) < myTol)
                            &&  ( VABS(radl - 2.0) < myTol)
                            &&  ( VABS(radm - 2.0) < myTol) ){
                        btype = 4;
                    } else {
                        btype = 0;

                SS_setFaceType(sm, j, btype);

                /* only vertices still marked interior take the new type */
                if (VINTERIOR( VV_type(v[k])) ) {
                    VV_setType(v[k], btype);
                if (VINTERIOR( VV_type(v[l])) ) {
                    VV_setType(v[l], btype);
                if (Gem_dim(thee) == 3) {
                    if (VINTERIOR( VV_type(v[m])) ) {
                        VV_setType(v[m], btype);
Exemplo n.º 10
/*
 * ***************************************************************************
 * Routine:  Mat_printLN
 * Purpose:  Print an LN format matrix as a DENSE matrix in MATLAB format.
 * Author:   Stephen Bond and Michael Holst
 * ***************************************************************************
 */
VPUBLIC void Mat_printLN(Mat *thee)
    /* Expands the linked-list storage of thee into a local dense 30x30
     * array and prints it row by row between "name = [" and "];".
     * Entries with magnitude below 0.0001 are printed as "  0.0  ".
     * NOTE(review): many closing braces and some statements (e.g. the
     * return after the size check, the breaks between switch cases, the
     * start of the final error print) are absent from this excerpt. */
    int i, j;
    int numR, numC;
    char rn[80];
    const int MaxRows = 30;
    const int MaxCols = 30;
    /* dimensions must agree with MaxRows/MaxCols above */
    double matrix[30][30];
    LinkA *mt;
    LinkRC *mtX;

    numR = thee->numR;
    numC = thee->numC;

    /* NOTE(review): rn is printed with %s below but is never assigned in the
     * visible code; a line initialising it was probably lost here. */

    /* some i/o */
    Vnm_print(0, "%s printing <%s>" " [dim=(%dx%d),sym=%d,numA=%d]\n",
        rn, thee->name, numR, numC, thee->sym, thee->numA);

    /* size check */
    if ((numR > MaxRows) || (numC > MaxCols)) {
        Vnm_print(0, "%smatrix too large to view....skipping.\n", rn);

    /* make a dense matrix first */
    for (i=0; i<numR; i++)
        for (j=0; j<numC; j++)
            matrix[i][j] = 0.0;

    if (thee->state != NULL_STATE) {

        switch (thee->format) {

          /* row lists: list i of lnkU holds (column idx, val) entries */
          case RLN_FORMAT:
            for (i=0; i<numR; i++) {
                for (mt=(LinkA*)Vset_access(thee->lnkU,i);
                     mt!=VNULL; mt=mt->next) {
                    if (mt->idx >= 0) {
                        j = mt->idx;
                        matrix[i][j] = mt->val;

          /* column lists: list j of lnkL holds (row idx, val) entries */
          case CLN_FORMAT:
            for (j=0; j<numC; j++) {
                for (mt=(LinkA*)Vset_access(thee->lnkL,j);
                     mt!=VNULL; mt=mt->next) {
                    if (mt->idx >= 0) {
                        i = mt->idx;
                        matrix[i][j] = mt->val;

          case XLN_FORMAT:
            for (i=0; i<numR; i++) {
                if ( thee->sym == ISNOT_SYM ) {
                    /* xln is an array of list-head pointers */
                    mtX = ((LinkRC**) thee->xln)[i];
                } else {
                    /* xln is an array of LinkRCS nodes; the first node of
                     * row i supplies the diagonal entry */
                    mtX = (LinkRC*) &( ((LinkRCS*) thee->xln)[i] );
                    matrix[i][i] = ((LinkRCS*) mtX)->val;
                    mtX = mtX->next;
                for ( /* no-op */ ; mtX!=VNULL; mtX=mtX->next) {
                    j = mtX->idx;
                    if (j < numC) {
                        if ( thee->sym == ISNOT_SYM ) {
                            matrix[i][j] = ((LinkRCS*) mtX)->val;
                        } else if ( thee->sym == IS_SYM ) {
                            /* symmetric: mirror the entry */
                            matrix[i][j] = ((LinkRCS*) mtX)->val;
                            matrix[j][i] = ((LinkRCS*) mtX)->val;
                        } else {
                            /* separate value stored for the transposed entry */
                            matrix[i][j] = ((LinkRCN*) mtX)->val;
                            matrix[j][i] = ((LinkRCN*) mtX)->valT;
              "%smatrix not in correct format to print....skipping.\n", rn);

    /* print the matrix */
    Vnm_print(0, "%s = [\n", thee->name);
    for (i=0; i<numR; i++) {
        for (j=0; j<numC; j++) {
            if (VABS(matrix[i][j]) < 0.0001) {
                Vnm_print(0, "  0.0  ");
            } else {
                Vnm_print(0, "%7.3f", matrix[i][j]);
        Vnm_print(0, "\n");
    Vnm_print(0, "];\n");
Exemplo n.º 11
/// Power iteration: repeatedly applies the operator through Vmatvec,
/// normalises the vector and forms the Rayleigh quotient rho, stopping when
/// the relative change of rho drops below *tol or *iters reaches *itmax.
/// Writes rho to *eigmax and a closed-form model value to *eigmax_model.
/// @param w1,w2,w3,w4  Work vectors (w1 is seeded with Vaxrand, the others zeroed)
/// @param iinfo        Values above 3 enable progress printing
/// @note Several lines are absent from this excerpt: closing braces, the
///       statement guarded by the stopping test and any increment of *iters.
VPUBLIC void Vpower(int *nx, int *ny, int *nz,
        int *iz, int *ilev,
        int *ipc, double *rpc, double *ac, double *cc,
        double *w1, double *w2, double *w3, double *w4,
        double *eigmax, double *eigmax_model, double *tol,
        int *itmax, int *iters, int *iinfo) {

    int lev, level;
    double denom, fac, rho, oldrho, error, relerr;

    /// @todo  Just use a constant definition of PI here
    double pi = 4.0 * atan( 1.0 );

    // Utility variables
    // NOTE(review): skipIters is not used anywhere in the visible code.
    int skipIters = 0;
    double alpha;

    MAT2(iz, 50, 1);


    // Recover level information
    level = 1;
    lev   = (*ilev - 1) + level;

    // Seed vector: random to contain all components

    Vaxrand(nx, ny, nz, w1);

    Vazeros(nx, ny, nz, w2);
    Vazeros(nx, ny, nz, w3);
    Vazeros(nx, ny, nz, w4);

    // Compute Rayleigh quotient with the seed vector
    // NOTE(review): denom is not checked for zero before the division.
    denom = Vxnrm2(nx, ny, nz, w1);
    fac = 1.0 / denom;
    Vxscal(nx, ny, nz, &fac, w1);
    Vmatvec(nx, ny, nz,
            RAT(ipc, VAT2(iz, 5, lev)), RAT(rpc, VAT2(iz, 6,lev)),
             RAT(ac, VAT2(iz, 7, lev)),  RAT(cc, VAT2(iz, 1,lev)), w1, w2);
    oldrho = Vxdot(nx, ny, nz, w1, w2);

    // I/O
    // A zero quotient is returned as-is without iterating.
    if (oldrho == 0.0) {
        if (*iinfo > 3)  {
            // *iters still holds the caller's value here; it is reset only
            // in the other branch.
            Vnm_print(2, "POWER: iter: estimate = %d %g\n", *iters, oldrho);
        rho = oldrho;
    } else {

        // Main iteration
        *iters = 0;
        while(1) {

            // Apply the matrix A
            Vmatvec(nx, ny, nz,
                    RAT(ipc, VAT2(iz, 5, lev)), RAT(rpc, VAT2(iz, 6, lev)),
                     RAT(ac, VAT2(iz, 7, lev)),  RAT(cc, VAT2(iz, 1, lev)), w1, w2);

            Vxcopy(nx, ny, nz, w2, w1);

            // Normalize the new vector
            denom = Vxnrm2(nx, ny, nz, w1);
            fac = 1.0 / denom;
            Vxscal(nx, ny, nz, &fac, w1);

            // Compute the new Rayleigh quotient
            Vmatvec(nx, ny, nz,
                    RAT(ipc, VAT2(iz, 5, lev)), RAT(rpc, VAT2(iz, 6, lev)),
                    RAT( ac, VAT2(iz, 7, lev)), RAT( cc, VAT2(iz, 1, lev)), w1, w2);
            rho = Vxdot(nx, ny, nz, w1, w2);

            // Stopping test ***
            // w2=A*x, w1=x, stop = 2-norm(A*x-lambda*x)

            // w4 = w2 - rho*w1 is the eigen-residual; its norm is only
            // printed, the loop exit uses relerr.
            Vxcopy(nx, ny, nz, w1, w3);
            Vxcopy(nx, ny, nz, w2, w4);
            Vxscal(nx, ny, nz, &rho, w3);
            alpha = -1.0;
            Vxaxpy(nx, ny, nz, &alpha, w3, w4);
            error = Vxnrm2(nx, ny, nz, w4);
            // NOTE(review): rho is not checked for zero before the division.
            relerr = VABS(rho - oldrho ) / VABS( rho );

            // I/O
            if (*iinfo > 3) {

                Vnm_print(2, "POWER:  iters  =%d\n", *iters);
                Vnm_print(2, "        error  =%g\n", error);
                Vnm_print(2, "        relerr =%g\n", relerr);
                Vnm_print(2, "        rho    =%g\n", rho);

            // NOTE(review): the statement guarded by this test (presumably a
            // break) is missing from the excerpt.
            if( relerr < *tol || *iters == *itmax)

            oldrho = rho;

    // Return some stuff ***
    *eigmax = rho;
    fac = VPOW(2.0, *ilev - 1);
    // NOTE(review): the model has terms for nx and ny only; the constant 6.0
    // suggests a third (nz) term may have been intended — confirm.
    *eigmax_model = fac * (6.0 - 2.0 * VCOS((*nx - 2) * pi / (*nx - 1))
                                     - 2.0 * VCOS((*ny - 2) * pi / (*ny - 1)));
Exemplo n.º 12
/// Inverse power iteration: each step applies A^{-1} through the multigrid
/// routine Vmvcs, normalises the vector and forms the Rayleigh quotient rho
/// with Vmatvec, stopping when the relative change of rho drops below *tol
/// or *iters reaches *itmax.  Writes rho to *eigmin and a closed-form model
/// value to *eigmin_model.
/// @param w0..w4  Work vectors (w1 is seeded with Vaxrand, the others zeroed)
/// @param iinfo   Values above 3 enable progress printing
/// @note The inner solve uses its own local controls (the *_s variables);
///       the caller's itmax/iok/iinfo/errtol/nu1/nu2/mgsmoo are not passed
///       to Vmvcs in the visible code.
/// @note Several lines are absent from this excerpt: closing braces, the
///       statement guarded by the stopping test and any increment of *iters.
VPUBLIC void Vipower(int *nx,int *ny,int *nz,
        double *u, int *iz,
        double *w0, double *w1, double *w2, double *w3, double *w4,
        double *eigmin, double *eigmin_model, double *tol,
        int *itmax, int *iters,
        int *nlev, int *ilev, int *nlev_real, int *mgsolv,
        int *iok, int *iinfo, double *epsiln, double *errtol, double *omega,
        int *nu1, int *nu2, int *mgsmoo,
        int *ipc, double *rpc,
        double *pc, double *ac, double *cc, double *tru) {

    int level, lev;
    double denom, fac, rho, oldrho;
    double error, relerr, errtol_s;
    int itmax_s, iters_s, ierror_s, iok_s, iinfo_s, istop_s;
    int nu1_s, nu2_s, mgsmoo_s;

    /// @todo  Just use a constant definition of PI here
    double pi = 4.0 * atan( 1.0 );

    // Utility variables
    double alpha;

    MAT2(iz, 50, 1);


    // Recover level information
    level = 1;
    lev   = (*ilev - 1) + level;

    // Seed vector: random to contain all components
    Vaxrand(nx, ny, nz, w1);
    Vazeros(nx, ny, nz, w2);
    Vazeros(nx, ny, nz, w3);
    Vazeros(nx, ny, nz, w4);
    Vazeros(nx, ny, nz, RAT(w0, VAT2(iz, 1, lev)));
    Vazeros(nx, ny, nz, RAT( u, VAT2(iz, 1, lev)));

    // Compute Rayleigh quotient with the seed vector ***
    // NOTE(review): denom is not checked for zero before the division.
    denom = Vxnrm2(nx, ny, nz, w1);
    fac = 1.0 / denom;
    Vxscal(nx, ny, nz, &fac, w1);
    Vmatvec(nx, ny, nz,
            RAT(ipc, VAT2(iz, 5, lev)), RAT(rpc, VAT2(iz, 6, lev)),
            RAT( ac, VAT2(iz, 7, lev)), RAT( cc, VAT2(iz, 1, lev)), w1, w2);
    oldrho = Vxdot(nx, ny, nz, w1, w2);

    // I/O
    // A zero quotient is returned as-is without iterating.
    if (oldrho == 0.0) {
           if (*iinfo > 3) {
               Vnm_print(2, "Vipower: iters=%d\n",    *iters);
               Vnm_print(2, "         estimate=%f\n", oldrho);
       rho = oldrho;
    } else {

        //main iteration
        *iters = 0;
        while (1) {

            // Apply the matrix A^{-1} (using MG solver)
            // Local solver controls for the inner solve.
               itmax_s = 100;
               iters_s = 0;
               ierror_s = 0;
               iok_s = 0;
               iinfo_s = 0;
               istop_s = 0;
            mgsmoo_s = 1;
            nu1_s = 1;
            nu2_s = 1;
            errtol_s = *epsiln;

            // w0 receives the current vector before the solve; the result
            // is read back from u afterwards.
            Vxcopy(nx, ny, nz, w1, RAT(w0, VAT2(iz, 1,lev)));
            Vmvcs(nx, ny, nz, u, iz,
                    w1, w2, w3, w4,
                    &istop_s, &itmax_s, &iters_s, &ierror_s,
                    nlev, ilev, nlev_real, mgsolv,
                    &iok_s, &iinfo_s, epsiln,
                    &errtol_s, omega, &nu1_s, &nu2_s, &mgsmoo_s,
                    ipc, rpc, pc, ac, cc, w0, tru);
            Vxcopy(nx, ny, nz, RAT(u, VAT2(iz, 1, lev)), w1);

            // Normalize the new vector
            denom = Vxnrm2(nx, ny, nz, w1);
            fac = 1.0 / denom;
            Vxscal(nx, ny, nz, &fac, w1);

            // Compute the new Rayleigh quotient
            Vmatvec(nx, ny, nz,
                    RAT(ipc, VAT2(iz, 5, lev)), RAT(rpc, VAT2(iz, 6, lev)),
                     RAT(ac, VAT2(iz, 7,lev)),   RAT(cc, VAT2(iz, 1, lev)), w1, w2);
            rho = Vxdot(nx, ny, nz, w1, w2);

            // Stopping test
            // w2=A*x, w1=x, stop = 2-norm(A*x-lambda*x) ***
            // w4 = w2 - rho*w1 is the eigen-residual; its norm is only
            // printed, the loop exit uses relerr.
            Vxcopy(nx, ny, nz, w1, w3);
            Vxcopy(nx, ny, nz, w2, w4);
            Vxscal(nx, ny, nz, &rho, w3);
            alpha = -1.0;
            Vxaxpy(nx, ny, nz, &alpha, w3, w4);
            error = Vxnrm2(nx, ny, nz, w4);
            // NOTE(review): rho is not checked for zero before the division.
            relerr = VABS(rho - oldrho ) / VABS( rho );

            // I/O
            if (*iinfo > 3) {

                Vnm_print(2, "POWER:  iters  =%d\n", *iters);
                Vnm_print(2, "        error  =%g\n", error);
                Vnm_print(2, "        relerr =%g\n", relerr);
                Vnm_print(2, "        rho    =%g\n", rho);

            // NOTE(review): the statement guarded by this test (presumably a
            // break) is missing from the excerpt.
            if (relerr < *tol || *iters == *itmax)

            oldrho = rho;

    // Return some stuff
    *eigmin = rho;
    fac = VPOW(2.0, *ilev - 1);
    *eigmin_model = fac * (6.0 - 2.0 * VCOS(pi / (*nx - 1))
                               - 2.0 * VCOS(pi / (*ny - 1))
                               - 2.0 * VCOS(pi / (*nz - 1)));
Exemplo n.º 13
/// Power iteration in which the operator "M" is applied by running Vmvcs
/// with itmax_s = 1 and reading the result back from u.  Each step applies
/// M, normalises the vector, applies M again to form the Rayleigh quotient
/// rho, and stops when the relative change of rho drops below *tol or
/// *iters reaches *itmax.  Writes rho to *eigmax.
/// @param fc     Zeroed on the current level and used as a work vector
/// @param iinfo  Values above 3 enable progress printing
/// @note Several lines are absent from this excerpt: closing braces, the
///       body of the stopping test and any increment of *iters.
VEXTERNC void Vmpower(int *nx, int *ny, int *nz,
        double *u, int *iz,
        double *w0, double *w1, double *w2, double *w3, double *w4,
        double *eigmax, double *tol,
        int *itmax, int *iters, int *nlev, int *ilev, int *nlev_real,
        int *mgsolv, int *iok, int *iinfo,
        double *epsiln, double *errtol, double *omega,
        int *nu1, int *nu2, int *mgsmoo, int *ipc, double *rpc,
        double *pc, double *ac, double *cc, double *fc, double *tru) {

    // Local variables
    int lev, level;
    double denom, fac, rho, oldrho, error;
    double relerr;
    int itmax_s, iters_s, ierror_s, iok_s, iinfo_s, istop_s;
    double alpha;

    MAT2(iz, 50, 1);

    // Recover level information
    level = 1;
    lev   = (*ilev - 1) + level;

    // Seed vector: random to contain all components
    Vaxrand(nx, ny, nz, w1);
    Vazeros(nx, ny, nz, w2);
    Vazeros(nx, ny, nz, w3);
    Vazeros(nx, ny, nz, w4);
    Vazeros(nx, ny, nz, RAT(u, VAT2(iz, 1, lev)));

    // NOTE: we destroy "fc" on this level due to lack of vectors... ***
    Vazeros(nx,ny,nz,RAT(fc, VAT2(iz, 1, lev)));

    // Normalize the seed vector
    // NOTE(review): denom is not checked for zero before the division.
    denom = Vxnrm2(nx, ny, nz, w1);
    fac = 1.0 / denom;
    Vxscal(nx, ny, nz, &fac, w1);

    // Compute Rayleigh quotient with the seed vector
    // u <- w1, one Vmvcs pass, then oldrho = <w1, u>.
    Vxcopy(nx, ny, nz, w1, RAT(u, VAT2(iz, 1, lev)));
    itmax_s = 1;
    iters_s = 0;
    ierror_s = 0;
    iok_s = 0;
    iinfo_s = 0;
    istop_s = 1;
    Vmvcs(nx, ny, nz,
            u, iz, w0, w2, w3, w4,
            &istop_s, &itmax_s, &iters_s, &ierror_s,
            nlev, ilev, nlev_real, mgsolv,
            &iok_s, &iinfo_s,
            epsiln, errtol, omega, nu1, nu2, mgsmoo,
            ipc, rpc,
            pc, ac, cc, fc, tru);
    oldrho = Vxdot(nx, ny, nz, w1, RAT(u, VAT2(iz, 1, lev)));

    // I/O
    // A zero quotient is returned as-is without iterating.
    if (oldrho == 0.0) {
       if (*iinfo > 3) {
           // NOTE(review): the routine name is misspelled in this message.
           Vnm_print(2, "Vmp0ower: iter=%d, estimate=%f", *iters, oldrho);
       rho = oldrho;

    } else {

        // Main iteration
        *iters = 0;
        while (1) {

            // Apply the matrix M
           // NOTE(review): this call passes w1 where the other two Vmvcs
           // calls pass w0 — confirm that the difference is intended.
           Vxcopy(nx, ny, nz, w1, RAT(u, VAT2(iz, 1, lev)));
           itmax_s = 1;
           iters_s = 0;
           ierror_s = 0;
           iok_s = 0;
           iinfo_s = 0;
           istop_s = 1;
           Vmvcs(nx, ny, nz,
                   u, iz, w1, w2, w3, w4,
                   &istop_s, &itmax_s, &iters_s, &ierror_s,
                   nlev, ilev, nlev_real, mgsolv,
                   &iok_s, &iinfo_s,
                   epsiln, errtol, omega, nu1, nu2, mgsmoo,
                   ipc, rpc,
                   pc, ac, cc, fc, tru);
           Vxcopy(nx, ny, nz, RAT(u, VAT2(iz, 1, lev)), w1);

           // Normalize the new vector
           denom = Vxnrm2(nx, ny, nz, w1);
           fac = 1.0 / denom;
           Vxscal(nx, ny, nz, &fac, w1);

           // Compute the new Rayleigh quotient
           Vxcopy(nx, ny, nz, w1, RAT(u, VAT2(iz, 1, lev)));
           itmax_s = 1;
           iters_s = 0;
           ierror_s = 0;
           iok_s = 0;
           iinfo_s = 0;
           istop_s = 1;
           Vmvcs(nx, ny, nz,
                   u, iz, w0, w2, w3, w4,
                   &istop_s, &itmax_s, &iters_s, &ierror_s,
                   nlev, ilev, nlev_real, mgsolv,
                   &iok_s, &iinfo_s,
                   epsiln, errtol, omega, nu1, nu2, mgsmoo,
                   ipc, rpc,
                   pc, ac, cc, fc, tru);
           Vxcopy(nx, ny, nz, RAT(u, VAT2(iz, 1, lev)), w2);
           rho = Vxdot(nx, ny, nz, w1, w2);

           // Stopping test
           // w2=A*x, w1=x, stop = 2-norm(A*x-lambda*x)
           // w4 = w2 - rho*w1 is the eigen-residual; its norm is only
           // printed, the loop exit uses relerr.
           alpha = -1.0;
           Vxcopy(nx, ny, nz, w1, w3);
           Vxcopy(nx, ny, nz, w2, w4);
           Vxscal(nx, ny, nz, &rho, w3);
           Vxaxpy(nx, ny, nz, &alpha, w3, w4);
           error = Vxnrm2(nx, ny, nz, w4);
           // NOTE(review): rho is not checked for zero before the division.
           relerr = VABS( rho - oldrho ) / VABS( rho );

           // I/O
           if (*iinfo > 3) {
               Vnm_print(2, "Vmpower: iter=%d; error=%f; relerr=%f; estimate=%f",
                       *iters, error, relerr, rho);

           // NOTE(review): the body of this test (presumably a break) is
           // missing from the excerpt.
           if ((relerr < *tol) || (*iters == *itmax)) {

           oldrho = rho;

    *eigmax = rho;
Exemplo n.º 14
/* Guesses the kind of input device from the relative/absolute axes reported
 * in info, and configures valuators, buttons and tun->is_absolute
 * accordingly.  Three device shapes are tested in turn: a mouse (REL_X and
 * REL_Y), a tablet (ABS_X and ABS_Y) and a six-axis device (ABS_X/Y/Z with
 * symmetric ranges, mapped as relative axes).
 * NOTE(review): this excerpt lacks the return type, all braces, and the
 * final operand of each "&&" chain (each condition runs straight into the
 * first logging call); the wheel and tool-pen sections have also lost their
 * enclosing tests.  Confirm against the upstream source. */
tunAutoconfigDeviceRec (LocalDevicePtr local, TunDevicePtr tun, TunDeviceInfo info)
  /* --- mouse: relative X and Y --- */
  if (TUN_DEVICE_TEST_VAL_REL (info, REL_X) &&
      TUN_DEVICE_TEST_VAL_REL (info, REL_Y) &&
      OTLOG (local, "I found Rel(X,Y), Button(Left)");
      OTLOG (local, "I think it is a mouse! :)");
      /* relative axes X and Y become valuators 0 and 1 */
      VREL (REL_X, 0);
      VREL (REL_Y, 1);

	  OTLOG (local, "I found mouse wheel - mapping to BUTTON 4 & 5");
	  RVALUATOR (REL_WHEEL). mouse_wheel_hack = TRUE;

      tun->is_absolute = FALSE;

  /* --- tablet: absolute X and Y --- */
  if (TUN_DEVICE_TEST_VAL_ABS (info, ABS_X) &&
      TUN_DEVICE_TEST_VAL_ABS (info, ABS_Y) &&
      OTLOG (local, "I found Abs (X,Y), Button(Touch)");
      OTLOG (local, "I think it is a Tablet! :)");

      /* absolute axes X and Y become valuators 0 and 1 */
      VABS (ABS_X,0);	
      VABS (ABS_Y,1);

      TLOG ("Reverse Y coordinate (tablets have 0,0 in left lower corner)");
      AVALUATOR (ABS_Y).upsidedown = TRUE;



      BUTTON (BTN_TOUCH, 1);

	  OTLOG (local, "Proximity event using ToolPen button");
	  tun->lbut_to_xbut_tbl [BTN_TOOL_PEN - tun->first_lbutton] = TUN_BUTTON_PROXIMITY;

      tun->is_absolute = TRUE;

  /* --- six-axis device: every tested axis has a range symmetric about 0 --- */
  if (TUN_DEVICE_TEST_VAL_ABS (info, ABS_X) &&
      TUN_DEVICE_TEST_VAL_ABS (info, ABS_Y) &&
      TUN_DEVICE_TEST_VAL_ABS (info, ABS_Z) &&
      AVALUATOR(ABS_X).min ==  - AVALUATOR(ABS_X).max &&
      AVALUATOR(ABS_Y).min ==  - AVALUATOR(ABS_Y).max &&
      AVALUATOR(ABS_Z).min ==  - AVALUATOR(ABS_Z).max &&
      AVALUATOR(ABS_RX).min == - AVALUATOR(ABS_RX).max &&
      AVALUATOR(ABS_RY).min == - AVALUATOR(ABS_RY).max &&
      OTLOG (local, "found Abs (X,Y,Z,RX,RY,RZ)");
      OTLOG (local, "I think it is some sort of 6DO device! :)");

      /* absolute axes are exposed as relative valuators 0..5 */
      VABSASREL (ABS_X, 0);
      VABSASREL (ABS_Y, 1);
      VABSASREL (ABS_Z, 2);

      VABSASREL (ABS_RX, 3);
      VABSASREL (ABS_RY, 4);
      VABSASREL (ABS_RZ, 5);

      tun->is_absolute = FALSE;
Exemplo n.º 15
/// Accumulates direct-polarization forces and torques for every atom over
/// two multigrid calculations (i = 0 contributes with sign -1, i = 1 with
/// sign +1) and scales the totals by kT.
/// @param uind  Induced dipoles, stored on the atoms via Vatom_setInducedDipole
/// @param uinp  Dipoles stored on the atoms via Vatom_setNLInducedDipole
/// @param rff   Output: per-atom force, zeroed here and then accumulated
/// @param rft   Output: per-atom torque, zeroed here and then accumulated
/// @note Relies on names not declared in this excerpt (alist, nosh, permU,
///       indU, nlIndU, the map arrays, realCenter); presumably file-scope
///       state — confirm.
/// @note Many closing braces, and the handling after a failed initMG, are
///       absent from this excerpt.
void pbdirectpolforce_(double uind[maxatm][3], double uinp[maxatm][3],
                       double rff[maxatm][3], double rft[maxatm][3]) {

    Vpmg  *pmg[NOSH_MAXCALC];
    Vpmgp *pmgp[NOSH_MAXCALC];
    Vpbe  *pbe[NOSH_MAXCALC];
    MGparm *mgparm = VNULL;
    PBEparm *pbeparm = VNULL;
    Vatom *atom = VNULL;
    double kT, force[3], torque[3];
    double sign, zkappa2, epsp, epsw;
    int i,j;

    for (i=0; i<NOSH_MAXCALC; i++) {
       pmg[i] = VNULL;
       pmgp[i] = VNULL;
       pbe[i] = VNULL;

    // Read the converged induced dipole data into APBS Vatom structures.
    // The output arrays are cleared in the same pass.
    for (i=0; i < alist[0]->number; i++){
       atom = Valist_getAtom(alist[0],i);
       Vatom_setInducedDipole(atom, uind[i]);
       Vatom_setNLInducedDipole(atom, uinp[i]);
       for (j=0;j<3;j++){
          rff[i][j] = 0.0;
          rft[i][j] = 0.0;

    // Two calculations are combined with opposite signs.
    for (i=0; i<2; i++) {

       VASSERT(permU[i] != VNULL);
       VASSERT(indU[i] != VNULL);
       VASSERT(nlIndU[i] != VNULL);

       pmg[i] = VNULL;
       pmgp[i] = VNULL;
       pbe[i] = VNULL;

       /* Useful local variables */
       mgparm = nosh->calc[i]->mgparm;
       pbeparm = nosh->calc[i]->pbeparm;

       /* Set up problem */
       if (!initMG(i, nosh, mgparm, pbeparm, realCenter, pbe,
                   alist, dielXMap, dielYMap, dielZMap,
                   kappaMap, chargeMap, pmgp, pmg, potMap)) {
           Vnm_tprint( 2, "Error setting up MG calculation!\n");

       if (i == 0) {
         sign = -1.0;
       } else {
         sign = 1.0;

       // Q-Phi Force & Torque
       // Forces are only evaluated for a linear problem with one of the
       // spline surface methods.
       if (!pmg[i]->pmgp->nonlin &&
          (pmg[i]->surfMeth == VSM_SPLINE ||
           pmg[i]->surfMeth == VSM_SPLINE3 ||
           pmg[i]->surfMeth == VSM_SPLINE4)) {
          for (j=0; j < alist[0]->number; j++){
             Vpmg_qfDirectPolForce(pmg[i], permU[i], indU[i], j, force, torque);
             rff[j][0] += sign * force[0];
             rff[j][1] += sign * force[1];
             rff[j][2] += sign * force[2];
             rft[j][0] += sign * torque[0];
             rft[j][1] += sign * torque[1];
             rft[j][2] += sign * torque[2];
             Vpmg_qfNLDirectPolForce(pmg[i], permU[i],
                                     nlIndU[i], j,force,torque);
             rff[j][0] += sign * force[0];
             rff[j][1] += sign * force[1];
             rff[j][2] += sign * force[2];
             rft[j][0] += sign * torque[0];
             rft[j][1] += sign * torque[1];
             rft[j][2] += sign * torque[2];
           // Dielectric Boundary Force
           // Skipped when solute and solvent dielectrics are equal to
           // within VPMGSMALL.
           epsp = Vpbe_getSoluteDiel(pmg[i]->pbe);
           epsw = Vpbe_getSolventDiel(pmg[i]->pbe);
           if (VABS(epsp-epsw) > VPMGSMALL) {
              for (j=0; j < alist[0]->number; j++){
                 Vpmg_dbDirectPolForce(pmg[i], permU[i], indU[i], j, force);
                 rff[j][0] += sign * force[0];
                 rff[j][1] += sign * force[1];
                 rff[j][2] += sign * force[2];
                 Vpmg_dbNLDirectPolForce(pmg[i], permU[i], nlIndU[i], j, force);
                 rff[j][0] += sign * force[0];
                 rff[j][1] += sign * force[1];
                 rff[j][2] += sign * force[2];
           // Ionic Boundary Force
           // Skipped when zkappa2 does not exceed VPMGSMALL.
           zkappa2 = Vpbe_getZkappa2(pmg[i]->pbe);
           if (zkappa2 > VPMGSMALL) {
               for (j=0; j < alist[0]->number; j++){
                  Vpmg_ibDirectPolForce(pmg[i], permU[i], indU[i], j, force);
                  rff[j][0] += sign * force[0];
                  rff[j][1] += sign * force[1];
                  rff[j][2] += sign * force[2];
                  Vpmg_ibNLDirectPolForce(pmg[i], permU[i],
                                          nlIndU[i], j, force);
                  rff[j][0] += sign * force[0];
                  rff[j][1] += sign * force[1];
                  rff[j][2] += sign * force[2];

    // kT in kcal/mol
    // The temperature is hard-coded to 298.15 here.
    kT = Vunit_kb * (1e-3) * Vunit_Na * 298.15 / 4.184;
    for (i=0; i<alist[0]->number; i++){
       rff[i][0] *= kT;
       rff[i][1] *= kT;
       rff[i][2] *= kT;
       rft[i][0] *= kT;
       rft[i][1] *= kT;
       rft[i][2] *= kT;

    killMG(nosh, pbe, pmgp, pmg);
Exemplo n.º 16
// Emits ARM code for the PowerPC fctiwx instruction (operands FB -> FD).
// The visible sequence range-checks FB against the two entries of
// minmaxFloat (raising FPSCR_VXCVI when clamping), picks a rounding mode from
// the guest FPSCR, updates the FPSCR status bits and stores FPSCR back.
// NOTE(review): this listing seems to be missing lines (block braces, the
// jump-target fixups, the conversion itself) -- confirm against the full file.
void JitArm::fctiwx(UGeckoInstruction inst)
	u32 b = inst.FB;
	u32 d = inst.FD;

	ARMReg vB = fpr.R0(b);
	ARMReg vD = fpr.R0(d);
	// Three scratch FP registers.
	ARMReg V0 = fpr.GetReg();
	ARMReg V1 = fpr.GetReg();
	ARMReg V2 = fpr.GetReg();

	// Scratch GPRs: rA carries table addresses, fpscrReg caches the guest FPSCR.
	ARMReg rA = gpr.GetReg();
	ARMReg fpscrReg = gpr.GetReg();

	FixupBranch DoneMax, DoneMin;
	LDR(fpscrReg, R9, PPCSTATE_OFF(fpscr));
	MOVI2R(rA, (u32)minmaxFloat);

	// Check if greater than max float
		VLDR(V0, rA, 8); // Load Max
		VCMPE(vB, V0);
		VMRS(_PC); // Loads in to APSR
		FixupBranch noException = B_CC(CC_LE);
		VMOV(vD, V0); // Set to max
		SetFPException(fpscrReg, FPSCR_VXCVI);
		DoneMax = B();
	// Check if less than min float
		VLDR(V0, rA, 0); // Load Min
		VCMPE(vB, V0);
		// NOTE(review): second declaration of noException in what reads as a
		// single scope here; presumably each range check had its own braces.
		FixupBranch noException = B_CC(CC_GE);
		VMOV(vD, V0); // Set to min
		SetFPException(fpscrReg, FPSCR_VXCVI);
		DoneMin = B();
	// Within ranges, convert to integer
	// Set rounding mode first
	// PPC <-> ARM rounding modes
	// 0, 1, 2, 3 <-> 0, 3, 1, 2
	ARMReg rB = gpr.GetReg();
	// Bits 22-23
	// NOTE(review): in the visible code rA still holds the minmaxFloat address
	// at this point; presumably a line loading the ARM FPSCR into rA is
	// missing from this listing -- confirm.
	BIC(rA, rA, Operand2(3, 5));

	LDR(rB, R9, PPCSTATE_OFF(fpscr));
	AND(rB, rB, 0x3); // Get the FPSCR rounding bits
	CMP(rB, 1);
	SetCC(CC_EQ); // zero
		ORR(rA, rA, Operand2(3, 5));
		CMP(rB, 2); // +inf
			ORR(rA, rA, Operand2(1, 5));
			CMP(rB, 3); // -inf
				ORR(rA, rA, Operand2(2, 5));
	ORR(rA, rA, Operand2(3, 5));
	// Compare the value in vD with the source operand; presumably this decides
	// between the "exact" path (clear FRFIMask) and the "inexact" path below.
	VCMPE(vD, vB);

		BIC(fpscrReg, fpscrReg, FRFIMask);
		FixupBranch DoneEqual = B();
	// Inexact path: raise FPSCR_XX and set the FIMask bits.
	SetFPException(fpscrReg, FPSCR_XX);
	ORR(fpscrReg, fpscrReg, FIMask);
	// Compare magnitudes of source and result before setting FRMask.
	VABS(V1, vB);
	VABS(V2, vD);
	VCMPE(V2, V1);
		ORR(fpscrReg, fpscrReg, FRMask);


	// OR the contents of doublenum into the destination register.
	MOVI2R(rA, (u32)&doublenum);
	VLDR(V0, rA, 0);
	NEONXEmitter nemit(this);
	nemit.VORR(vD, vD, V0);

	// Record form (Rc set): hand the FPSCR value to Helper_UpdateCR1.
	if (inst.Rc) Helper_UpdateCR1(fpscrReg, rA);

	// Write the updated FPSCR back to the PPC state.
	STR(fpscrReg, R9, PPCSTATE_OFF(fpscr));
Exemplo n.º 17
// Replacement emitter for the guest fabsf(): maps FPR 0 as dirty with FPR 12
// as input, emits one VABS from register 12 into register 0, and returns 4 as
// the instruction count of the replaced MIPS function.
int ArmJit::Replace_fabsf() {
	fpr.MapDirtyIn(0, 12);
	VABS(fpr.R(0), fpr.R(12));
	return 4;  // Number of instructions in the MIPS function
Exemplo n.º 18
/* ///////////////////////////////////////////////////////////////////////////
// Routine:  Vpee_markRefine
// Purpose:  Mark simplices for refinement.  Depending on akey this either
//           defers to Gem_markRefine (akey = -1, 0) or computes a per-simplex
//           error estimate (akey = 2, 3, 4) and queues every simplex whose
//           estimate exceeds the barrier selected by bkey.
// Args:     thee  = Vpee object (asserted non-null)
//           am    = AM object supplying aprx, u, ud and f
//           level = not referenced in the visible body
//           akey  = marking strategy; values outside -1..4 are rejected
//           rcol  = forwarded to Gem_markRefine / Vpee_ourSimp
//           etol  = error tolerance used to build the barrier
//           bkey  = 0: barrier = etol^2;  1: barrier = etol^2 / numSS
// Returns:  The value of `marked`
// Notes:    thee->killFlag selects the error attenuation that is reported:
//           0 = none, 1 = local partition only, 2 = spherical,
//           3 = neighbor-based (reported as not implemented).
//           NOTE(review): several lines (closing braces, trailing printf
//           arguments, some declarations) are not visible in this listing.
// Author:   Nathan Baker (and Michael Holst: the author of AM_markRefine, on
//           which this is based)
/////////////////////////////////////////////////////////////////////////// */
VPUBLIC int Vpee_markRefine(Vpee *thee,
                            AM *am,
                            int level,
                            int akey,
                            int rcol,
                            double etol,
                            int bkey
                            ) {

    Aprx *aprx;
    int marked = 0,
    double minError = 0.0,
           maxError = 0.0,
           errEst = 0.0,
    SS *sm;

    VASSERT(thee != VNULL);

    /* Get the Aprx object from AM */
    aprx = am->aprx;

    /* input check and some i/o */
    if ( ! ((-1 <= akey) && (akey <= 4)) ) {
        Vnm_print(0,"Vpee_markRefine: bad refine key; simplices marked = %d\n",
        return marked;

    /* For uniform markings, we have no effect */
    if ((-1 <= akey) && (akey <= 0)) {
        marked = Gem_markRefine(thee->gm, akey, rcol);
        return marked;

    /* Informative I/O */
    if (akey == 2) {
        Vnm_print(0,"Vpee_estRefine: using Aprx_estNonlinResid().\n");
    } else if (akey == 3) {
        Vnm_print(0,"Vpee_estRefine: using Aprx_estLocalProblem().\n");
    } else if (akey == 4) {
        Vnm_print(0,"Vpee_estRefine: using Aprx_estDualProblem().\n");
    } else {
        Vnm_print(0,"Vpee_estRefine: bad key given; simplices marked = %d\n",
        return marked;
    /* Report which error-attenuation scheme is active */
    if (thee->killFlag == 0) {
        Vnm_print(0, "Vpee_markRefine: No error attenuation -- simplices in all partitions will be marked.\n");
    } else if (thee->killFlag == 1) {
        Vnm_print(0, "Vpee_markRefine: Maximum error attenuation -- only simplices in local partition will be marked.\n");
    } else if (thee->killFlag == 2) {
        Vnm_print(0, "Vpee_markRefine: Spherical error attenutation -- simplices within a sphere of %4.3f times the size of the partition will be marked\n",
    /* Neighbor-based attenuation is killFlag 3; testing 2 again here made
     * this branch unreachable and sent killFlag 3 to the "bogus" case. */
    } else if (thee->killFlag == 3) {
        Vnm_print(0, "Vpee_markRefine: Neighbor-based error attenuation -- simplices in the local and neighboring partitions will be marked [NOT IMPLEMENTED]!\n");
    } else {
        Vnm_print(2,"Vpee_markRefine: bogus killFlag given; simplices marked = %d\n",
        return marked;

    /* set the barrier type */
    mlevel = (etol*etol) / Gem_numSS(thee->gm);
    if (bkey == 0) {
        barrier = (etol*etol);
        Vnm_print(0,"Vpee_estRefine: forcing [err per S] < [TOL] = %g\n",
    } else if (bkey == 1) {
        barrier = mlevel;
        Vnm_print(0,"Vpee_estRefine: forcing [err per S] < [(TOL^2/numS)^{1/2}] = %g\n",
    } else {
        Vnm_print(0,"Vpee_estRefine: bad bkey given; simplices marked = %d\n",
        return marked;

    /* timer */
    Vnm_tstart(30, "error estimation");

    /* count = num generations to produce from marked simplices (minimally) */
    count = 1; /* must be >= 1 */

    /* check the refinement Q for emptiness */
    currentQ = 0;
    if (Gem_numSQ(thee->gm,currentQ) > 0) {
        Vnm_print(0,"Vpee_markRefine: non-empty refinement Q%d....clearing..",
    if (Gem_numSQ(thee->gm,!currentQ) > 0) {
        Vnm_print(0,"Vpee_markRefine: non-empty refinement Q%d....clearing..",
    VASSERT( Gem_numSQ(thee->gm,currentQ)  == 0 );
    VASSERT( Gem_numSQ(thee->gm,!currentQ) == 0 );

    /* clear everyone's refinement flags */
    Vnm_print(0,"Vpee_markRefine: clearing all simplex refinement flags..");
    for (i=0; i<Gem_numSS(thee->gm); i++) {
        if ( (i>0) && (i % VPRTKEY) == 0 ) Vnm_print(0,"[MS:%d]",i);
        sm = Gem_SS(thee->gm,i);

    /* Simplex flag clearing */
    if (akey == -1) return marked;
    /* Uniform & user-defined refinement*/
    if ((akey == 0) || (akey == 1)) {
        smid = 0;
        while ( smid < Gem_numSS(thee->gm)) {
            /* Get the simplex and find out if it's markable */
            sm = Gem_SS(thee->gm,smid);
            markMe = Vpee_ourSimp(thee, sm, rcol);
            if (markMe) {
                if (akey == 0) {
                    Gem_appendSQ(thee->gm,currentQ, sm);
                } else if (Vpee_userDefined(thee, sm)) {
                    Gem_appendSQ(thee->gm,currentQ, sm);

    /* gerror = global error accumulation */
    aprx->gerror = 0.;

    /* traverse the simplices and process the error estimates */
    Vnm_print(0,"Vpee_markRefine: estimating error..");
    smid = 0;
    while ( smid < Gem_numSS(thee->gm)) {

        /* Get the simplex and find out if it's markable */
        sm = Gem_SS(thee->gm,smid);
        markMe = Vpee_ourSimp(thee, sm, rcol);

        if ( (smid>0) && (smid % VPRTKEY) == 0 ) Vnm_print(0,"[MS:%d]",smid);

        /* Produce an error estimate for this element if it is in the set */
        if (markMe) {
            if (akey == 2) {
                errEst = Aprx_estNonlinResid(aprx, sm, am->u,am->ud,am->f);
            } else if (akey == 3) {
                errEst = Aprx_estLocalProblem(aprx, sm, am->u,am->ud,am->f);
            } else if (akey == 4) {
                errEst = Aprx_estDualProblem(aprx, sm, am->u,am->ud,am->f);
            VASSERT( errEst >= 0. );

            /* if error estimate above tol, mark element for refinement */
            if ( errEst > barrier ) {
                Gem_appendSQ(thee->gm,currentQ, sm); /*add to refinement Q*/
                SS_setRefineKey(sm,currentQ,1);      /* note now on refine Q */
                SS_setRefinementCount(sm,count);     /* refine X many times? */

            /* keep track of min/max errors over the mesh */
            /* NOTE(review): minError starts at 0.0 and errEst is asserted
             * non-negative, so this minimum can never move off 0 -- confirm
             * whether a large initial value was intended. */
            minError = VMIN2( VSQRT(VABS(errEst)), minError );
            maxError = VMAX2( VSQRT(VABS(errEst)), maxError );

            /* store the estimate */
            Bvec_set( aprx->wev, smid, errEst );

            /* accumulate into global error (errEst is SQUAREd already) */
            aprx->gerror += errEst;

        /* otherwise store a zero for the estimate */
        } else {
            Bvec_set( aprx->wev, smid, 0. );


    /* do some i/o */
    Vnm_print(0,"..done.  [marked=<%d/%d>]\n",marked,Gem_numSS(thee->gm));
    Vnm_print(0,"Vpee_estRefine: TOL=<%g>  Global_Error=<%g>\n",
        etol, aprx->gerror);
    Vnm_print(0,"Vpee_estRefine: (TOL^2/numS)^{1/2}=<%g>  Max_Ele_Error=<%g>\n",
    Vnm_tstop(30, "error estimation");

    /* check for making the error tolerance */
    if ((bkey == 1) && (aprx->gerror <= etol)) {
            "Vpee_estRefine: *********************************************\n");
            "Vpee_estRefine: Global Error criterion met; setting marked=0.\n");
            "Vpee_estRefine: *********************************************\n");
        marked = 0;

    /* return */
    return marked;

Exemplo n.º 19
	// Compiles the VFPU two-operand vector ops selected by bits 16-20 of `op`.
	// Emitters are visible for vmov, vabs, vneg, vrcp, vrsq, vsqrt and vnrcp.
	// Each lane is computed into tempxregs[], copied to the destination lanes,
	// and finally the D prefix is applied.
	// NOTE(review): braces, break statements and the bodies of several cases
	// are not visible in this listing -- confirm against the full source.
	void Jit::Comp_VV2Op(u32 op) {


		// NOTE(review): the statement guarded by this check is not visible here.
		if (js.HasUnknownPrefix())

		VectorSize sz = GetVecSize(op);
		int n = GetNumVectorElements(sz);

		u8 sregs[4], dregs[4];
		GetVectorRegsPrefixS(sregs, sz, _VS);
		GetVectorRegsPrefixD(dregs, sz, _VD);

		// Pick the register each lane is computed into: a fresh temp when
		// writing dregs[i] directly is not overlap-safe, otherwise dregs[i].
		ARMReg tempxregs[4];
		for (int i = 0; i < n; ++i)
			if (!IsOverlapSafeAllowS(dregs[i], i, n, sregs))
				int reg = fpr.GetTempV();
				fpr.MapRegV(reg, MAP_NOINIT | MAP_DIRTY);
				tempxregs[i] = fpr.V(reg);
				// Destination needs its old value only when it is also the source.
				fpr.MapRegV(dregs[i], (dregs[i] == sregs[i] ? 0 : MAP_NOINIT) | MAP_DIRTY);
				tempxregs[i] = fpr.V(dregs[i]);

		// Warning: sregs[i] and tempxregs[i] may be the same reg.
		// Helps for vmov, hurts for vrcp, etc.
		for (int i = 0; i < n; ++i)
			switch ((op >> 16) & 0x1f)
			case 0: // d[i] = s[i]; break; //vmov
				// Probably for swizzle.
				VMOV(tempxregs[i], fpr.V(sregs[i]));
			case 1: // d[i] = fabsf(s[i]); break; //vabs
				//if (!fpr.V(sregs[i]).IsSimpleReg(tempxregs[i]))
				VABS(tempxregs[i], fpr.V(sregs[i]));
			case 2: // d[i] = -s[i]; break; //vneg
				VNEG(tempxregs[i], fpr.V(sregs[i]));
			case 4: // if (s[i] < 0) d[i] = 0; else {if(s[i] > 1.0f) d[i] = 1.0f; else d[i] = s[i];} break;    // vsat0
			case 5: // if (s[i] < -1.0f) d[i] = -1.0f; else {if(s[i] > 1.0f) d[i] = 1.0f; else d[i] = s[i];} break;  // vsat1
			case 16: // d[i] = 1.0f / s[i]; break; //vrcp
				// S0 = 1.0f, then divide by the source lane.
				MOVI2F(S0, 1.0f, R0);
				VDIV(tempxregs[i], S0, fpr.V(sregs[i]));
			case 17: // d[i] = 1.0f / sqrtf(s[i]); break; //vrsq
				MOVI2F(S0, 1.0f, R0);
				VSQRT(S1, fpr.V(sregs[i]));
				VDIV(tempxregs[i], S0, S1);
			case 18: // d[i] = sinf((float)M_PI_2 * s[i]); break; //vsin
			case 19: // d[i] = cosf((float)M_PI_2 * s[i]); break; //vcos
			case 20: // d[i] = powf(2.0f, s[i]); break; //vexp2
			case 21: // d[i] = logf(s[i])/log(2.0f); break; //vlog2
			case 22: // d[i] = sqrtf(s[i]); break; //vsqrt
				// The square root is followed by VABS on the result.
				VSQRT(tempxregs[i], fpr.V(sregs[i]));
				VABS(tempxregs[i], tempxregs[i]);
			case 23: // d[i] = asinf(s[i] * (float)M_2_PI); break; //vasin
			case 24: // d[i] = -1.0f / s[i]; break; // vnrcp
				MOVI2F(S0, -1.0f, R0);
				VDIV(tempxregs[i], S0, fpr.V(sregs[i]));
			case 26: // d[i] = -sinf((float)M_PI_2 * s[i]); break; // vnsin
			case 28: // d[i] = 1.0f / expf(s[i] * (float)M_LOG2E); break; // vrexp2

		// Map the real destinations and copy every lane out of its temp.
		fpr.MapRegsV(dregs, sz, MAP_NOINIT | MAP_DIRTY);
		for (int i = 0; i < n; ++i)
			VMOV(fpr.V(dregs[i]), tempxregs[i]);

		ApplyPrefixD(dregs, sz);

Exemplo n.º 20
 * ***************************************************************************
 * Routine:  Aprx_partInert
 * Purpose:  Partition the domain using inertial bisection.
 *           Partition sets of points in R^d (d=2 or d=3) by viewing them
 *           as point masses of a rigid body, and by then employing the
 *           classical mechanics ideas of inertia and Euler axes.
 * Notes:    We first locate the center of mass, then change the coordinate
 *           system so that the center of mass is located at the origin.
 *           We then form the (symmetric) dxd inertia tensor, and then find
 *           the set of (real) eigenvalues and (orthogonal) eigenvectors.
 *           The eigenvectors represent the principal inertial rotation axes,
 *           and the eigenvalues represent the inertial strength in those
 *           principal directions.  The smallest inertial component along an
 *           axis represents a direction along which the rigid body is most
 *           "line-like" (assuming all the points have the same mass).
 *           For our purposes, it makes sense to use the axis (eigenvector)
 *           corresponding to the smallest inertia (eigenvalue) as the line to
 *           bisect with a line (d=2) or a plane (d=3).  We know the center of
 *           mass, and once we also have this particular eigenvector, we can
 *           effectively bisect the point set into the two regions separated
 *           by the line/plane simply by taking an inner-product of the
 *           eigenvector with each point (or rather the 2- or 3-vector
 *           representing the point).  A positive inner-product represents one
 *           side of the cutting line/plane, and a negative inner-product
 *           represents the other side (a zero inner-product is right on the
 *           cutting line/plane, so we arbitrarily assign it to one region or
 *           the other).
 * Author:   Michael Holst
 * ***************************************************************************
/* Partition numC points by inertial bisection: builds the tensor II from
 * simH[i].bc and simH[i].mass, selects the column of V whose D[i][i] has the
 * smallest magnitude, and writes the normalized projection of each point onto
 * that axis into evec[0..numC-1].  pcolor only appears in the log message.
 * The visible code always returns 0.
 * NOTE(review): closing braces, and any initialization of I, are not visible
 * in this listing -- confirm against the full source. */
VPUBLIC int Aprx_partInert(Aprx *thee, int pcolor,
    int numC, double *evec, simHelper *simH)
    int i, j, k, lambdaI;
    double rad, sca, lambda, normal, caxis[3];
    Mat3 I, II, V, D;

    Vnm_print(0,"Aprx_partInert: WARNING: assuming single-chart manifold.\n");
    Vnm_print(0,"Aprx_partInert: [pc=%d] partitioning:\n", pcolor);

    /* form the inertia tensors */
    Mat3_init(II, 0.);
    for (i=0; i<numC; i++) {

        /* get vector length (squared!) */
        rad = 0.;
        for (j=0; j<3; j++) {
            rad += ( simH[i].bc[j] * simH[i].bc[j] );

        /* add contribution to the inertia tensor */
        /* NOTE(review): I is read here but no assignment to it is visible;
         * presumably it is meant to be the identity -- confirm. */
        for (j=0; j<3; j++) {
            for (k=0; k<3; k++) {
                II[j][k] += ( simH[i].mass *
                    (I[j][k]*rad - simH[i].bc[j]*simH[i].bc[k]) );

    /* find the d-principal axes, and isolate the single axis we need */
    /* (the principal axis we want is the one with SMALLEST moment) */
    /* II is scaled by 1/Mat3_nrm8(II) before the decomposition */
    sca = Mat3_nrm8(II);
    Mat3_scal(II, 1./sca);
    (void)Mat3_qri(V, D, II);
    lambda  = VLARGE;
    lambdaI = -1;
    for (i=0; i<3; i++) {
        if ( VABS(D[i][i]) < lambda ) {
            lambda  = VABS(D[i][i]);
            lambdaI = i;
    VASSERT( lambda > 0. );
    VASSERT( lambda != VLARGE );
    VASSERT( lambdaI >= 0 );
    /* copy column lambdaI of V into caxis */
    for (i=0; i<3; i++) {
        caxis[i] = V[i][lambdaI];
    normal = Vec3_nrm2(caxis);
    VASSERT( normal > 0. );

    /* decompose points based on bisecting principal axis with a line or */
    /* plane; we do this using an inner-product test with normal vec "caxis" */
    normal = 0;
    for (i=0; i<numC; i++) {
        evec[i] = Vec3_dot( simH[i].bc, caxis );
        normal += (evec[i]*evec[i]);
    normal = VSQRT( normal );

    /* normalize the final result */
    /* NOTE(review): no guard is visible for normal == 0 (e.g. numC == 0 or
     * every projection being zero) before this division. */
    for (i=0; i<numC; i++) {
        evec[i] = evec[i] / normal;

    return 0;
Exemplo n.º 21
// Emits ARM code for the PowerPC fctiwzx instruction (operands FB -> FD).
// The visible sequence range-checks FB against the two entries of
// minmaxFloat (raising FPSCR_VXCVI when clamping), then updates the FPSCR
// status bits and stores FPSCR back.  Unlike fctiwx, no rounding-mode
// selection appears here.
// NOTE(review): this listing seems to be missing lines (block braces, the
// jump-target fixups, the conversion itself) -- confirm against the full file.
void JitArm::fctiwzx(UGeckoInstruction inst)
	u32 b = inst.FB;
	u32 d = inst.FD;

	ARMReg vB = fpr.R0(b);
	ARMReg vD = fpr.R0(d);
	// Three scratch FP registers.
	ARMReg V0 = fpr.GetReg();
	ARMReg V1 = fpr.GetReg();
	ARMReg V2 = fpr.GetReg();

	// Scratch GPRs: rA carries table addresses, fpscrReg caches the guest FPSCR.
	ARMReg rA = gpr.GetReg();
	ARMReg fpscrReg = gpr.GetReg();

	FixupBranch DoneMax, DoneMin;
	LDR(fpscrReg, R9, PPCSTATE_OFF(fpscr));
	MOVI2R(rA, (u32)minmaxFloat);

	// Check if greater than max float
		VLDR(V0, rA, 8); // Load Max
		VCMPE(vB, V0);
		VMRS(_PC); // Loads in to APSR
		FixupBranch noException = B_CC(CC_LE);
		VMOV(vD, V0); // Set to max
		SetFPException(fpscrReg, FPSCR_VXCVI);
		DoneMax = B();
	// Check if less than min float
		VLDR(V0, rA, 0); // Load Min
		VCMPE(vB, V0);
		// NOTE(review): second declaration of noException in what reads as a
		// single scope here; presumably each range check had its own braces.
		FixupBranch noException = B_CC(CC_GE);
		VMOV(vD, V0); // Set to min
		SetFPException(fpscrReg, FPSCR_VXCVI);
		DoneMin = B();
	// Within ranges, convert to integer
	// Compare the value in vD with the source operand; presumably this decides
	// between the "exact" path (clear FRFIMask) and the "inexact" path below.
	VCMPE(vD, vB);

		BIC(fpscrReg, fpscrReg, FRFIMask);
		FixupBranch DoneEqual = B();
	// Inexact path: raise FPSCR_XX and set the FIMask bits.
	SetFPException(fpscrReg, FPSCR_XX);
	ORR(fpscrReg, fpscrReg, FIMask);
	// Compare magnitudes of source and result before setting FRMask.
	VABS(V1, vB);
	VABS(V2, vD);
	VCMPE(V2, V1);
		ORR(fpscrReg, fpscrReg, FRMask);


	// OR the contents of doublenum into the destination register.
	MOVI2R(rA, (u32)&doublenum);
	VLDR(V0, rA, 0);
	NEONXEmitter nemit(this);
	nemit.VORR(vD, vD, V0);

	// Record form (Rc set): hand the FPSCR value to Helper_UpdateCR1.
	if (inst.Rc) Helper_UpdateCR1(fpscrReg, rA);

	// Write the updated FPSCR back to the PPC state.
	STR(fpscrReg, R9, PPCSTATE_OFF(fpscr));
Exemplo n.º 22
/* Permanent-multipole electrostatics entry point (trailing-underscore name,
 * presumably called from Fortran -- confirm).
 *
 * Inputs:
 *   natom  number of atoms
 *   x      atom positions
 *   rad    atom radii
 *   rpole  per-atom multipoles: [0] charge, [1..3] dipole, [4..12] quadrupole
 * Outputs (zeroed here, accumulated over two calculations, then scaled):
 *   total   sum of energy[] after unit conversion
 *   energy  per-atom energy
 *   fld     per-atom field
 *   rff     per-atom force
 *   rft     per-atom torque
 *
 * Calculation 0 is accumulated with sign -1 and calculation 1 with sign +1.
 * NOTE(review): closing braces, error-path statements and several globals
 * (nosh, alist, permU, mem, evol, ...) are not visible in this listing. */
void apbsempole_(int *natom, double x[maxatm][3],
                 double rad[maxatm], double rpole[maxatm][13],
                 double *total,
                 double energy[maxatm], double fld[maxatm][3],
                 double rff[maxatm][3], double rft[maxatm][3]) {

    /* Misc. pointers to APBS data structures */
    Vpmg  *pmg[NOSH_MAXCALC];
    Vpmgp *pmgp[NOSH_MAXCALC];
    Vpbe  *pbe[NOSH_MAXCALC];
    MGparm *mgparm = VNULL;
    PBEparm *pbeparm = VNULL;
    Vatom *atom = VNULL;

    /* Vgrid configuration for the kappa and dielectric maps */
    double nx,ny,nz,hx,hy,hzed,xmin,ymin,zmin;
    double *data;
    double zkappa2, epsp, epsw;

    /* Loop indices */
    int i,j;

    /* Observables and unit conversion */
    double sign, force[3], torque[3], field[3];
    double kT,electric,debye;
    double charge, dipole[3], quad[9];
    debye = 4.8033324;

    for (i=0; i<NOSH_MAXCALC; i++) {
       pmg[i] = VNULL;
       pmgp[i] = VNULL;
       pbe[i] = VNULL;

    /* Kill the saved potential Vgrids */
    for (i=0; i<2; i++){
        if (permU[i] != VNULL) Vgrid_dtor(&permU[i]);
        if (indU[i] != VNULL) Vgrid_dtor(&indU[i]);
        if (nlIndU[i] != VNULL) Vgrid_dtor(&nlIndU[i]);

    /* Kill the old atom list */
    /* NOTE(review): the body of this branch is not visible in this listing. */
    if (alist[0] != VNULL) {

    /* Create a new atom list (mol == 1) */
    if (alist[0] == VNULL) {
       alist[0] = Valist_ctor();
       alist[0]->atoms = Vmem_malloc(alist[0]->vmem, *natom, (sizeof(Vatom)));
       alist[0]->number = *natom;

    /* Read TINKER input data into Vatom instances. */
    for (i=0; i < alist[0]->number; i++){
       atom = Valist_getAtom(alist[0],i);
       Vatom_setAtomID(atom, i);
       Vatom_setPosition(atom, x[i]);
       Vatom_setRadius(atom, rad[i]);
       /* rpole[i][0] is the charge */
       charge = rpole[i][0];
       Vatom_setCharge(atom, charge);
       /* rpole[i][1..3] is the dipole */
       dipole[0] = rpole[i][1];
       dipole[1] = rpole[i][2];
       dipole[2] = rpole[i][3];
       Vatom_setDipole(atom, dipole);
       /* rpole[i][4..12] is the nine-component quadrupole */
       quad[0] = rpole[i][4];
       quad[1] = rpole[i][5];
       quad[2] = rpole[i][6];
       quad[3] = rpole[i][7];
       quad[4] = rpole[i][8];
       quad[5] = rpole[i][9];
       quad[6] = rpole[i][10];
       quad[7] = rpole[i][11];
       quad[8] = rpole[i][12];
       Vatom_setQuadrupole(atom, quad);
       /* Useful check
       printf(" %i %f (%f,%f,%f)\n",i,rad[i], x[i][0], x[i][1], x[i][2]);
       printf(" %f\n %f,%f,%f\n", charge, dipole[0], dipole[1], dipole[2]);
       printf(" %f\n", quad[0]);
       printf(" %f %f\n", quad[3], quad[4]);
       printf(" %f %f %f\n", quad[6], quad[7], quad[8]); */
       /* Zero the per-atom accumulators before the two solves */
       energy[i] = 0.0;
       for (j=0;j<3;j++){
          fld[i][j] = 0.0;
          rff[i][j] = 0.0;
          rft[i][j] = 0.0;

    nosh->nmol = 1;

    /* Only call the setupCalc routine once, so that we can
       reuse this nosh object */
    if (nosh->ncalc < 2) {
       if (NOsh_setupElecCalc(nosh, alist) != 1) {
          printf("Error setting up calculations\n");

    /* Solve the LPBE for the homogeneous and then solvated states */
    for (i=0; i<2; i++) {

       /* Useful local variables */
       mgparm = nosh->calc[i]->mgparm;
       pbeparm = nosh->calc[i]->pbeparm;

       /* Just to be robust */
       if (!MGparm_check(mgparm)){
          printf("MGparm Check failed\n");
          printMGPARM(mgparm, realCenter);
       if (!PBEparm_check(pbeparm)){
          printf("PBEparm Check failed\n");

       /* Set up the problem */
       mgparm->chgs = VCM_PERMANENT;
       if (!initMG(i, nosh, mgparm, pbeparm, realCenter, pbe,
                   alist, dielXMap, dielYMap, dielZMap,
                   kappaMap, chargeMap, pmgp, pmg, potMap)) {
              Vnm_tprint( 2, "Error setting up MG calculation!\n");

       /* Solve the PDE */
       if (solveMG(nosh, pmg[i], mgparm->type) != 1) {
           Vnm_tprint(2, "Error solving PDE!\n");

       /* Set partition information for observables and I/O */
       /* Note - parallel operation has NOT been tested. */
       if (setPartMG(nosh, mgparm, pmg[i]) != 1) {
           Vnm_tprint(2, "Error setting partition info!\n");

       /* Cache the grid geometry of this calculation */
       nx = pmg[i]->pmgp->nx;
       ny = pmg[i]->pmgp->ny;
       nz = pmg[i]->pmgp->nz;
       hx = pmg[i]->pmgp->hx;
       hy = pmg[i]->pmgp->hy;
       hzed = pmg[i]->pmgp->hzed;
       xmin = pmg[i]->pmgp->xmin;
       ymin = pmg[i]->pmgp->ymin;
       zmin = pmg[i]->pmgp->zmin;

       /* Save dielectric/kappa maps into Vgrids, then change the nosh
        * data structure to think it read these maps in from a file.
        * The goal is to save setup time during convergence of the
        * induced dipoles. This is under consideration...
        * */
       // X (shifted)
       data = Vmem_malloc(mem, nx*ny*nz, sizeof(double));
       Vpmg_fillArray(pmg[i], data, VDT_DIELX, 0.0, pbeparm->pbetype);
       dielXMap[i] = Vgrid_ctor(nx,ny,nz,hx,hy,hzed,
                                xmin + 0.5*hx,ymin,zmin,data);
       dielXMap[i]->readdata = 1;
       // Y (shifted)
       data = Vmem_malloc(mem, nx*ny*nz, sizeof(double));
       Vpmg_fillArray(pmg[i], data, VDT_DIELY, 0.0, pbeparm->pbetype);
       dielYMap[i] = Vgrid_ctor(nx,ny,nz,hx,hy,hzed,
                                      xmin,ymin + 0.5*hy,zmin,data);
       dielYMap[i]->readdata = 1;
       // Z (shifted)
       data = Vmem_malloc(mem, nx*ny*nz, sizeof(double));
       Vpmg_fillArray(pmg[i], data, VDT_DIELZ, 0.0, pbeparm->pbetype);
       dielZMap[i] = Vgrid_ctor(nx,ny,nz,hx,hy,hzed,
                                      xmin,ymin,zmin + 0.5*hzed,data);
       dielZMap[i]->readdata = 1;
       // Kappa
       data = Vmem_malloc(mem, nx*ny*nz, sizeof(double));
       Vpmg_fillArray(pmg[i], data, VDT_KAPPA, 0.0, pbeparm->pbetype);
       kappaMap[i] = Vgrid_ctor(nx,ny,nz,hx,hy,hzed,xmin,ymin,zmin,data);
       kappaMap[i]->readdata = 1;

       // Update the pbeparam structure, since we now have
       // dielectric and kappa maps
       pbeparm->useDielMap = 1;
       pbeparm->dielMapID = i + 1;
       pbeparm->useKappaMap = 1;
       pbeparm->kappaMapID = i + 1;


       // Save the potential of this calculation into permU[i].
       // NOTE(review): this Vpmg_fillArray call passes one more argument
       // (pbeparm) than the four calls above -- confirm which arity is right.
       data = Vmem_malloc(mem, nx*ny*nz, sizeof(double));
       Vpmg_fillArray(pmg[i], data, VDT_POT, 0.0, pbeparm->pbetype, pbeparm);
       permU[i] = Vgrid_ctor(nx,ny,nz,hx,hy,hzed,xmin,ymin,zmin,data);
       permU[i]->readdata = 1;
       // set readdata flag to have the dtor to free data

       // Calculation 0 is subtracted, calculation 1 is added.
       if (i == 0){
          sign = -1.0;
       } else {
          sign = 1.0;

       /* Calculate observables */
       for (j=0; j < alist[0]->number; j++){
         energy[j] += sign * Vpmg_qfPermanentMultipoleEnergy(pmg[i], j);
         Vpmg_fieldSpline4(pmg[i], j, field);
         fld[j][0] += sign * field[0];
         fld[j][1] += sign * field[1];
         fld[j][2] += sign * field[2];

       /* Forces and torques are only accumulated for linear problems that
          use one of the spline surface methods. */
       if (!pmg[i]->pmgp->nonlin &&
          (pmg[i]->surfMeth == VSM_SPLINE ||
           pmg[i]->surfMeth == VSM_SPLINE3 ||
           pmg[i]->surfMeth == VSM_SPLINE4)) {
          for (j=0; j < alist[0]->number; j++){
            Vpmg_qfPermanentMultipoleForce(pmg[i], j, force, torque);
            rff[j][0] += sign * force[0];
            rff[j][1] += sign * force[1];
            rff[j][2] += sign * force[2];
            rft[j][0] += sign * torque[0];
            rft[j][1] += sign * torque[1];
            rft[j][2] += sign * torque[2];
          kT = Vunit_kb * (1e-3) * Vunit_Na * 298.15 * 1.0/4.184;
          /* Dielectric boundary term: only when solute and solvent
             dielectrics differ by more than VPMGSMALL */
          epsp = Vpbe_getSoluteDiel(pmg[i]->pbe);
          epsw = Vpbe_getSolventDiel(pmg[i]->pbe);
          if (VABS(epsp-epsw) > VPMGSMALL) {
             for (j=0; j < alist[0]->number; j++){
                Vpmg_dbPermanentMultipoleForce(pmg[i], j, force);
                rff[j][0] += sign * force[0];
                rff[j][1] += sign * force[1];
                rff[j][2] += sign * force[2];
          /* Ionic boundary term: only when zkappa2 exceeds VPMGSMALL */
          zkappa2 = Vpbe_getZkappa2(pmg[i]->pbe);
          if (zkappa2 > VPMGSMALL) {
             for (j=0; j < alist[0]->number; j++) {
                Vpmg_ibPermanentMultipoleForce(pmg[i], j, force);
                rff[j][0] += sign * force[0];
                rff[j][1] += sign * force[1];
                rff[j][2] += sign * force[2];

    //nosh->ndiel = 2;
    //nosh->nkappa = 2;
    // NOTE(review): *energy is energy[0] only, printed before the unit
    // conversion below, and kT is assigned only inside the force branch
    // above before its use here -- confirm intent.
    printf("Energy (multipole) %f Kcal/mol\n", *energy);
    printf("Energy (volume)    %f Kcal/mol\n", evol * 0.5 * kT);

    // Convert results into kcal/mol units
    kT = Vunit_kb * (1e-3) * Vunit_Na * 298.15 * 1.0/4.184;
    // Electric converts from electron**2/Angstrom to kcal/mol
    electric = 332.063709;
    *total = 0.0;
    for (i=0; i<alist[0]->number; i++){
       /* starting with the field in KT/e/Ang^2 multiply by kcal/mol/KT
          the field is then divided by "electric" to convert to e/Ang^2 */
       energy[i] *= 0.5 * kT;
       *total += energy[i];
       fld[i][0] *= kT / electric;
       fld[i][1] *= kT / electric;
       fld[i][2] *= kT / electric;
       rff[i][0] *= kT;
       rff[i][1] *= kT;
       rff[i][2] *= kT;
       rft[i][0] *= kT;
       rft[i][1] *= kT;
       rft[i][2] *= kT;

    /* Release the multigrid objects created for this call */
    killMG(nosh, pbe, pmgp, pmg);
Exemplo n.º 23
// Emits ARM VFP code for the MIPS FPU "two operand" instructions
// (F(fd) = f(F(fs))).  The operation is selected by the low six bits of
// `op`; the cases handled are sqrt, abs, mov, neg, round/trunc/ceil/floor.w.s,
// cvt.s.w and cvt.w.s.
//
// op: raw instruction word.  fs/fd come from the _FS/_FD macros
//     (presumably decoded from `op` -- TODO confirm against their definition).
//
// Each case maps fd (dirty) and fs into host registers via fpr.MapDirtyIn()
// before emitting the arithmetic, and ends with an explicit `break` so that
// only the code for the selected operation is emitted.
void Jit::Comp_FPU2op(u32 op)
{
	int fs = _FS;
	int fd = _FD;
	// logBlocks = 1;

	switch (op & 0x3f)
	{
	case 4:	//F(fd)	   = sqrtf(F(fs));            break; //sqrt
		fpr.MapDirtyIn(fd, fs);
		VSQRT(fpr.R(fd), fpr.R(fs));
		break;

	case 5:	//F(fd)    = fabsf(F(fs));            break; //abs
		fpr.MapDirtyIn(fd, fs);
		VABS(fpr.R(fd), fpr.R(fs));
		break;

	case 6:	//F(fd)	   = F(fs);                   break; //mov
		fpr.MapDirtyIn(fd, fs);
		VMOV(fpr.R(fd), fpr.R(fs));
		break;

	case 7:	//F(fd)	   = -F(fs);                  break; //neg
		fpr.MapDirtyIn(fd, fs);
		VNEG(fpr.R(fd), fpr.R(fs));
		break;

	case 12: //FsI(fd) = (int)floorf(F(fs)+0.5f); break; //round.w.s
		fpr.MapDirtyIn(fd, fs);
		VCVT(fpr.R(fd), fpr.R(fs), TO_INT | IS_SIGNED);
		break;

	case 13: //FsI(fd) = Rto0(F(fs)));            break; //trunc.w.s
		fpr.MapDirtyIn(fd, fs);
		VCVT(fpr.R(fd), fpr.R(fs), TO_INT | IS_SIGNED | ROUND_TO_ZERO);
		break;

	case 14: //FsI(fd) = (int)ceilf (F(fs));      break; //ceil.w.s
		fpr.MapDirtyIn(fd, fs);
		MOVI2F(S0, 0.5f, R0);
		// Bias the operand upwards before converting, the same way the
		// "round up" rounding mode is handled in cvt.w.s below.  Converting
		// S0 without this add would convert the constant 0.5, not fs.
		VADD(S0, fpr.R(fs), S0);
		VCVT(fpr.R(fd), S0,        TO_INT | IS_SIGNED);
		break;

	case 15: //FsI(fd) = (int)floorf(F(fs));      break; //floor.w.s
		fpr.MapDirtyIn(fd, fs);
		MOVI2F(S0, 0.5f, R0);
		// Bias the operand downwards before converting (mirrors the
		// "round down" rounding mode in cvt.w.s below).
		VSUB(S0, fpr.R(fs), S0);
		VCVT(fpr.R(fd), S0,        TO_INT | IS_SIGNED);
		break;

	case 32: //F(fd)   = (float)FsI(fs);          break; //cvt.s.w
		fpr.MapDirtyIn(fd, fs);
		VCVT(fpr.R(fd), fpr.R(fs), TO_FLOAT | IS_SIGNED);
		break;

	case 36: //FsI(fd) = (int)  F(fs);            break; //cvt.w.s
		fpr.MapDirtyIn(fd, fs);
		// R0 = low two bits of fcr31 = current MIPS rounding mode.
		LDR(R0, CTXREG, offsetof(MIPSState, fcr31));
		AND(R0, R0, Operand2(3));
		// MIPS Rounding Mode:
		//	 0: Round nearest
		//	 1: Round to zero
		//	 2: Round up (ceil)
		//	 3: Round down (floor)
		CMP(R0, Operand2(2));
		SetCC(CC_GE); MOVI2F(S0, 0.5f, R1);
		SetCC(CC_GT); VSUB(S0, fpr.R(fs), S0);
		SetCC(CC_EQ); VADD(S0, fpr.R(fs), S0);
		SetCC(CC_GE); VCVT(fpr.R(fd), S0, TO_INT | IS_SIGNED); /* 2,3 */
		// The condition selected by SetCC() applies to every instruction
		// emitted after it, so go back to "always" here: otherwise the CMP
		// below would only execute for modes 2/3 and mode 1 would be
		// handled by the round-to-nearest path.
		SetCC(CC_AL);
		CMP(R0, Operand2(1));
		SetCC(CC_EQ); VCVT(fpr.R(fd), fpr.R(fs), TO_INT | IS_SIGNED | ROUND_TO_ZERO); /* 1 */
		SetCC(CC_LT); VCVT(fpr.R(fd), fpr.R(fs), TO_INT | IS_SIGNED); /* 0 */
		// Leave the emitter unconditional for whatever is compiled next.
		SetCC(CC_AL);
		break;

	default:
		// NOTE(review): no code is emitted for any other encoding; confirm
		// whether these should fall back to the interpreter instead.
		break;
	}
}
Exemplo n.º 24
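/*
 * ***************************************************************************
 * Routine:  Gem_makeBndExt
 * Purpose:  Reset every face and vertex type to interior, then rebuild the
 *           boundary: each face with no neighboring simplex is given a
 *           boundary type chosen from the coordinates of its vertices.
 * Author:   Michael Holst
 * ***************************************************************************
 */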
VPUBLIC void Gem_makeBndExt(Gem *thee, int key)
    int i, j, k, l, m, p, q, nabors, btype, done, btypeGeneric;
    VV *v[4];
    SS *sm, *sm0, *sm1, *sm2;
    double x[4][3], xchk;

    /* go through all simplices and zero all boundary faces */
    Vnm_print(0,"Gem_makeBnd: zeroing boundary faces/vertices..");
    Gem_setNumBF(thee, 0);
    Gem_setNumBV(thee, 0);
    for (i=0; i<Gem_numSS(thee); i++) {
        sm = Gem_SS(thee,i);
        if ( (i>0) && (i % VPRTKEY) == 0 ) Vnm_print(0,"[BS:%d]",i);

        /* get local vertices */
        for (j=0; j<Gem_dimVV(thee); j++)
            v[j] = SS_vertex(sm,j);

        /* reset all vertices and faces to interior type */
        for (j=0; j<Gem_dimVV(thee); j++) {
            /* the other three local vertex/face numbers besides "j" */
            k=(j+1) % Gem_dimVV(thee);
            l=(k+1) % Gem_dimVV(thee);
            m=(l+1) % Gem_dimVV(thee);
            SS_setFaceType(sm, j, 0);
            VV_setType(v[k], 0);
            VV_setType(v[l], 0);
            if (Gem_dim(thee) == 3) VV_setType(v[m], 0);

    /* okay now make a boundary */
    Vnm_print(0,"Gem_makeBnd: rebuilding boundary faces/vertices..");
    for (i=0; i<Gem_numSS(thee); i++) {
        sm = Gem_SS(thee,i);
        if ( (i>0) && (i % VPRTKEY) == 0 ) Vnm_print(0,"[BS:%d]",i);

        /* get local vertices */
        for (j=0; j<Gem_dimVV(thee); j++)
            v[j] = SS_vertex(sm,j);

        /* rebuild everything */
        for (j=0; j<Gem_dimVV(thee); j++) {

            /* the other three local vertex/face numbers besides "j" */
            k=(j+1) % Gem_dimVV(thee);
            l=(k+1) % Gem_dimVV(thee);
            m=(l+1) % Gem_dimVV(thee);

            /* look for a face nabor sharing face "j" (opposite vertex "j") */
            nabors = 0;
            for (sm0=VV_firstSS(v[k]); sm0!=VNULL;sm0=SS_link(sm0,v[k])) {
                for (sm1=VV_firstSS(v[l]); sm1!=VNULL; sm1=SS_link(sm1,v[l])) {
                    if (Gem_dim(thee) == 2) {
                        if ((sm0!=sm) && (sm0==sm1)) nabors++;
                    } else {
                        for (sm2=VV_firstSS(v[m]); sm2!=VNULL; 
                          sm2=SS_link(sm2,v[m])) {
                            if ((sm0!=sm) && (sm0==sm1) && (sm0==sm2)) {

            /* if no one there, then face "j" is actually a boundary face */
            if (nabors == 0) {

                /* grab coordinates of the vertices of this face */
                for (q=0; q<Gem_dim(thee); q++) {
                    x[0][q] = VV_coord(v[k],q);
                for (q=0; q<Gem_dim(thee); q++) {
                    x[1][q] = VV_coord(v[l],q);
                if (Gem_dim(thee) == 3) {
                    for (q=0; q<Gem_dim(thee); q++) {
                        x[2][q] = VV_coord(v[m],q);

                /* default is interior; should not occur! */
                btypeGeneric = 18;
                done = 0;
                btype = btypeGeneric;

                /* ---------- check for base marking ---------- */
                xchk = 0.0;
                for (p=0; p<Gem_dim(thee); p++) {
                    xchk += VABS( x[p][1] - (-1.0) );
                if (xchk < VSMALL) {
                    done = 1;
                    btype = 1;

                /* ---------- check for base marking again ---------- */
                xchk = 0.0;
                for (p=0; p<Gem_dim(thee); p++) {
                    xchk += VABS( x[p][1] - ( 0.0) );
                if (xchk < VSMALL) {
                    done = 1;
                    btype = 18;

                /* ---------- check for first section ---------- */
                if (!done) {
                    done = 1;
                    btype = 2;
                    for (p=0; p<Gem_dim(thee); p++) {
                        if (! (  ( 1.9    <= x[p][0])
                              && ( 6.1    >= x[p][0])
                              && (-VSMALL <= x[p][1])
                              && (-1.1    <= x[p][2])
                              && ( 1.1    >= x[p][2]) )) {
                            done = 0;
                            btype = btypeGeneric;

                    if (done) {
                        xchk = 0.0;
                        for (p=0; p<Gem_dim(thee); p++) {
                            xchk += VABS( x[p][1] - 10.0 );
                        if (xchk < VSMALL) {
                            btype = 10;

                /* ---------- check for second section ---------- */
                if (!done) {
                    done = 1;
                    btype = 4;
                    for (p=0; p<Gem_dim(thee); p++) {
                        if (! (  ( 7.9    <= x[p][0])
                              && (12.1    >= x[p][0])
                              && (-VSMALL <= x[p][1])
                              && (-1.1    <= x[p][2])
                              && ( 1.1    >= x[p][2]) )) {
                            done = 0;
                            btype = btypeGeneric;

                    if (done) {
                        xchk = 0.0;
                        for (p=0; p<Gem_dim(thee); p++) {
                            xchk += VABS( x[p][1] - 10.0 );
                        if (xchk < VSMALL) {
                            btype = 12;

                /* ---------- check for third section ---------- */
                if (!done) {
                    done = 1;
                    btype = 6;
                    for (p=0; p<Gem_dim(thee); p++) {
                        if (! (  (13.9    <= x[p][0])
                              && (18.1    >= x[p][0])
                              && (-VSMALL <= x[p][1])
                              && (-1.1    <= x[p][2])
                              && ( 1.1    >= x[p][2]) )) {
                            done = 0;
                            btype = btypeGeneric;

                    if (done) {
                        xchk = 0.0;
                        for (p=0; p<Gem_dim(thee); p++) {
                            xchk += VABS( x[p][1] - 10.0 );
                        if (xchk < VSMALL) {
                            btype = 14;

                /* ---------- check for fourth section ---------- */
                if (!done) {
                    done = 1;
                    btype = 8;
                    for (p=0; p<Gem_dim(thee); p++) {
                        if (! (  (19.9    <= x[p][0])
                              && (24.1    >= x[p][0])
                              && (-VSMALL <= x[p][1])
                              && (-1.1    <= x[p][2])
                              && ( 1.1    >= x[p][2]) )) {
                            done = 0;
                            btype = btypeGeneric;

                    if (done) {
                        xchk = 0.0;
                        for (p=0; p<Gem_dim(thee); p++) {
                            xchk += VABS( x[p][1] - 10.0 );
                        if (xchk < VSMALL) {
                            btype = 16;

                /* should have been marked with SOME boundary type */
                VASSERT( 0 != btype );

                /* set the facetype */
                SS_setFaceType(sm, j, btype);

                /* set the vertex types (dirichlet overrides robin) */
                if (!VDIRICHLET( VV_type(v[k])) ) {
                    if (VINTERIOR( VV_type(v[k])) ) {
                    VV_setType(v[k], btype);
                if (!VDIRICHLET( VV_type(v[l])) ) {
                    if (VINTERIOR( VV_type(v[l])) ) {
                    VV_setType(v[l], btype);
                if (Gem_dim(thee) == 3) {
                    if (!VDIRICHLET( VV_type(v[m])) ) {
                        if (VINTERIOR( VV_type(v[m])) ) {
                        VV_setType(v[m], btype);