Example #1
0
/*
 * Compose the two rotations q1 and q2 (quaternions whose scalar part is
 * stored in element [3]) and write the combined quaternion to dest.
 *
 * The result is assembled in a scratch quaternion and only copied to dest
 * once q1 and q2 are no longer read, so dest may be the same array as q1
 * or q2.
 *
 * A function-static call counter is kept: once it exceeds RENORMCOUNT the
 * counter is reset and dest is passed through normalize_quat().
 *
 * NOTE(review): vcopy/vscale/vcross/vadd/vdot are assumed to operate on the
 * first three (vector) components only -- confirm against their definitions.
 */
void
add_quats(float q1[4], float q2[4], float dest[4])
{
    static int calls_since_renorm = 0;
    float q1_scaled[4], q2_scaled[4], cross[4];
    float sum[4];
    int i;

#if 0
printf("q1 = %f %f %f %f\n", q1[0], q1[1], q1[2], q1[3]);
printf("q2 = %f %f %f %f\n", q2[0], q2[1], q2[2], q2[3]);
#endif

    /* q1's vector part weighted by q2's scalar part. */
    vcopy(q1, q1_scaled);
    vscale(q1_scaled, q2[3]);

    /* q2's vector part weighted by q1's scalar part. */
    vcopy(q2, q2_scaled);
    vscale(q2_scaled, q1[3]);

    /* Cross term; note the operand order is (q2, q1). */
    vcross(q2, q1, cross);

    /* Vector part: sum of the three contributions. */
    vadd(q1_scaled, q2_scaled, sum);
    vadd(cross, sum, sum);

    /* Scalar part. */
    sum[3] = q1[3] * q2[3] - vdot(q1, q2);

#if 0
printf("tf = %f %f %f %f\n", sum[0], sum[1], sum[2], sum[3]);
#endif

    for (i = 0; i < 4; ++i) {
        dest[i] = sum[i];
    }

    /* Periodically renormalise to stop rounding error from accumulating. */
    ++calls_since_renorm;
    if (calls_since_renorm > RENORMCOUNT) {
        calls_since_renorm = 0;
        normalize_quat(dest);
    }
}
Example #2
0
/* Given a quaternion, compute an equivalent rotation axis and angle.
 *
 *   vec  - output: rotation axis (the vector part of quat, rescaled and then
 *          passed through vnormal()).
 *   phi  - output: rotation angle, 2*acos(quat[3]).
 *   quat - input quaternion; elements [0..2] are the vector part and
 *          element [3] is the scalar part.
 *
 * When the vector part is exactly zero there is no rotation axis to
 * recover, so the +Z axis and a zero angle are returned.
 *
 * The scalar part is clamped to [-1, 1] before acos(): a quaternion that is
 * unit length only up to rounding error can carry a scalar part marginally
 * outside that range, for which acos() would return NaN.
 */
void quat_to_axis(double vec[3], double *phi, double quat[4]){
   double scale;
   double w;
   
   /* Squared length of the vector part. */
   scale = quat[0]*quat[0] + quat[1]*quat[1] + quat[2]*quat[2];
   
   if(scale == 0){  /* no rotation, we're stuffed */
      vec[0] = 0;
      vec[1] = 0;
      vec[2] = 1;
      *phi = 0;
      }
   else{
      vcopy(vec, quat);
      vscale(vec, 1.0/scale);
      vnormal(vec);
      
      /* Clamp so acos() stays inside its domain (a NaN input stays NaN). */
      w = quat[3];
      if(w > 1.0){
         w = 1.0;
         }
      else if(w < -1.0){
         w = -1.0;
         }
      
      *phi  = 2.0*acos(w);
      }
   }
// viscosity
void Fluid2::fluidViscosity( const float dt )
{
	if( Scene::testcase >= Scene::SMOKE )
	{
        Array2< float > ucopy( velocityX );
        Array2< float > vcopy( velocityY );
        const Index2& sizeu = velocityX.getSize();
        const Index2& sizev = velocityY.getSize();

        const Vec2 dx = grid.getCellDx();
        const Vec2 invDxSq( 1.0f / ( dx.x * dx.x ), 1.0f / ( dx.y * dx.y ) );
        const float dtMuOverRho = dt * Scene::kViscosity / Scene::kDensity;

        for( unsigned int i = 0; i < sizeu.x; ++i )
		for( unsigned int j = 0; j < sizeu.y; ++j )
        {
            const Index2 id( i, j );
            const Index2 id1( clamp( i-1, 0, sizeu.x-1 ), j );
            const Index2 id2( clamp( i+1, 0, sizeu.x-1 ), j );
            const Index2 id3( i , clamp( j-1, 0, sizeu.y-1 ) );
            const Index2 id4( i , clamp( j+1, 0, sizeu.y-1 ) );
            velocityX[ id ] += dtMuOverRho * (
                ( ucopy[ id1 ] - 2.0f * ucopy[ id ] + ucopy[ id2 ] ) * invDxSq.x +
                ( ucopy[ id3 ] - 2.0f * ucopy[ id ] + ucopy[ id4 ] ) * invDxSq.y );
        }
        
		for( unsigned int i = 0; i < sizev.x; ++i )
		for( unsigned int j = 0; j < sizev.y; ++j )
        {
            const Index2 id( i, j );
            const Index2 id1( clamp( i-1, 0, sizev.x-1 ), j );
            const Index2 id2( clamp( i+1, 0, sizev.x-1 ), j );
            const Index2 id3( i , clamp( j-1, 0, sizev.y-1 ) );
            const Index2 id4( i , clamp( j+1, 0, sizev.y-1 ) );
            velocityY[ id ] += dtMuOverRho * (
                ( vcopy[ id1 ] - 2.0f * vcopy[ id ] + vcopy[ id2 ] ) * invDxSq.x +
                ( vcopy[ id3 ] - 2.0f * vcopy[ id ] + vcopy[ id4 ] ) * invDxSq.y );
        }
	}
}
Example #4
0
// Audio-rate bus writer.
//
// Input 0 selects the first audio bus; inputs 1..mNumInputs-1 are the signals
// to write. For every channel the bus buffer is either summed into (when the
// bus was already written during the current buffer cycle, i.e. its "touched"
// stamp equals the world's buffer counter) or overwritten and stamped.
//
// The cached bus pointers are only refreshed when the requested bus index
// changes AND the requested range lies inside the world's audio buses;
// otherwise the previously cached pointers stay in use.
void vOut_next_a(IOUnit *unit, int inNumSamples)
{
	World *world = unit->mWorld;
	const int bufLength = world->mBufLength;
	const int numChannels = unit->mNumInputs - 1;

	const float fbusChannel = ZIN0(0);
	if (fbusChannel != unit->m_fbusChannel) {
		unit->m_fbusChannel = fbusChannel;

		const int firstChannel = (int)fbusChannel;
		const int endChannel = firstChannel + numChannels;
		const bool inRange = firstChannel >= 0
			&& endChannel <= (int)world->mNumAudioBusChannels;

		if (inRange) {
			unit->m_bus = world->mAudioBus + (firstChannel * bufLength);
			unit->m_busTouched = world->mAudioBusTouched + firstChannel;
		}
	}

	float *busBase = unit->m_bus;
	int32 *touchStamps = unit->m_busTouched;
	const int32 currentCycle = world->mBufCounter;

	for (int ch = 0; ch < numChannels; ++ch) {
		float *busBuf = busBase + ch * bufLength;

		ACQUIRE_BUS_AUDIO((int32)fbusChannel + ch);
		float *signal = IN(ch + 1);
		if (touchStamps[ch] != currentCycle)
		{
			// First writer in this cycle: replace the bus contents and stamp it.
			vcopy(busBuf, signal, inNumSamples);
			touchStamps[ch] = currentCycle;
		}
		else
		{
			// Someone already wrote this cycle: mix on top.
			vadd(busBuf, busBuf, signal, inNumSamples);
		}
		RELEASE_BUS_AUDIO((int32)fbusChannel + ch);
	}
}
Example #5
0
// Audio-rate bus reader.
//
// Input 0 selects the first audio bus; one output is produced per bus
// channel. A channel whose "touched" stamp equals the world's current buffer
// counter is copied to the output; any other channel yields silence.
//
// The cached bus pointers are only refreshed when the requested bus index
// changes AND the requested range lies inside the world's audio buses;
// otherwise the previously cached pointers stay in use.
void vIn_next_a(IOUnit *unit, int inNumSamples)
{
	World *world = unit->mWorld;
	int bufLength = world->mBufLength;
	int numChannels = unit->mNumOutputs;

	float fbusChannel = ZIN0(0);
	if (fbusChannel != unit->m_fbusChannel) {
		unit->m_fbusChannel = fbusChannel;
		const int maxChannels = (int)world->mNumAudioBusChannels;

		// Validate the float BEFORE converting it. The previous code cast to
		// uint32 first, which is undefined behaviour for negative values and
		// made the subsequent "busChannel < 0" test meaningless. Values in
		// (-1, 0) still truncate to bus 0, and NaN fails both comparisons.
		if (fbusChannel > -1.f && fbusChannel <= (float)maxChannels) {
			int busChannel = (int)fbusChannel;
			int lastChannel = busChannel + numChannels;

			if (lastChannel <= maxChannels) {
				unit->m_bus = world->mAudioBus + (busChannel * bufLength);
				unit->m_busTouched = world->mAudioBusTouched + busChannel;
			}
		}
	}

	float *in = unit->m_bus;
	int32 *touched = unit->m_busTouched;
	int32 bufCounter = unit->mWorld->mBufCounter;

	for (int i=0; i<numChannels; ++i, in += bufLength) {
		ACQUIRE_BUS_AUDIO_SHARED((int32)fbusChannel + i);
		float *out = OUT(i);
		if (touched[i] == bufCounter)
		{
			// Bus was written during this cycle: pass its contents through.
			vcopy(out, in, inNumSamples);
		}
		else
		{
			// Stale bus: output silence instead of old data.
			vfill(out, 0.f, inNumSamples);
		}
		RELEASE_BUS_AUDIO_SHARED((int32)fbusChannel + i);
	}
}
Example #6
0
void rcMarkConvexPolyArea(const float* verts, const int nverts,
                                                  const float hmin, const float hmax, unsigned char areaId,
                                                  rcCompactHeightfield& chf)
{
        float bmin[3], bmax[3];
        vcopy(bmin, verts);
        vcopy(bmax, verts);
        for (int i = 1; i < nverts; ++i)
        {
                vmin(bmin, &verts[i*3]);
                vmax(bmax, &verts[i*3]);
        }
        bmin[1] = hmin;
        bmax[1] = hmax;


        int minx = (int)((bmin[0]-chf.bmin[0])/chf.cs);
        int miny = (int)((bmin[1]-chf.bmin[1])/chf.ch);
        int minz = (int)((bmin[2]-chf.bmin[2])/chf.cs);
        int maxx = (int)((bmax[0]-chf.bmin[0])/chf.cs);
        int maxy = (int)((bmax[1]-chf.bmin[1])/chf.ch);
        int maxz = (int)((bmax[2]-chf.bmin[2])/chf.cs);
        
        if (maxx < 0) return;
        if (minx >= chf.width) return;
        if (maxz < 0) return;
        if (minz >= chf.height) return;
        
        if (minx < 0) minx = 0;
        if (maxx >= chf.width) maxx = chf.width-1;
        if (minz < 0) minz = 0;
        if (maxz >= chf.height) maxz = chf.height-1;    
        
        
        // TODO: Optimize.
        for (int z = minz; z <= maxz; ++z)
        {
                for (int x = minx; x <= maxx; ++x)
                {
                        const rcCompactCell& c = chf.cells[x+z*chf.width];
                        for (int i = (int)c.index, ni = (int)(c.index+c.count); i < ni; ++i)
                        {
                                rcCompactSpan& s = chf.spans[i];
                                if ((int)s.y >= miny && (int)s.y <= maxy)
                                {
                                        if (areaId < chf.areas[i])
                                        {
                                                float p[3];
                                                p[0] = chf.bmin[0] + (x+0.5f)*chf.cs; 
                                                p[1] = 0;
                                                p[2] = chf.bmin[2] + (z+0.5f)*chf.cs; 


                                                if (pointInPoly(nverts, verts, p))
                                                {
                                                        chf.areas[i] = areaId;
                                                }
                                        }
                                }
                        }
                }
        }
        


}
Example #7
0
/*******************************************************************
 Subroutine to do the EM algorithm
   matrix *D:       the pointer to the matrix data
   matrix *mean0_x: the pointer to a matrix containing the initial Means of clusters
   vector *w0:		the pointer to a vector containing the initial mixing proportion of clusters
                         (NOTE: updated in place on every EM iteration)
   double vv:       the value for initializing the Covariance matrix of clusters
   double error:    the error threshold
   vector *Zjk_up:  the pointer to a vector containing Posterior probabilities of the up-level 
                         cluster samples
   matrix *mean1_x: the pointer to a matrix containing the Means of clusters in t-space
   vector *w0_t:	the pointer to a vector containing the mixing proportions of the identified 
                         clusters in t-space
   matrix *cov_mat: the pointer to a group of matrices containing the Covariance
                         matrix of clusters in t-space
   matrix *Zjk:     the pointer to a matrix containing Posterior probabilities of all samples 
                         belonging to all the sub-level clusters, each column is for one cluster.
   
 return value: '1' - successfully exit
               '0' - exit with warning/error
               (as written, this routine only ever returns 1)

 Outline:
   1. Initialise each cluster covariance to vv * identity; while veModel()
      reports a zero entry in Fx for any sample, rebuild with vv doubled.
   2. Iterate: evaluate veModel(), update posteriors Zjk, mixing proportions
      w0, means and covariances for every cluster, until the mean absolute
      change of the LAST cluster's posterior column drops to `error` or below.
   3. Copy the results to the output arguments and release all scratch storage.
*******************************************************************/
int veSubEM(matrix *D, matrix *mean0_x, vector *w0, double vv, double error, vector *Zjk_up, //input
			matrix *mean1_x, vector *w0_t, matrix *cov_mat, matrix *Zjk)  //output
{
	int k0, n, p;
	int i, j, k, u, s;
	matrix *Var0;
	matrix Gxn;
	vector Fx;
	matrix MUK;
	matrix MU1;
	int zeroFx_num = 1;
	//double error = 0.01;
	double err = error + (double)1;   // guarantees at least one EM iteration
	vector Zjk_temp;

	n = D->m;           // number of samples (rows of D)
	p = D->n;           // dimensionality (columns of D)
	k0 = mean0_x->m;    // number of clusters
	
	Var0 = new matrix[k0];
	for(i=0; i<k0; i++) {
		mnew(Var0+i, p, p);
	}
	mnew(&Gxn, n, k0);
	vnew(&Fx, n);
	vnew(&Zjk_temp, n);
	mnew(&MUK, k0, p);
	mcopy(mean0_x, &MUK);
	mnew(&MU1, k0, p);

	vector D_j;
	vector Zjk_k;
	double sum_tmp = 0;
	matrix Ck;
	vector D_i;
	vector MUK_k;
	vector cen_D_i;
	matrix mtmp;
	vector vtmp;

	vnew(&D_j, n);
	vnew(&Zjk_k, n);
	mnew(&Ck, p, p);
	vnew(&D_i, p);
	vnew(&MUK_k, p);
	vnew(&cen_D_i, p);
	mnew(&mtmp, p, p);
	vnew(&vtmp, n);

	//Initializing the parameters of mixture of Gaussians
	//Initialize the covariance matrix
	//Use EM algorithm to perform the local training.
	
	//Test initialization of covariance matrix 
	//printf("Testing covariance matrix initialization... \n");

	// Widen the initial (diagonal) covariance until no sample has Fx == 0;
	// a zero Fx would be used as a divisor in the posterior update below.
	while (zeroFx_num != 0) {
		for(i=0; i<k0; i++) {
			meye(Var0+i);
			for (j=0; j<p; j++) {
				*((Var0+i)->pr+j*p+j) = vv;
			}
		}
	
		veModel(D, mean0_x, Var0, w0, &Gxn, &Fx);
		//printf("\n Gxn = :\n");
		//mprint(&Gxn);
		//printf("\n Fx = :\n");
		//vprint(&Fx);

		zeroFx_num = 0;
		for (i=0; i<n; i++) {
			if (*(Fx.pr+i) == 0) {
				zeroFx_num++;
			}
		}

		vv *= 2;
	
	}

	vones(&Zjk_temp);

	//printf("\n EM in t-space starts ... \n");
	//printf("\n Data = \n");
	//mprint(D);

	int l = 0;   // iteration counter, only advanced/printed in _DEBUG builds
	while (err > error) {
		
		#ifdef _DEBUG
		printf(" \n...... in EM loop %d ......\n", ++l);

		printf("\n L%d: w0 = \n", l);
		vprint(w0);
		printf("\n L%d: MUK = \n", l);
		mprint(&MUK);
		printf("\n L%d: Var0 = \n", l);
		for(i=0; i<k0; i++) {
			mprint(Var0+i);
			printf("\n");
		}
		printf("\n L%d: Zjk = \n", l);
		mprint(Zjk);
		#endif

		veModel(D, &MUK, Var0, w0, &Gxn, &Fx);
		
		#ifdef _DEBUG
		printf("\n L%d: Gxn = \n", l);
		mprint(&Gxn);
		printf("\n L%d: Fx = \n", l);
		vprint(&Fx);
		#endif

		for (k=0; k<k0; k++) {
			u = k*p;   // offset of row k in the k0-by-p mean matrices

			// Posterior of cluster k for every sample, and the new mixing
			// proportion as the ratio of posterior mass to up-level mass.
			double zz = 0;
			double zz_up = 0;
			for (i=0; i<n; i++) {
				*(Zjk->pr+i*k0+k) = (*(w0->pr+k)) * Zjk_up->pr[i] * (*(Gxn.pr+i*k0+k)) / (*(Fx.pr+i));
				zz += *(Zjk->pr+i*k0+k);
				zz_up += Zjk_up->pr[i];
			}
			*(w0->pr+k) = zz/zz_up;

			// New mean of cluster k: posterior-weighted average per dimension.
			// Column k of Zjk does not depend on j, so fetch it once.
			getcolvec(Zjk, k, &Zjk_k);
			for (j=0; j<p; j++) {
				getcolvec(D, j, &D_j);
				sum_tmp = 0;
				for (i=0; i<n; i++) {
					sum_tmp += (*(Zjk_k.pr+i)) * (*(D_j.pr+i));
				}
				*(MU1.pr+u+j) = sum_tmp / zz;
			}

			// New covariance of cluster k: posterior-weighted scatter around
			// the mean used for this iteration (MUK, not the fresh MU1).
			// Row k of MUK does not depend on i, so fetch it once.
			mzero(&Ck);
			getrowvec(&MUK, k, &MUK_k);
			for (i=0; i<n; i++) {
				getrowvec(D, i, &D_i);
				for (j=0; j<p; j++) {
					*(cen_D_i.pr+j) = *(D_i.pr+j) - *(MUK_k.pr+j);
				}

				vvMul(&cen_D_i, &cen_D_i, &mtmp);
				
				for (j=0; j<p; j++) {
					for (s=0; s<p; s++) {
						*(Ck.pr+j*p+s) += (*(Zjk->pr+i*k0+k)) * (*(mtmp.pr+j*p+s));
					}
				}
			}
			for (j=0; j<p; j++) {
				for (s=0; s<p; s++) {
					*(Var0[k].pr+j*p+s) = (*(Ck.pr+j*p+s)) / zz;
				}
			}
		}   // for (k...

		mcopy(&MU1, &MUK);

		// Convergence measure: mean absolute change of the posterior column
		// left in Zjk_k, i.e. the one of the last cluster processed above.
		for (i=0; i<n; i++) {
			*(vtmp.pr+i) = fabs(*(Zjk_k.pr+i) - *(Zjk_temp.pr+i));
		}
		err = vmean(&vtmp);
		vcopy(&Zjk_k, &Zjk_temp);
		
		
    }  // while

	// Publish the results.
	vcopy(w0, w0_t);
	mcopy(&MUK, mean1_x);
	for(i=0; i<k0; i++) {
		mcopy(Var0+i, cov_mat+i);
	}

	// Release scratch storage.
	for(i=0; i<k0; i++) {
		mdelete(Var0+i);
	} 
	delete[] Var0;   // the array itself came from new[]; mdelete only frees each matrix's contents
	mdelete(&Gxn);
	vdelete(&Fx);
	vdelete(&Zjk_temp);
	mdelete(&MUK);
	mdelete(&MU1);
    vdelete(&D_j);
	vdelete(&Zjk_k);
	mdelete(&Ck);
	vdelete(&D_i);
	vdelete(&MUK_k);
	vdelete(&cen_D_i);
	mdelete(&mtmp);
	vdelete(&vtmp);

    return 1;
}
Example #8
0
/*
 * Partition `length` elements into equivalence sets.
 *
 * For every element a handful of scalar/vector "fingerprints" is accumulated
 * from its position and mass relative to every other element (e, s, ev, ep
 * below).  Two elements land in the same set when all fingerprints agree
 * within thresholds->equivalence (relative difference) and their `n` fields
 * are equal.
 *
 *   elements  - input element pointers.
 *   pelements - output storage for the element pointers, regrouped so that
 *               each set occupies a contiguous run; may be the same array as
 *               `elements` (a temporary copy is taken in that case).
 *   g         - geometry; only passed to geometryDegenerate().
 *   esl, es   - outputs: number of sets and a calloc'ed array of sets whose
 *               `elements` members point into `pelements`.
 *
 * Always returns MSYM_SUCCESS.
 *
 * NOTE(review): allocation results are not checked, and two elements at the
 * same position (dist == 0) or a pair with m[i]+m[j] == 0 would divide by
 * zero below.
 */
msym_error_t partitionEquivalenceSets(int length, msym_element_t *elements[length], msym_element_t *pelements[length], msym_geometry_t g, int *esl, msym_equivalence_set_t **es, msym_thresholds_t *thresholds) {
    
    int ns = 0, gd = geometryDegenerate(g);
    double *e = calloc(length,sizeof(double));   /* per-element sum of pair terms w*dist */
    double *s = calloc(length,sizeof(double));   /* per-element sum of squared pair terms */
    
    int *sp = calloc(length,sizeof(int)); //set partition
    int *ss  = calloc(length,sizeof(int)); //set size
    
    double (*ev)[3] = calloc(length,sizeof(double[3]));   /* per-element accumulated direction vectors */
    double (*ep)[3] = calloc(length,sizeof(double[3]));   /* per-element accumulated plane projections */
    
    /* Local copies of positions and masses. */
    double (*vec)[3] = calloc(length, sizeof(double[3]));
    double *m = calloc(length, sizeof(double));
    
    for(int i = 0;i < length;i++){
        vcopy(elements[i]->v, vec[i]);
        m[i] = elements[i]->m;
    }

    /* Pairwise pass: every unordered pair (i, j) contributes to both elements. */
    for(int i=0; i < length; i++){
        for(int j = i+1; j < length;j++){
            double w = m[i]*m[j]/(m[i]+m[j]);   /* pair weight derived from the two masses */
            double dist;
            double v[3];
            double proji[3], projj[3];
            
            /* Weighted projection of j's position w.r.t. i's normalised
             * position, accumulated on i (presumably a projection onto the
             * plane with that normal -- see vproj_plane). */
            vnorm2(vec[i],v);
            vproj_plane(vec[j], v, proji);
            vscale(w, proji, proji);
            vadd(proji,ep[i],ep[i]);
            
            /* Same with the roles of i and j swapped. */
            vnorm2(vec[j],v);
            vproj_plane(vec[i], v, projj);
            vscale(w, projj, projj);
            vadd(projj,ep[j],ep[j]);
            
            /* Vector from i to j, rescaled to length w. */
            vsub(vec[j],vec[i],v);
            
            dist = vabs(v);
            
            vscale(w/dist,v,v);
            
            /* Added to i and subtracted from j. */
            vadd(v,ev[i],ev[i]);
            vsub(ev[j],v,ev[j]);
            
            double dij = w*dist; //This is sqrt(I) for a diatomic molecule along an axis perpendicular to the bond with O at center of mass.
            e[i] += dij;
            e[j] += dij;
            
            s[i] += SQR(dij);
            s[j] += SQR(dij);
        }
        /* All pairs involving i are done at this point: replace ev[i] by
         * (position of i) - (accumulated sum). */
        vsub(vec[i],ev[i],ev[i]);
        
    }

    /* Self term: each element also contributes with weight m/2 and its
     * distance from the origin. */
    for(int i = 0; i < length; i++){
        
        double v[3];
        double w = m[i]/2.0;
        double dist = vabs(elements[i]->v);
        double dii = w*dist;
        vscale(w,elements[i]->v,v);
        vsub(ev[i],v,ev[i]);
        
        // Plane projection can't really differentiate certain types of structures when we add the initial vector,
        // but not doing so will result in huge cancellation errors on degenerate point groups,
        // also large masses will mess up the eq check when this is 0.
        if(gd) vadd(ep[i],v,ep[i]);
        
        e[i] += dii;
        s[i] += SQR(dii);
    }
    /* Greedy grouping.  e[i] >= 0 means "not yet assigned"; once an element is
     * assigned, e[] is overwritten with a negative value that doubles as the
     * stored error (-max) or as the plain marker -1.0. */
    for(int i = 0; i < length; i++){
        if(e[i] >= 0.0){
            sp[i] = i;   /* i becomes the representative of a new set */
            for(int j = i+1; j < length;j++){
                if(e[j] >= 0.0){
                    double vabsevi = vabs(ev[i]), vabsevj = vabs(ev[j]), vabsepi = vabs(ep[i]), vabsepj = vabs(ep[j]);
                    /* Relative differences of the fingerprints of i and j. */
                    double eep = 0.0, eev = fabs((vabsevi)-(vabsevj))/((vabsevi)+(vabsevj)), ee = fabs((e[i])-(e[j]))/((e[i])+(e[j])), es = fabs((s[i])-(s[j]))/((s[i])+(s[j]));
                    
                    /* Skip the projection comparison when both lengths are
                     * below the zero threshold; eep then stays 0. */
                    if(!(vabsepi < thresholds->zero && vabsepj < thresholds->zero)){
                        eep = fabs((vabsepi)-(vabsepj))/((vabsepi)+(vabsepj));
                    }
                    
                    double max = fmax(eev,fmax(eep,fmax(ee, es)));
                    
                    if(max < thresholds->equivalence && elements[i]->n == elements[j]->n){
                        e[j] = max > 0.0 ? -max : -1.0;
                        sp[j] = i;
                    }
                }
            }
            e[i] = -1.0;
        }
    }
    
    /* Count the sets (ns) and the members of each set (ss, indexed by the
     * representative's index). */
    for(int i = 0; i < length;i++){
        int j = sp[i];
        ns += (ss[j] == 0);
        ss[j]++;
    }

    msym_equivalence_set_t *eqs = calloc(ns,sizeof(msym_equivalence_set_t));
    msym_element_t **lelements = elements;
    msym_element_t **pe = pelements;
    
    /* When regrouping in place, read from a snapshot so that entries are not
     * overwritten before they have been consumed. */
    if(elements == pelements){
        lelements = malloc(sizeof(msym_element_t *[length]));
        memcpy(lelements, elements, sizeof(msym_element_t *[length]));
    }
    
    /* Lay the sets out back to back in pelements. */
    for(int i = 0, ni = 0; i < length;i++){
        if(ss[i] > 0){
            int ei = 0;
            eqs[ni].elements = pe;
            eqs[ni].length = ss[i];
            for(int j = 0; j < length;j++){
                if(sp[j] == i){
                    /* e[j] in (-1, 0) carries -max from the grouping pass;
                     * anything else contributes an error of 0. */
                    double err = (e[j] > -1.0) ? fabs(e[j]) : 0.0;
                    eqs[ni].err = fmax(eqs[ni].err,err);
                    eqs[ni].elements[ei++] = lelements[j];
                }
            }
            pe += ss[i];
            ni++;
        }
    }

    if(elements == pelements){
        free(lelements);
    }
    free(m);
    free(vec);
    free(s);
    free(e);
    free(sp);
    free(ss);
    free(ev);
    free(ep);
    *es = eqs;
    *esl = ns;
    return MSYM_SUCCESS;
}
Example #9
0
//TODO: Use a preallocated pointer array instead of multiple mallocs
/*
 * Generate the symmetry-equivalent copies of `elements` under the symmetry
 * operations of point group `pg`, grouped into equivalence sets.
 *
 * Each input element that is not already among the generated elements seeds
 * a new set: every symmetry operation in pg->sops is applied to its position
 * and each image not yet present (same n, m, name and position within
 * thresholds->permutation) is appended as a new generated element.
 *
 * Outputs (written only on success):
 *   glength / gelements - number of generated elements and the heap array
 *                         holding them.
 *   esl / es            - number of sets and a single heap block holding the
 *                         set array followed by the element-pointer table the
 *                         sets' `elements` members point into.
 *
 * Returns MSYM_SUCCESS, MSYM_INVALID_ELEMENTS when an element has orbitals,
 * or MSYM_INVALID_EQUIVALENCE_SET when a set's size does not divide the point
 * group order.  On error all temporary allocations are released.
 */
msym_error_t generateEquivalenceSet(msym_point_group_t *pg, int length, msym_element_t elements[length], int *glength, msym_element_t **gelements, int *esl, msym_equivalence_set_t **es,msym_thresholds_t *thresholds){
    msym_error_t ret = MSYM_SUCCESS;
    msym_element_t *ge = calloc(length,sizeof(msym_element_t[pg->order]));
    msym_equivalence_set_t *ges = calloc(length,sizeof(msym_equivalence_set_t));
    int gel = 0;
    int gesl = 0;
    for(int i = 0;i < length;i++){
        msym_equivalence_set_t *aes = NULL;
        int f;
        /* Skip input elements that an earlier seed already generated. */
        for(f = 0;f < gel;f++){
            if(ge[f].n == elements[i].n && ge[f].m == elements[i].m && 0 == strncmp(ge[f].name, elements[i].name, sizeof(ge[f].name)) && vequal(ge[f].v, elements[i].v, thresholds->permutation)){
                break;
            }
        }
        if(f == gel){
            aes = &ges[gesl++];
            aes->elements = calloc(pg->order,sizeof(msym_element_t*));
            aes->length = 0;
        } else {
            continue;
        }
        
        if(elements[i].aol > 0 || elements[i].ao != NULL){
            msymSetErrorDetails("Cannot (currently) generate equivalence sets from elements with orbitals");
            ret = MSYM_INVALID_ELEMENTS;
            goto err;
        }
        /* Apply every symmetry operation and keep the images not seen before. */
        for(msym_symmetry_operation_t *s = pg->sops;s < (pg->sops + pg->sopsl);s++){
            double v[3];
            applySymmetryOperation(s, elements[i].v, v);
            
            for(f = 0;f < gel;f++){
                if(ge[f].n == elements[i].n && ge[f].m == elements[i].m && 0 == strncmp(ge[f].name, elements[i].name, sizeof(ge[f].name)) && vequal(ge[f].v, v, thresholds->permutation)){
                    break;
                }
            }
            if(f == gel){
                memcpy(&ge[gel],&elements[i],sizeof(msym_element_t));
                vcopy(v, ge[gel].v);
                aes->elements[aes->length++] = &ge[gel++];
            }
        }
        
        /* An empty set (e.g. no symmetry operations) is rejected explicitly so
         * the modulo below never divides by zero.  Both format arguments are
         * supplied; previously only pg->order was passed for two %d. */
        if(aes->length == 0 || pg->order % aes->length != 0){
            msymSetErrorDetails("Equivalence set length (%d) not a divisor of point group order (%d)",aes->length,pg->order);
            ret = MSYM_INVALID_EQUIVALENCE_SET;
            goto err;
        }
        
        aes->elements = realloc(aes->elements,sizeof(msym_element_t*[aes->length]));
    }
    
    /* Shrink the buffers to their final sizes.  The set array is grown by a
     * trailing pointer table so the caller receives one block per output. */
    msym_element_t *geo = ge;
    ge = realloc(ge,sizeof(msym_element_t[gel]));
    ges = realloc(ges,sizeof(msym_equivalence_set_t[gesl]) + sizeof(msym_element_t *[gel]));
    
    /* Move each set's pointers into the trailing table, rebasing them from
     * the old element buffer (geo) onto the reallocated one (ge). */
    msym_element_t **ep = (msym_element_t **) &ges[gesl];
    for(int i = 0;i < gesl;i++){
        msym_element_t **tep = ep;
        for(int j = 0;j < ges[i].length;j++){
            *ep = ges[i].elements[j] - geo + ge;
            ep++;
        }
        free(ges[i].elements);
        ges[i].elements = tep;
    }

    *glength = gel;
    *gelements = ge;
    *es = ges;
    *esl = gesl;
    return ret;
    
err:
    free(ge);
    for(int i = 0; i < gesl;i++) free(ges[i].elements);
    free(ges);
    return ret;
}
// Newton iteration for the projection of y onto the unit Lp ball.
//
//   y       - input vector (N entries)
//   xout    - receives the projection (N entries)
//   p       - norm exponent; p == 2 and p == Inf are delegated to the
//             closed-form radial / L-infinity projections
//   numiter - receives the number of Newton iterations performed
//
// The unknown is the (N+1)-vector z: N coordinates followed by a Lagrange
// multiplier in z[N]. Iteration stops once ||F|| / ||z0|| <= tol, where F is
// the residual of the current iterate and z0 the starting point; exceeding
// max_numiter is reported through mexErrMsgTxt.
void LpProjector::proj_lpball_newton_normalized(const double *y, double *xout,
					      double p,int &numiter){
  const double tol=1e-15;
  numiter=0;

  // Closed-form special cases.
  if(p==2.0){
    radial_lp_project(y,xout,N,p);
    return;
  }
  if (p==Inf){
    l_infinity_project(y,xout,N);
    return;
  }

  // Two candidate starting points:
  //   xn1 - radial projection of y onto the Lp ball
  //   xn2 - L-infinity projection of y, then radial projection
  // Start from whichever is closer to y in the 2-norm.
  radial_lp_project(y,xn1,N,p);
  l_infinity_project(y,xn2,N);
  radial_lp_project(xn2,xn2,N,p);
  if (dpnorm(xn1,y,N,2.0) < dpnorm(xn2,y,N,2.0))
    vcopy(xn1,z,N);
  else
    vcopy(xn2,z,N);

  // Multiplier coordinate starts from a least-squares fit.
  z[N]=lsq_lambda_init(z,y,p);

  const double normz0=pnorm(z,N+1,2.0);
  double normF=tol*normz0+1;   // forces the first pass through the loop

  while ( (normF/normz0) > tol ) {
    // Residual F: N stationarity equations plus the norm constraint.
    double sumzp=0;
    for (int k=0;k<N;k++){
      zpm1[k]=pow(z[k],p-1);
      F[k]=z[k]+z[N]*zpm1[k]-y[k];
      sumzp+=pow(z[k],p);
    }
    F[N]=(sumzp-1)/p;

    normF=pnorm(F,N+1,2.0);

    // Diagonal block of the Jacobian.
    for (int k=0;k<N;k++)
      d[k]=1+z[N]*(p-1)*pow(z[k],p-2);

    // Solve the bordered system using the diagonal block.
    vdiv(zpm1,d,btwid,N);
    const double mu=dotp(zpm1,btwid,N);
    const double chi=-dotp(btwid,F,N); // uses only first N entries of F

    for (int k=0;k<N;k++)
      dz[k]=-F[k]/d[k] + btwid[k]*(-F[N]-chi)/mu;
    dz[N]=(chi+F[N])/mu;

    // Newton update of all N+1 unknowns.
    for (int k=0;k<=N;k++)
      z[k]+=dz[k];

    if (verbose){
      vprint(F,N+1);
      mexPrintf("nF %e\n",normF);
    }

    ++numiter;
    if (numiter>max_numiter)
      mexErrMsgTxt("maximum # of iterations exceeded in proj_lpball_newton\n");
  }

  // The projection is the first N coordinates of z.
  vcopy(z,xout,N);
}
static bool buildPolyDetail(const float* in, const int nin, unsigned short reg,
							const float sampleDist, const float sampleMaxError,
							const rcCompactHeightfield& chf, const rcHeightPatch& hp,
							float* verts, int& nverts, rcIntArray& tris,
							rcIntArray& edges, rcIntArray& idx, rcIntArray& samples)
{
	static const int MAX_VERTS = 256;
	static const int MAX_EDGE = 64;
	float edge[(MAX_EDGE+1)*3];

	nverts = 0;

	for (int i = 0; i < nin; ++i)
		vcopy(&verts[i*3], &in[i*3]);
	nverts = nin;
	
	const float ics = 1.0f/chf.cs;
	
	// Tesselate outlines.
	// This is done in separate pass in order to ensure
	// seamless height values across the ply boundaries.
	if (sampleDist > 0)
	{
		for (int i = 0, j = nin-1; i < nin; j=i++)
		{
			const float* vj = &in[j*3];
			const float* vi = &in[i*3];
			// Make sure the segments are always handled in same order
			// using lexological sort or else there will be seams.
			if (fabsf(vj[0]-vi[0]) < 1e-6f)
			{
				if (vj[2] > vi[2])
					rcSwap(vj,vi);
			}
			else
			{
				if (vj[0] > vi[0])
					rcSwap(vj,vi);
			}
			// Create samples along the edge.
			float dx = vi[0] - vj[0];
			float dy = vi[1] - vj[1];
			float dz = vi[2] - vj[2];
			float d = sqrtf(dx*dx + dz*dz);
			int nn = 1 + (int)floorf(d/sampleDist);
			if (nn > MAX_EDGE) nn = MAX_EDGE;
			if (nverts+nn >= MAX_VERTS)
				nn = MAX_VERTS-1-nverts;
			for (int k = 0; k <= nn; ++k)
			{
				float u = (float)k/(float)nn;
				float* pos = &edge[k*3];
				pos[0] = vj[0] + dx*u;
				pos[1] = vj[1] + dy*u;
				pos[2] = vj[2] + dz*u;
				pos[1] = chf.bmin[1] + getHeight(pos, chf.bmin, ics, hp)*chf.ch;
			}
			// Simplify samples.
			int idx[MAX_EDGE] = {0,nn};
			int nidx = 2;
			for (int k = 0; k < nidx-1; )
			{
				const int a = idx[k];
				const int b = idx[k+1];
				const float* va = &edge[a*3];
				const float* vb = &edge[b*3];
				// Find maximum deviation along the segment.
				float maxd = 0;
				int maxi = -1;
				for (int m = a+1; m < b; ++m)
				{
					float d = distancePtSeg(&edge[m*3],va,vb);
					if (d > maxd)
					{
						maxd = d;
						maxi = m;
					}
				}
				// If the max deviation is larger than accepted error,
				// add new point, else continue to next segment.
				if (maxi != -1 && maxd > rcSqr(sampleMaxError))
				{
					for (int m = nidx; m > k; --m)
						idx[m] = idx[m-1];
					idx[k+1] = maxi;
					nidx++;
				}
				else
				{
					++k;
				}
			}
			// Add new vertices.
			for (int k = 1; k < nidx-1; ++k)
			{
				vcopy(&verts[nverts*3], &edge[idx[k]*3]);
				nverts++;
			}
		}
	}
	
	// Tesselate the base mesh.
	edges.resize(0);
	tris.resize(0);
	idx.resize(0);
	delaunay(nverts, verts, idx, tris, edges);

	if (sampleDist > 0)
	{
		// Create sample locations in a grid.
		float bmin[3], bmax[3];
		vcopy(bmin, in);
		vcopy(bmax, in);
		for (int i = 1; i < nin; ++i)
		{
			vmin(bmin, &in[i*3]);
			vmax(bmax, &in[i*3]);
		}
		int x0 = (int)floorf(bmin[0]/sampleDist);
		int x1 = (int)ceilf(bmax[0]/sampleDist);
		int z0 = (int)floorf(bmin[2]/sampleDist);
		int z1 = (int)ceilf(bmax[2]/sampleDist);
		samples.resize(0);
		for (int z = z0; z < z1; ++z)
		{
			for (int x = x0; x < x1; ++x)
			{
				float pt[3];
				pt[0] = x*sampleDist;
				pt[2] = z*sampleDist;
				// Make sure the samples are not too close to the edges.
				if (distToPoly(nin,in,pt) > -sampleDist/2) continue;
				samples.push(x);
				samples.push(getHeight(pt, chf.bmin, ics, hp));
				samples.push(z);
			}
		}
				
		// Add the samples starting from the one that has the most
		// error. The procedure stops when all samples are added
		// or when the max error is within treshold.
		const int nsamples = samples.size()/3;
		for (int iter = 0; iter < nsamples; ++iter)
		{
			// Find sample with most error.
			float bestpt[3];
			float bestd = 0;
			for (int i = 0; i < nsamples; ++i)
			{
				float pt[3];
				pt[0] = samples[i*3+0]*sampleDist;
				pt[1] = chf.bmin[1] + samples[i*3+1]*chf.ch;
				pt[2] = samples[i*3+2]*sampleDist;
				float d = distToTriMesh(pt, verts, nverts, &tris[0], tris.size()/4);
				if (d < 0) continue; // did not hit the mesh.
				if (d > bestd)
				{
					bestd = d;
					vcopy(bestpt,pt);
				}
			}
			// If the max error is within accepted threshold, stop tesselating.
			if (bestd <= sampleMaxError)
				break;

			// Add the new sample point.
			vcopy(&verts[nverts*3],bestpt);
			nverts++;
			
			// Create new triangulation.
			// TODO: Incremental add instead of full rebuild.
			edges.resize(0);
			tris.resize(0);
			idx.resize(0);
			delaunay(nverts, verts, idx, tris, edges);

			if (nverts >= MAX_VERTS)
				break;
		}
	}

	return true;
}
Example #12
0
/********************************************************************
Real GEMM kernel for small blocks.

Refuses (returns false, touching nothing) when any of m, n, k exceeds
alglib_r_block; otherwise performs the update of the m-by-n block of _c
starting at (ic,jc) and returns true.

The relevant block of _b is first copied into an aligned local buffer
via mcopyblock(); then, for each of the m output rows, the matching
k-element row (optypea==0) or column (optypea!=0) of _a starting at
(ia,ja) is copied into an aligned buffer and handed to mv() together
with alpha and beta. When beta==0 the output row is zeroed first.
********************************************************************/
bool ialglib::_i_rmatrixgemmf(int m,
     int n,
     int k,
     double alpha,
     const ap::real_2d_array& _a,
     int ia,
     int ja,
     int optypea,
     const ap::real_2d_array& _b,
     int ib,
     int jb,
     int optypeb,
     double beta,
     ap::real_2d_array& _c,
     int ic,
     int jc)
{
    const bool fits_in_block = m<=alglib_r_block && n<=alglib_r_block && k<=alglib_r_block;
    if( !fits_in_block )
        return false;

    //
    // over-allocated stack storage, aligned for SIMD use
    //
    double abuf_storage[alglib_r_block+alglib_simd_alignment];
    double b_storage[alglib_r_block*alglib_r_block+alglib_simd_alignment];
    double * const abuf = (double * const) alglib_align(abuf_storage,alglib_simd_alignment);
    double * const b    = (double * const) alglib_align(b_storage,   alglib_simd_alignment);

    //
    // copy b
    //
    if( optypeb!=0 )
        mcopyblock(n, k, &_b(ib,jb), 0, _b.getstride(), b);
    else
        mcopyblock(k, n, &_b(ib,jb), 1, _b.getstride(), b);

    //
    // multiply B by A (from the right, by rows)
    // and store result in C
    //
    // A is walked row by row (optypea==0: consecutive elements, jump one
    // stride per output row) or column by column (otherwise: elements one
    // stride apart, jump one element per output row).
    //
    double *crow = &_c(ic,jc);
    const int astride = _a.getstride();
    const int cstride = _c.getstride();
    const bool a_by_rows = (optypea==0);
    const int a_elem_step = a_by_rows ? 1 : astride;
    const int a_line_step = a_by_rows ? astride : 1;
    const double *aline = &_a(ia,ja);

    for(int i=0; i<m; i++, crow+=cstride, aline+=a_line_step)
    {
        vcopy(k, aline, a_elem_step, abuf, 1);
        if( beta==0 )
            vzero(n, crow, 1);
        mv(n, k, b, abuf, crow, 1, alpha, beta);
    }
    return true;
}
Example #13
0
File: skin.c Project: Lenbok/dormin
/*
 * Skin all vertices and normals of `s`.
 *
 * Source positions are read from s->ptrs[0]; source normals follow them in
 * the same allocation, at a byte offset of
 * AL32 (num_vertices * 3 * sizeof (GLfloat)).  Results are written to vdst
 * (positions) and ndst (normals), three floats per vertex.
 *
 * With USE_ALTIVEC, vertices are processed four at a time (12 floats = 48
 * bytes of positions and of normals per iteration); whatever is left over is
 * handled by the scalar loop at the end.  Without USE_ALTIVEC the scalar loop
 * handles everything.
 *
 * With TIMING defined, the elapsed time is printed to stdout.
 */
static void translate (State *s, float *vdst, float *ndst)
{
    int i, j;
    struct abone *b;
    float *vsrc = s->ptrs[0];
    float *nsrc =
        (float *) ((char *) vsrc + AL32 (s->num_vertices * 3 * sizeof (GLfloat)));
    struct skin *skin = s->skin;

#ifdef TIMING
    double S = now (), E;
#endif

#ifdef USE_ALTIVEC
    /* Byte-permute patterns used to repack four 4-float results (of which
     * only x, y, z are wanted) into three tightly packed vectors.  Indices
     * 0-15 select from the first vec_perm operand, 16-31 from the second:
     *   p0: x,y,z of the first  + x     of the second
     *   p1: y,z   of the first  + x,y   of the second
     *   p2: z     of the first  + x,y,z of the second */
    vector unsigned char p0 =
        { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 16, 17, 18, 19 };
    vector unsigned char p1 =
        { 4, 5, 6, 7, 8, 9, 10, 11, 16, 17, 18, 19, 20, 21, 22, 23 };
    vector unsigned char p2 =
        { 8, 9, 10, 11, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27 };

    /* i counts groups of four vertices; j is the matching BYTE offset into
     * the source/destination arrays (48 bytes per group). */
    for (i = 0, j = 0; i < s->num_vertices >> 2; ++i, j += 48) {
        vector float v0, v1, v2, n0, n1, n2;
        vector float vx, vy, vz, nx, ny, nz;
        vector float vr0, vr1, vr2, vr3;
        vector float nr0, nr1, nr2, nr3;

#ifdef G4
        /* Cache-control hints on the destination buffers
         * (NOTE(review): DCB presumably issues the named PowerPC dcb*
         * instruction at base+offset -- confirm against the macro). */
        if (!(i & 3)) {
            DCB (dcbz, vdst, j);
            DCB (dcbz, ndst, j);
        }

        DCB (dcbz, vdst, j + 32);
        DCB (dcbz, ndst, j + 32);
#endif

        /* Hints for the four skin records used by this iteration... */
        DCB (dcbt, skin, 0);
        DCB (dcbt, skin + 1, 0);
        DCB (dcbt, skin + 2, 0);
        DCB (dcbt, skin + 3, 0);

        /* ...and for source data beyond the current group. */
        DCB (dcbt, vsrc, j + 64);
        DCB (dcbt, nsrc, j + 64);
        DCB (dcbt, vsrc, j + 96);
        DCB (dcbt, nsrc, j + 96);

        /* Load */
        /* Three vectors hold the 12 floats x0 y0 z0 x1 | y1 z1 x2 y2 | z2 x3 y3 z3 */
        v0 = vec_ld (j, vsrc);
        v1 = vec_ld (j + 16, vsrc);
        v2 = vec_ld (j + 32, vsrc);
        n0 = vec_ld (j, nsrc);
        n1 = vec_ld (j + 16, nsrc);
        n2 = vec_ld (j + 32, nsrc);

        /* First vertex/normal */
        /* Each component is broadcast to all four lanes before appbones(). */
        vx = vec_splat (v0, 0);
        vy = vec_splat (v0, 1);
        vz = vec_splat (v0, 2);
        nx = vec_splat (n0, 0);
        ny = vec_splat (n0, 1);
        nz = vec_splat (n0, 2);

        /* appbones returns the skinned position and writes the skinned
         * normal through its last argument. */
        vr0 = appbones (s, skin, vx, vy, vz, nx, ny, nz, &nr0);
        skin++;

        /* Second vertex/normal */
        vx = vec_splat (v0, 3);
        vy = vec_splat (v1, 0);
        vz = vec_splat (v1, 1);
        nx = vec_splat (n0, 3);
        ny = vec_splat (n1, 0);
        nz = vec_splat (n1, 1);

        vr1 = appbones (s, skin, vx, vy, vz, nx, ny, nz, &nr1);
        skin++;

        /* Third vertex/normal */
        vx = vec_splat (v1, 2);
        vy = vec_splat (v1, 3);
        vz = vec_splat (v2, 0);
        nx = vec_splat (n1, 2);
        ny = vec_splat (n1, 3);
        nz = vec_splat (n2, 0);

        vr2 = appbones (s, skin, vx, vy, vz, nx, ny, nz, &nr2);
        skin++;

        /* Fourth vertex/normal */
        vx = vec_splat (v2, 1);
        vy = vec_splat (v2, 2);
        vz = vec_splat (v2, 3);
        nx = vec_splat (n2, 1);
        ny = vec_splat (n2, 2);
        nz = vec_splat (n2, 3);

        vr3 = appbones (s, skin, vx, vy, vz, nx, ny, nz, &nr3);
        skin++;

        /* Assemble */
        /* Repack the four results into the same 12-float layout as the input. */
        v0 = vec_perm (vr0, vr1, p0);
        v1 = vec_perm (vr1, vr2, p1);
        v2 = vec_perm (vr2, vr3, p2);

        n0 = vec_perm (nr0, nr1, p0);
        n1 = vec_perm (nr1, nr2, p1);
        n2 = vec_perm (nr2, nr3, p2);

        /* Store */
        vec_st (v0, j, vdst);
        vec_st (v1, j + 16, vdst);
        vec_st (v2, j + 32, vdst);

        vec_st (n0, j, ndst);
        vec_st (n1, j + 16, ndst);
        vec_st (n2, j + 32, ndst);
    }

    /* Convert the group count back to a vertex count and advance all four
     * float pointers past the vertices already handled, so the scalar loop
     * below picks up the remaining (num_vertices % 4) vertices. */
    i <<= 2;
    vsrc += i*3;
    nsrc += i*3;
    vdst += i*3;
    ndst += i*3;
#else
    i = 0;
#endif

    /* Scalar path: one vertex per iteration. */
    for (; i < s->num_vertices; ++i, vsrc += 3, nsrc += 3, vdst += 3, ndst += 3,
             ++skin)
    {
        int num_bones, bone_index;
        float v[3] = {0,0,0}, n[3] = {0,0,0}, v0[4], v1[4], w;

        /* boneinfo packs the bone count in its low 2 bits, followed by
         * 10-bit bone indices. */
        num_bones = skin->boneinfo & 3;
        bone_index = skin->boneinfo >> 2;
        for (j = 0; j < num_bones; ++j) {
            w = skin->weights[j];
            b = &s->abones[bone_index & 0x3ff];
            bone_index >>= 10;

            /* Position transformed by the bone matrix, weighted. */
            mapply_to_point (v1, b->cm, vsrc);
            v1[0] *= w;
            v1[1] *= w;
            v1[2] *= w;

            /* Normal transformed by the same matrix, weighted. */
            mapply_to_vector (v0, b->cm, nsrc);
            v0[0] *= w;
            v0[1] *= w;
            v0[2] *= w;

            /* Accumulate the weighted contributions of all bones. */
            vaddto (v, v1);
            vaddto (n, v0);
        }

        vcopy (vdst, v);
        vcopy (ndst, n);
    }

#ifdef TIMING
    E = now ();
    printf ("took %f sec\n", E - S);
#endif
}
Example #14
0
void
hunt_problem(Comm_Ex *cx,	/* array of communications structures */
	     Exo_DB *exo,	/* ptr to the finite element mesh database */
	     Dpi *dpi)	        /* distributed processing information */
{
  int    *ija=NULL;           /* column pointer array                         */
  double *a=NULL;             /* nonzero array                                */
  double *a_old=NULL;         /* nonzero array                                */
  double *x=NULL;             /* solution vector                              */

  int     iAC;                /* COUNTER                                      */
  double *x_AC = NULL;        /* SOLUTION VECTOR OF EXTRA UNKNOWNS            */
  double *x_AC_old=NULL;      /* old SOLUTION VECTOR OF EXTRA UNKNOWNS        */
  double *x_AC_dot = NULL; 

  int     iHC;                /* COUNTER                                      */
  
  int    *ija_attic=NULL;     /* storage for external dofs                    */

  int eb_indx, ev_indx;

  /* 
   * variables for path traversal 
   */
  
  double *x_old=NULL;         /* old solution vector                          */
  double *x_older=NULL;       /* older solution vector                        */
  double *x_oldest=NULL;      /* oldest solution vector saved                 */
  double *xdot=NULL;          /* current path derivative of soln              */
  double *xdot_old=NULL;
  double *x_update=NULL;

  double *x_sens=NULL;        /* solution sensitivity */
  double **x_sens_p=NULL;     /* solution sensitivity for parameters */
  int num_pvector=0;          /*  number of solution sensitivity vectors */
#ifdef COUPLED_FILL
  struct Aztec_Linear_Solver_System *ams[NUM_ALSS]={NULL}; 
#else /* COUPLED_FILL */
  struct Aztec_Linear_Solver_System *ams[NUM_ALSS]={NULL, NULL}; 
#endif /* COUPLED_FILL */
                              /* sl_util_structs.h */

  double *resid_vector=NULL;  /* residual */
  double *resid_vector_sens=NULL;    /* residual sensitivity */
  double *scale=NULL;      /* scale vector for modified newton */

  int 	 *node_to_fill = NULL;	

  int		n;            /* total number of path steps attempted */
  int		ni;           /* total number of nonlinear solves */
  int		nt;           /* total number of successful path steps */
  int		path_step_reform; /* counter for jacobian reformation stride */
  int		converged;    /* success or failure of Newton iteration */
  int		success_ds;   /* success or failure of path step */

  int           i;

  int           nprint=0, num_total_nodes;

  int           numProcUnknowns;
  int           *const_delta_s=NULL;
  int           step_print;
  double        i_print;
  int good_mesh = TRUE;
  double	*path=NULL, *path1=NULL;
  double	*delta_s=NULL, *delta_s_new=NULL, *delta_s_old=NULL;
  double        *delta_s_older=NULL, *delta_s_oldest=NULL;
  double        *hDelta_s0=NULL, *hDelta_s_min=NULL, *hDelta_s_max=NULL;
  double        delta_t;
  double	theta=0.0;
  double        damp;
  double        eps;
  double        *lambda=NULL, *lambdaEnd=NULL;
  double	hunt_par, dhunt_par, hunt_par_old;	/* hunting continuation parameter */
  double        timeValueRead = 0.0;

  /* 
   * ALC management variables
   */

  int           alqALC;
  int           *aldALC=NULL; 

  /*
   * Other local variables 
   */
  
  int	        error, err, is_steady_state, inewton;
  int 		*gindex = NULL, gsize;
  int		*p_gsize=NULL;
  double	*gvec=NULL;
  double        ***gvec_elem;
  double	err_dbl;
  FILE          *file=NULL;
  double 	toler_org[3],damp_org;
  
  struct Results_Description  *rd=NULL;
  
  int		tnv;		/* total number of nodal variables and kinds */
  int		tev;		/* total number of elem variables and kinds */
  int		tnv_post;	/* total number of nodal variables and kinds 
					   for post processing */
  int		tev_post;	/* total number of elem variables and kinds 
					   for post processing */

  int max_unk_elem, one, three; /* variables used as mf_setup arguments*/

  unsigned int
  matrix_systems_mask;

  double evol_local=0.0;
#ifdef PARALLEL
  double evol_global=0.0;
#endif

  static char yo[]="hunt_problem"; 

  /*
   * 		BEGIN EXECUTION
   */

#ifdef DEBUG
  fprintf(stderr, "hunt_problem() begins...\n");
#endif

  toler_org[0] = custom_tol1;
  toler_org[1] = custom_tol2;
  toler_org[2] = custom_tol3;
  damp_org = damp_factor1;

  is_steady_state = TRUE;

  p_gsize = &gsize;
  
  /* 
   * set aside space for gather global vectors to print to exoII file
   * note: this is temporary
   *
   * For 2D prototype problem:  allocate space for T, dx, dy arrays
   */

  if( strlen( Soln_OutFile)  )
    {
#ifdef DEBUG
      printf("Trying to open \"%s\" for writing.\n", Soln_OutFile);
#endif
      file = fopen(Soln_OutFile, "w");
      if (file == NULL)  {
	DPRINTF(stderr, "%s:  opening soln file for writing\n", yo);
        EH(-1, "\t");
      }
    }
#ifdef PARALLEL
  check_parallel_error("Soln output file error");
#endif

  /*
   * Some preliminaries to help setup EXODUS II database output.
   */

#ifdef DEBUG
  fprintf(stderr, "cnt_nodal_vars() begins...\n");
#endif

  tnv = cnt_nodal_vars();
  /*  tnv_post is calculated in load_nodal_tkn*/
  tev = cnt_elem_vars();
  /*  tev_post is calculated in load_elem_tkn*/
  
#ifdef DEBUG
  fprintf(stderr, "Found %d total primitive nodal variables to output.\n", tnv);
  fprintf(stderr, "Found %d total primitive elem variables to output.\n", tev);
#endif
  
  if ( tnv < 0 )
    {
      DPRINTF(stderr, "%s:\tbad tnv.\n", yo);
      EH(-1, "\t");
    }

  if ( tev < 0 )
    {
      DPRINTF(stderr, "%s:\tMaybe bad tev? See goma design committee ;) \n", yo);
/*       exit(-1); */
    }
  
  rd = (struct Results_Description *) 
    smalloc(sizeof(struct Results_Description));

  if (rd == NULL) 
    { EH(-1, "Could not grab Results Description."); }
  (void) memset((void *) rd, 0, sizeof(struct Results_Description));
  
  rd->nev = 0;			/* number element variables in results */
  rd->ngv = 0;			/* number global variables in results */
  rd->nhv = 0;			/* number history variables in results */
  
  if ( is_steady_state == TRUE ) {
    rd->ngv = 5;			/* number global variables in results 
					   see load_global_var_info for names*/
    error = load_global_var_info(rd, 0, "CONV");
    error = load_global_var_info(rd, 1, "NEWT_IT");
    error = load_global_var_info(rd, 2, "MAX_IT");
    error = load_global_var_info(rd, 3, "CONVRATE");
    error = load_global_var_info(rd, 4, "MESH_VOLUME");
  }
  
  /* load nodal types, kinds, names */
  error = load_nodal_tkn( rd,
			  &tnv,
			  &tnv_post); /* load nodal types, kinds, names */
  
  if (error !=0)
    {
      DPRINTF(stderr, "%s:  problem with load_nodal_tkn()\n", yo);
      EH(-1,"\t");
    }

  /* load elem types, names */
  error = load_elem_tkn( rd,
			 exo,
			 tev, 
			 &tev_post); /* load elem types, names */
  
  if ( error !=0 )
    {
      DPRINTF(stderr, "%s:  problem with load_elem_tkn()\n", yo);
      EH(-1,"\t");
    }

  /* 
   * Write out the names of the nodal variables that we will be sending to
   * the EXODUS II output file later.
   */

#ifdef DEBUG
  fprintf(stderr, "wr_result_prelim() starts...\n", tnv);
#endif

  gvec_elem = (double ***) smalloc ( (exo->num_elem_blocks)*sizeof(double **));
  for (i = 0; i < exo->num_elem_blocks; i++) {
    gvec_elem[i] = (double **) smalloc ( (tev + tev_post)*sizeof(double *));
  }

  wr_result_prelim_exo( rd, 
                        exo, 
                        ExoFileOut,
                        gvec_elem );

#ifdef DEBUG
  fprintf(stderr, "P_%d: wr_result_prelim_exo() ends...\n", ProcID, tnv);
#endif

  /* 
   * This gvec workhorse transports output variables as nodal-based vectors
   * that are gathered from the solution vector. Note: it is NOT a global
   * vector at all and only carries this processor's nodal variables to
   * the exodus database.
   */

  asdv(&gvec, Num_Node);

  /*
   * Allocate space and manipulate for all the nodes that this processor
   * is aware of...
   */

  num_total_nodes = dpi->num_universe_nodes;

  numProcUnknowns = NumUnknowns + NumExtUnknowns;

  /* allocate memory for Volume Constraint Jacobian. ACS 2/99 */

  if ( nAC > 0)
    {
      for(iAC=0;iAC<nAC;iAC++) {
	augc[iAC].d_evol_dx = (double*) malloc(numProcUnknowns*sizeof(double));
      } }
  
  asdv(&resid_vector, numProcUnknowns);
  asdv(&resid_vector_sens, numProcUnknowns);
  asdv(&scale, numProcUnknowns);

  for (i=0;i<NUM_ALSS;i++) 
    {
      ams[i] = (struct Aztec_Linear_Solver_System *) 
	array_alloc(1, 1, sizeof(struct Aztec_Linear_Solver_System )); 
    }

#ifdef MPI
  AZ_set_proc_config( ams[0]->proc_config, MPI_COMM_WORLD );
#ifndef COUPLED_FILL
  if( Explicit_Fill ) AZ_set_proc_config( ams[1]->proc_config, MPI_COMM_WORLD );
#endif /* not COUPLED_FILL */
#else /* MPI */
  AZ_set_proc_config( ams[0]->proc_config, 0 );
#ifndef COUPLED_FILL
  if( Explicit_Fill ) AZ_set_proc_config( ams[1]->proc_config, 0 );
#endif /* not COUPLED_FILL */
#endif /* MPI */

  /*
   * allocate space for and initialize solution arrays
   */

  asdv(&x,        numProcUnknowns);
  asdv(&x_old,    numProcUnknowns);
  asdv(&x_older,  numProcUnknowns);
  asdv(&x_oldest, numProcUnknowns);
  asdv(&xdot,     numProcUnknowns);
  asdv(&xdot_old, numProcUnknowns);
  asdv(&x_update, numProcUnknowns);

  asdv(&x_sens, numProcUnknowns);

  /*
   * Initialize solid inertia flag
   */
  set_solid_inertia();

  /*
   * ALLOCATE ALL THOSE WORK VECTORS FOR HUNTING
   */

  asdv(&lambda,         nHC);
  asdv(&lambdaEnd,      nHC);
  asdv(&path,           nHC);
  asdv(&path1,          nHC);
  asdv(&hDelta_s0,      nHC);
  asdv(&hDelta_s_min,   nHC);
  asdv(&hDelta_s_max,   nHC);
  asdv(&delta_s,        nHC);
  asdv(&delta_s_new,    nHC);
  asdv(&delta_s_old,    nHC);
  asdv(&delta_s_older,  nHC);
  asdv(&delta_s_oldest, nHC);

  aldALC        = Ivector_birth(nHC);
  const_delta_s = Ivector_birth(nHC);

  /*

   HUNTING BY ZERO AND FIRST ORDER CONTINUATION

  */

  alqALC = 1;

  damp = 1.0;

  delta_t = 0.0;
  tran->delta_t = 0.0;      /*for Newmark-Beta terms in Lagrangian Solid*/

  nprint = 0;

  MaxPathSteps      = cont->MaxPathSteps;
  eps               = cont->eps;

  for (iHC=0;iHC<nHC;iHC++) {

    const_delta_s[iHC] = 0;

    lambda[iHC]       = hunt[iHC].BegParameterValue;
    lambdaEnd[iHC]    = hunt[iHC].EndParameterValue;

    if ((lambdaEnd[iHC]-lambda[iHC]) > 0.0)
    {
      aldALC[iHC] = +1;
    }
    else
    {
      aldALC[iHC] = -1;
    } 

    if (hunt[iHC].ramp == 1) {
      hunt[iHC].Delta_s0 = fabs(lambdaEnd[iHC]-lambda[iHC])/((double)(MaxPathSteps-1));
      const_delta_s[iHC] = 1;
    }

    hDelta_s0[iHC]     = hunt[iHC].Delta_s0;
    hDelta_s_min[iHC]  = hunt[iHC].Delta_s_min;
    hDelta_s_max[iHC]  = hunt[iHC].Delta_s_max;

    path[iHC] = path1[iHC] = lambda[iHC];

    if (Debug_Flag && ProcID == 0) {
      fprintf(stderr,"MaxPathSteps: %d \tlambdaEnd: %f\n", MaxPathSteps, lambdaEnd[iHC]);
      fprintf(stderr,"continuation in progress\n");
    }

    if (hDelta_s0[iHC] > hDelta_s_max[iHC]) 
    {
      hDelta_s0[iHC] = hDelta_s_max[iHC];
    }

    delta_s[iHC] = delta_s_old[iHC] = delta_s_older[iHC] = hDelta_s0[iHC];
      
    /*
     * ADJUST NATURAL PARAMETER
     */
	
    update_parameterHC(iHC, path1[iHC], x, xdot, x_AC, delta_s[iHC], cx, exo, dpi); 
  }

  /*  define continuation parameter */

  if(hunt[0].EndParameterValue == hunt[0].BegParameterValue)
 	{	hunt_par = 1.0;	}
  else
 	{
	  hunt_par = (path1[0]-hunt[0].BegParameterValue)
	      /(hunt[0].EndParameterValue - hunt[0].BegParameterValue)  ;
          hunt_par=fabs(hunt_par);
 	}
  hunt_par_old = hunt_par;

  /* Call prefront (or mf_setup) if necessary */
  if (Linear_Solver == FRONT)
  {
    /* Also got to define these because it wants pointers to these numbers */
	  
    max_unk_elem = (MAX_PROB_VAR + MAX_CONC)*MDE;
    one = 1;
    three = 3;

    /* NOTE: We need an overall flag in the vn_glob struct that tells whether FULL_DG
       is on anywhere in the domain.  This assumes only one material.  See sl_front_setup
       for the test; that test needs to be in the input parser.  */

    if(vn_glob[0]->dg_J_model == FULL_DG) 
    {
      max_unk_elem = (MAX_PROB_VAR + MAX_CONC)*MDE + 4*vn_glob[0]->modes*4*MDE;
    }

    if (Num_Proc > 1) EH(-1, "Whoa.  No front allowed with nproc>1");  
	  
#ifdef HAVE_FRONT  
    err = mf_setup(&exo->num_elems, 
		   &NumUnknowns, 
		   &max_unk_elem, 
		   &three,
		   &one,
		   exo->elem_order_map,
		   fss->el_proc_assign,
		   fss->level,
		   fss->nopdof,
		   fss->ncn,
		   fss->constraint,
		   front_scratch_directory,
		   &fss->ntra);
    EH(err,"problems in frontal setup ");

#else
    EH(-1,"Don't have frontal solver compiled and linked in");
#endif
  }


  /*
         *  If computing parameter sensitivities, allocate space for the
         *  solution sensitivity vectors.
         */

        for(i=0;i<nn_post_fluxes_sens;i++)      {
          num_pvector=MAX(num_pvector,pp_fluxes_sens[i]->vector_id);}
        for(i=0;i<nn_post_data_sens;i++)        {
          num_pvector=MAX(num_pvector,pp_data_sens[i]->vector_id);}

  if((nn_post_fluxes_sens + nn_post_data_sens) > 0)
  {
    num_pvector++;
    num_pvector = MAX(num_pvector,2);
        x_sens_p = Dmatrix_birth(num_pvector,numProcUnknowns);
  }
  else
  {
    x_sens_p = NULL;
  }


  if (nAC > 0)
  {
    asdv(&x_AC, nAC);
    asdv(&x_AC_old, nAC);
    asdv(&x_AC_dot, nAC);
  }

  /* Allocate sparse matrix */

  if( strcmp( Matrix_Format, "msr" ) == 0)
  {
    log_msg("alloc_MSR_sparse_arrays...");
    alloc_MSR_sparse_arrays(&ija, 
			    &a, 
			    &a_old, 
			    0, 
			    node_to_fill, 
			    exo, 
			    dpi);
    /*
     * An attic to store external dofs column names is needed when
     * running in parallel.
     */

    alloc_extern_ija_buffer(num_universe_dofs, 
			    num_internal_dofs+num_boundary_dofs, 
			    ija, &ija_attic);
    /*
     * Any necessary one time initialization of the linear
     * solver package (Aztec).
     */
      
    ams[JAC]->bindx   = ija;
    ams[JAC]->val     = a;
    ams[JAC]->belfry  = ija_attic;
    ams[JAC]->val_old = a_old;
	  
    /*
     * These point to nowhere since we're using MSR instead of VBR
     * format.
     */
      
    ams[JAC]->indx  = NULL;
    ams[JAC]->bpntr = NULL;
    ams[JAC]->rpntr = NULL;
    ams[JAC]->cpntr = NULL;
    ams[JAC]->npn      = dpi->num_internal_nodes + dpi->num_boundary_nodes;
    ams[JAC]->npn_plus = dpi->num_internal_nodes + dpi->num_boundary_nodes + dpi->num_external_nodes;

    ams[JAC]->npu      = num_internal_dofs+num_boundary_dofs;
    ams[JAC]->npu_plus = num_universe_dofs;

    ams[JAC]->nnz = ija[num_internal_dofs+num_boundary_dofs] - 1;
    ams[JAC]->nnz_plus = ija[num_universe_dofs];

  }
  else if(  strcmp( Matrix_Format, "vbr" ) == 0)
  {
    log_msg("alloc_VBR_sparse_arrays...");
    alloc_VBR_sparse_arrays ( ams[JAC],
			      exo,
			      dpi);
    ija_attic = NULL;
    ams[JAC]->belfry  = ija_attic;

    a = ams[JAC]->val;
    if( !save_old_A ) a_old = ams[JAC]->val_old;
  }
  else if ( strcmp( Matrix_Format, "front") == 0 )
    {
      /* Don't allocate any sparse matrix space when using front */
      ams[JAC]->bindx   = NULL;
      ams[JAC]->val     = NULL;
      ams[JAC]->belfry  = NULL;
      ams[JAC]->val_old = NULL;
      ams[JAC]->indx  = NULL;
      ams[JAC]->bpntr = NULL;
      ams[JAC]->rpntr = NULL;
      ams[JAC]->cpntr = NULL;

    }
  else
  {
    EH(-1,"Attempted to allocate unknown sparse matrix format");
  }

  init_vec(x, cx, exo, dpi, x_AC, nAC, &timeValueRead);

/*  if read ACs, update data floats */
  if (nAC > 0)
  {
    if(augc[0].iread == 1)
      {
	for(iAC=0 ; iAC<nAC ; iAC++)	
	  { update_parameterAC(iAC, x, xdot, x_AC, cx, exo, dpi); }
      }
  }


  /* 
       * set boundary conditions on the initial conditions 
       */

  find_and_set_Dirichlet(x, xdot, exo, dpi);

  exchange_dof(cx, dpi, x);

  dcopy1(numProcUnknowns,x,x_old);
  dcopy1(numProcUnknowns,x_old,x_older);
  dcopy1(numProcUnknowns,x_older,x_oldest);

  if( nAC > 0)
  {
    dcopy1(nAC,x_AC, x_AC_old);}

  /* 
       * initialize the counters for when to print out data 
       */

  step_print = 1;

  matrix_systems_mask = 1;
      
  log_msg("sl_init()...");
  sl_init(matrix_systems_mask, ams, exo, dpi, cx);

#ifdef PARALLEL
  /*
  * Make sure the solver was properly initialized on all processors.
  */
  check_parallel_error("Solver initialization problems");
#endif

      ams[JAC]->options[AZ_keep_info] = 1;

    DPRINTF(stderr, "\nINITIAL ELEMENT QUALITY CHECK---\n");
    good_mesh = element_quality(exo, x, ams[0]->proc_config);

  /* 
       * set the number of successful path steps to zero 
       */

  nt = 0;   

  /* 
       * LOOP THROUGH PARAMETER UNTIL MAX NUMBER 
       * OF STEPS SURPASSED
       */

  for (n=0;n<MaxPathSteps;n++) {

    alqALC = 1;

    for (iHC=0;iHC<nHC;iHC++) {
	
      switch (aldALC[iHC]) {
      case -1: /* REDUCING PARAMETER DIRECTION */
	  if (path1[iHC] <= lambdaEnd[iHC]) { 
	    alqALC = -1;
	    path1[iHC] = lambdaEnd[iHC];
	    delta_s[iHC] = path[iHC]-path1[iHC];
	  } 
	  break;
      case +1: /* RISING PARAMETER DIRECTION */
	  if (path1[iHC] >= lambdaEnd[iHC]) { 
	    alqALC = -1;
	    path1[iHC] = lambdaEnd[iHC];
	    delta_s[iHC] = path1[iHC]-path[iHC];
	  } 
	  break;
      }

      /*
       * ADJUST NATURAL PARAMETER
       */

      update_parameterHC(iHC, path1[iHC], x, xdot, x_AC, delta_s[iHC], cx, exo, dpi); 
    }   /*  end of iHC loop */

  	if(hunt[0].EndParameterValue == hunt[0].BegParameterValue)
 		{	hunt_par = 1.0;	}
	else
 		{
		  hunt_par = (path1[0]-hunt[0].BegParameterValue)
		      /(hunt[0].EndParameterValue - hunt[0].BegParameterValue)  ;
                  hunt_par=fabs(hunt_par);
 		}

    /*
     * IF STEP CHANGED, REDO FIRST ORDER PREDICTION
     */

    if(alqALC == -1)
    {
      DPRINTF(stderr,"\n\t ******** LAST PATH STEP!\n");
      dcopy1(numProcUnknowns,x_old,x);

      dhunt_par = hunt_par-hunt_par_old;
      switch (Continuation) {
      case HUN_ZEROTH:
          break;
      case  HUN_FIRST:
          v1add(numProcUnknowns, &x[0], dhunt_par, &x_sens[0]);
	  break;
      }
    }

    /* 
     * reset Dirichlet condition Mask, node->DBC to -1 where it
     * is set in order for Dirichlet conditions to be 
     * set appropriately for each path step 
     */
	  
    nullify_dirichlet_bcs();
	  
    find_and_set_Dirichlet (x, xdot, exo, dpi); 

    exchange_dof(cx, dpi, x);

    if(ProcID ==0) {
      DPRINTF(stderr, "\n\t----------------------------------");
      switch (Continuation) {
      case HUN_ZEROTH:
	  DPRINTF(stderr, "\n\tZero Order Hunting:");
	  break;
      case  HUN_FIRST:
	  DPRINTF(stderr, "\n\tFirst Order Hunting:");
	  break; }
      DPRINTF(stderr, "\n\tStep number: %4d of %4d (max)", n+1, MaxPathSteps);
      DPRINTF(stderr, "\n\tAttempting solution at: theta = %g",hunt_par);
      for (iHC=0;iHC<nHC;iHC++) {
	switch (hunt[iHC].Type) {
	case 1: /* BC */
	    DPRINTF(stderr, "\n\tBCID=%3d DFID=%5d", hunt[iHC].BCID, hunt[iHC].DFID);
	    break;
	case 2: /* MT */
	    DPRINTF(stderr, "\n\tMTID=%3d MPID=%5d", hunt[iHC].MTID, hunt[iHC].MPID);
	    break;
 	case 3: /* AC */
 	    DPRINTF(stderr, "\n\tACID=%3d DFID=%5d", hunt[iHC].BCID, hunt[iHC].DFID);
 	    break;
	}
	DPRINTF(stderr, " Parameter= % 10.6e delta_s= %10.6e", path1[iHC], delta_s[iHC]);
      }
    }
	
    ni = 0;
    do {

#ifdef DEBUG
      fprintf(stderr, "%s: starting solve_nonlinear_problem\n", yo);
#endif
      err = solve_nonlinear_problem(ams[JAC], 
				    x, 
				    delta_t, 
				    theta,
				    x_old,
				    x_older, 
				    xdot,
				    xdot_old,
				    resid_vector,
				    x_update,
				    scale, 
				    &converged, 
				    &nprint, 
				    tev, 
				    tev_post,
				    NULL,
				    rd,
				    gindex,
				    p_gsize,
				    gvec, 
				    gvec_elem, 
 				    path1[0],
				    exo, 
				    dpi, 
				    cx, 
				    0, 
				    &path_step_reform,
				    is_steady_state,
				    x_AC,
 				    x_AC_dot,
				    hunt_par,
				    resid_vector_sens,
				    x_sens,
				    x_sens_p,
                                    NULL);

#ifdef DEBUG
      fprintf(stderr, "%s: returned from solve_nonlinear_problem\n", yo);
#endif

      if (err == -1) converged = 0;
      inewton = err;
      if (converged)
      {
	EH(error, "error writing ASCII soln file."); /* srs need to check */

	if (Write_Intermediate_Solutions == 0) {    
#ifdef DEBUG
	  fprintf(stderr, "%s: write_solution call WIS\n", yo);
#endif
	  write_solution(ExoFileOut, resid_vector, x, x_sens_p, x_old, 
			 xdot, xdot_old, tev, tev_post,NULL,  rd, gindex,
			 p_gsize, gvec, gvec_elem, &nprint, delta_s[0], 
 			 theta, path1[0], NULL, exo, dpi);
#ifdef DEBUG
	  fprintf(stderr, "%s: write_solution end call WIS\n", yo);
#endif
	}

	/*
	 * PRINT OUT VALUES OF EXTRA UNKNOWNS 
	 * FROM AUGMENTING CONDITIONS 
	 */

	if (nAC > 0) 
          {
	    
	    DPRINTF(stderr, "\n------------------------------\n");
	    DPRINTF(stderr, "Augmenting Conditions:    %4d\n", nAC);
	    DPRINTF(stderr, "Number of extra unknowns: %4d\n\n", nAC);

            for (iAC = 0; iAC < nAC; iAC++)
             {
              if (augc[iAC].Type == AC_USERBC)
               {
                DPRINTF(stderr, "\tAC[%4d] DF[%4d] = %10.6e\n",
                        augc[iAC].BCID, augc[iAC].DFID, x_AC[iAC]);
               }
              else if (augc[iAC].Type == AC_USERMAT  ||
                       augc[iAC].Type == AC_FLUX_MAT )
               {
                DPRINTF(stderr, "\n MT[%4d] MP[%4d] = %10.6e\n",
                        augc[iAC].MTID, augc[iAC].MPID, x_AC[iAC]);
               }
              else if(augc[iAC].Type == AC_VOLUME)
               {
                evol_local = augc[iAC].evol;
#ifdef PARALLEL
                if( Num_Proc > 1 ) {
                     MPI_Allreduce( &evol_local, &evol_global, 1,
                                    MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
                }
                evol_local = evol_global;
#endif
                DPRINTF(stderr, "\tMT[%4d] VC[%4d]=%10.6e Param=%10.6e\n",
                        augc[iAC].MTID, augc[iAC].VOLID, evol_local,
                        x_AC[iAC]);
               }
	      else if(augc[iAC].Type == AC_POSITION)
               {
                evol_local = augc[iAC].evol;
#ifdef PARALLEL
                if( Num_Proc > 1 ) {
                     MPI_Allreduce( &evol_local, &evol_global, 1,
                                    MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
                }
                evol_local = evol_global;
#endif
                DPRINTF(stderr, "\tMT[%4d] XY[%4d]=%10.6e Param=%10.6e\n",
                        augc[iAC].MTID, augc[iAC].VOLID, evol_local,
                        x_AC[iAC]);
               }
               else if(augc[iAC].Type == AC_FLUX)
               {
                DPRINTF(stderr, "\tBC[%4d] DF[%4d]=%10.6e\n",
                        augc[iAC].BCID, augc[iAC].DFID, x_AC[iAC]);
               }
             }
	  }

      /* Check element quality */
      good_mesh = element_quality(exo, x, ams[0]->proc_config);

	/*
	     
	  INTEGRATE FLUXES, FORCES  

	*/

	for (i = 0; i < nn_post_fluxes; i++)
	{
	  err_dbl = evaluate_flux ( exo, dpi, 
                                    pp_fluxes[i]->ss_id, 
				    pp_fluxes[i]->flux_type ,
                                    pp_fluxes[i]->flux_type_name ,
				    pp_fluxes[i]->blk_id , 
				    pp_fluxes[i]->species_number, 
				    pp_fluxes[i]->flux_filenm,
                                    pp_fluxes[i]->profile_flag,
 				    x,xdot,NULL,delta_s[0],path1[0],1); 
	}


	/*
	  COMPUTE FLUX, FORCE SENSITIVITIES
	*/


	for (i = 0; i < nn_post_fluxes_sens; i++)
	{
	  err_dbl = evaluate_flux_sens ( exo, dpi,
                                         pp_fluxes_sens[i]->ss_id,
					 pp_fluxes_sens[i]->flux_type ,
                                         pp_fluxes_sens[i]->flux_type_name ,
					 pp_fluxes_sens[i]->blk_id ,
					 pp_fluxes_sens[i]->species_number,
					 pp_fluxes_sens[i]->sens_type,
					 pp_fluxes_sens[i]->sens_id,
					 pp_fluxes_sens[i]->sens_flt,
					 pp_fluxes_sens[i]->sens_flt2,
					 pp_fluxes_sens[i]->vector_id,
					 pp_fluxes_sens[i]->flux_filenm,
                                         pp_fluxes_sens[i]->profile_flag,
 					 x,xdot,x_sens_p,delta_s[0],path1[0],1);
	}
 	/*
      	 * Compute global volumetric quantities
      	 */
     	 for (i = 0; i < nn_volume; i++ ) {
       		evaluate_volume_integral(exo, dpi,
                                pp_volume[i]->volume_type,
                                pp_volume[i]->volume_name,
                                pp_volume[i]->blk_id,
                                pp_volume[i]->species_no,
                                pp_volume[i]->volume_fname,
                                pp_volume[i]->params,
                                NULL,  x, xdot, delta_s[0],
                                path1[0], 1);
     		}

      } /* end of if converged block */

      /*
       * INCREMENT COUNTER
       */
   
      ni++;

      /*
       * 
       * DID IT CONVERGE ? 
       * IF NOT, REDUCE STEP SIZE AND TRY AGAIN
       * 
       */

      if (!converged) {

	if (ni > 10) {
 	  DPRINTF(stderr,"\n ************************************\n");
 	  DPRINTF(stderr," W: Did not converge in Newton steps.\n");
 	  DPRINTF(stderr,"    Find better initial guess.       \n");
 	  DPRINTF(stderr," ************************************\n"); 
 	  exit(0);
	}

        /*
         * ADJUST STEP SIZE - unless failed on first step
         */

        if ( nt != 0 )
        {
	DPRINTF(stderr, "\n\tFailed to converge:\n");

	for (iHC=0;iHC<nHC;iHC++) {

	  delta_s[iHC] *= 0.5;

	  switch (aldALC[iHC]) {
	  case -1: 
	      path1[iHC] = path[iHC] - delta_s[iHC];
	      break;
	  case +1: 
	      path1[iHC] = path[iHC] + delta_s[iHC];
	      break;
	  }

	  /*
	   * RESET
	   */

	  alqALC = 1;

	  DPRINTF(stderr, "Decreasing step-length to %10.6e.\n", delta_s[iHC]);

	  if (delta_s[iHC] < hDelta_s_min[iHC]) {
 	    DPRINTF(stderr,"\n X: C step-length reduced below minimum.");
 	    DPRINTF(stderr,"\n    Program terminated.\n");
	    /* This needs to have a return value of 0, indicating
	     * success, for the continuation script to not treat this
	     * as a failed command. */
	    exit(0);
	  } 
#ifdef PARALLEL
              check_parallel_error("\t");
#endif

	  /*
	   * ADJUST NATURAL PARAMETER
	   */
	    
	  update_parameterHC(iHC, path1[iHC], x, xdot, x_AC, delta_s[iHC], cx, exo, dpi);
	}  /* end of iHC loop  */

  	if(hunt[0].EndParameterValue == hunt[0].BegParameterValue)
 		{	hunt_par = 1.0;	}
	else
 		{
	  	hunt_par = (path1[0]-hunt[0].BegParameterValue)
	     	 /(hunt[0].EndParameterValue - hunt[0].BegParameterValue)  ;
                hunt_par=fabs(hunt_par);
 		}

	/*
	 * GET ZERO OR FIRST ORDER PREDICTION
	 */

	dhunt_par = hunt_par-hunt_par_old;

	switch (Continuation) {
	case HUN_ZEROTH:
	    vcopy(numProcUnknowns, &x[0], 1.0, &x_old[0]);
	    break;
	case  HUN_FIRST:
	    v2sum(numProcUnknowns, &x[0], 1.0, &x_old[0], dhunt_par, &x_sens[0]);
            break;
	}
	
	/* MMH: Needed to put this in, o/w it may find that the
         * solution and residual HAPPEN to satisfy the convergence
         * criterion for the next newton solve...
         */
        find_and_set_Dirichlet(x, xdot, exo, dpi);
	
        exchange_dof(cx, dpi, x);

	if (nAC > 0)
          {
	    dcopy1(nAC, x_AC_old, x_AC);
	    for(iAC=0 ; iAC<nAC ; iAC++)	
	      { update_parameterAC(iAC, x, xdot, x_AC, cx, exo, dpi); }
	  }

  		if(hunt[0].EndParameterValue == hunt[0].BegParameterValue)
 			{	hunt_par = 1.0;	}
  		else
 			{
	  		hunt_par = (path1[0]-hunt[0].BegParameterValue)
	      			/(hunt[0].EndParameterValue - hunt[0].BegParameterValue)  ;
                        hunt_par=fabs(hunt_par);
 			}

 	}
 	else if (inewton == -1)
 	{
 	DPRINTF(stderr,"\nHmm... trouble on first step \n  Let's try some more relaxation  \n");
 	      if((damp_factor1 <= 1. && damp_factor1 >= 0.) &&
 	         (damp_factor2 <= 1. && damp_factor2 >= 0.) &&
        		 (damp_factor3 <= 1. && damp_factor3 >= 0.))
 		{
 		custom_tol1 *= 0.01;
 		custom_tol2 *= 0.01;
 		custom_tol3 *= 0.01;
 	DPRINTF(stderr,"  custom tolerances %g %g %g  \n",custom_tol1,custom_tol2,custom_tol3);
 		}
 		else
 		{
 		damp_factor1 *= 0.5;
 	DPRINTF(stderr,"  damping factor %g  \n",damp_factor1);
 		}
 
 	    vcopy(numProcUnknowns, &x[0], 1.0, &x_old[0]);
 	
 	/* MMH: Needed to put this in, o/w it may find that the
          * solution and residual HAPPEN to satisfy the convergence
          * criterion for the next newton solve...
          */
         find_and_set_Dirichlet(x, xdot, exo, dpi);
 	
         exchange_dof(cx, dpi, x);
 
 
 	if (nAC > 0)
          {
 	    dcopy1(nAC, x_AC_old, x_AC);
 	    for(iAC=0 ; iAC<nAC ; iAC++)	
 	      { update_parameterAC(iAC, x, xdot, x_AC, cx, exo, dpi); }
 	  }
 
 	}
 	else 
 	{
 	DPRINTF(stderr,"\nHmm... could not converge on first step\n Let's try some more iterations\n");
 	      if((damp_factor1 <= 1. && damp_factor1 >= 0.) &&
 	         (damp_factor2 <= 1. && damp_factor2 >= 0.) &&
        		 (damp_factor3 <= 1. && damp_factor3 >= 0.))
 		{
 		custom_tol1 *= 100.;
 		custom_tol2 *= 100.;
 		custom_tol3 *= 100.;
 	DPRINTF(stderr,"  custom tolerances %g %g %g  \n",custom_tol1,custom_tol2,custom_tol3);
 		}
 		else
 		{
 		damp_factor1 *= 2.0;
		damp_factor1 = MIN(damp_factor1,1.0);
 	DPRINTF(stderr,"  damping factor %g  \n",damp_factor1);
 		}
 	}
 

      }  /* end of !converged */

    } while (converged == 0);

    /*
     * CONVERGED
     */
    nt++;
    custom_tol1 = toler_org[0];
    custom_tol2 = toler_org[1];
    custom_tol3 = toler_org[2];
    damp_factor1 = damp_org;
    DPRINTF(stderr,
	    "\n\tStep accepted, theta (proportion complete) = %10.6e\n",
	    hunt_par);
    for (iHC=0;iHC<nHC;iHC++) {
      switch (hunt[iHC].Type) {
      case 1:		/* BC */
	  DPRINTF(stderr, "\tStep accepted, BCID=%3d DFID=%5d",
		  hunt[iHC].BCID, hunt[iHC].DFID);
	  break;
      case 2:		/* MT */
	  DPRINTF(stderr, "\tStep accepted, MTID=%3d MPID=%5d",
		  hunt[iHC].MTID, hunt[iHC].MPID);
	  break;
      case 3:		/* AC */
	  DPRINTF(stderr, "\tStep accepted, ACID=%3d DFID=%5d",
 		  hunt[iHC].BCID, hunt[iHC].DFID);
 	  break;
      }
      DPRINTF(stderr, " Parameter= % 10.6e\n", path1[iHC]);
    }

    /* 
     * check path step error, if too large do not enlarge path step 
     */

    for (iHC=0;iHC<nHC;iHC++) {

      if ((ni == 1) && (n != 0) && (!const_delta_s[iHC])) 
      {
	delta_s_new[iHC] = path_step_control(num_total_nodes, 
					     delta_s[iHC], delta_s_old[iHC], 
					     x, 
					     eps, 
					     &success_ds, 
					     cont->use_var_norm, inewton);
	if (delta_s_new[iHC] > hDelta_s_max[iHC]) {delta_s_new[iHC] = hDelta_s_max[iHC];}
      }
      else 
      {
	success_ds = 1;
	delta_s_new[iHC] = delta_s[iHC];
      }
    }
	  
    /* 
     * determine whether to print out the data or not 
     */

    i_print = 0;
    if (nt == step_print) {
      i_print = 1;
      step_print += cont->print_freq; }

    if (alqALC == -1) 
    { i_print = 1; }
	  
    if (i_print) {
      error = write_ascii_soln(x, resid_vector, numProcUnknowns,
 			       x_AC, nAC, path1[0], file);
      if (error) {
	DPRINTF(stderr, "%s:  error writing ASCII soln file\n", yo);
      }	  
      if ( Write_Intermediate_Solutions == 0 ) {
	write_solution(ExoFileOut, resid_vector, x, x_sens_p, 
		       x_old, xdot, xdot_old, tev, tev_post, NULL, 
		       rd, gindex, p_gsize, gvec, gvec_elem, &nprint,
 		       delta_s[0], theta, path1[0], NULL, exo, dpi);
	nprint++;
      }
    }
	  
    /*
     * backup old solutions
     * can use previous solutions for prediction one day
     */
	  
    dcopy1(numProcUnknowns,x_older,x_oldest);
    dcopy1(numProcUnknowns,x_old,x_older);
    dcopy1(numProcUnknowns,x,x_old);

    dcopy1(nHC,delta_s_older,delta_s_oldest);
    dcopy1(nHC,delta_s_old  ,delta_s_older );
    dcopy1(nHC,delta_s      ,delta_s_old   );
    dcopy1(nHC,delta_s_new  ,delta_s       );
/*
    delta_s_oldest = delta_s_older;
    delta_s_older = delta_s_old;
    delta_s_old = delta_s;
    delta_s = delta_s_new;
*/
    hunt_par_old=hunt_par;
    if ( nAC > 0) {
      dcopy1(nAC, x_AC, x_AC_old);
    }

    /*
     * INCREMENT/DECREMENT PARAMETER
     */


    for (iHC=0;iHC<nHC;iHC++) {

      path[iHC]  = path1[iHC];
	  
      switch (aldALC[iHC]) {
      case -1: 
	  path1[iHC] = path[iHC] - delta_s[iHC];
	  break;
      case +1: 
	  path1[iHC] = path[iHC] + delta_s[iHC];
	  break;
      }
	  
      /*
       * ADJUST NATURAL PARAMETER
       */
	
      update_parameterHC(iHC, path1[iHC], x, xdot, x_AC, delta_s[iHC], cx, exo, dpi); 
    }  /*  end of iHC loop */

    /*
     * GET FIRST ORDER PREDICTION
     */

	  if(hunt[0].EndParameterValue == hunt[0].BegParameterValue)
 		{	hunt_par = 1.0;	}
  		else
 		{
	  	hunt_par = (path1[0]-hunt[0].BegParameterValue)
	      		/(hunt[0].EndParameterValue - hunt[0].BegParameterValue)  ;
                hunt_par=fabs(hunt_par);
 		}
    dhunt_par = hunt_par-hunt_par_old;
    switch (Continuation) {
    case HUN_ZEROTH:
	break;
    case  HUN_FIRST:
	v1add(numProcUnknowns, &x[0], dhunt_par, &x_sens[0]);
        break; }

        if (!good_mesh) goto free_and_clear;

    /*
     * 
     * CHECK END CONTINUATION
     *  
     */

    if (alqALC == -1)
    { alqALC = 0; }
    else
    { alqALC = 1; }

    if (alqALC == 0) {
      DPRINTF(stderr,"\n\n\t I will continue no more!\n\t No more continuation for you!\n");
      goto free_and_clear;
    }
	
  } /* n */

      if(n == MaxPathSteps &&
	 aldALC[0] * (lambdaEnd[0] - path[0]) > 0)
	{
	  DPRINTF(stderr,"\n\tFailed to reach end of hunt in maximum number of successful steps (%d).\n\tSorry.\n",
		  MaxPathSteps);
 	  exit(0);
	}
#ifdef PARALLEL
      check_parallel_error("Hunting error");
#endif

  /*
   * DONE CONTINUATION
   */

 free_and_clear: 

  /*
   * Transform the node point coordinates according to the
   * displacements and write out all the results using the
   * displaced coordinates. Set the displacement field to
   * zero, too.
   */

  if (Anneal_Mesh) {
#ifdef DEBUG
    fprintf(stderr, "%s: anneal_mesh()...\n", yo);
#endif
    err = anneal_mesh(x, tev, tev_post, NULL, rd, path1[0], exo, dpi);
#ifdef DEBUG
    DPRINTF(stderr, "%s: anneal_mesh()-done\n", yo);
#endif
    EH(err, "anneal_mesh() bad return.");
  }

  /* 
   * Free a bunch of variables that aren't needed anymore 
   */
  safer_free((void **) &ROT_Types);
  safer_free((void **) &node_to_fill);

  safer_free( (void **) &resid_vector);
  safer_free( (void **) &resid_vector_sens);
  safer_free( (void **) &scale);
  safer_free( (void **) &x);

  if (nAC > 0) {
    safer_free( (void **) &x_AC);
    safer_free( (void **) &x_AC_old);
    safer_free( (void **) &x_AC_dot);
  }

  safer_free( (void **) &x_old); 
  safer_free( (void **) &x_older); 
  safer_free( (void **) &x_oldest); 
  safer_free( (void **) &xdot); 
  safer_free( (void **) &xdot_old); 
  safer_free( (void **) &x_update); 

  safer_free( (void **) &x_sens); 

  if((nn_post_data_sens+nn_post_fluxes_sens) > 0)
          Dmatrix_death(x_sens_p,num_pvector,numProcUnknowns);

  for(i = 0; i < MAX_NUMBER_MATLS; i++) {
    for(n = 0; n < MAX_MODES; n++) {
      safer_free((void **) &(ve_glob[i][n]->gn));
      safer_free((void **) &(ve_glob[i][n]));
    }
    safer_free((void **) &(vn_glob[i]));
  }

  sl_free(matrix_systems_mask, ams);

  for (i=0;i<NUM_ALSS;i++) {
    safer_free( (void**) &(ams[i]));
  }					

  safer_free( (void **) &gvec);

  safer_free( (void **) &lambda);
  safer_free( (void **) &lambdaEnd);
  safer_free( (void **) &path);
  safer_free( (void **) &path1);
  safer_free( (void **) &hDelta_s0);
  safer_free( (void **) &hDelta_s_min);
  safer_free( (void **) &hDelta_s_max);
  safer_free( (void **) &delta_s);
  safer_free( (void **) &delta_s_new);
  safer_free( (void **) &delta_s_old);
  safer_free( (void **) &delta_s_older);
  safer_free( (void **) &delta_s_oldest);

  Ivector_death(&aldALC[0], nHC);
  Ivector_death(&const_delta_s[0], nHC);

  i = 0;
  for ( eb_indx = 0; eb_indx < exo->num_elem_blocks; eb_indx++ ) {
    for ( ev_indx = 0; ev_indx < rd->nev; ev_indx++ ) {
      if ( exo->elem_var_tab[i++] == 1 ) {
        safer_free ((void **) &(gvec_elem [eb_indx][ev_indx]) );
      }
    }
    safer_free ((void **) &(gvec_elem [eb_indx]));
  }

  safer_free( (void **) &gvec_elem); 

  safer_free( (void **) &rd);
  safer_free( (void **) &Local_Offset);
  safer_free( (void **) &Dolphin);

  if( strlen( Soln_OutFile)  )
    {
       fclose(file);
    }

  return;

} /* END of routine hunt_problem  */
Example #15
0
/********************************************************************
real TRSM kernel (left-hand variant)

Works on an m-by-m triangular block of A starting at (i1,j1) and an
m-by-n block of X starting at (i2,j2); the X block is overwritten with
the result. Returns false without touching X when m or n is larger than
alglib_r_block, true otherwise.
********************************************************************/
bool ialglib::_i_rmatrixlefttrsmf(int m,
     int n,
     const ap::real_2d_array& a,
     int i1,
     int j1,
     bool isupper,
     bool isunit,
     int optype,
     ap::real_2d_array& x,
     int i2,
     int j2)
{
    // The kernel only handles problems that fit into one block.
    if( m>alglib_r_block || n>alglib_r_block )
        return false;

    //
    // local buffers: raw storage is over-allocated by alglib_simd_alignment
    // elements and the working pointers are obtained through alglib_align()
    //
    double abufStorage[alglib_r_block*alglib_r_block+alglib_simd_alignment];
    double xbufStorage[alglib_r_block*alglib_r_block+alglib_simd_alignment];
    double tmpStorage[alglib_r_block+alglib_simd_alignment];
    double * const abuf   = static_cast<double*>(alglib_align(abufStorage, alglib_simd_alignment));
    double * const xbuf   = static_cast<double*>(alglib_align(xbufStorage, alglib_simd_alignment));
    double * const tmpbuf = static_cast<double*>(alglib_align(tmpStorage,  alglib_simd_alignment));

    // Distance between consecutive diagonal elements inside abuf.
    const int diagStep = alglib_r_block+1;

    //
    // Prepare
    // Transpose X (so we may use mv, which calculates A*x, but not x*A)
    //
    mcopyblock(m, m, &a(i1,j1), optype, a.getstride(), abuf);
    mcopyblock(m, n, &x(i2,j2), 1, x.getstride(), xbuf);
    if( isunit )
    {
        // Unit-diagonal request: force ones onto the diagonal of the copy.
        for(int k=0; k<m; k++)
            abuf[k*diagStep] = 1.0;
    }

    // A non-zero optype flips which triangle of the buffered copy is used.
    const bool uppera = (optype==0) ? isupper : !isupper;

    //
    // Solve A^-1*Y^T=X^T where A is upper or lower triangular
    //
    if( uppera )
    {
        // Upper triangle: walk the rows from the last one to the first.
        for(int row=m-1; row>=0; row--)
        {
            double *diag = abuf + row*diagStep;
            const double beta = 1.0/(*diag);
            const double alpha = -beta;
            vcopy(m-1-row, diag+1, 1, tmpbuf, 1);
            mv(n, m-1-row, xbuf+row+1, tmpbuf, xbuf+row, alglib_r_block, alpha, beta);
        }
    }
    else
    {
        // Lower triangle: walk the rows from the first one to the last.
        for(int row=0; row<m; row++)
        {
            double *rowStart = abuf + row*alglib_r_block;
            const double beta = 1.0/rowStart[row];
            const double alpha = -beta;
            vcopy(row, rowStart, 1, tmpbuf, 1);
            mv(n, row, xbuf, tmpbuf, xbuf+row, alglib_r_block, alpha, beta);
        }
    }

    // Write the buffered result back into X (same call for both cases).
    mcopyunblock(m, n, xbuf, 1, &x(i2,j2), x.getstride());
    return true;
}
// Builds a polygon mesh from a contour set.
//
//  cset - contours to convert; bounds and cell sizes are copied into 'mesh'.
//  nvp  - value stored as mesh.nvp; each polygon record in mesh.polys is
//         nvp*2 entries long. Polygon merging only happens when nvp > 3.
//  mesh - receives newly allocated verts/polys/regs arrays and the counts.
//
// Every contour with at least 3 vertices is triangulated, its vertices are
// added through addVertex(), the triangles are optionally merged into larger
// polygons and stored. Vertices flagged RC_BORDER_VERTEX are removed
// afterwards and buildMeshAdjacency() is run on the result.
//
// Returns true on success, false on any failure. All temporary buffers are
// released on every exit path; arrays already assigned to 'mesh' are left in
// place for the caller.
bool rcBuildPolyMesh(rcContourSet& cset, int nvp, rcPolyMesh& mesh)
{
	rcTimeVal startTime = rcGetPerformanceTimer();

	vcopy(mesh.bmin, cset.bmin);
	vcopy(mesh.bmax, cset.bmax);
	mesh.cs = cset.cs;
	mesh.ch = cset.ch;
	
	// Upper bounds used to size the buffers.
	int maxVertices = 0;
	int maxTris = 0;
	int maxVertsPerCont = 0;
	for (int i = 0; i < cset.nconts; ++i)
	{
		maxVertices += cset.conts[i].nverts;
		maxTris += cset.conts[i].nverts - 2;
		maxVertsPerCont = rcMax(maxVertsPerCont, cset.conts[i].nverts);
	}
	
	// Vertex indices are stored as unsigned short.
	if (maxVertices >= 0xfffe)
	{
		if (rcGetLog())
			rcGetLog()->log(RC_LOG_ERROR, "rcBuildPolyMesh: Too many vertices %d.", maxVertices);
		return false;
	}
	
	// Temporary buffers. All of them are declared (and nulled) before the
	// first 'goto failure' so that the jump never skips an initialisation.
	unsigned char* vflags = 0;
	int* nextVert = 0;
	int* firstVert = 0;
	int* indices = 0;
	int* tris = 0;
	unsigned short* polys = 0;
	// Scratch polygon for mergePolys(); points into the tail of 'polys' and
	// therefore must never be deleted on its own.
	unsigned short* tmpPoly = 0;
	
	vflags = new unsigned char[maxVertices];
	if (!vflags)
	{
		if (rcGetLog())
			rcGetLog()->log(RC_LOG_ERROR, "rcBuildPolyMesh: Out of memory 'vflags' (%d).", maxVertices);
		goto failure;
	}
	memset(vflags, 0, maxVertices);
	
	mesh.verts = new unsigned short[maxVertices*3];
	if (!mesh.verts)
	{
		if (rcGetLog())
			rcGetLog()->log(RC_LOG_ERROR, "rcBuildPolyMesh: Out of memory 'mesh.verts' (%d).", maxVertices);
		goto failure;
	}
	mesh.polys = new unsigned short[maxTris*nvp*2];
	if (!mesh.polys)
	{
		if (rcGetLog())
			rcGetLog()->log(RC_LOG_ERROR, "rcBuildPolyMesh: Out of memory 'mesh.polys' (%d).", maxTris*nvp*2);
		goto failure;
	}
	mesh.regs = new unsigned short[maxTris];
	if (!mesh.regs)
	{
		if (rcGetLog())
			rcGetLog()->log(RC_LOG_ERROR, "rcBuildPolyMesh: Out of memory 'mesh.regs' (%d).", maxTris);
		goto failure;
	}
	mesh.nverts = 0;
	mesh.npolys = 0;
	mesh.nvp = nvp;
	
	// Unused polygon slots are marked with 0xffff.
	memset(mesh.verts, 0, sizeof(unsigned short)*maxVertices*3);
	memset(mesh.polys, 0xff, sizeof(unsigned short)*maxTris*nvp*2);
	memset(mesh.regs, 0, sizeof(unsigned short)*maxTris);
	
	nextVert = new int[maxVertices];
	if (!nextVert)
	{
		if (rcGetLog())
			rcGetLog()->log(RC_LOG_ERROR, "rcBuildPolyMesh: Out of memory 'nextVert' (%d).", maxVertices);
		goto failure;
	}
	memset(nextVert, 0, sizeof(int)*maxVertices);
	
	firstVert = new int[VERTEX_BUCKET_COUNT];
	if (!firstVert)
	{
		if (rcGetLog())
			rcGetLog()->log(RC_LOG_ERROR, "rcBuildPolyMesh: Out of memory 'firstVert' (%d).", VERTEX_BUCKET_COUNT);
		goto failure;
	}
	for (int i = 0; i < VERTEX_BUCKET_COUNT; ++i)
		firstVert[i] = -1;
	
	indices = new int[maxVertsPerCont];
	if (!indices)
	{
		if (rcGetLog())
			rcGetLog()->log(RC_LOG_ERROR, "rcBuildPolyMesh: Out of memory 'indices' (%d).", maxVertsPerCont);
		goto failure;
	}
	tris = new int[maxVertsPerCont*3];
	if (!tris)
	{
		if (rcGetLog())
			rcGetLog()->log(RC_LOG_ERROR, "rcBuildPolyMesh: Out of memory 'tris' (%d).", maxVertsPerCont*3);
		goto failure;
	}
	// One extra polygon is allocated at the end to serve as tmpPoly.
	polys = new unsigned short[(maxVertsPerCont+1)*nvp];
	if (!polys)
	{
		if (rcGetLog())
			rcGetLog()->log(RC_LOG_ERROR, "rcBuildPolyMesh: Out of memory 'polys' (%d).", maxVertsPerCont*nvp);
		goto failure;
	}
	tmpPoly = &polys[maxVertsPerCont*nvp];

	for (int i = 0; i < cset.nconts; ++i)
	{
		rcContour& cont = cset.conts[i];
		
		// Skip empty contours.
		if (cont.nverts < 3)
			continue;
		
		// Triangulate contour
		for (int j = 0; j < cont.nverts; ++j)
			indices[j] = j;
			
		int ntris = triangulate(cont.nverts, cont.verts, &indices[0], &tris[0]);
		if (ntris <= 0)
		{
			// Bad triangulation, should not happen.
			// Carry on with the magnitude of the reported count.
			ntris = -ntris;
		}
		// Add and merge vertices.
		for (int j = 0; j < cont.nverts; ++j)
		{
			const int* v = &cont.verts[j*4];
			indices[j] = addVertex((unsigned short)v[0], (unsigned short)v[1], (unsigned short)v[2],
								   mesh.verts, firstVert, nextVert, mesh.nverts);
			if (v[3] & RC_BORDER_VERTEX)
			{
				// This vertex should be removed.
				vflags[indices[j]] = 1;
			}
		}
		
		// Build initial polygons (degenerate triangles are dropped).
		int npolys = 0;
		memset(polys, 0xff, maxVertsPerCont*nvp*sizeof(unsigned short));
		for (int j = 0; j < ntris; ++j)
		{
			int* t = &tris[j*3];
			if (t[0] != t[1] && t[0] != t[2] && t[1] != t[2])
			{
				polys[npolys*nvp+0] = (unsigned short)indices[t[0]];
				polys[npolys*nvp+1] = (unsigned short)indices[t[1]];
				polys[npolys*nvp+2] = (unsigned short)indices[t[2]];
				npolys++;
			}
		}
		if (!npolys)
			continue;
		
		// Merge polygons.
		if (nvp > 3)
		{
			while (true)
			{
				// Find best polygons to merge.
				int bestMergeVal = 0;
				// Only read when bestMergeVal > 0; zeroed to keep them defined.
				int bestPa = 0, bestPb = 0, bestEa = 0, bestEb = 0;
				
				for (int j = 0; j < npolys-1; ++j)
				{
					unsigned short* pj = &polys[j*nvp];
					for (int k = j+1; k < npolys; ++k)
					{
						unsigned short* pk = &polys[k*nvp];
						int ea, eb;
						int v = getPolyMergeValue(pj, pk, mesh.verts, ea, eb, nvp);
						if (v > bestMergeVal)
						{
							bestMergeVal = v;
							bestPa = j;
							bestPb = k;
							bestEa = ea;
							bestEb = eb;
						}
					}
				}
				
				if (bestMergeVal > 0)
				{
					// Found best, merge.
					unsigned short* pa = &polys[bestPa*nvp];
					unsigned short* pb = &polys[bestPb*nvp];
					mergePolys(pa, pb, mesh.verts, bestEa, bestEb, tmpPoly, nvp);
					// Fill the freed slot with the last polygon.
					memcpy(pb, &polys[(npolys-1)*nvp], sizeof(unsigned short)*nvp);
					npolys--;
				}
				else
				{
					// Could not merge any polygons, stop.
					break;
				}
			}
		}
		
		
		// Store polygons.
		for (int j = 0; j < npolys; ++j)
		{
			unsigned short* p = &mesh.polys[mesh.npolys*nvp*2];
			unsigned short* q = &polys[j*nvp];
			for (int k = 0; k < nvp; ++k)
				p[k] = q[k];
			mesh.regs[mesh.npolys] = cont.reg;
			mesh.npolys++;
		}
	}
	
	
	// Remove edge vertices.
	for (int i = 0; i < mesh.nverts; ++i)
	{
		if (vflags[i])
		{
			if (!removeVertex(mesh, i, maxTris))
				goto failure;
			// Keep vflags aligned with the vertex array.
			// NOTE(review): the bound assumes removeVertex() leaves
			// mesh.nverts unchanged; confirm against its implementation.
			for (int j = i; j < mesh.nverts-1; ++j)
				vflags[j] = vflags[j+1];
			--i;
		}
	}

	delete [] vflags;
	delete [] firstVert;
	delete [] nextVert;
	delete [] indices;
	delete [] tris;
	delete [] polys;
	
	// Calculate adjacency.
	if (!buildMeshAdjacency(mesh.polys, mesh.npolys, mesh.nverts, nvp))
	{
		if (rcGetLog())
			rcGetLog()->log(RC_LOG_ERROR, "rcBuildPolyMesh: Adjacency failed.");
		return false;
	}
	
	{
		// Own scope: 'goto failure' must not bypass this initialisation.
		rcTimeVal endTime = rcGetPerformanceTimer();
		
//		if (rcGetLog())
//			rcGetLog()->log(RC_LOG_PROGRESS, "Build polymesh: %.3f ms", rcGetDeltaTimeUsec(startTime, endTime)/1000.0f);
		if (rcGetBuildTimes())
			rcGetBuildTimes()->buildPolymesh += rcGetDeltaTimeUsec(startTime, endTime);
	}
	
	return true;

failure:
	// Pointers that were never allocated are still null, which delete[] accepts.
	delete [] vflags;
	delete [] polys;
	delete [] firstVert;
	delete [] nextVert;
	delete [] indices;
	delete [] tris;

	return false;
}
// Merges 'nmeshes' polygon meshes into 'mesh'.
//
//  meshes  - array of source meshes; nvp, cs and ch are taken from meshes[0].
//  nmeshes - number of entries in 'meshes'.
//  mesh    - receives newly allocated verts/polys/regs arrays, the merged
//            bounds and the counts.
//
// Vertices of every source mesh are offset in x/z by the difference between
// the source and merged minimum bounds (in cells of size mesh.cs), added
// through addVertex(), and the polygons are copied with their vertex indices
// remapped. buildMeshAdjacency() is then run on the merged mesh.
//
// Returns true on success (and also when there is nothing to merge), false on
// failure. Temporary buffers are released on every exit path.
bool rcMergePolyMeshes(rcPolyMesh** meshes, const int nmeshes, rcPolyMesh& mesh)
{
	if (!nmeshes || !meshes)
		return true;

	rcTimeVal startTime = rcGetPerformanceTimer();

	// Temporary buffers, nulled up-front so the failure path can delete them
	// unconditionally.
	int* nextVert = 0;
	int* firstVert = 0;
	unsigned short* vremap = 0;

	mesh.nvp = meshes[0]->nvp;
	mesh.cs = meshes[0]->cs;
	mesh.ch = meshes[0]->ch;
	vcopy(mesh.bmin, meshes[0]->bmin);
	vcopy(mesh.bmax, meshes[0]->bmax);

	// Accumulate the merged bounds and the buffer size limits.
	int maxVerts = 0;
	int maxPolys = 0;
	int maxVertsPerMesh = 0;
	for (int i = 0; i < nmeshes; ++i)
	{
		vmin(mesh.bmin, meshes[i]->bmin);
		vmax(mesh.bmax, meshes[i]->bmax);
		maxVertsPerMesh = rcMax(maxVertsPerMesh, meshes[i]->nverts);
		maxVerts += meshes[i]->nverts;
		maxPolys += meshes[i]->npolys;
	}
	
	mesh.nverts = 0;
	mesh.verts = new unsigned short[maxVerts*3];
	if (!mesh.verts)
	{
		if (rcGetLog())
			rcGetLog()->log(RC_LOG_ERROR, "rcMergePolyMeshes: Out of memory 'mesh.verts' (%d).", maxVerts*3);
		return false;
	}

	mesh.npolys = 0;
	mesh.polys = new unsigned short[maxPolys*2*mesh.nvp];
	if (!mesh.polys)
	{
		if (rcGetLog())
			rcGetLog()->log(RC_LOG_ERROR, "rcMergePolyMeshes: Out of memory 'mesh.polys' (%d).", maxPolys*2*mesh.nvp);
		return false;
	}
	// Unused polygon slots are marked with 0xffff.
	memset(mesh.polys, 0xff, sizeof(unsigned short)*maxPolys*2*mesh.nvp);

	mesh.regs = new unsigned short[maxPolys];
	if (!mesh.regs)
	{
		if (rcGetLog())
			rcGetLog()->log(RC_LOG_ERROR, "rcMergePolyMeshes: Out of memory 'mesh.regs' (%d).", maxPolys);
		return false;
	}
	memset(mesh.regs, 0, sizeof(unsigned short)*maxPolys);
	
	nextVert = new int[maxVerts];
	if (!nextVert)
	{
		if (rcGetLog())
			rcGetLog()->log(RC_LOG_ERROR, "rcMergePolyMeshes: Out of memory 'nextVert' (%d).", maxVerts);
		goto failure;
	}
	memset(nextVert, 0, sizeof(int)*maxVerts);
	
	firstVert = new int[VERTEX_BUCKET_COUNT];
	if (!firstVert)
	{
		if (rcGetLog())
			rcGetLog()->log(RC_LOG_ERROR, "rcMergePolyMeshes: Out of memory 'firstVert' (%d).", VERTEX_BUCKET_COUNT);
		goto failure;
	}
	for (int i = 0; i < VERTEX_BUCKET_COUNT; ++i)
		firstVert[i] = -1;

	vremap = new unsigned short[maxVertsPerMesh];
	if (!vremap)
	{
		if (rcGetLog())
			rcGetLog()->log(RC_LOG_ERROR, "rcMergePolyMeshes: Out of memory 'vremap' (%d).", maxVertsPerMesh);
		goto failure;
	}
	// Clear the remap table itself (nextVert was already cleared above).
	memset(vremap, 0, sizeof(unsigned short)*maxVertsPerMesh);
	
	for (int i = 0; i < nmeshes; ++i)
	{
		const rcPolyMesh* pmesh = meshes[i];
		
		// Offset of this mesh inside the merged bounds, rounded to whole cells.
		const unsigned short ox = (unsigned short)floorf((pmesh->bmin[0]-mesh.bmin[0])/mesh.cs+0.5f);
		const unsigned short oz = (unsigned short)floorf((pmesh->bmin[2]-mesh.bmin[2])/mesh.cs+0.5f);
		
		// Add the vertices and remember where each one ended up.
		for (int j = 0; j < pmesh->nverts; ++j)
		{
			unsigned short* v = &pmesh->verts[j*3];
			vremap[j] = addVertex(v[0]+ox, v[1], v[2]+oz,
						   mesh.verts, firstVert, nextVert, mesh.nverts);
		}
		
		// Copy the polygons, translating vertex indices through vremap.
		for (int j = 0; j < pmesh->npolys; ++j)
		{
			unsigned short* tgt = &mesh.polys[mesh.npolys*2*mesh.nvp];
			unsigned short* src = &pmesh->polys[j*2*mesh.nvp];
			mesh.regs[mesh.npolys] = pmesh->regs[j];
			mesh.npolys++;
			for (int k = 0; k < mesh.nvp; ++k)
			{
				// 0xffff terminates a polygon shorter than nvp vertices.
				if (src[k] == 0xffff) break;
				tgt[k] = vremap[src[k]];
			}
		}
	}

	// Calculate adjacency.
	if (!buildMeshAdjacency(mesh.polys, mesh.npolys, mesh.nverts, mesh.nvp))
	{
		if (rcGetLog())
			rcGetLog()->log(RC_LOG_ERROR, "rcMergePolyMeshes: Adjacency failed.");
		// Release the temporaries instead of returning directly.
		goto failure;
	}

	delete [] firstVert;
	delete [] nextVert;
	delete [] vremap;
	
	{
		// Own scope: 'goto failure' must not bypass this initialisation.
		rcTimeVal endTime = rcGetPerformanceTimer();
		
		if (rcGetBuildTimes())
			rcGetBuildTimes()->mergePolyMesh += rcGetDeltaTimeUsec(startTime, endTime);
	}
	
	return true;
	
failure:
	delete [] firstVert;
	delete [] nextVert;
	delete [] vremap;
	
	return false;
}
// Concatenates several detail meshes into 'mesh'.
//
//  meshes  - array of source detail meshes; null entries are skipped.
//  nmeshes - number of entries in 'meshes'.
//  mesh    - receives newly allocated meshes/tris/verts arrays and counts.
//
// Returns false when one of the allocations yields null, true otherwise.
bool rcMergePolyMeshDetails(rcPolyMeshDetail** meshes, const int nmeshes, rcPolyMeshDetail& mesh)
{
	rcTimeVal startTime = rcGetPerformanceTimer();
	
	// Pass 1: add up the sizes of all inputs.
	int totalVerts = 0;
	int totalTris = 0;
	int totalSubMeshes = 0;
	for (int m = 0; m < nmeshes; ++m)
	{
		if (meshes[m])
		{
			totalVerts += meshes[m]->nverts;
			totalTris += meshes[m]->ntris;
			totalSubMeshes += meshes[m]->nmeshes;
		}
	}

	// Output storage: 4 entries per sub-mesh, 4 per triangle, 3 per vertex.
	mesh.nmeshes = 0;
	mesh.meshes = new unsigned short[totalSubMeshes*4];
	if (!mesh.meshes)
	{
		if (rcGetLog())
			rcGetLog()->log(RC_LOG_ERROR, "rcBuildPolyMeshDetail: Out of memory 'pmdtl.meshes' (%d).", totalSubMeshes*4);
		return false;
	}

	mesh.ntris = 0;
	mesh.tris = new unsigned char[totalTris*4];
	if (!mesh.tris)
	{
		if (rcGetLog())
			rcGetLog()->log(RC_LOG_ERROR, "rcBuildPolyMeshDetail: Out of memory 'dmesh.tris' (%d).", totalTris*4);
		return false;
	}

	mesh.nverts = 0;
	mesh.verts = new float[totalVerts*3];
	if (!mesh.verts)
	{
		if (rcGetLog())
			rcGetLog()->log(RC_LOG_ERROR, "rcBuildPolyMeshDetail: Out of memory 'dmesh.verts' (%d).", totalVerts*3);
		return false;
	}
	
	// Pass 2: append every input behind what has been written so far.
	for (int m = 0; m < nmeshes; ++m)
	{
		rcPolyMeshDetail* part = meshes[m];
		if (!part)
			continue;

		// Sub-mesh headers: entries 0 and 2 are shifted by the vertex and
		// triangle counts already in the output, entries 1 and 3 are copied.
		for (int s = 0; s < part->nmeshes; ++s)
		{
			unsigned short* out = &mesh.meshes[mesh.nmeshes*4];
			unsigned short* in = &part->meshes[s*4];
			out[0] = mesh.nverts+in[0];
			out[1] = in[1];
			out[2] = mesh.ntris+in[2];
			out[3] = in[3];
			mesh.nmeshes++;
		}

		// Vertices.
		for (int v = 0; v < part->nverts; ++v)
		{
			vcopy(&mesh.verts[mesh.nverts*3], &part->verts[v*3]);
			mesh.nverts++;
		}

		// Triangles, 4 bytes each.
		for (int t = 0; t < part->ntris; ++t)
		{
			for (int c = 0; c < 4; ++c)
				mesh.tris[mesh.ntris*4+c] = part->tris[t*4+c];
			mesh.ntris++;
		}
	}

	rcTimeVal endTime = rcGetPerformanceTimer();
	
	if (rcGetBuildTimes())
		rcGetBuildTimes()->mergePolyMeshDetail += rcGetDeltaTimeUsec(startTime, endTime);
	
	return true;
}
Example #19
0
/* Build a quaternion from a rotation axis and an angle.
 * vec is passed through vnormal() first, so the caller's array is modified.
 * quat[0..2] receive the axis scaled by sin(phi/2), quat[3] is cos(phi/2). */
void axis_to_quat(double vec[3], double phi, double quat[4]){
   const double half_angle = phi/2.0;

   vnormal(vec);

   /* vector part */
   vcopy(quat, vec);
   vscale(quat, sin(half_angle));

   /* scalar part */
   quat[3] = cos(half_angle);
   }
// advection
void Fluid2::fluidAdvection( const float dt )
{
    // ink
    {
	    Array2<float> inkcopy( ink );
	    CellSampler inksampler( grid, inkcopy );

        const Index2& size = ink.getSize();
	    for( unsigned int i = 0; i < size.x; ++i )
	    for( unsigned int j = 0; j < size.y; ++j )
	    {
		    const Index2 id( i, j );

		    const Vec2 pos( grid.getCellPos( id ) );
		    const Vec2 vel( ( velocityX[ id ] + velocityX[ Index2( i+1, j ) ] ) * 0.5f,
                            ( velocityY[ id ] + velocityY[ Index2( i, j+1 ) ] ) * 0.5f );
		    const Vec2 endpos( pos - dt * vel );

		    ink[ id ] = inksampler.getValue( endpos );;
	    }
    }

    // velocity
    {
	    Array2< float > ucopy( velocityX );
        Array2< float > vcopy( velocityY );
	    FaceSampler usampler( grid, ucopy, 0 );
	    FaceSampler vsampler( grid, vcopy, 1 );
        const Index2& sizeu = velocityX.getSize();
        const Index2& sizev = velocityY.getSize();

	    for( unsigned int i = 0; i < sizeu.x; ++i )
	    for( unsigned int j = 0; j < sizeu.y; ++j )
	    {
		    const Index2 id( i, j );
            const Index2 idv1( clamp( i-1, 0, sizev.x-1 ), clamp( j  , 0, sizev.y-1 ) );
            const Index2 idv2( clamp( i  , 0, sizev.x-1 ), clamp( j  , 0, sizev.y-1 ) );
            const Index2 idv3( clamp( i-1, 0, sizev.x-1 ), clamp( j+1, 0, sizev.y-1 ) );
            const Index2 idv4( clamp( i  , 0, sizev.x-1 ), clamp( j+1, 0, sizev.y-1 ) );

		    const Vec2 pos( grid.getFaceXPos( id ) );
		    const Vec2 vel( ucopy[ id ], ( vcopy[ idv1 ] + vcopy[ idv2 ] + vcopy[ idv3 ] + vcopy[ idv4 ] ) * 0.25f );
		    const Vec2 endpos( pos - dt * vel );

		    velocityX[ id ] = usampler.getValue( endpos );
	    }

	    for( unsigned int i = 0; i < sizev.x; ++i )
	    for( unsigned int j = 0; j < sizev.y; ++j )
	    {
		    const Index2 id( i, j );
            const Index2 idu1( clamp( i  , 0, sizeu.x-1 ), clamp( j-1, 0, sizeu.y-1 ) );
            const Index2 idu2( clamp( i  , 0, sizeu.x-1 ), clamp( j  , 0, sizeu.y-1 ) );
            const Index2 idu3( clamp( i+1, 0, sizeu.x-1 ), clamp( j-1, 0, sizeu.y-1 ) );
            const Index2 idu4( clamp( i+1, 0, sizeu.x-1 ), clamp( j  , 0, sizeu.y-1 ) );

		    const Vec2 pos( grid.getFaceYPos( id ) );
		    const Vec2 vel( ( ucopy[ idu1 ] + ucopy[ idu2 ] + ucopy[ idu3 ] + ucopy[ idu4 ] ) * 0.25f, vcopy[ id ] );
		    const Vec2 endpos( pos - dt * vel );

		    velocityY[ id ] = vsampler.getValue( endpos );
	    }
    }
}
bool rcBuildCompactHeightfield(const int walkableHeight, const int walkableClimb,
							   unsigned char flags, rcHeightfield& hf,
							   rcCompactHeightfield& chf)
{
	rcTimeVal startTime = rcGetPerformanceTimer();
	
	const int w = hf.width;
	const int h = hf.height;
	const int spanCount = getSpanCount(flags, hf);

	// Fill in header.
	chf.width = w;
	chf.height = h;
	chf.spanCount = spanCount;
	chf.walkableHeight = walkableHeight;
	chf.walkableClimb = walkableClimb;
	chf.maxRegions = 0;
	vcopy(chf.bmin, hf.bmin);
	vcopy(chf.bmax, hf.bmax);
	chf.bmax[1] += walkableHeight*hf.ch;
	chf.cs = hf.cs;
	chf.ch = hf.ch;
	chf.cells = new rcCompactCell[w*h];
	if (!chf.cells)
	{
		if (rcGetLog())
			rcGetLog()->log(RC_LOG_ERROR, "rcBuildCompactHeightfield: Out of memory 'chf.cells' (%d)", w*h);
		return false;
	}
	memset(chf.cells, 0, sizeof(rcCompactCell)*w*h);
	chf.spans = new rcCompactSpan[spanCount];
	if (!chf.spans)
	{
		if (rcGetLog())
			rcGetLog()->log(RC_LOG_ERROR, "rcBuildCompactHeightfield: Out of memory 'chf.spans' (%d)", spanCount);
		return false;
	}
	memset(chf.spans, 0, sizeof(rcCompactSpan)*spanCount);
	
	const int MAX_HEIGHT = 0xffff;
	
	// Fill in cells and spans.
	int idx = 0;
	for (int y = 0; y < h; ++y)
	{
		for (int x = 0; x < w; ++x)
		{
			const rcSpan* s = hf.spans[x + y*w];
			// If there are no spans at this cell, just leave the data to index=0, count=0.
			if (!s) continue;
			rcCompactCell& c = chf.cells[x+y*w];
			c.index = idx;
			c.count = 0;
			while (s)
			{
				if (s->flags == flags)
				{
					const int bot = (int)s->smax;
					const int top = s->next ? (int)s->next->smin : MAX_HEIGHT;
					chf.spans[idx].y = (unsigned short)rcClamp(bot, 0, 0xffff);
					chf.spans[idx].h = (unsigned char)rcClamp(top - bot, 0, 0xff);
					idx++;
					c.count++;
				}
				s = s->next;
			}
		}
	}

	// Find neighbour connections.
	for (int y = 0; y < h; ++y)
	{
		for (int x = 0; x < w; ++x)
		{
			const rcCompactCell& c = chf.cells[x+y*w];
			for (int i = (int)c.index, ni = (int)(c.index+c.count); i < ni; ++i)
			{
				rcCompactSpan& s = chf.spans[i];
				for (int dir = 0; dir < 4; ++dir)
				{
					setCon(s, dir, 0xf);
					const int nx = x + rcGetDirOffsetX(dir);
					const int ny = y + rcGetDirOffsetY(dir);
					// First check that the neighbour cell is in bounds.
					if (nx < 0 || ny < 0 || nx >= w || ny >= h)
						continue;
					// Iterate over all neighbour spans and check if any of the is
					// accessible from current cell.
					const rcCompactCell& nc = chf.cells[nx+ny*w];
					for (int k = (int)nc.index, nk = (int)(nc.index+nc.count); k < nk; ++k)
					{
						const rcCompactSpan& ns = chf.spans[k];
						const int bot = rcMax(s.y, ns.y);
						const int top = rcMin(s.y+s.h, ns.y+ns.h);

						// Check that the gap between the spans is walkable,
						// and that the climb height between the gaps is not too high.
						if ((top - bot) >= walkableHeight && rcAbs((int)ns.y - (int)s.y) <= walkableClimb)
						{
							// Mark direction as walkable.
							setCon(s, dir, k - (int)nc.index);
							break;
						}
					}
				}
			}
		}
	}
	
	rcTimeVal endTime = rcGetPerformanceTimer();
	
	if (rcGetBuildTimes())
		rcGetBuildTimes()->buildCompact += rcGetDeltaTimeUsec(startTime, endTime);
	
	return true;
}
Example #22
0
/*
*  Build a quaternion q from a rotation axis a and an angle phi.
*  a is passed through vnormal() first, so the caller's array is modified.
*/
void axis_to_quat(float a[3], float phi, float q[4]) {
    const float half_angle = phi/2.0f;

    vnormal(a);

    /* vector part */
    vcopy(a,q);
    vscale(q,sin(half_angle));

    /* scalar part */
    q[3] = cos(half_angle);
}
Example #23
0
//------------------------------------------------------------------------------
// Computes the colour for the normal N_orig and writes it to col.
//
//  render - renderer state: normal matrix stack, lights, and the km_* fields
//           that this function sets.
//  N_orig - input normal; a normalised copy is used.
//  col    - output colour.
//  mulByK - forwarded unchanged to calc_ambient/calc_diffuse/calc_specular.
//
// If the normal stack yields no matrix, an error is logged and the function
// returns without writing col.
void calc_color(GzRender *render, GzCoord N_orig, GzColor col, bool mulByK)
{
    
    GzCoord N = ZEROS;
    normalizeVector(N_orig, N);
    
    
    Matrix *Ncm = render->nStack.leftMulMatricesOnStack();
    if (Ncm == NULL)
    {
        fprintf(stderr, "Got NULL from normal stack in %s.\n",__FUNCTION__);
        // The normal cannot be transformed without the matrix; return here
        // instead of dereferencing the null pointer below.
        return;
    }

    
    // Transform the normal with the matrix taken from the normal stack.
    float array[4] = {N[X], N[Y], N[Z], 1};
    float Ntransformed[4] = {0, 0, 0, 0};
    Ncm->rightMultiply(array, 4, Ntransformed);
    
    GzColor ambient = ZEROS;
    GzColor diffuse = ZEROS;
    GzColor specular = ZEROS;
    
    calc_ambient(render, ambient, mulByK);
    calc_diffuse(render, Ntransformed, diffuse, mulByK);
    calc_specular(render, Ntransformed, specular, mulByK);

    addThreeColors(ambient, diffuse, specular, col);

    render->km_ka = KM_KA;
    render->km_kd = KM_KD;
    render->km_ks = KM_KS;

    //////////////////////////////////////////////////////////////////////////////////////////////////////
    // THIS PART IS NEW FOR THE PROJECT
    // (the homework shading above still runs, but while this section is
    //  enabled its result in col is overwritten below)

#if 1
    // For each light, determine what layers of paint to add to the model
    scalarMultiply(col, 0.0f, col);
    GzCoord ones = {1.0f, 1.0f, 1.0f};
    vcopy(col, ones);
    GzCoord background_reflectance = BACKGROUND_REFLECTANCE;
    GzCoord background_transmittance = BACKGROUND_TRANSMITTANCE;
    for (int i = 0; i < render->numlights; i++)
    {
        // This light's direction
        float (*ls)[3] = static_cast<float (*)[3]>(render->lights[i]);
        GzCoord ls_L_orig = {ls[0][0], ls[0][1], ls[0][2]};

        // Ambient light color
        float (*lamb)[3] = static_cast<float (*)[3]>(render->ambientlight);
        GzColor lambcolor = {lamb[1][0], lamb[1][1], lamb[1][2]};

        // This light's color
        GzColor ls_intensity = {ls[1][0], ls[1][1], ls[1][2]};
        // printVector(ls_intensity, "ls_intensity");
        // printVector(render->Ka, "Ka");
        // printVector(render->Kd, "Kd");
        // printVector(render->Ks, "Ks");

        GzCoord diffuse_thicknesses = {0.0f,0.0f,0.0f};
        calc_diffuse_thickness(render, Ntransformed, ls_L_orig, diffuse_thicknesses);
        // printVector(diffuse_thicknesses, "diffuse_thicknesses");
        GzCoord unlit_thicknesses = {0.0f,0.0f,0.0f};
        calc_unlit_thickness(render, Ntransformed, ls_L_orig, unlit_thicknesses);
        // printVector(unlit_thicknesses, "unlit_thicknesses");

        // printf("\n\n");
        // Use KM Model to add each of these layers of paint to the color of this vertex
        GzCoord diffuse_reflectance = ZEROS;
        GzCoord diffuse_transmittance = ZEROS;
        GzCoord unlit_reflectance = ZEROS;
        GzCoord unlit_transmittance = ZEROS;
        
        // printf("diffuse layer\n");
        GzCoord diffuse_color = DIFFUSE_COLOR;
        kubelka_munk(diffuse_color, diffuse_thicknesses, diffuse_reflectance, diffuse_transmittance);
        // printf("unlit layer\n");
        GzCoord unlit_color = AMBIENT_COLOR;
        kubelka_munk(unlit_color, unlit_thicknesses, unlit_reflectance, unlit_transmittance);
        // printf("\n\n");

        // vmult(diffuse_reflectance, ls_intensity, diffuse_thicknesses);
        // vmult(unlit_reflectance, ls_intensity, unlit_thicknesses);

        GzCoord composite_reflectance = ZEROS;
        GzCoord composite_transmittance = ZEROS;
        // Two compositing calls per light; the background_* arrays are passed
        // to both, so they carry state from one light to the next.
        km_composite_layers(diffuse_reflectance, diffuse_transmittance, diffuse_reflectance, 
                                diffuse_transmittance, background_reflectance, background_transmittance);
        km_composite_layers(background_reflectance, background_transmittance, unlit_reflectance, 
                                unlit_transmittance, diffuse_reflectance, diffuse_transmittance);
        // printVector(diffuse_reflectance, "diffuse_reflectance");
        // vmult(col, col, lambcolor);
        
        // break;  // ASSUME ONLY ONE COLOR!!!
    }

    // The final colour is the background reflectance after all lights.
    vcopy(col, background_reflectance);
#endif

}
Example #24
0
/*******************************************************************
 Subroutine to do the Sub-Level PCA-PPM
   matrix *pcadata_re: the pointer to the new matrix containing the real part of data
                       projected onto the space defined by the PCA
   matrix *pcadata_im: the pointer to the new matrix containing the imaginary part of data
                       projected onto the space defined by the PCA
   matrix *pcavec_re:  the pointer to a matrix containing the real part
                       of eigenvector
   matrix *pcavec_im:  the pointer to a matrix containing the imaginary part
                       of eigenvector
   vector *pcaval_re:  the pointer to a vector containing the real part
                       of eigenvalues
   vector *pcaval_im:  the pointer to a vector containing the imaginary part
                       of eigenvalues
   vector *Zjk:  the pointer to a vector containing the Zjk values
   matrix *subpcappmvec_re:  the pointer to a matrix containing the real part of
                             sorted eigenvectors by sub kurtosis rank
   matrix *subpcappmvec_im:  the pointer to a matrix containing the imaginary part of
                             sorted eigenvectors by sub kurtosis rank

 return value: '1' - successful exit
               '0' - exit with warning/error
*******************************************************************/
int veSubPCAPPM(matrix *pcadata_re, matrix *pcadata_im,
                matrix *pcavec_re, matrix *pcavec_im,
                vector *pcaval_re, vector *pcaval_im,
                vector *Zjk,
                matrix *subpcappmvec_re, matrix *subpcappmvec_im)
{
    int m, n;
    int i, j, u=0, v=0;
    vector X1n, Xm1;
    matrix mZjk;
    matrix M1;
    matrix data_pow2;
    matrix data_pow4;
    vector V1;
    vector V2;
    vector V4;
    vector kurt;
    int* kurt_id;
    double sumZjk;
    double cen_data;
    bool allreal = true;

    m=pcadata_re->m;
    n=pcadata_im->n;

    vnew(&X1n, n);
    vnew(&Xm1, m);
    mnew(&mZjk, m, n);
    mnew(&M1, m, n);
    mnew(&data_pow2, m, n);
    mnew(&data_pow4, m, n);
    vnew(&V1, n);
    vnew(&V2, n);
    vnew(&V4, n);
    vnew(&kurt, n);
    kurt_id = new int[n];


    vector V1_im;
    vector Xm1_im;
    matrix M1_im;
    double cen_data_im;
    matrix data_pow2_im;
    matrix data_pow4_im;
    vector V2_im;
    vector V4_im;
    vector kurt_im;

    vnew(&Xm1_im, m);
    mnew(&M1_im, m, n);
    mnew(&data_pow2_im, m, n);
    mnew(&data_pow4_im, m, n);
    vnew(&V1_im, n);
    vnew(&V2_im, n);
    vnew(&V4_im, n);
    vnew(&kurt_im, n);

    // whether complex eigenvalue exists
    for (i=0; i<n; i++) {
        if (*(pcaval_im->pr+i) != 0) {
            allreal = false;
            break;
        }
    }

    // center the data set its means
    // data_proj = data_proj - ones(n,1)*(sum(Zjk*ones(1,p).*(data_proj))./sum(Zjk));
    vones(&X1n);
    vones(&Xm1);

    vvMul(Zjk, &X1n, &mZjk);
    sumZjk = vsum(Zjk);

    if (allreal==true) {
        kurtmodel(&mZjk, sumZjk, pcadata_re, &V1);
        vvMul(&Xm1, &V1, &M1);

        for (i=0; i<m*n; i++) {
            cen_data = *(pcadata_re->pr + i) - *(M1.pr + i);
            //*(data->pr + i) = cen_data;
            *(data_pow2.pr+i) = pow(cen_data, 2);
            *(data_pow4.pr+i) = pow(cen_data, 4);
        }

        // calculate kurtosis : kurt(y) = E{y^4}-3(E{y^2})^2
        //kurt = sum(Zjk*ones(1,p).*(data_proj.^4))./sum(Zjk)...
        //- 3*(sum(Zjk*ones(1,p).*(data_proj.^2))./sum(Zjk)).^2; %Not normalized Kurtosis
        kurtmodel(&mZjk, sumZjk, &data_pow2, &V2);
        kurtmodel(&mZjk, sumZjk, &data_pow4, &V4);

        for (j=0; j<n; j++) {
            *(kurt.pr+j) = *(V4.pr+j) - 3*(pow(*(V2.pr+j), 2));
        }

    } else {

        ckurtmodel(&mZjk, sumZjk, pcadata_re, pcadata_im, &V1, &V1_im);
        cvvMul(&Xm1, &Xm1_im, &V1, &V1_im, &M1, &M1_im);

        for (i=0; i<m*n; i++) {
            cen_data = *(pcadata_re->pr + i) - *(M1.pr + i);
            cen_data_im = *(pcadata_im->pr + i) - *(M1_im.pr + i);
            //*(data->pr + i) = cen_data;
            *(data_pow2.pr+i) = pow(cen_data, 2) - pow(cen_data_im, 2);
            *(data_pow2_im.pr+i) = 2 * cen_data * cen_data_im;
            *(data_pow4.pr+i) = pow(*(data_pow2.pr+i), 2) - pow(*(data_pow2_im.pr+i), 2);
            *(data_pow4_im.pr+i) = 2 * (*(data_pow2.pr+i)) * (*(data_pow2_im.pr+i));
        }

        // calculate kurtosis : kurt(y) = E{y^4}-3(E{y^2})^2
        //kurt = sum(Zjk*ones(1,p).*(data_proj.^4))./sum(Zjk)...
        //- 3*(sum(Zjk*ones(1,p).*(data_proj.^2))./sum(Zjk)).^2; %Not normalized Kurtosis
        ckurtmodel(&mZjk, sumZjk, &data_pow2, &data_pow2_im, &V2, &V2_im);
        ckurtmodel(&mZjk, sumZjk, &data_pow4, &data_pow4_im, &V4, &V4_im);

        for (j=0; j<n; j++) {
            *(kurt.pr+j) = *(V4.pr+j) - 3*(pow(*(V2.pr+j), 2) - pow(*(V2_im.pr+j), 2));
            *(kurt_im.pr+j) = *(V4_im.pr+j) - 3 * 2 * (*(V2.pr+j)) * (*(V2_im.pr+j));
        }

    }




    // sort kurt value in ascending order and reorder the pca_vec
    int realeig_num;
    int *realeig_id;
    int *compeig_id;
    vector realkurt;
    int *real_order;

    realeig_num = n;
    for (i=0; i<n; i++) {
        if (*(pcaval_im->pr+i) != 0) {
            realeig_num--;
        }
    }
    vnew(&realkurt, realeig_num);

    realeig_id = new int[realeig_num];
    compeig_id = new int[n-realeig_num];
    real_order = new int[realeig_num];

    for (i=0; i<n; i++) {
        if (*(pcaval_im->pr+i) == 0) {
            realeig_id[u] = i;
            *(realkurt.pr+u) = *(kurt.pr+i);
            u++;
        } else {
            compeig_id[v] = i;
            v++;
        }
    }

    sort(&realkurt, real_order, 'a');
    int *tmp;

    tmp = new int[realeig_num];
    for (i=0; i<realeig_num; i++) {
        tmp[i] = realeig_id[i];
    }
    for (i=0; i<realeig_num; i++) {
        realeig_id[i] = tmp[real_order[i]];
    }
    delete []tmp;

    vector kurt0;
    vector kurt0_im;
    vnew(&kurt0, kurt.l);
    vcopy(&kurt, &kurt0);
    vnew(&kurt0_im, kurt.l);
    vcopy(&kurt_im, &kurt0_im);
    for (i=0; i<realeig_num; i++) {
        kurt_id[i] = realeig_id[i];
        *(kurt.pr+i) = *(realkurt.pr+i);
        *(kurt_im.pr+i) = 0;
    }
    for (i=0; i<n-realeig_num; i++) {
        kurt_id[i+realeig_num] = compeig_id[i];
        *(kurt.pr+i+realeig_num) = *(kurt0.pr + compeig_id[i]);
        *(kurt_im.pr+i+realeig_num) = *(kurt0_im.pr + compeig_id[i]);
    }

    //printf(" the real part of kurt value is : \n");
    //vprint(&kurt);
    //printf(" the imaginary part of kurt value is : \n");
    //vprint(&kurt_im);
    //printf(" the kurt id is : \n");
    //for (i=0; i<n; i++) {
    //   printf("%d\t", kurt_id[i]);
    //}
    sortcols(kurt_id, pcavec_re, subpcappmvec_re);
    sortcols(kurt_id, pcavec_im, subpcappmvec_im);


    vdelete(&X1n);
    vdelete(&Xm1);
    mdelete(&mZjk);
    mdelete(&M1);
    mdelete(&data_pow2);
    mdelete(&data_pow4);
    vdelete(&V1);
    vdelete(&V2);
    vdelete(&V4);
    vdelete(&kurt);
    vdelete(&kurt0);
    delete []kurt_id;
    vdelete(&realkurt);
    delete []realeig_id;
    delete []compeig_id;
    delete []real_order;
    vdelete(&Xm1_im);
    mdelete(&M1_im);
    mdelete(&data_pow2_im);
    mdelete(&data_pow4_im);
    vdelete(&V1_im);
    vdelete(&V2_im);
    vdelete(&V4_im);
    vdelete(&kurt_im);
    vdelete(&kurt0_im);

    return 1;
}
Example #25
0
/********************************************************************
Real TRSM kernel (right-hand-side variant).

Copies the n-by-n block of a at (i1,j1) and the m-by-n block of x at
(i2,j2) into fixed-size on-stack buffers, performs the triangular solve
column by column there, and copies the result back into x at (i2,j2).

Returns false, leaving x untouched, when m or n exceeds alglib_r_block;
returns true otherwise.
********************************************************************/
bool ialglib::_i_rmatrixrighttrsmf(int m,
     int n,
     const ap::real_2d_array& a,
     int i1,
     int j1,
     bool isupper,
     bool isunit,
     int optype,
     ap::real_2d_array& x,
     int i2,
     int j2)
{
    // The on-stack buffers below only hold alglib_r_block rows/columns.
    if( m>alglib_r_block || n>alglib_r_block )
        return false;

    //
    // Backing storage is over-allocated by alglib_simd_alignment elements
    // so that the pointers returned by alglib_align still fit inside it.
    //
    double a_storage[alglib_r_block*alglib_r_block+alglib_simd_alignment];
    double x_storage[alglib_r_block*alglib_r_block+alglib_simd_alignment];
    double row_storage[alglib_r_block+alglib_simd_alignment];
    double * const abuf   = (double * const) alglib_align(a_storage,  alglib_simd_alignment);
    double * const xbuf   = (double * const) alglib_align(x_storage,  alglib_simd_alignment);
    double * const tmpbuf = (double * const) alglib_align(row_storage,alglib_simd_alignment);

    // Distance between consecutive diagonal elements of the packed A block.
    const int diag_step = alglib_r_block+1;
    int k;

    //
    // Prepare: load both blocks, force a unit diagonal if requested, and
    // work out which triangle the (possibly transformed) copy of A occupies.
    //
    mcopyblock(n, n, &a(i1,j1), optype, a.getstride(), abuf);
    mcopyblock(m, n, &x(i2,j2), 0, x.getstride(), xbuf);
    if( isunit )
    {
        for(k=0; k<n; k++)
            abuf[k*diag_step] = 1.0;
    }
    const bool uppera = (optype==0) ? isupper : !isupper;

    //
    // Solve Y*A^-1=X where A is upper or lower triangular
    //
    if( uppera )
    {
        // Forward sweep: column k depends on columns 0..k-1.
        for(k=0; k<n; k++)
        {
            const double beta  = 1.0/abuf[k*diag_step];
            const double alpha = -beta;
            vcopy(k, abuf+k, alglib_r_block, tmpbuf, 1);
            mv(m, k, xbuf, tmpbuf, xbuf+k, alglib_r_block, alpha, beta);
        }
    }
    else
    {
        // Backward sweep: column k depends on columns k+1..n-1.
        for(k=n-1; k>=0; k--)
        {
            double * const diag = abuf+k*diag_step;
            const double beta  = 1.0/(*diag);
            const double alpha = -beta;
            const int tail = n-1-k;
            vcopy(tail, diag+alglib_r_block, alglib_r_block, tmpbuf, 1);
            mv(m, tail, xbuf+k+1, tmpbuf, xbuf+k, alglib_r_block, alpha, beta);
        }
    }

    // Both sweeps finish by writing the solved block back into x.
    mcopyunblock(m, n, xbuf, 0, &x(i2,j2), x.getstride());
    return true;
}
Example #26
0
/*
 * Transform every vertex queued in the context's render state.
 *
 * For each input vertex this fills the matching processed vertex with:
 *   - position:    projection * modelview * object position
 *   - eyeposition: modelview * object position
 *   - frontcolor / backcolor: lit colors when lighting is enabled,
 *     otherwise a copy of the vertex color; both are clamped to [0, 1]
 *   - texcoord:    texture matrix * input texcoord
 */
void
__glcore_transform_vertices (GLcontext *g)
{
    GLrenderstate *rs = g->renderstate;
    GL_vertex *in_verts = rs->verts;
    GL_procvert *out_verts = rs->procverts;
    int idx;

    GL_float mv_mat[4][4];
    GL_float proj_mat[4][4];
    GL_float tex_mat[4][4];
    GL_float mvp_mat[4][4];
    GL_float normal_mat[4][4];

    /* Snapshot the tops of the three matrix stacks. */
    minit(mv_mat, g->trans.modelview[g->trans.modelviewdepth]);
    minit(proj_mat, g->trans.projection[g->trans.projectiondepth]);
    minit(tex_mat, g->trans.texture[g->trans.texturedepth]);
    mmult(mvp_mat, proj_mat, mv_mat);
    minvtrans(normal_mat, mv_mat);

    for (idx = 0; idx < rs->nverts; idx++) {
	GL_vertex *vin = &in_verts[idx];
	GL_procvert *vout = &out_verts[idx];

	/* clip-space and eye-space positions */
	mmultv(vout->position, mvp_mat, vin->position);
	mmultv(vout->eyeposition, mv_mat, vin->position);

	/* colors */
	if (!g->lighting.lighting) {
	    /* unlit: both faces take the plain vertex color */
	    vcopy(vout->frontcolor, vin->color);
	    vcopy(vout->backcolor, vin->color);
	}
	else {
	    GL_float obj_n[4];
	    GL_float eye_n[4];

	    /* object space normal; the w component is derived from the
	       position whenever the position's w is non-zero */
	    vcopy(obj_n, vin->normal);
	    obj_n[3] = 0.0f;
	    if (vin->position[3] != 0.0f) {
		obj_n[3] = -vdot3(obj_n, vin->position);
		obj_n[3] /= vin->position[3];
	    }

	    /* eye space normal */
	    mmultv(eye_n, normal_mat, obj_n);
	    if (g->current.normalize)
		vnorm3(eye_n, eye_n);

	    /* front face */
	    compute_lighting(g, vout->frontcolor,
			     vout->eyeposition, eye_n,
			     &vin->frontmaterial);

	    /* back face is lit with the flipped normal, two-sided mode only */
	    if (g->lighting.lightmodeltwoside) {
		vscale(eye_n, eye_n, -1.0f);
		compute_lighting(g, vout->backcolor,
				 vout->eyeposition, eye_n,
				 &vin->backmaterial);
	    }
	}
	vclamp(vout->frontcolor, vout->frontcolor, 0.0f, 1.0f);
	vclamp(vout->backcolor, vout->backcolor, 0.0f, 1.0f);

	/* no texture coordinate generation */

	/* texture coords */
	mmultv(vout->texcoord, tex_mat, vin->texcoord);
    }
}
Example #27
0
// install installs the library, package, or binary associated with dir,
// which is relative to $GOROOT/src.
//
// Outline of the steps below: build the final link (or pack) command line
// in link, collect and filter the source files in files, decide whether
// the target is stale, run the file generators from gentab, compile each
// .c/.s file (queued with bgrunv, awaited with bgwait), hand the Go files
// to the Go compiler, and finally link or pack the target.  All paths that
// do not end in fatal finish at the out: label, which removes the files
// recorded in clean and frees the local buffers and vectors.
static void
install(char *dir)
{
	char *name, *p, *elem, *prefix, *exe;
	bool islib, ispkg, isgo, stale, ispackcmd;
	Buf b, b1, path;
	Vec compile, files, link, go, missing, clean, lib, extra;
	Time ttarg, t;
	int i, j, k, n, doclean, targ;

	if(vflag) {
		if(!streq(goos, gohostos) || !streq(goarch, gohostarch))
			errprintf("%s (%s/%s)\n", dir, goos, goarch);
		else
			errprintf("%s\n", dir);
	}

	binit(&b);
	binit(&b1);
	binit(&path);
	vinit(&compile);
	vinit(&files);
	vinit(&link);
	vinit(&go);
	vinit(&missing);
	vinit(&clean);
	vinit(&lib);
	vinit(&extra);


	// path = full path to dir.
	bpathf(&path, "%s/src/%s", goroot, dir);
	name = lastelem(dir);

	// For misc/prof, copy into the tool directory and we're done.
	if(hasprefix(dir, "misc/")) {
		copy(bpathf(&b, "%s/%s", tooldir, name),
			bpathf(&b1, "%s/misc/%s", goroot, name), 1);
		goto out;
	}

	// For release, cmd/prof is not included.
	if((streq(dir, "cmd/prof")) && !isdir(bstr(&path))) {
		if(vflag > 1)
			errprintf("skipping %s - does not exist\n", dir);
		goto out;
	}

	// set up gcc command line on first run.
	if(gccargs.len == 0) {
		bprintf(&b, "%s %s", defaultcc, defaultcflags);
		splitfields(&gccargs, bstr(&b));
		for(i=0; i<nelem(proto_gccargs); i++)
			vadd(&gccargs, proto_gccargs[i]);
		if(defaultcflags[0] == '\0') {
			for(i=0; i<nelem(proto_gccargs2); i++)
				vadd(&gccargs, proto_gccargs2[i]);
		}
		if(contains(gccargs.p[0], "clang")) {
			// disable ASCII art in clang errors, if possible
			vadd(&gccargs, "-fno-caret-diagnostics");
			// clang is too smart about unused command-line arguments
			vadd(&gccargs, "-Qunused-arguments");
		}
		// disable word wrapping in error messages
		vadd(&gccargs, "-fmessage-length=0");
		if(streq(gohostos, "darwin")) {
			// golang.org/issue/5261
			vadd(&gccargs, "-mmacosx-version-min=10.6");
		}
	}
	if(ldargs.len == 0 && defaultldflags[0] != '\0') {
		bprintf(&b, "%s", defaultldflags);
		splitfields(&ldargs, bstr(&b));
	}

	// Classify the target.  The classification selects the link command
	// built below and the compiler flags used for each source file later.
	islib = hasprefix(dir, "lib") || streq(dir, "cmd/cc") || streq(dir, "cmd/gc");
	ispkg = hasprefix(dir, "pkg");
	isgo = ispkg || streq(dir, "cmd/go") || streq(dir, "cmd/cgo");

	exe = "";
	if(streq(gohostos, "windows"))
		exe = ".exe";

	// Start final link command line.
	// Note: code below knows that link.p[targ] is the target.
	ispackcmd = 0;
	if(islib) {
		// C library.
		vadd(&link, "ar");
		if(streq(gohostos, "plan9"))
			vadd(&link, "rc");
		else
			vadd(&link, "rsc");
		prefix = "";
		if(!hasprefix(name, "lib"))
			prefix = "lib";
		targ = link.len;
		vadd(&link, bpathf(&b, "%s/pkg/obj/%s_%s/%s%s.a", goroot, gohostos, gohostarch, prefix, name));
	} else if(ispkg) {
		// Go library (package).
		ispackcmd = 1;
		vadd(&link, "pack"); // program name - unused here, but all the other cases record one
		p = bprintf(&b, "%s/pkg/%s_%s/%s", goroot, goos, goarch, dir+4);
		*xstrrchr(p, '/') = '\0';
		xmkdirall(p);
		targ = link.len;
		vadd(&link, bpathf(&b, "%s/pkg/%s_%s/%s.a", goroot, goos, goarch, dir+4));
	} else if(streq(dir, "cmd/go") || streq(dir, "cmd/cgo")) {
		// Go command.
		vadd(&link, bpathf(&b, "%s/%sl", tooldir, gochar));
		vadd(&link, "-o");
		elem = name;
		if(streq(elem, "go"))
			elem = "go_bootstrap";
		targ = link.len;
		vadd(&link, bpathf(&b, "%s/%s%s", tooldir, elem, exe));
	} else {
		// C command. Use gccargs and ldargs.
		if(streq(gohostos, "plan9")) {
			vadd(&link, bprintf(&b, "%sl", gohostchar));
			vadd(&link, "-o");
			targ = link.len;
			vadd(&link, bpathf(&b, "%s/%s", tooldir, name));
		} else {
			vcopy(&link, gccargs.p, gccargs.len);
			vcopy(&link, ldargs.p, ldargs.len);
			if(sflag)
				vadd(&link, "-static");
			vadd(&link, "-o");
			targ = link.len;
			vadd(&link, bpathf(&b, "%s/%s%s", tooldir, name, exe));
			if(streq(gohostarch, "amd64"))
				vadd(&link, "-m64");
			else if(streq(gohostarch, "386"))
				vadd(&link, "-m32");
		}
	}
	// Modification time of the existing target, compared against the
	// sources and libraries in the staleness checks below.
	ttarg = mtime(link.p[targ]);

	// Gather files that are sources for this target.
	// Everything in that directory, and any target-specific
	// additions.
	xreaddir(&files, bstr(&path));

	// Remove files beginning with . or _,
	// which are likely to be editor temporary files.
	// This is the same heuristic build.ScanDir uses.
	// There do exist real C files beginning with _,
	// so limit that check to just Go files.
	n = 0;
	for(i=0; i<files.len; i++) {
		p = files.p[i];
		if(hasprefix(p, ".") || (hasprefix(p, "_") && hassuffix(p, ".go")))
			xfree(p);
		else
			files.p[n++] = p;
	}
	files.len = n;

	// Apply the per-directory entries of deptab.  After $GOROOT, $GOOS and
	// $GOARCH substitution an entry is handled as one of: a ".a" library to
	// link against, a "dir/*" wildcard whose contents are added to files,
	// a "-prefix" that removes matching files, or a plain file to add.
	for(i=0; i<nelem(deptab); i++) {
		if(streq(dir, deptab[i].prefix) ||
		   (hassuffix(deptab[i].prefix, "/") && hasprefix(dir, deptab[i].prefix))) {
			for(j=0; (p=deptab[i].dep[j])!=nil; j++) {
				breset(&b1);
				bwritestr(&b1, p);
				bsubst(&b1, "$GOROOT", goroot);
				bsubst(&b1, "$GOOS", goos);
				bsubst(&b1, "$GOARCH", goarch);
				p = bstr(&b1);
				if(hassuffix(p, ".a")) {
					vadd(&lib, bpathf(&b, "%s", p));
					continue;
				}
				if(hassuffix(p, "/*")) {
					bpathf(&b, "%s/%s", bstr(&path), p);
					b.len -= 2;
					xreaddir(&extra, bstr(&b));
					bprintf(&b, "%s", p);
					b.len -= 2;
					for(k=0; k<extra.len; k++)
						vadd(&files, bpathf(&b1, "%s/%s", bstr(&b), extra.p[k]));
					continue;
				}
				if(hasprefix(p, "-")) {
					p++;
					n = 0;
					for(k=0; k<files.len; k++) {
						if(hasprefix(files.p[k], p))
							xfree(files.p[k]);
						else
							files.p[n++] = files.p[k];
					}
					files.len = n;
					continue;
				}
				vadd(&files, p);
			}
		}
	}
	vuniq(&files);

	// Convert to absolute paths.
	for(i=0; i<files.len; i++) {
		if(!isabs(files.p[i])) {
			bpathf(&b, "%s/%s", bstr(&path), files.p[i]);
			xfree(files.p[i]);
			files.p[i] = btake(&b);
		}
	}

	// Is the target up-to-date?
	// This loop also compacts files in place: an entry is kept only if its
	// suffix is listed in depsuffix and it either has mtime 0 (recorded in
	// missing), is a .a archive, or is accepted by shouldbuild.
	stale = rebuildall;
	n = 0;
	for(i=0; i<files.len; i++) {
		p = files.p[i];
		for(j=0; j<nelem(depsuffix); j++)
			if(hassuffix(p, depsuffix[j]))
				goto ok;
		xfree(files.p[i]);
		continue;
	ok:
		t = mtime(p);
		if(t != 0 && !hassuffix(p, ".a") && !shouldbuild(p, dir)) {
			xfree(files.p[i]);
			continue;
		}
		if(hassuffix(p, ".go"))
			vadd(&go, p);
		if(t > ttarg)
			stale = 1;
		if(t == 0) {
			vadd(&missing, p);
			files.p[n++] = files.p[i];
			continue;
		}
		files.p[n++] = files.p[i];
	}
	files.len = n;

	// If there are no files to compile, we're done.
	if(files.len == 0)
		goto out;
	
	// A library newer than the target also makes the target stale.
	for(i=0; i<lib.len && !stale; i++)
		if(mtime(lib.p[i]) > ttarg)
			stale = 1;

	if(!stale)
		goto out;

	// For package runtime, copy some files into the work space.
	if(streq(dir, "pkg/runtime")) {
		copy(bpathf(&b, "%s/arch_GOARCH.h", workdir),
			bpathf(&b1, "%s/arch_%s.h", bstr(&path), goarch), 0);
		copy(bpathf(&b, "%s/defs_GOOS_GOARCH.h", workdir),
			bpathf(&b1, "%s/defs_%s_%s.h", bstr(&path), goos, goarch), 0);
		p = bpathf(&b1, "%s/signal_%s_%s.h", bstr(&path), goos, goarch);
		if(isfile(p))
			copy(bpathf(&b, "%s/signal_GOOS_GOARCH.h", workdir), p, 0);
		copy(bpathf(&b, "%s/os_GOOS.h", workdir),
			bpathf(&b1, "%s/os_%s.h", bstr(&path), goos), 0);
		copy(bpathf(&b, "%s/signals_GOOS.h", workdir),
			bpathf(&b1, "%s/signals_%s.h", bstr(&path), goos), 0);
	}

	// Generate any missing files; regenerate existing ones.
	for(i=0; i<files.len; i++) {
		p = files.p[i];
		elem = lastelem(p);
		for(j=0; j<nelem(gentab); j++) {
			if(gentab[j].gen == nil)
				continue;
			if(hasprefix(elem, gentab[j].nameprefix)) {
				if(vflag > 1)
					errprintf("generate %s\n", p);
				gentab[j].gen(bstr(&path), p);
				// Do not add generated file to clean list.
				// In pkg/runtime, we want to be able to
				// build the package with the go tool,
				// and it assumes these generated files already
				// exist (it does not know how to build them).
				// The 'clean' command can remove
				// the generated files.
				goto built;
			}
		}
		// Did not rebuild p.
		if(find(p, missing.p, missing.len) >= 0)
			fatal("missing file %s", p);
	built:;
	}

	// One more copy for package runtime.
	// The last batch was required for the generators.
	// This one is generated.
	if(streq(dir, "pkg/runtime")) {
		copy(bpathf(&b, "%s/zasm_GOOS_GOARCH.h", workdir),
			bpathf(&b1, "%s/zasm_%s_%s.h", bstr(&path), goos, goarch), 0);
	}

	// Generate .c files from .goc files.
	if(streq(dir, "pkg/runtime")) {
		for(i=0; i<files.len; i++) {
			p = files.p[i];
			if(!hassuffix(p, ".goc"))
				continue;
			// b = path/zp but with _goos_goarch.c instead of .goc
			bprintf(&b, "%s%sz%s", bstr(&path), slash, lastelem(p));
			b.len -= 4;
			bwritef(&b, "_%s_%s.c", goos, goarch);
			goc2c(p, bstr(&b));
			vadd(&files, bstr(&b));
		}
		vuniq(&files);
	}

	if((!streq(goos, gohostos) || !streq(goarch, gohostarch)) && isgo) {
		// We've generated the right files; the go command can do the build.
		if(vflag > 1)
			errprintf("skip build for cross-compile %s\n", dir);
		goto nobuild;
	}

	// Compile the files.
	for(i=0; i<files.len; i++) {
		if(!hassuffix(files.p[i], ".c") && !hassuffix(files.p[i], ".s"))
			continue;
		name = lastelem(files.p[i]);

		vreset(&compile);
		if(!isgo) {
			// C library or tool.
			if(streq(gohostos, "plan9")) {
				vadd(&compile, bprintf(&b, "%sc", gohostchar));
				vadd(&compile, "-FTVwp");
				vadd(&compile, "-DPLAN9");
				vadd(&compile, "-D__STDC__=1");
				vadd(&compile, "-D__SIZE_TYPE__=ulong"); // for GNU Bison
				vadd(&compile, bpathf(&b, "-I%s/include/plan9", goroot));
				vadd(&compile, bpathf(&b, "-I%s/include/plan9/%s", goroot, gohostarch));
			} else {
				vcopy(&compile, gccargs.p, gccargs.len);
				vadd(&compile, "-c");
				if(streq(gohostarch, "amd64"))
					vadd(&compile, "-m64");
				else if(streq(gohostarch, "386"))
					vadd(&compile, "-m32");
	
				vadd(&compile, "-I");
				vadd(&compile, bpathf(&b, "%s/include", goroot));
			}

			if(streq(dir, "lib9"))
				vadd(&compile, "-DPLAN9PORT");


			vadd(&compile, "-I");
			vadd(&compile, bstr(&path));

			// lib9/goos.c gets the default constants hard-coded.
			if(streq(name, "goos.c")) {
				vadd(&compile, "-D");
				vadd(&compile, bprintf(&b, "GOOS=\"%s\"", goos));
				vadd(&compile, "-D");
				vadd(&compile, bprintf(&b, "GOARCH=\"%s\"", goarch));
				bprintf(&b1, "%s", goroot_final);
				bsubst(&b1, "\\", "\\\\");  // turn into C string
				vadd(&compile, "-D");
				vadd(&compile, bprintf(&b, "GOROOT=\"%s\"", bstr(&b1)));
				vadd(&compile, "-D");
				vadd(&compile, bprintf(&b, "GOVERSION=\"%s\"", goversion));
				vadd(&compile, "-D");
				vadd(&compile, bprintf(&b, "GOARM=\"%s\"", goarm));
				vadd(&compile, "-D");
				vadd(&compile, bprintf(&b, "GO386=\"%s\"", go386));
				vadd(&compile, "-D");
				vadd(&compile, bprintf(&b, "GO_EXTLINK_ENABLED=\"%s\"", goextlinkenabled));
			}

			// gc/lex.c records the GOEXPERIMENT setting used during the build.
			if(streq(name, "lex.c")) {
				xgetenv(&b, "GOEXPERIMENT");
				vadd(&compile, "-D");
				vadd(&compile, bprintf(&b1, "GOEXPERIMENT=\"%s\"", bstr(&b)));
			}
		} else {
			// Supporting files for a Go package.
			if(hassuffix(files.p[i], ".s"))
				vadd(&compile, bpathf(&b, "%s/%sa", tooldir, gochar));
			else {
				vadd(&compile, bpathf(&b, "%s/%sc", tooldir, gochar));
				vadd(&compile, "-F");
				vadd(&compile, "-V");
				vadd(&compile, "-w");
			}
			vadd(&compile, "-I");
			vadd(&compile, workdir);
			vadd(&compile, "-I");
			vadd(&compile, bprintf(&b, "%s/pkg/%s_%s", goroot, goos, goarch));
			vadd(&compile, "-D");
			vadd(&compile, bprintf(&b, "GOOS_%s", goos));
			vadd(&compile, "-D");
			vadd(&compile, bprintf(&b, "GOARCH_%s", goarch));
			vadd(&compile, "-D");
			vadd(&compile, bprintf(&b, "GOOS_GOARCH_%s_%s", goos, goarch));
		}

		// b = output object path; by default it lives in workdir and is
		// scheduled for removal once the build finishes.
		bpathf(&b, "%s/%s", workdir, lastelem(files.p[i]));
		doclean = 1;
		if(!isgo && streq(gohostos, "darwin")) {
			// To debug C programs on OS X, it is not enough to say -ggdb
			// on the command line.  You have to leave the object files
			// lying around too.  Leave them in pkg/obj/, which does not
			// get removed when this tool exits.
			bpathf(&b1, "%s/pkg/obj/%s", goroot, dir);
			xmkdirall(bstr(&b1));
			bpathf(&b, "%s/%s", bstr(&b1), lastelem(files.p[i]));
			doclean = 0;
		}

		// Change the last character of the output file (which was c or s).
		if(streq(gohostos, "plan9"))
			b.p[b.len-1] = gohostchar[0];
		else
			b.p[b.len-1] = 'o';
		vadd(&compile, "-o");
		vadd(&compile, bstr(&b));
		vadd(&compile, files.p[i]);
		bgrunv(bstr(&path), CheckExit, &compile);

		// The object file becomes an input of the final link command.
		vadd(&link, bstr(&b));
		if(doclean)
			vadd(&clean, bstr(&b));
	}
	// Wait for the compile commands queued with bgrunv above.
	bgwait();

	if(isgo) {
		// The last loop was compiling individual files.
		// Hand the Go files to the compiler en masse.
		vreset(&compile);
		vadd(&compile, bpathf(&b, "%s/%sg", tooldir, gochar));

		bpathf(&b, "%s/_go_.a", workdir);
		vadd(&compile, "-pack");
		vadd(&compile, "-o");
		vadd(&compile, bstr(&b));
		vadd(&clean, bstr(&b));
		if(!ispackcmd)
			vadd(&link, bstr(&b));

		vadd(&compile, "-p");
		if(hasprefix(dir, "pkg/"))
			vadd(&compile, dir+4);
		else
			vadd(&compile, "main");

		if(streq(dir, "pkg/runtime"))
			vadd(&compile, "-+");

		vcopy(&compile, go.p, go.len);

		runv(nil, bstr(&path), CheckExit, &compile);

		// For a package, dopack is called directly instead of running the
		// link command; everything after link.p[targ] is passed as inputs.
		if(ispackcmd) {
			xremove(link.p[targ]);
			dopack(link.p[targ], bstr(&b), &link.p[targ+1], link.len - (targ+1));
			goto nobuild;
		}
	}

	if(!islib && !isgo) {
		// C binaries need the libraries explicitly, and -lm.
		vcopy(&link, lib.p, lib.len);
		if(!streq(gohostos, "plan9"))
			vadd(&link, "-lm");
	}

	// Remove target before writing it.
	xremove(link.p[targ]);

	runv(nil, nil, CheckExit, &link);

nobuild:
	// In package runtime, we install runtime.h and cgocall.h too,
	// for use by cgo compilation.
	if(streq(dir, "pkg/runtime")) {
		copy(bpathf(&b, "%s/pkg/%s_%s/cgocall.h", goroot, goos, goarch),
			bpathf(&b1, "%s/src/pkg/runtime/cgocall.h", goroot), 0);
		copy(bpathf(&b, "%s/pkg/%s_%s/runtime.h", goroot, goos, goarch),
			bpathf(&b1, "%s/src/pkg/runtime/runtime.h", goroot), 0);
	}


out:
	// Common exit: delete the temporary outputs recorded in clean, then
	// release every buffer and vector initialised at the top.
	for(i=0; i<clean.len; i++)
		xremove(clean.p[i]);

	bfree(&b);
	bfree(&b1);
	bfree(&path);
	vfree(&compile);
	vfree(&files);
	vfree(&link);
	vfree(&go);
	vfree(&missing);
	vfree(&clean);
	vfree(&lib);
	vfree(&extra);
}
Example #28
0
/*
 * Benchmark driver for the vcopy kernel.
 *
 * Usage: prog [-c] [-f] [SIZE[kB|MB|GB]]
 *   -c    run the kernel 64 times before measuring (cache pre-fill)
 *   -f    "flops" mode: pre-fill with flops_3 and print a speed figure
 *   SIZE  buffer size; a bare number is a count of floats, a number with
 *         a kB/MB/GB suffix is a size in bytes.  Default: one float.
 *
 * Runs NB_MEASURE timed calls, prints the mean duration and its variance
 * in microseconds, then checks that out[] equals in[].
 * Returns 0 after a completed run (even if validation failed, as before)
 * and 1 if the buffers cannot be allocated.
 */
int main(int argc, char **argv)
{
    struct timeval tstart, tstop;
    unsigned long int meas[NB_MEASURE];
    unsigned long int duration = 0;
    unsigned long int var = 0;
    unsigned int size = 1;
    size_t nelem;
    size_t k;
    int verbose = 0;
    int i = 0;
    int c;
    int prefillingcache = 0;
    int coef = 1;
    int flops = 0;

    while ((c = getopt(argc, argv, "cf")) != -1)
	switch (c) {
	case 'c':
	    prefillingcache = 1;
	    break;
	case 'f':
	    flops = 1;
	    break;
	default:
	    abort();
	}

    /*
     * getopt leaves optind at the first non-option argument.  Using it,
     * rather than counting options, also works for combined flags ("-cf")
     * and avoids dereferencing argv[argc] when only flags were given.
     */
    if (optind < argc) {
	const char *arg = argv[optind];

	if (NULL != strcasestr(arg, "kB"))
	    coef = 1024;
	else if (NULL != strcasestr(arg, "MB"))
	    coef = 1024 * 1024;
	else if (NULL != strcasestr(arg, "GB"))
	    coef = 1024 * 1024 * 1024;

	size *= coef * atoi(arg);
    }
    /* Without a unit suffix the argument is an element count, not bytes. */
    if (1 == coef) {
	size *= sizeof(float);
    }
    nelem = size / sizeof(float);

    //Setup kernel arguments
    float *in = (float *) malloc(size);
    float *out = (float *) malloc(size);
    if (in == NULL || out == NULL) {
	fprintf(stderr, "Failed to allocate two buffers of %u bytes\n", size);
	free(in);
	free(out);
	return 1;
    }
    memset(out, 0, size);
    memset(in, 0, size);
    for (k = 0; k < nelem; k++)
	in[k] = k;

    // Mesure NBR_COPY copy vector
    SNK_INIT_LPARM(lparm, size / sizeof(float));
    //Fill Caches
    if (prefillingcache) {
	printf("Pre-filling cache option SET\n");
	for (i = 0; i < 64; i++) {
	    if (flops) {
		flops_3(in, out, lparm);
	    } else {
		vcopy(in, out, lparm);
	    }
	}
    }

    /*
     * NOTE(review): the timed loop always runs vcopy, even with -f, while
     * the -f report below prints a flops-style speed.  Left unchanged
     * because the validation at the end expects out[] to be a copy of in[].
     */
    for (i = 0; i < NB_MEASURE; i++) {
	gettimeofday(&tstart, NULL);
	vcopy(in, out, lparm);
	gettimeofday(&tstop, NULL);
	meas[i] =
	    ((tstop.tv_sec - tstart.tv_sec) * 1000000L + tstop.tv_usec) -
	    tstart.tv_usec;
    }

    /* Mean and (population) variance of the measured durations. */
    for (i = 0; i < NB_MEASURE; i++) {
	duration += meas[i];
    }
    duration /= NB_MEASURE;
    for (i = 0; i < NB_MEASURE; i++) {
	var += ((meas[i] - duration) * (meas[i] - duration));
    }
    var /= NB_MEASURE;

    /* Arguments are cast to match the %lu conversions exactly. */
    if (flops) {
	printf
	    ("HSA: Vector of %lu integer of %d-bytes = %lu Bytes takes %lu usec [+/-var %lu] => Speed = %.3f\n",
	     (unsigned long) nelem, (int) sizeof(float), (unsigned long) size, duration, var,
	     (float)(3.0 * size/sizeof(float) / duration * 1000000));
    } else {
	printf
	    ("HSA: Vector of %lu integer of %d-bytes = %lu Bytes takes %lu usec [+/-var %lu]\n",
	     (unsigned long) nelem, (int) sizeof(float), (unsigned long) size, duration, var);
    }

    //Validate
    bool valid = true;
    int failIndex = 0;
    for (k = 0; k < nelem; k++) {
	/* in[] and out[] hold floats, so they are printed with %f. */
	if (verbose && k < 10)
	    printf("in[%lu]=%f, out[%lu]=%f, ",
		   (unsigned long) k, in[k], (unsigned long) k, out[k]);
	if (out[k] != in[k]) {
	    failIndex = (int) k;
	    valid = false;
	    break;
	}
    }
    if (valid) {
	if (verbose)
	    printf("passed validation\n");
    } else
	printf("VALIDATION FAILED!\nBad index: %d\n", failIndex);
    free(in);
    free(out);
    return 0;
}
Example #29
0
/*
 * Benchmark driver comparing three implementations of each vector kernel
 * (vcopy, vadd, vjacobi_1d): the naive version, the un-rewritten version,
 * and DBrew-rewritten versions generated for a vector size of 16 bytes and,
 * when compiled with __AVX__, 32 bytes.
 *
 * Usage: [-v | -vv] [-n] [-c | -a | -j] [len [iters]]
 *   -v, -vv   raise verbosity; any verbosity decodes the generated code,
 *             a level above 1 also switches on the rewriter's verbose mode
 *   -n        generate the rewritten variants but do not run them
 *   -c        disable vadd and vjacobi_1d (only vcopy remains)
 *   -a        disable vcopy and vjacobi_1d (only vadd remains)
 *   -j        disable vcopy and vadd (only vjacobi_1d remains)
 *   len       array length in units of 1000 elements (0/absent: 10000)
 *   iters     iterations per variant (0/absent: 20)
 *
 * Returns 0 on success, 1 if the work arrays cannot be allocated.
 */
int main(int argc, char* argv[])
{
    double t1, t2, t3, t4, t5;
    double sum1, sum2, sum3, sum4;
    int arg = 1, len = 0, iters = 0, verb = 0, run = 1;
    int do_vcopy = 1, do_vadd = 1, do_vjacobi = 1;

    /* Options come first; the first non-option argument ends the scan. */
    while(argc>arg) {
        if      (strcmp(argv[arg],"-v")==0)  verb++;
        else if (strcmp(argv[arg],"-vv")==0) verb+=2;
        else if (strcmp(argv[arg],"-n")==0)  run = 0;
        else if (strcmp(argv[arg],"-c")==0)  do_vadd = 0,  do_vjacobi = 0;
        else if (strcmp(argv[arg],"-a")==0)  do_vcopy = 0, do_vjacobi = 0;
        else if (strcmp(argv[arg],"-j")==0)  do_vcopy = 0, do_vadd = 0;
        else
            break;
        arg++;
    }
    if (argc>arg) { len   = atoi(argv[arg]); arg++; }
    if (argc>arg) { iters = atoi(argv[arg]); arg++; }
    if (len == 0) len = 10000;
    if (iters == 0) iters = 20;
    len = len * 1000;

    printf("Alloc/init 3 double arrays of length %d ...\n", len);
    double* a = (double*) malloc(len * sizeof(double));
    double* b = (double*) malloc(len * sizeof(double));
    double* c = (double*) malloc(len * sizeof(double));
    /* A negative or very large length makes malloc fail; bail out instead
     * of writing through a NULL pointer in the init loop below. */
    if (a == NULL || b == NULL || c == NULL) {
        fprintf(stderr, "Error: cannot allocate 3 arrays of %d doubles\n", len);
        free(a);
        free(b);
        free(c);
        return 1;
    }
    for(int i = 0; i<len; i++) {
        a[i] = 1.0;
        b[i] = (double) (i % 20);
        c[i] = 3.0;
    }

    // Generate vectorized variants & run against naive/original

#if __AVX__
    bool do32 = true;
#else
    bool do32 = false;
#endif

    // vcopy

    if (do_vcopy) {
        vcopy_t vcopy16, vcopy32;

        Rewriter* rc16 = dbrew_new();
        if (verb>1) dbrew_verbose(rc16, true, true, true);
        dbrew_set_function(rc16, (uint64_t) vcopy);
        dbrew_config_parcount(rc16, 3);
        dbrew_config_force_unknown(rc16, 0);
        dbrew_set_vectorsize(rc16, 16);
        vcopy16 = (vcopy_t) dbrew_rewrite(rc16, a, b, len);
        if (verb) decode_func(rc16, "vcopy16");

        if (do32) {
            Rewriter* rc32 = dbrew_new();
            if (verb>1) dbrew_verbose(rc32, true, true, true);
            dbrew_set_function(rc32, (uint64_t) vcopy);
            dbrew_config_parcount(rc32, 3);
            dbrew_config_force_unknown(rc32, 0);
            dbrew_set_vectorsize(rc32, 32);
            vcopy32 = (vcopy_t) dbrew_rewrite(rc32, a, b, len);
            if (verb) decode_func(rc32, "vcopy32");
        }

        printf("Running %d iterations of vcopy ...\n", iters);
        t1 = wtime();
        for(int iter = 0; iter < iters; iter++)
            naive_vcopy(a, b, len);
        t2 = wtime();
        for(int iter = 0; iter < iters; iter++)
            vcopy(a, b, len);
        t3 = wtime();
        if (run)
            for(int iter = 0; iter < iters; iter++)
                vcopy16(a, b, len);
        t4 = wtime();
        /* vcopy32 is only assigned when do32 is set, hence the guard. */
        if (do32 && run)
            for(int iter = 0; iter < iters; iter++)
                vcopy32(a, b, len);
        t5 = wtime();
        printf("  naive: %.3f s, un-rewritten: %.3f s, rewritten-16: %.3f s",
               t2-t1, t3-t2, t4-t3);
        if (do32)
            printf(", rewritten-32: %.3f s", t5-t4);
        printf("\n");
    }


    // vadd

    if (do_vadd) {
        vadd_t vadd16, vadd32;

        Rewriter* ra16 = dbrew_new();
        if (verb>1) dbrew_verbose(ra16, true, true, true);
        dbrew_set_function(ra16, (uint64_t) vadd);
        dbrew_config_parcount(ra16, 4);
        dbrew_config_force_unknown(ra16, 0);
        dbrew_set_vectorsize(ra16, 16);
        vadd16 = (vadd_t) dbrew_rewrite(ra16, a, b, c, len);
        if (verb) decode_func(ra16, "vadd16");

        if (do32) {
            Rewriter* ra32 = dbrew_new();
            if (verb>1) dbrew_verbose(ra32, true, true, true);
            dbrew_set_function(ra32, (uint64_t) vadd);
            dbrew_config_parcount(ra32, 4);
            dbrew_config_force_unknown(ra32, 0);
            dbrew_set_vectorsize(ra32, 32);
            vadd32 = (vadd_t) dbrew_rewrite(ra32, a, b, c, len);
            if (verb) decode_func(ra32, "vadd32");
        }

        /* The checksum loops sit inside the timed regions, exactly as in
         * the original measurement setup. */
        sum1 = 0.0, sum2 = 0.0, sum3 = 0.0, sum4 = 0.0;
        printf("Running %d iterations of vadd ...\n", iters);
        t1 = wtime();
        for(int iter = 0; iter < iters; iter++)
            naive_vadd(a, b, c, len);
        for(int i = 0; i < len; i++) sum1 += a[i];
        t2 = wtime();
        for(int iter = 0; iter < iters; iter++)
            vadd(a, b, c, len);
        for(int i = 0; i < len; i++) sum2 += a[i];
        t3 = wtime();
        if (run)
            for(int iter = 0; iter < iters; iter++)
                vadd16(a, b, c, len);
        for(int i = 0; i < len; i++) sum3 += a[i];
        t4 = wtime();
        if (do32 && run)
            for(int iter = 0; iter < iters; iter++)
                vadd32(a, b, c, len);
        for(int i = 0; i < len; i++) sum4 += a[i];
        t5 = wtime();

        printf("  naive: %.3f s, un-rewritten: %.3f s, rewritten-16: %.3f s",
               t2-t1, t3-t2, t4-t3);
        if (do32)
            printf(", rewritten-32: %.3f s", t5-t4);
        printf("\n");
        /* Report one checksum per variant, mirroring the timing line; the
         * 32-byte checksum is only meaningful when that variant exists. */
        printf("  sum naive: %f, sum un-rewritten: %f, sum rewritten-16: %f",
               sum1, sum2, sum3);
        if (do32)
            printf(", sum rewritten-32: %f", sum4);
        printf("\n");
    }


    // vjacobi_1d

    if (do_vjacobi) {
        /* vjacobi_1d is called with the same (ptr, ptr, len) argument list
         * as vcopy, so the vcopy_t function pointer type is reused. */
        vcopy_t vjacobi_1d16, vjacobi_1d32;

        Rewriter* rj16 = dbrew_new();
        if (verb>1) dbrew_verbose(rj16, true, true, true);
        dbrew_set_function(rj16, (uint64_t) vjacobi_1d);
        dbrew_config_parcount(rj16, 3);
        dbrew_config_force_unknown(rj16, 0);
        dbrew_set_vectorsize(rj16, 16);
        vjacobi_1d16 = (vcopy_t) dbrew_rewrite(rj16, a, b, len);
        if (verb) decode_func(rj16, "vjacobi_1d16");

        if (do32) {
            Rewriter* rj32 = dbrew_new();
            if (verb>1) dbrew_verbose(rj32, true, true, true);
            dbrew_set_function(rj32, (uint64_t) vjacobi_1d);
            dbrew_config_parcount(rj32, 3);
            dbrew_config_force_unknown(rj32, 0);
            dbrew_set_vectorsize(rj32, 32);
            vjacobi_1d32 = (vcopy_t) dbrew_rewrite(rj32, a, b, len);
            if (verb) decode_func(rj32, "vjacobi_1d32");
        }

        /* The kernels are handed the interior a[1..len-2] / b[1..len-2];
         * the checksums still run over the whole array a. */
        sum1 = 0.0, sum2 = 0.0, sum3 = 0.0, sum4 = 0.0;
        printf("Running %d iterations of vjacobi_1d ...\n", iters);
        t1 = wtime();
        for(int iter = 0; iter < iters; iter++)
            naive_vjacobi_1d(a+1, b+1, len-2);
        for(int i = 0; i < len; i++) sum1 += a[i];
        t2 = wtime();
        for(int iter = 0; iter < iters; iter++)
            vjacobi_1d(a+1, b+1, len-2);
        for(int i = 0; i < len; i++) sum2 += a[i];
        t3 = wtime();
        if (run)
            for(int iter = 0; iter < iters; iter++)
                vjacobi_1d16(a+1, b+1, len-2);
        for(int i = 0; i < len; i++) sum3 += a[i];
        t4 = wtime();
        if (do32 && run)
            for(int iter = 0; iter < iters; iter++)
                vjacobi_1d32(a+1, b+1, len-2);
        for(int i = 0; i < len; i++) sum4 += a[i];
        t5 = wtime();
        printf("  naive: %.3f s, un-rewritten: %.3f s, rewritten-16: %.3f s",
               t2-t1, t3-t2, t4-t3);
        if (do32)
            printf(", rewritten-32: %.3f s", t5-t4);
        printf("\n");
        printf("  sum naive: %f, sum un-rewritten: %f, sum rewritten-16: %f",
               sum1, sum2, sum3);
        if (do32)
            printf(", sum rewritten-32: %f", sum4);
        printf("\n");
    }

    free(a);
    free(b);
    free(c);
    return 0;
}
Example #30
0
/* Routines that handle the eigensolver.
 *
 * Linear stability analysis
 * Solve the generalized eigenvalue problem J z = t M z
 *
 * where
 *
 * input:
 * J = jacobian matrix 
 * M = mass or overlap matrix
 *
 * output:
 * z = eigenvectors
 * t = eigenvalues
 *
 * Friendly warning:
 * Do not edit this unless you know what you are doing!!
 *
 * Originally written by Ian Gates
 * pre-CVS modification history:
 *   - Sep 24, 1997, first checkin
 *   - Feb 98 -> Oct 98, another checkin
 *   - Jan 13, 2000, MMH rearranged and conformed to Goma style.
 *
 * eggrollwrap() drives the reverse-communication solver gevp_solver_rc():
 * after every call it performs the operation requested through `action`
 * (matrix-vector product with J or M, factorization of the shifted matrix
 * J - s*M, or the shift-invert transformation) and calls the solver again
 * until `rcflag` comes back 0.  It then prints the spectrum and, when
 * push_mode > 0, writes the first push_mode eigenvectors to individual
 * Exodus II files named "LSA_<k>_of_<push_mode>_[wn=<w>_]<ExoFileOut>".
 *
 * Values read from the control arrays:
 *   istuff[0] = mm         (spectrum storage is sized mm+5)
 *   istuff[1] = nj         (vector length)
 *   istuff[2] = nnz_j      (length of the jac[]/mas[] nonzero arrays)
 *   istuff[3] = filter
 *   istuff[4] = recycle    (must be 0, anything else aborts via EH)
 *   istuff[6] = nev_want
 *   istuff[7] = init_shft  (number of initial real shifts)
 *   istuff[8] = max_itr
 *   istuff[9] = push_mode  (number of modes written to file)
 *   dstuff[0] = stol
 *   dstuff[3] = ivector
 *   dstuff[10..] = initial real shifts (init_shft of them are copied)
 *
 * ExoFileOut is overwritten temporarily with the per-mode file names and
 * restored before returning; it must provide at least MAX_FNL bytes.
 *
 * The parameters x, prob_type, theta, converged, nprint, gindex and
 * p_gsize are not referenced in this routine.
 *
 * The routine terminates the program with exit(-1) if the solver has not
 * finished after more than 10000 reverse-communication calls.
 */
void
eggrollwrap(int *istuff,	/* info for eigenvalue extraction */
	    dbl *dstuff,	/* info for eigenvalue extraction */
	    
	    int *ija,		/* column pointer array */
	    dbl *jac,		/* nonzero array */
	    dbl *mas,		/* nonzero array - same structure 
				   as jac[] (ija[]) */
	    
	    dbl *x,		/* Value of the solution vector */
	    char *ExoFileOut,	/* Name of exoII output file */
	    int prob_type,
	    dbl delta_t,	/* time step size */
	    dbl theta,		/* variable time integration parameter
				   explicit (theta = 1) to 
				   implicit (theta = 0) */
	    dbl *x_old,		/* Value of the old solution vector */
	    dbl *xdot,		/* Value of xdot predicted for new 
				   solution */
	    dbl *xdot_old,      /* dx/dt at previous time step */
	    dbl *resid_vector,
	    int *converged,	/* whether the Newton has converged */
	    int *nprint,	/* counter for time step number */
	    int tnv,		/* number of nodal results */
	    int tnv_post,	/* number of post processing results */
	    struct Results_Description *rd,
	    int *gindex,
	    int *p_gsize,
	    dbl	*gvec,
	    dbl time_value,
	    Exo_DB *exo,	/* ptr to finite element mesh db */
	    int Num_Proc,	/* number of processors used */
	    Dpi	*dpi)		/* ptr to distributed processing info */
{
  int 
    i, j, ic, 
    nj, nnz_j, 
    first_linear_solver_call, Factor_Flag, matr_form, 
    error, rcflag, action, 
    ev_n, ev_jac, 
    filter, 
    mm, max_itr,
    nev_want, nev_found, lead, 
    /*    read_form, soln_tech, push_mode, */
    push_mode, 
    init_shft, recycle;
  dbl
    stol, ivector,
    dwork[20]; 
  dbl 
    *ev_e, *ev_i, *ev_r, *ev_x,  
    *v1, *v2, 
    *mat, 
    **evect, **schur;
  char save_ExoFileOut[MAX_FNL];

  static int UMF_system_id;	/* Used to uniquely identify the
				 * explicit fill system to solve from
				 * the other UMF systems. */
  /* Initialize
   */
  ic = error = rcflag = action = 0;
  ev_jac = 0;
  matr_form = 1;

  /* Set values
   */
  mm         = istuff[0];
  nj         = istuff[1];
  nnz_j      = istuff[2];
  filter     = istuff[3];
  recycle    = istuff[4];
  nev_want   = istuff[6];
  init_shft  = istuff[7];
  max_itr    = istuff[8];
  push_mode  = istuff[9];
  stol       = dstuff[0];
  ivector    = dstuff[3];

  printf(" Initializing variables and allocating space... ");

  /* Allocate spectrum storage
   */
  ev_e = Dvector_birth(mm+5);
  ev_i = Dvector_birth(mm+5);
  ev_r = Dvector_birth(mm+5);
  ev_x = Dvector_birth(mm+5);

  /* Set initial (real) shifts
   */
  ev_n = init_shft;
  vcopy(init_shft, &ev_r[0], 1.0, &dstuff[10]);

  /* Allocate auxiliary work vectors
   */
  mat = Dvector_birth(nnz_j+5);

  /* Allocate eigenvectors and schur storage
   */
  i = nj+5;
  j = mm+5; 
  evect = Dmatrix_birth(j, i);
  schur = Dmatrix_birth(j, i);

  /* Allocate reverse communication vectors
   */
  v1 = Dvector_birth(nj+5);
  v2 = Dvector_birth(nj+5);

  /* Check for something that seems to make no difference if it's on,
   * except for occasionally causing seg faults... */
  if(recycle != 0)
    EH(-1, "Eigen recycle currently doesn't work, turn it off.");

  /* Set initial vector
   */
  vinit(nj, &v1[0], 0.5);

  /* GEVP solution
   *
   * Reverse communication: the solver returns with a request in
   * `action`, we carry it out on v1/v2 and call the solver again.
   */
  ic = 0;
  first_linear_solver_call = +1;
  do {
    ic++;
    /* printf("ic = %d\n", ic); fflush(stdout); */
    gevp_solver_rc(nj, mm, max_itr, stol, filter, &ev_n, &ev_r[0],
		   &ev_i[0], &ev_e[0], &ev_x[0], &lead, ev_jac,
		   nev_want, &nev_found, schur, evect, recycle,
		   ivector, 0, &rcflag, &action, &dwork[0], &v1[0],
		   &v2[0]);
    /* printf("action = %d\n", action); fflush(stdout); */
    switch (action)
      {
      case  0: /* All done */
	break;  
      case  1: /* v2 = J*v1 */
	MV_MSR(&nj, &ija[0], &jac[0], &v1[0], &v2[0]);
	break;  
      case  2: /* v2 = M*v1 */
	MV_MSR(&nj, &ija[0], &mas[0], &v1[0], &v2[0]);
	break;  
      case  3: /* inv(J-sM) */
	/* Shift matrix step: mat = J - dwork[0]*M */
	v2sum(nnz_j, &mat[0], 1.0, &jac[0], -dwork[0], &mas[0]);

	/* Invert step - get LU for later */
	if(first_linear_solver_call == 1)
	  {
	    Factor_Flag = -2;
	    UMF_system_id = -1;
	  }
	else
	  Factor_Flag = -1;

	/*
	printf("Calling SL_UMF, first_linear_solver_call = %d, Factor_Flag = %d\n",
	       first_linear_solver_call, Factor_Flag); fflush(stdout); 
	*/

	UMF_system_id = SL_UMF(UMF_system_id,
			       &first_linear_solver_call, 
			       &Factor_Flag, 
			       &matr_form, 
			       &nj, 
			       &nnz_j, 
			       &ija[0], 
			       &ija[0], 
			       &mat[0], 
			       &v1[0], 
			       &v2[0]);
	first_linear_solver_call = 0;
	break;
      case  4: /* v2 = inv(J-sM)*M*v1 */
	Factor_Flag = 3;
	if(first_linear_solver_call)
	  EH(-1, "Tried to transform eigenvectors before a solve!");
	gevp_transformation(UMF_system_id, first_linear_solver_call,
			    Factor_Flag, matr_form, 1, nj, nnz_j,
			    &ija[0], &jac[0], &mas[0], &mat[0],
			    /*			  soln_tech, */
			    &v2[0], &v1[0], dwork[0], dwork[1]);
	break;
      default:
	EH(-1, "Uh-oh!  I shouldn't be here!");
	break;
      } /* switch(action) */
    /* Safety net against a solver that never reports completion. */
    if (ic > 10000)
      error = 1;
  } while ((rcflag != 0) && (error == 0));

  /* Error check
   */
  if (error == 1)
    {
      puts(" E: Too many iterations.  Escape.  ");
      exit(-1);
    }

  /* De-allocate solver storage
   */  

  first_linear_solver_call = -1;

  /* MMH sez: If first_linear_solver_call == -1, then we want to
   * deallocate memory so we shouldn't be trying to solve anything!
   * This was FMR'ing b/c SL_UMF was being called with
   * first_linear_solver_call = -1, and Factor_Flag = 3.  Bad.
   */
  Factor_Flag = -3;

  UMF_system_id = SL_UMF(UMF_system_id,
			 &first_linear_solver_call, 
			 &Factor_Flag, 
			 &matr_form, 
			 &nj, 
			 &nnz_j, 
			 ija, 
			 ija, 
			 mat, 
			 &v1[0], 
			 &v2[0]);  
  
  /* Display results 
   */
  printf("\n-------------------------------------------------------------------------------\n");
  if(Linear_Stability == LSA_3D_OF_2D)
    printf("NORMAL MODE WAVE NUMBER = %g\n", LSA_3D_of_2D_wave_number);
  printf(" Eigensolver required %d iterations.\n",ic);
  printf(" Found %d converged eigenvalues.\n", nev_found);
  printf(" Leading Eigenvalue  = % 10.6e%+10.6e i RES = % 10.6e\n", 
	 ev_r[lead], ev_i[lead], ev_e[lead]);
  printf("    Real           Imag           RES\n");
  for (i=0;i<nev_found;i++)
    printf(" % 10.6e %+10.6e i % 10.6e\n", ev_r[i], ev_i[i], ev_e[i]);

  /* MMH: I know this is stupid, but the filename for the "regular"
   * Exodus output is a global variable!!!  It is required in
   * post_process_nodal().  I swap it out here, and will swap it back
   * when we're done with LSA.  Why don't I just overwrite it
   * completely you may ask?  Well, I don't know if and/or when the
   * code will continue to do something useful after LSA.  If it ever
   * does, then it would probably like to know what the correct output
   * filename is.  Kinda like camping: Leave with what you came in
   * with.  */
  strncpy(save_ExoFileOut, ExoFileOut, MAX_FNL-1);
  /* strncpy() does not terminate the copy when the source has
   * MAX_FNL-1 or more characters, and save_ExoFileOut starts out
   * uninitialized, so terminate it explicitly. */
  save_ExoFileOut[MAX_FNL-1] = '\0';

  /* Write results to file (exoII format)
   */
  printf(" push_mode                          = %12d  \n", push_mode);
  if (push_mode > 0)
    {
      puts(" Writing modes to file ...");
      /* Write to exo file
       * Each mode is written as a "time step" solution into exoII DB
       */
      for(i = 0; i < push_mode; i++)
	{
	  printf("\t\t Mode %4d ...", i);
	  /* The generated name is longer than the saved one, so bound
	   * the write to the MAX_FNL bytes ExoFileOut is expected to
	   * provide (the restore below copies MAX_FNL bytes into it). */
	  if(LSA_3D_of_2D_wave_number == -1.0)
	    snprintf(ExoFileOut, MAX_FNL, "LSA_%d_of_%d_%s", i + 1,
		     push_mode, save_ExoFileOut);
	  else
	    snprintf(ExoFileOut, MAX_FNL, "LSA_%d_of_%d_wn=%g_%s", i + 1,
		     push_mode, LSA_3D_of_2D_wave_number, save_ExoFileOut);

	  /* Replicate basic mesh info */
	  one_base(exo);
	  wr_mesh_exo(exo, ExoFileOut, 0);
	  wr_result_prelim_exo(rd, exo, ExoFileOut, NULL);
	  /* Update exo file for distributed problem info 
	   */
	  if (Num_Proc > 1) {
	    wr_dpi(dpi, ExoFileOut, 0);
	  }
	  for (j = 0; j < tnv; j++) {
	    extract_nodal_vec(&evect[i][0], rd->nvtype[j], rd->nvkind[j], 
			      rd->nvmatID[j], gvec, exo, FALSE, time_value);
	    wr_nodal_result_exo(exo, ExoFileOut, gvec, j+1, 1, 
				time_value);
	  }

	  /*
	   *  Add additional user-specified post processing variables 
	   */
	  if (tnv_post > 0) {
	    post_process_nodal(&evect[i][0], NULL, x_old, xdot, xdot_old,
			       resid_vector, 1, &time_value, delta_t, 0.0,
                               NULL, exo, dpi, rd, ExoFileOut);
	  }
	  zero_base(exo);
	  printf(" recorded.\n");
	}
    }
  /* MMH: See comments above. */
  strncpy(ExoFileOut, save_ExoFileOut, MAX_FNL);

  /* De-allocate work vectors
   */
  printf("Deallocating memory ... ");
  i = nj+5;
  j = mm+5;
  Dmatrix_death(schur, j, i);
  Dmatrix_death(evect, j, i);
  Dvector_death(&v2[0], nj+5);
  Dvector_death(&v1[0], nj+5);
  Dvector_death(&mat[0], nnz_j+5);
  Dvector_death(&ev_e[0], mm+5);
  Dvector_death(&ev_i[0], mm+5);
  Dvector_death(&ev_r[0], mm+5);
  Dvector_death(&ev_x[0], mm+5);
  printf("done.\n");
}