/*
 * Combine the quaternions q1 and q2 and store the result in dest.
 *
 * The result is assembled in a local temporary first, so dest may alias
 * q1 or q2.  A call counter kept in a function-local static triggers
 * normalize_quat(dest) once it exceeds RENORMCOUNT, after which the
 * counter restarts from zero.
 */
void add_quats(float q1[4], float q2[4], float dest[4])
{
    static int calls_since_renorm = 0;
    float q1_scaled[4], q2_scaled[4], cross_term[4];
    float result[4];
    int c;

#if 0
    printf("q1 = %f %f %f %f\n", q1[0], q1[1], q1[2], q1[3]);
    printf("q2 = %f %f %f %f\n", q2[0], q2[1], q2[2], q2[3]);
#endif

    /* q1 scaled by the 4th component of q2 */
    vcopy(q1, q1_scaled);
    vscale(q1_scaled, q2[3]);

    /* q2 scaled by the 4th component of q1 */
    vcopy(q2, q2_scaled);
    vscale(q2_scaled, q1[3]);

    /* cross term of the two vector parts (argument order q2, q1) */
    vcross(q2, q1, cross_term);

    /* sum the three contributions into the first three components */
    vadd(q1_scaled, q2_scaled, result);
    vadd(cross_term, result, result);

    /* 4th component */
    result[3] = q1[3] * q2[3] - vdot(q1, q2);

#if 0
    printf("tf = %f %f %f %f\n", result[0], result[1], result[2], result[3]);
#endif

    for (c = 0; c < 4; ++c) {
        dest[c] = result[c];
    }

    /* Periodic renormalisation of the accumulated result. */
    calls_since_renorm++;
    if (calls_since_renorm > RENORMCOUNT) {
        calls_since_renorm = 0;
        normalize_quat(dest);
    }
}
/*
 * Given a quaternion, compute an axis and an angle.
 *
 *   vec  - output: receives the axis (3 components)
 *   phi  - output: receives the angle, computed as 2*acos(quat[3])
 *   quat - input: the first three components are treated as the vector
 *          part, the fourth as the scalar part
 *
 * If the vector part is exactly zero there is no axis to extract; the
 * function then reports the axis (0,0,1) with an angle of 0.
 */
void quat_to_axis(double vec[3], double *phi, double quat[4])
{
    double scale;
    double w;

    scale = quat[0]*quat[0] + quat[1]*quat[1] + quat[2]*quat[2];

    if (scale == 0) {
        /* no rotation, we're stuffed */
        vec[0] = 0;
        vec[1] = 0;
        vec[2] = 1;
        *phi = 0;
        return;
    }

    vcopy(vec, quat);
    vscale(vec, 1.0/scale);
    vnormal(vec);

    /*
     * acos() is only defined on [-1, 1].  Accumulated rounding error can
     * push the scalar part slightly outside that range, which would make
     * the angle NaN, so clamp it first.  Values already inside the range
     * are passed through unchanged.
     */
    w = quat[3];
    if (w > 1.0) {
        w = 1.0;
    } else if (w < -1.0) {
        w = -1.0;
    }
    *phi = 2.0*acos(w);
}
// viscosity void Fluid2::fluidViscosity( const float dt ) { if( Scene::testcase >= Scene::SMOKE ) { Array2< float > ucopy( velocityX ); Array2< float > vcopy( velocityY ); const Index2& sizeu = velocityX.getSize(); const Index2& sizev = velocityY.getSize(); const Vec2 dx = grid.getCellDx(); const Vec2 invDxSq( 1.0f / ( dx.x * dx.x ), 1.0f / ( dx.y * dx.y ) ); const float dtMuOverRho = dt * Scene::kViscosity / Scene::kDensity; for( unsigned int i = 0; i < sizeu.x; ++i ) for( unsigned int j = 0; j < sizeu.y; ++j ) { const Index2 id( i, j ); const Index2 id1( clamp( i-1, 0, sizeu.x-1 ), j ); const Index2 id2( clamp( i+1, 0, sizeu.x-1 ), j ); const Index2 id3( i , clamp( j-1, 0, sizeu.y-1 ) ); const Index2 id4( i , clamp( j+1, 0, sizeu.y-1 ) ); velocityX[ id ] += dtMuOverRho * ( ( ucopy[ id1 ] - 2.0f * ucopy[ id ] + ucopy[ id2 ] ) * invDxSq.x + ( ucopy[ id3 ] - 2.0f * ucopy[ id ] + ucopy[ id4 ] ) * invDxSq.y ); } for( unsigned int i = 0; i < sizev.x; ++i ) for( unsigned int j = 0; j < sizev.y; ++j ) { const Index2 id( i, j ); const Index2 id1( clamp( i-1, 0, sizev.x-1 ), j ); const Index2 id2( clamp( i+1, 0, sizev.x-1 ), j ); const Index2 id3( i , clamp( j-1, 0, sizev.y-1 ) ); const Index2 id4( i , clamp( j+1, 0, sizev.y-1 ) ); velocityY[ id ] += dtMuOverRho * ( ( vcopy[ id1 ] - 2.0f * vcopy[ id ] + vcopy[ id2 ] ) * invDxSq.x + ( vcopy[ id3 ] - 2.0f * vcopy[ id ] + vcopy[ id4 ] ) * invDxSq.y ); } } }
// Audio-rate bus writer: input 0 selects the first bus channel, inputs
// 1..mNumInputs-1 are the signals written to consecutive channels.
//
// For each channel the bus is overwritten (and stamped with the current
// buffer counter) if it has not been touched during this buffer, otherwise
// the input is summed into what is already there.
void vOut_next_a(IOUnit *unit, int inNumSamples)
{
	World *world = unit->mWorld;
	const int bufLength = world->mBufLength;
	const int numChannels = unit->mNumInputs - 1;

	const float fbusChannel = ZIN0(0);
	if (fbusChannel != unit->m_fbusChannel) {
		// Bus index changed: remember it and, when the whole channel range
		// is valid, re-point the cached bus/touched pointers.  An invalid
		// range leaves the previously cached pointers in place.
		unit->m_fbusChannel = fbusChannel;
		const int firstChannel = (int)fbusChannel;
		const int endChannel = firstChannel + numChannels;
		const bool rangeOk = firstChannel >= 0
			&& endChannel <= (int)world->mNumAudioBusChannels;
		if (rangeOk) {
			unit->m_bus = world->mAudioBus + (firstChannel * bufLength);
			unit->m_busTouched = world->mAudioBusTouched + firstChannel;
		}
	}

	float *busData = unit->m_bus;
	int32 *touchStamps = unit->m_busTouched;
	const int32 currentStamp = unit->mWorld->mBufCounter;

	for (int ch = 0; ch < numChannels; ++ch) {
		ACQUIRE_BUS_AUDIO((int32)fbusChannel + ch);
		float *in = IN(ch+1);
		if (touchStamps[ch] != currentStamp) {
			// First writer in this buffer: replace the bus contents.
			vcopy(busData, in, inNumSamples);
			touchStamps[ch] = currentStamp;
		} else {
			// Bus already written in this buffer: mix in.
			vadd(busData, busData, in, inNumSamples);
		}
		RELEASE_BUS_AUDIO((int32)fbusChannel + ch);
		busData += bufLength;
	}
}
// Audio-rate bus reader: input 0 selects the first bus channel and each of
// the unit's mNumOutputs outputs receives one consecutive channel.
//
// A channel whose touched stamp equals the current buffer counter is copied
// to the output; any other channel yields silence (zeros).
void vIn_next_a(IOUnit *unit, int inNumSamples)
{
	World *world = unit->mWorld;
	int bufLength = world->mBufLength;
	int numChannels = unit->mNumOutputs;

	float fbusChannel = ZIN0(0);
	if (fbusChannel != unit->m_fbusChannel) {
		unit->m_fbusChannel = fbusChannel;
		// Convert through a signed int.  Converting a negative float to an
		// unsigned integer type is undefined behaviour, and the "< 0" test
		// below can only reject negative indices if the value stays signed.
		int busChannel = (int)fbusChannel;
		int lastChannel = busChannel + numChannels;

		// Only re-point the cached pointers when the whole range is valid;
		// otherwise the previously cached pointers are kept.
		if (!(busChannel < 0 || lastChannel > (int)world->mNumAudioBusChannels)) {
			unit->m_bus = world->mAudioBus + (busChannel * bufLength);
			unit->m_busTouched = world->mAudioBusTouched + busChannel;
		}
	}

	float *in = unit->m_bus;
	int32 *touched = unit->m_busTouched;
	int32 bufCounter = unit->mWorld->mBufCounter;

	for (int i=0; i<numChannels; ++i, in += bufLength) {
		ACQUIRE_BUS_AUDIO_SHARED((int32)fbusChannel + i);
		float *out = OUT(i);
		if (touched[i] == bufCounter) {
			vcopy(out, in, inNumSamples);
		} else {
			vfill(out, 0.f, inNumSamples);
		}
		RELEASE_BUS_AUDIO_SHARED((int32)fbusChannel + i);
	}
}
void rcMarkConvexPolyArea(const float* verts, const int nverts, const float hmin, const float hmax, unsigned char areaId, rcCompactHeightfield& chf) { float bmin[3], bmax[3]; vcopy(bmin, verts); vcopy(bmax, verts); for (int i = 1; i < nverts; ++i) { vmin(bmin, &verts[i*3]); vmax(bmax, &verts[i*3]); } bmin[1] = hmin; bmax[1] = hmax; int minx = (int)((bmin[0]-chf.bmin[0])/chf.cs); int miny = (int)((bmin[1]-chf.bmin[1])/chf.ch); int minz = (int)((bmin[2]-chf.bmin[2])/chf.cs); int maxx = (int)((bmax[0]-chf.bmin[0])/chf.cs); int maxy = (int)((bmax[1]-chf.bmin[1])/chf.ch); int maxz = (int)((bmax[2]-chf.bmin[2])/chf.cs); if (maxx < 0) return; if (minx >= chf.width) return; if (maxz < 0) return; if (minz >= chf.height) return; if (minx < 0) minx = 0; if (maxx >= chf.width) maxx = chf.width-1; if (minz < 0) minz = 0; if (maxz >= chf.height) maxz = chf.height-1; // TODO: Optimize. for (int z = minz; z <= maxz; ++z) { for (int x = minx; x <= maxx; ++x) { const rcCompactCell& c = chf.cells[x+z*chf.width]; for (int i = (int)c.index, ni = (int)(c.index+c.count); i < ni; ++i) { rcCompactSpan& s = chf.spans[i]; if ((int)s.y >= miny && (int)s.y <= maxy) { if (areaId < chf.areas[i]) { float p[3]; p[0] = chf.bmin[0] + (x+0.5f)*chf.cs; p[1] = 0; p[2] = chf.bmin[2] + (z+0.5f)*chf.cs; if (pointInPoly(nverts, verts, p)) { chf.areas[i] = areaId; } } } } } } }
/******************************************************************* Subroutine to do the EM algorithm matrix *D: the pointer to the matrix data matrix *mean0_x: the pointer to a matrix containing the initial Means of clusters vector *w0: the pointer to a vector containing the initial mixing proportion of clusters double vv: the value for initializing the Covariance matrix of clusters double error: the error threshold vector *Zjk_up: the pointer to a vector containing Posterior probabilities of the up-level cluster samples matrix *mean1_x: the pointer to a matrix containing the Means of clusters in t-space vector *w0_t: the pointer to a vector containing the mixing proportions of the identified clusters in t-space matrix *cov_mat: the pointer to a group of matrixs containing the Covariance matrix of clusters in t-space matrix *Zjk: the pointer to a matrix containing Posterior probabilities of all samples belonging to all the sub-level clusters, each column is for one cluster. return value: '1' - successfully exit '0' - exit with waring/error *******************************************************************/ int veSubEM(matrix *D, matrix *mean0_x, vector *w0, double vv, double error, vector *Zjk_up, //input matrix *mean1_x, vector *w0_t, matrix *cov_mat, matrix *Zjk) //output { int k0, kc, n, p; int i, j, k, u, s; matrix *Var0; matrix Gxn; vector Fx; matrix MUK; matrix MU1; int zeroFx_num = 1; //double error = 0.01; double err = error + (double)1; vector Zjk_temp; n = D->m; p = D->n; k0 = mean0_x->m; kc = mean0_x->n; Var0 = new matrix[k0]; for(i=0; i<k0; i++) { mnew(Var0+i, p, p); } mnew(&Gxn, n, k0); vnew(&Fx, n); vnew(&Zjk_temp, n); mnew(&MUK, k0, p); mcopy(mean0_x, &MUK); mnew(&MU1, k0, p); vector D_j; vector Zjk_k; double sum_tmp = 0; matrix Ck; vector D_i; vector MUK_k; vector cen_D_i; matrix mtmp; vector vtmp; vnew(&D_j, n); vnew(&Zjk_k, n); mnew(&Ck, p, p); vnew(&D_i, p); vnew(&MUK_k, p); vnew(&cen_D_i, p); mnew(&mtmp, p, p); vnew(&vtmp, n); //Initializing 
the parameters of mixture of Gaussians //Initinalize the covariance matrix //Use EM algorithm to perform the local training. //Test intialization of covarinace matrix //printf("Testing covariance matrix initialization... \n"); while (zeroFx_num != 0) { for(i=0; i<k0; i++) { meye(Var0+i); for (j=0; j<p; j++) { *((Var0+i)->pr+j*p+j) = vv; } } veModel(D, mean0_x, Var0, w0, &Gxn, &Fx); //printf("\n Gxn = :\n"); //mprint(&Gxn); //printf("\n Fx = :\n"); //vprint(&Fx); zeroFx_num = 0; for (i=0; i<n; i++) { if (*(Fx.pr+i) == 0) { zeroFx_num++; } } vv *= 2; } vones(&Zjk_temp); //printf("\n EM in t-space starts ... \n"); //printf("\n Data = \n"); //mprint(D); int l = 0; while (err > error) { #ifdef _DEBUG printf(" \n...... in EM loop %d ......\n", ++l); printf("\n L%d: w0 = \n", l); vprint(w0); printf("\n L%d: MUK = \n", l); mprint(&MUK); printf("\n L%d: Var0 = \n", l); for(i=0; i<k0; i++) { mprint(Var0+i); printf("\n"); } printf("\n L%d: Zjk = \n", l); mprint(Zjk); #endif veModel(D, &MUK, Var0, w0, &Gxn, &Fx); #ifdef _DEBUG printf("\n L%d: Gxn = \n", l); mprint(&Gxn); printf("\n L%d: Fx = \n", l); vprint(&Fx); #endif for (k=0; k<k0; k++) { u = k*p; double zz = 0; double zz_up = 0; for (i=0; i<n; i++) { *(Zjk->pr+i*k0+k) = (*(w0->pr+k)) * Zjk_up->pr[i] * (*(Gxn.pr+i*k0+k)) / (*(Fx.pr+i)); zz += *(Zjk->pr+i*k0+k); zz_up += Zjk_up->pr[i]; } *(w0->pr+k) = zz/zz_up; for (j=0; j<p; j++) { getcolvec(D, j, &D_j); getcolvec(Zjk, k, &Zjk_k); sum_tmp = 0; for (i=0; i<n; i++) { sum_tmp += (*(Zjk_k.pr+i)) * (*(D_j.pr+i)); } *(MU1.pr+u+j) = sum_tmp / zz; } mzero(&Ck); for (i=0; i<n; i++) { getrowvec(D, i, &D_i); getrowvec(&MUK, k, &MUK_k); for (j=0; j<p; j++) { *(cen_D_i.pr+j) = *(D_i.pr+j) - *(MUK_k.pr+j); } vvMul(&cen_D_i, &cen_D_i, &mtmp); for (j=0; j<p; j++) { for (s=0; s<p; s++) { *(Ck.pr+j*p+s) += (*(Zjk->pr+i*k0+k)) * (*(mtmp.pr+j*p+s)); } } } for (j=0; j<p; j++) { for (s=0; s<p; s++) { *(Var0[k].pr+j*p+s) = (*(Ck.pr+j*p+s)) / zz; } } } // for (k... 
mcopy(&MU1, &MUK); for (i=0; i<n; i++) { *(vtmp.pr+i) = fabs(*(Zjk_k.pr+i) - *(Zjk_temp.pr+i)); } err = vmean(&vtmp); vcopy(&Zjk_k, &Zjk_temp); } // while vcopy(w0, w0_t); mcopy(&MUK, mean1_x); for(i=0; i<k0; i++) { mcopy(Var0+i, cov_mat+i); } for(i=0; i<k0; i++) { mdelete(Var0+i); } mdelete(&Gxn); vdelete(&Fx); vdelete(&Zjk_temp); mdelete(&MUK); mdelete(&MU1); vdelete(&D_j); vdelete(&Zjk_k); mdelete(&Ck); vdelete(&D_i); vdelete(&MUK_k); vdelete(&cen_D_i); mdelete(&mtmp); vdelete(&vtmp); return 1; }
msym_error_t partitionEquivalenceSets(int length, msym_element_t *elements[length], msym_element_t *pelements[length], msym_geometry_t g, int *esl, msym_equivalence_set_t **es, msym_thresholds_t *thresholds) { int ns = 0, gd = geometryDegenerate(g); double *e = calloc(length,sizeof(double)); double *s = calloc(length,sizeof(double)); int *sp = calloc(length,sizeof(int)); //set partition int *ss = calloc(length,sizeof(int)); //set size double (*ev)[3] = calloc(length,sizeof(double[3])); double (*ep)[3] = calloc(length,sizeof(double[3])); double (*vec)[3] = calloc(length, sizeof(double[3])); double *m = calloc(length, sizeof(double)); for(int i = 0;i < length;i++){ vcopy(elements[i]->v, vec[i]); m[i] = elements[i]->m; } for(int i=0; i < length; i++){ for(int j = i+1; j < length;j++){ double w = m[i]*m[j]/(m[i]+m[j]); double dist; double v[3]; double proji[3], projj[3]; vnorm2(vec[i],v); vproj_plane(vec[j], v, proji); vscale(w, proji, proji); vadd(proji,ep[i],ep[i]); vnorm2(vec[j],v); vproj_plane(vec[i], v, projj); vscale(w, projj, projj); vadd(projj,ep[j],ep[j]); vsub(vec[j],vec[i],v); dist = vabs(v); vscale(w/dist,v,v); vadd(v,ev[i],ev[i]); vsub(ev[j],v,ev[j]); double dij = w*dist; //This is sqrt(I) for a diatomic molecule along an axis perpendicular to the bond with O at center of mass. e[i] += dij; e[j] += dij; s[i] += SQR(dij); s[j] += SQR(dij); } vsub(vec[i],ev[i],ev[i]); } for(int i = 0; i < length; i++){ double v[3]; double w = m[i]/2.0; double dist = vabs(elements[i]->v); double dii = w*dist; vscale(w,elements[i]->v,v); vsub(ev[i],v,ev[i]); // Plane projection can't really differentiate certain types of structures when we add the initial vector, // but not doing so will result in huge cancellation errors on degenerate point groups, // also large masses will mess up the eq check when this is 0. 
if(gd) vadd(ep[i],v,ep[i]); e[i] += dii; s[i] += SQR(dii); } for(int i = 0; i < length; i++){ if(e[i] >= 0.0){ sp[i] = i; for(int j = i+1; j < length;j++){ if(e[j] >= 0.0){ double vabsevi = vabs(ev[i]), vabsevj = vabs(ev[j]), vabsepi = vabs(ep[i]), vabsepj = vabs(ep[j]); double eep = 0.0, eev = fabs((vabsevi)-(vabsevj))/((vabsevi)+(vabsevj)), ee = fabs((e[i])-(e[j]))/((e[i])+(e[j])), es = fabs((s[i])-(s[j]))/((s[i])+(s[j])); if(!(vabsepi < thresholds->zero && vabsepj < thresholds->zero)){ eep = fabs((vabsepi)-(vabsepj))/((vabsepi)+(vabsepj)); } double max = fmax(eev,fmax(eep,fmax(ee, es))); if(max < thresholds->equivalence && elements[i]->n == elements[j]->n){ e[j] = max > 0.0 ? -max : -1.0; sp[j] = i; } } } e[i] = -1.0; } } for(int i = 0; i < length;i++){ int j = sp[i]; ns += (ss[j] == 0); ss[j]++; } msym_equivalence_set_t *eqs = calloc(ns,sizeof(msym_equivalence_set_t)); msym_element_t **lelements = elements; msym_element_t **pe = pelements; if(elements == pelements){ lelements = malloc(sizeof(msym_element_t *[length])); memcpy(lelements, elements, sizeof(msym_element_t *[length])); } for(int i = 0, ni = 0; i < length;i++){ if(ss[i] > 0){ int ei = 0; eqs[ni].elements = pe; eqs[ni].length = ss[i]; for(int j = 0; j < length;j++){ if(sp[j] == i){ double err = (e[j] > -1.0) ? fabs(e[j]) : 0.0; eqs[ni].err = fmax(eqs[ni].err,err); eqs[ni].elements[ei++] = lelements[j]; } } pe += ss[i]; ni++; } } if(elements == pelements){ free(lelements); } free(m); free(vec); free(s); free(e); free(sp); free(ss); free(ev); free(ep); *es = eqs; *esl = ns; return MSYM_SUCCESS; }
//TODO: Use a preallocated pointer array instead of multiple mallocs msym_error_t generateEquivalenceSet(msym_point_group_t *pg, int length, msym_element_t elements[length], int *glength, msym_element_t **gelements, int *esl, msym_equivalence_set_t **es,msym_thresholds_t *thresholds){ msym_error_t ret = MSYM_SUCCESS; msym_element_t *ge = calloc(length,sizeof(msym_element_t[pg->order])); msym_equivalence_set_t *ges = calloc(length,sizeof(msym_equivalence_set_t)); int gel = 0; int gesl = 0; for(int i = 0;i < length;i++){ msym_equivalence_set_t *aes = NULL; int f; for(f = 0;f < gel;f++){ if(ge[f].n == elements[i].n && ge[f].m == elements[i].m && 0 == strncmp(ge[f].name, elements[i].name, sizeof(ge[f].name)) && vequal(ge[f].v, elements[i].v, thresholds->permutation)){ break; } } if(f == gel){ aes = &ges[gesl++]; aes->elements = calloc(pg->order,sizeof(msym_element_t*)); aes->length = 0; } else { continue; } if(elements[i].aol > 0 || elements[i].ao != NULL){ msymSetErrorDetails("Cannot (currently) generate equivalence sets from elements with orbitals"); ret = MSYM_INVALID_ELEMENTS; goto err; } for(msym_symmetry_operation_t *s = pg->sops;s < (pg->sops + pg->sopsl);s++){ double v[3]; applySymmetryOperation(s, elements[i].v, v); for(f = 0;f < gel;f++){ if(ge[f].n == elements[i].n && ge[f].m == elements[i].m && 0 == strncmp(ge[f].name, elements[i].name, sizeof(ge[f].name)) && vequal(ge[f].v, v, thresholds->permutation)){ break; } } if(f == gel){ memcpy(&ge[gel],&elements[i],sizeof(msym_element_t)); vcopy(v, ge[gel].v); aes->elements[aes->length++] = &ge[gel++]; } } if(pg->order % aes->length != 0){ msymSetErrorDetails("Equivalence set length (%d) not a divisor of point group order (%d)",pg->order); ret = MSYM_INVALID_EQUIVALENCE_SET; goto err; } aes->elements = realloc(aes->elements,sizeof(msym_element_t*[aes->length])); } msym_element_t *geo = ge; ge = realloc(ge,sizeof(msym_element_t[gel])); ges = realloc(ges,sizeof(msym_equivalence_set_t[gesl]) + sizeof(msym_element_t 
*[gel])); msym_element_t **ep = (msym_element_t **) &ges[gesl]; for(int i = 0;i < gesl;i++){ msym_element_t **tep = ep; for(int j = 0;j < ges[i].length;j++){ *ep = ges[i].elements[j] - geo + ge; ep++; } free(ges[i].elements); ges[i].elements = tep; } *glength = gel; *gelements = ge; *es = ges; *esl = gesl; return ret; err: free(ge); for(int i = 0; i < gesl;i++) free(ges[i].elements); free(ges); return ret; }
void LpProjector::proj_lpball_newton_normalized(const double *y, double *xout, double p,int &numiter){ double normF,normz0,mu,chi; double tol=1e-15; numiter=0; // special case p=2, p=Inf if(p==2.0){ radial_lp_project(y,xout,N,p); return; } if (p==Inf){ l_infinity_project(y,xout,N); return; } ////////////////////////////////////////////////// // Initialization // xn1 : radial projection onto Lp ball // xn2 : L\infty projection followed by radial projection // pick the one closest to y ////////////////////////////////////////////////// radial_lp_project(y,xn1,N,p); l_infinity_project(y,xn2,N); radial_lp_project(xn2,xn2,N,p); if (dpnorm(xn1,y,N,2.0) < dpnorm(xn2,y,N,2.0)) { vcopy(xn1,z,N); // initialize with xn1 } else { vcopy(xn2,z,N); // initialize with xn2 } // initialize lagrange multiplier coordinate with least squares fit z[N]=lsq_lambda_init(z,y,p); // we are initialized! normz0=pnorm(z,N+1,2.0); normF=tol*normz0+1; while ( (normF/normz0) > tol ) { // build residual F for (int k=0;k<N;k++){ zpm1[k]=pow(z[k],p-1); F[k]=z[k]+z[N]*zpm1[k]-y[k]; } double szp=0; for (int k=0;k<N;k++) szp+=pow(z[k],p); F[N]=(szp-1)/p; normF=pnorm(F,N+1,2.0); // build Jacobian matrix J for (int k=0;k<N;k++) d[k]=1+z[N]*(p-1)*pow(z[k],p-2); vdiv(zpm1,d,btwid,N); mu=dotp(zpm1,btwid,N); chi=-dotp(btwid,F,N); // uses only first N entries of F for (int k=0;k<N;k++) dz[k]=-F[k]/d[k] + btwid[k]*(-F[N]-chi)/mu; dz[N]=(chi+F[N])/mu; for (int k=0;k<N+1;k++) z[k]=z[k]+dz[k]; if (verbose){ vprint(F,N+1); mexPrintf("nF %e\n",normF); } numiter++; if (numiter>max_numiter) mexErrMsgTxt("maximum # of iterations exceeded in proj_lpball_newton\n"); } // answer is first N coordinates of z. vcopy(z,xout,N); }
static bool buildPolyDetail(const float* in, const int nin, unsigned short reg, const float sampleDist, const float sampleMaxError, const rcCompactHeightfield& chf, const rcHeightPatch& hp, float* verts, int& nverts, rcIntArray& tris, rcIntArray& edges, rcIntArray& idx, rcIntArray& samples) { static const int MAX_VERTS = 256; static const int MAX_EDGE = 64; float edge[(MAX_EDGE+1)*3]; nverts = 0; for (int i = 0; i < nin; ++i) vcopy(&verts[i*3], &in[i*3]); nverts = nin; const float ics = 1.0f/chf.cs; // Tesselate outlines. // This is done in separate pass in order to ensure // seamless height values across the ply boundaries. if (sampleDist > 0) { for (int i = 0, j = nin-1; i < nin; j=i++) { const float* vj = &in[j*3]; const float* vi = &in[i*3]; // Make sure the segments are always handled in same order // using lexological sort or else there will be seams. if (fabsf(vj[0]-vi[0]) < 1e-6f) { if (vj[2] > vi[2]) rcSwap(vj,vi); } else { if (vj[0] > vi[0]) rcSwap(vj,vi); } // Create samples along the edge. float dx = vi[0] - vj[0]; float dy = vi[1] - vj[1]; float dz = vi[2] - vj[2]; float d = sqrtf(dx*dx + dz*dz); int nn = 1 + (int)floorf(d/sampleDist); if (nn > MAX_EDGE) nn = MAX_EDGE; if (nverts+nn >= MAX_VERTS) nn = MAX_VERTS-1-nverts; for (int k = 0; k <= nn; ++k) { float u = (float)k/(float)nn; float* pos = &edge[k*3]; pos[0] = vj[0] + dx*u; pos[1] = vj[1] + dy*u; pos[2] = vj[2] + dz*u; pos[1] = chf.bmin[1] + getHeight(pos, chf.bmin, ics, hp)*chf.ch; } // Simplify samples. int idx[MAX_EDGE] = {0,nn}; int nidx = 2; for (int k = 0; k < nidx-1; ) { const int a = idx[k]; const int b = idx[k+1]; const float* va = &edge[a*3]; const float* vb = &edge[b*3]; // Find maximum deviation along the segment. float maxd = 0; int maxi = -1; for (int m = a+1; m < b; ++m) { float d = distancePtSeg(&edge[m*3],va,vb); if (d > maxd) { maxd = d; maxi = m; } } // If the max deviation is larger than accepted error, // add new point, else continue to next segment. 
if (maxi != -1 && maxd > rcSqr(sampleMaxError)) { for (int m = nidx; m > k; --m) idx[m] = idx[m-1]; idx[k+1] = maxi; nidx++; } else { ++k; } } // Add new vertices. for (int k = 1; k < nidx-1; ++k) { vcopy(&verts[nverts*3], &edge[idx[k]*3]); nverts++; } } } // Tesselate the base mesh. edges.resize(0); tris.resize(0); idx.resize(0); delaunay(nverts, verts, idx, tris, edges); if (sampleDist > 0) { // Create sample locations in a grid. float bmin[3], bmax[3]; vcopy(bmin, in); vcopy(bmax, in); for (int i = 1; i < nin; ++i) { vmin(bmin, &in[i*3]); vmax(bmax, &in[i*3]); } int x0 = (int)floorf(bmin[0]/sampleDist); int x1 = (int)ceilf(bmax[0]/sampleDist); int z0 = (int)floorf(bmin[2]/sampleDist); int z1 = (int)ceilf(bmax[2]/sampleDist); samples.resize(0); for (int z = z0; z < z1; ++z) { for (int x = x0; x < x1; ++x) { float pt[3]; pt[0] = x*sampleDist; pt[2] = z*sampleDist; // Make sure the samples are not too close to the edges. if (distToPoly(nin,in,pt) > -sampleDist/2) continue; samples.push(x); samples.push(getHeight(pt, chf.bmin, ics, hp)); samples.push(z); } } // Add the samples starting from the one that has the most // error. The procedure stops when all samples are added // or when the max error is within treshold. const int nsamples = samples.size()/3; for (int iter = 0; iter < nsamples; ++iter) { // Find sample with most error. float bestpt[3]; float bestd = 0; for (int i = 0; i < nsamples; ++i) { float pt[3]; pt[0] = samples[i*3+0]*sampleDist; pt[1] = chf.bmin[1] + samples[i*3+1]*chf.ch; pt[2] = samples[i*3+2]*sampleDist; float d = distToTriMesh(pt, verts, nverts, &tris[0], tris.size()/4); if (d < 0) continue; // did not hit the mesh. if (d > bestd) { bestd = d; vcopy(bestpt,pt); } } // If the max error is within accepted threshold, stop tesselating. if (bestd <= sampleMaxError) break; // Add the new sample point. vcopy(&verts[nverts*3],bestpt); nverts++; // Create new triangulation. // TODO: Incremental add instead of full rebuild. 
edges.resize(0); tris.resize(0); idx.resize(0); delaunay(nverts, verts, idx, tris, edges); if (nverts >= MAX_VERTS) break; } } return true; }
/********************************************************************
Real GEMM kernel for small blocks.

Refuses (returns false, touching nothing) when any of m, n, k exceeds
alglib_r_block.  Otherwise the referenced block of _b is copied into an
aligned local buffer, and for each of the m result rows one row
(optypea==0) or one column (optypea!=0) of _a is copied into an aligned
buffer and passed to mv() together with alpha, beta and the matching row
of _c.  When beta is exactly zero the row of _c is cleared first.
Returns true after processing all rows.
********************************************************************/
bool ialglib::_i_rmatrixgemmf(int m, int n, int k, double alpha, const ap::real_2d_array& _a, int ia, int ja, int optypea, const ap::real_2d_array& _b, int ib, int jb, int optypeb, double beta, ap::real_2d_array& _c, int ic, int jc)
{
    if( m>alglib_r_block || n>alglib_r_block || k>alglib_r_block )
        return false;

    // Over-sized local storage so aligned pointers can be carved out of it.
    double rowStorage[alglib_r_block+alglib_simd_alignment];
    double blockStorage[alglib_r_block*alglib_r_block+alglib_simd_alignment];
    double * const abuf = (double * const) alglib_align(rowStorage, alglib_simd_alignment);
    double * const b    = (double * const) alglib_align(blockStorage, alglib_simd_alignment);

    //
    // copy b
    //
    if( optypeb==0 )
        mcopyblock(k, n, &_b(ib,jb), 1, _b.getstride(), b);
    else
        mcopyblock(n, k, &_b(ib,jb), 0, _b.getstride(), b);

    //
    // multiply B by A (from the right, by rows)
    // and store result in C
    //
    const int astride = _a.getstride();
    const int cstride = _c.getstride();

    // optypea==0: take consecutive rows of A (unit element stride, advance
    // by one row).  Otherwise take consecutive columns (element stride of
    // one row, advance by one element).
    const int elemStep = (optypea==0) ? 1 : astride;
    const int nextStep = (optypea==0) ? astride : 1;

    const double *asrc = &_a(ia,ja);
    double *crow = &_c(ic,jc);
    for(int row=0; row<m; row++)
    {
        vcopy(k, asrc, elemStep, abuf, 1);
        if( beta==0 )
            vzero(n, crow, 1);
        mv(n, k, b, abuf, crow, 1, alpha, beta);
        crow += cstride;
        asrc += nextStep;
    }
    return true;
}
/*
 * Skins every vertex and normal of `s` and writes the results to vdst/ndst.
 *
 * Source positions start at s->ptrs[0]; the source normals follow them at
 * an AL32()-rounded byte offset.  With USE_ALTIVEC the bulk of the work is
 * done four vertices (48 bytes) at a time through appbones(); the scalar
 * loop at the end handles whatever is left (or everything when AltiVec is
 * not enabled).  With TIMING defined the elapsed time is printed.
 */
static void translate (State *s, float *vdst, float *ndst)
{
  int i, j;
  float *vsrc = s->ptrs[0];
  float *nsrc = (float *) ((char *) vsrc + AL32 (s->num_vertices * 3 * sizeof (GLfloat)));
  struct skin *skin = s->skin;
#ifdef TIMING
  double S = now (), E;
#endif

#ifdef USE_ALTIVEC
  /* Permute masks that repack four per-vertex results into three vectors. */
  vector unsigned char p0 = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 16, 17, 18, 19 };
  vector unsigned char p1 = { 4, 5, 6, 7, 8, 9, 10, 11, 16, 17, 18, 19, 20, 21, 22, 23 };
  vector unsigned char p2 = { 8, 9, 10, 11, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27 };

  /* i counts groups of four vertices, j is the byte offset of the group. */
  for (i = 0, j = 0; i < s->num_vertices >> 2; ++i, j += 48)
    {
      vector float v0, v1, v2, n0, n1, n2;
      vector float vx, vy, vz, nx, ny, nz;
      vector float vr0, vr1, vr2, vr3;
      vector float nr0, nr1, nr2, nr3;

#ifdef G4
      if (!(i & 3))
        {
          DCB (dcbz, vdst, j);
          DCB (dcbz, ndst, j);
        }
      DCB (dcbz, vdst, j + 32);
      DCB (dcbz, ndst, j + 32);
#endif
      DCB (dcbt, skin, 0);
      DCB (dcbt, skin + 1, 0);
      DCB (dcbt, skin + 2, 0);
      DCB (dcbt, skin + 3, 0);
      DCB (dcbt, vsrc, j + 64);
      DCB (dcbt, nsrc, j + 64);
      DCB (dcbt, vsrc, j + 96);
      DCB (dcbt, nsrc, j + 96);

      /* Load four packed positions and four packed normals */
      v0 = vec_ld (j, vsrc);
      v1 = vec_ld (j + 16, vsrc);
      v2 = vec_ld (j + 32, vsrc);
      n0 = vec_ld (j, nsrc);
      n1 = vec_ld (j + 16, nsrc);
      n2 = vec_ld (j + 32, nsrc);

      /* Vertex/normal 1: components 0..2 of the first vector */
      vx = vec_splat (v0, 0);
      vy = vec_splat (v0, 1);
      vz = vec_splat (v0, 2);
      nx = vec_splat (n0, 0);
      ny = vec_splat (n0, 1);
      nz = vec_splat (n0, 2);
      vr0 = appbones (s, skin, vx, vy, vz, nx, ny, nz, &nr0);
      skin++;

      /* Vertex/normal 2: straddles the first and second vector */
      vx = vec_splat (v0, 3);
      vy = vec_splat (v1, 0);
      vz = vec_splat (v1, 1);
      nx = vec_splat (n0, 3);
      ny = vec_splat (n1, 0);
      nz = vec_splat (n1, 1);
      vr1 = appbones (s, skin, vx, vy, vz, nx, ny, nz, &nr1);
      skin++;

      /* Vertex/normal 3: straddles the second and third vector */
      vx = vec_splat (v1, 2);
      vy = vec_splat (v1, 3);
      vz = vec_splat (v2, 0);
      nx = vec_splat (n1, 2);
      ny = vec_splat (n1, 3);
      nz = vec_splat (n2, 0);
      vr2 = appbones (s, skin, vx, vy, vz, nx, ny, nz, &nr2);
      skin++;

      /* Vertex/normal 4: components 1..3 of the third vector */
      vx = vec_splat (v2, 1);
      vy = vec_splat (v2, 2);
      vz = vec_splat (v2, 3);
      nx = vec_splat (n2, 1);
      ny = vec_splat (n2, 2);
      nz = vec_splat (n2, 3);
      vr3 = appbones (s, skin, vx, vy, vz, nx, ny, nz, &nr3);
      skin++;

      /* Repack the four results into three vectors */
      v0 = vec_perm (vr0, vr1, p0);
      v1 = vec_perm (vr1, vr2, p1);
      v2 = vec_perm (vr2, vr3, p2);
      n0 = vec_perm (nr0, nr1, p0);
      n1 = vec_perm (nr1, nr2, p1);
      n2 = vec_perm (nr2, nr3, p2);

      /* Store */
      vec_st (v0, j, vdst);
      vec_st (v1, j + 16, vdst);
      vec_st (v2, j + 32, vdst);
      vec_st (n0, j, ndst);
      vec_st (n1, j + 16, ndst);
      vec_st (n2, j + 32, ndst);
    }

  /* Turn the group count into a vertex count and move all pointers past
     the part that has been processed. */
  i <<= 2;
  vsrc += i*3;
  nsrc += i*3;
  vdst += i*3;
  ndst += i*3;
#else
  i = 0;
#endif

  /* Scalar path: one vertex per iteration. */
  while (i < s->num_vertices)
    {
      float pos_sum[3] = {0,0,0}, nrm_sum[3] = {0,0,0};
      float nrm_term[4], pos_term[4];
      float weight;
      struct abone *bone;
      int bone_count, packed_bones;

      /* boneinfo: low 2 bits = number of bones, then 10 bits per bone index */
      bone_count = skin->boneinfo & 3;
      packed_bones = skin->boneinfo >> 2;

      for (j = 0; j < bone_count; ++j)
        {
          weight = skin->weights[j];
          bone = &s->abones[packed_bones & 0x3ff];
          packed_bones >>= 10;

          /* weighted position contribution of this bone */
          mapply_to_point (pos_term, bone->cm, vsrc);
          pos_term[0] *= weight;
          pos_term[1] *= weight;
          pos_term[2] *= weight;

          /* weighted normal contribution of this bone */
          mapply_to_vector (nrm_term, bone->cm, nsrc);
          nrm_term[0] *= weight;
          nrm_term[1] *= weight;
          nrm_term[2] *= weight;

          vaddto (pos_sum, pos_term);
          vaddto (nrm_sum, nrm_term);
        }

      vcopy (vdst, pos_sum);
      vcopy (ndst, nrm_sum);

      ++i;
      vsrc += 3;
      nsrc += 3;
      vdst += 3;
      ndst += 3;
      ++skin;
    }

#ifdef TIMING
  E = now ();
  printf ("took %f sec\n", E - S);
#endif
}
void hunt_problem(Comm_Ex *cx, /* array of communications structures */ Exo_DB *exo, /* ptr to the finite element mesh database */ Dpi *dpi) /* distributed processing information */ { int *ija=NULL; /* column pointer array */ double *a=NULL; /* nonzero array */ double *a_old=NULL; /* nonzero array */ double *x=NULL; /* solution vector */ int iAC; /* COUNTER */ double *x_AC = NULL; /* SOLUTION VECTOR OF EXTRA UNKNOWNS */ double *x_AC_old=NULL; /* old SOLUTION VECTOR OF EXTRA UNKNOWNS */ double *x_AC_dot = NULL; int iHC; /* COUNTER */ int *ija_attic=NULL; /* storage for external dofs */ int eb_indx, ev_indx; /* * variables for path traversal */ double *x_old=NULL; /* old solution vector */ double *x_older=NULL; /* older solution vector */ double *x_oldest=NULL; /* oldest solution vector saved */ double *xdot=NULL; /* current path derivative of soln */ double *xdot_old=NULL; double *x_update=NULL; double *x_sens=NULL; /* solution sensitivity */ double **x_sens_p=NULL; /* solution sensitivity for parameters */ int num_pvector=0; /* number of solution sensitivity vectors */ #ifdef COUPLED_FILL struct Aztec_Linear_Solver_System *ams[NUM_ALSS]={NULL}; #else /* COUPLED_FILL */ struct Aztec_Linear_Solver_System *ams[NUM_ALSS]={NULL, NULL}; #endif /* COUPLED_FILL */ /* sl_util_structs.h */ double *resid_vector=NULL; /* residual */ double *resid_vector_sens=NULL; /* residual sensitivity */ double *scale=NULL; /* scale vector for modified newton */ int *node_to_fill = NULL; int n; /* total number of path steps attempted */ int ni; /* total number of nonlinear solves */ int nt; /* total number of successful path steps */ int path_step_reform; /* counter for jacobian reformation stride */ int converged; /* success or failure of Newton iteration */ int success_ds; /* success or failure of path step */ int i; int nprint=0, num_total_nodes; int numProcUnknowns; int *const_delta_s=NULL; int step_print; double i_print; int good_mesh = TRUE; double *path=NULL, *path1=NULL; double 
*delta_s=NULL, *delta_s_new=NULL, *delta_s_old=NULL; double *delta_s_older=NULL, *delta_s_oldest=NULL; double *hDelta_s0=NULL, *hDelta_s_min=NULL, *hDelta_s_max=NULL; double delta_t; double theta=0.0; double damp; double eps; double *lambda=NULL, *lambdaEnd=NULL; double hunt_par, dhunt_par, hunt_par_old; /* hunting continuation parameter */ double timeValueRead = 0.0; /* * ALC management variables */ int alqALC; int *aldALC=NULL; /* * Other local variables */ int error, err, is_steady_state, inewton; int *gindex = NULL, gsize; int *p_gsize=NULL; double *gvec=NULL; double ***gvec_elem; double err_dbl; FILE *file=NULL; double toler_org[3],damp_org; struct Results_Description *rd=NULL; int tnv; /* total number of nodal variables and kinds */ int tev; /* total number of elem variables and kinds */ int tnv_post; /* total number of nodal variables and kinds for post processing */ int tev_post; /* total number of elem variables and kinds for post processing */ int max_unk_elem, one, three; /* variables used as mf_setup arguments*/ unsigned int matrix_systems_mask; double evol_local=0.0; #ifdef PARALLEL double evol_global=0.0; #endif static char yo[]="hunt_problem"; /* * BEGIN EXECUTION */ #ifdef DEBUG fprintf(stderr, "hunt_problem() begins...\n"); #endif toler_org[0] = custom_tol1; toler_org[1] = custom_tol2; toler_org[2] = custom_tol3; damp_org = damp_factor1; is_steady_state = TRUE; p_gsize = &gsize; /* * set aside space for gather global vectors to print to exoII file * note: this is temporary * * For 2D prototype problem: allocate space for T, dx, dy arrays */ if( strlen( Soln_OutFile) ) { #ifdef DEBUG printf("Trying to open \"%s\" for writing.\n", Soln_OutFile); #endif file = fopen(Soln_OutFile, "w"); if (file == NULL) { DPRINTF(stderr, "%s: opening soln file for writing\n", yo); EH(-1, "\t"); } } #ifdef PARALLEL check_parallel_error("Soln output file error"); #endif /* * Some preliminaries to help setup EXODUS II database output. 
*/ #ifdef DEBUG fprintf(stderr, "cnt_nodal_vars() begins...\n"); #endif tnv = cnt_nodal_vars(); /* tnv_post is calculated in load_nodal_tkn*/ tev = cnt_elem_vars(); /* tev_post is calculated in load_elem_tkn*/ #ifdef DEBUG fprintf(stderr, "Found %d total primitive nodal variables to output.\n", tnv); fprintf(stderr, "Found %d total primitive elem variables to output.\n", tev); #endif if ( tnv < 0 ) { DPRINTF(stderr, "%s:\tbad tnv.\n", yo); EH(-1, "\t"); } if ( tev < 0 ) { DPRINTF(stderr, "%s:\tMaybe bad tev? See goma design committee ;) \n", yo); /* exit(-1); */ } rd = (struct Results_Description *) smalloc(sizeof(struct Results_Description)); if (rd == NULL) { EH(-1, "Could not grab Results Description."); } (void) memset((void *) rd, 0, sizeof(struct Results_Description)); rd->nev = 0; /* number element variables in results */ rd->ngv = 0; /* number global variables in results */ rd->nhv = 0; /* number history variables in results */ if ( is_steady_state == TRUE ) { rd->ngv = 5; /* number global variables in results see load_global_var_info for names*/ error = load_global_var_info(rd, 0, "CONV"); error = load_global_var_info(rd, 1, "NEWT_IT"); error = load_global_var_info(rd, 2, "MAX_IT"); error = load_global_var_info(rd, 3, "CONVRATE"); error = load_global_var_info(rd, 4, "MESH_VOLUME"); } /* load nodal types, kinds, names */ error = load_nodal_tkn( rd, &tnv, &tnv_post); /* load nodal types, kinds, names */ if (error !=0) { DPRINTF(stderr, "%s: problem with load_nodal_tkn()\n", yo); EH(-1,"\t"); } /* load elem types, names */ error = load_elem_tkn( rd, exo, tev, &tev_post); /* load elem types, names */ if ( error !=0 ) { DPRINTF(stderr, "%s: problem with load_elem_tkn()\n", yo); EH(-1,"\t"); } /* * Write out the names of the nodal variables that we will be sending to * the EXODUS II output file later. 
*/ #ifdef DEBUG fprintf(stderr, "wr_result_prelim() starts...\n", tnv); #endif gvec_elem = (double ***) smalloc ( (exo->num_elem_blocks)*sizeof(double **)); for (i = 0; i < exo->num_elem_blocks; i++) { gvec_elem[i] = (double **) smalloc ( (tev + tev_post)*sizeof(double *)); } wr_result_prelim_exo( rd, exo, ExoFileOut, gvec_elem ); #ifdef DEBUG fprintf(stderr, "P_%d: wr_result_prelim_exo() ends...\n", ProcID, tnv); #endif /* * This gvec workhorse transports output variables as nodal based vectors * that are gather from the solution vector. Note: it is NOT a global * vector at all and only carries this processor's nodal variables to * the exodus database. */ asdv(&gvec, Num_Node); /* * Allocate space and manipulate for all the nodes that this processor * is aware of... */ num_total_nodes = dpi->num_universe_nodes; numProcUnknowns = NumUnknowns + NumExtUnknowns; /* allocate memory for Volume Constraint Jacobian. ACS 2/99 */ if ( nAC > 0) { for(iAC=0;iAC<nAC;iAC++) { augc[iAC].d_evol_dx = (double*) malloc(numProcUnknowns*sizeof(double)); } } asdv(&resid_vector, numProcUnknowns); asdv(&resid_vector_sens, numProcUnknowns); asdv(&scale, numProcUnknowns); for (i=0;i<NUM_ALSS;i++) { ams[i] = (struct Aztec_Linear_Solver_System *) array_alloc(1, 1, sizeof(struct Aztec_Linear_Solver_System )); } #ifdef MPI AZ_set_proc_config( ams[0]->proc_config, MPI_COMM_WORLD ); #ifndef COUPLED_FILL if( Explicit_Fill ) AZ_set_proc_config( ams[1]->proc_config, MPI_COMM_WORLD ); #endif /* not COUPLED_FILL */ #else /* MPI */ AZ_set_proc_config( ams[0]->proc_config, 0 ); #ifndef COUPLED_FILL if( Explicit_Fill ) AZ_set_proc_config( ams[1]->proc_config, 0 ); #endif /* not COUPLED_FILL */ #endif /* MPI */ /* * allocate space for and initialize solution arrays */ asdv(&x, numProcUnknowns); asdv(&x_old, numProcUnknowns); asdv(&x_older, numProcUnknowns); asdv(&x_oldest, numProcUnknowns); asdv(&xdot, numProcUnknowns); asdv(&xdot_old, numProcUnknowns); asdv(&x_update, numProcUnknowns); asdv(&x_sens, 
numProcUnknowns); /* * Initialize solid inertia flag */ set_solid_inertia(); /* * ALLOCATE ALL THOSE WORK VECTORS FOR HUNTING */ asdv(&lambda, nHC); asdv(&lambdaEnd, nHC); asdv(&path, nHC); asdv(&path1, nHC); asdv(&hDelta_s0, nHC); asdv(&hDelta_s_min, nHC); asdv(&hDelta_s_max, nHC); asdv(&delta_s, nHC); asdv(&delta_s_new, nHC); asdv(&delta_s_old, nHC); asdv(&delta_s_older, nHC); asdv(&delta_s_oldest, nHC); aldALC = Ivector_birth(nHC); const_delta_s = Ivector_birth(nHC); /* HUNTING BY ZERO AND FIRST ORDER CONTINUATION */ alqALC = 1; damp = 1.0; delta_t = 0.0; tran->delta_t = 0.0; /*for Newmark-Beta terms in Lagrangian Solid*/ nprint = 0; MaxPathSteps = cont->MaxPathSteps; eps = cont->eps; for (iHC=0;iHC<nHC;iHC++) { const_delta_s[iHC] = 0; lambda[iHC] = hunt[iHC].BegParameterValue; lambdaEnd[iHC] = hunt[iHC].EndParameterValue; if ((lambdaEnd[iHC]-lambda[iHC]) > 0.0) { aldALC[iHC] = +1; } else { aldALC[iHC] = -1; } if (hunt[iHC].ramp == 1) { hunt[iHC].Delta_s0 = fabs(lambdaEnd[iHC]-lambda[iHC])/((double)(MaxPathSteps-1)); const_delta_s[iHC] = 1; } hDelta_s0[iHC] = hunt[iHC].Delta_s0; hDelta_s_min[iHC] = hunt[iHC].Delta_s_min; hDelta_s_max[iHC] = hunt[iHC].Delta_s_max; path[iHC] = path1[iHC] = lambda[iHC]; if (Debug_Flag && ProcID == 0) { fprintf(stderr,"MaxPathSteps: %d \tlambdaEnd: %f\n", MaxPathSteps, lambdaEnd[iHC]); fprintf(stderr,"continuation in progress\n"); } if (hDelta_s0[iHC] > hDelta_s_max[iHC]) { hDelta_s0[iHC] = hDelta_s_max[iHC]; } delta_s[iHC] = delta_s_old[iHC] = delta_s_older[iHC] = hDelta_s0[iHC]; /* * ADJUST NATURAL PARAMETER */ update_parameterHC(iHC, path1[iHC], x, xdot, x_AC, delta_s[iHC], cx, exo, dpi); } /* define continuation parameter */ if(hunt[0].EndParameterValue == hunt[0].BegParameterValue) { hunt_par = 1.0; } else { hunt_par = (path1[0]-hunt[0].BegParameterValue) /(hunt[0].EndParameterValue - hunt[0].BegParameterValue) ; hunt_par=fabs(hunt_par); } hunt_par_old = hunt_par; /* Call prefront (or mf_setup) if necessary */ if (Linear_Solver 
== FRONT) { /* Also got to define these because it wants pointers to these numbers */ max_unk_elem = (MAX_PROB_VAR + MAX_CONC)*MDE; one = 1; three = 3; /* NOTE: We need a overall flag in the vn_glob struct that tells whether FULL_DG is on anywhere in domain. This assumes only one material. See sl_front_setup for test. that test needs to be in the input parser. */ if(vn_glob[0]->dg_J_model == FULL_DG) { max_unk_elem = (MAX_PROB_VAR + MAX_CONC)*MDE + 4*vn_glob[0]->modes*4*MDE; } if (Num_Proc > 1) EH(-1, "Whoa. No front allowed with nproc>1"); #ifdef HAVE_FRONT err = mf_setup(&exo->num_elems, &NumUnknowns, &max_unk_elem, &three, &one, exo->elem_order_map, fss->el_proc_assign, fss->level, fss->nopdof, fss->ncn, fss->constraint, front_scratch_directory, &fss->ntra); EH(err,"problems in frontal setup "); #else EH(-1,"Don't have frontal solver compiled and linked in"); #endif } /* * if compute parameter sensitivities, allocate space for solution * sensitivity vectors */ for(i=0;i<nn_post_fluxes_sens;i++) { num_pvector=MAX(num_pvector,pp_fluxes_sens[i]->vector_id);} for(i=0;i<nn_post_data_sens;i++) { num_pvector=MAX(num_pvector,pp_data_sens[i]->vector_id);} if((nn_post_fluxes_sens + nn_post_data_sens) > 0) { num_pvector++; num_pvector = MAX(num_pvector,2); x_sens_p = Dmatrix_birth(num_pvector,numProcUnknowns); } else { x_sens_p = NULL; } if (nAC > 0) { asdv(&x_AC, nAC); asdv(&x_AC_old, nAC); asdv(&x_AC_dot, nAC); } /* Allocate sparse matrix */ if( strcmp( Matrix_Format, "msr" ) == 0) { log_msg("alloc_MSR_sparse_arrays..."); alloc_MSR_sparse_arrays(&ija, &a, &a_old, 0, node_to_fill, exo, dpi); /* * An attic to store external dofs column names is needed when * running in parallel. */ alloc_extern_ija_buffer(num_universe_dofs, num_internal_dofs+num_boundary_dofs, ija, &ija_attic); /* * Any necessary one time initialization of the linear * solver package (Aztec). 
*/ ams[JAC]->bindx = ija; ams[JAC]->val = a; ams[JAC]->belfry = ija_attic; ams[JAC]->val_old = a_old; /* * These point to nowhere since we're using MSR instead of VBR * format. */ ams[JAC]->indx = NULL; ams[JAC]->bpntr = NULL; ams[JAC]->rpntr = NULL; ams[JAC]->cpntr = NULL; ams[JAC]->npn = dpi->num_internal_nodes + dpi->num_boundary_nodes; ams[JAC]->npn_plus = dpi->num_internal_nodes + dpi->num_boundary_nodes + dpi->num_external_nodes; ams[JAC]->npu = num_internal_dofs+num_boundary_dofs; ams[JAC]->npu_plus = num_universe_dofs; ams[JAC]->nnz = ija[num_internal_dofs+num_boundary_dofs] - 1; ams[JAC]->nnz_plus = ija[num_universe_dofs]; } else if( strcmp( Matrix_Format, "vbr" ) == 0) { log_msg("alloc_VBR_sparse_arrays..."); alloc_VBR_sparse_arrays ( ams[JAC], exo, dpi); ija_attic = NULL; ams[JAC]->belfry = ija_attic; a = ams[JAC]->val; if( !save_old_A ) a_old = ams[JAC]->val_old; } else if ( strcmp( Matrix_Format, "front") == 0 ) { /* Don't allocate any sparse matrix space when using front */ ams[JAC]->bindx = NULL; ams[JAC]->val = NULL; ams[JAC]->belfry = NULL; ams[JAC]->val_old = NULL; ams[JAC]->indx = NULL; ams[JAC]->bpntr = NULL; ams[JAC]->rpntr = NULL; ams[JAC]->cpntr = NULL; } else { EH(-1,"Attempted to allocate unknown sparse matrix format"); } init_vec(x, cx, exo, dpi, x_AC, nAC, &timeValueRead); /* if read ACs, update data floats */ if (nAC > 0) { if(augc[0].iread == 1) { for(iAC=0 ; iAC<nAC ; iAC++) { update_parameterAC(iAC, x, xdot, x_AC, cx, exo, dpi); } } } /* * set boundary conditions on the initial conditions */ find_and_set_Dirichlet(x, xdot, exo, dpi); exchange_dof(cx, dpi, x); dcopy1(numProcUnknowns,x,x_old); dcopy1(numProcUnknowns,x_old,x_older); dcopy1(numProcUnknowns,x_older,x_oldest); if( nAC > 0) { dcopy1(nAC,x_AC, x_AC_old);} /* * initialize the counters for when to print out data */ step_print = 1; matrix_systems_mask = 1; log_msg("sl_init()..."); sl_init(matrix_systems_mask, ams, exo, dpi, cx); #ifdef PARALLEL /* * Make sure the solver was 
properly initialized on all processors. */ check_parallel_error("Solver initialization problems"); #endif ams[JAC]->options[AZ_keep_info] = 1; DPRINTF(stderr, "\nINITIAL ELEMENT QUALITY CHECK---\n"); good_mesh = element_quality(exo, x, ams[0]->proc_config); /* * set the number of successful path steps to zero */ nt = 0; /* * LOOP THROUGH PARAMETER UNTIL MAX NUMBER * OF STEPS SURPASSED */ for (n=0;n<MaxPathSteps;n++) { alqALC = 1; for (iHC=0;iHC<nHC;iHC++) { switch (aldALC[iHC]) { case -1: /* REDUCING PARAMETER DIRECTION */ if (path1[iHC] <= lambdaEnd[iHC]) { alqALC = -1; path1[iHC] = lambdaEnd[iHC]; delta_s[iHC] = path[iHC]-path1[iHC]; } break; case +1: /* RISING PARAMETER DIRECTION */ if (path1[iHC] >= lambdaEnd[iHC]) { alqALC = -1; path1[iHC] = lambdaEnd[iHC]; delta_s[iHC] = path1[iHC]-path[iHC]; } break; } /* * ADJUST NATURAL PARAMETER */ update_parameterHC(iHC, path1[iHC], x, xdot, x_AC, delta_s[iHC], cx, exo, dpi); } /* end of iHC loop */ if(hunt[0].EndParameterValue == hunt[0].BegParameterValue) { hunt_par = 1.0; } else { hunt_par = (path1[0]-hunt[0].BegParameterValue) /(hunt[0].EndParameterValue - hunt[0].BegParameterValue) ; hunt_par=fabs(hunt_par); } /* * IF STEP CHANGED, REDO FIRST ORDER PREDICTION */ if(alqALC == -1) { DPRINTF(stderr,"\n\t ******** LAST PATH STEP!\n"); dcopy1(numProcUnknowns,x_old,x); dhunt_par = hunt_par-hunt_par_old; switch (Continuation) { case HUN_ZEROTH: break; case HUN_FIRST: v1add(numProcUnknowns, &x[0], dhunt_par, &x_sens[0]); break; } } /* * reset Dirichlet condition Mask, node->DBC to -1 where it * is set in order for Dirichlet conditions to be * set appropriately for each path step */ nullify_dirichlet_bcs(); find_and_set_Dirichlet (x, xdot, exo, dpi); exchange_dof(cx, dpi, x); if(ProcID ==0) { DPRINTF(stderr, "\n\t----------------------------------"); switch (Continuation) { case HUN_ZEROTH: DPRINTF(stderr, "\n\tZero Order Hunting:"); break; case HUN_FIRST: DPRINTF(stderr, "\n\tFirst Order Hunting:"); break; } DPRINTF(stderr, 
"\n\tStep number: %4d of %4d (max)", n+1, MaxPathSteps); DPRINTF(stderr, "\n\tAttempting solution at: theta = %g",hunt_par); for (iHC=0;iHC<nHC;iHC++) { switch (hunt[iHC].Type) { case 1: /* BC */ DPRINTF(stderr, "\n\tBCID=%3d DFID=%5d", hunt[iHC].BCID, hunt[iHC].DFID); break; case 2: /* MT */ DPRINTF(stderr, "\n\tMTID=%3d MPID=%5d", hunt[iHC].MTID, hunt[iHC].MPID); break; case 3: /* AC */ DPRINTF(stderr, "\n\tACID=%3d DFID=%5d", hunt[iHC].BCID, hunt[iHC].DFID); break; } DPRINTF(stderr, " Parameter= % 10.6e delta_s= %10.6e", path1[iHC], delta_s[iHC]); } } ni = 0; do { #ifdef DEBUG fprintf(stderr, "%s: starting solve_nonlinear_problem\n", yo); #endif err = solve_nonlinear_problem(ams[JAC], x, delta_t, theta, x_old, x_older, xdot, xdot_old, resid_vector, x_update, scale, &converged, &nprint, tev, tev_post, NULL, rd, gindex, p_gsize, gvec, gvec_elem, path1[0], exo, dpi, cx, 0, &path_step_reform, is_steady_state, x_AC, x_AC_dot, hunt_par, resid_vector_sens, x_sens, x_sens_p, NULL); #ifdef DEBUG fprintf(stderr, "%s: returned from solve_nonlinear_problem\n", yo); #endif if (err == -1) converged = 0; inewton = err; if (converged) { EH(error, "error writing ASCII soln file."); /* srs need to check */ if (Write_Intermediate_Solutions == 0) { #ifdef DEBUG fprintf(stderr, "%s: write_solution call WIS\n", yo); #endif write_solution(ExoFileOut, resid_vector, x, x_sens_p, x_old, xdot, xdot_old, tev, tev_post,NULL, rd, gindex, p_gsize, gvec, gvec_elem, &nprint, delta_s[0], theta, path1[0], NULL, exo, dpi); #ifdef DEBUG fprintf(stderr, "%s: write_solution end call WIS\n", yo); #endif } /* * PRINT OUT VALUES OF EXTRA UNKNOWNS * FROM AUGMENTING CONDITIONS */ if (nAC > 0) { DPRINTF(stderr, "\n------------------------------\n"); DPRINTF(stderr, "Augmenting Conditions: %4d\n", nAC); DPRINTF(stderr, "Number of extra unknowns: %4d\n\n", nAC); for (iAC = 0; iAC < nAC; iAC++) { if (augc[iAC].Type == AC_USERBC) { DPRINTF(stderr, "\tAC[%4d] DF[%4d] = %10.6e\n", augc[iAC].BCID, augc[iAC].DFID, 
x_AC[iAC]); } else if (augc[iAC].Type == AC_USERMAT || augc[iAC].Type == AC_FLUX_MAT ) { DPRINTF(stderr, "\n MT[%4d] MP[%4d] = %10.6e\n", augc[iAC].MTID, augc[iAC].MPID, x_AC[iAC]); } else if(augc[iAC].Type == AC_VOLUME) { evol_local = augc[iAC].evol; #ifdef PARALLEL if( Num_Proc > 1 ) { MPI_Allreduce( &evol_local, &evol_global, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); } evol_local = evol_global; #endif DPRINTF(stderr, "\tMT[%4d] VC[%4d]=%10.6e Param=%10.6e\n", augc[iAC].MTID, augc[iAC].VOLID, evol_local, x_AC[iAC]); } else if(augc[iAC].Type == AC_POSITION) { evol_local = augc[iAC].evol; #ifdef PARALLEL if( Num_Proc > 1 ) { MPI_Allreduce( &evol_local, &evol_global, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); } evol_local = evol_global; #endif DPRINTF(stderr, "\tMT[%4d] XY[%4d]=%10.6e Param=%10.6e\n", augc[iAC].MTID, augc[iAC].VOLID, evol_local, x_AC[iAC]); } else if(augc[iAC].Type == AC_FLUX) { DPRINTF(stderr, "\tBC[%4d] DF[%4d]=%10.6e\n", augc[iAC].BCID, augc[iAC].DFID, x_AC[iAC]); } } } /* Check element quality */ good_mesh = element_quality(exo, x, ams[0]->proc_config); /* INTEGRATE FLUXES, FORCES */ for (i = 0; i < nn_post_fluxes; i++) { err_dbl = evaluate_flux ( exo, dpi, pp_fluxes[i]->ss_id, pp_fluxes[i]->flux_type , pp_fluxes[i]->flux_type_name , pp_fluxes[i]->blk_id , pp_fluxes[i]->species_number, pp_fluxes[i]->flux_filenm, pp_fluxes[i]->profile_flag, x,xdot,NULL,delta_s[0],path1[0],1); } /* COMPUTE FLUX, FORCE SENSITIVITIES */ for (i = 0; i < nn_post_fluxes_sens; i++) { err_dbl = evaluate_flux_sens ( exo, dpi, pp_fluxes_sens[i]->ss_id, pp_fluxes_sens[i]->flux_type , pp_fluxes_sens[i]->flux_type_name , pp_fluxes_sens[i]->blk_id , pp_fluxes_sens[i]->species_number, pp_fluxes_sens[i]->sens_type, pp_fluxes_sens[i]->sens_id, pp_fluxes_sens[i]->sens_flt, pp_fluxes_sens[i]->sens_flt2, pp_fluxes_sens[i]->vector_id, pp_fluxes_sens[i]->flux_filenm, pp_fluxes_sens[i]->profile_flag, x,xdot,x_sens_p,delta_s[0],path1[0],1); } /* * Compute global volumetric quantities */ 
for (i = 0; i < nn_volume; i++ ) { evaluate_volume_integral(exo, dpi, pp_volume[i]->volume_type, pp_volume[i]->volume_name, pp_volume[i]->blk_id, pp_volume[i]->species_no, pp_volume[i]->volume_fname, pp_volume[i]->params, NULL, x, xdot, delta_s[0], path1[0], 1); } } /* end of if converged block */ /* * INCREMENT COUNTER */ ni++; /* * * DID IT CONVERGE ? * IF NOT, REDUCE STEP SIZE AND TRY AGAIN * */ if (!converged) { if (ni > 10) { DPRINTF(stderr,"\n ************************************\n"); DPRINTF(stderr," W: Did not converge in Newton steps.\n"); DPRINTF(stderr," Find better initial guess. \n"); DPRINTF(stderr," ************************************\n"); exit(0); } /* * ADJUST STEP SIZE - unless failed on first step */ if ( nt != 0 ) { DPRINTF(stderr, "\n\tFailed to converge:\n"); for (iHC=0;iHC<nHC;iHC++) { delta_s[iHC] *= 0.5; switch (aldALC[iHC]) { case -1: path1[iHC] = path[iHC] - delta_s[iHC]; break; case +1: path1[iHC] = path[iHC] + delta_s[iHC]; break; } /* * RESET */ alqALC = 1; DPRINTF(stderr, "Decreasing step-length to %10.6e.\n", delta_s[iHC]); if (delta_s[iHC] < hDelta_s_min[iHC]) { DPRINTF(stderr,"\n X: C step-length reduced below minimum."); DPRINTF(stderr,"\n Program terminated.\n"); /* This needs to have a return value of 0, indicating * success, for the continuation script to not treat this * as a failed command. 
*/ exit(0); } #ifdef PARALLEL check_parallel_error("\t"); #endif /* * ADJUST NATURAL PARAMETER */ update_parameterHC(iHC, path1[iHC], x, xdot, x_AC, delta_s[iHC], cx, exo, dpi); } /* end of iHC loop */ if(hunt[0].EndParameterValue == hunt[0].BegParameterValue) { hunt_par = 1.0; } else { hunt_par = (path1[0]-hunt[0].BegParameterValue) /(hunt[0].EndParameterValue - hunt[0].BegParameterValue) ; hunt_par=fabs(hunt_par); } /* * GET ZERO OR FIRST ORDER PREDICTION */ dhunt_par = hunt_par-hunt_par_old; switch (Continuation) { case HUN_ZEROTH: vcopy(numProcUnknowns, &x[0], 1.0, &x_old[0]); break; case HUN_FIRST: v2sum(numProcUnknowns, &x[0], 1.0, &x_old[0], dhunt_par, &x_sens[0]); break; } /* MMH: Needed to put this in, o/w it may find that the * solution and residual HAPPEN to satisfy the convergence * criterion for the next newton solve... */ find_and_set_Dirichlet(x, xdot, exo, dpi); exchange_dof(cx, dpi, x); if (nAC > 0) { dcopy1(nAC, x_AC_old, x_AC); for(iAC=0 ; iAC<nAC ; iAC++) { update_parameterAC(iAC, x, xdot, x_AC, cx, exo, dpi); } } if(hunt[0].EndParameterValue == hunt[0].BegParameterValue) { hunt_par = 1.0; } else { hunt_par = (path1[0]-hunt[0].BegParameterValue) /(hunt[0].EndParameterValue - hunt[0].BegParameterValue) ; hunt_par=fabs(hunt_par); } } else if (inewton == -1) { DPRINTF(stderr,"\nHmm... trouble on first step \n Let's try some more relaxation \n"); if((damp_factor1 <= 1. && damp_factor1 >= 0.) && (damp_factor2 <= 1. && damp_factor2 >= 0.) && (damp_factor3 <= 1. && damp_factor3 >= 0.)) { custom_tol1 *= 0.01; custom_tol2 *= 0.01; custom_tol3 *= 0.01; DPRINTF(stderr," custom tolerances %g %g %g \n",custom_tol1,custom_tol2,custom_tol3); } else { damp_factor1 *= 0.5; DPRINTF(stderr," damping factor %g \n",damp_factor1); } vcopy(numProcUnknowns, &x[0], 1.0, &x_old[0]); /* MMH: Needed to put this in, o/w it may find that the * solution and residual HAPPEN to satisfy the convergence * criterion for the next newton solve... 
*/ find_and_set_Dirichlet(x, xdot, exo, dpi); exchange_dof(cx, dpi, x); if (nAC > 0) { dcopy1(nAC, x_AC_old, x_AC); for(iAC=0 ; iAC<nAC ; iAC++) { update_parameterAC(iAC, x, xdot, x_AC, cx, exo, dpi); } } } else { DPRINTF(stderr,"\nHmm... could not converge on first step\n Let's try some more iterations\n"); if((damp_factor1 <= 1. && damp_factor1 >= 0.) && (damp_factor2 <= 1. && damp_factor2 >= 0.) && (damp_factor3 <= 1. && damp_factor3 >= 0.)) { custom_tol1 *= 100.; custom_tol2 *= 100.; custom_tol3 *= 100.; DPRINTF(stderr," custom tolerances %g %g %g \n",custom_tol1,custom_tol2,custom_tol3); } else { damp_factor1 *= 2.0; damp_factor1 = MIN(damp_factor1,1.0); DPRINTF(stderr," damping factor %g \n",damp_factor1); } } } /* end of !converged */ } while (converged == 0); /* * CONVERGED */ nt++; custom_tol1 = toler_org[0]; custom_tol2 = toler_org[1]; custom_tol3 = toler_org[2]; damp_factor1 = damp_org; DPRINTF(stderr, "\n\tStep accepted, theta (proportion complete) = %10.6e\n", hunt_par); for (iHC=0;iHC<nHC;iHC++) { switch (hunt[iHC].Type) { case 1: /* BC */ DPRINTF(stderr, "\tStep accepted, BCID=%3d DFID=%5d", hunt[iHC].BCID, hunt[iHC].DFID); break; case 2: /* MT */ DPRINTF(stderr, "\tStep accepted, MTID=%3d MPID=%5d", hunt[iHC].MTID, hunt[iHC].MPID); break; case 3: /* AC */ DPRINTF(stderr, "\tStep accepted, ACID=%3d DFID=%5d", hunt[iHC].BCID, hunt[iHC].DFID); break; } DPRINTF(stderr, " Parameter= % 10.6e\n", path1[iHC]); } /* * check path step error, if too large do not enlarge path step */ for (iHC=0;iHC<nHC;iHC++) { if ((ni == 1) && (n != 0) && (!const_delta_s[iHC])) { delta_s_new[iHC] = path_step_control(num_total_nodes, delta_s[iHC], delta_s_old[iHC], x, eps, &success_ds, cont->use_var_norm, inewton); if (delta_s_new[iHC] > hDelta_s_max[iHC]) {delta_s_new[iHC] = hDelta_s_max[iHC];} } else { success_ds = 1; delta_s_new[iHC] = delta_s[iHC]; } } /* * determine whether to print out the data or not */ i_print = 0; if (nt == step_print) { i_print = 1; step_print += 
cont->print_freq; } if (alqALC == -1) { i_print = 1; } if (i_print) { error = write_ascii_soln(x, resid_vector, numProcUnknowns, x_AC, nAC, path1[0], file); if (error) { DPRINTF(stderr, "%s: error writing ASCII soln file\n", yo); } if ( Write_Intermediate_Solutions == 0 ) { write_solution(ExoFileOut, resid_vector, x, x_sens_p, x_old, xdot, xdot_old, tev, tev_post, NULL, rd, gindex, p_gsize, gvec, gvec_elem, &nprint, delta_s[0], theta, path1[0], NULL, exo, dpi); nprint++; } } /* * backup old solutions * can use previous solutions for prediction one day */ dcopy1(numProcUnknowns,x_older,x_oldest); dcopy1(numProcUnknowns,x_old,x_older); dcopy1(numProcUnknowns,x,x_old); dcopy1(nHC,delta_s_older,delta_s_oldest); dcopy1(nHC,delta_s_old ,delta_s_older ); dcopy1(nHC,delta_s ,delta_s_old ); dcopy1(nHC,delta_s_new ,delta_s ); /* delta_s_oldest = delta_s_older; delta_s_older = delta_s_old; delta_s_old = delta_s; delta_s = delta_s_new; */ hunt_par_old=hunt_par; if ( nAC > 0) { dcopy1(nAC, x_AC, x_AC_old); } /* * INCREMENT/DECREMENT PARAMETER */ for (iHC=0;iHC<nHC;iHC++) { path[iHC] = path1[iHC]; switch (aldALC[iHC]) { case -1: path1[iHC] = path[iHC] - delta_s[iHC]; break; case +1: path1[iHC] = path[iHC] + delta_s[iHC]; break; } /* * ADJUST NATURAL PARAMETER */ update_parameterHC(iHC, path1[iHC], x, xdot, x_AC, delta_s[iHC], cx, exo, dpi); } /* end of iHC loop */ /* * GET FIRST ORDER PREDICTION */ if(hunt[0].EndParameterValue == hunt[0].BegParameterValue) { hunt_par = 1.0; } else { hunt_par = (path1[0]-hunt[0].BegParameterValue) /(hunt[0].EndParameterValue - hunt[0].BegParameterValue) ; hunt_par=fabs(hunt_par); } dhunt_par = hunt_par-hunt_par_old; switch (Continuation) { case HUN_ZEROTH: break; case HUN_FIRST: v1add(numProcUnknowns, &x[0], dhunt_par, &x_sens[0]); break; } if (!good_mesh) goto free_and_clear; /* * * CHECK END CONTINUATION * */ if (alqALC == -1) { alqALC = 0; } else { alqALC = 1; } if (alqALC == 0) { DPRINTF(stderr,"\n\n\t I will continue no more!\n\t No more 
continuation for you!\n"); goto free_and_clear; } } /* n */ if(n == MaxPathSteps && aldALC[0] * (lambdaEnd[0] - path[0]) > 0) { DPRINTF(stderr,"\n\tFailed to reach end of hunt in maximum number of successful steps (%d).\n\tSorry.\n", MaxPathSteps); exit(0); } #ifdef PARALLEL check_parallel_error("Hunting error"); #endif /* * DONE CONTINUATION */ free_and_clear: /* * Transform the node point coordinates according to the * displacements and write out all the results using the * displaced coordinates. Set the displacement field to * zero, too. */ if (Anneal_Mesh) { #ifdef DEBUG fprintf(stderr, "%s: anneal_mesh()...\n", yo); #endif err = anneal_mesh(x, tev, tev_post, NULL, rd, path1[0], exo, dpi); #ifdef DEBUG DPRINTF(stderr, "%s: anneal_mesh()-done\n", yo); #endif EH(err, "anneal_mesh() bad return."); } /* * Free a bunch of variables that aren't needed anymore */ safer_free((void **) &ROT_Types); safer_free((void **) &node_to_fill); safer_free( (void **) &resid_vector); safer_free( (void **) &resid_vector_sens); safer_free( (void **) &scale); safer_free( (void **) &x); if (nAC > 0) { safer_free( (void **) &x_AC); safer_free( (void **) &x_AC_old); safer_free( (void **) &x_AC_dot); } safer_free( (void **) &x_old); safer_free( (void **) &x_older); safer_free( (void **) &x_oldest); safer_free( (void **) &xdot); safer_free( (void **) &xdot_old); safer_free( (void **) &x_update); safer_free( (void **) &x_sens); if((nn_post_data_sens+nn_post_fluxes_sens) > 0) Dmatrix_death(x_sens_p,num_pvector,numProcUnknowns); for(i = 0; i < MAX_NUMBER_MATLS; i++) { for(n = 0; n < MAX_MODES; n++) { safer_free((void **) &(ve_glob[i][n]->gn)); safer_free((void **) &(ve_glob[i][n])); } safer_free((void **) &(vn_glob[i])); } sl_free(matrix_systems_mask, ams); for (i=0;i<NUM_ALSS;i++) { safer_free( (void**) &(ams[i])); } safer_free( (void **) &gvec); safer_free( (void **) &lambda); safer_free( (void **) &lambdaEnd); safer_free( (void **) &path); safer_free( (void **) &path1); safer_free( (void 
**) &hDelta_s0); safer_free( (void **) &hDelta_s_min); safer_free( (void **) &hDelta_s_max); safer_free( (void **) &delta_s); safer_free( (void **) &delta_s_new); safer_free( (void **) &delta_s_old); safer_free( (void **) &delta_s_older); safer_free( (void **) &delta_s_oldest); Ivector_death(&aldALC[0], nHC); Ivector_death(&const_delta_s[0], nHC); i = 0; for ( eb_indx = 0; eb_indx < exo->num_elem_blocks; eb_indx++ ) { for ( ev_indx = 0; ev_indx < rd->nev; ev_indx++ ) { if ( exo->elem_var_tab[i++] == 1 ) { safer_free ((void **) &(gvec_elem [eb_indx][ev_indx]) ); } } safer_free ((void **) &(gvec_elem [eb_indx])); } safer_free( (void **) &gvec_elem); safer_free( (void **) &rd); safer_free( (void **) &Local_Offset); safer_free( (void **) &Dolphin); if( strlen( Soln_OutFile) ) { fclose(file); } return; } /* END of routine hunt_problem */
/******************************************************************** real TRSM kernel ********************************************************************/ bool ialglib::_i_rmatrixlefttrsmf(int m, int n, const ap::real_2d_array& a, int i1, int j1, bool isupper, bool isunit, int optype, ap::real_2d_array& x, int i2, int j2) { if( m>alglib_r_block || n>alglib_r_block ) return false; // // local buffers // double *pdiag, *arow; int i; double __abuf[alglib_r_block*alglib_r_block+alglib_simd_alignment]; double __xbuf[alglib_r_block*alglib_r_block+alglib_simd_alignment]; double __tmpbuf[alglib_r_block+alglib_simd_alignment]; double * const abuf = (double * const) alglib_align(__abuf, alglib_simd_alignment); double * const xbuf = (double * const) alglib_align(__xbuf, alglib_simd_alignment); double * const tmpbuf = (double * const) alglib_align(__tmpbuf,alglib_simd_alignment); // // Prepare // Transpose X (so we may use mv, which calculates A*x, but not x*A) // bool uppera; mcopyblock(m, m, &a(i1,j1), optype, a.getstride(), abuf); mcopyblock(m, n, &x(i2,j2), 1, x.getstride(), xbuf); if( isunit ) for(i=0,pdiag=abuf; i<m; i++,pdiag+=alglib_r_block+1) *pdiag = 1.0; if( optype==0 ) uppera = isupper; else uppera = !isupper; // // Solve A^-1*Y^T=X^T where A is upper or lower triangular // if( uppera ) { for(i=m-1,pdiag=abuf+(m-1)*alglib_r_block+(m-1); i>=0; i--,pdiag-=alglib_r_block+1) { double beta = 1.0/(*pdiag); double alpha = -beta; vcopy(m-1-i, pdiag+1, 1, tmpbuf, 1); mv(n, m-1-i, xbuf+i+1, tmpbuf, xbuf+i, alglib_r_block, alpha, beta); } mcopyunblock(m, n, xbuf, 1, &x(i2,j2), x.getstride()); } else { for(i=0,pdiag=abuf,arow=abuf; i<m; i++,pdiag+=alglib_r_block+1,arow+=alglib_r_block) { double beta = 1.0/(*pdiag); double alpha = -beta; vcopy(i, arow, 1, tmpbuf, 1); mv(n, i, xbuf, tmpbuf, xbuf+i, alglib_r_block, alpha, beta); } mcopyunblock(m, n, xbuf, 1, &x(i2,j2), x.getstride()); } return true; }
bool rcBuildPolyMesh(rcContourSet& cset, int nvp, rcPolyMesh& mesh) { rcTimeVal startTime = rcGetPerformanceTimer(); vcopy(mesh.bmin, cset.bmin); vcopy(mesh.bmax, cset.bmax); mesh.cs = cset.cs; mesh.ch = cset.ch; int maxVertices = 0; int maxTris = 0; int maxVertsPerCont = 0; for (int i = 0; i < cset.nconts; ++i) { maxVertices += cset.conts[i].nverts; maxTris += cset.conts[i].nverts - 2; maxVertsPerCont = rcMax(maxVertsPerCont, cset.conts[i].nverts); } if (maxVertices >= 0xfffe) { if (rcGetLog()) rcGetLog()->log(RC_LOG_ERROR, "rcBuildPolyMesh: Too many vertices %d.", maxVertices); return false; } unsigned char* vflags = 0; int* nextVert = 0; int* firstVert = 0; int* indices = 0; int* tris = 0; unsigned short* polys = 0; vflags = new unsigned char[maxVertices]; if (!vflags) { if (rcGetLog()) rcGetLog()->log(RC_LOG_ERROR, "rcBuildPolyMesh: Out of memory 'mesh.verts' (%d).", maxVertices); goto failure; } memset(vflags, 0, maxVertices); mesh.verts = new unsigned short[maxVertices*3]; if (!mesh.verts) { if (rcGetLog()) rcGetLog()->log(RC_LOG_ERROR, "rcBuildPolyMesh: Out of memory 'mesh.verts' (%d).", maxVertices); goto failure; } mesh.polys = new unsigned short[maxTris*nvp*2]; if (!mesh.polys) { if (rcGetLog()) rcGetLog()->log(RC_LOG_ERROR, "rcBuildPolyMesh: Out of memory 'mesh.polys' (%d).", maxTris*nvp*2); goto failure; } mesh.regs = new unsigned short[maxTris]; if (!mesh.regs) { if (rcGetLog()) rcGetLog()->log(RC_LOG_ERROR, "rcBuildPolyMesh: Out of memory 'mesh.regs' (%d).", maxTris); goto failure; } mesh.nverts = 0; mesh.npolys = 0; mesh.nvp = nvp; memset(mesh.verts, 0, sizeof(unsigned short)*maxVertices*3); memset(mesh.polys, 0xff, sizeof(unsigned short)*maxTris*nvp*2); memset(mesh.regs, 0, sizeof(unsigned short)*maxTris); nextVert = new int[maxVertices]; if (!nextVert) { if (rcGetLog()) rcGetLog()->log(RC_LOG_ERROR, "rcBuildPolyMesh: Out of memory 'nextVert' (%d).", maxVertices); goto failure; } memset(nextVert, 0, sizeof(int)*maxVertices); firstVert = new 
int[VERTEX_BUCKET_COUNT]; if (!firstVert) { if (rcGetLog()) rcGetLog()->log(RC_LOG_ERROR, "rcBuildPolyMesh: Out of memory 'firstVert' (%d).", VERTEX_BUCKET_COUNT); goto failure; } for (int i = 0; i < VERTEX_BUCKET_COUNT; ++i) firstVert[i] = -1; indices = new int[maxVertsPerCont]; if (!indices) { if (rcGetLog()) rcGetLog()->log(RC_LOG_ERROR, "rcBuildPolyMesh: Out of memory 'indices' (%d).", maxVertsPerCont); goto failure; } tris = new int[maxVertsPerCont*3]; if (!tris) { if (rcGetLog()) rcGetLog()->log(RC_LOG_ERROR, "rcBuildPolyMesh: Out of memory 'tris' (%d).", maxVertsPerCont*3); goto failure; } polys = new unsigned short[(maxVertsPerCont+1)*nvp]; if (!polys) { if (rcGetLog()) rcGetLog()->log(RC_LOG_ERROR, "rcBuildPolyMesh: Out of memory 'polys' (%d).", maxVertsPerCont*nvp); goto failure; } unsigned short* tmpPoly = &polys[maxVertsPerCont*nvp]; for (int i = 0; i < cset.nconts; ++i) { rcContour& cont = cset.conts[i]; // Skip empty contours. if (cont.nverts < 3) continue; // Triangulate contour for (int j = 0; j < cont.nverts; ++j) indices[j] = j; int ntris = triangulate(cont.nverts, cont.verts, &indices[0], &tris[0]); if (ntris <= 0) { // Bad triangulation, should not happen. /* for (int k = 0; k < cont.nverts; ++k) { const int* v = &cont.verts[k*4]; printf("\t\t%d,%d,%d,%d,\n", v[0], v[1], v[2], v[3]); if (nBadPos < 100) { badPos[nBadPos*3+0] = v[0]; badPos[nBadPos*3+1] = v[1]; badPos[nBadPos*3+2] = v[2]; nBadPos++; } }*/ ntris = -ntris; } // Add and merge vertices. for (int j = 0; j < cont.nverts; ++j) { const int* v = &cont.verts[j*4]; indices[j] = addVertex((unsigned short)v[0], (unsigned short)v[1], (unsigned short)v[2], mesh.verts, firstVert, nextVert, mesh.nverts); if (v[3] & RC_BORDER_VERTEX) { // This vertex should be removed. vflags[indices[j]] = 1; } } // Build initial polygons. 
int npolys = 0; memset(polys, 0xff, maxVertsPerCont*nvp*sizeof(unsigned short)); for (int j = 0; j < ntris; ++j) { int* t = &tris[j*3]; if (t[0] != t[1] && t[0] != t[2] && t[1] != t[2]) { polys[npolys*nvp+0] = (unsigned short)indices[t[0]]; polys[npolys*nvp+1] = (unsigned short)indices[t[1]]; polys[npolys*nvp+2] = (unsigned short)indices[t[2]]; npolys++; } } if (!npolys) continue; // Merge polygons. if (nvp > 3) { while (true) { // Find best polygons to merge. int bestMergeVal = 0; int bestPa, bestPb, bestEa, bestEb; for (int j = 0; j < npolys-1; ++j) { unsigned short* pj = &polys[j*nvp]; for (int k = j+1; k < npolys; ++k) { unsigned short* pk = &polys[k*nvp]; int ea, eb; int v = getPolyMergeValue(pj, pk, mesh.verts, ea, eb, nvp); if (v > bestMergeVal) { bestMergeVal = v; bestPa = j; bestPb = k; bestEa = ea; bestEb = eb; } } } if (bestMergeVal > 0) { // Found best, merge. unsigned short* pa = &polys[bestPa*nvp]; unsigned short* pb = &polys[bestPb*nvp]; mergePolys(pa, pb, mesh.verts, bestEa, bestEb, tmpPoly, nvp); memcpy(pb, &polys[(npolys-1)*nvp], sizeof(unsigned short)*nvp); npolys--; } else { // Could not merge any polygons, stop. break; } } } // Store polygons. for (int j = 0; j < npolys; ++j) { unsigned short* p = &mesh.polys[mesh.npolys*nvp*2]; unsigned short* q = &polys[j*nvp]; for (int k = 0; k < nvp; ++k) p[k] = q[k]; mesh.regs[mesh.npolys] = cont.reg; mesh.npolys++; } } // Remove edge vertices. for (int i = 0; i < mesh.nverts; ++i) { if (vflags[i]) { if (!removeVertex(mesh, i, maxTris)) goto failure; for (int j = i; j < mesh.nverts-1; ++j) vflags[j] = vflags[j+1]; --i; } } delete [] vflags; delete [] firstVert; delete [] nextVert; delete [] indices; delete [] tris; // Calculate adjacency. 
if (!buildMeshAdjacency(mesh.polys, mesh.npolys, mesh.nverts, nvp)) { if (rcGetLog()) rcGetLog()->log(RC_LOG_ERROR, "rcBuildPolyMesh: Adjacency failed."); return false; } rcTimeVal endTime = rcGetPerformanceTimer(); // if (rcGetLog()) // rcGetLog()->log(RC_LOG_PROGRESS, "Build polymesh: %.3f ms", rcGetDeltaTimeUsec(startTime, endTime)/1000.0f); if (rcGetBuildTimes()) rcGetBuildTimes()->buildPolymesh += rcGetDeltaTimeUsec(startTime, endTime); return true; failure: delete [] vflags; delete [] tmpPoly; delete [] firstVert; delete [] nextVert; delete [] indices; delete [] tris; return false; }
bool rcMergePolyMeshes(rcPolyMesh** meshes, const int nmeshes, rcPolyMesh& mesh) { if (!nmeshes || !meshes) return true; rcTimeVal startTime = rcGetPerformanceTimer(); int* nextVert = 0; int* firstVert = 0; unsigned short* vremap = 0; mesh.nvp = meshes[0]->nvp; mesh.cs = meshes[0]->cs; mesh.ch = meshes[0]->ch; vcopy(mesh.bmin, meshes[0]->bmin); vcopy(mesh.bmax, meshes[0]->bmax); int maxVerts = 0; int maxPolys = 0; int maxVertsPerMesh = 0; for (int i = 0; i < nmeshes; ++i) { vmin(mesh.bmin, meshes[i]->bmin); vmax(mesh.bmax, meshes[i]->bmax); maxVertsPerMesh = rcMax(maxVertsPerMesh, meshes[i]->nverts); maxVerts += meshes[i]->nverts; maxPolys += meshes[i]->npolys; } mesh.nverts = 0; mesh.verts = new unsigned short[maxVerts*3]; if (!mesh.verts) { if (rcGetLog()) rcGetLog()->log(RC_LOG_ERROR, "rcMergePolyMeshes: Out of memory 'mesh.verts' (%d).", maxVerts*3); return false; } mesh.npolys = 0; mesh.polys = new unsigned short[maxPolys*2*mesh.nvp]; if (!mesh.polys) { if (rcGetLog()) rcGetLog()->log(RC_LOG_ERROR, "rcMergePolyMeshes: Out of memory 'mesh.polys' (%d).", maxPolys*2*mesh.nvp); return false; } memset(mesh.polys, 0xff, sizeof(unsigned short)*maxPolys*2*mesh.nvp); mesh.regs = new unsigned short[maxPolys]; if (!mesh.regs) { if (rcGetLog()) rcGetLog()->log(RC_LOG_ERROR, "rcMergePolyMeshes: Out of memory 'mesh.regs' (%d).", maxPolys); return false; } memset(mesh.regs, 0, sizeof(unsigned short)*maxPolys); nextVert = new int[maxVerts]; if (!nextVert) { if (rcGetLog()) rcGetLog()->log(RC_LOG_ERROR, "rcMergePolyMeshes: Out of memory 'nextVert' (%d).", maxVerts); goto failure; } memset(nextVert, 0, sizeof(int)*maxVerts); firstVert = new int[VERTEX_BUCKET_COUNT]; if (!firstVert) { if (rcGetLog()) rcGetLog()->log(RC_LOG_ERROR, "rcMergePolyMeshes: Out of memory 'firstVert' (%d).", VERTEX_BUCKET_COUNT); goto failure; } for (int i = 0; i < VERTEX_BUCKET_COUNT; ++i) firstVert[i] = -1; vremap = new unsigned short[maxVertsPerMesh]; if (!vremap) { if (rcGetLog()) 
rcGetLog()->log(RC_LOG_ERROR, "rcMergePolyMeshes: Out of memory 'vremap' (%d).", maxVertsPerMesh); goto failure; } memset(nextVert, 0, sizeof(int)*maxVerts); for (int i = 0; i < nmeshes; ++i) { const rcPolyMesh* pmesh = meshes[i]; const unsigned short ox = (unsigned short)floorf((pmesh->bmin[0]-mesh.bmin[0])/mesh.cs+0.5f); const unsigned short oz = (unsigned short)floorf((pmesh->bmin[2]-mesh.bmin[2])/mesh.cs+0.5f); for (int j = 0; j < pmesh->nverts; ++j) { unsigned short* v = &pmesh->verts[j*3]; vremap[j] = addVertex(v[0]+ox, v[1], v[2]+oz, mesh.verts, firstVert, nextVert, mesh.nverts); } for (int j = 0; j < pmesh->npolys; ++j) { unsigned short* tgt = &mesh.polys[mesh.npolys*2*mesh.nvp]; unsigned short* src = &pmesh->polys[j*2*mesh.nvp]; mesh.regs[mesh.npolys] = pmesh->regs[j]; mesh.npolys++; for (int k = 0; k < mesh.nvp; ++k) { if (src[k] == 0xffff) break; tgt[k] = vremap[src[k]]; } } } // Calculate adjacency. if (!buildMeshAdjacency(mesh.polys, mesh.npolys, mesh.nverts, mesh.nvp)) { if (rcGetLog()) rcGetLog()->log(RC_LOG_ERROR, "rcMergePolyMeshes: Adjacency failed."); return false; } delete [] firstVert; delete [] nextVert; delete [] vremap; rcTimeVal endTime = rcGetPerformanceTimer(); if (rcGetBuildTimes()) rcGetBuildTimes()->mergePolyMesh += rcGetDeltaTimeUsec(startTime, endTime); return true; failure: delete [] firstVert; delete [] nextVert; delete [] vremap; return false; }
bool rcMergePolyMeshDetails(rcPolyMeshDetail** meshes, const int nmeshes, rcPolyMeshDetail& mesh) { rcTimeVal startTime = rcGetPerformanceTimer(); int maxVerts = 0; int maxTris = 0; int maxMeshes = 0; for (int i = 0; i < nmeshes; ++i) { if (!meshes[i]) continue; maxVerts += meshes[i]->nverts; maxTris += meshes[i]->ntris; maxMeshes += meshes[i]->nmeshes; } mesh.nmeshes = 0; mesh.meshes = new unsigned short[maxMeshes*4]; if (!mesh.meshes) { if (rcGetLog()) rcGetLog()->log(RC_LOG_ERROR, "rcBuildPolyMeshDetail: Out of memory 'pmdtl.meshes' (%d).", maxMeshes*4); return false; } mesh.ntris = 0; mesh.tris = new unsigned char[maxTris*4]; if (!mesh.tris) { if (rcGetLog()) rcGetLog()->log(RC_LOG_ERROR, "rcBuildPolyMeshDetail: Out of memory 'dmesh.tris' (%d).", maxTris*4); return false; } mesh.nverts = 0; mesh.verts = new float[maxVerts*3]; if (!mesh.verts) { if (rcGetLog()) rcGetLog()->log(RC_LOG_ERROR, "rcBuildPolyMeshDetail: Out of memory 'dmesh.verts' (%d).", maxVerts*3); return false; } // Merge datas. for (int i = 0; i < nmeshes; ++i) { rcPolyMeshDetail* dm = meshes[i]; if (!dm) continue; for (int j = 0; j < dm->nmeshes; ++j) { unsigned short* dst = &mesh.meshes[mesh.nmeshes*4]; unsigned short* src = &dm->meshes[j*4]; dst[0] = mesh.nverts+src[0]; dst[1] = src[1]; dst[2] = mesh.ntris+src[2]; dst[3] = src[3]; mesh.nmeshes++; } for (int k = 0; k < dm->nverts; ++k) { vcopy(&mesh.verts[mesh.nverts*3], &dm->verts[k*3]); mesh.nverts++; } for (int k = 0; k < dm->ntris; ++k) { mesh.tris[mesh.ntris*4+0] = dm->tris[k*4+0]; mesh.tris[mesh.ntris*4+1] = dm->tris[k*4+1]; mesh.tris[mesh.ntris*4+2] = dm->tris[k*4+2]; mesh.tris[mesh.ntris*4+3] = dm->tris[k*4+3]; mesh.ntris++; } } rcTimeVal endTime = rcGetPerformanceTimer(); if (rcGetBuildTimes()) rcGetBuildTimes()->mergePolyMeshDetail += rcGetDeltaTimeUsec(startTime, endTime); return true; }
/*
 * Build the quaternion for a rotation by the angle phi about the axis vec.
 *
 * vec is handed to vnormal() before it is used, so the caller's array may be
 * modified. The first three components of quat receive the axis scaled by
 * sin(phi/2); quat[3] receives cos(phi/2).
 */
void axis_to_quat(double vec[3], double phi, double quat[4]){
  const double half_angle = phi/2.0;

  vnormal(vec);
  vcopy(quat, vec);
  vscale(quat, sin(half_angle));
  quat[3] = cos(half_angle);
}
// advection void Fluid2::fluidAdvection( const float dt ) { // ink { Array2<float> inkcopy( ink ); CellSampler inksampler( grid, inkcopy ); const Index2& size = ink.getSize(); for( unsigned int i = 0; i < size.x; ++i ) for( unsigned int j = 0; j < size.y; ++j ) { const Index2 id( i, j ); const Vec2 pos( grid.getCellPos( id ) ); const Vec2 vel( ( velocityX[ id ] + velocityX[ Index2( i+1, j ) ] ) * 0.5f, ( velocityY[ id ] + velocityY[ Index2( i, j+1 ) ] ) * 0.5f ); const Vec2 endpos( pos - dt * vel ); ink[ id ] = inksampler.getValue( endpos );; } } // velocity { Array2< float > ucopy( velocityX ); Array2< float > vcopy( velocityY ); FaceSampler usampler( grid, ucopy, 0 ); FaceSampler vsampler( grid, vcopy, 1 ); const Index2& sizeu = velocityX.getSize(); const Index2& sizev = velocityY.getSize(); for( unsigned int i = 0; i < sizeu.x; ++i ) for( unsigned int j = 0; j < sizeu.y; ++j ) { const Index2 id( i, j ); const Index2 idv1( clamp( i-1, 0, sizev.x-1 ), clamp( j , 0, sizev.y-1 ) ); const Index2 idv2( clamp( i , 0, sizev.x-1 ), clamp( j , 0, sizev.y-1 ) ); const Index2 idv3( clamp( i-1, 0, sizev.x-1 ), clamp( j+1, 0, sizev.y-1 ) ); const Index2 idv4( clamp( i , 0, sizev.x-1 ), clamp( j+1, 0, sizev.y-1 ) ); const Vec2 pos( grid.getFaceXPos( id ) ); const Vec2 vel( ucopy[ id ], ( vcopy[ idv1 ] + vcopy[ idv2 ] + vcopy[ idv3 ] + vcopy[ idv4 ] ) * 0.25f ); const Vec2 endpos( pos - dt * vel ); velocityX[ id ] = usampler.getValue( endpos ); } for( unsigned int i = 0; i < sizev.x; ++i ) for( unsigned int j = 0; j < sizev.y; ++j ) { const Index2 id( i, j ); const Index2 idu1( clamp( i , 0, sizeu.x-1 ), clamp( j-1, 0, sizeu.y-1 ) ); const Index2 idu2( clamp( i , 0, sizeu.x-1 ), clamp( j , 0, sizeu.y-1 ) ); const Index2 idu3( clamp( i+1, 0, sizeu.x-1 ), clamp( j-1, 0, sizeu.y-1 ) ); const Index2 idu4( clamp( i+1, 0, sizeu.x-1 ), clamp( j , 0, sizeu.y-1 ) ); const Vec2 pos( grid.getFaceYPos( id ) ); const Vec2 vel( ( ucopy[ idu1 ] + ucopy[ idu2 ] + ucopy[ idu3 ] + ucopy[ idu4 ] 
) * 0.25f, vcopy[ id ] ); const Vec2 endpos( pos - dt * vel ); velocityY[ id ] = vsampler.getValue( endpos ); } } }
bool rcBuildCompactHeightfield(const int walkableHeight, const int walkableClimb, unsigned char flags, rcHeightfield& hf, rcCompactHeightfield& chf) { rcTimeVal startTime = rcGetPerformanceTimer(); const int w = hf.width; const int h = hf.height; const int spanCount = getSpanCount(flags, hf); // Fill in header. chf.width = w; chf.height = h; chf.spanCount = spanCount; chf.walkableHeight = walkableHeight; chf.walkableClimb = walkableClimb; chf.maxRegions = 0; vcopy(chf.bmin, hf.bmin); vcopy(chf.bmax, hf.bmax); chf.bmax[1] += walkableHeight*hf.ch; chf.cs = hf.cs; chf.ch = hf.ch; chf.cells = new rcCompactCell[w*h]; if (!chf.cells) { if (rcGetLog()) rcGetLog()->log(RC_LOG_ERROR, "rcBuildCompactHeightfield: Out of memory 'chf.cells' (%d)", w*h); return false; } memset(chf.cells, 0, sizeof(rcCompactCell)*w*h); chf.spans = new rcCompactSpan[spanCount]; if (!chf.spans) { if (rcGetLog()) rcGetLog()->log(RC_LOG_ERROR, "rcBuildCompactHeightfield: Out of memory 'chf.spans' (%d)", spanCount); return false; } memset(chf.spans, 0, sizeof(rcCompactSpan)*spanCount); const int MAX_HEIGHT = 0xffff; // Fill in cells and spans. int idx = 0; for (int y = 0; y < h; ++y) { for (int x = 0; x < w; ++x) { const rcSpan* s = hf.spans[x + y*w]; // If there are no spans at this cell, just leave the data to index=0, count=0. if (!s) continue; rcCompactCell& c = chf.cells[x+y*w]; c.index = idx; c.count = 0; while (s) { if (s->flags == flags) { const int bot = (int)s->smax; const int top = s->next ? (int)s->next->smin : MAX_HEIGHT; chf.spans[idx].y = (unsigned short)rcClamp(bot, 0, 0xffff); chf.spans[idx].h = (unsigned char)rcClamp(top - bot, 0, 0xff); idx++; c.count++; } s = s->next; } } } // Find neighbour connections. 
for (int y = 0; y < h; ++y) { for (int x = 0; x < w; ++x) { const rcCompactCell& c = chf.cells[x+y*w]; for (int i = (int)c.index, ni = (int)(c.index+c.count); i < ni; ++i) { rcCompactSpan& s = chf.spans[i]; for (int dir = 0; dir < 4; ++dir) { setCon(s, dir, 0xf); const int nx = x + rcGetDirOffsetX(dir); const int ny = y + rcGetDirOffsetY(dir); // First check that the neighbour cell is in bounds. if (nx < 0 || ny < 0 || nx >= w || ny >= h) continue; // Iterate over all neighbour spans and check if any of the is // accessible from current cell. const rcCompactCell& nc = chf.cells[nx+ny*w]; for (int k = (int)nc.index, nk = (int)(nc.index+nc.count); k < nk; ++k) { const rcCompactSpan& ns = chf.spans[k]; const int bot = rcMax(s.y, ns.y); const int top = rcMin(s.y+s.h, ns.y+ns.h); // Check that the gap between the spans is walkable, // and that the climb height between the gaps is not too high. if ((top - bot) >= walkableHeight && rcAbs((int)ns.y - (int)s.y) <= walkableClimb) { // Mark direction as walkable. setCon(s, dir, k - (int)nc.index); break; } } } } } } rcTimeVal endTime = rcGetPerformanceTimer(); if (rcGetBuildTimes()) rcGetBuildTimes()->buildCompact += rcGetDeltaTimeUsec(startTime, endTime); return true; }
/*
 * Compute the quaternion q for a rotation by the angle phi about the axis a.
 *
 * a is handed to vnormal() first, so the caller's array may be modified.
 * The vector part of q is a scaled by sin(phi/2); q[3] is cos(phi/2).
 */
void
axis_to_quat(float a[3], float phi, float q[4])
{
    const float half_angle = phi/2.0f;

    vnormal(a);
    vcopy(a,q);
    vscale(q,sin(half_angle));
    q[3] = cos(half_angle);
}
//------------------------------------------------------------------------------ void calc_color(GzRender *render, GzCoord N_orig, GzColor col, bool mulByK) { GzCoord N = ZEROS; normalizeVector(N_orig, N); Matrix *Ncm = render->nStack.leftMulMatricesOnStack(); if (Ncm == NULL) { fprintf(stderr, "Got NULL from normal stack in %s.\n",__FUNCTION__); } float array[4] = {N[X], N[Y], N[Z], 1}; float Ntransformed[4] = {0, 0, 0, 0}; Ncm->rightMultiply(array, 4, Ntransformed); GzColor ambient = ZEROS; GzColor diffuse = ZEROS; GzColor specular = ZEROS; calc_ambient(render, ambient, mulByK); calc_diffuse(render, Ntransformed, diffuse, mulByK); calc_specular(render, Ntransformed, specular, mulByK); addThreeColors(ambient, diffuse, specular, col); render->km_ka = KM_KA; render->km_kd = KM_KD; render->km_ks = KM_KS; ////////////////////////////////////////////////////////////////////////////////////////////////////// // THIS PART IS NEW FOR THE PROJECT // (the normal homework code is commented out above, all functions it calls are unmodified) #if 1 // For each light, determine what layers of paint to add to the model scalarMultiply(col, 0.0f, col); GzCoord ones = {1.0f, 1.0f, 1.0f}; vcopy(col, ones); GzCoord background_reflectance = BACKGROUND_REFLECTANCE; GzCoord background_transmittance = BACKGROUND_TRANSMITTANCE; for (int i = 0; i < render->numlights; i++) { // This light"s direction float (*ls)[3] = static_cast<float (*)[3]>(render->lights[i]); GzCoord ls_L_orig = {ls[0][0], ls[0][1], ls[0][2]}; // Ambient light color float (*lamb)[3] = static_cast<float (*)[3]>(render->ambientlight); GzColor lambcolor = {lamb[1][0], lamb[1][1], lamb[1][2]}; // This light"s color GzColor ls_intensity = {ls[1][0], ls[1][1], ls[1][2]}; // printVector(ls_intensity, "ls_intensity"); // printVector(render->Ka, "Ka"); // printVector(render->Kd, "Kd"); // printVector(render->Ks, "Ks"); GzCoord diffuse_thicknesses = {0.0f,0.0f,0.0f}; calc_diffuse_thickness(render, Ntransformed, ls_L_orig, 
diffuse_thicknesses); // printVector(diffuse_thicknesses, "diffuse_thicknesses"); GzCoord unlit_thicknesses = {0.0f,0.0f,0.0f}; calc_unlit_thickness(render, Ntransformed, ls_L_orig, unlit_thicknesses); // printVector(unlit_thicknesses, "unlit_thicknesses"); // printf("\n\n"); // Use KM Model to add each of these layers of paint to the color of this vertex GzCoord diffuse_reflectance = ZEROS; GzCoord diffuse_transmittance = ZEROS; GzCoord unlit_reflectance = ZEROS; GzCoord unlit_transmittance = ZEROS; // printf("diffuse layer\n"); GzCoord diffuse_color = DIFFUSE_COLOR; kubelka_munk(diffuse_color, diffuse_thicknesses, diffuse_reflectance, diffuse_transmittance); // printf("unlit layer\n"); GzCoord unlit_color = AMBIENT_COLOR; kubelka_munk(unlit_color, unlit_thicknesses, unlit_reflectance, unlit_transmittance); // printf("\n\n"); // vmult(diffuse_reflectance, ls_intensity, diffuse_thicknesses); // vmult(unlit_reflectance, ls_intensity, unlit_thicknesses); GzCoord composite_reflectance = ZEROS; GzCoord composite_transmittance = ZEROS; km_composite_layers(diffuse_reflectance, diffuse_transmittance, diffuse_reflectance, diffuse_transmittance, background_reflectance, background_transmittance); km_composite_layers(background_reflectance, background_transmittance, unlit_reflectance, unlit_transmittance, diffuse_reflectance, diffuse_transmittance); // printVector(diffuse_reflectance, "diffuse_reflectance"); // vmult(col, col, lambcolor); // break; // ASSUME ONLY ONE COLOR!!! } vcopy(col, background_reflectance); #endif }
/******************************************************************* Subroutine to do the Sub-Level PCA-PPM matrix *pcadata_re: the pointer to the new matrix containing the real part of data projected onto the space defined by the PCA matrix *pcadata_re: the pointer to the new matrix containing the imaginary part of data projected onto the space defined by the PCA matrix *pcavec_re: the pointer to a matrix containing the real part of eigenvector matrix *pcavec_im: the pointer to a matrix containing the imaginary part of eigenvector vector *pcaval_re: the pointer to a vector containing the real part of eigenvalues vector *pcaval_im: the pointer to a vector containing the imaginary part of eigenvalues vector *Zjk: the pointer to a vector containing the Zjk values matrix *subpcappmvec_re: the pointer to a matrix containing the real part of sorted eigenvectors by sub kurtosis rank matrix *subpcappmvec_re: the pointer to a matrix containing the imaginary part of sorted eigenvectors by sub kurtosis rank return value: '1' - successfully exit '0' - exit with waring/error *******************************************************************/ int veSubPCAPPM(matrix *pcadata_re, matrix *pcadata_im, matrix *pcavec_re, matrix *pcavec_im, vector *pcaval_re, vector *pcaval_im, vector *Zjk, matrix *subpcappmvec_re, matrix *subpcappmvec_im) { int m, n; int i, j, u=0, v=0; vector X1n, Xm1; matrix mZjk; matrix M1; matrix data_pow2; matrix data_pow4; vector V1; vector V2; vector V4; vector kurt; int* kurt_id; double sumZjk; double cen_data; bool allreal = true; m=pcadata_re->m; n=pcadata_im->n; vnew(&X1n, n); vnew(&Xm1, m); mnew(&mZjk, m, n); mnew(&M1, m, n); mnew(&data_pow2, m, n); mnew(&data_pow4, m, n); vnew(&V1, n); vnew(&V2, n); vnew(&V4, n); vnew(&kurt, n); kurt_id = new int[n]; vector V1_im; vector Xm1_im; matrix M1_im; double cen_data_im; matrix data_pow2_im; matrix data_pow4_im; vector V2_im; vector V4_im; vector kurt_im; vnew(&Xm1_im, m); mnew(&M1_im, m, n); 
mnew(&data_pow2_im, m, n); mnew(&data_pow4_im, m, n); vnew(&V1_im, n); vnew(&V2_im, n); vnew(&V4_im, n); vnew(&kurt_im, n); // whether complex eigenvalue exists for (i=0; i<n; i++) { if (*(pcaval_im->pr+i) != 0) { allreal = false; break; } } // center the data set its means // data_proj = data_proj - ones(n,1)*(sum(Zjk*ones(1,p).*(data_proj))./sum(Zjk)); vones(&X1n); vones(&Xm1); vvMul(Zjk, &X1n, &mZjk); sumZjk = vsum(Zjk); if (allreal==true) { kurtmodel(&mZjk, sumZjk, pcadata_re, &V1); vvMul(&Xm1, &V1, &M1); for (i=0; i<m*n; i++) { cen_data = *(pcadata_re->pr + i) - *(M1.pr + i); //*(data->pr + i) = cen_data; *(data_pow2.pr+i) = pow(cen_data, 2); *(data_pow4.pr+i) = pow(cen_data, 4); } // calculate kurtosis : kurt(y) = E{y^4}-3(E{y^2})^2 //kurt = sum(Zjk*ones(1,p).*(data_proj.^4))./sum(Zjk)... //- 3*(sum(Zjk*ones(1,p).*(data_proj.^2))./sum(Zjk)).^2; %Not normalized Kurtosis kurtmodel(&mZjk, sumZjk, &data_pow2, &V2); kurtmodel(&mZjk, sumZjk, &data_pow4, &V4); for (j=0; j<n; j++) { *(kurt.pr+j) = *(V4.pr+j) - 3*(pow(*(V2.pr+j), 2)); } } else { ckurtmodel(&mZjk, sumZjk, pcadata_re, pcadata_im, &V1, &V1_im); cvvMul(&Xm1, &Xm1_im, &V1, &V1_im, &M1, &M1_im); for (i=0; i<m*n; i++) { cen_data = *(pcadata_re->pr + i) - *(M1.pr + i); cen_data_im = *(pcadata_im->pr + i) - *(M1_im.pr + i); //*(data->pr + i) = cen_data; *(data_pow2.pr+i) = pow(cen_data, 2) - pow(cen_data_im, 2); *(data_pow2_im.pr+i) = 2 * cen_data * cen_data_im; *(data_pow4.pr+i) = pow(*(data_pow2.pr+i), 2) - pow(*(data_pow2_im.pr+i), 2); *(data_pow4_im.pr+i) = 2 * (*(data_pow2.pr+i)) * (*(data_pow2_im.pr+i)); } // calculate kurtosis : kurt(y) = E{y^4}-3(E{y^2})^2 //kurt = sum(Zjk*ones(1,p).*(data_proj.^4))./sum(Zjk)... 
//- 3*(sum(Zjk*ones(1,p).*(data_proj.^2))./sum(Zjk)).^2; %Not normalized Kurtosis ckurtmodel(&mZjk, sumZjk, &data_pow2, &data_pow2_im, &V2, &V2_im); ckurtmodel(&mZjk, sumZjk, &data_pow4, &data_pow4_im, &V4, &V4_im); for (j=0; j<n; j++) { *(kurt.pr+j) = *(V4.pr+j) - 3*(pow(*(V2.pr+j), 2) - pow(*(V2_im.pr+j), 2)); *(kurt_im.pr+j) = *(V4_im.pr+j) - 3 * 2 * (*(V2.pr+j)) * (*(V2_im.pr+j)); } } // sort kurt value in ascending order and reorder the pca_vec int realeig_num; int *realeig_id; int *compeig_id; vector realkurt; int *real_order; realeig_num = n; for (i=0; i<n; i++) { if (*(pcaval_im->pr+i) != 0) { realeig_num--; } } vnew(&realkurt, realeig_num); realeig_id = new int[realeig_num]; compeig_id = new int[n-realeig_num]; real_order = new int[realeig_num]; for (i=0; i<n; i++) { if (*(pcaval_im->pr+i) == 0) { realeig_id[u] = i; *(realkurt.pr+u) = *(kurt.pr+i); u++; } else { compeig_id[v] = i; v++; } } sort(&realkurt, real_order, 'a'); int *tmp; tmp = new int[realeig_num]; for (i=0; i<realeig_num; i++) { tmp[i] = realeig_id[i]; } for (i=0; i<realeig_num; i++) { realeig_id[i] = tmp[real_order[i]]; } delete []tmp; vector kurt0; vector kurt0_im; vnew(&kurt0, kurt.l); vcopy(&kurt, &kurt0); vnew(&kurt0_im, kurt.l); vcopy(&kurt_im, &kurt0_im); for (i=0; i<realeig_num; i++) { kurt_id[i] = realeig_id[i]; *(kurt.pr+i) = *(realkurt.pr+i); *(kurt_im.pr+i) = 0; } for (i=0; i<n-realeig_num; i++) { kurt_id[i+realeig_num] = compeig_id[i]; *(kurt.pr+i+realeig_num) = *(kurt0.pr + compeig_id[i]); *(kurt_im.pr+i+realeig_num) = *(kurt0_im.pr + compeig_id[i]); } //printf(" the real part of kurt value is : \n"); //vprint(&kurt); //printf(" the imaginary part of kurt value is : \n"); //vprint(&kurt_im); //printf(" the kurt id is : \n"); //for (i=0; i<n; i++) { // printf("%d\t", kurt_id[i]); //} sortcols(kurt_id, pcavec_re, subpcappmvec_re); sortcols(kurt_id, pcavec_im, subpcappmvec_im); vdelete(&X1n); vdelete(&Xm1); mdelete(&mZjk); mdelete(&M1); mdelete(&data_pow2); mdelete(&data_pow4); 
vdelete(&V1); vdelete(&V2); vdelete(&V4); vdelete(&kurt); vdelete(&kurt0); delete []kurt_id; vdelete(&realkurt); delete []realeig_id; delete []compeig_id; delete []real_order; vdelete(&Xm1_im); mdelete(&M1_im); mdelete(&data_pow2_im); mdelete(&data_pow4_im); vdelete(&V1_im); vdelete(&V2_im); vdelete(&V4_im); vdelete(&kurt_im); vdelete(&kurt0_im); return 1; }
/******************************************************************** real TRSM kernel ********************************************************************/ bool ialglib::_i_rmatrixrighttrsmf(int m, int n, const ap::real_2d_array& a, int i1, int j1, bool isupper, bool isunit, int optype, ap::real_2d_array& x, int i2, int j2) { if( m>alglib_r_block || n>alglib_r_block ) return false; // // local buffers // double *pdiag; int i; double __abuf[alglib_r_block*alglib_r_block+alglib_simd_alignment]; double __xbuf[alglib_r_block*alglib_r_block+alglib_simd_alignment]; double __tmpbuf[alglib_r_block+alglib_simd_alignment]; double * const abuf = (double * const) alglib_align(__abuf, alglib_simd_alignment); double * const xbuf = (double * const) alglib_align(__xbuf, alglib_simd_alignment); double * const tmpbuf = (double * const) alglib_align(__tmpbuf,alglib_simd_alignment); // // Prepare // bool uppera; mcopyblock(n, n, &a(i1,j1), optype, a.getstride(), abuf); mcopyblock(m, n, &x(i2,j2), 0, x.getstride(), xbuf); if( isunit ) for(i=0,pdiag=abuf; i<n; i++,pdiag+=alglib_r_block+1) *pdiag = 1.0; if( optype==0 ) uppera = isupper; else uppera = !isupper; // // Solve Y*A^-1=X where A is upper or lower triangular // if( uppera ) { for(i=0,pdiag=abuf; i<n; i++,pdiag+=alglib_r_block+1) { double beta = 1.0/(*pdiag); double alpha = -beta; vcopy(i, abuf+i, alglib_r_block, tmpbuf, 1); mv(m, i, xbuf, tmpbuf, xbuf+i, alglib_r_block, alpha, beta); } mcopyunblock(m, n, xbuf, 0, &x(i2,j2), x.getstride()); } else { for(i=n-1,pdiag=abuf+(n-1)*alglib_r_block+(n-1); i>=0; i--,pdiag-=alglib_r_block+1) { double beta = 1.0/(*pdiag); double alpha = -beta; vcopy(n-1-i, pdiag+alglib_r_block, alglib_r_block, tmpbuf, 1); mv(m, n-1-i, xbuf+i+1, tmpbuf, xbuf+i, alglib_r_block, alpha, beta); } mcopyunblock(m, n, xbuf, 0, &x(i2,j2), x.getstride()); } return true; }
/*
 * Transforms every pending vertex of the render state.
 *
 * For each input vertex the clip position (projection * modelview), the eye
 * position (modelview), the front/back colors and the texture coordinate
 * (texture matrix) are written to the matching processed vertex. With
 * lighting enabled the colors come from compute_lighting(); otherwise the
 * vertex color is copied to both sides. Both colors are clamped to [0, 1].
 */
void __glcore_transform_vertices (GLcontext *g)
{
    GLrenderstate *r = g->renderstate;
    GL_vertex *verts = r->verts;
    GL_procvert *procverts = r->procverts;
    GL_float modelview[4][4];
    GL_float projection[4][4];
    GL_float texture[4][4];
    GL_float composite[4][4];
    GL_float invmodelview[4][4];
    int i;

    /* snapshot the matrices on top of each stack */
    minit(modelview, g->trans.modelview[g->trans.modelviewdepth]);
    minit(projection, g->trans.projection[g->trans.projectiondepth]);
    minit(texture, g->trans.texture[g->trans.texturedepth]);

    /* derived matrices: object -> clip, and the one applied to normals */
    mmult(composite, projection, modelview);
    minvtrans(invmodelview, modelview);

    for (i = 0; i < r->nverts; i++)
    {
        GL_vertex *in = &verts[i];
        GL_procvert *out = &procverts[i];

        /* position */
        mmultv(out->position, composite, in->position);

        /* eye position */
        mmultv(out->eyeposition, modelview, in->position);

        /* color */
        if (!g->lighting.lighting)
        {
            /* unlit: both sides take the vertex color */
            vcopy(out->frontcolor, in->color);
            vcopy(out->backcolor, in->color);
        }
        else
        {
            GL_float objnormal[4];
            GL_float normal[4];

            /* object space normal, extended by a fourth component derived */
            /* from the vertex position (left at zero when w is zero)      */
            vcopy(objnormal, in->normal);
            objnormal[3] = 0.0f;
            if (in->position[3] != 0.0f)
            {
                objnormal[3] = -vdot3(objnormal, in->position);
                objnormal[3] /= in->position[3];
            }

            /* eye space normal */
            mmultv(normal, invmodelview, objnormal);
            if (g->current.normalize)
                vnorm3(normal, normal);

            /* front color */
            compute_lighting(g, out->frontcolor, out->eyeposition,
                             normal, &in->frontmaterial);

            /* back color: lit with the flipped normal, two-sided mode only */
            if (g->lighting.lightmodeltwoside)
            {
                vscale(normal, normal, -1.0f);
                compute_lighting(g, out->backcolor, out->eyeposition,
                                 normal, &in->backmaterial);
            }
        }

        vclamp(out->frontcolor, out->frontcolor, 0.0f, 1.0f);
        vclamp(out->backcolor, out->backcolor, 0.0f, 1.0f);

        /* no texture coordinate generation */

        /* texture coords */
        mmultv(out->texcoord, texture, in->texcoord);
    }
}
// install(dir): build and install the target that lives in $GOROOT/src/dir.
// dir is a slash-separated path relative to $GOROOT/src; nothing is returned.
//
// Phases, in the order they appear below:
//  1. Shortcuts: anything under "misc/" is copied from $GOROOT/misc into
//     tooldir, and "cmd/prof" is skipped when its directory is absent.
//  2. One-time setup of the global gccargs vector (filled while its len is 0)
//     from defaultcc/defaultcflags plus the proto_gccargs tables, and of
//     ldargs from defaultldflags when that string is non-empty.
//  3. Classification of dir (islib / ispkg / isgo) and construction of the
//     final command line in `link` ("ar" for C libraries, a "pack"
//     placeholder for Go packages, the Go linker for cmd/go and cmd/cgo,
//     the host C toolchain otherwise).  link.p[targ] is always the output
//     path and ttarg is its modification time.
//  4. Collection of inputs in `files`: the directory listing minus names
//     starting with "." (and Go files starting with "_"), then adjusted by
//     every matching deptab entry: "*.a" entries go to `lib`, "dir/*"
//     entries add that directory's contents, "-prefix" entries remove
//     matching files, anything else is added as a file.
//  5. Staleness check: files are filtered by depsuffix and shouldbuild();
//     the target is stale when rebuildall is set or when any remaining file
//     or library is newer than ttarg.  The function jumps to `out` when no
//     files remain or the target is not stale.
//  6. pkg/runtime only: per-GOOS/GOARCH headers are copied into workdir,
//     and .goc files are converted with goc2c().  For every target, files
//     whose name matches a gentab prefix are (re)generated; a file listed
//     in `missing` that no generator produced is a fatal error.
//  7. When cross-compiling a Go target the build itself is skipped
//     (goto nobuild).  Otherwise each .c/.s file is compiled through
//     bgrunv() and collected with bgwait(), and for Go targets all .go
//     files are then handed to the Go compiler in a single run.
//  8. The target is removed and rebuilt (dopack() for packages, otherwise
//     the `link` command); pkg/runtime additionally installs cgocall.h and
//     runtime.h next to the package archive.
//
// Cleanup: every exit path goes through the `out` label (directly or by
// falling through `nobuild`), which removes the files recorded in `clean`
// and frees all local Buf and Vec values.
// install installs the library, package, or binary associated with dir, // which is relative to $GOROOT/src. static void install(char *dir) { char *name, *p, *elem, *prefix, *exe; bool islib, ispkg, isgo, stale, ispackcmd; Buf b, b1, path; Vec compile, files, link, go, missing, clean, lib, extra; Time ttarg, t; int i, j, k, n, doclean, targ; if(vflag) { if(!streq(goos, gohostos) || !streq(goarch, gohostarch)) errprintf("%s (%s/%s)\n", dir, goos, goarch); else errprintf("%s\n", dir); } binit(&b); binit(&b1); binit(&path); vinit(&compile); vinit(&files); vinit(&link); vinit(&go); vinit(&missing); vinit(&clean); vinit(&lib); vinit(&extra); // path = full path to dir. bpathf(&path, "%s/src/%s", goroot, dir); name = lastelem(dir); // For misc/prof, copy into the tool directory and we're done. if(hasprefix(dir, "misc/")) { copy(bpathf(&b, "%s/%s", tooldir, name), bpathf(&b1, "%s/misc/%s", goroot, name), 1); goto out; } // For release, cmd/prof is not included. if((streq(dir, "cmd/prof")) && !isdir(bstr(&path))) { if(vflag > 1) errprintf("skipping %s - does not exist\n", dir); goto out; } // set up gcc command line on first run. 
if(gccargs.len == 0) { bprintf(&b, "%s %s", defaultcc, defaultcflags); splitfields(&gccargs, bstr(&b)); for(i=0; i<nelem(proto_gccargs); i++) vadd(&gccargs, proto_gccargs[i]); if(defaultcflags[0] == '\0') { for(i=0; i<nelem(proto_gccargs2); i++) vadd(&gccargs, proto_gccargs2[i]); } if(contains(gccargs.p[0], "clang")) { // disable ASCII art in clang errors, if possible vadd(&gccargs, "-fno-caret-diagnostics"); // clang is too smart about unused command-line arguments vadd(&gccargs, "-Qunused-arguments"); } // disable word wrapping in error messages vadd(&gccargs, "-fmessage-length=0"); if(streq(gohostos, "darwin")) { // golang.org/issue/5261 vadd(&gccargs, "-mmacosx-version-min=10.6"); } } if(ldargs.len == 0 && defaultldflags[0] != '\0') { bprintf(&b, "%s", defaultldflags); splitfields(&ldargs, bstr(&b)); } islib = hasprefix(dir, "lib") || streq(dir, "cmd/cc") || streq(dir, "cmd/gc"); ispkg = hasprefix(dir, "pkg"); isgo = ispkg || streq(dir, "cmd/go") || streq(dir, "cmd/cgo"); exe = ""; if(streq(gohostos, "windows")) exe = ".exe"; // Start final link command line. // Note: code below knows that link.p[targ] is the target. ispackcmd = 0; if(islib) { // C library. vadd(&link, "ar"); if(streq(gohostos, "plan9")) vadd(&link, "rc"); else vadd(&link, "rsc"); prefix = ""; if(!hasprefix(name, "lib")) prefix = "lib"; targ = link.len; vadd(&link, bpathf(&b, "%s/pkg/obj/%s_%s/%s%s.a", goroot, gohostos, gohostarch, prefix, name)); } else if(ispkg) { // Go library (package). ispackcmd = 1; vadd(&link, "pack"); // program name - unused here, but all the other cases record one p = bprintf(&b, "%s/pkg/%s_%s/%s", goroot, goos, goarch, dir+4); *xstrrchr(p, '/') = '\0'; xmkdirall(p); targ = link.len; vadd(&link, bpathf(&b, "%s/pkg/%s_%s/%s.a", goroot, goos, goarch, dir+4)); } else if(streq(dir, "cmd/go") || streq(dir, "cmd/cgo")) { // Go command. 
vadd(&link, bpathf(&b, "%s/%sl", tooldir, gochar)); vadd(&link, "-o"); elem = name; if(streq(elem, "go")) elem = "go_bootstrap"; targ = link.len; vadd(&link, bpathf(&b, "%s/%s%s", tooldir, elem, exe)); } else { // C command. Use gccargs and ldargs. if(streq(gohostos, "plan9")) { vadd(&link, bprintf(&b, "%sl", gohostchar)); vadd(&link, "-o"); targ = link.len; vadd(&link, bpathf(&b, "%s/%s", tooldir, name)); } else { vcopy(&link, gccargs.p, gccargs.len); vcopy(&link, ldargs.p, ldargs.len); if(sflag) vadd(&link, "-static"); vadd(&link, "-o"); targ = link.len; vadd(&link, bpathf(&b, "%s/%s%s", tooldir, name, exe)); if(streq(gohostarch, "amd64")) vadd(&link, "-m64"); else if(streq(gohostarch, "386")) vadd(&link, "-m32"); } } ttarg = mtime(link.p[targ]); // Gather files that are sources for this target. // Everything in that directory, and any target-specific // additions. xreaddir(&files, bstr(&path)); // Remove files beginning with . or _, // which are likely to be editor temporary files. // This is the same heuristic build.ScanDir uses. // There do exist real C files beginning with _, // so limit that check to just Go files. 
n = 0; for(i=0; i<files.len; i++) { p = files.p[i]; if(hasprefix(p, ".") || (hasprefix(p, "_") && hassuffix(p, ".go"))) xfree(p); else files.p[n++] = p; } files.len = n; for(i=0; i<nelem(deptab); i++) { if(streq(dir, deptab[i].prefix) || (hassuffix(deptab[i].prefix, "/") && hasprefix(dir, deptab[i].prefix))) { for(j=0; (p=deptab[i].dep[j])!=nil; j++) { breset(&b1); bwritestr(&b1, p); bsubst(&b1, "$GOROOT", goroot); bsubst(&b1, "$GOOS", goos); bsubst(&b1, "$GOARCH", goarch); p = bstr(&b1); if(hassuffix(p, ".a")) { vadd(&lib, bpathf(&b, "%s", p)); continue; } if(hassuffix(p, "/*")) { bpathf(&b, "%s/%s", bstr(&path), p); b.len -= 2; xreaddir(&extra, bstr(&b)); bprintf(&b, "%s", p); b.len -= 2; for(k=0; k<extra.len; k++) vadd(&files, bpathf(&b1, "%s/%s", bstr(&b), extra.p[k])); continue; } if(hasprefix(p, "-")) { p++; n = 0; for(k=0; k<files.len; k++) { if(hasprefix(files.p[k], p)) xfree(files.p[k]); else files.p[n++] = files.p[k]; } files.len = n; continue; } vadd(&files, p); } } } vuniq(&files); // Convert to absolute paths. for(i=0; i<files.len; i++) { if(!isabs(files.p[i])) { bpathf(&b, "%s/%s", bstr(&path), files.p[i]); xfree(files.p[i]); files.p[i] = btake(&b); } } // Is the target up-to-date? stale = rebuildall; n = 0; for(i=0; i<files.len; i++) { p = files.p[i]; for(j=0; j<nelem(depsuffix); j++) if(hassuffix(p, depsuffix[j])) goto ok; xfree(files.p[i]); continue; ok: t = mtime(p); if(t != 0 && !hassuffix(p, ".a") && !shouldbuild(p, dir)) { xfree(files.p[i]); continue; } if(hassuffix(p, ".go")) vadd(&go, p); if(t > ttarg) stale = 1; if(t == 0) { vadd(&missing, p); files.p[n++] = files.p[i]; continue; } files.p[n++] = files.p[i]; } files.len = n; // If there are no files to compile, we're done. if(files.len == 0) goto out; for(i=0; i<lib.len && !stale; i++) if(mtime(lib.p[i]) > ttarg) stale = 1; if(!stale) goto out; // For package runtime, copy some files into the work space. 
if(streq(dir, "pkg/runtime")) { copy(bpathf(&b, "%s/arch_GOARCH.h", workdir), bpathf(&b1, "%s/arch_%s.h", bstr(&path), goarch), 0); copy(bpathf(&b, "%s/defs_GOOS_GOARCH.h", workdir), bpathf(&b1, "%s/defs_%s_%s.h", bstr(&path), goos, goarch), 0); p = bpathf(&b1, "%s/signal_%s_%s.h", bstr(&path), goos, goarch); if(isfile(p)) copy(bpathf(&b, "%s/signal_GOOS_GOARCH.h", workdir), p, 0); copy(bpathf(&b, "%s/os_GOOS.h", workdir), bpathf(&b1, "%s/os_%s.h", bstr(&path), goos), 0); copy(bpathf(&b, "%s/signals_GOOS.h", workdir), bpathf(&b1, "%s/signals_%s.h", bstr(&path), goos), 0); } // Generate any missing files; regenerate existing ones. for(i=0; i<files.len; i++) { p = files.p[i]; elem = lastelem(p); for(j=0; j<nelem(gentab); j++) { if(gentab[j].gen == nil) continue; if(hasprefix(elem, gentab[j].nameprefix)) { if(vflag > 1) errprintf("generate %s\n", p); gentab[j].gen(bstr(&path), p); // Do not add generated file to clean list. // In pkg/runtime, we want to be able to // build the package with the go tool, // and it assumes these generated files already // exist (it does not know how to build them). // The 'clean' command can remove // the generated files. goto built; } } // Did not rebuild p. if(find(p, missing.p, missing.len) >= 0) fatal("missing file %s", p); built:; } // One more copy for package runtime. // The last batch was required for the generators. // This one is generated. if(streq(dir, "pkg/runtime")) { copy(bpathf(&b, "%s/zasm_GOOS_GOARCH.h", workdir), bpathf(&b1, "%s/zasm_%s_%s.h", bstr(&path), goos, goarch), 0); } // Generate .c files from .goc files. 
if(streq(dir, "pkg/runtime")) { for(i=0; i<files.len; i++) { p = files.p[i]; if(!hassuffix(p, ".goc")) continue; // b = path/zp but with _goos_goarch.c instead of .goc bprintf(&b, "%s%sz%s", bstr(&path), slash, lastelem(p)); b.len -= 4; bwritef(&b, "_%s_%s.c", goos, goarch); goc2c(p, bstr(&b)); vadd(&files, bstr(&b)); } vuniq(&files); } if((!streq(goos, gohostos) || !streq(goarch, gohostarch)) && isgo) { // We've generated the right files; the go command can do the build. if(vflag > 1) errprintf("skip build for cross-compile %s\n", dir); goto nobuild; } // Compile the files. for(i=0; i<files.len; i++) { if(!hassuffix(files.p[i], ".c") && !hassuffix(files.p[i], ".s")) continue; name = lastelem(files.p[i]); vreset(&compile); if(!isgo) { // C library or tool. if(streq(gohostos, "plan9")) { vadd(&compile, bprintf(&b, "%sc", gohostchar)); vadd(&compile, "-FTVwp"); vadd(&compile, "-DPLAN9"); vadd(&compile, "-D__STDC__=1"); vadd(&compile, "-D__SIZE_TYPE__=ulong"); // for GNU Bison vadd(&compile, bpathf(&b, "-I%s/include/plan9", goroot)); vadd(&compile, bpathf(&b, "-I%s/include/plan9/%s", goroot, gohostarch)); } else { vcopy(&compile, gccargs.p, gccargs.len); vadd(&compile, "-c"); if(streq(gohostarch, "amd64")) vadd(&compile, "-m64"); else if(streq(gohostarch, "386")) vadd(&compile, "-m32"); vadd(&compile, "-I"); vadd(&compile, bpathf(&b, "%s/include", goroot)); } if(streq(dir, "lib9")) vadd(&compile, "-DPLAN9PORT"); vadd(&compile, "-I"); vadd(&compile, bstr(&path)); // lib9/goos.c gets the default constants hard-coded. 
if(streq(name, "goos.c")) { vadd(&compile, "-D"); vadd(&compile, bprintf(&b, "GOOS=\"%s\"", goos)); vadd(&compile, "-D"); vadd(&compile, bprintf(&b, "GOARCH=\"%s\"", goarch)); bprintf(&b1, "%s", goroot_final); bsubst(&b1, "\\", "\\\\"); // turn into C string vadd(&compile, "-D"); vadd(&compile, bprintf(&b, "GOROOT=\"%s\"", bstr(&b1))); vadd(&compile, "-D"); vadd(&compile, bprintf(&b, "GOVERSION=\"%s\"", goversion)); vadd(&compile, "-D"); vadd(&compile, bprintf(&b, "GOARM=\"%s\"", goarm)); vadd(&compile, "-D"); vadd(&compile, bprintf(&b, "GO386=\"%s\"", go386)); vadd(&compile, "-D"); vadd(&compile, bprintf(&b, "GO_EXTLINK_ENABLED=\"%s\"", goextlinkenabled)); } // gc/lex.c records the GOEXPERIMENT setting used during the build. if(streq(name, "lex.c")) { xgetenv(&b, "GOEXPERIMENT"); vadd(&compile, "-D"); vadd(&compile, bprintf(&b1, "GOEXPERIMENT=\"%s\"", bstr(&b))); } } else { // Supporting files for a Go package. if(hassuffix(files.p[i], ".s")) vadd(&compile, bpathf(&b, "%s/%sa", tooldir, gochar)); else { vadd(&compile, bpathf(&b, "%s/%sc", tooldir, gochar)); vadd(&compile, "-F"); vadd(&compile, "-V"); vadd(&compile, "-w"); } vadd(&compile, "-I"); vadd(&compile, workdir); vadd(&compile, "-I"); vadd(&compile, bprintf(&b, "%s/pkg/%s_%s", goroot, goos, goarch)); vadd(&compile, "-D"); vadd(&compile, bprintf(&b, "GOOS_%s", goos)); vadd(&compile, "-D"); vadd(&compile, bprintf(&b, "GOARCH_%s", goarch)); vadd(&compile, "-D"); vadd(&compile, bprintf(&b, "GOOS_GOARCH_%s_%s", goos, goarch)); } bpathf(&b, "%s/%s", workdir, lastelem(files.p[i])); doclean = 1; if(!isgo && streq(gohostos, "darwin")) { // To debug C programs on OS X, it is not enough to say -ggdb // on the command line. You have to leave the object files // lying around too. Leave them in pkg/obj/, which does not // get removed when this tool exits. 
bpathf(&b1, "%s/pkg/obj/%s", goroot, dir); xmkdirall(bstr(&b1)); bpathf(&b, "%s/%s", bstr(&b1), lastelem(files.p[i])); doclean = 0; } // Change the last character of the output file (which was c or s). if(streq(gohostos, "plan9")) b.p[b.len-1] = gohostchar[0]; else b.p[b.len-1] = 'o'; vadd(&compile, "-o"); vadd(&compile, bstr(&b)); vadd(&compile, files.p[i]); bgrunv(bstr(&path), CheckExit, &compile); vadd(&link, bstr(&b)); if(doclean) vadd(&clean, bstr(&b)); } bgwait(); if(isgo) { // The last loop was compiling individual files. // Hand the Go files to the compiler en masse. vreset(&compile); vadd(&compile, bpathf(&b, "%s/%sg", tooldir, gochar)); bpathf(&b, "%s/_go_.a", workdir); vadd(&compile, "-pack"); vadd(&compile, "-o"); vadd(&compile, bstr(&b)); vadd(&clean, bstr(&b)); if(!ispackcmd) vadd(&link, bstr(&b)); vadd(&compile, "-p"); if(hasprefix(dir, "pkg/")) vadd(&compile, dir+4); else vadd(&compile, "main"); if(streq(dir, "pkg/runtime")) vadd(&compile, "-+"); vcopy(&compile, go.p, go.len); runv(nil, bstr(&path), CheckExit, &compile); if(ispackcmd) { xremove(link.p[targ]); dopack(link.p[targ], bstr(&b), &link.p[targ+1], link.len - (targ+1)); goto nobuild; } } if(!islib && !isgo) { // C binaries need the libraries explicitly, and -lm. vcopy(&link, lib.p, lib.len); if(!streq(gohostos, "plan9")) vadd(&link, "-lm"); } // Remove target before writing it. xremove(link.p[targ]); runv(nil, nil, CheckExit, &link); nobuild: // In package runtime, we install runtime.h and cgocall.h too, // for use by cgo compilation. 
if(streq(dir, "pkg/runtime")) { copy(bpathf(&b, "%s/pkg/%s_%s/cgocall.h", goroot, goos, goarch), bpathf(&b1, "%s/src/pkg/runtime/cgocall.h", goroot), 0); copy(bpathf(&b, "%s/pkg/%s_%s/runtime.h", goroot, goos, goarch), bpathf(&b1, "%s/src/pkg/runtime/runtime.h", goroot), 0); } out: for(i=0; i<clean.len; i++) xremove(clean.p[i]); bfree(&b); bfree(&b1); bfree(&path); vfree(&compile); vfree(&files); vfree(&link); vfree(&go); vfree(&missing); vfree(&clean); vfree(&lib); vfree(&extra); }
int main(int argc, char **argv) { struct timeval tstart, tstop; unsigned long int meas[NB_MEASURE]; unsigned long int meas2[NB_MEASURE]; unsigned long int duration = 0; unsigned long int var = 0; unsigned int size = 1; int verbose = 0; int i = 0; int c; int prefillingcache = 0; int coef = 1; int nbopt = 0; int flops = 0; while ((c = getopt(argc, argv, "cf")) != -1) switch (c) { case 'c': prefillingcache = 1; nbopt++; break; case 'f': flops = 1; nbopt++; break; default: abort(); } if (2 <= argc) { int pos = nbopt + 1; if (NULL != strcasestr(argv[pos], "kB")) coef = 1024; else if (NULL != strcasestr(argv[pos], "MB")) coef = 1024 * 1024; else if (NULL != strcasestr(argv[pos], "GB")) coef = 1024 * 1024 * 1024; size *= coef * atoi(argv[pos]); } if (1 == coef) { size *= sizeof(float); } //Setup kernel arguments float *in = (float *) malloc(size); float *out = (float *) malloc(size); memset(out, 0, size); memset(in, 0, size); for (i = 0; i < (size / sizeof(float)); i++) in[i] = i; // Mesure NBR_COPY copy vector //gettimeofday(&tstart, NULL); //for (int i = 0; i < LOOPS; i++) { SNK_INIT_LPARM(lparm, size / sizeof(float)); //Fill Caches if (prefillingcache) { printf("Pre-filling cache option SET\n"); for (i = 0; i < 64; i++) { if(flops) flops_3(in, out, lparm) else vcopy(in, out, lparm); } } for (i = 0; i < NB_MEASURE; i++) { gettimeofday(&tstart, NULL); vcopy(in, out, lparm); gettimeofday(&tstop, NULL); meas[i] = ((tstop.tv_sec - tstart.tv_sec) * 1000000L + tstop.tv_usec) - tstart.tv_usec; //meas2[i] = meas[i] * meas[i]; } for (i = 0; i < NB_MEASURE; i++) { duration += meas[i]; //var += meas2[i]; } duration /= NB_MEASURE; for (i = 0; i < NB_MEASURE; i++) { var += ((meas[i] - duration) * (meas[i] - duration)); } var /= NB_MEASURE; //var -= duration * duration; if(flops){ printf ("HSA: Vector of %lu integer of %d-bytes = %lu Bytes takes %lu usec [+/-var %lu] => Speed = %.3f\n", (size / sizeof(float)), (int) sizeof(float), size, duration, var, (float)(3.0 * size/sizeof(float) 
/ duration * 1000000)); } else{ printf ("HSA: Vector of %lu integer of %d-bytes = %lu Bytes takes %lu usec [+/-var %lu]\n", (size / sizeof(float)), (int) sizeof(float), size, duration, var); } //Validate bool valid = true; int failIndex = 0; for (i = 0; i < (size / sizeof(float)); i++) { if (verbose && i < 10) printf("in[%d]=%d, out[%d]=%d, ", i, in[i], i, out[i]); if (out[i] != in[i]) { failIndex = i; valid = false; break; } } if (valid) { if (verbose) printf("passed validation\n"); } else printf("VALIDATION FAILED!\nBad index: %d\n", failIndex); free(in); free(out); return 0; }
int main(int argc, char* argv[]) { double t1, t2, t3, t4, t5; double sum1, sum2, sum3, sum4; int arg = 1, len = 0, iters = 0, verb = 0, run = 1; int do_vcopy = 1, do_vadd = 1, do_vjacobi = 1; while(argc>arg) { if (strcmp(argv[arg],"-v")==0) verb++; else if (strcmp(argv[arg],"-vv")==0) verb+=2; else if (strcmp(argv[arg],"-n")==0) run = 0; else if (strcmp(argv[arg],"-c")==0) do_vadd = 0, do_vjacobi = 0; else if (strcmp(argv[arg],"-a")==0) do_vcopy = 0, do_vjacobi = 0; else if (strcmp(argv[arg],"-j")==0) do_vcopy = 0, do_vadd = 0; else break; arg++; } if (argc>arg) { len = atoi(argv[arg]); arg++; } if (argc>arg) { iters = atoi(argv[arg]); arg++; } if (len == 0) len = 10000; if (iters == 0) iters = 20; len = len * 1000; printf("Alloc/init 3 double arrays of length %d ...\n", len); double* a = (double*) malloc(len * sizeof(double)); double* b = (double*) malloc(len * sizeof(double)); double* c = (double*) malloc(len * sizeof(double)); for(int i = 0; i<len; i++) { a[i] = 1.0; b[i] = (double) (i % 20); c[i] = 3.0; } // Generate vectorized variants & run against naive/original #if __AVX__ bool do32 = true; #else bool do32 = false; #endif // vcopy if (do_vcopy) { vcopy_t vcopy16, vcopy32; Rewriter* rc16 = dbrew_new(); if (verb>1) dbrew_verbose(rc16, true, true, true); dbrew_set_function(rc16, (uint64_t) vcopy); dbrew_config_parcount(rc16, 3); dbrew_config_force_unknown(rc16, 0); dbrew_set_vectorsize(rc16, 16); vcopy16 = (vcopy_t) dbrew_rewrite(rc16, a, b, len); if (verb) decode_func(rc16, "vcopy16"); if (do32) { Rewriter* rc32 = dbrew_new(); if (verb>1) dbrew_verbose(rc32, true, true, true); dbrew_set_function(rc32, (uint64_t) vcopy); dbrew_config_parcount(rc32, 3); dbrew_config_force_unknown(rc32, 0); dbrew_set_vectorsize(rc32, 32); vcopy32 = (vcopy_t) dbrew_rewrite(rc32, a, b, len); if (verb) decode_func(rc32, "vcopy32"); } printf("Running %d iterations of vcopy ...\n", iters); t1 = wtime(); for(int iter = 0; iter < iters; iter++) naive_vcopy(a, b, len); t2 = wtime(); 
for(int iter = 0; iter < iters; iter++) vcopy(a, b, len); t3 = wtime(); if (run) for(int iter = 0; iter < iters; iter++) vcopy16(a, b, len); t4 = wtime(); if (do32 && run) for(int iter = 0; iter < iters; iter++) vcopy32(a, b, len); t5 = wtime(); printf(" naive: %.3f s, un-rewritten: %.3f s, rewritten-16: %.3f s", t2-t1, t3-t2, t4-t3); if (do32) printf(", rewritten-32: %.3f s", t5-t4); printf("\n"); } // vadd if (do_vadd) { vadd_t vadd16, vadd32; Rewriter* ra16 = dbrew_new(); if (verb>1) dbrew_verbose(ra16, true, true, true); dbrew_set_function(ra16, (uint64_t) vadd); dbrew_config_parcount(ra16, 4); dbrew_config_force_unknown(ra16, 0); dbrew_set_vectorsize(ra16, 16); vadd16 = (vadd_t) dbrew_rewrite(ra16, a, b, c, len); if (verb) decode_func(ra16, "vadd16"); if (do32) { Rewriter* ra32 = dbrew_new(); if (verb>1) dbrew_verbose(ra32, true, true, true); dbrew_set_function(ra32, (uint64_t) vadd); dbrew_config_parcount(ra32, 4); dbrew_config_force_unknown(ra32, 0); dbrew_set_vectorsize(ra32, 32); vadd32 = (vadd_t) dbrew_rewrite(ra32, a, b, c, len); if (verb) decode_func(ra32, "vadd32"); } sum1 = 0.0, sum2 = 0.0, sum3 = 0.0, sum4 = 0.0; printf("Running %d iterations of vadd ...\n", iters); t1 = wtime(); for(int iter = 0; iter < iters; iter++) naive_vadd(a, b, c, len); for(int i = 0; i < len; i++) sum1 += a[i]; t2 = wtime(); for(int iter = 0; iter < iters; iter++) vadd(a, b, c, len); for(int i = 0; i < len; i++) sum2 += a[i]; t3 = wtime(); if (run) for(int iter = 0; iter < iters; iter++) vadd16(a, b, c, len); for(int i = 0; i < len; i++) sum3 += a[i]; t4 = wtime(); if (do32 && run) for(int iter = 0; iter < iters; iter++) vadd32(a, b, c, len); for(int i = 0; i < len; i++) sum4 += a[i]; t5 = wtime(); printf(" naive: %.3f s, un-rewritten: %.3f s, rewritten-16: %.3f s", t2-t1, t3-t2, t4-t3); if (do32) printf(", rewritten-32: %.3f s", t5-t4); printf("\n"); printf(" sum naive: %f, sum rewritten-16: %f, sum rewritten-16: %f\n", sum1, sum3, sum4); } // vjacobi_1d if (do_vjacobi) { 
vcopy_t vjacobi_1d16, vjacobi_1d32; Rewriter* rj16 = dbrew_new(); if (verb>1) dbrew_verbose(rj16, true, true, true); dbrew_set_function(rj16, (uint64_t) vjacobi_1d); dbrew_config_parcount(rj16, 3); dbrew_config_force_unknown(rj16, 0); dbrew_set_vectorsize(rj16, 16); vjacobi_1d16 = (vcopy_t) dbrew_rewrite(rj16, a, b, len); if (verb) decode_func(rj16, "vjacobi_1d16"); if (do32) { Rewriter* rj32 = dbrew_new(); if (verb>1) dbrew_verbose(rj32, true, true, true); dbrew_set_function(rj32, (uint64_t) vjacobi_1d); dbrew_config_parcount(rj32, 3); dbrew_config_force_unknown(rj32, 0); dbrew_set_vectorsize(rj32, 32); vjacobi_1d32 = (vcopy_t) dbrew_rewrite(rj32, a, b, len); if (verb) decode_func(rj32, "vjacobi_1d32"); } sum1 = 0.0, sum2 = 0.0, sum3 = 0.0, sum4 = 0.0; printf("Running %d iterations of vjacobi_1d ...\n", iters); t1 = wtime(); for(int iter = 0; iter < iters; iter++) naive_vjacobi_1d(a+1, b+1, len-2); for(int i = 0; i < len; i++) sum1 += a[i]; t2 = wtime(); for(int iter = 0; iter < iters; iter++) vjacobi_1d(a+1, b+1, len-2); for(int i = 0; i < len; i++) sum2 += a[i]; t3 = wtime(); if (run) for(int iter = 0; iter < iters; iter++) vjacobi_1d16(a+1, b+1, len-2); for(int i = 0; i < len; i++) sum3 += a[i]; t4 = wtime(); if (do32 && run) for(int iter = 0; iter < iters; iter++) vjacobi_1d32(a+1, b+1, len-2); for(int i = 0; i < len; i++) sum4 += a[i]; t5 = wtime(); printf(" naive: %.3f s, un-rewritten: %.3f s, rewritten-16: %.3f s", t2-t1, t3-t2, t4-t3); if (do32) printf(", rewritten-32: %.3f s", t5-t4); printf("\n"); printf(" sum naive: %f, sum rewritten-16: %f, sum rewritten-16: %f\n", sum1, sum3, sum4); } }
/* Routines that handle the eigensolver. * * Linear stability analysis * Solve J z = t M z * * where * * input: * J = jacobian matrix * M = mass or overlap matrix * * output: * z = eigenvectors * t = eigenvalues * * Friendly warning: * Do not edit this unless you know what you are doing!! * * Originally written by Ian Gates * pre-CVS modification history: * - Sep 24, 1997, first checkin * - Feb 98 -> Oct 98, another checkin * - Jan 13, 2000, MMH rearranged and conformed to Goma style. */ void eggrollwrap(int *istuff, /* info for eigenvalue extraction */ dbl *dstuff, /* info for eigenvalue extraction */ int *ija, /* column pointer array */ dbl *jac, /* nonzero array */ dbl *mas, /* nonzero array - same structure as jac[] (ija[]) */ dbl *x, /* Value of the solution vector */ char *ExoFileOut, /* Name of exoII output file */ int prob_type, dbl delta_t, /* time step size */ dbl theta, /* variable time integration parameter explicit (theta = 1) to implicit (theta = 0) */ dbl *x_old, /* Value of the old solution vector */ dbl *xdot, /* Value of xdot predicted for new solution */ dbl *xdot_old, /* dx/dt at previous time step */ dbl *resid_vector, int *converged, /* whether the Newton has converged */ int *nprint, /* counter for time step number */ int tnv, /* number of nodal results */ int tnv_post, /* number of post processing results */ struct Results_Description *rd, int *gindex, int *p_gsize, dbl *gvec, dbl time_value, Exo_DB *exo, /* ptr to finite element mesh db */ int Num_Proc, /* number of processors used */ Dpi *dpi) /* ptr to distributed processing info */ { int i, j, ic, nj, nnz_j, first_linear_solver_call, Factor_Flag, matr_form, error, rcflag, action, ev_n, ev_jac, filter, mm, max_itr, nev_want, nev_found, lead, /* read_form, soln_tech, push_mode, */ push_mode, init_shft, recycle; dbl stol, ivector, dwork[20]; dbl *ev_e, *ev_i, *ev_r, *ev_x, *v1, *v2, *mat, **evect, **schur; char save_ExoFileOut[MAX_FNL]; static int UMF_system_id; /* Used to uniquely identify 
the * explicit fill system to solve from * the other UMF systems. */ /* Initialize */ ic = error = rcflag = action = 0; ev_jac = 0; matr_form = 1; /* Set values */ mm = istuff[0]; nj = istuff[1]; nnz_j = istuff[2]; filter = istuff[3]; recycle = istuff[4]; nev_want = istuff[6]; init_shft = istuff[7]; max_itr = istuff[8]; push_mode = istuff[9]; stol = dstuff[0]; ivector = dstuff[3]; printf(" Initializing variables and allocating space... "); /* Allocate spectrum storage */ ev_e = Dvector_birth(mm+5); ev_i = Dvector_birth(mm+5); ev_r = Dvector_birth(mm+5); ev_x = Dvector_birth(mm+5); /* Set initial (real) shifts */ ev_n = init_shft; vcopy(init_shft, &ev_r[0], 1.0, &dstuff[10]); /* Allocate auxiliary work vectors */ mat = Dvector_birth(nnz_j+5); /* Allocate eigenvectors and schur storage */ i = nj+5; j = mm+5; evect = Dmatrix_birth(j, i); schur = Dmatrix_birth(j, i); /* Allocate reverse communication vectors */ v1 = Dvector_birth(nj+5); v2 = Dvector_birth(nj+5); /* Check for something that seems to make no difference if it's on, * except for occasionally causing seg faults... 
*/ if(recycle != 0) EH(-1, "Eigen recycle currently doesn't work, turn it off."); /* Set initial vector */ vinit(nj, &v1[0], 0.5); /* GEVP solution */ ic = 0; first_linear_solver_call = +1; do { ic++; /* printf("ic = %d\n", ic); fflush(stdout); */ gevp_solver_rc(nj, mm, max_itr, stol, filter, &ev_n, &ev_r[0], &ev_i[0], &ev_e[0], &ev_x[0], &lead, ev_jac, nev_want, &nev_found, schur, evect, recycle, ivector, 0, &rcflag, &action, &dwork[0], &v1[0], &v2[0]); /* printf("action = %d\n", action); fflush(stdout); */ switch (action) { case 0: /* All done */ break; case 1: /* v2 = J*v1 */ MV_MSR(&nj, &ija[0], &jac[0], &v1[0], &v2[0]); break; case 2: /* v2 = M*v1 */ MV_MSR(&nj, &ija[0], &mas[0], &v1[0], &v2[0]); break; case 3: /* inv(J-sM) */ /* Shift matrix step */ v2sum(nnz_j, &mat[0], 1.0, &jac[0], -dwork[0], &mas[0]); /* Invert step - get LU for later */ if(first_linear_solver_call == 1) { Factor_Flag = -2; UMF_system_id = -1; } else Factor_Flag = -1; /* printf("Calling SL_UMF, first_linear_solver_call = %d, Factor_Flag = %d\n", first_linear_solver_call, Factor_Flag); fflush(stdout); */ UMF_system_id = SL_UMF(UMF_system_id, &first_linear_solver_call, &Factor_Flag, &matr_form, &nj, &nnz_j, &ija[0], &ija[0], &mat[0], &v1[0], &v2[0]); first_linear_solver_call = 0; break; case 4: /* v2 = inv(J-sM)*M*v1 */ Factor_Flag = 3; if(first_linear_solver_call) EH(-1, "Tried to transform eigenvectors before a solve!"); gevp_transformation(UMF_system_id, first_linear_solver_call, Factor_Flag, matr_form, 1, nj, nnz_j, &ija[0], &jac[0], &mas[0], &mat[0], /* soln_tech, */ &v2[0], &v1[0], dwork[0], dwork[1]); break; default: EH(-1, "Uh-oh! I shouldn't be here!"); break; } /* switch(action) */ if (ic > 10000) error = 1; } while ((rcflag != 0) && (error == 0)); /* Error check */ if (error == 1) { puts(" E: Too many iterations. Escape. 
"); exit(-1); } /* De-allocate solver storage */ first_linear_solver_call = -1; /* MMH sez: If first_linear_solver_call == -1, then we want to * deallocate memory so we shouldn't be trying to solve anything! * This was FMR'ing b/c SL_UMF was being called with * first_linear_solver_call = -1, and Factor_Flag = 3. Bad. */ Factor_Flag = -3; UMF_system_id = SL_UMF(UMF_system_id, &first_linear_solver_call, &Factor_Flag, &matr_form, &nj, &nnz_j, ija, ija, mat, &v1[0], &v2[0]); /* Display results */ printf("\n-------------------------------------------------------------------------------\n"); if(Linear_Stability == LSA_3D_OF_2D) printf("NORMAL MODE WAVE NUMBER = %g\n", LSA_3D_of_2D_wave_number); printf(" Eigensolver required %d iterations.\n",ic); printf(" Found %d converged eigenvalues.\n", nev_found); printf(" Leading Eigenvalue = % 10.6e%+10.6e i RES = % 10.6e\n", ev_r[lead], ev_i[lead], ev_e[lead]); printf(" Real Imag RES\n"); for (i=0;i<nev_found;i++) printf(" % 10.6e %+10.6e i % 10.6e\n", ev_r[i], ev_i[i], ev_e[i]); /* MMH: I know this is stupid, but the filename for the "regular" * Exodus output is a global variable!!! It is required in * post_process_nodal(). I swap it out here, and will swap it back * when we're done with LSA. Why don't I just overwrite it * completely you may ask? Well, I don't know if and/or when the * code will continue to do something useful after LSA. If it ever * does, then it would probably like to know what the correct output * filename is. Kinda like camping: Leave with what you came in * with. 
*/ strncpy(save_ExoFileOut, ExoFileOut, MAX_FNL-1); /* Write results to file (exoII format) */ printf(" push_mode = %12d \n", push_mode); if (push_mode > 0) { puts(" Writing modes to file ..."); /* Write to exo file * Each mode is written as a "time step" solution into exoII DB */ for(i = 0; i < push_mode; i++) { printf("\t\t Mode %4d ...", i); if(LSA_3D_of_2D_wave_number == -1.0) sprintf(ExoFileOut, "LSA_%d_of_%d_%s", i + 1, push_mode, save_ExoFileOut); else sprintf(ExoFileOut, "LSA_%d_of_%d_wn=%g_%s", i + 1, push_mode, LSA_3D_of_2D_wave_number, save_ExoFileOut); /* Replicate basic mesh info */ one_base(exo); wr_mesh_exo(exo, ExoFileOut, 0); wr_result_prelim_exo(rd, exo, ExoFileOut, NULL); /* Update exo file for distributed problem info */ if (Num_Proc > 1) { wr_dpi(dpi, ExoFileOut, 0); } for (j = 0; j < tnv; j++) { extract_nodal_vec(&evect[i][0], rd->nvtype[j], rd->nvkind[j], rd->nvmatID[j], gvec, exo, FALSE, time_value); wr_nodal_result_exo(exo, ExoFileOut, gvec, j+1, 1, time_value); } /* * Add additional user-specified post processing variables */ if (tnv_post > 0) { post_process_nodal(&evect[i][0], NULL, x_old, xdot, xdot_old, resid_vector, 1, &time_value, delta_t, 0.0, NULL, exo, dpi, rd, ExoFileOut); } zero_base(exo); printf(" recorded.\n"); } } /* MMH: See comments above. */ strncpy(ExoFileOut, save_ExoFileOut, MAX_FNL); /* De-allocate work vectors */ printf("Deallocating memory ... "); i = nj+5; j = mm+5; Dmatrix_death(schur, j, i); Dmatrix_death(evect, j, i); Dvector_death(&v2[0], nj+5); Dvector_death(&v1[0], nj+5); Dvector_death(&mat[0], nnz_j+5); Dvector_death(&ev_e[0], mm+5); Dvector_death(&ev_i[0], mm+5); Dvector_death(&ev_r[0], mm+5); Dvector_death(&ev_x[0], mm+5); printf("done.\n"); }