/*
 * Ray/sphere test along a ray starting at p with direction dir.
 *
 * The ray is intersected with the sphere of radius SPHERE_RADIUS centred at
 * objpos.  The result is GL_TRUE when the ray hits the sphere at a distance
 * t >= EPSILON whose square does not exceed the squared distance from p to
 * lightpos, and GL_FALSE in every other case.
 *
 * NOTE(review): the quadratic is solved with a leading coefficient of 1, so
 * dir is presumably expected to be normalised -- confirm against callers.
 * NOTE(review): despite the name, GL_TRUE appears to mean "the sphere is hit
 * before the light is reached" -- confirm.
 */
static GLboolean
seelight(float p[3], float dir[3])
{
   float from_centre[3], to_light[3];
   float half_b, c_term, disc, root, hit;

   vsub(from_centre, p, objpos);
   half_b = -dprod(from_centre, dir);
   c_term = dprod(from_centre, from_centre) - SPHERE_RADIUS * SPHERE_RADIUS;

   /* No real root: the ray misses the sphere. */
   disc = half_b * half_b - c_term;
   if (disc < 0.0)
      return GL_FALSE;

   /* Origin outside the sphere and the sphere lies behind the ray. */
   if (half_b < 0.0 && c_term > 0.0)
      return GL_FALSE;

   root = sqrt(disc);

   /* Take the nearer root first, falling back to the farther one. */
   hit = half_b - root;
   if (hit < EPSILON) {
      hit = half_b + root;
      if (hit < EPSILON)
         return GL_FALSE;
   }

   /* The intersection only counts if it is not farther than the light. */
   vsub(to_light, lightpos, p);
   return (dprod(to_light, to_light) < hit * hit) ? GL_FALSE : GL_TRUE;
}
// Frobenius-type norm of a low-rank matrix computed from its factors only.
// Accumulates dprod(v[k],v[j]) * dprod(u[k],u[j]) over all pairs
// (j,k) in [0,rank) x [0,rank) and returns sqrt(|sum|), so the cost depends
// on the rank and the factor lengths rather than on the full matrix size.
// NOTE(review): whether this equals the exact Frobenius norm for complex
// factors depends on how dprod treats conjugation -- confirm.
friend Real NormFrob(const LowRankMatrixSVD& m){
	const vector<vectCplx>& left  = m.u;
	const vector<vectCplx>& right = m.v;
	const int nterms = m.rank;

	Cplx acc = 0.;
	for(int col=0; col<nterms; ++col){
		for(int row=0; row<nterms; ++row){
			acc += dprod(right[row],right[col])*dprod(left[row],left[col]);
		}
	}

	return sqrt(abs(acc));
}
/* * This function returns the solution of Ax = b where A is posititive definite, * based on the conjugate gradients method; see "An intro to the CG method" by J.R. Shewchuk, p. 50-51 * * A is mxm, b, x are is mx1. Argument niter specifies the maximum number of * iterations and eps is the desired solution accuracy. niter<0 signals that * x contains a valid initial approximation to the solution; if niter>0 then * the starting point is taken to be zero. Argument prec selects the desired * preconditioning method as follows: * 0: no preconditioning * 1: jacobi (diagonal) preconditioning * 2: SSOR preconditioning * Argument iscolmaj specifies whether A is stored in column or row major order. * * The function returns 0 in case of error, * the number of iterations performed if successfull * * This function is often called repetitively to solve problems of identical * dimensions. To avoid repetitive malloc's and free's, allocated memory is * retained between calls and free'd-malloc'ed when not of the appropriate size. * A call with NULL as the first argument forces this memory to be released. */ int sba_Axb_CG(double *A, double *B, double *x, int m, int niter, double eps, int prec, int iscolmaj) { static double *buf=NULL; static int buf_sz=0; register int i, j; register double *aim; int iter, a_sz, res_sz, d_sz, q_sz, s_sz, wk_sz, z_sz, tot_sz; double *a, *res, *d, *q, *s, *wk, *z; double delta0, deltaold, deltanew, alpha, beta, eps_sq=eps*eps; register double sum; int rec_res; if(A==NULL){ if(buf) free(buf); buf=NULL; buf_sz=0; return 1; } /* calculate required memory size */ a_sz=(iscolmaj)? m*m : 0; res_sz=m; d_sz=m; q_sz=m; if(prec!=SBA_CG_NOPREC){ s_sz=m; wk_sz=m; z_sz=(prec==SBA_CG_SSOR)? 
m : 0; } else s_sz=wk_sz=z_sz=0; tot_sz=a_sz+res_sz+d_sz+q_sz+s_sz+wk_sz+z_sz; if(tot_sz>buf_sz){ /* insufficient memory, allocate a "big" memory chunk at once */ if(buf) free(buf); /* free previously allocated memory */ buf_sz=tot_sz; buf=(double *)malloc(buf_sz*sizeof(double)); if(!buf){ fprintf(stderr, "memory allocation request failed in sba_Axb_CG()\n"); exit(1); } } if(iscolmaj){ a=buf; /* store A (row major!) into a */ for(i=0; i<m; ++i) for(j=0, aim=a+i*m; j<m; ++j) aim[j]=A[i+j*m]; } else a=A; /* no copying required */ res=buf+a_sz; d=res+res_sz; q=d+d_sz; if(prec!=SBA_CG_NOPREC){ s=q+q_sz; wk=s+s_sz; z=(prec==SBA_CG_SSOR)? wk+wk_sz : NULL; for(i=0; i<m; ++i){ // compute jacobi (i.e. diagonal) preconditioners and save them in wk sum=a[i*m+i]; if(sum>DBL_EPSILON || -sum<-DBL_EPSILON) // != 0.0 wk[i]=1.0/sum; else wk[i]=1.0/DBL_EPSILON; } } else{ s=res; wk=z=NULL; } if(niter>0){ for(i=0; i<m; ++i){ // clear solution and initialize residual vector: res <-- B x[i]=0.0; res[i]=B[i]; } } else{ niter=-niter; for(i=0; i<m; ++i){ // initialize residual vector: res <-- B - A*x for(j=0, aim=a+i*m, sum=0.0; j<m; ++j) sum+=aim[j]*x[j]; res[i]=B[i]-sum; } } switch(prec){ case SBA_CG_NOPREC: for(i=0, deltanew=0.0; i<m; ++i){ d[i]=res[i]; deltanew+=res[i]*res[i]; } break; case SBA_CG_JACOBI: // jacobi preconditioning for(i=0, deltanew=0.0; i<m; ++i){ d[i]=res[i]*wk[i]; deltanew+=res[i]*d[i]; } break; case SBA_CG_SSOR: // SSOR preconditioning; see the "templates" book, fig. 3.2, p. 
44 for(i=0; i<m; ++i){ for(j=0, sum=0.0, aim=a+i*m; j<i; ++j) sum+=aim[j]*z[j]; z[i]=wk[i]*(res[i]-sum); } for(i=m-1; i>=0; --i){ for(j=i+1, sum=0.0, aim=a+i*m; j<m; ++j) sum+=aim[j]*d[j]; d[i]=z[i]-wk[i]*sum; } deltanew=dprod(m, res, d); break; default: fprintf(stderr, "unknown preconditioning option %d in sba_Axb_CG\n", prec); exit(1); } delta0=deltanew; for(iter=1; deltanew>eps_sq*delta0 && iter<=niter; ++iter){ for(i=0; i<m; ++i){ // q <-- A d aim=a+i*m; /*** for(j=0, sum=0.0; j<m; ++j) sum+=aim[j]*d[j]; ***/ q[i]=dprod(m, aim, d); //sum; } /*** for(i=0, sum=0.0; i<m; ++i) sum+=d[i]*q[i]; ***/ alpha=deltanew/dprod(m, d, q); // deltanew/sum; /*** for(i=0; i<m; ++i) x[i]+=alpha*d[i]; ***/ daxpy(m, x, x, alpha, d); if(!(iter%50)){ for(i=0; i<m; ++i){ // accurate computation of the residual vector aim=a+i*m; /*** for(j=0, sum=0.0; j<m; ++j) sum+=aim[j]*x[j]; ***/ res[i]=B[i]-dprod(m, aim, x); //B[i]-sum; } rec_res=0; } else{ /*** for(i=0; i<m; ++i) // approximate computation of the residual vector res[i]-=alpha*q[i]; ***/ daxpy(m, res, res, -alpha, q); rec_res=1; } if(prec){ switch(prec){ case SBA_CG_JACOBI: // jacobi for(i=0; i<m; ++i) s[i]=res[i]*wk[i]; break; case SBA_CG_SSOR: // SSOR for(i=0; i<m; ++i){ for(j=0, sum=0.0, aim=a+i*m; j<i; ++j) sum+=aim[j]*z[j]; z[i]=wk[i]*(res[i]-sum); } for(i=m-1; i>=0; --i){ for(j=i+1, sum=0.0, aim=a+i*m; j<m; ++j) sum+=aim[j]*s[j]; s[i]=z[i]-wk[i]*sum; } break; } } deltaold=deltanew; /*** for(i=0, sum=0.0; i<m; ++i) sum+=res[i]*s[i]; ***/ deltanew=dprod(m, res, s); //sum; /* make sure that we get around small delta that are due to * accumulated floating point roundoff errors */ if(rec_res && deltanew<=eps_sq*delta0){ /* analytically recompute delta */ for(i=0; i<m; ++i){ for(j=0, aim=a+i*m, sum=0.0; j<m; ++j) sum+=aim[j]*x[j]; res[i]=B[i]-sum; } deltanew=dprod(m, res, s); } beta=deltanew/deltaold; /*** for(i=0; i<m; ++i) d[i]=s[i]+beta*d[i]; ***/ daxpy(m, d, s, beta, d); } return iter; }
//=========================// // PARTIAL PIVOT ACA // //=========================// // If reqrank=-1 (default value), we use the precision given by epsilon for the stopping criterion; // otherwise, we use the required rank for the stopping criterion (!: at the end the rank could be lower) LowRankMatrix(const SubMatrix& A, const vectInt& ir0, const vectInt& ic0, const Cluster& t, const Cluster& s, int reqrank=-1){ nr = nb_rows(A); nc = nb_cols(A); ir=ir0; ic=ic0; vector<bool> visited_row(nr,false); vector<bool> visited_col(nc,false); Real frob = 0.; Real aux = 0.; Cplx frob_aux=0; //// Choice of the first row (see paragraph 3.4.3 page 151 Bebendorf) Real dist=1e30; int I=0; for (int i =0;i<int(nr/ndofperelt);i++){ Real aux_dist= norm(pts_(t)[tab_(t)[num_(t)[i*ndofperelt]]]-ctr_(t)); if (dist>aux_dist){ dist=aux_dist; I=i*ndofperelt; } } int J=0; int q = 0; if(reqrank == 0){ rank = 0; // approximate with a zero matrix } else if ( (nr+nc)>=(nr*nc) ){ // even rank 1 is not advantageous rank=-5; // just a flag for BuildBlockTree (the block won't be treated as a FarField block) } else{ vectCplx r(nc),c(nr); // Compute the first cross //==================// // Recherche colonne Real rmax = 0.; for(int k=0; k<nc; k++){ r[k] = A(I,k); for(int j=0; j<u.size(); j++){ r[k] += -u[j][I]*v[j][k];} if( abs(r[k])>rmax && !visited_col[k] ){ J=k; rmax=abs(r[k]);} } visited_row[I] = true; //==================// // Recherche ligne if( abs(r[J]) > 1e-15 ){ Cplx gamma = Cplx(1.)/r[J]; Real cmax = 0.; for(int j=0; j<nr; j++){ c[j] = A(j,J); for(int k=0; k<u.size(); k++){ c[j] += -u[k][j]*v[k][J];} c[j] = gamma*c[j]; if( abs(c[j])>cmax && !visited_row[j] ){ I=j; cmax=abs(c[j]);} } visited_col[J] = true; // We accept the cross q++; //====================// // Estimateur d'erreur frob_aux = 0.; aux = abs(dprod(c,c)*dprod(r,r)); // aux: terme quadratiques du developpement du carre' de la norme de Frobenius de la matrice low rank for(int j=0; j<u.size(); j++){ frob_aux += 
dprod(r,v[j])*dprod(c,u[j]);} // frob_aux: termes croises du developpement du carre' de la norme de Frobenius de la matrice low rank frob += aux + 2*frob_aux.real(); // frob: Frobenius norm of the low rank matrix //==================// // Nouvelle croix u.push_back(c); v.push_back(r); } else{cout << "There is a zero row in the starting submatrix and ACA didn't work" << endl;} // Stopping criterion of slide 26 of Stephanie Chaillat and Rjasanow-Steinbach // (if epsilon>=1, it always stops to rank 1 since frob=aux) while ( ((reqrank > 0) && (q < reqrank) ) || ( (reqrank < 0) && ( sqrt(aux/frob)>epsilon ) ) ) { if (q >= min(nr,nc) ) break; if ( (q+1)*(nr +nc) > (nr*nc) ){ // one rank more is not advantageous if (reqrank <0){ // If we didn't required a rank, i.e. we required a precision with epsilon rank=-5; // a flag for BuildBlockTree to say that the block won't be treated as a FarField block } break; // If we required a rank, we keep the computed ACA approximation (of lower rank) } // Compute another cross //==================// // Recherche colonne rmax = 0.; for(int k=0; k<nc; k++){ r[k] = A(I,k); for(int j=0; j<u.size(); j++){ r[k] += -u[j][I]*v[j][k];} if( abs(r[k])>rmax && !visited_col[k] ){ J=k; rmax=abs(r[k]);} } visited_row[I] = true; //==================// // Recherche ligne if( abs(r[J]) > 1e-15 ){ Cplx gamma = Cplx(1.)/r[J]; Real cmax = 0.; for(int j=0; j<nr; j++){ c[j] = A(j,J); for(int k=0; k<u.size(); k++){ c[j] += -u[k][j]*v[k][J];} c[j] = gamma*c[j]; if( abs(c[j])>cmax && !visited_row[j] ){ I=j; cmax=abs(c[j]);} } visited_col[J] = true; aux = abs(dprod(c,c)*dprod(r,r)); // aux: terme quadratiques du developpement du carre' de la norme de Frobenius de la matrice low rank } else{ cout << "ACA's loop terminated" << endl; break; } // terminate algorithm with exact rank q (not full-rank submatrix) // We accept the cross q++; //====================// // Estimateur d'erreur frob_aux = 0.; for(int j=0; j<u.size(); j++){ frob_aux += 
dprod(r,v[j])*dprod(c,u[j]);} // frob_aux: termes croises du developpement du carre' de la norme de Frobenius de la matrice low rank frob += aux + 2*frob_aux.real(); // frob: Frobenius norm of the low rank matrix //==================// // Nouvelle croix u.push_back(c); v.push_back(r); } rank = u.size(); } // Use this for Bebendorf stopping criterion (3.58) pag 141 (not very flexible): // if(reqrank == 0) // rank = 0; // approximate with a zero matrix // else if ( (nr+nc)>=(nr*nc) ){ // even rank 1 is not advantageous // rank=-5; // just a flag for BuildBlockTree (the block won't be treated as a FarField block) // } else{ // vectCplx r(nc),c(nr); // // // Compute the first cross // // (don't modify the code because we want to really use the Bebendorf stopping criterion (3.58), // // i.e. we don't want to accept the new cross if it is not satisfied because otherwise the approximation would be more precise than desired) // //==================// // // Recherche colonne // Real rmax = 0.; // for(int k=0; k<nc; k++){ // r[k] = A(I,k); // for(int j=0; j<u.size(); j++){ // r[k] += -u[j][I]*v[j][k];} // if( abs(r[k])>rmax && !visited_col[k] ){ // J=k; rmax=abs(r[k]);} // } // visited_row[I] = true; // //==================// // // Recherche ligne // if( abs(r[J]) > 1e-15 ){ // Cplx gamma = Cplx(1.)/r[J]; // Real cmax = 0.; // for(int j=0; j<nr; j++){ // c[j] = A(j,J); // for(int k=0; k<u.size(); k++){ // c[j] += -u[k][j]*v[k][J];} // c[j] = gamma*c[j]; // if( abs(c[j])>cmax && !visited_row[j] ){ // I=j; cmax=abs(c[j]);} // } // visited_col[J] = true; // // aux = abs(dprod(c,c)*dprod(r,r)); // } // else{cout << "There is a zero row in the starting submatrix and ACA didn't work" << endl;} // // // (see Bebendorf stopping criterion (3.58) pag 141) // while ( (q == 0) || // ( (reqrank > 0) && (q < reqrank) ) || // ( (reqrank < 0) && ( sqrt(aux/frob)>Parametres.epsilon * (1 - Parametres.eta)/(1 + Parametres.epsilon) ) ) ) { // // // We accept the cross // q++; // 
//====================// // // Estimateur d'erreur // frob_aux = 0.; // //aux = abs(dprod(c,c)*dprod(r,r)); // (already computed to evaluate the test) // // aux: terme quadratiques du developpement du carre' de la norme de Frobenius de la matrice low rank // for(int j=0; j<u.size(); j++){ // frob_aux += dprod(r,v[j])*dprod(c,u[j]);} // // frob_aux: termes croises du developpement du carre' de la norme de Frobenius de la matrice low rank // frob += aux + 2*frob_aux.real(); // frob: Frobenius norm of the low rank matrix // //==================// // // Nouvelle croix // u.push_back(c); // v.push_back(r); // // if (q >= min(nr,nc) ) // break; // if ( (q+1)*(nr +nc) > (nr*nc) ){ // one rank more is not advantageous // if (reqrank <0){ // If we didn't required a rank, i.e. we required a precision with epsilon // rank=-5; // a flag for BuildBlockTree to say that the block won't be treated as a FarField block // } // break; // If we required a rank, we keep the computed ACA approximation (of lower rank) // } // // Compute another cross // //==================// // // Recherche colonne // rmax = 0.; // for(int k=0; k<nc; k++){ // r[k] = A(I,k); // for(int j=0; j<u.size(); j++){ // r[k] += -u[j][I]*v[j][k];} // if( abs(r[k])>rmax && !visited_col[k] ){ // J=k; rmax=abs(r[k]);} // } // visited_row[I] = true; // //==================// // // Recherche ligne // if( abs(r[J]) > 1e-15 ){ // Cplx gamma = Cplx(1.)/r[J]; // Real cmax = 0.; // for(int j=0; j<nr; j++){ // c[j] = A(j,J); // for(int k=0; k<u.size(); k++){ // c[j] += -u[k][j]*v[k][J];} // c[j] = gamma*c[j]; // if( abs(c[j])>cmax && !visited_row[j] ){ // I=j; cmax=abs(c[j]);} // } // visited_col[J] = true; // // aux = abs(dprod(c,c)*dprod(r,r)); // } // else{ cout << "ACA's loop terminated" << endl; break; } // terminate algorithm with exact rank q (not full-rank submatrix) // } // // rank = u.size(); // } }
// Value-returning convenience overload: forwards to the three-argument
// dprod(x1, x2, out) and hands back what that overload stored in out.
T dprod(Vector<T> &x1, Vector<T> &x2)
{
    T result;

    dprod(x1, x2, result);

    return result;
}
static void updatereflectmap(int slot) { float rf, r, g, b, t, dfact, kfact, rdir[3]; float rcol[3], ppos[3], norm[3], ldir[3], h[3], vdir[3], planepos[3]; int x, y; glBindTexture(GL_TEXTURE_2D, reflectid); for (y = slot * TEX_REFLECT_SLOT_SIZE; y < (slot + 1) * TEX_REFLECT_SLOT_SIZE; y++) for (x = 0; x < TEX_REFLECT_WIDTH; x++) { ppos[0] = sphere_pos[y][x][0] + objpos[0]; ppos[1] = sphere_pos[y][x][1] + objpos[1]; ppos[2] = sphere_pos[y][x][2] + objpos[2]; vsub(norm, ppos, objpos); vnormalize(norm, norm); vsub(ldir, lightpos, ppos); vnormalize(ldir, ldir); vsub(vdir, obs, ppos); vnormalize(vdir, vdir); rf = 2.0f * dprod(norm, vdir); if (rf > EPSILON) { rdir[0] = rf * norm[0] - vdir[0]; rdir[1] = rf * norm[1] - vdir[1]; rdir[2] = rf * norm[2] - vdir[2]; t = -objpos[2] / rdir[2]; if (t > EPSILON) { planepos[0] = objpos[0] + t * rdir[0]; planepos[1] = objpos[1] + t * rdir[1]; planepos[2] = 0.0f; if (!colorcheckmap(planepos, rcol)) rcol[0] = rcol[1] = rcol[2] = 0.0f; } else rcol[0] = rcol[1] = rcol[2] = 0.0f; } else rcol[0] = rcol[1] = rcol[2] = 0.0f; dfact = 0.1f * dprod(ldir, norm); if (dfact < 0.0f) { dfact = 0.0f; kfact = 0.0f; } else { h[0] = 0.5f * (vdir[0] + ldir[0]); h[1] = 0.5f * (vdir[1] + ldir[1]); h[2] = 0.5f * (vdir[2] + ldir[2]); kfact = dprod(h, norm); kfact = pow(kfact, 4.0); if (kfact < 1.0e-10) kfact = 0.0; } r = dfact + kfact; g = dfact + kfact; b = dfact + kfact; r *= 255.0f; g *= 255.0f; b *= 255.0f; r += rcol[0]; g += rcol[1]; b += rcol[2]; r = clamp255(r); g = clamp255(g); b = clamp255(b); reflectmap[y][x][0] = (GLubyte) r; reflectmap[y][x][1] = (GLubyte) g; reflectmap[y][x][2] = (GLubyte) b; } glTexSubImage2D(GL_TEXTURE_2D, 0, 0, slot * TEX_REFLECT_SLOT_SIZE, TEX_REFLECT_WIDTH, TEX_REFLECT_SLOT_SIZE, GL_RGB, GL_UNSIGNED_BYTE, &reflectmap[slot * TEX_REFLECT_SLOT_SIZE][0][0]); }
static int colorcheckmap(float ppos[3], float c[3]) { static float norm[3] = { 0.0f, 0.0f, 1.0f }; float ldir[3], vdir[3], h[3], dfact, kfact, r, g, b; int x, y; x = (int) ((ppos[0] + BASESIZE / 2) * (10.0f / BASESIZE)); if ((x < 0) || (x > 10)) return GL_FALSE; y = (int) ((ppos[1] + BASESIZE / 2) * (10.0f / BASESIZE)); if ((y < 0) || (y > 10)) return GL_FALSE; r = 255.0f; if (y & 1) { if (x & 1) g = 255.0f; else g = 0.0f; } else { if (x & 1) g = 0.0f; else g = 255.0f; } b = 0.0f; vsub(ldir, lightpos, ppos); vnormalize(ldir, ldir); if (seelight(ppos, ldir)) { c[0] = r * 0.05f; c[1] = g * 0.05f; c[2] = b * 0.05f; return GL_TRUE; } dfact = dprod(ldir, norm); if (dfact < 0.0f) dfact = 0.0f; vsub(vdir, obs, ppos); vnormalize(vdir, vdir); h[0] = 0.5f * (vdir[0] + ldir[0]); h[1] = 0.5f * (vdir[1] + ldir[1]); h[2] = 0.5f * (vdir[2] + ldir[2]); kfact = dprod(h, norm); kfact = pow(kfact, 6.0) * 7.0 * 255.0; r = r * dfact + kfact; g = g * dfact + kfact; b = b * dfact + kfact; c[0] = clamp255(r); c[1] = clamp255(g); c[2] = clamp255(b); return GL_TRUE; }