Color *radiosity_prog(int n, Poly **p, Color *e, Color *rho) { int src, rcv, iter = 0; Real ff, mts, *a = NEWTARRAY(n, Real); Color d, *dm = NEWTARRAY(n, Color); Color ma, *m = NEWTARRAY(n, Color); initialize(n, m, dm, a, p, e); while (iter-- < max_iter) { src = select_shooter(n, dm, a); if (converged(src, dm)) break; for (rcv = 0; rcv < n; rcv++) { if (rcv == src || (ff = formfactor(src, rcv, n, p, a)) < REL_EPS) continue; d = c_scale(ff, c_mult(rho[rcv], dm[src])); m[rcv] = c_add(m[rcv], d); dm[rcv] = c_add(dm[rcv], d); } dm[src] = c_make(0,0,0); } ma = ambient_rad(n, dm, a); for (rcv = 0; rcv < n; rcv++) m[rcv] = c_add(m[rcv], ma); efree(a), efree(dm); return m; }
static void make_filter(T *result_b, T *result_a, const complex4c *alpha, const complex4c *beta, int K, T sigma) { const double denom = sigma * M_SQRT2PI; complex4c b[DERICHE_MAX_K], a[DERICHE_MAX_K + 1]; int k, j; b[0] = alpha[0]; /* Initialize b/a = alpha[0] / (1 + beta[0] z^-1) */ a[0] = make_complex(1, 0); a[1] = beta[0]; for (k = 1; k < K; ++k) { /* Add kth term, b/a += alpha[k] / (1 + beta[k] z^-1) */ b[k] = c_mul(beta[k], b[k-1]); for (j = k - 1; j > 0; --j) b[j] = c_add(b[j], c_mul(beta[k], b[j - 1])); for (j = 0; j <= k; ++j) b[j] = c_add(b[j], c_mul(alpha[k], a[j])); a[k + 1] = c_mul(beta[k], a[k]); for (j = k; j > 0; --j) a[j] = c_add(a[j], c_mul(beta[k], a[j - 1])); } for (k = 0; k < K; ++k) { result_b[k] = (T)(b[k].real / denom); result_a[k + 1] = (T)a[k + 1].real; } return; }
Color ray_shade(int level, Real w, Ray v, RContext *rc, Object *ol) { Inode *i = ray_intersect(ol, v); if (i != NULL) { Light *l; Real wf; Material *m = i->m; Vector3 p = ray_point(v, i->t); Cone recv = cone_make(p, i->n, PIOVER2); Color c = c_mult(m->c, c_scale(m->ka, ambient(rc))); rc->p = p; for (l = rc->l; l != NULL; l = l->next) if ((*l->transport)(l, recv, rc) && (wf = shadow(l, p, ol)) > RAY_WF_MIN) c = c_add(c, c_mult(m->c, c_scale(wf * m->kd * v3_dot(l->outdir,i->n), l->outcol))); if (level++ < MAX_RAY_LEVEL) { if ((wf = w * m->ks) > RAY_WF_MIN) { Ray r = ray_make(p, reflect_dir(v.d, i->n)); c = c_add(c, c_mult(m->s, c_scale(m->ks, ray_shade(level, wf, r, rc, ol)))); } if ((wf = w * m->kt) > RAY_WF_MIN) { Ray t = ray_make(p, refract_dir(v.d, i->n, (i->enter)? 1/m->ir: m->ir)); if (v3_sqrnorm(t.d) > 0) { c = c_add(c, c_mult(m->s, c_scale(m->kt, ray_shade(level, wf, t, rc, ol)))); } } } inode_free(i); return c; } else { return BG_COLOR; } }
Color shiny_surface(RContext *rc) { Color ce = environment_map(rc->m->tinfo, reflect_dir(rc->v, rc->n)); return c_add(c_scale(rc->m->ka, ambient(rc)), c_scale(rc->m->ks, c_add(ce, specular(rc)))); }
Color textured_plastic(RContext *rc) { Color c, ct = texture_map(rc->m->tinfo, rc->t); c = c_add(c_mult(ct, c_add(c_scale(rc->m->ka, ambient(rc)), c_scale(rc->m->kd, diffuse(rc)))), c_mult(rc->m->s, c_scale(rc->m->ks, specular(rc)))); return c; }
void polymult (header *hd) { header *st=hd,*hd1,*result; int c,c1,c2,i,r,j,k; double *m1,*m2,*mr,x; complex *mc1,*mc2,*mcr,xc,hc; interval *mi1,*mi2,*mir,xi,hi; hd1=next_param(st); equal_params_2(&hd,&hd1); if (error) return; getmatrix(hd,&r,&c1,&m1); if (r!=1) wrong_arg(); getmatrix(hd1,&r,&c2,&m2); if (r!=1) wrong_arg(); if ((LONG)c1+c2-1>INT_MAX) wrong_arg(); c=c1+c2-1; if (iscomplex(hd)) { mc1=(complex *)m1; mc2=(complex *)m2; result=new_cmatrix(1,c,""); if (error) return; mcr=(complex *)matrixof(result); c_copy(xc,*mc1); mc1++; for (i=0; i<c2; i++) c_mult(xc,mc2[i],mcr[i]); for (j=1; j<c1; j++) { c_copy(xc,*mc1); mc1++; for (k=j,i=0; i<c2-1; i++,k++) { c_mult(xc,mc2[i],hc); c_add(hc,mcr[k],mcr[k]); } c_mult(xc,mc2[i],mcr[k]); } } else if (isinterval(hd)) { mi1=(interval *)m1; mi2=(interval *)m2; result=new_imatrix(1,c,""); if (error) return; mir=(interval *)matrixof(result); i_copy(xi,*mi1); mi1++; for (i=0; i<c2; i++) i_mult(xi,mi2[i],mir[i]); for (j=1; j<c1; j++) { i_copy(xi,*mi1); mi1++; for (k=j,i=0; i<c2-1; i++,k++) { i_mult(xi,mi2[i],hi); c_add(hi,mir[k],mir[k]); } c_mult(xi,mi2[i],mir[k]); } } else if (isreal(hd)) { result=new_matrix(1,c,""); if (error) return; mr=matrixof(result); x=*m1++; for (i=0; i<c2; i++) mr[i]=x*m2[i]; for (j=1; j<c1; j++) { x=*m1++; for (k=j,i=0; i<c2-1; i++,k++) mr[k]+=x*m2[i]; mr[k]=x*m2[i]; } } else wrong_arg(); moveresult(st,result); }
struct c_complex Lentz_Dn(struct c_complex z,long n) /*:10*/ #line 126 "./mie.w" { struct c_complex alpha_j1,alpha_j2,zinv,aj; struct c_complex alpha,result,ratio,runratio; /*12:*/ #line 156 "./mie.w" zinv= c_sdiv(2.0,z); alpha= c_smul(n+0.5,zinv); aj= c_smul(-n-1.5,zinv); alpha_j1= c_add(aj,c_inv(alpha)); alpha_j2= aj; ratio= c_div(alpha_j1,alpha_j2); runratio= c_mul(alpha,ratio); /*:12*/ #line 131 "./mie.w" do /*13:*/ #line 179 "./mie.w" { aj.re= zinv.re-aj.re; aj.im= zinv.im-aj.im; alpha_j1= c_add(c_inv(alpha_j1),aj); alpha_j2= c_add(c_inv(alpha_j2),aj); ratio= c_div(alpha_j1,alpha_j2); zinv.re*= -1; zinv.im*= -1; runratio= c_mul(ratio,runratio); } /*:13*/ #line 134 "./mie.w" while(fabs(c_abs(ratio)-1.0)> 1e-12); result= c_add(c_sdiv((double)-n,z),runratio); return result; }
void calc_aver(FILE *fp,int nframes,int npair,t_pair pair[],t_sij *spec, real maxdist) { int i,j,m; real nf_1,fac,md_6; complex c1,c2,dc2; md_6 = pow(maxdist,-6.0); fac = 4*M_PI/5; nf_1 = 1.0/nframes; for(i=0; (i<npair); i++) { c2.re = 0; c2.im = 0; fprintf(fp,"%5d %5d",pair[i].ai,pair[i].aj); for(m=0; (m<5); m++) { c1.re = spec[i].Ylm[m].re*nf_1; c1.im = spec[i].Ylm[m].im*nf_1; dc2 = c_sqr(c1); c2 = c_add(dc2,c2); if (c1.im > 0) fprintf(fp," %8.3f+i%8.3f",c1.re,c1.im); else fprintf(fp," %8.3f-i%8.3f",c1.re,-c1.im); } fprintf(fp,"\n"); spec[i].rij_3 *= nf_1; spec[i].rij_6 *= nf_1; spec[i].y2.re = fac*c2.re; spec[i].y2.im = fac*c2.im; spec[i].bNOE = (spec[i].rij_6 > md_6); } }
// 1D Fourier Transform: fft_1d_helper(x, N, stride, y) // if N > 1 then // 1. take the FFT of the even elements in x, and store it in the first half of y // x' = x // stride' = 2 * stride // y' = y // 2. take the FFT of the odd elements in x, and store it in the second half of y // x' = x + stride // stride' = 2 * stride // y' = y + N/2 // 3. for each frequency step k in 0 ... N/2 // i. compute the component at Wkn = exp(-j*2*pi/N * k) // y[k] = y[k] + Wkn * y[k + N/2] // ii. compute the component at Wkn = exp(-j*2*pi/N * (k + N/2)) // y[k + N/2] = y[k] + Wkn * y[k + N/2] // otherwise just return the element at x[0] void fft_1d_helper(complex* x, int N, int stride, complex* y, complex* Wkn, int Wkn_stride){ if(N > 1) { fft_1d_helper(x, N/2, 2*stride, y, Wkn, 2*Wkn_stride); fft_1d_helper(x + stride, N/2, 2*stride, y + N/2, Wkn, 2*Wkn_stride); for(int k=0; k<N/2; k++) { complex Wkn1 = Wkn[k*Wkn_stride]; complex Wkn2 = Wkn[(k + N/2)*Wkn_stride]; complex yk1 = c_add(y[k], c_mult(Wkn1, y[k + N/2])); complex yk2 = c_add(y[k], c_mult(Wkn2, y[k + N/2])); y[k] = yk1; y[k+N/2] = yk2; } } else { y[0] = x[0]; } }
static Color ambient_rad(int n, Color *dm, Real *a) { int i; Real aa = 0; Color ma = c_make(0,0,0); for (i = 0; i < n; i++) { ma = c_add(ma, c_scale(a[i], dm[i])); aa += a[i]; } return c_scale(1.0/aa, ma); }
static struct complex *pol_value( struct polynom *p, struct complex *z, /* pointer to argument */ struct complex *v /* pointer to function value */ ) { int i; v->x=p->a[p->n].x; v->y=p->a[p->n].y; for (i=p->n-1; i>=0; --i) c_add(v,&(p->a[i]),c_mult(v,z,v)); return(v); }
/** * \brief Compute the variance of the impulse response * \param poles0 unscaled pole locations * \param q rescaling parameter * \param K number of poles * \return variance achieved by poles = poles0^(1/q) * \ingroup vyv_gaussian */ static double variance(const complex4c *poles0, int K, double q) { complex4c sum = { 0, 0 }; int k; for (k = 0; k < K; ++k) { complex4c z = c_real_pow(poles0[k], 1 / q), denom = z; denom.real -= 1; /* Compute sum += z / (z - 1)^2. */ sum = c_add(sum, c_div(z, c_mul(denom, denom))); } return 2 * sum.real; }
//SUMMARY // Adds the cache entry 'ent' to the end //NOTES // Must be locked over the cache - can be unlocked immediately following the call // No other synchronization needed, since old entries will automatically be displaced once cache is full void cache_putEntry(struct cache_entry* ent) { assert( ent->filedata != NULL ); fprintf(stderr, "cache_putEntry: Adding entry for filename %s\n", ent->filename); assert( cache_size <= max_cache_size ); if (cache_size == max_cache_size) { fprintf(stderr, "cache_putEntry: Cache full, shifting (%d, %d)\n", cache_size, max_cache_size); destroyCacheEntry( c_shift() ); // This will also free the file data stored in the entry } c_add(ent); }
static struct polynom *pol_mult(struct polynom *p,struct polynom *p1,struct polynom *p2) { int i,j; struct complex tulo; p->n=p1->n+p2->n; if (p->n>MAXN-1) { pol_dim_overflow(); return(p); } for (i=0; i<=p->n; ++i) p->a[i].x=p->a[i].y=0.0; for (i=0; i<=p1->n; ++i) for (j=0; j<=p2->n; ++j) c_add(&(p->a[i+j]),&(p->a[i+j]), c_mult(&tulo,&(p1->a[i]),&(p2->a[j]))); return(p); }
void Dn_down(struct c_complex z,long nstop,struct c_complex*D) /*:19*/ #line 247 "./mie.w" { long k; struct c_complex zinv,k_over_z; D[nstop-1]= Lentz_Dn(z,nstop); zinv= c_inv(z); for(k= nstop-1;k>=1;k--){ k_over_z= c_smul((double)k,zinv); D[k-1]= c_sub(k_over_z,c_inv(c_add(D[k],k_over_z))); } }
/** * \brief Derivative of variance with respect to q * \param poles0 unscaled pole locations * \param q rescaling parameter * \param K number of poles * \return derivative of variance with respect to q * \ingroup vyv_gaussian * * This function is used by compute_q() in solving for q. */ static double dq_variance(const complex4c *poles0, int K, double q) { complex4c sum = { 0, 0 }; int k; for (k = 0; k < K; ++k) { complex4c z = c_real_pow(poles0[k], 1 / q), w = z, denom = z; w.real += 1; denom.real -= 1; /* Compute sum += z log(z) (z + 1) / (z - 1)^3 */ sum = c_add(sum, c_div(c_mul(c_mul(z, c_log(z)), w), c_real_pow(denom, 3))); } return (2 / q) * sum.real; }
void polyadd (header *hd) { header *st=hd,*hd1,*result; int c,c1,c2,i,r; double *m1,*m2,*mr; complex *mc1,*mc2,*mcr; interval *mi1,*mi2,*mir; hd1=next_param(st); equal_params_2(&hd,&hd1); if (error) return; getmatrix(hd,&r,&c1,&m1); if (r!=1) wrong_arg(); getmatrix(hd1,&r,&c2,&m2); if (r!=1) wrong_arg(); c=max(c1,c2); if (iscomplex(hd)) /* complex values */ { mc1=(complex *)m1; mc2=(complex *)m2; result=new_cmatrix(1,c,""); if (error) return; mcr=(complex *)matrixof(result); for (i=0; i<c; i++) { if (i>=c1) { c_copy(*mcr,*mc2); mcr++; mc2++; } else if (i>=c2) { c_copy(*mcr,*mc1); mcr++; mc1++; } else { c_add(*mc1,*mc2,*mcr); mc1++; mc2++; mcr++; } } } else if (isinterval(hd)) { mi1=(interval *)m1; mi2=(interval *)m2; result=new_imatrix(1,c,""); if (error) return; mir=(interval *)matrixof(result); for (i=0; i<c; i++) { if (i>=c1) { i_copy(*mir,*mi2); mir++; mi2++; } else if (i>=c2) { i_copy(*mir,*mi1); mir++; mi1++; } else { i_add(*mi1,*mi2,*mir); mi1++; mi2++; mir++; } } } else if (isreal(hd)) { result=new_matrix(1,c,""); if (error) return; mr=matrixof(result); for (i=0; i<c; i++) { if (i>=c1) { *mr++ = *m2++; } else if (i>=c2) { *mr++ = *m1++; } else { *mr++ = *m1++ + *m2++; } } } else wrong_arg(); moveresult(st,result); }
void rfft (long m0, long p0, long q0, long n) /***** rfft make a fft on x[m],x[m+q0],...,x[m+(p0-1)*q0] (p points). one has xi_p0 = xi_n^n = zz[n] ; i.e., p0*n=nn. *****/ { long p,q,m,l; long mh,ml; int found=0; complex sum,h; if (p0==1) return; if (test_key()==escape) { error=301; return; } if (p0%2==0) { p=p0/2; q=2; } else { q=3; while (q*q<=p0) { if (p0%q==0) { found=1; break; } q+=2; } if (found) p=p0/q; else { q=p0; p=1; } } if (p>1) for (m=0; m<q; m++) rfft((m0+m*q0)%nn,p,q*q0,nn/p); mh=m0; for (l=0; l<p0; l++) { ml=l%p; c_copy(sum,ff[(m0+ml*q*q0)%nn]); for (m=1; m<q; m++) { c_mult(ff[(m0+(m+ml*q)*q0)%nn],zz[(n*l*m)%nn],h); c_add(sum,h,sum); } sum[0]/=q; sum[1]/=q; c_copy(fh[mh],sum); mh+=q0; if (mh>=nn) mh-=nn; } for (l=0; l<p0; l++) { c_copy(ff[m0],fh[m0]); m0+=q0; } }
// resample_1d(complex* x, int N, int stride, float* n) // 1. create a temporary buffer of size N // 2. for step i in 0 ... N // 3. if n[i] is outside of range 0 ... N, then x_interp = 0 // 4. get the lower element x_low = x[floor(n[i]) * stride] // 5. get the higher element x_high = x[ceil(n[i]) * stride] // 6. compute the interpolation mixing ratio, ratio = n[i] - floor(n[i]) // 7. compute the interpolated value, x_interp = (1 - ratio) * x_low + ratio * x_high // 8. store the interpolated value into the buffer // 9. copy the values from the temporary buffer back into x void resample_1d(complex* x, int N, int stride, float* n) { int i; complex buffer[N]; for(i=0; i<N; i++){ complex x_interp; if(n[i] < 0 || n[i] > N - 1){ x_interp.real = 0; x_interp.imag = 0; } else { complex x_low = x[(int)floor(n[i]) * stride]; complex x_high = x[(int)ceil(n[i]) * stride]; float ratio = n[i] - floor(n[i]); x_interp = c_add(c_scalar_mult(x_low, 1 - ratio), c_scalar_mult(x_high, ratio)); } buffer[i] = x_interp; } for(i=0; i<N; i++) x[i*stride] = buffer[i]; }
void polyzeros (header *hd) { header *st=hd,*result; int i,j,r,c; double *m,*mr,x; complex *mc,*mcr,xc,hc; hd=getvalue(hd); if (error) return; if (hd->type==s_real || hd->type==s_matrix) { getmatrix(hd,&r,&c,&m); if (r!=1) wrong_arg(); result=new_matrix(1,c+1,""); if (error) return; mr=matrixof(result); mr[0]=-m[0]; mr[1]=1.0; for (i=1; i<c; i++) { x=-m[i]; mr[i+1]=1.0; for (j=i; j>=1; j--) mr[j]=mr[j-1]+x*mr[j]; mr[0]*=x; } } else if (hd->type==s_complex || hd->type==s_cmatrix) { getmatrix(hd,&r,&c,&m); mc=(complex *)m; if (r!=1) wrong_arg(); result=new_cmatrix(1,c+1,""); if (error) return; mcr=(complex *)matrixof(result); mcr[0][0]=-mc[0][0]; mcr[0][1]=-mc[0][1]; mcr[1][0]=1.0; mcr[1][1]=0.0; for (i=1; i<c; i++) { xc[0]=-mc[i][0]; xc[1]=-mc[i][1]; mcr[i+1][0]=1.0; mcr[i+1][1]=0.0; for (j=i; j>=1; j--) { c_mult(xc,mcr[j],hc); c_add(hc,mcr[j-1],mcr[j]); } c_mult(xc,mcr[0],mcr[0]); } } else wrong_arg(); moveresult(st,result); }
static struct polynom *pol_add(struct polynom *p,struct polynom *p1,struct polynom *p2) { int i; // RS REM struct complex tulo; p->n=(p1->n>p2->n)? (p1->n):(p2->n); for (i=0; i<=p->n; ++i) { if (i<=p1->n) { if (i<=p2->n) c_add(&(p->a[i]),&(p1->a[i]),&(p2->a[i])); else { p->a[i].x=p1->a[i].x; p->a[i].y=p1->a[i].y; } } else { p->a[i].x=p2->a[i].x; p->a[i].y=p2->a[i].y; } } i=p->n; while (c_zero(&(p->a[i])) && i>0) --i; p->n=i; return(p); }
///////////////////////////////////////////////////////////////////////////////////////////// //int main(int argc, char *argv[ ]) //{ int main(void) { sortingindex = 0; if((fpp = fopen(INPUT_FILE, "r")) == NULL) { printf("Cannot open 'parameter_file'.\n"); } for(j = 0; j < 11; j++) { if(fscanf(fpp, "%lf", &tmp) != EOF) { my_array[j] = tmp; } else { printf("Not enough data in 'input_parameter'!"); } } fclose(fpp); In_n= (int) my_array[0]; In_vect_n= (int) my_array[1]; Out_n= (int) my_array[2]; Mf_n= (int) my_array[3]; training_data_n= (int) my_array[4]; checking_data_n= (int) my_array[5]; epoch_n= (int) my_array[6]; step_size=my_array[7]; increase_rate=my_array[8]; decrease_rate=my_array[9]; threshold = my_array[10]; Rule_n = (int)pow((double)Mf_n, (double)In_n); //number of rules Node_n = In_n + In_n*Mf_n + 3*Rule_n + In_n*Rule_n + Out_n; /* allocate matrices and memories */ int trnnumcheck[training_data_n + 1]; int trnnumchecku[training_data_n + 1]; for(i=0; i<training_data_n +1; i++) { trnnumcheck[i]=0; trnnumchecku[i]=0; } diff =(double **)create_matrix(Out_n, training_data_n, sizeof(double)); double chkvar[checking_data_n]; double cdavg[checking_data_n]; double chkvar_un[checking_data_n]; double cdavg_un[checking_data_n]; target = calloc(Out_n, sizeof(double)); de_out = calloc(Out_n, sizeof(double)); node_p = (NODE_T **)create_array(Node_n, sizeof(NODE_T *)); config = (int **)create_matrix(Node_n, Node_n, sizeof(int)); training_data_matrix = (double **)create_matrix(training_data_n, In_n*In_vect_n + Out_n, sizeof(double)); if(checking_data_n > 0) { checking_data_matrix = (double **)create_matrix(checking_data_n, In_n*In_vect_n +Out_n, sizeof(double)); checking_data_matrix_un = (double **)create_matrix(checking_data_n, Out_n, sizeof(double)); chk_output = (double **)create_matrix(checking_data_n, Out_n, sizeof(double)); } layer_1_to_4_output = (COMPLEX_T **)create_matrix(training_data_n, In_n*Mf_n + 3*Rule_n, sizeof(COMPLEX_T)); trn_rmse_error = calloc(epoch_n, sizeof(double)); trnNMSE = calloc(epoch_n, sizeof(double)); chk_rmse_error = calloc(epoch_n, sizeof(double)); kalman_parameter = (double **)create_matrix(Out_n ,(In_n*In_vect_n + 1)*Rule_n, sizeof(double)); kalman_data = (double **)create_matrix(Out_n ,(In_n*In_vect_n + 1)*Rule_n, sizeof(double)); step_size_array = calloc(epoch_n, sizeof(double)); ancfis_output = (double **)create_matrix(training_data_n , Out_n, sizeof(double)); trn_error =calloc(Out_n +1, sizeof(double)); chk_error_n = calloc(Out_n +1, sizeof(double));// changing size for adding new error measures chk_error_un = calloc(Out_n +1, sizeof(double));// changing size for adding new error measures trn_datapair_error = calloc(training_data_n, sizeof(double)); trn_datapair_error_sorted = (double **)create_matrix(2,training_data_n, sizeof(double)); NMSE = calloc(Out_n, sizeof(double)); NDEI = calloc(Out_n, sizeof(double)); unNMSE = calloc(Out_n, sizeof(double)); unNDEI = calloc(Out_n, sizeof(double)); //Build Matrix of 0 nd 1 to show the connected nodes gen_config(In_n, Mf_n,Out_n, config);//gen_config.c //With the above matrix, build ANCFIS connected nodes build_ancfis(config); //datastru.c //Find total number of nodes in layer 1 and 5 parameter_n = set_parameter_mode(); //datastru.c parameter_array = calloc(parameter_n, sizeof(double)); initpara(TRAIN_DATA_FILE, training_data_n, In_n, In_vect_n+1, Mf_n); // initpara.c // after this step, the parameters (they are present in layer 1 and layer 5 only) are assigned a random initial value // using some basic algebra and these value are then stored in "para.ini" get_parameter(node_p,Node_n,INIT_PARA_FILE); //input.c // after this step, the initial random values of the parametrs are read from "oara.ini" and assigned to the appropriate nodes in the node structure by accessing their para list. //Get training and testing data get_data(TRAIN_DATA_FILE, training_data_n, training_data_matrix); //input.c // after this step, the training input data is read from the "data.trn" fle and stroed in the training data matrix. get_data(CHECK_DATA_FILE, checking_data_n, checking_data_matrix); //input.c // after the above step, the checking data is read from the "data.chk" file and then stored in the checking data matrix. for(i=0; i< Out_n; i++) { for(j=0; j<training_data_n; j++) { trnavg = trnavg + training_data_matrix[j][(i+1)*In_vect_n +i]; } } trnavg = trnavg /(Out_n * training_data_n); for(i=0; i< Out_n; i++) { for(j=0; j<training_data_n; j++) { temp = training_data_matrix[j][(i+1)*In_vect_n +i]- trnavg; temp = temp*temp; trnvariance = trnvariance + temp; } } trnvariance = trnvariance /((Out_n * training_data_n)-1); temp = 0.0; for(i=0; i< Out_n; i++) { for(j=0; j< checking_data_n; j++) { chkavg = chkavg + checking_data_matrix[j][(i+1)*In_vect_n +i]; } } chkavg = chkavg /(Out_n * checking_data_n); for(i=0; i< Out_n; i++) { for(j=0; j<checking_data_n; j++) { temp = checking_data_matrix[j][(i+1)*In_vect_n +i]- chkavg; temp = temp*temp; chkvariance = chkvariance + temp; } } chkvariance = chkvariance /((Out_n * checking_data_n)-1); printf("epochs \t trn error \t tst error\n"); printf("------ \t --------- \t ---------\n"); //printf("not entering the epoch loop and the i loop yoyoyo\n"); /************** for(ep_n = 0; ep_n < epoch_n; ep_n++) { //step_size_pointer= &step_size; //printf("epoch numbernumber %d \n", ep_n); //step_size_array[ep_n] = step_size_pointer; step_size_array[ep_n] = step_size; // after the above step, the updated stepsize at the end of the last loop is stored in the step_size_array. // this will keep happening every time we start en epoch and hence at the end of the loop, step_size_array will // have a list of all the updated step sizes. Since this is a offline version, step sizes are updated only // at the end of an epoch. for(m = 0; m < Out_n; m++) { //printf("m loop number %d \n", m); for(j = 0; j < training_data_n; j++) { //printf("j loop number %d \n", j); //copy the input vector(s) to input node(s) put_input_data(node_p,j, training_data_matrix); //input.c // after this(above) step, the input data is transferred frm the training data matrix to the "node" structure. //printf("testing \n"); //printf("reeeetesting \n"); target[m] = training_data_matrix[j][(m+1)*In_vect_n+m]; // *** // this step assigns the value of the "m"th output of "j" th trainig data pair to target. //printf("testing \n"); //forward pass, get node outputs from layer 1 to layer 4 calculate_output(In_n, In_n + In_n*Mf_n + 3*Rule_n - 1, j); //forward.c // after this step, output of nodes in layer 1 to 4 is calculated. Please note that when this happens for the first // time, i.e. when ep_n=0, our network parametrs are already initialized. thus, it is possible to get the // output of each node using the function definitios proposed in forward.c. After first epoch, our parametrs get // updated and this output is then calculated using teh new parameters. The essential point to note here is that // we can always calculate the output of each node since we have already initialized our parameters. //printf("testing \n"); //put outputs of layer 1 to 4 into layer_1_to_4_output for(k = 0; k < Mf_n*In_n + 3*Rule_n; k++) { //printf("testing \n"); layer_1_to_4_output[j][k] = *node_p[k + In_n]->value; } // the above loop simply puts the values of nodes from layer 1 to layer 4 in the layer_1_to_4_output matrix. //identify layer 5 params using LSE (Kalman filter) //printf("testing \n"); get_kalman_data(kalman_data, target); //kalman.c // this function call finds out the values of O4iXnl .. these are basically the coefficients // of the kalman parametrs for a given training data pair //puts them in kalman_data matrix. // this kalman_data matrix has In_n number of rows and number of columns equal to number of parametrs that are // responsible for determining each output... as stated above, the outputs are actually the coefficients of the // parameters. //printf("testing \n"); //calculate Kalman parameters kalman(ep_n, j+(m*training_data_n), m, kalman_data, kalman_parameter,target); //kalman.c // this function call evaluates kalman parametrs for a given output, for a given epoch.. that is it takes the epoch // number from us, takes the info about how many times has kalman been invoked before, also takes in the // output number(row number) for whihc the parametrs are to be found out... it also takes kalman_data and reads // from it to estimate the kalman parameters... it also takes target .. and stores the output in the mth row of // kalman_parameter. //printf("testing \n"); } // let me tell u what the abopve loop is doing.. after observing closely, it is easy to see that in the above loop, // for a given output node, one by one, all the training data are taken inside the ANCFIS structure, outputs // are calculated from one to 4, then a recursive kalman filetr is used to identify the kalman // parametrs corresponding to the output node.. these kalman parameters are updated after every tarining data pair // and finally at the end of all the training data, we have an estimate for the kalman parametrs corresponding to // the output node. } // thus, at the of the above loop, the kalman parametrs for all the output nodes are evaluated... // now, we are ready to actually calculate the outputs.. plase remember that, all this while, the actual // values of the parametrs of nodes in layer 1 and layer 5 are the ones that were randomly initialized. for(j = 0; j < training_data_n; j++) { //printf("testing 1\n"); put_input_data(node_p,j, training_data_matrix); //input.c //printf("testing 2 \n"); for(k = 0; k < Mf_n*In_n + 3*Rule_n; k++) { *node_p[k + In_n]->value = layer_1_to_4_output[j][k]; } // u must be able to see that in the above two loops, each time, whatever output we got for a given training // datta pair, it was safely stored in layer_1_to_4 array...and each time, the value on the actual nodes in the // structure got changed.. due to new incoming training data pair..this was periodic with period trainingdata_n.. // that is for each output node, we got the same results for a given training dat aapir.. that is the node values // were independent of m. Now, for a given traing data pair, we are getting those value back in the actual node // node structure from that laye blh blah matrix.. //printf("testing 3\n"); put_kalman_parameter(Out_n,kalman_parameter); //kalman.c // using this function call, we are placing the setimated value of the layer 5 parametrs in the node structure // by accessing each node and its parameter list. // Do forward pass for L5 and L6 calculate_output(In_n + In_n*Mf_n + 3*Rule_n, Node_n, j); //forward.c // for a given value of the training data pair, this function calculates the output of layer 5 and layer 6 nodes // and places them in the node structure. calculate_root(training_data_matrix,diff,j,node_p); //debug.c // this function call calculates the square of the erro between the predicted value of an output node and the // corresponding actual value in the training data matrix..(actual output) and stores it in diff. // this call performs the above action for a given training data pair and for all output nodes. // Do backward pass and calculate all error rate for each node calculate_de_do(j,Out_n,target,ancfis_output); //backward.c // calculates de_do for each node fora given training data pair update_de_dp(j); //de_dp.c // updates de_do for each node.... } // thus at the end of this loop, estimated outputs for all the training data are calculated..also back propogatin // is done and de_dp for all the nodes is updated. //printf("testing 1\n"); calculate_trn_err(diff,trn_error,trn_datapair_error,training_data_n); //debug.c //printf("testing 2 \n"); //training_error_measure(target,ancfis_output,diff, training_data_n, trn_error,out_n); //trn_err.c trn_rmse_error[ep_n] = trn_error[Out_n]; printf("%3d \t %.11f \n", ep_n+1, trn_error[Out_n]); //Find RMSE of testing error /************************************* if(checking_data_n != 0) { printf("testing 3 \n"); epoch_checking_error(checking_data_matrix, checking_data_n, chk_error, training_data_n, chk_output, ep_n); //chk_err.c writes to tracking.txt printf("testing 4 \n"); chk_rmse_error[ep_n] = chk_error[Out_n]; for (i=0; i<Out_n; i++) //printf("%3d \t %.11f \t %.11f\n", ep_n+1, trn_error[i], chk_error[i]); printf("%3d \t %.11f \n", ep_n+1, trn_error[i]); //printf("%.11f\t %.11f\n", trn_error[Out_n],chk_error[Out_n]); printf("%.11f\t %.11f\n", trn_error[Out_n]); write_result(ep_n+1,Out_n,trn_error,chk_error); //debug.c writes to result.txt } else { for (i=0; i<Out_n; i++) printf("%4d \t %.11f\n", ep_n+1, trn_error[i]); } ***************************/ /** //Find minimum training error and its epoch-number if(trn_rmse_error[ep_n] < min_trn_RMSE) { min_trn_RMSE_epoch = ep_n +1; min_trn_RMSE = trn_rmse_error[ep_n]; record_parameter(parameter_array); } if(ep_n < epoch_n-1) { //update parameters in 1st layer (Using VNCSA) update_parameter(1, step_size); //new_para.c //update stepsize update_step_size(trn_rmse_error, ep_n, &step_size, decrease_rate, increase_rate); //stepsize.c } } ***/ //////////////////////////////////////////////////////////// fppp = (FILE *)open_file("status.txt", "w"); fpppp = (FILE *)open_file("trn.txt", "w"); ep_n=0; do { //step_size_pointer= &step_size; printf("epoch numbernumber %d \n", ep_n+1); //step_size_array[ep_n] = step_size_pointer; step_size_array[ep_n] = step_size; // after the above step, the updated stepsize at the end of the last loop is stored in the step_size_array. // this will keep happening every time we start en epoch and hence at the end of the loop, step_size_array will // have a list of all the updated step sizes. Since this is a offline version, step sizes are updated only // at the end of an epoch. for(m = 0; m < Out_n; m++) { //printf("m loop number %d \n", m); for(j = 0; j < training_data_n; j++) { //printf("j loop number %d \n", j); //copy the input vector(s) to input node(s) put_input_data(node_p,j, training_data_matrix); //input.c // after this(above) step, the input data is transferred frm the training data matrix to the "node" structure. //printf("testing \n"); //printf("reeeetesting \n"); target[m] = training_data_matrix[j][(m+1)*In_vect_n+m]; // *** // this step assigns the value of the "m"th output of "j" th trainig data pair to target. //printf("testing \n"); //forward pass, get node outputs from layer 1 to layer 4 calculate_output(In_n, In_n + In_n*Mf_n + 3*Rule_n, j); //forward.c // after this step, output of nodes in layer 1 to 4 is calculated. Please note that when this happens for the first // time, i.e. when ep_n=0, our network parametrs are already initialized. thus, it is possible to get the // output of each node using the function definitios proposed in forward.c. After first epoch, our parametrs get // updated and this output is then calculated using teh new parameters. The essential point to note here is that // we can always calculate the output of each node since we have already initialized our parameters. //printf("testing \n"); //put outputs of layer 1 to 4 into layer_1_to_4_output for(k = 0; k < Mf_n*In_n + 3*Rule_n; k++) { //printf("testing \n"); layer_1_to_4_output[j][k] = *node_p[k + In_n]->value; //fprintf(fppp, "%lf \t %lf \t \n", (layer_1_to_4_output[j][k]).real, (layer_1_to_4_output[j][k]).imag); } // the above loop simply puts the values of nodes from layer 1 to layer 4 in the layer_1_to_4_output matrix. //identify layer 5 params using LSE (Kalman filter) //printf("testing \n"); get_kalman_data(kalman_data, target); //kalman.c // this function call finds out the values of O4iXnl .. these are basically the coefficients // of the kalman parametrs for a given training data pair //puts them in kalman_data matrix. // this kalman_data matrix has In_n number of rows and number of columns equal to number of parametrs that are // responsible for determining each output... as stated above, the outputs are actually the coefficients of the // parameters. //printf("testing \n"); //calculate Kalman parameters kalman(ep_n, j+(m*training_data_n), m, kalman_data, kalman_parameter,target); //kalman.c // this function call evaluates kalman parametrs for a given output, for a given epoch.. that is it takes the epoch // number from us, takes the info about how many times has kalman been invoked before, also takes in the // output number(row number) for whihc the parametrs are to be found out... it also takes kalman_data and reads // from it to estimate the kalman parameters... it also takes target .. and stores the output in the mth row of // kalman_parameter. //printf("testing \n"); } // let me tell u what the abopve loop is doing.. after observing closely, it is easy to see that in the above loop, // for a given output node, one by one, all the training data are taken inside the ANCFIS structure, outputs // are calculated from one to 4, then a recursive kalman filetr is used to identify the kalman // parametrs corresponding to the output node.. these kalman parameters are updated after every tarining data pair // and finally at the end of all the training data, we have an estimate for the kalman parametrs corresponding to // the output node. } // thus, at the of the above loop, the kalman parametrs for all the output nodes are evaluated... // now, we are ready to actually calculate the outputs.. plase remember that, all this while, the actual // values of the parametrs of nodes in layer 1 and layer 5 are the ones that were randomly initialized. for(j = 0; j < training_data_n; j++) { //printf("testing 1\n"); put_input_data(node_p,j, training_data_matrix); //input.c //printf("testing 2 \n"); for(k = 0; k < Mf_n*In_n + 3*Rule_n; k++) { *node_p[k + In_n]->value = layer_1_to_4_output[j][k]; /*if(ep_n==1) { fprintf(fppp, "%d.\t %lf \t + \t i%lf \n", k, (layer_1_to_4_output[j][k]).real,(layer_1_to_4_output[j][k]).imag); }*/ } // u must be able to see that in the above two loops, each time, whatever output we got for a given training // datta pair, it was safely stored in layer_1_to_4 array...and each time, the value on the actual nodes in the // structure got changed.. due to new incoming training data pair..this was periodic with period trainingdata_n.. // that is for each output node, we got the same results for a given training dat aapir.. that is the node values // were independent of m. Now, for a given traing data pair, we are getting those value back in the actual node // node structure from that laye blh blah matrix.. //printf("testing 3\n"); put_kalman_parameter(Out_n,kalman_parameter); //kalman.c //printf("hihahahha \n"); // using this function call, we are placing the setimated value of the layer 5 parametrs in the node structure // by accessing each node and its parameter list. // Do forward pass for L5 and L6 calculate_output(In_n + In_n*Mf_n + 3*Rule_n, Node_n, j); //forward.c // for a given value of the training data pair, this function calculates the output of layer 5 and layer 6 nodes // and places them in the node structure. //printf("hihahahha no 2 \n"); calculate_root(training_data_matrix,diff,j,node_p); //debug.c // this function call calculates the square of the erro between the predicted value of an output node and the // corresponding actual value in the training data matrix..(actual output) and stores it in diff. // this call performs the above action for a given training data pair and for all output nodes. // Do backward pass and calculate all error rate for each node calculate_de_do(j,Out_n,target,ancfis_output); //backward.c //printf("hihahahha no 3 \n"); // calculates de_do for each node fora given training data pair update_de_dp(j); //de_dp.c // updates de_do for each node.... } // thus at the end of this loop, estimated outputs for all the training data are calculated..also back propogatin // is done and de_dp for all the nodes is updated. //printf("testing 1\n"); calculate_trn_err(diff,trn_error, trn_datapair_error, training_data_n); //debug.c //printf("testing 2 \n"); //training_error_measure(target,ancfis_output,diff, training_data_n, trn_error,out_n); //trn_err.c trn_rmse_error[ep_n] = trn_error[Out_n]; trnNMSE[ep_n] = trn_rmse_error[ep_n]*trn_rmse_error[ep_n]/trnvariance; fprintf(fppp, "epoch number is %d \t trn RMSE is %.11f \t trn NMSE is %lf \t \n", ep_n + 1, trn_rmse_error[ep_n], trnNMSE[ep_n]); //fprintf(fpppp, "\n"); fprintf(fpppp, "epoch number is %d \t trn RMSE is %.11f \t trn NMSE is %lf \t \n", ep_n + 1, trn_rmse_error[ep_n], trnNMSE[ep_n]); printf("trn RMSE is \t %lf \n", trn_rmse_error[ep_n]); printf("trn NMSE is \t %lf \n", trnNMSE[ep_n]); for(i=0; i<training_data_n; i++) { trn_datapair_error_sorted[0][i]=trn_datapair_error[i]; trn_datapair_error_sorted[1][i]= i+1; } for(j=1; j<training_data_n; j++) { for(i=0; i<training_data_n-j; i++) { if(trn_datapair_error_sorted[0][i]>trn_datapair_error_sorted[0][i+1]) { sorting=trn_datapair_error_sorted[0][i+1]; trn_datapair_error_sorted[0][i+1]=trn_datapair_error_sorted[0][i]; trn_datapair_error_sorted[0][i]=sorting; sortingindex = sorting=trn_datapair_error_sorted[1][i+1]; trn_datapair_error_sorted[1][i+1]=trn_datapair_error_sorted[1][i]; trn_datapair_error_sorted[1][i]=sortingindex; } } } for(j=0; j<training_data_n; j++) { fprintf(fppp, "\n"); fprintf(fppp, "training data pair sorted number \t %d \n", j+1); fprintf(fppp, "training data pair original number \t %d \n", (int)(trn_datapair_error_sorted[1][j])); fprintf(fppp, "training data pair sorted error in RMSE is \t %lf \n",trn_datapair_error_sorted[0][j]); fprintf(fpppp, "%d \t", (int)(trn_datapair_error_sorted[1][j])); complexsum = complex(0.0, 0.0); fprintf(fppp,"Normalized layer 3 outputs are as follows \n"); for(k= In_n*Mf_n + Rule_n; k< In_n*Mf_n + 2*Rule_n; k++) { fprintf(fppp, "%d.\t %lf + i%lf \t %lf < %lf \n", k, (layer_1_to_4_output[j][k]).real,(layer_1_to_4_output[j][k]).imag, c_abs(layer_1_to_4_output[j][k]), c_phase(layer_1_to_4_output[j][k])*180/PI); complexsum = c_add(complexsum, layer_1_to_4_output[j][k]); } fprintf(fppp, "Sum of the outputs of layer 3 is \t %lf+i%lf \t %lf<%lf \n", complexsum.real, complexsum.imag, c_abs(complexsum), c_phase(complexsum)*180/PI); complexsum = complex(0.0, 0.0); fprintf(fppp,"dot producted layer 4 outputs are as follows \n"); for(k=In_n*Mf_n + 2*Rule_n; k< In_n*Mf_n + 3*Rule_n; k++) { fprintf(fppp, "%d.\t %lf + i%lf \t %lf < %lf \n", k, (layer_1_to_4_output[j][k]).real,(layer_1_to_4_output[j][k]).imag, c_abs(layer_1_to_4_output[j][k]), c_phase(layer_1_to_4_output[j][k])*180/PI); complexsum = c_add(complexsum, layer_1_to_4_output[j][k]); } fprintf(fppp, "sum of the outputs of layer 4 is \t %lf +i%lf \t %lf<%lf \n", complexsum.real, complexsum.imag, c_abs(complexsum), c_phase(complexsum)*180/PI); if(j> training_data_n -6 ) { trnnumcheck[(int)(trn_datapair_error_sorted[1][j])]= trnnumcheck[(int)(trn_datapair_error_sorted[1][j])] +1; } if(j<5 ) { trnnumchecku[(int)(trn_datapair_error_sorted[1][j])]= trnnumchecku[(int)(trn_datapair_error_sorted[1][j])] +1; } } fprintf(fpppp, "\n"); //Find RMSE of testing error /******************************************************************************** if(checking_data_n != 0) { printf("testing 3 \n"); epoch_checking_error(checking_data_matrix, checking_data_n, chk_error, training_data_n, chk_output, ep_n); //chk_err.c writes to tracking.txt printf("testing 4 \n"); chk_rmse_error[ep_n] = chk_error[Out_n]; for (i=0; i<Out_n; i++) printf("%3d \t %.11f \t %.11f\n", ep_n+1, trn_error[i], chk_error[i]); printf("%.11f\t %.11f\n", trn_error[Out_n],chk_error[Out_n]); write_result(ep_n+1,Out_n,trn_error,chk_error); //debug.c writes to result.txt } else { for (i=0; i<Out_n; i++) printf("%4d \t %.11f\n", ep_n+1, trn_error[i]); } **************************************************************************************/ // check whether the current training RMSE is less than the threhold and store its epch number and parametrs if(trn_rmse_error[ep_n] < min_trn_RMSE) { min_trn_RMSE_epoch = ep_n +1; min_trn_RMSE = trn_rmse_error[ep_n]; min_trnNMSE = trnNMSE[ep_n]; record_parameter(parameter_array); } if(ep_n < epoch_n-1) { //update parameters in 1st layer (Using VNCSA) update_parameter(1, step_size); //new_para.c //update stepsize update_step_size(trn_rmse_error, ep_n, &step_size, decrease_rate, increase_rate); //stepsize.c } ep_n++; } while((trnNMSE[ep_n -1]>= threshold) && (ep_n <= epoch_n -1)); for(i=1; i<=training_data_n; i++) { fprintf(fpppp, "%d \t %d \n", i, trnnumcheck[i]); } for(i=1; i<=training_data_n; i++) { fprintf(fpppp, "%d \t %d \n", i, trnnumchecku[i]); } if(trnNMSE[ep_n -1]< threshold) { fprintf(fppp, "\n"); fprintf(fppp, "We have gone below the threshold value \n"); fprintf(fppp, "the epoch number in which this happened is %d \n", min_trn_RMSE_epoch); } else { fprintf(fppp, "\n"); fprintf(fppp, "We exhausted the available epochs and threshold was not broken :( \n"); fprintf(fppp, "the epoch number which yielded minimum training RMSE is %d \n", min_trn_RMSE_epoch); } fclose(fppp); fclose(fpppp); double *minmaxc; minmaxc= (double *)calloc(2*In_n, sizeof(double)); if((fpp = fopen("minmax.txt", "r")) == NULL) { printf("Cannot open 'parameter_file'.\n"); } for(j = 0; j < 2*In_n; j++) { if(fscanf(fpp, "%lf", &tmp) != EOF) { minmaxc[j] = tmp; } else { printf("Not enough data in 'input_parameter'!"); } } fclose(fpp); ////////////////////////////////////////////////////////////// restore_parameter(parameter_array); //output.c write_parameter(FINA_PARA_FILE); //output.c write_array(trnNMSE, epoch_n, TRAIN_ERR_FILE); //lib.c if (checking_data_n != 0) { //printf("testing 3 \n"); epoch_checking_error(checking_data_matrix, checking_data_n, chk_error_n, chk_error_un, training_data_n, chk_output, ep_n -1, minmaxc); //chk_err.c writes to tracking.txt //printf("testing 4 \n"); //chk_rmse_error[ep_n] = chk_error[Out_n]; min_chk_RMSE_n = chk_error_n[Out_n]; printf(" initial checking RMSE is %lf \n ", min_chk_RMSE_n); min_chk_RMSE_un = chk_error_un[Out_n]; //for (i=0; i<Out_n; i++) //printf("%3d \t %.11f \t %.11f\n", ep_n+1, trn_error[i], chk_error[i]); //printf("%3d \t %.11f \n", ep_n+1, trn_error[i]); //printf("%.11f\t %.11f\n", trn_error[Out_n],chk_error[Out_n]); //printf("%.11f\t \n", trn_error[Out_n]); //write_result(min_trn_RMSE_epoch ,Out_n,trn_rmse_error,chk_error); //debug.c writes to result.txt about the epoch number at which the stopping was done and the corresponding training RMSE and checking RMSE } //write_array(chk_rmse_error, epoch_n, CHECK_ERR_FILE); //lib.c //} write_array(step_size_array, epoch_n, STEP_SIZE_FILE); //lib.c /************************************************************************** min_chk_RMSE = chk_rmse_error[epoch_n -1]; min_chk_RMSE_epoch = epoch_n -1; for(j=0; j< epoch_n; j++) { if(chk_rmse_error[j]< min_chk_RMSE) { min_chk_RMSE = chk_rmse_error[j]; min_chk_RMSE_epoch = j; } } *************************************************************************/ /************************************************************** double minmaxc[2*In_n]; if((fpp = fopen("minmax.txt", "r")) == NULL) { printf("Cannot open 'parameter_file'.\n"); } for(j = 0; j < 2*In_n; j++) { if(fscanf(fpp, "%lf", &tmp) != EOF) { minmaxc[j] = tmp; } else { printf("Not enough data in 'input_parameter'!"); } } fclose(fpp); ***************************************************************************/ for(k=0; k< Out_n; k++) { for(j=0; j< checking_data_n; j++) { checking_data_matrix_un[j][k]= (checking_data_matrix[j][(k+1)*In_vect_n +k])* (minmaxc[(2*k) +1] - minmaxc[2*k]) + minmaxc[2*k]; } } // the following code calculates the cdavg_un and checking datat average bothe un normalized for(k=0; k< Out_n; k++) { for(j=0; j< checking_data_n; j++) { checking_data_average_un = checking_data_average_un + checking_data_matrix_un[j][k]; } cdavg_un[k]=checking_data_average_un/checking_data_n; checking_data_average_un_temp=checking_data_average_un_temp+checking_data_average_un; checking_data_average_un=0; } checking_data_average_un = checking_data_average_un_temp/(Out_n*checking_data_n); printf("%lf is the checking datat average non normalized\n", checking_data_average_un); // the following code calcuates the chkvar_un un normalized for(k=0; k< Out_n; k++) { for(j=0; j< checking_data_n; j++) { temp= temp + (checking_data_matrix_un[j][k] - cdavg_un[k])*(checking_data_matrix_un[j][k] - cdavg_un[k]); } chkvar_un[k]=temp/(checking_data_n-1); //checking_variance_un = checking_variance_un + temp; temp=0; } temp =0.0; // the following code cacluates the un normalized checking varinace for(j=0; j< checking_data_n; j++) { for(k=0; k< Out_n; k++) { temp = checking_data_matrix_un[j][k] - checking_data_average_un; temp = temp*temp; checking_variance_un = checking_variance_un + temp; } } checking_variance_un = checking_variance_un/((Out_n*checking_data_n)-1); printf("%lf is the checking variance non normalized \n", checking_variance_un); temp =0.0; checking_data_average_n=0.0; checking_data_average_n_temp=0.0; // the following code calculates the cdavg and checking data average both normalized for(k=0; k< Out_n; k++) { for(j=0; j< checking_data_n; j++) { checking_data_average_n = checking_data_average_n + checking_data_matrix[j][(k+1)*In_vect_n +k]; } cdavg[k]=checking_data_average_n/checking_data_n; checking_data_average_n_temp=checking_data_average_n_temp+checking_data_average_n; checking_data_average_n=0; } checking_data_average_n = checking_data_average_n_temp/(Out_n*checking_data_n); printf("%lf is the checking datat average normalized\n", checking_data_average_n); temp =0.0; checking_variance_n =0.0; // the following code cacluates the normalized checking varinace for(j=0; j< checking_data_n; j++) { for(k=0; k< Out_n; k++) { temp = checking_data_matrix[j][(k+1)*In_vect_n +k] - checking_data_average_n; temp = temp*temp; checking_variance_n = checking_variance_n + temp; } } checking_variance_n = checking_variance_n/((Out_n*checking_data_n)-1); temp = 0.0; printf("%lf is the checking variance normalized \n", checking_variance_n); // the following code calcuatres the normalized chkvar[k] temp=0.0; for(k=0; k< Out_n; k++) { for(j=0; j< checking_data_n; j++) { temp= temp + (checking_data_matrix[j][(k+1)*In_vect_n +k] - cdavg[k])*(checking_data_matrix[j][(k+1)*In_vect_n +k] - cdavg[k]); } chkvar[k]=temp/(checking_data_n-1); //checking_variance_n = checking_variance_n + temp; temp=0; } NMSE_un = min_chk_RMSE_un * min_chk_RMSE_un / checking_variance_un; NMSE_n = min_chk_RMSE_n * min_chk_RMSE_n / checking_variance_n; NMSE_n2 = min_chk_RMSE_n * min_chk_RMSE_n / chkvariance; NDEI_un = sqrt(NMSE_un); NDEI_n = sqrt(NMSE_n); for(k=0;k<Out_n;k++) { NMSE[k]=chk_error_n[k]*chk_error_n[k]/chkvar[k]; NDEI[k]=sqrt(NMSE[k]); unNMSE[k]=chk_error_un[k]*chk_error_un[k]/chkvar_un[k]; unNDEI[k]=sqrt(unNMSE[k]); } write_result(min_trn_RMSE_epoch ,Out_n,trn_rmse_error,chk_error_un, chk_error_n, NDEI_un, NMSE_un, NDEI_n, NMSE_n, NMSE, NDEI, unNMSE, unNDEI); //debug.c writes to result.txt about the epoch number at which the stopping was done and the corresponding training RMSE and checking RMSE printf("Minimum training RMSE is \t %f \t \n",min_trn_RMSE); printf("Minimum training RMSE epoch is \t %d \n",min_trn_RMSE_epoch); printf("Minimum training NMSE is \t %f \t \n",min_trnNMSE); //printf("Minimum training RMSE epoch is \t %d \n",min_trnNMSE_epoch); //printf("Minimum training RMSE is \t %f \t \n",min_trn_RMSE); //printf("Minimum training RMSE epoch is \t %d \n",min_trn_RMSE_epoch); printf("%f \t is the checking RMSE non normalized\n",min_chk_RMSE_un); printf("%f \t is the checking RMSE normalized\n",min_chk_RMSE_n); //printf("%f \t is the checking RMSE normalized22222222 \n",min_chk_RMSE_n2); printf(" checking NMSE non normlized is %f \t NDEI non normalized is %f \n",NMSE_un, NDEI_un); printf("checking NMSE normalized is %f \t NDEI normalized is %f \n",NMSE_n, NDEI_n); printf("checking NMSE2 normalized is %f \n",NMSE_n2); printf("traning data variance is %f \n",trnvariance); return(0);
/*! \brief * <pre> * Purpose * ======= * ilu_cdrop_row() - Drop some small rows from the previous * supernode (L-part only). * </pre> */ int ilu_cdrop_row( superlu_options_t *options, /* options */ int first, /* index of the first column in the supernode */ int last, /* index of the last column in the supernode */ double drop_tol, /* dropping parameter */ int quota, /* maximum nonzero entries allowed */ int *nnzLj, /* in/out number of nonzeros in L(:, 1:last) */ double *fill_tol, /* in/out - on exit, fill_tol=-num_zero_pivots, * does not change if options->ILU_MILU != SMILU1 */ GlobalLU_t *Glu, /* modified */ float swork[], /* working space * the length of swork[] should be no less than * the number of rows in the supernode */ float swork2[], /* working space with the same size as swork[], * used only by the second dropping rule */ int lastc /* if lastc == 0, there is nothing after the * working supernode [first:last]; * if lastc == 1, there is one more column after * the working supernode. */ ) { register int i, j, k, m1; register int nzlc; /* number of nonzeros in column last+1 */ register int xlusup_first, xlsub_first; int m, n; /* m x n is the size of the supernode */ int r = 0; /* number of dropped rows */ register float *temp; register complex *lusup = (complex *) Glu->lusup; register int *lsub = Glu->lsub; register int *xlsub = Glu->xlsub; register int *xlusup = Glu->xlusup; register float d_max = 0.0, d_min = 1.0; int drop_rule = options->ILU_DropRule; milu_t milu = options->ILU_MILU; norm_t nrm = options->ILU_Norm; complex zero = {0.0, 0.0}; complex one = {1.0, 0.0}; complex none = {-1.0, 0.0}; int i_1 = 1; int inc_diag; /* inc_diag = m + 1 */ int nzp = 0; /* number of zero pivots */ float alpha = pow((double)(Glu->n), -1.0 / options->ILU_MILU_Dim); xlusup_first = xlusup[first]; xlsub_first = xlsub[first]; m = xlusup[first + 1] - xlusup_first; n = last - first + 1; m1 = m - 1; inc_diag = m + 1; nzlc = lastc ? (xlusup[last + 2] - xlusup[last + 1]) : 0; temp = swork - n; /* Quick return if nothing to do. */ if (m == 0 || m == n || drop_rule == NODROP) { *nnzLj += m * n; return 0; } /* basic dropping: ILU(tau) */ for (i = n; i <= m1; ) { /* the average abs value of ith row */ switch (nrm) { case ONE_NORM: temp[i] = scasum_(&n, &lusup[xlusup_first + i], &m) / (double)n; break; case TWO_NORM: temp[i] = scnrm2_(&n, &lusup[xlusup_first + i], &m) / sqrt((double)n); break; case INF_NORM: default: k = icamax_(&n, &lusup[xlusup_first + i], &m) - 1; temp[i] = c_abs1(&lusup[xlusup_first + i + m * k]); break; } /* drop small entries due to drop_tol */ if (drop_rule & DROP_BASIC && temp[i] < drop_tol) { r++; /* drop the current row and move the last undropped row here */ if (r > 1) /* add to last row */ { /* accumulate the sum (for MILU) */ switch (milu) { case SMILU_1: case SMILU_2: caxpy_(&n, &one, &lusup[xlusup_first + i], &m, &lusup[xlusup_first + m - 1], &m); break; case SMILU_3: for (j = 0; j < n; j++) lusup[xlusup_first + (m - 1) + j * m].r += c_abs1(&lusup[xlusup_first + i + j * m]); break; case SILU: default: break; } ccopy_(&n, &lusup[xlusup_first + m1], &m, &lusup[xlusup_first + i], &m); } /* if (r > 1) */ else /* move to last row */ { cswap_(&n, &lusup[xlusup_first + m1], &m, &lusup[xlusup_first + i], &m); if (milu == SMILU_3) for (j = 0; j < n; j++) { lusup[xlusup_first + m1 + j * m].r = c_abs1(&lusup[xlusup_first + m1 + j * m]); lusup[xlusup_first + m1 + j * m].i = 0.0; } } lsub[xlsub_first + i] = lsub[xlsub_first + m1]; m1--; continue; } /* if dropping */ else { if (temp[i] > d_max) d_max = temp[i]; if (temp[i] < d_min) d_min = temp[i]; } i++; } /* for */ /* Secondary dropping: drop more rows according to the quota. */ quota = ceil((double)quota / (double)n); if (drop_rule & DROP_SECONDARY && m - r > quota) { register double tol = d_max; /* Calculate the second dropping tolerance */ if (quota > n) { if (drop_rule & DROP_INTERP) /* by interpolation */ { d_max = 1.0 / d_max; d_min = 1.0 / d_min; tol = 1.0 / (d_max + (d_min - d_max) * quota / (m - n - r)); } else /* by quick select */ { int len = m1 - n + 1; scopy_(&len, swork, &i_1, swork2, &i_1); tol = sqselect(len, swork2, quota - n); #if 0 register int *itemp = iwork - n; A = temp; for (i = n; i <= m1; i++) itemp[i] = i; qsort(iwork, m1 - n + 1, sizeof(int), _compare_); tol = temp[itemp[quota]]; #endif } } for (i = n; i <= m1; ) { if (temp[i] <= tol) { register int j; r++; /* drop the current row and move the last undropped row here */ if (r > 1) /* add to last row */ { /* accumulate the sum (for MILU) */ switch (milu) { case SMILU_1: case SMILU_2: caxpy_(&n, &one, &lusup[xlusup_first + i], &m, &lusup[xlusup_first + m - 1], &m); break; case SMILU_3: for (j = 0; j < n; j++) lusup[xlusup_first + (m - 1) + j * m].r += c_abs1(&lusup[xlusup_first + i + j * m]); break; case SILU: default: break; } ccopy_(&n, &lusup[xlusup_first + m1], &m, &lusup[xlusup_first + i], &m); } /* if (r > 1) */ else /* move to last row */ { cswap_(&n, &lusup[xlusup_first + m1], &m, &lusup[xlusup_first + i], &m); if (milu == SMILU_3) for (j = 0; j < n; j++) { lusup[xlusup_first + m1 + j * m].r = c_abs1(&lusup[xlusup_first + m1 + j * m]); lusup[xlusup_first + m1 + j * m].i = 0.0; } } lsub[xlsub_first + i] = lsub[xlsub_first + m1]; m1--; temp[i] = temp[m1]; continue; } i++; } /* for */ } /* if secondary dropping */ for (i = n; i < m; i++) temp[i] = 0.0; if (r == 0) { *nnzLj += m * n; return 0; } /* add dropped entries to the diagnal */ if (milu != SILU) { register int j; complex t; float omega; for (j = 0; j < n; j++) { t = lusup[xlusup_first + (m - 1) + j * m]; if (t.r == 0.0 && t.i == 0.0) continue; omega = SUPERLU_MIN(2.0 * (1.0 - alpha) / c_abs1(&t), 1.0); cs_mult(&t, &t, omega); switch (milu) { case SMILU_1: if ( !(c_eq(&t, &none)) ) { c_add(&t, &t, &one); cc_mult(&lusup[xlusup_first + j * inc_diag], &lusup[xlusup_first + j * inc_diag], &t); } else { cs_mult( &lusup[xlusup_first + j * inc_diag], &lusup[xlusup_first + j * inc_diag], *fill_tol); #ifdef DEBUG printf("[1] ZERO PIVOT: FILL col %d.\n", first + j); fflush(stdout); #endif nzp++; } break; case SMILU_2: cs_mult(&lusup[xlusup_first + j * inc_diag], &lusup[xlusup_first + j * inc_diag], 1.0 + c_abs1(&t)); break; case SMILU_3: c_add(&t, &t, &one); cc_mult(&lusup[xlusup_first + j * inc_diag], &lusup[xlusup_first + j * inc_diag], &t); break; case SILU: default: break; } } if (nzp > 0) *fill_tol = -nzp; } /* Remove dropped entries from the memory and fix the pointers. */ m1 = m - r; for (j = 1; j < n; j++) { register int tmp1, tmp2; tmp1 = xlusup_first + j * m1; tmp2 = xlusup_first + j * m; for (i = 0; i < m1; i++) lusup[i + tmp1] = lusup[i + tmp2]; } for (i = 0; i < nzlc; i++) lusup[xlusup_first + i + n * m1] = lusup[xlusup_first + i + n * m]; for (i = 0; i < nzlc; i++) lsub[xlsub[last + 1] - r + i] = lsub[xlsub[last + 1] + i]; for (i = first + 1; i <= last + 1; i++) { xlusup[i] -= r * (i - first); xlsub[i] -= r; } if (lastc) { xlusup[last + 2] -= r * n; xlsub[last + 2] -= r; } *nnzLj += (m - r) * n; return r; }
void schurFactorization(long n, complex **A, complex **T, complex **U) { /* Schur factorization: A = U*T*U', T = upper triangular, U = unitary */ long i,j,iter,maxIter; double tol, diff1,diff2; complex T11, T12, T21, T22; complex sigma1, sigma2, sigma; complex z, z1, z2; complex **P, **Q, **R; /* Allocate auxiliary matrices */ P = (complex **)Mem(MEM_ALLOC,n, sizeof(complex *)); Q = (complex **)Mem(MEM_ALLOC,n, sizeof(complex *)); R = (complex **)Mem(MEM_ALLOC,n, sizeof(complex *)); for (i=0; i<n; i++){ P[i] = (complex *)Mem(MEM_ALLOC,n, sizeof(complex)); Q[i] = (complex *)Mem(MEM_ALLOC,n, sizeof(complex)); R[i] = (complex *)Mem(MEM_ALLOC,n, sizeof(complex)); } /* ------------------------------------------------------------*/ /* Parameters for iteration */ maxIter = 500; tol = 1E-30; /* ------------------------------------------------------------*/ /* Init U = eye(n) (identity matrix) */ for (i=0; i<n; i++){ U[i][i].re = 1.0; U[i][i].im = 0.0; } /* ------------------------------------------------------------*/ /* Reduce A to Hessenberg form */ hessFactorization(n,A,P,T); /* ------------------------------------------------------------*/ /* Compute Schur factorization of Hessenberg matrix T */ for (j=n-1; j>0; j--){ /* Main loop */ for (iter=0; iter<maxIter; iter++){ /* Iteration loop */ sigma.re = T[j][j].re; sigma.im = T[j][j].im; /* -- Use Wilkinson shift -- */ /* submatrix considered in the shift */ T11 = T[j-1][j-1]; T12 = T[j-1][j]; T21 = T[j][j-1]; T22 = T[j][j]; /* Compute eigenvalues of submatrix */ z.re = 0.0; z.im = 0.0; z2.re = 0.0; z2.im = 0.0; /* z = T11*T11 + T22*T22 - 2*T11*T22 + 4*T12*T21 */ z1 = c_mul(T11,T11); z = c_add(z ,z1); z2 = c_add(z2,z1); z1 = c_mul(T22,T22); z = c_add(z ,z1); z2 = c_add(z2,z1); z1 = c_mul(T11,T22); z1.re = -2.0 * z1.re; z1.im = -2.0 * z1.im; z = c_add(z,z1); z1 = c_mul(T12,T21); z1.re = 4.0 * z1.re; z1.im = 4.0 * z1.im; z = c_add(z,z1); /* Square root*/ z = c_sqrt(z); /* Eigenvalues */ sigma1 = c_add(z2,z); sigma2 = c_sub(z2,z); /* printf("sigma1 = %e %e\n", sigma1.re, sigma1.im); */ /* printf("sigma2 = %e %e\n", sigma2.re, sigma2.im); */ /* Select eigenvalue for shift*/ diff1 = c_norm( c_sub(T[j][j], sigma1) ); diff2 = c_norm( c_sub(T[j][j], sigma2) ); if (diff1 < diff2){ sigma.re = sigma1.re; sigma.im = sigma1.im; }else{ sigma.re = sigma2.re; sigma.im = sigma2.im; } /* --- QR step with Wilkinson shift --- */ /* Shift: T(1:j,1:j) = T(1:j,1:j) - sigma * eye(j) */ for (i=0; i<j+1; i++){ CheckValue(FUNCTION_NAME, "T[i][i].re","", T[i][i].re, -INFTY, INFTY); CheckValue(FUNCTION_NAME, "T[i][i].im","", T[i][i].im, -INFTY, INFTY); T[i][i].re = T[i][i].re - sigma.re; T[i][i].im = T[i][i].im - sigma.im; } /* Compute QR factorization of shifted Hessenberg matrix */ for (i=0; i<n; i++){ memset(Q[i], 0, n*sizeof(complex)); memset(R[i], 0, n*sizeof(complex)); } QRfactorization(n,T,Q,R); /* T = T_new = R * Q */ for (i=0; i<n; i++){ memset(T[i], 0, n*sizeof(complex)); } matProduct(n, n, n, R, Q, T); /* T(1:j,1:j) = T(1:j,1:j) + sigma * eye(j) */ for (i=0; i<j+1; i++){ T[i][i].re = T[i][i].re + sigma.re; T[i][i].im = T[i][i].im + sigma.im; } /* R = U_new = U * Q */ for (i=0; i<n; i++){ memset(R[i], 0, n*sizeof(complex)); } matProduct(n,n,n,U,Q,R); /* U = R */ for (i=0; i<n; i++){ memcpy(U[i],R[i], n*sizeof(complex)); } /* Check convergence */ if (c_norm( T[j][j-1] ) <= tol * (c_norm(T[j-1][j-1]) + c_norm(T[j][j]))){ T[j][j-1].re = 0.0; T[j][j-1].im = 0.0; break; } } /* end of iter loop */ } /* end of main loop */ /* -------------------------------------------------------------*/ /* U = P*U */ for (i=0; i<n; i++){ memset(U[i], 0, n*sizeof(complex)); } matProduct(n,n,n,P,R,U); /* -------------------------------------------------------------*/ /* Free auxiliary variables */ for (i=0; i<n; i++){ Mem(MEM_FREE,P[i]); Mem(MEM_FREE,Q[i]); Mem(MEM_FREE,R[i]); } Mem(MEM_FREE,P); Mem(MEM_FREE,Q); Mem(MEM_FREE,R); /* Return */ return; }
void QRfactorization(long n, complex **A, complex **Q, complex **R) { /* QR factorization based on Householder transformations */ long i,j,k,m; double nrm; complex z,z1,z2; complex *vj; /* Init. Q = eye(n) (identity matrix) */ for (i=0; i<n; i++){ Q[i][i].re = 1.0; Q[i][i].im = 0.0; } /* Init. R = A */ for(j=0; j<n; j++){ for (i=0; i<n; i++){ R[i][j].re = A[i][j].re; R[i][j].im = A[i][j].im; CheckValue(FUNCTION_NAME, "A[i][j].re","", A[i][j].re, -INFTY, INFTY); CheckValue(FUNCTION_NAME, "A[i][j].im","", A[i][j].im, -INFTY, INFTY); } } /* Allocate auxiliary variables*/ vj = (complex *)Mem(MEM_ALLOC, n, sizeof(complex)); /* printf("begin calc, n = %d\n", n); */ /* ------------------------------------------------------------*/ for (j=0; j<n; j++){ /* Main loop */ /* R(j:end, j) */ for (i=j; i<n; i++){ vj[i-j].re = R[i][j].re; vj[i-j].im = R[i][j].im; } nrm = vectorNorm(n-j, vj); /* v(1) = v(1) + sign(R(j,j)) * norm(v) */ vj[0].re = vj[0].re + R[j][j].re / c_norm(R[j][j]) * nrm; vj[0].im = vj[0].im + R[j][j].im / c_norm(R[j][j]) * nrm; /* Update norm */ nrm = vectorNorm(n-j, vj); /* v = v./norm(v) */ for (i=0; i<n-j; i++){ vj[i].re = vj[i].re / nrm; vj[i].im = vj[i].im / nrm; } /* Update */ /* R(j:end, :) = R(j:end,:) - 2 * vj * vj' * R(j:end,:), : */ /* Q(:,j:end) = Q(:,j:end) - 2 * Q(:,j:end) * vj * vj^T */ for (k=0; k<n; k++){ /* (v * v' * A)_ik = v_i SUM_m Conj(v_m) A_mk */ z.re = 0.0; z.im = 0.0; for (m=j; m<n; m++){ z1 = c_con(vj[m-j]); z1 = c_mul(z1, R[m][k]); z = c_add(z,z1); } for (i=j; i<n; i++){ z2 = c_mul(vj[i-j],z); /* Update R(i,k) */ R[i][k].re = R[i][k].re - 2.0 * z2.re; R[i][k].im = R[i][k].im - 2.0 * z2.im; CheckValue(FUNCTION_NAME, "R[i][k].re","", R[i][k].re, -INFTY, INFTY); CheckValue(FUNCTION_NAME, "R[i][k].im","", R[i][k].im, -INFTY, INFTY); } /* (A * v * v^')_ki = v_i * SUM_m Conj(v_m) A_km */ z.re = 0.0; z.im = 0.0; for (m=j; m<n; m++){ z1 = vj[m-j]; z1 = c_mul(z1, Q[k][m]); z = c_add(z,z1); } for (i=j; i<n; i++){ z1 = c_con(vj[i-j]); z2 = c_mul(z1,z); /* Update Q(k,i)*/ Q[k][i].re = Q[k][i].re - 2.0 * z2.re; Q[k][i].im = Q[k][i].im - 2.0 * z2.im; CheckValue(FUNCTION_NAME, "Q[k][i].re","", Q[k][i].re, -INFTY, INFTY); CheckValue(FUNCTION_NAME, "Q[k][i].im","", Q[k][i].im, -INFTY, INFTY); } } } /* End of main loop (j) */ /* -------------------------------------------------------------*/ /* Free auxiliary variables */ Mem(MEM_FREE, vj); return; }
int ilu_ccopy_to_ucol( int jcol, /* in */ int nseg, /* in */ int *segrep, /* in */ int *repfnz, /* in */ int *perm_r, /* in */ complex *dense, /* modified - reset to zero on return */ int drop_rule,/* in */ milu_t milu, /* in */ double drop_tol, /* in */ int quota, /* maximum nonzero entries allowed */ complex *sum, /* out - the sum of dropped entries */ int *nnzUj, /* in - out */ GlobalLU_t *Glu, /* modified */ float *work /* working space with minimum size n, * used by the second dropping rule */ ) { /* * Gather from SPA dense[*] to global ucol[*]. */ int ksub, krep, ksupno; int i, k, kfnz, segsze; int fsupc, isub, irow; int jsupno, nextu; int new_next, mem_error; int *xsup, *supno; int *lsub, *xlsub; complex *ucol; int *usub, *xusub; int nzumax; int m; /* number of entries in the nonzero U-segments */ register float d_max = 0.0, d_min = 1.0 / dlamch_("Safe minimum"); register double tmp; complex zero = {0.0, 0.0}; int i_1 = 1; xsup = Glu->xsup; supno = Glu->supno; lsub = Glu->lsub; xlsub = Glu->xlsub; ucol = Glu->ucol; usub = Glu->usub; xusub = Glu->xusub; nzumax = Glu->nzumax; *sum = zero; if (drop_rule == NODROP) { drop_tol = -1.0, quota = Glu->n; } jsupno = supno[jcol]; nextu = xusub[jcol]; k = nseg - 1; for (ksub = 0; ksub < nseg; ksub++) { krep = segrep[k--]; ksupno = supno[krep]; if ( ksupno != jsupno ) { /* Should go into ucol[] */ kfnz = repfnz[krep]; if ( kfnz != EMPTY ) { /* Nonzero U-segment */ fsupc = xsup[ksupno]; isub = xlsub[fsupc] + kfnz - fsupc; segsze = krep - kfnz + 1; new_next = nextu + segsze; while ( new_next > nzumax ) { if ((mem_error = cLUMemXpand(jcol, nextu, UCOL, &nzumax, Glu)) != 0) return (mem_error); ucol = Glu->ucol; if ((mem_error = cLUMemXpand(jcol, nextu, USUB, &nzumax, Glu)) != 0) return (mem_error); usub = Glu->usub; lsub = Glu->lsub; } for (i = 0; i < segsze; i++) { irow = lsub[isub++]; tmp = c_abs1(&dense[irow]); /* first dropping rule */ if (quota > 0 && tmp >= drop_tol) { if (tmp > d_max) d_max = tmp; if (tmp < d_min) d_min = tmp; usub[nextu] = perm_r[irow]; ucol[nextu] = dense[irow]; nextu++; } else { switch (milu) { case SMILU_1: case SMILU_2: c_add(sum, sum, &dense[irow]); break; case SMILU_3: /* *sum += fabs(dense[irow]);*/ sum->r += tmp; break; case SILU: default: break; } #ifdef DEBUG num_drop_U++; #endif } dense[irow] = zero; } } } } /* for each segment... */ xusub[jcol + 1] = nextu; /* Close U[*,jcol] */ m = xusub[jcol + 1] - xusub[jcol]; /* second dropping rule */ if (drop_rule & DROP_SECONDARY && m > quota) { register double tol = d_max; register int m0 = xusub[jcol] + m - 1; if (quota > 0) { if (drop_rule & DROP_INTERP) { d_max = 1.0 / d_max; d_min = 1.0 / d_min; tol = 1.0 / (d_max + (d_min - d_max) * quota / m); } else { i_1 = xusub[jcol]; for (i = 0; i < m; ++i, ++i_1) work[i] = c_abs1(&ucol[i_1]); tol = sqselect(m, work, quota); #if 0 A = &ucol[xusub[jcol]]; for (i = 0; i < m; i++) work[i] = i; qsort(work, m, sizeof(int), _compare_); tol = fabs(usub[xusub[jcol] + work[quota]]); #endif } } for (i = xusub[jcol]; i <= m0; ) { if (c_abs1(&ucol[i]) <= tol) { switch (milu) { case SMILU_1: case SMILU_2: c_add(sum, sum, &ucol[i]); break; case SMILU_3: sum->r += tmp; break; case SILU: default: break; } ucol[i] = ucol[m0]; usub[i] = usub[m0]; m0--; m--; #ifdef DEBUG num_drop_U++; #endif xusub[jcol + 1]--; continue; } i++; } } if (milu == SMILU_2) { sum->r = c_abs1(sum); sum->i = 0.0; } if (milu == SMILU_3) sum->i = 0.0; *nnzUj += m; return 0; }
/*! \brief Performs one of the matrix-vector operations y := alpha*A*x + beta*y, or y := alpha*A'*x + beta*y * * <pre> * Purpose * ======= * * sp_cgemv() performs one of the matrix-vector operations * y := alpha*A*x + beta*y, or y := alpha*A'*x + beta*y, * where alpha and beta are scalars, x and y are vectors and A is a * sparse A->nrow by A->ncol matrix. * * Parameters * ========== * * TRANS - (input) char* * On entry, TRANS specifies the operation to be performed as * follows: * TRANS = 'N' or 'n' y := alpha*A*x + beta*y. * TRANS = 'T' or 't' y := alpha*A'*x + beta*y. * TRANS = 'C' or 'c' y := alpha*A^H*x + beta*y. * * ALPHA - (input) complex * On entry, ALPHA specifies the scalar alpha. * * A - (input) SuperMatrix* * Before entry, the leading m by n part of the array A must * contain the matrix of coefficients. * * X - (input) complex*, array of DIMENSION at least * ( 1 + ( n - 1 )*abs( INCX ) ) when TRANS = 'N' or 'n' * and at least * ( 1 + ( m - 1 )*abs( INCX ) ) otherwise. * Before entry, the incremented array X must contain the * vector x. * * INCX - (input) int * On entry, INCX specifies the increment for the elements of * X. INCX must not be zero. * * BETA - (input) complex * On entry, BETA specifies the scalar beta. When BETA is * supplied as zero then Y need not be set on input. * * Y - (output) complex*, array of DIMENSION at least * ( 1 + ( m - 1 )*abs( INCY ) ) when TRANS = 'N' or 'n' * and at least * ( 1 + ( n - 1 )*abs( INCY ) ) otherwise. * Before entry with BETA non-zero, the incremented array Y * must contain the vector y. On exit, Y is overwritten by the * updated vector y. * * INCY - (input) int * On entry, INCY specifies the increment for the elements of * Y. INCY must not be zero. * * ==== Sparse Level 2 Blas routine. * </pre> */ int sp_cgemv(char *trans, complex alpha, SuperMatrix *A, complex *x, int incx, complex beta, complex *y, int incy) { /* Local variables */ NCformat *Astore; complex *Aval; int info; complex temp, temp1; int lenx, leny, i, j, irow; int iy, jx, jy, kx, ky; int notran; complex comp_zero = {0.0, 0.0}; complex comp_one = {1.0, 0.0}; notran = ( strncmp(trans, "N", 1)==0 || strncmp(trans, "n", 1)==0 ); Astore = A->Store; Aval = Astore->nzval; /* Test the input parameters */ info = 0; if ( !notran && strncmp(trans, "T", 1)!=0 && strncmp(trans, "C", 1)!=0) info = 1; else if ( A->nrow < 0 || A->ncol < 0 ) info = 3; else if (incx == 0) info = 5; else if (incy == 0) info = 8; if (info != 0) { input_error("sp_cgemv ", &info); return 0; } /* Quick return if possible. */ if (A->nrow == 0 || A->ncol == 0 || c_eq(&alpha, &comp_zero) && c_eq(&beta, &comp_one)) return 0; /* Set LENX and LENY, the lengths of the vectors x and y, and set up the start points in X and Y. */ if ( notran ) { lenx = A->ncol; leny = A->nrow; } else { lenx = A->nrow; leny = A->ncol; } if (incx > 0) kx = 0; else kx = - (lenx - 1) * incx; if (incy > 0) ky = 0; else ky = - (leny - 1) * incy; /* Start the operations. In this version the elements of A are accessed sequentially with one pass through A. */ /* First form y := beta*y. */ if ( !c_eq(&beta, &comp_one) ) { if (incy == 1) { if ( c_eq(&beta, &comp_zero) ) for (i = 0; i < leny; ++i) y[i] = comp_zero; else for (i = 0; i < leny; ++i) cc_mult(&y[i], &beta, &y[i]); } else { iy = ky; if ( c_eq(&beta, &comp_zero) ) for (i = 0; i < leny; ++i) { y[iy] = comp_zero; iy += incy; } else for (i = 0; i < leny; ++i) { cc_mult(&y[iy], &beta, &y[iy]); iy += incy; } } } if ( c_eq(&alpha, &comp_zero) ) return 0; if ( notran ) { /* Form y := alpha*A*x + y. */ jx = kx; if (incy == 1) { for (j = 0; j < A->ncol; ++j) { if ( !c_eq(&x[jx], &comp_zero) ) { cc_mult(&temp, &alpha, &x[jx]); for (i = Astore->colptr[j]; i < Astore->colptr[j+1]; ++i) { irow = Astore->rowind[i]; cc_mult(&temp1, &temp, &Aval[i]); c_add(&y[irow], &y[irow], &temp1); } } jx += incx; } } else { ABORT("Not implemented."); } } else if (strncmp(trans, "T", 1) == 0 || strncmp(trans, "t", 1) == 0) { /* Form y := alpha*A'*x + y. */ jy = ky; if (incx == 1) { for (j = 0; j < A->ncol; ++j) { temp = comp_zero; for (i = Astore->colptr[j]; i < Astore->colptr[j+1]; ++i) { irow = Astore->rowind[i]; cc_mult(&temp1, &Aval[i], &x[irow]); c_add(&temp, &temp, &temp1); } cc_mult(&temp1, &alpha, &temp); c_add(&y[jy], &y[jy], &temp1); jy += incy; } } else { ABORT("Not implemented."); } } else { /* trans == 'C' or 'c' */ /* Form y := alpha * conj(A) * x + y. */ complex temp2; jy = ky; if (incx == 1) { for (j = 0; j < A->ncol; ++j) { temp = comp_zero; for (i = Astore->colptr[j]; i < Astore->colptr[j+1]; ++i) { irow = Astore->rowind[i]; temp2.r = Aval[i].r; temp2.i = -Aval[i].i; /* conjugation */ cc_mult(&temp1, &temp2, &x[irow]); c_add(&temp, &temp, &temp1); } cc_mult(&temp1, &alpha, &temp); c_add(&y[jy], &y[jy], &temp1); jy += incy; } } else { ABORT("Not implemented."); } } return 0; } /* sp_cgemv */
int pcgstrf_column_bmod( const int pnum, /* process number */ const int jcol, /* current column in the panel */ const int fpanelc,/* first column in the panel */ const int nseg, /* number of s-nodes to update jcol */ int *segrep,/* in */ int *repfnz,/* in */ complex *dense, /* modified */ complex *tempv, /* working array */ pxgstrf_shared_t *pxgstrf_shared, /* modified */ Gstat_t *Gstat /* modified */ ) { /* * -- SuperLU MT routine (version 2.0) -- * Lawrence Berkeley National Lab, Univ. of California Berkeley, * and Xerox Palo Alto Research Center. * September 10, 2007 * * Purpose: * ======== * Performs numeric block updates (sup-col) in topological order. * It features: col-col, 2cols-col, 3cols-col, and sup-col updates. * Special processing on the supernodal portion of L\U[*,j]. * * Return value: * ============= * 0 - successful return * > 0 - number of bytes allocated when run out of space * */ #if ( MACH==CRAY_PVP ) _fcd ftcs1 = _cptofcd("L", strlen("L")), ftcs2 = _cptofcd("N", strlen("N")), ftcs3 = _cptofcd("U", strlen("U")); #endif #ifdef USE_VENDOR_BLAS int incx = 1, incy = 1; complex alpha, beta; #endif GlobalLU_t *Glu = pxgstrf_shared->Glu; /* modified */ /* krep = representative of current k-th supernode * fsupc = first supernodal column * nsupc = no of columns in supernode * nsupr = no of rows in supernode (used as leading dimension) * luptr = location of supernodal LU-block in storage * kfnz = first nonz in the k-th supernodal segment * no_zeros = no of leading zeros in a supernodal U-segment */ complex ukj, ukj1, ukj2; register int lptr, kfnz, isub, irow, i, no_zeros; register int luptr, luptr1, luptr2; int fsupc, nsupc, nsupr, segsze; int nrow; /* No of rows in the matrix of matrix-vector */ int jsupno, k, ksub, krep, krep_ind, ksupno; int ufirst, nextlu; int fst_col; /* First column within small LU update */ int d_fsupc; /* Distance between the first column of the current panel and the first column of the current snode.*/ int *xsup, *supno; int *lsub, *xlsub, *xlsub_end; complex *lusup; int *xlusup, *xlusup_end; complex *tempv1; int mem_error; register float flopcnt; complex zero = {0.0, 0.0}; complex one = {1.0, 0.0}; complex none = {-1.0, 0.0}; complex comp_temp, comp_temp1; xsup = Glu->xsup; supno = Glu->supno; lsub = Glu->lsub; xlsub = Glu->xlsub; xlsub_end = Glu->xlsub_end; lusup = Glu->lusup; xlusup = Glu->xlusup; xlusup_end = Glu->xlusup_end; jsupno = supno[jcol]; /* * For each nonz supernode segment of U[*,j] in topological order */ k = nseg - 1; for (ksub = 0; ksub < nseg; ksub++) { krep = segrep[k]; k--; ksupno = supno[krep]; #if ( DEBUGlvel>=2 ) if (jcol==BADCOL) printf("(%d) pcgstrf_column_bmod[1]: %d, nseg %d, krep %d, jsupno %d, ksupno %d\n", pnum, jcol, nseg, krep, jsupno, ksupno); #endif if ( jsupno != ksupno ) { /* Outside the rectangular supernode */ fsupc = xsup[ksupno]; fst_col = SUPERLU_MAX ( fsupc, fpanelc ); /* Distance from the current supernode to the current panel; d_fsupc=0 if fsupc >= fpanelc. */ d_fsupc = fst_col - fsupc; luptr = xlusup[fst_col] + d_fsupc; lptr = xlsub[fsupc] + d_fsupc; kfnz = repfnz[krep]; kfnz = SUPERLU_MAX ( kfnz, fpanelc ); segsze = krep - kfnz + 1; nsupc = krep - fst_col + 1; nsupr = xlsub_end[fsupc] - xlsub[fsupc]; /* Leading dimension */ nrow = nsupr - d_fsupc - nsupc; krep_ind = lptr + nsupc - 1; flopcnt = segsze * (segsze - 1) + 2 * nrow * segsze;//sj Gstat->procstat[pnum].fcops += flopcnt; #if ( DEBUGlevel>=2 ) if (jcol==BADCOL) printf("(%d) pcgstrf_column_bmod[2]: %d, krep %d, kfnz %d, segsze %d, d_fsupc %d,\ fsupc %d, nsupr %d, nsupc %d\n", pnum, jcol, krep, kfnz, segsze, d_fsupc, fsupc, nsupr, nsupc); #endif /* * Case 1: Update U-segment of size 1 -- col-col update */ if ( segsze == 1 ) { ukj = dense[lsub[krep_ind]]; luptr += nsupr*(nsupc-1) + nsupc; for (i = lptr + nsupc; i < xlsub_end[fsupc]; ++i) { irow = lsub[i]; cc_mult(&comp_temp, &ukj, &lusup[luptr]); c_sub(&dense[irow], &dense[irow], &comp_temp); luptr++; } } else if ( segsze <= 3 ) { ukj = dense[lsub[krep_ind]]; luptr += nsupr*(nsupc-1) + nsupc-1; ukj1 = dense[lsub[krep_ind - 1]]; luptr1 = luptr - nsupr; if ( segsze == 2 ) { /* Case 2: 2cols-col update */ cc_mult(&comp_temp, &ukj1, &lusup[luptr1]); c_sub(&ukj, &ukj, &comp_temp); dense[lsub[krep_ind]] = ukj; for (i = lptr + nsupc; i < xlsub_end[fsupc]; ++i) { irow = lsub[i]; luptr++; luptr1++; cc_mult(&comp_temp, &ukj, &lusup[luptr]); cc_mult(&comp_temp1, &ukj1, &lusup[luptr1]); c_add(&comp_temp, &comp_temp, &comp_temp1); c_sub(&dense[irow], &dense[irow], &comp_temp); } } else { /* Case 3: 3cols-col update */ ukj2 = dense[lsub[krep_ind - 2]]; luptr2 = luptr1 - nsupr; cc_mult(&comp_temp, &ukj2, &lusup[luptr2-1]); c_sub(&ukj1, &ukj1, &comp_temp); cc_mult(&comp_temp, &ukj1, &lusup[luptr1]); cc_mult(&comp_temp1, &ukj2, &lusup[luptr2]); c_add(&comp_temp, &comp_temp, &comp_temp1); c_sub(&ukj, &ukj, &comp_temp); dense[lsub[krep_ind]] = ukj; dense[lsub[krep_ind-1]] = ukj1; for (i = lptr + nsupc; i < xlsub_end[fsupc]; ++i) { irow = lsub[i]; luptr++; luptr1++; luptr2++; cc_mult(&comp_temp, &ukj, &lusup[luptr]); cc_mult(&comp_temp1, &ukj1, &lusup[luptr1]); c_add(&comp_temp, &comp_temp, &comp_temp1); cc_mult(&comp_temp1, &ukj2, &lusup[luptr2]); c_add(&comp_temp, &comp_temp, &comp_temp1); c_sub(&dense[irow], &dense[irow], &comp_temp); } } } else { /* * Case: sup-col update * Perform a triangular solve and block update, * then scatter the result of sup-col update to dense */ no_zeros = kfnz - fst_col; /* Copy U[*,j] segment from dense[*] to tempv[*] */ isub = lptr + no_zeros; for (i = 0; i < segsze; i++) { irow = lsub[isub]; tempv[i] = dense[irow]; ++isub; } /* Dense triangular solve -- start effective triangle */ luptr += nsupr * no_zeros + no_zeros; #ifdef USE_VENDOR_BLAS #if ( MACH==CRAY_PVP ) CTRSV( ftcs1, ftcs2, ftcs3, &segsze, &lusup[luptr], &nsupr, tempv, &incx ); #else ctrsv_( "L", "N", "U", &segsze, &lusup[luptr], &nsupr, tempv, &incx ); #endif luptr += segsze; /* Dense matrix-vector */ tempv1 = &tempv[segsze]; alpha = one; beta = zero; #if ( MACH==CRAY_PVP ) CGEMV( ftcs2, &nrow, &segsze, &alpha, &lusup[luptr], &nsupr, tempv, &incx, &beta, tempv1, &incy ); #else cgemv_( "N", &nrow, &segsze, &alpha, &lusup[luptr], &nsupr, tempv, &incx, &beta, tempv1, &incy ); #endif #else clsolve ( nsupr, segsze, &lusup[luptr], tempv ); luptr += segsze; /* Dense matrix-vector */ tempv1 = &tempv[segsze]; cmatvec (nsupr, nrow , segsze, &lusup[luptr], tempv, tempv1); #endif /* Scatter tempv[] into SPA dense[*] */ isub = lptr + no_zeros; for (i = 0; i < segsze; i++) { irow = lsub[isub]; dense[irow] = tempv[i]; /* Scatter */ tempv[i] = zero; isub++; } /* Scatter tempv1[] into SPA dense[*] */ for (i = 0; i < nrow; i++) { irow = lsub[isub]; c_sub(&dense[irow], &dense[irow], &tempv1[i]); tempv1[i] = zero; ++isub; } } /* else segsze >= 4 */ } /* if jsupno ... */ } /* for each segment... */ /* ------------------------------------------ Process the supernodal portion of L\U[*,j] ------------------------------------------ */ fsupc = SUPER_FSUPC (jsupno); nsupr = xlsub_end[fsupc] - xlsub[fsupc]; if ( (mem_error = Glu_alloc(pnum, jcol, nsupr, LUSUP, &nextlu, pxgstrf_shared)) ) return mem_error; xlusup[jcol] = nextlu; lusup = Glu->lusup; /* Gather the nonzeros from SPA dense[*,j] into L\U[*,j] */ for (isub = xlsub[fsupc]; isub < xlsub_end[fsupc]; ++isub) { irow = lsub[isub]; lusup[nextlu] = dense[irow]; dense[irow] = zero; #ifdef DEBUG if (jcol == -1) printf("(%d) pcgstrf_column_bmod[lusup] jcol %d, irow %d, lusup %.10e\n", pnum, jcol, irow, lusup[nextlu]); #endif ++nextlu; } xlusup_end[jcol] = nextlu; /* close L\U[*,jcol] */ #if ( DEBUGlevel>=2 ) if (jcol == -1) { nrow = xlusup_end[jcol] - xlusup[jcol]; print_double_vec("before sup-col update", nrow, &lsub[xlsub[fsupc]], &lusup[xlusup[jcol]]); } #endif /* * For more updates within the panel (also within the current supernode), * should start from the first column of the panel, or the first column * of the supernode, whichever is bigger. There are 2 cases: * (1) fsupc < fpanelc, then fst_col := fpanelc * (2) fsupc >= fpanelc, then fst_col := fsupc */ fst_col = SUPERLU_MAX ( fsupc, fpanelc ); if ( fst_col < jcol ) { /* distance between the current supernode and the current panel; d_fsupc=0 if fsupc >= fpanelc. */ d_fsupc = fst_col - fsupc; lptr = xlsub[fsupc] + d_fsupc; luptr = xlusup[fst_col] + d_fsupc; nsupr = xlsub_end[fsupc] - xlsub[fsupc]; /* Leading dimension */ nsupc = jcol - fst_col; /* Excluding jcol */ nrow = nsupr - d_fsupc - nsupc; /* points to the beginning of jcol in supernode L\U[*,jsupno] */ ufirst = xlusup[jcol] + d_fsupc; #if ( DEBUGlevel>=2 ) if (jcol==BADCOL) printf("(%d) pcgstrf_column_bmod[3] jcol %d, fsupc %d, nsupr %d, nsupc %d, nrow %d\n", pnum, jcol, fsupc, nsupr, nsupc, nrow); #endif flopcnt = nsupc * (nsupc - 1) + 2 * nrow * nsupc; //sj Gstat->procstat[pnum].fcops += flopcnt; /* ops[TRSV] += nsupc * (nsupc - 1); ops[GEMV] += 2 * nrow * nsupc; */ #ifdef USE_VENDOR_BLAS alpha = none; beta = one; /* y := beta*y + alpha*A*x */ #if ( MACH==CRAY_PVP ) CTRSV( ftcs1, ftcs2, ftcs3, &nsupc, &lusup[luptr], &nsupr, &lusup[ufirst], &incx ); CGEMV( ftcs2, &nrow, &nsupc, &alpha, &lusup[luptr+nsupc], &nsupr, &lusup[ufirst], &incx, &beta, &lusup[ufirst+nsupc], &incy ); #else ctrsv_( "L", "N", "U", &nsupc, &lusup[luptr], &nsupr, &lusup[ufirst], &incx ); cgemv_( "N", &nrow, &nsupc, &alpha, &lusup[luptr+nsupc], &nsupr, &lusup[ufirst], &incx, &beta, &lusup[ufirst+nsupc], &incy ); #endif #else clsolve ( nsupr, nsupc, &lusup[luptr], &lusup[ufirst] ); cmatvec ( nsupr, nrow, nsupc, &lusup[luptr+nsupc], &lusup[ufirst], tempv ); /* Copy updates from tempv[*] into lusup[*] */ isub = ufirst + nsupc; for (i = 0; i < nrow; i++) { c_sub(&lusup[isub], &lusup[isub], &tempv[i]); tempv[i] = zero; ++isub; } #endif } /* if fst_col < jcol ... */ return 0; }
void cpanel_bmod ( const int m, /* in - number of rows in the matrix */ const int w, /* in */ const int jcol, /* in */ const int nseg, /* in */ complex *dense, /* out, of size n by w */ complex *tempv, /* working array */ int *segrep, /* in */ int *repfnz, /* in, of size n by w */ GlobalLU_t *Glu, /* modified */ SuperLUStat_t *stat /* output */ ) { #ifdef USE_VENDOR_BLAS #ifdef _CRAY _fcd ftcs1 = _cptofcd("L", strlen("L")), ftcs2 = _cptofcd("N", strlen("N")), ftcs3 = _cptofcd("U", strlen("U")); #endif int incx = 1, incy = 1; complex alpha, beta; #endif register int k, ksub; int fsupc, nsupc, nsupr, nrow; int krep, krep_ind; complex ukj, ukj1, ukj2; int luptr, luptr1, luptr2; int segsze; int block_nrow; /* no of rows in a block row */ register int lptr; /* Points to the row subscripts of a supernode */ int kfnz, irow, no_zeros; register int isub, isub1, i; register int jj; /* Index through each column in the panel */ int *xsup, *supno; int *lsub, *xlsub; complex *lusup; int *xlusup; int *repfnz_col; /* repfnz[] for a column in the panel */ complex *dense_col; /* dense[] for a column in the panel */ complex *tempv1; /* Used in 1-D update */ complex *TriTmp, *MatvecTmp; /* used in 2-D update */ complex zero = {0.0, 0.0}; complex one = {1.0, 0.0}; complex comp_temp, comp_temp1; register int ldaTmp; register int r_ind, r_hi; static int first = 1, maxsuper, rowblk, colblk; flops_t *ops = stat->ops; xsup = Glu->xsup; supno = Glu->supno; lsub = Glu->lsub; xlsub = Glu->xlsub; lusup = Glu->lusup; xlusup = Glu->xlusup; if ( first ) { maxsuper = SUPERLU_MAX( sp_ienv(3), sp_ienv(7) ); rowblk = sp_ienv(4); colblk = sp_ienv(5); first = 0; } ldaTmp = maxsuper + rowblk; /* * For each nonz supernode segment of U[*,j] in topological order */ k = nseg - 1; for (ksub = 0; ksub < nseg; ksub++) { /* for each updating supernode */ /* krep = representative of current k-th supernode * fsupc = first supernodal column * nsupc = no of columns in a supernode * nsupr = no of rows in a supernode */ krep = segrep[k--]; fsupc = xsup[supno[krep]]; nsupc = krep - fsupc + 1; nsupr = xlsub[fsupc+1] - xlsub[fsupc]; nrow = nsupr - nsupc; lptr = xlsub[fsupc]; krep_ind = lptr + nsupc - 1; repfnz_col = repfnz; dense_col = dense; if ( nsupc >= colblk && nrow > rowblk ) { /* 2-D block update */ TriTmp = tempv; /* Sequence through each column in panel -- triangular solves */ for (jj = jcol; jj < jcol + w; jj++, repfnz_col += m, dense_col += m, TriTmp += ldaTmp ) { kfnz = repfnz_col[krep]; if ( kfnz == EMPTY ) continue; /* Skip any zero segment */ segsze = krep - kfnz + 1; luptr = xlusup[fsupc]; ops[TRSV] += 4 * segsze * (segsze - 1); ops[GEMV] += 8 * nrow * segsze; /* Case 1: Update U-segment of size 1 -- col-col update */ if ( segsze == 1 ) { ukj = dense_col[lsub[krep_ind]]; luptr += nsupr*(nsupc-1) + nsupc; for (i = lptr + nsupc; i < xlsub[fsupc+1]; i++) { irow = lsub[i]; cc_mult(&comp_temp, &ukj, &lusup[luptr]); c_sub(&dense_col[irow], &dense_col[irow], &comp_temp); ++luptr; } } else if ( segsze <= 3 ) { ukj = dense_col[lsub[krep_ind]]; ukj1 = dense_col[lsub[krep_ind - 1]]; luptr += nsupr*(nsupc-1) + nsupc-1; luptr1 = luptr - nsupr; if ( segsze == 2 ) { cc_mult(&comp_temp, &ukj1, &lusup[luptr1]); c_sub(&ukj, &ukj, &comp_temp); dense_col[lsub[krep_ind]] = ukj; for (i = lptr + nsupc; i < xlsub[fsupc+1]; ++i) { irow = lsub[i]; luptr++; luptr1++; cc_mult(&comp_temp, &ukj, &lusup[luptr]); cc_mult(&comp_temp1, &ukj1, &lusup[luptr1]); c_add(&comp_temp, &comp_temp, &comp_temp1); c_sub(&dense_col[irow], &dense_col[irow], &comp_temp); } } else { ukj2 = dense_col[lsub[krep_ind - 2]]; luptr2 = luptr1 - nsupr; cc_mult(&comp_temp, &ukj2, &lusup[luptr2-1]); c_sub(&ukj1, &ukj1, &comp_temp); cc_mult(&comp_temp, &ukj1, &lusup[luptr1]); cc_mult(&comp_temp1, &ukj2, &lusup[luptr2]); c_add(&comp_temp, &comp_temp, &comp_temp1); c_sub(&ukj, &ukj, &comp_temp); dense_col[lsub[krep_ind]] = ukj; dense_col[lsub[krep_ind-1]] = ukj1; for (i = lptr + nsupc; i < xlsub[fsupc+1]; ++i) { irow = lsub[i]; luptr++; luptr1++; luptr2++; cc_mult(&comp_temp, &ukj, &lusup[luptr]); cc_mult(&comp_temp1, &ukj1, &lusup[luptr1]); c_add(&comp_temp, &comp_temp, &comp_temp1); cc_mult(&comp_temp1, &ukj2, &lusup[luptr2]); c_add(&comp_temp, &comp_temp, &comp_temp1); c_sub(&dense_col[irow], &dense_col[irow], &comp_temp); } } } else { /* segsze >= 4 */ /* Copy U[*,j] segment from dense[*] to TriTmp[*], which holds the result of triangular solves. */ no_zeros = kfnz - fsupc; isub = lptr + no_zeros; for (i = 0; i < segsze; ++i) { irow = lsub[isub]; TriTmp[i] = dense_col[irow]; /* Gather */ ++isub; } /* start effective triangle */ luptr += nsupr * no_zeros + no_zeros; #ifdef USE_VENDOR_BLAS #ifdef _CRAY CTRSV( ftcs1, ftcs2, ftcs3, &segsze, &lusup[luptr], &nsupr, TriTmp, &incx ); #else ctrsv_( "L", "N", "U", &segsze, &lusup[luptr], &nsupr, TriTmp, &incx ); #endif #else clsolve ( nsupr, segsze, &lusup[luptr], TriTmp ); #endif } /* else ... */ } /* for jj ... end tri-solves */ /* Block row updates; push all the way into dense[*] block */ for ( r_ind = 0; r_ind < nrow; r_ind += rowblk ) { r_hi = SUPERLU_MIN(nrow, r_ind + rowblk); block_nrow = SUPERLU_MIN(rowblk, r_hi - r_ind); luptr = xlusup[fsupc] + nsupc + r_ind; isub1 = lptr + nsupc + r_ind; repfnz_col = repfnz; TriTmp = tempv; dense_col = dense; /* Sequence through each column in panel -- matrix-vector */ for (jj = jcol; jj < jcol + w; jj++, repfnz_col += m, dense_col += m, TriTmp += ldaTmp) { kfnz = repfnz_col[krep]; if ( kfnz == EMPTY ) continue; /* Skip any zero segment */ segsze = krep - kfnz + 1; if ( segsze <= 3 ) continue; /* skip unrolled cases */ /* Perform a block update, and scatter the result of matrix-vector to dense[]. */ no_zeros = kfnz - fsupc; luptr1 = luptr + nsupr * no_zeros; MatvecTmp = &TriTmp[maxsuper]; #ifdef USE_VENDOR_BLAS alpha = one; beta = zero; #ifdef _CRAY CGEMV(ftcs2, &block_nrow, &segsze, &alpha, &lusup[luptr1], &nsupr, TriTmp, &incx, &beta, MatvecTmp, &incy); #else cgemv_("N", &block_nrow, &segsze, &alpha, &lusup[luptr1], &nsupr, TriTmp, &incx, &beta, MatvecTmp, &incy); #endif #else cmatvec(nsupr, block_nrow, segsze, &lusup[luptr1], TriTmp, MatvecTmp); #endif /* Scatter MatvecTmp[*] into SPA dense[*] temporarily * such that MatvecTmp[*] can be re-used for the * the next blok row update. dense[] will be copied into * global store after the whole panel has been finished. */ isub = isub1; for (i = 0; i < block_nrow; i++) { irow = lsub[isub]; c_sub(&dense_col[irow], &dense_col[irow], &MatvecTmp[i]); MatvecTmp[i] = zero; ++isub; } } /* for jj ... */ } /* for each block row ... */ /* Scatter the triangular solves into SPA dense[*] */ repfnz_col = repfnz; TriTmp = tempv; dense_col = dense; for (jj = jcol; jj < jcol + w; jj++, repfnz_col += m, dense_col += m, TriTmp += ldaTmp) { kfnz = repfnz_col[krep]; if ( kfnz == EMPTY ) continue; /* Skip any zero segment */ segsze = krep - kfnz + 1; if ( segsze <= 3 ) continue; /* skip unrolled cases */ no_zeros = kfnz - fsupc; isub = lptr + no_zeros; for (i = 0; i < segsze; i++) { irow = lsub[isub]; dense_col[irow] = TriTmp[i]; TriTmp[i] = zero; ++isub; } } /* for jj ... */ } else { /* 1-D block modification */ /* Sequence through each column in the panel */ for (jj = jcol; jj < jcol + w; jj++, repfnz_col += m, dense_col += m) { kfnz = repfnz_col[krep]; if ( kfnz == EMPTY ) continue; /* Skip any zero segment */ segsze = krep - kfnz + 1; luptr = xlusup[fsupc]; ops[TRSV] += 4 * segsze * (segsze - 1); ops[GEMV] += 8 * nrow * segsze; /* Case 1: Update U-segment of size 1 -- col-col update */ if ( segsze == 1 ) { ukj = dense_col[lsub[krep_ind]]; luptr += nsupr*(nsupc-1) + nsupc; for (i = lptr + nsupc; i < xlsub[fsupc+1]; i++) { irow = lsub[i]; cc_mult(&comp_temp, &ukj, &lusup[luptr]); c_sub(&dense_col[irow], &dense_col[irow], &comp_temp); ++luptr; } } else if ( segsze <= 3 ) { ukj = dense_col[lsub[krep_ind]]; luptr += nsupr*(nsupc-1) + nsupc-1; ukj1 = dense_col[lsub[krep_ind - 1]]; luptr1 = luptr - nsupr; if ( segsze == 2 ) { cc_mult(&comp_temp, &ukj1, &lusup[luptr1]); c_sub(&ukj, &ukj, &comp_temp); dense_col[lsub[krep_ind]] = ukj; for (i = lptr + nsupc; i < xlsub[fsupc+1]; ++i) { irow = lsub[i]; ++luptr; ++luptr1; cc_mult(&comp_temp, &ukj, &lusup[luptr]); cc_mult(&comp_temp1, &ukj1, &lusup[luptr1]); c_add(&comp_temp, &comp_temp, &comp_temp1); c_sub(&dense_col[irow], &dense_col[irow], &comp_temp); } } else { ukj2 = dense_col[lsub[krep_ind - 2]]; luptr2 = luptr1 - nsupr; cc_mult(&comp_temp, &ukj2, &lusup[luptr2-1]); c_sub(&ukj1, &ukj1, &comp_temp); cc_mult(&comp_temp, &ukj1, &lusup[luptr1]); cc_mult(&comp_temp1, &ukj2, &lusup[luptr2]); c_add(&comp_temp, &comp_temp, &comp_temp1); c_sub(&ukj, &ukj, &comp_temp); dense_col[lsub[krep_ind]] = ukj; dense_col[lsub[krep_ind-1]] = ukj1; for (i = lptr + nsupc; i < xlsub[fsupc+1]; ++i) { irow = lsub[i]; ++luptr; ++luptr1; ++luptr2; cc_mult(&comp_temp, &ukj, &lusup[luptr]); cc_mult(&comp_temp1, &ukj1, &lusup[luptr1]); c_add(&comp_temp, &comp_temp, &comp_temp1); cc_mult(&comp_temp1, &ukj2, &lusup[luptr2]); c_add(&comp_temp, &comp_temp, &comp_temp1); c_sub(&dense_col[irow], &dense_col[irow], &comp_temp); } } } else { /* segsze >= 4 */ /* * Perform a triangular solve and block update, * then scatter the result of sup-col update to dense[]. */ no_zeros = kfnz - fsupc; /* Copy U[*,j] segment from dense[*] to tempv[*]: * The result of triangular solve is in tempv[*]; * The result of matrix vector update is in dense_col[*] */ isub = lptr + no_zeros; for (i = 0; i < segsze; ++i) { irow = lsub[isub]; tempv[i] = dense_col[irow]; /* Gather */ ++isub; } /* start effective triangle */ luptr += nsupr * no_zeros + no_zeros; #ifdef USE_VENDOR_BLAS #ifdef _CRAY CTRSV( ftcs1, ftcs2, ftcs3, &segsze, &lusup[luptr], &nsupr, tempv, &incx ); #else ctrsv_( "L", "N", "U", &segsze, &lusup[luptr], &nsupr, tempv, &incx ); #endif luptr += segsze; /* Dense matrix-vector */ tempv1 = &tempv[segsze]; alpha = one; beta = zero; #ifdef _CRAY CGEMV( ftcs2, &nrow, &segsze, &alpha, &lusup[luptr], &nsupr, tempv, &incx, &beta, tempv1, &incy ); #else cgemv_( "N", &nrow, &segsze, &alpha, &lusup[luptr], &nsupr, tempv, &incx, &beta, tempv1, &incy ); #endif #else clsolve ( nsupr, segsze, &lusup[luptr], tempv ); luptr += segsze; /* Dense matrix-vector */ tempv1 = &tempv[segsze]; cmatvec (nsupr, nrow, segsze, &lusup[luptr], tempv, tempv1); #endif /* Scatter tempv[*] into SPA dense[*] temporarily, such * that tempv[*] can be used for the triangular solve of * the next column of the panel. They will be copied into * ucol[*] after the whole panel has been finished. */ isub = lptr + no_zeros; for (i = 0; i < segsze; i++) { irow = lsub[isub]; dense_col[irow] = tempv[i]; tempv[i] = zero; isub++; } /* Scatter the update from tempv1[*] into SPA dense[*] */ /* Start dense rectangular L */ for (i = 0; i < nrow; i++) { irow = lsub[isub]; c_sub(&dense_col[irow], &dense_col[irow], &tempv1[i]); tempv1[i] = zero; ++isub; } } /* else segsze>=4 ... */ } /* for each column in the panel... */ } /* else 1-D update ... */ } /* for each updating supernode ... */ }
int ilu_cpivotL( const int jcol, /* in */ const double u, /* in - diagonal pivoting threshold */ int *usepr, /* re-use the pivot sequence given by * perm_r/iperm_r */ int *perm_r, /* may be modified */ int diagind, /* diagonal of Pc*A*Pc' */ int *swap, /* in/out record the row permutation */ int *iswap, /* in/out inverse of swap, it is the same as perm_r after the factorization */ int *marker, /* in */ int *pivrow, /* in/out, as an input if *usepr!=0 */ double fill_tol, /* in - fill tolerance of current column * used for a singular column */ milu_t milu, /* in */ complex drop_sum, /* in - computed in ilu_ccopy_to_ucol() (MILU only) */ GlobalLU_t *Glu, /* modified - global LU data structures */ SuperLUStat_t *stat /* output */ ) { int n; /* number of columns */ int fsupc; /* first column in the supernode */ int nsupc; /* no of columns in the supernode */ int nsupr; /* no of rows in the supernode */ int lptr; /* points to the starting subscript of the supernode */ register int pivptr; int old_pivptr, diag, ptr0; register float pivmax, rtemp; float thresh; complex temp; complex *lu_sup_ptr; complex *lu_col_ptr; int *lsub_ptr; register int isub, icol, k, itemp; int *lsub, *xlsub; complex *lusup; int *xlusup; flops_t *ops = stat->ops; int info; complex one = {1.0, 0.0}; /* Initialize pointers */ n = Glu->n; lsub = Glu->lsub; xlsub = Glu->xlsub; lusup = Glu->lusup; xlusup = Glu->xlusup; fsupc = (Glu->xsup)[(Glu->supno)[jcol]]; nsupc = jcol - fsupc; /* excluding jcol; nsupc >= 0 */ lptr = xlsub[fsupc]; nsupr = xlsub[fsupc+1] - lptr; lu_sup_ptr = &lusup[xlusup[fsupc]]; /* start of the current supernode */ lu_col_ptr = &lusup[xlusup[jcol]]; /* start of jcol in the supernode */ lsub_ptr = &lsub[lptr]; /* start of row indices of the supernode */ /* Determine the largest abs numerical value for partial pivoting; Also search for user-specified pivot, and diagonal element. */ pivmax = -1.0; pivptr = nsupc; diag = EMPTY; old_pivptr = nsupc; ptr0 = EMPTY; for (isub = nsupc; isub < nsupr; ++isub) { if (marker[lsub_ptr[isub]] > jcol) continue; /* do not overlap with a later relaxed supernode */ switch (milu) { case SMILU_1: c_add(&temp, &lu_col_ptr[isub], &drop_sum); rtemp = c_abs1(&temp); break; case SMILU_2: case SMILU_3: /* In this case, drop_sum contains the sum of the abs. value */ rtemp = c_abs1(&lu_col_ptr[isub]); break; case SILU: default: rtemp = c_abs1(&lu_col_ptr[isub]); break; } if (rtemp > pivmax) { pivmax = rtemp; pivptr = isub; } if (*usepr && lsub_ptr[isub] == *pivrow) old_pivptr = isub; if (lsub_ptr[isub] == diagind) diag = isub; if (ptr0 == EMPTY) ptr0 = isub; } if (milu == SMILU_2 || milu == SMILU_3) pivmax += drop_sum.r; /* Test for singularity */ if (pivmax < 0.0) { fprintf(stderr, "[0]: jcol=%d, SINGULAR!!!\n", jcol); fflush(stderr); exit(1); } if ( pivmax == 0.0 ) { if (diag != EMPTY) *pivrow = lsub_ptr[pivptr = diag]; else if (ptr0 != EMPTY) *pivrow = lsub_ptr[pivptr = ptr0]; else { /* look for the first row which does not belong to any later supernodes */ for (icol = jcol; icol < n; icol++) if (marker[swap[icol]] <= jcol) break; if (icol >= n) { fprintf(stderr, "[1]: jcol=%d, SINGULAR!!!\n", jcol); fflush(stderr); exit(1); } *pivrow = swap[icol]; /* pick up the pivot row */ for (isub = nsupc; isub < nsupr; ++isub) if ( lsub_ptr[isub] == *pivrow ) { pivptr = isub; break; } } pivmax = fill_tol; lu_col_ptr[pivptr].r = pivmax; lu_col_ptr[pivptr].i = 0.0; *usepr = 0; #ifdef DEBUG printf("[0] ZERO PIVOT: FILL (%d, %d).\n", *pivrow, jcol); fflush(stdout); #endif info =jcol + 1; } /* if (*pivrow == 0.0) */ else { thresh = u * pivmax; /* Choose appropriate pivotal element by our policy. */ if ( *usepr ) { switch (milu) { case SMILU_1: c_add(&temp, &lu_col_ptr[old_pivptr], &drop_sum); rtemp = c_abs1(&temp); break; case SMILU_2: case SMILU_3: rtemp = c_abs1(&lu_col_ptr[old_pivptr]) + drop_sum.r; break; case SILU: default: rtemp = c_abs1(&lu_col_ptr[old_pivptr]); break; } if ( rtemp != 0.0 && rtemp >= thresh ) pivptr = old_pivptr; else *usepr = 0; } if ( *usepr == 0 ) { /* Use diagonal pivot? */ if ( diag >= 0 ) { /* diagonal exists */ switch (milu) { case SMILU_1: c_add(&temp, &lu_col_ptr[diag], &drop_sum); rtemp = c_abs1(&temp); break; case SMILU_2: case SMILU_3: rtemp = c_abs1(&lu_col_ptr[diag]) + drop_sum.r; break; case SILU: default: rtemp = c_abs1(&lu_col_ptr[diag]); break; } if ( rtemp != 0.0 && rtemp >= thresh ) pivptr = diag; } *pivrow = lsub_ptr[pivptr]; } info = 0; /* Reset the diagonal */ switch (milu) { case SMILU_1: c_add(&lu_col_ptr[pivptr], &lu_col_ptr[pivptr], &drop_sum); break; case SMILU_2: case SMILU_3: temp = c_sgn(&lu_col_ptr[pivptr]); cc_mult(&temp, &temp, &drop_sum); c_add(&lu_col_ptr[pivptr], &lu_col_ptr[pivptr], &drop_sum); break; case SILU: default: break; } } /* else */ /* Record pivot row */ perm_r[*pivrow] = jcol; if (jcol < n - 1) { register int t1, t2, t; t1 = iswap[*pivrow]; t2 = jcol; if (t1 != t2) { t = swap[t1]; swap[t1] = swap[t2]; swap[t2] = t; t1 = swap[t1]; t2 = t; t = iswap[t1]; iswap[t1] = iswap[t2]; iswap[t2] = t; } } /* if (jcol < n - 1) */ /* Interchange row subscripts */ if ( pivptr != nsupc ) { itemp = lsub_ptr[pivptr]; lsub_ptr[pivptr] = lsub_ptr[nsupc]; lsub_ptr[nsupc] = itemp; /* Interchange numerical values as well, for the whole snode, such * that L is indexed the same way as A. */ for (icol = 0; icol <= nsupc; icol++) { itemp = pivptr + icol * nsupr; temp = lu_sup_ptr[itemp]; lu_sup_ptr[itemp] = lu_sup_ptr[nsupc + icol*nsupr]; lu_sup_ptr[nsupc + icol*nsupr] = temp; } } /* if */ /* cdiv operation */ ops[FACT] += 10 * (nsupr - nsupc); c_div(&temp, &one, &lu_col_ptr[nsupc]); for (k = nsupc+1; k < nsupr; k++) cc_mult(&lu_col_ptr[k], &lu_col_ptr[k], &temp); return info; }