static void VHmatExtract_f(svdObj_f *svd) { vsip_mview_f*B = svd->B; vsip_index i,j; vsip_length n = vsip_mgetrowlength_f(B); vsip_mview_f *Bs=svd->Bs; vsip_mview_f *V=svd->R; vsip_mview_f *Vs=svd->Rs; vsip_vview_f *v; vsip_scalar_f t; if(n < 3) return; v = row_sv_f(B,svd->bs,0); for(i=n-3; i>0; i--){ j=i+1; row_sv_f(msv_f(B,Bs,i,j),v,0); t=vsip_vget_f(v,0);vsip_vput_f(v,0,1.0); prodHouse_f(msv_f(V,Vs,j,j),v); vsip_vput_f(v,0,t); } row_sv_f(msv_f(B,Bs,0,1),v,0); t=vsip_vget_f(v,0);vsip_vput_f(v,0,1.0); prodHouse_f(msv_f(V,Vs,1,1),v); vsip_vput_f(v,0,t); }
/* sign function as defined in http://www.netlib.org/lapack/lawnspdf/lawn148.pdf */ static vsip_scalar_f sign_f(vsip_scalar_f a_in) { if(a_in < 0.0) return -1.0; else return 1.0; } /* same */ static void biDiagPhaseToZero_f( svdObj_f *svd) { vsip_mview_f *L = svd->L; vsip_vview_f *d = svd->d; vsip_vview_f *f = svd->f; vsip_mview_f *R = svd->R; vsip_scalar_f eps0 = svd->eps0; vsip_length n_d=vsip_vgetlength_f(d); vsip_length n_f=vsip_vgetlength_f(f); vsip_index i,j; vsip_scalar_f ps; vsip_scalar_f m; vsip_vview_f *l = svd->ls_one; vsip_vview_f *r = svd->rs_one; for(i=0; i<n_d; i++){ ps=vsip_vget_f(d,i); m = vsip_mag_f(ps); ps=sign_f(ps); if(m > eps0){ col_sv_f(L,l,i);vsip_svmul_f(ps,l,l); vsip_vput_f(d,i,m); if (i < n_f) vsip_vput_f(f,i,ps*vsip_vget_f(f,i)); } else { vsip_vput_f(d,i,0.0); } } svdZeroCheckAndSet_f(eps0,d,f); for (i=0; i<n_f-1; i++){ j=i+1; ps = vsip_vget_f(f,i); m = vsip_mag_f(ps); ps=sign_f(ps); col_sv_f(L, l, j);vsip_svmul_f(ps,l,l); row_sv_f(R,r,j);vsip_svmul_f(ps,r,r); vsip_vput_f(f,i,m); vsip_vput_f(f,j,ps * vsip_vget_f(f,j)); } j=n_f; i=j-1; ps=vsip_vget_f(f,i); m=vsip_mag_f(ps); ps=sign_f(ps); vsip_vput_f(f,i,m); col_sv_f(L, l, j);vsip_svmul_f(ps,l,l); row_sv_f(R,r,j);vsip_svmul_f(ps,r,r); } static void phaseCheck_f(svdObj_f *svd) { biDiagPhaseToZero_f(svd); } void houseProd_f(vsip_vview_f *v, vsip_mview_f *A) { vsip_mattr_f a_atr; vsip_vview_f *w; vsip_mview_f *B; vsip_mgetattrib_f(A,&a_atr); B=vsip_mcreate_f(a_atr.col_length,a_atr.row_length,VSIP_ROW,VSIP_MEM_NONE); w = vsip_vcreate_f(a_atr.row_length,VSIP_MEM_NONE); vsip_scalar_f beta = 2.0/vsip_vdot_f(v,v); vsip_vmprod_f(v,A,w); vsip_vouter_f(beta,v,w,B); vsip_msub_f(A,B,A); vsip_valldestroy_f(w); vsip_malldestroy_f(B); } /* need to remove create */ void prodHouse_f(vsip_mview_f *A, vsip_vview_f *v) { vsip_mattr_f a_atr; vsip_vview_f *w; vsip_mview_f *B; vsip_mgetattrib_f(A,&a_atr); B=vsip_mcreate_f(a_atr.col_length,a_atr.row_length,VSIP_ROW,VSIP_MEM_NONE); w = vsip_vcreate_f(a_atr.col_length,VSIP_MEM_NONE); vsip_scalar_f beta = 2.0/vsip_vdot_f(v,v); vsip_mvprod_f(A,v,w); vsip_vouter_f(beta,w,v,B); vsip_msub_f(A,B,A); vsip_valldestroy_f(w); vsip_malldestroy_f(B); } /* need to remove create */ static vsip_vview_f *houseVector_f(vsip_vview_f* x) { vsip_scalar_f nrm=vnorm2_f(x); vsip_scalar_f t = vsip_vget_f(x,0); vsip_scalar_f s = t + sign_f(t) * nrm; vsip_vput_f(x,0,s); nrm = vnorm2_f(x); if (nrm == 0.0) vsip_vput_f(x,0,1.0); else vsip_svmul_f(1.0/nrm,x,x); return x; }
void VU_mprodq_f( vsip_mview_f *C, vsip_mview_f *H, vsip_scalar_f *beta) { vsip_mattr_f attr_C,attr_C0; vsip_mattr_f attr_H0; vsip_vattr_f attr_h,attr_v0; vsip_vview_f *h = vsip_mcolview_f(H,0); vsip_length m,n; vsip_length j; vsip_stride k; vsip_vview_f *v, *w; vsip_mgetattrib_f(C,&attr_C); vsip_vgetattrib_f(h,&attr_h); vsip_mgetattrib_f(H,&attr_H0); attr_C0 = attr_C; m = attr_C.col_length; n = attr_C.row_length; v = vsip_vcreate_f(n,0); vsip_vgetattrib_f(v,&attr_v0); w = vsip_vcreate_f(m,0); for(k= 0; k < attr_H0.row_length; k++){ j = (vsip_length)k; attr_h.offset = j * attr_H0.row_stride + j * attr_H0.col_stride + attr_H0.offset; attr_h.length = attr_H0.col_length - j; vsip_vputlength_f(v,attr_h.length); vsip_vputoffset_f(v,n - attr_h.length); vsip_vputattrib_f(h,&attr_h); vsip_vcopy_f_f(h,v); vsip_vput_f(v,0,1); vsip_vputattrib_f(v,&attr_v0); VU_smvprod_f(-beta[j],C,v,w); VU_opu_f(C,w,v); vsip_vput_f(v,k,0); } printf("here 1\n"); vsip_mputattrib_f(C,&attr_C0); vsip_vdestroy_f(h); vsip_valldestroy_f(v); vsip_valldestroy_f(w); return; }
static void svdZeroCheckAndSet_f(vsip_scalar_f e, vsip_vview_f *b0, vsip_vview_f *b1) { vsip_index i; vsip_length n = vsip_vgetlength_f(b1); vsip_scalar_f z = 0.0; for(i=0; i<n; i++){ vsip_scalar_f b = vsip_mag_f(vsip_vget_f(b1,i)); vsip_scalar_f a = e*(vsip_mag_f(vsip_vget_f(b0,i)) + vsip_mag_f(vsip_vget_f(b0,i+1))); if( b < a ) vsip_vput_f(b1,i,z); } } /* same */
static void UmatExtract_f(svdObj_f *svd) { vsip_mview_f* B=svd->B; vsip_mview_f *U=svd->L; vsip_index i; vsip_length m = vsip_mgetcollength_f(B); vsip_length n = vsip_mgetrowlength_f(B); vsip_mview_f *Bs=svd->Bs; vsip_mview_f *Us=svd->Ls; vsip_vview_f *v; vsip_scalar_f t; v = col_sv_f(B,svd->bs,0); if (m > n){ i=n-1; col_sv_f(msv_f(B,Bs,i,i),v,0); t=vsip_vget_f(v,0); vsip_vput_f(v,0,1.0); houseProd_f(v,msv_f(U,Us,i,i)); vsip_vput_f(v,0,t); } for(i=n-2; i>0; i--){ col_sv_f(msv_f(B,Bs,i,i),v,0); t=vsip_vget_f(v,0); vsip_vput_f(v,0,1.0); houseProd_f(v,msv_f(U,Us,i,i)); vsip_vput_f(v,0,t); } col_sv_f(msv_f(B,Bs,0,0),v,0); t=vsip_vget_f(v,0); vsip_vput_f(v,0,1.0); houseProd_f(v,msv_f(U,Us,0,0)); vsip_vput_f(v,0,t); }
/* eps0 is a number << maximum singular value */ svd->eps0=mnormFro_f(B)/(vsip_scalar_f)vsip_mgetrowlength_f(B)*1E-10; bidiag_f(svd); UmatExtract_f(svd); VHmatExtract_f(svd); biDiagPhaseToZero_f(svd); vsip_vcopy_f_f(diag_sv_f(B,svd->bs,0),svd->d); vsip_vcopy_f_f(diag_sv_f(B,svd->bs,1),svd->f); } static void gtProd_f(vsip_index i, vsip_index j, vsip_scalar_f c,vsip_scalar_f s, svdObj_f* svd) { vsip_mview_f* R = svd->Rs; vsip_vview_f *a1= row_sv_f(R,svd->rs_one, i); vsip_vview_f *a2= row_sv_f(R,svd->rs_two, j); vsip_vview_f *a1c=vclone_f(a1,svd->t); vsip_svmul_f(c,a1c,a1); vsip_vsma_f(a2,s,a1,a1); vsip_svmul_f(-s,a1c,a1c); vsip_vsma_f(a2,c,a1c,a2); } static void prodG_f(svdObj_f* svd,vsip_index i, vsip_index j,vsip_scalar_f c, vsip_scalar_f s) { vsip_mview_f* L = svd->Ls; vsip_vview_f *a1= col_sv_f(L,svd->ls_one,i); vsip_vview_f *a2= col_sv_f(L,svd->ls_two,j); vsip_vview_f *a1c=vclone_f(a1,svd->t); vsip_svmul_f(c,a1c,a1); vsip_vsma_f(a2,s,a1,a1); vsip_svmul_f(-s,a1c,a1c);vsip_vsma_f(a2,c,a1c,a2); } static givensObj_f givensCoef_f(vsip_scalar_f x1, vsip_scalar_f x2) { givensObj_f retval; vsip_scalar_f t = vsip_hypot_f(x1,x2); if (x2 == 0.0){ retval.c=1.0;retval.s=0.0;retval.r=x1; } else if (x1 == 0.0) { retval.c=0.0;retval.s=sign_f(x2);retval.r=t; }else{ vsip_scalar_f sn = sign_f(x1); retval.c=vsip_mag_f(x1)/t;retval.s=sn*x2/t; retval.r=sn*t; } return retval; } /* same */ static void zeroCol_f(svdObj_f *svd) { vsip_vview_f *d=svd->ds; vsip_vview_f *f=svd->fs; vsip_length n = vsip_vgetlength_f(f); givensObj_f g; vsip_scalar_f xd,xf,t; vsip_index i,j,k; if (n == 1){ xd=vsip_vget_f(d,0); xf=vsip_vget_f(f,0); g=givensCoef_f(xd,xf); vsip_vput_f(d,0,g.r); vsip_vput_f(f,0,0.0); gtProd_f(0,1,g.c,g.s,svd); }else if (n == 2){ xd=vsip_vget_f(d,1); xf=vsip_vget_f(f,1); g=givensCoef_f(xd,xf); vsip_vput_f(d,1,g.r); vsip_vput_f(f,1,0.0); xf=vsip_vget_f(f,0); t= -xf * g.s; xf *= g.c; vsip_vput_f(f,0,xf); gtProd_f(1,2,g.c,g.s,svd); xd=vsip_vget_f(d,0); g=givensCoef_f(xd,t); vsip_vput_f(d,0,g.r); gtProd_f(0,2,g.c,g.s,svd); }else{ i=n-1; j=i-1; k=i; xd=vsip_vget_f(d,i); xf=vsip_vget_f(f,i); g=givensCoef_f(xd,xf); xf=vsip_vget_f(f,j); vsip_vput_f(f,i,0.0); vsip_vput_f(d,i,g.r); t=-xf*g.s; xf*=g.c; vsip_vput_f(f,j,xf); gtProd_f(i,k+1,g.c,g.s,svd); while (i > 1){ i = j; j = i-1; xd=vsip_vget_f(d,i); g=givensCoef_f(xd,t); vsip_vput_f(d,i,g.r); xf=vsip_vget_f(f,j); t= -xf * g.s; xf *= g.c; vsip_vput_f(f,j,xf); gtProd_f(i,k+1,g.c,g.s,svd); } xd=vsip_vget_f(d,0); g=givensCoef_f(xd,t); vsip_vput_f(d,0,g.r); gtProd_f(0,k+1,g.c,g.s,svd); } } static void zeroRow_f(svdObj_f *svd) { vsip_vview_f *d = svd->ds; vsip_vview_f *f = svd->fs; vsip_length n = vsip_vgetlength_f(d); givensObj_f g; vsip_scalar_f xd,xf,t; vsip_index i; xd=vsip_vget_f(d,0); xf=vsip_vget_f(f,0); g=givensCoef_f(xd,xf); if (n == 1){ vsip_vput_f(f,0,0.0); vsip_vput_f(d,0,g.r); }else{ vsip_vput_f(f,0,0.0); vsip_vput_f(d,0,g.r); xf=vsip_vget_f(f,1); t= -xf * g.s; xf *= g.c; vsip_vput_f(f,1,xf); prodG_f(svd,1,0,g.c,g.s); for(i=1; i<n-1; i++){ xd=vsip_vget_f(d,i); g=givensCoef_f(xd,t); prodG_f(svd,i+1,0,g.c,g.s); vsip_vput_f(d,i,g.r); xf=vsip_vget_f(f,i+1); t=-xf * g.s; xf *= g.c; vsip_vput_f(f,i+1,xf); } xd=vsip_vget_f(d,n-1); g=givensCoef_f(xd,t); vsip_vput_f(d,n-1,g.r); prodG_f(svd,n,0,g.c,g.s); } } static vsip_scalar_f svdMu_f(vsip_scalar_f d2,vsip_scalar_f f1,vsip_scalar_f d3,vsip_scalar_f f2) { vsip_scalar_f mu; vsip_scalar_f cu=d2 * d2 + f1 * f1; vsip_scalar_f cl=d3 * d3 + f2 * f2; vsip_scalar_f cd = d2 * f2; vsip_scalar_f D = (cu * cl - cd * cd); vsip_scalar_f T = (cu + cl); vsip_scalar_f root = vsip_sqrt_f(T*T - 4 * D); vsip_scalar_f lambda1 = (T + root)/(2.); vsip_scalar_f lambda2 = (T - root)/(2.); if(vsip_mag_f(lambda1 - cl) < vsip_mag_f(lambda2 - cl)) mu = lambda1; else mu = lambda2; return mu; } /* same */ static vsip_index zeroFind_f(vsip_vview_f* d, vsip_scalar_f eps0) { vsip_index j = vsip_vgetlength_f(d); vsip_scalar_f xd=vsip_vget_f(d,j-1); while(xd > eps0){ if (j > 1){ j -= 1; xd=vsip_vget_f(d,j-1); }else{ break; } } if(xd <= eps0) vsip_vput_f(d,j-1,0.0); if (j == 1) j=0; return j; } /* same */
void VU_qprodm_f( vsip_mview_f *C, vsip_mview_f *H, vsip_scalar_f *beta) { vsip_mattr_f attr_C,attr_C0; vsip_mattr_f attr_H0; vsip_vattr_f attr_h,attr_v0; vsip_vview_f *h = vsip_mcolview_f(H,0); vsip_length m,n; vsip_length j; vsip_stride k; vsip_vview_f *v, *w; vsip_mgetattrib_f(C,&attr_C); vsip_vgetattrib_f(h,&attr_h); vsip_mgetattrib_f(H,&attr_H0); attr_C0 = attr_C; m = attr_C.col_length; n = attr_C.row_length; v = vsip_vcreate_f(m,0); vsip_vgetattrib_f(v,&attr_v0); w = vsip_vcreate_f(n,0); vsip_vfill_f(0,v); for(k= attr_H0.row_length - 1; k >= 0; k--){ j = (vsip_length)k; attr_h.offset = j * attr_H0.row_stride + j * attr_H0.col_stride + attr_H0.offset; attr_h.length = attr_H0.col_length -j; vsip_vputlength_f(v,attr_h.length); vsip_vputoffset_f(v,m - attr_h.length); vsip_vputattrib_f(h,&attr_h); vsip_vcopy_f_f(h,v); vsip_vput_f(v,0,1); vsip_vputattrib_f(v,&attr_v0); VU_svmprod_f(-beta[j],v,C,w); VU_opu_f(C,v,w); } vsip_mputattrib_f(C,&attr_C0); vsip_vdestroy_f(h); vsip_valldestroy_f(v); vsip_valldestroy_f(w); return; }
/* sign function as defined in http://www.netlib.org/lapack/lawnspdf/lawn148.pdf */ static vsip_scalar_f sign_f(vsip_scalar_f a_in) { if(a_in < 0.0) return -1.0; else return 1.0; } /* same */ static void biDiagPhaseToZero_f( svdObj_f *svd) { vsip_mview_f *L = svd->L; vsip_vview_f *d = svd->d; vsip_vview_f *f = svd->f; vsip_mview_f *R = svd->R; vsip_scalar_f eps0 = svd->eps0; vsip_length n_d=vsip_vgetlength_f(d); vsip_length n_f=vsip_vgetlength_f(f); vsip_index i,j; vsip_scalar_f ps; vsip_scalar_f m; vsip_vview_f *l = svd->ls_one; vsip_vview_f *r = svd->rs_one; for(i=0; i<n_d; i++){ ps=vsip_vget_f(d,i); m = vsip_mag_f(ps); ps=sign_f(ps); if(m > eps0){ col_sv_f(L,l,i);vsip_svmul_f(ps,l,l); vsip_vput_f(d,i,m); if (i < n_f) vsip_vput_f(f,i,ps*vsip_vget_f(f,i)); } else { vsip_vput_f(d,i,0.0); } } svdZeroCheckAndSet_f(eps0,d,f); for (i=0; i<n_f-1; i++){ j=i+1; ps = vsip_vget_f(f,i); m = vsip_mag_f(ps); ps=sign_f(ps); col_sv_f(L, l, j);vsip_svmul_f(ps,l,l); row_sv_f(R,r,j);vsip_svmul_f(ps,r,r); vsip_vput_f(f,i,m); vsip_vput_f(f,j,ps * vsip_vget_f(f,j)); } j=n_f; i=j-1; ps=vsip_vget_f(f,i); m=vsip_mag_f(ps); ps=sign_f(ps); vsip_vput_f(f,i,m); col_sv_f(L, l, j);vsip_svmul_f(ps,l,l); row_sv_f(R,r,j);vsip_svmul_f(ps,r,r); }
static void zeroRow_f(svdObj_f *svd) { vsip_vview_f *d = svd->ds; vsip_vview_f *f = svd->fs; vsip_length n = vsip_vgetlength_f(d); givensObj_f g; vsip_scalar_f xd,xf,t; vsip_index i; xd=vsip_vget_f(d,0); xf=vsip_vget_f(f,0); g=givensCoef_f(xd,xf); if (n == 1){ vsip_vput_f(f,0,0.0); vsip_vput_f(d,0,g.r); }else{ vsip_vput_f(f,0,0.0); vsip_vput_f(d,0,g.r); xf=vsip_vget_f(f,1); t= -xf * g.s; xf *= g.c; vsip_vput_f(f,1,xf); prodG_f(svd,1,0,g.c,g.s); for(i=1; i<n-1; i++){ xd=vsip_vget_f(d,i); g=givensCoef_f(xd,t); prodG_f(svd,i+1,0,g.c,g.s); vsip_vput_f(d,i,g.r); xf=vsip_vget_f(f,i+1); t=-xf * g.s; xf *= g.c; vsip_vput_f(f,i+1,xf); } xd=vsip_vget_f(d,n-1); g=givensCoef_f(xd,t); vsip_vput_f(d,n-1,g.r); prodG_f(svd,n,0,g.c,g.s); } }
int main(){vsip_init((void*)0); { int solretval=0; vsip_scalar_vi i,j; vsip_mview_f *A = vsip_mcreate_f(M, N,VSIP_COL,0); vsip_mview_f *X = vsip_mcreate_f(M,NB,VSIP_ROW,0); /* Nullify the data-space */ for (i=0; i < vsip_mgetcollength_f(A); i++) for(j=0; j < vsip_mgetrowlength_f(A); j++) vsip_mput_f(A,i,j,(double)0); for (i=0; i < vsip_mgetcollength_f(X); i++) for(j=0; j < vsip_mgetrowlength_f(X); j++) vsip_mput_f(X,i,j,(double)0); /* Initialise matrix A */ for (i=0; i<M; i++) for (j = 0; j < N; j++) if(i == j) vsip_mput_f(A,i,j, (double)(M+1)); else vsip_mput_f(A,i,j, -1.0); { /* store data */ /* vsip_mput_f(A,0,0,1); vsip_mput_f(A,0,1,2); vsip_mput_f(A,0,2,1); */ /* vsip_mput_f(A,1,0,3); vsip_mput_f(A,1,1,-1); vsip_mput_f(A,1,2,0); */ /* vsip_mput_f(A,2,0,2); vsip_mput_f(A,2,1,1); vsip_mput_f(A,2,2,-1); */ /* vsip_mput_f(A,3,0,1); vsip_mput_f(A,3,1,2); vsip_mput_f(A,3,2,2); */ /* vsip_mput_f(X,0,0,1); */ /* vsip_mput_f(X,1,0,2); */ /* vsip_mput_f(X,2,0,2); */ /* vsip_mput_f(X,3,0,1); */ } {int i,j; printf("matrix\n A = [\n"); for(i=0; i<M; i++) { for(j=0; j< N; j++) printf("%9.2f%s",vsip_mget_f(A,i,j),(j == N-1) ? "":","); (i==M-1) ? printf(";]\n") : printf(";\n") ; } } { int j, k; vsip_vview_f *y = NULL; vsip_vview_f *x; vsip_length L = NB; vsip_length p = M; for(k=0; k<L; k++) { x = vsip_mcolview_f(X,k); for (j=0; j<p; j++) { y = vsip_mrowview_f(A,j); vsip_vput_f(x,j,(double)(k+1)*(vsip_vsumval_f(y))); /* vsip_vput_f(x,j,(vsip_vsumval_f(y)));*/ vsip_vdestroy_f(y); } vsip_vdestroy_f(x); } } {int i,j; printf("rhs matrix\n B = [\n"); for(i=0; i<NN; i++) { for(j=0; j<NB; j++) printf("%9.2f%s",vsip_mget_f(X,i,j),(j == NB-1) ? "":","); (i==NN-1) ? printf(";]\n") : printf(";\n") ; } } {vsip_qr_f* qrAop = vsip_qrd_create_f(M,N, QOPT); if(qrAop == NULL) exit(1); {int i,j; if(QOPT == VSIP_QRD_SAVEQ1) { printf("qrd returns %i\n",vsip_qrd_f(qrAop,A)); printf("matrix A after factorisation: skinny Q explicitly\n Q1 = [\n"); for(i= 0; i< M ; i++) { for(j=0; j< N; j++) printf("%8.4f %s",vsip_mget_f(A,i,j),(j == N-1) ? "":","); (i == M - 1) ? printf("]\n") : printf(";\n"); } } else if(QOPT == VSIP_QRD_SAVEQ || QOPT == VSIP_QRD_NOSAVEQ) { printf("qrd returns %i\n",vsip_qrd_f(qrAop,A)); printf("matrix A after fact.: R and "); (QOPT == VSIP_QRD_SAVEQ) ? printf("full Q implicitly\n Q/R = [\n") : printf("Q not saved -- ignore LT portion. \n R = [\n"); for(i= 0; i<M ; i++) { for(j=0; j< N; j++) printf("%9.5f %s",vsip_mget_f(A,i,j),(j == N-1) ? "":","); (i == M - 1) ? printf("]\n") : printf(";\n"); } } } if( QPROB == VSIP_LLS) { if (QOPT == VSIP_QRD_SAVEQ1 || QOPT == VSIP_QRD_SAVEQ) { if((solretval=vsip_qrsol_f(qrAop, QPROB, X))) { printf("Warning -- Least Squares soln returns %i -- Check\n", solretval); printf("Upper triang. mat. R, possibly singular\n"); } else printf("Least Squares soln returns %i\n", solretval); } else { printf("Least Squares systems cannot be solved by the NOSAVEQ option --exiting\n"); exit(1); } } else { if((solretval=vsip_qrsol_f(qrAop,QPROB, X))) { printf("Warning -- Covariance soln returns %i -- Check\n",solretval); printf("Upper triang. mat. R, possibly singular\n"); } else printf("Covariance soln returns %i\n",solretval); } vsip_qrd_destroy_f(qrAop); } {int i,j; printf("Soln Matrix\n"); for(i=0; i<N; i++) { for(j=0; j<NB; j++) printf("%9.5f%s",vsip_mget_f(X,i,j),(j == NB-1) ? "":","); printf(";\n"); } } vsip_malldestroy_f(X); vsip_malldestroy_f(A); } vsip_finalize((void*)0); return 1; }
int main() { vsip_init((void*)0); { int i,j; vsip_mview_f *A = vsip_mcreate_f(M,N,VSIP_COL,0); vsip_mview_f *X = vsip_mcreate_f( (NN>= M)?NN:M, (NB>=M)?NB:M, VSIP_ROW,0); /* put the appropriate row, col lengths of X */ X = vsip_mputcollength_f(X,NN); X = vsip_mputrowlength_f(X,NB); /* Initialise matrix A */ for (i=0; i<M; i++) for (j = 0; j < N; j++) if(i == j) vsip_mput_f(A,i,j, (double)(M+1)); else vsip_mput_f(A,i,j, -1.0); { int i,j; printf("matrix\n A = [\n"); for(i=0; i<M; i++) { for(j=0; j< N; j++) printf("%9.2f%s",vsip_mget_f(A,i,j),(j == N-1) ? "":","); (i == M-1)? printf("]\n") : printf(";\n"); } } { int j, k; vsip_vview_f *y = NULL; vsip_vview_f *x; vsip_length L = NB; for(k=0; k<L; k++) { x = vsip_mcolview_f(X,k); for (j=0; j<vsip_vgetlength_f(x); j++) { y = vsip_mrowview_f(A,j); vsip_vput_f(x,j,(double)(k+1)*(vsip_vsumval_f(y))); vsip_vdestroy_f(y); } vsip_vdestroy_f(x); } } { int i,j; printf("rhs matrix\n C = [\n"); for(i=0; i<NN; i++) { for(j=0; j<NB; j++) printf("%9.2f%s",vsip_mget_f(X,i,j),(j == NB-1) ? "":","); (i == NN - 1) ? printf("]\n") : printf(";\n"); } } { vsip_qr_f* qrAop = vsip_qrd_create_f(M,N, QOPT); if(qrAop == NULL) exit(1); { int i,j; if(QOPT == VSIP_QRD_SAVEQ1) { printf("qrd returns %i\n",vsip_qrd_f(qrAop,A)); printf("matrix A after factorisation: skinny Q explicitly\n Q1 = [\n"); for(i= 0; i< M ; i++) { for(j=0; j< N; j++) printf("%9.5f%s",vsip_mget_f(A,i,j),(j == N-1) ? "":","); (i == M - 1) ? printf("]\n") : printf(";\n"); } } else if(QOPT == VSIP_QRD_SAVEQ) { printf("qrd returns %i\n",vsip_qrd_f(qrAop,A)); printf("matrix A after factorisation: R + full Q implicitly\n Q/R = [\n"); for(i= 0; i<M ; i++) { for(j=0; j< N; j++) printf("%9.2f%s",vsip_mget_f(A,i,j),(j == N-1) ? "":","); (i == M-1)? printf("]\n") : printf(";\n"); } } else if(QOPT == VSIP_QRD_NOSAVEQ) { printf("Q is not saved with this option. \n"); printf("Product with Q is invalid, exiting\n"); vsip_qrd_destroy_f(qrAop); vsip_malldestroy_f(X); vsip_malldestroy_f(A); exit(1); } if (opQ == VSIP_MAT_TRANS || opQ == VSIP_MAT_HERM) { if(apQ == VSIP_MAT_RSIDE) /* C * Q^t */ { printf(" This is a product of type C <- C * Q^t \n"); if(vsip_qrdprodq_f(qrAop,opQ,apQ,X)) { printf("Size not conformal or invalid operation by Q: -- exiting\n"); vsip_qrd_destroy_f(qrAop); vsip_malldestroy_f(X); vsip_malldestroy_f(A); exit(1); } else { X = vsip_mputrowlength_f(X,M); } } else if(apQ == VSIP_MAT_LSIDE) /* Q^t * C */ { if(QOPT == 1) printf(" This is a product of type C <- Q^t * C \n"); if(QOPT == 2) printf(" This is a product of type C <- Q_1^t * C \n"); if(vsip_qrdprodq_f(qrAop,opQ,apQ,X)) { printf("Size not conformal or invalid operation by Q: -- exiting\n"); vsip_qrd_destroy_f(qrAop); vsip_malldestroy_f(X); vsip_malldestroy_f(A); exit(1); } else { if(QOPT == 2) X = vsip_mputcollength_f(X,N); } } } else if (opQ == VSIP_MAT_NTRANS) { if(apQ == VSIP_MAT_RSIDE) /* C * Q */ { printf(" This is a product of type C <- C * Q \n"); if(vsip_qrdprodq_f(qrAop,opQ,apQ,X)) { printf("Size not conformal or invalid operation by Q: -- exiting\n"); vsip_qrd_destroy_f(qrAop); vsip_malldestroy_f(X); vsip_malldestroy_f(A); exit(1); } else { X = vsip_mputrowlength_f(X,N); } } else if(apQ == VSIP_MAT_LSIDE) /* Q * C */ { if(QOPT == 1) printf(" This is a product of type C <- Q * C \n"); if(QOPT == 2) printf(" This is a product of type C <- Q_1 * C \n"); if(vsip_qrdprodq_f(qrAop,opQ,apQ,X)) { printf("Size not conformal or invalid operation by Q: -- exiting\n"); vsip_qrd_destroy_f(qrAop); vsip_malldestroy_f(X); vsip_malldestroy_f(A); exit(1); } else { X = vsip_mputcollength_f(X,M); } } } } vsip_qrd_destroy_f(qrAop); } { int i,j; printf("Soln Matrix\n C = [\n"); for(i=0; i< vsip_mgetcollength_f(X); i++) { for(j=0; j< vsip_mgetrowlength_f(X); j++) printf("%8.4f%s",vsip_mget_f(X,i,j), (j == vsip_mgetrowlength_f(X)-1) ? "":","); (i == vsip_mgetcollength_f(X)-1) ? printf("]\n") : printf(";\n"); } } vsip_malldestroy_f(X); vsip_malldestroy_f(A); } vsip_finalize((void*)0); return 1; }
static void svdStep_f(svdObj_f *svd) { vsip_vview_f *d = svd->ds; vsip_vview_f *f = svd->fs; givensObj_f g; vsip_length n = vsip_vgetlength_f(d); vsip_scalar_f mu=0.0, x1=0.0, x2=0.0; vsip_scalar_f t=0.0; vsip_index i,j,k; vsip_scalar_f d2,f1,d3,f2; if(n >= 3){ d2=vsip_vget_f(d,n-2);f1= vsip_vget_f(f,n-3);d3 = vsip_vget_f(d,n-1);f2= vsip_vget_f(f,n-2); } else if(n == 2){ d2=vsip_vget_f(d,0);f1= 0.0;d3 = vsip_vget_f(d,1);f2= vsip_vget_f(f,0); } else { d2=vsip_vget_f(d,0);f1 = 0.0;d3 = 0.0;f2 = 0.0; } mu = svdMu_f(d2,f1,d3,f2); x1=vsip_vget_f(d,0); x2 = x1 * vsip_vget_f(f,0); x1 *= x1; x1 -= mu; g=givensCoef_f(x1,x2); x1=vsip_vget_f(d,0);x2=vsip_vget_f(f,0); vsip_vput_f(f,0,g.c * x2 - g.s * x1); vsip_vput_f(d,0,x1 * g.c + x2 * g.s); t=vsip_vget_f(d,1); vsip_vput_f(d,1,t*g.c); t*=g.s; gtProd_f(0,1,g.c,g.s,svd); for(i=0; i<n-2; i++){ j=i+1; k=i+2; g = givensCoef_f(vsip_vget_f(d,i),t); vsip_vput_f(d,i,g.r); x1=vsip_vget_f(d,j)*g.c; x2=vsip_vget_f(f,i)*g.s; t= x1 - x2; x1=vsip_vget_f(f,i) * g.c; x2=vsip_vget_f(d,j) * g.s ; vsip_vput_f(f,i,x1+x2); vsip_vput_f(d,j,t); x1=vsip_vget_f(f,j); t=g.s * x1; vsip_vput_f(f,j, x1*g.c); prodG_f(svd,i, j, g.c, g.s); g=givensCoef_f(vsip_vget_f(f,i),t); vsip_vput_f(f,i,g.r); x1=vsip_vget_f(d,j); x2=vsip_vget_f(f,j); vsip_vput_f(d,j,g.c * x1 + g.s * x2); vsip_vput_f(f,j,g.c * x2 - g.s * x1); x1=vsip_vget_f(d,k); t=g.s * x1; vsip_vput_f(d,k,x1*g.c); gtProd_f(j,k, g.c, g.s,svd); } i=n-2; j=n-1; g = givensCoef_f(vsip_vget_f(d,i),t); vsip_vput_f(d,i,g.r); x1=vsip_vget_f(d,j)*g.c; x2=vsip_vget_f(f,i)*g.s; t=x1 - x2; x1 = vsip_vget_f(f,i) * g.c; x2=vsip_vget_f(d,j) * g.s; vsip_vput_f(f,i,x1+x2); vsip_vput_f(d,j,t); prodG_f(svd,i, j, g.c, g.s); }
static void zeroCol_f(svdObj_f *svd) { vsip_vview_f *d=svd->ds; vsip_vview_f *f=svd->fs; vsip_length n = vsip_vgetlength_f(f); givensObj_f g; vsip_scalar_f xd,xf,t; vsip_index i,j,k; if (n == 1){ xd=vsip_vget_f(d,0); xf=vsip_vget_f(f,0); g=givensCoef_f(xd,xf); vsip_vput_f(d,0,g.r); vsip_vput_f(f,0,0.0); gtProd_f(0,1,g.c,g.s,svd); }else if (n == 2){ xd=vsip_vget_f(d,1); xf=vsip_vget_f(f,1); g=givensCoef_f(xd,xf); vsip_vput_f(d,1,g.r); vsip_vput_f(f,1,0.0); xf=vsip_vget_f(f,0); t= -xf * g.s; xf *= g.c; vsip_vput_f(f,0,xf); gtProd_f(1,2,g.c,g.s,svd); xd=vsip_vget_f(d,0); g=givensCoef_f(xd,t); vsip_vput_f(d,0,g.r); gtProd_f(0,2,g.c,g.s,svd); }else{ i=n-1; j=i-1; k=i; xd=vsip_vget_f(d,i); xf=vsip_vget_f(f,i); g=givensCoef_f(xd,xf); xf=vsip_vget_f(f,j); vsip_vput_f(f,i,0.0); vsip_vput_f(d,i,g.r); t=-xf*g.s; xf*=g.c; vsip_vput_f(f,j,xf); gtProd_f(i,k+1,g.c,g.s,svd); while (i > 1){ i = j; j = i-1; xd=vsip_vget_f(d,i); g=givensCoef_f(xd,t); vsip_vput_f(d,i,g.r); xf=vsip_vget_f(f,j); t= -xf * g.s; xf *= g.c; vsip_vput_f(f,j,xf); gtProd_f(i,k+1,g.c,g.s,svd); } xd=vsip_vget_f(d,0); g=givensCoef_f(xd,t); vsip_vput_f(d,0,g.r); gtProd_f(0,k+1,g.c,g.s,svd); } }