/* sign function as defined in http://www.netlib.org/lapack/lawnspdf/lawn148.pdf */ static vsip_scalar_f sign_f(vsip_scalar_f a_in) { if(a_in < 0.0) return -1.0; else return 1.0; } /* same */ static void biDiagPhaseToZero_f( svdObj_f *svd) { vsip_mview_f *L = svd->L; vsip_vview_f *d = svd->d; vsip_vview_f *f = svd->f; vsip_mview_f *R = svd->R; vsip_scalar_f eps0 = svd->eps0; vsip_length n_d=vsip_vgetlength_f(d); vsip_length n_f=vsip_vgetlength_f(f); vsip_index i,j; vsip_scalar_f ps; vsip_scalar_f m; vsip_vview_f *l = svd->ls_one; vsip_vview_f *r = svd->rs_one; for(i=0; i<n_d; i++){ ps=vsip_vget_f(d,i); m = vsip_mag_f(ps); ps=sign_f(ps); if(m > eps0){ col_sv_f(L,l,i);vsip_svmul_f(ps,l,l); vsip_vput_f(d,i,m); if (i < n_f) vsip_vput_f(f,i,ps*vsip_vget_f(f,i)); } else { vsip_vput_f(d,i,0.0); } } svdZeroCheckAndSet_f(eps0,d,f); for (i=0; i<n_f-1; i++){ j=i+1; ps = vsip_vget_f(f,i); m = vsip_mag_f(ps); ps=sign_f(ps); col_sv_f(L, l, j);vsip_svmul_f(ps,l,l); row_sv_f(R,r,j);vsip_svmul_f(ps,r,r); vsip_vput_f(f,i,m); vsip_vput_f(f,j,ps * vsip_vget_f(f,j)); } j=n_f; i=j-1; ps=vsip_vget_f(f,i); m=vsip_mag_f(ps); ps=sign_f(ps); vsip_vput_f(f,i,m); col_sv_f(L, l, j);vsip_svmul_f(ps,l,l); row_sv_f(R,r,j);vsip_svmul_f(ps,r,r); } static void phaseCheck_f(svdObj_f *svd) { biDiagPhaseToZero_f(svd); } void houseProd_f(vsip_vview_f *v, vsip_mview_f *A) { vsip_mattr_f a_atr; vsip_vview_f *w; vsip_mview_f *B; vsip_mgetattrib_f(A,&a_atr); B=vsip_mcreate_f(a_atr.col_length,a_atr.row_length,VSIP_ROW,VSIP_MEM_NONE); w = vsip_vcreate_f(a_atr.row_length,VSIP_MEM_NONE); vsip_scalar_f beta = 2.0/vsip_vdot_f(v,v); vsip_vmprod_f(v,A,w); vsip_vouter_f(beta,v,w,B); vsip_msub_f(A,B,A); vsip_valldestroy_f(w); vsip_malldestroy_f(B); } /* need to remove create */ void prodHouse_f(vsip_mview_f *A, vsip_vview_f *v) { vsip_mattr_f a_atr; vsip_vview_f *w; vsip_mview_f *B; vsip_mgetattrib_f(A,&a_atr); B=vsip_mcreate_f(a_atr.col_length,a_atr.row_length,VSIP_ROW,VSIP_MEM_NONE); w = vsip_vcreate_f(a_atr.col_length,VSIP_MEM_NONE); vsip_scalar_f beta = 2.0/vsip_vdot_f(v,v); vsip_mvprod_f(A,v,w); vsip_vouter_f(beta,w,v,B); vsip_msub_f(A,B,A); vsip_valldestroy_f(w); vsip_malldestroy_f(B); } /* need to remove create */
int main() { vsip_init((void*)0);{ vsip_mview_f *A = vsip_mcreate_f(NN,NN,VSIP_COL,0); vsip_vview_f *x0 = vsip_vcreate_f(NN,0); vsip_vview_f *b = vsip_vcreate_f(NN,0); vsip_mview_f *X = vsip_mcreate_f(NN,3,VSIP_ROW,0); vsip_mview_f *XT = vsip_mcreate_f(NN,3,VSIP_COL,0); vsip_vramp_f(1,1,x0); vsip_vmul_f(x0,x0,x0); vsip_mput_f(A,0,0,-3); vsip_mput_f(A,0,1,7); vsip_mput_f(A,0,2,10); vsip_mput_f(A,0,3,12); vsip_mput_f(A,1,0,0); vsip_mput_f(A,1,1,13); vsip_mput_f(A,1,2,18); vsip_mput_f(A,1,3,6); vsip_mput_f(A,2,0,2); vsip_mput_f(A,2,1,-9); vsip_mput_f(A,2,2,6); vsip_mput_f(A,2,3,3); vsip_mput_f(A,3,0,1); vsip_mput_f(A,3,1,2); vsip_mput_f(A,3,2,3); vsip_mput_f(A,3,3,4); {int i,j; printf("A = [\n"); for(i=0; i<NN; i++){ for(j=0; j<NN; j++) printf("%9.2f%s",vsip_mget_f(A,i,j),(j == NN-1) ? "":","); printf(";\n"); } printf("]\n"); } { int k; vsip_vview_f *x; vsip_length L = vsip_mgetrowlength_f(X); for(k=0; k<L; k++){ x = vsip_mcolview_f(X,k); vsip_mvprod_f(A,x0,b); vsip_vcopy_f_f(b,x); vsip_svmul_f(2.0,x0,x0); vsip_vdestroy_f(x); } {int i,j; printf("X = [\n");for(i=0; i<NN; i++){ for(j=0; j<3; j++) printf("%9.2f%s",vsip_mget_f(X,i,j),(j == 2) ? "":","); printf(";\n"); } printf("]\n"); } { vsip_lu_f* luAop = vsip_lud_create_f(NN); vsip_mcopy_f_f(X,XT); if(luAop == NULL) exit(1); vsip_lud_f(luAop,A); { vsip_lu_attr_f attr; vsip_lud_getattr_f(luAop,&attr); printf("lud size %lu\n",attr.n); } vsip_lusol_f(luAop,0,X); vsip_lusol_f(luAop,1,XT); vsip_lud_destroy_f(luAop); } } {int i,j; printf("A\\X\n"); for(i=0; i<NN; i++){ for(j=0; j<3; j++) printf("%9.2f%s",vsip_mget_f(X,i,j),(j == 2) ? "":","); printf(";\n"); } } {int i,j; printf("A'\\X\n"); for(i=0; i<NN; i++){ for(j=0; j<3; j++) printf("%9.2f%s",vsip_mget_f(XT,i,j),(j == 2) ? "":","); printf(";\n"); } } { vsip_valldestroy_f(b); vsip_valldestroy_f(x0); vsip_malldestroy_f(X); vsip_malldestroy_f(A); } }vsip_finalize((void*)0);return 1; }
int main(){vsip_init((void*)0); { vsip_mview_f *Adummy = vsip_mcreate_f(5*NN,5*NN,VSIP_COL,0); vsip_mview_f *A = vsip_msubview_f(Adummy,3,2,NN,NN); vsip_vview_f *x0 = vsip_vcreate_f(NN,0); vsip_mview_f *X = vsip_mcreate_f(NN,3,VSIP_ROW,0); vsip_mview_f *XT = vsip_mcreate_f(NN,3,VSIP_COL,0); vsip_mputrowstride_f(A,2*vsip_mgetrowstride_f(A)); vsip_mputcolstride_f(A,3*vsip_mgetcolstride_f(A)); /* matrix data */ vsip_mput_f(A,0,0,0.5); vsip_mput_f(A,0,1,7); vsip_mput_f(A,0,2,10); vsip_mput_f(A,0,3,12); vsip_mput_f(A,0,4,-3); vsip_mput_f(A,0,5,0); vsip_mput_f(A,0,6,.05); vsip_mput_f(A,1,0,2); vsip_mput_f(A,1,1,13); vsip_mput_f(A,1,2,18); vsip_mput_f(A,1,3,6); vsip_mput_f(A,1,4,0); vsip_mput_f(A,1,5,130); vsip_mput_f(A,1,6,8); vsip_mput_f(A,2,0,3); vsip_mput_f(A,2,1,-9); vsip_mput_f(A,2,2,2); vsip_mput_f(A,2,3,3); vsip_mput_f(A,2,4,2); vsip_mput_f(A,2,5,-9); vsip_mput_f(A,2,6,6); vsip_mput_f(A,3,0,4); vsip_mput_f(A,3,1,2); vsip_mput_f(A,3,2,2); vsip_mput_f(A,3,3,4); vsip_mput_f(A,3,4,1); vsip_mput_f(A,3,5,2); vsip_mput_f(A,3,6,3); vsip_mput_f(A,4,0,.2); vsip_mput_f(A,4,1,2); vsip_mput_f(A,4,2,9); vsip_mput_f(A,4,3,4); vsip_mput_f(A,4,4,1); vsip_mput_f(A,4,5,2); vsip_mput_f(A,4,6,3); vsip_mput_f(A,5,0,.1); vsip_mput_f(A,5,1,2); vsip_mput_f(A,5,2,.3); vsip_mput_f(A,5,3,4); vsip_mput_f(A,5,4,1); vsip_mput_f(A,5,5,2); vsip_mput_f(A,5,6,3); vsip_mput_f(A,6,0,.01); vsip_mput_f(A,6,1,.2); vsip_mput_f(A,6,2,3); vsip_mput_f(A,6,3,4); vsip_mput_f(A,6,4,1); vsip_mput_f(A,6,5,2); vsip_mput_f(A,6,6,3); {int i,j; printf("A = [\n"); for(i=0; i<NN; i++){ for(j=0; j<NN; j++) printf("%9.2f%s",vsip_mget_f(A,i,j),(j == NN-1) ? "":","); printf(";\n"); } printf("]\n"); } { /* were solving for NTRANS Ax = B */ /* use a known X, calculate B using Ax */ int k; vsip_vview_f *x; vsip_mview_f *AT = vsip_mtransview_f(A); vsip_length L = vsip_mgetrowlength_f(X); vsip_vramp_f(1,1,x0); for(k=0; k<L; k++){ x = vsip_mcolview_f(X,k); vsip_mvprod_f(A,x0,x); vsip_svmul_f(2.0,x0,x0); vsip_vdestroy_f(x); } vsip_vramp_f(1,1,x0); for(k=0; k<L; k++){ x = vsip_mcolview_f(XT,k); vsip_mvprod_f(AT,x0,x); vsip_svmul_f(2.0,x0,x0); vsip_vdestroy_f(x); } vsip_mdestroy_f(AT); printf("X = 1 2 4\n 2 4 8\n 3 6 12\n 4 8 16\n 5 10 20\n 6 12 24\n 7 14 28\n"); { int i,j; printf("B = [\n");for(i=0; i<NN; i++){ for(j=0; j<3; j++) printf("%9.2f%s",vsip_mget_f(X,i,j),(j == 2) ? "":","); printf(";\n"); } printf("]\n"); } { /* then solve for B to see if we get X back */ vsip_lu_f* luAop = vsip_lud_create_f(NN); if(luAop == NULL) exit(1); vsip_lud_f(luAop,A); { vsip_lu_attr_f attr; vsip_lud_getattr_f(luAop,&attr); printf("lud size %lu\n",attr.n); } vsip_lusol_f(luAop,VSIP_MAT_NTRANS,X); vsip_lusol_f(luAop,VSIP_MAT_TRANS,XT); vsip_lud_destroy_f(luAop); } } {int i,j; printf("A\\X\n"); for(i=0; i<NN; i++){ for(j=0; j<3; j++) printf("%9.2f%s",vsip_mget_f(X,i,j),(j == 2) ? "":","); printf(";\n"); } } {int i,j; printf("A'\\X\n"); for(i=0; i<NN; i++){ for(j=0; j<3; j++) printf("%9.2f%s",vsip_mget_f(XT,i,j),(j == 2) ? "":","); printf(";\n"); } } { vsip_malldestroy_f(XT); vsip_valldestroy_f(x0); vsip_malldestroy_f(X); vsip_mdestroy_f(A); vsip_malldestroy_f(Adummy); } } vsip_finalize((void*)0); return 1; }
int main(){ int init = vsip_init((void*)0); int i,j, cholsol_retval,chold_retval; double t0 = VU_ansi_c_clock(); /* for doing some timeing */ vsip_cscalar_f czero = vsip_cmplx_f((vsip_scalar_f)0.0,(vsip_scalar_f)0.0); vsip_cmview_f *A = vsip_cmcreate_f(N,N,VSIP_COL,0); vsip_cmview_f *RU = vsip_cmcreate_f(N,N,VSIP_COL,0); vsip_cmview_f *RL = vsip_cmcreate_f(N,N,VSIP_COL,0); vsip_cmview_f *XB = vsip_cmcreate_f(N,M,VSIP_ROW,0); vsip_cchol_f* chol = vsip_cchold_create_f(UPORLO,N); /* NOTE: UPORLO macro above main() */ /* to make sure we have a valid Positive Symetric define */ /* an upper triangular (RU) with positive pivots and */ /* zero below the main diagonal. */ /* Then initialize RL with hermitian of RU */ /* finally create A as the matrix product of RL and RU */ /* Initialise matrix RU */ /* time this */ t0 = VU_ansi_c_clock(); for (i=0; i<N; i++){ for(j = i; j < N; j++){ #ifdef OBNOXIOUS /* make up some reasonably obnoxious data */ vsip_scalar_f a = cos(1.5/((j+1)*(i+1)))+sqrt(i*j); vsip_scalar_f b = (i + j + 1) * cos(M_PI * a); #else /* the above was to obnoxious for bigger than about N = 10 */ /* the following works for N > 100 */ vsip_scalar_f a = 1; vsip_scalar_f b = 1; #endif if(i == j) /* fill diagonal */ vsip_cmput_f(RU,i,j, vsip_cmplx_f(sqrt(N) + sqrt(i),0)); else { /* fill off diagonal */ vsip_cmput_f(RU,i,j,vsip_cmplx_f(b,a)); vsip_cmput_f(RU,j,i,czero); } } } /* initialize RL */ vsip_cmherm_f(RU,RL); #ifdef PRINT VU_cmprintm_f("7.4",RU); VU_cmprintm_f("7.4",RL); #endif printf("Matrix initialize for RU and RL = %f seconds\n",VU_ansi_c_clock() - t0); /* initialize A */ /* this step will take a long time so time it */ t0 = VU_ansi_c_clock(); vsip_cmprod_f(RL,RU,A); #ifdef OBNOXIOUS for(i=0; i<N; i++){ vsip_cvview_f *aview = vsip_cmrowview_f(A,i); vsip_cvrsdiv_f(aview,vsip_cmag_f(vsip_cvmeanval_f(aview)),aview); vsip_cvdestroy_f(aview); } #endif printf("Matrix multiply for initialization of A = %f seconds\n",VU_ansi_c_clock() - t0); /* print A */ /* we only want to do this if A is something reasonable to print */ /* selected as an option in the make file */ #ifdef PRINT printf("Matrix A =\n"); VU_cmprintm_f("4.2",A); fflush(stdout); #endif /* initialise rhs */ /* start out with XB = {1,2,3,...,M} */ /* calculate what B must be using A */ /* then solve to see if we get XB back */ { vsip_index i; vsip_vview_f *y = vsip_vcreate_f(vsip_cmgetcollength_f(A),VSIP_MEM_NONE); vsip_vview_f *x_r,*x_i; vsip_cvview_f *x; vsip_mview_f *A_r = vsip_mrealview_f(A), *A_i = vsip_mimagview_f(A); /* time this */ t0 = VU_ansi_c_clock(); for(i=0; i<M; i++){ vsip_vfill_f((vsip_scalar_f)i+1.0,y); x = vsip_cmcolview_f(XB,i); x_r = vsip_vrealview_f(x); x_i = vsip_vimagview_f(x); vsip_mvprod_f(A_r,y,x_r); vsip_mvprod_f(A_i,y,x_i); vsip_cvdestroy_f(x); vsip_vdestroy_f(x_r); vsip_vdestroy_f(x_i); } vsip_mdestroy_f(A_r); vsip_mdestroy_f(A_i); printf("Matrix init for B = %f seconds\n",VU_ansi_c_clock() - t0); } /* print XB */ /* we only want to do this if XB is something reasonable to print */ /* selected as an option in the make file */ #ifdef PRINT printf("Matrix B = \n"); VU_cmprintm_f("7.4",XB); fflush(stdout); #endif if(chol != NULL){ t0 = VU_ansi_c_clock(); /* we want to time the decomposition */ chold_retval = vsip_cchold_f(chol,A); printf("time decomp %f\n",VU_ansi_c_clock() - t0); printf("decompostion returns %d\n",chold_retval); /* now do the solution */ t0 = VU_ansi_c_clock(); /* we want to time the solution */ cholsol_retval=vsip_ccholsol_f(chol,XB); printf("time solution %f\n",VU_ansi_c_clock() - t0); printf("cholsol returns %d\n",cholsol_retval); /* print XB */ /* we only want to do this if XB is something reasonable to print */ /* selected as an option in the make file; otherwise */ /* we print a single row of XB if the matrix is to large since */ /* M is usally reasonable. Printed as a column vector */ #ifdef PRINT printf("Matrix X = \n"); VU_cmprintm_f("7.4",XB); fflush(stdout); #else { /* pick a row in the middle */ vsip_cvview_f *x = vsip_cmrowview_f(XB,N/2); printf("This output sould be 1,2,...,M\n"); VU_cvprintm_f("7.4",x); fflush(stdout); vsip_cvdestroy_f(x); } #endif } else { printf("failed to create cholesky object \n"); } vsip_cmalldestroy_f(XB); vsip_cmalldestroy_f(A); vsip_cmalldestroy_f(RL); vsip_cmalldestroy_f(RU); vsip_cchold_destroy_f(chol); vsip_finalize((void*)0); return 1; }