Exemple #1
0
/*
 * Sign function as defined in LAPACK Working Note 148
 * (http://www.netlib.org/lapack/lawnspdf/lawn148.pdf).
 *
 * Returns -1.0 when a_in is strictly negative and 1.0 otherwise, so a zero
 * input maps to +1 (there is no 0 result).
 */
static vsip_scalar_f sign_f(vsip_scalar_f a_in)
{
    return (a_in < 0.0) ? -1.0 : 1.0;
}
/*
 * Replace the entries of the bidiagonal vectors svd->d and svd->f by their
 * magnitudes, pushing each removed sign into svd->L / svd->R.
 *
 * Pass 1 (d): every entry whose magnitude exceeds svd->eps0 is replaced by
 *   that magnitude; its sign is multiplied into the vector selected by
 *   col_sv_f(L, ., i) and into f[i] (when f has an entry i). Entries at or
 *   below eps0 are set to exactly zero.
 * Then svdZeroCheckAndSet_f(eps0, d, f) is run.
 * Pass 2 (f): every entry but the last is replaced by its magnitude; its sign
 *   is multiplied into the vectors selected by col_sv_f(L, ., i+1) and
 *   row_sv_f(R, ., i+1), and into f[i+1].
 * Finally the last entry of f gets the same treatment using index n_f.
 *
 * svd->ls_one and svd->rs_one serve as the scratch views handed to
 * col_sv_f / row_sv_f (presumably re-bound to column/row j of L/R -- those
 * helpers are defined elsewhere; confirm).
 */
static void biDiagPhaseToZero_f( svdObj_f *svd)
{
    vsip_mview_f *left  = svd->L;
    vsip_mview_f *right = svd->R;
    vsip_vview_f *diag  = svd->d;
    vsip_vview_f *super = svd->f;
    vsip_vview_f *lvec  = svd->ls_one;
    vsip_vview_f *rvec  = svd->rs_one;
    vsip_scalar_f tol   = svd->eps0;
    vsip_length   nd    = vsip_vgetlength_f(diag);
    vsip_length   nf    = vsip_vgetlength_f(super);
    vsip_index    k, next;
    vsip_scalar_f val, mag, sgn;

    /* Pass 1: diagonal entries. */
    for(k = 0; k < nd; k++){
        val = vsip_vget_f(diag, k);
        mag = vsip_mag_f(val);
        if(!(mag > tol)){
            /* negligible (or non-comparable) entry: force an exact zero */
            vsip_vput_f(diag, k, 0.0);
            continue;
        }
        sgn = sign_f(val);
        col_sv_f(left, lvec, k);
        vsip_svmul_f(sgn, lvec, lvec);
        vsip_vput_f(diag, k, mag);
        if(k < nf){
            vsip_vput_f(super, k, sgn * vsip_vget_f(super, k));
        }
    }

    svdZeroCheckAndSet_f(tol, diag, super);

    /* Pass 2: all but the last super-diagonal entry.
     * NOTE(review): nf is unsigned, so nf - 1 assumes f has at least one
     * element -- confirm callers never pass an empty f. */
    for(k = 0; k < nf - 1; k++){
        next = k + 1;
        val  = vsip_vget_f(super, k);
        mag  = vsip_mag_f(val);
        sgn  = sign_f(val);
        col_sv_f(left, lvec, next);
        vsip_svmul_f(sgn, lvec, lvec);
        row_sv_f(right, rvec, next);
        vsip_svmul_f(sgn, rvec, rvec);
        vsip_vput_f(super, k, mag);
        vsip_vput_f(super, next, sgn * vsip_vget_f(super, next));
    }

    /* Last super-diagonal entry: there is no following f entry to absorb
     * the sign, so only the L and R vectors at index nf are scaled. */
    next = nf;
    k    = next - 1;
    val  = vsip_vget_f(super, k);
    mag  = vsip_mag_f(val);
    sgn  = sign_f(val);
    vsip_vput_f(super, k, mag);
    col_sv_f(left, lvec, next);
    vsip_svmul_f(sgn, lvec, lvec);
    row_sv_f(right, rvec, next);
    vsip_svmul_f(sgn, rvec, rvec);
}
/*
 * Phase check for the SVD object: currently a plain forwarder to
 * biDiagPhaseToZero_f(), which does all of the work on svd.
 */
static void phaseCheck_f(svdObj_f *svd)
{
    biDiagPhaseToZero_f(svd);
}

/*
 * Apply a Householder reflector built from v to A from the left, in place:
 *
 *     A <- A - beta * v * (v' A),   beta = 2 / (v' v)
 *
 * v : Householder vector (its length must match the number of rows of A).
 * A : matrix updated in place.
 *
 * Degenerate / failure handling:
 *   - If v' v == 0 the reflector is treated as the identity and A is left
 *     untouched (the original divided by zero and filled A with NaN/Inf).
 *   - If either temporary cannot be allocated, A is left untouched and any
 *     temporary that was obtained is released. The function returns void, so
 *     the failure cannot be reported to the caller.
 *
 * TODO: the per-call temporaries should eventually come from a workspace
 * owned by the caller (need to remove create).
 */
void houseProd_f(vsip_vview_f *v, vsip_mview_f *A)
{
    vsip_mattr_f a_atr;
    vsip_vview_f *w = NULL;
    vsip_mview_f *B = NULL;
    vsip_scalar_f vtv;
    vsip_scalar_f beta;

    vsip_mgetattrib_f(A,&a_atr);

    vtv = vsip_vdot_f(v,v);
    if(vtv == 0.0){
        return; /* zero vector: H = I, nothing to do */
    }
    beta = 2.0/vtv;

    /* B has the shape of A; w holds v'A, one entry per column of A */
    B = vsip_mcreate_f(a_atr.col_length,a_atr.row_length,VSIP_ROW,VSIP_MEM_NONE);
    w = vsip_vcreate_f(a_atr.row_length,VSIP_MEM_NONE);

    if(B != NULL && w != NULL){
        vsip_vmprod_f(v,A,w);        /* w = v' A          */
        vsip_vouter_f(beta,v,w,B);   /* B = beta * v * w' */
        vsip_msub_f(A,B,A);          /* A = A - B         */
    }

    if(w != NULL){
        vsip_valldestroy_f(w);
    }
    if(B != NULL){
        vsip_malldestroy_f(B);
    }
}
/*
 * Apply a Householder reflector built from v to A from the right, in place:
 *
 *     A <- A - beta * (A v) * v',   beta = 2 / (v' v)
 *
 * A : matrix updated in place.
 * v : Householder vector (its length must match the number of columns of A).
 *
 * Degenerate / failure handling:
 *   - If v' v == 0 the reflector is treated as the identity and A is left
 *     untouched (the original divided by zero and filled A with NaN/Inf).
 *   - If either temporary cannot be allocated, A is left untouched and any
 *     temporary that was obtained is released. The function returns void, so
 *     the failure cannot be reported to the caller.
 *
 * TODO: the per-call temporaries should eventually come from a workspace
 * owned by the caller (need to remove create).
 */
void prodHouse_f(vsip_mview_f *A, vsip_vview_f *v)
{
    vsip_mattr_f a_atr;
    vsip_vview_f *w = NULL;
    vsip_mview_f *B = NULL;
    vsip_scalar_f vtv;
    vsip_scalar_f beta;

    vsip_mgetattrib_f(A,&a_atr);

    vtv = vsip_vdot_f(v,v);
    if(vtv == 0.0){
        return; /* zero vector: H = I, nothing to do */
    }
    beta = 2.0/vtv;

    /* B has the shape of A; w holds A v, one entry per row of A */
    B = vsip_mcreate_f(a_atr.col_length,a_atr.row_length,VSIP_ROW,VSIP_MEM_NONE);
    w = vsip_vcreate_f(a_atr.col_length,VSIP_MEM_NONE);

    if(B != NULL && w != NULL){
        vsip_mvprod_f(A,v,w);        /* w = A v           */
        vsip_vouter_f(beta,w,v,B);   /* B = beta * w * v' */
        vsip_msub_f(A,B,A);          /* A = A - B         */
    }

    if(w != NULL){
        vsip_valldestroy_f(w);
    }
    if(B != NULL){
        vsip_malldestroy_f(B);
    }
}
int main() {
vsip_init((void*)0);{
    vsip_mview_f *A  = vsip_mcreate_f(NN,NN,VSIP_COL,0);
    vsip_vview_f *x0 = vsip_vcreate_f(NN,0);
    vsip_vview_f *b  = vsip_vcreate_f(NN,0);
    vsip_mview_f *X  = vsip_mcreate_f(NN,3,VSIP_ROW,0);
    vsip_mview_f *XT  = vsip_mcreate_f(NN,3,VSIP_COL,0);
    vsip_vramp_f(1,1,x0); vsip_vmul_f(x0,x0,x0);
    vsip_mput_f(A,0,0,-3); vsip_mput_f(A,0,1,7); vsip_mput_f(A,0,2,10); vsip_mput_f(A,0,3,12);
    vsip_mput_f(A,1,0,0); vsip_mput_f(A,1,1,13); vsip_mput_f(A,1,2,18); vsip_mput_f(A,1,3,6);
    vsip_mput_f(A,2,0,2); vsip_mput_f(A,2,1,-9); vsip_mput_f(A,2,2,6); vsip_mput_f(A,2,3,3);
    vsip_mput_f(A,3,0,1); vsip_mput_f(A,3,1,2); vsip_mput_f(A,3,2,3); vsip_mput_f(A,3,3,4);
    {int i,j; printf("A = [\n"); for(i=0; i<NN; i++){
                  for(j=0; j<NN; j++) printf("%9.2f%s",vsip_mget_f(A,i,j),(j == NN-1) ? "":",");
                  printf(";\n");
              }
              printf("]\n");
    }
    { int k; 
      vsip_vview_f *x;
      vsip_length L    = vsip_mgetrowlength_f(X);
      for(k=0; k<L; k++){
        x  = vsip_mcolview_f(X,k);
        vsip_mvprod_f(A,x0,b);
        vsip_vcopy_f_f(b,x);
        vsip_svmul_f(2.0,x0,x0);
        vsip_vdestroy_f(x);
      }
    {int i,j; printf("X = [\n");for(i=0; i<NN; i++){
                  for(j=0; j<3; j++) printf("%9.2f%s",vsip_mget_f(X,i,j),(j == 2) ? "":",");
                  printf(";\n");
              }
              printf("]\n");
    }
    {
      vsip_lu_f* luAop = vsip_lud_create_f(NN);
      vsip_mcopy_f_f(X,XT);
      if(luAop == NULL) exit(1);
      vsip_lud_f(luAop,A);
      {  vsip_lu_attr_f attr;
         vsip_lud_getattr_f(luAop,&attr);
         printf("lud size %lu\n",attr.n);
      }  
      vsip_lusol_f(luAop,0,X);
      vsip_lusol_f(luAop,1,XT);
      vsip_lud_destroy_f(luAop);
    }
    }
    {int i,j; printf("A\\X\n"); for(i=0; i<NN; i++){
                  for(j=0; j<3; j++) printf("%9.2f%s",vsip_mget_f(X,i,j),(j == 2) ? "":",");
                  printf(";\n");
              }
    }
    {int i,j; printf("A'\\X\n"); for(i=0; i<NN; i++){
                  for(j=0; j<3; j++) printf("%9.2f%s",vsip_mget_f(XT,i,j),(j == 2) ? "":",");
                  printf(";\n");
              }
    }
    {
       vsip_valldestroy_f(b);
       vsip_valldestroy_f(x0);
       vsip_malldestroy_f(X);
       vsip_malldestroy_f(A);
    }
    }vsip_finalize((void*)0);return 1;
}
int main(){vsip_init((void*)0);
{
    vsip_mview_f *Adummy  = vsip_mcreate_f(5*NN,5*NN,VSIP_COL,0);
    vsip_mview_f *A = vsip_msubview_f(Adummy,3,2,NN,NN);
    vsip_vview_f *x0 = vsip_vcreate_f(NN,0);
    vsip_mview_f *X  = vsip_mcreate_f(NN,3,VSIP_ROW,0);
    vsip_mview_f *XT  = vsip_mcreate_f(NN,3,VSIP_COL,0);
    vsip_mputrowstride_f(A,2*vsip_mgetrowstride_f(A));
    vsip_mputcolstride_f(A,3*vsip_mgetcolstride_f(A));

    /* matrix data */
    vsip_mput_f(A,0,0,0.5); vsip_mput_f(A,0,1,7); vsip_mput_f(A,0,2,10); vsip_mput_f(A,0,3,12);
    vsip_mput_f(A,0,4,-3); vsip_mput_f(A,0,5,0); vsip_mput_f(A,0,6,.05);

    vsip_mput_f(A,1,0,2); vsip_mput_f(A,1,1,13); vsip_mput_f(A,1,2,18); vsip_mput_f(A,1,3,6);
    vsip_mput_f(A,1,4,0); vsip_mput_f(A,1,5,130); vsip_mput_f(A,1,6,8);

    vsip_mput_f(A,2,0,3); vsip_mput_f(A,2,1,-9); vsip_mput_f(A,2,2,2); vsip_mput_f(A,2,3,3);
    vsip_mput_f(A,2,4,2); vsip_mput_f(A,2,5,-9); vsip_mput_f(A,2,6,6);

    vsip_mput_f(A,3,0,4); vsip_mput_f(A,3,1,2); vsip_mput_f(A,3,2,2); vsip_mput_f(A,3,3,4);
    vsip_mput_f(A,3,4,1); vsip_mput_f(A,3,5,2); vsip_mput_f(A,3,6,3);

    vsip_mput_f(A,4,0,.2); vsip_mput_f(A,4,1,2); vsip_mput_f(A,4,2,9); vsip_mput_f(A,4,3,4);
    vsip_mput_f(A,4,4,1); vsip_mput_f(A,4,5,2); vsip_mput_f(A,4,6,3);

    vsip_mput_f(A,5,0,.1); vsip_mput_f(A,5,1,2); vsip_mput_f(A,5,2,.3); vsip_mput_f(A,5,3,4);
    vsip_mput_f(A,5,4,1); vsip_mput_f(A,5,5,2); vsip_mput_f(A,5,6,3);

    vsip_mput_f(A,6,0,.01); vsip_mput_f(A,6,1,.2); vsip_mput_f(A,6,2,3); vsip_mput_f(A,6,3,4);
    vsip_mput_f(A,6,4,1); vsip_mput_f(A,6,5,2); vsip_mput_f(A,6,6,3);

    {int i,j; printf("A = [\n"); for(i=0; i<NN; i++){
                  for(j=0; j<NN; j++) printf("%9.2f%s",vsip_mget_f(A,i,j),(j == NN-1) ? "":",");
                  printf(";\n");
              }
              printf("]\n");
    }
    { /* were solving for NTRANS Ax = B */
      /* use a known X, calculate B using Ax */
      int k; 
      vsip_vview_f *x;
      vsip_mview_f *AT = vsip_mtransview_f(A);
      vsip_length L    = vsip_mgetrowlength_f(X);
      vsip_vramp_f(1,1,x0);
      for(k=0; k<L; k++){
        x  = vsip_mcolview_f(X,k);
        vsip_mvprod_f(A,x0,x);
        vsip_svmul_f(2.0,x0,x0);
        vsip_vdestroy_f(x);
      }
      vsip_vramp_f(1,1,x0);
      for(k=0; k<L; k++){
        x  = vsip_mcolview_f(XT,k);
        vsip_mvprod_f(AT,x0,x);
        vsip_svmul_f(2.0,x0,x0);
        vsip_vdestroy_f(x);
      }
      vsip_mdestroy_f(AT);
      printf("X = 1  2  4\n    2  4  8\n    3  6 12\n    4  8 16\n    5 10 20\n    6 12 24\n    7 14 28\n");
      { 
         int i,j; printf("B = [\n");for(i=0; i<NN; i++){
                    for(j=0; j<3; j++) printf("%9.2f%s",vsip_mget_f(X,i,j),(j == 2) ? "":",");
                    printf(";\n");
                }
                printf("]\n");
      }
      {
        /* then solve for B to see if we get X back */
        vsip_lu_f* luAop = vsip_lud_create_f(NN);
        if(luAop == NULL) exit(1);
        vsip_lud_f(luAop,A);
        {  vsip_lu_attr_f attr;
           vsip_lud_getattr_f(luAop,&attr);
           printf("lud size %lu\n",attr.n);
        }  
        vsip_lusol_f(luAop,VSIP_MAT_NTRANS,X);
        vsip_lusol_f(luAop,VSIP_MAT_TRANS,XT);
        vsip_lud_destroy_f(luAop);
      }
    }
    {int i,j; printf("A\\X\n"); for(i=0; i<NN; i++){
                  for(j=0; j<3; j++) printf("%9.2f%s",vsip_mget_f(X,i,j),(j == 2) ? "":",");
                  printf(";\n");
              }
    }
    {int i,j; printf("A'\\X\n"); for(i=0; i<NN; i++){
                  for(j=0; j<3; j++) printf("%9.2f%s",vsip_mget_f(XT,i,j),(j == 2) ? "":",");
                  printf(";\n");
              }
    }
    {
       vsip_malldestroy_f(XT);
       vsip_valldestroy_f(x0);
       vsip_malldestroy_f(X);
       vsip_mdestroy_f(A);
       vsip_malldestroy_f(Adummy);
    }
    } vsip_finalize((void*)0); return 1;
}
int main(){
   int init = vsip_init((void*)0);
   int i,j, cholsol_retval,chold_retval;
   double t0 = VU_ansi_c_clock(); /* for doing some timeing */
   vsip_cscalar_f czero = vsip_cmplx_f((vsip_scalar_f)0.0,(vsip_scalar_f)0.0);
   vsip_cmview_f *A  = vsip_cmcreate_f(N,N,VSIP_COL,0);
   vsip_cmview_f *RU  = vsip_cmcreate_f(N,N,VSIP_COL,0);
   vsip_cmview_f *RL  = vsip_cmcreate_f(N,N,VSIP_COL,0);
   vsip_cmview_f *XB  = vsip_cmcreate_f(N,M,VSIP_ROW,0);
   vsip_cchol_f* chol = vsip_cchold_create_f(UPORLO,N);   /* NOTE: UPORLO macro above main() */

   /* to make sure we have a valid Positive Symetric define */
   /* an upper triangular (RU) with positive pivots and     */
   /* zero below the main diagonal.                         */
   /* Then initialize RL with hermitian of RU               */
   /* finally create A as the matrix product of RL and RU   */

   /* Initialise matrix RU  */
   /* time this             */
   t0 = VU_ansi_c_clock();
   for (i=0; i<N; i++){
      for(j = i; j < N; j++){
         #ifdef OBNOXIOUS
         /* make up some reasonably obnoxious data                */
         vsip_scalar_f a = cos(1.5/((j+1)*(i+1)))+sqrt(i*j);
         vsip_scalar_f b = (i + j + 1) * cos(M_PI * a);
         #else
         /* the above was to obnoxious for bigger than about N = 10 */
         /* the following works for N > 100 */
         vsip_scalar_f a = 1; vsip_scalar_f b = 1; 
         #endif
         if(i == j) /* fill diagonal */
             vsip_cmput_f(RU,i,j, vsip_cmplx_f(sqrt(N) + sqrt(i),0));
         else { /* fill off diagonal */
                vsip_cmput_f(RU,i,j,vsip_cmplx_f(b,a)); 
                vsip_cmput_f(RU,j,i,czero); 
         }
      }
   }
   /* initialize RL */
   vsip_cmherm_f(RU,RL);
   #ifdef PRINT
      VU_cmprintm_f("7.4",RU);
      VU_cmprintm_f("7.4",RL);
   #endif
   printf("Matrix initialize for RU and RL = %f seconds\n",VU_ansi_c_clock() - t0);

   /* initialize A */
   /* this step will take a long time so time it */
   t0 = VU_ansi_c_clock();
   vsip_cmprod_f(RL,RU,A);
   #ifdef OBNOXIOUS
      for(i=0; i<N; i++){
         vsip_cvview_f *aview = vsip_cmrowview_f(A,i);
         vsip_cvrsdiv_f(aview,vsip_cmag_f(vsip_cvmeanval_f(aview)),aview);
         vsip_cvdestroy_f(aview);
      }
   #endif
   printf("Matrix multiply for initialization of A = %f seconds\n",VU_ansi_c_clock() - t0);

   /* print  A                                                      */
   /* we only want to do this if A is something reasonable to print */
   /* selected as an option in the make file                        */
   #ifdef PRINT
      printf("Matrix A =\n");
      VU_cmprintm_f("4.2",A);
      fflush(stdout);
   #endif

   /* initialise rhs                      */
   /* start out with XB = {1,2,3,...,M}   */
   /* calculate what B must be using A    */
   /* then solve to see if we get XB back */
   {  vsip_index i;
      vsip_vview_f *y = vsip_vcreate_f(vsip_cmgetcollength_f(A),VSIP_MEM_NONE);
      vsip_vview_f *x_r,*x_i;
      vsip_cvview_f *x;
      vsip_mview_f *A_r = vsip_mrealview_f(A),
                   *A_i = vsip_mimagview_f(A);
      /* time this */
      t0 = VU_ansi_c_clock();
      for(i=0; i<M; i++){
         vsip_vfill_f((vsip_scalar_f)i+1.0,y);
         x = vsip_cmcolview_f(XB,i);
         x_r = vsip_vrealview_f(x);
         x_i = vsip_vimagview_f(x);
         vsip_mvprod_f(A_r,y,x_r);
         vsip_mvprod_f(A_i,y,x_i);
         vsip_cvdestroy_f(x);
         vsip_vdestroy_f(x_r);
         vsip_vdestroy_f(x_i);
      }
      vsip_mdestroy_f(A_r);
      vsip_mdestroy_f(A_i);
      printf("Matrix init for B = %f seconds\n",VU_ansi_c_clock() - t0);
   }

   /* print  XB                                                      */
   /* we only want to do this if XB is something reasonable to print */
   /* selected as an option in the make file                         */
   #ifdef PRINT
      printf("Matrix B = \n");
      VU_cmprintm_f("7.4",XB);
      fflush(stdout);
   #endif

   if(chol != NULL){
      t0 = VU_ansi_c_clock(); /* we want to time the decomposition */
      chold_retval = vsip_cchold_f(chol,A);
      printf("time decomp %f\n",VU_ansi_c_clock() - t0);
      printf("decompostion returns %d\n",chold_retval);
   
      /* now do the solution */
      t0 = VU_ansi_c_clock(); /* we want to time the solution */
      cholsol_retval=vsip_ccholsol_f(chol,XB);
      printf("time solution %f\n",VU_ansi_c_clock() - t0);
      printf("cholsol returns %d\n",cholsol_retval);

      /* print  XB                                                      */
      /* we only want to do this if XB is something reasonable to print */
      /* selected as an option in the make file; otherwise              */
      /* we print a single row of XB if the matrix is to large since    */
      /* M is usally reasonable. Printed as a column vector             */
      #ifdef PRINT
         printf("Matrix X = \n");
         VU_cmprintm_f("7.4",XB);
         fflush(stdout);
      #else
         {  /* pick a row in the middle */
            vsip_cvview_f *x = vsip_cmrowview_f(XB,N/2);
            printf("This output sould be 1,2,...,M\n");
            VU_cvprintm_f("7.4",x);
            fflush(stdout);
            vsip_cvdestroy_f(x);
         }
      #endif
   } else {
         printf("failed to create cholesky object \n");
   }
   vsip_cmalldestroy_f(XB);
   vsip_cmalldestroy_f(A);
   vsip_cmalldestroy_f(RL);
   vsip_cmalldestroy_f(RU);
   vsip_cchold_destroy_f(chol);
   vsip_finalize((void*)0);
   return 1;
}