コード例 #1
0
void eblas_zaxpy(int N, const complex& a, const complex* x, int incx, complex* y, int incy)
{	
	#ifdef MKL_PROVIDES_BLAS
	cblas_zaxpy(N, &a, x, incx, y, incy);
	#else
	threadLaunch((N<100000) ? 1 : 0,
		eblas_zaxpy_sub, N, &a, x, incx, y, incy);
	#endif
}
コード例 #2
0
static int check_solution(PLASMA_enum uplo, PLASMA_enum trans, int N, int K,
                          PLASMA_Complex64_t alpha, PLASMA_Complex64_t *A, int LDA,
                          PLASMA_Complex64_t *B, int LDB,
                          double beta, PLASMA_Complex64_t *Cref, PLASMA_Complex64_t *Cplasma, int LDC)
{
    int info_solution;
    double Anorm, Bnorm, Cinitnorm, Cplasmanorm, Clapacknorm, Rnorm, result;
    double eps;
    PLASMA_Complex64_t beta_const;

    double *work = (double *)malloc(max(N, K)* sizeof(double));

    beta_const  = -1.0;
    Anorm       = LAPACKE_zlange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), 
                                      (trans == PlasmaNoTrans) ? N : K, 
                                      (trans == PlasmaNoTrans) ? K : N, A, LDA, work);
    Bnorm       = LAPACKE_zlange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), 
                                      (trans == PlasmaNoTrans) ? N : K, 
                                      (trans == PlasmaNoTrans) ? K : N, B, LDB, work);
    Cinitnorm   = LAPACKE_zlange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), N, N, Cref,    LDC, work);
    Cplasmanorm = LAPACKE_zlange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), N, N, Cplasma, LDC, work);

    cblas_zher2k(CblasColMajor, (CBLAS_UPLO)uplo, (CBLAS_TRANSPOSE)trans, 
                 N, K, CBLAS_SADDR(alpha), A, LDA, B, LDB, (beta), Cref, LDC);

    Clapacknorm = LAPACKE_zlange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), N, N, Cref, LDC, work);

    cblas_zaxpy(LDC*N, CBLAS_SADDR(beta_const), Cplasma, 1, Cref, 1);

    Rnorm = LAPACKE_zlange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), N, N, Cref, LDC, work);

    eps = LAPACKE_dlamch_work('e');
    
    printf("Rnorm %e, Anorm %e, Cinitnorm %e, Cplasmanorm %e, Clapacknorm %e\n",
           Rnorm, Anorm, Cinitnorm, Cplasmanorm, Clapacknorm);

    result = Rnorm / ((Anorm + Bnorm + Cinitnorm) * N * eps);
    printf("============\n");
    printf("Checking the norm of the difference against reference ZHER2K \n");
    printf("-- ||Cplasma - Clapack||_oo/((||A||_oo+||C||_oo).N.eps) = %e \n", result);

    if (  isnan(Rnorm) || isinf(Rnorm) || isnan(result) || isinf(result) || (result > 10.0) ) {
         printf("-- The solution is suspicious ! \n");
         info_solution = 1;
    }
    else {
         printf("-- The solution is CORRECT ! \n");
         info_solution= 0 ;
    }

    free(work);

    return info_solution;
}
コード例 #3
0
void HostVector<std::complex<double> >::AddScale(const BaseVector<std::complex<double> > &x, const std::complex<double> alpha) {

  assert(&x != NULL);

  const HostVector<std::complex<double> > *cast_x = dynamic_cast<const HostVector<std::complex<double> >*> (&x);

  assert(cast_x != NULL);
  assert(this->size_ == cast_x->size_);

  cblas_zaxpy(this->size_, &alpha, cast_x->vec_, 1, this->vec_, 1);

}
コード例 #4
0
ファイル: blas_complex_double.cpp プロジェクト: Sable/VeloCty
//TODO: the parameter alpha is not used. 
VrArrayPtrCF64 BlasComplexDouble::vec_add(int ndims, VrArrayPtrCF64 X,VrArrayPtrCF64 Y, const double  complex alpha, const int incX,const int incY ){
        int N=1;
        for(int i=0;i<ndims;i++){
                N*=VR_GET_DIMS_CF64(X)[i];
        }
	double alph[]={1,0};
		
	VrArrayPtrCF64 Y1=vec_copy(ndims,Y);
        cblas_zaxpy(N,alph,reinterpret_cast<double*>(VR_GET_DATA_CF64(X)),1,reinterpret_cast<double*>(VR_GET_DATA_CF64(Y1)),1);
	return Y1;
	

}
コード例 #5
0
ファイル: bl1_axpy.c プロジェクト: anaptyxis/libflame
void bl1_zaxpy( int n, dcomplex* alpha, dcomplex* x, int incx, dcomplex* y, int incy )
{
#ifdef BLIS1_ENABLE_CBLAS_INTERFACES
	cblas_zaxpy( n,
	             alpha,
	             x, incx,
	             y, incy );
#else
	F77_zaxpy( &n,
	           alpha,
	           x, &incx,
	           y, &incy );
#endif
}
コード例 #6
0
ファイル: testing_zsymm.c プロジェクト: gpichon/eigenproblems
static int check_solution(PLASMA_enum side, PLASMA_enum uplo, int M, int N,
                   PLASMA_Complex64_t alpha, PLASMA_Complex64_t *A, int LDA,
                   PLASMA_Complex64_t *B, int LDB,
                   PLASMA_Complex64_t beta, PLASMA_Complex64_t *Cref, PLASMA_Complex64_t *Cplasma, int LDC)
{
    int info_solution, NrowA;
    double Anorm, Bnorm, Cinitnorm, Cplasmanorm, Clapacknorm, Rnorm;
    double eps;
    PLASMA_Complex64_t beta_const;
    double result;
    double *work = (double *)malloc(max(M, N)* sizeof(double));

    beta_const  = (PLASMA_Complex64_t)-1.0;

    NrowA = (side == PlasmaLeft) ? M : N;
    Anorm       = LAPACKE_zlange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), NrowA, NrowA, A,       LDA, work);
    Bnorm       = LAPACKE_zlange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), M,     N,     B,       LDB, work);
    Cinitnorm   = LAPACKE_zlange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), M,     N,     Cref,    LDC, work);
    Cplasmanorm = LAPACKE_zlange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), M,     N,     Cplasma, LDC, work);

    cblas_zsymm(CblasColMajor, (CBLAS_SIDE)side, (CBLAS_UPLO)uplo, M, N, CBLAS_SADDR(alpha), A, LDA, B, LDB, CBLAS_SADDR(beta), Cref, LDC);

    Clapacknorm = LAPACKE_zlange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), M, N, Cref, LDC, work);

    cblas_zaxpy(LDC * N, CBLAS_SADDR(beta_const), Cplasma, 1, Cref, 1);

    Rnorm = LAPACKE_zlange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), M, N, Cref, LDC, work);

    eps = LAPACKE_dlamch_work('e');

    printf("Rnorm %e, Anorm %e, Bnorm %e, Cinitnorm %e, Cplasmanorm %e, Clapacknorm %e\n",Rnorm,Anorm,Bnorm,Cinitnorm,Cplasmanorm,Clapacknorm);

    result = Rnorm / ((Anorm + Bnorm + Cinitnorm) * N * eps);

    printf("============\n");
    printf("Checking the norm of the difference against reference ZSYMM \n");
    printf("-- ||Cplasma - Clapack||_oo/((||A||_oo+||B||_oo+||C||_oo).N.eps) = %e \n", result );

    if ( isinf(Clapacknorm) || isinf(Cplasmanorm) || isnan(result) || isinf(result) || (result > 10.0) ) {
        printf("-- The solution is suspicious ! \n");
        info_solution = 1;
    }
    else {
        printf("-- The solution is CORRECT ! \n");
        info_solution= 0 ;
    }
    free(work);
    return info_solution;
}
コード例 #7
0
ファイル: ComplexMatrix.cpp プロジェクト: dhold/Matlab_code
void phi_axpy(const int N, const Complex *alpha, const Complex *X,
                 const int incX, Complex *Y, const int incY){
#ifndef NOBLAS
    #ifdef SINGLEPRECISION 
    cblas_caxpy(N,alpha,X,1,Y,1);
    #else
    cblas_zaxpy(N,alpha,X,1,Y,1);
    #endif
#else
    int i;
    for(i = 0; i < N; ++i){
        Y[i] = (*alpha)*X[i]+Y[i];
    }
#endif
}
コード例 #8
0
ファイル: blas_complex_double.cpp プロジェクト: Sable/VeloCty
VrArrayPtrCF64 BlasComplexDouble::vec_sub(int ndims, VrArrayPtrCF64 X, VrArrayPtrCF64 Y ,const double complex alpha, const int incX, const int incY){
        VrArrayPtrCF64 X1=X;
        /*if(alpha[0]!=1){
                VrArrayPtrCF64 X1=vrAllocArrayF64CM(ndims,0,(int*)VR_GET_DIMS_CF64(X));
                X1=BlasComplexDouble::scal_mult(ndims,X,(double*)alpha);

        }*/
	const double arr[]={-1,0};
	//double complex arr = -1 ;
	int N=1;
        for(int i=0;i<ndims;i++){
                N*=VR_GET_DIMS_CF64(X)[i];
        }
        //std::cout<<"N="<<N<<std::endl;
        //double alph[]={1,0};

        VrArrayPtrCF64 Y1=vec_copy(ndims,X);
        cblas_zaxpy(N,(arr),reinterpret_cast<double*>(VR_GET_DATA_CF64(Y)),1,reinterpret_cast<double*>(VR_GET_DATA_CF64(Y1)),1);
	//double complex alph = 1+0*I;	
        //VrArrayPtrCF64 Y1= scal_mult(Y->ndims,Y,arr);// vec_copy(ndims,X);
       // cblas_zaxpy(N,&alph,VR_GET_DATA_CF64(X),1,VR_GET_DATA_CF64(Y1),1);
        return Y1;//BlasComplexDouble::vec_add(ndims, Y,X1, arr);
}
コード例 #9
0
ファイル: core_zttqrt.c プロジェクト: joao-lima/plasma-kaapi
int CORE_zttqrt(int M, int N, int IB,
                PLASMA_Complex64_t *A1, int LDA1,
                PLASMA_Complex64_t *A2, int LDA2,
                PLASMA_Complex64_t *T, int LDT,
                PLASMA_Complex64_t *TAU, PLASMA_Complex64_t *WORK)
{
    static PLASMA_Complex64_t zone  = 1.0;
    static PLASMA_Complex64_t zzero = 0.0;
    static int                ione  = 1;

    PLASMA_Complex64_t alpha;
    int i, j, ii, sb, mi, ni;

    /* Check input arguments */
    if (M < 0) {
        coreblas_error(1, "Illegal value of M");
        return -1;
    }
    if (N < 0) {
        coreblas_error(2, "Illegal value of N");
        return -2;
    }
    if (IB < 0) {
        coreblas_error(3, "Illegal value of IB");
        return -3;
    }
    if ((LDA2 < max(1,M)) && (M > 0)) {
        coreblas_error(7, "Illegal value of LDA2");
        return -7;
    }

    /* Quick return */
    if ((M == 0) || (N == 0) || (IB == 0))
        return PLASMA_SUCCESS;

    for(ii = 0; ii < N; ii += IB) {
        sb = min(N-ii, IB);
        for(i = 0; i < sb; i++) {
            /*
             * Generate elementary reflector H( II*IB+I ) to annihilate
             * A( II*IB+I:mi, II*IB+I ).
             */
            mi = ii + i + 1;
            LAPACKE_zlarfg_work(mi+1, &A1[LDA1*(ii+i)+ii+i], &A2[LDA2*(ii+i)], ione, &TAU[ii+i]);

            if (sb-i-1>0) {
                /*
                 * Apply H( II*IB+I ) to A( II*IB+I:M, II*IB+I+1:II*IB+IB ) from the left.
                 */
                ni = sb-i-1;
                cblas_zcopy(
                    ni,
                    &A1[LDA1*(ii+i+1)+(ii+i)], LDA1,
                    WORK, 1);

#ifdef COMPLEX
                LAPACKE_zlacgv_work(ni, WORK, ione);
#endif
                cblas_zgemv(
                    CblasColMajor, (CBLAS_TRANSPOSE)PlasmaConjTrans,
                    mi, ni,
                    CBLAS_SADDR(zone), &A2[LDA2*(ii+i+1)], LDA2,
                    &A2[LDA2*(ii+i)], 1,
                    CBLAS_SADDR(zone), WORK, 1);
#ifdef COMPLEX
                LAPACKE_zlacgv_work(ni, WORK, ione);
#endif
                alpha = -conj(TAU[ii+i]);
                cblas_zaxpy(
                    ni, CBLAS_SADDR(alpha),
                    WORK, 1,
                    &A1[LDA1*(ii+i+1)+ii+i], LDA1);
#ifdef COMPLEX
                LAPACKE_zlacgv_work(ni, WORK, ione);
#endif
                cblas_zgerc(
                    CblasColMajor, mi, ni,
                    CBLAS_SADDR(alpha), &A2[LDA2*(ii+i)], 1,
                    WORK, 1,
                    &A2[LDA2*(ii+i+1)], LDA2);
            }

            /*
             * Calculate T.
             */

            if (i > 0 ) {

                cblas_zcopy(i, &A2[LDA2*(ii+i)+ii], 1, &WORK[ii], 1);
    
                cblas_ztrmv(
                    CblasColMajor, (CBLAS_UPLO)PlasmaUpper,
                    (CBLAS_TRANSPOSE)PlasmaConjTrans, (CBLAS_DIAG)PlasmaNonUnit,
                    i, &A2[LDA2*ii+ii], LDA2,
                    &WORK[ii], 1);
    
                alpha = -(TAU[ii+i]);
                for(j = 0; j < i; j++) {
                    WORK[ii+j] = alpha * WORK[ii+j];
                }
    
                if (ii > 0) {
                    cblas_zgemv(
                        CblasColMajor, (CBLAS_TRANSPOSE)PlasmaConjTrans, ii, i,
                        CBLAS_SADDR(alpha), &A2[LDA2*ii], LDA2,
                        &A2[LDA2*(ii+i)], 1,
                        CBLAS_SADDR(zzero), WORK, 1);
    
                    cblas_zaxpy(i, CBLAS_SADDR(zone), &WORK[ii], 1, WORK, 1);
                }
    
                cblas_zcopy(i, WORK, 1, &T[LDT*(ii+i)], 1);
    
                cblas_ztrmv(
                    CblasColMajor, (CBLAS_UPLO)PlasmaUpper,
                    (CBLAS_TRANSPOSE)PlasmaNoTrans, (CBLAS_DIAG)PlasmaNonUnit,
                    i, &T[LDT*ii], LDT,
                    &T[LDT*(ii+i)], 1);

            }

            T[LDT*(ii+i)+i] = TAU[ii+i];
        }

        /* Apply Q' to the rest of the matrix to the left  */
        if (N > ii+sb) {
            CORE_zttrfb(
                PlasmaLeft, PlasmaConjTrans,
                PlasmaForward, PlasmaColumnwise,
                sb, N-(ii+sb), ii+sb, N-(ii+sb), sb,
                &A1[LDA1*(ii+sb)+ii], LDA1,
                &A2[LDA2*(ii+sb)], LDA2,
                &A2[LDA2*ii], LDA2,
                &T[LDT*ii], LDT,
                WORK, sb);
        }
    }
    return PLASMA_SUCCESS;
}
コード例 #10
0
ファイル: axpy.hpp プロジェクト: KerekesDavid/mpqc
//
// Overloaded function for dispatching to
// * CBLAS backend, and
// * complex<double> value-type.
//
inline void axpy( const int n, const std::complex<double> a,
        const std::complex<double>* x, const int incx,
        std::complex<double>* y, const int incy ) {
    cblas_zaxpy( n, &a, x, incx, y, incy );
}
コード例 #11
0
ファイル: example_ein.c プロジェクト: liulei/prepare_uv
int main(int argc, char *argv[]) {

  

  int i, j, Nx, Ny, Nr, Nb;
  int seedn=54;
  double sigma;
  double a;
  double mse;
  double snr;
  double snr_out;
  double gamma=0.001;
  double aux1, aux2, aux3, aux4;
  complex double alpha;
  

  purify_image img, img_copy;
  purify_visibility_filetype filetype_vis;
  purify_image_filetype filetype_img;
  complex double *xinc;
  complex double *y0;
  complex double *y;
  complex double *noise;
  double *xout;
  double *w;
  double *error;
  complex double *xoutc;
  double *wdx;
  double *wdy;
  double *dummyr;
  complex double *dummyc;

  
  //parameters for the continuos Fourier Transform
  double *deconv;
  purify_sparsemat_row gmat;
  purify_visibility vis_test;
  purify_measurement_cparam param_m1;
  purify_measurement_cparam param_m2;
  complex double *fft_temp1;
  complex double *fft_temp2;
  void *datafwd[5];
  void *dataadj[5];
  fftw_plan planfwd;
  fftw_plan planadj;

  //Structures for sparsity operator
  sopt_wavelet_type *dict_types;
  sopt_wavelet_type *dict_types1;
  sopt_wavelet_type *dict_types2;
  sopt_sara_param param1;
  sopt_sara_param param2;
  sopt_sara_param param3;
  void *datas[1];
  void *datas1[1];
  void *datas2[1];

  //Structures for the opmization problems
  sopt_l1_sdmmparam param4;
  sopt_l1_rwparam param5;
  sopt_prox_tvparam param6;
  sopt_tv_sdmmparam param7;
  sopt_tv_rwparam param8;

  clock_t start, stop;
  double t = 0.0;
  double start1, stop1;
  int dimy, dimx;
  
  //Image dimension of the zero padded image
  //Dimensions should be power of 2
  dimx = 256;
  dimy = 256;

  //Define parameters
  filetype_vis = PURIFY_VISIBILITY_FILETYPE_PROFILE_VIS;
  filetype_img = PURIFY_IMAGE_FILETYPE_FITS;

  //Read coverage
  purify_visibility_readfile(&vis_test,
             "./data/images/Coverages/cont_sim4.vis",
             filetype_vis); 
  printf("Number of visibilities: %i \n\n", vis_test.nmeas);  

   

  // Input image.
  img.fov_x = 1.0 / 180.0 * PURIFY_PI;
  img.fov_y = 1.0 / 180.0 * PURIFY_PI;
  img.nx = 4;
  img.ny = 4;

  //Read input image
  purify_image_readfile(&img, "data/images/Einstein.fits", 1);
  printf("Image dimension: %i, %i \n\n", img.nx, img.ny); 
//  purify_image_writefile(&img, "data/test/Einstein_double.fits", filetype_img);

  param_m1.nmeas = vis_test.nmeas;
  param_m1.ny1 = dimy;
  param_m1.nx1 = dimx;
  param_m1.ofy = 2;
  param_m1.ofx = 2;
  param_m1.ky = 2;
  param_m1.kx = 2;

  param_m2.nmeas = vis_test.nmeas;
  param_m2.ny1 = dimy;
  param_m2.nx1 = dimx;
  param_m2.ofy = 2;
  param_m2.ofx = 2;
  param_m2.ky = 2;
  param_m2.kx = 2;

  Nb = 9;
  Nx=param_m2.ny1*param_m2.nx1;
  Nr=Nb*Nx;
  Ny=param_m2.nmeas;

  //Memory allocation for the different variables
  deconv = (double*)malloc((Nx) * sizeof(double));
  PURIFY_ERROR_MEM_ALLOC_CHECK(deconv);
  xinc = (complex double*)malloc((Nx) * sizeof(complex double));
  PURIFY_ERROR_MEM_ALLOC_CHECK(xinc);
  xout = (double*)malloc((Nx) * sizeof(double));
  PURIFY_ERROR_MEM_ALLOC_CHECK(xout);
  y = (complex double*)malloc((vis_test.nmeas) * sizeof(complex double));
  PURIFY_ERROR_MEM_ALLOC_CHECK(y);
  y0 = (complex double*)malloc((vis_test.nmeas) * sizeof(complex double));
  PURIFY_ERROR_MEM_ALLOC_CHECK(y0);
  noise = (complex double*)malloc((vis_test.nmeas) * sizeof(complex double));
  PURIFY_ERROR_MEM_ALLOC_CHECK(noise);
  w = (double*)malloc((Nr) * sizeof(double));
  PURIFY_ERROR_MEM_ALLOC_CHECK(w);
  error = (double*)malloc((Nx) * sizeof(double));
  PURIFY_ERROR_MEM_ALLOC_CHECK(error);
  xoutc = (complex double*)malloc((Nx) * sizeof(complex double));
  PURIFY_ERROR_MEM_ALLOC_CHECK(xoutc);

  wdx = (double*)malloc((Nx) * sizeof(double));
  PURIFY_ERROR_MEM_ALLOC_CHECK(wdx);
  wdy = (double*)malloc((Nx) * sizeof(double));
  PURIFY_ERROR_MEM_ALLOC_CHECK(wdy);

  dummyr = malloc(Nr * sizeof(double));
  PURIFY_ERROR_MEM_ALLOC_CHECK(dummyr);
  dummyc = malloc(Nr * sizeof(complex double));
  PURIFY_ERROR_MEM_ALLOC_CHECK(dummyc);

  for (i=0; i < Nx; i++){
    xinc[i] = 0.0 + 0.0*I;
  }

  for (i=0; i < img.nx; i++){
    for (j=0; j < img.ny; j++){
      xinc[i+j*param_m1.nx1] = img.pix[i+j*img.nx] + 0.0*I;
    }
  }
  
  //Initialize griding matrix
  assert((start = clock())!=-1);
  purify_measurement_init_cft(&gmat, deconv, vis_test.u, vis_test.v, &param_m1);
  stop = clock();
  t = (double) (stop-start)/CLOCKS_PER_SEC;
  printf("Time initalization: %f \n\n", t);

  for(i = 0; i < img.nx * img.ny; ++i){
    deconv[i] = 1.0;
  }
  
  //Memory allocation for the fft
  i = Nx*param_m1.ofy*param_m1.ofx;
  fft_temp1 = (complex double*)malloc((i) * sizeof(complex double));
  PURIFY_ERROR_MEM_ALLOC_CHECK(fft_temp1);
  fft_temp2 = (complex double*)malloc((i) * sizeof(complex double));
  PURIFY_ERROR_MEM_ALLOC_CHECK(fft_temp2);

  //FFT plan  
  planfwd = fftw_plan_dft_2d(param_m1.nx1*param_m1.ofx, param_m1.ny1*param_m1.ofy, 
              fft_temp1, fft_temp1, 
              FFTW_FORWARD, FFTW_MEASURE);

  planadj = fftw_plan_dft_2d(param_m1.nx1*param_m1.ofx, param_m1.ny1*param_m1.ofy, 
              fft_temp2, fft_temp2, 
              FFTW_BACKWARD, FFTW_MEASURE);


  datafwd[0] = (void*)&param_m1;
  datafwd[1] = (void*)deconv;
  datafwd[2] = (void*)&gmat;
  datafwd[3] = (void*)&planfwd;
  datafwd[4] = (void*)fft_temp1;

  dataadj[0] = (void*)&param_m2;
  dataadj[1] = (void*)deconv;
  dataadj[2] = (void*)&gmat;
  dataadj[3] = (void*)&planadj;
  dataadj[4] = (void*)fft_temp2;


  printf("FFT plan done \n\n");
  
  
  assert((start = clock())!=-1);
  purify_measurement_cftfwd((void*)y0, (void*)xinc, datafwd);
  stop = clock();
  t = (double) (stop-start)/CLOCKS_PER_SEC;
  printf("Time forward operator: %f \n\n", t);

  
  //Noise realization
  //Input snr
  snr = 30.0;
  a = cblas_dznrm2(Ny, (void*)y0, 1);
  sigma = a*pow(10.0,-(snr/20.0))/sqrt(Ny);

  FILE *fout = fopen("ein.uv", "w");
    
  for (i=0; i < Ny; i++) {
//      noise[i] = (sopt_ran_gasdev2(seedn) + sopt_ran_gasdev2(seedn)*I)*(sigma/sqrt(2));
      noise[i] = 0;
      y[i] = y0[i] + noise[i];

    fprintf(fout, "%14.5e%14.5e%14.5e%14.5e%14.5e%14.5e\n", 
            vis_test.u[i], vis_test.v[i], vis_test.w[i],
            creal(y[i]), cimag(y[i]), 1.0);
  }

  fclose(fout);

  //Rescaling the measurements

  aux4 = (double)Ny/(double)Nx;

  for (i=0; i < Ny; i++) {
    y[i] = y[i]/sqrt(aux4);
  }

  for (i=0; i < Nx; i++) {
    deconv[i] = deconv[i]/sqrt(aux4);
  }
  
  
  // Output image.
  img_copy.fov_x = 1.0 / 180.0 * PURIFY_PI;
  img_copy.fov_y = 1.0 / 180.0 * PURIFY_PI;
  img_copy.nx = param_m1.nx1;
  img_copy.ny = param_m1.ny1;

  for (i=0; i < Nx; i++){
    xoutc[i] = 0.0 + 0.0*I;
  }
  
  //Dirty image
  purify_measurement_cftadj((void*)xoutc, (void*)y, dataadj);

  for (i=0; i < Nx; i++) {
    xout[i] = creal(xoutc[i]);
  }

  aux1 = purify_utils_maxarray(xout, Nx);

  

   img_copy.pix = (double*)malloc((Nx) * sizeof(double));
  PURIFY_ERROR_MEM_ALLOC_CHECK(img_copy.pix);

  for (i=0; i < Nx; i++){
    img_copy.pix[i] = creal(xoutc[i]);
  }
  
  purify_image_writefile(&img_copy, "eindirty.fits", filetype_img);
  
  return 0;
  
  //SARA structure initialization

    param1.ndict = Nb;
    param1.real = 0;

    dict_types = malloc(param1.ndict * sizeof(sopt_wavelet_type));
    PURIFY_ERROR_MEM_ALLOC_CHECK(dict_types);


    dict_types[0] = SOPT_WAVELET_DB1;
    dict_types[1] = SOPT_WAVELET_DB2;
    dict_types[2] = SOPT_WAVELET_DB3;
    dict_types[3] = SOPT_WAVELET_DB4;
    dict_types[4] = SOPT_WAVELET_DB5;
    dict_types[5] = SOPT_WAVELET_DB6;
    dict_types[6] = SOPT_WAVELET_DB7;
    dict_types[7] = SOPT_WAVELET_DB8;
    dict_types[8] = SOPT_WAVELET_Dirac;
    

    sopt_sara_initop(&param1, param_m1.ny1, param_m1.nx1, 4, dict_types);

    datas[0] = (void*)&param1;

    //Db8 structure initialization

    param2.ndict = 1;
    param2.real = 0;

    dict_types1 = malloc(param2.ndict * sizeof(sopt_wavelet_type));
    PURIFY_ERROR_MEM_ALLOC_CHECK(dict_types1);

    dict_types1[0] = SOPT_WAVELET_DB8;
    
    sopt_sara_initop(&param2, param_m1.ny1, param_m1.nx1, 4, dict_types1);

  datas1[0] = (void*)&param2;

  //Dirac structure initialization

    param3.ndict = 1;
    param3.real = 0;

    dict_types2 = malloc(param3.ndict * sizeof(sopt_wavelet_type));
    PURIFY_ERROR_MEM_ALLOC_CHECK(dict_types2);

    dict_types2[0] = SOPT_WAVELET_Dirac;
    
    sopt_sara_initop(&param3, param_m1.ny1, param_m1.nx1, 4, dict_types2);

  datas2[0] = (void*)&param3;

  //Scaling constants in the diferent representation domains

  sopt_sara_analysisop((void*)dummyc, (void*)xoutc, datas);

  for (i=0; i < Nr; i++) {
    dummyr[i] = creal(dummyc[i]);
  }

  aux2 = purify_utils_maxarray(dummyr, Nr);

  sopt_sara_analysisop((void*)dummyc, (void*)xoutc, datas1);

  for (i=0; i < Nr; i++) {
    dummyr[i] = creal(dummyc[i]);
  }

  aux3 = purify_utils_maxarray(dummyr, Nx);

  


  // Output image.
  img_copy.fov_x = 1.0 / 180.0 * PURIFY_PI;
  img_copy.fov_y = 1.0 / 180.0 * PURIFY_PI;
  img_copy.nx = param_m1.nx1;
  img_copy.ny = param_m1.ny1;



  //Initial solution and weights
  for (i=0; i < Nx; i++) {
      xoutc[i] = 0.0 + 0.0*I;
      wdx[i] = 1.0;
      wdy[i] = 1.0;
  }
  for (i=0; i < Nr; i++){
    w[i] = 1.0;
  }
  //Copy true image in xout
  for (i=0; i < Nx; i++) {
    xout[i] = creal(xinc[i]);
  }



  printf("**********************\n");
  printf("BPSA reconstruction\n");
  printf("**********************\n");

  
    
  //Structure for the L1 solver      
  param4.verbose = 2;
  param4.max_iter = 300;
  param4.gamma = gamma*aux2;
  param4.rel_obj = 0.001;
  param4.epsilon = sqrt(Ny + 2*sqrt(Ny))*sigma/sqrt(aux4);
  param4.epsilon_tol = 0.01;
  param4.real_data = 0;
  param4.cg_max_iter = 100;
  param4.cg_tol = 0.000001;

  
  //Initial solution
  for (i=0; i < Nx; i++) {
      xoutc[i] = 0.0 + 0.0*I;
  }
 
  #ifdef _OPENMP 
    start1 = omp_get_wtime();
  #else
    assert((start = clock())!=-1);
  #endif
  sopt_l1_sdmm((void*)xoutc, Nx,
                   &purify_measurement_cftfwd,
                   datafwd,
                   &purify_measurement_cftadj,
                   dataadj,
                   &sopt_sara_synthesisop,
                   datas,
                   &sopt_sara_analysisop,
                   datas,
                   Nr,
                   (void*)y, Ny, w, param4);

  #ifdef _OPENMP 
    stop1 = omp_get_wtime();
    t = stop1 - start1;
  #else
    stop = clock();
    t = (double) (stop-start)/CLOCKS_PER_SEC;
  #endif

  printf("Time BPSA: %f \n\n", t); 

  
    //SNR
    for (i=0; i < Nx; i++) {
        error[i] = creal(xoutc[i])-xout[i];
    }
    mse = cblas_dnrm2(Nx, error, 1);
    a = cblas_dnrm2(Nx, xout, 1);
    snr_out = 20.0*log10(a/mse);
    printf("SNR: %f dB\n\n", snr_out);


    
  for (i=0; i < Nx; i++){
    img_copy.pix[i] = creal(xoutc[i]);
  }
  
  purify_image_writefile(&img_copy, "./data/test/einbpsa.fits", filetype_img);

  //Residual image

  purify_measurement_cftfwd((void*)y0, (void*)xoutc, datafwd);
  alpha = -1.0 +0.0*I;
  cblas_zaxpy(Ny, (void*)&alpha, y, 1, y0, 1);
  purify_measurement_cftadj((void*)xinc, (void*)y0, dataadj);

  for (i=0; i < Nx; i++){
    img_copy.pix[i] = creal(xinc[i]);
  }
  
  purify_image_writefile(&img_copy, "data/test/einbpsares.fits", filetype_img);
  
  //Error image
  for (i=0; i < Nx; i++){
    img_copy.pix[i] = error[i];
  }
  
  purify_image_writefile(&img_copy, "data/test/einbpsaerror.fits", filetype_img);
  


  printf("**********************\n");
  printf("SARA reconstruction\n");
  printf("**********************\n");

  

  //Structure for the RWL1 solver    
  param5.verbose = 2;
  param5.max_iter = 5;
  param5.rel_var = 0.001;
  param5.sigma = sigma*sqrt((double)Ny/(double)Nr);
  param5.init_sol = 1;

  
  #ifdef _OPENMP 
    start1 = omp_get_wtime();
  #else
    assert((start = clock())!=-1);
  #endif
  sopt_l1_rwsdmm((void*)xoutc, Nx,
                   &purify_measurement_cftfwd,
                   datafwd,
                   &purify_measurement_cftadj,
                   dataadj,
                   &sopt_sara_synthesisop,
                   datas,
                   &sopt_sara_analysisop,
                   datas,
                   Nr,
                   (void*)y, Ny, param4, param5);

  #ifdef _OPENMP 
    stop1 = omp_get_wtime();
    t = stop1 - start1;
  #else
    stop = clock();
    t = (double) (stop-start)/CLOCKS_PER_SEC;
  #endif

  printf("Time SARA: %f \n\n", t); 

  
    //SNR
    for (i=0; i < Nx; i++) {
        error[i] = creal(xoutc[i])-xout[i];
    }
    mse = cblas_dnrm2(Nx, error, 1);
    a = cblas_dnrm2(Nx, xout, 1);
    snr_out = 20.0*log10(a/mse);
    printf("SNR: %f dB\n\n", snr_out);

  for (i=0; i < Nx; i++){
    img_copy.pix[i] = creal(xoutc[i]);
  }
  
  purify_image_writefile(&img_copy, "data/test/einsara.fits", filetype_img);

  

   //Residual image

  purify_measurement_cftfwd((void*)y0, (void*)xoutc, datafwd);
  alpha = -1.0 +0.0*I;
  cblas_zaxpy(Ny, (void*)&alpha, y, 1, y0, 1);
  purify_measurement_cftadj((void*)xinc, (void*)y0, dataadj);

  for (i=0; i < Nx; i++){
    img_copy.pix[i] = creal(xinc[i]);
  }
  
  purify_image_writefile(&img_copy, "data/test/einsarares.fits", filetype_img);

  //Error image
  for (i=0; i < Nx; i++){
    img_copy.pix[i] = error[i];
  }
  
  purify_image_writefile(&img_copy, "data/test/einsaraerror.fits", filetype_img);

  
    printf("**********************\n");
    printf("TV reconstruction\n");
    printf("**********************\n");

    
    //Structure for the TV prox
    param6.verbose = 1;
    param6.max_iter = 50;
    param6.rel_obj = 0.0001;

    
    //Structure for the TV solver    
    param7.verbose = 2;
    param7.max_iter = 300;
    param7.gamma = gamma*aux1;
    param7.rel_obj = 0.001;
    param7.epsilon = sqrt(Ny + 2*sqrt(Ny))*sigma/sqrt(aux4);
    param7.epsilon_tol = 0.01;
    param7.real_data = 0;
    param7.cg_max_iter = 100;
    param7.cg_tol = 0.000001;
    param7.paramtv = param6;

    //Initial solution and weights
    for (i=0; i < Nx; i++) {
        xoutc[i] = 0.0 + 0.0*I;
        wdx[i] = 1.0;
        wdy[i] = 1.0;
    }

    
    assert((start = clock())!=-1);
    sopt_tv_sdmm((void*)xoutc, dimx, dimy,
                   &purify_measurement_cftfwd,
                   datafwd,
                   &purify_measurement_cftadj,
                   dataadj,
                   (void*)y, Ny, wdx, wdy, param7);
    stop = clock();
    t = (double) (stop-start)/CLOCKS_PER_SEC;
    
    printf("Time TV: %f \n\n", t); 

    
   //SNR
    for (i=0; i < Nx; i++) {
        error[i] = creal(xoutc[i])-xout[i];
    }
    mse = cblas_dnrm2(Nx, error, 1);
    a = cblas_dnrm2(Nx, xout, 1);
    snr_out = 20.0*log10(a/mse);
    printf("SNR: %f dB\n\n", snr_out);

  for (i=0; i < Nx; i++){
    img_copy.pix[i] = creal(xoutc[i]);
  }
  
  purify_image_writefile(&img_copy, "data/test/eintv.fits", filetype_img);

   //Residual image

  purify_measurement_cftfwd((void*)y0, (void*)xoutc, datafwd);
  alpha = -1.0 +0.0*I;
  cblas_zaxpy(Ny, (void*)&alpha, y, 1, y0, 1);
  purify_measurement_cftadj((void*)xinc, (void*)y0, dataadj);

  for (i=0; i < Nx; i++){
    img_copy.pix[i] = creal(xinc[i]);
  }
  
  purify_image_writefile(&img_copy, "data/test/eintvres.fits", filetype_img);

  //Error image
  for (i=0; i < Nx; i++){
    img_copy.pix[i] = error[i];
  }
  
  purify_image_writefile(&img_copy, "data/test/eintverror.fits", filetype_img);

  printf("**********************\n");
  printf("RWTV reconstruction\n");
  printf("**********************\n");

  

  //Structure for the RWTV solver    
  param8.verbose = 2;
  param8.max_iter = 5;
  param8.rel_var = 0.001;
  param8.sigma = sigma*sqrt(Ny/(2*Nx));
  param8.init_sol = 1;

  
  assert((start = clock())!=-1);
    sopt_tv_rwsdmm((void*)xoutc, dimx, dimy,
                   &purify_measurement_cftfwd,
                   datafwd,
                   &purify_measurement_cftadj,
                   dataadj,
                   (void*)y, Ny, param7, param8);
    stop = clock();
    t = (double) (stop-start)/CLOCKS_PER_SEC;

  printf("Time RWTV: %f \n\n", t); 

  

    //SNR
    for (i=0; i < Nx; i++) {
        error[i] = creal(xoutc[i])-xout[i];
    }
    mse = cblas_dnrm2(Nx, error, 1);
    a = cblas_dnrm2(Nx, xout, 1);
    snr_out = 20.0*log10(a/mse);
    printf("SNR: %f dB\n\n", snr_out);

  for (i=0; i < Nx; i++){
    img_copy.pix[i] = creal(xoutc[i]);
  }
  
  purify_image_writefile(&img_copy, "data/test/einrwtv.fits", filetype_img);

   //Residual image

  purify_measurement_cftfwd((void*)y0, (void*)xoutc, datafwd);
  alpha = -1.0 +0.0*I;
  cblas_zaxpy(Ny, (void*)&alpha, y, 1, y0, 1);
  purify_measurement_cftadj((void*)xinc, (void*)y0, dataadj);

  for (i=0; i < Nx; i++){
    img_copy.pix[i] = creal(xinc[i]);
  }
  
  purify_image_writefile(&img_copy, "data/test/einrwtvres.fits", filetype_img);

  //Error image
  for (i=0; i < Nx; i++){
    img_copy.pix[i] = error[i];
  }
  
  purify_image_writefile(&img_copy, "data/test/einrwtverror.fits", filetype_img);

  printf("**********************\n");
  printf("Db8 reconstruction\n");
  printf("**********************\n");

  //Initial solution
  for (i=0; i < Nx; i++) {
        xoutc[i] = 0.0 + 0.0*I;
  }
  
  param4.gamma = gamma*aux3;

  assert((start = clock())!=-1);
   sopt_l1_sdmm((void*)xoutc, Nx,
                   &purify_measurement_cftfwd,
                   datafwd,
                   &purify_measurement_cftadj,
                   dataadj,
                   &sopt_sara_synthesisop,
                   datas1,
                   &sopt_sara_analysisop,
                   datas1,
                   Nx,
                   (void*)y, Ny, w, param4);

  stop = clock();
  t = (double) (stop-start)/CLOCKS_PER_SEC;

  printf("Time BPDb8: %f \n\n", t); 

  //SNR
  for (i=0; i < Nx; i++) {
        error[i] = creal(xoutc[i])-xout[i];
    }
    mse = cblas_dnrm2(Nx, error, 1);
    a = cblas_dnrm2(Nx, xout, 1);
    snr_out = 20.0*log10(a/mse);
    printf("SNR: %f dB\n\n", snr_out);

  for (i=0; i < Nx; i++){
    img_copy.pix[i] = creal(xoutc[i]);
  }
  
  purify_image_writefile(&img_copy, "data/test/eindb8.fits", filetype_img);

   //Residual image

  purify_measurement_cftfwd((void*)y0, (void*)xoutc, datafwd);
  alpha = -1.0 +0.0*I;
  cblas_zaxpy(Ny, (void*)&alpha, y, 1, y0, 1);
  purify_measurement_cftadj((void*)xinc, (void*)y0, dataadj);

  for (i=0; i < Nx; i++){
    img_copy.pix[i] = creal(xinc[i]);
  }
  
  purify_image_writefile(&img_copy, "data/test/eindb8res.fits", filetype_img);

  //Error image
  for (i=0; i < Nx; i++){
    img_copy.pix[i] = error[i];
  }
  
  purify_image_writefile(&img_copy, "data/test/eindb8error.fits", filetype_img);

  printf("**********************\n");
  printf("RWBPDb8 reconstruction\n");
  printf("**********************\n");

  

  //Structure for the RWL1 solver    
  param5.verbose = 2;
  param5.max_iter = 5;
  param5.rel_var = 0.001;
  param5.sigma = sigma*sqrt((double)Ny/(double)Nx);
  param5.init_sol = 1;

  
  assert((start = clock())!=-1);
  sopt_l1_rwsdmm((void*)xoutc, Nx,
                   &purify_measurement_cftfwd,
                   datafwd,
                   &purify_measurement_cftadj,
                   dataadj,
                   &sopt_sara_synthesisop,
                   datas1,
                   &sopt_sara_analysisop,
                   datas1,
                   Nx,
                   (void*)y, Ny, param4, param5);

  stop = clock();
  t = (double) (stop-start)/CLOCKS_PER_SEC;

  printf("Time RWBPDb8: %f \n\n", t); 

  

    //SNR
    for (i=0; i < Nx; i++) {
        error[i] = creal(xoutc[i])-xout[i];
    }
    mse = cblas_dnrm2(Nx, error, 1);
    a = cblas_dnrm2(Nx, xout, 1);
    snr_out = 20.0*log10(a/mse);
    printf("SNR: %f dB\n\n", snr_out);

  for (i=0; i < Nx; i++){
    img_copy.pix[i] = creal(xoutc[i]);
  }
  
  purify_image_writefile(&img_copy, "data/test/einrwdb8.fits", filetype_img);

   //Residual image

  purify_measurement_cftfwd((void*)y0, (void*)xoutc, datafwd);
  alpha = -1.0 +0.0*I;
  cblas_zaxpy(Ny, (void*)&alpha, y, 1, y0, 1);
  purify_measurement_cftadj((void*)xinc, (void*)y0, dataadj);

  for (i=0; i < Nx; i++){
    img_copy.pix[i] = creal(xinc[i]);
  }
  
  purify_image_writefile(&img_copy, "data/test/einrwdb8res.fits", filetype_img);

  //Error image
  for (i=0; i < Nx; i++){
    img_copy.pix[i] = error[i];
  }
  
  purify_image_writefile(&img_copy, "data/test/einrwdb8error.fits", filetype_img);



  printf("**********************\n");
  printf("BP reconstruction\n");
  printf("**********************\n");

  param4.gamma = gamma*aux1;

  //Initial solution
  for (i=0; i < Nx; i++) {
      xoutc[i] = 0.0 + 0.0*I;
  }

  assert((start = clock())!=-1);
  sopt_l1_sdmm((void*)xoutc, Nx,
                   &purify_measurement_cftfwd,
                   datafwd,
                   &purify_measurement_cftadj,
                   dataadj,
                   &sopt_sara_synthesisop,
                   datas2,
                   &sopt_sara_analysisop,
                   datas2,
                   Nx,
                   (void*)y, Ny, w, param4); 

  stop = clock();
  t = (double) (stop-start)/CLOCKS_PER_SEC;

  printf("Time BP: %f \n\n", t); 

  //SNR
  for (i=0; i < Nx; i++) {
        error[i] = creal(xoutc[i])-xout[i];
    }
    mse = cblas_dnrm2(Nx, error, 1);
    a = cblas_dnrm2(Nx, xout, 1);
    snr_out = 20.0*log10(a/mse);
    printf("SNR: %f dB\n\n", snr_out);

  for (i=0; i < Nx; i++){
    img_copy.pix[i] = creal(xoutc[i]);
  }
  
  purify_image_writefile(&img_copy, "data/test/einbp.fits", filetype_img);

   //Residual image

  purify_measurement_cftfwd((void*)y0, (void*)xoutc, datafwd);
  alpha = -1.0 +0.0*I;
  cblas_zaxpy(Ny, (void*)&alpha, y, 1, y0, 1);
  purify_measurement_cftadj((void*)xinc, (void*)y0, dataadj);

  for (i=0; i < Nx; i++){
    img_copy.pix[i] = creal(xinc[i]);
  }
  
  purify_image_writefile(&img_copy, "data/test/einbpres.fits", filetype_img);

  //Error image
  for (i=0; i < Nx; i++){
    img_copy.pix[i] = error[i];
  }
  
  purify_image_writefile(&img_copy, "data/test/einbperror.fits", filetype_img);

  printf("**********************\n");
  printf("RWBP reconstruction\n");
  printf("**********************\n");

  

  //Structure for the RWL1 solver    
  param5.verbose = 2;
  param5.max_iter = 5;
  param5.rel_var = 0.001;
  param5.sigma = sigma*sqrt((double)Ny/(double)Nx);
  param5.init_sol = 1;

  
  assert((start = clock())!=-1);
  sopt_l1_rwsdmm((void*)xoutc, Nx,
                   &purify_measurement_cftfwd,
                   datafwd,
                   &purify_measurement_cftadj,
                   dataadj,
                   &sopt_sara_synthesisop,
                   datas2,
                   &sopt_sara_analysisop,
                   datas2,
                   Nx,
                   (void*)y, Ny, param4, param5);

  stop = clock();
  t = (double) (stop-start)/CLOCKS_PER_SEC;

  printf("Time RWBP: %f \n\n", t); 

  

    //SNR
    for (i=0; i < Nx; i++) {
        error[i] = creal(xoutc[i])-xout[i];
    }
    mse = cblas_dnrm2(Nx, error, 1);
    a = cblas_dnrm2(Nx, xout, 1);
    snr_out = 20.0*log10(a/mse);
    printf("SNR: %f dB\n\n", snr_out);

  for (i=0; i < Nx; i++){
    img_copy.pix[i] = creal(xoutc[i]);
  }
  
  purify_image_writefile(&img_copy, "data/test/einrwbp.fits", filetype_img);

   //Residual image

  purify_measurement_cftfwd((void*)y0, (void*)xoutc, datafwd);
  alpha = -1.0 +0.0*I;
  cblas_zaxpy(Ny, (void*)&alpha, y, 1, y0, 1);
  purify_measurement_cftadj((void*)xinc, (void*)y0, dataadj);

  for (i=0; i < Nx; i++){
    img_copy.pix[i] = creal(xinc[i]);
  }
  
  purify_image_writefile(&img_copy, "data/test/einrwbpres.fits", filetype_img);

  //Error image
  for (i=0; i < Nx; i++){
    img_copy.pix[i] = error[i];
  }
  
  purify_image_writefile(&img_copy, "data/test/einrwbperror.fits", filetype_img);


  
  
  //Free all memory
  purify_image_free(&img);
  purify_image_free(&img_copy);
  free(deconv);
  purify_visibility_free(&vis_test);
  free(y);
  free(xinc);
  free(xout);
  free(w);
  free(noise);
  free(y0);
  free(error);
  free(xoutc);
  free(wdx);
  free(wdy);

  sopt_sara_free(&param1);
  sopt_sara_free(&param2);
  sopt_sara_free(&param3);
  free(dict_types);
  free(dict_types1);
  free(dict_types2);

  free(fft_temp1);
  free(fft_temp2);
  fftw_destroy_plan(planfwd);
  fftw_destroy_plan(planadj);
  purify_sparsemat_freer(&gmat);

  free(dummyr);
  free(dummyc);


  return 0;

}
コード例 #12
0
ファイル: _dotblas.c プロジェクト: jadolfbr/Pymol-script-repo
static PyObject *
dotblas_matrixproduct(PyObject *dummy, PyObject *args)
{
    PyObject *op1, *op2;
    PyArrayObject *ap1=NULL, *ap2=NULL, *ret=NULL;
    int j, l, lda, ldb, ldc;
    int typenum, nd;
    intp ap1stride=0;
    intp dimensions[MAX_DIMS];
    intp numbytes;
    static const float oneF[2] = {1.0, 0.0};
    static const float zeroF[2] = {0.0, 0.0};
    static const double oneD[2] = {1.0, 0.0};
    static const double zeroD[2] = {0.0, 0.0};
    double prior1, prior2;
    PyTypeObject *subtype;
    PyArray_Descr *dtype;
    MatrixShape ap1shape, ap2shape;

    if (!PyArg_ParseTuple(args, "OO", &op1, &op2)) return NULL;

    /*
     * "Matrix product" using the BLAS.
     * Only works for float double and complex types.
     */

    typenum = PyArray_ObjectType(op1, 0);
    typenum = PyArray_ObjectType(op2, typenum);

    /* This function doesn't handle other types */
    if ((typenum != PyArray_DOUBLE && typenum != PyArray_CDOUBLE &&
            typenum != PyArray_FLOAT && typenum != PyArray_CFLOAT)) {
        return PyArray_Return((PyArrayObject *)PyArray_MatrixProduct(op1, op2));
    }

    dtype = PyArray_DescrFromType(typenum);
    ap1 = (PyArrayObject *)PyArray_FromAny(op1, dtype, 0, 0, ALIGNED, NULL);
    if (ap1 == NULL) return NULL;
    Py_INCREF(dtype);
    ap2 = (PyArrayObject *)PyArray_FromAny(op2, dtype, 0, 0, ALIGNED, NULL);
    if (ap2 == NULL) goto fail;


    if ((ap1->nd > 2) || (ap2->nd > 2)) {
        /* This function doesn't handle dimensions greater than 2
           (or negative striding)  -- other
           than to ensure the dot function is altered
        */
        if (!altered) {
            /* need to alter dot product */
            PyObject *tmp1, *tmp2;
            tmp1 = PyTuple_New(0);
            tmp2 = dotblas_alterdot(NULL, tmp1);
            Py_DECREF(tmp1);
            Py_DECREF(tmp2);
        }
        ret = (PyArrayObject *)PyArray_MatrixProduct((PyObject *)ap1,
                (PyObject *)ap2);
        Py_DECREF(ap1);
        Py_DECREF(ap2);
        return PyArray_Return(ret);
    }

    if (_bad_strides(ap1)) {
        op1 = PyArray_NewCopy(ap1, PyArray_ANYORDER);
        Py_DECREF(ap1);
        ap1 = (PyArrayObject *)op1;
        if (ap1 == NULL) goto fail;
    }
    if (_bad_strides(ap2)) {
        op2 = PyArray_NewCopy(ap2, PyArray_ANYORDER);
        Py_DECREF(ap2);
        ap2 = (PyArrayObject *)op2;
        if (ap2 == NULL) goto fail;
    }
    ap1shape = _select_matrix_shape(ap1);
    ap2shape = _select_matrix_shape(ap2);

    if (ap1shape == _scalar || ap2shape == _scalar) {
        PyArrayObject *oap1, *oap2;
        oap1 = ap1;
        oap2 = ap2;
        /* One of ap1 or ap2 is a scalar */
        if (ap1shape == _scalar) { 		/* Make ap2 the scalar */
            PyArrayObject *t = ap1;
            ap1 = ap2;
            ap2 = t;
            ap1shape = ap2shape;
            ap2shape = _scalar;
        }

        if (ap1shape == _row) ap1stride = ap1->strides[1];
        else if (ap1->nd > 0) ap1stride = ap1->strides[0];

        if (ap1->nd == 0 || ap2->nd == 0) {
            intp *thisdims;
            if (ap1->nd == 0) {
                nd = ap2->nd;
                thisdims = ap2->dimensions;
            }
            else {
                nd = ap1->nd;
                thisdims = ap1->dimensions;
            }
            l = 1;
            for (j=0; j<nd; j++) {
                dimensions[j] = thisdims[j];
                l *= dimensions[j];
            }
        }
        else {
            l = oap1->dimensions[oap1->nd-1];

            if (oap2->dimensions[0] != l) {
                PyErr_SetString(PyExc_ValueError, "matrices are not aligned");
                goto fail;
            }
            nd = ap1->nd + ap2->nd - 2;
            /* nd = 0 or 1 or 2 */
            /* If nd == 0 do nothing ... */
            if (nd == 1) {
                /* Either ap1->nd is 1 dim or ap2->nd is 1 dim
                   and the other is 2-dim */
                dimensions[0] = (oap1->nd == 2) ? oap1->dimensions[0] : oap2->dimensions[1];
                l = dimensions[0];
                /* Fix it so that dot(shape=(N,1), shape=(1,))
                   and dot(shape=(1,), shape=(1,N)) both return
                   an (N,) array (but use the fast scalar code)
                */
            }
            else if (nd == 2) {
                dimensions[0] = oap1->dimensions[0];
                dimensions[1] = oap2->dimensions[1];
                /* We need to make sure that dot(shape=(1,1), shape=(1,N))
                   and dot(shape=(N,1),shape=(1,1)) uses
                   scalar multiplication appropriately
                */
                if (ap1shape == _row) l = dimensions[1];
                else l = dimensions[0];
            }
        }
    }
    else { /* (ap1->nd <= 2 && ap2->nd <= 2) */
        /*  Both ap1 and ap2 are vectors or matrices */
        l = ap1->dimensions[ap1->nd-1];

        if (ap2->dimensions[0] != l) {
            PyErr_SetString(PyExc_ValueError, "matrices are not aligned");
            goto fail;
        }
        nd = ap1->nd+ap2->nd-2;

        if (nd == 1)
            dimensions[0] = (ap1->nd == 2) ? ap1->dimensions[0] : ap2->dimensions[1];
        else if (nd == 2) {
            dimensions[0] = ap1->dimensions[0];
            dimensions[1] = ap2->dimensions[1];
        }
    }

    /* Choose which subtype to return */
    if (ap1->ob_type != ap2->ob_type) {
        prior2 = PyArray_GetPriority((PyObject *)ap2, 0.0);
        prior1 = PyArray_GetPriority((PyObject *)ap1, 0.0);
        subtype = (prior2 > prior1 ? ap2->ob_type : ap1->ob_type);
    }
    else {
        prior1 = prior2 = 0.0;
        subtype = ap1->ob_type;
    }

    ret = (PyArrayObject *)PyArray_New(subtype, nd, dimensions,
                                       typenum, NULL, NULL, 0, 0,
                                       (PyObject *)
                                       (prior2 > prior1 ? ap2 : ap1));

    if (ret == NULL) goto fail;
    numbytes = PyArray_NBYTES(ret);
    memset(ret->data, 0, numbytes);
    if (numbytes==0 || l == 0) {
        Py_DECREF(ap1);
        Py_DECREF(ap2);
        return PyArray_Return(ret);
    }


    if (ap2shape == _scalar) {
        /* Multiplication by a scalar -- Level 1 BLAS */
        /* if ap1shape is a matrix and we are not contiguous, then we can't
           just blast through the entire array using a single
           striding factor */
        NPY_BEGIN_ALLOW_THREADS

        if (typenum == PyArray_DOUBLE) {
            if (l == 1) {
                *((double *)ret->data) = *((double *)ap2->data) * \
                                         *((double *)ap1->data);
            }
            else if (ap1shape != _matrix) {
                cblas_daxpy(l, *((double *)ap2->data), (double *)ap1->data,
                            ap1stride/sizeof(double), (double *)ret->data, 1);
            }
            else {
                int maxind, oind, i, a1s, rets;
                char *ptr, *rptr;
                double val;
                maxind = (ap1->dimensions[0] >= ap1->dimensions[1] ? 0 : 1);
                oind = 1-maxind;
                ptr = ap1->data;
                rptr = ret->data;
                l = ap1->dimensions[maxind];
                val = *((double *)ap2->data);
                a1s = ap1->strides[maxind] / sizeof(double);
                rets = ret->strides[maxind] / sizeof(double);
                for (i=0; i < ap1->dimensions[oind]; i++) {
                    cblas_daxpy(l, val, (double *)ptr, a1s,
                                (double *)rptr, rets);
                    ptr += ap1->strides[oind];
                    rptr += ret->strides[oind];
                }
            }
        }
        else if (typenum == PyArray_CDOUBLE) {
            if (l == 1) {
                cdouble *ptr1, *ptr2, *res;
                ptr1 = (cdouble *)ap2->data;
                ptr2 = (cdouble *)ap1->data;
                res = (cdouble *)ret->data;
                res->real = ptr1->real * ptr2->real - ptr1->imag * ptr2->imag;
                res->imag = ptr1->real * ptr2->imag + ptr1->imag * ptr2->real;
            }
            else if (ap1shape != _matrix) {
                cblas_zaxpy(l, (double *)ap2->data, (double *)ap1->data,
                            ap1stride/sizeof(cdouble), (double *)ret->data, 1);
            }
            else {
                int maxind, oind, i, a1s, rets;
                char *ptr, *rptr;
                double *pval;
                maxind = (ap1->dimensions[0] >= ap1->dimensions[1] ? 0 : 1);
                oind = 1-maxind;
                ptr = ap1->data;
                rptr = ret->data;
                l = ap1->dimensions[maxind];
                pval = (double *)ap2->data;
                a1s = ap1->strides[maxind] / sizeof(cdouble);
                rets = ret->strides[maxind] / sizeof(cdouble);
                for (i=0; i < ap1->dimensions[oind]; i++) {
                    cblas_zaxpy(l, pval, (double *)ptr, a1s,
                                (double *)rptr, rets);
                    ptr += ap1->strides[oind];
                    rptr += ret->strides[oind];
                }
            }
        }
        else if (typenum == PyArray_FLOAT) {
            if (l == 1) {
                *((float *)ret->data) = *((float *)ap2->data) * \
                                        *((float *)ap1->data);
            }
            else if (ap1shape != _matrix) {
                cblas_saxpy(l, *((float *)ap2->data), (float *)ap1->data,
                            ap1stride/sizeof(float), (float *)ret->data, 1);
            }
            else {
                int maxind, oind, i, a1s, rets;
                char *ptr, *rptr;
                float val;
                maxind = (ap1->dimensions[0] >= ap1->dimensions[1] ? 0 : 1);
                oind = 1-maxind;
                ptr = ap1->data;
                rptr = ret->data;
                l = ap1->dimensions[maxind];
                val = *((float *)ap2->data);
                a1s = ap1->strides[maxind] / sizeof(float);
                rets = ret->strides[maxind] / sizeof(float);
                for (i=0; i < ap1->dimensions[oind]; i++) {
                    cblas_saxpy(l, val, (float *)ptr, a1s,
                                (float *)rptr, rets);
                    ptr += ap1->strides[oind];
                    rptr += ret->strides[oind];
                }
            }
        }
        else if (typenum == PyArray_CFLOAT) {
            if (l == 1) {
                cfloat *ptr1, *ptr2, *res;
                ptr1 = (cfloat *)ap2->data;
                ptr2 = (cfloat *)ap1->data;
                res = (cfloat *)ret->data;
                res->real = ptr1->real * ptr2->real - ptr1->imag * ptr2->imag;
                res->imag = ptr1->real * ptr2->imag + ptr1->imag * ptr2->real;
            }
            else if (ap1shape != _matrix) {
                cblas_caxpy(l, (float *)ap2->data, (float *)ap1->data,
                            ap1stride/sizeof(cfloat), (float *)ret->data, 1);
            }
            else {
                int maxind, oind, i, a1s, rets;
                char *ptr, *rptr;
                float *pval;
                maxind = (ap1->dimensions[0] >= ap1->dimensions[1] ? 0 : 1);
                oind = 1-maxind;
                ptr = ap1->data;
                rptr = ret->data;
                l = ap1->dimensions[maxind];
                pval = (float *)ap2->data;
                a1s = ap1->strides[maxind] / sizeof(cfloat);
                rets = ret->strides[maxind] / sizeof(cfloat);
                for (i=0; i < ap1->dimensions[oind]; i++) {
                    cblas_caxpy(l, pval, (float *)ptr, a1s,
                                (float *)rptr, rets);
                    ptr += ap1->strides[oind];
                    rptr += ret->strides[oind];
                }
            }
        }
        NPY_END_ALLOW_THREADS
    }
コード例 #13
0
static int check_solution(PLASMA_enum transA, PLASMA_enum transB, int M, int N, int K,
                          PLASMA_Complex64_t alpha, PLASMA_Complex64_t *A, int LDA,
                          PLASMA_Complex64_t *B, int LDB,
                          PLASMA_Complex64_t beta, PLASMA_Complex64_t *Cref, PLASMA_Complex64_t *Cplasma, int LDC)
{
    int info_solution;
    double Anorm, Bnorm, Cinitnorm, Cplasmanorm, Clapacknorm, Rnorm, result;
    double eps;
    PLASMA_Complex64_t beta_const;

    double *work = (double *)malloc(max(K,max(M, N))* sizeof(double));
    int Am, An, Bm, Bn;

    beta_const  = -1.0;

    if (transA == PlasmaNoTrans) {
        Am = M; An = K;
    } else {
        Am = K; An = M;
    }
    if (transB == PlasmaNoTrans) {
        Bm = K; Bn = N;
    } else {
        Bm = N; Bn = K;
    }

    Anorm       = LAPACKE_zlange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), Am, An, A,       LDA, work);
    Bnorm       = LAPACKE_zlange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), Bm, Bn, B,       LDB, work);
    Cinitnorm   = LAPACKE_zlange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), M,  N,  Cref,    LDC, work);
    Cplasmanorm = LAPACKE_zlange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), M,  N,  Cplasma, LDC, work);

    cblas_zgemm(CblasColMajor, (CBLAS_TRANSPOSE)transA, (CBLAS_TRANSPOSE)transB, M, N, K, 
                CBLAS_SADDR(alpha), A, LDA, B, LDB, CBLAS_SADDR(beta), Cref, LDC);

    Clapacknorm = LAPACKE_zlange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), M, N, Cref, LDC, work);

    cblas_zaxpy(LDC * N, CBLAS_SADDR(beta_const), Cplasma, 1, Cref, 1);

    Rnorm = LAPACKE_zlange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), M, N, Cref, LDC, work);

    eps = LAPACKE_dlamch_work('e');

    printf("Rnorm %e, Anorm %e, Bnorm %e, Cinitnorm %e, Cplasmanorm %e, Clapacknorm %e\n", 
           Rnorm, Anorm, Bnorm, Cinitnorm, Cplasmanorm, Clapacknorm);

    result = Rnorm / ((Anorm + Bnorm + Cinitnorm) * N * eps);
    printf("============\n");
    printf("Checking the norm of the difference against reference ZGEMM \n");
    printf("-- ||Cplasma - Clapack||_oo/((||A||_oo+||B||_oo+||C||_oo).N.eps) = %e \n", 
           result);

    if (  isnan(Rnorm) || isinf(Rnorm) || isnan(result) || isinf(result) || (result > 10.0) ) {
         printf("-- The solution is suspicious ! \n");
         info_solution = 1;
    }
    else {
         printf("-- The solution is CORRECT ! \n");
         info_solution= 0 ;
    }

    free(work);

    return info_solution;
}
コード例 #14
0
ファイル: core_zttrfb.c プロジェクト: joao-lima/plasma-kaapi
int CORE_zttrfb(int side, int trans, int direct, int storev,
                int M1, int N1, int M2, int N2, int K,
                PLASMA_Complex64_t *A1, int LDA1,
                PLASMA_Complex64_t *A2, int LDA2,
                PLASMA_Complex64_t *V, int LDV,
                PLASMA_Complex64_t *T, int LDT,
                PLASMA_Complex64_t *WORK, int LDWORK)
{
    static PLASMA_Complex64_t zone  =  1.0;
    static PLASMA_Complex64_t mzone = -1.0;

    int j, vi;

    /* Check input arguments */
    if (M1 < 0) {
        coreblas_error(5, "Illegal value of M1");
        return -5;
    }
    if (N1 < 0) {
        coreblas_error(6, "Illegal value of N1");
        return -6;
    }
    if ((M2 < 0) ||
        ( (side == PlasmaRight) && (M1 != M2) ) ) {
        coreblas_error(7, "Illegal value of M2");
        return -7;
    }
    if ((N2 < 0) ||
        ( (side == PlasmaLeft) && (N1 != N2) ) ) {
        coreblas_error(8, "Illegal value of N2");
        return -8;
    }
    if (K < 0) {
        coreblas_error(9, "Illegal value of K");
        return -9;
    }

    /* Quick return */
    if ((M1 == 0) || (N1 == 0) || (M2 == 0) || (N2 == 0) || (K == 0))
        return PLASMA_SUCCESS;

    if (storev == PlasmaColumnwise) {

        if (direct == PlasmaForward) {
            /*
             * Let  V =  ( V1 )    (first K rows)
             *           ( V2 )
             * where V2 is non-unit upper triangular
             */
            if (side == PlasmaLeft) {

                /*
                 * Colwise / Forward / Left
                 * -------------------------
                 *
                 * Form  H * A  or  H' * A  where  A = ( A1 )
                 *                                     ( A2 )
                 * where A2 = ( A2_1 )
                 *            ( A2_2 )
                 */

                /*
                 * W = A1 + V' * A2
                 */

                /*
                 * W = A2_2
                 */
                LAPACKE_zlacpy_work(LAPACK_COL_MAJOR,
                    lapack_const(PlasmaUpperLower),
                    K, N2,
                    &A2[M2-K], LDA2, WORK, LDWORK);

                /*
                 * W = V2' * A2_2
                 */
                cblas_ztrmm(
                    CblasColMajor, CblasLeft, CblasUpper,
                    CblasConjTrans, CblasNonUnit, K, N2,
                    CBLAS_SADDR(zone), &V[M2-K], LDV,
                    WORK, LDWORK);

                if (M2 > K) {
                    /*
                     * W = W + V1' * A2_1
                     */
                    cblas_zgemm(
                        CblasColMajor, CblasConjTrans, CblasNoTrans,
                        K, N2, M2-K,
                        CBLAS_SADDR(zone), V, LDV,
                        A2, LDA2,
                        CBLAS_SADDR(zone), WORK, LDWORK);
                }

                /*
                 * W = A1 + W
                 */
                for(j = 0; j < N1; j++) {
                    cblas_zaxpy(K, CBLAS_SADDR(zone),
                            &A1[LDA1*j], 1,
                            &WORK[LDWORK*j], 1);
                }

                /*
                 * A2 = A2 - V * T * W -> W = T * W, A2 = A2 - V * W
                 */

                /*
                 * W = T * W
                 */
                cblas_ztrmm(
                    CblasColMajor, CblasLeft, CblasUpper,
                    (CBLAS_TRANSPOSE)trans, CblasNonUnit, K, N2,
                    CBLAS_SADDR(zone), T, LDT, WORK, LDWORK);
                /*
                 * A1 = A1 - W
                 */
                for(j = 0; j < N1; j++) {
                    cblas_zaxpy(K, CBLAS_SADDR(mzone),
                            &WORK[LDWORK*j], 1,
                            &A1[LDA1*j], 1);
                }

                /*
                 * A2_1 = A2_1 - V1 * W
                 */
                if (M2 > K) {
                    cblas_zgemm(
                        CblasColMajor, CblasNoTrans, CblasNoTrans,
                        M2-K, N2, K,
                        CBLAS_SADDR(mzone), V, LDV,
                        WORK, LDWORK,
                        CBLAS_SADDR(zone), A2, LDA2);
                }

                /*
                 * W = - V2 * W
                 */
                cblas_ztrmm(
                    CblasColMajor, CblasLeft, CblasUpper,
                    CblasNoTrans, CblasNonUnit, K, N2,
                    CBLAS_SADDR(mzone), &V[M2-K], LDV,
                    WORK, LDWORK);

                /*
                 * A2_2 = A2_2 + W
                 */
                for(j = 0; j < N2; j++) {
                    cblas_zaxpy(
                        K, CBLAS_SADDR(zone),
                        &WORK[LDWORK*j], 1,
                        &A2[LDA2*j+(M2-K)], 1);
                }
            }
            else {
                /* 
                 * Colwise / Forward / Right
                 * -------------------------
                 *
                 * Form  H * A  or  H' * A  where:
                 * 
                 *   A  = ( A1 A2 )
                 *
                 *   A2 = ( A2_1 : A2_2 )
                 *
                 *         A2_1 is M2 x (M2-K)
                 *         A2_2 is M2 x K
                 *
                 *   V = ( V_1 ) 
                 *       ( V_2 )
                 *
                 *         V_1 is full and (N2-K) x K
                 *         V_2 is upper triangular and K x K
                 */

                /*
                 * W = ( A1 + A2_1*V_1 + A2_2*V_2 ) * op(T)
                 *
                 *    W  is M x K
                 *    A1 is M x K
                 *    A2 is M x N2 split as (A2_1 A2_2) such as
                 *       A2_1 is (N2-K) x K
                 *       A2_2 is M x K
                 */

                /* W = A2_2 */
                LAPACKE_zlacpy_work(LAPACK_COL_MAJOR,
                    lapack_const(PlasmaUpperLower),
                    M2, K,
                    &A2[LDA2*(N2-K)], LDA2, WORK, LDWORK);

                /* W = W * V_2 --> W = A2_2 * V_2 */
                cblas_ztrmm(
                    CblasColMajor, CblasRight, CblasUpper,
                    CblasNoTrans, CblasNonUnit, M2, K,
                    CBLAS_SADDR(zone), &V[N2-K], LDV,
                    WORK, LDWORK);

                /* W = W + A2_1 * V_1 */
                if (N2 > K) {
                    cblas_zgemm(
                        CblasColMajor, CblasNoTrans, CblasNoTrans,
                        M2, K, N2-K,
                        CBLAS_SADDR(zone), A2, LDA2,
                        V, LDV,
                        CBLAS_SADDR(zone), WORK, LDWORK);
                }

                /* W = A1 + W */
                for (j = 0; j < K; j++) {
                    cblas_zaxpy(M1, CBLAS_SADDR(zone), &A1[LDA1*j], 1,
                                &WORK[LDWORK*j], 1);
                }

                /* W = W * T --> ( A1 + A2_1*V_1 + A2_2*V_2 ) * op(T) */
                cblas_ztrmm(
                    CblasColMajor, CblasRight, CblasUpper,
                    (CBLAS_TRANSPOSE)trans, CblasNonUnit, M2, K,
                    CBLAS_SADDR(zone), T, LDT, WORK, LDWORK);

                /*
                 * A1 = A1 - W
                 */

                for(j = 0; j < K; j++) {
                    cblas_zaxpy(M1, CBLAS_SADDR(mzone), &WORK[LDWORK*j], 1,
                                &A1[LDA1*j], 1);
                }

                /*
                 * A2 = A2 - W * V' --> A2 - W*V_1' - W*V_2'
                 */

                /* A2 = A2 - W * V_1' */
                if (N2 > K) {
                    cblas_zgemm(
                        CblasColMajor, CblasNoTrans, CblasConjTrans,
                        M2, N2-K, K,
                        CBLAS_SADDR(mzone), WORK, LDWORK,
                        V, LDV,
                        CBLAS_SADDR(zone), A2, LDA2);
                }

                /* A2 =  A2 -  W * V_2' */
                cblas_ztrmm(
                    CblasColMajor, CblasRight, CblasUpper,
                    CblasConjTrans, CblasNonUnit, M2, K,
                    CBLAS_SADDR(mzone), &V[N2-K], LDV,
                    WORK, LDWORK);

                for(j = 0; j < K; j++) {
                    cblas_zaxpy(
                        M2, CBLAS_SADDR(zone),
                        &WORK[LDWORK*j], 1,
                        &A2[LDA2*(j+N2-K)], 1);
                }
            }
        }
        else {
            coreblas_error(3, "Not implemented (ColWise / Backward / Left or Right)");
            return PLASMA_ERR_NOT_SUPPORTED;
        }
    }
    else {
        /*
         * Rowwise
         */
        if (direct == PlasmaForward) {
            /*
             * Let  V =  ( V1 V2 )    (V1: first K cols)
             * 
             * where V2 is non-unit lower triangular
             */

            if (side == PlasmaLeft) {

                /*
                 * Rowwise / Forward / Left
                 * -------------------------
                 *
                 * Form  H * A  or  H' * A  where  A = ( A1 )
                 *                                     ( A2 )
                 * where A2 = ( A2_1 )
                 *            ( A2_2 )
                 */

                /* V_2 first element */
                vi = LDV*(M2-K);

                /*
                 * W = A1 + V * A2
                 */

                /*
                 * W = A2_2
                 */
                LAPACKE_zlacpy_work(LAPACK_COL_MAJOR,
                    lapack_const(PlasmaUpperLower),
                    K, N2,
                    &A2[M2-K], LDA2, WORK, LDWORK);

                /*
                 * W = V2 * A2_2
                 */
                //**DB CblasColMajor, CblasLeft, CblasUpper,
                cblas_ztrmm(
                    CblasColMajor, CblasLeft, CblasLower,
                    CblasNoTrans, CblasNonUnit, K, N2,
                    CBLAS_SADDR(zone), &V[vi], LDV,
                    WORK, LDWORK);

                if (M2 > K) {
                    /*
                     * W = W + V1 * A2_1
                     */
                    cblas_zgemm(
                        CblasColMajor, CblasNoTrans, CblasNoTrans,
                        K, N2, M2-K,
                        CBLAS_SADDR(zone), V, LDV,
                        A2, LDA2,
                        CBLAS_SADDR(zone), WORK, LDWORK);
                }

                /*
                 * W = A1 + W
                 */
                for(j = 0; j < N1; j++) {
                    cblas_zaxpy(
                            K, CBLAS_SADDR(zone),
                            &A1[LDA1*j], 1,
                            &WORK[LDWORK*j], 1);
                }

                /*
                 * W = T * W
                 */
                cblas_ztrmm(
                    CblasColMajor, CblasLeft, CblasUpper,
                    (CBLAS_TRANSPOSE)trans, CblasNonUnit, K, N2,
                    CBLAS_SADDR(zone), T, LDT, WORK, LDWORK);

                /*
                 * A1 = A1 - W
                 */
                for(j = 0; j < N1; j++) {
                    cblas_zaxpy(
                            K, CBLAS_SADDR(mzone),
                            &WORK[LDWORK*j], 1,
                            &A1[LDA1*j], 1);
                }

                /*
                 * A2 = A2 - V' * T * W -> A2 = A2 - V' * W
                 */

                /*
                 * A2_1 = A2_1 - V1' * W
                 */
                if (M2 > K) {
                    cblas_zgemm(
                        CblasColMajor, CblasConjTrans, CblasNoTrans,
                        M2-K, N2, K,
                        CBLAS_SADDR(mzone), V, LDV,
                        WORK, LDWORK,
                        CBLAS_SADDR(zone), A2, LDA2);
                }

                /*
                 * W = - V2' * W
                 */
                cblas_ztrmm(
                    CblasColMajor, CblasLeft, CblasLower,
                    CblasConjTrans, CblasNonUnit, K, N2,
                    CBLAS_SADDR(mzone), &V[vi], LDV,
                    WORK, LDWORK);

                /*
                 * A2_2 = A2_2 + W
                 */
                for(j = 0; j < N2; j++) {
                    cblas_zaxpy(
                        K, CBLAS_SADDR(zone),
                        &WORK[LDWORK*j], 1,
                        &A2[LDA2*j+(M2-K)], 1);
                }
            }
            else {
                /* 
                 * Rowwise / Forward / Right
                 * -------------------------
                 *
                 * Form  H * A  or  H' * A  where:
                 * 
                 *   A  = ( A1 A2 )
                 *
                 *   A2 = ( A2_1 : A2_2 )
                 *
                 *         A2_1 is M2 x (M2-K)
                 *         A2_2 is M2 x K
                 *
                 *   V = ( V_1 ) 
                 *       ( V_2 )
                 *
                 *         V_1 is full and (N2-K) x K
                 *         V_2 is lower triangular and K x K
                 */

                /*
                 * W = ( A1 + A2_1*V_1 + A2_2*V_2 ) * op(T)
                 *
                 *    W  is M x K
                 *    A1 is M x K
                 *    A2 is M x N2 split as (A2_1 A2_2) such as
                 *       A2_1 is (N2-K) x K
                 *       A2_2 is M x K
                 */

                /* V_2 and A2_2 first element */
                vi = LDV*(N2-K);

                /* W = A2_2 */
                LAPACKE_zlacpy_work(LAPACK_COL_MAJOR,
                    lapack_const(PlasmaUpperLower),
                    M2, K,
                    &A2[LDA2*(N2-K)], LDA2, WORK, LDWORK);

                /* W = W * V_2' --> W = A2_2 * V_2' */
                cblas_ztrmm(
                    CblasColMajor, CblasRight, CblasLower,
                    CblasConjTrans, CblasNonUnit, M2, K,
                    CBLAS_SADDR(zone), &V[vi], LDV,
                    WORK, LDWORK);

                /* W = W + A2_1 * V_1' */
                if (N2 > K) {
                    cblas_zgemm(
                        CblasColMajor, CblasNoTrans, CblasConjTrans,
                        M2, K, N2-K,
                        CBLAS_SADDR(zone), A2, LDA2,
                        V, LDV,
                        CBLAS_SADDR(zone), WORK, LDWORK);
                }

                /* W = A1 + W */
                for (j = 0; j < K; j++) {
                    cblas_zaxpy(M1, CBLAS_SADDR(zone), &A1[LDA1*j], 1,
                                &WORK[LDWORK*j], 1);
                }

                /* W = W * op(T) --> ( A1 + A2_1*V_1 + A2_2*V_2 ) * op(T) */
                cblas_ztrmm(
                    CblasColMajor, CblasRight, CblasUpper,
                    (CBLAS_TRANSPOSE)trans, CblasNonUnit, M2, K,
                    CBLAS_SADDR(zone), T, LDT, WORK, LDWORK);

                /*
                 * A1 = A1 - W
                 */

                for(j = 0; j < K; j++) {
                    cblas_zaxpy(M1, CBLAS_SADDR(mzone), &WORK[LDWORK*j], 1,
                                &A1[LDA1*j], 1);
                }

                /*
                 * A2 = A2 - W * V --> A2 - W*V_1 - W*V_2
                 */

                /* A2 = A2 - W * V_1 */
                if (N2 > K) {
                    cblas_zgemm(
                        CblasColMajor, CblasNoTrans, CblasNoTrans,
                        M2, N2-K, K,
                        CBLAS_SADDR(mzone), WORK, LDWORK,
                        V, LDV,
                        CBLAS_SADDR(zone), A2, LDA2);
                }

                /* A2 =  A2 -  W * V_2 */
                cblas_ztrmm(
                    CblasColMajor, CblasRight, CblasLower,
                    CblasNoTrans, CblasNonUnit, M2, K,
                    CBLAS_SADDR(mzone), &V[vi], LDV,
                    WORK, LDWORK);

                for(j = 0; j < K; j++) {
                    cblas_zaxpy(
                        M2, CBLAS_SADDR(zone),
                        &WORK[LDWORK*j], 1,
                        &A2[LDA2*(N2-K+j)], 1);
                }

            }
 
        }
        else {
            coreblas_error(3, "Not implemented (Rowwise / Backward / Left or Right)");
            return PLASMA_ERR_NOT_SUPPORTED;
        }
    }
    return PLASMA_SUCCESS;
}
コード例 #15
0
ファイル: blas.c プロジェクト: grovesNL/mathnet-numerics
 DLLEXPORT void z_axpy(const blasint n, const openblas_complex_double alpha, const openblas_complex_double x[], openblas_complex_double y[]) {
     cblas_zaxpy(n, (double*)&alpha, (double*)x, 1, (double*)y, 1);
 }
コード例 #16
0
ファイル: c_zblas1.c プロジェクト: 4ker/OpenBLAS
void F77_zaxpy(const int *N, const void *alpha, void *X,
                    const int *incX, void *Y, const int *incY)
{
   cblas_zaxpy(*N, alpha, X, *incX, Y, *incY);
   return;
}
コード例 #17
0
void eblas_zaxpy_sub(size_t iStart, size_t iStop, const complex* a, const complex* x, int incx, complex* y, int incy)
{	cblas_zaxpy(iStop-iStart, a, x+incx*iStart, incx, y+incy*iStart, incy);
}
コード例 #18
0
ファイル: core_ztsqrt.c プロジェクト: joao-lima/plasma-kaapi
int CORE_ztsqrt(int M, int N, int IB,
                PLASMA_Complex64_t *A1, int LDA1,
                PLASMA_Complex64_t *A2, int LDA2,
                PLASMA_Complex64_t *T, int LDT,
                PLASMA_Complex64_t *TAU, PLASMA_Complex64_t *WORK)
{
    static PLASMA_Complex64_t zone  = 1.0;
    static PLASMA_Complex64_t zzero = 0.0;

    PLASMA_Complex64_t alpha;
    int i, ii, sb;

    /* Check input arguments */
    if (M < 0) {
        coreblas_error(1, "Illegal value of M");
        return -1;
    }
    if (N < 0) {
        coreblas_error(2, "Illegal value of N");
        return -2;
    }
    if (IB < 0) {
        coreblas_error(3, "Illegal value of IB");
        return -3;
    }
    if ((LDA2 < max(1,M)) && (M > 0)) {
        coreblas_error(8, "Illegal value of LDA2");
        return -8;
    }

    /* Quick return */
    if ((M == 0) || (N == 0) || (IB == 0))
        return PLASMA_SUCCESS;

    for(ii = 0; ii < N; ii += IB) {
        sb = min(N-ii, IB);
        for(i = 0; i < sb; i++) {
            /*
             * Generate elementary reflector H( II*IB+I ) to annihilate
             * A( II*IB+I:M, II*IB+I )
             */
            LAPACKE_zlarfg_work(M+1, &A1[LDA1*(ii+i)+ii+i], &A2[LDA2*(ii+i)], 1, &TAU[ii+i]);

            if (ii+i+1 < N) {
                /*
                 * Apply H( II*IB+I ) to A( II*IB+I:M, II*IB+I+1:II*IB+IB ) from the left
                 */
                alpha = -conj(TAU[ii+i]);
                cblas_zcopy(
                    sb-i-1,
                    &A1[LDA1*(ii+i+1)+(ii+i)], LDA1,
                    WORK, 1);
#ifdef COMPLEX
                LAPACKE_zlacgv_work(sb-i-1, WORK, 1);
#endif
                cblas_zgemv(
                    CblasColMajor, (CBLAS_TRANSPOSE)PlasmaConjTrans,
                    M, sb-i-1,
                    CBLAS_SADDR(zone), &A2[LDA2*(ii+i+1)], LDA2,
                    &A2[LDA2*(ii+i)], 1,
                    CBLAS_SADDR(zone), WORK, 1);
#ifdef COMPLEX
                LAPACKE_zlacgv_work(sb-i-1, WORK, 1 );
#endif
                cblas_zaxpy(
                    sb-i-1, CBLAS_SADDR(alpha),
                    WORK, 1,
                    &A1[LDA1*(ii+i+1)+ii+i], LDA1);
#ifdef COMPLEX
                LAPACKE_zlacgv_work(sb-i-1, WORK, 1 );
#endif
                cblas_zgerc(
                    CblasColMajor, M, sb-i-1, CBLAS_SADDR(alpha),
                    &A2[LDA2*(ii+i)], 1,
                    WORK, 1,
                    &A2[LDA2*(ii+i+1)], LDA2);
            }
            /*
             * Calculate T
             */
            alpha = -TAU[ii+i];
            cblas_zgemv(
                CblasColMajor, (CBLAS_TRANSPOSE)PlasmaConjTrans, M, i,
                CBLAS_SADDR(alpha), &A2[LDA2*ii], LDA2,
                &A2[LDA2*(ii+i)], 1,
                CBLAS_SADDR(zzero), &T[LDT*(ii+i)], 1);

            cblas_ztrmv(
                CblasColMajor, (CBLAS_UPLO)PlasmaUpper,
                (CBLAS_TRANSPOSE)PlasmaNoTrans, (CBLAS_DIAG)PlasmaNonUnit, i,
                &T[LDT*ii], LDT,
                &T[LDT*(ii+i)], 1);

            T[LDT*(ii+i)+i] = TAU[ii+i];
        }
        if (N > ii+sb) {
            CORE_ztsmqr(
                PlasmaLeft, PlasmaConjTrans,
                sb, N-(ii+sb), M, N-(ii+sb), IB, IB,
                &A1[LDA1*(ii+sb)+ii], LDA1,
                &A2[LDA2*(ii+sb)], LDA2,
                &A2[LDA2*ii], LDA2,
                &T[LDT*ii], LDT,
                WORK, sb);
        }
    }
    return PLASMA_SUCCESS;
}
コード例 #19
0
ファイル: core_zttlqt.c プロジェクト: gpichon/eigenproblems
int CORE_zttlqt(int M, int N, int IB,
                PLASMA_Complex64_t *A1, int LDA1,
                PLASMA_Complex64_t *A2, int LDA2,
                PLASMA_Complex64_t *T, int LDT,
                PLASMA_Complex64_t *TAU, PLASMA_Complex64_t *WORK)
{
    static PLASMA_Complex64_t zone  = 1.0;
    static PLASMA_Complex64_t zzero = 0.0;
#ifdef COMPLEX
    static int                ione  = 1;
#endif

    PLASMA_Complex64_t alpha;
    int i, j, l, ii, sb, mi, ni;

    /* Check input arguments */
    if (M < 0) {
        coreblas_error(1, "Illegal value of M");
        return -1;
    }
    if (N < 0) {
        coreblas_error(2, "Illegal value of N");
        return -2;
    }
    if (IB < 0) {
        coreblas_error(3, "Illegal value of IB");
        return -3;
    }
    if ((LDA2 < max(1,M)) && (M > 0)) {
        coreblas_error(7, "Illegal value of LDA2");
        return -7;
    }

    /* Quick return */
    if ((M == 0) || (N == 0) || (IB == 0))
        return PLASMA_SUCCESS;

    /* TODO: Need to check why some cases require
     *  this to not have uninitialized values */
    CORE_zlaset( PlasmaUpperLower, IB, N,
                 0., 0., T, LDT);

    for(ii = 0; ii < M; ii += IB) {
        sb = min(M-ii, IB);
        for(i = 0; i < sb; i++) {
            j  = ii + i;
            mi = sb-i-1;
            ni = min( j + 1, N);
            /*
             * Generate elementary reflector H( II*IB+I ) to annihilate A( II*IB+I, II*IB+I:M ).
             */
#ifdef COMPLEX
            LAPACKE_zlacgv_work(ni, &A2[j], LDA2);
            LAPACKE_zlacgv_work(ione, &A1[LDA1*j+j], LDA1);
#endif
            LAPACKE_zlarfg_work(ni+1, &A1[LDA1*j+j], &A2[j], LDA2, &TAU[j]);

            if (mi > 0) {
                /*
                 * Apply H( j-1 ) to A( j:II+IB-1, j-1:M  ) from the right.
                 */
                cblas_zcopy(
                    mi,
                    &A1[LDA1*j+(j+1)], 1,
                    WORK, 1);

                cblas_zgemv(
                    CblasColMajor, (CBLAS_TRANSPOSE)PlasmaNoTrans,
                    mi, ni,
                    CBLAS_SADDR(zone), &A2[j+1], LDA2,
                    &A2[j], LDA2,
                    CBLAS_SADDR(zone), WORK, 1);

                alpha = -(TAU[j]);
                cblas_zaxpy(
                    mi, CBLAS_SADDR(alpha),
                    WORK, 1,
                    &A1[LDA1*j+j+1], 1);

                cblas_zgerc(
                    CblasColMajor, mi, ni,
                    CBLAS_SADDR(alpha), WORK, 1,
                    &A2[j], LDA2,
                    &A2[j+1], LDA2);
            }

            /*
             * Calculate T.
             */

            if (i > 0 ) {

                l = min(i, max(0, N-ii));
                alpha = -(TAU[j]);

                CORE_zpemv(
                        PlasmaNoTrans, PlasmaRowwise,
                        i , min(j, N), l,
                        alpha, &A2[ii], LDA2,
                        &A2[j], LDA2,
                        zzero, &T[LDT*j], 1,
                        WORK);

                /* T(0:i-1, j) = T(0:i-1, ii:j-1) * T(0:i-1, j) */
                cblas_ztrmv(
                        CblasColMajor, (CBLAS_UPLO)PlasmaUpper,
                        (CBLAS_TRANSPOSE)PlasmaNoTrans,
                        (CBLAS_DIAG)PlasmaNonUnit,
                        i, &T[LDT*ii], LDT,
                        &T[LDT*j], 1);

            }

#ifdef COMPLEX
            LAPACKE_zlacgv_work(ni, &A2[j], LDA2 );
            LAPACKE_zlacgv_work(ione, &A1[LDA1*j+j], LDA1 );
#endif

            T[LDT*j+i] = TAU[j];
        }

        /* Apply Q to the rest of the matrix to the right */
        if (M > ii+sb) {
            mi = M-(ii+sb);
            ni = min(ii+sb, N);
            l  = min(sb, max(0, ni-ii));
            CORE_zparfb(
                PlasmaRight, PlasmaNoTrans,
                PlasmaForward, PlasmaRowwise,
                mi, IB, mi, ni, sb, l,
                &A1[LDA1*ii+ii+sb], LDA1,
                &A2[ii+sb], LDA2,
                &A2[ii], LDA2,
                &T[LDT*ii], LDT,
                WORK, M);

        }
    }
    return PLASMA_SUCCESS;
}
コード例 #20
0
ファイル: cblasmath.cpp プロジェクト: timbz/Hipnos
void CBlasMath::add(Matrix *a, Matrix *b)
{
    std::complex<double> alpha(1,0);
    cblas_zaxpy(a->getRows() * a->getCols(), &alpha, b->data(), 1, a->data(), 1);
}