void eblas_zaxpy(int N, const complex& a, const complex* x, int incx, complex* y, int incy) { #ifdef MKL_PROVIDES_BLAS cblas_zaxpy(N, &a, x, incx, y, incy); #else threadLaunch((N<100000) ? 1 : 0, eblas_zaxpy_sub, N, &a, x, incx, y, incy); #endif }
static int check_solution(PLASMA_enum uplo, PLASMA_enum trans, int N, int K, PLASMA_Complex64_t alpha, PLASMA_Complex64_t *A, int LDA, PLASMA_Complex64_t *B, int LDB, double beta, PLASMA_Complex64_t *Cref, PLASMA_Complex64_t *Cplasma, int LDC) { int info_solution; double Anorm, Bnorm, Cinitnorm, Cplasmanorm, Clapacknorm, Rnorm, result; double eps; PLASMA_Complex64_t beta_const; double *work = (double *)malloc(max(N, K)* sizeof(double)); beta_const = -1.0; Anorm = LAPACKE_zlange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), (trans == PlasmaNoTrans) ? N : K, (trans == PlasmaNoTrans) ? K : N, A, LDA, work); Bnorm = LAPACKE_zlange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), (trans == PlasmaNoTrans) ? N : K, (trans == PlasmaNoTrans) ? K : N, B, LDB, work); Cinitnorm = LAPACKE_zlange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), N, N, Cref, LDC, work); Cplasmanorm = LAPACKE_zlange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), N, N, Cplasma, LDC, work); cblas_zher2k(CblasColMajor, (CBLAS_UPLO)uplo, (CBLAS_TRANSPOSE)trans, N, K, CBLAS_SADDR(alpha), A, LDA, B, LDB, (beta), Cref, LDC); Clapacknorm = LAPACKE_zlange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), N, N, Cref, LDC, work); cblas_zaxpy(LDC*N, CBLAS_SADDR(beta_const), Cplasma, 1, Cref, 1); Rnorm = LAPACKE_zlange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), N, N, Cref, LDC, work); eps = LAPACKE_dlamch_work('e'); printf("Rnorm %e, Anorm %e, Cinitnorm %e, Cplasmanorm %e, Clapacknorm %e\n", Rnorm, Anorm, Cinitnorm, Cplasmanorm, Clapacknorm); result = Rnorm / ((Anorm + Bnorm + Cinitnorm) * N * eps); printf("============\n"); printf("Checking the norm of the difference against reference ZHER2K \n"); printf("-- ||Cplasma - Clapack||_oo/((||A||_oo+||C||_oo).N.eps) = %e \n", result); if ( isnan(Rnorm) || isinf(Rnorm) || isnan(result) || isinf(result) || (result > 10.0) ) { printf("-- The solution is suspicious ! \n"); info_solution = 1; } else { printf("-- The solution is CORRECT ! \n"); info_solution= 0 ; } free(work); return info_solution; }
void HostVector<std::complex<double> >::AddScale(const BaseVector<std::complex<double> > &x, const std::complex<double> alpha) { assert(&x != NULL); const HostVector<std::complex<double> > *cast_x = dynamic_cast<const HostVector<std::complex<double> >*> (&x); assert(cast_x != NULL); assert(this->size_ == cast_x->size_); cblas_zaxpy(this->size_, &alpha, cast_x->vec_, 1, this->vec_, 1); }
//TODO: the parameter alpha is not used. VrArrayPtrCF64 BlasComplexDouble::vec_add(int ndims, VrArrayPtrCF64 X,VrArrayPtrCF64 Y, const double complex alpha, const int incX,const int incY ){ int N=1; for(int i=0;i<ndims;i++){ N*=VR_GET_DIMS_CF64(X)[i]; } double alph[]={1,0}; VrArrayPtrCF64 Y1=vec_copy(ndims,Y); cblas_zaxpy(N,alph,reinterpret_cast<double*>(VR_GET_DATA_CF64(X)),1,reinterpret_cast<double*>(VR_GET_DATA_CF64(Y1)),1); return Y1; }
void bl1_zaxpy( int n, dcomplex* alpha, dcomplex* x, int incx, dcomplex* y, int incy ) { #ifdef BLIS1_ENABLE_CBLAS_INTERFACES cblas_zaxpy( n, alpha, x, incx, y, incy ); #else F77_zaxpy( &n, alpha, x, &incx, y, &incy ); #endif }
static int check_solution(PLASMA_enum side, PLASMA_enum uplo, int M, int N, PLASMA_Complex64_t alpha, PLASMA_Complex64_t *A, int LDA, PLASMA_Complex64_t *B, int LDB, PLASMA_Complex64_t beta, PLASMA_Complex64_t *Cref, PLASMA_Complex64_t *Cplasma, int LDC) { int info_solution, NrowA; double Anorm, Bnorm, Cinitnorm, Cplasmanorm, Clapacknorm, Rnorm; double eps; PLASMA_Complex64_t beta_const; double result; double *work = (double *)malloc(max(M, N)* sizeof(double)); beta_const = (PLASMA_Complex64_t)-1.0; NrowA = (side == PlasmaLeft) ? M : N; Anorm = LAPACKE_zlange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), NrowA, NrowA, A, LDA, work); Bnorm = LAPACKE_zlange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), M, N, B, LDB, work); Cinitnorm = LAPACKE_zlange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), M, N, Cref, LDC, work); Cplasmanorm = LAPACKE_zlange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), M, N, Cplasma, LDC, work); cblas_zsymm(CblasColMajor, (CBLAS_SIDE)side, (CBLAS_UPLO)uplo, M, N, CBLAS_SADDR(alpha), A, LDA, B, LDB, CBLAS_SADDR(beta), Cref, LDC); Clapacknorm = LAPACKE_zlange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), M, N, Cref, LDC, work); cblas_zaxpy(LDC * N, CBLAS_SADDR(beta_const), Cplasma, 1, Cref, 1); Rnorm = LAPACKE_zlange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), M, N, Cref, LDC, work); eps = LAPACKE_dlamch_work('e'); printf("Rnorm %e, Anorm %e, Bnorm %e, Cinitnorm %e, Cplasmanorm %e, Clapacknorm %e\n",Rnorm,Anorm,Bnorm,Cinitnorm,Cplasmanorm,Clapacknorm); result = Rnorm / ((Anorm + Bnorm + Cinitnorm) * N * eps); printf("============\n"); printf("Checking the norm of the difference against reference ZSYMM \n"); printf("-- ||Cplasma - Clapack||_oo/((||A||_oo+||B||_oo+||C||_oo).N.eps) = %e \n", result ); if ( isinf(Clapacknorm) || isinf(Cplasmanorm) || isnan(result) || isinf(result) || (result > 10.0) ) { printf("-- The solution is suspicious ! \n"); info_solution = 1; } else { printf("-- The solution is CORRECT ! \n"); info_solution= 0 ; } free(work); return info_solution; }
void phi_axpy(const int N, const Complex *alpha, const Complex *X, const int incX, Complex *Y, const int incY){ #ifndef NOBLAS #ifdef SINGLEPRECISION cblas_caxpy(N,alpha,X,1,Y,1); #else cblas_zaxpy(N,alpha,X,1,Y,1); #endif #else int i; for(i = 0; i < N; ++i){ Y[i] = (*alpha)*X[i]+Y[i]; } #endif }
VrArrayPtrCF64 BlasComplexDouble::vec_sub(int ndims, VrArrayPtrCF64 X, VrArrayPtrCF64 Y ,const double complex alpha, const int incX, const int incY){ VrArrayPtrCF64 X1=X; /*if(alpha[0]!=1){ VrArrayPtrCF64 X1=vrAllocArrayF64CM(ndims,0,(int*)VR_GET_DIMS_CF64(X)); X1=BlasComplexDouble::scal_mult(ndims,X,(double*)alpha); }*/ const double arr[]={-1,0}; //double complex arr = -1 ; int N=1; for(int i=0;i<ndims;i++){ N*=VR_GET_DIMS_CF64(X)[i]; } //std::cout<<"N="<<N<<std::endl; //double alph[]={1,0}; VrArrayPtrCF64 Y1=vec_copy(ndims,X); cblas_zaxpy(N,(arr),reinterpret_cast<double*>(VR_GET_DATA_CF64(Y)),1,reinterpret_cast<double*>(VR_GET_DATA_CF64(Y1)),1); //double complex alph = 1+0*I; //VrArrayPtrCF64 Y1= scal_mult(Y->ndims,Y,arr);// vec_copy(ndims,X); // cblas_zaxpy(N,&alph,VR_GET_DATA_CF64(X),1,VR_GET_DATA_CF64(Y1),1); return Y1;//BlasComplexDouble::vec_add(ndims, Y,X1, arr); }
int CORE_zttqrt(int M, int N, int IB, PLASMA_Complex64_t *A1, int LDA1, PLASMA_Complex64_t *A2, int LDA2, PLASMA_Complex64_t *T, int LDT, PLASMA_Complex64_t *TAU, PLASMA_Complex64_t *WORK) { static PLASMA_Complex64_t zone = 1.0; static PLASMA_Complex64_t zzero = 0.0; static int ione = 1; PLASMA_Complex64_t alpha; int i, j, ii, sb, mi, ni; /* Check input arguments */ if (M < 0) { coreblas_error(1, "Illegal value of M"); return -1; } if (N < 0) { coreblas_error(2, "Illegal value of N"); return -2; } if (IB < 0) { coreblas_error(3, "Illegal value of IB"); return -3; } if ((LDA2 < max(1,M)) && (M > 0)) { coreblas_error(7, "Illegal value of LDA2"); return -7; } /* Quick return */ if ((M == 0) || (N == 0) || (IB == 0)) return PLASMA_SUCCESS; for(ii = 0; ii < N; ii += IB) { sb = min(N-ii, IB); for(i = 0; i < sb; i++) { /* * Generate elementary reflector H( II*IB+I ) to annihilate * A( II*IB+I:mi, II*IB+I ). */ mi = ii + i + 1; LAPACKE_zlarfg_work(mi+1, &A1[LDA1*(ii+i)+ii+i], &A2[LDA2*(ii+i)], ione, &TAU[ii+i]); if (sb-i-1>0) { /* * Apply H( II*IB+I ) to A( II*IB+I:M, II*IB+I+1:II*IB+IB ) from the left. */ ni = sb-i-1; cblas_zcopy( ni, &A1[LDA1*(ii+i+1)+(ii+i)], LDA1, WORK, 1); #ifdef COMPLEX LAPACKE_zlacgv_work(ni, WORK, ione); #endif cblas_zgemv( CblasColMajor, (CBLAS_TRANSPOSE)PlasmaConjTrans, mi, ni, CBLAS_SADDR(zone), &A2[LDA2*(ii+i+1)], LDA2, &A2[LDA2*(ii+i)], 1, CBLAS_SADDR(zone), WORK, 1); #ifdef COMPLEX LAPACKE_zlacgv_work(ni, WORK, ione); #endif alpha = -conj(TAU[ii+i]); cblas_zaxpy( ni, CBLAS_SADDR(alpha), WORK, 1, &A1[LDA1*(ii+i+1)+ii+i], LDA1); #ifdef COMPLEX LAPACKE_zlacgv_work(ni, WORK, ione); #endif cblas_zgerc( CblasColMajor, mi, ni, CBLAS_SADDR(alpha), &A2[LDA2*(ii+i)], 1, WORK, 1, &A2[LDA2*(ii+i+1)], LDA2); } /* * Calculate T. */ if (i > 0 ) { cblas_zcopy(i, &A2[LDA2*(ii+i)+ii], 1, &WORK[ii], 1); cblas_ztrmv( CblasColMajor, (CBLAS_UPLO)PlasmaUpper, (CBLAS_TRANSPOSE)PlasmaConjTrans, (CBLAS_DIAG)PlasmaNonUnit, i, &A2[LDA2*ii+ii], LDA2, &WORK[ii], 1); alpha = -(TAU[ii+i]); for(j = 0; j < i; j++) { WORK[ii+j] = alpha * WORK[ii+j]; } if (ii > 0) { cblas_zgemv( CblasColMajor, (CBLAS_TRANSPOSE)PlasmaConjTrans, ii, i, CBLAS_SADDR(alpha), &A2[LDA2*ii], LDA2, &A2[LDA2*(ii+i)], 1, CBLAS_SADDR(zzero), WORK, 1); cblas_zaxpy(i, CBLAS_SADDR(zone), &WORK[ii], 1, WORK, 1); } cblas_zcopy(i, WORK, 1, &T[LDT*(ii+i)], 1); cblas_ztrmv( CblasColMajor, (CBLAS_UPLO)PlasmaUpper, (CBLAS_TRANSPOSE)PlasmaNoTrans, (CBLAS_DIAG)PlasmaNonUnit, i, &T[LDT*ii], LDT, &T[LDT*(ii+i)], 1); } T[LDT*(ii+i)+i] = TAU[ii+i]; } /* Apply Q' to the rest of the matrix to the left */ if (N > ii+sb) { CORE_zttrfb( PlasmaLeft, PlasmaConjTrans, PlasmaForward, PlasmaColumnwise, sb, N-(ii+sb), ii+sb, N-(ii+sb), sb, &A1[LDA1*(ii+sb)+ii], LDA1, &A2[LDA2*(ii+sb)], LDA2, &A2[LDA2*ii], LDA2, &T[LDT*ii], LDT, WORK, sb); } } return PLASMA_SUCCESS; }
// // Overloaded function for dispatching to // * CBLAS backend, and // * complex<double> value-type. // inline void axpy( const int n, const std::complex<double> a, const std::complex<double>* x, const int incx, std::complex<double>* y, const int incy ) { cblas_zaxpy( n, &a, x, incx, y, incy ); }
int main(int argc, char *argv[]) { int i, j, Nx, Ny, Nr, Nb; int seedn=54; double sigma; double a; double mse; double snr; double snr_out; double gamma=0.001; double aux1, aux2, aux3, aux4; complex double alpha; purify_image img, img_copy; purify_visibility_filetype filetype_vis; purify_image_filetype filetype_img; complex double *xinc; complex double *y0; complex double *y; complex double *noise; double *xout; double *w; double *error; complex double *xoutc; double *wdx; double *wdy; double *dummyr; complex double *dummyc; //parameters for the continuos Fourier Transform double *deconv; purify_sparsemat_row gmat; purify_visibility vis_test; purify_measurement_cparam param_m1; purify_measurement_cparam param_m2; complex double *fft_temp1; complex double *fft_temp2; void *datafwd[5]; void *dataadj[5]; fftw_plan planfwd; fftw_plan planadj; //Structures for sparsity operator sopt_wavelet_type *dict_types; sopt_wavelet_type *dict_types1; sopt_wavelet_type *dict_types2; sopt_sara_param param1; sopt_sara_param param2; sopt_sara_param param3; void *datas[1]; void *datas1[1]; void *datas2[1]; //Structures for the opmization problems sopt_l1_sdmmparam param4; sopt_l1_rwparam param5; sopt_prox_tvparam param6; sopt_tv_sdmmparam param7; sopt_tv_rwparam param8; clock_t start, stop; double t = 0.0; double start1, stop1; int dimy, dimx; //Image dimension of the zero padded image //Dimensions should be power of 2 dimx = 256; dimy = 256; //Define parameters filetype_vis = PURIFY_VISIBILITY_FILETYPE_PROFILE_VIS; filetype_img = PURIFY_IMAGE_FILETYPE_FITS; //Read coverage purify_visibility_readfile(&vis_test, "./data/images/Coverages/cont_sim4.vis", filetype_vis); printf("Number of visibilities: %i \n\n", vis_test.nmeas); // Input image. img.fov_x = 1.0 / 180.0 * PURIFY_PI; img.fov_y = 1.0 / 180.0 * PURIFY_PI; img.nx = 4; img.ny = 4; //Read input image purify_image_readfile(&img, "data/images/Einstein.fits", 1); printf("Image dimension: %i, %i \n\n", img.nx, img.ny); // purify_image_writefile(&img, "data/test/Einstein_double.fits", filetype_img); param_m1.nmeas = vis_test.nmeas; param_m1.ny1 = dimy; param_m1.nx1 = dimx; param_m1.ofy = 2; param_m1.ofx = 2; param_m1.ky = 2; param_m1.kx = 2; param_m2.nmeas = vis_test.nmeas; param_m2.ny1 = dimy; param_m2.nx1 = dimx; param_m2.ofy = 2; param_m2.ofx = 2; param_m2.ky = 2; param_m2.kx = 2; Nb = 9; Nx=param_m2.ny1*param_m2.nx1; Nr=Nb*Nx; Ny=param_m2.nmeas; //Memory allocation for the different variables deconv = (double*)malloc((Nx) * sizeof(double)); PURIFY_ERROR_MEM_ALLOC_CHECK(deconv); xinc = (complex double*)malloc((Nx) * sizeof(complex double)); PURIFY_ERROR_MEM_ALLOC_CHECK(xinc); xout = (double*)malloc((Nx) * sizeof(double)); PURIFY_ERROR_MEM_ALLOC_CHECK(xout); y = (complex double*)malloc((vis_test.nmeas) * sizeof(complex double)); PURIFY_ERROR_MEM_ALLOC_CHECK(y); y0 = (complex double*)malloc((vis_test.nmeas) * sizeof(complex double)); PURIFY_ERROR_MEM_ALLOC_CHECK(y0); noise = (complex double*)malloc((vis_test.nmeas) * sizeof(complex double)); PURIFY_ERROR_MEM_ALLOC_CHECK(noise); w = (double*)malloc((Nr) * sizeof(double)); PURIFY_ERROR_MEM_ALLOC_CHECK(w); error = (double*)malloc((Nx) * sizeof(double)); PURIFY_ERROR_MEM_ALLOC_CHECK(error); xoutc = (complex double*)malloc((Nx) * sizeof(complex double)); PURIFY_ERROR_MEM_ALLOC_CHECK(xoutc); wdx = (double*)malloc((Nx) * sizeof(double)); PURIFY_ERROR_MEM_ALLOC_CHECK(wdx); wdy = (double*)malloc((Nx) * sizeof(double)); PURIFY_ERROR_MEM_ALLOC_CHECK(wdy); dummyr = malloc(Nr * sizeof(double)); PURIFY_ERROR_MEM_ALLOC_CHECK(dummyr); dummyc = malloc(Nr * sizeof(complex double)); PURIFY_ERROR_MEM_ALLOC_CHECK(dummyc); for (i=0; i < Nx; i++){ xinc[i] = 0.0 + 0.0*I; } for (i=0; i < img.nx; i++){ for (j=0; j < img.ny; j++){ xinc[i+j*param_m1.nx1] = img.pix[i+j*img.nx] + 0.0*I; } } //Initialize griding matrix assert((start = clock())!=-1); purify_measurement_init_cft(&gmat, deconv, vis_test.u, vis_test.v, ¶m_m1); stop = clock(); t = (double) (stop-start)/CLOCKS_PER_SEC; printf("Time initalization: %f \n\n", t); for(i = 0; i < img.nx * img.ny; ++i){ deconv[i] = 1.0; } //Memory allocation for the fft i = Nx*param_m1.ofy*param_m1.ofx; fft_temp1 = (complex double*)malloc((i) * sizeof(complex double)); PURIFY_ERROR_MEM_ALLOC_CHECK(fft_temp1); fft_temp2 = (complex double*)malloc((i) * sizeof(complex double)); PURIFY_ERROR_MEM_ALLOC_CHECK(fft_temp2); //FFT plan planfwd = fftw_plan_dft_2d(param_m1.nx1*param_m1.ofx, param_m1.ny1*param_m1.ofy, fft_temp1, fft_temp1, FFTW_FORWARD, FFTW_MEASURE); planadj = fftw_plan_dft_2d(param_m1.nx1*param_m1.ofx, param_m1.ny1*param_m1.ofy, fft_temp2, fft_temp2, FFTW_BACKWARD, FFTW_MEASURE); datafwd[0] = (void*)¶m_m1; datafwd[1] = (void*)deconv; datafwd[2] = (void*)&gmat; datafwd[3] = (void*)&planfwd; datafwd[4] = (void*)fft_temp1; dataadj[0] = (void*)¶m_m2; dataadj[1] = (void*)deconv; dataadj[2] = (void*)&gmat; dataadj[3] = (void*)&planadj; dataadj[4] = (void*)fft_temp2; printf("FFT plan done \n\n"); assert((start = clock())!=-1); purify_measurement_cftfwd((void*)y0, (void*)xinc, datafwd); stop = clock(); t = (double) (stop-start)/CLOCKS_PER_SEC; printf("Time forward operator: %f \n\n", t); //Noise realization //Input snr snr = 30.0; a = cblas_dznrm2(Ny, (void*)y0, 1); sigma = a*pow(10.0,-(snr/20.0))/sqrt(Ny); FILE *fout = fopen("ein.uv", "w"); for (i=0; i < Ny; i++) { // noise[i] = (sopt_ran_gasdev2(seedn) + sopt_ran_gasdev2(seedn)*I)*(sigma/sqrt(2)); noise[i] = 0; y[i] = y0[i] + noise[i]; fprintf(fout, "%14.5e%14.5e%14.5e%14.5e%14.5e%14.5e\n", vis_test.u[i], vis_test.v[i], vis_test.w[i], creal(y[i]), cimag(y[i]), 1.0); } fclose(fout); //Rescaling the measurements aux4 = (double)Ny/(double)Nx; for (i=0; i < Ny; i++) { y[i] = y[i]/sqrt(aux4); } for (i=0; i < Nx; i++) { deconv[i] = deconv[i]/sqrt(aux4); } // Output image. img_copy.fov_x = 1.0 / 180.0 * PURIFY_PI; img_copy.fov_y = 1.0 / 180.0 * PURIFY_PI; img_copy.nx = param_m1.nx1; img_copy.ny = param_m1.ny1; for (i=0; i < Nx; i++){ xoutc[i] = 0.0 + 0.0*I; } //Dirty image purify_measurement_cftadj((void*)xoutc, (void*)y, dataadj); for (i=0; i < Nx; i++) { xout[i] = creal(xoutc[i]); } aux1 = purify_utils_maxarray(xout, Nx); img_copy.pix = (double*)malloc((Nx) * sizeof(double)); PURIFY_ERROR_MEM_ALLOC_CHECK(img_copy.pix); for (i=0; i < Nx; i++){ img_copy.pix[i] = creal(xoutc[i]); } purify_image_writefile(&img_copy, "eindirty.fits", filetype_img); return 0; //SARA structure initialization param1.ndict = Nb; param1.real = 0; dict_types = malloc(param1.ndict * sizeof(sopt_wavelet_type)); PURIFY_ERROR_MEM_ALLOC_CHECK(dict_types); dict_types[0] = SOPT_WAVELET_DB1; dict_types[1] = SOPT_WAVELET_DB2; dict_types[2] = SOPT_WAVELET_DB3; dict_types[3] = SOPT_WAVELET_DB4; dict_types[4] = SOPT_WAVELET_DB5; dict_types[5] = SOPT_WAVELET_DB6; dict_types[6] = SOPT_WAVELET_DB7; dict_types[7] = SOPT_WAVELET_DB8; dict_types[8] = SOPT_WAVELET_Dirac; sopt_sara_initop(¶m1, param_m1.ny1, param_m1.nx1, 4, dict_types); datas[0] = (void*)¶m1; //Db8 structure initialization param2.ndict = 1; param2.real = 0; dict_types1 = malloc(param2.ndict * sizeof(sopt_wavelet_type)); PURIFY_ERROR_MEM_ALLOC_CHECK(dict_types1); dict_types1[0] = SOPT_WAVELET_DB8; sopt_sara_initop(¶m2, param_m1.ny1, param_m1.nx1, 4, dict_types1); datas1[0] = (void*)¶m2; //Dirac structure initialization param3.ndict = 1; param3.real = 0; dict_types2 = malloc(param3.ndict * sizeof(sopt_wavelet_type)); PURIFY_ERROR_MEM_ALLOC_CHECK(dict_types2); dict_types2[0] = SOPT_WAVELET_Dirac; sopt_sara_initop(¶m3, param_m1.ny1, param_m1.nx1, 4, dict_types2); datas2[0] = (void*)¶m3; //Scaling constants in the diferent representation domains sopt_sara_analysisop((void*)dummyc, (void*)xoutc, datas); for (i=0; i < Nr; i++) { dummyr[i] = creal(dummyc[i]); } aux2 = purify_utils_maxarray(dummyr, Nr); sopt_sara_analysisop((void*)dummyc, (void*)xoutc, datas1); for (i=0; i < Nr; i++) { dummyr[i] = creal(dummyc[i]); } aux3 = purify_utils_maxarray(dummyr, Nx); // Output image. img_copy.fov_x = 1.0 / 180.0 * PURIFY_PI; img_copy.fov_y = 1.0 / 180.0 * PURIFY_PI; img_copy.nx = param_m1.nx1; img_copy.ny = param_m1.ny1; //Initial solution and weights for (i=0; i < Nx; i++) { xoutc[i] = 0.0 + 0.0*I; wdx[i] = 1.0; wdy[i] = 1.0; } for (i=0; i < Nr; i++){ w[i] = 1.0; } //Copy true image in xout for (i=0; i < Nx; i++) { xout[i] = creal(xinc[i]); } printf("**********************\n"); printf("BPSA reconstruction\n"); printf("**********************\n"); //Structure for the L1 solver param4.verbose = 2; param4.max_iter = 300; param4.gamma = gamma*aux2; param4.rel_obj = 0.001; param4.epsilon = sqrt(Ny + 2*sqrt(Ny))*sigma/sqrt(aux4); param4.epsilon_tol = 0.01; param4.real_data = 0; param4.cg_max_iter = 100; param4.cg_tol = 0.000001; //Initial solution for (i=0; i < Nx; i++) { xoutc[i] = 0.0 + 0.0*I; } #ifdef _OPENMP start1 = omp_get_wtime(); #else assert((start = clock())!=-1); #endif sopt_l1_sdmm((void*)xoutc, Nx, &purify_measurement_cftfwd, datafwd, &purify_measurement_cftadj, dataadj, &sopt_sara_synthesisop, datas, &sopt_sara_analysisop, datas, Nr, (void*)y, Ny, w, param4); #ifdef _OPENMP stop1 = omp_get_wtime(); t = stop1 - start1; #else stop = clock(); t = (double) (stop-start)/CLOCKS_PER_SEC; #endif printf("Time BPSA: %f \n\n", t); //SNR for (i=0; i < Nx; i++) { error[i] = creal(xoutc[i])-xout[i]; } mse = cblas_dnrm2(Nx, error, 1); a = cblas_dnrm2(Nx, xout, 1); snr_out = 20.0*log10(a/mse); printf("SNR: %f dB\n\n", snr_out); for (i=0; i < Nx; i++){ img_copy.pix[i] = creal(xoutc[i]); } purify_image_writefile(&img_copy, "./data/test/einbpsa.fits", filetype_img); //Residual image purify_measurement_cftfwd((void*)y0, (void*)xoutc, datafwd); alpha = -1.0 +0.0*I; cblas_zaxpy(Ny, (void*)&alpha, y, 1, y0, 1); purify_measurement_cftadj((void*)xinc, (void*)y0, dataadj); for (i=0; i < Nx; i++){ img_copy.pix[i] = creal(xinc[i]); } purify_image_writefile(&img_copy, "data/test/einbpsares.fits", filetype_img); //Error image for (i=0; i < Nx; i++){ img_copy.pix[i] = error[i]; } purify_image_writefile(&img_copy, "data/test/einbpsaerror.fits", filetype_img); printf("**********************\n"); printf("SARA reconstruction\n"); printf("**********************\n"); //Structure for the RWL1 solver param5.verbose = 2; param5.max_iter = 5; param5.rel_var = 0.001; param5.sigma = sigma*sqrt((double)Ny/(double)Nr); param5.init_sol = 1; #ifdef _OPENMP start1 = omp_get_wtime(); #else assert((start = clock())!=-1); #endif sopt_l1_rwsdmm((void*)xoutc, Nx, &purify_measurement_cftfwd, datafwd, &purify_measurement_cftadj, dataadj, &sopt_sara_synthesisop, datas, &sopt_sara_analysisop, datas, Nr, (void*)y, Ny, param4, param5); #ifdef _OPENMP stop1 = omp_get_wtime(); t = stop1 - start1; #else stop = clock(); t = (double) (stop-start)/CLOCKS_PER_SEC; #endif printf("Time SARA: %f \n\n", t); //SNR for (i=0; i < Nx; i++) { error[i] = creal(xoutc[i])-xout[i]; } mse = cblas_dnrm2(Nx, error, 1); a = cblas_dnrm2(Nx, xout, 1); snr_out = 20.0*log10(a/mse); printf("SNR: %f dB\n\n", snr_out); for (i=0; i < Nx; i++){ img_copy.pix[i] = creal(xoutc[i]); } purify_image_writefile(&img_copy, "data/test/einsara.fits", filetype_img); //Residual image purify_measurement_cftfwd((void*)y0, (void*)xoutc, datafwd); alpha = -1.0 +0.0*I; cblas_zaxpy(Ny, (void*)&alpha, y, 1, y0, 1); purify_measurement_cftadj((void*)xinc, (void*)y0, dataadj); for (i=0; i < Nx; i++){ img_copy.pix[i] = creal(xinc[i]); } purify_image_writefile(&img_copy, "data/test/einsarares.fits", filetype_img); //Error image for (i=0; i < Nx; i++){ img_copy.pix[i] = error[i]; } purify_image_writefile(&img_copy, "data/test/einsaraerror.fits", filetype_img); printf("**********************\n"); printf("TV reconstruction\n"); printf("**********************\n"); //Structure for the TV prox param6.verbose = 1; param6.max_iter = 50; param6.rel_obj = 0.0001; //Structure for the TV solver param7.verbose = 2; param7.max_iter = 300; param7.gamma = gamma*aux1; param7.rel_obj = 0.001; param7.epsilon = sqrt(Ny + 2*sqrt(Ny))*sigma/sqrt(aux4); param7.epsilon_tol = 0.01; param7.real_data = 0; param7.cg_max_iter = 100; param7.cg_tol = 0.000001; param7.paramtv = param6; //Initial solution and weights for (i=0; i < Nx; i++) { xoutc[i] = 0.0 + 0.0*I; wdx[i] = 1.0; wdy[i] = 1.0; } assert((start = clock())!=-1); sopt_tv_sdmm((void*)xoutc, dimx, dimy, &purify_measurement_cftfwd, datafwd, &purify_measurement_cftadj, dataadj, (void*)y, Ny, wdx, wdy, param7); stop = clock(); t = (double) (stop-start)/CLOCKS_PER_SEC; printf("Time TV: %f \n\n", t); //SNR for (i=0; i < Nx; i++) { error[i] = creal(xoutc[i])-xout[i]; } mse = cblas_dnrm2(Nx, error, 1); a = cblas_dnrm2(Nx, xout, 1); snr_out = 20.0*log10(a/mse); printf("SNR: %f dB\n\n", snr_out); for (i=0; i < Nx; i++){ img_copy.pix[i] = creal(xoutc[i]); } purify_image_writefile(&img_copy, "data/test/eintv.fits", filetype_img); //Residual image purify_measurement_cftfwd((void*)y0, (void*)xoutc, datafwd); alpha = -1.0 +0.0*I; cblas_zaxpy(Ny, (void*)&alpha, y, 1, y0, 1); purify_measurement_cftadj((void*)xinc, (void*)y0, dataadj); for (i=0; i < Nx; i++){ img_copy.pix[i] = creal(xinc[i]); } purify_image_writefile(&img_copy, "data/test/eintvres.fits", filetype_img); //Error image for (i=0; i < Nx; i++){ img_copy.pix[i] = error[i]; } purify_image_writefile(&img_copy, "data/test/eintverror.fits", filetype_img); printf("**********************\n"); printf("RWTV reconstruction\n"); printf("**********************\n"); //Structure for the RWTV solver param8.verbose = 2; param8.max_iter = 5; param8.rel_var = 0.001; param8.sigma = sigma*sqrt(Ny/(2*Nx)); param8.init_sol = 1; assert((start = clock())!=-1); sopt_tv_rwsdmm((void*)xoutc, dimx, dimy, &purify_measurement_cftfwd, datafwd, &purify_measurement_cftadj, dataadj, (void*)y, Ny, param7, param8); stop = clock(); t = (double) (stop-start)/CLOCKS_PER_SEC; printf("Time RWTV: %f \n\n", t); //SNR for (i=0; i < Nx; i++) { error[i] = creal(xoutc[i])-xout[i]; } mse = cblas_dnrm2(Nx, error, 1); a = cblas_dnrm2(Nx, xout, 1); snr_out = 20.0*log10(a/mse); printf("SNR: %f dB\n\n", snr_out); for (i=0; i < Nx; i++){ img_copy.pix[i] = creal(xoutc[i]); } purify_image_writefile(&img_copy, "data/test/einrwtv.fits", filetype_img); //Residual image purify_measurement_cftfwd((void*)y0, (void*)xoutc, datafwd); alpha = -1.0 +0.0*I; cblas_zaxpy(Ny, (void*)&alpha, y, 1, y0, 1); purify_measurement_cftadj((void*)xinc, (void*)y0, dataadj); for (i=0; i < Nx; i++){ img_copy.pix[i] = creal(xinc[i]); } purify_image_writefile(&img_copy, "data/test/einrwtvres.fits", filetype_img); //Error image for (i=0; i < Nx; i++){ img_copy.pix[i] = error[i]; } purify_image_writefile(&img_copy, "data/test/einrwtverror.fits", filetype_img); printf("**********************\n"); printf("Db8 reconstruction\n"); printf("**********************\n"); //Initial solution for (i=0; i < Nx; i++) { xoutc[i] = 0.0 + 0.0*I; } param4.gamma = gamma*aux3; assert((start = clock())!=-1); sopt_l1_sdmm((void*)xoutc, Nx, &purify_measurement_cftfwd, datafwd, &purify_measurement_cftadj, dataadj, &sopt_sara_synthesisop, datas1, &sopt_sara_analysisop, datas1, Nx, (void*)y, Ny, w, param4); stop = clock(); t = (double) (stop-start)/CLOCKS_PER_SEC; printf("Time BPDb8: %f \n\n", t); //SNR for (i=0; i < Nx; i++) { error[i] = creal(xoutc[i])-xout[i]; } mse = cblas_dnrm2(Nx, error, 1); a = cblas_dnrm2(Nx, xout, 1); snr_out = 20.0*log10(a/mse); printf("SNR: %f dB\n\n", snr_out); for (i=0; i < Nx; i++){ img_copy.pix[i] = creal(xoutc[i]); } purify_image_writefile(&img_copy, "data/test/eindb8.fits", filetype_img); //Residual image purify_measurement_cftfwd((void*)y0, (void*)xoutc, datafwd); alpha = -1.0 +0.0*I; cblas_zaxpy(Ny, (void*)&alpha, y, 1, y0, 1); purify_measurement_cftadj((void*)xinc, (void*)y0, dataadj); for (i=0; i < Nx; i++){ img_copy.pix[i] = creal(xinc[i]); } purify_image_writefile(&img_copy, "data/test/eindb8res.fits", filetype_img); //Error image for (i=0; i < Nx; i++){ img_copy.pix[i] = error[i]; } purify_image_writefile(&img_copy, "data/test/eindb8error.fits", filetype_img); printf("**********************\n"); printf("RWBPDb8 reconstruction\n"); printf("**********************\n"); //Structure for the RWL1 solver param5.verbose = 2; param5.max_iter = 5; param5.rel_var = 0.001; param5.sigma = sigma*sqrt((double)Ny/(double)Nx); param5.init_sol = 1; assert((start = clock())!=-1); sopt_l1_rwsdmm((void*)xoutc, Nx, &purify_measurement_cftfwd, datafwd, &purify_measurement_cftadj, dataadj, &sopt_sara_synthesisop, datas1, &sopt_sara_analysisop, datas1, Nx, (void*)y, Ny, param4, param5); stop = clock(); t = (double) (stop-start)/CLOCKS_PER_SEC; printf("Time RWBPDb8: %f \n\n", t); //SNR for (i=0; i < Nx; i++) { error[i] = creal(xoutc[i])-xout[i]; } mse = cblas_dnrm2(Nx, error, 1); a = cblas_dnrm2(Nx, xout, 1); snr_out = 20.0*log10(a/mse); printf("SNR: %f dB\n\n", snr_out); for (i=0; i < Nx; i++){ img_copy.pix[i] = creal(xoutc[i]); } purify_image_writefile(&img_copy, "data/test/einrwdb8.fits", filetype_img); //Residual image purify_measurement_cftfwd((void*)y0, (void*)xoutc, datafwd); alpha = -1.0 +0.0*I; cblas_zaxpy(Ny, (void*)&alpha, y, 1, y0, 1); purify_measurement_cftadj((void*)xinc, (void*)y0, dataadj); for (i=0; i < Nx; i++){ img_copy.pix[i] = creal(xinc[i]); } purify_image_writefile(&img_copy, "data/test/einrwdb8res.fits", filetype_img); //Error image for (i=0; i < Nx; i++){ img_copy.pix[i] = error[i]; } purify_image_writefile(&img_copy, "data/test/einrwdb8error.fits", filetype_img); printf("**********************\n"); printf("BP reconstruction\n"); printf("**********************\n"); param4.gamma = gamma*aux1; //Initial solution for (i=0; i < Nx; i++) { xoutc[i] = 0.0 + 0.0*I; } assert((start = clock())!=-1); sopt_l1_sdmm((void*)xoutc, Nx, &purify_measurement_cftfwd, datafwd, &purify_measurement_cftadj, dataadj, &sopt_sara_synthesisop, datas2, &sopt_sara_analysisop, datas2, Nx, (void*)y, Ny, w, param4); stop = clock(); t = (double) (stop-start)/CLOCKS_PER_SEC; printf("Time BP: %f \n\n", t); //SNR for (i=0; i < Nx; i++) { error[i] = creal(xoutc[i])-xout[i]; } mse = cblas_dnrm2(Nx, error, 1); a = cblas_dnrm2(Nx, xout, 1); snr_out = 20.0*log10(a/mse); printf("SNR: %f dB\n\n", snr_out); for (i=0; i < Nx; i++){ img_copy.pix[i] = creal(xoutc[i]); } purify_image_writefile(&img_copy, "data/test/einbp.fits", filetype_img); //Residual image purify_measurement_cftfwd((void*)y0, (void*)xoutc, datafwd); alpha = -1.0 +0.0*I; cblas_zaxpy(Ny, (void*)&alpha, y, 1, y0, 1); purify_measurement_cftadj((void*)xinc, (void*)y0, dataadj); for (i=0; i < Nx; i++){ img_copy.pix[i] = creal(xinc[i]); } purify_image_writefile(&img_copy, "data/test/einbpres.fits", filetype_img); //Error image for (i=0; i < Nx; i++){ img_copy.pix[i] = error[i]; } purify_image_writefile(&img_copy, "data/test/einbperror.fits", filetype_img); printf("**********************\n"); printf("RWBP reconstruction\n"); printf("**********************\n"); //Structure for the RWL1 solver param5.verbose = 2; param5.max_iter = 5; param5.rel_var = 0.001; param5.sigma = sigma*sqrt((double)Ny/(double)Nx); param5.init_sol = 1; assert((start = clock())!=-1); sopt_l1_rwsdmm((void*)xoutc, Nx, &purify_measurement_cftfwd, datafwd, &purify_measurement_cftadj, dataadj, &sopt_sara_synthesisop, datas2, &sopt_sara_analysisop, datas2, Nx, (void*)y, Ny, param4, param5); stop = clock(); t = (double) (stop-start)/CLOCKS_PER_SEC; printf("Time RWBP: %f \n\n", t); //SNR for (i=0; i < Nx; i++) { error[i] = creal(xoutc[i])-xout[i]; } mse = cblas_dnrm2(Nx, error, 1); a = cblas_dnrm2(Nx, xout, 1); snr_out = 20.0*log10(a/mse); printf("SNR: %f dB\n\n", snr_out); for (i=0; i < Nx; i++){ img_copy.pix[i] = creal(xoutc[i]); } purify_image_writefile(&img_copy, "data/test/einrwbp.fits", filetype_img); //Residual image purify_measurement_cftfwd((void*)y0, (void*)xoutc, datafwd); alpha = -1.0 +0.0*I; cblas_zaxpy(Ny, (void*)&alpha, y, 1, y0, 1); purify_measurement_cftadj((void*)xinc, (void*)y0, dataadj); for (i=0; i < Nx; i++){ img_copy.pix[i] = creal(xinc[i]); } purify_image_writefile(&img_copy, "data/test/einrwbpres.fits", filetype_img); //Error image for (i=0; i < Nx; i++){ img_copy.pix[i] = error[i]; } purify_image_writefile(&img_copy, "data/test/einrwbperror.fits", filetype_img); //Free all memory purify_image_free(&img); purify_image_free(&img_copy); free(deconv); purify_visibility_free(&vis_test); free(y); free(xinc); free(xout); free(w); free(noise); free(y0); free(error); free(xoutc); free(wdx); free(wdy); sopt_sara_free(¶m1); sopt_sara_free(¶m2); sopt_sara_free(¶m3); free(dict_types); free(dict_types1); free(dict_types2); free(fft_temp1); free(fft_temp2); fftw_destroy_plan(planfwd); fftw_destroy_plan(planadj); purify_sparsemat_freer(&gmat); free(dummyr); free(dummyc); return 0; }
static PyObject * dotblas_matrixproduct(PyObject *dummy, PyObject *args) { PyObject *op1, *op2; PyArrayObject *ap1=NULL, *ap2=NULL, *ret=NULL; int j, l, lda, ldb, ldc; int typenum, nd; intp ap1stride=0; intp dimensions[MAX_DIMS]; intp numbytes; static const float oneF[2] = {1.0, 0.0}; static const float zeroF[2] = {0.0, 0.0}; static const double oneD[2] = {1.0, 0.0}; static const double zeroD[2] = {0.0, 0.0}; double prior1, prior2; PyTypeObject *subtype; PyArray_Descr *dtype; MatrixShape ap1shape, ap2shape; if (!PyArg_ParseTuple(args, "OO", &op1, &op2)) return NULL; /* * "Matrix product" using the BLAS. * Only works for float double and complex types. */ typenum = PyArray_ObjectType(op1, 0); typenum = PyArray_ObjectType(op2, typenum); /* This function doesn't handle other types */ if ((typenum != PyArray_DOUBLE && typenum != PyArray_CDOUBLE && typenum != PyArray_FLOAT && typenum != PyArray_CFLOAT)) { return PyArray_Return((PyArrayObject *)PyArray_MatrixProduct(op1, op2)); } dtype = PyArray_DescrFromType(typenum); ap1 = (PyArrayObject *)PyArray_FromAny(op1, dtype, 0, 0, ALIGNED, NULL); if (ap1 == NULL) return NULL; Py_INCREF(dtype); ap2 = (PyArrayObject *)PyArray_FromAny(op2, dtype, 0, 0, ALIGNED, NULL); if (ap2 == NULL) goto fail; if ((ap1->nd > 2) || (ap2->nd > 2)) { /* This function doesn't handle dimensions greater than 2 (or negative striding) -- other than to ensure the dot function is altered */ if (!altered) { /* need to alter dot product */ PyObject *tmp1, *tmp2; tmp1 = PyTuple_New(0); tmp2 = dotblas_alterdot(NULL, tmp1); Py_DECREF(tmp1); Py_DECREF(tmp2); } ret = (PyArrayObject *)PyArray_MatrixProduct((PyObject *)ap1, (PyObject *)ap2); Py_DECREF(ap1); Py_DECREF(ap2); return PyArray_Return(ret); } if (_bad_strides(ap1)) { op1 = PyArray_NewCopy(ap1, PyArray_ANYORDER); Py_DECREF(ap1); ap1 = (PyArrayObject *)op1; if (ap1 == NULL) goto fail; } if (_bad_strides(ap2)) { op2 = PyArray_NewCopy(ap2, PyArray_ANYORDER); Py_DECREF(ap2); ap2 = (PyArrayObject *)op2; if (ap2 == NULL) goto fail; } ap1shape = _select_matrix_shape(ap1); ap2shape = _select_matrix_shape(ap2); if (ap1shape == _scalar || ap2shape == _scalar) { PyArrayObject *oap1, *oap2; oap1 = ap1; oap2 = ap2; /* One of ap1 or ap2 is a scalar */ if (ap1shape == _scalar) { /* Make ap2 the scalar */ PyArrayObject *t = ap1; ap1 = ap2; ap2 = t; ap1shape = ap2shape; ap2shape = _scalar; } if (ap1shape == _row) ap1stride = ap1->strides[1]; else if (ap1->nd > 0) ap1stride = ap1->strides[0]; if (ap1->nd == 0 || ap2->nd == 0) { intp *thisdims; if (ap1->nd == 0) { nd = ap2->nd; thisdims = ap2->dimensions; } else { nd = ap1->nd; thisdims = ap1->dimensions; } l = 1; for (j=0; j<nd; j++) { dimensions[j] = thisdims[j]; l *= dimensions[j]; } } else { l = oap1->dimensions[oap1->nd-1]; if (oap2->dimensions[0] != l) { PyErr_SetString(PyExc_ValueError, "matrices are not aligned"); goto fail; } nd = ap1->nd + ap2->nd - 2; /* nd = 0 or 1 or 2 */ /* If nd == 0 do nothing ... */ if (nd == 1) { /* Either ap1->nd is 1 dim or ap2->nd is 1 dim and the other is 2-dim */ dimensions[0] = (oap1->nd == 2) ? oap1->dimensions[0] : oap2->dimensions[1]; l = dimensions[0]; /* Fix it so that dot(shape=(N,1), shape=(1,)) and dot(shape=(1,), shape=(1,N)) both return an (N,) array (but use the fast scalar code) */ } else if (nd == 2) { dimensions[0] = oap1->dimensions[0]; dimensions[1] = oap2->dimensions[1]; /* We need to make sure that dot(shape=(1,1), shape=(1,N)) and dot(shape=(N,1),shape=(1,1)) uses scalar multiplication appropriately */ if (ap1shape == _row) l = dimensions[1]; else l = dimensions[0]; } } } else { /* (ap1->nd <= 2 && ap2->nd <= 2) */ /* Both ap1 and ap2 are vectors or matrices */ l = ap1->dimensions[ap1->nd-1]; if (ap2->dimensions[0] != l) { PyErr_SetString(PyExc_ValueError, "matrices are not aligned"); goto fail; } nd = ap1->nd+ap2->nd-2; if (nd == 1) dimensions[0] = (ap1->nd == 2) ? ap1->dimensions[0] : ap2->dimensions[1]; else if (nd == 2) { dimensions[0] = ap1->dimensions[0]; dimensions[1] = ap2->dimensions[1]; } } /* Choose which subtype to return */ if (ap1->ob_type != ap2->ob_type) { prior2 = PyArray_GetPriority((PyObject *)ap2, 0.0); prior1 = PyArray_GetPriority((PyObject *)ap1, 0.0); subtype = (prior2 > prior1 ? ap2->ob_type : ap1->ob_type); } else { prior1 = prior2 = 0.0; subtype = ap1->ob_type; } ret = (PyArrayObject *)PyArray_New(subtype, nd, dimensions, typenum, NULL, NULL, 0, 0, (PyObject *) (prior2 > prior1 ? ap2 : ap1)); if (ret == NULL) goto fail; numbytes = PyArray_NBYTES(ret); memset(ret->data, 0, numbytes); if (numbytes==0 || l == 0) { Py_DECREF(ap1); Py_DECREF(ap2); return PyArray_Return(ret); } if (ap2shape == _scalar) { /* Multiplication by a scalar -- Level 1 BLAS */ /* if ap1shape is a matrix and we are not contiguous, then we can't just blast through the entire array using a single striding factor */ NPY_BEGIN_ALLOW_THREADS if (typenum == PyArray_DOUBLE) { if (l == 1) { *((double *)ret->data) = *((double *)ap2->data) * \ *((double *)ap1->data); } else if (ap1shape != _matrix) { cblas_daxpy(l, *((double *)ap2->data), (double *)ap1->data, ap1stride/sizeof(double), (double *)ret->data, 1); } else { int maxind, oind, i, a1s, rets; char *ptr, *rptr; double val; maxind = (ap1->dimensions[0] >= ap1->dimensions[1] ? 0 : 1); oind = 1-maxind; ptr = ap1->data; rptr = ret->data; l = ap1->dimensions[maxind]; val = *((double *)ap2->data); a1s = ap1->strides[maxind] / sizeof(double); rets = ret->strides[maxind] / sizeof(double); for (i=0; i < ap1->dimensions[oind]; i++) { cblas_daxpy(l, val, (double *)ptr, a1s, (double *)rptr, rets); ptr += ap1->strides[oind]; rptr += ret->strides[oind]; } } } else if (typenum == PyArray_CDOUBLE) { if (l == 1) { cdouble *ptr1, *ptr2, *res; ptr1 = (cdouble *)ap2->data; ptr2 = (cdouble *)ap1->data; res = (cdouble *)ret->data; res->real = ptr1->real * ptr2->real - ptr1->imag * ptr2->imag; res->imag = ptr1->real * ptr2->imag + ptr1->imag * ptr2->real; } else if (ap1shape != _matrix) { cblas_zaxpy(l, (double *)ap2->data, (double *)ap1->data, ap1stride/sizeof(cdouble), (double *)ret->data, 1); } else { int maxind, oind, i, a1s, rets; char *ptr, *rptr; double *pval; maxind = (ap1->dimensions[0] >= ap1->dimensions[1] ? 0 : 1); oind = 1-maxind; ptr = ap1->data; rptr = ret->data; l = ap1->dimensions[maxind]; pval = (double *)ap2->data; a1s = ap1->strides[maxind] / sizeof(cdouble); rets = ret->strides[maxind] / sizeof(cdouble); for (i=0; i < ap1->dimensions[oind]; i++) { cblas_zaxpy(l, pval, (double *)ptr, a1s, (double *)rptr, rets); ptr += ap1->strides[oind]; rptr += ret->strides[oind]; } } } else if (typenum == PyArray_FLOAT) { if (l == 1) { *((float *)ret->data) = *((float *)ap2->data) * \ *((float *)ap1->data); } else if (ap1shape != _matrix) { cblas_saxpy(l, *((float *)ap2->data), (float *)ap1->data, ap1stride/sizeof(float), (float *)ret->data, 1); } else { int maxind, oind, i, a1s, rets; char *ptr, *rptr; float val; maxind = (ap1->dimensions[0] >= ap1->dimensions[1] ? 0 : 1); oind = 1-maxind; ptr = ap1->data; rptr = ret->data; l = ap1->dimensions[maxind]; val = *((float *)ap2->data); a1s = ap1->strides[maxind] / sizeof(float); rets = ret->strides[maxind] / sizeof(float); for (i=0; i < ap1->dimensions[oind]; i++) { cblas_saxpy(l, val, (float *)ptr, a1s, (float *)rptr, rets); ptr += ap1->strides[oind]; rptr += ret->strides[oind]; } } } else if (typenum == PyArray_CFLOAT) { if (l == 1) { cfloat *ptr1, *ptr2, *res; ptr1 = (cfloat *)ap2->data; ptr2 = (cfloat *)ap1->data; res = (cfloat *)ret->data; res->real = ptr1->real * ptr2->real - ptr1->imag * ptr2->imag; res->imag = ptr1->real * ptr2->imag + ptr1->imag * ptr2->real; } else if (ap1shape != _matrix) { cblas_caxpy(l, (float *)ap2->data, (float *)ap1->data, ap1stride/sizeof(cfloat), (float *)ret->data, 1); } else { int maxind, oind, i, a1s, rets; char *ptr, *rptr; float *pval; maxind = (ap1->dimensions[0] >= ap1->dimensions[1] ? 0 : 1); oind = 1-maxind; ptr = ap1->data; rptr = ret->data; l = ap1->dimensions[maxind]; pval = (float *)ap2->data; a1s = ap1->strides[maxind] / sizeof(cfloat); rets = ret->strides[maxind] / sizeof(cfloat); for (i=0; i < ap1->dimensions[oind]; i++) { cblas_caxpy(l, pval, (float *)ptr, a1s, (float *)rptr, rets); ptr += ap1->strides[oind]; rptr += ret->strides[oind]; } } } NPY_END_ALLOW_THREADS }
static int check_solution(PLASMA_enum transA, PLASMA_enum transB, int M, int N, int K, PLASMA_Complex64_t alpha, PLASMA_Complex64_t *A, int LDA, PLASMA_Complex64_t *B, int LDB, PLASMA_Complex64_t beta, PLASMA_Complex64_t *Cref, PLASMA_Complex64_t *Cplasma, int LDC) { int info_solution; double Anorm, Bnorm, Cinitnorm, Cplasmanorm, Clapacknorm, Rnorm, result; double eps; PLASMA_Complex64_t beta_const; double *work = (double *)malloc(max(K,max(M, N))* sizeof(double)); int Am, An, Bm, Bn; beta_const = -1.0; if (transA == PlasmaNoTrans) { Am = M; An = K; } else { Am = K; An = M; } if (transB == PlasmaNoTrans) { Bm = K; Bn = N; } else { Bm = N; Bn = K; } Anorm = LAPACKE_zlange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), Am, An, A, LDA, work); Bnorm = LAPACKE_zlange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), Bm, Bn, B, LDB, work); Cinitnorm = LAPACKE_zlange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), M, N, Cref, LDC, work); Cplasmanorm = LAPACKE_zlange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), M, N, Cplasma, LDC, work); cblas_zgemm(CblasColMajor, (CBLAS_TRANSPOSE)transA, (CBLAS_TRANSPOSE)transB, M, N, K, CBLAS_SADDR(alpha), A, LDA, B, LDB, CBLAS_SADDR(beta), Cref, LDC); Clapacknorm = LAPACKE_zlange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), M, N, Cref, LDC, work); cblas_zaxpy(LDC * N, CBLAS_SADDR(beta_const), Cplasma, 1, Cref, 1); Rnorm = LAPACKE_zlange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), M, N, Cref, LDC, work); eps = LAPACKE_dlamch_work('e'); printf("Rnorm %e, Anorm %e, Bnorm %e, Cinitnorm %e, Cplasmanorm %e, Clapacknorm %e\n", Rnorm, Anorm, Bnorm, Cinitnorm, Cplasmanorm, Clapacknorm); result = Rnorm / ((Anorm + Bnorm + Cinitnorm) * N * eps); printf("============\n"); printf("Checking the norm of the difference against reference ZGEMM \n"); printf("-- ||Cplasma - Clapack||_oo/((||A||_oo+||B||_oo+||C||_oo).N.eps) = %e \n", result); if ( isnan(Rnorm) || isinf(Rnorm) || isnan(result) || isinf(result) || (result > 10.0) ) { printf("-- The solution is suspicious ! \n"); info_solution = 1; } else { printf("-- The solution is CORRECT ! \n"); info_solution= 0 ; } free(work); return info_solution; }
int CORE_zttrfb(int side, int trans, int direct, int storev, int M1, int N1, int M2, int N2, int K, PLASMA_Complex64_t *A1, int LDA1, PLASMA_Complex64_t *A2, int LDA2, PLASMA_Complex64_t *V, int LDV, PLASMA_Complex64_t *T, int LDT, PLASMA_Complex64_t *WORK, int LDWORK) { static PLASMA_Complex64_t zone = 1.0; static PLASMA_Complex64_t mzone = -1.0; int j, vi; /* Check input arguments */ if (M1 < 0) { coreblas_error(5, "Illegal value of M1"); return -5; } if (N1 < 0) { coreblas_error(6, "Illegal value of N1"); return -6; } if ((M2 < 0) || ( (side == PlasmaRight) && (M1 != M2) ) ) { coreblas_error(7, "Illegal value of M2"); return -7; } if ((N2 < 0) || ( (side == PlasmaLeft) && (N1 != N2) ) ) { coreblas_error(8, "Illegal value of N2"); return -8; } if (K < 0) { coreblas_error(9, "Illegal value of K"); return -9; } /* Quick return */ if ((M1 == 0) || (N1 == 0) || (M2 == 0) || (N2 == 0) || (K == 0)) return PLASMA_SUCCESS; if (storev == PlasmaColumnwise) { if (direct == PlasmaForward) { /* * Let V = ( V1 ) (first K rows) * ( V2 ) * where V2 is non-unit upper triangular */ if (side == PlasmaLeft) { /* * Colwise / Forward / Left * ------------------------- * * Form H * A or H' * A where A = ( A1 ) * ( A2 ) * where A2 = ( A2_1 ) * ( A2_2 ) */ /* * W = A1 + V' * A2 */ /* * W = A2_2 */ LAPACKE_zlacpy_work(LAPACK_COL_MAJOR, lapack_const(PlasmaUpperLower), K, N2, &A2[M2-K], LDA2, WORK, LDWORK); /* * W = V2' * A2_2 */ cblas_ztrmm( CblasColMajor, CblasLeft, CblasUpper, CblasConjTrans, CblasNonUnit, K, N2, CBLAS_SADDR(zone), &V[M2-K], LDV, WORK, LDWORK); if (M2 > K) { /* * W = W + V1' * A2_1 */ cblas_zgemm( CblasColMajor, CblasConjTrans, CblasNoTrans, K, N2, M2-K, CBLAS_SADDR(zone), V, LDV, A2, LDA2, CBLAS_SADDR(zone), WORK, LDWORK); } /* * W = A1 + W */ for(j = 0; j < N1; j++) { cblas_zaxpy(K, CBLAS_SADDR(zone), &A1[LDA1*j], 1, &WORK[LDWORK*j], 1); } /* * A2 = A2 - V * T * W -> W = T * W, A2 = A2 - V * W */ /* * W = T * W */ cblas_ztrmm( CblasColMajor, CblasLeft, CblasUpper, (CBLAS_TRANSPOSE)trans, CblasNonUnit, K, N2, CBLAS_SADDR(zone), T, LDT, WORK, LDWORK); /* * A1 = A1 - W */ for(j = 0; j < N1; j++) { cblas_zaxpy(K, CBLAS_SADDR(mzone), &WORK[LDWORK*j], 1, &A1[LDA1*j], 1); } /* * A2_1 = A2_1 - V1 * W */ if (M2 > K) { cblas_zgemm( CblasColMajor, CblasNoTrans, CblasNoTrans, M2-K, N2, K, CBLAS_SADDR(mzone), V, LDV, WORK, LDWORK, CBLAS_SADDR(zone), A2, LDA2); } /* * W = - V2 * W */ cblas_ztrmm( CblasColMajor, CblasLeft, CblasUpper, CblasNoTrans, CblasNonUnit, K, N2, CBLAS_SADDR(mzone), &V[M2-K], LDV, WORK, LDWORK); /* * A2_2 = A2_2 + W */ for(j = 0; j < N2; j++) { cblas_zaxpy( K, CBLAS_SADDR(zone), &WORK[LDWORK*j], 1, &A2[LDA2*j+(M2-K)], 1); } } else { /* * Colwise / Forward / Right * ------------------------- * * Form H * A or H' * A where: * * A = ( A1 A2 ) * * A2 = ( A2_1 : A2_2 ) * * A2_1 is M2 x (M2-K) * A2_2 is M2 x K * * V = ( V_1 ) * ( V_2 ) * * V_1 is full and (N2-K) x K * V_2 is upper triangular and K x K */ /* * W = ( A1 + A2_1*V_1 + A2_2*V_2 ) * op(T) * * W is M x K * A1 is M x K * A2 is M x N2 split as (A2_1 A2_2) such as * A2_1 is (N2-K) x K * A2_2 is M x K */ /* W = A2_2 */ LAPACKE_zlacpy_work(LAPACK_COL_MAJOR, lapack_const(PlasmaUpperLower), M2, K, &A2[LDA2*(N2-K)], LDA2, WORK, LDWORK); /* W = W * V_2 --> W = A2_2 * V_2 */ cblas_ztrmm( CblasColMajor, CblasRight, CblasUpper, CblasNoTrans, CblasNonUnit, M2, K, CBLAS_SADDR(zone), &V[N2-K], LDV, WORK, LDWORK); /* W = W + A2_1 * V_1 */ if (N2 > K) { cblas_zgemm( CblasColMajor, CblasNoTrans, CblasNoTrans, M2, K, N2-K, CBLAS_SADDR(zone), A2, LDA2, V, LDV, CBLAS_SADDR(zone), WORK, LDWORK); } /* W = A1 + W */ for (j = 0; j < K; j++) { cblas_zaxpy(M1, CBLAS_SADDR(zone), &A1[LDA1*j], 1, &WORK[LDWORK*j], 1); } /* W = W * T --> ( A1 + A2_1*V_1 + A2_2*V_2 ) * op(T) */ cblas_ztrmm( CblasColMajor, CblasRight, CblasUpper, (CBLAS_TRANSPOSE)trans, CblasNonUnit, M2, K, CBLAS_SADDR(zone), T, LDT, WORK, LDWORK); /* * A1 = A1 - W */ for(j = 0; j < K; j++) { cblas_zaxpy(M1, CBLAS_SADDR(mzone), &WORK[LDWORK*j], 1, &A1[LDA1*j], 1); } /* * A2 = A2 - W * V' --> A2 - W*V_1' - W*V_2' */ /* A2 = A2 - W * V_1' */ if (N2 > K) { cblas_zgemm( CblasColMajor, CblasNoTrans, CblasConjTrans, M2, N2-K, K, CBLAS_SADDR(mzone), WORK, LDWORK, V, LDV, CBLAS_SADDR(zone), A2, LDA2); } /* A2 = A2 - W * V_2' */ cblas_ztrmm( CblasColMajor, CblasRight, CblasUpper, CblasConjTrans, CblasNonUnit, M2, K, CBLAS_SADDR(mzone), &V[N2-K], LDV, WORK, LDWORK); for(j = 0; j < K; j++) { cblas_zaxpy( M2, CBLAS_SADDR(zone), &WORK[LDWORK*j], 1, &A2[LDA2*(j+N2-K)], 1); } } } else { coreblas_error(3, "Not implemented (ColWise / Backward / Left or Right)"); return PLASMA_ERR_NOT_SUPPORTED; } } else { /* * Rowwise */ if (direct == PlasmaForward) { /* * Let V = ( V1 V2 ) (V1: first K cols) * * where V2 is non-unit lower triangular */ if (side == PlasmaLeft) { /* * Rowwise / Forward / Left * ------------------------- * * Form H * A or H' * A where A = ( A1 ) * ( A2 ) * where A2 = ( A2_1 ) * ( A2_2 ) */ /* V_2 first element */ vi = LDV*(M2-K); /* * W = A1 + V * A2 */ /* * W = A2_2 */ LAPACKE_zlacpy_work(LAPACK_COL_MAJOR, lapack_const(PlasmaUpperLower), K, N2, &A2[M2-K], LDA2, WORK, LDWORK); /* * W = V2 * A2_2 */ //**DB CblasColMajor, CblasLeft, CblasUpper, cblas_ztrmm( CblasColMajor, CblasLeft, CblasLower, CblasNoTrans, CblasNonUnit, K, N2, CBLAS_SADDR(zone), &V[vi], LDV, WORK, LDWORK); if (M2 > K) { /* * W = W + V1 * A2_1 */ cblas_zgemm( CblasColMajor, CblasNoTrans, CblasNoTrans, K, N2, M2-K, CBLAS_SADDR(zone), V, LDV, A2, LDA2, CBLAS_SADDR(zone), WORK, LDWORK); } /* * W = A1 + W */ for(j = 0; j < N1; j++) { cblas_zaxpy( K, CBLAS_SADDR(zone), &A1[LDA1*j], 1, &WORK[LDWORK*j], 1); } /* * W = T * W */ cblas_ztrmm( CblasColMajor, CblasLeft, CblasUpper, (CBLAS_TRANSPOSE)trans, CblasNonUnit, K, N2, CBLAS_SADDR(zone), T, LDT, WORK, LDWORK); /* * A1 = A1 - W */ for(j = 0; j < N1; j++) { cblas_zaxpy( K, CBLAS_SADDR(mzone), &WORK[LDWORK*j], 1, &A1[LDA1*j], 1); } /* * A2 = A2 - V' * T * W -> A2 = A2 - V' * W */ /* * A2_1 = A2_1 - V1' * W */ if (M2 > K) { cblas_zgemm( CblasColMajor, CblasConjTrans, CblasNoTrans, M2-K, N2, K, CBLAS_SADDR(mzone), V, LDV, WORK, LDWORK, CBLAS_SADDR(zone), A2, LDA2); } /* * W = - V2' * W */ cblas_ztrmm( CblasColMajor, CblasLeft, CblasLower, CblasConjTrans, CblasNonUnit, K, N2, CBLAS_SADDR(mzone), &V[vi], LDV, WORK, LDWORK); /* * A2_2 = A2_2 + W */ for(j = 0; j < N2; j++) { cblas_zaxpy( K, CBLAS_SADDR(zone), &WORK[LDWORK*j], 1, &A2[LDA2*j+(M2-K)], 1); } } else { /* * Rowwise / Forward / Right * ------------------------- * * Form H * A or H' * A where: * * A = ( A1 A2 ) * * A2 = ( A2_1 : A2_2 ) * * A2_1 is M2 x (M2-K) * A2_2 is M2 x K * * V = ( V_1 ) * ( V_2 ) * * V_1 is full and (N2-K) x K * V_2 is lower triangular and K x K */ /* * W = ( A1 + A2_1*V_1 + A2_2*V_2 ) * op(T) * * W is M x K * A1 is M x K * A2 is M x N2 split as (A2_1 A2_2) such as * A2_1 is (N2-K) x K * A2_2 is M x K */ /* V_2 and A2_2 first element */ vi = LDV*(N2-K); /* W = A2_2 */ LAPACKE_zlacpy_work(LAPACK_COL_MAJOR, lapack_const(PlasmaUpperLower), M2, K, &A2[LDA2*(N2-K)], LDA2, WORK, LDWORK); /* W = W * V_2' --> W = A2_2 * V_2' */ cblas_ztrmm( CblasColMajor, CblasRight, CblasLower, CblasConjTrans, CblasNonUnit, M2, K, CBLAS_SADDR(zone), &V[vi], LDV, WORK, LDWORK); /* W = W + A2_1 * V_1' */ if (N2 > K) { cblas_zgemm( CblasColMajor, CblasNoTrans, CblasConjTrans, M2, K, N2-K, CBLAS_SADDR(zone), A2, LDA2, V, LDV, CBLAS_SADDR(zone), WORK, LDWORK); } /* W = A1 + W */ for (j = 0; j < K; j++) { cblas_zaxpy(M1, CBLAS_SADDR(zone), &A1[LDA1*j], 1, &WORK[LDWORK*j], 1); } /* W = W * op(T) --> ( A1 + A2_1*V_1 + A2_2*V_2 ) * op(T) */ cblas_ztrmm( CblasColMajor, CblasRight, CblasUpper, (CBLAS_TRANSPOSE)trans, CblasNonUnit, M2, K, CBLAS_SADDR(zone), T, LDT, WORK, LDWORK); /* * A1 = A1 - W */ for(j = 0; j < K; j++) { cblas_zaxpy(M1, CBLAS_SADDR(mzone), &WORK[LDWORK*j], 1, &A1[LDA1*j], 1); } /* * A2 = A2 - W * V --> A2 - W*V_1 - W*V_2 */ /* A2 = A2 - W * V_1 */ if (N2 > K) { cblas_zgemm( CblasColMajor, CblasNoTrans, CblasNoTrans, M2, N2-K, K, CBLAS_SADDR(mzone), WORK, LDWORK, V, LDV, CBLAS_SADDR(zone), A2, LDA2); } /* A2 = A2 - W * V_2 */ cblas_ztrmm( CblasColMajor, CblasRight, CblasLower, CblasNoTrans, CblasNonUnit, M2, K, CBLAS_SADDR(mzone), &V[vi], LDV, WORK, LDWORK); for(j = 0; j < K; j++) { cblas_zaxpy( M2, CBLAS_SADDR(zone), &WORK[LDWORK*j], 1, &A2[LDA2*(N2-K+j)], 1); } } } else { coreblas_error(3, "Not implemented (Rowwise / Backward / Left or Right)"); return PLASMA_ERR_NOT_SUPPORTED; } } return PLASMA_SUCCESS; }
DLLEXPORT void z_axpy(const blasint n, const openblas_complex_double alpha, const openblas_complex_double x[], openblas_complex_double y[]) { cblas_zaxpy(n, (double*)&alpha, (double*)x, 1, (double*)y, 1); }
void F77_zaxpy(const int *N, const void *alpha, void *X, const int *incX, void *Y, const int *incY) { cblas_zaxpy(*N, alpha, X, *incX, Y, *incY); return; }
void eblas_zaxpy_sub(size_t iStart, size_t iStop, const complex* a, const complex* x, int incx, complex* y, int incy) { cblas_zaxpy(iStop-iStart, a, x+incx*iStart, incx, y+incy*iStart, incy); }
int CORE_ztsqrt(int M, int N, int IB, PLASMA_Complex64_t *A1, int LDA1, PLASMA_Complex64_t *A2, int LDA2, PLASMA_Complex64_t *T, int LDT, PLASMA_Complex64_t *TAU, PLASMA_Complex64_t *WORK) { static PLASMA_Complex64_t zone = 1.0; static PLASMA_Complex64_t zzero = 0.0; PLASMA_Complex64_t alpha; int i, ii, sb; /* Check input arguments */ if (M < 0) { coreblas_error(1, "Illegal value of M"); return -1; } if (N < 0) { coreblas_error(2, "Illegal value of N"); return -2; } if (IB < 0) { coreblas_error(3, "Illegal value of IB"); return -3; } if ((LDA2 < max(1,M)) && (M > 0)) { coreblas_error(8, "Illegal value of LDA2"); return -8; } /* Quick return */ if ((M == 0) || (N == 0) || (IB == 0)) return PLASMA_SUCCESS; for(ii = 0; ii < N; ii += IB) { sb = min(N-ii, IB); for(i = 0; i < sb; i++) { /* * Generate elementary reflector H( II*IB+I ) to annihilate * A( II*IB+I:M, II*IB+I ) */ LAPACKE_zlarfg_work(M+1, &A1[LDA1*(ii+i)+ii+i], &A2[LDA2*(ii+i)], 1, &TAU[ii+i]); if (ii+i+1 < N) { /* * Apply H( II*IB+I ) to A( II*IB+I:M, II*IB+I+1:II*IB+IB ) from the left */ alpha = -conj(TAU[ii+i]); cblas_zcopy( sb-i-1, &A1[LDA1*(ii+i+1)+(ii+i)], LDA1, WORK, 1); #ifdef COMPLEX LAPACKE_zlacgv_work(sb-i-1, WORK, 1); #endif cblas_zgemv( CblasColMajor, (CBLAS_TRANSPOSE)PlasmaConjTrans, M, sb-i-1, CBLAS_SADDR(zone), &A2[LDA2*(ii+i+1)], LDA2, &A2[LDA2*(ii+i)], 1, CBLAS_SADDR(zone), WORK, 1); #ifdef COMPLEX LAPACKE_zlacgv_work(sb-i-1, WORK, 1 ); #endif cblas_zaxpy( sb-i-1, CBLAS_SADDR(alpha), WORK, 1, &A1[LDA1*(ii+i+1)+ii+i], LDA1); #ifdef COMPLEX LAPACKE_zlacgv_work(sb-i-1, WORK, 1 ); #endif cblas_zgerc( CblasColMajor, M, sb-i-1, CBLAS_SADDR(alpha), &A2[LDA2*(ii+i)], 1, WORK, 1, &A2[LDA2*(ii+i+1)], LDA2); } /* * Calculate T */ alpha = -TAU[ii+i]; cblas_zgemv( CblasColMajor, (CBLAS_TRANSPOSE)PlasmaConjTrans, M, i, CBLAS_SADDR(alpha), &A2[LDA2*ii], LDA2, &A2[LDA2*(ii+i)], 1, CBLAS_SADDR(zzero), &T[LDT*(ii+i)], 1); cblas_ztrmv( CblasColMajor, (CBLAS_UPLO)PlasmaUpper, (CBLAS_TRANSPOSE)PlasmaNoTrans, (CBLAS_DIAG)PlasmaNonUnit, i, &T[LDT*ii], LDT, &T[LDT*(ii+i)], 1); T[LDT*(ii+i)+i] = TAU[ii+i]; } if (N > ii+sb) { CORE_ztsmqr( PlasmaLeft, PlasmaConjTrans, sb, N-(ii+sb), M, N-(ii+sb), IB, IB, &A1[LDA1*(ii+sb)+ii], LDA1, &A2[LDA2*(ii+sb)], LDA2, &A2[LDA2*ii], LDA2, &T[LDT*ii], LDT, WORK, sb); } } return PLASMA_SUCCESS; }
int CORE_zttlqt(int M, int N, int IB, PLASMA_Complex64_t *A1, int LDA1, PLASMA_Complex64_t *A2, int LDA2, PLASMA_Complex64_t *T, int LDT, PLASMA_Complex64_t *TAU, PLASMA_Complex64_t *WORK) { static PLASMA_Complex64_t zone = 1.0; static PLASMA_Complex64_t zzero = 0.0; #ifdef COMPLEX static int ione = 1; #endif PLASMA_Complex64_t alpha; int i, j, l, ii, sb, mi, ni; /* Check input arguments */ if (M < 0) { coreblas_error(1, "Illegal value of M"); return -1; } if (N < 0) { coreblas_error(2, "Illegal value of N"); return -2; } if (IB < 0) { coreblas_error(3, "Illegal value of IB"); return -3; } if ((LDA2 < max(1,M)) && (M > 0)) { coreblas_error(7, "Illegal value of LDA2"); return -7; } /* Quick return */ if ((M == 0) || (N == 0) || (IB == 0)) return PLASMA_SUCCESS; /* TODO: Need to check why some cases require * this to not have uninitialized values */ CORE_zlaset( PlasmaUpperLower, IB, N, 0., 0., T, LDT); for(ii = 0; ii < M; ii += IB) { sb = min(M-ii, IB); for(i = 0; i < sb; i++) { j = ii + i; mi = sb-i-1; ni = min( j + 1, N); /* * Generate elementary reflector H( II*IB+I ) to annihilate A( II*IB+I, II*IB+I:M ). */ #ifdef COMPLEX LAPACKE_zlacgv_work(ni, &A2[j], LDA2); LAPACKE_zlacgv_work(ione, &A1[LDA1*j+j], LDA1); #endif LAPACKE_zlarfg_work(ni+1, &A1[LDA1*j+j], &A2[j], LDA2, &TAU[j]); if (mi > 0) { /* * Apply H( j-1 ) to A( j:II+IB-1, j-1:M ) from the right. */ cblas_zcopy( mi, &A1[LDA1*j+(j+1)], 1, WORK, 1); cblas_zgemv( CblasColMajor, (CBLAS_TRANSPOSE)PlasmaNoTrans, mi, ni, CBLAS_SADDR(zone), &A2[j+1], LDA2, &A2[j], LDA2, CBLAS_SADDR(zone), WORK, 1); alpha = -(TAU[j]); cblas_zaxpy( mi, CBLAS_SADDR(alpha), WORK, 1, &A1[LDA1*j+j+1], 1); cblas_zgerc( CblasColMajor, mi, ni, CBLAS_SADDR(alpha), WORK, 1, &A2[j], LDA2, &A2[j+1], LDA2); } /* * Calculate T. */ if (i > 0 ) { l = min(i, max(0, N-ii)); alpha = -(TAU[j]); CORE_zpemv( PlasmaNoTrans, PlasmaRowwise, i , min(j, N), l, alpha, &A2[ii], LDA2, &A2[j], LDA2, zzero, &T[LDT*j], 1, WORK); /* T(0:i-1, j) = T(0:i-1, ii:j-1) * T(0:i-1, j) */ cblas_ztrmv( CblasColMajor, (CBLAS_UPLO)PlasmaUpper, (CBLAS_TRANSPOSE)PlasmaNoTrans, (CBLAS_DIAG)PlasmaNonUnit, i, &T[LDT*ii], LDT, &T[LDT*j], 1); } #ifdef COMPLEX LAPACKE_zlacgv_work(ni, &A2[j], LDA2 ); LAPACKE_zlacgv_work(ione, &A1[LDA1*j+j], LDA1 ); #endif T[LDT*j+i] = TAU[j]; } /* Apply Q to the rest of the matrix to the right */ if (M > ii+sb) { mi = M-(ii+sb); ni = min(ii+sb, N); l = min(sb, max(0, ni-ii)); CORE_zparfb( PlasmaRight, PlasmaNoTrans, PlasmaForward, PlasmaRowwise, mi, IB, mi, ni, sb, l, &A1[LDA1*ii+ii+sb], LDA1, &A2[ii+sb], LDA2, &A2[ii], LDA2, &T[LDT*ii], LDT, WORK, M); } } return PLASMA_SUCCESS; }
void CBlasMath::add(Matrix *a, Matrix *b) { std::complex<double> alpha(1,0); cblas_zaxpy(a->getRows() * a->getCols(), &alpha, b->data(), 1, a->data(), 1); }