void CORE_caxpy(int M, int N, PLASMA_Complex32_t alpha, PLASMA_Complex32_t *A, int LDA, PLASMA_Complex32_t *B, int LDB) { int j; if (M == LDA) cblas_caxpy(M*N, CBLAS_SADDR(alpha), A, 1, B, 1); else { for (j = 0; j < N; j++) cblas_caxpy(M, CBLAS_SADDR(alpha), &A[j*LDA], 1, &B[j*LDA], 1); } }
static int check_solution(PLASMA_enum uplo, PLASMA_enum trans, int N, int K, PLASMA_Complex32_t alpha, PLASMA_Complex32_t *A, int LDA, PLASMA_Complex32_t *B, int LDB, float beta, PLASMA_Complex32_t *Cref, PLASMA_Complex32_t *Cplasma, int LDC) { int info_solution; float Anorm, Bnorm, Cinitnorm, Cplasmanorm, Clapacknorm, Rnorm, result; float eps; PLASMA_Complex32_t beta_const; float *work = (float *)malloc(max(N, K)* sizeof(float)); beta_const = -1.0; Anorm = LAPACKE_clange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), (trans == PlasmaNoTrans) ? N : K, (trans == PlasmaNoTrans) ? K : N, A, LDA, work); Bnorm = LAPACKE_clange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), (trans == PlasmaNoTrans) ? N : K, (trans == PlasmaNoTrans) ? K : N, B, LDB, work); Cinitnorm = LAPACKE_clange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), N, N, Cref, LDC, work); Cplasmanorm = LAPACKE_clange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), N, N, Cplasma, LDC, work); cblas_cher2k(CblasColMajor, (CBLAS_UPLO)uplo, (CBLAS_TRANSPOSE)trans, N, K, CBLAS_SADDR(alpha), A, LDA, B, LDB, (beta), Cref, LDC); Clapacknorm = LAPACKE_clange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), N, N, Cref, LDC, work); cblas_caxpy(LDC*N, CBLAS_SADDR(beta_const), Cplasma, 1, Cref, 1); Rnorm = LAPACKE_clange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), N, N, Cref, LDC, work); eps = LAPACKE_slamch_work('e'); printf("Rnorm %e, Anorm %e, Cinitnorm %e, Cplasmanorm %e, Clapacknorm %e\n", Rnorm, Anorm, Cinitnorm, Cplasmanorm, Clapacknorm); result = Rnorm / ((Anorm + Bnorm + Cinitnorm) * N * eps); printf("============\n"); printf("Checking the norm of the difference against reference CHER2K \n"); printf("-- ||Cplasma - Clapack||_oo/((||A||_oo+||C||_oo).N.eps) = %e \n", result); if ( isnan(Rnorm) || isinf(Rnorm) || isnan(result) || isinf(result) || (result > 10.0) ) { printf("-- The solution is suspicious ! \n"); info_solution = 1; } else { printf("-- The solution is CORRECT ! \n"); info_solution= 0 ; } free(work); return info_solution; }
void CORE_caxpy_quark(Quark *quark) { int M; int N; PLASMA_Complex32_t alpha; PLASMA_Complex32_t *A; int LDA; PLASMA_Complex32_t *B; int LDB; int j; quark_unpack_args_7(quark, M, N, alpha, A, LDA, B, LDB); if (M == LDA) cblas_caxpy(M*N, CBLAS_SADDR(alpha), A, 1, B, 1); else { for (j = 0; j < N; j++) cblas_caxpy(M, CBLAS_SADDR(alpha), &A[j*LDA], 1, &B[j*LDA], 1); } }
JNIEXPORT void JNICALL Java_edu_berkeley_bid_CBLAS_caxpyxx (JNIEnv * env, jobject calling_obj, jint N, jfloatArray jA, jfloatArray jX, jint startX, jfloatArray jY, jint startY){ jfloat * X = (*env)->GetPrimitiveArrayCritical(env, jX, JNI_FALSE); jfloat * Y = (*env)->GetPrimitiveArrayCritical(env, jY, JNI_FALSE); jfloat * a = (*env)->GetPrimitiveArrayCritical(env, jA, JNI_FALSE); cblas_caxpy(N, a, X+startX, 1, Y+startY, 1); (*env)->ReleasePrimitiveArrayCritical(env, jA, a, 0); (*env)->ReleasePrimitiveArrayCritical(env, jY, Y, 0); (*env)->ReleasePrimitiveArrayCritical(env, jX, X, 0); }
void HostVector<std::complex<float> >::AddScale(const BaseVector<std::complex<float> > &x, const std::complex<float> alpha) { assert(&x != NULL); const HostVector<std::complex<float> > *cast_x = dynamic_cast<const HostVector<std::complex<float> >*> (&x); assert(cast_x != NULL); assert(this->size_ == cast_x->size_); cblas_caxpy(this->size_, &alpha, cast_x->vec_, 1, this->vec_, 1); }
VrArrayPtrCF32 BlasComplexSingle::vec_add(int ndims, VrArrayPtrCF32 X,VrArrayPtrCF32 Y, const float complex alpha, const int incX,const int incY ) { int N=1; VrArrayPtrCF32 Y1=vec_copy(VR_GET_NDIMS_CF32(Y),Y); for(int i=0;i<ndims;i++){ N*=VR_GET_DIMS_CF32(X)[i]; } const float alph[] = {1,0}; cblas_caxpy(N,alph,(float*)VR_GET_DATA_CF32(X),incX,(float*)VR_GET_DATA_CF32(Y1),incY); return Y1; }
void bl1_caxpy( int n, scomplex* alpha, scomplex* x, int incx, scomplex* y, int incy ) { #ifdef BLIS1_ENABLE_CBLAS_INTERFACES cblas_caxpy( n, alpha, x, incx, y, incy ); #else F77_caxpy( &n, alpha, x, &incx, y, &incy ); #endif }
static int check_solution(PLASMA_enum side, PLASMA_enum uplo, int M, int N, PLASMA_Complex32_t alpha, PLASMA_Complex32_t *A, int LDA, PLASMA_Complex32_t *B, int LDB, PLASMA_Complex32_t beta, PLASMA_Complex32_t *Cref, PLASMA_Complex32_t *Cplasma, int LDC) { int info_solution, NrowA; float Anorm, Bnorm, Cinitnorm, Cplasmanorm, Clapacknorm, Rnorm; float eps; PLASMA_Complex32_t beta_const; float result; float *work = (float *)malloc(max(M, N)* sizeof(float)); beta_const = (PLASMA_Complex32_t)-1.0; NrowA = (side == PlasmaLeft) ? M : N; Anorm = LAPACKE_clange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), NrowA, NrowA, A, LDA, work); Bnorm = LAPACKE_clange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), M, N, B, LDB, work); Cinitnorm = LAPACKE_clange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), M, N, Cref, LDC, work); Cplasmanorm = LAPACKE_clange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), M, N, Cplasma, LDC, work); cblas_csymm(CblasColMajor, (CBLAS_SIDE)side, (CBLAS_UPLO)uplo, M, N, CBLAS_SADDR(alpha), A, LDA, B, LDB, CBLAS_SADDR(beta), Cref, LDC); Clapacknorm = LAPACKE_clange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), M, N, Cref, LDC, work); cblas_caxpy(LDC * N, CBLAS_SADDR(beta_const), Cplasma, 1, Cref, 1); Rnorm = LAPACKE_clange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), M, N, Cref, LDC, work); eps = LAPACKE_slamch_work('e'); printf("Rnorm %e, Anorm %e, Bnorm %e, Cinitnorm %e, Cplasmanorm %e, Clapacknorm %e\n",Rnorm,Anorm,Bnorm,Cinitnorm,Cplasmanorm,Clapacknorm); result = Rnorm / ((Anorm + Bnorm + Cinitnorm) * N * eps); printf("============\n"); printf("Checking the norm of the difference against reference CSYMM \n"); printf("-- ||Cplasma - Clapack||_oo/((||A||_oo+||B||_oo+||C||_oo).N.eps) = %e \n", result ); if ( isinf(Clapacknorm) || isinf(Cplasmanorm) || isnan(result) || isinf(result) || (result > 10.0) ) { printf("-- The solution is suspicious ! \n"); info_solution = 1; } else { printf("-- The solution is CORRECT ! \n"); info_solution= 0 ; } free(work); return info_solution; }
void phi_axpy(const int N, const Complex *alpha, const Complex *X, const int incX, Complex *Y, const int incY){ #ifndef NOBLAS #ifdef SINGLEPRECISION cblas_caxpy(N,alpha,X,1,Y,1); #else cblas_zaxpy(N,alpha,X,1,Y,1); #endif #else int i; for(i = 0; i < N; ++i){ Y[i] = (*alpha)*X[i]+Y[i]; } #endif }
// // Overloaded function for dispatching to // * CBLAS backend, and // * complex<float> value-type. // inline void axpy( const int n, const std::complex<float> a, const std::complex<float>* x, const int incx, std::complex<float>* y, const int incy ) { cblas_caxpy( n, &a, x, incx, y, incy ); }
int CORE_ctsqrt(int M, int N, int IB, PLASMA_Complex32_t *A1, int LDA1, PLASMA_Complex32_t *A2, int LDA2, PLASMA_Complex32_t *T, int LDT, PLASMA_Complex32_t *TAU, PLASMA_Complex32_t *WORK) { static PLASMA_Complex32_t zone = 1.0; static PLASMA_Complex32_t zzero = 0.0; PLASMA_Complex32_t alpha; int i, ii, sb; /* Check input arguments */ if (M < 0) { coreblas_error(1, "Illegal value of M"); return -1; } if (N < 0) { coreblas_error(2, "Illegal value of N"); return -2; } if (IB < 0) { coreblas_error(3, "Illegal value of IB"); return -3; } if ((LDA2 < max(1,M)) && (M > 0)) { coreblas_error(8, "Illegal value of LDA2"); return -8; } /* Quick return */ if ((M == 0) || (N == 0) || (IB == 0)) return PLASMA_SUCCESS; for(ii = 0; ii < N; ii += IB) { sb = min(N-ii, IB); for(i = 0; i < sb; i++) { /* * Generate elementary reflector H( II*IB+I ) to annihilate * A( II*IB+I:M, II*IB+I ) */ LAPACKE_clarfg_work(M+1, &A1[LDA1*(ii+i)+ii+i], &A2[LDA2*(ii+i)], 1, &TAU[ii+i]); if (ii+i+1 < N) { /* * Apply H( II*IB+I ) to A( II*IB+I:M, II*IB+I+1:II*IB+IB ) from the left */ alpha = -conjf(TAU[ii+i]); cblas_ccopy( sb-i-1, &A1[LDA1*(ii+i+1)+(ii+i)], LDA1, WORK, 1); #ifdef COMPLEX LAPACKE_clacgv_work(sb-i-1, WORK, 1); #endif cblas_cgemv( CblasColMajor, (CBLAS_TRANSPOSE)PlasmaConjTrans, M, sb-i-1, CBLAS_SADDR(zone), &A2[LDA2*(ii+i+1)], LDA2, &A2[LDA2*(ii+i)], 1, CBLAS_SADDR(zone), WORK, 1); #ifdef COMPLEX LAPACKE_clacgv_work(sb-i-1, WORK, 1 ); #endif cblas_caxpy( sb-i-1, CBLAS_SADDR(alpha), WORK, 1, &A1[LDA1*(ii+i+1)+ii+i], LDA1); #ifdef COMPLEX LAPACKE_clacgv_work(sb-i-1, WORK, 1 ); #endif cblas_cgerc( CblasColMajor, M, sb-i-1, CBLAS_SADDR(alpha), &A2[LDA2*(ii+i)], 1, WORK, 1, &A2[LDA2*(ii+i+1)], LDA2); } /* * Calculate T */ alpha = -TAU[ii+i]; cblas_cgemv( CblasColMajor, (CBLAS_TRANSPOSE)PlasmaConjTrans, M, i, CBLAS_SADDR(alpha), &A2[LDA2*ii], LDA2, &A2[LDA2*(ii+i)], 1, CBLAS_SADDR(zzero), &T[LDT*(ii+i)], 1); cblas_ctrmv( CblasColMajor, (CBLAS_UPLO)PlasmaUpper, (CBLAS_TRANSPOSE)PlasmaNoTrans, (CBLAS_DIAG)PlasmaNonUnit, i, &T[LDT*ii], LDT, &T[LDT*(ii+i)], 1); T[LDT*(ii+i)+i] = TAU[ii+i]; } if (N > ii+sb) { CORE_ctsmqr( PlasmaLeft, PlasmaConjTrans, sb, N-(ii+sb), M, N-(ii+sb), IB, IB, &A1[LDA1*(ii+sb)+ii], LDA1, &A2[LDA2*(ii+sb)], LDA2, &A2[LDA2*ii], LDA2, &T[LDT*ii], LDT, WORK, sb); } } return PLASMA_SUCCESS; }
void F77_caxpy(const int *N, const void *alpha, void *X, const int *incX, void *Y, const int *incY) { cblas_caxpy(*N, alpha, X, *incX, Y, *incY); return; }
static PyObject * dotblas_matrixproduct(PyObject *dummy, PyObject *args) { PyObject *op1, *op2; PyArrayObject *ap1=NULL, *ap2=NULL, *ret=NULL; int j, l, lda, ldb, ldc; int typenum, nd; intp ap1stride=0; intp dimensions[MAX_DIMS]; intp numbytes; static const float oneF[2] = {1.0, 0.0}; static const float zeroF[2] = {0.0, 0.0}; static const double oneD[2] = {1.0, 0.0}; static const double zeroD[2] = {0.0, 0.0}; double prior1, prior2; PyTypeObject *subtype; PyArray_Descr *dtype; MatrixShape ap1shape, ap2shape; if (!PyArg_ParseTuple(args, "OO", &op1, &op2)) return NULL; /* * "Matrix product" using the BLAS. * Only works for float double and complex types. */ typenum = PyArray_ObjectType(op1, 0); typenum = PyArray_ObjectType(op2, typenum); /* This function doesn't handle other types */ if ((typenum != PyArray_DOUBLE && typenum != PyArray_CDOUBLE && typenum != PyArray_FLOAT && typenum != PyArray_CFLOAT)) { return PyArray_Return((PyArrayObject *)PyArray_MatrixProduct(op1, op2)); } dtype = PyArray_DescrFromType(typenum); ap1 = (PyArrayObject *)PyArray_FromAny(op1, dtype, 0, 0, ALIGNED, NULL); if (ap1 == NULL) return NULL; Py_INCREF(dtype); ap2 = (PyArrayObject *)PyArray_FromAny(op2, dtype, 0, 0, ALIGNED, NULL); if (ap2 == NULL) goto fail; if ((ap1->nd > 2) || (ap2->nd > 2)) { /* This function doesn't handle dimensions greater than 2 (or negative striding) -- other than to ensure the dot function is altered */ if (!altered) { /* need to alter dot product */ PyObject *tmp1, *tmp2; tmp1 = PyTuple_New(0); tmp2 = dotblas_alterdot(NULL, tmp1); Py_DECREF(tmp1); Py_DECREF(tmp2); } ret = (PyArrayObject *)PyArray_MatrixProduct((PyObject *)ap1, (PyObject *)ap2); Py_DECREF(ap1); Py_DECREF(ap2); return PyArray_Return(ret); } if (_bad_strides(ap1)) { op1 = PyArray_NewCopy(ap1, PyArray_ANYORDER); Py_DECREF(ap1); ap1 = (PyArrayObject *)op1; if (ap1 == NULL) goto fail; } if (_bad_strides(ap2)) { op2 = PyArray_NewCopy(ap2, PyArray_ANYORDER); Py_DECREF(ap2); ap2 = (PyArrayObject *)op2; if (ap2 == NULL) goto fail; } ap1shape = _select_matrix_shape(ap1); ap2shape = _select_matrix_shape(ap2); if (ap1shape == _scalar || ap2shape == _scalar) { PyArrayObject *oap1, *oap2; oap1 = ap1; oap2 = ap2; /* One of ap1 or ap2 is a scalar */ if (ap1shape == _scalar) { /* Make ap2 the scalar */ PyArrayObject *t = ap1; ap1 = ap2; ap2 = t; ap1shape = ap2shape; ap2shape = _scalar; } if (ap1shape == _row) ap1stride = ap1->strides[1]; else if (ap1->nd > 0) ap1stride = ap1->strides[0]; if (ap1->nd == 0 || ap2->nd == 0) { intp *thisdims; if (ap1->nd == 0) { nd = ap2->nd; thisdims = ap2->dimensions; } else { nd = ap1->nd; thisdims = ap1->dimensions; } l = 1; for (j=0; j<nd; j++) { dimensions[j] = thisdims[j]; l *= dimensions[j]; } } else { l = oap1->dimensions[oap1->nd-1]; if (oap2->dimensions[0] != l) { PyErr_SetString(PyExc_ValueError, "matrices are not aligned"); goto fail; } nd = ap1->nd + ap2->nd - 2; /* nd = 0 or 1 or 2 */ /* If nd == 0 do nothing ... */ if (nd == 1) { /* Either ap1->nd is 1 dim or ap2->nd is 1 dim and the other is 2-dim */ dimensions[0] = (oap1->nd == 2) ? oap1->dimensions[0] : oap2->dimensions[1]; l = dimensions[0]; /* Fix it so that dot(shape=(N,1), shape=(1,)) and dot(shape=(1,), shape=(1,N)) both return an (N,) array (but use the fast scalar code) */ } else if (nd == 2) { dimensions[0] = oap1->dimensions[0]; dimensions[1] = oap2->dimensions[1]; /* We need to make sure that dot(shape=(1,1), shape=(1,N)) and dot(shape=(N,1),shape=(1,1)) uses scalar multiplication appropriately */ if (ap1shape == _row) l = dimensions[1]; else l = dimensions[0]; } } } else { /* (ap1->nd <= 2 && ap2->nd <= 2) */ /* Both ap1 and ap2 are vectors or matrices */ l = ap1->dimensions[ap1->nd-1]; if (ap2->dimensions[0] != l) { PyErr_SetString(PyExc_ValueError, "matrices are not aligned"); goto fail; } nd = ap1->nd+ap2->nd-2; if (nd == 1) dimensions[0] = (ap1->nd == 2) ? ap1->dimensions[0] : ap2->dimensions[1]; else if (nd == 2) { dimensions[0] = ap1->dimensions[0]; dimensions[1] = ap2->dimensions[1]; } } /* Choose which subtype to return */ if (ap1->ob_type != ap2->ob_type) { prior2 = PyArray_GetPriority((PyObject *)ap2, 0.0); prior1 = PyArray_GetPriority((PyObject *)ap1, 0.0); subtype = (prior2 > prior1 ? ap2->ob_type : ap1->ob_type); } else { prior1 = prior2 = 0.0; subtype = ap1->ob_type; } ret = (PyArrayObject *)PyArray_New(subtype, nd, dimensions, typenum, NULL, NULL, 0, 0, (PyObject *) (prior2 > prior1 ? ap2 : ap1)); if (ret == NULL) goto fail; numbytes = PyArray_NBYTES(ret); memset(ret->data, 0, numbytes); if (numbytes==0 || l == 0) { Py_DECREF(ap1); Py_DECREF(ap2); return PyArray_Return(ret); } if (ap2shape == _scalar) { /* Multiplication by a scalar -- Level 1 BLAS */ /* if ap1shape is a matrix and we are not contiguous, then we can't just blast through the entire array using a single striding factor */ NPY_BEGIN_ALLOW_THREADS if (typenum == PyArray_DOUBLE) { if (l == 1) { *((double *)ret->data) = *((double *)ap2->data) * \ *((double *)ap1->data); } else if (ap1shape != _matrix) { cblas_daxpy(l, *((double *)ap2->data), (double *)ap1->data, ap1stride/sizeof(double), (double *)ret->data, 1); } else { int maxind, oind, i, a1s, rets; char *ptr, *rptr; double val; maxind = (ap1->dimensions[0] >= ap1->dimensions[1] ? 0 : 1); oind = 1-maxind; ptr = ap1->data; rptr = ret->data; l = ap1->dimensions[maxind]; val = *((double *)ap2->data); a1s = ap1->strides[maxind] / sizeof(double); rets = ret->strides[maxind] / sizeof(double); for (i=0; i < ap1->dimensions[oind]; i++) { cblas_daxpy(l, val, (double *)ptr, a1s, (double *)rptr, rets); ptr += ap1->strides[oind]; rptr += ret->strides[oind]; } } } else if (typenum == PyArray_CDOUBLE) { if (l == 1) { cdouble *ptr1, *ptr2, *res; ptr1 = (cdouble *)ap2->data; ptr2 = (cdouble *)ap1->data; res = (cdouble *)ret->data; res->real = ptr1->real * ptr2->real - ptr1->imag * ptr2->imag; res->imag = ptr1->real * ptr2->imag + ptr1->imag * ptr2->real; } else if (ap1shape != _matrix) { cblas_zaxpy(l, (double *)ap2->data, (double *)ap1->data, ap1stride/sizeof(cdouble), (double *)ret->data, 1); } else { int maxind, oind, i, a1s, rets; char *ptr, *rptr; double *pval; maxind = (ap1->dimensions[0] >= ap1->dimensions[1] ? 0 : 1); oind = 1-maxind; ptr = ap1->data; rptr = ret->data; l = ap1->dimensions[maxind]; pval = (double *)ap2->data; a1s = ap1->strides[maxind] / sizeof(cdouble); rets = ret->strides[maxind] / sizeof(cdouble); for (i=0; i < ap1->dimensions[oind]; i++) { cblas_zaxpy(l, pval, (double *)ptr, a1s, (double *)rptr, rets); ptr += ap1->strides[oind]; rptr += ret->strides[oind]; } } } else if (typenum == PyArray_FLOAT) { if (l == 1) { *((float *)ret->data) = *((float *)ap2->data) * \ *((float *)ap1->data); } else if (ap1shape != _matrix) { cblas_saxpy(l, *((float *)ap2->data), (float *)ap1->data, ap1stride/sizeof(float), (float *)ret->data, 1); } else { int maxind, oind, i, a1s, rets; char *ptr, *rptr; float val; maxind = (ap1->dimensions[0] >= ap1->dimensions[1] ? 0 : 1); oind = 1-maxind; ptr = ap1->data; rptr = ret->data; l = ap1->dimensions[maxind]; val = *((float *)ap2->data); a1s = ap1->strides[maxind] / sizeof(float); rets = ret->strides[maxind] / sizeof(float); for (i=0; i < ap1->dimensions[oind]; i++) { cblas_saxpy(l, val, (float *)ptr, a1s, (float *)rptr, rets); ptr += ap1->strides[oind]; rptr += ret->strides[oind]; } } } else if (typenum == PyArray_CFLOAT) { if (l == 1) { cfloat *ptr1, *ptr2, *res; ptr1 = (cfloat *)ap2->data; ptr2 = (cfloat *)ap1->data; res = (cfloat *)ret->data; res->real = ptr1->real * ptr2->real - ptr1->imag * ptr2->imag; res->imag = ptr1->real * ptr2->imag + ptr1->imag * ptr2->real; } else if (ap1shape != _matrix) { cblas_caxpy(l, (float *)ap2->data, (float *)ap1->data, ap1stride/sizeof(cfloat), (float *)ret->data, 1); } else { int maxind, oind, i, a1s, rets; char *ptr, *rptr; float *pval; maxind = (ap1->dimensions[0] >= ap1->dimensions[1] ? 0 : 1); oind = 1-maxind; ptr = ap1->data; rptr = ret->data; l = ap1->dimensions[maxind]; pval = (float *)ap2->data; a1s = ap1->strides[maxind] / sizeof(cfloat); rets = ret->strides[maxind] / sizeof(cfloat); for (i=0; i < ap1->dimensions[oind]; i++) { cblas_caxpy(l, pval, (float *)ptr, a1s, (float *)rptr, rets); ptr += ap1->strides[oind]; rptr += ret->strides[oind]; } } } NPY_END_ALLOW_THREADS }
int CORE_cttqrt(int M, int N, int IB, PLASMA_Complex32_t *A1, int LDA1, PLASMA_Complex32_t *A2, int LDA2, PLASMA_Complex32_t *T, int LDT, PLASMA_Complex32_t *TAU, PLASMA_Complex32_t *WORK) { static PLASMA_Complex32_t zone = 1.0; static PLASMA_Complex32_t zzero = 0.0; static int ione = 1; PLASMA_Complex32_t alpha; int i, j, ii, sb, mi, ni; /* Check input arguments */ if (M < 0) { coreblas_error(1, "Illegal value of M"); return -1; } if (N < 0) { coreblas_error(2, "Illegal value of N"); return -2; } if (IB < 0) { coreblas_error(3, "Illegal value of IB"); return -3; } if ((LDA2 < max(1,M)) && (M > 0)) { coreblas_error(7, "Illegal value of LDA2"); return -7; } /* Quick return */ if ((M == 0) || (N == 0) || (IB == 0)) return PLASMA_SUCCESS; for(ii = 0; ii < N; ii += IB) { sb = min(N-ii, IB); for(i = 0; i < sb; i++) { /* * Generate elementary reflector H( II*IB+I ) to annihilate * A( II*IB+I:mi, II*IB+I ). */ mi = ii + i + 1; LAPACKE_clarfg_work(mi+1, &A1[LDA1*(ii+i)+ii+i], &A2[LDA2*(ii+i)], ione, &TAU[ii+i]); if (sb-i-1>0) { /* * Apply H( II*IB+I ) to A( II*IB+I:M, II*IB+I+1:II*IB+IB ) from the left. */ ni = sb-i-1; cblas_ccopy( ni, &A1[LDA1*(ii+i+1)+(ii+i)], LDA1, WORK, 1); #ifdef COMPLEX LAPACKE_clacgv_work(ni, WORK, ione); #endif cblas_cgemv( CblasColMajor, (CBLAS_TRANSPOSE)PlasmaConjTrans, mi, ni, CBLAS_SADDR(zone), &A2[LDA2*(ii+i+1)], LDA2, &A2[LDA2*(ii+i)], 1, CBLAS_SADDR(zone), WORK, 1); #ifdef COMPLEX LAPACKE_clacgv_work(ni, WORK, ione); #endif alpha = -conjf(TAU[ii+i]); cblas_caxpy( ni, CBLAS_SADDR(alpha), WORK, 1, &A1[LDA1*(ii+i+1)+ii+i], LDA1); #ifdef COMPLEX LAPACKE_clacgv_work(ni, WORK, ione); #endif cblas_cgerc( CblasColMajor, mi, ni, CBLAS_SADDR(alpha), &A2[LDA2*(ii+i)], 1, WORK, 1, &A2[LDA2*(ii+i+1)], LDA2); } /* * Calculate T. */ if (i > 0 ) { cblas_ccopy(i, &A2[LDA2*(ii+i)+ii], 1, &WORK[ii], 1); cblas_ctrmv( CblasColMajor, (CBLAS_UPLO)PlasmaUpper, (CBLAS_TRANSPOSE)PlasmaConjTrans, (CBLAS_DIAG)PlasmaNonUnit, i, &A2[LDA2*ii+ii], LDA2, &WORK[ii], 1); alpha = -(TAU[ii+i]); for(j = 0; j < i; j++) { WORK[ii+j] = alpha * WORK[ii+j]; } if (ii > 0) { cblas_cgemv( CblasColMajor, (CBLAS_TRANSPOSE)PlasmaConjTrans, ii, i, CBLAS_SADDR(alpha), &A2[LDA2*ii], LDA2, &A2[LDA2*(ii+i)], 1, CBLAS_SADDR(zzero), WORK, 1); cblas_caxpy(i, CBLAS_SADDR(zone), &WORK[ii], 1, WORK, 1); } cblas_ccopy(i, WORK, 1, &T[LDT*(ii+i)], 1); cblas_ctrmv( CblasColMajor, (CBLAS_UPLO)PlasmaUpper, (CBLAS_TRANSPOSE)PlasmaNoTrans, (CBLAS_DIAG)PlasmaNonUnit, i, &T[LDT*ii], LDT, &T[LDT*(ii+i)], 1); } T[LDT*(ii+i)+i] = TAU[ii+i]; } /* Apply Q' to the rest of the matrix to the left */ if (N > ii+sb) { CORE_cttrfb( PlasmaLeft, PlasmaConjTrans, PlasmaForward, PlasmaColumnwise, sb, N-(ii+sb), ii+sb, N-(ii+sb), sb, &A1[LDA1*(ii+sb)+ii], LDA1, &A2[LDA2*(ii+sb)], LDA2, &A2[LDA2*ii], LDA2, &T[LDT*ii], LDT, WORK, sb); } } return PLASMA_SUCCESS; }
DLLEXPORT void c_axpy(const blasint n, const openblas_complex_float alpha, const openblas_complex_float x[], openblas_complex_float y[]) { cblas_caxpy(n, (float*)&alpha, (float*)x, 1, (float*)y, 1); }