Ejemplo n.º 1
0
void CORE_caxpy(int M, int N,  PLASMA_Complex32_t alpha,
                PLASMA_Complex32_t *A, int LDA,
                PLASMA_Complex32_t *B, int LDB)
{
    int j;

    if (M == LDA)
        cblas_caxpy(M*N, CBLAS_SADDR(alpha), A, 1, B, 1);
    else {
        for (j = 0; j < N; j++)
            cblas_caxpy(M, CBLAS_SADDR(alpha), &A[j*LDA], 1, &B[j*LDA], 1);
    }
}
Ejemplo n.º 2
0
static int check_solution(PLASMA_enum uplo, PLASMA_enum trans, int N, int K,
                          PLASMA_Complex32_t alpha, PLASMA_Complex32_t *A, int LDA,
                          PLASMA_Complex32_t *B, int LDB,
                          float beta, PLASMA_Complex32_t *Cref, PLASMA_Complex32_t *Cplasma, int LDC)
{
    int info_solution;
    float Anorm, Bnorm, Cinitnorm, Cplasmanorm, Clapacknorm, Rnorm, result;
    float eps;
    PLASMA_Complex32_t beta_const;

    float *work = (float *)malloc(max(N, K)* sizeof(float));

    beta_const  = -1.0;
    Anorm       = LAPACKE_clange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), 
                                      (trans == PlasmaNoTrans) ? N : K, 
                                      (trans == PlasmaNoTrans) ? K : N, A, LDA, work);
    Bnorm       = LAPACKE_clange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), 
                                      (trans == PlasmaNoTrans) ? N : K, 
                                      (trans == PlasmaNoTrans) ? K : N, B, LDB, work);
    Cinitnorm   = LAPACKE_clange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), N, N, Cref,    LDC, work);
    Cplasmanorm = LAPACKE_clange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), N, N, Cplasma, LDC, work);

    cblas_cher2k(CblasColMajor, (CBLAS_UPLO)uplo, (CBLAS_TRANSPOSE)trans, 
                 N, K, CBLAS_SADDR(alpha), A, LDA, B, LDB, (beta), Cref, LDC);

    Clapacknorm = LAPACKE_clange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), N, N, Cref, LDC, work);

    cblas_caxpy(LDC*N, CBLAS_SADDR(beta_const), Cplasma, 1, Cref, 1);

    Rnorm = LAPACKE_clange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), N, N, Cref, LDC, work);

    eps = LAPACKE_slamch_work('e');
    
    printf("Rnorm %e, Anorm %e, Cinitnorm %e, Cplasmanorm %e, Clapacknorm %e\n",
           Rnorm, Anorm, Cinitnorm, Cplasmanorm, Clapacknorm);

    result = Rnorm / ((Anorm + Bnorm + Cinitnorm) * N * eps);
    printf("============\n");
    printf("Checking the norm of the difference against reference CHER2K \n");
    printf("-- ||Cplasma - Clapack||_oo/((||A||_oo+||C||_oo).N.eps) = %e \n", result);

    if (  isnan(Rnorm) || isinf(Rnorm) || isnan(result) || isinf(result) || (result > 10.0) ) {
         printf("-- The solution is suspicious ! \n");
         info_solution = 1;
    }
    else {
         printf("-- The solution is CORRECT ! \n");
         info_solution= 0 ;
    }

    free(work);

    return info_solution;
}
Ejemplo n.º 3
0
void CORE_caxpy_quark(Quark *quark)
{
    int M;
    int N;
    PLASMA_Complex32_t alpha;
    PLASMA_Complex32_t *A;
    int LDA;
    PLASMA_Complex32_t *B;
    int LDB;

    int j;

    quark_unpack_args_7(quark, M, N, alpha, A, LDA, B, LDB);
    if (M == LDA)
        cblas_caxpy(M*N, CBLAS_SADDR(alpha), A, 1, B, 1);
    else {
        for (j = 0; j < N; j++)
            cblas_caxpy(M, CBLAS_SADDR(alpha), &A[j*LDA], 1, &B[j*LDA], 1);
    }
}
Ejemplo n.º 4
0
JNIEXPORT void JNICALL Java_edu_berkeley_bid_CBLAS_caxpyxx
(JNIEnv * env, jobject calling_obj, jint N, jfloatArray jA, jfloatArray jX, jint startX, jfloatArray jY, jint startY){
	jfloat * X = (*env)->GetPrimitiveArrayCritical(env, jX, JNI_FALSE);
	jfloat * Y = (*env)->GetPrimitiveArrayCritical(env, jY, JNI_FALSE);
	jfloat * a = (*env)->GetPrimitiveArrayCritical(env, jA, JNI_FALSE);

    cblas_caxpy(N, a, X+startX, 1, Y+startY, 1);

	(*env)->ReleasePrimitiveArrayCritical(env, jA, a, 0);
	(*env)->ReleasePrimitiveArrayCritical(env, jY, Y, 0);
	(*env)->ReleasePrimitiveArrayCritical(env, jX, X, 0);
}
Ejemplo n.º 5
0
void HostVector<std::complex<float> >::AddScale(const BaseVector<std::complex<float> > &x, const std::complex<float> alpha) {

  assert(&x != NULL);

  const HostVector<std::complex<float> > *cast_x = dynamic_cast<const HostVector<std::complex<float> >*> (&x);

  assert(cast_x != NULL);
  assert(this->size_ == cast_x->size_);

  cblas_caxpy(this->size_, &alpha, cast_x->vec_, 1, this->vec_, 1);

}
Ejemplo n.º 6
0
VrArrayPtrCF32 BlasComplexSingle::vec_add(int ndims, VrArrayPtrCF32 X,VrArrayPtrCF32 Y, const float complex alpha, const int incX,const int incY ) {
        int N=1;
	VrArrayPtrCF32 Y1=vec_copy(VR_GET_NDIMS_CF32(Y),Y);
        for(int i=0;i<ndims;i++){
                N*=VR_GET_DIMS_CF32(X)[i];
        }
		const float alph[] = {1,0};
        cblas_caxpy(N,alph,(float*)VR_GET_DATA_CF32(X),incX,(float*)VR_GET_DATA_CF32(Y1),incY);
	return Y1;


}
Ejemplo n.º 7
0
void bl1_caxpy( int n, scomplex* alpha, scomplex* x, int incx, scomplex* y, int incy )
{
#ifdef BLIS1_ENABLE_CBLAS_INTERFACES
	cblas_caxpy( n,
	             alpha,
	             x, incx,
	             y, incy );
#else
	F77_caxpy( &n,
	           alpha,
	           x, &incx,
	           y, &incy );
#endif
}
Ejemplo n.º 8
0
static int check_solution(PLASMA_enum side, PLASMA_enum uplo, int M, int N,
                   PLASMA_Complex32_t alpha, PLASMA_Complex32_t *A, int LDA,
                   PLASMA_Complex32_t *B, int LDB,
                   PLASMA_Complex32_t beta, PLASMA_Complex32_t *Cref, PLASMA_Complex32_t *Cplasma, int LDC)
{
    int info_solution, NrowA;
    float Anorm, Bnorm, Cinitnorm, Cplasmanorm, Clapacknorm, Rnorm;
    float eps;
    PLASMA_Complex32_t beta_const;
    float result;
    float *work = (float *)malloc(max(M, N)* sizeof(float));

    beta_const  = (PLASMA_Complex32_t)-1.0;

    NrowA = (side == PlasmaLeft) ? M : N;
    Anorm       = LAPACKE_clange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), NrowA, NrowA, A,       LDA, work);
    Bnorm       = LAPACKE_clange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), M,     N,     B,       LDB, work);
    Cinitnorm   = LAPACKE_clange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), M,     N,     Cref,    LDC, work);
    Cplasmanorm = LAPACKE_clange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), M,     N,     Cplasma, LDC, work);

    cblas_csymm(CblasColMajor, (CBLAS_SIDE)side, (CBLAS_UPLO)uplo, M, N, CBLAS_SADDR(alpha), A, LDA, B, LDB, CBLAS_SADDR(beta), Cref, LDC);

    Clapacknorm = LAPACKE_clange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), M, N, Cref, LDC, work);

    cblas_caxpy(LDC * N, CBLAS_SADDR(beta_const), Cplasma, 1, Cref, 1);

    Rnorm = LAPACKE_clange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), M, N, Cref, LDC, work);

    eps = LAPACKE_slamch_work('e');

    printf("Rnorm %e, Anorm %e, Bnorm %e, Cinitnorm %e, Cplasmanorm %e, Clapacknorm %e\n",Rnorm,Anorm,Bnorm,Cinitnorm,Cplasmanorm,Clapacknorm);

    result = Rnorm / ((Anorm + Bnorm + Cinitnorm) * N * eps);

    printf("============\n");
    printf("Checking the norm of the difference against reference CSYMM \n");
    printf("-- ||Cplasma - Clapack||_oo/((||A||_oo+||B||_oo+||C||_oo).N.eps) = %e \n", result );

    if ( isinf(Clapacknorm) || isinf(Cplasmanorm) || isnan(result) || isinf(result) || (result > 10.0) ) {
        printf("-- The solution is suspicious ! \n");
        info_solution = 1;
    }
    else {
        printf("-- The solution is CORRECT ! \n");
        info_solution= 0 ;
    }
    free(work);
    return info_solution;
}
Ejemplo n.º 9
0
void phi_axpy(const int N, const Complex *alpha, const Complex *X,
                 const int incX, Complex *Y, const int incY){
#ifndef NOBLAS
    #ifdef SINGLEPRECISION 
    cblas_caxpy(N,alpha,X,1,Y,1);
    #else
    cblas_zaxpy(N,alpha,X,1,Y,1);
    #endif
#else
    int i;
    for(i = 0; i < N; ++i){
        Y[i] = (*alpha)*X[i]+Y[i];
    }
#endif
}
Ejemplo n.º 10
0
//
// Overloaded function for dispatching to
// * CBLAS backend, and
// * complex<float> value-type.
//
inline void axpy( const int n, const std::complex<float> a,
        const std::complex<float>* x, const int incx, std::complex<float>* y,
        const int incy ) {
    cblas_caxpy( n, &a, x, incx, y, incy );
}
Ejemplo n.º 11
0
int CORE_ctsqrt(int M, int N, int IB,
                PLASMA_Complex32_t *A1, int LDA1,
                PLASMA_Complex32_t *A2, int LDA2,
                PLASMA_Complex32_t *T, int LDT,
                PLASMA_Complex32_t *TAU, PLASMA_Complex32_t *WORK)
{
    static PLASMA_Complex32_t zone  = 1.0;
    static PLASMA_Complex32_t zzero = 0.0;

    PLASMA_Complex32_t alpha;
    int i, ii, sb;

    /* Check input arguments */
    if (M < 0) {
        coreblas_error(1, "Illegal value of M");
        return -1;
    }
    if (N < 0) {
        coreblas_error(2, "Illegal value of N");
        return -2;
    }
    if (IB < 0) {
        coreblas_error(3, "Illegal value of IB");
        return -3;
    }
    if ((LDA2 < max(1,M)) && (M > 0)) {
        coreblas_error(8, "Illegal value of LDA2");
        return -8;
    }

    /* Quick return */
    if ((M == 0) || (N == 0) || (IB == 0))
        return PLASMA_SUCCESS;

    for(ii = 0; ii < N; ii += IB) {
        sb = min(N-ii, IB);
        for(i = 0; i < sb; i++) {
            /*
             * Generate elementary reflector H( II*IB+I ) to annihilate
             * A( II*IB+I:M, II*IB+I )
             */
            LAPACKE_clarfg_work(M+1, &A1[LDA1*(ii+i)+ii+i], &A2[LDA2*(ii+i)], 1, &TAU[ii+i]);

            if (ii+i+1 < N) {
                /*
                 * Apply H( II*IB+I ) to A( II*IB+I:M, II*IB+I+1:II*IB+IB ) from the left
                 */
                alpha = -conjf(TAU[ii+i]);
                cblas_ccopy(
                    sb-i-1,
                    &A1[LDA1*(ii+i+1)+(ii+i)], LDA1,
                    WORK, 1);
#ifdef COMPLEX
                LAPACKE_clacgv_work(sb-i-1, WORK, 1);
#endif
                cblas_cgemv(
                    CblasColMajor, (CBLAS_TRANSPOSE)PlasmaConjTrans,
                    M, sb-i-1,
                    CBLAS_SADDR(zone), &A2[LDA2*(ii+i+1)], LDA2,
                    &A2[LDA2*(ii+i)], 1,
                    CBLAS_SADDR(zone), WORK, 1);
#ifdef COMPLEX
                LAPACKE_clacgv_work(sb-i-1, WORK, 1 );
#endif
                cblas_caxpy(
                    sb-i-1, CBLAS_SADDR(alpha),
                    WORK, 1,
                    &A1[LDA1*(ii+i+1)+ii+i], LDA1);
#ifdef COMPLEX
                LAPACKE_clacgv_work(sb-i-1, WORK, 1 );
#endif
                cblas_cgerc(
                    CblasColMajor, M, sb-i-1, CBLAS_SADDR(alpha),
                    &A2[LDA2*(ii+i)], 1,
                    WORK, 1,
                    &A2[LDA2*(ii+i+1)], LDA2);
            }
            /*
             * Calculate T
             */
            alpha = -TAU[ii+i];
            cblas_cgemv(
                CblasColMajor, (CBLAS_TRANSPOSE)PlasmaConjTrans, M, i,
                CBLAS_SADDR(alpha), &A2[LDA2*ii], LDA2,
                &A2[LDA2*(ii+i)], 1,
                CBLAS_SADDR(zzero), &T[LDT*(ii+i)], 1);

            cblas_ctrmv(
                CblasColMajor, (CBLAS_UPLO)PlasmaUpper,
                (CBLAS_TRANSPOSE)PlasmaNoTrans, (CBLAS_DIAG)PlasmaNonUnit, i,
                &T[LDT*ii], LDT,
                &T[LDT*(ii+i)], 1);

            T[LDT*(ii+i)+i] = TAU[ii+i];
        }
        if (N > ii+sb) {
            CORE_ctsmqr(
                PlasmaLeft, PlasmaConjTrans,
                sb, N-(ii+sb), M, N-(ii+sb), IB, IB,
                &A1[LDA1*(ii+sb)+ii], LDA1,
                &A2[LDA2*(ii+sb)], LDA2,
                &A2[LDA2*ii], LDA2,
                &T[LDT*ii], LDT,
                WORK, sb);
        }
    }
    return PLASMA_SUCCESS;
}
Ejemplo n.º 12
0
void F77_caxpy(const int *N, const void *alpha, void *X,
                    const int *incX, void *Y, const int *incY)
{
   cblas_caxpy(*N, alpha, X, *incX, Y, *incY);
   return;
}
Ejemplo n.º 13
0
static PyObject *
dotblas_matrixproduct(PyObject *dummy, PyObject *args)
{
    PyObject *op1, *op2;
    PyArrayObject *ap1=NULL, *ap2=NULL, *ret=NULL;
    int j, l, lda, ldb, ldc;
    int typenum, nd;
    intp ap1stride=0;
    intp dimensions[MAX_DIMS];
    intp numbytes;
    static const float oneF[2] = {1.0, 0.0};
    static const float zeroF[2] = {0.0, 0.0};
    static const double oneD[2] = {1.0, 0.0};
    static const double zeroD[2] = {0.0, 0.0};
    double prior1, prior2;
    PyTypeObject *subtype;
    PyArray_Descr *dtype;
    MatrixShape ap1shape, ap2shape;

    if (!PyArg_ParseTuple(args, "OO", &op1, &op2)) return NULL;

    /*
     * "Matrix product" using the BLAS.
     * Only works for float double and complex types.
     */

    typenum = PyArray_ObjectType(op1, 0);
    typenum = PyArray_ObjectType(op2, typenum);

    /* This function doesn't handle other types */
    if ((typenum != PyArray_DOUBLE && typenum != PyArray_CDOUBLE &&
            typenum != PyArray_FLOAT && typenum != PyArray_CFLOAT)) {
        return PyArray_Return((PyArrayObject *)PyArray_MatrixProduct(op1, op2));
    }

    dtype = PyArray_DescrFromType(typenum);
    ap1 = (PyArrayObject *)PyArray_FromAny(op1, dtype, 0, 0, ALIGNED, NULL);
    if (ap1 == NULL) return NULL;
    Py_INCREF(dtype);
    ap2 = (PyArrayObject *)PyArray_FromAny(op2, dtype, 0, 0, ALIGNED, NULL);
    if (ap2 == NULL) goto fail;


    if ((ap1->nd > 2) || (ap2->nd > 2)) {
        /* This function doesn't handle dimensions greater than 2
           (or negative striding)  -- other
           than to ensure the dot function is altered
        */
        if (!altered) {
            /* need to alter dot product */
            PyObject *tmp1, *tmp2;
            tmp1 = PyTuple_New(0);
            tmp2 = dotblas_alterdot(NULL, tmp1);
            Py_DECREF(tmp1);
            Py_DECREF(tmp2);
        }
        ret = (PyArrayObject *)PyArray_MatrixProduct((PyObject *)ap1,
                (PyObject *)ap2);
        Py_DECREF(ap1);
        Py_DECREF(ap2);
        return PyArray_Return(ret);
    }

    if (_bad_strides(ap1)) {
        op1 = PyArray_NewCopy(ap1, PyArray_ANYORDER);
        Py_DECREF(ap1);
        ap1 = (PyArrayObject *)op1;
        if (ap1 == NULL) goto fail;
    }
    if (_bad_strides(ap2)) {
        op2 = PyArray_NewCopy(ap2, PyArray_ANYORDER);
        Py_DECREF(ap2);
        ap2 = (PyArrayObject *)op2;
        if (ap2 == NULL) goto fail;
    }
    ap1shape = _select_matrix_shape(ap1);
    ap2shape = _select_matrix_shape(ap2);

    if (ap1shape == _scalar || ap2shape == _scalar) {
        PyArrayObject *oap1, *oap2;
        oap1 = ap1;
        oap2 = ap2;
        /* One of ap1 or ap2 is a scalar */
        if (ap1shape == _scalar) { 		/* Make ap2 the scalar */
            PyArrayObject *t = ap1;
            ap1 = ap2;
            ap2 = t;
            ap1shape = ap2shape;
            ap2shape = _scalar;
        }

        if (ap1shape == _row) ap1stride = ap1->strides[1];
        else if (ap1->nd > 0) ap1stride = ap1->strides[0];

        if (ap1->nd == 0 || ap2->nd == 0) {
            intp *thisdims;
            if (ap1->nd == 0) {
                nd = ap2->nd;
                thisdims = ap2->dimensions;
            }
            else {
                nd = ap1->nd;
                thisdims = ap1->dimensions;
            }
            l = 1;
            for (j=0; j<nd; j++) {
                dimensions[j] = thisdims[j];
                l *= dimensions[j];
            }
        }
        else {
            l = oap1->dimensions[oap1->nd-1];

            if (oap2->dimensions[0] != l) {
                PyErr_SetString(PyExc_ValueError, "matrices are not aligned");
                goto fail;
            }
            nd = ap1->nd + ap2->nd - 2;
            /* nd = 0 or 1 or 2 */
            /* If nd == 0 do nothing ... */
            if (nd == 1) {
                /* Either ap1->nd is 1 dim or ap2->nd is 1 dim
                   and the other is 2-dim */
                dimensions[0] = (oap1->nd == 2) ? oap1->dimensions[0] : oap2->dimensions[1];
                l = dimensions[0];
                /* Fix it so that dot(shape=(N,1), shape=(1,))
                   and dot(shape=(1,), shape=(1,N)) both return
                   an (N,) array (but use the fast scalar code)
                */
            }
            else if (nd == 2) {
                dimensions[0] = oap1->dimensions[0];
                dimensions[1] = oap2->dimensions[1];
                /* We need to make sure that dot(shape=(1,1), shape=(1,N))
                   and dot(shape=(N,1),shape=(1,1)) uses
                   scalar multiplication appropriately
                */
                if (ap1shape == _row) l = dimensions[1];
                else l = dimensions[0];
            }
        }
    }
    else { /* (ap1->nd <= 2 && ap2->nd <= 2) */
        /*  Both ap1 and ap2 are vectors or matrices */
        l = ap1->dimensions[ap1->nd-1];

        if (ap2->dimensions[0] != l) {
            PyErr_SetString(PyExc_ValueError, "matrices are not aligned");
            goto fail;
        }
        nd = ap1->nd+ap2->nd-2;

        if (nd == 1)
            dimensions[0] = (ap1->nd == 2) ? ap1->dimensions[0] : ap2->dimensions[1];
        else if (nd == 2) {
            dimensions[0] = ap1->dimensions[0];
            dimensions[1] = ap2->dimensions[1];
        }
    }

    /* Choose which subtype to return */
    if (ap1->ob_type != ap2->ob_type) {
        prior2 = PyArray_GetPriority((PyObject *)ap2, 0.0);
        prior1 = PyArray_GetPriority((PyObject *)ap1, 0.0);
        subtype = (prior2 > prior1 ? ap2->ob_type : ap1->ob_type);
    }
    else {
        prior1 = prior2 = 0.0;
        subtype = ap1->ob_type;
    }

    ret = (PyArrayObject *)PyArray_New(subtype, nd, dimensions,
                                       typenum, NULL, NULL, 0, 0,
                                       (PyObject *)
                                       (prior2 > prior1 ? ap2 : ap1));

    if (ret == NULL) goto fail;
    numbytes = PyArray_NBYTES(ret);
    memset(ret->data, 0, numbytes);
    if (numbytes==0 || l == 0) {
        Py_DECREF(ap1);
        Py_DECREF(ap2);
        return PyArray_Return(ret);
    }


    if (ap2shape == _scalar) {
        /* Multiplication by a scalar -- Level 1 BLAS */
        /* if ap1shape is a matrix and we are not contiguous, then we can't
           just blast through the entire array using a single
           striding factor */
        NPY_BEGIN_ALLOW_THREADS

        if (typenum == PyArray_DOUBLE) {
            if (l == 1) {
                *((double *)ret->data) = *((double *)ap2->data) * \
                                         *((double *)ap1->data);
            }
            else if (ap1shape != _matrix) {
                cblas_daxpy(l, *((double *)ap2->data), (double *)ap1->data,
                            ap1stride/sizeof(double), (double *)ret->data, 1);
            }
            else {
                int maxind, oind, i, a1s, rets;
                char *ptr, *rptr;
                double val;
                maxind = (ap1->dimensions[0] >= ap1->dimensions[1] ? 0 : 1);
                oind = 1-maxind;
                ptr = ap1->data;
                rptr = ret->data;
                l = ap1->dimensions[maxind];
                val = *((double *)ap2->data);
                a1s = ap1->strides[maxind] / sizeof(double);
                rets = ret->strides[maxind] / sizeof(double);
                for (i=0; i < ap1->dimensions[oind]; i++) {
                    cblas_daxpy(l, val, (double *)ptr, a1s,
                                (double *)rptr, rets);
                    ptr += ap1->strides[oind];
                    rptr += ret->strides[oind];
                }
            }
        }
        else if (typenum == PyArray_CDOUBLE) {
            if (l == 1) {
                cdouble *ptr1, *ptr2, *res;
                ptr1 = (cdouble *)ap2->data;
                ptr2 = (cdouble *)ap1->data;
                res = (cdouble *)ret->data;
                res->real = ptr1->real * ptr2->real - ptr1->imag * ptr2->imag;
                res->imag = ptr1->real * ptr2->imag + ptr1->imag * ptr2->real;
            }
            else if (ap1shape != _matrix) {
                cblas_zaxpy(l, (double *)ap2->data, (double *)ap1->data,
                            ap1stride/sizeof(cdouble), (double *)ret->data, 1);
            }
            else {
                int maxind, oind, i, a1s, rets;
                char *ptr, *rptr;
                double *pval;
                maxind = (ap1->dimensions[0] >= ap1->dimensions[1] ? 0 : 1);
                oind = 1-maxind;
                ptr = ap1->data;
                rptr = ret->data;
                l = ap1->dimensions[maxind];
                pval = (double *)ap2->data;
                a1s = ap1->strides[maxind] / sizeof(cdouble);
                rets = ret->strides[maxind] / sizeof(cdouble);
                for (i=0; i < ap1->dimensions[oind]; i++) {
                    cblas_zaxpy(l, pval, (double *)ptr, a1s,
                                (double *)rptr, rets);
                    ptr += ap1->strides[oind];
                    rptr += ret->strides[oind];
                }
            }
        }
        else if (typenum == PyArray_FLOAT) {
            if (l == 1) {
                *((float *)ret->data) = *((float *)ap2->data) * \
                                        *((float *)ap1->data);
            }
            else if (ap1shape != _matrix) {
                cblas_saxpy(l, *((float *)ap2->data), (float *)ap1->data,
                            ap1stride/sizeof(float), (float *)ret->data, 1);
            }
            else {
                int maxind, oind, i, a1s, rets;
                char *ptr, *rptr;
                float val;
                maxind = (ap1->dimensions[0] >= ap1->dimensions[1] ? 0 : 1);
                oind = 1-maxind;
                ptr = ap1->data;
                rptr = ret->data;
                l = ap1->dimensions[maxind];
                val = *((float *)ap2->data);
                a1s = ap1->strides[maxind] / sizeof(float);
                rets = ret->strides[maxind] / sizeof(float);
                for (i=0; i < ap1->dimensions[oind]; i++) {
                    cblas_saxpy(l, val, (float *)ptr, a1s,
                                (float *)rptr, rets);
                    ptr += ap1->strides[oind];
                    rptr += ret->strides[oind];
                }
            }
        }
        else if (typenum == PyArray_CFLOAT) {
            if (l == 1) {
                cfloat *ptr1, *ptr2, *res;
                ptr1 = (cfloat *)ap2->data;
                ptr2 = (cfloat *)ap1->data;
                res = (cfloat *)ret->data;
                res->real = ptr1->real * ptr2->real - ptr1->imag * ptr2->imag;
                res->imag = ptr1->real * ptr2->imag + ptr1->imag * ptr2->real;
            }
            else if (ap1shape != _matrix) {
                cblas_caxpy(l, (float *)ap2->data, (float *)ap1->data,
                            ap1stride/sizeof(cfloat), (float *)ret->data, 1);
            }
            else {
                int maxind, oind, i, a1s, rets;
                char *ptr, *rptr;
                float *pval;
                maxind = (ap1->dimensions[0] >= ap1->dimensions[1] ? 0 : 1);
                oind = 1-maxind;
                ptr = ap1->data;
                rptr = ret->data;
                l = ap1->dimensions[maxind];
                pval = (float *)ap2->data;
                a1s = ap1->strides[maxind] / sizeof(cfloat);
                rets = ret->strides[maxind] / sizeof(cfloat);
                for (i=0; i < ap1->dimensions[oind]; i++) {
                    cblas_caxpy(l, pval, (float *)ptr, a1s,
                                (float *)rptr, rets);
                    ptr += ap1->strides[oind];
                    rptr += ret->strides[oind];
                }
            }
        }
        NPY_END_ALLOW_THREADS
    }
Ejemplo n.º 14
0
int CORE_cttqrt(int M, int N, int IB,
                PLASMA_Complex32_t *A1, int LDA1,
                PLASMA_Complex32_t *A2, int LDA2,
                PLASMA_Complex32_t *T, int LDT,
                PLASMA_Complex32_t *TAU, PLASMA_Complex32_t *WORK)
{
    static PLASMA_Complex32_t zone  = 1.0;
    static PLASMA_Complex32_t zzero = 0.0;
    static int                ione  = 1;

    PLASMA_Complex32_t alpha;
    int i, j, ii, sb, mi, ni;

    /* Check input arguments */
    if (M < 0) {
        coreblas_error(1, "Illegal value of M");
        return -1;
    }
    if (N < 0) {
        coreblas_error(2, "Illegal value of N");
        return -2;
    }
    if (IB < 0) {
        coreblas_error(3, "Illegal value of IB");
        return -3;
    }
    if ((LDA2 < max(1,M)) && (M > 0)) {
        coreblas_error(7, "Illegal value of LDA2");
        return -7;
    }

    /* Quick return */
    if ((M == 0) || (N == 0) || (IB == 0))
        return PLASMA_SUCCESS;

    for(ii = 0; ii < N; ii += IB) {
        sb = min(N-ii, IB);
        for(i = 0; i < sb; i++) {
            /*
             * Generate elementary reflector H( II*IB+I ) to annihilate
             * A( II*IB+I:mi, II*IB+I ).
             */
            mi = ii + i + 1;
            LAPACKE_clarfg_work(mi+1, &A1[LDA1*(ii+i)+ii+i], &A2[LDA2*(ii+i)], ione, &TAU[ii+i]);

            if (sb-i-1>0) {
                /*
                 * Apply H( II*IB+I ) to A( II*IB+I:M, II*IB+I+1:II*IB+IB ) from the left.
                 */
                ni = sb-i-1;
                cblas_ccopy(
                    ni,
                    &A1[LDA1*(ii+i+1)+(ii+i)], LDA1,
                    WORK, 1);

#ifdef COMPLEX
                LAPACKE_clacgv_work(ni, WORK, ione);
#endif
                cblas_cgemv(
                    CblasColMajor, (CBLAS_TRANSPOSE)PlasmaConjTrans,
                    mi, ni,
                    CBLAS_SADDR(zone), &A2[LDA2*(ii+i+1)], LDA2,
                    &A2[LDA2*(ii+i)], 1,
                    CBLAS_SADDR(zone), WORK, 1);
#ifdef COMPLEX
                LAPACKE_clacgv_work(ni, WORK, ione);
#endif
                alpha = -conjf(TAU[ii+i]);
                cblas_caxpy(
                    ni, CBLAS_SADDR(alpha),
                    WORK, 1,
                    &A1[LDA1*(ii+i+1)+ii+i], LDA1);
#ifdef COMPLEX
                LAPACKE_clacgv_work(ni, WORK, ione);
#endif
                cblas_cgerc(
                    CblasColMajor, mi, ni,
                    CBLAS_SADDR(alpha), &A2[LDA2*(ii+i)], 1,
                    WORK, 1,
                    &A2[LDA2*(ii+i+1)], LDA2);
            }

            /*
             * Calculate T.
             */

            if (i > 0 ) {

                cblas_ccopy(i, &A2[LDA2*(ii+i)+ii], 1, &WORK[ii], 1);
    
                cblas_ctrmv(
                    CblasColMajor, (CBLAS_UPLO)PlasmaUpper,
                    (CBLAS_TRANSPOSE)PlasmaConjTrans, (CBLAS_DIAG)PlasmaNonUnit,
                    i, &A2[LDA2*ii+ii], LDA2,
                    &WORK[ii], 1);
    
                alpha = -(TAU[ii+i]);
                for(j = 0; j < i; j++) {
                    WORK[ii+j] = alpha * WORK[ii+j];
                }
    
                if (ii > 0) {
                    cblas_cgemv(
                        CblasColMajor, (CBLAS_TRANSPOSE)PlasmaConjTrans, ii, i,
                        CBLAS_SADDR(alpha), &A2[LDA2*ii], LDA2,
                        &A2[LDA2*(ii+i)], 1,
                        CBLAS_SADDR(zzero), WORK, 1);
    
                    cblas_caxpy(i, CBLAS_SADDR(zone), &WORK[ii], 1, WORK, 1);
                }
    
                cblas_ccopy(i, WORK, 1, &T[LDT*(ii+i)], 1);
    
                cblas_ctrmv(
                    CblasColMajor, (CBLAS_UPLO)PlasmaUpper,
                    (CBLAS_TRANSPOSE)PlasmaNoTrans, (CBLAS_DIAG)PlasmaNonUnit,
                    i, &T[LDT*ii], LDT,
                    &T[LDT*(ii+i)], 1);

            }

            T[LDT*(ii+i)+i] = TAU[ii+i];
        }

        /* Apply Q' to the rest of the matrix to the left  */
        if (N > ii+sb) {
            CORE_cttrfb(
                PlasmaLeft, PlasmaConjTrans,
                PlasmaForward, PlasmaColumnwise,
                sb, N-(ii+sb), ii+sb, N-(ii+sb), sb,
                &A1[LDA1*(ii+sb)+ii], LDA1,
                &A2[LDA2*(ii+sb)], LDA2,
                &A2[LDA2*ii], LDA2,
                &T[LDT*ii], LDT,
                WORK, sb);
        }
    }
    return PLASMA_SUCCESS;
}
Ejemplo n.º 15
0
 DLLEXPORT void c_axpy(const blasint n, const openblas_complex_float alpha, const openblas_complex_float x[], openblas_complex_float y[]) {
     cblas_caxpy(n, (float*)&alpha, (float*)x, 1, (float*)y, 1);
 }