Esempio n. 1
0
void F77_ctrsm(int *layout, char *rtlf, char *uplow, char *transp, char *diagn,
         int *m, int *n, CBLAS_TEST_COMPLEX *alpha, CBLAS_TEST_COMPLEX *a, 
         int *lda, CBLAS_TEST_COMPLEX *b, int *ldb) {
  int i,j,LDA,LDB;
  CBLAS_TEST_COMPLEX *A, *B;
  CBLAS_SIDE side;
  CBLAS_DIAG diag;
  CBLAS_UPLO uplo;
  CBLAS_TRANSPOSE trans;

  get_uplo_type(uplow,&uplo);
  get_transpose_type(transp,&trans);
  get_diag_type(diagn,&diag);
  get_side_type(rtlf,&side);

  if (*layout == TEST_ROW_MJR) {
     if (side == CblasLeft) {
        LDA = *m+1;
        A=(CBLAS_TEST_COMPLEX* )malloc( (*m)*LDA*sizeof(CBLAS_TEST_COMPLEX ) );
        for( i=0; i<*m; i++ )
           for( j=0; j<*m; j++ ) {
              A[i*LDA+j].real=a[j*(*lda)+i].real;
              A[i*LDA+j].imag=a[j*(*lda)+i].imag;
           }
     }
     else{
        LDA = *n+1;
        A=(CBLAS_TEST_COMPLEX* )malloc((*n)*LDA*sizeof(CBLAS_TEST_COMPLEX));
        for( i=0; i<*n; i++ )
           for( j=0; j<*n; j++ ) {
              A[i*LDA+j].real=a[j*(*lda)+i].real;
              A[i*LDA+j].imag=a[j*(*lda)+i].imag;
           }
     }
     LDB = *n+1;
     B=(CBLAS_TEST_COMPLEX* )malloc((*m)*LDB*sizeof(CBLAS_TEST_COMPLEX));
     for( i=0; i<*m; i++ )
        for( j=0; j<*n; j++ ) {
           B[i*LDB+j].real=b[j*(*ldb)+i].real;
           B[i*LDB+j].imag=b[j*(*ldb)+i].imag;
        }
     cblas_ctrsm(CblasRowMajor, side, uplo, trans, diag, *m, *n, alpha, 
		 A, LDA, B, LDB );
     for( j=0; j<*n; j++ )
        for( i=0; i<*m; i++ ) {
           b[j*(*ldb)+i].real=B[i*LDB+j].real;
           b[j*(*ldb)+i].imag=B[i*LDB+j].imag;
        }
     free(A);
     free(B);
  }
  else if (*layout == TEST_COL_MJR)
     cblas_ctrsm(CblasColMajor, side, uplo, trans, diag, *m, *n, alpha, 
		   a, *lda, b, *ldb);
  else
     cblas_ctrsm(UNDEFINED, side, uplo, trans, diag, *m, *n, alpha, 
		   a, *lda, b, *ldb);
}
static inline void CORE_cgetrf_reclap_update(const int M, const int column, const int n1, const int n2,
                                             PLASMA_Complex32_t *A, const int LDA, int *IPIV, 
                                             const int thidx, const int thcnt)
{
    static PLASMA_Complex32_t posone =  1.0;
    static PLASMA_Complex32_t negone = -1.0;
    PLASMA_Complex32_t *Atop  = A    + column*LDA;
    PLASMA_Complex32_t *Atop2 = Atop + n1    *LDA;
    int coff, ccnt, lm, loff;

    CORE_cbarrier_thread( thidx, thcnt );
    
    psplit( n2, thidx, thcnt, &coff, &ccnt );

    if (ccnt > 0) {
        CORE_claswap1( ccnt, Atop2 + coff*LDA, LDA, column, n1 + column, IPIV ); /* swap to the right */
        
        cblas_ctrsm( CblasColMajor, CblasLeft, CblasLower, CblasNoTrans, CblasUnit,
                     n1, ccnt, CBLAS_SADDR(posone), Atop + column, LDA, Atop2 + coff*LDA + column, LDA );
    }
    
    /* __sync_synchronize(); */ /* hopefully we will not need memory fences */
    
    /* need to wait for pivoting and triangular solve to finish */
    CORE_cbarrier_thread( thidx, thcnt );
    
    psplit( M, thidx, thcnt, &loff, &lm );
    if (thidx == 0) {
        loff = column + n1;
        lm  -= column + n1;
    };
    
    cblas_cgemm( CblasColMajor, CblasNoTrans, CblasNoTrans, lm, n2, n1,
                 CBLAS_SADDR(negone), Atop+loff, LDA, Atop2 + column, LDA, CBLAS_SADDR(posone), Atop2+loff, LDA );
}
Esempio n. 3
0
inline void trsm(
	CBLAS_ORDER order, CBLAS_UPLO uplo,CBLAS_TRANSPOSE transA, 
	CBLAS_SIDE side, CBLAS_DIAG unit,
	int n, int nRHS,
	std::complex<float> const *A, int lda, std::complex<float> *B, int ldb
) {
	std::complex<float> alpha(1.0,0);
	cblas_ctrsm(order, side, uplo, transA, unit,n, nRHS,
		reinterpret_cast<cblas_float_complex_type const *>(&alpha),
	        reinterpret_cast<cblas_float_complex_type const *>(A), lda,
	        reinterpret_cast<cblas_float_complex_type *>(B), ldb);
}
Esempio n. 4
0
	DLLEXPORT int c_qr_solve(int m, int n, int bn, complex r[], complex b[], complex x[], complex work[], int len)
	{
		int info = 0;
		complex* clone_r = new complex[m*n];
		memcpy(clone_r, r, m*n*sizeof(complex));

		complex* tau = new complex[min(m,n)];
		cgeqrf_(&m, &n, clone_r, &m, tau, work, &len, &info);

		if (info != 0)
		{
			delete[] clone_r;
			delete[] tau;
			return info;
		}

		char side ='L';
		char tran = 'C';

		complex* clone_b = new complex[m*bn];
		memcpy(clone_b, b, m*bn*sizeof(complex));

		cunmqr_(&side, &tran, &m, &bn, &n, clone_r, &m, tau, clone_b, &m, work, &len, &info);
		complex one = {1.0, 0.0};
		cblas_ctrsm(CblasColMajor, CblasLeft, CblasUpper, CblasNoTrans, CblasNonUnit, n, bn, &one, clone_r, m, clone_b, m);

		for (int i = 0; i < n; ++i)
		{
			for (int j = 0; j < bn; ++j)
			{
				x[j * n + i] = clone_b[j * m + i];
			}
		}

		delete[] clone_r;
		delete[] tau;
		delete[] clone_b;
		return info;
	}
Esempio n. 5
0
	DLLEXPORT MKL_INT c_qr_solve_factored(MKL_INT m, MKL_INT n, MKL_INT bn, MKL_Complex8 r[], MKL_Complex8 b[], MKL_Complex8 tau[], MKL_Complex8 x[], MKL_Complex8 work[], MKL_INT len)
	{
		char side ='L';
		char tran = 'C';
		MKL_INT info = 0;

		MKL_Complex8* clone_b = new MKL_Complex8[m*bn];
		std::memcpy(clone_b, b, m*bn*sizeof(MKL_Complex8));

		cunmqr_(&side, &tran, &m, &bn, &n, r, &m, tau, clone_b, &m, work, &len, &info);
		MKL_Complex8 one = {1.0f, 0.0f};
		cblas_ctrsm(CblasColMajor, CblasLeft, CblasUpper, CblasNoTrans, CblasNonUnit, n, bn, &one, r, m, clone_b, m);
		for (MKL_INT i = 0; i < n; ++i)
		{
			for (MKL_INT j = 0; j < bn; ++j)
			{
				x[j * n + i] = clone_b[j * m + i];
			}
		}

		delete[] clone_b;
		return info;
	}
Esempio n. 6
0
	DLLEXPORT int c_qr_solve_factored(int m, int n, int bn, complex r[], complex b[], complex tau[], complex x[], complex work[], int len)
	{
		char side ='L';
		char tran = 'C';
		int info = 0;

		complex* clone_b = new complex[m*bn];
		memcpy(clone_b, b, m*bn*sizeof(complex));

		cunmqr_(&side, &tran, &m, &bn, &n, r, &m, tau, clone_b, &m, work, &len, &info);
		complex one = {1.0f, 0.0f};
		cblas_ctrsm(CblasColMajor, CblasLeft, CblasUpper, CblasNoTrans, CblasNonUnit, n, bn, &one, r, m, clone_b, m);
		for (int i = 0; i < n; ++i)
		{
			for (int j = 0; j < bn; ++j)
			{
				x[j * n + i] = clone_b[j * m + i];
			}
		}

		delete[] clone_b;
		return info;
	}
Esempio n. 7
0
int CORE_cgessm(int M, int N, int K, int IB,
                int *IPIV,
                PLASMA_Complex32_t *L, int LDL,
                PLASMA_Complex32_t *A, int LDA)
{
    static PLASMA_Complex32_t zone  =  1.0;
    static PLASMA_Complex32_t mzone = -1.0;
    static int                ione  =  1;

    int i, sb;
    int tmp, tmp2;

    /* Check input arguments */
    if (M < 0) {
        coreblas_error(1, "Illegal value of M");
        return -1;
    }
    if (N < 0) {
        coreblas_error(2, "Illegal value of N");
        return -2;
    }
    if (K < 0) {
        coreblas_error(3, "Illegal value of K");
        return -3;
    }
    if (IB < 0) {
        coreblas_error(4, "Illegal value of IB");
        return -4;
    }
    if ((LDL < max(1,M)) && (M > 0)) {
        coreblas_error(7, "Illegal value of LDL");
        return -7;
    }
    if ((LDA < max(1,M)) && (M > 0)) {
        coreblas_error(9, "Illegal value of LDA");
        return -9;
    }

    /* Quick return */
    if ((M == 0) || (N == 0) || (K == 0) || (IB == 0))
        return PLASMA_SUCCESS;

    for(i = 0; i < K; i += IB) {
        sb = min(IB, K-i);
        /*
         * Apply interchanges to columns I*IB+1:IB*( I+1 )+1.
         */
        tmp  = i+1;
        tmp2 = i+sb;
        LAPACKE_claswp_work(LAPACK_COL_MAJOR, N, A, LDA, tmp, tmp2, IPIV, ione);
        /*
         * Compute block row of U.
         */
        cblas_ctrsm(
            CblasColMajor, CblasLeft, CblasLower, CblasNoTrans, CblasUnit,
            sb, N, CBLAS_SADDR(zone),
            &L[LDL*i+i], LDL,
            &A[i], LDA );

        if (i+sb < M) {
        /*
        * Update trailing submatrix.
        */
        cblas_cgemm(
            CblasColMajor, CblasNoTrans, CblasNoTrans,
            M-(i+sb), N, sb,
            CBLAS_SADDR(mzone), &L[LDL*i+(i+sb)], LDL,
            &A[i], LDA,
            CBLAS_SADDR(zone), &A[i+sb], LDA );
        }
    }
    return PLASMA_SUCCESS;
}