void assign_decor_obs_mtx(u8 num_sats, sdiff_t *sats_with_ref_first, double ref_ecef[3], double *decor_mtx, double *obs_mtx) { u32 num_diffs = num_sats-1; u32 state_dim = num_diffs + 6; u32 obs_dim = 2 * num_diffs; memset(obs_mtx, 0, state_dim * obs_dim * sizeof(double)); double DE[num_diffs * 3]; assign_de_mtx(num_sats, sats_with_ref_first, ref_ecef, &DE[0]); //assign L^-1 * DE into DE cblas_dtrmm(CblasRowMajor, CblasLeft, CblasLower, CblasNoTrans, CblasUnit, // CBLAS_ORDER, CBLAS_SIDE, CBLAS_UPLO, CBLAS_TRANSPOSE, CBLAS_DIAG num_diffs, 3, //M, N 1, &decor_mtx[0], num_diffs, //alpha, A, lda &DE[0], 3); //B, ldb for (u32 i=0; i<num_diffs; i++) { obs_mtx[i*state_dim] = DE[i*3] / GPS_L1_LAMBDA_NO_VAC; obs_mtx[i*state_dim + 1] = DE[i*3 + 1] / GPS_L1_LAMBDA_NO_VAC; obs_mtx[i*state_dim + 2] = DE[i*3 + 2] / GPS_L1_LAMBDA_NO_VAC; memcpy(&obs_mtx[(i+num_diffs)*state_dim], &DE[i*3], 3 * sizeof(double)); memcpy(&obs_mtx[i*state_dim + 6], &decor_mtx[i*num_diffs], (i+1) * sizeof(double)); } }
void STARPU_DTRMM(const char *side, const char *uplo, const char *transA, const char *diag, const int m, const int n, const double alpha, const double *A, const int lda, double *B, const int ldb) { enum CBLAS_SIDE side_ = (toupper(side[0]) == 'L')?CblasLeft:CblasRight; enum CBLAS_UPLO uplo_ = (toupper(uplo[0]) == 'U')?CblasUpper:CblasLower; enum CBLAS_TRANSPOSE transA_ = (toupper(transA[0]) == 'N')?CblasNoTrans:CblasTrans; enum CBLAS_DIAG diag_ = (toupper(diag[0]) == 'N')?CblasNonUnit:CblasUnit; cblas_dtrmm(CblasColMajor, side_, uplo_, transA_, diag_, m, n, alpha, A, lda, B, ldb); }
JNIEXPORT void JNICALL Java_uncomplicate_neanderthal_CBLAS_dtrmm (JNIEnv *env, jclass clazz, jint Order, jint Side, jint Uplo, jint TransA, jint Diag, jint M, jint N, jdouble alpha, jobject A, jint lda, jobject B, jint ldb) { double *cA = (double *) (*env)->GetDirectBufferAddress(env, A); double *cB = (double *) (*env)->GetDirectBufferAddress(env, B); cblas_dtrmm(Order, Side, Uplo, TransA, Diag, M, N, alpha, cA, lda, cB, ldb); };
inline void trmm( CBLAS_ORDER const order, CBLAS_SIDE const side, CBLAS_UPLO const uplo, CBLAS_TRANSPOSE const transA, CBLAS_DIAG const unit, int const M, int const N, double const *A, int const lda, double* B, int const incB ) { cblas_dtrmm(order, side, uplo, transA, unit, M, N, 1.0, A, lda, B, incB ); }
int testing_dtrmm(int argc, char **argv) { /* Check for number of arguments*/ if ( argc != 5 ) { USAGE("TRMM", "alpha M N LDA LDB", " - alpha : alpha coefficient\n" " - M : number of rows of matrices B\n" " - N : number of columns of matrices B\n" " - LDA : leading dimension of matrix A\n" " - LDB : leading dimension of matrix B\n"); return -1; } double alpha = (double) atol(argv[0]); int M = atoi(argv[1]); int N = atoi(argv[2]); int LDA = atoi(argv[3]); int LDB = atoi(argv[4]); double eps; int info_solution; int s, u, t, d, i; int LDAxM = LDA*max(M,N); int LDBxN = LDB*max(M,N); double *A = (double *)malloc(LDAxM*sizeof(double)); double *B = (double *)malloc(LDBxN*sizeof(double)); double *Binit = (double *)malloc(LDBxN*sizeof(double)); double *Bfinal = (double *)malloc(LDBxN*sizeof(double)); /* Check if unable to allocate memory */ if ( (!A) || (!B) || (!Binit) || (!Bfinal)){ printf("Out of Memory \n "); return -2; } eps = LAPACKE_dlamch_work('e'); printf("\n"); printf("------ TESTS FOR PLASMA DTRMM ROUTINE ------- \n"); printf(" Size of the Matrix B : %d by %d\n", M, N); printf("\n"); printf(" The matrix A is randomly generated for each test.\n"); printf("============\n"); printf(" The relative machine precision (eps) is to be %e \n",eps); printf(" Computational tests pass if scaled residuals are less than 10.\n"); /*---------------------------------------------------------- * TESTING DTRMM */ /* Initialize A, B, C */ LAPACKE_dlarnv_work(IONE, ISEED, LDAxM, A); LAPACKE_dlarnv_work(IONE, ISEED, LDBxN, B); for(i=0;i<max(M,N);i++) A[LDA*i+i] = A[LDA*i+i] + 2.0; for (s=0; s<2; s++) { for (u=0; u<2; u++) { #ifdef COMPLEX for (t=0; t<3; t++) { #else for (t=0; t<2; t++) { #endif for (d=0; d<2; d++) { memcpy(Binit, B, LDBxN*sizeof(double)); memcpy(Bfinal, B, LDBxN*sizeof(double)); /* PLASMA DTRMM */ PLASMA_dtrmm(side[s], uplo[u], trans[t], diag[d], M, N, alpha, A, LDA, Bfinal, LDB); /* Check the solution */ info_solution = check_solution(side[s], uplo[u], trans[t], diag[d], M, N, alpha, A, LDA, Binit, Bfinal, LDB); printf("***************************************************\n"); if (info_solution == 0) { printf(" ---- TESTING DTRMM (%s, %s, %s, %s) ...... PASSED !\n", sidestr[s], uplostr[u], transstr[t], diagstr[d]); } else { printf(" ---- TESTING DTRMM (%s, %s, %s, %s) ... FAILED !\n", sidestr[s], uplostr[u], transstr[t], diagstr[d]); } printf("***************************************************\n"); } } } } free(A); free(B); free(Binit); free(Bfinal); return 0; } /*-------------------------------------------------------------- * Check the solution */ static int check_solution(PLASMA_enum side, PLASMA_enum uplo, PLASMA_enum trans, PLASMA_enum diag, int M, int N, double alpha, double *A, int LDA, double *Bref, double *Bplasma, int LDB) { int info_solution; double Anorm, Binitnorm, Bplasmanorm, Blapacknorm, Rnorm, result; double eps; double mzone = (double)-1.0; double *work = (double *)malloc(max(M, N)* sizeof(double)); int Am, An; if (side == PlasmaLeft) { Am = M; An = M; } else { Am = N; An = N; } Anorm = LAPACKE_dlantr_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), lapack_const(uplo), lapack_const(diag), Am, An, A, LDA, work); Binitnorm = LAPACKE_dlange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), M, N, Bref, LDB, work); Bplasmanorm = LAPACKE_dlange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), M, N, Bplasma, LDB, work); cblas_dtrmm(CblasColMajor, (CBLAS_SIDE)side, (CBLAS_UPLO)uplo, (CBLAS_TRANSPOSE)trans, (CBLAS_DIAG)diag, M, N, (alpha), A, LDA, Bref, LDB); Blapacknorm = LAPACKE_dlange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), M, N, Bref, LDB, work); cblas_daxpy(LDB * N, (mzone), Bplasma, 1, Bref, 1); Rnorm = LAPACKE_dlange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), M, N, Bref, LDB, work); eps = LAPACKE_dlamch_work('e'); printf("Rnorm %e, Anorm %e, Binitnorm %e, Bplasmanorm %e, Blapacknorm %e\n", Rnorm, Anorm, Binitnorm, Bplasmanorm, Blapacknorm); result = Rnorm / ((Anorm + Blapacknorm) * max(M,N) * eps); printf("============\n"); printf("Checking the norm of the difference against reference DTRMM \n"); printf("-- ||Cplasma - Clapack||_oo/((||A||_oo+||B||_oo).N.eps) = %e \n", result); if ( isinf(Blapacknorm) || isinf(Bplasmanorm) || isnan(result) || isinf(result) || (result > 10.0) ) { printf("-- The solution is suspicious ! \n"); info_solution = 1; } else { printf("-- The solution is CORRECT ! \n"); info_solution= 0 ; } free(work); return info_solution; }
static inline int CORE_dpamm_a2(PLASMA_enum side, PLASMA_enum trans, PLASMA_enum uplo, int M, int N, int K, int L, int vi2, int vi3, double *A2, int LDA2, const double *V, int LDV, double *W, int LDW) { /* * A2 = A2 + op(V) * W or A2 = A2 + W * op(V) */ int j; static double zone = 1.0; static double mzone = -1.0; if (side == PlasmaLeft) { if (((trans == PlasmaTrans) && (uplo == CblasUpper)) || ((trans == PlasmaNoTrans) && (uplo == CblasLower))) { printf("Left Upper/ConjTrans & Lower/NoTrans not implemented yet\n"); return PLASMA_ERR_NOT_SUPPORTED; } else { //trans /* * A2 = A2 - V * W */ /* A2_1 = A2_1 - V_1 * W_1 */ if (M > L) { cblas_dgemm( CblasColMajor, (CBLAS_TRANSPOSE)trans, CblasNoTrans, M-L, N, L, (mzone), V, LDV, W, LDW, (zone), A2, LDA2); } /* W_1 = V_2 * W_1 */ cblas_dtrmm( CblasColMajor, CblasLeft, (CBLAS_UPLO)uplo, (CBLAS_TRANSPOSE)trans, CblasNonUnit, L, N, (zone), &V[vi2], LDV, W, LDW); /* A2_2 = A2_2 - W_1 */ for(j = 0; j < N; j++) { cblas_daxpy( L, (mzone), &W[LDW*j], 1, &A2[LDA2*j+(M-L)], 1); } /* A2 = A2 - V_3 * W_2 */ if (K > L) { cblas_dgemm( CblasColMajor, (CBLAS_TRANSPOSE)trans, CblasNoTrans, M, N, (K-L), (mzone), &V[vi3], LDV, &W[L], LDW, (zone), A2, LDA2); } } } else { //side right if (((trans == PlasmaTrans) && (uplo == CblasUpper)) || ((trans == PlasmaNoTrans) && (uplo == CblasLower))) { /* * A2 = A2 - W * V' */ /* A2 = A2 - W_2 * V_3' */ if (K > L) { cblas_dgemm( CblasColMajor, CblasNoTrans, (CBLAS_TRANSPOSE)trans, M, N, K-L, (mzone), &W[LDW*L], LDW, &V[vi3], LDV, (zone), A2, LDA2); } /* A2_1 = A2_1 - W_1 * V_1' */ if (N > L) { cblas_dgemm( CblasColMajor, CblasNoTrans, (CBLAS_TRANSPOSE)trans, M, N-L, L, (mzone), W, LDW, V, LDV, (zone), A2, LDA2); } /* A2_2 = A2_2 - W_1 * V_2' */ if (L > 0) { cblas_dtrmm( CblasColMajor, CblasRight, (CBLAS_UPLO)uplo, (CBLAS_TRANSPOSE)trans, CblasNonUnit, M, L, (mzone), &V[vi2], LDV, W, LDW); for (j = 0; j < L; j++) { cblas_daxpy( M, (zone), &W[LDW*j], 1, &A2[LDA2*(N-L+j)], 1); } } } else { printf("Right Upper/NoTrans & Lower/ConjTrans not implemented yet\n"); return PLASMA_ERR_NOT_SUPPORTED; } } return PLASMA_SUCCESS; }
static inline int CORE_dpamm_w(PLASMA_enum side, PLASMA_enum trans, PLASMA_enum uplo, int M, int N, int K, int L, int vi2, int vi3, const double *A1, int LDA1, double *A2, int LDA2, const double *V, int LDV, double *W, int LDW) { /* * W = A1 + op(V) * A2 or W = A1 + A2 * op(V) */ int j; static double zone = 1.0; static double zzero = 0.0; if (side == PlasmaLeft) { if (((trans == PlasmaTrans) && (uplo == CblasUpper)) || ((trans == PlasmaNoTrans) && (uplo == CblasLower))) { /* * W = A1 + V' * A2 */ /* W = A2_2 */ LAPACKE_dlacpy_work(LAPACK_COL_MAJOR, lapack_const(PlasmaUpperLower), L, N, &A2[K-L], LDA2, W, LDW); /* W = V_2' * W + V_1' * A2_1 (ge+tr, top L rows of V') */ if (L > 0) { /* W = V_2' * W */ cblas_dtrmm( CblasColMajor, CblasLeft, (CBLAS_UPLO)uplo, (CBLAS_TRANSPOSE)trans, CblasNonUnit, L, N, (zone), &V[vi2], LDV, W, LDW); /* W = W + V_1' * A2_1 */ if (K > L) { cblas_dgemm( CblasColMajor, (CBLAS_TRANSPOSE)trans, CblasNoTrans, L, N, K-L, (zone), V, LDV, A2, LDA2, (zone), W, LDW); } } /* W_2 = V_3' * A2: (ge, bottom M-L rows of V') */ if (M > L) { cblas_dgemm( CblasColMajor, (CBLAS_TRANSPOSE)trans, CblasNoTrans, (M-L), N, K, (zone), &V[vi3], LDV, A2, LDA2, (zzero), &W[L], LDW); } /* W = A1 + W */ for(j = 0; j < N; j++) { cblas_daxpy( M, (zone), &A1[LDA1*j], 1, &W[LDW*j], 1); } } else { printf("Left Upper/NoTrans & Lower/ConjTrans not implemented yet\n"); return PLASMA_ERR_NOT_SUPPORTED; } } else { //side right if (((trans == PlasmaTrans) && (uplo == CblasUpper)) || ((trans == PlasmaNoTrans) && (uplo == CblasLower))) { printf("Right Upper/ConjTrans & Lower/NoTrans not implemented yet\n"); return PLASMA_ERR_NOT_SUPPORTED; } else { /* * W = A1 + A2 * V */ if (L > 0) { /* W = A2_2 */ LAPACKE_dlacpy_work(LAPACK_COL_MAJOR, lapack_const(PlasmaUpperLower), M, L, &A2[LDA2*(K-L)], LDA2, W, LDW); /* W = W * V_2 --> W = A2_2 * V_2 */ cblas_dtrmm( CblasColMajor, CblasRight, (CBLAS_UPLO)uplo, (CBLAS_TRANSPOSE)trans, CblasNonUnit, M, L, (zone), &V[vi2], LDV, W, LDW); /* W = W + A2_1 * V_1 */ if (K > L) { cblas_dgemm( CblasColMajor, CblasNoTrans, (CBLAS_TRANSPOSE)trans, M, L, K-L, (zone), A2, LDA2, V, LDV, (zone), W, LDW); } } /* W = W + A2 * V_3 */ if (N > L) { cblas_dgemm( CblasColMajor, CblasNoTrans, (CBLAS_TRANSPOSE)trans, M, N-L, K, (zone), A2, LDA2, &V[vi3], LDV, (zzero), &W[LDW*L], LDW); } /* W = A1 + W */ for (j = 0; j < N; j++) { cblas_daxpy( M, (zone), &A1[LDA1*j], 1, &W[LDW*j], 1); } } } return PLASMA_SUCCESS; }
static int check_reduction(int itype, int uplo, int N, int bw, double *A1, double *A2, int LDA, double *B2, int LDB, double *Q, double eps ) { double alpha = 1.0; double beta = 0.0; double Anorm, Rnorm, result; int info_reduction; int i, j; char *str; double *Aorig = (double *)malloc(N*N*sizeof(double)); double *Residual = (double *)malloc(N*N*sizeof(double)); double *T = (double *)malloc(N*N*sizeof(double)); double *work = (double *)malloc(N*sizeof(double)); memset((void*)T, 0, N*N*sizeof(double)); LAPACKE_dlacpy_work(LAPACK_COL_MAJOR, ' ', N, N, A1, LDA, Residual, N); /* Rebuild the T */ LAPACKE_dlacpy_work(LAPACK_COL_MAJOR, lapack_const(uplo), N, N, A2, LDA, T, N); if (uplo == PlasmaLower) { /* Set the reflectors to 0 */ for (i = bw+1; i < N; i++) for (j = 0 ; (j < N) && (j < i-bw); j++) T[j*N+i] = 0.; /* Copy the lower part to the upper part to rebuild the symmetry */ for (i = 0; i < N; i++) for (j = 0 ; j < i; j++) T[i*N+j] = (T[j*N+i]); } else { /* Set the reflectors to 0 */ for (j = bw+1; j < N; j++) for (i = 0 ; (i < N) && (i < j-bw); i++) T[j*N+i] = 0.; /* Copy the upper part to the lower part to rebuild the symmetry */ for (i = 0; i < N; i++) for (j = i+1 ; j < N; j++) T[i*N+j] = (T[j*N+i]); } memset((void*)Aorig, 0, N*N*sizeof(double)); if (itype == 1) { if (uplo == PlasmaLower) { str = "L*Q*T*Q'*L'"; /* Compute Aorig=Q*T*Q' */ cblas_dgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, N, N, N, (alpha), Q, LDA, T, N, (beta), Aorig, N); cblas_dgemm(CblasColMajor, CblasNoTrans, CblasTrans, N, N, N, (alpha), Aorig, N, Q, LDA, (beta), T, N); LAPACKE_dlacpy_work(LAPACK_COL_MAJOR, PlasmaUpperLower, N, N, T, N, Aorig, N); cblas_dtrmm(CblasColMajor, CblasLeft, CblasLower, CblasNoTrans, CblasNonUnit, N, N, (alpha), B2, LDB, Aorig, N); cblas_dtrmm(CblasColMajor, CblasRight, CblasLower, CblasTrans, CblasNonUnit, N, N, (alpha), B2, LDB, Aorig, N); } else { str = "U'*Q*T*Q'*U"; /* Compute Aorig=Q'*T*Q */ cblas_dgemm(CblasColMajor, CblasTrans, CblasNoTrans, N, N, N, (alpha), Q, LDA, T, N, (beta), Aorig, N); cblas_dgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, N, N, N, (alpha), Aorig, N, Q, LDA, (beta), T, N); LAPACKE_dlacpy_work(LAPACK_COL_MAJOR, 'A', N, N, T, N, Aorig, N); cblas_dtrmm(CblasColMajor, CblasLeft, CblasUpper, CblasTrans, CblasNonUnit, N, N, (alpha), B2, LDB, Aorig, N); cblas_dtrmm(CblasColMajor, CblasRight, CblasUpper, CblasNoTrans, CblasNonUnit, N, N, (alpha), B2, LDB, Aorig, N); } } else { if (uplo == PlasmaLower) { str = "inv(L')*Q*A2*Q'*inv(L)"; /* Compute Aorig=Q*T*Q' */ cblas_dgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, N, N, N, (alpha), Q, LDA, T, N, (beta), Aorig, N); cblas_dgemm(CblasColMajor, CblasNoTrans, CblasTrans, N, N, N, (alpha), Aorig, N, Q, LDA, (beta), T, N ); LAPACKE_dlacpy_work(LAPACK_COL_MAJOR, 'A', N, N, T, N, Aorig, N); cblas_dtrsm(CblasColMajor, CblasLeft, CblasLower, CblasTrans, CblasNonUnit, N, N, (alpha), B2, LDB, Aorig, N); cblas_dtrsm(CblasColMajor, CblasRight, CblasLower, CblasNoTrans, CblasNonUnit, N, N, (alpha), B2, LDB, Aorig, N); } else{ str = "inv(U)*Q*A2*Q'*inv(U')"; /* Compute Aorig=Q'*T*Q */ cblas_dgemm(CblasColMajor, CblasTrans, CblasNoTrans, N, N, N, (alpha), Q, LDA, T, N, (beta), Aorig, N); cblas_dgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, N, N, N, (alpha), Aorig, N, Q, LDA, (beta), T, N); LAPACKE_dlacpy_work(LAPACK_COL_MAJOR, 'A', N, N, T, N, Aorig, N); cblas_dtrsm(CblasColMajor, CblasLeft, CblasUpper, CblasNoTrans, CblasNonUnit, N, N, (alpha), B2, LDB, Aorig, N); cblas_dtrsm(CblasColMajor, CblasRight, CblasUpper, CblasTrans, CblasNonUnit, N, N, (alpha), B2, LDB, Aorig, N); } } /* Compute the Residual */ for (i = 0; i < N; i++) for (j = 0 ; j < N; j++) Residual[j*N+i] = A1[j*LDA+i]-Aorig[j*N+i]; Rnorm = LAPACKE_dlange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), N, N, Residual, N, work); Anorm = LAPACKE_dlange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), N, N, A2, LDA, work); result = Rnorm / (Anorm * N *eps); printf("============\n"); printf("Checking the tridiagonal reduction \n"); printf("-- ||A-%s||_oo/(||A||_oo.N.eps) = %e \n", str, result ); if (isnan(result) || isinf(result) || (result > 60.0) ) { printf("-- Reduction is suspicious ! \n"); info_reduction = 1; } else { printf("-- Reduction is CORRECT ! \n"); info_reduction = 0; } free(Aorig); free(Residual); free(T); free(work); return info_reduction; }