/* Reference code for compact dgemm. Note that this just copies data into a
 * buffer from the compact storage and calls the regular dgemm code. This is
 * very naive reference code just used for testing purposes.
 *
 * Every argument is passed by pointer.
 *   layout          forwarded unchanged to dcopy_to_temp()/dcopy_from_temp()
 *   transa, transb  forwarded unchanged to dgemm_()
 *   m, n, k         problem sizes; A is unpacked with dimensions (m, k),
 *                   B with (k, n) and C with (m, n)
 *   alpha, beta     scalars forwarded unchanged to dgemm_()
 *   A, B, C         compact storage; C is overwritten with the result
 *   lda, ldb, ldc   leading dimensions of the compact storage
 *   nmat            number of matrices in the batch
 *   VLEN            number of matrices interleaved in one compact block
 *
 * Matrices are visited in groups of VLEN. Matrix j of group num starts at
 * element j + num*ld*cols*VLEN of its array, with cols = k for A and n for
 * B and C. A full group of VLEN matrices is always processed, even when
 * nmat is not a multiple of VLEN (presumably the storage is padded; verify
 * against the caller).
 * NOTE(review): the group stride is built from the column count only, which
 * looks like a column-major assumption; for other layouts this may only be
 * right when a single group is processed (nmat == VLEN) - confirm.
 *
 * The temporaries hold BUFSIZE doubles each and are handed to dgemm_() with
 * leading dimensions m (A), k (B) and m (C).
 */
LIBXSMM_INLINE
void compact_dgemm_ ( unsigned int *layout, char *transa, char *transb,
                      unsigned int *m, unsigned int *n, unsigned int *k,
                      double *alpha, double *A, unsigned int *lda,
                      double *B, unsigned int *ldb, double *beta,
                      double *C, unsigned int *ldc,
                      unsigned int *nmat, unsigned int *VLEN )
{
    unsigned int i, j, num;
    unsigned int a_off, b_off, c_off;
    double *Ap, Atemp[BUFSIZE];
    double *Bp, Btemp[BUFSIZE];
    double *Cp, Ctemp[BUFSIZE];
    /* Shared by all debug traces below; it is incremented at every check,
     * so the first two messages appear during the first call only. */
    static int ntimes = 0;

    if ( ++ntimes < 3 ) {
        printf("Inside reference compact_dgemm_()\n");
    }
    if ( ++ntimes < 3 ) {
        printf("layout=%u m/n/k=%u %u %u lda/b/c=%u %u %u nmat=%u VLEN=%u\n",
               *layout, *m, *n, *k, *lda, *ldb, *ldc, *nmat, *VLEN);
    }

    for ( i = 0, num = 0 ; i < (*nmat) ; i += *VLEN, num++ ) {
        for ( j = 0 ; j < *VLEN ; j++ ) {
            /* Unpack the data, call a reference DGEMM, repack the data */
            a_off = j + num*(*lda)*(*k)*(*VLEN);
            b_off = j + num*(*ldb)*(*n)*(*VLEN);
            c_off = j + num*(*ldc)*(*n)*(*VLEN);
            Ap = &A[a_off];
            Bp = &B[b_off];
            Cp = &C[c_off];

            /* ntimes is already >= 2 here, so as written this trace never
             * fires; the counter is still advanced to keep the original
             * sequencing. */
            if ( ++ntimes < 3 ) {
                printf("Doing a dgemm at place i=%u j=%u num=%u Ap[%u]=%g\n",
                       i, j, num, a_off, Ap[0]);
            }

            dcopy_to_temp ( *layout, Ap, *lda, *m, *k, Atemp, *VLEN );
            dcopy_to_temp ( *layout, Bp, *ldb, *k, *n, Btemp, *VLEN );
            dcopy_to_temp ( *layout, Cp, *ldc, *m, *n, Ctemp, *VLEN );
            dgemm_ ( transa, transb, m, n, k, alpha, Atemp, m, Btemp, k, beta, Ctemp, m );
            dcopy_from_temp ( *layout, Cp, *ldc, *m, *n, Ctemp, *VLEN );
        }
    }
}
/* Reference compact dtrsm: every matrix pair is unpacked from the compact
 * storage into plain temporaries, solved with the regular dtrsm_() and the
 * result is packed back into B.
 *
 * Note: if layout==101 (row major), then this code is known to only work when
 * nmat == VLEN. To check for accuracy otherwise, transpose everything.
 *
 * Every argument is passed by pointer.
 *   layout                   102 selects the first set of size/stride rules
 *                            below, any other value the second; also
 *                            forwarded to dcopy_to_temp()/dcopy_from_temp()
 *   side, uplo, transa, diag forwarded unchanged to dtrsm_()
 *   m, n                     dimensions of B
 *   alpha                    scalar forwarded unchanged to dtrsm_()
 *   A, lda                   compact storage of the square matrices
 *   B, ldb                   compact storage of the right-hand sides,
 *                            overwritten with the solution
 *   nmat                     number of matrices in the batch
 *   VLEN                     number of matrices interleaved per compact block
 */
LIBXSMM_INLINE
void compact_dtrsm_ ( unsigned int *layout, char *side, char *uplo,
                      char *transa, char *diag, unsigned int *m,
                      unsigned int *n, double *alpha, double *A,
                      unsigned int *lda, double *B, unsigned int *ldb,
                      unsigned int *nmat, unsigned int *VLEN )
{
    /* One counter drives all three traces; it advances twice per call and
     * once more for every individual solve. */
    static int calls = 0;
    const int on_left = ( (*side == 'L') || (*side == 'l') );
    const int is_102 = ( *layout == 102 );
    int first, lane, group;
    int tri_dim, a_stride, b_stride;
    int a_index, b_index;
    double a_buf[BUFSIZE], b_buf[BUFSIZE];
    double *a_ptr, *b_ptr;

    if ( ++calls < 3 ) printf("Inside reference compact_dtrsm_()\n");

    /* Order of the square matrix: m for (layout 102, left) and for
     * (other layout, right); n in the two remaining combinations. */
    tri_dim = ( on_left == is_102 ) ? *m : *n;

    /* Elements between consecutive compact blocks of B and of A. */
    b_stride = (*ldb)*( is_102 ? *n : *m )*(*VLEN);
    a_stride = (*lda)*tri_dim*(*VLEN);

    if ( ++calls < 3 ) printf("m/n=%u,%u layout=%u asize=%i VLEN=%u nmat=%u offseta=%i offsetb=%i\n",*m,*n,*layout, tri_dim, *VLEN, *nmat, a_stride, b_stride );

    first = 0;
    group = 0;
    while ( first < (int)(*nmat) )
    {
        for ( lane = 0 ; lane < (int)*VLEN ; lane++ )
        {
            /* Locate this lane's matrices inside the current block */
            a_index = lane + group*a_stride;
            b_index = lane + group*b_stride;
            a_ptr = &A[a_index];
            b_ptr = &B[b_index];

            if ( ++calls < 15 ) printf("Doing a dtrsm at place i=%d j=%d num=%d Ap[%d]=%g Bp[%d]=%g\n",first,lane,group,a_index,a_ptr[0],b_index,b_ptr[0]);

            /* Unpack, solve with the reference DTRSM, repack B */
            dcopy_to_temp ( *layout, a_ptr, *lda, tri_dim, tri_dim, a_buf, *VLEN );
            dcopy_to_temp ( *layout, b_ptr, *ldb, *m, *n, b_buf, *VLEN );
            dtrsm_ ( side, uplo, transa, diag, m, n, alpha, a_buf, &tri_dim, b_buf, m);
            dcopy_from_temp ( *layout, b_ptr, *ldb, *m, *n, b_buf, *VLEN );
        }
        first += *VLEN;
        group++;
    }
}