Пример #1
0
/* Reference compact STRSM.
 *
 * Visits the matrices stored in A and B one at a time. For each one, A and B
 * are copied into dense temporaries with scopy_to_temp(), the reference
 * strsm_() is applied, and the resulting B is written back with
 * scopy_from_temp().
 *
 * The A operand is taken to be square: its order is *m when side is 'L'/'l'
 * and *n otherwise. The dense temporaries are handed to strsm_() with leading
 * dimensions equal to that order (A) and *m (B).
 *
 * Matrices are addressed in groups of *VLEN: matrix `lane` of group `group`
 * starts at element lane + group*ld*cols*VLEN of its array (cols is the order
 * of A for A, and *n for B).
 *
 * NOTE(review): nothing here checks that a matrix fits into BUFSIZE floats;
 * the caller is presumably responsible for that - confirm against the helpers.
 *
 * Note: if layout==101 (row major), then this code is known to only work when
 *       nmat == VLEN. To check for accuracy otherwise, transpose everything. */
LIBXSMM_INLINE
void compact_strsm_ ( unsigned int *layout, char *side, char *uplo,
                      char *transa, char *diag, unsigned int *m,
                      unsigned int *n, float *alpha, float *A,
                      unsigned int *lda, float *B, unsigned int *ldb,
                      unsigned int *nmat, unsigned int *VLEN )
{
    float a_dense[BUFSIZE], b_dense[BUFSIZE];
    float *a_pack, *b_pack;
    int done = 0, group = 0, lane, a_order;

    /* Order of the triangular matrix depends on which side it multiplies from */
    a_order = ( (*side == 'L') || (*side == 'l') ) ? (int)*m : (int)*n;

    while ( done < (int)(*nmat) )
    {
        /* First element of this group of VLEN matrices in A and in B */
        const unsigned int a_base = group * (*lda) * a_order * (*VLEN);
        const unsigned int b_base = group * (*ldb) * (*n) * (*VLEN);

        for ( lane = 0 ; lane < (int)*VLEN ; ++lane )
        {
            a_pack = A + ( lane + a_base );
            b_pack = B + ( lane + b_base );

            /* Unpack, solve with the reference STRSM, repack the result */
            scopy_to_temp ( *layout, a_pack, *lda, a_order, a_order, a_dense, *VLEN );
            scopy_to_temp ( *layout, b_pack, *ldb, *m, *n, b_dense, *VLEN );
            strsm_ ( side, uplo, transa, diag, m, n, alpha, a_dense, &a_order, b_dense, m);
            scopy_from_temp ( *layout, b_pack, *ldb, *m, *n, b_dense, *VLEN );
        }

        done += *VLEN;
        ++group;
    }
}
Пример #2
0
Файл: gemm.c Проект: hfp/libxsmm
/* Reference compact SGEMM.
 *
 * Visits the matrices stored in A, B and C one at a time. For each one, the
 * three operands are copied into dense temporaries with scopy_to_temp(), the
 * reference sgemm_() is applied, and the resulting C is written back with
 * scopy_from_temp().
 *
 * A is copied as m x k, B as k x n and C as m x n; the dense temporaries are
 * handed to sgemm_() with leading dimensions *m, *k and *m respectively.
 * NOTE(review): those copy shapes and leading dimensions do not depend on
 * transa/transb - confirm that transposed operands are handled as intended.
 *
 * Matrices are addressed in groups of *VLEN: matrix j of group num starts at
 * element j + num*ld*cols*VLEN of its array (cols is *k for A, *n for B and C).
 *
 * Debug output (a two-line header plus one trace line per matrix) is printed
 * on the first two calls only.
 *
 * NOTE(review): nothing here checks that a matrix fits into BUFSIZE floats;
 * the caller is presumably responsible for that - confirm against the helpers.
 *
 * Note: if layout==101 (row major), then this code is known to only work when
 *       nmat == VLEN. To check for accuracy otherwise, transpose everything. */
LIBXSMM_INLINE
void compact_sgemm_ ( char *transa, char *transb,
                      unsigned int *layout, unsigned int *m, unsigned int *n,
                      unsigned int *k, float *alpha, float *A,
                      unsigned int *lda, float *B, unsigned int *ldb,
                      float *beta, float *C, unsigned int *ldc,
                      unsigned int *nmat, unsigned int *VLEN )
{
    unsigned int i, j, num;
    float *Ap, Atemp[BUFSIZE];
    float *Bp, Btemp[BUFSIZE];
    float *Cp, Ctemp[BUFSIZE];
    static int ncalls = 0;
    int verbose;

    /* Count calls (not messages) and stop counting once the limit is reached,
     * so every debug line is reachable and the counter can never overflow. */
    verbose = ( ncalls < 2 );
    if ( verbose )
    {
       ++ncalls;
       printf("Inside reference compact_sgemm_()\n");
       printf("layout=%u m/n/k=%u %u %u lda/b/c=%u %u %u nmat=%u VLEN=%u\n",*layout,*m,*n,*k,*lda,*ldb,*ldc,*nmat,*VLEN);
    }
    for ( i = 0, num = 0 ; i < (*nmat) ; i+= *VLEN, num++ )
    {
       for ( j = 0 ; j < *VLEN ; j++ )
       {
           /* Unpack the data, call a reference SGEMM, repack the data */
           Ap = &A[j+num*(*lda)*(*k)*(*VLEN)];
           Bp = &B[j+num*(*ldb)*(*n)*(*VLEN)];
           Cp = &C[j+num*(*ldc)*(*n)*(*VLEN)];
           if ( verbose ) printf("Doing a sgemm at place i=%u j=%u num=%u Ap[%u]=%g\n",i,j,num,j+num*(*lda)*(*k)*(*VLEN),Ap[0]);
           scopy_to_temp ( *layout, Ap, *lda, *m, *k, Atemp, *VLEN );
           scopy_to_temp ( *layout, Bp, *ldb, *k, *n, Btemp, *VLEN );
           scopy_to_temp ( *layout, Cp, *ldc, *m, *n, Ctemp, *VLEN );
           sgemm_ ( transa, transb, m, n, k, alpha, Atemp, m, Btemp, k, beta, Ctemp, m );
           scopy_from_temp ( *layout, Cp, *ldc, *m, *n, Ctemp, *VLEN );
       }
    }
}