Ejemplo n.º 1
0
Archivo: gemm.c Proyecto: hfp/libxsmm
/* Reference code for compact dgemm, used for testing purposes only. It is
   deliberately naive: for every matrix of the batch it copies A, B and C out
   of the compact storage into dense temporaries, calls the regular dgemm_ on
   them, and copies the resulting C back into the compact storage.

   All arguments are passed by pointer:
     layout          forwarded unchanged to dcopy_to_temp/dcopy_from_temp
     transa, transb  forwarded unchanged to dgemm_
     m, n, k         sizes forwarded to dgemm_; A is unpacked as m x k,
                     B as k x n and C as m x n
     alpha, beta     scalars forwarded to dgemm_
     A, B, C         compact arrays; C is written back after the multiply
     lda, ldb, ldc   leading dimensions of the compact arrays
     nmat            total matrix count; the outer loop walks it VLEN at a time
     VLEN            matrices handled per pack (inner loop count); also
                     forwarded to the copy helpers

   NOTE(review): the temporaries are always unpacked as m x k / k x n and the
   pack strides always use k and n, independent of transa/transb and layout.
   That looks right only for the non-transposed, column-major case -- confirm
   against the callers.
   NOTE(review): nothing in this routine checks that the unpacked matrices fit
   into BUFSIZE doubles; presumably the copy helpers or the callers do. */
LIBXSMM_INLINE
void compact_dgemm_ ( unsigned int *layout, char *transa, char *transb,
                      unsigned int *m, unsigned int *n,
                      unsigned int *k, double *alpha, double *A,
                      unsigned int *lda, double *B, unsigned int *ldb,
                      double *beta, double *C, unsigned int *ldc,
                      unsigned int *nmat, unsigned int *VLEN )
{
    unsigned int i, j, num;
    unsigned int offa, offb, offc;
    double *Ap, Atemp[BUFSIZE];
    double *Bp, Btemp[BUFSIZE];
    double *Cp, Ctemp[BUFSIZE];
    static int ntimes = 0;

    /* Debug output is throttled by a counter that is bumped on every check,
       so these two messages appear on the first call only. */
    if ( ++ntimes < 3 ) printf("Inside reference compact_dgemm_()\n");
    if ( ++ntimes < 3 ) printf("layout=%u m/n/k=%u %u %u lda/b/c=%u %u %u nmat=%u VLEN=%u\n",*layout,*m,*n,*k,*lda,*ldb,*ldc,*nmat,*VLEN);
    for ( i = 0, num = 0 ; i < (*nmat) ; i+= *VLEN, num++ )
    {
       for ( j = 0 ; j < *VLEN ; j++ )
       {
           /* Start of matrix j inside pack number num of each compact array */
           offa = j+num*(*lda)*(*k)*(*VLEN);
           offb = j+num*(*ldb)*(*n)*(*VLEN);
           offc = j+num*(*ldc)*(*n)*(*VLEN);
           Ap = &A[offa];
           Bp = &B[offb];
           Cp = &C[offc];
           /* The two checks above already consume the counter values 1 and 2,
              so this message is not reached in practice; the increment is
              kept to leave the counter behaviour untouched. */
           if ( ++ntimes < 3 ) printf("Doing a dgemm at place i=%u j=%u num=%u Ap[%u]=%g\n",i,j,num,offa,Ap[0]);
           /* Unpack the data, call a reference DGEMM, repack the data */
           dcopy_to_temp ( *layout, Ap, *lda, *m, *k, Atemp, *VLEN );
           dcopy_to_temp ( *layout, Bp, *ldb, *k, *n, Btemp, *VLEN );
           dcopy_to_temp ( *layout, Cp, *ldc, *m, *n, Ctemp, *VLEN );
           /* The temporaries are passed with leading dimensions m, k and m */
           dgemm_ ( transa, transb, m, n, k, alpha, Atemp, m, Btemp, k, beta, Ctemp, m );
           dcopy_from_temp ( *layout, Cp, *ldc, *m, *n, Ctemp, *VLEN );
       }
    }
}
Ejemplo n.º 2
0
/* Reference code for compact dtrsm: every matrix pair of the batch is copied
 * out of the compact storage into dense temporaries, handed to the regular
 * dtrsm_, and the B temporary is copied back afterwards.
 *
 * side, uplo, transa, diag, m, n and alpha are forwarded unchanged to dtrsm_;
 * layout and VLEN are forwarded to the copy helpers; lda/ldb are the leading
 * dimensions of the compact arrays A and B; nmat is walked VLEN at a time.
 *
 * Note: if layout==101 (row major), then this code is known to only work when
 *       nmat == VLEN. To check for accuracy otherwise, transpose everything */
LIBXSMM_INLINE
void compact_dtrsm_ ( unsigned int *layout, char *side, char *uplo,
                      char *transa, char *diag, unsigned int *m,
                      unsigned int *n, double *alpha, double *A,
                      unsigned int *lda, double *B, unsigned int *ldb,
                      unsigned int *nmat, unsigned int *VLEN )
{
    static int ntimes = 0;
    double Atemp[BUFSIZE], Btemp[BUFSIZE];
    double *Ap, *Bp;
    int asize, offseta, offsetb;
    int first, pack, lane, idxa, idxb;
    int is_left, is_colmajor;

    if ( ++ntimes < 3 ) printf("Inside reference compact_dtrsm_()\n");

    is_left     = ( (*side == 'L') || (*side == 'l') );
    is_colmajor = ( *layout == 102 );

    /* The triangular matrix is m x m when side and layout "agree" (left with
     * layout 102, or right with any other layout) and n x n otherwise; the
     * per-pack stride of B uses n for layout 102 and m for any other layout. */
    asize   = ( is_left == is_colmajor ) ? *m : *n;
    offsetb = (*ldb)*( is_colmajor ? *n : *m )*(*VLEN);
    offseta = (*lda)*asize*(*VLEN);

    if ( ++ntimes < 3 ) printf("m/n=%u,%u layout=%u asize=%i VLEN=%u nmat=%u offseta=%i offsetb=%i\n",*m,*n,*layout, asize, *VLEN, *nmat, offseta, offsetb );

    first = 0;
    pack  = 0;
    while ( first < (int)(*nmat) )
    {
       for ( lane = 0 ; lane < (int)*VLEN ; lane++ )
       {
           /* Locate matrix `lane` of the current pack in both compact arrays */
           idxa = lane+pack*offseta;
           idxb = lane+pack*offsetb;
           Ap = A + idxa;
           Bp = B + idxb;
           if ( ++ntimes < 15 ) printf("Doing a dtrsm at place i=%d j=%d num=%d Ap[%d]=%g Bp[%d]=%g\n",first,lane,pack,idxa,Ap[0],idxb,Bp[0]);

           /* Gather into dense temporaries, solve, scatter the result back */
           dcopy_to_temp ( *layout, Ap, *lda, asize, asize, Atemp, *VLEN );
           dcopy_to_temp ( *layout, Bp, *ldb, *m, *n, Btemp, *VLEN );
           dtrsm_ ( side, uplo, transa, diag, m, n, alpha, Atemp, &asize, Btemp, m);
           dcopy_from_temp ( *layout, Bp, *ldb, *m, *n, Btemp, *VLEN );
       }
       first += *VLEN;
       pack++;
    }
}