/*
 * Forwards a single-precision "ger" call to the configured backend
 * (presumably the BLAS SGER rank-1 update -- confirm against the backend in use).
 *
 * When n == 1 the caller-supplied lda is ignored and m is used instead.
 *
 * With USEBLAS defined, every integer argument is narrowed from long to int
 * and handed by address to sger_(); alpha is passed by address as well.
 * Otherwise sger() is called by value with the (possibly adjusted) arguments.
 *
 * NOTE(review): the long -> int narrowing is unchecked; values that do not
 * fit in an int are converted silently.
 */
void THBlas_ger(long m, long n, float alpha, float *x, long incx, float *y, long incy, float *a, long lda)
{
    /* A single-column call always uses m as the leading dimension. */
    const long ld = (n == 1) ? m : lda;

#ifdef USEBLAS
    int m_int = (int)m;
    int n_int = (int)n;
    int ld_int = (int)ld;
    int incx_int = (int)incx;
    int incy_int = (int)incy;

    sger_(&m_int, &n_int, &alpha, x, &incx_int, y, &incy_int, a, &ld_int);
#else
    sger(m, n, alpha, x, incx, y, incy, a, ld);
#endif
}
/* Returns a pseudo-random float obtained from rand()/RAND_MAX - 0.5. */
static float bench_rand_half(void)
{
    return (float)(rand() / 1.0 / RAND_MAX - 0.5);
}

/*
 * Parses a strictly positive decimal dimension that fits in an int.
 * Returns 0 and stores the value in *out on success, -1 on any bad input
 * (empty, trailing characters, non-positive, or out of int range).
 */
static int bench_parse_dim(const char *text, int *out)
{
    char *stop;
    long value = strtol(text, &stop, 10);

    /* (long)(int)value != value rejects anything that would not survive the narrowing. */
    if (stop == text || *stop != '\0' || value <= 0 || value != (long)(int)value)
        return -1;

    *out = (int)value;
    return 0;
}

/* Nanoseconds between two clock_gettime() readings. */
static unsigned long long bench_elapsed_ns(const struct timespec *start, const struct timespec *stop)
{
    /* long long arithmetic so the seconds term cannot overflow a 32-bit long;
       a negative tv_nsec difference is absorbed by the seconds term. */
    long long ns = (long long)(stop->tv_sec - start->tv_sec) * 1000000000LL
                 + (long long)(stop->tv_nsec - start->tv_nsec);

    return (unsigned long long)ns;
}

/*
 * Benchmark driver: times six ger variants on the same random m-by-n input.
 *
 * Usage: <program> m n   (both strictly positive integers)
 *
 * Each variant runs once on a fresh copy of the random matrix t, and the
 * elapsed CLOCK_MONOTONIC time in nanoseconds is printed at the end.
 * Returns 0 on success, 1 on bad arguments or allocation failure.
 *
 * NOTE(review): `begin` and `end` are not declared in this function; they are
 * expected to be struct timespec objects declared elsewhere in the file.
 */
int main(int argc, char *argv[])
{
    int m, n;
    int incx = 1;
    int incy = 1;
    int lda;
    int rc = 1;
    float alpha;
    float *x = NULL;
    float *y = NULL;
    float *t = NULL;
    float *A = NULL;
    size_t count, bytes, i;
    unsigned long long t1, t2, t3, t4, t5, t6;

    if (argc < 3) {
        fprintf(stderr, "usage: %s m n\n", argc > 0 ? argv[0] : "ger_bench");
        return 1;
    }
    if (bench_parse_dim(argv[1], &m) != 0 || bench_parse_dim(argv[2], &n) != 0) {
        fprintf(stderr, "m and n must be positive integers\n");
        return 1;
    }

    /* Guard the m*n*sizeof(float) byte count against size_t overflow. */
    if ((size_t)m > ((size_t)-1) / sizeof(float) / (size_t)n) {
        fprintf(stderr, "matrix of %d x %d floats is too large\n", m, n);
        return 1;
    }
    count = (size_t)m * (size_t)n;
    bytes = count * sizeof(float);
    lda = m;

    srand((unsigned)time(NULL));
    alpha = bench_rand_half();

    x = malloc((size_t)m * sizeof *x);
    y = malloc((size_t)n * sizeof *y);
    t = malloc(bytes);
    A = malloc(bytes);
    if (x == NULL || y == NULL || t == NULL || A == NULL) {
        fprintf(stderr, "out of memory\n");
        goto cleanup;
    }

    for (i = 0; i < (size_t)m; i++)
        x[i] = bench_rand_half();
    for (i = 0; i < (size_t)n; i++)
        y[i] = bench_rand_half();
    for (i = 0; i < count; i++)
        t[i] = bench_rand_half();

    /* ACML version */
    printf("acm\n");
    memcpy(A, t, bytes);
    clock_gettime(CLOCK_MONOTONIC, &begin);
    sger(m, n, alpha, x, incx, y, incy, A, lda);
    clock_gettime(CLOCK_MONOTONIC, &end);
    t1 = bench_elapsed_ns(&begin, &end);

    /* Native version */
    printf("ori\n");
    memcpy(A, t, bytes);
    clock_gettime(CLOCK_MONOTONIC, &begin);
    ori(m, n, alpha, x, incx, y, incy, A, lda);
    clock_gettime(CLOCK_MONOTONIC, &end);
    t2 = bench_elapsed_ns(&begin, &end);

    /* Native version with checksum */
    printf("one\n");
    memcpy(A, t, bytes);
    clock_gettime(CLOCK_MONOTONIC, &begin);
    one(m, n, alpha, x, incx, y, incy, A, lda);
    clock_gettime(CLOCK_MONOTONIC, &end);
    t3 = bench_elapsed_ns(&begin, &end);

    /* Native version with checksum (optimized) */
    printf("exx\n");
    memcpy(A, t, bytes);
    clock_gettime(CLOCK_MONOTONIC, &begin);
    exx(m, n, alpha, x, incx, y, incy, A, lda);
    clock_gettime(CLOCK_MONOTONIC, &end);
    t4 = bench_elapsed_ns(&begin, &end);

    /* Native version with recalculation */
    printf("dou\n");
    memcpy(A, t, bytes);
    clock_gettime(CLOCK_MONOTONIC, &begin);
    dou(m, n, alpha, x, incx, y, incy, A, lda);
    clock_gettime(CLOCK_MONOTONIC, &end);
    t5 = bench_elapsed_ns(&begin, &end);

    /* Native version with recalculation (optimized) */
    printf("exd\n");
    memcpy(A, t, bytes);
    clock_gettime(CLOCK_MONOTONIC, &begin);
    exd(m, n, alpha, x, incx, y, incy, A, lda);
    clock_gettime(CLOCK_MONOTONIC, &end);
    t6 = bench_elapsed_ns(&begin, &end);

    /* %llu matches the unsigned long long timers. */
    printf("acm%16llu\n", t1);
    printf("ori%16llu\n", t2);
    printf("one%16llu\n", t3);
    printf("exx%16llu\n", t4);
    printf("dou%16llu\n", t5);
    printf("exd%16llu\n", t6);

    rc = 0;

cleanup:
    free(A);
    free(t);
    free(y);
    free(x);
    return rc;
}