/*
 * Program entry point.
 *
 * Runs the three program phases strictly in this order:
 *   1. initialize_arrays()
 *   2. input_data()
 *   3. print_result()
 *
 * Declared as `int main(void)` rather than `void main()`: a hosted C
 * program must define main with return type int, and `(void)` states
 * explicitly that no command-line arguments are consumed.
 *
 * Returns 0 so the host environment sees a successful exit status.
 */
int main(void)
{
    initialize_arrays();
    input_data();
    print_result();
    return 0;
}
int main(int argc, char **argv) { int limit, i, j, dim, iterations, real_iterations; double tmicrosec = 0.0, refcnt= 0.0, prefcnt = 0.0, opcnt = 0.0; type = usage(argc, argv, &iterations, &real_iterations); iterations += (iterations % 4); initialize_arrays(type); setbuf(stdout,NULL); #ifdef T3E foofn = _cptofcd(&fooc, sizeof(fooc)); #endif /* Measure cache */ if (type & VECTORVECTOR) { print_header(type, "AXPY Cache Test"); if (type & SINGLEPRECISION) { float *sx, *sy; dim = compute_axpy_dimension(memsize,(int)sizeof(float)); DBG(fprintf(stderr,"Max dimension in bytes is %d\n",dim*(int)sizeof(float))); DBG(fprintf(stderr,"Total bytes used %d out of %d\n",(dim+dim)*(int)sizeof(float),memsize)); assert(sx = (float *)malloc(dim*(int)sizeof(float))); assert(sy = (float *)malloc(dim*(int)sizeof(float))); memset(sx,0x00,dim*(int)sizeof(float)); memset(sy,0x00,dim*(int)sizeof(float)); initall_flt(sx,dim); initall_flt(sy,dim); for (i = 0; i < timeslots; i++) { limit = compute_axpy_dimension(sizes[i],(int)sizeof(float)); DBG(fprintf(stderr,"Max dimension in floats is %d\n",limit)); DBG(fprintf(stderr,"Cache size is %d bytes, %d floats\n", sizes[i],sizes[i]/(int)sizeof(float))); if ((type & CONSTANTITERATIONS) == 0) { real_iterations = (i == 0 ? iterations : (int)(prefcnt/(3.0*(double)limit))); if (real_iterations < LOWEST_ITERATION_COUNT) real_iterations = LOWEST_ITERATION_COUNT; } refcnt = (double)real_iterations*3.0*(double)limit; opcnt = (double)real_iterations*2.0*(double)limit; DBG(printf("refcnt now %f, was %f, doing %d iterations\n",refcnt,prefcnt,real_iterations)); prefcnt = refcnt; for (j = 0; j < repeat_count; j++) { flushall(memsize); TIMER_START; do_saxpy(sx,sy,real_iterations,&limit); TIMER_STOP; tmicrosec = TIMER_ELAPSED_US; DBG(fprintf(stderr,"R: %f us. total\n",tmicrosec)); DBG(fprintf(stderr,"R: %f ns. 
per ref\n",(tmicrosec*1000.0) / refcnt)); compute_stats(i,j,refcnt,(double)sizeof(float),opcnt, tmicrosec,real_iterations, (limit+limit)*(int)sizeof(float), (limit+limit)*(int)sizeof(float), limit,limit); } } free(sx); free(sy); } else { double *dx, *dy; dim = compute_axpy_dimension(memsize,(int)sizeof(double)); DBG(fprintf(stderr,"Max dimension in bytes is %d\n",dim*(int)sizeof(double))); DBG(fprintf(stderr,"Total bytes used %d out of %d\n",(dim+dim)*(int)sizeof(double),memsize)); assert(dx = (double *)malloc(dim*(int)sizeof(double))); assert(dy = (double *)malloc(dim*(int)sizeof(double))); memset(dx,0x00,dim*(int)sizeof(double)); memset(dy,0x00,dim*(int)sizeof(double)); initall_dbl(dx,dim); initall_dbl(dy,dim); for (i = 0; i < timeslots; i++) { limit = compute_axpy_dimension(sizes[i],(int)sizeof(double)); DBG(fprintf(stderr,"Max dimension in doubles is %d\n",limit)); DBG(fprintf(stderr,"Cache size is %d bytes, %d doubles\n", sizes[i],sizes[i]/(int)sizeof(double))); if ((type & CONSTANTITERATIONS) == 0) { real_iterations = (i == 0 ? iterations : (int)(prefcnt/(3.0*(double)limit))); if (real_iterations < LOWEST_ITERATION_COUNT) real_iterations = LOWEST_ITERATION_COUNT; } refcnt = (double)real_iterations*3.0*(double)limit; opcnt = (double)real_iterations*2.0*(double)limit; DBG(printf("refcnt now %f, was %f, doing %d iterations\n",refcnt,prefcnt,real_iterations)); prefcnt = refcnt; for (j = 0; j < repeat_count; j++) { flushall(memsize); TIMER_START; do_daxpy(dx,dy,real_iterations,&limit); TIMER_STOP; tmicrosec = TIMER_ELAPSED_US; DBG(fprintf(stderr,"R: %f us. total\n",tmicrosec)); DBG(fprintf(stderr,"R: %f ns. 
per ref\n",(tmicrosec*1000.0) / refcnt)); compute_stats(i,j,refcnt,(double)sizeof(double),opcnt, tmicrosec,real_iterations, (limit+limit)*(int)sizeof(double), (limit+limit)*(int)sizeof(double), limit,limit); } } free(dx); free(dy); } } if (type & VECTORMATRIX) { int lda; print_header(type, "GEMV Cache Test"); if (type & SINGLEPRECISION) { float *sa, *sx, *sy; dim = compute_gemv_dimension(memsize,(int)sizeof(float)); DBG(fprintf(stderr,"Max dimension in bytes is %d\n",dim*(int)sizeof(float))); DBG(fprintf(stderr,"Total bytes used %d out of %d\n", ((dim*dim)+dim+dim)*(int)sizeof(float),memsize)); assert(sa = (float *)malloc(dim*dim*(int)sizeof(float))); assert(sx = (float *)malloc(dim*(int)sizeof(float))); assert(sy = (float *)malloc(dim*(int)sizeof(float))); memset(sa,0x00,dim*dim*(int)sizeof(float)); memset(sx,0x00,dim*(int)sizeof(float)); memset(sy,0x00,dim*(int)sizeof(float)); initall_flt(sa,dim*dim); initall_flt(sx,dim); initall_flt(sy,dim); for (i = 0; i < timeslots; i++) { limit = compute_gemv_dimension(sizes[i],(int)sizeof(float)); DBG(fprintf(stderr,"Max dimension in floats is %d\n",limit)); DBG(fprintf(stderr,"Cache size is %d bytes, %d floats\n", sizes[i],sizes[i]/(int)sizeof(float))); if ((type & CONSTANTITERATIONS) == 0) { real_iterations = (i == 0 ? iterations : (int)(prefcnt/(2*limit*limit + 2*limit))); if (real_iterations < LOWEST_ITERATION_COUNT) real_iterations = LOWEST_ITERATION_COUNT; } refcnt = (double)real_iterations*(2*limit*limit + 2*limit); opcnt = (double)real_iterations*(2*limit*limit + 2*limit); DBG(printf("refcnt now %f, was %f, doing %d iterations\n",refcnt,prefcnt,real_iterations)); prefcnt = refcnt; lda = ((type & HOLDLDA) ? dim : limit); for (j = 0; j < repeat_count; j++) { flushall(memsize); TIMER_START; do_sgemv(sa,sx,sy,real_iterations,&limit,&lda); TIMER_STOP; tmicrosec = TIMER_ELAPSED_US; DBG(fprintf(stderr,"R: %f us. total\n",tmicrosec)); DBG(fprintf(stderr,"R: %f ns. 
per ref\n",(tmicrosec*1000.0) / refcnt)); compute_stats(i,j,refcnt,(double)sizeof(float),opcnt, tmicrosec,real_iterations, ((limit*limit)+limit+limit)*(int)sizeof(float), ((lda*lda)+lda+lda)*(int)sizeof(float), limit,lda); } } free(sa); free(sx); free(sy); } else { double *da, *dx, *dy; dim = compute_gemv_dimension(memsize,(int)sizeof(double)); DBG(fprintf(stderr,"Max dimension in bytes is %d\n",dim*(int)sizeof(double))); DBG(fprintf(stderr,"Total bytes used %d out of %d\n", ((dim*dim)+dim+dim)*(int)sizeof(double),memsize)); assert(da = (double *)malloc(dim*dim*(int)sizeof(double))); assert(dx = (double *)malloc(dim*(int)sizeof(double))); assert(dy = (double *)malloc(dim*(int)sizeof(double))); memset(da,0x00,dim*dim*(int)sizeof(double)); memset(dx,0x00,dim*(int)sizeof(double)); memset(dy,0x00,dim*(int)sizeof(double)); initall_dbl(da,dim*dim); initall_dbl(dx,dim); initall_dbl(dy,dim); for (i = 0; i < timeslots; i++) { limit = compute_gemv_dimension(sizes[i],(int)sizeof(double)); DBG(fprintf(stderr,"Max dimension in doubles is %d\n",limit)); DBG(fprintf(stderr,"Cache size is %d bytes, %d doubles\n", sizes[i],sizes[i]/(int)sizeof(double))); if ((type & CONSTANTITERATIONS) == 0) { real_iterations = (i == 0 ? iterations : (int)(prefcnt/(2*limit*limit + 2*limit))); if (real_iterations < LOWEST_ITERATION_COUNT) real_iterations = LOWEST_ITERATION_COUNT; } refcnt = (double)real_iterations*(2*limit*limit + 2*limit); opcnt = (double)real_iterations*(2*limit*limit + 2*limit); DBG(printf("refcnt now %f, was %f, doing %d iterations\n",refcnt,prefcnt,real_iterations)); prefcnt = refcnt; lda = ((type & HOLDLDA) ? dim : limit); for (j = 0; j < repeat_count; j++) { flushall(memsize); TIMER_START; do_dgemv(da,dx,dy,real_iterations,&limit,&lda); TIMER_STOP; tmicrosec = TIMER_ELAPSED_US; DBG(fprintf(stderr,"R: %f us. total\n",tmicrosec)); DBG(fprintf(stderr,"R: %f ns. 
per ref\n",(tmicrosec*1000.0) / refcnt)); compute_stats(i,j,refcnt,(double)sizeof(double),opcnt, tmicrosec,real_iterations, ((limit*limit)+limit+limit)*(int)sizeof(double), ((lda*lda)+lda+lda)*(int)sizeof(double), limit,lda); } } free(da); free(dx); free(dy); } } if (type & MATRIXMATRIX) { int lda; print_header(type, "GEMM Cache Test"); if (type & SINGLEPRECISION) { float *sa, *sb, *sc; dim = compute_gemm_dimension(memsize,(int)sizeof(float)); DBG(fprintf(stderr,"Max dimension in bytes is %d\n",dim*(int)sizeof(float))); DBG(fprintf(stderr,"Total bytes used %d out of %d\n",(3*dim*dim)*(int)sizeof(float),memsize)); assert(sa = (float *)malloc(dim*dim*(int)sizeof(float))); assert(sb = (float *)malloc(dim*dim*(int)sizeof(float))); assert(sc = (float *)malloc(dim*dim*(int)sizeof(float))); memset(sa,0x00,dim*dim*(int)sizeof(float)); memset(sb,0x00,dim*dim*(int)sizeof(float)); memset(sc,0x00,dim*dim*(int)sizeof(float)); initall_flt(sa,dim*dim); initall_flt(sa,dim*dim); initall_flt(sa,dim*dim); for (i = 0; i < timeslots; i++) { limit = compute_gemm_dimension(sizes[i],(int)sizeof(float)); DBG(fprintf(stderr,"Max dimension in floats is %d\n",limit)); DBG(fprintf(stderr,"Cache size is %d bytes, %d floats\n", sizes[i],sizes[i]/(int)sizeof(float))); if ((type & CONSTANTITERATIONS) == 0) { real_iterations = (i == 0 ? iterations : (int)(prefcnt/(2*limit*limit*limit+2*limit*limit))); /* For each of n*n elements, we have 2*n + 1 reads, 1 write */ if (real_iterations < LOWEST_ITERATION_COUNT) real_iterations = LOWEST_ITERATION_COUNT; } refcnt = (double)real_iterations*(2*limit*limit*limit+2*limit*limit); opcnt = (double)real_iterations*(2*limit*limit*limit+2*limit*limit); DBG(printf("refcnt now %f, was %f, doing %d iterations\n",refcnt,prefcnt,real_iterations)); prefcnt = refcnt; lda = ((type & HOLDLDA) ? 
dim : limit); for (j = 0; j < repeat_count; j++) { flushall(memsize); TIMER_START; do_sgemm(sa,sb,sc,real_iterations,&limit,&lda); TIMER_STOP; tmicrosec = TIMER_ELAPSED_US; DBG(fprintf(stderr,"R: %f us. total\n",tmicrosec)); DBG(fprintf(stderr,"R: %f ns. per ref\n",(tmicrosec*1000.0) / refcnt)); compute_stats(i,j,refcnt,(double)sizeof(float),opcnt, tmicrosec,real_iterations, (3*limit*limit)*(int)sizeof(float), (3*lda*lda)*(int)sizeof(float), limit,lda); } } free(sa); free(sb); free(sc); } else { double *da, *db, *dc; dim = compute_gemm_dimension(memsize,(int)sizeof(double)); DBG(fprintf(stderr,"Max dimension in bytes is %d\n",dim*(int)sizeof(double))); DBG(fprintf(stderr,"Total bytes used %d out of %d\n",(3*dim*dim)*(int)sizeof(double),memsize)); assert(da = (double *)malloc(dim*dim*(int)sizeof(double))); assert(db = (double *)malloc(dim*dim*(int)sizeof(double))); assert(dc = (double *)malloc(dim*dim*(int)sizeof(double))); memset(da,0x00,dim*dim*(int)sizeof(double)); memset(db,0x00,dim*dim*(int)sizeof(double)); memset(dc,0x00,dim*dim*(int)sizeof(double)); initall_dbl(da,dim*dim); initall_dbl(db,dim*dim); initall_dbl(dc,dim*dim); for (i = 0; i < timeslots; i++) { limit = compute_gemm_dimension(sizes[i],(int)sizeof(double)); DBG(fprintf(stderr,"Max dimension in doubles is %d\n",limit)); DBG(fprintf(stderr,"Cache size is %d bytes, %d doubles\n", sizes[i],sizes[i]/(int)sizeof(double))) if ((type & CONSTANTITERATIONS) == 0) { real_iterations = (i == 0 ? iterations : (int)(prefcnt/((limit*limit)*((2*limit)+2)))); /* For each of n*n elements, we have 2*n + 1 reads, 1 write */ if (real_iterations < LOWEST_ITERATION_COUNT) real_iterations = LOWEST_ITERATION_COUNT; } refcnt = (double)real_iterations*(limit*limit)*((2.0*limit)+2.0); opcnt = (double)real_iterations*(2.0*limit*limit*limit+2.0*limit*limit); DBG(printf("refcnt now %f, was %f, doing %d iterations\n",refcnt,prefcnt,real_iterations)); prefcnt = refcnt; lda = ((type & HOLDLDA) ? 
dim : limit); for (j = 0; j < repeat_count; j++) { flushall(memsize); TIMER_START; do_dgemm(da,db,dc,real_iterations,&limit,&lda); TIMER_STOP; tmicrosec = TIMER_ELAPSED_US; DBG(fprintf(stderr,"R: %f us. total\n",tmicrosec)); DBG(fprintf(stderr,"R: %f ns. per ref\n",(tmicrosec*1000.0) / refcnt)); compute_stats(i,j,refcnt,(double)sizeof(double),opcnt, tmicrosec,real_iterations, (3*limit*limit)*(int)sizeof(double), (3*lda*lda)*(int)sizeof(double), limit,lda); } } free(da); free(db); free(dc); } }