/*
 * Demo driver: prints a ROWS x COLS matrix, transposes it into a
 * COLS x ROWS matrix with transpose2(), and prints the result.
 *
 * Command-line arguments are not used. Always returns 0.
 */
int main(int argc, char **argv)
{
    int m[ROWS][COLS] = {{1, 2, 3}, {4, 5, 6}};
    int n[COLS][ROWS];

    (void)argc;
    (void)argv;

    /* *m / *n decay to a pointer to the first element of each matrix. */
    print(*m, ROWS, COLS);

    /*
     * Disabled alternative kept from the original source:
     *     transpose(*m, *n, ROWS, COLS);
     * It is written as a block comment so that it cannot swallow the
     * statements that follow it.
     */
    transpose2(*m, *n);

    print(*n, COLS, ROWS);
    return 0;
}
int main(int argc, char **argv) { int opt, exp = -1, var = -1; bool err = false; while ((opt = getopt(argc, argv, "n:v:")) != -1) { if (opt == 'n') exp = atoi(optarg); else if (opt == 'v') var = atoi(optarg); else err = true; } if (err || exp < 0 || var < 0 || var >= 2) { fprintf(stderr, "Usage: %s -n log2(size) -v variant\n", argv[0]); return 1; } int n = 1 << exp; size_t size = n * n * sizeof(int); int *src = NULL, *dst = NULL; posix_memalign((void **)&src, getpagesize(), size); posix_memalign((void **)&dst, getpagesize(), size); printf("Generate matrix %d x %d (%ld KiB)\n", n, n, size >> 10); fill(src, n); bzero(dst, size); flush_cache(); printf("Performing matrix transposition.\n"); _timer_t timer; timer_reset(&timer); timer_start(&timer); if (var == 0) transpose1(dst, src, n); else transpose2(dst, src, n); timer_stop(&timer); timer_print(&timer); free(src); free(dst); return 0; }
/*
 * Benchmark driver: times an empty kernel launch, a blocked 2d transpose on
 * the host, and a 2d transpose on the GPU, then prints the largest absolute
 * element-wise difference between the host and GPU results.
 *
 * Command-line arguments are not used. Returns 0 on success; exits with
 * status 1 if host allocation, CUDA initialization or GPU allocation fails.
 */
int main(int argc, char *argv[])
{
    /* nx, ny: matrix dimensions; mx, my: block sizes for the transposes */
    int nx = 512, ny = 512, mx = 16, my = 16;
    int nblock = 64;   /* value handed to setgbsize() */
    int j, k, irc;
    float eps, epsmax;
    double dtime;
    struct timeval itime;
    float *a2 = NULL, *b2 = NULL, *c2 = NULL;
    float *g_a2 = NULL, *g_b2 = NULL;
    size_t nelem = (size_t) nx * (size_t) ny;

    /* allocate host data */
    a2 = (float *) malloc(nelem * sizeof(float));
    b2 = (float *) malloc(nelem * sizeof(float));
    c2 = (float *) malloc(nelem * sizeof(float));
    if (a2 == NULL || b2 == NULL || c2 == NULL) {
        printf("Host allocate error!\n");
        free(a2);
        free(b2);
        free(c2);
        exit(1);
    }

    /* set up GPU */
    irc = 0;
    setgbsize(nblock);
    init_cu(0,&irc);
    if (irc != 0) {
        printf("CUDA initialization error!\n");
        exit(1);
    }

    /* allocate 2d data on GPU; irc is checked after each call so that the
     * second allocation cannot overwrite an error reported by the first */
    gpu_fallocate(&g_a2,ny*nx,&irc);
    if (irc != 0) {
        printf("GPU allocate error!\n");
        exit(1);
    }
    gpu_fallocate(&g_b2,nx*ny,&irc);
    if (irc != 0) {
        printf("GPU allocate error!\n");
        exit(1);
    }

    /* initialize 2d data on host: b2 gets distinct values, a2 is cleared */
    for (k = 0; k < ny; k++) {
        for (j = 0; j < nx; j++) {
            b2[j+nx*k] = (float) (j + nx*k + 1);
            a2[k+ny*j] = 0.0;
        }
    }
    gpu_fcopyin(a2,g_a2,ny*nx);

    /* measure overhead time by running empty kernel */
    dtimer(&dtime,&itime,-1);
    emptykernel();
    dtimer(&dtime,&itime,1);
    printf("C empty kernel time=%e\n",(float)dtime);

    /* segmented 2d transpose on host with block size mx, my */
    dtimer(&dtime,&itime,-1);
    /* transpose0(a2,b2,nx,ny); */
    transpose2(a2,b2,mx,my,nx,ny);
    dtimer(&dtime,&itime,1);
    printf("C 2d transpose time=%e\n",(float)dtime);

    /* 2d transpose on GPU with block size mx, mx; the copy-in of b2 is
     * done before the timer starts and is not part of the measurement */
    gpu_fcopyin(b2,g_b2,nx*ny);
    dtimer(&dtime,&itime,-1);
    gpu_transpose2(g_a2,g_b2,mx,nx,ny);
    dtimer(&dtime,&itime,1);
    printf("GPU 2d transpose time=%e\n",(float)dtime);
    gpu_fcopyout(c2,g_a2,nx*ny);

    /* Check for correctness: compare a2 (host result) with c2 (copy of g_a2) */
    epsmax = 0.0;
    for (k = 0; k < ny; k++) {
        for (j = 0; j < nx; j++) {
            eps = a2[j+nx*k] - c2[j+nx*k];
            if (eps < 0.0)
                eps = -eps;
            if (eps > epsmax)
                epsmax = eps;
        }
    }
    printf("2d transpose maximum difference = %e\n",epsmax);

    /* deallocate memory on GPU */
    gpu_deallocate((void *)g_a2,&irc);
    gpu_deallocate((void *)g_b2,&irc);

    /* close down GPU */
    end_cu();

    /* release host data */
    free(a2);
    free(b2);
    free(c2);

    return 0;
}
/*
 * Helper for rotate(): return a scratch buffer of nelem elements of
 * elem_size bytes each.
 *
 * *buf / *count describe the buffer kept from earlier calls. When it already
 * holds exactly nelem elements it is returned unchanged (its previous
 * contents are NOT cleared); otherwise it is freed and a new zero-filled
 * buffer is allocated. Returns NULL, with *buf == NULL and *count == 0, when
 * nelem is negative or the allocation fails.
 */
static void *rotate_scratch (void **buf, int *count, int nelem, size_t elem_size)
{
    if (*buf != NULL && *count == nelem)
        return *buf;

    free (*buf);
    *buf = NULL;
    *count = 0;

    if (nelem < 0)
        return NULL;

    /* calloc(n, size) also rejects n * size products that would overflow */
    *buf = calloc ((size_t) nelem, elem_size);
    if (*buf != NULL)
        *count = nelem;
    return *buf;
}

/*
 * Flip or transpose an nx * ny element image into an internally owned buffer.
 *
 *   iadr  address of the input pixels
 *   oadr  receives the address of the output buffer; the buffer is static
 *         storage owned by this function, so the caller must not free it and
 *         it is reused by the next call with the same element type
 *   type  element type selector: 1 = uchar, 2 = ushort, 4 = ulong; any other
 *         value leaves *oadr and the working pointers untouched
 *   nx,ny image dimensions in elements
 *   dir   1 = flip x axis, 2 = flip y axis, 3 = transpose about 1st diagonal,
 *         4 = transpose about 2nd diagonal; any other value does nothing
 *
 * The input/output pointers are published through ubip/ubop, usip/usop and
 * ulip/ulop, which are not declared in this block.
 * NOTE(review): presumably these are file-scope pointers read by
 * flipx/flipy/transpose1/transpose2 -- confirm.
 *
 * Each element type has its own buffer and its own recorded size. The
 * original shared one "first time" flag and one saved size between all three
 * buffers, so switching type between calls could hand out a NULL or
 * wrongly sized buffer.
 *
 * If the output buffer cannot be allocated, *oadr is set to NULL and no
 * flip/transpose is performed.
 */
void rotate (void *iadr, XLONG **oadr, int type, int nx, int ny, int dir)
{
    static void *bufc = NULL, *bufs = NULL, *bufl = NULL;
    static int   cntc = 0,     cnts = 0,     cntl = 0;
    int nelem = nx * ny;

    switch (type) {
    case (1):                                   /* uchar */
        ubip = (uchar *) iadr;
        ubop = (uchar *) rotate_scratch (&bufc, &cntc, nelem, sizeof(uchar));
        *oadr = (XLONG *) ubop;
        if (ubop == NULL)
            return;
        break;

    case (2):                                   /* ushort */
        usip = (ushort *) iadr;
        usop = (ushort *) rotate_scratch (&bufs, &cnts, nelem, sizeof(ushort));
        *oadr = (XLONG *) usop;
        if (usop == NULL)
            return;
        break;

    case (4):                                   /* ulong */
        ulip = (ulong *) iadr;
        ulop = (ulong *) rotate_scratch (&bufl, &cntl, nelem, sizeof(ulong));
        *oadr = (XLONG *) ulop;
        if (ulop == NULL)
            return;
        break;

    default:
        break;
    }

    switch (dir) {
    case (1):                                   /* flip x axis */
        flipx (type, nx, ny);
        break;
    case (2):                                   /* flip y axis */
        flipy (type, nx, ny);
        break;
    case (3):                                   /* transpose 1st diag */
        transpose1 (type, nx, ny);
        break;
    case (4):                                   /* transpose 2nd diag */
        transpose2 (type, nx, ny);
        break;
    default:
        break;
    }
}