// ------------------------------------------------------------- // MatTranspose_DenseGA // ------------------------------------------------------------- static PetscErrorCode MatTranspose_DenseGA(Mat mat, MatReuse reuse, Mat *B) { PetscErrorCode ierr = 0; MPI_Comm comm; ierr = PetscObjectGetComm((PetscObject)mat, &comm); CHKERRQ(ierr); struct MatGACtx *ctx, *newctx; ierr = MatShellGetContext(mat, &ctx); CHKERRQ(ierr); PetscInt lrows, grows, lcols, gcols; ierr = MatGetSize(mat, &grows, &gcols); CHKERRQ(ierr); ierr = MatGetLocalSize(mat, &lrows, &lcols); CHKERRQ(ierr); ierr = PetscMalloc(sizeof(struct MatGACtx), &newctx); CHKERRQ(ierr); newctx->gaGroup = ctx->gaGroup; ierr = CreateMatGA(newctx->gaGroup, lcols, lrows, gcols, grows, &(newctx->ga)); CHKERRQ(ierr); GA_Transpose(ctx->ga, newctx->ga); ierr = MatCreateShell(comm, lcols, lrows, gcols, grows, newctx, B); CHKERRQ(ierr); ierr = MatSetOperations_DenseGA(*B); return ierr; }
main(int argc, char **argv) { int rank, nprocs, i, j; int g_A, g_B, **local_value=NULL; int dims[DIM]={SIZE,SIZE}, lo[DIM]={SIZE-SIZE,SIZE-SIZE}, hi[DIM]={SIZE-1,SIZE-1}, ld=SIZE; local_value=(int**)malloc(SIZE*sizeof(int*)); for(i=0; i<SIZE; i++) local_value[i]=(int*)malloc(SIZE*sizeof(int)); MPI_Init(&argc, &argv); MPI_Comm_rank(MPI_COMM_WORLD, &rank); MPI_Comm_size(MPI_COMM_WORLD, &nprocs); MA_init(C_INT, 1000, 1000); GA_Initialize(); g_A = NGA_Create(C_INT, DIM, dims, "array_A", NULL); g_B = NGA_Create(C_INT, DIM, dims, "array_B", NULL); for(i=0; i<SIZE; i++) for(j=0; j<SIZE; j++) local_value[i][j]=rand()%10; if(rank==0) NGA_Put(g_A, lo, hi, local_value, &ld); GA_Transpose(g_A, g_B); if(rank==0) validate_transpose(g_A, g_B, lo, hi, ld); GA_Sync(); if(rank == 1) GA_PRINT_MSG(); GA_Terminate(); MPI_Finalize(); }
/*
 * Runs two checks on N x N double-precision global arrays:
 *
 *  1. Fills A row by row (rows are dealt out round-robin over the
 *     processes), symmetrizes it, and prints the dot product of
 *     (A - A') with itself as the error.
 *  2. Has every process accumulate the same vector into one row of A and
 *     lets process 0 confirm that each entry equals nproc * column index.
 *
 * Both arrays are destroyed before returning.
 */
void do_work()
{
    int ONE = 1;                 /* useful constants */
    int n = N;
    int type = MT_F_DBL;
    int rank = GA_Nodeid();
    int nranks = GA_Nnodes();
    int dims[2] = {N, N};
    int lo[2], hi[2], ld;
    int g_a, g_b;
    int row, col;
    /* Note: on all current platforms DoublePrecision == double */
    double buf[N], err, alpha, beta;

    if (rank == 0) {
        printf("Creating matrix A\n");
    }
    g_a = NGA_Create(type, 2, dims, "A", NULL);
    if (!g_a) {
        GA_Error("create failed: A", n);
    }
    if (rank == 0) {
        printf("OK\n");
    }

    if (rank == 0) {
        printf("Creating matrix B\n");
    }
    /* B is created as a duplicate of A */
    g_b = GA_Duplicate(g_a, "B");
    if (!g_b) {
        GA_Error("duplicate failed", n);
    }
    if (rank == 0) {
        printf("OK\n");
    }

    GA_Zero(g_a);

    if (rank == 0) {
        printf("Initializing matrix A\n");
    }

    /* Each process writes full rows rank, rank+nranks, rank+2*nranks, ... */
    lo[1] = 0;
    hi[1] = n - 1;
    row = rank;
    while (row < n) {
        lo[0] = row;
        hi[0] = row;
        for (col = 0; col < n; col++) {
            buf[col] = sin((double)col + 0.1*(row+1));
        }
        NGA_Put(g_a, lo, hi, buf, &n);
        row += nranks;
    }

    if (rank == 0) {
        printf("Symmetrizing matrix A\n");
    }
    GA_Symmetrize(g_a);          /* A = 0.5*(A+A') */

    if (rank == 0) {
        printf("Checking if matrix A is symmetric\n");
    }
    GA_Transpose(g_a, g_b);      /* B = A' */
    alpha = 1.;
    beta = -1.;
    GA_Add(&alpha, g_a, &beta, g_b, g_b);   /* B = A - B */
    err = GA_Ddot(g_b, g_b);
    if (rank == 0) {
        printf("Error=%f\n", (double)err);
    }

    if (rank == 0) {
        printf("\nChecking atomic accumulate \n");
    }

    GA_Zero(g_a);
    for (col = 0; col < n; col++) {
        buf[col] = (double)col;
    }

    /* All processes accumulate into the same row (the middle one). */
    alpha = 1.0;
    row = n/2;
    lo[0] = row;
    hi[0] = row;
    lo[1] = 0;
    hi[1] = n - 1;
    ld = hi[1] - lo[1] + 1;
    NGA_Acc(g_a, lo, hi, buf, &ld, &alpha);
    GA_Sync();

    /* Process 0 reads the row back and verifies the summed values. */
    if (rank == 0) {
        NGA_Get(g_a, lo, hi, buf, &ld);
        for (col = 0; col < n; col++) {
            if (buf[col] != (double)nranks*col) {
                GA_Error("failed: column=", col);
            }
        }
        printf("OK\n\n");
    }

    GA_Destroy(g_a);
    GA_Destroy(g_b);
}
/* input is matrix size */ void ga_lu(double *A, int matrix_size) { int g_a, g_b, dims[2], type=C_DBL; int lo[2], hi[2], ld; int block_size[2], proc_grid[2]; double time, gflops; /* create a 2-d GA (global matrix) */ dims[0] = matrix_size; dims[1] = matrix_size; block_size[0] = BLOCK_SIZE; block_size[1] = BLOCK_SIZE; #ifdef USE_SCALAPACK_DISTR proc_grid[0] = 2; proc_grid[1] = nprocs/2; if(nprocs%2) GA_Error("For ScaLAPACK stle distribution, nprocs must be " " divisible by 2", 0); #endif #ifndef BLOCK_CYCLIC g_a = NGA_Create(type, 2, dims, "A", NULL); g_b = GA_Duplicate(g_a, "transposed array B"); #else g_a = GA_Create_handle(); GA_Set_data(g_a, 2, dims, type); GA_Set_array_name(g_a,"A"); # ifdef USE_SCALAPACK_DISTR GA_Set_block_cyclic_proc_grid(g_a, block_size, proc_grid); # else GA_Set_block_cyclic(g_a, block_size); # endif GA_Allocate(g_a); g_b = GA_Create_handle(); GA_Set_data(g_b, 2, dims, type); GA_Set_array_name(g_b,"B"); # ifdef USE_SCALAPACK_DISTR GA_Set_block_cyclic_proc_grid(g_b, block_size, proc_grid); # else GA_Set_block_cyclic(g_b, block_size); # endif GA_Allocate(g_b); #endif /* copy the local matrix into GA */ if(me==0) { lo[0] = 0; hi[0] = matrix_size - 1; lo[1] = 0; hi[1] = matrix_size - 1; ld = matrix_size; NGA_Put(g_a, lo, hi, A, &ld); } GA_Sync(); GA_Transpose(g_a, g_b); time = CLOCK_(); GA_Lu('n', g_b); time = CLOCK_() - time; /* 2/3 N^3 - 1/2 N^2 flops for LU and 2*N^2 for solver */ gflops = ( (((double)matrix_size) * matrix_size)/(time*1.0e+9) * (2.0/3.0 * (double)matrix_size - 0.5) ); if(me==0) printf("\nGA_Lu: N=%d flops=%2.5e Gflops, time=%2.5e secs\n\n", matrix_size, gflops, time); #if DEBUG GA_Print(g_a); GA_Print(g_b); #endif /* if(me==0) lu(A, matrix_size); */ GA_Destroy(g_a); GA_Destroy(g_b); }