/*
 * Creates a regular 5x10 integer global array and an irregularly distributed
 * array of the same shape, fills each with a constant, prints both, queries
 * the regular array's type/rank/dimensions and destroys both arrays.
 *
 * The irregular array is cut into block[0] x block[1] = 2 x 3 blocks.  map
 * holds the first index of every block: the block[0] entries for dimension 0
 * ({0, 2}) followed by the block[1] entries for dimension 1 ({0, 4, 6}).
 *
 * rank: id of the calling process (currently unused).
 * Returns 0.
 */
int irregular_array1(int rank)
{
    int g_A, g_B;
    int dims[DIM] = {5, 10}, dims2[DIM], ndim, type;
    int block[DIM] = {2, 3};
    int map[5] = {0, 2, 0, 4, 6};
    int value = 5, val = 7;

    (void)rank;

    g_A = NGA_Create(C_INT, DIM, dims, "array_A", NULL);
    if (!g_A)
        GA_Error("create failed: array_A", 0);

    g_B = NGA_Create_irreg(C_INT, DIM, dims, "array_B", block, map);
    if (!g_B)
        GA_Error("create failed: array_B", 0);

    GA_Fill(g_A, &value);
    GA_Print(g_A);

    GA_Fill(g_B, &val);
    GA_Print(g_B);

    GA_Sync();

    /* exercised for its own sake; the results are not inspected */
    NGA_Inquire(g_A, &type, &ndim, dims2);

    GA_Destroy(g_A);
    GA_Destroy(g_B);
    return 0;
}
/*
 * Creates a regular GSIZE x GSIZE integer global array and an irregularly
 * distributed array of the same shape, fills each with a constant, prints
 * both and destroys them.
 *
 * The irregular array is cut into block[0] x block[1] = 3 x 2 blocks.  map
 * holds the first index of every block: {0, 2, 6} for dimension 0 followed by
 * {0, 4} for dimension 1, so GSIZE has to be larger than 6 for the last block
 * of dimension 0 to be non-empty.
 *
 * rank: id of the calling process (currently unused).
 * Returns 0.
 */
int irregular_array2(int rank)
{
    int g_A, g_B;
    int dims[DIM] = {GSIZE, GSIZE};
    int block[DIM] = {3, 2};
    int map[5] = {0, 2, 6, 0, 4};
    int val_A = 4, val_B = 7;

    (void)rank;

    g_A = NGA_Create(C_INT, DIM, dims, "array_A", NULL);
    if (!g_A)
        GA_Error("create failed: array_A", 0);

    g_B = NGA_Create_irreg(C_INT, DIM, dims, "array_B", block, map);
    if (!g_B)
        GA_Error("create failed: array_B", 0);

    GA_Fill(g_A, &val_A);
    GA_Print(g_A);

    GA_Fill(g_B, &val_B);
    GA_Print(g_B);

    GA_Sync();

    GA_Destroy(g_A);
    GA_Destroy(g_B);
    return 0;
}
/*
 * Builds an irregular GSIZE x GSIZE integer array whose block grid is derived
 * from the number of processes, fills and prints it, and has rank 1 print the
 * block information reported by GA_Get_block_info.
 *
 * The grid is b1 x b2 blocks: b2 starts at DIM and grows until
 * b1 = nprocs / b2 drops below GSIZE.  Block k of either dimension starts at
 * index k, so map is {0..b1-1} followed by {0..b2-1}.
 *
 * rank:   id of the calling process.
 * nprocs: total number of processes.
 * Returns 0.
 *
 * NOTE(review): when nprocs < DIM, b1 becomes 0 and the block grid is
 * degenerate; the arguments are still passed on to NGA_Create_irreg unchanged
 * -- confirm the intended behaviour for very small process counts.
 */
int auto_number2(int rank, int nprocs)
{
    int g_B;
    int dims[DIM] = {GSIZE, GSIZE}, block[DIM], val = 7;
    int n_block[DIM], block_dims[DIM];
    int *map;
    int b1, b2, inc = 0, i;

    do {
        b2 = DIM + inc;
        b1 = nprocs / b2;
        inc++;
    } while (nprocs / b2 >= GSIZE);

    block[0] = b1;
    block[1] = b2;

    /* The map has one entry per block in each dimension, i.e. b1 + b2
     * entries; that can exceed nprocs, so size the buffer from the blocks. */
    map = malloc((size_t)(b1 + b2) * sizeof *map);
    if (!map)
        GA_Error("malloc failed for map", 0);

    for (i = 0; i < b1; i++)
        map[i] = i;
    for (i = b1; i < b1 + b2; i++)
        map[i] = i - b1;

    if (rank == 0) {
        for (i = 0; i < b1 + b2; i++)
            printf("map[%d] - %d\n", i, map[i]);
        for (i = 0; i < DIM; i++)
            printf("BLOCK[%d] - %d\n", i, block[i]);
    }

    g_B = NGA_Create_irreg(C_INT, DIM, dims, "array_B", block, map);
    /* the map is only needed while the array is being created */
    free(map);
    if (!g_B)
        GA_Error("create failed: array_B", 0);

    GA_Fill(g_B, &val);
    GA_Print(g_B);
    GA_Sync();

    if (rank == 1) {
        GA_Get_block_info(g_B, n_block, block_dims);
        for (i = 0; i < DIM; i++)
            printf(" %d: %d --- %d\n", rank, n_block[i], block_dims[i]);
    }

    GA_Destroy(g_B);
    return 0;
}
// Nonzero when the ordered shell pair (A, B) is left out of the screening
// lists.  For A != B exactly one of (A, B) and (B, A) is kept, selected by
// the parity of A + B; the diagonal pair (A, A) is never skipped.
static int schwartz_skip_pair(int A, int B)
{
    return (A > B && (A + B) % 2 == 1) || (A < B && (A + B) % 2 == 0);
}

// Builds the shell-pair screening data of pfock.
//
// 1. Creates the distributed nshells x nshells array pfock->ga_screening,
//    blocked by pfock->rowptr_sh / pfock->colptr_sh.
// 2. For every locally owned pair (M, N) stores the largest |(MN|MN)| element
//    returned by ComputeShellQuartet and reduces the global maximum into
//    pfock->maxvalue.
// 3. Scans the array row by row and records every pair whose value exceeds
//    eta = tolscr2 / maxvalue (minus the redundant half of each symmetric
//    pair) in pfock->shellptr / shellid / shellrid / shellvalue.  Diagonal
//    pairs are stored with their value, off-diagonal pairs with the value
//    negated.  pfock->nnz receives the number of recorded pairs.
//
// The screening array is destroyed before a successful return.
//
// Returns 0 on success, -1 if the global array or a local buffer cannot be
// allocated.  On the -1 paths the global array is left alone: GA_Destroy is a
// collective call and must not be issued by a single failing process.
int schwartz_screening(PFock_t pfock, BasisSet_t basis)
{
    int nshells = pfock->nshells;

    // create global arrays for screening
    int nprow = pfock->nprow;
    int npcol = pfock->npcol;
    int dims[2] = {nshells, nshells};
    int block[2] = {nprow, npcol};
    int map[nprow + npcol];
    for (int i = 0; i < nprow; i++) {
        map[i] = pfock->rowptr_sh[i];
    }
    for (int i = 0; i < npcol; i++) {
        map[i + nprow] = pfock->colptr_sh[i];
    }
    pfock->ga_screening =
        NGA_Create_irreg(C_DBL, 2, dims, "array Screening", block, map);
    if (0 == pfock->ga_screening) {
        return -1;
    }

    // compute the max shell value
    double *sq_values = (double *)PFOCK_MALLOC(sizeof(double) *
        pfock->nshells_row * pfock->nshells_col);
    if (NULL == sq_values) {
        return -1;
    }
    int startM = pfock->sshell_row;
    int startN = pfock->sshell_col;
    int endM = pfock->eshell_row;
    int endN = pfock->eshell_col;
    int ncols = endN - startN + 1;   // row length of the local patch
    double maxtmp = 0.0;
    #pragma omp parallel
    {
        int tid = omp_get_thread_num();
        #pragma omp for reduction(max:maxtmp)
        for (int M = startM; M <= endM; M++) {
            int dimM = CInt_getShellDim(basis, M);
            for (int N = startN; N <= endN; N++) {
                int dimN = CInt_getShellDim(basis, N);
                double *integrals;
                int nints =
                    ComputeShellQuartet(basis, tid, M, N, M, N, &integrals);
                double maxvalue = 0.0;
                if (nints != 0) {
                    for (int iM = 0; iM < dimM; iM++) {
                        for (int iN = 0; iN < dimN; iN++) {
                            // offset of element (iM, iN, iM, iN) -- assumes
                            // the block is laid out as
                            // [dimM][dimN][dimM][dimN]; TODO confirm
                            int index = iM * (dimN * dimM * dimN + dimN) +
                                        iN * (dimM * dimN + 1);
                            double v = fabs(integrals[index]);
                            if (maxvalue < v) {
                                maxvalue = v;
                            }
                        }
                    }
                }
                sq_values[(M - startM) * ncols + (N - startN)] = maxvalue;
                if (maxvalue > maxtmp) {
                    maxtmp = maxvalue;
                }
            }
        }
    }

    int lo[2] = {startM, startN};
    int hi[2] = {endM, endN};
    int ld = ncols;
    NGA_Put(pfock->ga_screening, lo, hi, sq_values, &ld);
    // The rows fetched below span patches written by other processes; make
    // sure every put has completed before anyone starts reading.
    GA_Sync();

    // max value
    MPI_Allreduce(&maxtmp, &(pfock->maxvalue), 1,
                  MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD);
    PFOCK_FREE(sq_values);

    // init shellptr
    sq_values = (double *)PFOCK_MALLOC(sizeof(double) * nshells);
    if (NULL == sq_values) {
        return -1;
    }
    int nnz = 0;
    double eta = pfock->tolscr2 / pfock->maxvalue;
    pfock->shellptr = (int *)PFOCK_MALLOC(sizeof(int) * (nshells + 1));
    pfock->mem_cpu += 1.0 * sizeof(int) * (nshells + 1);
    if (NULL == pfock->shellptr) {
        PFOCK_FREE(sq_values);
        return -1;
    }
    memset(pfock->shellptr, 0, sizeof(int) * (nshells + 1));

    // first pass: count the surviving pairs of every row
    for (int M = 0; M < nshells; M++) {
        pfock->shellptr[M] = nnz;
        lo[0] = M;
        hi[0] = M;
        lo[1] = 0;
        hi[1] = nshells - 1;
        ld = nshells;
        NGA_Get(pfock->ga_screening, lo, hi, sq_values, &ld);
        for (int N = 0; N < nshells; N++) {
            if (sq_values[N] > eta && !schwartz_skip_pair(M, N)) {
                nnz++;
            }
        }
        pfock->shellptr[M + 1] = nnz;
    }
    pfock->nnz = nnz;

    pfock->shellvalue = (double *)PFOCK_MALLOC(sizeof(double) * nnz);
    pfock->shellid = (int *)PFOCK_MALLOC(sizeof(int) * nnz);
    pfock->shellrid = (int *)PFOCK_MALLOC(sizeof(int) * nnz);
    pfock->mem_cpu += 1.0 * sizeof(double) * nnz + 2.0 * sizeof(int) * nnz;
    if (pfock->shellvalue == NULL ||
        pfock->shellid == NULL ||
        pfock->shellrid == NULL) {
        PFOCK_FREE(sq_values);
        return -1;
    }

    // second pass: record value, column and row of every surviving pair
    nnz = 0;
    for (int A = 0; A < nshells; A++) {
        pfock->shellptr[A] = nnz;
        lo[0] = A;
        hi[0] = A;
        lo[1] = 0;
        hi[1] = nshells - 1;
        ld = nshells;
        NGA_Get(pfock->ga_screening, lo, hi, sq_values, &ld);
        for (int B = 0; B < nshells; B++) {
            double maxvalue = sq_values[B];
            if (maxvalue <= eta || schwartz_skip_pair(A, B)) {
                continue;
            }
            // off-diagonal pairs are flagged by a negated value
            pfock->shellvalue[nnz] = (A == B) ? maxvalue : -maxvalue;
            pfock->shellid[nnz] = B;
            pfock->shellrid[nnz] = A;
            nnz++;
        }
    }

    PFOCK_FREE(sq_values);
    GA_Destroy(pfock->ga_screening);

    return 0;
}
void do_work() { int g_a, g_b; int me=GA_Nodeid(), nproc=GA_Nnodes(), proc, loop; int dims[NDIM], lo[NDIM], hi[NDIM], block[NDIM], ld[NDIM-1]; int i,d,*proclist, offset; int adims[NDIM], ndim,type; typedef struct { int lo[NDIM]; int hi[NDIM]; } patch_t; patch_t *regions; int *map; double *buf; /***** create array A with default distribution *****/ if(me==0){printf("Creating array A\n"); fflush(stdout);} for(i = 0; i<NDIM; i++)dims[i] = N*(i+1); #ifdef NEW_API g_a = GA_Create_handle(); GA_Set_data(g_a,NDIM,dims,MT_F_DBL); GA_Set_array_name(g_a,"array A"); (void)GA_Allocate(g_a); #else g_a = NGA_Create(MT_F_DBL, NDIM, dims, "array A", NULL); #endif if(!g_a) GA_Error("create failed: A",0); if(me==0)printf("OK\n\n"); /* print info about array we got */ NGA_Inquire(g_a, &type, &ndim, adims); GA_Print_distribution(g_a); GA_Sync(); /* duplicate array A with ga_create irreg rather than ga_duplicate * -- want to show distribution control * -- with ga_duplicate it would be g_b=GA_Duplicate(g_a,name) */ if(me==0)printf("\nReconstructing distribution description for A\n"); /* get memory for arrays describing distribution */ proclist = (int*)malloc(nproc*sizeof(int)); if(!proclist)GA_Error("malloc failed for proclist",0); regions = (patch_t*)malloc(nproc*sizeof(patch_t)); if(!regions)GA_Error("malloc failed for regions",0); map = (int*)malloc((nproc+ndim)*sizeof(int)); /* ubound= nproc+mdim */ if(!map)GA_Error("malloc failed for map",0); /* first find out how array g_a is distributed */ for(i=0;i<ndim;i++)lo[i]=BASE; for(i=0;i<ndim;i++)hi[i]=adims[i] -1 + BASE; proc = NGA_Locate_region(g_a, lo, hi, (int*)regions, proclist); if(proc<1) GA_Error("error in NGA_Locate_region",proc); /* determine blocking for each dimension */ for(i=0;i<ndim;i++)block[i]=0; for(i=0;i<ndim;i++)adims[i]=0; offset =0; for(d=0; d<ndim; d++) for(i=0;i<proc;i++) if( regions[i].hi[d]>adims[d] ){ map[offset] = regions[i].lo[d]; offset++; block[d]++; adims[d]= regions[i].hi[d]; } if(me==0){ 
printf("Distribution map contains %d elements\n",offset); print_subscript("number of blocks for each dimension",ndim,block,"\n"); print_subscript("distribution map",offset,map,"\n\n"); fflush(stdout); } if(me==0)printf("Creating array B applying distribution of A\n"); # ifdef USE_DUPLICATE g_b = GA_Duplicate(g_a,"array B"); # else g_b = NGA_Create_irreg(MT_F_DBL, NDIM, dims, "array B", block,map); # endif if(!g_b) GA_Error("create failed: B",0); if(me==0)printf("OK\n\n"); free(proclist); free(regions); free(map); GA_Print_distribution(g_b); GA_Sync(); if(me==0){ printf("\nCompare distributions of A and B\n"); if(GA_Compare_distr(g_a,g_b)) printf("Failure: distributions NOT identical\n"); else printf("Success: distributions identical\n"); fflush(stdout); } if(me==0){ printf("\nAccessing local elements of A: set them to the owner process id\n"); fflush(stdout); } GA_Sync(); NGA_Distribution(g_a,me,lo,hi); if(hi[0]>=0){/* -1 means no elements stored on this processor */ double *ptr; int locdim[NDIM]; NGA_Access(g_a, lo,hi, &ptr, ld); for(i=0;i<ndim;i++)locdim[i]=hi[i]-lo[i]+1; fill_patch(ptr, locdim, ld, ndim,(double)me); } for(i=0;i<nproc; i++){ if(me==i && hi[0]>=0){ char msg[100]; sprintf(msg,"%d: leading dimensions",me); print_subscript(msg,ndim-1,ld,"\n"); fflush(stdout); } GA_Sync(); } GA_Sync(); if(me==0)printf("\nRandomly checking the update using ga_get on array sections\n"); GA_Sync(); /* show ga_get working and verify array updates * every process does N random gets * for simplicity get only a single row at a time */ srand(me); /* different seed for every process */ hi[ndim-1]=adims[ndim-1] -1 + BASE; for(i=1;i<ndim-1; i++)ld[i]=1; ld[ndim-2]=adims[ndim-1] -1 + BASE; /* get buffer memory */ buf = (double*)malloc(adims[ndim-1]*sizeof(double)); if(!buf)GA_Error("malloc failed for buf",0); /* half of the processes check the result */ if(me<=nproc/2) for(loop = 0; loop< N; loop++){ /* task parallel loop */ lo[ndim-1]=BASE; for (i= 0; i < ndim -1; i ++){ lo[i] = 
hi[i] = rand()%adims[i]+BASE; } /* print_subscript("getting",ndim,lo,"\n");*/ NGA_Get(g_a,lo,hi,buf,ld); /* check values */ for(i=0;i<adims[ndim-1]; i++){ int p = NGA_Locate(g_a, lo); if((double)p != buf[i]) { char msg[100]; sprintf(msg,"%d: wrong value: %d != %lf a",me, p, buf[i]); print_subscript(msg,ndim,lo,"\n"); GA_Error("Error - bye",i); } lo[ndim-1]++; } } free(buf); GA_Sync(); if(me==0)printf("OK\n"); GA_Destroy(g_a); GA_Destroy(g_b); }