main(int argc, char **argv) { int rank, nprocs; int g_A, dims[D]={SIZE,SIZE}, *local_A=NULL, *local_G=NULL, **sub_array=NULL, **s_array=NULL; int i, j, value=5; MPI_Init(&argc, &argv); GA_Initialize(); MA_init(C_INT, 1000, 1000); MPI_Comm_rank(MPI_COMM_WORLD, &rank); MPI_Comm_size(MPI_COMM_WORLD, &nprocs); s_array=(int**)malloc(N*sizeof(int*)); for(i=0; i<N; i++) { s_array[i]=(int*)malloc(D*sizeof(int)); for(j=0; j<D; j++) s_array[i][j]=rand()%10; } sub_array=(int**)malloc(N*sizeof(int*)); for(i=0; i<N; i++) { sub_array[i]=(int*)malloc(D*sizeof(int)); for(j=0; j<D; j++) sub_array[i][j]=rand()%10; } for(i=0; i<N; i++) local_A=(int*)malloc(N*sizeof(int)); for(i=0; i<N; i++) local_G=(int*)malloc(N*sizeof(int)); g_A=NGA_Create(C_INT, D, dims, "array_A", NULL); GA_Fill(g_A, &value); GA_Sync(); NGA_Scatter(g_A, local_A, s_array, N); NGA_Gather(g_A, local_G, s_array, N); GA_Sync(); GA_Print(g_A); if(rank==0) { for(i=0; i<N; i++) if(local_G[i]!=local_A[i]) printf("GA Error: \n"); } GA_Sync(); if(rank==0) GA_PRINT_MSG(); GA_Terminate(); MPI_Finalize(); return 0; }
// ------------------------------------------------------------- // AdjacencyList::ready // ------------------------------------------------------------- void AdjacencyList::ready(void) { #if 1 int grp = this->communicator().getGroup(); int me = GA_Pgroup_nodeid(grp); int nprocs = GA_Pgroup_nnodes(grp); p_adjacency.clear(); p_adjacency.resize(p_global_nodes.size()); // Find total number of nodes and edges. Assume no duplicates int nedges = p_edges.size(); int total_edges = nedges; char plus[2]; strcpy(plus,"+"); GA_Pgroup_igop(grp,&total_edges, 1, plus); int nnodes = p_original_nodes.size(); int total_nodes = nnodes; GA_Pgroup_igop(grp,&total_nodes, 1, plus); // Create a global array containing original indices of all nodes and indexed // by the global index of the node int i, p; int dist[nprocs]; for (p=0; p<nprocs; p++) { dist[p] = 0; } dist[me] = nnodes; GA_Pgroup_igop(grp,dist,nprocs,plus); int *mapc = new int[nprocs+1]; mapc[0] = 0; for (p=1; p<nprocs; p++) { mapc[p] = mapc[p-1] + dist[p-1]; } mapc[nprocs] = total_nodes; int g_nodes = GA_Create_handle(); int dims = total_nodes; NGA_Set_data(g_nodes,1,&dims,C_INT); NGA_Set_pgroup(g_nodes, grp); if (!GA_Allocate(g_nodes)) { char buf[256]; sprintf(buf,"AdjacencyList::ready: Unable to allocate distributed array" " for bus indices\n"); printf(buf); throw gridpack::Exception(buf); } int lo, hi; lo = mapc[me]; hi = mapc[me+1]-1; int size = hi - lo + 1; int o_idx[size], g_idx[size]; for (i=0; i<size; i++) o_idx[i] = p_original_nodes[i]; for (i=0; i<size; i++) g_idx[i] = p_global_nodes[i]; int **indices= new int*[size]; int *iptr = g_idx; for (i=0; i<size; i++) { indices[i] = iptr; iptr++; } if (size > 0) NGA_Scatter(g_nodes,o_idx,indices,size); GA_Pgroup_sync(grp); delete [] indices; delete [] mapc; // Cycle through all nodes and match them up with nodes at end of edges. for (p=0; p<nprocs; p++) { int iproc = (me+p)%nprocs; // Get node data from process iproc NGA_Distribution(g_nodes,iproc,&lo,&hi); size = hi - lo + 1; if (size <= 0) continue; int *buf = new int[size]; int ld = 1; NGA_Get(g_nodes,&lo,&hi,buf,&ld); // Create a map of the nodes from process p std::map<int,int> nmap; std::map<int,int>::iterator it; std::pair<int,int> pr; for (i=lo; i<=hi; i++){ pr = std::pair<int,int>(buf[i-lo],i); nmap.insert(pr); } delete [] buf; // scan through the edges looking for matches. If there is a match, set the // global index int idx; for (i=0; i<nedges; i++) { idx = static_cast<int>(p_edges[i].original_conn.first); it = nmap.find(idx); if (it != nmap.end()) { p_edges[i].global_conn.first = static_cast<Index>(it->second); } idx = static_cast<int>(p_edges[i].original_conn.second); it = nmap.find(idx); if (it != nmap.end()) { p_edges[i].global_conn.second = static_cast<Index>(it->second); } } } GA_Destroy(g_nodes); // All edges now have global indices assigned to them. Begin constructing // adjacency list. Start by creating a global array containing all edges dist[0] = 0; for (p=1; p<nprocs; p++) { double max = static_cast<double>(total_edges); max = (static_cast<double>(p))*(max/(static_cast<double>(nprocs))); dist[p] = 2*(static_cast<int>(max)); } int g_edges = GA_Create_handle(); dims = 2*total_edges; NGA_Set_data(g_edges,1,&dims,C_INT); NGA_Set_irreg_distr(g_edges,dist,&nprocs); NGA_Set_pgroup(g_edges, grp); if (!GA_Allocate(g_edges)) { char buf[256]; sprintf(buf,"AdjacencyList::ready: Unable to allocate distributed array" " for branch indices\n"); printf(buf); throw gridpack::Exception(buf); } // Add edge information to global array. Start by figuring out how much data // is associated with each process for (p=0; p<nprocs; p++) { dist[p] = 0; } dist[me] = nedges; GA_Pgroup_igop(grp,dist, nprocs, plus); int offset[nprocs]; offset[0] = 0; for (p=1; p<nprocs; p++) { offset[p] = offset[p-1] + 2*dist[p-1]; } // Figure out where local data goes in GA and then copy it to GA lo = offset[me]; hi = lo + 2*nedges - 1; int edge_ids[2*nedges]; for (i=0; i<nedges; i++) { edge_ids[2*i] = static_cast<int>(p_edges[i].global_conn.first); edge_ids[2*i+1] = static_cast<int>(p_edges[i].global_conn.second); } if (lo <= hi) { int ld = 1; NGA_Put(g_edges,&lo,&hi,edge_ids,&ld); } GA_Pgroup_sync(grp); // Cycle through all edges and find out how many are attached to the nodes on // your process. Start by creating a map between the global node indices and // the local node indices std::map<int,int> gmap; std::map<int,int>::iterator it; std::pair<int,int> pr; for (i=0; i<nnodes; i++){ pr = std::pair<int,int>(static_cast<int>(p_global_nodes[i]),i); gmap.insert(pr); } // Cycle through edge information on each processor for (p=0; p<nprocs; p++) { int iproc = (me+p)%nprocs; NGA_Distribution(g_edges,iproc,&lo,&hi); int size = hi - lo + 1; int *buf = new int[size]; int ld = 1; NGA_Get(g_edges,&lo,&hi,buf,&ld); BOOST_ASSERT(size%2 == 0); size = size/2; int idx1, idx2; Index idx; for (i=0; i<size; i++) { idx1 = buf[2*i]; idx2 = buf[2*i+1]; it = gmap.find(idx1); if (it != gmap.end()) { idx = static_cast<Index>(idx2); p_adjacency[it->second].push_back(idx); } it = gmap.find(idx2); if (it != gmap.end()) { idx = static_cast<Index>(idx1); p_adjacency[it->second].push_back(idx); } } delete [] buf; } GA_Destroy(g_edges); GA_Pgroup_sync(grp); #else int me(this->processor_rank()); int nproc(this->processor_size()); p_adjacency.clear(); p_adjacency.resize(p_nodes.size()); IndexVector current_indexes; IndexVector connected_indexes; for (int p = 0; p < nproc; ++p) { // broadcast the node indexes owned by process p to all processes, // all processes work on these at once current_indexes.clear(); if (me == p) { std::copy(p_nodes.begin(), p_nodes.end(), std::back_inserter(current_indexes)); // std::cout << me << ": node indexes: "; // std::copy(current_indexes.begin(), current_indexes.end(), // std::ostream_iterator<Index>(std::cout, ",")); // std::cout << std::endl; } boost::mpi::broadcast(this->communicator(), current_indexes, p); // make a copy of the local edges in a list (so it's easier to // remove those completely accounted for) std::list<p_Edge> tmpedges; std::copy(p_edges.begin(), p_edges.end(), std::back_inserter(tmpedges)); // loop over the process p's node index set int local_index(0); for (IndexVector::iterator n = current_indexes.begin(); n != current_indexes.end(); ++n, ++local_index) { // determine the local edges that refer to the current node index connected_indexes.clear(); std::list<p_Edge>::iterator e(tmpedges.begin()); // std::cout << me << ": current node index: " << *n // << ", edges: " << tmpedges.size() // << std::endl; while (e != tmpedges.end()) { if (*n == e->conn.first && e->conn.second != bogus) { connected_indexes.push_back(e->conn.second); e->found.first = true; // std::cout << me << ": found connection: edge " << e->index // << " (" << e->conn.first << ", " << e->conn.second << ")" // << std::endl; } if (*n == e->conn.second && e->conn.first != bogus) { connected_indexes.push_back(e->conn.first); e->found.second = true; // std::cout << me << ": found connection: edge " << e->index // << " (" << e->conn.first << ", " << e->conn.second << ")" // << std::endl; } if (e->found.first && e->found.second) { e = tmpedges.erase(e); } else if (e->conn.first == bogus || e->conn.second == bogus) { e = tmpedges.erase(e); } else { ++e; } } // gather all connections for the current node index to the // node's owner process, we have to gather the vectors because // processes will have different numbers of connections if (me == p) { size_t allsize; boost::mpi::reduce(this->communicator(), connected_indexes.size(), allsize, std::plus<size_t>(), p); std::vector<IndexVector> all_connected_indexes; boost::mpi::gather(this->communicator(), connected_indexes, all_connected_indexes, p); p_adjacency[local_index].clear(); for (std::vector<IndexVector>::iterator k = all_connected_indexes.begin(); k != all_connected_indexes.end(); ++k) { std::copy(k->begin(), k->end(), std::back_inserter(p_adjacency[local_index])); } } else { boost::mpi::reduce(this->communicator(), connected_indexes.size(), std::plus<size_t>(), p); boost::mpi::gather(this->communicator(), connected_indexes, p); } this->communicator().barrier(); } this->communicator().barrier(); } #endif }
main(int argc, char **argv) { int rank, nprocs; int g_A, dims[D]={5,10}, local_A[N], local_G[N], **sub_array=NULL, **s_array=NULL; int i, j, value=5; MPI_Init(&argc, &argv); GA_Initialize(); MA_init(C_INT, 1000, 1000); MPI_Comm_rank(MPI_COMM_WORLD, &rank); MPI_Comm_size(MPI_COMM_WORLD, &nprocs); s_array=(int**)malloc(N*sizeof(int*)); for(i=0; i<N; i++) { s_array[i]=(int*)malloc(D*sizeof(int)); for(j=0; j<D; j++) s_array[i][j]=rand()%5; } sub_array=(int**)malloc(N*sizeof(int*)); for(i=0; i<N; i++) { sub_array[i]=(int*)malloc(D*sizeof(int)); for(j=0; j<D; j++) sub_array[i][j]=rand()%5; } for(i=0; i<N; i++) //local_A=(int*)malloc(N*sizeof(int)); /* * depends on the value of array ..we can generate the location values in randon * we can also use the if-condition */ // PRINTing all the genrated array for reference for(i=0; i<N; i++) { for(j=0; j<D; j++)printf("%d ",s_array[i][j]); printf("\n"); } printf("\n"); for(i=0; i<N; i++) { for(j=0; j<D; j++)printf("%d ",sub_array[i][j]); printf("\n"); } printf("\n"); for(i=0; i<N; i++)printf("%d \n",local_A[i]=rand()%5+1); // PRINT done - now creating array g_A=NGA_Create(C_INT, D, dims, "array_A", NULL); GA_Fill(g_A, &value); GA_Sync(); NGA_Scatter(g_A, local_A, s_array, N); NGA_Gather(g_A, local_G, s_array, N); GA_Sync(); GA_Print(g_A); for(i=0; i<N; i++)printf("%d \n",local_G[i]); printf("\n"); if(rank==0) { for(i=0; i<N; i++) if(local_G[i]!=local_A[i]) printf("GA Error: \n"); } GA_Sync(); if(rank==0) GA_PRINT_MSG(); GA_Terminate(); MPI_Finalize(); return 0; }
int main( int argc, char **argv ) { int g_a, g_b, i, j, size, size_me; int icnt, idx, jdx, ld; int n=N, type=MT_C_INT, one; int *values, *ptr; int **indices; int dims[2]={N,N}; int lo[2], hi[2]; int heap=3000000, stack=2000000; int me, nproc; int datatype, elements; double *prealloc_mem; MP_INIT(argc,argv); #if 1 GA_INIT(argc,argv); /* initialize GA */ me=GA_Nodeid(); nproc=GA_Nnodes(); if(me==0) { if(GA_Uses_fapi())GA_Error("Program runs with C array API only",1); printf("\nUsing %ld processes\n",(long)nproc); fflush(stdout); } heap /= nproc; stack /= nproc; if(! MA_init(MT_F_DBL, stack, heap)) GA_Error("MA_init failed",stack+heap); /* initialize memory allocator*/ /* Create a regular matrix. */ if(me==0)printf("\nCreating matrix A of size %d x %d\n",N,N); g_a = NGA_Create(type, 2, dims, "A", NULL); if(!g_a) GA_Error("create failed: A",n); /* Fill matrix using scatter routines */ size = N*N; if (size%nproc == 0) { size_me = size/nproc; } else { i = size - size%nproc; size_me = i/nproc; if (me < size%nproc) size_me++; } /* Check that sizes are all okay */ i = size_me; GA_Igop(&i,1,"+"); if (i != size) { GA_Error("Sizes don't add up correctly: ",i); } else if (me==0) { printf("\nSizes add up correctly\n"); } /* Allocate index and value arrays */ indices = (int**)malloc(size_me*sizeof(int*)); values = (int*)malloc(size_me*sizeof(int)); icnt = me; for (i=0; i<size_me; i++) { values[i] = icnt; idx = icnt%N; jdx = (icnt-idx)/N; if (idx >= N || idx < 0) { printf("p[%d] Bogus index i: %d\n",me,idx); } if (jdx >= N || jdx < 0) { printf("p[%d] Bogus index j: %d\n",me,jdx); } indices[i] = (int*)malloc(2*sizeof(int)); (indices[i])[0] = idx; (indices[i])[1] = jdx; icnt += nproc; } /* Scatter values into g_a */ NGA_Scatter(g_a, values, indices, size_me); GA_Sync(); /* Check to see if contents of g_a are correct */ NGA_Distribution( g_a, me, lo, hi ); NGA_Access(g_a, lo, hi, &ptr, &ld); for (i=lo[0]; i<hi[0]; i++) { idx = i-lo[0]; for (j=lo[1]; j<hi[1]; j++) { jdx = j-lo[1]; if (ptr[idx*ld+jdx] != j*N+i) { printf("p[%d] (Scatter) expected: %d actual: %d\n",me,j*N+i,ptr[idx*ld+jdx]); } } } if (me==0) printf("\nCompleted test of NGA_Scatter\n"); for (i=0; i<size_me; i++) { values[i] = 0; } GA_Sync(); NGA_Gather(g_a, values, indices, size_me); icnt = me; for (i=0; i<size_me; i++) { if (icnt != values[i]) { printf("p[%d] (Gather) expected: %d actual: %d\n",me,icnt,values[i]); } icnt += nproc; } if (me==0) printf("\nCompleted test of NGA_Gather\n"); GA_Sync(); /* Scatter-accumulate values back into GA*/ one = 1; NGA_Scatter_acc(g_a, values, indices, size_me, &one); GA_Sync(); /* Check to see if contents of g_a are correct */ for (i=lo[0]; i<hi[0]; i++) { idx = i-lo[0]; for (j=lo[1]; j<hi[1]; j++) { jdx = j-lo[1]; if (ptr[idx*ld+jdx] != 2*(j*N+i)) { printf("p[%d] (Scatter_acc) expected: %d actual: %d\n",me,2*(j*N+i),ptr[idx*ld+jdx]); } } } if (me==0) printf("\nCompleted test of NGA_Scatter_acc\n"); NGA_Release(g_a, lo, hi); /* Test fixed buffer size */ NGA_Alloc_gatscat_buf(size_me); /* Scatter-accumulate values back into GA*/ GA_Sync(); NGA_Scatter_acc(g_a, values, indices, size_me, &one); GA_Sync(); /* Check to see if contents of g_a are correct */ for (i=lo[0]; i<hi[0]; i++) { idx = i-lo[0]; for (j=lo[1]; j<hi[1]; j++) { jdx = j-lo[1]; if (ptr[idx*ld+jdx] != 3*(j*N+i)) { printf("p[%d] (Scatter_acc) expected: %d actual: %d\n",me,3*(j*N+i),ptr[idx*ld+jdx]); } } } if (me==0) printf("\nCompleted test of NGA_Scatter_acc using fixed buffers\n"); NGA_Release(g_a, lo, hi); NGA_Free_gatscat_buf(); GA_Destroy(g_a); if(me==0)printf("\nSuccess\n"); GA_Terminate(); #endif MP_FINALIZE(); return 0; }
/** * Evaluate offsets for each network component */ void setOffsets(void) { // Interleave contributions from buses and branches to match matrices int i,j,jdx,jdx1,jdx2; int *i_bus_offsets = new int[p_nBuses]; int *i_branch_offsets = new int[p_nBranches]; for (i=0; i<p_nBuses; i++) { i_bus_offsets[i] = 0; } for (i=0; i<p_nBranches; i++) { i_branch_offsets[i] = 0; } int icnt = 0; int nsize; // Evaluate offsets for individual network components for (i=0; i<p_nBuses; i++) { if (p_network->getActiveBus(i)) { i_bus_offsets[i] = icnt; icnt += p_network->getBus(i)->vectorNumElements(); std::vector<int> nghbrs = p_network->getConnectedBranches(i); nsize = nghbrs.size(); for (j=0; j<nsize; j++) { // Need to avoid double counting of branches when evaluating offsets. // If branch is non-local and it is active, then include it in offsets. // Otherwise, if branch is local and bus i is equal to the "from" bus, // then include it in the offsets. jdx = nghbrs[j]; if (isLocalBranch(jdx)) { p_network->getBranchEndpoints(jdx,&jdx1,&jdx2); if (jdx1 == i) { i_branch_offsets[jdx] = icnt; icnt += p_network->getBranch(jdx)->vectorNumElements(); } } else { if (p_network->getActiveBranch(jdx)) { i_branch_offsets[jdx] = icnt; icnt += p_network->getBranch(jdx)->vectorNumElements(); } } } } } // Total number of rows and columns from this processor have been evaluated, // now create buffers that can scatter individual offsets to global arrays int **i_bus_index = new int*[p_nBuses]; int **i_branch_index = new int*[p_nBranches]; int *i_bus_index_buf = new int[p_nBuses]; int *i_branch_index_buf = new int[p_nBranches]; int *i_bus_value_buf = new int[p_nBuses]; int *i_branch_value_buf = new int[p_nBranches]; int i_bus_cnt = 0; int i_branch_cnt = 0; int row_offset = p_Offsets[p_me]; int nbus = 0; int nbranch = 0; for (i=0; i<p_nBuses; i++) { if (p_network->getActiveBus(i)) { nbus++; i_bus_value_buf[i_bus_cnt] = i_bus_offsets[i]+row_offset; i_bus_index_buf[i_bus_cnt] = p_network->getGlobalBusIndex(i); i_bus_index[i_bus_cnt] = &i_bus_index_buf[i_bus_cnt]; i_bus_cnt++; } } for (i=0; i<p_nBranches; i++) { if (p_network->getActiveBranch(i)) { nbranch++; i_branch_value_buf[i_branch_cnt] = i_branch_offsets[i]+row_offset; i_branch_index_buf[i_branch_cnt] = p_network->getGlobalBranchIndex(i); i_branch_index[i_branch_cnt] = &i_branch_index_buf[i_branch_cnt]; i_branch_cnt++; } } delete [] i_bus_offsets; delete [] i_branch_offsets; // Create global arrays that hold column and row offsets for all buses and // branches in the network. First create map array for global arrays int *t_busMap = new int[p_nNodes]; int *t_branchMap = new int[p_nNodes]; for (i=0; i<p_nNodes; i++) { t_busMap[i] = 0; t_branchMap[i] = 0; } t_busMap[p_me] = nbus; t_branchMap[p_me] = nbranch; char plus[2]; strcpy(plus,"+"); GA_Pgroup_igop(p_GAgrp, t_busMap, p_nNodes, plus); GA_Pgroup_igop(p_GAgrp, t_branchMap, p_nNodes, plus); int *busMap = new int[p_nNodes]; int *branchMap = new int[p_nNodes]; busMap[0] = 0; branchMap[0] = 0; int total_buses = t_busMap[0]; int total_branches = t_branchMap[0]; for (i=1; i<p_nNodes; i++) { busMap[i] = busMap[i-1] + t_busMap[i-1]; total_buses += t_busMap[i]; branchMap[i] = branchMap[i-1] + t_branchMap[i-1]; total_branches += t_branchMap[i]; } delete [] t_busMap; delete [] t_branchMap; int one = 1; g_bus_offsets = GA_Create_handle(); GA_Set_data(g_bus_offsets, one, &total_buses, C_INT); GA_Set_irreg_distr(g_bus_offsets, busMap, &p_nNodes); GA_Set_pgroup(g_bus_offsets, p_GAgrp); if (!GA_Allocate(g_bus_offsets)) { char buf[256]; sprintf(buf,"GenVectorMap::setOffsets: Unable to allocate distributed array for bus offsets\n"); printf("%s",buf); throw gridpack::Exception(buf); } GA_Zero(g_bus_offsets); g_branch_offsets = GA_Create_handle(); GA_Set_data(g_branch_offsets, one, &total_branches, C_INT); GA_Set_irreg_distr(g_branch_offsets, branchMap, &p_nNodes); GA_Set_pgroup(g_branch_offsets, p_GAgrp); if (!GA_Allocate(g_branch_offsets)) { char buf[256]; sprintf(buf,"GenVectorMap::setOffsets: Unable to allocate distributed array for branch offsets\n"); printf("%s",buf); throw gridpack::Exception(buf); } GA_Zero(g_branch_offsets); delete [] busMap; delete [] branchMap; // Scatter offsets to global arrays NGA_Scatter(g_bus_offsets, i_bus_value_buf, i_bus_index, i_bus_cnt); NGA_Scatter(g_branch_offsets, i_branch_value_buf, i_branch_index, i_branch_cnt); NGA_Pgroup_sync(p_GAgrp); delete [] i_bus_index; delete [] i_branch_index; delete [] i_bus_index_buf; delete [] i_branch_index_buf; delete [] i_bus_value_buf; delete [] i_branch_value_buf; }