/*
 * Insert one element into list l at index i.
 *
 * The elements currently at i..nused-1 are shifted up by one slot and the
 * new value, passed as the single variadic argument, is stored at index i:
 *   l->type == 'P'  ->  the argument is read as a Posn
 *   l->type == 'p'  ->  the argument is read as a void*
 * For any other l->type nothing is stored, but nused is still incremented.
 */
void
inslist(List *l, int i, ...)
{
	va_list args;

	va_start(args, i);
	if(l->type == 'P'){
		Posn *slot;

		/* grow first: the backing array is only looked up afterwards */
		growlist(l, sizeof(*slot));
		slot = l->posnptr+i;
		memmove(slot+1, slot, (l->nused-i)*sizeof(*slot));
		*slot = va_arg(args, Posn);
	}else if(l->type == 'p'){
		void **slot;

		growlist(l, sizeof(*slot));
		slot = l->voidpptr+i;
		memmove(slot+1, slot, (l->nused-i)*sizeof(*slot));
		*slot = va_arg(args, void*);
	}
	va_end(args);
	l->nused++;
}
int reordernodes() /* **-------------------------------------------------------------- ** Input: none ** Output: returns 1 if successful, 0 if not ** Purpose: re-orders nodes to minimize # of non-zeros that ** will appear in factorized solution matrix **-------------------------------------------------------------- */ { int k, knode, m, n; for (k=1; k<=Nnodes; k++) { Row[k] = k; Order[k] = k; } n = Njuncs; for (k=1; k<=n; k++) /* Examine each junction */ { m = mindegree(k,n); /* Node with lowest degree */ knode = Order[m]; /* Node's index */ if (!growlist(knode)) return(101); /* Augment adjacency list */ Order[m] = Order[k]; /* Switch order of nodes */ Order[k] = knode; Degree[knode] = 0; /* In-activate node */ } for (k=1; k<=n; k++) /* Assign nodes to rows of */ Row[Order[k]] = k; /* coeff. matrix */ return(0); } /* End of reordernodes */
/*
 * Insert v into list l at index i.
 *
 * growlist() is called first, then the elements at i..nused-1 are moved
 * up by one slot, v is stored at index i and nused is incremented.
 */
void
inslist(List *l, int i, void *v)
{
	growlist(l);
	/* source and destination overlap, hence memmove rather than memcpy */
	memmove(l->ptr+i+1, l->ptr+i, sizeof(void*)*(l->nused-i));
	l->ptr[i] = v;
	l->nused += 1;
}
/*
 * Insert val into list l at index i.
 *
 * growlist() is called first, then the elements at i..nused-1 are moved
 * up by one slot, val is stored at index i and nused is incremented.
 *
 * The byte count of the move is derived from the element type that is
 * actually indexed (l->longptr[0]) rather than from the size of the whole
 * l->g union, so the shift stays correct even if the union is ever a
 * different size from one stored element.
 */
void
inslist(List *l, int i, int64_t val)
{
	growlist(l);
	/* source and destination overlap, hence memmove rather than memcpy */
	memmove(&l->longptr[i+1], &l->longptr[i],
		(l->nused-i)*sizeof(l->longptr[0]));
	l->longptr[i] = val;
	l->nused++;
}
/*
 * Append cp to the i_calls array.
 *
 * growlist is invoked with a type name as its first argument, so it is a
 * macro; presumably it makes sure i_calls has room for one more entry
 * (confirm against its definition).  cp is then stored at index
 * i_calls_cur and the index is advanced by one.
 */
static inline void markcall (i_puint32 cp)
{
    growlist(i_puint32, i_calls, i_calls_cur, i_calls_lim, dn_calls);
    i_calls[i_calls_cur] = cp;
    i_calls_cur++;
}
/* Rebuild the ghost-atom ("border") lists for every swap.
 *
 * For each of the 3 dimensions and each of the 2 * need[idim] swaps in it:
 *   1. the atoms in [nfirst, nlast) are scanned with an `omp for`, and each
 *      thread records the indices whose coordinate along idim lies in
 *      [slablo[iswap], slabhi[iswap]];
 *   2. the per-thread counts are turned into a running total so that each
 *      thread knows its offset into buf_send and sendlist[iswap];
 *   3. the packed atoms are exchanged with sendproc[iswap]/recvproc[iswap]
 *      through MPI, or buf_send is used directly when the partner is `me`;
 *   4. the received atoms are unpacked behind the current local+ghost atoms
 *      and the per-swap counters and message sizes are recorded.
 * Finally the send/receive buffers are grown to fit reverse communication.
 *
 * NOTE(review): the orphaned `omp for` / `omp barrier` / `omp master`
 * directives and the use of omp_get_thread_num() suggest that all threads
 * call this from inside an enclosing parallel region, with the function's
 * locals (nsend, nrecv, n, ...) private to each thread - confirm at the
 * call site.
 */
void Comm::borders(Atom &atom)
{
  /* nall and the outer i are not used below (the loops declare their own
     i); m is only ever set to 0 */
  int i, m, n, iswap, idim, ineed, nsend, nrecv, nall, nfirst, nlast;
  MMD_float lo, hi;
  int pbc_flags[4];
  MMD_float** x;
  MPI_Request request;
  MPI_Status status;

  /* erase all ghost atoms */
  atom.nghost = 0;

  /* do swaps over all 3 dimensions */
  iswap = 0;

  int tid = omp_get_thread_num();

  /* (re)allocate bookkeeping arrays; only the master thread does this */
  #pragma omp master
  {
    if(atom.nlocal > maxnlocal) {
      /* NOTE(review): a previously allocated send_flag is not released in
         this function */
      send_flag = new int[atom.nlocal];
      maxnlocal = atom.nlocal;
    }

    if(maxthreads < threads->omp_num_threads) {
      /* NOTE(review): likewise, earlier per-thread arrays are not released
         here when the thread count grows */
      maxthreads = threads->omp_num_threads;
      nsend_thread = new int [maxthreads];
      nrecv_thread = new int [maxthreads];
      nholes_thread = new int [maxthreads];
      maxsend_thread = new int [maxthreads];
      exc_sendlist_thread = new int*[maxthreads];

      /* each thread's private send list starts with room for maxsend indices */
      for(int i = 0; i < maxthreads; i++) {
        maxsend_thread[i] = maxsend;
        exc_sendlist_thread[i] = (int*) malloc(maxsend * sizeof(int));
      }
    }
  }

  for(idim = 0; idim < 3; idim++) {
    nlast = 0;

    for(ineed = 0; ineed < 2 * need[idim]; ineed++) {
      // find atoms within slab boundaries lo/hi using <= and >=
      // check atoms between nfirst and nlast
      //   for first swaps in a dim, check owned and ghost
      //   for later swaps in a dim, only check newly arrived ghosts
      // store sent atom indices in list for use in future timesteps

      lo = slablo[iswap];
      hi = slabhi[iswap];
      pbc_flags[0] = pbc_any[iswap];
      pbc_flags[1] = pbc_flagx[iswap];
      pbc_flags[2] = pbc_flagy[iswap];
      pbc_flags[3] = pbc_flagz[iswap];

      x = atom.x;

      /* the scan range only advances on even ineed; an odd ineed scans the
         same [nfirst, nlast) range as the swap before it */
      if(ineed % 2 == 0) {
        nfirst = nlast;
        nlast = atom.nlocal + atom.nghost;
      }

      /* reset the per-thread send counts */
      #pragma omp for
      for(int i = 0; i < threads->omp_num_threads; i++) {
        nsend_thread[i] = 0;
      }

      #pragma omp barrier
      nsend = 0;
      m = 0;

      /* collect into this thread's own list the indices of atoms inside
         the slab, growing the list in steps of 100 entries when full */
      #pragma omp for
      for(int i = nfirst; i < nlast; i++) {
        if(x[i][idim] >= lo && x[i][idim] <= hi) {
          if(nsend >= maxsend_thread[tid]) {
            maxsend_thread[tid] = nsend + 100;
            /* NOTE(review): the realloc result is not checked for NULL */
            exc_sendlist_thread[tid] = (int*) realloc(exc_sendlist_thread[tid], (nsend + 100) * sizeof(int));
          }

          exc_sendlist_thread[tid][nsend++] = i;
        }
      }

      nsend_thread[tid] = nsend;

      #pragma omp barrier

      /* turn the per-thread counts into an inclusive running total and make
         sure sendlist[iswap] and the send buffer can hold all of them */
      #pragma omp master
      {
        int total_nsend = 0;

        for(int i = 0; i < threads->omp_num_threads; i++) {
          total_nsend += nsend_thread[i];
          nsend_thread[i] = total_nsend;
        }

        if(total_nsend > maxsendlist[iswap]) growlist(iswap, total_nsend);

        if(total_nsend * 3 > maxsend) growsend(total_nsend * 3);
      }
      #pragma omp barrier

      /* nsend_thread[tid] - nsend is where this thread's slice starts, so
         the threads pack into disjoint parts of buf_send / sendlist[iswap].
         NOTE(review): packing and unpacking use a hard-coded stride of 3
         values per atom while the MPI counts below use atom.border_size;
         the two agree only if border_size == 3 - confirm. */
      for(int k = 0; k < nsend; k++) {
        atom.pack_border(exc_sendlist_thread[tid][k], &buf_send[(k + nsend_thread[tid] - nsend) * 3], pbc_flags);
        sendlist[iswap][k + nsend_thread[tid] - nsend] = exc_sendlist_thread[tid][k];
      }

      #pragma omp barrier

      /* swap atoms with other proc
         put incoming ghosts at end of my atom arrays
         if swapping with self, simply copy, no messages */

      #pragma omp master
      {
        /* the last running total is the overall number of atoms to send */
        nsend = nsend_thread[threads->omp_num_threads - 1];

        if(sendproc[iswap] != me) {
          /* exchange the counts first so the receive buffer can be grown
             before the payload is received */
          MPI_Send(&nsend, 1, MPI_INT, sendproc[iswap], 0, MPI_COMM_WORLD);
          MPI_Recv(&nrecv, 1, MPI_INT, recvproc[iswap], 0, MPI_COMM_WORLD, &status);

          if(nrecv * atom.border_size > maxrecv) growrecv(nrecv * atom.border_size);

          /* choose the MPI datatype that matches the width of MMD_float */
          if(sizeof(MMD_float) == 4) {
            MPI_Irecv(buf_recv, nrecv * atom.border_size, MPI_FLOAT, recvproc[iswap], 0, MPI_COMM_WORLD, &request);
            MPI_Send(buf_send, nsend * atom.border_size, MPI_FLOAT, sendproc[iswap], 0, MPI_COMM_WORLD);
          } else {
            MPI_Irecv(buf_recv, nrecv * atom.border_size, MPI_DOUBLE, recvproc[iswap], 0, MPI_COMM_WORLD, &request);
            MPI_Send(buf_send, nsend * atom.border_size, MPI_DOUBLE, sendproc[iswap], 0, MPI_COMM_WORLD);
          }

          MPI_Wait(&request, &status);
          buf = buf_recv;
        } else {
          /* partner is this rank: unpack straight from the send buffer */
          nrecv = nsend;
          buf = buf_send;
        }

        /* hand the count to the other threads through the non-local
           nrecv_atoms; they read it after the barrier below */
        nrecv_atoms = nrecv;
      }
      /* unpack buffer */
      #pragma omp barrier
      n = atom.nlocal + atom.nghost;
      nrecv = nrecv_atoms;

      #pragma omp for
      for(int i = 0; i < nrecv; i++)
        atom.unpack_border(n + i, &buf[i * 3]);

      #pragma omp barrier

      /* set all pointers & counters */

      #pragma omp master
      {
        sendnum[iswap] = nsend;
        recvnum[iswap] = nrecv;
        comm_send_size[iswap] = nsend * atom.comm_size;
        comm_recv_size[iswap] = nrecv * atom.comm_size;
        /* reverse communication flows the other way, so its send size is
           based on nrecv and its receive size on nsend */
        reverse_send_size[iswap] = nrecv * atom.reverse_size;
        reverse_recv_size[iswap] = nsend * atom.reverse_size;
        firstrecv[iswap] = atom.nlocal + atom.nghost;
        atom.nghost += nrecv;
      }
      #pragma omp barrier
      iswap++;
    }
  }

  /* ensure buffers are large enough for reverse comm */

  int max1, max2;
  max1 = max2 = 0;

  for(iswap = 0; iswap < nswap; iswap++) {
    max1 = MAX(max1, reverse_send_size[iswap]);
    max2 = MAX(max2, reverse_recv_size[iswap]);
  }

  if(max1 > maxsend) growsend(max1);

  if(max2 > maxrecv) growrecv(max2);
}