/* Forward declaration: halo_dtcopy and halo_copy_vector are mutually
 * recursive for nested vector field types. */
MDINLINE void halo_copy_vector(void *r_buffer, void *s_buffer, int count, Fieldtype type, int vflag);

/** Copy lattice data with layout described by fieldtype.
 * @param r_buffer data destination
 * @param s_buffer data source
 * @param count    number of fieldtype elements to copy
 * @param type     field layout type
 */
MDINLINE void halo_dtcopy(void *r_buffer, void *s_buffer, int count, Fieldtype type)
{
  int i, j;

  HALO_TRACE(fprintf(stderr, "%d: halo_dtcopy r_buffer=%p s_buffer=%p blocks=%d stride=%d skip=%d\n", this_node, r_buffer, s_buffer, type->vblocks, type->vstride, type->vskip));

  if (type->subtype) {
    halo_copy_vector(r_buffer, s_buffer, count, type, type->vflag);
  } else {

    for (i = 0; i < count; i++, s_buffer += type->extent, r_buffer += type->extent) {
      if (!type->count) {
        /* contiguous element: copy the full extent in one go */
        memcpy(r_buffer, s_buffer, type->extent);
      } else {
        /* indexed element: copy each block at its displacement */
        for (j = 0; j < type->count; j++) {
          memcpy(r_buffer + type->disps[j], s_buffer + type->disps[j], type->lengths[j]);
        }
      }
    }

  }
}
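/* Usage sketch (illustrative, not from the original sources): copying site
 * records where only part of each record belongs to the halo exchange.
 * The constructor halo_create_fieldtype(count, lengths, disps, extent,
 * &type) and the destructor halo_free_fieldtype are assumed here by analogy
 * with MPI_Type_indexed / MPI_Type_free; the struct layout is made up for
 * illustration only. */
static void example_dtcopy_indexed(void)
{
  struct site { double rho; double j[3]; double scratch[2]; };
  struct site src[2] = { { 1.0, { 0.1, 0.2, 0.3 }, { 0.0, 0.0 } },
                         { 2.0, { 0.4, 0.5, 0.6 }, { 0.0, 0.0 } } };
  struct site dst[2];

  /* one block of 4 doubles (rho and j) at offset 0; scratch is skipped */
  int lengths[1] = { 4 * (int)sizeof(double) };
  int disps[1]   = { 0 };
  Fieldtype type;

  halo_create_fieldtype(1, lengths, disps, sizeof(struct site), &type);
  halo_dtcopy(dst, src, 2, type); /* copies rho and j of both records */
  halo_free_fieldtype(&type);
}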
/** Frees data structures associated with a halo communicator.
 * @param hc halo communicator to be released
 */
void release_halo_communication(HaloCommunicator *hc)
{
  int n;

  for (n = 0; n < hc->num; n++) {
    HALO_TRACE(fprintf(stderr, "%d: freeing %p\n", this_node, &(hc->halo_info[n].datatype)));
    MPI_Type_free(&(hc->halo_info[n].datatype));
  }

  free(hc->halo_info);
}
/** Copy lattice data with vector layout described by fieldtype.
 * @param r_buffer data destination
 * @param s_buffer data source
 * @param count    number of fieldtype elements to copy
 * @param type     field layout type
 * @param vflag    if nonzero, vskip is given in units of the subtype extent rather than bytes
 */
MDINLINE void halo_copy_vector(void *r_buffer, void *s_buffer, int count, Fieldtype type, int vflag)
{
  int i, j;
  void *dest, *src;

  int vblocks = type->vblocks;
  int vstride = type->vstride;
  int vskip   = type->vskip;
  int extent  = type->extent;

  HALO_TRACE(fprintf(stderr, "%d: halo_copy_vector %p %p vblocks=%d vstride=%d vskip=%d extent=%d subtype_extent=%d\n", this_node, r_buffer, s_buffer, vblocks, vstride, vskip, extent, type->subtype->extent));

  if (vflag) {
    /* vskip is given in subtype elements; convert to bytes */
    vskip *= type->subtype->extent;
  }

  for (i = 0; i < count; i++, s_buffer += extent, r_buffer += extent) {
    for (j = 0, dest = r_buffer, src = s_buffer; j < vblocks; j++, dest += vskip, src += vskip) {
      halo_dtcopy(dest, src, vstride, type->subtype);
    }
  }
}
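/* Illustrative sketch: a vector Fieldtype that selects one yz-plane out of
 * a halo_grid[0] x halo_grid[1] x halo_grid[2] lattice, mirroring what
 * prepare_halo_communication (below) computes for dir=0. The signature of
 * halo_create_field_vector is taken from its use there; `scalar` stands for
 * a per-site Fieldtype built elsewhere. */
static void example_plane_fieldtype(Fieldtype scalar, const int *halo_grid, Fieldtype *plane)
{
  int nblocks = halo_grid[1] * halo_grid[2]; /* one block per (y,z) row */
  int stride  = 1;                           /* one site per block */
  int skip    = halo_grid[0];                /* rows are halo_grid[0] sites apart */

  halo_create_field_vector(nblocks, stride, skip, scalar, plane);
}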
/** Perform communication according to the parallelization scheme
 *  described by the halo communicator.
 * @param hc   halo communicator describing the parallelization scheme
 * @param base base pointer of the lattice data the halo regions belong to
 */
void halo_communication(HaloCommunicator *hc, void *base)
{
  int n, comm_type, s_node, r_node;
  void *s_buffer, *r_buffer;

  Fieldtype fieldtype;
  MPI_Datatype datatype;
  MPI_Request request;
  MPI_Status status;

  HALO_TRACE(fprintf(stderr, "%d: halo_comm base=%p num=%d\n", this_node, base, hc->num));

  for (n = 0; n < hc->num; n++) {
    HALO_TRACE(fprintf(stderr, "%d: halo_comm round %d\n", this_node, n));

    comm_type = hc->halo_info[n].type;
    s_buffer = (char *)base + hc->halo_info[n].s_offset;
    r_buffer = (char *)base + hc->halo_info[n].r_offset;

    switch (comm_type) {

      case HALO_LOCL:
        /* periodic fold-back within the same node: plain memory copy */
        fieldtype = hc->halo_info[n].fieldtype;
        halo_dtcopy(r_buffer, s_buffer, 1, fieldtype);
        break;

      case HALO_SENDRECV:
        datatype = hc->halo_info[n].datatype;
        s_node = hc->halo_info[n].source_node;
        r_node = hc->halo_info[n].dest_node;
        HALO_TRACE(fprintf(stderr, "%d: halo_comm sendrecv %d to %d (%d) (%p)\n", this_node, s_node, r_node, REQ_HALO_SPREAD, &datatype));
        MPI_Sendrecv(s_buffer, 1, datatype, r_node, REQ_HALO_SPREAD,
                     r_buffer, 1, datatype, s_node, REQ_HALO_SPREAD,
                     MPI_COMM_WORLD, &status);
        break;

      case HALO_SEND:
        datatype = hc->halo_info[n].datatype;
        fieldtype = hc->halo_info[n].fieldtype;
        s_node = hc->halo_info[n].source_node;
        r_node = hc->halo_info[n].dest_node;
        HALO_TRACE(fprintf(stderr, "%d: halo_comm send to %d.\n", this_node, r_node));
        MPI_Isend(s_buffer, 1, datatype, r_node, REQ_HALO_SPREAD, MPI_COMM_WORLD, &request);
        /* open boundary: clear the halo region instead of receiving */
        halo_dtset(r_buffer, 0, fieldtype);
        MPI_Wait(&request, &status);
        break;

      case HALO_RECV:
        datatype = hc->halo_info[n].datatype;
        s_node = hc->halo_info[n].source_node;
        r_node = hc->halo_info[n].dest_node;
        HALO_TRACE(fprintf(stderr, "%d: halo_comm recv from %d.\n", this_node, s_node));
        MPI_Irecv(r_buffer, 1, datatype, s_node, REQ_HALO_SPREAD, MPI_COMM_WORLD, &request);
        MPI_Wait(&request, &status);
        break;

      case HALO_OPEN:
        fieldtype = hc->halo_info[n].fieldtype;
        HALO_TRACE(fprintf(stderr, "%d: halo_comm open boundaries\n", this_node));
        /* \todo this does not work for the n_i - <n_i> */
        halo_dtset(r_buffer, 0, fieldtype);
        break;

    }
  }
}
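/* Usage sketch for the full communicator life cycle (illustrative; assumes
 * the declarations from halo.h are in scope and that halo_create_fieldtype /
 * halo_free_fieldtype exist as constructor/destructor for Fieldtype,
 * analogous to MPI_Type_contiguous / MPI_Type_free). `nvel` and
 * `lattice_data` are placeholder names. */
static void example_halo_lifecycle(Lattice *lattice, double *lattice_data)
{
  int nvel = 19;                      /* e.g. D3Q19: 19 doubles per site */
  int length = nvel * (int)sizeof(double);
  int disp = 0;

  Fieldtype fieldtype;
  MPI_Datatype datatype;
  HaloCommunicator halo_comm = { 0 }; /* num=0, so prepare won't free stale types */

  /* one contiguous block of nvel doubles per lattice site */
  halo_create_fieldtype(1, &length, &disp, length, &fieldtype);
  MPI_Type_contiguous(nvel, MPI_DOUBLE, &datatype);
  MPI_Type_commit(&datatype);

  prepare_halo_communication(&halo_comm, lattice, fieldtype, datatype);

  /* one halo exchange per update step */
  halo_communication(&halo_comm, (void *)lattice_data);

  release_halo_communication(&halo_comm);
  MPI_Type_free(&datatype);
  halo_free_fieldtype(&fieldtype);
}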
/** Preparation of the halo parallelization scheme. Sets up the
 *  necessary data structures for \ref halo_communication.
 * @param hc        halo communicator being created (Input/Output)
 * @param lattice   lattice the communication is created for (Input)
 * @param fieldtype field layout of the lattice data (Input)
 * @param datatype  MPI datatype for the lattice data (Input)
 */
void prepare_halo_communication(HaloCommunicator *hc, Lattice *lattice, Fieldtype fieldtype, MPI_Datatype datatype)
{
  int k, n, dir, lr, cnt, num = 0;
  int *grid = lattice->grid;
  int *period = lattice->halo_grid;

  /* release MPI datatypes of a previously prepared communicator */
  for (n = 0; n < hc->num; n++) {
    MPI_Type_free(&(hc->halo_info[n].datatype));
  }

  num = 2 * 3; /* two communications in each space direction */

  hc->num = num;
  hc->halo_info = realloc(hc->halo_info, num * sizeof(HaloInfo));

  int extent = fieldtype->extent;

  cnt = 0;
  for (dir = 0; dir < 3; dir++) {
    for (lr = 0; lr < 2; lr++) {

      HaloInfo *hinfo = &(hc->halo_info[cnt]);

      int nblocks = 1;
      for (k = dir + 1; k < 3; k++) {
        nblocks *= period[k];
      }
      int stride = 1;
      for (k = 0; k < dir; k++) {
        stride *= period[k];
      }
      int skip = 1;
      for (k = 0; k < dir + 1 && k < 2; k++) {
        skip *= period[k];
      }

      if (lr == 0) {
        /* send to left, recv from right */
        hinfo->s_offset = extent * stride * 1;
        hinfo->r_offset = extent * stride * (grid[dir] + 1);
      } else {
        /* send to right, recv from left */
        hinfo->s_offset = extent * stride * grid[dir];
        hinfo->r_offset = extent * stride * 0;
      }

      hinfo->source_node = node_neighbors[2 * dir + 1 - lr];
      hinfo->dest_node = node_neighbors[2 * dir + lr];

      halo_create_field_vector(nblocks, stride, skip, fieldtype, &hinfo->fieldtype);

      MPI_Type_vector(nblocks, stride, skip, datatype, &hinfo->datatype);
      MPI_Type_commit(&hinfo->datatype);

#ifdef PARTIAL_PERIODIC
      if (!PERIODIC(dir) && (boundary[2 * dir + lr] != 0 || boundary[2 * dir + 1 - lr] != 0)) {
        if (node_grid[dir] == 1) {
          hinfo->type = HALO_OPEN;
        } else if (lr == 0) {
          if (boundary[2 * dir + lr] == 1) {
            hinfo->type = HALO_RECV;
          } else {
            hinfo->type = HALO_SEND;
          }
        } else {
          if (boundary[2 * dir + lr] == -1) {
            hinfo->type = HALO_RECV;
          } else {
            hinfo->type = HALO_SEND;
          }
        }
      } else
#endif
      {
        if (node_grid[dir] == 1) {
          hinfo->type = HALO_LOCL;
        } else {
          hinfo->type = HALO_SENDRECV;
        }
      }

      HALO_TRACE(fprintf(stderr, "%d: prepare_halo_communication dir=%d lr=%d s_offset=%ld r_offset=%ld s_node=%d d_node=%d type=%d\n", this_node, dir, lr, hinfo->s_offset, hinfo->r_offset, hinfo->source_node, hinfo->dest_node, hinfo->type));

      cnt++;
    }
  }
}
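/* Worked example of the decomposition above: for halo_grid = {Nx, Ny, Nz}
 * the loops yield
 *   dir=0: nblocks=Ny*Nz, stride=1,     skip=Nx     (yz-plane, single sites)
 *   dir=1: nblocks=Nz,    stride=Nx,    skip=Nx*Ny  (xz-plane, whole x-rows)
 *   dir=2: nblocks=1,     stride=Nx*Ny, skip=Nx*Ny  (xy-plane, contiguous)
 * The sketch below reproduces the computation for checking; it is
 * illustrative only and not part of the original file. */
static void example_halo_decomposition(const int *period /* halo_grid */)
{
  int dir, k;
  for (dir = 0; dir < 3; dir++) {
    int nblocks = 1, stride = 1, skip = 1;
    for (k = dir + 1; k < 3; k++) nblocks *= period[k];
    for (k = 0; k < dir; k++)     stride  *= period[k];
    for (k = 0; k < dir + 1 && k < 2; k++) skip *= period[k];
    fprintf(stderr, "dir=%d: nblocks=%d stride=%d skip=%d\n", dir, nblocks, stride, skip);
  }
}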