static inline int NBC_Start_round(NBC_Handle *handle) { int num; /* number of operations */ int res; char* ptr; MPI_Request *tmp; NBC_Fn_type type; NBC_Args_send sendargs; NBC_Args_recv recvargs; NBC_Args_op opargs; NBC_Args_copy copyargs; NBC_Args_unpack unpackargs; void *buf1, *buf2; /* get round-schedule address */ ptr = handle->schedule->data + handle->row_offset; NBC_GET_BYTES(ptr,num); NBC_DEBUG(10, "start_round round at offset %d : posting %i operations\n", handle->row_offset, num); for (int i = 0 ; i < num ; ++i) { int offset = (intptr_t)(ptr - handle->schedule->data); memcpy (&type, ptr, sizeof (type)); switch(type) { case SEND: NBC_DEBUG(5," SEND (offset %li) ", offset); NBC_GET_BYTES(ptr,sendargs); NBC_DEBUG(5,"*buf: %p, count: %i, type: %p, dest: %i, tag: %i)\n", sendargs.buf, sendargs.count, sendargs.datatype, sendargs.dest, handle->tag); /* get an additional request */ handle->req_count++; /* get buffer */ if(sendargs.tmpbuf) { buf1=(char*)handle->tmpbuf+(long)sendargs.buf; } else { buf1=(void *)sendargs.buf; } #ifdef NBC_TIMING Isend_time -= MPI_Wtime(); #endif tmp = (MPI_Request *) realloc ((void *) handle->req_array, handle->req_count * sizeof (MPI_Request)); if (NULL == tmp) { return OMPI_ERR_OUT_OF_RESOURCE; } handle->req_array = tmp; res = MCA_PML_CALL(isend(buf1, sendargs.count, sendargs.datatype, sendargs.dest, handle->tag, MCA_PML_BASE_SEND_STANDARD, sendargs.local?handle->comm->c_local_comm:handle->comm, handle->req_array+handle->req_count - 1)); if (OMPI_SUCCESS != res) { NBC_Error ("Error in MPI_Isend(%lu, %i, %p, %i, %i, %lu) (%i)", (unsigned long)buf1, sendargs.count, sendargs.datatype, sendargs.dest, handle->tag, (unsigned long)handle->comm, res); return res; } #ifdef NBC_TIMING Isend_time += MPI_Wtime(); #endif break; case RECV: NBC_DEBUG(5, " RECV (offset %li) ", offset); NBC_GET_BYTES(ptr,recvargs); NBC_DEBUG(5, "*buf: %p, count: %i, type: %p, source: %i, tag: %i)\n", recvargs.buf, recvargs.count, recvargs.datatype, recvargs.source, handle->tag); /* get an additional request - TODO: req_count NOT thread safe */ handle->req_count++; /* get buffer */ if(recvargs.tmpbuf) { buf1=(char*)handle->tmpbuf+(long)recvargs.buf; } else { buf1=recvargs.buf; } #ifdef NBC_TIMING Irecv_time -= MPI_Wtime(); #endif tmp = (MPI_Request *) realloc ((void *) handle->req_array, handle->req_count * sizeof (MPI_Request)); if (NULL == tmp) { return OMPI_ERR_OUT_OF_RESOURCE; } handle->req_array = tmp; res = MCA_PML_CALL(irecv(buf1, recvargs.count, recvargs.datatype, recvargs.source, handle->tag, recvargs.local?handle->comm->c_local_comm:handle->comm, handle->req_array+handle->req_count-1)); if (OMPI_SUCCESS != res) { NBC_Error("Error in MPI_Irecv(%lu, %i, %p, %i, %i, %lu) (%i)", (unsigned long)buf1, recvargs.count, recvargs.datatype, recvargs.source, handle->tag, (unsigned long)handle->comm, res); return res; } #ifdef NBC_TIMING Irecv_time += MPI_Wtime(); #endif break; case OP: NBC_DEBUG(5, " OP2 (offset %li) ", offset); NBC_GET_BYTES(ptr,opargs); NBC_DEBUG(5, "*buf1: %p, buf2: %p, count: %i, type: %p)\n", opargs.buf1, opargs.buf2, opargs.count, opargs.datatype); /* get buffers */ if(opargs.tmpbuf1) { buf1=(char*)handle->tmpbuf+(long)opargs.buf1; } else { buf1=(void *)opargs.buf1; } if(opargs.tmpbuf2) { buf2=(char*)handle->tmpbuf+(long)opargs.buf2; } else { buf2=opargs.buf2; } ompi_op_reduce(opargs.op, buf1, buf2, opargs.count, opargs.datatype); break; case COPY: NBC_DEBUG(5, " COPY (offset %li) ", offset); NBC_GET_BYTES(ptr,copyargs); NBC_DEBUG(5, "*src: %lu, srccount: %i, srctype: %p, *tgt: %lu, tgtcount: %i, tgttype: %p)\n", (unsigned long) copyargs.src, copyargs.srccount, copyargs.srctype, (unsigned long) copyargs.tgt, copyargs.tgtcount, copyargs.tgttype); /* get buffers */ if(copyargs.tmpsrc) { buf1=(char*)handle->tmpbuf+(long)copyargs.src; } else { buf1=copyargs.src; } if(copyargs.tmptgt) { buf2=(char*)handle->tmpbuf+(long)copyargs.tgt; } else { buf2=copyargs.tgt; } res = NBC_Copy (buf1, copyargs.srccount, copyargs.srctype, buf2, copyargs.tgtcount, copyargs.tgttype, handle->comm); if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { return res; } break; case UNPACK: NBC_DEBUG(5, " UNPACK (offset %li) ", offset); NBC_GET_BYTES(ptr,unpackargs); NBC_DEBUG(5, "*src: %lu, srccount: %i, srctype: %p, *tgt: %lu\n", (unsigned long) unpackargs.inbuf, unpackargs.count, unpackargs.datatype, (unsigned long) unpackargs.outbuf); /* get buffers */ if(unpackargs.tmpinbuf) { buf1=(char*)handle->tmpbuf+(long)unpackargs.inbuf; } else { buf1=unpackargs.inbuf; } if(unpackargs.tmpoutbuf) { buf2=(char*)handle->tmpbuf+(long)unpackargs.outbuf; } else { buf2=unpackargs.outbuf; } res = NBC_Unpack (buf1, unpackargs.count, unpackargs.datatype, buf2, handle->comm); if (OMPI_SUCCESS != res) { NBC_Error ("NBC_Unpack() failed (code: %i)", res); return res; } break; default: NBC_Error ("NBC_Start_round: bad type %li at offset %li", (long)type, offset); return OMPI_ERROR; } } /* check if we can make progress - not in the first round, this allows us to leave the * initialization faster and to reach more overlap * * threaded case: calling progress in the first round can lead to a * deadlock if NBC_Free is called in this round :-( */ if (handle->row_offset) { res = NBC_Progress(handle); if ((NBC_OK != res) && (NBC_CONTINUE != res)) { return OMPI_ERROR; } } return OMPI_SUCCESS; }
static inline int NBC_Start_round(NBC_Handle *handle) { int num; /* number of operations */ int i, res, ret=NBC_OK; char* ptr; NBC_Fn_type type; NBC_Args_send sendargs; NBC_Args_recv recvargs; NBC_Args_op opargs; NBC_Args_copy copyargs; NBC_Args_unpack unpackargs; NBC_Schedule myschedule; void *buf1, *buf2, *buf3; /* get round-schedule address */ myschedule = (NBC_Schedule*)((char*)*handle->schedule + handle->row_offset); ptr = (char*) myschedule; NBC_GET_BYTES(ptr,num); NBC_DEBUG(10, "start_round round at address %p : posting %i operations\n", myschedule, num); for (i=0; i<num; i++) { NBC_GET_BYTES(ptr,type); switch(type) { case SEND: NBC_DEBUG(5," SEND (offset %li) ", (long)ptr-(long)myschedule); NBC_GET_BYTES(ptr,sendargs); NBC_DEBUG(5,"*buf: %p, count: %i, type: %lu, dest: %i, tag: %i)\n", sendargs.buf, sendargs.count, (unsigned long)sendargs.datatype, sendargs.dest, handle->tag); /* get an additional request */ handle->req_count++; /* get buffer */ if(sendargs.tmpbuf) { buf1=(char*)handle->tmpbuf+(long)sendargs.buf; } else { buf1=sendargs.buf; } #ifdef NBC_TIMING Isend_time -= MPI_Wtime(); #endif handle->req_array = (MPI_Request*)realloc((void*)handle->req_array, (handle->req_count)*sizeof(MPI_Request)); NBC_CHECK_NULL(handle->req_array); res = MCA_PML_CALL(isend(buf1, sendargs.count, sendargs.datatype, sendargs.dest, handle->tag, MCA_PML_BASE_SEND_STANDARD, handle->comm, handle->req_array+handle->req_count-1)); if(OMPI_SUCCESS != res) { printf("Error in MPI_Isend(%lu, %i, %lu, %i, %i, %lu) (%i)\n", (unsigned long)buf1, sendargs.count, (unsigned long)sendargs.datatype, sendargs.dest, handle->tag, (unsigned long)handle->comm, res); ret=res; goto error; } #ifdef NBC_TIMING Isend_time += MPI_Wtime(); #endif break; case RECV: NBC_DEBUG(5, " RECV (offset %li) ", (long)ptr-(long)myschedule); NBC_GET_BYTES(ptr,recvargs); NBC_DEBUG(5, "*buf: %p, count: %i, type: %lu, source: %i, tag: %i)\n", recvargs.buf, recvargs.count, (unsigned long)recvargs.datatype, recvargs.source, handle->tag); /* get an additional request - TODO: req_count NOT thread safe */ handle->req_count++; /* get buffer */ if(recvargs.tmpbuf) { buf1=(char*)handle->tmpbuf+(long)recvargs.buf; } else { buf1=recvargs.buf; } #ifdef NBC_TIMING Irecv_time -= MPI_Wtime(); #endif handle->req_array = (MPI_Request*)realloc((void*)handle->req_array, (handle->req_count)*sizeof(MPI_Request)); NBC_CHECK_NULL(handle->req_array); res = MCA_PML_CALL(irecv(buf1, recvargs.count, recvargs.datatype, recvargs.source, handle->tag, handle->comm, handle->req_array+handle->req_count-1)); if(OMPI_SUCCESS != res) { printf("Error in MPI_Irecv(%lu, %i, %lu, %i, %i, %lu) (%i)\n", (unsigned long)buf1, recvargs.count, (unsigned long)recvargs.datatype, recvargs.source, handle->tag, (unsigned long)handle->comm, res); ret=res; goto error; } #ifdef NBC_TIMING Irecv_time += MPI_Wtime(); #endif break; case OP: NBC_DEBUG(5, " OP (offset %li) ", (long)ptr-(long)myschedule); NBC_GET_BYTES(ptr,opargs); NBC_DEBUG(5, "*buf1: %p, buf2: %p, buf3: %p, count: %i, type: %lu)\n", opargs.buf1, opargs.buf2, opargs.buf3, opargs.count, (unsigned long)opargs.datatype); /* get buffers */ if(opargs.tmpbuf1) { buf1=(char*)handle->tmpbuf+(long)opargs.buf1; } else { buf1=opargs.buf1; } if(opargs.tmpbuf2) { buf2=(char*)handle->tmpbuf+(long)opargs.buf2; } else { buf2=opargs.buf2; } if(opargs.tmpbuf3) { buf3=(char*)handle->tmpbuf+(long)opargs.buf3; } else { buf3=opargs.buf3; } ompi_3buff_op_reduce(opargs.op, buf1, buf2, buf3, opargs.count, opargs.datatype); break; case COPY: NBC_DEBUG(5, " COPY (offset %li) ", (long)ptr-(long)myschedule); NBC_GET_BYTES(ptr,copyargs); NBC_DEBUG(5, "*src: %lu, srccount: %i, srctype: %lu, *tgt: %lu, tgtcount: %i, tgttype: %lu)\n", (unsigned long)copyargs.src, copyargs.srccount, (unsigned long)copyargs.srctype, (unsigned long)copyargs.tgt, copyargs.tgtcount, (unsigned long)copyargs.tgttype); /* get buffers */ if(copyargs.tmpsrc) { buf1=(char*)handle->tmpbuf+(long)copyargs.src; } else { buf1=copyargs.src; } if(copyargs.tmptgt) { buf2=(char*)handle->tmpbuf+(long)copyargs.tgt; } else { buf2=copyargs.tgt; } res = NBC_Copy(buf1, copyargs.srccount, copyargs.srctype, buf2, copyargs.tgtcount, copyargs.tgttype, handle->comm); if(res != NBC_OK) { printf("NBC_Copy() failed (code: %i)\n", res); ret=res; goto error; } break; case UNPACK: NBC_DEBUG(5, " UNPACK (offset %li) ", (long)ptr-(long)myschedule); NBC_GET_BYTES(ptr,unpackargs); NBC_DEBUG(5, "*src: %lu, srccount: %i, srctype: %lu, *tgt: %lu\n", (unsigned long)unpackargs.inbuf, unpackargs.count, (unsigned long)unpackargs.datatype, (unsigned long)unpackargs.outbuf); /* get buffers */ if(unpackargs.tmpinbuf) { buf1=(char*)handle->tmpbuf+(long)unpackargs.inbuf; } else { buf1=unpackargs.outbuf; } if(unpackargs.tmpoutbuf) { buf2=(char*)handle->tmpbuf+(long)unpackargs.outbuf; } else { buf2=unpackargs.outbuf; } res = NBC_Unpack(buf1, unpackargs.count, unpackargs.datatype, buf2, handle->comm); if(res != NBC_OK) { printf("NBC_Unpack() failed (code: %i)\n", res); ret=res; goto error; } break; default: printf("NBC_Start_round: bad type %li at offset %li\n", (long)type, (long)ptr-(long)myschedule); ret=NBC_BAD_SCHED; goto error; } } /* check if we can make progress - not in the first round, this allows us to leave the * initialization faster and to reach more overlap * * threaded case: calling progress in the first round can lead to a * deadlock if NBC_Free is called in this round :-( */ if(handle->row_offset != sizeof(int)) { res = NBC_Progress(handle); if((NBC_OK != res) && (NBC_CONTINUE != res)) { printf("Error in NBC_Progress() (%i)\n", res); ret=res; goto error; } } error: return ret; }