/**
 * Exchange the CPU spinor ghost faces with the neighbours in all four
 * dimensions using tagged non-blocking-style send/receive calls.
 *
 * Steps:
 *   1. make sure the ghost buffers of @p spinor exist,
 *   2. pack the backward and forward faces into the send buffers,
 *   3. post, per dimension, two receives and two sends,
 *   4. wait on every request that was posted.
 *
 * @param spinor  field whose ghost send buffers are packed and whose ghost
 *                receive buffers are filled
 * @param oddBit  parity selector, converted to QudaParity for packGhost
 * @param dagger  forwarded unchanged to packGhost
 */
void FaceBuffer::exchangeCpuSpinor(cpuColorSpinorField &spinor, int oddBit, int dagger)
{
  const int kDims = 4;

  // Message length of a single face in each dimension
  // (presumably in bytes, given the trailing factor of precision -- confirm).
  int faceBytes[kDims];
  for (int d = 0; d < kDims; ++d) {
    faceBytes[d] = nFace * faceVolumeCB[d] * Ninternal * precision;
  }

  // Allocate the ghost buffers if that has not happened yet.
  spinor.allocateGhostBuffer();

  // Fill the outgoing buffers for both directions of every dimension.
  const QudaParity parity = static_cast<QudaParity>(oddBit);
  for (int d = 0; d < kDims; ++d) {
    spinor.packGhost(spinor.backGhostFaceSendBuffer[d], d, QUDA_BACKWARDS, parity, dagger);
    spinor.packGhost(spinor.fwdGhostFaceSendBuffer[d], d, QUDA_FORWARDS, parity, dagger);
  }

  // Per-dimension neighbour identifiers and message tags.
  int backNeighbor[kDims] = { X_BACK_NBR, Y_BACK_NBR, Z_BACK_NBR, T_BACK_NBR };
  int fwdNeighbor[kDims]  = { X_FWD_NBR, Y_FWD_NBR, Z_FWD_NBR, T_FWD_NBR };
  int upTag[kDims]        = { XUP, YUP, ZUP, TUP };
  int downTag[kDims]      = { XDOWN, YDOWN, ZDOWN, TDOWN };

  // Request handles returned by the comm layer.
  unsigned long recvFromBack[kDims];
  unsigned long recvFromFwd[kDims];
  unsigned long sendToFwd[kDims];
  unsigned long sendToBack[kDims];

  // Data sent forward carries the "up" tag and is received from the backward
  // neighbour with the same tag; data sent backward uses the "down" tag.
  for (int d = 0; d < kDims; ++d) {
    recvFromBack[d] = comm_recv_with_tag(spinor.backGhostFaceBuffer[d], faceBytes[d],
                                         backNeighbor[d], upTag[d]);
    recvFromFwd[d]  = comm_recv_with_tag(spinor.fwdGhostFaceBuffer[d], faceBytes[d],
                                         fwdNeighbor[d], downTag[d]);
    sendToFwd[d]    = comm_send_with_tag(spinor.fwdGhostFaceSendBuffer[d], faceBytes[d],
                                         fwdNeighbor[d], upTag[d]);
    sendToBack[d]   = comm_send_with_tag(spinor.backGhostFaceSendBuffer[d], faceBytes[d],
                                         backNeighbor[d], downTag[d]);
  }

  // Block until every posted transfer has completed.
  for (int d = 0; d < kDims; ++d) {
    comm_wait(recvFromBack[d]);
    comm_wait(recvFromFwd[d]);
    comm_wait(sendToFwd[d]);
    comm_wait(sendToBack[d]);
  }
}
// This is just an initial hack for CPU comms - should be creating the message handlers at instantiation void FaceBuffer::exchangeCpuSpinor(cpuColorSpinorField &spinor, int oddBit, int dagger) { // allocate the ghost buffer if not yet allocated spinor.allocateGhostBuffer(); for(int i=0;i < 4; i++){ spinor.packGhost(spinor.backGhostFaceSendBuffer[i], i, QUDA_BACKWARDS, (QudaParity)oddBit, dagger); spinor.packGhost(spinor.fwdGhostFaceSendBuffer[i], i, QUDA_FORWARDS, (QudaParity)oddBit, dagger); } #ifdef QMP_COMMS QMP_msgmem_t mm_send_fwd[4]; QMP_msgmem_t mm_from_back[4]; QMP_msgmem_t mm_from_fwd[4]; QMP_msgmem_t mm_send_back[4]; QMP_msghandle_t mh_send_fwd[4]; QMP_msghandle_t mh_from_back[4]; QMP_msghandle_t mh_from_fwd[4]; QMP_msghandle_t mh_send_back[4]; for (int i=0; i<4; i++) { mm_send_fwd[i] = QMP_declare_msgmem(spinor.fwdGhostFaceSendBuffer[i], nbytes[i]); if( mm_send_fwd[i] == NULL ) errorQuda("Unable to allocate send fwd message mem"); mm_send_back[i] = QMP_declare_msgmem(spinor.backGhostFaceSendBuffer[i], nbytes[i]); if( mm_send_back == NULL ) errorQuda("Unable to allocate send back message mem"); mm_from_fwd[i] = QMP_declare_msgmem(spinor.fwdGhostFaceBuffer[i], nbytes[i]); if( mm_from_fwd[i] == NULL ) errorQuda("Unable to allocate recv from fwd message mem"); mm_from_back[i] = QMP_declare_msgmem(spinor.backGhostFaceBuffer[i], nbytes[i]); if( mm_from_back[i] == NULL ) errorQuda("Unable to allocate recv from back message mem"); mh_send_fwd[i] = QMP_declare_send_relative(mm_send_fwd[i], i, +1, 0); if( mh_send_fwd[i] == NULL ) errorQuda("Unable to allocate forward send"); mh_send_back[i] = QMP_declare_send_relative(mm_send_back[i], i, -1, 0); if( mh_send_back[i] == NULL ) errorQuda("Unable to allocate backward send"); mh_from_fwd[i] = QMP_declare_receive_relative(mm_from_fwd[i], i, +1, 0); if( mh_from_fwd[i] == NULL ) errorQuda("Unable to allocate forward recv"); mh_from_back[i] = QMP_declare_receive_relative(mm_from_back[i], i, -1, 0); if( mh_from_back[i] == 
NULL ) errorQuda("Unable to allocate backward recv"); } for (int i=0; i<4; i++) { QMP_start(mh_from_back[i]); QMP_start(mh_from_fwd[i]); QMP_start(mh_send_fwd[i]); QMP_start(mh_send_back[i]); } for (int i=0; i<4; i++) { QMP_wait(mh_send_fwd[i]); QMP_wait(mh_send_back[i]); QMP_wait(mh_from_back[i]); QMP_wait(mh_from_fwd[i]); } for (int i=0; i<4; i++) { QMP_free_msghandle(mh_send_fwd[i]); QMP_free_msghandle(mh_send_back[i]); QMP_free_msghandle(mh_from_fwd[i]); QMP_free_msghandle(mh_from_back[i]); QMP_free_msgmem(mm_send_fwd[i]); QMP_free_msgmem(mm_send_back[i]); QMP_free_msgmem(mm_from_back[i]); QMP_free_msgmem(mm_from_fwd[i]); } #else for (int i=0; i<4; i++) { //printf("%d COPY length = %d\n", i, nbytes[i]/precision); memcpy(spinor.fwdGhostFaceBuffer[i], spinor.backGhostFaceSendBuffer[i], nbytes[i]); memcpy(spinor.backGhostFaceBuffer[i], spinor.fwdGhostFaceSendBuffer[i], nbytes[i]); } #endif }