/*
 * MPID_nem_tcp_module_lmt_start_recv - receive the payload of an LMT
 * transfer over the per-VC TCP socket into the request's user buffer.
 *
 * vc  - virtual connection; its channel_private area holds the LMT socket
 *       descriptor (lmt_desc), the connected flag (lmt_connected), the
 *       cookie (lmt_cookie) and the sender's length (lmt_s_len).
 * req - receive request describing the user buffer, count and datatype.
 *
 * If the LMT socket is not yet connected, the pending connection is
 * accepted on lmt_desc, the listening descriptor is closed and lmt_desc is
 * replaced by the connected descriptor.  The data is then read with
 * blocking readv() calls, one IOV batch at a time, until data_sz bytes
 * have been unpacked, after which the request is completed.
 *
 * Returns MPI_SUCCESS or an MPI error code.  Note that when the receive
 * buffer is smaller than the sender's length, the truncation error is
 * stored both in req->status.MPI_ERROR and in the returned mpi_errno.
 */
int MPID_nem_tcp_module_lmt_start_recv (MPIDI_VC_t *vc, MPID_Request *req)
{
    int mpi_errno = MPI_SUCCESS;
    int ret;
    MPIDI_msg_sz_t data_sz;
    int dt_contig;
    MPI_Aint dt_true_lb;
    MPID_Datatype * dt_ptr;
    MPIDI_msg_sz_t last;
    int nb;
    int r_len;
    MPIDI_CH3I_VC *vc_ch = (MPIDI_CH3I_VC *)vc->channel_private;
    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_TCP_MODULE_LMT_START_RECV);

    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_TCP_MODULE_LMT_START_RECV);

    free_cookie (vc_ch->net.tcp.lmt_cookie);

    if (!vc_ch->net.tcp.lmt_connected)
    {
        /* accept() requires a socklen_t length argument, not an int */
        socklen_t len;
        struct sockaddr_in saddr;
        int connfd;

        len = sizeof (saddr);
        connfd = accept (vc_ch->net.tcp.lmt_desc, (struct sockaddr *)&saddr, &len);
        MPIU_ERR_CHKANDJUMP2 (connfd == -1, mpi_errno, MPI_ERR_OTHER, "**sock|poll|accept", "**sock|poll|accept %d %s", errno, strerror (errno));

        /* close listen fd */
        do
            ret = close (vc_ch->net.tcp.lmt_desc);
        while (ret == -1 && errno == EINTR);
        MPIU_ERR_CHKANDJUMP2 (ret == -1, mpi_errno, MPI_ERR_OTHER, "**closesocket", "**closesocket %s %d", strerror (errno), errno);

        /* set lmt_desc to new connected fd */
        vc_ch->net.tcp.lmt_desc = connfd;
        vc_ch->net.tcp.lmt_connected = 1;

        /* non-blocking mode is currently disabled: */
        // ret = fcntl (vc_ch->net.tcp.lmt_desc, F_SETFL, O_NONBLOCK);
        // MPIU_ERR_CHKANDJUMP2 (ret == -1, mpi_errno, MPI_ERR_OTHER, "**fail", "**fail %s %d", strerror (errno), errno);
    }

    MPIDI_Datatype_get_info (req->dev.user_count, req->dev.datatype, dt_contig, data_sz, dt_ptr, dt_true_lb);

    if (data_sz > vc_ch->net.tcp.lmt_s_len)
    {
        /* receive buffer is larger than the message: only read what was sent */
        data_sz = vc_ch->net.tcp.lmt_s_len;
    }
    else if (data_sz < vc_ch->net.tcp.lmt_s_len)
    {
        /* message will be truncated */
        r_len = data_sz;
        req->status.MPI_ERROR = MPIU_ERR_SET2 (mpi_errno, MPI_ERR_TRUNCATE, "**truncate", "**truncate %d %d", vc_ch->net.tcp.lmt_s_len, r_len);
    }

    MPID_Segment_init (req->dev.user_buf, req->dev.user_count, req->dev.datatype, &req->dev.segment, 0);
    req->dev.segment_first = 0;
    req->dev.segment_size = data_sz;
    req->dev.iov_offset = 0;

    do
    {
        int iov_offset;
        int left_to_recv;

        /* Each pass asks for everything that is still outstanding using a
           full-size IOV; unpack_vector lowers 'last' and iov_count to what
           actually fit. */
        last = data_sz;
        req->dev.iov_count = MPID_IOV_LIMIT;
        MPID_Segment_unpack_vector (&req->dev.segment, req->dev.segment_first, &last, req->dev.iov, &req->dev.iov_count);

        left_to_recv = last - req->dev.segment_first;
        iov_offset = 0;

#ifdef TESTING_CHUNKING
        {
            /* test path: reads everything through iov[0] in CHUNK-sized pieces */
            char *buf = req->dev.iov[0].MPID_IOV_BUF;
            int l;
            while (left_to_recv)
            {
                if (left_to_recv > CHUNK)
                    l = CHUNK;
                else
                    l = left_to_recv;

                do
                    nb = read (vc_ch->net.tcp.lmt_desc, buf, l);
                while (nb == -1 && errno == EINTR);
                MPIU_ERR_CHKANDJUMP (nb == -1, mpi_errno, MPI_ERR_OTHER, "**sock_writev");
                /* EOF before all data arrived: fail instead of spinning forever */
                MPIU_ERR_CHKANDJUMP (nb == 0, mpi_errno, MPI_ERR_OTHER, "**fail");

                left_to_recv -= nb;
                buf += nb;
            }
            MPIDI_CH3U_Request_complete (req);
            goto fn_exit;
        }
#endif

        do
            nb = readv (vc_ch->net.tcp.lmt_desc, &req->dev.iov[iov_offset], req->dev.iov_count - iov_offset);
        while (nb == -1 && errno == EINTR);
        MPIU_ERR_CHKANDJUMP2 (nb == -1, mpi_errno, MPI_ERR_OTHER, "**fail", "**fail %s %d", strerror (errno), errno);
        /* readv returning 0 means the peer closed the connection */
        MPIU_ERR_CHKANDJUMP (nb == 0, mpi_errno, MPI_ERR_OTHER, "**fail");

        left_to_recv -= nb;
        while (left_to_recv)
        { /* recv rest of iov */
            while (nb >= req->dev.iov[iov_offset].MPID_IOV_LEN)
            { /* update iov to reflect received bytes */
                nb -= req->dev.iov[iov_offset].MPID_IOV_LEN;
                ++iov_offset;
            }
            /* partially filled entry: skip past the bytes already received */
            req->dev.iov[iov_offset].MPID_IOV_BUF = (char *)req->dev.iov[iov_offset].MPID_IOV_BUF + nb;
            req->dev.iov[iov_offset].MPID_IOV_LEN -= nb;

            do
                nb = readv (vc_ch->net.tcp.lmt_desc, &req->dev.iov[iov_offset], req->dev.iov_count - iov_offset);
            while (nb == -1 && errno == EINTR);
            MPIU_ERR_CHKANDJUMP2 (nb == -1, mpi_errno, MPI_ERR_OTHER, "**fail", "**fail %s %d", strerror (errno), errno);
            MPIU_ERR_CHKANDJUMP (nb == 0, mpi_errno, MPI_ERR_OTHER, "**fail");
            left_to_recv -= nb;
        }

        /* Bytes [segment_first, last) are now in the user buffer; continue
           from 'last' on the next pass.  Without this the loop would
           re-unpack from offset 0 and never make progress. */
        req->dev.segment_first = last;
    }
    while (last < data_sz);

    MPIDI_CH3U_Request_complete (req);

 fn_exit:
    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_TCP_MODULE_LMT_START_RECV);
    return mpi_errno;
 fn_fail:
    goto fn_exit;
}
/*
 * MPIR_Localcopy - copy typed data from sendbuf to recvbuf within one
 * process.
 *
 * sendbuf/sendcount/sendtype - source buffer, element count and datatype.
 * recvbuf/recvcount/recvtype - destination buffer, element count, datatype.
 *
 * The number of bytes copied is min(send data size, receive data size).
 * When the send data does not fit, MPI_ERR_TRUNCATE is recorded and only
 * the part that fits is copied.  Four strategies are used:
 *   - both types builtin:         a single memcpy of the raw buffers;
 *   - both types contiguous:      memcpy offset by each type's true lb;
 *   - exactly one side contiguous: segment pack or unpack directly against
 *                                  the contiguous buffer;
 *   - neither contiguous:         pack into a COPY_BUFFER_SZ staging buffer
 *                                  and unpack from it, repeatedly.
 *
 * Returns MPI_SUCCESS, a truncation error, or a datatype-mismatch error
 * (MPI_ERR_TYPE) when pack/unpack cannot consume the expected byte count.
 */
int MPIR_Localcopy(const void *sendbuf, MPI_Aint sendcount, MPI_Datatype sendtype,
                   void *recvbuf, MPI_Aint recvcount, MPI_Datatype recvtype)
{
    int mpi_errno = MPI_SUCCESS;
    int sendtype_iscontig, recvtype_iscontig;
    MPI_Aint sendsize, recvsize, sdata_sz, rdata_sz, copy_sz;
    MPI_Aint true_extent, sendtype_true_lb, recvtype_true_lb;
    MPIU_CHKLMEM_DECL(1);
    MPID_MPI_STATE_DECL(MPID_STATE_MPIR_LOCALCOPY);

    MPID_MPI_FUNC_ENTER(MPID_STATE_MPIR_LOCALCOPY);

    MPID_Datatype_get_size_macro(sendtype, sendsize);
    MPID_Datatype_get_size_macro(recvtype, recvsize);

    sdata_sz = sendsize * sendcount;
    rdata_sz = recvsize * recvcount;

    /* if there is no data to copy, bail out */
    if (!sdata_sz || !rdata_sz)
        goto fn_exit;

    /* The clamp to the receive size must happen regardless of whether
       error checking is compiled in; otherwise a too-large send would
       write past the end of recvbuf. */
    if (sdata_sz > rdata_sz) {
        MPIU_ERR_SET2(mpi_errno, MPI_ERR_TRUNCATE, "**truncate", "**truncate %d %d", sdata_sz, rdata_sz);
        copy_sz = rdata_sz;
    }
    else {
        copy_sz = sdata_sz;
    }

    /* Builtin types is the common case; optimize for it */
    if ((HANDLE_GET_KIND(sendtype) == HANDLE_KIND_BUILTIN) &&
        HANDLE_GET_KIND(recvtype) == HANDLE_KIND_BUILTIN) {
        MPIU_Memcpy(recvbuf, sendbuf, copy_sz);
        goto fn_exit;
    }

    MPIR_Datatype_iscontig(sendtype, &sendtype_iscontig);
    MPIR_Datatype_iscontig(recvtype, &recvtype_iscontig);

    /* only the true lower bounds are used below; true_extent is scratch */
    MPIR_Type_get_true_extent_impl(sendtype, &sendtype_true_lb, &true_extent);
    MPIR_Type_get_true_extent_impl(recvtype, &recvtype_true_lb, &true_extent);

    if (sendtype_iscontig && recvtype_iscontig) {
#if defined(HAVE_ERROR_CHECKING)
        MPIU_ERR_CHKMEMCPYANDJUMP(mpi_errno,
                                  ((char *)recvbuf + recvtype_true_lb),
                                  ((char *)sendbuf + sendtype_true_lb),
                                  copy_sz);
#endif
        MPIU_Memcpy(((char *) recvbuf + recvtype_true_lb),
                    ((char *) sendbuf + sendtype_true_lb),
                    copy_sz);
    }
    else if (sendtype_iscontig) {
        /* contiguous source: unpack straight from it into the receive type */
        MPID_Segment seg;
        MPI_Aint last;

        MPID_Segment_init(recvbuf, recvcount, recvtype, &seg, 0);
        last = copy_sz;
        MPID_Segment_unpack(&seg, 0, &last, (char*)sendbuf + sendtype_true_lb);
        MPIU_ERR_CHKANDJUMP(last != copy_sz, mpi_errno, MPI_ERR_TYPE, "**dtypemismatch");
    }
    else if (recvtype_iscontig) {
        /* contiguous destination: pack the send type straight into it */
        MPID_Segment seg;
        MPI_Aint last;

        MPID_Segment_init(sendbuf, sendcount, sendtype, &seg, 0);
        last = copy_sz;
        MPID_Segment_pack(&seg, 0, &last, (char*)recvbuf + recvtype_true_lb);
        MPIU_ERR_CHKANDJUMP(last != copy_sz, mpi_errno, MPI_ERR_TYPE, "**dtypemismatch");
    }
    else {
        char * buf;
        MPIDI_msg_sz_t buf_off;     /* bytes carried over at the front of buf */
        MPID_Segment sseg;
        MPIDI_msg_sz_t sfirst;      /* bytes packed from the send side so far */
        MPID_Segment rseg;
        MPIDI_msg_sz_t rfirst;      /* bytes unpacked into the recv side so far */

        MPIU_CHKLMEM_MALLOC(buf, char *, COPY_BUFFER_SZ, mpi_errno, "buf");

        MPID_Segment_init(sendbuf, sendcount, sendtype, &sseg, 0);
        MPID_Segment_init(recvbuf, recvcount, recvtype, &rseg, 0);

        sfirst = 0;
        rfirst = 0;
        buf_off = 0;

        while (1) {
            MPI_Aint last;
            char * buf_end;

            /* pack as much as fits behind the carried-over bytes */
            if (copy_sz - sfirst > COPY_BUFFER_SZ - buf_off) {
                last = sfirst + (COPY_BUFFER_SZ - buf_off);
            }
            else {
                last = copy_sz;
            }

            MPID_Segment_pack(&sseg, sfirst, &last, buf + buf_off);
            MPIU_Assert(last > sfirst);

            buf_end = buf + buf_off + (last - sfirst);
            sfirst = last;

            /* unpack from the start of buf; 'last' comes back as the
               position actually reached on the receive side */
            MPID_Segment_unpack(&rseg, rfirst, &last, buf);
            MPIU_Assert(last > rfirst);

            rfirst = last;

            if (rfirst == copy_sz) {
                /* successful completion */
                break;
            }

            /* if the send side finished, but the recv side couldn't unpack it, there's a datatype mismatch */
            MPIU_ERR_CHKANDJUMP(sfirst == copy_sz, mpi_errno, MPI_ERR_TYPE, "**dtypemismatch");

            /* if not all data was unpacked, copy it to the front of the buffer for next time */
            buf_off = sfirst - rfirst;
            if (buf_off > 0) {
                memmove(buf, buf_end - buf_off, buf_off);
            }
        }
    }

  fn_exit:
    MPIU_CHKLMEM_FREEALL();
    MPID_MPI_FUNC_EXIT(MPID_STATE_MPIR_LOCALCOPY);
    return mpi_errno;
  fn_fail:
    goto fn_exit;
}
/*
 * MPID_nem_tcp_module_lmt_start_send - send the payload of an LMT transfer
 * over the per-VC TCP socket from the request's user buffer.
 *
 * vc       - virtual connection; its channel_private area holds the LMT
 *            socket descriptor (lmt_desc), the connected flag
 *            (lmt_connected) and the cookie (lmt_cookie).
 * req      - send request describing the user buffer, count and datatype.
 * r_cookie - receiver's cookie; read_r_cookie() extracts the receiver's
 *            hostname, port and receive length from it.
 *
 * If the LMT socket is not yet connected, a TCP socket is created and
 * connected to the receiver's hostname/port.  The data is then written
 * with blocking writev() calls, one IOV batch at a time, until data_sz
 * bytes have been packed and sent, after which the request is completed.
 *
 * Returns MPI_SUCCESS or an MPI error code.  Note that when the receiver's
 * length is smaller than the send size, the truncation error is stored
 * both in req->status.MPI_ERROR and in the returned mpi_errno.
 */
int MPID_nem_tcp_module_lmt_start_send (MPIDI_VC_t *vc, MPID_Request *req, MPID_IOV r_cookie)
{
    int mpi_errno = MPI_SUCCESS;
    int ret;
    MPIDI_msg_sz_t data_sz;
    int dt_contig;
    MPI_Aint dt_true_lb;
    MPID_Datatype * dt_ptr;
    MPIDI_msg_sz_t last;
    int nb;
    int s_len = 0;
    int r_len;
    int r_port;
    char *r_hostname;
    MPIDI_CH3I_VC *vc_ch = (MPIDI_CH3I_VC *)vc->channel_private;
    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_TCP_MODULE_LMT_START_SEND);

    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_TCP_MODULE_LMT_START_SEND);

    mpi_errno = read_r_cookie (r_cookie, &r_hostname, &r_port, &r_len);
    if (mpi_errno) MPIU_ERR_POP (mpi_errno);

    free_cookie (vc_ch->net.tcp.lmt_cookie);

    if (!vc_ch->net.tcp.lmt_connected)
    {
        struct sockaddr_in saddr;
        struct hostent *hp;

        vc_ch->net.tcp.lmt_desc = socket (AF_INET, SOCK_STREAM, 0);
        MPIU_ERR_CHKANDJUMP2 (vc_ch->net.tcp.lmt_desc == -1, mpi_errno, MPI_ERR_OTHER, "**sock_create", "**sock_create %s %d", strerror (errno), errno);

        /* non-blocking mode is currently disabled: */
        // ret = fcntl (vc_ch->net.tcp.lmt_desc, F_SETFL, O_NONBLOCK);
        // MPIU_ERR_CHKANDJUMP2 (ret == -1, mpi_errno, MPI_ERR_OTHER, "**fail", "**fail %s %d", strerror (errno), errno);

        hp = gethostbyname (r_hostname);
        MPIU_ERR_CHKANDJUMP2 (hp == NULL, mpi_errno, MPI_ERR_OTHER, "**gethostbyname", "**gethostbyname %s %d", hstrerror (h_errno), h_errno);

        /* memset takes (dest, value, length); the arguments used to be
           swapped, which zeroed nothing and left saddr's padding
           uninitialised */
        memset (&saddr, 0, sizeof(saddr));
        saddr.sin_family = AF_INET;
        saddr.sin_port = htons (r_port);
        MPIU_Memcpy (&saddr.sin_addr, hp->h_addr, hp->h_length);

        set_sockopts (vc_ch->net.tcp.lmt_desc);

        ret = connect (vc_ch->net.tcp.lmt_desc, (struct sockaddr *)&saddr, sizeof(saddr));
        MPIU_ERR_CHKANDJUMP2 (ret == -1, mpi_errno, MPI_ERR_OTHER, "**fail", "**fail %s %d", strerror (errno), errno);

        vc_ch->net.tcp.lmt_connected = 1;
    }

    MPIDI_Datatype_get_info (req->dev.user_count, req->dev.datatype, dt_contig, data_sz, dt_ptr, dt_true_lb);

    if (r_len < data_sz)
    {
        /* message will be truncated */
        s_len = data_sz;
        data_sz = r_len;
        req->status.MPI_ERROR = MPIU_ERR_SET2 (mpi_errno, MPI_ERR_TRUNCATE, "**truncate", "**truncate %d %d", s_len, r_len);
    }

    MPID_Segment_init (req->dev.user_buf, req->dev.user_count, req->dev.datatype, &req->dev.segment, 0);
    req->dev.segment_first = 0;
    req->dev.segment_size = data_sz;
    req->dev.iov_offset = 0;

    do
    {
        int iov_offset;
        int left_to_send;

        /* Each pass asks for everything that is still outstanding using a
           full-size IOV; pack_vector lowers 'last' and iov_count to what
           actually fit. */
        last = data_sz;
        req->dev.iov_count = MPID_IOV_LIMIT;
        MPID_Segment_pack_vector (&req->dev.segment, req->dev.segment_first, &last, req->dev.iov, &req->dev.iov_count);

        left_to_send = last - req->dev.segment_first;
        iov_offset = 0;

#ifdef TESTING_CHUNKING
        {
            /* test path: writes everything through iov[0] in CHUNK-sized pieces */
            char *buf = req->dev.iov[0].MPID_IOV_BUF;
            int l;
            while (left_to_send)
            {
                if (left_to_send > CHUNK)
                    l = CHUNK;
                else
                    l = left_to_send;

                do
                    nb = write (vc_ch->net.tcp.lmt_desc, buf, l);
                while (nb == -1 && errno == EINTR);
                MPIU_ERR_CHKANDJUMP (nb == -1, mpi_errno, MPI_ERR_OTHER, "**sock_writev");

                left_to_send -= nb;
                buf += nb;
            }
            MPIDI_CH3U_Request_complete (req);
            goto fn_exit;
        }
#endif

        do
            nb = writev (vc_ch->net.tcp.lmt_desc, &req->dev.iov[iov_offset], req->dev.iov_count - iov_offset);
        while (nb == -1 && errno == EINTR);
        MPIU_ERR_CHKANDJUMP (nb == -1, mpi_errno, MPI_ERR_OTHER, "**sock_writev");

        left_to_send -= nb;
        while (left_to_send)
        { /* send rest of iov */
            while (nb >= req->dev.iov[iov_offset].MPID_IOV_LEN)
            { /* update iov to reflect sent bytes */
                nb -= req->dev.iov[iov_offset].MPID_IOV_LEN;
                ++iov_offset;
            }
            /* partially sent entry: skip past the bytes already written */
            req->dev.iov[iov_offset].MPID_IOV_BUF = (char *)req->dev.iov[iov_offset].MPID_IOV_BUF + nb;
            req->dev.iov[iov_offset].MPID_IOV_LEN -= nb;

            do
                nb = writev (vc_ch->net.tcp.lmt_desc, &req->dev.iov[iov_offset], req->dev.iov_count - iov_offset);
            while (nb == -1 && errno == EINTR);
            MPIU_ERR_CHKANDJUMP (nb == -1, mpi_errno, MPI_ERR_OTHER, "**sock_writev");
            left_to_send -= nb;
        }

        /* Bytes [segment_first, last) have been sent; continue from 'last'
           on the next pass.  Without this the loop would re-pack from
           offset 0 and never make progress. */
        req->dev.segment_first = last;
    }
    while (last < data_sz);

    MPIDI_CH3U_Request_complete (req);

 fn_exit:
    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_TCP_MODULE_LMT_START_SEND);
    return mpi_errno;
 fn_fail:
    goto fn_exit;
}