/*
 * Completion handler for a newmad send request.
 *
 * Releases the netmod-private iov attached to the request, then either
 * completes the request directly (no OnDataAvail handler installed) or runs
 * the installed handler, which is expected to report completion.  Finally the
 * count of pending newmad sends is decremented.
 *
 * req - send request whose transfer has finished
 */
void MPID_nem_newmad_handle_sreq(MPID_Request *req)
{
    int (*reqFn)(MPIDI_VC_t *, MPID_Request *, int *);

#ifdef DEBUG
    fprintf(stdout,"========> Completing Send req %p \n",(void *)req);
#endif

    /* Free the iov BEFORE completing the request, the same order
     * MPID_nem_newmad_handle_rreq uses.
     * NOTE(review): MPIDI_CH3U_Request_complete() and the OnDataAvail handler
     * are not visible here; they presumably may release the request, so no
     * field of req is touched after they have run. */
    if (REQ_FIELD(req,iov) != NULL) {
        MPIU_Free(REQ_FIELD(req,iov));
        REQ_FIELD(req,iov) = NULL;
    }

    reqFn = req->dev.OnDataAvail;
    if (!reqFn) {
        MPIDI_CH3U_Request_complete(req);
        MPIU_DBG_MSG(CH3_CHANNEL, VERBOSE, ".... complete");
    }
    else {
        MPIDI_VC_t *vc = req->ch.vc;
        int complete = 0;

        reqFn(vc, req, &complete);
        /* the handler must have finished the request */
        if (!complete) {
            MPIU_Assert(complete == TRUE);
        }
    }

    mpid_nem_newmad_pending_send_req--;
}
int MPID_nem_ib_lmt_done_recv(struct MPIDI_VC *vc, struct MPID_Request *rreq) { int mpi_errno = MPI_SUCCESS; MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_IB_LMT_DONE_RECV); MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_IB_LMT_DONE_RECV); dprintf("lmt_done_recv,enter,rreq=%p,head=%p\n", rreq, MPID_nem_ib_lmtq.head); int is_contig; MPID_Datatype_is_contig(rreq->dev.datatype, &is_contig); if (!is_contig) { dprintf("lmt_done_recv,copying noncontiguous data to user buffer\n"); /* see MPIDI_CH3U_Request_unpack_uebuf (in /src/mpid/ch3/src/ch3u_request.c) */ /* or MPIDI_CH3U_Receive_data_found (in src/mpid/ch3/src/ch3u_handle_recv_pkt.c) */ MPIDI_msg_sz_t unpack_sz = rreq->ch.lmt_data_sz; MPID_Segment seg; MPI_Aint last; MPID_Segment_init(rreq->dev.user_buf, rreq->dev.user_count, rreq->dev.datatype, &seg, 0); last = unpack_sz; MPID_Segment_unpack(&seg, 0, &last, REQ_FIELD(rreq, lmt_pack_buf)); if (last != unpack_sz) { /* --BEGIN ERROR HANDLING-- */ /* received data was not entirely consumed by unpack() * because too few bytes remained to fill the next basic * datatype */ MPIR_STATUS_SET_COUNT(rreq->status, last); rreq->status.MPI_ERROR = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPI_ERR_TYPE, "**MPID_nem_ib_lmt_done_recv", 0); /* --END ERROR HANDLING-- */ } //MPIU_Free(REQ_FIELD(rreq, lmt_pack_buf)); MPID_nem_ib_stfree(REQ_FIELD(rreq, lmt_pack_buf), (size_t) rreq->ch.lmt_data_sz); } dprintf("lmt_done_recv,1,req=%p,pcc=%d\n", rreq, MPIDI_CH3I_progress_completion_count.v); MPIDI_CH3U_Request_complete(rreq); dprintf("lmt_done_recv,complete,req=%p\n", rreq); dprintf("lmt_done_recv,2,pcc=%d\n", MPIDI_CH3I_progress_completion_count.v); fn_exit: MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_IB_LMT_DONE_RECV); return mpi_errno; //fn_fail: goto fn_exit; }
/*
 * Called when an any-source request in the posted receive queue is matched
 * and dequeued.
 *
 * Looks up the newmad request that was registered for rreq and tries to
 * cancel it.  If the cancel fails, the function waits for the newmad receive
 * to finish and completes rreq through MPID_nem_newmad_handle_rreq().  If the
 * cancel succeeds, the resources attached to rreq for the newmad receive are
 * released.  The newmad request object itself is freed in both cases.
 *
 * rreq - the any-source receive request
 *
 * Returns TRUE when the message was received through newmad, FALSE otherwise
 * (including when no newmad request is registered for rreq).
 */
int MPID_nem_newmad_anysource_matched(MPID_Request *rreq)
{
    nm_sr_request_t *nmad_request = NULL;
    int ret;
    int matched = FALSE;

#ifdef DEBUG
    fprintf(stdout,"========> Any Source : MPID_nem_newmad_anysource_matched , req is %p\n",(void *)rreq);
#endif

    MPID_NEM_NMAD_GET_REQ_FROM_HASH(rreq,nmad_request);
    if (nmad_request == NULL) {
        return matched;
    }

#ifdef DEBUG
    fprintf(stdout,"========> Any Source nmad req found :%p \n",(void *)nmad_request);
#endif

    ret = nm_sr_rcancel(mpid_nem_newmad_session,nmad_request);
    if (ret != NM_ESUCCESS) {
        /* The cancel failed: wait for the receive and complete it here. */
        size_t size;
        nm_tag_t match_info;

#ifdef DEBUG
        fprintf(stdout,"========> Any Source nmad req (%p) not cancelled \n",(void *)nmad_request);
#endif
        MPIU_Assert(MPIDI_Request_get_type(rreq) != MPIDI_REQUEST_TYPE_GET_RESP);
        ret = nm_sr_rwait(mpid_nem_newmad_session,nmad_request);
        MPIU_Assert(ret == NM_ESUCCESS);
        nm_sr_request_unset_completion_queue(mpid_nem_newmad_session,nmad_request);
        nm_sr_get_rtag(mpid_nem_newmad_session,nmad_request,&match_info);
        nm_sr_get_size(mpid_nem_newmad_session,nmad_request,&size);
        MPID_nem_newmad_handle_rreq(rreq,match_info, size);
        matched = TRUE;
    }
    else {
        /* The receive was cancelled while rreq stays alive, so clear the
         * pointers after freeing them: a dangling value left in the request
         * could be freed or used a second time later on. */
        MPID_Segment_free(rreq->dev.segment_ptr);
        rreq->dev.segment_ptr = NULL;
        if (REQ_FIELD(rreq,iov) != NULL) {
            MPIU_Free(REQ_FIELD(rreq,iov));
            REQ_FIELD(rreq,iov) = NULL;
        }
    }

    MPIU_Free(nmad_request);
    return matched;
}
/*
 * Atomically add `incr` to one statistics counter of a reqstat node.
 *
 * data   - the node, passed as an untyped pointer
 * offset - selects the counter; resolved through REQ_FIELD(node, offset)
 * incr   - amount to add
 *
 * The previous counter value returned by ngx_atomic_fetch_add() is discarded.
 */
void
ngx_http_reqstat_count(void *data, off_t offset, ngx_int_t incr)
{
    ngx_http_reqstat_rbnode_t  *stat_node;

    stat_node = data;

    (void) ngx_atomic_fetch_add(REQ_FIELD(stat_node, offset), incr);
}
/*
 * Try to cancel a send request that was handed to MX.
 *
 * Nothing is done for a local VC.  Otherwise mx_cancel() is called on the
 * request's MX handle; when MX reports the request as cancelled, the request
 * is marked cancelled, its completion counter is zeroed, its reference count
 * is set to 1 and the pending-send counter is decremented.
 *
 * vc   - virtual connection the send was issued on
 * sreq - send request to cancel
 *
 * Returns `handled`: TRUE once a cancel attempt on a non-local VC has been
 * processed, FALSE otherwise.  If mx_cancel() fails, the error is stored in
 * mpi_errno, but that variable is never returned and FALSE is reported.
 */
int MPID_nem_mx_cancel_send(MPIDI_VC_t *vc, MPID_Request *sreq)
{
    int mpi_errno = MPI_SUCCESS;
    int handled = FALSE;
    mx_request_t *pending;
    mx_return_t rc;
    uint32_t was_cancelled;

    if (VC_CH(vc)->is_local) {
        goto fn_exit;
    }

    pending = &(REQ_FIELD(sreq,mx_request));
    rc = mx_cancel(MPID_nem_mx_local_endpoint, pending, &was_cancelled);
    MPIU_ERR_CHKANDJUMP1(rc != MX_SUCCESS, mpi_errno, MPI_ERR_OTHER,
                         "**mx_cancel", "**mx_cancel %s", mx_strerror(rc));

    if (!was_cancelled) {
        sreq->status.cancelled = FALSE;
    }
    else {
        sreq->status.cancelled = TRUE;
        sreq->cc = 0;
        MPIU_Object_set_ref(sreq, 1);
        MPID_nem_mx_pending_send_req--;
    }
    handled = TRUE;

 fn_exit:
    return handled;
 fn_fail:
    goto fn_exit;
}
int MPID_nem_ib_lmt_done_send(struct MPIDI_VC *vc, struct MPID_Request *req) { int mpi_errno = MPI_SUCCESS; MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_IB_LMT_DONE_SEND); MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_IB_LMT_DONE_SEND); dprintf("lmt_done_send,enter,%d<-%d,req=%p,REQ_FIELD(req, lmt_pack_buf)=%p\n", MPID_nem_ib_myrank, vc->pg_rank, req, REQ_FIELD(req, lmt_pack_buf)); /* free memory area for cookie */ if (!req->ch.s_cookie) { dprintf("lmt_done_send,enter,req->ch.s_cookie is zero"); } MPIU_Free(req->ch.s_cookie); //dprintf("lmt_done_send,free cookie,%p\n", req->ch.s_cookie); /* free temporal buffer for eager-send non-contiguous data. * MPIDI_CH3U_Recvq_FDU_or_AEP (in mpid_isend.c) sets req->dev.datatype */ int is_contig; MPID_Datatype_is_contig(req->dev.datatype, &is_contig); if (!is_contig && REQ_FIELD(req, lmt_pack_buf)) { dprintf("lmt_done_send,lmt-get,non-contiguous,free lmt_pack_buf\n"); #if 1 /* debug, enable again later */ MPIU_Free(REQ_FIELD(req, lmt_pack_buf)); #endif } /* mark completion on sreq */ MPIU_ERR_CHKANDJUMP(req->dev.OnDataAvail, mpi_errno, MPI_ERR_OTHER, "**MPID_nem_ib_lmt_done_send"); dprintf("lmt_done_send,1,req=%p,pcc=%d\n", req, MPIDI_CH3I_progress_completion_count.v); MPIDI_CH3U_Request_complete(req); dprintf("lmt_done_send,complete,req=%p\n", req); dprintf("lmt_done_send,2,req=%p,pcc=%d\n", req, MPIDI_CH3I_progress_completion_count.v); //dprintf("lmt_done_send, mark completion on sreq\n"); fn_exit: MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_IB_LMT_DONE_SEND); return mpi_errno; fn_fail: goto fn_exit; }
int MPID_nem_ib_lmt_start_recv_core(struct MPID_Request *req, void *raddr, uint32_t rkey, void *write_to_buf) { int mpi_errno = MPI_SUCCESS; int ibcom_errno; struct MPIDI_VC *vc = req->ch.vc; MPID_nem_ib_vc_area *vc_ib = VC_IB(vc); MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_IB_LMT_START_RECV_CORE); MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_IB_LMT_START_RECV_CORE); ibcom_errno = MPID_nem_ib_com_lrecv(vc_ib->sc->fd, (uint64_t) req, raddr, req->ch.lmt_data_sz, rkey, write_to_buf); MPID_nem_ib_ncqe += 1; //dprintf("start_recv,ncqe=%d\n", MPID_nem_ib_ncqe); MPIU_ERR_CHKANDJUMP(ibcom_errno, mpi_errno, MPI_ERR_OTHER, "**MPID_nem_ib_com_lrecv"); dprintf("lmt_start_recv_core,MPID_nem_ib_ncqe=%d\n", MPID_nem_ib_ncqe); dprintf ("lmt_start_recv_core,req=%p,sz=%ld,write_to_buf=%p,lmt_pack_buf=%p,user_buf=%p,raddr=%p,rkey=%08x,tail=%p=%02x\n", req, req->ch.lmt_data_sz, write_to_buf, REQ_FIELD(req, lmt_pack_buf), req->dev.user_buf, raddr, rkey, write_to_buf + req->ch.lmt_data_sz - sizeof(uint8_t), *((uint8_t *) (write_to_buf + req->ch.lmt_data_sz - sizeof(uint8_t)))); #ifdef MPID_NEM_IB_LMT_GET_CQE MPID_nem_ib_ncqe_to_drain += 1; /* use CQE instead of polling */ #else /* drain_scq and ib_poll is not ordered, so both can decrement ref_count */ MPIR_Request_add_ref(req); /* register to poll list in ib_poll() */ /* don't use req->dev.next because it causes unknown problem */ MPID_nem_ib_lmtq_enqueue(&MPID_nem_ib_lmtq, req); dprintf("lmt_start_recv_core,lmtq enqueue\n"); //volatile uint8_t* tailmagic = (uint8_t*)((void*)req->dev.user_buf + req->ch.lmt_data_sz - sizeof(uint8_t)); //dprintf("start_recv_core,cur_tail=%02x,lmt_receiver_tail=%02x\n", *tailmagic, REQ_FIELD(req, lmt_receiver_tail)); #endif fn_exit: MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_IB_LMT_START_RECV_CORE); return mpi_errno; fn_fail: goto fn_exit; }
/*
 * Try to cancel a receive request that was posted to MX.
 *
 * Calls mx_cancel() on the request's MX handle.  When MX reports the request
 * as cancelled, the request is marked cancelled, removed from the posted
 * receive queue, given a zero count, marked completed and released.
 * Otherwise the request is marked as not cancelled.
 *
 * vc   - virtual connection (not referenced by the body)
 * rreq - receive request to cancel
 *
 * Returns MPI_SUCCESS, or an MPI error code if mx_cancel() fails.
 */
int MPID_nem_mx_cancel_recv(MPIDI_VC_t *vc, MPID_Request *rreq)
{
    mx_request_t *mx_request;
    mx_return_t ret;
    uint32_t result;
    int mpi_errno = MPI_SUCCESS;

    mx_request = &(REQ_FIELD(rreq,mx_request));

    /* FIXME this test is probably not correct with multiple netmods */
    /* We need to know to which netmod a recv request actually "belongs" to */
    /* NOTE(review): mx_request is the address of a field inside rreq, so this
     * condition presumably holds for every valid rreq. */
    if (mx_request != NULL) {
        ret = mx_cancel(MPID_nem_mx_local_endpoint,mx_request,&result);
        MPIU_ERR_CHKANDJUMP1(ret != MX_SUCCESS, mpi_errno, MPI_ERR_OTHER,
                             "**mx_cancel", "**mx_cancel %s", mx_strerror(ret));

        if (result) {
            int found;

            rreq->status.cancelled = TRUE;
            found = MPIDI_CH3U_Recvq_DP(rreq);
            MPIU_Assert(found);
            rreq->status.count = 0;
            MPID_REQUEST_SET_COMPLETED(rreq);
            MPID_Request_release(rreq);
        }
        else {
            rreq->status.cancelled = FALSE;
            MPIU_DBG_MSG_P(CH3_OTHER,VERBOSE,
                           "request 0x%08x already matched, unable to cancel", rreq->handle);
        }
    }

 fn_exit:
    return mpi_errno;
 fn_fail:
    goto fn_exit;
}
/*
 * Try to cancel a receive request that was posted to newmad.
 *
 * Calls nm_sr_rcancel() on the newmad request embedded in rreq and records
 * the outcome in rreq->status.cancelled (TRUE on NM_ESUCCESS, else FALSE).
 *
 * vc   - virtual connection (not referenced by the body)
 * rreq - receive request to cancel
 *
 * Returns mpi_errno, which the visible code never changes from MPI_SUCCESS.
 */
int MPID_nem_newmad_cancel_recv(MPIDI_VC_t *vc, MPID_Request *rreq)
{
    int mpi_errno = MPI_SUCCESS;
    nm_sr_request_t *pending = &(REQ_FIELD(rreq,newmad_req));
    int cancel_rc;

    cancel_rc = nm_sr_rcancel(mpid_nem_newmad_session, pending);
    rreq->status.cancelled = (cancel_rc == NM_ESUCCESS) ? TRUE : FALSE;

 fn_exit:
    return mpi_errno;
 fn_fail: ATTRIBUTE((unused))
    goto fn_exit;
}
/*
 * Called whenever an any-source request has been posted to the posted
 * receive queue.
 *
 * Builds the newmad match information from the request's tag and context id
 * (wildcarding the source, and the tag for MPI_ANY_TAG), describes the
 * destination buffer as an iovec array, posts an any-gate newmad receive and
 * registers the newmad request in the hash so that it can be found again
 * from rreq.  The iovec array is stored in the request's `iov` field.
 *
 * rreq - the any-source receive request
 *
 * The function cannot report errors.  If one of the two allocations fails,
 * nothing is posted, the request's `iov` field is set to NULL and the
 * function returns.
 */
void MPID_nem_newmad_anysource_posted(MPID_Request *rreq)
{
    MPIR_Context_id_t context;
    Nmad_Nem_tag_t tag;
    nm_tag_t match_info = 0;
    nm_tag_t match_mask = NEM_NMAD_MATCH_FULL_MASK;
    nm_sr_request_t *newmad_req;
    struct iovec *newmad_iov;
    int num_seg = 1;
    int ret;
    MPIDI_msg_sz_t data_sz;
    int dt_contig;
    MPI_Aint dt_true_lb;
    MPID_Datatype *dt_ptr;

    newmad_req = MPIU_Malloc(sizeof(nm_sr_request_t));
    newmad_iov = (struct iovec *)MPIU_Malloc(NMAD_IOV_MAX_DEPTH*sizeof(struct iovec));

    /* Both buffers are written below, so never continue with a NULL pointer.
     * The assert flags the failure in debugging builds; the explicit test
     * also protects builds where assertions are compiled out. */
    MPIU_Assert(newmad_req != NULL && newmad_iov != NULL);
    if (newmad_req == NULL || newmad_iov == NULL) {
        if (newmad_req != NULL)
            MPIU_Free(newmad_req);
        if (newmad_iov != NULL)
            MPIU_Free(newmad_iov);
        REQ_FIELD(rreq,iov) = NULL;
        return;
    }

    tag     = rreq->dev.match.parts.tag;
    context = rreq->dev.match.parts.context_id;

    NEM_NMAD_DIRECT_MATCH(match_info,0,0,context);
    if (tag != MPI_ANY_TAG) {
        NEM_NMAD_SET_TAG(match_info,tag);
    }
    else {
        NEM_NMAD_SET_ANYTAG(match_info);
        NEM_NMAD_SET_ANYTAG(match_mask);
    }
    NEM_NMAD_SET_ANYSRC(match_info);
    NEM_NMAD_SET_ANYSRC(match_mask);

#ifdef DEBUG
    fprintf(stdout,"========> Any Source : Posting Recv req  %p (nmad req is %p) (match is %lx) (mask is %lx) \n",
            (void *)rreq,(void *)newmad_req,match_info,match_mask);
#endif

    MPIDI_Datatype_get_info(rreq->dev.user_count,rreq->dev.datatype, dt_contig, data_sz, dt_ptr,dt_true_lb);
    rreq->dev.OnDataAvail = NULL;

    if (dt_contig) {
        /* a single segment covering the whole user buffer */
        newmad_iov[0].iov_base = (char*)(rreq->dev.user_buf) + dt_true_lb;
        newmad_iov[0].iov_len  = data_sz;
    }
    else {
        struct iovec *newmad_iov_ptr = &(newmad_iov[0]);

        MPID_nem_newmad_process_rdtype(&rreq,dt_ptr,data_sz,&newmad_iov_ptr,&num_seg);
        /* same bound that MPID_nem_newmad_directRecv checks */
        MPIU_Assert(num_seg <= NMAD_IOV_MAX_DEPTH);
    }

    /* NOTE(review): the return value of the post is stored but not examined,
     * exactly as before. */
    ret = nm_sr_irecv_iov_with_ref_tagged(mpid_nem_newmad_session,NM_ANY_GATE,match_info,match_mask,
                                          newmad_iov,num_seg,newmad_req,(void*)rreq);
    (void) ret;

    REQ_FIELD(rreq,iov) = newmad_iov;
    MPID_MEM_NMAD_ADD_REQ_IN_HASH(rreq,newmad_req);
}
/*
 * Completion handler for a newmad receive request.
 *
 * Fills the request status (source and tag decoded from match_info, byte
 * count), detects truncation against the size of the user buffer, unpacks
 * the temporary buffer for non-contiguous datatypes that used one, frees the
 * netmod iov and finally hands the request to MPIDI_CH3U_Handle_recv_req().
 *
 * req        - receive request whose transfer has finished
 * match_info - newmad tag of the received message
 * size       - number of bytes received
 *
 * Returns mpi_errno, which the visible code never changes from MPI_SUCCESS.
 * Truncation and datatype mismatch are reported in req->status.MPI_ERROR.
 */
static int MPID_nem_newmad_handle_rreq(MPID_Request *req, nm_tag_t match_info, size_t size)
{
    int mpi_errno = MPI_SUCCESS;
    int complete = FALSE;
    int contiguous;
    MPI_Aint true_lb;
    MPIDI_msg_sz_t buf_capacity;
    MPIDI_msg_sz_t usable_sz;
    MPID_Datatype *dtype;
    MPIDI_VC_t *vc = NULL;

#ifdef DEBUG
    fprintf(stdout,"========> Completing Recv req  %p (match is %lx) \n",req,match_info);
#endif

    NEM_NMAD_MATCH_GET_RANK(match_info,req->status.MPI_SOURCE);
    NEM_NMAD_MATCH_GET_TAG(match_info,req->status.MPI_TAG);
    req->status.count = size;
    req->dev.recv_data_sz = size;

    MPIDI_Datatype_get_info(req->dev.user_count, req->dev.datatype,
                            contiguous, buf_capacity, dtype, true_lb);

    if (size > buf_capacity) {
        /* the message is longer than the user buffer: report truncation */
        MPIU_DBG_MSG_FMT(CH3_OTHER,VERBOSE,(MPIU_DBG_FDEST,
                                            "receive buffer too small; message truncated, msg_sz=" MPIDI_MSG_SZ_FMT ", userbuf_sz="
                                            MPIDI_MSG_SZ_FMT,
                                            req->dev.recv_data_sz, buf_capacity));
        req->status.MPI_ERROR = MPIR_Err_create_code(MPI_SUCCESS,
                                                     MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPI_ERR_TRUNCATE,
                                                     "**truncate", "**truncate %d %d %d %d",
                                                     req->status.MPI_SOURCE, req->status.MPI_TAG,
                                                     req->dev.recv_data_sz, buf_capacity );
        req->status.count = buf_capacity;
        usable_sz = buf_capacity;
    }
    else {
        usable_sz = req->dev.recv_data_sz;
    }

    if (!contiguous && req->dev.tmpbuf != NULL) {
        /* the data landed in a temporary buffer: unpack it into the user buffer */
        MPIDI_msg_sz_t unpacked = req->dev.recv_data_sz;

        MPID_Segment_unpack( req->dev.segment_ptr, 0, &unpacked, req->dev.tmpbuf);
        MPIU_Free(req->dev.tmpbuf);

        if (unpacked != usable_sz) {
            req->status.count = (int)unpacked;
            if (req->dev.recv_data_sz <= buf_capacity) {
                MPIU_ERR_SETSIMPLE(req->status.MPI_ERROR,MPI_ERR_TYPE,"**dtypemismatch");
            }
        }
    }

    if (REQ_FIELD(req,iov) != NULL) {
        MPIU_Free(REQ_FIELD(req,iov));
    }

    MPIDI_Comm_get_vc_set_active(req->comm, req->status.MPI_SOURCE, &vc);
    MPIDI_CH3U_Handle_recv_req(vc, req, &complete);
    MPIU_Assert(complete == TRUE);

#ifdef DEBUG
    fprintf(stdout,"========> Completing Recv req  %p done \n",req);
#endif

 fn_exit:
    return mpi_errno;
 fn_fail: ATTRIBUTE((unused))
    goto fn_exit;
}
int MPID_nem_ib_lmt_switch_send(struct MPIDI_VC *vc, struct MPID_Request *req) { int mpi_errno = MPI_SUCCESS; int dt_contig; MPIDI_msg_sz_t data_sz; MPID_Datatype *dt_ptr; MPI_Aint dt_true_lb; MPID_IOV r_cookie = req->ch.lmt_tmp_cookie; MPID_nem_ib_lmt_cookie_t *r_cookie_buf = r_cookie.iov_base; MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_IB_LMT_SWITCH_SEND); MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_IB_LMT_SWITCH_SEND); MPIDI_Datatype_get_info(req->dev.user_count, req->dev.datatype, dt_contig, data_sz, dt_ptr, dt_true_lb); void *write_from_buf; if (dt_contig) { write_from_buf = req->dev.user_buf; } else { /* see MPIDI_CH3_EagerNoncontigSend (in ch3u_eager.c) */ req->dev.segment_ptr = MPID_Segment_alloc(); MPIU_ERR_CHKANDJUMP((req->dev.segment_ptr == NULL), mpi_errno, MPI_ERR_OTHER, "**outofmemory"); MPID_Segment_init(req->dev.user_buf, req->dev.user_count, req->dev.datatype, req->dev.segment_ptr, 0); req->dev.segment_first = 0; req->dev.segment_size = data_sz; MPIDI_msg_sz_t last; last = req->dev.segment_size; /* segment_size is byte offset */ MPIU_Assert(last > 0); REQ_FIELD(req, lmt_pack_buf) = MPIU_Malloc(data_sz); MPIU_ERR_CHKANDJUMP(!REQ_FIELD(req, lmt_pack_buf), mpi_errno, MPI_ERR_OTHER, "**outofmemory"); MPID_Segment_pack(req->dev.segment_ptr, req->dev.segment_first, &last, (char *) (REQ_FIELD(req, lmt_pack_buf))); MPIU_Assert(last == req->dev.segment_size); write_from_buf = REQ_FIELD(req, lmt_pack_buf); } //assert(dt_true_lb == 0); uint8_t *tailp = (uint8_t *) ((uint8_t *) write_from_buf /*+ dt_true_lb */ + data_sz - sizeof(uint8_t)); #if 0 *is_end_flag_same = (r_cookie_buf->tail == *tailp) ? 
1 : 0; #else REQ_FIELD(req, lmt_receiver_tail) = r_cookie_buf->tail; REQ_FIELD(req, lmt_sender_tail) = *tailp; dprintf("lmt_switch_send,tail on sender=%02x,tail onreceiver=%02x,req=%p\n", *tailp, r_cookie_buf->tail, req); #ifdef MPID_NEM_IB_DEBUG_LMT uint8_t *tail_wordp = (uint8_t *) ((uint8_t *) write_from_buf + data_sz - sizeof(uint32_t) * 2); #endif dprintf("lmt_switch_send,tail on sender=%d\n", *tail_wordp); fflush(stdout); #endif fn_exit: MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_IB_LMT_SWITCH_SEND); return mpi_errno; fn_fail: goto fn_exit; }
int MPID_nem_ib_lmt_initiate_lmt(struct MPIDI_VC *vc, union MPIDI_CH3_Pkt *rts_pkt, struct MPID_Request *req) { int mpi_errno = MPI_SUCCESS; int dt_contig; MPIDI_msg_sz_t data_sz; MPID_Datatype *dt_ptr; MPI_Aint dt_true_lb; #if 0 MPID_nem_ib_vc_area *vc_ib = VC_IB(vc); #endif MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_IB_LMT_INITIATE_LMT); MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_IB_LMT_INITIATE_LMT); dprintf("lmt_initiate_lmt,enter,%d->%d,req=%p\n", MPID_nem_ib_myrank, vc->pg_rank, req); /* obtain dt_true_lb */ /* see MPIDI_Datatype_get_info(in, in, out, out, out, out) (in src/mpid/ch3/include/mpidimpl.h) */ MPIDI_Datatype_get_info(req->dev.user_count, req->dev.datatype, dt_contig, data_sz, dt_ptr, dt_true_lb); /* FIXME: who frees s_cookie_buf? */ /* malloc memory area for cookie. auto variable is NG because isend does not copy payload */ MPID_nem_ib_lmt_cookie_t *s_cookie_buf = (MPID_nem_ib_lmt_cookie_t *) MPIU_Malloc(sizeof(MPID_nem_ib_lmt_cookie_t)); /* remember address to "free" when receiving DONE from receiver */ req->ch.s_cookie = s_cookie_buf; /* see MPIDI_CH3_PktHandler_RndvClrToSend (in src/mpid/ch3/src/ch3u_rndv.c) */ //assert(dt_true_lb == 0); void *write_from_buf; if (dt_contig) { write_from_buf = (void *) ((char *) req->dev.user_buf + dt_true_lb); } else { /* see MPIDI_CH3_EagerNoncontigSend (in ch3u_eager.c) */ req->dev.segment_ptr = MPID_Segment_alloc(); MPIU_ERR_CHKANDJUMP((req->dev.segment_ptr == NULL), mpi_errno, MPI_ERR_OTHER, "**outofmemory"); MPID_Segment_init(req->dev.user_buf, req->dev.user_count, req->dev.datatype, req->dev.segment_ptr, 0); req->dev.segment_first = 0; req->dev.segment_size = data_sz; MPIDI_msg_sz_t last; last = req->dev.segment_size; /* segment_size is byte offset */ MPIU_Assert(last > 0); REQ_FIELD(req, lmt_pack_buf) = MPIU_Malloc((size_t) req->dev.segment_size); MPIU_ERR_CHKANDJUMP(!REQ_FIELD(req, lmt_pack_buf), mpi_errno, MPI_ERR_OTHER, "**outofmemory"); MPID_Segment_pack(req->dev.segment_ptr, req->dev.segment_first, &last, (char 
*) (REQ_FIELD(req, lmt_pack_buf))); MPIU_Assert(last == req->dev.segment_size); write_from_buf = REQ_FIELD(req, lmt_pack_buf); } dprintf ("lmt_initate_lmt,dt_contig=%d,write_from_buf=%p,req->dev.user_buf=%p,REQ_FIELD(req, lmt_pack_buf)=%p\n", dt_contig, write_from_buf, req->dev.user_buf, REQ_FIELD(req, lmt_pack_buf)); #ifdef HAVE_LIBDCFA #else s_cookie_buf->addr = write_from_buf; #endif /* put sz, see MPID_nem_lmt_RndvSend (in src/mpid/ch3/channels/nemesis/src/mpid_nem_lmt.c) */ /* TODO remove sz field * pkt_RTS_handler (in src/mpid/ch3/channels/nemesis/src/mpid_nem_lmt.c) * rreq->ch.lmt_data_sz = rts_pkt->data_sz; */ //s_cookie_buf->sz = (uint32_t)((MPID_nem_pkt_lmt_rts_t*)rts_pkt)->data_sz; /* preserve and put tail, because tail magic is written on the tail of payload * because we don't want to add another SGE or RDMA command */ MPIU_Assert(((MPID_nem_pkt_lmt_rts_t *) rts_pkt)->data_sz == data_sz); s_cookie_buf->tail = *((uint8_t *) ((uint8_t *) write_from_buf + data_sz - sizeof(uint8_t))); /* prepare magic */ //*((uint32_t*)(write_from_buf + data_sz - sizeof(tailmagic_t))) = MPID_NEM_IB_COM_MAGIC; #if 0 /* moving to packet header */ /* embed RDMA-write-to buffer occupancy information */ dprintf("lmt_initiate_lmt,rsr_seq_num_tail=%d\n", vc_ib->ibcom->rsr_seq_num_tail); /* embed RDMA-write-to buffer occupancy information */ s_cookie_buf->seq_num_tail = vc_ib->ibcom->rsr_seq_num_tail; /* remember the last one sent */ vc_ib->ibcom->rsr_seq_num_tail_last_sent = vc_ib->ibcom->rsr_seq_num_tail; #endif int post_num; uint32_t max_msg_sz; MPID_nem_ib_vc_area *vc_ib = VC_IB(vc); MPID_nem_ib_com_get_info_conn(vc_ib->sc->fd, MPID_NEM_IB_COM_INFOKEY_PATTR_MAX_MSG_SZ, &max_msg_sz, sizeof(uint32_t)); /* Type of max_msg_sz is uint32_t. 
*/ post_num = (data_sz + (long) max_msg_sz - 1) / (long) max_msg_sz; s_cookie_buf->max_msg_sz = max_msg_sz; s_cookie_buf->seg_seq_num = 1; s_cookie_buf->seg_num = post_num; REQ_FIELD(req, buf.from) = write_from_buf; REQ_FIELD(req, data_sz) = data_sz; REQ_FIELD(req, seg_seq_num) = 1; // only send 1st-segment, even if there are some segments. REQ_FIELD(req, seg_num) = post_num; REQ_FIELD(req, max_msg_sz) = max_msg_sz; long length; if (post_num > 1) { length = max_msg_sz; } else { length = data_sz; } /* put IB rkey */ struct MPID_nem_ib_com_reg_mr_cache_entry_t *mr_cache = MPID_nem_ib_com_reg_mr_fetch(write_from_buf, length, 0, MPID_NEM_IB_COM_REG_MR_GLOBAL); MPIU_ERR_CHKANDJUMP(!mr_cache, mpi_errno, MPI_ERR_OTHER, "**MPID_nem_ib_com_reg_mr_fetch"); struct ibv_mr *mr = mr_cache->mr; REQ_FIELD(req, lmt_mr_cache) = (void *) mr_cache; #ifdef HAVE_LIBDCFA s_cookie_buf->addr = (void *) mr->host_addr; dprintf("lmt_initiate_lmt,s_cookie_buf->addr=%p\n", s_cookie_buf->addr); #endif s_cookie_buf->rkey = mr->rkey; dprintf("lmt_initiate_lmt,tail=%02x,mem-tail=%p,%02x,sz=%ld,raddr=%p,rkey=%08x\n", s_cookie_buf->tail, write_from_buf + data_sz - sizeof(uint8_t), *((uint8_t *) (write_from_buf + data_sz - sizeof(uint8_t))), data_sz, s_cookie_buf->addr, s_cookie_buf->rkey); /* send cookie. rts_pkt as the MPI-header, s_cookie_buf as the payload */ MPID_nem_lmt_send_RTS(vc, (MPID_nem_pkt_lmt_rts_t *) rts_pkt, s_cookie_buf, sizeof(MPID_nem_ib_lmt_cookie_t)); fn_exit: MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_IB_LMT_INITIATE_LMT); return mpi_errno; fn_fail: goto fn_exit; }
int MPID_nem_ib_lmt_start_recv_core(struct MPID_Request *req, void *raddr, uint32_t rkey, long len, void *write_to_buf, uint32_t max_msg_sz, int end) { int mpi_errno = MPI_SUCCESS; int ibcom_errno; struct MPIDI_VC *vc = req->ch.vc; MPID_nem_ib_vc_area *vc_ib = VC_IB(vc); int i; int divide; int posted_num; int last; uint32_t r_max_msg_sz; /* responder's max_msg_sz */ void *write_pos; void *addr; long data_sz; MPIDI_msg_sz_t rest_data_sz; MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_IB_LMT_START_RECV_CORE); MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_IB_LMT_START_RECV_CORE); MPID_nem_ib_com_get_info_conn(vc_ib->sc->fd, MPID_NEM_IB_COM_INFOKEY_PATTR_MAX_MSG_SZ, &r_max_msg_sz, sizeof(uint32_t)); divide = (max_msg_sz + r_max_msg_sz - 1) / r_max_msg_sz; write_pos = write_to_buf; posted_num = 0; last = MPID_NEM_IB_LMT_PART_OF_SEGMENT; rest_data_sz = len; addr = raddr; for (i = 0; i < divide; i++) { if (i == divide - 1) data_sz = max_msg_sz - i * r_max_msg_sz; else data_sz = r_max_msg_sz; if (i == divide - 1) { if (end) last = MPID_NEM_IB_LMT_LAST_PKT; /* last part of last segment packet */ else last = MPID_NEM_IB_LMT_SEGMENT_LAST; /* last part of this segment */ /* last data may be smaller than initiator's max_msg_sz */ if (rest_data_sz < max_msg_sz) data_sz = rest_data_sz; } ibcom_errno = MPID_nem_ib_com_lrecv(vc_ib->sc->fd, (uint64_t) req, addr, data_sz, rkey, write_pos, last); MPIU_ERR_CHKANDJUMP(ibcom_errno, mpi_errno, MPI_ERR_OTHER, "**MPID_nem_ib_com_lrecv"); /* update position */ write_pos = (void *) ((char *) write_pos + data_sz); addr = (void *) ((char *) addr + data_sz); /* update rest data size */ rest_data_sz -= data_sz; /* count request number */ posted_num++; } MPIU_Assert(rest_data_sz == 0); MPID_nem_ib_ncqe += posted_num; //dprintf("start_recv,ncqe=%d\n", MPID_nem_ib_ncqe); dprintf("lmt_start_recv_core,MPID_nem_ib_ncqe=%d\n", MPID_nem_ib_ncqe); dprintf ("lmt_start_recv_core,req=%p,sz=%ld,write_to_buf=%p,lmt_pack_buf=%p,user_buf=%p,raddr=%p,rkey=%08x,tail=%p=%02x\n", req, 
req->ch.lmt_data_sz, write_to_buf, REQ_FIELD(req, lmt_pack_buf), req->dev.user_buf, raddr, rkey, write_to_buf + req->ch.lmt_data_sz - sizeof(uint8_t), *((uint8_t *) (write_to_buf + req->ch.lmt_data_sz - sizeof(uint8_t)))); //fflush(stdout); #ifdef MPID_NEM_IB_LMT_GET_CQE MPID_nem_ib_ncqe_to_drain += posted_num; /* use CQE instead of polling */ #else /* drain_scq and ib_poll is not ordered, so both can decrement ref_count */ MPIR_Request_add_ref(req); /* register to poll list in ib_poll() */ /* don't use req->dev.next because it causes unknown problem */ MPID_nem_ib_lmtq_enqueue(&MPID_nem_ib_lmtq, req); dprintf("lmt_start_recv_core,lmtq enqueue\n"); //volatile uint8_t* tailmagic = (uint8_t*)((void*)req->dev.user_buf + req->ch.lmt_data_sz - sizeof(uint8_t)); //dprintf("start_recv_core,cur_tail=%02x,lmt_receiver_tail=%02x\n", *tailmagic, REQ_FIELD(req, lmt_receiver_tail)); #endif fn_exit: MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_IB_LMT_START_RECV_CORE); return mpi_errno; fn_fail: goto fn_exit; }
/*
 * Content handler that prints the request statistics.
 *
 * The zones to show are taken from the location configuration's `display`
 * array, or from the main configuration's `monitor` array when `display` is
 * not set.  With neither set the response is 204 No Content.  Otherwise one
 * line per statistics node is sent: the node key followed by every counter
 * listed in ngx_http_reqstat_fields, comma separated, with the trailing comma
 * replaced by a newline.  Each line is built in a 512-byte buffer that is
 * recycled through the free/busy chains.  A final buffer flagged last_buf
 * ends the response.
 *
 * Returns the result of sending the header or the last buffer, or
 * NGX_HTTP_INTERNAL_SERVER_ERROR on allocation or output failure.
 */
static ngx_int_t
ngx_http_reqstat_show_handler(ngx_http_request_t *r)
{
    ngx_int_t                    rc;
    ngx_buf_t                   *line;
    ngx_uint_t                   zone_idx, field_idx, nfields;
    ngx_array_t                 *display;
    ngx_chain_t                 *out, *free_bufs, *busy_bufs;
    ngx_queue_t                 *cur;
    ngx_shm_zone_t             **zones;
    ngx_http_reqstat_ctx_t      *ctx;
    ngx_http_reqstat_conf_t     *loc_conf;
    ngx_http_reqstat_conf_t     *main_conf;
    ngx_http_reqstat_rbnode_t   *node;

    loc_conf = ngx_http_get_module_loc_conf(r, ngx_http_reqstat_module);
    main_conf = ngx_http_get_module_main_conf(r, ngx_http_reqstat_module);

    /* the location setting wins over the main-level one */
    display = loc_conf->display;
    if (display == NULL) {
        display = main_conf->monitor;
    }

    if (display == NULL) {
        r->headers_out.status = NGX_HTTP_NO_CONTENT;
        return ngx_http_send_header(r);
    }

    r->headers_out.status = NGX_HTTP_OK;
    ngx_http_clear_content_length(r);

    rc = ngx_http_send_header(r);
    if (rc == NGX_ERROR || rc > NGX_OK || r->header_only) {
        return rc;
    }

    nfields = sizeof(ngx_http_reqstat_fields) / sizeof(off_t);
    zones = display->elts;
    free_bufs = NULL;
    busy_bufs = NULL;

    for (zone_idx = 0; zone_idx < display->nelts; zone_idx++) {

        ctx = zones[zone_idx]->data;

        cur = ngx_queue_head(&ctx->sh->queue);
        while (cur != ngx_queue_sentinel(&ctx->sh->queue)) {

            node = ngx_queue_data(cur, ngx_http_reqstat_rbnode_t, queue);

            out = ngx_chain_get_free_buf(r->pool, &free_bufs);
            if (out == NULL) {
                return NGX_HTTP_INTERNAL_SERVER_ERROR;
            }

            line = out->buf;

            /* a buffer that is handed out for the first time has no storage yet */
            if (line->start == NULL) {
                line->start = ngx_pcalloc(r->pool, 512);
                if (line->start == NULL) {
                    return NGX_HTTP_INTERNAL_SERVER_ERROR;
                }

                line->end = line->start + 512;
            }

            line->pos = line->start;
            line->last = line->start;
            line->memory = 1;
            line->temporary = 1;

            line->last = ngx_slprintf(line->last, line->end, "%*s,",
                                      (size_t) node->len, node->data);

            for (field_idx = 0; field_idx < nfields; field_idx++) {
                line->last = ngx_slprintf(line->last, line->end, "%uA,",
                                          *REQ_FIELD(node,
                                                     ngx_http_reqstat_fields[field_idx]));
            }

            /* turn the trailing comma into the line terminator */
            *(line->last - 1) = '\n';

            if (ngx_http_output_filter(r, out) == NGX_ERROR) {
                return NGX_HTTP_INTERNAL_SERVER_ERROR;
            }

#if nginx_version >= 1002000
            ngx_chain_update_chains(r->pool, &free_bufs, &busy_bufs, &out,
                                    (ngx_buf_tag_t) &ngx_http_reqstat_module);
#else
            ngx_chain_update_chains(&free_bufs, &busy_bufs, &out,
                                    (ngx_buf_tag_t) &ngx_http_reqstat_module);
#endif

            cur = ngx_queue_next(cur);
        }
    }

    /* terminate the response */
    out = ngx_chain_get_free_buf(r->pool, &free_bufs);
    if (out == NULL) {
        return NGX_HTTP_INTERNAL_SERVER_ERROR;
    }

    line = out->buf;
    line->last_buf = 1;

    return ngx_http_output_filter(r, out);
}
/*
 * Post a receive with a known source directly to newmad.
 *
 * For a non-local VC the newmad match information is built from the
 * request's rank, context id and tag (wildcarding the tag for MPI_ANY_TAG)
 * and a newmad receive is posted on the VC's gate: a plain receive for
 * contiguous datatypes, an iovec receive otherwise.  The iovec array, or
 * NULL for the contiguous case, is stored in the request's `iov` field.
 * For a local VC the embedded newmad request is only zeroed.
 *
 * vc   - virtual connection the message is expected on
 * rreq - receive request
 *
 * Returns MPI_SUCCESS, or an MPI error code when the iovec array cannot be
 * allocated.
 */
int MPID_nem_newmad_directRecv(MPIDI_VC_t *vc, MPID_Request *rreq)
{
    int mpi_errno = MPI_SUCCESS;
    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_NEWMAD_DIRECTRECV);
    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_NEWMAD_DIRECTRECV);

    if (!VC_CH(vc)->is_local)
    {
        nm_tag_t          match_info = 0;
        nm_tag_t          match_mask = NEM_NMAD_MATCH_FULL_MASK;
        MPIR_Rank_t       source     = rreq->dev.match.parts.rank;
        MPIR_Context_id_t context    = rreq->dev.match.parts.context_id;
        Nmad_Nem_tag_t    tag        = rreq->dev.match.parts.tag;
        int               ret;
        MPIDI_msg_sz_t    data_sz;
        int               dt_contig;
        MPI_Aint          dt_true_lb;
        MPID_Datatype    *dt_ptr;

        NEM_NMAD_DIRECT_MATCH(match_info,0,source,context);
        if (tag != MPI_ANY_TAG) {
            NEM_NMAD_SET_TAG(match_info,tag);
        }
        else {
            NEM_NMAD_SET_ANYTAG(match_info);
            NEM_NMAD_SET_ANYTAG(match_mask);
        }

#ifdef DEBUG
        fprintf(stdout,"========> Posting Recv req  %p (match is %lx) \n",(void *)rreq,match_info);
#endif
        MPIDI_Datatype_get_info(rreq->dev.user_count,rreq->dev.datatype, dt_contig, data_sz, dt_ptr,dt_true_lb);
        rreq->dev.OnDataAvail = NULL;

        if (dt_contig)
        {
            ret = nm_sr_irecv_with_ref_tagged(mpid_nem_newmad_session,VC_FIELD(vc,p_gate),match_info,match_mask,
                                              (char*)(rreq->dev.user_buf) + dt_true_lb,data_sz,
                                              &(REQ_FIELD(rreq,newmad_req)),(void*)rreq);
            REQ_FIELD(rreq,iov) = NULL;
        }
        else
        {
            int           num_seg        = 0;
            struct iovec *newmad_iov     = (struct iovec *)MPIU_Malloc(NMAD_IOV_MAX_DEPTH*sizeof(struct iovec));
            struct iovec *newmad_iov_ptr;

            /* the array is filled in right away, so a failed allocation must
             * be reported instead of being dereferenced */
            MPIU_ERR_CHKANDJUMP(newmad_iov == NULL, mpi_errno, MPI_ERR_OTHER, "**nomem");
            newmad_iov_ptr = &(newmad_iov[0]);

            MPID_nem_newmad_process_rdtype(&rreq,dt_ptr,data_sz,&newmad_iov_ptr,&num_seg);
            MPIU_Assert(num_seg <= NMAD_IOV_MAX_DEPTH);
#ifdef DEBUG
            {
                int k;
                for (k = 0; k < num_seg; k++)
                {
                    fprintf(stdout,"======================\n");
                    /* iov_len is cast because the format prints it with %i */
                    fprintf(stdout,"RECV nmad_iov[%i]: [base %p][len %i]\n",k,
                            newmad_iov[k].iov_base,(int)newmad_iov[k].iov_len);
                }
            }
#endif
            ret = nm_sr_irecv_iov_with_ref_tagged(mpid_nem_newmad_session,VC_FIELD(vc,p_gate),match_info,match_mask,
                                                  newmad_iov,num_seg,&(REQ_FIELD(rreq,newmad_req)),(void*)rreq);
            REQ_FIELD(rreq,iov) = newmad_iov;
        }
        /* NOTE(review): the return value of the post is stored but not
         * examined, exactly as before. */
        (void) ret;
    }
    else
    {
        /* Fixme : this might not work in the case of multiple netmods */
        memset((&(REQ_FIELD(rreq,newmad_req))),0,sizeof(nm_sr_request_t));
    }

 fn_exit:
    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_NEWMAD_DIRECTRECV);
    return mpi_errno;
 fn_fail: ATTRIBUTE((unused))
    goto fn_exit;
}
/*
 * Start the receiving side of an IB large-message transfer (LMT).
 *
 * Chooses the destination buffer (the user buffer for contiguous datatypes,
 * a pack buffer obtained from MPID_nem_ib_stmalloc() otherwise), stores the
 * sender's tail byte and segment information in the request, and then either
 * issues the first read through MPID_nem_ib_lmt_start_recv_core() or, when
 * the send queue is not empty or the queue capacities are exhausted, parks
 * the request on the VC's send queue together with the values needed to
 * issue the read later.  Finally it tries to make progress on the send
 * queue.
 *
 * vc       - virtual connection to the sender; stashed in req->ch.vc
 * req      - receive request
 * s_cookie - iov whose base points at the cookie received from the sender
 *
 * Returns MPI_SUCCESS, or an MPI error code when the pack buffer cannot be
 * allocated or the read cannot be issued.
 */
int MPID_nem_ib_lmt_start_recv(struct MPIDI_VC *vc, struct MPID_Request *req, MPID_IOV s_cookie)
{
    int mpi_errno = MPI_SUCCESS;
    int contiguous;
    MPIDI_msg_sz_t type_sz;
    MPID_Datatype *dtype;
    MPI_Aint true_lb;
    MPID_nem_ib_vc_area *vc_ib = VC_IB(vc);
    MPID_nem_ib_lmt_cookie_t *cookie;
    void *write_to_buf;
    int is_last;
    long chunk_len;
    int slack;
    int can_issue;
    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_IB_LMT_START_RECV);
    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_IB_LMT_START_RECV);

    dprintf("lmt_start_recv,enter,%d<-%d,req=%p\n", MPID_nem_ib_myrank, vc->pg_rank, req);

    /* obtain the true lower bound of the datatype */
    /* see MPIDI_Datatype_get_info(in, in, out, out, out, out) (in src/mpid/ch3/include/mpidimpl.h) */
    MPIDI_Datatype_get_info(req->dev.user_count, req->dev.datatype, contiguous, type_sz, dtype,
                            true_lb);

    cookie = s_cookie.iov_base;

    /* stash vc for ib_poll */
    req->ch.vc = vc;

    if (!contiguous) {
        /* non-contiguous data is received into a pack buffer first */
        REQ_FIELD(req, lmt_pack_buf) = MPID_nem_ib_stmalloc((size_t) req->ch.lmt_data_sz);
        MPIU_ERR_CHKANDJUMP(!REQ_FIELD(req, lmt_pack_buf), mpi_errno, MPI_ERR_OTHER,
                            "**outofmemory");
        write_to_buf = REQ_FIELD(req, lmt_pack_buf);
    }
    else {
        write_to_buf = (void *) ((char *) req->dev.user_buf + true_lb);
    }
    REQ_FIELD(req, buf.to) = write_to_buf;

#ifndef MPID_NEM_IB_LMT_GET_CQE
    /* unmark magic: store the complement of the sender's tail byte in the
     * last byte of the destination */
    *((uint8_t *) (write_to_buf + req->ch.lmt_data_sz - sizeof(uint8_t))) = ~cookie->tail;     /* size in cookie was not set */
#endif
    dprintf
        ("lmt_start_recv,dt_contig=%d,write_to_buf=%p,req->dev.user_buf=%p,REQ_FIELD(req, lmt_pack_buf)=%p,marked-tail=%02x,unmarked-tail=%02x\n",
         contiguous, write_to_buf, req->dev.user_buf, REQ_FIELD(req, lmt_pack_buf),
         cookie->tail,
         *((uint8_t *) (write_to_buf + req->ch.lmt_data_sz - sizeof(uint8_t))));

    /* stash tail for poll because do_cts in mpid_nem_lmt.c free s_cookie_buf just after this function */
    REQ_FIELD(req, lmt_tail) = cookie->tail;
    dprintf("lmt_start_recv,mem-tail=%p,%02x\n",
            write_to_buf + req->ch.lmt_data_sz - sizeof(uint8_t),
            *((uint8_t *) (write_to_buf + req->ch.lmt_data_sz - sizeof(uint8_t))));

    /* a message made of several segments starts with one full-sized segment */
    if (cookie->seg_seq_num != cookie->seg_num) {
        is_last = 0;
        chunk_len = cookie->max_msg_sz;
    }
    else {
        is_last = 1;
        chunk_len = req->ch.lmt_data_sz;
    }

    REQ_FIELD(req, max_msg_sz) = cookie->max_msg_sz;    /* store initiator's max_msg_sz */
    REQ_FIELD(req, seg_num) = cookie->seg_num;          /* store number of segments */

    /* try to issue RDMA-read command */
    slack = 1;  /* slack for control packet bringing sequence number */
    can_issue = MPID_nem_ib_sendq_empty(vc_ib->sendq) &&
        vc_ib->ibcom->ncom < MPID_NEM_IB_COM_MAX_SQ_CAPACITY - slack &&
        MPID_nem_ib_ncqe < MPID_NEM_IB_COM_MAX_CQ_CAPACITY - slack;

    if (!can_issue) {
        /* enqueue command into send_queue */
        dprintf("lmt_start_recv, enqueuing,sendq_empty=%d,ncom=%d,ncqe=%d\n",
                MPID_nem_ib_sendq_empty(vc_ib->sendq),
                vc_ib->ibcom->ncom < MPID_NEM_IB_COM_MAX_SQ_CAPACITY,
                MPID_nem_ib_ncqe < MPID_NEM_IB_COM_MAX_CQ_CAPACITY);

        /* make raddr, (sz is in rreq->ch.lmt_data_sz), rkey, (user_buf is in req->dev.user_buf) survive enqueue, free cookie, dequeue */
        REQ_FIELD(req, lmt_raddr) = cookie->addr;
        REQ_FIELD(req, lmt_rkey) = cookie->rkey;
        REQ_FIELD(req, lmt_write_to_buf) = write_to_buf;
        REQ_FIELD(req, lmt_szsend) = chunk_len;
        REQ_FIELD(req, last) = is_last;

        MPID_nem_ib_sendq_enqueue(&vc_ib->sendq, req);
    }
    else {
        mpi_errno =
            MPID_nem_ib_lmt_start_recv_core(req, cookie->addr, cookie->rkey, chunk_len,
                                            write_to_buf, cookie->max_msg_sz, is_last);
        if (mpi_errno) {
            MPIU_ERR_POP(mpi_errno);
        }
    }

#ifndef MPID_NEM_IB_DISABLE_VAR_OCC_NOTIFY_RATE
    /* change remote notification policy of RDMA-write-to buf */
    MPID_nem_ib_change_rdmabuf_occupancy_notify_policy_lw(vc_ib, &vc_ib->ibcom->lsr_seq_num_tail);
#endif

    /* try to send from sendq because at least one RDMA-write-to buffer has been released */
    if (!MPID_nem_ib_sendq_empty(vc_ib->sendq)) {
        dprintf("lmt_start_recv,ncom=%d,ncqe=%d,diff=%d\n",
                vc_ib->ibcom->ncom < MPID_NEM_IB_COM_MAX_SQ_CAPACITY,
                MPID_nem_ib_ncqe < MPID_NEM_IB_COM_MAX_CQ_CAPACITY,
                MPID_nem_ib_diff16(vc_ib->ibcom->sseq_num,
                                   vc_ib->ibcom->lsr_seq_num_tail) <
                MPID_NEM_IB_COM_RDMABUF_NSEG);

        if (MPID_nem_ib_sendq_ready_to_send_head(vc_ib)) {
            dprintf("lmt_start_recv,send_progress\n");
            fflush(stdout);
            MPID_nem_ib_send_progress(vc);
        }
    }

  fn_exit:
    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_IB_LMT_START_RECV);
    return mpi_errno;
  fn_fail:
    goto fn_exit;
}