int orte_iof_base_endpoint_ack(
    orte_iof_base_endpoint_t* endpoint,
    uint32_t seq)
{
    bool window_closed, window_open;

    OPAL_THREAD_LOCK(&orte_iof_base.iof_lock);
    window_closed =
        ORTE_IOF_BASE_SEQDIFF(endpoint->ep_seq, endpoint->ep_ack) >= orte_iof_base.iof_window_size;
    endpoint->ep_ack = seq;
    window_open =
        ORTE_IOF_BASE_SEQDIFF(endpoint->ep_seq, endpoint->ep_ack) < orte_iof_base.iof_window_size;

    /* someone is waiting on all output to be flushed */
    if (orte_iof_base.iof_waiting && endpoint->ep_seq == endpoint->ep_ack) {
        opal_condition_signal(&orte_iof_base.iof_condition);
    }

    /* check to see if we need to re-enable forwarding */
    if (window_closed && window_open) {
        opal_output(orte_iof_base.iof_output,
                    "iof_base_endpoint ack; re-enabled reading for endpoint");
        opal_event_add(&endpoint->ep_event, 0);
    }
    OPAL_THREAD_UNLOCK(&orte_iof_base.iof_lock);
    return ORTE_SUCCESS;
}
static void exit_callback(int fd, short event, void *arg)
{
    /* Trigger the normal exit conditions */
    orted_globals.exit_condition = true;
    opal_condition_signal(&orted_globals.condition);
    OPAL_THREAD_UNLOCK(&orted_globals.mutex);
}
static void orte_pls_rsh_launch_cb(int fd, short event, void* args)
{
    orte_pls_rsh_stack_t *stack = (orte_pls_rsh_stack_t*)args;

    OPAL_THREAD_LOCK(&stack->mutex);
    stack->rc = orte_pls_rsh_launch(stack->jobid);
    stack->complete = true;
    opal_condition_signal(&stack->cond);
    OPAL_THREAD_UNLOCK(&stack->mutex);
}
/*********************************************************************
 *
 * Local Functions
 *
 * None of these functions should lock mutex.  All but blk_waitpid_cb
 * should only be called if the mutex is already locked.
 *
 ********************************************************************/

static void blk_waitpid_cb(pid_t wpid, int status, void *data)
{
    blk_waitpid_data_t *wp_data = (blk_waitpid_data_t*) data;

    wp_data->status = status;
    wp_data->done = 1;
    opal_condition_signal(wp_data->cond);
    wp_data->free = 1;
}
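/*
 * Hedged sketch (not from the source tree): the blocking side that pairs with
 * blk_waitpid_cb above.  Per the comment block, the caller is assumed to
 * already hold the mutex; opal_condition_wait() releases it while sleeping.
 * The helper name blk_waitpid_wait_sketch and the mutex parameter are
 * illustrative assumptions, not actual OMPI symbols.
 */
static int blk_waitpid_wait_sketch(blk_waitpid_data_t *wp_data, opal_mutex_t *mutex)
{
    /* sleep until the waitpid callback marks the data as done */
    while (0 == wp_data->done) {
        opal_condition_wait(wp_data->cond, mutex);
    }
    return wp_data->status;
}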
int mca_oob_ud_msg_status_update (mca_oob_ud_msg_t *msg, mca_oob_ud_status_t status)
{
    int rc;

    opal_output_verbose(10, orte_oob_base_framework.framework_output,
                        "%s oob:ud:msg_status_update setting status of msg %p to %d",
                        ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), (void *) msg, (int) status);

    OPAL_THREAD_LOCK(&msg->lock);

    if (status != msg->status) {
        if (MCA_OOB_UD_MSG_STATUS_COMPLETE == status) {
            opal_output_verbose(10, orte_oob_base_framework.framework_output,
                                "%s oob:ud:msg_status_update setting peer %s as available",
                                ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                ORTE_NAME_PRINT(&msg->peer->peer_name));
            msg->peer->peer_available = true;
        }

        switch (status) {
        case MCA_OOB_UD_MSG_STATUS_TIMEOUT:
            rc = ORTE_ERR_TIMEOUT;
            break;
        case MCA_OOB_UD_MSG_STATUS_COMPLETE:
            rc = ORTE_SUCCESS;
            break;
        case MCA_OOB_UD_MSG_STATUS_ERROR:
        default:
            rc = ORTE_ERROR;
        }

        if (msg->cbfunc) {
            msg->cbfunc (msg, rc);
        }

        /* signal status change */
        msg->status = status;
        opal_condition_signal (&msg->status_changed);

        OPAL_THREAD_UNLOCK(&msg->lock);

        if (false == msg->persist) {
            mca_oob_ud_msg_return (msg);
        }

        return ORTE_SUCCESS;
    }

    OPAL_THREAD_UNLOCK(&msg->lock);

    return ORTE_SUCCESS;
}
static int opal_crs_blcr_thread_callback(void *arg)
{
    const struct cr_checkpoint_info *ckpt_info = cr_get_checkpoint_info();
    int ret;

    opal_output_verbose(10, mca_crs_blcr_component.super.output_handle,
                        "crs:blcr: thread_callback()");

    OPAL_THREAD_LOCK(&blcr_lock);
    blcr_current_state = OPAL_CRS_CHECKPOINT;

    /*
     * Allow the checkpoint to be taken, if we requested it
     */
#if CRS_BLCR_HAVE_INFO_REQUESTER == 1
    if( ckpt_info->requester != my_pid ) {
        ret = cr_checkpoint(CR_CHECKPOINT_OMIT);
        blcr_current_state = OPAL_CRS_RUNNING;
        opal_output_verbose(10, mca_crs_blcr_component.super.output_handle,
                            "crs:blcr: thread_callback(); WARNING: An external agent attempted to checkpoint this process "
                            "when it did not expect to be checkpointed.  Skipping this checkpoint request."
                            " [%d != %d].", ckpt_info->requester, my_pid);
        /* drop the lock before bailing out of this early-return path */
        OPAL_THREAD_UNLOCK(&blcr_lock);
        return 0;
    }
    else
#endif
    {
        ret = cr_checkpoint(0);
    }

    /*
     * Restarting
     */
    if ( 0 < ret ) {
        opal_output_verbose(10, mca_crs_blcr_component.super.output_handle,
                            "crs:blcr: thread_callback: Restarting.");
        blcr_current_state = OPAL_CRS_RESTART;
    }
    /*
     * Continuing
     */
    else {
        opal_output_verbose(10, mca_crs_blcr_component.super.output_handle,
                            "crs:blcr: thread_callback: Continue.");
        blcr_current_state = OPAL_CRS_CONTINUE;
    }

    OPAL_THREAD_UNLOCK(&blcr_lock);
    opal_condition_signal(&blcr_cond);

    return 0;
}
/*
 * Request completed - free buffer and decrement pending count
 */
int mca_pml_base_bsend_request_free(void* addr)
{
    /* remove from list of pending requests */
    OPAL_THREAD_LOCK(&mca_pml_bsend_mutex);

    /* free buffer */
    mca_pml_bsend_allocator->alc_free(mca_pml_bsend_allocator, addr);

    /* decrement count of buffered requests */
    if (--mca_pml_bsend_count == 0)
        opal_condition_signal(&mca_pml_bsend_condition);
    OPAL_THREAD_UNLOCK(&mca_pml_bsend_mutex);
    return OMPI_SUCCESS;
}
static void* thr2_run(opal_object_t* obj)
{
    int i;
    clock_t c1, c2;

    opal_mutex_lock(&mutex);
    c1 = clock();
    for (i = 0; i < TEST_COUNT; i++) {
        opal_condition_signal(&thr1_cond);
        opal_condition_wait(&thr2_cond, &mutex);
        thr2_count++;
    }
    c2 = clock();
    opal_mutex_unlock(&mutex);
    fprintf(stderr, "thr2: time per iteration: %ld usec\n",
            (long)((c2 - c1) / TEST_COUNT));
    return NULL;
}
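/*
 * Hedged sketch of the peer thread in this ping-pong test: the real thr1_run
 * is not part of this excerpt, but it presumably mirrors thr2_run with the
 * roles of thr1_cond and thr2_cond swapped.  The counter thr1_count is an
 * assumed global, shown only for symmetry with thr2_count.
 */
static void* thr1_run_sketch(opal_object_t* obj)
{
    int i;

    opal_mutex_lock(&mutex);
    for (i = 0; i < TEST_COUNT; i++) {
        opal_condition_wait(&thr1_cond, &mutex);   /* wait for thr2's signal */
        thr1_count++;
        opal_condition_signal(&thr2_cond);         /* wake thr2 for the next round */
    }
    opal_mutex_unlock(&mutex);
    return NULL;
}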
static void orte_pls_base_orted_default_wakeup(int fd, short event, void *arg)
{
    /* protect for threads */
    OPAL_THREAD_LOCK(&orte_pls_base.orted_cmd_lock);

    /* cancel the receive - we didn't get everyone's response in time */
    orte_rml.recv_cancel(ORTE_NAME_WILDCARD, ORTE_RML_TAG_PLS_ORTED_ACK);

    /* set the completion status to reflect timeout error */
    completion_status = ORTE_ERR_TIMEOUT;

    /* declare us "done" so we can exit cleanly */
    opal_condition_signal(&orte_pls_base.orted_cmd_cond);

    /* unlock us */
    OPAL_THREAD_UNLOCK(&orte_pls_base.orted_cmd_lock);
}
void mca_io_base_request_progress_fini(void)
{
#if OMPI_ENABLE_PROGRESS_THREADS
    void *ret;

    /* make the helper thread die */
    thread_done = true;
    if (thread_running) {
        opal_condition_signal(&progress_cond);
        opal_thread_join(&progress_thread, &ret);
    }

    /* clean up */
    OBJ_DESTRUCT(&progress_thread);
    OBJ_DESTRUCT(&progress_cond);
    OBJ_DESTRUCT(&progress_mutex);
#endif /* OMPI_ENABLE_PROGRESS_THREADS */
}
int orte_pls_base_orted_cancel_operation(void)
{
    /* protect for threads */
    OPAL_THREAD_LOCK(&orte_pls_base.orted_cmd_lock);

    /* cancel any waiting receive - we don't want to hear it */
    orte_rml.recv_cancel(ORTE_NAME_WILDCARD, ORTE_RML_TAG_PLS_ORTED_ACK);

    /* set the completion status to reflect cancellation -- no need to print anything */
    completion_status = ORTE_ERR_SILENT;

    /* declare us "done" so we can exit cleanly */
    opal_condition_signal(&orte_pls_base.orted_cmd_cond);

    /* unlock us */
    OPAL_THREAD_UNLOCK(&orte_pls_base.orted_cmd_lock);

    return ORTE_SUCCESS;
}
OMPI_DECLSPEC void mca_io_base_request_progress_add(void)
{
#if OMPI_ENABLE_PROGRESS_THREADS
    /* if we don't have a progress thread, make us have a progress thread */
    if (! thread_running) {
        OPAL_THREAD_LOCK(&progress_mutex);
        if (! thread_running) {
            thread_running = true;
            opal_thread_start(&progress_thread);
        }
        OPAL_THREAD_UNLOCK(&progress_mutex);
    }
#endif /* OMPI_ENABLE_PROGRESS_THREADS */

    OPAL_THREAD_ADD32(&mca_io_base_request_num_pending, 1);

#if OMPI_ENABLE_PROGRESS_THREADS
    opal_condition_signal(&progress_cond);
#endif /* OMPI_ENABLE_PROGRESS_THREADS */
}
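/*
 * Hedged sketch (not the actual io base code) of the helper thread that the
 * _add/_fini routines above coordinate with: it sleeps on progress_cond until
 * there is pending work or thread_done is set, then drives progress.  The
 * function name and the use of opal_progress() here are illustrative
 * assumptions about what such a loop typically looks like.
 */
#if OMPI_ENABLE_PROGRESS_THREADS
static void* progress_thread_sketch(opal_object_t *obj)
{
    OPAL_THREAD_LOCK(&progress_mutex);
    while (!thread_done) {
        /* sleep until woken by request_progress_add() or request_progress_fini() */
        while (!thread_done && 0 == mca_io_base_request_num_pending) {
            opal_condition_wait(&progress_cond, &progress_mutex);
        }
        if (thread_done) {
            break;
        }
        OPAL_THREAD_UNLOCK(&progress_mutex);
        opal_progress();            /* drive outstanding requests */
        OPAL_THREAD_LOCK(&progress_mutex);
    }
    OPAL_THREAD_UNLOCK(&progress_mutex);
    return NULL;
}
#endif /* OMPI_ENABLE_PROGRESS_THREADS */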
/*
 * Request completed - free buffer and decrement pending count
 */
int mca_pml_base_bsend_request_fini(ompi_request_t* request)
{
    mca_pml_base_send_request_t* sendreq = (mca_pml_base_send_request_t*)request;
    if (sendreq->req_bytes_packed == 0 ||
        sendreq->req_addr == NULL ||
        sendreq->req_addr == sendreq->req_base.req_addr)
        return OMPI_SUCCESS;

    /* remove from list of pending requests */
    OPAL_THREAD_LOCK(&mca_pml_bsend_mutex);

    /* free buffer */
    mca_pml_bsend_allocator->alc_free(mca_pml_bsend_allocator, sendreq->req_addr);
    sendreq->req_addr = sendreq->req_base.req_addr;

    /* decrement count of buffered requests */
    if (--mca_pml_bsend_count == 0)
        opal_condition_signal(&mca_pml_bsend_condition);
    OPAL_THREAD_UNLOCK(&mca_pml_bsend_mutex);
    return OMPI_SUCCESS;
}
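/*
 * Hedged sketch of a consumer of mca_pml_bsend_condition (e.g. what a
 * buffer-detach path might look like): block until every buffered send
 * charged against the user buffer has been released by the routines above.
 * The function name is illustrative; this is not the actual PML code.
 */
static void mca_pml_bsend_drain_sketch(void)
{
    OPAL_THREAD_LOCK(&mca_pml_bsend_mutex);
    while (mca_pml_bsend_count > 0) {
        opal_condition_wait(&mca_pml_bsend_condition, &mca_pml_bsend_mutex);
    }
    OPAL_THREAD_UNLOCK(&mca_pml_bsend_mutex);
}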
static void orte_pls_base_cmd_ack(int status, orte_process_name_t* sender,
                                  orte_buffer_t* buffer, orte_rml_tag_t tag,
                                  void* cbdata)
{
    int ret;

    OPAL_THREAD_LOCK(&orte_pls_base.orted_cmd_lock);

    orted_cmd_num_active--;
    if (orted_cmd_num_active == 0) {
        opal_condition_signal(&orte_pls_base.orted_cmd_cond);
    } else {
        ret = orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD, ORTE_RML_TAG_PLS_ORTED_ACK,
                                      ORTE_RML_NON_PERSISTENT, orte_pls_base_cmd_ack, NULL);
        if (ret != ORTE_SUCCESS) {
            ORTE_ERROR_LOG(ret);
            /* drop the lock before bailing out of this error path */
            OPAL_THREAD_UNLOCK(&orte_pls_base.orted_cmd_lock);
            return;
        }
    }

    OPAL_THREAD_UNLOCK(&orte_pls_base.orted_cmd_lock);
    return;
}
/*
 * Progress a completed recv:
 * (1) signal a posted recv as complete
 * (2) queue an unexpected message in the recv list
 */
static void mca_oob_tcp_msg_data(mca_oob_tcp_msg_t* msg, mca_oob_tcp_peer_t* peer)
{
    /* attempt to match unexpected message to a posted recv */
    mca_oob_tcp_msg_t* post;
    int rc;

    OPAL_THREAD_LOCK(&mca_oob_tcp_component.tcp_match_lock);

    /* if I'm not a proc, check if this message came from
     * another job family - procs don't need to do this because
     * they always route through their daemons anyway
     */
    if (!ORTE_PROC_IS_MPI) {
        if ((ORTE_JOB_FAMILY(msg->msg_hdr.msg_origin.jobid) !=
             ORTE_JOB_FAMILY(ORTE_PROC_MY_NAME->jobid)) &&
            (0 != ORTE_JOB_FAMILY(msg->msg_hdr.msg_origin.jobid))) {
            /* this message came from a different job family that is not
             * a local slave, so we may not know how to route any reply
             * back to the originator.  Update our route so we can
             * dynamically build the routing table
             */
            if (ORTE_SUCCESS != (rc = orte_routed.update_route(&(msg->msg_hdr.msg_origin),
                                                               &(msg->msg_hdr.msg_src)))) {
                /* Nothing we can do about errors here as we definitely want
                 * the receive to complete, but at least bark loudly
                 */
                ORTE_ERROR_LOG(rc);
            }
        }
    }

    /* match msg against posted receives */
    post = mca_oob_tcp_msg_match_post(&msg->msg_hdr.msg_origin, msg->msg_hdr.msg_tag);
    if (NULL != post) {

        if (NULL == post->msg_uiov || 0 == post->msg_ucnt) {
            opal_output(0, "msg_data returning bad param");
            post->msg_rc = ORTE_ERR_BAD_PARAM;
        } else {
            /* copy msg data into posted recv */
            if (post->msg_flags & ORTE_RML_ALLOC) msg->msg_flags |= ORTE_RML_ALLOC;
            post->msg_rc = mca_oob_tcp_msg_copy(msg, post->msg_uiov, post->msg_ucnt);
            if (post->msg_flags & ORTE_RML_TRUNC) {
                int i, size = 0;
                for (i = 1; i < msg->msg_rwcnt + 1; i++)
                    size += msg->msg_rwiov[i].iov_len;
                post->msg_rc = size;
            }
        }

        if (post->msg_flags & ORTE_RML_PEEK) {
            /* will need message for actual receive */
            opal_list_append(&mca_oob_tcp_component.tcp_msg_recv, &msg->super.super);
        } else {
            MCA_OOB_TCP_MSG_RETURN(msg);
        }

        mca_oob_tcp_component.tcp_match_count++;
        OPAL_THREAD_UNLOCK(&mca_oob_tcp_component.tcp_match_lock);

        if (post->msg_flags & ORTE_RML_PERSISTENT) {
            post->msg_cbfunc(post->msg_rc, &peer->peer_name, post->msg_uiov,
                             post->msg_ucnt, post->msg_hdr.msg_tag, post->msg_cbdata);
        } else {
            mca_oob_tcp_msg_complete(post, &msg->msg_hdr.msg_origin);
        }

        OPAL_THREAD_LOCK(&mca_oob_tcp_component.tcp_match_lock);
        if (--mca_oob_tcp_component.tcp_match_count == 0)
            opal_condition_signal(&mca_oob_tcp_component.tcp_match_cond);
        OPAL_THREAD_UNLOCK(&mca_oob_tcp_component.tcp_match_lock);

    } else {
        opal_list_append(&mca_oob_tcp_component.tcp_msg_recv, (opal_list_item_t*)msg);
        OPAL_THREAD_UNLOCK(&mca_oob_tcp_component.tcp_match_lock);
    }
}
static void orte_daemon_recv(int status, orte_process_name_t* sender,
                             orte_buffer_t *buffer, orte_rml_tag_t tag,
                             void* cbdata)
{
    orte_buffer_t *answer;
    orte_daemon_cmd_flag_t command;
    int ret;
    orte_std_cntr_t n;
    char *contact_info;

    OPAL_TRACE(1);

    OPAL_THREAD_LOCK(&orted_globals.mutex);

    if (orted_globals.debug_daemons) {
        opal_output(0, "[%lu,%lu,%lu] orted_recv: received message from [%ld,%ld,%ld]",
                    ORTE_NAME_ARGS(orte_process_info.my_name), ORTE_NAME_ARGS(sender));
    }

    n = 1;
    if (ORTE_SUCCESS != (ret = orte_dss.unpack(buffer, &command, &n, ORTE_DAEMON_CMD))) {
        ORTE_ERROR_LOG(ret);
        OPAL_THREAD_UNLOCK(&orted_globals.mutex);
        return;
    }

    answer = OBJ_NEW(orte_buffer_t);
    if (NULL == answer) {
        ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
        goto DONE;
    }

    switch (command) {
        /**** EXIT COMMAND ****/
        case ORTE_DAEMON_EXIT_CMD:
            if (orted_globals.debug_daemons) {
                opal_output(0, "[%lu,%lu,%lu] orted_recv: received exit",
                            ORTE_NAME_ARGS(orte_process_info.my_name));
            }
            orted_globals.exit_condition = true;
            opal_condition_signal(&orted_globals.condition);
            break;

        /**** HALT VM COMMAND ****/
        case ORTE_DAEMON_HALT_VM_CMD:
            if (orted_globals.debug_daemons) {
                opal_output(0, "[%lu,%lu,%lu] orted_recv: received halt vm",
                            ORTE_NAME_ARGS(orte_process_info.my_name));
            }
            halt_vm();
            break;

        /**** CONTACT QUERY COMMAND ****/
        case ORTE_DAEMON_CONTACT_QUERY_CMD:
            /* send back contact info */
            contact_info = orte_rml.get_uri();
            if (NULL == contact_info) {
                ORTE_ERROR_LOG(ORTE_ERROR);
                goto CLEANUP;
            }
            if (ORTE_SUCCESS != (ret = orte_dss.pack(answer, &contact_info, 1, ORTE_STRING))) {
                ORTE_ERROR_LOG(ret);
                goto CLEANUP;
            }
            if (0 > orte_rml.send_buffer(sender, answer, tag, 0)) {
                ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE);
            }
            break;

        /**** HOSTFILE COMMAND ****/
        case ORTE_DAEMON_HOSTFILE_CMD:
            ORTE_ERROR_LOG(ORTE_ERR_NOT_IMPLEMENTED);
            break;

        /**** SCRIPTFILE COMMAND ****/
        case ORTE_DAEMON_SCRIPTFILE_CMD:
            ORTE_ERROR_LOG(ORTE_ERR_NOT_IMPLEMENTED);
            break;

        /**** HEARTBEAT COMMAND ****/
        case ORTE_DAEMON_HEARTBEAT_CMD:
            ORTE_ERROR_LOG(ORTE_ERR_NOT_IMPLEMENTED);
            break;

        default:
            ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM);
    }

CLEANUP:
    OBJ_RELEASE(answer);

DONE:
    OPAL_THREAD_UNLOCK(&orted_globals.mutex);

    /* reissue the non-blocking receive */
    ret = orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD, ORTE_RML_TAG_DAEMON,
                                  ORTE_RML_NON_PERSISTENT, orte_daemon_recv, NULL);
    if (ret != ORTE_SUCCESS && ret != ORTE_ERR_NOT_IMPLEMENTED) {
        ORTE_ERROR_LOG(ret);
    }

    return;
}
static void orte_pls_rsh_wait_daemon(pid_t pid, int status, void* cbdata)
{
    orte_pls_daemon_info_t *info = (orte_pls_daemon_info_t*) cbdata;
    orte_mapped_node_t *node;
    orte_mapped_proc_t *proc;
    opal_list_item_t *item;
    int rc;
    unsigned long deltat;
    struct timeval launchstop;

    /* if ssh exited abnormally, set the child processes to aborted
       and print something useful to the user.  The usual reasons for
       ssh to exit abnormally all are a pretty good indication that
       the child processes aren't going to start up properly.

       This should somehow be pushed up to the calling level, but we
       don't really have a way to do that just yet.
    */
    if (! WIFEXITED(status) || 0 != WEXITSTATUS(status)) {
        /* get the mapping for our node so we can cancel the right things */
        rc = orte_rmaps.get_node_map(&node, info->cell, info->nodename, info->active_job);
        if (ORTE_SUCCESS != rc) {
            ORTE_ERROR_LOG(rc);
            goto cleanup;
        }

        /* set state of all processes associated with the daemon as terminated */
        for (item = opal_list_get_first(&node->procs);
             item != opal_list_get_end(&node->procs);
             item = opal_list_get_next(item)) {
            proc = (orte_mapped_proc_t*) item;

            /* Clean up the session directory as if we were the process
               itself.  This covers the case where the process died
               abnormally and didn't cleanup its own session directory. */
            orte_session_dir_finalize(&(proc->name));

            rc = orte_smr.set_proc_state(&(proc->name), ORTE_PROC_STATE_ABORTED, status);
            if (ORTE_SUCCESS != rc) {
                ORTE_ERROR_LOG(rc);
            }
        }
        OBJ_RELEASE(node);

cleanup:
        /* tell the user something went wrong */
        opal_output(0, "ERROR: A daemon on node %s failed to start as expected.",
                    info->nodename);
        opal_output(0, "ERROR: There may be more information available from");
        opal_output(0, "ERROR: the remote shell (see above).");

        if (WIFEXITED(status)) {
            opal_output(0, "ERROR: The daemon exited unexpectedly with status %d.",
                        WEXITSTATUS(status));
        } else if (WIFSIGNALED(status)) {
#ifdef WCOREDUMP
            if (WCOREDUMP(status)) {
                opal_output(0, "The daemon received a signal %d (with core).",
                            WTERMSIG(status));
            } else {
                opal_output(0, "The daemon received a signal %d.", WTERMSIG(status));
            }
#else
            opal_output(0, "The daemon received a signal %d.", WTERMSIG(status));
#endif /* WCOREDUMP */
        } else {
            opal_output(0, "No extra status information is available: %d.", status);
        }

        OPAL_THREAD_LOCK(&mca_pls_rsh_component.lock);
        /* tell the system that this daemon is gone */
        if (ORTE_SUCCESS != (rc = orte_pls_base_remove_daemon(info))) {
            ORTE_ERROR_LOG(rc);
        }

        /* remove the daemon from our local list */
        opal_list_remove_item(&active_daemons, &info->super);
        OBJ_RELEASE(info);
        OPAL_THREAD_UNLOCK(&mca_pls_rsh_component.lock);
    }  /* if abnormal exit */

    /* release any waiting threads */
    OPAL_THREAD_LOCK(&mca_pls_rsh_component.lock);

    /* first check timing request */
    if (mca_pls_rsh_component.timing) {
        if (0 != gettimeofday(&launchstop, NULL)) {
            opal_output(0, "pls_rsh: could not obtain stop time");
        } else {
            deltat = (launchstop.tv_sec - launchstart[info->name->vpid].tv_sec)*1000000 +
                     (launchstop.tv_usec - launchstart[info->name->vpid].tv_usec);
            avgtime = avgtime + deltat;
            if (deltat < mintime) {
                mintime = deltat;
                miniter = (unsigned long)info->name->vpid;
            }
            if (deltat > maxtime) {
                maxtime = deltat;
                maxiter = (unsigned long)info->name->vpid;
            }
        }
    }

    if (mca_pls_rsh_component.num_children-- >= mca_pls_rsh_component.num_concurrent ||
        mca_pls_rsh_component.num_children == 0) {
        opal_condition_signal(&mca_pls_rsh_component.cond);
    }

    if (mca_pls_rsh_component.timing && mca_pls_rsh_component.num_children == 0) {
        if (0 != gettimeofday(&joblaunchstop, NULL)) {
            opal_output(0, "pls_rsh: could not obtain job launch stop time");
        } else {
            deltat = (joblaunchstop.tv_sec - joblaunchstart.tv_sec)*1000000 +
                     (joblaunchstop.tv_usec - joblaunchstart.tv_usec);
            opal_output(0, "pls_rsh: total time to launch job is %lu usec", deltat);
            if (mintime < 999999999) {
                /* had at least one non-local node */
                avgtime = avgtime/opal_list_get_size(&active_daemons);
                opal_output(0, "pls_rsh: average time to launch one daemon %f usec", avgtime);
                opal_output(0, "pls_rsh: min time to launch a daemon was %lu usec for iter %lu",
                            mintime, miniter);
                opal_output(0, "pls_rsh: max time to launch a daemon was %lu usec for iter %lu",
                            maxtime, maxiter);
            } else {
                opal_output(0, "No nonlocal launches to report for timing info");
            }
        }
        free(launchstart);
    }

    OPAL_THREAD_UNLOCK(&mca_pls_rsh_component.lock);
}
/**
 * Setup io for the current node, then tell orterun we are ready for the actual
 * processes.
 * @retval ORTE_SUCCESS
 * @retval error
 */
int orte_odls_bproc_launch_local_procs(orte_gpr_notify_data_t *data, char **base_environ)
{
    odls_bproc_child_t *child;
    opal_list_item_t* item;
    orte_gpr_value_t *value, **values;
    orte_gpr_keyval_t *kval;
    char *node_name;
    int rc;
    orte_std_cntr_t i, j, kv, kv2, *sptr;
    int src = 0;
    orte_buffer_t *ack;
    bool connect_stdin;
    orte_jobid_t jobid;
    int cycle = 0;

    /* first, retrieve the job number we are to launch from the
     * returned data - we can extract the jobid directly from the
     * subscription name we created
     */
    if (ORTE_SUCCESS != (rc = orte_schema.extract_jobid_from_std_trigger_name(&jobid, data->target))) {
        ORTE_ERROR_LOG(rc);
        return rc;
    }

    /**
     * hack for bproc4, change process group so that we do not receive signals
     * from the parent/front-end process, as bproc4 does not currently allow the
     * process to intercept the signal
     */
    setpgid(0, 0);

    /* loop through the returned data to find the global info and
     * the info for processes going onto this node
     */
    values = (orte_gpr_value_t**)(data->values)->addr;
    for (j = 0, i = 0; i < data->cnt && j < (data->values)->size; j++) {  /* loop through all returned values */
        if (NULL != values[j]) {
            i++;
            value = values[j];

            /* this must have come from one of the process containers, so it must
             * contain data for a proc structure - see if it belongs to this node
             */
            for (kv = 0; kv < value->cnt; kv++) {
                kval = value->keyvals[kv];
                if (strcmp(kval->key, ORTE_NODE_NAME_KEY) == 0) {
                    /* Most C-compilers will bark if we try to directly compare the string in the
                     * kval data area against a regular string, so we need to "get" the data
                     * so we can access it */
                    if (ORTE_SUCCESS != (rc = orte_dss.get((void**)&node_name, kval->value, ORTE_STRING))) {
                        ORTE_ERROR_LOG(rc);
                        return rc;
                    }
                    /* if this is our node...must also protect against a zero-length string */
                    if (NULL != node_name && 0 == strcmp(node_name, orte_system_info.nodename)) {
                        /* ...harvest the info into a new child structure */
                        child = OBJ_NEW(odls_bproc_child_t);
                        for (kv2 = 0; kv2 < value->cnt; kv2++) {
                            kval = value->keyvals[kv2];
                            if (strcmp(kval->key, ORTE_PROC_NAME_KEY) == 0) {
                                /* copy the name into the child object */
                                if (ORTE_SUCCESS != (rc = orte_dss.copy((void**)&(child->name), kval->value->data, ORTE_NAME))) {
                                    ORTE_ERROR_LOG(rc);
                                    return rc;
                                }
                                continue;
                            }
                            if (strcmp(kval->key, ORTE_PROC_APP_CONTEXT_KEY) == 0) {
                                if (ORTE_SUCCESS != (rc = orte_dss.get((void**)&sptr, kval->value, ORTE_STD_CNTR))) {
                                    ORTE_ERROR_LOG(rc);
                                    return rc;
                                }
                                child->app_idx = *sptr;  /* save the index into the app_context objects */
                                continue;
                            }
                        } /* for kv2 */
                        /* protect operation on the global list of children */
                        OPAL_THREAD_LOCK(&mca_odls_bproc_component.mutex);
                        opal_list_append(&mca_odls_bproc_component.children, &child->super);
                        opal_condition_signal(&mca_odls_bproc_component.cond);
                        OPAL_THREAD_UNLOCK(&mca_odls_bproc_component.mutex);
                    }
                }
            } /* for kv */
        }
    } /* for j */

    /* set up the io files for our children */
    for (item = opal_list_get_first(&mca_odls_bproc_component.children);
         item != opal_list_get_end(&mca_odls_bproc_component.children);
         item = opal_list_get_next(item)) {
        child = (odls_bproc_child_t *) item;
        if (0 < mca_odls_bproc_component.debug) {
            opal_output(0, "orte_odls_bproc_launch: setting up io for "
                           "[%lu,%lu,%lu] proc rank %lu\n",
                        ORTE_NAME_ARGS((child->name)), child->name->vpid);
        }
        /* only setup to forward stdin if it is rank 0, otherwise connect
         * to /dev/null
         */
        if (0 == child->name->vpid) {
            connect_stdin = true;
        } else {
            connect_stdin = false;
        }

        rc = odls_bproc_setup_stdio(child->name, cycle, jobid,
                                    child->app_idx, connect_stdin);
        if (ORTE_SUCCESS != rc) {
            ORTE_ERROR_LOG(rc);
            goto cleanup;
        }

        cycle++;
    }

    /* message to indicate that we are ready */
    ack = OBJ_NEW(orte_buffer_t);
    rc = orte_dss.pack(ack, &src, 1, ORTE_INT);
    if (ORTE_SUCCESS != rc) {
        ORTE_ERROR_LOG(rc);
    }
    rc = mca_oob_send_packed_nb(ORTE_PROC_MY_HNP, ack, ORTE_RML_TAG_BPROC, 0,
                                odls_bproc_send_cb, NULL);
    if (0 > rc) {
        ORTE_ERROR_LOG(rc);
        goto cleanup;
    }
    rc = ORTE_SUCCESS;

cleanup:
    return rc;
}
static void orte_daemon_recv_pls(int status, orte_process_name_t* sender,
                                 orte_buffer_t *buffer, orte_rml_tag_t tag,
                                 void* cbdata)
{
    orte_daemon_cmd_flag_t command;
    orte_buffer_t answer;
    int ret;
    orte_std_cntr_t n;
    int32_t signal;
    orte_gpr_notify_data_t *ndat;
    orte_jobid_t job;

    OPAL_TRACE(1);

    OPAL_THREAD_LOCK(&orted_globals.mutex);

    if (orted_globals.debug_daemons) {
        opal_output(0, "[%lu,%lu,%lu] orted_recv_pls: received message from [%ld,%ld,%ld]",
                    ORTE_NAME_ARGS(orte_process_info.my_name), ORTE_NAME_ARGS(sender));
    }

    /* unpack the command */
    n = 1;
    if (ORTE_SUCCESS != (ret = orte_dss.unpack(buffer, &command, &n, ORTE_DAEMON_CMD))) {
        ORTE_ERROR_LOG(ret);
        goto CLEANUP;
    }

    switch (command) {
        /**** KILL_LOCAL_PROCS ****/
        case ORTE_DAEMON_KILL_LOCAL_PROCS:
            if (orted_globals.debug_daemons) {
                opal_output(0, "[%lu,%lu,%lu] orted_recv_pls: received kill_local_procs",
                            ORTE_NAME_ARGS(orte_process_info.my_name));
            }
            /* unpack the jobid - could be JOBID_WILDCARD, which would indicate
             * we should kill all local procs.  Otherwise, only kill those within
             * the specified jobid
             */
            n = 1;
            if (ORTE_SUCCESS != (ret = orte_dss.unpack(buffer, &job, &n, ORTE_JOBID))) {
                ORTE_ERROR_LOG(ret);
                goto CLEANUP;
            }

            if (ORTE_SUCCESS != (ret = orte_odls.kill_local_procs(job, true))) {
                ORTE_ERROR_LOG(ret);
            }
            break;

        /**** SIGNAL_LOCAL_PROCS ****/
        case ORTE_DAEMON_SIGNAL_LOCAL_PROCS:
            if (orted_globals.debug_daemons) {
                opal_output(0, "[%lu,%lu,%lu] orted_recv_pls: received signal_local_procs",
                            ORTE_NAME_ARGS(orte_process_info.my_name));
            }
            /* get the signal */
            n = 1;
            if (ORTE_SUCCESS != (ret = orte_dss.unpack(buffer, &signal, &n, ORTE_INT32))) {
                ORTE_ERROR_LOG(ret);
                goto CLEANUP;
            }

            /* see if they specified a process to signal, or if we
             * should just signal them all
             *
             * NOTE: FOR NOW, WE JUST SIGNAL ALL CHILDREN
             */
            if (ORTE_SUCCESS != (ret = orte_odls.signal_local_procs(NULL, signal))) {
                ORTE_ERROR_LOG(ret);
            }
            break;

        /**** ADD_LOCAL_PROCS ****/
        case ORTE_DAEMON_ADD_LOCAL_PROCS:
            if (orted_globals.debug_daemons) {
                opal_output(0, "[%lu,%lu,%lu] orted_recv_pls: received add_local_procs",
                            ORTE_NAME_ARGS(orte_process_info.my_name));
            }
            /* unpack the notify data object */
            n = 1;
            if (ORTE_SUCCESS != (ret = orte_dss.unpack(buffer, &ndat, &n, ORTE_GPR_NOTIFY_DATA))) {
                ORTE_ERROR_LOG(ret);
                goto CLEANUP;
            }

            /* launch the processes */
            if (ORTE_SUCCESS != (ret = orte_odls.launch_local_procs(ndat, orted_globals.saved_environ))) {
                ORTE_ERROR_LOG(ret);
            }

            /* cleanup the memory */
            OBJ_RELEASE(ndat);
            break;

        /**** EXIT COMMAND ****/
        case ORTE_DAEMON_EXIT_CMD:
            if (orted_globals.debug_daemons) {
                opal_output(0, "[%lu,%lu,%lu] orted_recv_pls: received exit",
                            ORTE_NAME_ARGS(orte_process_info.my_name));
            }
            /* no response to send here - we'll send it when nearly exit'd */
            orted_globals.exit_condition = true;
            opal_condition_signal(&orted_globals.condition);
            OPAL_THREAD_UNLOCK(&orted_globals.mutex);
            return;
            break;

        default:
            ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM);
            break;
    }

CLEANUP:
    /* send an ack that command is done */
    OBJ_CONSTRUCT(&answer, orte_buffer_t);
    if (0 > orte_rml.send_buffer(sender, &answer, ORTE_RML_TAG_PLS_ORTED_ACK, 0)) {
        ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE);
    }
    OBJ_DESTRUCT(&answer);

    OPAL_THREAD_UNLOCK(&orted_globals.mutex);

    /* reissue the non-blocking receive */
    ret = orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD, ORTE_RML_TAG_PLS_ORTED,
                                  ORTE_RML_NON_PERSISTENT, orte_daemon_recv_pls, NULL);
    if (ret != ORTE_SUCCESS && ret != ORTE_ERR_NOT_IMPLEMENTED) {
        ORTE_ERROR_LOG(ret);
    }

    return;
}
static void signal_callback(int fd, short flags, void *arg)
{
    OPAL_TRACE(1);

    orted_globals.exit_condition = true;
    opal_condition_signal(&orted_globals.condition);
}
static int construct_child_list(opal_buffer_t *data, orte_jobid_t *job)
{
    int rc;
    orte_vpid_t j, host_daemon;
    orte_odls_child_t *child;
    orte_std_cntr_t cnt;
    orte_process_name_t proc;
    orte_odls_job_t *jobdat=NULL;
    opal_list_item_t *item;
    orte_app_idx_t *app_idx=NULL;
    orte_proc_state_t *states=NULL;
    orte_vpid_t *locations=NULL;
    int32_t *restarts=NULL;
    char **slot_str=NULL;
    bool add_child;
    orte_job_t *jptr, *daemons;
    orte_proc_t *pptr, *dptr;
    orte_node_t *nptr;
    int32_t ljob;

    OPAL_OUTPUT_VERBOSE((5, orte_odls_globals.output,
                         "%s odls:constructing child list",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));

    /* unpack the jobid we are to launch */
    cnt=1;
    if (ORTE_SUCCESS != (rc = opal_dss.unpack(data, job, &cnt, ORTE_JOBID))) {
        /* if the buffer was empty, then we know that all we are doing is
         * launching debugger daemons
         */
        if (ORTE_ERR_UNPACK_READ_PAST_END_OF_BUFFER == rc) {
            goto done;
        }
        *job = ORTE_JOBID_INVALID;
        ORTE_ERROR_LOG(rc);
        goto REPORT_ERROR;
    }

    OPAL_OUTPUT_VERBOSE((5, orte_odls_globals.output,
                         "%s odls:construct_child_list unpacking data to launch job %s",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_JOBID_PRINT(*job)));

    /* even though we are unpacking an add_local_procs cmd, we cannot assume
     * that no job record for this jobid exists.  A race condition exists that
     * could allow another daemon's procs to call us with a collective prior
     * to our unpacking add_local_procs.  So lookup the job record for this jobid
     * and see if it already exists
     */
    for (item = opal_list_get_first(&orte_local_jobdata);
         item != opal_list_get_end(&orte_local_jobdata);
         item = opal_list_get_next(item)) {
        orte_odls_job_t *jdat = (orte_odls_job_t*)item;

        /* is this the specified job? */
        if (jdat->jobid == *job) {
            OPAL_OUTPUT_VERBOSE((5, orte_odls_globals.output,
                                 "%s odls:construct_child_list found existing jobdat for job %s",
                                 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_JOBID_PRINT(*job)));
            jobdat = jdat;
            break;
        }
    }
    if (NULL == jobdat) {
        /* setup jobdat object for this job */
        OPAL_OUTPUT_VERBOSE((5, orte_odls_globals.output,
                             "%s odls:construct_child_list adding new jobdat for job %s",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_JOBID_PRINT(*job)));
        jobdat = OBJ_NEW(orte_odls_job_t);
        jobdat->jobid = *job;
        opal_list_append(&orte_local_jobdata, &jobdat->super);
    }

    /* UNPACK JOB-SPECIFIC DATA */
    /* unpack the job instance */
    cnt=1;
    if (ORTE_SUCCESS != (rc = opal_dss.unpack(data, &jobdat->instance, &cnt, OPAL_STRING))) {
        *job = ORTE_JOBID_INVALID;
        ORTE_ERROR_LOG(rc);
        goto REPORT_ERROR;
    }
    /* unpack the job name */
    cnt=1;
    if (ORTE_SUCCESS != (rc = opal_dss.unpack(data, &jobdat->name, &cnt, OPAL_STRING))) {
        *job = ORTE_JOBID_INVALID;
        ORTE_ERROR_LOG(rc);
        goto REPORT_ERROR;
    }
    /* unpack the job state so we can know if this is a restart vs initial launch */
    cnt=1;
    if (ORTE_SUCCESS != (rc = opal_dss.unpack(data, &jobdat->state, &cnt, ORTE_JOB_STATE))) {
        *job = ORTE_JOBID_INVALID;
        ORTE_ERROR_LOG(rc);
        goto REPORT_ERROR;
    }
    /* unpack the number of nodes involved in this job */
    cnt=1;
    if (ORTE_SUCCESS != (rc = opal_dss.unpack(data, &jobdat->num_nodes, &cnt, ORTE_STD_CNTR))) {
        ORTE_ERROR_LOG(rc);
        goto REPORT_ERROR;
    }
    /* unpack the number of procs in this launch */
    cnt=1;
    if (ORTE_SUCCESS != (rc = opal_dss.unpack(data, &jobdat->num_procs, &cnt, ORTE_VPID))) {
        ORTE_ERROR_LOG(rc);
        goto REPORT_ERROR;
    }
    /* unpack the total slots allocated to us */
    cnt=1;
    if (ORTE_SUCCESS != (rc = opal_dss.unpack(data, &jobdat->total_slots_alloc, &cnt, ORTE_STD_CNTR))) {
        ORTE_ERROR_LOG(rc);
        goto REPORT_ERROR;
    }
    /* unpack the mapping policy for the job */
    cnt=1;
    if (ORTE_SUCCESS != (rc = opal_dss.unpack(data, &jobdat->policy, &cnt, ORTE_MAPPING_POLICY))) {
        ORTE_ERROR_LOG(rc);
        goto REPORT_ERROR;
    }
    /* unpack the cpus/rank for the job */
    cnt=1;
    if (ORTE_SUCCESS != (rc = opal_dss.unpack(data, &jobdat->cpus_per_rank, &cnt, OPAL_INT16))) {
        ORTE_ERROR_LOG(rc);
        goto REPORT_ERROR;
    }
    /* unpack the stride for the job */
    cnt=1;
    if (ORTE_SUCCESS != (rc = opal_dss.unpack(data, &jobdat->stride, &cnt, OPAL_INT16))) {
        ORTE_ERROR_LOG(rc);
        goto REPORT_ERROR;
    }
    /* unpack the control flags for the job */
    cnt=1;
    if (ORTE_SUCCESS != (rc = opal_dss.unpack(data, &jobdat->controls, &cnt, ORTE_JOB_CONTROL))) {
        ORTE_ERROR_LOG(rc);
        goto REPORT_ERROR;
    }
    /* unpack the stdin target for the job */
    cnt=1;
    if (ORTE_SUCCESS != (rc = opal_dss.unpack(data, &jobdat->stdin_target, &cnt, ORTE_VPID))) {
        ORTE_ERROR_LOG(rc);
        goto REPORT_ERROR;
    }
    /* unpack whether or not process recovery is allowed for this job */
    cnt=1;
    if (ORTE_SUCCESS != (rc = opal_dss.unpack(data, &jobdat->enable_recovery, &cnt, OPAL_BOOL))) {
        ORTE_ERROR_LOG(rc);
        goto REPORT_ERROR;
    }
    /* unpack the number of app_contexts for this job */
    cnt=1;
    if (ORTE_SUCCESS != (rc = opal_dss.unpack(data, &jobdat->num_apps, &cnt, ORTE_APP_IDX))) {
        ORTE_ERROR_LOG(rc);
        goto REPORT_ERROR;
    }

    OPAL_OUTPUT_VERBOSE((5, orte_odls_globals.output,
                         "%s odls:construct_child_list unpacking %ld app_contexts",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), (long)jobdat->num_apps));

    /* allocate space and unpack the app_contexts for this job - the HNP checked
     * that there must be at least one, so don't bother checking here again
     */
    if (NULL != jobdat->apps) {
        free(jobdat->apps);
    }
    jobdat->apps = (orte_app_context_t**)malloc(jobdat->num_apps * sizeof(orte_app_context_t*));
    if (NULL == jobdat->apps) {
        ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
        goto REPORT_ERROR;
    }
    cnt = jobdat->num_apps;
    if (ORTE_SUCCESS != (rc = opal_dss.unpack(data, jobdat->apps, &cnt, ORTE_APP_CONTEXT))) {
        ORTE_ERROR_LOG(rc);
        goto REPORT_ERROR;
    }

    /* allocate memory for app_idx */
    app_idx = (orte_app_idx_t*)malloc(jobdat->num_procs * sizeof(orte_app_idx_t));
    /* unpack app_idx in one shot */
    cnt=jobdat->num_procs;
    if (ORTE_SUCCESS != (rc = opal_dss.unpack(data, app_idx, &cnt, ORTE_APP_IDX))) {
        ORTE_ERROR_LOG(rc);
        goto REPORT_ERROR;
    }

    /* allocate memory for states */
    states = (orte_proc_state_t*)malloc(jobdat->num_procs * sizeof(orte_proc_state_t));
    /* unpack states in one shot */
    cnt=jobdat->num_procs;
    if (ORTE_SUCCESS != (rc = opal_dss.unpack(data, states, &cnt, ORTE_PROC_STATE))) {
        ORTE_ERROR_LOG(rc);
        goto REPORT_ERROR;
    }

    /* allocate memory for locations */
    locations = (orte_vpid_t*)malloc(jobdat->num_procs * sizeof(orte_vpid_t));
    /* unpack locations in one shot */
    cnt=jobdat->num_procs;
    if (ORTE_SUCCESS != (rc = opal_dss.unpack(data, locations, &cnt, ORTE_VPID))) {
        ORTE_ERROR_LOG(rc);
        goto REPORT_ERROR;
    }

    /* allocate memory for restarts */
    restarts = (int32_t*)malloc(jobdat->num_procs * sizeof(int32_t));
    /* unpack restarts in one shot */
    cnt=jobdat->num_procs;
    if (ORTE_SUCCESS != (rc = opal_dss.unpack(data, restarts, &cnt, OPAL_INT32))) {
        ORTE_ERROR_LOG(rc);
        goto REPORT_ERROR;
    }

    /* cycle thru the procs and build/update the global arrays */
    daemons = orte_get_job_data_object(ORTE_PROC_MY_NAME->jobid);
    if (NULL == (jptr = orte_get_job_data_object(jobdat->jobid))) {
        jptr = OBJ_NEW(orte_job_t);
        jptr->jobid = jobdat->jobid;
        /* store it on the global job data pool */
        ljob = ORTE_LOCAL_JOBID(jptr->jobid);
        opal_pointer_array_set_item(orte_job_data, ljob, jptr);
    }
    jptr->enable_recovery = jobdat->enable_recovery;
    for (j=0; j < jobdat->num_procs; j++) {
        if (NULL == (pptr = (orte_proc_t*)opal_pointer_array_get_item(jptr->procs, j))) {
            pptr = OBJ_NEW(orte_proc_t);
            pptr->name.jobid = jobdat->jobid;
            pptr->name.vpid = j;
            opal_pointer_array_set_item(jptr->procs, j, pptr);
        }
        pptr->local_rank = 0;
        pptr->node_rank = 0;
        pptr->state = states[j];
        pptr->app_idx = app_idx[j];
        pptr->restarts = restarts[j];
        if (NULL == (nptr = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, locations[j]))) {
            nptr = OBJ_NEW(orte_node_t);
            nptr->index = locations[j];
            opal_pointer_array_set_item(orte_node_pool, locations[j], nptr);
        }
        OBJ_RETAIN(nptr);  /* maintain accounting */
        pptr->node = nptr;
        if (NULL == (dptr = (orte_proc_t*)opal_pointer_array_get_item(daemons->procs, locations[j]))) {
            /* got BIG problem */
            opal_output(0, "%s CANNOT FIND REFERENCED DAEMON %s",
                        ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                        ORTE_VPID_PRINT(locations[j]));
            rc = ORTE_ERR_NOT_FOUND;
            goto REPORT_ERROR;
        }
        OBJ_RETAIN(dptr);
        nptr->daemon = dptr;
    }

    /* cycle through the procs and find mine */
    proc.jobid = jobdat->jobid;
    for (j=0; j < jobdat->num_procs; j++) {
        proc.vpid = j;
        if (ORTE_PROC_STATE_INIT != states[j]) {
            OPAL_OUTPUT_VERBOSE((5, orte_odls_globals.output,
                                 "%s odls:constructing child list - proc %s not at INIT",
                                 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                 ORTE_NAME_PRINT(&proc)));
            continue;
        }
        host_daemon = locations[j];
#if 0
        /* get the vpid of the daemon that is to host this proc */
        if (ORTE_VPID_INVALID == (host_daemon = orte_ess.proc_get_daemon(&proc))) {
            ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
            rc = ORTE_ERR_NOT_FOUND;
            goto REPORT_ERROR;
        }
#endif

        OPAL_OUTPUT_VERBOSE((5, orte_odls_globals.output,
                             "%s odls:constructing child list - checking proc %s on daemon %s",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                             ORTE_NAME_PRINT(&proc), ORTE_VPID_PRINT(host_daemon)));

        /* does this proc belong to us? */
        if (ORTE_PROC_MY_NAME->vpid == host_daemon) {

            OPAL_OUTPUT_VERBOSE((5, orte_odls_globals.output,
                                 "%s odls:constructing child list - found proc %s for me!",
                                 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                 ORTE_NAME_PRINT(&proc)));

            add_child = true;
            /* if this job is restarting procs, then we need to treat things
             * a little differently.  We may be adding a proc to our local
             * children (if the proc moved here from somewhere else), or we
             * may simply be restarting someone already here.
             */
            if (ORTE_JOB_STATE_RESTART == jobdat->state) {
                /* look for this job on our current list of children */
                for (item = opal_list_get_first(&orte_local_children);
                     item != opal_list_get_end(&orte_local_children);
                     item = opal_list_get_next(item)) {
                    child = (orte_odls_child_t*)item;
                    if (child->name->jobid == proc.jobid &&
                        child->name->vpid == proc.vpid) {
                        /* do not duplicate this child on the list! */
                        OPAL_OUTPUT_VERBOSE((5, orte_odls_globals.output,
                                             "proc %s is on list and is %s",
                                             ORTE_NAME_PRINT(&proc),
                                             (child->alive) ? "ALIVE" : "DEAD"));
                        add_child = false;
                        child->do_not_barrier = true;
                        child->restarts = restarts[j];
                        /* mark that this app_context is being used on this node */
                        jobdat->apps[app_idx[j]]->used_on_node = true;
                        break;
                    }
                }
            }

            /* if we need to add the child, do so */
            if (add_child) {
                OPAL_OUTPUT_VERBOSE((5, orte_odls_globals.output,
                                     "adding proc %s to my local list",
                                     ORTE_NAME_PRINT(&proc)));
                /* keep tabs of the number of local procs */
                jobdat->num_local_procs++;
                /* add this proc to our child list */
                child = OBJ_NEW(orte_odls_child_t);
                /* copy the name to preserve it */
                if (ORTE_SUCCESS != (rc = opal_dss.copy((void**)&child->name, &proc, ORTE_NAME))) {
                    ORTE_ERROR_LOG(rc);
                    goto REPORT_ERROR;
                }
                child->app_idx = app_idx[j];  /* save the index into the app_context objects */
                /* if the job is in restart mode, the child must not barrier when launched */
                if (ORTE_JOB_STATE_RESTART == jobdat->state) {
                    child->do_not_barrier = true;
                }
                child->restarts = restarts[j];
                if (NULL != slot_str && NULL != slot_str[j]) {
                    child->slot_list = strdup(slot_str[j]);
                }
                /* mark that this app_context is being used on this node */
                jobdat->apps[app_idx[j]]->used_on_node = true;
                /* protect operation on the global list of children */
                OPAL_THREAD_LOCK(&orte_odls_globals.mutex);
                opal_list_append(&orte_local_children, &child->super);
                opal_condition_signal(&orte_odls_globals.cond);
                OPAL_THREAD_UNLOCK(&orte_odls_globals.mutex);
            }
        }
    }

    /* flag that the launch msg has been processed so daemon collectives can proceed */
    OPAL_THREAD_LOCK(&jobdat->lock);
    jobdat->launch_msg_processed = true;
    opal_condition_broadcast(&jobdat->cond);
    OPAL_THREAD_UNLOCK(&jobdat->lock);

done:
    if (NULL != app_idx) {
        free(app_idx);
        app_idx = NULL;
    }
    if (NULL != states) {
        free(states);
        states = NULL;
    }
    if (NULL != slot_str) {
        for (j=0; j < jobdat->num_procs; j++) {
            free(slot_str[j]);
        }
        free(slot_str);
        slot_str = NULL;
    }

    return ORTE_SUCCESS;

REPORT_ERROR:
    /* we have to report an error back to the HNP so we don't just
     * hang.  Although there shouldn't be any errors once this is
     * all debugged, it is still good practice to have a way
     * for it to happen - especially so developers don't have to
     * deal with the hang!
     */
    orte_errmgr.update_state(*job, ORTE_JOB_STATE_NEVER_LAUNCHED,
                             NULL, ORTE_PROC_STATE_UNDEF, 0, rc);

    if (NULL != app_idx) {
        free(app_idx);
        app_idx = NULL;
    }
    if (NULL != states) {
        free(states);
        states = NULL;
    }
    if (NULL != slot_str && NULL != jobdat) {
        for (j=0; j < jobdat->num_procs; j++) {
            if (NULL != slot_str[j]) {
                free(slot_str[j]);
            }
        }
        free(slot_str);
        slot_str = NULL;
    }

    return rc;
}
static void orte_iof_base_endpoint_write_handler(int sd, short flags, void *user)
{
    int errno_save;
    orte_iof_base_endpoint_t* endpoint = (orte_iof_base_endpoint_t*)user;

    /*
     * step through the list of queued fragments and attempt to write
     * until the output descriptor would block
     */
    OPAL_THREAD_LOCK(&orte_iof_base.iof_lock);

    while (opal_list_get_size(&endpoint->ep_sink_frags)) {
        orte_iof_base_frag_t* frag = (orte_iof_base_frag_t*)opal_list_get_first(&endpoint->ep_sink_frags);
        int rc;

        /* close connection on zero byte message */
        if (frag->frag_len == 0) {
            orte_iof_base_endpoint_closed(endpoint);
            OPAL_THREAD_UNLOCK(&orte_iof_base.iof_lock);
            return;
        }

        /* progress pending messages */
        rc = write(endpoint->ep_fd, frag->frag_ptr, frag->frag_len);
        errno_save = errno;
        if (rc < 0) {
            if (EAGAIN == errno_save) {
                break;
            }
            if (EINTR == errno_save) {
                continue;
            }
            /* All other errors -- to include sigpipe -- mean that
               Something Bad happened and we should abort in despair. */
            orte_iof_base_endpoint_closed(endpoint);

            /* Send an ACK-AND-CLOSE back to the service so that it
               knows not to wait for any further ACKs */
            orte_iof_base_frag_ack(frag, true);

            OPAL_THREAD_UNLOCK(&orte_iof_base.iof_lock);
            return;
        }
        frag->frag_len -= rc;
        frag->frag_ptr += rc;
        if (frag->frag_len > 0) {
            break;
        }
        opal_list_remove_item(&endpoint->ep_sink_frags, &frag->super.super);
        OPAL_THREAD_UNLOCK(&orte_iof_base.iof_lock);
        orte_iof_base_frag_ack(frag, false);
        OPAL_THREAD_LOCK(&orte_iof_base.iof_lock);
    }

    /* is there anything left to write? */
    if (opal_list_get_size(&endpoint->ep_sink_frags) == 0) {
        opal_event_del(&endpoint->ep_event);
        if (orte_iof_base.iof_waiting) {
            opal_condition_signal(&orte_iof_base.iof_condition);
        }
    }
    OPAL_THREAD_UNLOCK(&orte_iof_base.iof_lock);
}