void orte_iof_base_write_handler(int fd, short event, void *cbdata) { orte_iof_sink_t *sink = (orte_iof_sink_t*)cbdata; orte_iof_write_event_t *wev = sink->wev; opal_list_item_t *item; orte_iof_write_output_t *output; int num_written; OPAL_OUTPUT_VERBOSE((1, orte_iof_base.iof_output, "%s write:handler writing data to %d", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), wev->fd)); while (NULL != (item = opal_list_remove_first(&wev->outputs))) { output = (orte_iof_write_output_t*)item; if (0 == output->numbytes) { /* indicates we are to close this stream */ OBJ_RELEASE(sink); return; } num_written = write(wev->fd, output->data, output->numbytes); if (num_written < 0) { if (EAGAIN == errno || EINTR == errno) { /* push this item back on the front of the list */ opal_list_prepend(&wev->outputs, item); /* leave the write event running so it will call us again * when the fd is ready. */ return; } /* otherwise, something bad happened so all we can do is abort * this attempt */ OBJ_RELEASE(output); goto ABORT; } else if (num_written < output->numbytes) { /* incomplete write - adjust data to avoid duplicate output */ memmove(output->data, &output->data[num_written], output->numbytes - num_written); /* push this item back on the front of the list */ opal_list_prepend(&wev->outputs, item); /* leave the write event running so it will call us again * when the fd is ready */ return; } OBJ_RELEASE(output); } ABORT: opal_event_del(wev->ev); wev->pending = false; }
/*
 * Retry the FMA/RDMA posts that are parked on the device's pending_post list.
 *
 * At most as many descriptors as were queued on entry are attempted. Each
 * descriptor is taken off the list under the device lock and handed to
 * mca_btl_ugni_repost(); the first one that does not repost successfully is
 * put back at the head of the list and processing stops.
 *
 * @return 0 if the list was empty on entry, 1 otherwise.
 */
static inline int mca_btl_ugni_post_pending (mca_btl_ugni_module_t *ugni_module, mca_btl_ugni_device_t *device)
{
    int remaining = opal_list_get_size (&device->pending_post);

    /* fast path: nothing is waiting for resources */
    if (OPAL_LIKELY(0 == remaining)) {
        return 0;
    }

    BTL_VERBOSE(("progressing %d pending FMA/RDMA operations", remaining));

    while (remaining-- > 0) {
        mca_btl_ugni_post_descriptor_t *desc;

        mca_btl_ugni_device_lock (device);
        desc = (mca_btl_ugni_post_descriptor_t *) opal_list_remove_first (&device->pending_post);
        mca_btl_ugni_device_unlock (device);

        if (NULL == desc) {
            /* the list drained in the meantime */
            break;
        }

        if (OPAL_SUCCESS != mca_btl_ugni_repost (ugni_module, desc)) {
            /* could not repost: keep it first in line and stop for now */
            mca_btl_ugni_device_lock (device);
            opal_list_prepend (&device->pending_post, (opal_list_item_t *) desc);
            mca_btl_ugni_device_unlock (device);
            break;
        }
    }

    return 1;
}
/*
 * Drop one reference on a registration and, once the last reference is gone,
 * either park it on the MRU list or really deregister it.
 *
 * All work happens under the rcache lock. With leave_pinned set, and unless
 * the registration carries CACHE_BYPASS or PERSIST, the memory stays
 * registered and the entry is put at the head of the MRU list. Otherwise
 * dereg_mem() is called and, on success, the entry is removed from the rcache
 * (unless CACHE_BYPASS) and returned to the free list.
 *
 * @return OMPI_SUCCESS, or the error returned by dereg_mem().
 */
int mca_mpool_rdma_deregister(struct mca_mpool_base_module_t *mpool,
                              mca_mpool_base_registration_t *reg)
{
    mca_mpool_rdma_module_t *rdma_module = (mca_mpool_rdma_module_t*)mpool;
    int status = OMPI_SUCCESS;

    assert(reg->ref_count > 0);

    OPAL_THREAD_LOCK(&mpool->rcache->lock);

    reg->ref_count--;
    if (reg->ref_count > 0) {
        /* somebody else still holds this registration */
        goto unlock;
    }

    if (!mca_mpool_rdma_component.leave_pinned ||
        (reg->flags & (MCA_MPOOL_FLAGS_CACHE_BYPASS|MCA_MPOOL_FLAGS_PERSIST))) {
        status = dereg_mem(mpool, reg);
        if (OMPI_SUCCESS == status) {
            if (0 == (reg->flags & MCA_MPOOL_FLAGS_CACHE_BYPASS)) {
                mpool->rcache->rcache_delete(mpool->rcache, reg);
            }
            OMPI_FREE_LIST_RETURN(&rdma_module->reg_list,
                                  (ompi_free_list_item_t*)reg);
        }
    } else {
        /* leave_pinned: keep the memory registered and remember the entry
         * on the MRU list for future use */
        opal_list_prepend(&rdma_module->mru_list, (opal_list_item_t*)reg);
    }

unlock:
    OPAL_THREAD_UNLOCK(&mpool->rcache->lock);
    return status;
}
/*
 * Lock-free variant of deregister: this function takes no lock itself.
 *
 * Drops one reference on the registration. When the count reaches zero the
 * entry is either kept on the LRU list (leave_pinned set and the registration
 * is cacheable) or removed from the VMA tree (unless CACHE_BYPASS), closed
 * with cuda_closememhandle() and, if that succeeds, returned to the free list.
 *
 * @return OPAL_SUCCESS, or the error returned by cuda_closememhandle().
 */
int mca_rcache_rgpusm_deregister_no_lock(struct mca_rcache_base_module_t *rcache,
                                         mca_rcache_base_registration_t *reg)
{
    mca_rcache_rgpusm_module_t *module = (mca_rcache_rgpusm_module_t*)rcache;
    int status;

    assert(reg->ref_count > 0);

    reg->ref_count--;
    opal_output(-1, "Deregister: reg->ref_count=%d", (int)reg->ref_count);
    if (reg->ref_count > 0) {
        /* still in use elsewhere */
        return OPAL_SUCCESS;
    }

    if (mca_rcache_rgpusm_component.leave_pinned && registration_is_cachebale(reg)) {
        /* keep the handle open and remember the entry for later reuse */
        opal_list_prepend(&module->lru_list, (opal_list_item_t*)reg);
        return OPAL_SUCCESS;
    }

    /* take it out of the rcache before closing the handle */
    if (0 == (reg->flags & MCA_RCACHE_FLAGS_CACHE_BYPASS)) {
        mca_rcache_base_vma_delete (module->vma_module, reg);
    }

    assert(reg->ref_count == 0);
    status = cuda_closememhandle (NULL, reg);
    if (OPAL_SUCCESS == status) {
        opal_free_list_return (&module->reg_list, (opal_free_list_item_t*)reg);
    }

    return status;
}
/*
 * Give a module-specific IO MPI_Request back to its file.
 *
 * The request is not destroyed: since it has already been initialized for
 * the file's io module, it is put at the head of the file's own request
 * list (f_io_requests), under f_io_requests_lock.
 */
OMPI_DECLSPEC void mca_io_base_request_free(ompi_file_t *file,
                                            mca_io_base_request_t *req)
{
    opal_list_item_t *entry = (opal_list_item_t*) req;

    OPAL_THREAD_LOCK(&file->f_io_requests_lock);
    opal_list_prepend(&file->f_io_requests, entry);
    OPAL_THREAD_UNLOCK(&file->f_io_requests_lock);
}
/*
 * Drop one reference on a registration; on the last reference either keep it
 * on the LRU list or deregister it.
 *
 * The reference count is manipulated under the rcache lock. If references
 * remain, the function returns immediately. Otherwise:
 *   - with leave_pinned set and a cacheable registration, the entry goes to
 *     the head of the LRU list;
 *   - else it is removed from the rcache (unless CACHE_BYPASS), the lock is
 *     dropped around the deregister_mem() callback, and on success the entry
 *     is returned to the free list.
 * On both of these paths rcache_clean() is called after the lock has been
 * released.
 *
 * @return OMPI_SUCCESS, or the error returned by deregister_mem().
 */
int mca_mpool_rgpusm_deregister(struct mca_mpool_base_module_t *mpool,
                                mca_mpool_base_registration_t *reg)
{
    mca_mpool_rgpusm_module_t *module = (mca_mpool_rgpusm_module_t*)mpool;
    int status = OMPI_SUCCESS;

    assert(reg->ref_count > 0);

    OPAL_THREAD_LOCK(&mpool->rcache->lock);
    reg->ref_count--;
    opal_output(-1, "Deregister: reg->ref_count=%d", (int)reg->ref_count);
    if (reg->ref_count > 0) {
        /* still referenced: nothing more to do */
        OPAL_THREAD_UNLOCK(&mpool->rcache->lock);
        return OMPI_SUCCESS;
    }

    if (!mca_mpool_rgpusm_component.leave_pinned || !registration_is_cachebale(reg)) {
        /* take it out of the rcache first */
        if (0 == (reg->flags & MCA_MPOOL_FLAGS_CACHE_BYPASS)) {
            mpool->rcache->rcache_delete(mpool->rcache, reg);
        }

        /* the deregistration itself runs without the rcache lock */
        OPAL_THREAD_UNLOCK(&mpool->rcache->lock);

        assert(reg->ref_count == 0);
        status = module->resources.deregister_mem(module->resources.reg_data, reg);

        OPAL_THREAD_LOCK(&mpool->rcache->lock);

        if (OMPI_SUCCESS == status) {
            OMPI_FREE_LIST_RETURN_MT(&module->reg_list,
                                     (ompi_free_list_item_t*)reg);
        }
    } else {
        /* leave_pinned: don't deregister, remember the entry on the LRU
         * list for future use */
        opal_list_prepend(&module->lru_list, (opal_list_item_t*)reg);
    }
    OPAL_THREAD_UNLOCK(&mpool->rcache->lock);

    /* clean up any vmas whose deletion was deferred */
    mpool->rcache->rcache_clean(mpool->rcache);

    return status;
}
/*
 * Drop one reference on a registration; on the last reference either keep it
 * on the LRU list or deregister it.
 *
 * The reference count is manipulated under the rcache lock. If references
 * remain, the function returns immediately. Otherwise:
 *   - with leave_pinned set and a cacheable registration, the entry goes to
 *     the head of the LRU list (and this is reported at verbosity 20);
 *   - else it is removed from the rcache (unless CACHE_BYPASS), the lock is
 *     dropped around the deregister_mem() callback, and on success the entry
 *     is returned to the free list.
 *
 * @return OPAL_SUCCESS, or the error returned by deregister_mem().
 */
int mca_mpool_rgpusm_deregister(struct mca_mpool_base_module_t *mpool,
                                mca_mpool_base_registration_t *reg)
{
    mca_mpool_rgpusm_module_t *module = (mca_mpool_rgpusm_module_t*)mpool;
    int status = OPAL_SUCCESS;

    assert(reg->ref_count > 0);

    opal_mutex_lock (&mpool->rcache->lock);
    reg->ref_count--;
    opal_output(-1, "Deregister: reg->ref_count=%d", (int)reg->ref_count);
    if (reg->ref_count > 0) {
        /* still referenced: nothing more to do */
        opal_mutex_unlock (&mpool->rcache->lock);
        return OPAL_SUCCESS;
    }

    if (!mca_mpool_rgpusm_component.leave_pinned || !registration_is_cacheable(reg)) {
        /* take it out of the rcache first */
        if (0 == (reg->flags & MCA_MPOOL_FLAGS_CACHE_BYPASS)) {
            mpool->rcache->rcache_delete(mpool->rcache, reg);
        }

        /* the deregistration itself runs without the rcache lock */
        opal_mutex_unlock (&mpool->rcache->lock);

        assert(reg->ref_count == 0);
        status = module->resources.deregister_mem(module->resources.reg_data, reg);

        opal_mutex_lock (&mpool->rcache->lock);

        if (OPAL_SUCCESS == status) {
            opal_free_list_return (&module->reg_list,
                                   (opal_free_list_item_t*)reg);
        }
    } else {
        /* leave_pinned: don't deregister, remember the entry on the LRU
         * list for future use */
        opal_output_verbose(20, mca_mpool_rgpusm_component.output,
                            "RGPUSM: Deregister: addr=%p, size=%d: cacheable and pinned, leave in cache, PUSH IN LRU",
                            reg->base, (int)(reg->bound - reg->base + 1));
        opal_list_prepend(&module->lru_list, (opal_list_item_t*)reg);
    }
    opal_mutex_unlock (&mpool->rcache->lock);

    return status;
}
void ompi_mtl_portals4_pending_list_progress() { int ret, val; opal_list_item_t *item; ompi_mtl_portals4_pending_request_t *pending; while ((!ompi_mtl_portals4.flowctl.flowctl_active) && (0 != opal_list_get_size(&ompi_mtl_portals4.flowctl.pending_sends))) { val = OPAL_THREAD_ADD32(&ompi_mtl_portals4.flowctl.send_slots, -1); if (val < 0) { OPAL_THREAD_ADD32(&ompi_mtl_portals4.flowctl.send_slots, 1); return; } item = opal_list_remove_first(&ompi_mtl_portals4.flowctl.pending_sends); if (OPAL_UNLIKELY(NULL == item)) { OPAL_THREAD_ADD32(&ompi_mtl_portals4.flowctl.send_slots, 1); return; } pending = (ompi_mtl_portals4_pending_request_t*) item; if (pending->length <= ompi_mtl_portals4.eager_limit) { ret = ompi_mtl_portals4_short_isend(pending->mode, pending->start, pending->length, pending->contextid, pending->tag, pending->my_rank, pending->ptl_proc, pending->ptl_request); } else { ret = ompi_mtl_portals4_long_isend(pending->start, pending->length, pending->contextid, pending->tag, pending->my_rank, pending->ptl_proc, pending->ptl_request); } if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { opal_list_prepend(&ompi_mtl_portals4.flowctl.pending_sends, &pending->super.super); OPAL_THREAD_ADD32(&ompi_mtl_portals4.flowctl.send_slots, 1); } } }
/*
 * Drop one reference on a registration; on the last reference either keep it
 * on the LRU list or close its memory handle.
 *
 * The reference count is manipulated under the rcache lock. If references
 * remain, the function returns immediately. Otherwise:
 *   - with leave_pinned set and a cacheable registration, the entry goes to
 *     the head of the LRU list (and this is reported at verbosity 20);
 *   - else it is removed from the VMA tree (unless CACHE_BYPASS), the lock is
 *     dropped around cuda_closememhandle(), and on success the entry is
 *     returned to the free list.
 *
 * @return OPAL_SUCCESS, or the error returned by cuda_closememhandle().
 */
int mca_rcache_rgpusm_deregister(struct mca_rcache_base_module_t *rcache,
                                 mca_rcache_base_registration_t *reg)
{
    mca_rcache_rgpusm_module_t *module = (mca_rcache_rgpusm_module_t*)rcache;
    int status = OPAL_SUCCESS;

    assert(reg->ref_count > 0);

    OPAL_THREAD_LOCK(&rcache->lock);
    reg->ref_count--;
    opal_output(-1, "Deregister: reg->ref_count=%d", (int)reg->ref_count);
    if (reg->ref_count > 0) {
        /* still referenced: nothing more to do */
        OPAL_THREAD_UNLOCK(&rcache->lock);
        return OPAL_SUCCESS;
    }

    if (!mca_rcache_rgpusm_component.leave_pinned || !registration_is_cachebale(reg)) {
        /* take it out of the rcache first */
        if (0 == (reg->flags & MCA_RCACHE_FLAGS_CACHE_BYPASS)) {
            mca_rcache_base_vma_delete (module->vma_module, reg);
        }

        /* the handle is closed without holding the rcache lock */
        OPAL_THREAD_UNLOCK(&rcache->lock);

        assert(reg->ref_count == 0);
        status = cuda_closememhandle (NULL, reg);

        OPAL_THREAD_LOCK(&rcache->lock);

        if (OPAL_SUCCESS == status) {
            opal_free_list_return (&module->reg_list,
                                   (opal_free_list_item_t*)reg);
        }
    } else {
        /* leave_pinned: don't deregister, remember the entry on the LRU
         * list for future use */
        opal_output_verbose(20, mca_rcache_rgpusm_component.output,
                            "RGPUSM: Deregister: addr=%p, size=%d: cacheable and pinned, leave in cache, PUSH IN LRU",
                            reg->base, (int)(reg->bound - reg->base + 1));
        opal_list_prepend(&module->lru_list, (opal_list_item_t*)reg);
    }
    OPAL_THREAD_UNLOCK(&rcache->lock);

    return status;
}
/*
 * Insert an item at a specific place in a list
 *
 * Adds item to list at index idx, so that the element previously at that
 * index ends up right after it.
 *
 * @param list  list to insert into
 * @param item  item to link in
 * @param idx   zero-based position; must be smaller than the current length
 *              (so this function cannot be used to append)
 *
 * @return true if the item was inserted, false if idx is at or beyond the
 *         current length of the list.
 *
 * NOTE(review): a negative idx is not rejected; it passes the range check
 * and takes the same path as idx == 1.
 */
bool opal_list_insert(opal_list_t *list, opal_list_item_t *item, long long idx)
{
    /* the counter has the same width as idx so that the walk below cannot
     * overflow on very long lists */
    long long i;
    volatile opal_list_item_t *ptr, *next;

    if ( idx >= (long long)list->opal_list_length ) {
        return false;
    }

    if ( 0 == idx ) {
        /* opal_list_prepend() is a complete insert operation - callers in
         * this code base query opal_list_get_size() right after it - so it
         * accounts for the new element itself.  Return here rather than
         * falling through to the length update below, which would count
         * the item a second time.
         * NOTE(review): confirm against the opal_list_prepend() definition. */
        opal_list_prepend(list, item);
        return true;
    }

#if OPAL_ENABLE_DEBUG
    /* Spot check: ensure that this item is previously on no lists */
    assert(0 == item->opal_list_item_refcount);
#endif

    /* pointer to element 0 */
    ptr = list->opal_list_sentinel.opal_list_next;
    /* advance to the element that will precede the new item */
    for ( i = 0; i < idx-1; i++ ) {
        ptr = ptr->opal_list_next;
    }

    /* splice item in between ptr and its successor */
    next = ptr->opal_list_next;
    item->opal_list_next = next;
    item->opal_list_prev = ptr;
    next->opal_list_prev = item;
    ptr->opal_list_next = item;

#if OPAL_ENABLE_DEBUG
    /* Spot check: ensure this item is only on the list that we just
       inserted it into */
    (void)opal_atomic_add( &(item->opal_list_item_refcount), 1 );
    assert(1 == item->opal_list_item_refcount);
    item->opal_list_item_belong_to = list;
#endif

    /* the manual splice above has to maintain the length itself */
    list->opal_list_length++;
    return true;
}
/*
 * Push out the fragments that are waiting on an endpoint's frag_wait_list.
 *
 * Fragments are sent in list order until the list is empty or a send reports
 * an error (a negative status). On OMPI_ERR_OUT_OF_RESOURCE the fragment is
 * put back at the head of the list for a later attempt; on any other error
 * the fragment is completed with that error. In both cases processing stops.
 *
 * @return OMPI_SUCCESS if the list was drained, otherwise the failing status.
 */
int mca_btl_ugni_progress_send_wait_list (mca_btl_base_endpoint_t *endpoint)
{
    for (;;) {
        mca_btl_ugni_base_frag_t *frag;
        int status;

        frag = (mca_btl_ugni_base_frag_t *) opal_list_remove_first (&endpoint->frag_wait_list);
        if (NULL == frag) {
            return OMPI_SUCCESS;
        }

        status = mca_btl_ugni_send_frag (endpoint, frag);
        if (OPAL_UNLIKELY(OMPI_SUCCESS > status)) {
            if (OPAL_LIKELY(OMPI_ERR_OUT_OF_RESOURCE == status)) {
                /* no resources right now: keep the fragment first in line */
                opal_list_prepend (&endpoint->frag_wait_list, (opal_list_item_t *) frag);
            } else {
                /* hard failure: report it through the fragment's completion */
                mca_btl_ugni_frag_complete (frag, status);
            }
            return status;
        }
    }
}
/*
 * Lock-free variant of deregister: this function takes no lock itself.
 *
 * Drops one reference on the registration. When the count reaches zero the
 * entry is either kept on the LRU list (leave_pinned set and the registration
 * is cacheable) or removed from the rcache (unless CACHE_BYPASS), handed to
 * the deregister_mem() callback and, if that succeeds, returned to the free
 * list.
 *
 * @return OPAL_SUCCESS, or the error returned by deregister_mem().
 */
int mca_mpool_rgpusm_deregister_no_lock(struct mca_mpool_base_module_t *mpool,
                                        mca_mpool_base_registration_t *reg)
{
    mca_mpool_rgpusm_module_t *module = (mca_mpool_rgpusm_module_t*)mpool;
    int status;

    assert(reg->ref_count > 0);

    reg->ref_count--;
    opal_output(-1, "Deregister: reg->ref_count=%d", (int)reg->ref_count);
    if (reg->ref_count > 0) {
        /* still in use elsewhere */
        return OPAL_SUCCESS;
    }

    if (mca_mpool_rgpusm_component.leave_pinned && registration_is_cachebale(reg)) {
        /* keep the memory registered and remember the entry for reuse */
        opal_list_prepend(&module->lru_list, (opal_list_item_t*)reg);
        return OPAL_SUCCESS;
    }

    /* take it out of the rcache before deregistering */
    if (0 == (reg->flags & MCA_MPOOL_FLAGS_CACHE_BYPASS)) {
        mpool->rcache->rcache_delete(mpool->rcache, reg);
    }

    assert(reg->ref_count == 0);
    status = module->resources.deregister_mem(module->resources.reg_data, reg);
    if (OPAL_SUCCESS == status) {
        opal_free_list_return (&module->reg_list, (opal_free_list_item_t*)reg);
    }

    return status;
}
/******************************************************************************* * Parasite cleanup */ static int mca_pml_v_component_parasite_finalize(void) { mca_base_component_list_item_t *cli = NULL; V_OUTPUT_VERBOSE(500, "parasite_finalize"); /* Make sure we'll get closed again with the true close function */ mca_pml_v_component.pmlm_version.mca_close_component = mca_pml_v_component_parasite_close; cli = OBJ_NEW(mca_base_component_list_item_t); cli->cli_component = (mca_base_component_t *) &mca_pml_v_component; opal_list_prepend(&ompi_pml_base_framework.framework_components, (opal_list_item_t *) cli); /* finalize vprotocol component */ if(mca_vprotocol_base_selected()) mca_vprotocol_component.pmlm_finalize(); if(mca_pml_v.host_pml_component.pmlm_finalize != NULL) return mca_pml_v.host_pml_component.pmlm_finalize(); else return OMPI_SUCCESS; }
static void stdin_write_handler(int fd, short event, void *cbdata) { orte_iof_sink_t *sink = (orte_iof_sink_t*)cbdata; orte_iof_write_event_t *wev = sink->wev; opal_list_item_t *item; orte_iof_write_output_t *output; int num_written; OPAL_OUTPUT_VERBOSE((1, orte_iof_base.iof_output, "%s orted:stdin:write:handler writing data to %d", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), wev->fd)); /* lock us up to protect global operations */ OPAL_THREAD_LOCK(&mca_iof_orted_component.lock); wev->pending = false; while (NULL != (item = opal_list_remove_first(&wev->outputs))) { output = (orte_iof_write_output_t*)item; if (0 == output->numbytes) { /* this indicates we are to close the fd - there is * nothing to write */ OPAL_OUTPUT_VERBOSE((20, orte_iof_base.iof_output, "%s iof:orted closing fd %d on write event due to zero bytes output", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), wev->fd)); OBJ_RELEASE(wev); sink->wev = NULL; goto DEPART; } num_written = write(wev->fd, output->data, output->numbytes); OPAL_OUTPUT_VERBOSE((1, orte_iof_base.iof_output, "%s orted:stdin:write:handler wrote %d bytes", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), num_written)); if (num_written < 0) { if (EAGAIN == errno || EINTR == errno) { /* push this item back on the front of the list */ opal_list_prepend(&wev->outputs, item); /* leave the write event running so it will call us again * when the fd is ready. 
*/ wev->pending = true; opal_event_add(wev->ev, 0); goto CHECK; } /* otherwise, something bad happened so all we can do is declare an * error and abort */ OBJ_RELEASE(output); OPAL_OUTPUT_VERBOSE((20, orte_iof_base.iof_output, "%s iof:orted closing fd %d on write event due to negative bytes written", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), wev->fd)); OBJ_RELEASE(wev); sink->wev = NULL; /* tell the HNP to stop sending us stuff */ if (!mca_iof_orted_component.xoff) { mca_iof_orted_component.xoff = true; orte_iof_orted_send_xonxoff(ORTE_IOF_XOFF); } goto DEPART; } else if (num_written < output->numbytes) { OPAL_OUTPUT_VERBOSE((1, orte_iof_base.iof_output, "%s orted:stdin:write:handler incomplete write %d - adjusting data", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), num_written)); /* incomplete write - adjust data to avoid duplicate output */ memmove(output->data, &output->data[num_written], output->numbytes - num_written); /* push this item back on the front of the list */ opal_list_prepend(&wev->outputs, item); /* leave the write event running so it will call us again * when the fd is ready. */ wev->pending = true; opal_event_add(wev->ev, 0); goto CHECK; } OBJ_RELEASE(output); } CHECK: if (mca_iof_orted_component.xoff) { /* if we have told the HNP to stop reading stdin, see if * the proc has absorbed enough to justify restart * * RHC: Note that when multiple procs want stdin, we * can get into a fight between a proc turnin stdin * back "on" and other procs turning it "off". There * is no clear way to resolve this as different procs * may take input at different rates. */ if (opal_list_get_size(&wev->outputs) < ORTE_IOF_MAX_INPUT_BUFFERS) { /* restart the read */ mca_iof_orted_component.xoff = false; orte_iof_orted_send_xonxoff(ORTE_IOF_XON); } } DEPART: /* unlock and go */ OPAL_THREAD_UNLOCK(&mca_iof_orted_component.lock); }
/*
 * Query the registry for all nodes allocated to a specified app_context
 *
 * Builds, in allocated_nodes, the list of nodes a mapper may use for app and
 * reports how many free slots they offer.  The function has three stages:
 *
 *  1. Unmanaged allocation only (!orte_managed_allocation): collect the
 *     nodes named by the app's dash-host, the app's hostfile, the rankfile
 *     or the default hostfile (first one that applies), look each of them up
 *     by name in orte_node_pool and add the usable matches to
 *     allocated_nodes, then jump straight to stage 3.  If neither source
 *     applies, or the default hostfile yields nothing, fall into stage 2.
 *  2. "addknown": add node 0 of the pool (the HNP) if it was allocated and
 *     the policy does not carry ORTE_MAPPING_NO_USE_LOCAL, then every other
 *     usable node of the pool, and filter the result through
 *     orte_rmaps_base_filter_nodes().
 *  3. "complete": remove nodes that cannot take more processes and add up
 *     the free slots of the ones that remain.
 *
 * Every node placed on allocated_nodes is OBJ_RETAINed first; nodes removed
 * again in stage 3 are OBJ_RELEASEd.
 *
 * @param allocated_nodes  list that receives the usable nodes
 * @param total_num_slots  set to 0 on entry and, on success, to the sum of
 *                         (slots - slots_inuse) over nodes with free slots
 * @param app              app context whose dash-host/hostfile attributes
 *                         are consulted
 * @param policy           mapping policy; its directive bits
 *                         ORTE_MAPPING_NO_USE_LOCAL and
 *                         ORTE_MAPPING_NO_OVERSUBSCRIBE are examined
 * @param initial_map      when true, ORTE_NODE_FLAG_MAPPED is cleared on
 *                         every node that is added
 * @param silent           when true, no help message is shown; an empty
 *                         result after stage 3 is then reported as
 *                         ORTE_ERR_RESOURCE_BUSY
 *
 * @return ORTE_SUCCESS; ORTE_ERR_SILENT when no node is available (or, if
 *         not silent, when all are in use); ORTE_ERR_RESOURCE_BUSY (silent
 *         only) when all nodes are in use; ORTE_ERR_BAD_PARAM when the
 *         rankfile yields no nodes; otherwise the error returned by a helper.
 */
int orte_rmaps_base_get_target_nodes(opal_list_t *allocated_nodes, orte_std_cntr_t *total_num_slots,
                                     orte_app_context_t *app, orte_mapping_policy_t policy,
                                     bool initial_map, bool silent)
{
    opal_list_item_t *item, *next;
    orte_node_t *node, *nd, *nptr;
    orte_std_cntr_t num_slots;
    orte_std_cntr_t i;
    int rc;
    orte_job_t *daemons;
    bool novm;
    opal_list_t nodes;
    char *hosts;

    /** set default answer */
    *total_num_slots = 0;

    /* get the daemon job object */
    daemons = orte_get_job_data_object(ORTE_PROC_MY_NAME->jobid);
    /* see if we have a vm or not */
    novm = orte_get_attribute(&daemons->attributes, ORTE_JOB_NO_VM, NULL, OPAL_BOOL);

    /* if this is NOT a managed allocation, then we use the nodes
     * that were specified for this app - there is no need to collect
     * all available nodes and "filter" them
     */
    if (!orte_managed_allocation) {
        /* NOTE(review): several exits below (the "return rc" error paths and
         * both "goto addknown" jumps) leave this block without a matching
         * OBJ_DESTRUCT(&nodes) - confirm whether that is intended */
        OBJ_CONSTRUCT(&nodes, opal_list_t);
        /* if the app provided a dash-host, and we are not treating
         * them as requested or "soft" locations, then use those nodes
         */
        hosts = NULL;
        if (!orte_soft_locations &&
            orte_get_attribute(&app->attributes, ORTE_APP_DASH_HOST, (void**)&hosts, OPAL_STRING)) {
            OPAL_OUTPUT_VERBOSE((5, orte_rmaps_base_framework.framework_output,
                                 "%s using dash_host %s",
                                 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), hosts));
            if (ORTE_SUCCESS != (rc = orte_util_add_dash_host_nodes(&nodes, hosts, false))) {
                ORTE_ERROR_LOG(rc);
                free(hosts);
                return rc;
            }
            free(hosts);
        } else if (orte_get_attribute(&app->attributes, ORTE_APP_HOSTFILE, (void**)&hosts, OPAL_STRING)) {
            /* otherwise, if the app provided a hostfile, then use that */
            OPAL_OUTPUT_VERBOSE((5, orte_rmaps_base_framework.framework_output,
                                 "%s using hostfile %s",
                                 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), hosts));
            if (ORTE_SUCCESS != (rc = orte_util_add_hostfile_nodes(&nodes, hosts))) {
                free(hosts);
                ORTE_ERROR_LOG(rc);
                return rc;
            }
            free(hosts);
        } else if (NULL != orte_rankfile) {
            /* use the rankfile, if provided */
            OPAL_OUTPUT_VERBOSE((5, orte_rmaps_base_framework.framework_output,
                                 "%s using rankfile %s",
                                 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                 orte_rankfile));
            if (ORTE_SUCCESS != (rc = orte_util_add_hostfile_nodes(&nodes, orte_rankfile))) {
                ORTE_ERROR_LOG(rc);
                return rc;
            }
            if (0 == opal_list_get_size(&nodes)) {
                OPAL_OUTPUT_VERBOSE((5, orte_rmaps_base_framework.framework_output,
                                     "%s nothing found in given rankfile",
                                     ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
                OBJ_DESTRUCT(&nodes);
                return ORTE_ERR_BAD_PARAM;
            }
        } else if (NULL != orte_default_hostfile) {
            /* fall back to the default hostfile, if provided */
            OPAL_OUTPUT_VERBOSE((5, orte_rmaps_base_framework.framework_output,
                                 "%s using default hostfile %s",
                                 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                 orte_default_hostfile));
            if (ORTE_SUCCESS != (rc = orte_util_add_hostfile_nodes(&nodes, orte_default_hostfile))) {
                ORTE_ERROR_LOG(rc);
                return rc;
            }
            /* this is a special case - we always install a default
             * hostfile, but it is empty. If the user didn't remove it
             * or put something into it, then we will have pursued that
             * option and found nothing. This isn't an error, we just need
             * to add all the known nodes
             */
            if (0 == opal_list_get_size(&nodes)) {
                OPAL_OUTPUT_VERBOSE((5, orte_rmaps_base_framework.framework_output,
                                     "%s nothing in default hostfile - using known nodes",
                                     ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
                goto addknown;
            }
        } else {
            /* if nothing else was available, then use all known nodes, which
             * will include ourselves
             */
            OPAL_OUTPUT_VERBOSE((5, orte_rmaps_base_framework.framework_output,
                                 "%s using known nodes",
                                 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
            goto addknown;
        }
        /** if we still don't have anything */
        if (0 == opal_list_get_size(&nodes)) {
            if (!silent) {
                orte_show_help("help-orte-rmaps-base.txt",
                               "orte-rmaps-base:no-available-resources",
                               true);
            }
            OBJ_DESTRUCT(&nodes);
            return ORTE_ERR_SILENT;
        }
        /* find the nodes in our node array and assemble them
         * in daemon order if the vm was launched
         */
        while (NULL != (item = opal_list_remove_first(&nodes))) {
            nptr = (orte_node_t*)item;
            /* nd tracks the insertion reference point in allocated_nodes;
             * it is reset for every requested node, so the first pool match
             * of each requested node is always appended */
            nd = NULL;
            /* scan the whole pool for entries with the same name; the scan
             * does not stop at the first match */
            for (i=0; i < orte_node_pool->size; i++) {
                if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, i))) {
                    continue;
                }
                if (0 != strcmp(node->name, nptr->name)) {
                    OPAL_OUTPUT_VERBOSE((10, orte_rmaps_base_framework.framework_output,
                                         "NODE %s DOESNT MATCH NODE %s",
                                         node->name, nptr->name));
                    continue;
                }
                /* ignore nodes that are marked as do-not-use for this mapping */
                if (ORTE_NODE_STATE_DO_NOT_USE == node->state) {
                    OPAL_OUTPUT_VERBOSE((10, orte_rmaps_base_framework.framework_output,
                                         "NODE %s IS MARKED NO_USE", node->name));
                    /* reset the state so it can be used another time */
                    node->state = ORTE_NODE_STATE_UP;
                    continue;
                }
                if (ORTE_NODE_STATE_DOWN == node->state) {
                    OPAL_OUTPUT_VERBOSE((10, orte_rmaps_base_framework.framework_output,
                                         "NODE %s IS DOWN", node->name));
                    continue;
                }
                if (ORTE_NODE_STATE_NOT_INCLUDED == node->state) {
                    OPAL_OUTPUT_VERBOSE((10, orte_rmaps_base_framework.framework_output,
                                         "NODE %s IS MARKED NO_INCLUDE", node->name));
                    /* not to be used */
                    continue;
                }
                /* if this node wasn't included in the vm (e.g., by -host), ignore it,
                 * unless we are mapping prior to launching the vm
                 */
                if (NULL == node->daemon && !novm) {
                    OPAL_OUTPUT_VERBOSE((10, orte_rmaps_base_framework.framework_output,
                                         "NODE %s HAS NO DAEMON", node->name));
                    continue;
                }
                /* retain a copy for our use in case the item gets
                 * destructed along the way
                 */
                OBJ_RETAIN(node);
                if (initial_map) {
                    /* if this is the first app_context we
                     * are getting for an initial map of a job,
                     * then mark all nodes as unmapped
                     */
                    ORTE_FLAG_UNSET(node, ORTE_NODE_FLAG_MAPPED);
                }
                if (NULL == nd || NULL == nd->daemon ||
                    NULL == node->daemon ||
                    nd->daemon->name.vpid < node->daemon->name.vpid) {
                    /* just append to end */
                    opal_list_append(allocated_nodes, &node->super);
                    nd = node;
                } else {
                    /* starting from end, put this node in daemon-vpid order */
                    while (node->daemon->name.vpid < nd->daemon->name.vpid) {
                        if (opal_list_get_begin(allocated_nodes) == opal_list_get_prev(&nd->super)) {
                            /* insert at beginning */
                            opal_list_prepend(allocated_nodes, &node->super);
                            goto moveon1;
                        }
                        nd = (orte_node_t*)opal_list_get_prev(&nd->super);
                    }
                    /* note: this reuses the outer loop's item variable; the
                     * outer loop reassigns it before its next use */
                    item = opal_list_get_next(&nd->super);
                    if (item == opal_list_get_end(allocated_nodes)) {
                        /* we are at the end - just append */
                        opal_list_append(allocated_nodes, &node->super);
                    } else {
                        nd = (orte_node_t*)item;
                        opal_list_insert_pos(allocated_nodes, item, &node->super);
                    }
                moveon1:
                    /* reset us back to the end for the next node */
                    nd = (orte_node_t*)opal_list_get_last(allocated_nodes);
                }
            }
            /* done with the requested-node entry taken off the temp list */
            OBJ_RELEASE(nptr);
        }
        OBJ_DESTRUCT(&nodes);
        /* now prune for usage and compute total slots */
        goto complete;
    }

 addknown:
    /* if the hnp was allocated, include it unless flagged not to */
    if (orte_hnp_is_allocated && !(ORTE_GET_MAPPING_DIRECTIVE(policy) & ORTE_MAPPING_NO_USE_LOCAL)) {
        if (NULL != (node = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, 0))) {
            if (ORTE_NODE_STATE_DO_NOT_USE == node->state) {
                OPAL_OUTPUT_VERBOSE((10, orte_rmaps_base_framework.framework_output,
                                     "HNP IS MARKED NO_USE"));
                /* clear this for future use, but don't include it */
                node->state = ORTE_NODE_STATE_UP;
            } else if (ORTE_NODE_STATE_NOT_INCLUDED != node->state) {
                OBJ_RETAIN(node);
                if (initial_map) {
                    /* if this is the first app_context we
                     * are getting for an initial map of a job,
                     * then mark all nodes as unmapped
                     */
                    ORTE_FLAG_UNSET(node, ORTE_NODE_FLAG_MAPPED);
                }
                opal_list_append(allocated_nodes, &node->super);
            }
        }
    }

    /* add everything in the node pool that can be used - add them
     * in daemon order, which may be different than the order in the
     * node pool. Since an empty list is passed into us, the list at
     * this point either has the HNP node or nothing, and the HNP
     * node obviously has a daemon on it (us!)
     */
    if (0 == opal_list_get_size(allocated_nodes)) {
        /* the list is empty */
        nd = NULL;
    } else {
        nd = (orte_node_t*)opal_list_get_last(allocated_nodes);
    }
    /* index 0 (the HNP) was handled above, so start at 1 */
    for (i=1; i < orte_node_pool->size; i++) {
        if (NULL != (node = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, i))) {
            /* ignore nodes that are marked as do-not-use for this mapping */
            if (ORTE_NODE_STATE_DO_NOT_USE == node->state) {
                OPAL_OUTPUT_VERBOSE((10, orte_rmaps_base_framework.framework_output,
                                     "NODE %s IS MARKED NO_USE", node->name));
                /* reset the state so it can be used another time */
                node->state = ORTE_NODE_STATE_UP;
                continue;
            }
            if (ORTE_NODE_STATE_DOWN == node->state) {
                OPAL_OUTPUT_VERBOSE((10, orte_rmaps_base_framework.framework_output,
                                     "NODE %s IS MARKED DOWN", node->name));
                continue;
            }
            if (ORTE_NODE_STATE_NOT_INCLUDED == node->state) {
                OPAL_OUTPUT_VERBOSE((10, orte_rmaps_base_framework.framework_output,
                                     "NODE %s IS MARKED NO_INCLUDE", node->name));
                /* not to be used */
                continue;
            }
            /* if this node wasn't included in the vm (e.g., by -host), ignore it,
             * unless we are mapping prior to launching the vm
             */
            if (NULL == node->daemon && !novm) {
                OPAL_OUTPUT_VERBOSE((10, orte_rmaps_base_framework.framework_output,
                                     "NODE %s HAS NO DAEMON", node->name));
                continue;
            }
            /* retain a copy for our use in case the item gets
             * destructed along the way
             */
            OBJ_RETAIN(node);
            if (initial_map) {
                /* if this is the first app_context we
                 * are getting for an initial map of a job,
                 * then mark all nodes as unmapped
                 */
                ORTE_FLAG_UNSET(node, ORTE_NODE_FLAG_MAPPED);
            }
            if (NULL == nd || NULL == nd->daemon ||
                NULL == node->daemon ||
                nd->daemon->name.vpid < node->daemon->name.vpid) {
                /* just append to end */
                opal_list_append(allocated_nodes, &node->super);
                nd = node;
            } else {
                /* starting from end, put this node in daemon-vpid order */
                while (node->daemon->name.vpid < nd->daemon->name.vpid) {
                    if (opal_list_get_begin(allocated_nodes) == opal_list_get_prev(&nd->super)) {
                        /* insert at beginning */
                        opal_list_prepend(allocated_nodes, &node->super);
                        goto moveon;
                    }
                    nd = (orte_node_t*)opal_list_get_prev(&nd->super);
                }
                item = opal_list_get_next(&nd->super);
                if (item == opal_list_get_end(allocated_nodes)) {
                    /* we are at the end - just append */
                    opal_list_append(allocated_nodes, &node->super);
                } else {
                    nd = (orte_node_t*)item;
                    opal_list_insert_pos(allocated_nodes, item, &node->super);
                }
            moveon:
                /* reset us back to the end for the next node */
                nd = (orte_node_t*)opal_list_get_last(allocated_nodes);
            }
        }
    }

    OPAL_OUTPUT_VERBOSE((5, orte_rmaps_base_framework.framework_output,
                         "%s Starting with %d nodes in list",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                         (int)opal_list_get_size(allocated_nodes)));

    /** check that anything is here */
    if (0 == opal_list_get_size(allocated_nodes)) {
        if (!silent) {
            orte_show_help("help-orte-rmaps-base.txt",
                           "orte-rmaps-base:no-available-resources",
                           true);
        }
        return ORTE_ERR_SILENT;
    }

    /* filter the nodes thru any hostfile and dash-host options */
    OPAL_OUTPUT_VERBOSE((5, orte_rmaps_base_framework.framework_output,
                         "%s Filtering thru apps",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));

    /* ORTE_ERR_TAKE_NEXT_OPTION from the filter is not treated as an error */
    if (ORTE_SUCCESS != (rc = orte_rmaps_base_filter_nodes(app, allocated_nodes, true))
        && ORTE_ERR_TAKE_NEXT_OPTION != rc) {
        ORTE_ERROR_LOG(rc);
        return rc;
    }

    OPAL_OUTPUT_VERBOSE((5, orte_rmaps_base_framework.framework_output,
                         "%s Retained %d nodes in list",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                         (int)opal_list_get_size(allocated_nodes)));

 complete:
    /* remove all nodes that are already at max usage, and
     * compute the total number of allocated slots while
     * we do so
     */
    num_slots = 0;
    item = opal_list_get_first(allocated_nodes);
    while (item != opal_list_get_end(allocated_nodes)) {
        /** save the next pointer in case we remove this node */
        next = opal_list_get_next(item);
        /** check to see if this node is fully used - remove if so */
        node = (orte_node_t*)item;
        /* a slots_max of 0 is treated as "no hard limit" by this test */
        if (0 != node->slots_max && node->slots_inuse > node->slots_max) {
            OPAL_OUTPUT_VERBOSE((5, orte_rmaps_base_framework.framework_output,
                                 "%s Removing node %s: max %d inuse %d",
                                 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                 node->name, node->slots_max, node->slots_inuse));
            opal_list_remove_item(allocated_nodes, item);
            OBJ_RELEASE(item);  /* "un-retain" it */
        } else if (node->slots <= node->slots_inuse &&
                   (ORTE_MAPPING_NO_OVERSUBSCRIBE & ORTE_GET_MAPPING_DIRECTIVE(policy))) {
            /* remove the node as fully used */
            OPAL_OUTPUT_VERBOSE((5, orte_rmaps_base_framework.framework_output,
                                 "%s Removing node %s slots %d inuse %d",
                                 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                 node->name, node->slots, node->slots_inuse));
            opal_list_remove_item(allocated_nodes, item);
            OBJ_RELEASE(item);  /* "un-retain" it */
        } else if (node->slots > node->slots_inuse) {
            /* add the available slots */
            OPAL_OUTPUT_VERBOSE((5, orte_rmaps_base_framework.framework_output,
                                 "%s node %s has %d slots available",
                                 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                 node->name, node->slots - node->slots_inuse));
            num_slots += node->slots - node->slots_inuse;
        } else if (!(ORTE_MAPPING_NO_OVERSUBSCRIBE & ORTE_GET_MAPPING_DIRECTIVE(policy))) {
            /* nothing needed to do here - we don't add slots to the
             * count as we don't have any available. Just let the mapper
             * do what it needs to do to meet the request
             */
            OPAL_OUTPUT_VERBOSE((5, orte_rmaps_base_framework.framework_output,
                                 "%s node %s is fully used, but available for oversubscrition",
                                 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                 node->name));
        } else {
            /* if we cannot use it, remove it from list */
            opal_list_remove_item(allocated_nodes, item);
            OBJ_RELEASE(item);  /* "un-retain" it */
        }
        /** go on to next item */
        item = next;
    }

    /* Sanity check to make sure we have resources available */
    if (0 == opal_list_get_size(allocated_nodes)) {
        if (silent) {
            /* let the caller know that the resources exist,
             * but are currently busy
             */
            return ORTE_ERR_RESOURCE_BUSY;
        } else {
            orte_show_help("help-orte-rmaps-base.txt",
                           "orte-rmaps-base:all-available-resources-used", true);
            return ORTE_ERR_SILENT;
        }
    }

    /* pass back the total number of available slots */
    *total_num_slots = num_slots;

    /* at verbosity 5 and above, dump the final node list */
    if (4 < opal_output_get_verbosity(orte_rmaps_base_framework.framework_output)) {
        opal_output(0, "AVAILABLE NODES FOR MAPPING:");
        for (item = opal_list_get_first(allocated_nodes);
             item != opal_list_get_end(allocated_nodes);
             item = opal_list_get_next(item)) {
            node = (orte_node_t*)item;
            opal_output(0, " node: %s daemon: %s", node->name,
                        (NULL == node->daemon) ? "NULL" : ORTE_VPID_PRINT(node->daemon->name.vpid));
        }
    }

    return ORTE_SUCCESS;
}
/* * Lookup a peer by name, create one if it doesn't exist. * @param name Peers globally unique identifier. * @retval Pointer to the newly created struture or NULL on error. */ mca_oob_tcp_peer_t * mca_oob_tcp_peer_lookup(const orte_process_name_t* name) { int rc; mca_oob_tcp_peer_t * peer, *old; if (NULL == name) { /* can't look this one up */ return NULL; } OPAL_THREAD_LOCK(&mca_oob_tcp_component.tcp_lock); peer = (mca_oob_tcp_peer_t*)orte_hash_table_get_proc( &mca_oob_tcp_component.tcp_peers, name); if(NULL != peer && memcmp(&peer->peer_name,name,sizeof(peer->peer_name)) == 0) { OPAL_THREAD_UNLOCK(&mca_oob_tcp_component.tcp_lock); return peer; } /* allocate from free list */ MCA_OOB_TCP_PEER_ALLOC(peer, rc); if(NULL == peer) { OPAL_THREAD_UNLOCK(&mca_oob_tcp_component.tcp_lock); return NULL; } /* initialize peer state */ peer->peer_name = *name; peer->peer_addr = NULL; peer->peer_sd = -1; peer->peer_state = MCA_OOB_TCP_CLOSED; peer->peer_recv_msg = NULL; peer->peer_send_msg = NULL; peer->peer_retries = 0; /* add to lookup table */ if(ORTE_SUCCESS != orte_hash_table_set_proc(&mca_oob_tcp_component.tcp_peers, &peer->peer_name, peer)) { MCA_OOB_TCP_PEER_RETURN(peer); OPAL_THREAD_UNLOCK(&mca_oob_tcp_component.tcp_lock); return NULL; } /* if the peer list is over the maximum size, remove one unsed peer */ opal_list_prepend(&mca_oob_tcp_component.tcp_peer_list, (opal_list_item_t *) peer); if(mca_oob_tcp_component.tcp_peer_limit > 0 && (int)opal_list_get_size(&mca_oob_tcp_component.tcp_peer_list) > mca_oob_tcp_component.tcp_peer_limit) { old = (mca_oob_tcp_peer_t *) opal_list_get_last(&mca_oob_tcp_component.tcp_peer_list); while(1) { if(0 == opal_list_get_size(&(old->peer_send_queue)) && NULL == peer->peer_recv_msg) { opal_list_remove_item(&mca_oob_tcp_component.tcp_peer_list, (opal_list_item_t *) old); MCA_OOB_TCP_PEER_RETURN(old); break; } else { old = (mca_oob_tcp_peer_t *) opal_list_get_prev(old); if(opal_list_get_begin(&mca_oob_tcp_component.tcp_peer_list) == 
(opal_list_item_t*)old) { /* we tried, but we couldn't find one that was valid to get rid * of. Oh well. */ break; } } } } OPAL_THREAD_UNLOCK(&mca_oob_tcp_component.tcp_lock); return peer; }
int main(int argc, char **argv) { /* local variables */ opal_list_t list, x; size_t indx,i,list_size, tmp_size_1, tmp_size_2,size_elements; int error_cnt; test_data_t *elements, *ele; opal_list_item_t *item; opal_init(); test_init("opal_list_t"); /* initialize list */ OBJ_CONSTRUCT(&list, opal_list_t); OBJ_CONSTRUCT(&x, opal_list_t); /* check length of list */ list_size=opal_list_get_size(&list); if( 0 == list_size ) { test_success(); } else { test_failure(" opal_list_get_size"); } /* check for empty */ if (opal_list_is_empty(&list)) { test_success(); } else { test_failure(" opal_list_is_empty(empty list)"); } /* create test elements */ size_elements=4; elements=(test_data_t *)malloc(sizeof(test_data_t)*size_elements); assert(elements); for(i=0 ; i < size_elements ; i++) { OBJ_CONSTRUCT(elements + i, test_data_t); (elements+i)->data=i; } /* populate list */ for(i=0 ; i < size_elements ; i++) { opal_list_append(&list,(opal_list_item_t *)(elements+i)); } list_size=opal_list_get_size(&list); if( list_size == size_elements ) { test_success(); } else { test_failure(" populating list"); } /* checking for empty on non-empty list */ if (!opal_list_is_empty(&list)) { test_success(); } else { test_failure(" opal_list_is_empty(non-empty list)"); } /* check that list is ordered as expected */ i=0; error_cnt=0; for(ele = (test_data_t *) opal_list_get_first(&list); ele != (test_data_t *) opal_list_get_end(&list); ele = (test_data_t *) ((opal_list_item_t *)ele)->opal_list_next) { if( ele->data != i ) error_cnt++; i++; } if( 0 == error_cnt ) { test_success(); } else { test_failure(" error in list order "); } /* check opal_list_get_first */ ele = (test_data_t *)NULL; ele = (test_data_t *) opal_list_get_first(&list); assert(ele); if( 0 == ele->data ) { test_success(); } else { test_failure(" error in opal_list_get_first"); } i=0; for(ele = (test_data_t *) opal_list_get_first(&list); ele != (test_data_t *) opal_list_get_end(&list); ele = (test_data_t *) ((opal_list_item_t 
*)ele)->opal_list_next) { i++; } if( size_elements == i ) { test_success(); } else { test_failure(" error in opal_list_get_first - list size changed "); } /* check opal_list_get_last */ ele = (test_data_t *)NULL; ele = (test_data_t *) opal_list_get_last(&list); assert(ele); if( (size_elements-1) == ele->data ) { test_success(); } else { test_failure(" error in opal_list_get_last"); } i=0; for(ele = (test_data_t *) opal_list_get_first(&list); ele != (test_data_t *) opal_list_get_end(&list); ele = (test_data_t *) ((opal_list_item_t *)ele)->opal_list_next) { i++; } if( size_elements == i ) { test_success(); } else { test_failure(" error in opal_list_get_first - list size changed "); } /* check opal_list_remove_first */ ele = (test_data_t *)NULL; ele = (test_data_t *) opal_list_remove_first(&list); assert(ele); if( 0 == ele->data ) { test_success(); } else { test_failure(" error in opal_list_remove_first"); } i=0; for(ele = (test_data_t *) opal_list_get_first(&list); ele != (test_data_t *) opal_list_get_end(&list); ele = (test_data_t *) ((opal_list_item_t *)ele)->opal_list_next) { i++; } if( (size_elements-1) == i ) { test_success(); } else { test_failure(" error in opal_list_remove_first - list size changed "); } /* test opal_list_prepend */ opal_list_prepend(&list,(opal_list_item_t *)elements); ele = (test_data_t *)NULL; ele = (test_data_t *) opal_list_get_first(&list); assert(ele); if( 0 == ele->data ) { test_success(); } else { test_failure(" error in opal_list_prepend"); } i=0; for(ele = (test_data_t *) opal_list_get_first(&list); ele != (test_data_t *) opal_list_get_end(&list); ele = (test_data_t *) ((opal_list_item_t *)ele)->opal_list_next) { i++; } if( size_elements == i ) { test_success(); } else { test_failure(" error in opal_list_prepend - list size changed "); } /* check opal_list_remove_last */ ele = (test_data_t *)NULL; ele = (test_data_t *) opal_list_remove_last(&list); assert(ele); if( (size_elements-1) == ele->data ) { test_success(); } else { 
test_failure(" error in opal_list_remove_last"); } i=0; for(ele = (test_data_t *) opal_list_get_first(&list); ele != (test_data_t *) opal_list_get_end(&list); ele = (test_data_t *) ((opal_list_item_t *)ele)->opal_list_next) { i++; } if( (size_elements-1) == i ) { test_success(); } else { test_failure(" error in opal_list_remove_last - list size changed "); } /* test opal_list_append */ opal_list_append(&list,(opal_list_item_t *)(elements+size_elements-1)); ele = (test_data_t *)NULL; ele = (test_data_t *) opal_list_get_last(&list); assert(ele); if( (size_elements-1) == ele->data ) { test_success(); } else { test_failure(" error in opal_list_append"); } i=0; for(ele = (test_data_t *) opal_list_get_first(&list); ele != (test_data_t *) opal_list_get_end(&list); ele = (test_data_t *) ((opal_list_item_t *)ele)->opal_list_next) { i++; } if( size_elements == i ) { test_success(); } else { test_failure(" error in opal_list_append - list size changed "); } /* remove element from list */ indx=size_elements/2; if( 0 == indx ) indx=1; assert(2 <= size_elements); ele = (test_data_t *)NULL; ele = (test_data_t *) opal_list_remove_item(&list,(opal_list_item_t *)(elements+indx)); assert(ele); if( (indx-1) == ele->data ) { test_success(); } else { test_failure(" error in opal_list_remove - previous"); } ele=(test_data_t *)(((opal_list_item_t *)ele)->opal_list_next); if( (indx+1) == ele->data ) { test_success(); } else { test_failure(" error in opal_list_remove - next"); } i=0; for(ele = (test_data_t *) opal_list_get_first(&list); ele != (test_data_t *) opal_list_get_end(&list); ele = (test_data_t *) ((opal_list_item_t *)ele)->opal_list_next) { i++; } if( (size_elements-1) == i ) { test_success(); } else { test_failure(" error in opal_list_remove - list size changed incorrectly"); } /* test the insert function */ i=opal_list_insert(&list,(opal_list_item_t *)(elements+indx),indx); if( 1 == i ) { test_success(); } else { test_failure(" error in opal_list_remove_item \n"); } i=0; for(ele 
= (test_data_t *) opal_list_get_first(&list); ele != (test_data_t *) opal_list_get_end(&list); ele = (test_data_t *) ((opal_list_item_t *)ele)->opal_list_next) { i++; } if( size_elements == i ) { test_success(); } else { test_failure(" error in opal_list_insert - incorrect list length"); } i=0; error_cnt=0; for(ele = (test_data_t *) opal_list_get_first(&list); ele != (test_data_t *) opal_list_get_end(&list); ele = (test_data_t *) ((opal_list_item_t *)ele)->opal_list_next) { if( ele->data != i ) error_cnt++; i++; } if( 0 == error_cnt ) { test_success(); } else { test_failure(" error in list order - opal_list_remove_item "); } /* test the splice and join functions */ list_size = opal_list_get_size(&list); for (i = 0, item = opal_list_get_first(&list) ; i < list_size / 2 ; ++i, item = opal_list_get_next(item)) { } opal_list_splice(&x, opal_list_get_end(&x), &list, item, opal_list_get_end(&list)); tmp_size_1 = opal_list_get_size(&list); tmp_size_2 = opal_list_get_size(&x); if (tmp_size_1 != i) { test_failure(" error in splice (size of list)"); } else if (tmp_size_2 != list_size - tmp_size_1) { test_failure(" error in splice (size of x)"); } else { test_success(); } opal_list_join(&list, opal_list_get_end(&list), &x); tmp_size_1 = opal_list_get_size(&list); tmp_size_2 = opal_list_get_size(&x); if (tmp_size_1 != list_size) { test_failure(" error in join (size of list)"); } else if (tmp_size_2 != 0) { test_failure(" error in join (size of x)"); } else { test_success(); } if (NULL != elements) free(elements); opal_finalize(); return test_finalize(); }
static void stdin_write_handler(int fd, short event, void *cbdata) { orte_iof_sink_t *sink = (orte_iof_sink_t*)cbdata; orte_iof_write_event_t *wev = sink->wev; opal_list_item_t *item; orte_iof_write_output_t *output; int num_written; OPAL_OUTPUT_VERBOSE((1, orte_iof_base.iof_output, "%s hnp:stdin:write:handler writing data to %d", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), wev->fd)); /* lock us up to protect global operations */ OPAL_THREAD_LOCK(&mca_iof_hnp_component.lock); wev->pending = false; while (NULL != (item = opal_list_remove_first(&wev->outputs))) { output = (orte_iof_write_output_t*)item; /* if an abnormal termination has occurred, just dump * this data as we are aborting */ if (orte_abnormal_term_ordered) { OBJ_RELEASE(output); continue; } if (0 == output->numbytes) { /* this indicates we are to close the fd - there is * nothing to write */ OPAL_OUTPUT_VERBOSE((20, orte_iof_base.iof_output, "%s iof:hnp closing fd %d on write event due to zero bytes output", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), wev->fd)); OBJ_RELEASE(wev); sink->wev = NULL; /* just leave - we don't want to restart the * read event! */ goto DEPART; } num_written = write(wev->fd, output->data, output->numbytes); OPAL_OUTPUT_VERBOSE((1, orte_iof_base.iof_output, "%s hnp:stdin:write:handler wrote %d bytes", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), num_written)); if (num_written < 0) { if (EAGAIN == errno || EINTR == errno) { /* push this item back on the front of the list */ opal_list_prepend(&wev->outputs, item); /* leave the write event running so it will call us again * when the fd is ready. 
*/ wev->pending = true; opal_event_add(&wev->ev, 0); goto CHECK; } /* otherwise, something bad happened so all we can do is declare an * error and abort */ OBJ_RELEASE(output); OPAL_OUTPUT_VERBOSE((20, orte_iof_base.iof_output, "%s iof:hnp closing fd %d on write event due to negative bytes written", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), wev->fd)); OBJ_RELEASE(wev); sink->wev = NULL; goto DEPART; } else if (num_written < output->numbytes) { OPAL_OUTPUT_VERBOSE((1, orte_iof_base.iof_output, "%s hnp:stdin:write:handler incomplete write %d - adjusting data", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), num_written)); /* incomplete write - adjust data to avoid duplicate output */ memmove(output->data, &output->data[num_written], output->numbytes - num_written); /* push this item back on the front of the list */ opal_list_prepend(&wev->outputs, item); /* leave the write event running so it will call us again * when the fd is ready. */ wev->pending = true; opal_event_add(&wev->ev, 0); goto CHECK; } OBJ_RELEASE(output); } CHECK: if (NULL != mca_iof_hnp_component.stdinev && !orte_abnormal_term_ordered && !mca_iof_hnp_component.stdinev->active) { OPAL_OUTPUT_VERBOSE((1, orte_iof_base.iof_output, "read event is off - checking if okay to restart")); /* if we have turned off the read event, check to * see if the output list has shrunk enough to * turn it back on * * RHC: Note that when multiple procs want stdin, we * can get into a fight between a proc turnin stdin * back "on" and other procs turning it "off". There * is no clear way to resolve this as different procs * may take input at different rates. */ if (opal_list_get_size(&wev->outputs) < ORTE_IOF_MAX_INPUT_BUFFERS) { /* restart the read */ OPAL_OUTPUT_VERBOSE((1, orte_iof_base.iof_output, "restarting read event")); mca_iof_hnp_component.stdinev->active = true; opal_event_add(&(mca_iof_hnp_component.stdinev->ev), 0); } } DEPART: /* unlock and go */ OPAL_THREAD_UNLOCK(&mca_iof_hnp_component.lock); }