static int finalize(void)
{
    opal_list_item_t* item;
    orte_iof_write_output_t *output;
    orte_iof_write_event_t *wev;
    int num_written;
    bool dump;
    int i;
    orte_job_t *jdata;

    /* check if anything is still trying to be written out */
    wev = orte_iof_base.iof_write_stdout->wev;
    if (!opal_list_is_empty(&wev->outputs)) {
        dump = false;
        /* make one last attempt to write this out */
        while (NULL != (item = opal_list_remove_first(&wev->outputs))) {
            output = (orte_iof_write_output_t*)item;
            if (!dump) {
                num_written = write(wev->fd, output->data, output->numbytes);
                if (num_written < output->numbytes) {
                    /* don't retry - just clean out the list and dump it */
                    dump = true;
                }
            }
            OBJ_RELEASE(output);
        }
    }
    if (!orte_xml_output) {
        /* we only opened the stderr channel if we are NOT doing xml output */
        wev = orte_iof_base.iof_write_stderr->wev;
        if (!opal_list_is_empty(&wev->outputs)) {
            dump = false;
            /* make one last attempt to write this out */
            while (NULL != (item = opal_list_remove_first(&wev->outputs))) {
                output = (orte_iof_write_output_t*)item;
                if (!dump) {
                    num_written = write(wev->fd, output->data, output->numbytes);
                    if (num_written < output->numbytes) {
                        /* don't retry - just clean out the list and dump it */
                        dump = true;
                    }
                }
                OBJ_RELEASE(output);
            }
        }
    }

    orte_rml.recv_cancel(ORTE_NAME_WILDCARD, ORTE_RML_TAG_IOF_HNP);

    /* clear our stdin job array */
    for (i = 0; i < mca_iof_mr_hnp_component.stdin_jobs.size; i++) {
        if (NULL == (jdata = (orte_job_t*)opal_pointer_array_get_item(&mca_iof_mr_hnp_component.stdin_jobs, i))) {
            continue;
        }
        OBJ_RELEASE(jdata);
    }
    OBJ_DESTRUCT(&mca_iof_mr_hnp_component.stdin_jobs);

    return ORTE_SUCCESS;
}
/*
 * See description in iof_base_endpoint.h
 */
bool orte_iof_base_endpoint_have_pending_frags(
    orte_iof_base_endpoint_t* endpoint)
{
    if (ORTE_IOF_SOURCE == endpoint->ep_mode) {
        return !opal_list_is_empty(&endpoint->ep_source_frags);
    } else {
        return !opal_list_is_empty(&endpoint->ep_sink_frags);
    }
}
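The iof finalize routines in this section and the endpoint check above both rely on the same opal_list idiom: test with opal_list_is_empty(), drain with opal_list_remove_first(), and release each item. A minimal sketch of that pattern, using a hypothetical list name rather than any real ORTE field, might look like this:

/* Hypothetical sketch of the drain-and-release idiom used by the
 * finalize routines in this section; "pending" is illustrative only. */
static void drain_pending(opal_list_t *pending)
{
    opal_list_item_t *item;

    if (opal_list_is_empty(pending)) {
        return;    /* nothing left to flush */
    }
    while (NULL != (item = opal_list_remove_first(pending))) {
        /* ... make one last attempt to handle the item here ... */
        OBJ_RELEASE(item);
    }
}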
static int finalize(void)
{
    opal_list_item_t* item;
    orte_iof_write_output_t *output;
    orte_iof_write_event_t *wev;
    int num_written;
    bool dump;

    /* check if anything is still trying to be written out */
    wev = orte_iof_base.iof_write_stdout->wev;
    if (!opal_list_is_empty(&wev->outputs)) {
        dump = false;
        /* make one last attempt to write this out */
        while (NULL != (item = opal_list_remove_first(&wev->outputs))) {
            output = (orte_iof_write_output_t*)item;
            if (!dump) {
                num_written = write(wev->fd, output->data, output->numbytes);
                if (num_written < output->numbytes) {
                    /* don't retry - just clean out the list and dump it */
                    dump = true;
                }
            }
            OBJ_RELEASE(output);
        }
    }
    OBJ_RELEASE(orte_iof_base.iof_write_stdout);

    if (!orte_xml_output) {
        /* we only opened the stderr channel if we are NOT doing xml output */
        wev = orte_iof_base.iof_write_stderr->wev;
        if (!opal_list_is_empty(&wev->outputs)) {
            dump = false;
            /* make one last attempt to write this out */
            while (NULL != (item = opal_list_remove_first(&wev->outputs))) {
                output = (orte_iof_write_output_t*)item;
                if (!dump) {
                    num_written = write(wev->fd, output->data, output->numbytes);
                    if (num_written < output->numbytes) {
                        /* don't retry - just clean out the list and dump it */
                        dump = true;
                    }
                }
                OBJ_RELEASE(output);
            }
        }
        OBJ_RELEASE(orte_iof_base.iof_write_stderr);
    }

    /* Cancel the RML receive */
    orte_rml.recv_cancel(ORTE_NAME_WILDCARD, ORTE_RML_TAG_IOF_PROXY);
    return ORTE_SUCCESS;
}
int orte_ras_base_node_segment_empty(bool *empty)
{
    int ret;
    opal_list_t nodes;
    opal_list_item_t *item;

    /* See what's already on the node segment */
    OBJ_CONSTRUCT(&nodes, opal_list_t);
    if (ORTE_SUCCESS != (ret = orte_ras_base_node_query(&nodes))) {
        ORTE_ERROR_LOG(ret);
        OBJ_DESTRUCT(&nodes);
        return ret;
    }

    *empty = opal_list_is_empty(&nodes);

    /* Free the list */
    while (NULL != (item = opal_list_remove_first(&nodes))) {
        OBJ_RELEASE(item);
    }
    OBJ_DESTRUCT(&nodes);

    /* All done */
    return ORTE_SUCCESS;
}
static void stop(orte_jobid_t jobid)
{
    opal_list_item_t *item, *next;
    file_tracker_t *ft;

    /* cannot monitor my own job */
    if (jobid == ORTE_PROC_MY_NAME->jobid && ORTE_JOBID_WILDCARD != jobid) {
        return;
    }

    for (item = opal_list_get_first(&jobs);
         item != opal_list_get_end(&jobs);
         item = next) {
        /* save the next position in case we remove this item from the list */
        next = opal_list_get_next(item);
        ft = (file_tracker_t*)item;
        if (jobid == ft->jobid || ORTE_JOBID_WILDCARD == jobid) {
            opal_list_remove_item(&jobs, item);
            OBJ_RELEASE(item);
        }
    }

    /* if no jobs remain, stop the sampling */
    if (opal_list_is_empty(&jobs) && NULL != sample_ev) {
        opal_event_del(sample_ev);
        free(sample_ev);
        sample_ev = NULL;
    }
    return;
}
/**
 * Function to remove previously registered memory from the tree
 * without freeing it
 *
 * @param reg pointer to the registration to remove
 *
 * @retval OMPI_SUCCESS
 * @retval OMPI_ERR_BAD_PARAM if the passed base pointer was invalid
 */
int mca_rcache_vma_tree_delete(mca_rcache_vma_module_t* vma_rcache,
                               mca_mpool_base_registration_t* reg)
{
    mca_rcache_vma_t *vma;

    vma = (mca_rcache_vma_t*)ompi_rb_tree_find_with(&vma_rcache->rb_tree,
            reg->base, mca_rcache_vma_tree_node_compare_search);

    if (!vma)
        return OMPI_ERROR;

    while (vma != (mca_rcache_vma_t*)opal_list_get_end(&vma_rcache->vma_list)
            && vma->start <= (uintptr_t)reg->bound) {
        mca_rcache_vma_remove_reg(vma, reg);

        if (opal_list_is_empty(&vma->reg_list)) {
            mca_rcache_vma_t *next =
                (mca_rcache_vma_t*)opal_list_get_next(&vma->super);
            ompi_rb_tree_delete(&vma_rcache->rb_tree, vma);
            mca_rcache_vma_update_byte_count(vma_rcache,
                                             vma->start - vma->end - 1);
            opal_list_remove_item(&vma_rcache->vma_list, &vma->super);
            opal_list_append(&vma_rcache->vma_delete_list, &vma->super);
            vma = next;
        } else {
            int merged;

            do {
                mca_rcache_vma_t *prev = NULL, *next = NULL;
                if (opal_list_get_begin(&vma_rcache->vma_list) !=
                        opal_list_get_prev(vma))
                    prev = (mca_rcache_vma_t*)opal_list_get_prev(vma);
                merged = 0;

                if (prev && vma->start == prev->end + 1 &&
                        mca_rcache_vma_compare_reg_lists(vma, prev)) {
                    prev->end = vma->end;
                    opal_list_remove_item(&vma_rcache->vma_list, &vma->super);
                    ompi_rb_tree_delete(&vma_rcache->rb_tree, vma);
                    opal_list_append(&vma_rcache->vma_delete_list, &vma->super);
                    vma = prev;
                    merged = 1;
                }
                if (opal_list_get_end(&vma_rcache->vma_list) !=
                        opal_list_get_next(vma))
                    next = (mca_rcache_vma_t*)opal_list_get_next(vma);

                if (next && vma->end + 1 == next->start &&
                        mca_rcache_vma_compare_reg_lists(vma, next)) {
                    vma->end = next->end;
                    opal_list_remove_item(&vma_rcache->vma_list, &next->super);
                    ompi_rb_tree_delete(&vma_rcache->rb_tree, next);
                    opal_list_append(&vma_rcache->vma_delete_list, &next->super);
                    merged = 1;
                }
            } while (merged);
            vma = (mca_rcache_vma_t*)opal_list_get_next(vma);
        }
    }
    return 0;
}
/*
 * called when the connect module has completed setup of an endpoint
 */
void mca_btl_wv_endpoint_connected(mca_btl_wv_endpoint_t *endpoint)
{
    opal_list_item_t *frag_item;
    mca_btl_wv_send_frag_t *frag;
    bool master = false;

    opal_output(-1, "Now we are CONNECTED");
    endpoint->endpoint_state = MCA_BTL_IB_CONNECTED;
    endpoint->endpoint_btl->device->non_eager_rdma_endpoints++;

    /* The connection is correctly setup. Now we can decrease the
       event trigger. */
    opal_progress_event_users_decrement();

    /* Process pending packets on the endpoint */
    /* While there are frags in the list, process them */
    while (!opal_list_is_empty(&(endpoint->pending_lazy_frags))) {
        frag_item = opal_list_remove_first(&(endpoint->pending_lazy_frags));
        frag = to_send_frag(frag_item);
        /* We need to post this one */
        if (OMPI_SUCCESS != mca_btl_wv_endpoint_post_send(endpoint, frag))
            BTL_ERROR(("Error posting send"));
    }
    OPAL_THREAD_UNLOCK(&endpoint->endpoint_lock);

    /* if the upper layer called put or get before the connection moved to the
     * connected state, then we restart them here */
    mca_btl_wv_frag_progress_pending_put_get(endpoint,
                                             mca_btl_wv_component.rdma_qp);
}
int ompi_osc_portals4_free(struct ompi_win_t *win)
{
    ompi_osc_portals4_module_t *module =
        (ompi_osc_portals4_module_t*) win->w_osc_module;
    int ret = OMPI_SUCCESS;

    /* synchronize */
    module->comm->c_coll.coll_barrier(module->comm,
                                      module->comm->c_coll.coll_barrier_module);

    /* cleanup */
    PtlMEUnlink(module->data_me_h);
    PtlMDRelease(module->md_h);
    PtlMDRelease(module->req_md_h);
    PtlCTFree(module->ct_h);
    if (NULL != module->disp_units) free(module->disp_units);
    ompi_comm_free(&module->comm);
    if (NULL != module->free_after) free(module->free_after);

    if (!opal_list_is_empty(&module->outstanding_locks)) {
        ret = OMPI_ERR_RMA_SYNC;
    }
    OBJ_DESTRUCT(&module->outstanding_locks);

    free(module);

    return ret;
}
int mca_io_ompio_file_sync (ompi_file_t *fh)
{
    int ret = OMPI_SUCCESS;
    mca_common_ompio_data_t *data;

    data = (mca_common_ompio_data_t *) fh->f_io_selected_data;

    OPAL_THREAD_LOCK(&fh->f_lock);
    if ( !opal_list_is_empty (&mca_common_ompio_pending_requests) ) {
        OPAL_THREAD_UNLOCK(&fh->f_lock);
        return MPI_ERR_OTHER;
    }
    if ( data->ompio_fh.f_amode & MPI_MODE_RDONLY ) {
        OPAL_THREAD_UNLOCK(&fh->f_lock);
        return MPI_ERR_ACCESS;
    }

    // Make sure all processes reach this point before syncing the file.
    ret = data->ompio_fh.f_comm->c_coll->coll_barrier (data->ompio_fh.f_comm,
                                                       data->ompio_fh.f_comm->c_coll->coll_barrier_module);
    if ( MPI_SUCCESS != ret ) {
        OPAL_THREAD_UNLOCK(&fh->f_lock);
        return ret;
    }

    ret = data->ompio_fh.f_fs->fs_file_sync (&data->ompio_fh);
    OPAL_THREAD_UNLOCK(&fh->f_lock);

    return ret;
}
/**
 * Discover available (pre-allocated) nodes and report
 * them back to the caller.
 */
static int allocate(opal_list_t *nodes)
{
    int ret;
    char *pbs_jobid;

    /* get our PBS jobid from the environment */
    if (NULL == (pbs_jobid = getenv("PBS_JOBID"))) {
        ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
        return ORTE_ERR_NOT_FOUND;
    }

    /* save that value in the global job ident string for
     * later use in any error reporting */
    orte_job_ident = strdup(pbs_jobid);

    if (ORTE_SUCCESS != (ret = discover(nodes, pbs_jobid))) {
        ORTE_ERROR_LOG(ret);
        return ret;
    }

    /* in the TM world, if we didn't find anything, then this
     * is an unrecoverable error - report it */
    if (opal_list_is_empty(nodes)) {
        orte_show_help("help-ras-tm.txt", "no-nodes-found", true, filename);
        return ORTE_ERR_NOT_FOUND;
    }

    /* All done */
    return ORTE_SUCCESS;
}
static void udsensors_send_log_to_analytics(opal_list_t *key, opal_list_t *non_compute,
                                            opal_list_t *compute)
{
    orcm_analytics_value_t *analytics_vals = NULL;

    if (!opal_list_is_empty(compute)) {
        /* send data to analytics */
        analytics_vals = orcm_util_load_orcm_analytics_value(key, non_compute, compute);
        orcm_analytics.send_data(analytics_vals);
    }
    SAFE_RELEASE(analytics_vals);
}
/*
 * Close the component
 */
static int basesmuma_close(void)
{
    int ret;
    bcol_basesmuma_registration_data_t *net_ctx;
    bcol_base_network_context_t *net_reg;
    mca_bcol_basesmuma_component_t *cs = &mca_bcol_basesmuma_component;

    /* gvm Leak FIX */
    while (!opal_list_is_empty(&(cs->ctl_structures))) {
        opal_list_item_t *item;
        item = opal_list_remove_first(&(cs->ctl_structures));
        OBJ_DESTRUCT(item);
    }
    OBJ_DESTRUCT(&(cs->ctl_structures));

    /* deregister the progress function */
    ret = opal_progress_unregister(bcol_basesmuma_progress);
    if (MPI_SUCCESS != ret) {
        opal_output(0, "failed to unregister the progress function\n");
    }

    /* remove the control structure backing file */
    ret = mca_bcol_basesmuma_deregister_ctl_sm(&mca_bcol_basesmuma_component);
    if (MPI_SUCCESS != ret) {
        opal_output(0, "failed to remove control structure backing file\n");
    }

    /* remove the network contexts - only one network context is defined for
     * this component. */
    /* file_name is returned by asprintf, so we need to free the resource */
    if (mca_bcol_basesmuma_component.super.network_contexts) {
        net_reg = (bcol_base_network_context_t *)
            mca_bcol_basesmuma_component.super.network_contexts[0];
        if (net_reg) {
            net_ctx = (bcol_basesmuma_registration_data_t *)net_reg->context_data;
            if (net_ctx) {
                if (net_ctx->file_name) {
                    free(net_ctx->file_name);
                }
                free(net_ctx);
            }
            free(net_reg);
        }
        free(mca_bcol_basesmuma_component.super.network_contexts);
        mca_bcol_basesmuma_component.super.network_contexts = NULL;
    }

    /* normal return */
    return OMPI_SUCCESS;
}
/* If the XRC recv qp was closed and the sender doesn't yet know about it,
 * we need to close the qp, reset the ib_addr status to CLOSED and start
 * everything from scratch.
 */
static void xoob_restart_connect(mca_btl_base_endpoint_t *endpoint)
{
    BTL_VERBOSE(("Restarting the connection for the endpoint"));
    OPAL_THREAD_LOCK(&endpoint->ib_addr->addr_lock);
    switch (endpoint->ib_addr->status) {
    case MCA_BTL_IB_ADDR_CONNECTED:
        /* so we have the send qp, we just need the receive side.
         * Send request for SRQ numbers */
        BTL_VERBOSE(("Restart The IB addr: sid %" PRIx64 " lid %d"
                     "in MCA_BTL_IB_ADDR_CONNECTED status,"
                     " Changing to MCA_BTL_IB_ADDR_CLOSED and starting from scratch\n",
                     endpoint->ib_addr->subnet_id, endpoint->ib_addr->lid));
        /* Switching back to closed and starting from scratch */
        endpoint->ib_addr->status = MCA_BTL_IB_ADDR_CLOSED;
        /* destroy the qp */
        /* the receiver side was already closed, so all pending lists must be clean! */
        assert (opal_list_is_empty(&endpoint->qps->no_wqe_pending_frags[0]));
        assert (opal_list_is_empty(&endpoint->qps->no_wqe_pending_frags[1]));
        if (ibv_destroy_qp(endpoint->qps[0].qp->lcl_qp))
            BTL_ERROR(("Failed to destroy QP"));
        /* intentional fall-through to restart the connection */
    case MCA_BTL_IB_ADDR_CLOSED:
    case MCA_BTL_IB_ADDR_CONNECTING:
        BTL_VERBOSE(("Restart The IB addr: sid %" PRIx64 " lid %d"
                     "in MCA_BTL_IB_ADDR_CONNECTING or MCA_BTL_IB_ADDR_CLOSED status,"
                     " starting from scratch\n",
                     endpoint->ib_addr->subnet_id, endpoint->ib_addr->lid));
        OPAL_THREAD_UNLOCK(&endpoint->ib_addr->addr_lock);
        /* xoob_module_start_connect() should automatically handle all other cases */
        if (OMPI_SUCCESS != xoob_module_start_connect(NULL, endpoint))
            BTL_ERROR(("Failed to restart connection from MCA_BTL_IB_ADDR_CONNECTING/CLOSED"));
        break;
    default:
        BTL_ERROR(("Invalid endpoint status %d", endpoint->ib_addr->status));
        OPAL_THREAD_UNLOCK(&endpoint->ib_addr->addr_lock);
    }
}
int orte_sds_base_close(void)
{
    /* finalize running component */
    if (NULL != orte_sds_base_module) {
        orte_sds_base_module->finalize();
    }

    /* shutdown any remaining opened components */
    if (!opal_list_is_empty(&orte_sds_base_components_available)) {
        mca_base_components_close(0, &orte_sds_base_components_available, NULL);
    }
    OBJ_DESTRUCT(&orte_sds_base_components_available);

    return ORTE_SUCCESS;
}
int ompi_osc_ucx_free(struct ompi_win_t *win)
{
    ompi_osc_ucx_module_t *module = (ompi_osc_ucx_module_t*) win->w_osc_module;
    int i, ret = OMPI_SUCCESS;

    if ((module->epoch_type.access != NONE_EPOCH &&
         module->epoch_type.access != FENCE_EPOCH) ||
        module->epoch_type.exposure != NONE_EPOCH) {
        ret = OMPI_ERR_RMA_SYNC;
    }

    if (module->start_group != NULL || module->post_group != NULL) {
        ret = OMPI_ERR_RMA_SYNC;
    }

    assert(module->global_ops_num == 0);
    assert(module->lock_count == 0);
    assert(opal_list_is_empty(&module->pending_posts) == true);
    OBJ_DESTRUCT(&module->outstanding_locks);
    OBJ_DESTRUCT(&module->pending_posts);

    while (module->state.lock != TARGET_LOCK_UNLOCKED) {
        /* not sure if this is required */
        ucp_worker_progress(mca_osc_ucx_component.ucp_worker);
    }

    ret = module->comm->c_coll->coll_barrier(module->comm,
                                             module->comm->c_coll->coll_barrier_module);

    for (i = 0; i < ompi_comm_size(module->comm); i++) {
        ucp_rkey_destroy((module->win_info_array[i]).rkey);
        ucp_rkey_destroy((module->state_info_array[i]).rkey);
    }
    free(module->win_info_array);
    free(module->state_info_array);

    free(module->per_target_ops_nums);

    ucp_mem_unmap(mca_osc_ucx_component.ucp_context, module->memh);
    ucp_mem_unmap(mca_osc_ucx_component.ucp_context, module->state_memh);

    if (module->disp_units) free(module->disp_units);
    ompi_comm_free(&module->comm);

    free(module);

    return ret;
}
static int orcm_octl_logical_group_print_list(opal_hash_table_t *groups)
{
    char *key = NULL;
    size_t key_size = 0;
    opal_list_t *value = NULL;
    opal_list_t *new_value = NULL;
    void *in_member = NULL;
    void *o_member = NULL;
    orcm_logical_group_member_t *member_item = NULL;

    while (ORCM_SUCCESS == opal_hash_table_get_next_key_ptr(groups, (void**)&key, &key_size,
                                                            (void**)&value, in_member, &o_member)) {
        new_value = orcm_logical_group_convert_members_list(value, MAX_LINE_LENGTH);
        if (NULL != new_value && !opal_list_is_empty(new_value)) {
            ORCM_UTIL_MSG_WITH_ARG("\ngroup name=%s", key);
            OPAL_LIST_FOREACH(member_item, new_value, orcm_logical_group_member_t) {
                ORCM_UTIL_MSG_WITH_ARG("member list=%s", member_item->member);
            }
        }
        /* presumed loop housekeeping - the remainder of the loop body was
         * truncated in the original snippet: release the converted list and
         * advance the hash-table iterator */
        SAFE_RELEASE(new_value);
        in_member = o_member;
    }

    return ORCM_SUCCESS;
}
oshmem_group_t* find_group_in_cache(int PE_start, int logPE_stride, int PE_size)
{
    int cache_look_up_id[3] = { PE_start, logPE_stride, PE_size };
    opal_list_item_t *item;

    if (opal_list_is_empty(&oshmem_group_cache_list)) {
        return NULL;
    }

    for (item = opal_list_get_first(&oshmem_group_cache_list);
         item && (item != opal_list_get_end(&oshmem_group_cache_list));
         item = opal_list_get_next(item)) {
        if (!memcmp(((oshmem_group_cache_t *) item)->cache_id,
                    cache_look_up_id,
                    3 * sizeof(int))) {
            return ((oshmem_group_cache_t *) item)->group;
        }
    }

    return NULL;
}
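A caller of find_group_in_cache() would typically try the cache first and only build a new group on a miss. The sketch below is a hypothetical illustration; get_group() and the elided creation step are assumptions, not part of the snippet above:

/* Illustrative caller sketch only - the helper name and the creation step
 * are assumptions about the surrounding oshmem code. */
static oshmem_group_t* get_group(int PE_start, int logPE_stride, int PE_size)
{
    oshmem_group_t *group = find_group_in_cache(PE_start, logPE_stride, PE_size);

    if (NULL == group) {
        /* cache miss: create the group here and append a new
         * oshmem_group_cache_t entry to oshmem_group_cache_list */
    }
    return group;
}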
/*
 * Discover available (pre-allocated) nodes. Allocate the
 * requested number of nodes/process slots to the job.
 */
static int orte_ras_loadleveler_allocate(orte_job_t *jdata, opal_list_t *nodes)
{
    int ret = ORTE_SUCCESS;

    if (ORTE_SUCCESS != (ret = orte_ras_loadleveler_discover(nodes))) {
        ORTE_ERROR_LOG(ret);
        return ret;
    }

    /* If we didn't find anything, then this
     * is an unrecoverable error - report it */
    if (opal_list_is_empty(nodes)) {
        opal_output(orte_ras_base.ras_output,
                    "ras:loadleveler:allocate: No nodes were found in the LOADL_HOSTFILE - %s",
                    getenv("LOADL_HOSTFILE"));
        return ORTE_ERR_NOT_FOUND;
    }

    return ret;
}
/*
 * Forcibly drain all pending output on an endpoint, without waiting for
 * actual completion.
 */
void
ompi_btl_usnic_flush_endpoint(
    ompi_btl_usnic_endpoint_t *endpoint)
{
    ompi_btl_usnic_send_frag_t *frag;

    /* First, free all pending fragments */
    while (!opal_list_is_empty(&endpoint->endpoint_frag_send_queue)) {
        frag = (ompi_btl_usnic_send_frag_t *)opal_list_remove_first(
                &endpoint->endpoint_frag_send_queue);

        /* _cond still needs to check ownership, but make sure the
         * fragment is marked as done. */
        frag->sf_ack_bytes_left = 0;
        frag->sf_seg_post_cnt = 0;
        ompi_btl_usnic_send_frag_return_cond(endpoint->endpoint_module, frag);
    }

    /* Now, ACK everything that is pending */
    ompi_btl_usnic_handle_ack(endpoint, endpoint->endpoint_next_seq_to_send - 1);
}
/* This function must be called with the rcache lock held */
static void do_unregistration_gc(struct mca_mpool_base_module_t *mpool)
{
    mca_mpool_rdma_module_t *mpool_rdma = (mca_mpool_rdma_module_t*)mpool;
    mca_mpool_base_registration_t *reg;

    do {
        /* Remove registration from garbage collection list
           before deregistering it */
        reg = (mca_mpool_base_registration_t *)
            opal_list_remove_first(&mpool_rdma->gc_list);
        mpool->rcache->rcache_delete(mpool->rcache, reg);

        /* Drop the rcache lock before calling dereg_mem as there
           may be memory allocations */
        OPAL_THREAD_UNLOCK(&mpool->rcache->lock);
        dereg_mem(mpool, reg);
        OPAL_THREAD_LOCK(&mpool->rcache->lock);

        OMPI_FREE_LIST_RETURN(&mpool_rdma->reg_list,
                              (ompi_free_list_item_t*)reg);
    } while (!opal_list_is_empty(&mpool_rdma->gc_list));
}
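Because do_unregistration_gc() assumes the rcache lock is already held and removes an element before it re-tests the list, a caller would plausibly check the list under the lock before invoking it. A minimal sketch of that contract follows; the wrapper name is hypothetical:

/* Hypothetical caller sketch: take the rcache lock, only invoke the GC
 * routine above when the list is non-empty, then release the lock. */
static void flush_gc_list(struct mca_mpool_base_module_t *mpool)
{
    mca_mpool_rdma_module_t *mpool_rdma = (mca_mpool_rdma_module_t*)mpool;

    OPAL_THREAD_LOCK(&mpool->rcache->lock);
    if (!opal_list_is_empty(&mpool_rdma->gc_list)) {
        do_unregistration_gc(mpool);
    }
    OPAL_THREAD_UNLOCK(&mpool->rcache->lock);
}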
static int ompi_comm_register_cid (uint32_t cid)
{
    opal_list_item_t *item;
    ompi_comm_reg_t *regcom;
    ompi_comm_reg_t *newentry = OBJ_NEW(ompi_comm_reg_t);

    newentry->cid = cid;
    if ( !(opal_list_is_empty (&ompi_registered_comms)) ) {
        for (item = opal_list_get_first(&ompi_registered_comms);
             item != opal_list_get_end(&ompi_registered_comms);
             item = opal_list_get_next(item)) {
            regcom = (ompi_comm_reg_t *)item;
            if ( regcom->cid > cid ) {
                break;
            }
#if OMPI_ENABLE_THREAD_MULTIPLE
            if ( regcom->cid == cid ) {
                /**
                 * The MPI standard states that it is the user's responsibility
                 * to schedule the global communications in order to avoid any
                 * kind of trouble. As managing communicators involves several
                 * collective communications, we should enforce a sequential
                 * execution order. This test only allows one communicator
                 * creation function based on the same communicator.
                 */
                OBJ_RELEASE(newentry);
                return OMPI_ERROR;
            }
#endif  /* OMPI_ENABLE_THREAD_MULTIPLE */
        }
        opal_list_insert_pos (&ompi_registered_comms, item,
                              (opal_list_item_t *)newentry);
    } else {
        opal_list_append (&ompi_registered_comms, (opal_list_item_t *)newentry);
    }

    return OMPI_SUCCESS;
}
mca_bcol_base_lmngr_block_t* mca_coll_ml_lmngr_alloc (
        mca_coll_ml_lmngr_t *lmngr)
{
    int rc;
    opal_list_t *list = &lmngr->blocks_list;

    /* Check if the list manager was initialized */
    if (OPAL_UNLIKELY(NULL == lmngr->base_addr)) {
        ML_VERBOSE(7, ("Starting memory initialization"));
        rc = mca_coll_ml_lmngr_init(lmngr);
        if (OMPI_SUCCESS != rc) {
            ML_ERROR(("Failed to init memory"));
            return NULL;
        }
    }

    if (OPAL_UNLIKELY(opal_list_is_empty(list))) {
        /* The upper layer needs to handle the NULL */
        ML_VERBOSE(1, ("List manager is empty."));
        return NULL;
    }

    return (mca_bcol_base_lmngr_block_t *)opal_list_remove_first(list);
}
static void mca_btl_mvapi_endpoint_connected(mca_btl_mvapi_endpoint_t *endpoint)
{
    opal_list_item_t *frag_item;
    mca_btl_mvapi_frag_t *frag;
    mca_btl_mvapi_module_t* mvapi_btl;

    endpoint->endpoint_state = MCA_BTL_IB_CONNECTED;

    /**
     * The connection is correctly setup. Now we can decrease the event trigger.
     */
    opal_progress_event_decrement();

    /* While there are frags in the list, process them */
    while (!opal_list_is_empty(&(endpoint->pending_send_frags))) {
        frag_item = opal_list_remove_first(&(endpoint->pending_send_frags));
        frag = (mca_btl_mvapi_frag_t *) frag_item;
        mvapi_btl = endpoint->endpoint_btl;
        /* We need to post this one */
        if (OMPI_SUCCESS != mca_btl_mvapi_endpoint_post_send(mvapi_btl, endpoint, frag))
            BTL_ERROR(("error in mca_btl_mvapi_endpoint_send"));
    }
}
/* * Function for selecting one component from all those that are * available. */ void orte_ras_base_allocate(int fd, short args, void *cbdata) { int rc; orte_job_t *jdata; opal_list_t nodes; orte_node_t *node; orte_std_cntr_t i; orte_app_context_t *app; orte_state_caddy_t *caddy = (orte_state_caddy_t*)cbdata; OPAL_OUTPUT_VERBOSE((5, orte_ras_base_framework.framework_output, "%s ras:base:allocate", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); /* convenience */ jdata = caddy->jdata; /* if we already did this, don't do it again - the pool of * global resources is set. */ if (orte_ras_base.allocation_read) { OPAL_OUTPUT_VERBOSE((5, orte_ras_base_framework.framework_output, "%s ras:base:allocate allocation already read", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); goto next_state; } orte_ras_base.allocation_read = true; /* Otherwise, we have to create * the initial set of resources that will delineate all * further operations serviced by this HNP. This list will * contain ALL nodes that can be used by any subsequent job. * * In other words, if a node isn't found in this step, then * no job launched by this HNP will be able to utilize it. */ /* construct a list to hold the results */ OBJ_CONSTRUCT(&nodes, opal_list_t); /* if a component was selected, then we know we are in a managed * environment. - the active module will return a list of what it found */ if (NULL != orte_ras_base.active_module) { /* read the allocation */ if (ORTE_SUCCESS != (rc = orte_ras_base.active_module->allocate(jdata, &nodes))) { if (ORTE_ERR_ALLOCATION_PENDING == rc) { /* an allocation request is underway, so just do nothing */ OBJ_DESTRUCT(&nodes); OBJ_RELEASE(caddy); return; } if (ORTE_ERR_SYSTEM_WILL_BOOTSTRAP == rc) { /* this module indicates that nodes will be discovered * on a bootstrap basis, so all we do here is add our * own node to the list */ goto addlocal; } if (ORTE_ERR_TAKE_NEXT_OPTION == rc) { /* we have an active module, but it is unable to * allocate anything for this job - this indicates * that it isn't a fatal error, but could be if * an allocation is required */ if (orte_allocation_required) { /* an allocation is required, so this is fatal */ OBJ_DESTRUCT(&nodes); orte_show_help("help-ras-base.txt", "ras-base:no-allocation", true); ORTE_FORCED_TERMINATE(ORTE_ERROR_DEFAULT_EXIT_CODE); OBJ_RELEASE(caddy); return; } else { /* an allocation is not required, so we can just * run on the local node - go add it */ goto addlocal; } } ORTE_ERROR_LOG(rc); OBJ_DESTRUCT(&nodes); ORTE_FORCED_TERMINATE(ORTE_ERROR_DEFAULT_EXIT_CODE); OBJ_RELEASE(caddy); return; } } /* If something came back, save it and we are done */ if (!opal_list_is_empty(&nodes)) { /* store the results in the global resource pool - this removes the * list items */ if (ORTE_SUCCESS != (rc = orte_ras_base_node_insert(&nodes, jdata))) { ORTE_ERROR_LOG(rc); OBJ_DESTRUCT(&nodes); ORTE_FORCED_TERMINATE(ORTE_ERROR_DEFAULT_EXIT_CODE); OBJ_RELEASE(caddy); return; } OBJ_DESTRUCT(&nodes); /* default to no-oversubscribe-allowed for managed systems */ if (!(ORTE_MAPPING_SUBSCRIBE_GIVEN & ORTE_GET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping))) { ORTE_SET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping, ORTE_MAPPING_NO_OVERSUBSCRIBE); } /* flag that the allocation is managed */ orte_managed_allocation = true; goto DISPLAY; } else if (orte_allocation_required) { /* if nothing was found, and an allocation is * required, then error out */ OBJ_DESTRUCT(&nodes); orte_show_help("help-ras-base.txt", "ras-base:no-allocation", true); ORTE_FORCED_TERMINATE(ORTE_ERROR_DEFAULT_EXIT_CODE); 
OBJ_RELEASE(caddy); return; } OPAL_OUTPUT_VERBOSE((5, orte_ras_base_framework.framework_output, "%s ras:base:allocate nothing found in module - proceeding to hostfile", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); /* nothing was found, or no active module was alive. Our next * option is to look for a hostfile and assign our global * pool from there. * * Individual hostfile names, if given, are included * in the app_contexts for this job. We therefore need to * retrieve the app_contexts for the job, and then cycle * through them to see if anything is there. The parser will * add the nodes found in each hostfile to our list - i.e., * the resulting list contains the UNION of all nodes specified * in hostfiles from across all app_contexts * * We then continue to add any hosts provided by dash-host and * the default hostfile, if we have it. We will then filter out * all the non-desired hosts (i.e., those not specified by * -host and/or -hostfile) when we start the mapping process * * Note that any relative node syntax found in the hostfiles will * generate an error in this scenario, so only non-relative syntax * can be present */ if (NULL != orte_default_hostfile) { OPAL_OUTPUT_VERBOSE((5, orte_ras_base_framework.framework_output, "%s ras:base:allocate parsing default hostfile %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), orte_default_hostfile)); /* a default hostfile was provided - parse it */ if (ORTE_SUCCESS != (rc = orte_util_add_hostfile_nodes(&nodes, orte_default_hostfile))) { ORTE_ERROR_LOG(rc); OBJ_DESTRUCT(&nodes); ORTE_FORCED_TERMINATE(ORTE_ERROR_DEFAULT_EXIT_CODE); OBJ_RELEASE(caddy); return; } } for (i=0; i < jdata->apps->size; i++) { if (NULL == (app = (orte_app_context_t*)opal_pointer_array_get_item(jdata->apps, i))) { continue; } if (NULL != app->hostfile) { OPAL_OUTPUT_VERBOSE((5, orte_ras_base_framework.framework_output, "%s ras:base:allocate adding hostfile %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), app->hostfile)); /* hostfile was specified - parse it and add it to the list */ if (ORTE_SUCCESS != (rc = orte_util_add_hostfile_nodes(&nodes, app->hostfile))) { ORTE_ERROR_LOG(rc); OBJ_DESTRUCT(&nodes); /* set an error event */ ORTE_FORCED_TERMINATE(ORTE_ERROR_DEFAULT_EXIT_CODE); OBJ_RELEASE(caddy); return; } } else if (!orte_soft_locations && NULL != app->dash_host) { /* if we are using soft locations, then any dash-host would * just include desired nodes and not required. 
We don't want * to pick them up here as this would mean the request was * always satisfied - instead, we want to allow the request * to fail later on and use whatever nodes are actually * available */ OPAL_OUTPUT_VERBOSE((5, orte_ras_base_framework.framework_output, "%s ras:base:allocate adding dash_hosts", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); if (ORTE_SUCCESS != (rc = orte_util_add_dash_host_nodes(&nodes, app->dash_host))) { ORTE_ERROR_LOG(rc); OBJ_DESTRUCT(&nodes); ORTE_FORCED_TERMINATE(ORTE_ERROR_DEFAULT_EXIT_CODE); OBJ_RELEASE(caddy); return; } } } /* if something was found in the hostfile(s), we use that as our global * pool - set it and we are done */ if (!opal_list_is_empty(&nodes)) { /* store the results in the global resource pool - this removes the * list items */ if (ORTE_SUCCESS != (rc = orte_ras_base_node_insert(&nodes, jdata))) { ORTE_ERROR_LOG(rc); ORTE_FORCED_TERMINATE(ORTE_ERROR_DEFAULT_EXIT_CODE); OBJ_RELEASE(caddy); return; } /* cleanup */ OBJ_DESTRUCT(&nodes); goto DISPLAY; } OPAL_OUTPUT_VERBOSE((5, orte_ras_base_framework.framework_output, "%s ras:base:allocate nothing found in hostfiles - checking for rankfile", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); /* Our next option is to look for a rankfile - if one was provided, we * will use its nodes to create a default allocation pool */ if (NULL != orte_rankfile) { /* check the rankfile for node information */ if (ORTE_SUCCESS != (rc = orte_util_add_hostfile_nodes(&nodes, orte_rankfile))) { ORTE_ERROR_LOG(rc); OBJ_DESTRUCT(&nodes); ORTE_FORCED_TERMINATE(ORTE_ERROR_DEFAULT_EXIT_CODE); OBJ_RELEASE(caddy); return ; } } /* if something was found in rankfile, we use that as our global * pool - set it and we are done */ if (!opal_list_is_empty(&nodes)) { /* store the results in the global resource pool - this removes the * list items */ if (ORTE_SUCCESS != (rc = orte_ras_base_node_insert(&nodes, jdata))) { ORTE_ERROR_LOG(rc); ORTE_FORCED_TERMINATE(ORTE_ERROR_DEFAULT_EXIT_CODE); OBJ_RELEASE(caddy); return; } /* rankfile is considered equivalent to an RM allocation */ if (!(ORTE_MAPPING_SUBSCRIBE_GIVEN & ORTE_GET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping))) { ORTE_SET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping, ORTE_MAPPING_NO_OVERSUBSCRIBE); } /* cleanup */ OBJ_DESTRUCT(&nodes); goto DISPLAY; } OPAL_OUTPUT_VERBOSE((5, orte_ras_base_framework.framework_output, "%s ras:base:allocate nothing found in rankfile - inserting current node", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); addlocal: /* if nothing was found by any of the above methods, then we have no * earthly idea what to do - so just add the local host */ node = OBJ_NEW(orte_node_t); if (NULL == node) { ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); OBJ_DESTRUCT(&nodes); ORTE_FORCED_TERMINATE(ORTE_ERROR_DEFAULT_EXIT_CODE); OBJ_RELEASE(caddy); return; } /* use the same name we got in orte_process_info so we avoid confusion in * the session directories */ node->name = strdup(orte_process_info.nodename); node->state = ORTE_NODE_STATE_UP; node->slots_inuse = 0; node->slots_max = 0; node->slots = 1; opal_list_append(&nodes, &node->super); /* store the results in the global resource pool - this removes the * list items */ if (ORTE_SUCCESS != (rc = orte_ras_base_node_insert(&nodes, jdata))) { ORTE_ERROR_LOG(rc); OBJ_DESTRUCT(&nodes); ORTE_FORCED_TERMINATE(ORTE_ERROR_DEFAULT_EXIT_CODE); OBJ_RELEASE(caddy); return; } OBJ_DESTRUCT(&nodes); DISPLAY: /* shall we display the results? 
*/ if (4 < opal_output_get_verbosity(orte_ras_base_framework.framework_output)) { orte_ras_base_display_alloc(); } next_state: /* are we to report this event? */ if (orte_report_events) { if (ORTE_SUCCESS != (rc = orte_util_comm_report_event(ORTE_COMM_EVENT_ALLOCATE))) { ORTE_ERROR_LOG(rc); ORTE_FORCED_TERMINATE(ORTE_ERROR_DEFAULT_EXIT_CODE); OBJ_RELEASE(caddy); } } /* set total slots alloc */ jdata->total_slots_alloc = orte_ras_base.total_slots_alloc; /* set the job state to the next position */ ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_ALLOCATION_COMPLETE); /* cleanup */ OBJ_RELEASE(caddy); }
int orte_ras_base_add_hosts(orte_job_t *jdata) { int rc; opal_list_t nodes; int i; orte_app_context_t *app; /* construct a list to hold the results */ OBJ_CONSTRUCT(&nodes, opal_list_t); /* Individual add-hostfile names, if given, are included * in the app_contexts for this job. We therefore need to * retrieve the app_contexts for the job, and then cycle * through them to see if anything is there. The parser will * add the nodes found in each add-hostfile to our list - i.e., * the resulting list contains the UNION of all nodes specified * in add-hostfiles from across all app_contexts * * Note that any relative node syntax found in the add-hostfiles will * generate an error in this scenario, so only non-relative syntax * can be present */ for (i=0; i < jdata->apps->size; i++) { if (NULL == (app = (orte_app_context_t*)opal_pointer_array_get_item(jdata->apps, i))) { continue; } if (NULL != app->add_hostfile) { OPAL_OUTPUT_VERBOSE((5, orte_ras_base_framework.framework_output, "%s ras:base:add_hosts checking add-hostfile %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), app->add_hostfile)); /* hostfile was specified - parse it and add it to the list */ if (ORTE_SUCCESS != (rc = orte_util_add_hostfile_nodes(&nodes, app->add_hostfile))) { ORTE_ERROR_LOG(rc); OBJ_DESTRUCT(&nodes); return rc; } /* now indicate that this app is to run across it */ app->hostfile = app->add_hostfile; app->add_hostfile = NULL; } } /* We next check for and add any add-host options. Note this is * a -little- different than dash-host in that (a) we add these * nodes to the global pool regardless of what may already be there, * and (b) as a result, any job and/or app_context can access them. * * Note that any relative node syntax found in the add-host lists will * generate an error in this scenario, so only non-relative syntax * can be present */ for (i=0; i < jdata->apps->size; i++) { if (NULL == (app = (orte_app_context_t*)opal_pointer_array_get_item(jdata->apps, i))) { continue; } if (NULL != app->add_host) { if (4 < opal_output_get_verbosity(orte_ras_base_framework.framework_output)) { char *fff = opal_argv_join(app->add_host, ','); opal_output(0, "%s ras:base:add_hosts checking add-host %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), fff); free(fff); } if (ORTE_SUCCESS != (rc = orte_util_add_dash_host_nodes(&nodes, app->add_host))) { ORTE_ERROR_LOG(rc); OBJ_DESTRUCT(&nodes); return rc; } /* now indicate that this app is to run across them */ app->dash_host = app->add_host; app->add_host = NULL; } } /* if something was found, we add that to our global pool */ if (!opal_list_is_empty(&nodes)) { /* store the results in the global resource pool - this removes the * list items */ if (ORTE_SUCCESS != (rc = orte_ras_base_node_insert(&nodes, jdata))) { ORTE_ERROR_LOG(rc); } /* cleanup */ OBJ_DESTRUCT(&nodes); } /* shall we display the results? */ if (0 < opal_output_get_verbosity(orte_ras_base_framework.framework_output)) { orte_ras_base_display_alloc(); } return ORTE_SUCCESS; }
static void xcast_recv(int status, orte_process_name_t* sender, opal_buffer_t* buffer, orte_rml_tag_t tg, void* cbdata) { opal_list_item_t *item; orte_namelist_t *nm; int ret, cnt; opal_buffer_t *relay, *rly; orte_daemon_cmd_flag_t command = ORTE_DAEMON_NULL_CMD; opal_buffer_t wireup; opal_byte_object_t *bo; int8_t flag; orte_job_t *jdata; orte_proc_t *rec; opal_list_t coll; orte_grpcomm_signature_t *sig; orte_rml_tag_t tag; OPAL_OUTPUT_VERBOSE((1, orte_grpcomm_base_framework.framework_output, "%s grpcomm:direct:xcast:recv: with %d bytes", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), (int)buffer->bytes_used)); /* we need a passthru buffer to send to our children */ rly = OBJ_NEW(opal_buffer_t); opal_dss.copy_payload(rly, buffer); /* get the signature that we do not need */ cnt=1; if (ORTE_SUCCESS != (ret = opal_dss.unpack(buffer, &sig, &cnt, ORTE_SIGNATURE))) { ORTE_ERROR_LOG(ret); ORTE_FORCED_TERMINATE(ret); return; } OBJ_RELEASE(sig); /* get the target tag */ cnt=1; if (ORTE_SUCCESS != (ret = opal_dss.unpack(buffer, &tag, &cnt, ORTE_RML_TAG))) { ORTE_ERROR_LOG(ret); ORTE_FORCED_TERMINATE(ret); return; } /* setup a buffer we can pass to ourselves - this just contains * the initial message, minus the headers inserted by xcast itself */ relay = OBJ_NEW(opal_buffer_t); opal_dss.copy_payload(relay, buffer); /* setup the relay list */ OBJ_CONSTRUCT(&coll, opal_list_t); /* if this is headed for the daemon command processor, * then we first need to check for add_local_procs * as that command includes some needed wireup info */ if (ORTE_RML_TAG_DAEMON == tag) { /* peek at the command */ cnt=1; if (ORTE_SUCCESS == (ret = opal_dss.unpack(buffer, &command, &cnt, ORTE_DAEMON_CMD))) { /* if it is add_procs, then... */ if (ORTE_DAEMON_ADD_LOCAL_PROCS == command || ORTE_DAEMON_DVM_NIDMAP_CMD == command) { /* extract the byte object holding the daemonmap */ cnt=1; if (ORTE_SUCCESS != (ret = opal_dss.unpack(buffer, &bo, &cnt, OPAL_BYTE_OBJECT))) { ORTE_ERROR_LOG(ret); goto relay; } /* update our local nidmap, if required - the decode function * knows what to do - it will also free the bytes in the byte object */ if (ORTE_PROC_IS_HNP) { /* no need - already have the info */ if (NULL != bo) { if (NULL != bo->bytes) { free(bo->bytes); } free(bo); } } else { OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base_framework.framework_output, "%s grpcomm:direct:xcast updating daemon nidmap", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); if (ORTE_SUCCESS != (ret = orte_util_decode_daemon_nodemap(bo))) { ORTE_ERROR_LOG(ret); goto relay; } } /* update the routing plan */ orte_routed.update_routing_plan(); /* see if we have wiring info as well */ cnt=1; if (ORTE_SUCCESS != (ret = opal_dss.unpack(buffer, &flag, &cnt, OPAL_INT8))) { ORTE_ERROR_LOG(ret); goto relay; } if (ORTE_DAEMON_ADD_LOCAL_PROCS == command) { OBJ_RELEASE(relay); relay = OBJ_NEW(opal_buffer_t); /* repack the command */ if (OPAL_SUCCESS != (ret = opal_dss.pack(relay, &command, 1, ORTE_DAEMON_CMD))) { ORTE_ERROR_LOG(ret); goto relay; } if (0 == flag) { /* copy the remainder of the payload */ opal_dss.copy_payload(relay, buffer); /* no - just return */ goto relay; } } /* unpack the byte object */ cnt=1; if (ORTE_SUCCESS != (ret = opal_dss.unpack(buffer, &bo, &cnt, OPAL_BYTE_OBJECT))) { ORTE_ERROR_LOG(ret); goto relay; } if (0 < bo->size) { /* load it into a buffer */ OBJ_CONSTRUCT(&wireup, opal_buffer_t); opal_dss.load(&wireup, bo->bytes, bo->size); /* pass it for processing */ if (ORTE_SUCCESS != (ret = orte_routed.init_routes(ORTE_PROC_MY_NAME->jobid, &wireup))) { 
ORTE_ERROR_LOG(ret); OBJ_DESTRUCT(&wireup); goto relay; } /* done with the wireup buffer - dump it */ OBJ_DESTRUCT(&wireup); } free(bo); if (ORTE_DAEMON_ADD_LOCAL_PROCS == command) { /* copy the remainder of the payload */ opal_dss.copy_payload(relay, buffer); } } } else { ORTE_ERROR_LOG(ret); goto CLEANUP; } } relay: /* get the list of next recipients from the routed module */ orte_routed.get_routing_list(&coll); /* if list is empty, no relay is required */ if (opal_list_is_empty(&coll)) { OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base_framework.framework_output, "%s grpcomm:direct:send_relay - recipient list is empty!", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); OBJ_RELEASE(rly); goto CLEANUP; } /* send the message to each recipient on list, deconstructing it as we go */ while (NULL != (item = opal_list_remove_first(&coll))) { nm = (orte_namelist_t*)item; OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base_framework.framework_output, "%s grpcomm:direct:send_relay sending relay msg of %d bytes to %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), (int)rly->bytes_used, ORTE_NAME_PRINT(&nm->name))); OBJ_RETAIN(rly); /* check the state of the recipient - no point * sending to someone not alive */ jdata = orte_get_job_data_object(nm->name.jobid); if (NULL == (rec = (orte_proc_t*)opal_pointer_array_get_item(jdata->procs, nm->name.vpid))) { opal_output(0, "%s grpcomm:direct:send_relay proc %s not found - cannot relay", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(&nm->name)); OBJ_RELEASE(rly); OBJ_RELEASE(item); continue; } if (ORTE_PROC_STATE_RUNNING < rec->state) { opal_output(0, "%s grpcomm:direct:send_relay proc %s not running - cannot relay", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(&nm->name)); OBJ_RELEASE(rly); OBJ_RELEASE(item); continue; } if (ORTE_SUCCESS != (ret = orte_rml.send_buffer_nb(&nm->name, rly, ORTE_RML_TAG_XCAST, orte_rml_send_callback, NULL))) { ORTE_ERROR_LOG(ret); OBJ_RELEASE(rly); OBJ_RELEASE(item); continue; } OBJ_RELEASE(item); } OBJ_RELEASE(rly); // retain accounting CLEANUP: /* cleanup */ OBJ_DESTRUCT(&coll); /* now send the relay buffer to myself for processing */ if (ORTE_DAEMON_DVM_NIDMAP_CMD != command) { if (ORTE_SUCCESS != (ret = orte_rml.send_buffer_nb(ORTE_PROC_MY_NAME, relay, tag, orte_rml_send_callback, NULL))) { ORTE_ERROR_LOG(ret); OBJ_RELEASE(relay); } } }
/**
 * Discover available (pre-allocated) nodes. Allocate the
 * requested number of nodes/process slots to the job.
 */
static int orte_ras_ccp_allocate(opal_list_t *nodes)
{
    int ret, i;
    size_t len;
    char *cluster_head = NULL;
    HRESULT hr = S_OK;
    ICluster* pCluster = NULL;

    /* CCP is not thread safe. Use the apartment model. */
    CoInitializeEx(NULL, COINIT_APARTMENTTHREADED);

    /* Create the Cluster object. */
    hr = CoCreateInstance( __uuidof(Cluster),
                           NULL,
                           CLSCTX_INPROC_SERVER,
                           __uuidof(ICluster),
                           reinterpret_cast<void **> (&pCluster) );
    if (FAILED(hr)) {
        OPAL_OUTPUT_VERBOSE((1, orte_ras_base.ras_output,
                             "ras:ccp:allocate: failed to create cluster object!"));
        return ORTE_ERROR;
    }

    if (NULL == orte_ccp_headnode) {
        /* Get the cluster head node's name */
        _dupenv_s(&cluster_head, &len, "LOGONSERVER");
        if (cluster_head == NULL) {
            OPAL_OUTPUT_VERBOSE((1, orte_ras_base.ras_output,
                                 "ras:ccp:allocate: cannot find cluster head node!"));
            return ORTE_ERROR;
        }
        /* Get rid of the beginning '//'. */
        for (i = 0; i < len - 2; i++) {
            cluster_head[i] = cluster_head[i+2];
            cluster_head[i+2] = '\0';
        }
    } else {
        cluster_head = orte_ccp_headnode;
    }

    /* Connect to the cluster's head node */
    hr = pCluster->Connect(_bstr_t(cluster_head));
    if (FAILED(hr)) {
        ras_get_cluster_message(pCluster);
        OPAL_OUTPUT_VERBOSE((1, orte_ras_base.ras_output,
                             "ras:ccp:allocate: connection failed!"));
        return ORTE_ERROR;
    }

    if (ORTE_SUCCESS != (ret = discover(nodes, pCluster))) {
        OPAL_OUTPUT_VERBOSE((1, orte_ras_base.ras_output,
                             "ras:ccp:allocate: discover failed!"));
        return ret;
    }

    /* in the CCP world, if we didn't find anything, then this
     * is an unrecoverable error - report it */
    if (opal_list_is_empty(nodes)) {
        orte_show_help("help-ras-ccp.txt", "no-nodes-found", true);
        return ORTE_ERR_NOT_FOUND;
    }

    /* All finished, release the cluster object */
    pCluster->Release();
    CoUninitialize();

    return ret;
}
static int orte_rds_hostfile_query(orte_jobid_t job) { opal_list_t existing; opal_list_t updates, rds_updates; opal_list_item_t *item; orte_rds_cell_desc_t *rds_item; orte_rds_cell_attr_t *new_attr; orte_ras_node_t *ras_item; int rc; if (orte_rds_hostfile_queried) { /* if we have already been queried, then * our info is on the registry, so just * return. Note that this restriction * may eventually be lifted - ideally, * we might check to see if this is a * new file name and go ahead with the * query if so. */ return ORTE_SUCCESS; } orte_rds_hostfile_queried = true; OBJ_CONSTRUCT(&existing, opal_list_t); OBJ_CONSTRUCT(&updates, opal_list_t); OBJ_CONSTRUCT(&rds_updates, opal_list_t); rc = orte_ras_base_node_query(&existing); if(ORTE_SUCCESS != rc) { goto cleanup; } rc = mca_base_param_find("rds", "hostfile", "path"); mca_base_param_lookup_string(rc, &mca_rds_hostfile_component.path); rc = orte_rds_hostfile_parse(mca_rds_hostfile_component.path, &existing, &updates); if (ORTE_ERR_NOT_FOUND == rc) { if(mca_rds_hostfile_component.default_hostfile) { rc = ORTE_SUCCESS; } else { opal_show_help("help-rds-hostfile.txt", "rds:no-hostfile", true, mca_rds_hostfile_component.path); } goto cleanup; } else if (ORTE_SUCCESS != rc) { goto cleanup; } if ( !opal_list_is_empty(&updates) ) { /* Convert RAS update list to RDS update list */ for ( ras_item = (orte_ras_node_t*)opal_list_get_first(&updates); ras_item != (orte_ras_node_t*)opal_list_get_end(&updates); ras_item = (orte_ras_node_t*)opal_list_get_next(ras_item)) { rds_item = OBJ_NEW(orte_rds_cell_desc_t); if (NULL == rds_item) { ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); return ORTE_ERR_OUT_OF_RESOURCE; } rds_item->site = strdup("Hostfile"); rds_item->name = strdup(ras_item->node_name); if (need_cellid) { #if 0 /* JJH Repair when cellid's are fixed */ /* Create a new cellid for this hostfile */ rc = orte_ns.create_cellid(&local_cellid, rds_item->site, rds_item->name); if (ORTE_SUCCESS != rc) { ORTE_ERROR_LOG(rc); return rc; } #endif local_cellid = 0; need_cellid = false; } rds_item->cellid = local_cellid; ras_item->node_cellid = local_cellid; new_attr = OBJ_NEW(orte_rds_cell_attr_t); if (NULL == new_attr) { ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); return ORTE_ERR_OUT_OF_RESOURCE; } new_attr->keyval.key = strdup(ORTE_RDS_NAME); new_attr->keyval.value = OBJ_NEW(orte_data_value_t); if (NULL == new_attr->keyval.value) { ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); return ORTE_ERR_OUT_OF_RESOURCE; } new_attr->keyval.value->type = ORTE_STRING; new_attr->keyval.value->data = strdup(ras_item->node_name); opal_list_append(&(rds_item->attributes), &new_attr->super); new_attr = OBJ_NEW(orte_rds_cell_attr_t); if (NULL == new_attr) { ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); return ORTE_ERR_OUT_OF_RESOURCE; } new_attr->keyval.key = strdup(ORTE_CELLID_KEY); new_attr->keyval.value = OBJ_NEW(orte_data_value_t); if (NULL == new_attr->keyval.value) { ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); return ORTE_ERR_OUT_OF_RESOURCE; } new_attr->keyval.value->type = ORTE_CELLID; if (ORTE_SUCCESS != (rc = orte_dss.copy(&(new_attr->keyval.value->data), &(rds_item->cellid), ORTE_CELLID))) { ORTE_ERROR_LOG(rc); return rc; } opal_list_append(&(rds_item->attributes), &new_attr->super); opal_list_append(&rds_updates, &rds_item->super); } /* Insert the new node into the RDS */ rc = orte_rds.store_resource(&rds_updates); if (ORTE_SUCCESS != rc) { goto cleanup; } /* Then the RAS, since we can assume that any * resources listed in the hostfile have been * already allocated for our use. 
*/ rc = orte_ras_base_node_insert(&updates); if (ORTE_SUCCESS != rc) { goto cleanup; } /* and now, indicate that ORTE should override any oversubscribed conditions * based on local hardware limits since the user (a) might not have * provided us any info on the #slots for a node, and (b) the user * might have been wrong! If we don't check the number of local physical * processors, then we could be too aggressive on our sched_yield setting * and cause performance problems. */ rc = orte_ras_base_set_oversubscribe_override(job); if (ORTE_SUCCESS != rc) { goto cleanup; } } cleanup: if (NULL != mca_rds_hostfile_component.path) { free(mca_rds_hostfile_component.path); mca_rds_hostfile_component.path = NULL; } while(NULL != (item = opal_list_remove_first(&existing))) { OBJ_RELEASE(item); } while(NULL != (item = opal_list_remove_first(&updates))) { OBJ_RELEASE(item); } while (NULL != (rds_item = (orte_rds_cell_desc_t*)opal_list_remove_first(&rds_updates))) { while (NULL != (new_attr = (orte_rds_cell_attr_t*)opal_list_remove_first(&(rds_item->attributes)))) { OBJ_RELEASE(new_attr); } OBJ_RELEASE(rds_item); } OBJ_DESTRUCT(&existing); OBJ_DESTRUCT(&updates); OBJ_DESTRUCT(&rds_updates); return rc; }
/* the -host option can always be used in both absolute * and relative mode, so we have to check for pre-existing * allocations if we are to use relative node syntax */ int orte_util_filter_dash_host_nodes(opal_list_t *nodes, char** host_argv) { opal_list_item_t* item; bool found; opal_list_item_t *next; orte_std_cntr_t i, j, k, len_mapped_node=0; int rc; char **mapped_nodes = NULL, **mini_map, *cptr; orte_node_t *node, **nodepool; int nodeidx; int num_empty=0; opal_list_t keep; bool want_all_empty = false; /* if the incoming node list is empty, then there * is nothing to filter! */ if (opal_list_is_empty(nodes)) { return ORTE_SUCCESS; } /* setup for relative node syntax */ nodepool = (orte_node_t**)orte_node_pool->addr; /* Accumulate all of the host name mappings */ for (j = 0; j < opal_argv_count(host_argv); ++j) { mini_map = opal_argv_split(host_argv[j], ','); for (k = 0; NULL != mini_map[k]; ++k) { if ('+' == mini_map[k][0]) { /* see if we specified empty nodes */ if ('e' == mini_map[k][1] || 'E' == mini_map[k][1]) { /* request for empty nodes - do they want * all of them? */ if (NULL != (cptr = strchr(mini_map[k], ':'))) { /* the colon indicates a specific # are requested */ cptr++; /* step past : */ /* put a marker into the list */ cptr--; *cptr = '*'; opal_argv_append_nosize(&mapped_nodes, cptr); } else { /* add a marker to the list */ opal_argv_append_nosize(&mapped_nodes, "*"); want_all_empty = true; } } else if ('n' == mini_map[k][1] || 'N' == mini_map[k][1]) { /* they want a specific relative node #, so * look it up on global pool */ nodeidx = strtol(&mini_map[k][2], NULL, 10); if (nodeidx < 0 || nodeidx > (int)orte_node_pool->size) { /* this is an error */ orte_show_help("help-dash-host.txt", "dash-host:relative-node-out-of-bounds", true, nodeidx, mini_map[k]); rc = ORTE_ERR_SILENT; goto cleanup; } /* if the HNP is not allocated, then we need to * adjust the index as the node pool is offset * by one */ if (!orte_hnp_is_allocated) { nodeidx++; } /* see if that location is filled */ if (NULL == nodepool[nodeidx]) { /* this is an error */ orte_show_help("help-dash-host.txt", "dash-host:relative-node-not-found", true, nodeidx, mini_map[k]); rc = ORTE_ERR_SILENT; goto cleanup; } /* add this node to the list */ opal_argv_append_nosize(&mapped_nodes, nodepool[nodeidx]->name); } else { /* invalid relative node syntax */ orte_show_help("help-dash-host.txt", "dash-host:invalid-relative-node-syntax", true, mini_map[k]); rc = ORTE_ERR_SILENT; goto cleanup; } } else { /* non-relative syntax - add to list */ if (OPAL_SUCCESS != (rc = opal_argv_append_nosize(&mapped_nodes, mini_map[k]))) { goto cleanup; } } } opal_argv_free(mini_map); } /* Did we find anything? If not, then do nothing */ if (NULL == mapped_nodes && 0 == num_empty) { return ORTE_SUCCESS; } /* we found some info - filter what is on the list... * i.e., go through the list and remove any nodes that * were -not- included on the -host list. * * NOTE: The following logic is based on knowing that * any node can only be included on the incoming * nodes list ONCE. */ len_mapped_node = opal_argv_count(mapped_nodes); /* setup a working list so we can put the final list * of nodes in order. This way, if the user specifies a * set of nodes, we will use them in the order in which * they were specifed. 
Note that empty node requests * will always be appended to the end */ OBJ_CONSTRUCT(&keep, opal_list_t); for (i = 0; i < len_mapped_node; ++i) { /* check if we are supposed to add some number of empty * nodes here */ if ('*' == mapped_nodes[i][0]) { /* if there is a number after the '*', then we are * to insert a specific # of nodes */ if ('\0' == mapped_nodes[i][1]) { /* take all empty nodes from the list */ num_empty = INT_MAX; } else { /* extract number of nodes to take */ num_empty = strtol(&mapped_nodes[i][1], NULL, 10); } /* search for empty nodes and take them */ item = opal_list_get_first(nodes); while (0 < num_empty && item != opal_list_get_end(nodes)) { next = opal_list_get_next(item); /* save this position */ node = (orte_node_t*)item; /* see if this node is empty */ if (0 == node->slots_inuse) { /* check to see if it is specified later */ for (j=i+1; j < len_mapped_node; j++) { if (0 == strcmp(mapped_nodes[j], node->name)) { /* specified later - skip this one */ goto skipnode; } } /* remove item from list */ opal_list_remove_item(nodes, item); /* xfer to keep list */ opal_list_append(&keep, item); --num_empty; } skipnode: item = next; } } else { /* we are looking for a specific node on the list * we have a match if one of two conditions is met: * 1. the node_name and mapped_nodes directly match * 2. the node_name is the local system name AND * either the mapped_node is "localhost" OR it * is a local interface as found by opal_ifislocal */ item = opal_list_get_first(nodes); while (item != opal_list_get_end(nodes)) { next = opal_list_get_next(item); /* save this position */ node = (orte_node_t*)item; /* search -host list to see if this one is found */ found = false; if ((0 == strcmp(node->name, mapped_nodes[i]) || (0 == strcmp(node->name, orte_process_info.nodename) && (0 == strcmp(mapped_nodes[i], "localhost") || opal_ifislocal(mapped_nodes[i]))))) { /* remove item from list */ opal_list_remove_item(nodes, item); /* xfer to keep list */ opal_list_append(&keep, item); break; } item = next; } } /* done with the mapped entry */ free(mapped_nodes[i]); mapped_nodes[i] = NULL; } /* was something specified that was -not- found? */ for (i=0; i < len_mapped_node; i++) { if (NULL != mapped_nodes[i]) { orte_show_help("help-dash-host.txt", "not-all-mapped-alloc", true, mapped_nodes[i]); rc = ORTE_ERR_SILENT; goto cleanup; } } /* clear the rest of the nodes list */ while (NULL != (item = opal_list_remove_first(nodes))) { OBJ_RELEASE(item); } /* the nodes list has been cleared - rebuild it in order */ while (NULL != (item = opal_list_remove_first(&keep))) { opal_list_append(nodes, item); } /* did they ask for more than we could provide */ if (!want_all_empty && 0 < num_empty) { orte_show_help("help-dash-host.txt", "dash-host:not-enough-empty", true, num_empty); rc = ORTE_ERR_SILENT; goto cleanup; } rc = ORTE_SUCCESS; /* done filtering existing list */ cleanup: for (i=0; i < len_mapped_node; i++) { if (NULL != mapped_nodes[i]) { free(mapped_nodes[i]); mapped_nodes[i] = NULL; } } if (NULL != mapped_nodes) { free(mapped_nodes); } return rc; }
/*
 * Start monitoring of local processes
 */
static void start(orte_jobid_t jobid)
{
    mca_base_component_t *c = &mca_sensor_file_component.super.base_version;
    opal_list_item_t *item;
    orte_odls_job_t *jobdat;
    orte_app_context_t *app, *aptr;
    int rc, tmp;
    char *filename;
    file_tracker_t *ft;

    /* cannot monitor my own job */
    if (jobid == ORTE_PROC_MY_NAME->jobid && ORTE_JOBID_WILDCARD != jobid) {
        return;
    }

    OPAL_OUTPUT_VERBOSE((1, orte_sensor_base.output,
                         "%s starting file monitoring for job %s",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                         ORTE_JOBID_PRINT(jobid)));

    /* get the local jobdat for this job */
    for (item = opal_list_get_first(&orte_local_jobdata);
         item != opal_list_get_end(&orte_local_jobdata);
         item = opal_list_get_next(item)) {
        jobdat = (orte_odls_job_t*)item;
        if (jobid == jobdat->jobid || ORTE_JOBID_WILDCARD == jobid) {
            /* must be at least one app_context, so use the first one found */
            app = NULL;
            for (tmp = 0; tmp < jobdat->apps.size; tmp++) {
                if (NULL != (aptr = (orte_app_context_t*)opal_pointer_array_get_item(&jobdat->apps, tmp))) {
                    app = aptr;
                    break;
                }
            }
            if (NULL == app) {
                /* got a problem */
                ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
                continue;
            }

            /* search the environ to get the filename */
            if (ORTE_SUCCESS != (rc = mca_base_param_find_string(c, "filename", app->env, &filename))) {
                /* was a default file given */
                if (NULL == mca_sensor_file_component.file) {
                    /* can't do anything without a file */
                    OPAL_OUTPUT_VERBOSE((1, orte_sensor_base.output,
                                         "%s sensor:file no file for job %s",
                                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                         ORTE_JOBID_PRINT(jobid)));
                    continue;
                }
                filename = mca_sensor_file_component.file;
            }

            /* create the tracking object */
            ft = OBJ_NEW(file_tracker_t);
            ft->jobid = jobid;
            ft->file = strdup(filename);

            /* search the environ to see what we are checking */
            tmp = 0;
            if (ORTE_SUCCESS != (rc = mca_base_param_find_int(c, "check_size", app->env, &tmp))) {
                /* was a default value given */
                if (0 < mca_sensor_file_component.check_size) {
                    ft->check_size = OPAL_INT_TO_BOOL(mca_sensor_file_component.check_size);
                }
            } else {
                ft->check_size = OPAL_INT_TO_BOOL(tmp);
            }
            tmp = 0;
            if (ORTE_SUCCESS != (rc = mca_base_param_find_int(c, "check_access", app->env, &tmp))) {
                /* was a default value given */
                if (0 < mca_sensor_file_component.check_access) {
                    ft->check_access = OPAL_INT_TO_BOOL(mca_sensor_file_component.check_access);
                }
            } else {
                ft->check_access = OPAL_INT_TO_BOOL(tmp);
            }
            tmp = 0;
            if (ORTE_SUCCESS != (rc = mca_base_param_find_int(c, "check_mod", app->env, &tmp))) {
                /* was a default value given */
                if (0 < mca_sensor_file_component.check_mod) {
                    ft->check_mod = OPAL_INT_TO_BOOL(mca_sensor_file_component.check_mod);
                }
            } else {
                ft->check_mod = OPAL_INT_TO_BOOL(tmp);
            }
            tmp = 0;
            if (ORTE_SUCCESS != (rc = mca_base_param_find_int(c, "limit", app->env, &tmp))) {
                ft->limit = mca_sensor_file_component.limit;
            } else {
                ft->limit = tmp;
            }
            opal_list_append(&jobs, &ft->super);
            OPAL_OUTPUT_VERBOSE((1, orte_sensor_base.output,
                                 "%s file %s monitored for %s%s%s with limit %d",
                                 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                 ft->file,
                                 ft->check_size ? "SIZE:" : " ",
                                 ft->check_access ? "ACCESS TIME:" : " ",
                                 ft->check_mod ? "MOD TIME" : " ",
                                 ft->limit));
        }
    }

    /* start sampling */
    if (NULL == sample_ev && !opal_list_is_empty(&jobs)) {
        /* startup a timer to wake us up periodically
         * for a data sample */
        sample_ev = (opal_event_t *) malloc(sizeof(opal_event_t));
        opal_event_evtimer_set(opal_event_base, sample_ev, sample, sample_ev);
        sample_time.tv_sec = mca_sensor_file_component.sample_rate;
        sample_time.tv_usec = 0;
        opal_event_evtimer_add(sample_ev, &sample_time);
    }
    return;
}