Beispiel #1
0
static void mca_coll_hcoll_module_destruct(mca_coll_hcoll_module_t *hcoll_module)
{
    opal_list_item_t  *item, *item_next;
    opal_list_t *am;
    mca_coll_hcoll_module_t *module;
    ompi_communicator_t *comm;
    int context_destroyed;

    am = &mca_coll_hcoll_component.active_modules;

    if (hcoll_module->comm == &ompi_mpi_comm_world.comm){
        /* If we get here then we are detroying MPI_COMM_WORLD now. So,
         * it is safe to destory all the other communicators and corresponding
         * hcoll contexts that could still be on the "active_modules" list.
         */
        item = opal_list_get_first(am);
        while (item != opal_list_get_end(am)){
            item_next = opal_list_get_next(item);
            module = ((mca_coll_hcoll_module_list_item_wrapper_t *)item)->module;
            comm = module->comm;
            context_destroyed = 0;
            while(!context_destroyed){
                hcoll_destroy_context(module->hcoll_context,
                                      (rte_grp_handle_t)comm,
                                      &context_destroyed);
            }
            module->hcoll_context = NULL;
            OBJ_RELEASE(comm);
            opal_list_remove_item(am,item);
            OBJ_RELEASE(item);
            item = item_next;
        }

        /* Now destory the comm_world hcoll context as well */
        context_destroyed = 0;
        while(!context_destroyed){
            hcoll_destroy_context(hcoll_module->hcoll_context,
                                  (rte_grp_handle_t)hcoll_module->comm,
                                  &context_destroyed);
        }
    }

    OBJ_RELEASE(hcoll_module->previous_barrier_module);
    OBJ_RELEASE(hcoll_module->previous_bcast_module);
    OBJ_RELEASE(hcoll_module->previous_reduce_module);
    OBJ_RELEASE(hcoll_module->previous_allreduce_module);
    OBJ_RELEASE(hcoll_module->previous_allgather_module);
    OBJ_RELEASE(hcoll_module->previous_allgatherv_module);
    OBJ_RELEASE(hcoll_module->previous_gather_module);
    OBJ_RELEASE(hcoll_module->previous_gatherv_module);
    OBJ_RELEASE(hcoll_module->previous_alltoall_module);
    OBJ_RELEASE(hcoll_module->previous_alltoallv_module);
    OBJ_RELEASE(hcoll_module->previous_alltoallw_module);
    OBJ_RELEASE(hcoll_module->previous_reduce_scatter_module);
    OBJ_RELEASE(hcoll_module->previous_ibarrier_module);
    OBJ_RELEASE(hcoll_module->previous_ibcast_module);
    OBJ_RELEASE(hcoll_module->previous_iallreduce_module);
    OBJ_RELEASE(hcoll_module->previous_iallgather_module);

    mca_coll_hcoll_module_clear(hcoll_module);
}
Beispiel #2
0
static int store(const opal_identifier_t *uid,
                 opal_db_locality_t locality,
                 const char *key, const void *data,
                 opal_data_type_t type)
{
    proc_data_t *proc_data;
    opal_value_t *kv;
    opal_byte_object_t *boptr;
    opal_identifier_t id;

    /* to protect alignment, copy the data across */
    memcpy(&id, uid, sizeof(opal_identifier_t));

    /* we are at the bottom of the store priorities, so
     * if this fell to us, we store it
     */
    opal_output_verbose(1, opal_db_base_framework.framework_output,
                        "db:hash:store storing data for proc %" PRIu64 " at locality %d",
                        id, (int)locality);

    /* lookup the proc data object for this proc */
    if (NULL == (proc_data = lookup_opal_proc(&hash_data, id))) {
        /* unrecoverable error */
        OPAL_OUTPUT_VERBOSE((5, opal_db_base_framework.framework_output,
                             "db:hash:store: storing key %s[%s] for proc %" PRIu64 " unrecoverably failed",
                             key, opal_dss.lookup_data_type(type), id));
        return OPAL_ERR_OUT_OF_RESOURCE;
    }

    /* see if we already have this key in the data - means we are updating
     * a pre-existing value
     */
    kv = lookup_keyval(proc_data, key);
    OPAL_OUTPUT_VERBOSE((5, opal_db_base_framework.framework_output,
                         "db:hash:store: %s key %s[%s] for proc %" PRIu64 "",
                         (NULL == kv ? "storing" : "updating"),
                         key, opal_dss.lookup_data_type(type), id));

    if (NULL != kv) {
        opal_list_remove_item(&proc_data->data, &kv->super);
        OBJ_RELEASE(kv);
    }
    kv = OBJ_NEW(opal_value_t);
    kv->key = strdup(key);
    opal_list_append(&proc_data->data, &kv->super);

    /* the type could come in as an OPAL one (e.g., OPAL_VPID). Since
     * the value is an OPAL definition, it cannot cover OPAL data
     * types, so convert to the underlying OPAL type
     */
    switch (type) {
    case OPAL_STRING:
        kv->type = OPAL_STRING;
        if (NULL != data) {
            kv->data.string = strdup( (const char *) data);
        } else {
            kv->data.string = NULL;
        }
        break;
    case OPAL_UINT32:
        if (NULL == data) {
            OPAL_ERROR_LOG(OPAL_ERR_BAD_PARAM);
            return OPAL_ERR_BAD_PARAM;
        }
        kv->type = OPAL_UINT32;
        kv->data.uint32 = *(uint32_t*)data;
        break;
    case OPAL_UINT16:
        if (NULL == data) {
            OPAL_ERROR_LOG(OPAL_ERR_BAD_PARAM);
            return OPAL_ERR_BAD_PARAM;
        }
        kv->type = OPAL_UINT16;
        kv->data.uint16 = *(uint16_t*)(data);
        break;
    case OPAL_INT:
        if (NULL == data) {
            OPAL_ERROR_LOG(OPAL_ERR_BAD_PARAM);
            return OPAL_ERR_BAD_PARAM;
        }
        kv->type = OPAL_INT;
        kv->data.integer = *(int*)(data);
        break;
    case OPAL_UINT:
        if (NULL == data) {
            OPAL_ERROR_LOG(OPAL_ERR_BAD_PARAM);
            return OPAL_ERR_BAD_PARAM;
        }
        kv->type = OPAL_UINT;
        kv->data.uint = *(unsigned int*)(data);
        break;
    case OPAL_BYTE_OBJECT:
        kv->type = OPAL_BYTE_OBJECT;
        boptr = (opal_byte_object_t*)data;
        if (NULL != boptr && NULL != boptr->bytes && 0 < boptr->size) {
            kv->data.bo.bytes = (uint8_t *) malloc(boptr->size);
            memcpy(kv->data.bo.bytes, boptr->bytes, boptr->size);
            kv->data.bo.size = boptr->size;
        } else {
            kv->data.bo.bytes = NULL;
            kv->data.bo.size = 0;
        }
        break;
    default:
        OPAL_ERROR_LOG(OPAL_ERR_NOT_SUPPORTED);
        return OPAL_ERR_NOT_SUPPORTED;
    }

    return OPAL_SUCCESS;
}
Beispiel #3
0
int main(int argc, char **argv)
{
    /* local variables */
    opal_list_t list, x;
    size_t indx,i,list_size, tmp_size_1, tmp_size_2,size_elements;
    int error_cnt, rc;
    test_data_t *elements, *ele;
    opal_list_item_t *item;

    rc = opal_init_util(&argc, &argv);
    test_verify_int(OPAL_SUCCESS, rc);
    if (OPAL_SUCCESS != rc) {
        test_finalize();
        exit(1);
    }

    test_init("opal_list_t");

    /* initialize list */
    OBJ_CONSTRUCT(&list, opal_list_t);
    OBJ_CONSTRUCT(&x, opal_list_t);

    /* check length of list */
    list_size=opal_list_get_size(&list);
    if( 0 == list_size ) {
        test_success();
    } else {
        test_failure(" opal_list_get_size");
    }

    /* check for empty */
    if (opal_list_is_empty(&list)) {
        test_success();
    } else {
        test_failure(" opal_list_is_empty(empty list)");
    }

    /* create test elements */
    size_elements=4;
    elements=(test_data_t *)malloc(sizeof(test_data_t)*size_elements);
    assert(elements);
    for(i=0 ; i < size_elements ; i++) {
        OBJ_CONSTRUCT(elements + i, test_data_t);
        (elements+i)->data=i;
    }

    /* populate list */
    for(i=0 ; i < size_elements ; i++) {
        opal_list_append(&list,(opal_list_item_t *)(elements+i));
    }
    list_size=opal_list_get_size(&list);
    if( list_size == size_elements ) {
        test_success();
    } else {
        test_failure(" populating list");
    }

    /* checking for empty on non-empty list */
    if (!opal_list_is_empty(&list)) {
        test_success();
    } else {
        test_failure(" opal_list_is_empty(non-empty list)");
    }

    /* check that list is ordered as expected */
    i=0;
    error_cnt=0;
    for(ele = (test_data_t *) opal_list_get_first(&list);
            ele != (test_data_t *) opal_list_get_end(&list);
            ele = (test_data_t *) ((opal_list_item_t *)ele)->opal_list_next) {
        if( ele->data != i )
            error_cnt++;
        i++;
    }
    if( 0 == error_cnt ) {
        test_success();
    } else {
        test_failure(" error in list order ");
    }

    /* check opal_list_get_first */
    ele = (test_data_t *)NULL;
    ele = (test_data_t *) opal_list_get_first(&list);
    assert(ele);
    if( 0 == ele->data ) {
        test_success();
    } else {
        test_failure(" error in opal_list_get_first");
    }
    i=0;
    for(ele = (test_data_t *) opal_list_get_first(&list);
            ele != (test_data_t *) opal_list_get_end(&list);
            ele = (test_data_t *) ((opal_list_item_t *)ele)->opal_list_next) {
        i++;
    }
    if( size_elements == i ) {
        test_success();
    } else {
        test_failure(" error in opal_list_get_first - list size changed ");
    }

    /* check opal_list_get_last */
    ele = (test_data_t *)NULL;
    ele = (test_data_t *) opal_list_get_last(&list);
    assert(ele);
    if( (size_elements-1) == ele->data ) {
        test_success();
    } else {
        test_failure(" error in opal_list_get_last");
    }
    i=0;
    for(ele = (test_data_t *) opal_list_get_first(&list);
            ele != (test_data_t *) opal_list_get_end(&list);
            ele = (test_data_t *) ((opal_list_item_t *)ele)->opal_list_next) {
        i++;
    }
    if( size_elements == i ) {
        test_success();
    } else {
        test_failure(" error in opal_list_get_first - list size changed ");
    }

    /* check opal_list_remove_first */
    ele = (test_data_t *)NULL;
    ele = (test_data_t *) opal_list_remove_first(&list);
    assert(ele);
    if( 0 == ele->data ) {
        test_success();
    } else {
        test_failure(" error in opal_list_remove_first");
    }
    i=0;
    for(ele = (test_data_t *) opal_list_get_first(&list);
            ele != (test_data_t *) opal_list_get_end(&list);
            ele = (test_data_t *) ((opal_list_item_t *)ele)->opal_list_next) {
        i++;
    }
    if( (size_elements-1) == i ) {
        test_success();
    } else {
        test_failure(" error in opal_list_remove_first - list size changed ");
    }

    /* test opal_list_prepend */
    opal_list_prepend(&list,(opal_list_item_t *)elements);
    ele = (test_data_t *)NULL;
    ele = (test_data_t *) opal_list_get_first(&list);
    assert(ele);
    if( 0 == ele->data ) {
        test_success();
    } else {
        test_failure(" error in opal_list_prepend");
    }
    i=0;
    for(ele = (test_data_t *) opal_list_get_first(&list);
            ele != (test_data_t *) opal_list_get_end(&list);
            ele = (test_data_t *) ((opal_list_item_t *)ele)->opal_list_next) {
        i++;
    }
    if( size_elements == i ) {
        test_success();
    } else {
        test_failure(" error in opal_list_prepend - list size changed ");
    }

    /* check opal_list_remove_last */
    ele = (test_data_t *)NULL;
    ele = (test_data_t *) opal_list_remove_last(&list);
    assert(ele);
    if( (size_elements-1) == ele->data ) {
        test_success();
    } else {
        test_failure(" error in opal_list_remove_last");
    }
    i=0;
    for(ele = (test_data_t *) opal_list_get_first(&list);
            ele != (test_data_t *) opal_list_get_end(&list);
            ele = (test_data_t *) ((opal_list_item_t *)ele)->opal_list_next) {
        i++;
    }
    if( (size_elements-1) == i ) {
        test_success();
    } else {
        test_failure(" error in opal_list_remove_last - list size changed ");
    }

    /* test opal_list_append */
    opal_list_append(&list,(opal_list_item_t *)(elements+size_elements-1));
    ele = (test_data_t *)NULL;
    ele = (test_data_t *) opal_list_get_last(&list);
    assert(ele);
    if( (size_elements-1) == ele->data ) {
        test_success();
    } else {
        test_failure(" error in opal_list_append");
    }
    i=0;
    for(ele = (test_data_t *) opal_list_get_first(&list);
            ele != (test_data_t *) opal_list_get_end(&list);
            ele = (test_data_t *) ((opal_list_item_t *)ele)->opal_list_next) {
        i++;
    }
    if( size_elements == i ) {
        test_success();
    } else {
        test_failure(" error in opal_list_append - list size changed ");
    }

    /* remove element from list */
    indx=size_elements/2;
    if( 0 == indx )
        indx=1;
    assert(2 <= size_elements);
    ele = (test_data_t *)NULL;
    ele = (test_data_t *) 
        opal_list_remove_item(&list,(opal_list_item_t *)(elements+indx));
    assert(ele);
    if( (indx-1) == ele->data ) {
        test_success();
    } else {
        test_failure(" error in opal_list_remove - previous");
    }
    ele=(test_data_t *)(((opal_list_item_t *)ele)->opal_list_next);
    if( (indx+1) == ele->data ) {
        test_success();
    } else {
        test_failure(" error in opal_list_remove - next");
    }
    i=0;
    for(ele = (test_data_t *) opal_list_get_first(&list);
            ele != (test_data_t *) opal_list_get_end(&list);
            ele = (test_data_t *) ((opal_list_item_t *)ele)->opal_list_next) {
        i++;
    }
    if( (size_elements-1) == i ) {
        test_success();
    } else {
        test_failure(" error in opal_list_remove - list size changed incorrectly");
    }

    /* test the insert function */
    i=opal_list_insert(&list,(opal_list_item_t *)(elements+indx),indx);
    if( 1 == i ) {
        test_success();
    } else {
        test_failure(" error in opal_list_remove_item \n");
    }

    i=0;
    for(ele = (test_data_t *) opal_list_get_first(&list);
            ele != (test_data_t *) opal_list_get_end(&list);
            ele = (test_data_t *) ((opal_list_item_t *)ele)->opal_list_next) {
        i++;
    }
    if( size_elements == i ) {
        test_success();
    } else {
        test_failure(" error in opal_list_insert - incorrect list length");
    }
    i=0;
    error_cnt=0;
    for(ele = (test_data_t *) opal_list_get_first(&list);
            ele != (test_data_t *) opal_list_get_end(&list);
            ele = (test_data_t *) ((opal_list_item_t *)ele)->opal_list_next) {
        if( ele->data != i )
            error_cnt++;
        i++;
    }
    if( 0 == error_cnt ) {
        test_success();
    } else {
        test_failure(" error in list order - opal_list_remove_item ");
    }

    /* test the splice and join functions  */
    list_size = opal_list_get_size(&list);
    for (i = 0, item = opal_list_get_first(&list) ; 
         i < list_size / 2 ; ++i, item = opal_list_get_next(item)) {
    }
    opal_list_splice(&x, opal_list_get_end(&x),
                     &list, item, opal_list_get_end(&list));
    tmp_size_1 = opal_list_get_size(&list);
    tmp_size_2 = opal_list_get_size(&x);
    if (tmp_size_1 != i) {
        test_failure(" error in splice (size of list)");
    } else if (tmp_size_2 != list_size - tmp_size_1) {
        test_failure(" error in splice (size of x)");
    } else {
        test_success();
    }

    opal_list_join(&list, opal_list_get_end(&list), &x);
    tmp_size_1 = opal_list_get_size(&list);
    tmp_size_2 = opal_list_get_size(&x);
    if (tmp_size_1 != list_size) {
        test_failure(" error in join (size of list)");
    } else if (tmp_size_2 != 0) {
        test_failure(" error in join (size of x)");
    } else {
        test_success();
    }

    if (NULL != elements) free(elements);

    opal_finalize_util ();

    return test_finalize();
}
static int route_lost(const orte_process_name_t *route)
{
    opal_list_item_t *item;
    orte_routed_tree_t *child;
    orte_routed_jobfam_t *jfam;
    uint16_t jfamily;
    int i;

    OPAL_OUTPUT_VERBOSE((2, orte_routed_base_framework.framework_output,
                         "%s route to %s lost",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                         ORTE_NAME_PRINT(route)));

    /* if the route is to a different job family and we are the HNP, look it up */
    if ((ORTE_JOB_FAMILY(route->jobid) != ORTE_JOB_FAMILY(ORTE_PROC_MY_NAME->jobid)) &&
        ORTE_PROC_IS_HNP) {
        jfamily = ORTE_JOB_FAMILY(route->jobid);
        for (i=0; i < orte_routed_jobfams.size; i++) {
            if (NULL == (jfam = (orte_routed_jobfam_t*)opal_pointer_array_get_item(&orte_routed_jobfams, i))) {
                continue;
            }
            if (jfam->job_family == jfamily) {
                OPAL_OUTPUT_VERBOSE((2, orte_routed_base_framework.framework_output,
                                     "%s routed_radix: route to %s lost",
                                     ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                     ORTE_JOB_FAMILY_PRINT(route->jobid)));
                opal_pointer_array_set_item(&orte_routed_jobfams, i, NULL);
                OBJ_RELEASE(jfam);
                break;
            }
        }
    }

    /* if we lose the connection to the lifeline and we are NOT already,
     * in finalize, tell the OOB to abort.
     * NOTE: we cannot call abort from here as the OOB needs to first
     * release a thread-lock - otherwise, we will hang!!
     */
    if (!orte_finalizing &&
        NULL != lifeline &&
        OPAL_EQUAL == orte_util_compare_name_fields(ORTE_NS_CMP_ALL, route, lifeline)) {
        OPAL_OUTPUT_VERBOSE((2, orte_routed_base_framework.framework_output,
                             "%s routed:radix: Connection to lifeline %s lost",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                             ORTE_NAME_PRINT(lifeline)));
        return ORTE_ERR_FATAL;
    }

    /* if we are the HNP or daemon, and the route is a daemon,
     * see if it is one of our children - if so, remove it
     */
    if ((ORTE_PROC_IS_DAEMON || ORTE_PROC_IS_HNP) &&
        route->jobid == ORTE_PROC_MY_NAME->jobid) {
        for (item = opal_list_get_first(&my_children);
             item != opal_list_get_end(&my_children);
             item = opal_list_get_next(item)) {
            child = (orte_routed_tree_t*)item;
            if (child->vpid == route->vpid) {
                opal_list_remove_item(&my_children, item);
                OBJ_RELEASE(item);
                return ORTE_SUCCESS;
            }
        }
    }

    /* we don't care about this one, so return success */
    return ORTE_SUCCESS;
}
Beispiel #5
0
int mca_base_components_filter (const char *framework_name, opal_list_t *components, int output_id,
                                const char *filter_names, uint32_t filter_flags)
{
    mca_base_component_list_item_t *cli, *next;
    char **requested_component_names = NULL;
    bool include_mode, can_use;
    int ret;

    assert (NULL != components);

    if (0 == filter_flags && NULL == filter_names) {
        return OPAL_SUCCESS;
    }

    ret = mca_base_component_parse_requested (filter_names, &include_mode,
                           &requested_component_names);
    if (OPAL_SUCCESS != ret) {
        return ret;
    }

    OPAL_LIST_FOREACH_SAFE(cli, next, components, mca_base_component_list_item_t) {
        const mca_base_component_t *component = cli->cli_component;
        mca_base_open_only_dummy_component_t *dummy =
            (mca_base_open_only_dummy_component_t *) cli->cli_component;

        can_use = use_component (include_mode, (const char **) requested_component_names,
                                 cli->cli_component->mca_component_name);

        if (!can_use || (filter_flags & dummy->data.param_field) != filter_flags) {
            if (can_use && (filter_flags & MCA_BASE_METADATA_PARAM_CHECKPOINT) &&
                !(MCA_BASE_METADATA_PARAM_CHECKPOINT & dummy->data.param_field)) {
                opal_output_verbose(10, output_id,
                                    "mca: base: components_filter: "
                                    "(%s) Component %s is *NOT* Checkpointable - Disabled",
                                    component->reserved,
                                    component->mca_component_name);
            }

            opal_list_remove_item (components, &cli->super);

            mca_base_component_unload (component, output_id);

            OBJ_RELEASE(cli);
        } else if (filter_flags & MCA_BASE_METADATA_PARAM_CHECKPOINT) {
            opal_output_verbose(10, output_id,
                                "mca: base: components_filter: "
                                "(%s) Component %s is Checkpointable",
                                component->reserved,
                                component->mca_component_name);
        }
    }

    if (include_mode) {
        ret = component_find_check (framework_name, requested_component_names, components);
    } else {
        ret = OPAL_SUCCESS;
    }

    if (NULL != requested_component_names) {
        opal_argv_free (requested_component_names);
    }

    return ret;
}
static int staged_mapper(orte_job_t *jdata)
{
    mca_base_component_t *c=&mca_rmaps_staged_component.base_version;
    int i, j, k, rc;
    orte_app_context_t *app;
    opal_list_t node_list, desired;
    orte_std_cntr_t num_slots;
    orte_proc_t *proc;
    orte_node_t *node, *next;
    bool work_to_do = false, first_pass = false;
    opal_list_item_t *item, *it2;
    char *cptr, **minimap;
    orte_vpid_t load;

    /* only use this mapper if it was specified */
    if (NULL == jdata->map->req_mapper ||
        0 != strcasecmp(jdata->map->req_mapper, c->mca_component_name) ||
        ORTE_MAPPING_STAGED != ORTE_GET_MAPPING_POLICY(jdata->map->mapping)) {
        /* I wasn't specified */
        opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
                            "mca:rmaps:staged: job %s not using staged mapper",
                            ORTE_JOBID_PRINT(jdata->jobid));
        return ORTE_ERR_TAKE_NEXT_OPTION;
    }

    opal_output_verbose(2, orte_rmaps_base_framework.framework_output,
                        "%s mca:rmaps:staged: mapping job %s with %d procs",
                        ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                        ORTE_JOBID_PRINT(jdata->jobid), (int)jdata->num_procs);
 
    /* flag that I did the mapping */
    if (NULL != jdata->map->last_mapper) {
        free(jdata->map->last_mapper);
    }
    jdata->map->last_mapper = strdup(c->mca_component_name);

    /* if there are no nodes in the map, then this is our first
     * pass thru this job
     */
    if (0 == jdata->map->num_nodes) {
        first_pass = true;
    }

    /* we assume that the app_contexts are in priority order,
     * with the highest priority being the first entry in the
     * job's app_context array. Loop across the app_contexts
     * in order, looking for apps that have not been
     * fully mapped
     */
    for (i=0; i < jdata->apps->size; i++) {
        if (NULL == (app = (orte_app_context_t*)opal_pointer_array_get_item(jdata->apps, i))) {
            continue;
        }
        /* has it been fully mapped? */
        if (ORTE_APP_STATE_ALL_MAPPED <= app->state) {
            continue;
        }
        opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
                            "%s mca:rmaps:staged: working app %s",
                            ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), app->app);

        /* find nodes that meet any constraints provided in the form of
         * -hostfile or -host directives
         */
        OBJ_CONSTRUCT(&node_list, opal_list_t);
        /* get nodes based on a strict interpretation of the location hints */
        if (ORTE_SUCCESS != (rc = orte_rmaps_base_get_target_nodes(&node_list, &num_slots, app,
                                                                   jdata->map->mapping, false, true))) {
            /* we were unable to get any nodes that match those
             * specified in the app
             */
            if (ORTE_ERR_RESOURCE_BUSY == rc) {
                /* if the return is "busy", then at least one of the
                 * specified resources must exist, but no slots are
                 * currently available. This means there is at least
                 * a hope of eventually being able to map this app
                 * within its specified constraints, so continue working
                 */
                if (orte_soft_locations) {
                    /* if soft locations were given, then we know that
                     * none of the nodes in this allocation are available,
                     * so there is no point in continuing to check the
                     * remaining apps
                     */
                    while (NULL != (item = opal_list_remove_first(&node_list))) {
                        OBJ_RELEASE(item);
                    }
                    OBJ_DESTRUCT(&node_list);
                    goto complete;
                }
                opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
                                    "%s mca:rmaps:staged: all nodes for this app are currently busy",
                                    ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
                OBJ_DESTRUCT(&node_list);
                continue;
            } else {
                /* this indicates that there are no nodes that match
                 * the specified constraints, so there is no hope of
                 * ever being able to execute this app. This is an
                 * unrecoverable error - note that a return of
                 * "silent" means that the function already printed
                 * an error message, so the error_log will print nothing
                 */
                ORTE_ERROR_LOG(rc);
                return rc;
            }
        }

        /* if a max number of procs/node was given for this
         * app, remove all nodes from the list that exceed
         * that limit
         */
        if (0 < app->max_procs_per_node) {
            item = opal_list_get_first(&node_list);
            while (item != opal_list_get_end(&node_list)) {
                it2 = opal_list_get_next(item);
                node = (orte_node_t*)item;
                if (app->max_procs_per_node <= node->num_procs) {
                    opal_list_remove_item(&node_list, item);
                    OBJ_RELEASE(item);
                }
                item = it2;
            }
        }

        /* if we have no available nodes, then move on to next app */
        if (0 == opal_list_get_size(&node_list)) {
            OBJ_DESTRUCT(&node_list);
            continue;
        }

        /* if the app specified locations, soft or not, search the list of nodes
         * for those that match the requested locations and move those
         * to the desired list so we use them first
         */
        if (NULL != app->dash_host) {
            OBJ_CONSTRUCT(&desired, opal_list_t);
            /* no particular order is required */
            for (j=0; j < opal_argv_count(app->dash_host); j++) {
                minimap = opal_argv_split(app->dash_host[j], ',');
                for (k=0; k < opal_argv_count(minimap); k++) {
                    cptr = minimap[k];
                    for (item = opal_list_get_first(&node_list);
                         item != opal_list_get_end(&node_list);
                         item = opal_list_get_next(item)) {
                        node = (orte_node_t*)item;
                        if (0 == strcmp(node->name, cptr) ||
                            (0 == strcmp("localhost", cptr) &&
                             0 == strcmp(node->name, orte_process_info.nodename))) {
                            opal_list_remove_item(&node_list, item);
                            opal_list_append(&desired, item);
                            opal_output_verbose(10, orte_rmaps_base_framework.framework_output,
                                                "%s mca:rmaps:staged: placing node %s on desired list",
                                                ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                                node->name);
                            break;
                        }
                    }
                }
                opal_argv_free(minimap);
            }
            /* if no nodes made the transition and the app specified soft
             * locations, then we can skip to look at the non-desired list
             */
            if (0 == opal_list_get_size(&desired)) {
                OBJ_DESTRUCT(&desired);
                if (orte_soft_locations) {
                    goto process;
                } else {
                    /* move on to next app */
                    continue;
                }
            }
            /* cycle thru the procs for this app and attempt to map them
             * to the desired nodes using a load-balancing algo
             */
            for (j=0; j < app->procs.size; j++) {
                if (NULL == (proc = opal_pointer_array_get_item(&app->procs, j))) {
                    continue;
                }
                if (ORTE_PROC_STATE_UNDEF != proc->state) {
                    /* this proc has already been mapped or executed */
                    opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
                                        "%s mca:rmaps:staged: proc %s has already been mapped",
                                        ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                        ORTE_NAME_PRINT(&proc->name));
                    continue;
                }
                /* flag that there is at least one proc still to
                 * be executed
                 */
                work_to_do = true;
                /* track number mapped */
                jdata->num_mapped++;
                /* find the lightest-loaded node on the desired list */
                node = NULL;
                load = ORTE_VPID_MAX;
                for (item = opal_list_get_first(&desired);
                     item != opal_list_get_end(&desired);
                     item = opal_list_get_next(item)) {
                    next = (orte_node_t*)item;
                    if (next->num_procs < load) {
                        node = next;
                        load = next->num_procs;
                    }
                }
                /* put the proc there */
                proc->node = node;
                proc->nodename = node->name;
                /* the local rank is the number of procs
                 * on this node from this job - we don't
                 * directly track this number, so it must
                 * be found by looping across the node->procs
                 * array and counting it each time. For now,
                 * since we don't use this value in this mode
                 * of operation, just set it to something arbitrary
                 */
                proc->local_rank = node->num_procs;
                /* the node rank is simply the number of procs
                 * on the node at this time
                 */
                proc->node_rank = node->num_procs;
                /* track number of procs on node and number of slots used */
                node->num_procs++;
                node->slots_inuse++;
                opal_output_verbose(10, orte_rmaps_base_framework.framework_output,
                                    "%s Proc %s on node %s: slots %d inuse %d",
                                    ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                    ORTE_NAME_PRINT(&proc->name), node->name,
                                    (int)node->slots, (int)node->slots_inuse);
                if (node->slots_inuse == node->slots) {
                    opal_list_remove_item(&desired, &node->super);
                    OBJ_RELEASE(node);
                }
                if (0 > (rc = opal_pointer_array_add(node->procs, (void*)proc))) {
                    ORTE_ERROR_LOG(rc);
                    OBJ_RELEASE(proc);
                    return rc;
                }
                /* retain the proc struct so that we correctly track its release */
                OBJ_RETAIN(proc);
                proc->state = ORTE_PROC_STATE_INIT;
                /* flag the proc as updated so it will be included
                 * in the next pidmap message
                 */
                proc->updated =true;
                /* add the node to the map, if needed */
                if (!node->mapped) {
                    if (ORTE_SUCCESS > (rc = opal_pointer_array_add(jdata->map->nodes, (void*)node))) {
                        ORTE_ERROR_LOG(rc);
                        return rc;
                    }
                    node->mapped = true;
                    OBJ_RETAIN(node);  /* maintain accounting on object */
                    jdata->map->num_nodes++;
                }
                if (0 == opal_list_get_size(&desired)) {
                    /* nothing more we can do */
                    break;
                }
            }
            /* clear the list */
            while (NULL != (item = opal_list_remove_first(&desired))) {
                OBJ_RELEASE(item);
            }
            OBJ_DESTRUCT(&desired);
        }

    process:
        for (j=0; j < app->procs.size; j++) {
            if (NULL == (proc = opal_pointer_array_get_item(&app->procs, j))) {
                continue;
            }
            if (ORTE_PROC_STATE_UNDEF != proc->state) {
                /* this proc has already been mapped or executed */
	        opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
				    "%s mca:rmaps:staged: proc %s has already been mapped",
				    ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
				    ORTE_NAME_PRINT(&proc->name));
                continue;
            }
            /* find the lightest-loaded node on the node list */
            node = NULL;
            load = ORTE_VPID_MAX;
            for (item = opal_list_get_first(&node_list);
                 item != opal_list_get_end(&node_list);
                 item = opal_list_get_next(item)) {
                next = (orte_node_t*)item;
                if (next->num_procs < load) {
                    node = next;
                    load = next->num_procs;
                }
            }
            /* flag that there is at least one proc still to
             * be executed
             */
            work_to_do = true;
            /* track number mapped */
            jdata->num_mapped++;
            /* map this proc to the first available slot */
            OBJ_RETAIN(node);  /* maintain accounting on object */    
	    opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
				"%s mca:rmaps:staged: assigning proc %s to node %s",
				ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
				ORTE_NAME_PRINT(&proc->name), node->name);
            proc->node = node;
            proc->nodename = node->name;
	    /* the local rank is the number of procs
	     * on this node from this job - we don't
	     * directly track this number, so it must
	     * be found by looping across the node->procs
	     * array and counting it each time. For now,
	     * since we don't use this value in this mode
	     * of operation, just set it to something arbitrary
	     */
	    proc->local_rank = node->num_procs;
	    /* the node rank is simply the number of procs
	     * on the node at this time
	     */
	    proc->node_rank = node->num_procs;
	    /* track number of procs on node and number of slots used */
            node->num_procs++;
            node->slots_inuse++;
            opal_output_verbose(10, orte_rmaps_base_framework.framework_output,
                                "%s Proc %s on node %s: slots %d inuse %d",
                                ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                ORTE_NAME_PRINT(&proc->name), node->name,
                                (int)node->slots, (int)node->slots_inuse);
            if (node->slots_inuse == node->slots) {
                opal_list_remove_item(&node_list, &node->super);
                OBJ_RELEASE(node);
            }
            if (0 > (rc = opal_pointer_array_add(node->procs, (void*)proc))) {
                ORTE_ERROR_LOG(rc);
                OBJ_RELEASE(proc);
                return rc;
            }
            /* retain the proc struct so that we correctly track its release */
            OBJ_RETAIN(proc);
            proc->state = ORTE_PROC_STATE_INIT;
            /* flag the proc as updated so it will be included
             * in the next pidmap message
             */
            proc->updated =true;
            /* add the node to the map, if needed */
            if (!node->mapped) {
                if (ORTE_SUCCESS > (rc = opal_pointer_array_add(jdata->map->nodes, (void*)node))) {
                    ORTE_ERROR_LOG(rc);
                    return rc;
                }
                node->mapped = true;
                OBJ_RETAIN(node);  /* maintain accounting on object */
                jdata->map->num_nodes++;
            }
            if (0 == opal_list_get_size(&node_list)) {
                /* nothing more we can do */
                break;
            }
        }
	/* clear the list */
	while (NULL != (item = opal_list_remove_first(&node_list))) {
            OBJ_RELEASE(item);
	}
	OBJ_DESTRUCT(&node_list);
    }

 complete:
    /* if there isn't at least one proc that can be launched,
     * then indicate that we don't need to proceed with the
     * launch sequence
     */
    if (!work_to_do) {
        return ORTE_ERR_RESOURCE_BUSY;
    }
 
    /* flag that the job was updated so it will be
     * included in the pidmap message
     */
    jdata->updated = true;

    /* if we successfully mapped ALL procs in the first pass,
     * then this job is capable of supporting MPI procs
     */
    if (first_pass && jdata->num_mapped == jdata->num_procs) {
        opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
                            "%s mca:rmaps:staged: job %s is MPI-capable",
                            ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                            ORTE_JOBID_PRINT(jdata->jobid));
        jdata->gang_launched = true;
    }

    return ORTE_SUCCESS;
}
Beispiel #7
0
/*
 * Sequentially map the ranks according to the placement in the
 * specified hostfile
 */
static int orte_rmaps_seq_map(orte_job_t *jdata)
{
    orte_job_map_t *map;
    orte_app_context_t *app;
    int i, n;
    orte_std_cntr_t j;
    opal_list_item_t *item;
    orte_node_t *node, *nd;
    seq_node_t *sq, *save=NULL, *seq;;
    orte_vpid_t vpid;
    orte_std_cntr_t num_nodes;
    int rc;
    opal_list_t default_seq_list;
    opal_list_t node_list, *seq_list, sq_list;
    orte_proc_t *proc;
    mca_base_component_t *c = &mca_rmaps_seq_component.base_version;
    char *hosts = NULL, *sep, *eptr;
    FILE *fp;
    opal_hwloc_resource_type_t rtype;

    OPAL_OUTPUT_VERBOSE((1, orte_rmaps_base_framework.framework_output,
                         "%s rmaps:seq called on job %s",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                         ORTE_JOBID_PRINT(jdata->jobid)));

    /* this mapper can only handle initial launch
     * when seq mapping is desired - allow
     * restarting of failed apps
     */
    if (ORTE_FLAG_TEST(jdata, ORTE_JOB_FLAG_RESTART)) {
        opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
                            "mca:rmaps:seq: job %s is being restarted - seq cannot map",
                            ORTE_JOBID_PRINT(jdata->jobid));
        return ORTE_ERR_TAKE_NEXT_OPTION;
    }
    if (NULL != jdata->map->req_mapper) {
        if (0 != strcasecmp(jdata->map->req_mapper, c->mca_component_name)) {
            /* a mapper has been specified, and it isn't me */
            opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
                                "mca:rmaps:seq: job %s not using sequential mapper",
                                ORTE_JOBID_PRINT(jdata->jobid));
            return ORTE_ERR_TAKE_NEXT_OPTION;
        }
        /* we need to process it */
        goto process;
    }
    if (ORTE_MAPPING_SEQ != ORTE_GET_MAPPING_POLICY(jdata->map->mapping)) {
        /* I don't know how to do these - defer */
        opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
                            "mca:rmaps:seq: job %s not using seq mapper",
                            ORTE_JOBID_PRINT(jdata->jobid));
        return ORTE_ERR_TAKE_NEXT_OPTION;
    }

 process:
    opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
                        "mca:rmaps:seq: mapping job %s",
                        ORTE_JOBID_PRINT(jdata->jobid));

    /* flag that I did the mapping */
    if (NULL != jdata->map->last_mapper) {
        free(jdata->map->last_mapper);
    }
    jdata->map->last_mapper = strdup(c->mca_component_name);

    /* convenience def */
    map = jdata->map;

    /* if there is a default hostfile, go and get its ordered list of nodes */
    OBJ_CONSTRUCT(&default_seq_list, opal_list_t);
    if (NULL != orte_default_hostfile) {
        char *hstname = NULL;
        /* open the file */
        fp = fopen(orte_default_hostfile, "r");
        if (NULL == fp) {
            ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
            rc = ORTE_ERR_NOT_FOUND;
            goto error;
        }
        while (NULL != (hstname = orte_getline(fp))) {
            if (0 == strlen(hstname)) {
                free(hstname);
                /* blank line - ignore */
                continue;
            }
            if( '#' == hstname[0] ) {
                free(hstname);
                /* Comment line - ignore */
                continue;
            }
            sq = OBJ_NEW(seq_node_t);
            if (NULL != (sep = strchr(hstname, ' '))) {
                *sep = '\0';
                sep++;
                /* remove any trailing space */
                eptr = sep + strlen(sep) - 1;
                while (eptr > sep && isspace(*eptr)) {
                    eptr--;
                }
                *(eptr+1) = 0;
                sq->cpuset = strdup(sep);
            }

            // Strip off the FQDN if present, ignore IP addresses
            if( !orte_keep_fqdn_hostnames && !opal_net_isaddr(hstname) ) {
                char *ptr;
                if (NULL != (ptr = strchr(hstname, '.'))) {
                    *ptr = '\0';
                }
            }

            sq->hostname = hstname;
            opal_list_append(&default_seq_list, &sq->super);
        }
        fclose(fp);
    }

    /* start at the beginning... */
    vpid = 0;
    jdata->num_procs = 0;
    if (0 < opal_list_get_size(&default_seq_list)) {
        save = (seq_node_t*)opal_list_get_first(&default_seq_list);
    }

    /* default to LOGICAL processors */
    if (orte_get_attribute(&jdata->attributes, ORTE_JOB_PHYSICAL_CPUIDS, NULL, OPAL_BOOL)) {
        opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
                            "mca:rmaps:seq: using PHYSICAL processors");
        rtype = OPAL_HWLOC_PHYSICAL;
    } else {
        opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
                            "mca:rmaps:seq: using LOGICAL processors");
        rtype = OPAL_HWLOC_LOGICAL;
    }

    /* initialize all the nodes as not included in this job map */
    for (j=0; j < orte_node_pool->size; j++) {
        if (NULL != (node = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, j))) {
            ORTE_FLAG_UNSET(node, ORTE_NODE_FLAG_MAPPED);
        }
    }

    /* cycle through the app_contexts, mapping them sequentially */
    for(i=0; i < jdata->apps->size; i++) {
        if (NULL == (app = (orte_app_context_t*)opal_pointer_array_get_item(jdata->apps, i))) {
            continue;
        }

        /* dash-host trumps hostfile */
        if (orte_get_attribute(&app->attributes, ORTE_APP_DASH_HOST, (void**)&hosts, OPAL_STRING)) {
            opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
                                "mca:rmaps:seq: using dash-host nodes on app %s", app->app);
            OBJ_CONSTRUCT(&node_list, opal_list_t);
            /* dash host entries cannot specify cpusets, so used the std function to retrieve the list */
            if (ORTE_SUCCESS != (rc = orte_util_get_ordered_dash_host_list(&node_list, hosts))) {
                ORTE_ERROR_LOG(rc);
                free(hosts);
                goto error;
            }
            free(hosts);
            /* transfer the list to a seq_node_t list */
            OBJ_CONSTRUCT(&sq_list, opal_list_t);
            while (NULL != (nd = (orte_node_t*)opal_list_remove_first(&node_list))) {
                sq = OBJ_NEW(seq_node_t);
                sq->hostname = strdup(nd->name);
                opal_list_append(&sq_list, &sq->super);
                OBJ_RELEASE(nd);
            }
            OBJ_DESTRUCT(&node_list);
            seq_list = &sq_list;
        } else if (orte_get_attribute(&app->attributes, ORTE_APP_HOSTFILE, (void**)&hosts, OPAL_STRING)) {
            char *hstname;
            if (NULL == hosts) {
                rc = ORTE_ERR_NOT_FOUND;
                goto error;
            }
            opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
                                "mca:rmaps:seq: using hostfile %s nodes on app %s", hosts, app->app);
            OBJ_CONSTRUCT(&sq_list, opal_list_t);
            /* open the file */
            fp = fopen(hosts, "r");
            if (NULL == fp) {
                ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
                rc = ORTE_ERR_NOT_FOUND;
                OBJ_DESTRUCT(&sq_list);
                goto error;
            }
            while (NULL != (hstname = orte_getline(fp))) {
                if (0 == strlen(hstname)) {
                    free(hstname);
                    /* blank line - ignore */
                    continue;
                }
                if( '#' == hstname[0] ) {
                    free(hstname);
                    /* Comment line - ignore */
                    continue;
                }
                sq = OBJ_NEW(seq_node_t);
                if (NULL != (sep = strchr(hstname, ' '))) {
                    *sep = '\0';
                    sep++;
                    /* remove any trailing space */
                    eptr = sep + strlen(sep) - 1;
                    while (eptr > sep && isspace(*eptr)) {
                        eptr--;
                    }
                    *(eptr+1) = 0;
                    sq->cpuset = strdup(sep);
                }

                // Strip off the FQDN if present, ignore IP addresses
                if( !orte_keep_fqdn_hostnames && !opal_net_isaddr(hstname) ) {
                    char *ptr;
                    if (NULL != (ptr = strchr(hstname, '.'))) {
                        (*ptr) = '\0';
                    }
                }

                sq->hostname = hstname;
                opal_list_append(&sq_list, &sq->super);
            }
            fclose(fp);
            free(hosts);
            seq_list = &sq_list;
        } else if (0 < opal_list_get_size(&default_seq_list)) {
            opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
                                "mca:rmaps:seq: using default hostfile nodes on app %s", app->app);
            seq_list = &default_seq_list;
        } else {
            /* can't do anything - no nodes available! */
            orte_show_help("help-orte-rmaps-base.txt",
                           "orte-rmaps-base:no-available-resources",
                           true);
            return ORTE_ERR_SILENT;
        }

        /* check for nolocal and remove the head node, if required */
        if (map->mapping & ORTE_MAPPING_NO_USE_LOCAL) {
            for (item  = opal_list_get_first(seq_list);
                 item != opal_list_get_end(seq_list);
                 item  = opal_list_get_next(item) ) {
                seq = (seq_node_t*)item;
                /* need to check ifislocal because the name in the
                 * hostfile may not have been FQDN, while name returned
                 * by gethostname may have been (or vice versa)
                 */
                if (orte_ifislocal(seq->hostname)) {
                    opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
                                        "mca:rmaps:seq: removing head node %s", seq->hostname);
                    opal_list_remove_item(seq_list, item);
                    OBJ_RELEASE(item);  /* "un-retain" it */
                }
            }
        }

        if (NULL == seq_list || 0 == (num_nodes = (orte_std_cntr_t)opal_list_get_size(seq_list))) {
            orte_show_help("help-orte-rmaps-base.txt",
                           "orte-rmaps-base:no-available-resources",
                           true);
            return ORTE_ERR_SILENT;
        }

        /* if num_procs wasn't specified, set it now */
        if (0 == app->num_procs) {
            app->num_procs = num_nodes;
            opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
                                "mca:rmaps:seq: setting num procs to %s for app %s",
                                ORTE_VPID_PRINT(app->num_procs), app->app);
        } else if (num_nodes < app->num_procs) {
            orte_show_help("help-orte-rmaps-base.txt", "seq:not-enough-resources", true,
                           app->num_procs, num_nodes);
            return ORTE_ERR_SILENT;
        }

        if (seq_list == &default_seq_list) {
            sq = save;
        } else {
            sq = (seq_node_t*)opal_list_get_first(seq_list);
        }
        for (n=0; n < app->num_procs; n++) {
            /* find this node on the global array - this is necessary so
             * that our mapping gets saved on that array as the objects
             * returned by the hostfile function are -not- on the array
             */
            node = NULL;
            for (j=0; j < orte_node_pool->size; j++) {
                if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, j))) {
                    continue;
                }
                if (0 == strcmp(sq->hostname, node->name)) {
                    break;
                }
            }
            if (NULL == node) {
                /* wasn't found - that is an error */
                orte_show_help("help-orte-rmaps-seq.txt",
                               "orte-rmaps-seq:resource-not-found",
                               true, sq->hostname);
                rc = ORTE_ERR_SILENT;
                goto error;
            }
            /* ensure the node is in the map */
            if (!ORTE_FLAG_TEST(node, ORTE_NODE_FLAG_MAPPED)) {
                OBJ_RETAIN(node);
                opal_pointer_array_add(map->nodes, node);
                jdata->map->num_nodes++;
                ORTE_FLAG_SET(node, ORTE_NODE_FLAG_MAPPED);
            }
            proc = orte_rmaps_base_setup_proc(jdata, node, i);
            if ((node->slots < (int)node->num_procs) ||
                (0 < node->slots_max && node->slots_max < (int)node->num_procs)) {
                if (ORTE_MAPPING_NO_OVERSUBSCRIBE & ORTE_GET_MAPPING_DIRECTIVE(jdata->map->mapping)) {
                    orte_show_help("help-orte-rmaps-base.txt", "orte-rmaps-base:alloc-error",
                                   true, node->num_procs, app->app);
                    ORTE_UPDATE_EXIT_STATUS(ORTE_ERROR_DEFAULT_EXIT_CODE);
                    rc = ORTE_ERR_SILENT;
                    goto error;
                }
                /* flag the node as oversubscribed so that sched-yield gets
                 * properly set
                 */
                ORTE_FLAG_SET(node, ORTE_NODE_FLAG_OVERSUBSCRIBED);
                ORTE_FLAG_SET(jdata, ORTE_JOB_FLAG_OVERSUBSCRIBED);
                /* check for permission */
                if (ORTE_FLAG_TEST(node, ORTE_NODE_FLAG_SLOTS_GIVEN)) {
                    /* if we weren't given a directive either way, then we will error out
                     * as the #slots were specifically given, either by the host RM or
                     * via hostfile/dash-host */
                    if (!(ORTE_MAPPING_SUBSCRIBE_GIVEN & ORTE_GET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping))) {
                        orte_show_help("help-orte-rmaps-base.txt", "orte-rmaps-base:alloc-error",
                                       true, app->num_procs, app->app);
                        ORTE_UPDATE_EXIT_STATUS(ORTE_ERROR_DEFAULT_EXIT_CODE);
                        return ORTE_ERR_SILENT;
                    } else if (ORTE_MAPPING_NO_OVERSUBSCRIBE & ORTE_GET_MAPPING_DIRECTIVE(jdata->map->mapping)) {
                        /* if we were explicitly told not to oversubscribe, then don't */
                        orte_show_help("help-orte-rmaps-base.txt", "orte-rmaps-base:alloc-error",
                                       true, app->num_procs, app->app);
                        ORTE_UPDATE_EXIT_STATUS(ORTE_ERROR_DEFAULT_EXIT_CODE);
                        return ORTE_ERR_SILENT;
                    }
                }
            }
            /* assign the vpid */
            proc->name.vpid = vpid++;
            opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
                                "mca:rmaps:seq: assign proc %s to node %s for app %s",
                                ORTE_VPID_PRINT(proc->name.vpid), sq->hostname, app->app);

            /* record the cpuset, if given */
            if (NULL != sq->cpuset) {
                hwloc_cpuset_t bitmap;
                char *cpu_bitmap;
                if (NULL == node->topology || NULL == node->topology->topo) {
                    /* not allowed - for sequential cpusets, we must have
                     * the topology info
                     */
                    orte_show_help("help-orte-rmaps-base.txt", "rmaps:no-topology", true, node->name);
                    rc = ORTE_ERR_SILENT;
                    goto error;
                }
                /* if we are using hwthreads as cpus and binding to hwthreads, then
                 * we can just copy the cpuset across as it already specifies things
                 * at that level */
                if (opal_hwloc_use_hwthreads_as_cpus &&
                    OPAL_BIND_TO_HWTHREAD == OPAL_GET_BINDING_POLICY(opal_hwloc_binding_policy)) {
                    cpu_bitmap = strdup(sq->cpuset);
                } else {
                    /* setup the bitmap */
                    bitmap = hwloc_bitmap_alloc();
                    /* parse the slot_list to find the socket and core */
                    if (ORTE_SUCCESS != (rc = opal_hwloc_base_cpu_list_parse(sq->cpuset, node->topology->topo, rtype, bitmap))) {
                        ORTE_ERROR_LOG(rc);
                        hwloc_bitmap_free(bitmap);
                        goto error;
                    }
                    /* note that we cannot set the proc locale to any specific object
                     * as the slot list may have assigned it to more than one - so
                     * leave that field NULL
                     */
                    /* set the proc to the specified map */
                    hwloc_bitmap_list_asprintf(&cpu_bitmap, bitmap);
                    hwloc_bitmap_free(bitmap);
                }
                orte_set_attribute(&proc->attributes, ORTE_PROC_CPU_BITMAP, ORTE_ATTR_GLOBAL, cpu_bitmap, OPAL_STRING);
                opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
                                    "mca:rmaps:seq: binding proc %s to cpuset %s bitmap %s",
                                    ORTE_VPID_PRINT(proc->name.vpid), sq->cpuset, cpu_bitmap);
                /* we are going to bind to cpuset since the user is specifying the cpus */
                OPAL_SET_BINDING_POLICY(jdata->map->binding, OPAL_BIND_TO_CPUSET);
                /* note that the user specified the mapping */
                ORTE_SET_MAPPING_POLICY(jdata->map->mapping, ORTE_MAPPING_BYUSER);
                ORTE_SET_MAPPING_DIRECTIVE(jdata->map->mapping, ORTE_MAPPING_GIVEN);
                /* cleanup */
                free(cpu_bitmap);
            } else {
                hwloc_obj_t locale;

                /* assign the locale - okay for the topo to be null as
                 * it just means it wasn't returned
                 */
                if (NULL != node->topology && NULL != node->topology->topo) {
                    locale = hwloc_get_root_obj(node->topology->topo);
                    orte_set_attribute(&proc->attributes, ORTE_PROC_HWLOC_LOCALE,
                                       ORTE_ATTR_LOCAL, locale, OPAL_PTR);
                }
            }

            /* add to the jdata proc array */
            if (ORTE_SUCCESS != (rc = opal_pointer_array_set_item(jdata->procs, proc->name.vpid, proc))) {
                ORTE_ERROR_LOG(rc);
                goto error;
            }
            /* move to next node */
            sq = (seq_node_t*)opal_list_get_next(&sq->super);
        }

        /** track the total number of processes we mapped */
        jdata->num_procs += app->num_procs;

        /* cleanup the node list if it came from this app_context */
        if (seq_list != &default_seq_list) {
            OPAL_LIST_DESTRUCT(seq_list);
        } else {
            save = sq;
        }
    }

    /* mark that this job is to be fully
     * described in the launch msg */
    orte_set_attribute(&jdata->attributes, ORTE_JOB_FULLY_DESCRIBED, ORTE_ATTR_GLOBAL, NULL, OPAL_BOOL);

    return ORTE_SUCCESS;

 error:
    OPAL_LIST_DESTRUCT(&default_seq_list);
    return rc;
}
/* Parse the provided hostfile and filter the nodes that are
 * on the input list, removing those that
 * are not found in the hostfile
 */
int orte_util_filter_hostfile_nodes(opal_list_t *nodes,
                                    char *hostfile,
                                    bool remove)
{
    opal_list_t newnodes, exclude;
    opal_list_item_t *item1, *item2, *next, *item3;
    orte_node_t *node_from_list, *node_from_file, *node_from_pool, *node3;
    int rc = ORTE_SUCCESS;
    char *cptr;
    int num_empty, nodeidx;
    bool want_all_empty = false;
    opal_list_t keep;
    bool found;

    OPAL_OUTPUT_VERBOSE((1, orte_ras_base_framework.framework_output,
                        "%s hostfile: filtering nodes through hostfile %s",
                        ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), hostfile));

    /* parse the hostfile and create local list of findings */
    OBJ_CONSTRUCT(&newnodes, opal_list_t);
    OBJ_CONSTRUCT(&exclude, opal_list_t);
    if (ORTE_SUCCESS != (rc = hostfile_parse(hostfile, &newnodes, &exclude, false))) {
        OBJ_DESTRUCT(&newnodes);
        OBJ_DESTRUCT(&exclude);
        return rc;
    }
    
    /* if the hostfile was empty, then treat it as a no-op filter */
    if (0 == opal_list_get_size(&newnodes)) {
        OBJ_DESTRUCT(&newnodes);
        OBJ_DESTRUCT(&exclude);
        /* indicate that the hostfile was empty */
        return ORTE_ERR_TAKE_NEXT_OPTION;
    }

    /* remove from the list of newnodes those that are in the exclude list
     * since we could have added duplicate names above due to the */
    while (NULL != (item1 = opal_list_remove_first(&exclude))) {
        node_from_file = (orte_node_t*)item1;
        /* check for matches on nodes */
        for (item2 = opal_list_get_first(&newnodes);
             item2 != opal_list_get_end(&newnodes);
             item2 = opal_list_get_next(item2)) {
            orte_node_t *node = (orte_node_t*)item2;
            if (0 == strcmp(node_from_file->name, node->name)) {
                /* match - remove it */
                opal_output(0, "HOST %s ON EXCLUDE LIST - REMOVING", node->name);
                opal_list_remove_item(&newnodes, item2);
                OBJ_RELEASE(item2);
                break;
            }
        }
        OBJ_RELEASE(item1);
    }
    
    /* now check our nodes and keep or mark those that match. We can
     * destruct our hostfile list as we go since this won't be needed
     */
    OBJ_CONSTRUCT(&keep, opal_list_t);
    while (NULL != (item2 = opal_list_remove_first(&newnodes))) {
        node_from_file = (orte_node_t*)item2;
        
        next = opal_list_get_next(item2);
        
        /* see if this is a relative node syntax */
        if ('+' == node_from_file->name[0]) {
            /* see if we specified empty nodes */
            if ('e' == node_from_file->name[1] ||
                'E' == node_from_file->name[1]) {
                /* request for empty nodes - do they want
                 * all of them?
                 */
                if (NULL != (cptr = strchr(node_from_file->name, ':'))) {
                    /* the colon indicates a specific # are requested */
                    cptr++; /* step past : */
                    num_empty = strtol(cptr, NULL, 10);
                } else {
                    /* want them all - set num_empty to max */
                    num_empty = INT_MAX;
                    want_all_empty = true;
                }
                /* search the list of nodes provided to us and find those
                 * that are empty
                 */
                item1 = opal_list_get_first(nodes);
                while (0 < num_empty && item1 != opal_list_get_end(nodes)) {
                    node_from_list = (orte_node_t*)item1;
                    next = opal_list_get_next(item1);  /* keep our place */
                    if (0 == node_from_list->slots_inuse) {
                        /* check to see if this node is explicitly called
                         * out later - if so, don't use it here
                         */
                        for (item3 = opal_list_get_first(&newnodes);
                             item3 != opal_list_get_end(&newnodes);
                             item3 = opal_list_get_next(item3)) {
                            node3 = (orte_node_t*)item3;
                            if (0 == strcmp(node3->name, node_from_list->name)) {
                                /* match - don't use it */
                                goto skipnode;
                            }
                        }
                        if (remove) {
                            /* remove item from list */
                            opal_list_remove_item(nodes, item1);
                            /* xfer to keep list */
                            opal_list_append(&keep, item1);
                        } else {
                            /* mark as included */
                            node_from_list->mapped = true;
                        }
                        --num_empty;
                    }
                skipnode:
                    item1 = next;
                }
                /* did they get everything they wanted? */
                if (!want_all_empty && 0 < num_empty) {
                    orte_show_help("help-hostfile.txt", "hostfile:not-enough-empty",
                                   true, num_empty);
                    rc = ORTE_ERR_SILENT;
                    goto cleanup;
                }            
            } else if ('n' == node_from_file->name[1] ||
                       'N' == node_from_file->name[1]) {
                /* they want a specific relative node #, so
                 * look it up on global pool
                 */
                nodeidx = strtol(&node_from_file->name[2], NULL, 10);
                if (NULL == (node_from_pool = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, nodeidx))) {
                    /* this is an error */
                    orte_show_help("help-hostfile.txt", "hostfile:relative-node-not-found",
                                   true, nodeidx, node_from_file->name);
                    rc = ORTE_ERR_SILENT;
                    goto cleanup;
                }
                /* search the list of nodes provided to us and find it */
                for (item1 = opal_list_get_first(nodes);
                     item1 != opal_list_get_end(nodes);
                     item1 = opal_list_get_next(nodes)) {
                    node_from_list = (orte_node_t*)item1;
                    if (0 == strcmp(node_from_list->name, node_from_pool->name)) {
                        if (remove) {
                            /* match - remove item from list */
                            opal_list_remove_item(nodes, item1);
                            /* xfer to keep list */
                            opal_list_append(&keep, item1);
                        } else {
                            /* mark as included */
                            node_from_list->mapped = true;
                        }
                        break;
                    }
                }
            } else {
                /* invalid relative node syntax */
                orte_show_help("help-hostfile.txt", "hostfile:invalid-relative-node-syntax",
                               true, node_from_file->name);
                rc = ORTE_ERR_SILENT;
                goto cleanup;
            }
        } else {
            /* we are looking for a specific node on the list
             * search the provided list of nodes to see if this
             * one is found
             */
            found = false;
            for (item1 = opal_list_get_first(nodes);
                 item1 != opal_list_get_end(nodes);
                 item1 = opal_list_get_next(item1)) {
                node_from_list = (orte_node_t*)item1;
                /* since the name in the hostfile might not match
                 * our local name, and yet still be intended to match,
                 * we have to check for local interfaces
                 */
                if (0 == strcmp(node_from_file->name, node_from_list->name) ||
                    (0 == strcmp(node_from_file->name, "localhost") &&
                     0 == strcmp(node_from_list->name, orte_process_info.nodename)) ||
                    (opal_ifislocal(node_from_list->name) &&
                     opal_ifislocal(node_from_file->name))) {
                    /* if the slot count here is less than the
                     * total slots avail on this node, set it
                     * to the specified count - this allows people
                     * to subdivide an allocation
                     */
                    if (node_from_file->slots < node_from_list->slots) {
                        node_from_list->slots = node_from_file->slots;
                    }
                    if (remove) {
                        /* remove the node from the list */
                        opal_list_remove_item(nodes, item1);
                        /* xfer it to keep list */
                        opal_list_append(&keep, item1);
                    } else {
                        /* mark as included */
                        node_from_list->mapped = true;
                    }
                    found = true;
                    break;
                }
            }
            /* if the host in the newnode list wasn't found,
             * then that is an error we need to report to the
             * user and abort
             */
            if (!found) {
                orte_show_help("help-hostfile.txt", "hostfile:extra-node-not-found",
                               true, hostfile, node_from_file->name);
                rc = ORTE_ERR_SILENT;
                goto cleanup;
            }
        }
        /* cleanup the newnode list */
        OBJ_RELEASE(item2);
    }
    
    /* if we still have entries on our hostfile list, then
     * there were requested hosts that were not in our allocation.
     * This is an error - report it to the user and return an error
     */
    if (0 != opal_list_get_size(&newnodes)) {
        orte_show_help("help-hostfile.txt", "not-all-mapped-alloc",
                       true, hostfile);
        while (NULL != (item1 = opal_list_remove_first(&newnodes))) {
            OBJ_RELEASE(item1);
        }
        OBJ_DESTRUCT(&newnodes);
        return ORTE_ERR_SILENT;
    }

    if (!remove) {
        /* all done */
        OBJ_DESTRUCT(&newnodes);
        return ORTE_SUCCESS;
    }

    /* clear the rest of the nodes list */
    while (NULL != (item1 = opal_list_remove_first(nodes))) {
        OBJ_RELEASE(item1);
    }
    
    /* the nodes list has been cleared - rebuild it in order */
    while (NULL != (item1 = opal_list_remove_first(&keep))) {
        opal_list_append(nodes, item1);
    }
    
cleanup:
    OBJ_DESTRUCT(&newnodes);

    return rc;
}
int orte_util_get_ordered_host_list(opal_list_t *nodes,
                                    char *hostfile)
{
    opal_list_t exclude;
    opal_list_item_t *item, *itm, *item2, *item1;
    char *cptr;
    int num_empty, i, nodeidx, startempty=0;
    bool want_all_empty=false;
    orte_node_t *node_from_pool, *newnode;
    int rc;
    
    OPAL_OUTPUT_VERBOSE((1, orte_ras_base_framework.framework_output,
                         "%s hostfile: creating ordered list of hosts from hostfile %s",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), hostfile));
    
    OBJ_CONSTRUCT(&exclude, opal_list_t);
    
    /* parse the hostfile and add the contents to the list, keeping duplicates */
    if (ORTE_SUCCESS != (rc = hostfile_parse(hostfile, nodes, &exclude, true))) {
        goto cleanup;
    }
    
    /* parse the nodes to process any relative node directives */
    item2 = opal_list_get_first(nodes);
    while (item2 != opal_list_get_end(nodes)) {
        orte_node_t *node=(orte_node_t*)item2;
        
        /* save the next location in case this one gets removed */
        item1 = opal_list_get_next(item2);
        
        if ('+' != node->name[0]) {
            item2 = item1;
            continue;
        }
        
        /* see if we specified empty nodes */
        if ('e' == node->name[1] ||
            'E' == node->name[1]) {
            /* request for empty nodes - do they want
             * all of them?
             */
            if (NULL != (cptr = strchr(node->name, ':'))) {
                /* the colon indicates a specific # are requested */
                cptr++; /* step past : */
                num_empty = strtol(cptr, NULL, 10);
            } else {
                /* want them all - set num_empty to max */
                num_empty = INT_MAX;
                want_all_empty = true;
            }
            /* insert empty nodes into newnodes list in place of the current item.
             * since item1 is the next item, we insert in front of it
             */
            if (!orte_hnp_is_allocated && 0 == startempty) {
               startempty = 1;
            }
            for (i=startempty; 0 < num_empty && i < orte_node_pool->size; i++) {
                if (NULL == (node_from_pool = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, i))) {
                    continue;
                }
                if (0 == node_from_pool->slots_inuse) {
                    newnode = OBJ_NEW(orte_node_t);
                    newnode->name = strdup(node_from_pool->name);
                    /* if the slot count here is less than the
                     * total slots avail on this node, set it
                     * to the specified count - this allows people
                     * to subdivide an allocation
                     */
                    if (node->slots < node_from_pool->slots) {
                        newnode->slots = node->slots;
                    } else {
                        newnode->slots = node_from_pool->slots;
                    }
                    opal_list_insert_pos(nodes, item1, &newnode->super);
                    /* track number added */
                    --num_empty;
                }
            }
            /* bookmark where we stopped in case they ask for more */
            startempty = i;
            /* did they get everything they wanted? */
            if (!want_all_empty && 0 < num_empty) {
                orte_show_help("help-hostfile.txt", "hostfile:not-enough-empty",
                               true, num_empty);
                rc = ORTE_ERR_SILENT;
                goto cleanup;
            }            
            /* since we have expanded the provided node, remove
             * it from list
             */
            opal_list_remove_item(nodes, item2);
            OBJ_RELEASE(item2);
        } else if ('n' == node->name[1] ||
                   'N' == node->name[1]) {
            /* they want a specific relative node #, so
             * look it up on global pool
             */
            nodeidx = strtol(&node->name[2], NULL, 10);
            /* if the HNP is not allocated, then we need to
             * adjust the index as the node pool is offset
             * by one
             */
            if (!orte_hnp_is_allocated) {
                nodeidx++;
            }
            /* see if that location is filled */
            if (NULL == (node_from_pool = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, nodeidx))) {
                /* this is an error */
                orte_show_help("help-hostfile.txt", "hostfile:relative-node-not-found",
                               true, nodeidx, node->name);
                rc = ORTE_ERR_SILENT;
                goto cleanup;
            }
            /* create the node object */
            newnode = OBJ_NEW(orte_node_t);
            newnode->name = strdup(node_from_pool->name);
            /* if the slot count here is less than the
             * total slots avail on this node, set it
             * to the specified count - this allows people
             * to subdivide an allocation
             */
            if (node->slots < node_from_pool->slots) {
                newnode->slots = node->slots;
            } else {
                newnode->slots = node_from_pool->slots;
            }
            /* insert it before item1 */
            opal_list_insert_pos(nodes, item1, &newnode->super);
            /* since we have expanded the provided node, remove
             * it from list
             */
            opal_list_remove_item(nodes, item2);
            OBJ_RELEASE(item2);
        } else {
            /* invalid relative node syntax */
            orte_show_help("help-hostfile.txt", "hostfile:invalid-relative-node-syntax",
                           true, node->name);
            rc = ORTE_ERR_SILENT;
            goto cleanup;
        }
        
        /* move to next */
        item2 = item1;
    }

    /* remove from the list of nodes those that are in the exclude list */
    while(NULL != (item = opal_list_remove_first(&exclude))) {
        orte_node_t *exnode = (orte_node_t*)item;
        /* check for matches on nodes */
        for (itm = opal_list_get_first(nodes);
             itm != opal_list_get_end(nodes);
             itm = opal_list_get_next(itm)) {
            orte_node_t *node=(orte_node_t*)itm;
            if (0 == strcmp(exnode->name, node->name)) {
                /* match - remove it */
                opal_list_remove_item(nodes, itm);
                OBJ_RELEASE(itm);
                /* have to cycle through the entire list as we could
                 * have duplicates
                 */
            }
        }
        OBJ_RELEASE(item);
    }
    
cleanup:
    OBJ_DESTRUCT(&exclude);
    
    return rc;
}
Beispiel #10
0
static int hostfile_parse_line(int token, opal_list_t* updates,
                               opal_list_t* exclude, bool keep_all)
{
    int rc;
    orte_node_t* node;
    bool got_max = false;
    char* value;
    char** argv;
    char* node_name = NULL;
    char* node_alias = NULL;
    char* username = NULL;
    int cnt;
    int number_of_slots = 0;
    char buff[64];

    if (ORTE_HOSTFILE_STRING == token ||
        ORTE_HOSTFILE_HOSTNAME == token ||
        ORTE_HOSTFILE_INT == token ||
        ORTE_HOSTFILE_IPV4 == token ||
        ORTE_HOSTFILE_IPV6 == token) {

        if(ORTE_HOSTFILE_INT == token) {
            snprintf(buff, 64, "%d", orte_util_hostfile_value.ival);
            value = buff;
        } else {
            value = orte_util_hostfile_value.sval;
        }
        argv = opal_argv_split (value, '@');
        
        cnt = opal_argv_count (argv);
        if (1 == cnt) {
            node_name = strdup(argv[0]);
        } else if (2 == cnt) {
            username = strdup(argv[0]);
            node_name = strdup(argv[1]);
        } else {
            opal_output(0, "WARNING: Unhandled user@host-combination\n"); /* XXX */
        }
        opal_argv_free (argv);

        /* if the first letter of the name is '^', then this is a node
         * to be excluded. Remove the ^ character so the nodename is
         * usable, and put it on the exclude list
         */
        if ('^' == node_name[0]) {
            int i, len;
            len = strlen(node_name);
            for (i=1; i < len; i++) {
                node_name[i-1] = node_name[i];
            }
            node_name[len-1] = '\0';  /* truncate */
            
            OPAL_OUTPUT_VERBOSE((3, orte_ras_base_framework.framework_output,
                                 "%s hostfile: node %s is being excluded",
                                 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), node_name));
            
            /* convert this into something globally unique */
            if (strcmp(node_name, "localhost") == 0 || opal_ifislocal(node_name)) {
                /* Nodename has been allocated, that is for sure */
                if (orte_show_resolved_nodenames &&
                    0 != strcmp(node_name, orte_process_info.nodename)) {
                    node_alias = strdup(node_name);
                }
                free (node_name);
                node_name = strdup(orte_process_info.nodename);
            }
            
            /* Do we need to make a new node object?  First check to see
               if it's already in the exclude list */
            if (NULL == (node = hostfile_lookup(exclude, node_name))) {
                node = OBJ_NEW(orte_node_t);
                node->name = node_name;
                if (NULL != username) {
                    node->username = strdup(username);
                }
                opal_list_append(exclude, &node->super);
            }
            return ORTE_SUCCESS;
        }
        
        /* this is not a node to be excluded, so we need to process it and
         * add it to the "include" list. See if this host is actually us.
         */
        if (strcmp(node_name, "localhost") == 0 || opal_ifislocal(node_name)) {
            /* Nodename has been allocated, that is for sure */
            if (orte_show_resolved_nodenames &&
                0 != strcmp(node_name, orte_process_info.nodename)) {
                node_alias = strdup(node_name);
            }
            free (node_name);
            node_name = strdup(orte_process_info.nodename);
        }

        OPAL_OUTPUT_VERBOSE((3, orte_ras_base_framework.framework_output,
                             "%s hostfile: node %s is being included - keep all is %s",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), node_name,
                             keep_all ? "TRUE" : "FALSE"));

        /* Do we need to make a new node object? */
        if (keep_all || NULL == (node = hostfile_lookup(updates, node_name))) {
            node = OBJ_NEW(orte_node_t);
            node->name = node_name;
            node->slots = 1;
            if (NULL != username) {
                node->username = strdup(username);
            }
            opal_list_append(updates, &node->super);
        } else {
            /* this node was already found once - add a slot and mark slots as "given" */
            node->slots++;
            node->slots_given = true;
        }
        /* do we need to record an alias for this node? */
        if (NULL != node_alias) {
            /* add to list of aliases for this node - only add if unique */
            opal_argv_append_unique_nosize(&node->alias, node_alias, false);
            free(node_alias);
        }
    } else if (ORTE_HOSTFILE_RELATIVE == token) {
        /* store this for later processing */
        node = OBJ_NEW(orte_node_t);
        node->name = strdup(orte_util_hostfile_value.sval);
        if (NULL != username) {
            node->username = strdup(username);
        }
        opal_list_append(updates, &node->super);
    } else if (ORTE_HOSTFILE_RANK == token) {
        /* we can ignore the rank, but we need to extract the node name. we
         * first need to shift over to the other side of the equal sign as
         * this is where the node name will be
         */
        while (!orte_util_hostfile_done &&
               ORTE_HOSTFILE_EQUAL != token) {
            token = orte_util_hostfile_lex();
        }
        if (orte_util_hostfile_done) {
            /* bad syntax somewhere */
            return ORTE_ERROR;
        }
        /* next position should be the node name */
        token = orte_util_hostfile_lex();
        if(ORTE_HOSTFILE_INT == token) {
            snprintf(buff, 64, "%d", orte_util_hostfile_value.ival);
            value = buff;
        } else {
            value = orte_util_hostfile_value.sval;
        }
        
        argv = opal_argv_split (value, '@');
        
        cnt = opal_argv_count (argv);
        if (1 == cnt) {
            node_name = strdup(argv[0]);
        } else if (2 == cnt) {
            username = strdup(argv[0]);
            node_name = strdup(argv[1]);
        } else {
            opal_output(0, "WARNING: Unhandled user@host-combination\n"); /* XXX */
        }
        opal_argv_free (argv);
        /* Do we need to make a new node object? */
        if (NULL == (node = hostfile_lookup(updates, node_name))) {
            node = OBJ_NEW(orte_node_t);
            node->name = node_name;
            node->slots = 1;
            if (NULL != username) {
                node->username = strdup(username);
            }
            opal_list_append(updates, &node->super);
        } else {
            /* add a slot */
            node->slots++;
        }
        OPAL_OUTPUT_VERBOSE((1, orte_ras_base_framework.framework_output,
                             "%s hostfile: node %s slots %d",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), node->name, node->slots));
        /* mark the slots as "given" since we take them as being the
         * number specified via the rankfile
         */
        node->slots_given = true;
        /* skip to end of line */
        while (!orte_util_hostfile_done &&
               ORTE_HOSTFILE_NEWLINE != token) {
            token = orte_util_hostfile_lex();
        }
        return ORTE_SUCCESS;
    } else {
        hostfile_parse_error(token);
        return ORTE_ERROR;
    }
    
    while (!orte_util_hostfile_done) {
        token = orte_util_hostfile_lex();
        
        switch (token) {            
        case ORTE_HOSTFILE_DONE:
            goto done;

        case ORTE_HOSTFILE_NEWLINE:
            goto done;

        case ORTE_HOSTFILE_USERNAME:
            node->username = hostfile_parse_string();
            break;

        case ORTE_HOSTFILE_COUNT:
        case ORTE_HOSTFILE_CPU:
        case ORTE_HOSTFILE_SLOTS:
            rc = hostfile_parse_int();
            if (rc < 0) {
                orte_show_help("help-hostfile.txt", "slots",
                               true,
                               cur_hostfile_name, rc);
                opal_list_remove_item(updates, &node->super);
                OBJ_RELEASE(node);
                return ORTE_ERROR;
            }
            if (node->slots_given) {
                /* multiple definitions were given for the
                 * slot count - this is not allowed
                 */
                orte_show_help("help-hostfile.txt", "slots-given",
                               true,
                               cur_hostfile_name, node->name);
                opal_list_remove_item(updates, &node->super);
                OBJ_RELEASE(node);
                return ORTE_ERROR;
            }
            node->slots = rc;
            node->slots_given = true;

            /* Ensure that slots_max >= slots */
            if (node->slots_max != 0 && node->slots_max < node->slots) {
                node->slots_max = node->slots;
            }
            break;

        case ORTE_HOSTFILE_SLOTS_MAX:
            rc = hostfile_parse_int();
            if (rc < 0) {
                orte_show_help("help-hostfile.txt", "max_slots",
                               true,
                               cur_hostfile_name, ((size_t) rc));
                opal_list_remove_item(updates, &node->super);
                OBJ_RELEASE(node);
                return ORTE_ERROR;
            }
            /* Only take this update if it puts us >= node_slots */
            if (rc >= node->slots) {
                if (node->slots_max != rc) {
                    node->slots_max = rc;
                    got_max = true;
                }
            } else {
                orte_show_help("help-hostfile.txt", "max_slots_lt",
                               true,
                               cur_hostfile_name, node->slots, rc);
                ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM);
                opal_list_remove_item(updates, &node->super);
                OBJ_RELEASE(node);
                return ORTE_ERROR;
            }
            break;

        default:
            hostfile_parse_error(token);
            opal_list_remove_item(updates, &node->super);
            OBJ_RELEASE(node);
            return ORTE_ERROR;
        }
        if (number_of_slots > node->slots) {
            ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM);
            opal_list_remove_item(updates, &node->super);
            OBJ_RELEASE(node);
            return ORTE_ERROR;
        }
    }

 done:
    if (got_max && !node->slots_given) {
        node->slots = node->slots_max;
        node->slots_given = true;
    }

    return ORTE_SUCCESS;
}
Beispiel #11
0
int orte_util_add_hostfile_nodes(opal_list_t *nodes,
                                 char *hostfile)
{
    opal_list_t exclude, adds;
    opal_list_item_t *item, *itm;
    int rc;
    orte_node_t *nd, *node;
    bool found;

    OPAL_OUTPUT_VERBOSE((1, orte_ras_base_framework.framework_output,
                         "%s hostfile: checking hostfile %s for nodes",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), hostfile));

    OBJ_CONSTRUCT(&exclude, opal_list_t);
    OBJ_CONSTRUCT(&adds, opal_list_t);

    /* parse the hostfile and add any new contents to the list */
    if (ORTE_SUCCESS != (rc = hostfile_parse(hostfile, &adds, &exclude, false))) {
        goto cleanup;
    }
    
    /* check for any relative node directives */
    for (item = opal_list_get_first(&adds);
         item != opal_list_get_end(&adds);
         item = opal_list_get_next(item)) {
        node=(orte_node_t*)item;
        
        if ('+' == node->name[0]) {
            orte_show_help("help-hostfile.txt", "hostfile:relative-syntax",
                           true, node->name);
            rc = ORTE_ERR_SILENT;
            goto cleanup;
        }
    }
    
    /* remove from the list of nodes those that are in the exclude list */
    while (NULL != (item = opal_list_remove_first(&exclude))) {
        nd = (orte_node_t*)item;
        /* check for matches on nodes */
        for (itm = opal_list_get_first(&adds);
             itm != opal_list_get_end(&adds);
             itm = opal_list_get_next(itm)) {
            node = (orte_node_t*)itm;
            if (0 == strcmp(nd->name, node->name)) {
                /* match - remove it */
                opal_list_remove_item(&adds, itm);
                OBJ_RELEASE(itm);
                break;
            }
        }
        OBJ_RELEASE(item);
    }
    
    /* transfer across all unique nodes */
    while (NULL != (item = opal_list_remove_first(&adds))) {
        nd = (orte_node_t*)item;
        found = false;
        for (itm = opal_list_get_first(nodes);
             itm != opal_list_get_end(nodes);
             itm = opal_list_get_next(itm)) {
            node = (orte_node_t*)itm;
            if (0 == strcmp(nd->name, node->name)) {
                found = true;
                break;
            }
        }
        if (!found) {
            opal_list_append(nodes, &nd->super);
            OPAL_OUTPUT_VERBOSE((1, orte_ras_base_framework.framework_output,
                                 "%s hostfile: adding node %s slots %d",
                                 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), nd->name, nd->slots));
        } else {
            OBJ_RELEASE(item);
        }
    }

cleanup:
    OPAL_LIST_DESTRUCT(&exclude);
    OPAL_LIST_DESTRUCT(&adds);

    return rc;
}
Beispiel #12
0
static void process_reads(int fd, short args, void *cbdata)
{
    orte_dfs_request_t *read_dfs = (orte_dfs_request_t*)cbdata;
    orte_dfs_tracker_t *tptr, *trk;
    opal_list_item_t *item;
    opal_buffer_t *buffer;
    int64_t i64;
    int rc;

    /* look in our local records for this fd */
    trk = NULL;
    for (item = opal_list_get_first(&active_files);
         item != opal_list_get_end(&active_files);
         item = opal_list_get_next(item)) {
        tptr = (orte_dfs_tracker_t*)item;
        if (tptr->local_fd == read_dfs->local_fd) {
            trk = tptr;
            break;
        }
    }
    if (NULL == trk) {
        ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
        OBJ_RELEASE(read_dfs);
        return;
    }

    /* add this request to our pending list */
    read_dfs->id = req_id++;
    opal_list_append(&requests, &read_dfs->super);

    /* setup a message for the daemon telling
     * them what file to read
     */
    buffer = OBJ_NEW(opal_buffer_t);
    if (OPAL_SUCCESS != (rc = opal_dss.pack(buffer, &read_dfs->cmd, 1, ORTE_DFS_CMD_T))) {
        ORTE_ERROR_LOG(rc);
        goto complete;
    }
    /* include the request id */
    if (OPAL_SUCCESS != (rc = opal_dss.pack(buffer, &read_dfs->id, 1, OPAL_UINT64))) {
        ORTE_ERROR_LOG(rc);
        goto complete;
    }
    if (OPAL_SUCCESS != (rc = opal_dss.pack(buffer, &trk->remote_fd, 1, OPAL_INT))) {
        ORTE_ERROR_LOG(rc);
        goto complete;
    }
    i64 = (int64_t)read_dfs->read_length;
    if (OPAL_SUCCESS != (rc = opal_dss.pack(buffer, &i64, 1, OPAL_INT64))) {
        ORTE_ERROR_LOG(rc);
        goto complete;
    }
    
    opal_output_verbose(1, orte_dfs_base_framework.framework_output,
                        "%s sending read file request to %s for fd %d",
                        ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                        ORTE_NAME_PRINT(&trk->host_daemon),
                        trk->local_fd);
    /* send it */
    if (0 > (rc = orte_rml.send_buffer_nb(&trk->host_daemon, buffer,
                                          ORTE_RML_TAG_DFS_CMD,
                                          orte_rml_send_callback, NULL))) {
        ORTE_ERROR_LOG(rc);
        OBJ_RELEASE(buffer);
    }
    /* don't release the request */
    return;

 complete:
    /* don't need to hang on to this request */
    opal_list_remove_item(&requests, &read_dfs->super);
    OBJ_RELEASE(read_dfs);
}
Beispiel #13
0
static void process_sizes(int fd, short args, void *cbdata)
{
    orte_dfs_request_t *size_dfs = (orte_dfs_request_t*)cbdata;
    orte_dfs_tracker_t *tptr, *trk;
    opal_list_item_t *item;
    opal_buffer_t *buffer;
    int rc;

    opal_output_verbose(1, orte_dfs_base_framework.framework_output,
                        "%s processing get_size on fd %d",
                        ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                        size_dfs->local_fd);

    /* look in our local records for this fd */
    trk = NULL;
    for (item = opal_list_get_first(&active_files);
         item != opal_list_get_end(&active_files);
         item = opal_list_get_next(item)) {
        tptr = (orte_dfs_tracker_t*)item;
        if (tptr->local_fd == size_dfs->local_fd) {
            trk = tptr;
            break;
        }
    }
    if (NULL == trk) {
        ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
        OBJ_RELEASE(size_dfs);
        return;
    }

    /* add this request to our local list so we can
     * match it with the returned response when it comes
     */
    size_dfs->id = req_id++;
    opal_list_append(&requests, &size_dfs->super);

    /* setup a message for the daemon telling
     * them what file we want to access
     */
    buffer = OBJ_NEW(opal_buffer_t);
    if (OPAL_SUCCESS != (rc = opal_dss.pack(buffer, &size_dfs->cmd, 1, ORTE_DFS_CMD_T))) {
        ORTE_ERROR_LOG(rc);
        opal_list_remove_item(&requests, &size_dfs->super);
        goto complete;
    }
    /* pass the request id */
    if (OPAL_SUCCESS != (rc = opal_dss.pack(buffer, &size_dfs->id, 1, OPAL_UINT64))) {
        ORTE_ERROR_LOG(rc);
        opal_list_remove_item(&requests, &size_dfs->super);
        goto complete;
    }
    if (OPAL_SUCCESS != (rc = opal_dss.pack(buffer, &trk->remote_fd, 1, OPAL_INT))) {
        ORTE_ERROR_LOG(rc);
        opal_list_remove_item(&requests, &size_dfs->super);
        goto complete;
    }

    opal_output_verbose(1, orte_dfs_base_framework.framework_output,
                        "%s sending get_size request to %s for fd %d",
                        ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                        ORTE_NAME_PRINT(&trk->host_daemon),
                        trk->local_fd);
    /* send it */
    if (0 > (rc = orte_rml.send_buffer_nb(&trk->host_daemon, buffer,
                                          ORTE_RML_TAG_DFS_CMD,
                                          orte_rml_send_callback, NULL))) {
        ORTE_ERROR_LOG(rc);
        OBJ_RELEASE(buffer);
        opal_list_remove_item(&requests, &size_dfs->super);
        if (NULL != size_dfs->size_cbfunc) {
            size_dfs->size_cbfunc(-1, size_dfs->cbdata);
        }
        goto complete;
    }
    /* leave the request there */
    return;

 complete:
    OBJ_RELEASE(size_dfs);
}
Beispiel #14
0
static void process_close(int fd, short args, void *cbdata)
{
    orte_dfs_request_t *close_dfs = (orte_dfs_request_t*)cbdata;
    orte_dfs_tracker_t *tptr, *trk;
    opal_list_item_t *item;
    opal_buffer_t *buffer;
    int rc;

    opal_output_verbose(1, orte_dfs_base_framework.framework_output,
                        "%s closing fd %d",
                        ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                        close_dfs->local_fd);

    /* look in our local records for this fd */
    trk = NULL;
    for (item = opal_list_get_first(&active_files);
         item != opal_list_get_end(&active_files);
         item = opal_list_get_next(item)) {
        tptr = (orte_dfs_tracker_t*)item;
        if (tptr->local_fd == close_dfs->local_fd) {
            trk = tptr;
            break;
        }
    }
    if (NULL == trk) {
        ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
        if (NULL != close_dfs->close_cbfunc) {
            close_dfs->close_cbfunc(close_dfs->local_fd, close_dfs->cbdata);
        }
        OBJ_RELEASE(close_dfs);
        return;
    }

    /* setup a message for the daemon telling
     * them what file to close
     */
    buffer = OBJ_NEW(opal_buffer_t);
    if (OPAL_SUCCESS != (rc = opal_dss.pack(buffer, &close_dfs->cmd, 1, ORTE_DFS_CMD_T))) {
        ORTE_ERROR_LOG(rc);
        goto complete;
    }
    if (OPAL_SUCCESS != (rc = opal_dss.pack(buffer, &trk->remote_fd, 1, OPAL_INT))) {
        ORTE_ERROR_LOG(rc);
        goto complete;
    }
    
    opal_output_verbose(1, orte_dfs_base_framework.framework_output,
                        "%s sending close file request to %s for fd %d",
                        ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                        ORTE_NAME_PRINT(&trk->host_daemon),
                        trk->local_fd);
    /* send it */
    if (0 > (rc = orte_rml.send_buffer_nb(&trk->host_daemon, buffer,
                                          ORTE_RML_TAG_DFS_CMD,
                                          orte_rml_send_callback, NULL))) {
        ORTE_ERROR_LOG(rc);
        OBJ_RELEASE(buffer);
        goto complete;
    }

 complete:
    opal_list_remove_item(&active_files, &trk->super);
    OBJ_RELEASE(trk);
    if (NULL != close_dfs->close_cbfunc) {
        close_dfs->close_cbfunc(close_dfs->local_fd, close_dfs->cbdata);
    }
    OBJ_RELEASE(close_dfs);
}
Beispiel #15
0
static void process_opens(int fd, short args, void *cbdata)
{
    orte_dfs_request_t *dfs = (orte_dfs_request_t*)cbdata;
    int rc;
    opal_buffer_t *buffer;
    char *scheme, *host, *filename, *hostname;
    orte_process_name_t daemon;
    bool found;
    orte_vpid_t v;
    opal_list_t myvals;
    opal_value_t *kv;

    opal_output(0, "%s PROCESSING OPEN", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
    /* get the scheme to determine if we can process locally or not */
    if (NULL == (scheme = opal_uri_get_scheme(dfs->uri))) {
        ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM);
        goto complete;
    }
    opal_output(0, "%s GOT SCHEME", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));

    if (0 != strcmp(scheme, "file")) {
        /* not yet supported */
        orte_show_help("orte_dfs_help.txt", "unsupported-filesystem",
                       true, dfs->uri);
        goto complete;
    }

    /* dissect the uri to extract host and filename/path */
    if (NULL == (filename = opal_filename_from_uri(dfs->uri, &host))) {
        goto complete;
    }
    opal_output(0, "%s GOT FILENAME %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), filename);
    if (NULL == host) {
        host = strdup(orte_process_info.nodename);
    }

    /* ident the daemon on that host */
    daemon.jobid = ORTE_PROC_MY_DAEMON->jobid;
    found = false;
    for (v=0; v < orte_process_info.num_daemons; v++) {
        daemon.vpid = v;
        /* fetch the hostname where this daemon is located */
        OBJ_CONSTRUCT(&myvals, opal_list_t);
        if (ORTE_SUCCESS != (rc = opal_dstore.fetch(opal_dstore_internal,
                                                    &daemon,
                                                    OPAL_DSTORE_HOSTNAME, &myvals))) {
            ORTE_ERROR_LOG(rc);
            OPAL_LIST_DESTRUCT(&myvals);
            goto complete;
        }
        kv = (opal_value_t*)opal_list_get_first(&myvals);
        hostname = strdup(kv->data.string);
        OPAL_LIST_DESTRUCT(&myvals);
        opal_output(0, "%s GOT HOST %s HOSTNAME %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), host, hostname);
        if (0 == strcmp(host, hostname)) {
            found = true;
            break;
        }
    }
    if (!found) {
        ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
        goto complete;
    }
    opal_output_verbose(1, orte_dfs_base_framework.framework_output,
                        "%s file %s on host %s daemon %s",
                        ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                        filename, host, ORTE_NAME_PRINT(&daemon));

    /* add this request to our local list so we can
     * match it with the returned response when it comes
     */
    dfs->id = req_id++;
    opal_list_append(&requests, &dfs->super);

    /* setup a message for the daemon telling
     * them what file we want to access
     */
    buffer = OBJ_NEW(opal_buffer_t);
    if (OPAL_SUCCESS != (rc = opal_dss.pack(buffer, &dfs->cmd, 1, ORTE_DFS_CMD_T))) {
        ORTE_ERROR_LOG(rc);
        opal_list_remove_item(&requests, &dfs->super);
        goto complete;
    }
    /* pass the request id */
    if (OPAL_SUCCESS != (rc = opal_dss.pack(buffer, &dfs->id, 1, OPAL_UINT64))) {
        ORTE_ERROR_LOG(rc);
        opal_list_remove_item(&requests, &dfs->super);
        goto complete;
    }
    if (OPAL_SUCCESS != (rc = opal_dss.pack(buffer, &filename, 1, OPAL_STRING))) {
        ORTE_ERROR_LOG(rc);
        opal_list_remove_item(&requests, &dfs->super);
        goto complete;
    }
    
    opal_output_verbose(1, orte_dfs_base_framework.framework_output,
                        "%s sending open file request to %s file %s",
                        ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                        ORTE_NAME_PRINT(&daemon),
                        filename);
    /* send it */
    if (0 > (rc = orte_rml.send_buffer_nb(&daemon, buffer,
                                          ORTE_RML_TAG_DFS_CMD,
                                          orte_rml_send_callback, NULL))) {
        ORTE_ERROR_LOG(rc);
        OBJ_RELEASE(buffer);
        opal_list_remove_item(&requests, &dfs->super);
        goto complete;
    }
    /* don't release it */
    return;

 complete:
    /* we get here if an error occurred - execute any
     * pending callback so the proc doesn't hang
     */
    if (NULL != dfs->open_cbfunc) {
        dfs->open_cbfunc(-1, dfs->cbdata);
    }
    OBJ_RELEASE(dfs);
}
Beispiel #16
0
/* receives take place in an event, so we are free to process
 * the request list without fear of getting things out-of-order
 */
static void recv_dfs(int status, orte_process_name_t* sender,
                     opal_buffer_t* buffer, orte_rml_tag_t tag,
                     void* cbdata)
{
    orte_dfs_cmd_t cmd;
    int32_t cnt;
    orte_dfs_request_t *dfs, *dptr;
    opal_list_item_t *item;
    int remote_fd, rc;
    int64_t i64;
    uint64_t rid;
    orte_dfs_tracker_t *trk;

    /* unpack the command this message is responding to */
    cnt = 1;
    if (OPAL_SUCCESS != (rc = opal_dss.unpack(buffer, &cmd, &cnt, ORTE_DFS_CMD_T))) {
        ORTE_ERROR_LOG(rc);
        return;
    }

    opal_output_verbose(1, orte_dfs_base_framework.framework_output,
                        "%s recvd cmd %d from sender %s",
                        ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), (int)cmd,
                        ORTE_NAME_PRINT(sender));

    switch (cmd) {
    case ORTE_DFS_OPEN_CMD:
        /* unpack the request id */
        cnt = 1;
        if (OPAL_SUCCESS != (rc = opal_dss.unpack(buffer, &rid, &cnt, OPAL_UINT64))) {
            ORTE_ERROR_LOG(rc);
            return;
        }
        /* unpack the remote fd */
        cnt = 1;
        if (OPAL_SUCCESS != (rc = opal_dss.unpack(buffer, &remote_fd, &cnt, OPAL_INT))) {
            ORTE_ERROR_LOG(rc);
            return;
        }
        /* search our list of requests to find the matching one */
        dfs = NULL;
        for (item = opal_list_get_first(&requests);
             item != opal_list_get_end(&requests);
             item = opal_list_get_next(item)) {
            dptr = (orte_dfs_request_t*)item;
            if (dptr->id == rid) {
                /* as the request has been fulfilled, remove it */
                opal_list_remove_item(&requests, item);
                dfs = dptr;
                break;
            }
        }
        if (NULL == dfs) {
            opal_output_verbose(1, orte_dfs_base_framework.framework_output,
                                "%s recvd open file - no corresponding request found for local fd %d",
                                ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), local_fd);
            ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
            return;
        }

        /* if the remote_fd < 0, then we had an error, so return
         * the error value to the caller
         */
        if (remote_fd < 0) {
            opal_output_verbose(1, orte_dfs_base_framework.framework_output,
                                "%s recvd open file response error file %s [error: %d]",
                                ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                dfs->uri, remote_fd);
            if (NULL != dfs->open_cbfunc) {
                dfs->open_cbfunc(remote_fd, dfs->cbdata);
            }
            /* release the request */
            OBJ_RELEASE(dfs);
            return;
        }
        /* otherwise, create a tracker for this file */
        trk = OBJ_NEW(orte_dfs_tracker_t);
        trk->requestor.jobid = ORTE_PROC_MY_NAME->jobid;
        trk->requestor.vpid = ORTE_PROC_MY_NAME->vpid;
        trk->host_daemon.jobid = sender->jobid;
        trk->host_daemon.vpid = sender->vpid;
        trk->filename = strdup(dfs->uri);
        /* define the local fd */
        trk->local_fd = local_fd++;
        /* record the remote file descriptor */
        trk->remote_fd = remote_fd;
        /* add it to our list of active files */
        opal_list_append(&active_files, &trk->super);
        /* return the local_fd to the caller for
         * subsequent operations
         */
        opal_output_verbose(1, orte_dfs_base_framework.framework_output,
                            "%s recvd open file completed for file %s [local fd: %d remote fd: %d]",
                            ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                            dfs->uri, trk->local_fd, remote_fd);
        if (NULL != dfs->open_cbfunc) {
            dfs->open_cbfunc(trk->local_fd, dfs->cbdata);
        }
        /* release the request */
        OBJ_RELEASE(dfs);
        break;

    case ORTE_DFS_SIZE_CMD:
        /* unpack the request id for this request */
        cnt = 1;
        if (OPAL_SUCCESS != (rc = opal_dss.unpack(buffer, &rid, &cnt, OPAL_UINT64))) {
            ORTE_ERROR_LOG(rc);
            return;
        }
        /* search our list of requests to find the matching one */
        dfs = NULL;
        for (item = opal_list_get_first(&requests);
             item != opal_list_get_end(&requests);
             item = opal_list_get_next(item)) {
            dptr = (orte_dfs_request_t*)item;
            if (dptr->id == rid) {
                /* request was fulfilled, so remove it */
                opal_list_remove_item(&requests, item);
                dfs = dptr;
                break;
            }
        }
        if (NULL == dfs) {
            opal_output_verbose(1, orte_dfs_base_framework.framework_output,
                                "%s recvd size - no corresponding request found for local fd %d",
                                ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), local_fd);
            ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
            return;
        }
        /* get the size */
        cnt = 1;
        if (OPAL_SUCCESS != (rc = opal_dss.unpack(buffer, &i64, &cnt, OPAL_INT64))) {
            ORTE_ERROR_LOG(rc);
            OBJ_RELEASE(dfs);
            return;
        }
        /* pass it back to the original caller */
        if (NULL != dfs->size_cbfunc) {
            dfs->size_cbfunc(i64, dfs->cbdata);
        }
        /* release the request */
        OBJ_RELEASE(dfs);
        break;

    case ORTE_DFS_SEEK_CMD:
        /* unpack the request id for this read */
        cnt = 1;
        if (OPAL_SUCCESS != (rc = opal_dss.unpack(buffer, &rid, &cnt, OPAL_UINT64))) {
            ORTE_ERROR_LOG(rc);
            return;
        }
        /* search our list of requests to find the matching one */
        dfs = NULL;
        for (item = opal_list_get_first(&requests);
             item != opal_list_get_end(&requests);
             item = opal_list_get_next(item)) {
            dptr = (orte_dfs_request_t*)item;
            if (dptr->id == rid) {
                /* request was fulfilled, so remove it */
                opal_list_remove_item(&requests, item);
                dfs = dptr;
                break;
            }
        }
        if (NULL == dfs) {
            opal_output_verbose(1, orte_dfs_base_framework.framework_output,
                                "%s recvd seek - no corresponding request found for local fd %d",
                                ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), local_fd);
            ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
            return;
        }
        /* get the returned offset/status */
        cnt = 1;
        if (OPAL_SUCCESS != (rc = opal_dss.unpack(buffer, &i64, &cnt, OPAL_INT64))) {
            ORTE_ERROR_LOG(rc);
            OBJ_RELEASE(dfs);
            return;
        }
        /* pass it back to the original caller */
        if (NULL != dfs->seek_cbfunc) {
            dfs->seek_cbfunc(i64, dfs->cbdata);
        }
        /* release the request */
        OBJ_RELEASE(dfs);
        break;

    case ORTE_DFS_READ_CMD:
        /* unpack the request id for this read */
        cnt = 1;
        if (OPAL_SUCCESS != (rc = opal_dss.unpack(buffer, &rid, &cnt, OPAL_UINT64))) {
            ORTE_ERROR_LOG(rc);
            return;
        }
        /* search our list of requests to find the matching one */
        dfs = NULL;
        for (item = opal_list_get_first(&requests);
             item != opal_list_get_end(&requests);
             item = opal_list_get_next(item)) {
            dptr = (orte_dfs_request_t*)item;
            if (dptr->id == rid) {
                /* request was fulfilled, so remove it */
                opal_list_remove_item(&requests, item);
                dfs = dptr;
                break;
            }
        }
        if (NULL == dfs) {
            opal_output_verbose(1, orte_dfs_base_framework.framework_output,
                                "%s recvd read - no corresponding request found for local fd %d",
                                ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), local_fd);
            ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
            return;
        }
        /* get the bytes read */
        cnt = 1;
        if (OPAL_SUCCESS != (rc = opal_dss.unpack(buffer, &i64, &cnt, OPAL_INT64))) {
            ORTE_ERROR_LOG(rc);
            OBJ_RELEASE(dfs);
            return;
        }
        if (0 < i64) {
            cnt = i64;
            if (OPAL_SUCCESS != (rc = opal_dss.unpack(buffer, dfs->read_buffer, &cnt, OPAL_UINT8))) {
                ORTE_ERROR_LOG(rc);
                OBJ_RELEASE(dfs);
                return;
            }
        }
        /* pass them back to the original caller */
        if (NULL != dfs->read_cbfunc) {
            dfs->read_cbfunc(i64, dfs->read_buffer, dfs->cbdata);
        }
        /* release the request */
        OBJ_RELEASE(dfs);
        break;

    case ORTE_DFS_POST_CMD:
        /* unpack the request id for this read */
        cnt = 1;
        if (OPAL_SUCCESS != (rc = opal_dss.unpack(buffer, &rid, &cnt, OPAL_UINT64))) {
            ORTE_ERROR_LOG(rc);
            return;
        }
        /* search our list of requests to find the matching one */
        dfs = NULL;
        for (item = opal_list_get_first(&requests);
             item != opal_list_get_end(&requests);
             item = opal_list_get_next(item)) {
            dptr = (orte_dfs_request_t*)item;
            if (dptr->id == rid) {
                /* request was fulfilled, so remove it */
                opal_list_remove_item(&requests, item);
                dfs = dptr;
                break;
            }
        }
        if (NULL == dfs) {
            opal_output_verbose(1, orte_dfs_base_framework.framework_output,
                                "%s recvd post - no corresponding request found",
                                ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
            ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
            return;
        }
        if (NULL != dfs->post_cbfunc) {
            dfs->post_cbfunc(dfs->cbdata);
        }
        OBJ_RELEASE(dfs);
        break;

    case ORTE_DFS_GETFM_CMD:
        /* unpack the request id for this read */
        cnt = 1;
        if (OPAL_SUCCESS != (rc = opal_dss.unpack(buffer, &rid, &cnt, OPAL_UINT64))) {
            ORTE_ERROR_LOG(rc);
            return;
        }
        /* search our list of requests to find the matching one */
        dfs = NULL;
        for (item = opal_list_get_first(&requests);
             item != opal_list_get_end(&requests);
             item = opal_list_get_next(item)) {
            dptr = (orte_dfs_request_t*)item;
            if (dptr->id == rid) {
                /* request was fulfilled, so remove it */
                opal_list_remove_item(&requests, item);
                dfs = dptr;
                break;
            }
        }
        if (NULL == dfs) {
            opal_output_verbose(1, orte_dfs_base_framework.framework_output,
                                "%s recvd getfm - no corresponding request found",
                                ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
            ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
            return;
        }
        /* return it to caller */
        if (NULL != dfs->fm_cbfunc) {
            dfs->fm_cbfunc(buffer, dfs->cbdata);
        }
        OBJ_RELEASE(dfs);
        break;

    default:
        opal_output(0, "TEST:DFS:RECV WTF");
        break;
    }
}