Exemplo n.º 1
0
/*
 * Pack the contact ("wireup") information this process can offer into buf.
 *
 * - HNP: when static ports are in use nothing needs to be shared and the
 *   buffer is left untouched; otherwise the contact info for my own job
 *   is added via orte_rml_base_get_contact_info().
 * - Application: this occurs during connect_accept; the stored hnp_uri of
 *   every job family recorded in orte_routed_jobfams is packed as a string.
 * - Any other process type: nothing is packed.
 *
 * @param buf  buffer that receives the packed information
 * @return ORTE_SUCCESS, or the error code of the first call that failed
 *         (the error is logged before returning). On failure buf may
 *         already contain a partial set of entries.
 */
static int get_wireup_info(opal_buffer_t *buf)
{
    int rc;
    int i;
    orte_routed_jobfam_t *jfam;

    if (ORTE_PROC_IS_HNP) {
        /* if we are not using static ports, then we need to share the
         * comm info - otherwise, just return
         */
        if (orte_static_ports) {
            return ORTE_SUCCESS;
        }

        if (ORTE_SUCCESS != (rc = orte_rml_base_get_contact_info(ORTE_PROC_MY_NAME->jobid, buf))) {
            ORTE_ERROR_LOG(rc);
        }
        return rc;
    }

    /* if I am an application, this is occurring during connect_accept.
     * We need to return the stored information of other HNPs we
     * know about, if any
     */
    if (ORTE_PROC_IS_APP) {
        for (i = 0; i < orte_routed_jobfams.size; i++) {
            jfam = (orte_routed_jobfam_t*)opal_pointer_array_get_item(&orte_routed_jobfams, i);
            if (NULL == jfam) {
                /* unused slot in the pointer array */
                continue;
            }
            /* do not report success with a silently truncated buffer:
             * surface a pack failure to the caller
             */
            if (ORTE_SUCCESS != (rc = opal_dss.pack(buf, &(jfam->hnp_uri), 1, OPAL_STRING))) {
                ORTE_ERROR_LOG(rc);
                return rc;
            }
        }
    }

    return ORTE_SUCCESS;
}
Exemplo n.º 2
0
/*
 * Add the contact info for my own job to buf.
 *
 * Only the HNP can supply this information; every other process type
 * gets ORTE_ERR_NOT_SUPPORTED. When static ports are in use nothing has
 * to be shared, so the buffer is left untouched and ORTE_SUCCESS is
 * returned.
 *
 * @param buf  buffer that receives the contact info
 * @return ORTE_SUCCESS, ORTE_ERR_NOT_SUPPORTED, or the error code from
 *         orte_rml_base_get_contact_info(). In that last case the error
 *         is logged and buf is released here.
 *
 * NOTE(review): because buf is released on failure, a caller that also
 * releases it after an error would release it twice - confirm the
 * ownership contract against the callers.
 */
static int get_wireup_info(opal_buffer_t *buf)
{
    int rc;

    /* meaningless request for anything but the HNP */
    if (!ORTE_PROC_IS_HNP) {
        return ORTE_ERR_NOT_SUPPORTED;
    }

    /* with static ports there is no comm info to share */
    if (orte_static_ports) {
        return ORTE_SUCCESS;
    }

    rc = orte_rml_base_get_contact_info(ORTE_PROC_MY_NAME->jobid, buf);
    if (ORTE_SUCCESS == rc) {
        return ORTE_SUCCESS;
    }

    /* failure: report it and drop the buffer before handing back the code */
    ORTE_ERROR_LOG(rc);
    OBJ_RELEASE(buf);
    return rc;
}
Exemplo n.º 3
0
/*
 * Add the contact info for my own job to buf when running as the HNP.
 *
 * Any other process type packs nothing and reports success.
 *
 * @param buf  buffer that receives the contact info
 * @return ORTE_SUCCESS, or the (logged) error code returned by
 *         orte_rml_base_get_contact_info()
 */
static int get_wireup_info(opal_buffer_t *buf)
{
    int rc;

    /* nothing to contribute unless I am the HNP */
    if (!ORTE_PROC_IS_HNP) {
        return ORTE_SUCCESS;
    }

    rc = orte_rml_base_get_contact_info(ORTE_PROC_MY_NAME->jobid, buf);
    if (ORTE_SUCCESS != rc) {
        ORTE_ERROR_LOG(rc);
    }
    return rc;
}
Exemplo n.º 4
0
static void vm_ready(int fd, short args, void *cbdata)
{
    orte_state_caddy_t *caddy = (orte_state_caddy_t*)cbdata;
    int rc;
    opal_buffer_t *buf;
    orte_daemon_cmd_flag_t command = ORTE_DAEMON_DVM_NIDMAP_CMD;
    orte_grpcomm_signature_t *sig;
    opal_buffer_t *wireup;
    opal_byte_object_t bo, *boptr;
    int8_t flag;
    int32_t numbytes;

    /* if this is my job, then we are done */
    if (ORTE_PROC_MY_NAME->jobid == caddy->jdata->jobid) {
        /* send the daemon map to every daemon in this DVM - we
         * do this here so we don't have to do it for every
         * job we are going to launch */
        buf = OBJ_NEW(opal_buffer_t);
        /* pack the "load nidmap" cmd */
        if (ORTE_SUCCESS != (rc = opal_dss.pack(buf, &command, 1, ORTE_DAEMON_CMD))) {
            ORTE_ERROR_LOG(rc);
            OBJ_RELEASE(buf);
            return;
        }
        /* flag that daemons were launched so we will update the nidmap */
        flag = 1;
        opal_dss.pack(buf, &flag, 1, OPAL_INT8);
        /* construct a nodemap with everything in it */
        if (ORTE_SUCCESS != (rc = orte_util_encode_nodemap(buf))) {
            ORTE_ERROR_LOG(rc);
            OBJ_RELEASE(buf);
            return;
        }

        if (!orte_static_ports && !orte_fwd_mpirun_port) {
            /* pack a flag indicating wiring info is provided */
            flag = 1;
            opal_dss.pack(buf, &flag, 1, OPAL_INT8);
            /* get wireup info for daemons per the selected routing module */
            wireup = OBJ_NEW(opal_buffer_t);
            if (ORTE_SUCCESS != (rc = orte_rml_base_get_contact_info(ORTE_PROC_MY_NAME->jobid, wireup))) {
                ORTE_ERROR_LOG(rc);
                OBJ_RELEASE(wireup);
                OBJ_RELEASE(buf);
                return;
            }
            /* put it in a byte object for xmission */
            opal_dss.unload(wireup, (void**)&bo.bytes, &numbytes);
            /* pack the byte object - zero-byte objects are fine */
            bo.size = numbytes;
            boptr = &bo;
            if (ORTE_SUCCESS != (rc = opal_dss.pack(buf, &boptr, 1, OPAL_BYTE_OBJECT))) {
                ORTE_ERROR_LOG(rc);
                OBJ_RELEASE(wireup);
                OBJ_RELEASE(buf);
                return;
            }
            /* release the data since it has now been copied into our buffer */
            if (NULL != bo.bytes) {
                free(bo.bytes);
            }
            OBJ_RELEASE(wireup);
        } else {
            flag = 0;
            opal_dss.pack(buf, &flag, 1, OPAL_INT8);
        }

        /* goes to all daemons */
        sig = OBJ_NEW(orte_grpcomm_signature_t);
        sig->signature = (orte_process_name_t*)malloc(sizeof(orte_process_name_t));
        sig->signature[0].jobid = ORTE_PROC_MY_NAME->jobid;
        sig->signature[0].vpid = ORTE_VPID_WILDCARD;
        if (ORTE_SUCCESS != (rc = orte_grpcomm.xcast(sig, ORTE_RML_TAG_DAEMON, buf))) {
            ORTE_ERROR_LOG(rc);
            OBJ_RELEASE(buf);
            OBJ_RELEASE(sig);
            ORTE_FORCED_TERMINATE(ORTE_ERROR_DEFAULT_EXIT_CODE);
            return;
        }
        OBJ_RELEASE(buf);
        /* notify that the vm is ready */
        fprintf(stdout, "DVM ready\n");
        OBJ_RELEASE(caddy);
        return;
    }

    /* progress the job */
    caddy->jdata->state = ORTE_JOB_STATE_VM_READY;

    /* position any required files */
    if (ORTE_SUCCESS != orte_filem.preposition_files(caddy->jdata, files_ready, caddy->jdata)) {
        ORTE_FORCED_TERMINATE(ORTE_ERROR_DEFAULT_EXIT_CODE);
    }

    /* cleanup */
    OBJ_RELEASE(caddy);
}