Ejemplo n.º 1
0
Archivo: pmi2.c Proyecto: bosilca/ompi
void pmi_init(int *rank, int *size)
{
    int spawned, appnum;
    int rc;

    *size = -1;
    *rank = -1;
    appnum = -1;

    if (PMI2_SUCCESS != (rc = PMI2_Init(&spawned, size, rank, &appnum))) {
        fprintf(stderr, "pmi2: PMI2_Init: error rc = %d\n", rc);
        abort();
    }
    if( *size < 0 || *rank < 0 ){
        fprintf(stderr, "pmi2: PMI2_Init: bad size=%d or rank=%d values\n",
               *size, *rank);
        abort();
    }
    my_rank = *rank;

    kvs_name = (char*)malloc(PMI2_MAX_VALLEN);
    if( kvs_name == NULL ){
        fprintf(stderr, "pmi_init (pmi2): cannot malloc kvs name buffer\n");
        abort();
    }
    rc = PMI2_Job_GetId(kvs_name, PMI2_MAX_VALLEN);
    if( PMI2_SUCCESS != rc ){
        fprintf(stderr, "pmi_init (pmi2): cannot get kvs name: rc = %d\n", rc);
        abort();
    }
}
Ejemplo n.º 2
0
int MPIDU_Ftb_init(void)
{
    int mpi_errno = MPI_SUCCESS;
    int ret;
    FTB_client_t ci;
    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDU_FTB_INIT);

    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDU_FTB_INIT);

    MPL_strncpy(ci.event_space, "ftb.mpi.mpich", sizeof(ci.event_space));
    MPL_strncpy(ci.client_name, "mpich " MPICH_VERSION, sizeof(ci.client_name));
    MPL_strncpy(ci.client_subscription_style, "FTB_SUBSCRIPTION_NONE", sizeof(ci.client_subscription_style));
    ci.client_polling_queue_len = -1;
    
#ifdef USE_PMI2_API
    ret = PMI2_Job_GetId(ci.client_jobid, sizeof(ci.client_jobid));
    MPIR_ERR_CHKANDJUMP(ret, mpi_errno, MPI_ERR_OTHER, "**pmi_jobgetid");
#else
    ret = PMI_KVS_Get_my_name(ci.client_jobid, sizeof(ci.client_jobid));
    MPIR_ERR_CHKANDJUMP(ret, mpi_errno, MPI_ERR_OTHER, "**pmi_get_id");
#endif
    
    ret = FTB_Connect(&ci, &client_handle);
    MPIR_ERR_CHKANDJUMP(ret, mpi_errno, MPI_ERR_OTHER, "**ftb_connect");

    ret = FTB_Declare_publishable_events(client_handle, NULL, event_info, sizeof(event_info) / sizeof(event_info[0]));
    MPIR_ERR_CHKANDJUMP(ret, mpi_errno, MPI_ERR_OTHER, "**ftb_declare_publishable_events");

fn_exit:
    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDU_FTB_INIT);
    return mpi_errno;
fn_fail:
    goto fn_exit;
}
Ejemplo n.º 3
0
int
shmem_runtime_init(void)
{
    int spawned, appnum, my_node;
    int initialized;

    if (PMI2_SUCCESS != PMI2_Initialized()) {
        return 1;
    }

    if (!initialized) {
        if (PMI2_SUCCESS != PMI2_Init(&spawned, &size, &rank, &appnum)) {
            return 2;
        }
    }

    max_name_len = PMI2_MAX_VALLEN;
    kvs_name = (char*) malloc(max_name_len);
    if (NULL == kvs_name) return 4;

    max_key_len = PMI2_MAX_KEYLEN;
    kvs_key = (char*) malloc(max_key_len);
    if (NULL == kvs_key) return 6;

    max_val_len = PMI2_MAX_VALLEN;
    kvs_value = (char*) malloc(max_val_len);
    if (NULL == kvs_value) return 8;

    if (PMI2_SUCCESS != PMI2_Job_GetId(kvs_name, max_name_len)) {
        return 7;
    }

    return 0;
}
/**
 * Initialize the module
 */
static int init(void)
{
    int max_length, rc;

#if WANT_PMI2_SUPPORT
    /* TODO -- is this ok */
    max_length = 1024;
#else
    if (PMI_SUCCESS != (rc = PMI_KVS_Get_name_length_max(&max_length))) {
        OPAL_PMI_ERROR(rc, "PMI_KVS_Get_name_length_max");
        return ORTE_ERROR;
    }
#endif
    pmi_kvs_name = (char*)malloc(max_length);
    if (NULL == pmi_kvs_name) {
        return ORTE_ERR_OUT_OF_RESOURCE;
    }

#if WANT_PMI2_SUPPORT
    rc = PMI2_Job_GetId(pmi_kvs_name, max_length);
#else
    rc = PMI_KVS_Get_my_name(pmi_kvs_name,max_length);
#endif
    if (PMI_SUCCESS != rc) {
        OPAL_PMI_ERROR(rc, "PMI_KVS_Get_my_name");
        return ORTE_ERROR;
    }
    return ORTE_SUCCESS;
}
Ejemplo n.º 5
0
RTE_PUBLIC int rte_pmi_init(int *argc, char ***argv, rte_group_t *out_group)
{
   int rc = RTE_SUCCESS, spawned, size, rank, appnum;

#if RTE_WANT_PMI2 == 0
   rc = PMI_Init (&spawned);
   if (PMI_SUCCESS != rc) {
       return RTE_ERROR;
   }
#else  
   rc = PMI2_Init (&spawned, &rte_pmi2_info.size, &rte_pmi2_info.rank,
                   &rte_pmi2_info.appnum);
   if (PMI_SUCCESS != rc) {
       return RTE_ERROR;
   }

   rc = PMI2_Job_GetId (rte_pmi2_info.jobid, 16);
#endif

   /* set up the RTE proc table */
   rc = rte_pmi_proclist_init ();
   
   *out_group = &my_pmi_dummy_group;

   return rc;
}
static int setup_pmi(void)
{
    int max_length, rc;

#if WANT_CRAY_PMI2_EXT
    pmi_vallen_max = PMI2_MAX_VALLEN;
#else
    rc = PMI_KVS_Get_value_length_max(&pmi_vallen_max);
    if (PMI_SUCCESS != rc) {
        ORTE_PMI_ERROR(rc, "PMI_Get_value_length_max");
        return ORTE_ERROR;
    }
#endif
    pmi_attr_val = malloc(pmi_vallen_max);
    if (NULL == pmi_attr_val) {
        return ORTE_ERR_OUT_OF_RESOURCE;
    }

#if WANT_CRAY_PMI2_EXT
    /* TODO -- is this ok */
    max_length = 1024;
#else
    if (PMI_SUCCESS != (rc = PMI_KVS_Get_name_length_max(&max_length))) {
        ORTE_PMI_ERROR(rc, "PMI_KVS_Get_name_length_max");
        return ORTE_ERROR;
    }
#endif
    pmi_kvs_name = malloc(max_length);
    if (NULL == pmi_kvs_name) {
        return ORTE_ERR_OUT_OF_RESOURCE;
    }

#if WANT_CRAY_PMI2_EXT
    rc = PMI2_Job_GetId(pmi_kvs_name, max_length);
#else
    rc = PMI_KVS_Get_my_name(pmi_kvs_name,max_length);
#endif
    if (PMI_SUCCESS != rc) {
        ORTE_PMI_ERROR(rc, "PMI_KVS_Get_my_name");
        return ORTE_ERROR;
    }

#if WANT_CRAY_PMI2_EXT
    pmi_keylen_max = PMI2_MAX_KEYLEN;
#else
    if (PMI_SUCCESS != (rc = PMI_KVS_Get_key_length_max(&pmi_keylen_max))) {
        ORTE_PMI_ERROR(rc, "PMI_KVS_Get_key_length_max");
        return ORTE_ERROR;
    }
#endif
    pmi_kvs_key = malloc(pmi_keylen_max);

    return ORTE_SUCCESS;
}
Ejemplo n.º 7
0
static int setup_pmi(void)
{
    int max_length, rc;

#if WANT_PMI2_SUPPORT
    pmi_vallen_max = PMI2_MAX_VALLEN;
#else
    rc = PMI_KVS_Get_value_length_max(&pmi_vallen_max);
    if (PMI_SUCCESS != rc) {
        return OMPI_ERROR;
    }
#endif

#if WANT_PMI2_SUPPORT
    /* TODO -- is this ok */
    max_length = 1024;
#else
    if (PMI_SUCCESS != (rc = PMI_KVS_Get_name_length_max(&max_length))) {
        return OMPI_ERROR;
    }
#endif
    pmi_kvs_name = (char*)malloc(max_length);
    if (NULL == pmi_kvs_name) {
        return OMPI_ERR_OUT_OF_RESOURCE;
    }

#if WANT_PMI2_SUPPORT
    rc = PMI2_Job_GetId(pmi_kvs_name, max_length);
#else
    rc = PMI_KVS_Get_my_name(pmi_kvs_name,max_length);
#endif
    if (PMI_SUCCESS != rc) {
        return OMPI_ERROR;
    }

#if WANT_PMI2_SUPPORT
    pmi_keylen_max = PMI2_MAX_KEYLEN;
#else
    if (PMI_SUCCESS != (rc = PMI_KVS_Get_key_length_max(&pmi_keylen_max))) {
        return OMPI_ERROR;
    }
#endif

    return OMPI_SUCCESS;
}
Ejemplo n.º 8
0
void init_pmi(int *spawned, int *size, int *rank, int *appnum) {
  int res = PMI2_Init(spawned, size, rank, appnum);
  assert(res == MPI_SUCCESS);

  max_name_len = PMI2_MAX_VALLEN;
  kvs_name = (char*) malloc(max_name_len);
  assert(kvs_name != NULL);

  max_key_len = PMI2_MAX_KEYLEN;
  kvs_key = (char*) malloc(max_key_len);
  assert(kvs_key != NULL);

  max_val_len = PMI2_MAX_VALLEN;
  kvs_value = (char*) malloc(max_val_len);
  assert(kvs_value != NULL);

  res = PMI2_Job_GetId(kvs_name, max_name_len);
  assert(res == MPI_SUCCESS);
}
Ejemplo n.º 9
0
static int test_item1(void)
{
    int rc = 0;
    int val = 0;

    log_info("spawned=%d size=%d rank=%d appnum=%d\n", spawned, size, rank, appnum);

    log_assert(spawned == 0 || spawned == 1, "");
    log_assert(size >= 0, "");
    log_assert(rank >= 0, "");
    log_assert(rank < size, "");

    sprintf(jobid, "%s", __FUNCTION__);
    if (PMI2_SUCCESS != (rc = PMI2_Job_GetId(jobid, sizeof(jobid)))) {
        log_fatal("PMI2_Job_GetId failed: %d\n", rc);
        return rc;
    }

    log_info("jobid=%s\n", jobid);
    log_assert(memcmp(jobid, __FUNCTION__, sizeof(__FUNCTION__)), "");

    val = random_value(10, 100);
    if (PMI2_SUCCESS != (rc = PMI2_Job_GetRank(&val))) {
        log_fatal("PMI2_Job_GetRank failed: %d\n", rc);
        return rc;
    }
    log_assert(rank == val, "");

    val = -1;
    if (PMI2_SUCCESS != (rc = PMI2_Info_GetSize(&val))) {
        log_fatal("PMI2_Info_GetSize failed: %d\n", rc);
        return rc;
    }
    log_assert(0 < val, "");

    return rc;
}
Ejemplo n.º 10
0
int
main(int argc, char **argv)
{
	int rank;
	int size;
	int appnum;
	int spawned;
	int flag;
	int len;
	int i;
	struct timeval tv;
	struct timeval tv2;
	char jobid[128];
	char key[128];
	char val[128];
	char buf[128];

	{
		int x = 1;
		while (x == 0) {
			sleep(2);
		}
	}

	gettimeofday(&tv, NULL);
	srand(tv.tv_sec);

	PMI2_Init(&spawned, &size, &rank, &appnum);

	PMI2_Job_GetId(jobid, sizeof(buf));

	memset(val, 0, sizeof(val));
	PMI2_Info_GetJobAttr("mpi_reserved_ports",
			     val,
			     PMI2_MAX_ATTRVALUE,
			     &flag);

	sprintf(key, "mpi_reserved_ports");
	PMI2_KVS_Put(key, val);

	memset(val, 0, sizeof(val));
	sprintf(buf, "PMI_netinfo_of_task");
	PMI2_Info_GetJobAttr(buf,
			     val,
			     PMI2_MAX_ATTRVALUE,
			     &flag);
	sprintf(key, buf);
	PMI2_KVS_Put(key, val);

	memset(val, 0, sizeof(val));
	sprintf(key, "david@%d", rank);
	sprintf(val, "%s", mrand(97, 122));
	PMI2_KVS_Put(key, val);

	PMI2_KVS_Fence();

	for (i = 0; i < size; i++) {

		memset(val, 0, sizeof(val));
		sprintf(key, "PMI_netinfo_of_task");
		PMI2_KVS_Get(jobid,
			     PMI2_ID_NULL,
			     key,
			     val,
			     sizeof(val),
			     &len);
		printf("rank: %d key:%s val:%s\n", rank, key, val);

		memset(val, 0, sizeof(val));
		sprintf(key, "david@%d", rank);
		PMI2_KVS_Get(jobid,
			     PMI2_ID_NULL,
			     key,
			     val,
			     sizeof(val),
			     &len);
		printf("rank: %d key:%s val:%s\n", rank, key, val);

		memset(val, 0, sizeof(val));
		sprintf(key, "mpi_reserved_ports");
		PMI2_KVS_Get(jobid,
			     PMI2_ID_NULL,
			     key,
			     val,
			     sizeof(val),
			     &len);
		printf("rank: %d key:%s val:%s\n", rank, key, val);
	}

	PMI2_Finalize();

	gettimeofday(&tv2, NULL);
	printf("%f\n",
	       ((tv2.tv_sec - tv.tv_sec) * 1000.0
		+ (tv2.tv_usec - tv.tv_usec) / 1000.0));

	return 0;
}
Ejemplo n.º 11
0
static int cray_get_jobid(char jobId[], int jobIdSize)
{
    return PMI2_Job_GetId(jobId,jobIdSize);
}
Ejemplo n.º 12
0
static int cray_init(void)
{
    int i, spawned, size, rank, appnum, my_node;
    int rc, ret = OPAL_ERROR;
    char *pmapping = NULL;
    char buf[PMI2_MAX_ATTRVALUE];
    int found;
    uint32_t jobfam;

    ++pmix_init_count;

    /* if we can't startup PMI, we can't be used */
    if ( PMI2_Initialized () ) {
        return OPAL_SUCCESS;
    }
    size = -1;
    rank = -1;
    appnum = -1;
    if (PMI_SUCCESS != (rc = PMI2_Init(&spawned, &size, &rank, &appnum))) {
        opal_show_help("help-pmix-base.txt", "pmix2-init-failed", true, rc);
        return OPAL_ERROR;
    }
    if( size < 0 || rank < 0 ){
        opal_show_help("help-pmix-base.txt", "pmix2-init-returned-bad-values", true);
        goto err_exit;
    }

    pmix_size = size;
    pmix_rank = rank;
    pmix_appnum = appnum;

    pmix_vallen_max = PMI2_MAX_VALLEN;
    pmix_kvslen_max = PMI2_MAX_VALLEN; // FIX ME: What to put here for versatility?
    pmix_keylen_max = PMI2_MAX_KEYLEN;

    rc = PMI2_Info_GetJobAttr("universeSize", buf, 16, &found);
    if( PMI_SUCCESS != rc ) {
        OPAL_PMI_ERROR(rc, "PMI_Get_universe_size");
        goto err_exit;
    }
    pmix_usize = atoi(buf);

    pmix_kvs_name = (char*)malloc(pmix_kvslen_max);
    if( pmix_kvs_name == NULL ){
         PMI2_Finalize();
         ret = OPAL_ERR_OUT_OF_RESOURCE;
         goto err_exit;
    }
    rc = PMI2_Job_GetId(pmix_kvs_name, pmix_kvslen_max);
    if( PMI_SUCCESS != rc ) {
        OPAL_PMI_ERROR(rc, "PMI2_Job_GetId");
        goto err_exit;
    }

    rc = sscanf(pmix_kvs_name,"kvs_%u",&jobfam);
    if (rc != 1) {
        OPAL_PMI_ERROR(rc, "PMI2_Job_GetId");
        rc = OPAL_ERROR;
        goto err_exit;
    }

    pmix_jobid = jobfam << 16;

    /* store our name in the opal_proc_t so that
     * debug messages will make sense - an upper
     * layer will eventually overwrite it, but that
     * won't do any harm */
    pmix_pname.jid = pmix_jobid;
    pmix_pname.vid = pmix_rank;
    opal_proc_set_name((opal_process_name_t*)&pmix_pname);
    opal_output_verbose(10, opal_pmix_base_framework.framework_output,
                        "%s pmix:cray: assigned tmp name %d %d pmix_kvs_name %s",
                        OPAL_NAME_PRINT(*(opal_process_name_t*)&pmix_pname),pmix_pname.jid,pmix_pname.vid,pmix_kvs_name);

    pmapping = (char*)malloc(PMI2_MAX_VALLEN);
    if( pmapping == NULL ){
        rc = OPAL_ERR_OUT_OF_RESOURCE;
        OPAL_ERROR_LOG(rc);
        return rc;
    }

    rc = PMI2_Info_GetJobAttr("PMI_process_mapping", pmapping, PMI2_MAX_VALLEN, &found);
    if( !found || PMI_SUCCESS != rc ) {
        OPAL_PMI_ERROR(rc,"PMI2_Info_GetJobAttr");
        return OPAL_ERROR;
    }

    pmix_lranks = pmix_cray_parse_pmap(pmapping, pmix_rank, &my_node, &pmix_nlranks);
    if (NULL == pmix_lranks) {
        rc = OPAL_ERR_OUT_OF_RESOURCE;
        OPAL_ERROR_LOG(rc);
        return rc;
    }

    free(pmapping);

    /* find ourselves */
    for (i=0; i < pmix_nlranks; i++) {
        if (pmix_rank == pmix_lranks[i]) {
            pmix_lrank = i;
            pmix_nrank = my_node;
            break;
        }
    }

    return OPAL_SUCCESS;
err_exit:
    PMI2_Finalize();
    return ret;
}
Ejemplo n.º 13
0
static int cray_init(void)
{
    int i, spawned, size, rank, appnum, my_node;
    int rc, ret = OPAL_ERROR;
    char *pmapping = NULL;
    char buf[PMI2_MAX_ATTRVALUE];
    int found;
    int major, minor, revision;
    uint32_t jobfam;
    opal_value_t kv;
    opal_process_name_t ldr;
    char nmtmp[64];
    char *str, **localranks = NULL;

    ++pmix_init_count;

    /* if we can't startup PMI, we can't be used */
    if ( PMI2_Initialized () ) {
        opal_output_verbose(10, opal_pmix_base_framework.framework_output,
                        "%s pmix:cray: pmi already initialized",
                        OPAL_NAME_PRINT(pmix_pname));
        return OPAL_SUCCESS;
    }
    size = -1;
    rank = -1;
    appnum = -1;
    if (PMI_SUCCESS != (rc = PMI2_Init(&spawned, &size, &rank, &appnum))) {
        opal_show_help("help-pmix-base.txt", "pmix2-init-failed", true, rc);
        return OPAL_ERROR;
    }
    if( size < 0 || rank < 0 ){
        opal_show_help("help-pmix-base.txt", "pmix2-init-returned-bad-values", true);
        goto err_exit;
    }

    pmix_size = size;
    pmix_rank = rank;
    pmix_appnum = appnum;

    pmix_vallen_max = PMI2_MAX_VALLEN;
    pmix_kvslen_max = PMI2_MAX_VALLEN; // FIX ME: What to put here for versatility?
    pmix_keylen_max = PMI2_MAX_KEYLEN;
    pmix_vallen_threshold = PMI2_MAX_VALLEN * 3;
    pmix_vallen_threshold >>= 2;

    /*
     * get the version info
     */

    if (PMI_SUCCESS != PMI_Get_version_info(&major,&minor,&revision)) {
        return OPAL_ERROR;
    }

    snprintf(cray_pmi_version, sizeof(cray_pmi_version),
             "%d.%d.%d", major, minor, revision);

    pmix_kvs_name = (char*)malloc(pmix_kvslen_max);
    if( pmix_kvs_name == NULL ){
         PMI2_Finalize();
         ret = OPAL_ERR_OUT_OF_RESOURCE;
         goto err_exit;
    }

    rc = PMI2_Job_GetId(pmix_kvs_name, pmix_kvslen_max);
    if( PMI_SUCCESS != rc ) {
        OPAL_PMI_ERROR(rc, "PMI2_Job_GetId");
        goto err_exit;
    }

    rc = sscanf(pmix_kvs_name,"kvs_%u",&jobfam);
    if (rc != 1) {
        opal_output_verbose(10, opal_pmix_base_framework.framework_output,
                           "%s pmix:cray: pmix_kvs_name %s",
                            OPAL_NAME_PRINT(pmix_pname), pmix_kvs_name);
        rc = OPAL_ERROR;
        goto err_exit;
    }

    pmix_jobid = jobfam << 16;

    /* store our name in the opal_proc_t so that
     * debug messages will make sense - an upper
     * layer will eventually overwrite it, but that
     * won't do any harm */
    pmix_pname.jobid = pmix_jobid;
    pmix_pname.vpid = pmix_rank;
    opal_proc_set_name(&pmix_pname);
    opal_output_verbose(10, opal_pmix_base_framework.framework_output,
                        "%s pmix:cray: assigned tmp name %d %d pmix_kvs_name %s",
                        OPAL_NAME_PRINT(pmix_pname),pmix_pname.jobid,pmix_pname.vpid,pmix_kvs_name);

    pmapping = (char*)malloc(PMI2_MAX_VALLEN);
    if( pmapping == NULL ){
        rc = OPAL_ERR_OUT_OF_RESOURCE;
        OPAL_ERROR_LOG(rc);
        return rc;
    }

    rc = PMI2_Info_GetJobAttr("PMI_process_mapping", pmapping, PMI2_MAX_VALLEN, &found);
    if( !found || PMI_SUCCESS != rc ) {
        OPAL_PMI_ERROR(rc,"PMI2_Info_GetJobAttr");
        return OPAL_ERROR;
    }

    pmix_lranks = pmix_cray_parse_pmap(pmapping, pmix_rank, &my_node, &pmix_nlranks);
    if (NULL == pmix_lranks) {
        rc = OPAL_ERR_OUT_OF_RESOURCE;
        OPAL_ERROR_LOG(rc);
        return rc;
    }

    free(pmapping);

    // setup hash table
    opal_pmix_base_hash_init();

    /* save the job size */
    OBJ_CONSTRUCT(&kv, opal_value_t);
    kv.key = strdup(OPAL_PMIX_JOB_SIZE);
    kv.type = OPAL_UINT32;
    kv.data.uint32 = pmix_size;
    if (OPAL_SUCCESS != (rc = opal_pmix_base_store(&OPAL_PROC_MY_NAME, &kv))) {
        OPAL_ERROR_LOG(rc);
        OBJ_DESTRUCT(&kv);
        goto err_exit;
    }
    OBJ_DESTRUCT(&kv);

    /* save the appnum */
    OBJ_CONSTRUCT(&kv, opal_value_t);
    kv.key = strdup(OPAL_PMIX_APPNUM);
    kv.type = OPAL_UINT32;
    kv.data.uint32 = pmix_appnum;
    if (OPAL_SUCCESS != (ret = opal_pmix_base_store(&OPAL_PROC_MY_NAME, &kv))) {
        OPAL_ERROR_LOG(ret);
        OBJ_DESTRUCT(&kv);
        goto err_exit;
    }
    OBJ_DESTRUCT(&kv);

    rc = PMI2_Info_GetJobAttr("universeSize", buf, 16, &found);
    if( PMI_SUCCESS != rc ) {
        OPAL_PMI_ERROR(rc, "PMI_Get_universe_size");
        goto err_exit;
    }

    pmix_usize = atoi(buf);

    OBJ_CONSTRUCT(&kv, opal_value_t);
    kv.key = strdup(OPAL_PMIX_UNIV_SIZE);
    kv.type = OPAL_UINT32;
    kv.data.uint32 = pmix_usize;
    if (OPAL_SUCCESS != (rc = opal_pmix_base_store(&OPAL_PROC_MY_NAME, &kv))) {
        OPAL_ERROR_LOG(rc);
        OBJ_DESTRUCT(&kv);
        goto err_exit;
    }
    OBJ_DESTRUCT(&kv);

    OBJ_CONSTRUCT(&kv, opal_value_t);
    kv.key = strdup(OPAL_PMIX_JOBID);
    kv.type = OPAL_UINT32;
    kv.data.uint32 = pmix_jobid;
    if (OPAL_SUCCESS != (ret = opal_pmix_base_store(&OPAL_PROC_MY_NAME, &kv))) {
        OPAL_ERROR_LOG(ret);
        OBJ_DESTRUCT(&kv);
        goto err_exit;
    }
    OBJ_DESTRUCT(&kv);

    /* save the local size */
    OBJ_CONSTRUCT(&kv, opal_value_t);
    kv.key = strdup(OPAL_PMIX_LOCAL_SIZE);
    kv.type = OPAL_UINT16;
    kv.data.uint16 = pmix_nlranks;
    if (OPAL_SUCCESS != (rc = opal_pmix_base_store(&OPAL_PROC_MY_NAME, &kv))) {
        OPAL_ERROR_LOG(rc);
        OBJ_DESTRUCT(&kv);
        goto err_exit;
    }
    OBJ_DESTRUCT(&kv);

    ldr.vpid = pmix_lranks[0];
    ldr.jobid = pmix_pname.jobid;

    /* find ourselves and build up a string for local peer info */
    memset(nmtmp, 0, 64);
    for (i=0; i < pmix_nlranks; i++) {
        ret = snprintf(nmtmp, 64, "%d", pmix_lranks[i]);
        opal_argv_append_nosize(&localranks, nmtmp);
        if (pmix_rank == pmix_lranks[i]) {
            pmix_lrank = i;
            pmix_nrank = i;
        }
    }

    str = opal_argv_join(localranks, ',');
    opal_argv_free(localranks);

    OBJ_CONSTRUCT(&kv, opal_value_t);
    kv.key = strdup(OPAL_PMIX_LOCAL_PEERS);
    kv.type = OPAL_STRING;
    kv.data.string = str;
    if (OPAL_SUCCESS != (ret = opal_pmix_base_store(&OPAL_PROC_MY_NAME, &kv))) {
        OPAL_ERROR_LOG(ret);
        OBJ_DESTRUCT(&kv);
        goto err_exit;
    }
    OBJ_DESTRUCT(&kv);

    /* save the local leader */
    OBJ_CONSTRUCT(&kv, opal_value_t);
    kv.key = strdup(OPAL_PMIX_LOCALLDR);
    kv.type = OPAL_UINT64;
    kv.data.uint64 = *(uint64_t*)&ldr;
    if (OPAL_SUCCESS != (ret = opal_pmix_base_store(&OPAL_PROC_MY_NAME, &kv))) {
        OPAL_ERROR_LOG(ret);
        OBJ_DESTRUCT(&kv);
        goto err_exit;
    }

    /* save our local rank */
    OBJ_CONSTRUCT(&kv, opal_value_t);
    kv.key = strdup(OPAL_PMIX_LOCAL_RANK);
    kv.type = OPAL_UINT16;
    kv.data.uint16 = pmix_lrank;
    if (OPAL_SUCCESS != (ret = opal_pmix_base_store(&OPAL_PROC_MY_NAME, &kv))) {
        OPAL_ERROR_LOG(ret);
        OBJ_DESTRUCT(&kv);
        goto err_exit;
    }

    /* and our node rank */
    OBJ_CONSTRUCT(&kv, opal_value_t);
    kv.key = strdup(OPAL_PMIX_NODE_RANK);
    kv.type = OPAL_UINT16;
    kv.data.uint16 = pmix_nrank;
    if (OPAL_SUCCESS != (ret = opal_pmix_base_store(&OPAL_PROC_MY_NAME, &kv))) {
        OPAL_ERROR_LOG(ret);
        OBJ_DESTRUCT(&kv);
        goto err_exit;
    }
    OBJ_DESTRUCT(&kv);

    return OPAL_SUCCESS;
err_exit:
    PMI2_Finalize();
    return ret;
}
Ejemplo n.º 14
0
static int mca_initialize_pmi_v2(void)
{
    int spawned, size, rank, appnum;
    int rc, ret = OPAL_ERROR;

    /* deal with a Slurm bug by first checking if we were
     * even launched by a PMI server before attempting
     * to use PMI */
    if (NULL == getenv("PMI_FD")) {
        return OPAL_ERROR;
    }

    /* if we can't startup PMI, we can't be used */
    if ( PMI2_Initialized () ) {
        return OPAL_SUCCESS;
    }
    size = -1;
    rank = -1;
    appnum = -1;
    if (PMI2_SUCCESS != (rc = PMI2_Init(&spawned, &size, &rank, &appnum))) {
        opal_show_help("help-common-pmi.txt", "pmi2-init-failed", true, rc);
        return OPAL_ERROR;
    }
    if( size < 0 || rank < 0 ){
        opal_output(0, "SIZE %d RANK %d", size, rank);
        opal_show_help("help-common-pmi.txt", "pmi2-init-returned-bad-values", true);
        goto err_exit;
    }


    pmi_size = size;
    pmi_rank = rank;
    pmi_appnum = appnum;

    pmi_vallen_max = PMI2_MAX_VALLEN;
    pmi_kvslen_max = PMI2_MAX_VALLEN; // FIX ME: What to put here for versatility?
    pmi_keylen_max = PMI2_MAX_KEYLEN;


    char buf[16];
    int found;
    
    rc = PMI2_Info_GetJobAttr("universeSize", buf, 16, &found);
    if( PMI2_SUCCESS != rc ) {
        OPAL_PMI_ERROR(rc, "PMI_Get_universe_size");
        goto err_exit;
    }
    pmi_usize = atoi(buf);

    pmi_kvs_name = (char*)malloc(pmi_kvslen_max);
    if( pmi_kvs_name == NULL ){
         PMI2_Finalize();
         ret = OPAL_ERR_OUT_OF_RESOURCE;
         goto err_exit;
    }
    rc = PMI2_Job_GetId(pmi_kvs_name, pmi_kvslen_max);
    if( PMI2_SUCCESS != rc ) {
        OPAL_PMI_ERROR(rc, "PMI2_Job_GetId");
        goto err_exit;
    }

    return OPAL_SUCCESS;
err_exit:
    PMI2_Finalize();
    return ret;
}
Ejemplo n.º 15
0
/* gasneti_bootstrapInit
 */
int gasneti_bootstrapInit_pmi(
        int *argc_p, char ***argv_p,
        gasnet_node_t *nodes_p, gasnet_node_t *mynode_p) {
    int size, rank;

#if USE_PMI2_API
    int spawned, appnum;
    if (PMI2_SUCCESS != PMI2_Init(&spawned, &size, &rank, &appnum)) {
        return GASNET_ERR_NOT_INIT;
    }
#else
    if (PMI_SUCCESS != PMI_Initialized(&gasneti_pmi_initialized)) {
        return GASNET_ERR_NOT_INIT;
    }

    if (PMI_FALSE == gasneti_pmi_initialized) {
        int spawned;
        if (PMI_SUCCESS != PMI_Init(&spawned)) {
            return GASNET_ERR_NOT_INIT;
        }
    }

    if (PMI_SUCCESS != PMI_Get_rank(&rank)) {
        gasneti_fatalerror("PMI_Get_rank() failed");
    }

    if (PMI_SUCCESS != PMI_Get_size(&size)) {
        gasneti_fatalerror("PMI_Get_size() failed");
    }
#endif

    *mynode_p = rank;
    *nodes_p = size;

#if USE_PMI2_API
    max_name_len = 1024; /* XXX: can almost certainly be shorter than this! */
    max_key_len = PMI2_MAX_KEYLEN;
    max_val_len = PMI2_MAX_VALLEN;
#else
    if (PMI_SUCCESS != PMI_KVS_Get_name_length_max(&max_name_len)) {
        gasneti_fatalerror("PMI_KVS_Get_name_length_max() failed");
    }
    if (PMI_SUCCESS != PMI_KVS_Get_key_length_max(&max_key_len)) {
        gasneti_fatalerror("PMI_KVS_Get_key_length_max() failed");
    }
    if (PMI_SUCCESS != PMI_KVS_Get_value_length_max(&max_val_len)) {
        gasneti_fatalerror("PMI_KVS_Get_value_length_max() failed");
    }
#endif

    kvs_name = (char*) gasneti_malloc(max_name_len);
    kvs_key = (char*) gasneti_malloc(max_key_len);
    kvs_value = (char*) gasneti_malloc(max_val_len);
    max_val_bytes = 4 * (max_val_len / 5);

#if USE_PMI2_API
    if (PMI2_SUCCESS != PMI2_Job_GetId(kvs_name, max_name_len)) {
        gasneti_fatalerror("PMI2_Job_GetId() failed");
    }
#else
    if (PMI_SUCCESS != PMI_KVS_Get_my_name(kvs_name, max_name_len)) {
        gasneti_fatalerror("PMI_KVS_Get_my_name() failed");
    }
#endif

    return GASNET_OK;
}
Ejemplo n.º 16
0
static int s2_init(void)
{
    int spawned, size, rank, appnum;
    int rc, ret = OPAL_ERROR;
    char buf[16];
    int found;
    int my_node;
    char *tmp;
    uint32_t jobfam, stepid;
    int i;

    /* if we can't startup PMI, we can't be used */
    if ( PMI2_Initialized () ) {
        return OPAL_SUCCESS;
    }
    size = -1;
    rank = -1;
    appnum = -1;
    if (PMI2_SUCCESS != (rc = PMI2_Init(&spawned, &size, &rank, &appnum))) {
        opal_show_help("help-pmix-base.txt", "pmix2-init-failed", true, rc);
        return OPAL_ERROR;
    }
    if( size < 0 || rank < 0 ){
        opal_show_help("help-pmix-base.txt", "pmix2-init-returned-bad-values", true);
        goto err_exit;
    }

    s2_jsize = size;
    s2_rank = rank;
    s2_appnum = appnum;

    pmix_vallen_max = PMI2_MAX_VALLEN;
    pmix_kvslen_max = PMI2_MAX_VALLEN; // FIX ME: What to put here for versatility?
    pmix_keylen_max = PMI2_MAX_KEYLEN;
    pmix_vallen_threshold = PMI2_MAX_VALLEN * 3;
    pmix_vallen_threshold >>= 2;

    rc = PMI2_Info_GetJobAttr("universeSize", buf, 16, &found);
    if( PMI2_SUCCESS != rc ) {
        OPAL_PMI_ERROR(rc, "PMI_Get_universe_size");
        goto err_exit;
    }
    s2_usize = atoi(buf);

    pmix_kvs_name = (char*)malloc(pmix_kvslen_max);
    if( pmix_kvs_name == NULL ){
        PMI2_Finalize();
        ret = OPAL_ERR_OUT_OF_RESOURCE;
        goto err_exit;
    }
    rc = PMI2_Job_GetId(pmix_kvs_name, pmix_kvslen_max);
    if( PMI2_SUCCESS != rc ) {
        OPAL_PMI_ERROR(rc, "PMI2_Job_GetId");
        goto err_exit;
    }

    /* Slurm PMI provides the job id as an integer followed
     * by a '.', followed by essentially a stepid. The first integer
     * defines an overall job number. The second integer is the number of
     * individual jobs we have run within that allocation. So we translate
     * this as the overall job number equating to our job family, and
     * the individual number equating to our local jobid
     */

    jobfam = strtoul(pmix_kvs_name, &tmp, 10);
    if (NULL == tmp) {
        /* hmmm - no '.', so let's just use zero */
        stepid = 0;
    } else {
        tmp++; /* step over the '.' */
        stepid = strtoul(tmp, NULL, 10);
    }
    /* now build the jobid */
    s2_jobid = (jobfam << 16) | stepid;

    /* store our name in the opal_proc_t so that
     * debug messages will make sense - an upper
     * layer will eventually overwrite it, but that
     * won't do any harm */
    s2_pname.jobid = s2_jobid;
    s2_pname.vpid = s2_rank;
    opal_proc_set_name(&s2_pname);
    opal_output_verbose(2, opal_pmix_base_framework.framework_output,
                        "%s pmix:s2: assigned tmp name",
                        OPAL_NAME_PRINT(s2_pname));

    char *pmapping = (char*)malloc(PMI2_MAX_VALLEN);
    if( pmapping == NULL ){
        rc = OPAL_ERR_OUT_OF_RESOURCE;
        OPAL_ERROR_LOG(rc);
        return rc;
    }

    rc = PMI2_Info_GetJobAttr("PMI_process_mapping", pmapping, PMI2_MAX_VALLEN, &found);
    if( !found || PMI2_SUCCESS != rc ) {
        OPAL_PMI_ERROR(rc,"PMI2_Info_GetJobAttr");
        return OPAL_ERROR;
    }

    s2_lranks = mca_common_pmi2_parse_pmap(pmapping, s2_pname.vpid, &my_node, &s2_nlranks);
    if (NULL == s2_lranks) {
        rc = OPAL_ERR_OUT_OF_RESOURCE;
        OPAL_ERROR_LOG(rc);
        return rc;
    }

    free(pmapping);

    /* find ourselves */
    for (i=0; i < s2_nlranks; i++) {
        if (s2_rank == s2_lranks[i]) {
            s2_lrank = i;
            s2_nrank = i;
            break;
        }
    }

    /* increment the init count */
    ++pmix_init_count;

    return OPAL_SUCCESS;
 err_exit:
    PMI2_Finalize();
    return ret;
}
Ejemplo n.º 17
0
static int s2_init(void)
{
    int spawned, size, rank, appnum;
    int rc, ret = OPAL_ERROR;
    char buf[16];
    int found;
    int my_node;
    uint32_t stepid;
    int i;
    opal_process_name_t ldr;
    opal_value_t kv;
    char **localranks;
    char *str;
    char nmtmp[64];
    opal_process_name_t wildcard_rank;

    /* if we can't startup PMI, we can't be used */
    if ( PMI2_Initialized () ) {
        return OPAL_SUCCESS;
    }
    size = -1;
    rank = -1;
    appnum = -1;
    // setup hash table so we always can finalize it
    opal_pmix_base_hash_init();

    if (PMI2_SUCCESS != (rc = PMI2_Init(&spawned, &size, &rank, &appnum))) {
        opal_show_help("help-pmix-base.txt", "pmix2-init-failed", true, rc);
        return OPAL_ERROR;
    }
    if( size < 0 || rank < 0 ){
        opal_show_help("help-pmix-base.txt", "pmix2-init-returned-bad-values", true);
        goto err_exit;
    }

    s2_jsize = size;
    s2_rank = rank;
    s2_appnum = appnum;

    pmix_vallen_max = PMI2_MAX_VALLEN;
    pmix_kvslen_max = PMI2_MAX_VALLEN; // FIX ME: What to put here for versatility?
    pmix_keylen_max = PMI2_MAX_KEYLEN;
    pmix_vallen_threshold = PMI2_MAX_VALLEN * 3;
    pmix_vallen_threshold >>= 2;

    pmix_kvs_name = (char*)malloc(pmix_kvslen_max);
    if( pmix_kvs_name == NULL ){
        PMI2_Finalize();
        ret = OPAL_ERR_OUT_OF_RESOURCE;
        goto err_exit;
    }
    rc = PMI2_Job_GetId(pmix_kvs_name, pmix_kvslen_max);
    if( PMI2_SUCCESS != rc ) {
        OPAL_PMI_ERROR(rc, "PMI2_Job_GetId");
        free(pmix_kvs_name);
        goto err_exit;
    }

    /* store our name in the opal_proc_t so that
     * debug messages will make sense - an upper
     * layer will eventually overwrite it, but that
     * won't do any harm */
    s2_pname.jobid = strtoul(pmix_kvs_name, &str, 10);
    s2_pname.jobid = (s2_pname.jobid << 16) & 0xffff0000;
    if (NULL != str) {
        stepid = strtoul(str, NULL, 10);
        s2_pname.jobid |= (stepid & 0x0000ffff);
    }
    s2_pname.vpid = s2_rank;
    opal_proc_set_name(&s2_pname);
    opal_output_verbose(2, opal_pmix_base_framework.framework_output,
                        "%s pmix:s2: assigned tmp name",
                        OPAL_NAME_PRINT(s2_pname));

    /* setup wildcard rank*/
    wildcard_rank = OPAL_PROC_MY_NAME;
    wildcard_rank.vpid = OPAL_VPID_WILDCARD;

    /* Slurm PMI provides the job id as an integer followed
     * by a '.', followed by essentially a stepid. The first integer
     * defines an overall job number. The second integer is the number of
     * individual jobs we have run within that allocation.
     */
    OBJ_CONSTRUCT(&kv, opal_value_t);
    kv.key = strdup(OPAL_PMIX_JOBID);
    kv.type = OPAL_UINT32;
    kv.data.uint32 = s2_pname.jobid;
    if (OPAL_SUCCESS != (ret = opal_pmix_base_store(&wildcard_rank, &kv))) {
        OPAL_ERROR_LOG(ret);
        OBJ_DESTRUCT(&kv);
        goto err_exit;
    }
    OBJ_DESTRUCT(&kv);  // frees pmix_kvs_name

    /* save the job size */
    OBJ_CONSTRUCT(&kv, opal_value_t);
    kv.key = strdup(OPAL_PMIX_JOB_SIZE);
    kv.type = OPAL_UINT32;
    kv.data.uint32 = size;
    if (OPAL_SUCCESS != (rc = opal_pmix_base_store(&wildcard_rank, &kv))) {
        OPAL_ERROR_LOG(rc);
        OBJ_DESTRUCT(&kv);
        goto err_exit;
    }
    OBJ_DESTRUCT(&kv);

    /* save the appnum */
    OBJ_CONSTRUCT(&kv, opal_value_t);
    kv.key = strdup(OPAL_PMIX_APPNUM);
    kv.type = OPAL_UINT32;
    kv.data.uint32 = appnum;
    if (OPAL_SUCCESS != (ret = opal_pmix_base_store(&OPAL_PROC_MY_NAME, &kv))) {
        OPAL_ERROR_LOG(ret);
        OBJ_DESTRUCT(&kv);
        goto err_exit;
    }
    OBJ_DESTRUCT(&kv);

    rc = PMI2_Info_GetJobAttr("universeSize", buf, 16, &found);
    if( PMI2_SUCCESS != rc ) {
        OPAL_PMI_ERROR(rc, "PMI_Get_universe_size");
        goto err_exit;
    }
    /* save it */
    OBJ_CONSTRUCT(&kv, opal_value_t);
    kv.key = strdup(OPAL_PMIX_UNIV_SIZE);
    kv.type = OPAL_UINT32;
    kv.data.uint32 = atoi(buf);
    if (OPAL_SUCCESS != (rc = opal_pmix_base_store(&wildcard_rank, &kv))) {
        OPAL_ERROR_LOG(rc);
        OBJ_DESTRUCT(&kv);
        goto err_exit;
    }
    OBJ_DESTRUCT(&kv);
    /* push this into the dstore for subsequent fetches */
    OBJ_CONSTRUCT(&kv, opal_value_t);
    kv.key = strdup(OPAL_PMIX_MAX_PROCS);
    kv.type = OPAL_UINT32;
    kv.data.uint32 = atoi(buf);
    if (OPAL_SUCCESS != (ret = opal_pmix_base_store(&wildcard_rank, &kv))) {
        OPAL_ERROR_LOG(ret);
        OBJ_DESTRUCT(&kv);
        goto err_exit;
    }
    OBJ_DESTRUCT(&kv);

    char *pmapping = (char*)malloc(PMI2_MAX_VALLEN);
    if( pmapping == NULL ){
        rc = OPAL_ERR_OUT_OF_RESOURCE;
        OPAL_ERROR_LOG(rc);
        return rc;
    }

    rc = PMI2_Info_GetJobAttr("PMI_process_mapping", pmapping, PMI2_MAX_VALLEN, &found);
    if( !found || PMI2_SUCCESS != rc ) {
        OPAL_PMI_ERROR(rc,"PMI2_Info_GetJobAttr");
        return OPAL_ERROR;
    }

    s2_lranks = mca_common_pmi2_parse_pmap(pmapping, s2_pname.vpid, &my_node, &s2_nlranks);
    if (NULL == s2_lranks) {
        rc = OPAL_ERR_OUT_OF_RESOURCE;
        OPAL_ERROR_LOG(rc);
        return rc;
    }

    free(pmapping);

    /* save the local size */
    OBJ_CONSTRUCT(&kv, opal_value_t);
    kv.key = strdup(OPAL_PMIX_LOCAL_SIZE);
    kv.type = OPAL_UINT32;
    kv.data.uint32 = s2_nlranks;
    if (OPAL_SUCCESS != (rc = opal_pmix_base_store(&wildcard_rank, &kv))) {
        OPAL_ERROR_LOG(rc);
        OBJ_DESTRUCT(&kv);
        goto err_exit;
    }
    OBJ_DESTRUCT(&kv);

    s2_lrank = 0;
    s2_nrank = 0;
    ldr.vpid = rank;
    localranks = NULL;
    if (0 < s2_nlranks && NULL != s2_lranks) {
        /* note the local ldr */
        ldr.vpid = s2_lranks[0];
        /* find ourselves */
        ldr.jobid = s2_pname.jobid;
        ldr.vpid = s2_pname.vpid;
        memset(nmtmp, 0, 64);
        for (i=0; i < s2_nlranks; i++) {
            (void)snprintf(nmtmp, 64, "%d", s2_lranks[i]);
            opal_argv_append_nosize(&localranks, nmtmp);
            if (s2_rank == s2_lranks[i]) {
                s2_lrank = i;
                s2_nrank = i;
            }
        }
        str = opal_argv_join(localranks, ',');
        opal_argv_free(localranks);
        OBJ_CONSTRUCT(&kv, opal_value_t);
        kv.key = strdup(OPAL_PMIX_LOCAL_PEERS);
        kv.type = OPAL_STRING;
        kv.data.string = str;
        if (OPAL_SUCCESS != (ret = opal_pmix_base_store(&wildcard_rank, &kv))) {
            OPAL_ERROR_LOG(ret);
            OBJ_DESTRUCT(&kv);
            goto err_exit;
        }
        OBJ_DESTRUCT(&kv);
    }

    /* save the local leader */
    OBJ_CONSTRUCT(&kv, opal_value_t);
    kv.key = strdup(OPAL_PMIX_LOCALLDR);
    kv.type = OPAL_UINT64;
    kv.data.uint64 = *(uint64_t*)&ldr;
    if (OPAL_SUCCESS != (ret = opal_pmix_base_store(&OPAL_PROC_MY_NAME, &kv))) {
        OPAL_ERROR_LOG(ret);
        OBJ_DESTRUCT(&kv);
        goto err_exit;
    }
    OBJ_DESTRUCT(&kv);
    /* save our local rank */
    OBJ_CONSTRUCT(&kv, opal_value_t);
    kv.key = strdup(OPAL_PMIX_LOCAL_RANK);
    kv.type = OPAL_UINT16;
    kv.data.uint16 = s2_lrank;
    if (OPAL_SUCCESS != (ret = opal_pmix_base_store(&OPAL_PROC_MY_NAME, &kv))) {
        OPAL_ERROR_LOG(ret);
        OBJ_DESTRUCT(&kv);
        goto err_exit;
    }
    OBJ_DESTRUCT(&kv);
    /* and our node rank */
    OBJ_CONSTRUCT(&kv, opal_value_t);
    kv.key = strdup(OPAL_PMIX_NODE_RANK);
    kv.type = OPAL_UINT16;
    kv.data.uint16 = s2_nrank;
    if (OPAL_SUCCESS != (ret = opal_pmix_base_store(&OPAL_PROC_MY_NAME, &kv))) {
        OPAL_ERROR_LOG(ret);
        OBJ_DESTRUCT(&kv);
        goto err_exit;
    }
    OBJ_DESTRUCT(&kv);

    /* increment the init count */
    ++pmix_init_count;

    return OPAL_SUCCESS;
err_exit:
    PMI2_Finalize();
    return ret;
}