Пример #1
0
int MPID_Comm_spawn_multiple(int count, char *commands[], char **argvs[], const int maxprocs[],
                             MPIR_Info * info_ptrs[], int root, MPIR_Comm * comm_ptr,
                             MPIR_Comm ** intercomm, int errcodes[])
{
    char port_name[MPI_MAX_PORT_NAME];
    int *info_keyval_sizes = 0, i, mpi_errno = MPI_SUCCESS;
    PMI_keyval_t **info_keyval_vectors = 0, preput_keyval_vector;
    int *pmi_errcodes = 0, pmi_errno = 0;
    int total_num_processes, should_accept = 1;

    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_COMM_SPAWN_MULTIPLE);
    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_COMM_SPAWN_MULTIPLE);

    memset(port_name, 0, sizeof(port_name));

    if (comm_ptr->rank == root) {
        total_num_processes = 0;

        for (i = 0; i < count; i++)
            total_num_processes += maxprocs[i];

        pmi_errcodes = (int *) MPL_malloc(sizeof(int) * total_num_processes, MPL_MEM_BUFFER);
        MPIR_ERR_CHKANDJUMP(!pmi_errcodes, mpi_errno, MPI_ERR_OTHER, "**nomem");

        for (i = 0; i < total_num_processes; i++)
            pmi_errcodes[i] = 0;

        mpi_errno = MPID_Open_port(NULL, port_name);
        if (mpi_errno)
            MPIR_ERR_POP(mpi_errno);

        info_keyval_sizes = (int *) MPL_malloc(count * sizeof(int), MPL_MEM_BUFFER);
        MPIR_ERR_CHKANDJUMP(!info_keyval_sizes, mpi_errno, MPI_ERR_OTHER, "**nomem");
        info_keyval_vectors =
            (PMI_keyval_t **) MPL_malloc(count * sizeof(PMI_keyval_t *), MPL_MEM_BUFFER);
        MPIR_ERR_CHKANDJUMP(!info_keyval_vectors, mpi_errno, MPI_ERR_OTHER, "**nomem");

        if (!info_ptrs)
            for (i = 0; i < count; i++) {
                info_keyval_vectors[i] = 0;
                info_keyval_sizes[i] = 0;
        } else
            for (i = 0; i < count; i++) {
                mpi_errno = mpi_to_pmi_keyvals(info_ptrs[i], &info_keyval_vectors[i],
                                               &info_keyval_sizes[i]);
                if (mpi_errno)
                    MPIR_ERR_POP(mpi_errno);
            }

        preput_keyval_vector.key = MPIDI_PARENT_PORT_KVSKEY;
        preput_keyval_vector.val = port_name;
        pmi_errno = PMI_Spawn_multiple(count, (const char **)
                                       commands,
                                       (const char ***) argvs,
                                       maxprocs, info_keyval_sizes, (const PMI_keyval_t **)
                                       info_keyval_vectors, 1, &preput_keyval_vector, pmi_errcodes);

        if (pmi_errno != PMI_SUCCESS)
            MPIR_ERR_SETANDJUMP1(mpi_errno, MPI_ERR_OTHER,
                                 "**pmi_spawn_multiple", "**pmi_spawn_multiple %d", pmi_errno);

        if (errcodes != MPI_ERRCODES_IGNORE) {
            for (i = 0; i < total_num_processes; i++) {
                errcodes[i] = pmi_errcodes[0];
                should_accept = should_accept && errcodes[i];
            }

            should_accept = !should_accept;
        }
    }

    if (errcodes != MPI_ERRCODES_IGNORE) {
        MPIR_Errflag_t errflag = MPIR_ERR_NONE;
        mpi_errno = MPIR_Bcast(&should_accept, 1, MPI_INT, root, comm_ptr, &errflag);
        if (mpi_errno)
            MPIR_ERR_POP(mpi_errno);

        mpi_errno = MPIR_Bcast(&pmi_errno, 1, MPI_INT, root, comm_ptr, &errflag);
        if (mpi_errno)
            MPIR_ERR_POP(mpi_errno);

        mpi_errno = MPIR_Bcast(&total_num_processes, 1, MPI_INT, root, comm_ptr, &errflag);
        if (mpi_errno)
            MPIR_ERR_POP(mpi_errno);

        mpi_errno = MPIR_Bcast(errcodes, total_num_processes, MPI_INT, root, comm_ptr, &errflag);
        if (mpi_errno)
            MPIR_ERR_POP(mpi_errno);
    }

    if (should_accept) {
        mpi_errno = MPID_Comm_accept(port_name, NULL, root, comm_ptr, intercomm);
        if (mpi_errno)
            MPIR_ERR_POP(mpi_errno);
    } else {
        if ((pmi_errno == PMI_SUCCESS) && (errcodes[0] != 0)) {
            mpi_errno = MPIR_Comm_create(intercomm);
            if (mpi_errno)
                MPIR_ERR_POP(mpi_errno);
        }
    }

    if (comm_ptr->rank == root) {
        mpi_errno = MPID_Close_port(port_name);
        if (mpi_errno)
            MPIR_ERR_POP(mpi_errno);
    }

  fn_exit:

    if (info_keyval_vectors) {
        free_pmi_keyvals(info_keyval_vectors, count, info_keyval_sizes);
        MPL_free(info_keyval_vectors);
    }

    MPL_free(info_keyval_sizes);
    MPL_free(pmi_errcodes);

    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_COMM_SPAWN_MULTIPLE);
    return mpi_errno;
  fn_fail:
    goto fn_exit;
}
int MPIDI_Comm_spawn_multiple(int count, char **commands,
                                  char ***argvs, const int *maxprocs,
                                  MPIR_Info **info_ptrs, int root,
                                  MPIR_Comm *comm_ptr, MPIR_Comm
                                  **intercomm, int *errcodes) 
{
    char port_name[MPI_MAX_PORT_NAME];
    int *info_keyval_sizes=0, i, mpi_errno=MPI_SUCCESS;
    PMI_keyval_t **info_keyval_vectors=0, preput_keyval_vector;
    int *pmi_errcodes = 0, pmi_errno;
    int total_num_processes, should_accept = 1;
    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_COMM_SPAWN_MULTIPLE);

    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDI_COMM_SPAWN_MULTIPLE);


    if (comm_ptr->rank == root) {
	/* create an array for the pmi error codes */
	total_num_processes = 0;
	for (i=0; i<count; i++) {
	    total_num_processes += maxprocs[i];
	}
	pmi_errcodes = (int*)MPL_malloc(sizeof(int) * total_num_processes);
	if (pmi_errcodes == NULL) {
	    MPIR_ERR_SETANDJUMP(mpi_errno,MPI_ERR_OTHER,"**nomem");
	}

	/* initialize them to 0 */
	for (i=0; i<total_num_processes; i++)
	    pmi_errcodes[i] = 0;

	/* Open a port for the spawned processes to connect to */
	/* FIXME: info may be needed for port name */
        mpi_errno = MPID_Open_port(NULL, port_name);
	/* --BEGIN ERROR HANDLING-- */
        if (mpi_errno) MPIR_ERR_POP(mpi_errno);
	/* --END ERROR HANDLING-- */

	/* Spawn the processes */
#ifdef USE_PMI2_API
        MPIR_Assert(count > 0);
        {
            int *argcs = MPL_malloc(count*sizeof(int));
            struct MPIR_Info preput;
            struct MPIR_Info *preput_p[1] = { &preput };

            MPIR_Assert(argcs);
            /*
            info_keyval_sizes = MPL_malloc(count * sizeof(int));
            */

            /* FIXME cheating on constness */
            preput.key = (char *)PARENT_PORT_KVSKEY;
            preput.value = port_name;
            preput.next = NULL;

            /* compute argcs array */
            for (i = 0; i < count; ++i) {
                argcs[i] = 0;
                if (argvs != NULL && argvs[i] != NULL) {
                    while (argvs[i][argcs[i]]) {
                        ++argcs[i];
                    }
                }

                /* a fib for now */
                /*
                info_keyval_sizes[i] = 0;
                */
            }
            /* XXX DJG don't need this, PMI API is thread-safe? */
            /*MPID_THREAD_CS_ENTER(POBJ, MPIR_THREAD_POBJ_PMI_MUTEX);*/
            /* release the global CS for spawn PMI calls */
            MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
            pmi_errno = PMI2_Job_Spawn(count, (const char **)commands,
                                       argcs, (const char ***)argvs,
                                       maxprocs,
                                       info_keyval_sizes, (const MPIR_Info **)info_ptrs,
                                       1, (const struct MPIR_Info **)preput_p,
                                       NULL, 0,
                                       /*jobId, jobIdSize,*/ /* XXX DJG job stuff? */
                                       pmi_errcodes);
            MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
            /*MPID_THREAD_CS_EXIT(POBJ, MPIR_THREAD_POBJ_PMI_MUTEX);*/
            MPL_free(argcs);
            if (pmi_errno != PMI2_SUCCESS) {
                MPIR_ERR_SETANDJUMP1(mpi_errno, MPI_ERR_OTHER,
                     "**pmi_spawn_multiple", "**pmi_spawn_multiple %d", pmi_errno);
            }
        }
#else
        /* FIXME: This is *really* awkward.  We should either
           Fix on MPI-style info data structures for PMI (avoid unnecessary
           duplication) or add an MPIU_Info_getall(...) that creates
           the necessary arrays of key/value pairs */

        /* convert the infos into PMI keyvals */
        info_keyval_sizes   = (int *) MPL_malloc(count * sizeof(int));
        info_keyval_vectors = 
            (PMI_keyval_t**) MPL_malloc(count * sizeof(PMI_keyval_t*));
        if (!info_keyval_sizes || !info_keyval_vectors) { 
            MPIR_ERR_SETANDJUMP(mpi_errno,MPI_ERR_OTHER,"**nomem");
        }

        if (!info_ptrs) {
            for (i=0; i<count; i++) {
                info_keyval_vectors[i] = 0;
                info_keyval_sizes[i]   = 0;
            }
        }
        else {
            for (i=0; i<count; i++) {
                mpi_errno = mpi_to_pmi_keyvals( info_ptrs[i], 
                                                &info_keyval_vectors[i],
                                                &info_keyval_sizes[i] );
                if (mpi_errno) { MPIR_ERR_POP(mpi_errno); }
            }
        }

        preput_keyval_vector.key = PARENT_PORT_KVSKEY;
        preput_keyval_vector.val = port_name;


        MPID_THREAD_CS_ENTER(POBJ, MPIR_THREAD_POBJ_PMI_MUTEX);
        pmi_errno = PMI_Spawn_multiple(count, (const char **)
                                       commands, 
                                       (const char ***) argvs,
                                       maxprocs, info_keyval_sizes,
                                       (const PMI_keyval_t **)
                                       info_keyval_vectors, 1, 
                                       &preput_keyval_vector,
                                       pmi_errcodes);
	MPID_THREAD_CS_EXIT(POBJ, MPIR_THREAD_POBJ_PMI_MUTEX);
        if (pmi_errno != PMI_SUCCESS) {
	    MPIR_ERR_SETANDJUMP1(mpi_errno, MPI_ERR_OTHER,
		 "**pmi_spawn_multiple", "**pmi_spawn_multiple %d", pmi_errno);
        }
#endif

	if (errcodes != MPI_ERRCODES_IGNORE) {
	    for (i=0; i<total_num_processes; i++) {
		/* FIXME: translate the pmi error codes here */
		errcodes[i] = pmi_errcodes[i];
                /* We want to accept if any of the spawns succeeded.
                   Alternatively, this is the same as we want to NOT accept if
                   all of them failed.  should_accept = NAND(e_0, ..., e_n)
                   Remember, success equals false (0). */
                should_accept = should_accept && errcodes[i];
	    }
            should_accept = !should_accept; /* the `N' in NAND */
	}
    }

    if (errcodes != MPI_ERRCODES_IGNORE) {
        MPIR_Errflag_t errflag = MPIR_ERR_NONE;
        mpi_errno = MPIR_Bcast_impl(&should_accept, 1, MPI_INT, root, comm_ptr, &errflag);
        if (mpi_errno) MPIR_ERR_POP(mpi_errno);

        mpi_errno = MPIR_Bcast_impl(&total_num_processes, 1, MPI_INT, root, comm_ptr, &errflag);
        if (mpi_errno) MPIR_ERR_POP(mpi_errno);
        
        mpi_errno = MPIR_Bcast_impl(errcodes, total_num_processes, MPI_INT, root, comm_ptr, &errflag);
        if (mpi_errno) MPIR_ERR_POP(mpi_errno);

        MPIR_ERR_CHKANDJUMP(errflag, mpi_errno, MPI_ERR_OTHER, "**coll_fail");
    }

    if (should_accept) {
        mpi_errno = MPID_Comm_accept(port_name, NULL, root, comm_ptr, intercomm); 
        if (mpi_errno) MPIR_ERR_POP(mpi_errno);
    }
    else {
        MPIR_ERR_SETANDJUMP(mpi_errno, MPI_ERR_OTHER, "**pmi_spawn_multiple");
    }

    if (comm_ptr->rank == root) {
	/* Close the port opened for the spawned processes to connect to */
	mpi_errno = MPID_Close_port(port_name);
	/* --BEGIN ERROR HANDLING-- */
	if (mpi_errno != MPI_SUCCESS)
	{
	    MPIR_ERR_POP(mpi_errno);
	}
	/* --END ERROR HANDLING-- */
    }

 fn_exit:
    if (info_keyval_vectors) {
	free_pmi_keyvals(info_keyval_vectors, count, info_keyval_sizes);
	MPL_free(info_keyval_sizes);
	MPL_free(info_keyval_vectors);
    }
    if (pmi_errcodes) {
	MPL_free(pmi_errcodes);
    }
    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_COMM_SPAWN_MULTIPLE);
    return mpi_errno;
 fn_fail:
    goto fn_exit;
}