Example #1
/* FIXME: this is not a scalable algorithm because everyone is polling on the same cacheline */
int MPID_nem_barrier(void)
{
    int mpi_errno = MPI_SUCCESS;
    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_BARRIER);

    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_BARRIER);

    if (MPID_nem_mem_region.num_local == 1)
        goto fn_exit;

    MPIR_ERR_CHKINTERNAL(!barrier_init, mpi_errno, "barrier not initialized");

    if (OPA_fetch_and_incr_int(&MPID_nem_mem_region.barrier->val) == MPID_nem_mem_region.num_local - 1)
    {
        OPA_store_int(&MPID_nem_mem_region.barrier->val, 0);
        OPA_store_int(&MPID_nem_mem_region.barrier->wait, 1 - sense);
        OPA_write_barrier();
    }
    else
    {
        /* wait for the last arrival to flip the release flag */
        while (OPA_load_int(&MPID_nem_mem_region.barrier->wait) == sense)
            MPL_sched_yield();
    }
    sense = 1 - sense;

 fn_fail:
 fn_exit:
    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_BARRIER);
    return mpi_errno;
}
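
This is a classic sense-reversing barrier: the last process to arrive resets the counter and flips the shared wait flag; everyone else spins until the flag leaves their current sense, and all then invert their local sense for the next round. Below is a minimal thread-based sketch of the same algorithm using C11 atomics, for illustration only (the OPA_* calls above are MPICH's portable atomics layer over shared memory); it shares the FIXME's scalability caveat, since all waiters poll the same cache line.

#include <stdatomic.h>
#include <sched.h>   /* sched_yield(), POSIX */

typedef struct {
    atomic_int val;   /* number of arrivals in the current round */
    atomic_int wait;  /* release flag, compared against each waiter's sense */
} barrier_t;

/* each thread keeps its own sense, initially 0 */
static _Thread_local int my_sense = 0;

void barrier_wait(barrier_t *b, int nthreads)
{
    if (atomic_fetch_add(&b->val, 1) == nthreads - 1) {
        /* last arrival: reset the counter and flip the release flag */
        atomic_store(&b->val, 0);
        atomic_store(&b->wait, 1 - my_sense);
    } else {
        /* everyone else spins until the flag leaves the current sense */
        while (atomic_load(&b->wait) == my_sense)
            sched_yield();
    }
    my_sense = 1 - my_sense;
}
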
Example #2
/*
 * MPID_Get_universe_size - Get the universe size from the process manager
 *
 * Notes: The ch3 device requires that the PMI routines are used to 
 * communicate with the process manager.  If a channel wishes to 
 * bypass the standard PMI implementations, it is the responsibility of the
 * channel to provide an implementation of the PMI routines.
 */
int MPID_Get_universe_size(int *universe_size)
{
    int mpi_errno = MPI_SUCCESS;
#ifdef USE_PMI2_API
    char val[PMI2_MAX_VALLEN];
    int found = 0;
    char *endptr;
    
    mpi_errno = PMI2_Info_GetJobAttr("universeSize", val, sizeof(val), &found);
    if (mpi_errno) MPIR_ERR_POP(mpi_errno);

    if (!found)
        *universe_size = MPIR_UNIVERSE_SIZE_NOT_AVAILABLE;
    else {
        *universe_size = strtol(val, &endptr, 0);
        MPIR_ERR_CHKINTERNAL(endptr - val != strlen(val), mpi_errno, "can't parse universe size");
    }

#else
    int pmi_errno = PMI_SUCCESS;

    pmi_errno = PMI_Get_universe_size(universe_size);
    if (pmi_errno != PMI_SUCCESS) {
        MPIR_ERR_SETANDJUMP1(mpi_errno, MPI_ERR_OTHER,
                             "**pmi_get_universe_size",
                             "**pmi_get_universe_size %d", pmi_errno);
    }
    if (*universe_size < 0) {
        *universe_size = MPIR_UNIVERSE_SIZE_NOT_AVAILABLE;
    }
#endif
    
fn_exit:
    return mpi_errno;

    /* --BEGIN ERROR HANDLING-- */
fn_fail:
    *universe_size = MPIR_UNIVERSE_SIZE_NOT_AVAILABLE;
    goto fn_exit;
    /* --END ERROR HANDLING-- */
}
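
At the MPI level, the value this device routine produces is what backs the predefined MPI_UNIVERSE_SIZE attribute on MPI_COMM_WORLD. A small, self-contained example of reading it from an application, using only the standard MPI API:

#include <mpi.h>
#include <stdio.h>

int main(int argc, char **argv)
{
    int *usize_ptr, flag;
    MPI_Init(&argc, &argv);
    /* the attribute value is a pointer to int; flag is false
       when the process manager cannot provide a universe size */
    MPI_Comm_get_attr(MPI_COMM_WORLD, MPI_UNIVERSE_SIZE, &usize_ptr, &flag);
    if (flag)
        printf("universe size: %d\n", *usize_ptr);
    else
        printf("universe size not available\n");
    MPI_Finalize();
    return 0;
}
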
Example #3
int MPIDI_CH3_Connect_to_root (const char *port_name, MPIDI_VC_t **new_vc)
{
    int mpi_errno = MPI_SUCCESS;
    MPIDI_VC_t * vc;
    MPIU_CHKPMEM_DECL(1);
    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3_CONNECT_TO_ROOT);

    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3_CONNECT_TO_ROOT);

    *new_vc = NULL; /* so that the err handling knows to cleanup */

    MPIU_CHKPMEM_MALLOC (vc, MPIDI_VC_t *, sizeof(MPIDI_VC_t), mpi_errno, "vc");
    /* FIXME - where does this vc get freed?
       ANSWER (goodell@) - ch3u_port.c FreeNewVC
                           (but the VC_Destroy is in this file) */

    /* init ch3 portion of vc */
    MPIDI_VC_Init (vc, NULL, 0);

    /* init channel portion of vc */
    MPIR_ERR_CHKINTERNAL(!nemesis_initialized, mpi_errno, "Nemesis not initialized");
    vc->ch.recv_active = NULL;
    MPIDI_CHANGE_VC_STATE(vc, ACTIVE);

    *new_vc = vc; /* we now have a valid, disconnected, temp VC */

    mpi_errno = MPID_nem_connect_to_root (port_name, vc);
    if (mpi_errno) MPIR_ERR_POP (mpi_errno);

    MPIU_CHKPMEM_COMMIT();
fn_exit:
    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3_CONNECT_TO_ROOT);
    return mpi_errno;
fn_fail:
    /* freeing without giving the lower layer a chance to cleanup can lead to
       leaks on error */
    if (*new_vc)
        MPIDI_CH3_VC_Destroy(*new_vc);
    MPIU_CHKPMEM_REAP();
    goto fn_exit;
}
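
The MPIU_CHKPMEM_* macros implement a commit/rollback idiom for allocations: MALLOC registers a pending allocation (jumping to fn_fail if it fails), COMMIT transfers ownership once the object is fully set up, and REAP on the failure path frees whatever was never committed. A hand-rolled sketch of that idiom with plain malloc, assuming a hypothetical struct obj and obj_init for illustration:

#include <stdlib.h>

struct obj { int state; };                       /* hypothetical payload */
static int obj_init(struct obj *p) { p->state = 1; return 0; }

int create_object(struct obj **out)
{
    int err = 0;
    struct obj *pending = NULL;     /* registered but not yet committed */

    *out = NULL;
    pending = malloc(sizeof(*pending));          /* "CHKPMEM_MALLOC" */
    if (!pending) { err = -1; goto fn_fail; }

    err = obj_init(pending);
    if (err) goto fn_fail;

    *out = pending;                 /* "CHKPMEM_COMMIT": caller owns it now */
    pending = NULL;
    return 0;
fn_fail:
    free(pending);                  /* "CHKPMEM_REAP": no-op once committed */
    return err;
}
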
Example #4
int MPID_Create_intercomm_from_lpids( MPID_Comm *newcomm_ptr,
                                      int size, const int lpids[] )
{
    int mpi_errno = MPI_SUCCESS;
    MPID_Comm *commworld_ptr;
    int i;
    MPIDI_PG_iterator iter;

    commworld_ptr = MPIR_Process.comm_world;
    /* Setup the communicator's vc table: remote group */
    mpi_errno = MPIDI_VCRT_Create( size, &newcomm_ptr->dev.vcrt );
    if (mpi_errno) MPIR_ERR_POP(mpi_errno);

    for (i=0; i<size; i++) {
        MPIDI_VC_t *vc = 0;

        /* For rank i in the new communicator, find the corresponding
           virtual connection.  For lpids less than the size of comm_world,
           we can just take the corresponding entry from comm_world.
           Otherwise, we need to search through the process groups.
        */
        if (lpids[i] < commworld_ptr->remote_size) {
            vc = commworld_ptr->dev.vcrt->vcr_table[lpids[i]];
        }
        else {
            /* We must find the corresponding vcr for a given lpid.
               For now, this means iterating through the process groups. */
            MPIDI_PG_t *pg = 0;
            int j;

            MPIDI_PG_Get_iterator(&iter);
            /* Skip comm_world */
            MPIDI_PG_Get_next( &iter, &pg );
            do {
                MPIDI_PG_Get_next( &iter, &pg );
                MPIR_ERR_CHKINTERNAL(!pg, mpi_errno, "no pg");
                /* FIXME: a quick check on the min/max values of the lpid
                   for this process group could help speed this search */
                for (j=0; j<pg->size; j++) {
                    if (pg->vct[j].lpid == lpids[i]) {
                        vc = &pg->vct[j];
                        break;
                    }
                }
            } while (!vc);
        }

        /* Note that this will increment the ref count for the associated
           PG if necessary.  */
        MPIDI_VCR_Dup( vc, &newcomm_ptr->dev.vcrt->vcr_table[i] );
    }
    }
fn_exit:
    return mpi_errno;
fn_fail:
    goto fn_exit;
}
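
The FIXME in the search loop suggests a cheap pre-check before scanning each process group. A hypothetical sketch of that idea, assuming per-group lpid_min/lpid_max fields (which are not existing members of MPIDI_PG_t):

/* Sketch only: skip a process group whose lpid range cannot contain
   the target.  lpid_min/lpid_max are assumed fields, for illustration. */
static MPIDI_VC_t *find_vc_in_pg(MPIDI_PG_t *pg, int lpid)
{
    int j;
    if (lpid < pg->lpid_min || lpid > pg->lpid_max)
        return NULL;                /* fast reject, no scan needed */
    for (j = 0; j < pg->size; j++)
        if (pg->vct[j].lpid == lpid)
            return &pg->vct[j];
    return NULL;                    /* in range but not present */
}
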
Example #5
int MPIDI_CH3U_Get_failed_group(int last_rank, MPIR_Group **failed_group)
{
    char *c;
    int i, mpi_errno = MPI_SUCCESS, rank;
    UT_array *failed_procs = NULL;
    MPIR_Group *world_group;
    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_GET_FAILED_GROUP);

    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_GET_FAILED_GROUP);

    MPL_DBG_MSG_D(MPIDI_CH3_DBG_OTHER, VERBOSE, "Getting failed group with %d as last acknowledged\n", last_rank);

    if (-1 == last_rank) {
        MPL_DBG_MSG(MPIDI_CH3_DBG_OTHER, VERBOSE, "No failure acknowledged");
        *failed_group = MPIR_Group_empty;
        goto fn_exit;
    }

    if (*MPIDI_failed_procs_string == '\0') {
        MPL_DBG_MSG(MPIDI_CH3_DBG_OTHER, VERBOSE, "Found no failed ranks");
        *failed_group = MPIR_Group_empty;
        goto fn_exit;
    }

    utarray_new(failed_procs, &ut_int_icd);

    /* parse list of failed processes.  This is a comma separated list
       of ranks or ranges of ranks (e.g., "1, 3-5, 11") */
    i = 0;
    c = MPIDI_failed_procs_string;
    while(1) {
        parse_rank(&rank);
        ++i;
        MPL_DBG_MSG_D(MPIDI_CH3_DBG_OTHER, VERBOSE, "Found failed rank: %d", rank);
        utarray_push_back(failed_procs, &rank);
        MPIDI_last_known_failed = rank;
        MPIR_ERR_CHKINTERNAL(*c != ',' && *c != '\0', mpi_errno, "error parsing failed process list");
        if (*c == '\0' || last_rank == rank)
            break;
        ++c; /* skip ',' */
    }

    /* Create group of failed processes for comm_world.  Failed groups for other
       communicators can be created from this one using group_intersection. */
    mpi_errno = MPIR_Comm_group_impl(MPIR_Process.comm_world, &world_group);
    if (mpi_errno) MPIR_ERR_POP(mpi_errno);

    mpi_errno = MPIR_Group_incl_impl(world_group, i, ut_int_array(failed_procs), failed_group);
    if (mpi_errno) MPIR_ERR_POP(mpi_errno);

    mpi_errno = MPIR_Group_release(world_group);
    if (mpi_errno) MPIR_ERR_POP(mpi_errno);

fn_exit:
    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_GET_FAILED_GROUP);
    if (failed_procs)
        utarray_free(failed_procs);
    return mpi_errno;
fn_oom:
    MPIR_ERR_SET1(mpi_errno, MPI_ERR_OTHER, "**nomem", "**nomem %s", "utarray");
fn_fail:
    goto fn_exit;
}
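
parse_rank is not shown above; per the comment it consumes the next entry from a comma-separated list of ranks and ranges such as "1,3-5,11", leaving the cursor c on the following ',' or '\0'. A minimal sketch of the single-rank part of such a helper (hypothetical name and shape; range expansion would need extra state):

#include <stdlib.h>

/* Sketch: read one decimal rank at *cp and advance the cursor past it.
   Returns 0 on success, -1 if no digits were found at the cursor. */
static int parse_one_rank(char **cp, int *rank)
{
    char *endptr;
    long val = strtol(*cp, &endptr, 10);
    if (endptr == *cp)
        return -1;
    *cp = endptr;       /* caller then expects ',' or '\0' here */
    *rank = (int) val;
    return 0;
}
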
Example #6
/* Returns MPI_SUCCESS on success, an MPI error code on failure.  Code above
 * needs to call MPIO_Err_return_xxx.
 */
static int MPII_Type_cyclic(int *array_of_gsizes, int dim, int ndims, int nprocs,
                            int rank, int darg, int order, MPI_Aint orig_extent,
                            MPI_Datatype type_old, MPI_Datatype * type_new, MPI_Aint * st_offset)
{
/* nprocs = no. of processes in dimension dim of grid
   rank = coordinate of this process in dimension dim */
    int mpi_errno = MPI_SUCCESS;
    int blksize, i, blklens[3], st_index, end_index, local_size, rem, count;
    MPI_Aint stride, disps[3];
    MPI_Datatype type_tmp, types[3];

    if (darg == MPI_DISTRIBUTE_DFLT_DARG)
        blksize = 1;
    else
        blksize = darg;

    MPIR_ERR_CHKINTERNAL(blksize <= 0, mpi_errno, "blksize must be > 0");

    st_index = rank * blksize;
    end_index = array_of_gsizes[dim] - 1;

    if (end_index < st_index)
        local_size = 0;
    else {
        local_size = ((end_index - st_index + 1) / (nprocs * blksize)) * blksize;
        rem = (end_index - st_index + 1) % (nprocs * blksize);
        local_size += (rem < blksize) ? rem : blksize;
    }

    count = local_size / blksize;
    rem = local_size % blksize;

    stride = ((MPI_Aint) nprocs) * ((MPI_Aint) blksize) * orig_extent;
    if (order == MPI_ORDER_FORTRAN)
        for (i = 0; i < dim; i++)
            stride *= (MPI_Aint) (array_of_gsizes[i]);
    else
        for (i = ndims - 1; i > dim; i--)
            stride *= (MPI_Aint) (array_of_gsizes[i]);

    mpi_errno = MPIR_Type_hvector_impl(count, blksize, stride, type_old, type_new);
    if (mpi_errno)
        MPIR_ERR_POP(mpi_errno);

    if (rem) {
        /* if the last block is of size less than blksize, include
         * it separately using MPI_Type_struct */

        types[0] = *type_new;
        types[1] = type_old;
        disps[0] = 0;
        disps[1] = ((MPI_Aint) count) * stride;
        blklens[0] = 1;
        blklens[1] = rem;

        mpi_errno = MPIR_Type_struct_impl(2, blklens, disps, types, &type_tmp);
        if (mpi_errno)
            MPIR_ERR_POP(mpi_errno);

        MPIR_Type_free_impl(type_new);
        *type_new = type_tmp;
    }

    /* In the first iteration, we need to set the displacement in that
     * dimension correctly. */
    if (((order == MPI_ORDER_FORTRAN) && (dim == 0)) ||
        ((order == MPI_ORDER_C) && (dim == ndims - 1))) {
        types[0] = MPI_LB;
        disps[0] = 0;
        types[1] = *type_new;
        disps[1] = ((MPI_Aint) rank) * ((MPI_Aint) blksize) * orig_extent;
        types[2] = MPI_UB;
        disps[2] = orig_extent * ((MPI_Aint) (array_of_gsizes[dim]));
        blklens[0] = blklens[1] = blklens[2] = 1;

        mpi_errno = MPIR_Type_struct_impl(3, blklens, disps, types, &type_tmp);
        if (mpi_errno)
            MPIR_ERR_POP(mpi_errno);

        MPIR_Type_free_impl(type_new);
        *type_new = type_tmp;

        *st_offset = 0; /* set it to 0 because it is taken care of in
                         * the struct above */
    } else {
        *st_offset = ((MPI_Aint) rank) * ((MPI_Aint) blksize);
        /* st_offset is in terms of no. of elements of type oldtype in
         * this dimension */
    }

    if (local_size == 0)
        *st_offset = 0;

  fn_exit:
    return mpi_errno;
  fn_fail:
    goto fn_exit;
}
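
The local_size arithmetic above implements the cyclic(blksize) distribution: each full cycle of nprocs*blksize elements contributes blksize elements to every process, and the final partial cycle contributes min(rem, blksize). A worked check with illustrative values (gsize=10, nprocs=3, blksize=2; expected ownership 4/4/2):

#include <stdio.h>

/* Worked check of the cyclic(blksize) local_size formula above,
   for gsize=10, nprocs=3, blksize=2 (illustrative values only). */
int main(void)
{
    int gsize = 10, nprocs = 3, blksize = 2;
    for (int rank = 0; rank < nprocs; rank++) {
        int st = rank * blksize, end = gsize - 1, local = 0;
        if (end >= st) {
            int span = end - st + 1;
            local = (span / (nprocs * blksize)) * blksize;
            int rem = span % (nprocs * blksize);
            local += (rem < blksize) ? rem : blksize;
        }
        printf("rank %d owns %d elements\n", rank, local);  /* 4, 4, 2 */
    }
    return 0;
}
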
Example #7
static int MPII_Type_block(int *array_of_gsizes, int dim, int ndims, int nprocs,
                           int rank, int darg, int order, MPI_Aint orig_extent,
                           MPI_Datatype type_old, MPI_Datatype * type_new, MPI_Aint * st_offset)
{
/* nprocs = no. of processes in dimension dim of grid
   rank = coordinate of this process in dimension dim */
    int mpi_errno = MPI_SUCCESS;
    int blksize, global_size, mysize, i, j;
    MPI_Aint stride;

    global_size = array_of_gsizes[dim];

    if (darg == MPI_DISTRIBUTE_DFLT_DARG)
        blksize = (global_size + nprocs - 1) / nprocs;
    else {
        blksize = darg;

        MPIR_ERR_CHKINTERNAL(blksize <= 0, mpi_errno, "blksize must be > 0");
        MPIR_ERR_CHKINTERNAL(blksize * nprocs < global_size, mpi_errno,
                             "blksize * nprocs must be >= global size");
    }

    j = global_size - blksize * rank;
    mysize = (blksize < j) ? blksize : j;
    if (mysize < 0)
        mysize = 0;

    stride = orig_extent;
    if (order == MPI_ORDER_FORTRAN) {
        if (dim == 0) {
            mpi_errno = MPIR_Type_contiguous_impl(mysize, type_old, type_new);
            if (mpi_errno)
                MPIR_ERR_POP(mpi_errno);
        } else {
            for (i = 0; i < dim; i++)
                stride *= (MPI_Aint) (array_of_gsizes[i]);
            mpi_errno = MPIR_Type_hvector_impl(mysize, 1, stride, type_old, type_new);
            if (mpi_errno)
                MPIR_ERR_POP(mpi_errno);
        }
    } else {
        if (dim == ndims - 1) {
            mpi_errno = MPIR_Type_contiguous_impl(mysize, type_old, type_new);
            if (mpi_errno)
                MPIR_ERR_POP(mpi_errno);
        } else {
            for (i = ndims - 1; i > dim; i--)
                stride *= (MPI_Aint) (array_of_gsizes[i]);
            mpi_errno = MPIR_Type_hvector_impl(mysize, 1, stride, type_old, type_new);
            if (mpi_errno)
                MPIR_ERR_POP(mpi_errno);
        }
    }

    *st_offset = blksize * rank;
    /* in terms of no. of elements of type oldtype in this dimension */
    if (mysize == 0)
        *st_offset = 0;

  fn_exit:
    return mpi_errno;
  fn_fail:
    goto fn_exit;
}
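
With MPI_DISTRIBUTE_DFLT_DARG the block size above is ceil(global_size/nprocs), and each rank owns min(blksize, global_size - rank*blksize) elements, clamped at zero. A worked check with illustrative values (global_size=10, nprocs=4; expected ownership 3/3/3/1):

#include <stdio.h>

/* Worked check of the block-distribution sizing above,
   for global_size=10, nprocs=4 (illustrative values only). */
int main(void)
{
    int global_size = 10, nprocs = 4;
    int blksize = (global_size + nprocs - 1) / nprocs;   /* ceil = 3 */
    for (int rank = 0; rank < nprocs; rank++) {
        int j = global_size - blksize * rank;
        int mysize = (blksize < j) ? blksize : j;
        if (mysize < 0)
            mysize = 0;
        printf("rank %d: %d elements from offset %d\n",
               rank, mysize, blksize * rank);            /* 3, 3, 3, 1 */
    }
    return 0;
}
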