Beispiel #1
0
int tMPI_Cart_get(tMPI_Comm comm, int maxdims, int *dims, int *periods,
                  int *coords)
{
    int i;
    int myrank=tMPI_Comm_seek_rank(comm, tMPI_Get_current());

#ifdef TMPI_TRACE
    tMPI_Trace_print("tMPI_Cart_get(%p, %d, %p, %p, %p)", comm, maxdims, 
                       dims, periods, coords);
#endif
    if (!comm)
    {
        return tMPI_Error(TMPI_COMM_WORLD, TMPI_ERR_COMM);
    }
    if (!comm->cart || comm->cart->ndims==0)
        return TMPI_SUCCESS;

    tMPI_Cart_coords(comm, myrank, maxdims, coords);

    for(i=0;i<comm->cart->ndims;i++)
    {
        if (i>=maxdims)
        {
            return tMPI_Error(comm, TMPI_ERR_DIMS);
        }
        dims[i]=comm->cart->dims[i];
        periods[i]=comm->cart->periods[i];
    }

    return TMPI_SUCCESS;
}
Beispiel #2
0
int tMPI_Barrier(tMPI_Comm comm) 
{
#ifdef TMPI_PROFILE
    struct tmpi_thread *cur=tMPI_Get_current();
    tMPI_Profile_count_start(cur);
#endif

#ifdef TMPI_TRACE
    tMPI_Trace_print("tMPI_Barrier(%p, %d, %p, %d, %d, %p, %p)", comm);
#endif

    if (!comm)
    {
        return tMPI_Error(TMPI_COMM_WORLD, TMPI_ERR_COMM);
    }

    if (comm->grp.N>1)
    {
#if defined(TMPI_PROFILE) 
        tMPI_Profile_wait_start(cur);
#endif

        tMPI_Barrier_wait( &(comm->barrier) );
#if defined(TMPI_PROFILE) 
        tMPI_Profile_wait_stop(cur, TMPIWAIT_Barrier);
#endif
    }

#ifdef TMPI_PROFILE
    tMPI_Profile_count_stop(cur, TMPIFN_Barrier);
#endif
    return TMPI_SUCCESS;
}
Beispiel #3
0
/* TODO: there must be better ways to do this */
double tMPI_Wtime(void)
{
    double ret=0;

#ifdef TMPI_TRACE
    tMPI_Trace_print("tMPI_Wtime()");
#endif

#if ! (defined( _WIN32 ) || defined( _WIN64 ) )
    {
        struct timeval tv;
        long int secdiff;
        int usecdiff;

        gettimeofday(&tv, NULL);
        secdiff = tv.tv_sec - tmpi_global->timer_init.tv_sec;
        usecdiff = tv.tv_usec - tmpi_global->timer_init.tv_usec;

        ret=(double)secdiff + 1e-6*usecdiff;
    }
#else
    {
        DWORD tv=GetTickCount();
   
        /* the windows absolute time GetTickCount() wraps around in ~49 days,
           so it's safer to always use differences, and assume that our
           program doesn't run that long.. */
        ret=1e-3*((unsigned int)(tv - tmpi_global->timer_init));
    }
#endif
    return ret;
}
Beispiel #4
0
static void* tMPI_Thread_starter(void *arg)
{
    struct tmpi_thread *th = (struct tmpi_thread*)arg;

#ifdef TMPI_TRACE
    tMPI_Trace_print("Created thread nr. %d", (int)(th-threads));
#endif

    tMPI_Thread_init(th);

    /* start_fn, start_arg, argc and argv were set by the calling function */
    if (!th->start_fn)
    {
        th->start_fn_main(th->argc, th->argv);
    }
    else
    {
        th->start_fn(th->start_arg);
        if (!tmpi_finalized)
        {
            tMPI_Finalize();
        }
    }

    return 0;
}
Beispiel #5
0
int tMPI_Comm_rank(tMPI_Comm comm, int *rank)
{
#ifdef TMPI_TRACE
    tMPI_Trace_print("tMPI_Comm_rank(%p, %p)", comm, rank);
#endif
    return tMPI_Group_rank(&(comm->grp), rank);
}
Beispiel #6
0
int tMPI_Comm_size(tMPI_Comm comm, int *size)
{
#ifdef TMPI_TRACE
    tMPI_Trace_print("tMPI_Comm_size(%p, %p)", comm, size);
#endif
    return tMPI_Group_size(&(comm->grp), size);
}
Beispiel #7
0
int tMPI_Cart_coords(tMPI_Comm comm, int rank, int maxdims, int *coords)
{
    int i;
    int rank_left=rank;

#ifdef TMPI_TRACE
    tMPI_Trace_print("tMPI_Cart_coords(%p, %d, %d, %p)", comm, rank, maxdims, 
                     coords);
#endif
    if (!comm)
    {
        return tMPI_Error(TMPI_COMM_WORLD, TMPI_ERR_COMM);
    }
    if (!comm->cart || comm->cart->ndims==0)
        return TMPI_SUCCESS;
    if (maxdims < comm->cart->ndims)
    {
        return tMPI_Error(comm, TMPI_ERR_DIMS);
    }

    /* again, row-major ordering */
    for(i=comm->cart->ndims-1;i>=0;i--)
    {
        coords[i]=rank_left%comm->cart->dims[i];
        rank_left /= comm->cart->dims[i];
    }   

    return TMPI_SUCCESS;
}
Beispiel #8
0
int tMPI_Finalized(int *flag)
{
#ifdef TMPI_TRACE
    tMPI_Trace_print("tMPI_Finalized(%p)", flag);
#endif
    *flag=tmpi_finalized;

    return TMPI_SUCCESS;
}
Beispiel #9
0
int tMPI_Errhandler_free(tMPI_Errhandler *errhandler)
{
#ifdef TMPI_TRACE
    tMPI_Trace_print("tMPI_Errhandler_free(%p)", errhandler);
#endif

    free(*errhandler);
    return TMPI_SUCCESS;
}
Beispiel #10
0
int tMPI_Comm_get_errhandler(tMPI_Comm comm, tMPI_Errhandler *errhandler)
{
#ifdef TMPI_TRACE
    tMPI_Trace_print("tMPI_Comm_get_errhandler(%p, %p)", comm, errhandler);
#endif

    *errhandler=comm->erh;
    return TMPI_SUCCESS;
}
Beispiel #11
0
int tMPI_Cart_sub(tMPI_Comm comm, int *remain_dims, tMPI_Comm *newcomm)
{
    int myrank;
    int ndims=0;
    int *dims=NULL;
    int *periods=NULL;
    int *oldcoords=NULL;
    int i;
    int ndims_notused=1;
    int color_notused=0;

#ifdef TMPI_TRACE
    tMPI_Trace_print("tMPI_Cart_sub(%p, %p, %p)", comm, remain_dims, newcomm);
#endif
    tMPI_Comm_rank(comm, &myrank);
    if ( comm->cart )
    {
        oldcoords=(int*)tMPI_Malloc(sizeof(int)*comm->cart->ndims);
        dims=(int*)tMPI_Malloc(sizeof(int)*comm->cart->ndims);
        periods=(int*)tMPI_Malloc(sizeof(int)*comm->cart->ndims);

        /* get old coordinates */
        tMPI_Cart_coords(comm, myrank, comm->cart->ndims, oldcoords);

        for(i=0;i<comm->cart->ndims;i++)
        {
            if (remain_dims[i])
            {
                /* for the remaining dimensions, copy dimensionality data */
                dims[ndims]=comm->cart->dims[i];
                periods[ndims]=comm->cart->periods[i];
                ndims++;
            }
            else
            {
                /* base color on not used coordinates. We keep a 
                   ndims_notused index multiplier.*/
                color_notused += oldcoords[i]*ndims_notused;
                ndims_notused *= comm->cart->dims[i];
            }
        }
    }

    /* key=myrank, because we want the order to remain the same */
    tMPI_Comm_split(comm, color_notused, myrank, newcomm);
    tMPI_Cart_init(newcomm, ndims, dims, periods);

    if (oldcoords)
        free(oldcoords);
    if (dims)
        free(dims);
    if (periods)
        free(periods);

    return TMPI_SUCCESS;
}
Beispiel #12
0
int tMPI_Initialized(int *flag)
{
#ifdef TMPI_TRACE
    tMPI_Trace_print("tMPI_Initialized(%p)", flag);
#endif

    *flag=(TMPI_COMM_WORLD && !tmpi_finalized);

    return TMPI_SUCCESS;
}
Beispiel #13
0
int tMPI_Comm_dup(tMPI_Comm comm, tMPI_Comm *newcomm)
{
#ifdef TMPI_TRACE
    tMPI_Trace_print("tMPI_Comm_dup(%p, %p)", comm, newcomm);
#endif
    /* we just call Comm_split because it already contains all the
       neccesary synchronization constructs. */
    return tMPI_Comm_split(comm, 0, tMPI_Comm_seek_rank(comm,
                                                        tMPI_Get_current()), newcomm);
}
Beispiel #14
0
/* communicator query&manipulation functions */
int tMPI_Comm_N(tMPI_Comm comm)
{
#ifdef TMPI_TRACE
    tMPI_Trace_print("tMPI_Comm_N(%p)", comm);
#endif
    if (!comm)
    {
        return 0;
    }
    return comm->grp.N;
}
Beispiel #15
0
int tMPI_Cart_create(tMPI_Comm comm_old, int ndims, int *dims, int *periods,
                     int reorder, tMPI_Comm *comm_cart)
{
    int myrank = tMPI_Comm_seek_rank(comm_old, tMPI_Get_current());
    int key    = myrank;
    int color  = 0;
    int Ntot   = 1;
    int i;


#ifdef TMPI_TRACE
    tMPI_Trace_print("tMPI_Cart_create(%p, %d, %p, %p, %d, %p)", comm_old,
                     ndims, dims, periods, reorder, comm_cart);
#endif
    if (!comm_old)
    {
        return tMPI_Error(comm_old, TMPI_ERR_COMM);
    }
    /* calculate the total number of procs in cartesian comm */
    for (i = 0; i < ndims; i++)
    {
        Ntot *= dims[i];
    }
    /* refuse to create if there's not enough procs */
    if (comm_old->grp.N < Ntot)
    {
        *comm_cart = TMPI_COMM_NULL;
#if 1
        return tMPI_Error(comm_old, TMPI_ERR_CART_CREATE_NPROCS);
#endif
    }

    if (key >= Ntot)
    {
        key = TMPI_UNDEFINED;
    }

    if (reorder)
    {
        tMPI_Cart_map(comm_old, ndims, dims, periods, &key);
    }

    if (key == TMPI_UNDEFINED)
    {
        color = TMPI_UNDEFINED;
    }

    tMPI_Comm_split(comm_old, color, key, comm_cart);

    tMPI_Cart_init(comm_cart, ndims, dims, periods);

    return TMPI_SUCCESS;
}
Beispiel #16
0
int tMPI_Get_processor_name(char *name, int *resultlen)
{
    int nr=tMPI_Threadnr(tMPI_Get_current());
    unsigned int digits=0;
    const unsigned int base=10;

#ifdef TMPI_TRACE
    tMPI_Trace_print("tMPI_Get_processor_name(%p, %p)", name, resultlen);
#endif
    /* we don't want to call sprintf here (it turns out to be not entirely
       thread-safe on Mac OS X, for example), so we do it our own way: */

    /* first determine number of digits */
    {
        int rest=nr;
        while(rest > 0)
        {
            rest /= base;
            digits++;
        }
        if (digits==0)
            digits=1;
    }
#if ! (defined( _WIN32 ) || defined( _WIN64 ) )
    strcpy(name, "thread #");
#else
    strncpy_s(name, TMPI_MAX_PROCESSOR_NAME, "thread #", TMPI_MAX_PROCESSOR_NAME);
#endif
    /* now construct the number */
    {
        size_t len=strlen(name);
        unsigned int i;
        int rest=nr;

        for(i=0;i<digits;i++)
        {
            size_t pos=len + (digits-i-1);
            if (pos < (TMPI_MAX_PROCESSOR_NAME -1) )
                name[ pos ]=(char)('0' + rest%base);
            rest /= base;
        }
        if ( (digits+len) < TMPI_MAX_PROCESSOR_NAME)
            name[digits + len]='\0';
        else
            name[TMPI_MAX_PROCESSOR_NAME]='\0';

    }
    if (resultlen)
        *resultlen=(int)strlen(name); /* For some reason the MPI standard
                                         uses ints instead of size_ts for
                                         sizes. */
    return TMPI_SUCCESS;
}
Beispiel #17
0
int tMPI_Get_count(tMPI_Status *status, tMPI_Datatype datatype, int *count)
{
#ifdef TMPI_TRACE
    tMPI_Trace_print("tMPI_Get_count(%p, %p, %p)", status, datatype, count);
#endif
    if (!status)
    {
        return tMPI_Error(TMPI_COMM_WORLD, TMPI_ERR_STATUS);
    }
    *count = (int)(status->transferred/datatype->size);
    return TMPI_SUCCESS;
}
Beispiel #18
0
int tMPI_Comm_compare(tMPI_Comm comm1, tMPI_Comm comm2, int *result)
{
    int i, j;
#ifdef TMPI_TRACE
    tMPI_Trace_print("tMPI_Comm_compare(%p, %p, %p)", comm1, comm2, result);
#endif
    if (comm1 == comm2)
    {
        *result = TMPI_IDENT;
        return TMPI_SUCCESS;
    }

    if ( (!comm1) || (!comm2) )
    {
        *result = TMPI_UNEQUAL;
        return TMPI_SUCCESS;
    }

    if (comm1->grp.N != comm2->grp.N)
    {
        *result = TMPI_UNEQUAL;
        return TMPI_SUCCESS;
    }

    *result = TMPI_CONGRUENT;
    /* we assume that there are two identical comm members within a comm */
    for (i = 0; i < comm1->grp.N; i++)
    {
        if (comm1->grp.peers[i] != comm2->grp.peers[i])
        {
            tmpi_bool found = FALSE;

            *result = TMPI_SIMILAR;
            for (j = 0; j < comm2->grp.N; j++)
            {
                if (comm1->grp.peers[i] == comm2->grp.peers[j])
                {
                    found = TRUE;
                    break;
                }
            }
            if (!found)
            {
                *result = TMPI_UNEQUAL;
                return TMPI_SUCCESS;
            }
        }
    }
    return TMPI_SUCCESS;
}
Beispiel #19
0
int tMPI_Comm_create(tMPI_Comm comm, tMPI_Group group, tMPI_Comm *newcomm)
{
    int color = TMPI_UNDEFINED;
    int key   = tMPI_Comm_seek_rank(comm, tMPI_Get_current());

#ifdef TMPI_TRACE
    tMPI_Trace_print("tMPI_Comm_create(%p, %p, %p)", comm, group, newcomm);
#endif
    if (tMPI_In_group(group))
    {
        color = 1;
    }
    /* the MPI specs specifically say that this is equivalent */
    return tMPI_Comm_split(comm, color, key, newcomm);
}
Beispiel #20
0
int tMPI_Abort(tMPI_Comm comm, int errorcode)
{
#ifdef TMPI_TRACE
    tMPI_Trace_print("tMPI_Abort(%p, %d)", comm, errorcode);
#endif
#if 0
    /* we abort(). This way we can run a debugger on it */
    fprintf(stderr, "tMPI_Abort called with error code %d", errorcode);
    if (comm == TMPI_COMM_WORLD)
    {
        fprintf(stderr, " on TMPI_COMM_WORLD");
    }
    fprintf(stderr, "\n");
    fflush(stdout);

    abort();
#else
    /* we just kill all threads, but not the main process */

    if (tMPI_Is_master())
    {
        if (comm == TMPI_COMM_WORLD)
        {
            fprintf(stderr,
                    "tMPI_Abort called on TMPI_COMM_WORLD main with errorcode=%d\n",
                    errorcode);
        }
        else
        {
            fprintf(stderr, "tMPI_Abort called on main thread with errorcode=%d\n",
                    errorcode);
        }
        fflush(stderr);
        exit(errorcode);
    }
    else
    {
        int *ret;
        /* kill myself */
        fprintf(stderr, "tMPI_Abort called with error code %d on thread %d\n",
                errorcode, tMPI_This_threadnr());
        fflush(stderr);
        ret = (int*)malloc(sizeof(int));
        tMPI_Thread_exit(ret);
    }
#endif
    return TMPI_SUCCESS;
}
Beispiel #21
0
int tMPI_Cartdim_get(tMPI_Comm comm, int *ndims)
{
#ifdef TMPI_TRACE
    tMPI_Trace_print("tMPI_Cartdim_get(%p, %p)", comm, ndims);
#endif
    if (!comm)
    {
        return tMPI_Error(TMPI_COMM_WORLD, TMPI_ERR_COMM);
    }
    if (!comm->cart || comm->cart->ndims==0)
    {
        return TMPI_SUCCESS;
    }
    *ndims=comm->cart->ndims;
    return TMPI_SUCCESS;
}
Beispiel #22
0
int tMPI_Type_contiguous(int count, tMPI_Datatype oldtype,
                         tMPI_Datatype *newtype)
{
    struct tmpi_datatype_ *ntp;

#ifdef TMPI_TRACE
    tMPI_Trace_print("tMPI_Type_contiguous(%d, %p, %p)", count, oldtype,
                     newtype);
#endif
    ntp               = (struct tmpi_datatype_*)tMPI_Malloc(sizeof(struct tmpi_datatype_));
    ntp->size         = count*oldtype->size;
    ntp->op_functions = NULL;

    /* establish components */
    ntp->N_comp = 1;
    ntp->comps  = (struct tmpi_datatype_component*)tMPI_Malloc(
                sizeof(struct tmpi_datatype_component)*1);
    ntp->comps[0].type  = oldtype;
    ntp->comps[0].count = 1;
    ntp->committed      = FALSE;

    /* now add it to the list.  */
    tMPI_Spinlock_lock(&(tmpi_global->datatype_lock));
    /* check whether there's space */
    if (tmpi_global->N_usertypes + 1 >= tmpi_global->Nalloc_usertypes)
    {
        /* make space */
        tmpi_global->Nalloc_usertypes = Nthreads*(tmpi_global->N_usertypes) + 1;
        tmpi_global->usertypes        = (struct tmpi_datatype_**)
            tMPI_Realloc(tmpi_global->usertypes,
                         (sizeof(struct tmpi_datatype_ *)*
                          tmpi_global->Nalloc_usertypes)
                         );

    }
    /* add to the list */
    tmpi_global->usertypes[tmpi_global->N_usertypes] = ntp;
    tmpi_global->N_usertypes++;
    *newtype = ntp;
    tMPI_Spinlock_unlock(&(tmpi_global->datatype_lock));

    return TMPI_SUCCESS;
}
Beispiel #23
0
int tMPI_Cart_rank(tMPI_Comm comm, int *coords, int *rank)
{
    int i, mul = 1, ret = 0;

#ifdef TMPI_TRACE
    tMPI_Trace_print("tMPI_Cart_get(%p, %p, %p)", comm, coords, rank);
#endif
    if (!comm)
    {
        return tMPI_Error(TMPI_COMM_WORLD, TMPI_ERR_COMM);
    }
    if (!comm->cart || comm->cart->ndims == 0)
    {
        return TMPI_SUCCESS;
    }

    /* because of row-major ordering, we count the dimensions down */
    for (i = comm->cart->ndims-1; i >= 0; i--)
    {
        int rcoord = coords[i];
        if (comm->cart->periods[i])
        {
            /* apply periodic boundary conditions */
            rcoord = rcoord % comm->cart->dims[i];
            if (rcoord < 0)
            {
                rcoord += comm->cart->dims[i];
            }
        }
        else
        {
            if (rcoord < 0 || rcoord >= comm->cart->dims[i])
            {
                return tMPI_Error(comm, TMPI_ERR_DIMS);
            }
        }
        ret += mul*rcoord;
        mul *= comm->cart->dims[i];
    }
    *rank = ret;
    return TMPI_SUCCESS;
}
Beispiel #24
0
int tMPI_Init_fn(int main_thread_returns, int N, 
                 void (*start_function)(void*), void *arg)
{
#ifdef TMPI_TRACE
    tMPI_Trace_print("tMPI_Init_fn(%d, %p, %p)", N, start_function, arg);
#endif

    if (N<1)
    {
        N=tMPI_Thread_get_hw_number();
        if (N<1) N=1; /*because that's what the fn returns if it doesn't know*/
    }

    if (TMPI_COMM_WORLD==0 && N>=1) /* we're the main process */
    {
        tMPI_Start_threads(main_thread_returns, N, 0, 0, start_function, arg, 
                           NULL);
    }
    return TMPI_SUCCESS;
}
Beispiel #25
0
int tMPI_Create_errhandler(tMPI_Errhandler_fn *function, 
                           tMPI_Errhandler *errhandler) 
{
#ifdef TMPI_TRACE
    tMPI_Trace_print("tMPI_Create_errhandler(%p, %p)", function, errhandler);
#endif

    /* we don't use a special malloc here because this is the error handler
       creation function. */
    *errhandler=(tMPI_Errhandler)malloc(sizeof(struct tmpi_errhandler_));
    if (!*errhandler)
    {
        fprintf(stderr, "tMPI fatal error (%s), bailing out\n", 
                tmpi_errmsg[TMPI_ERR_MALLOC]);
        abort();
    }
    (*errhandler)->err=0;
    (*errhandler)->fn=*function;
    return TMPI_SUCCESS;
}
Beispiel #26
0
/* topology functions */
int tMPI_Topo_test(tMPI_Comm comm, int *status)
{
#ifdef TMPI_TRACE
    tMPI_Trace_print("tMPI_Topo_test(%p, %p)", comm, status);
#endif

    if (!comm)
    {
        return tMPI_Error(TMPI_COMM_WORLD, TMPI_ERR_COMM);
    }

    if (comm->cart)
        *status=TMPI_CART;
    /*else if (comm->graph)
        status=MPI_GRAPH;*/
    else 
        *status=TMPI_UNDEFINED;

    return TMPI_SUCCESS;
}
Beispiel #27
0
int tMPI_Init(int *argc, char ***argv, int (*start_function)(int, char**))
{
#ifdef TMPI_TRACE
    tMPI_Trace_print("tMPI_Init(%p, %p, %p)", argc, argv, start_function);
#endif


    if (TMPI_COMM_WORLD==0) /* we're the main process */
    {
        int N=0;
        tMPI_Get_N(argc, argv, "-nt", &N);
        tMPI_Start_threads(FALSE, N, argc, argv, NULL, NULL, start_function);
    }
    else
    {
        /* if we're a sub-thread we need don't need to do anyhing, because 
           everything has already been set up by either the main thread, 
           or the thread runner function.*/
    }
    return TMPI_SUCCESS;
}
Beispiel #28
0
int tMPI_Cart_map(tMPI_Comm comm, int ndims, int *dims, int *periods, 
                  int *newrank)
{
    /* this function doesn't actually do anything beyond returning the current 
       rank (or TMPI_UNDEFINED if it doesn't fit in the new topology */
    int myrank=tMPI_Comm_seek_rank(comm, tMPI_Get_current());
    int Ntot=1;
    int i;

#ifdef TMPI_TRACE
    tMPI_Trace_print("tMPI_Cart_map(%p, %d, %p, %p, %p)", comm, ndims, dims, 
                     periods, newrank);
#endif
    if (!comm)
    {
        return tMPI_Error(TMPI_COMM_WORLD, TMPI_ERR_COMM);
    }
    if (!periods)
    {
        return tMPI_Error(comm, TMPI_ERR_DIMS);
    }
 
    /* calculate the total number of procs in cartesian comm */
    for(i=0;i<ndims;i++)
    {
        Ntot *= dims[i];
    }

    if (myrank >= Ntot)
    {
        *newrank=TMPI_UNDEFINED;
    }
    else
    {
        *newrank=myrank;
    }

    return TMPI_SUCCESS;
}
Beispiel #29
0
int tMPI_Scan(void* sendbuf, void* recvbuf, int count,
              tMPI_Datatype datatype, tMPI_Op op, tMPI_Comm comm)
{
    struct tmpi_thread *cur=tMPI_Get_current();
    int myrank=tMPI_Comm_seek_rank(comm, cur);
    int N=tMPI_Comm_N(comm);
    int prev=myrank - 1; /* my previous neighbor */
    int next=myrank + 1; /* my next neighbor */

#ifdef TMPI_PROFILE
    tMPI_Profile_count_start(cur);
#endif
#ifdef TMPI_TRACE
    tMPI_Trace_print("tMPI_Scan(%p, %p, %d, %p, %p, %p)",
                     sendbuf, recvbuf, count, datatype, op, comm);
#endif
    if (count==0)
        return TMPI_SUCCESS;
    if (!recvbuf)
    {
        return tMPI_Error(comm, TMPI_ERR_BUF);
    }
    if (sendbuf==TMPI_IN_PLACE) 
    {
        sendbuf=recvbuf;
    }

    /* we set our send and recv buffers */
    tMPI_Atomic_ptr_set(&(comm->reduce_sendbuf[myrank]),sendbuf);
    tMPI_Atomic_ptr_set(&(comm->reduce_recvbuf[myrank]),recvbuf);

    /* now wait for the previous rank to finish */
    if (myrank > 0)
    {
        void *a, *b;
        int ret;

#if defined(TMPI_PROFILE) && defined(TMPI_CYCLE_COUNT)
        tMPI_Profile_wait_start(cur);
#endif
        /* wait for the previous neighbor's data to be ready */
        tMPI_Event_wait( &(comm->csync[myrank].events[prev]) );
        tMPI_Event_process( &(comm->csync[myrank].events[prev]), 1);
#if defined(TMPI_PROFILE) && defined(TMPI_CYCLE_COUNT)
        tMPI_Profile_wait_stop(cur, TMPIWAIT_Reduce);
#endif
#ifdef TMPI_DEBUG
        printf("%d: scanning with %d \n", myrank, prev, iteration);
        fflush(stdout);
#endif
        /* now do the reduction */
        if (prev > 0)
        {
            a = (void*)tMPI_Atomic_ptr_get(&(comm->reduce_recvbuf[prev]));
        }
        else
        {
            a = (void*)tMPI_Atomic_ptr_get(&(comm->reduce_sendbuf[prev]));
        }
        b = sendbuf;

        if ((ret=tMPI_Reduce_run_op(recvbuf, a, b, datatype,
                                    count, op, comm)) != TMPI_SUCCESS)
        {
            return ret;
        }

        /* signal to my previous neighbor that I'm done with the data */
        tMPI_Event_signal( &(comm->csync[prev].events[prev]) );
    }
    else
    {
        if (sendbuf != recvbuf)
        {
            /* copy the data if this is rank 0, and not MPI_IN_PLACE */
            memcpy(recvbuf, sendbuf, count*datatype->size);
        }
    }

    if (myrank < N-1)
    {
        /* signal to my next neighbor that I have the data */
        tMPI_Event_signal( &(comm->csync[next].events[myrank]) );
        /* and wait for my next neighbor to finish */
        tMPI_Event_wait( &(comm->csync[myrank].events[myrank]) );
        tMPI_Event_process( &(comm->csync[myrank].events[myrank]), 1);
    }


#if defined(TMPI_PROFILE) && defined(TMPI_CYCLE_COUNT)
    tMPI_Profile_wait_start(cur);
#endif
    /*tMPI_Barrier_wait( &(comm->barrier));*/
#if defined(TMPI_PROFILE)
    /*tMPI_Profile_wait_stop(cur, TMPIWAIT_Reduce);*/
    tMPI_Profile_count_stop(cur, TMPIFN_Scan);
#endif
    return TMPI_SUCCESS;
}
Beispiel #30
0
/* this is the main comm creation function. All other functions that create
    comms use this*/
int tMPI_Comm_split(tMPI_Comm comm, int color, int key, tMPI_Comm *newcomm)
{
    int                 i, j;
    int                 N = tMPI_Comm_N(comm);
    volatile tMPI_Comm *newcomm_list;
    volatile int        colors[MAX_PREALLOC_THREADS]; /* array with the colors
                                                         of each thread */
    volatile int        keys[MAX_PREALLOC_THREADS];   /* same for keys (only one of
                                                         the threads actually suplies
                                                         these arrays to the comm
                                                         structure) */
    tmpi_bool          i_am_first = FALSE;
    int                myrank     = tMPI_Comm_seek_rank(comm, tMPI_Get_current());
    struct tmpi_split *spl;
    int                ret;

#ifdef TMPI_TRACE
    tMPI_Trace_print("tMPI_Comm_split(%p, %d, %d, %p)", comm, color, key,
                     newcomm);
#endif
    if (!comm)
    {
        *newcomm = NULL;
        return tMPI_Error(TMPI_COMM_WORLD, TMPI_ERR_COMM);
    }

    ret = tMPI_Thread_mutex_lock(&(comm->comm_create_lock));
    if (ret != 0)
    {
        return tMPI_Error(TMPI_COMM_WORLD, TMPI_ERR_IO);
    }
    /* first get the colors */
    if (!comm->new_comm)
    {
        /* i am apparently  first */
        comm->split    = (struct tmpi_split*)tMPI_Malloc(sizeof(struct tmpi_split));
        comm->new_comm = (tMPI_Comm*)tMPI_Malloc(N*sizeof(tMPI_Comm));
        if (N <= MAX_PREALLOC_THREADS)
        {
            comm->split->colors = colors;
            comm->split->keys   = keys;
        }
        else
        {
            comm->split->colors = (int*)tMPI_Malloc(N*sizeof(int));
            comm->split->keys   = (int*)tMPI_Malloc(N*sizeof(int));
        }
        comm->split->Ncol_init  = tMPI_Comm_N(comm);
        comm->split->can_finish = FALSE;
        i_am_first              = TRUE;
        /* the main communicator contains a list the size of grp.N */
    }
    newcomm_list        = comm->new_comm; /* we copy it to the local stacks because
                                             we can later erase comm->new_comm safely */
    spl                 = comm->split;    /* we do the same for spl */
    spl->colors[myrank] = color;
    spl->keys[myrank]   = key;
    spl->Ncol_init--;

    if (spl->Ncol_init == 0)
    {
        ret = tMPI_Thread_cond_signal(&(comm->comm_create_prep));
        if (ret != 0)
        {
            return tMPI_Error(TMPI_COMM_WORLD, TMPI_ERR_IO);
        }
    }

    if (!i_am_first)
    {
        /* all other threads can just wait until the creator thread is
           finished */
        while (!spl->can_finish)
        {
            ret = tMPI_Thread_cond_wait(&(comm->comm_create_finish),
                                        &(comm->comm_create_lock) );
            if (ret != 0)
            {
                return tMPI_Error(TMPI_COMM_WORLD, TMPI_ERR_IO);
            }
        }
    }
    else
    {
        int        Ncomms = 0;
        int        comm_color_[MAX_PREALLOC_THREADS];
        int        comm_N_[MAX_PREALLOC_THREADS];
        int       *comm_color = comm_color_; /* there can't be more comms than N*/
        int       *comm_N     = comm_N_;     /* the number of procs in a group */

        int       *comm_groups;              /* the groups */
        tMPI_Comm *comms;                    /* the communicators */

        /* wait for the colors to be done */
        /*if (N>1)*/
        while (spl->Ncol_init > 0)
        {
            ret = tMPI_Thread_cond_wait(&(comm->comm_create_prep),
                                        &(comm->comm_create_lock));
            if (ret != 0)
            {
                return tMPI_Error(TMPI_COMM_WORLD, TMPI_ERR_IO);
            }
        }

        /* reset the state so that a new comm creating function can run */
        spl->Ncol_destroy = N;
        comm->new_comm    = 0;
        comm->split       = 0;

        comm_groups = (int*)tMPI_Malloc(N*N*sizeof(int));
        if (N > MAX_PREALLOC_THREADS)
        {
            comm_color = (int*)tMPI_Malloc(N*sizeof(int));
            comm_N     = (int*)tMPI_Malloc(N*sizeof(int));
        }

        /* count colors, allocate and split up communicators */
        tMPI_Split_colors(N, (int*)spl->colors,
                          (int*)spl->keys,
                          &Ncomms,
                          comm_N, comm_color, comm_groups);


        /* allocate a bunch of communicators */
        comms = (tMPI_Comm*)tMPI_Malloc(Ncomms*sizeof(tMPI_Comm));
        for (i = 0; i < Ncomms; i++)
        {
            ret = tMPI_Comm_alloc(&(comms[i]), comm, comm_N[i]);
            if (ret != TMPI_SUCCESS)
            {
                return ret;
            }
        }

        /* now distribute the comms */
        for (i = 0; i < Ncomms; i++)
        {
            comms[i]->grp.N = comm_N[i];
            for (j = 0; j < comm_N[i]; j++)
            {
                comms[i]->grp.peers[j] =
                    comm->grp.peers[comm_groups[i*comm->grp.N + j]];
            }
        }
        /* and put them into the newcomm_list */
        for (i = 0; i < N; i++)
        {
            newcomm_list[i] = TMPI_COMM_NULL;
            for (j = 0; j < Ncomms; j++)
            {
                if (spl->colors[i] == comm_color[j])
                {
                    newcomm_list[i] = comms[j];
                    break;
                }
            }
        }

#ifdef TMPI_DEBUG
        /* output */
        for (i = 0; i < Ncomms; i++)
        {
            printf("Group %d (color %d) has %d members: ",
                   i, comm_color[i], comm_N[i]);
            for (j = 0; j < comm_N[i]; j++)
            {
                printf(" %d ", comm_groups[comm->grp.N*i + j]);
            }

            printf(" rank: ");
            for (j = 0; j < comm_N[i]; j++)
            {
                printf(" %d ", spl->keys[comm_groups[N*i + j]]);
            }
            printf(" color: ");
            for (j = 0; j < comm_N[i]; j++)
            {
                printf(" %d ", spl->colors[comm_groups[N*i + j]]);
            }
            printf("\n");
        }
#endif
        if (N > MAX_PREALLOC_THREADS)
        {
            free((int*)spl->colors);
            free((int*)spl->keys);
            free(comm_color);
            free(comm_N);
        }
        free(comm_groups);
        free(comms);
        spl->can_finish = TRUE;

        /* tell the waiting threads that there's a comm ready */
        ret = tMPI_Thread_cond_broadcast(&(comm->comm_create_finish));
        if (ret != 0)
        {
            return tMPI_Error(TMPI_COMM_WORLD, TMPI_ERR_IO);
        }
    }
    /* here the individual threads get their comm object */
    *newcomm = newcomm_list[myrank];

    /* free when we have assigned them all, so we can reuse the object*/
    spl->Ncol_destroy--;
    if (spl->Ncol_destroy == 0)
    {
        free((void*)newcomm_list);
        free(spl);
    }

    ret = tMPI_Thread_mutex_unlock(&(comm->comm_create_lock));
    if (ret != 0)
    {
        return tMPI_Error(TMPI_COMM_WORLD, TMPI_ERR_IO);
    }

    return TMPI_SUCCESS;
}