static void tMPI_Thread_init(struct tmpi_thread *th)
{
    int N_envelopes      = (Nthreads+1)*N_EV_ALLOC;
    int N_send_envelopes = N_EV_ALLOC;
    int N_reqs           = (Nthreads+1)*N_EV_ALLOC;
    int i;

    /* we set our thread id, as a thread-specific piece of global data. */
    tMPI_Thread_setspecific(id_key, th);

    /* allocate comm.self */
    th->self_comm = tMPI_Comm_alloc(TMPI_COMM_WORLD, 1);
    th->self_comm->grp.peers[0] = th;

    /* allocate envelopes */
    tMPI_Free_env_list_init(&(th->envelopes), N_envelopes);
    /* recv list */
    tMPI_Recv_env_list_init(&(th->evr));
    /* send lists */
    th->evs = (struct send_envelope_list*)tMPI_Malloc(
                        sizeof(struct send_envelope_list)*Nthreads);
    for (i = 0; i < Nthreads; i++)
    {
        tMPI_Send_env_list_init(&(th->evs[i]), N_send_envelopes);
    }

    tMPI_Atomic_set(&(th->ev_outgoing_received), 0);

    tMPI_Event_init(&(th->p2p_event));

    /* allocate requests */
    tMPI_Req_list_init(&(th->rql), N_reqs);

#ifdef USE_COLLECTIVE_COPY_BUFFER
    /* allocate copy_buffer list */
    tMPI_Copy_buffer_list_init(&(th->cbl_multi), (Nthreads+1)*(N_COLL_ENV+1),
                               Nthreads*COPY_BUFFER_SIZE);
#endif

#ifdef TMPI_PROFILE
    tMPI_Profile_init(&(th->profile));
#endif

    /* now wait for all other threads to come online, before we start the
       MPI program */
    tMPI_Thread_barrier_wait(&(tmpi_global->barrier));
}
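/* Illustrative sketch (not part of thread_mpi itself): tMPI_Thread_init() is
   meant to be called once by each worker thread at startup, before any
   point-to-point or collective calls. A thread starter along the lines of the
   hypothetical example below would perform that per-thread setup and then hand
   control to the user-supplied entry point; the real tMPI_Thread_starter() may
   differ in details such as profiling and shutdown handling. */
#if 0  /* example only; not compiled */
static void example_thread_starter(void *arg)
{
    struct tmpi_thread *th = (struct tmpi_thread*)arg;

    /* per-thread setup: envelopes, request lists, the self communicator and
       the startup barrier are all handled inside tMPI_Thread_init() */
    tMPI_Thread_init(th);

    /* run the user's entry point, whichever form was supplied */
    if (th->start_fn)
    {
        th->start_fn(th->start_arg);
    }
    else if (th->start_fn_main)
    {
        th->start_fn_main(th->argc, th->argv);
    }
}
#endif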
/* this is the main comm creation function. All other functions that create
   comms use this */
int tMPI_Comm_split(tMPI_Comm comm, int color, int key, tMPI_Comm *newcomm)
{
    int                 i, j;
    int                 N = tMPI_Comm_N(comm);
    volatile tMPI_Comm *newcomm_list;
    volatile int        colors[MAX_PREALLOC_THREADS]; /* array with the colors
                                                         of each thread */
    volatile int        keys[MAX_PREALLOC_THREADS];   /* same for keys (only one
                                                         of the threads actually
                                                         supplies these arrays
                                                         to the comm structure) */
    tmpi_bool           i_am_first = FALSE;
    int                 myrank     = tMPI_Comm_seek_rank(comm,
                                                         tMPI_Get_current());
    struct tmpi_split  *spl;
    int                 ret;

#ifdef TMPI_TRACE
    tMPI_Trace_print("tMPI_Comm_split(%p, %d, %d, %p)", comm, color, key,
                     newcomm);
#endif
    if (!comm)
    {
        *newcomm = NULL;
        return tMPI_Error(TMPI_COMM_WORLD, TMPI_ERR_COMM);
    }

    ret = tMPI_Thread_mutex_lock(&(comm->comm_create_lock));
    if (ret != 0)
    {
        return tMPI_Error(TMPI_COMM_WORLD, TMPI_ERR_IO);
    }
    /* first get the colors */
    if (!comm->new_comm)
    {
        /* i am apparently first */
        comm->split    = (struct tmpi_split*)
                         tMPI_Malloc(sizeof(struct tmpi_split));
        comm->new_comm = (tMPI_Comm*)tMPI_Malloc(N*sizeof(tMPI_Comm));
        if (N <= MAX_PREALLOC_THREADS)
        {
            comm->split->colors = colors;
            comm->split->keys   = keys;
        }
        else
        {
            comm->split->colors = (int*)tMPI_Malloc(N*sizeof(int));
            comm->split->keys   = (int*)tMPI_Malloc(N*sizeof(int));
        }
        comm->split->Ncol_init  = tMPI_Comm_N(comm);
        comm->split->can_finish = FALSE;
        i_am_first              = TRUE;
        /* the main communicator contains a list the size of grp.N */
    }
    newcomm_list = comm->new_comm; /* we copy it to the local stacks because
                                      we can later erase comm->new_comm
                                      safely */
    spl = comm->split;             /* we do the same for spl */
    spl->colors[myrank] = color;
    spl->keys[myrank]   = key;
    spl->Ncol_init--;

    if (spl->Ncol_init == 0)
    {
        ret = tMPI_Thread_cond_signal(&(comm->comm_create_prep));
        if (ret != 0)
        {
            return tMPI_Error(TMPI_COMM_WORLD, TMPI_ERR_IO);
        }
    }

    if (!i_am_first)
    {
        /* all other threads can just wait until the creator thread is
           finished */
        while (!spl->can_finish)
        {
            ret = tMPI_Thread_cond_wait(&(comm->comm_create_finish),
                                        &(comm->comm_create_lock));
            if (ret != 0)
            {
                return tMPI_Error(TMPI_COMM_WORLD, TMPI_ERR_IO);
            }
        }
    }
    else
    {
        int        Ncomms = 0;
        int        comm_color_[MAX_PREALLOC_THREADS];
        int        comm_N_[MAX_PREALLOC_THREADS];
        int       *comm_color = comm_color_; /* there can't be more comms
                                                than N */
        int       *comm_N     = comm_N_;     /* the number of procs in a
                                                group */
        int       *comm_groups;              /* the groups */
        tMPI_Comm *comms;                    /* the communicators */

        /* wait for the colors to be done */
        /*if (N>1)*/
        while (spl->Ncol_init > 0)
        {
            ret = tMPI_Thread_cond_wait(&(comm->comm_create_prep),
                                        &(comm->comm_create_lock));
            if (ret != 0)
            {
                return tMPI_Error(TMPI_COMM_WORLD, TMPI_ERR_IO);
            }
        }

        /* reset the state so that a new comm creating function can run */
        spl->Ncol_destroy = N;
        comm->new_comm    = 0;
        comm->split       = 0;

        comm_groups = (int*)tMPI_Malloc(N*N*sizeof(int));
        if (N > MAX_PREALLOC_THREADS)
        {
            comm_color = (int*)tMPI_Malloc(N*sizeof(int));
            comm_N     = (int*)tMPI_Malloc(N*sizeof(int));
        }

        /* count colors, allocate and split up communicators */
        tMPI_Split_colors(N, (int*)spl->colors, (int*)spl->keys,
                          &Ncomms, comm_N, comm_color, comm_groups);

        /* allocate a bunch of communicators */
        comms = (tMPI_Comm*)tMPI_Malloc(Ncomms*sizeof(tMPI_Comm));
        for (i = 0; i < Ncomms; i++)
        {
            ret = tMPI_Comm_alloc(&(comms[i]), comm, comm_N[i]);
            if (ret != TMPI_SUCCESS)
            {
                return ret;
            }
        }

        /* now distribute the comms */
        for (i = 0; i < Ncomms; i++)
        {
            comms[i]->grp.N = comm_N[i];
            for (j = 0; j < comm_N[i]; j++)
            {
                comms[i]->grp.peers[j] =
                    comm->grp.peers[comm_groups[i*comm->grp.N + j]];
            }
        }
        /* and put them into the newcomm_list */
        for (i = 0; i < N; i++)
        {
            newcomm_list[i] = TMPI_COMM_NULL;
            for (j = 0; j < Ncomms; j++)
            {
                if (spl->colors[i] == comm_color[j])
                {
                    newcomm_list[i] = comms[j];
                    break;
                }
            }
        }

#ifdef TMPI_DEBUG
        /* output */
        for (i = 0; i < Ncomms; i++)
        {
            printf("Group %d (color %d) has %d members: ",
                   i, comm_color[i], comm_N[i]);
            for (j = 0; j < comm_N[i]; j++)
            {
                printf(" %d ", comm_groups[comm->grp.N*i + j]);
            }

            printf(" rank: ");
            for (j = 0; j < comm_N[i]; j++)
            {
                printf(" %d ", spl->keys[comm_groups[N*i + j]]);
            }
            printf(" color: ");
            for (j = 0; j < comm_N[i]; j++)
            {
                printf(" %d ", spl->colors[comm_groups[N*i + j]]);
            }
            printf("\n");
        }
#endif
        if (N > MAX_PREALLOC_THREADS)
        {
            free((int*)spl->colors);
            free((int*)spl->keys);
            free(comm_color);
            free(comm_N);
        }
        free(comm_groups);
        free(comms);
        spl->can_finish = TRUE;

        /* tell the waiting threads that there's a comm ready */
        ret = tMPI_Thread_cond_broadcast(&(comm->comm_create_finish));
        if (ret != 0)
        {
            return tMPI_Error(TMPI_COMM_WORLD, TMPI_ERR_IO);
        }
    }

    /* here the individual threads get their comm object */
    *newcomm = newcomm_list[myrank];

    /* free when we have assigned them all, so we can reuse the object */
    spl->Ncol_destroy--;
    if (spl->Ncol_destroy == 0)
    {
        free((void*)newcomm_list);
        free(spl);
    }

    ret = tMPI_Thread_mutex_unlock(&(comm->comm_create_lock));
    if (ret != 0)
    {
        return tMPI_Error(TMPI_COMM_WORLD, TMPI_ERR_IO);
    }

    return TMPI_SUCCESS;
}
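/* Illustrative sketch (not part of thread_mpi itself): a typical use of
   tMPI_Comm_split() is to divide TMPI_COMM_WORLD into sub-communicators by
   some property of the rank. In the hypothetical example below every thread
   calls the function collectively; threads passing the same color end up in
   the same new communicator, and the key orders the ranks within it. The
   helper name example_split_even_odd is an assumption for this sketch only. */
#if 0  /* example only; not compiled */
static int example_split_even_odd(tMPI_Comm *halfcomm)
{
    int myrank;
    int ret;

    ret = tMPI_Comm_rank(TMPI_COMM_WORLD, &myrank);
    if (ret != TMPI_SUCCESS)
    {
        return ret;
    }
    /* color 0 for even world ranks, 1 for odd; keep the world order as key */
    return tMPI_Comm_split(TMPI_COMM_WORLD, myrank % 2, myrank, halfcomm);
}
#endif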
void tMPI_Start_threads(tmpi_bool main_returns, int N, int *argc, char ***argv,
                        void (*start_fn)(void*), void *start_arg,
                        int (*start_fn_main)(int, char**))
{
#ifdef TMPI_TRACE
    tMPI_Trace_print("tMPI_Start_threads(%d, %p, %p, %p, %p)", N, argc, argv,
                     start_fn, start_arg);
#endif
    if (N > 0)
    {
        int i;
        int set_affinity = FALSE;

        tmpi_finalized = FALSE;
        Nthreads       = N;

        /* allocate global data */
        tmpi_global = (struct tmpi_global*)
                      tMPI_Malloc(sizeof(struct tmpi_global));
        tMPI_Global_init(tmpi_global, N);

        /* allocate world and thread data */
        threads          = (struct tmpi_thread*)
                           tMPI_Malloc(sizeof(struct tmpi_thread)*N);
        TMPI_COMM_WORLD  = tMPI_Comm_alloc(NULL, N);
        TMPI_GROUP_EMPTY = tMPI_Group_alloc();

        if (tMPI_Thread_key_create(&id_key, NULL))
        {
            tMPI_Error(TMPI_COMM_WORLD, TMPI_ERR_INIT);
        }
        for (i = 0; i < N; i++)
        {
            TMPI_COMM_WORLD->grp.peers[i] = &(threads[i]);

            /* copy argc, argv */
            if (argc && argv)
            {
                int j;
                threads[i].argc = *argc;
                threads[i].argv = (char**)tMPI_Malloc(threads[i].argc*
                                                      sizeof(char*));
                for (j = 0; j < threads[i].argc; j++)
                {
#if !(defined(_WIN32) || defined(_WIN64))
                    threads[i].argv[j] = strdup((*argv)[j]);
#else
                    threads[i].argv[j] = _strdup((*argv)[j]);
#endif
                }
            }
            else
            {
                threads[i].argc = 0;
                threads[i].argv = NULL;
            }
            threads[i].start_fn      = start_fn;
            threads[i].start_fn_main = start_fn_main;
            threads[i].start_arg     = start_arg;
        }

        /* now check whether to set affinity */
#ifdef TMPI_THREAD_AFFINITY
        {
            int nhw = tMPI_Thread_get_hw_number();
            if ((nhw > 1) && (nhw == N))
            {
                set_affinity = TRUE;
            }
        }
#endif

        for (i = 1; i < N; i++) /* zero is the main thread */
        {
            int ret;

            if (set_affinity)
            {
                ret = tMPI_Thread_create_aff(&(threads[i].thread_id),
                                             tMPI_Thread_starter,
                                             (void*)&(threads[i]));
            }
            else
            {
                ret = tMPI_Thread_create(&(threads[i].thread_id),
                                         tMPI_Thread_starter,
                                         (void*)&(threads[i]));
            }
            if (ret)
            {
                tMPI_Error(TMPI_COMM_WORLD, TMPI_ERR_INIT);
            }
        }
        /* the main thread now also runs start_fn if we don't want it to
           return */
        if (!main_returns)
        {
            tMPI_Thread_starter((void*)&(threads[0]));
        }
        else
        {
            tMPI_Thread_init(&(threads[0]));
        }
    }
}
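/* Illustrative sketch (not part of thread_mpi itself): tMPI_Start_threads() is
   normally reached through an initialization wrapper rather than called by
   user code directly. The hypothetical helper below shows one way such a
   wrapper could map "use all hardware threads" onto the N argument and let the
   main thread enter the same program entry point as the workers
   (main_returns = FALSE). The names example_run_threaded and
   nthreads_requested are assumptions for this sketch only. */
#if 0  /* example only; not compiled */
static void example_run_threaded(int nthreads_requested, int *argc,
                                 char ***argv,
                                 int (*program_main)(int, char**))
{
    int N = nthreads_requested;

    if (N <= 0)
    {
        /* fall back to the number of hardware threads detected */
        N = tMPI_Thread_get_hw_number();
    }
    /* start N-1 workers; the main thread also runs program_main because
       main_returns is FALSE. Every thread gets a private copy of argc/argv. */
    tMPI_Start_threads(FALSE, N, argc, argv, NULL, NULL, program_main);
}
#endif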