/* Free a communicator.
 *
 * Non-strict mode (!TMPI_STRICT): each caller individually removes itself
 * from the communicator's group; the last remaining member destroys it.
 * Strict mode (TMPI_STRICT): treated as a collective call — an atomic
 * destroy counter is incremented, and the thread that brings the count to
 * the group size performs the actual destruction.
 *
 * Returns TMPI_SUCCESS, or a tMPI error code on mutex/destroy failure.
 *
 * NOTE(review): unlike MPI_Comm_free, this does not set *comm to a null
 * handle afterwards — confirm whether callers rely on that.
 */
int tMPI_Comm_free(tMPI_Comm *comm)
{
    int size;
    int sum;
    int ret;
#ifdef TMPI_TRACE
    tMPI_Trace_print("tMPI_Comm_free(%p)", comm);
#endif
#ifndef TMPI_STRICT
    if (!*comm)
    {
        return TMPI_SUCCESS;
    }

    if ((*comm)->grp.N > 1)
    {
        /* BUG FIX: 'myrank' was used below without ever being declared or
           assigned. Look up this thread's rank in the communicator first.
           (tMPI_Comm_seek_rank is the library's rank-lookup helper —
           TODO confirm name against the rest of thread_mpi.) */
        int myrank = tMPI_Comm_seek_rank(*comm, tMPI_Get_current());

        /* we remove ourselves from the comm. */
        ret = tMPI_Thread_mutex_lock(&((*comm)->comm_create_lock));
        if (ret != 0)
        {
            return tMPI_Error(TMPI_COMM_WORLD, TMPI_ERR_IO);
        }
        /* swap the last peer into our slot and shrink the group */
        (*comm)->grp.peers[myrank] = (*comm)->grp.peers[(*comm)->grp.N-1];
        (*comm)->grp.N--;
        ret = tMPI_Thread_mutex_unlock(&((*comm)->comm_create_lock));
        if (ret != 0)
        {
            return tMPI_Error(TMPI_COMM_WORLD, TMPI_ERR_IO);
        }
    }
    else
    {
        /* we're the last one so we can safely destroy it */
        ret = tMPI_Comm_destroy(*comm, TRUE);
        if (ret != 0)
        {
            return ret;
        }
    }
#else
    /* This is correct if programs actually treat Comm_free as a collective
       call */
    if (!*comm)
    {
        return TMPI_SUCCESS;
    }

    size = (*comm)->grp.N;

    /* we add 1 to the destroy counter and actually deallocate if the
       counter reaches N. */
    sum = tMPI_Atomic_fetch_add( &((*comm)->destroy_counter), 1) + 1;
    /* this is a collective call on a shared data structure, so only one
       process (the last one in this case) should do anything */
    if (sum == size)
    {
        ret = tMPI_Comm_destroy(*comm, TRUE);
        if (ret != 0)
        {
            return ret;
        }
    }
#endif
    return TMPI_SUCCESS;
}
int tMPI_Finalize(void) { int i; #ifdef TMPI_TRACE tMPI_Trace_print("tMPI_Finalize()"); #endif #ifdef TMPI_DEBUG printf("%5d: tMPI_Finalize called\n", tMPI_This_threadnr()); fflush(stdout); #endif #ifdef TMPI_PROFILE { struct tmpi_thread *cur=tMPI_Get_current(); tMPI_Profile_stop( &(cur->profile) ); tMPI_Thread_barrier_wait( &(tmpi_global->barrier) ); if (tMPI_Is_master()) { tMPI_Profiles_summarize(Nthreads, threads); } } #endif tMPI_Thread_barrier_wait( &(tmpi_global->barrier) ); if (tMPI_Is_master()) { /* we just wait for all threads to finish; the order isn't very relevant, as all threads should arrive at their endpoints soon. */ for(i=1;i<Nthreads;i++) { if (tMPI_Thread_join(threads[i].thread_id, NULL)) { tMPI_Error(TMPI_COMM_WORLD, TMPI_ERR_FINALIZE); } tMPI_Thread_destroy(&(threads[i])); } /* at this point, we are the only thread left, so we can destroy the global structures with impunity. */ tMPI_Thread_destroy(&(threads[0])); free(threads); tMPI_Thread_key_delete(id_key); /* de-allocate all the comm stuctures. */ { tMPI_Comm cur=TMPI_COMM_WORLD->next; while(cur && (cur!=TMPI_COMM_WORLD) ) { tMPI_Comm next=cur->next; tMPI_Comm_destroy(cur); cur=next; } tMPI_Comm_destroy(TMPI_COMM_WORLD); } tMPI_Group_free(&TMPI_GROUP_EMPTY); threads=0; TMPI_COMM_WORLD=NULL; TMPI_GROUP_EMPTY=NULL; Nthreads=0; /* deallocate the 'global' structure */ tMPI_Global_destroy(tmpi_global); free(tmpi_global); tmpi_finalized=TRUE; } else { tMPI_Thread_exit(0); } return TMPI_SUCCESS; }