JNIEXPORT jboolean JNICALL Java_mpi_MPI_isFinalized(JNIEnv *env, jclass jthis) { int flag; int rc = MPI_Finalized(&flag); ompi_java_exceptionCheck(env, rc); return flag ? JNI_TRUE : JNI_FALSE; }
/** Initialize DALEC. MPI must be initialized before this can be called. It
 * is invalid to make DALEC calls before initialization. Collective on the
 * world group.
 *
 * @return Zero on success */
int PDALEC_Initialize(MPI_Comm user_comm)
{
    /* Count this call; only the first caller performs the initialization. */
    int dalec_alive = atomic_fetch_add_explicit(&(DALECI_GLOBAL_STATE.alive),
                                                1, memory_order_seq_cst);
    if (dalec_alive == 0) {
        /* Initialize, since this is the first call to this function. */
        int mpi_is_init, mpi_is_fin;
        MPI_Initialized(&mpi_is_init);
        MPI_Finalized(&mpi_is_fin);
        if (!mpi_is_init || mpi_is_fin) {
            DALECI_Warning("MPI must be active when calling DALEC_Initialize");
            return DALEC_ERROR_MPI_USAGE;
        }

        /* Always dupe the user communicator for internal usage. */
        /* Do not abort on MPI failure, let user handle if MPI does not abort. */
        int rc = MPI_Comm_dup(user_comm, &DALECI_GLOBAL_STATE.mpi_comm);
        int err = DALECI_Check_MPI("DALEC_Initialize", "MPI_Comm_dup", rc);
        if (err != DALEC_SUCCESS) {
            return err;
        }

        /* Determine what level of threading MPI supports. */
        int mpi_thread_level;
        MPI_Query_thread(&mpi_thread_level);
        DALECI_GLOBAL_STATE.mpi_thread_level = mpi_thread_level;
        return DALEC_SUCCESS;
    } else {
        /* Library has already been initialized. */
        return DALEC_SUCCESS;
    }
}
//Make sure we finalize MPI void asynch_onexit(void) { int flag; MPI_Finalized(&flag); if (!flag) MPI_Finalize(); }
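/* The asynch_onexit guard above only helps if it is registered to run at
 * program exit. A minimal sketch of that registration, assuming a
 * hypothetical asynch_init helper; only atexit() (standard C) and the
 * asynch_onexit name come from the snippet above. */
#include <stdlib.h>
#include <mpi.h>

int asynch_init(int *argc, char ***argv)
{
    int flag = 0;
    MPI_Initialized(&flag);
    if (!flag)
        MPI_Init(argc, argv);
    return atexit(asynch_onexit);   /* 0 on success */
}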
void init(int *argc, char ***argv) { #ifdef GMX_LIB_MPI int isInitialized = 0, isFinalized = 0; MPI_Finalized(&isFinalized); GMX_RELEASE_ASSERT(!isFinalized, "Invalid attempt to initialize MPI after finalization"); MPI_Initialized(&isInitialized); if (isInitialized) { if (0 == g_initializationCounter) { // Some other code has already initialized MPI, so bump the counter so that // we know not to finalize MPI ourselves later. g_initializationCounter++; } } else { #ifdef GMX_FAHCORE (void) fah_MPI_Init(argc, argv); #else (void) MPI_Init(argc, argv); #endif } // Bump the counter to record this initialization event g_initializationCounter++; #else GMX_UNUSED_VALUE(argc); GMX_UNUSED_VALUE(argv); #endif }
int cfio_finalize() { int ret,flag; cfio_msg_t *msg; ret = MPI_Finalized(&flag); if(flag) { error("***You should not call MPI_Finalize before cfio_Finalized*****\n"); return CFIO_ERROR_FINAL_AFTER_MPI; } if(cfio_map_proc_type(rank) == CFIO_MAP_TYPE_CLIENT) { cfio_send_io_done(&msg, rank); } if(cfio_map_proc_type(rank) == CFIO_MAP_TYPE_SERVER) { cfio_server_final(); }else if(cfio_map_proc_type(rank) == CFIO_MAP_TYPE_CLIENT) { cfio_id_final(); cfio_send_final(); } cfio_map_final(); debug(DEBUG_CFIO, "success return."); return CFIO_ERROR_NONE; }
int wc_mpi_init(int *argc, char ***argv) { int rc = 0; int flag = 0; if (MPI_Finalized(&flag) == MPI_SUCCESS) { if (flag != 0) { WC_ERROR("MPI has already been finalized.\n"); return -1; } } flag = 0; if (MPI_Initialized(&flag) == MPI_SUCCESS) { if (flag == 0) { rc = MPI_Init(argc, argv); WC_HANDLE_MPI_ERROR(MPI_Init, rc); if (rc == MPI_SUCCESS) rc |= MPI_Comm_set_errhandler(MPI_COMM_WORLD, MPI_ERRORS_RETURN); return rc; } else { rc = 0; } } else { WC_HANDLE_MPI_ERROR(MPI_Initialized, rc); } return rc; }
void PyMPI_IHaveFinalizedMPI() { #ifdef MPI_FINALIZED MPI_Finalized(&someoneHasFinalizedMPI); if ( !someoneHasFinalizedMPI ) PyMPI_finalizeMPI(); #endif someoneHasFinalizedMPI = 1; }
static void mpi_finalize(void) { // PHDF5 might have finalized already int finalized; MPI_Finalized(&finalized); if (!finalized) MPI_Finalize(); }
int main(int argc, char **argv)
{
    int flag;
    MPI_Init(&argc, &argv);
    MPI_Finalized(&flag);          /* expect 0: MPI is initialized but not finalized */
    printf("flag = %d\n", flag);
    MPI_Finalize();                /* every MPI_Init needs a matching MPI_Finalize */
    return 0;
}
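/* Hedged extension of the test above: MPI_Finalized, like MPI_Initialized,
 * is one of the few MPI routines that may be called at any time, including
 * after MPI_Finalize, so the flag can be sampled on both sides of the call.
 * This variant is illustrative, not part of the original test. */
#include <mpi.h>
#include <stdio.h>

int main(int argc, char **argv)
{
    int flag;
    MPI_Init(&argc, &argv);
    MPI_Finalized(&flag);
    printf("flag before MPI_Finalize = %d\n", flag);   /* expect 0 */
    MPI_Finalize();
    MPI_Finalized(&flag);
    printf("flag after MPI_Finalize = %d\n", flag);    /* expect 1 */
    return 0;
}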
/// Tear down MPI communication. Either call this before exiting, or /// let the destructor do it for you. void MPIConnection::finalize() { int finalized = 0; MPI_CHECK( MPI_Finalized( &finalized ) ); if( !finalized ) { barrier(); MPI_CHECK( MPI_Finalize() ); } }
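/* Sketch of the "let the destructor do it for you" path mentioned in the
 * comment above: the destructor simply forwards to finalize(), which is safe
 * to call more than once because of the MPI_Finalized guard. The destructor
 * body is an assumption, not necessarily the project's actual code. */
MPIConnection::~MPIConnection()
{
    finalize();
}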
int PyMPI_Finalized(int* result) { #ifndef MPI_FINALIZED *result = someoneHasFinalizedMPI; return MPI_SUCCESS; #else return MPI_Finalized(result); #endif }
int main(int argc, char** argv){
    /* This very simple program approximates pi by computing
     * pi = integral from 0 to 1 of 4/(1+x*x) dx, which is approximated by the
     * sum over k=1..N of 4 / (1+[(1/N)*(k-1/2)]**2), then multiplying the
     * sum by (1/N). (This numerical integration scheme is known as the
     * "midpoint rule" and can be found in many numerical analysis books.)
     * The only input required is N.
     */
    double sum, w, total = 0;
    int i, N;
    int rank, np, dest = 0, tag = 50, iniciado, finalizado;
    double tempo_inicial, tempo_final;
    MPI_Status status;

    MPI_Initialized(&iniciado);
    if(!iniciado)
        MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &np);

    /*
     * The solicita routine obtains the value of N, which is then propagated
     */
    if(rank == 0)
        N = solicita();
    MPI_Bcast(&N, 1, MPI_INT, 0, MPI_COMM_WORLD);

    //while (N > 0) {
    tempo_inicial = MPI_Wtime();
    w = 1.0/(double)N;
    sum = 0.0;
    total = 0.0;
    /* Each rank handles the points rank+1, rank+1+np, ...; starting the loop
     * at i = rank would evaluate the integrand at a point outside (0,1] */
    for (i = rank + 1; i <= N; i += np)
        sum = sum + f(((double)i-0.5)*w);
    //sum = sum * w;

    /*
     * The coleta routine gathers and prints the results
     */
    if(rank == 0){
        total += sum;
        for(int s = 1; s < np; s++){
            MPI_Recv(&sum, 1, MPI_DOUBLE, s, tag, MPI_COMM_WORLD, &status);
            total += sum;
        }
        total = total * w;
        tempo_final = MPI_Wtime();
        coleta(total, tempo_inicial, tempo_final);
        //N = solicita ();
        //MPI_Bcast(&N, 1, MPI_INT, 0, MPI_COMM_WORLD);
    }
    else{
        MPI_Send(&sum, 1, MPI_DOUBLE, dest, tag, MPI_COMM_WORLD);
    }
    //}

    MPI_Finalized(&finalizado);
    if(!finalizado)
        MPI_Finalize();
    return (0);
}
void op_exit() { op_mpi_exit(); op_rt_exit(); op_exit_core(); int flag = 0; MPI_Finalized(&flag); if(!flag) MPI_Finalize(); }
int SAMRAI_MPI::Finalized( int* flag) { int rval = MPI_SUCCESS; #ifdef HAVE_MPI rval = MPI_Finalized(flag); #else *flag = true; #endif return rval; }
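/* Hedged usage sketch for the wrapper above: teardown code can query it the
 * same way whether or not SAMRAI was built with MPI (HAVE_MPI). This assumes
 * Finalized is callable as a static member; the caller shown is hypothetical. */
void my_shutdown_hook()
{
    int flag = 0;
    SAMRAI_MPI::Finalized(&flag);
    if (!flag) {
        /* still safe to perform MPI-dependent cleanup here */
    }
}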
/*---------------------------------------------------------------------------*/ static void na_test_mpi_finalize(struct na_test_info *na_test_info) { int mpi_finalized = 0; MPI_Finalized(&mpi_finalized); if (!mpi_finalized && !na_test_info->mpi_no_finalize) { if (na_test_info->mpi_static) MPI_Comm_free(&na_test_info->mpi_comm); MPI_Finalize(); } }
/* * Class: mpi_MPI * Method: isFinalized * Signature: ()Z */ JNIEXPORT jboolean JNICALL Java_mpi_MPI_isFinalized(JNIEnv *env, jclass jthis) { int flag, rc; rc = MPI_Finalized(&flag); ompi_java_exceptionCheck(env, rc); if (flag==0) { return JNI_FALSE; } else { return JNI_TRUE; } }
void gmx_finalize_par(void) { #ifndef GMX_MPI /* Compiled without MPI, no MPI finalizing needed */ return; #else int initialized, finalized; int ret; MPI_Initialized(&initialized); if (!initialized) { return; } /* just as a check; we don't want to finalize twice */ MPI_Finalized(&finalized); if (finalized) { return; } /* We sync the processes here to try to avoid problems * with buggy MPI implementations that could cause * unfinished processes to terminate. */ MPI_Barrier(MPI_COMM_WORLD); /* if (DOMAINDECOMP(cr)) { if (cr->npmenodes > 0 || cr->dd->bCartesian) MPI_Comm_free(&cr->mpi_comm_mygroup); if (cr->dd->bCartesian) MPI_Comm_free(&cr->mpi_comm_mysim); } */ /* Apparently certain mpich implementations cause problems * with MPI_Finalize. In that case comment out MPI_Finalize. */ if (debug) { fprintf(debug, "Will call MPI_Finalize now\n"); } ret = MPI_Finalize(); if (debug) { fprintf(debug, "Return code from MPI_Finalize = %d\n", ret); } #endif }
int wc_mpi_finalize() { int rc = 0; int flag = 0; if (MPI_Finalized(&flag) == MPI_SUCCESS) { if (flag != 0) { WC_DEBUG("MPI has already been finalized.\n"); return 0; } } rc = MPI_Finalize(); WC_HANDLE_MPI_ERROR(MPI_Finalize, rc); return (rc == MPI_SUCCESS) ? 0 : -1; }
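/* Sketch pairing the two wrappers above (wc_mpi_init / wc_mpi_finalize),
 * using their 0-on-success, -1-on-failure convention. The main() shown is
 * illustrative only. */
#include <stdio.h>

int main(int argc, char **argv)
{
    if (wc_mpi_init(&argc, &argv) != 0) {
        fprintf(stderr, "wc_mpi_init failed\n");
        return 1;
    }
    /* ... application work ... */
    return (wc_mpi_finalize() != 0) ? 1 : 0;
}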
/*---------------------------------------------------------------------------*/ static void na_test_mpi_finalize(void) { int mpi_finalized = 0; MPI_Finalized(&mpi_finalized); if (!mpi_finalized && mpi_internally_initialized) { if (na_test_use_static_mpi_g) { MPI_Comm_free(&na_test_comm_g); } MPI_Finalize(); mpi_internally_initialized = NA_FALSE; } }
~MPI_Gang()
{
#  ifdef USE_MPI
   if( !owner ) return;
   /* Skip all cleanup if MPI has already been finalized; freeing handles
    * after MPI_Finalize is erroneous. */
   int final_flag;
   MPI_Finalized(&final_flag);
   if( final_flag ) return;
   if( pool.group!=MPI_GROUP_NULL ) MPI_Group_free(&pool.group);
   if( gang.group!=MPI_GROUP_NULL ) MPI_Group_free(&gang.group);
   if( lead.group!=MPI_GROUP_NULL ) MPI_Group_free(&lead.group);
   /* Each communicator is checked against its own handle before freeing. */
   if( pool.comm!=MPI_COMM_NULL && pool.comm!=MPI_COMM_WORLD ) MPI_Comm_free(&pool.comm);
   if( gang.comm!=MPI_COMM_NULL && gang.comm!=MPI_COMM_WORLD ) MPI_Comm_free(&gang.comm);
   if( lead.comm!=MPI_COMM_NULL && lead.comm!=MPI_COMM_WORLD ) MPI_Comm_free(&lead.comm);
#  endif
}
Master::~Master() { // Freeing the constants for (auto &c : constants_) { free(c.second); } // Freeing MPI objects int is_finalized; MPI_Finalized(&is_finalized); if (!is_finalized) { MPI_Win_free(&public_window_); MPI_Win_free(&critical_window_); MPI_Type_free(&MetaEvolutionDescriptionMPIDatatype); MPI_Comm_free(&MasterComm_); } }
/*-------------------------------------------------------------------------- * NAME * H5_init_library -- Initialize library-global information * USAGE * herr_t H5_init_library() * * RETURNS * Non-negative on success/Negative on failure * * DESCRIPTION * Initializes any library-global data or routines. * *-------------------------------------------------------------------------- */ herr_t H5_init_library(void) { herr_t ret_value = SUCCEED; FUNC_ENTER_NOAPI(FAIL) #ifdef H5_HAVE_PARALLEL { int mpi_initialized; int mpi_finalized; int mpi_code; MPI_Initialized(&mpi_initialized); MPI_Finalized(&mpi_finalized); #ifdef H5_HAVE_MPE /* Initialize MPE instrumentation library. */ if (!H5_MPEinit_g) { int mpe_code; if (mpi_initialized && !mpi_finalized) { mpe_code = MPE_Init_log(); HDassert(mpe_code >=0); H5_MPEinit_g = TRUE; } } #endif /*H5_HAVE_MPE*/ /* add an attribute on MPI_COMM_SELF to call H5_term_library when it is destroyed, i.e. on MPI_Finalize */ if (mpi_initialized && !mpi_finalized) { int key_val; if(MPI_SUCCESS != (mpi_code = MPI_Comm_create_keyval(MPI_COMM_NULL_COPY_FN, (MPI_Comm_delete_attr_function *)H5_mpi_delete_cb, &key_val, NULL))) HMPI_GOTO_ERROR(FAIL, "MPI_Comm_create_keyval failed", mpi_code) if(MPI_SUCCESS != (mpi_code = MPI_Comm_set_attr(MPI_COMM_SELF, key_val, NULL))) HMPI_GOTO_ERROR(FAIL, "MPI_Comm_set_attr failed", mpi_code) if(MPI_SUCCESS != (mpi_code = MPI_Comm_free_keyval(&key_val))) HMPI_GOTO_ERROR(FAIL, "MPI_Comm_free_keyval failed", mpi_code) } }
/** * Finish off MPI routines */ void nest::Communicator::finalize() { MPI_Type_free( &MPI_OFFGRID_SPIKE ); int finalized; MPI_Finalized( &finalized ); int initialized; MPI_Initialized( &initialized ); if ( finalized == 0 && initialized == 1 ) { if ( !net_->quit_by_error() ) #ifdef HAVE_MUSIC { if ( music_runtime == 0 ) { // we need a Runtime object to call finalize(), so we create // one, if we don't have one already music_runtime = new MUSIC::Runtime( music_setup, 1e-3 ); } music_runtime->finalize(); delete music_runtime; } #else /* #ifdef HAVE_MUSIC */ MPI_Finalize(); #endif /* #ifdef HAVE_MUSIC */ else { net_->message( SLIInterpreter::M_INFO, "Communicator::finalize()", "Calling MPI_Abort() due to errors in the script." ); MPI_Abort( MPI_COMM_WORLD, net_->get_exitcode() ); } }
/** Finalize DALEC. Must be called before MPI is finalized. DALEC calls are
 * not valid after finalization. Collective on world group.
 *
 * @return Zero on success */
int PDALEC_Finalize(void)
{
    int dalec_alive = atomic_fetch_sub_explicit(&(DALECI_GLOBAL_STATE.alive),
                                                1, memory_order_seq_cst);
    if (dalec_alive == 1) {
        /* Check for MPI initialization */
        int mpi_is_init, mpi_is_fin;
        MPI_Initialized(&mpi_is_init);
        MPI_Finalized(&mpi_is_fin);

        /* Free communicator if possible and return */
        if (!mpi_is_init || mpi_is_fin) {
            DALECI_Warning("MPI must be active when calling DALEC_Finalize");
            return DALEC_ERROR_MPI_USAGE;
        } else {
            int rc = MPI_Comm_free(&DALECI_GLOBAL_STATE.mpi_comm);
            return DALECI_Check_MPI("DALEC_Finalize", "MPI_Comm_free", rc);
        }
    } else {
        /* Library is still active. */
        return DALEC_SUCCESS;
    }
}
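/* Hedged usage sketch for the pair above: an application brackets its DALEC
 * usage inside the MPI lifetime, so that DALEC_Finalize (which frees the
 * duplicated communicator) runs while MPI is still active. The unprefixed
 * names DALEC_Initialize/DALEC_Finalize and the "dalec.h" header are
 * assumptions based on the comments, not verified API. */
#include <mpi.h>
#include "dalec.h"

int main(int argc, char **argv)
{
    MPI_Init(&argc, &argv);
    if (DALEC_Initialize(MPI_COMM_WORLD) != DALEC_SUCCESS)
        MPI_Abort(MPI_COMM_WORLD, 1);

    /* ... application work using DALEC ... */

    DALEC_Finalize();   /* must precede MPI_Finalize */
    MPI_Finalize();
    return 0;
}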
int delete_fn(MPI_Comm comm, int keyval, void *attribute_val, void *extra_state)
{
    int flag;
    int i;
    int my_idx = (int) (long) attribute_val;

    /* valid indices are 0..(NUM_TEST_ATTRS-1) */
    if (my_idx < 0 || my_idx >= NUM_TEST_ATTRS) {
        printf("internal error, my_idx=%d is invalid!\n", my_idx);
        fflush(stdout);
    }
    was_called[my_idx]++;

    /* this callback runs during MPI_Finalize, before MPI reports itself as
     * finalized */
    MPI_Finalized(&flag);
    if (flag) {
        printf("my_idx=%d, MPI_Finalized returned %d, should have been 0\n", my_idx, flag);
        foundError++;
    }

    /* since attributes were added in 0..(NUM_TEST_ATTRS-1) order, they will be
     * called in (NUM_TEST_ATTRS-1)..0 order */
    for (i = 0; i < my_idx; ++i) {
        if (was_called[i] != 0) {
            printf("my_idx=%d, was_called[%d]=%d but should be 0\n", my_idx, i, was_called[i]);
            foundError++;
        }
    }
    for (i = my_idx; i < NUM_TEST_ATTRS; ++i) {
        if (was_called[i] != 1) {
            printf("my_idx=%d, was_called[%d]=%d but should be 1\n", my_idx, i, was_called[i]);
            foundError++;
        }
    }
    return MPI_SUCCESS;
}
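/* Sketch of how a callback like delete_fn is typically attached in this kind
 * of test: one attribute per index on MPI_COMM_SELF, so MPI_Finalize invokes
 * the delete callbacks (in reverse order of creation, per the comment above)
 * before MPI reports itself finalized. keyvals[] and this helper are
 * illustrative assumptions. */
static int keyvals[NUM_TEST_ATTRS];

static void register_test_attrs(void)
{
    int i;
    for (i = 0; i < NUM_TEST_ATTRS; ++i) {
        MPI_Comm_create_keyval(MPI_COMM_NULL_COPY_FN, delete_fn,
                               &keyvals[i], NULL);
        MPI_Comm_set_attr(MPI_COMM_SELF, keyvals[i], (void *) (long) i);
    }
}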
std::ostream& pout() { #ifdef CH_MPI // the common case is _open == true, which just returns s_pout if ( ! s_pout_open ) { // the uncommon cae: the file isn't opened, MPI may not be // initialized, and the basename may not have been set int flag_i, flag_f; MPI_Initialized(&flag_i); MPI_Finalized(&flag_f); // app hasn't set a basename yet, so set the default if ( ! s_pout_init ) { s_pout_basename = "pout" ; s_pout_init = true ; } // if MPI not initialized, we cant open the file so return cout if ( ! flag_i || flag_f) { return std::cout; // MPI hasn't been started yet, or has ended.... } // MPI is initialized, so file must not be, so open it setFileName() ; openFile() ; // finally, in case the open failed, return cout if ( ! s_pout_open ) { return std::cout ; } } return s_pout ; #else return std::cout; #endif }
int pmrrr (char *jobz, char *range, int *np, double *D, double *E, double *vl, double *vu, int *il, int *iu, int *tryracp, MPI_Comm comm, int *nzp, int *offsetp, double *W, double *Z, int *ldz, int *Zsupp) { /* Input parameter */ int n = *np; bool onlyW = toupper(jobz[0]) == 'N'; bool wantZ = toupper(jobz[0]) == 'V'; bool cntval = toupper(jobz[0]) == 'C'; bool alleig = toupper(range[0]) == 'A'; bool valeig = toupper(range[0]) == 'V'; bool indeig = toupper(range[0]) == 'I'; /* Check input parameters */ if(!(onlyW || wantZ || cntval)) return 1; if(!(alleig || valeig || indeig)) return 1; if(n <= 0) return 1; if (valeig) { if(*vu<=*vl) return 1; } else if (indeig) { if (*il<1 || *il>n || *iu<*il || *iu>n) return 1; } /* MPI & multithreading info */ int is_init, is_final; MPI_Initialized(&is_init); MPI_Finalized(&is_final); if (is_init!=1 || is_final==1) { fprintf(stderr, "ERROR: MPI is not active! (init=%d, final=%d) \n", is_init, is_final); return 1; } MPI_Comm comm_dup; MPI_Comm_dup(comm, &comm_dup); int nproc, pid, thread_support; MPI_Comm_size(comm_dup, &nproc); MPI_Comm_rank(comm_dup, &pid); MPI_Query_thread(&thread_support); int nthreads; if ( !(thread_support == MPI_THREAD_MULTIPLE || thread_support == MPI_THREAD_FUNNELED) ) { /* Disable multithreading; note: to support multithreading with * MPI_THREAD_SERIALIZED the code must be changed slightly; this * is not supported at the moment */ nthreads = 1; } else { char *ompvar = getenv("PMR_NUM_THREADS"); if (ompvar == NULL) { nthreads = DEFAULT_NUM_THREADS; } else { nthreads = atoi(ompvar); } } #if defined(MVAPICH2_VERSION) if (nthreads>1) { int mv2_affinity=1; char *mv2_string = getenv("MV2_ENABLE_AFFINITY"); if (mv2_string != NULL) mv2_affinity = atoi(mv2_string); if (mv2_affinity!=0) { nthreads = 1; if (pid==0) { fprintf(stderr, "WARNING: PMRRR incurs a significant performance penalty when multithreaded with MVAPICH2 with affinity enabled. 
The number of threads has been reduced to one; please rerun with MV2_ENABLE_AFFINITY=0 or PMR_NUM_THREADS=1 in the future.\n"); fflush(stderr); } } } #endif /* If only maximal number of local eigenvectors are queried * return if possible here */ *nzp = 0; *offsetp = 0; if (cntval) { if ( alleig || n < DSTEMR_IF_SMALLER ) { *nzp = iceil(n,nproc); MPI_Comm_free(&comm_dup); return 0; } else if (indeig) { *nzp = iceil(*iu-*il+1,nproc); MPI_Comm_free(&comm_dup); return 0; } } /* Check if computation should be done by multiple processes */ int info; if (n < DSTEMR_IF_SMALLER) { info = handle_small_cases(jobz, range, np, D, E, vl, vu, il, iu, tryracp, comm, nzp, offsetp, W, Z, ldz, Zsupp); MPI_Comm_free(&comm_dup); return info; } /* Allocate memory */ double *Werr = (double*)malloc(n*sizeof(double)); assert(Werr!=NULL); double *Wgap = (double*)malloc(n*sizeof(double)); assert(Wgap!=NULL); double *gersch = (double*)malloc(2*n*sizeof(double)); assert(gersch!=NULL); int *iblock = (int*)calloc(n,sizeof(int)); assert(iblock!=NULL); int *iproc = (int*)malloc(n*sizeof(int)); assert(iproc!=NULL); int *Windex = (int*)malloc(n*sizeof(int)); assert(Windex!=NULL); int *isplit = (int*)malloc(n*sizeof(int)); assert(isplit!=NULL); int *Zindex = (int*)malloc(n*sizeof(int)); assert(Zindex!=NULL); proc_t *procinfo = (proc_t*)malloc(sizeof(proc_t)); assert(procinfo!=NULL); in_t *Dstruct = (in_t*)malloc(sizeof(in_t)); assert(Dstruct!=NULL); val_t *Wstruct = (val_t*)malloc(sizeof(val_t)); assert(Wstruct!=NULL); vec_t *Zstruct = (vec_t*)malloc(sizeof(vec_t)); assert(Zstruct!=NULL); tol_t *tolstruct = (tol_t*)malloc(sizeof(tol_t)); assert(tolstruct!=NULL); /* Bundle variables into a structures */ procinfo->pid = pid; procinfo->nproc = nproc; procinfo->comm = comm_dup; procinfo->nthreads = nthreads; procinfo->thread_support = thread_support; Dstruct->n = n; Dstruct->D = D; Dstruct->E = E; Dstruct->isplit = isplit; Wstruct->n = n; Wstruct->vl = vl; Wstruct->vu = vu; Wstruct->il = il; Wstruct->iu = iu; Wstruct->W = W; Wstruct->Werr = Werr; Wstruct->Wgap = Wgap; Wstruct->Windex = Windex; Wstruct->iblock = iblock; Wstruct->iproc = iproc; Wstruct->gersch = gersch; Zstruct->ldz = *ldz; Zstruct->nz = 0; Zstruct->Z = Z; Zstruct->Zsupp = Zsupp; Zstruct->Zindex = Zindex; /* Scale matrix to allowable range, returns 1.0 if not scaled */ double scale = scale_matrix(Dstruct, Wstruct, valeig); /* Test if matrix warrants more expensive computations which * guarantees high relative accuracy */ if (*tryracp) odrrr(&n, D, E, &info); /* 0 - rel acc */ else info = -1; int i; double *Dcopy, *E2copy; if (info == 0) { /* This case is extremely rare in practice */ tolstruct->split = DBL_EPSILON; /* Copy original data needed for refinement later */ Dcopy = (double*)malloc(n*sizeof(double)); assert(Dcopy!=NULL); memcpy(Dcopy, D, n*sizeof(double)); E2copy = (double*)malloc(n*sizeof(double)); assert(E2copy!=NULL); for (i=0; i<n-1; i++) E2copy[i] = E[i]*E[i]; } else { /* Neg. 
threshold forces old splitting criterion */ tolstruct->split = -DBL_EPSILON; *tryracp = 0; } if (!wantZ) { /* Compute eigenvalues to full precision */ tolstruct->rtol1 = 4.0 * DBL_EPSILON; tolstruct->rtol2 = 4.0 * DBL_EPSILON; } else { /* Do not compute to full accuracy first, but refine later */ tolstruct->rtol1 = sqrt(DBL_EPSILON); tolstruct->rtol1 = fmin(1e-2*MIN_RELGAP, tolstruct->rtol1); tolstruct->rtol2 = sqrt(DBL_EPSILON)*5.0E-3; tolstruct->rtol2 = fmin(5e-6*MIN_RELGAP, tolstruct->rtol2); tolstruct->rtol2 = fmax(4.0 * DBL_EPSILON, tolstruct->rtol2); } /* Compute all eigenvalues: sorted by block */ info = plarre(procinfo,jobz,range,Dstruct,Wstruct,tolstruct,nzp,offsetp); assert(info == 0); /* If just number of local eigenvectors are queried */ if (cntval & valeig) { clean_up(comm_dup, Werr, Wgap, gersch, iblock, iproc, Windex, isplit, Zindex, procinfo, Dstruct, Wstruct, Zstruct, tolstruct); return 0; } /* If only eigenvalues are to be computed */ if (!wantZ) { /* Refine to high relative with respect to input T */ if (*tryracp) { info = refine_to_highrac (procinfo, jobz, Dcopy, E2copy, Dstruct, nzp, Wstruct, tolstruct); assert(info == 0); } /* Sort eigenvalues */ qsort(W, n, sizeof(double), cmp); /* Only keep subset ifirst:ilast */ int ifirst, ilast, isize; int iil = *il; int iiu = *iu; int ifirst_tmp=iil; for (i=0; i<nproc; i++) { int chunk = (iiu-iil+1)/nproc + (i < (iiu-iil+1)%nproc); int ilast_tmp; if (i == nproc-1) { ilast_tmp = iiu; } else { ilast_tmp = ifirst_tmp + chunk - 1; ilast_tmp = imin(ilast_tmp, iiu); } if (i == pid) { ifirst = ifirst_tmp; ilast = ilast_tmp; isize = ilast - ifirst + 1; *offsetp = ifirst - iil; *nzp = isize; } ifirst_tmp = ilast_tmp + 1; ifirst_tmp = imin(ifirst_tmp, iiu + 1); } if (isize > 0) { memmove(W, &W[ifirst-1], *nzp * sizeof(double)); } /* If matrix was scaled, rescale eigenvalues */ invscale_eigenvalues(Wstruct, scale, *nzp); clean_up (comm_dup, Werr, Wgap, gersch, iblock, iproc, Windex, isplit, Zindex, procinfo, Dstruct, Wstruct, Zstruct, tolstruct); return 0; } /* end of only eigenvalues to compute */ /* Compute eigenvectors */ info = plarrv(procinfo, Dstruct, Wstruct, Zstruct, tolstruct, nzp, offsetp); assert(info == 0); /* Refine to high relative with respect to input matrix */ if (*tryracp) { info = refine_to_highrac(procinfo, jobz, Dcopy, E2copy, Dstruct, nzp, Wstruct, tolstruct); assert(info == 0); } /* If matrix was scaled, rescale eigenvalues */ invscale_eigenvalues(Wstruct, scale, n); /* Make the first nz elements of W contains the eigenvalues * associated to the process */ int j, im=0; for (j=0; j<n; j++) { if (iproc[j] == pid) { W[im] = W[j]; Windex[im] = Windex[j]; Zindex[im] = Zindex[j]; im++; } } clean_up(comm_dup, Werr, Wgap, gersch, iblock, iproc, Windex, isplit, Zindex, procinfo, Dstruct, Wstruct, Zstruct, tolstruct); if (*tryracp) { free(Dcopy); free(E2copy); } return 0; } /* end pmrrr */
int main(int argc, char *argv[]) { int rc, i, done, do_put_answer, all_ranks_put, work_unit_size, time_for_fake_work; int my_world_rank, nranks, num_work_units, num_answers, provided; int work_type, work_handle[HANDLE_SIZE], work_len, answer_rank; int num_handled_by_me; int max_message_size = 50; int req_types[4]; int num_types = 3; int type_vect[4] = { WORK, ANSWER }; int num_types_in_req; int final_rc; char thread_type[32]; char *work_unit_buf; char *findbuf = (char *)malloc(75); char *getbuf = (char *)malloc(75); char *ansbuf = (char *)malloc(75); double temptime; double start_job_time, end_put_time, start_work_time, end_work_time; double total_work_time, total_loop_time; double total_reserve_time, total_get_time; double total_put_time = 0.0; do_put_answer = DEFAULT_DO_PUT_ANSWER; /* will halt by exhaustion */ all_ranks_put = DEFAULT_ALL_RANKS_PUT; work_unit_size = DEFAULT_WORK_UNIT_SIZE; num_work_units = DEFAULT_NUM_WORK_UNITS; time_for_fake_work = DEFAULT_NSECS_FAKE_WORK; total_work_time = 0.0; total_loop_time = 0.0; total_reserve_time = 0.0; total_get_time = 0.0; for (i=1; i < argc; i++) { if (strcmp(argv[i],"-dpa") == 0) do_put_answer = 1; else if (strcmp(argv[i], "-alt") == 0) all_ranks_put = 1; else if (strcmp(argv[i],"-n") == 0) num_work_units = atoi(argv[++i]); else if (strcmp(argv[i],"-s") == 0) work_unit_size = atoi(argv[++i]); else if (strcmp(argv[i],"-t") == 0) time_for_fake_work = atoi(argv[++i]); else { printf("unrecognized cmd-line arg at %d :%s:\n", my_world_rank, i, argv[i]); exit(-1); } } rc = MPI_Init_thread(NULL,NULL,MPI_THREAD_MULTIPLE,&provided); if (rc != MPI_SUCCESS) { printf("MPI_Init_thread failed with rc=%d\n", rc); exit(-1); } MPI_Comm_size(MPI_COMM_WORLD,&nranks); MPI_Comm_rank(MPI_COMM_WORLD,&my_world_rank); num_handled_by_me = 0; work_unit_buf = (char *)malloc(work_unit_size); rc = PP_Init(SRVR_MAX_MALLOC_AMT,num_types,type_vect); if (rc != PP_SUCCESS) { MPI_Abort(MPI_COMM_WORLD, -1); exit(-1); } // print out info chart if (my_world_rank == 0) { printf("------------------------------------------------------------------------------\n"); printf("%1s%30s%2s%23s%2s%18s%2s\n", "|", "ARGUMENTS", "|", "RETURN CODES", "|", "WORK UNITS", "|"); printf("%1s%20s%3s%7d%2s%16s%3s%4d%2s%10s%3s%5d%2s\n", "|", "do_put_answer", "=", do_put_answer, "|", "PP_FAIL", "=", PP_FAIL, "|", "WORK", "=", WORK, "|"); printf("%1s%20s%3s%7d%2s%16s%3s%4d%2s%10s%3s%5d%2s\n", "|", "all_ranks_put", "=", all_ranks_put, "|", "PP_SUCCESS", "=", PP_SUCCESS, "|", "ANSWER", "=", ANSWER, "|"); printf("%1s%20s%3s%7d%2s%16s%3s%4d%2s%20s\n", "|", "num_work_units", "=", num_work_units, "|", "PP_NO_MORE_WORK", "=", PP_NO_MORE_WORK, "|", "|"); printf("%1s%20s%3s%7d%2s%16s%3s%4d%2s%20s\n", "|", "work_unit_size", "=", work_unit_size, "|", "PP_EXHAUSTION", "=", PP_EXHAUSTION, "|", "|"); printf("%1s%20s%3s%7d%2s%25s%20s\n", "|", "time_for_fake_work", "=", time_for_fake_work, "|", "|", "|"); printf("%1s%20s%3s%7d%2s%25s%20s\n", "|", "num_ranks", "=", nranks, "|", "|", "|"); printf("------------------------------------------------------------------------------\n"); printf("***\n"); } rc = MPI_Barrier( MPI_COMM_WORLD ); start_job_time = end_work_time = MPI_Wtime(); /* dummy val until set below */ int my_put_count = 0; if (all_ranks_put == 1) { num_answers = 0; for (i = 0; i < num_work_units; i++) { memset(work_unit_buf, 'X', work_unit_size); if (work_unit_size >= 20) sprintf(work_unit_buf, "workunit:r%d:u%d", my_world_rank, i); rc = PP_Put(work_unit_buf, work_unit_size, WORK, -1, 0, work_handle); 
my_put_count++; printf("rank=%2d PUT rc=%d data=%s handle_key=%d:%d\n", my_world_rank, rc, work_unit_buf, work_handle[0], work_handle[1]); } total_put_time = MPI_Wtime() - start_job_time; num_work_units *= nranks; } else { if (my_world_rank == 0) { /* if master app, put work */ num_answers = 0; for (i=0; i < num_work_units; i++) { memset(work_unit_buf, 'X', work_unit_size); if (work_unit_size >= 20) sprintf(work_unit_buf,"workunit:r%d:u%d", my_world_rank, i); rc = PP_Put( work_unit_buf, work_unit_size, WORK, -1, -1, work_handle); my_put_count++; printf("rank=%2d PUT rc=%d DATA=%s handle_key=%d:%d\n", my_world_rank, rc, work_unit_buf, work_handle[0], work_handle[1]); } total_put_time = MPI_Wtime() - start_job_time; } } rc = MPI_Barrier( MPI_COMM_WORLD ); end_put_time = start_work_time = MPI_Wtime(); done = 0; while ( !done ) { if (do_put_answer) { if (my_world_rank == 0) { req_types[0] = ANSWER; req_types[1] = WORK; num_types_in_req = 2; } else { req_types[0] = WORK; num_types_in_req = 1; } } else { req_types[0] = WORK; num_types_in_req = 1; } temptime = MPI_Wtime(); memset(findbuf, ' ', max_message_size); memset(getbuf, ' ', max_message_size); memset(ansbuf, ' ', max_message_size); // if all ranks put data targeted to rank 0, rank 0 should do a "find" rather than find and reserve if (all_ranks_put) { //printf("Before find on Rank %d\n", my_world_rank); rc = PP_Find(num_types_in_req, req_types, &work_len, &work_type, work_handle); //printf("Find rc=%d\n", rc); if (rc == PP_SUCCESS) { sprintf(findbuf, "FIND: rc=%d h_key=%d:%d type=%d size=%d", rc, work_handle[0], work_handle[1], work_type, work_len); } else { sprintf(findbuf, "FIND: rc=%d", rc); } } else { rc = PP_FindAndReserve(num_types_in_req, req_types, &work_len, &work_type, work_handle); if (rc == PP_SUCCESS) { sprintf(findbuf, "FIND: rc=%d h_key=%d:%d type=%d size=%d", rc, work_handle[0], work_handle[1], work_type, work_len); } else { sprintf(findbuf, "FIND: rc=%d", rc); } } if (rc == PP_EXHAUSTION) { MPI_Barrier(MPI_COMM_WORLD); printf("rank=%2d Terminated by EXHAUSTION\n", my_world_rank); final_rc = rc; break; } else if ( rc == PP_NO_MORE_WORK ) { MPI_Barrier(MPI_COMM_WORLD); printf("rank=%2d Terminated by NO_MORE_WORK\n", my_world_rank); final_rc = rc; break; } if (work_type == WORK) { total_reserve_time += MPI_Wtime() - temptime; /* only count for work */ temptime = MPI_Wtime(); //if(my_world_rank ==1) // printf("Starting Get on rank1\n"); rc = PP_Get(work_unit_buf, work_handle); total_get_time += (MPI_Wtime() - temptime); if (rc == PP_SUCCESS) { sprintf(getbuf, "GET: rc=%d h_key=%d:%d data=%s", rc, work_handle[0], work_handle[1], work_unit_buf); } else { sprintf(getbuf, "GET: rc=%d", rc); } /* got good work, do dummy/fake work */ num_handled_by_me++; temptime = MPI_Wtime(); while (1) { for (i=0; i < 1000000; i++) ; if (((MPI_Wtime() - temptime) * 1000) > time_for_fake_work) break; } if (do_put_answer) { rc = PP_Put(NULL, 0, ANSWER, -1, -1, work_handle); sprintf(ansbuf, "PUT_ANS: rc=%d h_key=%d:%d", rc, work_handle[0], work_handle[1]); } end_work_time = MPI_Wtime(); /* chgs on each work unit */ } else if (work_type == ANSWER) { num_answers++; sprintf(getbuf, "FOUND_ANS: num_answers: %d", num_answers); if (all_ranks_put) { rc = PP_Get(work_unit_buf, work_handle); } if (num_answers >= num_work_units) { PP_Set_problem_done(); } } else { printf("rank=%2d ERROR UNEXPECTED_WORK_TYPE=%d\n", my_world_rank, work_type ); PP_Abort(-1); } printf("rank=%2d %-45s %-45s %-45s\n", my_world_rank, findbuf, getbuf, ansbuf); if (work_type == ANSWER 
&& num_answers >= num_work_units) { printf("rank=%2d PP_Set_problem_done() has been called\n", my_world_rank); } } rc = MPI_Barrier( MPI_COMM_WORLD ); total_loop_time = MPI_Wtime() - start_work_time; float avg_work_time, avg_reserve_time, avg_get_time; avg_work_time = ((float)total_loop_time) / ((float)num_handled_by_me); int *ar_num_handled_by_rank = NULL; int *ar_final_rc = NULL; int *ar_put_counts = NULL; double *ar_total_reserve_time = NULL; double *ar_total_get_time = NULL; double *ar_total_put_time = NULL; if (my_world_rank == 0) { ar_num_handled_by_rank = (int *)malloc(nranks * sizeof(int)); ar_final_rc = (int *)malloc(nranks * sizeof(int)); ar_put_counts = (int *)malloc(nranks * sizeof(int)); ar_total_reserve_time = (double *)malloc(nranks * sizeof(double)); ar_total_get_time = (double *)malloc(nranks * sizeof(double)); ar_total_put_time = (double *)malloc(nranks * sizeof(double)); } MPI_Gather(&num_handled_by_me, 1, MPI_INT, ar_num_handled_by_rank, 1, MPI_INT, 0, MPI_COMM_WORLD); MPI_Gather(&final_rc, 1, MPI_INT, ar_final_rc, 1, MPI_INT, 0, MPI_COMM_WORLD); MPI_Gather(&my_put_count, 1, MPI_INT, ar_put_counts, 1, MPI_INT, 0, MPI_COMM_WORLD); MPI_Gather(&total_reserve_time, 1, MPI_DOUBLE, ar_total_reserve_time, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD); MPI_Gather(&total_get_time, 1, MPI_DOUBLE, ar_total_get_time, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD); MPI_Gather(&total_put_time, 1, MPI_DOUBLE, ar_total_put_time, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD); int grand_total_work = 0; int grand_total_puts = 0; double grand_total_resv = 0; double grand_total_get = 0; double grand_total_put = 0; if (my_world_rank == 0) { printf("***\n"); printf("----Stats:----\n"); printf("%10s%15s%15s%20s%20s%20s%15s%20s%20s\n", "RANK", "NUM_PUTS", "WORK_UNITS", "TOTAL_PUT_TIME", "TOTAL_RESV_TIME", "TOTAL_GET_TIME", "FINAL_RC", "AVG_WORK_TIME", "WORK_PER_SECOND"); for (i=0; i < nranks; i++) { printf("%10d%15d%15d%20.5f%20.5f%20.5f%15d%20.5f%20.5f\n", i, ar_put_counts[i], ar_num_handled_by_rank[i], ar_total_put_time[i], ar_total_reserve_time[i], ar_total_get_time[i], ar_final_rc[i], ((double)total_loop_time / (double)ar_num_handled_by_rank[i]), (((double)ar_num_handled_by_rank[i]) / total_loop_time)); grand_total_work += ar_num_handled_by_rank[i]; grand_total_puts += ar_put_counts[i]; grand_total_resv += ar_total_reserve_time[i]; grand_total_get += ar_total_get_time[i]; grand_total_put += ar_total_put_time[i]; } printf("%10s%15s%15s%20s%20s%20s\n", "-", "-", "-", "-", "-", "-"); printf("%10s%15d%15d%20.5f%20.5f%20.5f\n", "TOTALS:", grand_total_puts, grand_total_work, grand_total_put, grand_total_resv, grand_total_get); printf("%11s%.5f\n", "TIME: ", total_loop_time); } PP_Finalize(); rc = MPI_Finalized(&i); if ( ! i) MPI_Finalize(); return 0; }
void myhbwmalloc_init(void) { /* set to NULL before trying to initialize. if we return before * successful creation of the mspace, then it will still be NULL, * and we can use that in subsequent library calls to determine * that the library failed to initialize. */ myhbwmalloc_mspace = NULL; /* verbose printout? */ myhbwmalloc_verbose = 0; { char * env_char = getenv("HBWMALLOC_VERBOSE"); if (env_char != NULL) { myhbwmalloc_verbose = 1; printf("hbwmalloc: HBWMALLOC_VERBOSE set\n"); } } /* fail hard or soft? */ myhbwmalloc_hardfail = 1; { char * env_char = getenv("HBWMALLOC_SOFTFAIL"); if (env_char != NULL) { myhbwmalloc_hardfail = 0; printf("hbwmalloc: HBWMALLOC_SOFTFAIL set\n"); } } /* set the atexit handler that will destroy the mspace and free the numa allocation */ atexit(myhbwmalloc_final); /* detect and configure use of NUMA memory nodes */ { int max_possible_node = numa_max_possible_node(); int num_possible_nodes = numa_num_possible_nodes(); int max_numa_nodes = numa_max_node(); int num_configured_nodes = numa_num_configured_nodes(); int num_configured_cpus = numa_num_configured_cpus(); if (myhbwmalloc_verbose) { printf("hbwmalloc: numa_max_possible_node() = %d\n", max_possible_node); printf("hbwmalloc: numa_num_possible_nodes() = %d\n", num_possible_nodes); printf("hbwmalloc: numa_max_node() = %d\n", max_numa_nodes); printf("hbwmalloc: numa_num_configured_nodes() = %d\n", num_configured_nodes); printf("hbwmalloc: numa_num_configured_cpus() = %d\n", num_configured_cpus); } /* FIXME this is a hack. assumes HBW is only numa node 1. */ if (num_configured_nodes <= 2) { myhbwmalloc_numa_node = num_configured_nodes-1; } else { fprintf(stderr,"hbwmalloc: we support only 2 numa nodes, not %d\n", num_configured_nodes); } if (myhbwmalloc_verbose) { for (int i=0; i<num_configured_nodes; i++) { unsigned max_numa_cpus = numa_num_configured_cpus(); struct bitmask * mask = numa_bitmask_alloc( max_numa_cpus ); int rc = numa_node_to_cpus(i, mask); if (rc != 0) { fprintf(stderr, "hbwmalloc: numa_node_to_cpus failed\n"); } else { printf("hbwmalloc: numa node %d cpu mask:", i); for (unsigned j=0; j<max_numa_cpus; j++) { int bit = numa_bitmask_isbitset(mask,j); printf(" %d", bit); } printf("\n"); } numa_bitmask_free(mask); } fflush(stdout); } } #if 0 /* unused */ /* see if the user specifies a slab size */ size_t slab_size_requested = 0; { char * env_char = getenv("HBWMALLOC_BYTES"); if (env_char!=NULL) { long units = 1L; if ( NULL != strstr(env_char,"G") ) units = 1000000000L; else if ( NULL != strstr(env_char,"M") ) units = 1000000L; else if ( NULL != strstr(env_char,"K") ) units = 1000L; else units = 1L; int num_count = strspn(env_char, "0123456789"); memset( &env_char[num_count], ' ', strlen(env_char)-num_count); slab_size_requested = units * atol(env_char); } if (myhbwmalloc_verbose) { printf("hbwmalloc: requested slab_size_requested = %zu\n", slab_size_requested); } } #endif /* see what libnuma says is available */ size_t myhbwmalloc_slab_size; { int node = myhbwmalloc_numa_node; long long freemem; long long maxmem = numa_node_size64(node, &freemem); if (myhbwmalloc_verbose) { printf("hbwmalloc: numa_node_size64 says maxmem=%lld freemem=%lld for numa node %d\n", maxmem, freemem, node); } myhbwmalloc_slab_size = freemem; } /* assume threads, disable if MPI knows otherwise, then allow user to override. 
*/ int multithreaded = 1; #ifdef HAVE_MPI int nprocs; { int is_init, is_final; MPI_Initialized(&is_init); MPI_Finalized(&is_final); if (is_init && !is_final) { MPI_Comm_size(MPI_COMM_WORLD, &nprocs); } /* give equal portion to every MPI process */ myhbwmalloc_slab_size /= nprocs; /* if the user initializes MPI with MPI_Init or * MPI_Init_thread(MPI_THREAD_SINGLE), they assert there * are no threads at all, which means we can skip the * malloc mspace lock. * * if the user lies to MPI, they deserve any bad thing * that comes of it. */ int provided; MPI_Query_thread(&provided); if (provided==MPI_THREAD_SINGLE) { multithreaded = 0; } else { multithreaded = 1; } if (myhbwmalloc_verbose) { printf("hbwmalloc: MPI processes = %d (threaded = %d)\n", nprocs, multithreaded); printf("hbwmalloc: myhbwmalloc_slab_size = %d\n", myhbwmalloc_slab_size); } } #endif /* user can assert that hbwmalloc and friends need not be thread-safe */ { char * env_char = getenv("HBWMALLOC_LOCKLESS"); if (env_char != NULL) { multithreaded = 0; if (myhbwmalloc_verbose) { printf("hbwmalloc: user has disabled locking in mspaces by setting HBWMALLOC_LOCKLESS\n"); } } } myhbwmalloc_slab = numa_alloc_onnode( myhbwmalloc_slab_size, myhbwmalloc_numa_node); if (myhbwmalloc_slab==NULL) { fprintf(stderr, "hbwmalloc: numa_alloc_onnode returned NULL for size = %zu\n", myhbwmalloc_slab_size); return; } else { if (myhbwmalloc_verbose) { printf("hbwmalloc: numa_alloc_onnode succeeded for size %zu\n", myhbwmalloc_slab_size); } /* part (less than 128*sizeof(size_t) bytes) of this space is used for bookkeeping, * so the capacity must be at least this large */ if (myhbwmalloc_slab_size < 128*sizeof(size_t)) { fprintf(stderr, "hbwmalloc: not enough space for mspace bookkeeping\n"); return; } /* see above regarding if the user lies to MPI. */ int locked = multithreaded; myhbwmalloc_mspace = create_mspace_with_base( myhbwmalloc_slab, myhbwmalloc_slab_size, locked); if (myhbwmalloc_mspace == NULL) { fprintf(stderr, "hbwmalloc: create_mspace_with_base returned NULL\n"); return; } else if (myhbwmalloc_verbose) { printf("hbwmalloc: create_mspace_with_base succeeded for size %zu\n", myhbwmalloc_slab_size); } } }
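/* Hedged restatement of the thread-level probe used above, in isolation:
 * mspace locking can be skipped only when MPI asserts the process is
 * single-threaded. needs_mspace_locking() is an illustrative helper, not
 * part of the library; it only queries MPI while MPI is active and otherwise
 * assumes threads, as the comment above suggests. */
static int needs_mspace_locking(void)
{
    int is_init = 0, is_final = 0, provided = MPI_THREAD_MULTIPLE;
    MPI_Initialized(&is_init);
    MPI_Finalized(&is_final);
    if (is_init && !is_final)
        MPI_Query_thread(&provided);
    return provided != MPI_THREAD_SINGLE;
}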
/*------------------------------------------------------------------------- * Function: h5_show_hostname * * Purpose: Show hostname. Show process ID if in MPI environment. * * Return: void * * Programmer: Albert Cheng * 2002/04/22 * * Modifications: * *------------------------------------------------------------------------- */ void h5_show_hostname(void) { char hostname[80]; #ifdef H5_HAVE_WIN32_API WSADATA wsaData; int err; #endif /* try show the process or thread id in multiple processes cases*/ #ifdef H5_HAVE_PARALLEL { int mpi_rank, mpi_initialized, mpi_finalized; MPI_Initialized(&mpi_initialized); MPI_Finalized(&mpi_finalized); if(mpi_initialized && !mpi_finalized) { MPI_Comm_rank(MPI_COMM_WORLD,&mpi_rank); printf("MPI-process %d.", mpi_rank); } else printf("thread 0."); } #elif defined(H5_HAVE_THREADSAFE) printf("thread %lu.", HDpthread_self_ulong()); #else printf("thread 0."); #endif #ifdef H5_HAVE_WIN32_API err = WSAStartup( MAKEWORD(2,2), &wsaData ); if ( err != 0 ) { /* could not find a usable WinSock DLL */ return; } /* Confirm that the WinSock DLL supports 2.2.*/ /* Note that if the DLL supports versions greater */ /* than 2.2 in addition to 2.2, it will still return */ /* 2.2 in wVersion since that is the version we */ /* requested. */ if ( LOBYTE( wsaData.wVersion ) != 2 || HIBYTE( wsaData.wVersion ) != 2 ) { /* could not find a usable WinSock DLL */ WSACleanup( ); return; } #endif #ifdef H5_HAVE_GETHOSTNAME if (gethostname(hostname, (size_t)80) < 0) printf(" gethostname failed\n"); else printf(" hostname=%s\n", hostname); #else printf(" gethostname not supported\n"); #endif #ifdef H5_HAVE_WIN32_API WSACleanup(); #endif }