/**
 * Return the MPI communicator to use for a matrix, starting MPI on demand.
 *
 * When A is NULL, or when the linear-solver parameters of A still hold
 * MPI_COMM_NULL, MPI is initialized if it is not already, and (for a
 * non-NULL A) MPI_COMM_WORLD is stored in those parameters.
 *
 * @param A matrix whose linear-solver parameters carry the communicator;
 *          may be NULL.
 * @return the communicator stored in the parameters of A, or MPI_COMM_WORLD
 *         when A is NULL.
 */
MPI_Comm NM_MPI_com(NumericsMatrix* A)
{
  if (!A || NM_linearSolverParams(A)->mpi_com == MPI_COMM_NULL)
  {
    int myid;
    int mpi_already_up = 0;

    /* MPI_Init may be called at most once per process. Without this guard a
     * second call with A == NULL, or a call made after the application has
     * initialized MPI itself, would initialize MPI twice. */
    CHECK_MPI(MPI_Initialized(&mpi_already_up));
    if (!mpi_already_up)
    {
      int argc = 0;
      /* C99 requires that argv[argc] == NULL. With openmpi 1.8, we get a
       * segfault if this is not true */
      char *argv0 = NULL;
      char **argv = &argv0;
      CHECK_MPI(MPI_Init(&argc, &argv));
    }

    /* The rank value is not used afterwards; the query only verifies that
     * MPI_COMM_WORLD is usable. */
    CHECK_MPI(MPI_Comm_rank(MPI_COMM_WORLD, &myid));

    if (A)
    {
      NM_linearSolverParams(A)->mpi_com = MPI_COMM_WORLD;
      /* NOTE(review): mpi_com_init presumably records that MPI was started
       * on behalf of this matrix, so it is only raised when MPI_Init was
       * actually called here -- confirm against the code reading the flag. */
      if (!mpi_already_up)
      {
        NM_linearSolverParams(A)->mpi_com_init = 1;
      }
    }
  }

  return A ? NM_linearSolverParams(A)->mpi_com : MPI_COMM_WORLD;
}
int main(int argc, char ** argv) { int rc; /* These are the desired and available thread support. A hybrid code where all MPI calls are made from the main thread can used FUNNELED. If threads are making MPI calls, MULTIPLE is appropriate. */ int requested = MPI_THREAD_FUNNELED, provided; /* MPICH2 will be substantially more efficient than OpenMPI for MPI_THREAD_{FUNNELED,SERIALIZED} but this is unlikely to be a serious bottleneck. */ rc = MPI_Init_thread(&argc, &argv, requested, &provided); CHECK_MPI(rc); if (provided<requested) { printf("MPI_Init_thread provided %s when %s was requested. Exiting. \n", MPI_THREAD_STRING(provided), MPI_THREAD_STRING(requested) ); exit(1); } int world_size, world_rank; rc = MPI_Comm_size(MPI_COMM_WORLD,&world_size); CHECK_MPI(rc); rc = MPI_Comm_rank(MPI_COMM_WORLD,&world_rank); CHECK_MPI(rc); int root = 0, count = 1; /* the ternary is often branchless... */ long i, n = (argc>1 ? atol(argv[1]) : 100000); rc = MPI_Bcast(&n, count, MPI_LONG, root, MPI_COMM_WORLD); CHECK_MPI(rc); if (world_rank==0) printf("%d: using %ld samples.\n", world_rank, world_size*n); /* seed the RNG with something unique to a rank */ srand(world_rank); long in = 0, total = 0; for (i=0;i<n;i++) { register double x = (double)rand()/(double)RAND_MAX; register double y = (double)rand()/(double)RAND_MAX; register double z = x*x + y*y; if (z<1.0) in++; } rc = MPI_Reduce(&in, &total, count, MPI_LONG, MPI_SUM, root, MPI_COMM_WORLD); CHECK_MPI(rc); double pi = 4.0*(double)total/(world_size*n); if (world_rank==0) printf("%d: pi = %12.8lf.\n", world_rank, pi); MPI_Finalize(); return 0; }
/*
 * Launch the computation, out-of-core and communication workers and wait
 * for every thread that was created.
 *
 * Slots of the thread table are laid out as
 *   [0, calc_thrdnbr)                          computation threads
 *   [calc_thrdnbr, calc_thrdnbr+comm_thrdnbr)  communication threads
 *   [calc_thrdnbr+comm_thrdnbr, total)         out-of-core threads
 * Each worker receives a pointer to its own sopthread_data_t holding its
 * slot index (me) and the data pointer of its group.
 *
 * Communication work is only started when comm_thrdnbr > 0 and
 * comm_routine is not NULL. With more than one communication thread they
 * are spawned like the others; with exactly one, comm_routine is run
 * directly in the calling thread (after the other threads have been
 * created, before they are joined) and no thread is created for it.
 *
 * Parameters:
 *   sopalin_data_ref  Sopalin_Data_t pointer, passed as void *.
 *   procnum           process number; used for the MARCEL2 cpu choice.
 *   procnbr, ptr      unused.
 *   verbose           verbosity level; above API_VERBOSE_NO a summary of
 *                     the launch is printed.
 *   calc_thrdnbr, calc_routine, calc_data   computation group.
 *   comm_thrdnbr, comm_routine, comm_data   communication group.
 *   ooc_thrdnbr,  ooc_routine,  ooc_data    out-of-core group.
 *
 * A failing pthread_create or pthread_join is reported through errorPrint
 * followed by EXIT(MOD_SOPALIN, THREAD_ERR).
 */
void sopalin_launch_thread(void * sopalin_data_ref,
                           PASTIX_INT procnum, PASTIX_INT procnbr, void *ptr, PASTIX_INT verbose,
                           PASTIX_INT calc_thrdnbr, void * (*calc_routine)(void *), void *calc_data,
                           PASTIX_INT comm_thrdnbr, void * (*comm_routine)(void *), void *comm_data,
                           PASTIX_INT ooc_thrdnbr,  void * (*ooc_routine)(void *),  void *ooc_data)
{
  sopthread_data_t *d       = NULL;
  pthread_t        *calltab = NULL;
  PASTIX_INT        i;
  PASTIX_INT        ret;
  PASTIX_INT        thrdnbr;
  PASTIX_INT        thrdnbr_wo_ooc;
  /* Set once communication threads have really been created, so that the
   * join loop never touches a pthread_t that was left uninitialized. */
  int               comm_spawned = 0;
  Sopalin_Data_t   *sopalin_data = sopalin_data_ref;

  (void)procnbr;
  (void)ptr;

  thrdnbr        = calc_thrdnbr + comm_thrdnbr + ooc_thrdnbr;
  thrdnbr_wo_ooc = calc_thrdnbr + comm_thrdnbr;

  if (verbose > API_VERBOSE_NO)
    {
      print_one("Launching %d threads"
                " (%d commputation, %d communication, %d out-of-core)\n",
                (int) thrdnbr, (int)calc_thrdnbr, (int)comm_thrdnbr, (int)ooc_thrdnbr);
    }

  MALLOC_INTERN(calltab, thrdnbr, pthread_t);
  MALLOC_INTERN(d,       thrdnbr, sopthread_data_t);

  /* Several computation threads combined with several MPI processes:
   * check the thread level against the configured communication mode. */
  if (calc_thrdnbr > 1)
    {
      int comm_size;
      CHECK_MPI(MPI_Comm_size(MPI_COMM_WORLD, &comm_size));
      if (comm_size > 1)
        {
          CHECK_THREAD_LEVEL(sopalin_data->sopar->iparm[IPARM_THREAD_COMM_MODE]);
        }
    }

  /* Launch the computation threads */
  for (i = 0; i < calc_thrdnbr; i++)
    {
      pthread_attr_t attr;
      pthread_attr_init(&attr);

#ifdef MARCEL2
      {
        /* Spread threads over the online processors, but only when there
         * are no more threads than processors. */
        int cpu = (i+procnum*thrdnbr)%sysconf(_SC_NPROCESSORS_ONLN);
        if (thrdnbr <= sysconf(_SC_NPROCESSORS_ONLN))
          marcel_attr_setvpset(&attr, MARCEL_VPSET_VP(cpu));
      }
#endif

      d[i].me   = i;
      d[i].data = calc_data;
      ret = pthread_create(&calltab[i], &attr, calc_routine, (void *)&d[i]);
      /* The attribute object is no longer needed once the thread exists. */
      pthread_attr_destroy(&attr);
      if (ret) {errorPrint("thread create."); EXIT(MOD_SOPALIN,THREAD_ERR);}
    }

  /* Launch the out-of-core loading threads */
  for (i = thrdnbr_wo_ooc; i < thrdnbr; i++)
    {
      pthread_attr_t attr;
      pthread_attr_init(&attr);

#ifdef MARCEL2
      {
        int cpu = (i+procnum*thrdnbr)%sysconf(_SC_NPROCESSORS_ONLN);
        if (thrdnbr <= sysconf(_SC_NPROCESSORS_ONLN))
          marcel_attr_setvpset(&attr, MARCEL_VPSET_VP(cpu));
      }
#endif

      d[i].me   = i;
      d[i].data = ooc_data;
      ret = pthread_create(&calltab[i], &attr, ooc_routine, (void *)&d[i]);
      pthread_attr_destroy(&attr);
      if (ret) {errorPrint("thread create."); EXIT(MOD_SOPALIN,THREAD_ERR);}
    }

  /* Launch the communication threads */
  if ((comm_thrdnbr > 0) && (comm_routine != NULL))
    {
      if (comm_thrdnbr > 1)
        {
          for (i = calc_thrdnbr; i < thrdnbr_wo_ooc; i++)
            {
              pthread_attr_t attr;
              pthread_attr_init(&attr);

              d[i].me   = i;
              d[i].data = comm_data;
              ret = pthread_create(&calltab[i], &attr, comm_routine, (void *)&d[i]);
              pthread_attr_destroy(&attr);
              if (ret) {errorPrint("thread create."); EXIT(MOD_SOPALIN,THREAD_ERR);}
            }
          comm_spawned = 1;
        }
      else
        {
          /* A single communication worker runs in the calling thread. */
          d[calc_thrdnbr].me   = calc_thrdnbr;
          d[calc_thrdnbr].data = comm_data;
          comm_routine((void*)&d[calc_thrdnbr]);
        }
    }

  /* Wait for every thread that was launched */
  for (i = 0; i < thrdnbr; i++)
    {
      /* Slots of the communication range hold no thread when the single
       * worker ran inline or when no communication routine was given. */
      if (!comm_spawned && (i >= calc_thrdnbr) && (i < thrdnbr_wo_ooc))
        continue;

      ret = pthread_join(calltab[i], NULL);
      if (ret) {errorPrint("thread join."); EXIT(MOD_SOPALIN,THREAD_ERR);}
    }

  memFree_null(calltab);
  memFree_null(d);
}