/**
    Purpose
    -------
    Applies the second-stage (bulge chasing) reflectors V2 from the left to
    the eigenvectors Z, i.e. Z := (I - V2*T2*V2') * Z, using nrgpu GPUs and,
    when threads > 1, the CPU cores as well.  The ne columns of Z are split:
    the GPUs handle columns 1:n_gpu and the CPU threads handle n_gpu+1:ne.

    Arguments
    ---------
    @param[in]     nrgpu    Number of GPUs; also used to scale the GPU/CPU split.
    @param[in]     threads  Number of pthreads used by the hybrid path
                            (the calling thread counts as thread 0).
    @param[in]     uplo     Not referenced by this routine.
    @param[in]     n        Row count forwarded to the apply-Q kernels.
    @param[in]     nb       Band width forwarded to the apply-Q kernels.
    @param[in]     ne       Number of columns (eigenvectors) of Z to update.
    @param[in]     Vblksiz  Block size of the Householder vectors.
    @param[in,out] Z        Host matrix of eigenvectors, leading dimension ldz.
    @param[in]     ldz      Leading dimension of Z.
    @param[in]     V        Householder vectors, leading dimension ldv.
    @param[in]     ldv      Leading dimension of V.
    @param[in]     TAU      Scalar factors (only used by the hybrid path).
    @param[in]     T        Block factors, leading dimension ldt.
    @param[in]     ldt      Leading dimension of T.
    @param[out]    info     Passed to the GPU-only kernel; set to the
                            allocation error code if workspace allocation fails.

    @return MAGMA_SUCCESS, or the error returned by magma_malloc_cpu.
*/
extern "C" magma_int_t magma_dbulge_back_m(magma_int_t nrgpu, magma_int_t threads, char uplo, magma_int_t n, magma_int_t nb, magma_int_t ne, magma_int_t Vblksiz, double *Z, magma_int_t ldz, double *V, magma_int_t ldv, double *TAU, double *T, magma_int_t ldt, magma_int_t* info)
{
    // The worker threads call LAPACK themselves, so keep LAPACK sequential.
    magma_setlapack_numthreads(1);

    double timeaplQ2 = 0.0;
    double f = 1.;
    magma_int_t n_gpu = ne;

    // perf_temp2 = 0.85^nrgpu, hence (1-perf_temp2)/(1-perf_temp) below is the
    // geometric sum 1 + 0.85 + ... + 0.85^(nrgpu-1): each additional GPU is
    // weighted 15% less than the previous one.
    double perf_temp  = .85;
    double perf_temp2 = perf_temp;
    for (magma_int_t itmp = 1; itmp < nrgpu; ++itmp)
        perf_temp2 *= perf_temp;

    // Fraction f of the columns given to the GPUs, from the tuned
    // GPU-over-CPU performance ratio and the number of CPU workers.
    magma_int_t gpu_cpu_perf = magma_get_dbulge_gcperf();
    if (threads > 1) {
        f = 1. / (1. + (double)(threads-1)/ ((double)gpu_cpu_perf*(1.-perf_temp2)/(1.-perf_temp)));
        n_gpu = (magma_int_t)(f*ne);
    }

    /****************************************************
     *  apply V2 from left to the eigenvectors Z. dZ = (I-V2*T2*V2')*Z
     * **************************************************/
    timeaplQ2 = magma_wtime();

    if (n_gpu < ne) {
        /*============================
         *  use GPU+CPU's
         *==========================*/
        // note that GPU use Q(1:N_GPU) and CPU use Q(N_GPU+1:N)
#ifdef ENABLE_DEBUG
        printf("---> calling GPU + CPU(if N_CPU>0) to apply V2 to Z with NE %d N_GPU %d N_CPU %d\n",ne, n_gpu, ne-n_gpu);
#endif
        // Per-thread bookkeeping; bail out cleanly if the host is out of memory
        // instead of dereferencing an unallocated pointer.
        magma_dapplyQ_m_id_data* arg;
        magma_int_t alloc_err = magma_malloc_cpu((void**) &arg, threads*sizeof(magma_dapplyQ_m_id_data));
        if (alloc_err != MAGMA_SUCCESS) {
            *info = alloc_err;
            magma_setlapack_numthreads(threads);
            return alloc_err;
        }

        pthread_t* thread_id;
        alloc_err = magma_malloc_cpu((void**) &thread_id, threads*sizeof(pthread_t));
        if (alloc_err != MAGMA_SUCCESS) {
            magma_free_cpu(arg);
            *info = alloc_err;
            magma_setlapack_numthreads(threads);
            return alloc_err;
        }

        magma_dapplyQ_m_data data_applyQ(nrgpu, threads, n, ne, n_gpu, nb, Vblksiz, Z, ldz, V, ldv, TAU, T, ldt);

        // Set one thread per core
        pthread_attr_t thread_attr;
        pthread_attr_init(&thread_attr);
        pthread_attr_setscope(&thread_attr, PTHREAD_SCOPE_SYSTEM);
        pthread_setconcurrency(threads);

        // Launch threads 1..threads-1.
        // NOTE(review): pthread_create failures are still not handled here;
        // recovering would need knowledge of how the parallel section synchronizes.
        for (magma_int_t thread = 1; thread < threads; thread++) {
            arg[thread] = magma_dapplyQ_m_id_data(thread, &data_applyQ);
            pthread_create(&thread_id[thread], &thread_attr, magma_dapplyQ_m_parallel_section, &arg[thread]);
        }
        // The attribute object is only needed while creating threads.
        pthread_attr_destroy(&thread_attr);

        // The calling thread participates as thread 0.
        arg[0] = magma_dapplyQ_m_id_data(0, &data_applyQ);
        magma_dapplyQ_m_parallel_section(&arg[0]);

        // Wait for completion
        for (magma_int_t thread = 1; thread < threads; thread++) {
            void *exitcodep;
            pthread_join(thread_id[thread], &exitcodep);
        }

        magma_free_cpu(thread_id);
        magma_free_cpu(arg);
    } else {
        /*============================
         *  use only GPU
         *==========================*/
        magma_dbulge_applyQ_v2_m(nrgpu, 'L', ne, n, nb, Vblksiz, Z, ldz, V, ldv, T, ldt, info);
        magma_device_sync();
    }

    // Elapsed time is measured but not reported by this routine.
    timeaplQ2 = magma_wtime()-timeaplQ2;

    magma_setlapack_numthreads(threads);
    return MAGMA_SUCCESS;
}
/**
    Purpose
    -------
    Second stage of the two-stage Hermitian tridiagonalization: runs
    magma_chetrd_hb2st_parallel_section on `magma_get_parallel_numthreads()`
    pthreads over the band matrix A, then copies the resulting diagonal and
    off-diagonal of A into the real arrays d and e.

    Arguments
    ---------
    @param[in]
    uplo    magma_uplo_t
      -     = MagmaUpper:  Upper triangles of A is stored;
      -     = MagmaLower:  Lower triangles of A is stored.

    @param[in]
    n       INTEGER
            The order of the matrix A.  N >= 0.

    @param[in]
    nb      INTEGER
            The order of the band matrix A.  N >= NB >= 0.

    @param[in]
    Vblksiz INTEGER
            The size of the block of householder vectors applied at once.

    @param[in,out]
    A       (workspace) COMPLEX array, dimension (LDA, N)
            On entry the band matrix; on exit its diagonal and first
            off-diagonal hold the values returned in d and e.

    @param[in]
    lda     INTEGER
            The leading dimension of the array A.  LDA >= 2*NB.

    @param[out]
    d       REAL array, dimension (N)
            The diagonal elements of the tridiagonal matrix T:
            D(i) = A(i,i).

    @param[out]
    e       REAL array, dimension (N-1)
            The off-diagonal elements of the tridiagonal matrix T:
            E(i) = A(i,i+1) if UPLO = MagmaUpper, E(i) = A(i+1,i) if UPLO = MagmaLower.

    @param[out]
    V       COMPLEX array, dimension (BLKCNT, LDV, VBLKSIZ)
            On exit it contains the blocks of householder reflectors.
            BLKCNT is the number of blocks and it is returned by the function
            MAGMA_BULGE_GET_BLKCNT.  Zeroed on entry to this routine.

    @param[in]
    ldv     INTEGER
            The leading dimension of V.
            LDV > NB + VBLKSIZ + 1

    @param[out]
    TAU     COMPLEX dimension(BLKCNT, VBLKSIZ)
            Zeroed on entry to this routine, then filled by the worker threads.

    @param[in]
    compT   INTEGER
            if COMPT = 0 T is not computed
            if COMPT = 1 T is computed

    @param[out]
    T       COMPLEX dimension(LDT *)
            if COMPT = 1 on exit contains the matrices T needed for Q2
            if COMPT = 0 T is not referenced by the worker threads
            (the array is nevertheless zeroed here, so it must be allocated).

    @param[in]
    ldt     INTEGER
            The leading dimension of T.
            LDT > Vblksiz

    @return MAGMA_SUCCESS, or the error code of magma_malloc_cpu if the
            thread bookkeeping could not be allocated.

    @ingroup magma_cheev_2stage
    ********************************************************************/
extern "C" magma_int_t
magma_chetrd_hb2st(
    magma_uplo_t uplo, magma_int_t n, magma_int_t nb, magma_int_t Vblksiz,
    magmaFloatComplex *A, magma_int_t lda, float *d, float *e,
    magmaFloatComplex *V, magma_int_t ldv, magmaFloatComplex *TAU,
    magma_int_t compT, magmaFloatComplex *T, magma_int_t ldt)
{
    #ifdef ENABLE_TIMER
    real_Double_t timeblg=0.0;
    #endif

    // Save the LAPACK thread count and run LAPACK sequentially inside the
    // worker threads; the count is restored before returning.
    magma_int_t threads = magma_get_parallel_numthreads();
    magma_int_t mklth   = magma_get_lapack_numthreads();
    magma_set_lapack_numthreads(1);

    magma_int_t INgrsiz = 1;
    magma_int_t blkcnt  = magma_bulge_get_blkcnt(n, nb, Vblksiz);
    magma_int_t nbtiles = magma_ceildiv(n, nb);

    memset(T,   0, blkcnt*ldt*Vblksiz*sizeof(magmaFloatComplex));
    memset(TAU, 0, blkcnt*Vblksiz*sizeof(magmaFloatComplex));
    memset(V,   0, blkcnt*ldv*Vblksiz*sizeof(magmaFloatComplex));

    // Shared progress table for the worker threads, plus per-thread
    // argument and id arrays.  Allocation failures are reported instead of
    // being dereferenced.
    volatile magma_int_t* prog;
    magma_int_t alloc_err = magma_malloc_cpu((void**) &prog, (2*nbtiles+threads+10)*sizeof(magma_int_t));
    if (alloc_err != MAGMA_SUCCESS) {
        magma_set_lapack_numthreads(mklth);
        return alloc_err;
    }
    memset((void *) prog, 0, (2*nbtiles+threads+10)*sizeof(magma_int_t));

    magma_cbulge_id_data* arg;
    alloc_err = magma_malloc_cpu((void**) &arg, threads*sizeof(magma_cbulge_id_data));
    if (alloc_err != MAGMA_SUCCESS) {
        magma_free_cpu((void *) prog);
        magma_set_lapack_numthreads(mklth);
        return alloc_err;
    }

    pthread_t* thread_id;
    alloc_err = magma_malloc_cpu((void**) &thread_id, threads*sizeof(pthread_t));
    if (alloc_err != MAGMA_SUCCESS) {
        magma_free_cpu(arg);
        magma_free_cpu((void *) prog);
        magma_set_lapack_numthreads(mklth);
        return alloc_err;
    }

    magma_cbulge_data data_bulge;
    magma_cbulge_data_init(&data_bulge, threads, n, nb, nbtiles, INgrsiz, Vblksiz, compT,
                           A, lda, V, ldv, TAU, T, ldt, prog);

    // Set one thread per core
    pthread_attr_t thread_attr;
    pthread_attr_init(&thread_attr);
    pthread_attr_setscope(&thread_attr, PTHREAD_SCOPE_SYSTEM);
    pthread_setconcurrency(threads);

    //timing
    #ifdef ENABLE_TIMER
    timeblg = magma_wtime();
    #endif

    // Launch threads 1..threads-1.
    // NOTE(review): pthread_create failures are still not handled here.
    for (magma_int_t thread = 1; thread < threads; thread++) {
        magma_cbulge_id_data_init(&(arg[thread]), thread, &data_bulge);
        pthread_create(&thread_id[thread], &thread_attr, magma_chetrd_hb2st_parallel_section, &arg[thread]);
    }
    // The attribute object is only needed while creating threads.
    pthread_attr_destroy(&thread_attr);

    // The calling thread participates as thread 0.
    magma_cbulge_id_data_init(&(arg[0]), 0, &data_bulge);
    magma_chetrd_hb2st_parallel_section(&arg[0]);

    // Wait for completion
    for (magma_int_t thread = 1; thread < threads; thread++) {
        void *exitcodep;
        pthread_join(thread_id[thread], &exitcodep);
    }

    // timing
    #ifdef ENABLE_TIMER
    timeblg = magma_wtime()-timeblg;
    printf("  time BULGE+T = %f\n", timeblg);
    #endif

    magma_free_cpu(thread_id);
    magma_free_cpu(arg);
    magma_free_cpu((void *) prog);
    magma_cbulge_data_destroy(&data_bulge);

    magma_set_lapack_numthreads(mklth);

    /*================================================
     *  store resulting diag and lower diag d and e
     *  note that d and e are always real
     *================================================*/

    /* The diagonal sits at row offset 0 (lower) or nb (upper) of each
     * column of the band storage; the off-diagonal is the neighbouring row.
     * In the complex build only the real part is kept. */
    #if defined(PRECISION_z) || defined(PRECISION_c)
    if (uplo == MagmaLower) {
        for (magma_int_t i=0; i < n-1; i++) {
            d[i] = MAGMA_C_REAL( A[i*lda  ] );
            e[i] = MAGMA_C_REAL( A[i*lda+1] );
        }
        d[n-1] = MAGMA_C_REAL(A[(n-1)*lda]);
    } else { /* MagmaUpper not tested yet */
        for (magma_int_t i=0; i < n-1; i++) {
            d[i] = MAGMA_C_REAL( A[i*lda+nb]   );
            e[i] = MAGMA_C_REAL( A[i*lda+nb-1] );
        }
        d[n-1] = MAGMA_C_REAL(A[(n-1)*lda+nb]);
    } /* end MagmaUpper */
    #else
    if ( uplo == MagmaLower ) {
        for (magma_int_t i=0; i < n-1; i++) {
            d[i] = A[i*lda];   // diag
            e[i] = A[i*lda+1]; // lower diag
        }
        d[n-1] = A[(n-1)*lda];
    } else {
        for (magma_int_t i=0; i < n-1; i++) {
            d[i] = A[i*lda+nb];   // diag
            e[i] = A[i*lda+nb-1]; // upper diag
        }
        d[n-1] = A[(n-1)*lda+nb];
    }
    #endif

    return MAGMA_SUCCESS;
}
//__________________________________________________________________________________

// Command-line entry point.
//
// Sequence visible in this function:
//   1. (MPI builds) initialise MPI and record this node's rank and the node count.
//   2. Derive the base directories from the current working directory.
//   3. Parse argv: '-' options, BASEPATH=, USEPATH=, CPU= (threaded builds),
//      MPIOPTIMIZER / MPIPARTITIONS (MPI builds); any other argument is taken
//      as the batch file to run (the last such argument wins).
//   4. Run in calculator mode or pipe mode if requested, otherwise execute the
//      preferences file (if found), then the chosen batch file, then the
//      optional post-processing loop.
//   5. Shut down.
int main (int argc, char* argv[])
{
    mainArgCount = argc - 1;

#ifdef  __HYPHYMPI__
    int            rank,
                   size;

    MPI_Init       (&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);

    setParameter  (mpiNodeID,    (_Parameter)rank);
    setParameter    (mpiNodeCount, (_Parameter)size);
    _hy_mpi_node_rank = rank;

    // The rank-0 block below is currently empty: its only content is the
    // disabled signal-handler installation.
    if (rank == 0)
    {
#endif

    //for (long k=0; k<NSIG; k++)
    //{
    //  signal(k, &hyphyBreak);
    //}

#ifdef  __HYPHYMPI__
    }
#endif

    // NOTE(review): the return value of getcwd is not checked; if it fails,
    // curWd is read uninitialised below.
    char    curWd[4096],
            dirSlash = GetPlatformDirectoryChar ();
    getcwd (curWd,4096);

    // baseDir = current working directory with a trailing separator; it is
    // appended to pathNames and used as the default for both base directories.
    _String baseDir (curWd), argFile;
    baseDir=baseDir & dirSlash;

    pathNames&& &baseDir;
    baseDirectory = baseDir;
    baseArgDir    = baseDirectory;
    _ExecutionList ex;

#ifdef _OPENMP
    systemCPUCount = omp_get_max_threads();
#endif

    // Argument parsing.  The chain below is a single if / else-if ladder whose
    // middle rungs exist only in some build configurations; the final
    // "argFile = thisArg" is the fall-through else of whichever rung is last.
    for (long i=1; i<argc;i++)
    {
        _String thisArg (argv[i]);
        if (thisArg.sData[0]=='-')
        {
            ProcessConfigStr (thisArg);
        }
        else
            if (thisArg.beginswith ("BASEPATH="))
            {
                // Text after "BASEPATH=" (9 characters); a separator is
                // appended if missing.  An empty value leaves baseDirectory alone.
                baseArgDir = thisArg.Cut(9,-1);
                if (baseArgDir.sLength)
                {
                    if (baseArgDir.sData[baseArgDir.sLength-1]!=dirSlash)
                        baseArgDir = baseArgDir&dirSlash;

                    baseDirectory = baseArgDir;
                }
            }
            else
                if (thisArg.beginswith ("USEPATH="))
                {
                    // Prefix the error/message file names with the given path
                    // and replace the first pathNames entry with it.
                    baseDir = thisArg.Cut(8,-1);
                    errorFileName = baseDir & errorFileName;
                    messageFileName = baseDir & messageFileName;
                    pathNames.Delete (0);
                    pathNames&& &baseDir;
                }
                else
#ifdef __MP__
                    if (thisArg.beginswith ("CPU="))
                    {
                        // Requested CPU count, clamped to at least 1.
                        _String cpus = thisArg.Cut(4,-1);
                        systemCPUCount = cpus.toNum();
                        if (systemCPUCount<1)
                            systemCPUCount = 1;
#ifdef __MP2__
                        pthread_setconcurrency (systemCPUCount+1);
#endif
                    }
                    else
#endif
#ifdef  __HYPHYMPI__
                        if (thisArg == _String("MPIOPTIMIZER"))
                        {
                            mpiParallelOptimizer = true;
                            setParameter    (mpiNodeCount, 0.0);
                        }
                        else
                            if (thisArg == _String("MPIPARTITIONS"))
                            {
                                mpiPartitionOptimizer = true;
                                setParameter    (mpiNodeCount, 0.0);
                            }
                            else
#endif
                                argFile = thisArg;
    }

    GlobalStartup();

    // Calculator mode: loop until ExpressionCalculator() returns false.
    // This path returns without calling GlobalShutdown().
    if (calculatorMode)
    {
        printf ("\nHYPHY is running in calculator mode. Type 'exit' when you are finished.\n");
        while (ExpressionCalculator()) ;
        return 0;
    }

    // Pipe mode: the batch program is read from stdin and executed.
    if (pipeMode)
    {
        _String bfIn (stdin);
        _ExecutionList exIn (bfIn);
        exIn.Execute();
        GlobalShutdown();
        return 0;
    }

    // try to read the preferences: first from the working directory, then
    // from baseArgDir; fopen is used only to probe for existence.
    _String     prefFile (curWd);
    prefFile = prefFile & '/' & prefFileName;
    FILE     * testPrefFile = fopen (prefFile.sData,"r");
    if (!testPrefFile)
    {
        prefFile = baseArgDir & prefFileName;
        testPrefFile = fopen (prefFile.sData,"r");
    }
    if (testPrefFile)
    {
        fclose(testPrefFile);
        ReadBatchFile (prefFile,ex);
        ex.Execute();
        ex.Clear();
    }
    //printf ("Node %d before mpiParallelOptimizer\n", rank);
#ifdef __HYPHYMPI__
    // Non-zero ranks enter one of the MPI service loops; only rank 0 runs the
    // interactive / batch logic in the else-branch.
    if (rank>0)
    {
        if (mpiParallelOptimizer || mpiPartitionOptimizer)
            mpiOptimizerLoop (rank, size);
        else
            mpiNormalLoop (rank, size, baseDir);
        /*argFile = "SHUTDOWN_CONFIRM";
        MPISendString (argFile, senderID);*/
    }
    else
    {
#endif
    if (!argFile.sLength)
    {
        // No batch file on the command line.  selection stays -2 in update
        // mode; otherwise it is whatever DisplayListOfChoices() returns, with
        // -1 handled as "prompt for a file name".
        long selection = -2;
        if (!updateMode)
            selection = DisplayListOfChoices();

        if (selection == -1)
        {
            dialogPrompt = "Batch file to run:";
            _String fStr (ReturnDialogInput (true));
            if (logInputMode)
            {
                _String tts = loggedFileEntry&fStr;
                loggedUserInputs && & tts;
            }
            PushFilePath (fStr);
            ReadBatchFile (fStr,ex);
        }
        else
        {
            // selection >= 0: file named by entry (2) of the chosen template
            // record; otherwise (update mode) the WebUpdate.bf template.
            _String templ;
            if (selection >= 0)
                templ = baseArgDir &"TemplateBatchFiles" & dirSlash;
            else
                templ = baseArgDir & "TemplateBatchFiles" & dirSlash & "WebUpdate.bf";

            if (selection >= 0)
                templ= templ&*(_String*)(*(_List*)availableTemplateFiles(selection))(2);

            PushFilePath (templ);
            ReadBatchFile (templ,ex);
        }
    }
    else
    {
        // Paths that do not look absolute are resolved against baseDirectory.
#ifndef __MINGW32__
        if (argFile.sData[0] != '/')
            argFile = baseDirectory & argFile;
#else
        if (argFile.sData[1] != ':') // not an absolute path
            argFile = baseDirectory & argFile;
#endif
        PushFilePath  (argFile);
        ReadBatchFile (argFile,ex);
    }

    ex.Execute();

    // Optional post-processing: keep offering the post-processor list until
    // the user answers n/N or DisplayListOfPostChoices() returns -1.
    if (usePostProcessors && (!updateMode))
    {
        ReadInPostFiles();
        printf ("\n\n**********Continue with result processing (y/n)?");
        _String c_str (StringFromConsole());

        if (logInputMode)
            loggedUserInputs && & c_str;

        if (c_str.sData[0]!='n' && c_str.sData[0]!='N' )
        {
            long choice = DisplayListOfPostChoices();
            while (choice != -1)
            {
                _ExecutionList postEx;
                argFile = *(_String*)(*(_List*)availablePostProcessors(choice-1))(1);
                PushFilePath (argFile);
                ReadBatchFile (argFile, postEx);
                postEx.Execute();
                PopFilePath ();
                printf ("\n\n**********Continue with result processing (y/n)?");

                // Shadows the outer c_str for this iteration only.
                _String c_str (StringFromConsole());
                if (logInputMode)
                    loggedUserInputs && & c_str;

                if (c_str.sData[0]=='n' || c_str.sData[0]=='N' )
                    break;

                choice = DisplayListOfPostChoices();
            }
        }
    }
#ifdef __HYPHYMPI__
    }
    argFile = _String ("Node ") & (long)rank & " is shutting down\n";
    ReportWarning (argFile);
#endif

    batchLanguageFunctions.Clear();
    GlobalShutdown();

#ifdef __HYPHYMPI__
    if (rank == 0)
        printf ("\n\n");
#endif
}
extern "C" magma_int_t magma_ssytrd_hb2st(magma_int_t threads, char uplo, magma_int_t n, magma_int_t nb, magma_int_t Vblksiz, float *A, magma_int_t lda, float *D, float *E, float *V, magma_int_t ldv, float *TAU, magma_int_t compT, float *T, magma_int_t ldt) { /* -- MAGMA (version 1.3.0) -- Univ. of Tennessee, Knoxville Univ. of California, Berkeley Univ. of Colorado, Denver November 2012 Purpose ======= Arguments ========= THREADS (input) INTEGER Specifies the number of pthreads used. THREADS > 0 UPLO (input) CHARACTER*1 = 'U': Upper triangles of A is stored; = 'L': Lower triangles of A is stored. N (input) INTEGER The order of the matrix A. N >= 0. NB (input) INTEGER The order of the band matrix A. N >= NB >= 0. VBLKSIZ (input) INTEGER The size of the block of householder vectors applied at once. A (input/workspace) COMPLEX*16 array, dimension (LDA, N) On entry the band matrix stored in the following way: LDA (input) INTEGER The leading dimension of the array A. LDA >= 2*NB. D (output) DOUBLE array, dimension (N) The diagonal elements of the tridiagonal matrix T: D(i) = A(i,i). E (output) DOUBLE array, dimension (N-1) The off-diagonal elements of the tridiagonal matrix T: E(i) = A(i,i+1) if UPLO = 'U', E(i) = A(i+1,i) if UPLO = 'L'. V (output) COMPLEX*16 array, dimension (BLKCNT, LDV, VBLKSIZ) On exit it contains the blocks of householder reflectors BLKCNT is the number of block and it is returned by the funtion MAGMA_BULGE_GET_BLKCNT. LDV (input) INTEGER The leading dimension of V. LDV > NB + VBLKSIZ + 1 TAU (output) COMPLEX*16 dimension(BLKCNT, VBLKSIZ) ??? COMPT (input) INTEGER if COMPT = 0 T is not computed if COMPT = 1 T is computed T (output) COMPLEX*16 dimension(LDT *) if COMPT = 1 on exit contains the matrices T needed for Q2 if COMPT = 0 T is not referenced LDT (input) INTEGER The leading dimension of T. LDT > Vblksiz INFO (output) INTEGER ???????????????????????????????????????????????????????????????????????????????????? 
= 0: successful exit ===================================================================== */ char uplo_[2] = {uplo, 0}; float timeblg=0.0; magma_int_t mklth = threads; magma_int_t INgrsiz=1; magma_int_t blkcnt = magma_bulge_get_blkcnt(n, nb, Vblksiz); magma_int_t nbtiles = magma_ceildiv(n, nb); memset(T, 0, blkcnt*ldt*Vblksiz*sizeof(float)); memset(TAU, 0, blkcnt*Vblksiz*sizeof(float)); memset(V, 0, blkcnt*ldv*Vblksiz*sizeof(float)); magma_int_t* prog = new magma_int_t[2*nbtiles+threads+10]; memset(prog, 0, (2*nbtiles+threads+10)*sizeof(magma_int_t)); magma_sbulge_id_data* arg = new magma_sbulge_id_data[threads]; pthread_t* thread_id = new pthread_t[threads]; pthread_attr_t thread_attr; #if defined(USEMKL) mkl_set_num_threads( 1 ); #endif #if defined(USEACML) omp_set_num_threads(1); #endif magma_sbulge_data data_bulge(threads, n, nb, nbtiles, INgrsiz, Vblksiz, compT, A, lda, V, ldv, TAU, T, ldt, prog); // Set one thread per core pthread_attr_init(&thread_attr); pthread_attr_setscope(&thread_attr, PTHREAD_SCOPE_SYSTEM); pthread_setconcurrency(threads); //timing timeblg = magma_wtime(); // Launch threads for (magma_int_t thread = 1; thread < threads; thread++) { arg[thread] = magma_sbulge_id_data(thread, &data_bulge); pthread_create(&thread_id[thread], &thread_attr, magma_ssytrd_hb2st_parallel_section, &arg[thread]); } arg[0] = magma_sbulge_id_data(0, &data_bulge); magma_ssytrd_hb2st_parallel_section(&arg[0]); // Wait for completion for (magma_int_t thread = 1; thread < threads; thread++) { void *exitcodep; pthread_join(thread_id[thread], &exitcodep); } // timing timeblg = magma_wtime()-timeblg; delete[] thread_id; delete[] arg; delete[] prog; printf("time BULGE+T = %f \n" ,timeblg); #if defined(USEMKL) mkl_set_num_threads( mklth ); #endif #if defined(USEACML) omp_set_num_threads(mklth); #endif /*================================================ * store resulting diag and lower diag D and E * note that D and E are always real 
*================================================*/ /* Make diagonal and superdiagonal elements real, * storing them in D and E */ /* In real case, the off diagonal element are * not necessary real. we have to make off-diagonal * elements real and copy them to E. * When using HouseHolder elimination, * the SLARFG give us a real as output so, all the * diagonal/off-diagonal element except the last one are already * real and thus we need only to take the abs of the last * one. * */ #if defined(PRECISION_z) || defined(PRECISION_c) if(uplo==MagmaLower){ for (magma_int_t i=0; i < n-1 ; i++) { D[i] = MAGMA_S_REAL(A[i*lda ]); E[i] = MAGMA_S_REAL(A[i*lda+1]); } D[n-1] = MAGMA_S_REAL(A[(n-1)*lda]); } else { /* MagmaUpper not tested yet */ for (magma_int_t i=0; i<n-1; i++) { D[i] = MAGMA_S_REAL(A[i*lda+nb]); E[i] = MAGMA_S_REAL(A[i*lda+nb-1]); } D[n-1] = MAGMA_S_REAL(A[(n-1)*lda+nb]); } /* end MagmaUpper */ #else if( uplo == MagmaLower ){ for (magma_int_t i=0; i < n-1; i++) { D[i] = A[i*lda]; // diag E[i] = A[i*lda+1]; //lower diag } D[n-1] = A[(n-1)*lda]; } else { for (magma_int_t i=0; i < n-1; i++) { D[i] = A[i*lda+nb]; // diag E[i] = A[i*lda+nb-1]; //lower diag } D[n-1] = A[(n-1)*lda+nb]; } #endif return MAGMA_SUCCESS; }
/* Function: workpool_start() * Date: SRE, Thu Jul 16 11:09:05 1998 [St. Louis] * * Purpose: Initialize a workpool_s structure, and return it. * * Args: hmm - the HMM to calibrate * fixedlen - 0, or a fixed length for seqs (bypass of Gaussian) * lenmean - mean sequence length * lensd - std. dev. for sequence length * randomseq- i.i.d. frequencies for residues, 0..Alphabet_size-1 * nsample - how many seqs to calibrate on * hist - histogram structure for storing results * num_threads - how many processors to run on * * Returns: ptr to struct workpool_s. * Caller must wait for threads to finish with workpool_stop(), * then free the structure with workpool_free(). */ static struct workpool_s * workpool_start(struct plan7_s *hmm, float lenmean, float lensd, int fixedlen, float *randomseq, int nsample, struct histogram_s *hist, int num_threads) { struct workpool_s *wpool; pthread_attr_t attr; int i; int rtn; wpool = MallocOrDie(sizeof(struct workpool_s)); wpool->thread = MallocOrDie(num_threads * sizeof(pthread_t)); wpool->hmm = hmm; wpool->fixedlen = fixedlen; wpool->lenmean = lenmean; wpool->lensd = lensd; wpool->randomseq = randomseq; wpool->nsample = nsample; wpool->nseq = 0; wpool->hist = hist; wpool->max_score = -FLT_MAX; wpool->num_threads= num_threads; StopwatchZero(&(wpool->watch)); if ((rtn = pthread_mutex_init(&(wpool->input_lock), NULL)) != 0) Die("pthread_mutex_init FAILED; %s\n", strerror(rtn)); if ((rtn = pthread_mutex_init(&(wpool->output_lock), NULL)) != 0) Die("pthread_mutex_init FAILED; %s\n", strerror(rtn)); /* Create slave threads. * Note the crazy machinations we have to go through to achieve concurrency. * You'd think that POSIX threads were portable... ha. * On IRIX 6.5, system scope threads are only available to root, or if * /etc/capability has been configured specially, so to avoid strange * permissions errors we can't set PTHREAD_SCOPE_SYSTEM for IRIX. * On IRIX pre-6.5, we can't get good concurrency, period. 
As of 6.5, * SGI provides the nonportable pthread_setconcurrency() call. * On FreeBSD (3.0 snapshots), the pthread_attr_setscope() call isn't * even provided, apparently on grounds of "if it doesn't do anything, * why provide it?" Hello? POSIX compliance, perhaps? * On Sun Solaris, we need to set system scope to achieve concurrency. * Linux and DEC Digital UNIX seem to work fine in either process scope * or system scope, without a pthread_setconcurrency call. */ pthread_attr_init(&attr); #ifndef __sgi #ifdef HAVE_PTHREAD_ATTR_SETSCOPE pthread_attr_setscope(&attr, PTHREAD_SCOPE_SYSTEM); #endif #endif #ifdef HAVE_PTHREAD_SETCONCURRENCY pthread_setconcurrency(num_threads+1); #endif for (i = 0; i < num_threads; i++) if ((rtn = pthread_create(&(wpool->thread[i]), &attr, worker_thread , (void *) wpool)) != 0) Die("Failed to create thread %d; return code %d\n", i, rtn); pthread_attr_destroy(&attr); return wpool; }
/* * Use global *tty_tmp and term_parent */ int main( int argc, char **argv ) { struct cl_args *cl_args; struct term_node *tty_node = NULL; pid_t pid; pid_t parent_id; #if defined(HAVE_PTHREAD_SETCONCURRENCY) && !defined(LINUX) int concurrent; #endif handle_signals_parent(); tcgetattr(0, &term_parent); parent_id = getpid(); if ((pid = fork()) < 0) { exit(1); } else { if (pid != 0) { wait(NULL); tcsetattr(0, TCSANOW, &term_parent); exit(0); } } fatal_error = 4; /* Disable all signals while initializing data...*/ handle_signals(); setvbuf(stdout, NULL, _IONBF, 0); tty_tmp = (struct term_tty *)calloc(1,sizeof(struct term_tty)); if (tty_tmp == NULL) { printf("Out of memory on calloc tty_tmp\n"); clean_exit(); } tty_tmp->term = (struct termios *)calloc(1,sizeof(struct termios)); if (tty_tmp->term == NULL) { printf("Out of memory on calloc tty_tmp->term\n"); clean_exit(); } /* default values */ tty_tmp->interactive = 0; tty_tmp->gtk = 0; tty_tmp->attack = -1; tty_tmp->mac_spoofing = -1; tty_tmp->splash = -1; strncpy(tty_tmp->username, VTY_USER, MAX_USERNAME); strncpy(tty_tmp->password, VTY_PASS, MAX_PASSWORD); strncpy(tty_tmp->e_password, VTY_ENABLE, MAX_PASSWORD); tty_tmp->port = VTY_PORT; tty_tmp->ip_filter = NULL; #ifdef HAVE_GTK tty_tmp->buffer_log = NULL; #endif cl_args = (struct cl_args *)calloc(1,sizeof(struct cl_args)); if (cl_args == NULL) { printf("Out of memory on calloc cl_args\n"); clean_exit(); } if ( argc == 1 ) { printf("GNU %s %s %s\n", PACKAGE, VERSION, "$Date: 2006/03/23 08:40:14 $"); printf("Try '%s -h' to display the help.\n",PACKAGE); clean_exit(); } if (getuid() != 0) { printf("You must be root to run %s %s\n", PACKAGE, VERSION); clean_exit(); } if (term_init() < 0) g00dbye(); /* Register all the protocols */ protocol_init(); cl_args->proto_index = -1; if (parser_initial(tty_tmp, cl_args, argc, argv) < 0) { clean_exit(); } init_log(); #if defined(HAVE_PTHREAD_SETCONCURRENCY) && !defined(LINUX) /* concurrent = pthread_getconcurrency();*/ concurrent 
= 15;/*(MAX_TERMS*MAX_PROTOCOLS*MAX_THREAD_ATTACK*2)+3;*/ if (pthread_setconcurrency(concurrent) != 0) { thread_error("init pthread_setconcurrency()",errno); g00dbye(); } #endif if (interfaces_init(&terms->pcap_listen_th) < 0 ) g00dbye(); /* Establish TERM signal handler...*/ posix_signal(SIGTERM, final); #ifdef HAVE_REMOTE_ADMIN if (tty_tmp->daemonize) { if (admin_init(tty_tmp) < 0) g00dbye(); } #endif if (thread_create(&terms->uptime_th.id, &th_uptime, (void *)NULL) < 0) g00dbye(); /* Command line and ncurses cannot be choosed simultaneously...*/ if ((!tty_tmp->interactive) && (!tty_tmp->gtk) && (cl_args->proto_index != -1)) { terms->work_state = INITIAL; tty_node = term_type[TERM_TTY].list; if (thread_create(&tty_node[0].thread.id, &th_tty_peer, (void *)cl_args) < 0) g00dbye(); while(terms->work_state != STOPPED) thread_usleep(100000); } #ifdef HAS_CURSES if (tty_tmp->interactive) { terms->work_state = INITIAL; if (thread_create(&terms->gui_th.id, &ncurses_gui, NULL) < 0 ) g00dbye(); /* Wait until the ncurses GUI is over */ while(terms->work_state != STOPPED) thread_usleep(100000); } else { #endif #ifdef HAVE_GTK if (tty_tmp->gtk) { terms->work_state = INITIAL; if (thread_create(&terms->gui_gtk_th.id, >k_gui, NULL) < 0 ) g00dbye(); /* Wait until the GTK GUI is over */ while(terms->work_state != STOPPED) thread_usleep(100000); } #endif #ifdef HAS_CURSES } #endif #ifdef HAVE_REMOTE_ADMIN if (tty_tmp->daemonize) { /* Ok, now that console (ncurses) is finished * we can become a true daemon... */ become_daemon(parent_id); /* Wait until some important thread exits due to fatal_error...*/ while (fatal_error == 4) thread_usleep(100000); } #endif g00dbye(); exit(1); }
/**
    Purpose
    -------
    Applies the second-stage (bulge chasing) reflectors V2 from the left to
    the eigenvectors: dZ = (I - V2*T2*V2') * Z.  On exit the result is on the
    device in dZ.  With more than one parallel thread the ne columns are
    split: the GPU handles columns 1:n_gpu and the CPU threads handle
    n_gpu+1:ne, whose result is then uploaded into the matching columns of dZ.

    Arguments
    ---------
    @param[in]     uplo     Not referenced by this routine.
    @param[in]     n        Number of rows of Z / dZ.
    @param[in]     nb       Band width forwarded to the apply-Q kernels.
    @param[in]     ne       Number of columns (eigenvectors) to update.
    @param[in]     Vblksiz  Block size of the Householder vectors.
    @param[in,out] Z        Host matrix, leading dimension ldz.
    @param[in]     ldz      Leading dimension of Z.
    @param[out]    dZ       Device matrix, leading dimension lddz.
    @param[in]     lddz     Leading dimension of dZ.
    @param[in]     V        Householder vectors, leading dimension ldv.
    @param[in]     ldv      Leading dimension of V.
    @param[in]     TAU      Scalar factors (only used by the hybrid path).
    @param[in]     T        Block factors, leading dimension ldt.
    @param[in]     ldt      Leading dimension of T.
    @param[out]    info     Passed to the GPU-only kernel; set to the
                            allocation error code if workspace allocation fails.

    @return MAGMA_SUCCESS, or the error returned by magma_malloc_cpu.
*/
extern "C" magma_int_t
magma_dbulge_back(
    magma_uplo_t uplo,
    magma_int_t n, magma_int_t nb,
    magma_int_t ne, magma_int_t Vblksiz,
    double *Z, magma_int_t ldz,
    magmaDouble_ptr dZ, magma_int_t lddz,
    double *V, magma_int_t ldv,
    double *TAU,
    double *T, magma_int_t ldt,
    magma_int_t* info)
{
    // Save the LAPACK thread count and run LAPACK sequentially inside the
    // worker threads; the count is restored before returning.
    magma_int_t threads = magma_get_parallel_numthreads();
    magma_int_t mklth   = magma_get_lapack_numthreads();
    magma_set_lapack_numthreads(1);

    real_Double_t timeaplQ2 = 0.0;
    double f = 1.;
    magma_int_t n_gpu = ne;

    // Fraction f of the columns given to the GPU, from the tuned
    // GPU-over-CPU performance ratio and the number of CPU workers.
    magma_int_t gpu_cpu_perf = magma_get_dbulge_gcperf();
    if (threads > 1) {
        f = 1. / (1. + (double)(threads-1)/ ((double)gpu_cpu_perf) );
        n_gpu = (magma_int_t)(f*ne);
    }

    /****************************************************
     *  apply V2 from left to the eigenvectors Z. dZ = (I-V2*T2*V2')*Z
     * **************************************************/
    timeaplQ2 = magma_wtime();

    if (n_gpu < ne) {
        /*============================
         *  use GPU+CPU's
         *==========================*/
        // note that GPU use Q(1:N_GPU) and CPU use Q(N_GPU+1:N)
        #ifdef ENABLE_DEBUG
        printf("---> calling GPU + CPU(if N_CPU > 0) to apply V2 to Z with NE %d     N_GPU %d   N_CPU %d\n",ne, n_gpu, ne-n_gpu);
        #endif

        // Per-thread bookkeeping; bail out cleanly if the host is out of memory
        // instead of dereferencing an unallocated pointer.
        magma_dapplyQ_id_data* arg;
        magma_int_t alloc_err = magma_malloc_cpu((void**) &arg, threads*sizeof(magma_dapplyQ_id_data));
        if (alloc_err != MAGMA_SUCCESS) {
            *info = alloc_err;
            magma_set_lapack_numthreads(mklth);
            return alloc_err;
        }

        pthread_t* thread_id;
        alloc_err = magma_malloc_cpu((void**) &thread_id, threads*sizeof(pthread_t));
        if (alloc_err != MAGMA_SUCCESS) {
            magma_free_cpu(arg);
            *info = alloc_err;
            magma_set_lapack_numthreads(mklth);
            return alloc_err;
        }

        magma_dapplyQ_data data_applyQ;
        magma_dapplyQ_data_init(&data_applyQ, threads, n, ne, n_gpu, nb, Vblksiz, Z, ldz, V, ldv, TAU, T, ldt, dZ, lddz);

        // Set one thread per core
        pthread_attr_t thread_attr;
        pthread_attr_init(&thread_attr);
        pthread_attr_setscope(&thread_attr, PTHREAD_SCOPE_SYSTEM);
        pthread_setconcurrency(threads);

        // Launch threads 1..threads-1.
        // NOTE(review): pthread_create failures are still not handled here.
        for (magma_int_t thread = 1; thread < threads; thread++) {
            magma_dapplyQ_id_data_init(&(arg[thread]), thread, &data_applyQ);
            pthread_create(&thread_id[thread], &thread_attr, magma_dapplyQ_parallel_section, &arg[thread]);
        }
        // The attribute object is only needed while creating threads.
        pthread_attr_destroy(&thread_attr);

        // The calling thread participates as thread 0.
        magma_dapplyQ_id_data_init(&(arg[0]), 0, &data_applyQ);
        magma_dapplyQ_parallel_section(&arg[0]);

        // Wait for completion
        for (magma_int_t thread = 1; thread < threads; thread++) {
            void *exitcodep;
            pthread_join(thread_id[thread], &exitcodep);
        }

        magma_free_cpu(thread_id);
        magma_free_cpu(arg);
        magma_dapplyQ_data_destroy(&data_applyQ);

        // Upload the CPU-computed columns n_gpu..ne-1.  The destination
        // column offset must use the device leading dimension lddz; the
        // host offset uses ldz.
        magma_dsetmatrix(n, ne-n_gpu, Z + n_gpu*ldz, ldz, dZ + n_gpu*lddz, lddz);
    } else {
        /*============================
         *  use only GPU
         *==========================*/
        magma_dsetmatrix(n, ne, Z, ldz, dZ, lddz);
        magma_dbulge_applyQ_v2(MagmaLeft, ne, n, nb, Vblksiz, dZ, lddz, V, ldv, T, ldt, info);
        magma_device_sync();
    }

    // Elapsed time is measured but not reported by this routine.
    timeaplQ2 = magma_wtime()-timeaplQ2;

    magma_set_lapack_numthreads(mklth);
    return MAGMA_SUCCESS;
}
WRAP_END } #ifndef MUSL static int wrap_pthread_setschedprio(pthread_t thread, int prio) { WRAP_START while (!err) err = pthread_setschedprio(thread, prio); WRAP_END } #endif static int wrap_pthread_setconcurrency(int new_level) { WRAP_START while (!err) err = pthread_setconcurrency(new_level); WRAP_END } static int wrap_pthread_detach(pthread_t thread) { WRAP_START while (!err) err = pthread_detach(thread); WRAP_END } static int wrap_pthread_key_create(pthread_key_t *key, void (*destructor)(void*)) { /* this is clumsy: having to key_create lessens key_delete's chances of being interrupted - test not definite */ int err2;
int main (int argc, char *argv[]) { #if defined(_MT) || defined(_REENTRANT) int min_threads, max_threads ; int num_rounds ; int chperthread ; #endif unsigned seed=12345 ; int num_chunks=10000; long sleep_cnt; int matches; if (argc > 7) { sleep_cnt = atoi(argv[1]); min_size = atoi(argv[2]); max_size = atoi(argv[3]); chperthread = atoi(argv[4]); num_rounds = atoi(argv[5]); seed = atoi(argv[6]); max_threads = atoi(argv[7]); min_threads = max_threads; goto DoneWithInput; } #if defined(_MT) || defined(_REENTRANT) //#ifdef _MT printf( "\nMulti-threaded test driver \n") ; #else printf( "\nSingle-threaded test driver \n") ; #endif printf("C version (malloc and free)\n") ; printf("runtime (sec): ") ; /* 15 seconds */ if ((matches = scanf ("%ld", &sleep_cnt)) != 1) { printf("error scanning stdin for sleep_cnt - exiting\n"); return(1); } printf("chunk size (min,max): ") ; /* 8 40 */ if ((matches = scanf("%d %d", &min_size, &max_size )) != 2) { printf("error scanning stdin for chunk size (min,max) - exiting\n"); return(1); } #if defined(_MT) || defined(_REENTRANT) //#ifdef _MT printf("threads (min, max): ") ; /* same, 1 1, 2 2, etc. 
*/ if ((matches = scanf("%d %d", &min_threads, &max_threads)) != 2) { printf("error scanning stdin for threads (min,max) - exiting\n"); return(1); } printf("chunks/thread: ") ; if ((matches = scanf("%d", &chperthread )) != 1) { /* 10K */ printf("error scanning stdin for chunks/thread - exiting\n"); return(1); } printf("no of rounds: ") ; if ((matches = scanf("%d", &num_rounds )) != 1) { /* 10 */ printf("error scanning stdin for no of rounds - exiting\n"); return(1); } num_chunks = max_threads*chperthread ; #else printf("no of chunks: ") ; if ((matches = scanf("%d", &num_chunks )) != 1) { printf("error scanning stdin for no of chunks - exiting\n"); return(1); } #endif printf("random seed: ") ; if ((matches = scanf("%d", &seed)) != 1) { printf("error scanning stdin for random seed - exiting\n"); return(1); } printf("\n"); DoneWithInput: if( num_chunks > MAX_BLOCKS ){ printf("Max %d chunks - exiting\n", MAX_BLOCKS ) ; return(1) ; } pthread_setconcurrency (max_threads); lran2_init(&rgen, seed) ; // Call allocator-specific initialization function mm_init(); numCPU=getNumProcessors(); #if defined(_MT) || defined(_REENTRANT) //#ifdef _MT runthreads(sleep_cnt, min_threads, max_threads, chperthread, num_rounds) ; #else runloops(sleep_cnt, num_chunks ) ; #endif #ifdef _DEBUG _cputs("Hit any key to exit...") ; (void)_getch() ; #endif return(0) ; } /* main */
void test8() { printf("########################################\n" "# Test 8: Multi-processor yielding\n" "# -> Show that the schedulers on multiple processors are \n" "# independant by measuring yield times accross a bunch\n" "# of processors\n"); uval numprocessors; uval numchildren; uval child, i; double sum, min, max; SysStatus rc; // Print out warning about nanosleep() not being finished once // before starting our timings: sleep(1); numprocessors = DREFGOBJ(TheProcessRef)->ppCount(); pthread_setconcurrency(numprocessors); printf("plot forkjoin_multiproc %ld green\n", numprocessors); Done = 0; TotalChildren = 0; FinishedChildren = numprocessors; for (numchildren = 1; numchildren <= numprocessors; numchildren++) { child = numchildren - 1; // Start up a thread on a remote processor: rc = MPMsgMgr::SendAsyncUval(Scheduler::GetEnabledMsgMgr(), SysTypes::DSPID(0, VPNum(child)), test8_child, child); if (rc != 0) { printf("Error, SendAsyncUval() returned %lx\n", rc); return; } // Wait for the child thread to start: while (TotalChildren < numchildren) { sched_yield(); } // Init our aray: for (i = 0; i < numchildren; i++) { AvgTime[i] = 0; } // This is a barrier release: when we set this to 0 all the // children start working: FinishedChildren = 0; // Wait for the children to finish: while (FinishedChildren < TotalChildren) { sleep(1); } sum = 0; min = MinTime[0]; max = MaxTime[0]; for (i = 0; i < numchildren; i++) { sum += AvgTime[i]; if (min > MinTime[i]) { min = MinTime[i]; } if (max > MaxTime[i]) { max = MaxTime[i]; } printf("Child %ld took min %f us, max %f us, avg %f us, per fork/join.\n", i, MinTime[i], MaxTime[i], AvgTime[i]); } printf("%ld %f %f %f\n", numchildren, sum / double(numchildren), min, max); } Done = 1; printf("xlabel Number of threads\n" "ylabel Fork-join time (us)\n" "title Fork-join time vs. number of threads\n" "# All times averaged over %ld iterations\n", COUNT_NUM); }
int main (int argc, char* argv[]) { mainArgCount = argc - 1; #ifdef __HYPHYMPI__ int rank, size; MPI_Init (&argc, &argv); MPI_Comm_rank(MPI_COMM_WORLD, &rank); MPI_Comm_size(MPI_COMM_WORLD, &size); setParameter (mpiNodeID, (_Parameter)rank); setParameter (mpiNodeCount, (_Parameter)size); _hy_mpi_node_rank = rank; if (rank == 0) { mpiNodesThatCantSwitch.Populate (size,1,0); /* { char hostname[256]; gethostname(hostname, sizeof(hostname)); printf("PID %d on %s ready for attach\n", getpid(), hostname); fflush(stdout); //getchar (); } */ #endif //for (long k=0; k<NSIG; k++) //{ // signal(k, &hyphyBreak); //} #ifdef __HYPHYMPI__ } #endif char curWd[4096], dirSlash = GetPlatformDirectoryChar (); getcwd (curWd,4096); _String baseDir (curWd); if (baseDir.getChar (baseDir.sLength-1) != dirSlash) { baseDir=baseDir & dirSlash; } #if defined _HYPHY_LIBDIRECTORY_ _String libDir (_HYPHY_LIBDIRECTORY_); if (libDir.getChar (libDir.sLength-1) != dirSlash) { libDir=libDir & dirSlash; } pathNames&& &libDir; #else pathNames&& &baseDir; _String libDir = baseDir; #endif _String argFile; libDirectory = libDir; libArgDir = libDirectory; baseDirectory = baseDir; baseArgDir = baseDirectory; _ExecutionList ex; #ifdef _OPENMP systemCPUCount = omp_get_max_threads(); #endif #ifdef _MINGW32_MEGA_ { char pid[16]; snprintf (pid,16,"%u", GetCurrentProcessId()); _String pipeName = _String("\\\\.\\pipe\\MEGAPipe") & pid; printf ("Pipe name = %s\n", pipeName.sData); if ((_HY_MEGA_Pipe = CreateFile(pipeName.sData, GENERIC_WRITE, 0, NULL, OPEN_EXISTING, 0, NULL)) == INVALID_HANDLE_VALUE) { char* lpMsgBuf; FormatMessage( FORMAT_MESSAGE_ALLOCATE_BUFFER | FORMAT_MESSAGE_FROM_SYSTEM | FORMAT_MESSAGE_IGNORE_INSERTS, NULL, GetLastError(), MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT), (LPTSTR) &lpMsgBuf, 0, NULL ); FlagError (_String("Failed to create a pipe named '") & pipeName & "' to send data from HyPhy to MEGA. 
Error: "&lpMsgBuf); } } #endif for (long i=1; i<argc; i++) { _String thisArg (argv[i]); if (thisArg.sData[0]=='-') { ProcessConfigStr (thisArg); } else if (thisArg.beginswith ("BASEPATH=")) { baseArgDir = thisArg.Cut(9,-1); if (baseArgDir.sLength) { if (baseArgDir.sData[baseArgDir.sLength-1]!=dirSlash) { baseArgDir = baseArgDir&dirSlash; } baseDirectory = baseArgDir; } } else if (thisArg.beginswith ("LIBPATH=")) { libArgDir = thisArg.Cut(8,-1); if (libArgDir.sLength) { if (libArgDir.sData[libArgDir.sLength-1] != dirSlash) { libArgDir = libArgDir & dirSlash; } libDirectory = libArgDir; } } else if (thisArg.beginswith ("USEPATH=")) { baseDir = thisArg.Cut(8,-1); errorFileName = baseDir & errorFileName; messageFileName = baseDir & messageFileName; pathNames.Delete (0); pathNames&& &baseDir; } else #ifdef __MP__ if (thisArg.beginswith ("CPU=")) { _String cpus = thisArg.Cut(4,-1); systemCPUCount = cpus.toNum(); if (systemCPUCount<1) { systemCPUCount = 1; } pthread_setconcurrency (systemCPUCount+1); } else #endif argFile = thisArg; } GlobalStartup(); if (calculatorMode) { printf ("\nHYPHY is running in calculator mode. 
Type 'exit' when you are finished.\n"); while (ExpressionCalculator()) ; return 0; } if (pipeMode) { _String bfIn (stdin); _ExecutionList exIn (bfIn); exIn.Execute(); GlobalShutdown(); return 0; } // try to read the preferences _String prefFile (curWd); prefFile = prefFile & '/' & prefFileName; FILE * testPrefFile = fopen (prefFile.sData,"r"); if (!testPrefFile) { prefFile = baseArgDir & prefFileName; testPrefFile = fopen (prefFile.sData,"r"); } if (testPrefFile) { fclose(testPrefFile); ReadBatchFile (prefFile,ex); ex.Execute(); ex.Clear(); } //printf ("Node %d before mpiParallelOptimizer\n", rank); #ifdef __HYPHYMPI__ if (rank>0) { //if (mpiParallelOptimizer || mpiPartitionOptimizer) // mpiOptimizerLoop (rank, size); //else _String defaultBaseDirectory = *(_String*)pathNames(0); mpiNormalLoop (rank, size, defaultBaseDirectory); /*argFile = "SHUTDOWN_CONFIRM"; MPISendString (argFile, senderID);*/ } else { #endif if (!argFile.sLength) { long selection = -2; if (!updateMode) { selection = DisplayListOfChoices(); } if (selection == -1) { dialogPrompt = "Batch file to run:"; _String fStr (ReturnDialogInput (true)); if (logInputMode) { _String tts = loggedFileEntry&fStr; loggedUserInputs && & tts; } PushFilePath (fStr); ReadBatchFile (fStr,ex); } else { _String templ; if (selection >= 0) { templ = baseArgDir &"TemplateBatchFiles" & dirSlash; } else { templ = baseArgDir & "TemplateBatchFiles" & dirSlash & "WebUpdate.bf"; } if (selection >= 0) { templ= templ&*(_String*)(*(_List*)availableTemplateFiles(selection))(2); } PushFilePath (templ); ReadBatchFile (templ,ex); } } else { #ifndef __MINGW32__ if (argFile.sData[0] != '/') { argFile = baseDirectory & argFile; } #else if (argFile.sData[1] != ':') { // not an absolute path argFile = baseDirectory & argFile; } #endif PushFilePath (argFile); ReadBatchFile (argFile,ex); } ex.Execute(); if (usePostProcessors && (!updateMode)) { ReadInPostFiles(); printf ("\n\n**********Continue with result processing (y/n)?"); _String c_str 
(StringFromConsole()); if (logInputMode) { loggedUserInputs && & c_str; } if (c_str.getChar(0) !='n' && c_str.getChar(0)!='N' ) { long choice = DisplayListOfPostChoices(); while (choice != -1) { _ExecutionList postEx; argFile = *(_String*)(*(_List*)availablePostProcessors(choice-1))(1); PushFilePath (argFile); ReadBatchFile (argFile, postEx); postEx.Execute(); PopFilePath (); printf ("\n\n**********Continue with result processing (y/n)?"); c_str = StringFromConsole(); if (logInputMode) { loggedUserInputs && & c_str; } if (c_str.getChar(0)=='n' || c_str.getChar(0)=='N' ) { break; } choice = DisplayListOfPostChoices(); } } } #ifdef __HYPHYMPI__ } ReportWarning (_String ("Node ") & (long)rank & " is shutting down\n"); #endif #ifdef _MINGW32_MEGA_ if (_HY_MEGA_Pipe != INVALID_HANDLE_VALUE) { CloseHandle (_HY_MEGA_Pipe); } #endif PurgeAll (true); GlobalShutdown (); #ifdef __HYPHYMPI__ if (rank == 0) { printf ("\n\n"); } #endif }
int main( int argc, char *argv[] ) { #ifdef __HYPHYMPI__ int rank, size; MPI_Init (&argc, &argv); MPI_Comm_rank(MPI_COMM_WORLD, &rank); MPI_Comm_size(MPI_COMM_WORLD, &size); _hy_mpi_node_rank = rank; setParameter (mpiNodeID, (_Parameter)rank); setParameter (mpiNodeCount, (_Parameter)size); if (rank == 0) #endif gtk_init (&argc, &argv); /* set up globals */ char curWd[4096]; getcwd (curWd,4096); _String baseDir (curWd); baseDir=baseDir&'/'; pathNames&& &baseDir; baseDirectory = baseDir; for (long i=1; i<argc;i++) { _String thisArg (argv[i]); if (thisArg.beginswith ("BASEPATH=")) { baseDirectory = thisArg.Cut(9,-1); if (baseDirectory.sLength) { if (baseDirectory.sData[baseDirectory.sLength-1]!='/') baseDirectory = baseDirectory&"/"; } } else if (thisArg.beginswith ("USEPATH=")) { _String baseArgDir (thisArg,8,-1); errorFileName = baseArgDir & errorFileName; messageFileName = baseArgDir & messageFileName; pathNames.Delete (0); pathNames&& &baseDir; } else if (thisArg.beginswith ("CPU=")) { #ifdef __MP__ _String cpus = thisArg.Cut(4,-1); systemCPUCount = cpus.toNum(); if (systemCPUCount<1) systemCPUCount = 1; #ifdef __MP2__ pthread_setconcurrency (systemCPUCount+1); #endif #endif } #ifdef __HYPHYMPI__ else if (thisArg == _String("MPIOPTIMIZER")) { mpiParallelOptimizer = true; setParameter (mpiNodeCount, 0.0); } else if (thisArg == _String("MPIPARTITIONS")) { mpiPartitionOptimizer = true; setParameter (mpiNodeCount, 0.0); } #endif } #ifdef __HYPHYMPI__ if (rank == 0) #endif { baseDir = baseDirectory & "GTKResources"; _List scanRes; ScanDirectoryForFileNames(baseDir,scanRes,false); if (scanRes.lLength == 0) { GtkWidget * noRez = gtk_message_dialog_new (NULL, GTK_DIALOG_MODAL, GTK_MESSAGE_ERROR, GTK_BUTTONS_OK, "HYPHY_GTK was unable to find a required GTKResources directory in %s. 
Please use BASEPATH= command line option to specify where the installation directory of HyPhy can be found.", baseDirectory.sData); gtk_dialog_run (GTK_DIALOG (noRez)); gtk_widget_destroy (noRez); return 1; } _String rcPath = baseDir & "/theme/theme.rc"; //printf ("Loading res files from %s\n", rcPath.sData); gtk_rc_parse (rcPath.sData); } GlobalStartup(); #ifdef __HYPHYMPI__ if (rank == 0) { #endif GdkDisplay * defDisplay = gdk_screen_get_display (gdk_screen_get_default()); hSizeCursor = gdk_cursor_new_for_display (defDisplay,GDK_SB_H_DOUBLE_ARROW); pickUpCursor = gdk_cursor_new_for_display (defDisplay,GDK_TARGET); dropOffCursor = gdk_cursor_new_for_display (defDisplay,GDK_TCROSS); screenPContext = gdk_pango_context_get_for_screen (gdk_screen_get_default()); tablePDMenuIcon = (GdkPixbuf*)ProcureIconResource(4020); /*{ GdkScreen * defD = gdk_screen_get_default(); fontConversionFactor = 72.27 / (gdk_screen_get_height (defD) *25.4 / gdk_screen_get_height_mm(defD)); printf ("Pango conversion factor computed at: %g\n", fontConversionFactor); }*/ ReadInTemplateFiles (); hyphyConsoleWindow = new _HYConsoleWindow ("HYPHY Console"); ReadPreferences (); SetStatusLine ("None","Idle","00:00:00"); while (gtk_events_pending()) gtk_main_iteration(); SetPreferences (); ReadGeneticCodes (); ReadModelTemplates (); ReadTreeProcessors (); MoveConsoleWindow (consolePositionRectangle); StringToConsole (hyphyCiteString); hyphyConsoleWindow->BringToFront(); #ifdef __HYPHYMPI__ { char statBuffer[1024]; sprintf (statBuffer,"MPI version of HyPhy running on %d nodes (a master and %d compute nodes) in %s mode\n", size, size-1, mpiPartitionOptimizer?"partition":(mpiParallelOptimizer?"rate heterogeneity":"normal")); BufferToConsole (statBuffer); } #endif g_timeout_add (100,GlobalQueueTimer,nil); g_timeout_add (1000,progressTimerFunction,nil); gtk_main (); WritePreferences(); #ifdef __HYPHYMPI__ } else // slave node { if (mpiParallelOptimizer || mpiPartitionOptimizer) mpiOptimizerLoop (rank, 
size); else mpiNormalLoop (rank, size, baseDir); } #endif GlobalShutdown(); return 0; }