Пример #1
0
/**
    Applies the reflectors V2 (with their T factors) from the left to the
    eigenvector matrix Z, i.e. Z := (I - V2*T2*V2') * Z, sharing the ne
    columns of Z between the GPU path and the CPU threads.

    The split point n_gpu is computed from magma_get_dbulge_gcperf(), the
    number of GPUs and the number of threads. Columns 1:n_gpu go to the GPU
    path and columns n_gpu+1:ne to the CPU threads. When n_gpu == ne
    (in particular whenever threads <= 1) everything is done by
    magma_dbulge_applyQ_v2_m.

    @param[in]     nrgpu    number of GPUs used for the split computation and
                            forwarded to the worker routines
    @param[in]     threads  number of threads; thread 0 is the calling thread,
                            threads 1..threads-1 are created with pthreads
    @param[in]     uplo     not referenced by this routine
    @param[in]     n        forwarded to the worker routines
                            (presumably the order of the matrix -- confirm)
    @param[in]     nb       forwarded to the worker routines
    @param[in]     ne       number of columns of Z to update
    @param[in]     Vblksiz  forwarded to the worker routines
    @param[in,out] Z        eigenvector matrix, leading dimension ldz
    @param[in]     ldz      leading dimension of Z
    @param[in]     V        reflectors, leading dimension ldv
    @param[in]     ldv      leading dimension of V
    @param[in]     TAU      forwarded to the hybrid worker routine
    @param[in]     T        T factors, leading dimension ldt
    @param[in]     ldt      leading dimension of T
    @param[out]    info     written by magma_dbulge_applyQ_v2_m on the GPU-only
                            path and set to the error code on allocation
                            failure; NOTE(review): it is not written on a
                            successful hybrid (GPU+CPU) run.

    @return MAGMA_SUCCESS, or the error code returned by magma_malloc_cpu when
            a host allocation fails.
*/
extern "C" magma_int_t
magma_dbulge_back_m(magma_int_t nrgpu, magma_int_t threads, char uplo,
                        magma_int_t n, magma_int_t nb,
                        magma_int_t ne, magma_int_t Vblksiz,
                        double *Z, magma_int_t ldz,
                        double *V, magma_int_t ldv,
                        double *TAU,
                        double *T, magma_int_t ldt,
                        magma_int_t* info)
{
    magma_setlapack_numthreads(1);

    double f = 1.;
    magma_int_t n_gpu = ne;

    // perf_temp2 = perf_temp^nrgpu, hence
    // (1 - perf_temp2)/(1 - perf_temp) = 1 + perf_temp + ... + perf_temp^(nrgpu-1).
    // NOTE(review): this looks like a model in which every additional GPU
    // contributes 85% of the previous one -- confirm.
    double perf_temp  = .85;
    double perf_temp2 = perf_temp;
    for (magma_int_t itmp = 1; itmp < nrgpu; ++itmp)
        perf_temp2 *= perf_temp;

    magma_int_t gpu_cpu_perf = magma_get_dbulge_gcperf();
    if (threads > 1) {
        // Fraction of the columns given to the GPU path; the remaining
        // columns are shared by the threads-1 CPU worker threads.
        f = 1. / (1. + (double)(threads-1)/ ((double)gpu_cpu_perf*(1.-perf_temp2)/(1.-perf_temp)));
        n_gpu = (magma_int_t)(f*ne);
    }

    /****************************************************
     *  apply V2 from left to the eigenvectors Z. dZ = (I-V2*T2*V2')*Z
     * **************************************************/

    if (n_gpu < ne)
    {
        /*============================
         *  use GPU+CPU's
         *==========================*/
        // define the size of Q to be done on CPU's and the size on GPU's
        // note that GPU use Q(1:N_GPU) and CPU use Q(N_GPU+1:N)
        #ifdef ENABLE_DEBUG
        printf("---> calling GPU + CPU(if N_CPU>0) to apply V2 to Z with NE %d     N_GPU %d   N_CPU %d\n",ne, n_gpu, ne-n_gpu);
        #endif
        magma_dapplyQ_m_data data_applyQ(nrgpu, threads, n, ne, n_gpu, nb, Vblksiz, Z, ldz, V, ldv, TAU, T, ldt);

        magma_int_t err;

        // Per-thread argument records; check the allocation before use.
        magma_dapplyQ_m_id_data* arg;
        err = magma_malloc_cpu((void**) &arg, threads*sizeof(magma_dapplyQ_m_id_data));
        if (err != MAGMA_SUCCESS) {
            *info = err;
            magma_setlapack_numthreads(threads);
            return err;
        }

        pthread_t* thread_id;
        err = magma_malloc_cpu((void**) &thread_id, threads*sizeof(pthread_t));
        if (err != MAGMA_SUCCESS) {
            magma_free_cpu(arg);
            *info = err;
            magma_setlapack_numthreads(threads);
            return err;
        }

        pthread_attr_t thread_attr;

        // ===============================
        // relaunch thread to apply Q
        // ===============================
        // Set one thread per core
        pthread_attr_init(&thread_attr);
        pthread_attr_setscope(&thread_attr, PTHREAD_SCOPE_SYSTEM);
        pthread_setconcurrency(threads);

        // Launch threads 1..threads-1; the calling thread acts as thread 0.
        for (magma_int_t thread = 1; thread < threads; thread++)
        {
            arg[thread] = magma_dapplyQ_m_id_data(thread, &data_applyQ);
            pthread_create(&thread_id[thread], &thread_attr, magma_dapplyQ_m_parallel_section, &arg[thread]);
        }
        arg[0] = magma_dapplyQ_m_id_data(0, &data_applyQ);
        magma_dapplyQ_m_parallel_section(&arg[0]);

        // Wait for completion
        for (magma_int_t thread = 1; thread < threads; thread++)
        {
            void *exitcodep;
            pthread_join(thread_id[thread], &exitcodep);
        }

        // Release the attribute object initialised above.
        pthread_attr_destroy(&thread_attr);

        magma_free_cpu(thread_id);
        magma_free_cpu(arg);
    }
    else
    {
        /*============================
         *  use only GPU
         *==========================*/
        magma_dbulge_applyQ_v2_m(nrgpu, 'L', ne, n, nb, Vblksiz, Z, ldz, V, ldv, T, ldt, info);
        magma_device_sync();
    }

    // NOTE(review): the LAPACK thread count is set to `threads`, not to the
    // value that was active on entry (which is not queried here).
    magma_setlapack_numthreads(threads);
    return MAGMA_SUCCESS;
}
Пример #2
0
/**
    Purpose
    -------
    Runs the multi-threaded bulge-chasing stage
    (magma_chetrd_hb2st_parallel_section) on the band matrix A and then
    copies the resulting diagonal and off-diagonal of A into the real
    arrays d and e. The Householder reflectors are stored in V and TAU and,
    if requested, the T factors in T.

    The number of threads is taken from magma_get_parallel_numthreads();
    the LAPACK thread count is set to 1 for the duration of the call and
    restored afterwards.

    Arguments
    ---------
    @param[in]
    uplo    magma_uplo_t
      -     = MagmaUpper:  Upper triangles of A is stored;
      -     = MagmaLower:  Lower triangles of A is stored.

    @param[in]
    n       INTEGER
            The order of the matrix A.  N >= 0.

    @param[in]
    nb      INTEGER
            The order of the band matrix A.  N >= NB >= 0.

    @param[in]
    Vblksiz INTEGER
            The size of the block of householder vectors applied at once.

    @param[in,out]
    A       (workspace) COMPLEX array, dimension (LDA, N)
            On entry the band matrix.
            On exit its diagonal and first off-diagonal are the entries
            copied into d and e.

    @param[in]
    lda     INTEGER
            The leading dimension of the array A.  LDA >= 2*NB.

    @param[out]
    d       REAL array, dimension (N)
            The diagonal elements of the tridiagonal matrix T:
            D(i) = A(i,i).

    @param[out]
    e       REAL array, dimension (N-1)
            The off-diagonal elements of the tridiagonal matrix T:
            E(i) = A(i,i+1) if UPLO = MagmaUpper, E(i) = A(i+1,i) if UPLO = MagmaLower.

    @param[out]
    V       COMPLEX array, dimension (BLKCNT, LDV, VBLKSIZ)
            On exit it contains the blocks of householder reflectors
            BLKCNT is the number of block and it is returned by the function MAGMA_BULGE_GET_BLKCNT.

    @param[in]
    ldv     INTEGER
            The leading dimension of V.
            LDV > NB + VBLKSIZ + 1

    @param[out]
    TAU     COMPLEX dimension(BLKCNT, VBLKSIZ)
            Zeroed on entry and then filled by the parallel section
            (presumably the scalar factors of the reflectors in V -- confirm).

    @param[in]
    compT   INTEGER
            if COMPT = 0 T is not computed
            if COMPT = 1 T is computed

    @param[out]
    T       COMPLEX dimension(LDT *)
            if COMPT = 1 on exit contains the matrices T needed for Q2
            NOTE: T is zeroed on entry regardless of COMPT, so it must
            always point to BLKCNT*LDT*VBLKSIZ elements.

    @param[in]
    ldt     INTEGER
            The leading dimension of T.
            LDT > Vblksiz

    @return MAGMA_SUCCESS, or the error code returned by magma_malloc_cpu
            when a host allocation fails (d and e are then not written).

    @ingroup magma_cheev_2stage
    ********************************************************************/
extern "C" magma_int_t
magma_chetrd_hb2st(
    magma_uplo_t uplo, magma_int_t n, magma_int_t nb, magma_int_t Vblksiz,
    magmaFloatComplex *A, magma_int_t lda, float *d, float *e,
    magmaFloatComplex *V, magma_int_t ldv, magmaFloatComplex *TAU,
    magma_int_t compT, magmaFloatComplex *T, magma_int_t ldt)
{
    #ifdef ENABLE_TIMER
    real_Double_t timeblg=0.0;
    #endif

    magma_int_t threads = magma_get_parallel_numthreads();
    magma_int_t mklth   = magma_get_lapack_numthreads();
    magma_set_lapack_numthreads(1);

    magma_int_t INgrsiz=1;
    magma_int_t blkcnt = magma_bulge_get_blkcnt(n, nb, Vblksiz);
    magma_int_t nbtiles = magma_ceildiv(n, nb);

    // Widen to size_t before multiplying so that the byte counts cannot
    // overflow magma_int_t for large problems.
    memset(T,   0, (size_t)blkcnt*ldt*Vblksiz*sizeof(magmaFloatComplex));
    memset(TAU, 0, (size_t)blkcnt*Vblksiz*sizeof(magmaFloatComplex));
    memset(V,   0, (size_t)blkcnt*ldv*Vblksiz*sizeof(magmaFloatComplex));

    magma_int_t err;

    // Progress table shared by the threads; zero-initialised.
    volatile magma_int_t* prog;
    err = magma_malloc_cpu((void**) &prog, (2*nbtiles+threads+10)*sizeof(magma_int_t));
    if (err != MAGMA_SUCCESS) {
        magma_set_lapack_numthreads(mklth);
        return err;
    }
    memset((void *) prog, 0, (2*nbtiles+threads+10)*sizeof(magma_int_t));

    magma_cbulge_id_data* arg;
    err = magma_malloc_cpu((void**) &arg, threads*sizeof(magma_cbulge_id_data));
    if (err != MAGMA_SUCCESS) {
        magma_free_cpu((void *) prog);
        magma_set_lapack_numthreads(mklth);
        return err;
    }

    pthread_t* thread_id;
    err = magma_malloc_cpu((void**) &thread_id, threads*sizeof(pthread_t));
    if (err != MAGMA_SUCCESS) {
        magma_free_cpu(arg);
        magma_free_cpu((void *) prog);
        magma_set_lapack_numthreads(mklth);
        return err;
    }
    pthread_attr_t thread_attr;

    magma_cbulge_data data_bulge;
    magma_cbulge_data_init(&data_bulge, threads, n, nb, nbtiles, INgrsiz, Vblksiz, compT,
                                 A, lda, V, ldv, TAU, T, ldt, prog);

    // Set one thread per core
    pthread_attr_init(&thread_attr);
    pthread_attr_setscope(&thread_attr, PTHREAD_SCOPE_SYSTEM);
    pthread_setconcurrency(threads);

    //timing
    #ifdef ENABLE_TIMER
    timeblg = magma_wtime();
    #endif

    // Launch threads 1..threads-1; the calling thread acts as thread 0.
    for (magma_int_t thread = 1; thread < threads; thread++) {
        magma_cbulge_id_data_init(&(arg[thread]), thread, &data_bulge);
        pthread_create(&thread_id[thread], &thread_attr, magma_chetrd_hb2st_parallel_section, &arg[thread]);
    }
    magma_cbulge_id_data_init(&(arg[0]), 0, &data_bulge);
    magma_chetrd_hb2st_parallel_section(&arg[0]);

    // Wait for completion
    for (magma_int_t thread = 1; thread < threads; thread++) {
        void *exitcodep;
        pthread_join(thread_id[thread], &exitcodep);
    }

    // Release the attribute object initialised above.
    pthread_attr_destroy(&thread_attr);

    // timing
    #ifdef ENABLE_TIMER
    timeblg = magma_wtime()-timeblg;
    printf("  time BULGE+T = %f\n", timeblg);
    #endif

    magma_free_cpu(thread_id);
    magma_free_cpu(arg);
    magma_free_cpu((void *) prog);
    magma_cbulge_data_destroy(&data_bulge);

    magma_set_lapack_numthreads(mklth);

    /*================================================
     *  store resulting diag and lower diag d and e
     *  note that d and e are always real
     *================================================*/

    /* Only the real part of each diagonal / off-diagonal entry of A is
     * copied. In the lower case the diagonal is at row offset 0 of each
     * column of A and the off-diagonal at offset 1; in the upper case they
     * are at offsets nb and nb-1.
     * The last diagonal entry is stored only when n > 0, so that the
     * documented case N = 0 does not write d[-1].
     */
#if defined(PRECISION_z) || defined(PRECISION_c)
    if (uplo == MagmaLower) {
        for (magma_int_t i=0; i < n-1; i++) {
            d[i] = MAGMA_C_REAL( A[i*lda  ] );
            e[i] = MAGMA_C_REAL( A[i*lda+1] );
        }
        if (n > 0)
            d[n-1] = MAGMA_C_REAL(A[(n-1)*lda]);
    } else { /* MagmaUpper not tested yet */
        for (magma_int_t i=0; i < n-1; i++) {
            d[i] = MAGMA_C_REAL( A[i*lda+nb]   );
            e[i] = MAGMA_C_REAL( A[i*lda+nb-1] );
        }
        if (n > 0)
            d[n-1] = MAGMA_C_REAL(A[(n-1)*lda+nb]);
    } /* end MagmaUpper */
#else
    if ( uplo == MagmaLower ) {
        for (magma_int_t i=0; i < n-1; i++) {
            d[i] = A[i*lda];   // diag
            e[i] = A[i*lda+1]; // lower diag
        }
        if (n > 0)
            d[n-1] = A[(n-1)*lda];
    } else {
        for (magma_int_t i=0; i < n-1; i++) {
            d[i] = A[i*lda+nb];   // diag
            e[i] = A[i*lda+nb-1]; // lower diag
        }
        if (n > 0)
            d[n-1] = A[(n-1)*lda+nb];
    }
#endif
    return MAGMA_SUCCESS;
}
Пример #3
0
//__________________________________________________________________________________
int main (int argc, char* argv[])
{
	mainArgCount = argc - 1;
	
	
	#ifdef	__HYPHYMPI__
		  int 		   rank, 
		  			   size;
		  			   			   			 
		  MPI_Init	   (&argc, &argv);
		  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
		  MPI_Comm_size(MPI_COMM_WORLD, &size);
		  
		  setParameter  (mpiNodeID, (_Parameter)rank);
		  setParameter	(mpiNodeCount, (_Parameter)size);
		  _hy_mpi_node_rank = rank;
		  
		  if (rank == 0)
		  {
		  	
	#endif
	
	
	//for (long k=0; k<NSIG; k++)
	//{
	//	signal(k, &hyphyBreak);
	//}
	
	#ifdef	__HYPHYMPI__
		  }
	#endif
	
	char 	curWd[4096],
		    dirSlash = GetPlatformDirectoryChar ();
	getcwd (curWd,4096);

	_String baseDir (curWd), 
			argFile;
		

	baseDir=baseDir & dirSlash;
	pathNames&& &baseDir;
	
	baseDirectory = baseDir;
	baseArgDir	  = baseDirectory;
	
	_ExecutionList ex;
			
#ifdef _OPENMP
	systemCPUCount = omp_get_max_threads();
#endif

	for (long i=1; i<argc;i++)
	{
		_String thisArg (argv[i]);
		if (thisArg.sData[0]=='-')
		{
			ProcessConfigStr (thisArg);
		}
		else
			if (thisArg.beginswith ("BASEPATH="))
			{
				baseArgDir = thisArg.Cut(9,-1);
				if (baseArgDir.sLength)
				{
					if (baseArgDir.sData[baseArgDir.sLength-1]!=dirSlash)
						baseArgDir = baseArgDir&dirSlash;
						
					baseDirectory = baseArgDir;
				}
			}
			else
				if (thisArg.beginswith ("USEPATH="))
				{
					baseDir 			= thisArg.Cut(8,-1);
					errorFileName 		= baseDir & errorFileName;
					messageFileName 	= baseDir & messageFileName;
					pathNames.Delete 	(0);
					pathNames&& 		&baseDir;
				}
				else
					#ifdef __MP__
					if (thisArg.beginswith ("CPU="))
					{
						_String cpus = thisArg.Cut(4,-1);
						systemCPUCount = cpus.toNum();
						if (systemCPUCount<1)
							systemCPUCount = 1;
						#ifdef __MP2__
							pthread_setconcurrency (systemCPUCount+1);
						#endif
					}
					else
					#endif
					#ifdef __HYPHYMPI__
					if (thisArg == _String("MPIOPTIMIZER"))
					{
						mpiParallelOptimizer = true;
			  		    setParameter	(mpiNodeCount, 0.0);
			  		}
					else
						if (thisArg == _String("MPIPARTITIONS"))
						{
							mpiPartitionOptimizer = true;
				  		    setParameter	(mpiNodeCount, 0.0);
				  		}
				  		else
					#endif				
				argFile = thisArg;
	}
	
  	GlobalStartup();

	if (calculatorMode)
	{
		printf ("\nHYPHY is running in calculator mode. Type 'exit' when you are finished.\n");
		while (ExpressionCalculator()) ;
		return 0;
	}
	
	if (pipeMode)
	{
		_String bfIn (stdin);
		_ExecutionList exIn (bfIn);
		exIn.Execute();
		GlobalShutdown();
		return 0;
	}
	
	// try to read the preferences
	_String		prefFile (curWd);
	prefFile = prefFile & '/' & prefFileName;
	FILE	 * testPrefFile = fopen (prefFile.sData,"r");
	if (!testPrefFile)
	{
		prefFile = baseArgDir & prefFileName;
		testPrefFile = fopen (prefFile.sData,"r");
	}
	if (testPrefFile)
	{		
		fclose(testPrefFile);
		ReadBatchFile (prefFile,ex);
		ex.Execute();
		ex.Clear();
	}
	//printf ("Node %d before mpiParallelOptimizer\n", rank);
	#ifdef __HYPHYMPI__
		if (rank>0)
		{
			if (mpiParallelOptimizer || mpiPartitionOptimizer)
				mpiOptimizerLoop (rank, size);
			else
				mpiNormalLoop (rank, size, baseDir);
			/*argFile = "SHUTDOWN_CONFIRM";
			MPISendString (argFile, senderID);*/
		}
		else
		{
	#endif
	if (!argFile.sLength)
	{	
		long selection = -2;
		if (!updateMode)
			selection = DisplayListOfChoices();

		if (selection == -1)
		{			
			dialogPrompt = "Batch file to run:";
			_String fStr (ReturnDialogInput (true));
			if (logInputMode)
			{
				_String tts = loggedFileEntry&fStr;
				loggedUserInputs && & tts;
			}
			
			PushFilePath (fStr);
			ReadBatchFile (fStr,ex);
		}
		else
		{
			_String templ;

			if (selection >= 0)
			    templ = baseArgDir &"TemplateBatchFiles" & dirSlash;
			else
			  	templ = baseArgDir & "TemplateBatchFiles" & dirSlash & "WebUpdate.bf";				
			
			if (selection >= 0)
				templ= templ&*(_String*)(*(_List*)availableTemplateFiles(selection))(2);

			PushFilePath (templ);
			ReadBatchFile (templ,ex);
		}
	}
	else
	{
#ifndef __MINGW32__
		if (argFile.sData[0] != '/')
			argFile		  = baseDirectory & argFile;
#else
		if (argFile.sData[1] != ':') // not an absolute path 
			argFile		  = baseDirectory & argFile;		
#endif
		PushFilePath  (argFile);
		ReadBatchFile (argFile,ex);
	}
	
	ex.Execute();
	
	if (usePostProcessors && (!updateMode))
	{
		ReadInPostFiles();
		printf ("\n\n**********Continue with result processing (y/n)?");
		_String c_str (StringFromConsole());
	
		if (logInputMode)
			loggedUserInputs && & c_str;

		if (c_str.sData[0]!='n' && c_str.sData[0]!='N' )
		{
			long choice = DisplayListOfPostChoices();
			while (choice != -1)
			{
				_ExecutionList postEx;
				argFile = *(_String*)(*(_List*)availablePostProcessors(choice-1))(1);
				PushFilePath (argFile);
				ReadBatchFile (argFile, postEx);
				postEx.Execute();	
				PopFilePath ();
				printf ("\n\n**********Continue with result processing (y/n)?");

				_String c_str (StringFromConsole());
				if (logInputMode)
					loggedUserInputs && & c_str;

				if (c_str.sData[0]=='n' || c_str.sData[0]=='N' ) break;
				
				choice = DisplayListOfPostChoices();				
			}
		}
	}
	#ifdef __HYPHYMPI__
	}
	argFile = _String ("Node ") & (long)rank & " is shutting down\n"; 
	ReportWarning (argFile);
	#endif
	batchLanguageFunctions.Clear();
	GlobalShutdown();
	
	#ifdef __HYPHYMPI__
		if (rank == 0)
			printf ("\n\n");			
	#endif
	
}
Пример #4
0
extern "C" magma_int_t magma_ssytrd_hb2st(magma_int_t threads, char uplo, magma_int_t n, magma_int_t nb, magma_int_t Vblksiz,
                                          float *A, magma_int_t lda, float *D, float *E,
                                          float *V, magma_int_t ldv, float *TAU, magma_int_t compT, float *T, magma_int_t ldt)
{
    /*  -- MAGMA (version 1.3.0) --
       Univ. of Tennessee, Knoxville
       Univ. of California, Berkeley
       Univ. of Colorado, Denver
       November 2012

    Purpose
    =======
    Runs the multi-threaded bulge-chasing stage
    (magma_ssytrd_hb2st_parallel_section) on the band matrix A and then
    copies the resulting diagonal and off-diagonal of A into D and E.
    The elapsed time of the parallel stage is printed to stdout.

    Arguments
    =========
    THREADS (input) INTEGER
            Specifies the number of pthreads used.
            THREADS > 0

    UPLO    (input) CHARACTER*1
            = 'U':  Upper triangles of A is stored;
            = 'L':  Lower triangles of A is stored.

    N       (input) INTEGER
            The order of the matrix A.  N >= 0.

    NB      (input) INTEGER
            The order of the band matrix A.  N >= NB >= 0.

    VBLKSIZ (input) INTEGER
            The size of the block of householder vectors applied at once.

    A       (input/workspace) REAL array, dimension (LDA, N)
            On entry the band matrix.
            On exit its diagonal and first off-diagonal are the entries
            copied into D and E.

    LDA     (input) INTEGER
            The leading dimension of the array A.  LDA >= 2*NB.

    D       (output) REAL array, dimension (N)
            The diagonal elements of the tridiagonal matrix T:
            D(i) = A(i,i).

    E       (output) REAL array, dimension (N-1)
            The off-diagonal elements of the tridiagonal matrix T:
            E(i) = A(i,i+1) if UPLO = 'U', E(i) = A(i+1,i) if UPLO = 'L'.

    V       (output) REAL array, dimension (BLKCNT, LDV, VBLKSIZ)
            On exit it contains the blocks of householder reflectors
            BLKCNT is the number of block and it is returned by the function MAGMA_BULGE_GET_BLKCNT.

    LDV     (input) INTEGER
            The leading dimension of V.
            LDV > NB + VBLKSIZ + 1

    TAU     (output) REAL dimension(BLKCNT, VBLKSIZ)
            Zeroed on entry and then filled by the parallel section
            (presumably the scalar factors of the reflectors in V -- confirm).

    COMPT   (input) INTEGER
            if COMPT = 0 T is not computed
            if COMPT = 1 T is computed

    T       (output) REAL dimension(LDT *)
            if COMPT = 1 on exit contains the matrices T needed for Q2
            NOTE: T is zeroed on entry regardless of COMPT, so it must
            always point to BLKCNT*LDT*VBLKSIZ elements.

    LDT     (input) INTEGER
            The leading dimension of T.
            LDT > Vblksiz

    Returns MAGMA_SUCCESS.

    =====================================================================  */

    // magma_wtime() is an absolute wall-clock reading; it must be kept in
    // double precision, otherwise the difference of two readings is lost in
    // float rounding.
    double timeblg=0.0;

    // NOTE(review): the BLAS thread count is restored to `threads`, not to
    // the value that was active on entry (which is not queried here).
    magma_int_t mklth = threads;

    magma_int_t INgrsiz=1;

    magma_int_t blkcnt = magma_bulge_get_blkcnt(n, nb, Vblksiz);

    magma_int_t nbtiles = magma_ceildiv(n, nb);

    // Widen to size_t before multiplying so that the byte counts cannot
    // overflow magma_int_t for large problems.
    memset(T,   0, (size_t)blkcnt*ldt*Vblksiz*sizeof(float));
    memset(TAU, 0, (size_t)blkcnt*Vblksiz*sizeof(float));
    memset(V,   0, (size_t)blkcnt*ldv*Vblksiz*sizeof(float));

    // Progress table shared by the threads; zero-initialised.
    magma_int_t* prog = new magma_int_t[2*nbtiles+threads+10];
    memset(prog, 0, (2*nbtiles+threads+10)*sizeof(magma_int_t));

    magma_sbulge_id_data* arg = new magma_sbulge_id_data[threads];
    pthread_t* thread_id = new pthread_t[threads];

    pthread_attr_t thread_attr;

#if defined(USEMKL)
    mkl_set_num_threads( 1 );
#endif
#if defined(USEACML)
    omp_set_num_threads(1);
#endif

    magma_sbulge_data data_bulge(threads, n, nb, nbtiles, INgrsiz, Vblksiz, compT,
                                 A, lda, V, ldv, TAU, T, ldt, prog);

    // Set one thread per core
    pthread_attr_init(&thread_attr);
    pthread_attr_setscope(&thread_attr, PTHREAD_SCOPE_SYSTEM);
    pthread_setconcurrency(threads);

    //timing
    timeblg = magma_wtime();

    // Launch threads 1..threads-1; the calling thread acts as thread 0.
    for (magma_int_t thread = 1; thread < threads; thread++)
    {
        arg[thread] = magma_sbulge_id_data(thread, &data_bulge);
        pthread_create(&thread_id[thread], &thread_attr, magma_ssytrd_hb2st_parallel_section, &arg[thread]);
    }
    arg[0] = magma_sbulge_id_data(0, &data_bulge);
    magma_ssytrd_hb2st_parallel_section(&arg[0]);

    // Wait for completion
    for (magma_int_t thread = 1; thread < threads; thread++)
    {
        void *exitcodep;
        pthread_join(thread_id[thread], &exitcodep);
    }

    // Release the attribute object initialised above.
    pthread_attr_destroy(&thread_attr);

    // timing
    timeblg = magma_wtime()-timeblg;


    delete[] thread_id;
    delete[] arg;
    delete[] prog;

    printf("time BULGE+T = %f \n" ,timeblg);

#if defined(USEMKL)
    mkl_set_num_threads( mklth );
#endif
#if defined(USEACML)
    omp_set_num_threads(mklth);
#endif

    /*================================================
     *  store resulting diag and lower diag D and E
     *  note that D and E are always real
     *================================================*/

    /* In the lower case the diagonal is at row offset 0 of each column of A
     * and the off-diagonal at offset 1; in the upper case they are at
     * offsets nb and nb-1.
     * The last diagonal entry is stored only when n > 0, so that the
     * documented case N = 0 does not write D[-1].
     */
#if defined(PRECISION_z) || defined(PRECISION_c)
    if(uplo==MagmaLower){
        for (magma_int_t i=0; i < n-1 ; i++)
        {
            D[i] = MAGMA_S_REAL(A[i*lda  ]);
            E[i] = MAGMA_S_REAL(A[i*lda+1]);
        }
        if (n > 0)
            D[n-1] = MAGMA_S_REAL(A[(n-1)*lda]);
    } else { /* MagmaUpper not tested yet */
        for (magma_int_t i=0; i<n-1; i++)
        {
            D[i]  =  MAGMA_S_REAL(A[i*lda+nb]);
            E[i] = MAGMA_S_REAL(A[i*lda+nb-1]);
        }
        if (n > 0)
            D[n-1] = MAGMA_S_REAL(A[(n-1)*lda+nb]);
    } /* end MagmaUpper */
#else
    if( uplo == MagmaLower ){
        for (magma_int_t i=0; i < n-1; i++) {
            D[i] = A[i*lda];   // diag
            E[i] = A[i*lda+1]; //lower diag
        }
        if (n > 0)
            D[n-1] = A[(n-1)*lda];
    } else {
        for (magma_int_t i=0; i < n-1; i++) {
            D[i] = A[i*lda+nb];   // diag
            E[i] = A[i*lda+nb-1]; //lower diag
        }
        if (n > 0)
            D[n-1] = A[(n-1)*lda+nb];
    }
#endif
    return MAGMA_SUCCESS;

}
Пример #5
0
/* Function: workpool_start()
 * Date:     SRE, Thu Jul 16 11:09:05 1998 [St. Louis]
 *
 * Purpose:  Allocate a workpool_s structure, record the calibration
 *           settings in it, initialize its two mutexes, and launch
 *           num_threads worker threads that all receive the pool as
 *           their argument.
 *
 * Args:     hmm         - the HMM to calibrate
 *           lenmean     - mean sequence length
 *           lensd       - std. dev. for sequence length
 *           fixedlen    - 0, or a fixed length for seqs (bypass of Gaussian)
 *           randomseq   - i.i.d. frequencies for residues, 0..Alphabet_size-1
 *           nsample     - how many seqs to calibrate on
 *           hist        - histogram structure for storing results
 *           num_threads - how many worker threads to start
 *
 * Returns:  ptr to the new struct workpool_s.
 *           Caller must wait for threads to finish with workpool_stop(),
 *           then free the structure with workpool_free().
 */
static struct workpool_s *
workpool_start(struct plan7_s *hmm, float lenmean, float lensd, int fixedlen,
	       float *randomseq, int nsample, struct histogram_s *hist, 
	       int num_threads)
{
  struct workpool_s *pool;
  pthread_attr_t     thread_attr;
  int                status;
  int                t;

  pool         = MallocOrDie(sizeof(struct workpool_s));
  pool->thread = MallocOrDie(num_threads * sizeof(pthread_t));

  /* Settings handed in by the caller. */
  pool->hmm         = hmm;
  pool->fixedlen    = fixedlen;
  pool->lenmean     = lenmean;
  pool->lensd       = lensd;
  pool->randomseq   = randomseq;
  pool->nsample     = nsample;

  /* Result bookkeeping starts out empty. */
  pool->nseq        = 0;
  pool->hist        = hist;
  pool->max_score   = -FLT_MAX;
  pool->num_threads = num_threads;

  StopwatchZero(&(pool->watch));

  status = pthread_mutex_init(&(pool->input_lock), NULL);
  if (status != 0) {
    Die("pthread_mutex_init FAILED; %s\n", strerror(status));
  }
  status = pthread_mutex_init(&(pool->output_lock), NULL);
  if (status != 0) {
    Die("pthread_mutex_init FAILED; %s\n", strerror(status));
  }

  /* Start the worker threads.
   * Getting real concurrency out of POSIX threads is platform dependent:
   *  - IRIX 6.5: system scope threads need root or a specially configured
   *    /etc/capability, so PTHREAD_SCOPE_SYSTEM is not requested on SGI;
   *    pthread_setconcurrency() (nonportable, new in 6.5) is used instead.
   *    Before 6.5 there is no way to get good concurrency.
   *  - FreeBSD (3.0 snapshots): pthread_attr_setscope() is not provided at
   *    all, hence the HAVE_PTHREAD_ATTR_SETSCOPE guard.
   *  - Sun Solaris: system scope is required for concurrency.
   *  - Linux and DEC Digital UNIX: fine in either scope, and no
   *    pthread_setconcurrency() call is needed.
   */
  pthread_attr_init(&thread_attr);
#if !defined(__sgi) && defined(HAVE_PTHREAD_ATTR_SETSCOPE)
  pthread_attr_setscope(&thread_attr, PTHREAD_SCOPE_SYSTEM);
#endif
#ifdef HAVE_PTHREAD_SETCONCURRENCY
  pthread_setconcurrency(num_threads+1);
#endif

  t = 0;
  while (t < num_threads) {
    status = pthread_create(&(pool->thread[t]), &thread_attr,
			    worker_thread, (void *) pool);
    if (status != 0) {
      Die("Failed to create thread %d; return code %d\n", t, status);
    }
    t++;
  }

  pthread_attr_destroy(&thread_attr);

  return pool;
}
Пример #6
0
/*
 * Program entry point.
 *
 * Uses the globals tty_tmp, term_parent, terms, term_type and fatal_error.
 *
 * The process forks immediately: the parent only waits for the child and
 * restores the terminal settings it saved before the fork, while the child
 * performs all initialization, starts the requested interface (command
 * line, ncurses, GTK and/or remote admin daemon) and finally calls
 * g00dbye().
 *
 * NOTE(review): clean_exit() and g00dbye() are assumed not to return; the
 * code following the NULL checks below relies on that.
 */
int
main( int argc, char **argv )
{
   struct cl_args *cl_args;
   struct term_node *tty_node = NULL;
   pid_t pid;
   pid_t parent_id;
#if defined(HAVE_PTHREAD_SETCONCURRENCY) && !defined(LINUX)
   int concurrent;
   int rc;
#endif

   handle_signals_parent();

   /* Save the terminal settings so the parent can restore them later */
   tcgetattr(0, &term_parent);

   parent_id = getpid();

   if ((pid = fork()) < 0)
   {
      exit(1);
   }
   else
   {
      if (pid != 0)
      {
         /* Parent: wait for the child, restore the terminal and leave */
         wait(NULL);
         tcsetattr(0, TCSANOW, &term_parent);
         exit(0);
      }
   }

   /* Child from here on */
   fatal_error = 4;

   /* Disable all signals while initializing data...*/
   handle_signals();

   setvbuf(stdout, NULL, _IONBF, 0);

   tty_tmp = (struct term_tty *)calloc(1,sizeof(struct term_tty));

   if (tty_tmp == NULL)
   {
      printf("Out of memory on calloc tty_tmp\n");
      clean_exit();
   }

   tty_tmp->term = (struct termios *)calloc(1,sizeof(struct termios));

   if (tty_tmp->term == NULL)
   {
      printf("Out of memory on calloc tty_tmp->term\n");
      clean_exit();
   }

   /* default values */
   tty_tmp->interactive = 0;
   tty_tmp->gtk = 0;
   tty_tmp->attack = -1;
   tty_tmp->mac_spoofing = -1;
   tty_tmp->splash = -1;
   /* NOTE(review): strncpy does not NUL-terminate when the source fills the
    * whole count; this is only safe if the destination arrays are at least
    * one byte larger than MAX_USERNAME / MAX_PASSWORD (tty_tmp was zeroed
    * by calloc) -- confirm against the struct definition. */
   strncpy(tty_tmp->username, VTY_USER, MAX_USERNAME);
   strncpy(tty_tmp->password, VTY_PASS, MAX_PASSWORD);
   strncpy(tty_tmp->e_password, VTY_ENABLE, MAX_PASSWORD);
   tty_tmp->port = VTY_PORT;
   tty_tmp->ip_filter = NULL;
#ifdef HAVE_GTK
   tty_tmp->buffer_log = NULL;
#endif

   cl_args = (struct cl_args *)calloc(1,sizeof(struct cl_args));

   if (cl_args == NULL)
   {
      printf("Out of memory on calloc cl_args\n");
      clean_exit();
   }

   /* No arguments at all: print the banner and a usage hint */
   if ( argc == 1 )
   {
      printf("GNU %s %s %s\n", PACKAGE, VERSION,
            "$Date: 2006/03/23 08:40:14 $");
      printf("Try '%s -h' to display the help.\n",PACKAGE);
      clean_exit();
   }

   if (getuid() != 0)
   {
      printf("You must be root to run %s %s\n", PACKAGE, VERSION);
      clean_exit();
   }

   if (term_init() < 0)
      g00dbye();

   /* Register all the protocols */
   protocol_init();

   /* -1 means no protocol was selected on the command line */
   cl_args->proto_index = -1;

   if (parser_initial(tty_tmp, cl_args, argc, argv) < 0) {
      clean_exit();
   }

   init_log();

#if defined(HAVE_PTHREAD_SETCONCURRENCY) && !defined(LINUX)
/*   concurrent = pthread_getconcurrency();*/

   concurrent = 15;/*(MAX_TERMS*MAX_PROTOCOLS*MAX_THREAD_ATTACK*2)+3;*/

   /* pthread_setconcurrency() returns the error number itself and does not
    * set errno, so the returned value is what gets reported. */
   rc = pthread_setconcurrency(concurrent);
   if (rc != 0)
   {
      thread_error("init pthread_setconcurrency()",rc);
      g00dbye();
   }
#endif

   if (interfaces_init(&terms->pcap_listen_th) < 0 )
      g00dbye();

   /* Establish TERM signal handler...*/
   posix_signal(SIGTERM, final);


#ifdef HAVE_REMOTE_ADMIN
   if (tty_tmp->daemonize)
   {
      if (admin_init(tty_tmp) < 0)
         g00dbye();
   }
#endif

   if (thread_create(&terms->uptime_th.id, &th_uptime, (void *)NULL) < 0)
      g00dbye();

   /* Command line and ncurses cannot be choosed simultaneously...*/
   if ((!tty_tmp->interactive) && (!tty_tmp->gtk) && (cl_args->proto_index != -1))
   {
      terms->work_state = INITIAL;
      tty_node = term_type[TERM_TTY].list;
      if (thread_create(&tty_node[0].thread.id, &th_tty_peer,
               (void *)cl_args) < 0)
         g00dbye();

      /* Poll every 100 ms until the command line thread is done */
      while(terms->work_state != STOPPED)
         thread_usleep(100000);
   }

#ifdef HAS_CURSES
   if (tty_tmp->interactive)
   {
      terms->work_state = INITIAL;
      if (thread_create(&terms->gui_th.id, &ncurses_gui, NULL) < 0 )
         g00dbye();
      /* Wait until the ncurses GUI is over */
      while(terms->work_state != STOPPED)
         thread_usleep(100000);
   }
   else
   {
#endif
#ifdef HAVE_GTK
      if (tty_tmp->gtk)
      {
         terms->work_state = INITIAL;
         if (thread_create(&terms->gui_gtk_th.id, &gtk_gui, NULL) < 0 )
            g00dbye();
         /* Wait until the GTK GUI is over */
         while(terms->work_state != STOPPED)
            thread_usleep(100000);
      }
#endif
#ifdef HAS_CURSES
   }
#endif

#ifdef HAVE_REMOTE_ADMIN
   if (tty_tmp->daemonize)
   {
      /* Ok, now that console (ncurses) is finished
       * we can become a true daemon... */
      become_daemon(parent_id);

      /* Wait until some important thread exits due to fatal_error...*/
      while (fatal_error == 4)
         thread_usleep(100000);
   }
#endif

   g00dbye();

   /* Only reached if g00dbye() returns */
   exit(1);
}
Пример #7
0
/**
 * Back-transformation step: applies V2 from the left to the eigenvectors Z,
 * i.e. dZ = (I - V2*T2*V2') * Z.
 *
 * When more than one parallel thread is available, the ne columns are split:
 * the first n_gpu columns are handled by the GPU and the remaining
 * ne - n_gpu columns by the CPU threads (see magma_dapplyQ_parallel_section,
 * defined elsewhere). The CPU-computed columns are then uploaded into the
 * matching columns of dZ. Otherwise the whole of Z is uploaded and processed
 * on the GPU only.
 *
 * @param uplo     not referenced by this routine.
 * @param n        number of rows of Z and dZ.
 * @param nb       forwarded unchanged to the apply-Q routines.
 * @param ne       number of columns (eigenvectors) of Z and dZ.
 * @param Vblksiz  forwarded unchanged to the apply-Q routines.
 * @param Z, ldz   host matrix and its leading dimension.
 * @param dZ, lddz device matrix and its leading dimension.
 * @param V, ldv   host reflectors and leading dimension (forwarded).
 * @param TAU      forwarded to the hybrid path only.
 * @param T, ldt   host T factors and leading dimension (forwarded).
 * @param info     forwarded to magma_dbulge_applyQ_v2 on the GPU-only path;
 *                 this routine does not write it on the hybrid path.
 * @return MAGMA_SUCCESS.
 */
extern "C" magma_int_t
magma_dbulge_back(
    magma_uplo_t uplo,
    magma_int_t n, magma_int_t nb,
    magma_int_t ne, magma_int_t Vblksiz,
    double *Z, magma_int_t ldz,
    magmaDouble_ptr dZ, magma_int_t lddz,
    double *V, magma_int_t ldv,
    double *TAU,
    double *T, magma_int_t ldt,
    magma_int_t* info)
{
    magma_int_t threads = magma_get_parallel_numthreads();
    // Remember the LAPACK thread count so it can be restored on exit; the
    // work below runs LAPACK with a single thread.
    magma_int_t mklth   = magma_get_lapack_numthreads();
    magma_set_lapack_numthreads(1);

    // Measured around the apply-Q phase; the value is not reported here.
    real_Double_t timeaplQ2=0.0;
    double f= 1.;
    magma_int_t n_gpu = ne;

    // GPU-over-CPU performance ratio. Values noted by earlier tuning:
    // 16 (SandyB./Fermi M2090), 27.5 (Westmere/Fermi M2090),
    // 37 and 50 (SandyB./Kepler K20c), 130 (Bulldozer/Kepler K20X).
    magma_int_t gpu_cpu_perf = magma_get_dbulge_gcperf();
    if (threads > 1) {
        // One thread drives the GPU; the other threads-1 work on the CPU share.
        f = 1. / (1. + (double)(threads-1)/ ((double)gpu_cpu_perf)    );
        n_gpu = (magma_int_t)(f*ne);
    }

    /****************************************************
     *  apply V2 from left to the eigenvectors Z. dZ = (I-V2*T2*V2')*Z
     * **************************************************/
    timeaplQ2 = magma_wtime();

    if (n_gpu < ne) {
        /*============================
         *  use GPU+CPU's
         *==========================*/
        // define the size of Q to be done on CPU's and the size on GPU's
        // note that GPU use Q(1:N_GPU) and CPU use Q(N_GPU+1:N)
        #ifdef ENABLE_DEBUG
        printf("---> calling GPU + CPU(if N_CPU > 0) to apply V2 to Z with NE %d     N_GPU %d   N_CPU %d\n",ne, n_gpu, ne-n_gpu);
        #endif
        magma_dapplyQ_data data_applyQ;
        magma_dapplyQ_data_init(&data_applyQ, threads, n, ne, n_gpu, nb, Vblksiz, Z, ldz, V, ldv, TAU, T, ldt, dZ, lddz);

        magma_dapplyQ_id_data* arg;
        magma_malloc_cpu((void**) &arg, threads*sizeof(magma_dapplyQ_id_data));

        pthread_t* thread_id;
        magma_malloc_cpu((void**) &thread_id, threads*sizeof(pthread_t));

        pthread_attr_t thread_attr;

        // ===============================
        // relaunch thread to apply Q
        // ===============================
        // Set one thread per core
        pthread_attr_init(&thread_attr);
        pthread_attr_setscope(&thread_attr, PTHREAD_SCOPE_SYSTEM);
        pthread_setconcurrency(threads);

        // Launch the workers 1..threads-1; the calling thread acts as id 0.
        for (magma_int_t thread = 1; thread < threads; thread++) {
            magma_dapplyQ_id_data_init(&(arg[thread]), thread, &data_applyQ);
            pthread_create(&thread_id[thread], &thread_attr, magma_dapplyQ_parallel_section, &arg[thread]);
        }
        magma_dapplyQ_id_data_init(&(arg[0]), 0, &data_applyQ);
        magma_dapplyQ_parallel_section(&arg[0]);

        // Wait for completion; the workers' exit values are not used.
        for (magma_int_t thread = 1; thread < threads; thread++) {
            pthread_join(thread_id[thread], NULL);
        }

        // Release the attribute object initialised above.
        pthread_attr_destroy(&thread_attr);

        magma_free_cpu(thread_id);
        magma_free_cpu(arg);
        magma_dapplyQ_data_destroy(&data_applyQ);

        // Upload the CPU-computed columns n_gpu..ne-1. The device matrix has
        // leading dimension lddz, so its column offset must use lddz (the
        // host side keeps using ldz).
        magma_dsetmatrix(n, ne-n_gpu, Z + n_gpu*ldz, ldz, dZ + n_gpu*lddz, lddz);
    } else {
        /*============================
         *  use only GPU
         *==========================*/
        magma_dsetmatrix(n, ne, Z, ldz, dZ, lddz);
        magma_dbulge_applyQ_v2(MagmaLeft, ne, n, nb, Vblksiz, dZ, lddz, V, ldv, T, ldt, info);
        magma_device_sync();
    }

    timeaplQ2 = magma_wtime()-timeaplQ2;

    magma_set_lapack_numthreads(mklth);
    return MAGMA_SUCCESS;
}
Пример #8
0
    WRAP_END
}
#ifndef MUSL
/* Keeps calling pthread_setschedprio(thread, prio) for as long as the call
 * returns 0. `err` is not declared in this function and no return statement
 * is visible; presumably both come from the WRAP_START / WRAP_END macros,
 * which are defined elsewhere -- confirm there. */
static int wrap_pthread_setschedprio(pthread_t thread, int prio)
{
    WRAP_START
    while (err == 0) {
        err = pthread_setschedprio(thread, prio);
    }
    WRAP_END
}
#endif
/* Keeps calling pthread_setconcurrency(new_level) for as long as the call
 * returns 0. `err` is not declared in this function and no return statement
 * is visible; presumably both come from the WRAP_START / WRAP_END macros,
 * which are defined elsewhere -- confirm there. */
static int wrap_pthread_setconcurrency(int new_level)
{
    WRAP_START
    while (err == 0) {
        err = pthread_setconcurrency(new_level);
    }
    WRAP_END
}
/* Keeps calling pthread_detach(thread) for as long as the call returns 0.
 * `err` is not declared in this function and no return statement is visible;
 * presumably both come from the WRAP_START / WRAP_END macros, which are
 * defined elsewhere -- confirm there. */
static int wrap_pthread_detach(pthread_t thread)
{
    WRAP_START
    while (err == 0) {
        err = pthread_detach(thread);
    }
    WRAP_END
}
static int wrap_pthread_key_create(pthread_key_t *key,
                                   void (*destructor)(void*))
{
    /* this is clumsy: having to key_create lessens key_delete's chances
       of being interrupted - test not definite */
    int err2;
Пример #9
0
/*
 * Entry point of the allocator test driver.
 *
 * Parameters are taken either from the command line (more than seven argv
 * entries: runtime, min chunk size, max chunk size, chunks per thread,
 * rounds, seed, thread count) or, otherwise, read interactively from stdin.
 * After validating the chunk count against MAX_BLOCKS it initialises the
 * random generator and the allocator (mm_init) and runs the threaded or the
 * single-threaded benchmark, depending on _MT / _REENTRANT.
 *
 * Returns 0 on success, 1 when input cannot be read or the chunk count
 * exceeds MAX_BLOCKS.
 */
int main (int argc, char *argv[])
{

#if defined(_MT) || defined(_REENTRANT)
  int          min_threads, max_threads ;
  int          num_rounds ;
  int          chperthread ;
#endif
  unsigned     seed=12345 ;
  int          num_chunks=10000;
  long sleep_cnt;

  if (argc > 7) {
    /* NOTE(review): this branch (and the pthread_setconcurrency call below)
     * uses variables that are only declared when _MT or _REENTRANT is
     * defined, so the file only builds in the multi-threaded configuration. */
    sleep_cnt = atoi(argv[1]);
    min_size = atoi(argv[2]);
    max_size = atoi(argv[3]);
    chperthread = atoi(argv[4]);
    num_rounds = atoi(argv[5]);
    seed = atoi(argv[6]);
    max_threads = atoi(argv[7]);
    min_threads = max_threads;
    /* Same derivation as the interactive path, so that the MAX_BLOCKS check
     * below also covers command-line runs. */
    num_chunks = max_threads*chperthread ;
    goto DoneWithInput;
  }

#if defined(_MT) || defined(_REENTRANT)
  printf( "\nMulti-threaded test driver \n") ;
#else
  printf( "\nSingle-threaded test driver \n") ;
#endif
  printf("C version (malloc and free)\n") ;

  printf("runtime (sec): ") ; /* 15 seconds */
  if (scanf ("%ld", &sleep_cnt) != 1) {
    printf("error scanning stdin for sleep_cnt - exiting\n");
    return(1);
  }

  printf("chunk size (min,max): ") ; /* 8 40 */
  if (scanf("%d %d", &min_size, &max_size ) != 2) {
    printf("error scanning stdin for chunk size (min,max) - exiting\n");
    return(1);
  }

#if defined(_MT) || defined(_REENTRANT)
  printf("threads (min, max):   ") ; /* same, 1 1, 2 2, etc. */
  if (scanf("%d %d", &min_threads, &max_threads) != 2) {
    printf("error scanning stdin for threads (min,max) - exiting\n");
    return(1);
  }

  printf("chunks/thread:  ") ;
  if (scanf("%d", &chperthread ) != 1) { /* 10K */
    printf("error scanning stdin for chunks/thread - exiting\n");
    return(1);
  }

  printf("no of rounds:   ") ;
  if (scanf("%d", &num_rounds ) != 1) {  /* 10 */
    printf("error scanning stdin for no of rounds - exiting\n");
    return(1);
  }
  num_chunks = max_threads*chperthread ;
#else
  printf("no of chunks:  ") ;
  if (scanf("%d", &num_chunks ) != 1) {
    printf("error scanning stdin for no of chunks - exiting\n");
    return(1);
  }
#endif
  printf("random seed:    ") ;
  /* seed is unsigned, so it must be read with an unsigned conversion. */
  if (scanf("%u", &seed) != 1) {
    printf("error scanning stdin for random seed - exiting\n");
    return(1);
  }
  printf("\n");
 DoneWithInput:

  if( num_chunks > MAX_BLOCKS ){
    printf("Max %d chunks - exiting\n", MAX_BLOCKS ) ;
    return(1) ;
  }

  pthread_setconcurrency (max_threads);

  lran2_init(&rgen, seed) ;

  // Call allocator-specific initialization function
  mm_init();

  numCPU=getNumProcessors();

#if defined(_MT) || defined(_REENTRANT)
  runthreads(sleep_cnt, min_threads, max_threads, chperthread, num_rounds) ;
#else
  runloops(sleep_cnt, num_chunks ) ;
#endif

#ifdef _DEBUG
  _cputs("Hit any key to exit...") ;	(void)_getch() ;
#endif


  return(0) ;

} /* main */
Пример #10
0
// Test 8: multi-processor yielding.
//
// For numchildren = 1 .. numprocessors, starts one more child (test8_child)
// on the next virtual processor via an asynchronous message, releases all
// started children by clearing FinishedChildren, waits until they report
// completion, and prints per-child and aggregate (average / min / max)
// timings taken from the AvgTime / MinTime / MaxTime arrays. Those arrays and
// the TotalChildren / FinishedChildren / Done variables are declared outside
// this function and are presumably updated by the children -- their code is
// not visible here.
//
// Returns early, after printing an error, if a child cannot be started.
void test8()
{
    printf("########################################\n"
           "# Test 8: Multi-processor yielding\n"
           "#    -> Show that the schedulers on multiple processors are \n"
           "#       independant by measuring yield times accross a bunch\n"
           "#       of processors\n");

    uval numprocessors;
    uval numchildren;
    uval child, i;
    double sum, min, max;
    SysStatus rc;

    // Print out warning about nanosleep() not being finished once
    // before starting our timings:
    sleep(1);

    numprocessors = DREFGOBJ(TheProcessRef)->ppCount();

    pthread_setconcurrency(numprocessors);

    printf("plot forkjoin_multiproc %ld green\n", numprocessors);

    Done = 0;
    TotalChildren = 0;
    FinishedChildren = numprocessors;

    for (numchildren = 1; numchildren <= numprocessors; numchildren++) {
        child = numchildren - 1;

        // Start up a thread on a remote processor:
        rc = MPMsgMgr::SendAsyncUval(Scheduler::GetEnabledMsgMgr(),
                                     SysTypes::DSPID(0, VPNum(child)),
                                     test8_child, child);
        if (rc != 0) {
            printf("Error, SendAsyncUval() returned %lx\n", rc);
            return;
        }

        // Wait for the child thread to start:
        while (TotalChildren < numchildren) {
            sched_yield();
        }

        // Init our array:
        for (i = 0; i < numchildren; i++) {
            AvgTime[i] = 0;
        }

        // This is a barrier release: when we set this to 0 all the
        // children start working:
        FinishedChildren = 0;

        // Wait for the children to finish:
        while (FinishedChildren < TotalChildren) {
            sleep(1);
        }

        // Aggregate over the children of this round.
        sum = 0;
        min = MinTime[0];
        max = MaxTime[0];
        for (i = 0; i < numchildren; i++) {
            sum += AvgTime[i];
            if (min > MinTime[i]) {
                min = MinTime[i];
            }
            // Track the largest value: replace max when a bigger one is seen.
            if (max < MaxTime[i]) {
                max = MaxTime[i];
            }
            printf("Child %ld took min %f us, max %f us, avg %f us, per fork/join.\n", i,
                   MinTime[i], MaxTime[i], AvgTime[i]);
        }

        printf("%ld %f %f %f\n", numchildren, sum / double(numchildren),
               min, max);
    }

    Done = 1;

    printf("xlabel Number of threads\n"
           "ylabel Fork-join time (us)\n"
           "title Fork-join time vs. number of threads\n"
           "# All times averaged over %ld iterations\n", COUNT_NUM);
}
Пример #11
0
// Console entry point.
//
// Sequence, as visible below: optional MPI initialisation, derivation of the
// base/library directories from the current working directory (or the
// compile-time library directory), command-line parsing, GlobalStartup(),
// then one of: calculator mode, pipe mode (batch read from stdin), or the
// normal run (preferences file, then a batch file chosen on the command line
// or interactively, then optional post-processors). In MPI builds ranks > 0
// run mpiNormalLoop() instead of the normal run.
int main (int argc, char* argv[])
{
    // Number of command-line arguments, not counting the program name.
    mainArgCount = argc - 1;


#ifdef  __HYPHYMPI__
    int            rank,
                   size;

    MPI_Init       (&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);

    // Publish this node's rank and the node count as parameters.
    setParameter  (mpiNodeID,    (_Parameter)rank);
    setParameter    (mpiNodeCount, (_Parameter)size);
    _hy_mpi_node_rank = rank;

    // In MPI builds this brace is closed in the next __HYPHYMPI__ section.
    if (rank == 0) {
        mpiNodesThatCantSwitch.Populate (size,1,0);
        /* {
              char hostname[256];
              gethostname(hostname, sizeof(hostname));
              printf("PID %d on %s ready for attach\n", getpid(), hostname);
              fflush(stdout);
              //getchar ();
          } */
#endif


        //for (long k=0; k<NSIG; k++)
        //{
        //  signal(k, &hyphyBreak);
        //}

#ifdef  __HYPHYMPI__
    }
#endif

    // NOTE(review): the result of getcwd() is not checked; on failure curWd
    // would be used without having been filled in.
    char    curWd[4096],
            dirSlash = GetPlatformDirectoryChar ();
    getcwd (curWd,4096);

    _String baseDir (curWd);

    // Make sure the base directory ends with the directory separator.
    if (baseDir.getChar (baseDir.sLength-1) != dirSlash) {
        baseDir=baseDir & dirSlash;
    }


    // The library directory is the compile-time _HYPHY_LIBDIRECTORY_ when it
    // is defined, otherwise the base directory; it is appended to pathNames.
#if defined _HYPHY_LIBDIRECTORY_
    _String libDir (_HYPHY_LIBDIRECTORY_);

    if (libDir.getChar (libDir.sLength-1) != dirSlash) {
        libDir=libDir & dirSlash;
    }

    pathNames&& &libDir;
#else
     pathNames&& &baseDir;
    _String libDir = baseDir;
#endif

    // Batch file named on the command line, if any (set in the loop below).
    _String argFile;

    libDirectory  = libDir;
    libArgDir     = libDirectory;
    baseDirectory = baseDir;
    baseArgDir    = baseDirectory;

    _ExecutionList ex;

#ifdef _OPENMP
    systemCPUCount = omp_get_max_threads();
#endif

#ifdef _MINGW32_MEGA_
    {
        // Open the named pipe "\\.\pipe\MEGAPipe<pid>" for writing; report an
        // error via FlagError if it cannot be opened.
        char pid[16];
        snprintf (pid,16,"%u", GetCurrentProcessId());

        _String pipeName = _String("\\\\.\\pipe\\MEGAPipe") & pid;
        printf ("Pipe name = %s\n", pipeName.sData);
        if ((_HY_MEGA_Pipe = CreateFile(pipeName.sData, GENERIC_WRITE, 0, NULL, OPEN_EXISTING, 0, NULL)) == INVALID_HANDLE_VALUE) {
            char* lpMsgBuf;
            FormatMessage(
                FORMAT_MESSAGE_ALLOCATE_BUFFER |
                FORMAT_MESSAGE_FROM_SYSTEM |
                FORMAT_MESSAGE_IGNORE_INSERTS,
                NULL,
                GetLastError(),
                MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT),
                (LPTSTR) &lpMsgBuf,
                0, NULL );
            FlagError (_String("Failed to create a pipe named '") & pipeName & "' to send data from HyPhy to MEGA. Error: "&lpMsgBuf);
        }
    }
#endif

    // Command-line parsing. Recognised forms:
    //   -...        handed to ProcessConfigStr
    //   BASEPATH=   overrides baseArgDir / baseDirectory (separator appended)
    //   LIBPATH=    overrides libArgDir / libDirectory (separator appended)
    //   USEPATH=    prefixes the error/message file names and replaces
    //               pathNames entry 0
    //   CPU=        (only with __MP__) sets systemCPUCount, minimum 1
    //   otherwise   the argument becomes argFile
    for (long i=1; i<argc; i++) {
        _String thisArg (argv[i]);
        if (thisArg.sData[0]=='-') {
            ProcessConfigStr (thisArg);
        } else if (thisArg.beginswith ("BASEPATH=")) {
            baseArgDir = thisArg.Cut(9,-1);
            if (baseArgDir.sLength) {
                if (baseArgDir.sData[baseArgDir.sLength-1]!=dirSlash) {
                    baseArgDir = baseArgDir&dirSlash;
                }

                baseDirectory = baseArgDir;
            }
        } else if (thisArg.beginswith ("LIBPATH=")) {
            libArgDir = thisArg.Cut(8,-1);
            if (libArgDir.sLength) {
                if (libArgDir.sData[libArgDir.sLength-1] != dirSlash) {
                    libArgDir = libArgDir & dirSlash;
                }
                libDirectory = libArgDir;
            }
        } else if (thisArg.beginswith ("USEPATH=")) {
            baseDir             = thisArg.Cut(8,-1);
            errorFileName       = baseDir & errorFileName;
            messageFileName     = baseDir & messageFileName;
            pathNames.Delete    (0);
            pathNames&&         &baseDir;
        } else
#ifdef __MP__
            if (thisArg.beginswith ("CPU=")) {
                _String cpus = thisArg.Cut(4,-1);
                systemCPUCount = cpus.toNum();
                if (systemCPUCount<1) {
                    systemCPUCount = 1;
                }
                pthread_setconcurrency (systemCPUCount+1);
            } else
#endif
                argFile = thisArg;
    }

    GlobalStartup();

    // Calculator mode: loop until ExpressionCalculator() returns false.
    // This path returns without calling GlobalShutdown().
    if (calculatorMode) {
        printf ("\nHYPHY is running in calculator mode. Type 'exit' when you are finished.\n");
        while (ExpressionCalculator()) ;
        return 0;
    }

    // Pipe mode: the batch is read from stdin, executed, and the program ends.
    if (pipeMode) {
        _String bfIn (stdin);
        _ExecutionList exIn (bfIn);
        exIn.Execute();
        GlobalShutdown();
        return 0;
    }

    // try to read the preferences
    // First candidate: <cwd>/<prefFileName>; fallback: baseArgDir & prefFileName.
    _String     prefFile (curWd);
    prefFile = prefFile & '/' & prefFileName;
    FILE     * testPrefFile = fopen (prefFile.sData,"r");
    if (!testPrefFile) {
        prefFile = baseArgDir & prefFileName;
        testPrefFile = fopen (prefFile.sData,"r");
    }
    // The fopen above only probes for existence; the file is then loaded and
    // executed as a batch file.
    if (testPrefFile) {
        fclose(testPrefFile);
        ReadBatchFile (prefFile,ex);
        ex.Execute();
        ex.Clear();
    }
    //printf ("Node %d before mpiParallelOptimizer\n", rank);
#ifdef __HYPHYMPI__
    // Ranks > 0 enter mpiNormalLoop(); rank 0 falls through to the normal
    // run in the else branch, closed in the __HYPHYMPI__ section further down.
    if (rank>0) {
        //if (mpiParallelOptimizer || mpiPartitionOptimizer)
        //  mpiOptimizerLoop (rank, size);
        //else
        _String defaultBaseDirectory = *(_String*)pathNames(0);
        mpiNormalLoop (rank, size, defaultBaseDirectory);
        /*argFile = "SHUTDOWN_CONFIRM";
        MPISendString (argFile, senderID);*/
    } else {
#endif
        if (!argFile.sLength) {
            // No batch file on the command line: selection stays -2 in update
            // mode, otherwise it comes from DisplayListOfChoices().
            long selection = -2;
            if (!updateMode) {
                selection = DisplayListOfChoices();
            }

            if (selection == -1) {
                // Prompt for a batch file path.
                dialogPrompt = "Batch file to run:";
                _String fStr (ReturnDialogInput (true));
                if (logInputMode) {
                    _String tts = loggedFileEntry&fStr;
                    loggedUserInputs && & tts;
                }

                PushFilePath (fStr);
                ReadBatchFile (fStr,ex);
            } else {
                // selection >= 0: a file from TemplateBatchFiles taken from
                // availableTemplateFiles; otherwise WebUpdate.bf.
                _String templ;

                if (selection >= 0) {
                    templ = baseArgDir &"TemplateBatchFiles" & dirSlash;
                } else {
                    templ = baseArgDir & "TemplateBatchFiles" & dirSlash & "WebUpdate.bf";
                }

                if (selection >= 0) {
                    templ= templ&*(_String*)(*(_List*)availableTemplateFiles(selection))(2);
                }

                PushFilePath (templ);
                ReadBatchFile (templ,ex);
            }
        } else {
            // A batch file was given: prefix baseDirectory unless the path
            // starts with '/' (or, on MinGW, has ':' as second character).
#ifndef __MINGW32__
            if (argFile.sData[0] != '/') {
                argFile       = baseDirectory & argFile;
            }
#else
            if (argFile.sData[1] != ':') { // not an absolute path
                argFile       = baseDirectory & argFile;
            }
#endif
            PushFilePath  (argFile);
            ReadBatchFile (argFile,ex);
        }

        ex.Execute();

        // Optional post-processing loop: keeps offering post-processors until
        // the user answers 'n'/'N' or DisplayListOfPostChoices() returns -1.
        if (usePostProcessors && (!updateMode)) {
            ReadInPostFiles();
            printf ("\n\n**********Continue with result processing (y/n)?");
            _String c_str (StringFromConsole());

            if (logInputMode) {
                loggedUserInputs && & c_str;
            }

            if (c_str.getChar(0) !='n' && c_str.getChar(0)!='N' ) {
                long choice = DisplayListOfPostChoices();
                while (choice != -1) {
                    _ExecutionList postEx;
                    argFile = *(_String*)(*(_List*)availablePostProcessors(choice-1))(1);
                    PushFilePath (argFile);
                    ReadBatchFile (argFile, postEx);
                    postEx.Execute();
                    PopFilePath ();
                    printf ("\n\n**********Continue with result processing (y/n)?");

                    c_str = StringFromConsole();
                    if (logInputMode) {
                        loggedUserInputs && & c_str;
                    }

                    if (c_str.getChar(0)=='n' || c_str.getChar(0)=='N' ) {
                        break;
                    }

                    choice = DisplayListOfPostChoices();
                }
            }
        }
#ifdef __HYPHYMPI__
    }
    ReportWarning               (_String ("Node ") & (long)rank & " is shutting down\n");
#endif


#ifdef _MINGW32_MEGA_
    if (_HY_MEGA_Pipe != INVALID_HANDLE_VALUE) {
        CloseHandle (_HY_MEGA_Pipe);
    }
#endif

    // Common shutdown for the normal run.
    PurgeAll                    (true);
    GlobalShutdown              ();

#ifdef __HYPHYMPI__
    if (rank == 0) {
        printf ("\n\n");
    }
#endif

}
Пример #12
0
// GTK front-end entry point.
//
// Sequence, as visible below: optional MPI initialisation, gtk_init (in MPI
// builds only on rank 0), derivation of the base directory from the current
// working directory, command-line parsing, a check that the GTKResources
// directory can be scanned, GlobalStartup(), and then either the GUI main
// loop (rank 0 in MPI builds, always otherwise) or an MPI worker loop.
int main( int   argc, char *argv[] )
{

	#ifdef	__HYPHYMPI__
		  int 		   rank, 
		  			   size;
		  			   			   			 
		  MPI_Init	   (&argc, &argv);
		  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
		  MPI_Comm_size(MPI_COMM_WORLD, &size);
		  
		  _hy_mpi_node_rank = rank;
		  
		  setParameter  (mpiNodeID, (_Parameter)rank);
		  setParameter	(mpiNodeCount, (_Parameter)size);
		  
		  // This `if` has no braces: it guards the gtk_init call that
		  // follows the #endif.
		  if (rank == 0)
	#endif
   
	gtk_init (&argc, &argv);

	/* set up globals */
	
	// NOTE(review): the result of getcwd() is not checked; on failure curWd
	// would be used without having been filled in.
	char curWd[4096];
	getcwd (curWd,4096);

	_String baseDir (curWd);
	baseDir=baseDir&'/';

	pathNames&& &baseDir;
	baseDirectory = baseDir;
	// Command-line parsing. Recognised forms:
	//   BASEPATH=      overrides baseDirectory ('/' appended when missing)
	//   USEPATH=       prefixes the error/message file names
	//   CPU=           (only with __MP__) sets systemCPUCount, minimum 1
	//   MPIOPTIMIZER / MPIPARTITIONS   (MPI builds only) select an optimizer mode
	for (long i=1; i<argc;i++)
	{
		_String thisArg (argv[i]);
		if (thisArg.beginswith ("BASEPATH="))
		{
			baseDirectory = thisArg.Cut(9,-1);
			if (baseDirectory.sLength)
			{
				if (baseDirectory.sData[baseDirectory.sLength-1]!='/')
					baseDirectory = baseDirectory&"/";
			}
		}
		else
			if (thisArg.beginswith ("USEPATH="))
			{
				// NOTE(review): entry 0 of pathNames is deleted and then
				// baseDir (not baseArgDir) is appended again -- confirm
				// whether baseArgDir was intended.
				_String		baseArgDir 			(thisArg,8,-1);
				errorFileName					= baseArgDir & errorFileName;
				messageFileName					= baseArgDir & messageFileName;
				pathNames.Delete				(0);
				pathNames&&						&baseDir;
			}
			else
				if (thisArg.beginswith ("CPU="))
				{
					#ifdef __MP__
					_String cpus = thisArg.Cut(4,-1);
					systemCPUCount = cpus.toNum();
					if (systemCPUCount<1)
						systemCPUCount = 1;
					#ifdef __MP2__
						pthread_setconcurrency (systemCPUCount+1);
					#endif
					#endif
				}
				#ifdef	__HYPHYMPI__
					else
						if (thisArg == _String("MPIOPTIMIZER"))
						{
							mpiParallelOptimizer = true;
							setParameter	(mpiNodeCount, 0.0);
						}
						else
							if (thisArg == _String("MPIPARTITIONS"))
							{
								mpiPartitionOptimizer = true;
								setParameter	(mpiNodeCount, 0.0);
							}
				#endif
	}
	
	#ifdef	__HYPHYMPI__
	if (rank == 0)
	#endif
	{
		// Scan <baseDirectory>GTKResources; if the scan yields no entries,
		// show an error dialog and exit with status 1. Otherwise load the
		// theme resource file from that directory.
		baseDir = baseDirectory & "GTKResources";
		_List scanRes;
		ScanDirectoryForFileNames(baseDir,scanRes,false);
		if (scanRes.lLength == 0)
		{
			GtkWidget * noRez = gtk_message_dialog_new (NULL, GTK_DIALOG_MODAL, GTK_MESSAGE_ERROR, GTK_BUTTONS_OK, "HYPHY_GTK was unable to find a required GTKResources directory in %s. Please use BASEPATH= command line option to specify where the installation directory of HyPhy can be found.", baseDirectory.sData);
			gtk_dialog_run (GTK_DIALOG (noRez));
			gtk_widget_destroy (noRez);
			return 1;
		}
		_String rcPath = baseDir & "/theme/theme.rc";
		//printf ("Loading res files from %s\n", rcPath.sData);
		gtk_rc_parse (rcPath.sData);
	}
	
 	GlobalStartup();

	// GUI branch: rank 0 in MPI builds, unconditional otherwise. The brace
	// opened here in MPI builds is closed in the __HYPHYMPI__ section after
	// WritePreferences().
	#ifdef	__HYPHYMPI__
	if (rank == 0)
	{
	#endif
	// Cursors, Pango context and icon used by the GUI.
	GdkDisplay * defDisplay = gdk_screen_get_display (gdk_screen_get_default());
	hSizeCursor = gdk_cursor_new_for_display (defDisplay,GDK_SB_H_DOUBLE_ARROW);
	pickUpCursor = gdk_cursor_new_for_display (defDisplay,GDK_TARGET);
	dropOffCursor = gdk_cursor_new_for_display (defDisplay,GDK_TCROSS);
	
	screenPContext = gdk_pango_context_get_for_screen (gdk_screen_get_default());
	tablePDMenuIcon = (GdkPixbuf*)ProcureIconResource(4020);
		
	/*{
		GdkScreen * defD = gdk_screen_get_default();
		fontConversionFactor = 72.27 / (gdk_screen_get_height (defD) *25.4 / gdk_screen_get_height_mm(defD)); 
		printf ("Pango conversion factor computed at: %g\n", fontConversionFactor);
	}*/

		
	ReadInTemplateFiles ();
		
	hyphyConsoleWindow = new _HYConsoleWindow ("HYPHY Console");
	ReadPreferences		();
	SetStatusLine ("None","Idle","00:00:00");
	// Drain pending GTK events before continuing with the setup.
	while (gtk_events_pending())
		gtk_main_iteration();

	SetPreferences		();
	ReadGeneticCodes	();	
	ReadModelTemplates	();
	ReadTreeProcessors ();
	MoveConsoleWindow  (consolePositionRectangle);
	StringToConsole (hyphyCiteString);
	hyphyConsoleWindow->BringToFront();

	#ifdef __HYPHYMPI__ 
	{
		// Print the MPI configuration (node counts and mode) to the console.
		char statBuffer[1024];
		sprintf (statBuffer,"MPI version of HyPhy running on %d nodes (a master and %d compute nodes) in %s mode\n",
							 size, 
							 size-1,
							 mpiPartitionOptimizer?"partition":(mpiParallelOptimizer?"rate heterogeneity":"normal"));
		BufferToConsole (statBuffer);
	}
	#endif
		
	// Register two periodic timers (100 ms and 1000 ms), then run the GTK
	// main loop; preferences are written once it returns.
	g_timeout_add  (100,GlobalQueueTimer,nil);
	g_timeout_add  (1000,progressTimerFunction,nil);
	gtk_main ();

	WritePreferences();
	#ifdef	__HYPHYMPI__
	}
	else // slave node
	{
		if (mpiParallelOptimizer || mpiPartitionOptimizer)
			mpiOptimizerLoop (rank, size);
		else
			mpiNormalLoop (rank, size, baseDir);
	}
	#endif

	GlobalShutdown();
    return 0;
}