コード例 #1
0
// The core function doing progressive Dynalign calculations and templating.
//
// For every cycle j in [0, iterations) and every entry i of seqPair, this:
//   1. creates a new Dynalign_object for the pair and stores it in `instance`,
//   2. (non-MULTIFIND builds) reads the optional constraint and SHAPE files
//      named in inputList for both sequences,
//   3. templates the calculation from the dsv file written by the previous
//      calculation (skipped for the very first calculation),
//   4. runs Dynalign, writing its save file to dsvFiles[j][i],
//   5. writes the alignment to aliFiles[j][i],
//   6. in the last cycle, writes ct files and appends the free energy of
//      structure 1 of each sequence to dGIndex / energies.
//
// Parameters:
//   Dsv  - when true, the dsv file names are generated before the loop.
//   Ali  - when true, the alignment file names are generated before the loop.
//   All remaining parameters are forwarded unchanged to
//   Dynalign_object::Dynalign().
//
// Returns 0 on success; otherwise the first non-zero error code produced by
// input preparation, constraint reading, templating, Dynalign, or ct writing
// (the same value is left in ErrorCode).
//
// NOTE(review): dsvFiles[j][i] and aliFiles[j][i] are indexed regardless of
// the Dsv / Ali flags; presumably they are populated elsewhere when the flags
// are false -- confirm.
// NOTE(review): on every early error return `instance` is NOT deleted. This is
// left unchanged on purpose because code outside this function may still query
// `instance` (e.g. for an error message) after a failure; if nothing does, the
// object is leaked and should be deleted before returning -- confirm.
int Multilign_object::ProgressiveMultilign(
        const short int numProcessors,
        const bool Dsv, const bool Ali,
        const short int maxtrace,
        const short int bpwin, const short int awin,
        const short int percent,
        const short int imaxseparation,
        const float gap,
        const bool singleinsert,
        const short int singlefold_subopt_percent,
        const bool local){

    // prepare the object to be ready for the real calculation.
#ifndef MULTIFIND
    if ((ErrorCode = PrepInput()) != 0)
        return ErrorCode;
#else
    if ((ErrorCode = PrepMultifindInput()) != 0)
        return ErrorCode;
#endif

    // percent of progress: start at 5% and spread the remainder evenly over
    // all Dynalign calculations. Guard against a zero divisor so that an empty
    // pair list or zero iterations cannot trigger a division by zero.
    int Ppercent = 5;
    const size_t totalCalculations = seqPair.size() * iterations;
    int Ppercentstep = 0;
    if (totalCalculations != 0)
        Ppercentstep = static_cast<int>((100 - Ppercent) / totalCalculations);
    const int Ppercentstep1 = static_cast<int>(Ppercentstep * 0.2); // percent advanced for a dsv template
    const int Ppercentstep2 = Ppercentstep - Ppercentstep1;         // percent advanced for a dynalign calculation
    if (progress != NULL) progress->update(Ppercent);

    // if Dsv is true, generate the names of the dsv files
    // (indexed as dsvFiles[j][i], j = cycle number, i = calculation in the cycle).
#ifndef MULTIFIND
    if (Dsv) NameDsvFiles();
#else
    if (Dsv) NameMultifindDsvFiles();
#endif

    // if Ali is true, generate the names of the alignment files
    // (indexed as aliFiles[j][i], j = cycle number, i = calculation in the cycle).
#ifndef MULTIFIND
    if (Ali) NameAliFiles();
#else
    if (Ali) NameMultifindAliFiles();
#endif

    int stepBP = 0, totalBP = 0;
    string tmpfilename;
    int struct_num;
    RNA *rna;

    for (int j = 0; j < iterations; ++j){
        for (size_t i = 0; i < seqPair.size(); ++i){
#ifndef MULTIFIND
            instance = new Dynalign_object(inputList[seqPair[i].first][0].c_str(), 2,
                                           inputList[seqPair[i].second][0].c_str(), 2, isRNA);
#else
            instance = new Dynalign_object(input_sequences[seqPair[i].first].c_str(),
                                           input_sequences[seqPair[i].second].c_str());
#endif
            instance->GetRNA1()->SetTemperature(temperature);

#ifndef MULTIFIND
            // read constraint file for the first seq if it exists
            if (!inputList[seqPair[i].first][2].empty()){
                if ((ErrorCode = instance->GetRNA1()->ReadConstraints(inputList[seqPair[i].first][2].c_str())) != 0)
                    return ErrorCode;
            }

            // read constraint file for the second seq if it exists
            if (!inputList[seqPair[i].second][2].empty()){
                if ((ErrorCode = instance->GetRNA2()->ReadConstraints(inputList[seqPair[i].second][2].c_str())) != 0)
                    return ErrorCode;
            }

            structure *ct;
            ct = instance->GetRNA1()->GetStructure();
            // read SHAPE file for the first seq if it exists
            if (!inputList[seqPair[i].first][3].empty()){
                ct->SHAPEslope = SHAPESlope * conversionfactor;
                ct->SHAPEintercept = SHAPEIntercept * conversionfactor;
                ct->ReadSHAPE(inputList[seqPair[i].first][3].c_str());
            }

            ct = instance->GetRNA2()->GetStructure();
            // read SHAPE file for the second seq if it exists
            if (!inputList[seqPair[i].second][3].empty()){
                ct->SHAPEslope = SHAPESlope * conversionfactor;
                ct->SHAPEintercept = SHAPEIntercept * conversionfactor;
                ct->ReadSHAPE(inputList[seqPair[i].second][3].c_str());
            }
#endif

            // doing dsv templating: the template is the dsv file of the
            // previous calculation, i.e. the previous pair in this cycle, or
            // the last pair of the previous cycle when i == 0.
            if (i != 0) tmpfilename = dsvFiles[j][i-1];
            // i == 0 && j != 0
            else if (j != 0) tmpfilename = dsvFiles[j-1][seqPair.size()-1];

            // the first Dynalign is not templated
            if (!(i == 0 && j == 0)){
                if ((ErrorCode = instance->Templatefromdsv(tmpfilename.c_str(), maxDsv)) != 0)
                    return ErrorCode;
            }

            if (progress != NULL){
                Ppercent += Ppercentstep1;
                progress->update(Ppercent);
            }

            // set up the number of base pairs to template.
            // The original statement read `totalBP =- stepBP;`, which assigns
            // the negated step instead of subtracting it; the decrement form
            // is what the stepBP computation below is designed for.
            // NOTE(review): the else branch resets totalBP to maxPairs whenever
            // i == 0, j == 0 or in the last cycle -- confirm this is intended.
            if (i != 0 && j != 0 && j < iterations - 1) {
                totalBP -= stepBP;
            }
            else totalBP = maxPairs;

            if ((ErrorCode = instance->Dynalign(maxtrace, bpwin, awin,
                                                percent, imaxseparation,
                                                gap, singleinsert,
                                                dsvFiles[j][i].c_str(),
                                                false, // Multilign has to set the optimal_only to false
                                                singlefold_subopt_percent,
                                                local, numProcessors,
                                                totalBP)) != 0){
                return ErrorCode;
            }

            if (progress != NULL){
                Ppercent += Ppercentstep2;
                progress->update(Ppercent);
            }

            // after the very first calculation, derive the per-calculation
            // decrement of the base pair count.
            if (i == 0 && j == 0) {
                totalBP = CountBP();
                // Do the division in signed arithmetic: dividing the (possibly
                // negative) int difference by a size_t would first convert it
                // to a huge unsigned value and yield a meaningless step.
                const int pairCount = static_cast<int>(seqPair.size());
                const int stepCount = (iterations == 1)
                                          ? iterations * pairCount
                                          : (iterations - 1) * pairCount;
                stepBP = (totalBP - maxPairs) / stepCount;
            }

            instance->WriteAlignment( aliFiles[j][i].c_str() );

            // last iteration
            if (j == iterations-1){
                // the ct file of the first sequence of the pair is written
                // only for the last pair of the cycle.
                if (i == seqPair.size()-1){
                    rna = instance->GetRNA1();
                    struct_num = rna->GetStructureNumber();
                    // calculate free energies and write into ct variables
                    for (int n = 1; n <= struct_num; ++n) {
                        rna->CalculateFreeEnergy(n);
                    }
#ifndef MULTIFIND
                    if ((ErrorCode = instance->GetRNA1()->WriteCt( inputList[seqPair[i].first][1].c_str() )) != 0) {
                        return ErrorCode;
                    }
#else
                    // ct files are only written when ct file names were supplied
                    if (ct_files.size() &&
                        (ErrorCode = instance->GetRNA1()->WriteCt( ct_files[seqPair[i].first].c_str() )) != 0) {
                        return ErrorCode;
                    }
#endif
                }

                // record the free energy of structure 1 of the first sequence
                dGIndex.push_back(instance->GetRNA1()->CalculateFreeEnergy(1));

                rna = instance->GetRNA2();
                struct_num = rna->GetStructureNumber();
                // calculate free energies and write into ct variables;
                // the energy of structure 1 is also recorded in `energies`.
                for (int n = 1; n <= struct_num; ++n) {
                    if (n == 1)
                        energies.push_back(rna->CalculateFreeEnergy(n));
                    else
                        rna->CalculateFreeEnergy(n);
                }
#ifndef MULTIFIND
                if ((ErrorCode = instance->GetRNA2()->WriteCt( inputList[seqPair[i].second][1].c_str() )) != 0)
                    return ErrorCode;
#else
                // ct files are only written when ct file names were supplied
                if (ct_files.size() &&
                    (ErrorCode = instance->GetRNA2()->WriteCt( ct_files[seqPair[i].second].c_str() )) != 0)
                    return ErrorCode;
#endif
            }

            delete instance;

        }
    }

    if (progress != NULL && Ppercent != 100) progress->update(100);

    return 0;
}
コード例 #2
0
ファイル: efn2.cpp プロジェクト: mayc2/PseudoKnot_research
///////////////////////////////////////////////////////////////////////////////
// Run calculations.
///////////////////////////////////////////////////////////////////////////////
void efn2Interface::run() {

	// Create a variable that handles errors.
	int error = 0;

	/*
	 * Use the constructor for RNA that specifies a filename.
	 * Specify type = 1 (CT file).
	 * isRNA identifies whether the strand is RNA (true) or DNA (false).
	 *
	 * After construction of the strand data structure, create the error checker which monitors for errors.  
	 * Throughout, the error status of the calculation is checked with a variant of the isErrorStatus method, which returns 0 if no error occurred.
	 * The calculation proceeds as long as error = 0.
	 */
	cout << "Initializing nucleic acids..." << flush;
	RNA* strand = new RNA( ctFile.c_str(), 1, isRNA );
	ErrorChecker<RNA>* checker = new ErrorChecker<RNA>( strand );
	error = checker->isErrorStatus();
	if( error == 0 ) { cout << "done." << endl; }

	/*
	 * Check the strand for pseudoknots by looking through each structure with the ContainsPseudoknot method.
	 * efn2 cannot handle pseudoknots, so if the strand contains pseudoknots, this is considered an error.
	 */
	if( error == 0 ) {
		structures = strand->GetStructureNumber();
		for( int i = 1; i <= structures; i++ ) {
			if( strand->ContainsPseudoknot( i ) ) {
				cerr << "Nucleic acids contain pseudoknots; cannot proceed." << endl;
				error = -1;
			}
		}
	}

	/*
	 * Set the temperature using the SetTemperature method.
	 * Only set the temperature if a given temperature doesn't equal the default.
	 * If the temperature does need to be set, use the error checker's isErrorStatus method to check for errors.
	 */
	if( ( error == 0 ) && ( temperature != 310.15 ) ) {

		// Show a message saying that the temperature is being set.
		cout << "Setting temperature..." << flush;

		// Set the temperature and check for errors.
		int tempError = strand->SetTemperature( temperature );
		error = checker->isErrorStatus( tempError );

		// If no error occurred, print a message saying that temperature is set.
		if( error == 0 ) { cout << "done." << endl; }
	}

	/*
	 * Read SHAPE constraints, if applicable.
	 * When reading SHAPE constraints, use the ReadSHAPE method.
	 * After constraints are read, use the error checker's isErrorStatus method to check for errors.
	 */
	if( error == 0 && SHAPEFile != "" ) {

		// Show a message saying that SHAPE constraints are being read.
		cout << "Applying SHAPE constraints..." << flush;

		// Initialize the single stranded SHAPE slope and intercept.
		// For now, these are hard-coded as 0.
		double slopeSingle = 0;
		double interceptSingle = 0;

		// Read SHAPE constraints and check for errors.
		int constraintError = strand->ReadSHAPE( SHAPEFile.c_str(), slope, intercept, slopeSingle, interceptSingle );
		error = checker->isErrorStatus( constraintError );

		// If no error occurred, print a message saying that SHAPE constraints are set.
		if( error == 0 ) { cout << "done." << endl; }
	}

	/*
	 * Do the efn2 calculation.
	 * If the user wants a simple output file, get free energies for each structure using the CalculateFreeEnergy method.
	 * If the user wants a thermodynamic details file, write the file with the WriteThemodynamicDetails method.
	 */
	if( error == 0 ) {

		// Write a thermodynamic details file, if asked.
		if( writeTherm ) {

			// Show a message saying that the details file is being written.
			cout << "Writing thermodynamic details file..." << flush;

			// Write the thermodynamic details file and check for errors.
			int thermError = strand->WriteThermodynamicDetails( outFile.c_str() );
			error = checker->isErrorStatus( thermError );

			// Print a message saying that the details file has been written.
			if( error == 0 ) { cout << "done." << endl; }
		}

		// Write a simple list file, if asked.
		else {

			// Show a message saying that the list file is being written.
			cout << "Writing free energy list file..." << flush;

			// For each structure, calculate its energy and push it into the energies vector.
			// Changed this 5-14-2013, no longer pushes into a vector. Instead use calculatefreeenergy method from RNA class.
			// This is so it works with openMP -- vector would be filled out of order when loop runs in parallel and the indexing would be messed up.
			// If an error occurs, set it.
			//vector<double> energies;
			strand->CalculateFreeEnergy( 1 , simple );			//calculate the free energy of the first structure first (this is to make it play well with openMP)			
			#ifdef SMP
			#pragma omp parallel for
			#endif
			for( int i = 2; i <= structures; i++ ) { 
				strand->CalculateFreeEnergy( i , simple ); 		//now calculate the free energy of the remaining structures
				error = checker->isErrorStatus();
				//if( error == 0 ) { energies.push_back( energy ); }
				//else break;
			}

			// If all free energies were calculated correctly, write the output file.
			if( error == 0 ) {
				ofstream out( outFile.c_str() );
				for( int i = 1; i <= structures; i++ ) {
					int index = i - 1;
					out << "Structure: " << i << "   Energy = " << fixed << setprecision( 1 ) << strand->GetFreeEnergy(i)  << endl; //changed to GetFreeEnergy instead of writing the energies vector
				}
				out.close();
			}

			// Print a message saying that the list file has been written.
			if( error == 0 ) { cout << "done." << endl; }

			// If the output should be piped to standard output, then pipe it.
                        if( ( error == 0 ) && ( stdPrint ) ) {
                          cout << endl << "Generated output file: " << outFile << endl << endl;
                          for( int i = 1; i <= structures; i++ ) {
	                          cout << "Structure: " << i << "   Energy = " << fixed << setprecision( 1 ) << strand->GetFreeEnergy(i) << endl;
			  }
                          cout << endl;
                        }
		}
	}

	// Delete the error checker and data structure.
	delete checker;
	delete strand;

	// Print confirmation of run finishing.
	if( error == 0 ) { cout << calcType << " complete." << endl; }
	else { cerr << calcType << " complete with errors." << endl; }
}