Exemple #1
0
//1100110 record
//------------------------------------------------------------------------------
/*
 * Program entry point.
 *
 * Checks the command line with std_input_check() and exits with EXIT_FAILURE
 * when that check fails. Otherwise sets gl_pageID to 1, calls init_programs(),
 * prints one summary line per entry of gl_programs and finally calls
 * run_programs().
 */
int main(int argc, char *argv[])
{
	int prog = 0;

	if (!std_input_check(argc, argv)) {
		return EXIT_FAILURE;
	}

	gl_pageID = 1;
	init_programs();

	while (prog < gl_program_count) {
		/*
		 * The page-table entry shown for every program is the one at index
		 * gl_programs[0].init_memorylocation; the original author chose
		 * program 0 "because it was the border point".
		 */
		printf("pid:%i  \t  initML: %i  \t  totalML:%i  \t  totalPages: %i P#:%i \t Flag:%i \n",
		       gl_programs[prog].pid,
		       gl_programs[prog].init_memorylocation,
		       gl_programs[prog].total_memorylocation,
		       gl_programs[prog].total_pages,
		       gl_programs[prog].pagetable[gl_programs[0].init_memorylocation].pagenumber,
		       gl_programs[prog].pagetable[gl_programs[0].init_memorylocation].validbid);
		++prog;
	}

	run_programs();
}
Exemple #2
0
/**
 * Reads the MGF file named by genOp_.mgfAbsPath one spectrum at a time.
 *
 * For every BEGIN IONS ... END IONS section the lines are copied into the
 * buffer file genOp_.sSpectrumFN (attribute lines other than TITLE, PEPMASS
 * and CHARGE are dropped), the title, precursor mass and charge are parsed,
 * and run_programs() is called on the buffered spectrum. Spectra with a charge
 * above 3, with unparsable attributes, or with an END IONS that has no
 * matching BEGIN IONS are skipped.
 *
 * Successful results are written to "Biceps.tempResults.<mgfShortName>.txt".
 * After the whole file has been read, all scores in pepResults_ are written to
 * "Biceps.gmm.<mgfShortName>.txt", a cutoff is derived via gmm_bic()/findCutoff()
 * when more than 10 results exist, and writeCompleteResult()/writeFasta() are
 * called.
 *
 * Problems with individual spectra or with opening files are reported on
 * cout/cerr; the function does not throw for them.
 */
void Biceps::readMGF()
{
  std::string filename = genOp_.mgfAbsPath.string();

  std::cout <<"Now reading " << filename << std::endl;
  fasta_.setNumTags(genOp_.numTags);

  std::string linebuffer; //< buffering the textlines in the mgf file
  bool validSpectrum = true; //< So we can just skip if it's invalid.

  // Now checking for the filetype.
  bool dosformat = isDosFile(filename);

  std::ifstream mgfFile;
  mgfFile.open(filename.c_str(), ios::binary); //< The filename of the spectra file
  if (!mgfFile.is_open())
  {
    // Keep going as before (the read loop simply does not run), but say why
    // the outputs will be empty.
    cerr << "Could not open " << filename << " for reading." << std::endl;
  }

  // status, where are we in the spectrum?
  bool inBeginIons = false;
  bool inPeakList = false;

  // Only for buffer reasons, what's the charge etc. of the current spectrum?
  // NOTE(review): these are not reset per spectrum, so a spectrum without
  // TITLE/PEPMASS/CHARGE inherits the values of the previous one - confirm
  // whether that is intended.
  int charge = 0;
  float precursormass = 0;
  string title="";

  std::vector<int> indices; // to remember the corresponding spectrum-indices of valid results
  std::vector<string> titles; // remember the spectrum titles
  size_t numGoodscores = 0;

  ofstream bufferfile;
  ofstream results;
  string tempresultname = std::string("Biceps.tempResults.") + genOp_.mgfShortName + string(".txt");
  results.open(tempresultname.c_str(), ios::binary); // Biceps_results will handle the final output.
  if (!results.is_open())
  {
    cerr << "Could not open " << tempresultname << " for writing." << std::endl;
  }

  while (std::getline(mgfFile, linebuffer, '\n'))
  {
    // Length as read from the file; used to report the line's start offset.
    const size_t rawLineLength = linebuffer.length();

    // In DOS files getline leaves the '\r' of the "\r\n" pair at the end of
    // the line. Remove it once, up front, so that parsing (titles, masses)
    // and buffering both see the clean line. Only a real '\r' is removed, so
    // a last line without a line ending keeps its final character.
    if (dosformat && !linebuffer.empty() && linebuffer[linebuffer.size() - 1] == '\r')
    {
      linebuffer.erase(linebuffer.size() - 1);
    }

    // first case, we're at the beginning of a new spectrum-subsection
    if (linebuffer.substr(0, 8) == "BEGIN IO")
    {
      mgfId_++;
      if (inBeginIons)
      {
        cout << "BEGIN IONS tag found without previous BEGIN IONS being closed at" + lexical_cast<string>(size_t(mgfFile.tellg())-rawLineLength-1) + "\n" << std::endl;
        cout << "will begin new spectrum here" << std::endl;
      }
      validSpectrum = true;
      inBeginIons = true;
      // A previous spectrum that was never closed may have left us in its
      // peak list; without this reset the attributes of the new spectrum
      // would be treated as peaks and never parsed.
      inPeakList = false;

      // Start a fresh buffer file for this spectrum.
      if (bufferfile.is_open()) bufferfile.close();
      bufferfile.open(genOp_.sSpectrumFN.c_str(), fstream::trunc | ios::binary);
      if (!bufferfile.good()) { cerr << "Problem writing to buffer, check your writing rights please." << std::endl;}

      bufferfile << linebuffer.c_str() << "\n";
    } // if BEGIN IO - Beginning of a spectrum

    // second case
    else if (linebuffer.substr(0, 8) != ("END IONS")) // we are in a spectrum, parse the lines.
    {
      if (!inBeginIons) continue; // outside of any spectrum (or after a parse error)

      try
      {
        if (!inPeakList)
        {
          if (linebuffer.substr(0, 6) == ("TITLE="))
          {
            title = linebuffer.substr(6);
            // NOTE(review): the title is appended again further down when
            // run_programs() succeeds, so `titles` can hold more entries
            // than `indices`; confirm which of the two writeCompleteResult()
            // expects.
            titles.push_back(title);
          }
          else if (linebuffer.substr(0, 8) == ("PEPMASS="))
          {
            // Only the first whitespace-separated token (the m/z) is used.
            string pepMassStr = linebuffer.substr(8);
            pepMassStr = pepMassStr.substr(0,pepMassStr.find_first_of("\t "));
            precursormass = lexical_cast<float>(pepMassStr);

            // The buffered PEPMASS line is cut after the m/z as well.
            linebuffer = linebuffer.substr(0,linebuffer.find_first_of("\t "));
          }
          else if (linebuffer.substr(0, 7) == ("CHARGE="))
          {
            // Keep only the leading digits, e.g. "2+" -> "2".
            string pepChargeStr = linebuffer.substr(7);
            size_t rest = pepChargeStr.find_first_not_of("0123456789",0);
            if (rest != string::npos)
              pepChargeStr.erase(rest);
            charge = lexical_cast<size_t>(pepChargeStr);
            if (charge > 3)
            {
              std::cout << "chargevalue " << charge << " is unhandled, skipping." << std::endl;
              validSpectrum = false;
            }
          }
          else if (linebuffer.find('=') != string::npos)
          {
            continue; // ignored attribute (also not copied into the buffer file)
          }
          else
          {
            // First line without '=': the peak list starts here.
            // (inBeginIons is known to be true at this point.)
            inPeakList = true;
          }
        } // if (!inPeakList)
      } // try
      catch (const std::exception& e)
      {
        cout << "[SpectrumList_MGF::parseSpectrum] Error parsing line at offset " +
          lexical_cast<string>(size_t(mgfFile.tellg())-rawLineLength-1) + ": " + linebuffer + "\n" << std::endl;
        // Drop the rest of this spectrum; its lines are skipped until the
        // next BEGIN IONS.
        inBeginIons = false;
        validSpectrum = false;
      }

      bufferfile << linebuffer.c_str() << "\n";
    } // else if (not end of spectrum)

    else // END IONS
    {
      // We found the last line of a spectrum, now it's time to finish the mgf buffer file and
      // do pepnovo/directag and pepsplice afterwards.

      if (!inBeginIons)
      {
        // incorrect mgf file, please fix.
        std::cout << "END IONS tag found without opening BEGIN IONS at" + lexical_cast<string>(size_t(mgfFile.tellg())-rawLineLength-1) + "\n" << std::endl;
        validSpectrum = false;
      }
      inBeginIons = false;
      inPeakList = false;

      bufferfile << linebuffer.c_str() << "\n";
      bufferfile.close();

      if (validSpectrum == false) // either charge too high or spectrum invalid
      {
        continue; // just go to the next line
      }

      fasta_.initializeMGF(charge, precursormass);

      cout << "Analyzing spectrum " << mgfId_ << endl;

      // Start analyzing by calling run_programs, which will run directag/pepnovo and then pepsplice afterwards
      bool success = run_programs(); // if an actual result was found, return true, else return false;
      if (success)
      {
        Pepsplice::PepspliceResult & res = pepResults_.back();
        indices.push_back(mgfId_);
        titles.push_back(title);
        // if it's good, write the result to a file
        writeResult(results, res, mgfId_, title);
        ++numGoodscores;
      }
    } // END IONS, parsed one spectrum, call other programs
  } // while getline

  // A truncated last spectrum (no END IONS) leaves the buffer file open.
  if (bufferfile.is_open()) bufferfile.close();
  // Flush and close the temporary results before the summary steps run.
  results.close();

  assert(pepResults_.size() == numGoodscores); // check if all the results are in pepres

  ofstream resfile;
  string resfileName = string("Biceps.gmm.") + genOp_.mgfShortName + string(".txt");
  resfile.open(resfileName.c_str(), ios::trunc | ios::binary);

  // temporary score-output to call bic on.
  for (size_t i = 0; i < pepResults_.size(); ++i)
  {
    resfile << pepResults_[i].score << "\n";
  }
  resfile.close(); // close() flushes

  // now beginning BIC part
  std::vector<double> mu,sigma;
  std::vector<int> labels;
  double cutoff = -99999999; // used unchanged when there are too few results to fit

  // final output
  if (pepResults_.size() > 10)
  {
    gmm_bic(2,numGoodscores,resfileName.c_str(), mu, sigma, labels);
    cutoff = findCutoff(mu, sigma, 2);
  }
  Biceps::writeCompleteResult(pepResults_, indices, titles, labels, mu, sigma, cutoff);
  Biceps::writeFasta(pepResults_);
}