Beispiel #1
0
/**
 * @brief Write every sequence of a container to a stream in Mase format.
 *
 * Output layout:
 *  - one ";;"-prefixed line per general comment (a lone ";;" line if there are none);
 *  - for each sequence: one ";"-prefixed line per sequence comment (a lone ";"
 *    line if there are none), then the sequence name, then the sequence text
 *    wrapped at charsByLine_ characters per line.
 *
 * A sequence whose text is empty produces no sequence line at all.
 * If charsByLine_ is 0 the sequence is written unwrapped on a single line
 * (a zero width cannot be honoured and would otherwise never terminate).
 *
 * @param output Stream to write to; must be in a good state.
 * @param sc     Container providing the comments, names and sequence text.
 * @throw IOException if the stream is not usable on entry.
 */
void Mase::writeSequences(ostream& output, const SequenceContainer& sc) const throw (Exception)
{
  // The stream must be writable before anything is emitted.
  if (!output) { throw IOException ("Mase::write : failed to open file"); }

  // General comments: always emit at least one ";;" line.
  const Comments& generalComments = sc.getGeneralComments();
  if (generalComments.size() == 0) {
    output << ";;" << endl;
  }
  for (size_t i = 0 ; i < generalComments.size() ; i++) {
    output << ";;" << generalComments[i] << endl;
  }

  // Main loop: one record per sequence.
  const vector<string> names = sc.getSequencesNames();
  for (size_t i = 0 ; i < names.size() ; i++)
  {
    // Sequence comments: an empty ";" line is written when there are none.
    const Comments& seqComments = sc.getComments(names[i]);
    if (seqComments.size() == 0)
    {
      output << ";" << endl;
    }
    else
    {
      for (size_t j = 0 ; j < seqComments.size() ; j++)
      {
        output << ";" << seqComments[j] << endl;
      }
    }

    // Sequence name.
    output << names[i] << endl;

    // Sequence text, cut into lines of at most charsByLine_ characters.
    // Walking an offset avoids repeatedly erasing the front of the string,
    // and the zero-width guard keeps the loop from spinning forever.
    const string seq = sc.toString(names[i]);
    const size_t lineLength = (charsByLine_ == 0) ? seq.size() : static_cast<size_t>(charsByLine_);
    for (size_t pos = 0 ; pos < seq.size() ; pos += lineLength)
    {
      output << seq.substr(pos, lineLength) << endl;
    }
  }
}
Beispiel #2
0
/**
 * @brief Find the phase associated with a named set in a Mase header.
 *
 * Header lines are scanned in order. Every "/codon_start" entry updates the
 * current phase (0 until one is seen). When a line containing "# of" is met
 * whose last token equals setName, the phase recorded so far is returned.
 *
 * @param maseFileHeader Header comment lines to scan.
 * @param setName        Name of the set to look for.
 * @return The most recent "/codon_start" value seen before the matching
 *         "# of" line, or 0 if none was seen.
 * @throw Exception if no "# of" line names setName.
 */
unsigned int MaseTools::getPhase(const Comments & maseFileHeader, const string &setName) throw (Exception)
{
  // Both markers are followed by a fixed 12-character skip. That is exactly
  // the length of "/codon_start"; for "# of" lines it presumably steps over
  // "# of regions" -- TODO confirm against the mase+ format.
  const string::size_type skip = 12;
  const string delimiters = " \t\n\f\r=;";

  unsigned int phase = 0;
  for (size_t i = 0; i < maseFileHeader.size(); i++) {
    const string current = maseFileHeader[i];

    string::size_type index = current.find("# of");
    if (index != string::npos) {
      // Clamp the start offset: a line shorter than index + 12 yields an
      // empty remainder instead of an iterator past the end of the string.
      const string::size_type start =
        (current.size() - index > skip) ? index + skip : current.size();
      StringTokenizer st(current.substr(start), delimiters);
      // Only the last token on the line is compared with the requested name.
      string name;
      while (st.hasMoreToken()) {
        name = st.nextToken();
      }
      if (name == setName) {
        return phase;
      }
    }

    index = current.find("/codon_start");
    if (index != string::npos) {
      // index + 12 never exceeds the line length here: the marker itself is 12 characters.
      StringTokenizer st(current.substr(index + skip), delimiters);
      phase = static_cast<unsigned int>(TextTools::toInt(st.nextToken()));
    }
  }
  throw Exception("PolymorphismSequenceContainer::getPhase: no /codon_start found, or site selection missing.");
}
/**
 * @brief Build the ingroup species name from the sequence selections of a container.
 *
 * The general comments of psc are passed to
 * MaseTools::getAvailableSequenceSelections(). For every selection whose name
 * starts with "INGROUP", the name is split on '_' and tokens 1 and 2 are
 * joined with a single space. If several selections match, the last one in
 * map order wins.
 *
 * @param psc Container whose general comments are inspected.
 * @return The species name, or an empty string when there are no general
 *         comments or no selection name starts with "INGROUP".
 */
string PolymorphismSequenceContainerTools::getIngroupSpeciesName(const PolymorphismSequenceContainer & psc)
{
  string speciesName;
  Comments maseFileHeader = psc.getGeneralComments();
  if (maseFileHeader.size() == 0) return speciesName;

  const map<string, unsigned int> groupMap = MaseTools::getAvailableSequenceSelections(maseFileHeader);
  for (map<string, unsigned int>::const_iterator mi = groupMap.begin() ; mi != groupMap.end() ; ++mi) {
    const string& key = mi->first;
    if (key.compare(0, 7, "INGROUP") == 0) {
      // Stack-allocated tokenizer: nothing to free, so no leak per matching key.
      StringTokenizer sptk(key, "_");
      speciesName = sptk.getToken(1) + " " + sptk.getToken(2);
    }
  }
  return speciesName;
}
Beispiel #4
0
/**
 * @brief Extract the site positions of a named set from a Mase header.
 *
 * A set is introduced by a line containing "# of". After that marker one
 * word is skipped, the next token is read as the number of segments, and all
 * remaining tokens are concatenated (without separator) to form the set name.
 * When the name equals setName, the following lines are read as
 * whitespace-separated "begin,end" pairs until the announced number of
 * segments has been collected.
 *
 * @param maseFileHeader Header comment lines to scan.
 * @param setName        Name of the set to extract.
 * @return Zero-based positions covered by the segments, bounds included.
 * @throw IOException if nothing was collected for setName.
 */
SiteSelection MaseTools::getSiteSet(const Comments & maseFileHeader, const string & setName) throw (IOException)
{
  SiteSelection sites;
  for (unsigned int line = 0; line < maseFileHeader.size(); line++) {
    string text = maseFileHeader[line];

    const string::size_type marker = text.find("# of");
    if (marker == string::npos) continue;

    StringTokenizer header(text.substr(marker + 4), " \t\n\f\r=;");
    header.nextToken(); // skip next word: may be 'regions' or 'segments' or else
    unsigned int expectedSegments = TextTools::toInt(header.nextToken());
    string label;
    while (header.hasMoreToken()) {
      label += header.nextToken();
    }
    if (label != setName) continue;

    // The set definition starts on the line after the declaration.
    line++;
    unsigned int segmentsRead = 0;
    while (line < maseFileHeader.size()) {
      text = maseFileHeader[line];
      line++;
      StringTokenizer ranges(text);
      while (ranges.hasMoreToken()) {
        StringTokenizer bounds(ranges.nextToken(), ",");
        unsigned int first = TextTools::toInt(bounds.nextToken());
        unsigned int last  = TextTools::toInt(bounds.nextToken());
        // WARNING: in the mase+ format sites are numbered from 1 to nbSites,
        // whereas in SiteContainer the index begins at 0.
        for (unsigned int site = first; site <= last; site++) {
          sites.push_back(site - 1); // bounds included.
        }
        segmentsRead++;
        if (segmentsRead == expectedSegments) return sites;
      }
    }
  }
  if (sites.size() == 0) {
    throw IOException("Site set " + setName + " has not been found in the sequence file.");
  }
  return sites;
}
Beispiel #5
0
/**
 * @brief List the site selections declared in a Mase header with their sizes.
 *
 * A selection is introduced by a line containing "# of". After that marker
 * one word is skipped, the next token is read as the number of segments, and
 * the remaining tokens joined with single spaces form the selection name.
 * The following lines are read as whitespace-separated "begin,end" pairs;
 * once the announced number of segments has been read, the selection is
 * recorded with the sum of (end - begin + 1) over its segments.
 *
 * A selection whose segments are not all present before the header ends is
 * not recorded.
 *
 * @param maseHeader Header comment lines to scan.
 * @return Map from selection name to its total number of sites.
 */
map<string, unsigned int> MaseTools::getAvailableSiteSelections(const Comments & maseHeader)
{
  map<string, unsigned int> selections;
  for (size_t i = 0; i < maseHeader.size(); i++) {
    string current = maseHeader[i];

    const string::size_type index = current.find("# of");
    if (index == string::npos) continue;

    StringTokenizer st(current.substr(index + 4), " \t\n\f\r=;");
    st.nextToken(); // skip next word: may be 'sequences' or else
    const unsigned int numberOfSegments = TextTools::toInt(st.nextToken());
    string name = st.nextToken();
    while (st.hasMoreToken()) {
      name += " " + st.nextToken();
    }

    unsigned int counter = 0;
    unsigned int nbSites = 0;
    // Advance only while a next line exists: testing i + 1 before the
    // increment prevents reading one element past the end of the header
    // when it stops before all segments have been seen.
    while (i + 1 < maseHeader.size()) {
      i++;
      current = maseHeader[i];
      StringTokenizer st2(current);
      while (st2.hasMoreToken()) {
        StringTokenizer st3(st2.nextToken(), ",");
        unsigned int begin = TextTools::toInt(st3.nextToken());
        unsigned int end   = TextTools::toInt(st3.nextToken());
        counter++;
        nbSites += end - begin + 1; // bounds included.
      }
      // Completion is only checked at the end of a line, after all of its pairs are counted.
      if (counter == numberOfSegments) {
        selections[name] = nbSites;
        break;
      }
    }
  }
  return selections;
}
Beispiel #6
0
/**
 * @brief List the sequence selections declared in a Mase header.
 *
 * A selection is introduced by a line containing "@ of". After that marker
 * one word is skipped, the next token is read as the number of sequences,
 * and the remaining tokens are concatenated (without separator) to form the
 * selection name.
 *
 * @param maseHeader Header comment lines to scan.
 * @return Map from selection name to its declared number of sequences.
 */
map<string, unsigned int> MaseTools::getAvailableSequenceSelections(const Comments & maseHeader)
{
  map<string, unsigned int> found;
  for (unsigned int line = 0; line < maseHeader.size(); line++) {
    string text = maseHeader[line];

    const string::size_type marker = text.find("@ of");
    if (marker == string::npos) continue;

    StringTokenizer tokens(text.substr(marker + 4), " \t\n\f\r=;");
    tokens.nextToken(); // skip next word: may be 'sequences' or else
    unsigned int sequenceCount = TextTools::fromString<unsigned int>(tokens.nextToken());
    string label = tokens.nextToken();
    while (tokens.hasMoreToken()) {
      label += tokens.nextToken();
    }
    found[label] = sequenceCount;
  }
  return found;
}
Beispiel #7
0
int main()
{
	list<string> commands;
	list<string> files;
	string tempString;
	string input;
	//Algorithm to split the input and put files in list "files" and comamnds in list "commands"
	getline(cin, input);
	for (int i = 0; i < input.length(); i++)
	{
		do
		{
			tempString.append(1, input[i]);
			i++;

		} while (input[i] != ' ' && i < input.length());
		if (tempString[0] == '-')
		{
			commands.push_back(tempString);
		}
		else
		{
			files.push_back(tempString);
		}
		tempString.clear();

	}
	//Foreaching each file with each command all in the requested sequence. Creating object of each class for the needed 
	// operation and using their properties and methods. On each itteration the input file is copied to the OLD file
	// and the input file is being modified depending on the requested command.
	for each (string file in files)
	{
		int counter = 0;
		for each (string command in commands)
		{
				counter++;
				string newLine;
				ifstream inputFileRead(file);
				string newFile;
				newFile.append(file);
				newFile.append(".old");
				ofstream outputFileRead(newFile);
				while (getline(inputFileRead, newLine))
				{
					outputFileRead << newLine << endl;
				}
				inputFileRead.close();
				outputFileRead.close();
				newLine.clear();
			
			if (command == "--comments")
			{
				Comments comment;
				ifstream outputFileWrite(newFile);
				ofstream inputFileWrite(file);

				while (getline(outputFileWrite, newLine))
				{
					comment.removeComments(newLine);
					if (comment.newLineFixed.empty())
					{
						continue;
					}
					else
						inputFileWrite << comment.newLineFixed << endl;
				}
				outputFileWrite.close();
				inputFileWrite.close();
			}
			else if (command == "--newlines=CRLF")
			{
				ifstream outputFileWrite(newFile);
				ofstream inputFileWrite(file);
				NewLines newlines;
				while (getline(outputFileWrite, newLine))
				{
					newlines.newLinesCRFL(newLine);
					inputFileWrite << newlines.newLineFix << endl;
				}
			}
			else if (command == "--newlines=LF")
			{
				ifstream outputFileWrite(newFile);
				ofstream inputFileWrite(file);
				NewLines newlines;
				while (getline(outputFileWrite, newLine))
				{
					newlines.newLinesCRFL(newLine);
					inputFileWrite << newlines.newLineFix << endl;
				}
			}
			else if (command == "--identation=spaces")
			{
				ifstream outputFileWrite(newFile);
				ofstream inputFileWrite(file);
				Identation identation;
				while (getline(outputFileWrite, newLine))
				{
					identation.identationSpaces(newLine);
					inputFileWrite << identation.newLineFix << endl;
				}
			}
			else if (command == "--identation=tabs")
			{
				ifstream outputFileWrite(newFile);
				ofstream inputFileWrite(file);
				Identation identation;
				while (getline(outputFileWrite, newLine))
				{
					identation.identationTabs(newLine);
					inputFileWrite << identation.newLineFix << endl;
				}
			}

		}