/**
 * Write all sequences of a container to a stream in Mase format.
 *
 * Layout produced:
 *   - one ";;"-prefixed line per general comment (a bare ";;" line if there are none),
 *   - then, for each sequence: its ";"-prefixed comment lines (a bare ";" line if
 *     there are none), its name on its own line, and its text wrapped to
 *     charsByLine_ characters per line.
 *
 * A charsByLine_ of 0 is treated as "no wrapping": the whole sequence is written
 * on a single line instead of looping forever on zero-width chunks.
 *
 * @param output Destination stream; must be in a good state.
 * @param sc     Container providing the comments, names and sequence text.
 * @throw IOException if the stream is not usable on entry.
 */
void Mase::writeSequences(ostream& output, const SequenceContainer& sc) const throw (Exception)
{
  // Refuse to write anything to a stream that is already in a failed state.
  if (!output)
  {
    throw IOException ("Mase::write : failed to open file");
  }

  // General (file-level) comments. A lone ";;" marks an empty comment section.
  Comments comments = sc.getGeneralComments();
  if (comments.size() == 0)
  {
    output << ";;" << endl;
  }
  for (size_t i = 0; i < comments.size(); i++)
  {
    output << ";;" << comments[i] << endl;
  }

  // Main loop: one record per sequence.
  vector<string> names = sc.getSequencesNames();
  for (size_t i = 0; i < names.size(); i++)
  {
    // Sequence comments. If the sequence has none, an empty comment line is written.
    comments = sc.getComments(names[i]);
    if (comments.size() == 0)
    {
      output << ";" << endl;
    }
    else
    {
      for (size_t j = 0; j < comments.size(); j++)
      {
        output << ";" << comments[j] << endl;
      }
    }

    // Sequence name.
    output << names[i] << endl;

    // Sequence text, cut into chunks of at most lineWidth characters.
    // An empty sequence produces no line at all.
    const string seq = sc.toString(names[i]);
    const string::size_type lineWidth =
      (charsByLine_ != 0) ? static_cast<string::size_type>(charsByLine_) : seq.size();
    for (string::size_type pos = 0; pos < seq.size(); pos += lineWidth)
    {
      output << seq.substr(pos, lineWidth) << endl;
    }
  }
}
unsigned int MaseTools::getPhase(const Comments & maseFileHeader, const string &setName) throw (Exception) { unsigned int phase = 0; string::size_type index = 0; for(unsigned int i = 0; i < maseFileHeader.size(); i++) { string current = maseFileHeader[i]; index = current.find("# of"); if(index < current.npos) { StringTokenizer st(string(current.begin() + index + 12 , current.end()), " \t\n\f\r=;"); //unsigned int numberOfSegments = TextTools::toInt(st.nextToken()); //cout << "Number of regions: " << st.nextToken() << endl; string name; while(st.hasMoreToken()) { name = st.nextToken(); //cout << "Name of regions: " << name << endl; } if(name == setName) { return phase; } } index = current.find("/codon_start"); if(index < current.npos) { StringTokenizer st(string(current.begin() + index + 12, current.end()), " \t\n\f\r=;"); phase = TextTools::toInt(st.nextToken()); } } throw Exception("PolymorphismSequenceContainer::getPhase: no /codon_start found, or site selection missing."); }
/**
 * Build the ingroup species name from the sequence selections declared in the
 * container's general comments.
 *
 * Every selection whose name starts with "INGROUP" is split on '_', and the
 * tokens at indices 1 and 2 are joined with a single space. If several
 * selections match, the one visited last in the map's iteration order wins.
 *
 * @param psc Container whose general comments hold the Mase header.
 * @return The species name, or an empty string when the header is empty or no
 *         selection name starts with "INGROUP".
 */
string PolymorphismSequenceContainerTools::getIngroupSpeciesName(const PolymorphismSequenceContainer & psc)
{
  string speciesName;
  Comments maseFileHeader = psc.getGeneralComments();
  if (maseFileHeader.size() == 0)
    return speciesName;

  map<string, unsigned int> groupMap = MaseTools::getAvailableSequenceSelections(maseFileHeader);
  for (map<string, unsigned int>::iterator mi = groupMap.begin(); mi != groupMap.end(); ++mi)
  {
    const string & key = mi->first;
    if (key.compare(0, 7, "INGROUP") == 0)
    {
      // Stack-allocated tokenizer: the previous heap allocation was never freed.
      StringTokenizer sptk(key, "_");
      speciesName = sptk.getToken(1) + " " + sptk.getToken(2);
    }
  }
  return speciesName;
}
/**
 * Extract the site positions of a named site selection from a Mase header.
 *
 * A selection is declared by a line containing "# of", followed by a word that
 * is skipped, the number of segments, and the selection name (all remaining
 * tokens concatenated without separator). The following header lines hold
 * whitespace-separated "begin,end" segments, bounds included.
 *
 * @param maseFileHeader Header comment lines of the Mase file.
 * @param setName        Name of the selection to extract.
 * @return Zero-based site positions of the selection. If the header ends before
 *         the declared number of segments has been read, the positions gathered
 *         so far are returned.
 * @throw IOException if no position at all could be collected for setName.
 */
SiteSelection MaseTools::getSiteSet(const Comments & maseFileHeader, const string & setName) throw (IOException)
{
  SiteSelection selection;

  for (unsigned int line = 0; line < maseFileHeader.size(); ++line)
  {
    const string header = maseFileHeader[line];
    const string::size_type marker = header.find("# of");
    if (marker == string::npos)
      continue; // Not a selection declaration.

    StringTokenizer declaration(header.substr(marker + 4), " \t\n\f\r=;");
    declaration.nextToken(); // Skip next word: may be 'regions' or 'segments' or else.
    const unsigned int numberOfSegments = TextTools::toInt(declaration.nextToken());

    string name;
    while (declaration.hasMoreToken())
    {
      name += declaration.nextToken();
    }
    if (name != setName)
      continue; // Some other selection.

    // The set definition starts on the line right after the declaration.
    unsigned int segmentsRead = 0;
    for (unsigned int row = line + 1; row < maseFileHeader.size(); ++row)
    {
      const string definition = maseFileHeader[row];
      StringTokenizer segments(definition);
      while (segments.hasMoreToken())
      {
        StringTokenizer bounds(segments.nextToken(), ",");
        const unsigned int first = TextTools::toInt(bounds.nextToken());
        const unsigned int last  = TextTools::toInt(bounds.nextToken());
        // WARNING: in the mase+ format sites are numbered from 1 to nbSites,
        // whereas in SiteContainer the index begins at 0.
        for (unsigned int site = first; site <= last; ++site)
        {
          selection.push_back(site - 1); // Bounds included.
        }
        ++segmentsRead;
        if (segmentsRead == numberOfSegments)
          return selection;
      }
    }
    // The whole remainder of the header has been consumed: nothing left to scan.
    break;
  }

  if (selection.size() == 0)
  {
    throw IOException("Site set " + setName + " has not been found in the sequence file.");
  }
  return selection;
}
/**
 * List the site selections declared in a Mase header together with their sizes.
 *
 * A selection is declared by a line containing "# of", followed by a word that
 * is skipped, the number of segments, and the selection name (remaining tokens
 * joined with single spaces). The following header lines are read as
 * whitespace-separated "begin,end" segments; each contributes end - begin + 1
 * sites. The segment count is compared with the declared number after each
 * complete line, and the selection is recorded when they are equal.
 *
 * A declaration whose segments are not completed before the header ends is
 * ignored; the header is never read past its last line.
 *
 * @param maseHeader Header comment lines of the Mase file.
 * @return Map from selection name to its total number of sites.
 */
map<string, unsigned int> MaseTools::getAvailableSiteSelections(const Comments & maseHeader)
{
  map<string, unsigned int> selections;
  for (unsigned int i = 0; i < maseHeader.size(); i++)
  {
    string current = maseHeader[i];
    string::size_type index = current.find("# of");
    if (index == string::npos)
      continue; // Not a selection declaration.

    StringTokenizer st(current.substr(index + 4), " \t\n\f\r=;");
    st.nextToken(); // Skip next word: may be 'regions' or 'segments' or else.
    unsigned int numberOfSegments = TextTools::toInt(st.nextToken());
    string name = st.nextToken();
    while (st.hasMoreToken())
    {
      name += " " + st.nextToken();
    }

    unsigned int counter = 0;
    unsigned int nbSites = 0;
    // Advance BEFORE testing the bound, so that maseHeader[i] is always a valid
    // line (testing first and incrementing afterwards read one past the end).
    while (++i < maseHeader.size())
    {
      current = maseHeader[i];
      StringTokenizer st2(current);
      while (st2.hasMoreToken())
      {
        StringTokenizer st3(st2.nextToken(), ",");
        unsigned int begin = TextTools::toInt(st3.nextToken());
        unsigned int end   = TextTools::toInt(st3.nextToken());
        counter++;
        nbSites += end - begin + 1; // Bounds included.
      }
      if (counter == numberOfSegments)
      {
        selections[name] = nbSites;
        break; // i stays on the last consumed line; the outer loop moves past it.
      }
    }
  }
  return selections;
}
/**
 * List the sequence selections declared in a Mase header.
 *
 * A selection is declared by a line containing "@ of", followed by a word that
 * is skipped, the number of sequences, and the selection name. The name is the
 * next token with all remaining tokens appended to it without separator.
 *
 * @param maseHeader Header comment lines of the Mase file.
 * @return Map from selection name to its declared number of sequences. A name
 *         declared more than once keeps the count of its last declaration.
 */
map<string, unsigned int> MaseTools::getAvailableSequenceSelections(const Comments & maseHeader)
{
  map<string, unsigned int> selections;

  for (unsigned int line = 0; line < maseHeader.size(); ++line)
  {
    const string header = maseHeader[line];
    const string::size_type marker = header.find("@ of");
    if (marker == string::npos)
      continue; // Not a sequence selection declaration.

    StringTokenizer tokens(header.substr(marker + 4), " \t\n\f\r=;");
    tokens.nextToken(); // Skip next word: may be 'sequences' or else.
    const unsigned int numberOfSequences = TextTools::fromString<unsigned int>(tokens.nextToken());

    string label = tokens.nextToken();
    while (tokens.hasMoreToken())
    {
      label += tokens.nextToken();
    }
    selections[label] = numberOfSequences;
  }

  return selections;
}
int main() { list<string> commands; list<string> files; string tempString; string input; //Algorithm to split the input and put files in list "files" and comamnds in list "commands" getline(cin, input); for (int i = 0; i < input.length(); i++) { do { tempString.append(1, input[i]); i++; } while (input[i] != ' ' && i < input.length()); if (tempString[0] == '-') { commands.push_back(tempString); } else { files.push_back(tempString); } tempString.clear(); } //Foreaching each file with each command all in the requested sequence. Creating object of each class for the needed // operation and using their properties and methods. On each itteration the input file is copied to the OLD file // and the input file is being modified depending on the requested command. for each (string file in files) { int counter = 0; for each (string command in commands) { counter++; string newLine; ifstream inputFileRead(file); string newFile; newFile.append(file); newFile.append(".old"); ofstream outputFileRead(newFile); while (getline(inputFileRead, newLine)) { outputFileRead << newLine << endl; } inputFileRead.close(); outputFileRead.close(); newLine.clear(); if (command == "--comments") { Comments comment; ifstream outputFileWrite(newFile); ofstream inputFileWrite(file); while (getline(outputFileWrite, newLine)) { comment.removeComments(newLine); if (comment.newLineFixed.empty()) { continue; } else inputFileWrite << comment.newLineFixed << endl; } outputFileWrite.close(); inputFileWrite.close(); } else if (command == "--newlines=CRLF") { ifstream outputFileWrite(newFile); ofstream inputFileWrite(file); NewLines newlines; while (getline(outputFileWrite, newLine)) { newlines.newLinesCRFL(newLine); inputFileWrite << newlines.newLineFix << endl; } } else if (command == "--newlines=LF") { ifstream outputFileWrite(newFile); ofstream inputFileWrite(file); NewLines newlines; while (getline(outputFileWrite, newLine)) { newlines.newLinesCRFL(newLine); inputFileWrite << newlines.newLineFix << endl; } 
} else if (command == "--identation=spaces") { ifstream outputFileWrite(newFile); ofstream inputFileWrite(file); Identation identation; while (getline(outputFileWrite, newLine)) { identation.identationSpaces(newLine); inputFileWrite << identation.newLineFix << endl; } } else if (command == "--identation=tabs") { ifstream outputFileWrite(newFile); ofstream inputFileWrite(file); Identation identation; while (getline(outputFileWrite, newLine)) { identation.identationTabs(newLine); inputFileWrite << identation.newLineFix << endl; } } }