Ejemplo n.º 1
0
void Mase::writeHeader_(std::ostream& output, const MaseHeader& header) const
{
  //Write trees:
  vector<string> treeNames = header.getTreeNames();
  for (size_t i = 0; i < treeNames.size(); ++i) {
    output << ";;$ " + treeNames[i] << endl;
    output << ";;" + header.getTree(treeNames[i]);
    output << endl;
  }

  //Write site selections:
  vector<string> siteSelectionNames = header.getSiteSelectionNames();
  for (size_t i = 0; i < siteSelectionNames.size(); ++i) {
    MultiRange<size_t> ranges = header.getSiteSelection(siteSelectionNames[i]);
    output << ";;Site selection " << siteSelectionNames[i] << " (" << ranges.totalLength() << " sites)" << endl;
    output << ";;# of segments=" << ranges.size() << " " << siteSelectionNames[i] << endl;
    output << ";;";
    for (size_t j = 0; j < ranges.size(); ++j) {
      output << " " << (ranges.getRange(j).begin() + 1) << "," << ranges.getRange(j).end();
      if ((j + 1) % 10 == 0)
        output << endl << ";;";
    }
    output << endl;
  }

  //Write sequence selections:
  vector<string> sequenceSelectionNames = header.getSequenceSelectionNames();
  for (size_t i = 0; i < sequenceSelectionNames.size(); ++i) {
    vector<size_t> set = header.getSequenceSelection(sequenceSelectionNames[i]);
    output << ";;@ of species=" << set.size() << " " << sequenceSelectionNames[i] << endl;
    output << ";;";
    for (unsigned int j = 0; j < set.size(); ++j) {
      output << " " << set[j];
      if ((j + 1) % 10 == 0)
        output << endl << ";;";
    }
    output << endl;
  }
}
Ejemplo n.º 2
0
void Mase::readHeader_(std::istream& input, MaseHeader& header) const throw (Exception)
{
  do {
    //Check if the line is a header line:
    if (input.peek() == ';') {
      char c;
      input.get(c);
      if (input.peek() == ';') {
        input.get(c);
        string line = FileTools::getNextLine(input);
        
        //Check the type of line...

        //Site selection:
        string::size_type index = line.find("# of");
        if (index < line.npos) {
          StringTokenizer st(string(line.begin() + static_cast<ptrdiff_t>(index + 4), line.end()), " \t=;");
          st.nextToken(); //skip next word: may be 'regions' or 'segments' or else ;-)
          unsigned int numberOfSegments = TextTools::to<unsigned int>(st.nextToken());
          string name = st.unparseRemainingTokens();
          //Then look for the set definition:
          MultiRange<size_t> siteSelection;
          while (siteSelection.size() < numberOfSegments) {
            line = FileTools::getNextLine(input);
            if (line[0] != ';' || line[1] != ';')
              throw Exception("Mase::readHeader_(): corrupted file, site selection " + name + " is incomplete. Aborting.");
            line = line.substr(2);
            StringTokenizer st2(line);
            while (st2.hasMoreToken()) {
              StringTokenizer st3(st2.nextToken(), ",");
              unsigned int begin = TextTools::to<unsigned int>(st3.nextToken());
              unsigned int end   = TextTools::to<unsigned int>(st3.nextToken());
              //WARNING!!! In the mase+ format, sites numerotation is 1-based, including, while ranges are 0-based, [a, b[:
              siteSelection.addRange(Range<size_t>(begin - 1, end));
            }
            if (siteSelection.size() > numberOfSegments)
              throw Exception("Mase::readHeader_(): incorrected file, found " + TextTools::toString(siteSelection.size()) + "segments while expected " + TextTools::toString(numberOfSegments));
          }
          header.setSiteSelection(name, siteSelection);
        } else {
          //Sequence selection:
          index = line.find("@ of");
          if (index < line.npos) {
            StringTokenizer st(line.substr(index + 4), " \t=;");
            st.nextToken(); //skip next word: may be 'sequences' or else ;-)
            unsigned int numberOfSequences = TextTools::to<unsigned int>(st.nextToken());
            string name = st.unparseRemainingTokens();
            //The look for the set definition:
            vector<size_t> sequenceSelection;
            while (sequenceSelection.size() < numberOfSequences) {
              line = FileTools::getNextLine(input);
              if (line[0] != ';' || line[1] != ';')
                throw Exception("Mase::readHeader_(): corrupted file, sequence selection " + name + " is incomplete. Aborting.");
              line = line.substr(2);
              StringTokenizer st2(line, ", ");
              while (st2.hasMoreToken()) {
                unsigned int pos = TextTools::to<unsigned int>(st2.nextToken());
                //WARNING!!! In the mase+ format, sequence numerotation is 1-based 
                sequenceSelection.push_back(pos);
            }
            if (sequenceSelection.size() > numberOfSequences)
              throw Exception("Mase::readHeader_(): incorrected file, found " + TextTools::toString(sequenceSelection.size()) + "sequences while expected " + TextTools::toString(numberOfSequences));
            }
            header.setSequenceSelection(name, sequenceSelection);
          } else {
            //Tree:
            index = line.find("$");
            if (index < line.npos) {
              string name = TextTools::removeSurroundingWhiteSpaces(line.substr(index + 1));
              //Here we stop if the line ends with a ";"
              string tree = "";
              do {
                line = FileTools::getNextLine(input);
                if (line[0] != ';' || line[1] != ';')
                  throw Exception("Mase::readHeader_(): corrupted file, tree " + name + " is incomplete. Aborting.");
                line = TextTools::removeSurroundingWhiteSpaces(line.substr(2));
                tree += line;
              } while (! TextTools::endsWith(line, ";"));
              header.setTree(name, tree);
            }
          }
        }
      } else {
        input.putback(c);
        break;
      }
    }
  } while (true);
}