Exemplo n.º 1
0
VectorSiteContainer* SequenceApplicationTools::getSiteContainer(
  const Alphabet* alpha,
  map<string, string>& params,
  const string& suffix,
  bool suffixIsOptional,
  bool verbose,
  int warn)
{
  string sequenceFilePath = ApplicationTools::getAFilePath("input.sequence.file", params, true, true, suffix, suffixIsOptional, "none", warn);
  string sequenceFormat = ApplicationTools::getStringParameter("input.sequence.format", params, "Fasta()", suffix, suffixIsOptional, warn);
  BppOAlignmentReaderFormat bppoReader(warn);
  unique_ptr<IAlignment> iAln(bppoReader.read(sequenceFormat));
  map<string, string> args(bppoReader.getUnparsedArguments());
  if (verbose)
  {
    ApplicationTools::displayResult("Sequence file " + suffix, sequenceFilePath);
    ApplicationTools::displayResult("Sequence format " + suffix, iAln->getFormatName());
  }

  const Alphabet* alpha2;
  if (AlphabetTools::isRNYAlphabet(alpha))
    alpha2 = &dynamic_cast<const RNY*>(alpha)->getLetterAlphabet();
  else
    alpha2 = alpha;

  const SequenceContainer* seqCont = iAln->readAlignment(sequenceFilePath, alpha2);

  VectorSiteContainer* sites2 = new VectorSiteContainer(*dynamic_cast<const OrderedSequenceContainer*>(seqCont));

  delete seqCont;

  VectorSiteContainer* sites;

  if (AlphabetTools::isRNYAlphabet(alpha))
  {
    const SequenceTools ST;
    sites = new VectorSiteContainer(alpha);
    for (unsigned int i = 0; i < sites2->getNumberOfSequences(); i++)
    {
      sites->addSequence(*(ST.RNYslice(sites2->getSequence(i))));
    }
    delete sites2;
  }
  else
    sites = sites2;

  // Look for site selection:
  if (iAln->getFormatName() == "MASE file")
  {
    // getting site set:
    string siteSet = ApplicationTools::getStringParameter("siteSelection", args, "none", suffix, suffixIsOptional, warn + 1);
    if (siteSet != "none")
    {
      VectorSiteContainer* selectedSites;
      try
      {
        selectedSites = dynamic_cast<VectorSiteContainer*>(MaseTools::getSelectedSites(*sites, siteSet));
        if (verbose)
          ApplicationTools::displayResult("Set found", TextTools::toString(siteSet) + " sites.");
      }
      catch (IOException& ioe)
      {
        throw ioe;
      }
      if (selectedSites->getNumberOfSites() == 0)
      {
        throw Exception("Site set '" + siteSet + "' is empty.");
      }
      delete sites;
      sites = selectedSites;
    }
  }
  else
  {
    // getting site set:
    size_t nbSites = sites->getNumberOfSites();

    string siteSet = ApplicationTools::getStringParameter("input.site.selection", params, "none", suffix, suffixIsOptional, warn + 1);

    VectorSiteContainer* selectedSites = 0;
    if (siteSet != "none")
    {
      vector<size_t> vSite;
      try
      {
        vector<int> vSite1 = NumCalcApplicationTools::seqFromString(siteSet,",",":");
        for (size_t i = 0; i < vSite1.size(); ++i)
        {
          int x = (vSite1[i] >= 0 ? vSite1[i] : static_cast<int>(nbSites) + vSite1[i]+ 1);
          if (x<=(int)nbSites)
          {
            if (x > 0)
              vSite.push_back(static_cast<size_t>(x - 1));
            else
              throw Exception("SequenceApplicationTools::getSiteContainer(). Incorrect null index: " + TextTools::toString(x));
          }
          else
            throw Exception("SequenceApplicationTools::getSiteContainer(). Too large index: " + TextTools::toString(x));
        }
        selectedSites = dynamic_cast<VectorSiteContainer*>(SiteContainerTools::getSelectedSites(*sites, vSite));
        selectedSites->reindexSites();
      }
      catch (Exception& e)
      {
        string seln;
        map<string, string> selArgs;
        KeyvalTools::parseProcedure(siteSet, seln, selArgs);
        if (seln == "Sample")
        {
          size_t n = ApplicationTools::getParameter<size_t>("n", selArgs, nbSites, "", true, warn + 1);
          bool replace = ApplicationTools::getBooleanParameter("replace", selArgs, false, "", true, warn + 1);

          vSite.resize(n);
          vector<size_t> vPos;
          for (size_t p = 0; p < nbSites; ++p)
          {
            vPos.push_back(p);
          }

          RandomTools::getSample(vPos, vSite, replace);

          selectedSites = dynamic_cast<VectorSiteContainer*>(SiteContainerTools::getSelectedSites(*sites, vSite));
          if (replace)
            selectedSites->reindexSites();
        }
        else
          throw Exception("Unknown site selection description: " + siteSet);
      }

      if (verbose)
        ApplicationTools::displayResult("Selected sites", TextTools::toString(siteSet));

      if (selectedSites && (selectedSites->getNumberOfSites() == 0))
      {
        throw Exception("Site set '" + siteSet + "' is empty.");
      }
      delete sites;
      sites = selectedSites;
    }
  }
  return sites;
}