/**
 * @brief Collect the sites yielded by a CompleteSiteContainerIterator.
 *
 * Walks the input with a CompleteSiteContainerIterator and copies every site
 * it yields into a freshly allocated VectorSiteContainer that carries the
 * same sequence names and alphabet as the input.
 *
 * @param sites The container to read from; it is not modified.
 * @return A container allocated with new. This function keeps no handle on
 *         it, so the caller has to delete it.
 */
SiteContainer* SiteContainerTools::getCompleteSites(const SiteContainer& sites)
{
  const vector<string> names = sites.getSequencesNames();

  VectorSiteContainer* result = new VectorSiteContainer(names.size(), sites.getAlphabet());
  // NOTE(review): the 'false' flag presumably skips validation of the names,
  // which come straight from the input container -- confirm against
  // VectorSiteContainer::setSequencesNames.
  result->setSequencesNames(names, false);

  // The iterator decides which sites qualify; each one it returns is added
  // with addSite()'s default options.
  for (CompleteSiteContainerIterator it(sites); it.hasMoreSites(); )
  {
    result->addSite(*it.nextSite());
  }

  return result;
}
/**
 * @brief Keep only the sites whose frequency for state -1 does not exceed a
 * threshold.
 *
 * For every site, SiteTools::getFrequencies() fills a state -> frequency map
 * and the entry for key -1 is compared with the threshold.
 * NOTE(review): -1 is presumably the gap state of the alphabet -- confirm.
 *
 * @param sites       The container to filter; it is not modified.
 * @param maxFreqGaps Largest accepted value of the -1 entry (inclusive).
 * @return A VectorSiteContainer allocated with new, with the same sequence
 *         names and alphabet as the input; the caller has to delete it.
 */
SiteContainer* SiteContainerTools::removeGapSites(const SiteContainer& sites, double maxFreqGaps)
{
  const vector<string> names = sites.getSequencesNames();

  VectorSiteContainer* kept = new VectorSiteContainer(names.size(), sites.getAlphabet());
  kept->setSequencesNames(names, false);

  for (unsigned int pos = 0; pos < sites.getNumberOfSites(); ++pos)
  {
    const Site& column = sites.getSite(pos);

    // A fresh map per site: operator[] below default-inserts 0 when the
    // site has no entry for -1, so such a site passes any non-negative
    // threshold.
    map<int, double> frequencies;
    SiteTools::getFrequencies(column, frequencies);

    // Written as '<=' on purpose: a NaN on either side must reject the site.
    const bool acceptable = (frequencies[-1] <= maxFreqGaps);
    if (acceptable)
    {
      kept->addSite(column, false);
    }
  }

  return kept;
}
SiteContainer* SiteContainerTools::removeGapOrUnresolvedOnlySites(const SiteContainer& sites) { vector<string> seqNames = sites.getSequencesNames(); VectorSiteContainer* noGapCont = new VectorSiteContainer(seqNames.size(), sites.getAlphabet()); noGapCont->setSequencesNames(seqNames, false); for (unsigned int i = 0; i < sites.getNumberOfSites(); i++) { const Site* site = &sites.getSite(i); if (!SiteTools::isGapOrUnresolvedOnly(*site)) noGapCont->addSite(*site, false); } return noGapCont; }
/**
 * @brief Extract the sites listed in a selection into a new container.
 *
 * The sites are appended in the order given by the selection. The sequence
 * names, the alphabet and the general comments are taken from the input
 * container.
 *
 * @param sequences The container to pick sites from; it is not modified.
 * @param selection Indices of the sites to extract.
 * @return A VectorSiteContainer allocated with new; the caller has to
 *         delete it.
 */
SiteContainer* SiteContainerTools::getSelectedSites(
  const SiteContainer& sequences,
  const SiteSelection& selection)
{
  const vector<string> names = sequences.getSequencesNames();

  VectorSiteContainer* picked = new VectorSiteContainer(names.size(), sequences.getAlphabet());
  picked->setSequencesNames(names, false);

  for (unsigned int k = 0; k < selection.size(); ++k)
  {
    // No checking is requested here ('false'): the container passed as an
    // argument is assumed to be correct.
    // WARNING: what happens if the selection lists the same index several
    // times? This case is not handled specifically.
    picked->addSite(sequences.getSite(selection[k]), false);
  }

  picked->setGeneralComments(sequences.getGeneralComments());
  return picked;
}
/**
 * @brief Drop every site for which SiteTools::hasStopCodon() is true.
 *
 * The remaining sites are copied, in their original order, into a new
 * VectorSiteContainer with the same sequence names and alphabet as the input.
 *
 * @param sites The container to filter; its alphabet must be a CodonAlphabet.
 * @return A container allocated with new; the caller has to delete it.
 * @throw AlphabetException If the alphabet of the input cannot be cast to
 *        CodonAlphabet.
 */
SiteContainer* SiteContainerTools::removeStopCodonSites(const SiteContainer& sites) throw (AlphabetException)
{
  const Alphabet* alphabet = sites.getAlphabet();

  // The cast is only a type check; the codon alphabet itself is not used here.
  if (dynamic_cast<const CodonAlphabet*>(alphabet) == 0)
    throw AlphabetException("Not a Codon Alphabet", alphabet);

  const vector<string> names = sites.getSequencesNames();

  VectorSiteContainer* withoutStops = new VectorSiteContainer(names.size(), alphabet);
  withoutStops->setSequencesNames(names, false);

  for (unsigned int pos = 0; pos < sites.getNumberOfSites(); ++pos)
  {
    const Site& column = sites.getSite(pos);

    if (SiteTools::hasStopCodon(column))
      continue;

    withoutStops->addSite(column, false);
  }

  return withoutStops;
}
/**
 * @brief Replace the "." placeholders of a dotted alignment by the
 * corresponding state of the reference sequence.
 *
 * The reference sequence is a sequence that contains no "." at all. If
 * several sequences qualify, the last one in the container is used. Every
 * site is then rebuilt with the resolved alphabet: a "." is replaced by the
 * character the reference carries at that position, any other character is
 * kept as is.
 *
 * @param dottedAln        The input alignment; its alphabet must satisfy
 *                         AlphabetTools::isDefaultAlphabet().
 * @param resolvedAlphabet The alphabet used to build the resolved sites.
 * @return A VectorSiteContainer allocated with new, carrying the sequence
 *         names of the input; the caller has to delete it.
 * @throw AlphabetException If the input alphabet is not a default alphabet.
 * @throw Exception If the input holds no sequence or no reference sequence.
 *        Exceptions raised while the resolved sites are built are propagated
 *        unchanged after the partial result has been released.
 */
SiteContainer* SiteContainerTools::resolveDottedAlignment(
  const SiteContainer& dottedAln,
  const Alphabet* resolvedAlphabet) throw (AlphabetException, Exception)
{
  if (!AlphabetTools::isDefaultAlphabet(dottedAln.getAlphabet()))
    throw AlphabetException("SiteContainerTools::resolveDottedAlignment. Alignment alphabet should of class 'DefaultAlphabet'.", dottedAln.getAlphabet());

  // First we look for the reference sequence:
  size_t n = dottedAln.getNumberOfSequences();
  if (n == 0)
    throw Exception("SiteContainerTools::resolveDottedAlignment. Input alignment contains no sequence.");

  // Only the index is recorded during the scan. Allocating a copy for each
  // candidate leaked every copy but the last one when several sequences
  // were free of dots.
  bool refFound = false;
  size_t refIndex = 0;
  for (size_t i = 0; i < n; ++i) // Test each sequence
  {
    const Sequence* seq = &dottedAln.getSequence(i);
    bool isRef = true;
    for (unsigned int j = 0; isRef && j < seq->size(); ++j) // For each site in the sequence
    {
      if (seq->getChar(j) == ".")
        isRef = false;
    }
    if (isRef) // We found a reference candidate; later candidates override it.
    {
      refFound = true;
      refIndex = i;
    }
  }
  if (!refFound)
    throw Exception("SiteContainerTools::resolveDottedAlignment. No reference sequence was found in the input alignment.");

  // Private copy of the reference, held as an automatic object so that it is
  // released on every exit path, including exceptions.
  const BasicSequence refSeq(dottedAln.getSequence(refIndex));

  // Now we build a new VectorSiteContainer:
  VectorSiteContainer* sites = new VectorSiteContainer(n, resolvedAlphabet);
  try
  {
    // We add each site one by one:
    size_t m = dottedAln.getNumberOfSites();
    string state;
    for (unsigned int i = 0; i < m; ++i)
    {
      const string resolved = refSeq.getChar(i);
      const Site* site = &dottedAln.getSite(i);
      Site resolvedSite(resolvedAlphabet, site->getPosition());
      for (unsigned int j = 0; j < n; j++)
      {
        state = site->getChar(j);
        if (state == ".")
        {
          state = resolved;
        }
        resolvedSite.addElement(state);
      }
      // Add the new site:
      sites->addSite(resolvedSite);
    }

    // Set sequence names:
    sites->setSequencesNames(dottedAln.getSequencesNames());
  }
  catch (...)
  {
    // Do not leak the partially built container when a call above throws.
    delete sites;
    throw;
  }

  // Return result:
  return sites;
}