int Locus :: ComputeAllBPs () { list <Allele*>::const_iterator AIterator; Allele* nextAllele = mAlleleList.front (); STRAlleleName firstCore (nextAllele->GetName ()); STRAlleleName* nextRep; int bpDisp; for (AIterator = mAlleleList.begin(); AIterator != mAlleleList.end(); AIterator++) { nextAllele = *AIterator; nextRep = new STRAlleleName (nextAllele->GetName ()); bpDisp = nextRep->GetBPDifferenceFrom (firstCore, mCoreRepeat); nextAllele->SetBP (bpDisp + mFirstCoreLocusBP); delete nextRep; } nextRep = new STRAlleleName (mLastExtendedAllele); bpDisp = nextRep->GetBPDifferenceFrom (firstCore, mCoreRepeat); mMaxLocusBP = bpDisp + mFirstCoreLocusBP; delete nextRep; nextRep = new STRAlleleName (mFirstExtendedAllele); bpDisp = firstCore.GetBPDifferenceFrom (*nextRep, mCoreRepeat); mMinLocusBP = mFirstCoreLocusBP - bpDisp; delete nextRep; return 0; }
void Caller::calculateStatistics() { std::unordered_map<std::string, Location>::iterator iter; for( iter = locationTable.begin(); iter != locationTable.end(); ++iter) { std::vector<double> variantPercentages; std::vector<Sample> sampleList = ( iter->second).getSamples(); for( int i = 0; i < sampleList.size(); i++) { ReadcountEntry re = sampleList[i].getReadcountEntry(); Allele mostFreqVariant = re.getMostFreqVariantAllele(); variantPercentages.push_back( mostFreqVariant.getPercentage()); } // Calculate mean double mean = Statistics::mean( variantPercentages); // Calculate variance double variance = Statistics::variance( variantPercentages, mean); // Calculate std double std = Statistics::standardDeviation( variance); // Calculate snr double cov = Statistics::coefficientOfVariation( mean, std); // Set statistics for the current Location ( iter->second).setMeanVAP( mean); ( iter->second).setVarianceVAP( variance); ( iter->second).setStdVAP( std); ( iter->second).setCOV( cov); } }
// Builds a colon-separated textual representation of an allele.
//
// Genotype alleles use the short form
//   type:cigar:position:length:alternateSequence
// All other alleles use the long form
//   sampleID:readID:type:cigar:position:length:strand:referenceSequence:
//   alternateSequence:quality:basesLeft:basesRight
// where strand is "+" for STRAND_FORWARD and "-" otherwise, and floating-point
// fields are written in fixed notation with a precision of 1.
string stringForAllele(const Allele &allele) {

    stringstream out;

    if (allele.genotypeAllele) {
        out << allele.typeStr() << ":" << allele.cigar << ":";
        // `fixed` follows `scientific`, so fixed notation is what stays in effect.
        out << scientific << fixed << allele.position << ":";
        out << allele.length << ":" << allele.alternateSequence;
        return out.str();
    }

    out.precision(1);

    out << allele.sampleID << ":" << allele.readID << ":";
    out << allele.typeStr() << ":" << allele.cigar << ":";
    // `fixed` follows `scientific`, so fixed notation is what stays in effect.
    out << scientific << fixed << allele.position << ":";
    out << allele.length << ":";
    out << (allele.strand == STRAND_FORWARD ? "+" : "-") << ":";
    out << allele.referenceSequence << ":" << allele.alternateSequence << ":";
    out << allele.quality << ":";
    out << allele.basesLeft << ":" << allele.basesRight;

    return out.str();
}
// Writes this locus to xmlFile as a <Locus> element.
//
// Always written: Name, Channel, MinBP, MaxBP, the search bounds and the
// <LadderAlleles> list.  The search bounds are rounded to two decimals and are
// written inside <SearchRegions> when GetGenerateILSFamilies () is true, or as
// MinGridLSBasePair / MaxGridLSBasePair otherwise.  NoExtension,
// CoreRepeatNumber, YLinked, MaxExpectedAlleles and MinExpectedAlleles are
// written only when they differ from the values tested below (false, 4, false,
// 2 and 1 respectively).
//
// Side effect: when mNeedsRelativeHeightInfo is set, every ladder allele has
// its relative height set to "H" before it is written.
void Locus :: OutputTo (RGTextOutput& xmlFile) {

	// Search bounds rounded to the nearest hundredth.
	const double roundedMinSearchBP = 0.01 * floor (100.0 * mMinSearchILSBP + 0.5);
	const double roundedMaxSearchBP = 0.01 * floor (100.0 * mMaxSearchILSBP + 0.5);

	xmlFile << "\t\t\t<Locus>\n";
	xmlFile << "\t\t\t\t<Name>" << mName.GetData () << "</Name>\n";
	xmlFile << "\t\t\t\t<Channel>" << mChannel << "</Channel>\n";

	if (mDoNotExtend) {
		xmlFile << "\t\t\t\t<NoExtension>true</NoExtension>\n";
	}

	xmlFile << "\t\t\t\t<MinBP>" << mMinLocusBP << "</MinBP>\n";
	xmlFile << "\t\t\t\t<MaxBP>" << mMaxLocusBP << "</MaxBP>\n";

	if (!GetGenerateILSFamilies ()) {
		xmlFile << "\t\t\t\t<MinGridLSBasePair>" << roundedMinSearchBP << "</MinGridLSBasePair>\n";
		xmlFile << "\t\t\t\t<MaxGridLSBasePair>" << roundedMaxSearchBP << "</MaxGridLSBasePair>\n";
	}
	else {
		xmlFile << "\t\t\t\t<SearchRegions>\n";
		xmlFile << "\t\t\t\t\t<Region>\n";
		xmlFile << "\t\t\t\t\t\t<ILSName>" << GetILSName () << "</ILSName>\n";
		xmlFile << "\t\t\t\t\t\t<MinGrid>" << roundedMinSearchBP << "</MinGrid>\n";
		xmlFile << "\t\t\t\t\t\t<MaxGrid>" << roundedMaxSearchBP << "</MaxGrid>\n";
		xmlFile << "\t\t\t\t\t</Region>\n";
		xmlFile << "\t\t\t\t</SearchRegions>\n";
	}

	if (mCoreRepeat != 4) {
		xmlFile << "\t\t\t\t<CoreRepeatNumber>" << mCoreRepeat << "</CoreRepeatNumber>\n";
	}

	if (mYLinked) {
		xmlFile << "\t\t\t\t<YLinked>true</YLinked>\n";
	}

	if (mMaxExpectedAlleles != 2) {
		xmlFile << "\t\t\t\t<MaxExpectedAlleles>" << mMaxExpectedAlleles << "</MaxExpectedAlleles>\n";
	}

	if (mMinExpectedAlleles != 1) {
		xmlFile << "\t\t\t\t<MinExpectedAlleles>" << mMinExpectedAlleles << "</MinExpectedAlleles>\n";
	}

	xmlFile << "\t\t\t\t<LadderAlleles>\n";

	list <Allele*>::const_iterator alleleIt = mAlleleList.begin ();

	while (alleleIt != mAlleleList.end ()) {

		Allele* ladderAllele = *alleleIt;

		if (mNeedsRelativeHeightInfo)
			ladderAllele->SetRelativeHeight ("H");

		ladderAllele->OutputTo (xmlFile);
		++alleleIt;
	}

	xmlFile << "\t\t\t\t</LadderAlleles>\n";
	xmlFile << "\t\t\t</Locus>\n";
}
// returns true if this indel is not properly flanked by reference-matching sequence bool isUnflankedIndel(const Allele& allele) { if (allele.isReference() || allele.isSNP() || allele.isMNP()) { return false; } else { vector<pair<int, string> > cigarV = splitCigar(allele.cigar); if (cigarV.back().second == "D" || cigarV.back().second == "I" || cigarV.front().second == "D" || cigarV.front().second == "I") { return true; } else { return false; } } }
void addMutation(const string& chrom, Pos pos, unsigned numReplaced, const Allele &replacement) { Mutation mutation = {numReplaced, replacement}; chromMutators[chrom].addMutation(pos, mutation); unsigned numReplacements = (unsigned)replacement.size(); if (numReplacements > numReplaced) // Overcounting is okay. basesGained += numReplacements - numReplaced; }
// Returns true when the allele's sequence contains at least one 'N' character.
bool hasAmbiguous(Allele & allele) {

    const bool contains_n = (allele.seq().find_first_of("N") != string::npos);
    return contains_n;
}
// Looks up the string-valued info attribute `attribute` on `allele`.
//
// Returns:
//   "Reference" - the attribute is not present on the allele,
//   "NoValue"   - the attribute is present but its value is ".",
//   otherwise   - the attribute value itself (the allele is asserted not to be
//                 missing in this case).
string getAlleleStringAttribute(Allele & allele, const string attribute) {

    auto att_value = allele.info().getValue<string>(attribute);

    // .second tells whether the lookup found a value.
    if (!att_value.second) {

        return "Reference";
    }

    if (att_value.first == ".") {

        return "NoValue";
    }

    assert(!(allele.isMissing()));
    return att_value.first;
}
std::vector<Location> Caller::callPoissonDist( double poissonLambda, int minQScore) { std::vector<Location> newCandidateLocations; std::unordered_map<std::string, Location>::iterator iter; std::string altBase; for( iter = locationTable.begin(); iter != locationTable.end(); ++iter) { Location newLocation = iter->second; // Clear the Sample list of the copy of the location newLocation.clearSamples(); bool keepLocation = false; std::vector<Sample> sampleList = ( iter->second).getSamples(); for( int i = 0; i < sampleList.size(); i++) { ReadcountEntry readcountEntry = sampleList[i].getReadcountEntry(); Allele mostFreqVariantAllele = readcountEntry.getMostFreqVariantAllele(); int mostFreqNonRefCount = mostFreqVariantAllele.getCount(); double lambda = readcountEntry.getReadDepth() * poissonLambda; // call illuminaPoissonFilter double pValue = Filter::illuminaPoissonFilter( mostFreqNonRefCount, lambda); double qScore = -10 * std::log10( pValue); // if at least one Sample passes through the filter, keep the location if( qScore > minQScore) { //mostFreqVariantAllele.setPValue( pValue); //mostFreqVariantAllele.setQScore( qScore); // Add only the called Samples to the emptied list newLocation.addSample( sampleList[i]); keepLocation = true; } } std::vector<Sample> newSamples = newLocation.getSamples(); double highestVAP = -1; for( int i = 0; i < newSamples.size(); i++) { ReadcountEntry readcountEntry = newSamples[i].getReadcountEntry(); Allele variantAllele = readcountEntry.getMostFreqVariantAllele(); if( variantAllele.getPercentage() > highestVAP) { highestVAP = variantAllele.getPercentage(); altBase = variantAllele.getBase(); } } ( iter->second).setMutatedBase( altBase); if( keepLocation) { newCandidateLocations.push_back( newLocation); } } return newCandidateLocations; }
// Classifies `main_allele` relative to `reference_allele`.
//
// Both sequences must be non-empty and the reference must not be missing
// (asserted).  The result is built as
// AlleleAttributes(type, length, number of 'N' characters, length difference):
//   - missing main allele:       Type::Missing, all zeros;
//   - identical to reference:    Type::Reference with the untrimmed length and
//                                'N' count;
//   - otherwise both alleles are copied, trimmed with fullTrimAllelePair, and
//     the trimmed main allele supplies the length and 'N' count:
//       * equal trimmed lengths:  SNP (length 1), Inversion (isInversion with a
//         0.95 match fraction and a minimum size of 10), else Complex; the
//         length difference is 0;
//       * different lengths:      Deletion (trimmed main allele empty),
//         Insertion (trimmed reference empty), else Complex; the length
//         difference is main minus reference and is negative when the main
//         allele is the shorter one.
AlleleAttributes alleleAttributes(Allele & main_allele, Allele & reference_allele) {

    assert(!(main_allele.seq().empty()));
    assert(!(reference_allele.seq().empty()));
    assert(!(reference_allele.isMissing()));

    if (main_allele.isMissing()) {

        return AlleleAttributes(Type::Missing, 0, 0, 0);
    }

    if (main_allele == reference_allele) {

        return AlleleAttributes(Type::Reference, main_allele.seq().size(), count(main_allele.seq().begin(), main_allele.seq().end(), 'N'), 0);
    }

    // Trim copies so the caller's alleles stay untouched.
    Allele trimmed_main_allele = main_allele;
    Allele trimmed_reference_allele = reference_allele;

    fullTrimAllelePair(&trimmed_main_allele, &trimmed_reference_allele);
    assert(!(trimmed_main_allele.seq().empty()) or !(trimmed_reference_allele.seq().empty()));

    uint trimmed_main_allele_length = trimmed_main_allele.seq().size();
    uint trimmed_reference_allele_length = trimmed_reference_allele.seq().size();

    uint trimmed_main_allele_num_ambiguous = count(trimmed_main_allele.seq().begin(), trimmed_main_allele.seq().end(), 'N');

    if (trimmed_main_allele_length == trimmed_reference_allele_length) {

        auto allele_type = Type::Complex;

        if (trimmed_main_allele_length == 1) {

            allele_type = Type::SNP;

        } else if (isInversion(trimmed_main_allele, trimmed_reference_allele, 0.95, 10)) {

            allele_type = Type::Inversion;
        }

        return AlleleAttributes(allele_type, trimmed_main_allele_length, trimmed_main_allele_num_ambiguous, 0);
    }

    auto allele_type = Type::Complex;

    if (trimmed_main_allele_length == 0) {

        allele_type = Type::Deletion;

    } else if (trimmed_reference_allele_length == 0) {

        allele_type = Type::Insertion;
    }

    // Subtract in signed arithmetic: the unsigned difference wraps around to a
    // huge positive value whenever the main allele is shorter than the reference.
    const int length_difference = static_cast<int>(trimmed_main_allele_length) - static_cast<int>(trimmed_reference_allele_length);

    return AlleleAttributes(allele_type, trimmed_main_allele_length, trimmed_main_allele_num_ambiguous, length_difference);
}
// Writes this locus to xmlFile as a <Locus> element.
//
// Always written: Name, Channel, MinBP, MaxBP, MinGridLSBasePair,
// MaxGridLSBasePair (both rounded to two decimals) and the <LadderAlleles>
// list, to which every allele in mAlleleList writes itself.
// CoreRepeatNumber, YLinked, MaxExpectedAlleles and MinExpectedAlleles are
// written only when they differ from the values tested below (4, false, 2 and
// 1 respectively).
void Locus :: OutputTo (RGTextOutput& xmlFile) {

	// Search bounds rounded to the nearest hundredth.
	const double roundedMinSearchBP = 0.01 * floor (100.0 * mMinSearchILSBP + 0.5);
	const double roundedMaxSearchBP = 0.01 * floor (100.0 * mMaxSearchILSBP + 0.5);

	xmlFile << "\t\t\t<Locus>\n";
	xmlFile << "\t\t\t\t<Name>" << mName.GetData () << "</Name>\n";
	xmlFile << "\t\t\t\t<Channel>" << mChannel << "</Channel>\n";
	xmlFile << "\t\t\t\t<MinBP>" << mMinLocusBP << "</MinBP>\n";
	xmlFile << "\t\t\t\t<MaxBP>" << mMaxLocusBP << "</MaxBP>\n";
	xmlFile << "\t\t\t\t<MinGridLSBasePair>" << roundedMinSearchBP << "</MinGridLSBasePair>\n";
	xmlFile << "\t\t\t\t<MaxGridLSBasePair>" << roundedMaxSearchBP << "</MaxGridLSBasePair>\n";

	if (mCoreRepeat != 4) {
		xmlFile << "\t\t\t\t<CoreRepeatNumber>" << mCoreRepeat << "</CoreRepeatNumber>\n";
	}

	if (mYLinked) {
		xmlFile << "\t\t\t\t<YLinked>true</YLinked>\n";
	}

	if (mMaxExpectedAlleles != 2) {
		xmlFile << "\t\t\t\t<MaxExpectedAlleles>" << mMaxExpectedAlleles << "</MaxExpectedAlleles>\n";
	}

	if (mMinExpectedAlleles != 1) {
		xmlFile << "\t\t\t\t<MinExpectedAlleles>" << mMinExpectedAlleles << "</MinExpectedAlleles>\n";
	}

	xmlFile << "\t\t\t\t<LadderAlleles>\n";

	list <Allele*>::const_iterator alleleIt = mAlleleList.begin ();

	while (alleleIt != mAlleleList.end ()) {

		Allele* ladderAllele = *alleleIt;
		ladderAllele->OutputTo (xmlFile);
		++alleleIt;
	}

	xmlFile << "\t\t\t\t</LadderAlleles>\n";
	xmlFile << "\t\t\t</Locus>\n";
}
// Appends newAllele to the end of mAlleleList unless an allele already in the
// list reports isEqual (newAllele).
//
// Returns 0 when the allele was appended and -1 when an equal allele already
// exists (the list is left unchanged in that case).
int Locus :: AddAllele (Allele* newAllele) {

	list <Allele*>::const_iterator existing = mAlleleList.begin ();

	while (existing != mAlleleList.end ()) {

		// Stop at the first duplicate; nothing is added.
		if ((*existing)->isEqual (newAllele))
			return -1;

		++existing;
	}

	mAlleleList.push_back (newAllele);
	return 0;
}
// Returns true when the allele carries a float "ACP" info value and that value
// is at least `min_acp`; returns false when the value is absent or below the
// threshold.
bool isAlleleCalled(Allele & allele, const float min_acp) {

    auto acp = allele.info().getValue<float>("ACP");

    // .second tells whether the lookup found a value.
    if (!acp.second) {

        return false;
    }

    return (acp.first >= min_acp);
}
// Decides whether `main_allele` looks like an inversion of `reference_allele`.
//
// Returns false when the two sequences differ in length or are shorter than
// `min_size`.  Otherwise the reverse complement of the main sequence is
// compared position by position with the reference sequence (read forwards);
// a position counts as a match when both characters are equal and are not 'N'.
// The result is true unless the matching fraction is below
// `min_match_fraction`.
bool isInversion(Allele & main_allele, Allele & reference_allele, const float min_match_fraction, const uint min_size) {

    const auto sequence_length = main_allele.seq().size();

    if ((sequence_length != reference_allele.seq().size()) or (sequence_length < min_size)) {

        return false;
    }

    const string main_allele_rv = reverseComplementSequence(main_allele.seq());
    const string & reference_seq = reference_allele.seq();

    assert(main_allele_rv.size() == reference_seq.size());

    uint num_correct_bases = 0;

    for (string::size_type idx = 0; idx < main_allele_rv.size(); idx++) {

        const char base = main_allele_rv[idx];

        if ((base == reference_seq[idx]) and (base != 'N')) {

            num_correct_bases++;
        }
    }

    assert(num_correct_bases <= main_allele_rv.size());

    // Written as a negated "<" so the outcome matches the comparison exactly,
    // including when the fraction is not a number.
    return !((static_cast<float>(num_correct_bases)/main_allele_rv.size()) < min_match_fraction);
}
// Returns true when the allele carries an "AAI" info value other than ".".
// Returns false when the value is absent or equal to ".".  A present value is
// asserted to be non-empty.
bool isAlleleAnnotated(Allele & allele) {

    auto annotation = allele.info().getValue<string>("AAI");

    // .second tells whether the lookup found a value.
    if (!annotation.second) {

        return false;
    }

    assert(!(annotation.first.empty()));

    return (annotation.first != ".");
}
// Free-function form of Allele::equivalent: returns the result of
// a.equivalent(b).
bool allelesEquivalent(Allele &a, Allele &b) {
    const bool same = a.equivalent(b);
    return same;
}
/** \brief accessor: returns the second member of the pair obtained from a.get() */
inline bool isSilent(const Allele& a) {
    const bool silent = a.get().second;
    return silent;
}
/** \brief accessor: returns the first member of the pair obtained from a.get() */
inline std::string getName(const Allele& a) {
    std::string name = a.get().first;
    return name;
}
/*
 * Applies one mutation of kind `m` to the population.
 *
 * A position is drawn with scape->rng->uniform_int(0, tot) and mapped to an
 * allele `a` and a site `x` within its sequence by
 * find_mutation_allele_and_position.  A copy of that sequence is mutated:
 *   point       - the code at x is replaced by scape->pick_mutation(...);
 *   deletion    - len sites starting at x are deleted;
 *   duplication - len sites starting at x are duplicated.
 * For deletion and duplication, len is drawn with scape->rng->rnb(...); the
 * call returns without changing anything when len is 0 or x+len exceeds the
 * sequence length.
 *
 * The mutated sequence is then looked up in `alleles`: a new Allele is created
 * for it if absent, otherwise the existing entry's copy count is incremented.
 * Finally one copy is removed from the source allele; when it had exactly one
 * copy, its entry is erased from `alleles`.
 *
 * Diagnostics are written to cout when the real_time_flags entries
 * "mutational_effects" / "allele_loss" are set.
 *
 * Throws SimError for an unknown mutation kind, when the source allele has no
 * copies left, or when the source allele cannot be found in `alleles`.
 *
 * NOTE(review): whether `tot` is an inclusive upper bound depends on
 * uniform_int - confirm against the RNG implementation.
 */
void PopState::mutate(Mutation m, int tot) {
    int pos = scape->rng->uniform_int(0, tot);
    pair<Allele *, int> res = find_mutation_allele_and_position(pos);
    Allele *a = res.first;
    int x = res.second;
    int len;
    Sequence seq(a->get_seq());
    string type;

    /* do mutation type-specific stuff */
    switch (m) {
    case point:{
        type = "point";
        len = 1;
        char replacement = scape->pick_mutation(seq.code(x));
        seq.replace(x, replacement);
        break;
    }
    case deletion: {
        len = (int)scape->rng->rnb(deletion_neg_bin_n, deletion_neg_bin_p);
        type = "deletion";
        /* throw out mutations that go beyond the end of the sequence or are
         * zero length */
        if (x+len > (int)seq.length() || len == 0) return;
        seq.delete_part(x, len);
        break;
    }
    case duplication: {
        len = (int)scape->rng->rnb(duplication_neg_bin_n, duplication_neg_bin_p);
        type = "duplication";
        /* throw out mutations that go beyond the end of the sequence or are
         * zero length */
        if (x+len > (int)seq.length() || len == 0) return;
        seq.duplicate_part(x, len);
        break;
    }
    default:
        throw SimError("invalid mutation type");
    }

    /* see if the allele already exists */
    AlleleList::iterator i = alleles.find(seq);
    bool isnew = false;
    if (i == alleles.end()) {
        /* first occurrence of this sequence: register it and record that it is
         * one mutation further along than the allele it came from */
        alleles[seq] = new Allele(seq, 1, generation, scape);
        alleles[seq]->mutations = a->mutations+1;
        isnew = true;
    } else {
        i->second->copies++;
    }

    /* see if we should print mutation information */
    if (real_time_flags[string("mutational_effects")]) {
        Sequence bg = a->get_seq();
        string from, to;
        /* `from` is taken from the unmutated background sequence */
        from = bg.subseq(from, x, len);
        switch (m) {
        case point:
            to = seq.subseq(to, x, len);
            break;
        case deletion:
            to.clear();
            break;
        case duplication:
            /* the duplicated stretch appears twice in the result */
            to = bg.subseq(to, x, len);
            to = to + to;
            break;
        default:
            throw SimError("invalid mutation type");
        }
        cout << "gen: " << generation << " pstat_mutational_effects: " << "background: " << bg << " old_id: " << a->allele_id << " new_id: " << alleles[seq]->allele_id << " copies: " << a->copies << " type: " << type << " site: " << x << " len: " << len << " from: '" << from << "'" << " to: '" <<
            to << "'" << " bfit: " << a->fitness << " mfit: " << alleles[seq]->fitness << " new: " << seq << " isnew: " << isnew << endl;
    }

    /* remove one copy of the allele that was mutated */
    if (a->copies <= 0) throw SimError("too few alleles");
    if (a->copies == 1) {
        AlleleList::iterator k = alleles.find(a->get_seq());
        if (k == alleles.end()) throw SimError("where did the allele go?");
        if (real_time_flags[string("allele_loss")]) {
            cout << "gen: " << generation << " pstat_allele_loss: " << k->second->allele_id << endl;
        }
        /* NOTE(review): only the container entry is erased here; the Allele
         * object itself is not deleted in this function - confirm it is owned
         * and released elsewhere */
        alleles.erase(k);
    } else {
        a->copies--;
    }
}