Пример #1
int Locus :: ComputeAllBPs () {

	list <Allele*>::const_iterator AIterator;
	Allele* nextAllele = mAlleleList.front ();
	STRAlleleName firstCore (nextAllele->GetName ());
	STRAlleleName* nextRep;
	int bpDisp;

	for (AIterator = mAlleleList.begin(); AIterator != mAlleleList.end(); AIterator++) {

		nextAllele = *AIterator;
		nextRep = new STRAlleleName (nextAllele->GetName ());
		bpDisp = nextRep->GetBPDifferenceFrom (firstCore, mCoreRepeat);
		nextAllele->SetBP (bpDisp + mFirstCoreLocusBP);
		delete nextRep;

	nextRep = new STRAlleleName (mLastExtendedAllele);
	bpDisp = nextRep->GetBPDifferenceFrom (firstCore, mCoreRepeat);
	mMaxLocusBP = bpDisp + mFirstCoreLocusBP;
	delete nextRep;
	nextRep = new STRAlleleName (mFirstExtendedAllele);
	bpDisp = firstCore.GetBPDifferenceFrom (*nextRep, mCoreRepeat);
	mMinLocusBP = mFirstCoreLocusBP - bpDisp;
	delete nextRep;
	return 0;
Пример #2
void Caller::calculateStatistics()
	std::unordered_map<std::string, Location>::iterator iter;
	for( iter = locationTable.begin(); iter != locationTable.end(); ++iter)
		std::vector<double> variantPercentages;
		std::vector<Sample> sampleList = ( iter->second).getSamples();
		for( int i = 0; i < sampleList.size(); i++)
			ReadcountEntry re = sampleList[i].getReadcountEntry();
			Allele mostFreqVariant = re.getMostFreqVariantAllele();
			variantPercentages.push_back( mostFreqVariant.getPercentage());

		// Calculate mean
		double mean = Statistics::mean( variantPercentages);

		// Calculate variance
		double variance = Statistics::variance( variantPercentages, mean);

		// Calculate std
		double std = Statistics::standardDeviation( variance);

		// Calculate snr
		double cov = Statistics::coefficientOfVariation( mean, std);

		// Set statistics for the current Location
		( iter->second).setMeanVAP( mean);
		( iter->second).setVarianceVAP( variance);
		( iter->second).setStdVAP( std);
		( iter->second).setCOV( cov);
Пример #3
// Serialises an allele into a single ':'-separated string.
//
// Alleles that are not genotype alleles produce the long form:
//   sampleID:readID:type:cigar:position:length:strand:ref:alt:quality:basesLeft:basesRight
// Genotype alleles produce the short form:
//   type:cigar:position:length:alt
//
// Strand is written as "+" when allele.strand == STRAND_FORWARD and "-" otherwise.
string stringForAllele(const Allele &allele) {

    stringstream out;
    if (!allele.genotypeAllele) {
        // NOTE(review): the insertion chain previously had no stream operand;
        // `out` is the only stream in scope.  Confirm no stream set-up
        // (e.g. a precision call) was lost along with it.
        out << allele.sampleID << ":"
            << allele.readID << ":"
            << allele.typeStr() << ":"
            << allele.cigar << ":"
            // `fixed` is applied after `scientific`, so fixed notation is in effect.
            << scientific << fixed << allele.position << ":"
            << allele.length << ":"
            << (allele.strand == STRAND_FORWARD ? "+" : "-") << ":"
            << allele.referenceSequence << ":"
            << allele.alternateSequence << ":"
            << allele.quality << ":"
            << allele.basesLeft << ":"
            << allele.basesRight;
    } else {
        out << allele.typeStr() << ":"
            << allele.cigar << ":"
            << scientific << fixed << allele.position << ":"
            << allele.length << ":"
            << allele.alternateSequence;
    }

    return out.str();
}
Пример #4
// Writes this locus as a <Locus> XML element to xmlFile.
//
// Always emitted: Name, Channel, MinBP, MaxBP and the LadderAlleles list.
// Optional elements are written only when they differ from the value tested
// here: NoExtension (mDoNotExtend), CoreRepeatNumber (!= 4), YLinked,
// MaxExpectedAlleles (!= 2) and MinExpectedAlleles (!= 1).
// The ILS search limits are rounded to two decimals and written either as a
// <SearchRegions> block (when GetGenerateILSFamilies () is true) or as
// MinGridLSBasePair / MaxGridLSBasePair.
//
// Side effect: when mNeedsRelativeHeightInfo is set, every allele has its
// relative height set to "H" before it is written.
void Locus :: OutputTo (RGTextOutput& xmlFile) {

	xmlFile << "\t\t\t<Locus>\n";
	xmlFile << "\t\t\t\t<Name>" << mName.GetData () << "</Name>\n";
	xmlFile << "\t\t\t\t<Channel>" << mChannel << "</Channel>\n";

	if (mDoNotExtend)
		xmlFile << "\t\t\t\t<NoExtension>true</NoExtension>\n";

	xmlFile << "\t\t\t\t<MinBP>" << mMinLocusBP << "</MinBP>\n";
	xmlFile << "\t\t\t\t<MaxBP>" << mMaxLocusBP << "</MaxBP>\n";

	// Round half up to two decimal places; both output forms use the same values.
	const double minGrid = 0.01 * floor (100.0 * mMinSearchILSBP + 0.5);
	const double maxGrid = 0.01 * floor (100.0 * mMaxSearchILSBP + 0.5);

	if (GetGenerateILSFamilies ()) {

		xmlFile << "\t\t\t\t<SearchRegions>\n";
		xmlFile << "\t\t\t\t\t<Region>\n";
		xmlFile << "\t\t\t\t\t\t<ILSName>" << GetILSName () << "</ILSName>\n";
		xmlFile << "\t\t\t\t\t\t<MinGrid>" << minGrid << "</MinGrid>\n";
		xmlFile << "\t\t\t\t\t\t<MaxGrid>" << maxGrid << "</MaxGrid>\n";
		xmlFile << "\t\t\t\t\t</Region>\n";
		xmlFile << "\t\t\t\t</SearchRegions>\n";
	}

	else {

		xmlFile << "\t\t\t\t<MinGridLSBasePair>" << minGrid << "</MinGridLSBasePair>\n";
		xmlFile << "\t\t\t\t<MaxGridLSBasePair>" << maxGrid << "</MaxGridLSBasePair>\n";
	}

	if (mCoreRepeat != 4)
		xmlFile << "\t\t\t\t<CoreRepeatNumber>" << mCoreRepeat << "</CoreRepeatNumber>\n";

	if (mYLinked)
		xmlFile << "\t\t\t\t<YLinked>true</YLinked>\n";

	if (mMaxExpectedAlleles != 2)
		xmlFile << "\t\t\t\t<MaxExpectedAlleles>" << mMaxExpectedAlleles << "</MaxExpectedAlleles>\n";

	if (mMinExpectedAlleles != 1)
		xmlFile << "\t\t\t\t<MinExpectedAlleles>" << mMinExpectedAlleles << "</MinExpectedAlleles>\n";

	xmlFile << "\t\t\t\t<LadderAlleles>\n";

	list <Allele*>::const_iterator AIterator;

	for (AIterator = mAlleleList.begin(); AIterator != mAlleleList.end(); ++AIterator) {

		Allele* nextAllele = *AIterator;

		if (mNeedsRelativeHeightInfo)
			nextAllele->SetRelativeHeight ("H");

		nextAllele->OutputTo (xmlFile);
	}

	xmlFile << "\t\t\t\t</LadderAlleles>\n";
	xmlFile << "\t\t\t</Locus>\n";
}
Пример #5
// returns true if this indel is not properly flanked by reference-matching sequence
bool isUnflankedIndel(const Allele& allele) {
    if (allele.isReference() || allele.isSNP() || allele.isMNP()) {
        return false;
    } else {
        vector<pair<int, string> > cigarV = splitCigar(allele.cigar);
        if (cigarV.back().second == "D"
            || cigarV.back().second == "I"
            || cigarV.front().second == "D"
            || cigarV.front().second == "I") {
            return true;
        } else {
            return false;
Пример #6
 // Builds a Mutation from {numReplaced, replacement} and hands it, together
 // with pos, to the mutator stored under chromMutators[chrom].
 // When the replacement is longer than numReplaced, the difference is added to
 // basesGained; shorter or equal replacements leave basesGained unchanged.
 void addMutation(const string& chrom, Pos pos, unsigned numReplaced,
                  const Allele &replacement) {
     Mutation mutation = {numReplaced, replacement};
     chromMutators[chrom].addMutation(pos, mutation);
     const unsigned numReplacements = static_cast<unsigned>(replacement.size());
     if (numReplacements > numReplaced) { // Overcounting is okay.
         basesGained += numReplacements - numReplaced;
     }
 }
    // Returns true when the allele's sequence contains at least one 'N'.
    bool hasAmbiguous(Allele & allele) {

        // Single-character search; equivalent to find_first_of("N").
        return allele.seq().find('N') != string::npos;
    }
	// Looks up a string attribute in the allele's info.
	//
	// getValue<string> yields a (value, found) pair:
	//   - not found            -> "Reference"
	//   - found, value is "."  -> "NoValue"
	//   - otherwise            -> the stored value
	string getAlleleStringAttribute(Allele & allele, const string attribute) {

		auto att_value = allele.info().getValue<string>(attribute);

		if (!att_value.second) {

			return "Reference";
		}

		if (att_value.first == ".") {

			return "NoValue";
		}

		return att_value.first;
	}
Пример #9
std::vector<Location> Caller::callPoissonDist( double poissonLambda, int minQScore)
	std::vector<Location> newCandidateLocations;
	std::unordered_map<std::string, Location>::iterator iter;
	std::string altBase;
	for( iter = locationTable.begin(); iter != locationTable.end(); ++iter)
		Location newLocation = iter->second;

		// Clear the Sample list of the copy of the location
		bool keepLocation = false;

		std::vector<Sample> sampleList = ( iter->second).getSamples();
		for( int i = 0; i < sampleList.size(); i++)
			ReadcountEntry readcountEntry = sampleList[i].getReadcountEntry();
			Allele mostFreqVariantAllele = readcountEntry.getMostFreqVariantAllele();

			int mostFreqNonRefCount = mostFreqVariantAllele.getCount();
			double lambda = readcountEntry.getReadDepth() * poissonLambda;

			// call illuminaPoissonFilter
			double pValue = Filter::illuminaPoissonFilter( mostFreqNonRefCount, lambda);
			double qScore = -10 * std::log10( pValue);

			// if at least one Sample passes through the filter, keep the location
			if( qScore > minQScore)
				//mostFreqVariantAllele.setPValue( pValue);
				//mostFreqVariantAllele.setQScore( qScore);

				// Add only the called Samples to the emptied list
				newLocation.addSample( sampleList[i]);
				keepLocation = true;

		std::vector<Sample> newSamples = newLocation.getSamples();
		double highestVAP = -1;
		for( int i = 0; i < newSamples.size(); i++)
			ReadcountEntry readcountEntry = newSamples[i].getReadcountEntry();
			Allele variantAllele = readcountEntry.getMostFreqVariantAllele();

			if( variantAllele.getPercentage() > highestVAP)
				highestVAP = variantAllele.getPercentage();
				altBase = variantAllele.getBase();

		( iter->second).setMutatedBase( altBase);
		if( keepLocation)
			newCandidateLocations.push_back( newLocation);
	return newCandidateLocations;
    // Classifies main_allele relative to reference_allele.
    //
    //   - missing allele                    -> Type::Missing
    //   - equal to the reference            -> Type::Reference
    // Otherwise copies of both alleles are trimmed with fullTrimAllelePair and
    // the trimmed lengths decide:
    //   - equal lengths: SNP (length 1), Inversion (isInversion with match
    //     fraction 0.95 and minimum size 10), else Complex
    //   - different lengths: Deletion (trimmed main empty), Insertion
    //     (trimmed reference empty), else Complex
    //
    // The returned AlleleAttributes carries the type, the (trimmed) main allele
    // length, its number of 'N' bases and, for length-changing alleles, the
    // length difference main - reference.
    AlleleAttributes alleleAttributes(Allele & main_allele, Allele & reference_allele) {

        if (main_allele.isMissing()) {

            return AlleleAttributes(Type::Missing, 0, 0, 0);
        }

        if (main_allele == reference_allele) {

            return AlleleAttributes(Type::Reference, main_allele.seq().size(), count(main_allele.seq().begin(), main_allele.seq().end(), 'N'), 0);
        }

        // Trim copies so the caller's alleles are left untouched.
        Allele trimmed_main_allele = main_allele;
        Allele trimmed_reference_allele = reference_allele;

        fullTrimAllelePair(&trimmed_main_allele, &trimmed_reference_allele);
        assert(!(trimmed_main_allele.seq().empty()) or !(trimmed_reference_allele.seq().empty()));

        uint trimmed_main_allele_length = trimmed_main_allele.seq().size();
        uint trimmed_reference_allele_length = trimmed_reference_allele.seq().size();

        uint trimmed_main_allele_num_ambiguous = count(trimmed_main_allele.seq().begin(), trimmed_main_allele.seq().end(), 'N');

        if (trimmed_main_allele_length == trimmed_reference_allele_length) {

            auto allele_type = Type::Complex;

            if (trimmed_main_allele_length == 1) {

                allele_type = Type::SNP;

            } else if (isInversion(trimmed_main_allele, trimmed_reference_allele, 0.95, 10)) {

                allele_type = Type::Inversion;
            }

            return AlleleAttributes(allele_type, trimmed_main_allele_length, trimmed_main_allele_num_ambiguous, 0);

        } else {

            auto allele_type = Type::Complex;

            if (trimmed_main_allele_length == 0) {

                allele_type = Type::Deletion;

            } else if (trimmed_reference_allele_length == 0) {

                allele_type = Type::Insertion;
            }

            // NOTE(review): the difference is computed in unsigned arithmetic,
            // so it wraps when the main allele is shorter; presumably the
            // receiving parameter is signed - confirm against AlleleAttributes.
            return AlleleAttributes(allele_type, trimmed_main_allele_length, trimmed_main_allele_num_ambiguous, trimmed_main_allele_length - trimmed_reference_allele_length);
        }
    }
Пример #11
// Writes this locus as a <Locus> XML element to xmlFile.
//
// Always emitted: Name, Channel, MinBP, MaxBP, the ILS search limits
// (MinGridLSBasePair / MaxGridLSBasePair, rounded to two decimals) and the
// LadderAlleles list.  CoreRepeatNumber (!= 4), YLinked, MaxExpectedAlleles
// (!= 2) and MinExpectedAlleles (!= 1) are written only when they differ from
// the value tested here.
void Locus :: OutputTo (RGTextOutput& xmlFile) {

	xmlFile << "\t\t\t<Locus>\n";
	xmlFile << "\t\t\t\t<Name>" << mName.GetData () << "</Name>\n";
	xmlFile << "\t\t\t\t<Channel>" << mChannel << "</Channel>\n";
	xmlFile << "\t\t\t\t<MinBP>" << mMinLocusBP << "</MinBP>\n";
	xmlFile << "\t\t\t\t<MaxBP>" << mMaxLocusBP << "</MaxBP>\n";

	// floor (100 * x + 0.5) / 100 rounds half up to two decimal places.
	xmlFile << "\t\t\t\t<MinGridLSBasePair>" << 0.01 * floor (100.0 * mMinSearchILSBP + 0.5) << "</MinGridLSBasePair>\n";
	xmlFile << "\t\t\t\t<MaxGridLSBasePair>" << 0.01 * floor (100.0 * mMaxSearchILSBP + 0.5) << "</MaxGridLSBasePair>\n";

	if (mCoreRepeat != 4)
		xmlFile << "\t\t\t\t<CoreRepeatNumber>" << mCoreRepeat << "</CoreRepeatNumber>\n";

	if (mYLinked)
		xmlFile << "\t\t\t\t<YLinked>true</YLinked>\n";

	if (mMaxExpectedAlleles != 2)
		xmlFile << "\t\t\t\t<MaxExpectedAlleles>" << mMaxExpectedAlleles << "</MaxExpectedAlleles>\n";

	if (mMinExpectedAlleles != 1)
		xmlFile << "\t\t\t\t<MinExpectedAlleles>" << mMinExpectedAlleles << "</MinExpectedAlleles>\n";

	xmlFile << "\t\t\t\t<LadderAlleles>\n";

	list <Allele*>::const_iterator AIterator;

	for (AIterator = mAlleleList.begin(); AIterator != mAlleleList.end(); ++AIterator) {

		Allele* nextAllele = *AIterator;
		nextAllele->OutputTo (xmlFile);
	}

	xmlFile << "\t\t\t\t</LadderAlleles>\n";
	xmlFile << "\t\t\t</Locus>\n";
}
Пример #12
// Appends newAllele to mAlleleList unless an allele already in the list
// reports isEqual (newAllele).
//
// Returns 0 when the allele was appended and -1 when it is identical to an
// existing allele (the list is then left unchanged).
int Locus :: AddAllele (Allele* newAllele) {

	list <Allele*>::const_iterator AIterator;
	int status = 0;

	for (AIterator = mAlleleList.begin(); AIterator != mAlleleList.end(); ++AIterator) {

		Allele* nextAllele = *AIterator;

		if (nextAllele->isEqual (newAllele)) {

			// One match decides the outcome; no need to scan the rest
			// (assumes isEqual has no side effects).
			status = -1;
			break;
		}
	}

	if (status == 0)
		mAlleleList.push_back (newAllele);

	return status;
}
    // Returns true when the allele's info has an "ACP" value and that value is
    // at least min_acp; false when the value is absent or below the threshold.
    bool isAlleleCalled(Allele & allele, const float min_acp) {

        // getValue yields a (value, found) pair.
        auto acp = allele.info().getValue<float>("ACP");

        return acp.second and (acp.first >= min_acp);
    }
    // Decides whether main_allele looks like an inversion of reference_allele.
    //
    // Returns false when the two sequences differ in length or are shorter
    // than min_size.  Otherwise the reverse complement of the main allele is
    // compared with the reference position by position; positions that match
    // and are not 'N' count as correct.  The result is true unless the
    // fraction of correct positions is below min_match_fraction.
    //
    // NOTE(review): the comparison loop body was truncated in this file; the
    // counter increment and the iterator advances below are reconstructed from
    // the assertions that follow the loop - verify against the upstream source.
    bool isInversion(Allele & main_allele, Allele & reference_allele, const float min_match_fraction, const uint min_size) {

        if (main_allele.seq().size() != reference_allele.seq().size()) {

            return false;
        }

        if (main_allele.seq().size() < min_size) {

            return false;
        }

        string main_allele_rv = reverseComplementSequence(main_allele.seq());

        // Bind the reference sequence once so begin()/end() refer to the same
        // object regardless of whether seq() returns a reference or a value.
        const auto & reference_seq = reference_allele.seq();
        assert(main_allele_rv.size() == reference_seq.size());

        auto main_rv_it = main_allele_rv.begin();
        auto reference_rit = reference_seq.begin();

        uint num_correct_bases = 0;

        while (main_rv_it != main_allele_rv.end()) {

            if ((*main_rv_it == *reference_rit) and (*main_rv_it != 'N')) {

                num_correct_bases++;
            }

            ++main_rv_it;
            ++reference_rit;
        }

        assert(num_correct_bases <= main_allele_rv.size());
        assert(reference_rit == reference_seq.end());

        if ((static_cast<float>(num_correct_bases)/main_allele_rv.size()) < min_match_fraction) {

            return false;

        } else {

            return true;
        }
    }
    // Returns true when the allele's info has an "AAI" value other than ".";
    // false when the value is absent or equals ".".
    bool isAlleleAnnotated(Allele & allele) {

        // getValue yields a (value, found) pair.
        auto annotation = allele.info().getValue<string>("AAI");

        return annotation.second and (annotation.first != ".");
    }
Пример #16
// Free-function form of Allele::equivalent: reports whether a considers b
// equivalent.
bool allelesEquivalent(Allele &a, Allele &b) {
    const bool areEquivalent = a.equivalent(b);
    return areEquivalent;
}
Пример #17
        /** \brief Accessor: returns the second member of the pair yielded by a.get(). */
        inline bool isSilent(const Allele& a) {
            return a.get().second;
        }
Пример #18
        /** \brief Accessor: returns the first member of the pair yielded by a.get(). */
        inline std::string getName(const Allele& a) {
            return a.get().first;
        }
Пример #19
void PopState::mutate(Mutation m, int tot) {
   int pos = scape->rng->uniform_int(0, tot);
   pair<Allele *, int> res = find_mutation_allele_and_position(pos);
   Allele *a = res.first;
   int x = res.second;
   int len;
   Sequence seq(a->get_seq());
   string type;

   /* do mutation type-specific stuff */
   switch (m) {
      case point:{
         type = "point";
         len = 1;
         char replacement = scape->pick_mutation(seq.code(x));
         seq.replace(x, replacement);
      case deletion: {
         len = (int)scape->rng->rnb(deletion_neg_bin_n, deletion_neg_bin_p);
         type =  "deletion";

         /* throw out mutations that go beyond the end of the sequence or are 
          * zero length */
         if (x+len > (int)seq.length() || len == 0) return; 

         seq.delete_part(x, len);
      case duplication: {
         len = (int)scape->rng->rnb(duplication_neg_bin_n, 
         type = "duplication";

         /* throw out mutations that go beyond the end of the sequence or are 
          * zero length */
         if (x+len > (int)seq.length() || len == 0) return; 

         seq.duplicate_part(x, len);
         throw SimError("invalid mutation type");

   /* see if the allele already exists */
   AlleleList::iterator i = alleles.find(seq);
   bool isnew = false;
   if (i == alleles.end()) {
      alleles[seq] = new Allele(seq, 1, generation, scape);
      alleles[seq]->mutations = a->mutations+1;
      isnew = true;
   } else {

   /* see if we should print mutation information */
   if (real_time_flags[string("mutational_effects")]) {
      Sequence bg = a->get_seq();
      string from, to;
      from = bg.subseq(from, x, len);
      switch (m) {
         case point:
            to = seq.subseq(to, x, len);
         case deletion: 
         case duplication: 
            to = bg.subseq(to, x, len);
            to = to + to;
            throw SimError("invalid mutation type");
      cout << "gen: " << generation << " pstat_mutational_effects: " 
         << "background: " << bg 
         << " old_id: " << a->allele_id 
         << " new_id: " << alleles[seq]->allele_id
         << " copies: " << a->copies 
         << " type: " << type
         << " site: " << x
         << " len: " << len
         << " from: '" << from << "'"
         << " to: '" << to << "'"
         << " bfit: " << a->fitness
         << " mfit: " << alleles[seq]->fitness
         << " new: " << seq
         << " isnew: " << isnew
         << endl;

   if (a->copies <= 0) throw SimError("too few alleles");
   if (a->copies == 1) {
      AlleleList::iterator k = alleles.find(a->get_seq());
      if (k == alleles.end()) throw SimError("where did the allele go?");
      if (real_time_flags[string("allele_loss")]) {
         cout << "gen: " << generation << " pstat_allele_loss: " 
            << k->second->allele_id << endl;
   } else {