StringVector GetWeatherDBDataFileList(const std::string& filePath, const CWeatherDatabaseOptimization& zop)
	{
		StringVector filesList;

		if (IsHourlyDB(filePath))
		{
			filesList.resize(zop.size());
			for (size_t i = 0; i < zop.size(); i++)
				filesList[i] = CHourlyDatabase().GetDataFilePath(filePath, zop[i].GetDataFileName());
		}
		else if (IsDailyDB(filePath))
		{
			filesList.resize(zop.size());
			for (size_t i = 0; i < zop.size(); i++)
				filesList[i] = CDailyDatabase().GetDataFilePath(filePath, zop[i].GetDataFileName());
		}
		else if (IsNormalsDB(filePath))
		{
			if (CNormalsDatabase::IsExtendedDatabase(filePath))
			{
				filesList.resize(zop.size());
				for (size_t i = 0; i < zop.size(); i++)
					filesList[i] = CNormalsDatabase().GetDataFilePath(filePath, zop[i].GetDataFileName());
			}
			else
			{
				filesList.push_back(CNormalsDatabase::GetNormalsDataFilePath(filePath));
			}

		}

		return filesList;
	}
Example #2
0
// Run dindel on a pair of samples
DindelReturnCode DindelUtil::runDindelPairMatePair(const std::string& id,
                                                   const StringVector& base_haplotypes,
                                                   const StringVector& variant_haplotypes,
                                                   const GraphCompareParameters& parameters,
                                                   std::ostream& baseOut,
                                                   std::ostream& variantOut,
                                                   std::ostream& callsOut,
                                                   DindelReadReferenceAlignmentVector* pReadAlignments)
{
    PROFILE_FUNC("runDindelPairMatePair")

    StringVector inHaplotypes;
    inHaplotypes.insert(inHaplotypes.end(), base_haplotypes.begin(), base_haplotypes.end());
    inHaplotypes.insert(inHaplotypes.end(), variant_haplotypes.begin(), variant_haplotypes.end());

    //
    // First, extract the reads from the normal and variant data sets that match each haplotype
    //
    assert(inHaplotypes.size() > 0);

    // Get canidate alignments for the input haplotypes
    HapgenAlignmentVector candidateAlignments;

    // Choose the kmer size for alignment
    size_t align_kmer = 31;
    for(size_t i = 0; i < inHaplotypes.size(); ++i)
    {
        HapgenAlignmentVector thisCandidateAlignments;
        HapgenUtil::alignHaplotypeToReferenceKmer(align_kmer,
                                                  inHaplotypes[i],
                                                  parameters.referenceIndex,
                                                  parameters.pRefTable,
                                                  thisCandidateAlignments);

        candidateAlignments.insert(candidateAlignments.end(), thisCandidateAlignments.begin(), thisCandidateAlignments.end());
    }
   
    // Remove duplicate or bad alignment pairs
    HapgenUtil::coalesceAlignments(candidateAlignments);

    if(Verbosity::Instance().getPrintLevel() > 3)
        printf("runDindel -- %zu candidate alignments found\n", candidateAlignments.size());
    
    size_t MAX_ALIGNMENTS = 10;
    if(candidateAlignments.size() > MAX_ALIGNMENTS)
        return DRC_AMBIGUOUS_ALIGNMENT;

    // Join each haplotype with flanking sequence from the reference genome for each alignment
    // This function also adds a haplotype (with flanking sequence) for the piece of the reference
    int FLANKING_SIZE = 0;
    if (parameters.dindelRealignParameters.realignMatePairs)
        FLANKING_SIZE = 1000;
    StringVector flankingHaplotypes;

    // This vector contains the internal portion of the haplotypes, without the flanking sequence
    // It is used to extract reads
    StringVector candidateHaplotypes;
    for(size_t i = 0; i < candidateAlignments.size(); ++i)
    {
        HapgenUtil::makeFlankingHaplotypes(candidateAlignments[i],
                                           parameters.pRefTable,
                                           FLANKING_SIZE,
                                           inHaplotypes,
                                           flankingHaplotypes,
                                           candidateHaplotypes);
    
    }

    if(Verbosity::Instance().getPrintLevel() > 3)
        printf("runDindel -- made %zu flanking haplotypes\n", candidateHaplotypes.size());

    // Normal reads
    SeqRecordVector normalReads;
    SeqRecordVector normalRCReads;

    // Remove non-unique candidate haplotypes
    std::sort(candidateHaplotypes.begin(), candidateHaplotypes.end());
    StringVector::iterator haplotype_iterator = std::unique(candidateHaplotypes.begin(), candidateHaplotypes.end());
    candidateHaplotypes.resize(haplotype_iterator - candidateHaplotypes.begin());

    // Set the value to use for extracting reads that potentially match the haplotype
    // Do not use a kmer for extraction greater than this value
    size_t KMER_CEILING = 31;
    size_t extractionKmer = parameters.kmer < KMER_CEILING ? parameters.kmer : KMER_CEILING;
    
    bool extractOK = true;
    if(!parameters.bReferenceMode)
    {
        // Reads on the same strand as the haplotype
        extractOK = HapgenUtil::extractHaplotypeReads(candidateHaplotypes, parameters.baseIndex, extractionKmer, 
                                                      false, parameters.maxReads, parameters.maxExtractionIntervalSize, &normalReads, NULL);

        if(!extractOK)
            return DRC_OVER_DEPTH;

        // Reads on the reverse strand
        extractOK = HapgenUtil::extractHaplotypeReads(candidateHaplotypes, parameters.baseIndex, extractionKmer, 
                                                      true, parameters.maxReads, parameters.maxExtractionIntervalSize, &normalRCReads, NULL);

        if(!extractOK)
            return DRC_OVER_DEPTH;
    }

    // Variant reads
    SeqRecordVector variantReads;
    SeqRecordVector variantRCReads;

    extractOK = HapgenUtil::extractHaplotypeReads(candidateHaplotypes, parameters.variantIndex, extractionKmer, 
                                                  false, parameters.maxReads, parameters.maxExtractionIntervalSize, &variantReads, NULL);

    if(!extractOK)
        return DRC_OVER_DEPTH;

    extractOK = HapgenUtil::extractHaplotypeReads(candidateHaplotypes, parameters.variantIndex, extractionKmer, 
                                                  true, parameters.maxReads, parameters.maxExtractionIntervalSize, &variantRCReads, NULL);

    if(!extractOK)
        return DRC_OVER_DEPTH;

    size_t normal_reads = normalReads.size() + normalRCReads.size();
    size_t variant_reads = variantReads.size() + variantRCReads.size();
    size_t total_reads = normal_reads + variant_reads;
    
    if(Verbosity::Instance().getPrintLevel() > 3)
        printf("Extracted %zu normal reads, %zu variant reads\n", normal_reads, variant_reads);

    if(total_reads > parameters.maxReads)
        return DRC_OVER_DEPTH;

    if (total_reads == 0)
        return DRC_UNDER_DEPTH;

    // Generate the input haplotypes for dindel
    // We need at least 2 haplotypes (one is the reference)
    size_t totFlankingHaplotypes = flankingHaplotypes.size();

    if(totFlankingHaplotypes < 2)
        return DRC_NO_ALIGNMENT;

    // Ensure the reference haplotype is a non-empty string
    if(flankingHaplotypes[0].size() == 0)
        return DRC_NO_ALIGNMENT;

    // Make Dindel referenceMappings
    StringVector dindelHaplotypes;
    std::set<DindelReferenceMapping> refMappings;

    //
    for(size_t i = 0; i < candidateAlignments.size(); ++i)
    {
        std::string upstream, defined, downstream;
        std::string refName = parameters.pRefTable->getRead(candidateAlignments[i].referenceID).id;

        HapgenUtil::extractReferenceSubstrings(candidateAlignments[i],parameters.pRefTable, 
                                               FLANKING_SIZE, upstream, defined, downstream);

        std::string refSeq = upstream + defined + downstream;
     
        int refStart = candidateAlignments[i].position - int(upstream.size()) + 1;

        // Here the score is used as an estimate of how unique "defined" is in the reference sequence.
        // "defined" is not the reference sequence but a candidate haplotype.
        // It is conservative because the flanking sequence is not used in this estimation.
    	DindelReferenceMapping rm(refName, 
                                  refSeq, 
                                  refStart, 
                                  candidateAlignments[i].score+2*FLANKING_SIZE, 
                                  candidateAlignments[i].isRC);

        std::set<DindelReferenceMapping>::iterator rmit = refMappings.find(rm);
        if(rmit == refMappings.end())
        {
            refMappings.insert(rm);
        }
	    else
        {
            if(rm.referenceAlignmentScore > rmit->referenceAlignmentScore) 
                rmit->referenceAlignmentScore = rm.referenceAlignmentScore;
        }
    }
    
    // RESET MAPPING SCORES
    for(std::set<DindelReferenceMapping>::iterator it = refMappings.begin(); it != refMappings.end(); it++) 
        it->referenceAlignmentScore = 1000;
    
    // make flankingHaplotypes unique
    std::set< std::string > setFlanking(flankingHaplotypes.begin(), flankingHaplotypes.end());

    for(std::set< std::string >::const_iterator it = setFlanking.begin(); it != setFlanking.end(); it++)
    {
        dindelHaplotypes.push_back(*it);
        //dindelRefMappings[i] = std::vector<DindelReferenceMapping>(refMappings.begin(),refMappings.end());
    }

    std::vector<DindelReferenceMapping> dRefMappings(refMappings.begin(),refMappings.end());
    DindelWindow dWindow(dindelHaplotypes, dRefMappings);

    //
    // Run Dindel
    //
    
    // Initialize VCF collections
    VCFCollection vcfCollections[2];

    // If in multisample mode, load the sample names into the VCFCollection
    if(parameters.variantIndex.pPopIdx != NULL)
    {
        for(size_t i = 0; i <= 1; ++i)
            vcfCollections[i].samples = parameters.variantIndex.pPopIdx->getSamples();
    }

    size_t start_i = parameters.bReferenceMode ? 1 : 0;

    DindelRealignWindowResult *pThisResult = NULL;
    DindelRealignWindowResult *pPreviousResult = NULL;

    for(size_t i = start_i; i <= 1; ++i)
    {
        SeqRecordVector& fwdReads = (i == 0) ? normalReads : variantReads;
        SeqRecordVector& rcReads = (i == 0) ? normalRCReads : variantRCReads;
        const BWTIndexSet* indices = &parameters.variantIndex;

        // Create dindel reads
        // Mates must be at the end of the array.
        std::vector<DindelRead> dReads;
        for(size_t j = 0; j < fwdReads.size(); ++j)
            dReads.push_back(convertToDindelRead(indices, fwdReads[j], true));

        for(size_t j = 0; j < rcReads.size(); ++j)
        {
            rcReads[j].seq.reverseComplement();
            std::reverse(rcReads[j].qual.begin(), rcReads[j].qual.end());
            dReads.push_back(convertToDindelRead(indices, rcReads[j], false));
        }

        pThisResult = new DindelRealignWindowResult();

        std::stringstream out_ss;
        try
        {
            DindelRealignWindow dRealignWindow(&dWindow, dReads, parameters.dindelRealignParameters);
            dRealignWindow.run("hmm", vcfCollections[i], pReadAlignments, id, pThisResult, pPreviousResult, parameters.pRefTable);
        }
        catch(std::string e)
        {
            std::cerr << "Dindel Exception: " << e << "\n";
            exit(DRC_EXCEPTION);
        }


        if(i == 0)
            pPreviousResult = pThisResult;
    }

    // Copy raw VCFRecords to output
    for(size_t i = 0; i <= 1; ++i)
    {
        std::ostream& curr_out = i == 0 ? baseOut : variantOut;
        for(size_t j = 0; j < vcfCollections[i].records.size(); ++j)
            curr_out << vcfCollections[i].records[j] << "\n";
    }

    // Make comparative calls
    size_t VARIANT_IDX = 1;
    size_t BASE_IDX = 0;
    bool has_base_calls = !vcfCollections[BASE_IDX].records.empty();
    for(size_t i = 0; i < vcfCollections[1].records.size(); ++i)
    {
        bool not_called_in_base = true;
        if(has_base_calls)
            not_called_in_base = vcfCollections[BASE_IDX].records[i].passStr == "NoCall" ||
                                 vcfCollections[BASE_IDX].records[i].passStr == "NoSupp";

        bool called_in_variant = vcfCollections[VARIANT_IDX].records[i].passStr == "PASS";
        if(called_in_variant && not_called_in_base)
            callsOut << vcfCollections[VARIANT_IDX].records[i] << "\n";
    }

    baseOut.flush();
    variantOut.flush();

    delete pThisResult;
    delete pPreviousResult;

    return DRC_OK;
}
ForceValidationResult* ValidateOpenMMForces::compareForce(Context& context, std::vector<int>& compareForces,
                                                          Platform& platform1, Platform& platform2 ) const {

// ---------------------------------------------------------------------------------------

    //static const std::string methodName      = "ValidateOpenMMForces::compareForce";

// ---------------------------------------------------------------------------------------

    // note if platforms are identical

    if( getLog() && platform1.getName().compare( platform2.getName() ) == 0 ){
        (void) fprintf( getLog(), "Note: Platforms to compares %s are identical.\n", platform1.getName().c_str() );
        (void) fflush( getLog() );
    }

    const System& system         = context.getSystem();

    // collect systemForceNameMap[forceName] = index in system
    //         systemForceNameIndex[index]   = force name

    StringIntMap systemForceNameMap;
    StringVector systemForceNameIndex;
    systemForceNameIndex.resize( system.getNumForces() );
    for( int ii = 0; ii < system.getNumForces(); ii++ ){
        std::string forceName         = getForceName( system.getForce( ii ) );
        if( forceName.compare( "NA" ) == 0 ){
            std::stringstream message;
            message << "Force at index=" << ii << " not found -- aborting!";
            std::cerr << message.str() << std::endl;
            throw OpenMM::OpenMMException(message.str());
        }
        systemForceNameMap[forceName] = ii;
        systemForceNameIndex[ii]      = forceName;
    }

    // diagnostics

    if( 0 && getLog() ){
        for( StringIntMapI ii = systemForceNameMap.begin(); ii != systemForceNameMap.end(); ii++ ){
            int index = (*ii).second;
            (void) fprintf( getLog(), "  System force map %s index=%d reverse map=%s\n", (*ii).first.c_str(), index, systemForceNameIndex[index].c_str() );
        }
        for( unsigned int ii = 0; ii < compareForces.size(); ii++ ){
           (void) fprintf( getLog(), "   ValidateOpenMMForces %u %s\n", ii, systemForceNameIndex[compareForces[ii]].c_str() );
        }
        (void) fflush( getLog() );
    }

    // get system copy and add forces to system

    System* validationSystem     = copySystemExcludingForces( system );
    StringUIntMap forceNamesMap;
    for( unsigned int ii = 0; ii < compareForces.size(); ii++ ){
        const Force& forceToCopy = system.getForce( compareForces[ii] );
        Force* force             = copyForce( forceToCopy );
        validationSystem->addForce( force );
        forceNamesMap[systemForceNameIndex[compareForces[ii]]] = ii;
    }

    // include any missing dependencies (e.g, OBC force requires NB force for Cuda platform)

    for( StringUIntMapI ii = forceNamesMap.begin(); ii != forceNamesMap.end(); ii++ ){
       std::string forceName = (*ii).first;
       StringVector dependencyVector;
       getForceDependencies( forceName, dependencyVector ); 
       for( unsigned int jj = 0; jj < dependencyVector.size(); jj++ ){
           std::string dependentForceName = dependencyVector[jj];
           StringUIntMapCI dependent      = forceNamesMap.find( dependentForceName );
           if( dependent == forceNamesMap.end() ){
              forceNamesMap[dependentForceName] = 1;
              int forceIndex                    = systemForceNameMap[dependentForceName];
              const Force& forceToCopy          = system.getForce( forceIndex );
              validationSystem->addForce( copyForce( forceToCopy ) ); 
           }
       }
    }

    // create contexts

    VerletIntegrator verletIntegrator( 0.001 );
    Context* validationContext1  = new Context( *validationSystem, verletIntegrator, platform1);
    Context* validationContext2  = new Context( *validationSystem, verletIntegrator, platform2);

    // set positions

    synchContexts( context, *validationContext1 );
    synchContexts( context, *validationContext2 );

    // diagnostics

    if( 0 && getLog() ){
        std::stringstream forceNames;
        (void) fprintf( getLog(), "    Validating system forces=%d\n", validationSystem->getNumForces() );
        for( int ii = 0; ii < validationSystem->getNumForces(); ii++ ){
            std::string forceName         = getForceName( validationSystem->getForce( ii ) );
            forceNames << forceName;
            if( ii < (validationSystem->getNumForces()-1) ){
                forceNames << "_";
            } else {
                forceNames << "Parameters.txt";
            }
            (void) fprintf( getLog(), "       force %d %s\n", ii, forceName.c_str() );
        }
        writeParameterFile( *validationContext1, forceNames.str() ); 
        (void) fflush( getLog() );
    }

    // calculate forces & build return result

    ForceValidationResult* forceValidationResult = new ForceValidationResult( *validationContext1, *validationContext2, forceNamesMap );

    delete validationContext1;
    delete validationContext2;
    delete validationSystem;
        
    return forceValidationResult;
}
Example #4
0
bool SGSmoothingVisitor::visit(StringGraph* pGraph, Vertex* pVertex)
{
    (void)pGraph;
    if(pVertex->getColor() == GC_RED)
        return false;

    bool found = false;
    for(size_t idx = 0; idx < ED_COUNT; idx++)
    {
        EdgeDir dir = EDGE_DIRECTIONS[idx];
        EdgePtrVec edges = pVertex->getEdges(dir);
        if(edges.size() <= 1)
            continue;

        for(size_t i = 0; i < edges.size(); ++i)
        {
            if(edges[i]->getEnd()->getColor() == GC_RED)
                return false;
        }

        //std::cout << "Smoothing " << pVertex->getID() << "\n";

        const int MAX_WALKS = 10;
        const int MAX_DISTANCE = 5000;
        bool bIsDegenerate = false;
        bool bFailGapCheck = false;
        bool bFailDivergenceCheck = false;
        bool bFailIndelSizeCheck = false;

        SGWalkVector variantWalks;
        SGSearch::findVariantWalks(pVertex, dir, MAX_DISTANCE, MAX_WALKS, variantWalks);

        if(variantWalks.size() > 0)
        {
            found = true;
            size_t selectedIdx = -1;
            size_t selectedCoverage = 0;

            // Calculate the minimum amount overlapped on the start/end vertex.
            // This is used to properly extract the sequences from walks that represent the variation.
            int minOverlapX = std::numeric_limits<int>::max();
            int minOverlapY = std::numeric_limits<int>::max();

            for(size_t i = 0; i < variantWalks.size(); ++i)
            {
                if(variantWalks[i].getNumEdges() <= 1)
                    bIsDegenerate = true;

                // Calculate the walk coverage using the internal vertices of the walk. 
                // The walk with the highest coverage will be retained
                size_t walkCoverage = 0;
                for(size_t j = 1; j < variantWalks[i].getNumVertices() - 1; ++j)
                    walkCoverage += variantWalks[i].getVertex(j)->getCoverage();

                if(walkCoverage > selectedCoverage || selectedCoverage == 0)
                {
                    selectedIdx = i;
                    selectedCoverage = walkCoverage;
                }
                
                Edge* pFirstEdge = variantWalks[i].getFirstEdge();
                Edge* pLastEdge = variantWalks[i].getLastEdge();

                if((int)pFirstEdge->getMatchLength() < minOverlapX)
                    minOverlapX = pFirstEdge->getMatchLength();

                if((int)pLastEdge->getTwin()->getMatchLength() < minOverlapY)
                    minOverlapY = pLastEdge->getTwin()->getMatchLength();
            }

            // Calculate the strings for each walk that represent the region of variation
            StringVector walkStrings;
            for(size_t i = 0; i < variantWalks.size(); ++i)
            {
                Vertex* pStartVertex = variantWalks[i].getStartVertex();
                Vertex* pLastVertex = variantWalks[i].getLastVertex();
                assert(pStartVertex != NULL && pLastVertex != NULL);
                
                std::string full = variantWalks[i].getString(SGWT_START_TO_END);
                int posStart = 0;
                int posEnd = 0;

                if(dir == ED_ANTISENSE)
                {
                    // pLast   -----------
                    // pStart          ------------
                    // full    --------------------
                    // out             ----
                    posStart = pLastVertex->getSeqLen() - minOverlapY;
                    posEnd = full.size() - (pStartVertex->getSeqLen() - minOverlapX);
                }
                else
                {
                    // pStart         --------------
                    // pLast   -----------
                    // full    ---------------------
                    // out            ----
                    posStart = pStartVertex->getSeqLen() - minOverlapX; // match start position
                    posEnd = full.size() - (pLastVertex->getSeqLen() - minOverlapY); // match end position
                }
                
                std::string out;
                if(posEnd > posStart)
                    out = full.substr(posStart, posEnd - posStart);
                walkStrings.push_back(out);
            }

            assert(selectedIdx != (size_t)-1);
            SGWalk& selectedWalk = variantWalks[selectedIdx];
            assert(selectedWalk.isIndexed());

            // Check the divergence of the other walks to this walk
            StringVector cigarStrings;
            std::vector<int> maxIndel;
            std::vector<double> gapPercent; // percentage of matching that is gaps
            std::vector<double> totalPercent; // percent of total alignment that is mismatch or gap

            cigarStrings.resize(variantWalks.size());
            gapPercent.resize(variantWalks.size());
            totalPercent.resize(variantWalks.size());
            maxIndel.resize(variantWalks.size());

            for(size_t i = 0; i < variantWalks.size(); ++i)
            {
                if(i == selectedIdx)
                    continue;

                // We want to compute the total gap length, total mismatches and percent
                // divergence between the two paths.
                int matchLen = 0;
                int totalDiff = 0;
                int gapLength = 0;
                int maxGapLength = 0;
                // We have to handle the degenerate case where one internal string has zero length
                // this can happen when there is an isolated insertion/deletion and the walks are like:
                // x -> y -> z
                // x -> z
                if(walkStrings[selectedIdx].empty() || walkStrings[i].empty())
                {
                    matchLen = std::max(walkStrings[selectedIdx].size(), walkStrings[i].size());
                    totalDiff = matchLen;
                    gapLength = matchLen;
                }
                else
                {
                    AlnAln *aln_global;
                    aln_global = aln_stdaln(walkStrings[selectedIdx].c_str(), walkStrings[i].c_str(), &aln_param_blast, 1, 1);

                    // Calculate the alignment parameters
                    while(aln_global->outm[matchLen] != '\0')
                    {
                        if(aln_global->outm[matchLen] == ' ')
                            totalDiff += 1;
                        matchLen += 1;
                    }

                    std::stringstream cigarSS;
                    for (int j = 0; j != aln_global->n_cigar; ++j)
                    {
                        char cigarOp = "MID"[aln_global->cigar32[j]&0xf];
                        int cigarLen = aln_global->cigar32[j]>>4;
                        if(cigarOp == 'I' || cigarOp == 'D')
                        {
                            gapLength += cigarLen;
                            if(gapLength > maxGapLength)
                                maxGapLength = gapLength;
                        }

                        cigarSS << cigarLen;
                        cigarSS << cigarOp;
                    }
                    cigarStrings[i] = cigarSS.str();
                    aln_free_AlnAln(aln_global);
                }

                double percentDiff = (double)totalDiff / matchLen;
                double percentGap = (double)gapLength / matchLen;

                if(percentDiff > m_maxTotalDivergence)
                    bFailDivergenceCheck = true;
                
                if(percentGap > m_maxGapDivergence)
                    bFailGapCheck = true;

                if(maxGapLength > m_maxIndelLength)
                    bFailIndelSizeCheck = true;

                gapPercent[i] = percentGap;
                totalPercent[i] = percentDiff;
                maxIndel[i] = maxGapLength;
            }

            if(bIsDegenerate || bFailGapCheck || bFailDivergenceCheck || bFailIndelSizeCheck)
                continue;

            // Write the selected path to the variants file as variant 0
            int variantIdx = 0;
            std::string selectedSequence = selectedWalk.getString(SGWT_START_TO_END);
            std::stringstream ss;
            ss << "variant-" << m_numRemovedTotal << "/" << variantIdx++;
            writeFastaRecord(&m_outFile, ss.str(), selectedSequence);


            // The vertex set for each walk is not necessarily disjoint,
            // the selected walk may contain vertices that are part
            // of other paths. We handle this be initially marking all
            // vertices of the 
            for(size_t i = 0; i < variantWalks.size(); ++i)
            {
                if(i == selectedIdx)
                    continue;

                SGWalk& currWalk = variantWalks[i];
                for(size_t j = 0; j < currWalk.getNumEdges() - 1; ++j)
                {
                    Edge* currEdge = currWalk.getEdge(j);
                    
                    // If the vertex is also on the selected path, do not mark it
                    Vertex* currVertex = currEdge->getEnd();
                    if(!selectedWalk.containsVertex(currVertex->getID()))
                    {
                        currEdge->getEnd()->setColor(GC_RED);
                    }
                }

                // Write the variant to a file
                std::string variantSequence = currWalk.getString(SGWT_START_TO_END);
                std::stringstream variantID;
                std::stringstream ss;
                ss << "variant-" << m_numRemovedTotal << "/" << variantIdx++;
                ss << " IGD:" << (double)gapPercent[i] << " ITD:" << totalPercent[i] << " MID: " << maxIndel[i] << " InternalCigar:" << cigarStrings[i];
                writeFastaRecord(&m_outFile, ss.str(), variantSequence);
            }

            if(variantWalks.size() == 2)
                m_simpleBubblesRemoved += 1;
            else
                m_complexBubblesRemoved += 1;
            ++m_numRemovedTotal;
        }
    }