void DindelUtil::doMultipleReadHaplotypeAlignment(const std::vector<DindelRead> & dReads, const StringVector & haplotypes) { // globally align haplotypes to the first haplotype (arbitrary) assert(haplotypes.size()>0); for (size_t h = 0; h < haplotypes.size(); ++h) { std::cout << "ALIGNING EVERYTHING AGAINST HAPLOTYPE " << h << "\n"; MultipleAlignment ma; const std::string rootSequence = haplotypes[h]; ma.addBaseSequence("root", rootSequence, ""); std::string hid; for(size_t j = 0; j < haplotypes.size(); j++) { std::stringstream ss; if (j!=h) ss << "haplotype-" << j; else ss << "HAPLOTYPE-" << j; SequenceOverlap o = Overlapper::computeOverlap(rootSequence, haplotypes[j]); ma.addOverlap(ss.str(), haplotypes[j], "", o); } for(size_t r = 0; r < dReads.size(); ++r) { std::stringstream ss; if (r<dReads.size()/2) ss << "read-" << r << "(" << dReads[r].getID() << ")"; else ss << "MATE read-" << r; SequenceOverlap o = Overlapper::computeOverlap(rootSequence, dReads[r].getSequence()); ma.addOverlap(ss.str(), dReads[r].getSequence(), "", o); } ma.print(100000); } }
bool OverlapHaplotypeBuilder::buildInitialGraph(const StringVector& reads) { PROFILE_FUNC("OverlapHaplotypeBuilder::buildInitialGraph") // Compute initial ordering of reads based on the position of the // starting kmer sequence. If the starting kmer was corrected out // of a read, it is discarded. StringVector ordered_reads; orderReadsInitial(m_initial_kmer_string, reads, &ordered_reads); if(ordered_reads.size() < m_parameters.minDiscoveryCount) return false; #ifdef SHOW_MULTIPLE_ALIGNMENT //DEBUG print MA MultipleAlignment ma = buildMultipleAlignment(ordered_reads); ma.print(200); #endif // Insert initial reads into graph for(size_t i = 0; i < ordered_reads.size(); ++i) insertVertexIntoGraph("seed-", ordered_reads[i]); return true; }
int IntersectByMaster(CCdCore* ccd, double rowFraction) { int result = -1; unsigned int masterLen = (ccd) ? ccd->GetSequenceStringByRow(0).length() : 0; if (masterLen == 0) return result; int slaveStart; int nAlignedIBM = 0; unsigned int i, j, nBlocks; unsigned int nRows = ccd->GetNumRows(); // If there is already a consistent block model, do nothing. MultipleAlignment* ma = new MultipleAlignment(ccd); if (ma && ma->isBlockAligned()) { delete ma; return 0; } delete ma; BlockIntersector blockIntersector(masterLen); BlockModel* intersectedBlockModel; //BlockModel* simpleIntersectedBlockModel; BlockModelPair* bmp; vector<BlockModelPair*> blockModelPairs; set<int> forcedCTerminiInIntersection; list< CRef< CSeq_align > >& cdSeqAligns = ccd->GetSeqAligns(); list< CRef< CSeq_align > >::iterator cdSeqAlignIt = cdSeqAligns.begin(), cdSeqAlignEnd = cdSeqAligns.end(); for (i = 0; cdSeqAlignIt != cdSeqAlignEnd; ++cdSeqAlignIt, ++i) { bmp = new BlockModelPair(*cdSeqAlignIt); // We assume # of blocks and all block lengths are same on master and slave. if (bmp && bmp->isValid()) { blockModelPairs.push_back(bmp); blockIntersector.addOneAlignment(bmp->getMaster()); // Find places the intersection can't merge blocks (i.e., where there are // gaps in the slave across a block boundary, but not in the master). BlockModel& slave = bmp->getSlave(); nBlocks = slave.getBlocks().size(); for (j = 0; j < nBlocks - 1; ++j) { // '-1' as I don't care about end of the C-terminal block if (slave.getGapToCTerminal(j) > 0 && bmp->getMaster().getGapToCTerminal(j) == 0) { forcedCTerminiInIntersection.insert(bmp->getMaster().getBlock(j).getEnd()); } } } } // There was a problem creating one of the BlockModelPair objects from a seq_align, // or one or more seq_align was invalid. if (blockModelPairs.size() != cdSeqAligns.size()) { return result; } //simpleIntersectedBlockModel = blockIntersector.getIntersectedAlignment(forcedCTerminiInIntersection); intersectedBlockModel = blockIntersector.getIntersectedAlignment(forcedCTerminiInIntersection, rowFraction); nAlignedIBM = (intersectedBlockModel) ? intersectedBlockModel->getTotalBlockLength() : 0; if (nAlignedIBM == 0) { return result; } /* string testStr, testStr2; string sint = intersectedBlockModel->toString(); string sintsimple = simpleIntersectedBlockModel->toString(); delete simpleIntersectedBlockModel; cout << "rowFraction = 1:\n" << sintsimple << endl; cout << "rowFraction = " << rowFraction << ":\n" << sint << endl; */ // As we have case where every block model isn't identical, // change each seq-align to reflect the common set of aligned columns. nBlocks = intersectedBlockModel->getBlocks().size(); for (i = 0, cdSeqAlignIt = cdSeqAligns.begin(); i < nRows - 1 ; ++i, ++cdSeqAlignIt) { bmp = blockModelPairs[i]; //BlockModelPair seqAlignPair(*cdSeqAlignIt); BlockModel* intersectedSeqAlignSlave = new BlockModel(bmp->getSlave().getSeqId(), false); bmp->reverse(); for (j = 0; j < nBlocks; ++j) { const Block& jthMasterBlock = intersectedBlockModel->getBlock(j); slaveStart = bmp->mapToMaster(jthMasterBlock.getStart()); // since we're dealing w/ an intersection, slaveStart should always be valid assert(slaveStart != -1); Block b(slaveStart, jthMasterBlock.getLen(), jthMasterBlock.getId()); intersectedSeqAlignSlave->addBlock(b); } *cdSeqAlignIt = intersectedSeqAlignSlave->toSeqAlign(*intersectedBlockModel); //testStr = intersectedSeqAlignSlave->toString(); //testStr2 = bmp->getMaster().toString(); // original *slave* alignment delete bmp; } blockModelPairs.clear(); result = nBlocks; delete intersectedBlockModel; return result; }
void GetAlignmentColumnsForCD(CCdCore* cd, map<unsigned int, string>& columns, unsigned int referenceRow) { bool isOK = true, useRefRow = true; int j; unsigned int i, col, row, pos, mapIndex, nRows, nCols, nBlocks; char** alignedResidues = NULL; string rowString, colString; // Map column number to position on the selected reference row. map<unsigned int, unsigned int> colToPos; map<unsigned int, string> rowStrings; vector<int> starts, lengths; CRef< CSeq_align > seqAlign; // Empty the columns map first, as this is used as a way to flag problems. columns.clear(); if (!cd) return; // Check if the block structure is consistent. try { MultipleAlignment* ma = new MultipleAlignment(cd); if (!ma) { ERR_POST("Creation of MultipleAlignment object failed for CD " << cd->GetAccession() << "."); return; } else if (! ma->isBlockAligned()) { delete ma; ERR_POST("CD " << cd->GetAccession() << " must have a consistent block structure for column extraction."); return; } delete ma; ma = NULL; } catch (...) { ERR_POST("Could not extract columns for CD " << cd->GetAccession()); } nCols = cd->GetAlignmentLength(); nRows = cd->GetNumRows(); // Get a reference seq-align for mapping between alignment rows. // If the columns map index will simply be the column count, use the master, row 0. if (referenceRow >= nRows) { useRefRow = false; referenceRow = 0; } if (! cd->GetSeqAlign(referenceRow, seqAlign)) { isOK = false; } // Initialize the column # -> reference row position mapping. // If useRefRow is true, use the indicated row's coordinates as the position. // Otherwise, use the column number as the position. if (isOK && GetBlockStarts(seqAlign, starts, (referenceRow == 0)) > 0 && GetBlockLengths(seqAlign, lengths) > 0) { nBlocks = starts.size(); if (nBlocks == lengths.size()) { for (i = 0, col = 0; i < nBlocks; ++i) { pos = (useRefRow) ? starts[i] : col; for (j = 0; j < lengths[i]; ++j, ++col, ++pos) { // Not explicitly checking if 'pos' is aligned since above // we confirmed the CD has a valid block model. colToPos[col] = pos; } } } else { isOK = false; } } else { isOK = false; } SetAlignedResiduesForCD(cd, alignedResidues, true); // Construct the columns as string objects. if (isOK && alignedResidues) { for (col = 0; col < nCols; ++col) { colString.erase(); for (row = 0; row < nRows; ++row) { colString += alignedResidues[row][col]; } mapIndex = colToPos[col]; columns[mapIndex] = colString; } } // Clean up array of characters. if (alignedResidues) { for (row = 0; row < nRows; ++row) { delete [] alignedResidues[row]; } delete [] alignedResidues; } }
int main (int argc, char *argv[]) { int i; PlatformSupport* Plat = new PlatformSupport(); ColumnComp* CC; Alignment* ALIGN; Tree* T; MultipleAlignment* MA; ProteinDomains* PROTS =NULL; MultiAlignRec* pssmAlignment; char outFileName[STR_LEN]; strcpy(outFileName, "out"); bool colChosen=false, alignChosen=false, treeChosen=false, maChosen=false, usingDomains=false, inputProvided=false, scoresProvided=false; bool neuralTree=false; bool testing=false;bool testingAcc=false; bool testingTree=false; bool famNames=false; bool treeClusts=false; bool printTreeClusts=false; bool ma_off=false; bool tree_loocv=false;//true; bool silent=false, htmlOutput=false; bool simMatching=false; bool weighting_on=false; int matchTopX = TOP_MATCH; char inputTFs[STR_LEN]; char matchTFs[STR_LEN]; char scoreDist[STR_LEN]; char inputProteins[STR_LEN]; //Misc option settings bool genRandMotifs=false; bool genRandScores=false; char randMatOut[STR_LEN]; char scoresOut[STR_LEN]; //Default alignment settings double gapOpen = DFLT_GAP_OPEN; double gapExtend = DFLT_GAP_EXTEND; bool overlapAlign = DFLT_OVLP_ALIGN; bool extendOverlap=false; bool FBP_on = false; bool preAlign=false; bool pairwiseOnly=false; bool forwardAlignOnly=false; bool ungapped=false; for(i=1; i<argc; i++){ if(strcmp(argv[i], "-silent")==0) silent=true; if(strcmp(argv[i], "-html")==0) htmlOutput=true; } //Welcome message if(!silent && !htmlOutput){printf("\n\tSTAMP\n\tSimilarity, Tree-building, & Alignment of Motifs and Profiles\n\n\tShaun Mahony\n\tDepartment of Computational Biology\n\tUniversity of Pittsburgh\n\tVersion 1.0 (Winter 2006)\n\n");} if(argc ==1) //First and Foremost, the help option { DisplayHelp(); }else{ for(i=1; i<argc; i++) { if(strcmp(argv[i], "-h")==0 || strcmp(argv[i], "?")==0) //First and Foremost, the help option { DisplayHelp(); } if(strcmp(argv[i], "-out")==0) //Output file (for trees & similarity matching) { if(argv[i+1]!=NULL) { strcpy(outFileName, argv[i+1]);} } if(strcmp(argv[i], "-genrand")==0) //Generate random motifs { if(argv[i+1]!=NULL) { strcpy(randMatOut, argv[i+1]);} genRandMotifs=true; } if(strcmp(argv[i], "-genscores")==0) //Generate simulation scores { if(argv[i+1]!=NULL) { strcpy(scoresOut, argv[i+1]);} genRandScores=true; } if((strcmp(argv[i], "-cc")) ==0) //Choose a column comparison measure { if((strcmp(argv[i+1], "PCC"))==0 || (strcmp(argv[i+1], "pcc"))==0){ CC = new PearsonCorrelation(); //Pearson's correllation coefficient }else if((strcmp(argv[i+1], "ALLR"))==0 || (strcmp(argv[i+1], "allr"))==0){ CC = new ALLR(); //ALLR }else if((strcmp(argv[i+1], "ALLR_LL"))==0 || (strcmp(argv[i+1], "allr_ll"))==0){ CC = new ALLR_LL(); //ALLR with lower limit }else if((strcmp(argv[i+1], "CS"))==0 || (strcmp(argv[i+1], "cs"))==0){ CC = new ChiSq(); //Pearson's Chi Square }else if((strcmp(argv[i+1], "KL"))==0 || (strcmp(argv[i+1], "kl"))==0){ CC = new KullbackLieber(); //Kullback-Lieber }else if((strcmp(argv[i+1], "SSD"))==0 || (strcmp(argv[i+1], "ssd"))==0){ CC = new SumSqDiff(); //sum of squared difference }else{ CC = new PearsonCorrelation(); //Default = PCC } colChosen=true; } //check for alignment settings if((strcmp(argv[i], "-go")) ==0){ //Gap Open if(argv[i+1]!=NULL) { gapOpen=strtod(argv[i+1], NULL);} } if((strcmp(argv[i], "-ge")) ==0){ //Gap Extend if(argv[i+1]!=NULL) { gapExtend=strtod(argv[i+1], NULL);} } if((strcmp(argv[i], "-overlapalign")) ==0){ //Only complete overlapping alignments overlapAlign = true; if(!silent && !htmlOutput){printf("Overlapping alignments only\n");} }if((strcmp(argv[i], "-nooverlapalign")) ==0){ //All overlapping alignments overlapAlign = false; } if((strcmp(argv[i], "-extendoverlap")) ==0){ extendOverlap=true; if(!silent && !htmlOutput){printf("Extending the overlapping alignments\n");} } if((strcmp(argv[i], "-forwardonly")) ==0){ //Consider forward alignments only forwardAlignOnly = true; if(!silent && !htmlOutput){printf("Considering forward direction alignments only\n");} } if((strcmp(argv[i], "-printpairwise")) ==0){ pairwiseOnly=true; if(!silent && !htmlOutput){printf("Printing pairwise scores only\n");} } if((strcmp(argv[i], "-FBP")) ==0){ FBP_on=true; if(!silent && !htmlOutput){printf("Using FBP profiles\n");} } if((strcmp(argv[i], "-useweighting")) ==0){ weighting_on=true; if(!silent && !htmlOutput){printf("Using weighting in FBP construction\n");} } if((strcmp(argv[i], "-prealigned")) ==0){ preAlign=true; if(!silent && !htmlOutput){printf("Profiles are pre-aligned\n");} } //Input TF dataset name if((strcmp(argv[i], "-tf")) ==0) { if(argv[i+1]!=NULL) { strcpy(inputTFs, argv[i+1]);} inputProvided=true; } //Score distribution file Make an auto function for this!!!!!!! if((strcmp(argv[i], "-sd")) ==0) { if(argv[i+1]!=NULL) { strcpy(scoreDist, argv[i+1]);} scoresProvided=true; } //Match input TFs against this dataset if((strcmp(argv[i], "-match")) ==0) { if(argv[i+1]!=NULL) { strcpy(matchTFs, argv[i+1]);} if(argv[i+2]!=NULL && strcmp(argv[i+2], "fams")==0){ famNames=true; } simMatching=true; } if((strcmp(argv[i], "-match_top")) ==0){ //Report the top X matches if(argv[i+1]!=NULL) { matchTopX=strtol(argv[i+1], NULL, 10);} } //Matching input protein (Pfam) alignment dataset name if((strcmp(argv[i], "-prot")) ==0) { if(argv[i+1]!=NULL) { strcpy(inputProteins, argv[i+1]);} usingDomains = true; } //Run some tests if((strcmp(argv[i], "-test")) ==0) { testing=true; } //Run some different tests if((strcmp(argv[i], "-testacc")) ==0) { testingAcc=true; famNames=true; } //Run some tests with trees if((strcmp(argv[i], "-testtree")) ==0) { testingTree=true; famNames=true; }//Run Calinski & Harabasz with trees if((strcmp(argv[i], "-ch")) ==0) { testingTree=true; treeClusts=true; }//Run Calinski & Harabasz with trees and print the resulting clusters if((strcmp(argv[i], "-chp")) ==0) { testingTree=true; printTreeClusts=true; treeClusts=true; } } //Defaults if(!colChosen) { CC = new PearsonCorrelation();} //Second Pass for(i=1; i<argc; i++) { if((strcmp(argv[i], "-align")) ==0) //Choose an alignment method { if((strcmp(argv[i+1], "NW"))==0 || (strcmp(argv[i+1], "nw"))==0){ ALIGN = new NeedlemanWunsch(CC, gapOpen, gapExtend, overlapAlign, extendOverlap, forwardAlignOnly); } if((strcmp(argv[i+1], "SWU"))==0 || (strcmp(argv[i+1], "swu"))==0){ ALIGN = new SmithWatermanUngappedExtended(CC,forwardAlignOnly); ungapped=true; } if((strcmp(argv[i+1], "SWA"))==0 || (strcmp(argv[i+1], "swa"))==0){ ALIGN = new SmithWatermanAffine(CC, gapOpen, gapExtend, overlapAlign, extendOverlap,forwardAlignOnly); } if((strcmp(argv[i+1], "SW"))==0 || (strcmp(argv[i+1], "sw"))==0){ ALIGN = new SmithWaterman(CC, gapOpen, gapExtend, overlapAlign, extendOverlap,forwardAlignOnly); } alignChosen = true; } //Choose a multiple alignment method if((strcmp(argv[i], "-ma")) ==0) { if((strcmp(argv[i+1], "PPA"))==0 || (strcmp(argv[i+1], "ppa"))==0){ MA = new ProgressiveProfileAlignment(outFileName, htmlOutput); maChosen=true; } if((strcmp(argv[i+1], "IR"))==0 || (strcmp(argv[i+1], "ir"))==0){ MA = new IterativeRefinementAlignment(outFileName, htmlOutput); maChosen=true; } if((strcmp(argv[i+1], "NONE"))==0 || (strcmp(argv[i+1], "none"))==0){ maChosen=true; ma_off=true; } } } if(!alignChosen) { ALIGN = new SmithWatermanAffine(CC, gapOpen, gapExtend, overlapAlign, extendOverlap); } if(!maChosen) MA = new ProgressiveProfileAlignment(outFileName, htmlOutput); //Third pass //Choose a tree-construction method for(i=1; i<argc; i++) { if((strcmp(argv[i], "-tree")) ==0) { if((strcmp(argv[i+1], "UPGMA"))==0 || (strcmp(argv[i+1], "upgma"))==0){ T = new UPGMA(ALIGN); } if((strcmp(argv[i+1], "SOTA"))==0 || (strcmp(argv[i+1], "sota"))==0){ T = new SOTA(ALIGN, MA); neuralTree=true; } if((strcmp(argv[i+1], "NJ"))==0 || (strcmp(argv[i+1], "nj"))==0){ T = new Neighbourjoin(ALIGN); printf("Using Neighbour-joining... ensure that the distance metric is additive\n"); } if((strcmp(argv[i+1], "TDHC"))==0 || (strcmp(argv[i+1], "tdhc"))==0){ T = new TopDownHClust(ALIGN, MA); neuralTree=true; } treeChosen=true; } } if(!treeChosen) T = new UPGMA(ALIGN); T->BeQuiet(silent); //////////////////////////////////////////////////////////////////////////////////// //////// Main Program ///////////////////////////////////////////////////////////// //Initialise the background Plat->ReadBackground(); if(inputProvided){ //Read in the matrices Plat->ReadTransfacFile(inputTFs, famNames,true, weighting_on); if(!silent && !htmlOutput){ printf("MatCount: %d\n", Plat->GetMatCount()); if(ungapped) printf("Ungapped Alignment\n"); else printf("Gap open = %.3lf, gap extend = %.3lf\n", gapOpen, gapExtend); } }else{ printf("No input motifs provided!\n\n"); } if(genRandMotifs){ //Generate some random matrices RandPSSMGen* RPG = new RandPSSMGen(Plat->inputMotifs, Plat->GetMatCount(), 10000, randMatOut); RPG->RunGenerator(); } if(genRandScores){ //Find rand dist Plat->GetRandDistrib(scoresOut, ALIGN); }else if(!scoresProvided){ printf("No score distribution file provided!\n\n"); } if(testing){ PlatformTesting* PT = new PlatformTesting(CC); //Print the distribution of column depth // PT->ColumnDepthDist(Plat->inputMotifs, Plat->GetMatCount()); //Print the similarities of all columns against all others // PT->ColumnScoreDist(Plat->inputMotifs, Plat->GetMatCount(), 0.05); double z; for(z=0.25; z<0.8; z+=0.05) PT->RandColumns(Plat, z); for(z=0.8; z<=1.0; z+=0.01) PT->RandColumns(Plat, z); delete(PT); } if(scoresProvided || preAlign){ Plat->ReadScoreDists(scoreDist); if(!silent && !htmlOutput){printf("Scores read\n");} if(Plat->GetMatCount()>1){ if(preAlign){ //No alignments or trees built here pssmAlignment = MA->PreAlignedInput(Plat); }else{ //Multiple alignment procedure Plat->PreAlign(ALIGN); if(pairwiseOnly){ if(!silent && !htmlOutput){printf("\nPairwise alignment scores:\n");} Plat->PrintPairwise(); }if(!ma_off){ MA->ImportBasics(Plat, ALIGN); if(!silent && !htmlOutput){printf("Alignments Finished\n");} if(!testingAcc){ if(tree_loocv && testingTree){ T->LOOCVBuildTree(Plat, testingTree); }else{ if(testingTree && !silent && !htmlOutput){printf("Calinski & Harabasz:\n\tNumClust\tC&H_Metric\n");} T->BuildTree(Plat, testingTree); if(!silent && treeClusts){printf("The Calinski & Harabasz statistic suggests %.0lf clusters in the input motifs\n", T->GetNodesMinCH());} if(printTreeClusts){ T->PrintLevel(outFileName, int(T->GetNodesMinCH())); } } T->PrintTree(outFileName); if(!silent && !htmlOutput){printf("Tree Built\n");} if(!silent){ if(!silent && !htmlOutput){printf("Multiple Alignment:\n");} pssmAlignment = MA->BuildAlignment(Plat, ALIGN, T); } } } } //Experiment with the Protein Domains if(usingDomains){ PROTS = new ProteinDomains(); PROTS->ReadDomains(inputProteins, Plat->inputMotifs, Plat->GetMatCount()); PROTS->MutualInformation(pssmAlignment, MA->Alignment2Profile(pssmAlignment, "AlignmentMotif"), Plat->inputMotifs, Plat->GetMatCount()); delete PROTS; } } //Similarity match against the database if(simMatching){ Plat->ReadTransfacFile(matchTFs, famNames, false, false); Plat->SimilarityMatching(ALIGN, outFileName, famNames, matchTopX); } } if(testingAcc && scoresProvided && inputProvided && Plat->GetMatCount()>1){ PlatformTesting* PT = new PlatformTesting(CC); PT->PairwisePredictionAccuracy(Plat); } delete(MA); delete(T); delete(CC); delete(ALIGN); } delete(Plat); return(0); }
void generate_errors_per_base(JSONWriter* pWriter, const BWTIndexSet& index_set) { int n_samples = 100000; size_t k = 25; double max_error_rate = 0.95; size_t min_overlap = 50; std::vector<size_t> position_count; std::vector<size_t> error_count; Timer timer("test", true); #if HAVE_OPENMP omp_set_num_threads(opt::numThreads); #pragma omp parallel for #endif for(int i = 0; i < n_samples; ++i) { std::string s = BWTAlgorithms::sampleRandomString(index_set.pBWT); KmerOverlaps::retrieveMatches(s, k, min_overlap, max_error_rate, 2, index_set); //KmerOverlaps::approximateMatch(s, min_overlap, max_error_rate, 2, 200, index_set); MultipleAlignment ma = KmerOverlaps::buildMultipleAlignment(s, k, min_overlap, max_error_rate, 2, index_set); // Skip when there is insufficient depth to classify errors size_t ma_rows = ma.getNumRows(); if(ma_rows <= 1) continue; size_t ma_cols = ma.getNumColumns(); size_t position = 0; for(size_t j = 0; j < ma_cols; ++j) { char s_symbol = ma.getSymbol(0, j); // Skip gaps if(s_symbol == '-' || s_symbol == '\0') continue; SymbolCountVector scv = ma.getSymbolCountVector(j); int s_symbol_count = 0; char max_symbol = 0; int max_count = 0; for(size_t k = 0; k < scv.size(); ++k) { if(scv[k].symbol == s_symbol) s_symbol_count = scv[k].count; if(scv[k].count > max_count) { max_count = scv[k].count; max_symbol = scv[k].symbol; } } //printf("P: %zu S: %c M: %c MC: %d\n", position, s_symbol, max_symbol, max_count); // Call an error at this position if the consensus symbol differs from the read // and the support for the read symbol is less than 4 and the consensus symbol // is strongly supported. bool is_error = s_symbol != max_symbol && s_symbol_count < 4 && max_count >= 3; #if HAVE_OPENMP #pragma omp critical #endif { if(position >= position_count.size()) { position_count.resize(position+1); error_count.resize(position+1); } position_count[position]++; error_count[position] += is_error; } position += 1; } } pWriter->String("ErrorsPerBase"); pWriter->StartObject(); pWriter->String("base_count"); pWriter->StartArray(); for(size_t i = 0; i < position_count.size(); ++i) pWriter->Int(position_count[i]); pWriter->EndArray(); pWriter->String("error_count"); pWriter->StartArray(); for(size_t i = 0; i < position_count.size(); ++i) pWriter->Int(error_count[i]); pWriter->EndArray(); pWriter->EndObject(); }