void generate_errors_per_base(JSONWriter* pWriter, const BWTIndexSet& index_set) { int n_samples = 100000; size_t k = 25; double max_error_rate = 0.95; size_t min_overlap = 50; std::vector<size_t> position_count; std::vector<size_t> error_count; Timer timer("test", true); #if HAVE_OPENMP omp_set_num_threads(opt::numThreads); #pragma omp parallel for #endif for(int i = 0; i < n_samples; ++i) { std::string s = BWTAlgorithms::sampleRandomString(index_set.pBWT); KmerOverlaps::retrieveMatches(s, k, min_overlap, max_error_rate, 2, index_set); //KmerOverlaps::approximateMatch(s, min_overlap, max_error_rate, 2, 200, index_set); MultipleAlignment ma = KmerOverlaps::buildMultipleAlignment(s, k, min_overlap, max_error_rate, 2, index_set); // Skip when there is insufficient depth to classify errors size_t ma_rows = ma.getNumRows(); if(ma_rows <= 1) continue; size_t ma_cols = ma.getNumColumns(); size_t position = 0; for(size_t j = 0; j < ma_cols; ++j) { char s_symbol = ma.getSymbol(0, j); // Skip gaps if(s_symbol == '-' || s_symbol == '\0') continue; SymbolCountVector scv = ma.getSymbolCountVector(j); int s_symbol_count = 0; char max_symbol = 0; int max_count = 0; for(size_t k = 0; k < scv.size(); ++k) { if(scv[k].symbol == s_symbol) s_symbol_count = scv[k].count; if(scv[k].count > max_count) { max_count = scv[k].count; max_symbol = scv[k].symbol; } } //printf("P: %zu S: %c M: %c MC: %d\n", position, s_symbol, max_symbol, max_count); // Call an error at this position if the consensus symbol differs from the read // and the support for the read symbol is less than 4 and the consensus symbol // is strongly supported. bool is_error = s_symbol != max_symbol && s_symbol_count < 4 && max_count >= 3; #if HAVE_OPENMP #pragma omp critical #endif { if(position >= position_count.size()) { position_count.resize(position+1); error_count.resize(position+1); } position_count[position]++; error_count[position] += is_error; } position += 1; } } pWriter->String("ErrorsPerBase"); pWriter->StartObject(); pWriter->String("base_count"); pWriter->StartArray(); for(size_t i = 0; i < position_count.size(); ++i) pWriter->Int(position_count[i]); pWriter->EndArray(); pWriter->String("error_count"); pWriter->StartArray(); for(size_t i = 0; i < position_count.size(); ++i) pWriter->Int(error_count[i]); pWriter->EndArray(); pWriter->EndObject(); }