void remove_pcr_duplicates(BaseQuality& base_quality, bool use_bam_rgs, std::map<std::string, std::string>& rg_to_library, std::vector< std::vector<BamTools::BamAlignment> >& paired_strs_by_rg, std::vector< std::vector<BamTools::BamAlignment> >& mate_pairs_by_rg, std::vector< std::vector<BamTools::BamAlignment> >& unpaired_strs_by_rg, std::ostream& logger){ int32_t dup_count = 0; assert(paired_strs_by_rg.size() == mate_pairs_by_rg.size() && paired_strs_by_rg.size() == unpaired_strs_by_rg.size()); for (unsigned int i = 0; i < paired_strs_by_rg.size(); i++){ assert(paired_strs_by_rg[i].size() == mate_pairs_by_rg[i].size()); std::vector<ReadPair> read_pairs; for (unsigned int j = 0; j < paired_strs_by_rg[i].size(); j++){ std::string library = use_bam_rgs ? get_library(paired_strs_by_rg[i][j], rg_to_library): rg_to_library[paired_strs_by_rg[i][j].Filename]; read_pairs.push_back(ReadPair(paired_strs_by_rg[i][j], mate_pairs_by_rg[i][j], library)); } for (unsigned int j = 0; j < unpaired_strs_by_rg[i].size(); j++){ std::string library = use_bam_rgs ? 
get_library(unpaired_strs_by_rg[i][j], rg_to_library): rg_to_library[unpaired_strs_by_rg[i][j].Filename]; read_pairs.push_back(ReadPair(unpaired_strs_by_rg[i][j], library)); } std::sort(read_pairs.begin(), read_pairs.end()); paired_strs_by_rg[i].clear(); mate_pairs_by_rg[i].clear(); unpaired_strs_by_rg[i].clear(); if (read_pairs.size() == 0) continue; int best_index = 0; for (unsigned int j = 1; j < read_pairs.size(); j++){ if (read_pairs[j].duplicate(read_pairs[best_index])){ dup_count++; // Update index if new pair's STR read has a higher total base quality if (base_quality.sum_log_prob_correct(read_pairs[j].aln_one().Qualities) > base_quality.sum_log_prob_correct(read_pairs[best_index].aln_one().Qualities)) best_index = j; } else { // Keep best pair from prior set of duplicates if (read_pairs[best_index].single_ended()) unpaired_strs_by_rg[i].push_back(read_pairs[best_index].aln_one()); else { paired_strs_by_rg[i].push_back(read_pairs[best_index].aln_one()); mate_pairs_by_rg[i].push_back(read_pairs[best_index].aln_two()); } best_index = j; // Update index for new set of duplicates } } // Keep best pair for last set of duplicates if (read_pairs[best_index].single_ended()) unpaired_strs_by_rg[i].push_back(read_pairs[best_index].aln_one()); else { paired_strs_by_rg[i].push_back(read_pairs[best_index].aln_one()); mate_pairs_by_rg[i].push_back(read_pairs[best_index].aln_two()); } } logger << "Removed " << dup_count << " sets of PCR duplicate reads" << std::endl; }
int NASAKeywordHandler::ReadGroup( const char *pszPathPrefix ) { CPLString osName, osValue; for( ; TRUE; ) { if( !ReadPair( osName, osValue ) ) return FALSE; if( EQUAL(osName,"OBJECT") || EQUAL(osName,"GROUP") ) { if( !ReadGroup( (CPLString(pszPathPrefix) + osValue + ".").c_str() ) ) return FALSE; } else if( EQUALN(osName,"END",3) ) { return TRUE; } else { osName = pszPathPrefix + osName; papszKeywordList = CSLSetNameValue( papszKeywordList, osName, osValue ); } } }
bool plConfigSource::ReadString(const std::string & in) { std::string work = in; xtl::trim(work); // comment if (work[0] == '#') return true; // comment if (work[0] == ';') return true; // section if (work[0] == '[') { int close = work.find_first_of("]"); if(close == std::string::npos) return false; fCurrSection = work.substr(1,close-1); fEffectiveSection = fCurrSection; return true; } // key=value std::string key, value; SplitAt(key, value, '=', work); // dot notation makes section change for this key=value only. int t = key.find('.'); if (t>0 && t<key.size()-1) { fEffectiveSection.assign(key.substr(0,t)); key.assign(key.substr(t+1)); } bool ret=ReadPair(key, value); fEffectiveSection = fCurrSection; if(ret && strcmp("LoadIni",key.c_str()) == 0) { ret = ReadSubSource( value.c_str() ); } return ret; }
bool plIniNoSectionsConfigSource::ReadString(const std::string & in) { std::string work = in; xtl::trim(work); // ignore comments if (work[0]=='#' || work[0]==';') return true; // ignore sections if (work[0] == '[') return true; // parse key value std::string key, value; SplitAt(key, value, '=', work); return ReadPair(key, value); }
/**
 * Reads name/value pairs until an "END" name is found.
 *
 * Each pair is obtained through ReadPair(). A pair named "PROPERTY",
 * "HISTORY" or "TASK" is not stored itself; its value is remembered and
 * prepended, followed by ".", to the names of the pairs that come after it.
 * All other pairs are stored in papszKeywordList.
 *
 * @param pszPathPrefix unused.
 * @return TRUE when "END" was read, FALSE when ReadPair() failed first.
 */
int VICARKeywordHandler::ReadGroup( CPL_UNUSED const char *pszPathPrefix )
{
    CPLString osKey;
    CPLString osVal;
    CPLString osSection;

    while( ReadPair( osKey, osVal ) )
    {
        if( EQUAL(osKey,"END") )
            return TRUE;

        const bool bSectionMarker = EQUAL(osKey,"PROPERTY")
                                 || EQUAL(osKey,"HISTORY")
                                 || EQUAL(osKey,"TASK");
        if( bSectionMarker )
        {
            // Remember the value; it prefixes the names that follow.
            osSection = osVal;
            continue;
        }

        if( !EQUAL(osSection,"") )
            osKey = osSection + "." + osKey;

        papszKeywordList = CSLSetNameValue( papszKeywordList, osKey, osVal );
    }

    // ReadPair() failed before "END" was seen.
    return FALSE;
}