Esempio n. 1
0
/*
 * Collapses PCR duplicates within each read group, in place.
 *
 * For every read-group index, the paired STR alignments (with their mates) and the
 * unpaired STR alignments are wrapped in ReadPair objects, sorted with std::sort and
 * scanned in order. Each entry is tested with ReadPair::duplicate() against the
 * current representative of the running duplicate set. Among duplicates, the entry
 * whose first alignment has the strictly highest
 * BaseQuality::sum_log_prob_correct(Qualities) becomes the representative (ties keep
 * the earlier entry). When a set ends, its representative is written back to the
 * unpaired vector (single-ended) or to the paired/mate vectors.
 *
 * The library of each alignment comes from get_library() when use_bam_rgs is true,
 * otherwise from rg_to_library indexed by the alignment's Filename.
 *
 * The three per-read-group vectors are cleared and refilled with the survivors.
 * The counter written to logger is incremented once for every entry flagged as a
 * duplicate of its set's representative.
 */
void remove_pcr_duplicates(BaseQuality& base_quality, bool use_bam_rgs,
                           std::map<std::string, std::string>& rg_to_library,
                           std::vector< std::vector<BamTools::BamAlignment> >& paired_strs_by_rg,
                           std::vector< std::vector<BamTools::BamAlignment> >& mate_pairs_by_rg,
                           std::vector< std::vector<BamTools::BamAlignment> >& unpaired_strs_by_rg, std::ostream& logger){
  int32_t num_duplicates = 0;
  const size_t num_groups = paired_strs_by_rg.size();
  assert(num_groups == mate_pairs_by_rg.size() && num_groups == unpaired_strs_by_rg.size());

  for (size_t rg = 0; rg < num_groups; ++rg){
    std::vector<BamTools::BamAlignment>& paired   = paired_strs_by_rg[rg];
    std::vector<BamTools::BamAlignment>& mates    = mate_pairs_by_rg[rg];
    std::vector<BamTools::BamAlignment>& unpaired = unpaired_strs_by_rg[rg];
    assert(paired.size() == mates.size());

    // Gather every read of this group (paired first, then unpaired) as a ReadPair
    std::vector<ReadPair> candidates;
    for (size_t k = 0; k < paired.size(); ++k){
      std::string library;
      if (use_bam_rgs)
        library = get_library(paired[k], rg_to_library);
      else
        library = rg_to_library[paired[k].Filename];
      candidates.push_back(ReadPair(paired[k], mates[k], library));
    }
    for (size_t k = 0; k < unpaired.size(); ++k){
      std::string library;
      if (use_bam_rgs)
        library = get_library(unpaired[k], rg_to_library);
      else
        library = rg_to_library[unpaired[k].Filename];
      candidates.push_back(ReadPair(unpaired[k], library));
    }
    std::sort(candidates.begin(), candidates.end());

    // The outputs are rebuilt from scratch with the surviving reads
    paired.clear();
    mates.clear();
    unpaired.clear();
    if (candidates.empty())
      continue;

    // Walk one step past the end so the final duplicate set is flushed by the same code path
    size_t best = 0;
    for (size_t k = 1; k <= candidates.size(); ++k){
      const bool at_end = (k == candidates.size());
      if (!at_end && candidates[k].duplicate(candidates[best])){
        ++num_duplicates;
        // Prefer the pair whose STR read has the higher total base quality
        if (base_quality.sum_log_prob_correct(candidates[k].aln_one().Qualities) >
            base_quality.sum_log_prob_correct(candidates[best].aln_one().Qualities))
          best = k;
        continue;
      }

      // The current duplicate set is complete: keep only its representative
      ReadPair& keeper = candidates[best];
      if (keeper.single_ended())
        unpaired.push_back(keeper.aln_one());
      else {
        paired.push_back(keeper.aln_one());
        mates.push_back(keeper.aln_two());
      }
      best = k; // First member of the next duplicate set
    }
  }
  logger << "Removed " << num_duplicates << " sets of PCR duplicate reads" << std::endl;
}
Esempio n. 2
0
int NASAKeywordHandler::ReadGroup( const char *pszPathPrefix )

{
    CPLString osName, osValue;

    for( ; TRUE; )
    {
        if( !ReadPair( osName, osValue ) )
            return FALSE;

        if( EQUAL(osName,"OBJECT") || EQUAL(osName,"GROUP") )
        {
            if( !ReadGroup( (CPLString(pszPathPrefix) + osValue + ".").c_str() ) )
                return FALSE;
        }
        else if( EQUALN(osName,"END",3) )
        {
            return TRUE;
        }
        else
        {
            osName = pszPathPrefix + osName;
            papszKeywordList = CSLSetNameValue( papszKeywordList, 
                                                osName, osValue );
        }
    }
}
Esempio n. 3
0
/**
 * Parses a single line of configuration text.
 *
 * After trimming, the line is handled as one of:
 *  - a comment (first character '#' or ';'): ignored, returns true;
 *  - a section header "[name]": fCurrSection and fEffectiveSection are set to
 *    the text between '[' and the first ']'; returns false if no ']' exists;
 *  - a key=value pair: split on '=' with SplitAt() and passed to ReadPair().
 *
 * A key of the form "section.key" (a dot that is neither the first nor the
 * last character) sets fEffectiveSection to the part before the first dot for
 * that one ReadPair() call; fEffectiveSection is reset to fCurrSection
 * afterwards. If ReadPair() succeeds and the resulting key is "LoadIni", the
 * value is passed to ReadSubSource() and that call's result is returned.
 *
 * @param in the raw line to parse.
 * @return true for ignored/accepted lines, false on a malformed section
 *         header or when ReadPair()/ReadSubSource() reports failure.
 */
bool plConfigSource::ReadString(const std::string & in)
{
    std::string work = in;
    xtl::trim(work);

    // Read the first character without indexing into an empty string.
    const char first = work.empty() ? '\0' : work[0];

    // comment
    if (first == '#' || first == ';')
        return true;

    // section
    if (first == '[')
    {
        // Keep the offset as size_type: storing it in an int truncates large
        // offsets and makes the npos comparison rely on sign conversion.
        const std::string::size_type close = work.find(']');
        if (close == std::string::npos)
            return false;
        fCurrSection = work.substr(1, close - 1);
        fEffectiveSection = fCurrSection;
        return true;
    }

    // key=value
    std::string key, value;
    SplitAt(key, value, '=', work);

    // dot notation makes section change for this key=value only.
    const std::string::size_type dot = key.find('.');
    if (dot != std::string::npos && dot > 0 && dot + 1 < key.size())
    {
        fEffectiveSection.assign(key.substr(0, dot));
        key.assign(key.substr(dot + 1));
    }

    bool ret = ReadPair(key, value);
    // Restore the section regardless of the outcome of ReadPair().
    fEffectiveSection = fCurrSection;

    if (ret && strcmp("LoadIni", key.c_str()) == 0)
    {
        ret = ReadSubSource(value.c_str());
    }

    return ret;
}
Esempio n. 4
0
/**
 * Parses a single line of ini-style text, ignoring section headers.
 *
 * The trimmed line is skipped (returning true) when its first character is
 * '#', ';' or '['. Any other line is split on '=' with SplitAt() and the
 * resulting key and value are passed to ReadPair().
 *
 * @param in the raw line to parse.
 * @return true for skipped lines, otherwise the result of ReadPair().
 */
bool plIniNoSectionsConfigSource::ReadString(const std::string & in)
{
    std::string line(in);
    xtl::trim(line);

    switch (line[0])
    {
        case '#':   // comment
        case ';':   // comment
        case '[':   // section header, deliberately ignored
            return true;
        default:
            break;
    }

    // Everything else is treated as key=value.
    std::string name;
    std::string val;
    SplitAt(name, val, '=', line);

    return ReadPair(name, val);
}
Esempio n. 5
0
/**
 * Reads name/value pairs via ReadPair() until a keyword named END is found,
 * storing each ordinary pair in papszKeywordList.
 *
 * A pair named PROPERTY, HISTORY or TASK is not stored; its value is
 * remembered and, while non-empty, prepended (followed by ".") to the name of
 * every subsequent pair that is stored.
 *
 * @param pszPathPrefix unused.
 * @return TRUE once END is read, FALSE if ReadPair() fails first.
 */
int VICARKeywordHandler::ReadGroup( CPL_UNUSED const char *pszPathPrefix ) {
    CPLString osKey;
    CPLString osVal;
    CPLString osSection;

    while( true ) {
        if( !ReadPair( osKey, osVal ) )
            return FALSE;

        if( EQUAL(osKey,"END") )
            return TRUE;

        const bool bStartsSection =
            EQUAL(osKey,"PROPERTY") || EQUAL(osKey,"HISTORY") || EQUAL(osKey,"TASK");
        if( bStartsSection ) {
            // Remember the value; it qualifies the keywords that follow.
            osSection = osVal;
            continue;
        }

        // Qualify the keyword with the remembered value, if there is one.
        if( !EQUAL(osSection,"") )
            osKey = osSection + "." + osKey;
        papszKeywordList = CSLSetNameValue( papszKeywordList, osKey, osVal );
    }
}