예제 #1
0
static int fetch_disc_read_callback(const bam1_t* alignment, void* data) {
    //    MEI_data* mei_data = static_cast<MEI_data*>(data);
    std::pair<MEI_data*, UserDefinedSettings*>* env = static_cast<std::pair<MEI_data*, UserDefinedSettings*>*>(data);
    MEI_data* mei_data = env->first;
    UserDefinedSettings* userSettings = env->second;
    if (!(alignment->core.flag & BAM_FUNMAP || alignment->core.flag & BAM_FMUNMAP) && // Both ends are mapped.
        !is_concordant(alignment, mei_data->current_insert_size) &&                   // Ends map discordantly.
        // Extra check for (very) large mapping distance.  This is done beside the check for read
        // discordance to speed up computation by ignoring signals from small structural variants.
        (alignment->core.tid != alignment->core.mtid ||
         abs(alignment->core.pos - alignment->core.mpos) > userSettings->MIN_DD_MAP_DISTANCE)) {
            
            // Save alignment as simple_read object.
            std::string read_name = enrich_read_name(bam1_qname(alignment), alignment->core.flag & BAM_FREAD1);
            char strand = bam1_strand(alignment)? Minus : Plus;
            char mate_strand = bam1_mstrand(alignment)? Minus : Plus;
            std::string read_group;
            get_read_group(alignment, read_group);
            std::string sample_name;
            get_sample_name(read_group, mei_data->sample_names, sample_name);
            
            simple_read* read = new simple_read(read_name, alignment->core.tid, alignment->core.pos, strand, sample_name,
                                                get_sequence(bam1_seq(alignment), alignment->core.l_qseq),
                                                alignment->core.mtid, alignment->core.mpos, mate_strand);
            mei_data->discordant_reads.push_back(read);
        }
    return 0;
}
예제 #2
0
// Builds the full name "<prefix>__<sample>", where <sample> is what
// get_sample_name() extracts from `name`.
//
// The systematic and variable parts are also extracted but are currently not
// appended; the longer form "<prefix>__<sample>__<sys>__<var>" is only meant
// for systematics.
TString get_full_name(TString name, TString prefix)
{
  TString sample_part = get_sample_name(name);
  TString sys_part = get_sys_name(name);
  TString var_part = get_var_name(name);

  TString full_name(prefix.Data());
  full_name.Append("__" + sample_part);
  return full_name;
}
예제 #3
0
// Stores `sample_interval` in the global sample_container and applies it to
// selected samples on the `sample_head` list.
//
// A sample is updated (via set_sample_interval) when its name, as returned by
// get_sample_name(), equals one of SAMPLE_NAME_CPU, SAMPLE_NAME_MEMUSAGE,
// SAMPLE_NAME_TMP or SAMPLE_NAME_DHCPUSE.  All other samples are left untouched.
//
// NOTE(review): the function body visible here has no return statement or
// closing brace; the return value cannot be documented from this excerpt.
int set_all_sample_interval( unsigned int sample_interval )		// applies to CPU, memory usage, temperature and DHCP usage samples
{
    struct list_head *pos=NULL;
    ac_sample_t *sample=NULL;
    const char *sample_name=NULL;
        
    // Record the interval on the container itself before touching individual samples.
    sample_container.sample_interval = sample_interval;
    
    list_for_each(pos,sample_head)
    {
    	 // LPSAMPLE presumably maps the list node to its enclosing ac_sample_t -- TODO confirm.
    	 sample = LPSAMPLE(pos);
	 sample_name=get_sample_name( sample );
    	 // strcmp() returns 0 on equality, so each !strcmp(...) is an exact name match.
    	 if ( !strcmp ( SAMPLE_NAME_CPU, sample_name )
		 	||!strcmp ( SAMPLE_NAME_MEMUSAGE, sample_name )
		 	||!strcmp ( SAMPLE_NAME_TMP, sample_name ) 
		 	||!strcmp ( SAMPLE_NAME_DHCPUSE, sample_name ))
    	 {
        	set_sample_interval( LPSAMPLE(pos), sample_interval );
    	 }
    }
예제 #4
0
// Finds candidate breakpoints for a cluster of connected discordant reads and
// appends every accepted one to `breakpoints`.  Nothing is returned and nothing is
// appended when no viable breakpoint is found.
//
// Parameters:
//   cluster         - reads forming the cluster; must be non-empty (accessed with at(),
//                     which throws std::out_of_range on an empty vector).  Copies of all
//                     cluster reads are attached to each reported breakpoint.
//   bam_sources     - forwarded to get_split_reads_for_cluster().
//   insert_size     - not used by this function.
//   cluster_tid     - reference id stored in each reported breakpoint.
//   cluster_strand  - strand of the cluster (Plus or Minus); only split reads whose
//                     anchor strand (MatchedD) equals it are considered.
//   chromosome      - chromosome searched for split reads and for the local far-end check.
//   sample_dict     - forwarded to get_sample_name() to resolve a read group to a sample name.
//   breakpoints     - output; accepted breakpoints are appended.
//   userSettings    - supplies MIN_DD_BREAKPOINT_SUPPORT and MIN_DD_MAP_DISTANCE.
static void get_breakpoints(std::vector<simple_read*>& cluster, std::vector<bam_info>& bam_sources, int insert_size,
                            int cluster_tid, char cluster_strand, const Chromosome* chromosome, 
                            std::map<std::string, std::string>& sample_dict, std::vector<MEI_breakpoint>& breakpoints,
                            UserDefinedSettings* userSettings) {
    typedef std::map<int, std::vector<simple_read> > candidate_map;

    std::vector<SPLIT_READ> split_reads;
    int outer_read_pos = (cluster_strand == Minus)? cluster.at(cluster.size()-1)->pos : cluster.at(0)->pos;
    get_split_reads_for_cluster(bam_sources, cluster_strand, outer_read_pos, chromosome, split_reads);
    
    // Search for split reads with a mate close to the outer read of the
    // cluster.  Store candidate breakpoints.
    // Todo: speedup by exploiting the fact that both clusters and split reads are sorted
    // by mapping location.
    candidate_map bio_candidate_breakpoints;
    for (size_t i = 0; i < split_reads.size(); i++) {
        // Refer to the element in place; copying every SPLIT_READ is unnecessary.
        SPLIT_READ& read = split_reads.at(i);
        
        char anchor_strand = read.MatchedD;
        if (cluster_strand != anchor_strand) {
            continue;
        }
        
        unsigned int comp_candidate_bp = read.getLastAbsLocCloseEnd();
        unsigned int bio_candidate_bp = get_bio_chr_index(comp_candidate_bp);
        
        candidate_map::iterator candidate = bio_candidate_breakpoints.find(bio_candidate_bp);
        if (candidate == bio_candidate_breakpoints.end()) {
            // New candidate, look ahead to check whether there are enough supporting split reads.
            int SR_support = 1;
            for (size_t j = i + 1; j < split_reads.size(); j++) {
                if (split_reads.at(j).getLastAbsLocCloseEnd() == comp_candidate_bp && 
                    split_reads.at(j).MatchedD == cluster_strand) {
                    SR_support++;
                }
            }
            if (SR_support < userSettings->MIN_DD_BREAKPOINT_SUPPORT) {
                // Not enough support, skip it.
                continue;
            }
            // Keep the iterator returned by insert() so the entry is not looked up again below.
            candidate = bio_candidate_breakpoints.insert(
                std::make_pair(bio_candidate_bp, std::vector<simple_read>())).first;
        }
        
        // Store the unmatched sequence as it should be matched on the opposite strand of
        // the mapped mate.
        std::string whole_sequence;
        std::string mapped_part;
        std::string unmapped_part;
        if (anchor_strand == Plus) {
            whole_sequence = ReverseComplement(read.getUnmatchedSeq());
            mapped_part = whole_sequence.substr(0, read.CloseEndLength);
            unmapped_part = whole_sequence.substr(read.CloseEndLength);
        } else {
            whole_sequence = read.getUnmatchedSeq();
            mapped_part = whole_sequence.substr(whole_sequence.length() - read.CloseEndLength);
            unmapped_part = whole_sequence.substr(0, whole_sequence.length() - read.CloseEndLength);
        }

        std::string sample_name;
        get_sample_name(read.read_group, sample_dict, sample_name);
        simple_read simple_split_read(read.Name, -1, -1, '?', sample_name, whole_sequence, mapped_part, 
                                      unmapped_part);
        candidate->second.push_back(simple_split_read);
    }
    
    // The consensus of the split reads is built on the strand opposite to the cluster.
    char SR_mapping_strand = (cluster_strand == Plus)? Minus : Plus;
    
    // Remove any split reads for which a far end can be found locally, these are
    // assumed to contribute to some local variants.
    // Todo: determine region that is searched for far end.
    candidate_map::iterator map_iter;
    for (map_iter = bio_candidate_breakpoints.begin(); map_iter != bio_candidate_breakpoints.end(); ++map_iter) {
        
        std::string mapped_consensus = get_consensus_unmapped(map_iter->second, SR_mapping_strand);
        if (mapped_consensus.length() == 0) {
            LOG_DEBUG(*logStream << time_log() << "Consensus building failed for split read mapping ends (" << 
                      map_iter->second.size() << " reads @ " << map_iter->first << ")" << std::endl);
            continue;
        }
        int bio_bp = map_iter->first;
                
        // If far end consensus is not found in local window, store breakpoint.
        // The window spans MIN_DD_MAP_DISTANCE on either side of the breakpoint, clipped to
        // the start of the chromosome and to its computational size.
        size_t FE_window_start = std::max(0, get_comp_chr_index(bio_bp) - userSettings->MIN_DD_MAP_DISTANCE);
        size_t FE_window_size = std::min(chromosome->getCompSize() - static_cast<unsigned>(FE_window_start), 
                                         2 * static_cast<unsigned>(userSettings->MIN_DD_MAP_DISTANCE));
        if (!contains_subseq_any_strand(mapped_consensus, chromosome->getSeq().substr(FE_window_start, 
                FE_window_size), MIN_CONSENSUS_LENGTH)) {
            MEI_breakpoint bp(cluster_tid, bio_bp, cluster_strand);
            bp.associated_split_reads = map_iter->second;

            // Link associated discordant reads (all reads from cluster) and split reads.
            std::vector<simple_read*>::iterator read_iter;
            for (read_iter = cluster.begin(); read_iter != cluster.end(); ++read_iter) {
                bp.associated_reads.push_back(*(*read_iter));
            }
            breakpoints.push_back(bp);
        }
    }
}