static int fetch_disc_read_callback(const bam1_t* alignment, void* data) { // MEI_data* mei_data = static_cast<MEI_data*>(data); std::pair<MEI_data*, UserDefinedSettings*>* env = static_cast<std::pair<MEI_data*, UserDefinedSettings*>*>(data); MEI_data* mei_data = env->first; UserDefinedSettings* userSettings = env->second; if (!(alignment->core.flag & BAM_FUNMAP || alignment->core.flag & BAM_FMUNMAP) && // Both ends are mapped. !is_concordant(alignment, mei_data->current_insert_size) && // Ends map discordantly. // Extra check for (very) large mapping distance. This is done beside the check for read // discordance to speed up computation by ignoring signals from small structural variants. (alignment->core.tid != alignment->core.mtid || abs(alignment->core.pos - alignment->core.mpos) > userSettings->MIN_DD_MAP_DISTANCE)) { // Save alignment as simple_read object. std::string read_name = enrich_read_name(bam1_qname(alignment), alignment->core.flag & BAM_FREAD1); char strand = bam1_strand(alignment)? Minus : Plus; char mate_strand = bam1_mstrand(alignment)? Minus : Plus; std::string read_group; get_read_group(alignment, read_group); std::string sample_name; get_sample_name(read_group, mei_data->sample_names, sample_name); simple_read* read = new simple_read(read_name, alignment->core.tid, alignment->core.pos, strand, sample_name, get_sequence(bam1_seq(alignment), alignment->core.l_qseq), alignment->core.mtid, alignment->core.mpos, mate_strand); mei_data->discordant_reads.push_back(read); } return 0; }
// Builds the full name "<prefix>__<sample>", where <sample> is extracted from
// `name` by get_sample_name().
//
// The systematic and variation parts of `name` are still extracted, but they
// are only relevant for the disabled systematics naming scheme
// "<prefix>__<sample>__<sys>__<var>" and do not end up in the result.
TString get_full_name(TString name, TString prefix) {
    const TString sample_part = get_sample_name(name);
    const TString sys_part = get_sys_name(name);
    const TString var_part = get_var_name(name);
    (void)sys_part;  // only used by the systematics naming scheme (disabled)
    (void)var_part;

    TString full_name(prefix.Data());
    full_name.Append("__").Append(sample_part);
    return full_name;
}
int set_all_sample_interval( unsigned int sample_interval ) //include cpu memory temperature { struct list_head *pos=NULL; ac_sample_t *sample=NULL; const char *sample_name=NULL; sample_container.sample_interval = sample_interval; list_for_each(pos,sample_head) { sample = LPSAMPLE(pos); sample_name=get_sample_name( sample ); if ( !strcmp ( SAMPLE_NAME_CPU, sample_name ) ||!strcmp ( SAMPLE_NAME_MEMUSAGE, sample_name ) ||!strcmp ( SAMPLE_NAME_TMP, sample_name ) ||!strcmp ( SAMPLE_NAME_DHCPUSE, sample_name )) { set_sample_interval( LPSAMPLE(pos), sample_interval ); } }
// Returns a breakpoint for a cluster of connected reads. If no viable // breakpoint can be found, it returns a breakpoint with position -1. // Note: returned pointer must be deleted by caller. static void get_breakpoints(std::vector<simple_read*>& cluster, std::vector<bam_info>& bam_sources, int insert_size, int cluster_tid, char cluster_strand, const Chromosome* chromosome, std::map<std::string, std::string>& sample_dict, std::vector<MEI_breakpoint>& breakpoints, UserDefinedSettings* userSettings) { std::vector<SPLIT_READ> split_reads; int outer_read_pos = (cluster_strand == Minus)? cluster.at(cluster.size()-1)->pos : cluster.at(0)->pos; // int inner_read_pos = (cluster_strand == Minus)? cluster.at(0)->pos : cluster.at(cluster.size()-1)->pos; get_split_reads_for_cluster(bam_sources, cluster_strand, outer_read_pos, chromosome, split_reads); // Search for split reads with a mate close to the outer read of the // cluster. Store candidate breakpoints. // Todo: speedup by exploiting the fact that both clusters and split reads are sorted // by mapping location. std::map<int, std::vector<simple_read> > bio_candidate_breakpoints; for (size_t i = 0; i < split_reads.size(); i++) { SPLIT_READ read = split_reads.at(i); char anchor_strand = read.MatchedD; if (cluster_strand != anchor_strand) { continue; } unsigned int comp_candidate_bp = read.getLastAbsLocCloseEnd(); unsigned int bio_candidate_bp = get_bio_chr_index(comp_candidate_bp); if (bio_candidate_breakpoints.find(bio_candidate_bp) == bio_candidate_breakpoints.end()) { // New candidate, look ahead to check whether there are enough supporting split reads. int SR_support = 1; for (size_t j = i + 1; j < split_reads.size(); j++) { if (split_reads.at(j).getLastAbsLocCloseEnd() == comp_candidate_bp && split_reads.at(j).MatchedD == cluster_strand) { SR_support++; } } if (SR_support < userSettings->MIN_DD_BREAKPOINT_SUPPORT) { // Not enough support, skip it. 
continue; } else { std::vector<simple_read> new_bp_split_reads; bio_candidate_breakpoints.insert(std::make_pair(bio_candidate_bp, new_bp_split_reads)); } } // Store the unmatched sequence as it should be matched on the opposite strand of // the mapped mate. std::string whole_sequence; std::string mapped_part; std::string unmapped_part; if (anchor_strand == Plus) { whole_sequence = ReverseComplement(read.getUnmatchedSeq()); mapped_part = whole_sequence.substr(0, read.CloseEndLength); unmapped_part = whole_sequence.substr(read.CloseEndLength, whole_sequence.length()); } else { whole_sequence = read.getUnmatchedSeq(); mapped_part = whole_sequence.substr(whole_sequence.length() - read.CloseEndLength, whole_sequence.length()); unmapped_part = whole_sequence.substr(0, whole_sequence.length() - read.CloseEndLength); } std::string sample_name; get_sample_name(read.read_group, sample_dict, sample_name); simple_read simple_split_read(read.Name, -1, -1, '?', sample_name, whole_sequence, mapped_part, unmapped_part); (*bio_candidate_breakpoints.find(bio_candidate_bp)).second.push_back(simple_split_read); } char SR_mapping_strand = (cluster_strand == Plus)? Minus : Plus; // Remove any split reads for which a far end can be found locally, these are // assumed to contribute to some local variants. // Todo: determine region that is searched for far end. 
std::map<int, std::vector<simple_read> >::iterator map_iter; for (map_iter = bio_candidate_breakpoints.begin(); map_iter != bio_candidate_breakpoints.end(); ++map_iter) { std::string mapped_consensus = get_consensus_unmapped((*map_iter).second, SR_mapping_strand); std::vector<simple_read> sreads = (*map_iter).second; if (mapped_consensus.length() == 0) { LOG_DEBUG(*logStream << time_log() << "Consensus building failed for split read mapping ends (" << map_iter->second.size() << " reads @ " << map_iter->first << ")" << std::endl); continue; } int bio_bp = (*map_iter).first; // If far end consensus is not found in local window, store breakpoint. size_t FE_window_start = std::max(0, get_comp_chr_index(bio_bp) - userSettings->MIN_DD_MAP_DISTANCE); size_t FE_window_size = std::min(chromosome->getCompSize() - (unsigned) FE_window_start, 2 * (unsigned) userSettings->MIN_DD_MAP_DISTANCE); if (!contains_subseq_any_strand(mapped_consensus, chromosome->getSeq().substr(FE_window_start, FE_window_size), MIN_CONSENSUS_LENGTH)) { MEI_breakpoint bp(cluster_tid, bio_bp, cluster_strand); bp.associated_split_reads = (*map_iter).second; // Link associated discordant reads (all reads from cluster) and split reads. std::vector<simple_read*>::iterator read_iter; for (read_iter = cluster.begin(); read_iter != cluster.end(); ++read_iter) { bp.associated_reads.push_back(*(*read_iter)); } breakpoints.push_back(bp); } } }