// See documentation in header file. void searchMEIBreakpoints(MEI_data& currentState, std::vector<bam_info>& bam_sources, const Chromosome* chromosome, UserDefinedSettings* userSettings) { LOG_DEBUG(*logStream << time_log() << "Start searching for breakpoints..." << std::endl); std::vector<std::vector<simple_read*> > clusters; cluster_reads(currentState.discordant_reads, currentState.current_insert_size, clusters, userSettings); // Find breakpoints per cluster. int bp_count = 0; for (size_t i = 0; i < clusters.size(); i++) { // print cluster debug info std::vector<simple_read*> cluster = clusters.at(i); if (cluster.size() < ((size_t) userSettings->MIN_DD_CLUSTER_SIZE)) { // Fluke cluster, skip it. (If there are very few reads in the cluster, // we likely won't find enough split reads supporting an insertion) continue; } // Find breakpoint for this cluster char cluster_strand = cluster.at(0)->strand; int cluster_tid = cluster.at(0)->tid; std::vector<MEI_breakpoint> MEI_bps; std::vector<MEI_breakpoint>::iterator MEI_iter; get_breakpoints(cluster, bam_sources, currentState.current_insert_size, cluster_tid, cluster_strand, chromosome, currentState.sample_names, MEI_bps, userSettings); if (MEI_bps.size() > 1) { // More than one breakpoints found for current cluster. Select only the one with the // most split reads supporting it. size_t best_support = 0; MEI_breakpoint best_bp; for (MEI_iter = MEI_bps.begin(); MEI_iter != MEI_bps.end(); ++MEI_iter) { if (MEI_iter->associated_split_reads.size() > best_support) { best_bp = *MEI_iter; best_support = MEI_iter->associated_split_reads.size(); } } MEI_bps.clear(); MEI_bps.push_back(best_bp); } else if (MEI_bps.size() == 0) { // No breakpoint found with split read support. Estimate breakpoint from cluster reads. get_breakpoint_estimation(cluster, currentState.current_insert_size, cluster_tid, cluster_strand, MEI_bps); } // Check breakpoint validity. for (MEI_iter = MEI_bps.begin(); MEI_iter != MEI_bps.end(); ++MEI_iter) { currentState.MEI_breakpoints.push_back(*MEI_iter); bp_count += 1; LOG_INFO(*logStream << "Found potential DD breakpoint: " << (*MEI_iter).breakpoint_tid << ", " << (*MEI_iter).breakpoint_pos << ", " << (*MEI_iter).cluster_strand << ", " << (*MEI_iter).associated_reads.size() << ", " << (*MEI_iter).associated_split_reads.size() << std::endl); } } LOG_DEBUG(*logStream << time_log() << "Found " << bp_count << " breakpoints for " << clusters.size() << " clusters." << std::endl); }
/* create a stream of breakpoint values from a breakpoint file initialize stream counter and increment value */ BRKSTREAM* bps_newstream(FILE* fp, unsigned long srate, unsigned long* size) { BRKSTREAM* stream; BREAKPOINT* points; unsigned long npoints; if (srate==0) { printf("Error creating stream - sample rate cannot be zero.\n"); return NULL; } stream = (BRKSTREAM*)malloc(sizeof(BRKSTREAM)); if (stream==NULL) return NULL; /* load breakpoint file and setup stream info */ points = get_breakpoints(fp,&npoints); if (points == NULL) { free(stream); return NULL; } if (npoints<2) { printf("breakpoint file is too small - " "at least two points required\n"); free(stream); return(NULL); } /* init the stream object */ stream->points = points; stream->npoints = npoints; /* counters */ stream->curpos = 0.0; stream->ileft = 0; stream->iright = 1; stream->incr = 1.0/srate; /* first span */ stream->leftpoint = stream->points[stream->ileft]; stream->rightpoint = stream->points[stream->iright]; stream->width = stream->rightpoint.time - stream->leftpoint.time; stream->height = stream->rightpoint.value - stream->leftpoint.value; stream->more_points = 1; if (size) *size = stream->npoints; return stream; }