Beispiel #1
0
// See documentation in header file.
//
// Groups the discordant reads stored in currentState into clusters, determines
// breakpoints for every cluster that is large enough, and appends them to
// currentState.MEI_breakpoints.
//
//   currentState  supplies discordant_reads, current_insert_size and
//                 sample_names; receives the results in MEI_breakpoints.
//   bam_sources   forwarded unchanged to get_breakpoints.
//   chromosome    forwarded unchanged to get_breakpoints.
//   userSettings  forwarded to cluster_reads / get_breakpoints; its
//                 MIN_DD_CLUSTER_SIZE is the minimum cluster size considered.
void searchMEIBreakpoints(MEI_data& currentState, std::vector<bam_info>& bam_sources, const Chromosome* chromosome,
                          UserDefinedSettings* userSettings) {
    LOG_DEBUG(*logStream << time_log() << "Start searching for breakpoints..." << std::endl);
    std::vector<std::vector<simple_read*> > clusters;
    cluster_reads(currentState.discordant_reads, currentState.current_insert_size, clusters, userSettings);

    // Find breakpoints per cluster.
    int bp_count = 0;
    for (size_t i = 0; i < clusters.size(); i++) {
        // Bind by reference: copying the whole read-pointer vector for every
        // cluster is unnecessary, 'clusters' is local and outlives the loop.
        std::vector<simple_read*>& cluster = clusters.at(i);

        if (cluster.empty() || cluster.size() < static_cast<size_t>(userSettings->MIN_DD_CLUSTER_SIZE)) {
            // Fluke cluster, skip it. (If there are very few reads in the cluster,
            // we likely won't find enough split reads supporting an insertion.)
            // The explicit empty() test protects the at(0) accesses below when
            // the configured minimum size is zero.
            continue;
        }

        // Strand and target id of the cluster are taken from its first read.
        char cluster_strand = cluster.at(0)->strand;
        int cluster_tid = cluster.at(0)->tid;

        // Find breakpoints for this cluster.
        std::vector<MEI_breakpoint> MEI_bps;
        get_breakpoints(cluster, bam_sources, currentState.current_insert_size, cluster_tid, cluster_strand, chromosome,
                        currentState.sample_names, MEI_bps, userSettings);

        if (MEI_bps.size() > 1) {
            // More than one breakpoint found for the current cluster.  Keep only the
            // one with the most supporting split reads (the first one wins on ties).
            // The search is seeded with the first candidate so that a real
            // breakpoint is always selected, even if none has split-read support.
            std::vector<MEI_breakpoint>::iterator best = MEI_bps.begin();
            for (std::vector<MEI_breakpoint>::iterator candidate = best + 1; candidate != MEI_bps.end();
                 ++candidate) {
                if (candidate->associated_split_reads.size() > best->associated_split_reads.size()) {
                    best = candidate;
                }
            }
            // Copy before clear(): clearing invalidates 'best'.
            MEI_breakpoint best_bp = *best;
            MEI_bps.clear();
            MEI_bps.push_back(best_bp);
        } else if (MEI_bps.size() == 0) {
            // No breakpoint found with split read support.  Estimate breakpoint from cluster reads.
            get_breakpoint_estimation(cluster, currentState.current_insert_size, cluster_tid, cluster_strand, MEI_bps);
        }

        // Record and report every breakpoint retained for this cluster.
        for (std::vector<MEI_breakpoint>::iterator bp = MEI_bps.begin(); bp != MEI_bps.end(); ++bp) {
            currentState.MEI_breakpoints.push_back(*bp);
            bp_count += 1;
            LOG_INFO(*logStream << "Found potential DD breakpoint: " << bp->breakpoint_tid << ", " <<
                     bp->breakpoint_pos << ", " << bp->cluster_strand << ", " <<
                     bp->associated_reads.size() << ", " <<
                     bp->associated_split_reads.size() << std::endl);
        }
    }
    LOG_DEBUG(*logStream << time_log() << "Found " << bp_count << " breakpoints for " << clusters.size() <<
              " clusters." << std::endl);
}
Beispiel #2
0
/* Create a stream of breakpoint values from a breakpoint file and
   initialize the stream counter and increment value.

   fp     open breakpoint file, passed straight to get_breakpoints
   srate  sample rate; must be non-zero because the per-sample time
          increment is computed as 1.0/srate
   size   optional (may be NULL); on success receives the number of
          breakpoints loaded

   Returns a newly malloc'ed BRKSTREAM positioned on the first span
   (points[0] .. points[1]), or NULL if srate is zero, memory allocation
   fails, the breakpoints cannot be loaded, or fewer than two points
   were read. */
BRKSTREAM* bps_newstream(FILE* fp, unsigned long srate, unsigned long* size)
{
	BRKSTREAM* stream;
	BREAKPOINT* points;
	unsigned long npoints = 0;
	
	if (srate==0)
	{
		printf("Error creating stream - sample rate cannot be zero.\n");
		return NULL;
	}

	stream = (BRKSTREAM*)malloc(sizeof(BRKSTREAM));
	if (stream==NULL)
		return NULL;
	/* load breakpoint file and setup stream info */
	points = get_breakpoints(fp,&npoints);
	if (points == NULL)
	{
		free(stream);
		return NULL;
	}
	if (npoints<2)
	{
		printf("breakpoint file is too small - "
		       "at least two points required\n");
		/* Release the loaded points as well, otherwise they leak on this
		   error path.  NOTE(review): assumes get_breakpoints returns a
		   heap block obtained from malloc/realloc - confirm. */
		free(points);
		free(stream);
		return NULL;
	}
	/* init the stream object */
	stream->points = points;
	stream->npoints = npoints;
	/* counters */
	stream->curpos = 0.0;
	stream->ileft = 0;
	stream->iright = 1;
	stream->incr = 1.0/srate;
	/* first span: cache its end points and their time/value deltas */
	stream->leftpoint = stream->points[stream->ileft];
	stream->rightpoint = stream->points[stream->iright];
	stream->width = stream->rightpoint.time - stream->leftpoint.time;
	stream->height = stream->rightpoint.value - stream->leftpoint.value;
	stream->more_points = 1;
	if (size)
		*size = stream->npoints;
	return stream;
}