/*
 * Apply the deltas (dmsec, dusec, dnsec) to the clock's msec/usec/nsec fields
 * while holding the clock's lock.  Always returns OS_OK.
 *
 * The nsec field is kept modulo 1000000000 and the usec field modulo 1000000;
 * the quotient of each division is added to the next coarser field.
 *
 * NOTE(review): the intermediate sums are ints, so a negative sum leaves a
 * negative remainder in the field -- confirm whether negative deltas are
 * expected.  Also confirm the intended units of the carry (the quotient of
 * nsec / 1000000000 is added to the usec sum as-is).
 */
int time_adjust(xclock_t * clock, int dmsec, int dusec, int dnsec){

   const int nsec_wrap = 1000000000;
   const int usec_wrap = 1000000;
   int nsec_sum;
   int usec_sum;

   time_log(("time_adjust: to [%d:%d:%d]\n", dmsec, dusec, dnsec));

   /* Nothing to apply: return without taking the lock. */
   if(!(dmsec != 0 || dusec != 0 || dnsec != 0))
      return OS_OK;

   xthr_lock(clock->lock);

   /* Finest field first; its overflow feeds the usec sum. */
   nsec_sum = clock->nsec + dnsec;
   clock->nsec = nsec_sum % nsec_wrap;

   /* Then usec; its overflow feeds msec. */
   usec_sum = clock->usec + dusec + nsec_sum / nsec_wrap;
   clock->usec = usec_sum % usec_wrap;

   clock->msec += dmsec + usec_sum / usec_wrap;

   xthr_unlock(clock->lock);

   /* The fields are read again here after the lock has been released. */
   time_log(("time_adjust: clock adjusted to [%d:%d:%d].\n", clock->msec, clock->usec, clock->nsec));

   return OS_OK;
}
Exemple #2
0
// See documentation in header file.
// Clusters the discordant reads held in currentState and looks for a breakpoint in every
// cluster that is large enough.  When several split-read supported breakpoints are found
// for a cluster only the best supported one is kept; when none is found the breakpoint is
// estimated from the cluster reads.  The resulting breakpoints are appended to
// currentState.MEI_breakpoints.
void searchMEIBreakpoints(MEI_data& currentState, std::vector<bam_info>& bam_sources, const Chromosome* chromosome,
                          UserDefinedSettings* userSettings) {
    LOG_DEBUG(*logStream << time_log() << "Start searching for breakpoints..." << std::endl);
    std::vector<std::vector<simple_read*> > clusters;
    cluster_reads(currentState.discordant_reads, currentState.current_insert_size, clusters, userSettings);

    // Find breakpoints per cluster.
    int bp_count = 0;
    for (size_t i = 0; i < clusters.size(); i++) {
        // Bind by reference: the cluster is only inspected here, so copying the whole
        // vector for every cluster is unnecessary work.
        std::vector<simple_read*>& cluster = clusters.at(i);

        if (cluster.size() < ((size_t) userSettings->MIN_DD_CLUSTER_SIZE)) {
            // Fluke cluster, skip it. (If there are very few reads in the cluster,
            // we likely won't find enough split reads supporting an insertion)
            continue;
        }

        // Find breakpoint for this cluster.  Strand and tid are taken from the first read.
        char cluster_strand = cluster.at(0)->strand;
        int cluster_tid = cluster.at(0)->tid;
        std::vector<MEI_breakpoint> MEI_bps;
        std::vector<MEI_breakpoint>::iterator MEI_iter;
        get_breakpoints(cluster, bam_sources, currentState.current_insert_size, cluster_tid, cluster_strand, chromosome,
                        currentState.sample_names, MEI_bps, userSettings);
        if (MEI_bps.size() > 1) {
            // More than one breakpoint found for current cluster.  Select only the one with the
            // most split reads supporting it (the first one wins on ties).
            size_t best_support = 0;
            MEI_breakpoint best_bp;
            for (MEI_iter = MEI_bps.begin(); MEI_iter != MEI_bps.end(); ++MEI_iter) {
                if (MEI_iter->associated_split_reads.size() > best_support) {
                    best_bp = *MEI_iter;
                    best_support = MEI_iter->associated_split_reads.size();
                }
            }
            MEI_bps.clear();
            MEI_bps.push_back(best_bp);
        } else if (MEI_bps.empty()) {
            // No breakpoint found with split read support.  Estimate breakpoint from cluster reads.
            get_breakpoint_estimation(cluster, currentState.current_insert_size, cluster_tid, cluster_strand, MEI_bps);
        }

        // Store and log the breakpoint(s) selected for this cluster.
        for (MEI_iter = MEI_bps.begin(); MEI_iter != MEI_bps.end(); ++MEI_iter) {
            currentState.MEI_breakpoints.push_back(*MEI_iter);
            bp_count += 1;
            LOG_INFO(*logStream << "Found potential DD breakpoint: " << (*MEI_iter).breakpoint_tid << ", " <<
                     (*MEI_iter).breakpoint_pos << ", " << (*MEI_iter).cluster_strand << ", " <<
                     (*MEI_iter).associated_reads.size() << ", " <<
                     (*MEI_iter).associated_split_reads.size() << std::endl);
        }
    }
    LOG_DEBUG(*logStream << time_log() << "Found " << bp_count << " breakpoints for " << clusters.size() <<
              " clusters." << std::endl);
}
Exemple #3
0
// Loads the discordant reads inside `window` on sequence `chr_name` from every BAM file in
// `bam_sources` into `mei_data` (through fetch_disc_read_callback).
//
// A BAM file that cannot be opened, has no index, has an unreadable header or does not
// contain `chr_name` is skipped with a warning.  Every handle acquired for a BAM file
// (file, index, header) is released again before moving on to the next file.
//
// Returns 0; skipped files are not reported as an error.
static int load_discordant_reads(MEI_data& mei_data, std::vector<bam_info>& bam_sources, const std::string& chr_name,
                                 const SearchWindow& window, UserDefinedSettings* userSettings) {
    // Loop over associated bam files.
    for (size_t i = 0; i < bam_sources.size(); i++) {
        // Locate file (by reference, the record is only read).
        const bam_info& source = bam_sources.at(i);

        LOG_DEBUG(*logStream << time_log() << "Loading discordant reads from " << source.BamFile << std::endl);

        // Setup link to bamfile, its index and header.
        bamFile fp = bam_open(source.BamFile.c_str(), "r");
        if (fp == NULL) {
            LOG_WARN(*logStream << time_log() << "Failed to open BAM-file " << source.BamFile.c_str() << std::endl);
            LOG_WARN(*logStream << "Skipping window: " << chr_name << ", " << window.getStart() << "--" <<
                     window.getEnd() << " for BAM-file: " << source.BamFile.c_str() << std::endl);
            continue;
        }

        bam_index_t *idx = bam_index_load(source.BamFile.c_str());
        if (idx == NULL) {
            LOG_WARN(*logStream << time_log() << "Failed to load index for " << source.BamFile.c_str() << std::endl);
            LOG_WARN(*logStream << "Skipping window: " << chr_name << ", " << window.getStart() << "--" <<
                     window.getEnd() << " for BAM-file: " << source.BamFile.c_str() << std::endl);
            bam_close(fp);
            continue;
        }

        bam_header_t *header = bam_header_read(fp);
        if (header == NULL) {
            LOG_WARN(*logStream << time_log() << "Failed to read header of " << source.BamFile.c_str() << std::endl);
            LOG_WARN(*logStream << "Skipping window: " << chr_name << ", " << window.getStart() << "--" <<
                     window.getEnd() << " for BAM-file: " << source.BamFile.c_str() << std::endl);
            bam_index_destroy(idx);
            bam_close(fp);
            continue;
        }
        bam_init_header_hash(header);
        int tid = bam_get_tid(header, chr_name.c_str());

        if (tid < 0) {
            LOG_WARN(*logStream << time_log() << "Could not find sequence in alignment file: '" << chr_name <<
                     "'" << std::endl);
            LOG_WARN(*logStream << "Skipping window: " << chr_name << ", " << window.getStart() << "--" <<
                     window.getEnd() << " for BAM-file: " << source.BamFile.c_str() << std::endl);
            bam_header_destroy(header);
            bam_index_destroy(idx);
            bam_close(fp);
            continue;
        }

        mei_data.sample_names = get_sample_dictionary(header);

        // Save insert size of current bamfile in data object provided for callback function.
        // Note: the insert size should ideally be separate from the MEI_data object, tried to do
        // this using a std::pair object, which did not work.  Suggestions are welcome here.
        mei_data.current_insert_size = source.InsertSize;
        mei_data.current_chr_name = chr_name;

        // Set up environment variable for callback function.
        std::pair<MEI_data*, UserDefinedSettings*> env = std::make_pair(&mei_data, userSettings);

        // Load discordant reads into mei_data.
        bam_fetch(fp, idx, tid, window.getStart(), window.getEnd(), &env, fetch_disc_read_callback);

        // Release everything acquired for this BAM file.
        bam_header_destroy(header);
        bam_index_destroy(idx);
        bam_close(fp);
    }
    return 0;
}
Exemple #4
0
// Writes the report for one dispersed duplication (DD) event to `out`: a separator line,
// a tab-separated summary line, human-readable summary lines, the split read support for
// both breakpoints and finally all supporting reads.  MEI_count is printed as the first
// column of the summary line.
static void reportMEIevent(MEI_data& mei_data, MEI_event& event, int MEI_count, Genome& genome,
                           std::map<int, std::string>& seq_name_dict, std::ostream& out) {

    // The reads of this event are about to be reported, so set their evidence strands first.
    set_evidence_strands(event);

    // Gather every read that has to be listed in the report.
    std::vector<simple_read> reads_to_report;
    get_event_supporting_reads(event, reads_to_report);

    // Support counts for the forward (fwd) and reverse (rev) cluster breakpoints.
    const size_t fwd_disc_count = event.fwd_cluster_bp.associated_reads.size();
    const size_t fwd_split_count = event.fwd_cluster_bp.associated_split_reads.size();
    const size_t rev_disc_count = event.rev_cluster_bp.associated_reads.size();
    const size_t rev_split_count = event.rev_cluster_bp.associated_split_reads.size();
    const size_t total_support = fwd_disc_count + fwd_split_count + rev_disc_count + rev_split_count;

    LOG_DEBUG(*logStream << time_log()
                         << "reporting DD: #fwd.disc.: " << fwd_disc_count
                         << ", #fwd.split: " << fwd_split_count
                         << ", #rev.disc.: " << rev_disc_count
                         << ", #rev.split: " << rev_split_count << std::endl);

    out << "####################################################################################################" << std::endl;

    // Machine-readable (tab-separated) summary line.
    out << MEI_count << "\t" << "DD" << "\t";
    // Looked up only now, so a missing tid throws at the same point in the output as before.
    const std::string& chromosome_name = seq_name_dict.at(event.fwd_cluster_bp.breakpoint_tid);
    out << chromosome_name << "\t" << event.fwd_cluster_bp.breakpoint_pos << "\t"
        << event.rev_cluster_bp.breakpoint_pos;
    out << "\t" << total_support << "\t" << fwd_disc_count << "\t" << fwd_split_count;
    out << "\t" << rev_disc_count << "\t" << rev_split_count << std::endl;

    // Human-readable summary lines.
    out << COMMENT_PREFIX << "Dispersed Duplication insertion (DD) found on chromosome '"
        << chromosome_name << "', breakpoint at "
        << event.fwd_cluster_bp.breakpoint_pos << " (estimated from + strand), "
        << event.rev_cluster_bp.breakpoint_pos << " (estimated from - strand)" << std::endl;
    out << COMMENT_PREFIX << "Found " << total_support << " supporting reads, of which "
        << fwd_disc_count << " discordant reads and "
        << fwd_split_count << " split reads at 5' end, "
        << rev_disc_count << " discordant reads and "
        << rev_split_count << " split reads at 3' end." << std::endl;

    // Split read support for the breakpoint at the 5' end ...
    out << COMMENT_PREFIX << "Supporting reads for insertion location (5' end):" << std::endl;
    report_split_read_support(genome, event.fwd_cluster_bp, true, seq_name_dict, out);
    // ... and for the breakpoint at the 3' end.
    out << COMMENT_PREFIX << "Supporting reads for insertion location (3' end):" << std::endl;
    report_split_read_support(genome, event.rev_cluster_bp, false, seq_name_dict, out);

    // Finally list all supporting reads and read fragments for the inserted element.
    report_supporting_reads(reads_to_report, seq_name_dict, out);
}
 /*
  * Sleep for `howlong` low-resolution time units using nanosleep().
  *
  * clock   - not used by this function.
  * howlong - time to sleep; values <= 0 return immediately.
  * remain  - optional; when non-NULL it receives the time that was not slept
  *           (0 when the sleep completed or nothing was requested).
  *
  * Returns OS_OK when nanosleep() succeeded (or nothing had to be slept) and
  * OS_EINTR when nanosleep() returned -1.
  */
 int lrtime_sleep(xclock_t * clock, rtime_t howlong, rtime_t *remain){

    int r;
    struct timespec tv, rem;

    if (howlong <= 0){
        /* Nothing slept, so nothing remains. */
        if(remain)
            *remain = 0;
        return OS_OK;
    }

    tv.tv_sec = howlong / LRTIME_SECOND_DIVISOR;
    tv.tv_nsec = (howlong % LRTIME_SECOND_DIVISOR) * LRT_HRT_DIVISOR;

    /*
     * nanosleep() only writes `rem` when the sleep is interrupted.  Clear it
     * up front so a completed sleep reports 0 instead of stack garbage.
     */
    rem.tv_sec = 0;
    rem.tv_nsec = 0;

    time_log(("lrtime_sleep: sleep %ums =  %u sec %u nsec\n", howlong, (uint)tv.tv_sec, (uint)tv.tv_nsec));
    r = nanosleep(&tv, &rem);
    if(remain)
        *remain = rem.tv_sec * LRTIME_SECOND_DIVISOR + rem.tv_nsec / LRT_HRT_DIVISOR;

    if(r == -1) return OS_EINTR;

    return OS_OK;
 }
Exemple #6
0
// Combines the breakpoints collected in finalState.MEI_breakpoints into dispersed
// duplication (DD) events and reports every event to `out`.
//
// The breakpoints are sorted with comp_breakpoint_pos; two consecutive breakpoints form an
// event when they lie on opposite strands, on the same tid, and not further apart than
// userSettings->MAX_DD_BREAKPOINT_DISTANCE.  When userSettings->DD_REPORT_DUPLICATION_READS
// is set, append_cluster_connections() is run on the events before reporting.
void searchMEI(MEI_data& finalState, Genome& genome, std::map<int, std::string>& seq_name_dict,
               UserDefinedSettings* userSettings, ControlState& current_state, std::ostream& out) {
    LOG_INFO(*logStream << time_log() << "Start calling dispersed duplication events from found breakpoints..." << std::endl);
    std::vector<MEI_event> insertion_events;

    const size_t bp_amount = finalState.MEI_breakpoints.size();
    LOG_INFO(*logStream << time_log() << "Examining " << bp_amount <<
             " breakpoints in total." << std::endl);

    std::sort(finalState.MEI_breakpoints.begin(), finalState.MEI_breakpoints.end(), comp_breakpoint_pos);
    LOG_DEBUG(*logStream << time_log() << "Sorted breakpoints." << std::endl);

    // Walk over all pairs of consecutive breakpoints.  The bound is written as
    // `i + 1 < bp_amount`: `bp_amount - 1` wraps around for an empty list (size_t), which
    // made the loop run and .at(0) throw.
    for (size_t i = 0; i + 1 < bp_amount; i++) {
        if (finalState.MEI_breakpoints.at(i).cluster_strand == finalState.MEI_breakpoints.at(i+1).cluster_strand ||
            (finalState.MEI_breakpoints.at(i+1).breakpoint_pos - finalState.MEI_breakpoints.at(i).breakpoint_pos) >
                userSettings->MAX_DD_BREAKPOINT_DISTANCE ||
            finalState.MEI_breakpoints.at(i).breakpoint_tid != finalState.MEI_breakpoints.at(i+1).breakpoint_tid) {
            // Current two consecutive breakpoints cannot be combined into an event.
            continue;
        }

        // The breakpoint on the Plus strand is passed as the first constructor argument.
        MEI_event event;
        if (finalState.MEI_breakpoints.at(i).cluster_strand == Plus) {
            event = MEI_event(finalState.MEI_breakpoints.at(i), finalState.MEI_breakpoints.at(i+1));
        } else {
            event = MEI_event(finalState.MEI_breakpoints.at(i+1), finalState.MEI_breakpoints.at(i));
        }

        insertion_events.push_back(event);
    }

    LOG_INFO(*logStream << time_log() << "Found " << insertion_events.size() << " dispersed duplication events."
             << std::endl);

    if (userSettings->DD_REPORT_DUPLICATION_READS) {
        // Append information about reads mapping inside DDs.
        LOG_INFO(*logStream << time_log() << "Collecting discordant read information for dispersed duplication "
                 << "events." << std::endl);
        append_cluster_connections(insertion_events, current_state, userSettings);
    }

    LOG_INFO(*logStream << time_log() << "Reporting " << insertion_events.size() << " dispersed duplication events to "
             << userSettings->getMEIOutputFilename().c_str() << std::endl);

    // Report events, numbered from 1.
    for (size_t i = 0; i < insertion_events.size(); i++) {
        reportMEIevent(finalState, insertion_events.at(i), i + 1, genome, seq_name_dict, out);
    }
}
/*
 * Refresh clock->now with gettimeofday() and advance the msec, usec and nsec
 * counters of the clock by the time elapsed since the previous reading.
 * clock->ntp_usec is set to the microsecond part of the new reading.
 *
 * Returns the updated clock->nsec.
 */
int posix_time_now(xclock_t * clock)
{
    /* Previous reading, kept to compute the elapsed time. */
    const struct timeval previous = clock->now;
    time_t elapsed_sec;
    suseconds_t elapsed_usec;

    gettimeofday(&clock->now, NULL);

    elapsed_sec = clock->now.tv_sec - previous.tv_sec;
    elapsed_usec = clock->now.tv_usec - previous.tv_usec;

    /* Each counter gets the seconds part and the microseconds part separately. */
    clock->msec += elapsed_sec * 1000;
    clock->msec += elapsed_usec / 1000;

    clock->usec += elapsed_sec * 1000000;
    clock->usec += elapsed_usec;

    clock->nsec += elapsed_sec * 1000000000;
    clock->nsec += elapsed_usec * 1000;

    clock->ntp_usec = clock->now.tv_usec;

    time_log(("posix_time_now: msec = %d, nsec = %u\n",clock->msec , clock->nsec));

    return clock->nsec;
}
 /*
  * Allocate and initialise a new clock.
  *
  * lrt - initial value of the msec counter.
  * hrt - initial value of the nsec counter.
  *
  * The clock's `now` is set from gettimeofday(), its hrtime_now callback is
  * posix_time_now and a new lock is created for it.
  *
  * Returns the new clock, or NULL when the clock or its lock cannot be
  * allocated.
  */
 xclock_t * time_begin(rtime_t lrt, rtime_t hrt){

    xclock_t * clock = (xclock_t *)xmalloc(sizeof(struct xrtp_clock_s));
    if(!clock){

        time_log(("time_begin: no memory for new clock.\n"));
        return NULL;
    }

    gettimeofday(&clock->now, NULL);

    clock->msec = lrt;
    /* posix_time_now() accumulates into usec, so it needs a defined start value. */
    clock->usec = 0;
    clock->nsec = hrt;
    /* Same value posix_time_now() stores after each reading. */
    clock->ntp_usec = clock->now.tv_usec;

    clock->hrtime_now = posix_time_now;

    clock->lock = xthr_new_lock();
    if(!clock->lock){

        xfree(clock);
        return NULL;
    }

    /* %p with a void pointer: casting the pointer to int truncates it on 64-bit targets. */
    time_log(("time_begin: new clock[@%p] created.\n", (void *)clock));
    return clock;
 }
Exemple #9
0
// This function is based on Pindel's main function.  Todo: integrate with pindel's main structure.
//
// Runs the dispersed duplication search: for every BED region in current_state.IncludeBed
// the discordant reads are loaded window by window and searched for breakpoints, after
// which searchMEI() combines the breakpoints into events and writes them to the file named
// by userSettings->getMEIOutputFilename().
//
// Returns 0 on success, 1 when the output file cannot be opened or a BED region names a
// chromosome that is not in the reference, or the non-zero result of
// load_discordant_reads().
int searchMEImain(ControlState& current_state, Genome& genome, UserDefinedSettings* userSettings) {

    // Reset genome before traversal.
    g_genome.reset();

    std::ofstream file_output(userSettings->getMEIOutputFilename().c_str());
    if (!file_output) {
        // Fail before the search starts instead of silently discarding the report at the end.
        std::cout << "Could not open output file " << userSettings->getMEIOutputFilename().c_str() << "." << std::endl;
        return 1;
    }
    MEI_data mei_data;
    int result;

    // Loop over BED-regions defined in control state.
    for (unsigned bed_index = 0; bed_index < current_state.IncludeBed.size(); bed_index++) {
        std::string Bed_ChrName = current_state.IncludeBed[bed_index].ChrName;
        unsigned Bed_start = current_state.IncludeBed[bed_index].Start;
        unsigned Bed_end = current_state.IncludeBed[bed_index].End;

        const Chromosome* currentChromosome = g_genome.getChr(Bed_ChrName);

        if (currentChromosome == NULL) {
            // Report the chromosome name of the BED record that could not be found.
            std::cout << "There is no " << Bed_ChrName << " in the reference file." << std::endl;
            return 1;
        }

        LOG_INFO(*logStream << time_log() << "Dispersed Duplication detection current window: " << Bed_ChrName <<
                 ", " << Bed_start << "--" << Bed_end << std::endl);

        // Reset the chromosome mask to 'N' for the whole chromosome.
        CurrentChrMask.resize(currentChromosome->getCompSize());
        for (unsigned int i = 0; i < currentChromosome->getCompSize(); i++) {
            CurrentChrMask[i] = 'N';
        }

        userSettings->getRegion()->SetRegion(Bed_ChrName, Bed_start, Bed_end);
        LoopingSearchWindow currentWindow( userSettings->getRegion(), currentChromosome, WINDOW_SIZE, Bed_start, Bed_end );

        // loop over one bed region
        do {
            result = load_discordant_reads(mei_data, current_state.bams_to_parse, currentChromosome->getName(),
                                           currentWindow, userSettings);
            if (result) {
                // something went wrong loading the reads, return error code.
                return result;
            }

            searchMEIBreakpoints(mei_data, current_state.bams_to_parse, currentChromosome, userSettings);
            cleanup_reads(mei_data.discordant_reads);

            currentWindow.next();
        } while (!currentWindow.finished());
    }

    // Reset genome for subsequent traversals.
    g_genome.reset();

    std::map<int, std::string> seq_name_dictionary = get_sequence_name_dictionary(current_state);

    searchMEI(mei_data, genome, seq_name_dictionary, userSettings, current_state, file_output);
    file_output.close();
    return 0;
}
Exemple #10
0
static int append_cluster_connections(std::vector<MEI_event>& insertion_events, ControlState& current_state,
                                      UserDefinedSettings* userSettings) {
    
    // Setup maps for base read names of mates we need to collect.  Also setup 'exclude_names' holding
    // the original read names (we don't want those, only their mates, which fall inside the event).
    std::map<std::string, size_t> fwd_name_links, rev_name_links, exclude_names;
    std::string tmp_basename;
    for (size_t i = 0; i < insertion_events.size(); i++) {
        MEI_event event = insertion_events.at(i);
        for (size_t j = 0; j < event.fwd_cluster_bp.associated_reads.size(); j++) {
            tmp_basename = base_read_name(event.fwd_cluster_bp.associated_reads.at(j).name);
            fwd_name_links.insert(std::make_pair(tmp_basename, i));
            exclude_names.insert(std::make_pair(event.fwd_cluster_bp.associated_reads.at(j).name, i));
        }
        for (size_t j = 0; j < event.rev_cluster_bp.associated_reads.size(); j++) {
            tmp_basename = base_read_name(event.rev_cluster_bp.associated_reads.at(j).name);
            rev_name_links.insert(std::make_pair(tmp_basename, i));
            exclude_names.insert(std::make_pair(event.rev_cluster_bp.associated_reads.at(j).name, i));
        }
    }
    
    
    // Loop over whole genome to find mates of discordant reads near DD breakpoints.
    g_genome.reset();
    MEI_data mei_data;
    int result;
    
    // Make dummy BED-records spanning the whole genome.
    std::vector<BED> dummy_beds;
    for (unsigned index = 0; index < g_ChrNameAndSizeAndIndex.size(); index++) {
        BED OneBedRecord;
        OneBedRecord.ChrName = g_ChrNameAndSizeAndIndex[index].ChrName;
        OneBedRecord.Start = 1;
        OneBedRecord.End = g_ChrNameAndSizeAndIndex[index].ChrSize;
        dummy_beds.push_back(OneBedRecord);
    }
    
    // Loop over BED-regions.
	for (unsigned bed_index = 0; bed_index < dummy_beds.size(); bed_index++) {
		std::string Bed_ChrName = dummy_beds[bed_index].ChrName;
		unsigned Bed_start = dummy_beds[bed_index].Start;
		unsigned Bed_end = dummy_beds[bed_index].End;
        
		const Chromosome* currentChromosome = g_genome.getChr(Bed_ChrName);
        
		if (currentChromosome == NULL) {
			return 1;
		}
        
		LOG_INFO(*logStream << time_log() << "Discordant read collection for current window: " << Bed_ChrName <<
                 ", " << Bed_start << "--" << Bed_end << std::endl);
        
		CurrentChrMask.resize(currentChromosome->getCompSize());
		for (unsigned int i = 0; i < currentChromosome->getCompSize(); i++) {
            CurrentChrMask[i] = 'N';
		}
        
		userSettings->getRegion()->SetRegion(Bed_ChrName, Bed_start, Bed_end);
		LoopingSearchWindow currentWindow( userSettings->getRegion(), currentChromosome, WINDOW_SIZE, Bed_start, Bed_end );
        
        // loop over one bed region
        do {
            result = load_discordant_reads(mei_data, current_state.bams_to_parse, currentChromosome->getName(),
                                           currentWindow, userSettings);
            if (result) {
                // something went wrong loading the reads, return error code.
                return result;
            }
            
            std::map<std::string, size_t>::iterator name_match;
            size_t disc_read_count = mei_data.discordant_reads.size();
            for (size_t i = 0; i < disc_read_count; i++) {
                // Determine event and strand for which mate is evidence.
                tmp_basename = base_read_name(mei_data.discordant_reads.at(i)->name);
                int event_idx = -1;
                char strand = Plus;
                name_match = fwd_name_links.find(tmp_basename);
                if (name_match != fwd_name_links.end()) {
                    // Current read referenced by a DD event near bp on fwd strand.
                    event_idx = (*name_match).second;
                } else {
                    name_match = rev_name_links.find(tmp_basename);
                    if (name_match != rev_name_links.end()) {
                        // Current read referenced by a DD event near bp on rev strand.
                        event_idx = (*name_match).second;
                        strand = Minus;
                    }
                }
                
                if (event_idx == -1) {
                    // No match found, this read is not related to an event.
                    continue;
                }
                
                if (exclude_names.find(mei_data.discordant_reads.at(i)->name) != exclude_names.end()) {
                    // read name in exlude list, this is one of the reads we used for calling
                    // the breakpoint, skip it!
                    continue;
                }
                
                if (strand == Plus) {
                    insertion_events.at(event_idx).fwd_mapping_reads.push_back(*(mei_data.discordant_reads.at(i)));
                } else {
                    insertion_events.at(event_idx).rev_mapping_reads.push_back(*(mei_data.discordant_reads.at(i)));
                }
            }
            
            cleanup_reads(mei_data.discordant_reads);
			
            currentWindow.next();
        } while (!currentWindow.finished());
	}
    
    return 0;
}
Exemple #11
0
// Finds split-read supported breakpoints for a cluster of connected discordant reads and
// appends every accepted breakpoint to `breakpoints`.  Nothing is returned; `breakpoints`
// is left untouched when no candidate qualifies.
//
// A candidate position is the close-end location of a split read anchored on the cluster
// strand; it is kept when at least userSettings->MIN_DD_BREAKPOINT_SUPPORT split reads
// share it.  A candidate becomes a breakpoint when a consensus of the unmapped read ends
// can be built and that consensus is not found (on either strand) in the local window
// around the candidate.  Each breakpoint gets the candidate's split reads and copies of
// all reads of the cluster attached.
static void get_breakpoints(std::vector<simple_read*>& cluster, std::vector<bam_info>& bam_sources, int insert_size,
                            int cluster_tid, char cluster_strand, const Chromosome* chromosome,
                            std::map<std::string, std::string>& sample_dict, std::vector<MEI_breakpoint>& breakpoints,
                            UserDefinedSettings* userSettings) {
    std::vector<SPLIT_READ> split_reads;
    // Outer read of the cluster: the last read for a Minus cluster, otherwise the first.
    int outer_read_pos = (cluster_strand == Minus)? cluster.at(cluster.size()-1)->pos : cluster.at(0)->pos;
    get_split_reads_for_cluster(bam_sources, cluster_strand, outer_read_pos, chromosome, split_reads);

    // Search for split reads with a mate close to the outer read of the
    // cluster.  Store candidate breakpoints.
    // Todo: speedup by exploiting the fact that both clusters and split reads are sorted
    // by mapping location.
    typedef std::map<int, std::vector<simple_read> > candidate_map;
    candidate_map bio_candidate_breakpoints;
    for (size_t i = 0; i < split_reads.size(); i++) {
        // Bind by reference instead of copying the whole split read.  Non-const because
        // the const-ness of the SPLIT_READ accessors is not visible from here.
        SPLIT_READ& read = split_reads.at(i);

        char anchor_strand = read.MatchedD;
        if (cluster_strand != anchor_strand) {
            continue;
        }

        unsigned int comp_candidate_bp = read.getLastAbsLocCloseEnd();
        unsigned int bio_candidate_bp = get_bio_chr_index(comp_candidate_bp);

        // Look the candidate up once and keep the iterator for storing the read below.
        candidate_map::iterator candidate = bio_candidate_breakpoints.find(bio_candidate_bp);
        if (candidate == bio_candidate_breakpoints.end()) {
            // New candidate, look ahead to check whether there are enough supporting split reads.
            int SR_support = 1;
            for (size_t j = i + 1; j < split_reads.size(); j++) {
                if (split_reads.at(j).getLastAbsLocCloseEnd() == comp_candidate_bp &&
                    split_reads.at(j).MatchedD == cluster_strand) {
                    SR_support++;
                }
            }
            if (SR_support < userSettings->MIN_DD_BREAKPOINT_SUPPORT) {
                // Not enough support, skip it.
                continue;
            }
            candidate = bio_candidate_breakpoints.insert(
                            std::make_pair(bio_candidate_bp, std::vector<simple_read>())).first;
        }

        // Store the unmatched sequence as it should be matched on the opposite strand of
        // the mapped mate.
        std::string whole_sequence;
        std::string mapped_part;
        std::string unmapped_part;
        if (anchor_strand == Plus) {
            whole_sequence = ReverseComplement(read.getUnmatchedSeq());
            mapped_part = whole_sequence.substr(0, read.CloseEndLength);
            unmapped_part = whole_sequence.substr(read.CloseEndLength, whole_sequence.length());
        } else {
            whole_sequence = read.getUnmatchedSeq();
            mapped_part = whole_sequence.substr(whole_sequence.length() - read.CloseEndLength,
                                                whole_sequence.length());
            unmapped_part = whole_sequence.substr(0, whole_sequence.length() - read.CloseEndLength);
        }

        std::string sample_name;
        get_sample_name(read.read_group, sample_dict, sample_name);
        simple_read simple_split_read(read.Name, -1, -1, '?', sample_name, whole_sequence, mapped_part,
                                      unmapped_part);
        candidate->second.push_back(simple_split_read);
    }

    // The consensus is built for the strand opposite to the cluster strand.
    char SR_mapping_strand = (cluster_strand == Plus)? Minus : Plus;

    // Remove any split reads for which a far end can be found locally, these are
    // assumed to contribute to some local variants.
    // Todo: determine region that is searched for far end.
    candidate_map::iterator map_iter;
    for (map_iter = bio_candidate_breakpoints.begin(); map_iter != bio_candidate_breakpoints.end(); ++map_iter) {

        std::string mapped_consensus = get_consensus_unmapped(map_iter->second, SR_mapping_strand);
        if (mapped_consensus.length() == 0) {
            LOG_DEBUG(*logStream << time_log() << "Consensus building failed for split read mapping ends (" <<
                      map_iter->second.size() << " reads @ " << map_iter->first << ")" << std::endl);
            continue;
        }
        int bio_bp = map_iter->first;

        // If far end consensus is not found in local window, store breakpoint.  The window
        // starts MIN_DD_MAP_DISTANCE before the breakpoint (clamped at 0) and spans at most
        // twice that distance (clamped at the end of the chromosome).
        size_t FE_window_start = std::max(0, get_comp_chr_index(bio_bp) - userSettings->MIN_DD_MAP_DISTANCE);
        size_t FE_window_size = std::min(chromosome->getCompSize() - (unsigned) FE_window_start,
                                         2 * (unsigned) userSettings->MIN_DD_MAP_DISTANCE);
        if (!contains_subseq_any_strand(mapped_consensus, chromosome->getSeq().substr(FE_window_start,
                FE_window_size), MIN_CONSENSUS_LENGTH)) {
            MEI_breakpoint bp(cluster_tid, bio_bp, cluster_strand);
            bp.associated_split_reads = map_iter->second;

            // Link associated discordant reads (all reads from cluster) and split reads.
            std::vector<simple_read*>::iterator read_iter;
            for (read_iter = cluster.begin(); read_iter != cluster.end(); ++read_iter) {
                bp.associated_reads.push_back(*(*read_iter));
            }
            breakpoints.push_back(bp);
        }
    }
}