示例#1
0
// Extracts junctions from all the SAM hits (based on REF_SKIPs) in the hit
// file and resets the stream when finished.
//
//   hit_stream  source of per-read hit groups; read until the sentinel
//               observation order is seen, then reset().
//   it          read table used to translate each group's insert_id into an
//               observation order.
//   junctions   passed to junctions_from_alignment() for every hit that is
//               not contiguous.
//
// The walk ends at the first group whose observation order is 0xFFFFFFFF
// (presumably the "no more reads" marker -- confirm against ReadTable).
void get_junctions_from_hits(HitStream& hit_stream, 
			     ReadTable& it, 
			     JunctionSet& junctions)
{
  HitsForRead curr_hit_group;
  hit_stream.next_read_hits(curr_hit_group);
  
  uint32_t curr_obs_order = it.observation_order(curr_hit_group.insert_id);
  
  while(curr_obs_order != 0xFFFFFFFF)
    {
      // Examine every hit of the current group before touching the stream.
      for (size_t i = 0; i < curr_hit_group.hits.size(); ++i)
	{
	  BowtieHit& bh = curr_hit_group.hits[i];
	  if (!bh.contiguous())
	    {
	      junctions_from_alignment(bh, junctions);
	    }
	}
      
      // Advance exactly once per group.  Doing this inside the loop above
      // would swap the group out from under the index (skipping hits of the
      // newly read group) and would never advance past a group with no hits.
      hit_stream.next_read_hits(curr_hit_group);
      curr_obs_order = it.observation_order(curr_hit_group.insert_id);
    }
  
  hit_stream.reset();
}
示例#2
0
void closure_driver(vector<FZPipe>& map1, 
		    vector<FZPipe>& map2, 
		    ifstream& ref_stream, 
		    FILE* juncs_file,
		    FILE* fusions_out)
{
  typedef RefSequenceTable::Sequence Reference;
  
  ReadTable it;
  RefSequenceTable rt(true);

  BowtieHitFactory hit_factory(it, rt);

  std::set<Fusion> fusions;
  
  fprintf (stderr, "Finding near-covered motifs...");
  CoverageMapVisitor cov_map_visitor(ref_stream, rt);
  uint32_t coverage_attempts = 0;
  
  assert(map1.size() == map2.size());
  for (size_t num = 0; num < map1.size(); ++num)
    {
      HitStream left_hs(map1[num].file, &hit_factory, false, true, false);
      HitStream right_hs(map2[num].file, &hit_factory, false, true, false);
      
      HitsForRead curr_left_hit_group;
      HitsForRead curr_right_hit_group;
      
      left_hs.next_read_hits(curr_left_hit_group);
      right_hs.next_read_hits(curr_right_hit_group);
      
      uint32_t curr_right_obs_order = it.observation_order(curr_left_hit_group.insert_id);
      uint32_t curr_left_obs_order = it.observation_order(curr_right_hit_group.insert_id);
      
      while(curr_left_obs_order != VMAXINT32 &&
	    curr_right_obs_order != VMAXINT32)
	{
	  while (curr_left_obs_order < curr_right_obs_order&&
		 curr_left_obs_order != VMAXINT32 && curr_right_obs_order != VMAXINT32)
	    {
	      // Get hit group
	      
	      left_hs.next_read_hits(curr_left_hit_group);
	      curr_left_obs_order = it.observation_order(curr_left_hit_group.insert_id);
	    }
	  
	  while (curr_left_obs_order > curr_right_obs_order &&
		 curr_left_obs_order != VMAXINT32 && curr_right_obs_order != VMAXINT32)
	    {
	      // Get hit group
	      
	      right_hs.next_read_hits(curr_right_hit_group);
	      curr_right_obs_order = it.observation_order(curr_right_hit_group.insert_id);
	    }
	  
	  while (curr_left_obs_order == curr_right_obs_order &&
		 curr_left_obs_order != VMAXINT32 && curr_right_obs_order != VMAXINT32)
	    {
	      if (num == 0)
		find_fusion_closure(curr_left_hit_group, curr_right_hit_group, fusions);
	      
	      if (coverage_attempts++ % 10000 == 0)
		fprintf (stderr, "Adding covered motifs from pair %d\n", coverage_attempts);

	      visit_best_pairing(curr_left_hit_group, curr_right_hit_group, cov_map_visitor);
	      
	      left_hs.next_read_hits(curr_left_hit_group);
	      curr_left_obs_order = it.observation_order(curr_left_hit_group.insert_id);
		    
	      right_hs.next_read_hits(curr_right_hit_group);
	      curr_right_obs_order = it.observation_order(curr_right_hit_group.insert_id);
	    }
	}
    }
  
  cov_map_visitor.finalize();
  fprintf (stderr, "done\n");
  
  ClosureJunctionSet fwd_splices;
  ClosureJunctionSet rev_splices;
  
  JunctionMapVisitor junc_map_visitor(fwd_splices, rev_splices, cov_map_visitor.finders);
  fprintf (stderr, "Searching for closures...");
  uint32_t closure_attempts = 0;
  
  for (size_t num = 0; num < map1.size(); ++num)
    {
      map1[num].rewind();
      map2[num].rewind();
      
      HitStream left_hs = HitStream(map1[num].file, &hit_factory, false, true, false);
      HitStream right_hs = HitStream(map2[num].file, &hit_factory, false, true, false);
      
      HitsForRead curr_left_hit_group;
      HitsForRead curr_right_hit_group;
      
      left_hs.next_read_hits(curr_left_hit_group);
      right_hs.next_read_hits(curr_right_hit_group);
      
      uint32_t curr_right_obs_order = it.observation_order(curr_left_hit_group.insert_id);
      uint32_t curr_left_obs_order = it.observation_order(curr_right_hit_group.insert_id);
      
      while(curr_left_obs_order != VMAXINT32 &&
	    curr_right_obs_order != VMAXINT32)
	{
	  while (curr_left_obs_order < curr_right_obs_order &&
		 curr_left_obs_order != VMAXINT32 && curr_right_obs_order != VMAXINT32)
	    {
	      // Get hit group
	      
	      left_hs.next_read_hits(curr_left_hit_group);
	      curr_left_obs_order = it.observation_order(curr_left_hit_group.insert_id);
	    }
	  
	  while (curr_left_obs_order > curr_right_obs_order &&
		 curr_left_obs_order != VMAXINT32 && curr_right_obs_order != VMAXINT32)
	    {
	      // Get hit group
	      
	      right_hs.next_read_hits(curr_right_hit_group);
	      curr_right_obs_order = it.observation_order(curr_right_hit_group.insert_id);
	    }
	  
	  while (curr_left_obs_order == curr_right_obs_order &&
		 curr_left_obs_order != VMAXINT32 && curr_right_obs_order != VMAXINT32)
	    {	
	      if (closure_attempts++ % 10000 == 0)
		fprintf (stderr, "Trying to close pair %d\n", closure_attempts);

	      visit_best_pairing(curr_left_hit_group, curr_right_hit_group, junc_map_visitor);
	      left_hs.next_read_hits(curr_left_hit_group);
	      curr_left_obs_order = it.observation_order(curr_left_hit_group.insert_id);
	      
	      right_hs.next_read_hits(curr_right_hit_group);
	      curr_right_obs_order = it.observation_order(curr_right_hit_group.insert_id);
	    }
	}
    }

  for (size_t num = 0; num < map1.size(); ++num)
    {
      map1[num].close();
      map2[num].close();
    }
  
  fprintf(stderr, "%lu Forward strand splices\n", fwd_splices.size());
  fprintf(stderr, "%lu Reverse strand splices\n", rev_splices.size());
  
  fprintf (stderr, "done\n");
  uint32_t num_potential_splices = 0;
  fprintf (stderr, "Reporting possible junctions...");
  map<uint32_t, pair<JunctionMapVisitor::JunctionTable, JunctionMapVisitor::JunctionTable> >::iterator f_itr;
  f_itr = junc_map_visitor._finders.begin();
  
  ClosureJunctionSet::iterator j_itr;
  j_itr = fwd_splices.begin();
  while (j_itr != fwd_splices.end())
    {
      fprintf (juncs_file,"%s\t%u\t%u\t%c\n",
	       rt.get_name(j_itr->refid),
	       j_itr->left,j_itr->right,'+');
      ++num_potential_splices;
      ++j_itr;
    }
  
  j_itr = rev_splices.begin();
  while (j_itr != rev_splices.end())
    {
      fprintf (juncs_file,"%s\t%u\t%u\t%c\n",
	       rt.get_name(j_itr->refid),
	       j_itr->left,j_itr->right,'-');
      ++num_potential_splices;
      ++j_itr;
    }
  
  //accept_all_best_hits(best_status_for_inserts);
  fprintf(stderr, "done\n");
  fprintf(stderr, "Searched for closures between %d pairs\n", searched);
  fprintf(stderr, "Successfully closed %d pairs\n", closed);
  
  fprintf(stderr, "Found %d total possible splices\n", num_potential_splices);

  // daehwan
#if 0
  fprintf (stderr, "Reporting potential fusions...\n");
  if(fusions_out){
    for(std::set<Fusion>::iterator itr = fusions.begin(); itr != fusions.end(); ++itr){
      const char* ref_name1 = rt.get_name(itr->refid1);
      const char* ref_name2 = rt.get_name(itr->refid2);
      
      const char* dir = "";
      if (itr->dir == FUSION_FR)
	dir = "fr";
      else if(itr->dir == FUSION_RF)
	dir = "rf";
      else
	dir = "ff";
      
      fprintf(fusions_out,
	      "%s\t%d\t%s\t%d\t%s\n",
	      ref_name1,
	      itr->left,
	      ref_name2,
	      itr->right,
	      dir);
    }
    fclose(fusions_out);
  }else{
    fprintf(stderr, "Failed to open fusions file for writing\n");
  }
#endif
}
示例#3
0
void best_insert_mappings(uint64_t refid,
						  ReadTable& it,
						  /*const string& name,*/
						  HitList& hits1_in_ref,
						  HitList& hits2_in_ref,
						  BestInsertAlignmentTable& best_status_for_inserts,
						  bool prefer_shorter_pairs)
{	
	
	long chucked_for_shorter_pair = 0;
	std::set<size_t> marked;
	HitList::iterator last_good = hits2_in_ref.begin();
	
	for (size_t i = 0; i < hits1_in_ref.size(); ++i)
	{
		BowtieHit& h1 = hits1_in_ref[i];
		pair<HitList::iterator, HitList::iterator> range_pair;
		range_pair = equal_range(last_good, hits2_in_ref.end(),
								 h1, hit_insert_id_lt);
		bool found_hit = false;
		if (range_pair.first != range_pair.second)
			last_good = range_pair.first;
		
		uint32_t obs_order = it.observation_order(h1.insert_id());
		
		for (HitList::iterator f = range_pair.first;
			 f != range_pair.second;
			 ++f)
		{
			BowtieHit& h2 = *f;
			
			if (h1.insert_id() == h2.insert_id())
			{
				// max mate inner distance (genomic)
				int min_mate_inner_dist = inner_dist_mean - inner_dist_std_dev;
				if (max_mate_inner_dist == -1)
				{
					max_mate_inner_dist = inner_dist_mean + inner_dist_std_dev;
				}
				
				InsertAlignmentGrade s(h1, h2, min_mate_inner_dist, max_mate_inner_dist);
				
				pair<InsertAlignmentGrade, vector<InsertAlignment> >& insert_best
					= best_status_for_inserts[obs_order];
				InsertAlignmentGrade& current = insert_best.first;
				// Is the new status better than the current best one?
				if (current < s)
				{
					insert_best.second.clear();
					current = s;
					insert_best.second.push_back(InsertAlignment(refid, &h1, &h2));
				}
				else if (!(s < current))
				{
					if (prefer_shorter_pairs && current.num_mapped == 2)
					{
						pair<int, int> dc = pair_distances(*(insert_best.second[0].left_alignment), *(insert_best.second[0].right_alignment));
						pair<int, int> ds = pair_distances(h1,h2);
						if (ds.second < dc.second)
						{
							chucked_for_shorter_pair += insert_best.second.size();
							insert_best.second.clear();
							current = s;
							insert_best.second.push_back(InsertAlignment(refid, &h1, &h2));
						}
					}
					else
					{
						insert_best.second.push_back(InsertAlignment(refid, &h1, &h2));
					}
				}
				
				marked.insert(f - hits2_in_ref.begin());
				found_hit = true;
			}
			
		}
		if (!found_hit)
		{
			pair<InsertAlignmentGrade, vector<InsertAlignment> >& insert_best
			= best_status_for_inserts[obs_order];
			InsertAlignmentGrade& current = insert_best.first;	
			
			InsertAlignmentGrade s(h1);
			
			if (current < s)
			{	
				insert_best.second.clear();
				current = s;
				insert_best.second.push_back(InsertAlignment(refid, &h1, NULL));
			}
			else if (! (s < current))
			{
				insert_best.second.push_back(InsertAlignment(refid, &h1, NULL));
			}
			
		}
	}
	
	for (size_t i = 0; i < hits2_in_ref.size(); ++i)
	{
		BowtieHit& h2 = hits2_in_ref[i];
		uint32_t obs_order = it.observation_order(h2.insert_id());
		pair<InsertAlignmentGrade, vector<InsertAlignment> >& insert_best
			= best_status_for_inserts[obs_order];
		InsertAlignmentGrade& current = insert_best.first;	
		
		InsertAlignmentGrade s(h2);
		// Did we include h2 as part of a pairing already, or is this first time
		// we've seen it?  If so, it's a singleton.
		if (marked.find(i) == marked.end())
		{
			if (current < s)
			{
				insert_best.second.clear();
				current = s;
				insert_best.second.push_back(InsertAlignment(refid, NULL, &h2));
			}
			else if (! (s < current))
			{
				insert_best.second.push_back(InsertAlignment(refid, NULL, &h2));
			}
		}
	}	
	fprintf(stderr, "Chucked %ld pairs for shorter pairing of same mates\n", chucked_for_shorter_pair);
}