Пример #1
0
/**
 * Unconditionally flags every junction in the set as accepted and
 * announces that on stderr.
 *
 * @param junctions  collection whose entries get `accepted` set to true
 * @param refid      not referenced anywhere in the body of this function
 */
void accept_all_junctions(JunctionSet& junctions,
			  const uint32_t refid)
{
  fprintf(stderr, "Accepting all junctions\n");

  JunctionSet::iterator cursor = junctions.begin();
  while (cursor != junctions.end())
    {
      cursor->second.accepted = true;
      ++cursor;
    }
}
Пример #2
0
/**
 * Two-pass filter over the junction set: first every entry is handed to
 * accept_if_valid() together with its statistics record, then the whole
 * set is passed to knockout_shadow_junctions().
 *
 * @param junctions  collection that is filtered in place
 */
void filter_junctions(JunctionSet& junctions)
{
  JunctionSet::iterator entry = junctions.begin();
  while (entry != junctions.end())
    {
      accept_if_valid(entry->first, entry->second);
      ++entry;
    }

  knockout_shadow_junctions(junctions);
}
Пример #3
0
/**
 * Counts the junctions whose key reports itself as not valid() and
 * writes the total to stderr. The set itself is not modified.
 *
 * @param junctions  collection to inspect
 */
void validate_junctions(const JunctionSet& junctions)
{
  uint32_t invalid_juncs = 0;
  for (JunctionSet::const_iterator i = junctions.begin();
       i != junctions.end();
       ++i)
    {
      if (!i->first.valid())
	++invalid_juncs;
    }
  // The counter is unsigned, so it is printed with %u; the cast keeps the
  // argument type matched to the conversion however uint32_t is defined.
  fprintf(stderr, "Found %u invalid junctions\n",
	  static_cast<unsigned int>(invalid_juncs));
}
Пример #4
0
/**
 * Writes a track header line followed by one record per junction.
 *
 * Each junction is emitted through print_junction() together with the name
 * of its reference sequence and a running id that starts at 1 and grows by
 * one per junction, in the iteration order of the set.
 *
 * @param junctions_out  stream that receives the header and the records
 * @param junctions      junctions (and their statistics) to print
 * @param ref_sequences  table used to map a junction's refid to a name; in
 *                       builds with assertions enabled, a missing name aborts
 */
void print_junctions(FILE* junctions_out, 
		     const JunctionSet& junctions,
		     RefSequenceTable& ref_sequences)
{
  uint64_t junc_id = 1;
  fprintf(junctions_out, "track name=junctions description=\"TopHat junctions\"\n");
  for (JunctionSet::const_iterator i = junctions.begin();
       i != junctions.end();
       ++i)
    {
      // Refer to the stored key and statistics directly. Going through a
      // `const pair<Junction, JunctionStats>&` would materialise a temporary
      // copy of the element on every iteration if the container's value type
      // has a const key (presumably the case here -- verify against the
      // JunctionSet typedef).
      const Junction& j = i->first;
      const JunctionStats& s = i->second;

      assert(ref_sequences.get_name(j.refid));
      print_junction(junctions_out,
		     ref_sequences.get_name(j.refid),
		     j,
		     s,
		     junc_id++);
    }
}
Пример #5
0
void driver(const vector<FILE*>& splice_coords_files,
	    const vector<FILE*>& insertion_coords_files,
	    const vector<FILE*>& deletion_coords_files,
	    const vector<FILE*>& fusion_coords_files, 
	    ifstream& ref_stream)
{	
	char splice_buf[2048];
	RefSequenceTable rt(sam_header, true);
	get_seqs(ref_stream, rt, true);

	JunctionSet junctions;
	for (size_t i = 0; i < splice_coords_files.size(); ++i)
	{
		FILE* splice_coords = splice_coords_files[i];
		if (!splice_coords)
			continue;
		while (fgets(splice_buf, 2048, splice_coords))
		{
			char* nl = strrchr(splice_buf, '\n');
			char* buf = splice_buf;
			if (nl) *nl = 0;
			
			/**
			 Fields are:
			 1) reference name
			 2) left coord of splice (last char of the left exon)
			 3) right coord of splice (first char of the right exon)
			 */
			
			char* ref_name                   = get_token((char**)&buf, "\t");
			char* scan_left_coord            = get_token((char**)&buf, "\t");
			char* scan_right_coord           = get_token((char**)&buf, "\t");
			char* orientation				 = get_token((char**)&buf, "\t");
			
			if (!scan_left_coord || !scan_right_coord || !orientation)
			{
				fprintf(stderr,"Error: malformed splice coordinate record\n");
				exit(1);
			}
			uint32_t ref_id = rt.get_id(ref_name, NULL, 0);
			uint32_t left_coord = atoi(scan_left_coord);
			uint32_t right_coord = atoi(scan_right_coord);
			bool antisense = *orientation == '-';
			junctions.insert(make_pair<Junction, JunctionStats>(Junction(ref_id, left_coord, right_coord, antisense), JunctionStats()));
		}
	}


	/*
	 * Read in the deletion coordinates
	 * and store in a set
	 */	
	std::set<Deletion> deletions;
	for(size_t i=0; i < deletion_coords_files.size(); ++i){
		FILE* deletion_coords = deletion_coords_files[i];
		if(!deletion_coords){
			continue;
		} 
		while (fgets(splice_buf, 2048, deletion_coords))
		{
			char* nl = strrchr(splice_buf, '\n');
			char* buf = splice_buf;
			if (nl) *nl = 0;
			
			/**
			 Fields are:
			 1) reference name
			 2) left coord of splice (last char of the left exon)
			 3) right coord of splice (first char of the right exon)
			 */
			
			char* ref_name                   = get_token((char**)&buf, "\t");
			char* scan_left_coord            = get_token((char**)&buf, "\t");
			char* scan_right_coord           = get_token((char**)&buf, "\t");
			
			if (!scan_left_coord || !scan_right_coord)
			{
				fprintf(stderr,"Error: malformed deletion coordinate record\n");
				exit(1);
			}

			/*
			 * Note that when reading in a deletion, the left co-ord is the position of the 
			 * first deleted based. Since we are co-opting the junction data structure, need
			 * to fix up this location
			 */
			uint32_t ref_id = rt.get_id(ref_name, NULL, 0);
			uint32_t left_coord = atoi(scan_left_coord);
			uint32_t right_coord = atoi(scan_right_coord);
			deletions.insert(Deletion(ref_id, left_coord - 1, right_coord, false));
		}
	}

	/*
	 * Read in the insertion coordinates
	 * and store in a set
	 */
	std::set<Insertion> insertions;
	for(size_t i=0; i < insertion_coords_files.size(); ++i){
		FILE* insertion_coords = insertion_coords_files[i];
		if(!insertion_coords){
			continue;
		} 
		while(fgets(splice_buf, 2048, insertion_coords)){
			char* nl = strrchr(splice_buf, '\n');
			char* buf = splice_buf;
			if (nl) *nl = 0;
			
			char* ref_name = get_token((char**)&buf, "\t");
			char* scan_left_coord = get_token((char**)&buf, "\t");
			char* scan_right_coord = get_token((char**)&buf, "\t");
			char* scan_sequence = get_token((char**)&buf, "\t");

			if (!scan_left_coord || !scan_sequence || !scan_right_coord)
			{
				fprintf(stderr,"Error: malformed insertion coordinate record\n");
				exit(1);
			}
			
			seqan::Dna5String sequence = seqan::Dna5String(scan_sequence);
			bool containsN = false;
			for(size_t index = 0; index < seqan::length(sequence); index += 1){
				/*
				 * Don't allow any ambiguities in the insertion
				 */
				if(sequence[index] == 'N'){
					containsN = true;
					break;	
				}
			}
			if(containsN){
				continue;
			}
			seqan::CharString charSequence = sequence;
			uint32_t ref_id = rt.get_id(ref_name,NULL,0);
			uint32_t left_coord = atoi(scan_left_coord);
			insertions.insert(Insertion(ref_id, left_coord, seqan::toCString(charSequence)));
		}
	}

	std::set<Fusion> fusions;
	for(size_t i=0; i < fusion_coords_files.size(); ++i){
		FILE* fusion_coords = fusion_coords_files[i];
		if(!fusion_coords){
			continue;
		} 
		while(fgets(splice_buf, 2048, fusion_coords)){
			char* nl = strrchr(splice_buf, '\n');
			char* buf = splice_buf;
			if (nl) *nl = 0;
			
			char* ref_name1 = strsep((char**)&buf, "\t");
			char* scan_left_coord = strsep((char**)&buf, "\t");
			char* ref_name2 = strsep((char**)&buf, "\t");
			char* scan_right_coord = strsep((char**)&buf, "\t");
			char* scan_dir = strsep((char**)&buf, "\t");

			if (!ref_name1 || !scan_left_coord || !ref_name2 || !scan_right_coord || !scan_dir)
			{
			  fprintf(stderr,"Error: malformed insertion coordinate record\n");
			  exit(1);
			}
			
			uint32_t ref_id1 = rt.get_id(ref_name1, NULL, 0);
			uint32_t ref_id2 = rt.get_id(ref_name2, NULL, 0);
			uint32_t left_coord = atoi(scan_left_coord);
			uint32_t right_coord = atoi(scan_right_coord);
			uint32_t dir = FUSION_FF;
			if (strcmp(scan_dir, "fr") == 0)
			  dir = FUSION_FR;
			else if(strcmp(scan_dir, "rf") == 0)
			  dir = FUSION_RF;
			else if(strcmp(scan_dir, "rr") == 0)
			  dir = FUSION_RR;
		  
			fusions.insert(Fusion(ref_id1, ref_id2, left_coord, right_coord, dir));
		}
	}

	{
	  JunctionSet::iterator itr = junctions.begin();
	  for (; itr != junctions.end(); ++itr)
	    {
	      RefSequenceTable::Sequence* ref_str = rt.get_seq(itr->first.refid);
	      if (ref_str == NULL) continue;
	      
	      const char* name = rt.get_name(itr->first.refid);
	      print_splice(itr->first, read_length, itr->first.antisense ? "GTAG|rev" : "GTAG|fwd", *ref_str, name, cout);
	    }
	}

	{
	  std::set<Deletion>::iterator itr = deletions.begin();
	  for (; itr != deletions.end(); ++itr)
	    {
	      RefSequenceTable::Sequence* ref_str = rt.get_seq(itr->refid);
	      if (ref_str == NULL) continue;
	      
	      const char* name = rt.get_name(itr->refid);
	      print_splice((Junction)*itr, read_length, itr->antisense ? "del|rev" : "del|fwd", *ref_str, name, cout);
	    }
	}

	{
	  std::set<Insertion>::iterator itr  = insertions.begin();
	  for (; itr != insertions.end(); ++itr){
	    RefSequenceTable::Sequence* ref_str = rt.get_seq(itr->refid);
	    if (ref_str == NULL) continue;
	    
	    const char* name = rt.get_name(itr->refid);
	    print_insertion(*itr, read_length, *ref_str, name, cout);	
	  }
	}

	{
	  std::set<Fusion>::iterator itr = fusions.begin();
	  for (; itr != fusions.end(); ++itr){
	    RefSequenceTable::Sequence* left_ref_str = rt.get_seq(itr->refid1);
	    RefSequenceTable::Sequence* right_ref_str = rt.get_seq(itr->refid2);

	    if (left_ref_str == NULL || right_ref_str == NULL) continue;
	    
	    const char* left_ref_name = rt.get_name(itr->refid1);
	    const char* right_ref_name = rt.get_name(itr->refid2);
	    print_fusion(*itr, read_length, *left_ref_str, *right_ref_str, left_ref_name, right_ref_name, cout);	
	  }
	}
}
Пример #6
0
/**
 * Withdraws acceptance from junctions that are outvoted by a nearby
 * junction on the opposite strand.
 *
 * The set is processed one reference id at a time. For every junction that
 * is currently accepted, a search window is built from two copies of it with
 * the strand flipped: one with `left` lowered by min_anchor_len and one with
 * `right` raised by min_anchor_len. Each junction the set orders inside that
 * window is compared with the candidate; if it lies on the other strand, has
 * a left or right coordinate difference below min_anchor_len, and has more
 * supporting hits, the candidate's `accepted` flag is cleared.
 *
 * NOTE(review): the coordinate differences are taken as (candidate - other)
 * without an absolute value, so any negative difference passes the
 * "< min_anchor_len" test -- confirm this is intended.
 *
 * @param junctions  collection whose `accepted` flags are updated in place
 */
void knockout_shadow_junctions(JunctionSet& junctions)
{
  // Collect the distinct reference ids present in the set.
  vector<uint32_t> ref_ids;
  JunctionSet::iterator scan = junctions.begin();
  while (scan != junctions.end())
    {
      ref_ids.push_back(scan->first.refid);
      ++scan;
    }
  sort(ref_ids.begin(), ref_ids.end());
  ref_ids.erase(unique(ref_ids.begin(), ref_ids.end()), ref_ids.end());

  for (vector<uint32_t>::const_iterator id = ref_ids.begin();
       id != ref_ids.end();
       ++id)
    {
      const uint32_t refid = *id;

      // Sentinel keys bracketing every junction on this reference.
      Junction range_lo(refid, 0, 0, true);
      Junction range_hi(refid, 0xFFFFFFFF, 0xFFFFFFFF, true);

      JunctionSet::iterator cand = junctions.lower_bound(range_lo);
      const JunctionSet::iterator range_end = junctions.upper_bound(range_hi);

      for (; cand != range_end && cand != junctions.end(); ++cand)
	{
	  // Only junctions that are still accepted can be knocked out.
	  if (!cand->second.accepted)
	    continue;

	  // Opposite-strand window around the candidate.
	  Junction window_lo = cand->first;
	  window_lo.left -= min_anchor_len;
	  window_lo.antisense = !cand->first.antisense;

	  Junction window_hi = cand->first;
	  window_hi.right += min_anchor_len;
	  window_hi.antisense = !cand->first.antisense;

	  JunctionSet::iterator other = junctions.lower_bound(window_lo);
	  const JunctionSet::iterator window_end = junctions.upper_bound(window_hi);

	  const int own_support = cand->second.supporting_hits;

	  for (; other != window_end && other != junctions.end(); ++other)
	    {
	      if (other == cand)
		continue;
	      if (cand->first.antisense == other->first.antisense)
		continue;

	      const int left_diff = cand->first.left - other->first.left;
	      const int right_diff = cand->first.right - other->first.right;
	      const bool close_enough =
		left_diff < min_anchor_len || right_diff < min_anchor_len;

	      if (close_enough && own_support < other->second.supporting_hits)
		cand->second.accepted = false;
	    }
	}
    }
}