Esempio n. 1
0
void driver(const vector<FILE*>& splice_coords_files,
			const vector<FILE*>& insertion_coords_files,
			const vector<FILE*>& deletion_coords_files, 
			ifstream& ref_stream)
{	
	char splice_buf[2048];
	RefSequenceTable rt(true);
	JunctionSet junctions;
	for (size_t i = 0; i < splice_coords_files.size(); ++i)
	{
		FILE* splice_coords = splice_coords_files[i];
		if (!splice_coords)
			continue;
		while (fgets(splice_buf, 2048, splice_coords))
		{
			char* nl = strrchr(splice_buf, '\n');
			char* buf = splice_buf;
			if (nl) *nl = 0;
			
			/**
			 Fields are:
			 1) reference name
			 2) left coord of splice (last char of the left exon)
			 3) right coord of splice (first char of the right exon)
			 */
			
			char* ref_name                   = get_token((char**)&buf, "\t");
			char* scan_left_coord            = get_token((char**)&buf, "\t");
			char* scan_right_coord           = get_token((char**)&buf, "\t");
			char* orientation				 = get_token((char**)&buf, "\t");
			
			if (!scan_left_coord || !scan_right_coord || !orientation)
			{
				fprintf(stderr,"Error: malformed splice coordinate record\n");
				exit(1);
			}
			uint32_t ref_id = rt.get_id(ref_name, NULL, 0);
			uint32_t left_coord = atoi(scan_left_coord);
			uint32_t right_coord = atoi(scan_right_coord);
			bool antisense = *orientation == '-';
			junctions.insert(make_pair<Junction, JunctionStats>(Junction(ref_id, left_coord, right_coord, antisense), JunctionStats()));
		}
	}


	/*
	 * Read in the deletion coordinates
	 * and store in a set
	 */	
	std::set<Deletion> deletions;
	for(size_t i=0; i < deletion_coords_files.size(); ++i){
		FILE* deletion_coords = deletion_coords_files[i];
		if(!deletion_coords){
			continue;
		} 
		while (fgets(splice_buf, 2048, deletion_coords))
		{
			char* nl = strrchr(splice_buf, '\n');
			char* buf = splice_buf;
			if (nl) *nl = 0;
			
			/**
			 Fields are:
			 1) reference name
			 2) left coord of splice (last char of the left exon)
			 3) right coord of splice (first char of the right exon)
			 */
			
			char* ref_name                   = get_token((char**)&buf, "\t");
			char* scan_left_coord            = get_token((char**)&buf, "\t");
			char* scan_right_coord           = get_token((char**)&buf, "\t");
			
			if (!scan_left_coord || !scan_right_coord)
			{
				fprintf(stderr,"Error: malformed deletion coordinate record\n");
				exit(1);
			}

			/*
			 * Note that when reading in a deletion, the left co-ord is the position of the 
			 * first deleted based. Since we are co-opting the junction data structure, need
			 * to fix up this location
			 */
			uint32_t ref_id = rt.get_id(ref_name, NULL, 0);
			uint32_t left_coord = atoi(scan_left_coord);
			uint32_t right_coord = atoi(scan_right_coord);
			deletions.insert(Deletion(ref_id, left_coord - 1, right_coord, false));
		}
	}

	/*
	 * Read in the insertion coordinates
	 * and store in a set
	 */
	std::set<Insertion> insertions;
	for(size_t i=0; i < insertion_coords_files.size(); ++i){
		FILE* insertion_coords = insertion_coords_files[i];
		if(!insertion_coords){
			continue;
		} 
		while(fgets(splice_buf, 2048, insertion_coords)){
			char* nl = strrchr(splice_buf, '\n');
			char* buf = splice_buf;
			if (nl) *nl = 0;
			
			char* ref_name = get_token((char**)&buf, "\t");
			char* scan_left_coord = get_token((char**)&buf, "\t");
			char* scan_right_coord = get_token((char**)&buf, "\t");
			char* scan_sequence = get_token((char**)&buf, "\t");

			if (!scan_left_coord || !scan_sequence || !scan_right_coord)
			{
				fprintf(stderr,"Error: malformed insertion coordinate record\n");
				exit(1);
			}
			
			seqan::Dna5String sequence = seqan::Dna5String(scan_sequence);
			bool containsN = false;
			for(size_t index = 0; index < seqan::length(sequence); index += 1){
				/*
				 * Don't allow any ambiguities in the insertion
				 */
				if(sequence[index] == 'N'){
					containsN = true;
					break;	
				}
			}
			if(containsN){
				continue;
			}
			seqan::CharString charSequence = sequence;
			uint32_t ref_id = rt.get_id(ref_name,NULL,0);
			uint32_t left_coord = atoi(scan_left_coord);
			insertions.insert(Insertion(ref_id, left_coord, seqan::toCString(charSequence)));
		}
	}


	typedef RefSequenceTable::Sequence Reference;
	
	while(ref_stream.good() && 
		  !ref_stream.eof()) 
	{
		Reference ref_str;
		string name;

		readMeta(ref_stream, name, Fasta());
		string::size_type space_pos = name.find_first_of(" \t\r");
		if (space_pos != string::npos)
		{
			name.resize(space_pos);
		}
		
		read(ref_stream, ref_str, Fasta());
		
		uint32_t refid = rt.get_id(name, NULL, 0);
		Junction dummy_left(refid, 0, 0, true);
		Junction dummy_right(refid, VMAXINT32, VMAXINT32, true);
		
		pair<JunctionSet::iterator, JunctionSet::iterator> r;
		r.first = junctions.lower_bound(dummy_left);
		r.second = junctions.upper_bound(dummy_right);
		
		JunctionSet::iterator itr = r.first;
		
		while(itr != r.second && itr != junctions.end())
		{
			print_splice(itr->first, read_length, itr->first.antisense ? "GTAG|rev" : "GTAG|fwd", ref_str, name, cout);
			++itr;
		}
	}


	ref_stream.clear();
	ref_stream.seekg(0, ios::beg);


	while(ref_stream.good() && 
		  !ref_stream.eof()) 
	{
		Reference ref_str;
		string name;

		readMeta(ref_stream, name, Fasta());
		string::size_type space_pos = name.find_first_of(" \t\r");
		if (space_pos != string::npos)
		{
			name.resize(space_pos);
		}
		
		read(ref_stream, ref_str, Fasta());
		
		uint32_t refid = rt.get_id(name, NULL,0);
		Deletion dummy_left(refid, 0, 0, true);
		Deletion dummy_right(refid, VMAXINT32, VMAXINT32, true);
		
		pair<std::set<Deletion>::iterator, std::set<Deletion>::iterator> r;
		r.first = deletions.lower_bound(dummy_left);
		r.second = deletions.upper_bound(dummy_right);
		
		std::set<Deletion>::iterator itr = r.first;
		
		while(itr != r.second && itr != deletions.end())
		{
			print_splice((Junction)*itr, read_length, itr->antisense ? "del|rev" : "del|fwd", ref_str, name, cout);
			++itr;
		}
	}

	ref_stream.clear();
	ref_stream.seekg(0, ios::beg);



	while(ref_stream.good() && 
		  !ref_stream.eof()) 
	{
		Reference ref_str;
		string name;

		readMeta(ref_stream, name, Fasta());
		string::size_type space_pos = name.find_first_of(" \t\r");
		if (space_pos != string::npos)
		{
			name.resize(space_pos);
		}
		
		read(ref_stream, ref_str, Fasta());
		
		uint32_t refid = rt.get_id(name, NULL,0);
		Insertion dummy_left(refid, 0, "");
		Insertion dummy_right(refid, VMAXINT32, "");
	
		std::set<Insertion>::iterator itr = insertions.lower_bound(dummy_left);
		std::set<Insertion>::iterator upper   = insertions.upper_bound(dummy_right);

		while(itr != upper && itr != insertions.end()){
			print_insertion(*itr, read_length, ref_str, name, cout);	
			++itr;
		}	
	}

}
Esempio n. 2
0
/**
 * Clears the `accepted` flag of junctions that are "shadowed" by a nearby,
 * better supported junction on the opposite strand.
 *
 * For every accepted junction J, the junctions lying between
 * (J.left - min_anchor_len, J.right) and (J.left, J.right + min_anchor_len)
 * on the opposite strand are examined. J is rejected when one of them has a
 * different strand, passes the distance test below, and has strictly more
 * supporting hits than J.
 *
 * The lower_bound/upper_bound windows presume that JunctionSet sorts by
 * reference id first and by left coordinate next - TODO confirm against the
 * Junction ordering.
 *
 * @param junctions  junction table; only the `accepted` flags are modified
 */
void knockout_shadow_junctions(JunctionSet& junctions)
{
  // Collect the distinct reference ids present in the table.
  vector<uint32_t> ref_ids;
  for (JunctionSet::iterator i = junctions.begin(); i != junctions.end(); ++i)
    {
      ref_ids.push_back(i->first.refid);
    }

  sort(ref_ids.begin(), ref_ids.end());
  vector<uint32_t>::iterator new_end = unique(ref_ids.begin(), ref_ids.end());
  ref_ids.erase(new_end, ref_ids.end());

  for (size_t i = 0; i < ref_ids.size(); ++i)
    {
      uint32_t refid = ref_ids[i];

      // Sentinels bracketing every junction of this reference.
      Junction dummy_left(refid, 0, 0, true);
      Junction dummy_right(refid, 0xFFFFFFFF, 0xFFFFFFFF, true);

      JunctionSet::iterator itr = junctions.lower_bound(dummy_left);
      JunctionSet::iterator ref_end = junctions.upper_bound(dummy_right);

      for (; itr != ref_end && itr != junctions.end(); ++itr)
        {
          if (!itr->second.accepted)
            continue;

          Junction fuzzy_left = itr->first;
          Junction fuzzy_right = itr->first;

          // Coordinates are unsigned (the sentinel above is 0xFFFFFFFF), so
          // a junction closer than min_anchor_len to position 0 used to wrap
          // around to a huge left bound. The window then started past its
          // own end and the scan below ran on to junctions.end(), comparing
          // against junctions of other references. Clamp at zero instead.
          const uint32_t anchor = static_cast<uint32_t>(min_anchor_len);
          if (fuzzy_left.left > anchor)
            fuzzy_left.left -= anchor;
          else
            fuzzy_left.left = 0;

          fuzzy_right.right += min_anchor_len;
          fuzzy_left.antisense = !itr->first.antisense;
          fuzzy_right.antisense = !itr->first.antisense;

          JunctionSet::iterator itr2 = junctions.lower_bound(fuzzy_left);
          JunctionSet::iterator window_end = junctions.upper_bound(fuzzy_right);

          int junc_support = itr->second.supporting_hits;

          for (; itr2 != window_end && itr2 != junctions.end(); ++itr2)
            {
              // NOTE(review): these differences are signed, so a neighbour
              // lying to the right yields a negative value that always
              // passes the "< min_anchor_len" test. An absolute distance may
              // have been intended - confirm before changing.
              int left_diff = itr->first.left - itr2->first.left;
              int right_diff = itr->first.right - itr2->first.right;

              if (itr != itr2 &&
                  itr->first.antisense != itr2->first.antisense &&
                  (left_diff < min_anchor_len || right_diff < min_anchor_len))
                {
                  if (junc_support < itr2->second.supporting_hits)
                    itr->second.accepted = false;
                }
            }
        }
    }
}