Ejemplo n.º 1
0
// Scans forward through the stream looking for the read whose id is r_id
// and, when it is found, copies it into `read`.
//
// Reads must ALWAYS be requested in increasing order of their id: a request
// for an id smaller than the previously requested one is a fatal error.
//
//   r_id        id of the read being looked for
//   read        output; cleared first, filled in only when the id is matched
//   strip_slash not used by this implementation
//   begin_id    records with an id below this are skipped silently
//               (rProc is not invoked for them)
//   end_id      reaching a record with id >= end_id stops the search
//   rProc       optional handler invoked for every record inspected in
//               [begin_id, end_id), including the matching one
//   is_unmapped not used by this implementation
//
// Returns true only if the requested read was found (and, when rProc is
// given, rProc->process() did not ask to abort the search).
bool ReadStream::getRead(uint64_t r_id,
		Read& read,
		bool strip_slash,
		uint64_t begin_id,
		uint64_t end_id,
		GetReadProc* rProc,
		bool is_unmapped )
{
	if (!fstream.file)
		err_die("Error: calling ReadStream::getRead() with no file handle!");
	if (r_id<last_id)
		err_die("Error: ReadStream::getRead() called with out-of-order id#!");
	last_id=r_id;
	read.clear();

	bool found=false;
	for (;;) {
		QReadData rdata;
		if (!next_read(rdata))
			return false; //nothing left to read

		const uint64_t cur_id=rdata->id;
		if (cur_id >= end_id)
			return false; //walked past the end of the requested id window
		if (cur_id < begin_id)
			continue; //before the window: skip without notifying rProc

		if (cur_id > r_id) {
			//went too far -- the target is missing (e.g. reads [mates] were
			//removed for some reason); keep this record queued for a later
			//request and give up
			read_pq.push(rdata);
			return false;
		}
		if (cur_id == r_id) {
			read=rdata->read; //this is the one to hand back
			found=true;
		}

		//every inspected record in the window goes through rProc (this is
		//how skipped/unmapped reads get processed); process() is normally
		//expected to return true -- a false result aborts the search and
		//reports "not found"
		//NOTE(review): `found` is passed as an lvalue and re-tested below in
		//case process() takes it by reference -- confirm against GetReadProc
		if (rProc && !rProc->process(rdata, found))
			return false;

		if (found)
			return true;
	}
}
Ejemplo n.º 2
0
// Fills `rd` from the BAM record `b`.
//   b        BAM record to convert
//   rd       output; cleared first, then sequence, qualities and name are set
//   alt_name when true, rd.alt_name is additionally set from the "ZN" tag
// NOTE(review): if tag_str() can return a null pointer when the "ZN" tag is
// absent, the alt_name assignment would be invalid -- confirm against
// GBamRecord before relying on alt_name=true for arbitrary input.
void bam2Read(bam1_t *b, Read& rd, bool alt_name=false) {
  GBamRecord record(b);
  rd.clear();
  // seqData() returns the sequence and is handed the address of rd.qual
  rd.seq = record.seqData(&rd.qual);
  rd.name = bam1_qname(b);
  if (!alt_name) {
    return;
  }
  rd.alt_name = record.tag_str("ZN");
}
Ejemplo n.º 3
0
/**
 * Reads the next FASTA/FASTQ record from `fr` into `read`.
 *
 * The sequence (possibly spanning several lines) is read from `fr`. Quality
 * values are then read from `frq` when it is non-NULL, or from `fr` itself
 * when `rd_format` is FASTQ. For FASTA input with no `frq`, no qualities are
 * read.
 *
 * `color` and `integer_quals` are not declared in this function; presumably
 * they are file-level/global option flags (colorspace reads and
 * space-separated integer quality values, respectively) -- confirm.
 *
 * @param fr        line reader supplying deflines and sequence lines
 * @param read      output record; cleared first, then name/seq (and
 *                  alt_name/qual when qualities are read) are filled in
 * @param rd_format FASTA or FASTQ; selects which characters start a record
 * @param frq       optional separate reader for quality values (may be NULL)
 * @return true if a record with a non-empty sequence was read; false at end
 *         of input or when the quality section is missing/invalid
 *
 * Malformed input is reported through err_exit() (presumably fatal).
 */
bool next_fastx_read(FLineReader& fr, Read& read, ReadFormat rd_format,
                        FLineReader* frq) {
  read.clear();
  char* buf=NULL;
  while ((buf=fr.nextLine())!=NULL) {
    if (buf[0]==0) continue; //skip empty lines
    if ((rd_format == FASTA && buf[0] == '>') ||
          (rd_format == FASTQ && (buf[0] == '+' || buf[0] == '@'))) { //next record
        if (read.seq.length()>0) { //current record ending
           //this line belongs to what follows; un-read it and stop
           fr.pushBack();
           break;
           }
        //defline: the name is everything after the marker char, up to the
        //first blank or tab
        read.name=buf+1;
        std::string::size_type space_pos = read.name.find_first_of(" \t");
        if (space_pos != std::string::npos) {
            read.name.resize(space_pos);
            }
        continue;
        } //defline
    // sequence line
    read.seq.append(buf);
    } //line reading loop

  std::replace(read.seq.begin(), read.seq.end(), '.', color ? '4' : 'N'); //shouldn't really be needed for FASTA files
  if (rd_format != FASTQ && frq==NULL)
      return (!read.seq.empty());
  if (frq==NULL) frq=&fr; //FASTQ
  //FASTQ or quals in a separate file -- now read quality values
  buf=frq->nextLine();
  if (buf==NULL) return false;
  while (buf[0]==0) { //skip empty lines
    buf=frq->nextLine();
    if (buf==NULL) return false;
    }
  //must be on '+' line here (or on a '>' defline for FASTA-style quals);
  //buf is known to be non-NULL at this point
  if ((rd_format == FASTQ && buf[0] != '+') ||
           (rd_format == FASTA && buf[0] != '>')) {
     err_exit("Error: beginning of quality values record not found! (%s)\n",buf);
     return false;
     }
  read.alt_name=buf+1;
  std::string::size_type space_pos = read.alt_name.find_first_of(" \t");
  if (space_pos != std::string::npos) read.alt_name.resize(space_pos);
   //read qv line(s) now:
  while ((buf=frq->nextLine())!=NULL) {
    if (integer_quals)
      {
        //space-separated integers: convert each to a single char with a
        //+33 offset; negative values are clamped to 0
        std::vector<std::string> integer_qual_values;
        tokenize(std::string(buf), " ", integer_qual_values);
        std::string temp_qual;
        for (size_t i = 0; i < integer_qual_values.size(); ++i)
          {
            int qual_value = atoi(integer_qual_values[i].c_str());
            if (qual_value < 0) qual_value = 0;
            temp_qual.push_back((char)(qual_value + 33));
          }
        read.qual.append(temp_qual);
      }
    else {
        read.qual.append(buf);
      }
    //stop once the qualities cover the sequence, allowing them to be one
    //shorter (see the color-read check below). Written as qual+1 >= seq
    //rather than qual >= seq-1: the latter wraps around for an empty
    //sequence (unsigned arithmetic) and would then consume every remaining
    //line of the quality stream.
    if (read.qual.length()+1>=read.seq.length())
          break;
    } //while qv lines

  // final check
  if (color) {
     //when there is one quality value per sequence character, drop the
     //first one; the emptiness test keeps substr(1) from throwing
     //std::out_of_range when both strings are empty
     if (!read.qual.empty() && read.seq.length()==read.qual.length()) {
        //discard first qv
        read.qual=read.qual.substr(1);
        }
     if (read.seq.length()!=read.qual.length()+1) {
        //lengths are size_t; cast so the argument matches the %d conversion
        err_exit("Error: length of quality string does not match sequence length (%d) for color read %s!\n",
            static_cast<int>(read.seq.length()), read.alt_name.c_str());
        }
     }
  else {
   if (read.seq.length()!=read.qual.length()) {
           //lengths are size_t; cast so the arguments match the %d conversions
           err_exit("Error: qual length (%d) differs from seq length (%d) for fastq record %s!\n",
               static_cast<int>(read.qual.length()),
               static_cast<int>(read.seq.length()), read.alt_name.c_str());
           return false;
           }
    }

  return !(read.seq.empty());
}