// reads must ALWAYS be requested in increasing order of their ID bool ReadStream::getRead(uint64_t r_id, Read& read, bool strip_slash, uint64_t begin_id, uint64_t end_id, /* GBamWriter* um_out, //unmapped reads output char um_code, //if non-zero, write the found read to um_out with this code int64_t* unmapped_counter, //update this counter for unmapped/skipped reads *only* int64_t* multimapped_counter //update this counter for too multi-mapped reads */ GetReadProc* rProc, bool is_unmapped ) { if (!fstream.file) err_die("Error: calling ReadStream::getRead() with no file handle!"); if (r_id<last_id) err_die("Error: ReadStream::getRead() called with out-of-order id#!"); last_id=r_id; bool found=false; read.clear(); while (!found) { QReadData rdata; if (!next_read(rdata)) break; /* if (strip_slash) { string::size_type slash = rdata.read.name.rfind("/"); if (slash != string::npos) rdata.read.name.resize(slash); } uint64_t id = (uint64_t)atol(read.name.c_str()); */ if (rdata->id >= end_id) return false; if (rdata->id < begin_id) continue; //silently skip until begin_id found //does not trigger rProc->process() until begin_id if (rdata->id == r_id) { read=rdata->read; //it will be returned found=true; } else if (rdata->id > r_id) { //can't find it, went too far //only happens when reads [mates] were removed for some reason //read_pq.push(make_pair(id, read)); read_pq.push(rdata); break; } if (rProc) { //skipped read processing (unmapped reads) if (!rProc->process(rdata, found)) //, is_unmapped)) // rProc->process() should normally return TRUE return false; //abort search for r_id, return "not found" } } //while target read id not found return found; }
// Populates `rd` from the BAM record `b`.
//
// `rd` is cleared first; its sequence comes from GBamRecord::seqData(), which
// is also handed a pointer to rd.qual (presumably to fill in the quality
// string -- confirm against GBamRecord). The read name is taken from
// bam1_qname(b). When `alt_name` is true, rd.alt_name is set from the
// record's "ZN" tag.
void bam2Read(bam1_t *b, Read& rd, bool alt_name=false) {
  GBamRecord rec(b);
  rd.clear();
  rd.seq = rec.seqData(&rd.qual);
  rd.name = bam1_qname(b);
  if (alt_name) {
    rd.alt_name = rec.tag_str("ZN");
  }
}
bool next_fastx_read(FLineReader& fr, Read& read, ReadFormat rd_format, FLineReader* frq) { /* if (fr.pushed_read) { read = fr.last_read; fr.pushed_read = false; return true; } */ read.clear(); char* buf=NULL; while ((buf=fr.nextLine())!=NULL) { if (buf[0]==0) continue; //skip empty lines if ((rd_format == FASTA && buf[0] == '>') || (rd_format == FASTQ && (buf[0] == '+' || buf[0] == '@'))) { //next record if (read.seq.length()>0) { //current record ending fr.pushBack(); break; } read.name=buf+1; string::size_type space_pos = read.name.find_first_of(" \t"); if (space_pos != string::npos) { read.name.resize(space_pos); } continue; } //defline // sequence line read.seq.append(buf); } //line reading loop replace(read.seq.begin(), read.seq.end(), '.', color ? '4' : 'N'); //shouldn't really be needed for FASTA files if (rd_format != FASTQ && frq==NULL) return (!read.seq.empty()); if (frq==NULL) frq=&fr; //FASTQ //FASTQ or quals in a separate file -- now read quality values buf=frq->nextLine(); if (buf==NULL) return false; while (buf[0]==0) { //skip empty lines buf=frq->nextLine(); if (buf==NULL) return false; } //must be on '+' line here if (buf==NULL || (rd_format == FASTQ && buf[0] != '+') || (rd_format == FASTA && buf[0] != '>')) { err_exit("Error: beginning of quality values record not found! 
(%s)\n",buf); return false; } read.alt_name=buf+1; string::size_type space_pos = read.alt_name.find_first_of(" \t"); if (space_pos != string::npos) read.alt_name.resize(space_pos); //read qv line(s) now: while ((buf=frq->nextLine())!=NULL) { if (integer_quals) { vector<string> integer_qual_values; tokenize(string(buf), " ", integer_qual_values); string temp_qual; for (size_t i = 0; i < integer_qual_values.size(); ++i) { int qual_value = atoi(integer_qual_values[i].c_str()); if (qual_value < 0) qual_value = 0; temp_qual.push_back((char)(qual_value + 33)); } read.qual.append(temp_qual); } else { read.qual.append(buf); } if (read.qual.length()>=read.seq.length()-1) break; } //while qv lines // final check if (color) { if (read.seq.length()==read.qual.length()) { //discard first qv read.qual=read.qual.substr(1); } if (read.seq.length()!=read.qual.length()+1) { err_exit("Error: length of quality string does not match sequence length (%d) for color read %s!\n", read.seq.length(), read.alt_name.c_str()); } } else { if (read.seq.length()!=read.qual.length()) { err_exit("Error: qual length (%d) differs from seq length (%d) for fastq record %s!\n", read.qual.length(), read.seq.length(), read.alt_name.c_str()); return false; } } //fr.last_read = read; return !(read.seq.empty()); }