void abcSmartCounts::changeChr(int newRefId){ if(doSmartCounts==0) return; // fprintf(stderr,"cur:%d new:%d\n",curChr,newRefId); if(curChr!=-1){ int64_t retVal =bgzf_tell(fbin); int clen = strlen(header->name[curChr]); bgzf_write(fbin,&clen,sizeof(int)); bgzf_write(fbin,header->name[curChr],clen); bgzf_write(fbin,&len,sizeof(int)); for(int i=0;i<4;i++) bgzf_write(fbin,counts[i],len);//write len of chr //write index stuff fprintf(stderr,"Writing index for chr: %s\n",header->name[curChr]); fwrite(&clen,sizeof(int),1,fidx); fwrite(header->name[curChr] ,sizeof(char),clen,fidx); fwrite(&len,sizeof(int),1,fidx); fwrite(&retVal,sizeof(int64_t),1,fidx); } curChr = newRefId; len = header->l_ref[curChr]; for(int i=0;i<4;i++){ delete [] counts[i]; counts[i] = new unsigned char[len]; memset(counts[i],0,len); } }
int64_t writeAll(std::vector<the_t> &thetas, char *chr,BGZF *fp){ fprintf(stderr,"\tWriting: chr:%s with nSites:%zu\n",chr,thetas.size()); int64_t retVal =bgzf_tell(fp); size_t clen=strlen(chr); bgzf_write(fp,&clen,sizeof(size_t));//write len of chr bgzf_write(fp,chr,clen);//write chr size_t vLen = thetas.size(); bgzf_write(fp,&vLen,sizeof(size_t));//write len of positions; int *posi = new int[thetas.size()]; static float **the = new float*[5]; for(int i=0;i<5;i++) the[i] = new float[thetas.size()]; for(size_t i=0;i<thetas.size();i++){ posi[i] =thetas[i].posi; for(int j=0;j<5;j++) the[j][i] = thetas[i].vals[j]; delete [] thetas[i].vals; } bgzf_write(fp,posi,sizeof(int)*thetas.size()); for(int j=0;j<5;j++){ bgzf_write(fp,the[j],sizeof(float)*thetas.size()); delete [] the[j]; } delete [] posi; fprintf(stderr,"\tDone writing: %s\n",chr); return retVal; }
/*
  Flush the record of the chromosome that is still active, release the
  count buffers and close both output files. Does nothing when
  doSmartCounts is 0.

  The record/index layout is the same as the one written by changeChr.
  The flush is skipped when no chromosome was ever selected (curChr == -1),
  since header->name[-1] would be read otherwise.
*/
abcSmartCounts::~abcSmartCounts(){
  if(doSmartCounts==0)
    return;

  if(curChr!=-1){
    int64_t retVal =bgzf_tell(fbin);
    int clen = strlen(header->name[curChr]);
    bgzf_write(fbin,&clen,sizeof(int));
    bgzf_write(fbin,header->name[curChr],clen);
    bgzf_write(fbin,&len,sizeof(int));
    for(int i=0;i<4;i++)
      bgzf_write(fbin,counts[i],len);

    //write index stuff
    fwrite(&clen,sizeof(int),1,fidx);
    fwrite(header->name[curChr],sizeof(char),clen,fidx);
    fwrite(&len,sizeof(int),1,fidx);
    fwrite(&retVal,sizeof(int64_t),1,fidx);
  }

  for(int i=0;i<4;i++)
    delete [] counts[i];
  delete [] counts;

  fclose(fidx);
  bgzf_close(fbin);
}
/**
 * Writes the variants of this list as BGZF-compressed VCF text to an
 * already opened file descriptor.
 *
 * @param headerPtr   header used to render each variant line (and, when
 *                    requested, the header text itself)
 * @param printHeader when true the header text is written before the variants
 * @param out         file descriptor handed to bgzf_dopen; it is closed by
 *                    bgzf_close on the normal path
 *
 * If the descriptor cannot be wrapped in a BGZF handle nothing is written.
 * Each variant line is rendered exactly once.
 */
void VariantList::printToCompressedVCF(IHeader::SharedPtr headerPtr, bool printHeader, int out)
{
    BGZF* fp = bgzf_dopen(out, "w");
    if (fp == nullptr)
    {
        return; // could not attach to the descriptor; nothing can be written
    }

    if (printHeader)
    {
        const auto& headerText = headerPtr->getHeader();
        bgzf_write(fp, headerText.c_str(), headerText.size());
    }

    for (const auto& variantPtr : this->m_variant_ptrs)
    {
        // render once; the same text supplies both the bytes and the length
        const auto& variantLine = variantPtr->getVariantLine(headerPtr);
        bgzf_write(fp, variantLine.c_str(), variantLine.size());
    }

    bgzf_close(fp);
}
/*
 * Serialise one BCF record to a BGZF stream.
 *
 * A 32-byte header of eight 32-bit words is written first:
 *   x[0]    length of the shared block + 24
 *   x[1]    length of the per-sample block
 *   x[2..5] the first 16 bytes of *v, copied verbatim
 *   x[6]    n_allele<<16 | n_info
 *   x[7]    n_fmt<<24 | n_sample
 * followed by the shared block and the per-sample block.
 *
 * Returns 0 on success and -1 if any of the three writes fails or is short.
 */
int bcf_write1(BGZF *fp, const bcf1_t *v)
{
    uint32_t x[8];

    x[0] = v->shared.l + 24; // to include six 32-bit integers
    x[1] = v->indiv.l;
    memcpy(x + 2, v, 16);
    x[6] = (uint32_t)v->n_allele<<16 | v->n_info;
    x[7] = (uint32_t)v->n_fmt<<24 | v->n_sample;

    /* a negative return converts to a huge size_t and never matches */
    if ((size_t)bgzf_write(fp, x, 32) != (size_t)32)
        return -1;
    if ((size_t)bgzf_write(fp, v->shared.s, v->shared.l) != (size_t)v->shared.l)
        return -1;
    if ((size_t)bgzf_write(fp, v->indiv.s, v->indiv.l) != (size_t)v->indiv.l)
        return -1;
    return 0;
}
/*
 * Copy the BAM stream `in` to file descriptor `fd`, replacing its header
 * with `h`.
 *
 * The old header is read (and discarded) so that `in` is positioned on the
 * first alignment. The remainder of the current uncompressed block is
 * re-compressed through the writer; every following block is copied as raw
 * bytes without being decompressed.
 *
 * Returns 0 on success, -1 if `in` is not opened for reading, if memory
 * cannot be allocated, if the old header cannot be read or if `fd` cannot
 * be opened for writing.
 */
int bam_reheader(BGZF *in, const bam_header_t *h, int fd)
{
    BGZF *fp;
    bam_header_t *old;
    int len;
    uint8_t *buf;

    if (in->open_mode != 'r') return -1;

    buf = malloc(BUF_SIZE);
    if (buf == NULL) return -1;

    /* only needed to advance past the old header; release it right away */
    old = bam_header_read(in);
    if (old == NULL) {
        free(buf);
        return -1;
    }
    bam_header_destroy(old);

    fp = bgzf_dopen(fd, "w");
    if (fp == NULL) {
        free(buf);
        return -1;
    }

    bam_header_write(fp, h);
    if (in->block_offset < in->block_length) {
        /* alignments sharing a block with the old header */
        bgzf_write(fp, in->uncompressed_block + in->block_offset, in->block_length - in->block_offset);
        bgzf_flush(fp);
    }

    /* remaining blocks: raw byte copy */
#ifdef _USE_KNETFILE
    while ((len = knet_read(in->fp, buf, BUF_SIZE)) > 0)
        fwrite(buf, 1, len, fp->fp);
#else
    while (!feof(in->fp) && (len = fread(buf, 1, BUF_SIZE, in->fp)) > 0)
        fwrite(buf, 1, len, fp->fp);
#endif
    free(buf);

    /* nothing is pending in either block buffer any more */
    fp->block_offset = in->block_offset = 0;
    bgzf_close(fp);
    return 0;
}
/*
  Write the base counts of every kept site that is present in the site map
  `am` to `outfile`.

  For each site with keepSites != 0 the key "<chr> <1-based pos>" is looked up
  in `am`. Sites that are not found, sites where any of the four counts
  exceeds 3, and sites with a total count of zero are skipped. For the rest a
  `counts` record (rel_pos from the map plus A, C, G, T) is written.

  Does nothing when doScounts is 0.
*/
void abcScounts::print(funkyPars *pars){
  if(doScounts==0)
    return;

  for(int s=0;s<pars->numSites;s++){
    if(pars->keepSites[s]==0)
      continue;

    char tmpname[1024];
    snprintf(tmpname,sizeof(tmpname),"%s %d",header->target_name[pars->refId],pars->posi[s]+1);

    aMap::iterator it = am.find(tmpname);
    if(it==am.end()){
      fprintf(stderr,"\t-> problem finding site: %s\n",tmpname);
      continue;
    }

    if(pars->counts[s][0]>3||pars->counts[s][1]>3||pars->counts[s][2]>3||pars->counts[s][3]>3){
      fprintf(stderr,"\t-> skipping posi tmpname:%s du to depth>3\n",tmpname);
      continue;
    }

    // all four bases of THIS site; the T count was previously read as counts[3]
    if(pars->counts[s][0]+pars->counts[s][1]+pars->counts[s][2]+pars->counts[s][3]==0)
      continue;

    counts cnts;
    cnts.rel_pos = it->second;
    cnts.A = pars->counts[s][0];
    cnts.C = pars->counts[s][1];
    cnts.G = pars->counts[s][2];
    cnts.T = pars->counts[s][3];

    // the write must happen even when assert() is compiled out with NDEBUG
    const long long nWritten = bgzf_write(outfile,&cnts,sizeof(counts)*1);
    assert(nWritten==(long long)sizeof(counts));
    (void)nWritten;
  }
}
void BAMbinSortByCoordinate(uint32 iBin, uint binN, uint binS, uint nThreads, string dirBAMsort, Parameters *P) { if (binS==0) return; //nothing to do for empty bins //allocate arrays char *bamIn=new char[binS]; uint *startPos=new uint[binN*3]; uint bamInBytes=0; //load all aligns for (uint it=0; it<nThreads; it++) { string bamInFile=dirBAMsort+to_string(it)+"/"+to_string((uint) iBin); ifstream bamInStream (bamInFile.c_str()); bamInStream.read(bamIn+bamInBytes,binS);//read the whole file bamInBytes += bamInStream.gcount(); bamInStream.close(); remove(bamInFile.c_str()); }; if (bamInBytes!=binS) { ostringstream errOut; errOut << "EXITING because of FATAL ERROR: number of bytes expected from the BAM bin does not agree with the actual size on disk: "; errOut << binS <<" "<< bamInBytes <<" "<< iBin <<"\n"; exitWithError(errOut.str(),std::cerr, P->inOut->logMain, 1, *P); }; //extract coordinates for (uint ib=0,ia=0;ia<binN;ia++) { uint32 *bamIn32=(uint32*) (bamIn+ib); startPos[ia*3] =( ((uint) bamIn32[1]) << 32) | ( (uint)bamIn32[2] ); startPos[ia*3+2]=ib; ib+=bamIn32[0]+sizeof(uint32);//note that size of the BAM record does not include the size record itself startPos[ia*3+1]=*( (uint*) (bamIn+ib) ); //read order ib+=sizeof(uint); }; //sort qsort((void*) startPos, binN, sizeof(uint)*3, funCompareUint2); BGZF *bgzfBin; bgzfBin=bgzf_open((dirBAMsort+"/b"+to_string((uint) iBin)).c_str(),("w"+to_string((long long) P->outBAMcompression)).c_str()); outBAMwriteHeader(bgzfBin,P->samHeaderSortedCoord,P->chrName,P->chrLength); //send ordered aligns to bgzf one-by-one for (uint ia=0;ia<binN;ia++) { char* ib=bamIn+startPos[ia*3+2]; bgzf_write(bgzfBin,ib, *((uint32*) ib)+sizeof(uint32) ); }; bgzf_flush(bgzfBin); bgzf_close(bgzfBin); //release memory delete [] bamIn; delete [] startPos; };
void BAMoutput::unsortedOneAlign (char *bamIn, uint bamSize, uint bamSize2) {//record one alignment to the buffer, write buffer if needed if (binBytes1+bamSize2 > bamArraySize) {//write out this buffer if (g_threadChunks.threadBool) pthread_mutex_lock(&g_threadChunks.mutexOutSAM); bgzf_write(bgzfBAM,bamArray,binBytes1); if (g_threadChunks.threadBool) pthread_mutex_unlock(&g_threadChunks.mutexOutSAM); binBytes1=0;//rewind the buffer }; memcpy(bamArray+binBytes1, bamIn, bamSize); binBytes1 += bamSize; };
// Write a BAM header to fp and flush it.
//   samh : SAM header text
//   chrn : reference names
//   chrl : reference lengths, indexed like chrn
// Output: the 4-byte magic, the text length and text, the number of
// references, then for every reference its name length (counting the
// terminating NUL), the NUL-terminated name and the reference length.
void outBAMwriteHeader (BGZF* fp, const string &samh, const vector <string> &chrn, const vector <uint> &chrl) {

    static const char bamMagic[4]={'B','A','M','\001'};
    bgzf_write(fp,bamMagic,4);

    //header text
    int32 textLen=samh.size();
    bgzf_write(fp,(char*) &textLen,sizeof(textLen));
    bgzf_write(fp,samh.c_str(),textLen);

    //reference dictionary
    int32 nRef=(int32) chrn.size();
    bgzf_write(fp,(char*) &nRef,sizeof(nRef));

    int32 iRef=0;
    while (iRef<nRef) {
        const string &refName=chrn.at(iRef);
        int32 nameLen = (int32) (refName.size()+1);
        int32 refLen = (int32) chrl[iRef];

        bgzf_write(fp,(char*) &nameLen,sizeof(nameLen));
        bgzf_write(fp,refName.data(),nameLen); //nameLen covers the trailing \0
        bgzf_write(fp,(char*) &refLen,sizeof(refLen));

        ++iRef;
    }

    bgzf_flush(fp);
}
/*
 * Reads a file and outputs a new BAM file to fd with 'h' replaced as
 * the header.  No checks are made to the validity.
 *
 * in        input BAM, must be opened for reading
 * h         replacement header; when add_PG is set its text is replaced
 *           by a copy that carries an extra @PG line
 * fd        file descriptor the new BAM is written to
 * arg_list  command line recorded as CL in the @PG line, may be NULL
 * add_PG    non-zero to add a samtools @PG line to the header
 *
 * Returns 0 on success, -1 on failure.
 *
 * The header is prepared before the output is opened, so a failure while
 * adding the @PG line leaves nothing to unwind on the output side.
 */
int bam_reheader(BGZF *in, bam_hdr_t *h, int fd, const char *arg_list, int add_PG)
{
    BGZF *fp;
    bam_hdr_t *old;
    ssize_t len;
    uint8_t *buf;

    if (in->is_write) return -1;

    buf = malloc(BUF_SIZE);
    if (buf == NULL) return -1;

    /* the old header is only read to position `in` after it */
    old = bam_hdr_read(in);
    if (old == NULL) {
        fprintf(stderr, "Couldn't read header\n");
        free(buf);
        return -1;
    }
    bam_hdr_destroy(old);

    if (add_PG) {
        // Around the houses, but it'll do until we can manipulate bam_hdr_t natively.
        SAM_hdr *sh = sam_hdr_parse_(h->text, h->l_text);
        if (sh == NULL) {
            free(buf);
            return -1;
        }
        if (sam_hdr_add_PG(sh, "samtools",
                           "VN", samtools_version(),
                           arg_list ? "CL": NULL,
                           arg_list ? arg_list : NULL,
                           NULL) != 0) {
            sam_hdr_free(sh);
            free(buf);
            return -1;
        }

        free(h->text);
        h->text = strdup(sam_hdr_str(sh));
        h->l_text = sam_hdr_length(sh);
        sam_hdr_free(sh);
        if (!h->text) {
            free(buf);
            return -1;
        }
    }

    fp = bgzf_fdopen(fd, "w");
    if (fp == NULL) {
        free(buf);
        return -1;
    }

    bam_hdr_write(fp, h);
    if (in->block_offset < in->block_length) {
        /* alignments that share a block with the old header */
        bgzf_write(fp, in->uncompressed_block + in->block_offset, in->block_length - in->block_offset);
        bgzf_flush(fp);
    }

    /* all following blocks are copied without decompressing them */
    while ((len = bgzf_raw_read(in, buf, BUF_SIZE)) > 0)
        bgzf_raw_write(fp, buf, len);
    free(buf);

    fp->block_offset = in->block_offset = 0;
    bgzf_close(fp);
    return 0;
}
/*
 * Copy the BAM stream `in` to file descriptor `fd`, replacing its header
 * with `h`.
 *
 * The old header is read only to position `in` on the first alignment and
 * is released immediately. What is left of the current uncompressed block
 * is re-compressed through the writer; all later blocks are copied raw.
 *
 * Returns 0 on success, -1 if `in` is a write handle, if memory cannot be
 * allocated, if the old header cannot be read or if `fd` cannot be opened
 * for writing.
 */
int bam_reheader(BGZF *in, const bam_header_t *h, int fd)
{
    BGZF *fp;
    bam_header_t *old;
    ssize_t len;
    uint8_t *buf;

    if (in->is_write) return -1;

    buf = malloc(BUF_SIZE);
    if (buf == NULL) return -1;

    old = bam_header_read(in);
    if (old == NULL) {
        free(buf);
        return -1;
    }
    bam_header_destroy(old);

    fp = bgzf_fdopen(fd, "w");
    if (fp == NULL) {
        free(buf);
        return -1;
    }

    bam_header_write(fp, h);
    if (in->block_offset < in->block_length) {
        /* alignments that share a block with the old header */
        bgzf_write(fp, in->uncompressed_block + in->block_offset, in->block_length - in->block_offset);
        bgzf_flush(fp);
    }

    /* raw block copy of the rest of the file */
    while ((len = bgzf_raw_read(in, buf, BUF_SIZE)) > 0)
        bgzf_raw_write(fp, buf, len);
    free(buf);

    fp->block_offset = in->block_offset = 0;
    bgzf_close(fp);
    return 0;
}
int main(int argc, char **argv) { int c, compress, pstdout, is_forced, index = 0, rebgzip = 0, reindex = 0; BGZF *fp; void *buffer; long start, end, size; char *index_fname = NULL; int threads = 1; static const struct option loptions[] = { {"help", no_argument, NULL, 'h'}, {"offset", required_argument, NULL, 'b'}, {"stdout", no_argument, NULL, 'c'}, {"decompress", no_argument, NULL, 'd'}, {"force", no_argument, NULL, 'f'}, {"index", no_argument, NULL, 'i'}, {"index-name", required_argument, NULL, 'I'}, {"reindex", no_argument, NULL, 'r'}, {"rebgzip",no_argument,NULL,'g'}, {"size", required_argument, NULL, 's'}, {"threads", required_argument, NULL, '@'}, {"version", no_argument, NULL, 1}, {NULL, 0, NULL, 0} }; compress = 1; pstdout = 0; start = 0; size = -1; end = -1; is_forced = 0; while((c = getopt_long(argc, argv, "cdh?fb:@:s:iI:gr",loptions,NULL)) >= 0){ switch(c){ case 'd': compress = 0; break; case 'c': pstdout = 1; break; case 'b': start = atol(optarg); compress = 0; pstdout = 1; break; case 's': size = atol(optarg); pstdout = 1; break; case 'f': is_forced = 1; break; case 'i': index = 1; break; case 'I': index_fname = optarg; break; case 'g': rebgzip = 1; break; case 'r': reindex = 1; compress = 0; break; case '@': threads = atoi(optarg); break; case 1: printf( "bgzip (htslib) %s\n" "Copyright (C) 2017 Genome Research Ltd.\n", hts_version()); return EXIT_SUCCESS; case 'h': case '?': return bgzip_main_usage(); } } if (size >= 0) end = start + size; if (end >= 0 && end < start) { fprintf(stderr, "[bgzip] Illegal region: [%ld, %ld]\n", start, end); return 1; } if (compress == 1) { struct stat sbuf; int f_src = fileno(stdin); if ( argc>optind ) { if ( stat(argv[optind],&sbuf)<0 ) { fprintf(stderr, "[bgzip] %s: %s\n", strerror(errno), argv[optind]); return 1; } if ((f_src = open(argv[optind], O_RDONLY)) < 0) { fprintf(stderr, "[bgzip] %s: %s\n", strerror(errno), argv[optind]); return 1; } if (pstdout) fp = bgzf_open("-", "w"); else { char *name = 
malloc(strlen(argv[optind]) + 5); strcpy(name, argv[optind]); strcat(name, ".gz"); fp = bgzf_open(name, is_forced? "w" : "wx"); if (fp == NULL && errno == EEXIST && confirm_overwrite(name)) fp = bgzf_open(name, "w"); if (fp == NULL) { fprintf(stderr, "[bgzip] can't create %s: %s\n", name, strerror(errno)); free(name); return 1; } free(name); } } else if (!pstdout && isatty(fileno((FILE *)stdout)) ) return bgzip_main_usage(); else if ( index && !index_fname ) { fprintf(stderr, "[bgzip] Index file name expected when writing to stdout\n"); return 1; } else fp = bgzf_open("-", "w"); if ( index && rebgzip ) { fprintf(stderr, "[bgzip] Can't produce a index and rebgzip simultaneously\n"); return 1; } if ( rebgzip && !index_fname ) { fprintf(stderr, "[bgzip] Index file name expected when writing to stdout\n"); return 1; } if (threads > 1) bgzf_mt(fp, threads, 256); if ( index ) bgzf_index_build_init(fp); buffer = malloc(WINDOW_SIZE); #ifdef _WIN32 _setmode(f_src, O_BINARY); #endif if (rebgzip){ if ( bgzf_index_load(fp, index_fname, NULL) < 0 ) error("Could not load index: %s.gzi\n", argv[optind]); while ((c = read(f_src, buffer, WINDOW_SIZE)) > 0) if (bgzf_block_write(fp, buffer, c) < 0) error("Could not write %d bytes: Error %d\n", c, fp->errcode); } else { while ((c = read(f_src, buffer, WINDOW_SIZE)) > 0) if (bgzf_write(fp, buffer, c) < 0) error("Could not write %d bytes: Error %d\n", c, fp->errcode); } if ( index ) { if (index_fname) { if (bgzf_index_dump(fp, index_fname, NULL) < 0) error("Could not write index to '%s'\n", index_fname); } else { if (bgzf_index_dump(fp, argv[optind], ".gz.gzi") < 0) error("Could not write index to '%s.gz.gzi'", argv[optind]); } } if (bgzf_close(fp) < 0) error("Close failed: Error %d", fp->errcode); if (argc > optind && !pstdout) unlink(argv[optind]); free(buffer); close(f_src); return 0; } else if ( reindex ) { if ( argc>optind ) { fp = bgzf_open(argv[optind], "r"); if ( !fp ) error("[bgzip] Could not open file: %s\n", argv[optind]); } 
else { if ( !index_fname ) error("[bgzip] Index file name expected when reading from stdin\n"); fp = bgzf_open("-", "r"); if ( !fp ) error("[bgzip] Could not read from stdin: %s\n", strerror(errno)); } buffer = malloc(BGZF_BLOCK_SIZE); bgzf_index_build_init(fp); int ret; while ( (ret=bgzf_read(fp, buffer, BGZF_BLOCK_SIZE))>0 ) ; free(buffer); if ( ret<0 ) error("Is the file gzipped or bgzipped? The latter is required for indexing.\n"); if ( index_fname ) { if (bgzf_index_dump(fp, index_fname, NULL) < 0) error("Could not write index to '%s'\n", index_fname); } else { if (bgzf_index_dump(fp, argv[optind], ".gzi") < 0) error("Could not write index to '%s.gzi'\n", argv[optind]); } if ( bgzf_close(fp)<0 ) error("Close failed: Error %d\n",fp->errcode); return 0; } else { struct stat sbuf; int f_dst; if ( argc>optind ) { if ( stat(argv[optind],&sbuf)<0 ) { fprintf(stderr, "[bgzip] %s: %s\n", strerror(errno), argv[optind]); return 1; } char *name; int len = strlen(argv[optind]); if ( strcmp(argv[optind]+len-3,".gz") ) { fprintf(stderr, "[bgzip] %s: unknown suffix -- ignored\n", argv[optind]); return 1; } fp = bgzf_open(argv[optind], "r"); if (fp == NULL) { fprintf(stderr, "[bgzip] Could not open file: %s\n", argv[optind]); return 1; } if (pstdout) { f_dst = fileno(stdout); } else { const int wrflags = O_WRONLY | O_CREAT | O_TRUNC; name = strdup(argv[optind]); name[strlen(name) - 3] = '\0'; f_dst = open(name, is_forced? 
wrflags : wrflags|O_EXCL, 0666); if (f_dst < 0 && errno == EEXIST && confirm_overwrite(name)) f_dst = open(name, wrflags, 0666); if (f_dst < 0) { fprintf(stderr, "[bgzip] can't create %s: %s\n", name, strerror(errno)); free(name); return 1; } free(name); } } else if (!pstdout && isatty(fileno((FILE *)stdin)) ) return bgzip_main_usage(); else { f_dst = fileno(stdout); fp = bgzf_open("-", "r"); if (fp == NULL) { fprintf(stderr, "[bgzip] Could not read from stdin: %s\n", strerror(errno)); return 1; } } if (threads > 1) bgzf_mt(fp, threads, 256); buffer = malloc(WINDOW_SIZE); if ( start>0 ) { if ( bgzf_index_load(fp, argv[optind], ".gzi") < 0 ) error("Could not load index: %s.gzi\n", argv[optind]); if ( bgzf_useek(fp, start, SEEK_SET) < 0 ) error("Could not seek to %d-th (uncompressd) byte\n", start); } #ifdef _WIN32 _setmode(f_dst, O_BINARY); #endif while (1) { if (end < 0) c = bgzf_read(fp, buffer, WINDOW_SIZE); else c = bgzf_read(fp, buffer, (end - start > WINDOW_SIZE)? WINDOW_SIZE:(end - start)); if (c == 0) break; if (c < 0) error("Could not read %d bytes: Error %d\n", (end - start > WINDOW_SIZE)? WINDOW_SIZE:(end - start), fp->errcode); start += c; if ( write(f_dst, buffer, c) != c ) { #ifdef _WIN32 if (GetLastError() != ERROR_NO_DATA) #endif error("Could not write %d bytes\n", c); } if (end >= 0 && start >= end) break; } free(buffer); if (bgzf_close(fp) < 0) error("Close failed: Error %d\n",fp->errcode); if (!pstdout) unlink(argv[optind]); return 0; } }
/*
 * Write a BCF header to fp: the 5-byte magic "BCF\2\1", four bytes taken
 * from the address of h->l_text, then l_text bytes of header text.
 */
void bcf_hdr_write(BGZF *fp, const bcf_hdr_t *h)
{
    static const char magic[5] = { 'B', 'C', 'F', '\2', '\1' };

    bgzf_write(fp, magic, 5);

    /* length field first, then the text it describes */
    bgzf_write(fp, &h->l_text, 4);
    bgzf_write(fp, h->text, h->l_text);
}
/**
 * Write the NUL-terminated string s followed by a newline.
 *
 * @return total number of bytes written (string plus newline), or the
 *         negative value reported by bgzf_write if either write fails.
 *         A failure is returned as is instead of being added to the other
 *         count, so an error can no longer be masked.
 */
int BGZipFileWriter::writeLine(const char* s) {
  const int textBytes = bgzf_write(this->fp, s, strlen(s));
  if (textBytes < 0) {
    return textBytes;
  }

  const int newlineBytes = bgzf_write(this->fp, "\n", 1);
  if (newlineBytes < 0) {
    return newlineBytes;
  }

  return textBytes + newlineBytes;
}
/**
 * Write the NUL-terminated string s (without the terminator).
 *
 * @return the value returned by bgzf_write.
 */
int BGZipFileWriter::write(const char* s) {
  const size_t nBytes = strlen(s);
  return bgzf_write(this->fp, s, nBytes);
}
int fst_index(int argc,char **argv){ if(argc<1){ fprintf(stderr,"Must supply afile.saf.idx [chrname, write more info]\n"); return 0; } args *arg = getArgs(argc,argv); if(!arg->fstout){ fprintf(stderr,"\t-> Must supply -fstout for doing fstindex\n"); return 0; } std::vector<persaf *> &saf =arg->saf; //assert(saf.size()==2); size_t nSites = arg->nSites; if(nSites == 0){//if no -nSites is specified nSites = 100000;//<- set default to 100k sites, no need to load everything... // nSites=nsites(saf,arg); } fprintf(stderr,"\t-> nSites: %lu\n",nSites); std::vector<Matrix<float> *> gls; for(int i=0;i<saf.size();i++) gls.push_back(alloc<float>(nSites,saf[i]->nChr+1)); // int ndim= parspace(saf); if(arg->sfsfname.size()!=choose(saf.size(),2)){ fprintf(stderr,"\t-> You have supplied: %lu populations, that is %d pairs\n",saf.size(),choose(saf.size(),2)); fprintf(stderr,"\t-> You therefore need to supply %d 2dsfs priors instead of:%lu\n",choose(saf.size(),2),arg->sfsfname.size()); exit(0); } std::vector<double *> sfs; int inc =0; for(int i=0;i<saf.size();i++) for(int j=i+1;j<saf.size();j++){ size_t pairdim = (saf[i]->nChr+1)*(saf[j]->nChr+1); double *ddd=new double[pairdim]; readSFS(arg->sfsfname[inc],pairdim,ddd); normalize(ddd,pairdim); sfs.push_back(ddd); inc++; } double **a1,**b1; a1=new double*[choose(saf.size(),2)]; b1=new double*[choose(saf.size(),2)]; inc=0; for(int i=0;i<saf.size();i++) for(int j=i+1;j<saf.size();j++){ calcCoef((int)saf[i]->nChr,(int)saf[j]->nChr,&a1[inc],&b1[inc]); // fprintf(stderr,"a1[%d]:%p b1[%d]:%p\n",inc,&a1[inc][0],inc,&b1[inc][0]); inc++; } BGZF *fstbg = openFileBG(arg->fstout,".fst.gz"); FILE *fstfp = openFile(arg->fstout,".fst.idx"); char buf[8]="fstv1"; bgzf_write(fstbg,buf,8); fwrite(buf,1,8,fstfp); #if 0 for(int i=0;i<ndim;i++) fprintf(stdout,"%f %f\n",a1[i],b1[i]); exit(0); #endif #if 1 size_t nsafs=saf.size(); fwrite(&nsafs,sizeof(size_t),1,fstfp); for(int i=0;i<nsafs;i++){ size_t clen= strlen(saf[i]->fname); 
fwrite(&clen,sizeof(size_t),1,fstfp); fwrite(saf[i]->fname,1,clen,fstfp); } #endif int asdf = choose(saf.size(),2); std::vector<double> *ares = new std::vector<double> [choose(saf.size(),2)]; std::vector<double> *bres = new std::vector<double> [choose(saf.size(),2)]; // for(int i=0;i<3;i++) // fprintf(stderr,"ares.size():%lu bres.size():%lu sfs:%p\n",ares[i].size(),bres[i].size(),&sfs[i][0]); std::vector<int> posi; setGloc(saf,nSites); int *posiToPrint = new int[nSites]; for(myMap::iterator it = saf[0]->mm.begin();it!=saf[0]->mm.end();++it) { // fprintf(stderr,"doing chr:%s\n",it->first); if(arg->chooseChr!=NULL){ it = saf[0]->mm.find(arg->chooseChr); if(it==saf[0]->mm.end()){ fprintf(stderr,"Problem finding chr: %s\n",arg->chooseChr); break; } } for(int i=0;i<choose(saf.size(),2);i++){ ares[i].clear(); bres[i].clear(); } posi.clear(); while(1) { int ret=readdata(saf,gls,nSites,it->first,arg->start,arg->stop,posiToPrint,NULL);//read nsites from data // fprintf(stderr,"ret:%d glsx:%lu\n",ret,gls[0]->x); //if(gls[0]->x!=nSites&&arg->chooseChr==NULL&&ret!=-3){ //fprintf(stderr,"continue continue\n"); // continue; //} fprintf(stderr,"\t-> Will now do fst temp dump using a chunk of %lu\n",gls[0]->x); int inc=0; for(int i=0;i<saf.size();i++) for(int j=i+1;j<saf.size();j++){ // fprintf(stderr,"i:%d j:%d inc:%d gls[i]:%p gls[j]:%p sfs:%p a1:%p b1:%p\n",i,j,inc,gls[i],gls[j],sfs[i],&a1[inc][0],&a1[inc][0]); block_coef(gls[i],gls[j],sfs[inc],a1[inc],b1[inc],ares[inc],bres[inc]); inc++; } for(int i=0;i<gls[0]->x;i++) posi.push_back(posiToPrint[i]); for(int i=0;i<gls.size();i++) gls[i]->x =0; if(ret==-2)//no more data in files or in chr, eith way we break; break; } size_t clen = strlen(it->first); fwrite(&clen,sizeof(size_t),1,fstfp); fwrite(it->first,1,clen,fstfp); size_t nit=posi.size(); assert(1==fwrite(&nit,sizeof(size_t),1,fstfp)); int64_t tell = bgzf_tell(fstbg); fwrite(&tell,sizeof(int64_t),1,fstfp); bgzf_write(fstbg,&posi[0],posi.size()*sizeof(int)); int inc =0; 
for(int i=0;i<saf.size();i++) for(int j=i+1;j<saf.size();j++){ bgzf_write(fstbg,&(ares[inc][0]),ares[inc].size()*sizeof(double)); bgzf_write(fstbg,&(bres[inc][0]),bres[inc].size()*sizeof(double)); inc++; } if(arg->chooseChr!=NULL) break; } delGloc(saf,nSites); destroy(gls,nSites); destroy_args(arg); for(int i=0;i<sfs.size();i++) delete [] sfs[i]; #if 0 fprintf(stderr,"\n\t-> NB NB output is no longer log probs of the frequency spectrum!\n"); fprintf(stderr,"\t-> Output is now simply the expected values! \n"); fprintf(stderr,"\t-> You can convert to the old format simply with log(norm(x))\n"); #endif bgzf_close(fstbg); fclose(fstfp); fprintf(stderr,"\t-> fst index finished with no errors!\n"); return 0; }
int bam_cat(int nfn, char * const *fn, const bam_hdr_t *h, const char* outbam) { BGZF *fp; uint8_t *buf; uint8_t ebuf[BGZF_EMPTY_BLOCK_SIZE]; const int es=BGZF_EMPTY_BLOCK_SIZE; int i; fp = strcmp(outbam, "-")? bgzf_open(outbam, "w") : bgzf_fdopen(fileno(stdout), "w"); if (fp == 0) { fprintf(stderr, "[%s] ERROR: fail to open output file '%s'.\n", __func__, outbam); return 1; } if (h) bam_hdr_write(fp, h); buf = (uint8_t*) malloc(BUF_SIZE); for(i = 0; i < nfn; ++i){ BGZF *in; bam_hdr_t *old; int len,j; in = strcmp(fn[i], "-")? bgzf_open(fn[i], "r") : bgzf_fdopen(fileno(stdin), "r"); if (in == 0) { fprintf(stderr, "[%s] ERROR: fail to open file '%s'.\n", __func__, fn[i]); return -1; } if (in->is_write) return -1; old = bam_hdr_read(in); if (old == NULL) { fprintf(stderr, "[%s] ERROR: couldn't read header for '%s'.\n", __func__, fn[i]); bgzf_close(in); return -1; } if (h == 0 && i == 0) bam_hdr_write(fp, old); if (in->block_offset < in->block_length) { bgzf_write(fp, in->uncompressed_block + in->block_offset, in->block_length - in->block_offset); bgzf_flush(fp); } j=0; while ((len = bgzf_raw_read(in, buf, BUF_SIZE)) > 0) { if(len<es){ int diff=es-len; if(j==0) { fprintf(stderr, "[%s] ERROR: truncated file?: '%s'.\n", __func__, fn[i]); return -1; } bgzf_raw_write(fp, ebuf, len); memcpy(ebuf,ebuf+len,diff); memcpy(ebuf+diff,buf,len); } else { if(j!=0) bgzf_raw_write(fp, ebuf, es); len-= es; memcpy(ebuf,buf+len,es); bgzf_raw_write(fp, buf, len); } j=1; } /* check final gzip block */ { const uint8_t gzip1=ebuf[0]; const uint8_t gzip2=ebuf[1]; const uint32_t isize=*((uint32_t*)(ebuf+es-4)); if(((gzip1!=GZIPID1) || (gzip2!=GZIPID2)) || (isize!=0)) { fprintf(stderr, "[%s] WARNING: Unexpected block structure in file '%s'.", __func__, fn[i]); fprintf(stderr, " Possible output corruption.\n"); bgzf_raw_write(fp, ebuf, es); } } bam_hdr_destroy(old); bgzf_close(in); } free(buf); bgzf_close(fp); return 0; }
void vcf_file::print_bcf(const parameters ¶ms) { LOG.printLOG("Outputting BCF file...\n"); BGZF * out; if(!params.stream_out) { string output_file = params.output_prefix + ".recode.bcf"; out = bgzf_open(output_file.c_str(), "w"); } else out = bgzf_dopen(1, "w"); string header_str; uint32_t len_text = 0; vector<char> header; char magic[5] = {'B','C','F','\2', '\1'}; bgzf_write(out, magic, 5); if (meta_data.has_idx) { LOG.warning("VCF file contains IDX values in header. These are being removed for conversion to BCF."); meta_data.reprint(); meta_data.reparse(); } for (unsigned int ui=0; ui<meta_data.lines.size(); ui++) { for (unsigned int uj=0; uj<meta_data.lines[ui].length(); uj++) header.push_back( meta_data.lines[ui][uj] ); header.push_back('\n'); } if (meta_data.has_contigs == false) { vector<string> contig_vector; get_contigs(params.contigs_file, contig_vector); for(unsigned int ui=0; ui<contig_vector.size(); ui++) { meta_data.add_CONTIG_descriptor(contig_vector[ui].substr(10, contig_vector[ui].size()-8),int(ui)); for(unsigned int uj=0; uj<contig_vector[ui].size(); uj++) header.push_back(contig_vector[ui][uj]); header.push_back('\n'); } } header_str = "#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO"; if (meta_data.N_indv > 0) header_str += "\tFORMAT"; for (unsigned int ui=0; ui<meta_data.N_indv; ui++) if (include_indv[ui]) { header_str += "\t"; header_str += meta_data.indv[ui]; } header_str += "\n"; for (unsigned int ui=0; ui<header_str.length(); ui++) header.push_back( header_str[ui] ); header.push_back( '\0' ); len_text = header.size(); bgzf_write(out, (char *)&len_text, sizeof(len_text) ); bgzf_write(out, (char *)&header[0], len_text ); vector<char> variant_line; entry * e = new vcf_entry(meta_data, include_indv); while(!eof()) { get_entry(variant_line); e->reset(variant_line); N_entries += e->apply_filters(params); if(!e->passed_filters) continue; N_kept_entries++; e->parse_basic_entry(true, true, true); e->parse_full_entry(true); 
e->parse_genotype_entries(true,true,true,true); e->print_bcf(out, params.recode_INFO_to_keep, params.recode_all_INFO); } delete e; bgzf_close(out); }
int main(int argc, char **argv) { int c, compress, pstdout, is_forced, index = 0, reindex = 0; BGZF *fp; void *buffer; long start, end, size; char *index_fname = NULL; static struct option loptions[] = { {"help",0,0,'h'}, {"offset",1,0,'b'}, {"stdout",0,0,'c'}, {"decompress",0,0,'d'}, {"force",0,0,'f'}, {"index",0,0,'i'}, {"index-name",1,0,'I'}, {"reindex",0,0,'r'}, {"size",1,0,'s'}, {0,0,0,0} }; compress = 1; pstdout = 0; start = 0; size = -1; end = -1; is_forced = 0; while((c = getopt_long(argc, argv, "cdh?fb:s:iI:r",loptions,NULL)) >= 0){ switch(c){ case 'd': compress = 0; break; case 'c': pstdout = 1; break; case 'b': start = atol(optarg); compress = 0; pstdout = 1; break; case 's': size = atol(optarg); pstdout = 1; break; case 'f': is_forced = 1; break; case 'i': index = 1; break; case 'I': index_fname = optarg; break; case 'r': reindex = 1; compress = 0; break; case 'h': case '?': return bgzip_main_usage(); } } if (size >= 0) end = start + size; if (end >= 0 && end < start) { fprintf(stderr, "[bgzip] Illegal region: [%ld, %ld]\n", start, end); return 1; } if (compress == 1) { struct stat sbuf; int f_src = fileno(stdin); int f_dst = fileno(stdout); if ( argc>optind ) { if ( stat(argv[optind],&sbuf)<0 ) { fprintf(stderr, "[bgzip] %s: %s\n", strerror(errno), argv[optind]); return 1; } if ((f_src = open(argv[optind], O_RDONLY)) < 0) { fprintf(stderr, "[bgzip] %s: %s\n", strerror(errno), argv[optind]); return 1; } if (pstdout) f_dst = fileno(stdout); else { char *name = malloc(strlen(argv[optind]) + 5); strcpy(name, argv[optind]); strcat(name, ".gz"); f_dst = write_open(name, is_forced); if (f_dst < 0) return 1; free(name); } } else if (!pstdout && isatty(fileno((FILE *)stdout)) ) return bgzip_main_usage(); else if ( index && !index_fname ) { fprintf(stderr, "[bgzip] Index file name expected when writing to stdout\n"); return 1; } fp = bgzf_fdopen(f_dst, "w"); if ( index ) bgzf_index_build_init(fp); buffer = malloc(WINDOW_SIZE); while ((c = read(f_src, buffer, 
WINDOW_SIZE)) > 0) if (bgzf_write(fp, buffer, c) < 0) error("Could not write %d bytes: Error %d\n", c, fp->errcode); // f_dst will be closed here if ( index ) { if ( index_fname ) bgzf_index_dump(fp, index_fname, NULL); else bgzf_index_dump(fp, argv[optind], ".gz.gzi"); } if (bgzf_close(fp) < 0) error("Close failed: Error %d", fp->errcode); if (argc > optind && !pstdout) unlink(argv[optind]); free(buffer); close(f_src); return 0; } else if ( reindex ) { if ( argc>optind ) { fp = bgzf_open(argv[optind], "r"); if ( !fp ) error("[bgzip] Could not open file: %s\n", argv[optind]); } else { if ( !index_fname ) error("[bgzip] Index file name expected when reading from stdin\n"); fp = bgzf_fdopen(fileno(stdin), "r"); if ( !fp ) error("[bgzip] Could not read from stdin: %s\n", strerror(errno)); } buffer = malloc(BGZF_BLOCK_SIZE); bgzf_index_build_init(fp); int ret; while ( (ret=bgzf_read(fp, buffer, BGZF_BLOCK_SIZE))>0 ) ; free(buffer); if ( ret<0 ) error("Is the file gzipped or bgzipped? The latter is required for indexing.\n"); if ( index_fname ) bgzf_index_dump(fp, index_fname, NULL); else bgzf_index_dump(fp, argv[optind], ".gzi"); if ( bgzf_close(fp)<0 ) error("Close failed: Error %d\n",fp->errcode); return 0; } else { struct stat sbuf; int f_dst; if ( argc>optind ) { if ( stat(argv[optind],&sbuf)<0 ) { fprintf(stderr, "[bgzip] %s: %s\n", strerror(errno), argv[optind]); return 1; } char *name; int len = strlen(argv[optind]); if ( strcmp(argv[optind]+len-3,".gz") ) { fprintf(stderr, "[bgzip] %s: unknown suffix -- ignored\n", argv[optind]); return 1; } fp = bgzf_open(argv[optind], "r"); if (fp == NULL) { fprintf(stderr, "[bgzip] Could not open file: %s\n", argv[optind]); return 1; } if (pstdout) { f_dst = fileno(stdout); } else { name = strdup(argv[optind]); name[strlen(name) - 3] = '\0'; f_dst = write_open(name, is_forced); free(name); } } else if (!pstdout && isatty(fileno((FILE *)stdin)) ) return bgzip_main_usage(); else { f_dst = fileno(stdout); fp = 
bgzf_fdopen(fileno(stdin), "r"); if (fp == NULL) { fprintf(stderr, "[bgzip] Could not read from stdin: %s\n", strerror(errno)); return 1; } } buffer = malloc(WINDOW_SIZE); if ( start>0 ) { if ( bgzf_index_load(fp, argv[optind], ".gzi") < 0 ) error("Could not load index: %s.gzi\n", argv[optind]); if ( bgzf_useek(fp, start, SEEK_SET) < 0 ) error("Could not seek to %d-th (uncompressd) byte\n", start); } while (1) { if (end < 0) c = bgzf_read(fp, buffer, WINDOW_SIZE); else c = bgzf_read(fp, buffer, (end - start > WINDOW_SIZE)? WINDOW_SIZE:(end - start)); if (c == 0) break; if (c < 0) error("Could not read %d bytes: Error %d\n", (end - start > WINDOW_SIZE)? WINDOW_SIZE:(end - start), fp->errcode); start += c; if ( write(f_dst, buffer, c) != c ) error("Could not write %d bytes\n", c); if (end >= 0 && start >= end) break; } free(buffer); if (bgzf_close(fp) < 0) error("Close failed: Error %d\n",fp->errcode); if (!pstdout) unlink(argv[optind]); return 0; } return 0; }
int reheader_file(const char *header, const char *file, int meta) { BGZF *fp = bgzf_open(file,"r"); if (bgzf_read_block(fp) != 0 || !fp->block_length) return -1; char *buffer = fp->uncompressed_block; int skip_until = 0; if ( buffer[0]==meta ) { skip_until = 1; // Skip the header while (1) { if ( buffer[skip_until]=='\n' ) { skip_until++; if ( skip_until>=fp->block_length ) { if (bgzf_read_block(fp) != 0 || !fp->block_length) error("no body?\n"); skip_until = 0; } // The header has finished if ( buffer[skip_until]!=meta ) break; } skip_until++; if ( skip_until>=fp->block_length ) { if (bgzf_read_block(fp) != 0 || !fp->block_length) error("no body?\n"); skip_until = 0; } } } FILE *fh = fopen(header,"r"); if ( !fh ) error("%s: %s", header,strerror(errno)); int page_size = getpagesize(); char *buf = valloc(page_size); BGZF *bgzf_out = bgzf_fdopen(fileno(stdout), "w"); ssize_t nread; while ( (nread=fread(buf,1,page_size-1,fh))>0 ) { if ( nread<page_size-1 && buf[nread-1]!='\n' ) buf[nread++] = '\n'; if (bgzf_write(bgzf_out, buf, nread) < 0) error("Error: %s\n",bgzf_out->error); } fclose(fh); if ( fp->block_length - skip_until > 0 ) { if (bgzf_write(bgzf_out, buffer+skip_until, fp->block_length-skip_until) < 0) error("Error: %s\n",fp->error); } if (bgzf_flush(bgzf_out) < 0) error("Error: %s\n",bgzf_out->error); while (1) { #ifdef _USE_KNETFILE nread = knet_read(fp->x.fpr, buf, page_size); #else nread = fread(buf, 1, page_size, fp->file); #endif if ( nread<=0 ) break; #ifdef _USE_KNETFILE int count = fwrite(buf, 1, nread, bgzf_out->x.fpw); #else int count = fwrite(buf, 1, nread, bgzf_out->file); #endif if (count != nread) error("Write failed, wrote %d instead of %d bytes.\n", count,(int)nread); } if (bgzf_close(bgzf_out) < 0) error("Error: %s\n",bgzf_out->error); return 0; }
void BAMoutput::unsortedFlush () {//flush all alignments if (g_threadChunks.threadBool) pthread_mutex_lock(&g_threadChunks.mutexOutSAM); bgzf_write(bgzfBAM,bamArray,binBytes1); if (g_threadChunks.threadBool) pthread_mutex_unlock(&g_threadChunks.mutexOutSAM); binBytes1=0;//rewind the buffer };
int main_bam2fq(int argc, char *argv[]) { BGZF *fp, *fpse = 0; bam1_t *b; uint8_t *buf; int max_buf, c, has12 = 0; kstring_t str; int64_t n_singletons = 0, n_reads = 0; char last[512], *fnse = 0; while ((c = getopt(argc, argv, "as:")) > 0) if (c == 'a') has12 = 1; else if (c == 's') fnse = optarg; if (argc == optind) { fprintf(stderr, "\nUsage: bam2fq [-a] [-s outSE] <in.bam>\n\n"); fprintf(stderr, "Options: -a append /1 and /2 to the read name\n"); fprintf(stderr, " -s FILE write singleton reads to FILE [assume single-end]\n"); fprintf(stderr, "\n"); return 1; } fp = strcmp(argv[optind], "-")? bgzf_open(argv[optind], "r") : bgzf_dopen(fileno(stdin), "r"); assert(fp); bam_hdr_destroy(bam_hdr_read(fp)); buf = 0; max_buf = 0; str.l = str.m = 0; str.s = 0; last[0] = 0; if (fnse) fpse = bgzf_open(fnse, "w1"); b = bam_init1(); while (bam_read1(fp, b) >= 0) { int i, qlen = b->core.l_qseq, is_print = 0; uint8_t *qual, *seq; if (b->flag&BAM_FSECONDARY) continue; // skip secondary alignments ++n_reads; if (fpse) { if (str.l && strcmp(last, bam_get_qname(b))) { bgzf_write(fpse, str.s, str.l); str.l = 0; ++n_singletons; } if (str.l) is_print = 1; strcpy(last, bam_get_qname(b)); } else is_print = 1; qual = bam_get_qual(b); kputc(qual[0] == 0xff? 
'>' : '@', &str); kputsn(bam_get_qname(b), b->core.l_qname - 1, &str); if (has12) { kputc('/', &str); kputw(b->core.flag>>6&3, &str); } kputc('\n', &str); if (max_buf < qlen + 1) { max_buf = qlen + 1; kroundup32(max_buf); buf = (uint8_t*)realloc(buf, max_buf); } buf[qlen] = 0; seq = bam_get_seq(b); for (i = 0; i < qlen; ++i) buf[i] = bam_seqi(seq, i); // copy the sequence if (bam_is_rev(b)) { // reverse complement for (i = 0; i < qlen>>1; ++i) { int8_t t = seq_comp_table[buf[qlen - 1 - i]]; buf[qlen - 1 - i] = seq_comp_table[buf[i]]; buf[i] = t; } if (qlen&1) buf[i] = seq_comp_table[buf[i]]; } for (i = 0; i < qlen; ++i) buf[i] = seq_nt16_str[buf[i]]; kputsn((char*)buf, qlen, &str); kputc('\n', &str); if (qual[0] != 0xff) { kputsn("+\n", 2, &str); for (i = 0; i < qlen; ++i) buf[i] = 33 + qual[i]; if (bam_is_rev(b)) { // reverse for (i = 0; i < qlen>>1; ++i) { uint8_t t = buf[qlen - 1 - i]; buf[qlen - 1 - i] = buf[i]; buf[i] = t; } } } kputsn((char*)buf, qlen, &str); kputc('\n', &str); if (is_print) { fwrite(str.s, 1, str.l, stdout); str.l = 0; } } if (fpse) { if (str.l) { bgzf_write(fpse, str.s, str.l); ++n_singletons; } fprintf(stderr, "[M::%s] discarded %lld singletons\n", __func__, (long long)n_singletons); bgzf_close(fpse); } fprintf(stderr, "[M::%s] processed %lld reads\n", __func__, (long long)n_reads); free(buf); free(str.s); bam_destroy1(b); bgzf_close(fp); return 0; }
int main(int argc, char **argv) { int c, compress, pstdout, is_forced; BGZF *fp; void *buffer; long start, end, size; compress = 1; pstdout = 0; start = 0; size = -1; end = -1; is_forced = 0; while((c = getopt(argc, argv, "cdhfb:s:")) >= 0){ switch(c){ case 'h': return bgzip_main_usage(); case 'd': compress = 0; break; case 'c': pstdout = 1; break; case 'b': start = atol(optarg); break; case 's': size = atol(optarg); break; case 'f': is_forced = 1; break; } } if (size >= 0) end = start + size; if (end >= 0 && end < start) { fprintf(stderr, "[bgzip] Illegal region: [%ld, %ld]\n", start, end); return 1; } if (compress == 1) { struct stat sbuf; int f_src = fileno(stdin); int f_dst = fileno(stdout); if ( argc>optind ) { if ( stat(argv[optind],&sbuf)<0 ) { fprintf(stderr, "[bgzip] %s: %s\n", strerror(errno), argv[optind]); return 1; } if ((f_src = open(argv[optind], O_RDONLY)) < 0) { fprintf(stderr, "[bgzip] %s: %s\n", strerror(errno), argv[optind]); return 1; } if (pstdout) f_dst = fileno(stdout); else { char *name = malloc(strlen(argv[optind]) + 5); strcpy(name, argv[optind]); strcat(name, ".gz"); f_dst = write_open(name, is_forced); if (f_dst < 0) return 1; free(name); } } else if (!pstdout && isatty(fileno((FILE *)stdout)) ) return bgzip_main_usage(); fp = bgzf_fdopen(f_dst, "w"); buffer = malloc(WINDOW_SIZE); while ((c = read(f_src, buffer, WINDOW_SIZE)) > 0) if (bgzf_write(fp, buffer, c) < 0) fail(fp); // f_dst will be closed here if (bgzf_close(fp) < 0) fail(fp); if (argc > optind && !pstdout) unlink(argv[optind]); free(buffer); close(f_src); return 0; } else { struct stat sbuf; int f_dst; if ( argc>optind ) { if ( stat(argv[optind],&sbuf)<0 ) { fprintf(stderr, "[bgzip] %s: %s\n", strerror(errno), argv[optind]); return 1; } char *name; int len = strlen(argv[optind]); if ( strcmp(argv[optind]+len-3,".gz") ) { fprintf(stderr, "[bgzip] %s: unknown suffix -- ignored\n", argv[optind]); return 1; } fp = bgzf_open(argv[optind], "r"); if (fp == NULL) { fprintf(stderr, 
"[bgzip] Could not open file: %s\n", argv[optind]); return 1; } if (pstdout) { f_dst = fileno(stdout); } else { name = strdup(argv[optind]); name[strlen(name) - 3] = '\0'; f_dst = write_open(name, is_forced); free(name); } } else if (!pstdout && isatty(fileno((FILE *)stdin)) ) return bgzip_main_usage(); else { f_dst = fileno(stdout); fp = bgzf_fdopen(fileno(stdin), "r"); if (fp == NULL) { fprintf(stderr, "[bgzip] Could not read from stdin: %s\n", strerror(errno)); return 1; } } buffer = malloc(WINDOW_SIZE); if (bgzf_seek(fp, start, SEEK_SET) < 0) fail(fp); while (1) { if (end < 0) c = bgzf_read(fp, buffer, WINDOW_SIZE); else c = bgzf_read(fp, buffer, (end - start > WINDOW_SIZE)? WINDOW_SIZE:(end - start)); if (c == 0) break; if (c < 0) fail(fp); start += c; write(f_dst, buffer, c); if (end >= 0 && start >= end) break; } free(buffer); if (bgzf_close(fp) < 0) fail(fp); if (!pstdout) unlink(argv[optind]); return 0; } }
int bam_cat(int nfn, char * const *fn, const bam_header_t *h, const char* outbam) { BGZF *fp; FILE* fp_file; uint8_t *buf; uint8_t ebuf[BGZF_EMPTY_BLOCK_SIZE]; const int es=BGZF_EMPTY_BLOCK_SIZE; int i; fp = strcmp(outbam, "-")? bgzf_open(outbam, "w") : bgzf_fdopen(_fileno(stdout), "w"); if (fp == 0) { fprintf(stderr, "[%s] ERROR: fail to open output file '%s'.\n", __FUNCTION__, outbam); return 1; } if (h) bam_header_write(fp, h); buf = (uint8_t*) malloc(BUF_SIZE); for(i = 0; i < nfn; ++i){ BGZF *in; bam_header_t *old; int len,j; in = strcmp(fn[i], "-")? bam_open(fn[i], "r") : bam_dopen(_fileno(stdin), "r"); if (in == 0) { fprintf(stderr, "[%s] ERROR: fail to open file '%s'.\n", __FUNCTION__, fn[i]); return -1; } if (in->open_mode != 'r') return -1; old = bam_header_read(in); if (h == 0 && i == 0) bam_header_write(fp, old); if (in->block_offset < in->block_length) { bgzf_write(fp, (uint8_t*)in->uncompressed_block + in->block_offset, in->block_length - in->block_offset); bgzf_flush(fp); } j=0; #ifdef _USE_KNETFILE fp_file=fp->x.fpw; while ((len = knet_read(in->x.fpr, buf, BUF_SIZE)) > 0) { #else fp_file=fp->file; while (!feof(in->file) && (len = fread(buf, 1, BUF_SIZE, in->file)) > 0) { #endif if(len<es){ int diff=es-len; if(j==0) { fprintf(stderr, "[%s] ERROR: truncated file?: '%s'.\n", __FUNCTION__, fn[i]); return -1; } fwrite(ebuf, 1, len, fp_file); memcpy(ebuf,ebuf+len,diff); memcpy(ebuf+diff,buf,len); } else { if(j!=0) fwrite(ebuf, 1, es, fp_file); len-= es; memcpy(ebuf,buf+len,es); fwrite(buf, 1, len, fp_file); } j=1; } /* check final gzip block */ { const uint8_t gzip1=ebuf[0]; const uint8_t gzip2=ebuf[1]; const uint32_t isize=*((uint32_t*)(ebuf+es-4)); if(((gzip1!=GZIPID1) || (gzip2!=GZIPID2)) || (isize!=0)) { fprintf(stderr, "[%s] WARNING: Unexpected block structure in file '%s'.", __FUNCTION__, fn[i]); fprintf(stderr, " Possible output corruption.\n"); fwrite(ebuf, 1, es, fp_file); } } bam_header_destroy(old); bgzf_close(in); } free(buf); bgzf_close(fp); 
return 0; } int main_cat(int argc, char *argv[]) { bam_header_t *h = 0; char *outfn = 0; int c, ret; while ((c = getopt(argc, argv, "h:o:")) >= 0) { switch (c) { case 'h': { tamFile fph = sam_open(optarg); if (fph == 0) { fprintf(stderr, "[%s] ERROR: fail to read the header from '%s'.\n", __FUNCTION__, argv[1]); return 1; } h = sam_header_read(fph); sam_close(fph); break; } case 'o': outfn = strdup(optarg); break; } } if (argc - optind < 2) { fprintf(stderr, "Usage: samtools cat [-h header.sam] [-o out.bam] <in1.bam> <in2.bam> [...]\n"); return 1; } ret = bam_cat(argc - optind, argv + optind, h, outfn? outfn : "-"); free(outfn); return ret; }
void bcf_file::print_bcf(const parameters ¶ms) { LOG.printLOG("Outputting BCF file...\n"); BGZF * out; if(!params.stream_out) { string output_file = params.output_prefix + ".recode.bcf"; out = bgzf_open(output_file.c_str(), "w"); } else out = bgzf_dopen(1, "w"); string header_str; uint32_t len_text = 0; vector<char> header; char magic[5] = {'B','C','F','\2','\2'}; bgzf_write(out, magic, 5); for (unsigned int ui=0; ui<meta_data.lines.size(); ui++) { for (unsigned int uj=0; uj<meta_data.lines[ui].length(); uj++) header.push_back( meta_data.lines[ui][uj] ); header.push_back('\n'); } header_str = "#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO"; if (meta_data.N_indv > 0) header_str += "\tFORMAT"; for (unsigned int ui=0; ui<meta_data.N_indv; ui++) if (include_indv[ui]) { header_str += "\t"; header_str += meta_data.indv[ui]; } header_str += "\n"; for (unsigned int ui=0; ui<header_str.length(); ui++) header.push_back( header_str[ui] ); header.push_back( '\0' ); len_text = header.size(); bgzf_write(out, (char *)&len_text, sizeof(len_text) ); bgzf_write(out, (char *)&header[0], len_text ); vector<char> variant_line; entry * e = new bcf_entry(meta_data, include_indv); while(!eof()) { get_entry(variant_line); e->reset(variant_line); N_entries += e->apply_filters(params); if(!e->passed_filters) continue; N_kept_entries++; e->parse_basic_entry(true, true, true); e->parse_full_entry(true); e->parse_genotype_entries(true); e->print_bcf(out, params.recode_INFO_to_keep, params.recode_all_INFO); } delete e; bgzf_close(out); }
void ReadDB::import_reads(const std::string& input_filename, const std::string& out_fasta_filename) { // Open readers FILE* read_fp = fopen(input_filename.c_str(), "r"); if(read_fp == NULL) { fprintf(stderr, "error: could not open %s for read\n", input_filename.c_str()); exit(EXIT_FAILURE); } gzFile gz_read_fp = gzdopen(fileno(read_fp), "r"); if(gz_read_fp == NULL) { fprintf(stderr, "error: could not open %s using gzdopen\n", input_filename.c_str()); exit(EXIT_FAILURE); } // Open writers FILE* write_fp = fopen(out_fasta_filename.c_str(), "w"); if(write_fp == NULL) { fprintf(stderr, "error: could not open %s for write\n", out_fasta_filename.c_str()); exit(EXIT_FAILURE); } BGZF* bgzf_write_fp = bgzf_dopen(fileno(write_fp), "w"); if(bgzf_write_fp == NULL) { fprintf(stderr, "error: could not open %s for bgzipped write\n", out_fasta_filename.c_str()); exit(EXIT_FAILURE); } // read input sequences, add to DB and convert to fasta int ret = 0; kseq_t* seq = kseq_init(gz_read_fp); while((ret = kseq_read(seq)) >= 0) { // Check for a path to the fast5 file in the comment of the read std::string path = ""; if(seq->comment.l > 0) { // This splitting code implicitly handles both the 2 and 3 field // fasta format that poretools will output. The FAST5 path // is always the last field. std::vector<std::string> fields = split(seq->comment.s, ' '); path = fields.back(); // as a sanity check we require the path name to end in ".fast5" if(path.length() < 6 || path.substr(path.length() - 6) != ".fast5") { path = ""; } } // sanity check that the read does not exist in the database // JTS 04/2019: changed error to warning to account for duplicate reads coming out of // some versions of guppy. 
auto iter = m_data.find(seq->name.s); if(iter != m_data.end()) { fprintf(stderr, "Warning: duplicate read name %s found in fasta file\n", seq->name.s); continue; } // add path add_signal_path(seq->name.s, path); // write sequence in gzipped fasta for fai indexing later std::string out_record; out_record += ">"; out_record += seq->name.s; out_record += "\n"; out_record += seq->seq.s; out_record += "\n"; size_t write_length = bgzf_write(bgzf_write_fp, out_record.c_str(), out_record.length()); if(write_length != out_record.length()) { fprintf(stderr, "error in bgzf_write, aborting\n"); exit(EXIT_FAILURE); } } // check for abnormal exit conditions if(ret <= -2) { fprintf(stderr, "kseq_read returned %d indicating an error with the input file %s\n", ret, input_filename.c_str()); exit(EXIT_FAILURE); } // cleanup kseq_destroy(seq); gzclose(gz_read_fp); fclose(read_fp); bgzf_close(bgzf_write_fp); fclose(write_fp); }
// Replace the header of a bgzip-compressed VCF and write the result to stdout.
//
// The existing header (all leading lines starting with '#') is collected into
// hdr.  It is replaced by the contents of args->header_fname when that is
// set, and passed through set_samples() when args->samples_fname is set.
// The new header and the data that shared a block with the old header are
// recompressed; all following blocks are copied as raw bytes.
// Failures are reported through error(), which is assumed not to return.
static void reheader_vcf_gz(args_t *args)
{
    BGZF *fp = bgzf_open(args->fname,"r");
    if ( !fp || bgzf_read_block(fp) != 0 || !fp->block_length )
        error("Failed to read %s: %s\n", args->fname, strerror(errno));

    kstring_t hdr = {0,0,0};
    char *buffer = (char*) fp->uncompressed_block;

    // Read the header and find the position of the data block
    if ( buffer[0]!='#' )
        error("Could not parse the header, expected '#', found '%c'\n", buffer[0]);

    int skip_until = 1;     // end of the header in the current uncompressed block
    while (1)
    {
        if ( buffer[skip_until]=='\n' )
        {
            skip_until++;
            if ( skip_until>=fp->block_length )
            {
                // The line ended at the block boundary: keep this block and load the next
                kputsn(buffer,skip_until,&hdr);
                if ( bgzf_read_block(fp) != 0 || !fp->block_length )
                    error("FIXME: No body in the file: %s\n", args->fname);
                skip_until = 0;
            }
            // The header has finished
            if ( buffer[skip_until]!='#' )
            {
                kputsn(buffer,skip_until,&hdr);
                break;
            }
        }
        skip_until++;
        if ( skip_until>=fp->block_length )
        {
            kputsn(buffer,fp->block_length,&hdr);
            if (bgzf_read_block(fp) != 0 || !fp->block_length)
                error("FIXME: No body in the file: %s\n", args->fname);
            skip_until = 0;
        }
    }

    int nsamples = 0;
    char **samples = NULL;
    if ( args->samples_fname )
        samples = hts_readlines(args->samples_fname, &nsamples);
    if ( args->header_fname )
    {
        // Discard the header that was read and load the replacement
        free(hdr.s); hdr.s = NULL; hdr.l = hdr.m = 0;
        read_header_file(args->header_fname, &hdr);
    }
    if ( samples )
    {
        set_samples(samples, nsamples, &hdr);
        int i;
        for (i=0; i<nsamples; i++) free(samples[i]);
        free(samples);
    }

    // Output the modified header
    BGZF *bgzf_out = bgzf_dopen(fileno(stdout), "w");
    if ( bgzf_write(bgzf_out, hdr.s, hdr.l)<0 )
        error("Error: %d\n",bgzf_out->errcode);
    free(hdr.s);

    // Output all remaining data read with the header block
    if ( fp->block_length - skip_until > 0 )
    {
        // The write goes to bgzf_out, so that is the handle holding the error code
        if ( bgzf_write(bgzf_out, buffer+skip_until, fp->block_length-skip_until)<0 )
            error("Error: %d\n",bgzf_out->errcode);
    }
    if ( bgzf_flush(bgzf_out)<0 )
        error("Error: %d\n",bgzf_out->errcode);

    // Stream the rest of the file as it is, without decompressing
    ssize_t nread;
    int page_size = getpagesize();
    char *buf = (char*) valloc(page_size);
    while (1)
    {
        nread = bgzf_raw_read(fp, buf, page_size);
        if ( nread<=0 ) break;

        int count = bgzf_raw_write(bgzf_out, buf, nread);
        if (count != nread)
            error("Write failed, wrote %d instead of %d bytes.\n", count,(int)nread);
    }
    if (bgzf_close(bgzf_out) < 0)
        error("Error: %d\n",bgzf_out->errcode);
    if (bgzf_close(fp) < 0)
        error("Error: %d\n",fp->errcode);
    free(buf);
}
// Concatenate bgzip-compressed BCF files without recompressing their records.
//
// Only the header of the first file is written.  For every input, the data
// that shared a block with the header is recompressed and the remaining
// blocks are copied as raw bytes; a trailing 28-byte BGZF EOF block is
// dropped so that it does not end up in the middle of the output.
// Failures are reported through error(), which is assumed not to return.
static void naive_concat(args_t *args)
{
    // only compressed BCF atm
    BGZF *bgzf_out = bgzf_open(args->output_fname,"w");
    if ( !bgzf_out ) error("Failed to open: %s\n", args->output_fname);

    const size_t page_size = 32768;
    char *buf = (char*) malloc(page_size);
    kstring_t tmp = {0,0,0};
    int i;
    for (i=0; i<args->nfnames; i++)
    {
        htsFile *hts_fp = hts_open(args->fnames[i],"r");
        if ( !hts_fp ) error("Failed to open: %s\n", args->fnames[i]);
        htsFormat type = *hts_get_format(hts_fp);

        if ( type.format==vcf ) error("The --naive option currently works only for compressed BCFs, sorry :-/\n");
        if ( type.compression!=bgzf ) error("The --naive option currently works only for compressed BCFs, sorry :-/\n");

        BGZF *fp = hts_get_bgzfp(hts_fp);
        if ( !fp || bgzf_read_block(fp) != 0 || !fp->block_length )
            error("Failed to read %s: %s\n", args->fnames[i], strerror(errno));

        uint8_t magic[5];
        if ( bgzf_read(fp, magic, 5) != 5 ) error("Failed to read the BCF header in %s\n", args->fnames[i]);
        if (strncmp((char*)magic, "BCF\2\2", 5) != 0) error("Invalid BCF magic string in %s\n", args->fnames[i]);

        // The header length is a 4-byte little-endian integer.  Decode it
        // explicitly rather than reading 4 bytes into a size_t, which relies
        // on the host byte order and on the upper bytes already being zero.
        uint8_t hlen_le[4];
        if ( bgzf_read(fp, hlen_le, 4) != 4 ) error("Failed to read the BCF header in %s\n", args->fnames[i]);
        tmp.l = (size_t)hlen_le[0] | ((size_t)hlen_le[1]<<8) | ((size_t)hlen_le[2]<<16) | ((size_t)hlen_le[3]<<24);
        hts_expand(char,tmp.l,tmp.m,tmp.s);
        if ( bgzf_read(fp, tmp.s, tmp.l) != (ssize_t)tmp.l ) error("Failed to read the BCF header in %s\n", args->fnames[i]);

        // write only the first header
        if ( i==0 )
        {
            if ( bgzf_write(bgzf_out, "BCF\2\2", 5) !=5 ) error("Failed to write %d bytes to %s\n", 5,args->output_fname);
            if ( bgzf_write(bgzf_out, hlen_le, 4) !=4 ) error("Failed to write %d bytes to %s\n", 4,args->output_fname);
            if ( bgzf_write(bgzf_out, tmp.s, tmp.l) != (ssize_t)tmp.l ) error("Failed to write %d bytes to %s\n", (int)tmp.l,args->output_fname);
        }

        // Output all non-header data that were read together with the header block
        int nskip = fp->block_offset;
        if ( fp->block_length - nskip > 0 )
        {
            // The write goes to bgzf_out, so that is the handle holding the error code
            if ( bgzf_write(bgzf_out, (char*)fp->uncompressed_block+nskip, fp->block_length-nskip)<0 ) error("Error: %d\n",bgzf_out->errcode);
        }
        if ( bgzf_flush(bgzf_out)<0 ) error("Error: %d\n",bgzf_out->errcode);

        // Stream the rest of the file as it is, without recompressing, but remove BGZF EOF blocks
        ssize_t nread, ncached = 0, nwr;
        const int neof = 28;
        char cached[neof];
        while (1)
        {
            nread = bgzf_raw_read(fp, buf, page_size);

            // page_size boundary may occur in the middle of the EOF block, so we need to cache the blocks' ends
            if ( nread<=0 ) break;
            if ( nread<=neof )      // last block
            {
                if ( ncached )
                {
                    // flush the part of the cache that won't be needed
                    nwr = bgzf_raw_write(bgzf_out, cached, nread);
                    if (nwr != nread) error("Write failed, wrote %d instead of %d bytes.\n", (int)nwr,(int)nread);

                    // make space in the cache so that we can append to the end
                    if ( nread!=neof ) memmove(cached,cached+nread,neof-nread);
                }

                // fill the cache and check for eof outside this loop
                memcpy(cached+neof-nread,buf,nread);
                break;
            }

            // not the last block, flush the cache if full
            if ( ncached )
            {
                nwr = bgzf_raw_write(bgzf_out, cached, ncached);
                if (nwr != ncached) error("Write failed, wrote %d instead of %d bytes.\n", (int)nwr,(int)ncached);
                ncached = 0;
            }

            // fill the cache with the last neof bytes and write the rest
            nread -= neof;
            memcpy(cached,buf+nread,neof);
            ncached = neof;

            nwr = bgzf_raw_write(bgzf_out, buf, nread);
            if (nwr != nread) error("Write failed, wrote %d instead of %d bytes.\n", (int)nwr,(int)nread);
        }
        // The cached tail is written only when it is not the BGZF EOF marker
        if ( ncached && memcmp(cached,"\037\213\010\4\0\0\0\0\0\377\6\0\102\103\2\0\033\0\3\0\0\0\0\0\0\0\0\0",neof) )
        {
            nwr = bgzf_raw_write(bgzf_out, cached, neof);
            if (nwr != neof) error("Write failed, wrote %d instead of %d bytes.\n", (int)nwr,(int)neof);
        }
        if (hts_close(hts_fp)) error("Close failed: %s\n",args->fnames[i]);
    }
    free(buf);
    free(tmp.s);
    if (bgzf_close(bgzf_out) < 0) error("Error: %d\n",bgzf_out->errcode);
}
/*
 * Replace the leading header lines of a bgzip-compressed text file and write
 * the result to stdout.
 *
 *   fname   path of the BGZF file whose header is replaced
 *   header  path of a plain-text file holding the new header
 *   ftype   file type flags; only text files (IS_TXT set, or 0) are handled
 *   conf    supplies meta_char, the character that marks a header line
 *
 * Returns 0 on success and -1 when the input cannot be opened or its first
 * block cannot be read.  Other failures are reported through error(), which
 * is assumed not to return.
 */
int reheader_file(const char *fname, const char *header, int ftype, tbx_conf_t *conf)
{
    if ( ftype & IS_TXT || !ftype )
    {
        BGZF *fp = bgzf_open(fname,"r");
        if ( !fp ) return -1;
        if ( bgzf_read_block(fp) != 0 || !fp->block_length )
        {
            // Do not leak the handle on the early return
            bgzf_close(fp);
            return -1;
        }

        char *buffer = fp->uncompressed_block;
        int skip_until = 0;

        // Skip the header: find out the position of the data block
        if ( buffer[0]==conf->meta_char )
        {
            skip_until = 1;
            while (1)
            {
                if ( buffer[skip_until]=='\n' )
                {
                    skip_until++;
                    if ( skip_until>=fp->block_length )
                    {
                        // The line ended exactly at the block boundary, load the next block
                        if ( bgzf_read_block(fp) != 0 || !fp->block_length )
                            error("FIXME: No body in the file: %s\n", fname);
                        skip_until = 0;
                    }
                    // The header has finished
                    if ( buffer[skip_until]!=conf->meta_char ) break;
                }
                skip_until++;
                if ( skip_until>=fp->block_length )
                {
                    if (bgzf_read_block(fp) != 0 || !fp->block_length)
                        error("FIXME: No body in the file: %s\n", fname);
                    skip_until = 0;
                }
            }
        }

        // Output the new header
        FILE *hdr = fopen(header,"r");
        if ( !hdr ) error("%s: %s", header,strerror(errno));
        const size_t page_size = 32768;
        char *buf = malloc(page_size);
        BGZF *bgzf_out = bgzf_open("-", "w");
        ssize_t nread;
        // Remember the last byte written so that the terminating newline can
        // be added even when the header size is an exact multiple of the
        // chunk size.
        char last_char = '\n';
        while ( (nread=fread(buf,1,page_size,hdr))>0 )
        {
            last_char = buf[nread-1];
            if (bgzf_write(bgzf_out, buf, nread) < 0) error("Error: %d\n",bgzf_out->errcode);
        }
        if ( last_char!='\n' && bgzf_write(bgzf_out, "\n", 1) < 0 )
            error("Error: %d\n",bgzf_out->errcode);
        if ( fclose(hdr) ) error("close failed: %s\n", header);

        // Output all remaining data read with the header block
        if ( fp->block_length - skip_until > 0 )
        {
            // The write goes to bgzf_out, so that is the handle holding the error code
            if (bgzf_write(bgzf_out, buffer+skip_until, fp->block_length-skip_until) < 0)
                error("Error: %d\n",bgzf_out->errcode);
        }
        if (bgzf_flush(bgzf_out) < 0) error("Error: %d\n",bgzf_out->errcode);

        // Copy the remaining compressed blocks verbatim, without recompressing
        while (1)
        {
            nread = bgzf_raw_read(fp, buf, page_size);
            if ( nread<=0 ) break;

            int count = bgzf_raw_write(bgzf_out, buf, nread);
            if (count != nread) error("Write failed, wrote %d instead of %d bytes.\n", count,(int)nread);
        }
        if (bgzf_close(bgzf_out) < 0) error("Error: %d\n",bgzf_out->errcode);
        if (bgzf_close(fp) < 0) error("Error: %d\n",fp->errcode);
        free(buf);
    }
    else
        error("todo: reheader BCF, BAM\n");  // BCF is difficult, records contain pointers to the header.
    return 0;
}