Exemple #1
0
/**
 * Destructor.
 *
 * Always frees angsd_tmpdir.  Unless both GL and doGlf are 0 (in which case
 * it returns immediately), it then tears down the state belonging to the
 * selected GL value, closes the output streams and releases bufstr, errors
 * and logfactorial.
 */
abcGL::~abcGL(){

  free(angsd_tmpdir);
  
  if(GL==0&&doGlf==0)
    return;
  else if(GL==1)
    bam_likes_destroy();
  else if(GL==2)
    gatk_destroy();
  else if(GL==4)
    abcError::killGlobalErrorProbs(errorProbs);
  else if(GL==5)
    phys_destroy();

  if(doGlf&&gzoutfile!=NULL)
    bgzf_close(gzoutfile);

  // Test the handle that is actually being closed: checking gzoutfile here
  // would close a NULL gzoutfile2 whenever only the primary stream is open.
  if(gzoutfile2!=NULL)
    bgzf_close(gzoutfile2);

  if(bufstr.s!=NULL)
    free(bufstr.s);

  if(errors){
    for(int i=0;i<4;i++)
      delete [] errors[i];
    delete [] errors;
  }
  delete [] logfactorial;
}
Exemple #2
0
/**
 * Destructor.
 *
 * Closes the compressed count outputs, writes the quality-score
 * distribution (doQsDist) and the per-sample / global depth tables
 * (doDepth) to their own files, then releases all owned buffers.
 */
abcCounts::~abcCounts(){
  if(oFileCountsBin!=NULL)
    bgzf_close(oFileCountsBin);
  if(oFileCountsPos!=NULL)
    bgzf_close(oFileCountsPos);

  if(doQsDist){
    FILE *oFileQs = aio::openFile(oFiles,postfix3);
    // Only write when the file could be opened; the handle used to be
    // passed to fprintf before it was checked.
    if(oFileQs!=NULL){
      fprintf(oFileQs,"qscore\tcounts\n");
      printQs(oFileQs,qsDist);
      fclose(oFileQs);
    }
    delete[] qsDist;
  }

  if(doDepth){
    FILE *oFileSamplDepth = aio::openFile(oFiles,postfix4);
    FILE *oFileGlobDepth = aio::openFile(oFiles,postfix5);

    // one row per individual, one tab-terminated column per depth 0..maxDepth
    if(oFileSamplDepth!=NULL){
      for(int i=0;i<nInd;i++){
        for(int j=0;j<maxDepth+1;j++)
          fprintf(oFileSamplDepth,"%lu\t",depthCount[i][j]);
        fprintf(oFileSamplDepth,"\n");
      }
      fclose(oFileSamplDepth);
    }

    // single row with the global counts; each handle is guarded by its own
    // NULL test (the global file used to be closed based on the sample file)
    if(oFileGlobDepth!=NULL){
      for(int j=0;j<maxDepth+1;j++)
        fprintf(oFileGlobDepth,"%lu\t",globCount[j]);
      fprintf(oFileGlobDepth,"\n");
      fclose(oFileGlobDepth);
    }

    //clean depthCount
    for(int i=0;i<nInd;i++)
      delete[]  depthCount[i];
    delete[] depthCount;
  }
  
  if(minQfile!=NULL){
    //  angsd::printMatrix(minQmat,stderr);
    angsd::deleteMatrix(minQmat);
  }
  if(oFileIcounts!=NULL)
    bgzf_close(oFileIcounts);
  
  free(oFiles);
  free(bpos.s);
  free(bbin.s);
  free(bufstr.s);
  if(globCount)
    delete [] globCount;
}
Exemple #3
0
// Release everything owned by a perpsmc handle, then the handle itself.
void perpsmc_destroy(perpsmc *pp){
  // compressed input streams
  bgzf_close(pp->bgzf_gls);
  bgzf_close(pp->bgzf_pos);

  destroy(pp->mm);

  // delete[] accepts a null pointer, so no explicit guards are required
  delete [] pp->pos;
  delete [] pp->gls;

  free(pp->fname);
  delete pp;
}
Exemple #4
0
// Destructor: closes the compressed output stream when haplotype calling
// was enabled and the stream was opened.
abcHaploCall::~abcHaploCall(){
  if(doHaploCall!=0 && outfileZ!=NULL)
    bgzf_close(outfileZ);
}
/*
 * Copy the BAM stream `in` to file descriptor `fd`, replacing its header
 * with `h`.
 *
 * The header of `in` is read only to advance past it; the header block is
 * rewritten and the remaining compressed data is copied through unchanged.
 *
 * Returns 0 on success, -1 if `in` is not opened for reading, the header
 * cannot be read, or a resource could not be allocated/opened.
 */
int bam_reheader(BGZF *in, const bam_header_t *h, int fd)
{
	BGZF *fp;
	bam_header_t *old;
	int len;
	uint8_t *buf;
	if (in->open_mode != 'r') return -1;
	buf = malloc(BUF_SIZE);
	if (buf == NULL) return -1;
	old = bam_header_read(in);
	if (old == NULL) {
		free(buf);
		return -1;
	}
	/* the old header is not needed once the stream is positioned past it */
	bam_header_destroy(old);
	fp = bgzf_dopen(fd, "w");
	if (fp == NULL) {
		free(buf);
		return -1;
	}
	bam_header_write(fp, h);
	/* records that shared an input block with the old header */
	if (in->block_offset < in->block_length) {
		bgzf_write(fp, in->uncompressed_block + in->block_offset, in->block_length - in->block_offset);
		bgzf_flush(fp);
	}
#ifdef _USE_KNETFILE
	while ((len = knet_read(in->fp, buf, BUF_SIZE)) > 0)
		fwrite(buf, 1, len, fp->fp);
#else
	while (!feof(in->fp) && (len = fread(buf, 1, BUF_SIZE, in->fp)) > 0)
		fwrite(buf, 1, len, fp->fp);
#endif
	free(buf);
	fp->block_offset = in->block_offset = 0;
	bgzf_close(fp);
	return 0;
}
Exemple #6
0
// Release a perfst handle: close its stream, destroy its map, free every
// stored name and finally delete the handle itself.
void perfst_destroy(perfst *pp){
  bgzf_close(pp->fp);
  destroy(pp->mm);
  // size() is unsigned; use a matching index type to avoid a
  // signed/unsigned comparison
  for(size_t i=0;i<pp->names.size();i++)
    free(pp->names[i]);
  delete pp;
}
Exemple #7
0
/*
 * Build a FASTA index for `fn` and write it to "<fn>.fai".  For
 * BGZF-compressed input a ".gzi" index is dumped as well.
 *
 * Returns 0 on success, -1 on any failure (allocation, opening the FASTA
 * file, building the index, or opening the .fai file for writing).
 */
int fai_build(const char *fn)
{
	char *str;
	BGZF *bgzf;
	FILE *fp;
	faidx_t *fai;
	/* room for fn + ".fai" + terminating NUL */
	str = (char*)calloc(strlen(fn) + 5, 1);
	if ( !str ) {
		fprintf(stderr, "[fai_build] out of memory\n");
		return -1;
	}
	sprintf(str, "%s.fai", fn);
	bgzf = bgzf_open(fn, "r");
	if ( !bgzf ) {
		fprintf(stderr, "[fai_build] fail to open the FASTA file %s\n",fn);
		free(str);
		return -1;
	}
	if ( bgzf->is_compressed ) bgzf_index_build_init(bgzf);
	fai = fai_build_core(bgzf);
	if ( !fai ) {
		/* without this check a failed build reaches fai_save/fai_destroy with NULL */
		fprintf(stderr, "[fai_build] fail to build the index for %s\n",fn);
		bgzf_close(bgzf);
		free(str);
		return -1;
	}
	if ( bgzf->is_compressed ) bgzf_index_dump(bgzf, fn, ".gzi");
	bgzf_close(bgzf);
	fp = fopen(str, "wb");
	if ( !fp ) {
		fprintf(stderr, "[fai_build] fail to write FASTA index %s\n",str);
		fai_destroy(fai); free(str);
		return -1;
	}
	fai_save(fai, fp);
	fclose(fp);
	free(str);
	fai_destroy(fai);
	return 0;
}
// Close the BCF stream if one is open (resetting the member so a second
// call does nothing for it), then close the index.
void SingleChromosomeBCFIndex::close() {
  if (fBcfFile_ != NULL) {
    bgzf_close(fBcfFile_);
    fBcfFile_ = NULL;
  }

  closeIndex();
}
/*
 * Entry point of "samtools reheader <in.header.sam> <in.bam>".
 *
 * Reads the header from argv[1], then copies the BAM given by argv[2]
 * ("-" selects stdin) to stdout with that header.  Returns 0 on success and
 * 1 on a usage error or any failure.
 */
int main_reheader(int argc, char *argv[])
{
	bam_header_t *h;
	BGZF *in;
	int ret;
	if (argc != 3) {
		fprintf(stderr, "Usage: samtools reheader <in.header.sam> <in.bam>\n");
		return 1;
	}
	{ // read the header
		tamFile fph = sam_open(argv[1]);
		if (fph == 0) {
			fprintf(stderr, "[%s] fail to read the header from %s.\n", __func__, argv[1]);
			return 1;
		}
		h = sam_header_read(fph);
		sam_close(fph);
	}
	if (h == 0) {
		fprintf(stderr, "[%s] fail to read the header from %s.\n", __func__, argv[1]);
		return 1;
	}
	in = strcmp(argv[2], "-")? bam_open(argv[2], "r") : bam_dopen(fileno(stdin), "r");
	if (in == 0) {
		fprintf(stderr, "[%s] fail to open file %s.\n", __func__, argv[2]);
		return 1;
	}
	// propagate a failed rewrite instead of always reporting success
	ret = bam_reheader(in, h, fileno(stdout));
	bgzf_close(in);
	if (ret != 0) {
		fprintf(stderr, "[%s] fail to reheader file %s.\n", __func__, argv[2]);
		return 1;
	}
	return 0;
}
Exemple #10
0
/*
 * Compare the first ten raw bytes of `fn` with a fixed byte pattern.
 *
 * Returns 1 when they match, 0 when they differ, and -1 when the file
 * cannot be opened or fewer than ten bytes could be read.
 */
int bgzf_check_bgzf(const char *fn)
{
    /* pattern the leading bytes are compared against */
    uint8_t magic[10] = {037, 0213, 010, 4, 0, 0, 0, 0, 0, 0377};
    uint8_t head[10];
    BGZF *fp = bgzf_open(fn, "r");
    int got;

    if (fp == 0) {
        fprintf(stderr, "[bgzf_check_bgzf] failed to open the file: %s\n",fn);
        return -1;
    }

    /* read straight from the underlying file, bypassing decompression */
#ifdef _USE_KNETFILE
    got = knet_read(fp->x.fpr, head, 10);
#else
    got = fread(head, 1, 10, fp->file);
#endif
    bgzf_close(fp);

    if (got != 10)
        return -1;

    return memcmp(magic, head, 10) == 0 ? 1 : 0;
}
// Destructor: when smart counts are enabled, writes the record of the
// current chromosome to the compressed data file, appends the matching
// entry to the index file, then frees the count arrays and closes both files.
abcSmartCounts::~abcSmartCounts(){
  if(doSmartCounts!=0){
    // offset of this record in the data file, taken before anything is written
    int64_t recordStart = bgzf_tell(fbin);
    int nameLen = strlen(header->name[curChr]);

    // data record: name length, name, sequence length, four count arrays
    bgzf_write(fbin,&nameLen,sizeof(int));
    bgzf_write(fbin,header->name[curChr],nameLen);
    bgzf_write(fbin,&len,sizeof(int));
    int base=0;
    while(base<4){
      bgzf_write(fbin,counts[base],len);
      base++;
    }

    // index entry: name length, name, sequence length, record offset
    fwrite(&nameLen,sizeof(int),1,fidx);
    fwrite(header->name[curChr],sizeof(char),nameLen,fidx);
    fwrite(&len,sizeof(int),1,fidx);
    fwrite(&recordStart,sizeof(int64_t),1,fidx);

    for(base=0;base<4;base++)
      delete [] counts[base];
    delete [] counts;

    fclose(fidx);
    bgzf_close(fbin);
  }
}
Exemple #12
0
/*
 * Free a FASTA index: every sequence name, the name array, the hash, the
 * attached BGZF handle (if any) and the index structure itself.
 * Passing NULL is a no-op, like free().
 */
void fai_destroy(faidx_t *fai)
{
    int i;
    if (fai == NULL) return;
    for (i = 0; i < fai->n; ++i) free(fai->name[i]);
    free(fai->name);
    kh_destroy(s, fai->hash);
    if (fai->bgzf) bgzf_close(fai->bgzf);
    free(fai);
}
Exemple #13
0
// Destructor: nothing to do when doHWE is 0; otherwise closes the output
// stream (only for positive doHWE, and only if it was opened) and deletes
// chisq.
abcHWE::~abcHWE(){
  if(doHWE!=0){
    if(doHWE>0 && outfileZ!=NULL)
      bgzf_close(outfileZ);
    delete chisq;
  }
}
/**
 * Sort one BAM bin by coordinate and write it as a BGZF file.
 *
 * Loads the per-thread files <dirBAMsort><thread>/<iBin> (deleting each one
 * after reading), builds a (coordinate, read order, buffer offset) triple
 * per record, sorts the triples with funCompareUint2 and writes the records
 * in that order to <dirBAMsort>/b<iBin>.
 *
 * @param iBin       bin index, used in the input and output file names
 * @param binN       number of records expected in the bin
 * @param binS       number of bytes expected in the bin (0 = nothing to do)
 * @param nThreads   number of per-thread input files
 * @param dirBAMsort directory prefix of the per-thread sub-directories
 * @param P          run parameters (log stream, header, compression level)
 */
void BAMbinSortByCoordinate(uint32 iBin, uint binN, uint binS, uint nThreads, string dirBAMsort, Parameters *P) {

    if (binS==0) return; //nothing to do for empty bins
    //allocate arrays
    char *bamIn=new char[binS];
    uint *startPos=new uint[binN*3];

    uint bamInStored=0; //bytes actually placed in bamIn, never exceeds binS
    uint bamInBytes=0;  //bytes found on disk, may exceed binS if the bin is inconsistent
    //load all aligns
    for (uint it=0; it<nThreads; it++) {
        string bamInFile=dirBAMsort+to_string(it)+"/"+to_string((uint) iBin);
        ifstream bamInStream (bamInFile.c_str());
        //read no more than what still fits into bamIn: requesting binS bytes at a non-zero offset could write past the buffer
        bamInStream.read(bamIn+bamInStored,binS-bamInStored);
        bamInStored += bamInStream.gcount();
        bamInBytes += bamInStream.gcount();
        //count (but do not store) any excess bytes, so that the size check below still reports the true size
        char excess[4096];
        while (bamInStream.read(excess,sizeof(excess)), bamInStream.gcount()>0) {
            bamInBytes += bamInStream.gcount();
        }
        bamInStream.close();
        remove(bamInFile.c_str());
    }
    if (bamInBytes!=binS) {
        ostringstream errOut;
        errOut << "EXITING because of FATAL ERROR: number of bytes expected from the BAM bin does not agree with the actual size on disk: ";
        errOut << binS <<"   "<< bamInBytes <<"   "<< iBin <<"\n";
        exitWithError(errOut.str(),std::cerr, P->inOut->logMain, 1, *P);
    }

    //extract coordinates

    for (uint ib=0,ia=0;ia<binN;ia++) {
        uint32 *bamIn32=(uint32*) (bamIn+ib);
        startPos[ia*3]  =( ((uint) bamIn32[1]) << 32) | ( (uint)bamIn32[2] );
        startPos[ia*3+2]=ib;
        ib+=bamIn32[0]+sizeof(uint32);//note that size of the BAM record does not include the size record itself
        startPos[ia*3+1]=*( (uint*) (bamIn+ib) ); //read order
        ib+=sizeof(uint);
    }

    //sort
    qsort((void*) startPos, binN, sizeof(uint)*3, funCompareUint2);

    string bgzfBinName=dirBAMsort+"/b"+to_string((uint) iBin);
    BGZF *bgzfBin;
    bgzfBin=bgzf_open(bgzfBinName.c_str(),("w"+to_string((long long) P->outBAMcompression)).c_str());
    if (bgzfBin==NULL) {
        ostringstream errOut;
        errOut << "EXITING because of FATAL ERROR: could not open temporary sorted BAM bin file for writing: "<< bgzfBinName <<"\n";
        exitWithError(errOut.str(),std::cerr, P->inOut->logMain, 1, *P);
    }
    outBAMwriteHeader(bgzfBin,P->samHeaderSortedCoord,P->chrName,P->chrLength);
    //send ordered aligns to bgzf one-by-one
    for (uint ia=0;ia<binN;ia++) {
        char* ib=bamIn+startPos[ia*3+2];
        bgzf_write(bgzfBin,ib, *((uint32*) ib)+sizeof(uint32) );
    }

    bgzf_flush(bgzfBin);
    bgzf_close(bgzfBin);
    //release memory
    delete [] bamIn;
    delete [] startPos;
}
Exemple #15
0
/*
 * Release the resources held by an index: the hash, the memory-mapped
 * lookup table, the hash file, the FASTQ stream and the lookup descriptor.
 * A NULL index is ignored.
 */
void
ifq_destroy_index(ifq_index_t *index)
{
    if( index == NULL )
        return;

    cmph_destroy( index->hash );
    munmap( index->table, index->lookup_size );
    fclose( index->hash_file );
    bgzf_close( index->fastq_file );
    close( index->lookup_fd );
}
Exemple #16
0
/*
 * Destroy a reader.  When r->compress is 0 the reader's BGZF handle is
 * closed first, and a failing close terminates the process.  NULL is
 * ignored.
 */
void
reader_destroy(reader_t *r)
{
  if(NULL != r) {
      if(0 == r->compress && bgzf_close(r->fp_bgzf) < 0) {
          fprintf(stderr, "reader bgzf_close: bug encountered\n");
          exit(1);
      }
      free(r);
  }
}
Exemple #17
0
// Release a filt object: free every key stored in its offset map, close
// both of its files, free its arrays and delete the object.
void dalloc(filt *f){
  pMap::iterator cur=f->offs.begin();
  while(cur!=f->offs.end()){
    free(cur->first);
    ++cur;
  }
  f->offs.clear();

  bgzf_close(f->bg);
  fclose(f->fp);

  free(f->keeps);
  free(f->major);
  free(f->minor);

  delete f;
}
	/**
	 * Write the variants of this list as BGZF-compressed text to the file
	 * descriptor `out`.
	 *
	 * @param headerPtr   header object; supplies the header text and is passed
	 *                    to each variant when its line is generated
	 * @param printHeader when true the header text is written first
	 * @param out         file descriptor wrapped by bgzf_dopen; it is released
	 *                    again through bgzf_close before returning
	 *
	 * NOTE(review): the result of bgzf_dopen is not checked here; confirm
	 * callers always pass a writable descriptor.
	 */
	void VariantList::printToCompressedVCF(IHeader::SharedPtr headerPtr, bool printHeader, int out)
	{
		BGZF* fp = bgzf_dopen(out, "w");
		if (printHeader)
		{
			// fetch the text once instead of once for c_str() and once for size()
			const auto& headerText = headerPtr->getHeader();
			bgzf_write(fp, headerText.c_str(), headerText.size());
		}
		// iterate by reference: no per-element copy of the stored pointer
		for (const auto& variantPtr : this->m_variant_ptrs)
		{
			// generate each line once instead of twice
			const auto& variantLine = variantPtr->getVariantLine(headerPtr);
			bgzf_write(fp, variantLine.c_str(), variantLine.size());
		}
		bgzf_close(fp);
	}
Exemple #19
0
/*
 * Stress test: open the given file 10000 times, attach a thread pool to
 * the handle each time and close it again.  Aborts if a close fails.
 */
int main(int argc, char *argv[]) {
    if (argc <= 1) {
        fprintf(stderr, "Usage: thrash_threads1 input.bam\n");
        exit(1);
    }

    int i;
    for (i = 0; i < 10000; i++) {
        printf("i=%d\n", i);
        BGZF *fpin  = bgzf_open(argv[1], "r");
        /* a missing or unreadable input must not reach bgzf_mt as NULL */
        if (fpin == NULL) {
            fprintf(stderr, "Failed to open %s\n", argv[1]);
            exit(1);
        }
        bgzf_mt(fpin, 2, 256);
        if (bgzf_close(fpin) < 0) abort();
    }
    return 0;
}
Exemple #20
0
// Destructor: optionally logs its own invocation, then (only when doAsso is
// non-zero) closes every opened output stream and frees the stream array
// and the matrices.
abcAsso::~abcAsso(){
  if(doPrint)
    fprintf(stderr,"staring [%s]\t[%s]\n",__FILE__,__FUNCTION__);

  if(doAsso!=0){
    // one output stream per ymat.y entry; unopened slots are NULL
    int idx=0;
    while(idx<ymat.y){
      if(multiOutfile[idx]!=NULL)
        bgzf_close(multiOutfile[idx]);
      idx++;
    }
    delete [] multiOutfile;

    // the covariate matrix is only released when a covariate file was given
    if(covfile!=NULL)
      angsd::deleteMatrix(covmat);
    angsd::deleteMatrix(ymat);
  }
}
Exemple #21
0
/*
 * Reads a file and outputs a new BAM file to fd with 'h' replaced as
 * the header.    No checks are made to the validity.
 *
 * When add_PG is non-zero a @PG line for samtools (with arg_list as the
 * command line, if given) is appended to h->text before it is written.
 *
 * Returns 0 on success, -1 on failure.  All resources acquired here are
 * released on every path.
 */
int bam_reheader(BGZF *in, bam_hdr_t *h, int fd,
                 const char *arg_list, int add_PG)
{
    BGZF *fp = NULL;
    bam_hdr_t *old;
    SAM_hdr *sh = NULL;
    ssize_t len;
    uint8_t *buf;

    if (in->is_write) return -1;
    buf = malloc(BUF_SIZE);
    if (buf == NULL) {
        fprintf(stderr, "Out of memory\n");
        return -1;
    }
    /* The old header is read only to advance past it. */
    old = bam_hdr_read(in);
    if (old == NULL) {
        fprintf(stderr, "Couldn't read header\n");
        goto fail;
    }
    bam_hdr_destroy(old);

    fp = bgzf_fdopen(fd, "w");
    if (fp == NULL) {
        fprintf(stderr, "Couldn't open output file\n");
        goto fail;
    }

    if (add_PG) {
        // Around the houses, but it'll do until we can manipulate bam_hdr_t natively.
        char *new_text;
        sh = sam_hdr_parse_(h->text, h->l_text);
        if (sh == NULL)
            goto fail;
        if (sam_hdr_add_PG(sh, "samtools",
                           "VN", samtools_version(),
                           arg_list ? "CL": NULL,
                           arg_list ? arg_list : NULL,
                           NULL) != 0)
            goto fail;

        /* Duplicate first so that h keeps its text if the copy fails. */
        new_text = strdup(sam_hdr_str(sh));
        if (new_text == NULL)
            goto fail;
        free(h->text);
        h->text = new_text;
        h->l_text = sam_hdr_length(sh);
        sam_hdr_free(sh);
        sh = NULL;
    }

    if (bam_hdr_write(fp, h) < 0) goto fail;
    /* Records that shared an input block with the old header. */
    if (in->block_offset < in->block_length) {
        if (bgzf_write(fp, in->uncompressed_block + in->block_offset, in->block_length - in->block_offset) < 0)
            goto fail;
        if (bgzf_flush(fp) < 0)
            goto fail;
    }
    /* Copy the remaining compressed blocks through unchanged. */
    while ((len = bgzf_raw_read(in, buf, BUF_SIZE)) > 0) {
        if (bgzf_raw_write(fp, buf, len) < 0)
            goto fail;
    }
    if (len < 0) goto fail;
    free(buf);
    fp->block_offset = in->block_offset = 0;
    if (bgzf_close(fp) < 0) return -1;
    return 0;

fail:
    if (fp) bgzf_close(fp);
    free(buf);
    if (sh) sam_hdr_free(sh);
    return -1;
}
Exemple #22
0
/*
 * Entry point of "bgt getalt <bgt-base>".
 *
 * Opens <bgt-base>.bcf and, for every record, prints one line
 * "contig:pos:len:alt" to stdout after removing the prefix shared by the
 * REF and ALT strings.  Returns 0 on success, 1 on a usage error or when
 * the input cannot be opened or read.
 */
int main_getalt(int argc, char *argv[])
{
	int c;
	char *fn;
	BGZF *fp;
	bcf1_t *b;
	bcf_hdr_t *h;
	kstring_t s = {0,0,0};

	while ((c = getopt(argc, argv, "")) >= 0) {
	}
	if (argc - optind == 0) {
		fprintf(stderr, "Usage: bgt getalt <bgt-base>\n");
		return 1;
	}

	/* room for the base name + ".bcf" + terminating NUL */
	fn = (char*)calloc(strlen(argv[optind]) + 5, 1);
	if (fn == 0) {
		fprintf(stderr, "[E::%s] out of memory\n", __func__);
		return 1;
	}
	sprintf(fn, "%s.bcf", argv[optind]);
	fp = bgzf_open(fn, "r");
	/* report a missing file explicitly; an assert disappears under NDEBUG */
	if (fp == 0) {
		fprintf(stderr, "[E::%s] failed to open %s\n", __func__, fn);
		free(fn);
		return 1;
	}
	free(fn);

	h = bcf_hdr_read(fp);
	if (h == 0) {
		fprintf(stderr, "[E::%s] failed to read the BCF header\n", __func__);
		bgzf_close(fp);
		return 1;
	}
	b = bcf_init1();
	if (b == 0) {
		fprintf(stderr, "[E::%s] out of memory\n", __func__);
		bcf_hdr_destroy(h);
		bgzf_close(fp);
		return 1;
	}
	while (bcf_read1(fp, b) >= 0) {
		char *ref, *alt;
		int l_ref, l_alt, i, min_l;
		bcf_get_ref_alt1(b, &l_ref, &ref, &l_alt, &alt);
		min_l = l_ref < l_alt? l_ref : l_alt;
		/* i = length of the prefix shared by REF and ALT */
		for (i = 0; i < min_l && ref[i] == alt[i]; ++i);
		s.l = 0;
		kputs(h->id[BCF_DT_CTG][b->rid].key, &s);
		kputc(':', &s); kputw(b->pos + 1 + i, &s);
		kputc(':', &s); kputw(b->rlen - i, &s);
		kputc(':', &s); kputsn(alt + i, l_alt - i, &s);
		puts(s.s);
	}
	bcf_destroy1(b);
	bcf_hdr_destroy(h);

	bgzf_close(fp);
	free(s.s);
	return 0;
}
Exemple #23
0
void
/*
 * Destroy a writer: close its output (the plain FILE when w->compress is 0,
 * the BGZF handle otherwise), then release its local block pool and the
 * writer itself.  A failing close terminates the process.  NULL is ignored.
 */
writer_destroy(writer_t *w)
{
  int status;

  if(NULL == w) return;

  /* both branches report a failed close with the same message */
  status = (0 == w->compress) ? fclose(w->fp_file) : bgzf_close(w->fp_bgzf);
  if(status < 0) {
      fprintf(stderr, "writer bzf_close: bug encountered\n");
      exit(1);
  }

  block_pool_destroy(w->pool_local);
  free(w);
}
Exemple #24
0
/*
 * Copy the BAM stream `in` to file descriptor `fd`, replacing its header
 * with `h`.
 *
 * The header of `in` is read only to advance past it; the header block is
 * rewritten and the remaining compressed blocks are copied through
 * unchanged.
 *
 * Returns 0 on success, -1 if `in` is a write handle, the old header cannot
 * be read, a resource cannot be obtained, or copying the data fails.
 */
int bam_reheader(BGZF *in, const bam_header_t *h, int fd)
{
    BGZF *fp;
    bam_header_t *old;
    ssize_t len;
    uint8_t *buf;
    if (in->is_write) return -1;
    buf = malloc(BUF_SIZE);
    if (buf == NULL) return -1;
    old = bam_header_read(in);
    if (old == NULL) {
        free(buf);
        return -1;
    }
    /* the old header is not needed once the stream is positioned past it */
    bam_header_destroy(old);
    fp = bgzf_fdopen(fd, "w");
    if (fp == NULL) {
        free(buf);
        return -1;
    }
    bam_header_write(fp, h);
    /* records that shared an input block with the old header */
    if (in->block_offset < in->block_length) {
        bgzf_write(fp, in->uncompressed_block + in->block_offset, in->block_length - in->block_offset);
        bgzf_flush(fp);
    }
    while ((len = bgzf_raw_read(in, buf, BUF_SIZE)) > 0) {
        if (bgzf_raw_write(fp, buf, len) < 0) {
            len = -1;
            break;
        }
    }
    free(buf);
    fp->block_offset = in->block_offset = 0;
    if (bgzf_close(fp) < 0 || len < 0) return -1;
    return 0;
}
Exemple #25
0
/*
 * Compare the first ten raw bytes of `fn` with a fixed byte pattern.
 *
 * Returns 1 when they match, 0 when they differ, and -1 when the file
 * cannot be opened or fewer than ten bytes could be read.
 */
int bgzf_check_bgzf(const char *fn)
{
    /* pattern the leading bytes are compared against */
    unsigned char expected[10] = {037, 0213, 010, 4, 0, 0, 0, 0, 0, 0377};
    unsigned char head[10];
    BGZF *fp = bgzf_open(fn, "r");
    int got;

    if (fp == 0) {
        fprintf(stderr, "[bgzf_check_bgzf] failed to open the file: %s\n",fn);
        return -1;
    }

    /* read straight from the underlying file, bypassing decompression */
    got = fread(head, 1, 10, fp->file);
    bgzf_close(fp);

    if (got != 10)
        return -1;

    return memcmp(expected, head, 10) == 0 ? 1 : 0;
}
Exemple #26
0
int main (int argc, char **argv) {
    /////////////////////
    // Parse Arguments //
    /////////////////////
    params *pars = new params;
    init_pars(pars);
    parse_cmd_args(argc, argv, pars);
    if( pars->version ) {
        printf("ngsF v%s\nCompiled on %s @ %s", version, __DATE__, __TIME__);
#ifdef _USE_BGZF
        printf(" (BGZF library)\n");
#else
        printf(" (STD library)\n");
#endif

        exit(0);
    }
    if( pars->verbose >= 1 ) {
        printf("==> Input Arguments:\n");
        printf("\tglf file: %s\n\tinit_values: %s\n\tfreq_fixed: %s\n\tout file: %s\n\tn_ind: %d\n\tn_sites: %lu\n\tchunk_size: %lu\n\tfast_lkl: %s\n\tapprox_EM: %s\n\tcall_geno: %s\n\tmax_iters: %d\n\tmin_epsilon: %.10f\n\tn_threads: %d\n\tseed: %lu\n\tquick: %s\n\tversion: %s\n\tverbose: %d\n\n",
               pars->in_glf, pars->init_values, pars->freq_fixed ? "true":"false", pars->out_file, pars->n_ind, pars->n_sites, pars->max_chunk_size, pars->fast_lkl ? "true":"false", pars->approx_EM ? "true":"false", pars->call_geno ? "true":"false", pars->max_iters, pars->min_epsilon, pars->n_threads, pars->seed, pars->quick ? "true":"false", version, pars->verbose);
    }
    if( pars->verbose > 4 ) printf("==> Verbose values greater than 4 for debugging purpose only. Expect large amounts of info on screen\n");



    /////////////////////
    // Check Arguments //
    /////////////////////
    if(pars->in_glf == NULL)
        error(__FUNCTION__,"GL input file (-glf) missing!");
    else if( strcmp(pars->in_glf, "-") == 0 ) {
        pars->in_glf_type = new char[6];
        pars->in_glf_type = strcat(pars->in_glf_type, "STDIN");
    } else {
        pars->in_glf_type = strrchr(pars->in_glf, '.');
        if(pars->in_glf_type == NULL)
            error(__FUNCTION__,"invalid file type!");
    }
    if(pars->out_file == NULL)
        error(__FUNCTION__,"output file (-out) missing!");
    if(pars->n_ind == 0)
        error(__FUNCTION__,"number of individuals (-n_ind) missing!");
    if(pars->n_sites == 0)
        error(__FUNCTION__,"number of sites (-n_sites) missing!");



    ///////////////////////
    // Check input files //
    ///////////////////////
    // Get file total size
    struct stat st;
    stat(pars->in_glf, &st);
    if( strcmp(pars->in_glf_type, "STDIN") != 0 ) {
        if( pars->n_sites == st.st_size/sizeof(double)/pars->n_ind/3 && strcmp(pars->in_glf_type, ".glf") == 0 ) {
            if(pars->verbose >= 1)
                printf("==> UNCOMP input file (\"%s\"): number of sites (%lu) match expected file size\n", pars->in_glf_type, pars->n_sites);
        } else if( strcmp(pars->in_glf_type, ".glf") != 0 ) {
            if( pars->verbose >= 1)
                printf("==> COMPRESSED input file (\"%s\"): number of sites (%lu) do NOT match expected file size\n", pars->in_glf_type, pars->n_sites);
        } else
            error(__FUNCTION__,"wrong number of sites or invalid/corrupt file!");
    }


    // Adjust max_chunk_size in case of fewer sites
    if(pars->max_chunk_size > pars->n_sites) {
        if( pars->verbose >= 1 ) printf("==> Fewer sites (%lu) than chunk_size (%lu). Reducing chunk size to match number of sites\n", pars->n_sites, pars->max_chunk_size);
        pars->max_chunk_size = pars->n_sites;
    }
    // Calculate total number of chunks
    pars->n_chunks = ceil( (double) pars->n_sites/ (double) pars->max_chunk_size );
    if( pars->verbose >= 1 ) printf("==> Analysis will be run in %ld chunk(s)\n", pars->n_chunks);
    // Alocate memory for the chunk index
    pars->chunks_voffset = new int64_t[pars->n_chunks];
    memset(pars->chunks_voffset, 0, pars->n_chunks*sizeof(int64_t));
    // Adjust thread number to chunks
    if(pars->n_chunks < pars->n_threads) {
        if( pars->verbose >= 1 ) printf("==> Fewer chunks (%ld) than threads (%d). Reducing the number of threads to match number of chunks\n", pars->n_chunks, pars->n_threads);
        pars->n_threads = pars->n_chunks;
    }


    // Open input file
#ifdef _USE_BGZF
    if( pars->verbose >= 1 ) printf("==> Using BGZF I/O library\n");
    // Open BGZIP file
    if( strcmp(pars->in_glf_type, ".bgz") == 0 ) {
        if( (pars->in_glf_fh = bgzf_open(pars->in_glf, "rb")) < 0 )
            error(__FUNCTION__,"Cannot open BGZIP file!");
    } else
        error(__FUNCTION__,"BGZF library only supports BGZIP files!");

    bgzf_set_cache_size(pars->in_glf_fh, CACHE_SIZE * 1024uL * 1024uL * 1024uL);
#else

    if( pars->verbose >= 1 ) printf("==> Using native I/O library\n");
    // Open GLF file
    if( strcmp(pars->in_glf_type, "STDIN") == 0 )
        pars->in_glf_fh = stdin;
    else if( strcmp(pars->in_glf_type, ".glf") == 0 ) {
        if( (pars->in_glf_fh = fopen(pars->in_glf, "rb")) == NULL )
            error(__FUNCTION__,"Cannot open GLF file!");
    } else
        error(__FUNCTION__,"Standard library only supports UNCOMPRESSED GLF files!");

    // Allocate memory and read from the file
    pars->data = new double* [pars->n_sites];
    for(uint64_t s = 0; s < pars->n_sites; s++) {
        pars->data[s] = new double[pars->n_ind * 3];
        if( fread (pars->data[s], sizeof(double), pars->n_ind * 3, pars->in_glf_fh) != pars->n_ind * 3)
            error(__FUNCTION__,"cannot read GLF file!");
        if(pars->call_geno)
            call_geno(pars->data[s], pars->n_ind, 3);
    }
#endif
    if( pars->in_glf_fh == NULL )
        error(__FUNCTION__,"cannot open GLF file!");



    ///////////////////////////////////
    // Declare variables for results //
    ///////////////////////////////////
    out_data *output = new out_data;
    output->site_freq = new double[pars->n_sites];
    output->site_freq_num = new double[pars->n_sites];
    output->site_freq_den = new double[pars->n_sites];
    output->site_prob_var = new double[pars->n_sites];
    output->site_tmpprob_var = new double[pars->n_sites];
    output->indF = new double[pars->n_ind];
    output->indF_num = new double[pars->n_ind];
    output->indF_den = new double[pars->n_ind];
    output->ind_lkl = new double[pars->n_ind];
    // Initialize output
    init_output(pars, output);



    //////////////////
    // Analyze Data //
    //////////////////
    if( pars->verbose >= 1 && !pars->fast_lkl && strcmp("e", pars->init_values) != 0 ) {
        printf("==> Initial LogLkl: %.15f\n", full_HWE_like(pars, output->site_freq, output->indF, 0, pars->n_ind));
        fflush(stdout);
    }
    do_EM(pars, output);
    if( pars->verbose >= 1 ) printf("\nFinal logLkl: %f\n", output->global_lkl);



    //////////////////
    // Print Output //
    //////////////////
    FILE *out_file;
    if( pars->verbose >= 1 ) printf("Printing Output...\n");

    out_file = fopen(pars->out_file, "w");
    if(out_file == NULL)
        error(__FUNCTION__,"Cannot open OUTPUT file!");
    for(uint16_t i = 0; i < pars->n_ind; i++)
        fprintf(out_file,"%f\n", output->indF[i]);
    fclose(out_file);



    //////////////////////
    // Close Input File //
    //////////////////////
    if( pars->verbose >= 1 ) printf("Exiting...\n");
#ifdef _USE_BGZF
    bgzf_close(pars->in_glf_fh);
#else
    for(uint64_t s = 0; s < pars->n_sites; s++)
        delete [] pars->data[s];
    delete [] pars->data;
    fclose(pars->in_glf_fh);
#endif



    /////////////////
    // Free Memory //
    /////////////////
    delete [] output->site_freq;
    delete [] output->site_freq_num;
    delete [] output->site_freq_den;
    delete [] output->site_prob_var;
    delete [] output->indF;
    delete [] output->indF_num;
    delete [] output->indF_den;
    delete [] output->ind_lkl;
    delete output;
    //if( strcmp("e", pars->init_values) == 0 )
    //delete [] pars->init_values;
    delete [] pars->chunks_voffset;
    delete pars;

    return 0;
}
Exemple #27
0
/*
 * Concatenate BGZF-compressed BCF files without recompressing them.
 *
 * For each input the header block is decompressed; the header itself is
 * written for the first file only, while any records that shared that
 * block are re-emitted.  All following BGZF blocks are copied through as
 * raw bytes, except that a trailing 28-byte BGZF EOF block is dropped so
 * that the inputs can be chained.  Every failure ends in error().
 */
static void naive_concat(args_t *args)
{
    // only compressed BCF atm
    BGZF *bgzf_out = bgzf_open(args->output_fname,"w");
    if ( !bgzf_out ) error("Failed to open %s: %s\n", args->output_fname, strerror(errno));

    const size_t page_size = 32768;
    char *buf = (char*) malloc(page_size);
    if ( !buf ) error("Failed to allocate %d bytes\n", (int)page_size);
    kstring_t tmp = {0,0,0};
    int i;
    for (i=0; i<args->nfnames; i++)
    {
        htsFile *hts_fp = hts_open(args->fnames[i],"r");
        if ( !hts_fp ) error("Failed to open: %s\n", args->fnames[i]);
        htsFormat type = *hts_get_format(hts_fp);

        if ( type.format==vcf ) error("The --naive option currently works only for compressed BCFs, sorry :-/\n");
        if ( type.compression!=bgzf ) error("The --naive option currently works only for compressed BCFs, sorry :-/\n");

        BGZF *fp = hts_get_bgzfp(hts_fp);
        if ( !fp || bgzf_read_block(fp) != 0 || !fp->block_length )
            error("Failed to read %s: %s\n", args->fnames[i], strerror(errno));

        uint8_t magic[5];
        if ( bgzf_read(fp, magic, 5) != 5 ) error("Failed to read the BCF header in %s\n", args->fnames[i]);
        if (strncmp((char*)magic, "BCF\2\2", 5) != 0) error("Invalid BCF magic string in %s\n", args->fnames[i]);

        // The header length is a 4-byte field: read it into a 4-byte object
        // rather than into the first bytes of the wider tmp.l
        uint32_t hdr_len;
        if ( bgzf_read(fp, &hdr_len, 4) != 4 ) error("Failed to read the BCF header in %s\n", args->fnames[i]);
        tmp.l = hdr_len;
        hts_expand(char,tmp.l,tmp.m,tmp.s);
        if ( bgzf_read(fp, tmp.s, tmp.l) != tmp.l ) error("Failed to read the BCF header in %s\n", args->fnames[i]);

        // write only the first header
        if ( i==0 )
        {
            if ( bgzf_write(bgzf_out, "BCF\2\2", 5) !=5 ) error("Failed to write %d bytes to %s\n", 5,args->output_fname);
            if ( bgzf_write(bgzf_out, &hdr_len, 4) !=4 ) error("Failed to write %d bytes to %s\n", 4,args->output_fname);
            if ( bgzf_write(bgzf_out, tmp.s, tmp.l) != tmp.l) error("Failed to write %d bytes to %s\n", (int)tmp.l,args->output_fname);
        }

        // Output all non-header data that were read together with the header block
        int nskip = fp->block_offset;
        if ( fp->block_length - nskip > 0 )
        {
            // the write targets bgzf_out, so report its error code
            if ( bgzf_write(bgzf_out, fp->uncompressed_block+nskip, fp->block_length-nskip)<0 ) error("Error: %d\n",bgzf_out->errcode);
        }
        if ( bgzf_flush(bgzf_out)<0 ) error("Error: %d\n",bgzf_out->errcode);


        // Stream the rest of the file as it is, without recompressing, but remove BGZF EOF blocks
        ssize_t nread, ncached = 0, nwr;
        const int neof = 28;
        char cached[neof];
        while (1)
        {
            nread = bgzf_raw_read(fp, buf, page_size);

            // page_size boundary may occur in the middle of the EOF block, so we need to cache the blocks' ends
            if ( nread<=0 ) break;
            if ( nread<=neof )      // last block
            {
                if ( ncached )
                {
                    // flush the part of the cache that won't be needed
                    nwr = bgzf_raw_write(bgzf_out, cached, nread);
                    if (nwr != nread) error("Write failed, wrote %d instead of %d bytes.\n", (int)nwr,(int)nread);

                    // make space in the cache so that we can append to the end
                    if ( nread!=neof ) memmove(cached,cached+nread,neof-nread);
                }

                // fill the cache and check for eof outside this loop
                memcpy(cached+neof-nread,buf,nread);
                break;
            }

            // not the last block, flush the cache if full
            if ( ncached )
            {
                nwr = bgzf_raw_write(bgzf_out, cached, ncached);
                if (nwr != ncached) error("Write failed, wrote %d instead of %d bytes.\n", (int)nwr,(int)ncached);
                ncached = 0;
            }

            // fill the cache
            nread -= neof;
            memcpy(cached,buf+nread,neof);
            ncached = neof;

            nwr = bgzf_raw_write(bgzf_out, buf, nread);
            if (nwr != nread) error("Write failed, wrote %d instead of %d bytes.\n", (int)nwr,(int)nread);
        }
        // keep the cached tail unless it is exactly the BGZF EOF block
        if ( ncached && memcmp(cached,"\037\213\010\4\0\0\0\0\0\377\6\0\102\103\2\0\033\0\3\0\0\0\0\0\0\0\0\0",neof) )
        {
            nwr = bgzf_raw_write(bgzf_out, cached, neof);
            if (nwr != neof) error("Write failed, wrote %d instead of %d bytes.\n", (int)nwr,(int)neof);
        }
        if (hts_close(hts_fp)) error("Close failed: %s\n",args->fnames[i]);
    }
    free(buf);
    free(tmp.s);
    // the handle is released by bgzf_close, so it must not be read afterwards
    if (bgzf_close(bgzf_out) < 0) error("Error: failed to close %s\n",args->output_fname);
}
Exemple #28
0
/*
 * Write `file` (BGZF-compressed text) to stdout with its header replaced by
 * the contents of `header`.
 *
 * Header lines are the leading lines that start with the character `meta`.
 * The new header is compressed, followed by the body lines that share a
 * block with the old header; every later block is copied as raw bytes.
 *
 * Returns 0 on success and -1 when `file` cannot be opened or its first
 * block cannot be read; other failures end in error().
 */
int reheader_file(const char *header, const char *file, int meta)
{
    BGZF *fp = bgzf_open(file,"r");
    if (fp == NULL)
        return -1;
    if (bgzf_read_block(fp) != 0 || !fp->block_length)
    {
        bgzf_close(fp);
        return -1;
    }
    
    char *buffer = fp->uncompressed_block;
    int skip_until = 0;

    if ( buffer[0]==meta )
    {
        skip_until = 1;

        // Skip the header
        while (1)
        {
            if ( buffer[skip_until]=='\n' )
            {
                skip_until++;
                if ( skip_until>=fp->block_length )
                {
                    if (bgzf_read_block(fp) != 0 || !fp->block_length)
                        error("no body?\n");
                    skip_until = 0;
                }
                // The header has finished
                if ( buffer[skip_until]!=meta ) break;
            }
            skip_until++;
            if ( skip_until>=fp->block_length )
            {
                if (bgzf_read_block(fp) != 0 || !fp->block_length)
                    error("no body?\n");
                skip_until = 0;
            }
        }
    }

    FILE *fh = fopen(header,"r");
    if ( !fh )
        error("%s: %s", header,strerror(errno));
    int page_size = getpagesize();
    char *buf = valloc(page_size);
    if ( !buf )
        error("Failed to allocate %d bytes\n", page_size);
    BGZF *bgzf_out = bgzf_fdopen(fileno(stdout), "w");
    if ( !bgzf_out )
        error("Failed to open stdout for writing\n");
    ssize_t nread;
    while ( (nread=fread(buf,1,page_size-1,fh))>0 )
    {
        // make sure the new header ends with a newline
        if ( nread<page_size-1 && buf[nread-1]!='\n' )
            buf[nread++] = '\n';
        if (bgzf_write(bgzf_out, buf, nread) < 0) error("Error: %s\n",bgzf_out->error);
    }
    fclose(fh);

    if ( fp->block_length - skip_until > 0 )
    {
        // the write targets bgzf_out, so report its error string
        if (bgzf_write(bgzf_out, buffer+skip_until, fp->block_length-skip_until) < 0) 
            error("Error: %s\n",bgzf_out->error);
    }
    if (bgzf_flush(bgzf_out) < 0) 
        error("Error: %s\n",bgzf_out->error);

    while (1)
    {
#ifdef _USE_KNETFILE
        nread = knet_read(fp->x.fpr, buf, page_size);
#else
        nread = fread(buf, 1, page_size, fp->file);
#endif
        if ( nread<=0 ) 
            break;

#ifdef _USE_KNETFILE
        int count = fwrite(buf, 1, nread, bgzf_out->x.fpw);
#else
        int count = fwrite(buf, 1, nread, bgzf_out->file);
#endif
        if (count != nread)
            error("Write failed, wrote %d instead of %d bytes.\n", count,(int)nread);
    }

    // the handle is released by bgzf_close, so it must not be read afterwards
    if (bgzf_close(bgzf_out) < 0) 
        error("Error: failed to close the output stream\n");

    free(buf);
    bgzf_close(fp);
   
    return 0;
}
Exemple #29
0
/**
 * Import reads from a (possibly gzipped) FASTA/FASTQ file.
 *
 * Every read name is registered through add_signal_path together with the
 * ".fast5" path found in the last field of its comment (or an empty path),
 * and the read is appended as a FASTA record to a BGZF-compressed output.
 * Duplicate read names are skipped with a warning.  Any I/O failure
 * terminates the process.
 *
 * @param input_filename     file to read sequences from
 * @param out_fasta_filename BGZF-compressed FASTA file to create
 */
void ReadDB::import_reads(const std::string& input_filename, const std::string& out_fasta_filename)
{
    // Open readers
    FILE* read_fp = fopen(input_filename.c_str(), "r");
    if(read_fp == NULL) {
        fprintf(stderr, "error: could not open %s for read\n", input_filename.c_str());
        exit(EXIT_FAILURE);
    }

    gzFile gz_read_fp = gzdopen(fileno(read_fp), "r");
    if(gz_read_fp == NULL) {
        fprintf(stderr, "error: could not open %s using gzdopen\n", input_filename.c_str());
        exit(EXIT_FAILURE);
    }

    // Open writers
    FILE* write_fp = fopen(out_fasta_filename.c_str(), "w");
    if(write_fp == NULL) {
        fprintf(stderr, "error: could not open %s for write\n", out_fasta_filename.c_str());
        exit(EXIT_FAILURE);
    }

    BGZF* bgzf_write_fp = bgzf_dopen(fileno(write_fp), "w");
    if(bgzf_write_fp == NULL) {
        fprintf(stderr, "error: could not open %s for bgzipped write\n", out_fasta_filename.c_str());
        exit(EXIT_FAILURE);
    }

    // read input sequences, add to DB and convert to fasta
    int ret = 0;
    kseq_t* seq = kseq_init(gz_read_fp);
    while((ret = kseq_read(seq)) >= 0) {

        // Check for a path to the fast5 file in the comment of the read
        std::string path = "";
        if(seq->comment.l > 0) {

            // This splitting code implicitly handles both the 2 and 3 field
            // fasta format that poretools will output. The FAST5 path
            // is always the last field.
            std::vector<std::string> fields = split(seq->comment.s, ' ');
            // back() on an empty vector is undefined, so guard it
            if(!fields.empty()) {
                path = fields.back();
            }

            // as a sanity check we require the path name to end in ".fast5"
            if(path.length() < 6 || path.substr(path.length() - 6) != ".fast5") {
                path = "";
            }
        }
        
        // sanity check that the read does not exist in the database
        // JTS 04/2019: changed error to warning to account for duplicate reads coming out of
        // some versions of guppy.
        auto iter = m_data.find(seq->name.s);
        if(iter != m_data.end()) {
            fprintf(stderr, "Warning: duplicate read name %s found in fasta file\n", seq->name.s);
            continue;
        }
        
        // add path
        add_signal_path(seq->name.s, path);

        // write sequence in gzipped fasta for fai indexing later
        std::string out_record;
        out_record += ">";
        out_record += seq->name.s;
        out_record += "\n";
        out_record += seq->seq.s;
        out_record += "\n";
        size_t write_length = bgzf_write(bgzf_write_fp, out_record.c_str(), out_record.length());
        if(write_length != out_record.length()) {
            fprintf(stderr, "error in bgzf_write, aborting\n");
            exit(EXIT_FAILURE);
        }
    }

    // check for abnormal exit conditions
    if(ret <= -2) {
        fprintf(stderr, "kseq_read returned %d indicating an error with the input file %s\n", ret, input_filename.c_str());
        exit(EXIT_FAILURE);
    }

    // cleanup
    kseq_destroy(seq);
    
    // NOTE(review): gzclose and bgzf_close are documented to close the
    // descriptor they were given, so the fclose calls below presumably act
    // on an already-closed fd - confirm, and consider handing a dup() of
    // the descriptor to gzdopen/bgzf_dopen instead.
    gzclose(gz_read_fp);
    fclose(read_fp);

    // closing a write handle flushes the remaining data; a failure here
    // means the output file is incomplete
    if(bgzf_close(bgzf_write_fp) != 0) {
        fprintf(stderr, "error: could not finish writing %s\n", out_fasta_filename.c_str());
        exit(EXIT_FAILURE);
    }
    fclose(write_fp);
}
Exemple #30
0
void signalFromBAM(const string bamFileName, const string sigFileName, Parameters P) {

    bam1_t *bamA;
    bamA=bam_init1();

    double nMult=0, nUniq=0;

    if (P.outWigFlags.norm==1) {//count reads in the BAM file
        BGZF *bamIn=bgzf_open(bamFileName.c_str(),"r");
        bam_hdr_t *bamHeader=bam_hdr_read(bamIn);
        while ( true ) {//until the end of file
            int bamBytes1=bam_read1(bamIn, bamA);
            if (bamBytes1<0) break; //end of file
            if (bamA->core.tid<0) continue; //unmapped read
//             if ( !std::regex_match(chrName.at(bamA->core.tid),std::regex(P.outWigReferencesPrefix))) continue; //reference does not mathc required references
            if ( P.outWigReferencesPrefix!="-" && (P.outWigReferencesPrefix.compare(0,P.outWigReferencesPrefix.size(),bamHeader->target_name[bamA->core.tid],P.outWigReferencesPrefix.size())!=0) ) continue; //reference does not match required references

            uint8_t* aNHp=bam_aux_get(bamA,"NH");
            if (aNHp!=NULL) {
                uint32_t aNH=bam_aux2i(aNHp);
                if (aNH==1) {//unique mappers
                    ++nUniq;
                } else if (aNH>1) {
                    nMult+=1.0/aNH;
                };
            };
        };
        bgzf_close(bamIn);
    };

    BGZF *bamIn=bgzf_open(bamFileName.c_str(),"r");
    bam_hdr_t *bamHeader=bam_hdr_read(bamIn);

    int sigN=P.outWigFlags.strand ? 4 : 2;

    double *normFactor=new double[sigN];

    ofstream **sigOutAll=new ofstream* [sigN];

    string* sigOutFileName=new string[sigN];
    sigOutFileName[0]=sigFileName+".Unique.str1.out";
    sigOutFileName[1]=sigFileName+".UniqueMultiple.str1.out";
    if (P.outWigFlags.strand) {
        sigOutFileName[2]=sigFileName+".Unique.str2.out";
        sigOutFileName[3]=sigFileName+".UniqueMultiple.str2.out";
    };

    for (int ii=0; ii<sigN; ii++) {
        sigOutFileName[ii]+= (P.outWigFlags.format==0 ? ".bg" : ".wig");
        sigOutAll[ii]=new ofstream ( sigOutFileName[ii].c_str() );
    };

    if (P.outWigFlags.norm==0) {//raw counts
        normFactor[0]=1;
        normFactor[1]=1;
    } else if (P.outWigFlags.norm==1) {//normlaized
        normFactor[0]=1.0e6 / nUniq;
        normFactor[1]=1.0e6 / (nUniq+nMult);
        for (int is=0;is<sigN;is++) {//formatting double output
            *sigOutAll[is]<<setiosflags(ios::fixed) << setprecision(5);
        };
    };
    if (P.outWigFlags.strand) {
        normFactor[2]=normFactor[0];
        normFactor[3]=normFactor[1];
    };


    int iChr=-999;
    double *sigAll=NULL;
    uint32_t chrLen=0;
    while ( true ) {//until the end of file
        int bamBytes1=bam_read1(bamIn, bamA);
        if (bamA->core.tid!=iChr || bamBytes1<0) {
            //output to file
            if (iChr!=-999) {//iChr=-999 marks chromosomes that are not output, including unmapped reads
                for (int is=0;is<sigN;is++) {
                    if (P.outWigFlags.format==1) {
                        *sigOutAll[is] <<"variableStep chrom="<<bamHeader->target_name[iChr] <<"\n";
                    };
                    double prevSig=0;
                    for (uint32_t ig=0;ig<chrLen;ig++) {
                        double newSig=sigAll[sigN*ig+is];
                        if (P.outWigFlags.format==0) {//bedGraph
                            if (newSig!=prevSig) {
                                if (prevSig!=0) {//finish previous record
                                    *sigOutAll[is] <<ig<<"\t"<<prevSig*normFactor[is] <<"\n"; //1-based end
                                };
                                if (newSig!=0) {
                                    *sigOutAll[is] << bamHeader->target_name[iChr] <<"\t"<< ig <<"\t"; //0-based beginning
                                };
                                prevSig=newSig;
                            };
                        } else if (P.outWigFlags.format==1){//wiggle
                            if (newSig!=0) {
                                *sigOutAll[is] <<ig+1<<"\t"<<newSig*normFactor[is] <<"\n";
                            };
                        };
                    };
                };
            };
            if (bamBytes1<0) {//no more reads
                break;
            };

            iChr=bamA->core.tid;
            if ( iChr==-1 || (P.outWigReferencesPrefix!="-" && (P.outWigReferencesPrefix.compare(0,P.outWigReferencesPrefix.size(),bamHeader->target_name[bamA->core.tid],P.outWigReferencesPrefix.size())!=0) ) ) {
                iChr=-999;
                continue; //reference does not match required references
            };

            chrLen=bamHeader->target_len[iChr]+1;//one extra base at the end which sohuld always be 0
            delete [] sigAll;
            sigAll= new double[sigN*chrLen];
            memset(sigAll, 0, sizeof(*sigAll)*sigN*chrLen);
        };

//         uint32_t nCigar =(bamA->core.flag<<16)>>16;
//         uint32_t mapFlag=bamA->core.flag>>16;
//         uint32_t mapQ=(bamA->core.flag<<16)>>24;

        #define BAM_CIGAR_OperationShift 4
        #define BAM_CIGAR_LengthBits 28
        #define BAM_CIGAR_M 0
        #define BAM_CIGAR_I 1
        #define BAM_CIGAR_D 2
        #define BAM_CIGAR_N 3
        #define BAM_CIGAR_S 4
        #define BAM_CIGAR_H 5
        #define BAM_CIGAR_P 6
        #define BAM_CIGAR_EQ 7
        #define BAM_CIGAR_X 8

        //by default, alignments marked as duplicate are not processed
        if ( (bamA->core.flag & 0x400) > 0 ) continue;

        //NH attribute
        uint8_t* aNHp=bam_aux_get(bamA,"NH");
        uint32_t aNH;
        if (aNHp==NULL) {
            aNH=1; //no NH tag: assume NH=1
            //continue; //do not process lines without NH field
        } else {
            aNH=bam_aux2i(bam_aux_get(bamA,"NH")); //write a safer function allowing for lacking NH tag
        };
        if (aNH==0) continue; //do not process lines without NH=0
        uint32_t aG=bamA->core.pos;
        uint32_t iStrand=0;
        if (P.outWigFlags.strand) {//strand for stranded data from SAM flag
            iStrand= ( (bamA->core.flag & 0x10) > 0 ) == ( (bamA->core.flag & 0x80) == 0 );//0/1 for +/-
        };
        if (P.outWigFlags.type==1) {//5' of the1st read signal only, RAMPAGE/CAGE
            if ( (bamA->core.flag & 0x80)>0) continue; //skip if this the second mate
            if (iStrand==0) {
                if (aNH==1) {//unique mappers
                    sigAll[aG*sigN+0+2*iStrand]++;
                };
                sigAll[aG*sigN+1+2*iStrand]+=1.0/aNH;//U+M, normalized by the number of multi-mapping loci
                continue; //record only the first position
            };
        };

        uint32_t* cigar=(uint32_t*) (bamA->data+bamA->core.l_qname);

        for (uint32_t ic=0; ic<bamA->core.n_cigar; ic++) {
            uint32_t cigOp=(cigar[ic]<<BAM_CIGAR_LengthBits)>>BAM_CIGAR_LengthBits;
            uint32_t cigL=cigar[ic]>>BAM_CIGAR_OperationShift;
            switch (cigOp) {
                case(BAM_CIGAR_D):
                case(BAM_CIGAR_N):
                    aG+=cigL;
                    break;
                case(BAM_CIGAR_M):
                    if (P.outWigFlags.type==0 || (P.outWigFlags.type==2 && (bamA->core.flag & 0x80)>0 )) {//full signal, or second mate onyl signal
                        for (uint32_t ig=0;ig<cigL;ig++) {
                            if (aG>=chrLen) {
                                cerr << "BUG: alignment extends past chromosome in signalFromBAM.cpp\n";
                                exit(-1);
                            };
                            if (aNH==1) {//unique mappers
                                sigAll[aG*sigN+0+2*iStrand]++;
                            };
                            sigAll[aG*sigN+1+2*iStrand]+=1.0/aNH;//U+M, normalized by the number of multi-mapping loci
                            aG++;
                        };
                    } else {
                        aG+=cigL;
                    };
            };
        };
        if (P.outWigFlags.type==1) {//full signal
            --aG;
            if (aNH==1) {//unique mappers
                sigAll[aG*sigN+0+2*iStrand]++;
            };
            sigAll[aG*sigN+1+2*iStrand]+=1.0/aNH;//U+M, normalized by the number of multi-mapping loci
        };
    };
    delete [] sigAll;

    for (int is=0; is<sigN; is++) {// flush/close all signal files
        sigOutAll[is]->flush();
        sigOutAll[is]->close();
    };
};