Esempio n. 1
0
/*
 * Build a BAM_FILE handle from R character vectors.
 *
 * filename  - path of the BAM file; skipped when the vector is empty.
 * indexname - path of the BAM index; skipped when the vector is empty.
 * filemode  - open mode, passed through to _bam_tryopen().
 *
 * Raises an R error (after releasing what was acquired so far) when the
 * opened file is not flagged as BAM or the index cannot be loaded.
 */
static BAM_FILE _bamfile_open_r(SEXP filename, SEXP indexname, SEXP filemode)
{
    BAM_FILE handle = (BAM_FILE) Calloc(1, _BAM_FILE);

    handle->file = NULL;
    if (Rf_length(filename) != 0) {
        const char *path = translateChar(STRING_ELT(filename, 0));
        handle->file =
            _bam_tryopen(path, CHAR(STRING_ELT(filemode, 0)), 0);
        if ((handle->file->type & TYPE_BAM) != 1) {
            samclose(handle->file);
            Free(handle);
            Rf_error("'filename' is not a BAM file\n  file: %s", path);
        }
        /* remember where the records start */
        handle->pos0 = bam_tell(handle->file->x.bam);
        handle->irange0 = 0;
    }

    handle->index = NULL;
    if (Rf_length(indexname) != 0) {
        const char *index_path = translateChar(STRING_ELT(indexname, 0));
        handle->index = _bam_tryindexload(index_path);
        if (handle->index == NULL) {
            samclose(handle->file);
            Free(handle);
            Rf_error("failed to open BAM index\n  index: %s\n", index_path);
        }
    }

    handle->iter = NULL;
    handle->pbuffer = NULL;
    return handle;
}
Esempio n. 2
0
// Debug-time consistency check of the "<OUTPUT_PREFIX>_splitread.bam" and
// "<OUTPUT_PREFIX>_lowqual.bam" outputs. Records are read back two at a time
// and each pair is asserted to classify the way the file it was found in
// implies:
//   - split-read file: b2g_bam_pair_split() must hold;
//   - low-quality file: b2g_bams_highq() must not hold (and, when split reads
//     are written separately, b2g_bam_pair_split() must not hold either).
// A file that cannot be opened is skipped, and a trailing record without a
// mate ends the scan instead of being compared against a stale second record.
// All checks are assert()s, so they compile away under NDEBUG.
static void _check_quality(char *OUTPUT_PREFIX, int WRITE_LOWQ, int WRITE_SPLITREAD, int MAPPING_QUALITY, int MIN_ALIGNED_PCT, int IGNORE_DUPLICATES) {
  bam1_t *b1 = bam_init1(), *b2 = bam_init1();
  if (WRITE_SPLITREAD) {
    samfile_t *split_file = b2g_samfile_open("%s_splitread.bam", "rb", 0, OUTPUT_PREFIX);
    if (split_file) {
      // Both reads of a pair must succeed before the pair is checked.
      while (-1 < samread(split_file, b1) && -1 < samread(split_file, b2)) {
        assert(b2g_bam_pair_split(b1, b2, MAPPING_QUALITY, MIN_ALIGNED_PCT, IGNORE_DUPLICATES));
      }
      samclose(split_file);
    }
  }
  if (WRITE_LOWQ) {
    samfile_t *lowq_file = b2g_samfile_open("%s_lowqual.bam", "rb", 0, OUTPUT_PREFIX);
    if (lowq_file) {
      while (-1 < samread(lowq_file, b1) && -1 < samread(lowq_file, b2)) {
        if (WRITE_SPLITREAD) {
          assert(!b2g_bam_pair_split(b1, b2, MAPPING_QUALITY, MIN_ALIGNED_PCT, IGNORE_DUPLICATES));
          assert(!b2g_bams_highq(b1, b2, MAPPING_QUALITY, MIN_ALIGNED_PCT, IGNORE_DUPLICATES));
        }
        else assert(!b2g_bams_highq(b1, b2, MAPPING_QUALITY, MIN_ALIGNED_PCT, IGNORE_DUPLICATES) || b2g_bam_pair_split(b1, b2, MAPPING_QUALITY, MIN_ALIGNED_PCT, IGNORE_DUPLICATES));
      }
      samclose(lowq_file);
    }
  }

  bam_destroy1(b1);
  bam_destroy1(b2);
}
Esempio n. 3
0
// Splits a sam file into individual files, one per chromosome. The files are created in the specified directory.
// Each split file contains the same header as the original samFile. The generated files may only contain
// a header if no alignments to that chromosome exist. The names of the split files come from the sam header
// (assembled by _assemble_file_name). The unmapped alignments are collected in a file named after
// "splitChrSam_unaligned".
//
// samFile: character(1), path of the input sam file.
// outDir:  character(1), directory that receives the per-chromosome files.
// Returns a character vector with the chromosome names in the order in which they occur in the sam file
// header, followed by "splitChrSam_unaligned".
// Raises an R error on invalid arguments, a missing header, or allocation failure.
SEXP split_sam_chr(SEXP samFile, SEXP outDir)
{
  if (!Rf_isString(samFile) || 1 != Rf_length(samFile)){
    Rf_error("'samFile' must be character(1)");
  }

  if (!Rf_isString(outDir) || 1 != Rf_length(outDir)){
    Rf_error("'outDir' must be character(1)");
  }

  const char * sam_file =  Rf_translateChar(STRING_ELT(samFile, 0));
  const char * out_dir =  Rf_translateChar(STRING_ELT(outDir, 0));

  // open the input sam file
  samfile_t *fin = _bam_tryopen(sam_file, "r", NULL);
  if (fin->header == 0) {
    samclose(fin);
    Rf_error("invalid header");
  }

  // remove \r from header if exists (for windows); the text is compacted in place
  int j, k = 0;
  for(j = 0; j<fin->header->l_text; j++){
    if(fin->header->text[j] != '\r'){
      fin->header->text[k++] = fin->header->text[j];
    }
  }
  if(j != k){
    // at least one \r was dropped: re-terminate and refresh the stored length
    fin->header->text[k] = '\0';
    fin->header->l_text = (uint32_t)strlen(fin->header->text);
  }

  // allocate memory for a list of filehandles (n+1 because of the unaligned reads)
  samfile_t **foutList = (samfile_t**)calloc((size_t)(fin->header->n_targets+1), sizeof(samfile_t*));
  if (foutList == NULL) {
    samclose(fin);
    Rf_error("failed to allocate memory for the output file handles");
  }

  // open the output file handles (n+1 due to the unaligned reads)
  int i;
  SEXP chrNames;
  PROTECT(chrNames = allocVector(STRSXP, (fin->header->n_targets+1))); // protect from garbage collector

  for (i = 0; i < (fin->header->n_targets); i++) {
    foutList[i] = _bam_tryopen(_assemble_file_name(out_dir,fin->header->target_name[i]), "wh", fin->header);
    SET_STRING_ELT(chrNames, i, mkChar(fin->header->target_name[i]));
  }
  foutList[fin->header->n_targets] = _bam_tryopen(_assemble_file_name(out_dir,"splitChrSam_unaligned"), "wh", fin->header);
  SET_STRING_ELT(chrNames, fin->header->n_targets, mkChar("splitChrSam_unaligned"));

  // split the sam file based on chromosome
  _walk_through_sam_and_split(fin,foutList);

  // close all the file handles and release the handle list itself
  for (i = 0; i < (fin->header->n_targets+1); i++){samclose(foutList[i]);}
  free(foutList);
  samclose(fin);

  UNPROTECT(1); // release
  return chrNames;
}
Esempio n. 4
0
/**
 * DATE: 2010-7-29
 * FUNCTION: release the file-name list and close the input and output
 *           sam/bam handles; every member is reset to 0 afterwards, so
 *           calling close() again is harmless.
 * PARAMETER: void
 * RETURN: void
 */
void SamCtrl::close()
{
	if (m_fn_list != 0) {
		delete m_fn_list;
	}
	if (m_in != 0) {
		samclose(m_in);
	}
	if (m_out != 0) {
		samclose(m_out);
	}

	// forget the released handles
	m_fn_list = 0;
	m_in = 0;
	m_out = 0;
}
Esempio n. 5
0
// Closes every non-NULL handle it is given and frees the pair table, then
// returns `ret` unchanged so callers can write `return _cleanup(..., err);`.
static b2g_error_t _cleanup(samfile_t *in, FILE *chromosome_naming_file, FILE *cutoff_file, samfile_t *lowq_file, samfile_t *split_file, FILE *info_file, FILE *gasv_file, FILE *gasvpro_file, hash_t *pairtable, b2g_error_t ret) {
  if (in) {
    samclose(in);
  }
  if (chromosome_naming_file) {
    fclose(chromosome_naming_file);
  }
  if (cutoff_file) {
    fclose(cutoff_file);
  }
  if (lowq_file) {
    samclose(lowq_file);
  }
  if (split_file) {
    samclose(split_file);
  }
  if (info_file) {
    fclose(info_file);
  }
  if (gasv_file) {
    fclose(gasv_file);
  }
  if (gasvpro_file) {
    fclose(gasvpro_file);
  }
  if (pairtable) {
    hash_free(pairtable, NULL);
  }
  return ret;
}
Esempio n. 6
0
/*
 * Read every record of the BAM file `fn` and feed its query name to
 * inc_hash_table(), using core.l_qname as the key length.
 *
 * Progress is reported on stderr every 1,000,000 records. The process
 * exits with status 1 when the file cannot be opened.
 *
 * Returns the newly created hash table.
 */
hash_table* hash_ids(const char* fn)
{
    fprintf(stderr, "hashing ... \n");

    hash_table* T = create_hash_table();

    samfile_t* f = samopen(fn, "rb", NULL);
    if (f == NULL) {
        fprintf(stderr, "can't open bam file %s\n", fn);
        exit(1);
    }

    bam1_t* b = bam_init1();

    uint32_t n = 0;

    while (samread(f, b) >= 0) {
        if (++n % 1000000 == 0) {
            /* n is unsigned: print it with a matching conversion */
            fprintf(stderr, "\t%u reads\n", (unsigned)n);
        }

        inc_hash_table(T, bam1_qname(b), b->core.l_qname);
    }

    bam_destroy1(b);
    samclose(f);

    fprintf(stderr, "done.\n");
    return T;
}
Esempio n. 7
0
// Tear down in reverse order of dependency: iterator, then index, then the
// file handle. Members that were never set (nullptr) are skipped.
bam_streamer::
~bam_streamer()
{
    if (_hitr != nullptr)
    {
        hts_itr_destroy(_hitr);
    }
    if (_hidx != nullptr)
    {
        hts_idx_destroy(_hidx);
    }
    if (_bfp != nullptr)
    {
        samclose(_bfp);
    }
}
Esempio n. 8
0
// Release the iterator, the index and finally the file handle; members that
// are still NULL are skipped.
bam_streamer::
~bam_streamer()
{
    if (_biter != NULL)
    {
        bam_iter_destroy(_biter);
    }
    if (_bidx != NULL)
    {
        bam_index_destroy(_bidx);
    }
    if (_bfp != NULL)
    {
        samclose(_bfp);
    }
}
Esempio n. 9
0
void samToOpenBed(char *samIn, FILE *f)
/* Convert the alignments in SAM file samIn to BED lines written to the
 * already open file f.  Records without a target (tid < 0) are skipped.
 * Aborts if samIn can't be opened or if samread reports an error other
 * than -1. */
{
    samfile_t *sf = samopen(samIn, "r", NULL);
    if (sf == NULL)
        errnoAbort("Couldn't open %s", samIn);
    bam_header_t *bamHeader = sf->header;
    bam1_t one;
    ZeroVar(&one);
    int err;
    while ((err = samread(sf, &one)) >= 0)
    {
        int32_t tid = one.core.tid;
        if (tid < 0)
            continue;
        char *chrom = bamHeader->target_name[tid];
        // Approximate here... can do better if parse cigar.
        int start = one.core.pos;
        int size = one.core.l_qseq;
        int end = start + size;
        boolean isRc = (one.core.flag & BAM_FREVERSE);
        char strand = '+';
        if (isRc)
        {
            strand = '-';
            reverseIntRange(&start, &end, bamHeader->target_len[tid]);
        }
        fprintf(f, "%s\t%d\t%d\t.\t0\t%c\n", chrom, start, end, strand);
    }
    // -1 is the normal end-of-file return; anything else negative is an error.
    if (err < 0 && err != -1)
        errnoAbort("samread err %d", err);
    // NOTE(review): one.data is presumably allocated by samread and is not
    // released here -- confirm and free it if so.
    samclose(sf);
}
Esempio n. 10
0
File: SAM.cpp Progetto: vezzi/ERNE
// Close the underlying sam file, if one is open, and mark the object closed.
// Does nothing when no file is attached.
void SAM::close() {
	if (sam_file == NULL)
		return;

	samfile_is_open = false;
	samclose(sam_file);
	sam_file = NULL;
}
Esempio n. 11
0
// Fetches the alignments of `refName` overlapping [start-1-buffer, end+buffer)
// from the indexed BAM file `filename` and hands each one to fetch_func, which
// receives the `transcripts` and `transcriptNames` containers through the
// fetch_data_t callback argument.
//
// Returns true on success. Returns false (after printing a message to stderr
// and releasing the BAM handle and index) when the file or its index cannot be
// opened, or when `refName` is not a reference in the BAM header.
bool YTranscriptFetcher::fetchBAMTranscripts(const char* filename, const char *refName, unsigned int start, unsigned int end, std::vector<YTranscript*> *transcripts,std::set<std::string> *transcriptNames) {
    //Open the region in the bam file

    fetch_data_t data;
    fetch_data_t *d = &data;
    d->beg = start-1-buffer;
    d->end = end+buffer;

    d->transcripts = transcripts;
    d->requestedTranscripts = transcriptNames;
    d->in = samopen(filename, "rb", 0);

    if (d->in == 0) {
        fprintf(stderr, "Failed to open BAM file %s\n", filename);
        return false;
    }
    bam_index_t *idx;
    idx = bam_index_load(filename); // load BAM index
    if (idx == 0) {
        fprintf(stderr, "BAM indexing file is not available.\n");
        samclose(d->in);            // don't leak the open BAM handle
        return false;
    }
    bam_init_header_hash(d->in->header);
    d->tid = bam_get_tid(d->in->header, refName);
    if(d->tid == -1) {
        fprintf(stderr, "Reference id %s not found in BAM file\n",refName);
        bam_index_destroy(idx);     // release both resources before bailing out
        samclose(d->in);
        return false;
    }
    bam_fetch(d->in->x.bam, idx, d->tid, d->beg, d->end, d, fetch_func);
    bam_index_destroy(idx);
    samclose(d->in);
    return true;
}
Esempio n. 12
0
File: SAM.cpp Progetto: vezzi/ERNE
// Builds a new BAM header from `header` in which every @SQ line is replaced
// by the (name, length) entries of `list`.
//
// The header is first dumped to a temporary SAM file, then copied line by line
// into a second temporary file while dropping @SQ lines (and lines shorter than
// three characters), the new @SQ lines are appended, and the result is parsed
// back with sam_header_read().
//
// Returns the header read from the rewritten file.
/*static*/ bam_header_t * SAM::update_header_from_list(bam_header_t *header, names_list_t & list) {
	Temporary_File samfile;
	samfile.close_file();
	samfile_t * sf = samopen(samfile.get_filename().c_str(),"wh",header);
	samclose(sf);

	Temporary_File tempfile;
	ofstream &output = tempfile.get_stream();

	ifstream input(samfile.get_filename().c_str());
	string temp;
	// Loop on the result of getline itself: testing eof() beforehand never
	// terminates when the stream failed to open or hits a read error.
	while (getline(input,temp)) {
		if ((temp.size() >= 3) and (temp[0] != '@' or temp[1] != 'S' or temp[2] != 'Q'))
			output << temp << '\n';
	}

	for (names_list_t::iterator iter = list.begin(); iter != list.end(); iter++)
		output << "@SQ\tSN:" << iter->first << "\tLN:" << iter->second << '\n';
	tempfile.close_file();

	tamFile fp = sam_open(tempfile.get_filename().c_str());

	bam_header_t * newheader = sam_header_read(fp);
	sam_close(fp);

	return newheader;
}
Esempio n. 13
0
boolean bamFileExists(char *fileOrUrl)
/* Return TRUE if we can successfully open the bam file and its index file.
 * Warns and returns FALSE when the bam file opens but its index can't be
 * loaded; returns FALSE silently when the bam file itself can't be opened. */
{
    char *bamFileName = fileOrUrl;
    samfile_t *fh = samopen(bamFileName, "rb", NULL);
    boolean usingUrl = (strstr(fileOrUrl, "tp://") || strstr(fileOrUrl, "https://"));
    if (fh != NULL)
    {
#ifndef KNETFILE_HOOKS
        // When file is an URL, this caches the index file in addition to validating:
        // Since samtools's url-handling code saves the .bai file to the current directory,
        // chdir to a trash directory before calling bam_index_load, then chdir back.
        char *runDir = getCurrentDir();
        char *samDir = getSamDir();
        if (usingUrl)
            setCurrentDir(samDir);
#endif//ndef KNETFILE_HOOKS
        bam_index_t *idx = bam_index_load(bamFileName);
#ifndef KNETFILE_HOOKS
        if (usingUrl)
            setCurrentDir(runDir);
#endif//ndef KNETFILE_HOOKS
        samclose(fh);
        if (idx == NULL)
        {
            warn("bamFileExists: failed to read index corresponding to %s", bamFileName);
            return FALSE;
        }
        // Use the library's destructor: a plain free() would release only the
        // top-level struct and leak whatever the index owns internally.
        bam_index_destroy(idx);
        return TRUE;
    }
    return FALSE;
}
/*
 * Usage: prog <in.sam|in.bam> <out.bam>
 *
 * Copies to <out.bam> the reads whose mate maps to the same target on the
 * opposite strand, where the leftmost of the two starts within the first
 * 500 bp of the target and the other starts within its last 500 bp.
 * The input is opened as BAM when its name ends in ".bam", as SAM otherwise.
 *
 * Returns 0 on success, 1 on a usage error or when a file cannot be opened.
 */
int main(int argc, char* argv[]) {
    samfile_t *ifile = NULL, *ofile = NULL;
    bam1_t *read = NULL;
    int keep = 0;
    char *p = NULL;

    if(argc < 3) {
        fprintf(stderr, "Usage: %s <in.sam|in.bam> <out.bam>\n", argc > 0 ? argv[0] : "program");
        return 1;
    }

    //Open input file, either SAM or BAM (a name without any '.' is treated as SAM)
    p = strrchr(argv[1], '.');
    if(p != NULL && strcmp(p, ".bam") == 0) {
        ifile = samopen(argv[1], "rb", NULL);
    } else {
        ifile = samopen(argv[1], "r", NULL);
    }
    if(ifile == NULL || ifile->header == NULL) {
        fprintf(stderr, "Failed to open input file %s\n", argv[1]);
        if(ifile != NULL) samclose(ifile);
        return 1;
    }

    bam_header_t *head = ifile->header;

    //Open output file
    ofile = samopen(argv[2], "wb", ifile->header);
    if(ofile == NULL) {
        fprintf(stderr, "Failed to open output file %s\n", argv[2]);
        samclose(ifile);
        return 1;
    }

    read = bam_init1();

    //Iterate through the lines
    while(samread(ifile, read) > 1) {
        keep = 0;
        //Is the read mapped, and is its mate on the same chromosome/contig?
        if(read->core.tid >= 0 && read->core.tid == read->core.mtid) {
            // Signed arithmetic: with the unsigned target_len, "len - 500" wraps
            // around for targets shorter than 500 bp and the test can never pass.
            long long tail_start = (long long)head->target_len[read->core.tid] - 500;
            //Are the mates on opposite strands?
            if(read->core.flag & BAM_FREVERSE && !(read->core.flag & BAM_FMREVERSE)) {
                if(read->core.pos < read->core.mpos) {
                    // Are mates 500 bp or less from the ends?
                    if (read->core.pos <= 500 && (long long)read->core.mpos > tail_start)
                        keep=1;
                }
            } else if(!(read->core.flag & BAM_FREVERSE) && read->core.flag & BAM_FMREVERSE) {
                if(read->core.mpos < read->core.pos) {
                    if (read->core.mpos <= 500 && (long long)read->core.pos > tail_start)
                        keep=1;
                }
            }
        }
        if(keep) samwrite(ofile, read);
    }
    bam_destroy1(read);
    samclose(ifile);
    samclose(ofile);
    return 0;
}
Esempio n. 15
0
/* Destroy <s_iter>: closes its sam file, deletes the current alignment (if
   any) and the alphabet, then frees the iterator itself.
   A NULL <s_iter> is ignored. */
void gt_samfile_iterator_delete(GtSamfileIterator *s_iter)
{
  if (s_iter) {
    samclose(s_iter->samfile);
    if (s_iter->current_alignment) {
      gt_sam_alignment_delete(s_iter->current_alignment);
    }
    gt_alphabet_delete(s_iter->alphabet);
    gt_free(s_iter);
  }
}
Esempio n. 16
0
void bamClose(samfile_t **pSamFile)
/* Close down the samfile_t that *pSamFile refers to and set *pSamFile to
 * NULL.  A NULL pSamFile is ignored. */
{
    if (pSamFile == NULL)
        return;
    samclose(*pSamFile);
    *pSamFile = NULL;
}
Esempio n. 17
0
/*
 * Run `func` on every alignment of BAM file `ifn`, or only on those
 * overlapping region `reg` when `reg` is non-NULL (requires a BAM index).
 *
 * ifn  - input BAM path.
 * ofn  - optional output BAM path; when non-NULL it is opened with the input
 *        header and the handle is passed to `func`.
 * reg  - optional region string, parsed with bam_parse_region().
 * data - opaque pointer handed through to `func`.
 * func - callback invoked as func(record, in, out, data).
 *
 * Exits with status 1 when a file or the index cannot be opened, when the
 * region names an unknown reference, or when reading stops with a code other
 * than -1. Otherwise returns the final read status (-1).
 */
int sam_fetch(char *ifn, char *ofn, char *reg, void *data, sam_fetch_f func) {
	int ret = 0;
	samfile_t *in = samopen(ifn, "rb", 0);
	if (in == 0) {
		fprintf(stderr, "[%s:%d] Failed to open \"%s\" for reading.\n",
						__func__, __LINE__, ifn);
		exit(1);
	}
	samfile_t *out = 0;
	if (ofn) {
		out = samopen(ofn, "wb", in->header);
		if (out == 0) {
			fprintf(stderr, "[%s:%d] Failed to open \"%s\" for writing.\n",
							__func__, __LINE__, ofn);
			exit(1);
		}
	}

	if (reg) {
		bam_index_t *idx = bam_index_load(ifn);
		if (idx == 0) {
			fprintf(stderr, "[%s:%d] Random alignment retrieval only works for indexed BAM files.\n",
							__func__, __LINE__);
			exit(1);
		}
		int tid, beg, end;
		bam_parse_region(in->header, reg, &tid, &beg, &end);
		if (tid < 0) {
			fprintf(stderr, "[%s:%d] Region \"%s\" specifies an unknown reference name. \n",
							__func__, __LINE__, reg);
			exit(1);
		}
		bam_iter_t iter;
		bam1_t *b = bam_init1();
		iter = bam_iter_query(idx, tid, beg, end);
		while ((ret = bam_iter_read(in->x.bam, iter, b)) >= 0) func(b, in, out, data);
		bam_iter_destroy(iter);
		bam_destroy1(b);
		bam_index_destroy(idx);
	} else {
		bam1_t *b = bam_init1();
		while ((ret = samread(in, b)) >= 0) func(b, in, out, data);
		bam_destroy1(b);
	}
	if (out) samclose(out);
	samclose(in);

	if (ret != -1) {					/* truncated is -2 */
		fprintf(stderr, "[%s:%d] Alignment retrieval failed due to truncated file\n",
						__func__, __LINE__);
		exit(1);
	}

	return ret;
}
Esempio n. 18
0
// Returns the number of records samread() can read from the BAM file at
// `path`, or 0 when the file cannot be opened.
static int _count_reads(char *path) {
  samfile_t *bamfile = b2g_samfile_open(path, "rb", 0);
  if (!bamfile) {
    return 0;
  }

  int count = 0;
  bam1_t *bam = bam_init1();
  while (samread(bamfile, bam) >= 0) {
    ++count;
  }
  bam_destroy1(bam);
  samclose(bamfile);
  return count;
}
Esempio n. 19
0
/*
 * Read every record of the BAM file `fn` and feed a mate-qualified read name
 * to inc_hash_table(): the first core.l_qname bytes of the query name followed
 * by "/2" when BAM_FREAD2 is set and "/1" otherwise (key length
 * core.l_qname + 2).
 *
 * Progress is reported on stderr every 1,000,000 records. The process exits
 * with status 1 when the file cannot be opened or memory runs out.
 *
 * Returns the newly created hash table.
 */
hash_table* hash_ids(const char* fn)
{
    fprintf(stderr, "hashing ... \n");

    hash_table* T = create_hash_table();

    samfile_t* f = samopen(fn, "rb", NULL);
    if (f == NULL) {
        fprintf(stderr, "can't open bam file %s\n", fn);
        exit(1);
    }

    bam1_t* b = bam_init1();

    uint32_t n = 0;

    /* scratch buffer for the suffixed name, grown on demand and reused */
    char* qname = NULL;
    size_t qname_size = 0;

    while (samread(f, b) >= 0) {
        if (++n % 1000000 == 0) {
            /* n is unsigned: print it with a matching conversion */
            fprintf(stderr, "\t%u reads\n", (unsigned)n);
        }

        /* room for the name plus '/', the mate digit and the terminator */
        size_t needed = (size_t)b->core.l_qname + 3;
        if (qname_size < needed) {
            /* keep the old pointer until realloc is known to have succeeded */
            char* grown = realloc(qname, needed);
            if (grown == NULL) {
                fprintf(stderr, "out of memory while hashing %s\n", fn);
                exit(1);
            }
            qname = grown;
            qname_size = needed;
        }

        /* NOTE(review): if l_qname counts the name's trailing NUL, the key
         * contains an embedded NUL before the suffix -- confirm this is what
         * the lookup side expects. */
        memcpy(qname, bam1_qname(b), b->core.l_qname);

        qname[b->core.l_qname]     = '/';
        qname[b->core.l_qname + 1] = (b->core.flag & BAM_FREAD2) ? '2' : '1';
        qname[b->core.l_qname + 2] = '\0';

        inc_hash_table(T, qname, b->core.l_qname + 2);
    }

    free(qname);

    bam_destroy1(b);
    samclose(f);

    fprintf(stderr, "done.\n");
    return T;
}
Esempio n. 20
0
/*
 * Entry point of "samtools rmdupse <in.bam> <out.bam>".
 *
 * Repeatedly fills a buffer from the input with fill_buf(), processes it with
 * rmdupse_buf() and writes it to the output with dump_buf().
 *
 * Returns 0 on success and 1 on a usage error, when either file cannot be
 * opened, or when the buffer cannot be allocated.
 */
int bam_rmdupse(int argc, char *argv[])
{
	samfile_t *in, *out;
	buffer_t *buf;
	if (argc < 3) {
		fprintf(stderr, "Usage: samtools rmdupse <in.bam> <out.bam>\n\n");
		fprintf(stderr, "Note: Picard is recommended for this task.\n");
		return 1;
	}
	in = samopen(argv[1], "rb", 0);
	if (in == 0) {
		fprintf(stderr, "[bam_rmdupse] fail to open '%s' for reading\n", argv[1]);
		return 1;
	}
	/* the output reuses the input header, so the input must be open first */
	out = samopen(argv[2], "wb", in->header);
	if (out == 0) {
		fprintf(stderr, "[bam_rmdupse] fail to open '%s' for writing\n", argv[2]);
		samclose(in);
		return 1;
	}
	buf = calloc(1, sizeof(buffer_t));
	if (buf == 0) {
		fprintf(stderr, "[bam_rmdupse] out of memory\n");
		samclose(in); samclose(out);
		return 1;
	}
	while (fill_buf(in, buf)) {
		rmdupse_buf(buf);
		dump_buf(buf, out);
	}
	samclose(in); samclose(out);
	free(buf->buf); free(buf);
	return 0;
}
Esempio n. 21
0
/*
 * samopen() wrapper that reports failures as R errors.
 *
 * Raises an R error when the file cannot be opened, or (after closing it)
 * when its header is missing or lists no targets. Otherwise returns the
 * open handle.
 */
samfile_t *_bam_tryopen(const char *filename, const char *filemode, void *aux)
{
    samfile_t *handle = samopen(filename, filemode, aux);

    if (!handle) {
        Rf_error("failed to open SAM/BAM file\n  file: '%s'", filename);
    }

    int header_unusable = !handle->header || handle->header->n_targets == 0;
    if (header_unusable) {
        samclose(handle);
        Rf_error("SAM/BAM header missing or empty\n  file: '%s'", filename);
    }

    return handle;
}
Esempio n. 22
0
/*
 * Populate table T from the indexed BAM file reads_fn.
 *
 * T is created with one slot per target in the BAM header and T->seq_names
 * receives a strdup'ed copy of every target name. For each interval in `is`
 * whose sequence name is known to the header, the overlapping reads are
 * fetched through the index and those whose strand equals the interval's
 * strand are passed to table_inc().
 *
 * failf() is called when the BAM file or its index cannot be opened.
 */
void hash_reads( table* T, const char* reads_fn, interval_stack* is )
{
    samfile_t* bam = samopen( reads_fn, "rb", NULL );
    if( bam == NULL ) {
        failf( "Can't open bam file '%s'.", reads_fn );
    }

    bam_index_t* bam_idx = bam_index_load( reads_fn );
    if( bam_idx == NULL ) {
        failf( "Can't open bam index '%s.bai'.", reads_fn );
    }

    bam_init_header_hash( bam->header );

    /* one table slot and one copied name per target */
    table_create( T, bam->header->n_targets );
    T->seq_names = (char**)malloc( sizeof(char*) * bam->header->n_targets );
    for( size_t k = 0; k < bam->header->n_targets; k++ ) {
        T->seq_names[k] = strdup(bam->header->target_name[k]);
    }

    log_puts( LOG_MSG, "hashing reads ... \n" );
    log_indent();

    bam1_t* rec = bam_init1();

    for( interval_stack::iterator iv = is->begin(); iv != is->end(); iv++ ) {
        int tid = bam_get_tid( bam->header, iv->seqname );
        if( tid < 0 ) {
            /* interval on a sequence the BAM file does not know */
            continue;
        }

        bam_iter_t it = bam_iter_query( bam_idx, tid, iv->start, iv->end );

        while( bam_iter_read( bam->x.bam, it, rec ) >= 0 ) {
            if( bam1_strand(rec) != iv->strand ) {
                continue;
            }
            table_inc( T, rec );
        }

        bam_iter_destroy(it);
    }

    bam_destroy1(rec);

    log_unindent();
    log_printf( LOG_MSG, "done. (%zu unique reads hashed)\n", T->m );

    bam_index_destroy(bam_idx);
    samclose(bam);
}
Esempio n. 23
0
// Closes the BAM handle and destroys the index, skipping whichever of the
// two is NULL.
MyBamWrap::~MyBamWrap() {
	if (in != NULL)
		samclose(in);

	if (idx != NULL)
		bam_index_destroy(idx);
}
Esempio n. 24
0
/*
 * Usage: prog bam_file tid
 *
 * Opens the BAM file and its index, then fetches every alignment of target
 * `tid` (positions 0..INT_MAX) through fetch_func into a pileup buffer whose
 * callback is pileup_func; the pileup depth cap is raised to INT_MAX.
 *
 * Exits with status 1 on a usage error or when the file or index cannot be
 * opened; returns 0 otherwise.
 */
int main(int argc, char *argv[])
{
     uint32_t next_pos = 1;

     if (argc < 3) {
          printf("Usage: %s bam_file tid\n", argv[0]);
          exit(1);
     }

     char *bamfilename = argv[1];
     int32_t tid = strtol(argv[2], NULL, 10);

     /* try to open bam file */
     samfile_t *bamin = samopen(bamfilename, "rb", NULL);
     if (bamin == NULL) {
          fprintf(stderr, "Error opening bamfile %s\n", bamfilename);
          exit(1);
     }

     /* try to open index */
     bam_index_t *bamidx = bam_index_load(bamfilename);
     if (bamidx == NULL) {
          fprintf(stderr, "Error opening index for %s\n", bamfilename);
          exit(1);
     }

     bam1_t *bam_read = bam_init1();

     bam_plbuf_t *buf = bam_plbuf_init(&pileup_func, &next_pos);
     /* disable maximum pileup depth */
     bam_plp_set_maxcnt(buf->iter, INT_MAX);
     bam_fetch(bamin->x.bam, bamidx, tid, 0, INT_MAX, buf, &fetch_func);
     /* finish pileup */
     bam_plbuf_push(0, buf);

     /* release everything in reverse order of acquisition */
     bam_plbuf_destroy(buf);
     bam_destroy1(bam_read);
     bam_index_destroy(bamidx);
     samclose(bamin);
     return 0;
}
Esempio n. 25
0
/*
 * Usage: showbam <in.bam>
 *
 * Opens the BAM file named by the first argument and calls fetch_func() on
 * every record. Returns 0 on success, 1 when the argument is missing or the
 * file cannot be opened.
 */
int main(int argc, char *argv[])
{
    samfile_t *fp;
    if (argc < 2) {
        /* without this check argv[1] is NULL and would be passed to samopen */
        fprintf(stderr, "Usage: showbam <in.bam>\n");
        return 1;
    }
    if ((fp = samopen(argv[1], "rb", 0)) == 0) {
        fprintf(stderr, "showbam: Fail to open BAM file %s\n", argv[1]);
        return 1;
    }
    bam1_t *b = bam_init1();
    while (samread(fp, b) >= 0) fetch_func(b);
    bam_destroy1(b);
    samclose(fp);
    return 0;
}
Esempio n. 26
0
void edwSamRepeatAnalysis(char *inSam, char *outRa)
/* edwSamRepeatAnalysis - Analyze result of alignment vs. RepeatMasker type libraries.
 * Reads the alignments in inSam, counts reads per high-level repeat class (the
 * part of the target name between '#' and an optional '/'), and writes one
 * "<class> <fraction of all reads>" line per class plus a "total <mapped fraction>"
 * line to outRa.  Aborts if inSam can't be opened, if samread reports an error
 * other than -1, or if a target name contains no '#'. */
{
/* Go through sam file, filling in hiLevelHash with count of each hi level repeat class we see. */
struct hash *hiLevelHash = hashNew(0);
samfile_t *sf = samopen(inSam, "r", NULL);
if (sf == NULL)
    errAbort("Couldn't open %s", inSam);
bam_header_t *bamHeader = sf->header;
bam1_t one;
ZeroVar(&one);
int err;
long long hit = 0, miss = 0;
while ((err = samread(sf, &one)) >= 0)
    {
    int32_t tid = one.core.tid;
    if (tid < 0)
	{
	++miss;
        continue;
	}
    ++hit;

    /* Parse out hiLevel classification from target,  which is something like 7SLRNA#SINE/Alu 
     * from which we'd want to extract SINE.  The '/' is not present in all input. */
    char *target = bamHeader->target_name[tid];
    char *hashPos = strchr(target, '#');
    if (hashPos == NULL)
        errAbort("# not found in target %s", target);
    /* NOTE(review): hiLevel is cloned for every record and never released here;
     * free it after hashIncInt if the hash keeps its own copy of the key -- confirm. */
    char *hiLevel = cloneString(hashPos + 1);
    char *slashPos = strchr(hiLevel, '/');
    if (slashPos != NULL)
        *slashPos = 0;

    hashIncInt(hiLevelHash, hiLevel);
    }
/* -1 is the normal end-of-file return; anything else negative is a read error. */
if (err < 0 && err != -1)
    errAbort("samread err %d", err);
samclose(sf);

/* Output some basic stats as well as contents of hash */
FILE *f = mustOpen(outRa, "w");
long long total = hit + miss;
/* With no reads at all report zeros rather than dividing by zero. */
double invTotal = (total > 0) ? 1.0 / total : 0.0;
double mapRatio = (double)hit * invTotal;
struct hashEl *hel, *helList = hashElListHash(hiLevelHash);
slSort(&helList, hashElCmp);
for (hel = helList; hel != NULL; hel = hel->next)
    {
    double hitRatio = ptToInt(hel->val) * invTotal;
    fprintf(f, "%s %g\n", hel->name, hitRatio);
    }
fprintf(f, "total %g\n", mapRatio);
carefulClose(&f);
}
// Frees the record behind every line still queued in _lines, then closes
// all source files and empties the file list.
BamMerge::~BamMerge()
{
  // Drain the queue: release the record of the top entry, then drop it.
  while (_lines.size() != 0)
    {
      CBamLine pending(_lines.top());
      pending.b_free();
      _lines.pop();
    }

  for (size_t idx = 0; idx < _src_files.size(); ++idx)
    {
      samclose(_src_files[idx]);
    }

  _src_files.clear();
}
Esempio n. 28
0
/*
 * Write to stdout, in SAM text form, the header of BAM file `fn` followed by
 * every record for which get_hash_table(T, qname, l_qname) returns 1.
 *
 * Progress is reported on stderr every 1,000,000 records. The process exits
 * with status 1 when the input or stdout cannot be opened.
 */
void filter_by_id(const char* fn, hash_table* T)
{
    fprintf(stderr, "filtering ... \n");

    samfile_t* fin = samopen(fn, "rb", NULL);
    if (fin == NULL) {
        fprintf(stderr, "can't open bam file %s\n", fn);
        exit(1);
    }

    samfile_t* fout = samopen("-", "w", (void*)fin->header);
    if (fout == NULL) {
        fprintf(stderr, "can't open stdout, for some reason.\n");
        exit(1);
    }

    /* the header text is emitted by hand, ahead of the records */
    fputs(fin->header->text, stdout);

    bam1_t* b = bam_init1();
    uint32_t n = 0;

    while (samread(fin, b) >= 0) {
        if (++n % 1000000 == 0) {
            /* n is unsigned: print it with a matching conversion */
            fprintf(stderr, "\t%u reads\n", (unsigned)n);
        }

        if (get_hash_table(T, bam1_qname(b), b->core.l_qname) == 1) {
            samwrite(fout, b);
        }
    }

    bam_destroy1(b);
    samclose(fout);
    samclose(fin);

    fprintf(stderr, "done.\n");
}
Esempio n. 29
0
/*
 * Release everything owned by the BAM_FILE behind external pointer `ext`:
 * the sam/bam file, the index, the mate iterator and the pileup buffer.
 * Every member is reset to NULL afterwards so that a second call (or any
 * later cleanup that reuses this function) does not release a resource twice.
 */
static void _bamfile_close(SEXP ext)
{
    BAM_FILE bfile = BAMFILE(ext);
    if (NULL != bfile->file)
        samclose(bfile->file);
    if (NULL != bfile->index)
        bam_index_destroy(bfile->index);
    if (NULL != bfile->iter)
        bam_mate_iter_destroy(bfile->iter);
    if (NULL != bfile->pbuffer)
        pileup_pbuffer_destroy(bfile->pbuffer);
    bfile->file = NULL;
    bfile->index = NULL;
    bfile->iter = NULL;
    bfile->pbuffer = NULL;  /* was left dangling: a second close freed it again */
}
Esempio n. 30
0
static void scanSam(char *samIn, FILE *f, struct genomeRangeTree *grt, long long *retHit, 
    long long *retMiss,  long long *retTotalBasesInHits)
/* Scan through sam file doing several things:counting how many reads hit and how many 
 * miss target during mapping phase, copying those that hit to a little bed file, and 
 * also defining regions covered in a genomeRangeTree. 
 * f may be NULL, in which case no bed output is written.  Aborts if samIn can't be
 * opened or if samread reports an error other than -1. */
{
samfile_t *sf = samopen(samIn, "r", NULL);
if (sf == NULL)
    errnoAbort("Couldn't open %s", samIn);
bam_header_t *bamHeader = sf->header;
bam1_t one;
ZeroVar(&one);
int err;
long long hit = 0, miss = 0, totalBasesInHits = 0;
while ((err = samread(sf, &one)) >= 0)
    {
    int32_t tid = one.core.tid;
    if (tid < 0)
	{
	++miss;
        continue;
	}
    ++hit;
    char *chrom = bamHeader->target_name[tid];
    // Approximate here... can do better if parse cigar.
    int start = one.core.pos;
    int size = one.core.l_qseq;
    int end = start + size;	
    totalBasesInHits += size;
    boolean isRc = (one.core.flag & BAM_FREVERSE);
    char strand = '+';
    if (isRc)
	{
	strand = '-';
	reverseIntRange(&start, &end, bamHeader->target_len[tid]);
	}
    // Clamp: the approximate range may start before the target's first base.
    if (start < 0) start=0;
    if (f != NULL)
	fprintf(f, "%s\t%d\t%d\t.\t0\t%c\n", chrom, start, end, strand);
    genomeRangeTreeAdd(grt, chrom, start, end);
    }
// -1 is the normal end-of-file return; anything else negative is an error.
if (err < 0 && err != -1)
    errnoAbort("samread err %d", err);
samclose(sf);
*retHit = hit;
*retMiss = miss;
*retTotalBasesInHits = totalBasesInHits;
}