Exemple #1
0
READ_ALN* new_read_aln(hashtable ht,char*read_name) {

  // look for read in HT
  // add to hash table
  READ_ALN* r;
  if((r=get_read_aln(ht,read_name))==NULL) {
    // Memory chunck: |[index_entry]len bytes+1|
    int len=strlen(read_name);
    ulong key=hashit(read_name);
    char *mem_block=(char*)malloc(sizeof(READ_ALN)+len+1);
    index_mem+=sizeof(READ_ALN)+len+1+sizeof(hashnode);
    if (mem_block==NULL) { 
      fprintf(stderr,"Error allocating memory\n");
      exit(1);
    }
    r=(READ_ALN*)&mem_block[0];    
    r->name=(char*)&mem_block[sizeof(READ_ALN)];
    r->name[len]='\0';
    strncpy(r->name,read_name,len+1);
    r->ctr=0;
    if(insere(ht,key,r)<0){
      fprintf(stderr,"Error adding %s to index\n",read_name);
      exit(1);
    }
  } 
  r->ctr++;
  return(r);
}
Exemple #2
0
int main(int argc, char *argv[])  
{  
  short out2stdout=0;
  hashtable ht=new_hashtable(HASHSIZE);
  bamFile in,in2; 
  bamFile out; 
  int paired;//1 if not paired or pair read 1, 2 otherwise
  index_mem=sizeof(hashtable)*sizeof(hashnode**)*HASHSIZE*2;

  if (argc != 3) {  
    fprintf(stderr, "Usage: bam_fix_NH <in.bam> <out.bam or - for stdout>\n");  
    return 1;  
  }  
  // Open file and exit if error
  in = bam_open(argv[1], "rb");
  out2stdout = strcmp(argv[2], "-")? 0 : 1; 
  out = strcmp(argv[2], "-")? bam_open(argv[2], "w") : bam_dopen(fileno(stdout), "w"); 
  if (in == 0 ) {  
    fprintf(stderr, "ERROR: Fail to open BAM file %s\n", argv[1]);  
    return 1;  
  }  
  if (out == 0) {  
    fprintf(stderr, "ERROR: Fail to open BAM file %s\n", argv[2]);  
    return 1;  
  }  

  unsigned long num_alns=0;
  int ref;  

  // ***********
  // Copy header
  bam_header_t *header;
  header = bam_header_read(in);
  bam_header_write(out,header);

  // sorted by name?
  // Should not rely on the value in SO 
  bam1_t *aln=bam_init1();
  bam1_t *prev=bam_init1();

  if (!out2stdout) {
    fprintf(stderr,"bam_fix_NH version %s\n",VERSION);
    fprintf(stderr,"Processing %s\n",argv[1]);
    fprintf(stderr,"Hashing...\n");fflush(stderr);
  }

  while(bam_read1(in,aln)>=0) { // read alignment
    if (aln->core.tid < 0) continue;//ignore unaligned reads
    if (aln->core.flag & BAM_FUNMAP) continue;
    if (aln->core.flag & BAM_FREAD2) paired=2;
    else paired=1;
    ++num_alns;
    new_read_aln(ht,fix_read_name(bam1_qname(aln),paired));
    if(!out2stdout) PRINT_ALNS_PROCESSED(num_alns);
  }
  bam_close(in);  
  if(!out2stdout) {
    fprintf(stderr,"%s%lu\n",BACKLINE,num_alns);
    fprintf(stderr,"Hashing complete (%lu alignments)\n",num_alns);
    fprintf(stderr,"Memory used: %ld MB\n",index_mem/1024/1024);  
    fprintf(stderr,"Updating entries with NH and printing BAM...\n");
    fflush(stderr);
  }
  // reopen
  in2 = bam_open(argv[1], "rb");
  if (in2 == 0 ) {  
    fprintf(stderr, "ERROR: Fail to open BAM file %s\n", argv[1]);  
    return 1;  
  }  

  header = bam_header_read(in2);
  num_alns=0;
  while(bam_read1(in2,aln)>=0) { // read alignment
    paired=1;
    if (aln->core.tid < 0) continue;//ignore unaligned reads
    if (aln->core.flag & BAM_FUNMAP) continue;
    if (aln->core.flag & BAM_FREAD2) paired=2;
    ++num_alns;
    READ_ALN *r=get_read_aln(ht,fix_read_name(bam1_qname(aln),paired));

    assert(r!=NULL);
    // update the NH field
    uint8_t *old_nh = bam_aux_get(aln, "NH");    
    int32_t nh=r->ctr;
    if (old_nh) {
      if (nh!=bam_aux2i(old_nh)) {
	fprintf(stderr,"warning: value mismatch! replacing>%s %d->%d\n",bam1_qname(aln),bam_aux2i(old_nh),nh);
      }
      bam_aux_del(aln, old_nh);
      bam_aux_append(aln, "NH", 'i', 4, (uint8_t*)&nh);
#ifdef DEBUG
      //      printf("!>%s %d\n",bam1_qname(aln),r->ctr);
#endif
    }
    if (!old_nh) { // add NH  
      bam_aux_append(aln, "NH", 'i', 4, (uint8_t*)&nh);
#ifdef DEBUG
      fprintf(stderr,"!>%s %d\n",bam1_qname(aln),bam_aux2i(old_nh));
#endif
    }
    bam_write1(out,aln);
    if(!out2stdout) PRINT_ALNS_PROCESSED(num_alns);
  }
  // 
  bam_destroy1(aln);
  bam_close(in2);  
  bam_close(out);  
  if(!out2stdout) {
    fprintf(stderr,"%s%lu\n",BACKLINE,num_alns);
    fprintf(stderr,"Done.\n");
  }
  return 0;  
}  
Exemple #3
0
int main(int argc, char *argv[])  
{  
  hashtable ht=new_hashtable(HASHSIZE);
  bamFile in,in2; 
  bamFile out; 
  
  if (argc != 3) {  
    fprintf(stderr, "Usage: bam_fix_NH <in.bam> <out.bam>\n");  
    return 1;  
  }  
  
  // Open file and exit if error
  //in = strcmp(argv[1], "-")? bam_open(argv[1], "rb") : bam_dopen(fileno(stdin), "rb");
  in = bam_open(argv[1], "rb");
  out = strcmp(argv[2], "-")? bam_open(argv[2], "w") : bam_dopen(fileno(stdout), "w"); 
  if (in == 0 ) {  
    fprintf(stderr, "ERROR: Fail to open BAM file %s\n", argv[1]);  
    return 1;  
  }  
  if (out == 0) {  
    fprintf(stderr, "ERROR: Fail to open BAM file %s\n", argv[2]);  
    return 1;  
  }  

  unsigned long num_alns=0;
  int ref;  

  // ***********
  // Copy header
  bam_header_t *header;
  header = bam_header_read(in);
  bam_header_write(out,header);

  // sorted by name?
  // Should not rely on the value in SO 
  bam1_t *aln=bam_init1();
  bam1_t *prev=bam_init1();

  printf("Hashing...\n");flush(stdout);
  while(bam_read1(in,aln)>=0) { // read alignment
    if (aln->core.tid < 0) continue;//ignore unaligned reads
    ++num_alns;
    new_read_aln(ht,bam1_qname(aln));
  }
  bam_close(in);  
  printf("Hashing complete (%lu alignments)\n",num_alns);
  printf("Memory used in the hash: %ld MB\n",index_mem/1024/1024);  
  flush(stdout);
  // reopen
  in2 = bam_open(argv[1], "rb");
  if (in2 == 0 ) {  
    fprintf(stderr, "ERROR: Fail to open BAM file %s\n", argv[1]);  
    return 1;  
  }  

  header = bam_header_read(in2);
  
  while(bam_read1(in2,aln)>=0) { // read alignment
    if (aln->core.tid < 0) continue;//ignore unaligned reads
    ++num_alns;
    
    READ_ALN *r=get_read_aln(ht,bam1_qname(aln));

    //assert(r!=NULL);
    // update the NH field
    uint8_t *old_nh = bam_aux_get(aln, "NH");    
    uint8_t nh=r->ctr;
    if (old_nh) {
      if (nh!=bam_aux2i(old_nh)) {
	fprintf(stderr,"warning: value mismatch! replacing>%s %d->%d\n",bam1_qname(aln),bam_aux2i(old_nh),nh);
      }
      bam_aux_del(aln, old_nh);
      bam_aux_append(aln, "NH", 'i', 4, (uint8_t*)&nh);
    }
    if (!old_nh) { // add NH  
      bam_aux_append(aln, "NH", 'i', 4, (uint8_t*)&nh);
#ifdef DEBUG
      printf("!>%s %d\n",bam1_qname(aln),bam_aux2i(old_nh));
#endif
    }
    // in->header
    // Also fix the XS:A tag
    // BAM_FREAD1
    // BAM_FREAD2
    // BAM_FREVERSE the read is mapped to the reverse strand 
    //bam1_cigar(b) 
      //BAM_CREF_SKIP 3 CIGAR skip on the reference (e.g. spliced alignment)
      //BAM_FREVERSE 16 the read is mapped to the reverse strand
    if (aln->core.flag & BAM_FSECONDARY) continue; // skip secondary alignments
    if (aln->core.flag & ! BAM_FPAIRED) continue; // not paired
    if (aln->core.flag & ! BAM_FPROPER_PAIR) continue; // not a proper pair
    if (aln->core.flag & ! BAM_FMUNMAP) continue; // the mate is mapped
    if (aln->core.flag & BAM_FSECONDARY) continue; // secundary read
    if (aln->core.flag & BAM_FREAD2) continue; // only count each pair once
    // core.strand == 0 (f/+) 1 r/-
    // flag
    // bam1_qname(b)
    bam_write1(out,aln);
  }
  // 
  bam_destroy1(aln);
  bam_close(in2);  
  bam_close(out);  
  return 0;  
/*
uint8_t *old_nm = bam_aux_get(b, "NM");
90 	if (c->flag & BAM_FUNMAP) return;
91 	if (old_nm) old_nm_i = bam_aux2i(old_nm);
92 	if (!old_nm) bam_aux_append(b, "NM", 'i', 4, (uint8_t*)&nm);
93 	else if (nm != old_nm_i) {
94 	fprintf(stderr, "[bam_fillmd1] different NM for read '%s': %d -> %d\n", bam1_qname(b), old_nm_i, nm);
95 	bam_aux_del(b, old_nm);
96 	bam_aux_append(b, "NM", 'i', 4, (uint8_t*)&nm);
97 	}
*/
}