예제 #1
0
파일: bam_fix_NH.c 프로젝트: hjanime/irap
int main(int argc, char *argv[])  
{  
  short out2stdout=0;
  hashtable ht=new_hashtable(HASHSIZE);
  bamFile in,in2; 
  bamFile out; 
  int paired;//1 if not paired or pair read 1, 2 otherwise
  index_mem=sizeof(hashtable)*sizeof(hashnode**)*HASHSIZE*2;

  if (argc != 3) {  
    fprintf(stderr, "Usage: bam_fix_NH <in.bam> <out.bam or - for stdout>\n");  
    return 1;  
  }  
  // Open file and exit if error
  in = bam_open(argv[1], "rb");
  out2stdout = strcmp(argv[2], "-")? 0 : 1; 
  out = strcmp(argv[2], "-")? bam_open(argv[2], "w") : bam_dopen(fileno(stdout), "w"); 
  if (in == 0 ) {  
    fprintf(stderr, "ERROR: Fail to open BAM file %s\n", argv[1]);  
    return 1;  
  }  
  if (out == 0) {  
    fprintf(stderr, "ERROR: Fail to open BAM file %s\n", argv[2]);  
    return 1;  
  }  

  unsigned long num_alns=0;
  int ref;  

  // ***********
  // Copy header
  bam_header_t *header;
  header = bam_header_read(in);
  bam_header_write(out,header);

  // sorted by name?
  // Should not rely on the value in SO 
  bam1_t *aln=bam_init1();
  bam1_t *prev=bam_init1();

  if (!out2stdout) {
    fprintf(stderr,"bam_fix_NH version %s\n",VERSION);
    fprintf(stderr,"Processing %s\n",argv[1]);
    fprintf(stderr,"Hashing...\n");fflush(stderr);
  }

  while(bam_read1(in,aln)>=0) { // read alignment
    if (aln->core.tid < 0) continue;//ignore unaligned reads
    if (aln->core.flag & BAM_FUNMAP) continue;
    if (aln->core.flag & BAM_FREAD2) paired=2;
    else paired=1;
    ++num_alns;
    new_read_aln(ht,fix_read_name(bam1_qname(aln),paired));
    if(!out2stdout) PRINT_ALNS_PROCESSED(num_alns);
  }
  bam_close(in);  
  if(!out2stdout) {
    fprintf(stderr,"%s%lu\n",BACKLINE,num_alns);
    fprintf(stderr,"Hashing complete (%lu alignments)\n",num_alns);
    fprintf(stderr,"Memory used: %ld MB\n",index_mem/1024/1024);  
    fprintf(stderr,"Updating entries with NH and printing BAM...\n");
    fflush(stderr);
  }
  // reopen
  in2 = bam_open(argv[1], "rb");
  if (in2 == 0 ) {  
    fprintf(stderr, "ERROR: Fail to open BAM file %s\n", argv[1]);  
    return 1;  
  }  

  header = bam_header_read(in2);
  num_alns=0;
  while(bam_read1(in2,aln)>=0) { // read alignment
    paired=1;
    if (aln->core.tid < 0) continue;//ignore unaligned reads
    if (aln->core.flag & BAM_FUNMAP) continue;
    if (aln->core.flag & BAM_FREAD2) paired=2;
    ++num_alns;
    READ_ALN *r=get_read_aln(ht,fix_read_name(bam1_qname(aln),paired));

    assert(r!=NULL);
    // update the NH field
    uint8_t *old_nh = bam_aux_get(aln, "NH");    
    int32_t nh=r->ctr;
    if (old_nh) {
      if (nh!=bam_aux2i(old_nh)) {
	fprintf(stderr,"warning: value mismatch! replacing>%s %d->%d\n",bam1_qname(aln),bam_aux2i(old_nh),nh);
      }
      bam_aux_del(aln, old_nh);
      bam_aux_append(aln, "NH", 'i', 4, (uint8_t*)&nh);
#ifdef DEBUG
      //      printf("!>%s %d\n",bam1_qname(aln),r->ctr);
#endif
    }
    if (!old_nh) { // add NH  
      bam_aux_append(aln, "NH", 'i', 4, (uint8_t*)&nh);
#ifdef DEBUG
      fprintf(stderr,"!>%s %d\n",bam1_qname(aln),bam_aux2i(old_nh));
#endif
    }
    bam_write1(out,aln);
    if(!out2stdout) PRINT_ALNS_PROCESSED(num_alns);
  }
  // 
  bam_destroy1(aln);
  bam_close(in2);  
  bam_close(out);  
  if(!out2stdout) {
    fprintf(stderr,"%s%lu\n",BACKLINE,num_alns);
    fprintf(stderr,"Done.\n");
  }
  return 0;  
}  
예제 #2
0
int main(int argc, char *argv[])  
{  
  bamFile in; 
  sqlite3 * db;
  sqlite3_stmt * stmt;
  char * sErrMsg = NULL;
  char * tail = 0;
  int nRetCode;
  char sSQL [BUFFER_SIZE] = "\0";
  char database[BUFFER_SIZE];
  clock_t startClock,startClock2;

  if (argc != 2) {  
    fprintf(stderr, "Usage: bamRindex <in.bam>\n");  
    return 1;  
  }  

  // Open file and exit if error
  //in = strcmp(argv[1], "-")? bam_open(argv[1], "rb") : bam_dopen(fileno(stdin), "rb");
  //fprintf(stderr,"Options ok\n");
  in = bam_open(argv[1], "rb");
  if (in == 0 ) {  
    fprintf(stderr, "ERROR: Fail to open BAM file %s\n", argv[1]);  
    return 1;  
  }  
  //fprintf(stderr,"BAM opened\n");
  assert(strcpy(database,argv[1])!=NULL);
  assert(strcat(database,".ridx")!=NULL);
  remove(database);
  // ***********
  // Read header
  bam_header_t *header;
  header = bam_header_read(in);
  // sorted by name?
  // Should not rely on the value in SO 
  bam1_t *aln=bam_init1();
  unsigned long num_alns=0;

  /*********************************************/
  /* Open the Database and create the Schema */
  // TODO: check the errors
  sqlite3_open(database, &db);
  sqlite3_exec(db, TABLE, NULL, NULL, &sErrMsg); // create the table
  SQLITE_CHECK_ERROR();
  startClock = clock();
  sqlite3_exec(db, "PRAGMA synchronous = 0;", NULL, NULL, &sErrMsg);
  SQLITE_CHECK_ERROR();
  sqlite3_exec(db, "PRAGMA journal_mode = OFF;", NULL, NULL, &sErrMsg);
  SQLITE_CHECK_ERROR();
  // Use up to 8GB of memory
  sqlite3_exec(db, "PRAGMA cache_size = -8000000;", NULL, NULL, &sErrMsg);
  SQLITE_CHECK_ERROR();
  sqlite3_exec(db, "BEGIN TRANSACTION;", NULL, NULL, &sErrMsg);
  SQLITE_CHECK_ERROR();
  while(bam_read1(in,aln)>=0) { // read alignment
    //aln->core.tid < 0 ? 
    uint8_t *nh = bam_aux_get(aln, "NH");
    uint8_t *nm = bam_aux_get(aln, "NM");
    uint8_t *xs = bam_aux_get(aln, "XS");
    
    BOOLEAN isPrimary;
    BOOLEAN isMapped;
    BOOLEAN notMapped;
    BOOLEAN isDuplicate;
    BOOLEAN isNotPassingQualityControls;
    BOOLEAN isPaired;
    BOOLEAN isSecondMateRead,isProperPair;
    //secondary alignment
    notMapped=(aln->core.flag & BAM_FUNMAP) ? TRUE: FALSE;
    //notMapped=((aln->core.flag & BAM_FUNMAP) || (aln->core.mtid ==0)) ? TRUE: FALSE;
    isMapped=!notMapped;
    isPrimary= (aln->core.flag & BAM_FSECONDARY) ? FALSE:TRUE;
    isProperPair=(aln->core.flag & BAM_FPROPER_PAIR) ? TRUE:FALSE;
    isPaired=(aln->core.flag & BAM_FPAIRED ) ? TRUE:FALSE;
    isSecondMateRead=(aln->core.flag & BAM_FREAD2 ) ? TRUE: FALSE;
    isNotPassingQualityControls=(aln->core.flag & BAM_FQCFAIL ) ? TRUE:FALSE;
    isDuplicate=(aln->core.flag & BAM_FDUP) ? TRUE: FALSE;

    BOOLEAN isSpliced=FALSE;
    BOOLEAN hasSimpleCigar=TRUE;
    int nSpliced=0;
    int i;
    if (aln->core.n_cigar != 0) {
      for (i = 0; i < aln->core.n_cigar; ++i) {
	char l="MIDNSHP=X"[bam1_cigar(aln)[i]&BAM_CIGAR_MASK];
	//fprintf(stderr,"%c",l);
	if ( l == 'N' ) { isSpliced=TRUE; hasSimpleCigar=FALSE;++nSpliced;}	  
	if ( l != 'M' && l!='=' ) {  hasSimpleCigar=FALSE;}	  
      }
    } 
    //fprintf(stderr,"read %ld\n",num_alns);
    // isDuplicate,isNotPassingQualityControls,
    // isSpliced,isPAired,isPrimary,hasSimpleCigar,isSecondMateRead,isProperPair,nh,nm,qual/mapq,xs
    sprintf(sSQL,"INSERT into bam_index values (%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,'%c')",
	   isDuplicate,isNotPassingQualityControls,
	   nSpliced,isPaired,isPrimary,isMapped,hasSimpleCigar,isSecondMateRead,isProperPair,
	   (nh==0?0:bam_aux2i(nh)),(nm==0?0:bam_aux2i(nm)),
	    aln->core.qual,
	    (xs==0?' ':(bam_aux2A(xs)==0?' ':bam_aux2A(xs))));
    sqlite3_exec(db, sSQL, NULL, NULL, &sErrMsg);
    SQLITE_CHECK_ERROR();
    ++num_alns;
    PRINT_ALNS_PROCESSED(num_alns);
  }
  bam_close(in);  
  sqlite3_exec(db, "END TRANSACTION;", NULL, NULL, &sErrMsg);
  SQLITE_CHECK_ERROR();
  printf("\nImported %d records in %4.2f seconds\n", num_alns, ( (double) (clock() - startClock))/CLOCKS_PER_SEC);
  // Create the indexes
  startClock2 = clock();
  // generating the indexes does not pay off
  //sqlite3_exec(db, INDEXES, NULL, NULL, &sErrMsg);
  //printf("Indexed %d records in %4.2f seconds\n", num_alns, ( (double) (clock() - startClock2))/CLOCKS_PER_SEC);
  printf("Total time: %4.2f seconds\n", ((double)(clock() - startClock))/CLOCKS_PER_SEC);
  sqlite3_close(db);
  return 0;  
}  
예제 #3
0
int main(int argc, char *argv[])  
{  
  short out2stdout=0;
  bamFile in,in2; 
  bamFile out; 


  if (argc != 3) {  
    fprintf(stderr, "Usage: bam_fix_se_flag <in.bam> <out.bam or - for stdout>\n");  
    return 1;  
  }  
  // Open file and exit if error
  in = bam_open(argv[1], "rb");
  out2stdout = strcmp(argv[2], "-")? 0 : 1; 
  out = strcmp(argv[2], "-")? bam_open(argv[2], "w") : bam_dopen(fileno(stdout), "w"); 
  if (in == 0 ) {  
    fprintf(stderr, "ERROR: Fail to open BAM file %s\n", argv[1]);  
    return 1;  
  }  
  if (out == 0) {  
    fprintf(stderr, "ERROR: Fail to open BAM file %s\n", argv[2]);  
    return 1;  
  }  

  unsigned long num_alns=0;
  int ref;  

  // ***********
  // Copy header
  bam_header_t *header;
  header = bam_header_read(in);
  bam_header_write(out,header);

  // sorted by name?
  // Should not rely on the value in SO 
  bam1_t *aln=bam_init1();
  bam1_t *prev=bam_init1();

  if (!out2stdout) {
    fprintf(stderr,"bam_fix_se_flag version %s\n",VERSION);
    fprintf(stderr,"Processing %s\n",argv[1]);
  }

  // reopen
  in2 = bam_open(argv[1], "rb");
  if (in2 == 0 ) {  
    fprintf(stderr, "ERROR: Fail to open BAM file %s\n", argv[1]);  
    return 1;  
  }  

  header = bam_header_read(in2);
  num_alns=0;
  while(bam_read1(in2,aln)>=0) { // read alignment
    if (aln->core.tid < 0) continue;//ignore unaligned reads
    if (aln->core.flag & BAM_FUNMAP) continue;
    if (aln->core.flag & BAM_FPAIRED ) { // PAIRED

    } else { //SE 
      //turn off the other pair related flags
      aln->core.flag&=~BAM_FPROPER_PAIR;
      aln->core.flag&=~BAM_FMUNMAP;
      aln->core.flag&=~BAM_FREAD1;
      aln->core.flag&=~BAM_FREAD2;
      fprintf(stderr, ".");  
    }
    bam_write1(out,aln);
    if(!out2stdout) PRINT_ALNS_PROCESSED(num_alns);
    ++num_alns;
  }
  // 
  bam_destroy1(aln);
  bam_close(in2);  
  bam_close(out);  
  if(!out2stdout) {
    fprintf(stderr,"%s%lu\n",BACKLINE,num_alns);
    fprintf(stderr,"Done.\n");
  }
  return 0;  
}