int main(int argc, char *argv[]) { short out2stdout=0; hashtable ht=new_hashtable(HASHSIZE); bamFile in,in2; bamFile out; int paired;//1 if not paired or pair read 1, 2 otherwise index_mem=sizeof(hashtable)*sizeof(hashnode**)*HASHSIZE*2; if (argc != 3) { fprintf(stderr, "Usage: bam_fix_NH <in.bam> <out.bam or - for stdout>\n"); return 1; } // Open file and exit if error in = bam_open(argv[1], "rb"); out2stdout = strcmp(argv[2], "-")? 0 : 1; out = strcmp(argv[2], "-")? bam_open(argv[2], "w") : bam_dopen(fileno(stdout), "w"); if (in == 0 ) { fprintf(stderr, "ERROR: Fail to open BAM file %s\n", argv[1]); return 1; } if (out == 0) { fprintf(stderr, "ERROR: Fail to open BAM file %s\n", argv[2]); return 1; } unsigned long num_alns=0; int ref; // *********** // Copy header bam_header_t *header; header = bam_header_read(in); bam_header_write(out,header); // sorted by name? // Should not rely on the value in SO bam1_t *aln=bam_init1(); bam1_t *prev=bam_init1(); if (!out2stdout) { fprintf(stderr,"bam_fix_NH version %s\n",VERSION); fprintf(stderr,"Processing %s\n",argv[1]); fprintf(stderr,"Hashing...\n");fflush(stderr); } while(bam_read1(in,aln)>=0) { // read alignment if (aln->core.tid < 0) continue;//ignore unaligned reads if (aln->core.flag & BAM_FUNMAP) continue; if (aln->core.flag & BAM_FREAD2) paired=2; else paired=1; ++num_alns; new_read_aln(ht,fix_read_name(bam1_qname(aln),paired)); if(!out2stdout) PRINT_ALNS_PROCESSED(num_alns); } bam_close(in); if(!out2stdout) { fprintf(stderr,"%s%lu\n",BACKLINE,num_alns); fprintf(stderr,"Hashing complete (%lu alignments)\n",num_alns); fprintf(stderr,"Memory used: %ld MB\n",index_mem/1024/1024); fprintf(stderr,"Updating entries with NH and printing BAM...\n"); fflush(stderr); } // reopen in2 = bam_open(argv[1], "rb"); if (in2 == 0 ) { fprintf(stderr, "ERROR: Fail to open BAM file %s\n", argv[1]); return 1; } header = bam_header_read(in2); num_alns=0; while(bam_read1(in2,aln)>=0) { // read alignment paired=1; if (aln->core.tid < 0) continue;//ignore unaligned reads if (aln->core.flag & BAM_FUNMAP) continue; if (aln->core.flag & BAM_FREAD2) paired=2; ++num_alns; READ_ALN *r=get_read_aln(ht,fix_read_name(bam1_qname(aln),paired)); assert(r!=NULL); // update the NH field uint8_t *old_nh = bam_aux_get(aln, "NH"); int32_t nh=r->ctr; if (old_nh) { if (nh!=bam_aux2i(old_nh)) { fprintf(stderr,"warning: value mismatch! replacing>%s %d->%d\n",bam1_qname(aln),bam_aux2i(old_nh),nh); } bam_aux_del(aln, old_nh); bam_aux_append(aln, "NH", 'i', 4, (uint8_t*)&nh); #ifdef DEBUG // printf("!>%s %d\n",bam1_qname(aln),r->ctr); #endif } if (!old_nh) { // add NH bam_aux_append(aln, "NH", 'i', 4, (uint8_t*)&nh); #ifdef DEBUG fprintf(stderr,"!>%s %d\n",bam1_qname(aln),bam_aux2i(old_nh)); #endif } bam_write1(out,aln); if(!out2stdout) PRINT_ALNS_PROCESSED(num_alns); } // bam_destroy1(aln); bam_close(in2); bam_close(out); if(!out2stdout) { fprintf(stderr,"%s%lu\n",BACKLINE,num_alns); fprintf(stderr,"Done.\n"); } return 0; }
int main(int argc, char *argv[]) { bamFile in; sqlite3 * db; sqlite3_stmt * stmt; char * sErrMsg = NULL; char * tail = 0; int nRetCode; char sSQL [BUFFER_SIZE] = "\0"; char database[BUFFER_SIZE]; clock_t startClock,startClock2; if (argc != 2) { fprintf(stderr, "Usage: bamRindex <in.bam>\n"); return 1; } // Open file and exit if error //in = strcmp(argv[1], "-")? bam_open(argv[1], "rb") : bam_dopen(fileno(stdin), "rb"); //fprintf(stderr,"Options ok\n"); in = bam_open(argv[1], "rb"); if (in == 0 ) { fprintf(stderr, "ERROR: Fail to open BAM file %s\n", argv[1]); return 1; } //fprintf(stderr,"BAM opened\n"); assert(strcpy(database,argv[1])!=NULL); assert(strcat(database,".ridx")!=NULL); remove(database); // *********** // Read header bam_header_t *header; header = bam_header_read(in); // sorted by name? // Should not rely on the value in SO bam1_t *aln=bam_init1(); unsigned long num_alns=0; /*********************************************/ /* Open the Database and create the Schema */ // TODO: check the errors sqlite3_open(database, &db); sqlite3_exec(db, TABLE, NULL, NULL, &sErrMsg); // create the table SQLITE_CHECK_ERROR(); startClock = clock(); sqlite3_exec(db, "PRAGMA synchronous = 0;", NULL, NULL, &sErrMsg); SQLITE_CHECK_ERROR(); sqlite3_exec(db, "PRAGMA journal_mode = OFF;", NULL, NULL, &sErrMsg); SQLITE_CHECK_ERROR(); // Use up to 8GB of memory sqlite3_exec(db, "PRAGMA cache_size = -8000000;", NULL, NULL, &sErrMsg); SQLITE_CHECK_ERROR(); sqlite3_exec(db, "BEGIN TRANSACTION;", NULL, NULL, &sErrMsg); SQLITE_CHECK_ERROR(); while(bam_read1(in,aln)>=0) { // read alignment //aln->core.tid < 0 ? uint8_t *nh = bam_aux_get(aln, "NH"); uint8_t *nm = bam_aux_get(aln, "NM"); uint8_t *xs = bam_aux_get(aln, "XS"); BOOLEAN isPrimary; BOOLEAN isMapped; BOOLEAN notMapped; BOOLEAN isDuplicate; BOOLEAN isNotPassingQualityControls; BOOLEAN isPaired; BOOLEAN isSecondMateRead,isProperPair; //secondary alignment notMapped=(aln->core.flag & BAM_FUNMAP) ? TRUE: FALSE; //notMapped=((aln->core.flag & BAM_FUNMAP) || (aln->core.mtid ==0)) ? TRUE: FALSE; isMapped=!notMapped; isPrimary= (aln->core.flag & BAM_FSECONDARY) ? FALSE:TRUE; isProperPair=(aln->core.flag & BAM_FPROPER_PAIR) ? TRUE:FALSE; isPaired=(aln->core.flag & BAM_FPAIRED ) ? TRUE:FALSE; isSecondMateRead=(aln->core.flag & BAM_FREAD2 ) ? TRUE: FALSE; isNotPassingQualityControls=(aln->core.flag & BAM_FQCFAIL ) ? TRUE:FALSE; isDuplicate=(aln->core.flag & BAM_FDUP) ? TRUE: FALSE; BOOLEAN isSpliced=FALSE; BOOLEAN hasSimpleCigar=TRUE; int nSpliced=0; int i; if (aln->core.n_cigar != 0) { for (i = 0; i < aln->core.n_cigar; ++i) { char l="MIDNSHP=X"[bam1_cigar(aln)[i]&BAM_CIGAR_MASK]; //fprintf(stderr,"%c",l); if ( l == 'N' ) { isSpliced=TRUE; hasSimpleCigar=FALSE;++nSpliced;} if ( l != 'M' && l!='=' ) { hasSimpleCigar=FALSE;} } } //fprintf(stderr,"read %ld\n",num_alns); // isDuplicate,isNotPassingQualityControls, // isSpliced,isPAired,isPrimary,hasSimpleCigar,isSecondMateRead,isProperPair,nh,nm,qual/mapq,xs sprintf(sSQL,"INSERT into bam_index values (%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,'%c')", isDuplicate,isNotPassingQualityControls, nSpliced,isPaired,isPrimary,isMapped,hasSimpleCigar,isSecondMateRead,isProperPair, (nh==0?0:bam_aux2i(nh)),(nm==0?0:bam_aux2i(nm)), aln->core.qual, (xs==0?' ':(bam_aux2A(xs)==0?' ':bam_aux2A(xs)))); sqlite3_exec(db, sSQL, NULL, NULL, &sErrMsg); SQLITE_CHECK_ERROR(); ++num_alns; PRINT_ALNS_PROCESSED(num_alns); } bam_close(in); sqlite3_exec(db, "END TRANSACTION;", NULL, NULL, &sErrMsg); SQLITE_CHECK_ERROR(); printf("\nImported %d records in %4.2f seconds\n", num_alns, ( (double) (clock() - startClock))/CLOCKS_PER_SEC); // Create the indexes startClock2 = clock(); // generating the indexes does not pay off //sqlite3_exec(db, INDEXES, NULL, NULL, &sErrMsg); //printf("Indexed %d records in %4.2f seconds\n", num_alns, ( (double) (clock() - startClock2))/CLOCKS_PER_SEC); printf("Total time: %4.2f seconds\n", ((double)(clock() - startClock))/CLOCKS_PER_SEC); sqlite3_close(db); return 0; }
int main(int argc, char *argv[]) { short out2stdout=0; bamFile in,in2; bamFile out; if (argc != 3) { fprintf(stderr, "Usage: bam_fix_se_flag <in.bam> <out.bam or - for stdout>\n"); return 1; } // Open file and exit if error in = bam_open(argv[1], "rb"); out2stdout = strcmp(argv[2], "-")? 0 : 1; out = strcmp(argv[2], "-")? bam_open(argv[2], "w") : bam_dopen(fileno(stdout), "w"); if (in == 0 ) { fprintf(stderr, "ERROR: Fail to open BAM file %s\n", argv[1]); return 1; } if (out == 0) { fprintf(stderr, "ERROR: Fail to open BAM file %s\n", argv[2]); return 1; } unsigned long num_alns=0; int ref; // *********** // Copy header bam_header_t *header; header = bam_header_read(in); bam_header_write(out,header); // sorted by name? // Should not rely on the value in SO bam1_t *aln=bam_init1(); bam1_t *prev=bam_init1(); if (!out2stdout) { fprintf(stderr,"bam_fix_se_flag version %s\n",VERSION); fprintf(stderr,"Processing %s\n",argv[1]); } // reopen in2 = bam_open(argv[1], "rb"); if (in2 == 0 ) { fprintf(stderr, "ERROR: Fail to open BAM file %s\n", argv[1]); return 1; } header = bam_header_read(in2); num_alns=0; while(bam_read1(in2,aln)>=0) { // read alignment if (aln->core.tid < 0) continue;//ignore unaligned reads if (aln->core.flag & BAM_FUNMAP) continue; if (aln->core.flag & BAM_FPAIRED ) { // PAIRED } else { //SE //turn off the other pair related flags aln->core.flag&=~BAM_FPROPER_PAIR; aln->core.flag&=~BAM_FMUNMAP; aln->core.flag&=~BAM_FREAD1; aln->core.flag&=~BAM_FREAD2; fprintf(stderr, "."); } bam_write1(out,aln); if(!out2stdout) PRINT_ALNS_PROCESSED(num_alns); ++num_alns; } // bam_destroy1(aln); bam_close(in2); bam_close(out); if(!out2stdout) { fprintf(stderr,"%s%lu\n",BACKLINE,num_alns); fprintf(stderr,"Done.\n"); } return 0; }