void convert_bam_to_sam(char* bam_input, char* sam_input) { int read_bytes; bam1_t* bam_p = bam_init1(); char* bam_string; LOG_DEBUG("CONVERT-START: bam to sam\n"); //open BAM file for read if (time_flag) { start_timer(t1_convert); } bam_file_t* bam_file_p = bam_fopen_mode(bam_input, NULL, "r"); //open SAM file for write, SAM file is a text file!!! FILE* sam_fd = fopen(sam_input, "w"); if (sam_fd == NULL) { char log_message[200]; sprintf(log_message, "Error opening file '%.150s' in mode 'r' !!!!!\n", sam_input); LOG_FATAL(log_message); } //header for BAM file has been done in the opening bam_header_t* bam_header_p = bam_file_p->bam_header_p; //write header text to SAM file fprintf(sam_fd, "%s", bam_header_p->text); //write string alignments to SAM file while ((read_bytes = bam_read1(bam_file_p->bam_fd, bam_p)) > 0) { bam_string = bam_format1(bam_header_p, bam_p); fprintf(sam_fd, "%s\n", bam_string); free(bam_string); // it was allocated by the sam-tools, we must free it !! num_alignments++; } //close BAM and SAM files, free bam alignment and bam file object bam_fclose(bam_file_p); fclose(sam_fd); bam_destroy1(bam_p); if (time_flag) { stop_timer(t1_convert, t2_convert, convert_time); } //number_of_batchs = 1, convention value for statistics (not real batch) number_of_batchs = 1; LOG_DEBUG("CONVERT-START: bam to sam\n"); }
void convert_sam_to_bam(char* sam_input, char* bam_input) { bam1_t* bam_p = bam_init1(); LOG_DEBUG("CONVERT-START: sam to bam\n"); //open SAM file for read if (time_flag) { start_timer(t1_convert); } tamFile sam_fd = sam_open(sam_input); //open BAM file for write bam_file_t* bam_file_p = bam_fopen_mode(bam_input, NULL, "w"); //read header from SAM file bam_header_t* bam_header_p = sam_header_read(sam_fd); //write header to BAM file bam_header_write(bam_file_p->bam_fd, bam_header_p); //write alignments to BAM file while (sam_read1(sam_fd, bam_header_p, bam_p) > 0) { bam_write1(bam_file_p->bam_fd, bam_p); num_alignments++; } //close BAM and SAM files, free bam alignment and bam file object bam_fclose(bam_file_p); sam_close(sam_fd); bam_header_destroy(bam_header_p); bam_destroy1(bam_p); if (time_flag) { stop_timer(t1_convert, t2_convert, convert_time); } //number_of_batchs = 1, convention value for statistics (not real batch) number_of_batchs = 1; }
/**
 * Writer loop: drains input_p->list_p until list_remove_item() returns NULL.
 *
 * Batches flagged MATCH_FLAG or MISMATCH_FLAG hold an array of alignment_t
 * pointers; each one is converted to a BAM record, written to the BAM file
 * named by input_p->match_filename and then freed. Batches flagged
 * SPLICE_EXACT_FLAG / SPLICE_EXTEND_FLAG hold a raw byte buffer that is
 * written verbatim to the corresponding splice file. Batches with any other
 * flag are discarded. Every batch and list item is freed after processing.
 *
 * A splice file that cannot be opened is reported on stderr and its batches
 * are dropped; the stream is never passed to fclose() in that case.
 *
 * @param input_p  file names, header file name and the list to consume
 */
void batch_writer(batch_writer_input_t* input_p) {
    char* match_filename = input_p->match_filename;
    char* splice_exact_filename = input_p->splice_exact_filename;
    char* splice_extend_filename = input_p->splice_extend_filename;
    list_t* list_p = input_p->list_p;

    printf("batch_writer (%i): START\n", omp_get_thread_num());

    FILE* splice_exact_fd = fopen(splice_exact_filename, "w");
    if (splice_exact_fd == NULL) {
        fprintf(stderr, "batch_writer: cannot open '%s' for writing, exact splice batches will be dropped\n",
                splice_exact_filename);
    }

    FILE* splice_extend_fd = fopen(splice_extend_filename, "w");
    if (splice_extend_fd == NULL) {
        fprintf(stderr, "batch_writer: cannot open '%s' for writing, extended splice batches will be dropped\n",
                splice_extend_filename);
    }

    // NOTE(review): the result of bam_fopen_mode is used unchecked, as in the
    // original code; confirm whether it can return NULL for this caller.
    bam_header_t* bam_header_p = bam_header_new(HUMAN, NCBI37, input_p->header_filename);
    bam_file_t* bam_file_p = bam_fopen_mode(match_filename, bam_header_p, "w");
    bam_fwrite_header(bam_header_p, bam_file_p);

    // main loop
    list_item_t* item_p = NULL;
    while ((item_p = list_remove_item(list_p)) != NULL) {
        if (time_on) {
            timing_start(BATCH_WRITER, 0, timing_p);
        }

        write_batch_t* batch_p = (write_batch_t*) item_p->data_p;

        if (batch_p->flag == MATCH_FLAG || batch_p->flag == MISMATCH_FLAG) {
            alignment_t** buffer_p = (alignment_t**) batch_p->buffer_p;
            for (int i = 0; i < batch_p->size; i++) {
                bam1_t* bam1_p = convert_to_bam(buffer_p[i], 33);
                bam_fwrite(bam1_p, bam_file_p);
                bam_destroy1(bam1_p);
                alignment_free(buffer_p[i]);
            }
        } else {
            FILE* fd = NULL;
            if (batch_p->flag == SPLICE_EXACT_FLAG) {
                fd = splice_exact_fd;
            } else if (batch_p->flag == SPLICE_EXTEND_FLAG) {
                fd = splice_extend_fd;
            }

            // fwrite() returns 0 for a zero-sized element, so only a
            // non-empty buffer can be checked for a short write
            if (fd != NULL && batch_p->size > 0) {
                if (fwrite((char*) batch_p->buffer_p, batch_p->size, 1, fd) != 1) {
                    fprintf(stderr, "batch_writer: error writing splice batch\n");
                }
            }
        }

        write_batch_free(batch_p);
        list_item_free(item_p);

        if (time_on) {
            timing_stop(BATCH_WRITER, 0, timing_p);
        }
    } // end of batch loop

    // fclose(NULL) is undefined behaviour: only close streams that were opened
    if (splice_exact_fd != NULL) {
        fclose(splice_exact_fd);
    }
    if (splice_extend_fd != NULL) {
        fclose(splice_extend_fd);
    }

    // The header is not released here (the original call was disabled).
    // NOTE(review): presumably bam_fclose releases the header attached to the
    // file - confirm before adding an explicit free.
    bam_fclose(bam_file_p);

    printf("batch_writer: END\n");
}
/**
 * Run framework contexts.
 *
 * Runs every context in fwork->v_context in order. The first context reads
 * fwork->input_file_str; each later context reads the output of the most
 * recent context that produced one. The last context writes to
 * fwork->output_file_str when that is set; earlier contexts flagged with
 * output_temp write to "/tmp/bfwork_tmp_<index>.tmp".
 *
 * Temporary intermediates created by this call are deleted as soon as a
 * later context has produced a newer output, and any that remain are deleted
 * after the last context. Files that were not created as temporaries by this
 * call are never deleted.
 *
 * If fwork->reference_str is set, the reference genome is opened before the
 * first context and released after the last one.
 *
 * @param fwork  framework to run; input_file_str and regions_list must be set
 * @return NO_ERROR when no contexts are registered, otherwise the value
 *         returned by the run of the LAST context (results of earlier
 *         contexts are overwritten)
 */
int bfwork_run(bam_fwork_t *fwork)
{
    /* Size of the buffers holding output paths (same value the code always used) */
    enum { OUTPUT_PATH_SIZE = 256 };

    int err = 0, c;
    double times;
    /* True when last_temp_file_str names a temporary created by this call */
    int last_is_tmp = 0;

    assert(fwork);
    assert(fwork->input_file_str);
    assert(fwork->regions_list);

    printf("============== BEGIN RUN ==============\n");

    //Check if contexts present
    if(fwork->v_context_l == 0)
    {
        LOG_WARN("No contexts have been specified to run!\n");
        printf("============== END RUN ==============\n\n");
        return NO_ERROR;
    }

    //Open reference
    if(fwork->reference_str)
    {
        //Obtain reference filename and dirpath from full path
        char *ref_path = NULL, *ref_name = NULL, *aux;

        ref_path = strdup(fwork->reference_str);
        assert(ref_path);

        aux = strrchr(ref_path, '/');
        if (aux)
        {
            //Name keeps its leading '/', path is cut just before it
            ref_name = strdup(aux);
            *aux = '\0';
        }
        else
        {
            //Bare file name: handle it exactly as "./name" would be, so that
            //neither part is NULL when printed or handed to genome_new
            size_t name_len = strlen(ref_path);
            ref_name = malloc(name_len + 2);
            if (ref_name)
            {
                ref_name[0] = '/';
                memcpy(ref_name + 1, ref_path, name_len + 1);
            }
            free(ref_path);
            ref_path = strdup(".");
            assert(ref_path);
        }
        assert(ref_name);

        printf("Reference path: %s\n", ref_path);
        printf("Reference name: %s\n", ref_name);

        printf("Opening reference genome from \"%s%s\" ...\n", ref_path, ref_name);
        fwork->reference = genome_new(ref_name, ref_path, BWT_MODE);
        assert(fwork->reference);
        printf("Reference opened!...\n");

        free(ref_path);
        free(ref_name);
    }

    printf("--------------------------------------\n");

    for(c = 0; c < fwork->v_context_l; c++)
    {
        const int is_last = (c == fwork->v_context_l - 1);
        int has_output;

        //Select next context
        fwork->context = fwork->v_context[c];
        assert(fwork->context);

#ifdef D_TIME_DEBUG
        times = omp_get_wtime();
#endif

        //Open input bam
        if(!fwork->last_temp_file_str)
        {
            //No previous output available: open initial input file
            printf("Opening BAM from \"%s\" ...\n", fwork->input_file_str);
            fwork->input_file = bam_fopen(fwork->input_file_str);
            assert(fwork->input_file);
            printf("BAM opened!...\n");
        }
        else
        {
            //Open last context output
            printf("Opening intermediate BAM from \"%s\" ...\n", fwork->last_temp_file_str);
            fwork->input_file = bam_fopen(fwork->last_temp_file_str);
            assert(fwork->input_file);
            printf("Intermediate BAM opened!...\n");
        }

        //Decide where this context writes; erase_tmp marks a temporary output
        fwork->erase_tmp = 0;
        if(is_last)
        {
            if(fwork->output_file_str)
            {
                if(fwork->context->output_file_str == NULL)
                {
                    //Allocate
                    fwork->context->output_file_str = malloc(OUTPUT_PATH_SIZE);
                    assert(fwork->context->output_file_str);
                }

                //Set final output; snprintf always terminates the string.
                //NOTE(review): a pre-existing buffer is assumed to hold at
                //least OUTPUT_PATH_SIZE bytes, as the original code assumed.
                snprintf(fwork->context->output_file_str, OUTPUT_PATH_SIZE, "%s", fwork->output_file_str);
            }
        }
        else
        {
            //Temporary file?
            if(fwork->context->output_temp)
            {
                //Allocate
                fwork->context->output_file_str = malloc(OUTPUT_PATH_SIZE);
                assert(fwork->context->output_file_str);

                //Set output temp path
                snprintf(fwork->context->output_file_str, OUTPUT_PATH_SIZE, "/tmp/bfwork_tmp_%d.tmp", c);
                fwork->erase_tmp = 1;
            }
        }

        has_output = (fwork->context->output_file_str != NULL);

        //Create new bam if context has output
        if(has_output)
        {
            printf("Creating new intermediate bam file in \"%s\"...\n", fwork->context->output_file_str);
            fwork->output_file = bam_fopen_mode(fwork->context->output_file_str, fwork->input_file->bam_header_p, "w");
            assert(fwork->output_file);
            bam_fwrite_header(fwork->output_file->bam_header_p, fwork->output_file);

            //The header belongs to the input file; detach it from the output
            //handle before that handle is closed
            fwork->output_file->bam_header_p = NULL;
            printf("New intermediate BAM initialized!...\n");
        }

#ifdef D_TIME_DEBUG
        times = omp_get_wtime() - times;
        if(fwork->context->time_stats)
            time_add_time_slot(D_FWORK_INIT, fwork->context->time_stats, times);
#endif

        //Logging
        if(fwork->context->tag != NULL)
        {
            printf("Context %s is now running\n", fwork->context->tag);
        }
        else
        {
            printf("Context %d is now running\n", c);
        }

#ifdef D_TIME_DEBUG
        times = omp_get_wtime();
#endif

        //Run this context
        if(omp_get_max_threads() > 1)
        {
            //Run in multithreaded mode
            err = bfwork_run_threaded(fwork);
        }
        else
        {
            //Run in sequential mode
            err = bfwork_run_sequential(fwork);
        }

        //Reduce needed?
        if(fwork->context->reduce != NULL && fwork->context->reduce_dest != NULL)
        {
            //Reduce into context reduce data
            bfwork_context_local_user_data_reduce(fwork->context, fwork->context->reduce_dest, fwork->context->reduce);
        }

#ifdef D_TIME_DEBUG
        times = omp_get_wtime() - times;
        if(fwork->context->time_stats)
            time_add_time_slot(D_FWORK_TOTAL, fwork->context->time_stats, times);
#endif

        //Close input BAM
        printf("\nClosing BAM file...\n");
        bam_fclose(fwork->input_file);
        fwork->input_file = NULL;
        printf("BAM closed.\n");

        //Close output file
        if(fwork->output_file != NULL)
        {
            printf("Closing \"%s\" BAM file...\n", fwork->output_file->filename);
            bam_fclose(fwork->output_file);
            fwork->output_file = NULL;
            printf("BAM closed.\n");
        }

        //Remove the previous temporary file once this context has produced a
        //newer output. The decision depends on how the PREVIOUS file was
        //created, not on the current context's flags. A context without
        //output leaves it in place so the next context can still read it.
        if(has_output && last_is_tmp && fwork->last_temp_file_str != NULL)
        {
            //Delete file
            printf("Deleting %s...\n", fwork->last_temp_file_str);
            if(remove(fwork->last_temp_file_str) != 0)
            {
                LOG_WARN("Could not delete temporary file\n");
            }
            fwork->last_temp_file_str = NULL;
            last_is_tmp = 0;
        }

        //Set last file if this is not the last context
        if(has_output && !is_last)
        {
            fwork->last_temp_file_str = fwork->context->output_file_str;
            last_is_tmp = fwork->erase_tmp;
        }

        //Logging
        //NOTE(review): this is logged whatever value the run returned in err
        printf("--------------------------------------\n");
        LOG_INFO("Context SUCCESS!\n");
    }

    //Remove the temporary file that is still pending after the last context
    if(fwork->last_temp_file_str != NULL && last_is_tmp)
    {
        //Delete file
        printf("Deleting %s...\n", fwork->last_temp_file_str);
        if(remove(fwork->last_temp_file_str) != 0)
        {
            LOG_WARN("Could not delete temporary file\n");
        }
        fwork->last_temp_file_str = NULL;
    }

    //Close reference
    if(fwork->reference != NULL)
    {
        printf("\nClosing reference file...\n");
        genome_free(fwork->reference);
        printf("Reference closed.\n");
    }

    //Logging
    LOG_INFO("Framework SUCCESS!\n");
    printf("============== END RUN ==============\n\n");

    return err;
}
/**
 * Writer loop for aligner batches: drains input->list_p until
 * list_remove_item() returns NULL and writes every read to the BAM file
 * named by input->match_filename.
 *
 * For each read of a batch:
 *  - if its mapping list is NULL or empty, a single record is built from the
 *    FASTQ data of the read, with a cigar of the form "<read length>X", and
 *    written;
 *  - otherwise every non-NULL alignment in the list is written and freed.
 * The mapping list, the batch and the list item are freed after use.
 *
 * @param input  file name, header file name and the list to consume
 */
void batch_writer2(batch_writer_input_t* input) {
    printf("START: batch_writer (%i): START, for file %s\n",
           omp_get_thread_num(), input->match_filename);

    char* match_filename = input->match_filename;
    list_t* write_list = input->list_p;

    bam_header_t* bam_header = bam_header_new(HUMAN, NCBI37, input->header_filename);
    bam_file_t* bam_file = bam_fopen_mode(match_filename, bam_header, "w");
    bam_fwrite_header(bam_header, bam_file);

    size_t total_mappings = 0;
    list_item_t* item = NULL;

    // main loop
    while ((item = list_remove_item(write_list)) != NULL) {
        // paired with the timing_stop at the end of the iteration
        if (time_on) {
            timing_start(BATCH_WRITER, 0, timing_p);
        }

        aligner_batch_t* batch = (aligner_batch_t*) item->data_p;
        fastq_batch_t* fq_batch = batch->fq_batch;
        size_t num_reads = batch->num_mapping_lists;

        for (size_t i = 0; i < num_reads; i++) {
            array_list_t* array_list = batch->mapping_lists[i];
            size_t num_items = (array_list == NULL ? 0 : array_list_size(array_list));

            // mapped or not mapped ?
            if (num_items == 0) {
                // cigar for the unmapped read; a local buffer large enough
                // for any size_t value plus the trailing 'X'
                char cigar[32];
                size_t read_len = fq_batch->data_indices[i + 1] - fq_batch->data_indices[i] - 1;
                snprintf(cigar, sizeof(cigar), "%zuX", read_len);

                alignment_t* alig = alignment_new();
                // the read name starts one character into the stored header
                alignment_init_single_end(&(fq_batch->header[fq_batch->header_indices[i]]) + 1,
                                          &(fq_batch->seq[fq_batch->data_indices[i]]),
                                          &(fq_batch->quality[fq_batch->data_indices[i]]),
                                          0, 0, 0, cigar, 1, 255, 0, 0, alig);

                bam1_t* bam1 = convert_to_bam(alig, 33);
                bam_fwrite(bam1, bam_file);
                bam_destroy1(bam1);

                // these fields point into the FASTQ batch and the local cigar
                // buffer, so they are cleared to keep alignment_free from
                // releasing memory the alignment does not own
                alig->query_name = NULL;
                alig->sequence = NULL;
                alig->quality = NULL;
                alig->cigar = NULL;
                alignment_free(alig);
            } else {
                for (size_t j = 0; j < num_items; j++) {
                    alignment_t* alig = (alignment_t*) array_list_get(j, array_list);
                    if (alig != NULL) {
                        bam1_t* bam1 = convert_to_bam(alig, 33);
                        bam_fwrite(bam1, bam_file);
                        bam_destroy1(bam1);
                        alignment_free(alig);
                        total_mappings++;
                    }
                }
            }

            if (array_list != NULL) {
                array_list_free(array_list, NULL);
            }
        }

        aligner_batch_free(batch);
        list_item_free(item);

        if (time_on) {
            timing_stop(BATCH_WRITER, 0, timing_p);
        }
    } // end of batch loop

    bam_fclose(bam_file);

    printf("END: batch_writer (total mappings %zu)\n", total_mappings);
}