/**
 * Writes one unmapped read to a BAM file.
 *
 * A temporary alignment record is built from the read (placeholder values
 * 0, -1, -1 and an empty CIGAR string), converted to a BAM record, written
 * to bam_file and then released.
 *
 * @param fq_read   read to write; its id is duplicated with strdup(), while
 *                  its sequence and quality buffers are handed to the
 *                  alignment as-is
 * @param bam_file  BAM file the record is written to
 */
void write_unmapped_read(fastq_read_t *fq_read, bam_file_t *bam_file) {
  alignment_t *alig = alignment_new();

  alignment_init_single_end(strdup(fq_read->id), fq_read->sequence, fq_read->quality,
                            0, -1, -1, "", 0, 0, 0, 0, 0, NULL, alig);

  // NOTE(review): 33 is presumably the quality-score offset expected by
  // convert_to_bam — confirm against its declaration
  bam1_t *bam1 = convert_to_bam(alig, 33);
  bam_fwrite(bam1, bam_file);
  bam_destroy1(bam1);

  // Detach the fields that were not allocated for this alignment (the read's
  // own sequence/quality buffers and the CIGAR string literal) before
  // freeing it; only the duplicated id stays attached.
  alig->sequence = NULL;
  alig->quality = NULL;
  alig->cigar = NULL;
  alignment_free(alig);
}
/**
 * Writes every alignment stored in a list to a BAM file and releases the
 * alignments and the list itself.
 *
 * @param array_list  list of alignment_t pointers; each alignment is freed
 *                    after being written and the list is freed at the end.
 *                    A NULL list is ignored.
 * @param bam_file    BAM file the records are written to
 */
void write_mapped_read(array_list_t *array_list, bam_file_t *bam_file) {
  // Check the list before it is handed to array_list_size(); previously the
  // NULL test only happened after the list had already been used.
  if (array_list == NULL) {
    return;
  }

  size_t num_items = array_list_size(array_list);

  for (size_t j = 0; j < num_items; j++) {
    alignment_t *alig = (alignment_t *) array_list_get(j, array_list);

    LOG_DEBUG("writting bam..\n");

    if (alig != NULL) {
      bam1_t *bam1 = convert_to_bam(alig, 33);
      bam_fwrite(bam1, bam_file);
      bam_destroy1(bam1);
      alignment_free(alig);
    } else {
      // cast so that the argument really matches the %lu conversion
      LOG_FATAL_F("alig is NULL, num_items = %lu\n", (unsigned long) num_items);
    }
  }

  // the alignments were already freed one by one above, so no item
  // destructor is passed
  array_list_free(array_list, NULL);
}
int sa_bam_writer(void *data) { sa_wf_batch_t *wf_batch = (sa_wf_batch_t *) data; sa_mapping_batch_t *mapping_batch = (sa_mapping_batch_t *) wf_batch->mapping_batch; if (mapping_batch == NULL) { printf("bam_writer1: error, NULL mapping batch\n"); return 0; } // for (int i = 0; i < NUM_COUNTERS; i++) { // counters[i] += mapping_batch->counters[i]; // } #ifdef _TIMING for (int i = 0; i < NUM_TIMING; i++) { func_times[i] += mapping_batch->func_times[i]; } #endif int flag, len; char *sequence, *quality; fastq_read_t *read; array_list_t *read_list = mapping_batch->fq_reads; bam1_t *bam1; alignment_t *alig; array_list_t *mapping_list; bam_file_t *out_file = wf_batch->writer_input->bam_file; sa_genome3_t *genome = wf_batch->sa_index->genome; size_t num_reads, num_mappings, num_mate_mappings; num_reads = mapping_batch->num_reads; for (size_t i = 0; i < num_reads; i++) { read = (fastq_read_t *) array_list_get(i, read_list); mapping_list = mapping_batch->mapping_lists[i]; num_mappings = array_list_size(mapping_list); num_total_mappings += num_mappings; #ifdef _VERBOSE if (num_mappings > 1) { num_dup_reads++; num_total_dup_reads += num_mappings; } #endif if (num_mappings > 0) { num_mapped_reads++; if (num_mappings > 1) { num_multihit_reads++; } for (size_t j = 0; j < num_mappings; j++) { alig = (alignment_t *) array_list_get(j, mapping_list); // update alignment if (num_mappings > 1) { alig->map_quality = 0; } else { alig->map_quality = alig->mapq; } bam1 = convert_to_bam(alig, 33); bam_fwrite(bam1, out_file); bam_destroy1(bam1); alignment_free(alig); } } else { num_unmapped_reads++; if (read->adapter) { // sequences and cigar len = read->length + abs(read->adapter_length); sequence = (char *) malloc(len + 1); quality = (char *) malloc(len + 1); if (read->adapter_length < 0) { strcpy(quality, read->adapter_quality); strcat(quality, read->quality); } else { strcpy(quality, read->quality); strcat(quality, read->adapter_quality); } if ((read->adapter_strand == 0 && 
read->adapter_length < 0) || (read->adapter_strand == 1 && read->adapter_length > 0)) { strcpy(sequence, read->adapter); strcat(sequence, read->sequence); } else { strcpy(sequence, read->sequence); strcat(sequence, read->adapter); } sequence[len] = 0; quality[len] = 0; } else { // sequences sequence = read->sequence; quality = read->quality; } alig = alignment_new(); alignment_init_single_end(strdup(read->id), sequence, quality, 0, -1, -1, /*strdup(aux)*/"", 0, 0, 0, 0, 0, NULL, alig); bam1 = convert_to_bam(alig, 33); bam_fwrite(bam1, out_file); // free memory bam_destroy1(bam1); alig->sequence = NULL; alig->quality = NULL; alig->cigar = NULL; alignment_free(alig); if (read->adapter) { free(sequence); free(quality); } } array_list_free(mapping_list, (void *) NULL); } // free memory sa_mapping_batch_free(mapping_batch); if (wf_batch) sa_wf_batch_free(wf_batch); return 0; }
/**
 * Writer loop: takes write batches out of input_p->list_p until
 * list_remove_item() returns NULL and stores them on disk.
 *
 *   - MATCH_FLAG / MISMATCH_FLAG batches hold an array of alignments; each
 *     one is converted to BAM, written to the match file and freed.
 *   - SPLICE_EXACT_FLAG / SPLICE_EXTEND_FLAG batches hold raw bytes that are
 *     written to the corresponding splice file.
 *   - batches with any other flag are discarded.
 *
 * Every batch and list item is released after being processed.
 *
 * @param input_p  writer configuration: output file names, header file name
 *                 and the list the batches are taken from
 */
void batch_writer(batch_writer_input_t* input_p) {
  char *match_filename = input_p->match_filename;
  char *splice_exact_filename = input_p->splice_exact_filename;
  char *splice_extend_filename = input_p->splice_extend_filename;
  list_t *list_p = input_p->list_p;

  printf("batch_writer (%i): START\n", omp_get_thread_num());

  // A splice file that cannot be opened is reported once; its batches are
  // then skipped by the fd != NULL test in the loop below.
  FILE *splice_exact_fd = fopen(splice_exact_filename, "w");
  if (splice_exact_fd == NULL) {
    fprintf(stderr, "batch_writer: error, cannot open %s for writing\n", splice_exact_filename);
  }
  FILE *splice_extend_fd = fopen(splice_extend_filename, "w");
  if (splice_extend_fd == NULL) {
    fprintf(stderr, "batch_writer: error, cannot open %s for writing\n", splice_extend_filename);
  }

  bam_header_t *bam_header_p = bam_header_new(HUMAN, NCBI37, input_p->header_filename);
  bam_file_t *bam_file_p = bam_fopen_mode(match_filename, bam_header_p, "w");
  bam_fwrite_header(bam_header_p, bam_file_p);

  // main loop
  list_item_t *item_p = NULL;
  while ((item_p = list_remove_item(list_p)) != NULL) {

    if (time_on) { timing_start(BATCH_WRITER, 0, timing_p); }

    write_batch_t *batch_p = (write_batch_t *) item_p->data_p;

    if (batch_p->flag == MATCH_FLAG || batch_p->flag == MISMATCH_FLAG) {
      // here batch_p->size is used as the number of alignments in the buffer
      alignment_t **buffer_p = (alignment_t **) batch_p->buffer_p;
      for (int i = 0; i < batch_p->size; i++) {
        bam1_t *bam1_p = convert_to_bam(buffer_p[i], 33);
        bam_fwrite(bam1_p, bam_file_p);
        bam_destroy1(bam1_p);
        alignment_free(buffer_p[i]);
      }
    } else {
      FILE *fd = NULL;
      if (batch_p->flag == SPLICE_EXACT_FLAG) {
        fd = splice_exact_fd;
      } else if (batch_p->flag == SPLICE_EXTEND_FLAG) {
        fd = splice_extend_fd;
      }

      // here batch_p->size is used as a byte count; an empty batch makes
      // fwrite() return 0, which is not an error
      if (fd != NULL) {
        if (fwrite((char *) batch_p->buffer_p, batch_p->size, 1, fd) != 1 && batch_p->size > 0) {
          fprintf(stderr, "batch_writer: error, short write on splice file\n");
        }
      }
    }

    write_batch_free(batch_p);
    list_item_free(item_p);

    if (time_on) { timing_stop(BATCH_WRITER, 0, timing_p); }
  } // end of batch loop

  // fclose() must not be called with a NULL stream
  if (splice_exact_fd != NULL) {
    fclose(splice_exact_fd);
  }
  if (splice_extend_fd != NULL) {
    fclose(splice_extend_fd);
  }

  bam_fclose(bam_file_p);
  // NOTE(review): bam_header_p is not freed here, as in the original code —
  // confirm whether bam_fclose() takes care of it

  printf("batch_writer: END\n");
}
/**
 * Writer loop working on aligner batches: takes batches out of
 * input->list_p until list_remove_item() returns NULL and writes one or more
 * BAM records per read to input->match_filename.
 *
 *   - a read whose mapping list is NULL or empty is written once, with a
 *     "<read length>X" CIGAR string;
 *   - otherwise every non-NULL alignment of its mapping list is written and
 *     freed.
 *
 * Mapping lists, batches and list items are released after being processed.
 * The number of alignments taken from the mapping lists is printed at the
 * end.
 *
 * @param input  writer configuration: match file name, header file name and
 *               the list the batches are taken from
 */
void batch_writer2(batch_writer_input_t* input) {

  printf("START: batch_writer (%i): START, for file %s\n",
         omp_get_thread_num(), input->match_filename);

  char *match_filename = input->match_filename;
  list_t *write_list = input->list_p;

  // Large enough for any unsigned long plus 'X' and the terminator. It is a
  // local (not static) buffer so concurrent writers cannot share it.
  char cigar[32];

  bam_header_t *bam_header = bam_header_new(HUMAN, NCBI37, input->header_filename);
  bam_file_t *bam_file = bam_fopen_mode(match_filename, bam_header, "w");
  bam_fwrite_header(bam_header, bam_file);

  size_t total_mappings = 0;

  // main loop
  list_item_t *item = NULL;
  while ((item = list_remove_item(write_list)) != NULL) {

    // pairs with the timing_stop() at the end of the iteration
    if (time_on) { timing_start(BATCH_WRITER, 0, timing_p); }

    aligner_batch_t *batch = (aligner_batch_t *) item->data_p;
    fastq_batch_t *fq_batch = batch->fq_batch;
    size_t num_reads = batch->num_mapping_lists;

    for (size_t i = 0; i < num_reads; i++) {
      array_list_t *array_list = batch->mapping_lists[i];
      size_t num_items = (array_list == NULL ? 0 : array_list_size(array_list));

      // NOTE(review): the extra -1 presumably discounts a separator stored
      // after each read in the data arrays — confirm against fastq_batch_t
      size_t read_len = fq_batch->data_indices[i + 1] - fq_batch->data_indices[i] - 1;

      if (num_items == 0) {
        // unmapped read
        snprintf(cigar, sizeof cigar, "%luX", (unsigned long) read_len);

        alignment_t *alig = alignment_new();
        // NOTE(review): this call passes 12 arguments, whereas other call
        // sites of alignment_init_single_end pass 14 — confirm against the
        // current prototype. The +1 skips the first character of the header.
        alignment_init_single_end(&(fq_batch->header[fq_batch->header_indices[i]])+1,
                                  &(fq_batch->seq[fq_batch->data_indices[i]]),
                                  &(fq_batch->quality[fq_batch->data_indices[i]]),
                                  0, 0, 0, cigar, 1, 255, 0, 0, alig);

        bam1_t *bam1 = convert_to_bam(alig, 33);
        bam_fwrite(bam1, bam_file);
        bam_destroy1(bam1);

        // none of these fields was allocated for the alignment (they point
        // into the fastq batch or to the local CIGAR buffer): detach them so
        // that they are not freed twice
        alig->query_name = NULL;
        alig->sequence = NULL;
        alig->quality = NULL;
        alig->cigar = NULL;
        alignment_free(alig);
      } else {
        // mapped read
        for (size_t j = 0; j < num_items; j++) {
          alignment_t *alig = (alignment_t *) array_list_get(j, array_list);
          if (alig != NULL) {
            bam1_t *bam1 = convert_to_bam(alig, 33);
            bam_fwrite(bam1, bam_file);
            bam_destroy1(bam1);
            alignment_free(alig);

            // feeds the summary printed at the end, which used to be
            // always 0
            total_mappings++;
          }
        }
      }

      if (array_list != NULL) {
        array_list_free(array_list, NULL);
      }
    }

    // batch and item were both dereferenced above, so they cannot be NULL
    aligner_batch_free(batch);
    list_item_free(item);

    if (time_on) { timing_stop(BATCH_WRITER, 0, timing_p); }
  } // end of batch loop

  bam_fclose(bam_file);

  printf("END: batch_writer (total mappings %lu)\n", (unsigned long) total_mappings);
}