void map_reads(gzFile *output_files, gzFile multi_out_file, gzFile unmapped_out_file, gzFile reads_f, Mapper *mapper, int reads_format, int output_type) { FastqRead fastq_read; MapRead map_read; long warn_count, n_fastq_rec, n_fastq_err; long n_map_uniq, n_map_multi, n_map_none; map_read.fwd_nucs = my_new(unsigned char, FASTQ_MAX_LINE); map_read.rev_nucs = my_new(unsigned char, FASTQ_MAX_LINE); n_map_uniq = n_map_multi = n_map_none = 0; warn_count = n_fastq_err = n_fastq_rec = 0; /* loop over all records in FASTQ file */ while(TRUE) { long r = 0; /* read fastq record from file */ if(reads_format == READS_FORMAT_FASTQ) { r = fastq_parse_read(&fastq_read, reads_f); } else if(reads_format == READS_FORMAT_QSEQ) { r = fastq_parse_qseq_read(&fastq_read, reads_f); } else { my_err("%s:%d: unknown read format", __FILE__, __LINE__); } if(r == FASTQ_END) { /* we have reached the end of the file */ break; } if(r == FASTQ_ERR) { /* this fastq record contains an error */ if(warn_count < FASTQ_MAX_WARN) { warn_count += 1; my_warn("%s:%d: skipping invalid fastq record:\n", __FILE__, __LINE__); fprintf(stderr, " %s\n %s\n %s\n %s\n", fastq_read.line1, fastq_read.line2, fastq_read.line3, fastq_read.line4); } n_fastq_err += 1; } else if(fastq_read.read_len != mapper->seed_finder_fwd->read_len) { /* check that read length is correct */ warn_count += 1; my_warn("%s:%d: specified read length is %u, but got %d, " "skipping read\n", __FILE__, __LINE__, mapper->seed_finder_fwd->read_len, fastq_read.read_len); n_fastq_err += 1; } else if(r == FASTQ_OK) { n_fastq_rec += 1; read_from_fastq_record(&map_read, &fastq_read); if((n_fastq_rec % 1000000) == 0) { fprintf(stderr, "."); } /* try to map this read to genome */ mapper_map_one_read(mapper, &map_read); if(map_read.map_code == MAP_CODE_NONE) { /* read does not map to genome */ n_map_none += 1; if(output_type == OUTPUT_TYPE_SINGLE) { write_unmapped_read(output_files[0], &map_read); } else { write_unmapped_read(unmapped_out_file, &map_read); } } else if(map_read.map_code == MAP_CODE_MULTI) { /* read maps to multiple genomic locations */ n_map_multi += 1; if(output_type == OUTPUT_TYPE_SINGLE) { write_read(output_files, mapper->chr_tab, &map_read, FALSE); } else { write_read(&multi_out_file, mapper->chr_tab, &map_read, FALSE); } } else if(map_read.map_code == MAP_CODE_UNIQUE) { /* read maps to single genomic location */ n_map_uniq += 1; if(output_type == OUTPUT_TYPE_SINGLE) { write_read(output_files, mapper->chr_tab, &map_read, FALSE); } else { write_read(output_files, mapper->chr_tab, &map_read, TRUE); } } else { my_err("%s:%d: unknown mapping code", __FILE__, __LINE__); } } else { my_err("%s:%d: unknown fastq status", __FILE__, __LINE__); } } fprintf(stderr, "\ndone\n"); fprintf(stderr, "fastq errors: %ld\n", n_fastq_err); fprintf(stderr, "fastq records (without errors): %ld\n", n_fastq_rec); fprintf(stderr, "unmapped reads: %ld\n", n_map_none); fprintf(stderr, "uniquely mapping reads: %ld\n", n_map_uniq); fprintf(stderr, "multiply mapping reads: %ld\n", n_map_multi); my_free(map_read.fwd_nucs); my_free(map_read.rev_nucs); }
int bam_writer(void *data) { struct timeval start, end; double time; //if (time_on) { start_timer(start); } batch_t *batch = (batch_t *) data; fastq_read_t *fq_read; array_list_t *array_list; size_t num_items; //bam1_t *bam1; //alignment_t *alig; mapping_batch_t *mapping_batch = (mapping_batch_t *) batch->mapping_batch; batch_writer_input_t *writer_input = batch->writer_input; bam_file_t *bam_file = writer_input->bam_file; linked_list_t *linked_list = writer_input->list_p; size_t num_reads_b = array_list_size(mapping_batch->fq_batch); size_t num_mapped_reads = 0; size_t total_mappings = 0; unsigned char found_p1 = 0; unsigned char found_p2 = 0; int i = 0; extern size_t bwt_correct; extern size_t bwt_error; extern pthread_mutex_t bwt_mutex, mutex_sp; writer_input->total_batches++; extern size_t *histogram_sw; extern size_t num_reads_map; extern size_t num_reads; extern size_t tot_reads; extern st_bwt_t st_bwt; st_bwt.total_reads += num_reads_b; free(mapping_batch->histogram_sw); // // DNA/RNA mode // for (size_t i = 0; i < num_reads_b; i++) { num_items = array_list_size(mapping_batch->mapping_lists[i]); total_mappings += num_items; fq_read = (fastq_read_t *) array_list_get(i, mapping_batch->fq_batch); // mapped or not mapped ? if (num_items == 0) { total_mappings++; write_unmapped_read(fq_read, bam_file); if (mapping_batch->mapping_lists[i]) { array_list_free(mapping_batch->mapping_lists[i], NULL); } } else { num_mapped_reads++; if (array_list_size(mapping_batch->mapping_lists[i]) == 1) { st_bwt.single_alig++; } else { st_bwt.multi_alig++; } write_mapped_read(mapping_batch->mapping_lists[i], bam_file); } } if (mapping_batch) { mapping_batch_free(mapping_batch); } if (batch) batch_free(batch); basic_statistics_add(num_reads_b, num_mapped_reads, total_mappings, 0, basic_st); }