/**
 * Merge the result of a right-hand extension alignment into a seed.
 *
 * When alig_out reports at least one match, the seed's read/genome end
 * coordinates are advanced by the mapped lengths, its mismatch and gap
 * counters are increased by the alignment's counters and, if the alignment
 * carries CIGAR operations, cigar_concat(&alig_out->cigar, &seed->cigar)
 * is invoked (presumably appending the alignment CIGAR to the seed CIGAR;
 * confirm against cigar_concat's definition).
 *
 * @param alig_out  alignment output to merge; only read here
 * @param seed      seed updated in place
 * @param cal       not used by this function
 */
void update_seed_right(alig_out_t *alig_out, seed_t *seed, seed_cal_t *cal) {
  // an alignment without matches leaves the seed untouched
  if (!(alig_out->match > 0)) {
    return;
  }

  // the seed grows towards the right: move its end coordinates
  seed->read_end += alig_out->map_len1;
  seed->genome_end += alig_out->map_len2;

  // accumulate the alignment statistics
  seed->num_mismatches += alig_out->mismatch;
  seed->num_open_gaps += alig_out->gap_open;
  seed->num_extend_gaps += alig_out->gap_extend;

  // merge the CIGAR produced by the alignment, if there is one
  if (alig_out->cigar.num_ops > 0) {
    cigar_concat(&alig_out->cigar, &seed->cigar);
  }
}
/**
 * Merge the result of a left-hand extension alignment into a seed.
 *
 * When alig_out reports at least one match, the seed's mismatch and gap
 * counters are increased by the alignment's counters and its read/genome
 * start coordinates are moved back by the mapped lengths. If the alignment
 * carries CIGAR operations, the seed CIGAR is rebuilt in three steps:
 *   1. cigar_concat(&seed->cigar, &alig_out->cigar)
 *   2. cigar_init(&seed->cigar)
 *   3. cigar_copy(&seed->cigar, &alig_out->cigar)
 * Presumably step 1 appends the seed CIGAR after the alignment CIGAR and
 * step 3 copies the combined result back into the seed; confirm against the
 * definitions of cigar_concat/cigar_copy. Note that alig_out->cigar is
 * passed to cigar_concat as the second argument, so it may be modified.
 *
 * @param alig_out  alignment output to merge
 * @param seed      seed updated in place
 * @param cal       not used by this function
 */
void update_seed_left(alig_out_t *alig_out, seed_t *seed, seed_cal_t *cal) {
  if (alig_out->match > 0) {
    // update seed
    seed->num_mismatches += alig_out->mismatch;
    seed->num_open_gaps += alig_out->gap_open;
    seed->num_extend_gaps += alig_out->gap_extend;

    // the seed grows towards the left: move its start coordinates
    seed->read_start -= alig_out->map_len1;
    seed->genome_start -= alig_out->map_len2;

    // update cigar with the sw output
    // (a scratch cigar_t that was initialised here but never read has been
    // removed; the three calls below are unchanged and in the same order)
    if (alig_out->cigar.num_ops > 0) {
      cigar_concat(&seed->cigar, &alig_out->cigar);
      cigar_init(&seed->cigar);
      cigar_copy(&seed->cigar, &alig_out->cigar);
    }
  }
}
int sa_sam_writer(void *data) { sa_wf_batch_t *wf_batch = (sa_wf_batch_t *) data; sa_mapping_batch_t *mapping_batch = (sa_mapping_batch_t *) wf_batch->mapping_batch; if (mapping_batch == NULL) { printf("bam_writer1: error, NULL mapping batch\n"); return 0; } /* for (int i = 0; i < NUM_COUNTERS; i++) { counters[i] += mapping_batch->counters[i]; } */ #ifdef _TIMING for (int i = 0; i < NUM_TIMING; i++) { func_times[i] += mapping_batch->func_times[i]; } #endif int num_mismatches, num_cigar_ops; size_t flag, pnext = 0, tlen = 0; char *cigar_string, *cigar_M_string, *rnext = "*"; fastq_read_t *read; array_list_t *read_list = mapping_batch->fq_reads; array_list_t *mapping_list, *mate_list; FILE *out_file = (FILE *) wf_batch->writer_input->bam_file; sa_genome3_t *genome = wf_batch->sa_index->genome; size_t num_reads, num_mappings, num_mate_mappings; num_reads = mapping_batch->num_reads; if (mapping_batch->options->pair_mode != SINGLE_END_MODE) { // PAIR MODE int len; char *sequence, *quality; char *seq, *opt_fields; alignment_t *alig; for (size_t i = 0; i < num_reads; i++) { read = (fastq_read_t *) array_list_get(i, read_list); // seq = read->sequence; /* if (i % 2 == 0) { mate_list = mapping_batch->mapping_lists[i+1]; num_mate_mappings = array_list_size(mate_list); } else { mate_list = mapping_list; num_mate_mappings = num_mappings; } */ mapping_list = mapping_batch->mapping_lists[i]; num_mappings = array_list_size(mapping_list); num_total_mappings += num_mappings; #ifdef _VERBOSE if (num_mappings > 1) { num_dup_reads++; num_total_dup_reads += num_mappings; } #endif if (num_mappings > 0) { num_mapped_reads++; if (num_mappings > 1) { num_multihit_reads++; } for (size_t j = 0; j < num_mappings; j++) { alig = (alignment_t *) array_list_get(j, mapping_list); /* // update alignment alig->secondary_alignment = 0; if (num_mate_mappings != 1) { alig->is_mate_mapped = 0; alig->is_paired_end_mapped = 0; alig->mate_strand = 0; } */ if (alig->optional_fields) { opt_fields = (char *) 
calloc(strlen(alig->optional_fields) + 100, sizeof(char)); sprintf(opt_fields, "NH:i:%i\t%s", num_mappings, alig->optional_fields); // sprintf(opt_fields, "NH:i:%i\t%s\tXU:i:%i", num_mappings, alig->optional_fields, mapping_batch->status[i]); } else { opt_fields = (char *) calloc(100, sizeof(char)); sprintf(opt_fields, "NH:i:%i", num_mappings); // sprintf(opt_fields, "NH:i:%i\tXU:i:%i", num_mappings, mapping_batch->status[i]); } /* // update alignment alig->secondary_alignment = 0; if (num_mate_mappings != 1) { alig->is_mate_mapped = 0; alig->is_paired_end_mapped = 0; alig->mate_strand = 0; } */ flag = 0; if (alig->is_paired_end) flag += BAM_FPAIRED; if (alig->is_paired_end_mapped) flag += BAM_FPROPER_PAIR; if (!alig->is_seq_mapped) flag += BAM_FUNMAP; if ((!alig->is_mate_mapped) && (alig->is_paired_end)) flag += BAM_FMUNMAP; if (alig->mate_strand) flag += BAM_FMREVERSE; if (alig->pair_num == 1) flag += BAM_FREAD1; if (alig->pair_num == 2) flag += BAM_FREAD2; if (alig->secondary_alignment) flag += BAM_FSECONDARY; if (alig->fails_quality_check) flag += BAM_FQCFAIL; if (alig->pc_optical_duplicate) flag += BAM_FDUP; if (alig->seq_strand) flag += BAM_FREVERSE; fprintf(out_file, "%s\t%lu\t%s\t%i\t%i\t%s\t%s\t%i\t%i\t%s\t%s\t%s\n", read->id, flag, genome->chrom_names[alig->chromosome], alig->position + 1, (num_mappings > 1 ? 0 : alig->mapq), //60, //(alig->map_quality > 3 ? 0 : alig->map_quality), alig->cigar, (alig->chromosome == alig->mate_chromosome ? 
"=" : genome->chrom_names[alig->mate_chromosome]), alig->mate_position + 1, alig->template_length, alig->sequence, alig->quality, opt_fields ); // free memory free(opt_fields); alignment_free(alig); } // end for num_mappings } else { num_unmapped_reads++; opt_fields = (char *) calloc(100, sizeof(char)); sprintf(opt_fields, "XM:i:%i XU:i:%i", num_mappings, mapping_batch->status[i]); if (read->adapter) { len = read->length + abs(read->adapter_length); sequence = (char *) malloc(len + 1); quality = (char *) malloc(len + 1); if (read->adapter_length < 0) { strcpy(quality, read->adapter_quality); strcat(quality, read->quality); } else { strcpy(quality, read->quality); strcat(quality, read->adapter_quality); } if ((read->adapter_strand == 0 && read->adapter_length < 0) || (read->adapter_strand == 1 && read->adapter_length > 0)) { strcpy(sequence, read->adapter); strcat(sequence, read->sequence); } else { strcpy(sequence, read->sequence); strcat(sequence, read->adapter); } sequence[len] = 0; quality[len] = 0; } else { sequence = read->sequence; quality = read->quality; } fprintf(out_file, "%s\t4\t*\t0\t0\t*\t*\t0\t0\t%s\t%s\t%s\n", read->id, sequence, quality, opt_fields ); free(opt_fields); if (read->adapter) { free(sequence); free(quality); } } array_list_free(mapping_list, (void *) NULL); } } else { // SINGLE MODE int len, mapq; char *seq; seed_cal_t *cal; cigar_t *cigar; char *sequence, *revcomp, *quality; for (size_t i = 0; i < num_reads; i++) { read = (fastq_read_t *) array_list_get(i, read_list); mapping_list = mapping_batch->mapping_lists[i]; num_mappings = array_list_size(mapping_list); num_total_mappings += num_mappings; #ifdef _VERBOSE if (num_mappings > 1) { num_dup_reads++; num_total_dup_reads += num_mappings; } #endif if (num_mappings > 0) { num_mapped_reads++; if (num_mappings > 1) { num_multihit_reads++; } for (size_t j = 0; j < num_mappings; j++) { cal = (seed_cal_t *) array_list_get(j, mapping_list); if (read->adapter) { // sequences and cigar len = 
read->length + abs(read->adapter_length); sequence = (char *) malloc(len + 1); revcomp = (char *) malloc(len + 1); quality = (char *) malloc(len + 1); cigar = cigar_new_empty(); if (read->adapter_length < 0) { strcpy(quality, read->adapter_quality); strcat(quality, read->quality); } else { strcpy(quality, read->quality); strcat(quality, read->adapter_quality); } if ( (cal->strand == 1 && ((read->adapter_strand == 0 && read->adapter_length > 0) || (read->adapter_strand == 1 && read->adapter_length < 0))) || (cal->strand == 0 && ((read->adapter_strand == 0 && read->adapter_length < 0) || (read->adapter_strand == 1 && read->adapter_length > 0))) ) { strcpy(sequence, read->adapter); strcat(sequence, read->sequence); strcpy(revcomp, read->adapter_revcomp); strcat(revcomp, read->revcomp); cigar_append_op(abs(read->adapter_length), 'S', cigar); cigar_concat(&cal->cigar, cigar); } else { strcpy(sequence, read->sequence); strcat(sequence, read->adapter); strcpy(revcomp, read->revcomp); strcat(revcomp, read->adapter_revcomp); cigar_concat(&cal->cigar, cigar); cigar_append_op(read->adapter_length, 'S', cigar); } sequence[len] = 0; revcomp[len] = 0; quality[len] = 0; } else { // sequences and cigar sequence = read->sequence; revcomp = read->revcomp; quality = read->quality; cigar = &cal->cigar; } if (cal->strand) { flag = 16; seq = revcomp; } else { flag = 0; seq = sequence; } /* if (i == 0) { flag += BAM_FSECONDARY; } */ cigar_string = cigar_to_string(cigar); cigar_M_string = cigar_to_M_string(&num_mismatches, &num_cigar_ops, cigar); if (num_mappings > 1) { cal->mapq = 0; } fprintf(out_file, "%s\t%i\t%s\t%i\t%i\t%s\t%s\t%lu\t%i\t%s\t%s\tNH:i:%i\tNM:i:%i\n", read->id, flag, genome->chrom_names[cal->chromosome_id], cal->start + 1, (num_mappings == 1 ? 
cal->mapq : 0), cigar_M_string, rnext, pnext, tlen, seq, quality, num_mappings, num_mismatches ); // free memory free(cigar_M_string); free(cigar_string); seed_cal_free(cal); if (read->adapter) { free(sequence); free(revcomp); free(quality); cigar_free(cigar); } } } else { num_unmapped_reads++; if (read->adapter) { // sequences and cigar len = read->length + abs(read->adapter_length); sequence = (char *) malloc(len + 1); quality = (char *) malloc(len + 1); if (read->adapter_length < 0) { strcpy(quality, read->adapter_quality); strcat(quality, read->quality); } else { strcpy(quality, read->quality); strcat(quality, read->adapter_quality); } if ((read->adapter_strand == 0 && read->adapter_length < 0) || (read->adapter_strand == 1 && read->adapter_length > 0)) { strcpy(sequence, read->adapter); strcat(sequence, read->sequence); } else { strcpy(sequence, read->sequence); strcat(sequence, read->adapter); } sequence[len] = 0; quality[len] = 0; } else { // sequences sequence = read->sequence; quality = read->quality; } fprintf(out_file, "%s\t4\t*\t0\t0\t*\t*\t0\t0\t%s\t%s\n", read->id, sequence, quality ); if (read->adapter) { free(sequence); free(quality); } } array_list_free(mapping_list, (void *) NULL); } // end for num_reads } // free memory sa_mapping_batch_free(mapping_batch); if (wf_batch) sa_wf_batch_free(wf_batch); return 0; }