/*
 * Reverse-complement the first <seqlen> characters of <dna_seq> in place.
 *
 * The sequence is processed from both ends towards the middle: each pair of
 * characters is complemented via gt_complement() and swapped.  For an odd
 * length the middle character is complemented once.
 *
 * Returns 0 on success, or the non-zero error code of the first failing
 * gt_complement() call (<err> is passed through to it).  On failure the
 * processing stops at the offending pair; pairs handled before that point
 * stay reverse-complemented.
 *
 * A <seqlen> of 0 is a no-op.  Indices are used instead of a pointer to
 * `dna_seq + seqlen - 1` so that no pointer before the start of the buffer is
 * ever formed (which would be undefined behaviour for seqlen 0 or 1).
 */
int gt_reverse_complement(char *dna_seq, unsigned long seqlen, GtError *err)
{
  unsigned long front = 0,
                back = seqlen; /* one past the last unprocessed character */
  char tmp_char;
  int had_err = 0;
  gt_error_check(err);
  gt_assert(dna_seq);
  while (!had_err && front < back) {
    back--; /* now front <= back, both inside the buffer */
    /* keep the complemented front character before it gets overwritten */
    had_err = gt_complement(&tmp_char, dna_seq[front], err);
    if (!had_err)
      had_err = gt_complement(dna_seq + front, dna_seq[back], err);
    if (!had_err)
      dna_seq[back] = tmp_char;
    front++;
  }
  return had_err;
}
/*
 * Feature node callback of the SNP annotator visitor.
 *
 * Nodes that are NULL or whose type is neither sav->SNV_type nor
 * sav->SNP_type are ignored (return 0).  For every other node, all direct
 * mRNA children of sav->gene are visited and their CDS features are walked.
 * While walking, <mrnasnppos> accumulates the lengths of the CDS ranges that
 * do not overlap the SNP; when a CDS overlaps the SNP, the SNP offset inside
 * that CDS is added, the position is mirrored for reverse-strand mRNAs, and
 * every variant character listed in the GT_GVF_VARIANT_SEQ attribute that
 * differs from the character found in the mRNA sequence is handed to
 * snp_annotator_classify_snp().
 *
 * Returns 0 on success, otherwise the first non-zero error code produced by
 * gt_complement() or snp_annotator_classify_snp(); iteration stops at the
 * first error.
 */
static int snp_annotator_visitor_feature_node(GtNodeVisitor *nv,
                                              GtFeatureNode *fn,
                                              GtError *err)
{
  int had_err = 0;
  GtSNPAnnotatorVisitor *sav;
  GtFeatureNodeIterator *fni,
                        *mrnafni;
  GtFeatureNode *curnode,
                *curnode2;
  GtRange snp_rng;
  gt_error_check(err);
  sav = snp_annotator_visitor_cast(nv);

  /* ignore non-nodes */
  if (!fn) return 0;

  /* only process SNPs */
  if (!(gt_feature_node_get_type(fn) == sav->SNV_type ||
        gt_feature_node_get_type(fn) == sav->SNP_type)) {
    return 0;
  }

  fni = gt_feature_node_iterator_new_direct(sav->gene);
  snp_rng = gt_genome_node_get_range((GtGenomeNode*) fn);
  while (!had_err && (curnode = gt_feature_node_iterator_next(fni))) {
    if (gt_feature_node_get_type(curnode) == sav->mRNA_type) {
      GtStrand mrna_strand = gt_feature_node_get_strand(curnode);
#ifndef NDEBUG
      const char *refstr;
#endif
      /* position of the SNP within the mRNA sequence, built up CDS by CDS */
      GtUword mrnasnppos = 0;
      mrnafni = gt_feature_node_iterator_new(curnode);
      while (!had_err && (curnode2 = gt_feature_node_iterator_next(mrnafni))) {
        if (gt_feature_node_get_type(curnode2) == sav->CDS_type) {
          GtRange cds_rng = gt_genome_node_get_range((GtGenomeNode*) curnode2);
          if (gt_range_overlap(&snp_rng, &cds_rng)) {
            char *mRNA,
                 origchar;
            char *variantchars, *variantptr = NULL;
            /* only assigned in debug builds, but always passed to gt_free() */
            GT_UNUSED char *refptr = NULL;
            mRNA = (char*) gt_hashmap_get(sav->rnaseqs, curnode);
            gt_assert(mRNA);
            gt_assert(snp_rng.start >= cds_rng.start);
            mrnasnppos += (snp_rng.start - cds_rng.start);
            /* mirror the position for mRNAs on the reverse strand */
            if (mrna_strand == GT_STRAND_REVERSE)
              mrnasnppos = strlen(mRNA) - mrnasnppos - 1;
            gt_assert(mrnasnppos < strlen(mRNA));
            origchar = mRNA[mrnasnppos];
#ifndef NDEBUG
            /* debug-only sanity check: the mRNA character at the SNP position
               must agree with the reference allele given in the annotation.
               NOTE(review): for reverse-strand mRNAs this complements
               <origchar> itself, so in debug builds the comparison against
               <origchar> in the variant loop below uses the complemented
               character, while NDEBUG builds use the uncomplemented one --
               confirm which of the two is intended. */
            refstr = refptr = gt_cstr_dup(gt_feature_node_get_attribute(fn,
                                                       GT_GVF_REFERENCE_SEQ));
            if (!had_err && refstr) {
              if (gt_feature_node_get_strand(curnode) == GT_STRAND_REVERSE) {
                int rval = gt_complement(&origchar, origchar, err);
                gt_assert(rval == 0);
              }
              /* <ctype.h> functions require values representable as
                 unsigned char */
              gt_assert(toupper((unsigned char) origchar) ==
                        toupper((unsigned char) refstr[0]));
            }
#endif
            variantchars = variantptr = gt_cstr_dup(
                         gt_feature_node_get_attribute(fn, GT_GVF_VARIANT_SEQ));
            if (!had_err && variantchars) {
              /* index of the current variant within the variant list */
              GtUword i = 0;

              /* scan the variant list up to its end or the first ';' */
              while (!had_err &&
                       (*variantchars != ';' && *variantchars != '\0')) {
                if (*variantchars != ',' && *variantchars != origchar) {
                  char variantchar = *variantchars;
#ifndef NDEBUG
                  char refchar = refstr ? refstr[0] : '-';  /* XXX */
                  if (!had_err && mrna_strand == GT_STRAND_REVERSE)
                    had_err = gt_complement(&refchar, refchar, err);
#endif
                  if (!had_err && mrna_strand == GT_STRAND_REVERSE)
                    had_err = gt_complement(&variantchar, variantchar, err);
                  if (!had_err) {
                    had_err = snp_annotator_classify_snp(sav, curnode, fn,
                                                         mrnasnppos,
                                                         i++,
                                                         variantchar,
#ifndef NDEBUG
                                                         refchar,
#endif
                                                         err);
                  }
                } else if (*variantchars == origchar) {
                  /* variant equals the original character: nothing to
                     classify, but it still occupies a slot in the list */
                  i++;
                }
                variantchars++;
              }
            }
            /* release both duplicates unconditionally so that the reference
               copy is not leaked when no variant string is available;
               <refptr> is still NULL here in NDEBUG builds */
            gt_free(variantptr);
            gt_free(refptr);
          } else {
            /* CDS does not contain the SNP: only advance the mRNA position */
            mrnasnppos += gt_range_length(&cds_rng);
          }
        }
      }
      gt_feature_node_iterator_delete(mrnafni);
    }
  }
  gt_feature_node_iterator_delete(fni);

  return had_err;
}
static void gt_hpol_processor_output_stats(GtAlignedSegment *as, unsigned long r_hpos, unsigned long coverage, unsigned long r_hlen, unsigned long r_supp, unsigned long s_hlen, unsigned long a_hlen, unsigned long a_supp, char s_char, double s_q_ave, unsigned long c_len, GtFile *outfp) { unsigned long i, pos, s_hpos = 0, s_offset, s_q_bef, s_q_aft, s_q_value, s_q_min, s_q_max, s_q_range, s_q_first, s_q_last = 0, s_hend, s_mapq; char *s_qual, *q, edit, s_or; const char *s_id; gt_assert(r_hlen != s_hlen); edit = r_hlen > s_hlen ? 'I' : 'D'; gt_assert(coverage > 0); r_supp = r_supp * 100 / coverage; a_supp = a_supp * 100 / coverage; s_id = gt_aligned_segment_description(as); s_mapq = gt_aligned_segment_mapping_quality(as); q = gt_aligned_segment_qual(as); gt_assert(s_hlen > 0); s_hpos = gt_aligned_segment_orig_seqpos_for_refpos(as, r_hpos); s_offset = gt_aligned_segment_offset_for_refpos(as, r_hpos); s_qual = gt_malloc(sizeof (*s_qual) * (s_hlen + 1UL)); s_q_bef = GT_UNDEF_ULONG; for (i = s_offset; i > 0; /**/) { i--; if (q[i] != GT_UNDEF_CHAR) { s_q_bef = GT_HPOL_PROCESSOR_QUAL(q[i]); break; } } gt_assert(s_q_bef != GT_UNDEF_ULONG); s_q_min = ULONG_MAX; s_q_max = 0; s_q_first = GT_UNDEF_ULONG; if (!gt_aligned_segment_is_reverse(as)) { for (i = s_offset, pos = 0; pos < s_hlen; i++) { if (q[i] != GT_UNDEF_CHAR) { s_qual[pos] = q[i]; pos++; s_q_value = GT_HPOL_PROCESSOR_QUAL(q[i]); if (s_q_value < s_q_min) s_q_min = s_q_value; if (s_q_value > s_q_max) s_q_max = s_q_value; if (s_q_first == GT_UNDEF_ULONG) s_q_first = s_q_value; s_q_last = s_q_value; } } } else { for (i = s_offset, pos = s_hlen; pos > 0; i++) { if (q[i] != GT_UNDEF_CHAR) { s_qual[pos - 1UL] = q[i]; pos--; s_q_value = GT_HPOL_PROCESSOR_QUAL(q[i]); if (s_q_value < s_q_min) s_q_min = s_q_value; if (s_q_value > s_q_max) s_q_max = s_q_value; if (s_q_first == GT_UNDEF_ULONG) s_q_first = s_q_value; s_q_last = s_q_value; } } } s_qual[s_hlen] = '\0'; s_q_aft = GT_UNDEF_ULONG; for (/**/; i < 
gt_aligned_segment_length(as); i++) { if (q[i] != GT_UNDEF_CHAR) { s_q_aft = GT_HPOL_PROCESSOR_QUAL(q[i]); break; } } gt_assert(s_q_aft != GT_UNDEF_ULONG); gt_assert(s_q_min < ULONG_MAX); gt_assert(s_q_max >= s_q_min); s_q_range = s_q_max - s_q_min + 1UL; /* convert to 1-based coordinates */ r_hpos++; s_hpos++; /* handle reverse alignments */ if (gt_aligned_segment_is_reverse(as)) { /* complement char */ GtError *err = gt_error_new(); (void)gt_complement(&s_char, s_char, err); gt_error_delete(err); /* correct coords on s */ s_hend = s_hpos; s_hpos = s_hpos - s_hlen + 1UL; s_or = '-'; /* swap q values */ s_q_value = s_q_aft; s_q_aft = s_q_bef; s_q_bef = s_q_value; s_q_value = s_q_last; s_q_last = s_q_first; s_q_first = s_q_value; } else { s_hend = s_hpos + s_hlen - 1UL; s_or = '+'; } gt_file_xprintf(outfp, "%lu\t%c\t%lu\t%lu\t%c\t%c\t%lu\t%lu\t%lu\t%lu\t%lu\t%lu\t%lu\t%lu\t" "%lu\t%lu\t%lu\t%.2f\t%lu\t%lu\t%lu\t%lu\t%s\t%s\n", r_hpos, edit, s_hpos, s_hend, s_char, s_or, c_len, coverage, r_hlen, r_supp, s_hlen, a_hlen, a_supp, s_mapq, s_q_bef, s_q_first, s_q_min, s_q_ave, s_q_max, s_q_range, s_q_last, s_q_aft, s_qual, s_id); gt_free(s_qual); }