示例#1
0
/*
 * Reverse-complement the first <seqlen> characters of <dna_seq> in place.
 *
 * The buffer is processed pairwise from both ends towards the middle: each
 * pair of characters is complemented via gt_complement() and swapped. For an
 * odd length the middle character is complemented onto itself.
 *
 * Returns 0 on success. If gt_complement() reports a non-zero value,
 * processing stops immediately and that value is returned; pairs handled
 * before the failure remain modified.
 *
 * <dna_seq> must not be NULL (asserted). A <seqlen> of 0 is a no-op.
 */
int gt_reverse_complement(char *dna_seq, unsigned long seqlen, GtError *err)
{
  /* Indices instead of pointers: the half-open range [lo, hi) never needs a
     pointer before the start of the buffer, which the pointer formulation
     produced for sequences of length 0 and 1. */
  unsigned long lo = 0, hi = seqlen;
  char front_complement;
  int had_err = 0;
  gt_error_check(err);
  gt_assert(dna_seq);
  while (!had_err && lo < hi) {
    hi--; /* hi now indexes the last unprocessed character (hi >= lo) */
    had_err = gt_complement(&front_complement, dna_seq[lo], err);
    if (!had_err)
      had_err = gt_complement(dna_seq + lo, dna_seq[hi], err);
    if (!had_err)
      dna_seq[hi] = front_complement;
    lo++;
  }
  return had_err;
}
/*
 * Feature-node callback of the SNP annotator visitor.
 *
 * Nodes that are NULL or whose type is neither the visitor's SNV nor SNP type
 * are ignored (return 0). For every mRNA directly below the visitor's gene,
 * the CDS features of that mRNA are walked; when a CDS overlaps the SNP range,
 * the SNP position inside the stored mRNA sequence is computed and every
 * allele listed in the GT_GVF_VARIANT_SEQ attribute that differs from the
 * original base is handed to snp_annotator_classify_snp().
 *
 * Returns 0 on success, otherwise the non-zero value reported by
 * gt_complement() or snp_annotator_classify_snp().
 */
static int snp_annotator_visitor_feature_node(GtNodeVisitor *nv,
                                              GtFeatureNode *fn,
                                              GtError *err)
{
  int had_err = 0;
  GtSNPAnnotatorVisitor *sav;
  GtFeatureNodeIterator *fni,
                        *mrnafni;
  GtFeatureNode *curnode,
                *curnode2;
  GtRange snp_rng;
  gt_error_check(err);
  sav = snp_annotator_visitor_cast(nv);

  /* ignore non-nodes */
  if (!fn) return 0;

  /* only process SNPs */
  if (!(gt_feature_node_get_type(fn) == sav->SNV_type ||
        gt_feature_node_get_type(fn) == sav->SNP_type)) {
    return 0;
  }

  fni = gt_feature_node_iterator_new_direct(sav->gene);
  snp_rng = gt_genome_node_get_range((GtGenomeNode*) fn);
  while (!had_err && (curnode = gt_feature_node_iterator_next(fni))) {
    if (gt_feature_node_get_type(curnode) == sav->mRNA_type) {
      GtStrand mrna_strand = gt_feature_node_get_strand(curnode);
#ifndef NDEBUG
      const char *refstr;
#endif
      /* running offset into the mRNA sequence: lengths of the CDS features
         seen so far that did not overlap the SNP */
      GtUword mrnasnppos = 0;
      mrnafni = gt_feature_node_iterator_new(curnode);
      while (!had_err && (curnode2 = gt_feature_node_iterator_next(mrnafni))) {
        if (gt_feature_node_get_type(curnode2) == sav->CDS_type) {
          GtRange cds_rng = gt_genome_node_get_range((GtGenomeNode*) curnode2);
          if (gt_range_overlap(&snp_rng, &cds_rng)) {
            char *mRNA,
                 origchar;
            char *variantchars, *variantptr = NULL;
            GT_UNUSED char *refptr = NULL;
            const char *variantattr;
#ifndef NDEBUG
            const char *refattr;
#endif
            mRNA = (char*) gt_hashmap_get(sav->rnaseqs, curnode);
            gt_assert(mRNA);
            gt_assert(snp_rng.start >= cds_rng.start);
            mrnasnppos += (snp_rng.start - cds_rng.start);
            /* on the reverse strand the position is mirrored within the
               mRNA sequence */
            if (mrna_strand == GT_STRAND_REVERSE)
              mrnasnppos = strlen(mRNA) - mrnasnppos - 1;
            gt_assert(mrnasnppos < strlen(mRNA));
            origchar = mRNA[mrnasnppos];
#ifndef NDEBUG
            /* Only duplicate an attribute that exists: the code below already
               expects a possibly missing value, and the duplication helper
               (not visible here) should not be handed NULL. */
            refattr = gt_feature_node_get_attribute(fn, GT_GVF_REFERENCE_SEQ);
            refstr = refptr = refattr ? gt_cstr_dup(refattr) : NULL;
            if (!had_err && refstr) {
              /* NOTE(review): origchar is complemented here only in builds
                 without NDEBUG, so for reverse-strand mRNAs the allele
                 comparison against origchar below sees a different value in
                 debug and release builds -- confirm which one is intended. */
              if (gt_feature_node_get_strand(curnode) == GT_STRAND_REVERSE) {
                int rval = gt_complement(&origchar, origchar, err);
                gt_assert(rval == 0);
              }
              gt_assert(toupper((unsigned char) origchar) ==
                        toupper((unsigned char) refstr[0]));
            }
#endif
            variantattr = gt_feature_node_get_attribute(fn, GT_GVF_VARIANT_SEQ);
            variantchars = variantptr = variantattr ? gt_cstr_dup(variantattr)
                                                    : NULL;
            if (!had_err && variantchars) {
              /* allele counter: advanced for every non-comma character,
                 whether it is classified or equals the original base */
              GtUword i = 0;

              /* scan the allele list up to ';' or the end of the string */
              while (!had_err &&
                              (*variantchars != ';' && *variantchars != '\0')) {
                if (*variantchars != ',' && *variantchars != origchar) {
                  char variantchar = *variantchars;
#ifndef NDEBUG
                  char refchar = refstr ? refstr[0] : '-';  /* XXX */
                  if (!had_err && mrna_strand == GT_STRAND_REVERSE)
                    had_err = gt_complement(&refchar, refchar, err);
#endif
                  if (!had_err && mrna_strand == GT_STRAND_REVERSE)
                    had_err = gt_complement(&variantchar, variantchar, err);
                  if (!had_err) {
                    had_err = snp_annotator_classify_snp(sav, curnode, fn,
                                                         mrnasnppos,
                                                         i++,
                                                         variantchar,
#ifndef NDEBUG
                                                         refchar,
#endif
                                                         err);
                  }
                } else if (*variantchars == origchar) {
                  i++;
                }
                variantchars++;
              }
            }
            /* Release both copies on every path, including a missing variant
               attribute; gt_free() already received a NULL refptr in NDEBUG
               builds before this change. */
            gt_free(variantptr);
            gt_free(refptr);
          } else {
            mrnasnppos += gt_range_length(&cds_rng);
          }
        }
      }
      gt_feature_node_iterator_delete(mrnafni);
    }
  }
  gt_feature_node_iterator_delete(fni);

  return had_err;
}
示例#3
0
/*
 * Write one tab-separated statistics line for a homopolymer length
 * difference between the reference and the aligned segment <as> to <outfp>.
 *
 * The line holds, in this order: 1-based reference position, edit type
 * ('I' if r_hlen > s_hlen, else 'D'), 1-based segment start and end of the
 * homopolymer, homopolymer character, orientation ('+'/'-'), c_len, coverage,
 * r_hlen, r_supp and a_supp as percentages of coverage (with s_hlen and
 * a_hlen in between), mapping quality, quality value before the homopolymer,
 * first/min/average/max/range/last quality inside it, quality value after
 * it, the quality string of the homopolymer and the segment description.
 *
 * For reverse segments the homopolymer character is complemented, the
 * quality string is stored back to front and the before/after as well as
 * first/last quality values are exchanged.
 *
 * Asserted preconditions: r_hlen != s_hlen, coverage > 0, s_hlen > 0, and a
 * defined quality value exists on both sides of the homopolymer.
 */
static void gt_hpol_processor_output_stats(GtAlignedSegment *as,
    unsigned long r_hpos, unsigned long coverage, unsigned long r_hlen,
    unsigned long r_supp, unsigned long s_hlen, unsigned long a_hlen,
    unsigned long a_supp, char s_char, double s_q_ave,
    unsigned long c_len, GtFile *outfp)
{
  unsigned long idx, filled = 0, seg_hpos = 0, seg_offset, seg_hend, mapq,
                qual_before = GT_UNDEF_ULONG, qual_after = GT_UNDEF_ULONG,
                qual_min = ULONG_MAX, qual_max = 0,
                qual_first = GT_UNDEF_ULONG, qual_last = 0,
                qual_cur, qual_range;
  char *hpol_quals, *quals, edit_op, orientation;
  const char *seg_id;
  int fill_backwards;

  gt_assert(r_hlen != s_hlen);
  edit_op = (r_hlen > s_hlen) ? 'I' : 'D';

  /* express the support counts as percentages of the coverage */
  gt_assert(coverage > 0);
  r_supp = r_supp * 100 / coverage;
  a_supp = a_supp * 100 / coverage;

  seg_id = gt_aligned_segment_description(as);
  mapq = gt_aligned_segment_mapping_quality(as);
  quals = gt_aligned_segment_qual(as);
  gt_assert(s_hlen > 0);
  seg_hpos = gt_aligned_segment_orig_seqpos_for_refpos(as, r_hpos);
  seg_offset = gt_aligned_segment_offset_for_refpos(as, r_hpos);
  hpol_quals = gt_malloc(sizeof (*hpol_quals) * (s_hlen + 1UL));

  /* walk left from the homopolymer to the nearest defined quality */
  idx = seg_offset;
  while (idx > 0)
  {
    idx--;
    if (quals[idx] != GT_UNDEF_CHAR)
    {
      qual_before = GT_HPOL_PROCESSOR_QUAL(quals[idx]);
      break;
    }
  }
  gt_assert(qual_before != GT_UNDEF_ULONG);

  /* collect s_hlen defined qualities starting at the homopolymer offset;
     reverse segments store them back to front */
  fill_backwards = gt_aligned_segment_is_reverse(as) ? 1 : 0;
  idx = seg_offset;
  while (filled < s_hlen)
  {
    if (quals[idx] != GT_UNDEF_CHAR)
    {
      unsigned long dest = fill_backwards ? s_hlen - 1UL - filled : filled;
      hpol_quals[dest] = quals[idx];
      filled++;
      qual_cur = GT_HPOL_PROCESSOR_QUAL(quals[idx]);
      if (qual_cur < qual_min)
        qual_min = qual_cur;
      if (qual_cur > qual_max)
        qual_max = qual_cur;
      if (qual_first == GT_UNDEF_ULONG)
        qual_first = qual_cur;
      qual_last = qual_cur;
    }
    idx++;
  }
  hpol_quals[s_hlen] = '\0';

  /* continue to the right for the nearest defined quality */
  while (idx < gt_aligned_segment_length(as))
  {
    if (quals[idx] != GT_UNDEF_CHAR)
    {
      qual_after = GT_HPOL_PROCESSOR_QUAL(quals[idx]);
      break;
    }
    idx++;
  }
  gt_assert(qual_after != GT_UNDEF_ULONG);
  gt_assert(qual_min < ULONG_MAX);
  gt_assert(qual_max >= qual_min);
  qual_range = qual_max - qual_min + 1UL;

  /* convert to 1-based coordinates */
  r_hpos++;
  seg_hpos++;

  if (!gt_aligned_segment_is_reverse(as))
  {
    seg_hend = seg_hpos + s_hlen - 1UL;
    orientation = '+';
  }
  else
  {
    /* reverse alignment: complement the character (result of the
       complement call is deliberately ignored) */
    unsigned long swap_tmp;
    GtError *err = gt_error_new();
    (void)gt_complement(&s_char, s_char, err);
    gt_error_delete(err);
    /* the position found so far is the end on the segment */
    seg_hend = seg_hpos;
    seg_hpos = seg_hpos - s_hlen + 1UL;
    orientation = '-';
    /* exchange before/after and first/last quality values */
    swap_tmp = qual_after;
    qual_after = qual_before;
    qual_before = swap_tmp;
    swap_tmp = qual_last;
    qual_last = qual_first;
    qual_first = swap_tmp;
  }

  gt_file_xprintf(outfp,
      "%lu\t%c\t%lu\t%lu\t%c\t%c\t%lu\t%lu\t%lu\t%lu\t%lu\t%lu\t%lu\t%lu\t"
      "%lu\t%lu\t%lu\t%.2f\t%lu\t%lu\t%lu\t%lu\t%s\t%s\n",
      r_hpos, edit_op, seg_hpos, seg_hend, s_char, orientation, c_len,
      coverage, r_hlen, r_supp, s_hlen, a_hlen, a_supp, mapq, qual_before,
      qual_first, qual_min, s_q_ave, qual_max, qual_range, qual_last,
      qual_after, hpol_quals, seg_id);
  gt_free(hpol_quals);
}