コード例 #1
0
static void process_orf(GtRange orf_rng, unsigned int orf_frame,
                        GtStrand strand, GtFeatureNode *gf,
                        unsigned long offset, unsigned int min,
                        unsigned int max, GT_UNUSED GtError *err)
{

  gt_assert(gf);
  unsigned long tmp;

  if ((gt_range_length(&orf_rng) >= min) &&
      (gt_range_length(&orf_rng) <= max)) {
    switch (strand) {
      case GT_STRAND_FORWARD:
        orf_rng.start = orf_rng.start + offset;
        orf_rng.end = orf_rng.end + offset;
        break;
      case GT_STRAND_REVERSE:
        tmp = orf_rng.start;
        orf_rng.start = offset - orf_rng.end;
        orf_rng.end = offset - tmp;
        break;
      default:
        exit(GT_EXIT_PROGRAMMING_ERROR);
        break;
    }
    orf_attach_results_to_gff3(gf, orf_rng, orf_frame, strand, err);
  }
}
コード例 #2
0
ファイル: range.c プロジェクト: AlexWoroschilow/uni_hamburg
int gt_range_compare_by_length_ptr(const GtRange *range_a,
                                   const GtRange *range_b)
{
  GtUword range_a_length, range_b_length;
  gt_assert(range_a && range_b);
  range_a_length = gt_range_length(range_a);
  range_b_length = gt_range_length(range_b);
  if (range_a_length == range_b_length)
    return 0;
  if (range_a_length > range_b_length)
    return -1;
  return 1;
}
コード例 #3
0
ファイル: seq_con.c プロジェクト: oeigenbrod/genometools
unsigned long gth_seq_con_get_length(GthSeqCon *seq_con, unsigned long seq_num)
{
  GtRange range;
  gt_assert(seq_con);
  range = gth_seq_con_get_range(seq_con, seq_num);
  return gt_range_length(&range);
}
コード例 #4
0
ファイル: range.c プロジェクト: AlexWoroschilow/uni_hamburg
GtUword gt_ranges_spanned_length(const GtArray *ranges)
{
  GtRange spanned_range;
  gt_assert(ranges);
  spanned_range.start = ((GtRange*) gt_array_get_first(ranges))->start;
  spanned_range.end   = ((GtRange*) gt_array_get_last(ranges))->end;
  return gt_range_length(&spanned_range);
}
コード例 #5
0
ファイル: diagram.c プロジェクト: AlexWoroschilow/uni_hamburg
static int get_caption_display_status(GtDiagram *d, const char *gft,
                                      bool *result, GtError *err)
{
  bool *status;
  gt_assert(d && gft);
  status = (bool*) gt_hashmap_get(d->caption_display_status, gft);
  if (!status)
  {
    GtUword threshold = GT_UNDEF_UWORD;
    double tmp = GT_UNDEF_DOUBLE;
    status = gt_malloc(sizeof (bool));
    *status = true;
    if (gt_style_get_bool(d->style, "format", "show_block_captions", status,
                          NULL, err) == GT_STYLE_QUERY_ERROR) {
      gt_free(status);
      return -1;
    }
    if (*status)
    {
      GtStyleQueryStatus rval;
      rval = gt_style_get_num(d->style,
                              gft, "max_capt_show_width",
                              &tmp, NULL, err);
      switch (rval) {
        case GT_STYLE_QUERY_ERROR:
          gt_free(status);
          return -1;
          break; /* should never reach this */
        case GT_STYLE_QUERY_NOT_SET:
          *status = true;
          break;
        default:
          gt_assert(tmp != GT_UNDEF_DOUBLE);
          threshold = tmp;
          gt_assert(tmp != GT_UNDEF_UWORD);
          *status = (gt_range_length(&d->range) <= threshold);
          break;
      }
        *status = (gt_range_length(&d->range) <= threshold);
    }
    gt_hashmap_add(d->caption_display_status, (void*) gft, status);
  }
  *result = *status;
  return 0;
}
コード例 #6
0
ファイル: stat_visitor.c プロジェクト: 9beckert/TIR
static void compute_type_statistics(GtFeatureNode *fn, GtStatVisitor *sv)
{
    GtRange range;
    gt_assert(fn && sv);
    if (gt_feature_node_has_type(fn, gt_ft_gene)) {
        sv->number_of_genes++;
        if (gt_feature_node_has_CDS(fn))
            sv->number_of_protein_coding_genes++;
        if (sv->gene_length_distribution) {
            range = gt_genome_node_get_range((GtGenomeNode*) fn);
            gt_disc_distri_add(sv->gene_length_distribution, gt_range_length(&range));
        }
        if (sv->gene_score_distribution) {
            gt_disc_distri_add(sv->gene_score_distribution,
                               gt_feature_node_get_score(fn) * 100.0);
        }
    }
    else if (gt_feature_node_has_type(fn, gt_ft_mRNA)) {
        sv->number_of_mRNAs++;
        if (gt_feature_node_has_CDS(fn))
            sv->number_of_protein_coding_mRNAs++;
    }
    else if (gt_feature_node_has_type(fn, gt_ft_exon)) {
        sv->number_of_exons++;
        if (sv->exon_length_distribution) {
            range = gt_genome_node_get_range((GtGenomeNode*) fn);
            gt_disc_distri_add(sv->exon_length_distribution,
                               gt_range_length(&range));
        }
    }
    else if (gt_feature_node_has_type(fn, gt_ft_CDS)) {
        sv->number_of_CDSs++;
    }
    else if (gt_feature_node_has_type(fn, gt_ft_intron)) {
        if (sv->intron_length_distribution) {
            range = gt_genome_node_get_range((GtGenomeNode*) fn);
            gt_disc_distri_add(sv->intron_length_distribution,
                               gt_range_length(&range));
        }
    }
    else if (gt_feature_node_has_type(fn, gt_ft_LTR_retrotransposon)) {
        sv->number_of_LTR_retrotransposons++;
    }
}
コード例 #7
0
ファイル: stat_visitor.c プロジェクト: 9beckert/TIR
static int stat_visitor_region_node(GtNodeVisitor *nv, GtRegionNode *rn,
                                    GT_UNUSED GtError *err)
{
    GtStatVisitor *sv;
    GtRange range;
    gt_error_check(err);
    sv = stat_visitor_cast(nv);
    sv->number_of_sequence_regions++;
    range = gt_genome_node_get_range((GtGenomeNode*) rn);
    sv->total_length_of_sequence_regions += gt_range_length(&range);
    return 0;
}
コード例 #8
0
GT_UNUSED static int gt_cluster_matches_gap(GtArray *matches,
                                            GtClusteredSet *cs,
                                            unsigned long max_gap_size,
                                            GtError *err)
{
  GtMatchReference *mref;
  GtMatchEdgeTable matchedgetab;
  GtMatchEdge matchedge;
  GtMatch *match;
  GtRange range;
  unsigned long i,
                j,
                gap_size = 0,
                length = 0;
  unsigned long num_of_matches;
  int had_err = 0;

  num_of_matches = gt_array_size(matches);
  if (gt_clustered_set_num_of_elements(cs, err) != num_of_matches) {
    had_err = -1;
    gt_error_set(err,
                 "number of matches (%lu) unequals number of elements in"
                 "clustered set (%lu)",
                 num_of_matches, gt_clustered_set_num_of_elements(cs, err));
  }
  if (!had_err) {
    matchedgetab.edges = gt_array_new(sizeof (GtMatchEdge));
    matchedgetab.num_of_edges = 0;

    mref = gt_mirror_and_sort_matches(matches);

    for (i = 0; i < (2 * (num_of_matches) - 1); i++) {
      for (j = i + 1; j < (2 * num_of_matches); j++) {
        match = *(GtMatch**) gt_array_get(matches, mref[i].matchnum);
        gt_match_get_range_seq1(match, &range);
        length = gt_range_length(&range);
        gap_size = mref[j].startpos - mref[i].startpos + length;

        if (gap_size > max_gap_size)
          break;
        if (mref[i].matchnum != mref[j].matchnum) {
          STORECLUSTEREDGEG(mref[i].matchnum, mref[j].matchnum, gap_size);
        }
      }
    }
    if (gt_cluster_matches(cs, &matchedgetab, err) != 0)
      had_err = -1;
    gt_array_delete(matchedgetab.edges);
    gt_free(mref);
  }
  return had_err;
}
コード例 #9
0
ファイル: stat_visitor.c プロジェクト: 9beckert/TIR
static int add_exon_or_cds_number(GtFeatureNode *fn, void *data,
                                  GT_UNUSED GtError *err)
{
    GtStatVisitor *sv = (GtStatVisitor*) data;
    gt_error_check(err);
    gt_assert(sv && fn);
    if (gt_feature_node_has_type(fn, gt_ft_exon))
        sv->exon_number_for_distri++;
    else if (gt_feature_node_has_type(fn, gt_ft_CDS)) {
        GtRange range = gt_genome_node_get_range((GtGenomeNode*) fn);
        sv->cds_length_for_distri += gt_range_length(&range);
    }
    return 0;
}
コード例 #10
0
int gt_feature_index_get_features_for_range(GtFeatureIndex *feature_index,
                                            GtArray *results,
                                            const char *seqid,
                                            const GtRange *range, GtError *err)
{
  int ret;
  gt_assert(feature_index && feature_index->c_class && results && seqid &&
            range);
  gt_assert(gt_range_length(range) > 0);
  gt_rwlock_rdlock(feature_index->pvt->lock);
  ret = feature_index->c_class->get_features_for_range(feature_index, results,
                                                       seqid, range, err);
  gt_rwlock_unlock(feature_index->pvt->lock);
  return ret;
}
コード例 #11
0
static double gaeval_visitor_coverage_resolve(GtFeatureNode *genemodel,
                                              GtArray *exon_coverage)
{
  agn_assert(genemodel && exon_coverage);
  agn_assert(gt_feature_node_has_type(genemodel, "mRNA"));

  GtUword cum_exon_length =
      agn_typecheck_feature_combined_length(genemodel, agn_typecheck_exon);

  GtUword i, covered = 0;
  for(i = 0; i < gt_array_size(exon_coverage); i++)
  {
    GtRange *range = gt_array_get(exon_coverage, i);
    covered += gt_range_length(range);
  }
  agn_assert(covered <= cum_exon_length);
  return (double)covered / (double)cum_exon_length;
}
コード例 #12
0
GtRange gt_pbs_hit_get_coords(const GtPBSHit *h)
{
  GtRange rng;
  gt_assert(h && h->end >= h->start);
  rng.start = h->start;
  rng.end = h->end;
  switch (h->strand)
  {
    case GT_STRAND_FORWARD:
    default:
      rng.start = h->res->elem->leftLTR_3 + 1 - h->res->opts->radius
                    + rng.start;
      rng.end = rng.start + (h->end - h->start);
      break;
    case GT_STRAND_REVERSE:
      rng.end = h->res->elem->rightLTR_5 - 1 + h->res->opts->radius - rng.start;
      rng.start = rng.end - (h->end - h->start);
      break;
  }
  gt_assert(gt_range_length(&rng) == (h->end - h->start + 1));
  return rng;
}
コード例 #13
0
static int extract_join_feature(GtGenomeNode *gn, const char *type,
                                GtRegionMapping *region_mapping,
                                GtStr *sequence, bool *reverse_strand,
                                bool *first_child_of_type_seen, GtPhase *phase,
                                GtError *err)
{
  char *outsequence;
  GtFeatureNode *fn;
  GtRange range;
  int had_err = 0;

  gt_error_check(err);
  fn = gt_feature_node_cast(gn);
  gt_assert(fn);

  if (gt_feature_node_has_type(fn, type)) {
    if (gt_feature_node_get_strand(fn) == GT_STRAND_REVERSE) {
      *reverse_strand = true;
      *phase = gt_feature_node_get_phase(fn);
    } else {
      if (!(*first_child_of_type_seen)) {
        *first_child_of_type_seen = true;
        *phase = gt_feature_node_get_phase(fn);
      } else *phase = GT_PHASE_UNDEFINED;
    }
    range = gt_genome_node_get_range(gn);
    had_err = gt_region_mapping_get_sequence(region_mapping, &outsequence,
                                             gt_genome_node_get_seqid(gn),
                                             range.start, range.end, err);
    if (!had_err) {
      gt_str_append_cstr_nt(sequence, outsequence, gt_range_length(&range));
      gt_free(outsequence);
    }
  }
  return had_err;
}
コード例 #14
0
ファイル: coords.c プロジェクト: AlexWoroschilow/uni_hamburg
double gt_coords_convert_point(GtRange viewrange, GtWord pos)
{
  return ((double) (((GtWord) pos -(GtWord) viewrange.start)))
                  / ((double) gt_range_length(&viewrange));
}
コード例 #15
0
static int gt_extract_feature_sequence_generic(GtStr *sequence,
                                GtGenomeNode *gn,
                                const char *type, bool join, GtStr *seqid,
                                GtStrArray *target_ids,
                                unsigned int *out_phase_offset,
                                GtRegionMapping *region_mapping, GtError *err)
{
  GtFeatureNode *fn;
  GtRange range;
  unsigned int phase_offset = 0;
  char *outsequence;
  const char *target;
  int had_err = 0;

  gt_error_check(err);
  fn = gt_genome_node_cast(gt_feature_node_class(), gn);
  gt_assert(fn);

  if (seqid)
    gt_str_append_str(seqid, gt_genome_node_get_seqid(gn));
  if (target_ids &&
      (target = gt_feature_node_get_attribute(fn, GT_GFF_TARGET))) {
    had_err = gt_gff3_parser_parse_all_target_attributes(target, false,
                                                         target_ids, NULL,
                                                         NULL, "", 0, err);
  }
  if (!had_err) {
    if (join) {
      GtFeatureNodeIterator *fni;
      GtFeatureNode *child;
      bool reverse_strand = false,
           first_child = true,
           first_child_of_type_seen = false;
      GtPhase phase = GT_PHASE_UNDEFINED;
      /* in this case we have to traverse the children */
      fni = gt_feature_node_iterator_new_direct(gt_feature_node_cast(gn));
      while (!had_err && (child = gt_feature_node_iterator_next(fni))) {
        if (first_child) {
          if (target_ids &&
               (target = gt_feature_node_get_attribute(child, GT_GFF_TARGET))) {
            gt_str_array_reset(target_ids);
            had_err = gt_gff3_parser_parse_all_target_attributes(target, false,
                                                                 target_ids,
                                                                 NULL,
                                                                 NULL, "", 0,
                                                                 err);
          }
          first_child = false;
        }
        if (!had_err) {
          if (extract_join_feature((GtGenomeNode*) child, type, region_mapping,
                                   sequence, &reverse_strand,
                                   &first_child_of_type_seen,
                                   &phase, err)) {
            had_err = -1;
          }
          if (phase != GT_PHASE_UNDEFINED) {
            phase_offset = (int) phase;
          }
        }
      }
      gt_feature_node_iterator_delete(fni);
      gt_assert(phase_offset <= (unsigned int) GT_PHASE_UNDEFINED);
      if (!had_err && gt_str_length(sequence)) {
        if (reverse_strand) {
          had_err = gt_reverse_complement(gt_str_get(sequence),
                                          gt_str_length(sequence), err);
        }
      }
    }
    else if (gt_feature_node_get_type(fn) == type) {
      GtPhase phase = gt_feature_node_get_phase(fn);
      gt_assert(!had_err);
      if (phase != GT_PHASE_UNDEFINED)
        phase_offset = (unsigned int) phase;
      /* otherwise we only have to look at this feature */
      range = gt_genome_node_get_range(gn);
      gt_assert(range.start); /* 1-based coordinates */
      had_err = gt_region_mapping_get_sequence(region_mapping, &outsequence,
                                               gt_genome_node_get_seqid(gn),
                                               range.start, range.end, err);
      if (!had_err) {
        gt_str_append_cstr_nt(sequence, outsequence, gt_range_length(&range));
        gt_free(outsequence);
        if (gt_feature_node_get_strand(fn) == GT_STRAND_REVERSE) {
          had_err = gt_reverse_complement(gt_str_get(sequence),
                                          gt_str_length(sequence), err);
        }
      }
    }
  }
  if (out_phase_offset && phase_offset != GT_PHASE_UNDEFINED) {
    *out_phase_offset = phase_offset;
  }
  return had_err;
}
コード例 #16
0
ファイル: select_visitor.c プロジェクト: 9beckert/TIR
static int select_visitor_feature_node(GtNodeVisitor *nv,
                                       GtFeatureNode *fn,
                                       GT_UNUSED GtError *err)
{
  GtSelectVisitor *fv;
  bool filter_node = false;
  gt_error_check(err);
  fv = select_visitor_cast(nv);
  fv->current_feature++;
  if ((!gt_str_length(fv->seqid) || /* no seqid was specified or seqids are
                                       equal */
       !gt_str_cmp(fv->seqid, gt_genome_node_get_seqid((GtGenomeNode*) fn))) &&
      (!gt_str_length(fv->source) || /* no source was specified or sources are
                                        equal */
       !strcmp(gt_str_get(fv->source), gt_feature_node_get_source(fn)))) {
    GtRange range = gt_genome_node_get_range((GtGenomeNode*) fn);
    /* enforce maximum gene length */
    /* XXX: we (spuriously) assume that genes are always root nodes */
    if (fn && gt_feature_node_has_type(fn, gt_ft_gene)) {
      if (fv->max_gene_length != GT_UNDEF_ULONG &&
          gt_range_length(&range) > fv->max_gene_length) {
        filter_node = true;
      }
      else if (fv->max_gene_num != GT_UNDEF_ULONG &&
               fv->gene_num >= fv->max_gene_num) {
        filter_node = true;
      }
      else if (fv->min_gene_score != GT_UNDEF_DOUBLE &&
               gt_feature_node_get_score(fn) < fv->min_gene_score) {
        filter_node = true;
      }
      else if (fv->max_gene_score != GT_UNDEF_DOUBLE &&
               gt_feature_node_get_score(fn) > fv->max_gene_score) {
        filter_node = true;
      }
      else if (fv->feature_num != GT_UNDEF_ULONG &&
               fv->feature_num != fv->current_feature) {
        filter_node = true;
      }
      if (!filter_node)
        fv->gene_num++; /* gene passed filter */
    }
  }
  else
    filter_node = true;

  if (!filter_node)
    filter_node = filter_contain_range(fn, fv->contain_range);

  if (!filter_node)
    filter_node = filter_overlap_range(fn, fv->overlap_range);

  if (!filter_node)
    filter_node = filter_strand(fn, fv->strand);

  if (!filter_node)
    filter_node = filter_targetstrand(fn, fv->targetstrand);

  if (!filter_node)
    filter_node = filter_has_CDS(fn, fv->has_CDS);

  if (!filter_node)
    filter_node = filter_min_average_ssp(fn, fv->min_average_splice_site_prob);

  if (filter_node)
    gt_genome_node_delete((GtGenomeNode*) fn);
  else
    gt_queue_add(fv->node_buffer, fn);

  return 0;
}
コード例 #17
0
static int gt_snp_annotator_visitor_prepare_gene(GtSNPAnnotatorVisitor *sav,
                                                 GtError *err)
{
  GtFeatureNodeIterator *fni,
                        *mrnafni;
  GtFeatureNode *curnode,
                *last_mRNA = NULL;
  GtStr *mrnaseq,
        *seqid;
  int had_err = 0;

  mrnaseq = gt_str_new();
  seqid = gt_genome_node_get_seqid((GtGenomeNode*) sav->gene);
  fni = gt_feature_node_iterator_new(sav->gene);
  while (!had_err && (curnode = gt_feature_node_iterator_next(fni))) {
    if (gt_feature_node_get_type(curnode) == sav->mRNA_type) {
      GtFeatureNode *curnode2;
      if (last_mRNA) {
        char *mrna_charseq = gt_calloc(gt_str_length(mrnaseq)+1, sizeof (char));
        (void) strncpy(mrna_charseq, gt_str_get(mrnaseq),
                       gt_str_length(mrnaseq));
        if (gt_feature_node_get_strand(sav->gene) == GT_STRAND_REVERSE) {
          had_err = gt_reverse_complement(mrna_charseq, gt_str_length(mrnaseq),
                                          err);
        }
        if (!had_err) {
          gt_hashmap_add(sav->rnaseqs, last_mRNA, mrna_charseq);
          last_mRNA = curnode;
          gt_str_reset(mrnaseq);
        }
      } else last_mRNA = curnode;
      if (!had_err) {
        mrnafni = gt_feature_node_iterator_new(curnode);
        while (!had_err && (curnode2 =
                                      gt_feature_node_iterator_next(mrnafni))) {
          if (gt_feature_node_get_type(curnode2) == sav->CDS_type) {
            char *tmp;
            GtRange rng = gt_genome_node_get_range((GtGenomeNode*) curnode2);
            had_err = gt_region_mapping_get_sequence(sav->rmap, &tmp, seqid,
                                                     rng.start, rng.end, err);
            if (!had_err) {
              gt_str_append_cstr_nt(mrnaseq, tmp, gt_range_length(&rng));
              gt_free(tmp);
            }
          }
        }
        gt_feature_node_iterator_delete(mrnafni);
      }
    }
  }
  if (!had_err && last_mRNA) {
    char *mrna_charseq = gt_calloc(gt_str_length(mrnaseq)+1, sizeof (char));
    (void) strncpy(mrna_charseq, gt_str_get(mrnaseq), gt_str_length(mrnaseq));
    if (gt_feature_node_get_strand(sav->gene) == GT_STRAND_REVERSE) {
      had_err = gt_reverse_complement(mrna_charseq, gt_str_length(mrnaseq),
                                      err);
    }
    if (!had_err) {
      gt_hashmap_add(sav->rnaseqs, last_mRNA, mrna_charseq);
    }
  }
  gt_feature_node_iterator_delete(fni);
  gt_str_delete(mrnaseq);
  return had_err;
}
コード例 #18
0
static int snp_annotator_visitor_feature_node(GtNodeVisitor *nv,
                                              GtFeatureNode *fn,
                                              GtError *err)
{
  int had_err = 0;
  GtSNPAnnotatorVisitor *sav;
  GtFeatureNodeIterator *fni,
                        *mrnafni;
  GtFeatureNode *curnode,
                *curnode2;
  GtRange snp_rng;
  gt_error_check(err);
  sav = snp_annotator_visitor_cast(nv);

  /* ignore non-nodes */
  if (!fn) return 0;

  /* only process SNPs */
  if (!(gt_feature_node_get_type(fn) == sav->SNV_type ||
        gt_feature_node_get_type(fn) == sav->SNP_type)) {
    return 0;
  }

  fni = gt_feature_node_iterator_new_direct(sav->gene);
  snp_rng = gt_genome_node_get_range((GtGenomeNode*) fn);
  while (!had_err && (curnode = gt_feature_node_iterator_next(fni))) {
    if (gt_feature_node_get_type(curnode) == sav->mRNA_type) {
      GtStrand mrna_strand = gt_feature_node_get_strand(curnode);
#ifndef NDEBUG
      const char *refstr;
#endif
      GtUword mrnasnppos = 0;
      mrnafni = gt_feature_node_iterator_new(curnode);
      while (!had_err && (curnode2 = gt_feature_node_iterator_next(mrnafni))) {
        if (gt_feature_node_get_type(curnode2) == sav->CDS_type) {
          GtRange cds_rng = gt_genome_node_get_range((GtGenomeNode*) curnode2);
          if (gt_range_overlap(&snp_rng, &cds_rng)) {
            char *mRNA,
                 origchar;
            char *variantchars, *variantptr = NULL;
            GT_UNUSED char *refchars, *refptr = NULL;
            mRNA = (char*) gt_hashmap_get(sav->rnaseqs, curnode);
            gt_assert(mRNA);
            gt_assert(snp_rng.start >= cds_rng.start);
            mrnasnppos += (snp_rng.start - cds_rng.start);
            if (mrna_strand == GT_STRAND_REVERSE)
              mrnasnppos = strlen(mRNA) - mrnasnppos - 1;
            gt_assert(mrnasnppos < strlen(mRNA));
            origchar = mRNA[mrnasnppos];
#ifndef NDEBUG
            refstr = refptr = gt_cstr_dup(gt_feature_node_get_attribute(fn,
                                                         GT_GVF_REFERENCE_SEQ));
            if (!had_err && refstr) {
              if (gt_feature_node_get_strand(curnode) == GT_STRAND_REVERSE) {
                int rval = gt_complement(&origchar, origchar, err);
                gt_assert(rval == 0);
              }
              gt_assert(toupper(origchar) == toupper(refstr[0]));
            }
#endif
            variantchars = variantptr = gt_cstr_dup(
                         gt_feature_node_get_attribute(fn, GT_GVF_VARIANT_SEQ));
            if (!had_err && variantchars) {
              GtUword i = 0;

              while (!had_err &&
                              (*variantchars != ';' && *variantchars != '\0')) {
                if (*variantchars != ',' && *variantchars != origchar) {
                  char variantchar = *variantchars;
#ifndef NDEBUG
                  char refchar = refstr ? refstr[0] : '-';  /* XXX */
                  if (!had_err && mrna_strand == GT_STRAND_REVERSE)
                    had_err = gt_complement(&refchar, refchar, err);
#endif
                  if (!had_err && mrna_strand == GT_STRAND_REVERSE)
                    had_err = gt_complement(&variantchar, variantchar, err);
                  if (!had_err) {
                    had_err = snp_annotator_classify_snp(sav, curnode, fn,
                                                         mrnasnppos,
                                                         i++,
                                                         variantchar,
#ifndef NDEBUG
                                                         refchar,
#endif
                                                         err);
                  }
                } else if (*variantchars == origchar) {
                  i++;
                }
                variantchars++;
              }
              gt_free(variantptr);
              gt_free(refptr);
            }
          } else {
            mrnasnppos += gt_range_length(&cds_rng);
          }
        }
      }
      gt_feature_node_iterator_delete(mrnafni);
    }
  }
  gt_feature_node_iterator_delete(fni);

  return had_err;
}
コード例 #19
0
int gt_ltrfileout_stream_next(GtNodeStream *ns, GtGenomeNode **gn, GtError *err)
{
  GtLTRdigestFileOutStream *ls;
  GtFeatureNode *fn;
  GtRange lltr_rng = {GT_UNDEF_UWORD, GT_UNDEF_UWORD},
          rltr_rng = {GT_UNDEF_UWORD, GT_UNDEF_UWORD},
          ppt_rng = {GT_UNDEF_UWORD, GT_UNDEF_UWORD},
          pbs_rng = {GT_UNDEF_UWORD, GT_UNDEF_UWORD};
  int had_err;
  GtUword i=0;

  gt_error_check(err);
  ls = gt_ltrdigest_file_out_stream_cast(ns);

  /* initialize this element */
  memset(&ls->element, 0, sizeof (GtLTRElement));

  /* get annotations from parser */
  had_err = gt_node_stream_next(ls->in_stream, gn, err);
  if (!had_err && *gn)
  {
    GtFeatureNodeIterator* gni;
    GtFeatureNode *mygn;

    /* only process feature nodes */
    if (!(fn = gt_feature_node_try_cast(*gn)))
      return 0;

    ls->element.pdomorder = gt_array_new(sizeof (const char*));

    /* fill LTRElement structure from GFF3 subgraph */
    gni = gt_feature_node_iterator_new(fn);
    for (mygn = fn; mygn != NULL; mygn = gt_feature_node_iterator_next(gni))
      (void) gt_genome_node_accept((GtGenomeNode*) mygn,
                                   (GtNodeVisitor*) ls->lv,
                                   err);
    gt_feature_node_iterator_delete(gni);
  }

  if (!had_err && ls->element.mainnode != NULL)
  {
    char desc[GT_MAXFASTAHEADER];
    GtFeatureNode *ltr3, *ltr5;
    GtStr *sdesc, *sreg, *seq;

    /* find sequence in GtEncseq */
    sreg = gt_genome_node_get_seqid((GtGenomeNode*) ls->element.mainnode);

    sdesc = gt_str_new();
    had_err = gt_region_mapping_get_description(ls->rmap, sdesc, sreg, err);

    if (!had_err) {
      GtRange rng;
      ls->element.seqid = gt_calloc((size_t) ls->seqnamelen+1, sizeof (char));
      (void) snprintf(ls->element.seqid,
                      MIN((size_t) gt_str_length(sdesc),
                          (size_t) ls->seqnamelen)+1,
                      "%s", gt_str_get(sdesc));
      gt_cstr_rep(ls->element.seqid, ' ', '_');
      if (gt_str_length(sdesc) > (GtUword) ls->seqnamelen)
        ls->element.seqid[ls->seqnamelen] = '\0';

      (void) gt_ltrelement_format_description(&ls->element,
                                              ls->seqnamelen,
                                              desc,
                                              (size_t) (GT_MAXFASTAHEADER-1));
      gt_str_delete(sdesc);

      /* output basic retrotransposon data */
      lltr_rng = gt_genome_node_get_range((GtGenomeNode*) ls->element.leftLTR);
      rltr_rng = gt_genome_node_get_range((GtGenomeNode*) ls->element.rightLTR);
      rng = gt_genome_node_get_range((GtGenomeNode*) ls->element.mainnode);
      gt_file_xprintf(ls->tabout_file,
                      GT_WU"\t"GT_WU"\t"GT_WU"\t%s\t"GT_WU"\t"GT_WU"\t"GT_WU"\t"
                      GT_WU"\t"GT_WU"\t"GT_WU"\t",
                      rng.start, rng.end, gt_ltrelement_length(&ls->element),
                      ls->element.seqid, lltr_rng.start, lltr_rng.end,
                      gt_ltrelement_leftltrlen(&ls->element), rltr_rng.start,
                      rltr_rng.end, gt_ltrelement_rightltrlen(&ls->element));
    }
    seq = gt_str_new();

    /* output TSDs */
    if (!had_err && ls->element.leftTSD != NULL)
    {
      GtRange tsd_rng;
      tsd_rng = gt_genome_node_get_range((GtGenomeNode*) ls->element.leftTSD);
      had_err = gt_extract_feature_sequence(seq,
                                       (GtGenomeNode*) ls->element.leftTSD,
                                       gt_symbol(gt_ft_target_site_duplication),
                                       false,
                                       NULL, NULL, ls->rmap, err);
      if (!had_err) {
        gt_file_xprintf(ls->tabout_file,
                         ""GT_WU"\t"GT_WU"\t%s\t",
                         tsd_rng.start,
                         tsd_rng.end,
                         gt_str_get(seq));
      }
    gt_str_reset(seq);
    } else gt_file_xprintf(ls->tabout_file, "\t\t\t");

    if (!had_err && ls->element.rightTSD != NULL)
    {
      GtRange tsd_rng;

      tsd_rng = gt_genome_node_get_range((GtGenomeNode*) ls->element.rightTSD);
      had_err = gt_extract_feature_sequence(seq,
                                       (GtGenomeNode*) ls->element.rightTSD,
                                       gt_symbol(gt_ft_target_site_duplication),
                                       false,
                                       NULL, NULL, ls->rmap, err);
      if (!had_err) {
        gt_file_xprintf(ls->tabout_file,
                           ""GT_WU"\t"GT_WU"\t%s\t",
                           tsd_rng.start,
                           tsd_rng.end,
                           gt_str_get(seq));
      }
      gt_str_reset(seq);
    } else gt_file_xprintf(ls->tabout_file, "\t\t\t");

    /* output PPT */
    if (!had_err && ls->element.ppt != NULL)
    {
      GtStrand ppt_strand = gt_feature_node_get_strand(ls->element.ppt);

      ppt_rng = gt_genome_node_get_range((GtGenomeNode*) ls->element.ppt);
      had_err = gt_extract_feature_sequence(seq,
                                            (GtGenomeNode*) ls->element.ppt,
                                            gt_symbol(gt_ft_RR_tract), false,
                                            NULL, NULL, ls->rmap, err);
      if (!had_err) {
        gt_fasta_show_entry(desc, gt_str_get(seq), gt_range_length(&ppt_rng),
                            GT_FSWIDTH, ls->pptout_file);
        gt_file_xprintf(ls->tabout_file,
                           ""GT_WU"\t"GT_WU"\t%s\t%c\t%d\t",
                           ppt_rng.start,
                           ppt_rng.end,
                           gt_str_get(seq),
                           GT_STRAND_CHARS[ppt_strand],
                           (ppt_strand == GT_STRAND_FORWARD ?
                               abs((int) (rltr_rng.start - ppt_rng.end)) :
                               abs((int) (lltr_rng.end - ppt_rng.start))));
      }
      gt_str_reset(seq);
    } else gt_file_xprintf(ls->tabout_file, "\t\t\t\t\t");

    /* output PBS */
    if (!had_err && ls->element.pbs != NULL)
    {
      GtStrand pbs_strand;

      pbs_strand = gt_feature_node_get_strand(ls->element.pbs);
      pbs_rng = gt_genome_node_get_range((GtGenomeNode*) ls->element.pbs);
      had_err = gt_extract_feature_sequence(seq,
                                           (GtGenomeNode*) ls->element.pbs,
                                           gt_symbol(gt_ft_primer_binding_site),
                                           false, NULL, NULL, ls->rmap, err);
      if (!had_err) {
        gt_fasta_show_entry(desc, gt_str_get(seq), gt_range_length(&pbs_rng),
                            GT_FSWIDTH, ls->pbsout_file);
        gt_file_xprintf(ls->tabout_file,
                         ""GT_WU"\t"GT_WU"\t%c\t%s\t%s\t%s\t%s\t%s\t",
                         pbs_rng.start,
                         pbs_rng.end,
                         GT_STRAND_CHARS[pbs_strand],
                         gt_feature_node_get_attribute(ls->element.pbs, "trna"),
                         gt_str_get(seq),
                         gt_feature_node_get_attribute(ls->element.pbs,
                                                       "pbsoffset"),
                         gt_feature_node_get_attribute(ls->element.pbs,
                                                       "trnaoffset"),
                         gt_feature_node_get_attribute(ls->element.pbs,
                                                       "edist"));
      }
      gt_str_reset(seq);
    } else gt_file_xprintf(ls->tabout_file, "\t\t\t\t\t\t\t\t");

    /* output protein domains */
    if (!had_err && ls->element.pdoms != NULL)
    {
      GtStr *pdomorderstr = gt_str_new();
      for (i=0; !had_err && i<gt_array_size(ls->element.pdomorder); i++)
      {
        const char* key = *(const char**) gt_array_get(ls->element.pdomorder,
                                                       i);
        GtArray *entry = (GtArray*) gt_hashmap_get(ls->element.pdoms, key);
        had_err = write_pdom(ls, entry, key, ls->rmap, desc, err);
      }

      if (GT_STRAND_REVERSE == gt_feature_node_get_strand(ls->element.mainnode))
        gt_array_reverse(ls->element.pdomorder);

      for (i=0 ;!had_err && i<gt_array_size(ls->element.pdomorder); i++)
      {
        const char* name = *(const char**) gt_array_get(ls->element.pdomorder,
                                                        i);
        gt_str_append_cstr(pdomorderstr, name);
        if (i != gt_array_size(ls->element.pdomorder)-1)
          gt_str_append_cstr(pdomorderstr, "/");
      }
      gt_file_xprintf(ls->tabout_file, "%s", gt_str_get(pdomorderstr));
      gt_str_delete(pdomorderstr);
    }

    /* output LTRs (we just expect them to exist) */
    switch (gt_feature_node_get_strand(ls->element.mainnode))
    {
      case GT_STRAND_REVERSE:
        ltr5 = ls->element.rightLTR;
        ltr3 = ls->element.leftLTR;
        break;
      case GT_STRAND_FORWARD:
      default:
        ltr5 = ls->element.leftLTR;
        ltr3 = ls->element.rightLTR;
        break;
    }

    if (!had_err) {
      had_err = gt_extract_feature_sequence(seq, (GtGenomeNode*) ltr5,
                                          gt_symbol(gt_ft_long_terminal_repeat),
                                          false,
                                          NULL, NULL, ls->rmap, err);
    }
    if (!had_err) {
      gt_fasta_show_entry(desc, gt_str_get(seq), gt_str_length(seq),
                          GT_FSWIDTH, ls->ltr5out_file);
      gt_str_reset(seq);
    }
    if (!had_err) {
      had_err = gt_extract_feature_sequence(seq, (GtGenomeNode*) ltr3,
                                          gt_symbol(gt_ft_long_terminal_repeat),
                                          false,
                                          NULL, NULL, ls->rmap, err);
    }
    if (!had_err) {
      gt_fasta_show_entry(desc, gt_str_get(seq), gt_str_length(seq),
                          GT_FSWIDTH, ls->ltr3out_file);
      gt_str_reset(seq);
    }

    /* output complete oriented element */
    if (!had_err) {
      had_err = gt_extract_feature_sequence(seq,
                                           (GtGenomeNode*) ls->element.mainnode,
                                           gt_symbol(gt_ft_LTR_retrotransposon),
                                           false,
                                           NULL, NULL, ls->rmap, err);
    }
    if (!had_err) {
      gt_fasta_show_entry(desc,gt_str_get(seq), gt_str_length(seq),
                          GT_FSWIDTH, ls->elemout_file);
      gt_str_reset(seq);
    }
    gt_file_xprintf(ls->tabout_file, "\n");
    gt_str_delete(seq);
  }
  gt_hashmap_delete(ls->element.pdoms);
  gt_array_delete(ls->element.pdomorder);
  gt_free(ls->element.seqid);
  return had_err;
}
コード例 #20
0
static int write_pdom(GtLTRdigestFileOutStream *ls, GtArray *pdoms,
                      const char *pdomname, GT_UNUSED GtRegionMapping *rmap,
                      char *desc, GtError *err)
{
  int had_err = 0;
  GtFile *seqfile = NULL,
            *alifile = NULL,
            *aafile = NULL;
  GtUword i = 0,
                seq_length = 0;
  GtStr *pdom_seq,
        *pdom_aaseq;
  gt_error_check(err);

  pdom_seq = gt_str_new();
  pdom_aaseq = gt_str_new();

  /* get protein domain output file */
  seqfile = (GtFile*) gt_hashmap_get(ls->pdomout_files, pdomname);
  if (seqfile == NULL)
  {
    /* no file opened for this domain yet, do it */
    char buffer[GT_MAXFILENAMELEN];
    (void) snprintf(buffer, (size_t) (GT_MAXFILENAMELEN-1), "%s_pdom_%s.fas",
                    ls->fileprefix, pdomname);
    seqfile = gt_file_xopen(buffer, "w+");
    gt_hashmap_add(ls->pdomout_files, gt_cstr_dup(pdomname), seqfile);
  }

  /* get protein alignment output file */
  if (ls->write_pdom_alignments)
  {
    alifile = (GtFile*) gt_hashmap_get(ls->pdomali_files, pdomname);
    if (alifile == NULL)
    {
      /* no file opened for this domain yet, do it */
      char buffer[GT_MAXFILENAMELEN];
      (void) snprintf(buffer, (size_t) (GT_MAXFILENAMELEN-1), "%s_pdom_%s.ali",
                      ls->fileprefix, pdomname);
      alifile = gt_file_xopen(buffer, "w+");
      gt_hashmap_add(ls->pdomali_files, gt_cstr_dup(pdomname), alifile);
    }
  }

  /* get amino acid sequence output file */
  if (ls->write_pdom_aaseqs)
  {
    aafile = (GtFile*) gt_hashmap_get(ls->pdomaa_files, pdomname);
    if (aafile == NULL)
    {
      /* no file opened for this domain yet, do it */
      char buffer[GT_MAXFILENAMELEN];
      (void) snprintf(buffer, (size_t) (GT_MAXFILENAMELEN-1),
                      "%s_pdom_%s_aa.fas",
                      ls->fileprefix, pdomname);
      aafile = gt_file_xopen(buffer, "w+");
      gt_hashmap_add(ls->pdomaa_files, gt_cstr_dup(pdomname), aafile);
    }
  }

  if (gt_array_size(pdoms) > 1UL)
  {
    for (i=1UL; i<gt_array_size(pdoms); i++)
    {
      gt_assert(gt_genome_node_cmp(*(GtGenomeNode**)gt_array_get(pdoms, i),
                                *(GtGenomeNode**)gt_array_get(pdoms, i-1))
                >= 0);
    }
    if (gt_feature_node_get_strand(*(GtFeatureNode**) gt_array_get(pdoms, 0UL))
        == GT_STRAND_REVERSE)
    {
      gt_array_reverse(pdoms);
    }
  }

  /* output protein domain data */
  for (i=0;i<gt_array_size(pdoms);i++)
  {
    GtRange pdom_rng;
    GtStr *ali,
          *aaseq;
    GtFeatureNode *fn;
    int rval;

    fn = *(GtFeatureNode**) gt_array_get(pdoms, i);

    ali = gt_genome_node_get_user_data((GtGenomeNode*) fn, "pdom_alignment");
    aaseq = gt_genome_node_get_user_data((GtGenomeNode*) fn, "pdom_aaseq");
    pdom_rng = gt_genome_node_get_range((GtGenomeNode*) fn);

    rval = gt_extract_feature_sequence(pdom_seq, (GtGenomeNode*) fn,
                                       gt_symbol(gt_ft_protein_match), false,
                                       NULL, NULL, rmap, err);

    if (rval)
    {
      had_err = -1;
      break;
    }
    if (ls->write_pdom_alignments && ali)
    {
      char buf[BUFSIZ];

      /* write away alignment */
      (void) snprintf(buf, BUFSIZ-1, "Protein domain alignment in translated "
                                     "sequence for candidate\n'%s':\n\n",
                                     desc);
      gt_file_xwrite(alifile, buf, (size_t) strlen(buf) * sizeof (char));
      gt_file_xwrite(alifile, gt_str_get(ali),
                        (size_t) gt_str_length(ali) * sizeof (char));
      gt_file_xwrite(alifile, "---\n\n", 5 * sizeof (char));
    }
    if (ls->write_pdom_aaseqs && aaseq)
    {
      /* append amino acid sequence */
      gt_str_append_str(pdom_aaseq, aaseq);
    }
    gt_genome_node_release_user_data((GtGenomeNode*) fn, "pdom_alignment");
    gt_genome_node_release_user_data((GtGenomeNode*) fn, "pdom_aaseq");
    seq_length += gt_range_length(&pdom_rng);
  }

  if (!had_err)
  {
    gt_fasta_show_entry(desc,
                        gt_str_get(pdom_seq),
                        seq_length,
                        GT_FSWIDTH,
                        seqfile);
    if (ls->write_pdom_aaseqs)
    {
      gt_fasta_show_entry(desc,
                          gt_str_get(pdom_aaseq),
                          gt_str_length(pdom_aaseq),
                          GT_FSWIDTH,
                          aafile);
    }
  }
  gt_str_delete(pdom_seq);
  gt_str_delete(pdom_aaseq);
  return had_err;
}
コード例 #21
0
ファイル: diagram.c プロジェクト: AlexWoroschilow/uni_hamburg
static int process_node(GtDiagram *d, GtFeatureNode *node,
                        GtFeatureNode *parent, GtError *err)
{
  GtRange elem_range;
  bool *collapse;
  GtShouldGroupByParent *group;
  const char *feature_type = NULL,
             *parent_gft = NULL;
  double tmp;
  GtStyleQueryStatus rval;
  GtUword max_show_width = GT_UNDEF_UWORD,
                par_max_show_width = GT_UNDEF_UWORD;

  gt_assert(d && node);

  gt_log_log(">> getting '%s'", gt_feature_node_get_type(node));

  /* skip pseudonodes */
  if (gt_feature_node_is_pseudo(node))
    return 0;

  feature_type = gt_feature_node_get_type(node);
  gt_assert(feature_type);

  /* discard elements that do not overlap with visible range */
  elem_range = gt_genome_node_get_range((GtGenomeNode*) node);
  if (!gt_range_overlap(&d->range, &elem_range))
    return 0;

  /* get maximal view widths in nucleotides to show this type */
  rval = gt_style_get_num(d->style, feature_type, "max_show_width", &tmp, NULL,
                          err);
  switch (rval) {
    case GT_STYLE_QUERY_OK:
      max_show_width = tmp;
      break;
    case GT_STYLE_QUERY_ERROR:
      return -1;
      break; /* should never be reached */
    default:
      /* do not change default value */
      break;
  }

  /* for non-root nodes, get maximal view with to show parent */
  if (parent)
  {
    if (!gt_feature_node_is_pseudo(parent))
    {
      parent_gft = gt_feature_node_get_type(parent);
      rval = gt_style_get_num(d->style,
                              parent_gft, "max_show_width",
                              &tmp, NULL, err);
      switch (rval) {
        case GT_STYLE_QUERY_OK:
          par_max_show_width = tmp;
          break;
        case GT_STYLE_QUERY_ERROR:
          return -1;
          break; /* should never be reached */
        default:
          /* do not change default value */
          break;
      }
    } else
      par_max_show_width = GT_UNDEF_UWORD;
  }

  /* check if this type is to be displayed at all */
  if (max_show_width != GT_UNDEF_UWORD &&
      gt_range_length(&d->range) > max_show_width)
  {
    return 0;
  }

  /* disregard parent node if it is configured not to be shown */
  if (parent
        && par_max_show_width != GT_UNDEF_UWORD
        && gt_range_length(&d->range) > par_max_show_width)
  {
    parent = NULL;
  }

  /* check if this is a collapsing type, cache result */
  if ((collapse = (bool*) gt_hashmap_get(d->collapsingtypes,
                                         feature_type)) == NULL)
  {
    collapse = gt_malloc(sizeof (bool));
    *collapse = false;
    if (gt_style_get_bool(d->style, feature_type, "collapse_to_parent",
                           collapse, NULL, err) == GT_STYLE_QUERY_ERROR) {
      gt_free(collapse);
      return -1;
    }
    gt_hashmap_add(d->collapsingtypes, (void*) feature_type, collapse);
  }

  /* check if type should be grouped by parent, cache result */
  if ((group = (GtShouldGroupByParent*) gt_hashmap_get(d->groupedtypes,
                                                       feature_type)) == NULL)
  {
    bool tmp;
    group = gt_malloc(sizeof (GtShouldGroupByParent));
    rval = gt_style_get_bool(d->style, feature_type, "group_by_parent",
                             &tmp, NULL, err);
    switch (rval) {
      case GT_STYLE_QUERY_OK:
        if (tmp)
          *group = GT_GROUP_BY_PARENT;
        else
          *group = GT_DO_NOT_GROUP_BY_PARENT;
        break;
      case GT_STYLE_QUERY_NOT_SET:
        *group = GT_UNDEFINED_GROUPING;
        break;
      case GT_STYLE_QUERY_ERROR:
        gt_free(group);
        return -1;
        break; /* should never be reached */
    }
    gt_hashmap_add(d->groupedtypes, (void*) feature_type, group);
  }

  /* decide where to place this feature: */
  if (*collapse)
  {
    /* user has specified collapsing to parent for this type */
    if (parent && !gt_feature_node_is_pseudo(parent)) {
      /* collapsing child nodes are added to upwards blocks,
         but never collapse into pseudo nodes */
      add_recursive(d, node, parent, node);
    } else {
      /* if no parent or only pseudo-parent, do not collapse */
      if (add_to_current(d, node, parent, err) < 0) {
        return -1;
      }
    }
  }
  else  /* (!*collapse) */
  {
    if (parent) {
      bool do_not_overlap = false;
      do_not_overlap = gt_feature_node_direct_children_do_not_overlap_st(parent,
                                                                         node);
      if (*group == GT_GROUP_BY_PARENT
          || (do_not_overlap && *group == GT_UNDEFINED_GROUPING))
      {
        if (gt_feature_node_is_pseudo(parent)
              && gt_feature_node_is_multi(node))
        {
          if (add_to_rep(d, node, parent, err) < 0) {
            return -1;
          }
        } else if
            (gt_feature_node_number_of_children(parent) > 1)
        {
          if (add_to_parent(d, node, parent, err) < 0) {
            return -1;
          }
        } else {
          if (add_to_current(d, node, parent, err) < 0) {
            return -1;
          }
        }
      } else {
        if (gt_feature_node_is_pseudo(parent)
              && gt_feature_node_is_multi(node))
        {
          if (add_to_rep(d, node, parent, err) < 0) {
            return -1;
          }
        } else {
          if (add_to_current(d, node, parent, err) < 0) {
            return -1;
          }
        }
      }
    } else {
      /* root nodes always get their own block */
      if (add_to_current(d, node, parent, err) < 0) {
        return -1;
      }
    }
  }

  /* we can now assume that this node (or its representative)
     has been processed into the reverse lookup structure */
#ifndef NDEBUG
  if (gt_feature_node_is_multi(node))
  {
    GtFeatureNode *rep;
    rep = gt_feature_node_get_multi_representative((GtFeatureNode*) node);
    gt_assert(gt_hashmap_get(d->nodeinfo, rep));
  }
  else
    gt_assert(gt_hashmap_get(d->nodeinfo, node));
#endif

  return 0;
}
コード例 #22
0
static int calc_spliced_alignments(GthSACollection *sa_collection,
                                   GthChainCollection *chain_collection,
                                   GthCallInfo *call_info,
                                   GthInput *input,
                                   GthStat *stat,
                                   GtUword gen_file_num,
                                   GtUword ref_file_num,
                                   bool directmatches,
                                   GthMatchInfo *match_info,
                                   GthDNACompletePathMatrixJT
                                   dna_complete_path_matrix_jt,
                                   GthProteinCompletePathMatrixJT
                                   protein_complete_path_matrix_jt)
{
  const unsigned char *ref_seq_tran, *ref_seq_orig, *ref_seq_tran_rc = NULL,
                      *ref_seq_orig_rc = NULL;
  GtUword chainctr, gen_offset = GT_UNDEF_UWORD, gen_total_length,
                ref_total_length;
  GtFile *outfp = call_info->out->outfp;
  GtRange gen_seq_bounds, gen_seq_bounds_rc;
  bool refseqisdna;
  GthChain *chain;
  GtRange range;
  GthSA *saA;
  int rval;

  gt_assert(sa_collection && chain_collection);

  refseqisdna = gth_input_ref_file_is_dna(input, ref_file_num);

  for (chainctr = 0;
       chainctr < gth_chain_collection_size(chain_collection);
       chainctr++) {
       chain = gth_chain_collection_get(chain_collection, chainctr);
    if (++match_info->call_number > call_info->firstalshown &&
        call_info->firstalshown > 0) {
      if (!(call_info->out->xmlout || call_info->out->gff3out))
        gt_file_xfputc('\n', outfp);
      else if (call_info->out->xmlout)
        gt_file_xprintf(outfp, "<!--\n");

      if (!call_info->out->gff3out) {
        gt_file_xprintf(outfp, "Maximal matching %s count (%u) reached.\n",
                        refseqisdna ? "EST" : "protein",
                        call_info->firstalshown);
        gt_file_xprintf(outfp, "Only the first %u matches will be "
                           "displayed.\n", call_info->firstalshown);
      }

      if (!(call_info->out->xmlout || call_info->out->gff3out))
        gt_file_xfputc('\n', outfp);
      else if (call_info->out->xmlout)
        gt_file_xprintf(outfp, "-->\n");

      match_info->max_call_number_reached = true;
      break; /* break out of loop */
    }

    /* compute considered genomic regions if not set by -frompos */
    if (!gth_input_use_substring_spec(input)) {
      gen_seq_bounds = gth_input_get_genomic_range(input, chain->gen_file_num,
                                                   chain->gen_seq_num);
      gen_total_length      = gt_range_length(&gen_seq_bounds);
      gen_offset            = gen_seq_bounds.start;
      gen_seq_bounds_rc     = gen_seq_bounds;
    }
    else {
      /* genomic multiseq contains exactly one sequence */
      gt_assert(gth_input_num_of_gen_seqs(input, chain->gen_file_num) == 1);
      gen_total_length = gth_input_genomic_file_total_length(input,
                                                             chain
                                                             ->gen_file_num);
      gen_seq_bounds.start    = gth_input_genomic_substring_from(input);
      gen_seq_bounds.end      = gth_input_genomic_substring_to(input);
      gen_offset              = 0;
      gen_seq_bounds_rc.start = gen_total_length - 1 - gen_seq_bounds.end;
      gen_seq_bounds_rc.end   = gen_total_length - 1 - gen_seq_bounds.start;
    }

    /* "retrieving" the reference sequence */
    range = gth_input_get_reference_range(input, chain->ref_file_num,
                                          chain->ref_seq_num);
    ref_seq_tran = gth_input_current_ref_seq_tran(input) + range.start;
    ref_seq_orig = gth_input_current_ref_seq_orig(input) + range.start;
    if (refseqisdna) {
      ref_seq_tran_rc = gth_input_current_ref_seq_tran_rc(input) + range.start;
      ref_seq_orig_rc = gth_input_current_ref_seq_orig_rc(input) + range.start;
    }
    ref_total_length = range.end - range.start + 1;

    /* check if protein sequences have a stop amino acid */
    if (!refseqisdna && !match_info->stop_amino_acid_warning &&
       ref_seq_orig[ref_total_length - 1] != GT_STOP_AMINO) {
      GtStr *ref_id = gt_str_new();
      gth_input_save_ref_id(input, ref_id, chain->ref_file_num,
                            chain->ref_seq_num);
      gt_warning("protein sequence '%s' (#" GT_WU " in file %s) does not end "
                 "with a stop amino acid ('%c'). If it is not a protein "
                 "fragment you should add a stop amino acid to improve the "
                 "prediction. For example with `gt seqtransform "
                 "-addstopaminos` (see http://genometools.org for details).",
                 gt_str_get(ref_id), chain->ref_seq_num,
                 gth_input_get_reference_filename(input, chain->ref_file_num),
                 GT_STOP_AMINO);
      match_info->stop_amino_acid_warning = true;
      gt_str_delete(ref_id);
    }

    /* allocating space for alignment */
    saA = gth_sa_new_and_set(directmatches, true, input, chain->gen_file_num,
                             chain->gen_seq_num, chain->ref_file_num,
                             chain->ref_seq_num, match_info->call_number,
                             gen_total_length, gen_offset, ref_total_length);

    /* extend the DP borders to the left and to the right */
    gth_chain_extend_borders(chain, &gen_seq_bounds, &gen_seq_bounds_rc,
                             gen_total_length, gen_offset);

    /* From here on the dp positions always refer to the forward strand of the
       genomic DNA. */

    /* call the Dynamic Programming */
    if (refseqisdna) {
      rval = call_dna_DP(directmatches, call_info, input, stat,
                         sa_collection, saA, gen_file_num, ref_file_num,
                         gen_total_length, gen_offset, &gen_seq_bounds,
                         &gen_seq_bounds_rc, ref_total_length, range.start,
                         chainctr, gth_chain_collection_size(chain_collection),
                         match_info, ref_seq_tran, ref_seq_orig,
                         ref_seq_tran_rc, ref_seq_orig_rc, chain,
                         dna_complete_path_matrix_jt,
                         protein_complete_path_matrix_jt);
    }
    else {
      rval = call_protein_DP(directmatches, call_info, input,
                             stat, sa_collection, saA, gen_file_num,
                             ref_file_num, gen_total_length, gen_offset,
                             &gen_seq_bounds, &gen_seq_bounds_rc,
                             ref_total_length, range.start, chainctr,
                             gth_chain_collection_size(chain_collection),
                             match_info, ref_seq_tran, ref_seq_orig, chain,
                             dna_complete_path_matrix_jt,
                             protein_complete_path_matrix_jt);
    }
    /* check return value */
    if (rval == GTH_ERROR_DP_PARAMETER_ALLOCATION_FAILED) {
      /* statistics bookkeeping */
      gth_stat_increment_numoffailedDPparameterallocations(stat);
      gth_stat_increment_numofundeterminedSAs(stat);
      /* free space */
      gth_sa_delete(saA);
      match_info->call_number--;
      continue; /* continue with the next DP range */
    }
    else if (rval)
      return -1;
  }

  if (!call_info->out->xmlout && !call_info->out->gff3out && !directmatches &&
      !match_info->significant_match_found &&
      match_info->call_number <= call_info->firstalshown) {
    show_no_match_line(gth_input_get_alphatype(input, ref_file_num), outfp);
  }

  return 0;
}
コード例 #23
0
static int gt_ltrdigest_pdom_visitor_feature_node(GtNodeVisitor *nv,
                                                  GtFeatureNode *fn,
                                                  GtError *err)
{
  GtLTRdigestPdomVisitor *lv;
  GtFeatureNodeIterator *fni;
  GtFeatureNode *curnode = NULL;
  int had_err = 0;
  GtRange rng;
  GtUword i;
  lv = gt_ltrdigest_pdom_visitor_cast(nv);
  gt_assert(lv);
  gt_error_check(err);

  /* traverse annotation subgraph and find LTR element */
  fni = gt_feature_node_iterator_new(fn);
  while (!had_err && (curnode = gt_feature_node_iterator_next(fni))) {
    if (strcmp(gt_feature_node_get_type(curnode), lv->root_type) == 0) {
      lv->ltr_retrotrans = curnode;
    }
  }
  gt_feature_node_iterator_delete(fni);

  if (!had_err && lv->ltr_retrotrans != NULL) {
    GtCodonIterator *ci;
    GtTranslator *tr;
    GtTranslatorStatus status;
    GtUword seqlen;
    char translated, *rev_seq;
#ifndef _WIN32
    FILE *instream;
    GtHMMERParseStatus *pstatus;
#endif
    unsigned int frame;
    GtStr *seq;

    seq = gt_str_new();
    rng = gt_genome_node_get_range((GtGenomeNode*) lv->ltr_retrotrans);
    lv->leftLTR_5 = rng.start - 1;
    lv->rightLTR_3 = rng.end - 1;
    seqlen = gt_range_length(&rng);

    had_err = gt_extract_feature_sequence(seq,
                                          (GtGenomeNode*) lv->ltr_retrotrans,
                                          lv->root_type,
                                          false, NULL, NULL, lv->rmap, err);

    if (!had_err) {
      for (i = 0UL; i < 3UL; i++) {
        gt_str_reset(lv->fwd[i]);
        gt_str_reset(lv->rev[i]);
      }

      /* create translations */
      ci = gt_codon_iterator_simple_new(gt_str_get(seq), seqlen, NULL);
      gt_assert(ci);
      tr = gt_translator_new(ci);
      status = gt_translator_next(tr, &translated, &frame, err);
      while (status == GT_TRANSLATOR_OK && translated) {
        gt_str_append_char(lv->fwd[frame], translated);
        status = gt_translator_next(tr, &translated, &frame, NULL);
      }
      if (status == GT_TRANSLATOR_ERROR) had_err = -1;
      if (!had_err) {
        rev_seq = gt_malloc((size_t) seqlen * sizeof (char));
        strncpy(rev_seq, gt_str_get(seq), (size_t) seqlen * sizeof (char));
        (void) gt_reverse_complement(rev_seq, seqlen, NULL);
        gt_codon_iterator_delete(ci);
        ci = gt_codon_iterator_simple_new(rev_seq, seqlen, NULL);
        gt_translator_set_codon_iterator(tr, ci);
        status = gt_translator_next(tr, &translated, &frame, err);
        while (status == GT_TRANSLATOR_OK && translated) {
          gt_str_append_char(lv->rev[frame], translated);
          status = gt_translator_next(tr, &translated, &frame, NULL);
        }
        if (status == GT_TRANSLATOR_ERROR) had_err = -1;
        gt_free(rev_seq);
      }
      gt_codon_iterator_delete(ci);
      gt_translator_delete(tr);
    }

    /* run HMMER and handle results */
    if (!had_err) {
#ifndef _WIN32
      int pid, pc[2], cp[2];
      GT_UNUSED int rval;

      (void) signal(SIGCHLD, SIG_IGN); /* XXX: for now, ignore child's
                                               exit status */
      rval = pipe(pc);
      gt_assert(rval == 0);
      rval = pipe(cp);
      gt_assert(rval == 0);

      switch ((pid = (int) fork())) {
        case -1:
          perror("Can't fork");
          exit(1);   /* XXX: error handling */
        case 0:    /* child */
          (void) close(1);    /* close current stdout. */
          rval = dup(cp[1]);  /* make stdout go to write end of pipe. */
          (void) close(0);    /* close current stdin. */
          rval = dup(pc[0]);  /* make stdin come from read end of pipe. */
          (void) close(pc[0]);
          (void) close(pc[1]);
          (void) close(cp[0]);
          (void) close(cp[1]);
          (void) execvp("hmmscan", lv->args); /* XXX: read path from env */
          perror("couldn't execute hmmscan!");
          exit(1);
        default:    /* parent */
          for (i = 0UL; i < 3UL; i++) {
            char buf[5];
            GT_UNUSED ssize_t written;
            (void) sprintf(buf, ">"GT_WU"%c\n", i, '+');
            written = write(pc[1], buf, 4 * sizeof (char));
            written = write(pc[1], gt_str_get(lv->fwd[i]),
                            (size_t) gt_str_length(lv->fwd[i]) * sizeof (char));
            written = write(pc[1], "\n", 1 * sizeof (char));
            (void) sprintf(buf, ">"GT_WU"%c\n", i, '-');
            written = write(pc[1], buf, 4 * sizeof (char));
            written = write(pc[1], gt_str_get(lv->rev[i]),
                            (size_t) gt_str_length(lv->rev[i]) * sizeof (char));
            written = write(pc[1], "\n", 1 * sizeof (char));
          }
          (void) close(pc[0]);
          (void) close(pc[1]);
          (void) close(cp[1]);
          instream = fdopen(cp[0], "r");
          pstatus = gt_hmmer_parse_status_new();
          had_err = gt_ltrdigest_pdom_visitor_parse_output(lv, pstatus,
                                                           instream, err);
          (void) fclose(instream);
          if (!had_err)
            had_err = gt_ltrdigest_pdom_visitor_process_hits(lv, pstatus, err);
          gt_hmmer_parse_status_delete(pstatus);
      }
#else
      /* XXX */
      gt_error_set(err, "HMMER call not implemented on Windows\n");
      had_err = -1;
#endif
    }
    gt_str_delete(seq);
  }
  if (!had_err)
    had_err = gt_ltrdigest_pdom_visitor_choose_strand(lv);
  return had_err;
}
コード例 #24
0
ファイル: canvas_cairo.c プロジェクト: oeigenbrod/genometools
/* Renders a ruler with dynamic scale labeling and optional grid. */
int gt_canvas_cairo_draw_ruler(GtCanvas *canvas, GtRange viewrange,
                                GtError *err)
{
  double step, minorstep, vmajor, vminor,
         theight = gt_graphics_get_text_height(canvas->pvt->g);
  long base_length, tick;
  GtColor rulercol, gridcol;
  GtStr *left_str, *right_str, *unit;
  char str[BUFSIZ];
  GtStyleQueryStatus rval;
  bool showgrid = true;
  gt_assert(canvas);

  if (gt_style_get_bool(canvas->pvt->sty, "format", "show_grid", &showgrid,
                        NULL, err) == GT_STYLE_QUERY_ERROR) {
    return -1;
  }
  if (gt_style_get_num(canvas->pvt->sty, "format", "ruler_font_size",
                       &theight, NULL, err) == GT_STYLE_QUERY_ERROR) {
    return -1;
  }

  /* get unit value from style, default: empty */
  unit = gt_str_new();
  if (gt_style_get_str(canvas->pvt->sty,
                       "format", "unit",
                       unit, NULL, err) == GT_STYLE_QUERY_ERROR) {
    gt_str_delete(unit);
    return -1;
  }

  /* get additional description texts from style */
  left_str = gt_str_new();
  rval = gt_style_get_str(canvas->pvt->sty, "format",
                          "ruler_left_text", left_str, NULL, err);
  switch (rval) {
    case GT_STYLE_QUERY_NOT_SET:
      gt_str_append_cstr(left_str, FIVE_PRIME_STRING);
      break;
    case GT_STYLE_QUERY_ERROR:
      gt_str_delete(unit);
      gt_str_delete(left_str);
      return -1;
      break;  /* shouldn't reach this */
    default:
      break;
  }
  right_str = gt_str_new();
  rval = gt_style_get_str(canvas->pvt->sty, "format",
                        "ruler_right_text", right_str, NULL, err);
  switch (rval) {
    case GT_STYLE_QUERY_NOT_SET:
      gt_str_append_cstr(right_str, THREE_PRIME_STRING);
      break;
    case GT_STYLE_QUERY_ERROR:
      gt_str_delete(unit);
      gt_str_delete(left_str);
      gt_str_delete(right_str);
      return -1;
      break;  /* shouldn't reach this */
    default:
      break;
  }

  /* reset font to default */
  gt_graphics_set_font(canvas->pvt->g,
                       "Sans",
                       SLANT_NORMAL,
                       WEIGHT_NORMAL,
                       theight);

  rulercol.red = rulercol.green = rulercol.blue = RULER_GREY;
  rulercol.alpha = 1.0;
  gridcol.red = gridcol.green = gridcol.blue = GRID_GREY;
  gridcol.alpha = 1.0;

  /* determine range and step of the scale */
  base_length = gt_range_length(&viewrange);

  /* determine tick steps */
  step = pow(10,ceil(log10(base_length))-1);
  minorstep = step/10.0;

  /* calculate starting positions */
  vminor = (double) (floor(viewrange.start / minorstep))*minorstep;
  vmajor = (double) (floor(viewrange.start / step))*step;

  /* draw major ticks */
  for (tick = vmajor; tick <= viewrange.end; tick += step)
  {
    double drawtick = (gt_coords_convert_point(viewrange, tick)
                       * (canvas->pvt->width-2*canvas->pvt->margins))
                       + canvas->pvt->margins;
    if (tick < viewrange.start) continue;
    gt_graphics_draw_vertical_line(canvas->pvt->g,
                                   drawtick,
                                   canvas->pvt->y + 30,
                                   rulercol,
                                   10,
                                   1.0);
    gt_format_ruler_label(str, tick, gt_str_get(unit), BUFSIZ);
    gt_graphics_draw_text_centered(canvas->pvt->g,
                                   drawtick,
                                   canvas->pvt->y + 20,
                                   str);
  }
  /* draw minor ticks */
  if (minorstep >= 1)
  {
    for (tick = vminor; tick <= viewrange.end; tick += minorstep)
    {
      double drawtick;
      if (tick < viewrange.start)
        continue;
      drawtick = (gt_coords_convert_point(viewrange, tick)
                    * (canvas->pvt->width-2*canvas->pvt->margins))
                  + canvas->pvt->margins;
      if (showgrid)
      {
        gt_graphics_draw_vertical_line(canvas->pvt->g,
                                       drawtick,
                                       canvas->pvt->y + 40,
                                       gridcol,
                                       canvas->pvt->height - 40 - 15,
                                       1.0);
      }
      gt_graphics_draw_vertical_line(canvas->pvt->g,
                                     drawtick,
                                     canvas->pvt->y + 35,
                                     rulercol,
                                     5,
                                     1.0);
    }
  }
  /* draw ruler line */
  gt_graphics_draw_horizontal_line(canvas->pvt->g,
                                   canvas->pvt->margins,
                                   canvas->pvt->y + 40,
                                   rulercol,
                                   canvas->pvt->width - 2
                                     * canvas->pvt->margins,
                                   1.25);

    gt_graphics_draw_text_right(canvas->pvt->g,
                              canvas->pvt->margins - 10,
                              canvas->pvt->y + 39 + (theight/2),
                              gt_str_get(left_str));
  gt_graphics_draw_text(canvas->pvt->g,
                        canvas->pvt->width - canvas->pvt->margins + 10,
                        canvas->pvt->y + 39 + (theight/2),
                        gt_str_get(right_str));

  gt_str_delete(unit);
  gt_str_delete(left_str);
  gt_str_delete(right_str);

  return 0;
}
コード例 #25
0
static int run_orffinder(GtRegionMapping *rmap,
                         GtFeatureNode *gf,
                         unsigned long start,
                         GT_UNUSED unsigned long end,
                         unsigned int min,
                         unsigned int max,
                         bool all,
                         GtError *err)
{
  int had_err = 0, i;
  unsigned long sum;
  GtCodonIterator* ci = NULL;
  GtTranslator* translator = NULL;
  GtORFIterator* orfi = NULL;
  GtORFIteratorStatus state;
  GtRange orf_rng, tmp_orf_rng[3];
  GtStr *seq;
  unsigned int orf_frame;

  /* forward strand */
  seq = gt_str_new();
  had_err = gt_extract_feature_sequence(seq,
                                        (GtGenomeNode*) gf,
                                        gt_feature_node_get_type(gf),
                                        false, NULL, NULL, rmap, err);

  ci = gt_codon_iterator_simple_new(gt_str_get(seq), gt_str_length(seq), err);
  gt_assert(ci);
  translator = gt_translator_new(ci);
  gt_assert(translator);

  orfi = gt_orf_iterator_new(ci, translator);
  gt_assert(orfi);

  for (i = 0; i < 3; i++) {
    tmp_orf_rng[i].start = GT_UNDEF_ULONG;
    tmp_orf_rng[i].end = GT_UNDEF_ULONG;
  }

  while ((state = gt_orf_iterator_next(orfi, &orf_rng, &orf_frame,
                                              err)) == GT_ORF_ITERATOR_OK) {
      if (all) {
        process_orf(orf_rng, orf_frame, GT_STRAND_FORWARD, gf,
                    start, min, max, err);
      } else {
        if (gt_range_length(&orf_rng) >
            gt_range_length(&tmp_orf_rng[orf_frame])) {
          tmp_orf_rng[orf_frame].start = orf_rng.start;
          tmp_orf_rng[orf_frame].end = orf_rng.end;
        }
      }
  }
  if (state == GT_ORF_ITERATOR_ERROR)
    had_err = -1;

  if (!had_err) {
    if (!all) {
      for (i = 0; i < 3; i++) {
        if (tmp_orf_rng[i].start != GT_UNDEF_ULONG) {
          process_orf(tmp_orf_rng[i], (unsigned int) i, GT_STRAND_FORWARD, gf,
                      start, min, max, err);
        }
      }
    }
    gt_codon_iterator_delete(ci);
    gt_translator_delete(translator);
    gt_orf_iterator_delete(orfi);
    orfi = NULL;
    ci = NULL;
    translator = NULL;

    for (i = 0; i < 3; i++) {
      tmp_orf_rng[i].start = GT_UNDEF_ULONG;
      tmp_orf_rng[i].end = GT_UNDEF_ULONG;
    }

    /* reverse strand */
    if (!had_err) {
      GT_UNUSED int rval = 0;
      unsigned long length = gt_str_length(seq);
      char *strp = (char*) gt_str_get_mem(seq);
      rval = gt_reverse_complement(strp, gt_str_length(seq), err);
      gt_assert(!rval); /* XXX */
      ci = gt_codon_iterator_simple_new(gt_str_get(seq), gt_str_length(seq),
                                        err);
      gt_assert(ci);
      translator = gt_translator_new(ci);
      gt_assert(translator);
      orfi = gt_orf_iterator_new(ci, translator);
      gt_assert(orfi);

      sum = start + length - 1;

      while ((state = gt_orf_iterator_next(orfi, &orf_rng, &orf_frame,
                                                  err)) == GT_ORF_ITERATOR_OK) {
          if (all) {
            process_orf(orf_rng, orf_frame, GT_STRAND_REVERSE, gf,
                        sum, min, max, err);
          } else {
            if (gt_range_length(&orf_rng) >
                gt_range_length(&tmp_orf_rng[orf_frame])) {
              tmp_orf_rng[orf_frame].start = orf_rng.start;
              tmp_orf_rng[orf_frame].end = orf_rng.end;
            }
          }
      }
      if (state == GT_ORF_ITERATOR_ERROR)
        had_err = -1;
      if (!had_err) {
        if (!all) {
          for (i = 0; i < 3; i++) {
            if (tmp_orf_rng[i].start != GT_UNDEF_ULONG) {
              process_orf(tmp_orf_rng[i], (unsigned int) i, GT_STRAND_REVERSE,
                          gf, sum, min, max, err);
            }
          }
        }
      }
    }
    gt_str_delete(seq);
    gt_codon_iterator_delete(ci);
    gt_translator_delete(translator);
    gt_orf_iterator_delete(orfi);
  }
  return had_err;
}
コード例 #26
0
static int cluster_sequences(GtArray *matches,
                             GtClusteredSet *cs,
                             GtHashmap *seqdesc2seqnum,
                             unsigned int psmall,
                             unsigned int plarge,
                             GtEncseq *encseq,
                             GtError *err)
{
  GtMatch *match;
  GtMatchEdgeTable matchedgetab;
  GtMatchEdge matchedge;
  GtRange rng_seq1,
          rng_seq2;
  int had_err = 0;
  unsigned long i,
                lsmall,
                llarge,
                matchlen1,
                matchlen2,
                num_of_seq,
                seqnum1 = 0,
                seqnum2 = 0;
  const char *seqid;

  num_of_seq = gt_encseq_num_of_sequences(encseq);
  gt_assert(matches && cs && seqdesc2seqnum && encseq);

  if (gt_clustered_set_num_of_elements(cs, err) != num_of_seq) {
    had_err = -1;
    gt_error_set(err,
                 "number of sequences (%lu) unequals number of elements in"
                 " clustered set (%lu)",
                 num_of_seq, gt_clustered_set_num_of_elements(cs, err));
  }
  if (!had_err) {
    matchedgetab.edges = gt_array_new(sizeof (GtMatchEdge));
    matchedgetab.num_of_edges = 0;

    for (i = 0; i < gt_array_size(matches); i++) {
      match = *(GtMatch**) gt_array_get(matches, i);
      gt_match_get_range_seq1(match, &rng_seq1);
      gt_match_get_range_seq2(match, &rng_seq2);

      matchlen1 =  gt_range_length(&rng_seq1);
      matchlen2 =  gt_range_length(&rng_seq2);

      seqid = gt_match_get_seqid1(match);
      if (gt_hashmap_get(seqdesc2seqnum, (void*) seqid) != NULL)
        seqnum1 = ((unsigned long) gt_hashmap_get(seqdesc2seqnum, seqid)) - 1;
      else {
        had_err = -1;
        gt_error_set(err, "key %s not found", seqid);
      }

      seqid = gt_match_get_seqid2(match);
      if (!had_err && gt_hashmap_get(seqdesc2seqnum, (void*) seqid) != NULL)
        seqnum2 = ((unsigned long) gt_hashmap_get(seqdesc2seqnum, seqid)) - 1;
      else {
        had_err = -1;
        gt_error_set(err, "key %s not found", seqid);
      }

      if (!had_err) {
        if (gt_encseq_seqlength(encseq, seqnum1) >
            gt_encseq_seqlength(encseq, seqnum2)) {
          llarge = gt_encseq_seqlength(encseq, seqnum1);
          lsmall = gt_encseq_seqlength(encseq, seqnum2);
        } else {
          lsmall = gt_encseq_seqlength(encseq, seqnum1);
          llarge = gt_encseq_seqlength(encseq, seqnum2);
        }
        if (((llarge * plarge)/100 <= matchlen1) &&
            ((lsmall * psmall)/100 <= matchlen1) &&
            ((llarge * plarge)/100 <= matchlen2) &&
            ((lsmall * psmall)/100 <= matchlen2)) {
          if (seqnum1 != seqnum2) {
            matchedge.matchnum0 = seqnum1;
            matchedge.matchnum1 = seqnum2;
            gt_array_add(matchedgetab.edges, matchedge);
            matchedgetab.num_of_edges++;
          }
        }
      }
    }
  }
  if (!had_err)
    if (gt_cluster_matches(cs, &matchedgetab, err) != 0)
      had_err = -1;
  if (!had_err)
    gt_array_delete(matchedgetab.edges);
  return had_err;
}