Пример #1
0
/* Copy the chain <src> into <dest>. Both pointers must be non-NULL.
   The bulk of the copy is delegated to chain_copy_core(); afterwards the
   forward and reverse range arrays of <src> are added to the corresponding
   arrays of <dest>. */
void gth_chain_copy(GthChain *dest, const GthChain *src)
{
  gt_assert(dest && src);

  /* everything except the range arrays */
  chain_copy_core(dest, src);

  /* range arrays: forward strand first, then reverse strand */
  gt_array_add_array(dest->forwardranges, src->forwardranges);
  gt_array_add_array(dest->reverseranges, src->reverseranges);
}
Пример #2
0
/* Filter and shrink the potential introns stored in <intronstoprocess>
   (an array of GtRange), in place.

   An intron is kept only if its length (end - start + 1) is at least
   2 * <icdelta> + <icminremintronlength>. Every kept intron is shrunk by
   <icdelta> positions on both sides before it is stored again; these are the
   introns which are cut out later. Shorter introns are dropped, that is, they
   are not cut out later. The relative order of the kept introns is
   preserved. */
static void potentialintronspostpro(GtArray *intronstoprocess,
                                    unsigned long icdelta,
                                    unsigned long icminremintronlength)
{
  const unsigned long required_length = 2 * icdelta + icminremintronlength;
  GtArray *saved = gt_array_new(sizeof (GtRange));
  unsigned long idx;

  /* move all candidates aside and start over with an empty result array */
  gt_array_add_array(saved, intronstoprocess);
  gt_array_set_size(intronstoprocess, 0);

  for (idx = 0; idx < gt_array_size(saved); idx++) {
    GtRange candidate = *(GtRange*) gt_array_get(saved, idx);

    /* too short: skip this intron */
    if (candidate.end - candidate.start + 1 < required_length)
      continue;

    /* long enough: keep it, minus the intron deltas on both borders */
    candidate.start += icdelta;
    candidate.end   -= icdelta;
    gt_array_add(intronstoprocess, candidate);
  }

  gt_array_delete(saved);
}
Пример #3
0
/* Compute the union of the ranges in <cov1> and <cov2> (arrays of GtRange).

   Side effect: the elements of <cov2> are added to <cov1>, and <cov1> is
   sorted with gt_range_compare() when it holds more than one element.

   Returns a newly created array of GtRange in which consecutive ranges that
   overlap (according to gt_range_overlap()) have been joined. The result is
   empty if both inputs are empty. */
static GtArray *gaeval_visitor_union(GtArray *cov1, GtArray *cov2)
{
  agn_assert(cov1 && cov2);

  gt_array_add_array(cov1, cov2);
  GtUword nranges = gt_array_size(cov1);
  if(nranges > 1)
    gt_array_sort(cov1, (GtCompare)gt_range_compare);

  GtArray *runion = gt_array_new(sizeof(GtRange));
  if(nranges == 0)
    return runion;

  /* seed the union with the first range */
  GtRange *current = gt_array_get(cov1, 0);
  gt_array_add(runion, *current);
  GtRange *last = gt_array_get(runion, 0);

  GtUword i;
  for(i = 1; i < nranges; i++)
  {
    current = gt_array_get(cov1, i);
    if(!gt_range_overlap(current, last))
    {
      /* disjoint: open a new range; re-fetch the pointer since adding an
         element may have changed where the array stores its elements */
      gt_array_add(runion, *current);
      last = gt_array_get(runion, gt_array_size(runion) - 1);
      continue;
    }
    /* overlapping: extend the most recent range of the union */
    *last = gt_range_join(current, last);
  }

  return runion;
}
Пример #4
0
/* In-place variant of generic_ranges_uniq(): the result is computed into a
   temporary array which then replaces the contents of <ranges>.
   <count> is passed through to generic_ranges_uniq() unchanged and the array
   returned by that call is handed back to the caller. */
static GtArray* generic_ranges_uniq_in_place(GtArray *ranges, bool count)
{
  GtArray *uniq, *counts;

  gt_assert(ranges);

  uniq = gt_array_new(sizeof (GtRange));
  counts = generic_ranges_uniq(uniq, ranges, count);

  /* XXX: could be more efficient with something like
     gt_array_replace(ranges, uniq) */
  gt_array_reset(ranges);
  gt_array_add_array(ranges, uniq);

  gt_array_delete(uniq);
  return counts;
}
Пример #5
0
/* Prepend the backtrace path <in_bp> to <out_bp>.

   Preconditions (asserted): both paths are non-NULL, <out_bp> has no start
   cutoffs set, <in_bp> has no cutoffs set, and the DP region of <in_bp> ends
   exactly where the DP region of <out_bp> begins, in both the genomic and the
   reference dimension.

   Afterwards <out_bp> starts where <in_bp> started, its DP lengths are the
   sums of both lengths, and the edit operations of <in_bp> have been added to
   those of <out_bp>. */
void gth_backtrace_path_prepend(GthBacktracePath *out_bp,
                                const GthBacktracePath *in_bp)
{
  gt_assert(out_bp && in_bp);
  gt_assert(!backtrace_path_start_cutoffs_are_set(out_bp));
  gt_assert(!backtrace_path_cutoffs_are_set(in_bp));
  /* the two DP regions have to be directly adjacent */
  gt_assert(in_bp->gen_dp_start + in_bp->gen_dp_length == out_bp->gen_dp_start);
  gt_assert(in_bp->ref_dp_start + in_bp->ref_dp_length == out_bp->ref_dp_start);

  /* genomic dimension */
  out_bp->gen_dp_start   = in_bp->gen_dp_start;
  out_bp->gen_dp_length += in_bp->gen_dp_length;

  /* reference dimension */
  out_bp->ref_dp_start   = in_bp->ref_dp_start;
  out_bp->ref_dp_length += in_bp->ref_dp_length;

  /* NOTE(review): the edit operations of <in_bp> are added after those
     already in <out_bp>; presumably the array stores the path in reverse
     order, so that this amounts to a prepend -- confirm */
  gt_array_add_array(out_bp->editoperations, in_bp->editoperations);
}
Пример #6
0
/* Advance <tool_iterator> by one tool.

   Returns false if the tool stack of the iterator is empty. Otherwise the
   topmost entry is popped, its name and tool are stored in <*name> and
   <*tool>, and true is returned.

   If the iterator has a prefix string attached (prefixptr), that string is
   reset and, when the entry has a prefix, set to the entry's prefix followed
   by the iterator's prefix separator.

   If the popped tool is a toolbox, the entries collected from it via
   add_tool_to_stack() are pushed onto the tool stack; otherwise the prefix of
   the popped entry is deleted. */
bool gt_tool_iterator_next(GtToolIterator *tool_iterator, const char **name,
                           GtTool **tool)
{
  ToolIterationInfo info;
  ToolEntry *current;
  GtArray *subtools;
  GtStr *subprefix;

  gt_assert(tool_iterator && name && tool);

  if (!gt_array_size(tool_iterator->tool_stack))
    return false;

  current = gt_array_pop(tool_iterator->tool_stack);
  *name = current->name;
  *tool = current->tool;

  if (tool_iterator->prefixptr) {
    gt_str_reset(tool_iterator->prefixptr);
    if (current->prefix) {
      gt_str_append_str(tool_iterator->prefixptr, current->prefix);
      gt_str_append_char(tool_iterator->prefixptr, tool_iterator->prefixsep);
    }
  }

  /* plain tool: nothing to expand, only release the prefix of the entry */
  if (!gt_tool_is_toolbox(current->tool)) {
    gt_str_delete(current->prefix);
    return true;
  }

  /* toolbox: the prefix handed to the iteration callback is the prefix of
     the toolbox entry (if any) followed by its name */
  subprefix = gt_str_new_cstr(current->prefix ? gt_str_get(current->prefix)
                                              : "");
  gt_str_append_cstr(subprefix, current->name);

  subtools = gt_array_new(sizeof (ToolEntry));
  info.arr = subtools;
  info.str = subprefix;
  gt_toolbox_iterate(gt_tool_get_toolbox(current->tool), add_tool_to_stack,
                     &info);

  /* <current> must not be used past this point: pushing onto the stack it
     was popped from may overwrite or move the entry */
  if (gt_array_size(subtools)) {
    gt_array_reverse(subtools); /* alphabetical order */
    gt_array_add_array(tool_iterator->tool_stack, subtools);
  }

  gt_array_delete(subtools);
  gt_str_delete(subprefix);
  return true;
}
Пример #7
0
/* Unit test for the range helpers gt_ranges_uniq(), gt_ranges_uniq_in_place(),
   gt_ranges_uniq_count(), gt_ranges_uniq_in_place_count() and
   gt_range_reorder().

   <ranges_in> is the input with duplicates, <ranges_out> the expected
   duplicate-free result and <counts> the expected multiplicity of every
   range in <ranges_out>.

   Returns <had_err>, which is 0 as long as no check failed.
   NOTE(review): gt_ensure() is defined outside this view; it presumably
   records a failed check in <had_err>/<err> -- confirm. */
int gt_range_unit_test(GtError *err)
{
  static GtRange ranges_in[] = {  { 620432, 620536 }, { 620432, 620536 },
                                { 620957, 621056 }, { 620957, 621056 },
                                { 625234, 625253 }, { 625500, 625655 },
                                { 625533, 625655 }, { 625533, 625655 },
                                { 627618, 627729 }, { 627618, 627729 },
                                { 627618, 627729 }, { 662083, 662194 },
                                { 662083, 662194 }, { 662083, 662194 },
                                { 663032, 663166 }, { 663032, 663166 },
                                { 663032, 663166 }, { 664782, 664906 },
                                { 664782, 664906 }, { 664782, 664906 },
                                { 665748, 665823 }, { 665748, 665823 },
                                { 665748, 665823 }, { 666825, 666881 },
                                { 666825, 666881 }, { 667797, 667954 },
                                { 667845, 667954 }, { 667845, 667954 },
                                { 679175, 679280 }, { 679175, 679280 },
                                { 679175, 679280 }, { 680427, 680540 },
                                { 680427, 680540 }, { 680427, 680540 },
                                { 684144, 684293 }, { 684144, 684293 },
                                { 684144, 684293 }, { 724903, 724985 },
                                { 724903, 724985 }, { 727099, 727325 },
                                { 727099, 727325 }, { 732544, 732821 },
                                { 732544, 732821 }, { 750016, 750280 },
                                { 750016, 750280 }, { 769508, 769734 },
                                { 769508, 769734 } },
               ranges_out[] = { { 620432, 620536 }, { 620957, 621056 },
                                { 625234, 625253 }, { 625500, 625655 },
                                { 625533, 625655 }, { 627618, 627729 },
                                { 662083, 662194 }, { 663032, 663166 },
                                { 664782, 664906 }, { 665748, 665823 },
                                { 666825, 666881 }, { 667797, 667954 },
                                { 667845, 667954 }, { 679175, 679280 },
                                { 680427, 680540 }, { 684144, 684293 },
                                { 724903, 724985 }, { 727099, 727325 },
                                { 732544, 732821 }, { 750016, 750280 },
                                { 769508, 769734 }};
  GtUword counts[] = { 2, 2, 1, 1, 2, 3, 3, 3, 3, 3, 2, 1, 2, 3, 3, 3, 2,
                             2, 2, 2, 2 };
  GtArray *ranges, *tmp_ranges, *ctr;
  GtUword i;
  int had_err = 0;
  gt_error_check(err);

  /* sanity check of the fixtures: one count per expected range */
  gt_ensure(sizeof (ranges_out) / sizeof (ranges_out[0]) ==
                  sizeof (counts)     / sizeof (counts[0]));

  /* test gt_ranges_uniq() */
  ranges = gt_array_new(sizeof (GtRange));
  tmp_ranges = gt_array_new(sizeof (GtRange));
  for (i = 0;
       i < sizeof (ranges_in) / sizeof (ranges_in[0]) && !had_err;
       i++)
    gt_array_add(ranges, ranges_in[i]);
  gt_ranges_uniq(tmp_ranges, ranges);
  /* the input array must be left untouched */
  gt_ensure(gt_array_size(ranges) ==
                  sizeof (ranges_in) / sizeof (ranges_in[0]));
  gt_ensure(gt_array_size(tmp_ranges) ==
                  sizeof (ranges_out) / sizeof (ranges_out[0]));
  for (i = 0; i < gt_array_size(tmp_ranges) && !had_err; i++) {
    gt_ensure(ranges_out[i].start ==
                    (*(GtRange*) gt_array_get(tmp_ranges, i)).start);
    gt_ensure(ranges_out[i].end ==
                    (*(GtRange*) gt_array_get(tmp_ranges, i)).end);
  }

  /* test gt_ranges_uniq_in_place() */
  gt_array_reset(tmp_ranges);
  gt_array_add_array(tmp_ranges, ranges);
  gt_ranges_uniq_in_place(tmp_ranges);
  /* verify the number of results before comparing element-wise: without this
     check a result that is too short would pass unnoticed, and one that is
     too long would index past the end of ranges_out[] in the loop below */
  gt_ensure(gt_array_size(tmp_ranges) ==
                  sizeof (ranges_out) / sizeof (ranges_out[0]));
  for (i = 0; i < gt_array_size(tmp_ranges) && !had_err; i++) {
    gt_ensure(ranges_out[i].start ==
                    (*(GtRange*) gt_array_get(tmp_ranges, i)).start);
    gt_ensure(ranges_out[i].end ==
                    (*(GtRange*) gt_array_get(tmp_ranges, i)).end);
  }

  /* test gt_ranges_uniq_count() */
  gt_array_reset(tmp_ranges);
  ctr = gt_ranges_uniq_count(tmp_ranges, ranges);
  gt_ensure(gt_array_size(tmp_ranges) == gt_array_size(ctr));
  gt_ensure(
            gt_array_size(ctr) == sizeof (counts) / sizeof (counts[0]));
  for (i = 0; i < gt_array_size(ctr) && !had_err; i++) {
    gt_ensure(counts[i] == *(GtUword*) gt_array_get(ctr, i));
    gt_ensure(ranges_out[i].start ==
                    (*(GtRange*) gt_array_get(tmp_ranges, i)).start);
    gt_ensure(ranges_out[i].end ==
                    (*(GtRange*) gt_array_get(tmp_ranges, i)).end);
  }
  gt_array_delete(ctr);

  /* test gt_ranges_uniq_in_place_count() */
  ctr = gt_ranges_uniq_in_place_count(ranges);
  gt_ensure(gt_array_size(ranges) == gt_array_size(ctr));
  gt_ensure(
            gt_array_size(ctr) == sizeof (counts) / sizeof (counts[0]));
  for (i = 0; i < gt_array_size(ctr) && !had_err; i++) {
    gt_ensure(counts[i] == *(GtUword*) gt_array_get(ctr, i));
    gt_ensure(
           ranges_out[i].start == (*(GtRange*)
                                             gt_array_get(ranges, i)).start);
    gt_ensure(
           ranges_out[i].end == (*(GtRange*) gt_array_get(ranges, i)).end);
  }
  gt_array_delete(ctr);

  /* test gt_range_reorder(): an ordered range stays as it is, a reversed one
     gets its borders swapped */
  if (!had_err) {
    GtRange range = { 1, 100 };
    range = gt_range_reorder(range);
    gt_ensure(range.start == 1 && range.end == 100);
    range.start = 100;
    range.end = 1;
    range = gt_range_reorder(range);
    gt_ensure(range.start == 1 && range.end == 100);
  }

  /* free */
  gt_array_delete(ranges);
  gt_array_delete(tmp_ranges);
  return had_err;
}
Пример #8
0
/* Rewrite <editoperations> (an array of Editoperation) in place such that the
   first match edit operation following an intron with 1 or 2 bases left has
   length 1: a longer match is split into a match of length 1 followed by a
   match holding the remaining length.

   After such an intron the function looks for the next match; a mismatch or a
   mismatch with 1 gap ends the search without splitting anything. All other
   edit operations are copied unchanged.

   An empty array is left untouched. */
static void ensure_eop_of_len_1_before_introns(GtArray *editoperations)
{
  Editoperation eop, *eopptr;
  Eoptype eoptype;
  unsigned long eoplength;
  GtArray *backup;
  bool processing_necessary = false,
       split_match          = false;

  /* nothing to do for an empty array; returning here also keeps the
     ``size - 1'' loop bound below from pointing before the array start */
  if (!gt_array_size(editoperations))
    return;

  /* check if processing is necessary
     the check is rather simple, it might be possible that
     ``processing_necessary'' is set to ``true'' whereas in fact no processing
     is necessary
     (the last edit operation is deliberately not inspected here) */
  for (eopptr = gt_array_get_space(editoperations);
       eopptr < (Editoperation*) gt_array_get_space(editoperations) +
                                 gt_array_size(editoperations) - 1;
       eopptr++) {
    if ((eoptype = gt_editoperation_type(*eopptr, true)) ==
        EOP_TYPE_INTRON_WITH_1_BASE_LEFT ||
        eoptype == EOP_TYPE_INTRON_WITH_2_BASES_LEFT) {
      processing_necessary = true;
      break;
    }
  }

  if (processing_necessary) {
    /* init backup for the editoperations */
    backup = gt_array_new(sizeof (Editoperation));

    /* fill backup */
    gt_array_add_array(backup, editoperations);

    /* reset the original edit operations */
    gt_array_set_size(editoperations, 0);

    /* process the backup and fill the original editoperations */
    for (eopptr = gt_array_get_space(backup);
         eopptr < (Editoperation*)
                  gt_array_get_space(backup) + gt_array_size(backup);
         eopptr++) {

      /* classify by the TYPE of the edit operation (not by its length), in
         line with the pre-check above */
      if ((eoptype = gt_editoperation_type(*eopptr, true)) ==
          EOP_TYPE_INTRON_WITH_1_BASE_LEFT ||
          eoptype == EOP_TYPE_INTRON_WITH_2_BASES_LEFT) {
        split_match = true;
      }
      else if (split_match) {
        if (eoptype == EOP_TYPE_MATCH) {
          split_match = false;
          if ((eoplength = gt_editoperation_length(*eopptr, true)) > 1) {
            /* replace the match by a match of length 1 ... */
            eop = 1;
            gt_array_add(editoperations, eop);
            /* ... followed by a match covering the rest */
            eop = eoplength - 1;
            gt_array_add(editoperations, eop);
            continue;
          }
        }
        else if (eoptype == EOP_TYPE_MISMATCH ||
                 eoptype == EOP_TYPE_MISMATCH_WITH_1_GAP) {
          split_match = false;
        }
      }
      /* everything that was not split is copied as is */
      gt_array_add(editoperations, *eopptr);
    }

    /* free backup */
    gt_array_delete(backup);
  }
}
Пример #9
0
/* Compute the integrity score of <genemodel>.

   The score is the weighted sum
     alpha * structure + beta * coverage + gamma * 5'UTR + epsilon * 3'UTR
   with the weights taken from v->params and the following components:
   - structure: for a gene model without introns, the CDS length relative to
     the expected CDS length (capped at 1.0); otherwise the value returned by
     gaeval_visitor_introns_confirmed() for the introns of the model and the
     gaps of all alignments overlapping it;
   - coverage: the value passed in as <coverage>;
   - 5'UTR / 3'UTR: the UTR length relative to the expected UTR length
     (capped at 1.0).

   If <components> is not NULL, the four components are stored in
   components[0..3] in the order structure, coverage, 5'UTR, 3'UTR; the
   caller has to provide room for four doubles.
   <error> is passed on to the feature index queries. */
static double gaeval_visitor_calculate_integrity(AgnGaevalVisitor *v,
                                                 GtFeatureNode *genemodel,
                                                 double coverage,
                                                 double *components,
                                                 GtError *error)
{
  agn_assert(v && genemodel);

  /* collect all alignments overlapping the gene model */
  GtStr *seqid = gt_genome_node_get_seqid((GtGenomeNode *)genemodel);
  GtRange mrna_range = gt_genome_node_get_range((GtGenomeNode *)genemodel);
  GtArray *overlapping = gt_array_new( sizeof(GtFeatureNode *) );
  /* initialized so that a failing lookup which does not write the flag is
     treated like "sequence not in the index" instead of reading an
     indeterminate value */
  bool hasseqid = false;
  gt_feature_index_has_seqid(v->alignments, &hasseqid, gt_str_get(seqid),error);
  if(hasseqid)
  {
    gt_feature_index_get_features_for_range(v->alignments, overlapping,
                                            gt_str_get(seqid), &mrna_range,
                                            error);
  }

  /* gather the gap features of all these alignments */
  GtArray *gaps = gt_array_new( sizeof(GtFeatureNode *) );
  while(gt_array_size(overlapping) > 0)
  {
    GtFeatureNode *alignment = *(GtFeatureNode **)gt_array_pop(overlapping);
    GtArray *agaps = agn_typecheck_select(alignment,
                                          gaeval_visitor_typecheck_gap);
    gt_array_add_array(gaps, agaps);
    gt_array_delete(agaps);
  }
  gt_array_delete(overlapping);

  /* UTR scores: observed length relative to expected length, capped at 1.0;
     the comparison comes first, so an expected length of 0 never divides */
  GtUword utr5p_len = agn_mrna_5putr_length(genemodel);
  double utr5p_score = 0.0;
  if(utr5p_len >= v->params.exp_5putr_len)
    utr5p_score = 1.0;
  else
    utr5p_score = (double)utr5p_len / (double)v->params.exp_5putr_len;

  GtUword utr3p_len = agn_mrna_3putr_length(genemodel);
  double utr3p_score = 0.0;
  if(utr3p_len >= v->params.exp_3putr_len)
    utr3p_score = 1.0;
  else
    utr3p_score = (double)utr3p_len / (double)v->params.exp_3putr_len;

  /* structure score */
  GtArray *introns = agn_typecheck_select(genemodel, agn_typecheck_intron);
  GtUword exoncount = agn_typecheck_count(genemodel, agn_typecheck_exon);
  agn_assert(gt_array_size(introns) == exoncount - 1);
  double structure_score = 0.0;
  if(gt_array_size(introns) == 0)
  {
    /* no introns: fall back to the CDS length */
    GtUword cdslen = agn_mrna_cds_length(genemodel);
    if(cdslen >= v->params.exp_cds_len)
      structure_score = 1.0;
    else
      structure_score = (double)cdslen / (double)v->params.exp_cds_len;
  }
  else
  {
    structure_score = gaeval_visitor_introns_confirmed(introns, gaps);
  }
  gt_array_delete(gaps);
  gt_array_delete(introns);

  double integrity = (v->params.alpha   * structure_score) +
                     (v->params.beta    * coverage)        +
                     (v->params.gamma   * utr5p_score)     +
                     (v->params.epsilon * utr3p_score);
  if(components != NULL)
  {
    components[0] = structure_score;
    components[1] = coverage;
    components[2] = utr5p_score;
    components[3] = utr3p_score;
  }

  return integrity;
}