コード例 #1
0
static int gt_pbs_hit_compare(const void *h1, const void *h2)
{
  GtPBSHit *hp1 = *(GtPBSHit**) h1;
  GtPBSHit *hp2 = *(GtPBSHit**) h2;

  return (gt_double_compare(hp2->score, hp1->score));
}
コード例 #2
0
/* Per-model callback: <key> is the model name, <val> the GtHMMERModelHit
   collected for it and <data> the GtLTRdigestPdomVisitor.
   The forward hit list is used when best_fwd compares less than or equal to
   best_rev, the reverse list otherwise. A single hit is assigned chain
   number 0 directly; several hits are turned into fragments, sorted and
   passed to the global chaining routine. Afterwards every hit of the
   selected list is handed to the attach routine. Always returns 0. */
static int gt_ltrdigest_pdom_visitor_process_hit(GT_UNUSED void *key, void *val,
                                                 void *data,
                                                 GT_UNUSED GtError *err)
{
  GtLTRdigestPdomVisitor *visitor = (GtLTRdigestPdomVisitor*) data;
  GtHMMERModelHit *model_hit = (GtHMMERModelHit*) val;
  const char *model_name = (const char*) key;
  GtArray *strand_hits;
  GtFragment *fragments;
  GtUword num_hits,
          idx,
          chainno;

  /* pick the hit list of the better-scoring strand (ties go to forward) */
  strand_hits = (gt_double_compare(model_hit->best_fwd,
                                   model_hit->best_rev) <= 0)
                  ? model_hit->fwd_hits
                  : model_hit->rev_hits;
  gt_assert(strand_hits);

  num_hits = gt_array_size(strand_hits);
  if (num_hits == 0)
    return 0;

  if (num_hits == 1UL) {
    /* nothing to chain: the only hit forms chain 0 by itself */
    GtHMMERSingleHit *only_hit =
      *(GtHMMERSingleHit**) gt_array_get(strand_hits, 0);
    chainno = 0UL;
    gt_array_add(only_hit->chains, chainno);
    (void) gt_ltrdigest_pdom_visitor_attach_hit(visitor, model_hit, only_hit);
    return 0;
  }

  /* build one fragment per hit; the hit itself travels along as payload */
  fragments = gt_malloc((size_t) num_hits * sizeof (*fragments));
  for (idx = 0; idx < num_hits; idx++) {
    GtHMMERSingleHit *cur =
      *(GtHMMERSingleHit**) gt_array_get(strand_hits, idx);
    GtFragment *frag = fragments + idx;
    gt_assert(cur);
    frag->startpos1 = cur->hmmfrom;
    frag->endpos1   = cur->hmmto;
    frag->startpos2 = cur->alifrom;
    frag->endpos2   = cur->alito;
    /* weight: aligned length times hit score */
    frag->weight    = (GtWord) (cur->alito - cur->alifrom + 1) * cur->score;
    frag->data      = cur;
  }

  qsort(fragments, (size_t) num_hits, sizeof (*fragments),
        gt_ltrdigest_pdom_visitor_fragcmp);
  gt_log_log("%s: chaining "GT_WU" frags", model_name, num_hits);
  gt_globalchaining_max(fragments, num_hits,
                        (GtUword) visitor->chain_max_gap_length,
                        gt_ltrdigest_pdom_visitor_chainproc, &chainno);
  gt_free(fragments);

  /* attach all hits of the selected strand, in array order */
  for (idx = 0; idx < num_hits; idx++) {
    GtHMMERSingleHit *cur =
      *(GtHMMERSingleHit**) gt_array_get(strand_hits, idx);
    (void) gt_ltrdigest_pdom_visitor_attach_hit(visitor, model_hit, cur);
  }

  return 0;
}
コード例 #3
0
ファイル: mathsupport.c プロジェクト: 9beckert/TIR
/* Unit test for the floating-point helpers gt_double_equals_one(),
   gt_double_equals_double(), gt_double_compare() and
   gt_double_smaller_double(). Each expectation is checked via gt_ensure()
   with <had_err> as the status variable; <had_err> is returned. */
int gt_mathsupport_unit_test(GtError *err)
{
  int had_err = 0;
  /* perturbation which the checks below expect the helpers to treat as
     "no difference" */
  double less_than_epsilon = 0.0000000000000001;
  gt_error_check(err);

  /* comparison against 1.0 */
  gt_ensure(had_err, !gt_double_equals_one(1.1));
  gt_ensure(had_err, gt_double_equals_one(1));
  gt_ensure(had_err, gt_double_equals_one(1+less_than_epsilon));
  gt_ensure(had_err, !gt_double_equals_one(-1-less_than_epsilon));

  /* pairwise equality: clearly different values and sign flips must be
     unequal, values differing only by the perturbation must be equal */
  gt_ensure(had_err, !gt_double_equals_double(1.0, 2.0));
  gt_ensure(had_err, !gt_double_equals_double(-1.0, 1.0));
  gt_ensure(had_err, !gt_double_equals_double(1.0, -1.0));
  gt_ensure(had_err, !gt_double_equals_double(-1.0, 1+less_than_epsilon));
  gt_ensure(had_err, !gt_double_equals_double(1.0, 1.1));
  gt_ensure(had_err, gt_double_equals_double(1.0, 1+less_than_epsilon));
  gt_ensure(had_err, gt_double_equals_double(1.0, 1.0));
  gt_ensure(had_err, gt_double_equals_double(0.0, 0.0));
  gt_ensure(had_err, gt_double_equals_double(-1.0, -1.0));
  gt_ensure(had_err, gt_double_equals_double(-1.0+less_than_epsilon, -1.0));
  gt_ensure(had_err, gt_double_equals_double(-1.0, -1.0+less_than_epsilon));
  gt_ensure(had_err, gt_double_equals_double(1.0+less_than_epsilon, 1.0));
  gt_ensure(had_err, gt_double_equals_double(1.0, 1.0+less_than_epsilon));

  /* three-way comparison: 0 for equal, negative if the first argument is
     smaller, positive if it is larger */
  gt_ensure(had_err, gt_double_compare(1.0, 1.0) == 0);
  gt_ensure(had_err, gt_double_compare(1.0, 1.1) < 0);
  gt_ensure(had_err, gt_double_compare(1.1, 1.0) > 0);
  gt_ensure(had_err, gt_double_compare(1.1, -1.0) > 0);
  gt_ensure(had_err, gt_double_compare(-1.1, -1.0) < 0);
  gt_ensure(had_err, gt_double_compare(1+less_than_epsilon, 1.0) == 0);
  gt_ensure(had_err, gt_double_compare(1+less_than_epsilon, -1.0) > 0);
  gt_ensure(had_err, gt_double_compare(-1+less_than_epsilon, -1.0) == 0);
  gt_ensure(had_err, gt_double_compare(-1+less_than_epsilon, 1.0) < 0);

  /* strict "smaller than": a difference of only the perturbation must not
     count as smaller */
  gt_ensure(had_err, gt_double_smaller_double(1.0, 1.1));
  gt_ensure(had_err, gt_double_smaller_double(-1.0, 1.1));
  gt_ensure(had_err, gt_double_smaller_double(-1.1, -1.0));
  gt_ensure(had_err, !gt_double_smaller_double(-1.0, -1.1));
  gt_ensure(had_err, !gt_double_smaller_double(1.0-less_than_epsilon, 1.0));

  return had_err;
}
コード例 #4
0
/* Files <hit> under the model currently named by s->cur_model.
   If the model has no entry in s->models yet, one is created with empty
   forward/reverse hit arrays and both best values set to DBL_MAX, and is
   registered under a duplicated copy of the model name. The hit is appended
   to the array matching its strand (everything that is not forward goes to
   the reverse array) and the per-strand best value is lowered to the hit's
   e-value if that is smaller. */
static void gt_hmmer_parse_status_add_hit(GtHMMERParseStatus *s,
                                          GtHMMERSingleHit *hit)
{
  GtHMMERModelHit *model_hit;

  gt_assert(s);

  model_hit = gt_hashmap_get(s->models, gt_str_get(s->cur_model));
  if (model_hit == NULL) {
    /* first hit for this model: set up a fresh record */
    model_hit = gt_calloc((size_t) 1, sizeof (*model_hit));
    model_hit->fwd_hits = gt_array_new(sizeof (GtHMMERSingleHit*));
    model_hit->rev_hits = gt_array_new(sizeof (GtHMMERSingleHit*));
    model_hit->best_fwd = DBL_MAX;
    model_hit->best_rev = model_hit->best_fwd;
    model_hit->modelname = gt_cstr_dup(gt_str_get(s->cur_model));
    gt_hashmap_add(s->models, model_hit->modelname, model_hit);
  }
  gt_assert(model_hit && model_hit->fwd_hits && model_hit->rev_hits);

  if (hit->strand != GT_STRAND_FORWARD) {
    if (gt_double_compare(model_hit->best_rev, hit->evalue) > 0) {
      model_hit->best_rev = hit->evalue;
    }
    gt_array_add(model_hit->rev_hits, hit);
  } else {
    if (gt_double_compare(model_hit->best_fwd, hit->evalue) > 0) {
      model_hit->best_fwd = hit->evalue;
    }
    gt_array_add(model_hit->fwd_hits, hit);
  }
}
コード例 #5
0
/* Unit test for gt_pbs_find(): writes a two-entry tRNA library to a
   temporary file, searches a synthetic element sequence that contains one
   forward and one reverse PBS, and checks number, ranking and all reported
   properties of the hits. Expectations are checked via ensure() with
   <had_err> as the status variable; <had_err> is returned. */
int gt_pbs_unit_test(GtError *err)
{
  int had_err = 0;
  GtLTRElement element;
  GtPBSOptions o;
  GtStr *tmpfilename;
  FILE *tmpfp;
  GtPBSResults *res;
  GtPBSHit *hit;
  double score1, score2;
  GtRange rng;
  char *rev_seq,
       *seq,
       tmp[BUFSIZ];
  /* 640 characters in total: 20 'a' of flank on either side of a
     600 character region */
  const char *fullseq =                           "aaaaaaaaaaaaaaaaaaaa"
                    "tatagcactgcatttcgaatatagtttcgaatatagcactgcatttcgaa"
                    "tatagcactgcatttcgaatatagtttcgaatatagcactgcatttcgaa"
                    "acatactaggatgctag" /* <- PBS forward */
                                     "aatatagtttcgaatatagcactgcatttcgaa"
                    "tatagcactgcatttcgaatatagtttcgaatatagcactgcatttcgaa"
                    "tatagcactgcatttcgaatatagtttcgaatatagcactgcatttcgaa"
                    "tatagcactgcatttcgaatatagtttcgaatatagcactgcatttcgaa"
                    "tatagcactgcatttcgaatatagtttcgaatatagcactgcatttcgaa"
                    "tatagcactgcatttcgaatatagtttcgaatatagcactgcatttcgaa"
                    "tatagcactgcatttcgaatatagtttcgaatatagcactgcatttcgaa"
                    "tatagcactgcatttcgaatatagtttcgaatatag"
                                   /* PBS reverse -> */ "gatcctaaggctac"
                    "tatagcactgcatttcgaatatagtttcgaatatagcactgcatttcgaa"
                    "tatagcactgcatttcgaatatagtttcgaatatagcactgcatttcgaa"
                    "aaaaaaaaaaaaaaaaaaaa";

  /* notice previous errors */
  gt_error_check(err);

  /* create temporary tRNA library file */
  tmpfilename = gt_str_new();
  tmpfp = gt_xtmpfp(tmpfilename);
  fprintf(tmpfp, ">test1\nccccccccccccccctagcatcctagtatgtccc\n"
                 ">test2\ncccccccccgatcctagggctaccctttc\n");
  gt_fa_xfclose(tmpfp);
  ensure(had_err, gt_file_exists(gt_str_get(tmpfilename)));

  /* setup testing parameters */
  o.radius = 30;
  o.max_edist = 1;
  o.alilen.start = 11;
  o.alilen.end = 30;
  o.offsetlen.start = 0;
  o.offsetlen.end = 5;
  o.trnaoffsetlen.start = 0;
  o.trnaoffsetlen.end =  40;
  o.ali_score_match = 5;
  o.ali_score_mismatch = -10;
  o.ali_score_insertion = o.ali_score_deletion = -20;
  /* NOTE(review): the result of gt_bioseq_new() is used without a NULL
     check -- confirm it cannot fail for the file written above */
  o.trna_lib = gt_bioseq_new(gt_str_get(tmpfilename), err);
  ensure(had_err, gt_bioseq_number_of_sequences(o.trna_lib) == 2);

  element.leftLTR_5 = 20;
  element.leftLTR_3 = 119;
  element.rightLTR_5 = 520;
  element.rightLTR_3 = 619;

  /* setup sequences */
  /* both buffers receive the 600 characters following the leading flank;
     they are raw character arrays without a terminating NUL */
  seq     = gt_malloc(600 * sizeof (char));
  rev_seq = gt_malloc(600 * sizeof (char));
  memcpy(seq,     fullseq + 20, 600);
  memcpy(rev_seq, fullseq + 20, 600);
  gt_reverse_complement(rev_seq, 600, err);

  /* try to find PBS in sequences */
  res = gt_pbs_find(seq, rev_seq, &element, &o, err);
  ensure(had_err, res != NULL);
  ensure(had_err, gt_pbs_results_get_number_of_hits(res) == 2);

  /* check first hit on forward strand */
  /* NOTE(review): the plain assignments from <res> and <hit> below are not
     wrapped in ensure(), so they run even if an earlier check has already
     failed -- confirm this cannot dereference NULL */
  hit = gt_pbs_results_get_ranked_hit(res, 0);
  ensure(had_err, hit != NULL);
  ensure(had_err, gt_pbs_hit_get_alignment_length(hit) == 17);
  ensure(had_err, gt_pbs_hit_get_edist(hit) == 0);
  ensure(had_err, gt_pbs_hit_get_offset(hit) == 0);
  ensure(had_err, gt_pbs_hit_get_tstart(hit) == 3);
  ensure(had_err, strcmp(gt_pbs_hit_get_trna(hit), "test1") == 0);
  rng = gt_pbs_hit_get_coords(hit);
  ensure(had_err, rng.start == 120);
  ensure(had_err, rng.end == 136);
  score1 = gt_pbs_hit_get_score(hit);
  ensure(had_err, gt_pbs_hit_get_strand(hit) == GT_STRAND_FORWARD);
  /* extract the reported range from fullseq into the zeroed scratch buffer
     (all but its last byte are cleared) and compare it with the expected
     PBS string */
  memset(tmp, 0, BUFSIZ-1);
  memcpy(tmp, fullseq + (rng.start * sizeof (char)),
         (rng.end - rng.start + 1) * sizeof (char));
  ensure(had_err, strcmp(tmp, "acatactaggatgctag" ) == 0);

  /* check second hit on reverse strand */
  hit = gt_pbs_results_get_ranked_hit(res, 1);
  ensure(had_err, hit != NULL);
  ensure(had_err, gt_pbs_hit_get_alignment_length(hit) == 14);
  ensure(had_err, gt_pbs_hit_get_edist(hit) == 1);
  ensure(had_err, gt_pbs_hit_get_offset(hit) == 0);
  ensure(had_err, gt_pbs_hit_get_tstart(hit) == 6);
  ensure(had_err, strcmp(gt_pbs_hit_get_trna(hit), "test2") == 0);
  rng = gt_pbs_hit_get_coords(hit);
  ensure(had_err, rng.start == 506);
  ensure(had_err, rng.end == 519);
  score2 = gt_pbs_hit_get_score(hit);
  /* the hit ranked first must have the strictly higher score */
  ensure(had_err, gt_double_compare(score1, score2) > 0);
  ensure(had_err, gt_pbs_hit_get_strand(hit) == GT_STRAND_REVERSE);
  memset(tmp, 0, BUFSIZ-1);
  memcpy(tmp, fullseq + (rng.start * sizeof (char)),
         (rng.end - rng.start + 1) * sizeof (char));
  ensure(had_err, strcmp(tmp, "gatcctaaggctac" ) == 0);

  /* clean up */
  gt_xremove(gt_str_get(tmpfilename));
  ensure(had_err, !gt_file_exists(gt_str_get(tmpfilename)));
  gt_str_delete(tmpfilename);
  gt_bioseq_delete(o.trna_lib);
  gt_free(rev_seq);
  gt_free(seq);
  gt_pbs_results_delete(res);

  return had_err;
}
コード例 #6
0
/* Sets the strand of lv->ltr_retrotrans from its protein match features.
   - no protein match on either strand: nothing is changed;
   - matches on one strand only: that strand is set;
   - matches on both strands: log(score) is summed per strand, the forward
     strand is chosen if its sum compares strictly smaller than the reverse
     sum (reverse otherwise), and all protein matches on a different strand
     than the chosen one are removed from the feature.
   Returns <had_err>, which is never set to anything but 0 here. */
static int gt_ltrdigest_pdom_visitor_choose_strand(GtLTRdigestPdomVisitor *lv)
{
  int had_err = 0;
  double fwd_log_sum = 0.0,
         rev_log_sum = 0.0,
         node_score;
  bool has_fwd = false,
       has_rev = false;
  GtFeatureNodeIterator *iter;
  GtFeatureNode *curnode = NULL;
  GtStrand node_strand,
           chosen;
  GtArray *discard;
  GtUword idx;

  /* first pass: accumulate log scores per strand */
  iter = gt_feature_node_iterator_new(lv->ltr_retrotrans);
  while ((curnode = gt_feature_node_iterator_next(iter)) != NULL) {
    if (strcmp(gt_feature_node_get_type(curnode), gt_ft_protein_match) != 0)
      continue;
    node_strand = gt_feature_node_get_strand(curnode);
    node_score = (double) gt_feature_node_get_score(curnode);
    switch (node_strand) {
      case GT_STRAND_FORWARD:
        fwd_log_sum += log(node_score);
        has_fwd = true;
        break;
      case GT_STRAND_REVERSE:
        rev_log_sum += log(node_score);
        has_rev = true;
        break;
      default:
        /* matches without a definite strand do not vote */
        break;
    }
  }
  gt_feature_node_iterator_delete(iter);

  /* no evidence at all: leave the element untouched */
  if (!has_fwd && !has_rev)
    return had_err;

  /* evidence on exactly one strand: take it, nothing to remove */
  if (has_fwd != has_rev) {
    gt_feature_node_set_strand(lv->ltr_retrotrans,
                               has_fwd ? GT_STRAND_FORWARD
                                       : GT_STRAND_REVERSE);
    return had_err;
  }

  /* conflicting evidence: decide by the summed log scores */
  gt_assert(has_rev && has_fwd);
  chosen = (gt_double_compare(fwd_log_sum, rev_log_sum) < 0)
             ? GT_STRAND_FORWARD
             : GT_STRAND_REVERSE;
  gt_feature_node_set_strand(lv->ltr_retrotrans, chosen);

  /* second pass: collect the matches which disagree with the decision;
     they are removed only after the iteration has finished */
  discard = gt_array_new(sizeof (GtFeatureNode*));
  iter = gt_feature_node_iterator_new(lv->ltr_retrotrans);
  while ((curnode = gt_feature_node_iterator_next(iter)) != NULL) {
    if (strcmp(gt_feature_node_get_type(curnode), gt_ft_protein_match) == 0
          && gt_feature_node_get_strand(curnode) != chosen) {
      gt_array_add(discard, curnode);
    }
  }
  gt_feature_node_iterator_delete(iter);

  gt_assert(gt_array_size(discard) > 0);
  for (idx = 0; idx < gt_array_size(discard); idx++) {
    GtFeatureNode *victim = *(GtFeatureNode**) gt_array_get(discard, idx);
    gt_feature_node_remove_leaf(lv->ltr_retrotrans, victim);
  }
  gt_array_delete(discard);

  return had_err;
}
コード例 #7
0
ファイル: mg_computepath.c プロジェクト: 9beckert/TIR
/* Computes the optimal path through the combined-score matrix by dynamic
   programming and passes the resulting path matrix to
   mg_compute_gene_prediction().

   combinedscore_matrix: per-row/per-column scores read as .matrix_score
   hit_information:      forwarded unchanged to the gene prediction
   rows:                 number of matrix rows that are filled
   contig_len:           number of matrix columns
   parsestruct_ptr:      forwarded to the gene prediction; presumably also
                         the source of the ARGUMENTSSTRUCT() lookup -- the
                         macro is defined elsewhere
   err:                  error object

   Returns the status of mg_compute_gene_prediction().

   NOTE(review): the path matrix is allocated with a fixed 7 rows while the
   loops run up to <rows>; callers must not pass rows > 7. Column 0 is
   written unconditionally, so contig_len is expected to be > 0. */
int mg_computepath(CombinedScoreMatrixEntry **combinedscore_matrix,
                   HitInformation *hit_information,
                   unsigned long rows,
                   unsigned long contig_len,
                   ParseStruct *parsestruct_ptr, GtError * err)
{
  int had_err = 0;

  /* matrix for the path computation */
  PathMatrixEntry **path_matrix;

  /* row_index: counter for the matrix rows; precursor_index: counter for
     the precursors (0 up to at most 2); maxpath_frame: row of the previous
     column from which the maximum was obtained */
  unsigned short row_index = 0,
    precursor_index = 0,
    precursors_row = 0,
    maxpath_frame = 0;

  /* position in the query DNA */
  unsigned long column_index = 0;

  /* current frame, precursor frame (one value out of precursors[]) and the
     array holding the precursor frames */
  short current_frame = 0,
    precursors_frame = 0,
    precursors[NUM_PRECURSORS];

  /* q is the value applied when leaving or entering a gene on the forward
     or reverse strand */
  double q = ARGUMENTSSTRUCT(leavegene_value),
    max_new = DBL_MIN,
    max_old = DBL_MIN;

  /* set once the first defined precursor of a cell has been evaluated */
  int have_max = 0;

  /* allocate the path matrix - size corresponds to the combined-score
     matrix */
  gt_array2dim_calloc(path_matrix, 7, contig_len);

  gt_error_check(err);

  /* the first column of the path matrix is taken over from the first
     column of the combined-score matrix */
  for (row_index = 0; row_index < rows; row_index++)
  {
    path_matrix[row_index][0].score =
      combinedscore_matrix[row_index][0].matrix_score;
    path_matrix[row_index][0].path_frame = row_index;
  }

  /* column-wise computation of the optimal path */
  for (column_index = 1; column_index < contig_len; column_index++)
  {
    for (row_index = 0; row_index < rows; row_index++)
    {
      /* Start every cell from the same clean state. The running maximum
         must not be seeded with a positive number (DBL_MIN is the smallest
         positive double, not the most negative one): otherwise a cell whose
         candidates are all negative would keep the seed and row 0 instead of
         its true maximum and the row that maximum came from. */
      have_max = 0;
      max_old = DBL_MIN;
      maxpath_frame = 0;

      /* convert the row counter into the corresponding reading frame */
      current_frame = get_current_frame(row_index);
      /* determine the possible precursor frames from the current frame and
         the position in the query DNA */
      compute_precursors(current_frame,
                         column_index,
                         precursors);

      /* compute the maximum over the possible precursors */
      for (precursor_index = 0;
           precursor_index < NUM_PRECURSORS
             && (precursors[precursor_index] != UNDEFINED);
           ++precursor_index)
      {
        /* current precursor frame - there are at most 3 possible
           precursors */
        precursors_frame = precursors[precursor_index];
        /* convert the precursor frame into the corresponding matrix row */
        precursors_row = get_matrix_row(precursors_frame);

        /* the DP recurrence has 3 cases
           case 1: switch from the reverse to the forward strand or vice
           versa */
        if ((current_frame < 0 && precursors_frame > 0) ||
            (current_frame > 0 && precursors_frame < 0))
        {
            max_new = path_matrix[precursors_row][column_index-1].score +
                      combinedscore_matrix[row_index][column_index].matrix_score
                      + 2*q;
        }
        /* case 2: plain change of the reading frame, i.e. from + to + or
           from - to - */
        else if (current_frame != 0 && precursors_frame != current_frame)
        {
            max_new = path_matrix[precursors_row][column_index-1].score +
                      combinedscore_matrix[row_index][column_index].matrix_score
                      + q;
        }
        /* case 3: reading frame is kept, or change from coding to
           non-coding or vice versa */
        else
        {
            max_new = path_matrix[precursors_row][column_index-1].score +
                      combinedscore_matrix[row_index][column_index]
                      .matrix_score;
        }

        /* keep the maximum of the up to 3 possibilities together with the
           row it stems from; the first defined candidate is always taken so
           that negative scores are handled correctly */
        if (!have_max || gt_double_compare(max_new, max_old) > 0)
        {
            max_old = max_new;
            maxpath_frame = precursors_row;
            have_max = 1;
        }
      }

      /* store the maximum and the "precursor" row; if no precursor was
         defined at all the cell keeps the placeholder DBL_MIN / row 0 that
         the previous implementation stored in this situation */
      path_matrix[row_index][column_index].score      = max_old;
      path_matrix[row_index][column_index].path_frame = maxpath_frame;
    }
  }

  /* run the gene prediction on the computed path matrix */
  had_err = mg_compute_gene_prediction(combinedscore_matrix,
                                       path_matrix,
                                       contig_len,
                                       hit_information,
                                       parsestruct_ptr, err);

  gt_array2dim_delete(path_matrix);

  return had_err;
}
コード例 #8
0
ファイル: mathsupport.c プロジェクト: 9beckert/TIR
/* Returns true iff gt_double_compare() ranks <d1> strictly above <d2>. */
bool gt_double_larger_double(double d1, double d2)
{
  const int order = gt_double_compare(d1, d2);

  return order > 0;
}
コード例 #9
0
ファイル: mathsupport.c プロジェクト: 9beckert/TIR
/* Returns true iff gt_double_compare() ranks <d1> strictly below <d2>. */
bool gt_double_smaller_double(double d1, double d2)
{
  const int order = gt_double_compare(d1, d2);

  return order < 0;
}