Ejemplo n.º 1
0
/*
 * Unit test for the union-find based clustered set.
 *
 * Runs five scenarios of growing size (1, 2, 3, 4 and
 * CLUSTERED_SET_UNION_FIND_TEST_SIZE elements). Each scenario builds a fresh
 * set, merges some clusters and checks the resulting cluster numbers / counts
 * with gt_ensure(). A scenario is only started if all previous ones passed.
 *
 * Returns the value of had_err: 0 if every check passed.
 *
 * NOTE(review): had_err is never assigned directly in this function, so
 * gt_ensure() is presumably a macro that updates `had_err` (and reports
 * through `err`) by name -- do not rename either identifier.
 */
int gt_clustered_set_union_find_unit_test(GtError *err)
{
  int had_err = 0;
  int first, second, idx;
  GtClusteredSet *cs;

  /* Scenario 1: a single element. The test expects its cluster number to
     equal the number of elements and the cluster count to be 0. */
  cs = gt_clustered_set_union_find_new(1, err);
  gt_ensure(gt_clustered_set_union_find_num_of_elements(cs, err) ==
            gt_clustered_set_union_find_cluster_num(cs, 0, err));
  gt_ensure(gt_clustered_set_union_find_num_of_clusters(cs, err) == 0);
  gt_clustered_set_union_find_delete(cs, err);
  if (had_err)
    return had_err;

  /* Scenario 2: two elements merged into one cluster. */
  cs = gt_clustered_set_union_find_new(2, err);
  gt_clustered_set_union_find_merge_clusters(cs, 0, 1, err);
  gt_ensure(gt_clustered_set_union_find_num_of_clusters(cs, err) == 1);
  gt_ensure(gt_clustered_set_union_find_cluster_num(cs, 0, err) ==
            gt_clustered_set_union_find_cluster_num(cs, 1, err));
  gt_ensure(gt_clustered_set_union_find_num_of_clusters(cs, err) == 1);
  gt_clustered_set_union_find_delete(cs, err);
  if (had_err)
    return had_err;

  /* Scenario 3: three elements chained together (0-1, then 1-2); all of
     them must end up with the same cluster number. */
  cs = gt_clustered_set_union_find_new(3, err);
  gt_clustered_set_union_find_merge_clusters(cs, 0, 1, err);
  gt_clustered_set_union_find_merge_clusters(cs, 1, 2, err);
  gt_ensure(gt_clustered_set_union_find_num_of_clusters(cs, err) == 1);
  gt_ensure(gt_clustered_set_union_find_cluster_num(cs, 0, err) ==
            gt_clustered_set_union_find_cluster_num(cs, 1, err));
  gt_ensure(gt_clustered_set_union_find_cluster_num(cs, 0, err) ==
            gt_clustered_set_union_find_cluster_num(cs, 2, err));
  gt_ensure(gt_clustered_set_union_find_cluster_num(cs, 1, err) ==
            gt_clustered_set_union_find_cluster_num(cs, 2, err));
  gt_clustered_set_union_find_delete(cs, err);
  if (had_err)
    return had_err;

  /* Scenario 4: four elements, first paired up (two clusters expected),
     then the two pairs are joined. */
  cs = gt_clustered_set_union_find_new(4, err);
  gt_clustered_set_union_find_merge_clusters(cs, 0, 1, err);
  gt_clustered_set_union_find_merge_clusters(cs, 2, 3, err);
  gt_ensure(gt_clustered_set_union_find_num_of_clusters(cs, err) == 2);
  gt_clustered_set_union_find_merge_clusters(cs, 0, 2, err);

  /* every unordered pair (first, second) must share a cluster number */
  first = 0;
  while (first < 4 - 1) {
    second = first + 1;
    while (second < 4) {
      gt_ensure(gt_clustered_set_union_find_cluster_num(cs, first, err) ==
                gt_clustered_set_union_find_cluster_num(cs, second, err));
      second++;
    }
    first++;
  }
  gt_clustered_set_union_find_delete(cs, err);
  if (had_err)
    return had_err;

  /* Scenario 5: a large set. */
  cs = gt_clustered_set_union_find_new(CLUSTERED_SET_UNION_FIND_TEST_SIZE,
                                       err);

  /* before any merge, each element's cluster number is expected to equal
     the number of elements */
  idx = 0;
  while (!had_err && idx < CLUSTERED_SET_UNION_FIND_TEST_SIZE) {
    gt_ensure(gt_clustered_set_union_find_num_of_elements(cs, err) ==
              gt_clustered_set_union_find_cluster_num(cs, idx, err));
    idx++;
  }

  /* merge every element into the cluster of element 0 */
  idx = 1;
  while (!had_err && idx < CLUSTERED_SET_UNION_FIND_TEST_SIZE) {
    gt_clustered_set_union_find_merge_clusters(cs, 0, idx, err);
    idx++;
  }

  /* afterwards every element must report cluster number 0 */
  idx = 0;
  while (!had_err && idx < CLUSTERED_SET_UNION_FIND_TEST_SIZE) {
    gt_ensure(gt_clustered_set_union_find_cluster_num(cs, idx, err) == 0);
    idx++;
  }
  gt_clustered_set_union_find_delete(cs, err);

  return had_err;
}
Ejemplo n.º 2
0
/*
 * Clusters the sequences stored for <feature> and annotates the nodes in
 * <lcs> with the result.
 *
 * Steps:
 *   1. If <lcs> exposes a state string, replace it with
 *      "Clustering feature: <feature>".
 *   2. Look up the encoded sequence registered for <feature> and match it
 *      against itself with the LAST-based match iterator, collecting all
 *      matches.
 *   3. Build a map from sequence description to (sequence number + 1),
 *      create a union-find clustered set with one element per sequence,
 *      cluster the sequences from the matches and annotate the nodes.
 *   4. Release everything that was allocated, on success and on failure.
 *
 * Returns 0 on success and -1 if the match iterator could not be created,
 * reported an error, the clustered set could not be created, or
 * cluster_sequences() failed. The return value of cluster_annotate_nodes()
 * is deliberately ignored, as in the original implementation.
 */
static int process_feature(GtLTRClusterStream *lcs,
                           const char *feature,
                           GtError *err)
{
  GtArray *matches;
  GtMatchIterator *mi = NULL;
  GtMatch *match = NULL;
  GtMatchIteratorStatus status;
  GtEncseq *encseq;
  GtClusteredSet *cs = NULL;
  GtHashmap *seqdesc2seqnum = NULL;
  unsigned long i;
  int had_err = 0;

  if (lcs->current_state != NULL) {
    char tmp[BUFSIZ];
    gt_free(*lcs->current_state);
    (void) snprintf(tmp, BUFSIZ, "Clustering feature: %s", feature);
    *lcs->current_state = gt_cstr_dup(tmp);
  }
  matches = gt_array_new(sizeof (GtMatch*));
  /* NOTE(review): encseq is not checked for NULL here; an unknown feature
     is passed straight on to the match iterator -- confirm callers only
     pass registered features. */
  encseq = (GtEncseq*) gt_hashmap_get(lcs->feat_to_encseq, feature);
  /* %p requires a void pointer argument */
  gt_log_log("found encseq %p for feature %s", (void*) encseq, feature);

  /* self-comparison: the same encoded sequence is both query and target */
  mi = gt_match_iterator_last_new(encseq, encseq, lcs->match_score,
                                  lcs->mismatch_cost,
                                  lcs->gap_open_cost,
                                  lcs->gap_ext_cost,
                                  lcs->xdrop,
                                  lcs->ydrop,
                                  lcs->zdrop,
                                  lcs->k,
                                  lcs->mscoregapped,
                                  lcs->mscoregapless, err);
  if (mi == NULL) {
    had_err = -1;
  } else {
    while ((status = gt_match_iterator_next(mi, &match, err))
           != GT_MATCHER_STATUS_END) {
      if (status == GT_MATCHER_STATUS_OK) {
        gt_array_add(matches, match);
      } else {
        gt_assert(status == GT_MATCHER_STATUS_ERROR);
        had_err = -1;
        break;
      }
    }
  }

  if (!had_err) {
    unsigned long num_of_seq = gt_encseq_num_of_sequences(encseq);

    seqdesc2seqnum = gt_hashmap_new(GT_HASH_STRING, free_hash, NULL);
    for (i = 0; i < num_of_seq; i++) {
      unsigned long desclen;
      const char *description = gt_encseq_description(encseq, &desclen, i);
      /* The description comes with an explicit length, so build a
         NUL-terminated copy. The copy is handed to the hashmap as key
         (released through free_hash), which avoids a second duplicate. */
      char *key = gt_calloc((size_t) (desclen + 1), sizeof (char));
      strncpy(key, description, (size_t) desclen);
      key[desclen] = '\0';
      /* store i + 1 so that sequence 0 is distinguishable from NULL */
      gt_hashmap_add(seqdesc2seqnum, (void*) key, (void*) (i + 1));
    }

    cs = gt_clustered_set_union_find_new(num_of_seq, err);
    if (cs == NULL) {
      had_err = -1;
    } else if (cluster_sequences(matches, cs, seqdesc2seqnum,
                                 (unsigned) lcs->psmall,
                                 (unsigned) lcs->plarge, encseq, err) != 0) {
      had_err = -1;
    } else {
      (void) cluster_annotate_nodes(cs, encseq, feature, lcs->nodes, err);
    }
  }

  /* Cleanup runs on every path: previously the collected matches and the
     array itself leaked whenever the match iterator failed. */
  for (i = 0; i < gt_array_size(matches); i++) {
    gt_match_delete(*(GtMatch**) gt_array_get(matches, i));
  }
  gt_array_delete(matches);
  matches = NULL;
  if (seqdesc2seqnum != NULL) {
    gt_hashmap_delete(seqdesc2seqnum);
  }
  /* do not hand a NULL set to the destructor if creation failed */
  if (cs != NULL) {
    gt_clustered_set_delete(cs, err);
  }
  gt_match_iterator_delete(mi);

  return had_err;
}