Example #1
0
/*
  Process one incoming <match>.

  - If <info> carries a per-reference match counter, the counter of the
    match's reference sequence is incremented; once it exceeds a positive
    <info->maxnumofmatches> the match is dropped.
  - If the match belongs to another reference sequence than the previously
    stored one (and we are not in the "reference is index, not online" mode),
    all matches collected so far are chained via calc_chains_from_matches()
    and the collection is emptied.
  - The match is appended to <info->matches> unless it equals the match
    stored last; only when it is appended is <info->lastrefseqnum> updated.

  Always returns 0.
*/
int gth_match_processor(GthMatchProcessorInfo *info, GthSeqCon *gen_seq_con,
                        GthSeqCon *ref_seq_con, GthMatch *match)
{
  /* optional limit on the number of matches per reference sequence */
  if (info->matchnumcounter) {
    info->matchnumcounter[match->Storeseqnumreference] += 1;
    if (info->maxnumofmatches > 0 &&
        info->maxnumofmatches <
        info->matchnumcounter[match->Storeseqnumreference]) {
      return 0; /* limit exceeded: the match is discarded */
    }
  }

  /* the reference sequence changed: chain what has been collected so far */
  if ((!info->refseqisindex || info->online) &&
      info->lastrefseqnum != match->Storeseqnumreference &&
      gt_array_size(info->matches) != 0) {
    gt_assert(info->chain_collection && info->chaining_info);

    calc_chains_from_matches(info->chain_collection, info->matches,
                             info->chaining_info, gen_seq_con, ref_seq_con,
                             info->rare, info->fragweightfactor,
                             info->jump_table_new, info->jump_table_new_reverse,
                             info->jump_table_delete);

    /* the chained matches are not needed anymore */
    gt_array_reset(info->matches);
  }

  /* store the match, but skip it if it repeats the most recent one */
  if (gt_array_size(info->matches) == 0 ||
      !gth_matches_are_equal(gt_array_get_last(info->matches), match)) {
    gt_array_add_elem(info->matches, match, sizeof *match);
    /* remember which reference sequence the stored match belongs to */
    info->lastrefseqnum = match->Storeseqnumreference;
  }

  return 0;
}
/*
  Move the content of one AutomaticSequenceRegion (<value>) into the queue
  passed as <data>: first the sequence region node, then every feature node
  in array order. Regions without feature nodes are left untouched.
  Afterwards the region no longer references its sequence region node and
  its feature node array is empty. <key> is only checked for being non-NULL.
  Always returns 0.
  NOTE(review): the (key, value, data, err) signature looks like an
  iteration callback -- confirm against the caller.
*/
static int add_auto_sr_to_queue(GT_UNUSED void *key, void *value, void *data,
                                GT_UNUSED GtError *err)
{
  AutomaticSequenceRegion *region = value;
  GtQueue *queue = data;
  unsigned int idx = 0;
  gt_error_check(err);
  gt_assert(key && value && data);

  /* nothing to hand over if no features were collected for this region */
  if (!gt_array_size(region->feature_nodes))
    return 0;

  /* the sequence region goes first, the region gives up its reference */
  gt_queue_add(queue, region->sequence_region);
  region->sequence_region = NULL;

  while (idx < gt_array_size(region->feature_nodes)) {
    GtGenomeNode *feature =
      *(GtGenomeNode**) gt_array_get(region->feature_nodes, idx);
    gt_queue_add(queue, feature);
    idx++;
  }
  gt_array_reset(region->feature_nodes);

  return 0;
}
Example #3
0
/*
  Node stream "next" function of the load stream.

  On the first call(s) the complete input stream is read and buffered in
  <nodes> (EOF nodes are deleted instead of being buffered). Once the buffer
  is marked as full, each call hands out the buffered node at <idx> and
  advances <idx>. When no buffered node is left, the buffer is reset and
  <*gn> is set to NULL.

  Returns 0 on success. If reading the input stream fails, the error code of
  gt_node_stream_next() is returned and <*gn> is not written.
*/
static int gt_load_stream_next(GtNodeStream *ns, GtGenomeNode **gn,
                               GtError *err)
{
  GtLoadStream *ls;
  int had_err = 0;
  gt_error_check(err);
  ls = gt_load_stream_cast(ns);

  if (!ls->full) {
    /* drain the input stream into the node buffer */
    for (;;) {
      GtGenomeNode *incoming, *eof_node;
      had_err = gt_node_stream_next(ls->in_stream, &incoming, err);
      if (had_err || !incoming)
        break;
      eof_node = gt_eof_node_try_cast(incoming);
      if (eof_node)
        gt_genome_node_delete(eof_node); /* EOF nodes are not buffered */
      else
        gt_array_add(ls->nodes, incoming);
    }
    if (had_err)
      return had_err; /* buffer stays "not full", so reading is retried */
    ls->full = true;
  }

  gt_assert(ls->full);

  /* hand out the next buffered node, if there is one */
  if (ls->idx < gt_array_size(ls->nodes)) {
    *gn = *(GtGenomeNode**) gt_array_get(ls->nodes, ls->idx);
    ls->idx++;
    return 0;
  }

  /* all nodes have been passed on */
  gt_array_reset(ls->nodes);
  *gn = NULL;
  return 0;
}
Example #4
0
/*
  Unit test for the range helpers gt_ranges_uniq(),
  gt_ranges_uniq_in_place(), gt_ranges_uniq_count(),
  gt_ranges_uniq_in_place_count() and gt_range_reorder().

  <ranges_in> is the input containing duplicates, <ranges_out> the expected
  list without duplicates and <counts> the expected multiplicity of each
  entry of <ranges_out>.

  Returns 0 if all checks pass; otherwise the value gt_ensure() stored in
  <had_err> (the failure is reported via <err>).
*/
int gt_range_unit_test(GtError *err)
{
  static GtRange ranges_in[] = {  { 620432, 620536 }, { 620432, 620536 },
                                { 620957, 621056 }, { 620957, 621056 },
                                { 625234, 625253 }, { 625500, 625655 },
                                { 625533, 625655 }, { 625533, 625655 },
                                { 627618, 627729 }, { 627618, 627729 },
                                { 627618, 627729 }, { 662083, 662194 },
                                { 662083, 662194 }, { 662083, 662194 },
                                { 663032, 663166 }, { 663032, 663166 },
                                { 663032, 663166 }, { 664782, 664906 },
                                { 664782, 664906 }, { 664782, 664906 },
                                { 665748, 665823 }, { 665748, 665823 },
                                { 665748, 665823 }, { 666825, 666881 },
                                { 666825, 666881 }, { 667797, 667954 },
                                { 667845, 667954 }, { 667845, 667954 },
                                { 679175, 679280 }, { 679175, 679280 },
                                { 679175, 679280 }, { 680427, 680540 },
                                { 680427, 680540 }, { 680427, 680540 },
                                { 684144, 684293 }, { 684144, 684293 },
                                { 684144, 684293 }, { 724903, 724985 },
                                { 724903, 724985 }, { 727099, 727325 },
                                { 727099, 727325 }, { 732544, 732821 },
                                { 732544, 732821 }, { 750016, 750280 },
                                { 750016, 750280 }, { 769508, 769734 },
                                { 769508, 769734 } },
               ranges_out[] = { { 620432, 620536 }, { 620957, 621056 },
                                { 625234, 625253 }, { 625500, 625655 },
                                { 625533, 625655 }, { 627618, 627729 },
                                { 662083, 662194 }, { 663032, 663166 },
                                { 664782, 664906 }, { 665748, 665823 },
                                { 666825, 666881 }, { 667797, 667954 },
                                { 667845, 667954 }, { 679175, 679280 },
                                { 680427, 680540 }, { 684144, 684293 },
                                { 724903, 724985 }, { 727099, 727325 },
                                { 732544, 732821 }, { 750016, 750280 },
                                { 769508, 769734 }};
  GtUword counts[] = { 2, 2, 1, 1, 2, 3, 3, 3, 3, 3, 2, 1, 2, 3, 3, 3, 2,
                             2, 2, 2, 2 };
  GtArray *ranges, *tmp_ranges, *ctr;
  GtUword i;
  int had_err = 0;
  gt_error_check(err);

  /* the expectation tables must describe the same number of ranges */
  gt_ensure(sizeof (ranges_out) / sizeof (ranges_out[0]) ==
                  sizeof (counts)     / sizeof (counts[0]));

  /* test gt_ranges_uniq() */
  ranges = gt_array_new(sizeof (GtRange));
  tmp_ranges = gt_array_new(sizeof (GtRange));
  for (i = 0;
       i < sizeof (ranges_in) / sizeof (ranges_in[0]) && !had_err;
       i++)
    gt_array_add(ranges, ranges_in[i]);
  gt_ranges_uniq(tmp_ranges, ranges);
  /* the input array must be left unchanged */
  gt_ensure(gt_array_size(ranges) ==
                  sizeof (ranges_in) / sizeof (ranges_in[0]));
  gt_ensure(gt_array_size(tmp_ranges) ==
                  sizeof (ranges_out) / sizeof (ranges_out[0]));
  for (i = 0; i < gt_array_size(tmp_ranges) && !had_err; i++) {
    gt_ensure(ranges_out[i].start ==
                    (*(GtRange*) gt_array_get(tmp_ranges, i)).start);
    gt_ensure(ranges_out[i].end ==
                    (*(GtRange*) gt_array_get(tmp_ranges, i)).end);
  }

  /* test gt_ranges_uniq_in_place() */
  gt_array_reset(tmp_ranges);
  gt_array_add_array(tmp_ranges, ranges);
  gt_ranges_uniq_in_place(tmp_ranges);
  /* check the size first: without it a result which is too short would pass
     unnoticed and one which is too long would index past <ranges_out> */
  gt_ensure(gt_array_size(tmp_ranges) ==
                  sizeof (ranges_out) / sizeof (ranges_out[0]));
  for (i = 0; i < gt_array_size(tmp_ranges) && !had_err; i++) {
    gt_ensure(ranges_out[i].start ==
                    (*(GtRange*) gt_array_get(tmp_ranges, i)).start);
    gt_ensure(ranges_out[i].end ==
                    (*(GtRange*) gt_array_get(tmp_ranges, i)).end);
  }

  /* test gt_ranges_uniq_count() */
  gt_array_reset(tmp_ranges);
  ctr = gt_ranges_uniq_count(tmp_ranges, ranges);
  gt_ensure(gt_array_size(tmp_ranges) == gt_array_size(ctr));
  gt_ensure(
            gt_array_size(ctr) == sizeof (counts) / sizeof (counts[0]));
  for (i = 0; i < gt_array_size(ctr) && !had_err; i++) {
    gt_ensure(counts[i] == *(GtUword*) gt_array_get(ctr, i));
    gt_ensure(ranges_out[i].start ==
                    (*(GtRange*) gt_array_get(tmp_ranges, i)).start);
    gt_ensure(ranges_out[i].end ==
                    (*(GtRange*) gt_array_get(tmp_ranges, i)).end);
  }
  gt_array_delete(ctr);

  /* test gt_ranges_uniq_in_place_count() */
  ctr = gt_ranges_uniq_in_place_count(ranges);
  gt_ensure(gt_array_size(ranges) == gt_array_size(ctr));
  gt_ensure(
            gt_array_size(ctr) == sizeof (counts) / sizeof (counts[0]));
  for (i = 0; i < gt_array_size(ctr) && !had_err; i++) {
    gt_ensure(counts[i] == *(GtUword*) gt_array_get(ctr, i));
    gt_ensure(
           ranges_out[i].start == (*(GtRange*)
                                             gt_array_get(ranges, i)).start);
    gt_ensure(
           ranges_out[i].end == (*(GtRange*) gt_array_get(ranges, i)).end);
  }
  gt_array_delete(ctr);

  /* test gt_range_reorder(): an ordered range stays as it is, a reversed
     one gets its borders swapped */
  if (!had_err) {
    GtRange range = { 1, 100 };
    range = gt_range_reorder(range);
    gt_ensure(range.start == 1 && range.end == 100);
    range.start = 100;
    range.end = 1;
    range = gt_range_reorder(range);
    gt_ensure(range.start == 1 && range.end == 100);
  }

  /* free */
  gt_array_delete(ranges);
  gt_array_delete(tmp_ranges);
  return had_err;
}
Example #5
0
/*
  Decode the read with number <seq_dec->cur_read> and advance <cur_read>.

  <seq>   if not NULL, receives the decoded bases (upper case) as a
          NUL-terminated string
  <qual>  if not NULL, receives the decoded quality characters as a
          NUL-terminated string
  Both buffers must have room for one character per decoded symbol plus the
  terminating NUL.
  <err>   receives the error message if decoding fails

  Returns 1 (SUCCESS) if a read was decoded, 0 (END) if <cur_read> is already
  beyond <num_of_reads> (nothing is decoded or written in that case) and
  -1 (HCR_ERROR) if the huffman decoder failed. In the error case the
  symbols decoded so far are still copied to <seq>/<qual> and <cur_read> is
  still advanced.
*/
static int hcr_next_seq_qual(GtHcrSeqDecoder *seq_dec, char *seq, char *qual,
                             GtError *err)
{
    enum state {
        HCR_ERROR = -1,
        END,
        SUCCESS
    };
    unsigned char base;
    GtUword i,
            nearestsample,
            *symbol;
    size_t startofnearestsample = 0;
    enum state status = END;
    FastqFileInfo cur_read;
    FastqFileInfo *fileinfo = NULL;

    if (seq_dec->cur_read <= seq_dec->num_of_reads) {
        status = SUCCESS;
        /* the symbol buffer is created lazily and reused for every read */
        if (seq_dec->symbols == NULL)
            seq_dec->symbols = gt_array_new(sizeof (GtUword));
        else
            gt_array_reset(seq_dec->symbols);

        /* look up the file info entry for the current read number; its
           readlength tells the decoder how many symbols to decode */
        cur_read.readnum = seq_dec->cur_read;
        gt_log_log("cur_read: "GT_WU"",seq_dec->cur_read);
        fileinfo = (FastqFileInfo *)gt_rbtree_next_key(seq_dec->file_info_rbt,
                   &cur_read,
                   hcr_cmp_FastqFileInfo,
                   NULL);
        gt_assert(fileinfo);

        /* reset huffman_decoder if next read is sampled */
        if (gt_sampling_get_next_elementnum(seq_dec->sampling) ==
                seq_dec->cur_read) {
            gt_log_log("reset because sampled read is next");
            (void) gt_sampling_get_next_sample(seq_dec->sampling,
                                               &nearestsample,
                                               &startofnearestsample);
            reset_data_iterator_to_pos(seq_dec->data_iter, startofnearestsample);
            (void) gt_huffman_decoder_get_new_mem_chunk(seq_dec->huff_dec, err);
            if (gt_error_is_set(err))
                status = HCR_ERROR;
        }
        if (status != HCR_ERROR) {
            int ret;
            ret =  gt_huffman_decoder_next(seq_dec->huff_dec, seq_dec->symbols,
                                           fileinfo->readlength, err);
            /* anything but 1 is treated as failure; for 0 the message is set
               here, other values presumably come with <err> already set by
               the decoder -- TODO confirm */
            if (ret != 1)
                status = HCR_ERROR;
            if (ret == 0)
                gt_error_set(err, "reached end of file");
        }
        if (qual || seq) {
            gt_log_log("set strings");
            for (i = 0; i < gt_array_size(seq_dec->symbols); i++) {
                symbol = (GtUword*) gt_array_get(seq_dec->symbols, i);
                if (qual != NULL)
                    qual[i] = get_qual_from_symbol(seq_dec, *symbol);
                if (seq != NULL) {
                    base = get_base_from_symbol(seq_dec, *symbol);
                    /* toupper() requires a value representable as unsigned
                       char (or EOF); passing a plain, possibly negative char
                       is undefined behavior, hence the cast */
                    seq[i] = (char)toupper((unsigned char)
                                           gt_alphabet_decode(seq_dec->alpha,
                                           (GtUchar) base));
                }
            }
            if (qual != NULL)
                qual[gt_array_size(seq_dec->symbols)] = '\0';
            if (seq != NULL)
                seq[gt_array_size(seq_dec->symbols)] = '\0';
        }
        seq_dec->cur_read++;
    }
    return (int) status;
}
Example #6
0
/*
  Compute the consensus spliced alignments (splice forms) for all spliced
  alignments stored in <csa> and pass each splice form to
  <csa->process_splice_form> (if that callback is set).

  Every spliced alignment is addressed by its index 0..number_of_sas-1 and
  every set used below is a bittab with one bit per spliced alignment:
    C[i], left[i], right[i]  filled by compute_C/compute_left/compute_right
    L[i], R[i]               derived from them by compute_L/compute_R
    U_i                      the spliced alignments not yet covered by an
                             emitted splice form
    SA_i                     the splice form chosen in the current round
    SA_prime                 scratch set for the candidate being examined
  In each round the candidate from U_i whose set L[x] | R[x] has the most
  members is chosen (the first one wins ties), reported, and removed from
  U_i; the loop ends when U_i is empty.
*/
static void compute_csas(ConsensusSA *csa)
{
  unsigned long i, sa_i, sa_i_size = 0, sa_prime, sa_prime_size;
  GtArray *splice_form;
  GtBittab **C, **left, **right, **L, **R, *U_i, *SA_i, *SA_prime;
#ifndef NDEBUG
  /* only needed for the progress assertion at the end of the main loop */
  unsigned long u_i_size, u_i_minus_1_size;
  gt_assert(csa && csa->set_of_sas);
#endif

  /* init sets */
  C     = gt_malloc(sizeof (GtBittab*) * csa->number_of_sas);
  left  = gt_malloc(sizeof (GtBittab*) * csa->number_of_sas);
  right = gt_malloc(sizeof (GtBittab*) * csa->number_of_sas);
  L     = gt_malloc(sizeof (GtBittab*) * csa->number_of_sas);
  R     = gt_malloc(sizeof (GtBittab*) * csa->number_of_sas);

  for (i = 0; i < csa->number_of_sas; i++) {
    C[i]     = gt_bittab_new(csa->number_of_sas);
    left[i]  = gt_bittab_new(csa->number_of_sas);
    right[i] = gt_bittab_new(csa->number_of_sas);
    L[i]     = gt_bittab_new(csa->number_of_sas);
    R[i]     = gt_bittab_new(csa->number_of_sas);
  }

  U_i      = gt_bittab_new(csa->number_of_sas);
  SA_i     = gt_bittab_new(csa->number_of_sas);
  SA_prime = gt_bittab_new(csa->number_of_sas);

  /* reused for every splice form handed to the callback */
  splice_form = gt_array_new(sizeof (unsigned long));

  /* compute sets */
  compute_C(C, csa);
  compute_left(left, csa);
  compute_right(right, csa);
  compute_L(L, C, left, csa->number_of_sas);
  compute_R(R, C, right, csa->number_of_sas);

  /* U_0 = SA */
  for (i = 0; i < csa->number_of_sas; i++)
    gt_bittab_set_bit(U_i, i);

#ifndef NDEBUG
  /* preparation for assertion below */
  u_i_minus_1_size = gt_bittab_count_set_bits(U_i);
#endif
  while (gt_bittab_is_true(U_i)) {
    /* GT_UNDEF_ULONG marks "no candidate chosen yet" in this round */
    sa_i = GT_UNDEF_ULONG;
    /* iterate over all set bits of U_i
       NOTE(review): this relies on gt_bittab_get_last_bitnum() being an
       end marker which gt_bittab_get_next_bitnum() returns after the last
       set bit; otherwise the last candidate would be skipped -- confirm */
    for (sa_prime  = gt_bittab_get_first_bitnum(U_i);
         sa_prime != gt_bittab_get_last_bitnum(U_i);
         sa_prime  = gt_bittab_get_next_bitnum(U_i, sa_prime)) {
      if (sa_i == GT_UNDEF_ULONG) {
        /* the first candidate is the initial maximum */
        sa_i = sa_prime;
        gt_bittab_or(SA_i, L[sa_i], R[sa_i]);
        sa_i_size = gt_bittab_count_set_bits(SA_i);
      }
      else {
        gt_bittab_or(SA_prime, L[sa_prime], R[sa_prime]);
        sa_prime_size = gt_bittab_count_set_bits(SA_prime);
        /* strictly larger only: on ties the earlier candidate is kept */
        if (sa_prime_size > sa_i_size) {
          sa_i = sa_prime;
          sa_i_size = sa_prime_size;
          /* NOTE(review): the result is not used, so this is presumably an
             assignment SA_i = SA_prime and not a comparison -- confirm */
          gt_bittab_equal(SA_i, SA_prime);
        }
      }
    }

    /* make sure the computed splice form is maximal w.r.t. to compatibility */
    gt_assert(splice_form_is_valid(SA_i, csa));

    /* process splice form */
    if (csa->process_splice_form) {
      gt_array_reset(splice_form);
      /* hand the members of SA_i over as an array of indices */
      gt_bittab_get_all_bitnums(SA_i, splice_form);
      csa->process_splice_form(splice_form, csa->set_of_sas, csa->number_of_sas,
                               csa->size_of_sa, csa->userdata);
    }

    /* U_i = U_i-1 \ SA_i */
    gt_bittab_nand(U_i, U_i, SA_i);

#ifndef NDEBUG
    /* ensure that |U_i| < |U_i-1| */
    u_i_size = gt_bittab_count_set_bits(U_i);
    gt_assert(u_i_size < u_i_minus_1_size);
    u_i_minus_1_size = u_i_size;
#endif
  }

  /* free sets */
  for (i = 0; i < csa->number_of_sas; i++) {
    gt_bittab_delete(C[i]);
    gt_bittab_delete(left[i]);
    gt_bittab_delete(right[i]);
    gt_bittab_delete(L[i]);
    gt_bittab_delete(R[i]);
  }
  gt_free(C);
  gt_free(left);
  gt_free(right);
  gt_free(L);
  gt_free(R);
  gt_bittab_delete(U_i);
  gt_bittab_delete(SA_i);
  gt_bittab_delete(SA_prime);
  gt_array_delete(splice_form);
}
Example #7
0
/*
  Unit test for gt_ovlfind_kmp().

  The KMP preprocessing unit test is run first; if it fails, its error code
  is returned immediately. Afterwards a series of scenarios is run, each one
  only if no earlier check failed. In every scenario the results reported
  through the callback ovlfind_kmp_test_save() are collected in <a>, and the
  return value of gt_ovlfind_kmp() as well as the number of collected results
  are checked. Individual results are checked with
  GT_OVLFIND_KMP_EXPECT_RESULT(index, flag, length).
  NOTE(review): that macro is defined outside this function; it presumably
  uses the locals <r>, <a> and <had_err> (<r> is not referenced anywhere
  else here) and its flag argument presumably tells whether the overlap
  involves a suffix of u -- confirm against the macro definition.

  Returns 0 if all checks pass, otherwise the value stored in <had_err> by
  the failing check.
*/
int gt_ovlfind_kmp_unit_test(GtError *err)
{
  int had_err = 0;
  GtArray *a;
  struct GtOvlfindKmpResult *r;
  GtContfind retval;
  gt_kmp_t *u_pi, *v_pi;

  /*@i1@*/ gt_error_check(err);

  /* gt_ovlfind_kmp() needs the tables computed by gt_kmp_preproc(), so
     that function is tested first */
  had_err = gt_kmp_preproc_unit_test(err);
  if (had_err != 0)
    return had_err;

  a = gt_array_new(sizeof (struct GtOvlfindKmpResult));

  /* u suffix == v prefix */
  if (!had_err)
  {
    gt_array_reset(a);
    u_pi = gt_kmp_preproc("aacgcacctg", 10UL);
    v_pi = gt_kmp_preproc("acctgatttc", 10UL);
    retval = gt_ovlfind_kmp("aacgcacctg", 10UL, u_pi, "acctgatttc", 10UL, v_pi,
        GT_OVLFIND_PROPER_SPM, 1UL, false, ovlfind_kmp_test_save, a);
    gt_ensure(had_err, retval == GT_CONTFIND_NO);
    gt_ensure(had_err, gt_array_size(a) == 1UL);
    GT_OVLFIND_KMP_EXPECT_RESULT(0UL, true, 5UL);
    gt_free(u_pi);
    gt_free(v_pi);
  }
  /* v suffix == u prefix */
  if (!had_err)
  {
    gt_array_reset(a);
    u_pi = gt_kmp_preproc("atccgtgacgtg", 12UL);
    v_pi = gt_kmp_preproc("aagaagaatccg", 12UL);
    retval = gt_ovlfind_kmp("atccgtgacgtg", 12UL, u_pi, "aagaagaatccg", 12UL,
        v_pi, GT_OVLFIND_ALL, 1UL, false, ovlfind_kmp_test_save, a);
    gt_ensure(had_err, retval == GT_CONTFIND_NO);
    gt_ensure(had_err, gt_array_size(a) == 1UL);
    GT_OVLFIND_KMP_EXPECT_RESULT(0UL, false, 5UL);
    gt_free(u_pi);
    gt_free(v_pi);
  }
  /* no overlap */
  if (!had_err)
  {
    gt_array_reset(a);
    u_pi = gt_kmp_preproc("aac", 3UL);
    v_pi = gt_kmp_preproc("tgc", 3UL);
    retval = gt_ovlfind_kmp("aac", 3UL, u_pi, "tgc", 3UL, v_pi,
        GT_OVLFIND_PROPER_SPM, 1UL, false, ovlfind_kmp_test_save, a);
    gt_ensure(had_err, retval == GT_CONTFIND_NO);
    gt_ensure(had_err, gt_array_size(a) == 0UL);
    gt_free(u_pi);
    gt_free(v_pi);
  }
  /* u suffix of v */
  if (!had_err)
  {
    /* the same pair of sequences is searched in four different modes */
    gt_array_reset(a);
    u_pi = gt_kmp_preproc("acagc", 5UL);
    v_pi = gt_kmp_preproc("gtacagc", 7UL);
    retval = gt_ovlfind_kmp("acagc", 5UL, u_pi, "gtacagc", 7UL, v_pi,
        GT_OVLFIND_SPM, 1UL, false, ovlfind_kmp_test_save, a);
    gt_ensure(had_err, gt_array_size(a) == 1UL);
    gt_ensure(had_err, retval == GT_CONTFIND_OFF);
    GT_OVLFIND_KMP_EXPECT_RESULT(0UL, false, 5UL);

    gt_array_reset(a);
    retval = gt_ovlfind_kmp("acagc", 5UL, u_pi, "gtacagc", 7UL, v_pi,
        GT_OVLFIND_PROPER_SPM, 1UL, false, ovlfind_kmp_test_save, a);
    gt_ensure(had_err, retval == GT_CONTFIND_U);
    gt_ensure(had_err, gt_array_size(a) == 0UL);
    gt_array_reset(a);

    retval = gt_ovlfind_kmp("acagc", 5UL, u_pi, "gtacagc", 7UL, v_pi,
        GT_OVLFIND_CNT, 1UL, false, ovlfind_kmp_test_save, a);
    gt_ensure(had_err, gt_array_size(a) == 0UL);
    gt_ensure(had_err, retval == GT_CONTFIND_U);

    gt_array_reset(a);
    retval = gt_ovlfind_kmp("acagc", 5UL, u_pi, "gtacagc", 7UL, v_pi,
        GT_OVLFIND_ALL, 1UL, false, ovlfind_kmp_test_save, a);
    gt_ensure(had_err, gt_array_size(a) == 1UL);
    gt_ensure(had_err, retval == GT_CONTFIND_U);
    GT_OVLFIND_KMP_EXPECT_RESULT(0UL, false, 5UL);
    gt_free(u_pi);
    gt_free(v_pi);
  }
  /* v suffix of u */
  if (!had_err)
  {
    gt_array_reset(a);
    u_pi = gt_kmp_preproc("gtacagc", 7UL);
    v_pi = gt_kmp_preproc("acagc", 5UL);

    retval = gt_ovlfind_kmp("gtacagc", 7UL, u_pi, "acagc", 5UL, v_pi,
        GT_OVLFIND_SPM, 1UL, false, ovlfind_kmp_test_save, a);
    gt_ensure(had_err, retval == GT_CONTFIND_OFF);
    gt_ensure(had_err, gt_array_size(a) == 1UL);
    GT_OVLFIND_KMP_EXPECT_RESULT(0UL, true, 5UL);

    gt_array_reset(a);
    retval = gt_ovlfind_kmp("gtacagc", 7UL, u_pi, "acagc", 5UL, v_pi,
        GT_OVLFIND_PROPER_SPM, 1UL, false, ovlfind_kmp_test_save, a);
    gt_ensure(had_err, retval == GT_CONTFIND_V);
    gt_ensure(had_err, gt_array_size(a) == 0UL);

    gt_free(u_pi);
    gt_free(v_pi);
  }
  /* u prefix of v */
  if (!had_err)
  {
    gt_array_reset(a);
    u_pi = gt_kmp_preproc("ctat", 4UL);
    v_pi = gt_kmp_preproc("ctatacagg", 9UL);
    retval = gt_ovlfind_kmp("ctat", 4UL, u_pi, "ctatacagg", 9UL, v_pi,
        GT_OVLFIND_SPM, 1UL, false, ovlfind_kmp_test_save, a);
    gt_ensure(had_err, retval == GT_CONTFIND_OFF);
    gt_ensure(had_err, gt_array_size(a) == 1UL);
    GT_OVLFIND_KMP_EXPECT_RESULT(0UL, true, 4UL);

    gt_array_reset(a);
    retval = gt_ovlfind_kmp("ctat", 4UL, u_pi, "ctatacagg", 9UL, v_pi,
        GT_OVLFIND_PROPER_SPM, 1UL, false, ovlfind_kmp_test_save, a);
    gt_ensure(had_err, retval == GT_CONTFIND_U);
    gt_ensure(had_err, gt_array_size(a) == 0UL);
    gt_free(u_pi);
    gt_free(v_pi);
  }
  /* v prefix of u */
  if (!had_err)
  {
    gt_array_reset(a);
    u_pi = gt_kmp_preproc("ctatacagg", 9UL);
    v_pi = gt_kmp_preproc("ctat", 4UL);
    retval = gt_ovlfind_kmp("ctatacagg", 9UL, u_pi, "ctat", 4UL, v_pi,
        GT_OVLFIND_SPM, 1UL, false, ovlfind_kmp_test_save, a);
    gt_ensure(had_err, retval == GT_CONTFIND_OFF);
    gt_ensure(had_err, gt_array_size(a) == 1UL);
    GT_OVLFIND_KMP_EXPECT_RESULT(0UL, false, 4UL);

    gt_array_reset(a);
    retval = gt_ovlfind_kmp("ctatacagg", 9UL, u_pi, "ctat", 4UL, v_pi,
        GT_OVLFIND_PROPER_SPM, 1UL, false, ovlfind_kmp_test_save, a);
    gt_ensure(had_err, retval == GT_CONTFIND_V);
    gt_ensure(had_err, gt_array_size(a) == 0UL);
    gt_free(u_pi);
    gt_free(v_pi);
  }
  /* identical sequences */
  if (!had_err)
  {
    /* u and v are the same string, so one table serves for both and is
       freed only once */
    gt_array_reset(a);
    u_pi = gt_kmp_preproc("acagc", 5UL);
    retval = gt_ovlfind_kmp("acagc", 5UL, u_pi, "acagc", 5UL, u_pi,
        GT_OVLFIND_SPM, 1UL, false, ovlfind_kmp_test_save, a);
    gt_ensure(had_err, retval == GT_CONTFIND_OFF);
    gt_ensure(had_err, gt_array_size(a) == 2UL);
    GT_OVLFIND_KMP_EXPECT_RESULT(0UL, true, 5UL);
    GT_OVLFIND_KMP_EXPECT_RESULT(1UL, false, 5UL);

    gt_array_reset(a);
    retval = gt_ovlfind_kmp("acagc", 5UL, u_pi, "acagc", 5UL, u_pi,
        GT_OVLFIND_PROPER_SPM, 1UL, false, ovlfind_kmp_test_save, a);
    gt_ensure(had_err, retval == GT_CONTFIND_EQ);
    gt_ensure(had_err, gt_array_size(a) == 0UL);
    gt_free(u_pi);
  }
  /* find_nonmaximal */
  if (!had_err)
  {
    /* same input twice; only the boolean argument before the callback
       differs (false: 2 results expected, true: 5 results expected) */
    gt_array_reset(a);
    u_pi = gt_kmp_preproc("aacagtagtagt", 12UL);
    v_pi = gt_kmp_preproc("agtagtagttaa", 12UL);
    retval = gt_ovlfind_kmp("aacagtagtagt", 12UL, u_pi, "agtagtagttaa", 12UL,
        v_pi, GT_OVLFIND_SPM, 1UL, false, ovlfind_kmp_test_save, a);
    gt_ensure(had_err, retval == GT_CONTFIND_OFF);
    gt_ensure(had_err, gt_array_size(a) == 2UL);
    GT_OVLFIND_KMP_EXPECT_RESULT(0UL, true, 9UL);
    GT_OVLFIND_KMP_EXPECT_RESULT(1UL, false, 2UL);
    gt_array_reset(a);
    retval = gt_ovlfind_kmp("aacagtagtagt", 12UL, u_pi, "agtagtagttaa", 12UL,
        v_pi, GT_OVLFIND_SPM, 1UL, true, ovlfind_kmp_test_save, a);
    gt_ensure(had_err, gt_array_size(a) == 5UL);
    gt_ensure(had_err, retval == GT_CONTFIND_OFF);
    gt_free(u_pi);
    gt_free(v_pi);
  }
  /* min_length */
  if (!had_err)
  {
    /* same input twice; only the numeric argument after the mode differs
       (1UL: 2 results expected, 4UL: 1 result expected) */
    gt_array_reset(a);
    u_pi = gt_kmp_preproc("aggaccagtagt", 12UL);
    v_pi = gt_kmp_preproc("agtagttactac", 12UL);
    retval = gt_ovlfind_kmp("aggaccagtagt", 12UL, u_pi, "agtagttactac", 12UL,
        v_pi, GT_OVLFIND_SPM, 1UL, true, ovlfind_kmp_test_save, a);
    gt_ensure(had_err, retval == GT_CONTFIND_OFF);
    gt_ensure(had_err, gt_array_size(a) == 2UL);
    gt_array_reset(a);
    retval = gt_ovlfind_kmp("aggaccagtagt", 12UL, u_pi, "agtagttactac", 12UL,
        v_pi, GT_OVLFIND_SPM, 4UL, true, ovlfind_kmp_test_save, a);
    gt_ensure(had_err, gt_array_size(a) == 1UL);
    gt_ensure(had_err, retval == GT_CONTFIND_OFF);
    gt_free(u_pi);
    gt_free(v_pi);
  }
  gt_array_delete(a);
  return had_err;
}
/*
  Unit test for the string matching functions (brute force, BMH, KMP and
  shift-and).

  First the empty pattern is searched in a fixed text; none of the functions
  may report a match. Then STRING_MATCHING_NUM_OF_TESTS rounds are run with
  a random text and a random pattern; in every round the first match and the
  complete match lists of BMH, KMP and shift-and are compared with those of
  the brute force search.

  Returns 0 if all checks pass, otherwise the value stored in <had_err> by
  the failing check.
*/
int gt_string_matching_unit_test(GtError *err)
{
  char text_buf[STRING_MATCHING_MAX_STRING_LENGTH+1],
       pattern_buf[STRING_MATCHING_MAX_PATTERN_LENGTH+1],
       *fixed_text = "foo";
  GtArray *bf_hits, *bmh_hits, *kmp_hits, *sa_hits;
  unsigned long round, fixed_len, bf_first, bmh_first, kmp_first, sa_first;
  int had_err = 0;

  gt_error_check(err);

  bf_hits = gt_array_new(sizeof (unsigned long));
  bmh_hits = gt_array_new(sizeof (unsigned long));
  kmp_hits = gt_array_new(sizeof (unsigned long));
  sa_hits = gt_array_new(sizeof (unsigned long));

  /* the empty pattern must not produce any match */
  fixed_len = strlen(fixed_text);
  gt_string_matching_brute_force(fixed_text, fixed_len, "", 0, store_match,
                                 bf_hits);
  gt_string_matching_bmh(fixed_text, fixed_len, "", 0, store_match, bmh_hits);
  gt_string_matching_kmp(fixed_text, fixed_len, "", 0, store_match, kmp_hits);
  gt_string_matching_shift_and(fixed_text, fixed_len, "", 0, store_match,
                               sa_hits);

  ensure(had_err, !gt_array_size(bf_hits));
  ensure(had_err, !gt_array_size(bmh_hits));
  ensure(had_err, !gt_array_size(kmp_hits));
  ensure(had_err, !gt_array_size(sa_hits));

  round = 0;
  while (!had_err && round < STRING_MATCHING_NUM_OF_TESTS) {
    unsigned long pos, text_len, pattern_len;

    /* draw both lengths first, then fill text and pattern with random
       characters */
    text_len = gt_rand_max(STRING_MATCHING_MAX_STRING_LENGTH);
    pattern_len = gt_rand_max(STRING_MATCHING_MAX_PATTERN_LENGTH);
    for (pos = 0; pos < text_len; pos++)
      text_buf[pos] = gt_rand_char();
    text_buf[text_len] = '\0';
    for (pos = 0; pos < pattern_len; pos++)
      pattern_buf[pos] = gt_rand_char();
    pattern_buf[pattern_len] = '\0';

    /* first match only: every algorithm must agree with brute force */
    bf_first = GT_UNDEF_ULONG;
    bmh_first = GT_UNDEF_ULONG;
    kmp_first = GT_UNDEF_ULONG;
    sa_first = GT_UNDEF_ULONG;
    gt_string_matching_brute_force(text_buf, text_len, pattern_buf,
                                   pattern_len, store_first_match, &bf_first);
    gt_string_matching_bmh(text_buf, text_len, pattern_buf, pattern_len,
                           store_first_match, &bmh_first);
    gt_string_matching_kmp(text_buf, text_len, pattern_buf, pattern_len,
                           store_first_match, &kmp_first);
    gt_string_matching_shift_and(text_buf, text_len, pattern_buf, pattern_len,
                                 store_first_match, &sa_first);
    ensure(had_err, brute_force_first_equals(bf_first, bmh_first));
    ensure(had_err, brute_force_first_equals(bf_first, kmp_first));
    ensure(had_err, brute_force_first_equals(bf_first, sa_first));

    /* all matches: sizes and contents must agree with brute force */
    gt_string_matching_brute_force(text_buf, text_len, pattern_buf,
                                   pattern_len, store_match, bf_hits);
    gt_string_matching_bmh(text_buf, text_len, pattern_buf, pattern_len,
                           store_match, bmh_hits);
    gt_string_matching_kmp(text_buf, text_len, pattern_buf, pattern_len,
                           store_match, kmp_hits);
    gt_string_matching_shift_and(text_buf, text_len, pattern_buf, pattern_len,
                                 store_match, sa_hits);
    ensure(had_err, gt_array_size(bf_hits) == gt_array_size(bmh_hits));
    ensure(had_err, gt_array_size(bf_hits) == gt_array_size(kmp_hits));
    ensure(had_err, gt_array_size(bf_hits) == gt_array_size(sa_hits));
    ensure(had_err, !gt_array_cmp(bf_hits, bmh_hits));
    ensure(had_err, !gt_array_cmp(bf_hits, kmp_hits));
    ensure(had_err, !gt_array_cmp(bf_hits, sa_hits));

    /* start the next round with empty match lists */
    gt_array_reset(bf_hits);
    gt_array_reset(bmh_hits);
    gt_array_reset(kmp_hits);
    gt_array_reset(sa_hits);

    round++;
  }

  gt_array_delete(sa_hits);
  gt_array_delete(bmh_hits);
  gt_array_delete(kmp_hits);
  gt_array_delete(bf_hits);

  return had_err;
}
Example #9
0
/*
  Unit test for the interval tree.

  Part 1 fills a tree with random ranges and compares
  - gt_interval_tree_find_first_overlapping() and
  - gt_interval_tree_find_all_overlapping()
  for random query ranges with a linear search over all inserted ranges.

  Part 2 fills a second tree with nodes which carry their insertion index as
  data, then removes randomly chosen nodes one by one and checks after each
  removal that the tree size shrank by one and that the removed data value
  can no longer be found in the tree.

  Returns 0 if all checks pass, otherwise the value stored in <had_err> by
  the failing check.
*/
int gt_interval_tree_unit_test(GT_UNUSED GtError *err)
{
  GtIntervalTree *tree = NULL;
  GtIntervalTreeNode *first_hit = NULL;
  unsigned long i = 0;
  int had_err = 0, num_testranges = 3000,
      num_samples = 300000, num_find_all_samples = 10000,
      gt_range_max_basepos = 90000, width = 700,
      query_width = 5000;
  GtRange *hit_range = NULL, query;
  GtArray *ranges = NULL, *nodes = NULL;

  ranges = gt_array_new(sizeof (GtRange*));

  /* create random test ranges (start first, then the random length) */
  for (i = 0; i < num_testranges; i++)
  {
    unsigned long start;
    GtRange *range;
    range = gt_calloc(1, sizeof (GtRange));
    start = gt_rand_max(gt_range_max_basepos);
    range->start = start;
    range->end = start + gt_rand_max(width);
    gt_array_add(ranges, range);
  }

  /* NOTE(review): the tree gets gt_free_func, so it presumably frees the
     ranges attached to its nodes when it is deleted -- confirm */
  tree = gt_interval_tree_new(gt_free_func);

  /* put every range into the tree */
  for (i = 0; i < num_testranges && !had_err; i++)
  {
    GtRange *range = *(GtRange**) gt_array_get(ranges, i);
    GtIntervalTreeNode *node = gt_interval_tree_node_new(range, range->start,
                                                         range->end);
    gt_interval_tree_insert(tree, node);
  }
  gt_ensure(had_err, gt_interval_tree_size(tree) == num_testranges);

  /* single hit queries */
  for (i = 0; i < num_samples && !had_err; i++)
  {
    unsigned long start = gt_rand_max(gt_range_max_basepos);
    query.start = start;
    query.end = start + gt_rand_max(width);
    first_hit = gt_interval_tree_find_first_overlapping(tree, query.start,
                                                        query.end);
    if (!first_hit)
    {
      /* the tree reported nothing: no inserted range may overlap the query */
      unsigned long j;
      bool found = false;
      for (j = 0; !found && j < gt_array_size(ranges); j++)
      {
        GtRange *candidate = *(GtRange**) gt_array_get(ranges, j);
        if (gt_range_overlap(candidate, &query))
          found = true;
      }
      gt_ensure(had_err, !found);
    } else {
      /* the tree reported a node: its range must overlap the query */
      hit_range = (GtRange*) gt_interval_tree_node_get_data(first_hit);
      gt_ensure(had_err, gt_range_overlap(&query, hit_range));
    }
  }

  /* queries for all overlapping intervals */
  for (i = 0; i < num_find_all_samples && !had_err; i++)
  {
    unsigned long start = gt_rand_max(gt_range_max_basepos);
    query.start = start;
    query.end = start + gt_rand_max(query_width);
    GtArray *hits = gt_array_new(sizeof (GtRange*));
    gt_interval_tree_find_all_overlapping(tree, query.start, query.end, hits);
    if (hits)
    {
      /* collect the expected result with a linear search */
      unsigned long j;
      GtArray *expected = gt_array_new(sizeof (GtRange*));
      for (j = 0; j < gt_array_size(ranges); j++)
      {
        GtRange *candidate = *(GtRange**) gt_array_get(ranges, j);
        if (gt_range_overlap(candidate, &query))
          gt_array_add(expected, candidate);
      }
      /* after sorting both lists the same way they must be identical */
      gt_array_sort_stable(expected, range_ptr_compare);
      gt_array_sort_stable(hits, range_ptr_compare);
      gt_ensure(had_err, gt_array_cmp(expected, hits)==0);
      gt_array_delete(expected);
    }
    gt_array_delete(hits);
  }
  gt_interval_tree_delete(tree);

  /* second tree: the node data is just an index, so nothing has to be
     freed by the tree (NULL instead of a free function) */
  tree = gt_interval_tree_new(NULL);
  gt_array_reset(ranges);

  for (i = 0; i < num_testranges && !had_err; i++)
  {
    unsigned long start, end;
    GtIntervalTreeNode *node;
    start = gt_rand_max(gt_range_max_basepos);
    end = start + gt_rand_max(width);
    node = gt_interval_tree_node_new((void*) i, start, end);
    gt_interval_tree_insert(tree, node);
  }
  gt_ensure(had_err, gt_interval_tree_size(tree) == num_testranges);

  nodes = gt_array_new(sizeof (GtIntervalTreeNode*));
  for (i = 0; i < num_testranges && !had_err; i++) {
    unsigned long victim_idx, n, victim_data;
    GtIntervalTreeNode *victim = NULL;

    /* collect all nodes which are currently in the tree */
    interval_tree_find_all_internal(tree, tree->root, itree_test_get_node, 0,
                                    gt_range_max_basepos+width, nodes);

    /* pick one of them at random and remove it */
    victim_idx = gt_rand_max(gt_array_size(nodes)-1);
    victim = *(GtIntervalTreeNode**) gt_array_get(nodes, victim_idx);
    gt_ensure(had_err, victim != NULL);
    victim_data = (unsigned long) gt_interval_tree_node_get_data(victim);
    gt_interval_tree_remove(tree, victim);
    gt_array_reset(nodes);

    /* the tree must have shrunk by one node and must not contain the data
       value of the removed node anymore */
    gt_ensure(had_err, gt_interval_tree_size(tree) == num_testranges - (i+1));
    interval_tree_find_all_internal(tree, tree->root, itree_test_get_node, 0,
                                    gt_range_max_basepos+width, nodes);
    gt_ensure(had_err, gt_array_size(nodes) == num_testranges - (i+1));
    for (n = 0; !had_err && n < gt_array_size(nodes); n++) {
      GtIntervalTreeNode *other = *(GtIntervalTreeNode**) gt_array_get(nodes,
                                                                       n);
      gt_ensure(had_err, (unsigned long) gt_interval_tree_node_get_data(other)
                           != victim_data);
    }
  }

  gt_array_delete(ranges);
  gt_array_delete(nodes);
  gt_interval_tree_delete(tree);
  return had_err;
}