Ejemplo n.º 1
0
/* Search the descriptions of all sequences in <seqfiles> for the targets
   listed in <target>.

   <target> is split at ','. For every resulting entry only the part before
   the first ' ' is used; it is passed through gt_gff3_unescape() and then
   used as the pattern for gt_string_matching_bmh() against each sequence
   description of each file in <seqfiles>. show_target() is handed to the
   matcher as callback together with a TargetInfo naming the bioseq and the
   sequence number.

   Returns 0 on success, the value returned by gt_gff3_unescape() if it is
   non-zero, or -1 if gt_bioseq_new() returns NULL. <err> is forwarded to
   gt_gff3_unescape() and gt_bioseq_new().

   NOTE(review): the GtStr receiving the unescaped target is created once and
   never cleared between entries. If gt_gff3_unescape() appends to its output
   (not visible from here), the pattern for the second and later entries
   would be prefixed by the earlier ones -- confirm. */
static int extracttarget_from_seqfiles(const char *target,
                                       GtStrArray *seqfiles,
                                       GtError *err)
{
  int had_err = 0;
  unsigned long entry_idx;
  GtSplitter *comma_splitter;
  GtStr *plain_target;
  char *target_copy;

  gt_error_check(err);
  gt_assert(target && seqfiles);

  comma_splitter = gt_splitter_new();
  plain_target = gt_str_new();
  /* the splitter is given a private, writable copy of the target string */
  target_copy = gt_cstr_dup(target);
  gt_splitter_split(comma_splitter, target_copy, strlen(target_copy), ',');

  entry_idx = 0;
  while (!had_err && entry_idx < gt_splitter_size(comma_splitter)) {
    unsigned long file_idx;
    char *id_part;
    char *entry = gt_splitter_get_token(comma_splitter, entry_idx);
    GtSplitter *space_splitter = gt_splitter_new();

    /* only the first blank-separated field of the entry is looked up */
    gt_splitter_split(space_splitter, entry, strlen(entry), ' ');
    id_part = gt_splitter_get_token(space_splitter, 0);
    had_err = gt_gff3_unescape(plain_target, id_part, strlen(id_part), err);

    /* skipped entirely when unescaping failed */
    for (file_idx = 0;
         !had_err && file_idx < gt_str_array_size(seqfiles);
         file_idx++) {
      unsigned long seq_idx;
      TargetInfo target_info;
      GtBioseq *bioseq = gt_bioseq_new(gt_str_array_get(seqfiles, file_idx),
                                       err);
      if (bioseq == NULL) {
        had_err = -1;
        break;
      }
      for (seq_idx = 0;
           seq_idx < gt_bioseq_number_of_sequences(bioseq);
           seq_idx++) {
        const char *description = gt_bioseq_get_description(bioseq, seq_idx);
        target_info.bioseq = bioseq;
        target_info.seqnum = seq_idx;
        gt_string_matching_bmh(description, strlen(description),
                               gt_str_get(plain_target),
                               gt_str_length(plain_target), show_target,
                               &target_info);
      }
      gt_bioseq_delete(bioseq);
    }

    gt_splitter_delete(space_splitter);
    entry_idx++;
  }

  gt_free(target_copy);
  gt_str_delete(plain_target);
  gt_splitter_delete(comma_splitter);
  return had_err;
}
Ejemplo n.º 2
0
/* Unit test cross-checking the string matching implementations.

   First the empty pattern is run through the brute force, BMH, KMP and
   shift-and matchers on a fixed text; all result arrays are expected to be
   empty afterwards. Then, for STRING_MATCHING_NUM_OF_TESTS rounds (stopping
   early once a check has failed), a random text and a random pattern are
   generated and the brute force results are compared with those of the other
   three matchers: once for the value stored by store_first_match() and once
   for the arrays filled via store_match().

   Returns <had_err> as maintained by the ensure() checks (initially 0). */
int gt_string_matching_unit_test(GtError *err)
{
  char text_buf[STRING_MATCHING_MAX_STRING_LENGTH+1];
  char pattern_buf[STRING_MATCHING_MAX_PATTERN_LENGTH+1];
  char *fixed_text = "foo";
  size_t fixed_len;
  GtArray *brute_force_matches, *bmh_matches, *kmp_matches, *shift_and_matches;
  unsigned long brute_force_match, bmh_match, kmp_match, shift_and_match;
  unsigned long round;
  int had_err = 0;

  gt_error_check(err);

  brute_force_matches = gt_array_new(sizeof (unsigned long));
  bmh_matches = gt_array_new(sizeof (unsigned long));
  kmp_matches = gt_array_new(sizeof (unsigned long));
  shift_and_matches = gt_array_new(sizeof (unsigned long));

  /* empty pattern against a fixed text */
  fixed_len = strlen(fixed_text);
  gt_string_matching_brute_force(fixed_text, fixed_len, "", 0, store_match,
                                 brute_force_matches);
  gt_string_matching_bmh(fixed_text, fixed_len, "", 0, store_match,
                         bmh_matches);
  gt_string_matching_kmp(fixed_text, fixed_len, "", 0, store_match,
                         kmp_matches);
  gt_string_matching_shift_and(fixed_text, fixed_len, "", 0, store_match,
                               shift_and_matches);

  ensure(had_err, !gt_array_size(brute_force_matches));
  ensure(had_err, !gt_array_size(bmh_matches));
  ensure(had_err, !gt_array_size(kmp_matches));
  ensure(had_err, !gt_array_size(shift_and_matches));

  for (round = 0; !had_err && round < STRING_MATCHING_NUM_OF_TESTS; round++) {
    unsigned long pos, text_len, pattern_len;

    /* draw both lengths first, then fill text and pattern in that order */
    text_len = gt_rand_max(STRING_MATCHING_MAX_STRING_LENGTH);
    pattern_len = gt_rand_max(STRING_MATCHING_MAX_PATTERN_LENGTH);
    pos = 0;
    while (pos < text_len)
      text_buf[pos++] = gt_rand_char();
    text_buf[text_len] = '\0';
    pos = 0;
    while (pos < pattern_len)
      pattern_buf[pos++] = gt_rand_char();
    pattern_buf[pattern_len] = '\0';

    /* first match: start every matcher from the same sentinel value */
    brute_force_match = bmh_match = kmp_match = shift_and_match
                      = GT_UNDEF_ULONG;
    gt_string_matching_brute_force(text_buf, text_len, pattern_buf,
                                   pattern_len, store_first_match,
                                   &brute_force_match);
    gt_string_matching_bmh(text_buf, text_len, pattern_buf, pattern_len,
                           store_first_match, &bmh_match);
    gt_string_matching_kmp(text_buf, text_len, pattern_buf, pattern_len,
                           store_first_match, &kmp_match);
    gt_string_matching_shift_and(text_buf, text_len, pattern_buf, pattern_len,
                                 store_first_match, &shift_and_match);

    /* brute force serves as the reference result */
    ensure(had_err, brute_force_match == bmh_match);
    ensure(had_err, brute_force_match == kmp_match);
    ensure(had_err, brute_force_match == shift_and_match);

    /* all matches */
    gt_string_matching_brute_force(text_buf, text_len, pattern_buf,
                                   pattern_len, store_match,
                                   brute_force_matches);
    gt_string_matching_bmh(text_buf, text_len, pattern_buf, pattern_len,
                           store_match, bmh_matches);
    gt_string_matching_kmp(text_buf, text_len, pattern_buf, pattern_len,
                           store_match, kmp_matches);
    gt_string_matching_shift_and(text_buf, text_len, pattern_buf, pattern_len,
                                 store_match, shift_and_matches);

    /* same number of matches, then same contents */
    ensure(had_err, gt_array_size(brute_force_matches) ==
                    gt_array_size(bmh_matches));
    ensure(had_err, gt_array_size(brute_force_matches) ==
                    gt_array_size(kmp_matches));
    ensure(had_err, gt_array_size(brute_force_matches) ==
                    gt_array_size(shift_and_matches));
    ensure(had_err, !gt_array_cmp(brute_force_matches, bmh_matches));
    ensure(had_err, !gt_array_cmp(brute_force_matches, kmp_matches));
    ensure(had_err, !gt_array_cmp(brute_force_matches, shift_and_matches));

    /* empty the result arrays for the next round */
    gt_array_reset(brute_force_matches);
    gt_array_reset(bmh_matches);
    gt_array_reset(kmp_matches);
    gt_array_reset(shift_and_matches);
  }

  gt_array_delete(shift_and_matches);
  gt_array_delete(bmh_matches);
  gt_array_delete(kmp_matches);
  gt_array_delete(brute_force_matches);

  return had_err;
}