コード例 #1
0
ファイル: unit_test_sorted.c プロジェクト: yifanwu/cs165db
/*
 * Unit test for range lookups through mark_matching_bv_for_unsorted().
 *
 * Appends 2*size values (k followed by k-1, for k = 0..size-1) and then, for
 * every q below size-3, queries the range [q-1, q] and asserts that
 * bit-vector positions 2q, 2q+1 and 2q+3 are marked.
 *
 * NOTE(review): the column file is created with SORTED although this test
 * exercises the unsorted lookup -- confirm that this is intended.
 */
void ut_unsorted(int size)
{
  printf("Testing UNSORTED.\n\n");

  char *name = "test_unsorted";
  make_new_column_file(name, SORTED);

  column_t *col = malloc(sizeof *col);
  init_col(col, name);

  // load the data: each step appends the pair (k, k-1)
  int k = 0;
  while (k < size) {
    append_value_to_file(col, k);
    append_value_to_file(col, k - 1);
    k++;
  }

  print_data_file(col->fp);
  init_col(col, name);  // init_col is called a second time before querying

  bv_t *marks = create_bv(2 * size);

  for (int q = 0; q < size - 3; q++) {
    const int base = 2 * q;

    mark_matching_bv_for_unsorted(col, marks, q - 1, q);
    assert(is_marked(marks, base));
    assert(is_marked(marks, base + 1));
    assert(is_marked(marks, base + 3));
    unmark_all_bv(marks);
  }
}
コード例 #2
0
ファイル: agrep-dp.c プロジェクト: gconcepcion/sratoolkit
/*
 * Call with pattern as the text, text as pattern.
 *
 * Feeds buf through the DP column routines one character at a time and
 * watches the last cell of every column (nxt[plen]).  The first column whose
 * score is within the allowance starts a match; scanning then continues while
 * the score stays equal or improves, and stops at the first worse column.
 *
 * pattern, plen  pattern and its length
 * errors         error budget, scaled by plen / buflen to get the allowance
 * buf, buflen    characters to scan
 * skip           out: index into buf of the last accepted column
 * errcnt         out: score of that column
 *
 * Returns 1 when a match was found (outputs written), 0 otherwise.  Returns 0
 * without touching the outputs when buflen is 0 or an allocation fails.
 *
 * NOTE(review): compute_dp_next_col() is called here with buf[j] as the 4th
 * and 0 as the 5th argument, while dp_find_end() passes the cost 4th and the
 * character 5th -- confirm the intended argument order.
 */
LIB_EXPORT uint32_t CC has_inside_approx_match( char *pattern, uint32_t plen, uint32_t errors, 
                                 char *buf, size_t buflen, 
                                 uint32_t *skip, uint32_t *errcnt )
{
    int32_t *prev;
    int32_t *nxt;
    int32_t *tmp;
    size_t j;
    int32_t allowable;
    int32_t dist;

    int32_t found = 0;
    int32_t foundpos = 0;
    int32_t founderr = 0;

    /* Nothing to scan; this also keeps the division below well defined. */
    if (buflen == 0)
        return 0;

    prev = malloc(sizeof *prev * ((size_t)plen + 1));
    nxt = malloc(sizeof *nxt * ((size_t)plen + 1));
    if (prev == NULL || nxt == NULL) {
        free(prev);
        free(nxt);
        return 0;
    }

    allowable = 1+lroundf((float)errors * plen / (float)buflen);

    init_col(pattern, plen, nxt);

    for (j=0; j<buflen; j++) {
        tmp = prev; prev = nxt; nxt = tmp;
        compute_dp_next_col(pattern, plen, 0, buf[j], 0, prev, nxt);

        dist = nxt[plen];

        /* 
           We still have to do this kind of thing because otherwise
           the match will extend past the end of the text (here pattern),
           and will match "useless" letters that just increase the score.
           So we continue looking at smaller subsequences of the pattern
           to see if something smaller matches better. 
        */

        if (found) {
            /* Stop at the first column that is worse than the best so far. */
            if (dist > founderr || dist > allowable)
                break;
            foundpos = (int32_t)j;
            founderr = dist;
        } else if (dist <= allowable) {
            /* Found a match */
            found = 1;
            foundpos = (int32_t)j;
            founderr = dist;

            /* Continue a bit to see if something's equally good or better */
        }
    }

    free(prev);
    free(nxt);
    if (found) {
        *skip = foundpos;
        *errcnt = founderr;
        return 1;
    }
    return 0;
}
コード例 #3
0
ファイル: unit_test_sorted.c プロジェクト: yifanwu/cs165db
/*
 * Unit test: inserts the values size-1 down to 0 into a SORTED column, each
 * at the index reported by get_lower_bound(), then checks that a point query
 * for every value v marks bit-vector position v.
 */
void ut_sorted_bounds(int size)
{
  printf("Testing SORTED with BOUNDS.\n\n");

  char *name = "test_sorted_bounds";
  make_new_column_file(name, SORTED);

  column_t *col = malloc(sizeof *col);
  init_col(col, name);
  bv_t *marks = create_bv(size);

  // descending insert; the slot is looked up before every insertion
  int value = size - 1;
  while (value >= 0) {
    int slot = get_lower_bound(col->fp, col->m.size, value);
    insert_value_to_file(col, slot, value);
    value--;
  }

  printf("Finished inserting data.\n");

#ifdef UT_VERBOSE
  print_data_file(col->bpt_fp);
#endif

  int probe = 0;
  while (probe < size) {
    printf("Searching values at %d\n", probe);
    mark_matching_bv_for_sorted(col, marks, probe, probe);
    assert(is_marked(marks, probe));
    unmark_all_bv(marks);
    probe++;
  }
}
コード例 #4
0
ファイル: helper_updates.c プロジェクト: yifanwu/cs165db
/*
 * Delete num_pos entries from the column file col_name.
 *
 * Each listed position is overwritten with the value currently stored in the
 * last slot, after which the size kept in the column metadata is decremented.
 * The metadata header is rewritten at the end and the file is closed.
 *
 * col_name    name handed to init_col()
 * delete_pos  positions to delete
 * num_pos     number of entries in delete_pos
 *
 * Returns 0 on success.  Returns -1 if the column is already empty, a
 * position is negative, or any seek/read/write/close on the file fails.  In
 * that case processing stops at the failing position, and the header still
 * records the deletions completed before it.
 *
 * NOTE(review): positions are processed in the order given.  If an earlier
 * entry causes the tail value to be moved and a later entry names that tail
 * slot, the moved value survives.  Callers presumably need to pass positions
 * in descending order -- confirm against the call sites.
 */
int delete_helper(char *col_name, int *delete_pos, uint32_t num_pos)
{
  column_t c;
  int rc = 0;

  init_col(&c, col_name);
  dbg_assert(c.m.type == UNSORTED);

  for (uint32_t u = 0; u < num_pos; u++) {
    int last_val;

    // an empty column has no last slot; a negative index would wrap the offset
    if (c.m.size == 0 || delete_pos[u] < 0) {
      rc = -1;
      break;
    }

    // fetch the value currently stored in the last slot
    if (fseek(c.fp, sizeof(column_meta_t) + (c.m.size - 1) * sizeof(int), SEEK_SET) != 0 ||
        fread(&last_val, sizeof(int), 1, c.fp) != 1) {
      rc = -1;
      break;
    }
    debug("We are deleting position %d with position %d\n", delete_pos[u], c.m.size - 1);

    // overwrite the deleted slot with it
    if (fseek(c.fp, sizeof(column_meta_t) + delete_pos[u] * sizeof(int), SEEK_SET) != 0 ||
        fwrite(&last_val, sizeof(int), 1, c.fp) != 1) {
      rc = -1;
      break;
    }

    // won't actually delete the data until this gets overwritten by the next value
    // saves time moving everything around
    c.m.size--;
  }

  // persist the (possibly partially) updated metadata
  if (fseek(c.fp, 0, SEEK_SET) != 0 ||
      fwrite(&(c.m), sizeof(column_meta_t), 1, c.fp) != 1) {
    rc = -1;
  }

  // force flush and release space
  if (fclose(c.fp) != 0) {
    rc = -1;
  }
  return rc;
}
コード例 #5
0
ファイル: agrep-dp.c プロジェクト: gconcepcion/sratoolkit
/*
 * Try the longest match first.
 *
 * For i from min(strlen(pattern), buflen) down to 8, compares the first i
 * characters of buf against the last i characters of pattern using the DP
 * column routines.  The first length whose score is within the allowance
 * starts a match; shorter lengths are then tried for as long as the score
 * stays equal or improves.
 *
 * pattern      NUL-terminated pattern
 * errors       error budget for the full pattern, scaled by i / plen
 * buf, buflen  text and the number of readable characters in it
 * length       out: length of the accepted match
 * errcnt       out: its score
 *
 * Returns 1 when a match was found (outputs written), 0 otherwise.  Returns 0
 * without touching the outputs when fewer than 8 characters can be compared
 * or an allocation fails.
 *
 * The candidate length is clamped to buflen so that buf is never read past
 * its end.
 *
 * NOTE(review): compute_dp_next_col() is called here with buf[j] as the 4th
 * and j as the 5th argument, while dp_find_end() passes the cost 4th and the
 * character 5th -- confirm the intended argument order.
 */
LIB_EXPORT uint32_t CC has_left_approx_match( char *pattern, uint32_t errors, 
                               char *buf, size_t buflen, 
                               uint32_t *length, uint32_t *errcnt )
{
    int32_t plen = strlen(pattern);
    int32_t bound = plen;
    int32_t *prev;
    int32_t *nxt;
    int32_t *tmp;
    int32_t i, j;
    int32_t allowable;
    char *subpattern;
    int32_t dist;

    int32_t found = 0;
    int32_t foundpos = 0;
    int32_t founderr = 0;

    /* Never compare more characters than buf actually holds. */
    if (buflen < (size_t)bound)
        bound = (int32_t)buflen;

    /* The scan below only considers lengths of 8 or more. */
    if (bound < 8)
        return 0;

    prev = malloc(sizeof *prev * ((size_t)plen + 1));
    nxt = malloc(sizeof *nxt * ((size_t)plen + 1));
    if (prev == NULL || nxt == NULL) {
        free(prev);
        free(nxt);
        return 0;
    }

    for (i=bound; i>=8; i--) {

        /* See if the first i chars of the text match the last i
           chars of the pattern with (errors) errors.
        */
        subpattern = pattern + (plen - i);
        init_col(subpattern, i, nxt);
        for (j=0; j<i; j++) {
            tmp = prev; prev = nxt; nxt = tmp;
            compute_dp_next_col(subpattern, i, 0, buf[j], j, prev, nxt);
        }
        dist = nxt[i];
        allowable = 1+lroundf((float)i * (float)errors / (float)plen);
        if (found) {
            /* Stop as soon as a shorter length scores worse. */
            if (dist > founderr)
                break;
            foundpos = i-1;
            founderr = dist;
        } else if (dist <= allowable) {
            /* Found a match */
            found = 1;
            foundpos = i-1;
            founderr = dist;

            /* Continue a bit to see if something's equally good or better */
        }
    }

    free(prev);
    free(nxt);
    if (found) {
        *length = foundpos+1;
        *errcnt = founderr;
        return 1;
    }
    return 0;
}
コード例 #6
0
ファイル: agrep-dp.c プロジェクト: gconcepcion/sratoolkit
/*
 * Scan buf forwards and locate the end of an approximate match of pattern.
 *
 * A column qualifies when its last cell (nxt[plen]) is <= threshold.  The
 * first qualifying column is recorded; later qualifying columns replace it
 * when their score is equal or better.  Scanning stops at the first
 * non-qualifying column seen after a match was recorded.
 *
 * bestpos    out: index into buf of the recorded column (1 when none)
 * bestscore  out: its score (10000 when none)
 *
 * Returns 1 when a match was recorded, 0 otherwise (including allocation
 * failure; the outputs then hold their "none" defaults).
 */
static
uint32_t dp_find_end(const char *pattern, AgrepFlags mode, int32_t threshold, const char *buf, int32_t buflen, int32_t *bestpos, int32_t *bestscore) 
{
    int32_t plen = strlen(pattern);
    int32_t *prev = malloc(sizeof *prev * (plen+1));
    int32_t *nxt = malloc(sizeof *nxt * (plen+1));
    int32_t *tmp;
    int32_t i;

    int32_t foundit = 0;

    *bestscore = 10000;
    *bestpos = 1;

    if (prev == NULL || nxt == NULL) {
        free(prev);
        free(nxt);
        return 0;
    }

    init_col(pattern, plen, nxt);
#if _TRACE
    print_col_as_row(nxt, plen);
#endif
    for (i=0; i<buflen; i++) {
        tmp = prev; prev = nxt; nxt = tmp;
        compute_dp_next_col(pattern, plen, mode, 0, buf[i], prev, nxt);
        if (nxt[plen] <= threshold) {
            /* The first hit is always taken; later hits only when they are
               at least as good as the recorded one. */
            if (!foundit || nxt[plen] <= *bestscore) {
                *bestpos = i;
                *bestscore = nxt[plen];
            }
            foundit = 1;
        } else if (foundit) {
            /* Left the matching region: keep what was recorded. */
            break;
        }
#if _TRACE
        print_col_as_row(nxt, plen);
#endif
    }

    free(prev);
    free(nxt);
    return foundit ? 1 : 0;
}
コード例 #7
0
ファイル: agrep-dp.c プロジェクト: gconcepcion/sratoolkit
/*
 * Scan buf backwards from index end and locate the beginning of an
 * approximate match of reverse_pattern.
 *
 * The scan covers indices end down to max(0, end - plen - threshold - 1).
 * Every column whose last cell is <= threshold moves *begin to the current
 * index; scanning stops at the first column above the threshold seen after
 * such a hit.
 *
 * buflen  not used by this function
 * begin   out: index of the last qualifying column (0 when none)
 *
 * Returns 1 when at least one column qualified, 0 otherwise (including
 * allocation failure).
 */
static
uint32_t dp_find_begin(char *reverse_pattern, AgrepFlags mode, int32_t threshold, const char *buf, int32_t buflen, int32_t end, int32_t *begin) 
{
    int32_t plen = strlen(reverse_pattern);
    int32_t *prev = malloc(sizeof *prev * (plen+1));
    int32_t *nxt = malloc(sizeof *nxt * (plen+1));
    int32_t *tmp;
    int32_t i;

    int32_t limit;

    int32_t foundit = 0;

    (void)buflen;

    *begin = 0;

    if (prev == NULL || nxt == NULL) {
        free(prev);
        free(nxt);
        return 0;
    }

    limit = end - plen - threshold - 1;
    if (limit < 0)
        limit = 0;

    init_col(reverse_pattern, plen, nxt);
#if _TRACE
    print_col_as_row(nxt, plen);
#endif
    for (i=end; i>=limit; i--) {
        tmp = prev; prev = nxt; nxt = tmp;
        /* We need to make the initial cost of this column
           reflect the cost of skipping the suffix (up to this point)
           of the text */
        compute_dp_next_col(reverse_pattern, plen, mode, end-i, buf[i], prev, nxt);

        if (nxt[plen] <= threshold) {
            *begin = i;
            foundit = 1;
        } else if (foundit) {
            break;
        }
#if _TRACE
        print_col_as_row(nxt, plen);
#endif
    }

    free(prev);
    free(nxt);
    return foundit ? 1 : 0;
}
コード例 #8
0
ファイル: helper_updates.c プロジェクト: yifanwu/cs165db
/*
 * Overwrite the entries at the given positions of column file col_name with
 * val, then close the file.
 *
 * col_name    name handed to init_col()
 * update_pos  positions to overwrite
 * num_pos     number of entries in update_pos
 * val         value written to every listed position
 *
 * Returns 0 on success.  Returns -1 if a position is negative or a
 * seek/write/close fails; processing stops at the first failing position.
 */
int update_helper(char *col_name, int *update_pos, uint32_t num_pos, int val)
{
  column_t c;
  int rc = 0;

  init_col(&c, col_name);
  dbg_assert(c.m.type == UNSORTED);

  for (uint32_t u = 0; u < num_pos; u++) {
    // a negative index would wrap the computed file offset
    if (update_pos[u] < 0 ||
        fseek(c.fp, sizeof(column_meta_t) + update_pos[u] * sizeof(int), SEEK_SET) != 0 ||
        fwrite(&val, sizeof(int), 1, c.fp) != 1) {
      rc = -1;
      break;
    }
  }

  // force flush and release space
  if (fclose(c.fp) != 0) {
    rc = -1;
  }
  return rc;
}
コード例 #9
0
ファイル: agrep-dp.c プロジェクト: gconcepcion/sratoolkit
/*
 * Try the longest match first.
 *
 * For i from min(strlen(pattern), buflen) down to 8, compares the first i
 * characters of pattern against the last i characters of buf.  The prefix is
 * reversed and buf is walked from its end, so the comparison runs right to
 * left.  The first length whose score is within the allowance starts a
 * match; shorter lengths are then tried for as long as the score stays equal
 * or improves.
 *
 * pattern      NUL-terminated pattern
 * errors       error budget for the full pattern, scaled by i / plen
 * buf, buflen  text and its length
 * bestpos      out: buflen - i for the accepted length i
 * errcnt       out: its score
 *
 * Returns 1 when a match was found (outputs written), 0 otherwise.  Returns 0
 * without touching the outputs when fewer than 8 characters can be compared
 * or an allocation fails.
 *
 * NOTE(review): compute_dp_next_col() is called here with buf[bufj] as the
 * 4th and j as the 5th argument, while dp_find_end() passes the cost 4th and
 * the character 5th -- confirm the intended argument order.
 */
LIB_EXPORT uint32_t CC has_right_approx_match( char *pattern, uint32_t errors, 
                                char *buf, size_t buflen, 
                                uint32_t *bestpos, uint32_t *errcnt )
{
    uint32_t plen = strlen(pattern);
    int32_t *prev = NULL;
    int32_t *nxt = NULL;
    int32_t *tmp;
    int32_t i, j;
    int32_t allowable;
    char *subpattern = NULL;
    char *subpattern_r = NULL;
    int32_t dist;

    int32_t found = 0;
    int32_t foundpos = 0;
    int32_t founderr = 0;
    int32_t bufj;

    int bound = plen;
    if (buflen < (size_t)bound) {
        bound = (int)buflen;
    }

    /* The scan below only considers lengths of 8 or more. */
    if (bound < 8)
        return 0;

    prev = malloc(sizeof *prev * ((size_t)plen + 1));
    nxt = malloc(sizeof *nxt * ((size_t)plen + 1));
    subpattern = malloc((size_t)plen + 1);
    subpattern_r = malloc((size_t)plen + 1);
    if (prev == NULL || nxt == NULL || subpattern == NULL || subpattern_r == NULL)
        goto DONE;  /* found is still 0, so this returns 0 after cleanup */

    /* Working copy of the pattern, explicitly NUL-terminated. */
    memcpy(subpattern, pattern, plen);
    subpattern[plen] = '\0';

    for (i=bound; i>=8; i--) {

        /* See if the first i chars of the pattern match the last i
           chars of the text with (errors) errors.
           We match in reverse, so the initial penalty of skipping
           the "first part" of the pattern means skipping the end
        */
        /* Cut the working copy down to a prefix of length i.  i only ever
           shrinks, so the cut-off character never has to be restored. */
        subpattern[i] = '\0';

        reverse_string(subpattern, i, subpattern_r);
        init_col(subpattern_r, i, nxt);

        for (j=0; j<i; j++) {
            bufj = buflen - j - 1;
            tmp = prev; prev = nxt; nxt = tmp;
            compute_dp_next_col(subpattern_r, i, 0, buf[bufj], j, prev, nxt);
        }
        dist = nxt[i];
        allowable = 1+lroundf((float)i * (float)errors / (float)plen);
        if (found) {
            /* Stop as soon as a shorter length scores worse. */
            if (dist > founderr)
                break;
            foundpos = buflen - i;
            founderr = dist;
        } else if (dist <= allowable) {
            /* Found a match */
            found = 1;
            foundpos = buflen - i;
            founderr = dist;

            /* Continue a bit to see if something's equally good or better */
        }
    }
    DONE:
    free(subpattern);
    free(subpattern_r);
    free(prev);
    free(nxt);
    if (found) {
        *bestpos = foundpos;
        *errcnt = founderr;
        return 1;
    }
    return 0;
}
コード例 #10
0
ファイル: agrep-dp.c プロジェクト: gconcepcion/sratoolkit
/*
 * Report one match to cb.
 *
 * The match carries the given score.  With AGREP_ANCHOR_LEFT it is reported
 * as position 0 / length last+1, otherwise as position last / length -1.
 * The continue flag is initialised to AGREP_CONTINUE before the call, and
 * whatever the callback leaves in it is returned.
 */
static
AgrepContinueFlag dp_end_emit_match( AgrepMatchCallback cb, const void *cbinfo,
                                     AgrepFlags mode, int32_t score, int32_t last )
{
    AgrepMatch match;
    AgrepContinueFlag cont = AGREP_CONTINUE;

    match.score = score;
    if (mode & AGREP_ANCHOR_LEFT) {
        match.position = 0;
        match.length = last+1;
    } else {
        match.position = last;
        match.length = -1;
    }
    (*cb)(cbinfo, &match, &cont);
    return cont;
}

/*
 * Forward scan of args->buf that reports match end positions.
 *
 * Every column whose last cell (nxt[plen]) is <= args->threshold is a hit.
 * Without AGREP_EXTEND_SAME / AGREP_EXTEND_BETTER each hit is reported at
 * once.  With either flag a hit opens a run that is extended while the score
 * improves (or, with AGREP_EXTEND_SAME, stays equal); the run is reported
 * when the score gets worse, rises above the threshold, or the scan ends.
 *
 * With AGREP_ANCHOR_LEFT the scan is limited to dp->plen + threshold + 1
 * characters, the starting cost of column i is i+1, and matches go to
 * args->cb / args->cbinfo.  Otherwise they go to dp_end_callback with args
 * as its info pointer.
 *
 * The scan stops early when a callback leaves the continue flag at anything
 * other than AGREP_CONTINUE.  Nothing is reported if allocation fails.
 */
static
void dp_callback_end( const AgrepCallArgs *args )
{
    AgrepFlags mode = args->self->mode;
    char *pattern = args->self->dp->pattern;
    int32_t threshold = args->threshold;
    const char *buf = args->buf;
    int32_t buflen = args->buflen;

    AgrepMatchCallback cb = dp_end_callback;
    const void *cbinfo = args;

    int32_t plen = strlen(pattern);
    int32_t *prev = malloc(sizeof *prev * (plen+1));
    int32_t *nxt = malloc(sizeof *nxt * (plen+1));
    int32_t curscore = 0;
    int32_t curlast = 0;
    int32_t continuing = 0;

    int32_t startingcost = 0;
    int32_t limit;

    int32_t *tmp;
    int32_t i;

    if (prev == NULL || nxt == NULL)
        goto EXIT;

    init_col(pattern, plen, nxt);
#if _TRACE
    print_col_as_row(nxt, plen);
#endif

    limit = buflen;
    if (mode & AGREP_ANCHOR_LEFT) {
        limit = args->self->dp->plen + threshold+1;
        if (limit > buflen) {
            limit = buflen;
        }
        cb = args->cb;
        cbinfo = args->cbinfo;
    }

    for (i=0; i<limit; i++) {
        tmp = prev; prev = nxt; nxt = tmp;

        if (mode & AGREP_ANCHOR_LEFT) 
            startingcost = i+1;
        compute_dp_next_col(pattern, plen, mode, startingcost, 
                            buf[i], prev, nxt);
        if (nxt[plen] <= threshold) {

            if (continuing) {
                /* A run is only ever opened when one of the EXTEND flags is
                   set, so the flags need no re-checking here. */
                if (nxt[plen] < curscore) {
                    curscore = nxt[plen];
                    curlast = i;
                } else if (nxt[plen] == curscore) {
                    if (mode & AGREP_EXTEND_SAME) {
                        curlast = i;
                    }
                } else {
                    /* Score got worse: close the run and report it. */
                    continuing = 0;
                    if (dp_end_emit_match(cb, cbinfo, mode, curscore, curlast)
                        != AGREP_CONTINUE)
                        goto EXIT;
                }
            } else if ((mode & AGREP_EXTEND_SAME) ||
                       (mode & AGREP_EXTEND_BETTER)) {
                curscore = nxt[plen];
                curlast = i;
                continuing = 1;
            } else {
                if (dp_end_emit_match(cb, cbinfo, mode, nxt[plen], i)
                    != AGREP_CONTINUE)
                    goto EXIT;
            }
            /* If we're no longer under the threshold, we might
               have been moving forward looking for a better match 
            */
        } else if (continuing) {
            continuing = 0;
            if (dp_end_emit_match(cb, cbinfo, mode, curscore, curlast)
                != AGREP_CONTINUE)
                goto EXIT;
        }
#if _TRACE
        print_col_as_row(nxt, plen);
#endif
    }
    if (continuing) {
        /* Scan ended inside a run: report it.  The helper hands the callback
           an initialised continue flag. */
        continuing = 0;
        (void)dp_end_emit_match(cb, cbinfo, mode, curscore, curlast);
    }
EXIT:
    free(prev);
    free(nxt);
}
コード例 #11
0
ファイル: agrep-dp.c プロジェクト: gconcepcion/sratoolkit
/*
 * Backward scan of args->buf that reports where a match ending at index
 * `end` begins.
 *
 * The scan walks i from end down to max(0, end - dp->plen - threshold - 1).
 * A column is a hit when its last cell (nxt[plen]) is <= forwardscore if
 * AGREP_LEFT_MAINTAIN_SCORE is set, otherwise <= threshold.
 *
 * Without AGREP_EXTEND_SAME / AGREP_EXTEND_BETTER each hit is reported at
 * once through args->cb.  With either flag a hit opens a run that is
 * extended while the score improves (or, with AGREP_EXTEND_SAME, stays
 * equal); the run is reported when a hit scores worse or when the scan ends.
 * Reported matches carry position = start index and length = end - start + 1.
 *
 * Returns the continue flag: whatever the last callback left in it, or
 * AGREP_CONTINUE when the scan finished without a pending run.
 *
 * NOTE(review): the two malloc() results are used without a NULL check.
 */
static
AgrepContinueFlag dp_callback_begin(const AgrepCallArgs *args, int32_t end, int32_t forwardscore)
{
    AgrepFlags mode = args->self->mode;
    char *reverse_pattern = args->self->dp->rpattern;
    int32_t threshold = args->threshold;
    const char *buf = args->buf;
    AgrepMatchCallback cb = args->cb;
    void *cbinfo = args->cbinfo;

    int32_t plen = strlen(reverse_pattern);
    int32_t *prev = malloc(sizeof(int32_t)*(plen+1));
    int32_t *nxt = malloc(sizeof(int32_t)*(plen+1));
    AgrepMatch match;
    AgrepContinueFlag cont;
    int32_t *tmp;
    int32_t i;

    /* State of the run currently being extended (valid while continuing). */
    int32_t curscore = 0;
    int32_t curlast = 0;
    int32_t continuing = 0;

    int32_t limit;

    init_col(reverse_pattern, plen, nxt);
#if _TRACE
    print_col_as_row(nxt, plen);
#endif

    /* Lowest index the backward scan will visit, clamped at 0. */
    limit = end - args->self->dp->plen - threshold - 1;
    if (limit < 0)
        limit = 0;
    
    for (i=end; i>=limit; i--) {
        /* Swap the column buffers: the previous result becomes the input. */
        tmp = prev; prev = nxt; nxt = tmp;
        /* For the reverse scan, we need to make the initial cost
           of the column depend upon the price of skipping the
           suffix (up to this point) of the text */
        compute_dp_next_col(reverse_pattern, plen, mode, end-i+1,
                            buf[i], prev, nxt);

        /* Hit test: bounded by the forward score or by the threshold. */
        if ((mode & AGREP_LEFT_MAINTAIN_SCORE)?
            nxt[plen] <= forwardscore:
            nxt[plen] <= threshold)
        {
            if (continuing) {
                if (nxt[plen] < curscore) {
                    /* Better score: move the run start here. */
                    curscore = nxt[plen];
                    curlast = i;
                } else if (nxt[plen] == curscore &&
                           (mode & AGREP_EXTEND_SAME)) {
                    /* Equal score extends the run only with EXTEND_SAME. */
                    curlast = i;
                } else {
                    /* Otherwise close the run and report it. */
                    continuing = 0;
                    match.score = curscore;
                    match.position = curlast;
                    match.length = end - curlast + 1;
                    cont = AGREP_CONTINUE;
                    (*cb)(cbinfo, &match, &cont);
                    if (cont != AGREP_CONTINUE)
                        goto EXIT;
                }
            } else if ((mode & AGREP_EXTEND_SAME) ||
                       (mode & AGREP_EXTEND_BETTER)) {
                /* First hit of a new run. */
                curscore = nxt[plen];
                curlast = i;
                continuing = 1;
            } else {
                /* No extension requested: report every hit immediately. */
                match.score = nxt[plen];
                match.position = i;
                match.length = end - i + 1;
                cont = AGREP_CONTINUE;
                (*cb)(cbinfo, &match, &cont);
                if (cont != AGREP_CONTINUE)
                    goto EXIT;
            }
        }
#if _TRACE
        print_col_as_row(nxt, plen);
#endif
    }
    if (continuing) {
        /* Scan ended inside a run: report it and return the callback's flag. */
        continuing = 0;
        match.score = curscore;
        match.position = curlast;
        match.length = end - curlast + 1;
        cont = AGREP_CONTINUE;
        (*cb)(cbinfo, &match, &cont);
        goto EXIT;
    }
    cont = AGREP_CONTINUE;
EXIT:
    free(prev);
    free(nxt);
    return cont;
}
コード例 #12
0
ファイル: agrep-dp.c プロジェクト: gconcepcion/sratoolkit
/*
 * Walk buf through the DP column routines and count, column by column,
 * whether the last cell (nxt[plen]) dropped by exactly 1 (a "hit") or not
 * (a "miss").  The walk stops at the end of buf or as soon as the number of
 * misses reaches 1 + (errors / strlen(pattern)) * (characters consumed).
 *
 * bestpos     out: index of the last column processed minus the number of
 *             misses trailing the last hit (-1 when no column was processed)
 * ret_hits    out: number of hits
 * ret_misses  out: number of misses
 *
 * If allocation fails the outputs are set to -1 / 0 / 0.
 *
 * NOTE(review): an empty pattern makes the error rate a division by zero in
 * floating point -- confirm that callers never pass one.
 * NOTE(review): compute_dp_next_col() is called here with buf[i] as the 4th
 * and i as the 5th argument, while dp_find_end() passes the cost 4th and the
 * character 5th -- confirm the intended argument order.
 */
LIB_EXPORT void CC dp_scan_for_left_match ( char *pattern, int errors, char *buf, 
    int buflen, int *bestpos, int *ret_hits, int *ret_misses )
{
    int plen = strlen(pattern);
    int *prev = malloc(sizeof *prev * (plen+1));
    int *nxt = malloc(sizeof *nxt * (plen+1));
    int *tmp;
    int i;

    float errrate = (float)errors / (float)plen;
    float play;

    int hits = 0;
    int misses = 0;

    int change;

    int lastscore = plen;

    int cont = 1;

    int lastwasmiss = 0;
    int trailing_misses = 0;

    *bestpos = -1;

    if (prev == NULL || nxt == NULL) {
        free(prev);
        free(nxt);
        *ret_hits = 0;
        *ret_misses = 0;
        return;
    }

#if _TRACE
    printf("Err rate is %f\n", errrate);
#endif

    init_col(pattern, plen, nxt);
#if _TRACE
    print_col_as_row(nxt, plen);
#endif
    for (i=0; i<buflen && cont; i++) {
        tmp = prev; prev = nxt; nxt = tmp;
        compute_dp_next_col(pattern, plen, 0, buf[i], i, prev, nxt);

        /* A drop of exactly one counts as a hit; anything else as a miss. */
        change = lastscore - nxt[plen];
        if (change == 1) {
            lastwasmiss = 0;
            trailing_misses = 0;
            hits++;
        } else {
            if (lastwasmiss)
                trailing_misses++;
            else
                trailing_misses = 1;
            misses++;
            lastwasmiss = 1;
        }

        /* Miss allowance grows with the number of characters consumed. */
        play = errrate * (i+1);

        cont = (misses < (1.0+play));

#if _TRACE
        printf("i %d char %c score %d diff %d continue %d misses %d play %f\n", 
               i, buf[i], nxt[plen], lastscore - nxt[plen], cont, misses, play);
#endif
        lastscore = nxt[plen];

    }
    /* Settle up */
    i--;
#if _TRACE
    printf("Total hits: %d trailing misses: %d  position: %d\n",
           hits, trailing_misses, i);
#endif
    free(prev);
    free(nxt);

    *bestpos = i - trailing_misses;
    /* Not our usual score. */
    *ret_hits = hits;
    *ret_misses = misses;
}
コード例 #13
0
ファイル: unit_test_sorted.c プロジェクト: yifanwu/cs165db
/*
 * Unit test for a SORTED column.
 *
 * Inserts 5, 1, 2, 3 at explicit indices so that the file should read
 * 1 2 3 5, verifies the stored size and the four values by reading the file
 * directly, then checks point and range queries through
 * mark_matching_bv_for_sorted().
 */
void ut_sorted()
{
  printf("Testing SORTED.\n\n");

  char *name = "test_sorted";
  make_new_column_file(name, SORTED);

  column_t *col = malloc(sizeof *col);
  init_col(col, name);

  // let's load!
  insert_value_to_file(col, 0, 5);
  insert_value_to_file(col, 0, 1);
  insert_value_to_file(col, 1, 2);
  insert_value_to_file(col, 2, 3);
  // final: 1 2 3 5

  printf("Data inserted\n");

  // read the metadata header back from the start of the file
  column_meta_t meta;
  fseek(col->fp, 0, SEEK_SET);
  fread(&meta, sizeof meta, 1, col->fp);
  printf("Size written in file is %d\n", meta.size);
  assert(meta.size == 4);

  // the four values follow the header, one int each
  int value;
  int n_read;

  n_read = fread(&value, sizeof value, 1, col->fp);
  printf("Checking values at 0, we got %d\n", value);
  assert(n_read == 1);
  assert(value == 1);

  printf("Checking values at 1\n");
  n_read = fread(&value, sizeof value, 1, col->fp);
  assert(n_read == 1);
  assert(value == 2);

  printf("Checking values at 2\n");
  n_read = fread(&value, sizeof value, 1, col->fp);
  assert(n_read == 1);
  assert(value == 3);

  printf("Checking values at 3\n");
  n_read = fread(&value, sizeof value, 1, col->fp);
  assert(n_read == 1);
  assert(value == 5);

  bv_t *marks = create_bv(4);

  // point queries: each value must mark its own position only
  printf("Searching values at 1\n");
  mark_matching_bv_for_sorted(col, marks, 1, 1);
  assert(is_marked(marks, 0));
  assert(!is_marked(marks, 1));
  unmark_all_bv(marks);

  printf("Searching values at 2\n");
  mark_matching_bv_for_sorted(col, marks, 2, 2);
  assert(is_marked(marks, 1));
  assert(!is_marked(marks, 0));
  unmark_all_bv(marks);

  printf("Searching values at 3\n");
  mark_matching_bv_for_sorted(col, marks, 3, 3);
  assert(is_marked(marks, 2));
  assert(!is_marked(marks, 1));
  unmark_all_bv(marks);

  printf("Searching values at 5\n");
  mark_matching_bv_for_sorted(col, marks, 5, 5);
  assert(is_marked(marks, 3));
  assert(!is_marked(marks, 1));
  unmark_all_bv(marks);

  // range query: 1..3 covers the first three positions but not the last
  printf("Searching values between 1 and 3\n");
  mark_matching_bv_for_sorted(col, marks, 1, 3);
  assert(is_marked(marks, 0));
  assert(is_marked(marks, 1));
  assert(is_marked(marks, 2));
  assert(!is_marked(marks, 3));
  unmark_all_bv(marks);
}
コード例 #14
0
ファイル: sim4.init.c プロジェクト: 5432935/crossbridge
/*
 * Program entry point.
 *
 * Fills the global `options` with defaults, overrides them from the command
 * line (getopt), then expects exactly two remaining arguments: the file
 * holding the first sequence and the file holding the second sequence(s).
 * The first sequence is hashed once; every sequence read from the second
 * file is run through SIM4 in the orientation(s) selected by options.reverse
 * (0: as given, 1: reverse complement, 2: both), optionally filtered by
 * options.filterPct, and printed with print_res().
 *
 * Returns 1 after printing the usage text when the argument count is wrong,
 * 0 otherwise.  Invalid option values end the program through fatal().
 */
int
main(int argc, char *argv[])
{
  int count;
  seq_t seq1, seq2;
  hash_env_t he;
  collec_t res, rev_res;
#if defined(DEBUG) && (DEBUG > 1)
  mcheck(NULL);
  mtrace();
#endif
  argv0 = argv[0];
  if (setlocale(LC_ALL, "POSIX") == NULL)
    fprintf(stderr, "%s: Warning: could not set locale to POSIX\n", argv[0]);
  signal(SIGSEGV, bug_handler);
#ifndef __MINGW32__
  signal(SIGBUS, bug_handler);
#endif
  /* Default options.  */
  options.C = DEFAULT_C;
  options.cutoff = DIST_CUTOFF;
  options.gapPct = DEFAULT_GAPPCT;
  options.intron_window = 6;
  options.K = DEFAULT_K;
  options.splice_type_list = "GTAG,GCAG,GTAC,ATAC";
  options.nbSplice = 4;
  options.scoreSplice_window = 10;
  options.mismatchScore = MISMATCH;
  options.reverse = 2;
  options.matchScore = MATCH;
  options.W = DEFAULT_W;
  options.X = DEFAULT_X;
  options.filterPct = DEFAULT_FILTER;
  options.minScore_cutoff = MATCH_CUTOFF;
  while (1) {
    int c = getopt(argc, argv, "A:C:c:E:f:g:I:K:L:M:o:q:R:r:W:X:");
    if (c == -1)
      break;
    switch (c) {
    case 'A':
      options.ali_flag = atoi(optarg);
      if (options.ali_flag < 0 || options.ali_flag > 4)
        fatal("A must be one of 0, 1, 2, 3, or 4.\n");
      break;
    case 'C': {
      int val = atoi(optarg);
      if (val < 0)
        fatal("Value for option C must be non-negative.\n");
      options.C = val;
      break;
    }
    case 'c': {
      int val = atoi(optarg);
      if (val < 0)
        fatal("Value for option c must be non-negative.\n");
      options.minScore_cutoff = val;
      break;
    }
    case 'E':
      options.cutoff = atoi(optarg);
      if (options.cutoff < 3 || options.cutoff > 10)
        fatal("Cutoff (E) must be within [3,10].\n");
      break;
    case 'f':
      options.filterPct = atoi(optarg);
      if (options.filterPct > 100)
        fatal("Filter in percent (f) must be within [0,100].\n");
      break;
    case 'g':
      options.gapPct = atoi(optarg);
      break;
    case 'I':
      options.intron_window = atoi(optarg);
      break;
    case 'K': {
      int val = atoi(optarg);
      if (val < 0)
        fatal("Value for option K must be non-negative.\n");
      options.K = val;
      break;
    }
    case 'L': {
      /* The list must be 4-letter splice types over ACGT separated by
         commas, i.e. every 5th character (index 4, 9, ...) is a comma.  */
      size_t i;
      size_t len = strlen(optarg);
      options.splice_type_list = optarg;
      options.nbSplice = 1;
      if (len % 5 != 4)
        fatal("Splice types list has illegal length (%zu)\n", len);
      for (i = 0; i < len; i++)
        if (i % 5 == 4) {
          if (options.splice_type_list[i] != ',')
            fatal("Comma expected instead of %c at position %zu "
                  "in splice types list.\n",
                  options.splice_type_list[i], i);
          options.nbSplice += 1;
        } else {
          if (options.splice_type_list[i] != 'A'
              && options.splice_type_list[i] != 'C'
              && options.splice_type_list[i] != 'G'
              && options.splice_type_list[i] != 'T')
            fatal("Expected 'A', 'C', 'G' or 'T' instead of '%c' at "
                  "position %zu in splice types list.\n",
                  options.splice_type_list[i], i);
        }
      break;
    }
    case 'M': {
      int val = atoi(optarg);
      if (val < 0)
        fatal("Value for option M must be non-negative.\n");
      options.scoreSplice_window = val;
      break;
    }
    case 'o':
      options.dnaOffset = atoi(optarg);
      break;
    case 'q':
      options.mismatchScore = atoi(optarg);
      break;
    case 'R':
      options.reverse = atoi(optarg);
      if (options.reverse < 0 || options.reverse > 2)
        fatal("R must be one of 0, 1, or 2.\n");
      break;
    case 'r':
      options.matchScore = atoi(optarg);
      break;
    case 'W':
      options.W = atoi(optarg);
      if (options.W < 1 || options.W > 15)
        fatal("W must be within [1,15].\n");
      break;
    case 'X':
      options.X = atoi(optarg);
      if (options.X < 1)
        fatal("X must be positive.\n");
      break;
    case '?':
      break;
    default:
      fprintf(stderr, "?? getopt returned character code 0%o ??\n", c);
    }
  }
  /* Exactly two positional arguments are required.  */
  if (optind + 2 != argc) {
    fprintf(stderr, Usage, argv[0], options.ali_flag, options.C,
            options.minScore_cutoff, options.cutoff,
            options.filterPct, options.gapPct, options.intron_window,
            options.K, options.splice_type_list, options.scoreSplice_window,
            options.dnaOffset, options.mismatchScore, options.reverse,
            options.matchScore, options.W, options.X);
    return 1;
  }

  /* read seq1 */
  init_seq(argv[optind], &seq1);
  if (get_next_seq(&seq1, options.dnaOffset, 1) != 0)
    fatal("Cannot read sequence from %s.\n", argv[optind]);
  /* NOTE(review): strncpy does not NUL-terminate when the header has 256 or
     more characters -- confirm the size of dna_seq_head.  */
  strncpy(dna_seq_head, seq1.header, 256);

  /* read seq2 */
  init_seq(argv[optind + 1], &seq2);
  if (get_next_seq(&seq2, 0, 0) != 0)
    fatal("Cannot read sequence from %s.\n", argv[optind + 1]);

  init_encoding();
  init_hash_env(&he, options.W, seq1.seq, seq1.len);
  init_col(&res, 1);
  init_col(&rev_res, 1);
  bld_table(&he);
  init_splice_junctions();

  /* The first seq2 record is already loaded; later passes fetch the next.  */
  count = 0;
  while (!count || get_next_seq(&seq2, 0, 0) == 0) {
    unsigned int curRes;
    /* NOTE(review): same strncpy termination caveat as for dna_seq_head.  */
    strncpy(rna_seq_head, seq2.header, 256);
    ++count;

    switch (options.reverse) {
    case  0:
      SIM4(&he, &seq2, &res);
      break;
    case  2:
      SIM4(&he, &seq2, &res);
      /* fallthrough: mode 2 also runs the reverse-complement pass */
    case  1:
      seq_revcomp_inplace(&seq2);
      SIM4(&he, &seq2, &rev_res);
      break;
    default:
      fatal ("Unrecognized request for EST orientation.\n");
    }
    /* Keep only the best matches, according to filterPct.  */
    if (options.filterPct > 0) {
      unsigned int max_nmatches = 0;
      for (curRes = 0; curRes < rev_res.nb; curRes++) {
        result_p_t r = rev_res.e.result[curRes];
        if (r->st.nmatches > max_nmatches)
          max_nmatches = r->st.nmatches;
      }
      for (curRes = 0; curRes < res.nb; curRes++) {
        result_p_t r = res.e.result[curRes];
        if (r->st.nmatches > max_nmatches)
          max_nmatches = r->st.nmatches;
      }
      /* Results below filterPct percent of the best count are zeroed.  */
      max_nmatches = (max_nmatches * options.filterPct) / 100;
      for (curRes = 0; curRes < rev_res.nb; curRes++) {
        result_p_t r = rev_res.e.result[curRes];
        if (r->st.nmatches < max_nmatches)
          r->st.nmatches = 0;
      }
      for (curRes = 0; curRes < res.nb; curRes++) {
        result_p_t r = res.e.result[curRes];
        if (r->st.nmatches < max_nmatches)
          r->st.nmatches = 0;
      }
    }
    /* Now, print results.  */
    for (curRes = 0; curRes < rev_res.nb; curRes++)
      print_res(rev_res.e.result[curRes], 1, &seq1, &seq2);
    rev_res.nb = 0;
    if (options.reverse && options.ali_flag)
      /* reverse-complement back seq2 for alignment */
      seq_revcomp_inplace(&seq2);
    for (curRes = 0; curRes < res.nb; curRes++)
      print_res(res.e.result[curRes], 0, &seq1, &seq2);
    res.nb = 0;
  }
#ifdef DEBUG
  fprintf(stderr, "DEBUG mode: freeing all memory...\n");
  fflush(stdout);
  fflush(stderr);
  free_hash_env(&he);
  free_seq(&seq1);
  free_seq(&seq2);
  free(options.splice);
  free(res.e.elt);
  free(rev_res.e.elt);
#endif
  return 0;
}