コード例 #1
0
ファイル: re_parse.c プロジェクト: brenns10/libstephen
/*
 * Runs TERM on the input "a" and checks that the result is a TERM node
 * with exactly one child: a CharSym token carrying 'a'.
 */
static int test_TERM_CharSym(void)
{
  Lexer lex;
  lex.input.str = "a";
  lex.input.wstr = NULL;
  lex.tok = (Token){.sym=0, .c=0};
  lex.nbuf = 0;
  lex.index = 0;

  nextsym(&lex);
  PTree *root = TERM(&lex);
  expect(Eof, &lex);

  TA_PTR_NE(root, NULL);
  TA_INT_EQ(root->nt, TERMnt);
  TA_INT_EQ(root->nchildren, 1);

  /* Only look at the child once its existence has been asserted. */
  PTree *leaf = root->children[0];
  TA_INT_EQ(leaf->tok.sym, CharSym);
  TA_CHAR_EQ(leaf->tok.c, 'a');

  free_tree(root);
  return 0;
}

/*
 * Runs TERM on the input "-" and checks that the result is a TERM node
 * with exactly one child: a Minus token carrying '-'.
 */
static int test_TERM_Minus(void)
{
  Lexer lex;
  lex.input.str = "-";
  lex.input.wstr = NULL;
  lex.tok = (Token){.sym=0, .c=0};
  lex.nbuf = 0;
  lex.index = 0;

  nextsym(&lex);
  PTree *root = TERM(&lex);
  expect(Eof, &lex);

  TA_PTR_NE(root, NULL);
  TA_INT_EQ(root->nt, TERMnt);
  TA_INT_EQ(root->nchildren, 1);

  /* Only look at the child once its existence has been asserted. */
  PTree *leaf = root->children[0];
  TA_INT_EQ(leaf->tok.sym, Minus);
  TA_CHAR_EQ(leaf->tok.c, '-');

  free_tree(root);
  return 0;
}

/*
 * Runs TERM on the input "^" and checks that the result is a TERM node
 * with exactly one child: a Caret token carrying '^'.
 */
static int test_TERM_Caret(void)
{
  Lexer lex;
  lex.input.str = "^";
  lex.input.wstr = NULL;
  lex.tok = (Token){.sym=0, .c=0};
  lex.nbuf = 0;
  lex.index = 0;

  nextsym(&lex);
  PTree *root = TERM(&lex);
  expect(Eof, &lex);

  TA_PTR_NE(root, NULL);
  TA_INT_EQ(root->nt, TERMnt);
  TA_INT_EQ(root->nchildren, 1);

  /* Only look at the child once its existence has been asserted. */
  PTree *leaf = root->children[0];
  TA_INT_EQ(leaf->tok.sym, Caret);
  TA_CHAR_EQ(leaf->tok.c, '^');

  free_tree(root);
  return 0;
}

/*
 * Runs TERM on the input "." and checks that the result is a TERM node
 * with exactly one child: a Dot token carrying '.'.
 */
static int test_TERM_Dot(void)
{
  Lexer lex;
  lex.input.str = ".";
  lex.input.wstr = NULL;
  lex.tok = (Token){.sym=0, .c=0};
  lex.nbuf = 0;
  lex.index = 0;

  nextsym(&lex);
  PTree *root = TERM(&lex);
  expect(Eof, &lex);

  TA_PTR_NE(root, NULL);
  TA_INT_EQ(root->nt, TERMnt);
  TA_INT_EQ(root->nchildren, 1);

  /* Only look at the child once its existence has been asserted. */
  PTree *leaf = root->children[0];
  TA_INT_EQ(leaf->tok.sym, Dot);
  TA_CHAR_EQ(leaf->tok.c, '.');

  free_tree(root);
  return 0;
}

/*
 * Runs TERM on a two-character input (a backslash followed by 'w') and
 * checks that the result is a TERM node with exactly one child: a Special
 * token carrying 'w'.
 */
static int test_TERM_Special(void)
{
  Lexer lex;
  lex.input.str = "\\w";
  lex.input.wstr = NULL;
  lex.tok = (Token){.sym=0, .c=0};
  lex.nbuf = 0;
  lex.index = 0;

  nextsym(&lex);
  PTree *root = TERM(&lex);
  expect(Eof, &lex);

  TA_PTR_NE(root, NULL);
  TA_INT_EQ(root->nt, TERMnt);
  TA_INT_EQ(root->nchildren, 1);

  /* Only look at the child once its existence has been asserted. */
  PTree *leaf = root->children[0];
  TA_INT_EQ(leaf->tok.sym, Special);
  TA_CHAR_EQ(leaf->tok.c, 'w');

  free_tree(root);
  return 0;
}

/*
 * Runs TERM on the input "(a+)" and checks that the result is a TERM node
 * with three children whose middle child is a REGEX node.
 */
static int test_TERM_Subexpr(void)
{
  Lexer lex;
  lex.input.str = "(a+)";
  lex.input.wstr = NULL;
  lex.tok = (Token){.sym=0, .c=0};
  lex.nbuf = 0;
  lex.index = 0;

  nextsym(&lex);
  PTree *root = TERM(&lex);
  expect(Eof, &lex);

  TA_PTR_NE(root, NULL);
  TA_INT_EQ(root->nt, TERMnt);
  TA_INT_EQ(root->nchildren, 3);

  /* The middle child is the one inspected by this test. */
  PTree *inner = root->children[1];
  TA_INT_EQ(inner->nt, REGEXnt);

  free_tree(root);
  return 0;
}

/*
 * Runs TERM on the input "[abc]" and checks that the result is a TERM node
 * with three children, production number 3, and a CLASS node in the middle.
 */
static int test_TERM_Class(void)
{
  Lexer lex;
  lex.input.str = "[abc]";
  lex.input.wstr = NULL;
  lex.tok = (Token){.sym=0, .c=0};
  lex.nbuf = 0;
  lex.index = 0;

  nextsym(&lex);
  PTree *root = TERM(&lex);
  expect(Eof, &lex);

  TA_PTR_NE(root, NULL);
  TA_INT_EQ(root->nt, TERMnt);
  TA_INT_EQ(root->nchildren, 3);
  TA_INT_EQ(root->production, 3);

  /* The middle child is the one inspected by this test. */
  PTree *inner = root->children[1];
  TA_INT_EQ(inner->nt, CLASSnt);

  free_tree(root);
  return 0;
}

/*
 * Runs TERM on the input "[^abc]" and checks that the result is a TERM node
 * with three children, production number 4, and a CLASS node in the middle.
 */
static int test_TERM_NClass(void)
{
  Lexer lex;
  lex.input.str = "[^abc]";
  lex.input.wstr = NULL;
  lex.tok = (Token){.sym=0, .c=0};
  lex.nbuf = 0;
  lex.index = 0;

  nextsym(&lex);
  PTree *root = TERM(&lex);
  expect(Eof, &lex);

  TA_PTR_NE(root, NULL);
  TA_INT_EQ(root->nt, TERMnt);
  TA_INT_EQ(root->nchildren, 3);
  TA_INT_EQ(root->production, 4);

  /* The middle child is the one inspected by this test. */
  PTree *inner = root->children[1];
  TA_INT_EQ(inner->nt, CLASSnt);

  free_tree(root);
  return 0;
}

/*
 * Runs EXPR on the input "a" and checks that the result is an EXPR node
 * whose only child is a TERM node that itself has a single child.
 */
static int test_EXPR_Term(void)
{
  Lexer lex;
  lex.input.str = "a";
  lex.input.wstr = NULL;
  lex.tok = (Token){.sym=0, .c=0};
  lex.nbuf = 0;
  lex.index = 0;

  nextsym(&lex);
  PTree *root = EXPR(&lex);
  expect(Eof, &lex);

  TA_PTR_NE(root, NULL);
  TA_INT_EQ(root->nt, EXPRnt);
  TA_INT_EQ(root->nchildren, 1);

  PTree *term = root->children[0];
  TA_INT_EQ(term->nt, TERMnt);
  TA_INT_EQ(term->nchildren, 1);

  free_tree(root);
  return 0;
}

/*
 * Runs EXPR on the input "a+" and checks that the result is an EXPR node
 * with two children: a single-child TERM node followed by a Plus token.
 */
static int test_EXPR_Plus(void)
{
  Lexer lex;
  lex.input.str = "a+";
  lex.input.wstr = NULL;
  lex.tok = (Token){.sym=0, .c=0};
  lex.nbuf = 0;
  lex.index = 0;

  nextsym(&lex);
  PTree *root = EXPR(&lex);
  expect(Eof, &lex);

  TA_PTR_NE(root, NULL);
  TA_INT_EQ(root->nt, EXPRnt);
  TA_INT_EQ(root->nchildren, 2);

  PTree *term = root->children[0];
  TA_INT_EQ(term->nt, TERMnt);
  TA_INT_EQ(term->nchildren, 1);

  PTree *op = root->children[1];
  TEST_ASSERT(op->tok.sym == Plus);

  free_tree(root);
  return 0;
}

/*
 * Runs EXPR on the input "a+?" and checks that the result is an EXPR node
 * with three children: a single-child TERM node, a Plus token, and a
 * Question token.
 */
static int test_EXPR_PlusQuestion(void)
{
  Lexer lex;
  lex.input.str = "a+?";
  lex.input.wstr = NULL;
  lex.tok = (Token){.sym=0, .c=0};
  lex.nbuf = 0;
  lex.index = 0;

  nextsym(&lex);
  PTree *root = EXPR(&lex);
  expect(Eof, &lex);

  TA_PTR_NE(root, NULL);
  TA_INT_EQ(root->nt, EXPRnt);
  TA_INT_EQ(root->nchildren, 3);

  PTree *term = root->children[0];
  TA_INT_EQ(term->nt, TERMnt);
  TA_INT_EQ(term->nchildren, 1);

  PTree *op = root->children[1];
  TEST_ASSERT(op->tok.sym == Plus);

  PTree *modifier = root->children[2];
  TEST_ASSERT(modifier->tok.sym == Question);

  free_tree(root);
  return 0;
}

/*
 * Runs EXPR on the input "a*" and checks that the result is an EXPR node
 * with two children: a single-child TERM node followed by a Star token.
 */
static int test_EXPR_Star(void)
{
  Lexer lex;
  lex.input.str = "a*";
  lex.input.wstr = NULL;
  lex.tok = (Token){.sym=0, .c=0};
  lex.nbuf = 0;
  lex.index = 0;

  nextsym(&lex);
  PTree *root = EXPR(&lex);
  expect(Eof, &lex);

  TA_PTR_NE(root, NULL);
  TA_INT_EQ(root->nt, EXPRnt);
  TA_INT_EQ(root->nchildren, 2);

  PTree *term = root->children[0];
  TA_INT_EQ(term->nt, TERMnt);
  TA_INT_EQ(term->nchildren, 1);

  PTree *op = root->children[1];
  TEST_ASSERT(op->tok.sym == Star);

  free_tree(root);
  return 0;
}

/*
 * Runs EXPR on the input "a*?" and checks that the result is an EXPR node
 * with three children: a single-child TERM node, a Star token, and a
 * Question token.
 */
static int test_EXPR_StarQuestion(void)
{
  Lexer lex;
  lex.input.str = "a*?";
  lex.input.wstr = NULL;
  lex.tok = (Token){.sym=0, .c=0};
  lex.nbuf = 0;
  lex.index = 0;

  nextsym(&lex);
  PTree *root = EXPR(&lex);
  expect(Eof, &lex);

  TA_PTR_NE(root, NULL);
  TA_INT_EQ(root->nt, EXPRnt);
  TA_INT_EQ(root->nchildren, 3);

  PTree *term = root->children[0];
  TA_INT_EQ(term->nt, TERMnt);
  TA_INT_EQ(term->nchildren, 1);

  PTree *op = root->children[1];
  TEST_ASSERT(op->tok.sym == Star);

  PTree *modifier = root->children[2];
  TEST_ASSERT(modifier->tok.sym == Question);

  free_tree(root);
  return 0;
}

/*
 * Runs EXPR on the input "a?" and checks that the result is an EXPR node
 * with two children: a single-child TERM node followed by a Question token.
 */
static int test_EXPR_Question(void)
{
  Lexer lex;
  lex.input.str = "a?";
  lex.input.wstr = NULL;
  lex.tok = (Token){.sym=0, .c=0};
  lex.nbuf = 0;
  lex.index = 0;

  nextsym(&lex);
  PTree *root = EXPR(&lex);
  expect(Eof, &lex);

  TA_PTR_NE(root, NULL);
  TA_INT_EQ(root->nt, EXPRnt);
  TA_INT_EQ(root->nchildren, 2);

  PTree *term = root->children[0];
  TA_INT_EQ(term->nt, TERMnt);
  TA_INT_EQ(term->nchildren, 1);

  PTree *op = root->children[1];
  TEST_ASSERT(op->tok.sym == Question);

  free_tree(root);
  return 0;
}

/*
 * Runs EXPR on the input "a??" and checks that the result is an EXPR node
 * with three children: a single-child TERM node followed by two Question
 * tokens.
 */
static int test_EXPR_QuestionQuestion(void)
{
  Lexer lex;
  lex.input.str = "a??";
  lex.input.wstr = NULL;
  lex.tok = (Token){.sym=0, .c=0};
  lex.nbuf = 0;
  lex.index = 0;

  nextsym(&lex);
  PTree *root = EXPR(&lex);
  expect(Eof, &lex);

  TA_PTR_NE(root, NULL);
  TA_INT_EQ(root->nt, EXPRnt);
  TA_INT_EQ(root->nchildren, 3);

  PTree *term = root->children[0];
  TA_INT_EQ(term->nt, TERMnt);
  TA_INT_EQ(term->nchildren, 1);

  PTree *op = root->children[1];
  TEST_ASSERT(op->tok.sym == Question);

  PTree *modifier = root->children[2];
  TEST_ASSERT(modifier->tok.sym == Question);

  free_tree(root);
  return 0;
}

/*
 * Runs SUB on the input "a" and checks that the result is a SUB node whose
 * only child is an EXPR node that itself has a single child.
 */
static int test_SUB_Normal(void)
{
  Lexer lex;
  lex.input.str = "a";
  lex.input.wstr = NULL;
  lex.tok = (Token){.sym=0, .c=0};
  lex.nbuf = 0;
  lex.index = 0;

  nextsym(&lex);
  PTree *root = SUB(&lex);
  expect(Eof, &lex);

  TA_PTR_NE(root, NULL);
  TA_INT_EQ(root->nt, SUBnt);
  TA_INT_EQ(root->nchildren, 1);

  PTree *expr = root->children[0];
  TA_INT_EQ(expr->nt, EXPRnt);
  TA_INT_EQ(expr->nchildren, 1);

  free_tree(root);
  return 0;
}

/*
 * Runs SUB on the input "ab" and checks the nesting of the result: a SUB
 * node with two children, a single-child EXPR node followed by a
 * single-child SUB node whose child is again an EXPR node.
 */
static int test_SUB_Concat(void)
{
  Lexer lex;
  lex.input.str = "ab";
  lex.input.wstr = NULL;
  lex.tok = (Token){.sym=0, .c=0};
  lex.nbuf = 0;
  lex.index = 0;

  nextsym(&lex);
  PTree *root = SUB(&lex);
  expect(Eof, &lex);

  TA_PTR_NE(root, NULL);
  TA_INT_EQ(root->nt, SUBnt);
  TA_INT_EQ(root->nchildren, 2);

  PTree *head = root->children[0];
  TA_INT_EQ(head->nt, EXPRnt);
  TA_INT_EQ(head->nchildren, 1);

  PTree *rest = root->children[1];
  TA_INT_EQ(rest->nt, SUBnt);
  TA_INT_EQ(rest->nchildren, 1);

  PTree *rest_expr = rest->children[0];
  TA_INT_EQ(rest_expr->nt, EXPRnt);

  free_tree(root);
  return 0;
}

/*
 * Runs REGEX on the input "a" and checks that the result is a REGEX node
 * whose only child is a SUB node that itself has a single child.
 */
static int test_REGEX_Normal(void)
{
  Lexer lex;
  lex.input.str = "a";
  lex.input.wstr = NULL;
  lex.tok = (Token){.sym=0, .c=0};
  lex.nbuf = 0;
  lex.index = 0;

  nextsym(&lex);
  PTree *root = REGEX(&lex);
  expect(Eof, &lex);

  TA_PTR_NE(root, NULL);
  TA_INT_EQ(root->nt, REGEXnt);
  TA_INT_EQ(root->nchildren, 1);

  PTree *sub = root->children[0];
  TA_INT_EQ(sub->nt, SUBnt);
  TA_INT_EQ(sub->nchildren, 1);

  free_tree(root);
  return 0;
}

/*
 * Runs REGEX on the input "a|b" and checks the shape of the result: a REGEX
 * node with three children, namely a single-child SUB node, a Pipe token,
 * and a single-child REGEX node whose child is a SUB node.
 */
static int test_REGEX_Alternate(void)
{
  Lexer lex;
  lex.input.str = "a|b";
  lex.input.wstr = NULL;
  lex.tok = (Token){.sym=0, .c=0};
  lex.nbuf = 0;
  lex.index = 0;

  nextsym(&lex);
  PTree *root = REGEX(&lex);
  expect(Eof, &lex);

  TA_PTR_NE(root, NULL);
  TA_INT_EQ(root->nt, REGEXnt);
  TA_INT_EQ(root->nchildren, 3);

  PTree *left = root->children[0];
  TA_INT_EQ(left->nt, SUBnt);
  TA_INT_EQ(left->nchildren, 1);

  PTree *bar = root->children[1];
  TA_INT_EQ(bar->tok.sym, Pipe);

  PTree *right = root->children[2];
  TA_INT_EQ(right->nt, REGEXnt);
  TA_INT_EQ(right->nchildren, 1);

  PTree *right_sub = right->children[0];
  TA_INT_EQ(right_sub->nt, SUBnt);

  free_tree(root);
  return 0;
}

/*
 * Runs CLASS on the input "a-b" and checks that the result is a CLASS node
 * with two CharSym children carrying 'a' and 'b' respectively.
 */
static int test_CLASS_range(void)
{
  Lexer lex;
  lex.input.str = "a-b";
  lex.input.wstr = NULL;
  lex.tok = (Token){.sym=0, .c=0};
  lex.nbuf = 0;
  lex.index = 0;

  nextsym(&lex);
  PTree *root = CLASS(&lex);
  expect(Eof, &lex);

  TA_PTR_NE(root, NULL);
  TA_INT_EQ(root->nt, CLASSnt);
  TA_INT_EQ(root->nchildren, 2);

  PTree *lo = root->children[0];
  TA_INT_EQ(lo->tok.sym, CharSym);
  TA_CHAR_EQ(lo->tok.c, 'a');

  PTree *hi = root->children[1];
  TA_INT_EQ(hi->tok.sym, CharSym);
  TA_CHAR_EQ(hi->tok.c, 'b');

  free_tree(root);
  return 0;
}

/*
 * Runs CLASS on the input "a-b1-2" and checks that the result is a CLASS
 * node with three children: CharSym 'a', CharSym 'b', and a nested CLASS
 * node holding CharSym '1' and CharSym '2'.
 */
static int test_CLASS_range_range(void)
{
  Lexer lex;
  lex.input.str = "a-b1-2";
  lex.input.wstr = NULL;
  lex.tok = (Token){.sym=0, .c=0};
  lex.nbuf = 0;
  lex.index = 0;

  nextsym(&lex);
  PTree *root = CLASS(&lex);
  expect(Eof, &lex);

  TA_PTR_NE(root, NULL);
  TA_INT_EQ(root->nt, CLASSnt);
  TA_INT_EQ(root->nchildren, 3);

  PTree *lo = root->children[0];
  TA_INT_EQ(lo->tok.sym, CharSym);
  TA_CHAR_EQ(lo->tok.c, 'a');

  PTree *hi = root->children[1];
  TA_INT_EQ(hi->tok.sym, CharSym);
  TA_CHAR_EQ(hi->tok.c, 'b');

  PTree *nested = root->children[2];
  TA_INT_EQ(nested->nt, CLASSnt);
  TA_INT_EQ(nested->nchildren, 2);

  PTree *nested_lo = nested->children[0];
  TA_INT_EQ(nested_lo->tok.sym, CharSym);
  TA_CHAR_EQ(nested_lo->tok.c, '1');

  PTree *nested_hi = nested->children[1];
  TA_INT_EQ(nested_hi->tok.sym, CharSym);
  TA_CHAR_EQ(nested_hi->tok.c, '2');

  free_tree(root);
  return 0;
}

/*
 * Runs CLASS on each of the one-character inputs ".", "+", "*", "?", "(",
 * ")" and "|" and checks that every one yields a CLASS node with a single
 * CharSym child carrying that same character.
 */
static int test_CLASS_single(void)
{
  char *inputs[] = {".", "+", "*", "?", "(", ")", "|"};
  for (size_t k = 0; k < nelem(inputs); k++) {
    /* A fresh lexer is set up for every input string. */
    Lexer lex;
    lex.input.str = inputs[k];
    lex.input.wstr = NULL;
    lex.tok = (Token){.sym=0, .c=0};
    lex.nbuf = 0;
    lex.index = 0;

    nextsym(&lex);
    PTree *root = CLASS(&lex);
    expect(Eof, &lex);

    TA_PTR_NE(root, NULL);
    TA_INT_EQ(root->nt, CLASSnt);
    TA_INT_EQ(root->nchildren, 1);

    PTree *leaf = root->children[0];
    TA_INT_EQ(leaf->tok.sym, CharSym);
    TA_INT_EQ(leaf->tok.c, inputs[k][0]);

    free_tree(root);
  }
  return 0;
}

/*
 * Runs CLASS on the input "a-" and checks that the result is a CLASS node
 * with two children: CharSym 'a' and a nested single-child CLASS node whose
 * child is a Minus token.
 */
static int test_CLASS_single_hyphen(void)
{
  Lexer lex;
  lex.input.str = "a-";
  lex.input.wstr = NULL;
  lex.tok = (Token){.sym=0, .c=0};
  lex.nbuf = 0;
  lex.index = 0;

  nextsym(&lex);
  PTree *root = CLASS(&lex);
  expect(Eof, &lex);

  TA_PTR_NE(root, NULL);
  TA_INT_EQ(root->nt, CLASSnt);
  TA_INT_EQ(root->nchildren, 2);

  PTree *first = root->children[0];
  TA_INT_EQ(first->tok.sym, CharSym);
  TA_CHAR_EQ(first->tok.c, 'a');

  PTree *nested = root->children[1];
  TA_INT_EQ(nested->nt, CLASSnt);
  TA_INT_EQ(nested->nchildren, 1);

  PTree *hyphen = nested->children[0];
  TA_INT_EQ(hyphen->tok.sym, Minus);

  free_tree(root);
  return 0;
}

static int test_reparse(void)
{
  PTree *tree = reparse("a+|b*");

  TA_PTR_NE(tree, NULL);
  TA_INT_EQ(tree->nt, REGEXnt);
  TA_INT_EQ(tree->nchildren, 3);
  TA_INT_EQ(tree->children[0]->nt, SUBnt);
  TA_INT_EQ(tree->children[0]->nchildren, 1);
  TA_INT_EQ(tree->children[0]->children[0]->nt, EXPRnt);
  TA_INT_EQ(tree->children[0]->children[0]->nchildren, 2);
  TA_INT_EQ(tree->children[0]->children[0]->children[0]->nt, TERMnt);
  TA_INT_EQ(tree->children[0]->children[0]->children[0]->nchildren, 1);
  TA_INT_EQ(tree->children[0]->children[0]->children[0]->children[0]->tok.sym, CharSym);
  TA_CHAR_EQ(tree->children[0]->children[0]->children[0]->children[0]->tok.c, 'a');
  TA_INT_EQ(tree->children[0]->children[0]->children[1]->tok.sym, Plus);
  TA_INT_EQ(tree->children[1]->tok.sym, Pipe);
  TA_INT_EQ(tree->children[2]->nt, REGEXnt);
  TA_INT_EQ(tree->children[2]->nchildren, 1);
  TA_INT_EQ(tree->children[2]->children[0]->nt, SUBnt);
  TA_INT_EQ(tree->children[2]->children[0]->nchildren, 1);
  TA_INT_EQ(tree->children[2]->children[0]->children[0]->nt, EXPRnt);
  TA_INT_EQ(tree->children[2]->children[0]->children[0]->nchildren, 2);
  TA_INT_EQ(tree->children[2]->children[0]->children[0]->children[0]->nt, TERMnt);
  TA_INT_EQ(tree->children[2]->children[0]->children[0]->children[0]->nchildren, 1);
  TA_INT_EQ(tree->children[2]->children[0]->children[0]->children[0]->children[0]->tok.sym, CharSym);
  TA_CHAR_EQ(tree->children[2]->children[0]->children[0]->children[0]->children[0]->tok.c, 'b');
  TA_INT_EQ(tree->children[2]->children[0]->children[0]->children[1]->tok.sym, Star);

  free_tree(tree);
  return 0;
}
コード例 #2
0
/*
 * Searches for an annotation relative to the editor cursor, optionally
 * requiring its comment to match a regular expression, and moves the cursor
 * to it when found.
 *
 * xx      Editor view; supplies the io handle, contig record number,
 *         interpreter and current cursor position (cursor_apos).
 * dir     Non-zero: scan from cursor_apos+1 to the contig end with
 *         contig_iter_next. Zero: scan from the contig start to
 *         cursor_apos-1 with contig_iter_prev.
 * strand  Not used by this function.
 * value   Regular expression matched against each annotation's comment,
 *         or NULL to accept the first annotation in range.
 *
 * On a hit the cursor is placed on sequence r->pair_rec when the range has
 * GRANGE_FLAG_TAG_SEQ set, otherwise on the contig at r->start.
 *
 * Returns 0 if an annotation was found and the cursor moved; -1 if the
 * contig could not be loaded, the expression is invalid, the iterator could
 * not be created, or nothing matched.
 */
int edview_search_tag_anno(edview *xx, int dir, int strand, char *value) {
    contig_iterator *iter;
    int start, end;
    int ret = -1;
    rangec_t *r;
    rangec_t *(*ifunc)(GapIO *io, contig_iterator *ci);
    char *r_exp = NULL;
    contig_t *c = cache_search(xx->io, GT_Contig, xx->cnum);

    /* Bail out rather than dereference a contig that failed to load. */
    if (!c)
	return -1;

    if (value) {
	if (NULL == (r_exp = REGCMP(xx->interp, value))) {
	    verror(ERR_WARN, "Search by anno", "invalid regular expression");
	    return -1;
	}
    }

    if (dir) {
	start = xx->cursor_apos + 1;
	end   = c->end;
	ifunc = contig_iter_next;
    } else {
	start = c->start;
	end   = xx->cursor_apos - 1;
	ifunc = contig_iter_prev;
    }

    /*
     * NOTE(review): the direction test above is "dir != 0" while the
     * iterator start point is chosen with "dir == 1"; the two only agree
     * for dir in {0, 1}. Kept as-is -- confirm callers never pass other
     * values.
     */
    iter = contig_iter_new_by_type(xx->io, xx->cnum, 1,
				   dir == 1 ? CITER_FIRST : CITER_LAST,
				   start, end, GRANGE_FLAG_ISANNO);
    if (!iter) {
	/* The compiled expression must not outlive this call. */
	REGFREE(xx->interp, r_exp);
	return -1;
    }

    while ((r = ifunc(xx->io, iter)) != NULL) {
	anno_ele_t *ae;

	/* Skip ranges that begin outside the requested search window. */
	if ((dir  && r->start < start) ||
	    (!dir && r->start > end))
	    continue;

	if (!r_exp)
	    break; /* blank expr => match all */

	ae = cache_search(xx->io, GT_AnnoEle, r->rec);
	if (!ae || !ae->comment)
	    continue;

	if (REGEX(xx->interp, ae->comment, r_exp))
	    break;
    }

    REGFREE(xx->interp, r_exp);

    /* r is non-NULL only when the loop stopped on a match. */
    if (r) {
	if (r->flags & GRANGE_FLAG_TAG_SEQ) {
	    int pos;
	    sequence_get_position(xx->io, r->pair_rec, NULL, &pos, NULL, NULL);
	    pos = r->start - pos;
	    edSetCursorPos(xx, GT_Seq, r->pair_rec, pos, 1);
	} else {
	    edSetCursorPos(xx, GT_Contig, xx->cnum, r->start, 1);
	}
	ret = 0;
    }

    /* r came from the iterator, so release the iterator only after use. */
    contig_iter_del(iter);
    return ret;
}