/** * Test if the given regex's canonical regex is the same as this canonical * regex's canonical regex. Confused? Ok, then: 1. construct a dfa A from the * given 'regex' 2. get the canonical regex of dfa A 3. construct a dfa B from * this canonical regex 3. compare the canonical regex of dfa A with the * canonical regex of dfa B. * * @param regex regular expression used for this test (see above). * * @return 0 on success, 1 on failure */ static unsigned int test_proof (const char *regex) { unsigned int error; struct REGEX_INTERNAL_Automaton *dfa; char *c_rx1; const char *c_rx2; dfa = REGEX_INTERNAL_construct_dfa (regex, strlen (regex), 1); GNUNET_assert (NULL != dfa); c_rx1 = GNUNET_strdup (REGEX_INTERNAL_get_canonical_regex (dfa)); REGEX_INTERNAL_automaton_destroy (dfa); dfa = REGEX_INTERNAL_construct_dfa (c_rx1, strlen (c_rx1), 1); GNUNET_assert (NULL != dfa); c_rx2 = REGEX_INTERNAL_get_canonical_regex (dfa); error = (0 == strcmp (c_rx1, c_rx2)) ? 0 : 1; if (error > 0) { GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "Comparing canonical regex of\n%s\nfailed:\n%s\nvs.\n%s\n", regex, c_rx1, c_rx2); } GNUNET_free (c_rx1); REGEX_INTERNAL_automaton_destroy (dfa); return error; }
/** * Test a number of known examples of regexes for proper canonicalization. * * @return 0 on success, number of failures otherwise. */ static unsigned int test_proofs_static () { unsigned int i; unsigned int error; const char *regex[8] = { "a|aa*a", "a+", "a*", "a*a*", "(F*C|WfPf|y+F*C)", "y*F*C|WfPf", "((a|b)c|(a|b)(d|(a|b)e))", "((a|b)(c|d)|(a|b)(a|b)e)" }; const char *canon_rx1; const char *canon_rx2; struct REGEX_INTERNAL_Automaton *dfa1; struct REGEX_INTERNAL_Automaton *dfa2; error = 0; for (i = 0; i < 8; i += 2) { dfa1 = REGEX_INTERNAL_construct_dfa (regex[i], strlen (regex[i]), 1); dfa2 = REGEX_INTERNAL_construct_dfa (regex[i + 1], strlen (regex[i + 1]), 1); GNUNET_assert (NULL != dfa1); GNUNET_assert (NULL != dfa2); canon_rx1 = REGEX_INTERNAL_get_canonical_regex (dfa1); canon_rx2 = REGEX_INTERNAL_get_canonical_regex (dfa2); error += (0 == strcmp (canon_rx1, canon_rx2)) ? 0 : 1; if (error > 0) { GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "Comparing canonical regex failed:\nrx1:\t%s\ncrx1:\t%s\nrx2:\t%s\ncrx2:\t%s\n", regex[i], canon_rx1, regex[i + 1], canon_rx2); } REGEX_INTERNAL_automaton_destroy (dfa1); REGEX_INTERNAL_automaton_destroy (dfa2); } return error; }
/** * The main function of the regex performace test. * * Read a set of regex from a file, combine them and create a DFA from the * resulting combined regex. * * @param argc number of arguments from the command line * @param argv command line arguments * @return 0 ok, 1 on error */ int main (int argc, char *const *argv) { struct REGEX_INTERNAL_Automaton* dfa; char **regexes; char *buffer; char *regex; int compression; long size; GNUNET_log_setup ("perf-regex", "DEBUG", NULL); if (3 != argc) { fprintf (stderr, "Usage: %s REGEX_FILE COMPRESSION\n", argv[0]); return 1; } regexes = REGEX_TEST_read_from_file (argv[1]); if (NULL == regexes) { fprintf (stderr, "Failed to read regexes from `%s'\n", argv[1]); return 2; } compression = atoi (argv[2]); buffer = REGEX_TEST_combine (regexes); GNUNET_asprintf (®ex, "GNUNET_REGEX_PROFILER_(%s)(0|1)*", buffer); size = strlen (regex); fprintf (stderr, "Combined regex (%ld bytes):\n%s\n", size, regex); dfa = REGEX_INTERNAL_construct_dfa (regex, size, compression); printf ("********* ALL EDGES *********'\n"); REGEX_INTERNAL_iterate_all_edges (dfa, &print_edge, NULL); printf ("\n\n********* REACHABLE EDGES *********'\n"); REGEX_INTERNAL_iterate_reachable_edges (dfa, &print_edge, NULL); REGEX_INTERNAL_automaton_destroy (dfa); GNUNET_free (buffer); REGEX_TEST_free_from_file (regexes); GNUNET_free (regex); return 0; }
/** * Announce a regular expression: put all states of the automaton in the DHT. * Does not free resources, must call #REGEX_INTERNAL_announce_cancel() for that. * * @param dht An existing and valid DHT service handle. CANNOT be NULL. * @param priv our private key, must remain valid until the announcement is cancelled * @param regex Regular expression to announce. * @param compression How many characters per edge can we squeeze? * @param stats Optional statistics handle to report usage. Can be NULL. * @return Handle to reuse o free cached resources. * Must be freed by calling #REGEX_INTERNAL_announce_cancel(). */ struct REGEX_INTERNAL_Announcement * REGEX_INTERNAL_announce (struct GNUNET_DHT_Handle *dht, const struct GNUNET_CRYPTO_EddsaPrivateKey *priv, const char *regex, uint16_t compression, struct GNUNET_STATISTICS_Handle *stats) { struct REGEX_INTERNAL_Announcement *h; GNUNET_assert (NULL != dht); h = GNUNET_new (struct REGEX_INTERNAL_Announcement); h->regex = regex; h->dht = dht; h->stats = stats; h->priv = priv; h->dfa = REGEX_INTERNAL_construct_dfa (regex, strlen (regex), compression); REGEX_INTERNAL_reannounce (h); return h; }
int main (int argc, char *argv[]) { GNUNET_log_setup ("test-regex", "WARNING", NULL); int error; struct REGEX_INTERNAL_Automaton *dfa; unsigned int i; unsigned int num_transitions; char *filename = NULL; struct IteratorContext ctx = { 0, 0, NULL, 0, NULL, 0 }; error = 0; const struct RegexStringPair rxstr[13] = { {INITIAL_PADDING "ab(c|d)+c*(a(b|c)+d)+(bla)+", 2, {INITIAL_PADDING "abcdcdca", INITIAL_PADDING "abcabdbl"}}, {INITIAL_PADDING "abcdefghixxxxxxxxxxxxxjklmnop*qstoisdjfguisdfguihsdfgbdsuivggsd", 1, {INITIAL_PADDING "abcdefgh"}}, {INITIAL_PADDING "VPN-4-1(0|1)*", 2, {INITIAL_PADDING "VPN-4-10", INITIAL_PADDING "VPN-4-11"}}, {INITIAL_PADDING "(a+X*y+c|p|R|Z*K*y*R+w|Y*6+n+h*k*w+V*F|W*B*e*)", 2, {INITIAL_PADDING "aaaaaaaa", INITIAL_PADDING "aaXXyyyc"}}, {INITIAL_PADDING "a*", 1, {INITIAL_PADDING "aaaaaaaa"}}, {INITIAL_PADDING "xzxzxzxzxz", 1, {INITIAL_PADDING "xzxzxzxz"}}, {INITIAL_PADDING "xyz*", 1, {INITIAL_PADDING "xyzzzzzz"}}, {INITIAL_PADDING "abcd:(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1):(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)", 2, {INITIAL_PADDING "abcd:000", INITIAL_PADDING "abcd:101"}}, {INITIAL_PADDING "(x*|(0|1|2)(a|b|c|d)+)", 2, {INITIAL_PADDING "xxxxxxxx", INITIAL_PADDING "0abcdbad"}}, {INITIAL_PADDING "(0|1)(0|1)23456789ABC", 1, {INITIAL_PADDING "11234567"}}, {INITIAL_PADDING "0*123456789ABC*", 3, {INITIAL_PADDING "00123456", INITIAL_PADDING "00000000", INITIAL_PADDING "12345678"}}, {INITIAL_PADDING "0123456789A*BC", 1, {INITIAL_PADDING "01234567"}}, {"GNUNETVPN000100000IPEX6-fc5a:4e1:c2ba::1", 1, {"GNUNETVPN000100000IPEX6-"}} }; const char *graph_start_str = "digraph G {\nrankdir=LR\n"; const char *graph_end_str = "\n}\n"; for (i = 0; i < 13; i++) { GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "Iterating DFA for regex %s\n", rxstr[i].regex); /* Create graph */ if (GNUNET_YES == REGEX_INTERNAL_ITERATE_SAVE_DEBUG_GRAPH) { GNUNET_asprintf (&filename, "iteration_graph_%u.dot", i); ctx.graph_filep = fopen (filename, "w"); if (NULL == ctx.graph_filep) { GNUNET_log (GNUNET_ERROR_TYPE_WARNING, "Could not open file %s for saving iteration graph.\n", filename); ctx.should_save_graph = GNUNET_NO; } else { ctx.should_save_graph = GNUNET_YES; fwrite (graph_start_str, strlen (graph_start_str), 1, ctx.graph_filep); } GNUNET_free (filename); } else { ctx.should_save_graph = GNUNET_NO; ctx.graph_filep = NULL; } /* Iterate over DFA edges */ transition_counter = 0; ctx.string_count = rxstr[i].string_count; ctx.strings = rxstr[i].strings; ctx.match_count = 0; dfa = REGEX_INTERNAL_construct_dfa (rxstr[i].regex, strlen (rxstr[i].regex), 0); REGEX_INTERNAL_iterate_all_edges (dfa, key_iterator, &ctx); num_transitions = REGEX_INTERNAL_get_transition_count (dfa) - dfa->start->transition_count; if (transition_counter < num_transitions) { GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "Automaton has %d transitions, iterated over %d transitions\n", num_transitions, transition_counter); error += 1; } if (ctx.match_count < ctx.string_count) { GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "Missing initial states for regex %s\n", rxstr[i].regex); error += (ctx.string_count - ctx.match_count); } else if (ctx.match_count > ctx.string_count) { GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "Duplicate initial transitions for regex %s\n", rxstr[i].regex); error += (ctx.string_count - ctx.match_count); } REGEX_INTERNAL_automaton_destroy (dfa); /* Finish graph */ if (GNUNET_YES == ctx.should_save_graph) { fwrite (graph_end_str, strlen (graph_end_str), 1, ctx.graph_filep); fclose (ctx.graph_filep); ctx.graph_filep = NULL; ctx.should_save_graph = GNUNET_NO; } } for (i = 0; i < 13; i++) { ctx.string_count = rxstr[i].string_count; ctx.strings = rxstr[i].strings; ctx.match_count = 0; dfa = REGEX_INTERNAL_construct_dfa (rxstr[i].regex, strlen (rxstr[i].regex), 0); REGEX_INTERNAL_dfa_add_multi_strides (NULL, dfa, 2); REGEX_INTERNAL_iterate_all_edges (dfa, key_iterator, &ctx); if (ctx.match_count < ctx.string_count) { GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "Missing initial states for regex %s\n", rxstr[i].regex); error += (ctx.string_count - ctx.match_count); } REGEX_INTERNAL_automaton_destroy (dfa); } error += ctx.error; return error; }
int main (int argc, char *argv[]) { int error; struct REGEX_INTERNAL_Automaton *a; unsigned int i; const char *filename = "test_graph.dot"; const char *regex[12] = { "ab(c|d)+c*(a(b|c)+d)+(bla)+", "(bla)*", "b(lab)*la", "(ab)*", "ab(c|d)+c*(a(b|c)+d)+(bla)(bla)*", "z(abc|def)?xyz", "1*0(0|1)*", "a*b*", "a+X*y+c|p|R|Z*K*y*R+w|Y*6+n+h*k*w+V*F|W*B*e*", "a", "a|b", "PADPADPADPADPADPabcdefghixxxxxxxxxxxxxjklmnop*qstoisdjfguisdfguihsdfgbdsuivggsd" }; GNUNET_log_setup ("test-regex", "WARNING", NULL); error = 0; for (i = 0; i < 12; i++) { /* Check NFA graph creation */ a = REGEX_INTERNAL_construct_nfa (regex[i], strlen (regex[i])); REGEX_TEST_automaton_save_graph (a, filename, REGEX_TEST_GRAPH_DEFAULT); REGEX_INTERNAL_automaton_destroy (a); error += filecheck (filename); a = REGEX_INTERNAL_construct_nfa (regex[i], strlen (regex[i])); REGEX_TEST_automaton_save_graph (a, filename, REGEX_TEST_GRAPH_DEFAULT | REGEX_TEST_GRAPH_VERBOSE); REGEX_INTERNAL_automaton_destroy (a); error += filecheck (filename); a = REGEX_INTERNAL_construct_nfa (regex[i], strlen (regex[i])); REGEX_TEST_automaton_save_graph (a, filename, REGEX_TEST_GRAPH_DEFAULT | REGEX_TEST_GRAPH_COLORING); REGEX_INTERNAL_automaton_destroy (a); error += filecheck (filename); a = REGEX_INTERNAL_construct_nfa (regex[i], strlen (regex[i])); REGEX_TEST_automaton_save_graph (a, filename, REGEX_TEST_GRAPH_DEFAULT | REGEX_TEST_GRAPH_VERBOSE | REGEX_TEST_GRAPH_COLORING); REGEX_INTERNAL_automaton_destroy (a); error += filecheck (filename); /* Check DFA graph creation */ a = REGEX_INTERNAL_construct_dfa (regex[i], strlen (regex[i]), 0); REGEX_TEST_automaton_save_graph (a, filename, REGEX_TEST_GRAPH_DEFAULT); REGEX_INTERNAL_automaton_destroy (a); error += filecheck (filename); a = REGEX_INTERNAL_construct_dfa (regex[i], strlen (regex[i]), 0); REGEX_TEST_automaton_save_graph (a, filename, REGEX_TEST_GRAPH_DEFAULT | REGEX_TEST_GRAPH_VERBOSE); REGEX_INTERNAL_automaton_destroy (a); error += filecheck (filename); a = REGEX_INTERNAL_construct_dfa (regex[i], strlen (regex[i]), 0); REGEX_TEST_automaton_save_graph (a, filename, REGEX_TEST_GRAPH_DEFAULT | REGEX_TEST_GRAPH_COLORING); REGEX_INTERNAL_automaton_destroy (a); error += filecheck (filename); a = REGEX_INTERNAL_construct_dfa (regex[i], strlen (regex[i]), 4); REGEX_TEST_automaton_save_graph (a, filename, REGEX_TEST_GRAPH_DEFAULT); REGEX_INTERNAL_automaton_destroy (a); error += filecheck (filename); } return error; }