/**
 * Look up a capture by its capture number.
 *
 * The lookup is done in grok->captures_by_capture_number, using the raw
 * bytes of capture_number as the key.
 *
 * @param grok            grok instance whose capture index is searched
 * @param capture_number  capture number to look for
 * @return the value tctreeget() yields for that key; Tokyo Cabinet
 *         documents NULL for a key that is not present.
 */
const grok_capture *grok_capture_get_by_capture_number(grok_t *grok,
                                                       int capture_number) {
  int value_size; /* demanded by tctreeget(); the size itself is not needed */

  return tctreeget(grok->captures_by_capture_number, &capture_number,
                   sizeof(capture_number), &value_size);
}
/**
 * Look up a capture by its subname.
 *
 * grok->captures_by_subname maps a subname to a TCLIST of captures; the
 * first element of that list is returned.
 *
 * @param grok     grok instance whose subname index is searched
 * @param subname  NUL-terminated subname (its strlen() is used as key size)
 * @return NULL when no list is stored under subname, otherwise element 0
 *         of the stored list.
 */
const grok_capture *grok_capture_get_by_subname(const grok_t *grok,
                                                const char *subname) {
  int ignored_size;
  const TCLIST *matches = tctreeget(grok->captures_by_subname, subname,
                                    strlen(subname), &ignored_size);

  if (matches != NULL) {
    return tclistval(matches, 0, &ignored_size);
  }
  return NULL;
}
/**
 * Look up a capture by its name.
 *
 * grok->captures_by_name maps a name to a TCLIST of captures; only the
 * first capture registered under that name is returned.
 *
 * @param grok  grok instance whose name index is searched
 * @param name  NUL-terminated capture name (its strlen() is used as key size)
 * @return NULL when no list is stored under name, otherwise element 0 of
 *         the stored list.
 */
const grok_capture *grok_capture_get_by_name(const grok_t *grok,
                                             const char *name) {
  int size_out;
  const TCLIST *candidates;

  candidates = tctreeget(grok->captures_by_name, name, strlen(name),
                         &size_out);

  /* first entry in the list wins */
  return (candidates == NULL) ? NULL : tclistval(candidates, 0, &size_out);
}
/**
 * Find the regular expression registered for a named pattern.
 *
 * @param grok        grok instance whose pattern tree is searched
 * @param name        pattern name; name_len bytes are used as the key
 * @param name_len    length of name in bytes
 * @param regexp      out: pointer to the stored expression, or NULL when
 *                    the pattern is unknown
 * @param regexp_len  out: length of *regexp in bytes, 0 when unknown
 * @return GROK_OK when the pattern exists, GROK_ERROR_PATTERN_NOT_FOUND
 *         otherwise.
 */
int grok_pattern_find(const grok_t *grok, const char *name, size_t name_len,
                      const char **regexp, size_t *regexp_len) {
  TCTREE *patterns = grok->patterns;

  /* tctreeget() reports the value size through an int *. Handing it a
   * size_t * cast to int * only fills part of the object on LP64 targets,
   * and nothing is written at all on a miss, so collect the size in a real
   * int and widen it afterwards. */
  int found_len = 0;

  *regexp = tctreeget(patterns, name, (int)name_len, &found_len);
  if (*regexp == NULL) {
    found_len = 0;
  }
  *regexp_len = (size_t)found_len;

  /* name comes with an explicit length, so print it length-bounded instead
   * of assuming it is NUL-terminated. The precision of %.*s must be an int
   * and the string must not be NULL. */
  grok_log(grok, LOG_PATTERNS, "Searching for pattern '%.*s' (%s): %.*s",
           (int)name_len, name,
           *regexp == NULL ? "not found" : "found",
           found_len, *regexp == NULL ? "" : *regexp);

  if (*regexp == NULL) {
    grok_log(grok, LOG_PATTERNS, "pattern '%.*s': not found",
             (int)name_len, name);
    *regexp_len = 0;
    return GROK_ERROR_PATTERN_NOT_FOUND;
  }

  return GROK_OK;
}
const grok_capture *grok_capture_walk_next(const TCTREE_ITER *iter, const grok_t *grok) { int id_size; int gct_size; int *id; const grok_capture *gct; id = (int *)tctreeiternext(iter, &id_size); if (id == NULL) { grok_log(grok, LOG_CAPTURE, "walknext null"); return NULL; } grok_log(grok, LOG_CAPTURE, "walknext ok %d", *id); gct = (grok_capture *)tctreeget(grok->captures_by_id, id, id_size, &gct_size); return gct; }
void main () { uint32_t key = 123; const char *val = "abcdefkrnglrg"; // Make a new tree TCTREE *tree = tctreenew(); // Put an integer key tctreeput(tree, &key, sizeof(key), val, strlen(val)); // Put a different key key = 122; tctreeput(tree, &key, sizeof(key), val, strlen(val)); // Put the same key twice tctreeput(tree, &key, sizeof(key), val, strlen(val)); // Put the same key but keep the old value tctreeputkeep(tree, &key, sizeof(key), val, strlen(val)); // Get back a value int size; void *newVal = tctreeget(tree, &key, sizeof(key), &size); printf("Got value %s\n", newVal); // Create an iterator tctreeiterinit(tree); // Walk the tree tctreeiternext(tree, &size); tctreeiternext(tree, &size); tctreeiternext(tree, &size); // Clear the tree tctreeclear(tree); // Put one value back in the tree to make sure it's freed on delete tctreeput(tree, &key, sizeof(key), val, strlen(val)); // Delete the tree tctreedel(tree); // Make a list TCLIST *list = tclistnew(); // Push a few times tclistpush(list, &key, sizeof(key)); key += 1; tclistpush(list, &key, sizeof(key)); key += 1; tclistpush(list, &key, sizeof(key)); // Overwrite an existing element tclistover(list, 1, &key, sizeof(key)); // Get a value tclistval(list, 2, &size); // Remove some values newVal = tclistremove(list, 2, &size); free(newVal); newVal = tclistremove(list, 0, &size); free(newVal); // Free the whole list tclistdel(list); }
void grok_capture_add(grok_t *grok, const grok_capture *gct, int only_renamed) { grok_log(grok, LOG_CAPTURE, "Adding pattern '%s' as capture %d (pcrenum %d)", gct->name, gct->id, gct->pcre_capture_number); if (only_renamed && strstr(gct->name, ":") == NULL) { return; } /* Primary key is id */ tctreeput(grok->captures_by_id, &(gct->id), sizeof(gct->id), gct, sizeof(grok_capture)); /* Tokyo Cabinet doesn't seem to support 'secondary indexes' like BDB does, * so let's manually update all the other 'captures_by_*' trees */ int unused_size; tctreeput(grok->captures_by_capture_number, &(gct->pcre_capture_number), sizeof(gct->pcre_capture_number), gct, sizeof(grok_capture)); int i, listsize; /* TCTREE doesn't permit dups, so let's make the structure a tree of arrays, * keyed on a string. */ /* captures_by_name */ TCLIST *by_name_list; by_name_list = (TCLIST *) tctreeget(grok->captures_by_name, (const char *)gct->name, gct->name_len, &unused_size); if (by_name_list == NULL) { by_name_list = tclistnew(); } /* delete a capture with the same capture id so we can replace it*/ listsize = tclistnum(by_name_list); for (i = 0; i < listsize; i++) { grok_capture *list_gct; list_gct = (grok_capture *)tclistval(by_name_list, i, &unused_size); if (list_gct->id == gct->id) { tclistremove(by_name_list, i, &unused_size); break; } } tclistpush(by_name_list, gct, sizeof(grok_capture)); tctreeput(grok->captures_by_name, gct->name, gct->name_len, by_name_list, sizeof(TCLIST)); /* end captures_by_name */ /* captures_by_subname */ TCLIST *by_subname_list; by_subname_list = (TCLIST *) tctreeget(grok->captures_by_subname, (const char *)gct->subname, gct->subname_len, &unused_size); if (by_subname_list == NULL) { by_subname_list = tclistnew(); } /* delete a capture with the same capture id so we can replace it*/ listsize = tclistnum(by_subname_list); for (i = 0; i < listsize; i++) { grok_capture *list_gct; list_gct = (grok_capture *)tclistval(by_subname_list, i, &unused_size); if (list_gct->id == 
gct->id) { tclistremove(by_subname_list, i, &unused_size); break; } } tclistpush(by_subname_list, gct, sizeof(grok_capture)); tctreeput(grok->captures_by_subname, gct->subname, gct->subname_len, by_subname_list, sizeof(TCLIST)); /* end captures_by_subname */ }
/**
 * Look up a capture by its id.
 *
 * The lookup is done in grok->captures_by_id, using the raw bytes of id
 * as the key.
 *
 * @param grok  grok instance whose id index is searched
 * @param id    capture id to look for
 * @return the value tctreeget() yields for that key; Tokyo Cabinet
 *         documents NULL for a key that is not present.
 */
const grok_capture *grok_capture_get_by_id(const grok_t *grok, int id) {
  int value_size; /* demanded by tctreeget(); the size itself is not needed */

  return tctreeget(grok->captures_by_id, &id, sizeof(id), &value_size);
}
void grok_discover(const grok_discover_t *gdt, /*grok_t *dest_grok, */ const char *input, char **discovery, int *discovery_len) { /* Find known patterns in the input string */ char *pattern = NULL; int pattern_len = 0; int pattern_size = 0; int replacements = -1; int offset = 0; /* Track what start position we are in the string */ int rounds = 0; /* This uses substr_replace to copy the input string while allocating * the size properly and tracking the length */ substr_replace(&pattern, &pattern_len, &pattern_size, 0, 0, input, -1); while (replacements != 0 || offset < pattern_len) { const void *key; int key_len; int match = 0; grok_match_t gm; grok_match_t best_match; grok_log(gdt, LOG_DISCOVER, "%d: Round starting", rounds); grok_log(gdt, LOG_DISCOVER, "%d: String: %.*s", rounds, pattern_len, pattern); grok_log(gdt, LOG_DISCOVER, "%d: Offset: % *s^", rounds, offset - 1, " "); tctreeiterinit(gdt->complexity_tree); rounds++; replacements = 0; /* This is used for tracking the longest matched pattern */ int max_matchlen = 0; /* This is used for finding the earliest (leftwise in the string) match * end point. If no matches are found, we'll skip to this position in the * string to find more things to match */ int first_match_endpos = -1; char *cursor = pattern + offset; while ((key = tctreeiternext(gdt->complexity_tree, &key_len)) != NULL) { const int *complexity = (const int *)key; int val_len; const grok_t *g = tctreeget(gdt->complexity_tree, key, sizeof(int), &val_len); match = grok_exec(g, cursor, &gm); grok_log(gdt, LOG_DISCOVER, "Test %s against %.*s", (match == GROK_OK ? 
"succeeded" : "failed"), g->pattern_len, g->pattern); if (match == GROK_OK) { int still_ok; int matchlen = gm.end - gm.start; grok_log(gdt, LOG_DISCOVER, "Matched %.*s", matchlen , cursor + gm.start); if (first_match_endpos == -1 || gm.end < first_match_endpos) { first_match_endpos = gm.end; } still_ok = grok_execn(&global_discovery_req1_grok, cursor + gm.start, matchlen, NULL); if (still_ok != GROK_OK) { grok_log(gdt, LOG_DISCOVER, "%d: Matched %s, but match (%.*s) not complex enough.", rounds, g->pattern, matchlen, cursor + gm.start); continue; } /* We don't want to replace existing patterns like %{FOO} */ if (grok_execn(&global_discovery_req2_grok, cursor + gm.start, matchlen, NULL) == GROK_OK) { grok_log(gdt, LOG_DISCOVER, "%d: Matched %s, but match (%.*s) includes %{...} patterns.", rounds, g->pattern, matchlen, cursor + gm.start); continue; } /* A longer match is a better match. * If match length is equal to max, then still take this match as * better since if true, then this match has a pattern that is less * complex and is therefore a more relevant match */ if (max_matchlen <= matchlen) { grok_log(gdt, LOG_DISCOVER, "%d: New best match: %s", rounds, g->pattern); max_matchlen = matchlen; memcpy(&best_match, &gm, sizeof(grok_match_t)); } else if (max_matchlen == matchlen) { /* Found a match with same length */ grok_log(gdt, LOG_DISCOVER, "%d: Common length match: %s", rounds, g->pattern); } } /* match == GROK_OK */ } /* tctreeiternext(complexity_tree ...) 
*/ if (max_matchlen == 0) { /* No valid matches were found */ if (first_match_endpos > 0) { offset += first_match_endpos; } } else { /* We found a match, replace it in the pattern */ grok_log(gdt, LOG_DISCOVER, "%d: Matched %s on '%.*s'", rounds, best_match.grok->pattern, best_match.end - best_match.start, cursor + best_match.start); replacements = 1; substr_replace(&pattern, &pattern_len, &pattern_size, best_match.start + offset, best_match.end + offset, best_match.grok->pattern, best_match.grok->pattern_len); substr_replace(&pattern, &pattern_len, &pattern_size, best_match.start + offset, best_match.start + offset, "\\E", 2); substr_replace(&pattern, &pattern_len, &pattern_size, best_match.start + best_match.grok->pattern_len + 2 + offset, 0, "\\Q", 2); //usleep(1000000); /* Wrap the new regexp in \E .. \Q, for ending and beginning (respectively) * 'quote literal' as PCRE and Perl support. This prevents literal characters * in the input strings from being interpreted */ grok_log(gdt, LOG_DISCOVER, "%d: Pattern: %.*s", rounds, pattern_len, pattern); } /* if (max_matchlen != 0) */ } /* while (replacements != 0) */ /* Add \Q and \E at beginning and end */ substr_replace(&pattern, &pattern_len, &pattern_size, 0, 0, "\\Q", 2); substr_replace(&pattern, &pattern_len, &pattern_size, pattern_len, pattern_len, "\\E", 2); /* TODO(sissel): Prune any useless \Q\E */ *discovery = pattern; *discovery_len = pattern_len; }