/*
 * Ruby-callable matcher: runs the grok_t wrapped by `self` against the Ruby
 * string `input` using grok_execn().
 *
 * Returns:
 *   - the object built by rGrokMatch_new_from_grok_match() when grok_execn()
 *     returns GROK_OK,
 *   - Qfalse when grok_execn() returns GROK_ERROR_NOMATCH.
 *
 * Raises rb_eArgError (with the numeric grok_execn() result in the message)
 * for any other return code.
 */
VALUE rGrok_match(VALUE self, VALUE input) {
  grok_t *grok = NULL;
  grok_match_t gm;
  char *c_input = NULL;
  long len = 0;
  int ret = 0;
  VALUE rgm = Qnil;

  Data_Get_Struct(self, grok_t, grok);
  c_input = rb_str2cstr(input, &len);

  /* NOTE(review): the Ruby string length (long) is narrowed to int here;
   * inputs longer than INT_MAX bytes would be truncated -- confirm whether
   * callers can ever pass such strings. */
  ret = grok_execn(grok, c_input, (int)len, &gm);

  switch (ret) {
    case GROK_ERROR_NOMATCH:
      rgm = Qfalse;
      break;
    case GROK_OK:
      rgm = rGrokMatch_new_from_grok_match(&gm);
      break;
    default:
      /* rb_raise() does not return, so nothing may follow it here. */
      rb_raise(rb_eArgError, "Error from grok_execn: %d", ret);
  }

  return rgm;
}
/*
 * Format `number` as decimal text into a freshly allocated string.
 *
 * On success stores the string in *value, its length in *value_len and
 * returns 0. On asprintf() failure stores NULL / 0 and returns -1 (the
 * contents of the output pointer are not reliable after a failed asprintf).
 */
static int matchconfig_format_int(char **value, int *value_len, int number) {
  *value_len = asprintf(value, "%d", number);
  if (*value_len < 0) {
    *value = NULL;
    *value_len = 0;
    return -1;
  }
  return 0;
}

/*
 * Build the JSON text for the VALUE_JSON_SIMPLE / VALUE_JSON_COMPLEX macros.
 *
 * The result contains, in order: an "@LINE" entry (the whole subject), an
 * "@MATCH" entry (the matched span) and one entry per named capture yielded
 * by grok_match_walk_next(). Every value is passed through
 * filter_jsonencode() before being embedded. When `complex` is non-zero each
 * entry is an object carrying "start", "end" and "value" and the whole thing
 * is wrapped as { "grok": [ ... ] }; otherwise entries are plain
 * "name": "value" pairs wrapped in { ... }.
 *
 * Returns a heap string the caller must free() and stores its length in
 * *json_len, or returns NULL if formatting an entry failed.
 *
 * NOTE(review): relies on substr_replace() allocating/growing the buffer and
 * keeping it NUL-terminated -- the caller passes the result to a function
 * that uses strlen(); confirm against substr_replace's implementation.
 */
static char *matchconfig_json_intermediate(grok_match_t *gm, int complex,
                                           int *json_len) {
  char *json = NULL;
  int len = 0;
  int size = 0;
  char *tmp = NULL; /* scratch buffer holding the json-encoded value */
  int tmp_len = 0;
  int tmp_size = 0;
  char *entry = NULL;
  int entry_len;
  int failed = 0;
  char *pname;
  const char *pdata;
  int pname_len, pdata_len;
  const int subject_len = (int)strlen(gm->subject);

  *json_len = 0;

  /* TODO(sissel): use a json generator library? */

  /* Push @FOO values first: @LINE ... */
  substr_replace(&tmp, &tmp_len, &tmp_size, 0, 0, gm->subject, subject_len);
  filter_jsonencode(gm, &tmp, &tmp_len, &tmp_size);
  if (complex) {
    /* "end" is the raw subject length (an offset into the subject, like the
     * other entries), not the length of the json-encoded text. */
    entry_len = asprintf(&entry,
                         "{ \"@LINE\": { "
                         "\"start\": 0, "
                         "\"end\": %d, "
                         "\"value\": \"%.*s\" } }, ",
                         subject_len, tmp_len, tmp);
  } else {
    entry_len = asprintf(&entry, "\"@LINE\": \"%.*s\", ", tmp_len, tmp);
  }
  if (entry_len < 0) {
    free(tmp);
    return NULL;
  }
  substr_replace(&json, &len, &size, len, len, entry, entry_len);
  free(entry);

  /* ... then @MATCH */
  substr_replace(&tmp, &tmp_len, &tmp_size, 0, tmp_len,
                 gm->subject + gm->start, gm->end - gm->start);
  filter_jsonencode(gm, &tmp, &tmp_len, &tmp_size);
  if (complex) {
    entry_len = asprintf(&entry,
                         "{ \"@MATCH\": { "
                         "\"start\": %d, "
                         "\"end\": %d, "
                         "\"value\": \"%.*s\" } }, ",
                         gm->start, gm->end, tmp_len, tmp);
  } else {
    entry_len = asprintf(&entry, "\"@MATCH\": \"%.*s\", ", tmp_len, tmp);
  }
  if (entry_len < 0) {
    free(tmp);
    free(json);
    return NULL;
  }
  substr_replace(&json, &len, &size, len, len, entry, entry_len);
  free(entry);

  /* One entry per named capture, appended in walk order. */
  grok_match_walk_init(gm);
  while (grok_match_walk_next(gm, &pname, &pname_len, &pdata, &pdata_len)
         == 0) {
    substr_replace(&tmp, &tmp_len, &tmp_size, 0, tmp_len, pdata, pdata_len);
    filter_jsonencode(gm, &tmp, &tmp_len, &tmp_size);
    if (complex) {
      /* pointer difference is ptrdiff_t; convert explicitly for %ld */
      const long start = (long)(pdata - gm->subject);
      entry_len = asprintf(&entry,
                           "{ \"%.*s\": { "
                           "\"start\": %ld, "
                           "\"end\": %ld, "
                           "\"value\": \"%.*s\""
                           " } }, ",
                           pname_len, pname, start, start + pdata_len,
                           tmp_len, tmp);
    } else {
      entry_len = asprintf(&entry, "\"%.*s\": \"%.*s\", ",
                           pname_len, pname, tmp_len, tmp);
    }
    if (entry_len < 0) {
      failed = 1;
      break;
    }
    substr_replace(&json, &len, &size, len, len, entry, entry_len);
    free(entry);
  }
  grok_match_walk_end(gm);
  free(tmp);

  if (failed) {
    free(json);
    return NULL;
  }

  /* Insert the opening text at the beginning and replace the trailing ", "
   * left by the last entry with the closing text. */
  if (complex) {
    substr_replace(&json, &len, &size, 0, 0, "{ \"grok\": [ ", 12);
    substr_replace(&json, &len, &size, len - 2, len, " ] }", 4);
  } else {
    substr_replace(&json, &len, &size, 0, 0, "{ ", 2);
    substr_replace(&json, &len, &size, len - 2, len, " }", 2);
  }

  *json_len = len;
  return json;
}

/*
 * Expand every reference that global_matchconfig_grok finds in `str`, using
 * data from the match `gm`, and return the expanded text.
 *
 * For each hit, the "NAME" capture is looked up with patname2macro():
 *   - VALUE_LINE    -> the whole subject
 *   - VALUE_START   -> gm->start as decimal text
 *   - VALUE_END     -> gm->end as decimal text
 *   - VALUE_LENGTH  -> gm->end - gm->start as decimal text
 *   - VALUE_MATCH   -> the matched span of the subject
 *   - VALUE_JSON_SIMPLE / VALUE_JSON_COMPLEX -> JSON built from the match,
 *     itself run through this function once more
 * Any other name is resolved as a named capture of `gm`. The resolved value
 * is passed through grok_match_reaction_apply_filter() with the "FILTER"
 * capture and substituted for the reference. References that cannot be
 * resolved are left in place and skipped.
 *
 * Parameters:
 *   str - NUL-terminated template; not modified.
 *   gm  - match supplying the values.
 *
 * Returns a heap-allocated string the caller must free(), or NULL when
 * `str` or `gm` is NULL or the working copy cannot be allocated.
 */
char *grok_matchconfig_filter_reaction(const char *str, grok_match_t *gm) {
  char *output;
  int len;
  int size;
  int offset = 0; /* start of the not-yet-scanned part of output */
  grok_match_t tmp_gm;

  if (gm == NULL || str == NULL) {
    return NULL;
  }

  len = strlen(str);
  size = len + 1;
  output = malloc(size);
  if (output == NULL) {
    return NULL;
  }
  memcpy(output, str, size);

  grok_log(gm->grok, LOG_REACTION, "Checking '%.*s'",
           len - offset, output + offset);
  global_matchconfig_grok.logmask = gm->grok->logmask;
  global_matchconfig_grok.logdepth = gm->grok->logdepth + 1;

  /* tmp_gm.start / tmp_gm.end are relative to output + offset. */
  while (grok_execn(&global_matchconfig_grok, output + offset,
                    len - offset, &tmp_gm) == GROK_OK) {
    const char *name = NULL;
    const char *filter = NULL;
    char *value = NULL;
    int name_len = 0;
    int value_len = 0;
    int filter_len = 0;
    int ret = -1;       /* 0 once `value` holds a usable replacement */
    int free_value = 0; /* non-zero when `value` is heap memory we own */
    const struct strmacro *patmacro;

    grok_log(gm->grok, LOG_REACTION, "Checking '%.*s'",
             len - offset, output + offset);

    grok_match_get_named_substring(&tmp_gm, "NAME", &name, &name_len);
    grok_match_get_named_substring(&tmp_gm, "FILTER", &filter, &filter_len);
    if (name == NULL) {
      /* Nothing to look up; leave this reference untouched. */
      offset += tmp_gm.end;
      continue;
    }
    grok_log(gm->grok, LOG_REACTION, "Matched something: %.*s",
             name_len, name);

    /* XXX: We should really make a dispatch table out of this... */
    /* _macro_dispatch_func(char **value, int *value_len) ... */
    /* Let gperf do the hard work for us. */
    patmacro = patname2macro(name, name_len);
    grok_log(gm->grok, LOG_REACTION, "Checking lookup table for '%.*s': %p",
             name_len, name, (const void *)patmacro);

    if (patmacro != NULL) {
      free_value = 1; /* We malloc stuff to 'value' here */
      switch (patmacro->code) {
        case VALUE_LINE:
          value = strdup(gm->subject);
          if (value != NULL) {
            value_len = strlen(value);
            ret = 0;
          }
          break;
        case VALUE_START:
          ret = matchconfig_format_int(&value, &value_len, gm->start);
          break;
        case VALUE_END:
          ret = matchconfig_format_int(&value, &value_len, gm->end);
          break;
        case VALUE_LENGTH:
          ret = matchconfig_format_int(&value, &value_len,
                                       gm->end - gm->start);
          break;
        case VALUE_MATCH:
          value_len = gm->end - gm->start;
          value = string_ndup(gm->subject + gm->start, value_len);
          ret = 0;
          break;
        case VALUE_JSON_SIMPLE:
        case VALUE_JSON_COMPLEX: {
          int json_len = 0;
          char *json = matchconfig_json_intermediate(
              gm, patmacro->code == VALUE_JSON_COMPLEX, &json_len);
          if (json != NULL) {
            grok_log(gm->grok, LOG_REACTION, "JSON intermediate: %.*s",
                     json_len, json);
            value = grok_matchconfig_filter_reaction(json, gm);
            free(json);
            if (value != NULL) {
              /* The nested expansion may change the length, so measure the
               * result instead of reusing the intermediate's length. */
              value_len = strlen(value);
              ret = 0;
            }
          }
          break;
        }
        default:
          grok_log(gm->grok, LOG_REACTION,
                   "Unhandled macro code: '%.*s' (%d)",
                   name_len, name, patmacro->code);
      }
    } else {
      /* XXX: Should just have get_named_substring take a
       * 'name, name_len' instead */
      char *name_copy = malloc(name_len + 1);
      if (name_copy != NULL) {
        memcpy(name_copy, name, name_len);
        name_copy[name_len] = '\0';
        /* On success `value` is set by the lookup and is not owned by us
         * (free_value stays 0). */
        ret = grok_match_get_named_substring(gm, name_copy,
                                             (const char **)&value,
                                             &value_len);
        free(name_copy);
      }
      /* On allocation failure ret stays -1: the reference is skipped. */
    }

    if (ret != 0) {
      /* Unresolvable: step over the reference and keep scanning. */
      offset += tmp_gm.end;
    } else {
      /* replace %{FOO} with the value of foo */
      char *old;
      grok_log(tmp_gm.grok, LOG_REACTION, "Start/end: %d %d",
               tmp_gm.start, tmp_gm.end);
      grok_log(tmp_gm.grok, LOG_REACTION, "Replacing %.*s",
               (tmp_gm.end - tmp_gm.start), output + tmp_gm.start + offset);

      /* apply the any filters from %{FOO|filter1|filter2...} */
      old = value;
      grok_log(tmp_gm.grok, LOG_REACTION, "Prefilter string: %.*s",
               value_len, value);
      grok_match_reaction_apply_filter(gm, &value, &value_len,
                                       filter, filter_len);
      if (old != value) {
        /* The filter handed back a different buffer: release ours if we
         * owned it, and take ownership of the new one. */
        if (free_value) {
          free(old); /* Free the old value */
        }
        free_value = 1;
      }
      grok_log(gm->grok, LOG_REACTION, "Filter: %.*s", filter_len, filter);

      grok_log(tmp_gm.grok, LOG_REACTION, "Replacing %.*s with %.*s",
               (tmp_gm.end - tmp_gm.start), output + tmp_gm.start + offset,
               value_len, value);
      substr_replace(&output, &len, &size, offset + tmp_gm.start,
                     offset + tmp_gm.end, value, value_len);

      /* Resume right after the inserted value. The value begins at
       * offset + tmp_gm.start, so both terms are needed; advancing by
       * value_len alone would rescan (and possibly re-expand) the text
       * that was just substituted. */
      offset += tmp_gm.start + value_len;

      if (free_value) {
        free(value);
      }
    }
  } /* while grok_execn ... */

  return output;
}
void grok_discover(const grok_discover_t *gdt, /*grok_t *dest_grok, */ const char *input, char **discovery, int *discovery_len) { /* Find known patterns in the input string */ char *pattern = NULL; int pattern_len = 0; int pattern_size = 0; int replacements = -1; int offset = 0; /* Track what start position we are in the string */ int rounds = 0; /* This uses substr_replace to copy the input string while allocating * the size properly and tracking the length */ substr_replace(&pattern, &pattern_len, &pattern_size, 0, 0, input, -1); while (replacements != 0 || offset < pattern_len) { const void *key; int key_len; int match = 0; grok_match_t gm; grok_match_t best_match; grok_log(gdt, LOG_DISCOVER, "%d: Round starting", rounds); grok_log(gdt, LOG_DISCOVER, "%d: String: %.*s", rounds, pattern_len, pattern); grok_log(gdt, LOG_DISCOVER, "%d: Offset: % *s^", rounds, offset - 1, " "); tctreeiterinit(gdt->complexity_tree); rounds++; replacements = 0; /* This is used for tracking the longest matched pattern */ int max_matchlen = 0; /* This is used for finding the earliest (leftwise in the string) match * end point. If no matches are found, we'll skip to this position in the * string to find more things to match */ int first_match_endpos = -1; char *cursor = pattern + offset; while ((key = tctreeiternext(gdt->complexity_tree, &key_len)) != NULL) { const int *complexity = (const int *)key; int val_len; const grok_t *g = tctreeget(gdt->complexity_tree, key, sizeof(int), &val_len); match = grok_exec(g, cursor, &gm); grok_log(gdt, LOG_DISCOVER, "Test %s against %.*s", (match == GROK_OK ? 
"succeeded" : "failed"), g->pattern_len, g->pattern); if (match == GROK_OK) { int still_ok; int matchlen = gm.end - gm.start; grok_log(gdt, LOG_DISCOVER, "Matched %.*s", matchlen , cursor + gm.start); if (first_match_endpos == -1 || gm.end < first_match_endpos) { first_match_endpos = gm.end; } still_ok = grok_execn(&global_discovery_req1_grok, cursor + gm.start, matchlen, NULL); if (still_ok != GROK_OK) { grok_log(gdt, LOG_DISCOVER, "%d: Matched %s, but match (%.*s) not complex enough.", rounds, g->pattern, matchlen, cursor + gm.start); continue; } /* We don't want to replace existing patterns like %{FOO} */ if (grok_execn(&global_discovery_req2_grok, cursor + gm.start, matchlen, NULL) == GROK_OK) { grok_log(gdt, LOG_DISCOVER, "%d: Matched %s, but match (%.*s) includes %{...} patterns.", rounds, g->pattern, matchlen, cursor + gm.start); continue; } /* A longer match is a better match. * If match length is equal to max, then still take this match as * better since if true, then this match has a pattern that is less * complex and is therefore a more relevant match */ if (max_matchlen <= matchlen) { grok_log(gdt, LOG_DISCOVER, "%d: New best match: %s", rounds, g->pattern); max_matchlen = matchlen; memcpy(&best_match, &gm, sizeof(grok_match_t)); } else if (max_matchlen == matchlen) { /* Found a match with same length */ grok_log(gdt, LOG_DISCOVER, "%d: Common length match: %s", rounds, g->pattern); } } /* match == GROK_OK */ } /* tctreeiternext(complexity_tree ...) 
*/ if (max_matchlen == 0) { /* No valid matches were found */ if (first_match_endpos > 0) { offset += first_match_endpos; } } else { /* We found a match, replace it in the pattern */ grok_log(gdt, LOG_DISCOVER, "%d: Matched %s on '%.*s'", rounds, best_match.grok->pattern, best_match.end - best_match.start, cursor + best_match.start); replacements = 1; substr_replace(&pattern, &pattern_len, &pattern_size, best_match.start + offset, best_match.end + offset, best_match.grok->pattern, best_match.grok->pattern_len); substr_replace(&pattern, &pattern_len, &pattern_size, best_match.start + offset, best_match.start + offset, "\\E", 2); substr_replace(&pattern, &pattern_len, &pattern_size, best_match.start + best_match.grok->pattern_len + 2 + offset, 0, "\\Q", 2); //usleep(1000000); /* Wrap the new regexp in \E .. \Q, for ending and beginning (respectively) * 'quote literal' as PCRE and Perl support. This prevents literal characters * in the input strings from being interpreted */ grok_log(gdt, LOG_DISCOVER, "%d: Pattern: %.*s", rounds, pattern_len, pattern); } /* if (max_matchlen != 0) */ } /* while (replacements != 0) */ /* Add \Q and \E at beginning and end */ substr_replace(&pattern, &pattern_len, &pattern_size, 0, 0, "\\Q", 2); substr_replace(&pattern, &pattern_len, &pattern_size, pattern_len, pattern_len, "\\E", 2); /* TODO(sissel): Prune any useless \Q\E */ *discovery = pattern; *discovery_len = pattern_len; }
/*
 * Convenience form of grok_execn() for NUL-terminated input: measures
 * `text` with strlen() and forwards everything else unchanged.
 *
 * Returns whatever grok_execn() returns.
 */
int grok_exec(const grok_t *grok, const char *text, grok_match_t *gm) {
  const size_t text_len = strlen(text);

  return grok_execn(grok, text, text_len, gm);
}