/*
 * Ruby method: run the grok pattern wrapped by `self` against `input`.
 *
 * self  - Ruby object wrapping a grok_t (unwrapped with Data_Get_Struct).
 * input - Ruby string to match; converted with rb_str2cstr.
 *
 * Returns a new match object built by rGrokMatch_new_from_grok_match() when
 * grok_execn() reports GROK_OK, and Qfalse when it reports
 * GROK_ERROR_NOMATCH. Any other return code raises ArgumentError carrying the
 * numeric code.
 */
VALUE rGrok_match(VALUE self, VALUE input) {
  grok_t *grok = NULL;
  grok_match_t gm;
  char *c_input = NULL;
  long len = 0;
  int ret = 0;

  Data_Get_Struct(self, grok_t, grok);
  c_input = rb_str2cstr(input, &len);
  /* The Ruby string length is a long; it is narrowed to int for grok_execn. */
  ret = grok_execn(grok, c_input, (int)len, &gm);

  switch (ret) {
    case GROK_ERROR_NOMATCH:
      return Qfalse;
    case GROK_OK:
      return rGrokMatch_new_from_grok_match(&gm);
    default:
      /* rb_raise() unwinds into the interpreter and never returns here. */
      rb_raise(rb_eArgError, "Error from grok_execn: %d", ret);
  }

  /* Not reached; present only so every path visibly returns a VALUE. */
  return Qnil;
}
Пример #2
0
/*
 * Expand every %{NAME} / %{NAME|filter1|filter2...} reference found in `str`
 * using data from the match `gm`.
 *
 * A private copy of `str` is scanned repeatedly with global_matchconfig_grok.
 * For each hit the NAME capture is looked up:
 *   - if patname2macro() knows it, the value is generated here (whole line,
 *     start/end/length of the match, matched text, or a JSON dump of all
 *     named captures);
 *   - otherwise it is treated as a named capture of `gm`.
 * The resulting value is run through the filters named in the FILTER capture
 * and spliced into the copy in place of the reference. References that cannot
 * be resolved are left in the text and skipped.
 *
 * str - NUL-terminated reaction template.
 * gm  - match providing the subject string and captures.
 *
 * Returns a newly malloc'd buffer holding the expanded text, or NULL when
 * `gm` or `str` is NULL or the initial allocation fails. The buffer is
 * allocated with malloc (and resized through substr_replace), so the caller
 * is expected to free() it.
 */
char *grok_matchconfig_filter_reaction(const char *str, grok_match_t *gm) {
  char *output;
  int len;
  int size;
  grok_match_t tmp_gm;
  int offset = 0;

  if (gm == NULL || str == NULL) {
    return NULL;
  }

  len = strlen(str);
  size = len + 1;
  output = malloc(size);
  if (output == NULL) {
    return NULL;
  }
  memcpy(output, str, size);

  grok_log(gm->grok, LOG_REACTION,
           "Checking '%.*s'", len - offset, output + offset);
  global_matchconfig_grok.logmask = gm->grok->logmask;
  global_matchconfig_grok.logdepth  = gm->grok->logdepth + 1;
  while (grok_execn(&global_matchconfig_grok, output + offset,
                    len - offset, &tmp_gm) == GROK_OK) {
    grok_log(gm->grok, LOG_REACTION, "Checking '%.*s'",
             len - offset, output + offset);
    const char *name = NULL;
    const char *filter = NULL;
    char *value = NULL;
    char *name_copy;

    int name_len = 0, value_len = 0, filter_len = 0;
    int ret = -1;       /* stays non-zero unless a value was produced */
    int free_value = 0; /* non-zero when 'value' is heap memory we own */
    const struct strmacro *patmacro;

    grok_match_get_named_substring(&tmp_gm, "NAME", &name, &name_len);
    grok_match_get_named_substring(&tmp_gm, "FILTER", &filter, &filter_len);
    grok_log(gm->grok, LOG_REACTION, "Matched something: %.*s", name_len, name);

    /* XXX: We should really make a dispatch table out of this... */
    /* _macro_dispatch_func(char **value, int *value_len) ... */
    /* Let gperf do the hard work for us. */
    patmacro = patname2macro(name, name_len);
    /* A pointer must be printed with %p; %x expects an unsigned int. */
    grok_log(gm->grok, LOG_REACTION, "Checking lookup table for '%.*s': %p",
             name_len, name, (void *)patmacro);
    if (patmacro != NULL) {
      free_value = 1; /* We malloc stuff to 'value' here */
      switch (patmacro->code) {
        case VALUE_LINE:
          value = strdup(gm->subject);
          value_len = strlen(value);
          ret = 0;
          break;
        case VALUE_START:
          value_len = asprintf(&value, "%d", gm->start);
          ret = 0;
          break;
        case VALUE_END:
          value_len = asprintf(&value, "%d", gm->end);
          ret = 0;
          break;
        case VALUE_LENGTH:
          value_len = asprintf(&value, "%d", gm->end - gm->start);
          ret = 0;
          break;
        case VALUE_MATCH:
          value_len = gm->end - gm->start;
          value = string_ndup(gm->subject + gm->start, value_len);
          ret = 0;
          break;
        case VALUE_JSON_SIMPLE:
        case VALUE_JSON_COMPLEX:
          {
            int value_offset = 0;
            int value_size = 0;
            char *pname;
            const char *pdata;
            int pname_len, pdata_len;

            /* 'tmp' is a scratch buffer holding the json-encoded form of the
             * piece currently being emitted; 'entry' is one formatted
             * key/value fragment appended to 'value'. */
            char *entry = NULL, *tmp = NULL;
            int entry_len = 0, tmp_len = 0, tmp_size = 0;

            value = NULL;
            value_len = 0;

            /* TODO(sissel): use a json generator library? */

            /* Push @FOO values first */
            substr_replace(&tmp, &tmp_len, &tmp_size, 0, 0,
                           gm->subject, strlen(gm->subject));
            filter_jsonencode(gm, &tmp, &tmp_len, &tmp_size);

            if (patmacro->code == VALUE_JSON_SIMPLE) {
              entry_len = asprintf(&entry,
                                   "\"@LINE\": \"%.*s\", ", tmp_len, tmp);
            } else { /* VALUE_JSON_COMPLEX */
              entry_len = asprintf(&entry,
                                   "{ \"@LINE\": { "
                                   "\"start\": 0, "
                                   "\"end\": %d, "
                                   "\"value\": \"%.*s\" } }, ",
                                   tmp_len, tmp_len, tmp);
            }
            substr_replace(&value, &value_len, &value_size, value_len, value_len,
                           entry, entry_len);
            free(entry);

            substr_replace(&tmp, &tmp_len, &tmp_size, 0, tmp_len,
                           gm->subject + gm->start, gm->end - gm->start);
            filter_jsonencode(gm, &tmp, &tmp_len, &tmp_size);
            if (patmacro->code == VALUE_JSON_SIMPLE) {
              entry_len = asprintf(&entry, "\"@MATCH\": \"%.*s\", ", tmp_len, tmp);
            } else { /* VALUE_JSON_COMPLEX */
              entry_len = asprintf(&entry,
                                   "{ \"@MATCH\": { "
                                   "\"start\": %d, "
                                   "\"end\": %d, "
                                   "\"value\": \"%.*s\" } }, ",
                                   gm->start, gm->end, tmp_len, tmp);
            }
            substr_replace(&value, &value_len, &value_size, value_len, value_len,
                           entry, entry_len);
            free(entry);

            /* Captures are inserted right after the @LINE/@MATCH entries. */
            value_offset += value_len;

            /* For every named capture, put this in our result string:
             * "NAME": "%{NAME|jsonencode}"
             */
            grok_match_walk_init(gm);
            while (grok_match_walk_next(gm, &pname, &pname_len,
                                        &pdata, &pdata_len) == 0) {
              substr_replace(&tmp, &tmp_len, &tmp_size, 0, tmp_len,
                             pdata, pdata_len);
              filter_jsonencode(gm, &tmp, &tmp_len, &tmp_size);

              if (patmacro->code == VALUE_JSON_SIMPLE) {
                entry_len = asprintf(&entry, "\"%.*s\": \"%.*s\", ",
                                     pname_len, pname, tmp_len, tmp);
              } else { /* VALUE_JSON_COMPLEX */
                /* Pointer differences are ptrdiff_t; cast so the argument
                 * type matches %ld on every platform. */
                entry_len = asprintf(&entry,
                                     "{ \"%.*s\": { "
                                     "\"start\": %ld, "
                                     "\"end\": %ld, "
                                     "\"value\": \"%.*s\""
                                     " } }, ",
                                     pname_len, pname,
                                     (long)(pdata - gm->subject), /*start*/
                                     (long)(pdata - gm->subject) + pdata_len, /*end*/
                                     tmp_len, tmp);
              }
              substr_replace(&value, &value_len, &value_size,
                             value_offset, value_offset, entry, entry_len);
              value_offset += entry_len;
              free(entry);
            }
            grok_match_walk_end(gm);

            /* Insert the { at the beginning */
            /* And Replace trailing ", " with " }" */
            if (patmacro->code == VALUE_JSON_SIMPLE) {
              substr_replace(&value, &value_len, &value_size, 0, 0, "{ ", 2);
              substr_replace(&value, &value_len, &value_size,
                             value_len - 2, value_len, " }", 2);
                             /* TODO(sissel): This could be:
                              * -3, -1, " }", 2); */
            } else { /* VALUE_JSON_COMPLEX */
              substr_replace(&value, &value_len, &value_size, 0, 0,
                             "{ \"grok\": [ ", 12);
              substr_replace(&value, &value_len, &value_size,
                             value_len - 2, value_len, " ] }", 4);
                             /* TODO(sissel): This could be:
                              * -3, -1, " ] }", 4); */
            }

            char *old = value;
            grok_log(gm->grok, LOG_REACTION, "JSON intermediate: %.*s",
                     value_len, value);
            value = grok_matchconfig_filter_reaction(value, gm);
            free(old);
            free(tmp);

            if (value != NULL) {
              /* The recursive expansion may have changed the length; the
               * length of the intermediate string is no longer valid. */
              value_len = strlen(value);
              ret = 0;
            }
          }
          break;
        default:
          grok_log(gm->grok, LOG_REACTION, "Unhandled macro code: '%.*s' (%d)",
                   name_len, name, patmacro->code);
      }
    } else {
      /* XXX: Should just have get_named_substring take a
       * 'name, name_len' instead */
      name_copy = malloc(name_len + 1);
      if (name_copy != NULL) {
        memcpy(name_copy, name, name_len);
        name_copy[name_len] = '\0';
        ret = grok_match_get_named_substring(gm, name_copy,
                                             (const char **)&value,
                                             &value_len);
        free(name_copy);
      }
      /* On allocation failure ret stays -1 and the reference is skipped. */
    }

    if (ret != 0) {
      /* Nothing to substitute: leave the reference in place, move past it. */
      offset += tmp_gm.end;
    } else {
      /* replace %{FOO} with the value of foo */
      char *old;
      grok_log(tmp_gm.grok, LOG_REACTION, "Start/end: %d %d", tmp_gm.start, tmp_gm.end);
      grok_log(tmp_gm.grok, LOG_REACTION, "Replacing %.*s",
               (tmp_gm.end - tmp_gm.start), output + tmp_gm.start + offset);

      /* apply the any filters from %{FOO|filter1|filter2...} */
      old = value;

      grok_log(tmp_gm.grok, LOG_REACTION, "Prefilter string: %.*s",
               value_len, value);
      grok_match_reaction_apply_filter(gm, &value, &value_len,
                                       filter, filter_len);
      if (old != value) {
        /* The filter handed back a different buffer: release the previous
         * one if we owned it, and take ownership of the new one. */
        if (free_value) {
          free(old); /* Free the old value */
        }
        free_value = 1;
      }
      grok_log(gm->grok, LOG_REACTION, "Filter: %.*s", filter_len, filter);

      grok_log(tmp_gm.grok, LOG_REACTION, "Replacing %.*s with %.*s",
               (tmp_gm.end - tmp_gm.start),
               output + tmp_gm.start + offset, value_len, value);
      substr_replace(&output, &len, &size, offset + tmp_gm.start,
                     offset + tmp_gm.end, value, value_len);
      /* tmp_gm.start is relative to output + offset, so the inserted value
       * ends at offset + start + value_len. Resume there so the substituted
       * text itself is never rescanned for further references. */
      offset += tmp_gm.start + value_len;
      if (free_value) {
        free(value);
      }
    }
  } /* while grok_execn ... */

  return output;
}
Пример #3
0
/*
 * Build a grok pattern for `input` by repeatedly replacing the best-matching
 * piece of text with the pattern that matched it.
 *
 * Each round tries every grok_t stored in gdt->complexity_tree against the
 * text starting at the current offset. A candidate match is discarded when
 * global_discovery_req1_grok does not match it ("not complex enough") or when
 * global_discovery_req2_grok does (it already contains a %{...} reference).
 * Of the remaining candidates the longest wins; on a tie the later one in
 * iteration order replaces the earlier one. The winning text is replaced by
 * the pattern source wrapped in \E ... \Q, and the whole result is finally
 * wrapped in \Q ... \E so that the untouched input is treated literally.
 *
 * gdt           - discovery state holding the pattern tree.
 * input         - NUL-terminated text to analyse.
 * discovery     - out: receives the generated pattern buffer (built through
 *                 substr_replace; presumably heap memory the caller must
 *                 free - confirm against substr_replace).
 * discovery_len - out: receives the length of that buffer's contents.
 */
void grok_discover(const grok_discover_t *gdt, /*grok_t *dest_grok, */
                   const char *input, char **discovery, int *discovery_len) {
  /* Find known patterns in the input string */
  char *pattern = NULL;
  int pattern_len = 0;
  int pattern_size = 0;

  int replacements = -1;
  int offset = 0; /* Track what start position we are in the string */
  int rounds = 0;

  /* This uses substr_replace to copy the input string while allocating
   * the size properly and tracking the length */
  substr_replace(&pattern, &pattern_len, &pattern_size, 0, 0, input, -1);

  while (replacements != 0 || offset < pattern_len) {
    const void *key;
    int key_len;
    int match = 0;
    grok_match_t gm;
    grok_match_t best_match; /* only valid once max_matchlen > 0 */

    grok_log(gdt, LOG_DISCOVER, "%d: Round starting", rounds);
    grok_log(gdt, LOG_DISCOVER, "%d: String: %.*s", rounds, pattern_len, pattern);
    grok_log(gdt, LOG_DISCOVER, "%d: Offset: % *s^", rounds, offset - 1, " ");

    tctreeiterinit(gdt->complexity_tree);
    rounds++;

    replacements = 0;
    /* This is used for tracking the longest matched pattern */
    int max_matchlen = 0;

    /* This is used for finding the earliest (leftwise in the string) match
     * end point. If no matches are found, we'll skip to this position in the
     * string to find more things to match
     */
    int first_match_endpos = -1;

    char *cursor = pattern + offset;

    while ((key = tctreeiternext(gdt->complexity_tree, &key_len)) != NULL) {
      int val_len;
      const grok_t *g = tctreeget(gdt->complexity_tree, key, sizeof(int), &val_len);
      match = grok_exec(g, cursor, &gm);
      grok_log(gdt, LOG_DISCOVER, "Test %s against %.*s",
               (match == GROK_OK ? "succeeded" : "failed"), g->pattern_len, g->pattern);

      if (match == GROK_OK) {
        int still_ok;
        int matchlen = gm.end - gm.start;
        grok_log(gdt, LOG_DISCOVER, "Matched %.*s", matchlen , cursor + gm.start);

        if (first_match_endpos == -1 || gm.end < first_match_endpos) {
          first_match_endpos = gm.end;
        }

        still_ok = grok_execn(&global_discovery_req1_grok, cursor + gm.start,
                              matchlen, NULL);
        if (still_ok != GROK_OK) {
          grok_log(gdt, LOG_DISCOVER,
                   "%d: Matched %s, but match (%.*s) not complex enough.",
                   rounds, g->pattern, matchlen, cursor + gm.start);
          continue;
        }

        /* We don't want to replace existing patterns like %{FOO} */
        if (grok_execn(&global_discovery_req2_grok, cursor + gm.start, matchlen,
                       NULL) == GROK_OK) {
          /* The literal '%' has to be written as '%%' in a printf-style
           * format; a bare "%{" is an invalid conversion. */
          grok_log(gdt, LOG_DISCOVER,
                   "%d: Matched %s, but match (%.*s) includes %%{...} patterns.",
                   rounds, g->pattern, matchlen, cursor + gm.start);
          continue;
        }

        /* A longer match is a better match.
         * If match length is equal to max, then still take this match as
         * better since if true, then this match has a pattern that is less
         * complex and is therefore a more relevant match */
        if (max_matchlen <= matchlen) {
          grok_log(gdt, LOG_DISCOVER,
                   "%d: New best match: %s", rounds, g->pattern);
          max_matchlen = matchlen;
          memcpy(&best_match, &gm, sizeof(grok_match_t));
        }
      } /* match == GROK_OK */
    } /* tctreeiternext(complexity_tree ...) */

    if (max_matchlen == 0) { /* No valid matches were found */
      if (first_match_endpos > 0) {
        offset += first_match_endpos;
      } else if (first_match_endpos == 0) {
        /* Only empty matches at the cursor: step one character forward so
         * the next round sees different text instead of repeating this one. */
        offset++;
      } else {
        /* No pattern matched anything in the remaining text. Neither the
         * text nor the offset would change, so another round would be
         * identical to this one; stop here. */
        break;
      }
    } else { /* We found a match, replace it in the pattern */
      grok_log(gdt, LOG_DISCOVER, "%d: Matched %s on '%.*s'",
               rounds, best_match.grok->pattern,
               best_match.end - best_match.start, cursor + best_match.start);
      replacements = 1;
      substr_replace(&pattern, &pattern_len, &pattern_size,
                     best_match.start + offset, best_match.end + offset,
                     best_match.grok->pattern, best_match.grok->pattern_len);
      substr_replace(&pattern, &pattern_len, &pattern_size,
                     best_match.start + offset, best_match.start + offset, "\\E", 2);
      substr_replace(&pattern, &pattern_len, &pattern_size,
                     best_match.start + best_match.grok->pattern_len + 2 + offset,
                     0, "\\Q", 2);

      /* Wrap the new regexp in \E .. \Q, for ending and beginning (respectively)
       * 'quote literal' as PCRE and Perl support. This prevents literal characters
       * in the input strings from being interpreted */
      grok_log(gdt, LOG_DISCOVER, "%d: Pattern: %.*s", rounds, pattern_len, pattern);
    } /* if (max_matchlen != 0) */
  } /* while (replacements != 0) */

  /* Add \Q and \E at beginning and end */
  substr_replace(&pattern, &pattern_len, &pattern_size,
                 0, 0, "\\Q", 2);
  substr_replace(&pattern, &pattern_len, &pattern_size,
                 pattern_len, pattern_len, "\\E", 2);

  /* TODO(sissel): Prune any useless \Q\E */
  *discovery = pattern;
  *discovery_len = pattern_len;
}
Пример #4
0
/*
 * Convenience form of grok_execn() for NUL-terminated text: the length is
 * measured with strlen() and the call is forwarded unchanged otherwise.
 * Returns whatever grok_execn() returns.
 */
int grok_exec(const grok_t *grok, const char *text, grok_match_t *gm) {
  int result;

  result = grok_execn(grok, text, strlen(text), gm);
  return result;
}