Exemplo n.º 1
0
/*
 * Look up a capture by its PCRE capture number.
 *
 * grok:            instance whose captures_by_capture_number tree is searched.
 * capture_number:  lookup key; the raw bytes of the int are used as the key.
 *
 * Returns the pointer produced by tctreeget() for that key, unchanged
 * (including NULL if that is what tctreeget() returns).
 */
const grok_capture *grok_capture_get_by_capture_number(grok_t *grok,
                                                       int capture_number) {
  int value_size; /* tctreeget() reports the value size here; not needed */

  return tctreeget(grok->captures_by_capture_number, &capture_number,
                   sizeof(capture_number), &value_size);
}
Exemplo n.º 2
0
/*
 * Look up a capture by its subname.
 *
 * grok:     instance whose captures_by_subname tree is searched.
 * subname:  NUL-terminated key; strlen(subname) bytes are used as the key.
 *
 * The tree maps each subname to a TCLIST of captures. Returns element 0 of
 * that list, or NULL when the tree has no entry for the subname.
 */
const grok_capture *grok_capture_get_by_subname(const grok_t *grok,
                                                const char *subname) {
  int ignored_size;
  const TCLIST *matches = tctreeget(grok->captures_by_subname, subname,
                                    strlen(subname), &ignored_size);

  if (matches != NULL) {
    return tclistval(matches, 0, &ignored_size);
  }
  return NULL;
}
Exemplo n.º 3
0
/*
 * Look up a capture by its full name.
 *
 * grok:  instance whose captures_by_name tree is searched.
 * name:  NUL-terminated key; strlen(name) bytes are used as the key.
 *
 * The tree maps each name to a TCLIST of captures. Returns the first capture
 * in that list, or NULL when the tree has no entry for the name.
 */
const grok_capture *grok_capture_get_by_name(const grok_t *grok, const char *name) {
  int ignored_size;
  const TCLIST *captures = tctreeget(grok->captures_by_name, name, strlen(name),
                                     &ignored_size);

  /* Several captures may share a name; only the first one is reported. */
  return (captures == NULL) ? NULL : tclistval(captures, 0, &ignored_size);
}
Exemplo n.º 4
0
/*
 * Find a named pattern in grok->patterns.
 *
 * grok:        instance whose pattern tree is searched.
 * name:        pattern name; also printed with %s in the log messages.
 * name_len:    number of bytes of name used as the lookup key.
 * regexp:      out: pointer returned by tctreeget() for the pattern, or NULL
 *              when the pattern is not found.
 * regexp_len:  out: length of *regexp in bytes, or 0 when not found.
 *
 * Returns GROK_OK when the pattern exists, GROK_ERROR_PATTERN_NOT_FOUND
 * otherwise.
 */
int grok_pattern_find(const grok_t *grok, const char *name, size_t name_len,
                      const char **regexp, size_t *regexp_len) {
  TCTREE *patterns = grok->patterns;

  /* tctreeget() reports the value size through an int *. Use a real int
   * instead of casting the caller's size_t *, which would leave part of
   * *regexp_len unwritten where size_t is wider than int. */
  int value_len = 0;

  *regexp = tctreeget(patterns, name, name_len, &value_len);
  const int found = (*regexp != NULL);

  /* The precision for %.*s must be an int, and the string must not be NULL. */
  grok_log(grok, LOG_PATTERNS, "Searching for pattern '%s' (%s): %.*s",
           name, found ? "found" : "not found",
           found ? value_len : 0, found ? *regexp : "");

  if (!found) {
    grok_log(grok, LOG_PATTERNS, "pattern '%s': not found", name);
    *regexp_len = 0;
    return GROK_ERROR_PATTERN_NOT_FOUND;
  }

  *regexp_len = (size_t)value_len;
  return GROK_OK;
}
Exemplo n.º 5
0
/*
 * Advance a capture walk by one step.
 *
 * iter:  iterator handed to tctreeiternext() to obtain the next key.
 * grok:  instance whose captures_by_id tree is consulted for that key.
 *
 * Returns the captures_by_id entry for the next key, or NULL once
 * tctreeiternext() has no more keys. Both outcomes are logged at LOG_CAPTURE.
 */
const grok_capture *grok_capture_walk_next(const TCTREE_ITER *iter, const grok_t *grok) {
  int key_size;
  int value_size;
  int *capture_id = (int *)tctreeiternext(iter, &key_size);

  if (capture_id != NULL) {
    grok_log(grok, LOG_CAPTURE, "walknext ok %d", *capture_id);
    /* The key from the iterator is looked up, with its reported size, in
     * the by-id tree. */
    return (grok_capture *)tctreeget(grok->captures_by_id, capture_id, key_size,
                                     &value_size);
  }

  grok_log(grok, LOG_CAPTURE, "walknext null");
  return NULL;
}
Exemplo n.º 6
0
/*
 * Exercise a sequence of Tokyo Cabinet TCTREE and TCLIST calls: put, get,
 * iterate, clear and delete on a tree, then push, overwrite, read, remove
 * and delete on a list.
 *
 * Returns 0.
 */
int main(void) {

  uint32_t key = 123;
  const char *val = "abcdefkrnglrg";

  // Make a new tree
  TCTREE *tree = tctreenew();

  // Put an integer key
  tctreeput(tree, &key, sizeof(key), val, strlen(val));

  // Put a different key
  key = 122;
  tctreeput(tree, &key, sizeof(key), val, strlen(val));

  // Put the same key twice
  tctreeput(tree, &key, sizeof(key), val, strlen(val));

  // Put the same key but keep the old value
  tctreeputkeep(tree, &key, sizeof(key), val, strlen(val));

  // Get back a value. The result is not freed here and is only read.
  int size;
  const char *stored = tctreeget(tree, &key, sizeof(key), &size);
  printf("Got value %s\n", stored);

  // Create an iterator
  tctreeiterinit(tree);

  // Walk the tree
  tctreeiternext(tree, &size);
  tctreeiternext(tree, &size);
  tctreeiternext(tree, &size);

  // Clear the tree
  tctreeclear(tree);

  // Put one value back in the tree to make sure it's freed on delete
  tctreeput(tree, &key, sizeof(key), val, strlen(val));

  // Delete the tree
  tctreedel(tree);

  // Make a list
  TCLIST *list = tclistnew();

  // Push a few times
  tclistpush(list, &key, sizeof(key));
  key += 1;
  tclistpush(list, &key, sizeof(key));
  key += 1;
  tclistpush(list, &key, sizeof(key));

  // Overwrite an existing element
  tclistover(list, 1, &key, sizeof(key));

  // Get a value
  tclistval(list, 2, &size);

  // Remove some values; each removed region is released with free()
  void *removed = tclistremove(list, 2, &size);
  free(removed);
  removed = tclistremove(list, 0, &size);
  free(removed);

  // Free the whole list
  tclistdel(list);

  return 0;
}
Exemplo n.º 7
0
void grok_capture_add(grok_t *grok, const grok_capture *gct, int only_renamed) {
  grok_log(grok, LOG_CAPTURE, 
           "Adding pattern '%s' as capture %d (pcrenum %d)",
           gct->name, gct->id, gct->pcre_capture_number);

  if (only_renamed && strstr(gct->name, ":") == NULL) {
    return;
  }

  /* Primary key is id */
  tctreeput(grok->captures_by_id, &(gct->id), sizeof(gct->id),
            gct, sizeof(grok_capture));
  /* Tokyo Cabinet doesn't seem to support 'secondary indexes' like BDB does,
   * so let's manually update all the other 'captures_by_*' trees */
  int unused_size;
  tctreeput(grok->captures_by_capture_number, &(gct->pcre_capture_number), 
            sizeof(gct->pcre_capture_number), gct, sizeof(grok_capture));


  int i, listsize;
  /* TCTREE doesn't permit dups, so let's make the structure a tree of arrays,
   * keyed on a string. */
  /* captures_by_name */
  TCLIST *by_name_list;
  by_name_list = (TCLIST *) tctreeget(grok->captures_by_name,
                                      (const char *)gct->name,
                                      gct->name_len, &unused_size);
  if (by_name_list == NULL) {
    by_name_list = tclistnew();
  }
  /* delete a capture with the same capture id  so we can replace it*/
  listsize = tclistnum(by_name_list);
  for (i = 0; i < listsize; i++) {
    grok_capture *list_gct;
    list_gct = (grok_capture *)tclistval(by_name_list, i, &unused_size);
    if (list_gct->id == gct->id) {
      tclistremove(by_name_list, i, &unused_size);
      break;
    }
  }
  tclistpush(by_name_list, gct, sizeof(grok_capture));
  tctreeput(grok->captures_by_name, gct->name, gct->name_len,
            by_name_list, sizeof(TCLIST));
  /* end captures_by_name */

  /* captures_by_subname */
  TCLIST *by_subname_list;
  by_subname_list = (TCLIST *) tctreeget(grok->captures_by_subname,
                                         (const char *)gct->subname,
                                         gct->subname_len, &unused_size);
  if (by_subname_list == NULL) {
    by_subname_list = tclistnew();
  }
  /* delete a capture with the same capture id so we can replace it*/
  listsize = tclistnum(by_subname_list);
  for (i = 0; i < listsize; i++) {
    grok_capture *list_gct;
    list_gct = (grok_capture *)tclistval(by_subname_list, i, &unused_size);
    if (list_gct->id == gct->id) {
      tclistremove(by_subname_list, i, &unused_size);
      break;
    }
  }
  tclistpush(by_subname_list, gct, sizeof(grok_capture));
  tctreeput(grok->captures_by_subname, gct->subname, gct->subname_len,
            by_subname_list, sizeof(TCLIST));
  /* end captures_by_subname */
}
Exemplo n.º 8
0
/*
 * Look up a capture by its id.
 *
 * grok:  instance whose captures_by_id tree is searched.
 * id:    lookup key; the raw bytes of the int are used as the key.
 *
 * Returns the pointer produced by tctreeget() for that key, unchanged
 * (including NULL if that is what tctreeget() returns).
 */
const grok_capture *grok_capture_get_by_id(const grok_t *grok, int id) {
  int value_size; /* tctreeget() reports the value size here; not needed */

  return tctreeget(grok->captures_by_id, &id, sizeof(id), &value_size);
}
Exemplo n.º 9
0
/*
 * Build a pattern string from an input string by repeatedly replacing parts
 * of the text with the patterns of the groks stored in gdt->complexity_tree.
 *
 * gdt:            discovery state; its complexity_tree supplies the candidate
 *                 groks, visited in the tree's iteration order.
 * input:          text to analyse; it is copied, not modified.
 * discovery:      out: the resulting pattern buffer, wrapped in \Q ... \E.
 *                 NOTE(review): the buffer is built by substr_replace();
 *                 presumably the caller owns it afterwards -- confirm.
 * discovery_len:  out: length of *discovery.
 *
 * Each round runs every candidate against the text from the current offset.
 * A candidate match must pass global_discovery_req1_grok and must not match
 * global_discovery_req2_grok (i.e. must not already contain %{...}). The
 * longest accepted match is replaced by its pattern; on equal length the
 * candidate visited later wins. If nothing is accepted, the offset moves to
 * the earliest end position of any match seen in the round.
 */
void grok_discover(const grok_discover_t *gdt, /*grok_t *dest_grok, */
                   const char *input, char **discovery, int *discovery_len) {
  /* Find known patterns in the input string */
  char *pattern = NULL;
  int pattern_len = 0;
  int pattern_size = 0;

  int replacements = -1;
  int offset = 0; /* Track what start position we are in the string */
  int rounds = 0;

  /* This uses substr_replace to copy the input string while allocating
   * the size properly and tracking the length */
  substr_replace(&pattern, &pattern_len, &pattern_size, 0, 0, input, -1);

  while (replacements != 0 || offset < pattern_len) {
    const void *key;
    int key_len;
    int match = 0;
    grok_match_t gm;
    grok_match_t best_match; /* only read when max_matchlen != 0 */

    grok_log(gdt, LOG_DISCOVER, "%d: Round starting", rounds);
    grok_log(gdt, LOG_DISCOVER, "%d: String: %.*s", rounds, pattern_len, pattern);
    grok_log(gdt, LOG_DISCOVER, "%d: Offset: % *s^", rounds, offset - 1, " ");

    tctreeiterinit(gdt->complexity_tree);
    rounds++;

    replacements = 0;
    /* This is used for tracking the longest matched pattern */
    int max_matchlen = 0;

    /* This is used for finding the earliest (leftwise in the string) match
     * end point. If no matches are found, we'll skip to this position in the
     * string to find more things to match
     */
    int first_match_endpos = -1;

    char *cursor = pattern + offset;

    while ((key = tctreeiternext(gdt->complexity_tree, &key_len)) != NULL) {
      int val_len;
      const grok_t *g = tctreeget(gdt->complexity_tree, key, sizeof(int), &val_len);
      match = grok_exec(g, cursor, &gm);
      grok_log(gdt, LOG_DISCOVER, "Test %s against %.*s",
               (match == GROK_OK ? "succeeded" : "failed"), g->pattern_len, g->pattern);

      if (match == GROK_OK) {
        int still_ok;
        int matchlen = gm.end - gm.start;
        grok_log(gdt, LOG_DISCOVER, "Matched %.*s", matchlen , cursor + gm.start);

        /* Recorded before the acceptance checks, so rejected matches still
         * tell us how far the offset may advance. */
        if (first_match_endpos == -1 || gm.end < first_match_endpos) {
          first_match_endpos = gm.end;
        }

        still_ok = grok_execn(&global_discovery_req1_grok, cursor + gm.start,
                              matchlen, NULL);
        if (still_ok != GROK_OK) {
          grok_log(gdt, LOG_DISCOVER, 
                   "%d: Matched %s, but match (%.*s) not complex enough.",
                   rounds, g->pattern, matchlen, cursor + gm.start);
          continue;
        }

        /* We don't want to replace existing patterns like %{FOO} */
        if (grok_execn(&global_discovery_req2_grok, cursor + gm.start, matchlen,
                       NULL) == GROK_OK) {
          /* "%%" prints a literal '%'; a bare "%{" is not a valid conversion
           * in a printf-style format string. */
          grok_log(gdt, LOG_DISCOVER, 
                   "%d: Matched %s, but match (%.*s) includes %%{...} patterns.",
                   rounds, g->pattern, matchlen, cursor + gm.start);
          continue;
        }

        /* A longer match is a better match.
         * If match length is equal to max, then still take this match as
         * better since if true, then this match has a pattern that is less
         * complex and is therefore a more relevant match */
        if (max_matchlen <= matchlen) {
          grok_log(gdt, LOG_DISCOVER,
                   "%d: New best match: %s", rounds, g->pattern);
          max_matchlen = matchlen;
          memcpy(&best_match, &gm, sizeof(grok_match_t));
        }
      } /* match == GROK_OK */
    } /* tctreeiternext(complexity_tree ...) */

    if (max_matchlen == 0) { /* No valid matches were found */
      if (first_match_endpos > 0) {
        offset += first_match_endpos;
      } else {
        /* Nothing was replaced and the offset cannot advance, so the next
         * round would see exactly the same string and offset. Stop here
         * instead of looping forever. */
        break;
      }
    } else { /* We found a match, replace it in the pattern */
      grok_log(gdt, LOG_DISCOVER, "%d: Matched %s on '%.*s'",
               rounds, best_match.grok->pattern,
               best_match.end - best_match.start, cursor + best_match.start);
      replacements = 1;
      substr_replace(&pattern, &pattern_len, &pattern_size,
                     best_match.start + offset, best_match.end + offset,
                     best_match.grok->pattern, best_match.grok->pattern_len);

      /* Wrap the new regexp in \E .. \Q, for ending and beginning (respectively)
       * 'quote literal' as PCRE and Perl support. This prevents literal characters
       * in the input strings from being interpreted */
      substr_replace(&pattern, &pattern_len, &pattern_size,
                     best_match.start + offset, best_match.start + offset, "\\E", 2);
      /* NOTE(review): end argument is 0 here, unlike the other insertions;
       * presumably substr_replace() treats this as an insertion -- confirm. */
      substr_replace(&pattern, &pattern_len, &pattern_size,
                     best_match.start + best_match.grok->pattern_len + 2 + offset,
                     0, "\\Q", 2);

      grok_log(gdt, LOG_DISCOVER, "%d: Pattern: %.*s", rounds, pattern_len, pattern);
    } /* if (max_matchlen != 0) */
  } /* while (replacements != 0 || offset < pattern_len) */

  /* Add \Q and \E at beginning and end */
  substr_replace(&pattern, &pattern_len, &pattern_size,
                 0, 0, "\\Q", 2);
  substr_replace(&pattern, &pattern_len, &pattern_size,
                 pattern_len, pattern_len, "\\E", 2);

  /* TODO(sissel): Prune any useless \Q\E */
  *discovery = pattern;
  *discovery_len = pattern_len;
}