Пример #1
0
// script_english_number_*
//
// Converts the number contained in `input` with `fun` and returns the result.
//
// Two input shapes are handled:
//  - plain text: if the whole input parses as a number the result is
//    fun(number); otherwise the input is returned unchanged.
//  - a keyword parameter ("<param-...>123</param-...>"): the text between the
//    opening tag and the next '<' is replaced by fun(number) and the result is
//    prefixed with "<hint-1>" when the number is 1 and "<hint-2>" otherwise.
//    If the tag content cannot be located or parsed, "<hint-2>" is prepended
//    to the untouched input.
String do_english_num(String input, String(*fun)(int)) {
	if (!is_substr(input, 0, _("<param-"))) {
		// Plain input: convert it when it is a number, pass it through otherwise.
		long number = 0;
		if (!input.ToLong(&number)) {
			return input;
		}
		return fun(number);
	}

	// A keyword parameter, of the form "<param->123</param->".
	size_t start = skip_tag(input, 0);
	if (start != String::npos) {
		size_t end = input.find_first_of(_('<'), start);
		if (end != String::npos) {
			String digits = input.substr(start, end - start);
			long number = 0;
			if (digits.ToLong(&number)) {
				// Only the hint prefix depends on the value; the replacement is shared.
				String replaced = substr_replace(input, start, end, fun(number));
				if (number == 1) {
					return _("<hint-1>") + replaced;
				}
				return _("<hint-2>") + replaced;
			}
		}
	}
	// The tag did not contain a usable number.
	return _("<hint-2>") + input;
}
Пример #2
0
// script_english_singular/plural/singplur
//
// Applies `fun` to the word contained in `input`.
//
// Plain input is passed to `fun` as a whole. For a keyword parameter
// ("<param-...>word</param-...>") only the text between the opening tag and
// the next '<' is transformed and spliced back in place; if that text cannot
// be located the input is returned unchanged.
String do_english(String input, String(*fun)(const String&)) {
	if (!is_substr(input, 0, _("<param-"))) {
		return fun(input);
	}

	// A keyword parameter, of the form "<param->123</param->".
	size_t start = skip_tag(input, 0);
	if (start == String::npos) {
		return input; // failed: no end of opening tag
	}
	size_t end = input.find_first_of(_('<'), start);
	if (end == String::npos) {
		return input; // failed: no closing tag
	}
	String word = input.substr(start, end - start);
	return substr_replace(input, start, end, fun(word));
}
Пример #3
0
/*\ ARMCI_HOSTNAME_REPLACE contains "needle/nail" string to derive new hostname
 *
 *  The environment value is split at its first '/'.
 *   - empty needle ("/nail"): the result is host with nail appended;
 *   - otherwise the result is whatever substr_replace(host, needle, nail)
 *     returns.
 *  Returns NULL when the variable is unset, contains no '/', or an
 *  allocation fails. In the append case the result is a calloc'ed string
 *  owned by the caller.
\*/
static char *new_hostname(char *host)
{
  char *spec, *needle, *nail, *result;

  spec = getenv("ARMCI_HOSTNAME_REPLACE");
  if(spec == NULL) return NULL;

  /* work on a private copy: the string is split in place below */
  needle = strdup(spec);
  if(needle == NULL) return NULL;

  nail = strchr(needle,'/');
  if(nail == NULL){
      free(needle);              /* was leaked on this path */
      return NULL;
  }
  *nail = '\0';
  nail++;

  if(nail == (needle+1)){
      /* '/' was the first character: nothing to search for, just append */
      result = calloc(strlen(host)+strlen(nail)+1,1);
      if(result != NULL){
          strcpy(result,host);
          strcat(result,nail);
      }
      free(needle);              /* result is an independent copy */
      return result;
  }

  /* NOTE(review): needle is deliberately not freed here. substr_replace is
   * defined outside this block and it is not visible whether its result may
   * reference needle/nail. If it always returns an independent allocation,
   * keep the result in a local, free(needle), then return it. */
  return substr_replace(host,needle,nail);
}
Пример #4
0
/**
 * Expand the %{NAME} / %{NAME|filter...} references found in a reaction
 * string, using data from the match `gm`.
 *
 * A private copy of `str` is scanned repeatedly with global_matchconfig_grok.
 * For every hit the captured NAME is resolved as follows:
 *  - if patname2macro() knows the name, the value is generated here
 *    (whole subject line, start/end/length as decimal text, the matched
 *    text, or a JSON rendering of the match and all of its named captures);
 *  - otherwise the name is looked up as a named capture of `gm`.
 * The FILTER part is applied with grok_match_reaction_apply_filter() and the
 * reference is replaced by the resulting value. References that cannot be
 * resolved are left in place and skipped.
 *
 * @param str  NUL-terminated reaction template.
 * @param gm   match providing the data; NULL yields NULL.
 * @return     heap-allocated expanded string (the caller frees it), or NULL
 *             if gm is NULL or the working copy cannot be allocated.
 */
char *grok_matchconfig_filter_reaction(const char *str, grok_match_t *gm) {
  char *output;
  int len;
  int size;
  grok_match_t tmp_gm;
  int offset = 0; /* scan position inside output */

  if (gm == NULL) {
    return NULL;
  }

  len = strlen(str);
  size = len + 1;
  output = malloc(size);
  if (output == NULL) {
    return NULL;
  }
  memcpy(output, str, size);

  grok_log(gm->grok, LOG_REACTION,
           "Checking '%.*s'", len - offset, output + offset);
  global_matchconfig_grok.logmask = gm->grok->logmask;
  global_matchconfig_grok.logdepth  = gm->grok->logdepth + 1;
  while (grok_execn(&global_matchconfig_grok, output + offset,
                    len - offset, &tmp_gm) == GROK_OK) {
    grok_log(gm->grok, LOG_REACTION, "Checking '%.*s'",
             len - offset, output + offset);
    const char *name = NULL;
    const char *filter = NULL;
    char *value = NULL;
    char *name_copy;

    int name_len, value_len, filter_len;
    int ret = -1;       /* 0 once a value for this reference is available */
    int free_value = 0; /* non-zero when 'value' is owned by this function */
    const struct strmacro *patmacro;

    grok_match_get_named_substring(&tmp_gm, "NAME", &name, &name_len);
    grok_match_get_named_substring(&tmp_gm, "FILTER", &filter, &filter_len);
    grok_log(gm->grok, LOG_REACTION, "Matched something: %.*s", name_len, name);

    /* XXX: We should really make a dispatch table out of this... */
    /* _macro_dispatch_func(char **value, int *value_len) ... */
    /* Let gperf do the hard work for us. */
    patmacro = patname2macro(name, name_len);
    /* %p, not %x: the argument is a pointer */
    grok_log(gm->grok, LOG_REACTION, "Checking lookup table for '%.*s': %p",
             name_len, name, (const void *)patmacro);
    if (patmacro != NULL) {
      free_value = 1; /* We malloc stuff to 'value' here */
      switch (patmacro->code) {
        case VALUE_LINE:
          value = strdup(gm->subject);
          if (value != NULL) {
            value_len = strlen(value);
            ret = 0;
          }
          break;
        case VALUE_START:
          value_len = asprintf(&value, "%d", gm->start);
          ret = 0;
          break;
        case VALUE_END:
          value_len = asprintf(&value, "%d", gm->end);
          ret = 0;
          break;
        case VALUE_LENGTH:
          value_len = asprintf(&value, "%d", gm->end - gm->start);
          ret = 0;
          break;
        case VALUE_MATCH:
          value_len = gm->end - gm->start;
          value = string_ndup(gm->subject + gm->start, value_len);
          ret = 0;
          break;
        case VALUE_JSON_SIMPLE:
        case VALUE_JSON_COMPLEX:
          {
            int value_offset = 0; /* where the next capture entry is inserted */
            int value_size = 0;
            char *pname;
            const char *pdata;
            int pname_len, pdata_len;

            char *entry = NULL, *tmp = NULL;
            int entry_len = 0, tmp_len = 0, tmp_size = 0;

            value = NULL;
            value_len = 0;

            /* TODO(sissel): use a json generator library? */

            /* Push @FOO values first */
            substr_replace(&tmp, &tmp_len, &tmp_size, 0, 0,
                           gm->subject, strlen(gm->subject));
            filter_jsonencode(gm, &tmp, &tmp_len, &tmp_size);

            if (patmacro->code == VALUE_JSON_SIMPLE) {
              entry_len = asprintf(&entry,
                                   "\"@LINE\": \"%.*s\", ", tmp_len, tmp);
            } else { /* VALUE_JSON_COMPLEX */
              entry_len = asprintf(&entry,
                                   "{ \"@LINE\": { "
                                   "\"start\": 0, "
                                   "\"end\": %d, "
                                   "\"value\": \"%.*s\" } }, ",
                                   tmp_len, tmp_len, tmp);
            }
            substr_replace(&value, &value_len, &value_size, value_len, value_len,
                           entry, entry_len);
            free(entry);

            substr_replace(&tmp, &tmp_len, &tmp_size, 0, tmp_len,
                           gm->subject + gm->start, gm->end - gm->start);
            filter_jsonencode(gm, &tmp, &tmp_len, &tmp_size);
            if (patmacro->code == VALUE_JSON_SIMPLE) {
              entry_len = asprintf(&entry, "\"@MATCH\": \"%.*s\", ", tmp_len, tmp);
            } else { /* VALUE_JSON_COMPLEX */
              entry_len = asprintf(&entry,
                                   "{ \"@MATCH\": { "
                                   "\"start\": %d, "
                                   "\"end\": %d, "
                                   "\"value\": \"%.*s\" } }, ",
                                   gm->start, gm->end, tmp_len, tmp);
            }
            substr_replace(&value, &value_len, &value_size, value_len, value_len,
                           entry, entry_len);
            free(entry);

            value_offset += value_len;

            /* For every named capture, put this in our result string:
             * "NAME": "%{NAME|jsonencode}"
             */
            grok_match_walk_init(gm);
            while (grok_match_walk_next(gm, &pname, &pname_len,
                                        &pdata, &pdata_len) == 0) {
              substr_replace(&tmp, &tmp_len, &tmp_size, 0, tmp_len,
                             pdata, pdata_len);
              filter_jsonencode(gm, &tmp, &tmp_len, &tmp_size);

              if (patmacro->code == VALUE_JSON_SIMPLE) {
                entry_len = asprintf(&entry, "\"%.*s\": \"%.*s\", ",
                                     pname_len, pname, tmp_len, tmp);
              } else { /* VALUE_JSON_COMPLEX */
                /* pointer differences are ptrdiff_t; cast to match %ld */
                entry_len = asprintf(&entry,
                                     "{ \"%.*s\": { "
                                     "\"start\": %ld, "
                                     "\"end\": %ld, "
                                     "\"value\": \"%.*s\""
                                     " } }, ",
                                     pname_len, pname,
                                     (long)(pdata - gm->subject), /*start*/
                                     (long)((pdata - gm->subject) + pdata_len), /*end*/
                                     tmp_len, tmp);
              }
              substr_replace(&value, &value_len, &value_size,
                             value_offset, value_offset, entry, entry_len);
              value_offset += entry_len;
              free(entry);
            }
            grok_match_walk_end(gm);

            /* Insert the { at the beginning */
            /* And Replace trailing ", " with " }" */
            if (patmacro->code == VALUE_JSON_SIMPLE) {
              substr_replace(&value, &value_len, &value_size, 0, 0, "{ ", 2);
              substr_replace(&value, &value_len, &value_size,
                             value_len - 2, value_len, " }", 2);
                             /* TODO(sissel): This could be:
                              * -3, -1, " }", 2); */
            } else { /* VALUE_JSON_COMPLEX */
              substr_replace(&value, &value_len, &value_size, 0, 0,
                             "{ \"grok\": [ ", 12);
              substr_replace(&value, &value_len, &value_size,
                             value_len - 2, value_len, " ] }", 4);
                             /* TODO(sissel): This could be:
                              * -3, -1, " ] }", 4); */
            }

            /* Expand any %{...} references inside the generated JSON. */
            char *old = value;
            grok_log(gm->grok, LOG_REACTION, "JSON intermediate: %.*s",
                     value_len, value);
            value = grok_matchconfig_filter_reaction(value, gm);
            free(old);
            free(tmp);

            if (value != NULL) {
              /* The recursive expansion may have changed the length; the old
               * value_len would truncate or over-read the new string.
               * NOTE(review): relies on the returned string being
               * NUL-terminated, as this function already requires of `str`. */
              value_len = strlen(value);
              ret = 0;
            }
          }
          break;
        default:
          grok_log(gm->grok, LOG_REACTION, "Unhandled macro code: '%.*s' (%d)",
                   name_len, name, patmacro->code);
      }
    } else {
      /* XXX: Should just have get_named_substring take a
       * 'name, name_len' instead */
      name_copy = malloc(name_len + 1);
      if (name_copy != NULL) {
        memcpy(name_copy, name, name_len);
        name_copy[name_len] = '\0';
        ret = grok_match_get_named_substring(gm, name_copy,
                                             (const char **)&value,
                                             &value_len);
        free(name_copy);
      }
      /* on allocation failure ret stays -1 and the reference is skipped */
    }

    if (ret != 0) {
      /* Nothing to substitute: leave the reference and continue after it. */
      offset += tmp_gm.end;
    } else {
      /* replace %{FOO} with the value of foo */
      char *old;
      grok_log(tmp_gm.grok, LOG_REACTION, "Start/end: %d %d", tmp_gm.start, tmp_gm.end);
      grok_log(tmp_gm.grok, LOG_REACTION, "Replacing %.*s",
               (tmp_gm.end - tmp_gm.start), output + tmp_gm.start + offset);

      /* apply the any filters from %{FOO|filter1|filter2...} */
      old = value;

      grok_log(tmp_gm.grok, LOG_REACTION, "Prefilter string: %.*s",
               value_len, value);
      grok_match_reaction_apply_filter(gm, &value, &value_len,
                                       filter, filter_len);
      if (old != value) {
        /* The filter handed back a different buffer, which we now own. */
        if (free_value) {
          free(old); /* Free the old value */
        }
        free_value = 1;
      }
      grok_log(gm->grok, LOG_REACTION, "Filter: %.*s", filter_len, filter);

      grok_log(tmp_gm.grok, LOG_REACTION, "Replacing %.*s with %.*s",
               (tmp_gm.end - tmp_gm.start),
               output + tmp_gm.start + offset, value_len, value);
      substr_replace(&output, &len, &size, offset + tmp_gm.start,
                     offset + tmp_gm.end, value, value_len);
      offset += value_len;
      if (free_value) {
        free(value);
      }
    }
  } /* while grok_execn ... */

  return output;
}
Пример #5
0
/**
 * The output filter routine. This one gets called whenever a response is
 * generated that passes this filter.
 *
 * Header patterns are applied to the outgoing headers on every call. Body
 * data is buffered in the filter context until a brigade containing an EOS
 * bucket arrives; the buffered data is then flattened, every configured
 * regex pattern is applied to it, and the result is passed on followed by
 * the EOS bucket.
 *
 * @param f     The filter definition.
 * @param bb    The bucket brigade containing the data.
 * @return      APR_SUCCESS while data is only being buffered; otherwise the
 *              status of ap_pass_brigade(), or an error status when the
 *              filter definition is missing, flattening fails, a regex
 *              match fails or memory cannot be allocated.
 */
static apr_status_t replace_output_filter(ap_filter_t *f, apr_bucket_brigade *bb)
{
    request_rec *r = f->r;
    conn_rec *c = r->connection;
    replace_ctx_t *ctx = f->ctx;
    apr_bucket *b;
    apr_size_t len;
    const char *data;
    apr_status_t rv;
    int re_vector[RE_VECTOR_SIZE];  // 3 elements per matched pattern
    replace_pattern_t *next;
    header_replace_pattern_t *next_header;

    if (!ctx) {
        /* Initialize context */
        ctx = apr_pcalloc(f->r->pool, sizeof(replace_ctx_t));
        f->ctx = ctx;
        ctx->bb = apr_brigade_create(r->pool, c->bucket_alloc);
    }

    /* parse config settings */

    /* look for the user-defined filter */
    ctx->filter = find_filter_def(f->r->server, f->frec->name);
    if (!ctx->filter) {
        ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, f->r,
                      "couldn't find definition of filter '%s'",
                      f->frec->name);
        return APR_EINVAL;
    }
    ctx->p = f->r->pool;
    if (ctx->filter->intype &&
        ctx->filter->intype != INTYPE_ALL) {
        if (!f->r->content_type) {
            ctx->noop = 1;
        }
        else {
            const char *ctypes = f->r->content_type;
            const char *ctype = ap_getword(f->r->pool, &ctypes, ';');

            if (strcasecmp(ctx->filter->intype, ctype)) {
                /* wrong IMT for us; don't mess with the output */
                ctx->noop = 1;
            }
        }
    }

    /* Pass the data through untouched if there are indications that the
     * filter shouldn't be executed; report downstream failures to the caller.
     */
    if (ctx->noop == 1) {
        return ap_pass_brigade(f->next, bb);
    }

    /**
     * Loop through the configured header patterns.
     */
    for (next_header = ctx->filter->header_pattern;
         next_header != NULL;
         next_header = next_header->next) {

        // create a separate table with the requested HTTP header entries and
        // unset those headers in the original request.
        apr_table_t *header_table;
        header_table = apr_table_make(r->pool, 2);
        // create a data structure for the callback function
        header_replace_cb_t *hrcb;
        hrcb = apr_palloc(r->pool, sizeof(header_replace_cb_t));
        hrcb->header_table = header_table;
        hrcb->pattern = next_header->pattern;
        hrcb->extra = next_header->extra;
        hrcb->replacement = next_header->replacement;
        hrcb->r = r;
        // pass any header that is defined to be processed to the callback
        // function and unset those headers in the original outgoing record.
        apr_table_do(replace_header_cb, hrcb, r->headers_out,
                     next_header->header, NULL);
        // only touch the header if the changed header table is not empty.
        if (!apr_is_empty_table(header_table)) {
            apr_table_unset(r->headers_out, next_header->header);
            // overlay the original header table with the new one to reintegrate
            // the changed headers.
            r->headers_out = apr_table_overlay(r->pool, r->headers_out,
                                               header_table);
        }
    }

    /* Not nice but necessary: unset the ETag, because we do not know how to
     * adjust its value for the modified body.
     */
    apr_table_unset(f->r->headers_out, "ETag");

    int eos = 0;        // flag to check if an EOS bucket is in the brigade.
    apr_bucket *eos_bucket = NULL;
                        // Backup for the EOS bucket.

    /* Iterate through the available data. Stop if there is an EOS */
    for (b = APR_BRIGADE_FIRST(bb); b != APR_BRIGADE_SENTINEL(bb); b = APR_BUCKET_NEXT(b)) {
        if (APR_BUCKET_IS_EOS(b)) {
            eos = 1;
            ap_save_brigade(f, &ctx->bb, &bb, ctx->p);
            APR_BUCKET_REMOVE(b);
            eos_bucket = b;
            break;
        }
    }

    /* If the iteration over the brigade hasn't found an EOS bucket, just save
     * the brigade and return.
     */
    if (eos != 1) {
        ap_save_brigade(f, &ctx->bb, &bb, ctx->p);
        return APR_SUCCESS;
    }

    if ((rv = apr_brigade_pflatten(ctx->bb, (char **)&data, &len, ctx->p))
        != APR_SUCCESS) {
        /* Return if the flattening didn't work. */
        return rv;
    } else {
        /* Remove the original data from the bucket brigade. Otherwise it would
         * be passed twice (original data and the processed, flattened copy) to
         * the next filter.
         */
        apr_brigade_cleanup(ctx->bb);
    }

    /* Only run the patterns when there is data to work on. */
    if (len > 0) {

        /* start checking for the regex's. */
        for (next = ctx->filter->pattern;
             next != NULL;
             next = next->next)
        {
            int rc = 0;
            int offset = 0;

            /* apply the current pattern until it no longer matches */
            do {
                rc = pcre_exec(next->pattern, next->extra, data,
                               len, offset, 0,
                               re_vector, RE_VECTOR_SIZE);

                if (rc < 0 && rc != PCRE_ERROR_NOMATCH) {
                    /* rc is a PCRE code, not an apr_status_t: report it in
                     * the message and return a proper APR status. */
                    ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r,
                                  "Matching Error %d", rc);
                    return APR_EGENERAL;
                }

                /* This shouldn't happen */
                if (rc == 0) {
                    ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r,
                                  "PCRE output vector too small (%d)",
                                  RE_VECTOR_SIZE/3-1);
                }

                /* If the result count is greater than 0 then there are
                 * matches in the data string. Thus we try to replace those
                 * strings with the user provided string.
                 */
                if (rc > 0) {
                    char *prefix;   // the string before the matching part.
                    char *postfix;  // the string after the matching part.
                    char *newdata;  // the concatenated string of prefix,
                                    // the replaced string and postfix.
                    char *replacement;
                                    // the string with the data to replace
                                    // (after the subpattern processing has
                                    // been done).
                    int replacement_on_heap = 0;
                                    // non-zero once replacement points to
                                    // malloc'ed memory instead of pool memory.
                    char *to_replace[10];
                                    // the string array containing the
                                    // strings that are to be replaced.
                    int match_diff; // the difference between the matching
                                    // string and its replacement.
                    int x;          // a simple counter.
                    char *pos;      // the starting position within the
                                    // replacement string, where there is a
                                    // subpattern to replace.

                    /* start with building the replacement string */
                    replacement = apr_pstrcat(ctx->p, next->replacement,
                                              NULL);

                    /* look for the subpatterns \0 to \9 */

                    for (x = 0; x < rc && x < 10; x++) {
                        /* extract the x'ths subpattern */
                        to_replace[x] = substr(data, re_vector[x*2],
                                               re_vector[x*2+1] -
                                               re_vector[x*2], r);

                        ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r,
                                      "Found match: %s", to_replace[x]);

                        /* the token ( \0 to \9) we are looking for */
                        char *token = apr_pstrcat(ctx->p, "\\",
                                                apr_itoa(ctx->p, x), NULL);
                        /* Allocate a buffer with room for replacing one
                         * two-character token by the subpattern. */
                        char *tmp;
                        if (!to_replace[x] || strlen(to_replace[x]) < 2) {
                            tmp = malloc(strlen(replacement) + 1);
                        } else {
                            tmp = malloc(strlen(replacement) - 1 +
                                         strlen(to_replace[x]));
                        }
                        if (tmp == NULL) {
                            if (replacement_on_heap) {
                                free(replacement);
                            }
                            return APR_ENOMEM;
                        }
                        /* copy the replacement string to the new
                         * location.
                         */
                        memcpy(tmp, replacement, strlen(replacement) + 1);
                        /* The buffer from the previous subpattern round was
                         * malloc'ed and used to be leaked here. */
                        if (replacement_on_heap) {
                            free(replacement);
                        }
                        replacement = tmp;
                        replacement_on_heap = 1;
                        /* try to replace each occurence of the token with
                         * its matched subpattern. */
                        pos = ap_strstr(replacement, token);
                        while (pos) {
                            if (!to_replace[x]) {
                                break;
                            }
                            /* NOTE(review): substr_replace is a helper
                             * defined elsewhere; presumably it overwrites
                             * the token at pos in place - confirm. */
                            substr_replace(pos, to_replace[x],
                                           strlen(pos),
                                           strlen(to_replace[x]));
                            /* grow the buffer again for the next token */
                            if (strlen(to_replace[x]) < 2) {
                                tmp = malloc(strlen(replacement) + 1);
                            } else {
                                tmp = malloc(strlen(replacement) - 1 +
                                             strlen(to_replace[x]));
                            }
                            if (tmp == NULL) {
                                free(replacement);
                                return APR_ENOMEM;
                            }
                            memcpy(tmp, replacement,
                                   strlen(replacement) + 1);
                            /* clean up. */
                            free(replacement);
                            replacement = tmp;
                            pos = ap_strstr(replacement, token);
                        }
                    }

                    match_diff = strlen(replacement) -
                                 (re_vector[1] - re_vector[0]);

                    /* Allocate memory for a buffer to copy the first part
                     * of the data string up to (but not including) the
                     * the matching pattern.
                     */
                    prefix = apr_pcalloc(ctx->p, re_vector[0] + 1);
                    if (prefix == NULL) {
                        ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r,
                            "Unable to allocate memory for prefix",
                            NULL);
                        if (replacement_on_heap) {
                            free(replacement);
                        }
                        return APR_ENOMEM;
                    }

                    /* Copy the data before the match; the buffer is zeroed,
                     * so the copy stays NUL-terminated.
                     */
                    memcpy(prefix, data, (size_t)re_vector[0]);

                    /* Copy the string from the end of the pattern to the
                     * end of the data string itself.
                     */
                    postfix = apr_pcalloc(ctx->p, len);
                    if (postfix == NULL) {
                        ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r,
                            "Unable to allocate memory for postfix",
                            NULL);
                        if (replacement_on_heap) {
                            free(replacement);
                        }
                        return APR_ENOMEM;
                    }

                    memcpy(postfix,
                           (data + re_vector[1]),
                           len - re_vector[1]);

                    /* Create the new data string, replace the old one
                     * and clean up.
                     */
                    newdata = apr_pstrcat(ctx->p, prefix,
                                          replacement, postfix,
                                          NULL);
                    /* update the point of the data and free the allocated
                     * memory for the replacement string.
                     */
                    data = newdata;
                    if (replacement_on_heap) {
                        free(replacement);
                    }

                    /* Calculate the new offset in the data string, where
                     * the new matching round is to begin.
                     */
                    offset = re_vector[1] + match_diff;
                    len += match_diff;

                    /* An empty match does not consume input: without an
                     * extra step the same position would match again and
                     * again. Skip one byte, and stop once the end of the
                     * data has been passed.
                     */
                    if (re_vector[1] == re_vector[0]) {
                        offset++;
                        if ((apr_size_t)offset > len) {
                            break;
                        }
                    }
                }
            } while (rc > 0);
        }
        /* Adjust the real length of the processed data. */
        if (apr_table_get(f->r->headers_out, "Content-Length") != NULL) {
            apr_table_set(f->r->headers_out, "Content-Length",
                apr_itoa(ctx->p, len));
        }
        /* If an Entity Tag is set, change the mtime and generate a new ETag.
         * NOTE(review): the ETag header is unset earlier in this function, so
         * this branch is not expected to run.
         */
        if (apr_table_get(f->r->headers_out, "ETag") != NULL) {
           r->mtime = time(NULL);
           ap_set_etag(r);
        }
    }
    /* Create a new bucket with the processed data, insert that one into our
     * brigade, then insert the saved EOS bucket at the end of the brigade
     * and pass the brigade to the next filter, returning its status.
     */
    APR_BRIGADE_INSERT_TAIL(ctx->bb, apr_bucket_transient_create(data, len, apr_bucket_alloc_create(ctx->p)));
    APR_BRIGADE_INSERT_TAIL(ctx->bb, eos_bucket);
    return ap_pass_brigade(f->next, ctx->bb);
}
Пример #6
0
/**
 * Derive a grok pattern for an input string by replacing the parts of the
 * string that known patterns match with those patterns.
 *
 * Working on a copy of `input`, each round runs every pattern stored in
 * gdt->complexity_tree against the text from the current offset. The longest
 * match that also passes global_discovery_req1_grok and does not match
 * global_discovery_req2_grok is replaced by its pattern text wrapped in
 * \E ... \Q. When a round yields no replacement the offset moves past the
 * earliest match end. Finally the whole result is wrapped in \Q ... \E.
 *
 * @param gdt            discovery state holding the pattern tree.
 * @param input          NUL-terminated string to analyse.
 * @param discovery      receives the resulting buffer.
 * @param discovery_len  receives the length of that buffer's content.
 */
void grok_discover(const grok_discover_t *gdt, /*grok_t *dest_grok, */
                   const char *input, char **discovery, int *discovery_len) {
  /* Find known patterns in the input string */
  char *pattern = NULL;
  int pattern_len = 0;
  int pattern_size = 0;

  int replacements = -1;
  int offset = 0; /* Track what start position we are in the string */
  int rounds = 0;

  /* This uses substr_replace to copy the input string while allocating
   * the size properly and tracking the length */
  substr_replace(&pattern, &pattern_len, &pattern_size, 0, 0, input, -1);

  while (replacements != 0 || offset < pattern_len) {
    const void *key;
    int key_len;
    int match = 0;
    grok_match_t gm;
    grok_match_t best_match; /* only valid while max_matchlen > 0 */

    grok_log(gdt, LOG_DISCOVER, "%d: Round starting", rounds);
    grok_log(gdt, LOG_DISCOVER, "%d: String: %.*s", rounds, pattern_len, pattern);
    grok_log(gdt, LOG_DISCOVER, "%d: Offset: % *s^", rounds, offset - 1, " ");

    tctreeiterinit(gdt->complexity_tree);
    rounds++;

    replacements = 0;
    /* This is used for tracking the longest matched pattern */
    int max_matchlen = 0;

    /* This is used for finding the earliest (leftwise in the string) match
     * end point. If no matches are found, we'll skip to this position in the
     * string to find more things to match
     */
    int first_match_endpos = -1;

    char *cursor = pattern + offset;

    while ((key = tctreeiternext(gdt->complexity_tree, &key_len)) != NULL) {
      int val_len;
      const grok_t *g = tctreeget(gdt->complexity_tree, key, sizeof(int), &val_len);
      match = grok_exec(g, cursor, &gm);
      grok_log(gdt, LOG_DISCOVER, "Test %s against %.*s",
               (match == GROK_OK ? "succeeded" : "failed"), g->pattern_len, g->pattern);

      if (match == GROK_OK) {
        int still_ok;
        int matchlen = gm.end - gm.start;
        grok_log(gdt, LOG_DISCOVER, "Matched %.*s", matchlen , cursor + gm.start);

        if (first_match_endpos == -1 || gm.end < first_match_endpos) {
          first_match_endpos = gm.end;
        }

        still_ok = grok_execn(&global_discovery_req1_grok, cursor + gm.start,
                              matchlen, NULL);
        if (still_ok != GROK_OK) {
          grok_log(gdt, LOG_DISCOVER,
                   "%d: Matched %s, but match (%.*s) not complex enough.",
                   rounds, g->pattern, matchlen, cursor + gm.start);
          continue;
        }

        /* We don't want to replace existing patterns like %{FOO} */
        if (grok_execn(&global_discovery_req2_grok, cursor + gm.start, matchlen,
                       NULL) == GROK_OK) {
          /* "%%{" prints a literal "%{"; a bare "%{" is an invalid
           * conversion specification. */
          grok_log(gdt, LOG_DISCOVER,
                   "%d: Matched %s, but match (%.*s) includes %%{...} patterns.",
                   rounds, g->pattern, matchlen, cursor + gm.start);
          continue;
        }

        /* A longer match is a better match.
         * If match length is equal to max, then still take this match as
         * better since if true, then this match has a pattern that is less
         * complex and is therefore a more relevant match.
         * (A separate "equal length" branch after this test could never be
         * reached, so there is none.) */
        if (max_matchlen <= matchlen) {
          grok_log(gdt, LOG_DISCOVER,
                   "%d: New best match: %s", rounds, g->pattern);
          max_matchlen = matchlen;
          memcpy(&best_match, &gm, sizeof(grok_match_t));
        }
      } /* match == GROK_OK */
    } /* tctreeiternext(complexity_tree ...) */

    if (max_matchlen == 0) { /* No valid matches were found */
      if (first_match_endpos > 0) {
        offset += first_match_endpos;
      } else if (first_match_endpos == 0) {
        /* Only empty matches at the cursor: step one byte so the next round
         * does not repeat this one. */
        offset++;
      } else {
        /* Nothing matched at all. Neither the string nor the offset would
         * change, so another round would repeat this one forever. */
        break;
      }
    } else { /* We found a match, replace it in the pattern */
      grok_log(gdt, LOG_DISCOVER, "%d: Matched %s on '%.*s'",
               rounds, best_match.grok->pattern,
               best_match.end - best_match.start, cursor + best_match.start);
      replacements = 1;

      /* Wrap the new regexp in \E .. \Q, for ending and beginning (respectively)
       * 'quote literal' as PCRE and Perl support. This prevents literal characters
       * in the input strings from being interpreted */
      substr_replace(&pattern, &pattern_len, &pattern_size,
                     best_match.start + offset, best_match.end + offset,
                     best_match.grok->pattern, best_match.grok->pattern_len);
      substr_replace(&pattern, &pattern_len, &pattern_size,
                     best_match.start + offset, best_match.start + offset, "\\E", 2);
      /* NOTE(review): end == 0 is presumably treated by substr_replace as
       * "insert at start" - confirm against its definition. */
      substr_replace(&pattern, &pattern_len, &pattern_size,
                     best_match.start + best_match.grok->pattern_len + 2 + offset,
                     0, "\\Q", 2);

      grok_log(gdt, LOG_DISCOVER, "%d: Pattern: %.*s", rounds, pattern_len, pattern);
    } /* if (max_matchlen != 0) */
  } /* while (replacements != 0) */

  /* Add \Q and \E at beginning and end */
  substr_replace(&pattern, &pattern_len, &pattern_size,
                 0, 0, "\\Q", 2);
  substr_replace(&pattern, &pattern_len, &pattern_size,
                 pattern_len, pattern_len, "\\E", 2);

  /* TODO(sissel): Prune any useless \Q\E */
  *discovery = pattern;
  *discovery_len = pattern_len;
}
Пример #7
0
/*
 * Expand every %{NAME} reference in grok->pattern into a named capture group
 * holding the referenced pattern's regular expression.
 *
 * The pattern is copied into a private, NUL-terminated working buffer and
 * g_pattern_re is matched against it repeatedly:
 *   - if the referenced pattern name is unknown (grok_pattern_find leaves the
 *     regexp NULL), the reference is left as-is and scanning resumes after it;
 *   - otherwise a grok_capture is registered, "(?C1)" is appended when the
 *     reference carries a predicate, and the reference is rewritten to
 *     "(?<ID>regexp)". Scanning then restarts at the same position so that
 *     references nested inside the inserted regexp are expanded as well.
 * Finally every "\%" is unescaped to "%".
 *
 * On success the expanded buffer is stored in grok->full_pattern (length in
 * grok->full_pattern_len) and also returned.
 * On failure (more than 500 replacements, or allocation failure) grok->errstr
 * is set and NULL is returned; the working buffers are released.
 *
 * XXX: This method is pretty long; split it up?
 */
static char *grok_pattern_expand(grok_t *grok) {
  int capture_id = 0; /* Starting capture_id, doesn't really matter what this is */
  int offset = 0; /* string offset; how far we've expanded so far */
  int *capture_vector = NULL;
  int replacement_count = 0; /* count of replacements of %{foo} with a regexp */

  int full_len = -1;
  int full_size = -1;
  char *full_pattern = NULL;
  char capture_id_str[CAPTURE_ID_LEN + 1];

  const char *patname = NULL;

  capture_vector = calloc(3 * g_pattern_num_captures, sizeof(int));
  full_len = grok->pattern_len;
  /* Reserve one extra (zeroed) byte so the buffer is a valid C string even
   * when no replacement ever reallocates it. */
  full_size = full_len + 1;
  full_pattern = calloc(1, full_size);
  if (capture_vector == NULL || full_pattern == NULL) {
    free(capture_vector);
    free(full_pattern);
    grok->errstr = "Out of memory while expanding pattern";
    return NULL;
  }
  memcpy(full_pattern, grok->pattern, full_len);
  grok_log(grok, LOG_REGEXPAND, "% 20s: %.*s", "start of expand",
           full_len, full_pattern);

  while (pcre_exec(g_pattern_re, NULL, full_pattern, full_len, offset, 
                   0, capture_vector, g_pattern_num_captures * 3) >= 0) {
    int start, end, matchlen;
    const char *pattern_regex = NULL;
    int patname_len;
    size_t regexp_len = 0;

    grok_log(grok, LOG_REGEXPAND, "% 20s: %.*s", "start of loop",
             full_len, full_pattern);

    /* Guard against patterns that (directly or indirectly) include themselves */
    replacement_count++;
    if (replacement_count > 500) {
      free(capture_vector);
      free(full_pattern);
      grok->errstr = "Too many replacements have occurred (500), infinite recursion?";
      return NULL;
    }

    /* capture_vector[0..1] delimit the whole %{...} match */
    start = capture_vector[0];
    end = capture_vector[1];
    matchlen = end - start;
    grok_log(grok, LOG_REGEXPAND, "Pattern length: %d", matchlen);

    pcre_get_substring(full_pattern, capture_vector, g_pattern_num_captures,
                       g_cap_pattern, &patname);
    patname_len = capture_vector[g_cap_pattern * 2 + 1] \
                  - capture_vector[g_cap_pattern * 2];
    grok_log(grok, LOG_REGEXPAND, "Pattern name: %.*s", patname_len, patname);

    grok_pattern_find(grok, patname, patname_len, &pattern_regex, &regexp_len);
    if (pattern_regex == NULL) {
      /* Unknown pattern name: leave the text untouched and scan past it */
      offset = end;
    } else {
      int has_predicate = (capture_vector[g_cap_predicate * 2] >= 0);
      const char *longname = NULL;
      const char *subname = NULL;
      grok_capture *gct = calloc(1, sizeof(grok_capture));

      if (gct == NULL) {
        pcre_free_substring(patname);
        free(capture_vector);
        free(full_pattern);
        grok->errstr = "Out of memory while expanding pattern";
        return NULL;
      }

      /* XXX: Change this to not use pcre_get_substring so we can skip a
       * malloc step? */
      pcre_get_substring(full_pattern, capture_vector, g_pattern_num_captures,
                         g_cap_name, &longname);
      pcre_get_substring(full_pattern, capture_vector, g_pattern_num_captures,
                         g_cap_subname, &subname);

      snprintf(capture_id_str, CAPTURE_ID_LEN + 1, CAPTURE_FORMAT, capture_id);

      /* Add this capture to the list of captures.
       * NOTE(review): gct, longname and subname are not released here;
       * presumably grok_capture_add takes ownership -- confirm. */
      gct->id = capture_id;
      gct->name = (char *)longname; /* XXX: CONST PROBLEM */
      gct->name_len = strlen(gct->name);
      gct->subname = (char *)subname;
      gct->subname_len = strlen(gct->subname);
      grok_capture_add(grok, gct);

      /* if a predicate was given, add (?C1) to callout when the match is made,
       * so we can test it further */
      if (has_predicate) {
        int pstart, pend;
        pstart = capture_vector[g_cap_predicate * 2];
        pend = capture_vector[g_cap_predicate * 2 + 1];
        grok_log(grok, LOG_REGEXPAND, "Predicate found in '%.*s'",
                 matchlen, full_pattern + start);
        grok_log(grok, LOG_REGEXPAND, "Predicate is: '%.*s'",
                 pend - pstart, full_pattern + pstart);

        grok_capture_add_predicate(grok, capture_id, full_pattern + pstart,
                                   pend - pstart);
        /* Inserted right after the match, so it ends up directly behind the
         * capture group produced below */
        substr_replace(&full_pattern, &full_len, &full_size,
                       end, 0, "(?C1)", 5);
      }

      /* Replace %{FOO} with (?<>). '5' is strlen("(?<>)") */
      substr_replace(&full_pattern, &full_len, &full_size,
                     start, end, "(?<>)", 5);
      grok_log(grok, LOG_REGEXPAND, "% 20s: %.*s", "replace with (?<>)",
               full_len, full_pattern);

      /* Insert the capture id into (?<FOO>) */
      substr_replace(&full_pattern, &full_len, &full_size,
                     start + 3, 0,
                     capture_id_str, CAPTURE_ID_LEN);
      grok_log(grok, LOG_REGEXPAND, "% 20s: %.*s", "add capture id",
               full_len, full_pattern);


      /* Insert the pattern into (?<FOO>pattern) */
      /* 3 = '(?<', CAPTURE_ID_LEN = length of capture_id_str, 1 = '>' */
      substr_replace(&full_pattern, &full_len, &full_size, 
                     start + 3 + CAPTURE_ID_LEN + 1, 0, 
                     pattern_regex, regexp_len);
      /* The '*' precision of %.*s must be an int, not a size_t */
      grok_log(grok, LOG_REGEXPAND, ":: Inserted: %.*s", (int)regexp_len,
               pattern_regex);
      grok_log(grok, LOG_REGEXPAND, ":: STR: %.*s", full_len, full_pattern);

      /* Invariant, full_pattern actual len must always be full_len */
      assert(strlen(full_pattern) == (size_t)full_len);
      
      /* Move offset to the start of the regexp pattern we just injected.
       * This is so when we iterate again, we can process this new pattern
       * to see if the regexp included itself any %{FOO} things */
      offset = start;
      capture_id++;
    }

    if (patname != NULL) {
      pcre_free_substring(patname);
      patname = NULL;
    }
  }

  /* Unescape any "\%" strings found */
  offset = 0;
  /* Stop one short of the end: a pair needs two bytes, and this keeps the
   * lookahead at offset + 1 inside the pattern */
  while (offset + 1 < full_len) {
    if (full_pattern[offset] == '\\' && full_pattern[offset + 1] == '%') {
      /* Drop the backslash; the '%' slides into its place and is skipped by
       * the increment below */
      substr_replace(&full_pattern, &full_len, &full_size,
                     offset, offset + 1, "", 0);
    }
    offset++;
  }

  grok_log(grok, LOG_REGEXPAND, "Fully expanded: %.*s", full_len, full_pattern);

  free(capture_vector);
  grok->full_pattern_len = full_len;
  grok->full_pattern = full_pattern;
  return full_pattern;
}