// script_english_number_* String do_english_num(String input, String(*fun)(int)) { if (is_substr(input, 0, _("<param-"))) { // a keyword parameter, of the form "<param->123</param->" size_t start = skip_tag(input, 0); if (start != String::npos) { size_t end = input.find_first_of(_('<'), start); if (end != String::npos) { String is = input.substr(start, end - start); long i = 0; if (is.ToLong(&i)) { if (i == 1) { return _("<hint-1>") + substr_replace(input, start, end, fun(i)); } else { return _("<hint-2>") + substr_replace(input, start, end, fun(i)); } } } } return _("<hint-2>") + input; } else { long i = 0; if (input.ToLong(&i)) { return fun(i); } return input; } }
// script_english_singular/plural/singplur String do_english(String input, String(*fun)(const String&)) { if (is_substr(input, 0, _("<param-"))) { // a keyword parameter, of the form "<param->123</param->" size_t start = skip_tag(input, 0); if (start != String::npos) { size_t end = input.find_first_of(_('<'), start); if (end != String::npos) { String is = input.substr(start, end - start); return substr_replace(input, start, end, fun(is)); } } return input; // failed } else { return fun(input); } }
/*\ ARMCI_HOSTNAME_REPLACE contains "needle/nail" string to derive new hostname
 *
 *  The environment value is split at its first '/'.
 *    - empty needle ("/nail"): returns a newly calloc'ed "<host><nail>".
 *    - otherwise: returns whatever substr_replace(host, needle, nail) yields.
 *  Returns NULL when the variable is unset, has no '/', or memory runs out.
 *  The working copy of the environment value is released on every path that
 *  this function fully controls.
\*/
static char *new_hostname(char *host)
{
    char *spec, *needle, *nail, *result;

    spec = getenv("ARMCI_HOSTNAME_REPLACE");
    if (spec == NULL)
        return NULL;

    /* work on a private copy: the string is cut in two at the '/' below */
    needle = strdup(spec);
    if (needle == NULL)
        return NULL;

    nail = strchr(needle, '/');
    if (nail == NULL) {
        free(needle);           /* malformed value: do not leak the copy */
        return NULL;
    }
    *nail = '\0';
    nail++;

    if (nail == (needle + 1)) {
        /* empty needle: simply append the nail to the host name */
        result = calloc(strlen(host) + strlen(nail) + 1, 1);
        if (result != NULL) {
            strcpy(result, host);
            strcat(result, nail);
        }
        free(needle);           /* nail points into needle; both are done */
        return result;
    }

    /* NOTE(review): needle is intentionally not freed here because
     * substr_replace() is defined elsewhere and it is not visible whether its
     * result can alias needle/nail. If it always returns a fresh allocation,
     * free(needle) after the call to close the remaining leak. */
    return substr_replace(host, needle, nail);
}
/*
 * Expand every reference matched by global_matchconfig_grok inside `str`
 * (log messages call these %{FOO} / %{FOO|filter1|filter2...}) using data
 * from the match `gm`.
 *
 * For each match the "NAME" capture is looked up with patname2macro():
 *   - known macro codes (VALUE_LINE, VALUE_START, VALUE_END, VALUE_LENGTH,
 *     VALUE_MATCH, VALUE_JSON_SIMPLE, VALUE_JSON_COMPLEX) produce a freshly
 *     allocated value;
 *   - any other name is resolved as a named capture of `gm`.
 * The "FILTER" capture is then applied to the value and the reference is
 * replaced in the output. References that cannot be resolved are skipped.
 *
 * Returns a newly allocated, NUL-terminated string that the caller must
 * free(), or NULL when `str` or `gm` is NULL or memory cannot be allocated.
 */
char *grok_matchconfig_filter_reaction(const char *str, grok_match_t *gm) {
  char *output;
  int len;
  int size;
  grok_match_t tmp_gm;
  int offset = 0;

  if (gm == NULL || str == NULL) {
    return NULL;
  }

  len = strlen(str);
  size = len + 1;
  output = malloc(size);
  if (output == NULL) {
    return NULL;
  }
  memcpy(output, str, size);

  grok_log(gm->grok, LOG_REACTION, "Checking '%.*s'", len - offset,
           output + offset);
  global_matchconfig_grok.logmask = gm->grok->logmask;
  global_matchconfig_grok.logdepth = gm->grok->logdepth + 1;
  while (grok_execn(&global_matchconfig_grok, output + offset,
                    len - offset, &tmp_gm) == GROK_OK) {
    grok_log(gm->grok, LOG_REACTION, "Checking '%.*s'", len - offset,
             output + offset);
    const char *name = NULL;
    const char *filter = NULL;
    char *value = NULL;
    char *name_copy;
    int name_len = 0, value_len = 0, filter_len = 0;
    int ret = -1;       /* 0 once 'value' holds a usable replacement */
    int free_value = 0; /* non-zero when 'value' is owned by this loop */
    const struct strmacro *patmacro;

    grok_match_get_named_substring(&tmp_gm, "NAME", &name, &name_len);
    grok_match_get_named_substring(&tmp_gm, "FILTER", &filter, &filter_len);
    grok_log(gm->grok, LOG_REACTION, "Matched something: %.*s", name_len, name);

    /* XXX: We should really make a dispatch table out of this... */
    /* _macro_dispatch_func(char **value, int *value_len) ... */
    /* Let gperf do the hard work for us. */
    patmacro = patname2macro(name, name_len);
    /* %p, not %x: the argument is a pointer */
    grok_log(gm->grok, LOG_REACTION, "Checking lookup table for '%.*s': %p",
             name_len, name, (void *)patmacro);
    if (patmacro != NULL) {
      free_value = 1; /* We malloc stuff to 'value' here */
      switch (patmacro->code) {
        case VALUE_LINE:
          value = strdup(gm->subject);
          if (value != NULL) {
            value_len = strlen(value);
            ret = 0;
          }
          break;
        case VALUE_START:
          value_len = asprintf(&value, "%d", gm->start);
          if (value_len >= 0) {
            ret = 0;
          } else {
            value = NULL; /* contents are undefined after a failed asprintf */
          }
          break;
        case VALUE_END:
          value_len = asprintf(&value, "%d", gm->end);
          if (value_len >= 0) {
            ret = 0;
          } else {
            value = NULL;
          }
          break;
        case VALUE_LENGTH:
          value_len = asprintf(&value, "%d", gm->end - gm->start);
          if (value_len >= 0) {
            ret = 0;
          } else {
            value = NULL;
          }
          break;
        case VALUE_MATCH:
          value_len = gm->end - gm->start;
          value = string_ndup(gm->subject + gm->start, value_len);
          if (value != NULL) {
            ret = 0;
          }
          break;
        case VALUE_JSON_SIMPLE:
        case VALUE_JSON_COMPLEX:
          {
            int value_offset = 0;
            int value_size = 0;
            char *pname;
            const char *pdata;
            int pname_len, pdata_len;
            char *entry = NULL, *tmp = NULL;
            int entry_len = 0, tmp_len = 0, tmp_size = 0;
            char *old;

            value = NULL;
            value_len = 0;

            /* NOTE(review): the asprintf() results in this branch are still
             * used without a failure check. */
            /* TODO(sissel): use a json generator library? */

            /* Push @FOO values first */
            substr_replace(&tmp, &tmp_len, &tmp_size, 0, 0,
                           gm->subject, strlen(gm->subject));
            filter_jsonencode(gm, &tmp, &tmp_len, &tmp_size);

            if (patmacro->code == VALUE_JSON_SIMPLE) {
              entry_len = asprintf(&entry, "\"@LINE\": \"%.*s\", ", tmp_len, tmp);
            } else { /* VALUE_JSON_COMPLEX */
              entry_len = asprintf(&entry,
                                   "{ \"@LINE\": { "
                                   "\"start\": 0, "
                                   "\"end\": %d, "
                                   "\"value\": \"%.*s\" } }, ",
                                   tmp_len, tmp_len, tmp);
            }
            substr_replace(&value, &value_len, &value_size,
                           value_len, value_len, entry, entry_len);
            free(entry);

            substr_replace(&tmp, &tmp_len, &tmp_size, 0, tmp_len,
                           gm->subject + gm->start, gm->end - gm->start);
            filter_jsonencode(gm, &tmp, &tmp_len, &tmp_size);
            if (patmacro->code == VALUE_JSON_SIMPLE) {
              entry_len = asprintf(&entry, "\"@MATCH\": \"%.*s\", ", tmp_len, tmp);
            } else { /* VALUE_JSON_COMPLEX */
              entry_len = asprintf(&entry,
                                   "{ \"@MATCH\": { "
                                   "\"start\": %d, "
                                   "\"end\": %d, "
                                   "\"value\": \"%.*s\" } }, ",
                                   gm->start, gm->end, tmp_len, tmp);
            }
            substr_replace(&value, &value_len, &value_size,
                           value_len, value_len, entry, entry_len);
            free(entry);

            value_offset += value_len;

            /* For every named capture, put this in our result string:
             * "NAME": "%{NAME|jsonencode}"
             */
            grok_match_walk_init(gm);
            while (grok_match_walk_next(gm, &pname, &pname_len,
                                        &pdata, &pdata_len) == 0) {
              substr_replace(&tmp, &tmp_len, &tmp_size, 0, tmp_len,
                             pdata, pdata_len);
              filter_jsonencode(gm, &tmp, &tmp_len, &tmp_size);

              if (patmacro->code == VALUE_JSON_SIMPLE) {
                entry_len = asprintf(&entry, "\"%.*s\": \"%.*s\", ",
                                     pname_len, pname, tmp_len, tmp);
              } else { /* VALUE_JSON_COMPLEX */
                /* pointer differences are ptrdiff_t; cast to match %ld */
                entry_len = asprintf(&entry,
                                     "{ \"%.*s\": { "
                                     "\"start\": %ld, "
                                     "\"end\": %ld, "
                                     "\"value\": \"%.*s\""
                                     " } }, ",
                                     pname_len, pname,
                                     (long)(pdata - gm->subject), /*start*/
                                     (long)((pdata - gm->subject) + pdata_len), /*end*/
                                     tmp_len, tmp);
              }
              substr_replace(&value, &value_len, &value_size,
                             value_offset, value_offset, entry, entry_len);
              value_offset += entry_len;
              free(entry);
            }
            grok_match_walk_end(gm);

            /* Insert the { at the beginning */
            /* And Replace trailing ", " with " }" */
            if (patmacro->code == VALUE_JSON_SIMPLE) {
              substr_replace(&value, &value_len, &value_size, 0, 0, "{ ", 2);
              substr_replace(&value, &value_len, &value_size,
                             value_len - 2, value_len, " }", 2);
              /* TODO(sissel): This could be:
               * -3, -1, " }", 2); */
            } else { /* VALUE_JSON_COMPLEX */
              substr_replace(&value, &value_len, &value_size, 0, 0,
                             "{ \"grok\": [ ", 12);
              substr_replace(&value, &value_len, &value_size,
                             value_len - 2, value_len, " ] }", 4);
              /* TODO(sissel): This could be:
               * -3, -1, " ] }", 4); */
            }

            old = value;
            grok_log(gm->grok, LOG_REACTION, "JSON intermediate: %.*s",
                     value_len, value);
            value = grok_matchconfig_filter_reaction(value, gm);
            free(old);
            if (value != NULL) {
              /* The recursive expansion may have changed the length, and the
               * function reports it only through the NUL terminator. */
              value_len = strlen(value);
              ret = 0;
            }
            free(tmp);
          }
          break;
        default:
          grok_log(gm->grok, LOG_REACTION, "Unhandled macro code: '%.*s' (%d)",
                   name_len, name, patmacro->code);
      }
    } else {
      /* XXX: Should just have get_named_substring take a
       * 'name, name_len' instead */
      name_copy = malloc(name_len + 1);
      if (name_copy != NULL) {
        memcpy(name_copy, name, name_len);
        name_copy[name_len] = '\0';
        ret = grok_match_get_named_substring(gm, name_copy,
                                             (const char **)&value,
                                             &value_len);
        free(name_copy);
      }
    }

    if (ret != 0) {
      /* Nothing to substitute: step over this reference */
      offset += tmp_gm.end;
    } else {
      /* replace %{FOO} with the value of foo */
      char *old;
      grok_log(tmp_gm.grok, LOG_REACTION, "Start/end: %d %d",
               tmp_gm.start, tmp_gm.end);
      grok_log(tmp_gm.grok, LOG_REACTION, "Replacing %.*s",
               (tmp_gm.end - tmp_gm.start), output + tmp_gm.start + offset);

      /* apply the any filters from %{FOO|filter1|filter2...} */
      old = value;
      grok_log(tmp_gm.grok, LOG_REACTION, "Prefilter string: %.*s",
               value_len, value);
      grok_match_reaction_apply_filter(gm, &value, &value_len,
                                       filter, filter_len);
      if (old != value) {
        /* The filter handed back a different buffer, which we now own */
        if (free_value) {
          free(old); /* Free the old value */
        }
        free_value = 1;
      }
      grok_log(gm->grok, LOG_REACTION, "Filter: %.*s", filter_len, filter);

      grok_log(tmp_gm.grok, LOG_REACTION, "Replacing %.*s with %.*s",
               (tmp_gm.end - tmp_gm.start), output + tmp_gm.start + offset,
               value_len, value);
      substr_replace(&output, &len, &size, offset + tmp_gm.start,
                     offset + tmp_gm.end, value, value_len);
      /* NOTE(review): this advances by value_len only, not by
       * tmp_gm.start + value_len, so the tail of the inserted value is
       * rescanned when the reference did not start at 'offset'. Confirm this
       * is intended. */
      offset += value_len;
      if (free_value) {
        free(value);
      }
    }
  } /* while grok_execn ... */

  return output;
}
/**
 * Return a heap copy of `current` that has enough room to substitute one
 * two-character "\N" token by `sub` in place. Returns NULL when malloc fails.
 *
 * @param current The NUL-terminated string to copy.
 * @param sub     The text that will replace one token (may be NULL).
 */
static char *replace_dup_with_room(const char *current, const char *sub)
{
    size_t cur_len = strlen(current);
    size_t sub_len = (sub != NULL) ? strlen(sub) : 0;
    /* a token is 2 characters, so only longer substitutes need extra room */
    size_t size = (sub_len < 2) ? cur_len + 1 : cur_len + sub_len - 1;
    char *copy = malloc(size);

    if (copy != NULL) {
        memcpy(copy, current, cur_len + 1);
    }
    return copy;
}

/**
 * The output filter routine. This one gets called whenever a response is
 * generated that passes this filter.
 *
 * Brigades are collected in the filter context until one containing an EOS
 * bucket arrives. The collected data is then flattened, every configured
 * pattern is applied repeatedly to it, and the result is passed on as a single
 * bucket followed by the EOS bucket. Header patterns are applied to
 * r->headers_out and the ETag header is removed.
 *
 * Returns APR_SUCCESS while data is only being collected, APR_EINVAL if the
 * filter definition cannot be found, APR_ENOMEM if a replacement buffer cannot
 * be allocated, the PCRE error code on a matching error, and otherwise the
 * status returned by the next filter.
 *
 * @param f The filter definition.
 * @param bb The bucket brigade containing the data.
 */
static apr_status_t replace_output_filter(ap_filter_t *f,
                                          apr_bucket_brigade *bb)
{
    request_rec *r = f->r;
    conn_rec *c = r->connection;
    replace_ctx_t *ctx = f->ctx;
    apr_bucket *b;
    apr_bucket *out_bucket;
    apr_size_t len;
    const char *data;
    apr_status_t rv;
    int re_vector[RE_VECTOR_SIZE]; // 3 elements per matched pattern
    replace_pattern_t *next;
    header_replace_pattern_t *next_header;

    if (!ctx) {
        /* Initialize context */
        ctx = apr_pcalloc(f->r->pool, sizeof(replace_ctx_t));
        f->ctx = ctx;
        ctx->bb = apr_brigade_create(r->pool, c->bucket_alloc);
    }

    /* parse config settings */

    /* look for the user-defined filter */
    ctx->filter = find_filter_def(f->r->server, f->frec->name);
    if (!ctx->filter) {
        ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, f->r,
                      "couldn't find definition of filter '%s'",
                      f->frec->name);
        return APR_EINVAL;
    }
    ctx->p = f->r->pool;
    if (ctx->filter->intype && ctx->filter->intype != INTYPE_ALL) {
        if (!f->r->content_type) {
            ctx->noop = 1;
        } else {
            const char *ctypes = f->r->content_type;
            const char *ctype = ap_getword(f->r->pool, &ctypes, ';');

            if (strcasecmp(ctx->filter->intype, ctype)) {
                /* wrong IMT for us; don't mess with the output */
                ctx->noop = 1;
            }
        }
    }

    /* exit immediately if there are indications that the filter shouldn't be
     * executed. The status of the downstream filters is handed back to the
     * caller instead of being dropped.
     */
    if (ctx->noop == 1) {
        return ap_pass_brigade(f->next, bb);
    }

    /**
     * Loop through the configured header patterns.
     */
    for (next_header = ctx->filter->header_pattern;
         next_header != NULL;
         next_header = next_header->next) {

        // create a separate table with the requested HTTP header entries and
        // unset those headers in the original request.
        apr_table_t *header_table;
        header_table = apr_table_make(r->pool, 2);

        // create a data structure for the callback function
        header_replace_cb_t *hrcb;
        hrcb = apr_palloc(r->pool, sizeof(header_replace_cb_t));
        hrcb->header_table = header_table;
        hrcb->pattern = next_header->pattern;
        hrcb->extra = next_header->extra;
        hrcb->replacement = next_header->replacement;
        hrcb->r = r;

        // pass any header that is defined to be processed to the callback
        // function and unset those headers in the original outgoing record.
        apr_table_do(replace_header_cb, hrcb, r->headers_out,
                     next_header->header, NULL);

        // only touch the header if the changed header table is not empty.
        if (!apr_is_empty_table(header_table)) {
            apr_table_unset(r->headers_out, next_header->header);

            // overlay the original header table with the new one to
            // reintegrate the changed headers.
            r->headers_out = apr_table_overlay(r->pool, r->headers_out,
                                               header_table);
        }
    }

    /* Not nice but necessary: Unset the ETag, because we cannot adjust the
     * value correctly, because we do not know how.
     */
    apr_table_unset(f->r->headers_out, "ETag");

    int eos = 0;                    // flag to check if an EOS bucket is in the brigade.
    apr_bucket *eos_bucket = NULL;  // Backup for the EOS bucket.

    /* Iterate through the available data. Stop if there is an EOS */
    for (b = APR_BRIGADE_FIRST(bb);
         b != APR_BRIGADE_SENTINEL(bb);
         b = APR_BUCKET_NEXT(b)) {
        if (APR_BUCKET_IS_EOS(b)) {
            eos = 1;
            ap_save_brigade(f, &ctx->bb, &bb, ctx->p);
            APR_BUCKET_REMOVE(b);
            eos_bucket = b;
            break;
        }
    }

    /* If the iteration over the brigade hasn't found an EOS bucket, just save
     * the brigade and return.
     */
    if (eos != 1) {
        ap_save_brigade(f, &ctx->bb, &bb, ctx->p);
        return APR_SUCCESS;
    }

    if ((rv = apr_brigade_pflatten(ctx->bb, (char **)&data, &len, ctx->p))
        != APR_SUCCESS) {
        /* Return if the flattening didn't work. */
        return rv;
    } else {
        /* Remove the original data from the bucket brigade. Otherwise it would
         * be passed twice (original data and the processed, flattened copy) to
         * the next filter.
         */
        apr_brigade_cleanup(ctx->bb);
    }

    if (len > 0) {
        /* start checking for the regex's. */
        for (next = ctx->filter->pattern; next != NULL; next = next->next) {
            int rc = 0;
            int offset = 0;

            /* apply this pattern until it no longer matches */
            do {
                rc = pcre_exec(next->pattern, next->extra, data, len, offset,
                               0, re_vector, RE_VECTOR_SIZE);

                if (rc < 0 && rc != PCRE_ERROR_NOMATCH) {
                    ap_log_rerror(APLOG_MARK, APLOG_ERR, rc, r,
                                  "Matching Error %d", rc);
                    return rc;
                }

                /* This shouldn't happen */
                if (rc == 0) {
                    ap_log_rerror(APLOG_MARK, APLOG_ERR, rc, r,
                                  "PCRE output vector too small (%d)",
                                  RE_VECTOR_SIZE/3-1);
                }

                /* If the result count is greater than 0 then there are
                 * matches in the data string. Thus we try to replace those
                 * strings with the user provided string.
                 */
                if (rc > 0) {
                    char *newdata;        // prefix + replacement + postfix.
                    char *replacement;    // replacement text after the
                                          // subpattern processing.
                    int replacement_on_heap = 0; // replacement is malloc'ed
                                                 // (initially pool memory).
                    char *to_replace[10]; // the matched subpatterns.
                    int x;                // a simple counter.
                    char *pos;            // position of a "\N" token within
                                          // the replacement string.
                    apr_size_t repl_len;
                    apr_size_t tail_len;
                    apr_size_t new_len;

                    /* start with building the replacement string */
                    replacement = apr_pstrcat(ctx->p, next->replacement, NULL);

                    /* look for the subpatterns \0 to \9 */
                    for (x = 0; x < rc && x < 10; x++) {
                        char *token;
                        char *grown;

                        /* extract the x'ths subpattern */
                        to_replace[x] = substr(data, re_vector[x*2],
                                               re_vector[x*2+1] -
                                               re_vector[x*2], r);
                        ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r,
                                      "Found match: %s", to_replace[x]);

                        /* the token ( \0 to \9) we are looking for */
                        token = apr_pstrcat(ctx->p, "\\",
                                            apr_itoa(ctx->p, x), NULL);

                        /* Move the replacement into a buffer with room for
                         * one substitution. A heap buffer left over from the
                         * previous subpattern is released here; it used to be
                         * leaked.
                         */
                        grown = replace_dup_with_room(replacement,
                                                      to_replace[x]);
                        if (replacement_on_heap) {
                            free(replacement);
                        }
                        if (grown == NULL) {
                            ap_log_rerror(APLOG_MARK, APLOG_ERR, APR_ENOMEM, r,
                                          "Unable to allocate memory for "
                                          "replacement");
                            return APR_ENOMEM;
                        }
                        replacement = grown;
                        replacement_on_heap = 1;

                        /* try to replace each occurrence of the token with
                         * its matched subpattern.
                         */
                        pos = ap_strstr(replacement, token);
                        while (pos) {
                            if (!to_replace[x]) {
                                break;
                            }
                            substr_replace(pos, to_replace[x], strlen(pos),
                                           strlen(to_replace[x]));

                            /* make room for the next substitution */
                            grown = replace_dup_with_room(replacement,
                                                          to_replace[x]);
                            free(replacement);
                            if (grown == NULL) {
                                ap_log_rerror(APLOG_MARK, APLOG_ERR,
                                              APR_ENOMEM, r,
                                              "Unable to allocate memory for "
                                              "replacement");
                                return APR_ENOMEM;
                            }
                            replacement = grown;
                            pos = ap_strstr(replacement, token);
                        }
                    }

                    /* Build the new data string from the part before the
                     * match, the replacement and the part after the match.
                     * The pieces are copied by length, so the result is always
                     * terminated and stays consistent with 'len' even when
                     * the data contains NUL bytes.
                     */
                    repl_len = strlen(replacement);
                    tail_len = len - (apr_size_t)re_vector[1];
                    new_len = (apr_size_t)re_vector[0] + repl_len + tail_len;

                    newdata = apr_palloc(ctx->p, new_len + 1);
                    memcpy(newdata, data, (size_t)re_vector[0]);
                    memcpy(newdata + re_vector[0], replacement, repl_len);
                    memcpy(newdata + re_vector[0] + repl_len,
                           data + re_vector[1], tail_len);
                    newdata[new_len] = '\0';

                    /* update the data pointer and free the allocated memory
                     * for the replacement string.
                     */
                    data = newdata;
                    if (replacement_on_heap) {
                        free(replacement);
                    }

                    /* The next matching round begins right behind the text
                     * that was just inserted.
                     */
                    offset = re_vector[0] + (int)repl_len;
                    len = new_len;

                    /* An empty match does not consume input; step over one
                     * character so the same position is not matched forever.
                     */
                    if (re_vector[1] == re_vector[0]) {
                        offset++;
                        if ((apr_size_t)offset > len) {
                            break;
                        }
                    }
                }
            } while (rc > 0);
        }

        /* Adjust the real length of the processed data. */
        if (apr_table_get(f->r->headers_out, "Content-Length") != NULL) {
            apr_table_set(f->r->headers_out, "Content-Length",
                          apr_psprintf(ctx->p, "%" APR_SIZE_T_FMT, len));
        }

        /* If an Entity Tag is set, change the mtime and generate a new ETag.
         * (The ETag header is unset earlier in this function, so this only
         * triggers if something re-adds it in between.)
         */
        if (apr_table_get(f->r->headers_out, "ETag") != NULL) {
            r->mtime = time(NULL);
            ap_set_etag(r);
        }
    }

    /* Create a new bucket with the processed data, insert that one into our
     * brigade, then insert the saved EOS bucket at the end of the brigade
     * and pass the brigade to the next filter. The bucket uses the
     * connection's allocator, the same one ctx->bb was created with.
     */
    out_bucket = apr_bucket_transient_create(data, len, c->bucket_alloc);
    APR_BRIGADE_INSERT_TAIL(ctx->bb, out_bucket);
    APR_BRIGADE_INSERT_TAIL(ctx->bb, eos_bucket);

    return ap_pass_brigade(f->next, ctx->bb);
}
void grok_discover(const grok_discover_t *gdt, /*grok_t *dest_grok, */ const char *input, char **discovery, int *discovery_len) { /* Find known patterns in the input string */ char *pattern = NULL; int pattern_len = 0; int pattern_size = 0; int replacements = -1; int offset = 0; /* Track what start position we are in the string */ int rounds = 0; /* This uses substr_replace to copy the input string while allocating * the size properly and tracking the length */ substr_replace(&pattern, &pattern_len, &pattern_size, 0, 0, input, -1); while (replacements != 0 || offset < pattern_len) { const void *key; int key_len; int match = 0; grok_match_t gm; grok_match_t best_match; grok_log(gdt, LOG_DISCOVER, "%d: Round starting", rounds); grok_log(gdt, LOG_DISCOVER, "%d: String: %.*s", rounds, pattern_len, pattern); grok_log(gdt, LOG_DISCOVER, "%d: Offset: % *s^", rounds, offset - 1, " "); tctreeiterinit(gdt->complexity_tree); rounds++; replacements = 0; /* This is used for tracking the longest matched pattern */ int max_matchlen = 0; /* This is used for finding the earliest (leftwise in the string) match * end point. If no matches are found, we'll skip to this position in the * string to find more things to match */ int first_match_endpos = -1; char *cursor = pattern + offset; while ((key = tctreeiternext(gdt->complexity_tree, &key_len)) != NULL) { const int *complexity = (const int *)key; int val_len; const grok_t *g = tctreeget(gdt->complexity_tree, key, sizeof(int), &val_len); match = grok_exec(g, cursor, &gm); grok_log(gdt, LOG_DISCOVER, "Test %s against %.*s", (match == GROK_OK ? 
"succeeded" : "failed"), g->pattern_len, g->pattern); if (match == GROK_OK) { int still_ok; int matchlen = gm.end - gm.start; grok_log(gdt, LOG_DISCOVER, "Matched %.*s", matchlen , cursor + gm.start); if (first_match_endpos == -1 || gm.end < first_match_endpos) { first_match_endpos = gm.end; } still_ok = grok_execn(&global_discovery_req1_grok, cursor + gm.start, matchlen, NULL); if (still_ok != GROK_OK) { grok_log(gdt, LOG_DISCOVER, "%d: Matched %s, but match (%.*s) not complex enough.", rounds, g->pattern, matchlen, cursor + gm.start); continue; } /* We don't want to replace existing patterns like %{FOO} */ if (grok_execn(&global_discovery_req2_grok, cursor + gm.start, matchlen, NULL) == GROK_OK) { grok_log(gdt, LOG_DISCOVER, "%d: Matched %s, but match (%.*s) includes %{...} patterns.", rounds, g->pattern, matchlen, cursor + gm.start); continue; } /* A longer match is a better match. * If match length is equal to max, then still take this match as * better since if true, then this match has a pattern that is less * complex and is therefore a more relevant match */ if (max_matchlen <= matchlen) { grok_log(gdt, LOG_DISCOVER, "%d: New best match: %s", rounds, g->pattern); max_matchlen = matchlen; memcpy(&best_match, &gm, sizeof(grok_match_t)); } else if (max_matchlen == matchlen) { /* Found a match with same length */ grok_log(gdt, LOG_DISCOVER, "%d: Common length match: %s", rounds, g->pattern); } } /* match == GROK_OK */ } /* tctreeiternext(complexity_tree ...) 
*/ if (max_matchlen == 0) { /* No valid matches were found */ if (first_match_endpos > 0) { offset += first_match_endpos; } } else { /* We found a match, replace it in the pattern */ grok_log(gdt, LOG_DISCOVER, "%d: Matched %s on '%.*s'", rounds, best_match.grok->pattern, best_match.end - best_match.start, cursor + best_match.start); replacements = 1; substr_replace(&pattern, &pattern_len, &pattern_size, best_match.start + offset, best_match.end + offset, best_match.grok->pattern, best_match.grok->pattern_len); substr_replace(&pattern, &pattern_len, &pattern_size, best_match.start + offset, best_match.start + offset, "\\E", 2); substr_replace(&pattern, &pattern_len, &pattern_size, best_match.start + best_match.grok->pattern_len + 2 + offset, 0, "\\Q", 2); //usleep(1000000); /* Wrap the new regexp in \E .. \Q, for ending and beginning (respectively) * 'quote literal' as PCRE and Perl support. This prevents literal characters * in the input strings from being interpreted */ grok_log(gdt, LOG_DISCOVER, "%d: Pattern: %.*s", rounds, pattern_len, pattern); } /* if (max_matchlen != 0) */ } /* while (replacements != 0) */ /* Add \Q and \E at beginning and end */ substr_replace(&pattern, &pattern_len, &pattern_size, 0, 0, "\\Q", 2); substr_replace(&pattern, &pattern_len, &pattern_size, pattern_len, pattern_len, "\\E", 2); /* TODO(sissel): Prune any useless \Q\E */ *discovery = pattern; *discovery_len = pattern_len; }
/* XXX: This method is pretty long; split it up? */
/*
 * Expand every reference matched by g_pattern_re in grok->pattern (log
 * messages call these %{FOO}) into a named capture group "(?<ID>regex)",
 * where ID is a generated capture id and regex is what grok_pattern_find()
 * returns for the pattern name. Expansion restarts at the inserted text, so
 * references inside an inserted regex are expanded too. Names that cannot be
 * found are left alone. Finally every "\%" is unescaped to "%".
 *
 * On success the NUL-terminated result is stored in grok->full_pattern (with
 * its length in grok->full_pattern_len) and returned. On failure (more than
 * 500 replacements, or out of memory) grok->errstr is set and NULL is
 * returned.
 */
static char *grok_pattern_expand(grok_t *grok) {
  int capture_id = 0; /* Starting capture_id, doesn't really matter what this is */
  int offset = 0; /* string offset; how far we've expanded so far */
  int *capture_vector = NULL;
  int replacement_count = 0; /* count of replacements of %{foo} with a regexp */
  int full_len = -1;
  int full_size = -1;
  char *full_pattern = NULL;
  char capture_id_str[CAPTURE_ID_LEN + 1];

  const char *patname = NULL;

  capture_vector = calloc(3 * g_pattern_num_captures, sizeof(int));
  full_len = grok->pattern_len;
  /* +1 keeps a NUL terminator behind the copy: the unescape loop below reads
   * full_pattern[offset + 1], and the pattern is returned as a C string even
   * when no replacement ever happens. */
  full_size = full_len + 1;
  full_pattern = calloc(1, full_size);
  if (capture_vector == NULL || full_pattern == NULL) {
    free(capture_vector);
    free(full_pattern);
    grok->errstr = "Out of memory while expanding pattern";
    return NULL;
  }
  memcpy(full_pattern, grok->pattern, full_len);
  grok_log(grok, LOG_REGEXPAND, "% 20s: %.*s", "start of expand",
           full_len, full_pattern);

  while (pcre_exec(g_pattern_re, NULL, full_pattern, full_len, offset,
                   0, capture_vector, g_pattern_num_captures * 3) >= 0) {
    int start, end, matchlen;
    const char *pattern_regex = NULL;
    int patname_len;
    size_t regexp_len = 0;

    grok_log(grok, LOG_REGEXPAND, "% 20s: %.*s", "start of loop",
             full_len, full_pattern);

    replacement_count++;
    if (replacement_count > 500) {
      free(capture_vector);
      free(full_pattern);
      grok->errstr = "Too many replacements have occurred (500), infinite recursion?";
      return NULL;
    }

    start = capture_vector[0];
    end = capture_vector[1];
    matchlen = end - start;
    grok_log(grok, LOG_REGEXPAND, "Pattern length: %d", matchlen);

    pcre_get_substring(full_pattern, capture_vector, g_pattern_num_captures,
                       g_cap_pattern, &patname);
    patname_len = capture_vector[g_cap_pattern * 2 + 1]
                  - capture_vector[g_cap_pattern * 2];
    grok_log(grok, LOG_REGEXPAND, "Pattern name: %.*s", patname_len, patname);

    grok_pattern_find(grok, patname, patname_len, &pattern_regex, &regexp_len);
    if (pattern_regex == NULL) {
      /* Unknown pattern name: leave the reference in place and move past it */
      offset = end;
    } else {
      int has_predicate = (capture_vector[g_cap_predicate * 2] >= 0);
      const char *longname = NULL;
      const char *subname = NULL;
      grok_capture *gct = calloc(1, sizeof(grok_capture));

      if (gct == NULL) {
        pcre_free_substring(patname);
        free(capture_vector);
        free(full_pattern);
        grok->errstr = "Out of memory while expanding pattern";
        return NULL;
      }

      /* XXX: Change this to not use pcre_get_substring so we can skip a
       * malloc step? */
      pcre_get_substring(full_pattern, capture_vector,
                         g_pattern_num_captures, g_cap_name, &longname);
      pcre_get_substring(full_pattern, capture_vector,
                         g_pattern_num_captures, g_cap_subname, &subname);

      snprintf(capture_id_str, CAPTURE_ID_LEN + 1, CAPTURE_FORMAT, capture_id);

      /* Add this capture to the list of captures */
      gct->id = capture_id;
      gct->name = (char *)longname; /* XXX: CONST PROBLEM */
      gct->name_len = strlen(gct->name);
      gct->subname = (char *)subname;
      gct->subname_len = strlen(gct->subname);
      grok_capture_add(grok, gct);

      /* if a predicate was given, add (?C1) to callout when the match is made,
       * so we can test it further */
      if (has_predicate) {
        int pstart, pend;
        pstart = capture_vector[g_cap_predicate * 2];
        pend = capture_vector[g_cap_predicate * 2 + 1];
        grok_log(grok, LOG_REGEXPAND, "Predicate found in '%.*s'",
                 matchlen, full_pattern + start);
        grok_log(grok, LOG_REGEXPAND, "Predicate is: '%.*s'",
                 pend - pstart, full_pattern + pstart);

        grok_capture_add_predicate(grok, capture_id, full_pattern + pstart,
                                   pend - pstart);
        substr_replace(&full_pattern, &full_len, &full_size,
                       end, 0, "(?C1)", 5);
      }

      /* Replace %{FOO} with (?<>). '5' is strlen("(?<>)") */
      substr_replace(&full_pattern, &full_len, &full_size,
                     start, end, "(?<>)", 5);
      grok_log(grok, LOG_REGEXPAND, "% 20s: %.*s", "replace with (?<>)",
               full_len, full_pattern);

      /* Insert the capture id into (?<FOO>) */
      substr_replace(&full_pattern, &full_len, &full_size,
                     start + 3, 0, capture_id_str, CAPTURE_ID_LEN);
      grok_log(grok, LOG_REGEXPAND, "% 20s: %.*s", "add capture id",
               full_len, full_pattern);

      /* Insert the pattern into (?<FOO>pattern) */
      /* 3 = strlen("(?<"), CAPTURE_ID_LEN = length of the id, 1 = '>' */
      substr_replace(&full_pattern, &full_len, &full_size,
                     start + 3 + CAPTURE_ID_LEN + 1, 0,
                     pattern_regex, regexp_len);
      /* %.*s takes an int precision; regexp_len is a size_t */
      grok_log(grok, LOG_REGEXPAND, ":: Inserted: %.*s", (int)regexp_len,
               pattern_regex);
      grok_log(grok, LOG_REGEXPAND, ":: STR: %.*s", full_len, full_pattern);

      /* Invariant, full_pattern actual len must always be full_len */
      assert(strlen(full_pattern) == (size_t)full_len);

      /* Move offset to the start of the regexp pattern we just injected.
       * This is so when we iterate again, we can process this new pattern
       * to see if the regexp included itself any %{FOO} things */
      offset = start;
      capture_id++;
    }

    if (patname != NULL) {
      pcre_free_substring(patname);
      patname = NULL;
    }
  }

  /* Unescape any "\%" strings found */
  offset = 0;
  while (offset < full_len) { /* loop to '< full_len' because we access offset+1 */
    if (full_pattern[offset] == '\\' && full_pattern[offset + 1] == '%') {
      substr_replace(&full_pattern, &full_len, &full_size,
                     offset, offset + 1, "", 0);
    }
    offset++;
  }

  grok_log(grok, LOG_REGEXPAND, "Fully expanded: %.*s", full_len, full_pattern);
  free(capture_vector);
  grok->full_pattern_len = full_len;
  grok->full_pattern = full_pattern;
  return full_pattern;
}