struct regex * compile_regex(struct buffer *b, int flags, int needed_sub) { struct regex *new_regex; size_t re_len; /* // matches the last RE */ if (size_buffer(b) == 0) { if (flags > 0) bad_prog(_(BAD_MODIF)); return NULL; } re_len = size_buffer(b); new_regex = ck_malloc(sizeof (struct regex) + re_len - 1); new_regex->flags = flags; memcpy (new_regex->re, get_buffer(b), re_len); #ifdef REG_PERL new_regex->sz = re_len; #else /* GNU regex does not process \t & co. */ new_regex->sz = normalize_text(new_regex->re, re_len, TEXT_REGEX); #endif compile_regex_1 (new_regex, needed_sub); return new_regex; }
int match_regex (struct regex *regex, char *buf, size_t buflen, size_t buf_start_offset, struct re_registers *regarray, int regsize) { int ret; static struct regex *regex_last; /* printf ("Matching from %d/%d\n", buf_start_offset, buflen); */ /* Keep track of the last regexp matched. */ if (!regex) { regex = regex_last; if (!regex_last) bad_prog (_(NO_REGEX)); } else regex_last = regex; /* gnulib's re_search uses signed-int as length */ if (buflen >= INT_MAX) panic (_("regex input buffer length larger than INT_MAX")); if (regex->pattern.no_sub && regsize) { /* Re-compiling an existing regex, free the previously allocated structures. */ if (regex->dfa) { dfafree (regex->dfa); free (regex->dfa); regex->dfa = NULL; } regfree (®ex->pattern); compile_regex_1 (regex, regsize); } regex->pattern.regs_allocated = REGS_REALLOCATE; /* Optimized handling for '^' and '$' patterns */ if (regex->begline || regex->endline) { size_t offset; if (regex->endline) { const char *p = NULL; if (regex->flags & REG_NEWLINE) p = memchr (buf + buf_start_offset, buffer_delimiter, buflen - buf_start_offset); offset = p ? p - buf : buflen; } else if (buf_start_offset == 0) /* begline anchor, starting at beginning of the buffer. */ offset = 0; else if (!(regex->flags & REG_NEWLINE)) /* begline anchor, starting in the middle of the text buffer, and multiline regex is not specified - will never match. Example: seq 2 | sed 'N;s/^/X/g' */ return 0; else if (buf[buf_start_offset - 1] == buffer_delimiter) /* begline anchor, starting in the middle of the text buffer, with multiline match, and the current character is the line delimiter - start here. Example: seq 2 | sed 'N;s/^/X/mg' */ offset = buf_start_offset; else { /* begline anchor, starting in the middle of the search buffer, all previous optimizions didn't work: search for the next line delimiter character in the buffer, and start from there if found. */ const char *p = memchr (buf + buf_start_offset, buffer_delimiter, buflen - buf_start_offset); if (p == NULL) return 0; offset = p - buf + 1; } if (regsize) { size_t i; if (!regarray->start) { regarray->start = XCALLOC (1, regoff_t); regarray->end = XCALLOC (1, regoff_t); regarray->num_regs = 1; } regarray->start[0] = offset; regarray->end[0] = offset; for (i = 1 ; i < regarray->num_regs; ++i) regarray->start[i] = regarray->end[i] = -1; } return 1; } if (buf_start_offset == 0) { struct dfa *superset = dfasuperset (regex->dfa); if (superset && !dfaexec (superset, buf, buf + buflen, true, NULL, NULL)) return 0; if ((!regsize && (regex->flags & REG_NEWLINE)) || (!superset && dfaisfast (regex->dfa))) { bool backref = false; if (!dfaexec (regex->dfa, buf, buf + buflen, true, NULL, &backref)) return 0; if (!regsize && (regex->flags & REG_NEWLINE) && !backref) return 1; } } /* If the buffer delimiter is not newline character, we cannot use newline_anchor flag of regex. So do it line-by-line, and add offset value to results. */ if ((regex->flags & REG_NEWLINE) && buffer_delimiter != '\n') { const char *beg, *end; const char *start; beg = buf; if (buf_start_offset > 0) { const char *eol = memrchr (buf, buffer_delimiter, buf_start_offset); if (eol != NULL) beg = eol + 1; } start = buf + buf_start_offset; for (;;) { end = memchr (beg, buffer_delimiter, buf + buflen - beg); if (end == NULL) end = buf + buflen; ret = re_search (®ex->pattern, beg, end - beg, start - beg, end - start, regsize ? regarray : NULL); if (ret > -1) { size_t i; ret += beg - buf; if (regsize) { for (i = 0; i < regarray->num_regs; ++i) { if (regarray->start[i] > -1) regarray->start[i] += beg - buf; if (regarray->end[i] > -1) regarray->end[i] += beg - buf; } } break; } if (end == buf + buflen) break; beg = start = end + 1; } } else ret = re_search (®ex->pattern, buf, buflen, buf_start_offset, buflen - buf_start_offset, regsize ? regarray : NULL); return (ret > -1); }