/// Copy constructor: duplicates the scalar members of @p other and makes
/// private copies of its two compiled-pattern buffers (`re`, `are`).
///
/// A buffer is only cloned when the source pointer is non-null; otherwise the
/// corresponding member stays null. Throws std::bad_alloc when pcre_malloc
/// returns null. Whatever was already allocated is released before any
/// exception leaves the constructor.
data(data const &other) :
	expression(other.expression),
	flags(other.flags),
	re(0),
	are(0),
	re_size(other.re_size),
	are_size(other.are_size),
	match_size(other.match_size)
{
	try {
		// First buffer: `re`, re_size bytes.
		if(other.re) {
			re = static_cast<pcre *>(pcre_malloc(re_size));
			if(re == 0)
				throw std::bad_alloc();
			memcpy(re, other.re, re_size);
		}

		// Second buffer: `are`, are_size bytes.
		if(other.are) {
			are = static_cast<pcre *>(pcre_malloc(are_size));
			if(are == 0)
				throw std::bad_alloc();
			memcpy(are, other.are, are_size);
		}
	}
	catch(...) {
		// Both pointers start out null, so only buffers that were really
		// obtained above are handed back to pcre_free.
		if(re != 0)
			pcre_free(re);
		if(are != 0)
			pcre_free(are);
		throw;
	}
}
// Copy constructor.
//
// Duplicates the compiled expression (re), the pcre_extra block (pe) and,
// when the source recorded a non-zero study_size, the study data that the
// pcre_extra block points to. A fresh ovector sized for substrcount
// substrings is allocated and matchlist starts out NULL.
//
// Throws a const char* message when the compiled expression cannot be
// allocated. Failure to allocate the pcre_extra block or the study data is
// tolerated: the copy is then left without that information.
// If the ovector allocation throws, everything allocated here is released
// before the exception propagates.
RegEx::RegEx(const RegEx& regex)
{
   const char * error = __FILE__ ": unknown error in RegEx(RegEx)";

   // allocate memory for the compiled regular expression information
   re = (pcre*)pcre_malloc(regex.re_size);
   if (!re)
   {
      throw error;
   }

   // copy the compiled regular expression information
   memcpy(re, regex.re, regex.re_size);
   re_size = regex.re_size;

   pe = NULL;
   study_size = 0;
   allocated_study = false;

   if (regex.pe) // should always be true, because constructor allocates it
   {
      // allocate memory for the extra study information and recursion limit
      pe = (pcre_extra*)pcre_malloc(sizeof(pcre_extra));
      if (pe)
      {
         // copy the extra information (flags, limits, ...)
         memcpy(pe, regex.pe, sizeof(pcre_extra));

         // copy any study information
         if (regex.study_size > 0)
         {
            void* copied_study_data = pcre_malloc(regex.study_size);
            if (copied_study_data)
            {
               pe->study_data = copied_study_data;
               memcpy(pe->study_data, regex.pe->study_data, regex.study_size);
               study_size = regex.study_size;
               allocated_study = true;
            }
            else
            {
               // The memcpy above left pe->study_data pointing at the
               // *source* object's study block. Keeping it would leave this
               // copy with a dangling pointer once the source is destroyed,
               // so detach it and tell PCRE there is no study data.
               pe->study_data = NULL;
               pe->flags &= ~PCRE_EXTRA_STUDY_DATA;
            }
         }
      }
   }
   // else: no extra or study data to copy; this should not happen because
   // the recursion limit is always wanted.

   substrcount = regex.substrcount;
   matchlist = NULL;
   try
   {
      ovector = new int[3*substrcount];
   }
   catch (...)
   {
      // The destructor does not run for a partially constructed object, so
      // release what was obtained above before passing the exception on.
      if (pe)
      {
         if (allocated_study)
         {
            pcre_free(pe->study_data);
         }
         pcre_free(pe);
         pe = NULL;
      }
      pcre_free(re);
      re = NULL;
      throw;
   }
}
/*
 * Build a new string in which the matched part of `str` is replaced by the
 * expansion of `rep`.
 *
 *   str   subject text
 *   len   length of the subject text
 *   rep   replacement template, expanded by findreplen()/doreplace()
 *   nmat  number of (start,end) pairs in ovec, counting the whole match
 *   ovec  offset pairs; ovec[0]/ovec[1] delimit the whole match, the
 *         following pairs delimit the captured substrings
 *
 * Returns a buffer obtained from pcre_malloc holding prefix + expanded
 * replacement + suffix, or NULL when that allocation fails.
 */
static char *
edit(const char *str, int len, const char *rep, int nmat, const int *ovec)
{
	int i, slen, rlen;
	const int *mvec = ovec;		/* whole-match pair */
	char *res, *cp;
	int replen[MAXCAPTURE];
	const char *repstr[MAXCAPTURE];

	/* Skip the whole-match pair; what remains are the captures. */
	nmat--;
	ovec += 2;

	/* replen[]/repstr[] hold MAXCAPTURE entries; never index past them. */
	if (nmat > MAXCAPTURE)
		nmat = MAXCAPTURE;

	for (i = 0; i < nmat; i++) {
		replen[i] = ovec[i * 2 + 1] - ovec[i * 2];
		repstr[i] = &str[ovec[i * 2]];
#ifdef DEBUG_PCRE_SUBST
		printf(">>>%d %d %.*s\n", i, replen[i], replen[i], repstr[i]);
#endif
	}

	/* Result length = subject - matched span + expanded replacement. */
	slen = len;
	len -= mvec[1] - mvec[0];
	len += rlen = findreplen(rep, nmat, replen);
#ifdef DEBUG_PCRE_SUBST
	printf("resulting length %d (srclen=%d)\n", len, slen);
#endif

	cp = res = pcre_malloc(len + 1);
	if (res == NULL)
		return NULL;	/* previously this was written through unchecked */

	/* Text before the match. */
	if (mvec[0] > 0) {
		strncpy(cp, str, mvec[0]);
		cp += mvec[0];
	}

	/* Expanded replacement. */
	doreplace(cp, rep, nmat, replen, repstr);
	cp += rlen;

	/* Text after the match. */
	if (mvec[1] < slen)
		strcpy(cp, &str[mvec[1]]);
	res[len] = 0;
	return res;
}
// Construct a RegEx by compiling and studying a pattern.
//
//   regex     pattern text handed to pcre_compile
//   options   option bits handed to pcre_compile
//   maxDepth  stored as match_limit_recursion in the pcre_extra block
//
// Throws a const char* message when the pattern does not compile or when the
// pcre_extra block cannot be allocated.
//
// The compile-error message is copied into a function-static buffer before
// being thrown, because a pointer into the local UtlString would be dangling
// by the time a handler sees it. Consequences: messages longer than the
// buffer are truncated, and the text is only valid until the next compile
// failure (the buffer is shared, so concurrent failures may interleave).
RegEx::RegEx(const char * regex, int options, unsigned long int maxDepth)
{
   const char* pcre_error;
   int erroffset;

   // compile and study the expression
   re = pcre_compile(regex, options, &pcre_error, &erroffset, NULL);
   if (re == NULL)
   {
      UtlString errorMsg("Regular Expression compile error: ");
      errorMsg.append(pcre_error);
      errorMsg.append(" at offset ");

      // "%9d" is a minimum width: a 10-digit or negative offset needs up to
      // 11 characters plus the terminator.
      char offsetStr[16];
      sprintf(offsetStr, "%9d", erroffset);
      errorMsg.append(offsetStr);

      errorMsg.append(" in expression '");
      errorMsg.append(regex);
      errorMsg.append("'");

      // errorMsg is destroyed during stack unwinding; throw a copy that
      // outlives this frame instead of errorMsg.data().
      static char thrownMsg[1024];
      strncpy(thrownMsg, errorMsg.data(), sizeof(thrownMsg) - 1);
      thrownMsg[sizeof(thrownMsg) - 1] = '\0';
      throw static_cast<const char*>(thrownMsg);
   }

   pe = pcre_study(re, 0, &pcre_error);

   // Save the compilation block sizes for the copy constructor.
   // The size of the compiled expression does not depend on whether the
   // study succeeded, so it is always recorded; a zero re_size would make a
   // copy allocate nothing for the expression. With no study data the study
   // size query is expected to leave/report zero.
   re_size = 0;
   study_size = 0;
   allocated_study = false;
   pcre_fullinfo(re, pe, PCRE_INFO_SIZE, &re_size);
   pcre_fullinfo(re, pe, PCRE_INFO_STUDYSIZE, &study_size);

   if (!pe)
   {
      // pcre_study didn't return any study data,
      // but we need the pcre_extra block anyway for the recursion limit,
      // so get one
      pe = (pcre_extra*)pcre_malloc(sizeof(pcre_extra));
      if (!pe)
      {
         pcre_free(re);
         re = NULL;
         const char * allocError = __FILE__ ": unable to allocate pcre_extra in RegEx";
         throw allocError;
      }
      memset(pe, 0, sizeof(pcre_extra));
   }

   // set the maximum recursion depth option in the pcre_extra (pe) block
   pe->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
   pe->match_limit_recursion = maxDepth;

   // allocate space for match results based on how many substrings
   // there are in the expression (+1 for the entire match)
   pcre_fullinfo(re, pe, PCRE_INFO_CAPTURECOUNT, &substrcount);
   substrcount++;
   matchlist = NULL;
   try
   {
      ovector = new int[3*substrcount];
   }
   catch (...)
   {
      // The destructor will not run for a partially constructed object.
      pcre_free(pe);
      pe = NULL;
      pcre_free(re);
      re = NULL;
      throw;
   }
}
/*
 * Set the match limit stored in the pcre_extra block attached to v_rex.
 *
 * When v_rex has no pcre_extra block yet, one is obtained from pcre_malloc,
 * its flags are set to exactly PCRE_EXTRA_MATCH_LIMIT, and it is attached via
 * set_extra(). When a block already exists, PCRE_EXTRA_MATCH_LIMIT is OR-ed
 * into its existing flags. In both cases match_limit is set to v_lim and
 * v_rex is returned.
 */
CAMLprim value pcre_set_imp_match_limit_stub(value v_rex, intnat v_lim)
{
  pcre_extra *extra = get_extra(v_rex);

  if (extra != NULL) {
    /* Keep whatever flags are already present. */
    extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
  } else {
    extra = pcre_malloc(sizeof(pcre_extra));
    extra->flags = PCRE_EXTRA_MATCH_LIMIT;
    set_extra(v_rex, extra);
  }

  extra->match_limit = v_lim;
  return v_rex;
}
/**
 * Return a byte-for-byte duplicate of a compiled pattern.
 *
 * @param re  compiled pattern to duplicate; may be NULL
 * @return    NULL when @p re is NULL, otherwise a buffer obtained from
 *            pcre_malloc holding a copy of the pattern. The byte count is
 *            the value pcre_fullinfo reports for PCRE_INFO_SIZE.
 *
 * EXCEPT is invoked when the buffer cannot be allocated.
 */
pcre *
Regex::clone_re(pcre * re)
{
	if (re == NULL) {
		return NULL;
	}

	size_t nbytes;
	pcre_fullinfo(re, NULL, PCRE_INFO_SIZE, &nbytes);

	pcre * duplicate = (pcre *) pcre_malloc(nbytes * sizeof(char));
	if (duplicate == NULL) {
		// Out of memory is reported through EXCEPT rather than a return value.
		EXCEPT("No memory to allocate re clone");
	}

	memcpy(duplicate, re, nbytes);
	return duplicate;
}
// Copy constructor.
//
// Duplicates the compiled expression. The pcre_extra block and its study
// data are duplicated only when the source has both a pcre_extra block and a
// positive study_size, and only if both allocations succeed; in every other
// case the copy ends up with pe == NULL, study_size == 0 and
// allocated_study == false.
//
// Throws a const char* message when the compiled expression cannot be
// allocated.
RegEx::RegEx(const RegEx& regex)
{
   const char * error = __FILE__ ": unknown error in RegEx(RegEx)";

   // compiled regular expression
   re = (pcre*)pcre_malloc(regex.re_size);
   if (!re)
   {
      throw error;
   }
   memcpy(re, regex.re, regex.re_size);
   re_size = regex.re_size;

   // Default to "no study information"; overwritten below only when a
   // complete copy could be made.
   pe              = NULL;
   study_size      = 0;
   allocated_study = false;

   const bool sourceHasStudy = (regex.pe != NULL) && (regex.study_size > 0);
   if (sourceHasStudy)
   {
      pcre_extra* extraCopy = (pcre_extra*)pcre_malloc(sizeof(pcre_extra));
      if (extraCopy)
      {
         void* studyCopy = pcre_malloc(regex.study_size);
         if (studyCopy)
         {
            // copy the extra block, then re-point it at our own study data
            memcpy(extraCopy, regex.pe, sizeof(pcre_extra));
            extraCopy->study_data = studyCopy;
            memcpy(studyCopy, regex.pe->study_data, regex.study_size);

            pe              = extraCopy;
            study_size      = regex.study_size;
            allocated_study = true;
         }
         else
         {
            // no study data, so the extra block is dropped as well
            pcre_free(extraCopy);
         }
      }
   }

   // match result storage
   substrcount = regex.substrcount;
   ovector     = new int[3*substrcount];
   matchlist   = NULL;
}
char * regexp_replace(char * from, char *to, char *text) { int nmat, offset = 0, textlen; int ovec[MAXCAPTURE]; char *res, *ret, *pom; const char *overfl = NULL; /* warning, go away */ int global, i; #ifdef HAVE_PCRE const char *er_ptr; int erroffset; #else regmatch_t pmat[MAXCAPTURE/3]; regex_t ppat_data; regex_t *ppat; #endif if( from == NULL || to == NULL || text == NULL) { if(text == NULL) return NULL; ret = (unsigned char *)js_mem_alloc(strlen(text)+1); strcpy(ret,text); return ret; } while(*from == ' ' || *from == '\t') from++; #ifdef HAVE_PCRE pom = pcre_malloc(strlen(from)+1); #else /* HAVE_PCRE */ pom = mem_alloc(strlen(from)+1); #endif /* HAVE_PCRE */ if(*from != '/') { strcpy(pom, from); global = 0; } else { for( i = strlen(from)-1; i > 1 && (from[i] == ' ' || from[i] == '\t'); i--); if( from[i] == '/') { strncpy(pom, from+1, i-1); pom[i-1] = '\0'; global = 0; }else if( i > 1 && from[i] == 'g' && from[i-1] == '/') { strncpy(pom, from+1, i-2); pom[i-2] = '\0'; global = 1; }else { strncpy(pom, from, i+1); pom[i+1] = '\0'; global = 0; } } #ifdef REGEX_DEBUG printf("Search pattern is '%s', global = %d\n",pom,global); #endif /* REGEX_DEBUG */ #ifdef HAVE_PCRE pcre *ppat = pcre_compile(pom, 0/*PCRE_ANCHORED*/, &er_ptr, &erroffset, NULL); pcre_free(pom); #else /* HAVE_PCRE */ ppat = &ppat_data; if (regcomp(ppat, pom, REG_EXTENDED)) ppat = NULL; mem_free(pom); #endif /* HAVE_PCRE */ if (ppat == NULL) { if(text == NULL) return NULL; ret = (unsigned char *)js_mem_alloc(strlen(text)+1); strcpy(ret,text); return ret; } textlen = strlen(text); #ifdef HAVE_PCRE res = pcre_malloc(MAXCAPTURE+textlen); #else /* HAVE_PCRE */ res = mem_alloc(MAXCAPTURE+textlen); #endif /* HAVE_PCRE */ cp = res; ep = res+MAXCAPTURE+textlen; if(global) { do { #ifdef HAVE_PCRE nmat = pcre_exec(ppat, NULL, text, textlen, offset, 0, ovec, sizeof(ovec)/sizeof(int)); #else /* HAVE_PCRE */ if (regexec(ppat, text+offset, MAXCAPTURE/3, pmat, 0)) nmat = 0; else for( nmat = 0; nmat < MAXCAPTURE/3; 
nmat++ ) if((ovec[nmat<<1] = pmat[nmat].rm_so) == -1 || (ovec[(nmat<<1)+1] = pmat[nmat].rm_eo) == -1) break; #endif /* HAVE_PCRE */ #ifdef HAVE_PCRE for(i = 0; i < nmat*2; i++) ovec[i]-=offset; #endif /* HAVE_PCRE */ #ifdef REGEX_DEBUG dumpmatch(text+offset, textlen-offset, to, nmat, ovec); #endif /* REGEX_DEBUG */ if(nmat > 0) { overfl = edit(text+offset, textlen - offset, to, nmat, ovec, res); offset += ovec[1]; } } while (nmat >0 && overfl); } else { #ifdef HAVE_PCRE nmat = pcre_exec(ppat, NULL, text, textlen, 0, 0, ovec, sizeof(ovec)/sizeof(int)); #else /* HAVE_PCRE */ if (regexec(ppat, text, MAXCAPTURE/3, pmat, 0)) nmat = 0; else for( nmat = 0; nmat < MAXCAPTURE/3; nmat++ ) if((ovec[nmat<<1] = pmat[nmat].rm_so) == -1 || (ovec[(nmat<<1)+1] = pmat[nmat].rm_eo) == -1) break; #endif /* HAVE_PCRE */ #ifdef REGEX_DEBUG dumpmatch(text+offset, textlen-offset, to, nmat, ovec); #endif /* REGEX_DEBUG */ if(nmat > 0) { overfl = edit(text+offset, textlen - offset, to, nmat, ovec, res); offset += ovec[1]; } } if ( textlen >= offset && cp + textlen - offset < ep) { strncpy(cp, text+offset, textlen - offset); *(cp +textlen - offset) = '\0'; } else *(ep-1) = '\0'; ret = (unsigned char *)js_mem_alloc(strlen(res)+1); strcpy(ret,res); #ifdef HAVE_PCRE pcre_free(res); pcre_free(ppat); #else /* HAVE_PCRE */ mem_free(res); regfree(ppat); #endif /* HAVE_PCRE */ return ret; }