/*
 * GNU-regex style entry point: try to match `reg` against the buffer
 * str[0 .. size) with the match anchored at offset `pos`.
 * The value returned by onig_match() is passed straight back to the caller,
 * and `regs` is handed to onig_match() as the capture region.
 */
extern int
re_match(regex_t* reg, const char* str, int size, int pos,
         struct re_registers* regs)
{
  UChar* const text_begin = (UChar* )str;
  UChar* const text_end   = (UChar* )(str + size);
  UChar* const match_at   = (UChar* )(str + pos);

  return onig_match(reg, text_begin, text_end, match_at, regs,
                    ONIG_OPTION_NONE);
}
void regexp_splitter::split( const std::string& str, std::vector<std::pair<size_t, size_t> >& bounds) const { const UChar* head = reinterpret_cast<const UChar*>(str.data()); const UChar* end = head + str.size(); OnigRegion* region = onig_region_new(); try { int cur = 0; while (head + cur < end) { int match = onig_match(reg_, head, end, head + cur, region, ONIG_OPTION_NONE); if (match < 0) { // did not match cur++; continue; } const int pos = region->beg[group_]; const int len = region->end[group_] - pos; bounds.push_back(std::make_pair(pos, len)); if (len > 0) { cur += len; } else { ++cur; } } onig_region_free(region, 1); } catch (...) { onig_region_free(region, 1); throw; } }
/*
 * Pawn native: compiles the pattern in params[2] and matches it against the
 * string in params[1], anchored at the start of that string.
 *
 * Returns:
 *   the onig_match() result when both strings are available and the pattern
 *   compiles,
 *   -1     when the pattern fails to compile (the error text is logged),
 *   -1337  when either string parameter could not be fetched.
 */
cell AMX_NATIVE_CALL pawn_regex_match(AMX* amx, cell* params)
{
	// NULL-initialised so the failure path below never passes an
	// indeterminate pointer to onig_free() if onig_new() bails out early.
	regex_t* RegExpr = NULL;
	const char *rexp = NULL, *string = NULL;
	amx_StrParam(amx, params[1], string);
	amx_StrParam(amx, params[2], rexp);
	if(string && rexp)
	{
		UChar* pattern = (UChar* )rexp;
		OnigErrorInfo einfo;
		int r = onig_new(&RegExpr, pattern, pattern + strlen((char* )pattern), ONIG_OPTION_DEFAULT, ONIG_ENCODING_ASCII, ONIG_SYNTAX_PERL, &einfo);
		//logprintf("[REGEX DEBUG]: rexp %s",pattern);
		if(r != ONIG_NORMAL)
		{
			UChar s[ONIG_MAX_ERROR_MESSAGE_LEN];
			onig_error_code_to_str(s, r, &einfo);
			logprintf("[REGEX ERROR]: %s", s);
			onig_free(RegExpr);
			return -1;
		}
		UChar* str = (UChar* )string;
		OnigRegion *region = onig_region_new();
		r = onig_match(RegExpr, str, str+strlen((char*) str), str, region, ONIG_OPTION_NONE);
		//logprintf("[REGEX DEBUG]: string %s",str);
		// The capture region is not exposed to the script; only the result code is.
		onig_region_free(region, 1);
		onig_free(RegExpr);
		//logprintf("[REGEX DEBUG]: return %d",r);
		return r;
	}
	return -1337;
}
/**
 * Tries to match this Regexp against +string+, anchored at offset +start+.
 *
 * @param string  subject of the match
 * @param start   offset into +string+ at which the match must begin
 * @return the value produced by get_match_data() when onig_match() reports
 *         anything other than ONIG_MISMATCH; Qnil when there is no match or
 *         when +start+ lies outside [0, string->size()].
 */
Object* Regexp::match_start(STATE, String* string, Fixnum* start) {
  int beg, max;
  const UChar *str;
  const UChar *fin;
  OnigRegion *region;
  Object* md = Qnil;

  if(unlikely(!onig_data)) {
    Exception::argument_error(state, "Not properly initialized Regexp");
  }

  maybe_recompile(state);

  max = string->size();
  native_int pos = start->to_native();

  // Bounds check: without it str would be moved outside the string's storage.
  // Done before the region is allocated so nothing needs to be released.
  if(pos < 0 || pos > max) return Qnil;

  region = onig_region_new();

  str = (UChar*)string->c_str(state);
  fin = str + max;
  str += pos;

  int* back_match = onig_data->int_map_backward;

  beg = onig_match(onig_data, str, fin, str, region, ONIG_OPTION_NONE);

  // Seems like onig must setup int_map_backward lazily, so we have to watch
  // for it to appear here.
  if(onig_data->int_map_backward != back_match) {
    // Copy the newly created table into a ByteArray, release the original
    // buffer with free(), and point onig_data at the ByteArray's bytes.
    native_int size = sizeof(int) * ONIG_CHAR_TABLE_SIZE;
    ByteArray* ba = ByteArray::create(state, size);
    memcpy(ba->raw_bytes(), onig_data->int_map_backward, size);

    // Dispose of the old one.
    free(onig_data->int_map_backward);

    onig_data->int_map_backward = reinterpret_cast<int*>(ba->raw_bytes());

    write_barrier(state, ba);
  }

  if(beg != ONIG_MISMATCH) {
    md = get_match_data(state, region, string, this, pos);
  }

  onig_region_free(region, 1);
  return md;
}
/*
 * Pawn native: matches the string in params[1] against the expression stored
 * in slot params[2] of rexpression, anchored at the start of the string.
 *
 * Returns the onig_match() result, -1 (after logging) when the slot index is
 * outside [0, TotalExpressions), or -1337 when the string could not be fetched.
 */
cell AMX_NATIVE_CALL pawn_regex_exmatch(AMX* amx, cell* params)
{
	const char *string = NULL;
	amx_StrParam(amx, params[1], string);
	if(!string)
	{
		return -1337;
	}

	const int id = (int)params[2];
	if(id < 0 || id >= TotalExpressions)
	{
		logprintf("[REGEX ERROR]: Call regex_exmatch with undefined parameter at index %d", id);
		return -1;
	}

	UChar* subject = (UChar* )string;
	UChar* subject_end = subject + strlen(string);
	return onig_match(rexpression[id].RegExpr, subject, subject_end, subject, rexpression[id].zreg, ONIG_OPTION_NONE);
}
/*
 * Runs onig_match() for `regex` on the buffer str[0 .. str_length), with the
 * match anchored at byte `offset`, and returns onig_match()'s result unchanged.
 *
 * str         start of the subject buffer
 * str_length  number of bytes in the buffer
 * offset      byte offset at which the match must begin
 * option      option flags forwarded to onig_match()
 * region      capture region filled in by onig_match()
 *
 * When BENCHMARK_CHELP is defined, the elapsed time of the call is printed
 * to stdout in microseconds.
 */
int MatchOnigRegex(void *str, int str_length, int offset, int option,
                  OnigRegex regex, OnigRegion *region) {
    int ret = ONIG_MISMATCH;
#ifdef BENCHMARK_CHELP
    struct timeval tim1, tim2;
    long t;
#endif

    OnigUChar *str_start = (OnigUChar *) str;
    OnigUChar *str_end = (OnigUChar *) (str_start + str_length);
    OnigUChar *search_start = (OnigUChar *)(str_start + offset);

#ifdef BENCHMARK_CHELP
    gettimeofday(&tim1, NULL);
#endif

    ret = onig_match(regex, str_start, str_end, search_start, region, option);

#ifdef BENCHMARK_CHELP
    gettimeofday(&tim2, NULL);
    t = (tim2.tv_sec - tim1.tv_sec) * 1000000 + tim2.tv_usec - tim1.tv_usec;
    printf("%ld microseconds elapsed\n", t);
#endif
    return ret;
}
bool regexp_match::match(const std::string& key) { const UChar* str = reinterpret_cast<const UChar*>(key.c_str()); const UChar* end = str + key.size(); return onig_match(reg_, str, end, str, NULL, ONIG_OPTION_NONE) >= 0; }
int regexec_onig(bregonig *rx, const TCHAR *stringarg, const TCHAR *strend, /* pointer to null at end of string */ const TCHAR *strbeg, /* real beginning of string */ int minend, /* end of match must be at least minend after stringarg */ int safebase, /* no need to remember string in subbase */ int one_shot, /* if not match then break without proceed str pointer */ TCHAR *msg) /* fatal error message */ { TRACE1(_T("one_shot: %d\n"), one_shot); OnigPosition err_code; if (one_shot) { OnigOptionType option = (minend > 0) ? ONIG_OPTION_FIND_NOT_EMPTY : ONIG_OPTION_NONE; err_code = onig_match(rx->reg, (UChar*) strbeg, (UChar*) strend, (UChar*) stringarg, rx->region, option); } else { const TCHAR *global_pos = stringarg; /* \G */ if (minend > 0) { #ifdef UNICODE int kanjiflag = 1; #else int kanjiflag = rx->pmflags & PMf_KANJI; #endif if (kanjiflag && is_char_pair((TBYTE*) stringarg)) { stringarg += 2; } else { stringarg++; } } err_code = onig_search_gpos(rx->reg, (UChar*) strbeg, (UChar*) strend, (UChar*) global_pos, (UChar*) stringarg, (UChar*) strend, rx->region, ONIG_OPTION_NONE); } if (err_code >= 0) { /* FOUND */ if (rx->startp) { delete [] rx->startp; } rx->nparens = rx->region->num_regs - 1; rx->startp = new (std::nothrow) TCHAR*[rx->region->num_regs * 2]; /* allocate startp and endp together */ if (rx->startp == NULL) { asc2tcs(msg, "out of space", BREGEXP_MAX_ERROR_MESSAGE_LEN); return -1; } rx->endp = rx->startp + rx->region->num_regs; for (int i = 0; i < rx->region->num_regs; i++) { if (rx->region->beg[i] != ONIG_REGION_NOTPOS) { // found rx->startp[i] = const_cast<TCHAR *>(strbeg) + rx->region->beg[i] / sizeof(TCHAR); rx->endp[i] = const_cast<TCHAR *>(strbeg) + rx->region->end[i] / sizeof(TCHAR); } else { // not found rx->startp[i] = NULL; rx->endp[i] = NULL; } } return 1; } else if (err_code == ONIG_MISMATCH) { /* NOT FOUND */ return 0; } else { /* ERROR */ onig_err_to_bregexp_msg(err_code, NULL, msg); return -1; } }
/**
 * Tries to match this Regexp against +string+, anchored at byte offset +start+.
 *
 * @param string  subject of the match
 * @param start   byte offset into +string+ at which the match must begin
 * @return the value produced by get_match_data() when onig_match() reports
 *         anything other than ONIG_MISMATCH; cNil when there is no match or
 *         when +start+ lies outside [0, string->byte_size()].
 */
Object* Regexp::match_start(STATE, String* string, Fixnum* start) {
  int beg, max;
  const UChar *str;
  const UChar *fin;
  Object* md = cNil;

  if(unlikely(!onig_data)) {
    Exception::argument_error(state, "Not properly initialized Regexp");
  }

  // utilities::thread::Mutex::LockGuard lg(state->shared().onig_lock());

  max = string->byte_size();
  native_int pos = start->to_native();

  str = (UChar*)string->byte_address();
  fin = str + max;

  // Bounds check: a start outside the string would move str out of its storage.
  if(pos < 0 || pos > max) return cNil;
  str += pos;

  // lock_ is held across recompilation, the match, and the table swap below.
  lock_.lock();

  maybe_recompile(state, string);

  // The region is built by hand on top of two local 10-element arrays, every
  // slot pre-set to ONIG_REGION_NOTPOS.
  int begin_reg[10] = { ONIG_REGION_NOTPOS, ONIG_REGION_NOTPOS,
                        ONIG_REGION_NOTPOS, ONIG_REGION_NOTPOS,
                        ONIG_REGION_NOTPOS, ONIG_REGION_NOTPOS,
                        ONIG_REGION_NOTPOS, ONIG_REGION_NOTPOS,
                        ONIG_REGION_NOTPOS, ONIG_REGION_NOTPOS };
  int end_reg[10] =  { ONIG_REGION_NOTPOS, ONIG_REGION_NOTPOS,
                       ONIG_REGION_NOTPOS, ONIG_REGION_NOTPOS,
                       ONIG_REGION_NOTPOS, ONIG_REGION_NOTPOS,
                       ONIG_REGION_NOTPOS, ONIG_REGION_NOTPOS,
                       ONIG_REGION_NOTPOS, ONIG_REGION_NOTPOS };

  OnigRegion region = { 10, 0, begin_reg, end_reg, 0, 0 };

  int* back_match = onig_data->int_map_backward;

  beg = onig_match(onig_data, str, fin, str, &region, ONIG_OPTION_NONE);

  // Seems like onig must setup int_map_backward lazily, so we have to watch
  // for it to appear here.
  if(onig_data->int_map_backward != back_match) {
    // Copy the newly created table into a ByteArray, release the original
    // buffer with free(), and point onig_data at the ByteArray's bytes.
    native_int size = sizeof(int) * ONIG_CHAR_TABLE_SIZE;
    ByteArray* ba = ByteArray::create(state, size);
    memcpy(ba->raw_bytes(), onig_data->int_map_backward, size);

    // Dispose of the old one.
    free(onig_data->int_map_backward);

    onig_data->int_map_backward = reinterpret_cast<int*>(ba->raw_bytes());

    write_barrier(state, ba);
  }

  lock_.unlock();

  if(beg != ONIG_MISMATCH) {
    md = get_match_data(state, &region, string, this, pos);
  }

  // free_self is 0 because the OnigRegion struct itself is a local.
  onig_region_free(&region, 0);
  return md;
}
/*
 * Finds the index of the first element of an array variable that equals the
 * searched value or, when isRegExp is set, that the value (compiled as a
 * regular expression) matches in full.
 *
 * args / argsCount:
 *   2 arguments: (variable name, value), search starts at index 0;
 *   otherwise:   (start index, variable name, value), a negative start is
 *                treated as 0.
 *
 * The scan also visits the slot one past the last element, where the element
 * is taken to be "" (string search) or 0 (numeric search).
 *
 * Returns the index found, or -1 when nothing was found or an error occurred
 * (type mismatch and bad-regexp errors are reported through qspSetError()).
 */
int qspArrayPos(QSPVariant *args, int argsCount, QSP_BOOL isRegExp)
{
	int cellNum, total, idx;
	QSPVar *arr;
	QSPVariant *needle;
	QSP_CHAR *text;
	OnigUChar *rangeBeg, *rangeEnd;
	regex_t *compiled;
	OnigErrorInfo compileInfo;
	QSP_BOOL arrIsString;
	/* First argument is a name (string) in the 2-arg form, an index otherwise */
	if (qspConvertVariantTo(args, argsCount == 2))
	{
		qspSetError(QSP_ERR_TYPEMISMATCH);
		return -1;
	}
	if (argsCount == 2)
	{
		text = QSP_STR(args[0]);
		idx = 0;
		needle = args + 1;
	}
	else
	{
		qspConvertVariantTo(args + 1, QSP_TRUE);
		text = QSP_STR(args[1]);
		idx = QSP_NUM(args[0]);
		needle = args + 2;
		if (idx < 0) idx = 0;
	}
	arr = qspVarReferenceWithType(text, QSP_FALSE, &arrIsString);
	if (!arr) return -1;
	/* The searched value becomes a string for regexp searches and string arrays */
	if (qspConvertVariantTo(needle, isRegExp || arrIsString))
	{
		qspSetError(QSP_ERR_TYPEMISMATCH);
		return -1;
	}
	if (isRegExp)
	{
		rangeBeg = (OnigUChar *)QSP_PSTR(needle);
		rangeEnd = (OnigUChar *)qspStrEnd(QSP_PSTR(needle));
		if (onig_new(&compiled, rangeBeg, rangeEnd, ONIG_OPTION_DEFAULT, QSP_ONIG_ENC, ONIG_SYNTAX_PERL_NG, &compileInfo))
		{
			qspSetError(QSP_ERR_INCORRECTREGEXP);
			return -1;
		}
	}
	total = arr->ValsCount;
	if (idx > total) idx = total;
	for (; idx <= total; ++idx)
	{
		if (!needle->IsStr)
		{
			/* Numeric comparison */
			cellNum = (idx < total ? arr->Values[idx].Num : 0);
			if (cellNum == QSP_PNUM(needle)) return idx;
			continue;
		}
		/* String comparison: missing or past-the-end elements count as "" */
		text = (idx < total ? arr->Values[idx].Str : 0);
		if (!text) text = QSP_FMT("");
		if (isRegExp)
		{
			rangeBeg = (OnigUChar *)text;
			rangeEnd = (OnigUChar *)qspStrEnd(text);
			/* Only a match that consumes the whole element counts */
			if (onig_match(compiled, rangeBeg, rangeEnd, rangeBeg, 0, ONIG_OPTION_NONE) == rangeEnd - rangeBeg)
			{
				onig_free(compiled);
				return idx;
			}
		}
		else if (!qspStrsComp(text, QSP_PSTR(needle)))
			return idx;
	}
	if (isRegExp) onig_free(compiled);
	return -1;
}
// C/Oniguruma version of the "simple_id" program #include "../common.h" #define REGEX "(.*)" int main(int argc, char* argv[]) { PRE while(fgets(buffer, sizeof(buffer), stdin)) { lno++; end = buffer + strlen((char* )buffer); start = buffer; r = onig_match(reg, buffer, end, start, region, ONIG_OPTION_NONE); if (r < 0) { fprintf(stderr, "matching error on line %d\n", lno); return 1; } else { int l = region->end[0] - region->beg[0]; fprintf(stdout, "%*.*s", l, l, buffer); } } PRINT_TIMES return 0; }
/**
 * Tries to match this Regexp against +string+, anchored at byte offset +start+.
 *
 * @param string  subject of the match; its encoding must be valid
 * @param start   byte offset into +string+ at which the match must begin
 * @return the MatchData produced by get_match_data() when onig_match() reports
 *         anything other than ONIG_MISMATCH; nil when there is no match or
 *         when +start+ lies outside [0, string->byte_size()]; 0 when
 *         maybe_recompile() yields no regex data.
 */
MatchData* Regexp::match_start(STATE, String* string, Fixnum* start) {
  int beg, max;
  const UChar *str;
  const UChar *fin;

  if(unlikely(!onig_source_data(state))) {
    Exception::argument_error(state, "Not properly initialized Regexp");
  }

  if(unlikely(!CBOOL(string->valid_encoding_p(state)))) {
    std::ostringstream msg;
    msg << "invalid byte sequence in " << string->encoding(state)->name()->to_string(state);
    Exception::argument_error(state, msg.str().c_str());
  }

  max = string->byte_size();
  native_int pos = start->to_native();

  str = (UChar*)string->byte_address();
  fin = str + max;

  // Bounds check: a start outside the string would move str out of its storage.
  if(pos < 0 || pos > max) return nil<MatchData>();
  str += pos;

  // lock_ is held across recompilation, the match, and the table swap below.
  lock_.lock();

  regex_t* data = maybe_recompile(state, string);
  if(!data) {
    // Release the lock on this early exit too; otherwise it stays held.
    lock_.unlock();
    return 0;
  }

  // The region is built by hand on top of two local 10-element arrays, every
  // slot pre-set to ONIG_REGION_NOTPOS.
  OnigPosition begin_reg[10] = { ONIG_REGION_NOTPOS, ONIG_REGION_NOTPOS,
                                 ONIG_REGION_NOTPOS, ONIG_REGION_NOTPOS,
                                 ONIG_REGION_NOTPOS, ONIG_REGION_NOTPOS,
                                 ONIG_REGION_NOTPOS, ONIG_REGION_NOTPOS,
                                 ONIG_REGION_NOTPOS, ONIG_REGION_NOTPOS };
  OnigPosition end_reg[10] =  { ONIG_REGION_NOTPOS, ONIG_REGION_NOTPOS,
                                ONIG_REGION_NOTPOS, ONIG_REGION_NOTPOS,
                                ONIG_REGION_NOTPOS, ONIG_REGION_NOTPOS,
                                ONIG_REGION_NOTPOS, ONIG_REGION_NOTPOS,
                                ONIG_REGION_NOTPOS, ONIG_REGION_NOTPOS };

  OnigRegion region = { 10, 0, begin_reg, end_reg, 0, 0 };

  int* back_match = data->int_map_backward;

  beg = onig_match(data, str, fin, str, &region, ONIG_OPTION_NONE);

  // Seems like onig must setup int_map_backward lazily, so we have to watch
  // for it to appear here.
  if(data->int_map_backward != back_match) {
    // Copy the newly created table into a ByteArray, release the original
    // buffer with free(), and point the regex data at the ByteArray's bytes.
    native_int size = sizeof(int) * ONIG_CHAR_TABLE_SIZE;
    ByteArray* ba = ByteArray::create_dirty(state, size);
    memcpy(ba->raw_bytes(), data->int_map_backward, size);

    // Dispose of the old one.
    free(data->int_map_backward);

    data->int_map_backward = reinterpret_cast<int*>(ba->raw_bytes());

    write_barrier(state, ba);
  }

  lock_.unlock();

  MatchData* md = nil<MatchData>();
  if(beg != ONIG_MISMATCH) {
    md = get_match_data(state, &region, string, this, pos);
  }

  // free_self is 0 because the OnigRegion struct itself is a local.
  onig_region_free(&region, 0);
  return md;
}