// Pawn native: replaces the compiled expression stored in slot params[2]
// with a freshly compiled version of the pattern string in params[1].
//
// Returns  1     on success,
//         -1     if the slot index is out of range or the pattern fails to compile,
//         -1337  if the pattern string could not be read from the AMX.
//
// On a compile failure the slot is left empty (both pointers NULL) so that a
// later regex_set / regex_end call does not free the old objects a second time.
cell AMX_NATIVE_CALL pawn_regex_set(AMX* amx, cell* params)
{
	const char *rexp = NULL;
	amx_StrParam(amx, params[1], rexp);
	if(!rexp)
	{
		return -1337;
	}

	int aidi = (int)params[2];
	if(aidi < 0 || aidi >= TotalExpressions)
	{
		logprintf("[REGEX ERROR]: Call regex_set with undefined parameter at index %d", aidi);
		return -1;
	}

	// Drop the previous contents of the slot and clear the pointers right away:
	// if compilation below fails we return early, and stale pointers here would
	// be freed again by the next call that touches this slot.
	onig_free(rexpression[aidi].RegExpr);
	rexpression[aidi].RegExpr = NULL;
	onig_region_free(rexpression[aidi].zreg, 1);
	rexpression[aidi].zreg = NULL;

	UChar* pattern = (UChar* )rexp;
	OnigErrorInfo einfo;
	rexpression[aidi].id = onig_new(&rexpression[aidi].RegExpr, pattern, pattern + strlen(rexp),
		ONIG_OPTION_NONE, ONIG_ENCODING_UTF8, ONIG_SYNTAX_DEFAULT, &einfo);
	if(rexpression[aidi].id != ONIG_NORMAL)
	{
		UChar s[ONIG_MAX_ERROR_MESSAGE_LEN];
		onig_error_code_to_str(s, rexpression[aidi].id, &einfo);
		logprintf("[REGEX ERROR]: %s | at index %d. regex_set aborted.", s, aidi);
		// RegExpr was NULLed above, so this is a no-op unless onig_new left
		// something behind; either way the slot ends up empty.
		onig_free(rexpression[aidi].RegExpr);
		rexpression[aidi].RegExpr = NULL;
		return -1;
	}

	rexpression[aidi].zreg = onig_region_new();
	return 1;
}
void regexp_splitter::split( const std::string& str, std::vector<std::pair<size_t, size_t> >& bounds) const { const UChar* head = reinterpret_cast<const UChar*>(str.data()); const UChar* end = head + str.size(); OnigRegion* region = onig_region_new(); try { int cur = 0; while (head + cur < end) { int match = onig_match(reg_, head, end, head + cur, region, ONIG_OPTION_NONE); if (match < 0) { // did not match cur++; continue; } const int pos = region->beg[group_]; const int len = region->end[group_] - pos; bounds.push_back(std::make_pair(pos, len)); if (len > 0) { cur += len; } else { ++cur; } } onig_region_free(region, 1); } catch (...) { onig_region_free(region, 1); throw; } }
// Searches `string` between the byte offsets `start` and `end`. When `forward`
// is truthy the search runs from start towards end; otherwise the two bounds
// are swapped when handed to onig_search. Returns the match data produced by
// get_match_data, or Qnil when onig_search reports ONIG_MISMATCH.
Object* Regexp::match_region(STATE, String* string, Fixnum* start,
                             Fixnum* end, Object* forward)
{
  if(unlikely(!onig_data)) {
    Exception::argument_error(state, "Not properly initialized Regexp");
  }

  maybe_recompile(state);

  OnigRegion* region = onig_region_new();

  const int max = string->size();
  const UChar* str = (UChar*)string->c_str(state);

  // Remember the current table pointer so we can tell whether onig_search
  // replaced it.
  int* back_match = onig_data->int_map_backward;

  const UChar* search_start;
  const UChar* search_range;
  if(RTEST(forward)) {
    search_start = str + start->to_native();
    search_range = str + end->to_native();
  } else {
    search_start = str + end->to_native();
    search_range = str + start->to_native();
  }

  int beg = onig_search(onig_data, str, str + max, search_start, search_range,
                        region, ONIG_OPTION_NONE);

  // Seems like onig must setup int_map_backward lazily, so we have to watch
  // for it to appear here.
  if(onig_data->int_map_backward != back_match) {
    native_int size = sizeof(int) * ONIG_CHAR_TABLE_SIZE;
    ByteArray* ba = ByteArray::create(state, size);
    memcpy(ba->raw_bytes(), onig_data->int_map_backward, size);

    // Dispose of the old one.
    free(onig_data->int_map_backward);

    onig_data->int_map_backward = reinterpret_cast<int*>(ba->raw_bytes());

    write_barrier(state, ba);
  }

  Object* md = Qnil;
  if(beg != ONIG_MISMATCH) {
    md = get_match_data(state, region, string, this, 0);
  }

  onig_region_free(region, 1);
  return md;
}
// Pawn native: compiles the pattern in params[2], searches the string in
// params[1] and stores the result through the references in params[3]/[4].
//
// On a match:    *params[3] = match offset,
//                *params[4] = end offset of the last register minus one.
// On a mismatch: both outputs are set to -1.
//
// Returns  1     when the search ran (match or mismatch),
//         -1     on a compile error, search error or region allocation failure,
//         -1337  when a string or output reference could not be resolved.
cell AMX_NATIVE_CALL pawn_regex_search(AMX* amx, cell* params)
{
	// Initialised so the failure-path onig_free below never sees an
	// indeterminate pointer if onig_new bails out before assigning it.
	regex_t* RegExpr = NULL;
	const char *rexp = NULL, *string = NULL;
	cell* addr[2] = {NULL, NULL};
	amx_GetAddr(amx, params[3], &addr[0]);
	amx_GetAddr(amx, params[4], &addr[1]);
	amx_StrParam(amx, params[1], string);
	amx_StrParam(amx, params[2], rexp);
	if(!string || !rexp || !addr[0] || !addr[1])
	{
		return -1337;
	}

	UChar* pattern = (UChar* )rexp;
	OnigErrorInfo einfo;
	int r = onig_new(&RegExpr, pattern, pattern + strlen(rexp),
		ONIG_OPTION_DEFAULT, ONIG_ENCODING_ASCII, ONIG_SYNTAX_PERL, &einfo);
	if(r != ONIG_NORMAL)
	{
		UChar s[ONIG_MAX_ERROR_MESSAGE_LEN];
		onig_error_code_to_str(s, r, &einfo);
		logprintf("[REGEX ERROR]: %s", s);
		onig_free(RegExpr);
		return -1;
	}

	UChar* str = (UChar* )string;
	UChar* str_end = str + strlen(string);

	OnigRegion *region = onig_region_new();
	if(region == NULL)
	{
		// The match branch dereferences region, so it must exist.
		onig_free(RegExpr);
		return -1;
	}

	cell result = 1;
	r = onig_search(RegExpr, str, str_end, str, str_end, region, ONIG_OPTION_NONE);
	if(r >= 0)
	{
		*addr[0] = r;
		*addr[1] = region->end[region->num_regs-1]-1;
	}
	else if(r == ONIG_MISMATCH)
	{
		*addr[0] = -1;
		*addr[1] = -1;
	}
	else
	{
		UChar s[ONIG_MAX_ERROR_MESSAGE_LEN];
		onig_error_code_to_str(s, r);
		logprintf("[REGEX ERROR]: %s\n", s);
		result = -1;
	}

	onig_region_free(region, 1);
	onig_free(RegExpr);
	return result;
}
// Pawn native: compiles the pattern in params[2] and attempts a match anchored
// at the start of the string in params[1].
//
// Returns the onig_match result (matched length, or a negative Oniguruma code
// such as ONIG_MISMATCH), -1 when the pattern fails to compile, ONIGERR_MEMORY
// when no region could be allocated, and -1337 when either string could not be
// read from the AMX.
cell AMX_NATIVE_CALL pawn_regex_match(AMX* amx, cell* params)
{
	// Initialised so the failure-path onig_free below never sees an
	// indeterminate pointer if onig_new bails out before assigning it.
	regex_t* RegExpr = NULL;
	const char *rexp = NULL, *string = NULL;
	amx_StrParam(amx, params[1], string);
	amx_StrParam(amx, params[2], rexp);
	if(!string || !rexp)
	{
		return -1337;
	}

	UChar* pattern = (UChar* )rexp;
	OnigErrorInfo einfo;
	int r = onig_new(&RegExpr, pattern, pattern + strlen(rexp),
		ONIG_OPTION_DEFAULT, ONIG_ENCODING_ASCII, ONIG_SYNTAX_PERL, &einfo);
	if(r != ONIG_NORMAL)
	{
		UChar s[ONIG_MAX_ERROR_MESSAGE_LEN];
		onig_error_code_to_str(s, r, &einfo);
		logprintf("[REGEX ERROR]: %s", s);
		onig_free(RegExpr);
		return -1;
	}

	OnigRegion *region = onig_region_new();
	if(region == NULL)
	{
		onig_free(RegExpr);
		return ONIGERR_MEMORY;
	}

	UChar* str = (UChar* )string;
	r = onig_match(RegExpr, str, str + strlen(string), str, region, ONIG_OPTION_NONE);

	onig_region_free(region, 1);
	onig_free(RegExpr);
	return r;
}
static unstr_t* create_date_query(const nich_t *nich, size_t res_no, const unstr_t *data) { OnigRegion *region = onig_region_new(); unstr_t *strtmp = 0; unstr_t *query = 0; UChar *start; UChar *end; UChar *range; int ret = -1; int i = 0; end = (UChar *)(data->data + unstr_strlen(data)); start = (UChar *)(data->data); range = end; ret = onig_search(nich->reg, (UChar *)data->data, end, start, range, region, ONIG_OPTION_NONE); if(ret >= 0){ strtmp = unstr_init_memory(8); query = unstr_sprintf(NULL, "(%d,%$,%d,'", nich->board_no, nich->thread, res_no); for(i = 1; i < region->num_regs; i++){ unstr_substr_char(strtmp, data->data + region->beg[i], region->end[i] - region->beg[i]); unstr_strcat(query, strtmp); } unstr_strcat_char(query, "')"); } onig_region_clear(region); onig_region_free(region, 1); unstr_free(strtmp); return query; }
/*
 * Releases the register arrays embedded in `ms` and then the structure itself.
 * The registers live inside the struct, so only their contents are freed
 * before xfree() releases `ms`.
 */
static void mmapscanner_free(mmapscanner_t *ms)
{
#ifndef HAVE_RUBY_ONIGURUMA_H
    re_free_registers(&ms->regs);
#else
    onig_region_free(&ms->regs, 0);
#endif
    xfree(ms);
}
/* Frees an Onig_Type together with the region and compiled regex it holds.
 * A NULL argument is accepted and ignored. */
static void free_onig_type (Onig_Type *o)
{
   if (o != NULL)
     {
	if (NULL != o->region)
	  onig_region_free (o->region, 1);

	if (NULL != o->re)
	  onig_free (o->re);

	SLfree ((char *) o);
     }
}
// Pawn native: releases every compiled expression and match region held in
// rexpression[] and shuts the Oniguruma library down. Always returns -1337.
cell AMX_NATIVE_CALL pawn_regex_end(AMX* amx, cell* params)
{
	for(int i=0; i<TotalExpressions; i++)
	{
		// rexpression[i].id is an integer (pawn_regex_set stores the onig_new
		// status code in it), not a heap allocation. The former
		// `delete (int*)rexpression[i].id` was undefined behaviour for any
		// non-zero value, so nothing is deleted here.
		onig_free(rexpression[i].RegExpr);
		rexpression[i].RegExpr = NULL;
		onig_region_free(rexpression[i].zreg, 1);
		rexpression[i].zreg = NULL;	// avoid a double free if called again
	}
	onig_end();
	return -1337;
}
static int regexp_main(char *pat0, char *str0) { int r; unsigned char *start, *range, *end; regex_t* reg; OnigErrorInfo einfo; OnigRegion *region; static UChar* pattern; static UChar* str; int i; pattern = (UChar* )pat0; str = (UChar* )str0; strcpy(data_str, str); for (i = 0; i NAMES; i ++) { data_rslt[i].n[0] = 0; } r = onig_new(®, pattern, pattern + strlen((char* )pattern), ONIG_OPTION_DEFAULT, ONIG_ENCODING_ASCII, ONIG_SYNTAX_DEFAULT, &einfo); if (r != ONIG_NORMAL) { char s[ONIG_MAX_ERROR_MESSAGE_LEN]; onig_error_code_to_str(s, r, &einfo); return -1; } region = onig_region_new(); end = str + strlen((char* )str); start = str; range = end; r = onig_search(reg, str, end, start, range, region, ONIG_OPTION_NONE); if (r = 0) { r = onig_foreach_name(reg, name_callback, (void* )region); } else if (r == ONIG_MISMATCH) { return 1; } else { /* error */ char s[ONIG_MAX_ERROR_MESSAGE_LEN]; onig_error_code_to_str(s, r); return -1; } onig_region_free(region, 1); /* 1:free self, 0:free contents only */ onig_free(reg); onig_end(); return 0; }
/*
 * __gc metamethod: releases the compiled regex and the match region owned by
 * the userdata. Each pointer is cleared after being freed so that calling the
 * metamethod again by hand is harmless. Pushes nothing onto the Lua stack.
 */
static int LOnig_gc (lua_State *L)
{
  TOnig *ud = check_ud (L);

  /* precaution against "manual" __gc calling */
  if (ud->reg != NULL) {
    onig_free (ud->reg);
    ud->reg = NULL;
  }

  if (ud->region != NULL) {
    onig_region_free (ud->region, 1);
    ud->region = NULL;
  }

  return 0;
}
/*
 * Compiles `pattern` with the given syntax, searches `str` and prints the
 * capture counts, the match position, every register range and (through
 * node_callback) the capture tree to stderr.
 *
 * Returns 0 after a match or a mismatch, -1 on a compile or search error.
 * The region and the regex are released on every path after a successful
 * compile.
 */
extern int ex(unsigned char* str, unsigned char* pattern,
              OnigSyntaxType* syntax)
{
  int r;
  int status = 0;
  unsigned char *start, *range, *end;
  regex_t* reg;
  OnigErrorInfo einfo;
  OnigRegion *region;

  r = onig_new(&reg, pattern, pattern + strlen((char* )pattern),
	       ONIG_OPTION_DEFAULT, ONIG_ENCODING_ASCII, syntax, &einfo);
  if (r != ONIG_NORMAL) {
    char s[ONIG_MAX_ERROR_MESSAGE_LEN];
    onig_error_code_to_str((unsigned char* )s, r, &einfo);
    fprintf(stderr, "ERROR: %s\n", s);
    return -1;
  }

  fprintf(stderr, "number of captures: %d\n", onig_number_of_captures(reg));
  fprintf(stderr, "number of capture histories: %d\n",
          onig_number_of_capture_histories(reg));

  region = onig_region_new();

  end   = str + strlen((char* )str);
  start = str;
  range = end;
  r = onig_search(reg, str, end, start, range, region, ONIG_OPTION_NONE);
  if (r >= 0) {
    int i;

    fprintf(stderr, "match at %d\n", r);
    for (i = 0; i < region->num_regs; i++) {
      /* cast so the %ld conversion is correct whatever the register type is */
      fprintf(stderr, "%d: (%ld-%ld)\n", i,
              (long )region->beg[i], (long )region->end[i]);
    }
    fprintf(stderr, "\n");

    r = onig_capture_tree_traverse(region, ONIG_TRAVERSE_CALLBACK_AT_FIRST,
                                   node_callback, (void* )0);
  }
  else if (r == ONIG_MISMATCH) {
    fprintf(stderr, "search fail\n");
  }
  else { /* error */
    char s[ONIG_MAX_ERROR_MESSAGE_LEN];
    onig_error_code_to_str((unsigned char* )s, r);
    fprintf(stderr, "ERROR: %s\n", s);
    status = -1;
  }

  onig_region_free(region, 1 /* 1:free self, 0:free contents only */);
  onig_free(reg);
  return status;
}
static int x0(int no, char* pattern_arg, char* str_arg, int start_offset, int expected_from, int expected_to, int backward) { int r; unsigned char *start, *range, *end; regex_t* reg; OnigErrorInfo einfo; OnigRegion *region; UChar *pattern, *str; pattern = (UChar* )pattern_arg; str = (UChar* )str_arg; r = onig_new(®, pattern, pattern + strlen((char* )pattern), ONIG_OPTION_NEWLINE_CRLF, ONIG_ENCODING_UTF8, ONIG_SYNTAX_DEFAULT, &einfo); if (r != ONIG_NORMAL) { char s[ONIG_MAX_ERROR_MESSAGE_LEN]; onig_error_code_to_str(s, r, &einfo); fprintf(stderr, "ERROR: %s\n", s); return -1; } region = onig_region_new(); end = str + strlen((char* )str); if (backward) { start = end + start_offset; range = str; } else { start = str + start_offset; range = end; } r = onig_search(reg, str, end, start, range, region, ONIG_OPTION_NONE); if (r >= 0) { result(no, region->beg[0], region->end[0], expected_from, expected_to); } else if (r == ONIG_MISMATCH) { result(no, r, -1, expected_from, expected_to); } else { /* error */ char s[ONIG_MAX_ERROR_MESSAGE_LEN]; onig_error_code_to_str(s, r); fprintf(stderr, "ERROR: %s\n", s); return -1; } onig_region_free(region, 1 /* 1:free self, 0:free contents only */); onig_free(reg); return 0; }
extern int main(int argc, char* argv[]) { int r; unsigned char *start, *range, *end; regex_t* reg; OnigErrorInfo einfo; OnigRegion *region; static UChar* pattern = (UChar* )"a(.*)b|[e-f]+"; static UChar* str = (UChar* )"zzzzaffffffffb"; r = onig_new(®, pattern, pattern + strlen((char* )pattern), ONIG_OPTION_DEFAULT, ONIG_ENCODING_ASCII, ONIG_SYNTAX_DEFAULT, &einfo); if (r != ONIG_NORMAL) { char s[ONIG_MAX_ERROR_MESSAGE_LEN]; onig_error_code_to_str(s, r, &einfo); fprintf(stderr, "ERROR: %s\n", s); return -1; } region = onig_region_new(); end = str + strlen((char* )str); start = str; range = end; r = onig_search(reg, str, end, start, range, region, ONIG_OPTION_NONE); if (r >= 0) { int i; fprintf(stderr, "match at %d\n", r); for (i = 0; i < region->num_regs; i++) { fprintf(stderr, "%d: (%ld-%ld)\n", i, region->beg[i], region->end[i]); } r = 0; } else if (r == ONIG_MISMATCH) { fprintf(stderr, "search fail\n"); r = -1; } else { /* error */ char s[ONIG_MAX_ERROR_MESSAGE_LEN]; onig_error_code_to_str(s, r); fprintf(stderr, "ERROR: %s\n", s); return -1; } onig_region_free(region, 1 /* 1:free self, 0:free contents only */); onig_free(reg); onig_end(); return r; }
extern int main(int argc, char* argv[]) { int r; unsigned char *start, *range, *end; regex_t* reg; OnigErrorInfo einfo; OnigRegion *region; static UChar* pattern = (UChar* )"(?<foo>a*)(?<bar>b*)(?<foo>c*)"; static UChar* str = (UChar* )"aaabbbbcc"; OnigEncoding use_encs[] = { ONIG_ENCODING_ASCII }; onig_initialize(use_encs, sizeof(use_encs)/sizeof(use_encs[0])); r = onig_new(®, pattern, pattern + strlen((char* )pattern), ONIG_OPTION_DEFAULT, ONIG_ENCODING_ASCII, ONIG_SYNTAX_DEFAULT, &einfo); if (r != ONIG_NORMAL) { char s[ONIG_MAX_ERROR_MESSAGE_LEN]; onig_error_code_to_str(s, r, &einfo); fprintf(stderr, "ERROR: %s\n", s); return -1; } fprintf(stderr, "number of names: %d\n", onig_number_of_names(reg)); region = onig_region_new(); end = str + strlen((char* )str); start = str; range = end; r = onig_search(reg, str, end, start, range, region, ONIG_OPTION_NONE); if (r >= 0) { fprintf(stderr, "match at %d\n\n", r); r = onig_foreach_name(reg, name_callback, (void* )region); } else if (r == ONIG_MISMATCH) { fprintf(stderr, "search fail\n"); } else { /* error */ char s[ONIG_MAX_ERROR_MESSAGE_LEN]; onig_error_code_to_str(s, r); return -1; } onig_region_free(region, 1 /* 1:free self, 0:free contents only */); onig_free(reg); onig_end(); return 0; }
// Searches `string` starting at byte offset `start` and running to the end of
// the string. The subject handed to onig_search begins at that offset, and the
// same offset is passed on to get_match_data. Returns the match data, or Qnil
// when onig_search reports ONIG_MISMATCH.
Object* Regexp::search_from(STATE, String* string, Fixnum* start) {
  maybe_recompile(state);

  OnigRegion* region = onig_region_new();

  const int max = string->size();
  const native_int pos = start->to_native();

  const UChar* const base = (UChar*)string->c_str();
  const UChar* const fin = base + max;
  const UChar* const str = base + pos;

  // Remember the current table pointer so we can tell whether onig_search
  // replaced it.
  int* back_match = onig_data->int_map_backward;

  const int beg = onig_search(onig_data, str, fin, str, fin, region, ONIG_OPTION_NONE);

  // Seems like onig must setup int_map_backward lazily, so we have to watch
  // for it to appear here.
  if(onig_data->int_map_backward != back_match) {
    native_int size = sizeof(int) * ONIG_CHAR_TABLE_SIZE;
    ByteArray* ba = ByteArray::create(state, size);
    memcpy(ba->raw_bytes(), onig_data->int_map_backward, size);

    // Dispose of the old one.
    free(onig_data->int_map_backward);

    onig_data->int_map_backward = reinterpret_cast<int*>(ba->raw_bytes());

    write_barrier(state, ba);
  }

  Object* md = Qnil;
  if(beg != ONIG_MISMATCH) {
    md = get_match_data(state, region, string, this, pos);
  }

  onig_region_free(region, 1);
  return md;
}
void onig_find_all(char* pattern, char* subject, int subject_len, int repeat) { regex_t* reg; OnigRegion *region; clock_t best_time = 0, time; unsigned char *ptr; int res, len, found; res = onig_new(®, (unsigned char *)pattern, (unsigned char *)pattern + strlen((char* )pattern), ONIG_OPTION_DEFAULT, ONIG_ENCODING_ASCII, ONIG_SYNTAX_DEFAULT, NULL); if (res != ONIG_NORMAL) { printf("Onig compilation failed\n"); return; } region = onig_region_new(); if (!region) { printf("Cannot allocate region\n"); return; } do { found = 0; ptr = (unsigned char *)subject; len = subject_len; time = clock(); while (1) { res = onig_search(reg, ptr, ptr + len, ptr, ptr + len, region, ONIG_OPTION_NONE); if (res < 0) break; // printf("match: %d %d\n", (ptr - (unsigned char *)subject) + region->beg[0], (ptr - (unsigned char *)subject) + region->end[0]); ptr += region->end[0]; len -= region->end[0]; found++; } time = clock() - time; if (!best_time || time < best_time) best_time = time; } while (--repeat > 0); printResult("onig", best_time * 1000 / CLOCKS_PER_SEC, found); onig_region_free(region, 1); onig_free(reg); }
// Releases everything the object owns: the Oniguruma region and compiled
// regex (when present), the heap arrays, and the replacement string object.
bregonig::~bregonig()
{
	// Oniguruma objects go through the library's own release functions.
	if (region != 0) {
		onig_region_free(region, 1);
	}
	if (reg != 0) {
		onig_free(reg);
	}

	// Arrays allocated with new[].
	delete [] outp;
	delete [] splitp;
	delete [] parap;
	delete [] transtblp;
	delete [] startp;
//	delete [] endp;
	delete [] patternp;
//	delete [] prerepp;
//	delete [] optionp;

	if (repstr != 0) {
		delete repstr;
	}
}
// Searches `string` starting at byte offset `start` and running to the end of
// the string. Raises an argument error when the Regexp has no source data or
// when the string's encoding is invalid. Returns the MatchData for the match,
// nil when onig_search reports ONIG_MISMATCH, or 0 when maybe_recompile
// yields no regex.
//
// lock_ is held from recompilation until the int_map_backward bookkeeping is
// done, and is released on the early-return path as well.
MatchData* Regexp::search_from(STATE, String* string, Fixnum* start) {
  int beg, max;
  const UChar *str;
  const UChar *fin;

  if(unlikely(!onig_source_data(state))) {
    Exception::argument_error(state, "Not properly initialized Regexp");
  }

  if(unlikely(!CBOOL(string->valid_encoding_p(state)))) {
    std::ostringstream msg;
    msg << "invalid byte sequence in " << string->encoding(state)->name()->to_string(state);
    Exception::argument_error(state, msg.str().c_str());
  }

  lock_.lock();

  regex_t* data = maybe_recompile(state, string);
  if(!data) {
    // Previously returned with lock_ still held.
    // NOTE(review): assumes maybe_recompile does not release lock_ itself
    // on failure - confirm against its implementation.
    lock_.unlock();
    return 0;
  }

  max = string->byte_size();
  native_int pos = start->to_native();

  str = (UChar*)string->byte_address();
  fin = str + max;

  str += pos;

  // Stack-backed register storage for up to 10 groups.
  OnigPosition begin_reg[10] = { ONIG_REGION_NOTPOS, ONIG_REGION_NOTPOS,
                                 ONIG_REGION_NOTPOS, ONIG_REGION_NOTPOS,
                                 ONIG_REGION_NOTPOS, ONIG_REGION_NOTPOS,
                                 ONIG_REGION_NOTPOS, ONIG_REGION_NOTPOS,
                                 ONIG_REGION_NOTPOS, ONIG_REGION_NOTPOS };
  OnigPosition end_reg[10] =  { ONIG_REGION_NOTPOS, ONIG_REGION_NOTPOS,
                                ONIG_REGION_NOTPOS, ONIG_REGION_NOTPOS,
                                ONIG_REGION_NOTPOS, ONIG_REGION_NOTPOS,
                                ONIG_REGION_NOTPOS, ONIG_REGION_NOTPOS,
                                ONIG_REGION_NOTPOS, ONIG_REGION_NOTPOS };

  OnigRegion region = { 10, 0, begin_reg, end_reg, 0, 0 };

  int* back_match = data->int_map_backward;

  beg = onig_search(data, str, fin, str, fin, &region, ONIG_OPTION_NONE);

  // Seems like onig must setup int_map_backward lazily, so we have to watch
  // for it to appear here.
  if(data->int_map_backward != back_match) {
    native_int size = sizeof(int) * ONIG_CHAR_TABLE_SIZE;
    ByteArray* ba = ByteArray::create_dirty(state, size);
    memcpy(ba->raw_bytes(), data->int_map_backward, size);

    // Dispose of the old one.
    free(data->int_map_backward);

    data->int_map_backward = reinterpret_cast<int*>(ba->raw_bytes());

    write_barrier(state, ba);
  }

  lock_.unlock();

  MatchData* md = nil<MatchData>();
  if(beg != ONIG_MISMATCH) {
    md = get_match_data(state, &region, string, this, pos);
  }

  onig_region_free(&region, 0);
  return md;
}
/* Writes the position that str_pointer2kanjipos() reports for `at` within
   `target` to `nextout` as a decimal number followed by a newline.
   Returns FALSE when fd_write reports failure, TRUE otherwise. */
static BOOL cmd_condition_re_write_pos(sObject* nextout, enum eKanjiCode code, char* target, char* at)
{
    int point = str_pointer2kanjipos(code, target, at);
    char buf[1024];
    int size = snprintf(buf, 1024, "%d\n", point);

    return fd_write(nextout, buf, size) ? TRUE : FALSE;
}

/*
 * "=~" filter command: matches the pipe input against the regex given as the
 * second runtime argument.
 *
 * On a match it stores PREMATCH / MATCH / POSTMATCH, the numbered groups
 * ("0", "1", ...), LAST_MATCH and MATCH_NUMBER in gRootObject, optionally
 * prints the match position (-verbose) and every group's begin/end position
 * (-offsets), and sets mRCode to 0. Without a match mRCode stays
 * RCODE_NFUN_FALSE.
 *
 * Returns FALSE when the regex is invalid or when output is interrupted,
 * TRUE otherwise.
 */
BOOL cmd_condition_re(sObject* nextin, sObject* nextout, sRunInfo* runinfo)
{
    enum eKanjiCode code = gKanjiCode;
    if(sRunInfo_option(runinfo, "-byte")) {
        code = kByte;
    }
    else if(sRunInfo_option(runinfo, "-utf8")) {
        code = kUtf8;
    }
    else if(sRunInfo_option(runinfo, "-sjis")) {
        code = kSjis;
    }
    else if(sRunInfo_option(runinfo, "-eucjp")) {
        code = kEucjp;
    }

    BOOL verbose = sRunInfo_option(runinfo, "-verbose");
    BOOL offsets = sRunInfo_option(runinfo, "-offsets");

    if(runinfo->mFilter && runinfo->mArgsNumRuntime == 2) {
        clear_matching_info_variable();

        //BOOL preserve = sRunInfo_option(runinfo, "-preserve");
        runinfo->mRCode = RCODE_NFUN_FALSE;
        char* target = SFD(nextin).mBuf;
        char* target_end = target + strlen(target);
        char* regex = runinfo->mArgsRuntime[1];

        /* Initialised because the failure branch below frees it.
           NOTE(review): whether get_onig_regex always assigns reg on failure
           is not visible here; NULL keeps that onig_free call safe. */
        regex_t* reg = NULL;
        int r = get_onig_regex(&reg, runinfo, regex);

        if(r == ONIG_NORMAL) {
            //sObject* preserved_data = STRING_NEW_STACK();

            OnigRegion* region = onig_region_new();
            int r2 = onig_search(reg, target
               , target_end
               , target, target_end
               , region, ONIG_OPTION_NONE);

            if(r2 >= 0) {
                BOOL interrupted = FALSE;

                if(region->beg[0] > 0) {
                    uobject_put(gRootObject, "PREMATCH", STRING_NEW_GC3(target, region->beg[0], FALSE));
                }

                const int size = region->end[0] - region->beg[0];

                uobject_put(gRootObject, "MATCH", STRING_NEW_GC3(target + region->beg[0], size, FALSE));
                uobject_put(gRootObject, "0", STRING_NEW_GC3(target + region->beg[0], size, FALSE));

                const int n = (target_end - target) - region->end[0];
                if(n > 0) {
                    uobject_put(gRootObject, "POSTMATCH", STRING_NEW_GC3(target + region->end[0], n, FALSE));
                }

                int i;
                for (i=1; i<region->num_regs; i++) {
                    const int size = region->end[i] - region->beg[i];

                    char name[16];
                    snprintf(name, 16, "%d", i);

                    uobject_put(gRootObject, name, STRING_NEW_GC3(target + region->beg[i], size, FALSE));
                }

                if(region->num_regs > 0) {
                    const int n = region->num_regs -1;

                    const int size = region->end[n] - region->beg[n];

                    uobject_put(gRootObject, "LAST_MATCH", STRING_NEW_GC3(target + region->beg[n], size, FALSE));
                }

                char buf[128];
                snprintf(buf, 128, "%d", region->num_regs);
                uobject_put(gRootObject, "MATCH_NUMBER", STRING_NEW_GC(buf, FALSE));

                if(verbose) {
                    if(!cmd_condition_re_write_pos(nextout, code, target, target + r2)) {
                        interrupted = TRUE;
                    }
                }

                if(!interrupted && offsets) {
                    /* begin position, then end position, for every group */
                    for (i=0; i<region->num_regs; i++) {
                        if(!cmd_condition_re_write_pos(nextout, code, target, target + region->beg[i])
                            || !cmd_condition_re_write_pos(nextout, code, target, target + region->end[i]))
                        {
                            interrupted = TRUE;
                            break;
                        }
                    }
                }

                if(interrupted) {
                    err_msg("signal interrupt", runinfo->mSName, runinfo->mSLine);
                    runinfo->mRCode = RCODE_SIGNAL_INTERRUPT;
                    onig_region_free(region, 1);
                    onig_free(reg);
                    return FALSE;
                }

                runinfo->mRCode = 0;
            }

            onig_region_free(region, 1);
            onig_free(reg);
        }
        else {
            onig_free(reg);

            err_msg("=~: invalid regex", runinfo->mSName, runinfo->mSLine);

            return FALSE;
        }
    }

    return TRUE;
}
/* Releases the contents of the register set `r` while leaving the OnigRegion
   structure itself alone. */
extern void re_free_registers(OnigRegion* r)
{
  const int free_self = 0;  /* 0: don't free self */

  onig_region_free(r, free_self);
}
extern int main(int argc, char* argv[]) { static OnigSyntaxType SQLSyntax; int r; unsigned char *start, *range, *end; regex_t* reg; OnigErrorInfo einfo; OnigRegion *region; static UChar* pattern = (UChar* )"\\_%\\\\__zz"; static UChar* str = (UChar* )"a_abcabcabc\\ppzz"; onig_set_syntax_op (&SQLSyntax, ONIG_SYN_OP_VARIABLE_META_CHARACTERS); onig_set_syntax_op2 (&SQLSyntax, 0); onig_set_syntax_behavior(&SQLSyntax, 0); onig_set_syntax_options (&SQLSyntax, ONIG_OPTION_MULTILINE); onig_set_meta_char(&SQLSyntax, ONIG_META_CHAR_ESCAPE, (OnigCodePoint )'\\'); onig_set_meta_char(&SQLSyntax, ONIG_META_CHAR_ANYCHAR, (OnigCodePoint )'_'); onig_set_meta_char(&SQLSyntax, ONIG_META_CHAR_ANYTIME, ONIG_INEFFECTIVE_META_CHAR); onig_set_meta_char(&SQLSyntax, ONIG_META_CHAR_ZERO_OR_ONE_TIME, ONIG_INEFFECTIVE_META_CHAR); onig_set_meta_char(&SQLSyntax, ONIG_META_CHAR_ONE_OR_MORE_TIME, ONIG_INEFFECTIVE_META_CHAR); onig_set_meta_char(&SQLSyntax, ONIG_META_CHAR_ANYCHAR_ANYTIME, (OnigCodePoint )'%'); r = onig_new(®, pattern, pattern + strlen((char* )pattern), ONIG_OPTION_DEFAULT, ONIG_ENCODING_ASCII, &SQLSyntax, &einfo); if (r != ONIG_NORMAL) { char s[ONIG_MAX_ERROR_MESSAGE_LEN]; onig_error_code_to_str(s, r, &einfo); fprintf(stderr, "ERROR: %s\n", s); return -1; } region = onig_region_new(); end = str + strlen((char* )str); start = str; range = end; r = onig_search(reg, str, end, start, range, region, ONIG_OPTION_NONE); if (r >= 0) { int i; fprintf(stderr, "match at %d\n", r); for (i = 0; i < region->num_regs; i++) { fprintf(stderr, "%d: (%ld-%ld)\n", i, region->beg[i], region->end[i]); } } else if (r == ONIG_MISMATCH) { fprintf(stderr, "search fail\n"); } else { /* error */ char s[ONIG_MAX_ERROR_MESSAGE_LEN]; onig_error_code_to_str(s, r); fprintf(stderr, "ERROR: %s\n", s); return -1; } onig_region_free(region, 1 /* 1:free self, 0:free contents only */); onig_free(reg); onig_end(); return 0; }
/* {{{ _php_mb_regex_ereg_replace_exec
 *
 * Shared implementation behind the mb_ereg_replace family.
 *
 *   options     - Oniguruma options supplied by the caller; merged with the
 *                 defaults, or overridden by the optional option string.
 *   is_callable - non-zero when the replacement argument is a callback
 *                 ("zfs|s" signature) instead of a string ("zss|s").
 *
 * Every match of the pattern in the subject is replaced by
 *   - the replacement string with \0..\9 back-references expanded,
 *   - the result of eval'ing that expanded string when the 'e' option is set,
 *   - or the return value of the callback, which receives the array of
 *     sub-matches.
 * Unmatched text is copied through unchanged.
 *
 * Return value: the resulting string; FALSE on parameter, compile, search or
 * eval failure; NULL when the subject is not valid in the current encoding.
 */
static void _php_mb_regex_ereg_replace_exec(INTERNAL_FUNCTION_PARAMETERS, OnigOptionType options, int is_callable)
{
	zval *arg_pattern_zval;

	char *arg_pattern;
	size_t arg_pattern_len;

	char *replace;
	size_t replace_len;

	zend_fcall_info arg_replace_fci;
	zend_fcall_info_cache arg_replace_fci_cache;

	char *string;
	size_t string_len;

	char *p;
	php_mb_regex_t *re;
	OnigSyntaxType *syntax;
	OnigRegion *regs = NULL;
	smart_str out_buf = {0};
	smart_str eval_buf = {0};
	smart_str *pbuf;
	size_t i;
	int err, eval, n;
	OnigUChar *pos;
	OnigUChar *string_lim;
	char *description = NULL;
	char pat_buf[6];

	const mbfl_encoding *enc;

	{
		const char *current_enc_name;
		current_enc_name = _php_mb_regex_mbctype2name(MBREX(current_mbctype));
		if (current_enc_name == NULL ||
			(enc = mbfl_name2encoding(current_enc_name)) == NULL) {
			php_error_docref(NULL, E_WARNING, "Unknown error");
			RETURN_FALSE;
		}
	}
	eval = 0;
	{
		char *option_str = NULL;
		size_t option_str_len = 0;

		if (!is_callable) {
			if (zend_parse_parameters(ZEND_NUM_ARGS(), "zss|s",
						&arg_pattern_zval,
						&replace, &replace_len,
						&string, &string_len,
						&option_str, &option_str_len) == FAILURE) {
				RETURN_FALSE;
			}
		} else {
			if (zend_parse_parameters(ZEND_NUM_ARGS(), "zfs|s",
						&arg_pattern_zval,
						&arg_replace_fci, &arg_replace_fci_cache,
						&string, &string_len,
						&option_str, &option_str_len) == FAILURE) {
				RETURN_FALSE;
			}
		}

		if (!php_mb_check_encoding(
		string,
		string_len,
		_php_mb_regex_mbctype2name(MBREX(current_mbctype))
		)) {
			RETURN_NULL();
		}

		if (option_str != NULL) {
			_php_mb_regex_init_options(option_str, option_str_len, &options, &syntax, &eval);
		} else {
			options |= MBREX(regex_default_options);
			syntax = MBREX(regex_default_syntax);
		}
	}
	if (eval && !is_callable) {
		php_error_docref(NULL, E_DEPRECATED, "The 'e' option is deprecated, use mb_ereg_replace_callback instead");
	}
	if (Z_TYPE_P(arg_pattern_zval) == IS_STRING) {
		arg_pattern = Z_STRVAL_P(arg_pattern_zval);
		arg_pattern_len = Z_STRLEN_P(arg_pattern_zval);
	} else {
		/* FIXME: this code is not multibyte aware! */
		convert_to_long_ex(arg_pattern_zval);
		pat_buf[0] = (char)Z_LVAL_P(arg_pattern_zval);
		pat_buf[1] = '\0';
		pat_buf[2] = '\0';
		pat_buf[3] = '\0';
		pat_buf[4] = '\0';
		pat_buf[5] = '\0';

		arg_pattern = pat_buf;
		arg_pattern_len = 1;
	}
	/* create regex pattern buffer */
	re = php_mbregex_compile_pattern(arg_pattern, arg_pattern_len, options, MBREX(current_mbctype), syntax);
	if (re == NULL) {
		RETURN_FALSE;
	}

	/* Reject 'e' + callback before `description` is allocated; the check used
	 * to come after the allocation and leaked it on this return. */
	if (is_callable) {
		if (eval) {
			php_error_docref(NULL, E_WARNING, "Option 'e' cannot be used with replacement callback");
			RETURN_FALSE;
		}
	}

	if (eval || is_callable) {
		pbuf = &eval_buf;
		description = zend_make_compiled_string_description("mbregex replace");
	} else {
		pbuf = &out_buf;
		description = NULL;
	}

	/* do the actual work */
	err = 0;
	pos = (OnigUChar *)string;
	string_lim = (OnigUChar*)(string + string_len);
	regs = onig_region_new();
	while (err >= 0) {
		err = onig_search(re, (OnigUChar *)string, (OnigUChar *)string_lim, pos, (OnigUChar *)string_lim, regs, 0);
		if (err <= -2) {
			OnigUChar err_str[ONIG_MAX_ERROR_MESSAGE_LEN];
			onig_error_code_to_str(err_str, err);
			php_error_docref(NULL, E_WARNING, "mbregex search failure in php_mbereg_replace_exec(): %s", err_str);
			break;
		}
		if (err >= 0) {
#if moriyoshi_0
			if (regs->beg[0] == regs->end[0]) {
				php_error_docref(NULL, E_WARNING, "Empty regular expression");
				break;
			}
#endif
			/* copy the part of the string before the match */
			smart_str_appendl(&out_buf, (char *)pos, (size_t)((OnigUChar *)(string + regs->beg[0]) - pos));

			if (!is_callable) {
				/* copy replacement and backrefs */
				i = 0;
				p = replace;
				while (i < replace_len) {
					int fwd = (int) php_mb_mbchar_bytes_ex(p, enc);
					n = -1;
					if ((replace_len - i) >= 2 && fwd == 1 &&
					p[0] == '\\' && p[1] >= '0' && p[1] <= '9') {
						n = p[1] - '0';
					}
					if (n >= 0 && n < regs->num_regs) {
						if (regs->beg[n] >= 0 && regs->beg[n] < regs->end[n] && (size_t)regs->end[n] <= string_len) {
							smart_str_appendl(pbuf, string + regs->beg[n], regs->end[n] - regs->beg[n]);
						}
						p += 2;
						i += 2;
					} else {
						smart_str_appendl(pbuf, p, fwd);
						p += fwd;
						i += fwd;
					}
				}
			}

			if (eval) {
				zval v;
				zend_string *eval_str;
				/* null terminate buffer */
				smart_str_0(&eval_buf);

				if (eval_buf.s) {
					eval_str = eval_buf.s;
				} else {
					eval_str = ZSTR_EMPTY_ALLOC();
				}

				/* do eval */
				if (zend_eval_stringl(ZSTR_VAL(eval_str), ZSTR_LEN(eval_str), &v, description) == FAILURE) {
					efree(description);
					zend_throw_error(NULL, "Failed evaluating code: %s%s", PHP_EOL, ZSTR_VAL(eval_str));
					/* free the region structure too (was free_self = 0,
					 * which leaked the OnigRegion itself on this return) */
					onig_region_free(regs, 1);
					smart_str_free(&out_buf);
					smart_str_free(&eval_buf);
					RETURN_FALSE;
				}

				/* result of eval */
				convert_to_string(&v);
				smart_str_appendl(&out_buf, Z_STRVAL(v), Z_STRLEN(v));
				/* Clean up */
				smart_str_free(&eval_buf);
				zval_dtor(&v);
			} else if (is_callable) {
				zval args[1];
				zval subpats, retval;
				int i;

				array_init(&subpats);
				for (i = 0; i < regs->num_regs; i++) {
					add_next_index_stringl(&subpats, string + regs->beg[i], regs->end[i] - regs->beg[i]);
				}

				ZVAL_COPY_VALUE(&args[0], &subpats);
				/* null terminate buffer */
				smart_str_0(&eval_buf);

				arg_replace_fci.param_count = 1;
				arg_replace_fci.params = args;
				arg_replace_fci.retval = &retval;
				if (zend_call_function(&arg_replace_fci, &arg_replace_fci_cache) == SUCCESS &&
						!Z_ISUNDEF(retval)) {
					convert_to_string_ex(&retval);
					smart_str_appendl(&out_buf, Z_STRVAL(retval), Z_STRLEN(retval));
					smart_str_free(&eval_buf);
					zval_ptr_dtor(&retval);
				} else {
					if (!EG(exception)) {
						php_error_docref(NULL, E_WARNING, "Unable to call custom replacement function");
					}
				}
				zval_ptr_dtor(&subpats);
			}

			/* advance past the match; for an empty match copy one byte
			 * through and step over it so the loop makes progress */
			n = regs->end[0];
			if ((pos - (OnigUChar *)string) < n) {
				pos = (OnigUChar *)string + n;
			} else {
				if (pos < string_lim) {
					smart_str_appendl(&out_buf, (char *)pos, 1);
				}
				pos++;
			}
		} else { /* nomatch */
			/* stick that last bit of string on our output */
			if (string_lim - pos > 0) {
				smart_str_appendl(&out_buf, (char *)pos, string_lim - pos);
			}
		}
		/* release the register contents only; the struct is reused */
		onig_region_free(regs, 0);
	}

	if (description) {
		efree(description);
	}
	if (regs != NULL) {
		onig_region_free(regs, 1);
	}
	smart_str_free(&eval_buf);

	if (err <= -2) {
		smart_str_free(&out_buf);
		RETVAL_FALSE;
	} else if (out_buf.s) {
		smart_str_0(&out_buf);
		RETVAL_STR(out_buf.s);
	} else {
		RETVAL_EMPTY_STRING();
	}
}
/* {{{ _php_mb_regex_ereg_exec
 *
 * Shared implementation of the case-sensitive and case-insensitive ereg
 * variants (icase != 0 adds ONIG_OPTION_IGNORECASE).
 *
 * Searches the subject for the pattern. When the optional third argument is
 * supplied it is reset to an empty array and then filled with one entry per
 * register: the matched text, or FALSE for a register with no usable range.
 *
 * Return value: FALSE on parameter/encoding/compile failure, empty pattern or
 * no match. Otherwise 1 when no array was requested, or the byte length of
 * the whole match (reported as 1 when that length is 0).
 */
static void _php_mb_regex_ereg_exec(INTERNAL_FUNCTION_PARAMETERS, int icase)
{
	zval *arg_pattern, *array = NULL;
	char *string;
	size_t string_len;
	php_mb_regex_t *re;
	OnigRegion *regs = NULL;
	int idx, match_len, first, last;
	int encoding_ok;
	int found;
	OnigOptionType options;
	OnigUChar *subject, *subject_end;

	if (zend_parse_parameters(ZEND_NUM_ARGS(), "zs|z/", &arg_pattern, &string, &string_len, &array) == FAILURE) {
		RETURN_FALSE;
	}

	encoding_ok = php_mb_check_encoding(
		string,
		string_len,
		_php_mb_regex_mbctype2name(MBREX(current_mbctype))
	);

	/* the output array is reset whether or not the encoding was valid */
	if (array != NULL) {
		zval_dtor(array);
		array_init(array);
	}

	if (!encoding_ok) {
		RETURN_FALSE;
	}

	options = MBREX(regex_default_options);
	if (icase) {
		options |= ONIG_OPTION_IGNORECASE;
	}

	/* compile the regular expression from the supplied regex */
	if (Z_TYPE_P(arg_pattern) != IS_STRING) {
		/* we convert numbers to integers and treat them as a string */
		if (Z_TYPE_P(arg_pattern) == IS_DOUBLE) {
			convert_to_long_ex(arg_pattern);	/* get rid of decimal places */
		}
		convert_to_string_ex(arg_pattern);
		/* don't bother doing an extended regex with just a number */
	}

	if (Z_STRLEN_P(arg_pattern) == 0) {
		php_error_docref(NULL, E_WARNING, "empty pattern");
		RETVAL_FALSE;
		goto out;
	}

	re = php_mbregex_compile_pattern(Z_STRVAL_P(arg_pattern), Z_STRLEN_P(arg_pattern), options, MBREX(current_mbctype), MBREX(regex_default_syntax));
	if (re == NULL) {
		RETVAL_FALSE;
		goto out;
	}

	regs = onig_region_new();

	/* actually execute the regular expression */
	subject = (OnigUChar *)string;
	subject_end = (OnigUChar *)(string + string_len);
	found = onig_search(re, subject, subject_end, subject, subject_end, regs, 0);
	if (found < 0) {
		RETVAL_FALSE;
		goto out;
	}

	if (array == NULL) {
		match_len = 1;
	} else {
		match_len = regs->end[0] - regs->beg[0];
		for (idx = 0; idx < regs->num_regs; idx++) {
			first = regs->beg[idx];
			last = regs->end[idx];
			if (first >= 0 && first < last && (size_t)last <= string_len) {
				add_index_stringl(array, idx, (char *)&string[first], last - first);
			} else {
				add_index_bool(array, idx, 0);
			}
		}
	}

	/* an empty match is still reported as a truthy length */
	if (match_len == 0) {
		match_len = 1;
	}
	RETVAL_LONG(match_len);
out:
	if (regs != NULL) {
		onig_region_free(regs, 1);
	}
}
/* {{{ _php_mb_regex_ereg_search_exec
 *
 * Shared implementation of the mb_ereg_search family. Continues searching the
 * string stored in MBREX(search_str) from MBREX(search_pos), optionally
 * compiling a new pattern first, and keeps the resulting registers in
 * MBREX(search_regs).
 *
 *   mode 1  - returns array(begin, length) of the whole match
 *   mode 2  - returns an array with the text of every register (FALSE for a
 *             register without a usable range)
 *   other   - returns TRUE
 *
 * Returns FALSE when there is no regex, no string, no match or a search
 * error. After a match MBREX(search_pos) moves to the end of the match (or
 * one past the old position when the match ends before it); after a mismatch
 * it moves to the end of the string.
 */
static void
_php_mb_regex_ereg_search_exec(INTERNAL_FUNCTION_PARAMETERS, int mode)
{
	char *arg_pattern = NULL, *arg_options = NULL;
	size_t arg_pattern_len, arg_options_len;
	int err;
	size_t nregs, idx, pos, len, beg, end;
	OnigOptionType option;
	OnigUChar *str = NULL;
	OnigSyntaxType *syntax;

	if (zend_parse_parameters(ZEND_NUM_ARGS(), "|ss", &arg_pattern, &arg_pattern_len, &arg_options, &arg_options_len) == FAILURE) {
		return;
	}

	option = MBREX(regex_default_options);
	if (arg_options) {
		option = 0;
		_php_mb_regex_init_options(arg_options, arg_options_len, &option, &syntax, NULL);
	}

	if (arg_pattern) {
		/* create regex pattern buffer */
		MBREX(search_re) = php_mbregex_compile_pattern(arg_pattern, arg_pattern_len, option, MBREX(current_mbctype), MBREX(regex_default_syntax));
		if (MBREX(search_re) == NULL) {
			RETURN_FALSE;
		}
	}

	pos = MBREX(search_pos);
	len = 0;
	if (!Z_ISUNDEF(MBREX(search_str)) && Z_TYPE(MBREX(search_str)) == IS_STRING){
		str = (OnigUChar *)Z_STRVAL(MBREX(search_str));
		len = Z_STRLEN(MBREX(search_str));
	}

	if (MBREX(search_re) == NULL) {
		php_error_docref(NULL, E_WARNING, "No regex given");
		RETURN_FALSE;
	}

	if (str == NULL) {
		php_error_docref(NULL, E_WARNING, "No string given");
		RETURN_FALSE;
	}

	/* start every search with a fresh register set */
	if (MBREX(search_regs)) {
		onig_region_free(MBREX(search_regs), 1);
	}
	MBREX(search_regs) = onig_region_new();

	err = onig_search(MBREX(search_re), str, str + len, str + pos, str  + len, MBREX(search_regs), 0);
	if (err == ONIG_MISMATCH) {
		MBREX(search_pos) = len;
		RETVAL_FALSE;
	} else if (err <= -2) {
		OnigUChar err_str[ONIG_MAX_ERROR_MESSAGE_LEN];
		onig_error_code_to_str(err_str, err);
		php_error_docref(NULL, E_WARNING, "mbregex search failure in mbregex_search(): %s", err_str);
		RETVAL_FALSE;
	} else {
		if (mode == 1) {
			array_init(return_value);
			beg = MBREX(search_regs)->beg[0];
			end = MBREX(search_regs)->end[0];
			add_next_index_long(return_value, beg);
			add_next_index_long(return_value, end - beg);
		} else if (mode == 2) {
			array_init(return_value);
			nregs = MBREX(search_regs)->num_regs;
			for (idx = 0; idx < nregs; idx++) {
				beg = MBREX(search_regs)->beg[idx];
				end = MBREX(search_regs)->end[idx];
				if (beg >= 0 && beg <= end && end <= len) {
					add_index_stringl(return_value, idx, (char *)&str[beg], end - beg);
				} else {
					add_index_bool(return_value, idx, 0);
				}
			}
		} else {
			RETVAL_TRUE;
		}

		/* remember where the next search should resume */
		end = MBREX(search_regs)->end[0];
		if (pos <= end) {
			MBREX(search_pos) = end;
		} else {
			MBREX(search_pos) = pos + 1;
		}
	}

	/* nothing useful is left in the registers after a failed search */
	if (err < 0) {
		onig_region_free(MBREX(search_regs), 1);
		MBREX(search_regs) = (OnigRegion *)NULL;
	}
}
/*
 * Finalizer for an opaque value that wraps an OnigRegion: takes the pointer
 * stored in `val` and hands it to onig_region_free() with free_self = 1.
 * `world` is not used.
 */
static void region_finalizer(pone_world* world, pone_val* val) {
    onig_region_free(pone_opaque_ptr(val), 1);
}
/**
  Call the Oniguruma regex match API.

  Same parameters as RegularExpressionMatch, except SyntaxType is required.

  @param String         A pointer to a NULL terminated string to match against the
                        regular expression string specified by Pattern.

  @param Pattern        A pointer to a NULL terminated string that represents the
                        regular expression.

  @param SyntaxType     A pointer to the EFI_REGEX_SYNTAX_TYPE that identifies the
                        regular expression syntax type to use. It is passed
                        straight to CompareGuid(), so it is presumably required
                        to be non-NULL (confirm against the caller). Only the
                        POSIX extended and Perl syntax GUIDs are accepted.

  @param Result         On return, points to TRUE if String matches against the
                        regular expression Pattern using the regular expression
                        SyntaxType. Otherwise, points to FALSE.

  @param Captures       A Pointer to an array of EFI_REGEX_CAPTURE objects to receive
                        the captured groups in the event of a match. The full
                        sub-string match is put in Captures[0], and the results of N
                        capturing groups are put in Captures[1:N]. If Captures is
                        NULL, then this function doesn't allocate the memory for the
                        array and does not build up the elements. It only returns the
                        number of matching patterns in CapturesCount. If Captures is
                        not NULL, this function returns a pointer to an array and
                        builds up the elements in the array. CapturesCount is also
                        updated to the number of matching patterns found. It is the
                        caller's responsibility to free the memory pool in Captures
                        and in each CapturePtr in the array elements. If building
                        the array fails, *Captures is set to NULL.

  @param CapturesCount  On output, CapturesCount is the number of matching patterns
                        found in String. Zero means no matching patterns were found
                        in the string.

  @retval  EFI_SUCCESS           Regex compilation and match completed successfully.
  @retval  EFI_UNSUPPORTED       SyntaxType is not one of the supported syntax GUIDs.
  @retval  EFI_DEVICE_ERROR      Regex compilation or the search itself failed.
  @retval  EFI_OUT_OF_RESOURCES  The region or the capture data could not be allocated.

**/
STATIC
EFI_STATUS
OnigurumaMatch (
  IN  CHAR16                 *String,
  IN  CHAR16                 *Pattern,
  IN  EFI_REGEX_SYNTAX_TYPE  *SyntaxType,
  OUT BOOLEAN                *Result,
  OUT EFI_REGEX_CAPTURE      **Captures, OPTIONAL
  OUT UINTN                  *CapturesCount
  )
{
  regex_t         *OnigRegex;
  OnigSyntaxType  *OnigSyntax;
  OnigRegion      *Region;
  INT32           OnigResult;
  OnigErrorInfo   ErrorInfo;
  CHAR8           ErrorMessage[ONIG_MAX_ERROR_MESSAGE_LEN];
  UINT32          Index;
  UINTN           Count;
  OnigUChar       *Start;
  OnigUChar       *End;
  EFI_STATUS      Status;

  Status = EFI_SUCCESS;

  //
  // Determine the internal syntax type
  //
  if (CompareGuid (SyntaxType, &gEfiRegexSyntaxTypePosixExtendedGuid)) {
    OnigSyntax = ONIG_SYNTAX_POSIX_EXTENDED;
  } else if (CompareGuid (SyntaxType, &gEfiRegexSyntaxTypePerlGuid)) {
    OnigSyntax = ONIG_SYNTAX_PERL;
  } else {
    DEBUG ((DEBUG_ERROR, "Unsupported regex syntax\n"));
    return EFI_UNSUPPORTED;
  }

  //
  // Compile pattern
  //
  Start = (OnigUChar*)Pattern;
  OnigResult = onig_new (
                 &OnigRegex,
                 Start,
                 Start + onigenc_str_bytelen_null (CHAR16_ENCODING, Start),
                 ONIG_OPTION_DEFAULT,
                 CHAR16_ENCODING,
                 OnigSyntax,
                 &ErrorInfo
                 );

  if (OnigResult != ONIG_NORMAL) {
    onig_error_code_to_str (ErrorMessage, OnigResult, &ErrorInfo);
    DEBUG ((DEBUG_ERROR, "Regex compilation failed: %a\n", ErrorMessage));
    return EFI_DEVICE_ERROR;
  }

  //
  // Try to match. The byte length of the subject is computed once and used
  // for both the string end and the search range.
  //
  Start = (OnigUChar*)String;
  End   = Start + onigenc_str_bytelen_null (CHAR16_ENCODING, Start);

  Region = onig_region_new ();
  if (Region == NULL) {
    onig_free (OnigRegex);
    return EFI_OUT_OF_RESOURCES;
  }

  OnigResult = onig_search (
                 OnigRegex,
                 Start,
                 End,
                 Start,
                 End,
                 Region,
                 ONIG_OPTION_NONE
                 );

  if (OnigResult >= 0) {
    *Result = TRUE;
  } else {
    *Result = FALSE;
    if (OnigResult != ONIG_MISMATCH) {
      onig_error_code_to_str (ErrorMessage, OnigResult);
      DEBUG ((DEBUG_ERROR, "Regex match failed: %a\n", ErrorMessage));
      onig_region_free (Region, 1);
      onig_free (OnigRegex);
      return EFI_DEVICE_ERROR;
    }
  }

  //
  // Report the number of captures as documented: the region count on a
  // match (even when the caller did not ask for the array), zero otherwise.
  //
  Count = (*Result) ? (UINTN)Region->num_regs : 0;
  if (CapturesCount != NULL) {
    *CapturesCount = Count;
  }

  //
  // If successful, copy out the region (capture) information
  //
  if (*Result && Captures != NULL) {
    *Captures = AllocateZeroPool (Count * sizeof(**Captures));
    if (*Captures == NULL) {
      //
      // Do not report success with a NULL capture array.
      //
      Status = EFI_OUT_OF_RESOURCES;
    } else {
      for (Index = 0; Index < Count; ++Index) {
        //
        // Region beg/end values represent bytes, not characters
        //
        (*Captures)[Index].Length = (Region->end[Index] - Region->beg[Index]) / sizeof(CHAR16);
        (*Captures)[Index].CapturePtr = AllocateCopyPool (
                                          ((*Captures)[Index].Length) * sizeof (CHAR16),
                                          (CHAR16*)((UINTN)String + Region->beg[Index])
                                          );
        if ((*Captures)[Index].CapturePtr == NULL) {
          Status = EFI_OUT_OF_RESOURCES;
          break;
        }
      }

      if (EFI_ERROR (Status)) {
        //
        // The array was zero-initialized, so entries that were never filled
        // are still NULL and are skipped here.
        //
        for (Index = 0; Index < Count; ++Index) {
          if ((*Captures)[Index].CapturePtr != NULL) {
            FreePool ((CHAR16*)(*Captures)[Index].CapturePtr);
          }
        }
        FreePool (*Captures);
        //
        // Never hand a freed pointer back to the caller.
        //
        *Captures = NULL;
      }
    }

    if (EFI_ERROR (Status) && CapturesCount != NULL) {
      *CapturesCount = 0;
    }
  }

  onig_region_free (Region, 1);
  onig_free (OnigRegex);

  return Status;
}
// Searches `string` with this Regexp between the byte offsets `start` and
// `end`.
//
// When `forward` is truthy the search runs from `start` towards `end`;
// otherwise `end` is passed as the search start and `start` as the range
// (presumably a backward search in Oniguruma - confirm against its API docs).
//
// Returns Qnil when either offset lies outside [0, string->size()] or when
// the search reports ONIG_MISMATCH; otherwise returns the result of
// get_match_data() for the match.
Object* Regexp::match_region(STATE, String* string, Fixnum* start,
                             Fixnum* end, Object* forward)
{
  int beg, max;
  const UChar *str;
  Object* md;

  if(unlikely(!onig_data)) {
    Exception::argument_error(state, "Not properly initialized Regexp");
  }

  // thread::Mutex::LockGuard lg(state->shared().onig_lock());

  max = string->size();
  str = (UChar*)string->byte_address();

  native_int i_start = start->to_native();
  native_int i_end = end->to_native();

  // Bounds check.
  if(i_start < 0 || i_end < 0 || i_start > max || i_end > max) {
    return Qnil;
  }

  lock_.lock();

  maybe_recompile(state);

  // The region is built on the stack over two 10-slot arrays, each slot
  // starting out as ONIG_REGION_NOTPOS.
  int begin_reg[10] = { ONIG_REGION_NOTPOS, ONIG_REGION_NOTPOS,
                        ONIG_REGION_NOTPOS, ONIG_REGION_NOTPOS,
                        ONIG_REGION_NOTPOS, ONIG_REGION_NOTPOS,
                        ONIG_REGION_NOTPOS, ONIG_REGION_NOTPOS,
                        ONIG_REGION_NOTPOS, ONIG_REGION_NOTPOS };
  int end_reg[10] =  { ONIG_REGION_NOTPOS, ONIG_REGION_NOTPOS,
                       ONIG_REGION_NOTPOS, ONIG_REGION_NOTPOS,
                       ONIG_REGION_NOTPOS, ONIG_REGION_NOTPOS,
                       ONIG_REGION_NOTPOS, ONIG_REGION_NOTPOS,
                       ONIG_REGION_NOTPOS, ONIG_REGION_NOTPOS };

  OnigRegion region = { 10, 0, begin_reg, end_reg, 0, 0 };

  int* back_match = onig_data->int_map_backward;

  if(!RTEST(forward)) {
    beg = onig_search(onig_data, str, str + max,
                      str + i_end, str + i_start,
                      &region, ONIG_OPTION_NONE);
  } else {
    beg = onig_search(onig_data, str, str + max,
                      str + i_start, str + i_end,
                      &region, ONIG_OPTION_NONE);
  }

  // Seems like onig must setup int_map_backward lazily, so we have to watch
  // for it to appear here.
  if(onig_data->int_map_backward != back_match) {
    native_int size = sizeof(int) * ONIG_CHAR_TABLE_SIZE;
    ByteArray* ba = ByteArray::create(state, size);
    memcpy(ba->raw_bytes(), onig_data->int_map_backward, size);

    // Dispose of the old one.
    free(onig_data->int_map_backward);

    onig_data->int_map_backward = reinterpret_cast<int*>(ba->raw_bytes());

    write_barrier(state, ba);
  }

  lock_.unlock();

  if(beg == ONIG_MISMATCH) {
    onig_region_free(&region, 0);
    return Qnil;
  }

  md = get_match_data(state, &region, string, this, 0);
  onig_region_free(&region, 0);
  return md;
}
// Searches `string` with this Regexp, starting at byte offset `start` and
// running to the end of the string's bytes.
//
// Returns cNil when `start` lies outside [0, string->byte_size()] or when
// the search reports ONIG_MISMATCH; otherwise returns the result of
// get_match_data(), which is given `start` as its final argument.
Object* Regexp::search_from(STATE, String* string, Fixnum* start) {
  int beg, max;
  const UChar *str;
  const UChar *fin;
  Object* md = cNil;

  if(unlikely(!onig_data)) {
    Exception::argument_error(state, "Not properly initialized Regexp");
  }

  lock_.lock();

  maybe_recompile(state, string);

  max = string->byte_size();
  native_int pos = start->to_native();

  // Bounds check: an out-of-range start would move `str` outside the
  // string's bytes before the search even begins.
  if(pos < 0 || pos > max) {
    lock_.unlock();
    return md;
  }

  str = (UChar*)string->byte_address();
  fin = str + max;

  // The search buffer itself begins at `start`, so offsets in the region
  // are relative to that position.
  str += pos;

  // The region is built on the stack over two 10-slot arrays, each slot
  // starting out as ONIG_REGION_NOTPOS.
  int begin_reg[10] = { ONIG_REGION_NOTPOS, ONIG_REGION_NOTPOS,
                        ONIG_REGION_NOTPOS, ONIG_REGION_NOTPOS,
                        ONIG_REGION_NOTPOS, ONIG_REGION_NOTPOS,
                        ONIG_REGION_NOTPOS, ONIG_REGION_NOTPOS,
                        ONIG_REGION_NOTPOS, ONIG_REGION_NOTPOS };
  int end_reg[10] =  { ONIG_REGION_NOTPOS, ONIG_REGION_NOTPOS,
                       ONIG_REGION_NOTPOS, ONIG_REGION_NOTPOS,
                       ONIG_REGION_NOTPOS, ONIG_REGION_NOTPOS,
                       ONIG_REGION_NOTPOS, ONIG_REGION_NOTPOS,
                       ONIG_REGION_NOTPOS, ONIG_REGION_NOTPOS };

  OnigRegion region = { 10, 0, begin_reg, end_reg, 0, 0 };

  int* back_match = onig_data->int_map_backward;

  beg = onig_search(onig_data, str, fin, str, fin,
                    &region, ONIG_OPTION_NONE);

  // Seems like onig must setup int_map_backward lazily, so we have to watch
  // for it to appear here.
  if(onig_data->int_map_backward != back_match) {
    native_int size = sizeof(int) * ONIG_CHAR_TABLE_SIZE;
    ByteArray* ba = ByteArray::create(state, size);
    memcpy(ba->raw_bytes(), onig_data->int_map_backward, size);

    // Dispose of the old one.
    free(onig_data->int_map_backward);

    onig_data->int_map_backward = reinterpret_cast<int*>(ba->raw_bytes());

    write_barrier(state, ba);
  }

  lock_.unlock();

  if(beg != ONIG_MISMATCH) {
    md = get_match_data(state, &region, string, this, pos);
  }

  onig_region_free(&region, 0);
  return md;
}
/*
 * Test driver entry point.
 *
 * Setup: directs err_file to stdout. When POSIX_TEST is defined it selects
 * REG_POSIX_ENCODING_EUC_JP through reg_set_encoding(); otherwise it
 * allocates the file-level `region` with onig_region_new().
 *
 * Body: a fixed sequence of x2(), x3() and n() calls. Those helpers are
 * defined outside this function; judging by the arguments, x2(pattern,
 * string, from, to) presumably expects a match at those byte offsets,
 * x3(..., group) the same for one capture group, and n(pattern, string)
 * expects no match - confirm against their definitions.
 *
 * NOTE(review): the expected offsets for the Japanese literals count two
 * bytes per character, which suggests this file has to be stored and
 * compiled as EUC-JP - confirm the file encoding before editing any literal.
 *
 * Teardown: prints the nsucc / nfail / nerror counters together with
 * onig_version(), and in the non-POSIX build frees `region` and calls
 * onig_end().
 *
 * Returns 0 when nfail and nerror are both zero, -1 otherwise.
 */
extern int main(int argc, char* argv[]) { err_file = stdout; #ifdef POSIX_TEST reg_set_encoding(REG_POSIX_ENCODING_EUC_JP); #else region = onig_region_new(); #endif x2("", "", 0, 0); x2("^", "", 0, 0); x2("$", "", 0, 0); x2("\\G", "", 0, 0); x2("\\A", "", 0, 0); x2("\\Z", "", 0, 0); x2("\\z", "", 0, 0); x2("^$", "", 0, 0); x2("\\ca", "\001", 0, 1); x2("\\C-b", "\002", 0, 1); x2("\\c\\\\", "\034", 0, 1); x2("q[\\c\\\\]", "q\034", 0, 2); x2("", "a", 0, 0); x2("a", "a", 0, 1); x2("aa", "aa", 0, 2); x2("aaa", "aaa", 0, 3); x2("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", 0, 35); x2("ab", "ab", 0, 2); x2("b", "ab", 1, 2); x2("bc", "abc", 1, 3); x2("\\17", "\017", 0, 1); x2("\\x1f", "\x1f", 0, 1); x2("\\xFE", "\xfe", 0, 1); x2("\\w+", "%a\xff\xfe%", 1, 2); x2("a(?#....\\\\JJJJ)b", "ab", 0, 2); x2("(?x) G (o O(?-x)oO) g L", "GoOoOgLe", 0, 7); x2(".", "a", 0, 1); n(".", ""); x2("..", "ab", 0, 2); x2("\\w", "e", 0, 1); n("\\W", "e"); x2("\\s", " ", 0, 1); x2("\\S", "b", 0, 1); x2("\\d", "4", 0, 1); n("\\D", "4"); x2("\\b", "z ", 0, 0); x2("\\b", " z", 1, 1); x2("\\B", "zz ", 1, 1); x2("\\B", "z ", 2, 2); x2("\\B", " z", 0, 0); x2("[ab]", "b", 0, 1); n("[ab]", "c"); x2("[a-z]", "t", 0, 1); n("[^a]", "a"); x2("[^a]", "\n", 0, 1); x2("[]]", "]", 0, 1); n("[^]]", "]"); x2("[\\^]+", "0^^1", 1, 3); x2("[b-]", "b", 0, 1); x2("[b-]", "-", 0, 1); x2("[\\w]", "z", 0, 1); n("[\\w]", " "); x2("[\\W]", "b$", 1, 2); x2("[\\d]", "5", 0, 1); n("[\\d]", "e"); x2("[\\D]", "t", 0, 1); n("[\\D]", "3"); x2("[\\s]", " ", 0, 1); n("[\\s]", "a"); x2("[\\S]", "b", 0, 1); n("[\\S]", " "); x2("[\\w\\d]", "2", 0, 1); n("[\\w\\d]", " "); x2("[[:upper:]]", "B", 0, 1); x2("[*[:xdigit:]+]", "+", 0, 1); x2("[*[:xdigit:]+]", "GHIKK-9+*", 6, 7); x2("[*[:xdigit:]+]", "-@^+", 3, 4); n("[[:upper]]", "A"); x2("[[:upper]]", ":", 0, 1); x2("[\\044-\\047]", "\046", 0, 1); x2("[\\x5a-\\x5c]", "\x5b", 0, 1); x2("[\\x6A-\\x6D]", "\x6c", 0, 1); n("[\\x6A-\\x6D]", "\x6E"); n("^[0-9A-F]+ 
0+ UNDEF ", "75F 00000000 SECT14A notype () External | _rb_apply"); x2("[\\[]", "[", 0, 1); x2("[\\]]", "]", 0, 1); x2("[&]", "&", 0, 1); x2("[[ab]]", "b", 0, 1); x2("[[ab]c]", "c", 0, 1); n("[[^a]]", "a"); n("[^[a]]", "a"); x2("[[ab]&&bc]", "b", 0, 1); n("[[ab]&&bc]", "a"); n("[[ab]&&bc]", "c"); x2("[a-z&&b-y&&c-x]", "w", 0, 1); n("[^a-z&&b-y&&c-x]", "w"); x2("[[^a&&a]&&a-z]", "b", 0, 1); n("[[^a&&a]&&a-z]", "a"); x2("[[^a-z&&bcdef]&&[^c-g]]", "h", 0, 1); n("[[^a-z&&bcdef]&&[^c-g]]", "c"); x2("[^[^abc]&&[^cde]]", "c", 0, 1); x2("[^[^abc]&&[^cde]]", "e", 0, 1); n("[^[^abc]&&[^cde]]", "f"); x2("[a-&&-a]", "-", 0, 1); n("[a-&&-a]", "&"); n("\\wabc", " abc"); x2("a\\Wbc", "a bc", 0, 4); x2("a.b.c", "aabbc", 0, 5); x2(".\\wb\\W..c", "abb bcc", 0, 7); x2("\\s\\wzzz", " zzzz", 0, 5); x2("aa.b", "aabb", 0, 4); n(".a", "ab"); x2(".a", "aa", 0, 2); x2("^a", "a", 0, 1); x2("^a$", "a", 0, 1); x2("^\\w$", "a", 0, 1); n("^\\w$", " "); x2("^\\wab$", "zab", 0, 3); x2("^\\wabcdef$", "zabcdef", 0, 7); x2("^\\w...def$", "zabcdef", 0, 7); x2("\\w\\w\\s\\Waaa\\d", "aa aaa4", 0, 8); x2("\\A\\Z", "", 0, 0); x2("\\Axyz", "xyz", 0, 3); x2("xyz\\Z", "xyz", 0, 3); x2("xyz\\z", "xyz", 0, 3); x2("\\Gaz", "az", 0, 2); n("\\Gz", "bza"); n("az\\G", "az"); n("az\\A", "az"); n("a\\Az", "az"); x2("\\^\\$", "^$", 0, 2); x2("^x?y", "xy", 0, 2); x2("^(x?y)", "xy", 0, 2); x2("\\w", "_", 0, 1); n("\\W", "_"); x2("(?=z)z", "z", 0, 1); n("(?=z).", "a"); x2("(?!z)a", "a", 0, 1); n("(?!z)a", "z"); x2("(?i:a)", "a", 0, 1); x2("(?i:a)", "A", 0, 1); x2("(?i:A)", "a", 0, 1); n("(?i:A)", "b"); x2("(?i:[A-Z])", "a", 0, 1); x2("(?i:[f-m])", "H", 0, 1); x2("(?i:[f-m])", "h", 0, 1); n("(?i:[f-m])", "e"); x2("(?i:[A-c])", "D", 0, 1); x2("(?i:[!-k])", "Z", 0, 1); x2("(?i:[!-k])", "7", 0, 1); x2("(?i:[T-}])", "b", 0, 1); x2("(?i:[T-}])", "{", 0, 1); x2("(?i:\\?a)", "?A", 0, 2); x2("(?i:\\*A)", "*a", 0, 2); n(".", "\n"); x2("(?m:.)", "\n", 0, 1); x2("(?m:.b)", "a\nb", 1, 3); x2(".*abc", "dddabdd\nddabc", 8, 13); 
x2("(?m:.*abc)", "dddabddabc", 0, 10); n("(?i)(?-i)a", "A"); n("(?i)(?-i:a)", "A"); x2("a?", "", 0, 0); x2("a?", "b", 0, 0); x2("a?", "a", 0, 1); x2("a*", "", 0, 0); x2("a*", "a", 0, 1); x2("a*", "aaa", 0, 3); x2("a*", "baaaa", 0, 0); n("a+", ""); x2("a+", "a", 0, 1); x2("a+", "aaaa", 0, 4); x2("a+", "aabbb", 0, 2); x2("a+", "baaaa", 1, 5); x2(".?", "", 0, 0); x2(".?", "f", 0, 1); x2(".?", "\n", 0, 0); x2(".*", "", 0, 0); x2(".*", "abcde", 0, 5); x2(".+", "z", 0, 1); x2(".+", "zdswer\n", 0, 6); x2("a|b", "a", 0, 1); x2("a|b", "b", 0, 1); x2("|a", "a", 0, 0); x2("(|a)", "a", 0, 0); x2("ab|bc", "ab", 0, 2); x2("ab|bc", "bc", 0, 2); x2("z(?:ab|bc)", "zbc", 0, 3); x2("a(?:ab|bc)c", "aabc", 0, 4); x2("ab|(?:ac|az)", "az", 0, 2); x2("a|b|c", "dc", 1, 2); x2("a|b|cd|efg|h|ijk|lmn|o|pq|rstuvwx|yz", "pqr", 0, 2); n("a|b|cd|efg|h|ijk|lmn|o|pq|rstuvwx|yz", "mn"); x2("a|^z", "ba", 1, 2); x2("a|^z", "za", 0, 1); x2("a|\\Gz", "bza", 2, 3); x2("a|\\Gz", "za", 0, 1); x2("a|\\Az", "bza", 2, 3); x2("a|\\Az", "za", 0, 1); x2("a|b\\Z", "ba", 1, 2); x2("a|b\\Z", "b", 0, 1); x2("a|b\\z", "ba", 1, 2); x2("a|b\\z", "b", 0, 1); x2("\\w|\\s", " ", 0, 1); n("\\w|\\w", " "); x2("\\w|%", "%", 0, 1); x2("\\w|[&$]", "&", 0, 1); x2("[b-d]|[^e-z]", "a", 0, 1); x2("(?:a|[c-f])|bz", "dz", 0, 1); x2("(?:a|[c-f])|bz", "bz", 0, 2); x2("abc|(?=zz)..f", "zzf", 0, 3); x2("abc|(?!zz)..f", "abf", 0, 3); x2("(?=za)..a|(?=zz)..a", "zza", 0, 3); n("(?>a|abd)c", "abdc"); x2("(?>abd|a)c", "abdc", 0, 4); x2("a?|b", "a", 0, 1); x2("a?|b", "b", 0, 0); x2("a?|b", "", 0, 0); x2("a*|b", "aa", 0, 2); x2("a*|b*", "ba", 0, 0); x2("a*|b*", "ab", 0, 1); x2("a+|b*", "", 0, 0); x2("a+|b*", "bbb", 0, 3); x2("a+|b*", "abbb", 0, 1); n("a+|b+", ""); x2("(a|b)?", "b", 0, 1); x2("(a|b)*", "ba", 0, 2); x2("(a|b)+", "bab", 0, 3); x2("(ab|ca)+", "caabbc", 0, 4); x2("(ab|ca)+", "aabca", 1, 5); x2("(ab|ca)+", "abzca", 0, 2); x2("(a|bab)+", "ababa", 0, 5); x2("(a|bab)+", "ba", 1, 2); x2("(a|bab)+", "baaaba", 1, 4); x2("(?:a|b)(?:a|b)", 
"ab", 0, 2); x2("(?:a*|b*)(?:a*|b*)", "aaabbb", 0, 3); x2("(?:a*|b*)(?:a+|b+)", "aaabbb", 0, 6); x2("(?:a+|b+){2}", "aaabbb", 0, 6); x2("h{0,}", "hhhh", 0, 4); x2("(?:a+|b+){1,2}", "aaabbb", 0, 6); n("ax{2}*a", "0axxxa1"); n("a.{0,2}a", "0aXXXa0"); n("a.{0,2}?a", "0aXXXa0"); n("a.{0,2}?a", "0aXXXXa0"); x2("^a{2,}?a$", "aaa", 0, 3); x2("^[a-z]{2,}?$", "aaa", 0, 3); x2("(?:a+|\\Ab*)cc", "cc", 0, 2); n("(?:a+|\\Ab*)cc", "abcc"); x2("(?:^a+|b+)*c", "aabbbabc", 6, 8); x2("(?:^a+|b+)*c", "aabbbbc", 0, 7); x2("a|(?i)c", "C", 0, 1); x2("(?i)c|a", "C", 0, 1); x2("(?i)c|a", "A", 0, 1); x2("(?i:c)|a", "C", 0, 1); n("(?i:c)|a", "A"); x2("[abc]?", "abc", 0, 1); x2("[abc]*", "abc", 0, 3); x2("[^abc]*", "abc", 0, 0); n("[^abc]+", "abc"); x2("a??", "aaa", 0, 0); x2("ba??b", "bab", 0, 3); x2("a*?", "aaa", 0, 0); x2("ba*?", "baa", 0, 1); x2("ba*?b", "baab", 0, 4); x2("a+?", "aaa", 0, 1); x2("ba+?", "baa", 0, 2); x2("ba+?b", "baab", 0, 4); x2("(?:a?)??", "a", 0, 0); x2("(?:a??)?", "a", 0, 0); x2("(?:a?)+?", "aaa", 0, 1); x2("(?:a+)??", "aaa", 0, 0); x2("(?:a+)??b", "aaab", 0, 4); x2("(?:ab)?{2}", "", 0, 0); x2("(?:ab)?{2}", "ababa", 0, 4); x2("(?:ab)*{0}", "ababa", 0, 0); x2("(?:ab){3,}", "abababab", 0, 8); n("(?:ab){3,}", "abab"); x2("(?:ab){2,4}", "ababab", 0, 6); x2("(?:ab){2,4}", "ababababab", 0, 8); x2("(?:ab){2,4}?", "ababababab", 0, 4); x2("(?:ab){,}", "ab{,}", 0, 5); x2("(?:abc)+?{2}", "abcabcabc", 0, 6); x2("(?:X*)(?i:xa)", "XXXa", 0, 4); x2("(d+)([^abc]z)", "dddz", 0, 4); x2("([^abc]*)([^abc]z)", "dddz", 0, 4); x2("(\\w+)(\\wz)", "dddz", 0, 4); x3("(a)", "a", 0, 1, 1); x3("(ab)", "ab", 0, 2, 1); x2("((ab))", "ab", 0, 2); x3("((ab))", "ab", 0, 2, 1); x3("((ab))", "ab", 0, 2, 2); x3("((((((((((((((((((((ab))))))))))))))))))))", "ab", 0, 2, 20); x3("(ab)(cd)", "abcd", 0, 2, 1); x3("(ab)(cd)", "abcd", 2, 4, 2); x3("()(a)bc(def)ghijk", "abcdefghijk", 3, 6, 3); x3("(()(a)bc(def)ghijk)", "abcdefghijk", 3, 6, 4); x2("(^a)", "a", 0, 1); x3("(a)|(a)", "ba", 1, 2, 1); x3("(^a)|(a)", 
"ba", 1, 2, 2); x3("(a?)", "aaa", 0, 1, 1); x3("(a*)", "aaa", 0, 3, 1); x3("(a*)", "", 0, 0, 1); x3("(a+)", "aaaaaaa", 0, 7, 1); x3("(a+|b*)", "bbbaa", 0, 3, 1); x3("(a+|b?)", "bbbaa", 0, 1, 1); x3("(abc)?", "abc", 0, 3, 1); x3("(abc)*", "abc", 0, 3, 1); x3("(abc)+", "abc", 0, 3, 1); x3("(xyz|abc)+", "abc", 0, 3, 1); x3("([xyz][abc]|abc)+", "abc", 0, 3, 1); x3("((?i:abc))", "AbC", 0, 3, 1); x2("(abc)(?i:\\1)", "abcABC", 0, 6); x3("((?m:a.c))", "a\nc", 0, 3, 1); x3("((?=az)a)", "azb", 0, 1, 1); x3("abc|(.abd)", "zabd", 0, 4, 1); x2("(?:abc)|(ABC)", "abc", 0, 3); x3("(?i:(abc))|(zzz)", "ABC", 0, 3, 1); x3("a*(.)", "aaaaz", 4, 5, 1); x3("a*?(.)", "aaaaz", 0, 1, 1); x3("a*?(c)", "aaaac", 4, 5, 1); x3("[bcd]a*(.)", "caaaaz", 5, 6, 1); x3("(\\Abb)cc", "bbcc", 0, 2, 1); n("(\\Abb)cc", "zbbcc"); x3("(^bb)cc", "bbcc", 0, 2, 1); n("(^bb)cc", "zbbcc"); x3("cc(bb$)", "ccbb", 2, 4, 1); n("cc(bb$)", "ccbbb"); n("(\\1)", ""); n("\\1(a)", "aa"); n("(a(b)\\1)\\2+", "ababb"); n("(?:(?:\\1|z)(a))+$", "zaa"); x2("(?:(?:\\1|z)(a))+$", "zaaa", 0, 4); x2("(a)(?=\\1)", "aa", 0, 1); n("(a)$|\\1", "az"); x2("(a)\\1", "aa", 0, 2); n("(a)\\1", "ab"); x2("(a?)\\1", "aa", 0, 2); x2("(a??)\\1", "aa", 0, 0); x2("(a*)\\1", "aaaaa", 0, 4); x3("(a*)\\1", "aaaaa", 0, 2, 1); x2("a(b*)\\1", "abbbb", 0, 5); x2("a(b*)\\1", "ab", 0, 1); x2("(a*)(b*)\\1\\2", "aaabbaaabb", 0, 10); x2("(a*)(b*)\\2", "aaabbbb", 0, 7); x2("(((((((a*)b))))))c\\7", "aaabcaaa", 0, 8); x3("(((((((a*)b))))))c\\7", "aaabcaaa", 0, 3, 7); x2("(a)(b)(c)\\2\\1\\3", "abcbac", 0, 6); x2("([a-d])\\1", "cc", 0, 2); x2("(\\w\\d\\s)\\1", "f5 f5 ", 0, 6); n("(\\w\\d\\s)\\1", "f5 f5"); x2("(who|[a-c]{3})\\1", "whowho", 0, 6); x2("...(who|[a-c]{3})\\1", "abcwhowho", 0, 9); x2("(who|[a-c]{3})\\1", "cbccbc", 0, 6); x2("(^a)\\1", "aa", 0, 2); n("(^a)\\1", "baa"); n("(a$)\\1", "aa"); n("(ab\\Z)\\1", "ab"); x2("(a*\\Z)\\1", "a", 1, 1); x2(".(a*\\Z)\\1", "ba", 1, 2); x3("(.(abc)\\2)", "zabcabc", 0, 7, 1); x3("(.(..\\d.)\\2)", "z12341234", 0, 9, 1); 
x2("((?i:az))\\1", "AzAz", 0, 4); n("((?i:az))\\1", "Azaz"); x2("(?<=a)b", "ab", 1, 2); n("(?<=a)b", "bb"); x2("(?<=a|b)b", "bb", 1, 2); x2("(?<=a|bc)b", "bcb", 2, 3); x2("(?<=a|bc)b", "ab", 1, 2); x2("(?<=a|bc||defghij|klmnopq|r)z", "rz", 1, 2); x2("(a)\\g<1>", "aa", 0, 2); x2("(?<!a)b", "cb", 1, 2); n("(?<!a)b", "ab"); x2("(?<!a|bc)b", "bbb", 0, 1); n("(?<!a|bc)z", "bcz"); x2("(?<name1>a)", "a", 0, 1); x2("(?<name_2>ab)\\g<name_2>", "abab", 0, 4); x2("(?<name_3>.zv.)\\k<name_3>", "azvbazvb", 0, 8); x2("(?<=\\g<ab>)|-\\zEND (?<ab>XyZ)", "XyZ", 3, 3); x2("(?<n>|a\\g<n>)+", "", 0, 0); x2("(?<n>|\\(\\g<n>\\))+$", "()(())", 0, 6); x3("\\g<n>(?<n>.){0}", "X", 0, 1, 1); x2("\\g<n>(abc|df(?<n>.YZ){2,8}){0}", "XYZ", 0, 3); x2("\\A(?<n>(a\\g<n>)|)\\z", "aaaa", 0, 4); x2("(?<n>|\\g<m>\\g<n>)\\z|\\zEND (?<m>a|(b)\\g<m>)", "bbbbabba", 0, 8); x2("(?<name1240>\\w+\\sx)a+\\k<name1240>", " fg xaaaaaaaafg x", 2, 18); x3("(z)()()(?<_9>a)\\g<_9>", "zaa", 2, 3, 1); x2("(.)(((?<_>a)))\\k<_>", "zaa", 0, 3); x2("((?<name1>\\d)|(?<name2>\\w))(\\k<name1>|\\k<name2>)", "ff", 0, 2); x2("(?:(?<x>)|(?<x>efg))\\k<x>", "", 0, 0); x2("(?:(?<x>abc)|(?<x>efg))\\k<x>", "abcefgefg", 3, 9); n("(?:(?<x>abc)|(?<x>efg))\\k<x>", "abcefg"); x2("(?:(?<n1>.)|(?<n1>..)|(?<n1>...)|(?<n1>....)|(?<n1>.....)|(?<n1>......)|(?<n1>.......)|(?<n1>........)|(?<n1>.........)|(?<n1>..........)|(?<n1>...........)|(?<n1>............)|(?<n1>.............)|(?<n1>..............))\\k<n1>$", "a-pyumpyum", 2, 10); x3("(?:(?<n1>.)|(?<n1>..)|(?<n1>...)|(?<n1>....)|(?<n1>.....)|(?<n1>......)|(?<n1>.......)|(?<n1>........)|(?<n1>.........)|(?<n1>..........)|(?<n1>...........)|(?<n1>............)|(?<n1>.............)|(?<n1>..............))\\k<n1>$", "xxxxabcdefghijklmnabcdefghijklmn", 4, 18, 14); x3("(?<name1>)(?<name2>)(?<name3>)(?<name4>)(?<name5>)(?<name6>)(?<name7>)(?<name8>)(?<name9>)(?<name10>)(?<name11>)(?<name12>)(?<name13>)(?<name14>)(?<name15>)(?<name16>aaa)(?<name17>)$", "aaa", 0, 3, 16); x2("(?<foo>a|\\(\\g<foo>\\))", 
"a", 0, 1); x2("(?<foo>a|\\(\\g<foo>\\))", "((((((a))))))", 0, 13); x3("(?<foo>a|\\(\\g<foo>\\))", "((((((((a))))))))", 0, 17, 1); x2("\\g<bar>|\\zEND(?<bar>.*abc$)", "abcxxxabc", 0, 9); x2("\\g<1>|\\zEND(.a.)", "bac", 0, 3); x3("\\g<_A>\\g<_A>|\\zEND(.a.)(?<_A>.b.)", "xbxyby", 3, 6, 1); x2("\\A(?:\\g<pon>|\\g<pan>|\\zEND (?<pan>a|c\\g<pon>c)(?<pon>b|d\\g<pan>d))$", "cdcbcdc", 0, 7); x2("\\A(?<n>|a\\g<m>)\\z|\\zEND (?<m>\\g<n>)", "aaaa", 0, 4); x2("(?<n>(a|b\\g<n>c){3,5})", "baaaaca", 1, 5); x2("(?<n>(a|b\\g<n>c){3,5})", "baaaacaaaaa", 0, 10); x2("(?<pare>\\(([^\\(\\)]++|\\g<pare>)*+\\))", "((a))", 0, 5); x2("()*\\1", "", 0, 0); x2("(?:()|())*\\1\\2", "", 0, 0); x3("(?:\\1a|())*", "a", 0, 0, 1); x2("x((.)*)*x", "0x1x2x3", 1, 6); x2("x((.)*)*x(?i:\\1)\\Z", "0x1x2x1X2", 1, 9); x2("(?:()|()|()|()|()|())*\\2\\5", "", 0, 0); x2("(?:()|()|()|(x)|()|())*\\2b\\5", "b", 0, 1); x2("", "あ", 0, 0); x2("あ", "あ", 0, 2); n("い", "あ"); x2("うう", "うう", 0, 4); x2("あいう", "あいう", 0, 6); x2("こここここここここここここここここここここここここここここここここここ", "こここここここここここここここここここここここここここここここここここ", 0, 70); x2("あ", "いあ", 2, 4); x2("いう", "あいう", 2, 6); x2("\\xca\\xb8", "\xca\xb8", 0, 2); x2(".", "あ", 0, 2); x2("..", "かき", 0, 4); x2("\\w", "お", 0, 2); n("\\W", "あ"); x2("[\\W]", "う$", 2, 3); x2("\\S", "そ", 0, 2); x2("\\S", "漢", 0, 2); x2("\\b", "気 ", 0, 0); x2("\\b", " ほ", 1, 1); x2("\\B", "せそ ", 2, 2); x2("\\B", "う ", 3, 3); x2("\\B", " い", 0, 0); x2("[たち]", "ち", 0, 2); n("[なに]", "ぬ"); x2("[う-お]", "え", 0, 2); n("[^け]", "け"); x2("[\\w]", "ね", 0, 2); n("[\\d]", "ふ"); x2("[\\D]", "は", 0, 2); n("[\\s]", "く"); x2("[\\S]", "へ", 0, 2); x2("[\\w\\d]", "よ", 0, 2); x2("[\\w\\d]", " よ", 3, 5); n("\\w鬼車", " 鬼車"); x2("鬼\\W車", "鬼 車", 0, 5); x2("あ.い.う", "ああいいう", 0, 10); x2(".\\wう\\W..ぞ", "えうう うぞぞ", 0, 13); x2("\\s\\wこここ", " ここここ", 0, 9); x2("ああ.け", "ああけけ", 0, 8); n(".い", "いえ"); x2(".お", "おお", 0, 4); x2("^あ", "あ", 0, 2); x2("^む$", "む", 0, 2); x2("^\\w$", "に", 0, 2); x2("^\\wかきくけこ$", "zかきくけこ", 0, 11); x2("^\\w...うえお$", "zあいううえお", 0, 13); 
x2("\\w\\w\\s\\Wおおお\\d", "aお おおお4", 0, 12); x2("\\Aたちつ", "たちつ", 0, 6); x2("むめも\\Z", "むめも", 0, 6); x2("かきく\\z", "かきく", 0, 6); x2("かきく\\Z", "かきく\n", 0, 6); x2("\\Gぽぴ", "ぽぴ", 0, 4); n("\\Gえ", "うえお"); n("とて\\G", "とて"); n("まみ\\A", "まみ"); n("ま\\Aみ", "まみ"); x2("(?=せ)せ", "せ", 0, 2); n("(?=う).", "い"); x2("(?!う)か", "か", 0, 2); n("(?!と)あ", "と"); x2("(?i:あ)", "あ", 0, 2); x2("(?i:ぶべ)", "ぶべ", 0, 4); n("(?i:い)", "う"); x2("(?m:よ.)", "よ\n", 0, 3); x2("(?m:.め)", "ま\nめ", 2, 5); x2("あ?", "", 0, 0); x2("変?", "化", 0, 0); x2("変?", "変", 0, 2); x2("量*", "", 0, 0); x2("量*", "量", 0, 2); x2("子*", "子子子", 0, 6); x2("馬*", "鹿馬馬馬馬", 0, 0); n("山+", ""); x2("河+", "河", 0, 2); x2("時+", "時時時時", 0, 8); x2("え+", "ええううう", 0, 4); x2("う+", "おうううう", 2, 10); x2(".?", "た", 0, 2); x2(".*", "ぱぴぷぺ", 0, 8); x2(".+", "ろ", 0, 2); x2(".+", "いうえか\n", 0, 8); x2("あ|い", "あ", 0, 2); x2("あ|い", "い", 0, 2); x2("あい|いう", "あい", 0, 4); x2("あい|いう", "いう", 0, 4); x2("を(?:かき|きく)", "をかき", 0, 6); x2("を(?:かき|きく)け", "をきくけ", 0, 8); x2("あい|(?:あう|あを)", "あを", 0, 4); x2("あ|い|う", "えう", 2, 4); x2("あ|い|うえ|おかき|く|けこさ|しすせ|そ|たち|つてとなに|ぬね", "しすせ", 0, 6); n("あ|い|うえ|おかき|く|けこさ|しすせ|そ|たち|つてとなに|ぬね", "すせ"); x2("あ|^わ", "ぶあ", 2, 4); x2("あ|^を", "をあ", 0, 2); x2("鬼|\\G車", "け車鬼", 4, 6); x2("鬼|\\G車", "車鬼", 0, 2); x2("鬼|\\A車", "b車鬼", 3, 5); x2("鬼|\\A車", "車", 0, 2); x2("鬼|車\\Z", "車鬼", 2, 4); x2("鬼|車\\Z", "車", 0, 2); x2("鬼|車\\Z", "車\n", 0, 2); x2("鬼|車\\z", "車鬼", 2, 4); x2("鬼|車\\z", "車", 0, 2); x2("\\w|\\s", "お", 0, 2); x2("\\w|%", "%お", 0, 1); x2("\\w|[&$]", "う&", 0, 2); x2("[い-け]", "う", 0, 2); x2("[い-け]|[^か-こ]", "あ", 0, 2); x2("[い-け]|[^か-こ]", "か", 0, 2); x2("[^あ]", "\n", 0, 1); x2("(?:あ|[う-き])|いを", "うを", 0, 2); x2("(?:あ|[う-き])|いを", "いを", 0, 4); x2("あいう|(?=けけ)..ほ", "けけほ", 0, 6); x2("あいう|(?!けけ)..ほ", "あいほ", 0, 6); x2("(?=をあ)..あ|(?=をを)..あ", "ををあ", 0, 6); x2("(?<=あ|いう)い", "いうい", 4, 6); n("(?>あ|あいえ)う", "あいえう"); x2("(?>あいえ|あ)う", "あいえう", 0, 8); x2("あ?|い", "あ", 0, 2); x2("あ?|い", "い", 0, 0); x2("あ?|い", "", 0, 0); x2("あ*|い", "ああ", 0, 4); x2("あ*|い*", "いあ", 0, 0); x2("あ*|い*", 
"あい", 0, 2); x2("[aあ]*|い*", "aあいいい", 0, 3); x2("あ+|い*", "", 0, 0); x2("あ+|い*", "いいい", 0, 6); x2("あ+|い*", "あいいい", 0, 2); x2("あ+|い*", "aあいいい", 0, 0); n("あ+|い+", ""); x2("(あ|い)?", "い", 0, 2); x2("(あ|い)*", "いあ", 0, 4); x2("(あ|い)+", "いあい", 0, 6); x2("(あい|うあ)+", "うああいうえ", 0, 8); x2("(あい|うえ)+", "うああいうえ", 4, 12); x2("(あい|うあ)+", "ああいうあ", 2, 10); x2("(あい|うあ)+", "あいをうあ", 0, 4); x2("(あい|うあ)+", "$$zzzzあいをうあ", 6, 10); x2("(あ|いあい)+", "あいあいあ", 0, 10); x2("(あ|いあい)+", "いあ", 2, 4); x2("(あ|いあい)+", "いあああいあ", 2, 8); x2("(?:あ|い)(?:あ|い)", "あい", 0, 4); x2("(?:あ*|い*)(?:あ*|い*)", "あああいいい", 0, 6); x2("(?:あ*|い*)(?:あ+|い+)", "あああいいい", 0, 12); x2("(?:あ+|い+){2}", "あああいいい", 0, 12); x2("(?:あ+|い+){1,2}", "あああいいい", 0, 12); x2("(?:あ+|\\Aい*)うう", "うう", 0, 4); n("(?:あ+|\\Aい*)うう", "あいうう"); x2("(?:^あ+|い+)*う", "ああいいいあいう", 12, 16); x2("(?:^あ+|い+)*う", "ああいいいいう", 0, 14); x2("う{0,}", "うううう", 0, 8); x2("あ|(?i)c", "C", 0, 1); x2("(?i)c|あ", "C", 0, 1); x2("(?i:あ)|a", "a", 0, 1); n("(?i:あ)|a", "A"); x2("[あいう]?", "あいう", 0, 2); x2("[あいう]*", "あいう", 0, 6); x2("[^あいう]*", "あいう", 0, 0); n("[^あいう]+", "あいう"); x2("あ??", "あああ", 0, 0); x2("いあ??い", "いあい", 0, 6); x2("あ*?", "あああ", 0, 0); x2("いあ*?", "いああ", 0, 2); x2("いあ*?い", "いああい", 0, 8); x2("あ+?", "あああ", 0, 2); x2("いあ+?", "いああ", 0, 4); x2("いあ+?い", "いああい", 0, 8); x2("(?:天?)??", "天", 0, 0); x2("(?:天??)?", "天", 0, 0); x2("(?:夢?)+?", "夢夢夢", 0, 2); x2("(?:風+)??", "風風風", 0, 0); x2("(?:雪+)??霜", "雪雪雪霜", 0, 8); x2("(?:あい)?{2}", "", 0, 0); x2("(?:鬼車)?{2}", "鬼車鬼車鬼", 0, 8); x2("(?:鬼車)*{0}", "鬼車鬼車鬼", 0, 0); x2("(?:鬼車){3,}", "鬼車鬼車鬼車鬼車", 0, 16); n("(?:鬼車){3,}", "鬼車鬼車"); x2("(?:鬼車){2,4}", "鬼車鬼車鬼車", 0, 12); x2("(?:鬼車){2,4}", "鬼車鬼車鬼車鬼車鬼車", 0, 16); x2("(?:鬼車){2,4}?", "鬼車鬼車鬼車鬼車鬼車", 0, 8); x2("(?:鬼車){,}", "鬼車{,}", 0, 7); x2("(?:かきく)+?{2}", "かきくかきくかきく", 0, 12); x3("(火)", "火", 0, 2, 1); x3("(火水)", "火水", 0, 4, 1); x2("((時間))", "時間", 0, 4); x3("((風水))", "風水", 0, 4, 1); x3("((昨日))", "昨日", 0, 4, 2); x3("((((((((((((((((((((量子))))))))))))))))))))", "量子", 0, 4, 20); x3("(あい)(うえ)", "あいうえ", 0, 4, 1); 
x3("(あい)(うえ)", "あいうえ", 4, 8, 2); x3("()(あ)いう(えおか)きくけこ", "あいうえおかきくけこ", 6, 12, 3); x3("(()(あ)いう(えおか)きくけこ)", "あいうえおかきくけこ", 6, 12, 4); x3(".*(フォ)ン・マ(ン()シュタ)イン", "フォン・マンシュタイン", 10, 18, 2); x2("(^あ)", "あ", 0, 2); x3("(あ)|(あ)", "いあ", 2, 4, 1); x3("(^あ)|(あ)", "いあ", 2, 4, 2); x3("(あ?)", "あああ", 0, 2, 1); x3("(ま*)", "ままま", 0, 6, 1); x3("(と*)", "", 0, 0, 1); x3("(る+)", "るるるるるるる", 0, 14, 1); x3("(ふ+|へ*)", "ふふふへへ", 0, 6, 1); x3("(あ+|い?)", "いいいああ", 0, 2, 1); x3("(あいう)?", "あいう", 0, 6, 1); x3("(あいう)*", "あいう", 0, 6, 1); x3("(あいう)+", "あいう", 0, 6, 1); x3("(さしす|あいう)+", "あいう", 0, 6, 1); x3("([なにぬ][かきく]|かきく)+", "かきく", 0, 6, 1); x3("((?i:あいう))", "あいう", 0, 6, 1); x3("((?m:あ.う))", "あ\nう", 0, 5, 1); x3("((?=あん)あ)", "あんい", 0, 2, 1); x3("あいう|(.あいえ)", "んあいえ", 0, 8, 1); x3("あ*(.)", "ああああん", 8, 10, 1); x3("あ*?(.)", "ああああん", 0, 2, 1); x3("あ*?(ん)", "ああああん", 8, 10, 1); x3("[いうえ]あ*(.)", "えああああん", 10, 12, 1); x3("(\\Aいい)うう", "いいうう", 0, 4, 1); n("(\\Aいい)うう", "んいいうう"); x3("(^いい)うう", "いいうう", 0, 4, 1); n("(^いい)うう", "んいいうう"); x3("ろろ(るる$)", "ろろるる", 4, 8, 1); n("ろろ(るる$)", "ろろるるる"); x2("(無)\\1", "無無", 0, 4); n("(無)\\1", "無武"); x2("(空?)\\1", "空空", 0, 4); x2("(空??)\\1", "空空", 0, 0); x2("(空*)\\1", "空空空空空", 0, 8); x3("(空*)\\1", "空空空空空", 0, 4, 1); x2("あ(い*)\\1", "あいいいい", 0, 10); x2("あ(い*)\\1", "あい", 0, 2); x2("(あ*)(い*)\\1\\2", "あああいいあああいい", 0, 20); x2("(あ*)(い*)\\2", "あああいいいい", 0, 14); x3("(あ*)(い*)\\2", "あああいいいい", 6, 10, 2); x2("(((((((ぽ*)ぺ))))))ぴ\\7", "ぽぽぽぺぴぽぽぽ", 0, 16); x3("(((((((ぽ*)ぺ))))))ぴ\\7", "ぽぽぽぺぴぽぽぽ", 0, 6, 7); x2("(は)(ひ)(ふ)\\2\\1\\3", "はひふひはふ", 0, 12); x2("([き-け])\\1", "くく", 0, 4); x2("(\\w\\d\\s)\\1", "あ5 あ5 ", 0, 8); n("(\\w\\d\\s)\\1", "あ5 あ5"); x2("(誰?|[あ-う]{3})\\1", "誰?誰?", 0, 8); x2("...(誰?|[あ-う]{3})\\1", "あaあ誰?誰?", 0, 13); x2("(誰?|[あ-う]{3})\\1", "ういうういう", 0, 12); x2("(^こ)\\1", "ここ", 0, 4); n("(^む)\\1", "めむむ"); n("(あ$)\\1", "ああ"); n("(あい\\Z)\\1", "あい"); x2("(あ*\\Z)\\1", "あ", 2, 2); x2(".(あ*\\Z)\\1", "いあ", 2, 4); x3("(.(やいゆ)\\2)", "zやいゆやいゆ", 0, 13, 1); x3("(.(..\\d.)\\2)", "あ12341234", 0, 10, 1); 
x2("((?i:あvず))\\1", "あvずあvず", 0, 10); x2("(?<愚か>変|\\(\\g<愚か>\\))", "((((((変))))))", 0, 14); x2("\\A(?:\\g<阿_1>|\\g<云_2>|\\z終了 (?<阿_1>観|自\\g<云_2>自)(?<云_2>在|菩薩\\g<阿_1>菩薩))$", "菩薩自菩薩自在自菩薩自菩薩", 0, 26); x2("[[ひふ]]", "ふ", 0, 2); x2("[[いおう]か]", "か", 0, 2); n("[[^あ]]", "あ"); n("[^[あ]]", "あ"); x2("[^[^あ]]", "あ", 0, 2); x2("[[かきく]&&きく]", "く", 0, 2); n("[[かきく]&&きく]", "か"); n("[[かきく]&&きく]", "け"); x2("[あ-ん&&い-を&&う-ゑ]", "ゑ", 0, 2); n("[^あ-ん&&い-を&&う-ゑ]", "ゑ"); x2("[[^あ&&あ]&&あ-ん]", "い", 0, 2); n("[[^あ&&あ]&&あ-ん]", "あ"); x2("[[^あ-ん&&いうえお]&&[^う-か]]", "き", 0, 2); n("[[^あ-ん&&いうえお]&&[^う-か]]", "い"); x2("[^[^あいう]&&[^うえお]]", "う", 0, 2); x2("[^[^あいう]&&[^うえお]]", "え", 0, 2); n("[^[^あいう]&&[^うえお]]", "か"); x2("[あ-&&-あ]", "-", 0, 1); x2("[^[^a-zあいう]&&[^bcdefgうえお]q-w]", "え", 0, 2); x2("[^[^a-zあいう]&&[^bcdefgうえお]g-w]", "f", 0, 1); x2("[^[^a-zあいう]&&[^bcdefgうえお]g-w]", "g", 0, 1); n("[^[^a-zあいう]&&[^bcdefgうえお]g-w]", "2"); fprintf(stdout, "\nRESULT SUCC: %d, FAIL: %d, ERROR: %d (Oniguruma %s)\n", nsucc, nfail, nerror, onig_version()); #ifndef POSIX_TEST onig_region_free(region, 1); onig_end(); #endif return ((nfail == 0 && nerror == 0) ? 0 : -1); }