// Called with the individual regexp lock held. regex_t* Regexp::maybe_recompile(STATE, String* string) { const UChar *pat; const UChar *end; OnigEncoding enc; OnigErrorInfo err_info; int err; if(fixed_encoding_) return onig_source_data(state); Encoding* string_enc = string->encoding(state); regex_t* onig_encoded = onig_data_encoded(state, string_enc); if(onig_encoded) return onig_encoded; enc = string_enc->get_encoding(); Encoding* source_enc = source()->encoding(state); String* converted = source()->convert_escaped(state, source_enc, fixed_encoding_); pat = (UChar*)converted->byte_address(); end = pat + converted->byte_size(); int options = onig_source_data(state)->options; OnigEncoding orig_enc = onig_source_data(state)->enc; regex_t* reg; err = onig_new(®, pat, end, options, enc, ONIG_SYNTAX_RUBY, &err_info); // If it doesn't work out, then abort and reset the encoding back // and say that it's forced. if(err != ONIG_NORMAL) { err = onig_new(®, pat, end, options, orig_enc, ONIG_SYNTAX_RUBY, &err_info); // Ok, wtf. Well, no way to proceed now. if(err != ONIG_NORMAL) { UChar onig_err_buf[ONIG_MAX_ERROR_MESSAGE_LEN]; char err_buf[REGEXP_ONIG_ERROR_MESSAGE_LEN]; onig_error_code_to_str(onig_err_buf, err, &err_info); snprintf(err_buf, REGEXP_ONIG_ERROR_MESSAGE_LEN, "%s: %s", onig_err_buf, pat); Exception::regexp_error(state, err_buf); return NULL; } string_enc = source_enc; fixed_encoding_ = true; } return make_managed(state, string_enc, reg); }
/*
 * This is a primitive so #initialize_copy can work.
 *
 * Compiles `pattern` with Oniguruma using the flags packed into `options`.
 * The KCODE_MASK bits of `options` select the encoding: when they are zero
 * the result of current_encoding() is used, otherwise the encoding comes
 * from get_enc_from_kcode() and forced_encoding_ is set. The OPTION_MASK
 * bits are passed to onig_new() as the regex options.
 *
 * On success the pattern is stored as the source, the names table is set
 * (nil when the regex has no named groups) and `this` is returned. On a
 * compile error a regexp error is raised via Exception::regexp_error() and
 * 0 is returned.
 */
Regexp* Regexp::initialize(STATE, String* pattern, Fixnum* options) {
  typedef int (*NameCallback)(const OnigUChar*, const OnigUChar*, int, int*, OnigRegex, void*);

  const UChar* pat = (UChar*)pattern->byte_address();
  const UChar* end = pat + pattern->size();

  OnigOptionType opts = options->to_native();
  int kcode = opts & KCODE_MASK;
  opts &= OPTION_MASK;

  OnigEncoding enc;
  if(kcode != 0) {
    // Don't attempt to fix the encoding later, it's been specified by the
    // user.
    enc = get_enc_from_kcode(kcode);
    forced_encoding_ = true;
  } else {
    enc = current_encoding(state);
  }

  // Held from here until the function returns.
  thread::Mutex::LockGuard lg(state->shared().onig_lock());

  OnigErrorInfo err_info;
  int err = onig_new(&this->onig_data, pat, end, opts, enc, ONIG_SYNTAX_RUBY, &err_info);

  if(err != ONIG_NORMAL) {
    UChar onig_err_buf[ONIG_MAX_ERROR_MESSAGE_LEN];
    char err_buf[1024];

    onig_error_code_to_str(onig_err_buf, err, &err_info);
    snprintf(err_buf, sizeof(err_buf), "%s: %s", onig_err_buf, pat);

    Exception::regexp_error(state, err_buf);
    return 0;
  }

  this->source(state, pattern);

  int num_names = onig_number_of_names(this->onig_data);

  if(num_names != 0) {
    // Let _gather_names visit every named group, with the table passed
    // along in the callback's user data.
    struct _gather_data gather;
    LookupTable* table = LookupTable::create(state);

    gather.state = state;
    gather.tbl = table;

    onig_foreach_name(this->onig_data, (NameCallback)_gather_names, (void*)&gather);

    this->names(state, table);
  } else {
    this->names(state, nil<LookupTable>());
  }

  make_managed(state);

  return this;
}
// Called with the onig_lock held. void Regexp::maybe_recompile(STATE, String* string) { const UChar *pat; const UChar *end; OnigEncoding enc; OnigErrorInfo err_info; int err; if(fixed_encoding_) return; enc = string->get_encoding_kcode_fallback(state); if(enc == onig_data->enc) return; pat = (UChar*)source()->byte_address(); end = pat + source()->byte_size(); int options = onig_data->options; OnigEncoding orig_enc = onig_data->enc; err = onig_new(&this->onig_data, pat, end, options, enc, ONIG_SYNTAX_RUBY, &err_info); // If it doesn't work out, then abort and reset the encoding back // and say that it's forced. if(err != ONIG_NORMAL) { err = onig_new(&this->onig_data, pat, end, options, orig_enc, ONIG_SYNTAX_RUBY, &err_info); // Ok, wtf. Well, no way to proceed now. if(err != ONIG_NORMAL) { OnigUChar buf[1024]; onig_error_code_to_str((UChar*)buf, err, &err_info); std::cout << "Fatal ONIG error: " << buf << "\n"; assert(err == ONIG_NORMAL); } fixed_encoding_ = true; } make_managed(state); }
/*
 * This is a primitive so #initialize_copy can work.
 *
 * Compiles `pattern` with Oniguruma using the flags packed into `options`.
 *
 * When LANGUAGE_18_ENABLED(state) holds, the raw pattern bytes are compiled
 * and the encoding is chosen from the KCODE_MASK bits of `options` (falling
 * back to pattern->get_encoding_kcode_fallback() when they are zero).
 *
 * Otherwise fixed_encoding_ / no_encoding_ are taken from the option bits,
 * the KCODE_MASK bits may override the pattern's Encoding, the pattern is
 * run through convert_escaped(), and a duplicate of the pattern tagged with
 * the resulting Encoding becomes the stored source.
 *
 * Returns `this` on success. On a compile error a regexp error is raised
 * via Exception::regexp_error() and 0 is returned.
 */
Regexp* Regexp::initialize(STATE, String* pattern, Fixnum* options) {
  typedef int (*NameCallback)(const OnigUChar*, const OnigUChar*, int, int*, OnigRegex, void*);

  const UChar* pat;
  const UChar* end;
  OnigEncoding enc;
  OnigErrorInfo err_info;

  OnigOptionType opts = options->to_native();

  if(!LANGUAGE_18_ENABLED(state)) {
    fixed_encoding_ = opts & OPTION_FIXEDENCODING;
    no_encoding_    = opts & OPTION_NOENCODING;

    Encoding* source_enc = pattern->encoding(state);

    switch(opts & KCODE_MASK) {
    case KCODE_NONE:
      source_enc = 0;
      no_encoding_ = true;
      break;
    case KCODE_EUC:
      source_enc = Encoding::find(state, "EUC-JP");
      fixed_encoding_ = true;
      break;
    case KCODE_SJIS:
      source_enc = Encoding::find(state, "Windows-31J");
      fixed_encoding_ = true;
      break;
    case KCODE_UTF8:
      source_enc = Encoding::utf8_encoding(state);
      fixed_encoding_ = true;
      break;
    }

    String* converted = pattern->convert_escaped(state, source_enc, fixed_encoding_);

    pat = (UChar*)converted->byte_address();
    end = pat + converted->byte_size();
    enc = source_enc->get_encoding();

    // Store a copy as the source so the caller's string keeps its own
    // encoding.
    pattern = pattern->string_dup(state);
    pattern->encoding(state, source_enc);
  } else {
    int kcode = opts & KCODE_MASK;

    pat = (UChar*)pattern->byte_address();
    end = pat + pattern->byte_size();

    if(kcode != 0) {
      // Don't attempt to fix the encoding later, it's been specified by the
      // user.
      enc = get_enc_from_kcode(kcode);
      fixed_encoding_ = true;
    } else {
      enc = pattern->get_encoding_kcode_fallback(state);
    }
  }

  // Held from here until the function returns.
  utilities::thread::Mutex::LockGuard lg(state->shared().onig_lock());

  int err = onig_new(&this->onig_data, pat, end, opts & OPTION_MASK, enc,
                     ONIG_SYNTAX_RUBY, &err_info);

  if(err != ONIG_NORMAL) {
    UChar onig_err_buf[ONIG_MAX_ERROR_MESSAGE_LEN];
    char err_buf[1024];

    onig_error_code_to_str(onig_err_buf, err, &err_info);
    snprintf(err_buf, sizeof(err_buf), "%s: %s", onig_err_buf, pat);

    Exception::regexp_error(state, err_buf);
    return 0;
  }

  this->source(state, pattern);

  int num_names = onig_number_of_names(this->onig_data);

  if(num_names != 0) {
    // Let _gather_names visit every named group, with the table passed
    // along in the callback's user data.
    struct _gather_data gather;
    LookupTable* table = LookupTable::create(state);

    gather.state = state;
    gather.tbl = table;

    onig_foreach_name(this->onig_data, (NameCallback)_gather_names, (void*)&gather);

    this->names(state, table);
  } else {
    this->names(state, nil<LookupTable>());
  }

  make_managed(state);

  return this;
}
/*
 * This is a primitive so #initialize_copy can work.
 *
 * Compiles `pattern` with Oniguruma using the flags packed into `options`.
 *
 * fixed_encoding_ / no_encoding_ are taken from the option bits, and the
 * KCODE_MASK bits may override the pattern's Encoding. The pattern is run
 * through convert_escaped() and compiled with the resulting encoding. If
 * that compilation fails, it is retried with the pattern's original
 * encoding, fixed_encoding_ is set and the stored source is tagged with the
 * original encoding.
 *
 * On success the source and names table are set, the compiled regex is
 * handed to make_managed() together with the source's encoding, and `this`
 * is returned. If both compilations fail, a regexp error is raised via
 * Exception::regexp_error() and 0 is returned.
 */
Regexp* Regexp::initialize(STATE, String* pattern, Fixnum* options) {
  const UChar *pat;
  const UChar *end;
  OnigErrorInfo err_info;
  OnigEncoding enc;

  OnigOptionType opts = options->to_native();

  Encoding* original_enc = pattern->encoding(state);

  fixed_encoding_ = opts & OPTION_FIXEDENCODING;
  no_encoding_    = opts & OPTION_NOENCODING;

  Encoding* source_enc = original_enc;

  switch(opts & KCODE_MASK) {
  case KCODE_NONE:
    no_encoding_ = true;
    break;
  case KCODE_EUC:
    source_enc = Encoding::find(state, "EUC-JP");
    fixed_encoding_ = true;
    break;
  case KCODE_SJIS:
    source_enc = Encoding::find(state, "Windows-31J");
    fixed_encoding_ = true;
    break;
  case KCODE_UTF8:
    source_enc = Encoding::utf8_encoding(state);
    fixed_encoding_ = true;
    break;
  }

  if(no_encoding_) source_enc = 0;

  String* converted = pattern->convert_escaped(state, source_enc, fixed_encoding_);

  pat = (UChar*)converted->byte_address();
  end = pat + converted->byte_size();
  enc = source_enc->get_encoding();

  // Store a copy as the source so the caller's string keeps its own encoding.
  pattern = pattern->string_dup(state);
  pattern->encoding(state, source_enc);

  // onig_new() stores the compiled regex through its first argument, so it
  // must receive the address of the local pointer.
  regex_t* reg;

  int err = onig_new(&reg, pat, end, opts & OPTION_MASK, enc, ONIG_SYNTAX_RUBY, &err_info);

  if(err != ONIG_NORMAL) {
    // Retry with the encoding the pattern originally carried, and pin it.
    enc = original_enc->get_encoding();
    fixed_encoding_ = true;
    err = onig_new(&reg, pat, end, opts & OPTION_MASK, enc, ONIG_SYNTAX_RUBY, &err_info);
    pattern->encoding(state, original_enc);

    if(err != ONIG_NORMAL) {
      UChar onig_err_buf[ONIG_MAX_ERROR_MESSAGE_LEN];
      char err_buf[REGEXP_ONIG_ERROR_MESSAGE_LEN];
      onig_error_code_to_str(onig_err_buf, err, &err_info);
      snprintf(err_buf, REGEXP_ONIG_ERROR_MESSAGE_LEN, "%s: %s", onig_err_buf, pat);

      Exception::regexp_error(state, err_buf);
      return 0;
    }
  }

  this->source(state, pattern);

  int num_names = onig_number_of_names(reg);

  if(num_names == 0) {
    this->names(state, nil<LookupTable>());
  } else {
    // Let _gather_names visit every named group, with the table passed
    // along in the callback's user data.
    struct _gather_data gd;
    gd.state = state;
    LookupTable* tbl = LookupTable::create(state);
    gd.tbl = tbl;
    onig_foreach_name(reg,
                      (int (*)(const OnigUChar*, const OnigUChar*,int,int*,OnigRegex,void*))_gather_names,
                      (void*)&gd);
    this->names(state, tbl);
  }

  make_managed(state, pattern->encoding(), reg);

  return this;
}