Beispiel #1
1
  // Returns the compiled regex to use when matching against +string+,
  // compiling the source pattern for the string's encoding if needed.
  //
  // Must be called with the individual regexp lock held.
  //
  // Returns NULL after reporting a regexp error when the pattern compiles
  // neither in the string's encoding nor in the source compilation's
  // encoding.
  regex_t* Regexp::maybe_recompile(STATE, String* string) {
    // With fixed_encoding_ set, the source compilation is returned as-is.
    if(fixed_encoding_) {
      return onig_source_data(state);
    }

    Encoding* string_enc = string->encoding(state);

    // NOTE(review): onig_data_encoded presumably looks up a compilation
    // already made for this encoding -- confirm against its definition.
    if(regex_t* existing = onig_data_encoded(state, string_enc)) {
      return existing;
    }

    OnigEncoding target_enc = string_enc->get_encoding();

    Encoding* source_enc = source()->encoding(state);
    String* escaped = source()->convert_escaped(state, source_enc, fixed_encoding_);
    const UChar* pat = (UChar*)escaped->byte_address();
    const UChar* pat_end = pat + escaped->byte_size();

    int options = onig_source_data(state)->options;
    OnigEncoding fallback_enc = onig_source_data(state)->enc;

    OnigErrorInfo err_info;
    regex_t* reg;

    // First attempt: compile in the encoding of the string being matched.
    int err = onig_new(&reg, pat, pat_end, options,
                       target_enc, ONIG_SYNTAX_RUBY, &err_info);

    if(err == ONIG_NORMAL) {
      return make_managed(state, string_enc, reg);
    }

    // That did not work out, so go back to the encoding of the source
    // compilation and treat the encoding as forced from now on.
    err = onig_new(&reg, pat, pat_end, options,
                   fallback_enc, ONIG_SYNTAX_RUBY, &err_info);

    if(err != ONIG_NORMAL) {
      // Nothing left to try; report the Oniguruma message with the pattern.
      UChar onig_msg[ONIG_MAX_ERROR_MESSAGE_LEN];
      char message[REGEXP_ONIG_ERROR_MESSAGE_LEN];
      onig_error_code_to_str(onig_msg, err, &err_info);
      snprintf(message, REGEXP_ONIG_ERROR_MESSAGE_LEN, "%s: %s", onig_msg, pat);

      Exception::regexp_error(state, message);
      return NULL;
    }

    fixed_encoding_ = true;

    return make_managed(state, source_enc, reg);
  }
Beispiel #2
0
  /*
   * Compiles +pattern+ with +options+ and stores the result in this Regexp.
   * This is a primitive so #initialize_copy can work.
   *
   * Returns this on success, or 0 after a regexp error has been reported.
   */
  Regexp* Regexp::initialize(STATE, String* pattern, Fixnum* options) {
    typedef int (*name_callback)(const OnigUChar*, const OnigUChar*,int,int*,OnigRegex,void*);

    const UChar* pat = (UChar*)pattern->byte_address();
    const UChar* end = pat + pattern->size();

    // The native option word carries both the kcode bits and the regexp
    // option bits; split them apart.
    OnigOptionType opts = options->to_native();
    const int kcode = opts & KCODE_MASK;
    opts &= OPTION_MASK;

    OnigEncoding enc;

    if(kcode != 0) {
      // The user specified the encoding, so don't attempt to fix it later.
      enc = get_enc_from_kcode(kcode);
      forced_encoding_ = true;
    } else {
      enc = current_encoding(state);
    }

    // Held until the function returns.
    thread::Mutex::LockGuard lg(state->shared().onig_lock());

    OnigErrorInfo err_info;
    const int err = onig_new(&this->onig_data, pat, end, opts, enc,
                             ONIG_SYNTAX_RUBY, &err_info);

    if(err != ONIG_NORMAL) {
      UChar onig_msg[ONIG_MAX_ERROR_MESSAGE_LEN];
      char message[1024];
      onig_error_code_to_str(onig_msg, err, &err_info);
      snprintf(message, sizeof(message), "%s: %s", onig_msg, pat);

      Exception::regexp_error(state, message);
      return 0;
    }

    this->source(state, pattern);

    // Record the named groups, or nil when the pattern has none.
    if(onig_number_of_names(this->onig_data) != 0) {
      struct _gather_data gd;
      gd.state = state;
      LookupTable* tbl = LookupTable::create(state);
      gd.tbl = tbl;
      onig_foreach_name(this->onig_data, (name_callback)_gather_names, (void*)&gd);
      this->names(state, tbl);
    } else {
      this->names(state, nil<LookupTable>());
    }

    make_managed(state);

    return this;
  }
Beispiel #3
0
  // Recompiles this regexp for the encoding of +string+ when that encoding
  // differs from the one the current compilation uses.
  //
  // Called with the onig_lock held.
  //
  // If the pattern does not compile in the string's encoding, it is compiled
  // again in the original encoding and fixed_encoding_ is set, so later calls
  // return immediately. If that fails as well, the failure is reported
  // through Exception::regexp_error and the compiled data this regexp
  // already had is left untouched.
  void Regexp::maybe_recompile(STATE, String* string) {
    if(fixed_encoding_) return;

    OnigEncoding enc = string->get_encoding_kcode_fallback(state);

    // Already compiled for this encoding; nothing to do.
    if(enc == onig_data->enc) return;

    const UChar* pat = (UChar*)source()->byte_address();
    const UChar* end = pat + source()->byte_size();

    int options = onig_data->options;
    OnigEncoding orig_enc = onig_data->enc;

    OnigErrorInfo err_info;

    // Compile into a local so that a failed attempt never overwrites the
    // compiled data this regexp is currently using.
    regex_t* reg = 0;

    int err = onig_new(&reg, pat, end, options,
                       enc, ONIG_SYNTAX_RUBY, &err_info);

    // If it doesn't work out, then abort and reset the encoding back
    // and say that it's forced.
    if(err != ONIG_NORMAL) {
      err = onig_new(&reg, pat, end, options,
                     orig_enc, ONIG_SYNTAX_RUBY, &err_info);

      // No way to proceed now. This used to be guarded only by assert(),
      // which is compiled out under NDEBUG and let execution continue with
      // a failed compilation; report it as a regexp error instead.
      if(err != ONIG_NORMAL) {
        OnigUChar buf[1024];
        onig_error_code_to_str((UChar*)buf, err, &err_info);
        Exception::regexp_error(state, (const char*)buf);
        return;
      }

      fixed_encoding_ = true;
    }

    this->onig_data = reg;
    make_managed(state);
  }
Beispiel #4
0
  /*
   * Compiles +pattern+ with +options+ and installs the result in this
   * Regexp. This is a primitive so #initialize_copy can work.
   *
   * Returns this on success, or 0 after a regexp error has been reported.
   */
  Regexp* Regexp::initialize(STATE, String* pattern, Fixnum* options) {
    typedef int (*name_callback)(const OnigUChar*, const OnigUChar*,int,int*,OnigRegex,void*);

    const UChar* pat;
    const UChar* end;
    OnigEncoding enc;

    OnigOptionType opts = options->to_native();

    if(LANGUAGE_18_ENABLED(state)) {
      // 1.8 mode: compile the pattern bytes directly.
      pat = (UChar*)pattern->byte_address();
      end = pat + pattern->byte_size();

      int kcode = opts & KCODE_MASK;

      if(kcode != 0) {
        // The user specified the encoding, so don't attempt to fix it later.
        enc = get_enc_from_kcode(kcode);
        fixed_encoding_ = true;
      } else {
        enc = pattern->get_encoding_kcode_fallback(state);
      }
    } else {
      fixed_encoding_ = opts & OPTION_FIXEDENCODING;
      no_encoding_    = opts & OPTION_NOENCODING;

      Encoding* source_enc = pattern->encoding(state);

      // An explicit kcode overrides the pattern's own encoding.
      switch(opts & KCODE_MASK) {
      case KCODE_NONE:
        source_enc = 0;
        no_encoding_ = true;
        break;
      case KCODE_EUC:
        source_enc = Encoding::find(state, "EUC-JP");
        fixed_encoding_ = true;
        break;
      case KCODE_SJIS:
        source_enc = Encoding::find(state, "Windows-31J");
        fixed_encoding_ = true;
        break;
      case KCODE_UTF8:
        source_enc = Encoding::utf8_encoding(state);
        fixed_encoding_ = true;
        break;
      }

      // NOTE(review): source_enc can be 0 at this point and is dereferenced
      // right after this call; convert_escaped presumably receives it by
      // reference and fills it in -- confirm against its declaration.
      String* escaped = pattern->convert_escaped(state, source_enc, fixed_encoding_);

      pat = (UChar*)escaped->byte_address();
      end = pat + escaped->byte_size();
      enc = source_enc->get_encoding();

      // Keep a private copy of the pattern tagged with the chosen encoding.
      pattern = pattern->string_dup(state);
      pattern->encoding(state, source_enc);
    }

    // Held until the function returns.
    utilities::thread::Mutex::LockGuard lg(state->shared().onig_lock());

    OnigErrorInfo err_info;
    const int err = onig_new(&this->onig_data, pat, end, opts & OPTION_MASK,
                             enc, ONIG_SYNTAX_RUBY, &err_info);

    if(err != ONIG_NORMAL) {
      UChar onig_msg[ONIG_MAX_ERROR_MESSAGE_LEN];
      char message[1024];
      onig_error_code_to_str(onig_msg, err, &err_info);
      snprintf(message, sizeof(message), "%s: %s", onig_msg, pat);

      Exception::regexp_error(state, message);
      return 0;
    }

    this->source(state, pattern);

    // Record the named groups, or nil when the pattern has none.
    if(onig_number_of_names(this->onig_data) != 0) {
      struct _gather_data gd;
      gd.state = state;
      LookupTable* tbl = LookupTable::create(state);
      gd.tbl = tbl;
      onig_foreach_name(this->onig_data, (name_callback)_gather_names, (void*)&gd);
      this->names(state, tbl);
    } else {
      this->names(state, nil<LookupTable>());
    }

    make_managed(state);

    return this;
  }
Beispiel #5
0
  /*
   * Compiles +pattern+ with +options+ and installs the result in this
   * Regexp. This is a primitive so #initialize_copy can work.
   *
   * The pattern is first compiled in the encoding selected by the options;
   * if that fails it is compiled again in the pattern's original encoding
   * with fixed_encoding_ set. Returns this on success, or 0 after a regexp
   * error has been reported.
   */
  Regexp* Regexp::initialize(STATE, String* pattern, Fixnum* options) {
    typedef int (*name_callback)(const OnigUChar*, const OnigUChar*,int,int*,OnigRegex,void*);

    OnigOptionType opts = options->to_native();
    Encoding* original_enc = pattern->encoding(state);

    fixed_encoding_ = opts & OPTION_FIXEDENCODING;
    no_encoding_    = opts & OPTION_NOENCODING;

    Encoding* source_enc = original_enc;

    // An explicit kcode overrides the pattern's own encoding.
    const int kcode = opts & KCODE_MASK;

    if(kcode == KCODE_NONE) {
      no_encoding_ = true;
    } else if(kcode == KCODE_EUC) {
      source_enc = Encoding::find(state, "EUC-JP");
      fixed_encoding_ = true;
    } else if(kcode == KCODE_SJIS) {
      source_enc = Encoding::find(state, "Windows-31J");
      fixed_encoding_ = true;
    } else if(kcode == KCODE_UTF8) {
      source_enc = Encoding::utf8_encoding(state);
      fixed_encoding_ = true;
    }

    if(no_encoding_) {
      source_enc = 0;
    }

    // NOTE(review): source_enc can be 0 at this point and is dereferenced
    // right after this call; convert_escaped presumably receives it by
    // reference and fills it in -- confirm against its declaration.
    String* escaped = pattern->convert_escaped(state, source_enc, fixed_encoding_);

    const UChar* pat = (UChar*)escaped->byte_address();
    const UChar* end = pat + escaped->byte_size();
    OnigEncoding enc = source_enc->get_encoding();

    // Keep a private copy of the pattern tagged with the chosen encoding.
    pattern = pattern->string_dup(state);
    pattern->encoding(state, source_enc);

    OnigErrorInfo err_info;
    regex_t* reg;

    int err = onig_new(&reg, pat, end, opts & OPTION_MASK, enc, ONIG_SYNTAX_RUBY, &err_info);

    if(err != ONIG_NORMAL) {
      // Retry in the encoding the pattern originally had, and pin it.
      enc = original_enc->get_encoding();
      fixed_encoding_ = true;
      err = onig_new(&reg, pat, end, opts & OPTION_MASK, enc, ONIG_SYNTAX_RUBY, &err_info);
      pattern->encoding(state, original_enc);

      if(err != ONIG_NORMAL) {
        UChar onig_msg[ONIG_MAX_ERROR_MESSAGE_LEN];
        char message[REGEXP_ONIG_ERROR_MESSAGE_LEN];
        onig_error_code_to_str(onig_msg, err, &err_info);
        snprintf(message, REGEXP_ONIG_ERROR_MESSAGE_LEN, "%s: %s", onig_msg, pat);

        Exception::regexp_error(state, message);
        return 0;
      }
    }

    this->source(state, pattern);

    // Record the named groups, or nil when the pattern has none.
    if(onig_number_of_names(reg) != 0) {
      struct _gather_data gd;
      gd.state = state;
      LookupTable* tbl = LookupTable::create(state);
      gd.tbl = tbl;
      onig_foreach_name(reg, (name_callback)_gather_names, (void*)&gd);
      this->names(state, tbl);
    } else {
      this->names(state, nil<LookupTable>());
    }

    make_managed(state, pattern->encoding(), reg);

    return this;
  }