Пример #1
0
/*
 * GNU-regex style wrapper around onig_match().
 *
 * reg  - compiled pattern
 * str  - start of the subject text
 * size - number of bytes in the subject; the text end is str + size
 * pos  - byte offset inside the subject handed to onig_match() as the
 *        position to match at
 * regs - region that receives the capture positions
 *
 * Returns whatever onig_match() returns, unchanged.  The call always uses
 * ONIG_OPTION_NONE.
 */
extern int
re_match(regex_t* reg, const char* str, int size, int pos,
	 struct re_registers* regs)
{
  UChar* const text_begin = (UChar* )str;
  UChar* const text_end   = (UChar* )(str + size);
  UChar* const match_at   = (UChar* )(str + pos);

  return onig_match(reg, text_begin, text_end, match_at, regs,
		    ONIG_OPTION_NONE);
}
Пример #2
0
void regexp_splitter::split(
    const std::string& str,
    std::vector<std::pair<size_t, size_t> >& bounds) const {

  const UChar* head = reinterpret_cast<const UChar*>(str.data());
  const UChar* end = head + str.size();

  OnigRegion* region = onig_region_new();
  try {
    int cur = 0;
    while (head + cur < end) {
      int match
          = onig_match(reg_, head, end, head + cur, region, ONIG_OPTION_NONE);
      if (match < 0) {
        // did not match
        cur++;
        continue;
      }

      const int pos = region->beg[group_];
      const int len = region->end[group_] - pos;
      bounds.push_back(std::make_pair(pos, len));

      if (len > 0) {
        cur += len;
      } else {
        ++cur;
      }
    }
    onig_region_free(region, 1);
  } catch (...) {
    onig_region_free(region, 1);
    throw;
  }
}
Пример #3
0
/*
 * Pawn native: compiles the pattern in params[2] and matches it against the
 * beginning of the string in params[1].
 *
 * Returns:
 *   the result of onig_match() when both arguments are non-empty strings
 *     and the pattern compiles,
 *   -1     when the pattern fails to compile (the error is logged),
 *   -1337  when either string argument could not be fetched.
 *
 * The compiled pattern and the match region are released before returning.
 */
cell AMX_NATIVE_CALL pawn_regex_match(AMX* amx, cell* params)
{
	// Start from NULL so the error path below never hands an indeterminate
	// pointer to onig_free() if onig_new() bails out before storing a result.
	regex_t* RegExpr = NULL;
	const char *rexp = NULL, *string = NULL;
	amx_StrParam(amx, params[1], string);
	amx_StrParam(amx, params[2], rexp);
	if(string && rexp)
	{
		UChar* pattern = (UChar* )rexp;
		OnigErrorInfo einfo;
		int r = onig_new(&RegExpr, pattern, pattern + strlen((char* )pattern), ONIG_OPTION_DEFAULT, ONIG_ENCODING_ASCII, ONIG_SYNTAX_PERL, &einfo);
		//logprintf("[REGEX DEBUG]: rexp %s",pattern);
		if(r != ONIG_NORMAL)
		{
			UChar s[ONIG_MAX_ERROR_MESSAGE_LEN];
			onig_error_code_to_str(s, r, &einfo);
			logprintf("[REGEX ERROR]: %s", s);
			// Harmless when RegExpr is still NULL.
			onig_free(RegExpr);
			return -1;
		}
		UChar* str = (UChar* )string;
		OnigRegion *region = onig_region_new();
		r = onig_match(RegExpr, str, str+strlen((char*) str), str, region, ONIG_OPTION_NONE);
		//logprintf("[REGEX DEBUG]: string %s",str);
		onig_region_free(region, 1);
		onig_free(RegExpr);
		//logprintf("[REGEX DEBUG]: return %d",r);
		return r;
	}
	return -1337;
}
Пример #4
0
  // Runs onig_match() on `string` at byte offset `start` and returns the
  // match data built by get_match_data(), or Qnil when the pattern does not
  // match there or `start` lies outside the string.
  Object* Regexp::match_start(STATE, String* string, Fixnum* start) {
    int beg, max;
    const UChar *str;
    const UChar *fin;
    OnigRegion *region;
    Object* md = Qnil;

    if(unlikely(!onig_data)) {
      Exception::argument_error(state, "Not properly initialized Regexp");
    }

    maybe_recompile(state);

    max = string->size();
    native_int pos = start->to_native();

    // Bounds check: an offset outside [0, max] would move the start pointer
    // outside the string buffer before it is handed to onig_match().
    if(pos < 0 || pos > max) return Qnil;

    // Allocated only after the early return above so that path cannot leak.
    region = onig_region_new();

    str = (UChar*)string->c_str(state);
    fin = str + max;

    str += pos;

    int* back_match = onig_data->int_map_backward;

    beg = onig_match(onig_data, str, fin, str, region,
                     ONIG_OPTION_NONE);

    // Seems like onig must setup int_map_backward lazily, so we have to watch
    // for it to appear here.
    if(onig_data->int_map_backward != back_match) {
      native_int size = sizeof(int) * ONIG_CHAR_TABLE_SIZE;
      ByteArray* ba = ByteArray::create(state, size);
      memcpy(ba->raw_bytes(), onig_data->int_map_backward, size);

      // Dispose of the old one.
      free(onig_data->int_map_backward);

      // From here on the table lives inside the ByteArray's storage.
      onig_data->int_map_backward = reinterpret_cast<int*>(ba->raw_bytes());

      write_barrier(state, ba);
    }

    if(beg != ONIG_MISMATCH) {
      md = get_match_data(state, region, string, this, pos);
    }

    onig_region_free(region, 1);
    return md;
  }
Пример #5
0
/*
 * Pawn native: matches the string in params[1] against the already compiled
 * expression stored at index params[2] of the rexpression table.
 *
 * Returns:
 *   the result of onig_match() for a valid index,
 *   -1     when the index is outside [0, TotalExpressions) (logged),
 *   -1337  when the string argument could not be fetched.
 */
cell AMX_NATIVE_CALL pawn_regex_exmatch(AMX* amx, cell* params)
{
	const char *string = NULL;
	amx_StrParam(amx, params[1], string);
	if(!string)
		return -1337;

	const int id = (int)params[2];
	if(id < 0 || id >= TotalExpressions)
	{
		logprintf("[REGEX ERROR]: Call regex_exmatch with undefined parameter at index %d", id);
		return -1;
	}

	// The match is attempted at the first byte of the subject.
	UChar* subject = (UChar* )string;
	UChar* subject_end = subject + strlen(string);
	return onig_match(rexpression[id].RegExpr, subject, subject_end, subject,
	                  rexpression[id].zreg, ONIG_OPTION_NONE);
}
Пример #6
0
/*
 * Calls onig_match() on the buffer `str` of `str_length` bytes, at byte
 * offset `offset`, and returns its result unchanged.
 *
 * str        - start of the subject buffer
 * str_length - length of the subject in bytes
 * offset     - byte offset of the position to match at
 * option     - option flags forwarded to onig_match()
 * regex      - compiled pattern
 * region     - region that receives the capture positions
 *
 * When BENCHMARK_CHELP is defined the elapsed wall-clock time of the call is
 * printed to stdout in microseconds.
 */
int MatchOnigRegex(void *str, int str_length, int offset, int option,
                  OnigRegex regex, OnigRegion *region) {
    OnigUChar *str_start = (OnigUChar *) str;
    OnigUChar *str_end = str_start + str_length;
    OnigUChar *search_start = str_start + offset;
    int ret;
#ifdef BENCHMARK_CHELP
    struct timeval tim1, tim2;
    long t;

    gettimeofday(&tim1, NULL);
#endif
    ret = onig_match(regex, str_start, str_end, search_start, region, option);
#ifdef BENCHMARK_CHELP
    gettimeofday(&tim2, NULL);
    t = (tim2.tv_sec - tim1.tv_sec) * 1000000 + tim2.tv_usec - tim1.tv_usec;
    printf("%ld microseconds elapsed\n", t);
#endif
    return ret;
}
Пример #7
0
bool regexp_match::match(const std::string& key) {
  const UChar* str = reinterpret_cast<const UChar*>(key.c_str());
  const UChar* end = str + key.size();
  return onig_match(reg_, str, end, str, NULL, ONIG_OPTION_NONE) >= 0;
}
Пример #8
0
/*
 * Runs the compiled pattern in `rx` against [strbeg, strend) and, on
 * success, rebuilds rx->startp / rx->endp from the match region.
 *
 * With `one_shot` set, onig_match() is attempted at `stringarg` only;
 * otherwise onig_search_gpos() searches forward from `stringarg` (advanced
 * past one character first when `minend` > 0), using the original
 * `stringarg` as the \G position.
 *
 * Returns 1 when a match was found, 0 when there was none and -1 on error,
 * in which case `msg` receives the error text.
 */
int regexec_onig(bregonig *rx, const TCHAR *stringarg,
                 const TCHAR *strend,	/* pointer to null at end of string */
                 const TCHAR *strbeg,	/* real beginning of string */
                 int minend,		/* end of match must be at least minend after stringarg */
                 int safebase,	/* no need to remember string in subbase */
                 int one_shot,	/* if not match then break without proceed str pointer */
                 TCHAR *msg)		/* fatal error message */
{
    TRACE1(_T("one_shot: %d\n"), one_shot);
    OnigPosition err_code;

    if (one_shot) {
        OnigOptionType option = (minend > 0) ?
                                ONIG_OPTION_FIND_NOT_EMPTY : ONIG_OPTION_NONE;
        err_code = onig_match(rx->reg, (UChar*) strbeg, (UChar*) strend,
                              (UChar*) stringarg, rx->region,
                              option);
    } else {
        const TCHAR *global_pos = stringarg;		/* \G */
        if (minend > 0) {
#ifdef UNICODE
            int kanjiflag = 1;
#else
            int kanjiflag = rx->pmflags & PMf_KANJI;
#endif
            if (kanjiflag && is_char_pair((TBYTE*) stringarg)) {
                stringarg += 2;
            } else {
                stringarg++;
            }
        }
        err_code = onig_search_gpos(rx->reg, (UChar*) strbeg, (UChar*) strend,
                                    (UChar*) global_pos,
                                    (UChar*) stringarg, (UChar*) strend, rx->region,
                                    ONIG_OPTION_NONE);
    }

    if (err_code >= 0) {
        /* FOUND */
        const int num_regs = rx->region->num_regs;

        /* startp and endp share one allocation, so releasing startp also
           invalidates endp; clear it so a failed allocation below cannot
           leave it pointing into freed memory. */
        delete [] rx->startp;
        rx->endp = NULL;

        rx->nparens = num_regs - 1;
        rx->startp = new (std::nothrow) TCHAR*[num_regs * 2];
        /* allocate startp and endp together */
        if (rx->startp == NULL) {
            asc2tcs(msg, "out of space", BREGEXP_MAX_ERROR_MESSAGE_LEN);
            return -1;
        }
        rx->endp = rx->startp + num_regs;

        for (int i = 0; i < num_regs; i++) {
            if (rx->region->beg[i] != ONIG_REGION_NOTPOS) {
                // found: region offsets are in bytes, the pointers are in TCHAR units
                rx->startp[i] = const_cast<TCHAR *>(strbeg) + rx->region->beg[i] / sizeof(TCHAR);
                rx->endp[i] = const_cast<TCHAR *>(strbeg) + rx->region->end[i] / sizeof(TCHAR);
            } else {
                // not found
                rx->startp[i] = NULL;
                rx->endp[i] = NULL;
            }
        }
        return 1;
    } else if (err_code == ONIG_MISMATCH) {
        /* NOT FOUND */
        return 0;
    } else {
        /* ERROR */
        onig_err_to_bregexp_msg(err_code, NULL, msg);
        return -1;
    }
}
Пример #9
0
  // Runs onig_match() on `string` at byte offset `start` while holding
  // lock_, and returns the match data built by get_match_data(), or cNil
  // when the pattern does not match there or `start` is past the end of the
  // string.
  Object* Regexp::match_start(STATE, String* string, Fixnum* start) {
    if(unlikely(!onig_data)) {
      Exception::argument_error(state, "Not properly initialized Regexp");
    }

    // utilities::thread::Mutex::LockGuard lg(state->shared().onig_lock());

    int max = string->byte_size();
    native_int pos = start->to_native();

    const UChar* str = (UChar*)string->byte_address();
    const UChar* fin = str + max;

    // Bounds check.
    if(pos > max) return cNil;
    str += pos;

    lock_.lock();

    maybe_recompile(state, string);

    // Caller-provided register storage, every slot preset to "no position".
    int begin_reg[10];
    int end_reg[10];
    for(int i = 0; i < 10; i++) {
      begin_reg[i] = ONIG_REGION_NOTPOS;
      end_reg[i] = ONIG_REGION_NOTPOS;
    }

    // NOTE(review): the meaning of each initializer depends on the OnigRegion
    // layout used by this build, which is not visible here.
    OnigRegion region = { 10, 0, begin_reg, end_reg, 0, 0 };

    int* const map_before = onig_data->int_map_backward;
    int beg = onig_match(onig_data, str, fin, str, &region,
                         ONIG_OPTION_NONE);

    // Seems like onig must setup int_map_backward lazily, so we have to watch
    // for it to appear here.
    if(onig_data->int_map_backward != map_before) {
      native_int size = sizeof(int) * ONIG_CHAR_TABLE_SIZE;
      ByteArray* ba = ByteArray::create(state, size);
      memcpy(ba->raw_bytes(), onig_data->int_map_backward, size);

      // Dispose of the old one.
      free(onig_data->int_map_backward);

      // From here on the table lives inside the ByteArray's storage.
      onig_data->int_map_backward = reinterpret_cast<int*>(ba->raw_bytes());

      write_barrier(state, ba);
    }

    lock_.unlock();

    Object* md = cNil;
    if(beg != ONIG_MISMATCH) {
      md = get_match_data(state, &region, string, this, pos);
    }

    onig_region_free(&region, 0);
    return md;
  }
Пример #10
0
/*
 * Finds the first element of a variable's value array that equals a given
 * value or, when isRegExp is set, whose whole text is matched by a regular
 * expression.
 *
 * args / argsCount:
 *   2 arguments: (variable name, value)              - search starts at 0
 *   otherwise:   (start index, variable name, value) - a negative start
 *                index is treated as 0
 *
 * The scan also visits the index one past the last stored element, where the
 * element is taken to be an empty string (or 0 for numeric comparison).
 *
 * Returns the index found, or -1 when nothing matches or an error was
 * raised through qspSetError() / by the variable lookup.
 */
int qspArrayPos(QSPVariant *args, int argsCount, QSP_BOOL isRegExp)
{
    int ind, count, num;
    QSPVar *var;
    QSPVariant *val;
    QSP_CHAR *str;
    OnigUChar *tempBeg, *tempEnd;
    regex_t *onigExp;
    OnigErrorInfo onigInfo;
    QSP_BOOL isString;

    /* First argument: a string (variable name) in the 2-argument form,
       a number (start index) otherwise. */
    if (qspConvertVariantTo(args, argsCount == 2))
    {
        qspSetError(QSP_ERR_TYPEMISMATCH);
        return -1;
    }
    if (argsCount != 2)
    {
        qspConvertVariantTo(args + 1, QSP_TRUE);
        str = QSP_STR(args[1]);
        ind = QSP_NUM(args[0]);
        val = args + 2;
        if (ind < 0) ind = 0;
    }
    else
    {
        str = QSP_STR(args[0]);
        ind = 0;
        val = args + 1;
    }

    var = qspVarReferenceWithType(str, QSP_FALSE, &isString);
    if (!var) return -1;

    /* A regexp is always compared as text; otherwise follow the variable's type. */
    if (qspConvertVariantTo(val, isRegExp || isString))
    {
        qspSetError(QSP_ERR_TYPEMISMATCH);
        return -1;
    }
    if (isRegExp)
    {
        tempBeg = (OnigUChar *)QSP_PSTR(val);
        tempEnd = (OnigUChar *)qspStrEnd(QSP_PSTR(val));
        if (onig_new(&onigExp, tempBeg, tempEnd, ONIG_OPTION_DEFAULT, QSP_ONIG_ENC, ONIG_SYNTAX_PERL_NG, &onigInfo))
        {
            qspSetError(QSP_ERR_INCORRECTREGEXP);
            return -1;
        }
    }

    count = var->ValsCount;
    if (ind > count) ind = count;
    for (; ind <= count; ++ind)
    {
        if (!val->IsStr)
        {
            /* Numeric comparison; a missing element counts as 0. */
            num = (ind < count ? var->Values[ind].Num : 0);
            if (num == QSP_PNUM(val)) return ind;
            continue;
        }

        /* Text comparison; a missing or null element counts as "". */
        str = (ind < count ? var->Values[ind].Str : 0);
        if (!str) str = QSP_FMT("");

        if (!isRegExp)
        {
            if (!qspStrsComp(str, QSP_PSTR(val))) return ind;
            continue;
        }

        /* Accept only when the match covers the entire element. */
        tempBeg = (OnigUChar *)str;
        tempEnd = (OnigUChar *)qspStrEnd(str);
        if (onig_match(onigExp, tempBeg, tempEnd, tempBeg, 0, ONIG_OPTION_NONE) == tempEnd - tempBeg)
        {
            onig_free(onigExp);
            return ind;
        }
    }
    if (isRegExp) onig_free(onigExp);
    return -1;
}
Пример #11
0
// C/Oniguruma version of the "simple_id" program

#include "../common.h"

#define REGEX "(.*)"

/*
 * Reads stdin line by line, runs onig_match() at the start of each line and
 * echoes the span covered by the whole match (region entry 0) to stdout.
 * Exits with status 1, after reporting the line number on stderr, as soon as
 * a line yields a negative result.
 *
 * NOTE(review): buffer, lno, start, end, r, reg and region are presumably
 * declared and set up by the PRE macro from ../common.h - confirm there.
 */
int main(int argc, char* argv[]) {

  PRE

  while(fgets(buffer, sizeof(buffer), stdin)) {
    int span_len;

    ++lno;
    start = buffer;
    end   = buffer + strlen((char* )buffer);
    r = onig_match(reg, buffer, end, start, region, ONIG_OPTION_NONE);

    if (r < 0) {
      fprintf(stderr, "matching error on line %d\n", lno);
      return 1;
    }

    /* Width and precision are both the match length, so exactly that many
       characters of the line are written. */
    span_len = region->end[0] - region->beg[0];
    fprintf(stdout, "%*.*s", span_len, span_len, buffer);
  }

  PRINT_TIMES

  return 0;
}
Пример #12
0
  // Runs onig_match() on `string` at byte offset `start` while holding
  // lock_, and returns the MatchData built by get_match_data().
  //
  // Returns nil when the pattern does not match there or `start` is past the
  // end of the string, and 0 when maybe_recompile() yields no regex data.
  // An argument error is raised for an uninitialized Regexp or for a string
  // whose encoding is not valid.
  MatchData* Regexp::match_start(STATE, String* string, Fixnum* start) {
    int beg, max;
    const UChar *str;
    const UChar *fin;

    if(unlikely(!onig_source_data(state))) {
      Exception::argument_error(state, "Not properly initialized Regexp");
    }

    if(unlikely(!CBOOL(string->valid_encoding_p(state)))) {
      std::ostringstream msg;
      msg << "invalid byte sequence in " << string->encoding(state)->name()->to_string(state);
      Exception::argument_error(state, msg.str().c_str());
    }

    max = string->byte_size();
    native_int pos = start->to_native();

    str = (UChar*)string->byte_address();
    fin = str + max;

    // Bounds check.
    if(pos > max) return nil<MatchData>();
    str += pos;

    lock_.lock();

    regex_t* data = maybe_recompile(state, string);
    if(!data) {
      // Release the lock on this exit path too; returning with it held would
      // leave lock_ locked for every later call.
      lock_.unlock();
      return 0;
    }

    // Caller-provided register storage, every slot preset to "no position".
    OnigPosition begin_reg[10] = { ONIG_REGION_NOTPOS, ONIG_REGION_NOTPOS,
                         ONIG_REGION_NOTPOS, ONIG_REGION_NOTPOS,
                         ONIG_REGION_NOTPOS, ONIG_REGION_NOTPOS,
                         ONIG_REGION_NOTPOS, ONIG_REGION_NOTPOS,
                         ONIG_REGION_NOTPOS, ONIG_REGION_NOTPOS };
    OnigPosition end_reg[10] =  { ONIG_REGION_NOTPOS, ONIG_REGION_NOTPOS,
                         ONIG_REGION_NOTPOS, ONIG_REGION_NOTPOS,
                         ONIG_REGION_NOTPOS, ONIG_REGION_NOTPOS,
                         ONIG_REGION_NOTPOS, ONIG_REGION_NOTPOS,
                         ONIG_REGION_NOTPOS, ONIG_REGION_NOTPOS };

    // NOTE(review): the meaning of each initializer depends on the OnigRegion
    // layout used by this build, which is not visible here.
    OnigRegion region = { 10, 0, begin_reg, end_reg, 0, 0 };

    int* back_match = data->int_map_backward;
    beg = onig_match(data, str, fin, str, &region,
                     ONIG_OPTION_NONE);

    // Seems like onig must setup int_map_backward lazily, so we have to watch
    // for it to appear here.
    if(data->int_map_backward != back_match) {
      native_int size = sizeof(int) * ONIG_CHAR_TABLE_SIZE;
      ByteArray* ba = ByteArray::create_dirty(state, size);
      memcpy(ba->raw_bytes(), data->int_map_backward, size);

      // Dispose of the old one.
      free(data->int_map_backward);

      // From here on the table lives inside the ByteArray's storage.
      data->int_map_backward = reinterpret_cast<int*>(ba->raw_bytes());

      write_barrier(state, ba);
    }

    lock_.unlock();

    MatchData* md = nil<MatchData>();
    if(beg != ONIG_MISMATCH) {
      md = get_match_data(state, &region, string, this, pos);
    }

    onig_region_free(&region, 0);
    return md;
  }