예제 #1
0
  // Runs this Regexp against +string+, restricted to the byte offsets +start+
  // and +end+.
  //
  // When +forward+ is truthy, onig_search is given +start+ as the search
  // start and +end+ as the range bound; otherwise the two offsets are handed
  // over in the opposite order (end as the start, start as the range bound).
  //
  // Returns the object produced by get_match_data on a hit, or Qnil when
  // onig_search reports ONIG_MISMATCH or when either offset lies outside
  // [0, string->size()].
  Object* Regexp::match_region(STATE, String* string, Fixnum* start,
                               Fixnum* end, Object* forward)
  {
    int beg, max;
    const UChar *str;
    OnigRegion *region;
    Object* md;

    if(unlikely(!onig_data)) {
      Exception::argument_error(state, "Not properly initialized Regexp");
    }

    maybe_recompile(state);

    max = string->size();
    str = (UChar*)string->c_str(state);

    native_int i_start = start->to_native();
    native_int i_end = end->to_native();

    // Reject offsets that would make str + offset point outside the string.
    // This is done before the region is allocated so the early return does
    // not leak it.
    if(i_start < 0 || i_end < 0 || i_start > max || i_end > max) {
      return Qnil;
    }

    region = onig_region_new();

    // Remember the current table pointer so we can tell whether onig_search
    // replaced it.
    int* back_match = onig_data->int_map_backward;

    if(!RTEST(forward)) {
      beg = onig_search(onig_data, str, str + max,
                        str + i_end,
                        str + i_start,
                        region, ONIG_OPTION_NONE);
    } else {
      beg = onig_search(onig_data, str, str + max,
                        str + i_start,
                        str + i_end,
                        region, ONIG_OPTION_NONE);
    }

    // Seems like onig must setup int_map_backward lazily, so we have to watch
    // for it to appear here.
    if(onig_data->int_map_backward != back_match) {
      native_int size = sizeof(int) * ONIG_CHAR_TABLE_SIZE;
      ByteArray* ba = ByteArray::create(state, size);
      memcpy(ba->raw_bytes(), onig_data->int_map_backward, size);

      // Dispose of the old one.
      free(onig_data->int_map_backward);

      // Point the regex at the copy held by the ByteArray instead.
      onig_data->int_map_backward = reinterpret_cast<int*>(ba->raw_bytes());

      write_barrier(state, ba);
    }

    if(beg == ONIG_MISMATCH) {
      onig_region_free(region, 1);
      return Qnil;
    }

    md = get_match_data(state, region, string, this, 0);
    onig_region_free(region, 1);
    return md;
  }
예제 #2
0
/* Clears the userdata's region, then searches argE->text from
 * argE->startoffset up to the end of the text (text + textlen), using the
 * caller-supplied execution flags. Returns whatever onig_search returns. */
static int findmatch_exec (TUserdata *ud, TArgExec *argE) {
  const char *subject     = argE->text;
  const char *subject_end = subject + argE->textlen;

  onig_region_clear(ud->region);

  return onig_search (ud->reg,
                      (CUC)subject, (CUC)subject_end,
                      (CUC)subject + argE->startoffset, (CUC)subject_end,
                      ud->region, argE->eflags);
}
예제 #3
0
    /// returns the index of the given QChar array in the given text
    /// @param charPtr the pointer to the string data
    /// @param offset the offset to start searching
    /// @param length the length of the string data
    /// @param reverse should the search be reversed?
    int indexIn( const QChar* charPtr, int offset, int length, bool reverse )
    {
        // invalid reg-exp don't use it!
        if( !valid_ ) { return -2; }

        // delete old regenion an make a new one
        deleteRegion();
        region_ = onig_region_new();

        lineRef_ = charPtr;
        OnigUChar* stringStart  = (OnigUChar*)charPtr;
        OnigUChar* stringEnd    = (OnigUChar*)(charPtr+length);
        OnigUChar* stringOffset = (OnigUChar*)(charPtr+offset);
        OnigUChar* stringRange  = (OnigUChar*)stringEnd;
        if( reverse ) {
            stringOffset = stringEnd; //==stringStart ? stringEnd : stringEnd-1;
            stringRange  = (OnigUChar*)(charPtr+offset);
        }

        clearError();

        int result = onig_search(reg_, stringStart, stringEnd, stringOffset, stringRange, region_, ONIG_OPTION_NONE);
        if ( result >= 0) {
            Q_ASSERT(result%2==0);
            return result>>1;

        } else if (result == ONIG_MISMATCH) {
예제 #4
0
파일: read.c 프로젝트: tanaton/read2ch
static unstr_t* create_date_query(const nich_t *nich, size_t res_no, const unstr_t *data)
{
	OnigRegion *region = onig_region_new();
	unstr_t *strtmp = 0;
	unstr_t *query = 0;
	UChar *start;
	UChar *end;
	UChar *range;
	int ret = -1;
	int i = 0;

	end = (UChar *)(data->data + unstr_strlen(data));
	start = (UChar *)(data->data);
	range = end;

	ret = onig_search(nich->reg, (UChar *)data->data, end, start, range, region, ONIG_OPTION_NONE);
	if(ret >= 0){
		strtmp = unstr_init_memory(8);
		query = unstr_sprintf(NULL, "(%d,%$,%d,'", nich->board_no, nich->thread, res_no);
		for(i = 1; i < region->num_regs; i++){
			unstr_substr_char(strtmp, data->data + region->beg[i], region->end[i] - region->beg[i]);
			unstr_strcat(query, strtmp);
		}
		unstr_strcat_char(query, "')");
	}
	onig_region_clear(region);
	onig_region_free(region, 1);
	unstr_free(strtmp);
	return query;
}
예제 #5
0
/*
 * Runs o->re over [str, str_end), searching from offset start_pos with
 * offset end_pos as the range bound, and stores the groups in o->region.
 *
 * Returns the value of onig_search when it is >= 0, -1 when the offsets are
 * invalid (after reporting SL_InvalidParm_Error) or when nothing matched, and
 * -2 after handing any other onig status to throw_onig_error.
 */
static int do_onig_search_internal (Onig_Type *o, OnigOptionType option, UChar *str, UChar *str_end, int start_pos, int end_pos)
{
   long len = (long) (str_end - str);
   int status;

   onig_region_clear (o->region);

   /* Validate the offsets as integers *before* forming any pointer from
    * them: computing str + offset for an out-of-range offset is undefined
    * behaviour, so a pointer comparison made afterwards cannot be relied on.
    */
   if ((start_pos < 0) || (start_pos > len)
       || (end_pos < 0) || (end_pos > len))
     {
	SLang_verror (SL_InvalidParm_Error, "Invalid string offsets");
	return -1;
     }

   /* fwd search: (start <= search string < range)
    * bkw search: (range <= search string <= start)
    */
   status = onig_search (o->re, str, str_end, str + start_pos, str + end_pos, o->region, option);

   if (status >= 0)
     return status;

   if (status == ONIG_MISMATCH)
     return -1;

   throw_onig_error (status, NULL);
   return -2;
}
예제 #6
0
파일: reggnu.c 프로젝트: 0077cc/textmate
/* Forwards a (string, size, startpos, range) search request to onig_search:
 * the subject is [string, string + size), the search starts at
 * string + startpos and the range bound is string + startpos + range.
 * Returns onig_search's result unchanged. */
extern int
re_search(regex_t* bufp, const char* string, int size, int startpos, int range,
	  struct re_registers* regs)
{
  UChar* subject      = (UChar* )string;
  UChar* subject_end  = (UChar* )(string + size);
  UChar* search_from  = (UChar* )(string + startpos);
  UChar* search_bound = (UChar* )(string + startpos + range);

  return onig_search(bufp, subject, subject_end, search_from, search_bound,
		     regs, ONIG_OPTION_NONE);
}
예제 #7
0
/*
 * POSIX-style regexec() built on onig_search.
 *
 * Searches the whole of `str` with the compiled pattern in `reg` and, when
 * match offsets are requested, writes up to `nmatch` entries into `pmatch`.
 *
 * Returns 0 on a match, REG_NOMATCH when onig_search reports ONIG_MISMATCH,
 * REG_ESPACE when the temporary match buffer cannot be allocated, and the
 * result of onig2posix_error_code() for any other onig status.
 */
extern int
regexec(regex_t* reg, const char* str, size_t nmatch,
	regmatch_t pmatch[], int posix_options)
{
  int r, i, len;
  UChar* end;
  regmatch_t* pm;
  OnigOptionType options;

  /* Translate the POSIX execution flags into onig search options. */
  options = ONIG_OPTION_POSIX_REGION;
  if ((posix_options & REG_NOTBOL) != 0) options |= ONIG_OPTION_NOTBOL;
  if ((posix_options & REG_NOTEOL) != 0) options |= ONIG_OPTION_NOTEOL;

  /* Choose where onig_search writes its offsets:
   *  - nowhere, if the caller wants none or the pattern was compiled NOSUB;
   *  - a temporary buffer of num_mem + 1 entries, if the caller's array is
   *    smaller than that;
   *  - the caller's array otherwise.
   */
  if (nmatch == 0 || (reg->comp_options & REG_NOSUB) != 0) {
    pm = (regmatch_t* )NULL;
    nmatch = 0;
  }
  else if ((int )nmatch < ONIG_C(reg)->num_mem + 1) {
    pm = (regmatch_t* )xmalloc(sizeof(regmatch_t)
                               * (ONIG_C(reg)->num_mem + 1));
    if (pm == NULL)
      return REG_ESPACE;
  }
  else {
    pm = pmatch;
  }

  /* NOTE(review): ENC_STRING_LEN is defined elsewhere; it presumably stores
   * the encoding-aware byte length of str in len -- confirm. */
  ENC_STRING_LEN(ONIG_C(reg)->enc, str, len);
  end = (UChar* )(str + len);
  r = onig_search(ONIG_C(reg), (UChar* )str, end, (UChar* )str, end,
		  (OnigRegion* )pm, options);

  if (r >= 0) {
    r = 0; /* Match */
    /* A temporary buffer was used: hand back only what the caller has room for. */
    if (pm != pmatch && pm != NULL) {
      xmemcpy(pmatch, pm, sizeof(regmatch_t) * nmatch);
    }
  }
  else if (r == ONIG_MISMATCH) {
    r = REG_NOMATCH;
    /* Mark every requested entry as "no position" (nmatch is 0 when pm is NULL). */
    for (i = 0; i < (int )nmatch; i++)
      pmatch[i].rm_so = pmatch[i].rm_eo = ONIG_REGION_NOTPOS;
  }
  else {
    r = onig2posix_error_code(r);
  }

  /* Release the temporary buffer, if one was allocated above. */
  if (pm != pmatch && pm != NULL)
    xfree(pm);

#if 0
  if (reg->re_nsub > nmatch - 1)
    reg->re_nsub = (nmatch <= 1 ? 0 : nmatch - 1);
#endif

  return r;
}
예제 #8
0
// Pawn native: compiles the pattern in params[2] (Perl syntax, ASCII
// encoding), searches the string in params[1] from its start, and writes the
// result through the references in params[3] / params[4].
//
// On a match, *params[3] receives the value returned by onig_search and
// *params[4] receives the end offset of the last region entry minus one; on a
// mismatch both receive -1.
//
// Returns 1 after a completed search (match or mismatch), -1 on a compile
// error, a search error, an unusable output address or a failed region
// allocation, and -1337 when either string parameter came back NULL.
cell AMX_NATIVE_CALL pawn_regex_search(AMX* amx, cell* params)
{
	const char *rexp = NULL, *string = NULL;
	cell* addr[2] = {NULL, NULL};
	amx_GetAddr(amx, params[3], &addr[0]);
	amx_GetAddr(amx, params[4], &addr[1]);
	amx_StrParam(amx, params[1], string);
	amx_StrParam(amx, params[2], rexp);
	if(!string || !rexp)
		return -1337;

	// The results are written through these pointers; refuse to continue if
	// either address could not be resolved instead of dereferencing NULL.
	if(addr[0] == NULL || addr[1] == NULL)
		return -1;

	regex_t* RegExpr = NULL;
	OnigErrorInfo einfo;
	UChar* pattern = (UChar* )rexp;
	int r = onig_new(&RegExpr, pattern, pattern + strlen(rexp), ONIG_OPTION_DEFAULT, ONIG_ENCODING_ASCII, ONIG_SYNTAX_PERL, &einfo);
	//logprintf("[REGEX DEBUG]: rexp %s",pattern);
	if(r != ONIG_NORMAL)
	{
		UChar s[ONIG_MAX_ERROR_MESSAGE_LEN];
		onig_error_code_to_str(s, r, &einfo);
		logprintf("[REGEX ERROR]: %s", s);
		onig_free(RegExpr);
		return -1;
	}

	OnigRegion *region = onig_region_new();
	if(region == NULL)
	{
		logprintf("[REGEX ERROR]: %s", "failed to allocate match region");
		onig_free(RegExpr);
		return -1;
	}

	// Measure the subject once; it is both the string end and the range bound.
	UChar* str = (UChar* )string;
	UChar* str_end = str + strlen(string);
	r = onig_search(RegExpr, str, str_end, str, str_end, region, ONIG_OPTION_NONE);

	cell result = 1;
	if(r>=0)
	{
		*addr[0]=r;
		*addr[1]=region->end[region->num_regs-1]-1;
	}
	else if(r==ONIG_MISMATCH)
	{
		*addr[0]=-1;
		*addr[1]=-1;
	}
	else
	{
		UChar s[ONIG_MAX_ERROR_MESSAGE_LEN];
		onig_error_code_to_str(s, r);
		logprintf("[REGEX ERROR]: %s\n", s);
		result = -1;
	}

	// Single cleanup path for every outcome of the search.
	//logprintf("[REGEX DEBUG]: string %s",str);
	onig_region_free(region, 1);
	onig_free(RegExpr);
	//logprintf("[REGEX DEBUG]: return %d",r);
	return result;
}
예제 #9
0
static int 
regexp_main(char *pat0, char *str0) 
{ 
    int r; 
    unsigned char *start, *range, *end; 
    regex_t* reg; 
    OnigErrorInfo einfo; 
    OnigRegion *region; 
     
    static UChar* pattern; 
    static UChar* str; 
    int i; 
     
    pattern = (UChar* )pat0; 
    str = (UChar* )str0; 
     
    strcpy(data_str, str); 
    for (i = 0; i  NAMES; i ++) { 
        data_rslt[i].n[0] = 0; 
    } 
     
    r = onig_new(&reg, pattern, pattern + strlen((char* )pattern), 
                 ONIG_OPTION_DEFAULT, ONIG_ENCODING_ASCII, ONIG_SYNTAX_DEFAULT, &einfo); 
    if (r != ONIG_NORMAL) { 
        char s[ONIG_MAX_ERROR_MESSAGE_LEN]; 
        onig_error_code_to_str(s, r, &einfo); 
        return -1; 
    } 
     
    region = onig_region_new(); 
     
    end   = str + strlen((char* )str); 
    start = str; 
    range = end; 
    r = onig_search(reg, str, end, start, range, region, ONIG_OPTION_NONE); 
    if (r = 0) { 
        r = onig_foreach_name(reg, name_callback, (void* )region); 
    } 
    else if (r == ONIG_MISMATCH) { 
        return 1; 
    } 
    else {                                  /* error */ 
        char s[ONIG_MAX_ERROR_MESSAGE_LEN]; 
        onig_error_code_to_str(s, r); 
        return -1; 
    } 
     
    onig_region_free(region, 1); 
    /* 1:free self, 0:free contents only */ 
    onig_free(reg); 
    onig_end(); 
    return 0; 
} 
예제 #10
0
파일: listcap.c 프로젝트: Jin-chan/Onigmo
/*
 * Compiles `pattern` with the given syntax (ASCII encoding), prints the
 * capture and capture-history counts, searches `str` from its start, and on a
 * match prints every region entry and walks the capture tree with
 * node_callback.
 *
 * Returns 0 after a match or a mismatch, -1 on a compile or search error.
 */
extern int ex(unsigned char* str, unsigned char* pattern,
              OnigSyntaxType* syntax)
{
  int r;
  int status = 0;
  unsigned char *start, *range, *end;
  regex_t* reg;
  OnigErrorInfo einfo;
  OnigRegion *region;

  r = onig_new(&reg, pattern, pattern + strlen((char* )pattern),
	       ONIG_OPTION_DEFAULT, ONIG_ENCODING_ASCII, syntax, &einfo);
  if (r != ONIG_NORMAL) {
    char s[ONIG_MAX_ERROR_MESSAGE_LEN];
    onig_error_code_to_str((unsigned char* )s, r, &einfo);
    fprintf(stderr, "ERROR: %s\n", s);
    return -1;
  }

  fprintf(stderr, "number of captures: %d\n", onig_number_of_captures(reg));
  fprintf(stderr, "number of capture histories: %d\n",
          onig_number_of_capture_histories(reg));

  region = onig_region_new();

  end   = str + strlen((char* )str);
  start = str;
  range = end;
  r = onig_search(reg, str, end, start, range, region, ONIG_OPTION_NONE);
  if (r >= 0) {
    int i;

    fprintf(stderr, "match at %d\n", r);
    for (i = 0; i < region->num_regs; i++) {
      fprintf(stderr, "%d: (%ld-%ld)\n", i, region->beg[i], region->end[i]);
    }
    fprintf(stderr, "\n");

    /* The traversal result is not reflected in the return value. */
    r = onig_capture_tree_traverse(region, ONIG_TRAVERSE_CALLBACK_AT_FIRST,
                                   node_callback, (void* )0);
  }
  else if (r == ONIG_MISMATCH) {
    fprintf(stderr, "search fail\n");
  }
  else { /* error */
    char s[ONIG_MAX_ERROR_MESSAGE_LEN];
    onig_error_code_to_str((unsigned char* )s, r);
    /* Report the failure the same way the compile-error path does. */
    fprintf(stderr, "ERROR: %s\n", s);
    status = -1;
  }

  /* Shared cleanup: the error path used to return without releasing these. */
  onig_region_free(region, 1 /* 1:free self, 0:free contents only */);
  onig_free(reg);
  return status;
}
예제 #11
0
파일: crnl.c 프로젝트: Jin-chan/Onigmo
/*
 * Test helper: compiles pattern_arg (UTF-8, ONIG_OPTION_NEWLINE_CRLF), runs
 * one search over str_arg and reports the outcome through result().
 *
 * For a forward search the start is str + start_offset and the range bound is
 * the end of the string; when `backward` is non-zero the start is
 * end + start_offset and the range bound is the beginning of the string.
 *
 * Returns 0 after a match or mismatch has been reported, -1 on a compile or
 * search error.
 */
static int
x0(int no, char* pattern_arg, char* str_arg,
   int start_offset, int expected_from, int expected_to, int backward)
{
  int r;
  int status = 0;
  unsigned char *start, *range, *end;
  regex_t* reg;
  OnigErrorInfo einfo;
  OnigRegion *region;
  UChar *pattern, *str;

  pattern = (UChar* )pattern_arg;
  str     = (UChar* )str_arg;

  r = onig_new(&reg, pattern, pattern + strlen((char* )pattern),
	ONIG_OPTION_NEWLINE_CRLF, ONIG_ENCODING_UTF8, ONIG_SYNTAX_DEFAULT, &einfo);
  if (r != ONIG_NORMAL) {
    char s[ONIG_MAX_ERROR_MESSAGE_LEN];
    onig_error_code_to_str((UChar* )s, r, &einfo);
    fprintf(stderr, "ERROR: %s\n", s);
    return -1;
  }

  region = onig_region_new();

  end   = str + strlen((char* )str);
  if (backward) {
    start = end + start_offset;
    range = str;
  }
  else {
    start = str + start_offset;
    range = end;
  }
  r = onig_search(reg, str, end, start, range, region, ONIG_OPTION_NONE);
  if (r >= 0) {
    result(no, region->beg[0], region->end[0], expected_from, expected_to);
  }
  else if (r == ONIG_MISMATCH) {
    result(no, r, -1, expected_from, expected_to);
  }
  else { /* error */
    char s[ONIG_MAX_ERROR_MESSAGE_LEN];
    onig_error_code_to_str((UChar* )s, r);
    fprintf(stderr, "ERROR: %s\n", s);
    status = -1;
  }

  /* Shared cleanup: the error path used to return without releasing these. */
  onig_region_free(region, 1 /* 1:free self, 0:free contents only */);
  onig_free(reg);
  return status;
}
예제 #12
0
파일: simple.c 프로젝트: Jin-chan/Onigmo
extern int main(int argc, char* argv[])
{
  int r;
  unsigned char *start, *range, *end;
  regex_t* reg;
  OnigErrorInfo einfo;
  OnigRegion *region;

  static UChar* pattern = (UChar* )"a(.*)b|[e-f]+";
  static UChar* str     = (UChar* )"zzzzaffffffffb";

  r = onig_new(&reg, pattern, pattern + strlen((char* )pattern),
	ONIG_OPTION_DEFAULT, ONIG_ENCODING_ASCII, ONIG_SYNTAX_DEFAULT, &einfo);
  if (r != ONIG_NORMAL) {
    char s[ONIG_MAX_ERROR_MESSAGE_LEN];
    onig_error_code_to_str(s, r, &einfo);
    fprintf(stderr, "ERROR: %s\n", s);
    return -1;
  }

  region = onig_region_new();

  end   = str + strlen((char* )str);
  start = str;
  range = end;
  r = onig_search(reg, str, end, start, range, region, ONIG_OPTION_NONE);
  if (r >= 0) {
    int i;

    fprintf(stderr, "match at %d\n", r);
    for (i = 0; i < region->num_regs; i++) {
      fprintf(stderr, "%d: (%ld-%ld)\n", i, region->beg[i], region->end[i]);
    }
    r = 0;
  }
  else if (r == ONIG_MISMATCH) {
    fprintf(stderr, "search fail\n");
    r = -1;
  }
  else { /* error */
    char s[ONIG_MAX_ERROR_MESSAGE_LEN];
    onig_error_code_to_str(s, r);
    fprintf(stderr, "ERROR: %s\n", s);
    return -1;
  }

  onig_region_free(region, 1 /* 1:free self, 0:free contents only */);
  onig_free(reg);
  onig_end();
  return r;
}
예제 #13
0
파일: names.c 프로젝트: arki91/oniguruma
extern int main(int argc, char* argv[])
{
  int r;
  unsigned char *start, *range, *end;
  regex_t* reg;
  OnigErrorInfo einfo;
  OnigRegion *region;

  static UChar* pattern = (UChar* )"(?<foo>a*)(?<bar>b*)(?<foo>c*)";
  static UChar* str = (UChar* )"aaabbbbcc";

  OnigEncoding use_encs[] = { ONIG_ENCODING_ASCII };
  onig_initialize(use_encs, sizeof(use_encs)/sizeof(use_encs[0]));

  r = onig_new(&reg, pattern, pattern + strlen((char* )pattern),
	ONIG_OPTION_DEFAULT, ONIG_ENCODING_ASCII, ONIG_SYNTAX_DEFAULT, &einfo);
  if (r != ONIG_NORMAL) {
    char s[ONIG_MAX_ERROR_MESSAGE_LEN];
    onig_error_code_to_str(s, r, &einfo);
    fprintf(stderr, "ERROR: %s\n", s);
    return -1;
  }

  fprintf(stderr, "number of names: %d\n", onig_number_of_names(reg));

  region = onig_region_new();

  end   = str + strlen((char* )str);
  start = str;
  range = end;
  r = onig_search(reg, str, end, start, range, region, ONIG_OPTION_NONE);
  if (r >= 0) {
    fprintf(stderr, "match at %d\n\n", r);
    r = onig_foreach_name(reg, name_callback, (void* )region);
  }
  else if (r == ONIG_MISMATCH) {
    fprintf(stderr, "search fail\n");
  }
  else { /* error */
    char s[ONIG_MAX_ERROR_MESSAGE_LEN];
    onig_error_code_to_str(s, r);
    return -1;
  }

  onig_region_free(region, 1 /* 1:free self, 0:free contents only */);
  onig_free(reg);
  onig_end();
  return 0;
}
예제 #14
0
파일: operator.c 프로젝트: groonga/groonga
/*
 * Reports whether `regex` matches anywhere in the target_len bytes starting
 * at `target`. No region is requested, so only the match position is used.
 *
 * Returns true only when onig_search yields a non-negative position.
 * ONIG_MISMATCH and every other negative (error) status count as "no match";
 * comparing against ONIG_MISMATCH alone would report an error as a match.
 */
static grn_bool
regexp_is_match(grn_ctx *ctx, OnigRegex regex,
                const char *target, unsigned int target_len)
{
  OnigPosition position;

  position = onig_search(regex,
                         target,
                         target + target_len,
                         target,
                         target + target_len,
                         NULL,
                         ONIG_OPTION_NONE);
  return position >= 0;
}
예제 #15
0
  // Searches +string+ with this Regexp, beginning at byte offset +start+ and
  // treating that offset as the start of the subject handed to onig_search.
  //
  // Returns the object produced by get_match_data (called with the offset)
  // on a hit, or Qnil when onig_search reports ONIG_MISMATCH or when the
  // offset lies outside [0, string->size()].
  Object* Regexp::search_from(STATE, String* string, Fixnum* start) {
    int beg, max;
    const UChar *str;
    const UChar *fin;
    OnigRegion *region;
    Object* md = Qnil;

    maybe_recompile(state);

    max = string->size();
    native_int pos = start->to_native();

    // A negative offset would move the subject pointer in front of the
    // buffer, and one past the end cannot match; bail out before allocating
    // the region so nothing has to be released.
    if(pos < 0 || pos > max) {
      return Qnil;
    }

    region = onig_region_new();

    str = (UChar*)string->c_str();
    fin = str + max;

    str += pos;

    // Remember the current table pointer so we can tell whether onig_search
    // replaced it.
    int* back_match = onig_data->int_map_backward;

    beg = onig_search(onig_data, str, fin, str, fin,
                      region, ONIG_OPTION_NONE);

    // Seems like onig must setup int_map_backward lazily, so we have to watch
    // for it to appear here.
    if(onig_data->int_map_backward != back_match) {
      native_int size = sizeof(int) * ONIG_CHAR_TABLE_SIZE;
      ByteArray* ba = ByteArray::create(state, size);
      memcpy(ba->raw_bytes(), onig_data->int_map_backward, size);

      // Dispose of the old one.
      free(onig_data->int_map_backward);

      // Point the regex at the copy held by the ByteArray instead.
      onig_data->int_map_backward = reinterpret_cast<int*>(ba->raw_bytes());

      write_barrier(state, ba);
    }

    if(beg != ONIG_MISMATCH) {
      md = get_match_data(state, region, string, this, pos);
    }

    onig_region_free(region, 1);
    return md;
  }
예제 #16
0
파일: onig.c 프로젝트: czrocklee/regex_test
/*
 * Benchmark helper: compiles `pattern` (ASCII, default syntax) and counts
 * matches in the first subject_len bytes of `subject`, restarting each search
 * just after the previous match. The scan runs at least once and up to
 * `repeat` times; the fastest pass and the match count of the last pass are
 * handed to printResult.
 */
void onig_find_all(char* pattern, char* subject, int subject_len, int repeat)
{
	regex_t* reg;
	OnigRegion *region;
	clock_t best_time = 0, time;
	unsigned char *ptr;
	int res, len, found, advance;

	res = onig_new(&reg, (unsigned char *)pattern, (unsigned char *)pattern + strlen((char* )pattern),
		ONIG_OPTION_DEFAULT, ONIG_ENCODING_ASCII, ONIG_SYNTAX_DEFAULT, NULL);
	if (res != ONIG_NORMAL) {
		printf("Onig compilation failed\n");
		return;
	}
	region = onig_region_new();
	if (!region) {
		printf("Cannot allocate region\n");
		/* The pattern was compiled successfully; do not leak it. */
		onig_free(reg);
		return;
	}

	do {
		found = 0;
		ptr = (unsigned char *)subject;
		len = subject_len;

		time = clock();
		while (1) {
			res = onig_search(reg, ptr, ptr + len, ptr, ptr + len, region, ONIG_OPTION_NONE);
			if (res < 0)
				break;
			// printf("match: %d %d\n", (ptr - (unsigned char *)subject) + region->beg[0], (ptr - (unsigned char *)subject) + region->end[0]);
			found++;

			/* Offsets are relative to ptr. A match ending at offset 0 is
			 * empty and would leave ptr unchanged forever, so step over one
			 * byte instead (or stop if nothing is left to scan). */
			advance = region->end[0];
			if (advance == 0) {
				if (len == 0)
					break;
				advance = 1;
			}
			ptr += advance;
			len -= advance;
		}
		time = clock() - time;
		if (!best_time || time < best_time)
			best_time = time;
	} while (--repeat > 0);
	printResult("onig", best_time * 1000 / CLOCKS_PER_SEC, found);

	onig_region_free(region, 1);
	onig_free(reg);
}
예제 #17
0
파일: chelper.c 프로젝트: admpub/gonigmo
/*
 * Searches the str_length bytes at `str` with `regex`, starting at `offset`
 * and bounded by the end of the string.
 *
 * If the search returns a negative status and error_buffer is non-NULL, the
 * status is rendered into error_buffer and NUL-terminated. In every other
 * case, when `captures` is non-NULL, the begin/end pair of each region entry
 * is written to captures[2*i] / captures[2*i+1] and the number of pairs is
 * stored in *numCaptures.
 *
 * Returns the value of onig_search.
 */
int SearchOnigRegex(void *str, int str_length, int offset, int option,
                  OnigRegex regex, OnigRegion *region, OnigErrorInfo *error_info, char *error_buffer, int *captures, int *numCaptures) {
    int ret = ONIG_MISMATCH;
    int error_msg_len = 0;
#ifdef BENCHMARK_CHELP
    struct timeval tim1, tim2;
    long t;
#endif

    OnigUChar *text      = (OnigUChar *) str;
    OnigUChar *text_end  = (OnigUChar *) (text + str_length);
    OnigUChar *scan_from = (OnigUChar *) (text + offset);

#ifdef BENCHMARK_CHELP
    gettimeofday(&tim1, NULL);
#endif

    ret = onig_search(regex, text, text_end, scan_from, text_end, region, option);

    if (ret >= 0 || error_buffer == NULL) {
        if (captures != NULL) {
            int idx;
            for (idx = 0; idx < region->num_regs; idx++) {
                captures[2*idx]     = region->beg[idx];
                captures[2*idx + 1] = region->end[idx];
            }
            /* idx now equals the number of pairs written. */
            *numCaptures = idx;
        }
    }
    else {
        error_msg_len = onig_error_code_to_str((unsigned char*)(error_buffer), ret, error_info);
        /* Keep the terminator inside the maximum message size. */
        if (error_msg_len >= ONIG_MAX_ERROR_MESSAGE_LEN) {
            error_msg_len = ONIG_MAX_ERROR_MESSAGE_LEN - 1;
        }
        error_buffer[error_msg_len] = '\0';
    }

#ifdef BENCHMARK_CHELP
    gettimeofday(&tim2, NULL);
    t = (tim2.tv_sec - tim1.tv_sec) * 1000000 + tim2.tv_usec - tim1.tv_usec;
    printf("%ld microseconds elapsed\n", t);
#endif
    return ret;
}
예제 #18
0
// Pawn native: searches the string in params[1] with the precompiled
// expression whose index is params[2], writing the result through the
// references in params[3] / params[4].
//
// On a match, *params[3] receives the value returned by onig_search and
// *params[4] receives the end offset of the last region entry minus one; on a
// mismatch both receive -1.
//
// Returns 1 after a completed search (match or mismatch), -1 for an
// out-of-range index, an unusable output address or a search error, and -1337
// when the string parameter came back NULL.
cell AMX_NATIVE_CALL pawn_regex_exsearch(AMX* amx, cell* params)
{
	const char *string = NULL;
	cell* addr[2] = {NULL, NULL};
	amx_GetAddr(amx, params[3], &addr[0]);
	amx_GetAddr(amx, params[4], &addr[1]);
	amx_StrParam(amx, params[1], string);
	if(!string)
		return -1337;

	const int id=(int)params[2];
	if(id<0 || id>=TotalExpressions)
	{
		logprintf("[REGEX ERROR]: Call regex_exsearch with undefined parameter at index %d", id);
		return -1;
	}

	// The results are written through these pointers; refuse to continue if
	// either address could not be resolved instead of dereferencing NULL.
	if(addr[0] == NULL || addr[1] == NULL)
		return -1;

	// Measure the subject once; it is both the string end and the range bound.
	UChar* str = (UChar* )string;
	UChar* str_end = str + strlen(string);

	onig_region_clear(rexpression[id].zreg);
	int r = onig_search(rexpression[id].RegExpr, str, str_end, str, str_end, rexpression[id].zreg, ONIG_OPTION_NONE);
	if(r>=0)
	{
		*addr[0]=r;
		*addr[1]=rexpression[id].zreg->end[rexpression[id].zreg->num_regs-1]-1;
	}
	else if(r==ONIG_MISMATCH)
	{
		*addr[0]=-1;
		*addr[1]=-1;
	}
	else
	{
		UChar s[ONIG_MAX_ERROR_MESSAGE_LEN];
		onig_error_code_to_str(s, r);
		logprintf("[REGEX ERROR]: %s\n", s);
		return -1;
	}
	return 1;
}
// Picks the best match among `regexes` for the line [lineStart, lineEnd),
// searching from `searchStart`.
//
// A regex is searched again when this is the first search on the line, or
// when its cached matchStart is non-negative but lies before the current
// search offset; otherwise the cached matchStart is reused. Regexes whose
// matchStart is negative are skipped. A candidate replaces the current best
// whenever bestMatch.IsBetterThan( start, length ) returns false for it.
static TextMateBestMatch FindBestMatch( const char* lineStart,
                                        const char* searchStart,
                                        const char* lineEnd,
                                        bool isFirstSearch,
                                        const std::vector<TextMateRegex*>& regexes )
{
	const OnigUChar* subjectBegin = reinterpret_cast<const OnigUChar*>( lineStart );
	const OnigUChar* subjectEnd   = reinterpret_cast<const OnigUChar*>( lineEnd );
	const OnigUChar* scanFrom     = reinterpret_cast<const OnigUChar*>( searchStart );
	const auto searchOffset       = searchStart - lineStart;

	TextMateBestMatch winner;

	const int32_t count = regexes.size();
	for ( int32_t idx = 0; idx < count; ++idx )
	{
		TextMateRegex* candidate = regexes[idx];

		// Refresh the cached position only when it can no longer be trusted.
		const bool needsSearch =
			isFirstSearch || ( candidate->matchStart >= 0 && candidate->matchStart < searchOffset );
		if ( needsSearch )
		{
			candidate->matchStart = onig_search( candidate->regex.get(),
			                                     subjectBegin,
			                                     subjectEnd,
			                                     scanFrom,
			                                     subjectEnd,
			                                     candidate->region.get(),
			                                     ONIG_OPTION_NONE );
		}

		if ( candidate->matchStart >= 0 )
		{
			uint32_t matchLength = candidate->region->end[0] - candidate->matchStart;

			if ( !winner.IsBetterThan( candidate->matchStart, matchLength ) )
			{
				winner.index  = idx;
				winner.start  = candidate->matchStart;
				winner.length = matchLength;
			}
		}
	}

	return winner;
}
예제 #20
0
// Pawn native: searches the string in params[1] with the precompiled
// expression whose index is params[2] and, on a match, replaces the span from
// the match position to the end offset of the last region entry with the
// string in params[3], storing the result back at the address of params[1].
//
// Returns the value of onig_search on a match or mismatch, -1 for an
// out-of-range index, an unusable destination address or a search error, and
// -1337 when the subject string came back NULL.
cell AMX_NATIVE_CALL pawn_regex_exreplace(AMX* amx, cell* params)
{
	const char *string = NULL, *replace = NULL;
	cell* addr = NULL;
	amx_GetAddr(amx, params[1], &addr);
	amx_StrParam(amx, params[1], string);
	amx_StrParam(amx, params[3], replace);
	if(!string)
		return -1337;

	const int id=(int)params[2];
	if(id<0 || id>=TotalExpressions)
	{
		logprintf("[REGEX ERROR]: Call regex_exreplace with undefined parameter at index %d", id);
		return -1;
	}

	// The result is stored through this pointer; refuse to continue if the
	// address could not be resolved.
	if(addr == NULL)
		return -1;

	// Measure the subject once; it is both the string end and the range bound.
	UChar* str = (UChar* )string;
	UChar* str_end = str + strlen(string);

	onig_region_clear(rexpression[id].zreg);
	int r = onig_search(rexpression[id].RegExpr, str, str_end, str, str_end, rexpression[id].zreg, ONIG_OPTION_NONE);
	if(r>=0)
	{
		std::string asd = std::string(string);
		// amx_StrParam leaves `replace` NULL for an empty Pawn string;
		// treat that as "replace with nothing" rather than handing a null
		// pointer to std::string::replace.
		// NOTE(review): the end offset comes from the last region entry,
		// which may not have taken part in the match -- confirm intent.
		asd.replace(asd.begin()+r, asd.begin()+rexpression[id].zreg->end[rexpression[id].zreg->num_regs-1], replace ? replace : "");
		// NOTE(review): params[1] is passed as the size argument here --
		// confirm that this is the intended destination size.
		amx_SetString(addr, asd.c_str(), 0, 0, params[1]);
	}
	else if(r<ONIG_MISMATCH)
	{
		UChar s[ONIG_MAX_ERROR_MESSAGE_LEN];
		onig_error_code_to_str(s, r);
		logprintf("[REGEX ERROR]: %s\n", s);
		return -1;
	}
	return r;
}
예제 #21
0
// Regex condition command: matches the pattern in runinfo->mArgsRuntime[1]
// against the input buffer SFD(nextin).mBuf.
//
// The command only does work when runinfo->mFilter is set and exactly two
// runtime arguments are present. runinfo->mRCode starts as RCODE_NFUN_FALSE
// and becomes 0 on a match. On a match the variables PREMATCH, MATCH, "0",
// POSTMATCH, one entry per numbered group, LAST_MATCH and MATCH_NUMBER are
// stored in gRootObject. With -verbose the match position is written to
// nextout; with -offsets the begin and end position of every group are
// written.
//
// Returns FALSE when writing to nextout fails or when the regex cannot be
// obtained, TRUE otherwise.
BOOL cmd_condition_re(sObject* nextin, sObject* nextout, sRunInfo* runinfo)
{
    // Character encoding used when converting pointers to positions;
    // defaults to gKanjiCode unless an option overrides it.
    enum eKanjiCode code = gKanjiCode;
    if(sRunInfo_option(runinfo, "-byte")) {
        code = kByte;
    }
    else if(sRunInfo_option(runinfo, "-utf8")) {
        code = kUtf8;
    }
    else if(sRunInfo_option(runinfo, "-sjis")) {
        code = kSjis;
    }
    else if(sRunInfo_option(runinfo, "-eucjp")) {
        code = kEucjp;
    }

    BOOL verbose = sRunInfo_option(runinfo, "-verbose");
    BOOL offsets = sRunInfo_option(runinfo, "-offsets");

    if(runinfo->mFilter && runinfo->mArgsNumRuntime == 2) {
        clear_matching_info_variable();

        //BOOL preserve = sRunInfo_option(runinfo, "-preserve");

        // Assume "no match" until the search below succeeds.
        runinfo->mRCode = RCODE_NFUN_FALSE;
        char* target = SFD(nextin).mBuf;
        char* regex = runinfo->mArgsRuntime[1];

        regex_t* reg;
        int r = get_onig_regex(&reg, runinfo, regex);

        if(r == ONIG_NORMAL) {
            //sObject* preserved_data = STRING_NEW_STACK();

            // Search the whole target from its beginning.
            OnigRegion* region = onig_region_new();
            int r2 = onig_search(reg, target
               , target + strlen(target)
               , target, target + strlen(target)
               , region, ONIG_OPTION_NONE);

            if(r2 >= 0) {
                // Text before the match, only when the match does not start at 0.
                if(region->beg[0] > 0) {
                    uobject_put(gRootObject, "PREMATCH", STRING_NEW_GC3(target, region->beg[0], FALSE));
                }

                const int size = region->end[0] - region->beg[0];

                // The whole match is published under both "MATCH" and "0".
                uobject_put(gRootObject, "MATCH", STRING_NEW_GC3(target + region->beg[0], size, FALSE));
                uobject_put(gRootObject, "0", STRING_NEW_GC3(target + region->beg[0], size, FALSE));

                // Text after the match, only when something is left.
                const int n = strlen(target)-region->end[0];
                if(n > 0) {
                    uobject_put(gRootObject, "POSTMATCH", STRING_NEW_GC3(target + region->end[0], n, FALSE));
                }

                // Numbered groups 1..num_regs-1, keyed by their decimal index.
                int i;
                for (i=1; i<region->num_regs; i++) {
                    const int size = region->end[i] - region->beg[i];

                    char name[16];
                    snprintf(name, 16, "%d", i);

                    uobject_put(gRootObject, name, STRING_NEW_GC3(target + region->beg[i], size, FALSE));
                }

                // LAST_MATCH is the text of the highest-numbered region entry.
                if(region->num_regs > 0) {
                    const int n = region->num_regs -1;

                    const int size = region->end[n] - region->beg[n];

                    uobject_put(gRootObject, "LAST_MATCH", STRING_NEW_GC3(target + region->beg[n], size, FALSE));
                }

                char buf[128];
                snprintf(buf, 128, "%d", region->num_regs);
                uobject_put(gRootObject, "MATCH_NUMBER", STRING_NEW_GC(buf, FALSE));

                // -verbose: write the match position as converted by
                // str_pointer2kanjipos for the selected encoding.
                if(verbose) {
                    int point = str_pointer2kanjipos(code, target, target + r2);

                    char buf[1024];
                    int size = snprintf(buf, 1024, "%d\n", point);
                    if(!fd_write(nextout, buf, size)) {
                        err_msg("signal interrupt", runinfo->mSName, runinfo->mSLine);
                        runinfo->mRCode = RCODE_SIGNAL_INTERRUPT;
                        // Release the region and regex before bailing out.
                        onig_region_free(region, 1);
                        onig_free(reg);
                        return FALSE;
                    }
                }

                // -offsets: write the begin position, then the end position,
                // of every region entry, one per line.
                if(offsets) {
                    int i;
                    for (i=0; i<region->num_regs; i++) {
                        int point = str_pointer2kanjipos(code, target, target + region->beg[i]);

                        char buf[1024];
                        int size = snprintf(buf, 1024, "%d\n", point);
                        if(!fd_write(nextout, buf, size)) {
                            err_msg("signal interrupt", runinfo->mSName, runinfo->mSLine);
                            runinfo->mRCode = RCODE_SIGNAL_INTERRUPT;
                            onig_region_free(region, 1);
                            onig_free(reg);
                            return FALSE;
                        }

                        point = str_pointer2kanjipos(code, target, target + region->end[i]);

                        size = snprintf(buf, 1024, "%d\n", point);
                        if(!fd_write(nextout, buf, size)) {
                            err_msg("signal interrupt", runinfo->mSName, runinfo->mSLine);
                            runinfo->mRCode = RCODE_SIGNAL_INTERRUPT;
                            onig_region_free(region, 1);
                            onig_free(reg);
                            return FALSE;
                        }
                    }
                }

                // Everything was published and written: report success.
                runinfo->mRCode = 0;
            }

            onig_region_free(region, 1);
            onig_free(reg);
        }
        else {
            // NOTE(review): reg is freed here although get_onig_regex did not
            // return ONIG_NORMAL -- confirm it always leaves reg in a state
            // that is safe to pass to onig_free.
            onig_free(reg);
            err_msg("=~: invalid regex", runinfo->mSName, runinfo->mSLine);
            return FALSE;
        }
    }

    return TRUE;
}
예제 #22
0
파일: sql.c 프로젝트: TeXShop/TeXShop
extern int main(int argc, char* argv[])
{
  static OnigSyntaxType SQLSyntax;

  int r;
  unsigned char *start, *range, *end;
  regex_t* reg;
  OnigErrorInfo einfo;
  OnigRegion *region;

  static UChar* pattern = (UChar* )"\\_%\\\\__zz";
  static UChar* str = (UChar* )"a_abcabcabc\\ppzz";

  onig_set_syntax_op      (&SQLSyntax, ONIG_SYN_OP_VARIABLE_META_CHARACTERS);
  onig_set_syntax_op2     (&SQLSyntax, 0);
  onig_set_syntax_behavior(&SQLSyntax, 0);
  onig_set_syntax_options (&SQLSyntax, ONIG_OPTION_MULTILINE);
  onig_set_meta_char(&SQLSyntax, ONIG_META_CHAR_ESCAPE, (OnigCodePoint )'\\');
  onig_set_meta_char(&SQLSyntax, ONIG_META_CHAR_ANYCHAR, (OnigCodePoint )'_');
  onig_set_meta_char(&SQLSyntax, ONIG_META_CHAR_ANYTIME,
		     ONIG_INEFFECTIVE_META_CHAR);
  onig_set_meta_char(&SQLSyntax, ONIG_META_CHAR_ZERO_OR_ONE_TIME,
		     ONIG_INEFFECTIVE_META_CHAR);
  onig_set_meta_char(&SQLSyntax, ONIG_META_CHAR_ONE_OR_MORE_TIME,
		     ONIG_INEFFECTIVE_META_CHAR);
  onig_set_meta_char(&SQLSyntax, ONIG_META_CHAR_ANYCHAR_ANYTIME,
		     (OnigCodePoint )'%');

  r = onig_new(&reg, pattern, pattern + strlen((char* )pattern),
	       ONIG_OPTION_DEFAULT, ONIG_ENCODING_ASCII, &SQLSyntax, &einfo);
  if (r != ONIG_NORMAL) {
    char s[ONIG_MAX_ERROR_MESSAGE_LEN];
    onig_error_code_to_str(s, r, &einfo);
    fprintf(stderr, "ERROR: %s\n", s);
    return -1;
  }

  region = onig_region_new();

  end   = str + strlen((char* )str);
  start = str;
  range = end;
  r = onig_search(reg, str, end, start, range, region, ONIG_OPTION_NONE);
  if (r >= 0) {
    int i;

    fprintf(stderr, "match at %d\n", r);
    for (i = 0; i < region->num_regs; i++) {
      fprintf(stderr, "%d: (%ld-%ld)\n", i, region->beg[i], region->end[i]);
    }
  }
  else if (r == ONIG_MISMATCH) {
    fprintf(stderr, "search fail\n");
  }
  else { /* error */
    char s[ONIG_MAX_ERROR_MESSAGE_LEN];
    onig_error_code_to_str(s, r);
    fprintf(stderr, "ERROR: %s\n", s);
    return -1;
  }

  onig_region_free(region, 1 /* 1:free self, 0:free contents only */);
  onig_free(reg);
  onig_end();
  return 0;
}
예제 #23
0
파일: regexp.cpp 프로젝트: Gimi/rubinius
  // Runs this Regexp against +string+, restricted to the byte offsets +start+
  // and +end+.
  //
  // When +forward+ is truthy, onig_search is given +start+ as the search
  // start and +end+ as the range bound; otherwise the two offsets are handed
  // over in the opposite order (end as the start, start as the range bound).
  //
  // Returns the object produced by get_match_data on a hit, or Qnil when
  // either offset lies outside [0, string->size()] or onig_search reports
  // ONIG_MISMATCH.
  Object* Regexp::match_region(STATE, String* string, Fixnum* start,
                               Fixnum* end, Object* forward)
  {
    int beg, max;
    const UChar *str;
    Object* md;

    if(unlikely(!onig_data)) {
      Exception::argument_error(state, "Not properly initialized Regexp");
    }

    // thread::Mutex::LockGuard lg(state->shared().onig_lock());

    max = string->size();
    str = (UChar*)string->byte_address();

    native_int i_start = start->to_native();
    native_int i_end = end->to_native();

    // Bounds check.
    if(i_start < 0 || i_end < 0 || i_start > max || i_end > max) {
      return Qnil;
    }

    // lock_ is held from here until after the int_map_backward fix-up below,
    // so recompilation, the search and the table swap happen under the lock.
    lock_.lock();

    maybe_recompile(state);

    // Stack storage for the region's begin/end offsets, every slot preset to
    // ONIG_REGION_NOTPOS.
    int begin_reg[10] = { ONIG_REGION_NOTPOS, ONIG_REGION_NOTPOS,
                         ONIG_REGION_NOTPOS, ONIG_REGION_NOTPOS,
                         ONIG_REGION_NOTPOS, ONIG_REGION_NOTPOS,
                         ONIG_REGION_NOTPOS, ONIG_REGION_NOTPOS,
                         ONIG_REGION_NOTPOS, ONIG_REGION_NOTPOS };
    int end_reg[10] =  { ONIG_REGION_NOTPOS, ONIG_REGION_NOTPOS,
                         ONIG_REGION_NOTPOS, ONIG_REGION_NOTPOS,
                         ONIG_REGION_NOTPOS, ONIG_REGION_NOTPOS,
                         ONIG_REGION_NOTPOS, ONIG_REGION_NOTPOS,
                         ONIG_REGION_NOTPOS, ONIG_REGION_NOTPOS };

    // NOTE(review): the region is built on the stack around the two arrays
    // above; the meaning of each initializer depends on the OnigRegion layout
    // of the onig build in use, which is not visible here -- confirm.
    OnigRegion region = { 10, 0, begin_reg, end_reg, 0, 0 };

    // Remember the current table pointer so we can tell whether onig_search
    // replaced it.
    int* back_match = onig_data->int_map_backward;

    if(!RTEST(forward)) {
      beg = onig_search(onig_data, str, str + max,
                        str + i_end,
                        str + i_start,
                        &region, ONIG_OPTION_NONE);
    } else {
      beg = onig_search(onig_data, str, str + max,
                        str + i_start,
                        str + i_end,
                        &region, ONIG_OPTION_NONE);
    }

    // Seems like onig must setup int_map_backward lazily, so we have to watch
    // for it to appear here.
    if(onig_data->int_map_backward != back_match) {
      native_int size = sizeof(int) * ONIG_CHAR_TABLE_SIZE;
      ByteArray* ba = ByteArray::create(state, size);
      memcpy(ba->raw_bytes(), onig_data->int_map_backward, size);

      // Dispose of the old one.
      free(onig_data->int_map_backward);

      // Point the regex at the copy held by the ByteArray instead.
      onig_data->int_map_backward = reinterpret_cast<int*>(ba->raw_bytes());

      write_barrier(state, ba);
    }

    lock_.unlock();

    // NOTE(review): onig_region_free is called with 0 ("contents only") on a
    // region whose arrays start out on the stack -- confirm that the onig
    // build in use handles this case.
    if(beg == ONIG_MISMATCH) {
      onig_region_free(&region, 0);
      return Qnil;
    }

    md = get_match_data(state, &region, string, this, 0);
    onig_region_free(&region, 0);
    return md;
  }
예제 #24
0
/* {{{ _php_mb_regex_ereg_exec */
/*
 * Common worker for the ereg-style multibyte match entry points.
 *
 * PHP arguments ("zs|z/"): pattern, subject string, optional by-reference
 * result array.  `icase` adds ONIG_OPTION_IGNORECASE to the default options.
 *
 * Return value (through return_value):
 *   - FALSE on argument-parse failure, when the subject fails the encoding
 *     check, on an empty pattern, when the pattern cannot be compiled, or
 *     when onig_search() returns a negative value.
 *   - Otherwise the byte length of the whole match when a result array was
 *     supplied (a zero length is reported as 1), or 1 when it was not.
 *
 * When a result array is supplied it is emptied first; on a match it receives
 * one entry per capture group: the matched bytes, or FALSE for a group that
 * did not participate, matched the empty string, or lies outside the subject.
 */
static void _php_mb_regex_ereg_exec(INTERNAL_FUNCTION_PARAMETERS, int icase)
{
	zval *arg_pattern, *array = NULL;
	char *string;
	size_t string_len;
	php_mb_regex_t *re;
	OnigRegion *regs;
	OnigOptionType options;
	int encoding_ok, group, first, last, match_len;

	if (zend_parse_parameters(ZEND_NUM_ARGS(), "zs|z/", &arg_pattern, &string, &string_len, &array) == FAILURE) {
		RETURN_FALSE;
	}

	encoding_ok = php_mb_check_encoding(string, string_len, _php_mb_regex_mbctype2name(MBREX(current_mbctype)));

	/* The result array is reset whether or not the subject is well-formed. */
	if (array != NULL) {
		zval_dtor(array);
		array_init(array);
	}

	if (!encoding_ok) {
		RETURN_FALSE;
	}

	options = MBREX(regex_default_options);
	if (icase) {
		options |= ONIG_OPTION_IGNORECASE;
	}

	/* Coerce a non-string pattern to a string; doubles lose their decimals first. */
	switch (Z_TYPE_P(arg_pattern)) {
		case IS_STRING:
			break;
		case IS_DOUBLE:
			convert_to_long_ex(arg_pattern);
			/* fall through */
		default:
			convert_to_string_ex(arg_pattern);
			break;
	}

	/* No region has been allocated yet, so these failures can return directly. */
	if (Z_STRLEN_P(arg_pattern) == 0) {
		php_error_docref(NULL, E_WARNING, "empty pattern");
		RETURN_FALSE;
	}

	re = php_mbregex_compile_pattern(Z_STRVAL_P(arg_pattern), Z_STRLEN_P(arg_pattern), options, MBREX(current_mbctype), MBREX(regex_default_syntax));
	if (re == NULL) {
		RETURN_FALSE;
	}

	regs = onig_region_new();

	/* Search the whole subject, starting at its first byte. */
	if (onig_search(re, (OnigUChar *)string, (OnigUChar *)(string + string_len), (OnigUChar *)string, (OnigUChar *)(string + string_len), regs, 0) < 0) {
		RETVAL_FALSE;
	} else {
		match_len = 1;

		if (array != NULL) {
			match_len = regs->end[0] - regs->beg[0];

			for (group = 0; group < regs->num_regs; group++) {
				first = regs->beg[group];
				last = regs->end[group];

				if (first < 0 || first >= last || (size_t)last > string_len) {
					add_index_bool(array, group, 0);
				} else {
					add_index_stringl(array, group, string + first, last - first);
				}
			}
		}

		/* A zero-length match is still reported as a truthy 1. */
		if (match_len == 0) {
			match_len = 1;
		}
		RETVAL_LONG(match_len);
	}

	if (regs != NULL) {
		onig_region_free(regs, 1);
	}
}
예제 #25
0
/* {{{ _php_mb_regex_ereg_replace_exec */
/*
 * Shared implementation of the multibyte regex replace entry points.
 *
 * PHP arguments:
 *   is_callable == 0: "zss|s" -> pattern, replacement string, subject, options
 *   is_callable != 0: "zfs|s" -> pattern, replacement callback, subject, options
 *
 * `options` carries caller-supplied Oniguruma option bits.  When no option
 * string is given they are OR-ed with the configured defaults; when one is
 * given it is passed to _php_mb_regex_init_options() together with `syntax`
 * and the `eval` flag.
 *
 * Processing: the subject is scanned repeatedly with onig_search().  For each
 * match the text before it is copied to the output, then the replacement is
 * produced by one of three routes:
 *   - plain string: "\0".."\9" are replaced by the corresponding group,
 *   - eval ('e' option): the expanded replacement is evaluated as PHP code,
 *   - callback: the callback receives an array of all groups.
 * After an empty match the scan advances by one byte so the loop terminates.
 *
 * Return value (through return_value):
 *   - FALSE on argument errors, unknown encoding, compile failure, the
 *     'e' + callback combination, eval failure, or an onig_search() error,
 *   - NULL when the subject fails the encoding check,
 *   - otherwise the resulting string (possibly empty).
 */
static void _php_mb_regex_ereg_replace_exec(INTERNAL_FUNCTION_PARAMETERS, OnigOptionType options, int is_callable)
{
	zval *arg_pattern_zval;

	char *arg_pattern;
	size_t arg_pattern_len;

	char *replace;
	size_t replace_len;

	zend_fcall_info arg_replace_fci;
	zend_fcall_info_cache arg_replace_fci_cache;

	char *string;
	size_t string_len;

	char *p;
	php_mb_regex_t *re;
	OnigSyntaxType *syntax;
	OnigRegion *regs = NULL;
	smart_str out_buf = {0};
	smart_str eval_buf = {0};
	smart_str *pbuf;
	size_t i;
	int err, eval, n;
	OnigUChar *pos;
	OnigUChar *string_lim;
	char *description = NULL;
	char pat_buf[6];

	const mbfl_encoding *enc;

	{
		const char *current_enc_name;
		current_enc_name = _php_mb_regex_mbctype2name(MBREX(current_mbctype));
		if (current_enc_name == NULL ||
			(enc = mbfl_name2encoding(current_enc_name)) == NULL) {
			php_error_docref(NULL, E_WARNING, "Unknown error");
			RETURN_FALSE;
		}
	}
	eval = 0;
	{
		char *option_str = NULL;
		size_t option_str_len = 0;

		if (!is_callable) {
			if (zend_parse_parameters(ZEND_NUM_ARGS(), "zss|s",
						&arg_pattern_zval,
						&replace, &replace_len,
						&string, &string_len,
						&option_str, &option_str_len) == FAILURE) {
				RETURN_FALSE;
			}
		} else {
			if (zend_parse_parameters(ZEND_NUM_ARGS(), "zfs|s",
						&arg_pattern_zval,
						&arg_replace_fci, &arg_replace_fci_cache,
						&string, &string_len,
						&option_str, &option_str_len) == FAILURE) {
				RETURN_FALSE;
			}
		}

		if (!php_mb_check_encoding(
		string,
		string_len,
		_php_mb_regex_mbctype2name(MBREX(current_mbctype))
		)) {
			RETURN_NULL();
		}

		if (option_str != NULL) {
			_php_mb_regex_init_options(option_str, option_str_len, &options, &syntax, &eval);
		} else {
			options |= MBREX(regex_default_options);
			syntax = MBREX(regex_default_syntax);
		}
	}
	if (eval && !is_callable) {
		php_error_docref(NULL, E_DEPRECATED, "The 'e' option is deprecated, use mb_ereg_replace_callback instead");
	}
	if (Z_TYPE_P(arg_pattern_zval) == IS_STRING) {
		arg_pattern = Z_STRVAL_P(arg_pattern_zval);
		arg_pattern_len = Z_STRLEN_P(arg_pattern_zval);
	} else {
		/* FIXME: this code is not multibyte aware! */
		convert_to_long_ex(arg_pattern_zval);
		pat_buf[0] = (char)Z_LVAL_P(arg_pattern_zval);
		pat_buf[1] = '\0';
		pat_buf[2] = '\0';
		pat_buf[3] = '\0';
		pat_buf[4] = '\0';
		pat_buf[5] = '\0';

		arg_pattern = pat_buf;
		arg_pattern_len = 1;
	}
	/* create regex pattern buffer */
	re = php_mbregex_compile_pattern(arg_pattern, arg_pattern_len, options, MBREX(current_mbctype), syntax);
	if (re == NULL) {
		RETURN_FALSE;
	}

	/* Reject the invalid option combination before `description` is
	 * allocated, so this early return has nothing to release. */
	if (is_callable && eval) {
		php_error_docref(NULL, E_WARNING, "Option 'e' cannot be used with replacement callback");
		RETURN_FALSE;
	}

	if (eval || is_callable) {
		/* The replacement text is staged in eval_buf instead of the output. */
		pbuf = &eval_buf;
		description = zend_make_compiled_string_description("mbregex replace");
	} else {
		pbuf = &out_buf;
		description = NULL;
	}

	/* do the actual work */
	err = 0;
	pos = (OnigUChar *)string;
	string_lim = (OnigUChar*)(string + string_len);
	regs = onig_region_new();
	while (err >= 0) {
		err = onig_search(re, (OnigUChar *)string, (OnigUChar *)string_lim, pos, (OnigUChar *)string_lim, regs, 0);
		if (err <= -2) {
			OnigUChar err_str[ONIG_MAX_ERROR_MESSAGE_LEN];
			onig_error_code_to_str(err_str, err);
			php_error_docref(NULL, E_WARNING, "mbregex search failure in php_mbereg_replace_exec(): %s", err_str);
			break;
		}
		if (err >= 0) {
#if moriyoshi_0
			if (regs->beg[0] == regs->end[0]) {
				php_error_docref(NULL, E_WARNING, "Empty regular expression");
				break;
			}
#endif
			/* copy the part of the string before the match */
			smart_str_appendl(&out_buf, (char *)pos, (size_t)((OnigUChar *)(string + regs->beg[0]) - pos));

			if (!is_callable) {
				/* copy replacement and backrefs */
				i = 0;
				p = replace;
				while (i < replace_len) {
					int fwd = (int) php_mb_mbchar_bytes_ex(p, enc);
					n = -1;
					/* A backreference is a single-byte '\' followed by a digit. */
					if ((replace_len - i) >= 2 && fwd == 1 &&
					p[0] == '\\' && p[1] >= '0' && p[1] <= '9') {
						n = p[1] - '0';
					}
					if (n >= 0 && n < regs->num_regs) {
						if (regs->beg[n] >= 0 && regs->beg[n] < regs->end[n] && (size_t)regs->end[n] <= string_len) {
							smart_str_appendl(pbuf, string + regs->beg[n], regs->end[n] - regs->beg[n]);
						}
						p += 2;
						i += 2;
					} else {
						smart_str_appendl(pbuf, p, fwd);
						p += fwd;
						i += fwd;
					}
				}
			}

			if (eval) {
				zval v;
				zend_string *eval_str;
				/* null terminate buffer */
				smart_str_0(&eval_buf);

				if (eval_buf.s) {
					eval_str = eval_buf.s;
				} else {
					eval_str = ZSTR_EMPTY_ALLOC();
				}

				/* do eval */
				if (zend_eval_stringl(ZSTR_VAL(eval_str), ZSTR_LEN(eval_str), &v, description) == FAILURE) {
					efree(description);
					zend_throw_error(NULL, "Failed evaluating code: %s%s", PHP_EOL, ZSTR_VAL(eval_str));
					/* Leaving the function: release the region object itself,
					 * not just its contents. */
					onig_region_free(regs, 1);
					smart_str_free(&out_buf);
					smart_str_free(&eval_buf);
					RETURN_FALSE;
				}

				/* result of eval */
				convert_to_string(&v);
				smart_str_appendl(&out_buf, Z_STRVAL(v), Z_STRLEN(v));
				/* Clean up */
				smart_str_free(&eval_buf);
				zval_dtor(&v);
			} else if (is_callable) {
				zval args[1];
				zval subpats, retval;
				int i;

				array_init(&subpats);
				for (i = 0; i < regs->num_regs; i++) {
					add_next_index_stringl(&subpats, string + regs->beg[i], regs->end[i] - regs->beg[i]);
				}

				ZVAL_COPY_VALUE(&args[0], &subpats);
				/* null terminate buffer */
				smart_str_0(&eval_buf);

				arg_replace_fci.param_count = 1;
				arg_replace_fci.params = args;
				arg_replace_fci.retval = &retval;
				if (zend_call_function(&arg_replace_fci, &arg_replace_fci_cache) == SUCCESS &&
						!Z_ISUNDEF(retval)) {
					convert_to_string_ex(&retval);
					smart_str_appendl(&out_buf, Z_STRVAL(retval), Z_STRLEN(retval));
					smart_str_free(&eval_buf);
					zval_ptr_dtor(&retval);
				} else {
					if (!EG(exception)) {
						php_error_docref(NULL, E_WARNING, "Unable to call custom replacement function");
					}
				}
				zval_ptr_dtor(&subpats);
			}

			/* Resume after the match; after an empty match copy one byte
			 * through and step forward so the scan makes progress. */
			n = regs->end[0];
			if ((pos - (OnigUChar *)string) < n) {
				pos = (OnigUChar *)string + n;
			} else {
				if (pos < string_lim) {
					smart_str_appendl(&out_buf, (char *)pos, 1);
				}
				pos++;
			}
		} else { /* nomatch */
			/* stick that last bit of string on our output */
			if (string_lim - pos > 0) {
				smart_str_appendl(&out_buf, (char *)pos, string_lim - pos);
			}
		}
		/* Drop this iteration's contents but keep the region for reuse. */
		onig_region_free(regs, 0);
	}

	if (description) {
		efree(description);
	}
	if (regs != NULL) {
		onig_region_free(regs, 1);
	}
	smart_str_free(&eval_buf);

	if (err <= -2) {
		smart_str_free(&out_buf);
		RETVAL_FALSE;
	} else if (out_buf.s) {
		smart_str_0(&out_buf);
		RETVAL_STR(out_buf.s);
	} else {
		RETVAL_EMPTY_STRING();
	}
}
예제 #26
0
/* {{{ _php_mb_regex_ereg_search_exec */
/*
 * Shared implementation of the stateful multibyte regex search functions.
 *
 * PHP arguments ("|ss"): optional pattern and optional option string.  When a
 * pattern is given it is compiled and stored in MBREX(search_re); otherwise
 * the previously stored regex is used.  The subject and the start offset come
 * from MBREX(search_str) and MBREX(search_pos).
 *
 * `mode` selects the shape of the result on a match:
 *   1       -> array(match start offset, match byte length)
 *   2       -> array of capture groups (FALSE for groups with no valid span)
 *   other   -> TRUE
 * FALSE is returned when no regex or subject is available, on mismatch, and
 * on a search error (which also raises a warning).
 *
 * On a match MBREX(search_pos) is moved to the end of the match, or advanced
 * by one when the match end is before the current position; on mismatch it is
 * set to the subject length.  The region is kept in MBREX(search_regs) on a
 * match and released otherwise.
 */
static void
_php_mb_regex_ereg_search_exec(INTERNAL_FUNCTION_PARAMETERS, int mode)
{
	char *arg_pattern = NULL, *arg_options = NULL;
	size_t arg_pattern_len, arg_options_len;
	int err;
	size_t n, i, pos, len;
	/* OnigRegion stores offsets as int, with a negative value marking a
	 * group that did not participate; keep them signed so the ">= 0" test
	 * below means what it says. */
	int beg, end;
	OnigOptionType option;
	OnigUChar *str;
	OnigSyntaxType *syntax;

	if (zend_parse_parameters(ZEND_NUM_ARGS(), "|ss", &arg_pattern, &arg_pattern_len, &arg_options, &arg_options_len) == FAILURE) {
		return;
	}

	if (arg_options) {
		option = 0;
		_php_mb_regex_init_options(arg_options, arg_options_len, &option, &syntax, NULL);
	} else {
		option = MBREX(regex_default_options);
		syntax = MBREX(regex_default_syntax);
	}

	if (arg_pattern) {
		/* create regex pattern buffer, honouring the syntax selected by the
		 * option string (same handling as the replace implementation) */
		if ((MBREX(search_re) = php_mbregex_compile_pattern(arg_pattern, arg_pattern_len, option, MBREX(current_mbctype), syntax)) == NULL) {
			RETURN_FALSE;
		}
	}

	pos = MBREX(search_pos);
	str = NULL;
	len = 0;
	if (!Z_ISUNDEF(MBREX(search_str)) && Z_TYPE(MBREX(search_str)) == IS_STRING){
		str = (OnigUChar *)Z_STRVAL(MBREX(search_str));
		len = Z_STRLEN(MBREX(search_str));
	}

	if (MBREX(search_re) == NULL) {
		php_error_docref(NULL, E_WARNING, "No regex given");
		RETURN_FALSE;
	}

	if (str == NULL) {
		php_error_docref(NULL, E_WARNING, "No string given");
		RETURN_FALSE;
	}

	/* Replace any region left over from a previous search. */
	if (MBREX(search_regs)) {
		onig_region_free(MBREX(search_regs), 1);
	}
	MBREX(search_regs) = onig_region_new();

	err = onig_search(MBREX(search_re), str, str + len, str + pos, str  + len, MBREX(search_regs), 0);
	if (err == ONIG_MISMATCH) {
		MBREX(search_pos) = len;
		RETVAL_FALSE;
	} else if (err <= -2) {
		OnigUChar err_str[ONIG_MAX_ERROR_MESSAGE_LEN];
		onig_error_code_to_str(err_str, err);
		php_error_docref(NULL, E_WARNING, "mbregex search failure in mbregex_search(): %s", err_str);
		RETVAL_FALSE;
	} else {
		switch (mode) {
		case 1:
			array_init(return_value);
			beg = MBREX(search_regs)->beg[0];
			end = MBREX(search_regs)->end[0];
			add_next_index_long(return_value, beg);
			add_next_index_long(return_value, end - beg);
			break;
		case 2:
			array_init(return_value);
			n = MBREX(search_regs)->num_regs;
			for (i = 0; i < n; i++) {
				beg = MBREX(search_regs)->beg[i];
				end = MBREX(search_regs)->end[i];
				if (beg >= 0 && beg <= end && (size_t)end <= len) {
					add_index_stringl(return_value, i, (char *)&str[beg], end - beg);
				} else {
					add_index_bool(return_value, i, 0);
				}
			}
			break;
		default:
			RETVAL_TRUE;
			break;
		}
		/* Group 0 always has a valid span after a successful search. */
		end = MBREX(search_regs)->end[0];
		if (pos <= (size_t)end) {
			MBREX(search_pos) = end;
		} else {
			MBREX(search_pos) = pos + 1;
		}
	}

	if (err < 0) {
		onig_region_free(MBREX(search_regs), 1);
		MBREX(search_regs) = (OnigRegion *)NULL;
	}
}
예제 #27
0
/* Runs one search step for gsub: clears the userdata's region, then searches
 * the subject text from byte offset `st` up to its end.  Returns whatever
 * onig_search() returns. */
static int gsub_exec (TOnig *ud, TArgExec *argE, int st) {
  const char *subject = argE->text;
  const char *limit = subject + argE->textlen;
  const char *from = subject + st;

  onig_region_clear(ud->region);
  return onig_search (ud->reg, (CUC)subject, (CUC)limit, (CUC)from,
    (CUC)limit, ud->region, argE->eflags);
}
예제 #28
0
/*
 * Runs a single regex test case and records the outcome in the file-level
 * counters nsucc / nfail / nerror.
 *
 *   pattern  regular expression source
 *   str      subject string
 *   from,to  expected start/end offsets of capture group `mem`
 *   mem      index of the capture group to compare
 *   not      non-zero when the pattern is expected NOT to match
 *
 * Compile and search errors are written to err_file and counted in nerror;
 * pass/fail lines are written to stdout.  With POSIX_TEST defined the POSIX
 * regcomp/regexec API is exercised, otherwise the native Oniguruma API with
 * the file-level `region`.
 */
static void xx(char* pattern, char* str, int from, int to, int mem, int not)
{
  int r;

#ifdef POSIX_TEST
  regex_t reg;
  char buf[200];
  regmatch_t pmatch[25];

  r = regcomp(&reg, pattern, REG_EXTENDED | REG_NEWLINE);
  if (r) {
    regerror(r, &reg, buf, sizeof(buf));
    fprintf(err_file, "ERROR: %s\n", buf);
    nerror++;
    return ;
  }

  r = regexec(&reg, str, reg.re_nsub + 1, pmatch, 0);
  if (r != 0 && r != REG_NOMATCH) {
    regerror(r, &reg, buf, sizeof(buf));
    fprintf(err_file, "ERROR: %s\n", buf);
    nerror++;
    /* The pattern compiled successfully, so it must be released here too. */
    regfree(&reg);
    return ;
  }

  if (r == REG_NOMATCH) {
    if (not) {
      fprintf(stdout, "OK(N): /%s/ '%s'\n", pattern, str);
      nsucc++;
    }
    else {
      fprintf(stdout, "FAIL: /%s/ '%s'\n", pattern, str);
      nfail++;
    }
  }
  else {
    if (not) {
      fprintf(stdout, "FAIL(N): /%s/ '%s'\n", pattern, str);
      nfail++;
    }
    else {
      if (pmatch[mem].rm_so == from && pmatch[mem].rm_eo == to) {
        fprintf(stdout, "OK: /%s/ '%s'\n", pattern, str);
        nsucc++;
      }
      else {
        fprintf(stdout, "FAIL: /%s/ '%s' %d-%d : %d-%d\n", pattern, str,
                from, to, pmatch[mem].rm_so, pmatch[mem].rm_eo);
        nfail++;
      }
    }
  }
  regfree(&reg);

#else
  regex_t* reg;
  OnigErrorInfo einfo;

  r = onig_new(&reg, (UChar* )pattern, (UChar* )(pattern + strlen(pattern)),
               ONIG_OPTION_DEFAULT, ONIG_ENCODING_EUC_JP, ONIG_SYNTAX_DEFAULT, &einfo);
  if (r) {
    char s[ONIG_MAX_ERROR_MESSAGE_LEN];
    onig_error_code_to_str((UChar* )s, r, &einfo);
    fprintf(err_file, "ERROR: %s\n", s);
    nerror++;
    return ;
  }

  r = onig_search(reg, (UChar* )str, (UChar* )(str + strlen(str)),
                  (UChar* )str, (UChar* )(str + strlen(str)),
                  region, ONIG_OPTION_NONE);
  if (r < ONIG_MISMATCH) {
    char s[ONIG_MAX_ERROR_MESSAGE_LEN];
    onig_error_code_to_str((UChar* )s, r);
    fprintf(err_file, "ERROR: %s\n", s);
    nerror++;
    /* The regex was created successfully, so it must be released here too. */
    onig_free(reg);
    return ;
  }

  if (r == ONIG_MISMATCH) {
    if (not) {
      fprintf(stdout, "OK(N): /%s/ '%s'\n", pattern, str);
      nsucc++;
    }
    else {
      fprintf(stdout, "FAIL: /%s/ '%s'\n", pattern, str);
      nfail++;
    }
  }
  else {
    if (not) {
      fprintf(stdout, "FAIL(N): /%s/ '%s'\n", pattern, str);
      nfail++;
    }
    else {
      if (region->beg[mem] == from && region->end[mem] == to) {
        fprintf(stdout, "OK: /%s/ '%s'\n", pattern, str);
        nsucc++;
      }
      else {
        fprintf(stdout, "FAIL: /%s/ '%s' %d-%d : %d-%d\n", pattern, str,
                from, to, region->beg[mem], region->end[mem]);
        nfail++;
      }
    }
  }
  onig_free(reg);
#endif
}
예제 #29
0
  // Searches `string` for this regexp, starting at byte offset `start` and
  // running to the end of the string's bytes. Returns cNil when onig_search
  // reports ONIG_MISMATCH; otherwise returns the object built by
  // get_match_data for the filled-in region.
  //
  // lock_ is held from just before maybe_recompile until the search and the
  // int_map_backward fix-up are finished.
  Object* Regexp::search_from(STATE, String* string, Fixnum* start) {
    if(unlikely(!onig_data)) {
      Exception::argument_error(state, "Not properly initialized Regexp");
    }

    lock_.lock();

    maybe_recompile(state, string);

    int byte_len = string->byte_size();
    native_int offset = start->to_native();

    const UChar* base = (UChar*)string->byte_address();
    const UChar* limit = base + byte_len;
    const UChar* from = base + offset;

    // Stack-backed region with room for 10 groups, every slot preset to the
    // "no position" sentinel.
    int begin_reg[10];
    int end_reg[10];
    for(int slot = 0; slot < 10; slot++) {
      begin_reg[slot] = ONIG_REGION_NOTPOS;
      end_reg[slot] = ONIG_REGION_NOTPOS;
    }

    OnigRegion region = { 10, 0, begin_reg, end_reg, 0, 0 };

    // Remember the map pointer so a change made during the search is noticed.
    int* prior_map = onig_data->int_map_backward;

    int result = onig_search(onig_data, from, limit, from, limit,
                             &region, ONIG_OPTION_NONE);

    // onig appears to create int_map_backward lazily, so check whether it
    // showed up (or changed) during this search.
    if(onig_data->int_map_backward != prior_map) {
      native_int size = sizeof(int) * ONIG_CHAR_TABLE_SIZE;
      ByteArray* copy = ByteArray::create(state, size);
      memcpy(copy->raw_bytes(), onig_data->int_map_backward, size);

      // Release the table onig allocated; the ByteArray copy replaces it.
      free(onig_data->int_map_backward);

      onig_data->int_map_backward = reinterpret_cast<int*>(copy->raw_bytes());

      write_barrier(state, copy);
    }

    lock_.unlock();

    if(result == ONIG_MISMATCH) {
      onig_region_free(&region, 0);
      return cNil;
    }

    Object* md = get_match_data(state, &region, string, this, offset);
    onig_region_free(&region, 0);
    return md;
  }
예제 #30
0
/**
  Call the Oniguruma regex match API.

  Same parameters as RegularExpressionMatch, except SyntaxType is required.

  @param String         A pointer to a NULL terminated string to match against the
                        regular expression string specified by Pattern.

  @param Pattern        A pointer to a NULL terminated string that represents the
                        regular expression.

  @param SyntaxType     A pointer to the EFI_REGEX_SYNTAX_TYPE that identifies the
                        regular expression syntax type to use. Must not be NULL.
                        Only the POSIX extended and Perl syntax GUIDs are
                        accepted.

  @param Result         On return, points to TRUE if the search found a match of
                        Pattern in String, otherwise points to FALSE. Not written
                        when the syntax type is rejected, compilation fails, or
                        the region cannot be allocated.

  @param Captures       Optional. If not NULL and a match is found, receives a
                        newly allocated array of EFI_REGEX_CAPTURE objects. The
                        full match is put in Captures[0] and the capturing groups
                        in Captures[1:N]. It is the caller's responsibility to
                        free the array and each CapturePtr in it. On allocation
                        failure the partial array is freed and *Captures is set
                        to NULL.

  @param CapturesCount  Written only when Captures is not NULL and a match is
                        found: set to the number of entries in *Captures, or to
                        zero if building the array failed.

  @retval  EFI_SUCCESS           Regex compilation and search completed.
  @retval  EFI_UNSUPPORTED       SyntaxType is not one of the supported syntaxes.
  @retval  EFI_DEVICE_ERROR      Regex compilation failed, or the search reported
                                 an error other than a mismatch.
  @retval  EFI_OUT_OF_RESOURCES  A required allocation failed.

**/
STATIC
EFI_STATUS
OnigurumaMatch (
    IN  CHAR16                *String,
    IN  CHAR16                *Pattern,
    IN  EFI_REGEX_SYNTAX_TYPE *SyntaxType,
    OUT BOOLEAN               *Result,
    OUT EFI_REGEX_CAPTURE     **Captures,     OPTIONAL
    OUT UINTN                 *CapturesCount
)
{
    regex_t         *OnigRegex;
    OnigSyntaxType  *OnigSyntax;
    OnigRegion      *Region;
    INT32           OnigResult;
    OnigErrorInfo   ErrorInfo;
    CHAR8           ErrorMessage[ONIG_MAX_ERROR_MESSAGE_LEN];
    UINT32          Index;
    OnigUChar       *Start;
    OnigUChar       *End;
    EFI_STATUS      Status;


    Status = EFI_SUCCESS;

    //
    // Determine the internal syntax type
    //
    OnigSyntax = ONIG_SYNTAX_DEFAULT;
    if (CompareGuid (SyntaxType, &gEfiRegexSyntaxTypePosixExtendedGuid)) {
        OnigSyntax = ONIG_SYNTAX_POSIX_EXTENDED;
    } else if (CompareGuid (SyntaxType, &gEfiRegexSyntaxTypePerlGuid)) {
        OnigSyntax = ONIG_SYNTAX_PERL;
    } else {
        //
        // No fallback is attempted: the request is rejected.
        //
        DEBUG ((DEBUG_ERROR, "Unsupported regex syntax\n"));
        return EFI_UNSUPPORTED;
    }

    //
    // Compile pattern
    //
    Start = (OnigUChar*)Pattern;
    OnigResult = onig_new (
                     &OnigRegex,
                     Start,
                     Start + onigenc_str_bytelen_null (CHAR16_ENCODING, Start),
                     ONIG_OPTION_DEFAULT,
                     CHAR16_ENCODING,
                     OnigSyntax,
                     &ErrorInfo
                 );

    if (OnigResult != ONIG_NORMAL) {
        onig_error_code_to_str (ErrorMessage, OnigResult, &ErrorInfo);
        DEBUG ((DEBUG_ERROR, "Regex compilation failed: %a\n", ErrorMessage));
        return EFI_DEVICE_ERROR;
    }

    //
    // Try to match
    //
    Start = (OnigUChar*)String;
    Region = onig_region_new ();
    if (Region == NULL) {
        onig_free (OnigRegex);
        return EFI_OUT_OF_RESOURCES;
    }

    //
    // The subject's byte length is needed for both the string end and the
    // search range end; compute it once.
    //
    End = Start + onigenc_str_bytelen_null (CHAR16_ENCODING, Start);
    OnigResult = onig_search (
                     OnigRegex,
                     Start,
                     End,
                     Start,
                     End,
                     Region,
                     ONIG_OPTION_NONE
                 );

    if (OnigResult >= 0) {
        *Result = TRUE;
    } else {
        *Result = FALSE;
        if (OnigResult != ONIG_MISMATCH) {
            onig_error_code_to_str (ErrorMessage, OnigResult);
            DEBUG ((DEBUG_ERROR, "Regex match failed: %a\n", ErrorMessage));
            onig_region_free (Region, 1);
            onig_free (OnigRegex);
            return EFI_DEVICE_ERROR;
        }
    }

    //
    // If successful, copy out the region (capture) information
    //
    if (*Result && Captures != NULL) {
        *CapturesCount = Region->num_regs;
        *Captures = AllocateZeroPool (*CapturesCount * sizeof(**Captures));
        if (*Captures == NULL) {
            //
            // Report the failure instead of returning success with no array.
            //
            *CapturesCount = 0;
            Status = EFI_OUT_OF_RESOURCES;
        } else {
            for (Index = 0; Index < *CapturesCount; ++Index) {
                //
                // Region beg/end values represent bytes, not characters
                //
                // NOTE(review): a group that did not participate presumably
                // has beg == end == ONIG_REGION_NOTPOS, giving Length 0 and a
                // zero-byte copy - confirm AllocateCopyPool handles size 0.
                //
                (*Captures)[Index].Length = (Region->end[Index] - Region->beg[Index]) / sizeof(CHAR16);
                (*Captures)[Index].CapturePtr = AllocateCopyPool (
                                                    ((*Captures)[Index].Length) * sizeof (CHAR16),
                                                    (CHAR16*)((UINTN)String + Region->beg[Index])
                                                );
                if ((*Captures)[Index].CapturePtr == NULL) {
                    Status = EFI_OUT_OF_RESOURCES;
                    break;
                }
            }

            if (EFI_ERROR (Status)) {
                //
                // The array was zero-filled, so entries never reached by the
                // loop above still hold NULL and are skipped here.
                //
                for (Index = 0; Index < *CapturesCount; ++Index) {
                    if ((*Captures)[Index].CapturePtr != NULL) {
                        FreePool ((CHAR16*)(*Captures)[Index].CapturePtr);
                    }
                }
                FreePool (*Captures);

                //
                // Do not hand a freed pointer or stale count back to the caller.
                //
                *Captures = NULL;
                *CapturesCount = 0;
            }
        }
    }

    onig_region_free (Region, 1);
    onig_free (OnigRegex);

    return Status;
}