//
// _yr_re_fiber_split
//
// Duplicates a fiber and links the duplicate into the list right after the
// original one.
//
// Args:
//   fiber       - Fiber to duplicate. Its sp, ip and stack slots 0..sp are
//                 copied into the duplicate.
//   fiber_list  - List containing `fiber`; its tail is moved to the duplicate
//                 when `fiber` was the last element.
//   fiber_pool  - Pool handed to _yr_re_fiber_create to obtain the duplicate.
//
// Returns:
//   The newly created fiber, or NULL if _yr_re_fiber_create returned NULL
//   (in that case neither `fiber` nor `fiber_list` is modified).
//

RE_FIBER* _yr_re_fiber_split(
    RE_FIBER* fiber,
    RE_FIBER_LIST* fiber_list,
    RE_FIBER_LIST* fiber_pool)
{
  RE_FIBER* clone;
  int32_t slot;

  clone = _yr_re_fiber_create(fiber_pool);

  if (clone == NULL)
    return NULL;

  // Copy the execution state: stack pointer, instruction pointer and every
  // stack slot currently in use.

  clone->sp = fiber->sp;
  clone->ip = fiber->ip;

  slot = 0;

  while (slot <= fiber->sp)
  {
    clone->stack[slot] = fiber->stack[slot];
    slot++;
  }

  // Insert the clone between `fiber` and whatever followed it.

  clone->next = fiber->next;
  clone->prev = fiber;

  if (fiber->next != NULL)
  {
    fiber->next->prev = clone;
  }

  fiber->next = clone;

  // If the original fiber was the last one, the clone is the new tail.

  if (fiber_list->tail == fiber)
  {
    fiber_list->tail = clone;
  }

  assert(fiber_list->tail->next == NULL);
  assert(fiber_list->head->prev == NULL);

  return clone;
}
//
// _yr_re_fiber_split
//
// Duplicates a fiber and links the duplicate into the list right after the
// original one.
//
// Args:
//   fiber       - Fiber to duplicate. Its sp, ip and stack slots 0..sp are
//                 copied into the duplicate.
//   fiber_list  - List containing `fiber`; its tail is moved to the duplicate
//                 when `fiber` was the last element.
//   fiber_pool  - Pool handed to _yr_re_fiber_create to obtain the duplicate.
//   new_fiber   - Output; receives the pointer to the duplicate.
//
// Returns:
//   ERROR_SUCCESS, or the error propagated by FAIL_ON_ERROR when
//   _yr_re_fiber_create fails.
//

int _yr_re_fiber_split(
    RE_FIBER* fiber,
    RE_FIBER_LIST* fiber_list,
    RE_FIBER_POOL* fiber_pool,
    RE_FIBER** new_fiber)
{
  RE_FIBER* clone;
  int32_t slot;

  FAIL_ON_ERROR(_yr_re_fiber_create(fiber_pool, new_fiber));

  clone = *new_fiber;

  // Copy the execution state: stack pointer, instruction pointer and every
  // stack slot currently in use.

  clone->sp = fiber->sp;
  clone->ip = fiber->ip;

  slot = 0;

  while (slot <= fiber->sp)
  {
    clone->stack[slot] = fiber->stack[slot];
    slot++;
  }

  // Insert the clone between `fiber` and whatever followed it.

  clone->next = fiber->next;
  clone->prev = fiber;

  if (fiber->next != NULL)
  {
    fiber->next->prev = clone;
  }

  fiber->next = clone;

  // If the original fiber was the last one, the clone is the new tail.

  if (fiber_list->tail == fiber)
  {
    fiber_list->tail = clone;
  }

  assert(fiber_list->tail->next == NULL);
  assert(fiber_list->head->prev == NULL);

  return ERROR_SUCCESS;
}
//
// yr_re_exec
//
// Executes compiled regexp code against a buffer, keeping a list of fibers
// that are all advanced over the same input position before the position
// moves on by one character.
//
// Args:
//   code           - Regexp code to execute.
//   input_data     - Pointer to the first character to examine.
//   input_size     - Size of the input; together with RE_SCAN_LIMIT it bounds
//                    max_count.
//   flags          - Combination of RE_FLAGS_xxx. The ones read here are
//                    RE_FLAGS_WIDE (characters are 2 bytes), RE_FLAGS_NO_CASE,
//                    RE_FLAGS_DOT_ALL, RE_FLAGS_BACKWARDS (input pointer is
//                    decremented instead of incremented), RE_FLAGS_EXHAUSTIVE
//                    (report every match through the callback) and
//                    RE_FLAGS_SCAN (start a new fiber at every position).
//   callback       - Invoked for each match when RE_FLAGS_EXHAUSTIVE is set.
//   callback_args  - Opaque pointer forwarded to the callback.
//
// Returns:
//   The value of `count` at the last match found, -1 if nothing matched, or
//   ERROR_INSUFICIENT_MEMORY if the per-thread storage or a fiber could not
//   be allocated.
//   NOTE(review): ERROR_INSUFICIENT_MEMORY shares the return channel with
//   match lengths; the pre-existing storage-allocation failure path already
//   behaved this way, so the convention is kept - confirm callers cope.
//

int yr_re_exec(
    uint8_t* code,
    uint8_t* input_data,
    size_t input_size,
    int flags,
    RE_MATCH_CALLBACK_FUNC callback,
    void* callback_args)
{
  uint8_t* ip;
  uint8_t* input;
  uint8_t mask;
  uint8_t value;

  RE_FIBER_LIST fibers;
  RE_THREAD_STORAGE* storage;
  RE_FIBER* fiber;
  RE_FIBER* new_fiber;

  int count;
  int max_count;
  int match;
  int character_size;
  int result = -1;

  // The fiber pool lives in per-thread storage, created lazily the first
  // time this thread executes a regexp.

  #ifdef WIN32
  storage = TlsGetValue(thread_storage_key);
  #else
  storage = pthread_getspecific(thread_storage_key);
  #endif

  if (storage == NULL)
  {
    storage = yr_malloc(sizeof(RE_THREAD_STORAGE));

    if (storage == NULL)
      return ERROR_INSUFICIENT_MEMORY;

    storage->fiber_pool.head = NULL;
    storage->fiber_pool.tail = NULL;

    #ifdef WIN32
    TlsSetValue(thread_storage_key, storage);
    #else
    pthread_setspecific(thread_storage_key, storage);
    #endif
  }

  if (flags & RE_FLAGS_WIDE)
    character_size = 2;
  else
    character_size = 1;

  fiber = _yr_re_fiber_create(&storage->fiber_pool);

  // _yr_re_fiber_create returns NULL when no fiber can be obtained; nothing
  // has been linked into `fibers` yet, so there is nothing to release.

  if (fiber == NULL)
    return ERROR_INSUFICIENT_MEMORY;

  fiber->ip = code;

  fibers.head = fiber;
  fibers.tail = fiber;

  input = input_data;
  count = 0;
  max_count = min(input_size, RE_SCAN_LIMIT);

  while (fibers.head != NULL)
  {
    fiber = fibers.head;

    while(fiber != NULL)
    {
      ip = fiber->ip;

      // NOTE(review): `prolog` and `epilog` are macros defined outside this
      // function; presumably they check input bounds and use `match` to kill
      // or advance `fiber` - confirm against their definition.

      switch(*ip)
      {
        case RE_OPCODE_LITERAL:
          prolog;
          if (flags & RE_FLAGS_NO_CASE)
            match = lowercase[*input] == lowercase[*(ip + 1)];
          else
            match = (*input == *(ip + 1));
          fiber->ip += 2;
          epilog;
          break;

        case RE_OPCODE_ANY:
          prolog;
          match = (*input != 0x0A || flags & RE_FLAGS_DOT_ALL);
          fiber->ip += 1;
          epilog;
          break;

        case RE_OPCODE_MASKED_LITERAL:
          prolog;
          value = *(int16_t*)(ip + 1) & 0xFF;
          mask = *(int16_t*)(ip + 1) >> 8;

          // We don't need to take into account the case-insensitive
          // case because this opcode is only used with hex strings,
          // which can't be case-insensitive.

          match = ((*input & mask) == value);
          fiber->ip += 3;
          epilog;
          break;

        case RE_OPCODE_CLASS:
          prolog;
          if (flags & RE_FLAGS_NO_CASE)
            match = CHAR_IN_CLASS(*input, ip + 1) ||
                    CHAR_IN_CLASS(altercase[*input], ip + 1);
          else
            match = CHAR_IN_CLASS(*input, ip + 1);
          // One opcode byte followed by a 32-byte class bitmap.
          fiber->ip += 33;
          epilog;
          break;

        case RE_OPCODE_WORD_CHAR:
          prolog;
          match = (isalnum(*input) || *input == '_');
          fiber->ip += 1;
          epilog;
          break;

        case RE_OPCODE_NON_WORD_CHAR:
          prolog;
          match = (!isalnum(*input) && *input != '_');
          fiber->ip += 1;
          epilog;
          break;

        case RE_OPCODE_SPACE:
          prolog;
          match = (*input == ' ' || *input == '\t');
          fiber->ip += 1;
          epilog;
          break;

        case RE_OPCODE_NON_SPACE:
          prolog;
          match = (*input != ' ' && *input != '\t');
          fiber->ip += 1;
          epilog;
          break;

        case RE_OPCODE_DIGIT:
          prolog;
          match = isdigit(*input);
          fiber->ip += 1;
          epilog;
          break;

        case RE_OPCODE_NON_DIGIT:
          prolog;
          match = !isdigit(*input);
          fiber->ip += 1;
          epilog;
          break;

        // The control-flow opcodes below do not move to the next fiber, so
        // the same fiber is examined again on the next inner-loop iteration.

        case RE_OPCODE_SPLIT_A:
          new_fiber = _yr_re_fiber_split(
              fiber, &fibers, &storage->fiber_pool);

          if (new_fiber == NULL)
          {
            // Give every live fiber back to the pool before bailing out.
            _yr_re_fiber_kill_tail(
                fibers.head, &fibers, &storage->fiber_pool);
            return ERROR_INSUFICIENT_MEMORY;
          }

          new_fiber->ip += *(int16_t*)(ip + 1);
          fiber->ip += 3;
          break;

        case RE_OPCODE_SPLIT_B:
          new_fiber = _yr_re_fiber_split(
              fiber, &fibers, &storage->fiber_pool);

          if (new_fiber == NULL)
          {
            // Give every live fiber back to the pool before bailing out.
            _yr_re_fiber_kill_tail(
                fibers.head, &fibers, &storage->fiber_pool);
            return ERROR_INSUFICIENT_MEMORY;
          }

          new_fiber->ip += 3;
          fiber->ip += *(int16_t*)(ip + 1);
          break;

        case RE_OPCODE_JUMP:
          fiber->ip = ip + *(int16_t*)(ip + 1);
          break;

        case RE_OPCODE_JNZ:
          // Decrement the counter on top of the fiber's stack and jump
          // while it is still positive.
          fiber->stack[fiber->sp]--;
          if (fiber->stack[fiber->sp] > 0)
            fiber->ip = ip + *(int16_t*)(ip + 1);
          else
            fiber->ip += 3;
          break;

        case RE_OPCODE_PUSH:
          fiber->stack[++fiber->sp] = *(uint16_t*)(ip + 1);
          fiber->ip += 3;
          break;

        case RE_OPCODE_POP:
          fiber->sp--;
          fiber->ip++;
          break;

        case RE_OPCODE_MATCH:
        case RE_OPCODE_MATCH_AT_START:
        case RE_OPCODE_MATCH_AT_END:

          // Anchored matches that are not at the required position simply
          // kill the fiber.

          if ((*ip == RE_OPCODE_MATCH_AT_START &&
               input_size - 1 > count - character_size) ||
              (*ip == RE_OPCODE_MATCH_AT_END &&
               input_size > count))
          {
            fiber = _yr_re_fiber_kill(fiber, &fibers, &storage->fiber_pool);
            break;
          }

          result = count;

          if (flags & RE_FLAGS_EXHAUSTIVE)
          {
            if (flags & RE_FLAGS_BACKWARDS)
              callback(
                  input + character_size, count, flags, callback_args);
            else
              callback(
                  input_data, count, flags, callback_args);

            fiber = _yr_re_fiber_kill(fiber, &fibers, &storage->fiber_pool);
          }
          else
          {
            // Not exhaustive: this fiber and every fiber after it in the
            // list are discarded.
            _yr_re_fiber_kill_tail(fiber, &fibers, &storage->fiber_pool);
            fiber = NULL;
          }

          break;

        default:
          assert(FALSE);
      }
    }

    // In wide mode the second byte of every character must be zero,
    // otherwise all remaining fibers are discarded.

    if (fibers.head != NULL &&
        flags & RE_FLAGS_WIDE && *(input + 1) != 0)
      _yr_re_fiber_kill_tail(fibers.head, &fibers, &storage->fiber_pool);

    if (flags & RE_FLAGS_BACKWARDS)
      input -= character_size;
    else
      input += character_size;

    count += character_size;

    if ((flags & RE_FLAGS_SCAN) && count < max_count)
    {
      fiber = _yr_re_fiber_create(&storage->fiber_pool);

      if (fiber == NULL)
      {
        // Give every live fiber back to the pool before bailing out.
        if (fibers.head != NULL)
          _yr_re_fiber_kill_tail(
              fibers.head, &fibers, &storage->fiber_pool);

        return ERROR_INSUFICIENT_MEMORY;
      }

      fiber->ip = code;
      _yr_re_fiber_append(fiber, &fibers);
    }
  }

  return result;
}
//
// yr_re_exec
//
// Executes compiled regexp code against a buffer, keeping a list of fibers
// that are all advanced over the same input position before the position
// moves on by one character.
//
// Args:
//   re_code        - Regexp code to execute.
//   input_data     - Pointer to the first character to examine. With
//                    RE_FLAGS_BACKWARDS the scan starts one character before
//                    this pointer and moves towards lower addresses.
//   input_size     - Size of the input; together with RE_SCAN_LIMIT it bounds
//                    max_count.
//   flags          - Combination of RE_FLAGS_xxx. The ones read here are
//                    RE_FLAGS_WIDE, RE_FLAGS_BACKWARDS, RE_FLAGS_NOT_AT_START,
//                    RE_FLAGS_EXHAUSTIVE and RE_FLAGS_SCAN.
//   callback       - Invoked for each match when RE_FLAGS_EXHAUSTIVE is set.
//                    May be NULL.
//   callback_args  - Opaque pointer forwarded to the callback.
//
// Returns:
//   The value of `count` at the last match found, -1 if nothing matched, or
//   -2 if the per-thread storage or a fiber could not be allocated.
//

int yr_re_exec(
    RE_CODE re_code,
    uint8_t* input_data,
    size_t input_size,
    int flags,
    RE_MATCH_CALLBACK_FUNC callback,
    void* callback_args)
{
  uint8_t* input;
  uint8_t mask;
  uint8_t value;

  RE_CODE ip;
  RE_FIBER_LIST fibers;
  RE_THREAD_STORAGE* storage;
  RE_FIBER* fiber;
  RE_FIBER* next_fiber;

  int count;
  int max_count;
  int match;
  int character_size;
  int kill;
  int action;
  int result = -1;

  // What to do with the current fiber once its opcode has been evaluated:
  //   ACTION_NONE       sync the fiber and move on to the next one.
  //   ACTION_CONTINUE   skip the 1-byte opcode and evaluate the same fiber
  //                     again without consuming input.
  //   ACTION_KILL       discard the fiber.
  //   ACTION_KILL_TAIL  discard the fiber and all the ones after it.

  #define ACTION_NONE       0
  #define ACTION_CONTINUE   1
  #define ACTION_KILL       2
  #define ACTION_KILL_TAIL  3

  // Opcodes that read *input use this first: once the scan limit has been
  // reached the fiber is killed instead of reading past the input.

  #define prolog if (count >= max_count) \
      { \
        action = ACTION_KILL; \
        break; \
      }

  if (_yr_re_alloc_storage(&storage) != ERROR_SUCCESS)
    return -2;

  if (flags & RE_FLAGS_WIDE)
    character_size = 2;
  else
    character_size = 1;

  input = input_data;

  if (flags & RE_FLAGS_BACKWARDS)
    input -= character_size;

  max_count = min(input_size, RE_SCAN_LIMIT);
  count = 0;

  fiber = _yr_re_fiber_create(&storage->fiber_pool);

  // Nothing has been linked into `fibers` yet, so a failed allocation can
  // return straight away.

  if (fiber == NULL)
    return -2;

  fiber->ip = re_code;

  fibers.head = fiber;
  fibers.tail = fiber;

  _yr_re_fiber_sync(&fibers, &storage->fiber_pool, fiber);

  while (fibers.head != NULL)
  {
    fiber = fibers.head;

    while(fiber != NULL)
    {
      ip = fiber->ip;
      action = ACTION_NONE;

      switch(*ip)
      {
        case RE_OPCODE_ANY:
          prolog;
          action = ACTION_NONE;
          fiber->ip += 1;
          break;

        case RE_OPCODE_ANY_EXCEPT_NEW_LINE:
          prolog;
          match = (*input != 0x0A);
          action = match ? ACTION_NONE : ACTION_KILL;
          fiber->ip += 1;
          break;

        case RE_OPCODE_LITERAL:
          prolog;
          match = (*input == *(ip + 1));
          action = match ? ACTION_NONE : ACTION_KILL;
          fiber->ip += 2;
          break;

        case RE_OPCODE_LITERAL_NO_CASE:
          prolog;
          match = lowercase[*input] == lowercase[*(ip + 1)];
          action = match ? ACTION_NONE : ACTION_KILL;
          fiber->ip += 2;
          break;

        case RE_OPCODE_MASKED_LITERAL:
          prolog;
          value = *(int16_t*)(ip + 1) & 0xFF;
          mask = *(int16_t*)(ip + 1) >> 8;

          // We don't need to take into account the case-insensitive
          // case because this opcode is only used with hex strings,
          // which can't be case-insensitive.

          match = ((*input & mask) == value);
          action = match ? ACTION_NONE : ACTION_KILL;
          fiber->ip += 3;
          break;

        case RE_OPCODE_CLASS:
          prolog;
          match = CHAR_IN_CLASS(*input, ip + 1);
          action = match ? ACTION_NONE : ACTION_KILL;
          // One opcode byte followed by a 32-byte class bitmap.
          fiber->ip += 33;
          break;

        case RE_OPCODE_CLASS_NO_CASE:
          prolog;
          match = CHAR_IN_CLASS(*input, ip + 1) ||
                  CHAR_IN_CLASS(altercase[*input], ip + 1);
          action = match ? ACTION_NONE : ACTION_KILL;
          fiber->ip += 33;
          break;

        case RE_OPCODE_WORD_CHAR:
          prolog;
          match = (isalnum(*input) || *input == '_');
          action = match ? ACTION_NONE : ACTION_KILL;
          fiber->ip += 1;
          break;

        case RE_OPCODE_NON_WORD_CHAR:
          prolog;
          match = (!isalnum(*input) && *input != '_');
          action = match ? ACTION_NONE : ACTION_KILL;
          fiber->ip += 1;
          break;

        case RE_OPCODE_SPACE:
          prolog;
          switch(*input)
          {
            case ' ':
            case '\t':
            case '\r':
            case '\n':
            case '\v':
            case '\f':
              match = TRUE;
              break;

            default:
              match = FALSE;
          }
          action = match ? ACTION_NONE : ACTION_KILL;
          fiber->ip += 1;
          break;

        case RE_OPCODE_NON_SPACE:
          prolog;
          switch(*input)
          {
            case ' ':
            case '\t':
            case '\r':
            case '\n':
            case '\v':
            case '\f':
              match = FALSE;
              break;

            default:
              match = TRUE;
          }
          action = match ? ACTION_NONE : ACTION_KILL;
          fiber->ip += 1;
          break;

        case RE_OPCODE_DIGIT:
          prolog;
          match = isdigit(*input);
          action = match ? ACTION_NONE : ACTION_KILL;
          fiber->ip += 1;
          break;

        case RE_OPCODE_NON_DIGIT:
          prolog;
          match = !isdigit(*input);
          action = match ? ACTION_NONE : ACTION_KILL;
          fiber->ip += 1;
          break;

        case RE_OPCODE_MATCH_AT_START:
          // Going backwards the start of the input is reached only after
          // input_size bytes have been consumed; going forwards it is the
          // very first position, unless the caller said the buffer does
          // not begin at the start.
          if (flags & RE_FLAGS_BACKWARDS)
            kill = input_size > count;
          else
            kill = (flags & RE_FLAGS_NOT_AT_START) || (count != 0);
          action = kill ? ACTION_KILL : ACTION_CONTINUE;
          break;

        case RE_OPCODE_MATCH_AT_END:
          action = input_size > count ? ACTION_KILL : ACTION_CONTINUE;
          break;

        case RE_OPCODE_MATCH:
          result = count;

          if (flags & RE_FLAGS_EXHAUSTIVE)
          {
            if (callback != NULL)
            {
              if (flags & RE_FLAGS_BACKWARDS)
                callback(
                    input + character_size, count, flags, callback_args);
              else
                callback(
                    input_data, count, flags, callback_args);
            }

            action = ACTION_KILL;
          }
          else
          {
            action = ACTION_KILL_TAIL;
          }

          break;

        default:
          assert(FALSE);
      }

      switch(action)
      {
        case ACTION_KILL:
          fiber = _yr_re_fiber_kill(&fibers, &storage->fiber_pool, fiber);
          break;

        case ACTION_KILL_TAIL:
          _yr_re_fiber_kill_tail(&fibers, &storage->fiber_pool, fiber);
          fiber = NULL;
          break;

        case ACTION_CONTINUE:
          fiber->ip += 1;
          _yr_re_fiber_sync(&fibers, &storage->fiber_pool, fiber);
          break;

        default:
          // Remember the successor before syncing, then move on to it.
          next_fiber = fiber->next;
          _yr_re_fiber_sync(&fibers, &storage->fiber_pool, fiber);
          fiber = next_fiber;
      }
    }

    // In wide mode the second byte of every character must be zero,
    // otherwise all remaining fibers are discarded.

    if (flags & RE_FLAGS_WIDE &&
        count + 1 < max_count && *(input + 1) != 0)
      _yr_re_fiber_kill_all(&fibers, &storage->fiber_pool);

    if (flags & RE_FLAGS_BACKWARDS)
      input -= character_size;
    else
      input += character_size;

    count += character_size;

    if (flags & RE_FLAGS_SCAN && count < max_count)
    {
      fiber = _yr_re_fiber_create(&storage->fiber_pool);

      if (fiber == NULL)
      {
        // Give every live fiber back to the pool before bailing out.
        _yr_re_fiber_kill_all(&fibers, &storage->fiber_pool);
        return -2;
      }

      fiber->ip = re_code;
      _yr_re_fiber_append(&fibers, fiber);

      _yr_re_fiber_sync(&fibers, &storage->fiber_pool, fiber);
    }
  }

  return result;
}
//
// yr_re_exec
//
// Executes compiled regexp code against a buffer, keeping a list of fibers
// that are all advanced over the same input position before the position
// moves on by one character.
//
// Args:
//   re_code        - Regexp code to execute.
//   input_data     - Pointer to the first character to examine. With
//                    RE_FLAGS_BACKWARDS the scan starts one character before
//                    this pointer and moves towards lower addresses.
//   input_size     - Size of the input; together with RE_SCAN_LIMIT it bounds
//                    max_count.
//   flags          - Combination of RE_FLAGS_xxx. The ones read here are
//                    RE_FLAGS_WIDE, RE_FLAGS_BACKWARDS, RE_FLAGS_NOT_AT_START,
//                    RE_FLAGS_EXHAUSTIVE and RE_FLAGS_SCAN.
//   callback       - Invoked for each match when RE_FLAGS_EXHAUSTIVE is set.
//                    May be NULL.
//   callback_args  - Opaque pointer forwarded to the callback.
//
// Returns:
//   The value of `count` at the last match found, or a negative number:
//     -1  nothing matched
//     -2  not enough memory
//     -3  the callback returned ERROR_TOO_MANY_MATCHES
//     -4  too many fibers, or any other error returned by the callback
//
// On every error return the fibers still alive are handed back to the
// per-thread pool first, so they are not lost.
//

int yr_re_exec(
    RE_CODE re_code,
    uint8_t* input_data,
    size_t input_size,
    int flags,
    RE_MATCH_CALLBACK_FUNC callback,
    void* callback_args)
{
  uint8_t* input;
  uint8_t mask;
  uint8_t value;

  RE_CODE ip;
  RE_FIBER_LIST fibers;
  RE_THREAD_STORAGE* storage;
  RE_FIBER* fiber;
  RE_FIBER* next_fiber;

  int error;
  int count;
  int max_count;
  int match;
  int character_size;
  int input_incr;
  int kill;
  int action;
  int result = -1;

  // What to do with the current fiber once its opcode has been evaluated:
  //   ACTION_NONE       sync the fiber and move on to the next one.
  //   ACTION_CONTINUE   skip the 1-byte opcode and evaluate the same fiber
  //                     again without consuming input.
  //   ACTION_KILL       discard the fiber.
  //   ACTION_KILL_TAIL  discard the fiber and all the ones after it.

  #define ACTION_NONE       0
  #define ACTION_CONTINUE   1
  #define ACTION_KILL       2
  #define ACTION_KILL_TAIL  3

  // Opcodes that read *input use this first: once the scan limit has been
  // reached the fiber is killed instead of reading past the input.

  #define prolog if (count >= max_count) \
      { \
        action = ACTION_KILL; \
        break; \
      }

  // Maps the two fiber-related error codes to this function's negative
  // return values, releasing every live fiber before returning. Any other
  // value falls through.

  #define fail_if_error(e) switch (e) { \
        case ERROR_INSUFICIENT_MEMORY: \
          _yr_re_fiber_kill_all(&fibers, &storage->fiber_pool); \
          return -2; \
        case ERROR_TOO_MANY_RE_FIBERS: \
          _yr_re_fiber_kill_all(&fibers, &storage->fiber_pool); \
          return -4; \
      }

  if (_yr_re_alloc_storage(&storage) != ERROR_SUCCESS)
    return -2;

  if (flags & RE_FLAGS_WIDE)
    character_size = 2;
  else
    character_size = 1;

  input = input_data;
  input_incr = character_size;

  if (flags & RE_FLAGS_BACKWARDS)
  {
    input -= character_size;
    input_incr = -input_incr;
  }

  max_count = (int) yr_min(input_size, RE_SCAN_LIMIT);

  // Round down max_count to a multiple of character_size, this way if
  // character_size is 2 and input_size is odd we are ignoring the
  // extra byte which can't match anyways.

  max_count = max_count - max_count % character_size;
  count = 0;

  // Start with an empty list so that fail_if_error can safely release the
  // fibers even when the very first allocation fails.

  fibers.head = NULL;
  fibers.tail = NULL;

  error = _yr_re_fiber_create(&storage->fiber_pool, &fiber);
  fail_if_error(error);

  fiber->ip = re_code;
  fibers.head = fiber;
  fibers.tail = fiber;

  error = _yr_re_fiber_sync(&fibers, &storage->fiber_pool, fiber);
  fail_if_error(error);

  while (fibers.head != NULL)
  {
    fiber = fibers.head;

    while(fiber != NULL)
    {
      ip = fiber->ip;
      action = ACTION_NONE;

      switch(*ip)
      {
        case RE_OPCODE_ANY:
          prolog;
          action = ACTION_NONE;
          fiber->ip += 1;
          break;

        case RE_OPCODE_ANY_EXCEPT_NEW_LINE:
          prolog;
          match = (*input != 0x0A);
          action = match ? ACTION_NONE : ACTION_KILL;
          fiber->ip += 1;
          break;

        case RE_OPCODE_LITERAL:
          prolog;
          match = (*input == *(ip + 1));
          action = match ? ACTION_NONE : ACTION_KILL;
          fiber->ip += 2;
          break;

        case RE_OPCODE_LITERAL_NO_CASE:
          prolog;
          match = lowercase[*input] == lowercase[*(ip + 1)];
          action = match ? ACTION_NONE : ACTION_KILL;
          fiber->ip += 2;
          break;

        case RE_OPCODE_MASKED_LITERAL:
          prolog;
          value = *(int16_t*)(ip + 1) & 0xFF;
          mask = *(int16_t*)(ip + 1) >> 8;

          // We don't need to take into account the case-insensitive
          // case because this opcode is only used with hex strings,
          // which can't be case-insensitive.

          match = ((*input & mask) == value);
          action = match ? ACTION_NONE : ACTION_KILL;
          fiber->ip += 3;
          break;

        case RE_OPCODE_CLASS:
          prolog;
          match = CHAR_IN_CLASS(*input, ip + 1);
          action = match ? ACTION_NONE : ACTION_KILL;
          // One opcode byte followed by a 32-byte class bitmap.
          fiber->ip += 33;
          break;

        case RE_OPCODE_CLASS_NO_CASE:
          prolog;
          match = CHAR_IN_CLASS(*input, ip + 1) ||
                  CHAR_IN_CLASS(altercase[*input], ip + 1);
          action = match ? ACTION_NONE : ACTION_KILL;
          fiber->ip += 33;
          break;

        case RE_OPCODE_WORD_CHAR:
          prolog;
          match = IS_WORD_CHAR(*input);
          action = match ? ACTION_NONE : ACTION_KILL;
          fiber->ip += 1;
          break;

        case RE_OPCODE_NON_WORD_CHAR:
          prolog;
          match = !IS_WORD_CHAR(*input);
          action = match ? ACTION_NONE : ACTION_KILL;
          fiber->ip += 1;
          break;

        case RE_OPCODE_SPACE:
        case RE_OPCODE_NON_SPACE:
          prolog;

          switch(*input)
          {
            case ' ':
            case '\t':
            case '\r':
            case '\n':
            case '\v':
            case '\f':
              match = TRUE;
              break;

            default:
              match = FALSE;
          }

          if (*ip == RE_OPCODE_NON_SPACE)
            match = !match;

          action = match ? ACTION_NONE : ACTION_KILL;
          fiber->ip += 1;
          break;

        case RE_OPCODE_DIGIT:
          prolog;
          match = isdigit(*input);
          action = match ? ACTION_NONE : ACTION_KILL;
          fiber->ip += 1;
          break;

        case RE_OPCODE_NON_DIGIT:
          prolog;
          match = !isdigit(*input);
          action = match ? ACTION_NONE : ACTION_KILL;
          fiber->ip += 1;
          break;

        case RE_OPCODE_WORD_BOUNDARY:
        case RE_OPCODE_NON_WORD_BOUNDARY:

          // The first two branches treat the edges of the scanned range as
          // boundaries without touching the input; only the third one reads
          // the previously visited character and the current one.

          if (count == 0 &&
              !(flags & RE_FLAGS_NOT_AT_START) &&
              !(flags & RE_FLAGS_BACKWARDS))
            match = TRUE;
          else if (count >= max_count)
            match = TRUE;
          else if (IS_WORD_CHAR(*(input - input_incr)) != IS_WORD_CHAR(*input))
            match = TRUE;
          else
            match = FALSE;

          if (*ip == RE_OPCODE_NON_WORD_BOUNDARY)
            match = !match;

          action = match ? ACTION_CONTINUE : ACTION_KILL;
          break;

        case RE_OPCODE_MATCH_AT_START:
          // Going backwards the start of the input is reached only after
          // input_size bytes have been consumed; going forwards it is the
          // very first position, unless the caller said the buffer does
          // not begin at the start.
          if (flags & RE_FLAGS_BACKWARDS)
            kill = input_size > (size_t) count;
          else
            kill = (flags & RE_FLAGS_NOT_AT_START) || (count != 0);
          action = kill ? ACTION_KILL : ACTION_CONTINUE;
          break;

        case RE_OPCODE_MATCH_AT_END:
          action = input_size > (size_t) count ? ACTION_KILL : ACTION_CONTINUE;
          break;

        case RE_OPCODE_MATCH:
          result = count;

          if (flags & RE_FLAGS_EXHAUSTIVE)
          {
            if (callback != NULL)
            {
              int cb_result;

              if (flags & RE_FLAGS_BACKWARDS)
                cb_result = callback(
                    input + character_size, count, flags, callback_args);
              else
                cb_result = callback(
                    input_data, count, flags, callback_args);

              // A failing callback aborts the whole execution; release the
              // live fibers before translating its error code.

              switch(cb_result)
              {
                case ERROR_INSUFICIENT_MEMORY:
                  _yr_re_fiber_kill_all(&fibers, &storage->fiber_pool);
                  return -2;

                case ERROR_TOO_MANY_MATCHES:
                  _yr_re_fiber_kill_all(&fibers, &storage->fiber_pool);
                  return -3;

                default:
                  if (cb_result != ERROR_SUCCESS)
                  {
                    _yr_re_fiber_kill_all(&fibers, &storage->fiber_pool);
                    return -4;
                  }
              }
            }

            action = ACTION_KILL;
          }
          else
          {
            action = ACTION_KILL_TAIL;
          }

          break;

        default:
          assert(FALSE);
      }

      switch(action)
      {
        case ACTION_KILL:
          fiber = _yr_re_fiber_kill(&fibers, &storage->fiber_pool, fiber);
          break;

        case ACTION_KILL_TAIL:
          _yr_re_fiber_kill_tail(&fibers, &storage->fiber_pool, fiber);
          fiber = NULL;
          break;

        case ACTION_CONTINUE:
          fiber->ip += 1;
          error = _yr_re_fiber_sync(&fibers, &storage->fiber_pool, fiber);
          fail_if_error(error);
          break;

        default:
          // Remember the successor before syncing, then move on to it.
          next_fiber = fiber->next;
          error = _yr_re_fiber_sync(&fibers, &storage->fiber_pool, fiber);
          fail_if_error(error);
          fiber = next_fiber;
      }
    }

    // In wide mode the second byte of every character must be zero,
    // otherwise all remaining fibers are discarded.

    if (flags & RE_FLAGS_WIDE &&
        count < max_count && *(input + 1) != 0)
      _yr_re_fiber_kill_all(&fibers, &storage->fiber_pool);

    input += input_incr;
    count += character_size;

    if (flags & RE_FLAGS_SCAN && count < max_count)
    {
      error = _yr_re_fiber_create(&storage->fiber_pool, &fiber);
      fail_if_error(error);

      fiber->ip = re_code;
      _yr_re_fiber_append(&fibers, fiber);

      error = _yr_re_fiber_sync(&fibers, &storage->fiber_pool, fiber);
      fail_if_error(error);
    }
  }

  return result;
}