// Runs `fiber_to_sync`, and every fiber split off from it along the way,
// through the control opcodes (SPLIT_A, SPLIT_B, JUMP, JNZ, PUSH, POP) until
// each of them is parked on some other opcode.
//
// The walk starts at `fiber_to_sync` and ends on reaching the fiber that was
// its successor when the function was entered. A fiber parked on a non-control
// opcode is killed when _yr_re_fiber_exists() returns non-zero for it (it is
// given the list, the fiber, and the fiber that preceded `fiber_to_sync` on
// entry); otherwise the walk simply moves on to the next fiber.
//
// NOTE(review): reaching the split-off fibers relies on _yr_re_fiber_split()
// linking the new fiber somewhere between the current fiber and the stop
// fiber -- confirm against its definition.

void _yr_re_fiber_sync(
    RE_FIBER_LIST* fiber_list,
    RE_FIBER_LIST* fiber_pool,
    RE_FIBER* fiber_to_sync)
{
  RE_FIBER* anchor = fiber_to_sync->prev;
  RE_FIBER* stop = fiber_to_sync->next;
  RE_FIBER* current = fiber_to_sync;
  RE_FIBER* branch;

  int16_t offset;

  while (current != stop)
  {
    switch (*current->ip)
    {
      case RE_OPCODE_SPLIT_A:
      case RE_OPCODE_SPLIT_B:

        branch = _yr_re_fiber_split(current, fiber_list, fiber_pool);

        // 16-bit relative jump stored right after the opcode byte.
        offset = *(int16_t*)(current->ip + 1);

        if (*current->ip == RE_OPCODE_SPLIT_A)
        {
          // SPLIT_A: the new fiber takes the jump, the current one falls
          // through to the instruction that follows (3 bytes ahead).
          branch->ip += offset;
          current->ip += 3;
        }
        else
        {
          // SPLIT_B: the roles are swapped.
          branch->ip += 3;
          current->ip += offset;
        }

        break;

      case RE_OPCODE_JUMP:

        current->ip += *(int16_t*)(current->ip + 1);
        break;

      case RE_OPCODE_JNZ:

        // Decrement the counter on top of the fiber's stack; jump while it
        // is still positive, otherwise continue with the next instruction.
        if (--current->stack[current->sp] > 0)
          current->ip += *(int16_t*)(current->ip + 1);
        else
          current->ip += 3;

        break;

      case RE_OPCODE_PUSH:

        current->sp++;
        current->stack[current->sp] = *(uint16_t*)(current->ip + 1);
        current->ip += 3;
        break;

      case RE_OPCODE_POP:

        current->ip++;
        current->sp--;
        break;

      default:

        // Not a control opcode: this fiber is done for this round.
        current = _yr_re_fiber_exists(fiber_list, current, anchor)
            ? _yr_re_fiber_kill(fiber_list, fiber_pool, current)
            : current->next;
    }
  }
}
// Executes the regexp bytecode at `code` against `input_data`.
//
// Every fiber in the `fibers` list is stepped once per input character. A
// character is 2 bytes wide when RE_FLAGS_WIDE is set and 1 byte otherwise.
// Fibers are taken from, and returned to, a per-thread pool kept in
// thread-local storage that is allocated on first use.
//
// Flags handled here:
//   RE_FLAGS_WIDE        2-byte characters; after each step all fibers are
//                        killed if the byte following `input` is not zero.
//   RE_FLAGS_NO_CASE     literals and classes are compared case-insensitively
//                        through the `lowercase` / `altercase` tables.
//   RE_FLAGS_DOT_ALL     RE_OPCODE_ANY also accepts 0x0A.
//   RE_FLAGS_BACKWARDS   `input` is moved backwards instead of forwards.
//   RE_FLAGS_EXHAUSTIVE  `callback` is invoked for every match reached; it is
//                        not invoked at all without this flag.
//   RE_FLAGS_SCAN        a fresh fiber starting at `code` is appended after
//                        each character while `count < max_count`.
//
// Returns -1 when no fiber reached a match opcode, otherwise the value of
// `count` (bytes consumed) at the most recently recorded match. Returns
// ERROR_INSUFICIENT_MEMORY when the thread storage cannot be allocated.
//
// NOTE(review): the error code shares the return channel with match lengths;
// callers presumably need to disambiguate -- confirm.
// NOTE(review): `prolog` and `epilog` are macros defined outside this block;
// they are assumed to reference the locals of this function by name, so the
// local names must not be changed without checking them.

int yr_re_exec(
    uint8_t* code,
    uint8_t* input_data,
    size_t input_size,
    int flags,
    RE_MATCH_CALLBACK_FUNC callback,
    void* callback_args)
{
  uint8_t* ip;
  uint8_t* input;
  uint8_t mask;
  uint8_t value;

  RE_FIBER_LIST fibers;
  RE_THREAD_STORAGE* storage;
  RE_FIBER* fiber;
  RE_FIBER* new_fiber;

  int count;
  int max_count;
  int match;
  int character_size;
  int result = -1;

  #ifdef WIN32
  storage = TlsGetValue(thread_storage_key);
  #else
  storage = pthread_getspecific(thread_storage_key);
  #endif

  if (storage == NULL)
  {
    // First call on this thread: create the storage with an empty fiber pool.
    storage = yr_malloc(sizeof(RE_THREAD_STORAGE));

    if (storage == NULL)
      return ERROR_INSUFICIENT_MEMORY;

    storage->fiber_pool.head = NULL;
    storage->fiber_pool.tail = NULL;

    // NOTE(review): the result of storing the pointer is not checked; if it
    // fails, a new storage block would be allocated on every call.
    #ifdef WIN32
    TlsSetValue(thread_storage_key, storage);
    #else
    pthread_setspecific(thread_storage_key, storage);
    #endif
  }

  if (flags & RE_FLAGS_WIDE)
    character_size = 2;
  else
    character_size = 1;

  // NOTE(review): the fiber returned by _yr_re_fiber_create() (and by
  // _yr_re_fiber_split() below) is dereferenced without a NULL check --
  // confirm whether those helpers can fail.
  fiber = _yr_re_fiber_create(&storage->fiber_pool);
  fiber->ip = code;

  fibers.head = fiber;
  fibers.tail = fiber;

  input = input_data;
  count = 0;
  max_count = min(input_size, RE_SCAN_LIMIT);

  while (fibers.head != NULL)
  {
    fiber = fibers.head;

    while (fiber != NULL)
    {
      ip = fiber->ip;

      switch (*ip)
      {
        case RE_OPCODE_LITERAL:
          prolog;
          if (flags & RE_FLAGS_NO_CASE)
            match = lowercase[*input] == lowercase[*(ip + 1)];
          else
            match = (*input == *(ip + 1));
          fiber->ip += 2;
          epilog;
          break;

        case RE_OPCODE_ANY:
          prolog;
          match = (*input != 0x0A || flags & RE_FLAGS_DOT_ALL);
          fiber->ip += 1;
          epilog;
          break;

        case RE_OPCODE_MASKED_LITERAL:
          prolog;
          // The 16-bit operand packs the value in its low byte and the mask
          // in its high byte.
          value = *(int16_t*)(ip + 1) & 0xFF;
          mask = *(int16_t*)(ip + 1) >> 8;

          // We don't need to take into account the case-insensitive
          // case because this opcode is only used with hex strings,
          // which can't be case-insensitive.

          match = ((*input & mask) == value);
          fiber->ip += 3;
          epilog;
          break;

        case RE_OPCODE_CLASS:
          prolog;
          if (flags & RE_FLAGS_NO_CASE)
            match = CHAR_IN_CLASS(*input, ip + 1) ||
                    CHAR_IN_CLASS(altercase[*input], ip + 1);
          else
            match = CHAR_IN_CLASS(*input, ip + 1);
          // Opcode byte plus the 32 bytes of class data that follow it.
          fiber->ip += 33;
          epilog;
          break;

        case RE_OPCODE_WORD_CHAR:
          prolog;
          match = (isalnum(*input) || *input == '_');
          fiber->ip += 1;
          epilog;
          break;

        case RE_OPCODE_NON_WORD_CHAR:
          prolog;
          match = (!isalnum(*input) && *input != '_');
          fiber->ip += 1;
          epilog;
          break;

        case RE_OPCODE_SPACE:
          prolog;
          match = (*input == ' ' || *input == '\t');
          fiber->ip += 1;
          epilog;
          break;

        case RE_OPCODE_NON_SPACE:
          prolog;
          match = (*input != ' ' && *input != '\t');
          fiber->ip += 1;
          epilog;
          break;

        case RE_OPCODE_DIGIT:
          prolog;
          match = isdigit(*input);
          fiber->ip += 1;
          epilog;
          break;

        case RE_OPCODE_NON_DIGIT:
          prolog;
          match = !isdigit(*input);
          fiber->ip += 1;
          epilog;
          break;

        case RE_OPCODE_SPLIT_A:
          // The new fiber takes the jump; this one falls through.
          new_fiber = _yr_re_fiber_split(
              fiber, &fibers, &storage->fiber_pool);
          new_fiber->ip += *(int16_t*)(ip + 1);
          fiber->ip += 3;
          break;

        case RE_OPCODE_SPLIT_B:
          // This fiber takes the jump; the new one falls through.
          new_fiber = _yr_re_fiber_split(
              fiber, &fibers, &storage->fiber_pool);
          new_fiber->ip += 3;
          fiber->ip += *(int16_t*)(ip + 1);
          break;

        case RE_OPCODE_JUMP:
          fiber->ip = ip + *(int16_t*)(ip + 1);
          break;

        case RE_OPCODE_JNZ:
          // Decrement the counter on top of the stack and loop while it is
          // still positive.
          fiber->stack[fiber->sp]--;
          if (fiber->stack[fiber->sp] > 0)
            fiber->ip = ip + *(int16_t*)(ip + 1);
          else
            fiber->ip += 3;
          break;

        case RE_OPCODE_PUSH:
          fiber->stack[++fiber->sp] = *(uint16_t*)(ip + 1);
          fiber->ip += 3;
          break;

        case RE_OPCODE_POP:
          fiber->sp--;
          fiber->ip++;
          break;

        case RE_OPCODE_MATCH:
        case RE_OPCODE_MATCH_AT_START:
        case RE_OPCODE_MATCH_AT_END:

          // Anchored matches are rejected (the fiber is killed) when the
          // corresponding size/count comparison holds.
          // NOTE(review): the first comparison mixes size_t and int, so the
          // right-hand side is converted to unsigned -- confirm intended.
          if ((*ip == RE_OPCODE_MATCH_AT_START &&
               input_size - 1 > count - character_size) ||
              (*ip == RE_OPCODE_MATCH_AT_END &&
               input_size > count))
          {
            fiber = _yr_re_fiber_kill(fiber, &fibers, &storage->fiber_pool);
            break;
          }

          result = count;

          if (flags & RE_FLAGS_EXHAUSTIVE)
          {
            if (flags & RE_FLAGS_BACKWARDS)
              callback(input + character_size, count, flags, callback_args);
            else
              callback(input_data, count, flags, callback_args);

            fiber = _yr_re_fiber_kill(fiber, &fibers, &storage->fiber_pool);
          }
          else
          {
            // Drop this fiber and everything after it in the list; fibers
            // before it keep running.
            _yr_re_fiber_kill_tail(fiber, &fibers, &storage->fiber_pool);
            fiber = NULL;
          }

          break;

        default:
          assert(FALSE);

          // When assertions are compiled out (NDEBUG) an unknown opcode
          // would leave the fiber untouched and this loop would never
          // terminate. Kill the fiber so execution always makes progress.
          fiber = _yr_re_fiber_kill(fiber, &fibers, &storage->fiber_pool);
          break;
      }
    }

    // In wide mode the second byte of every character must be zero.
    // NOTE(review): this reads input[1] without a bounds check -- confirm the
    // byte is always inside the buffer.
    if (fibers.head != NULL &&
        flags & RE_FLAGS_WIDE &&
        *(input + 1) != 0)
      _yr_re_fiber_kill_tail(fibers.head, &fibers, &storage->fiber_pool);

    if (flags & RE_FLAGS_BACKWARDS)
      input -= character_size;
    else
      input += character_size;

    count += character_size;

    if ((flags & RE_FLAGS_SCAN) && count < max_count)
    {
      // Start a new match attempt at the next input position.
      fiber = _yr_re_fiber_create(&storage->fiber_pool);
      fiber->ip = code;
      _yr_re_fiber_append(fiber, &fibers);
    }
  }

  return result;
}
// Runs `fiber_to_sync`, and every fiber split off from it along the way,
// through the control opcodes (SPLIT_A, SPLIT_B, JUMP, JNZ, PUSH, POP) until
// each of them is parked on some other opcode.
//
// The walk starts at `fiber_to_sync` and ends on reaching the fiber that was
// its successor when the function was entered. A fiber parked on a non-control
// opcode is killed when _yr_re_fiber_exists() returns non-zero for it;
// otherwise the walk moves on to the next fiber.
//
// Returns ERROR_SUCCESS on completion. When _yr_re_fiber_split() fails the
// function leaves early through FAIL_ON_ERROR (defined outside this block;
// presumably it returns the failing call's error code -- confirm).

int _yr_re_fiber_sync(
    RE_FIBER_LIST* fiber_list,
    RE_FIBER_POOL* fiber_pool,
    RE_FIBER* fiber_to_sync)
{
  // An array for keeping track of which split instructions have already been
  // executed. Each split instruction within a regexp has an associated ID
  // between 0 and RE_MAX_SPLIT_ID. Keeping track of executed splits is
  // required to avoid infinite loops in regexps like (a*)* or (a|)*

  RE_SPLIT_ID_TYPE splits_executed[RE_MAX_SPLIT_ID];
  RE_SPLIT_ID_TYPE splits_executed_count = 0;
  RE_SPLIT_ID_TYPE split_id, splits_executed_idx;

  int split_already_executed;

  RE_FIBER* fiber;
  RE_FIBER* last;
  RE_FIBER* prev;
  RE_FIBER* new_fiber;

  fiber = fiber_to_sync;
  prev = fiber_to_sync->prev;
  last = fiber_to_sync->next;

  while (fiber != last)
  {
    switch (*fiber->ip)
    {
      case RE_OPCODE_SPLIT_A:
      case RE_OPCODE_SPLIT_B:

        // The split ID is stored right after the opcode byte.
        split_id = *(RE_SPLIT_ID_TYPE*)(fiber->ip + 1);
        split_already_executed = FALSE;

        for (splits_executed_idx = 0;
             splits_executed_idx < splits_executed_count;
             splits_executed_idx++)
        {
          if (split_id == splits_executed[splits_executed_idx])
          {
            split_already_executed = TRUE;
            break;
          }
        }

        // A full table is handled like a repeated split. Bytecode that
        // respects the ID range described above should never fill the table
        // with a new ID still pending, but nothing in this function enforces
        // that range, and recording one more ID would write past the end of
        // `splits_executed`.
        if (split_already_executed ||
            splits_executed_count >= RE_MAX_SPLIT_ID)
        {
          fiber = _yr_re_fiber_kill(fiber_list, fiber_pool, fiber);
        }
        else
        {
          FAIL_ON_ERROR(_yr_re_fiber_split(
              fiber, fiber_list, fiber_pool, &new_fiber));

          // The 16-bit jump offset follows the opcode byte and the split ID.
          // With SPLIT_A the new fiber takes the jump and the current one
          // falls through to the next instruction; SPLIT_B is the opposite.

          if (*fiber->ip == RE_OPCODE_SPLIT_A)
          {
            new_fiber->ip += *(int16_t*)(
                fiber->ip
                + 1  // opcode size
                + sizeof(RE_SPLIT_ID_TYPE));

            fiber->ip += (sizeof(RE_SPLIT_ID_TYPE) + 3);
          }
          else
          {
            fiber->ip += *(int16_t*)(
                fiber->ip
                + 1  // opcode size
                + sizeof(RE_SPLIT_ID_TYPE));

            new_fiber->ip += (sizeof(RE_SPLIT_ID_TYPE) + 3);
          }

          splits_executed[splits_executed_count] = split_id;
          splits_executed_count++;
        }

        break;

      case RE_OPCODE_JUMP:
        fiber->ip += *(int16_t*)(fiber->ip + 1);
        break;

      case RE_OPCODE_JNZ:
        // Decrement the counter on top of the fiber's stack; jump while it
        // is still positive, otherwise continue with the next instruction.
        fiber->stack[fiber->sp]--;

        if (fiber->stack[fiber->sp] > 0)
          fiber->ip += *(int16_t*)(fiber->ip + 1);
        else
          fiber->ip += 3;

        break;

      case RE_OPCODE_PUSH:
        // NOTE(review): the stack index is not range-checked here -- confirm
        // the bytecode emitter bounds the nesting depth.
        fiber->stack[++fiber->sp] = *(uint16_t*)(fiber->ip + 1);
        fiber->ip += 3;
        break;

      case RE_OPCODE_POP:
        fiber->sp--;
        fiber->ip++;
        break;

      default:
        // Not a control opcode: this fiber is done for this round.
        if (_yr_re_fiber_exists(fiber_list, fiber, prev))
          fiber = _yr_re_fiber_kill(fiber_list, fiber_pool, fiber);
        else
          fiber = fiber->next;
    }
  }

  return ERROR_SUCCESS;
}