//
// yr_parser_emit_with_arg_reloc
//
// Writes a one-byte instruction followed by a 64-bit argument into the
// compiler's code arena and then registers the argument as relocatable.
//
// Args:
//    yyscanner            - Scanner whose extra data holds the code arena.
//    instruction          - Opcode to emit.
//    argument             - 64-bit value emitted right after the opcode.
//    instruction_address  - Passed to yr_arena_write_data as the output
//                           pointer for the opcode write.
//
// Returns:
//    ERROR_SUCCESS if all three steps succeed, otherwise the error code
//    returned by the first step that failed.
//
int yr_parser_emit_with_arg_reloc(
    yyscan_t yyscanner,
    int8_t instruction,
    int64_t argument,
    int8_t** instruction_address)
{
  void* argument_slot;
  int status;

  status = yr_arena_write_data(
      yyget_extra(yyscanner)->code_arena,
      &instruction,
      sizeof(instruction),
      (void**) instruction_address);

  if (status != ERROR_SUCCESS)
    return status;

  status = yr_arena_write_data(
      yyget_extra(yyscanner)->code_arena,
      &argument,
      sizeof(argument),
      &argument_slot);

  if (status != ERROR_SUCCESS)
    return status;

  // The argument was just written at argument_slot; mark offset 0 of that
  // location as relocatable.
  return yr_arena_make_relocatable(
      yyget_extra(yyscanner)->code_arena,
      argument_slot,
      0,
      EOL);
}
//
// yr_arena_append
//
// Links the page list of source_arena after the current page of
// target_arena. Before linking, the current page of target_arena is padded
// with 0xCC bytes until its "used" counter is a multiple of 16. The
// source_arena structure itself is released with yr_free.
//
// Args:
//    target_arena  - Arena that receives the pages.
//    source_arena  - Arena whose pages are handed over; its structure is
//                    freed by this function.
//
// Returns:
//    ERROR_SUCCESS, or the error propagated by FAIL_ON_ERROR when writing
//    the padding fails.
//
int yr_arena_append(
    YR_ARENA* target_arena,
    YR_ARENA* source_arena)
{
  uint8_t filler[15];

  // Number of bytes needed to reach the next 16-byte boundary. A value of 16
  // means the page is already aligned and no padding is written.
  const size_t filler_size = 16 - target_arena->current_page->used % 16;

  if (filler_size < 16)
  {
    memset(filler, 0xCC, filler_size);

    FAIL_ON_ERROR(yr_arena_write_data(
        target_arena,
        filler,
        filler_size,
        NULL));
  }

  // Splice the source page list right after the target's current page, then
  // make the source's current page the target's current page.
  target_arena->current_page->next = source_arena->page_list_head;
  source_arena->page_list_head->prev = target_arena->current_page;
  target_arena->current_page = source_arena->current_page;

  yr_free(source_arena);

  return ERROR_SUCCESS;
}
//
// yr_arena_write_string
//
// Writes a null-terminated string, terminator included, into the arena by
// delegating to yr_arena_write_data.
//
// Args:
//    arena           - Destination arena.
//    string          - Null-terminated string to copy.
//    written_string  - Forwarded to yr_arena_write_data as its output
//                      pointer.
//
// Returns:
//    Whatever yr_arena_write_data returns.
//
int yr_arena_write_string(
    YR_ARENA* arena,
    const char* string,
    char** written_string)
{
  // Length of the string plus one byte for the terminating null.
  size_t size_with_terminator = strlen(string) + 1;

  return yr_arena_write_data(
      arena,
      (void*) string,
      size_with_terminator,
      (void**) written_string);
}
//
// yr_parser_emit
//
// Writes a single one-byte instruction into the compiler's code arena.
//
// Args:
//    yyscanner            - Scanner whose extra data holds the code arena.
//    instruction          - Opcode to emit.
//    instruction_address  - Forwarded to yr_arena_write_data as its output
//                           pointer.
//
// Returns:
//    Whatever yr_arena_write_data returns.
//
int yr_parser_emit(
    yyscan_t yyscanner,
    int8_t instruction,
    int8_t** instruction_address)
{
  int status = yr_arena_write_data(
      yyget_extra(yyscanner)->code_arena,
      &instruction,
      sizeof(instruction),
      (void**) instruction_address);

  return status;
}
//
// yr_parser_emit_with_arg
//
// Writes a one-byte instruction followed by a 64-bit argument into the
// compiler's code arena.
//
// Args:
//    yyscanner            - Scanner whose extra data holds the code arena.
//    instruction          - Opcode to emit.
//    argument             - 64-bit value emitted right after the opcode.
//    instruction_address  - Forwarded to yr_arena_write_data as the output
//                           pointer for the opcode write.
//
// Returns:
//    ERROR_SUCCESS if both writes succeed, otherwise the error code of the
//    first write that failed. The argument is not written when the opcode
//    write fails.
//
int yr_parser_emit_with_arg(
    yyscan_t yyscanner,
    int8_t instruction,
    int64_t argument,
    int8_t** instruction_address)
{
  int status = yr_arena_write_data(
      yyget_extra(yyscanner)->code_arena,
      &instruction,
      sizeof(instruction),
      (void**) instruction_address);

  if (status != ERROR_SUCCESS)
    return status;

  return yr_arena_write_data(
      yyget_extra(yyscanner)->code_arena,
      &argument,
      sizeof(argument),
      NULL);
}
//
// yr_parser_emit_with_arg_reloc
//
// Writes a one-byte instruction followed by a pointer argument into the
// compiler's code arena and registers the written pointer as relocatable.
// The pointer is stored through a DECLARE_REFERENCE wrapper that is zeroed
// before the pointer is assigned.
//
// Args:
//    yyscanner            - Scanner whose extra data holds the code arena.
//    instruction          - Opcode to emit.
//    argument             - Pointer emitted right after the opcode.
//    instruction_address  - Forwarded to yr_arena_write_data as the output
//                           pointer for the opcode write.
//    argument_address     - If not NULL, receives the address where the
//                           argument was written. It is assigned even when
//                           an earlier step failed; it is NULL if the
//                           argument write was never attempted.
//
// Returns:
//    ERROR_SUCCESS if every step succeeds, otherwise the error code of the
//    first step that failed.
//
int yr_parser_emit_with_arg_reloc(
    yyscan_t yyscanner,
    uint8_t instruction,
    void* argument,
    uint8_t** instruction_address,
    void** argument_address)
{
  int64_t* written_arg = NULL;
  int status;

  DECLARE_REFERENCE(void*, ptr) arg;

  // Zero the whole wrapper first so that any bytes not covered by the
  // pointer itself are written to the arena as zeros.
  memset(&arg, 0, sizeof(arg));
  arg.ptr = argument;

  status = yr_arena_write_data(
      yyget_extra(yyscanner)->code_arena,
      &instruction,
      sizeof(instruction),
      (void**) instruction_address);

  if (status == ERROR_SUCCESS)
  {
    status = yr_arena_write_data(
        yyget_extra(yyscanner)->code_arena,
        &arg,
        sizeof(arg),
        (void**) &written_arg);

    if (status == ERROR_SUCCESS)
    {
      status = yr_arena_make_ptr_relocatable(
          yyget_extra(yyscanner)->code_arena,
          written_arg,
          0,
          EOL);
    }
  }

  if (argument_address != NULL)
    *argument_address = (void*) written_arg;

  return status;
}
//
// yr_parser_emit_with_arg_double
//
// Writes a one-byte instruction followed by a double argument into the
// compiler's code arena.
//
// Args:
//    yyscanner            - Scanner whose extra data holds the code arena.
//    instruction          - Opcode to emit.
//    argument             - Double value emitted right after the opcode.
//    instruction_address  - Forwarded to yr_arena_write_data as the output
//                           pointer for the opcode write.
//    argument_address     - Forwarded to yr_arena_write_data as the output
//                           pointer for the argument write.
//
// Returns:
//    ERROR_SUCCESS if both writes succeed, otherwise the error code of the
//    first write that failed.
//
int yr_parser_emit_with_arg_double(
    yyscan_t yyscanner,
    uint8_t instruction,
    double argument,
    uint8_t** instruction_address,
    double** argument_address)
{
  int status = yr_arena_write_data(
      yyget_extra(yyscanner)->code_arena,
      &instruction,
      sizeof(instruction),
      (void**) instruction_address);

  if (status != ERROR_SUCCESS)
    return status;

  return yr_arena_write_data(
      yyget_extra(yyscanner)->code_arena,
      &argument,
      sizeof(argument),
      (void**) argument_address);
}
//
// _yr_emit_split
//
// Emits a split instruction: the opcode, the next available split ID taken
// from the emit context, and a 16-bit argument. The context's split ID
// counter is incremented after the ID has been written.
//
// Args:
//    emit_context      - Emit context providing the arena and the split ID
//                        counter.
//    opcode            - Must be RE_OPCODE_SPLIT_A or RE_OPCODE_SPLIT_B.
//    argument          - 16-bit argument written after the split ID.
//    instruction_addr  - Forwarded to yr_arena_write_data as the output
//                        pointer for the opcode write.
//    argument_addr     - Forwarded to yr_arena_write_data as the output
//                        pointer for the argument write.
//    code_size         - Receives the total number of bytes emitted. Only
//                        written on success.
//
// Returns:
//    ERROR_SUCCESS, ERROR_REGULAR_EXPRESSION_TOO_COMPLEX when the split ID
//    counter has reached RE_MAX_SPLIT_ID, or an error propagated by
//    FAIL_ON_ERROR from yr_arena_write_data.
//
int _yr_emit_split(
    RE_EMIT_CONTEXT* emit_context,
    uint8_t opcode,
    int16_t argument,
    uint8_t** instruction_addr,
    int16_t** argument_addr,
    int* code_size)
{
  assert(opcode == RE_OPCODE_SPLIT_A || opcode == RE_OPCODE_SPLIT_B);

  // No split IDs left for this regular expression.
  if (emit_context->next_split_id == RE_MAX_SPLIT_ID)
    return ERROR_REGULAR_EXPRESSION_TOO_COMPLEX;

  // Opcode.
  FAIL_ON_ERROR(yr_arena_write_data(
      emit_context->arena,
      &opcode,
      sizeof(opcode),
      (void**) instruction_addr));

  // Split ID; the counter advances only after the ID was written.
  FAIL_ON_ERROR(yr_arena_write_data(
      emit_context->arena,
      &emit_context->next_split_id,
      sizeof(RE_SPLIT_ID_TYPE),
      NULL));

  emit_context->next_split_id += 1;

  // Argument.
  FAIL_ON_ERROR(yr_arena_write_data(
      emit_context->arena,
      &argument,
      sizeof(argument),
      (void**) argument_addr));

  *code_size = sizeof(opcode) + sizeof(RE_SPLIT_ID_TYPE) + sizeof(argument);

  return ERROR_SUCCESS;
}
//
// _yr_emit_inst_arg_int16
//
// Emits a one-byte opcode followed by a 16-bit signed argument into the
// given arena.
//
// Args:
//    arena             - Destination arena.
//    opcode            - Opcode to emit.
//    argument          - 16-bit argument written after the opcode.
//    instruction_addr  - Forwarded to yr_arena_write_data as the output
//                        pointer for the opcode write.
//    argument_addr     - Forwarded to yr_arena_write_data as the output
//                        pointer for the argument write.
//    code_size         - Receives the number of bytes emitted. Only written
//                        on success.
//
// Returns:
//    ERROR_SUCCESS, or an error propagated by FAIL_ON_ERROR from
//    yr_arena_write_data.
//
int _yr_emit_inst_arg_int16(
    YR_ARENA* arena,
    uint8_t opcode,
    int16_t argument,
    uint8_t** instruction_addr,
    int16_t** argument_addr,
    int* code_size)
{
  // Opcode first ...
  FAIL_ON_ERROR(yr_arena_write_data(
      arena,
      &opcode,
      sizeof(opcode),
      (void**) instruction_addr));

  // ... then its argument.
  FAIL_ON_ERROR(yr_arena_write_data(
      arena,
      &argument,
      sizeof(argument),
      (void**) argument_addr));

  *code_size = sizeof(opcode) + sizeof(argument);

  return ERROR_SUCCESS;
}
//
// _yr_emit_inst_arg_int16
//
// Emits a one-byte opcode followed by a 16-bit signed argument into the
// arena of the given emit context.
//
// Args:
//    emit_context      - Emit context whose arena receives the code.
//    opcode            - Opcode to emit.
//    argument          - 16-bit argument written after the opcode.
//    instruction_addr  - Forwarded to yr_arena_write_data as the output
//                        pointer for the opcode write.
//    argument_addr     - Forwarded to yr_arena_write_data as the output
//                        pointer for the argument write.
//    code_size         - Receives the number of bytes emitted. Only written
//                        on success.
//
// Returns:
//    ERROR_SUCCESS, or an error propagated by FAIL_ON_ERROR from
//    yr_arena_write_data.
//
int _yr_emit_inst_arg_int16(
    RE_EMIT_CONTEXT* emit_context,
    uint8_t opcode,
    int16_t argument,
    uint8_t** instruction_addr,
    int16_t** argument_addr,
    int* code_size)
{
  // Opcode first ...
  FAIL_ON_ERROR(yr_arena_write_data(
      emit_context->arena,
      &opcode,
      sizeof(opcode),
      (void**) instruction_addr));

  // ... then its argument.
  FAIL_ON_ERROR(yr_arena_write_data(
      emit_context->arena,
      &argument,
      sizeof(argument),
      (void**) argument_addr));

  *code_size = sizeof(opcode) + sizeof(argument);

  return ERROR_SUCCESS;
}
//
// _yr_parser_write_string
//
// Allocates a YR_STRING in the compiler's strings arena, fills it in,
// extracts atoms for it and adds it to the compiler's Aho-Corasick
// automaton.
//
// Args:
//    identifier        - Identifier of the string, copied into sz_arena.
//    flags             - STRING_GFLAGS_XXX flags for the string. The flags
//                        stored in the string may differ: LITERAL can be
//                        added, FIXED_OFFSET can be removed and FITS_IN_ATOM
//                        can be added.
//    compiler          - Compiler providing arenas, atoms configuration,
//                        automaton and current rule.
//    str               - Literal text; used when the string is neither
//                        hexadecimal nor a regular expression.
//    re_ast            - Regular expression AST; used for hexadecimal and
//                        regular expression strings.
//    string            - Receives the newly allocated YR_STRING. Set to
//                        NULL before the allocation is attempted.
//    min_atom_quality  - Forwarded to the atom extraction functions.
//    num_atom          - Incremented by the number of atoms in the list
//                        produced for this string.
//
// Returns:
//    ERROR_SUCCESS or the error code of the first step that failed.
//
static int _yr_parser_write_string(
    const char* identifier,
    int flags,
    YR_COMPILER* compiler,
    SIZED_STRING* str,
    RE_AST* re_ast,
    YR_STRING** string,
    int* min_atom_quality,
    int* num_atom)
{
  SIZED_STRING* literal;
  YR_ATOM_LIST_ITEM* item;
  YR_ATOM_LIST_ITEM* atoms = NULL;
  YR_STRING* new_string;

  int atom_count;
  int status;
  int longest_len;
  bool literal_is_owned = false;

  *string = NULL;

  status = yr_arena_allocate_struct(
      compiler->strings_arena,
      sizeof(YR_STRING),
      (void**) string,
      offsetof(YR_STRING, identifier),
      offsetof(YR_STRING, string),
      offsetof(YR_STRING, chained_to),
      offsetof(YR_STRING, rule),
      EOL);

  if (status != ERROR_SUCCESS)
    return status;

  new_string = *string;

  status = yr_arena_write_string(
      compiler->sz_arena,
      identifier,
      &new_string->identifier);

  if (status != ERROR_SUCCESS)
    return status;

  if ((flags & STRING_GFLAGS_HEXADECIMAL) == 0 &&
      (flags & STRING_GFLAGS_REGEXP) == 0)
  {
    // Plain text strings are literal by definition.
    literal = str;
    flags |= STRING_GFLAGS_LITERAL;
  }
  else
  {
    // Hex strings and regexps are literal only if the AST can be reduced to
    // a literal; in that case the extracted literal must be freed here.
    literal = yr_re_ast_extract_literal(re_ast);

    if (literal == NULL)
    {
      // Non-literal strings can't be marked as fixed offset because once we
      // find a string atom in the scanned data we don't know the offset
      // where the string should start, as the non-literal strings can
      // contain variable-length portions.
      flags &= ~STRING_GFLAGS_FIXED_OFFSET;
    }
    else
    {
      flags |= STRING_GFLAGS_LITERAL;
      literal_is_owned = true;
    }
  }

  new_string->g_flags = flags;
  new_string->chained_to = NULL;
  new_string->fixed_offset = UNDEFINED;
  new_string->rule = compiler->current_rule;

  memset(new_string->matches, 0, sizeof(new_string->matches));

  memset(
      new_string->unconfirmed_matches,
      0,
      sizeof(new_string->unconfirmed_matches));

  if (flags & STRING_GFLAGS_LITERAL)
  {
    new_string->length = (uint32_t) literal->length;

    status = yr_arena_write_data(
        compiler->sz_arena,
        literal->c_string,
        literal->length + 1,   // +1 to include terminating NULL
        (void**) &new_string->string);

    if (status == ERROR_SUCCESS)
    {
      status = yr_atoms_extract_from_string(
          &compiler->atoms_config,
          (uint8_t*) literal->c_string,
          (int32_t) literal->length,
          flags,
          &atoms,
          min_atom_quality);
    }
  }
  else
  {
    // Forwards code first, then backwards code, then the atoms.
    status = yr_re_ast_emit_code(re_ast, compiler->re_code_arena, false);

    if (status == ERROR_SUCCESS)
    {
      status = yr_re_ast_emit_code(re_ast, compiler->re_code_arena, true);

      if (status == ERROR_SUCCESS)
      {
        status = yr_atoms_extract_from_re(
            &compiler->atoms_config,
            re_ast,
            flags,
            &atoms,
            min_atom_quality);
      }
    }
  }

  if (status == ERROR_SUCCESS)
  {
    // Add the string to Aho-Corasick automaton.
    status = yr_ac_add_string(
        compiler->automaton,
        new_string,
        atoms,
        compiler->matches_arena);
  }

  if (flags & STRING_GFLAGS_LITERAL)
  {
    // Wide strings take two bytes per character.
    longest_len = (flags & STRING_GFLAGS_WIDE) ?
        new_string->length * 2 :
        new_string->length;

    if (longest_len <= YR_MAX_ATOM_LENGTH)
      new_string->g_flags |= STRING_GFLAGS_FITS_IN_ATOM;
  }

  // Count the atoms produced for this string.
  atom_count = 0;

  for (item = atoms; item != NULL; item = item->next)
    atom_count++;

  (*num_atom) += atom_count;

  if (literal_is_owned)
    yr_free(literal);

  if (atoms != NULL)
    yr_atoms_list_destroy(atoms);

  return status;
}
int yr_execute_code( YR_SCAN_CONTEXT* context) { int64_t mem[MEM_SIZE]; int32_t sp = 0; const uint8_t* ip = context->rules->code_start; YR_VALUE args[YR_MAX_FUNCTION_ARGS]; YR_VALUE *stack; YR_VALUE r1; YR_VALUE r2; YR_VALUE r3; uint64_t elapsed_time; #ifdef PROFILING_ENABLED uint64_t start_time; YR_RULE* current_rule = NULL; #endif YR_INIT_RULE_ARGS init_rule_args; YR_RULE* rule; YR_MATCH* match; YR_OBJECT_FUNCTION* function; YR_OBJECT** obj_ptr; YR_ARENA* obj_arena; char* identifier; char* args_fmt; int i; int found; int count; int result = ERROR_SUCCESS; int cycle = 0; int tidx = context->tidx; int stack_size; bool stop = false; uint8_t opcode; yr_get_configuration(YR_CONFIG_STACK_SIZE, (void*) &stack_size); stack = (YR_VALUE*) yr_malloc(stack_size * sizeof(YR_VALUE)); if (stack == NULL) return ERROR_INSUFFICIENT_MEMORY; FAIL_ON_ERROR_WITH_CLEANUP( yr_arena_create(1024, 0, &obj_arena), yr_free(stack)); #ifdef PROFILING_ENABLED start_time = yr_stopwatch_elapsed_us(&context->stopwatch); #endif #if PARANOID_EXEC memset(mem, 0, MEM_SIZE * sizeof(mem[0])); #endif while(!stop) { opcode = *ip; ip++; switch(opcode) { case OP_NOP: break; case OP_HALT: assert(sp == 0); // When HALT is reached the stack should be empty. 
stop = true; break; case OP_PUSH: r1.i = *(uint64_t*)(ip); ip += sizeof(uint64_t); push(r1); break; case OP_POP: pop(r1); break; case OP_CLEAR_M: r1.i = *(uint64_t*)(ip); ip += sizeof(uint64_t); #if PARANOID_EXEC ensure_within_mem(r1.i); #endif mem[r1.i] = 0; break; case OP_ADD_M: r1.i = *(uint64_t*)(ip); ip += sizeof(uint64_t); #if PARANOID_EXEC ensure_within_mem(r1.i); #endif pop(r2); if (!is_undef(r2)) mem[r1.i] += r2.i; break; case OP_INCR_M: r1.i = *(uint64_t*)(ip); ip += sizeof(uint64_t); #if PARANOID_EXEC ensure_within_mem(r1.i); #endif mem[r1.i]++; break; case OP_PUSH_M: r1.i = *(uint64_t*)(ip); ip += sizeof(uint64_t); #if PARANOID_EXEC ensure_within_mem(r1.i); #endif r1.i = mem[r1.i]; push(r1); break; case OP_POP_M: r1.i = *(uint64_t*)(ip); ip += sizeof(uint64_t); #if PARANOID_EXEC ensure_within_mem(r1.i); #endif pop(r2); mem[r1.i] = r2.i; break; case OP_SET_M: r1.i = *(uint64_t*)(ip); ip += sizeof(uint64_t); #if PARANOID_EXEC ensure_within_mem(r1.i); #endif pop(r2); push(r2); if (!is_undef(r2)) mem[r1.i] = r2.i; break; case OP_SWAPUNDEF: r1.i = *(uint64_t*)(ip); ip += sizeof(uint64_t); #if PARANOID_EXEC ensure_within_mem(r1.i); #endif pop(r2); if (is_undef(r2)) { r1.i = mem[r1.i]; push(r1); } else { push(r2); } break; case OP_JNUNDEF: pop(r1); push(r1); ip = jmp_if(!is_undef(r1), ip); break; case OP_JLE_P: pop(r2); pop(r1); ip = jmp_if(r1.i <= r2.i, ip); break; case OP_JTRUE: pop(r1); push(r1); ip = jmp_if(!is_undef(r1) && r1.i, ip); break; case OP_JFALSE: pop(r1); push(r1); ip = jmp_if(is_undef(r1) || !r1.i, ip); break; case OP_JFALSE_P: pop(r1); ip = jmp_if(is_undef(r1) || !r1.i, ip); break; case OP_AND: pop(r2); pop(r1); if (is_undef(r1) || is_undef(r2)) r1.i = 0; else r1.i = r1.i && r2.i; push(r1); break; case OP_OR: pop(r2); pop(r1); if (is_undef(r1)) { push(r2); } else if (is_undef(r2)) { push(r1); } else { r1.i = r1.i || r2.i; push(r1); } break; case OP_NOT: pop(r1); if (is_undef(r1)) r1.i = UNDEFINED; else r1.i = !r1.i; push(r1); break; case 
OP_MOD: pop(r2); pop(r1); ensure_defined(r2); ensure_defined(r1); if (r2.i != 0) r1.i = r1.i % r2.i; else r1.i = UNDEFINED; push(r1); break; case OP_SHR: pop(r2); pop(r1); ensure_defined(r2); ensure_defined(r1); if (r2.i < 0) r1.i = UNDEFINED; else if (r2.i < 64) r1.i = r1.i >> r2.i; else r1.i = 0; push(r1); break; case OP_SHL: pop(r2); pop(r1); ensure_defined(r2); ensure_defined(r1); if (r2.i < 0) r1.i = UNDEFINED; else if (r2.i < 64) r1.i = r1.i << r2.i; else r1.i = 0; push(r1); break; case OP_BITWISE_NOT: pop(r1); ensure_defined(r1); r1.i = ~r1.i; push(r1); break; case OP_BITWISE_AND: pop(r2); pop(r1); ensure_defined(r2); ensure_defined(r1); r1.i = r1.i & r2.i; push(r1); break; case OP_BITWISE_OR: pop(r2); pop(r1); ensure_defined(r2); ensure_defined(r1); r1.i = r1.i | r2.i; push(r1); break; case OP_BITWISE_XOR: pop(r2); pop(r1); ensure_defined(r2); ensure_defined(r1); r1.i = r1.i ^ r2.i; push(r1); break; case OP_PUSH_RULE: rule = *(YR_RULE**)(ip); ip += sizeof(uint64_t); if (RULE_IS_DISABLED(rule)) r1.i = UNDEFINED; else r1.i = rule->t_flags[tidx] & RULE_TFLAGS_MATCH ? 1 : 0; push(r1); break; case OP_INIT_RULE: memcpy(&init_rule_args, ip, sizeof(init_rule_args)); #ifdef PROFILING_ENABLED current_rule = init_rule_args.rule; #endif if (RULE_IS_DISABLED(init_rule_args.rule)) ip = init_rule_args.jmp_addr; else ip += sizeof(init_rule_args); break; case OP_MATCH_RULE: pop(r1); rule = *(YR_RULE**)(ip); ip += sizeof(uint64_t); if (!is_undef(r1) && r1.i) rule->t_flags[tidx] |= RULE_TFLAGS_MATCH; else if (RULE_IS_GLOBAL(rule)) rule->ns->t_flags[tidx] |= NAMESPACE_TFLAGS_UNSATISFIED_GLOBAL; #ifdef PROFILING_ENABLED elapsed_time = yr_stopwatch_elapsed_us(&context->stopwatch); rule->time_cost_per_thread[tidx] += (elapsed_time - start_time); start_time = elapsed_time; #endif assert(sp == 0); // at this point the stack should be empty. 
break; case OP_OBJ_LOAD: identifier = *(char**)(ip); ip += sizeof(uint64_t); r1.o = (YR_OBJECT*) yr_hash_table_lookup( context->objects_table, identifier, NULL); assert(r1.o != NULL); push(r1); break; case OP_OBJ_FIELD: identifier = *(char**)(ip); ip += sizeof(uint64_t); pop(r1); ensure_defined(r1); r1.o = yr_object_lookup_field(r1.o, identifier); assert(r1.o != NULL); push(r1); break; case OP_OBJ_VALUE: pop(r1); ensure_defined(r1); #if PARANOID_EXEC check_object_canary(r1.o); #endif switch(r1.o->type) { case OBJECT_TYPE_INTEGER: r1.i = r1.o->value.i; break; case OBJECT_TYPE_FLOAT: if (isnan(r1.o->value.d)) r1.i = UNDEFINED; else r1.d = r1.o->value.d; break; case OBJECT_TYPE_STRING: if (r1.o->value.ss == NULL) r1.i = UNDEFINED; else r1.ss = r1.o->value.ss; break; default: assert(false); } push(r1); break; case OP_INDEX_ARRAY: pop(r1); // index pop(r2); // array ensure_defined(r1); ensure_defined(r2); assert(r2.o->type == OBJECT_TYPE_ARRAY); #if PARANOID_EXEC check_object_canary(r2.o); #endif r1.o = yr_object_array_get_item(r2.o, 0, (int) r1.i); if (r1.o == NULL) r1.i = UNDEFINED; push(r1); break; case OP_LOOKUP_DICT: pop(r1); // key pop(r2); // dictionary ensure_defined(r1); ensure_defined(r2); assert(r2.o->type == OBJECT_TYPE_DICTIONARY); #if PARANOID_EXEC check_object_canary(r2.o); #endif r1.o = yr_object_dict_get_item( r2.o, 0, r1.ss->c_string); if (r1.o == NULL) r1.i = UNDEFINED; push(r1); break; case OP_CALL: args_fmt = *(char**)(ip); ip += sizeof(uint64_t); i = (int) strlen(args_fmt); count = 0; #if PARANOID_EXEC if (i > YR_MAX_FUNCTION_ARGS) { stop = true; result = ERROR_INTERNAL_FATAL_ERROR; break; } #endif // pop arguments from stack and copy them to args array while (i > 0) { pop(r1); if (is_undef(r1)) // count the number of undefined args count++; args[i - 1] = r1; i--; } pop(r2); ensure_defined(r2); #if PARANOID_EXEC check_object_canary(r2.o); #endif if (count > 0) { // if there are undefined args, result for function call // is undefined as well. 
r1.i = UNDEFINED; push(r1); break; } function = object_as_function(r2.o); result = ERROR_INTERNAL_FATAL_ERROR; for (i = 0; i < YR_MAX_OVERLOADED_FUNCTIONS; i++) { if (function->prototypes[i].arguments_fmt == NULL) break; if (strcmp(function->prototypes[i].arguments_fmt, args_fmt) == 0) { result = function->prototypes[i].code(args, context, function); break; } } // if i == YR_MAX_OVERLOADED_FUNCTIONS at this point no matching // prototype was found, but this shouldn't happen. assert(i < YR_MAX_OVERLOADED_FUNCTIONS); // make a copy of the returned object and push the copy into the stack // function->return_obj can't be pushed because it can change in // subsequent calls to the same function. if (result == ERROR_SUCCESS) result = yr_object_copy(function->return_obj, &r1.o); // a pointer to the copied object is stored in a arena in order to // free the object before exiting yr_execute_code if (result == ERROR_SUCCESS) result = yr_arena_write_data(obj_arena, &r1.o, sizeof(r1.o), NULL); stop = (result != ERROR_SUCCESS); push(r1); break; case OP_FOUND: pop(r1); r1.i = r1.s->matches[tidx].tail != NULL ? 1 : 0; push(r1); break; case OP_FOUND_AT: pop(r2); pop(r1); if (is_undef(r1)) { r1.i = 0; push(r1); break; } match = r2.s->matches[tidx].head; r3.i = false; while (match != NULL) { if (r1.i == match->base + match->offset) { r3.i = true; break; } if (r1.i < match->base + match->offset) break; match = match->next; } push(r3); break; case OP_FOUND_IN: pop(r3); pop(r2); pop(r1); ensure_defined(r1); ensure_defined(r2); match = r3.s->matches[tidx].head; r3.i = false; while (match != NULL && !r3.i) { if (match->base + match->offset >= r1.i && match->base + match->offset <= r2.i) { r3.i = true; } if (match->base + match->offset > r2.i) break; match = match->next; } push(r3); break; case OP_COUNT: pop(r1); #if PARANOID_EXEC // Make sure that the string pointer is within the rules arena. 
if (yr_arena_page_for_address(context->rules->arena, r1.p) == NULL) return ERROR_INTERNAL_FATAL_ERROR; #endif r1.i = r1.s->matches[tidx].count; push(r1); break; case OP_OFFSET: pop(r2); pop(r1); ensure_defined(r1); match = r2.s->matches[tidx].head; i = 1; r3.i = UNDEFINED; while (match != NULL && r3.i == UNDEFINED) { if (r1.i == i) r3.i = match->base + match->offset; i++; match = match->next; } push(r3); break; case OP_LENGTH: pop(r2); pop(r1); ensure_defined(r1); match = r2.s->matches[tidx].head; i = 1; r3.i = UNDEFINED; while (match != NULL && r3.i == UNDEFINED) { if (r1.i == i) r3.i = match->match_length; i++; match = match->next; } push(r3); break; case OP_OF: found = 0; count = 0; pop(r1); while (!is_undef(r1)) { if (r1.s->matches[tidx].tail != NULL) found++; count++; pop(r1); } pop(r2); if (is_undef(r2)) r1.i = found >= count ? 1 : 0; else r1.i = found >= r2.i ? 1 : 0; push(r1); break; case OP_FILESIZE: r1.i = context->file_size; push(r1); break; case OP_ENTRYPOINT: r1.i = context->entry_point; push(r1); break; case OP_INT8: pop(r1); r1.i = read_int8_t_little_endian(context->iterator, (size_t) r1.i); push(r1); break; case OP_INT16: pop(r1); r1.i = read_int16_t_little_endian(context->iterator, (size_t) r1.i); push(r1); break; case OP_INT32: pop(r1); r1.i = read_int32_t_little_endian(context->iterator, (size_t) r1.i); push(r1); break; case OP_UINT8: pop(r1); r1.i = read_uint8_t_little_endian(context->iterator, (size_t) r1.i); push(r1); break; case OP_UINT16: pop(r1); r1.i = read_uint16_t_little_endian(context->iterator, (size_t) r1.i); push(r1); break; case OP_UINT32: pop(r1); r1.i = read_uint32_t_little_endian(context->iterator, (size_t) r1.i); push(r1); break; case OP_INT8BE: pop(r1); r1.i = read_int8_t_big_endian(context->iterator, (size_t) r1.i); push(r1); break; case OP_INT16BE: pop(r1); r1.i = read_int16_t_big_endian(context->iterator, (size_t) r1.i); push(r1); break; case OP_INT32BE: pop(r1); r1.i = read_int32_t_big_endian(context->iterator, (size_t) 
r1.i); push(r1); break; case OP_UINT8BE: pop(r1); r1.i = read_uint8_t_big_endian(context->iterator, (size_t) r1.i); push(r1); break; case OP_UINT16BE: pop(r1); r1.i = read_uint16_t_big_endian(context->iterator, (size_t) r1.i); push(r1); break; case OP_UINT32BE: pop(r1); r1.i = read_uint32_t_big_endian(context->iterator, (size_t) r1.i); push(r1); break; case OP_CONTAINS: pop(r2); pop(r1); ensure_defined(r1); ensure_defined(r2); r1.i = memmem(r1.ss->c_string, r1.ss->length, r2.ss->c_string, r2.ss->length) != NULL; push(r1); break; case OP_IMPORT: r1.i = *(uint64_t*)(ip); ip += sizeof(uint64_t); result = yr_modules_load((char*) r1.p, context); if (result != ERROR_SUCCESS) stop = true; break; case OP_MATCHES: pop(r2); pop(r1); ensure_defined(r2); ensure_defined(r1); if (r1.ss->length == 0) { r1.i = false; push(r1); break; } result = yr_re_exec( context, (uint8_t*) r2.re->code, (uint8_t*) r1.ss->c_string, r1.ss->length, 0, r2.re->flags | RE_FLAGS_SCAN, NULL, NULL, &found); if (result != ERROR_SUCCESS) stop = true; r1.i = found >= 0; push(r1); break; case OP_INT_TO_DBL: r1.i = *(uint64_t*)(ip); ip += sizeof(uint64_t); #if PARANOID_EXEC if (r1.i > sp || sp - r1.i >= stack_size) { stop = true; result = ERROR_INTERNAL_FATAL_ERROR; break; } #endif r2 = stack[sp - r1.i]; if (is_undef(r2)) stack[sp - r1.i].i = UNDEFINED; else stack[sp - r1.i].d = (double) r2.i; break; case OP_STR_TO_BOOL: pop(r1); ensure_defined(r1); r1.i = r1.ss->length > 0; push(r1); break; case OP_INT_EQ: pop(r2); pop(r1); ensure_defined(r2); ensure_defined(r1); r1.i = r1.i == r2.i; push(r1); break; case OP_INT_NEQ: pop(r2); pop(r1); ensure_defined(r2); ensure_defined(r1); r1.i = r1.i != r2.i; push(r1); break; case OP_INT_LT: pop(r2); pop(r1); ensure_defined(r2); ensure_defined(r1); r1.i = r1.i < r2.i; push(r1); break; case OP_INT_GT: pop(r2); pop(r1); ensure_defined(r2); ensure_defined(r1); r1.i = r1.i > r2.i; push(r1); break; case OP_INT_LE: pop(r2); pop(r1); ensure_defined(r2); ensure_defined(r1); r1.i = 
r1.i <= r2.i; push(r1); break; case OP_INT_GE: pop(r2); pop(r1); ensure_defined(r2); ensure_defined(r1); r1.i = r1.i >= r2.i; push(r1); break; case OP_INT_ADD: pop(r2); pop(r1); ensure_defined(r2); ensure_defined(r1); r1.i = r1.i + r2.i; push(r1); break; case OP_INT_SUB: pop(r2); pop(r1); ensure_defined(r2); ensure_defined(r1); r1.i = r1.i - r2.i; push(r1); break; case OP_INT_MUL: pop(r2); pop(r1); ensure_defined(r2); ensure_defined(r1); r1.i = r1.i * r2.i; push(r1); break; case OP_INT_DIV: pop(r2); pop(r1); ensure_defined(r2); ensure_defined(r1); if (r2.i != 0) r1.i = r1.i / r2.i; else r1.i = UNDEFINED; push(r1); break; case OP_INT_MINUS: pop(r1); ensure_defined(r1); r1.i = -r1.i; push(r1); break; case OP_DBL_LT: pop(r2); pop(r1); ensure_defined(r2); ensure_defined(r1); r1.i = r1.d < r2.d; push(r1); break; case OP_DBL_GT: pop(r2); pop(r1); ensure_defined(r2); ensure_defined(r1); r1.i = r1.d > r2.d; push(r1); break; case OP_DBL_LE: pop(r2); pop(r1); ensure_defined(r2); ensure_defined(r1); r1.i = r1.d <= r2.d; push(r1); break; case OP_DBL_GE: pop(r2); pop(r1); ensure_defined(r2); ensure_defined(r1); r1.i = r1.d >= r2.d; push(r1); break; case OP_DBL_EQ: pop(r2); pop(r1); ensure_defined(r2); ensure_defined(r1); r1.i = fabs(r1.d - r2.d) < DBL_EPSILON; push(r1); break; case OP_DBL_NEQ: pop(r2); pop(r1); ensure_defined(r2); ensure_defined(r1); r1.i = fabs(r1.d - r2.d) >= DBL_EPSILON; push(r1); break; case OP_DBL_ADD: pop(r2); pop(r1); ensure_defined(r2); ensure_defined(r1); r1.d = r1.d + r2.d; push(r1); break; case OP_DBL_SUB: pop(r2); pop(r1); ensure_defined(r2); ensure_defined(r1); r1.d = r1.d - r2.d; push(r1); break; case OP_DBL_MUL: pop(r2); pop(r1); ensure_defined(r2); ensure_defined(r1); r1.d = r1.d * r2.d; push(r1); break; case OP_DBL_DIV: pop(r2); pop(r1); ensure_defined(r2); ensure_defined(r1); r1.d = r1.d / r2.d; push(r1); break; case OP_DBL_MINUS: pop(r1); ensure_defined(r1); r1.d = -r1.d; push(r1); break; case OP_STR_EQ: case OP_STR_NEQ: case OP_STR_LT: case 
OP_STR_LE: case OP_STR_GT: case OP_STR_GE: pop(r2); pop(r1); ensure_defined(r1); ensure_defined(r2); switch(opcode) { case OP_STR_EQ: r1.i = (sized_string_cmp(r1.ss, r2.ss) == 0); break; case OP_STR_NEQ: r1.i = (sized_string_cmp(r1.ss, r2.ss) != 0); break; case OP_STR_LT: r1.i = (sized_string_cmp(r1.ss, r2.ss) < 0); break; case OP_STR_LE: r1.i = (sized_string_cmp(r1.ss, r2.ss) <= 0); break; case OP_STR_GT: r1.i = (sized_string_cmp(r1.ss, r2.ss) > 0); break; case OP_STR_GE: r1.i = (sized_string_cmp(r1.ss, r2.ss) >= 0); break; } push(r1); break; default: // Unknown instruction, this shouldn't happen. assert(false); } // Check for timeout every 10 instruction cycles. If timeout == 0 it means // no timeout at all. if (context->timeout > 0L && ++cycle == 10) { elapsed_time = yr_stopwatch_elapsed_us(&context->stopwatch); if (elapsed_time > context->timeout) { #ifdef PROFILING_ENABLED assert(current_rule != NULL); current_rule->time_cost_per_thread[tidx] += elapsed_time - start_time; #endif result = ERROR_SCAN_TIMEOUT; stop = true; } cycle = 0; } }
//
// _yr_re_emit
//
// Emits the code for a regular expression node into the arena of the given
// emit context, recursing into child nodes where required.
//
// Args:
//    emit_context  - Emit context whose arena receives the code.
//    re_node       - Node to emit code for.
//    flags         - Combination of EMIT_XXX flags. EMIT_BACKWARDS swaps the
//                    children of concatenations; EMIT_NO_CASE and
//                    EMIT_DOT_ALL select opcode variants;
//                    EMIT_DONT_SET_FORWARDS_CODE and
//                    EMIT_DONT_SET_BACKWARDS_CODE suppress the update of
//                    re_node->forward_code / re_node->backward_code.
//    code_addr     - If not NULL, receives the value of instruction_addr,
//                    i.e. the address reported for the first instruction
//                    emitted for this node. It stays NULL for node types or
//                    ranges for which no emit call was asked for the address.
//    code_size     - Receives the number of bytes emitted for this node. It
//                    is reset to 0 on entry.
//
// Returns:
//    ERROR_SUCCESS, or the first error propagated by FAIL_ON_ERROR.
//
// NOTE(review): split and jump offsets are stored through int16_t pointers
// while the sizes they are computed from are ints; nothing in this function
// checks that the sums fit in 16 bits. Confirm that callers bound the size of
// the generated code.
//
int _yr_re_emit(
    RE_EMIT_CONTEXT* emit_context,
    RE_NODE* re_node,
    int flags,
    uint8_t** code_addr,
    int* code_size)
{
  int i;
  int branch_size;
  int split_size;
  int inst_size;
  int jmp_size;

  RE_NODE* left;
  RE_NODE* right;

  // Addresses of the 16-bit offsets of the most recently emitted split and
  // jump instructions; used to patch them once the target is known.
  int16_t* split_offset_addr = NULL;
  int16_t* jmp_offset_addr = NULL;
  uint8_t* instruction_addr = NULL;

  *code_size = 0;

  switch(re_node->type)
  {
  case RE_NODE_LITERAL:

    FAIL_ON_ERROR(_yr_emit_inst_arg_uint8(
        emit_context,
        flags & EMIT_NO_CASE ?
          RE_OPCODE_LITERAL_NO_CASE :
          RE_OPCODE_LITERAL,
        re_node->value,
        &instruction_addr,
        NULL,
        code_size));
    break;

  case RE_NODE_MASKED_LITERAL:

    // The mask goes in the high byte of the argument, the value in the low
    // byte.
    FAIL_ON_ERROR(_yr_emit_inst_arg_uint16(
        emit_context,
        RE_OPCODE_MASKED_LITERAL,
        re_node->mask << 8 | re_node->value,
        &instruction_addr,
        NULL,
        code_size));
    break;

  case RE_NODE_WORD_CHAR:

    FAIL_ON_ERROR(_yr_emit_inst(
        emit_context,
        RE_OPCODE_WORD_CHAR,
        &instruction_addr,
        code_size));
    break;

  case RE_NODE_NON_WORD_CHAR:

    FAIL_ON_ERROR(_yr_emit_inst(
        emit_context,
        RE_OPCODE_NON_WORD_CHAR,
        &instruction_addr,
        code_size));
    break;

  case RE_NODE_WORD_BOUNDARY:

    FAIL_ON_ERROR(_yr_emit_inst(
        emit_context,
        RE_OPCODE_WORD_BOUNDARY,
        &instruction_addr,
        code_size));
    break;

  case RE_NODE_NON_WORD_BOUNDARY:

    FAIL_ON_ERROR(_yr_emit_inst(
        emit_context,
        RE_OPCODE_NON_WORD_BOUNDARY,
        &instruction_addr,
        code_size));
    break;

  case RE_NODE_SPACE:

    FAIL_ON_ERROR(_yr_emit_inst(
        emit_context,
        RE_OPCODE_SPACE,
        &instruction_addr,
        code_size));
    break;

  case RE_NODE_NON_SPACE:

    FAIL_ON_ERROR(_yr_emit_inst(
        emit_context,
        RE_OPCODE_NON_SPACE,
        &instruction_addr,
        code_size));
    break;

  case RE_NODE_DIGIT:

    FAIL_ON_ERROR(_yr_emit_inst(
        emit_context,
        RE_OPCODE_DIGIT,
        &instruction_addr,
        code_size));
    break;

  case RE_NODE_NON_DIGIT:

    FAIL_ON_ERROR(_yr_emit_inst(
        emit_context,
        RE_OPCODE_NON_DIGIT,
        &instruction_addr,
        code_size));
    break;

  case RE_NODE_ANY:

    FAIL_ON_ERROR(_yr_emit_inst(
        emit_context,
        flags & EMIT_DOT_ALL ?
          RE_OPCODE_ANY :
          RE_OPCODE_ANY_EXCEPT_NEW_LINE,
        &instruction_addr,
        code_size));
    break;

  case RE_NODE_CLASS:

    FAIL_ON_ERROR(_yr_emit_inst(
        emit_context,
        (flags & EMIT_NO_CASE) ?
          RE_OPCODE_CLASS_NO_CASE :
          RE_OPCODE_CLASS,
        &instruction_addr,
        code_size));

    // The opcode is followed by the 32 bytes of the node's class vector.
    FAIL_ON_ERROR(yr_arena_write_data(
        emit_context->arena,
        re_node->class_vector,
        32,
        NULL));

    *code_size += 32;
    break;

  case RE_NODE_ANCHOR_START:

    FAIL_ON_ERROR(_yr_emit_inst(
        emit_context,
        RE_OPCODE_MATCH_AT_START,
        &instruction_addr,
        code_size));
    break;

  case RE_NODE_ANCHOR_END:

    FAIL_ON_ERROR(_yr_emit_inst(
        emit_context,
        RE_OPCODE_MATCH_AT_END,
        &instruction_addr,
        code_size));
    break;

  case RE_NODE_CONCAT:

    // When emitting backwards code the children are emitted in reverse
    // order.
    if (flags & EMIT_BACKWARDS)
    {
      left = re_node->right;
      right = re_node->left;
    }
    else
    {
      left = re_node->left;
      right = re_node->right;
    }

    // The address of the concatenation is the address of the child that is
    // emitted first.
    FAIL_ON_ERROR(_yr_re_emit(
        emit_context,
        left,
        flags,
        &instruction_addr,
        &branch_size));

    *code_size += branch_size;

    FAIL_ON_ERROR(_yr_re_emit(
        emit_context,
        right,
        flags,
        NULL,
        &branch_size));

    *code_size += branch_size;

    break;

  case RE_NODE_PLUS:

    // Code for e+ looks like:
    //
    //          L1: code for e
    //              split L1, L2
    //          L2:

    FAIL_ON_ERROR(_yr_re_emit(
        emit_context,
        re_node->left,
        flags,
        &instruction_addr,
        &branch_size));

    *code_size += branch_size;

    // The split offset is -branch_size, i.e. it points back to L1.
    FAIL_ON_ERROR(_yr_emit_split(
        emit_context,
        re_node->greedy ? RE_OPCODE_SPLIT_B : RE_OPCODE_SPLIT_A,
        -branch_size,
        NULL,
        &split_offset_addr,
        &split_size));

    *code_size += split_size;
    break;

  case RE_NODE_STAR:

    // Code for e* looks like:
    //
    //          L1: split L1, L2
    //              code for e
    //              jmp L1
    //          L2:

    // Emit the split with offset 0 for now; it is patched below once the
    // size of the loop body is known.
    FAIL_ON_ERROR(_yr_emit_split(
        emit_context,
        re_node->greedy ? RE_OPCODE_SPLIT_A : RE_OPCODE_SPLIT_B,
        0,
        &instruction_addr,
        &split_offset_addr,
        &split_size));

    *code_size += split_size;

    FAIL_ON_ERROR(_yr_re_emit(
        emit_context,
        re_node->left,
        flags,
        NULL,
        &branch_size));

    *code_size += branch_size;

    // Emit a jump back to the split at L1: the offset is the negated size of
    // the split plus the code for e.
    FAIL_ON_ERROR(_yr_emit_inst_arg_int16(
        emit_context,
        RE_OPCODE_JUMP,
        -(branch_size + split_size),
        NULL,
        &jmp_offset_addr,
        &jmp_size));

    *code_size += jmp_size;

    // Update split offset.
    *split_offset_addr = split_size + branch_size + jmp_size;
    break;

  case RE_NODE_ALT:

    // Code for e1|e2 looks like:
    //
    //              split L1, L2
    //          L1: code for e1
    //              jmp L3
    //          L2: code for e2
    //          L3:

    // Emit a split instruction with offset set to 0 temporarily. Offset
    // will be updated after we know the size of the code generated for
    // the left node (e1).

    FAIL_ON_ERROR(_yr_emit_split(
        emit_context,
        RE_OPCODE_SPLIT_A,
        0,
        &instruction_addr,
        &split_offset_addr,
        &split_size));

    *code_size += split_size;

    FAIL_ON_ERROR(_yr_re_emit(
        emit_context,
        re_node->left,
        flags,
        NULL,
        &branch_size));

    *code_size += branch_size;

    // Emit jump with offset set to 0.

    FAIL_ON_ERROR(_yr_emit_inst_arg_int16(
        emit_context,
        RE_OPCODE_JUMP,
        0,
        NULL,
        &jmp_offset_addr,
        &jmp_size));

    *code_size += jmp_size;

    // Update split offset.
    *split_offset_addr = split_size + branch_size + jmp_size;

    FAIL_ON_ERROR(_yr_re_emit(
        emit_context,
        re_node->right,
        flags,
        NULL,
        &branch_size));

    *code_size += branch_size;

    // Update offset for jmp instruction.
    *jmp_offset_addr = branch_size + jmp_size;
    break;

  case RE_NODE_RANGE:

    // Code for e{n,m} looks like:
    //
    //            code for e              (repeated n times)
    //            push m-n-1
    //        L0: split L1, L2
    //        L1: code for e
    //            jnz L0
    //        L2: pop
    //            split L3, L4
    //        L3: code for e
    //        L4:
    //
    // Instead of generating a loop with m-n iterations, we generate a loop
    // with m-n-1 iterations and the last one is unrolled outside the loop.
    // This is because re_node->backward_code pointers *must* point to code
    // past the loop. If they point to code before the loop then when some
    // atom contained inside "e" is found, the loop will be executed in both
    // forward and backward code. This causes an overlap in forward and
    // backward matches and the reported matching string will be longer than
    // expected.

    if (re_node->start > 0)
    {
      FAIL_ON_ERROR(_yr_re_emit(
          emit_context,
          re_node->left,
          flags,
          &instruction_addr,
          &branch_size));

      *code_size += branch_size;

      for (i = 0; i < re_node->start - 1; i++)
      {
        // Don't want re_node->forward_code updated in this call:
        // forward_code must remain pointing to the code generated by the
        // _yr_re_emit above. However we want re_node->backward_code
        // being updated.

        FAIL_ON_ERROR(_yr_re_emit(
            emit_context,
            re_node->left,
            flags | EMIT_DONT_SET_FORWARDS_CODE,
            NULL,
            &branch_size));

        *code_size += branch_size;
      }
    }

    if (re_node->end > re_node->start + 1)
    {
      // If no mandatory repetition was emitted (start == 0) the push is the
      // first instruction of the range, so its address is recorded.
      FAIL_ON_ERROR(_yr_emit_inst_arg_uint16(
          emit_context,
          RE_OPCODE_PUSH,
          re_node->end - re_node->start - 1,
          re_node->start == 0 ? &instruction_addr : NULL,
          NULL,
          &inst_size));

      *code_size += inst_size;

      FAIL_ON_ERROR(_yr_emit_split(
          emit_context,
          re_node->greedy ? RE_OPCODE_SPLIT_A : RE_OPCODE_SPLIT_B,
          0,
          NULL,
          &split_offset_addr,
          &split_size));

      *code_size += split_size;

      FAIL_ON_ERROR(_yr_re_emit(
          emit_context,
          re_node->left,
          flags | EMIT_DONT_SET_FORWARDS_CODE | EMIT_DONT_SET_BACKWARDS_CODE,
          NULL,
          &branch_size));

      *code_size += branch_size;

      // The jnz offset points back to the split at L0.
      FAIL_ON_ERROR(_yr_emit_inst_arg_int16(
          emit_context,
          RE_OPCODE_JNZ,
          -(branch_size + split_size),
          NULL,
          &jmp_offset_addr,
          &jmp_size));

      *code_size += jmp_size;

      // Patch the split at L0 so that its offset points past the jnz (L2).
      *split_offset_addr = split_size + branch_size + jmp_size;

      FAIL_ON_ERROR(_yr_emit_inst(
          emit_context,
          RE_OPCODE_POP,
          NULL,
          &inst_size));

      *code_size += inst_size;
    }

    if (re_node->end > re_node->start)
    {
      FAIL_ON_ERROR(_yr_emit_split(
          emit_context,
          re_node->greedy ? RE_OPCODE_SPLIT_A : RE_OPCODE_SPLIT_B,
          0,
          NULL,
          &split_offset_addr,
          &split_size));

      *code_size += split_size;

      // For e{0,1} neither of the blocks above emitted anything, so the
      // address recorded for the node is the one reported by this call.
      FAIL_ON_ERROR(_yr_re_emit(
          emit_context,
          re_node->left,
          flags | EMIT_DONT_SET_FORWARDS_CODE,
          re_node->start == 0 && re_node->end == 1 ? &instruction_addr : NULL,
          &branch_size));

      *code_size += branch_size;

      // Patch the split so that its offset points past the code for e (L4).
      *split_offset_addr = split_size + branch_size;
    }

    break;
  }

  // Record where the code for this node lives: backward code points past the
  // emitted code, forward code points to its first instruction.
  if (flags & EMIT_BACKWARDS)
  {
    if (!(flags & EMIT_DONT_SET_BACKWARDS_CODE))
      re_node->backward_code = instruction_addr + *code_size;
  }
  else
  {
    if (!(flags & EMIT_DONT_SET_FORWARDS_CODE))
      re_node->forward_code = instruction_addr;
  }

  if (code_addr != NULL)
    *code_addr = instruction_addr;

  return ERROR_SUCCESS;
}
//
// yr_ac_compile
//
// Builds the failure links and the transition table of the automaton and
// then copies its transition and match tables into the arena, one entry at
// a time. The "match" field of every match table entry written to the arena
// is registered as relocatable.
//
// Args:
//    automaton  - Automaton to compile.
//    arena      - Arena that receives both tables.
//    tables     - Receives the arena addresses of the first transition
//                 entry and of the first match table entry.
//
// Returns:
//    ERROR_SUCCESS, or the first error propagated by FAIL_ON_ERROR.
//
int yr_ac_compile(
    YR_AC_AUTOMATON* automaton,
    YR_ARENA* arena,
    YR_AC_TABLES* tables)
{
  uint32_t idx;
  void* entry_addr;

  FAIL_ON_ERROR(_yr_ac_create_failure_links(automaton));
  FAIL_ON_ERROR(_yr_ac_optimize_failure_links(automaton));
  FAIL_ON_ERROR(_yr_ac_build_transition_table(automaton));

  // Reserve room for both tables up front.
  FAIL_ON_ERROR(yr_arena_reserve_memory(
      arena,
      automaton->tables_size *
          (sizeof(tables->transitions[0]) + sizeof(tables->matches[0]))));

  // Transition table: the address of the first entry is kept in
  // tables->transitions, the remaining entries are simply appended.
  FAIL_ON_ERROR(yr_arena_write_data(
      arena,
      &automaton->t_table[0],
      sizeof(YR_AC_TRANSITION),
      (void**) &tables->transitions));

  idx = 1;

  while (idx < automaton->tables_size)
  {
    FAIL_ON_ERROR(yr_arena_write_data(
        arena,
        &automaton->t_table[idx],
        sizeof(YR_AC_TRANSITION),
        NULL));

    idx++;
  }

  // Match table: same scheme, but every entry written also gets its "match"
  // field registered as relocatable.
  FAIL_ON_ERROR(yr_arena_write_data(
      arena,
      &automaton->m_table[0],
      sizeof(YR_AC_MATCH_TABLE_ENTRY),
      (void**) &tables->matches));

  FAIL_ON_ERROR(yr_arena_make_relocatable(
      arena,
      tables->matches,
      offsetof(YR_AC_MATCH_TABLE_ENTRY, match),
      EOL));

  idx = 1;

  while (idx < automaton->tables_size)
  {
    FAIL_ON_ERROR(yr_arena_write_data(
        arena,
        &automaton->m_table[idx],
        sizeof(YR_AC_MATCH_TABLE_ENTRY),
        &entry_addr));

    FAIL_ON_ERROR(yr_arena_make_relocatable(
        arena,
        entry_addr,
        offsetof(YR_AC_MATCH_TABLE_ENTRY, match),
        EOL));

    idx++;
  }

  return ERROR_SUCCESS;
}
//
// _yr_scan_verify_chained_string_match
//
// Handles a match found for a string that takes part in a chain.
//
// If the string is chained to a previous one, the match is accepted only if
// some unconfirmed match of the previous string ends at a distance from
// match_offset that lies within [chain_gap_min, chain_gap_max]. While looking
// for it, unconfirmed matches of the previous string that end too far behind
// to be useful are removed from its list.
//
// An accepted match for the chain's tail promotes the matches of the chain's
// head whose chain_length equals the number of links in the chain: they are
// moved from the head's unconfirmed list to its confirmed list with their
// length and data adjusted to cover the whole chain. An accepted match for
// any other string is stored as a new unconfirmed match of that string.
//
// Returns ERROR_SUCCESS, or the error reported by a failing helper.
//

int _yr_scan_verify_chained_string_match(
    YR_STRING* matching_string,
    YR_SCAN_CONTEXT* context,
    uint8_t* match_data,
    uint64_t match_base,
    uint64_t match_offset,
    int32_t match_length)
{
  YR_STRING* chain_head;
  YR_MATCH* cursor;
  YR_MATCH* following;
  YR_MATCH* fresh;

  uint64_t oldest_offset;
  uint64_t end_of_match;
  int32_t chain_links;

  int tidx = context->tidx;
  int accept = FALSE;

  if (matching_string->chained_to != NULL)
  {
    if (matching_string->unconfirmed_matches[tidx].head == NULL)
      oldest_offset = match_offset;
    else
      oldest_offset = matching_string->unconfirmed_matches[tidx].head->offset;

    for (cursor = matching_string->chained_to->unconfirmed_matches[tidx].head;
         cursor != NULL;
         cursor = following)
    {
      // Saved first: the current item may be unlinked below.
      following = cursor->next;
      end_of_match = cursor->offset + cursor->length;

      if (end_of_match + matching_string->chain_gap_max < oldest_offset)
      {
        _yr_scan_remove_match_from_list(
            cursor,
            &matching_string->chained_to->unconfirmed_matches[tidx]);

        continue;
      }

      if (end_of_match + matching_string->chain_gap_max >= match_offset &&
          end_of_match + matching_string->chain_gap_min <= match_offset)
      {
        accept = TRUE;
        break;
      }
    }
  }
  else
  {
    // Not chained to anything: nothing to verify.
    accept = TRUE;
  }

  if (!accept)
    return ERROR_SUCCESS;

  if (STRING_IS_CHAIN_TAIL(matching_string))
  {
    // Update the chain length of every match of the previous string that is
    // at a valid distance from this one.

    for (cursor = matching_string->chained_to->unconfirmed_matches[tidx].head;
         cursor != NULL;
         cursor = cursor->next)
    {
      end_of_match = cursor->offset + cursor->length;

      if (end_of_match + matching_string->chain_gap_max >= match_offset &&
          end_of_match + matching_string->chain_gap_min <= match_offset)
      {
        _yr_scan_update_match_chain_length(
            tidx, matching_string->chained_to, cursor, 1);
      }
    }

    // Walk back to the first string of the chain counting the links.

    chain_links = 0;

    for (chain_head = matching_string;
         chain_head->chained_to != NULL;
         chain_head = chain_head->chained_to)
    {
      chain_links++;
    }

    // Matches of the head that span the complete chain become confirmed.

    for (cursor = chain_head->unconfirmed_matches[tidx].head;
         cursor != NULL;
         cursor = following)
    {
      following = cursor->next;

      if (cursor->chain_length != chain_links)
        continue;

      _yr_scan_remove_match_from_list(
          cursor, &chain_head->unconfirmed_matches[tidx]);

      cursor->length =
          (int32_t) (match_offset - cursor->offset + match_length);

      cursor->data = match_data - match_offset + cursor->offset;
      cursor->prev = NULL;
      cursor->next = NULL;

      FAIL_ON_ERROR(_yr_scan_add_match_to_list(
          cursor, &chain_head->matches[tidx], FALSE));
    }

    return ERROR_SUCCESS;
  }

  if (matching_string->matches[tidx].count == 0 &&
      matching_string->unconfirmed_matches[tidx].count == 0)
  {
    // First match seen for this string: remember the string in the list of
    // strings whose flags need to be cleared after the scan.

    FAIL_ON_ERROR(yr_arena_write_data(
        context->matching_strings_arena,
        &matching_string,
        sizeof(matching_string),
        NULL));
  }

  FAIL_ON_ERROR(yr_arena_allocate_memory(
      context->matches_arena,
      sizeof(YR_MATCH),
      (void**) &fresh));

  fresh->prev = NULL;
  fresh->next = NULL;
  fresh->chain_length = 0;
  fresh->data = match_data;
  fresh->length = match_length;
  fresh->offset = match_offset;
  fresh->base = match_base;

  FAIL_ON_ERROR(_yr_scan_add_match_to_list(
      fresh,
      &matching_string->unconfirmed_matches[tidx],
      FALSE));

  return ERROR_SUCCESS;
}
//
// _yr_re_emit
//
// Recursively emits into "arena" the code for the regexp subtree rooted at
// "re_node".
//
// Args:
//    re_node    - Node to generate code for.
//    arena      - Arena receiving the generated code.
//    flags      - EMIT_FLAGS_BACKWARDS swaps the order in which the children
//                 of a concatenation are emitted and selects which field of
//                 the node is annotated (backward_code instead of
//                 forward_code). EMIT_FLAGS_DONT_ANNOTATE_RE disables the
//                 annotation altogether.
//    code_addr  - If not NULL, receives the address recorded for the first
//                 instruction emitted for this node (may be NULL if the node
//                 produced no code, e.g. a {0,0} range).
//    code_size  - Receives the total size of the code emitted for this node,
//                 in the same units reported by the emit helpers (bytes: the
//                 32-byte class vector is added as 32).
//
// Returns ERROR_SUCCESS. FAIL_ON_ERROR is expected to make the function
// return early with the error of the failing helper.
//
// NOTE(review): relative offsets are stored through int16_t pointers while
// sizes are accumulated as int; nothing here checks that they fit. Confirm
// that the size of the regexp is bounded elsewhere.
//

int _yr_re_emit(
    RE_NODE* re_node,
    YR_ARENA* arena,
    int flags,
    uint8_t** code_addr,
    int* code_size)
{
  int i;
  int branch_size;
  int split_size;
  int inst_size;
  int jmp_size;

  RE_NODE* left;
  RE_NODE* right;

  // Addresses of the 16-bit arguments of split/jump instructions, used for
  // patching offsets that are not known when the instruction is emitted.
  int16_t* split_offset_addr = NULL;
  int16_t* jmp_offset_addr = NULL;

  // Address of the first instruction emitted for this node.
  uint8_t* instruction_addr = NULL;

  *code_size = 0;

  switch(re_node->type)
  {
  case RE_NODE_LITERAL:

    FAIL_ON_ERROR(_yr_emit_inst_arg_uint8(
        arena,
        RE_OPCODE_LITERAL,
        re_node->value,
        &instruction_addr,
        NULL,
        code_size));
    break;

  case RE_NODE_MASKED_LITERAL:

    // Mask and value are packed in a single 16-bit argument, mask in the
    // high byte.
    FAIL_ON_ERROR(_yr_emit_inst_arg_uint16(
        arena,
        RE_OPCODE_MASKED_LITERAL,
        re_node->mask << 8 | re_node->value,
        &instruction_addr,
        NULL,
        code_size));
    break;

  case RE_NODE_WORD_CHAR:

    FAIL_ON_ERROR(_yr_emit_inst(
        arena,
        RE_OPCODE_WORD_CHAR,
        &instruction_addr,
        code_size));
    break;

  case RE_NODE_NON_WORD_CHAR:

    FAIL_ON_ERROR(_yr_emit_inst(
        arena,
        RE_OPCODE_NON_WORD_CHAR,
        &instruction_addr,
        code_size));
    break;

  case RE_NODE_SPACE:

    FAIL_ON_ERROR(_yr_emit_inst(
        arena,
        RE_OPCODE_SPACE,
        &instruction_addr,
        code_size));
    break;

  case RE_NODE_NON_SPACE:

    FAIL_ON_ERROR(_yr_emit_inst(
        arena,
        RE_OPCODE_NON_SPACE,
        &instruction_addr,
        code_size));
    break;

  case RE_NODE_DIGIT:

    FAIL_ON_ERROR(_yr_emit_inst(
        arena,
        RE_OPCODE_DIGIT,
        &instruction_addr,
        code_size));
    break;

  case RE_NODE_NON_DIGIT:

    FAIL_ON_ERROR(_yr_emit_inst(
        arena,
        RE_OPCODE_NON_DIGIT,
        &instruction_addr,
        code_size));
    break;

  case RE_NODE_ANY:

    FAIL_ON_ERROR(_yr_emit_inst(
        arena,
        RE_OPCODE_ANY,
        &instruction_addr,
        code_size));
    break;

  case RE_NODE_CLASS:

    // The opcode is followed by the 32 bytes of the class vector.
    FAIL_ON_ERROR(_yr_emit_inst(
        arena,
        RE_OPCODE_CLASS,
        &instruction_addr,
        code_size));

    FAIL_ON_ERROR(yr_arena_write_data(
        arena,
        re_node->class_vector,
        32,
        NULL));

    *code_size += 32;
    break;

  case RE_NODE_ANCHOR_START:

    FAIL_ON_ERROR(_yr_emit_inst(
        arena,
        RE_OPCODE_MATCH_AT_START,
        &instruction_addr,
        code_size));
    break;

  case RE_NODE_ANCHOR_END:

    FAIL_ON_ERROR(_yr_emit_inst(
        arena,
        RE_OPCODE_MATCH_AT_END,
        &instruction_addr,
        code_size));
    break;

  case RE_NODE_CONCAT:

    // When emitting backwards code the operands are emitted in reverse
    // order.
    if (flags & EMIT_FLAGS_BACKWARDS)
    {
      left = re_node->right;
      right = re_node->left;
    }
    else
    {
      left = re_node->left;
      right = re_node->right;
    }

    FAIL_ON_ERROR(_yr_re_emit(
        left,
        arena,
        flags,
        &instruction_addr,
        &branch_size));

    *code_size += branch_size;

    FAIL_ON_ERROR(_yr_re_emit(
        right,
        arena,
        flags,
        NULL,
        &branch_size));

    *code_size += branch_size;

    break;

  case RE_NODE_PLUS:

    // Code for e+ looks like:
    //
    //          L1: code for e
    //              split L1, L2
    //          L2:

    FAIL_ON_ERROR(_yr_re_emit(
        re_node->left,
        arena,
        flags,
        &instruction_addr,
        &branch_size));

    *code_size += branch_size;

    // The split argument is the negated size of the code for e, i.e. it
    // points back to L1. Note that the opcodes are swapped with respect to
    // the ones used for e* and e{n,m}.
    FAIL_ON_ERROR(_yr_emit_inst_arg_int16(
        arena,
        re_node->greedy ? RE_OPCODE_SPLIT_B : RE_OPCODE_SPLIT_A,
        -branch_size,
        NULL,
        &split_offset_addr,
        &split_size));

    *code_size += split_size;
    break;

  case RE_NODE_STAR:

    // Code for e* looks like:
    //
    //          L1: split L1, L2
    //              code for e
    //              jmp L1
    //          L2:

    // Emit the split with offset set to 0 temporarily; it is patched below
    // once the size of the code for e is known.
    FAIL_ON_ERROR(_yr_emit_inst_arg_int16(
        arena,
        re_node->greedy ? RE_OPCODE_SPLIT_A : RE_OPCODE_SPLIT_B,
        0,
        &instruction_addr,
        &split_offset_addr,
        &split_size));

    *code_size += split_size;

    FAIL_ON_ERROR(_yr_re_emit(
        re_node->left,
        arena,
        flags,
        NULL,
        &branch_size));

    *code_size += branch_size;

    // Emit the jump back to L1. Its offset is already known: the negated
    // size of the split plus the code for e.
    FAIL_ON_ERROR(_yr_emit_inst_arg_int16(
        arena,
        RE_OPCODE_JUMP,
        -(branch_size + split_size),
        NULL,
        &jmp_offset_addr,
        &jmp_size));

    *code_size += jmp_size;

    // Update split offset so that it points to L2.
    *split_offset_addr = split_size + branch_size + jmp_size;
    break;

  case RE_NODE_ALT:

    // Code for e1|e2 looks like:
    //
    //              split L1, L2
    //          L1: code for e1
    //              jmp L3
    //          L2: code for e2
    //          L3:

    // Emit a split instruction with offset set to 0 temporarily. Offset
    // will be updated after we know the size of the code generated for
    // the left node (e1).

    FAIL_ON_ERROR(_yr_emit_inst_arg_int16(
        arena,
        RE_OPCODE_SPLIT_A,
        0,
        &instruction_addr,
        &split_offset_addr,
        &split_size));

    *code_size += split_size;

    FAIL_ON_ERROR(_yr_re_emit(
        re_node->left,
        arena,
        flags,
        NULL,
        &branch_size));

    *code_size += branch_size;

    // Emit jump with offset set to 0. It is patched after emitting the code
    // for the right node (e2).

    FAIL_ON_ERROR(_yr_emit_inst_arg_int16(
        arena,
        RE_OPCODE_JUMP,
        0,
        NULL,
        &jmp_offset_addr,
        &jmp_size));

    *code_size += jmp_size;

    // Update split offset so that it points to L2.
    *split_offset_addr = split_size + branch_size + jmp_size;

    FAIL_ON_ERROR(_yr_re_emit(
        re_node->right,
        arena,
        flags,
        NULL,
        &branch_size));

    *code_size += branch_size;

    // Update offset for jmp instruction so that it points to L3.
    *jmp_offset_addr = branch_size + jmp_size;
    break;

  case RE_NODE_RANGE:

    // Code for e1{n,m} looks like:
    //
    //            code for e1 (n times)
    //            push m-n
    //        L0: split L1, L2
    //        L1: code for e1
    //            jnztop L0
    //        L2: pop

    if (re_node->start > 0)
    {
      // The first copy is the one whose address is recorded for the node.
      FAIL_ON_ERROR(_yr_re_emit(
          re_node->left,
          arena,
          flags,
          &instruction_addr,
          &branch_size));

      *code_size += branch_size;

      // Remaining n - 1 copies, emitted without annotating the nodes again.
      for (i = 0; i < re_node->start - 1; i++)
      {
        FAIL_ON_ERROR(_yr_re_emit(
            re_node->left,
            arena,
            flags | EMIT_FLAGS_DONT_ANNOTATE_RE,
            NULL,
            &branch_size));

        *code_size += branch_size;
      }
    }

    // m == n, no more code needed.
    if (re_node->end == re_node->start)
      break;

    // When n == 0 the push is the first instruction emitted for the node.
    FAIL_ON_ERROR(_yr_emit_inst_arg_uint16(
        arena,
        RE_OPCODE_PUSH,
        re_node->end - re_node->start,
        re_node->start == 0 ? &instruction_addr : NULL,
        NULL,
        &inst_size));

    *code_size += inst_size;

    // Split emitted with offset 0, patched below.
    FAIL_ON_ERROR(_yr_emit_inst_arg_int16(
        arena,
        re_node->greedy ? RE_OPCODE_SPLIT_A : RE_OPCODE_SPLIT_B,
        0,
        NULL,
        &split_offset_addr,
        &split_size));

    *code_size += split_size;

    FAIL_ON_ERROR(_yr_re_emit(
        re_node->left,
        arena,
        flags | EMIT_FLAGS_DONT_ANNOTATE_RE,
        NULL,
        &branch_size));

    *code_size += branch_size;

    // Jump back to L0 (the split).
    FAIL_ON_ERROR(_yr_emit_inst_arg_int16(
        arena,
        RE_OPCODE_JNZ,
        -(branch_size + split_size),
        NULL,
        &jmp_offset_addr,
        &jmp_size));

    *code_size += jmp_size;

    // Update split offset so that it points to L2.
    *split_offset_addr = split_size + branch_size + jmp_size;

    FAIL_ON_ERROR(_yr_emit_inst(
        arena,
        RE_OPCODE_POP,
        NULL,
        &inst_size));

    *code_size += inst_size;
    break;
  }

  // Annotate the node with the address of its code, in the field that
  // corresponds to the direction being emitted.
  if (!(flags & EMIT_FLAGS_DONT_ANNOTATE_RE))
  {
    if (flags & EMIT_FLAGS_BACKWARDS)
      re_node->backward_code = instruction_addr;
    else
      re_node->forward_code = instruction_addr;
  }

  if (code_addr != NULL)
    *code_addr = instruction_addr;

  return ERROR_SUCCESS;
}
//
// _yr_parser_write_string
//
// Allocates a YR_STRING in the compiler's strings arena, fills it in, and
// registers it in the Aho-Corasick automaton.
//
// Args:
//    identifier        - String identifier, copied to the sz arena.
//    flags             - String flags. STRING_GFLAGS_LITERAL is added for
//                        plain strings and for hex strings/regexps from which
//                        a literal could be extracted.
//    compiler          - Compiler owning the arenas and the automaton.
//    str               - Text used as the literal for plain strings.
//    re                - Regexp used for hex strings and regular expressions.
//    string            - Receives the new YR_STRING (NULL if its allocation
//                        fails).
//    min_atom_quality  - Receives the value returned by yr_atoms_min_quality
//                        for the atoms extracted from the string.
//
// Returns ERROR_SUCCESS or the error of the first failing step.
//

int _yr_parser_write_string(
    const char* identifier,
    int flags,
    YR_COMPILER* compiler,
    SIZED_STRING* str,
    RE* re,
    YR_STRING** string,
    int* min_atom_quality)
{
  YR_STRING* new_string;
  YR_AC_MATCH* root_match;
  YR_ATOM_LIST_ITEM* atoms = NULL;
  SIZED_STRING* literal;

  int status;
  int longest_form;
  int literal_is_owned = FALSE;

  *string = NULL;

  status = yr_arena_allocate_struct(
      compiler->strings_arena,
      sizeof(YR_STRING),
      (void**) string,
      offsetof(YR_STRING, identifier),
      offsetof(YR_STRING, string),
      offsetof(YR_STRING, chained_to),
      EOL);

  if (status != ERROR_SUCCESS)
    return status;

  new_string = *string;

  status = yr_arena_write_string(
      compiler->sz_arena,
      identifier,
      &new_string->identifier);

  if (status != ERROR_SUCCESS)
    return status;

  if (flags & (STRING_GFLAGS_HEXADECIMAL | STRING_GFLAGS_REGEXP))
  {
    literal = yr_re_extract_literal(re);

    if (literal == NULL)
    {
      // Non-literal strings can't be marked as fixed offset because once we
      // find a string atom in the scanned data we don't know the offset where
      // the string should start, as the non-literal strings can contain
      // variable-length portions.

      flags &= ~STRING_GFLAGS_FIXED_OFFSET;
    }
    else
    {
      // The extracted literal is released before returning.
      literal_is_owned = TRUE;
      flags |= STRING_GFLAGS_LITERAL;
    }
  }
  else
  {
    flags |= STRING_GFLAGS_LITERAL;
    literal = str;
  }

  new_string->g_flags = flags;
  new_string->chained_to = NULL;
  new_string->fixed_offset = UNDEFINED;

  #ifdef PROFILING_ENABLED
  new_string->clock_ticks = 0;
  #endif

  memset(new_string->matches, 0, sizeof(new_string->matches));

  memset(
      new_string->unconfirmed_matches,
      0,
      sizeof(new_string->unconfirmed_matches));

  if (flags & STRING_GFLAGS_LITERAL)
  {
    new_string->length = (uint32_t) literal->length;

    status = yr_arena_write_data(
        compiler->sz_arena,
        literal->c_string,
        literal->length + 1,   // +1 to include terminating NULL
        (void**) &new_string->string);

    if (status == ERROR_SUCCESS)
      status = yr_atoms_extract_from_string(
          (uint8_t*) literal->c_string,
          (int32_t) literal->length,
          flags,
          &atoms);
  }
  else
  {
    status = yr_re_emit_code(re, compiler->re_code_arena);

    if (status == ERROR_SUCCESS)
      status = yr_atoms_extract_from_re(re, flags, &atoms);
  }

  // Add the string to Aho-Corasick automaton.

  if (status == ERROR_SUCCESS && atoms != NULL)
  {
    status = yr_ac_add_string(
        compiler->automaton_arena,
        compiler->automaton,
        new_string,
        atoms);
  }
  else if (status == ERROR_SUCCESS)
  {
    // No atoms: attach a match for the string to the automaton's root.

    status = yr_arena_allocate_struct(
        compiler->automaton_arena,
        sizeof(YR_AC_MATCH),
        (void**) &root_match,
        offsetof(YR_AC_MATCH, string),
        offsetof(YR_AC_MATCH, forward_code),
        offsetof(YR_AC_MATCH, backward_code),
        offsetof(YR_AC_MATCH, next),
        EOL);

    if (status == ERROR_SUCCESS)
    {
      root_match->string = new_string;
      root_match->backtrack = 0;
      root_match->backward_code = NULL;
      root_match->forward_code = re->root_node->forward_code;

      root_match->next = compiler->automaton->root->matches;
      compiler->automaton->root->matches = root_match;
    }
  }

  *min_atom_quality = yr_atoms_min_quality(atoms);

  if (flags & STRING_GFLAGS_LITERAL)
  {
    // Wide strings take two bytes per character.
    longest_form = (flags & STRING_GFLAGS_WIDE) ?
        new_string->length * 2 : new_string->length;

    if (longest_form <= MAX_ATOM_LENGTH)
      new_string->g_flags |= STRING_GFLAGS_FITS_IN_ATOM;
  }

  if (literal_is_owned)
    yr_free(literal);

  if (atoms != NULL)
    yr_atoms_list_destroy(atoms);

  return status;
}
//
// _yr_compiler_compile_rules
//
// Terminates the code, rules and externals arenas with their end markers,
// writes the Aho-Corasick tables to the automaton arena, and then merges all
// the compiler's arenas into a single new arena that starts with a
// YARA_RULES_FILE_HEADER. On success the resulting arena is stored in
// compiler->compiled_rules_arena and coalesced.
//
// Every compiler arena that has been appended to the new arena is set to
// NULL in the compiler, as yr_arena_append takes over its pages.
//
// Returns ERROR_SUCCESS or the error of the first failing step. A failure
// while writing any of the end markers is reported too, instead of silently
// producing unterminated lists.
//

int _yr_compiler_compile_rules(
    YR_COMPILER* compiler)
{
  YARA_RULES_FILE_HEADER* rules_file_header = NULL;
  YR_ARENA* arena = NULL;
  YR_RULE null_rule;
  YR_EXTERNAL_VARIABLE null_external;
  YR_AC_TABLES tables;

  int8_t halt = OP_HALT;
  int result;

  // Write halt instruction at the end of code.
  result = yr_arena_write_data(
      compiler->code_arena,
      &halt,
      sizeof(int8_t),
      NULL);

  if (result == ERROR_SUCCESS)
  {
    // Write a null rule indicating the end.
    memset(&null_rule, 0xFA, sizeof(YR_RULE));
    null_rule.g_flags = RULE_GFLAGS_NULL;

    result = yr_arena_write_data(
        compiler->rules_arena,
        &null_rule,
        sizeof(YR_RULE),
        NULL);
  }

  if (result == ERROR_SUCCESS)
  {
    // Write a null external indicating the end.
    memset(&null_external, 0xFA, sizeof(YR_EXTERNAL_VARIABLE));
    null_external.type = EXTERNAL_VARIABLE_TYPE_NULL;

    result = yr_arena_write_data(
        compiler->externals_arena,
        &null_external,
        sizeof(YR_EXTERNAL_VARIABLE),
        NULL);
  }

  // Write Aho-Corasick automaton to arena.
  if (result == ERROR_SUCCESS)
    result = yr_ac_compile(
        compiler->automaton,
        compiler->automaton_arena,
        &tables);

  if (result == ERROR_SUCCESS)
    result = yr_arena_create(1024, 0, &arena);

  if (result == ERROR_SUCCESS)
    result = yr_arena_allocate_struct(
        arena,
        sizeof(YARA_RULES_FILE_HEADER),
        (void**) &rules_file_header,
        offsetof(YARA_RULES_FILE_HEADER, rules_list_head),
        offsetof(YARA_RULES_FILE_HEADER, externals_list_head),
        offsetof(YARA_RULES_FILE_HEADER, code_start),
        offsetof(YARA_RULES_FILE_HEADER, match_table),
        offsetof(YARA_RULES_FILE_HEADER, transition_table),
        EOL);

  if (result == ERROR_SUCCESS)
  {
    rules_file_header->rules_list_head = (YR_RULE*) yr_arena_base_address(
        compiler->rules_arena);

    rules_file_header->externals_list_head = (YR_EXTERNAL_VARIABLE*)
        yr_arena_base_address(compiler->externals_arena);

    rules_file_header->code_start = (uint8_t*) yr_arena_base_address(
        compiler->code_arena);

    rules_file_header->match_table = tables.matches;
    rules_file_header->transition_table = tables.transitions;
  }

  // Append the compiler's arenas one by one. Each arena is cleared in the
  // compiler only after its own append succeeded, so on failure the compiler
  // still owns exactly the arenas that were not merged.

  if (result == ERROR_SUCCESS)
  {
    result = yr_arena_append(
        arena,
        compiler->code_arena);
  }

  if (result == ERROR_SUCCESS)
  {
    compiler->code_arena = NULL;
    result = yr_arena_append(
        arena,
        compiler->re_code_arena);
  }

  if (result == ERROR_SUCCESS)
  {
    compiler->re_code_arena = NULL;
    result = yr_arena_append(
        arena,
        compiler->rules_arena);
  }

  if (result == ERROR_SUCCESS)
  {
    compiler->rules_arena = NULL;
    result = yr_arena_append(
        arena,
        compiler->strings_arena);
  }

  if (result == ERROR_SUCCESS)
  {
    compiler->strings_arena = NULL;
    result = yr_arena_append(
        arena,
        compiler->externals_arena);
  }

  if (result == ERROR_SUCCESS)
  {
    compiler->externals_arena = NULL;
    result = yr_arena_append(
        arena,
        compiler->namespaces_arena);
  }

  if (result == ERROR_SUCCESS)
  {
    compiler->namespaces_arena = NULL;
    result = yr_arena_append(
        arena,
        compiler->metas_arena);
  }

  if (result == ERROR_SUCCESS)
  {
    compiler->metas_arena = NULL;
    result = yr_arena_append(
        arena,
        compiler->sz_arena);
  }

  if (result == ERROR_SUCCESS)
  {
    compiler->sz_arena = NULL;
    result = yr_arena_append(
        arena,
        compiler->automaton_arena);
  }

  if (result == ERROR_SUCCESS)
  {
    compiler->automaton_arena = NULL;
    result = yr_arena_append(
        arena,
        compiler->matches_arena);
  }

  if (result == ERROR_SUCCESS)
  {
    compiler->matches_arena = NULL;
    compiler->compiled_rules_arena = arena;
    result = yr_arena_coalesce(arena);
  }
  else if (arena != NULL)
  {
    // The arena does not exist yet if the failure happened before (or in)
    // yr_arena_create, in which case there is nothing to destroy.
    yr_arena_destroy(arena);
  }

  return result;
}
//
// _yr_parser_write_string
//
// Allocates a YR_STRING in the compiler's strings arena, fills it in, and
// registers it in the Aho-Corasick automaton.
//
// Args:
//    identifier       - String identifier, copied to the sz arena.
//    flags            - String flags. STRING_GFLAGS_LITERAL is added for
//                       plain strings and for hex strings/regexps from which
//                       a literal could be extracted.
//    compiler         - Compiler owning the arenas and the automaton.
//    str              - Text used as the literal for plain strings.
//    re               - Regexp used for hex strings and regular expressions.
//    string           - Receives the new YR_STRING (NULL if its allocation
//                       fails).
//    min_atom_length  - Receives the length of the shortest atom extracted
//                       from the string, or 0 if there are no atoms.
//
// Returns ERROR_SUCCESS or the error of the first failing step.
//

int _yr_parser_write_string(
    const char* identifier,
    int flags,
    YR_COMPILER* compiler,
    SIZED_STRING* str,
    RE* re,
    YR_STRING** string,
    int* min_atom_length)
{
  YR_STRING* new_string;
  SIZED_STRING* literal;
  YR_AC_MATCH* root_match;
  YR_ATOM_LIST_ITEM* item;
  YR_ATOM_LIST_ITEM* atoms = NULL;

  int status;
  int shortest;
  int longest_form;
  int literal_is_owned = FALSE;

  *string = NULL;

  status = yr_arena_allocate_struct(
      compiler->strings_arena,
      sizeof(YR_STRING),
      (void**) string,
      offsetof(YR_STRING, identifier),
      offsetof(YR_STRING, string),
      offsetof(YR_STRING, chained_to),
      EOL);

  if (status != ERROR_SUCCESS)
    return status;

  new_string = *string;

  status = yr_arena_write_string(
      compiler->sz_arena,
      identifier,
      &new_string->identifier);

  if (status != ERROR_SUCCESS)
    return status;

  if (flags & (STRING_GFLAGS_HEXADECIMAL | STRING_GFLAGS_REGEXP))
  {
    literal = yr_re_extract_literal(re);

    if (literal != NULL)
    {
      // The extracted literal is released before returning.
      literal_is_owned = TRUE;
      flags |= STRING_GFLAGS_LITERAL;
    }
  }
  else
  {
    flags |= STRING_GFLAGS_LITERAL;
    literal = str;
  }

  new_string->g_flags = flags;
  new_string->chained_to = NULL;

  memset(new_string->matches, 0, sizeof(new_string->matches));

  memset(
      new_string->unconfirmed_matches,
      0,
      sizeof(new_string->unconfirmed_matches));

  if (flags & STRING_GFLAGS_LITERAL)
  {
    new_string->length = literal->length;

    status = yr_arena_write_data(
        compiler->sz_arena,
        literal->c_string,
        literal->length,
        (void**) &new_string->string);

    if (status == ERROR_SUCCESS)
      status = yr_atoms_extract_from_string(
          (uint8_t*) literal->c_string,
          literal->length,
          flags,
          &atoms);
  }
  else
  {
    status = yr_re_emit_code(re, compiler->re_code_arena);

    if (status == ERROR_SUCCESS)
      status = yr_atoms_extract_from_re(re, flags, &atoms);
  }

  // Add the string to Aho-Corasick automaton.

  if (status == ERROR_SUCCESS && atoms != NULL)
  {
    status = yr_ac_add_string(
        compiler->automaton_arena,
        compiler->automaton,
        new_string,
        atoms);
  }
  else if (status == ERROR_SUCCESS)
  {
    // No atoms: attach a match for the string to the automaton's root.

    status = yr_arena_allocate_struct(
        compiler->automaton_arena,
        sizeof(YR_AC_MATCH),
        (void**) &root_match,
        offsetof(YR_AC_MATCH, string),
        offsetof(YR_AC_MATCH, forward_code),
        offsetof(YR_AC_MATCH, backward_code),
        offsetof(YR_AC_MATCH, next),
        EOL);

    if (status == ERROR_SUCCESS)
    {
      root_match->string = new_string;
      root_match->backtrack = 0;
      root_match->backward_code = NULL;
      root_match->forward_code = re->root_node->forward_code;

      root_match->next = compiler->automaton->root->matches;
      compiler->automaton->root->matches = root_match;
    }
  }

  // Find the length of the shortest atom, starting from the maximum possible
  // length. With no atoms at all the result is 0.

  shortest = (atoms != NULL) ? MAX_ATOM_LENGTH : 0;

  for (item = atoms; item != NULL; item = item->next)
  {
    if (item->atom_length < shortest)
      shortest = item->atom_length;
  }

  *min_atom_length = shortest;

  if (flags & STRING_GFLAGS_LITERAL)
  {
    // Wide strings take two bytes per character.
    longest_form = (flags & STRING_GFLAGS_WIDE) ?
        new_string->length * 2 : new_string->length;

    if (longest_form == shortest)
      new_string->g_flags |= STRING_GFLAGS_FITS_IN_ATOM;
  }

  if (literal_is_owned)
    yr_free(literal);

  if (atoms != NULL)
    yr_atoms_list_destroy(atoms);

  return status;
}
int _yr_scan_match_callback( uint8_t* match_data, int32_t match_length, int flags, void* args) { CALLBACK_ARGS* callback_args = (CALLBACK_ARGS*) args; YR_STRING* string = callback_args->string; YR_MATCH* new_match; int result = ERROR_SUCCESS; int tidx = callback_args->context->tidx; size_t match_offset = match_data - callback_args->data; // total match length is the sum of backward and forward matches. match_length += callback_args->forward_matches; if (callback_args->full_word) { if (flags & RE_FLAGS_WIDE) { if (match_offset >= 2 && *(match_data - 1) == 0 && isalnum(*(match_data - 2))) return ERROR_SUCCESS; if (match_offset + match_length + 1 < callback_args->data_size && *(match_data + match_length + 1) == 0 && isalnum(*(match_data + match_length))) return ERROR_SUCCESS; } else { if (match_offset >= 1 && isalnum(*(match_data - 1))) return ERROR_SUCCESS; if (match_offset + match_length < callback_args->data_size && isalnum(*(match_data + match_length))) return ERROR_SUCCESS; } } if (STRING_IS_CHAIN_PART(string)) { result = _yr_scan_verify_chained_string_match( string, callback_args->context, match_data, callback_args->data_base, match_offset, match_length); } else { if (string->matches[tidx].count == 0) { // If this is the first match for the string, put the string in the // list of strings whose flags needs to be cleared after the scan. FAIL_ON_ERROR(yr_arena_write_data( callback_args->context->matching_strings_arena, &string, sizeof(string), NULL)); } result = yr_arena_allocate_memory( callback_args->context->matches_arena, sizeof(YR_MATCH), (void**) &new_match); if (result == ERROR_SUCCESS) { new_match->base = callback_args->data_base; new_match->offset = match_offset; new_match->length = match_length; new_match->data = match_data; new_match->prev = NULL; new_match->next = NULL; FAIL_ON_ERROR(_yr_scan_add_match_to_list( new_match, &string->matches[tidx], STRING_IS_GREEDY_REGEXP(string))); } } return result; }
//
// yr_parser_reduce_string_declaration
//
// Creates the YR_STRING for a string declared in a rule and adds it to the
// compiler's Aho-Corasick automaton.
//
// Args:
//    yyscanner   - Scanner whose extra data is the YR_COMPILER in use.
//    flags       - String flags as parsed. STRING_GFLAGS_ANONYMOUS is added
//                  for the "$" identifier, STRING_GFLAGS_ASCII when the
//                  string is not wide, and STRING_GFLAGS_SINGLE_MATCH always.
//    identifier  - String identifier.
//    str         - String text: the literal itself for plain strings, or the
//                  source of the hex string / regular expression.
//
// Returns the new string, or NULL on failure, in which case
// compiler->last_result holds the error. For invalid hex strings or regular
// expressions the compiler's extra error info is set to a descriptive
// message when the regexp object is available.
//
// A warning is sent to the compiler's error report function, if any, when the
// shortest atom extracted from the string is shorter than 2 bytes.
//

YR_STRING* yr_parser_reduce_string_declaration(
    yyscan_t yyscanner,
    int32_t flags,
    const char* identifier,
    SIZED_STRING* str)
{
  int min_atom_length;
  char* file_name;
  char message[512];

  YR_STRING* string;
  YR_AC_MATCH* new_match;
  YR_ATOM_LIST_ITEM* atom;
  YR_ATOM_LIST_ITEM* atom_list = NULL;

  RE* re = NULL;

  uint8_t* literal_string;

  int literal_string_len;
  int max_string_len;

  YR_COMPILER* compiler = yyget_extra(yyscanner);

  compiler->last_result = yr_arena_allocate_struct(
      compiler->strings_arena,
      sizeof(YR_STRING),
      (void**) &string,
      offsetof(YR_STRING, identifier),
      offsetof(YR_STRING, string),
      EOL);

  if (compiler->last_result != ERROR_SUCCESS)
    return NULL;

  compiler->last_result = yr_arena_write_string(
      compiler->sz_arena,
      identifier,
      &string->identifier);

  if (compiler->last_result != ERROR_SUCCESS)
    return NULL;

  if (strcmp(identifier,"$") == 0)
    flags |= STRING_GFLAGS_ANONYMOUS;

  if (!(flags & STRING_GFLAGS_WIDE))
    flags |= STRING_GFLAGS_ASCII;

  // The STRING_GFLAGS_SINGLE_MATCH flag indicates that finding
  // a single match for the string is enough. This is true in
  // most cases, except when the string count (#) and string offset (@)
  // operators are used. All strings are marked STRING_GFLAGS_SINGLE_MATCH
  // initially, and unmarked later if required.

  flags |= STRING_GFLAGS_SINGLE_MATCH;

  string->g_flags = flags;

  memset(string->matches, 0, sizeof(string->matches));

  if (flags & STRING_GFLAGS_HEXADECIMAL ||
      flags & STRING_GFLAGS_REGEXP)
  {
    if (flags & STRING_GFLAGS_HEXADECIMAL)
      compiler->last_result = yr_re_compile_hex(
          str->c_string, &re);
    else
      compiler->last_result = yr_re_compile(
          str->c_string, &re);

    if (compiler->last_result != ERROR_SUCCESS)
    {
      // "re" starts as NULL and nothing visible here guarantees that a
      // failed compilation leaves a regexp object behind, so the detailed
      // message is built only when there is an object to read it from.

      if (re != NULL)
      {
        snprintf(
            message,
            sizeof(message),
            "invalid %s in string \"%s\": %s",
            (flags & STRING_GFLAGS_HEXADECIMAL) ?
                "hex string" : "regular expression",
            identifier,
            re->error_message);

        yr_compiler_set_error_extra_info(compiler, message);
      }

      string = NULL;
      goto _exit;
    }

    // Propagate to the string the properties detected while compiling.

    if (re->flags & RE_FLAGS_START_ANCHORED)
      string->g_flags |= STRING_GFLAGS_START_ANCHORED;

    if (re->flags & RE_FLAGS_END_ANCHORED)
      string->g_flags |= STRING_GFLAGS_END_ANCHORED;

    if (re->flags & RE_FLAGS_FAST_HEX_REGEXP)
      string->g_flags |= STRING_GFLAGS_FAST_HEX_REGEXP;

    if (re->flags & RE_FLAGS_LITERAL_STRING)
    {
      // The regexp is equivalent to a literal: treat it as such.

      string->g_flags |= STRING_GFLAGS_LITERAL;
      literal_string = re->literal_string;
      literal_string_len = re->literal_string_len;

      compiler->last_result = yr_atoms_extract_from_string(
          literal_string, literal_string_len, string->g_flags, &atom_list);
    }
    else
    {
      compiler->last_result = yr_re_emit_code(
          re, compiler->re_code_arena);

      if (compiler->last_result != ERROR_SUCCESS)
      {
        string = NULL;
        goto _exit;
      }

      compiler->last_result = yr_atoms_extract_from_re(
          re, string->g_flags, &atom_list);
    }
  }
  else
  {
    string->g_flags |= STRING_GFLAGS_LITERAL;
    literal_string = (uint8_t*) str->c_string;
    literal_string_len = str->length;

    compiler->last_result = yr_atoms_extract_from_string(
        literal_string, literal_string_len, string->g_flags, &atom_list);
  }

  if (compiler->last_result != ERROR_SUCCESS)
  {
    string = NULL;
    goto _exit;
  }

  if (STRING_IS_LITERAL(string))
  {
    compiler->last_result = yr_arena_write_data(
        compiler->sz_arena,
        literal_string,
        literal_string_len,
        (void*) &string->string);

    if (compiler->last_result != ERROR_SUCCESS)
    {
      string = NULL;
      goto _exit;
    }

    string->length = literal_string_len;
  }

  // Add the string to Aho-Corasick automaton.

  if (atom_list != NULL)
  {
    compiler->last_result = yr_ac_add_string(
        compiler->automaton_arena,
        compiler->automaton,
        string,
        atom_list);
  }
  else
  {
    // No atoms: attach a match for the string to the automaton's root.

    compiler->last_result = yr_arena_allocate_struct(
        compiler->automaton_arena,
        sizeof(YR_AC_MATCH),
        (void**) &new_match,
        offsetof(YR_AC_MATCH, string),
        offsetof(YR_AC_MATCH, forward_code),
        offsetof(YR_AC_MATCH, backward_code),
        offsetof(YR_AC_MATCH, next),
        EOL);

    if (compiler->last_result == ERROR_SUCCESS)
    {
      new_match->backtrack = 0;
      new_match->string = string;

      // There is no regexp object for plain text strings, so there is no
      // code address to record in that case.
      new_match->forward_code =
          (re != NULL) ? re->root_node->forward_code : NULL;

      new_match->backward_code = NULL;
      new_match->next = compiler->automaton->root->matches;
      compiler->automaton->root->matches = new_match;
    }
  }

  // Same handling as every other failure in this function: give up without
  // reporting performance warnings for a string that was not created.

  if (compiler->last_result != ERROR_SUCCESS)
  {
    string = NULL;
    goto _exit;
  }

  // Find the length of the shortest atom. With no atoms the length is 0.

  atom = atom_list;

  if (atom != NULL)
    min_atom_length = MAX_ATOM_LENGTH;
  else
    min_atom_length = 0;

  while (atom != NULL)
  {
    if (atom->atom_length < min_atom_length)
      min_atom_length = atom->atom_length;

    atom = atom->next;
  }

  if (STRING_IS_LITERAL(string))
  {
    // Wide strings take two bytes per character.

    if (STRING_IS_WIDE(string))
      max_string_len = string->length * 2;
    else
      max_string_len = string->length;

    if (max_string_len == min_atom_length)
      string->g_flags |= STRING_GFLAGS_FITS_IN_ATOM;
  }

  if (compiler->file_name_stack_ptr > 0)
    file_name = compiler->file_name_stack[compiler->file_name_stack_ptr - 1];
  else
    file_name = NULL;

  if (min_atom_length < 2 && compiler->error_report_function != NULL)
  {
    snprintf(
        message,
        sizeof(message),
        "%s is slowing down scanning%s",
        string->identifier,
        min_atom_length == 0 ? " (critical!)" : "");

    compiler->error_report_function(
        YARA_ERROR_LEVEL_WARNING,
        file_name,
        yyget_lineno(yyscanner),
        message);
  }

_exit:

  if (atom_list != NULL)
    yr_atoms_list_destroy(atom_list);

  if (re != NULL)
    yr_re_destroy(re);

  return string;
}