// Unloads every module loaded by the calling thread and clears the
// thread's bit from each module's is_loaded mask. Always succeeds.
int yr_modules_unload_all(
    YR_SCAN_CONTEXT* context)
{
  tidx_mask_t thread_bit = 1 << yr_get_tidx();

  for (int i = 0; i < sizeof(yr_modules_table) / sizeof(YR_MODULE); i++)
  {
    if ((yr_modules_table[i].is_loaded & thread_bit) == 0)
      continue;

    YR_OBJECT* mod_object = (YR_OBJECT*) yr_hash_table_lookup(
        context->objects_table,
        yr_modules_table[i].name,
        NULL);

    // A module marked as loaded must have its structure in the table.
    assert(mod_object != NULL);

    yr_modules_table[i].unload(mod_object);
    yr_modules_table[i].is_loaded &= ~thread_bit;
  }

  return ERROR_SUCCESS;
}
// Compile-time handler for an "import" statement: creates the module's
// structure object (once per namespace), registers it, declares the
// module's symbols, and emits an OP_IMPORT instruction that carries a
// pointer to the module name written into the sz_arena.
// Returns compiler->last_result (ERROR_SUCCESS on success).
int yr_parser_reduce_import(
    yyscan_t yyscanner,
    SIZED_STRING* module_name)
{
  YR_COMPILER* compiler = yyget_extra(yyscanner);
  YR_OBJECT* module_obj;
  char* imported_name;

  module_obj = (YR_OBJECT*) yr_hash_table_lookup(
      compiler->objects_table,
      module_name->c_string,
      compiler->current_namespace->name);

  // Importing an already-imported module is a no-op.
  if (module_obj != NULL)
    return ERROR_SUCCESS;

  compiler->last_result = yr_object_create(
      OBJECT_TYPE_STRUCTURE,
      module_name->c_string,
      NULL,
      &module_obj);

  if (compiler->last_result == ERROR_SUCCESS)
    compiler->last_result = yr_hash_table_add(
        compiler->objects_table,
        module_name->c_string,
        compiler->current_namespace->name,
        module_obj);

  if (compiler->last_result == ERROR_SUCCESS)
  {
    compiler->last_result = yr_modules_do_declarations(
        module_name->c_string,
        module_obj);

    // Attach the offending module name to the error report.
    if (compiler->last_result == ERROR_UNKNOWN_MODULE)
      yr_compiler_set_error_extra_info(compiler, module_name->c_string);
  }

  if (compiler->last_result == ERROR_SUCCESS)
    compiler->last_result = yr_arena_write_string(
        compiler->sz_arena,
        module_name->c_string,
        &imported_name);

  if (compiler->last_result == ERROR_SUCCESS)
    compiler->last_result = yr_parser_emit_with_arg_reloc(
        yyscanner,
        OP_IMPORT,
        PTR_TO_INT64(imported_name),
        NULL,
        NULL);

  return compiler->last_result;
}
// Walks the static module table and invokes each module's unload hook
// for modules whose structure is present in this scan's objects table.
int yr_modules_unload_all(
    YR_SCAN_CONTEXT* context)
{
  for (int idx = 0; idx < sizeof(yr_modules_table) / sizeof(YR_MODULE); idx++)
  {
    YR_OBJECT* obj = (YR_OBJECT*) yr_hash_table_lookup(
        context->objects_table,
        yr_modules_table[idx].name,
        NULL);

    // Only modules that were actually loaded appear in the table.
    if (obj != NULL)
      yr_modules_table[idx].unload(obj);
  }

  return ERROR_SUCCESS;
}
// Dumps the data of every loaded module to stdout, one module per
// section, separated by a blank line.
void yr_modules_print_data(
    YR_SCAN_CONTEXT* context)
{
  for (int idx = 0; idx < sizeof(yr_modules_table) / sizeof(YR_MODULE); idx++)
  {
    YR_OBJECT* obj = (YR_OBJECT*) yr_hash_table_lookup(
        context->objects_table,
        yr_modules_table[idx].name,
        NULL);

    if (obj == NULL)
      continue;  // module not loaded for this scan

    yr_object_print_data(obj, 0, 1);
    printf("\n");
  }
}
// Prints the data of every module loaded by the calling thread,
// as indicated by the thread's bit in each module's is_loaded mask.
void yr_modules_print_data(
    YR_SCAN_CONTEXT* context)
{
  tidx_mask_t thread_bit = 1 << yr_get_tidx();

  for (int idx = 0; idx < sizeof(yr_modules_table) / sizeof(YR_MODULE); idx++)
  {
    if ((yr_modules_table[idx].is_loaded & thread_bit) == 0)
      continue;

    YR_OBJECT* obj = (YR_OBJECT*) yr_hash_table_lookup(
        context->objects_table,
        yr_modules_table[idx].name,
        NULL);

    // A module marked as loaded must have its structure in the table.
    assert(obj != NULL);

    yr_object_print_data(obj, 0);
  }
}
// Executes the compiled rule bytecode on a small stack-based VM.
//
// rules      - compiled rules whose bytecode is run
// context    - per-scan state (string matches, module objects, ...)
// timeout    - scan timeout in seconds; 0 disables the timeout check
// start_time - moment the scan started, used for timeout accounting
//
// Returns ERROR_SUCCESS, ERROR_SCAN_TIMEOUT, ERROR_INSUFICIENT_MEMORY or
// an error propagated from a module function call.
//
// Fixes vs previous revision:
//  - OP_MATCHES now checks both operands with ensure_defined before
//    dereferencing r1.ss (previously crashed on an UNDEFINED string).
//  - OP_SHR/OP_SHL range-check the shift count (shifting a 64-bit value
//    by a negative count or by >= 64 is undefined behavior in C).
int yr_execute_code(
    YR_RULES* rules,
    YR_SCAN_CONTEXT* context,
    int timeout,
    time_t start_time)
{
  int64_t mem[MEM_SIZE];
  int64_t args[MAX_FUNCTION_ARGS];
  int32_t sp = 0;
  uint8_t* ip = rules->code_start;

  STACK_ITEM *stack;
  STACK_ITEM r1;
  STACK_ITEM r2;
  STACK_ITEM r3;

  #ifdef PROFILING_ENABLED
  YR_RULE* current_rule = NULL;
  #endif

  YR_RULE* rule;
  YR_MATCH* match;
  YR_OBJECT_FUNCTION* function;

  char* identifier;
  char* args_fmt;

  int i;
  int found;
  int count;
  int result = ERROR_SUCCESS;
  int stop = FALSE;
  int cycle = 0;
  int tidx = context->tidx;

  #ifdef PROFILING_ENABLED
  clock_t start = clock();
  #endif

  stack = (STACK_ITEM *) yr_malloc(STACK_SIZE * sizeof(STACK_ITEM));

  if (stack == NULL)
    return ERROR_INSUFICIENT_MEMORY;

  while (!stop)
  {
    switch(*ip)
    {
      case OP_HALT:
        // When HALT is reached the stack should be empty.
        assert(sp == 0);
        stop = TRUE;
        break;

      case OP_PUSH:
        r1.i = *(uint64_t*)(ip + 1);
        ip += sizeof(uint64_t);
        push(r1);
        break;

      case OP_POP:
        pop(r1);
        break;

      // ---- loop-variable memory slots ----

      case OP_CLEAR_M:
        r1.i = *(uint64_t*)(ip + 1);
        ip += sizeof(uint64_t);
        mem[r1.i] = 0;
        break;

      case OP_ADD_M:
        r1.i = *(uint64_t*)(ip + 1);
        ip += sizeof(uint64_t);
        pop(r2);
        if (!is_undef(r2))
          mem[r1.i] += r2.i;
        break;

      case OP_INCR_M:
        r1.i = *(uint64_t*)(ip + 1);
        ip += sizeof(uint64_t);
        mem[r1.i]++;
        break;

      case OP_PUSH_M:
        r1.i = *(uint64_t*)(ip + 1);
        ip += sizeof(uint64_t);
        r1.i = mem[r1.i];
        push(r1);
        break;

      case OP_POP_M:
        r1.i = *(uint64_t*)(ip + 1);
        ip += sizeof(uint64_t);
        pop(r2);
        mem[r1.i] = r2.i;
        break;

      case OP_SWAPUNDEF:
        // Replace an UNDEFINED top-of-stack with the value in mem slot r1.i.
        r1.i = *(uint64_t*)(ip + 1);
        ip += sizeof(uint64_t);
        pop(r2);
        if (is_undef(r2))
        {
          r1.i = mem[r1.i];
          push(r1);
        }
        else
        {
          push(r2);
        }
        break;

      // ---- conditional jumps (peek, don't consume) ----

      case OP_JNUNDEF:
        pop(r1);
        push(r1);
        ip = jmp_if(!is_undef(r1), ip);
        break;

      case OP_JLE:
        pop(r2);
        pop(r1);
        push(r1);
        push(r2);
        ip = jmp_if(r1.i <= r2.i, ip);
        break;

      case OP_JTRUE:
        pop(r1);
        push(r1);
        ip = jmp_if(!is_undef(r1) && r1.i, ip);
        break;

      case OP_JFALSE:
        pop(r1);
        push(r1);
        ip = jmp_if(is_undef(r1) || !r1.i, ip);
        break;

      // ---- boolean operators with UNDEFINED-aware semantics ----

      case OP_AND:
        pop(r2);
        pop(r1);
        if (is_undef(r1) || is_undef(r2))
          r1.i = 0;
        else
          r1.i = r1.i && r2.i;
        push(r1);
        break;

      case OP_OR:
        pop(r2);
        pop(r1);
        if (is_undef(r1))
        {
          push(r2);
        }
        else if (is_undef(r2))
        {
          push(r1);
        }
        else
        {
          r1.i = r1.i || r2.i;
          push(r1);
        }
        break;

      case OP_NOT:
        pop(r1);
        if (is_undef(r1))
          r1.i = UNDEFINED;
        else
          r1.i = !r1.i;
        push(r1);
        break;

      // ---- integer bitwise/arithmetic helpers ----

      case OP_MOD:
        pop(r2);
        pop(r1);
        ensure_defined(r2);
        ensure_defined(r1);
        if (r2.i != 0)
          r1.i = r1.i % r2.i;
        else
          r1.i = UNDEFINED;  // modulo by zero yields UNDEFINED
        push(r1);
        break;

      case OP_SHR:
        pop(r2);
        pop(r1);
        ensure_defined(r2);
        ensure_defined(r1);
        // Shifting by a negative count or by >= 64 bits is undefined
        // behavior in C; clamp the semantics explicitly.
        if (r2.i < 0)
          r1.i = UNDEFINED;
        else if (r2.i < 64)
          r1.i = r1.i >> r2.i;
        else
          r1.i = 0;
        push(r1);
        break;

      case OP_SHL:
        pop(r2);
        pop(r1);
        ensure_defined(r2);
        ensure_defined(r1);
        if (r2.i < 0)
          r1.i = UNDEFINED;
        else if (r2.i < 64)
          r1.i = r1.i << r2.i;
        else
          r1.i = 0;
        push(r1);
        break;

      case OP_BITWISE_NOT:
        pop(r1);
        ensure_defined(r1);
        r1.i = ~r1.i;
        push(r1);
        break;

      case OP_BITWISE_AND:
        pop(r2);
        pop(r1);
        ensure_defined(r2);
        ensure_defined(r1);
        r1.i = r1.i & r2.i;
        push(r1);
        break;

      case OP_BITWISE_OR:
        pop(r2);
        pop(r1);
        ensure_defined(r2);
        ensure_defined(r1);
        r1.i = r1.i | r2.i;
        push(r1);
        break;

      case OP_BITWISE_XOR:
        pop(r2);
        pop(r1);
        ensure_defined(r2);
        ensure_defined(r1);
        r1.i = r1.i ^ r2.i;
        push(r1);
        break;

      // ---- rule bookkeeping ----

      case OP_PUSH_RULE:
        rule = *(YR_RULE**)(ip + 1);
        ip += sizeof(uint64_t);
        r1.i = rule->t_flags[tidx] & RULE_TFLAGS_MATCH ? 1 : 0;
        push(r1);
        break;

      case OP_INIT_RULE:
        #ifdef PROFILING_ENABLED
        current_rule = *(YR_RULE**)(ip + 1);
        #endif
        ip += sizeof(uint64_t);
        break;

      case OP_MATCH_RULE:
        pop(r1);
        rule = *(YR_RULE**)(ip + 1);
        ip += sizeof(uint64_t);
        if (!is_undef(r1) && r1.i)
          rule->t_flags[tidx] |= RULE_TFLAGS_MATCH;
        #ifdef PROFILING_ENABLED
        rule->clock_ticks += clock() - start;
        start = clock();
        #endif
        break;

      // ---- module object access ----

      case OP_OBJ_LOAD:
        identifier = *(char**)(ip + 1);
        ip += sizeof(uint64_t);
        r1.o = (YR_OBJECT*) yr_hash_table_lookup(
            context->objects_table,
            identifier,
            NULL);
        assert(r1.o != NULL);
        push(r1);
        break;

      case OP_OBJ_FIELD:
        identifier = *(char**)(ip + 1);
        ip += sizeof(uint64_t);
        pop(r1);
        ensure_defined(r1);
        r1.o = yr_object_lookup_field(r1.o, identifier);
        assert(r1.o != NULL);
        push(r1);
        break;

      case OP_OBJ_VALUE:
        pop(r1);
        ensure_defined(r1);
        switch(r1.o->type)
        {
          case OBJECT_TYPE_INTEGER:
            r1.i = ((YR_OBJECT_INTEGER*) r1.o)->value;
            break;
          case OBJECT_TYPE_FLOAT:
            if (isnan(((YR_OBJECT_DOUBLE*) r1.o)->value))
              r1.i = UNDEFINED;
            else
              r1.d = ((YR_OBJECT_DOUBLE*) r1.o)->value;
            break;
          case OBJECT_TYPE_STRING:
            if (((YR_OBJECT_STRING*) r1.o)->value == NULL)
              r1.i = UNDEFINED;
            else
              r1.p = ((YR_OBJECT_STRING*) r1.o)->value;
            break;
          default:
            assert(FALSE);
        }
        push(r1);
        break;

      case OP_INDEX_ARRAY:
        pop(r1);  // index
        pop(r2);  // array
        ensure_defined(r1);
        assert(r2.o->type == OBJECT_TYPE_ARRAY);
        r1.o = yr_object_array_get_item(r2.o, 0, (int) r1.i);
        if (r1.o == NULL)
          r1.i = UNDEFINED;
        push(r1);
        break;

      case OP_LOOKUP_DICT:
        pop(r1);  // key
        pop(r2);  // dictionary
        ensure_defined(r1);
        assert(r2.o->type == OBJECT_TYPE_DICTIONARY);
        r1.o = yr_object_dict_get_item(
            r2.o, 0, r1.ss->c_string);
        if (r1.o == NULL)
          r1.i = UNDEFINED;
        push(r1);
        break;

      case OP_CALL:
        args_fmt = *(char**)(ip + 1);
        ip += sizeof(uint64_t);
        i = (int) strlen(args_fmt);
        count = 0;
        // pop arguments from stack and copy them to args array
        while (i > 0)
        {
          pop(r1);
          if (is_undef(r1))  // count the number of undefined args
            count++;
          args[i - 1] = r1.i;
          i--;
        }
        pop(r2);
        ensure_defined(r2);
        if (count > 0)
        {
          // if there are undefined args, result for function call
          // is undefined as well.
          r1.i = UNDEFINED;
          push(r1);
          break;
        }
        function = (YR_OBJECT_FUNCTION*) r2.o;
        result = ERROR_INTERNAL_FATAL_ERROR;
        // Dispatch to the overload whose argument signature matches.
        for (i = 0; i < MAX_OVERLOADED_FUNCTIONS; i++)
        {
          if (function->prototypes[i].arguments_fmt == NULL)
            break;
          if (strcmp(function->prototypes[i].arguments_fmt, args_fmt) == 0)
          {
            result = function->prototypes[i].code(
                (void*) args,
                context,
                function);
            break;
          }
        }
        assert(i < MAX_OVERLOADED_FUNCTIONS);
        if (result == ERROR_SUCCESS)
        {
          r1.o = function->return_obj;
          push(r1);
        }
        else
        {
          stop = TRUE;
        }
        break;

      // ---- string-match tests ----

      case OP_FOUND:
        pop(r1);
        r1.i = r1.s->matches[tidx].tail != NULL ? 1 : 0;
        push(r1);
        break;

      case OP_FOUND_AT:
        pop(r2);
        pop(r1);
        if (is_undef(r1))
        {
          r1.i = 0;
          push(r1);
          break;
        }
        match = r2.s->matches[tidx].head;
        r3.i = FALSE;
        // match list is ordered by offset, so stop once past the target
        while (match != NULL)
        {
          if (r1.i == match->base + match->offset)
          {
            r3.i = TRUE;
            break;
          }
          if (r1.i < match->base + match->offset)
            break;
          match = match->next;
        }
        push(r3);
        break;

      case OP_FOUND_IN:
        pop(r3);
        pop(r2);
        pop(r1);
        ensure_defined(r1);
        ensure_defined(r2);
        match = r3.s->matches[tidx].head;
        r3.i = FALSE;
        while (match != NULL && !r3.i)
        {
          if (match->base + match->offset >= r1.i &&
              match->base + match->offset <= r2.i)
          {
            r3.i = TRUE;
          }
          if (match->base + match->offset > r2.i)
            break;
          match = match->next;
        }
        push(r3);
        break;

      case OP_COUNT:
        pop(r1);
        r1.i = r1.s->matches[tidx].count;
        push(r1);
        break;

      case OP_OFFSET:
        pop(r2);
        pop(r1);
        ensure_defined(r1);
        match = r2.s->matches[tidx].head;
        i = 1;
        r3.i = UNDEFINED;
        while (match != NULL && r3.i == UNDEFINED)
        {
          if (r1.i == i)
            r3.i = match->base + match->offset;
          i++;
          match = match->next;
        }
        push(r3);
        break;

      case OP_LENGTH:
        pop(r2);
        pop(r1);
        ensure_defined(r1);
        match = r2.s->matches[tidx].head;
        i = 1;
        r3.i = UNDEFINED;
        while (match != NULL && r3.i == UNDEFINED)
        {
          if (r1.i == i)
            r3.i = match->length;
          i++;
          match = match->next;
        }
        push(r3);
        break;

      case OP_OF:
        // "N of (...)": pop string refs until UNDEFINED terminator.
        found = 0;
        count = 0;
        pop(r1);
        while (!is_undef(r1))
        {
          if (r1.s->matches[tidx].tail != NULL)
            found++;
          count++;
          pop(r1);
        }
        pop(r2);
        if (is_undef(r2))
          r1.i = found >= count ? 1 : 0;  // "all of" / "any of" form
        else
          r1.i = found >= r2.i ? 1 : 0;
        push(r1);
        break;

      // ---- scanned-data accessors ----

      case OP_FILESIZE:
        r1.i = context->file_size;
        push(r1);
        break;

      case OP_ENTRYPOINT:
        r1.i = context->entry_point;
        push(r1);
        break;

      case OP_INT8:
        pop(r1);
        r1.i = read_int8_t_little_endian(context->mem_block, (size_t) r1.i);
        push(r1);
        break;

      case OP_INT16:
        pop(r1);
        r1.i = read_int16_t_little_endian(context->mem_block, (size_t) r1.i);
        push(r1);
        break;

      case OP_INT32:
        pop(r1);
        r1.i = read_int32_t_little_endian(context->mem_block, (size_t) r1.i);
        push(r1);
        break;

      case OP_UINT8:
        pop(r1);
        r1.i = read_uint8_t_little_endian(context->mem_block, (size_t) r1.i);
        push(r1);
        break;

      case OP_UINT16:
        pop(r1);
        r1.i = read_uint16_t_little_endian(context->mem_block, (size_t) r1.i);
        push(r1);
        break;

      case OP_UINT32:
        pop(r1);
        r1.i = read_uint32_t_little_endian(context->mem_block, (size_t) r1.i);
        push(r1);
        break;

      case OP_INT8BE:
        pop(r1);
        r1.i = read_int8_t_big_endian(context->mem_block, (size_t) r1.i);
        push(r1);
        break;

      case OP_INT16BE:
        pop(r1);
        r1.i = read_int16_t_big_endian(context->mem_block, (size_t) r1.i);
        push(r1);
        break;

      case OP_INT32BE:
        pop(r1);
        r1.i = read_int32_t_big_endian(context->mem_block, (size_t) r1.i);
        push(r1);
        break;

      case OP_UINT8BE:
        pop(r1);
        r1.i = read_uint8_t_big_endian(context->mem_block, (size_t) r1.i);
        push(r1);
        break;

      case OP_UINT16BE:
        pop(r1);
        r1.i = read_uint16_t_big_endian(context->mem_block, (size_t) r1.i);
        push(r1);
        break;

      case OP_UINT32BE:
        pop(r1);
        r1.i = read_uint32_t_big_endian(context->mem_block, (size_t) r1.i);
        push(r1);
        break;

      case OP_CONTAINS:
        pop(r2);
        pop(r1);
        ensure_defined(r1);
        ensure_defined(r2);
        r1.i = memmem(r1.ss->c_string, r1.ss->length,
                      r2.ss->c_string, r2.ss->length) != NULL;
        push(r1);
        break;

      case OP_IMPORT:
        r1.i = *(uint64_t*)(ip + 1);
        ip += sizeof(uint64_t);
        FAIL_ON_ERROR(yr_modules_load(
            (char*) r1.p,
            context));
        break;

      case OP_MATCHES:
        pop(r2);
        pop(r1);
        // FIX: both operands must be defined before dereferencing r1.ss;
        // previously an UNDEFINED string operand was dereferenced.
        ensure_defined(r2);
        ensure_defined(r1);
        if (r1.ss->length == 0)
        {
          r1.i = FALSE;
          push(r1);
          break;
        }
        r1.i = yr_re_exec(
            (uint8_t*) r2.p,
            (uint8_t*) r1.ss->c_string,
            r1.ss->length,
            RE_FLAGS_SCAN,
            NULL,
            NULL) >= 0;
        push(r1);
        break;

      case OP_INT_TO_DBL:
        // Convert the stack slot r1.i positions below the top to double.
        r1.i = *(uint64_t*)(ip + 1);
        ip += sizeof(uint64_t);
        r2 = stack[sp - r1.i];
        if (is_undef(r2))
          stack[sp - r1.i].i = UNDEFINED;
        else
          stack[sp - r1.i].d = r2.i;
        break;

      case OP_STR_TO_BOOL:
        pop(r1);
        ensure_defined(r1);
        r1.i = r1.ss->length > 0;
        push(r1);
        break;

      // ---- typed integer comparison/arithmetic ----

      case OP_INT_EQ:
        pop(r2);
        pop(r1);
        ensure_defined(r2);
        ensure_defined(r1);
        r1.i = r1.i == r2.i;
        push(r1);
        break;

      case OP_INT_NEQ:
        pop(r2);
        pop(r1);
        ensure_defined(r2);
        ensure_defined(r1);
        r1.i = r1.i != r2.i;
        push(r1);
        break;

      case OP_INT_LT:
        pop(r2);
        pop(r1);
        ensure_defined(r2);
        ensure_defined(r1);
        r1.i = r1.i < r2.i;
        push(r1);
        break;

      case OP_INT_GT:
        pop(r2);
        pop(r1);
        ensure_defined(r2);
        ensure_defined(r1);
        r1.i = r1.i > r2.i;
        push(r1);
        break;

      case OP_INT_LE:
        pop(r2);
        pop(r1);
        ensure_defined(r2);
        ensure_defined(r1);
        r1.i = r1.i <= r2.i;
        push(r1);
        break;

      case OP_INT_GE:
        pop(r2);
        pop(r1);
        ensure_defined(r2);
        ensure_defined(r1);
        r1.i = r1.i >= r2.i;
        push(r1);
        break;

      case OP_INT_ADD:
        pop(r2);
        pop(r1);
        ensure_defined(r2);
        ensure_defined(r1);
        r1.i = r1.i + r2.i;
        push(r1);
        break;

      case OP_INT_SUB:
        pop(r2);
        pop(r1);
        ensure_defined(r2);
        ensure_defined(r1);
        r1.i = r1.i - r2.i;
        push(r1);
        break;

      case OP_INT_MUL:
        pop(r2);
        pop(r1);
        ensure_defined(r2);
        ensure_defined(r1);
        r1.i = r1.i * r2.i;
        push(r1);
        break;

      case OP_INT_DIV:
        pop(r2);
        pop(r1);
        ensure_defined(r2);
        ensure_defined(r1);
        if (r2.i != 0)
          r1.i = r1.i / r2.i;
        else
          r1.i = UNDEFINED;  // division by zero yields UNDEFINED
        push(r1);
        break;

      case OP_INT_MINUS:
        pop(r1);
        ensure_defined(r1);
        r1.i = -r1.i;
        push(r1);
        break;

      // ---- typed double comparison/arithmetic ----

      case OP_DBL_LT:
        pop(r2);
        pop(r1);
        ensure_defined(r2);
        ensure_defined(r1);
        r1.i = r1.d < r2.d;
        push(r1);
        break;

      case OP_DBL_GT:
        pop(r2);
        pop(r1);
        ensure_defined(r2);
        ensure_defined(r1);
        r1.i = r1.d > r2.d;
        push(r1);
        break;

      case OP_DBL_LE:
        pop(r2);
        pop(r1);
        ensure_defined(r2);
        ensure_defined(r1);
        r1.i = r1.d <= r2.d;
        push(r1);
        break;

      case OP_DBL_GE:
        pop(r2);
        pop(r1);
        ensure_defined(r2);
        ensure_defined(r1);
        r1.i = r1.d >= r2.d;
        push(r1);
        break;

      case OP_DBL_EQ:
        pop(r2);
        pop(r1);
        ensure_defined(r2);
        ensure_defined(r1);
        r1.i = r1.d == r2.d;
        push(r1);
        break;

      case OP_DBL_NEQ:
        pop(r2);
        pop(r1);
        ensure_defined(r2);
        ensure_defined(r1);
        r1.i = r1.d != r2.d;
        push(r1);
        break;

      case OP_DBL_ADD:
        pop(r2);
        pop(r1);
        ensure_defined(r2);
        ensure_defined(r1);
        r1.d = r1.d + r2.d;
        push(r1);
        break;

      case OP_DBL_SUB:
        pop(r2);
        pop(r1);
        ensure_defined(r2);
        ensure_defined(r1);
        r1.d = r1.d - r2.d;
        push(r1);
        break;

      case OP_DBL_MUL:
        pop(r2);
        pop(r1);
        ensure_defined(r2);
        ensure_defined(r1);
        r1.d = r1.d * r2.d;
        push(r1);
        break;

      case OP_DBL_DIV:
        pop(r2);
        pop(r1);
        ensure_defined(r2);
        ensure_defined(r1);
        r1.d = r1.d / r2.d;
        push(r1);
        break;

      case OP_DBL_MINUS:
        pop(r1);
        ensure_defined(r1);
        r1.d = -r1.d;
        push(r1);
        break;

      // ---- string comparisons share operand handling ----

      case OP_STR_EQ:
      case OP_STR_NEQ:
      case OP_STR_LT:
      case OP_STR_LE:
      case OP_STR_GT:
      case OP_STR_GE:
        pop(r2);
        pop(r1);
        ensure_defined(r1);
        ensure_defined(r2);
        switch(*ip)
        {
          case OP_STR_EQ:
            r1.i = (sized_string_cmp(r1.ss, r2.ss) == 0);
            break;
          case OP_STR_NEQ:
            r1.i = (sized_string_cmp(r1.ss, r2.ss) != 0);
            break;
          case OP_STR_LT:
            r1.i = (sized_string_cmp(r1.ss, r2.ss) < 0);
            break;
          case OP_STR_LE:
            r1.i = (sized_string_cmp(r1.ss, r2.ss) <= 0);
            break;
          case OP_STR_GT:
            r1.i = (sized_string_cmp(r1.ss, r2.ss) > 0);
            break;
          case OP_STR_GE:
            r1.i = (sized_string_cmp(r1.ss, r2.ss) >= 0);
            break;
        }
        push(r1);
        break;

      default:
        // Unknown instruction, this shouldn't happen.
        assert(FALSE);
    }

    if (timeout > 0)  // timeout == 0 means no timeout
    {
      // Check for timeout every 10 instruction cycles.
      if (++cycle == 10)
      {
        if (difftime(time(NULL), start_time) > timeout)
        {
          #ifdef PROFILING_ENABLED
          assert(current_rule != NULL);
          current_rule->clock_ticks += clock() - start;
          #endif
          result = ERROR_SCAN_TIMEOUT;
          stop = TRUE;
        }
        cycle = 0;
      }
    }

    ip++;
  }

  yr_free(stack);
  return result;
}
// Loads the module named module_name into the given scan context: creates
// its structure object, lets the user callback supply optional module
// data, declares the module's symbols, registers the object in the
// context's objects table, and finally invokes the module's load hook.
// Returns ERROR_SUCCESS, ERROR_CALLBACK_ERROR, or an error propagated by
// the declaration/registration steps.
int yr_modules_load(
    const char* module_name,
    YR_SCAN_CONTEXT* context)
{
  int i, result;
  YR_MODULE_IMPORT mi;

  YR_OBJECT* module_structure = (YR_OBJECT*) yr_hash_table_lookup(
      context->objects_table,
      module_name,
      NULL);

  // If module_structure != NULL the module was already loaded for this
  // scan; return successfully without doing anything.
  if (module_structure != NULL)
    return ERROR_SUCCESS;

  // Not loaded yet: create the root structure object for the module.
  FAIL_ON_ERROR(yr_object_create(
      OBJECT_TYPE_STRUCTURE,
      module_name,
      NULL,
      &module_structure));

  mi.module_name = module_name;
  mi.module_data = NULL;
  mi.module_data_size = 0;

  // Give the user callback a chance to provide module data (or veto the
  // import by returning CALLBACK_ERROR).
  result = context->callback(
      CALLBACK_MSG_IMPORT_MODULE,
      &mi,
      context->user_data);

  if (result == CALLBACK_ERROR)
  {
    yr_object_destroy(module_structure);
    return ERROR_CALLBACK_ERROR;
  }

  FAIL_ON_ERROR_WITH_CLEANUP(
      yr_modules_do_declarations(module_name, module_structure),
      yr_object_destroy(module_structure));

  // After this point the objects table owns module_structure, so failure
  // paths below must not destroy it again.
  FAIL_ON_ERROR_WITH_CLEANUP(
      yr_hash_table_add(
          context->objects_table,
          module_name,
          NULL,
          module_structure),
      yr_object_destroy(module_structure));

  for (i = 0; i < sizeof(yr_modules_table) / sizeof(YR_MODULE); i++)
  {
    if (strcmp(yr_modules_table[i].name, module_name) == 0)
    {
      result = yr_modules_table[i].load(
          context,
          module_structure,
          mi.module_data,
          mi.module_data_size);

      // NOTE(review): returning here leaves the partially-loaded module
      // registered in objects_table — presumably cleaned up later by the
      // scan teardown; verify against callers.
      if (result != ERROR_SUCCESS)
        return result;
    }
  }

  // NOTE(review): the MODULE_IMPORTED callback's return value is stored
  // but never examined — the function returns ERROR_SUCCESS regardless.
  result = context->callback(
      CALLBACK_MSG_MODULE_IMPORTED,
      module_structure,
      context->user_data);

  return ERROR_SUCCESS;
}
// Executes the compiled rule bytecode on a small stack-based VM. This
// newer revision reads all scan parameters (rules, timeout, stopwatch)
// from the context, fetches the opcode before dispatch, and adds
// PARANOID_EXEC bounds/canary checks around memory-slot and object use.
int yr_execute_code(
    YR_SCAN_CONTEXT* context)
{
  int64_t mem[MEM_SIZE];
  int32_t sp = 0;

  const uint8_t* ip = context->rules->code_start;

  YR_VALUE args[YR_MAX_FUNCTION_ARGS];
  YR_VALUE *stack;
  YR_VALUE r1;
  YR_VALUE r2;
  YR_VALUE r3;

  uint64_t elapsed_time;

  #ifdef PROFILING_ENABLED
  uint64_t start_time;
  YR_RULE* current_rule = NULL;
  #endif

  YR_INIT_RULE_ARGS init_rule_args;

  YR_RULE* rule;
  YR_MATCH* match;
  YR_OBJECT_FUNCTION* function;
  YR_OBJECT** obj_ptr;
  YR_ARENA* obj_arena;

  char* identifier;
  char* args_fmt;

  int i;
  int found;
  int count;
  int result = ERROR_SUCCESS;
  int cycle = 0;
  int tidx = context->tidx;
  int stack_size;

  bool stop = false;
  uint8_t opcode;

  // Stack size is configurable instead of a compile-time constant.
  yr_get_configuration(YR_CONFIG_STACK_SIZE, (void*) &stack_size);

  stack = (YR_VALUE*) yr_malloc(stack_size * sizeof(YR_VALUE));

  if (stack == NULL)
    return ERROR_INSUFFICIENT_MEMORY;

  // obj_arena records copies of function return objects so they can be
  // destroyed before returning.
  FAIL_ON_ERROR_WITH_CLEANUP(
      yr_arena_create(1024, 0, &obj_arena),
      yr_free(stack));

  #ifdef PROFILING_ENABLED
  start_time = yr_stopwatch_elapsed_us(&context->stopwatch);
  #endif

  #if PARANOID_EXEC
  memset(mem, 0, MEM_SIZE * sizeof(mem[0]));
  #endif

  while(!stop)
  {
    // Fetch the opcode up front; ip now points at the operand (if any).
    opcode = *ip;
    ip++;

    switch(opcode)
    {
      case OP_NOP:
        break;

      case OP_HALT:
        assert(sp == 0);  // When HALT is reached the stack should be empty.
        stop = true;
        break;

      case OP_PUSH:
        r1.i = *(uint64_t*)(ip);
        ip += sizeof(uint64_t);
        push(r1);
        break;

      case OP_POP:
        pop(r1);
        break;

      case OP_CLEAR_M:
        r1.i = *(uint64_t*)(ip);
        ip += sizeof(uint64_t);
        #if PARANOID_EXEC
        ensure_within_mem(r1.i);
        #endif
        mem[r1.i] = 0;
        break;

      case OP_ADD_M:
        r1.i = *(uint64_t*)(ip);
        ip += sizeof(uint64_t);
        #if PARANOID_EXEC
        ensure_within_mem(r1.i);
        #endif
        pop(r2);
        if (!is_undef(r2))
          mem[r1.i] += r2.i;
        break;

      case OP_INCR_M:
        r1.i = *(uint64_t*)(ip);
        ip += sizeof(uint64_t);
        #if PARANOID_EXEC
        ensure_within_mem(r1.i);
        #endif
        mem[r1.i]++;
        break;

      case OP_PUSH_M:
        r1.i = *(uint64_t*)(ip);
        ip += sizeof(uint64_t);
        #if PARANOID_EXEC
        ensure_within_mem(r1.i);
        #endif
        r1.i = mem[r1.i];
        push(r1);
        break;

      case OP_POP_M:
        r1.i = *(uint64_t*)(ip);
        ip += sizeof(uint64_t);
        #if PARANOID_EXEC
        ensure_within_mem(r1.i);
        #endif
        pop(r2);
        mem[r1.i] = r2.i;
        break;

      case OP_SET_M:
        // Like OP_POP_M but leaves the value on the stack.
        r1.i = *(uint64_t*)(ip);
        ip += sizeof(uint64_t);
        #if PARANOID_EXEC
        ensure_within_mem(r1.i);
        #endif
        pop(r2);
        push(r2);
        if (!is_undef(r2))
          mem[r1.i] = r2.i;
        break;

      case OP_SWAPUNDEF:
        r1.i = *(uint64_t*)(ip);
        ip += sizeof(uint64_t);
        #if PARANOID_EXEC
        ensure_within_mem(r1.i);
        #endif
        pop(r2);
        if (is_undef(r2))
        {
          r1.i = mem[r1.i];
          push(r1);
        }
        else
        {
          push(r2);
        }
        break;

      case OP_JNUNDEF:
        pop(r1);
        push(r1);
        ip = jmp_if(!is_undef(r1), ip);
        break;

      case OP_JLE_P:
        // "_P" variants consume (pop) their operands instead of peeking.
        pop(r2);
        pop(r1);
        ip = jmp_if(r1.i <= r2.i, ip);
        break;

      case OP_JTRUE:
        pop(r1);
        push(r1);
        ip = jmp_if(!is_undef(r1) && r1.i, ip);
        break;

      case OP_JFALSE:
        pop(r1);
        push(r1);
        ip = jmp_if(is_undef(r1) || !r1.i, ip);
        break;

      case OP_JFALSE_P:
        pop(r1);
        ip = jmp_if(is_undef(r1) || !r1.i, ip);
        break;

      case OP_AND:
        pop(r2);
        pop(r1);
        if (is_undef(r1) || is_undef(r2))
          r1.i = 0;
        else
          r1.i = r1.i && r2.i;
        push(r1);
        break;

      case OP_OR:
        pop(r2);
        pop(r1);
        if (is_undef(r1))
        {
          push(r2);
        }
        else if (is_undef(r2))
        {
          push(r1);
        }
        else
        {
          r1.i = r1.i || r2.i;
          push(r1);
        }
        break;

      case OP_NOT:
        pop(r1);
        if (is_undef(r1))
          r1.i = UNDEFINED;
        else
          r1.i = !r1.i;
        push(r1);
        break;

      case OP_MOD:
        pop(r2);
        pop(r1);
        ensure_defined(r2);
        ensure_defined(r1);
        if (r2.i != 0)
          r1.i = r1.i % r2.i;
        else
          r1.i = UNDEFINED;
        push(r1);
        break;

      case OP_SHR:
        pop(r2);
        pop(r1);
        ensure_defined(r2);
        ensure_defined(r1);
        // Range-checked: avoids undefined behavior for counts <0 or >=64.
        if (r2.i < 0)
          r1.i = UNDEFINED;
        else if (r2.i < 64)
          r1.i = r1.i >> r2.i;
        else
          r1.i = 0;
        push(r1);
        break;

      case OP_SHL:
        pop(r2);
        pop(r1);
        ensure_defined(r2);
        ensure_defined(r1);
        if (r2.i < 0)
          r1.i = UNDEFINED;
        else if (r2.i < 64)
          r1.i = r1.i << r2.i;
        else
          r1.i = 0;
        push(r1);
        break;

      case OP_BITWISE_NOT:
        pop(r1);
        ensure_defined(r1);
        r1.i = ~r1.i;
        push(r1);
        break;

      case OP_BITWISE_AND:
        pop(r2);
        pop(r1);
        ensure_defined(r2);
        ensure_defined(r1);
        r1.i = r1.i & r2.i;
        push(r1);
        break;

      case OP_BITWISE_OR:
        pop(r2);
        pop(r1);
        ensure_defined(r2);
        ensure_defined(r1);
        r1.i = r1.i | r2.i;
        push(r1);
        break;

      case OP_BITWISE_XOR:
        pop(r2);
        pop(r1);
        ensure_defined(r2);
        ensure_defined(r1);
        r1.i = r1.i ^ r2.i;
        push(r1);
        break;

      case OP_PUSH_RULE:
        rule = *(YR_RULE**)(ip);
        ip += sizeof(uint64_t);
        // Disabled rules evaluate as UNDEFINED rather than false.
        if (RULE_IS_DISABLED(rule))
          r1.i = UNDEFINED;
        else
          r1.i = rule->t_flags[tidx] & RULE_TFLAGS_MATCH ? 1 : 0;
        push(r1);
        break;

      case OP_INIT_RULE:
        memcpy(&init_rule_args, ip, sizeof(init_rule_args));
        #ifdef PROFILING_ENABLED
        current_rule = init_rule_args.rule;
        #endif
        // Skip the whole condition of a disabled rule.
        if (RULE_IS_DISABLED(init_rule_args.rule))
          ip = init_rule_args.jmp_addr;
        else
          ip += sizeof(init_rule_args);
        break;

      case OP_MATCH_RULE:
        pop(r1);
        rule = *(YR_RULE**)(ip);
        ip += sizeof(uint64_t);
        if (!is_undef(r1) && r1.i)
          rule->t_flags[tidx] |= RULE_TFLAGS_MATCH;
        else if (RULE_IS_GLOBAL(rule))
          // An unmatched global rule invalidates its whole namespace.
          rule->ns->t_flags[tidx] |= NAMESPACE_TFLAGS_UNSATISFIED_GLOBAL;
        #ifdef PROFILING_ENABLED
        elapsed_time = yr_stopwatch_elapsed_us(&context->stopwatch);
        rule->time_cost_per_thread[tidx] += (elapsed_time - start_time);
        start_time = elapsed_time;
        #endif
        assert(sp == 0); // at this point the stack should be empty.
        break;

      case OP_OBJ_LOAD:
        identifier = *(char**)(ip);
        ip += sizeof(uint64_t);
        r1.o = (YR_OBJECT*) yr_hash_table_lookup(
            context->objects_table,
            identifier,
            NULL);
        assert(r1.o != NULL);
        push(r1);
        break;

      case OP_OBJ_FIELD:
        identifier = *(char**)(ip);
        ip += sizeof(uint64_t);
        pop(r1);
        ensure_defined(r1);
        r1.o = yr_object_lookup_field(r1.o, identifier);
        assert(r1.o != NULL);
        push(r1);
        break;

      case OP_OBJ_VALUE:
        pop(r1);
        ensure_defined(r1);
        #if PARANOID_EXEC
        check_object_canary(r1.o);
        #endif
        switch(r1.o->type)
        {
          case OBJECT_TYPE_INTEGER:
            r1.i = r1.o->value.i;
            break;
          case OBJECT_TYPE_FLOAT:
            if (isnan(r1.o->value.d))
              r1.i = UNDEFINED;
            else
              r1.d = r1.o->value.d;
            break;
          case OBJECT_TYPE_STRING:
            if (r1.o->value.ss == NULL)
              r1.i = UNDEFINED;
            else
              r1.ss = r1.o->value.ss;
            break;
          default:
            assert(false);
        }
        push(r1);
        break;

      case OP_INDEX_ARRAY:
        pop(r1);  // index
        pop(r2);  // array
        ensure_defined(r1);
        ensure_defined(r2);
        assert(r2.o->type == OBJECT_TYPE_ARRAY);
        #if PARANOID_EXEC
        check_object_canary(r2.o);
        #endif
        r1.o = yr_object_array_get_item(r2.o, 0, (int) r1.i);
        if (r1.o == NULL)
          r1.i = UNDEFINED;
        push(r1);
        break;

      case OP_LOOKUP_DICT:
        pop(r1);  // key
        pop(r2);  // dictionary
        ensure_defined(r1);
        ensure_defined(r2);
        assert(r2.o->type == OBJECT_TYPE_DICTIONARY);
        #if PARANOID_EXEC
        check_object_canary(r2.o);
        #endif
        r1.o = yr_object_dict_get_item(
            r2.o, 0, r1.ss->c_string);
        if (r1.o == NULL)
          r1.i = UNDEFINED;
        push(r1);
        break;

      case OP_CALL:
        args_fmt = *(char**)(ip);
        ip += sizeof(uint64_t);
        i = (int) strlen(args_fmt);
        count = 0;
        #if PARANOID_EXEC
        if (i > YR_MAX_FUNCTION_ARGS)
        {
          stop = true;
          result = ERROR_INTERNAL_FATAL_ERROR;
          break;
        }
        #endif
        // pop arguments from stack and copy them to args array
        while (i > 0)
        {
          pop(r1);
          if (is_undef(r1))  // count the number of undefined args
            count++;
          args[i - 1] = r1;
          i--;
        }
        pop(r2);
        ensure_defined(r2);
        #if PARANOID_EXEC
        check_object_canary(r2.o);
        #endif
        if (count > 0)
        {
          // if there are undefined args, result for function call
          // is undefined as well.
          r1.i = UNDEFINED;
          push(r1);
          break;
        }
        function = object_as_function(r2.o);
        result = ERROR_INTERNAL_FATAL_ERROR;
        for (i = 0; i < YR_MAX_OVERLOADED_FUNCTIONS; i++)
        {
          if (function->prototypes[i].arguments_fmt == NULL)
            break;
          if (strcmp(function->prototypes[i].arguments_fmt, args_fmt) == 0)
          {
            result = function->prototypes[i].code(args, context, function);
            break;
          }
        }
        // if i == YR_MAX_OVERLOADED_FUNCTIONS at this point no matching
        // prototype was found, but this shouldn't happen.
        assert(i < YR_MAX_OVERLOADED_FUNCTIONS);
        // make a copy of the returned object and push the copy into the stack
        // function->return_obj can't be pushed because it can change in
        // subsequent calls to the same function.
        if (result == ERROR_SUCCESS)
          result = yr_object_copy(function->return_obj, &r1.o);
        // a pointer to the copied object is stored in a arena in order to
        // free the object before exiting yr_execute_code
        if (result == ERROR_SUCCESS)
          result = yr_arena_write_data(obj_arena, &r1.o, sizeof(r1.o), NULL);
        stop = (result != ERROR_SUCCESS);
        push(r1);
        break;

      case OP_FOUND:
        pop(r1);
        r1.i = r1.s->matches[tidx].tail != NULL ? 1 : 0;
        push(r1);
        break;

      case OP_FOUND_AT:
        pop(r2);
        pop(r1);
        if (is_undef(r1))
        {
          r1.i = 0;
          push(r1);
          break;
        }
        match = r2.s->matches[tidx].head;
        r3.i = false;
        while (match != NULL)
        {
          if (r1.i == match->base + match->offset)
          {
            r3.i = true;
            break;
          }
          // match list is ordered by offset; no need to look further
          if (r1.i < match->base + match->offset)
            break;
          match = match->next;
        }
        push(r3);
        break;

      case OP_FOUND_IN:
        pop(r3);
        pop(r2);
        pop(r1);
        ensure_defined(r1);
        ensure_defined(r2);
        match = r3.s->matches[tidx].head;
        r3.i = false;
        while (match != NULL && !r3.i)
        {
          if (match->base + match->offset >= r1.i &&
              match->base + match->offset <= r2.i)
          {
            r3.i = true;
          }
          if (match->base + match->offset > r2.i)
            break;
          match = match->next;
        }
        push(r3);
        break;

      case OP_COUNT:
        pop(r1);
        #if PARANOID_EXEC
        // Make sure that the string pointer is within the rules arena.
        // NOTE(review): this bare return skips the obj_arena/stack
        // cleanup done on the normal exit path — verify against the
        // function's teardown code.
        if (yr_arena_page_for_address(context->rules->arena, r1.p) == NULL)
          return ERROR_INTERNAL_FATAL_ERROR;
        #endif
        r1.i = r1.s->matches[tidx].count;
        push(r1);
        break;

      case OP_OFFSET:
        pop(r2);
        pop(r1);
        ensure_defined(r1);
        match = r2.s->matches[tidx].head;
        i = 1;
        r3.i = UNDEFINED;
        while (match != NULL && r3.i == UNDEFINED)
        {
          if (r1.i == i)
            r3.i = match->base + match->offset;
          i++;
          match = match->next;
        }
        push(r3);
        break;

      case OP_LENGTH:
        pop(r2);
        pop(r1);
        ensure_defined(r1);
        match = r2.s->matches[tidx].head;
        i = 1;
        r3.i = UNDEFINED;
        while (match != NULL && r3.i == UNDEFINED)
        {
          if (r1.i == i)
            r3.i = match->match_length;
          i++;
          match = match->next;
        }
        push(r3);
        break;

      case OP_OF:
        // "N of (...)": pop string refs until the UNDEFINED terminator.
        found = 0;
        count = 0;
        pop(r1);
        while (!is_undef(r1))
        {
          if (r1.s->matches[tidx].tail != NULL)
            found++;
          count++;
          pop(r1);
        }
        pop(r2);
        if (is_undef(r2))
          r1.i = found >= count ? 1 : 0;
        else
          r1.i = found >= r2.i ? 1 : 0;
        push(r1);
        break;

      case OP_FILESIZE:
        r1.i = context->file_size;
        push(r1);
        break;

      case OP_ENTRYPOINT:
        r1.i = context->entry_point;
        push(r1);
        break;

      // ---- scanned-data reads now go through the block iterator ----

      case OP_INT8:
        pop(r1);
        r1.i = read_int8_t_little_endian(context->iterator, (size_t) r1.i);
        push(r1);
        break;

      case OP_INT16:
        pop(r1);
        r1.i = read_int16_t_little_endian(context->iterator, (size_t) r1.i);
        push(r1);
        break;

      case OP_INT32:
        pop(r1);
        r1.i = read_int32_t_little_endian(context->iterator, (size_t) r1.i);
        push(r1);
        break;

      case OP_UINT8:
        pop(r1);
        r1.i = read_uint8_t_little_endian(context->iterator, (size_t) r1.i);
        push(r1);
        break;

      case OP_UINT16:
        pop(r1);
        r1.i = read_uint16_t_little_endian(context->iterator, (size_t) r1.i);
        push(r1);
        break;

      case OP_UINT32:
        pop(r1);
        r1.i = read_uint32_t_little_endian(context->iterator, (size_t) r1.i);
        push(r1);
        break;

      case OP_INT8BE:
        pop(r1);
        r1.i = read_int8_t_big_endian(context->iterator, (size_t) r1.i);
        push(r1);
        break;

      case OP_INT16BE:
        pop(r1);
        r1.i = read_int16_t_big_endian(context->iterator, (size_t) r1.i);
        push(r1);
        break;

      case OP_INT32BE:
        pop(r1);
        r1.i = read_int32_t_big_endian(context->iterator, (size_t) r1.i);
        push(r1);
        break;

      case OP_UINT8BE:
        pop(r1);
        r1.i = read_uint8_t_big_endian(context->iterator, (size_t) r1.i);
        push(r1);
        break;

      case OP_UINT16BE:
        pop(r1);
        r1.i = read_uint16_t_big_endian(context->iterator, (size_t) r1.i);
        push(r1);
        break;

      case OP_UINT32BE:
        pop(r1);
        r1.i = read_uint32_t_big_endian(context->iterator, (size_t) r1.i);
        push(r1);
        break;

      case OP_CONTAINS:
        pop(r2);
        pop(r1);
        ensure_defined(r1);
        ensure_defined(r2);
        r1.i = memmem(r1.ss->c_string, r1.ss->length,
                      r2.ss->c_string, r2.ss->length) != NULL;
        push(r1);
        break;

      case OP_IMPORT:
        r1.i = *(uint64_t*)(ip);
        ip += sizeof(uint64_t);
        result = yr_modules_load((char*) r1.p, context);
        if (result != ERROR_SUCCESS)
          stop = true;
        break;

      case OP_MATCHES:
        pop(r2);
        pop(r1);
        ensure_defined(r2);
        ensure_defined(r1);
        if (r1.ss->length == 0)
        {
          r1.i = false;
          push(r1);
          break;
        }
        result = yr_re_exec(
            context,
            (uint8_t*) r2.re->code,
            (uint8_t*) r1.ss->c_string,
            r1.ss->length,
            0,
            r2.re->flags | RE_FLAGS_SCAN,
            NULL,
            NULL,
            &found);
        if (result != ERROR_SUCCESS)
          stop = true;
        r1.i = found >= 0;
        push(r1);
        break;

      case OP_INT_TO_DBL:
        // Convert the stack slot r1.i positions below the top to double.
        r1.i = *(uint64_t*)(ip);
        ip += sizeof(uint64_t);
        #if PARANOID_EXEC
        if (r1.i > sp || sp - r1.i >= stack_size)
        {
          stop = true;
          result = ERROR_INTERNAL_FATAL_ERROR;
          break;
        }
        #endif
        r2 = stack[sp - r1.i];
        if (is_undef(r2))
          stack[sp - r1.i].i = UNDEFINED;
        else
          stack[sp - r1.i].d = (double) r2.i;
        break;

      case OP_STR_TO_BOOL:
        pop(r1);
        ensure_defined(r1);
        r1.i = r1.ss->length > 0;
        push(r1);
        break;

      case OP_INT_EQ:
        pop(r2);
        pop(r1);
        ensure_defined(r2);
        ensure_defined(r1);
        r1.i = r1.i == r2.i;
        push(r1);
        break;

      case OP_INT_NEQ:
        pop(r2);
        pop(r1);
        ensure_defined(r2);
        ensure_defined(r1);
        r1.i = r1.i != r2.i;
        push(r1);
        break;

      case OP_INT_LT:
        pop(r2);
        pop(r1);
        ensure_defined(r2);
        ensure_defined(r1);
        r1.i = r1.i < r2.i;
        push(r1);
        break;

      case OP_INT_GT:
        pop(r2);
        pop(r1);
        ensure_defined(r2);
        ensure_defined(r1);
        r1.i = r1.i > r2.i;
        push(r1);
        break;

      case OP_INT_LE:
        pop(r2);
        pop(r1);
        ensure_defined(r2);
        ensure_defined(r1);
        r1.i = r1.i <= r2.i;
        push(r1);
        break;

      case OP_INT_GE:
        pop(r2);
        pop(r1);
        ensure_defined(r2);
        ensure_defined(r1);
        r1.i = r1.i >= r2.i;
        push(r1);
        break;

      case OP_INT_ADD:
        pop(r2);
        pop(r1);
        ensure_defined(r2);
        ensure_defined(r1);
        r1.i = r1.i + r2.i;
        push(r1);
        break;

      case OP_INT_SUB:
        pop(r2);
        pop(r1);
        ensure_defined(r2);
        ensure_defined(r1);
        r1.i = r1.i - r2.i;
        push(r1);
        break;

      case OP_INT_MUL:
        pop(r2);
        pop(r1);
        ensure_defined(r2);
        ensure_defined(r1);
        r1.i = r1.i * r2.i;
        push(r1);
        break;

      case OP_INT_DIV:
        pop(r2);
        pop(r1);
        ensure_defined(r2);
        ensure_defined(r1);
        if (r2.i != 0)
          r1.i = r1.i / r2.i;
        else
          r1.i = UNDEFINED;
        push(r1);
        break;

      case OP_INT_MINUS:
        pop(r1);
        ensure_defined(r1);
        r1.i = -r1.i;
        push(r1);
        break;

      case OP_DBL_LT:
        pop(r2);
        pop(r1);
        ensure_defined(r2);
        ensure_defined(r1);
        r1.i = r1.d < r2.d;
        push(r1);
        break;

      case OP_DBL_GT:
        pop(r2);
        pop(r1);
        ensure_defined(r2);
        ensure_defined(r1);
        r1.i = r1.d > r2.d;
        push(r1);
        break;

      case OP_DBL_LE:
        pop(r2);
        pop(r1);
        ensure_defined(r2);
        ensure_defined(r1);
        r1.i = r1.d <= r2.d;
        push(r1);
        break;

      case OP_DBL_GE:
        pop(r2);
        pop(r1);
        ensure_defined(r2);
        ensure_defined(r1);
        r1.i = r1.d >= r2.d;
        push(r1);
        break;

      case OP_DBL_EQ:
        pop(r2);
        pop(r1);
        ensure_defined(r2);
        ensure_defined(r1);
        // Epsilon comparison instead of exact == for doubles.
        r1.i = fabs(r1.d - r2.d) < DBL_EPSILON;
        push(r1);
        break;

      case OP_DBL_NEQ:
        pop(r2);
        pop(r1);
        ensure_defined(r2);
        ensure_defined(r1);
        r1.i = fabs(r1.d - r2.d) >= DBL_EPSILON;
        push(r1);
        break;

      case OP_DBL_ADD:
        pop(r2);
        pop(r1);
        ensure_defined(r2);
        ensure_defined(r1);
        r1.d = r1.d + r2.d;
        push(r1);
        break;

      case OP_DBL_SUB:
        pop(r2);
        pop(r1);
        ensure_defined(r2);
        ensure_defined(r1);
        r1.d = r1.d - r2.d;
        push(r1);
        break;

      case OP_DBL_MUL:
        pop(r2);
        pop(r1);
        ensure_defined(r2);
        ensure_defined(r1);
        r1.d = r1.d * r2.d;
        push(r1);
        break;

      case OP_DBL_DIV:
        pop(r2);
        pop(r1);
        ensure_defined(r2);
        ensure_defined(r1);
        r1.d = r1.d / r2.d;
        push(r1);
        break;

      case OP_DBL_MINUS:
        pop(r1);
        ensure_defined(r1);
        r1.d = -r1.d;
        push(r1);
        break;

      case OP_STR_EQ:
      case OP_STR_NEQ:
      case OP_STR_LT:
      case OP_STR_LE:
      case OP_STR_GT:
      case OP_STR_GE:
        pop(r2);
        pop(r1);
        ensure_defined(r1);
        ensure_defined(r2);
        switch(opcode)
        {
          case OP_STR_EQ:
            r1.i = (sized_string_cmp(r1.ss, r2.ss) == 0);
            break;
          case OP_STR_NEQ:
            r1.i = (sized_string_cmp(r1.ss, r2.ss) != 0);
            break;
          case OP_STR_LT:
            r1.i = (sized_string_cmp(r1.ss, r2.ss) < 0);
            break;
          case OP_STR_LE:
            r1.i = (sized_string_cmp(r1.ss, r2.ss) <= 0);
            break;
          case OP_STR_GT:
            r1.i = (sized_string_cmp(r1.ss, r2.ss) > 0);
            break;
          case OP_STR_GE:
            r1.i = (sized_string_cmp(r1.ss, r2.ss) >= 0);
            break;
        }
        push(r1);
        break;

      default:
        // Unknown instruction, this shouldn't happen.
        assert(false);
    }

    // Check for timeout every 10 instruction cycles. If timeout == 0 it means
    // no timeout at all.
    if (context->timeout > 0L && ++cycle == 10)
    {
      elapsed_time = yr_stopwatch_elapsed_us(&context->stopwatch);
      if (elapsed_time > context->timeout)
      {
        #ifdef PROFILING_ENABLED
        assert(current_rule != NULL);
        current_rule->time_cost_per_thread[tidx] += elapsed_time - start_time;
        #endif
        result = ERROR_SCAN_TIMEOUT;
        stop = true;
      }
      cycle = 0;
    }
  }
  // NOTE(review): this copy of the function is truncated in this chunk —
  // the tail that destroys the objects recorded in obj_arena, frees the
  // stack, and returns `result` is missing here.
int yr_parser_reduce_rule_declaration( yyscan_t yyscanner, int32_t flags, const char* identifier, char* tags, YR_STRING* strings, YR_META* metas) { YR_COMPILER* compiler = yyget_extra(yyscanner); YR_RULE* rule; YR_STRING* string; if (yr_hash_table_lookup( compiler->rules_table, identifier, compiler->current_namespace->name) != NULL) { // A rule with the same identifier already exists, return the // appropriate error. yr_compiler_set_error_extra_info(compiler, identifier); compiler->last_result = ERROR_DUPLICATE_RULE_IDENTIFIER; return compiler->last_result; } // Check for unreferenced (unused) strings. string = compiler->current_rule_strings; while(!STRING_IS_NULL(string)) { // Only the heading fragment in a chain of strings (the one with // chained_to == NULL) must be referenced. All other fragments // are never marked as referenced. if (!STRING_IS_REFERENCED(string) && string->chained_to == NULL) { yr_compiler_set_error_extra_info(compiler, string->identifier); compiler->last_result = ERROR_UNREFERENCED_STRING; break; } string = yr_arena_next_address( compiler->strings_arena, string, sizeof(YR_STRING)); } if (compiler->last_result != ERROR_SUCCESS) return compiler->last_result; compiler->last_result = yr_arena_allocate_struct( compiler->rules_arena, sizeof(YR_RULE), (void**) &rule, offsetof(YR_RULE, identifier), offsetof(YR_RULE, tags), offsetof(YR_RULE, strings), offsetof(YR_RULE, metas), offsetof(YR_RULE, ns), EOL); if (compiler->last_result != ERROR_SUCCESS) return compiler->last_result; compiler->last_result = yr_arena_write_string( compiler->sz_arena, identifier, &rule->identifier); if (compiler->last_result != ERROR_SUCCESS) return compiler->last_result; compiler->last_result = yr_parser_emit_with_arg_reloc( yyscanner, RULE_POP, PTR_TO_UINT64(rule), NULL); if (compiler->last_result != ERROR_SUCCESS) return compiler->last_result; rule->g_flags = flags | compiler->current_rule_flags; rule->tags = tags; rule->strings = strings; rule->metas = metas; rule->ns = 
compiler->current_namespace; compiler->current_rule_flags = 0; compiler->current_rule_strings = NULL; yr_hash_table_add( compiler->rules_table, identifier, compiler->current_namespace->name, (void*) rule); return compiler->last_result; }
//
// Virtual-machine interpreter for compiled YARA condition bytecode.
// Executes the instruction stream starting at rules->code_start, using a
// small operand stack (stack/sp, via the push/pop macros) and a scratch
// memory bank (mem) for loop variables. Runs until OP_HALT is reached or
// an error/timeout occurs.
//
// Args:
//   rules      - compiled rules; rules->code_start is the entry point.
//   context    - per-scan state (objects table, matches, file data).
//   timeout    - seconds before aborting with ERROR_SCAN_TIMEOUT;
//                0 means no timeout.
//   start_time - wall-clock time the scan started, used for the timeout.
//
// Returns ERROR_SUCCESS, ERROR_SCAN_TIMEOUT, or an error propagated from a
// module function call / module load.
//
int yr_execute_code(
    YR_RULES* rules,
    YR_SCAN_CONTEXT* context,
    int timeout,
    time_t start_time)
{
  // General-purpose operand registers.
  int64_t r1;
  int64_t r2;
  int64_t r3;

  int64_t mem[MEM_SIZE];             // scratch memory slots (loop counters etc.)
  int64_t stack[STACK_SIZE];         // operand stack, used by push()/pop()
  int64_t args[MAX_FUNCTION_ARGS];   // argument buffer for OP_CALL

  int32_t sp = 0;                    // stack pointer
  uint8_t* ip = rules->code_start;   // instruction pointer

  YR_RULE* rule;
  YR_STRING* string;
  YR_MATCH* match;
  YR_OBJECT* object;
  YR_OBJECT_FUNCTION* function;

  char* identifier;
  char* args_fmt;

  int i;
  int found;
  int count;
  int result;
  int cycle = 0;                     // instruction counter for timeout checks
  int tidx = yr_get_tidx();          // this thread's index into per-thread state

#ifdef PROFILING_ENABLED
  clock_t start = clock();
#endif

  while(1)
  {
    switch(*ip)
    {
      case OP_HALT:
        // When the halt instruction is reached the stack
        // should be empty.
        assert(sp == 0);
        return ERROR_SUCCESS;

      // ---- Stack and scratch-memory operations ---------------------------
      // Instructions with an immediate operand read it from the bytes
      // following the opcode and advance ip past it.

      case OP_PUSH:
        r1 = *(uint64_t*)(ip + 1);
        ip += sizeof(uint64_t);
        push(r1);
        break;

      case OP_POP:
        pop(r1);
        break;

      case OP_CLEAR_M:
        r1 = *(uint64_t*)(ip + 1);
        ip += sizeof(uint64_t);
        mem[r1] = 0;
        break;

      case OP_ADD_M:
        r1 = *(uint64_t*)(ip + 1);
        ip += sizeof(uint64_t);
        pop(r2);
        mem[r1] += r2;
        break;

      case OP_INCR_M:
        r1 = *(uint64_t*)(ip + 1);
        ip += sizeof(uint64_t);
        mem[r1]++;
        break;

      case OP_PUSH_M:
        r1 = *(uint64_t*)(ip + 1);
        ip += sizeof(uint64_t);
        push(mem[r1]);
        break;

      case OP_POP_M:
        r1 = *(uint64_t*)(ip + 1);
        ip += sizeof(uint64_t);
        pop(mem[r1]);
        break;

      case OP_SWAPUNDEF:
        // Replace top of stack with mem[operand] if it is UNDEFINED.
        r1 = *(uint64_t*)(ip + 1);
        ip += sizeof(uint64_t);
        pop(r2);
        if (r2 != UNDEFINED)
          push(r2);
        else
          push(mem[r1]);
        break;

      // ---- Conditional jumps (operand is an absolute target address) -----

      case OP_JNUNDEF:
        // Jump if top of stack is not UNDEFINED; value stays on the stack.
        pop(r1);
        push(r1);
        if (r1 != UNDEFINED)
        {
          ip = *(uint8_t**)(ip + 1);
          // ip will be incremented at the end of the loop,
          // decrement it here to compensate.
          ip--;
        }
        else
        {
          ip += sizeof(uint64_t);
        }
        break;

      case OP_JLE:
        // Jump if next-to-top <= top; both values stay on the stack.
        pop(r2);
        pop(r1);
        push(r1);
        push(r2);
        if (r1 <= r2)
        {
          ip = *(uint8_t**)(ip + 1);
          // ip will be incremented at the end of the loop,
          // decrement it here to compensate.
          ip--;
        }
        else
        {
          ip += sizeof(uint64_t);
        }
        break;

      // ---- Boolean logic with UNDEFINED propagation ----------------------

      case OP_AND:
        pop(r2);
        pop(r1);
        if (IS_UNDEFINED(r1) || IS_UNDEFINED(r2))
          push(0);
        else
          push(r1 && r2);
        break;

      case OP_OR:
        // UNDEFINED operands are ignored: the other operand decides.
        pop(r2);
        pop(r1);
        if (IS_UNDEFINED(r1))
          push(r2);
        else if (IS_UNDEFINED(r2))
          push(r1);
        else
          push(r1 || r2);
        break;

      case OP_NOT:
        pop(r1);
        if (IS_UNDEFINED(r1))
          push(UNDEFINED);
        else
          push(!r1);
        break;

      // ---- Comparisons and arithmetic (COMPARISON/OPERATION macros
      //      handle UNDEFINED operands) ------------------------------------

      case OP_LT:
        pop(r2);
        pop(r1);
        push(COMPARISON(<, r1, r2));
        break;

      case OP_GT:
        pop(r2);
        pop(r1);
        push(COMPARISON(>, r1, r2));
        break;

      case OP_LE:
        pop(r2);
        pop(r1);
        push(COMPARISON(<=, r1, r2));
        break;

      case OP_GE:
        pop(r2);
        pop(r1);
        push(COMPARISON(>=, r1, r2));
        break;

      case OP_EQ:
        pop(r2);
        pop(r1);
        push(COMPARISON(==, r1, r2));
        break;

      case OP_NEQ:
        pop(r2);
        pop(r1);
        push(COMPARISON(!=, r1, r2));
        break;

      // ---- C-string comparisons (operands are char* stored as uint64) ----

      case OP_SZ_EQ:
        pop(r2);
        pop(r1);
        if (IS_UNDEFINED(r1) || IS_UNDEFINED(r2))
          push(UNDEFINED);
        else
          push(strcmp(
              UINT64_TO_PTR(char*, r1),
              UINT64_TO_PTR(char*, r2)) == 0);
        break;

      case OP_SZ_NEQ:
        pop(r2);
        pop(r1);
        if (IS_UNDEFINED(r1) || IS_UNDEFINED(r2))
          push(UNDEFINED);
        else
          push(strcmp(
              UINT64_TO_PTR(char*, r1),
              UINT64_TO_PTR(char*, r2)) != 0);
        break;

      case OP_SZ_TO_BOOL:
        // A string is truthy when non-empty.
        pop(r1);
        if (IS_UNDEFINED(r1))
          push(UNDEFINED);
        else
          push(strlen(UINT64_TO_PTR(char*, r1)) > 0);
        break;

      case OP_ADD:
        pop(r2);
        pop(r1);
        push(OPERATION(+, r1, r2));
        break;

      case OP_SUB:
        pop(r2);
        pop(r1);
        push(OPERATION(-, r1, r2));
        break;

      case OP_MUL:
        pop(r2);
        pop(r1);
        push(OPERATION(*, r1, r2));
        break;

      case OP_DIV:
        pop(r2);
        pop(r1);
        push(OPERATION(/, r1, r2));
        break;

      case OP_MOD:
        pop(r2);
        pop(r1);
        push(OPERATION(%, r1, r2));
        break;

      case OP_SHR:
        pop(r2);
        pop(r1);
        push(OPERATION(>>, r1, r2));
        break;

      case OP_SHL:
        pop(r2);
        pop(r1);
        push(OPERATION(<<, r1, r2));
        break;

      case OP_BITWISE_NOT:
        pop(r1);
        push(IS_UNDEFINED(r1) ? UNDEFINED : ~r1);
        break;

      case OP_BITWISE_AND:
        pop(r2);
        pop(r1);
        push(OPERATION(&, r1, r2));
        break;

      case OP_BITWISE_OR:
        pop(r2);
        pop(r1);
        push(OPERATION(|, r1, r2));
        break;

      case OP_BITWISE_XOR:
        pop(r2);
        pop(r1);
        push(OPERATION(^, r1, r2));
        break;

      // ---- Rule references -----------------------------------------------

      case OP_PUSH_RULE:
        // Push 1/0 depending on whether the referenced rule already matched
        // in this thread.
        rule = *(YR_RULE**)(ip + 1);
        ip += sizeof(uint64_t);
        push(rule->t_flags[tidx] & RULE_TFLAGS_MATCH ? 1 : 0);
        break;

      case OP_MATCH_RULE:
        // Record the final result of a rule's condition.
        pop(r1);
        rule = *(YR_RULE**)(ip + 1);
        ip += sizeof(uint64_t);
        if (!IS_UNDEFINED(r1) && r1)
          rule->t_flags[tidx] |= RULE_TFLAGS_MATCH;
#ifdef PROFILING_ENABLED
        rule->clock_ticks += clock() - start;
        start = clock();
#endif
        break;

      // ---- Module object access ------------------------------------------

      case OP_OBJ_LOAD:
        identifier = *(char**)(ip + 1);
        ip += sizeof(uint64_t);
        object = (YR_OBJECT*) yr_hash_table_lookup(
            context->objects_table,
            identifier,
            NULL);
        assert(object != NULL);
        push(PTR_TO_UINT64(object));
        break;

      case OP_OBJ_FIELD:
        pop(r1);
        identifier = *(char**)(ip + 1);
        ip += sizeof(uint64_t);
        if (IS_UNDEFINED(r1))
        {
          push(UNDEFINED);
          break;
        }
        object = UINT64_TO_PTR(YR_OBJECT*, r1);
        object = yr_object_lookup_field(object, identifier);
        assert(object != NULL);
        push(PTR_TO_UINT64(object));
        break;

      case OP_OBJ_VALUE:
        // Dereference an object into its scalar value.
        pop(r1);
        if (IS_UNDEFINED(r1))
        {
          push(UNDEFINED);
          break;
        }
        object = UINT64_TO_PTR(YR_OBJECT*, r1);
        switch(object->type)
        {
          case OBJECT_TYPE_INTEGER:
            push(((YR_OBJECT_INTEGER*) object)->value);
            break;
          case OBJECT_TYPE_STRING:
            if (((YR_OBJECT_STRING*) object)->value != NULL)
              push(PTR_TO_UINT64(((YR_OBJECT_STRING*) object)->value));
            else
              push(UNDEFINED);
            break;
          default:
            assert(FALSE);
        }
        break;

      case OP_INDEX_ARRAY:
        pop(r1);  // index
        pop(r2);  // array
        if (IS_UNDEFINED(r1))
        {
          push(UNDEFINED);
          break;
        }
        object = UINT64_TO_PTR(YR_OBJECT*, r2);
        assert(object->type == OBJECT_TYPE_ARRAY);
        object = yr_object_array_get_item(object, 0, r1);
        if (object != NULL)
          push(PTR_TO_UINT64(object));
        else
          push(UNDEFINED);
        break;

      case OP_LOOKUP_DICT:
        pop(r1);  // key
        pop(r2);  // dictionary
        if (IS_UNDEFINED(r1))
        {
          push(UNDEFINED);
          break;
        }
        object = UINT64_TO_PTR(YR_OBJECT*, r2);
        assert(object->type == OBJECT_TYPE_DICTIONARY);
        object = yr_object_dict_get_item(
            object,
            0,
            UINT64_TO_PTR(const char*, r1));
        if (object != NULL)
          push(PTR_TO_UINT64(object));
        else
          push(UNDEFINED);
        break;

      case OP_CALL:
        // Call a module function. The operand is the argument-format string;
        // its length is the number of arguments sitting on the stack.
        args_fmt = *(char**)(ip + 1);
        ip += sizeof(uint64_t);
        i = strlen(args_fmt);
        // pop arguments from stack and copy them to args array
        while (i > 0)
        {
          pop(args[i - 1]);
          i--;
        }
        pop(r2);
        function = UINT64_TO_PTR(YR_OBJECT_FUNCTION*, r2);
        result = ERROR_INTERNAL_FATAL_ERROR;
        // Select the overload whose argument format matches args_fmt.
        for (i = 0; i < MAX_OVERLOADED_FUNCTIONS; i++)
        {
          if (function->prototypes[i].arguments_fmt == NULL)
            break;
          if (strcmp(function->prototypes[i].arguments_fmt, args_fmt) == 0)
          {
            result = function->prototypes[i].code(
                (void*) args,
                context,
                function);
            break;
          }
        }
        assert(i < MAX_OVERLOADED_FUNCTIONS);
        if (result == ERROR_SUCCESS)
          push(PTR_TO_UINT64(function->return_obj));
        else
          return result;
        break;

      // ---- String-match queries ($str, #str, @str, "at", "in", "of") -----

      case OP_STR_FOUND:
        pop(r1);
        string = UINT64_TO_PTR(YR_STRING*, r1);
        push(string->matches[tidx].tail != NULL ? 1 : 0);
        break;

      case OP_STR_FOUND_AT:
        // r1 = offset, r2 = string; matches are ordered by offset so the
        // scan can stop early.
        pop(r2);
        pop(r1);
        if (IS_UNDEFINED(r1))
        {
          push(0);
          break;
        }
        string = UINT64_TO_PTR(YR_STRING*, r2);
        match = string->matches[tidx].head;
        found = 0;
        while (match != NULL)
        {
          if (r1 == match->base + match->offset)
          {
            push(1);
            found = 1;
            break;
          }
          if (r1 < match->base + match->offset)
            break;
          match = match->next;
        }
        if (!found)
          push(0);
        break;

      case OP_STR_FOUND_IN:
        // r1..r2 = inclusive range, r3 = string.
        pop(r3);
        pop(r2);
        pop(r1);
        if (IS_UNDEFINED(r1) || IS_UNDEFINED(r2))
        {
          push(UNDEFINED);
          break;
        }
        string = UINT64_TO_PTR(YR_STRING*, r3);
        match = string->matches[tidx].head;
        found = FALSE;
        while (match != NULL && !found)
        {
          if (match->base + match->offset >= r1 &&
              match->base + match->offset <= r2)
          {
            push(1);
            found = TRUE;
          }
          if (match->base + match->offset > r2)
            break;
          match = match->next;
        }
        if (!found)
          push(0);
        break;

      case OP_STR_COUNT:
        pop(r1);
        string = UINT64_TO_PTR(YR_STRING*, r1);
        push(string->matches[tidx].count);
        break;

      case OP_STR_OFFSET:
        // r1 = 1-based occurrence index, r2 = string; pushes the offset of
        // the i-th match or UNDEFINED.
        pop(r2);
        pop(r1);
        if (IS_UNDEFINED(r1))
        {
          push(UNDEFINED);
          break;
        }
        string = UINT64_TO_PTR(YR_STRING*, r2);
        match = string->matches[tidx].head;
        i = 1;
        found = FALSE;
        while (match != NULL && !found)
        {
          if (r1 == i)
          {
            push(match->base + match->offset);
            found = TRUE;
          }
          i++;
          match = match->next;
        }
        if (!found)
          push(UNDEFINED);
        break;

      case OP_OF:
        // "N of (...)": pop string pointers until the UNDEFINED sentinel,
        // counting how many have at least one match, then compare against
        // the quantifier (UNDEFINED quantifier means "all of them").
        found = 0;
        count = 0;
        pop(r1);
        while (r1 != UNDEFINED)
        {
          string = UINT64_TO_PTR(YR_STRING*, r1);
          if (string->matches[tidx].tail != NULL)
            found++;
          count++;
          pop(r1);
        }
        pop(r2);
        if (r2 != UNDEFINED)
          push(found >= r2 ? 1 : 0);
        else
          push(found >= count ? 1 : 0);
        break;

      // ---- Scanned-data accessors ----------------------------------------

      case OP_FILESIZE:
        push(context->file_size);
        break;

      case OP_ENTRYPOINT:
        push(context->entry_point);
        break;

      case OP_INT8:
        pop(r1);
        push(read_int8_t(context->mem_block, r1));
        break;

      case OP_INT16:
        pop(r1);
        push(read_int16_t(context->mem_block, r1));
        break;

      case OP_INT32:
        pop(r1);
        push(read_int32_t(context->mem_block, r1));
        break;

      case OP_UINT8:
        pop(r1);
        push(read_uint8_t(context->mem_block, r1));
        break;

      case OP_UINT16:
        pop(r1);
        push(read_uint16_t(context->mem_block, r1));
        break;

      case OP_UINT32:
        pop(r1);
        push(read_uint32_t(context->mem_block, r1));
        break;

      case OP_CONTAINS:
        pop(r2);
        pop(r1);
        if (IS_UNDEFINED(r1) || IS_UNDEFINED(r2))
          push(UNDEFINED);
        else
          push(strstr(
              UINT64_TO_PTR(char*, r1),
              UINT64_TO_PTR(char*, r2)) != NULL);
        break;

      case OP_IMPORT:
        // Load the module named by the operand into this scan context.
        r1 = *(uint64_t*)(ip + 1);
        ip += sizeof(uint64_t);
        FAIL_ON_ERROR(yr_modules_load(
            UINT64_TO_PTR(char*, r1),
            context));
        break;

      case OP_MATCHES:
        // r1 = subject string, r2 = compiled regexp.
        pop(r2);
        pop(r1);
        count = strlen(UINT64_TO_PTR(char*, r1));
        if (count == 0)
        {
          push(FALSE);
          break;
        }
        result = yr_re_exec(
            UINT64_TO_PTR(uint8_t*, r2),
            UINT64_TO_PTR(uint8_t*, r1),
            count,
            RE_FLAGS_SCAN,
            NULL,
            NULL);
        push(result >= 0);
        break;

      default:
        // Unknown instruction, this shouldn't happen.
        assert(FALSE);
    }

    if (timeout > 0)  // timeout == 0 means no timeout
    {
      // Check for timeout every 10 instruction cycles.
      if (++cycle == 10)
      {
        if (difftime(time(NULL), start_time) > timeout)
          return ERROR_SCAN_TIMEOUT;
        cycle = 0;
      }
    }

    ip++;
  }

  // After executing the code the stack should be empty.
  // (Unreachable: the loop only exits via return statements above.)
  assert(sp == 0);

  return ERROR_SUCCESS;
}
//
// Phase 1 of a rule declaration: runs as soon as the rule's identifier is
// known, before the body is parsed. Allocates the YR_RULE in the rules
// arena, emits the OP_INIT_RULE instruction, and registers the rule in the
// rules table so later references (and duplicate checks) can find it.
//
// Args:
//   yyscanner  - lexer state, used to retrieve the YR_COMPILER.
//   flags      - rule flags coming from the grammar.
//   identifier - rule name.
//
// Returns the newly allocated rule, or NULL on error with the error code
// stored in compiler->last_result.
//
YR_RULE* yr_parser_reduce_rule_declaration_phase_1(
    yyscan_t yyscanner,
    int32_t flags,
    const char* identifier)
{
  YR_COMPILER* compiler = yyget_extra(yyscanner);
  YR_RULE* rule = NULL;

  if (yr_hash_table_lookup(
        compiler->rules_table,
        identifier,
        compiler->current_namespace->name) != NULL ||
      yr_hash_table_lookup(
        compiler->objects_table,
        identifier,
        compiler->current_namespace->name) != NULL)
  {
    // A rule or variable with the same identifier already exists, return the
    // appropriate error.
    yr_compiler_set_error_extra_info(compiler, identifier);
    compiler->last_result = ERROR_DUPLICATED_IDENTIFIER;
    return NULL;
  }

  compiler->last_result = yr_arena_allocate_struct(
      compiler->rules_arena,
      sizeof(YR_RULE),
      (void**) &rule,
      offsetof(YR_RULE, identifier),
      offsetof(YR_RULE, tags),
      offsetof(YR_RULE, strings),
      offsetof(YR_RULE, metas),
      offsetof(YR_RULE, ns),
      EOL);

  if (compiler->last_result != ERROR_SUCCESS)
    return NULL;

  rule->g_flags = flags;
  rule->ns = compiler->current_namespace;

#ifdef PROFILING_ENABLED
  rule->clock_ticks = 0;
#endif

  compiler->last_result = yr_arena_write_string(
      compiler->sz_arena,
      identifier,
      (char**) &rule->identifier);

  if (compiler->last_result != ERROR_SUCCESS)
    return NULL;

  compiler->last_result = yr_parser_emit_with_arg_reloc(
      yyscanner,
      OP_INIT_RULE,
      PTR_TO_INT64(rule),
      NULL,
      NULL);

  if (compiler->last_result == ERROR_SUCCESS)
    compiler->last_result = yr_hash_table_add(
        compiler->rules_table,
        identifier,
        compiler->current_namespace->name,
        (void*) rule);

  // BUGFIX: previously a failed emit or hash-table insertion still returned
  // a non-NULL rule, unlike every other error path in this function. Return
  // NULL so callers see the failure recorded in compiler->last_result.
  if (compiler->last_result != ERROR_SUCCESS)
    return NULL;

  // Clean strings_table as we are starting to parse a new rule.
  yr_hash_table_clean(compiler->strings_table, NULL);

  compiler->current_rule = rule;

  return rule;
}
//
// Reduces a string declaration ($id = "..."/ /regex/ / { hex }) into one or
// more YR_STRING structures. Text strings are written directly; hex strings
// and regexps are parsed into an RE, possibly split into a chain of string
// fragments at "chaining points" (e.g. large jumps), with each fragment
// written separately and linked via chained_to.
//
// Args:
//   yyscanner    - lexer state, used to retrieve the YR_COMPILER.
//   string_flags - STRING_GFLAGS_* from the grammar (wide, nocase, regexp...).
//   identifier   - string identifier ("$" for anonymous strings).
//   str          - literal text / regexp source / hex string body.
//
// Returns the head YR_STRING of the declaration, or NULL on error with the
// error code stored in compiler->last_result.
//
YR_STRING* yr_parser_reduce_string_declaration(
    yyscan_t yyscanner,
    int32_t string_flags,
    const char* identifier,
    SIZED_STRING* str)
{
  int min_atom_quality;        // worst atom quality across all fragments
  int min_atom_quality_aux;
  int re_flags = 0;
  int32_t min_gap;             // allowed gap between chained fragments
  int32_t max_gap;
  char message[512];

  YR_COMPILER* compiler = yyget_extra(yyscanner);

  YR_STRING* string = NULL;    // head fragment, returned to the caller
  YR_STRING* aux_string;
  YR_STRING* prev_string;

  RE* re = NULL;
  RE* remainder_re;
  RE_ERROR re_error;

  // Determine if a string with the same identifier was already defined
  // by searching for the identifier in string_table.
  string = yr_hash_table_lookup(
      compiler->strings_table,
      identifier,
      NULL);

  if (string != NULL)
  {
    compiler->last_result = ERROR_DUPLICATED_STRING_IDENTIFIER;
    yr_compiler_set_error_extra_info(compiler, identifier);
    goto _exit;
  }

  // Empty strings are not allowed.
  if (str->length == 0)
  {
    compiler->last_result = ERROR_EMPTY_STRING;
    yr_compiler_set_error_extra_info(compiler, identifier);
    goto _exit;
  }

  if (str->flags & SIZED_STRING_FLAGS_NO_CASE)
    string_flags |= STRING_GFLAGS_NO_CASE;

  if (str->flags & SIZED_STRING_FLAGS_DOT_ALL)
    re_flags |= RE_FLAGS_DOT_ALL;

  if (strcmp(identifier,"$") == 0)
    string_flags |= STRING_GFLAGS_ANONYMOUS;

  if (!(string_flags & STRING_GFLAGS_WIDE))
    string_flags |= STRING_GFLAGS_ASCII;

  if (string_flags & STRING_GFLAGS_NO_CASE)
    re_flags |= RE_FLAGS_NO_CASE;

  // The STRING_GFLAGS_SINGLE_MATCH flag indicates that finding
  // a single match for the string is enough. This is true in
  // most cases, except when the string count (#) and string offset (@)
  // operators are used. All strings are marked STRING_FLAGS_SINGLE_MATCH
  // initially, and unmarked later if required.
  string_flags |= STRING_GFLAGS_SINGLE_MATCH;

  // The STRING_GFLAGS_FIXED_OFFSET indicates that the string doesn't
  // need to be searched all over the file because the user is using the
  // "at" operator. The string must be searched at a fixed offset in the
  // file. All strings are marked STRING_GFLAGS_FIXED_OFFSET initially,
  // and unmarked later if required.
  string_flags |= STRING_GFLAGS_FIXED_OFFSET;

  if (string_flags & STRING_GFLAGS_HEXADECIMAL ||
      string_flags & STRING_GFLAGS_REGEXP)
  {
    // Hex strings and regexps share the RE machinery; only the parser
    // differs.
    if (string_flags & STRING_GFLAGS_HEXADECIMAL)
      compiler->last_result = yr_re_parse_hex(
          str->c_string, re_flags, &re, &re_error);
    else
      compiler->last_result = yr_re_parse(
          str->c_string, re_flags, &re, &re_error);

    if (compiler->last_result != ERROR_SUCCESS)
    {
      snprintf(
          message,
          sizeof(message),
          "invalid %s \"%s\": %s",
          (string_flags & STRING_GFLAGS_HEXADECIMAL) ?
              "hex string" : "regular expression",
          identifier,
          re_error.message);

      yr_compiler_set_error_extra_info(
          compiler, message);

      goto _exit;
    }

    if (re->flags & RE_FLAGS_FAST_HEX_REGEXP)
      string_flags |= STRING_GFLAGS_FAST_HEX_REGEXP;

    // Regular expressions in the strings section can't mix greedy and ungreedy
    // quantifiers like .* and .*?. That's because these regular expressions can
    // be matched forwards and/or backwards depending on the atom found, and we
    // need the regexp to be all-greedy or all-ungreedy to be able to properly
    // calculate the length of the match.
    if ((re->flags & RE_FLAGS_GREEDY) &&
        (re->flags & RE_FLAGS_UNGREEDY))
    {
      compiler->last_result = ERROR_INVALID_REGULAR_EXPRESSION;

      yr_compiler_set_error_extra_info(compiler,
          "greedy and ungreedy quantifiers can't be mixed in a regular "
          "expression");

      goto _exit;
    }

    if (re->flags & RE_FLAGS_GREEDY)
      string_flags |= STRING_GFLAGS_GREEDY_REGEXP;

    if (yr_re_contains_dot_star(re))
    {
      snprintf(
          message,
          sizeof(message),
          "%s contains .*, consider using .{N} with a reasonable value for N",
          identifier);

      yywarning(yyscanner, message);
    }

    // Split off the first fragment; remainder_re receives whatever follows
    // the first chaining point (NULL if the regexp is not chained).
    compiler->last_result = yr_re_split_at_chaining_point(
        re, &re, &remainder_re, &min_gap, &max_gap);

    if (compiler->last_result != ERROR_SUCCESS)
      goto _exit;

    compiler->last_result = _yr_parser_write_string(
        identifier,
        string_flags,
        compiler,
        NULL,
        re,
        &string,
        &min_atom_quality);

    if (compiler->last_result != ERROR_SUCCESS)
      goto _exit;

    if (remainder_re != NULL)
    {
      string->g_flags |= STRING_GFLAGS_CHAIN_TAIL | STRING_GFLAGS_CHAIN_PART;
      string->chain_gap_min = min_gap;
      string->chain_gap_max = max_gap;
    }

    // Use "aux_string" from now on, we want to keep the value of "string"
    // because it will returned.
    aux_string = string;

    while (remainder_re != NULL)
    {
      // Destroy regexp pointed by 're' before yr_re_split_at_jmp
      // overwrites 're' with another value.
      yr_re_destroy(re);

      compiler->last_result = yr_re_split_at_chaining_point(
          remainder_re, &re, &remainder_re, &min_gap, &max_gap);

      if (compiler->last_result != ERROR_SUCCESS)
        goto _exit;

      prev_string = aux_string;

      compiler->last_result = _yr_parser_write_string(
          identifier,
          string_flags,
          compiler,
          NULL,
          re,
          &aux_string,
          &min_atom_quality_aux);

      if (compiler->last_result != ERROR_SUCCESS)
        goto _exit;

      // Track the worst atom quality over the whole chain for the
      // slow-scanning warning below.
      if (min_atom_quality_aux < min_atom_quality)
        min_atom_quality = min_atom_quality_aux;

      aux_string->g_flags |= STRING_GFLAGS_CHAIN_PART;
      aux_string->chain_gap_min = min_gap;
      aux_string->chain_gap_max = max_gap;

      prev_string->chained_to = aux_string;

      // prev_string is now chained to aux_string, an string chained
      // to another one can't have a fixed offset, only the head of the
      // string chain can have a fixed offset.
      prev_string->g_flags &= ~STRING_GFLAGS_FIXED_OFFSET;
    }
  }
  else
  {
    // Plain text string: write it directly, no RE involved.
    compiler->last_result = _yr_parser_write_string(
        identifier,
        string_flags,
        compiler,
        str,
        NULL,
        &string,
        &min_atom_quality);

    if (compiler->last_result != ERROR_SUCCESS)
      goto _exit;
  }

  compiler->last_result = yr_hash_table_add(
      compiler->strings_table,
      identifier,
      NULL,
      string);

  if (compiler->last_result != ERROR_SUCCESS)
    goto _exit;

  if (min_atom_quality < 3 && compiler->callback != NULL)
  {
    snprintf(
        message,
        sizeof(message),
        "%s is slowing down scanning%s",
        string->identifier,
        min_atom_quality < 2 ? " (critical!)" : "");

    yywarning(yyscanner, message);
  }

_exit:

  // 're' always holds the last fragment's regexp (or NULL); every earlier
  // fragment was destroyed inside the chaining loop.
  if (re != NULL)
    yr_re_destroy(re);

  if (compiler->last_result != ERROR_SUCCESS)
    return NULL;

  return string;
}
//
// Phase 1 of a rule declaration (error-code returning variant): runs as soon
// as the rule's identifier is known. Allocates the YR_RULE in the rules
// arena, emits OP_INIT_RULE with a YR_INIT_RULE_ARGS operand whose jump
// target is patched later (phase 2) via the compiler's fixup stack, and
// registers the rule in the rules table.
//
// Args:
//   yyscanner  - lexer state, used to retrieve the YR_COMPILER.
//   flags      - rule flags coming from the grammar.
//   identifier - rule name.
//   rule       - out: receives the newly allocated rule (NULL on error).
//
// Returns ERROR_SUCCESS or an error code.
//
int yr_parser_reduce_rule_declaration_phase_1(
    yyscan_t yyscanner,
    int32_t flags,
    const char* identifier,
    YR_RULE** rule)
{
  YR_FIXUP *fixup;
  YR_INIT_RULE_ARGS *init_rule_args;
  YR_COMPILER* compiler = yyget_extra(yyscanner);

  *rule = NULL;

  if (yr_hash_table_lookup(
        compiler->rules_table,
        identifier,
        compiler->current_namespace->name) != NULL ||
      yr_hash_table_lookup(
        compiler->objects_table,
        identifier,
        NULL) != NULL)
  {
    // A rule or variable with the same identifier already exists, return the
    // appropriate error.
    yr_compiler_set_error_extra_info(compiler, identifier);
    return ERROR_DUPLICATED_IDENTIFIER;
  }

  FAIL_ON_ERROR(yr_arena_allocate_struct(
      compiler->rules_arena,
      sizeof(YR_RULE),
      (void**) rule,
      offsetof(YR_RULE, identifier),
      offsetof(YR_RULE, tags),
      offsetof(YR_RULE, strings),
      offsetof(YR_RULE, metas),
      offsetof(YR_RULE, ns),
      EOL))

  (*rule)->g_flags = flags;
  (*rule)->ns = compiler->current_namespace;
  (*rule)->num_atoms = 0;

#ifdef PROFILING_ENABLED
  (*rule)->time_cost = 0;
  memset(
      (*rule)->time_cost_per_thread, 0, sizeof((*rule)->time_cost_per_thread));
#endif

  FAIL_ON_ERROR(yr_arena_write_string(
      compiler->sz_arena,
      identifier,
      (char**) &(*rule)->identifier));

  FAIL_ON_ERROR(yr_parser_emit(
      yyscanner,
      OP_INIT_RULE,
      NULL));

  // OP_INIT_RULE's operand lives in the code arena right after the opcode.
  FAIL_ON_ERROR(yr_arena_allocate_struct(
      compiler->code_arena,
      sizeof(YR_INIT_RULE_ARGS),
      (void**) &init_rule_args,
      offsetof(YR_INIT_RULE_ARGS, rule),
      offsetof(YR_INIT_RULE_ARGS, jmp_addr),
      EOL));

  init_rule_args->rule = *rule;

  // jmp_addr holds the address to jump to when we want to skip the code for
  // the rule. It is initialized as NULL at this point because we don't know
  // the address until emitting the code for the rule's condition. The address
  // is set in yr_parser_reduce_rule_declaration_phase_2.
  init_rule_args->jmp_addr = NULL;

  // Create a fixup entry for the jump and push it in the stack
  fixup = (YR_FIXUP*) yr_malloc(sizeof(YR_FIXUP));

  if (fixup == NULL)
    return ERROR_INSUFFICIENT_MEMORY;

  // Once pushed, the fixup is owned by the compiler's fixup stack.
  fixup->address = (void*) &(init_rule_args->jmp_addr);
  fixup->next = compiler->fixup_stack_head;
  compiler->fixup_stack_head = fixup;

  // Clean strings_table as we are starting to parse a new rule.
  yr_hash_table_clean(compiler->strings_table, NULL);

  FAIL_ON_ERROR(yr_hash_table_add(
      compiler->rules_table,
      identifier,
      compiler->current_namespace->name,
      (void*) *rule));

  compiler->current_rule = *rule;

  return ERROR_SUCCESS;
}
//
// Reduces an "import" statement. Validates the module name, creates the
// module's top-level structure object in the current namespace, runs the
// module's declarations, and emits an OP_IMPORT instruction referencing the
// module name stored in the sz arena.
//
// Args:
//   yyscanner   - lexer state, used to retrieve the YR_COMPILER.
//   module_name - name of the module being imported.
//
// Returns ERROR_SUCCESS (including when the module was already imported in
// this namespace) or an error code.
//
int yr_parser_reduce_import(
    yyscan_t yyscanner,
    SIZED_STRING* module_name)
{
  int result;

  YR_COMPILER* compiler = yyget_extra(yyscanner);
  YR_OBJECT* module_structure;

  char* name;

  if (!_yr_parser_valid_module_name(module_name))
  {
    yr_compiler_set_error_extra_info(compiler, module_name->c_string);
    return ERROR_INVALID_MODULE_NAME;
  }

  module_structure = (YR_OBJECT*) yr_hash_table_lookup(
      compiler->objects_table,
      module_name->c_string,
      compiler->current_namespace->name);

  // if module already imported, do nothing
  if (module_structure != NULL)
    return ERROR_SUCCESS;

  FAIL_ON_ERROR(yr_object_create(
      OBJECT_TYPE_STRUCTURE,
      module_name->c_string,
      NULL,
      &module_structure));

  // BUGFIX: if the insertion fails the freshly created object is not yet
  // owned by the objects table, so returning without destroying it (as the
  // previous FAIL_ON_ERROR did) leaked it.
  result = yr_hash_table_add(
      compiler->objects_table,
      module_name->c_string,
      compiler->current_namespace->name,
      module_structure);

  if (result != ERROR_SUCCESS)
  {
    yr_object_destroy(module_structure);
    return result;
  }

  result = yr_modules_do_declarations(
      module_name->c_string,
      module_structure);

  if (result == ERROR_UNKNOWN_MODULE)
    yr_compiler_set_error_extra_info(compiler, module_name->c_string);

  if (result != ERROR_SUCCESS)
    return result;

  FAIL_ON_ERROR(yr_arena_write_string(
      compiler->sz_arena,
      module_name->c_string,
      &name));

  FAIL_ON_ERROR(yr_parser_emit_with_arg_reloc(
      yyscanner,
      OP_IMPORT,
      name,
      NULL,
      NULL));

  return ERROR_SUCCESS;
}