//
// yr_compiler_define_string_variable
//
// Defines an external variable of string type for the given compiler.
//
// Copies of `identifier` and `value` are written to the compiler's string
// arena, a YR_EXTERNAL_VARIABLE record pointing at those copies is allocated
// in the externals arena, and an object built from that record is added to
// compiler->objects_table under the variable's identifier with a NULL
// namespace.
//
// compiler->last_result is reset to ERROR_SUCCESS on entry and is the value
// returned when every step succeeds.
//
// NOTE(review): FAIL_ON_COMPILER_ERROR is defined elsewhere; presumably it
// stores the failing code in compiler->last_result and returns it.
//
int yr_compiler_define_string_variable(
    YR_COMPILER* compiler,
    const char* identifier,
    const char* value)
{
  YR_EXTERNAL_VARIABLE* ext_var;
  YR_OBJECT* var_object;

  char* identifier_copy = NULL;
  char* value_copy = NULL;

  compiler->last_result = ERROR_SUCCESS;

  // Both strings are copied into the arena before the record is allocated.
  FAIL_ON_COMPILER_ERROR(yr_arena_write_string(
      compiler->sz_arena,
      identifier,
      &identifier_copy));

  FAIL_ON_COMPILER_ERROR(yr_arena_write_string(
      compiler->sz_arena,
      value,
      &value_copy));

  // The offsetof() arguments name the two pointer members of the record.
  FAIL_ON_COMPILER_ERROR(yr_arena_allocate_struct(
      compiler->externals_arena,
      sizeof(YR_EXTERNAL_VARIABLE),
      (void**) &ext_var,
      offsetof(YR_EXTERNAL_VARIABLE, identifier),
      offsetof(YR_EXTERNAL_VARIABLE, string),
      EOL));

  ext_var->identifier = identifier_copy;
  ext_var->string = value_copy;
  ext_var->integer = 0;
  ext_var->type = EXTERNAL_VARIABLE_TYPE_STRING;

  FAIL_ON_COMPILER_ERROR(yr_object_from_external_variable(
      ext_var,
      &var_object));

  FAIL_ON_COMPILER_ERROR(yr_hash_table_add(
      compiler->objects_table,
      ext_var->identifier,
      NULL,
      (void*) var_object));

  return compiler->last_result;
}
//
// yr_parser_reduce_import
//
// Handles an "import" statement for the module named by `module_name`.
//
// If an object with that name already exists in compiler->objects_table for
// the current namespace, the function returns ERROR_SUCCESS immediately and
// leaves compiler->last_result untouched. Otherwise it creates a structure
// object for the module, registers it in the objects table, runs the module's
// declarations, copies the module name into the string arena and emits an
// OP_IMPORT instruction whose argument is that copy.
//
// The result of each step is stored in compiler->last_result; the first
// failing step ends the function and its code is returned. When the module
// is unknown, its name is recorded as extra error information.
//
int yr_parser_reduce_import(
    yyscan_t yyscanner,
    SIZED_STRING* module_name)
{
  YR_COMPILER* compiler = yyget_extra(yyscanner);
  YR_OBJECT* module_object;
  char* name_copy;

  module_object = (YR_OBJECT*) yr_hash_table_lookup(
      compiler->objects_table,
      module_name->c_string,
      compiler->current_namespace->name);

  // Module already imported in this namespace: nothing to do.
  if (module_object != NULL)
    return ERROR_SUCCESS;

  compiler->last_result = yr_object_create(
      OBJECT_TYPE_STRUCTURE,
      module_name->c_string,
      NULL,
      &module_object);

  if (compiler->last_result != ERROR_SUCCESS)
    return compiler->last_result;

  compiler->last_result = yr_hash_table_add(
      compiler->objects_table,
      module_name->c_string,
      compiler->current_namespace->name,
      module_object);

  if (compiler->last_result != ERROR_SUCCESS)
    return compiler->last_result;

  compiler->last_result = yr_modules_do_declarations(
      module_name->c_string,
      module_object);

  if (compiler->last_result == ERROR_UNKNOWN_MODULE)
  {
    yr_compiler_set_error_extra_info(compiler, module_name->c_string);
  }

  if (compiler->last_result != ERROR_SUCCESS)
    return compiler->last_result;

  compiler->last_result = yr_arena_write_string(
      compiler->sz_arena,
      module_name->c_string,
      &name_copy);

  if (compiler->last_result != ERROR_SUCCESS)
    return compiler->last_result;

  compiler->last_result = yr_parser_emit_with_arg_reloc(
      yyscanner,
      OP_IMPORT,
      PTR_TO_INT64(name_copy),
      NULL,
      NULL);

  return compiler->last_result;
}
//
// yr_parser_reduce_meta_declaration
//
// Allocates a YR_META record in the compiler's metas arena and fills it in.
//
// `identifier` is always copied into the string arena. `string` is copied
// only when it is not NULL; otherwise the record's string member is set to
// NULL. `type` and `integer` are stored verbatim.
//
// Returns the new record, or NULL if any arena operation fails, in which
// case the failing code is left in compiler->last_result.
//
YR_META* yr_parser_reduce_meta_declaration(
    yyscan_t yyscanner,
    int32_t type,
    const char* identifier,
    const char* string,
    int32_t integer)
{
  YR_COMPILER* compiler = yyget_extra(yyscanner);
  YR_META* new_meta;

  compiler->last_result = yr_arena_allocate_struct(
      compiler->metas_arena,
      sizeof(YR_META),
      (void**) &new_meta,
      offsetof(YR_META, identifier),
      offsetof(YR_META, string),
      EOL);

  if (compiler->last_result != ERROR_SUCCESS)
    return NULL;

  compiler->last_result = yr_arena_write_string(
      compiler->sz_arena,
      identifier,
      &new_meta->identifier);

  if (compiler->last_result != ERROR_SUCCESS)
    return NULL;

  if (string == NULL)
  {
    // No string value supplied for this meta entry.
    new_meta->string = NULL;
  }
  else
  {
    compiler->last_result = yr_arena_write_string(
        compiler->sz_arena,
        string,
        &new_meta->string);

    if (compiler->last_result != ERROR_SUCCESS)
      return NULL;
  }

  new_meta->type = type;
  new_meta->integer = integer;

  return new_meta;
}
//
// _yr_compiler_set_namespace
//
// Makes the namespace called `namespace_` the compiler's current namespace.
//
// The first compiler->namespaces_count records of the namespaces arena are
// compared by name. If one matches it becomes the current namespace.
// Otherwise the name is copied into the string arena, a new YR_NAMESPACE
// record is allocated with all of its t_flags cleared, the namespace counter
// is incremented and the new record becomes current.
//
// Returns ERROR_SUCCESS, or the error code of the arena operation that
// failed (in which case the current namespace is left unchanged).
//
int _yr_compiler_set_namespace(
    YR_COMPILER* compiler,
    const char* namespace_)
{
  YR_NAMESPACE* candidate;
  char* name_copy;
  int status;
  int idx;

  candidate = (YR_NAMESPACE*) yr_arena_base_address(
      compiler->namespaces_arena);

  // Look for an existing namespace with the requested name.
  for (idx = 0; idx < compiler->namespaces_count; idx++)
  {
    if (strcmp(candidate->name, namespace_) == 0)
    {
      compiler->current_namespace = candidate;
      return ERROR_SUCCESS;
    }

    candidate = (YR_NAMESPACE*) yr_arena_next_address(
        compiler->namespaces_arena,
        candidate,
        sizeof(YR_NAMESPACE));
  }

  // Not found: create a new namespace record.
  status = yr_arena_write_string(
      compiler->sz_arena,
      namespace_,
      &name_copy);

  if (status != ERROR_SUCCESS)
    return status;

  status = yr_arena_allocate_struct(
      compiler->namespaces_arena,
      sizeof(YR_NAMESPACE),
      (void**) &candidate,
      offsetof(YR_NAMESPACE, name),
      EOL);

  if (status != ERROR_SUCCESS)
    return status;

  candidate->name = name_copy;

  for (idx = 0; idx < MAX_THREADS; idx++)
    candidate->t_flags[idx] = 0;

  compiler->namespaces_count++;
  compiler->current_namespace = candidate;

  return ERROR_SUCCESS;
}
//
// yr_parser_reduce_meta_declaration
//
// Allocates a YR_META record in the compiler's metas arena, stores its
// address in *meta and fills it in.
//
// `identifier` is always copied into the string arena. `string` is copied
// only when it is not NULL; otherwise the record's string member is set to
// NULL. `type` and `integer` are stored verbatim.
//
// Returns ERROR_SUCCESS when every step succeeds.
//
// NOTE(review): FAIL_ON_ERROR is defined elsewhere; presumably it returns
// the error code of the wrapped call when that call fails.
//
int yr_parser_reduce_meta_declaration(
    yyscan_t yyscanner,
    int32_t type,
    const char* identifier,
    const char* string,
    int64_t integer,
    YR_META** meta)
{
  YR_COMPILER* compiler = yyget_extra(yyscanner);
  YR_META* entry;

  // The arena writes the address of the new record straight into *meta.
  FAIL_ON_ERROR(yr_arena_allocate_struct(
      compiler->metas_arena,
      sizeof(YR_META),
      (void**) meta,
      offsetof(YR_META, identifier),
      offsetof(YR_META, string),
      EOL));

  entry = *meta;

  FAIL_ON_ERROR(yr_arena_write_string(
      compiler->sz_arena,
      identifier,
      (char**) &entry->identifier));

  if (string == NULL)
  {
    entry->string = NULL;
  }
  else
  {
    FAIL_ON_ERROR(yr_arena_write_string(
        compiler->sz_arena,
        string,
        &entry->string));
  }

  entry->type = type;
  entry->integer = integer;

  return ERROR_SUCCESS;
}
//
// yr_compiler_define_float_variable
//
// Defines an external variable of floating-point type for the given compiler.
//
// A copy of `identifier` is written to the compiler's string arena, a
// YR_EXTERNAL_VARIABLE record carrying that copy and `value` is allocated in
// the externals arena, and an object built from the record is added to
// compiler->objects_table under the variable's identifier with a NULL
// namespace.
//
// compiler->last_result is reset to ERROR_SUCCESS on entry. Returns
// ERROR_SUCCESS when every step succeeds.
//
// NOTE(review): FAIL_ON_COMPILER_ERROR is defined elsewhere; presumably it
// stores the failing code in compiler->last_result and returns it.
//
YR_API int yr_compiler_define_float_variable(
    YR_COMPILER* compiler,
    const char* identifier,
    double value)
{
  YR_EXTERNAL_VARIABLE* ext_var;
  YR_OBJECT* var_object;
  char* identifier_copy;

  compiler->last_result = ERROR_SUCCESS;

  FAIL_ON_COMPILER_ERROR(yr_arena_write_string(
      compiler->sz_arena,
      identifier,
      &identifier_copy));

  // Only the identifier member is listed as a pointer member here.
  FAIL_ON_COMPILER_ERROR(yr_arena_allocate_struct(
      compiler->externals_arena,
      sizeof(YR_EXTERNAL_VARIABLE),
      (void**) &ext_var,
      offsetof(YR_EXTERNAL_VARIABLE, identifier),
      EOL));

  ext_var->identifier = identifier_copy;
  ext_var->value.f = value;
  ext_var->type = EXTERNAL_VARIABLE_TYPE_FLOAT;

  FAIL_ON_COMPILER_ERROR(yr_object_from_external_variable(
      ext_var,
      &var_object));

  FAIL_ON_COMPILER_ERROR(yr_hash_table_add(
      compiler->objects_table,
      ext_var->identifier,
      NULL,
      (void*) var_object));

  return ERROR_SUCCESS;
}
//
// yr_parser_reduce_rule_declaration
//
// Finishes the declaration of a rule once its whole body has been parsed.
//
// Steps, in order:
//   1. Fail with ERROR_DUPLICATE_RULE_IDENTIFIER if a rule with the same
//      identifier already exists in the current namespace.
//   2. Fail with ERROR_UNREFERENCED_STRING if any head string of the rule
//      (chained_to == NULL) was never referenced.
//   3. Allocate the YR_RULE record, copy the identifier into the string
//      arena and emit a RULE_POP instruction whose argument is the rule.
//   4. Fill in the record, reset the per-rule compiler state and register
//      the rule in compiler->rules_table.
//
// Returns compiler->last_result. Note that the check after step 2 reads
// compiler->last_result without resetting it first, so an error code left
// there by earlier parsing also ends the function at that point.
//
int yr_parser_reduce_rule_declaration(
    yyscan_t yyscanner,
    int32_t flags,
    const char* identifier,
    char* tags,
    YR_STRING* strings,
    YR_META* metas)
{
  YR_COMPILER* compiler = yyget_extra(yyscanner);

  YR_RULE* rule;
  YR_STRING* string;

  if (yr_hash_table_lookup(
        compiler->rules_table,
        identifier,
        compiler->current_namespace->name) != NULL)
  {
    // A rule with the same identifier already exists, return the
    // appropriate error.

    yr_compiler_set_error_extra_info(compiler, identifier);
    compiler->last_result = ERROR_DUPLICATE_RULE_IDENTIFIER;
    return compiler->last_result;
  }

  // Check for unreferenced (unused) strings.

  string = compiler->current_rule_strings;

  while (!STRING_IS_NULL(string))
  {
    // Only the heading fragment in a chain of strings (the one with
    // chained_to == NULL) must be referenced. All other fragments
    // are never marked as referenced.

    if (!STRING_IS_REFERENCED(string) &&
        string->chained_to == NULL)
    {
      yr_compiler_set_error_extra_info(compiler, string->identifier);
      compiler->last_result = ERROR_UNREFERENCED_STRING;
      break;
    }

    string = yr_arena_next_address(
        compiler->strings_arena,
        string,
        sizeof(YR_STRING));
  }

  if (compiler->last_result != ERROR_SUCCESS)
    return compiler->last_result;

  compiler->last_result = yr_arena_allocate_struct(
      compiler->rules_arena,
      sizeof(YR_RULE),
      (void**) &rule,
      offsetof(YR_RULE, identifier),
      offsetof(YR_RULE, tags),
      offsetof(YR_RULE, strings),
      offsetof(YR_RULE, metas),
      offsetof(YR_RULE, ns),
      EOL);

  if (compiler->last_result != ERROR_SUCCESS)
    return compiler->last_result;

  compiler->last_result = yr_arena_write_string(
      compiler->sz_arena,
      identifier,
      &rule->identifier);

  if (compiler->last_result != ERROR_SUCCESS)
    return compiler->last_result;

  compiler->last_result = yr_parser_emit_with_arg_reloc(
      yyscanner,
      RULE_POP,
      PTR_TO_UINT64(rule),
      NULL);

  if (compiler->last_result != ERROR_SUCCESS)
    return compiler->last_result;

  rule->g_flags = flags | compiler->current_rule_flags;
  rule->tags = tags;
  rule->strings = strings;
  rule->metas = metas;
  rule->ns = compiler->current_namespace;

  compiler->current_rule_flags = 0;
  compiler->current_rule_strings = NULL;

  // Keep the result of the registration: if the rule cannot be added to the
  // table, the duplicate check above would silently miss it later on, so the
  // failure must be reported to the caller instead of being discarded.
  compiler->last_result = yr_hash_table_add(
      compiler->rules_table,
      identifier,
      compiler->current_namespace->name,
      (void*) rule);

  return compiler->last_result;
}
//
// _yr_parser_write_string
//
// Allocates a YR_STRING in the strings arena, fills it in and registers it
// with the Aho-Corasick automaton.
//
//   identifier       Name of the string; copied into the string arena.
//   flags            STRING_GFLAGS_* bits describing the string.
//   compiler         Compiler owning the arenas and the automaton.
//   str              Text of the string; used as the literal when the string
//                    is neither hexadecimal nor a regular expression.
//   re               Regular expression for hex/regexp strings.
//   string           Receives the address of the new YR_STRING.
//   min_atom_length  Receives the length of the shortest extracted atom, or
//                    0 when no atoms were extracted.
//
// Returns ERROR_SUCCESS or the code of the first failing step. The atom
// length and the STRING_GFLAGS_FITS_IN_ATOM flag are computed even when an
// intermediate step failed.
//
int _yr_parser_write_string(
    const char* identifier,
    int flags,
    YR_COMPILER* compiler,
    SIZED_STRING* str,
    RE* re,
    YR_STRING** string,
    int* min_atom_length)
{
  YR_ATOM_LIST_ITEM* atoms = NULL;
  YR_ATOM_LIST_ITEM* item;
  YR_AC_MATCH* root_match;
  YR_STRING* new_string;
  SIZED_STRING* literal;

  int owns_literal = FALSE;
  int shortest_atom;
  int max_len;
  int status;

  *string = NULL;

  status = yr_arena_allocate_struct(
      compiler->strings_arena,
      sizeof(YR_STRING),
      (void**) string,
      offsetof(YR_STRING, identifier),
      offsetof(YR_STRING, string),
      offsetof(YR_STRING, chained_to),
      EOL);

  if (status != ERROR_SUCCESS)
    return status;

  new_string = *string;

  status = yr_arena_write_string(
      compiler->sz_arena,
      identifier,
      &new_string->identifier);

  if (status != ERROR_SUCCESS)
    return status;

  if (flags & (STRING_GFLAGS_HEXADECIMAL | STRING_GFLAGS_REGEXP))
  {
    // A hex string or regexp may still boil down to a plain literal. The
    // extracted literal is released with yr_free() before returning.
    literal = yr_re_extract_literal(re);

    if (literal != NULL)
    {
      owns_literal = TRUE;
      flags |= STRING_GFLAGS_LITERAL;
    }
  }
  else
  {
    literal = str;
    flags |= STRING_GFLAGS_LITERAL;
  }

  new_string->chained_to = NULL;
  new_string->g_flags = flags;

  memset(new_string->matches, 0, sizeof(new_string->matches));

  memset(
      new_string->unconfirmed_matches,
      0,
      sizeof(new_string->unconfirmed_matches));

  if (flags & STRING_GFLAGS_LITERAL)
  {
    new_string->length = literal->length;

    status = yr_arena_write_data(
        compiler->sz_arena,
        literal->c_string,
        literal->length,
        (void*) &new_string->string);

    if (status == ERROR_SUCCESS)
    {
      status = yr_atoms_extract_from_string(
          (uint8_t*) literal->c_string,
          literal->length,
          flags,
          &atoms);
    }
  }
  else
  {
    status = yr_re_emit_code(re, compiler->re_code_arena);

    if (status == ERROR_SUCCESS)
      status = yr_atoms_extract_from_re(re, flags, &atoms);
  }

  if (status == ERROR_SUCCESS)
  {
    if (atoms != NULL)
    {
      // Add the string to the Aho-Corasick automaton through its atoms.
      status = yr_ac_add_string(
          compiler->automaton_arena,
          compiler->automaton,
          new_string,
          atoms);
    }
    else
    {
      // No atoms: hang the string directly from the automaton's root.
      status = yr_arena_allocate_struct(
          compiler->automaton_arena,
          sizeof(YR_AC_MATCH),
          (void**) &root_match,
          offsetof(YR_AC_MATCH, string),
          offsetof(YR_AC_MATCH, forward_code),
          offsetof(YR_AC_MATCH, backward_code),
          offsetof(YR_AC_MATCH, next),
          EOL);

      if (status == ERROR_SUCCESS)
      {
        // NOTE(review): `re` is dereferenced here without a NULL check;
        // confirm callers never reach this branch with re == NULL.
        root_match->backtrack = 0;
        root_match->string = new_string;
        root_match->forward_code = re->root_node->forward_code;
        root_match->backward_code = NULL;
        root_match->next = compiler->automaton->root->matches;
        compiler->automaton->root->matches = root_match;
      }
    }
  }

  // Length of the shortest atom, or 0 when the list is empty.
  shortest_atom = (atoms != NULL) ? MAX_ATOM_LENGTH : 0;

  for (item = atoms; item != NULL; item = item->next)
  {
    if (item->atom_length < shortest_atom)
      shortest_atom = item->atom_length;
  }

  *min_atom_length = shortest_atom;

  if (flags & STRING_GFLAGS_LITERAL)
  {
    max_len = (flags & STRING_GFLAGS_WIDE) ?
        new_string->length * 2 : new_string->length;

    if (max_len == shortest_atom)
      new_string->g_flags |= STRING_GFLAGS_FITS_IN_ATOM;
  }

  if (owns_literal)
    yr_free(literal);

  if (atoms != NULL)
    yr_atoms_list_destroy(atoms);

  return status;
}
//
// yr_parser_reduce_rule_declaration_phase_1
//
// Starts the declaration of a rule: called once the rule's flags and
// identifier are known, before its body is parsed.
//
// Fails with ERROR_DUPLICATED_IDENTIFIER if a rule or an object with the
// same identifier already exists in the current namespace. Otherwise it
// allocates the YR_RULE record, copies the identifier into the string arena,
// emits an OP_INIT_RULE instruction whose argument is the rule and registers
// the rule in compiler->rules_table. The strings table is cleaned and
// compiler->current_rule is updated once the record exists, whether or not
// the emit/registration steps succeeded.
//
// Returns the new rule, or NULL on any failure; the error code is left in
// compiler->last_result.
//
YR_RULE* yr_parser_reduce_rule_declaration_phase_1(
    yyscan_t yyscanner,
    int32_t flags,
    const char* identifier)
{
  YR_COMPILER* compiler = yyget_extra(yyscanner);
  YR_RULE* rule = NULL;

  if (yr_hash_table_lookup(
        compiler->rules_table,
        identifier,
        compiler->current_namespace->name) != NULL ||
      yr_hash_table_lookup(
        compiler->objects_table,
        identifier,
        compiler->current_namespace->name) != NULL)
  {
    // A rule or variable with the same identifier already exists, return the
    // appropriate error.

    yr_compiler_set_error_extra_info(compiler, identifier);
    compiler->last_result = ERROR_DUPLICATED_IDENTIFIER;
    return NULL;
  }

  compiler->last_result = yr_arena_allocate_struct(
      compiler->rules_arena,
      sizeof(YR_RULE),
      (void**) &rule,
      offsetof(YR_RULE, identifier),
      offsetof(YR_RULE, tags),
      offsetof(YR_RULE, strings),
      offsetof(YR_RULE, metas),
      offsetof(YR_RULE, ns),
      EOL);

  if (compiler->last_result != ERROR_SUCCESS)
    return NULL;

  rule->g_flags = flags;
  rule->ns = compiler->current_namespace;

  #ifdef PROFILING_ENABLED
  rule->clock_ticks = 0;
  #endif

  compiler->last_result = yr_arena_write_string(
      compiler->sz_arena,
      identifier,
      (char**) &rule->identifier);

  if (compiler->last_result != ERROR_SUCCESS)
    return NULL;

  compiler->last_result = yr_parser_emit_with_arg_reloc(
      yyscanner,
      OP_INIT_RULE,
      PTR_TO_INT64(rule),
      NULL,
      NULL);

  if (compiler->last_result == ERROR_SUCCESS)
    compiler->last_result = yr_hash_table_add(
        compiler->rules_table,
        identifier,
        compiler->current_namespace->name,
        (void*) rule);

  // Clean strings_table as we are starting to parse a new rule.
  yr_hash_table_clean(compiler->strings_table, NULL);

  compiler->current_rule = rule;

  // Every earlier failure returns NULL; do the same when emitting the
  // instruction or registering the rule failed, so that callers testing the
  // returned pointer do not mistake a failed declaration for a valid rule.
  if (compiler->last_result != ERROR_SUCCESS)
    return NULL;

  return rule;
}
//
// _yr_parser_write_string
//
// Allocates a YR_STRING in the strings arena, fills it in and registers it
// with the Aho-Corasick automaton.
//
//   identifier        Name of the string; copied into the string arena.
//   flags             STRING_GFLAGS_* bits describing the string.
//   compiler          Compiler owning the arenas and the automaton.
//   str               Text of the string; used as the literal when the
//                     string is neither hexadecimal nor a regular expression.
//   re                Regular expression for hex/regexp strings.
//   string            Receives the address of the new YR_STRING.
//   min_atom_quality  Receives yr_atoms_min_quality() of the extracted atoms.
//
// Returns ERROR_SUCCESS or the code of the first failing step. The atom
// quality and the STRING_GFLAGS_FITS_IN_ATOM flag are computed even when an
// intermediate step failed.
//
int _yr_parser_write_string(
    const char* identifier,
    int flags,
    YR_COMPILER* compiler,
    SIZED_STRING* str,
    RE* re,
    YR_STRING** string,
    int* min_atom_quality)
{
  YR_ATOM_LIST_ITEM* atoms = NULL;
  YR_AC_MATCH* root_match;
  YR_STRING* new_string;
  SIZED_STRING* literal;

  int owns_literal = FALSE;
  int max_len;
  int status;

  *string = NULL;

  status = yr_arena_allocate_struct(
      compiler->strings_arena,
      sizeof(YR_STRING),
      (void**) string,
      offsetof(YR_STRING, identifier),
      offsetof(YR_STRING, string),
      offsetof(YR_STRING, chained_to),
      EOL);

  if (status != ERROR_SUCCESS)
    return status;

  new_string = *string;

  status = yr_arena_write_string(
      compiler->sz_arena,
      identifier,
      &new_string->identifier);

  if (status != ERROR_SUCCESS)
    return status;

  if (flags & (STRING_GFLAGS_HEXADECIMAL | STRING_GFLAGS_REGEXP))
  {
    // The extracted literal, if any, is released with yr_free() on exit.
    literal = yr_re_extract_literal(re);

    if (literal == NULL)
    {
      // Non-literal strings can't be marked as fixed offset because once we
      // find a string atom in the scanned data we don't know the offset where
      // the string should start, as the non-literal strings can contain
      // variable-length portions.
      flags &= ~STRING_GFLAGS_FIXED_OFFSET;
    }
    else
    {
      owns_literal = TRUE;
      flags |= STRING_GFLAGS_LITERAL;
    }
  }
  else
  {
    literal = str;
    flags |= STRING_GFLAGS_LITERAL;
  }

  new_string->fixed_offset = UNDEFINED;
  new_string->chained_to = NULL;
  new_string->g_flags = flags;

  #ifdef PROFILING_ENABLED
  new_string->clock_ticks = 0;
  #endif

  memset(new_string->matches, 0, sizeof(new_string->matches));

  memset(
      new_string->unconfirmed_matches,
      0,
      sizeof(new_string->unconfirmed_matches));

  if (flags & STRING_GFLAGS_LITERAL)
  {
    new_string->length = (uint32_t) literal->length;

    // One extra byte is written so the terminating null is stored as well.
    status = yr_arena_write_data(
        compiler->sz_arena,
        literal->c_string,
        literal->length + 1,
        (void**) &new_string->string);

    if (status == ERROR_SUCCESS)
    {
      status = yr_atoms_extract_from_string(
          (uint8_t*) literal->c_string,
          (int32_t) literal->length,
          flags,
          &atoms);
    }
  }
  else
  {
    status = yr_re_emit_code(re, compiler->re_code_arena);

    if (status == ERROR_SUCCESS)
      status = yr_atoms_extract_from_re(re, flags, &atoms);
  }

  if (status == ERROR_SUCCESS)
  {
    if (atoms != NULL)
    {
      // Add the string to the Aho-Corasick automaton through its atoms.
      status = yr_ac_add_string(
          compiler->automaton_arena,
          compiler->automaton,
          new_string,
          atoms);
    }
    else
    {
      // No atoms: hang the string directly from the automaton's root.
      status = yr_arena_allocate_struct(
          compiler->automaton_arena,
          sizeof(YR_AC_MATCH),
          (void**) &root_match,
          offsetof(YR_AC_MATCH, string),
          offsetof(YR_AC_MATCH, forward_code),
          offsetof(YR_AC_MATCH, backward_code),
          offsetof(YR_AC_MATCH, next),
          EOL);

      if (status == ERROR_SUCCESS)
      {
        // NOTE(review): `re` is dereferenced here without a NULL check;
        // confirm callers never reach this branch with re == NULL.
        root_match->backtrack = 0;
        root_match->string = new_string;
        root_match->forward_code = re->root_node->forward_code;
        root_match->backward_code = NULL;
        root_match->next = compiler->automaton->root->matches;
        compiler->automaton->root->matches = root_match;
      }
    }
  }

  *min_atom_quality = yr_atoms_min_quality(atoms);

  if (flags & STRING_GFLAGS_LITERAL)
  {
    max_len = (flags & STRING_GFLAGS_WIDE) ?
        new_string->length * 2 : new_string->length;

    if (max_len <= MAX_ATOM_LENGTH)
      new_string->g_flags |= STRING_GFLAGS_FITS_IN_ATOM;
  }

  if (owns_literal)
    yr_free(literal);

  if (atoms != NULL)
    yr_atoms_list_destroy(atoms);

  return status;
}
//
// yr_parser_reduce_rule_declaration_phase_1
//
// Starts the declaration of a rule: called once the rule's flags and
// identifier are known, before its body is parsed.
//
// On success *rule points to a new YR_RULE allocated in the rules arena, an
// OP_INIT_RULE instruction followed by its YR_INIT_RULE_ARGS has been
// written to the code arena, a fixup for the still-unknown jump address has
// been pushed on compiler->fixup_stack_head, the strings table has been
// cleaned, the rule has been registered in compiler->rules_table and
// compiler->current_rule points to it.
//
// Returns ERROR_SUCCESS, ERROR_DUPLICATED_IDENTIFIER when a rule (in the
// current namespace) or an object (looked up with a NULL namespace) already
// uses the identifier, ERROR_INSUFFICIENT_MEMORY when the fixup cannot be
// allocated, or the error code of a failing arena/hash-table call.
//
// NOTE(review): FAIL_ON_ERROR is defined elsewhere; presumably it returns
// the error code of the wrapped call when that call fails.
//
int yr_parser_reduce_rule_declaration_phase_1(
    yyscan_t yyscanner,
    int32_t flags,
    const char* identifier,
    YR_RULE** rule)
{
  YR_COMPILER* compiler = yyget_extra(yyscanner);
  YR_INIT_RULE_ARGS* init_args;
  YR_FIXUP* jump_fixup;
  YR_RULE* new_rule;
  int already_defined;

  *rule = NULL;

  already_defined = yr_hash_table_lookup(
      compiler->rules_table,
      identifier,
      compiler->current_namespace->name) != NULL;

  if (!already_defined)
  {
    already_defined = yr_hash_table_lookup(
        compiler->objects_table,
        identifier,
        NULL) != NULL;
  }

  if (already_defined)
  {
    // A rule or variable with this identifier exists already; report which.
    yr_compiler_set_error_extra_info(compiler, identifier);
    return ERROR_DUPLICATED_IDENTIFIER;
  }

  // The arena writes the address of the new record straight into *rule.
  FAIL_ON_ERROR(yr_arena_allocate_struct(
      compiler->rules_arena,
      sizeof(YR_RULE),
      (void**) rule,
      offsetof(YR_RULE, identifier),
      offsetof(YR_RULE, tags),
      offsetof(YR_RULE, strings),
      offsetof(YR_RULE, metas),
      offsetof(YR_RULE, ns),
      EOL));

  new_rule = *rule;

  new_rule->g_flags = flags;
  new_rule->ns = compiler->current_namespace;
  new_rule->num_atoms = 0;

  #ifdef PROFILING_ENABLED
  new_rule->time_cost = 0;

  memset(
      new_rule->time_cost_per_thread,
      0,
      sizeof(new_rule->time_cost_per_thread));
  #endif

  FAIL_ON_ERROR(yr_arena_write_string(
      compiler->sz_arena,
      identifier,
      (char**) &new_rule->identifier));

  FAIL_ON_ERROR(yr_parser_emit(
      yyscanner,
      OP_INIT_RULE,
      NULL));

  FAIL_ON_ERROR(yr_arena_allocate_struct(
      compiler->code_arena,
      sizeof(YR_INIT_RULE_ARGS),
      (void**) &init_args,
      offsetof(YR_INIT_RULE_ARGS, rule),
      offsetof(YR_INIT_RULE_ARGS, jmp_addr),
      EOL));

  init_args->rule = new_rule;

  // jmp_addr holds the address to jump to when the code for the rule must be
  // skipped. It is initialized to NULL here because the address is not known
  // until the code for the rule's condition has been emitted. The address is
  // set in yr_parser_reduce_rule_declaration_phase_2.
  init_args->jmp_addr = NULL;

  // Create a fixup entry for the jump and push it on the fixup stack.
  jump_fixup = (YR_FIXUP*) yr_malloc(sizeof(YR_FIXUP));

  if (jump_fixup == NULL)
    return ERROR_INSUFFICIENT_MEMORY;

  jump_fixup->address = (void*) &(init_args->jmp_addr);
  jump_fixup->next = compiler->fixup_stack_head;
  compiler->fixup_stack_head = jump_fixup;

  // A new rule is starting, so the strings table is cleaned first.
  yr_hash_table_clean(compiler->strings_table, NULL);

  FAIL_ON_ERROR(yr_hash_table_add(
      compiler->rules_table,
      identifier,
      compiler->current_namespace->name,
      (void*) new_rule));

  compiler->current_rule = new_rule;

  return ERROR_SUCCESS;
}
//
// _yr_parser_write_string
//
// Allocates a YR_STRING in the strings arena, fills it in and adds it to the
// Aho-Corasick automaton.
//
//   identifier        Name of the string; copied into the string arena.
//   flags             STRING_GFLAGS_* bits describing the string.
//   compiler          Compiler owning the arenas, automaton and atoms config.
//   str               Text of the string; used as the literal when the
//                     string is neither hexadecimal nor a regular expression.
//   re_ast            Regular expression AST for hex/regexp strings.
//   string            Receives the address of the new YR_STRING.
//   min_atom_quality  Passed through to the atom extraction functions.
//   num_atom          Incremented by the number of atoms extracted.
//
// Returns ERROR_SUCCESS or the code of the first failing step. The
// STRING_GFLAGS_FITS_IN_ATOM flag and the atom count are updated even when
// an intermediate step failed.
//
static int _yr_parser_write_string(
    const char* identifier,
    int flags,
    YR_COMPILER* compiler,
    SIZED_STRING* str,
    RE_AST* re_ast,
    YR_STRING** string,
    int* min_atom_quality,
    int* num_atom)
{
  YR_ATOM_LIST_ITEM* atoms = NULL;
  YR_ATOM_LIST_ITEM* item;
  YR_STRING* new_string;
  SIZED_STRING* literal;

  bool owns_literal = false;
  int atom_count = 0;
  int max_len;
  int status;

  *string = NULL;

  status = yr_arena_allocate_struct(
      compiler->strings_arena,
      sizeof(YR_STRING),
      (void**) string,
      offsetof(YR_STRING, identifier),
      offsetof(YR_STRING, string),
      offsetof(YR_STRING, chained_to),
      offsetof(YR_STRING, rule),
      EOL);

  if (status != ERROR_SUCCESS)
    return status;

  new_string = *string;

  status = yr_arena_write_string(
      compiler->sz_arena,
      identifier,
      &new_string->identifier);

  if (status != ERROR_SUCCESS)
    return status;

  if (flags & (STRING_GFLAGS_HEXADECIMAL | STRING_GFLAGS_REGEXP))
  {
    // The extracted literal, if any, is released with yr_free() on exit.
    literal = yr_re_ast_extract_literal(re_ast);

    if (literal == NULL)
    {
      // Non-literal strings can't be marked as fixed offset because once we
      // find a string atom in the scanned data we don't know the offset where
      // the string should start, as the non-literal strings can contain
      // variable-length portions.
      flags &= ~STRING_GFLAGS_FIXED_OFFSET;
    }
    else
    {
      owns_literal = true;
      flags |= STRING_GFLAGS_LITERAL;
    }
  }
  else
  {
    literal = str;
    flags |= STRING_GFLAGS_LITERAL;
  }

  new_string->rule = compiler->current_rule;
  new_string->fixed_offset = UNDEFINED;
  new_string->chained_to = NULL;
  new_string->g_flags = flags;

  memset(new_string->matches, 0, sizeof(new_string->matches));

  memset(
      new_string->unconfirmed_matches,
      0,
      sizeof(new_string->unconfirmed_matches));

  if (flags & STRING_GFLAGS_LITERAL)
  {
    new_string->length = (uint32_t) literal->length;

    // One extra byte is written so the terminating null is stored as well.
    status = yr_arena_write_data(
        compiler->sz_arena,
        literal->c_string,
        literal->length + 1,
        (void**) &new_string->string);

    if (status == ERROR_SUCCESS)
    {
      status = yr_atoms_extract_from_string(
          &compiler->atoms_config,
          (uint8_t*) literal->c_string,
          (int32_t) literal->length,
          flags,
          &atoms,
          min_atom_quality);
    }
  }
  else
  {
    // Forwards code first, then backwards code, then the atoms.
    status = yr_re_ast_emit_code(re_ast, compiler->re_code_arena, false);

    if (status == ERROR_SUCCESS)
      status = yr_re_ast_emit_code(re_ast, compiler->re_code_arena, true);

    if (status == ERROR_SUCCESS)
    {
      status = yr_atoms_extract_from_re(
          &compiler->atoms_config,
          re_ast,
          flags,
          &atoms,
          min_atom_quality);
    }
  }

  if (status == ERROR_SUCCESS)
  {
    // Add the string to the Aho-Corasick automaton.
    status = yr_ac_add_string(
        compiler->automaton,
        new_string,
        atoms,
        compiler->matches_arena);
  }

  if (flags & STRING_GFLAGS_LITERAL)
  {
    max_len = (flags & STRING_GFLAGS_WIDE) ?
        new_string->length * 2 : new_string->length;

    if (max_len <= YR_MAX_ATOM_LENGTH)
      new_string->g_flags |= STRING_GFLAGS_FITS_IN_ATOM;
  }

  // Account for every atom extracted for this string.
  for (item = atoms; item != NULL; item = item->next)
    atom_count++;

  (*num_atom) += atom_count;

  if (owns_literal)
    yr_free(literal);

  if (atoms != NULL)
    yr_atoms_list_destroy(atoms);

  return status;
}
//
// yr_parser_reduce_import
//
// Handles an "import" statement for the module named by `module_name`.
//
// Returns ERROR_INVALID_MODULE_NAME (recording the name as extra error
// information) when _yr_parser_valid_module_name() rejects the name. If an
// object with that name already exists in compiler->objects_table for the
// current namespace, returns ERROR_SUCCESS without doing anything else.
// Otherwise it creates a structure object for the module, registers it in
// the objects table, runs the module's declarations, copies the module name
// into the string arena and emits an OP_IMPORT instruction whose argument is
// that copy.
//
// When the module is unknown its name is recorded as extra error
// information. Any failing step ends the function with that step's code.
//
// NOTE(review): FAIL_ON_ERROR is defined elsewhere; presumably it returns
// the error code of the wrapped call when that call fails.
//
int yr_parser_reduce_import(
    yyscan_t yyscanner,
    SIZED_STRING* module_name)
{
  YR_COMPILER* compiler = yyget_extra(yyscanner);
  YR_OBJECT* module_object;
  char* name_copy;
  int declarations_result;

  if (!_yr_parser_valid_module_name(module_name))
  {
    yr_compiler_set_error_extra_info(compiler, module_name->c_string);
    return ERROR_INVALID_MODULE_NAME;
  }

  module_object = (YR_OBJECT*) yr_hash_table_lookup(
      compiler->objects_table,
      module_name->c_string,
      compiler->current_namespace->name);

  // Module already imported in this namespace: nothing to do.
  if (module_object != NULL)
    return ERROR_SUCCESS;

  FAIL_ON_ERROR(yr_object_create(
      OBJECT_TYPE_STRUCTURE,
      module_name->c_string,
      NULL,
      &module_object));

  FAIL_ON_ERROR(yr_hash_table_add(
      compiler->objects_table,
      module_name->c_string,
      compiler->current_namespace->name,
      module_object));

  declarations_result = yr_modules_do_declarations(
      module_name->c_string,
      module_object);

  if (declarations_result == ERROR_UNKNOWN_MODULE)
  {
    yr_compiler_set_error_extra_info(compiler, module_name->c_string);
  }

  if (declarations_result != ERROR_SUCCESS)
    return declarations_result;

  FAIL_ON_ERROR(yr_arena_write_string(
      compiler->sz_arena,
      module_name->c_string,
      &name_copy));

  FAIL_ON_ERROR(yr_parser_emit_with_arg_reloc(
      yyscanner,
      OP_IMPORT,
      name_copy,
      NULL,
      NULL));

  return ERROR_SUCCESS;
}
//
// yr_parser_reduce_string_declaration
//
// Creates the YR_STRING for a string declared in a rule.
//
//   yyscanner   Scanner whose extra data is the YR_COMPILER.
//   flags       STRING_GFLAGS_* bits given by the string's modifiers.
//   identifier  Name of the string; "$" marks an anonymous string.
//   str         Text of the string, hex string or regular expression.
//
// The string is allocated in the strings arena and its identifier copied to
// the string arena. Hex strings and regular expressions are compiled into an
// RE; if the RE is flagged as a literal string its literal is used,
// otherwise RE code is emitted. Atoms are extracted and the string is added
// to the Aho-Corasick automaton (or linked to the automaton's root when no
// atoms were extracted). A warning is sent to the compiler's error report
// function, if one is set, when the shortest atom has fewer than 2 bytes.
//
// Returns the new string, or NULL on failure with the error code left in
// compiler->last_result. The atom list and the RE are always released
// before returning.
//
YR_STRING* yr_parser_reduce_string_declaration(
    yyscan_t yyscanner,
    int32_t flags,
    const char* identifier,
    SIZED_STRING* str)
{
  int min_atom_length;
  char* file_name;
  char message[512];

  YR_STRING* string;
  YR_AC_MATCH* new_match;
  YR_ATOM_LIST_ITEM* atom;
  YR_ATOM_LIST_ITEM* atom_list = NULL;

  RE* re = NULL;

  uint8_t* literal_string = NULL;

  int literal_string_len = 0;
  int max_string_len;

  YR_COMPILER* compiler = yyget_extra(yyscanner);

  compiler->last_result = yr_arena_allocate_struct(
      compiler->strings_arena,
      sizeof(YR_STRING),
      (void**) &string,
      offsetof(YR_STRING, identifier),
      offsetof(YR_STRING, string),
      EOL);

  if (compiler->last_result != ERROR_SUCCESS)
    return NULL;

  compiler->last_result = yr_arena_write_string(
      compiler->sz_arena,
      identifier,
      &string->identifier);

  if (compiler->last_result != ERROR_SUCCESS)
    return NULL;

  if (strcmp(identifier, "$") == 0)
    flags |= STRING_GFLAGS_ANONYMOUS;

  if (!(flags & STRING_GFLAGS_WIDE))
    flags |= STRING_GFLAGS_ASCII;

  // The STRING_GFLAGS_SINGLE_MATCH flag indicates that finding
  // a single match for the string is enough. This is true in
  // most cases, except when the string count (#) and string offset (@)
  // operators are used. All strings are marked STRING_FLAGS_SINGLE_MATCH
  // initially, and unmarked later if required.

  flags |= STRING_GFLAGS_SINGLE_MATCH;

  string->g_flags = flags;

  memset(string->matches, 0, sizeof(string->matches));

  if (flags & STRING_GFLAGS_HEXADECIMAL ||
      flags & STRING_GFLAGS_REGEXP)
  {
    if (flags & STRING_GFLAGS_HEXADECIMAL)
      compiler->last_result = yr_re_compile_hex(str->c_string, &re);
    else
      compiler->last_result = yr_re_compile(str->c_string, &re);

    if (compiler->last_result != ERROR_SUCCESS)
    {
      // `re` starts as NULL and is treated as nullable at _exit, so the
      // compile call may have failed before producing an RE. Guard the
      // dereference instead of reading the message through a NULL pointer.
      snprintf(
          message,
          sizeof(message),
          "invalid %s in string \"%s\": %s",
          (flags & STRING_GFLAGS_HEXADECIMAL) ?
              "hex string" : "regular expression",
          identifier,
          (re != NULL) ? re->error_message : "no details available");

      yr_compiler_set_error_extra_info(compiler, message);

      string = NULL;
      goto _exit;
    }

    if (re->flags & RE_FLAGS_START_ANCHORED)
      string->g_flags |= STRING_GFLAGS_START_ANCHORED;

    if (re->flags & RE_FLAGS_END_ANCHORED)
      string->g_flags |= STRING_GFLAGS_END_ANCHORED;

    if (re->flags & RE_FLAGS_FAST_HEX_REGEXP)
      string->g_flags |= STRING_GFLAGS_FAST_HEX_REGEXP;

    if (re->flags & RE_FLAGS_LITERAL_STRING)
    {
      string->g_flags |= STRING_GFLAGS_LITERAL;
      literal_string = re->literal_string;
      literal_string_len = re->literal_string_len;

      compiler->last_result = yr_atoms_extract_from_string(
          literal_string,
          literal_string_len,
          string->g_flags,
          &atom_list);
    }
    else
    {
      compiler->last_result = yr_re_emit_code(
          re,
          compiler->re_code_arena);

      if (compiler->last_result != ERROR_SUCCESS)
      {
        string = NULL;
        goto _exit;
      }

      compiler->last_result = yr_atoms_extract_from_re(
          re,
          string->g_flags,
          &atom_list);
    }
  }
  else
  {
    // Plain text string: the text itself is the literal.
    string->g_flags |= STRING_GFLAGS_LITERAL;
    literal_string = (uint8_t*) str->c_string;
    literal_string_len = str->length;

    compiler->last_result = yr_atoms_extract_from_string(
        literal_string,
        literal_string_len,
        string->g_flags,
        &atom_list);
  }

  if (compiler->last_result != ERROR_SUCCESS)
  {
    string = NULL;
    goto _exit;
  }

  if (STRING_IS_LITERAL(string))
  {
    compiler->last_result = yr_arena_write_data(
        compiler->sz_arena,
        literal_string,
        literal_string_len,
        (void*) &string->string);

    if (compiler->last_result != ERROR_SUCCESS)
    {
      string = NULL;
      goto _exit;
    }

    string->length = literal_string_len;
  }

  // Add the string to Aho-Corasick automaton.

  if (atom_list != NULL)
  {
    compiler->last_result = yr_ac_add_string(
        compiler->automaton_arena,
        compiler->automaton,
        string,
        atom_list);
  }
  else
  {
    // No atoms: link the string directly to the automaton's root.
    compiler->last_result = yr_arena_allocate_struct(
        compiler->automaton_arena,
        sizeof(YR_AC_MATCH),
        (void**) &new_match,
        offsetof(YR_AC_MATCH, string),
        offsetof(YR_AC_MATCH, forward_code),
        offsetof(YR_AC_MATCH, backward_code),
        offsetof(YR_AC_MATCH, next),
        EOL);

    if (compiler->last_result == ERROR_SUCCESS)
    {
      // NOTE(review): `re` is NULL for plain text strings; this assumes a
      // text string always yields at least one atom — confirm.
      new_match->backtrack = 0;
      new_match->string = string;
      new_match->forward_code = re->root_node->forward_code;
      new_match->backward_code = NULL;
      new_match->next = compiler->automaton->root->matches;
      compiler->automaton->root->matches = new_match;
    }
  }

  // Length of the shortest atom, or 0 when there are no atoms at all.

  atom = atom_list;

  if (atom != NULL)
    min_atom_length = MAX_ATOM_LENGTH;
  else
    min_atom_length = 0;

  while (atom != NULL)
  {
    if (atom->atom_length < min_atom_length)
      min_atom_length = atom->atom_length;

    atom = atom->next;
  }

  if (STRING_IS_LITERAL(string))
  {
    if (STRING_IS_WIDE(string))
      max_string_len = string->length * 2;
    else
      max_string_len = string->length;

    if (max_string_len == min_atom_length)
      string->g_flags |= STRING_GFLAGS_FITS_IN_ATOM;
  }

  if (compiler->file_name_stack_ptr > 0)
    file_name = compiler->file_name_stack[compiler->file_name_stack_ptr - 1];
  else
    file_name = NULL;

  if (min_atom_length < 2 && compiler->error_report_function != NULL)
  {
    snprintf(
        message,
        sizeof(message),
        "%s is slowing down scanning%s",
        string->identifier,
        min_atom_length == 0 ? " (critical!)" : "");

    compiler->error_report_function(
        YARA_ERROR_LEVEL_WARNING,
        file_name,
        yyget_lineno(yyscanner),
        message);
  }

  if (compiler->last_result != ERROR_SUCCESS)
    string = NULL;

_exit:

  if (atom_list != NULL)
    yr_atoms_list_destroy(atom_list);

  if (re != NULL)
    yr_re_destroy(re);

  return string;
}