//
// yr_object_destroy
//
// Releases an object together with the data it owns. What gets released
// depends on object->type:
//
//   OBJECT_TYPE_STRUCTURE  every member object and its list node
//   OBJECT_TYPE_STRING     the string value, if any
//   OBJECT_TYPE_REGEXP     the regular expression value, if any
//   OBJECT_TYPE_ARRAY      every non-NULL item and the items container
//   OBJECT_TYPE_FUNCTION   the return object
//
// Afterwards the identifier and the object itself are freed, so the pointer
// must not be used again by the caller.
//
// Args:
//    YR_OBJECT* object  - Object to destroy. NULL is accepted and ignored.
//

void yr_object_destroy(
    YR_OBJECT* object)
{
  YR_STRUCTURE_MEMBER* member;
  YR_STRUCTURE_MEMBER* next_member;
  YR_ARRAY_ITEMS* array_items;

  RE* re;
  int i;
  char* str;

  // Nothing to release. This also keeps the recursive calls below safe when
  // a member object or a function's return object is NULL.
  if (object == NULL)
    return;

  switch (object->type)
  {
    case OBJECT_TYPE_STRUCTURE:
      member = ((YR_OBJECT_STRUCTURE*) object)->members;

      while (member != NULL)
      {
        // Save the link first, the node is freed before moving on.
        next_member = member->next;
        yr_object_destroy(member->object);
        yr_free(member);
        member = next_member;
      }
      break;

    case OBJECT_TYPE_STRING:
      str = ((YR_OBJECT_STRING*) object)->value;

      if (str != NULL)
        yr_free(str);
      break;

    case OBJECT_TYPE_REGEXP:
      re = ((YR_OBJECT_REGEXP*) object)->value;

      if (re != NULL)
        yr_re_destroy(re);
      break;

    case OBJECT_TYPE_ARRAY:
      array_items = ((YR_OBJECT_ARRAY*) object)->items;

      // The items container may be missing; walking it unconditionally
      // would dereference a NULL pointer.
      if (array_items != NULL)
      {
        for (i = 0; i < array_items->count; i++)
        {
          if (array_items->objects[i] != NULL)
            yr_object_destroy(array_items->objects[i]);
        }

        yr_free(array_items);
      }
      break;

    case OBJECT_TYPE_FUNCTION:
      yr_object_destroy(((YR_OBJECT_FUNCTION*) object)->return_obj);
      break;

    default:
      // Remaining object types own nothing besides the common fields.
      break;
  }

  yr_free((void*) object->identifier);
  yr_free(object);
}
int yr_re_compile( const char* re_string, int flags, YR_ARENA* code_arena, RE** re, RE_ERROR* error) { RE* compiled_re; YR_ARENA* arena; *re = NULL; FAIL_ON_ERROR(yr_re_parse(re_string, flags, &compiled_re, error)); if (code_arena == NULL) { FAIL_ON_ERROR_WITH_CLEANUP( yr_arena_create( RE_MAX_CODE_SIZE, ARENA_FLAGS_FIXED_SIZE, &arena), yr_re_destroy(compiled_re)); compiled_re->code_arena = arena; } else { arena = code_arena; } FAIL_ON_ERROR_WITH_CLEANUP( yr_re_emit_code(compiled_re, arena), yr_re_destroy(compiled_re)); *re = compiled_re; return ERROR_SUCCESS; }
//
// yr_parser_reduce_string_declaration
//
// Handles the declaration of a string in a rule: derives the final string
// flags, compiles hex strings and regular expressions, splits them at their
// chaining points and writes every resulting piece with
// _yr_parser_write_string. Pieces are linked through "chained_to".
//
// Args:
//    yyscan_t yyscanner      - Scanner whose extra data is the YR_COMPILER.
//    int32_t flags           - STRING_GFLAGS_* flags from the declaration.
//    const char* identifier  - String identifier; "$" marks it anonymous.
//    SIZED_STRING* str       - Text of the string, hex string or regexp.
//
// Returns:
//    The first string written, or NULL when compiler->last_result is not
//    ERROR_SUCCESS on exit.
//

YR_STRING* yr_parser_reduce_string_declaration(
    yyscan_t yyscanner,
    int32_t flags,
    const char* identifier,
    SIZED_STRING* str)
{
  int min_atom_length;
  int min_atom_length_aux;

  int32_t min_gap;
  int32_t max_gap;

  char* file_name;
  char message[512];

  YR_COMPILER* compiler = yyget_extra(yyscanner);
  YR_STRING* string = NULL;
  YR_STRING* aux_string;
  YR_STRING* prev_string;

  RE* re = NULL;

  // Starts as NULL so that the cleanup code can tell whether a remainder is
  // still pending, also on the plain-text path which never assigns it.
  RE* remainder_re = NULL;

  if (strcmp(identifier, "$") == 0)
    flags |= STRING_GFLAGS_ANONYMOUS;

  if (!(flags & STRING_GFLAGS_WIDE))
    flags |= STRING_GFLAGS_ASCII;

  if (str->flags & SIZED_STRING_FLAGS_NO_CASE)
    flags |= STRING_GFLAGS_NO_CASE;

  if (str->flags & SIZED_STRING_FLAGS_DOT_ALL)
    flags |= STRING_GFLAGS_REGEXP_DOT_ALL;

  // The STRING_GFLAGS_SINGLE_MATCH flag indicates that finding
  // a single match for the string is enough. This is true in
  // most cases, except when the string count (#) and string offset (@)
  // operators are used. All strings are marked STRING_GFLAGS_SINGLE_MATCH
  // initially, and unmarked later if required.

  flags |= STRING_GFLAGS_SINGLE_MATCH;

  if (flags & STRING_GFLAGS_HEXADECIMAL ||
      flags & STRING_GFLAGS_REGEXP)
  {
    if (flags & STRING_GFLAGS_HEXADECIMAL)
      compiler->last_result = yr_re_compile_hex(
          str->c_string, &re);
    else
      compiler->last_result = yr_re_compile(
          str->c_string, &re);

    if (compiler->last_result != ERROR_SUCCESS)
    {
      // "re" may still be NULL if the failure happened before the RE was
      // created, so its error message can't be read unconditionally.
      snprintf(
          message,
          sizeof(message),
          "invalid %s \"%s\": %s",
          (flags & STRING_GFLAGS_HEXADECIMAL) ?
              "hex string" : "regular expression",
          identifier,
          re != NULL ? re->error_message : "unknown error");

      yr_compiler_set_error_extra_info(
          compiler, message);

      goto _exit;
    }

    if (re->flags & RE_FLAGS_FAST_HEX_REGEXP)
      flags |= STRING_GFLAGS_FAST_HEX_REGEXP;

    compiler->last_result = yr_re_split_at_chaining_point(
        re, &re, &remainder_re, &min_gap, &max_gap);

    if (compiler->last_result != ERROR_SUCCESS)
      goto _exit;

    compiler->last_result = _yr_parser_write_string(
        identifier,
        flags,
        compiler,
        NULL,
        re,
        &string,
        &min_atom_length);

    if (compiler->last_result != ERROR_SUCCESS)
      goto _exit;

    if (remainder_re != NULL)
    {
      string->g_flags |= STRING_GFLAGS_CHAIN_TAIL | STRING_GFLAGS_CHAIN_PART;
      string->chain_gap_min = min_gap;
      string->chain_gap_max = max_gap;
    }

    // Use "aux_string" from now on, we want to keep the value of "string"
    // because it will be returned.

    aux_string = string;

    while (remainder_re != NULL)
    {
      // Destroy the regexp pointed to by "re" before
      // yr_re_split_at_chaining_point overwrites "re" with another value.
      // The pointer is cleared so that the cleanup code can't destroy it a
      // second time if the split fails without assigning "re".

      yr_re_destroy(re);
      re = NULL;

      compiler->last_result = yr_re_split_at_chaining_point(
          remainder_re, &re, &remainder_re, &min_gap, &max_gap);

      if (compiler->last_result != ERROR_SUCCESS)
        goto _exit;

      prev_string = aux_string;

      compiler->last_result = _yr_parser_write_string(
          identifier,
          flags,
          compiler,
          NULL,
          re,
          &aux_string,
          &min_atom_length_aux);

      if (compiler->last_result != ERROR_SUCCESS)
        goto _exit;

      if (min_atom_length_aux < min_atom_length)
        min_atom_length = min_atom_length_aux;

      aux_string->g_flags |= STRING_GFLAGS_CHAIN_PART;
      aux_string->chain_gap_min = min_gap;
      aux_string->chain_gap_max = max_gap;

      prev_string->chained_to = aux_string;
    }
  }
  else
  {
    compiler->last_result = _yr_parser_write_string(
        identifier,
        flags,
        compiler,
        str,
        NULL,
        &string,
        &min_atom_length);

    if (compiler->last_result != ERROR_SUCCESS)
      goto _exit;
  }

  if (compiler->file_name_stack_ptr > 0)
    file_name = compiler->file_name_stack[compiler->file_name_stack_ptr - 1];
  else
    file_name = NULL;

  if (min_atom_length < 2 && compiler->error_report_function != NULL)
  {
    snprintf(
        message,
        sizeof(message),
        "%s is slowing down scanning%s",
        string->identifier,
        min_atom_length == 0 ? " (critical!)" : "");

    compiler->error_report_function(
        YARA_ERROR_LEVEL_WARNING,
        file_name,
        yyget_lineno(yyscanner),
        message);
  }

_exit:

  // A remainder that was split off but never consumed (error paths only)
  // would otherwise leak. The comparison is done before "re" is destroyed
  // and skips the remainder if both pointers refer to the same RE.

  if (remainder_re != NULL && remainder_re != re)
    yr_re_destroy(remainder_re);

  if (re != NULL)
    yr_re_destroy(re);

  if (compiler->last_result != ERROR_SUCCESS)
    return NULL;

  return string;
}
//
// yr_parser_reduce_string_declaration
//
// Handles the declaration of a string in a rule. Rejects duplicated
// identifiers and empty strings, derives the string and regexp flags, parses
// hex strings and regular expressions, splits them at their chaining points
// and writes every resulting piece with _yr_parser_write_string. The first
// string written is registered in compiler->strings_table.
//
// Args:
//    yyscan_t yyscanner      - Scanner whose extra data is the YR_COMPILER.
//    int32_t string_flags    - STRING_GFLAGS_* flags from the declaration.
//    const char* identifier  - String identifier; "$" marks it anonymous.
//    SIZED_STRING* str       - Text of the string, hex string or regexp.
//
// Returns:
//    The first string written, or NULL when compiler->last_result is not
//    ERROR_SUCCESS on exit.
//

YR_STRING* yr_parser_reduce_string_declaration(
    yyscan_t yyscanner,
    int32_t string_flags,
    const char* identifier,
    SIZED_STRING* str)
{
  int min_atom_quality;
  int min_atom_quality_aux;
  int re_flags = 0;

  int32_t min_gap;
  int32_t max_gap;

  char message[512];

  YR_COMPILER* compiler = yyget_extra(yyscanner);
  YR_STRING* string = NULL;
  YR_STRING* aux_string;
  YR_STRING* prev_string;

  RE* re = NULL;

  // Starts as NULL so that the cleanup code can tell whether a remainder is
  // still pending, also on the paths which never assign it.
  RE* remainder_re = NULL;

  RE_ERROR re_error;

  // Determine if a string with the same identifier was already defined
  // by searching for the identifier in strings_table.

  string = yr_hash_table_lookup(
      compiler->strings_table,
      identifier,
      NULL);

  if (string != NULL)
  {
    compiler->last_result = ERROR_DUPLICATED_STRING_IDENTIFIER;
    yr_compiler_set_error_extra_info(compiler, identifier);
    goto _exit;
  }

  // Empty strings are not allowed

  if (str->length == 0)
  {
    compiler->last_result = ERROR_EMPTY_STRING;
    yr_compiler_set_error_extra_info(compiler, identifier);
    goto _exit;
  }

  if (str->flags & SIZED_STRING_FLAGS_NO_CASE)
    string_flags |= STRING_GFLAGS_NO_CASE;

  if (str->flags & SIZED_STRING_FLAGS_DOT_ALL)
    re_flags |= RE_FLAGS_DOT_ALL;

  if (strcmp(identifier, "$") == 0)
    string_flags |= STRING_GFLAGS_ANONYMOUS;

  if (!(string_flags & STRING_GFLAGS_WIDE))
    string_flags |= STRING_GFLAGS_ASCII;

  if (string_flags & STRING_GFLAGS_NO_CASE)
    re_flags |= RE_FLAGS_NO_CASE;

  // The STRING_GFLAGS_SINGLE_MATCH flag indicates that finding
  // a single match for the string is enough. This is true in
  // most cases, except when the string count (#) and string offset (@)
  // operators are used. All strings are marked STRING_GFLAGS_SINGLE_MATCH
  // initially, and unmarked later if required.

  string_flags |= STRING_GFLAGS_SINGLE_MATCH;

  // The STRING_GFLAGS_FIXED_OFFSET indicates that the string doesn't
  // need to be searched all over the file because the user is using the
  // "at" operator. The string must be searched at a fixed offset in the
  // file. All strings are marked STRING_GFLAGS_FIXED_OFFSET initially,
  // and unmarked later if required.

  string_flags |= STRING_GFLAGS_FIXED_OFFSET;

  if (string_flags & STRING_GFLAGS_HEXADECIMAL ||
      string_flags & STRING_GFLAGS_REGEXP)
  {
    if (string_flags & STRING_GFLAGS_HEXADECIMAL)
      compiler->last_result = yr_re_parse_hex(
          str->c_string, re_flags, &re, &re_error);
    else
      compiler->last_result = yr_re_parse(
          str->c_string, re_flags, &re, &re_error);

    if (compiler->last_result != ERROR_SUCCESS)
    {
      snprintf(
          message,
          sizeof(message),
          "invalid %s \"%s\": %s",
          (string_flags & STRING_GFLAGS_HEXADECIMAL) ?
              "hex string" : "regular expression",
          identifier,
          re_error.message);

      yr_compiler_set_error_extra_info(
          compiler, message);

      goto _exit;
    }

    if (re->flags & RE_FLAGS_FAST_HEX_REGEXP)
      string_flags |= STRING_GFLAGS_FAST_HEX_REGEXP;

    // Regular expressions in the strings section can't mix greedy and ungreedy
    // quantifiers like .* and .*?. That's because these regular expressions can
    // be matched forwards and/or backwards depending on the atom found, and we
    // need the regexp to be all-greedy or all-ungreedy to be able to properly
    // calculate the length of the match.

    if ((re->flags & RE_FLAGS_GREEDY) &&
        (re->flags & RE_FLAGS_UNGREEDY))
    {
      compiler->last_result = ERROR_INVALID_REGULAR_EXPRESSION;

      yr_compiler_set_error_extra_info(compiler,
          "greedy and ungreedy quantifiers can't be mixed in a regular "
          "expression");

      goto _exit;
    }

    if (re->flags & RE_FLAGS_GREEDY)
      string_flags |= STRING_GFLAGS_GREEDY_REGEXP;

    if (yr_re_contains_dot_star(re))
    {
      snprintf(
          message,
          sizeof(message),
          "%s contains .*, consider using .{N} with a reasonable value for N",
          identifier);

      yywarning(yyscanner, message);
    }

    compiler->last_result = yr_re_split_at_chaining_point(
        re, &re, &remainder_re, &min_gap, &max_gap);

    if (compiler->last_result != ERROR_SUCCESS)
      goto _exit;

    compiler->last_result = _yr_parser_write_string(
        identifier,
        string_flags,
        compiler,
        NULL,
        re,
        &string,
        &min_atom_quality);

    if (compiler->last_result != ERROR_SUCCESS)
      goto _exit;

    if (remainder_re != NULL)
    {
      string->g_flags |= STRING_GFLAGS_CHAIN_TAIL | STRING_GFLAGS_CHAIN_PART;
      string->chain_gap_min = min_gap;
      string->chain_gap_max = max_gap;
    }

    // Use "aux_string" from now on, we want to keep the value of "string"
    // because it will be returned.

    aux_string = string;

    while (remainder_re != NULL)
    {
      // Destroy the regexp pointed to by "re" before
      // yr_re_split_at_chaining_point overwrites "re" with another value.
      // The pointer is cleared so that the cleanup code can't destroy it a
      // second time if the split fails without assigning "re".

      yr_re_destroy(re);
      re = NULL;

      compiler->last_result = yr_re_split_at_chaining_point(
          remainder_re, &re, &remainder_re, &min_gap, &max_gap);

      if (compiler->last_result != ERROR_SUCCESS)
        goto _exit;

      prev_string = aux_string;

      compiler->last_result = _yr_parser_write_string(
          identifier,
          string_flags,
          compiler,
          NULL,
          re,
          &aux_string,
          &min_atom_quality_aux);

      if (compiler->last_result != ERROR_SUCCESS)
        goto _exit;

      if (min_atom_quality_aux < min_atom_quality)
        min_atom_quality = min_atom_quality_aux;

      aux_string->g_flags |= STRING_GFLAGS_CHAIN_PART;
      aux_string->chain_gap_min = min_gap;
      aux_string->chain_gap_max = max_gap;

      prev_string->chained_to = aux_string;

      // prev_string is now chained to aux_string, a string chained
      // to another one can't have a fixed offset, only the head of the
      // string chain can have a fixed offset.

      prev_string->g_flags &= ~STRING_GFLAGS_FIXED_OFFSET;
    }
  }
  else
  {
    compiler->last_result = _yr_parser_write_string(
        identifier,
        string_flags,
        compiler,
        str,
        NULL,
        &string,
        &min_atom_quality);

    if (compiler->last_result != ERROR_SUCCESS)
      goto _exit;
  }

  compiler->last_result = yr_hash_table_add(
      compiler->strings_table,
      identifier,
      NULL,
      string);

  if (compiler->last_result != ERROR_SUCCESS)
    goto _exit;

  if (min_atom_quality < 3 && compiler->callback != NULL)
  {
    snprintf(
        message,
        sizeof(message),
        "%s is slowing down scanning%s",
        string->identifier,
        min_atom_quality < 2 ? " (critical!)" : "");

    yywarning(yyscanner, message);
  }

_exit:

  // A remainder that was split off but never consumed (error paths only)
  // would otherwise leak. The comparison is done before "re" is destroyed
  // and skips the remainder if both pointers refer to the same RE.

  if (remainder_re != NULL && remainder_re != re)
    yr_re_destroy(remainder_re);

  if (re != NULL)
    yr_re_destroy(re);

  if (compiler->last_result != ERROR_SUCCESS)
    return NULL;

  return string;
}
//
// yr_parser_reduce_string_declaration
//
// Handles the declaration of a string in a rule. Allocates the YR_STRING in
// the strings arena, derives its flags, compiles hex strings and regular
// expressions, extracts atoms and adds the string to the Aho-Corasick
// automaton. When no atoms are available the string is attached directly to
// the matches of the automaton's root.
//
// Args:
//    yyscan_t yyscanner      - Scanner whose extra data is the YR_COMPILER.
//    int32_t flags           - STRING_GFLAGS_* flags from the declaration.
//    const char* identifier  - String identifier; "$" marks it anonymous.
//    SIZED_STRING* str       - Text of the string, hex string or regexp.
//
// Returns:
//    The new string, or NULL on failure; the error code is left in
//    compiler->last_result.
//

YR_STRING* yr_parser_reduce_string_declaration(
    yyscan_t yyscanner,
    int32_t flags,
    const char* identifier,
    SIZED_STRING* str)
{
  int min_atom_length;
  char* file_name;
  char message[512];

  YR_STRING* string;
  YR_AC_MATCH* new_match;
  YR_ATOM_LIST_ITEM* atom;
  YR_ATOM_LIST_ITEM* atom_list = NULL;

  RE* re = NULL;

  uint8_t* literal_string;

  int literal_string_len;
  int max_string_len;

  YR_COMPILER* compiler = yyget_extra(yyscanner);

  compiler->last_result = yr_arena_allocate_struct(
      compiler->strings_arena,
      sizeof(YR_STRING),
      (void**) &string,
      offsetof(YR_STRING, identifier),
      offsetof(YR_STRING, string),
      EOL);

  if (compiler->last_result != ERROR_SUCCESS)
    return NULL;

  compiler->last_result = yr_arena_write_string(
      compiler->sz_arena,
      identifier,
      &string->identifier);

  if (compiler->last_result != ERROR_SUCCESS)
    return NULL;

  if (strcmp(identifier, "$") == 0)
    flags |= STRING_GFLAGS_ANONYMOUS;

  if (!(flags & STRING_GFLAGS_WIDE))
    flags |= STRING_GFLAGS_ASCII;

  // The STRING_GFLAGS_SINGLE_MATCH flag indicates that finding
  // a single match for the string is enough. This is true in
  // most cases, except when the string count (#) and string offset (@)
  // operators are used. All strings are marked STRING_GFLAGS_SINGLE_MATCH
  // initially, and unmarked later if required.

  flags |= STRING_GFLAGS_SINGLE_MATCH;

  string->g_flags = flags;

  memset(string->matches, 0, sizeof(string->matches));

  if (flags & STRING_GFLAGS_HEXADECIMAL ||
      flags & STRING_GFLAGS_REGEXP)
  {
    if (flags & STRING_GFLAGS_HEXADECIMAL)
      compiler->last_result = yr_re_compile_hex(
          str->c_string, &re);
    else
      compiler->last_result = yr_re_compile(
          str->c_string, &re);

    if (compiler->last_result != ERROR_SUCCESS)
    {
      // "re" may still be NULL if the failure happened before the RE was
      // created, so its error message can't be read unconditionally.
      snprintf(
          message,
          sizeof(message),
          "invalid %s in string \"%s\": %s",
          (flags & STRING_GFLAGS_HEXADECIMAL) ?
              "hex string" : "regular expression",
          identifier,
          re != NULL ? re->error_message : "unknown error");

      yr_compiler_set_error_extra_info(compiler, message);

      string = NULL;
      goto _exit;
    }

    if (re->flags & RE_FLAGS_START_ANCHORED)
      string->g_flags |= STRING_GFLAGS_START_ANCHORED;

    if (re->flags & RE_FLAGS_END_ANCHORED)
      string->g_flags |= STRING_GFLAGS_END_ANCHORED;

    if (re->flags & RE_FLAGS_FAST_HEX_REGEXP)
      string->g_flags |= STRING_GFLAGS_FAST_HEX_REGEXP;

    if (re->flags & RE_FLAGS_LITERAL_STRING)
    {
      string->g_flags |= STRING_GFLAGS_LITERAL;
      literal_string = re->literal_string;
      literal_string_len = re->literal_string_len;

      compiler->last_result = yr_atoms_extract_from_string(
          literal_string,
          literal_string_len,
          string->g_flags,
          &atom_list);
    }
    else
    {
      compiler->last_result = yr_re_emit_code(
          re, compiler->re_code_arena);

      if (compiler->last_result != ERROR_SUCCESS)
      {
        string = NULL;
        goto _exit;
      }

      compiler->last_result = yr_atoms_extract_from_re(
          re, string->g_flags, &atom_list);
    }
  }
  else
  {
    string->g_flags |= STRING_GFLAGS_LITERAL;
    literal_string = (uint8_t*) str->c_string;
    literal_string_len = str->length;

    compiler->last_result = yr_atoms_extract_from_string(
        literal_string,
        literal_string_len,
        string->g_flags,
        &atom_list);
  }

  if (compiler->last_result != ERROR_SUCCESS)
  {
    string = NULL;
    goto _exit;
  }

  if (STRING_IS_LITERAL(string))
  {
    compiler->last_result = yr_arena_write_data(
        compiler->sz_arena,
        literal_string,
        literal_string_len,
        (void*) &string->string);

    if (compiler->last_result != ERROR_SUCCESS)
    {
      string = NULL;
      goto _exit;
    }

    string->length = literal_string_len;
  }

  // Add the string to Aho-Corasick automaton.

  if (atom_list != NULL)
  {
    compiler->last_result = yr_ac_add_string(
        compiler->automaton_arena,
        compiler->automaton,
        string,
        atom_list);
  }
  else
  {
    // No atoms: attach the string to the root of the automaton.

    compiler->last_result = yr_arena_allocate_struct(
        compiler->automaton_arena,
        sizeof(YR_AC_MATCH),
        (void**) &new_match,
        offsetof(YR_AC_MATCH, string),
        offsetof(YR_AC_MATCH, forward_code),
        offsetof(YR_AC_MATCH, backward_code),
        offsetof(YR_AC_MATCH, next),
        EOL);

    if (compiler->last_result == ERROR_SUCCESS)
    {
      new_match->backtrack = 0;
      new_match->string = string;

      // Plain text strings never compile a regexp, so "re" is NULL on that
      // path and there is no forward code to reference.
      // NOTE(review): NULL forward code is assumed to be acceptable for
      // strings without a compiled regexp - confirm against the scanner.

      if (re != NULL && re->root_node != NULL)
        new_match->forward_code = re->root_node->forward_code;
      else
        new_match->forward_code = NULL;

      new_match->backward_code = NULL;
      new_match->next = compiler->automaton->root->matches;
      compiler->automaton->root->matches = new_match;
    }
  }

  // Find the length of the shortest atom; it is 0 when there are no atoms.

  atom = atom_list;

  if (atom != NULL)
    min_atom_length = MAX_ATOM_LENGTH;
  else
    min_atom_length = 0;

  while (atom != NULL)
  {
    if (atom->atom_length < min_atom_length)
      min_atom_length = atom->atom_length;

    atom = atom->next;
  }

  if (STRING_IS_LITERAL(string))
  {
    if (STRING_IS_WIDE(string))
      max_string_len = string->length * 2;
    else
      max_string_len = string->length;

    if (max_string_len == min_atom_length)
      string->g_flags |= STRING_GFLAGS_FITS_IN_ATOM;
  }

  if (compiler->file_name_stack_ptr > 0)
    file_name = compiler->file_name_stack[compiler->file_name_stack_ptr - 1];
  else
    file_name = NULL;

  if (min_atom_length < 2 && compiler->error_report_function != NULL)
  {
    snprintf(
        message,
        sizeof(message),
        "%s is slowing down scanning%s",
        string->identifier,
        min_atom_length == 0 ? " (critical!)" : "");

    compiler->error_report_function(
        YARA_ERROR_LEVEL_WARNING,
        file_name,
        yyget_lineno(yyscanner),
        message);
  }

  // Failures while adding the string to the automaton are reported here.

  if (compiler->last_result != ERROR_SUCCESS)
    string = NULL;

_exit:

  if (atom_list != NULL)
    yr_atoms_list_destroy(atom_list);

  if (re != NULL)
    yr_re_destroy(re);

  return string;
}