int yr_atoms_extract_from_re( YR_ATOMS_CONFIG* config, RE_AST* re_ast, int flags, YR_ATOM_LIST_ITEM** atoms, int* min_atom_quality) { YR_ATOM_TREE* atom_tree = (YR_ATOM_TREE*) yr_malloc(sizeof(YR_ATOM_TREE)); YR_ATOM_LIST_ITEM* wide_atoms; YR_ATOM_LIST_ITEM* case_insensitive_atoms; if (atom_tree == NULL) return ERROR_INSUFFICIENT_MEMORY; atom_tree->root_node = _yr_atoms_tree_node_create(ATOM_TREE_OR); if (atom_tree->root_node == NULL) { _yr_atoms_tree_destroy(atom_tree); return ERROR_INSUFFICIENT_MEMORY; } FAIL_ON_ERROR_WITH_CLEANUP( _yr_atoms_extract_from_re(config, re_ast, atom_tree->root_node), _yr_atoms_tree_destroy(atom_tree)); // Initialize atom list *atoms = NULL; // Choose the atoms that will be used. FAIL_ON_ERROR_WITH_CLEANUP( _yr_atoms_choose(config, atom_tree->root_node, atoms, min_atom_quality), _yr_atoms_tree_destroy(atom_tree)); _yr_atoms_tree_destroy(atom_tree); FAIL_ON_ERROR_WITH_CLEANUP( _yr_atoms_expand_wildcards(*atoms), { yr_atoms_list_destroy(*atoms); *atoms = NULL; });
//
// _yr_atoms_choose
//
// Walks the atom tree rooted at `node` and builds in *chosen_atoms the list
// of atoms selected for it. The quality of the selection is stored in
// *atoms_quality:
//
//   ATOM_TREE_LEAF  a single list item copied from the leaf, rated with
//                   _yr_atoms_quality.
//   ATOM_TREE_OR    the list produced by the child with the highest quality.
//   ATOM_TREE_AND   the concatenation of the lists produced by all children,
//                   rated with the lowest quality among them.
//
// Args:
//    node           Root of the (sub)tree to process. Must not be NULL.
//    chosen_atoms   Receives the resulting list (may be NULL).
//    atoms_quality  Receives the quality of the resulting list.
//
// Returns:
//    ERROR_SUCCESS, or the error code of the step that failed. On failure
//    every atom gathered so far is released and *chosen_atoms is NULL, so
//    the caller has nothing to free.
//

static int _yr_atoms_choose(
    ATOM_TREE_NODE* node,
    YR_ATOM_LIST_ITEM** chosen_atoms,
    int* atoms_quality)
{
  ATOM_TREE_NODE* child;
  YR_ATOM_LIST_ITEM* item;
  YR_ATOM_LIST_ITEM* tail;

  int i, quality, result;
  int max_quality = YR_MIN_ATOM_QUALITY;
  int min_quality = YR_MAX_ATOM_QUALITY;

  *chosen_atoms = NULL;

  // Give the output a defined value even if the node type is not one of the
  // cases handled below.
  *atoms_quality = YR_MIN_ATOM_QUALITY;

  switch (node->type)
  {
    case ATOM_TREE_LEAF:

      item = (YR_ATOM_LIST_ITEM*) yr_malloc(sizeof(YR_ATOM_LIST_ITEM));

      if (item == NULL)
        return ERROR_INSUFFICIENT_MEMORY;

      for (i = 0; i < node->atom_length; i++)
        item->atom[i] = node->atom[i];

      item->atom_length = node->atom_length;
      item->forward_code = node->forward_code;
      item->backward_code = node->backward_code;
      item->backtrack = 0;
      item->next = NULL;

      *chosen_atoms = item;
      *atoms_quality = _yr_atoms_quality(node->atom, node->atom_length);
      break;

    case ATOM_TREE_OR:

      child = node->children_head;

      while (child != NULL)
      {
        result = _yr_atoms_choose(child, &item, &quality);

        if (result != ERROR_SUCCESS)
        {
          // The failing call already released its own atoms; release the
          // best list kept so far so that it does not leak.
          yr_atoms_list_destroy(*chosen_atoms);
          *chosen_atoms = NULL;
          return result;
        }

        if (quality > max_quality)
        {
          max_quality = quality;
          yr_atoms_list_destroy(*chosen_atoms);
          *chosen_atoms = item;
        }
        else
        {
          yr_atoms_list_destroy(item);
        }

        child = child->next_sibling;
      }

      *atoms_quality = max_quality;
      break;

    case ATOM_TREE_AND:

      child = node->children_head;

      while (child != NULL)
      {
        result = _yr_atoms_choose(child, &item, &quality);

        if (result != ERROR_SUCCESS)
        {
          // Release the atoms concatenated from the previous children.
          yr_atoms_list_destroy(*chosen_atoms);
          *chosen_atoms = NULL;
          return result;
        }

        if (quality < min_quality)
          min_quality = quality;

        if (item != NULL)
        {
          // Prepend the child's list to the atoms gathered so far.
          tail = item;

          while (tail->next != NULL)
            tail = tail->next;

          tail->next = *chosen_atoms;
          *chosen_atoms = item;
        }

        child = child->next_sibling;
      }

      *atoms_quality = min_quality;
      break;
  }

  return ERROR_SUCCESS;
}
int yr_atoms_extract_from_re( RE_AST* re_ast, int flags, YR_ATOM_LIST_ITEM** atoms) { ATOM_TREE* atom_tree = (ATOM_TREE*) yr_malloc(sizeof(ATOM_TREE)); ATOM_TREE_NODE* temp; YR_ATOM_LIST_ITEM* wide_atoms; YR_ATOM_LIST_ITEM* case_insensitive_atoms; YR_ATOM_LIST_ITEM* triplet_atoms; int min_atom_quality = YR_MIN_ATOM_QUALITY; if (atom_tree == NULL) return ERROR_INSUFFICIENT_MEMORY; atom_tree->root_node = _yr_atoms_tree_node_create(ATOM_TREE_OR); if (atom_tree->root_node == NULL) { _yr_atoms_tree_destroy(atom_tree); return ERROR_INSUFFICIENT_MEMORY; } atom_tree->current_leaf = NULL; atom_tree->root_node = _yr_atoms_extract_from_re_node( re_ast->root_node, atom_tree, atom_tree->root_node); if (atom_tree->root_node == NULL) { _yr_atoms_tree_destroy(atom_tree); return ERROR_INSUFFICIENT_MEMORY; } if (atom_tree->current_leaf != NULL) _yr_atoms_tree_node_append(atom_tree->root_node, atom_tree->current_leaf); if (atom_tree->root_node->children_head == atom_tree->root_node->children_tail) { // The root OR node has a single child, there's no need for the OR node so // we proceed to destroy it and use its child as root. temp = atom_tree->root_node; atom_tree->root_node = atom_tree->root_node->children_head; yr_free(temp); } // Initialize atom list *atoms = NULL; if (atom_tree->root_node != NULL) { // Choose the atoms that will be used. FAIL_ON_ERROR_WITH_CLEANUP( _yr_atoms_choose(atom_tree->root_node, atoms, &min_atom_quality), _yr_atoms_tree_destroy(atom_tree)); } _yr_atoms_tree_destroy(atom_tree); if (min_atom_quality <= 2) { // Chosen atoms contain low quality ones, let's try infering some higher // quality atoms. FAIL_ON_ERROR_WITH_CLEANUP( yr_atoms_extract_triplets(re_ast->root_node, &triplet_atoms), { yr_atoms_list_destroy(*atoms); yr_atoms_list_destroy(triplet_atoms); *atoms = NULL; });
//
// _yr_parser_write_string
//
// Allocates a YR_STRING in the compiler's strings arena, fills it in and
// registers it in the Aho-Corasick automaton.
//
// Args:
//    identifier       String identifier, copied into the sz arena.
//    flags            STRING_GFLAGS_xxx flags for the string.
//    compiler         Compiler owning the arenas and the automaton.
//    str              Text of the string, used when the string is neither
//                     hexadecimal nor a regular expression.
//    re               Compiled regular expression, used for hexadecimal and
//                     regexp strings.
//    string           Receives the newly allocated YR_STRING.
//    min_atom_length  Receives the length of the shortest atom extracted, or
//                     0 if no atoms were extracted.
//
// Returns:
//    ERROR_SUCCESS or the error code of the first step that failed.
//

int _yr_parser_write_string(
    const char* identifier,
    int flags,
    YR_COMPILER* compiler,
    SIZED_STRING* str,
    RE* re,
    YR_STRING** string,
    int* min_atom_length)
{
  SIZED_STRING* literal;
  YR_AC_MATCH* root_match;
  YR_ATOM_LIST_ITEM* cursor;
  YR_ATOM_LIST_ITEM* atoms = NULL;

  int rc;
  int widest_len;
  int owns_literal = FALSE;

  *string = NULL;

  rc = yr_arena_allocate_struct(
      compiler->strings_arena,
      sizeof(YR_STRING),
      (void**) string,
      offsetof(YR_STRING, identifier),
      offsetof(YR_STRING, string),
      offsetof(YR_STRING, chained_to),
      EOL);

  if (rc != ERROR_SUCCESS)
    return rc;

  rc = yr_arena_write_string(
      compiler->sz_arena,
      identifier,
      &(*string)->identifier);

  if (rc != ERROR_SUCCESS)
    return rc;

  if (!(flags & STRING_GFLAGS_HEXADECIMAL) && !(flags & STRING_GFLAGS_REGEXP))
  {
    // Plain text strings are literal by definition.
    literal = str;
    flags |= STRING_GFLAGS_LITERAL;
  }
  else
  {
    // Hex strings and regexps are literal only if a literal can be extracted
    // from them; the extracted literal is released before returning.
    literal = yr_re_extract_literal(re);

    if (literal != NULL)
    {
      flags |= STRING_GFLAGS_LITERAL;
      owns_literal = TRUE;
    }
  }

  (*string)->g_flags = flags;
  (*string)->chained_to = NULL;

  memset((*string)->matches, 0, sizeof((*string)->matches));

  memset(
      (*string)->unconfirmed_matches,
      0,
      sizeof((*string)->unconfirmed_matches));

  if (!(flags & STRING_GFLAGS_LITERAL))
  {
    rc = yr_re_emit_code(re, compiler->re_code_arena);

    if (rc == ERROR_SUCCESS)
      rc = yr_atoms_extract_from_re(re, flags, &atoms);
  }
  else
  {
    (*string)->length = literal->length;

    rc = yr_arena_write_data(
        compiler->sz_arena,
        literal->c_string,
        literal->length,
        (void*) &(*string)->string);

    if (rc == ERROR_SUCCESS)
    {
      rc = yr_atoms_extract_from_string(
          (uint8_t*) literal->c_string,
          literal->length,
          flags,
          &atoms);
    }
  }

  if (rc == ERROR_SUCCESS)
  {
    // Add the string to Aho-Corasick automaton.

    if (atoms == NULL)
    {
      // Without atoms the string is linked to the automaton's root node.
      rc = yr_arena_allocate_struct(
          compiler->automaton_arena,
          sizeof(YR_AC_MATCH),
          (void**) &root_match,
          offsetof(YR_AC_MATCH, string),
          offsetof(YR_AC_MATCH, forward_code),
          offsetof(YR_AC_MATCH, backward_code),
          offsetof(YR_AC_MATCH, next),
          EOL);

      if (rc == ERROR_SUCCESS)
      {
        root_match->backtrack = 0;
        root_match->string = *string;
        root_match->forward_code = re->root_node->forward_code;
        root_match->backward_code = NULL;
        root_match->next = compiler->automaton->root->matches;
        compiler->automaton->root->matches = root_match;
      }
    }
    else
    {
      rc = yr_ac_add_string(
          compiler->automaton_arena,
          compiler->automaton,
          *string,
          atoms);
    }
  }

  // Length of the shortest atom, or 0 when there are no atoms at all.
  *min_atom_length = (atoms != NULL) ? MAX_ATOM_LENGTH : 0;

  for (cursor = atoms; cursor != NULL; cursor = cursor->next)
  {
    if (cursor->atom_length < *min_atom_length)
      *min_atom_length = cursor->atom_length;
  }

  if (flags & STRING_GFLAGS_LITERAL)
  {
    widest_len = (flags & STRING_GFLAGS_WIDE) ?
        (*string)->length * 2 : (*string)->length;

    if (widest_len == *min_atom_length)
      (*string)->g_flags |= STRING_GFLAGS_FITS_IN_ATOM;
  }

  if (owns_literal)
    yr_free(literal);

  if (atoms != NULL)
    yr_atoms_list_destroy(atoms);

  return rc;
}
//
// _yr_parser_write_string
//
// Allocates a YR_STRING in the compiler's strings arena, fills it in and
// registers it in the Aho-Corasick automaton.
//
// Args:
//    identifier        String identifier, copied into the sz arena.
//    flags             STRING_GFLAGS_xxx flags for the string.
//    compiler          Compiler owning the arenas and the automaton.
//    str               Text of the string, used when the string is neither
//                      hexadecimal nor a regular expression.
//    re                Compiled regular expression, used for hexadecimal and
//                      regexp strings.
//    string            Receives the newly allocated YR_STRING.
//    min_atom_quality  Receives the value returned by yr_atoms_min_quality
//                      for the extracted atom list.
//
// Returns:
//    ERROR_SUCCESS or the error code of the first step that failed.
//

int _yr_parser_write_string(
    const char* identifier,
    int flags,
    YR_COMPILER* compiler,
    SIZED_STRING* str,
    RE* re,
    YR_STRING** string,
    int* min_atom_quality)
{
  SIZED_STRING* literal;
  YR_AC_MATCH* root_match;
  YR_ATOM_LIST_ITEM* atoms = NULL;

  int rc;
  int widest_len;
  int owns_literal = FALSE;

  *string = NULL;

  rc = yr_arena_allocate_struct(
      compiler->strings_arena,
      sizeof(YR_STRING),
      (void**) string,
      offsetof(YR_STRING, identifier),
      offsetof(YR_STRING, string),
      offsetof(YR_STRING, chained_to),
      EOL);

  if (rc != ERROR_SUCCESS)
    return rc;

  rc = yr_arena_write_string(
      compiler->sz_arena,
      identifier,
      &(*string)->identifier);

  if (rc != ERROR_SUCCESS)
    return rc;

  if (!(flags & STRING_GFLAGS_HEXADECIMAL) && !(flags & STRING_GFLAGS_REGEXP))
  {
    // Plain text strings are literal by definition.
    literal = str;
    flags |= STRING_GFLAGS_LITERAL;
  }
  else
  {
    literal = yr_re_extract_literal(re);

    if (literal == NULL)
    {
      // Non-literal strings can't be marked as fixed offset because once we
      // find a string atom in the scanned data we don't know the offset where
      // the string should start, as the non-literal strings can contain
      // variable-length portions.

      flags &= ~STRING_GFLAGS_FIXED_OFFSET;
    }
    else
    {
      // The extracted literal is released before returning.
      flags |= STRING_GFLAGS_LITERAL;
      owns_literal = TRUE;
    }
  }

  (*string)->g_flags = flags;
  (*string)->chained_to = NULL;
  (*string)->fixed_offset = UNDEFINED;

  #ifdef PROFILING_ENABLED
  (*string)->clock_ticks = 0;
  #endif

  memset((*string)->matches, 0, sizeof((*string)->matches));

  memset(
      (*string)->unconfirmed_matches,
      0,
      sizeof((*string)->unconfirmed_matches));

  if (!(flags & STRING_GFLAGS_LITERAL))
  {
    rc = yr_re_emit_code(re, compiler->re_code_arena);

    if (rc == ERROR_SUCCESS)
      rc = yr_atoms_extract_from_re(re, flags, &atoms);
  }
  else
  {
    (*string)->length = (uint32_t) literal->length;

    rc = yr_arena_write_data(
        compiler->sz_arena,
        literal->c_string,
        literal->length + 1,   // +1 to include terminating NULL
        (void**) &(*string)->string);

    if (rc == ERROR_SUCCESS)
    {
      rc = yr_atoms_extract_from_string(
          (uint8_t*) literal->c_string,
          (int32_t) literal->length,
          flags,
          &atoms);
    }
  }

  if (rc == ERROR_SUCCESS)
  {
    // Add the string to Aho-Corasick automaton.

    if (atoms == NULL)
    {
      // Without atoms the string is linked to the automaton's root node.
      rc = yr_arena_allocate_struct(
          compiler->automaton_arena,
          sizeof(YR_AC_MATCH),
          (void**) &root_match,
          offsetof(YR_AC_MATCH, string),
          offsetof(YR_AC_MATCH, forward_code),
          offsetof(YR_AC_MATCH, backward_code),
          offsetof(YR_AC_MATCH, next),
          EOL);

      if (rc == ERROR_SUCCESS)
      {
        root_match->backtrack = 0;
        root_match->string = *string;
        root_match->forward_code = re->root_node->forward_code;
        root_match->backward_code = NULL;
        root_match->next = compiler->automaton->root->matches;
        compiler->automaton->root->matches = root_match;
      }
    }
    else
    {
      rc = yr_ac_add_string(
          compiler->automaton_arena,
          compiler->automaton,
          *string,
          atoms);
    }
  }

  *min_atom_quality = yr_atoms_min_quality(atoms);

  if (flags & STRING_GFLAGS_LITERAL)
  {
    // A wide string takes two bytes per character.
    widest_len = (flags & STRING_GFLAGS_WIDE) ?
        (*string)->length * 2 : (*string)->length;

    if (widest_len <= MAX_ATOM_LENGTH)
      (*string)->g_flags |= STRING_GFLAGS_FITS_IN_ATOM;
  }

  if (owns_literal)
    yr_free(literal);

  if (atoms != NULL)
    yr_atoms_list_destroy(atoms);

  return rc;
}
//
// yr_atoms_extract_from_re
//
// Extracts the atoms to be used for a regular expression. An atom tree is
// built from the regexp nodes and the atoms are chosen from it with
// _yr_atoms_choose. If the chosen atoms have low quality, the atoms returned
// by yr_atoms_extract_triplets replace them when they are better. Wide and
// case-insensitive variants are then added according to `flags`.
//
// Args:
//    re      Regular expression whose root node is traversed.
//    flags   STRING_GFLAGS_xxx flags (WIDE, ASCII and NO_CASE are examined).
//    atoms   Receives the resulting atom list. It is set to NULL before any
//            work is done; whatever it holds on return, including on error,
//            is owned by the caller.
//
// Returns:
//    ERROR_SUCCESS, ERROR_INSUFICIENT_MEMORY if the atom tree can't be
//    built, or the error returned while generating the wide or
//    case-insensitive atoms.
//

int yr_atoms_extract_from_re(
    RE* re,
    int flags,
    YR_ATOM_LIST_ITEM** atoms)
{
  ATOM_TREE* atom_tree = yr_malloc(sizeof(ATOM_TREE));
  ATOM_TREE_NODE* temp;

  YR_ATOM_LIST_ITEM* wide_atoms;
  YR_ATOM_LIST_ITEM* case_insentive_atoms;
  YR_ATOM_LIST_ITEM* triplet_atoms;

  int min_atom_quality = 0;

  *atoms = NULL;

  if (atom_tree == NULL)
    return ERROR_INSUFICIENT_MEMORY;

  atom_tree->root_node = _yr_atoms_tree_node_create(ATOM_TREE_OR);
  atom_tree->current_leaf = NULL;

  if (atom_tree->root_node == NULL)
  {
    _yr_atoms_tree_destroy(atom_tree);
    return ERROR_INSUFICIENT_MEMORY;
  }

  atom_tree->root_node = _yr_atoms_extract_from_re_node(
      re->root_node, atom_tree, atom_tree->root_node);

  // The root node is dereferenced right below, a NULL result can only be
  // treated as a failure.
  if (atom_tree->root_node == NULL)
  {
    _yr_atoms_tree_destroy(atom_tree);
    return ERROR_INSUFICIENT_MEMORY;
  }

  if (atom_tree->current_leaf != NULL)
    _yr_atoms_tree_node_append(atom_tree->root_node, atom_tree->current_leaf);

  if (atom_tree->root_node->children_head ==
      atom_tree->root_node->children_tail)
  {
    // The root OR node has a single child, there's no need for the OR node so
    // we proceed to destroy it and use its child as root.

    temp = atom_tree->root_node;
    atom_tree->root_node = atom_tree->root_node->children_head;
    yr_free(temp);
  }

  // Choose the atoms that will be used.

  min_atom_quality = _yr_atoms_choose(atom_tree->root_node, atoms);

  _yr_atoms_tree_destroy(atom_tree);

  if (min_atom_quality <= 2)
  {
    // Chosen atoms contain low quality ones, let's try inferring some higher
    // quality atoms.

    yr_atoms_extract_triplets(re->root_node, &triplet_atoms);

    if (min_atom_quality < _yr_atoms_min_quality(triplet_atoms))
    {
      yr_atoms_list_destroy(*atoms);
      *atoms = triplet_atoms;
    }
    else
    {
      yr_atoms_list_destroy(triplet_atoms);
    }
  }

  if (flags & STRING_GFLAGS_WIDE)
  {
    FAIL_ON_ERROR(_yr_atoms_wide(
        *atoms, &wide_atoms));

    if (flags & STRING_GFLAGS_ASCII)
    {
      *atoms = _yr_atoms_list_concat(*atoms, wide_atoms);
    }
    else
    {
      yr_atoms_list_destroy(*atoms);
      *atoms = wide_atoms;
    }
  }

  if (flags & STRING_GFLAGS_NO_CASE)
  {
    FAIL_ON_ERROR(_yr_atoms_case_insentive(
        *atoms, &case_insentive_atoms));

    *atoms = _yr_atoms_list_concat(*atoms, case_insentive_atoms);
  }

  return ERROR_SUCCESS;
}
//
// _yr_atoms_choose
//
// Walks the atom tree rooted at `node` and builds in *choosen_atoms the list
// of atoms selected for it:
//
//   ATOM_TREE_LEAF  a single list item copied from the leaf.
//   ATOM_TREE_OR    the list produced by the child with the highest quality.
//   ATOM_TREE_AND   the concatenation of the lists produced by all children.
//
// Args:
//    node            Root of the (sub)tree to process, may be NULL.
//    choosen_atoms   Receives the resulting list (may be NULL).
//
// Returns:
//    The quality of the chosen atoms: the leaf's own quality, the maximum
//    among the children of an OR node, or the minimum among the children of
//    an AND node. Returns 0 with an empty list if `node` is NULL or if the
//    list item for a leaf can't be allocated (this function has no way of
//    reporting errors, so the leaf is treated as providing no atoms).
//

int _yr_atoms_choose(
    ATOM_TREE_NODE* node,
    YR_ATOM_LIST_ITEM** choosen_atoms)
{
  ATOM_TREE_NODE* child;
  YR_ATOM_LIST_ITEM* item;
  YR_ATOM_LIST_ITEM* tail;

  int i, quality;
  int max_quality = 0;
  int min_quality = 10000;

  *choosen_atoms = NULL;

  if (node == NULL)
    return 0;

  switch (node->type)
  {
    case ATOM_TREE_LEAF:

      item = yr_malloc(sizeof(YR_ATOM_LIST_ITEM));

      // Don't write through a NULL pointer when the allocation fails.
      if (item == NULL)
        return 0;

      for (i = 0; i < node->atom_length; i++)
        item->atom[i] = node->atom[i];

      item->atom_length = node->atom_length;
      item->forward_code = node->forward_code;
      item->backward_code = node->backward_code;
      item->backtrack = 0;
      item->next = NULL;

      *choosen_atoms = item;

      return _yr_atoms_quality(node->atom, node->atom_length);

    case ATOM_TREE_OR:

      child = node->children_head;

      while (child != NULL)
      {
        quality = _yr_atoms_choose(child, &item);

        if (quality > max_quality)
        {
          max_quality = quality;
          yr_atoms_list_destroy(*choosen_atoms);
          *choosen_atoms = item;
        }
        else
        {
          yr_atoms_list_destroy(item);
        }

        child = child->next_sibling;
      }

      return max_quality;

    case ATOM_TREE_AND:

      child = node->children_head;

      while (child != NULL)
      {
        quality = _yr_atoms_choose(child, &item);

        if (quality < min_quality)
          min_quality = quality;

        // A child can yield an empty list (e.g. an OR node none of whose
        // children has a quality above zero); there's nothing to link then.
        if (item != NULL)
        {
          // Prepend the child's list to the atoms gathered so far.
          tail = item;

          while (tail->next != NULL)
            tail = tail->next;

          tail->next = *choosen_atoms;
          *choosen_atoms = item;
        }

        child = child->next_sibling;
      }

      return min_quality;
  }

  return 0;
}
//
// yr_atoms_extract_from_string
//
// Extracts the atom to be used for a text string. The atom starts as the
// first min(string_length, MAX_ATOM_LENGTH) bytes of the string; every other
// window of MAX_ATOM_LENGTH bytes is then rated with _yr_atoms_quality and
// the atom is replaced whenever a window with a higher quality is found,
// recording the window's offset in the item's `backtrack` field. Wide and
// case-insensitive variants are added according to `flags`.
//
// Args:
//    string          Bytes of the string.
//    string_length   Number of bytes in `string`.
//    flags           STRING_GFLAGS_xxx flags (WIDE, ASCII and NO_CASE are
//                    examined).
//    atoms           Receives the resulting atom list on success. It is left
//                    untouched on failure.
//
// Returns:
//    ERROR_SUCCESS, ERROR_INSUFICIENT_MEMORY if the list item can't be
//    allocated, or the error returned while generating the wide or
//    case-insensitive atoms. On failure the atoms built so far are released.
//

int yr_atoms_extract_from_string(
    uint8_t* string,
    int string_length,
    int flags,
    YR_ATOM_LIST_ITEM** atoms)
{
  YR_ATOM_LIST_ITEM* item;
  YR_ATOM_LIST_ITEM* case_insentive_atoms;
  YR_ATOM_LIST_ITEM* wide_atoms;

  int max_quality;
  int quality;
  int result;
  int i, j, length;

  item = yr_malloc(sizeof(YR_ATOM_LIST_ITEM));

  if (item == NULL)
    return ERROR_INSUFICIENT_MEMORY;

  item->forward_code = NULL;
  item->backward_code = NULL;
  item->next = NULL;
  item->backtrack = 0;

  length = min(string_length, MAX_ATOM_LENGTH);

  for (i = 0; i < length; i++)
    item->atom[i] = string[i];

  item->atom_length = i;

  max_quality = _yr_atoms_quality(string, length);

  // Slide a window of MAX_ATOM_LENGTH bytes over the rest of the string; `i`
  // is the index of the last byte in the window.

  for (i = MAX_ATOM_LENGTH; i < string_length; i++)
  {
    quality = _yr_atoms_quality(
        string + i - MAX_ATOM_LENGTH + 1, MAX_ATOM_LENGTH);

    if (quality > max_quality)
    {
      for (j = 0; j < MAX_ATOM_LENGTH; j++)
        item->atom[j] = string[i + j - MAX_ATOM_LENGTH + 1];

      item->backtrack = i - MAX_ATOM_LENGTH + 1;
      max_quality = quality;
    }
  }

  if (flags & STRING_GFLAGS_WIDE)
  {
    result = _yr_atoms_wide(item, &wide_atoms);

    if (result != ERROR_SUCCESS)
    {
      // The list hasn't been handed to the caller yet, it must be released
      // here or it would leak.
      yr_atoms_list_destroy(item);
      return result;
    }

    if (flags & STRING_GFLAGS_ASCII)
    {
      item = _yr_atoms_list_concat(item, wide_atoms);
    }
    else
    {
      yr_atoms_list_destroy(item);
      item = wide_atoms;
    }
  }

  if (flags & STRING_GFLAGS_NO_CASE)
  {
    result = _yr_atoms_case_insentive(item, &case_insentive_atoms);

    if (result != ERROR_SUCCESS)
    {
      yr_atoms_list_destroy(item);
      return result;
    }

    item = _yr_atoms_list_concat(item, case_insentive_atoms);
  }

  *atoms = item;

  return ERROR_SUCCESS;
}
//
// _yr_parser_write_string
//
// Allocates a YR_STRING in the compiler's strings arena, fills it in and
// adds it to the Aho-Corasick automaton.
//
// Args:
//    identifier        String identifier, copied into the sz arena.
//    flags             STRING_GFLAGS_xxx flags for the string.
//    compiler          Compiler owning the arenas, the automaton and the
//                      atoms configuration.
//    str               Text of the string, used when the string is neither
//                      hexadecimal nor a regular expression.
//    re_ast            Regexp AST, used for hexadecimal and regexp strings.
//    string            Receives the newly allocated YR_STRING.
//    min_atom_quality  Passed through to the atom extraction functions.
//    num_atom          Incremented by the number of atoms extracted.
//
// Returns:
//    ERROR_SUCCESS or the error code of the first step that failed.
//

static int _yr_parser_write_string(
    const char* identifier,
    int flags,
    YR_COMPILER* compiler,
    SIZED_STRING* str,
    RE_AST* re_ast,
    YR_STRING** string,
    int* min_atom_quality,
    int* num_atom)
{
  SIZED_STRING* literal;
  YR_ATOM_LIST_ITEM* cursor;
  YR_ATOM_LIST_ITEM* atoms = NULL;

  int rc;
  int atom_count = 0;
  int widest_len;
  bool owns_literal = false;

  *string = NULL;

  rc = yr_arena_allocate_struct(
      compiler->strings_arena,
      sizeof(YR_STRING),
      (void**) string,
      offsetof(YR_STRING, identifier),
      offsetof(YR_STRING, string),
      offsetof(YR_STRING, chained_to),
      offsetof(YR_STRING, rule),
      EOL);

  if (rc != ERROR_SUCCESS)
    return rc;

  rc = yr_arena_write_string(
      compiler->sz_arena,
      identifier,
      &(*string)->identifier);

  if (rc != ERROR_SUCCESS)
    return rc;

  if (!(flags & STRING_GFLAGS_HEXADECIMAL) && !(flags & STRING_GFLAGS_REGEXP))
  {
    // Plain text strings are literal by definition.
    literal = str;
    flags |= STRING_GFLAGS_LITERAL;
  }
  else
  {
    literal = yr_re_ast_extract_literal(re_ast);

    if (literal == NULL)
    {
      // Non-literal strings can't be marked as fixed offset because once we
      // find a string atom in the scanned data we don't know the offset where
      // the string should start, as the non-literal strings can contain
      // variable-length portions.

      flags &= ~STRING_GFLAGS_FIXED_OFFSET;
    }
    else
    {
      // The extracted literal is released before returning.
      flags |= STRING_GFLAGS_LITERAL;
      owns_literal = true;
    }
  }

  (*string)->g_flags = flags;
  (*string)->chained_to = NULL;
  (*string)->fixed_offset = UNDEFINED;
  (*string)->rule = compiler->current_rule;

  memset((*string)->matches, 0, sizeof((*string)->matches));

  memset(
      (*string)->unconfirmed_matches,
      0,
      sizeof((*string)->unconfirmed_matches));

  if (!(flags & STRING_GFLAGS_LITERAL))
  {
    // Emit forwards code
    rc = yr_re_ast_emit_code(re_ast, compiler->re_code_arena, false);

    // Emit backwards code
    if (rc == ERROR_SUCCESS)
      rc = yr_re_ast_emit_code(re_ast, compiler->re_code_arena, true);

    if (rc == ERROR_SUCCESS)
    {
      rc = yr_atoms_extract_from_re(
          &compiler->atoms_config,
          re_ast,
          flags,
          &atoms,
          min_atom_quality);
    }
  }
  else
  {
    (*string)->length = (uint32_t) literal->length;

    rc = yr_arena_write_data(
        compiler->sz_arena,
        literal->c_string,
        literal->length + 1,   // +1 to include terminating NULL
        (void**) &(*string)->string);

    if (rc == ERROR_SUCCESS)
    {
      rc = yr_atoms_extract_from_string(
          &compiler->atoms_config,
          (uint8_t*) literal->c_string,
          (int32_t) literal->length,
          flags,
          &atoms,
          min_atom_quality);
    }
  }

  if (rc == ERROR_SUCCESS)
  {
    // Add the string to Aho-Corasick automaton.
    rc = yr_ac_add_string(
        compiler->automaton,
        *string,
        atoms,
        compiler->matches_arena);
  }

  if (flags & STRING_GFLAGS_LITERAL)
  {
    // A wide string takes two bytes per character.
    widest_len = (flags & STRING_GFLAGS_WIDE) ?
        (*string)->length * 2 : (*string)->length;

    if (widest_len <= YR_MAX_ATOM_LENGTH)
      (*string)->g_flags |= STRING_GFLAGS_FITS_IN_ATOM;
  }

  // Account for the atoms extracted for this string.
  for (cursor = atoms; cursor != NULL; cursor = cursor->next)
    atom_count++;

  (*num_atom) += atom_count;

  if (owns_literal)
    yr_free(literal);

  if (atoms != NULL)
    yr_atoms_list_destroy(atoms);

  return rc;
}
//
// yr_parser_reduce_string_declaration
//
// Creates the YR_STRING for a string declaration, extracts its atoms and
// adds it to the compiler's Aho-Corasick automaton. Hexadecimal strings and
// regular expressions are compiled first; those flagged by the regexp
// compiler as literal strings are handled as plain text strings. A warning is
// reported through compiler->error_report_function when the shortest atom
// has fewer than 2 bytes.
//
// Args:
//    yyscanner    Scanner whose extra data is the YR_COMPILER in use.
//    flags        STRING_GFLAGS_xxx flags from the declaration.
//    identifier   String identifier ("$" for anonymous strings).
//    str          Text of the string, hex string or regular expression.
//
// Returns:
//    The new YR_STRING, or NULL on error. The error code is left in
//    compiler->last_result.
//

YR_STRING* yr_parser_reduce_string_declaration(
    yyscan_t yyscanner,
    int32_t flags,
    const char* identifier,
    SIZED_STRING* str)
{
  int min_atom_length;
  char* file_name;
  char message[512];

  YR_STRING* string;
  YR_AC_MATCH* new_match;
  YR_ATOM_LIST_ITEM* atom;
  YR_ATOM_LIST_ITEM* atom_list = NULL;

  RE* re = NULL;

  uint8_t* literal_string;

  int literal_string_len;
  int max_string_len;

  YR_COMPILER* compiler = yyget_extra(yyscanner);

  compiler->last_result = yr_arena_allocate_struct(
      compiler->strings_arena,
      sizeof(YR_STRING),
      (void**) &string,
      offsetof(YR_STRING, identifier),
      offsetof(YR_STRING, string),
      EOL);

  if (compiler->last_result != ERROR_SUCCESS)
    return NULL;

  compiler->last_result = yr_arena_write_string(
      compiler->sz_arena,
      identifier,
      &string->identifier);

  if (compiler->last_result != ERROR_SUCCESS)
    return NULL;

  if (strcmp(identifier,"$") == 0)
    flags |= STRING_GFLAGS_ANONYMOUS;

  if (!(flags & STRING_GFLAGS_WIDE))
    flags |= STRING_GFLAGS_ASCII;

  // The STRING_GFLAGS_SINGLE_MATCH flag indicates that finding
  // a single match for the string is enough. This is true in
  // most cases, except when the string count (#) and string offset (@)
  // operators are used. All strings are marked STRING_GFLAGS_SINGLE_MATCH
  // initially, and unmarked later if required.

  flags |= STRING_GFLAGS_SINGLE_MATCH;

  string->g_flags = flags;

  memset(string->matches, 0, sizeof(string->matches));

  if (flags & STRING_GFLAGS_HEXADECIMAL ||
      flags & STRING_GFLAGS_REGEXP)
  {
    if (flags & STRING_GFLAGS_HEXADECIMAL)
      compiler->last_result = yr_re_compile_hex(
          str->c_string, &re);
    else
      compiler->last_result = yr_re_compile(
          str->c_string, &re);

    if (compiler->last_result != ERROR_SUCCESS)
    {
      // `re` is still NULL if the compile function failed before creating
      // it, in which case there's no error message to report.
      if (re != NULL)
      {
        snprintf(
            message,
            sizeof(message),
            "invalid %s in string \"%s\": %s",
            (flags & STRING_GFLAGS_HEXADECIMAL) ?
                "hex string" : "regular expression",
            identifier,
            re->error_message);

        yr_compiler_set_error_extra_info(compiler, message);
      }

      string = NULL;
      goto _exit;
    }

    if (re->flags & RE_FLAGS_START_ANCHORED)
      string->g_flags |= STRING_GFLAGS_START_ANCHORED;

    if (re->flags & RE_FLAGS_END_ANCHORED)
      string->g_flags |= STRING_GFLAGS_END_ANCHORED;

    if (re->flags & RE_FLAGS_FAST_HEX_REGEXP)
      string->g_flags |= STRING_GFLAGS_FAST_HEX_REGEXP;

    if (re->flags & RE_FLAGS_LITERAL_STRING)
    {
      string->g_flags |= STRING_GFLAGS_LITERAL;
      literal_string = re->literal_string;
      literal_string_len = re->literal_string_len;

      compiler->last_result = yr_atoms_extract_from_string(
          literal_string, literal_string_len, string->g_flags, &atom_list);
    }
    else
    {
      compiler->last_result = yr_re_emit_code(
          re, compiler->re_code_arena);

      if (compiler->last_result != ERROR_SUCCESS)
      {
        string = NULL;
        goto _exit;
      }

      compiler->last_result = yr_atoms_extract_from_re(
          re, string->g_flags, &atom_list);
    }
  }
  else
  {
    string->g_flags |= STRING_GFLAGS_LITERAL;
    literal_string = (uint8_t*) str->c_string;
    literal_string_len = str->length;

    compiler->last_result = yr_atoms_extract_from_string(
        literal_string, literal_string_len, string->g_flags, &atom_list);
  }

  if (compiler->last_result != ERROR_SUCCESS)
  {
    string = NULL;
    goto _exit;
  }

  if (STRING_IS_LITERAL(string))
  {
    compiler->last_result = yr_arena_write_data(
        compiler->sz_arena,
        literal_string,
        literal_string_len,
        (void*) &string->string);

    if (compiler->last_result != ERROR_SUCCESS)
    {
      string = NULL;
      goto _exit;
    }

    string->length = literal_string_len;
  }

  // Add the string to Aho-Corasick automaton.

  if (atom_list != NULL)
  {
    compiler->last_result = yr_ac_add_string(
        compiler->automaton_arena,
        compiler->automaton,
        string,
        atom_list);
  }
  else
  {
    // Without atoms the string is linked to the automaton's root node.
    compiler->last_result = yr_arena_allocate_struct(
        compiler->automaton_arena,
        sizeof(YR_AC_MATCH),
        (void**) &new_match,
        offsetof(YR_AC_MATCH, string),
        offsetof(YR_AC_MATCH, forward_code),
        offsetof(YR_AC_MATCH, backward_code),
        offsetof(YR_AC_MATCH, next),
        EOL);

    if (compiler->last_result == ERROR_SUCCESS)
    {
      new_match->backtrack = 0;
      new_match->string = string;
      new_match->forward_code = re->root_node->forward_code;
      new_match->backward_code = NULL;
      new_match->next = compiler->automaton->root->matches;
      compiler->automaton->root->matches = new_match;
    }
  }

  // Find the length of the shortest atom, or 0 if there are no atoms.

  atom = atom_list;

  if (atom != NULL)
    min_atom_length = MAX_ATOM_LENGTH;
  else
    min_atom_length = 0;

  while (atom != NULL)
  {
    if (atom->atom_length < min_atom_length)
      min_atom_length = atom->atom_length;

    atom = atom->next;
  }

  if (STRING_IS_LITERAL(string))
  {
    if (STRING_IS_WIDE(string))
      max_string_len = string->length * 2;
    else
      max_string_len = string->length;

    if (max_string_len == min_atom_length)
      string->g_flags |= STRING_GFLAGS_FITS_IN_ATOM;
  }

  if (compiler->file_name_stack_ptr > 0)
    file_name = compiler->file_name_stack[compiler->file_name_stack_ptr - 1];
  else
    file_name = NULL;

  if (min_atom_length < 2 && compiler->error_report_function != NULL)
  {
    snprintf(
        message,
        sizeof(message),
        "%s is slowing down scanning%s",
        string->identifier,
        min_atom_length == 0 ? " (critical!)" : "");

    compiler->error_report_function(
        YARA_ERROR_LEVEL_WARNING,
        file_name,
        yyget_lineno(yyscanner),
        message);
  }

  if (compiler->last_result != ERROR_SUCCESS)
    string = NULL;

_exit:

  if (atom_list != NULL)
    yr_atoms_list_destroy(atom_list);

  if (re != NULL)
    yr_re_destroy(re);

  return string;
}
//
// _yr_atoms_choose
//
// Walks the atom tree rooted at `node` and builds in *chosen_atoms the list
// of atoms selected for it. The quality of the selection is stored in
// *atoms_quality:
//
//   ATOM_TREE_LEAF  a single list item holding the leaf's atom after
//                   _yr_atoms_trim, rated with config->get_atom_quality. No
//                   item is produced if the trimmed atom is empty.
//   ATOM_TREE_OR    the list produced by the child with the highest quality.
//   ATOM_TREE_AND   the concatenation of the lists produced by all children,
//                   rated with the lowest quality among them.
//
// Args:
//    config         Atoms configuration providing get_atom_quality.
//    node           Root of the (sub)tree to process. Must not be NULL.
//    chosen_atoms   Receives the resulting list (may be NULL).
//    atoms_quality  Receives the quality of the resulting list.
//
// Returns:
//    ERROR_SUCCESS, or the error code of the step that failed. On failure
//    every atom gathered so far is released and *chosen_atoms is NULL, so
//    the caller has nothing to free.
//

static int _yr_atoms_choose(
    YR_ATOMS_CONFIG* config,
    YR_ATOM_TREE_NODE* node,
    YR_ATOM_LIST_ITEM** chosen_atoms,
    int* atoms_quality)
{
  YR_ATOM_TREE_NODE* child;
  YR_ATOM_LIST_ITEM* item;
  YR_ATOM_LIST_ITEM* tail;

  int shift, quality, result;

  int max_quality = YR_MIN_ATOM_QUALITY;
  int min_quality = YR_MAX_ATOM_QUALITY;

  *chosen_atoms = NULL;
  *atoms_quality = YR_MIN_ATOM_QUALITY;

  switch (node->type)
  {
    case ATOM_TREE_LEAF:

      item = (YR_ATOM_LIST_ITEM*) yr_malloc(sizeof(YR_ATOM_LIST_ITEM));

      if (item == NULL)
        return ERROR_INSUFFICIENT_MEMORY;

      memcpy(&item->atom, &node->atom, sizeof(YR_ATOM));

      // The value returned by _yr_atoms_trim selects the regexp node whose
      // code pointers are associated to the trimmed atom.
      shift = _yr_atoms_trim(&item->atom);

      if (item->atom.length > 0)
      {
        item->forward_code = node->re_nodes[shift]->forward_code;
        item->backward_code = node->re_nodes[shift]->backward_code;
        item->backtrack = 0;
        item->next = NULL;

        *chosen_atoms = item;
        *atoms_quality = config->get_atom_quality(config, &item->atom);
      }
      else
      {
        yr_free(item);
      }

      break;

    case ATOM_TREE_OR:

      // The chosen nodes are those coming from the highest quality child.

      child = node->children_head;

      while (child != NULL)
      {
        result = _yr_atoms_choose(config, child, &item, &quality);

        if (result != ERROR_SUCCESS)
        {
          // The failing call already released its own atoms; release the
          // best list kept so far so that it does not leak.
          yr_atoms_list_destroy(*chosen_atoms);
          *chosen_atoms = NULL;
          return result;
        }

        if (quality > max_quality)
        {
          max_quality = quality;
          yr_atoms_list_destroy(*chosen_atoms);
          *chosen_atoms = item;
        }
        else
        {
          yr_atoms_list_destroy(item);
        }

        // Nothing can beat the maximum quality, stop looking.
        if (max_quality == YR_MAX_ATOM_QUALITY)
          break;

        child = child->next_sibling;
      }

      *atoms_quality = max_quality;
      break;

    case ATOM_TREE_AND:

      // The chosen nodes are the concatenation of the nodes chosen from
      // all the children.

      child = node->children_head;

      while (child != NULL)
      {
        result = _yr_atoms_choose(config, child, &item, &quality);

        if (result != ERROR_SUCCESS)
        {
          // Release the atoms concatenated from the previous children.
          yr_atoms_list_destroy(*chosen_atoms);
          *chosen_atoms = NULL;
          return result;
        }

        if (quality < min_quality)
          min_quality = quality;

        if (item != NULL)
        {
          // Prepend the child's list to the atoms gathered so far.
          tail = item;

          while (tail->next != NULL)
            tail = tail->next;

          tail->next = *chosen_atoms;
          *chosen_atoms = item;
        }

        child = child->next_sibling;
      }

      *atoms_quality = min_quality;
      break;
  }

  return ERROR_SUCCESS;
}