//
// _yr_parser_write_string
//
// Allocates a YR_STRING in compiler->strings_arena, fills it in, extracts its
// atoms and registers it with compiler->automaton.
//
// Args:
//    identifier       - Name written to compiler->sz_arena; the new string's
//                       `identifier` field refers to that copy.
//    flags            - STRING_GFLAGS_* bits. STRING_GFLAGS_LITERAL is added
//                       locally when the string is handled as a literal.
//    compiler         - Provides the strings/sz/re_code/automaton arenas and
//                       the automaton itself.
//    str              - Literal text. Only read when neither
//                       STRING_GFLAGS_HEXADECIMAL nor STRING_GFLAGS_REGEXP is
//                       set in flags.
//    re               - Regular expression. Read for hexadecimal/regexp
//                       strings, and also when no atom list was produced.
//    string           - Out: the newly allocated YR_STRING. Set to NULL on
//                       entry, before anything can fail.
//    min_atom_length  - Out: smallest atom_length found in the atom list
//                       (never above MAX_ATOM_LENGTH), or 0 when the list is
//                       empty. Not written if the function bails out before
//                       the string structure is initialized.
//
// Returns:
//    ERROR_SUCCESS, or the first error code returned by one of the callees.
//

int _yr_parser_write_string(
    const char* identifier,
    int flags,
    YR_COMPILER* compiler,
    SIZED_STRING* str,
    RE* re,
    YR_STRING** string,
    int* min_atom_length)
{
  SIZED_STRING* literal = NULL;
  YR_STRING* new_string;
  YR_AC_MATCH* root_match;
  YR_ATOM_LIST_ITEM* atoms = NULL;
  YR_ATOM_LIST_ITEM* item;

  int status;
  int effective_len;
  int literal_is_owned = FALSE;

  *string = NULL;

  status = yr_arena_allocate_struct(
      compiler->strings_arena,
      sizeof(YR_STRING),
      (void**) string,
      offsetof(YR_STRING, identifier),
      offsetof(YR_STRING, string),
      offsetof(YR_STRING, chained_to),
      EOL);

  if (status != ERROR_SUCCESS)
    return status;

  // From here on *string is never reassigned, so a plain alias is enough.
  new_string = *string;

  status = yr_arena_write_string(
      compiler->sz_arena, identifier, &new_string->identifier);

  if (status != ERROR_SUCCESS)
    return status;

  if (flags & (STRING_GFLAGS_HEXADECIMAL | STRING_GFLAGS_REGEXP))
  {
    // The extracted literal (if any) is released with yr_free() before
    // returning, hence the ownership flag.
    literal = yr_re_extract_literal(re);

    if (literal != NULL)
    {
      literal_is_owned = TRUE;
      flags |= STRING_GFLAGS_LITERAL;
    }
  }
  else
  {
    // Plain text strings are literals by definition; the caller keeps
    // ownership of str.
    literal = str;
    flags |= STRING_GFLAGS_LITERAL;
  }

  new_string->chained_to = NULL;
  new_string->g_flags = flags;

  memset(
      new_string->matches, 0, sizeof(new_string->matches));

  memset(
      new_string->unconfirmed_matches,
      0,
      sizeof(new_string->unconfirmed_matches));

  if (flags & STRING_GFLAGS_LITERAL)
  {
    new_string->length = literal->length;

    status = yr_arena_write_data(
        compiler->sz_arena,
        literal->c_string,
        literal->length,
        (void*) &new_string->string);

    if (status == ERROR_SUCCESS)
      status = yr_atoms_extract_from_string(
          (uint8_t*) literal->c_string,
          literal->length,
          flags,
          &atoms);
  }
  else
  {
    status = yr_re_emit_code(re, compiler->re_code_arena);

    if (status == ERROR_SUCCESS)
      status = yr_atoms_extract_from_re(re, flags, &atoms);
  }

  if (status == ERROR_SUCCESS && atoms != NULL)
  {
    // Add the string to Aho-Corasick automaton.
    status = yr_ac_add_string(
        compiler->automaton_arena,
        compiler->automaton,
        new_string,
        atoms);
  }
  else if (status == ERROR_SUCCESS)
  {
    // No atoms available: hang a match entry directly off the automaton's
    // root node instead.
    status = yr_arena_allocate_struct(
        compiler->automaton_arena,
        sizeof(YR_AC_MATCH),
        (void**) &root_match,
        offsetof(YR_AC_MATCH, string),
        offsetof(YR_AC_MATCH, forward_code),
        offsetof(YR_AC_MATCH, backward_code),
        offsetof(YR_AC_MATCH, next),
        EOL);

    if (status == ERROR_SUCCESS)
    {
      root_match->string = new_string;
      root_match->backtrack = 0;
      root_match->backward_code = NULL;
      root_match->forward_code = re->root_node->forward_code;

      // Push the new entry at the head of the root's match list.
      root_match->next = compiler->automaton->root->matches;
      compiler->automaton->root->matches = root_match;
    }
  }

  // Report the shortest atom. This runs even if a previous step failed, in
  // which case the list may simply be empty.
  *min_atom_length = (atoms == NULL) ? 0 : MAX_ATOM_LENGTH;

  for (item = atoms; item != NULL; item = item->next)
  {
    if (item->atom_length < *min_atom_length)
      *min_atom_length = item->atom_length;
  }

  if (flags & STRING_GFLAGS_LITERAL)
  {
    // The length is doubled for STRING_GFLAGS_WIDE strings before comparing
    // it against the shortest atom.
    effective_len = (flags & STRING_GFLAGS_WIDE) ?
        new_string->length * 2 : new_string->length;

    if (effective_len == *min_atom_length)
      new_string->g_flags |= STRING_GFLAGS_FITS_IN_ATOM;
  }

  if (literal_is_owned)
    yr_free(literal);

  if (atoms != NULL)
    yr_atoms_list_destroy(atoms);

  return status;
}
//
// _yr_parser_write_string
//
// Allocates a YR_STRING in compiler->strings_arena, fills it in, extracts its
// atoms and registers it with compiler->automaton.
//
// Args:
//    identifier        - Name written to compiler->sz_arena; the new string's
//                        `identifier` field refers to that copy.
//    flags             - STRING_GFLAGS_* bits. STRING_GFLAGS_LITERAL is added
//                        locally for literal strings, and
//                        STRING_GFLAGS_FIXED_OFFSET is cleared for non-literal
//                        hexadecimal/regexp strings.
//    compiler          - Provides the strings/sz/re_code/automaton arenas and
//                        the automaton itself.
//    str               - Literal text. Only read when neither
//                        STRING_GFLAGS_HEXADECIMAL nor STRING_GFLAGS_REGEXP is
//                        set in flags.
//    re                - Regular expression. Read for hexadecimal/regexp
//                        strings, and also when no atom list was produced.
//    string            - Out: the newly allocated YR_STRING. Set to NULL on
//                        entry, before anything can fail.
//    min_atom_quality  - Out: value returned by yr_atoms_min_quality() for
//                        the extracted atom list. Not written if the function
//                        bails out before the string structure is initialized.
//
// Returns:
//    ERROR_SUCCESS, or the first error code returned by one of the callees.
//

int _yr_parser_write_string(
    const char* identifier,
    int flags,
    YR_COMPILER* compiler,
    SIZED_STRING* str,
    RE* re,
    YR_STRING** string,
    int* min_atom_quality)
{
  SIZED_STRING* literal = NULL;
  YR_STRING* new_string;
  YR_AC_MATCH* root_match;
  YR_ATOM_LIST_ITEM* atoms = NULL;

  int status;
  int effective_len;
  int literal_is_owned = FALSE;

  *string = NULL;

  status = yr_arena_allocate_struct(
      compiler->strings_arena,
      sizeof(YR_STRING),
      (void**) string,
      offsetof(YR_STRING, identifier),
      offsetof(YR_STRING, string),
      offsetof(YR_STRING, chained_to),
      EOL);

  if (status != ERROR_SUCCESS)
    return status;

  // From here on *string is never reassigned, so a plain alias is enough.
  new_string = *string;

  status = yr_arena_write_string(
      compiler->sz_arena, identifier, &new_string->identifier);

  if (status != ERROR_SUCCESS)
    return status;

  if (flags & (STRING_GFLAGS_HEXADECIMAL | STRING_GFLAGS_REGEXP))
  {
    // The extracted literal (if any) is released with yr_free() before
    // returning, hence the ownership flag.
    literal = yr_re_extract_literal(re);

    if (literal != NULL)
    {
      literal_is_owned = TRUE;
      flags |= STRING_GFLAGS_LITERAL;
    }
    else
    {
      // A non-literal string may contain variable-length portions, so after
      // one of its atoms is found in the scanned data there is no way to
      // know the offset at which the string should start. Such strings
      // therefore can't be marked as fixed offset.
      flags &= ~STRING_GFLAGS_FIXED_OFFSET;
    }
  }
  else
  {
    // Plain text strings are literals by definition; the caller keeps
    // ownership of str.
    literal = str;
    flags |= STRING_GFLAGS_LITERAL;
  }

  new_string->fixed_offset = UNDEFINED;
  new_string->chained_to = NULL;
  new_string->g_flags = flags;

  #ifdef PROFILING_ENABLED
  new_string->clock_ticks = 0;
  #endif

  memset(
      new_string->matches, 0, sizeof(new_string->matches));

  memset(
      new_string->unconfirmed_matches,
      0,
      sizeof(new_string->unconfirmed_matches));

  if (flags & STRING_GFLAGS_LITERAL)
  {
    new_string->length = (uint32_t) literal->length;

    status = yr_arena_write_data(
        compiler->sz_arena,
        literal->c_string,
        literal->length + 1,   // +1 to include terminating NULL
        (void**) &new_string->string);

    if (status == ERROR_SUCCESS)
      status = yr_atoms_extract_from_string(
          (uint8_t*) literal->c_string,
          (int32_t) literal->length,
          flags,
          &atoms);
  }
  else
  {
    status = yr_re_emit_code(re, compiler->re_code_arena);

    if (status == ERROR_SUCCESS)
      status = yr_atoms_extract_from_re(re, flags, &atoms);
  }

  if (status == ERROR_SUCCESS && atoms != NULL)
  {
    // Add the string to Aho-Corasick automaton.
    status = yr_ac_add_string(
        compiler->automaton_arena,
        compiler->automaton,
        new_string,
        atoms);
  }
  else if (status == ERROR_SUCCESS)
  {
    // No atoms available: hang a match entry directly off the automaton's
    // root node instead.
    status = yr_arena_allocate_struct(
        compiler->automaton_arena,
        sizeof(YR_AC_MATCH),
        (void**) &root_match,
        offsetof(YR_AC_MATCH, string),
        offsetof(YR_AC_MATCH, forward_code),
        offsetof(YR_AC_MATCH, backward_code),
        offsetof(YR_AC_MATCH, next),
        EOL);

    if (status == ERROR_SUCCESS)
    {
      root_match->string = new_string;
      root_match->backtrack = 0;
      root_match->backward_code = NULL;
      root_match->forward_code = re->root_node->forward_code;

      // Push the new entry at the head of the root's match list.
      root_match->next = compiler->automaton->root->matches;
      compiler->automaton->root->matches = root_match;
    }
  }

  // Computed even if a previous step failed; the list may be NULL then.
  *min_atom_quality = yr_atoms_min_quality(atoms);

  if (flags & STRING_GFLAGS_LITERAL)
  {
    // The length is doubled for STRING_GFLAGS_WIDE strings before checking
    // it against MAX_ATOM_LENGTH.
    effective_len = (flags & STRING_GFLAGS_WIDE) ?
        new_string->length * 2 : new_string->length;

    if (effective_len <= MAX_ATOM_LENGTH)
      new_string->g_flags |= STRING_GFLAGS_FITS_IN_ATOM;
  }

  if (literal_is_owned)
    yr_free(literal);

  if (atoms != NULL)
    yr_atoms_list_destroy(atoms);

  return status;
}