// Writes a one-byte instruction followed by a 64-bit argument into the
// compiler's code arena, then registers the argument slot with
// yr_arena_make_relocatable.
//
// If instruction_address is passed through to yr_arena_write_data it receives
// the location where the instruction was written. Returns ERROR_SUCCESS or the
// first error reported by the arena functions.
int yr_parser_emit_with_arg_reloc(
    yyscan_t yyscanner,
    int8_t instruction,
    int64_t argument,
    int8_t** instruction_address)
{
  void* argument_slot;
  int status;

  status = yr_arena_write_data(
      yyget_extra(yyscanner)->code_arena,
      &instruction,
      sizeof(int8_t),
      (void**) instruction_address);

  if (status != ERROR_SUCCESS)
    return status;

  status = yr_arena_write_data(
      yyget_extra(yyscanner)->code_arena,
      &argument,
      sizeof(int64_t),
      &argument_slot);

  if (status != ERROR_SUCCESS)
    return status;

  // Only reached when the argument was written, so argument_slot is valid.
  return yr_arena_make_relocatable(
      yyget_extra(yyscanner)->code_arena,
      argument_slot,
      0,
      EOL);
}
// rl_input puts upto MAX characters into BUF with the number put in // BUF placed in *RESULT. static void lexer_input (char *buf, unsigned long int *result, int max, void* yyscanner) { if (yyget_in(yyscanner) != rl_instream) { //not stdin so read as usual while ( (*result = read( fileno(yyget_in(yyscanner)), buf, max )) < 0 ) { if (errno != EINTR) { std::cerr << "read() in flex scanner failed" << std::endl; exit (1); } } return; } if (rl_len == 0) { //Do we need a new string? if (rl_start) { free(rl_start); } unsigned short scopes = yyget_extra(yyscanner)->indents.size(); std::string prompt = "sugar"; if(yyget_extra(yyscanner)->pendingEndInstr){ prompt += "*"; } if(scopes > 1){ prompt += "("+std::to_string(scopes)+")"; } prompt += "> "; #if SHELL_USE_COLOR prompt = "\x1b[33m"+prompt+"\x1b[0m"; #endif rl_start = readline (prompt.c_str()); if (rl_start == NULL) { //end of file *result = 0; rl_len = 0; return; } rl_line = rl_start; rl_len = strlen (rl_line)+1; if (rl_len != 1) { add_history (rl_line); } rl_line[rl_len-1] = '\n'; fflush (stdout); } if (rl_len <= max) { strncpy (buf, rl_line, rl_len); *result = rl_len; rl_len = 0; } else { strncpy (buf, rl_line, max); *result = max; rl_line += max; rl_len -= max; } }
// Searches the strings of the rule currently being compiled for one whose
// identifier equals the given one.
//
// Returns the matching string, or NULL when none is found; in that case the
// identifier is stored as extra error information and the compiler's
// last_result is set to ERROR_UNDEFINED_STRING.
YR_STRING* yr_parser_lookup_string(
    yyscan_t yyscanner,
    const char* identifier)
{
  YR_COMPILER* compiler = yyget_extra(yyscanner);
  YR_STRING* candidate;

  for (candidate = compiler->current_rule_strings;
       !STRING_IS_NULL(candidate);
       candidate = yr_arena_next_address(
           compiler->strings_arena,
           candidate,
           sizeof(YR_STRING)))
  {
    // When a string $a is fragmented into several chained strings, every
    // fragment carries the identifier $a. Only the heading fragment (the one
    // with chained_to == NULL) is of interest here.
    if (candidate->chained_to != NULL)
      continue;

    if (strcmp(candidate->identifier, identifier) == 0)
      return candidate;
  }

  yr_compiler_set_error_extra_info(compiler, identifier);
  compiler->last_result = ERROR_UNDEFINED_STRING;

  return NULL;
}
// Searches the compiler's externals arena for an external variable with the
// given identifier, examining externals_count entries starting at the arena's
// base address.
//
// Returns the matching variable, or NULL when none is found; in that case the
// identifier is stored as extra error information and the compiler's
// last_result is set to ERROR_UNDEFINED_IDENTIFIER.
YR_EXTERNAL_VARIABLE* yr_parser_lookup_external_variable(
    yyscan_t yyscanner,
    const char* identifier)
{
  YR_COMPILER* compiler = yyget_extra(yyscanner);
  YR_EXTERNAL_VARIABLE* candidate;
  int index = 0;

  candidate = (YR_EXTERNAL_VARIABLE*) yr_arena_base_address(
      compiler->externals_arena);

  while (index < compiler->externals_count)
  {
    if (strcmp(candidate->identifier, identifier) == 0)
      return candidate;

    candidate = yr_arena_next_address(
        compiler->externals_arena,
        candidate,
        sizeof(YR_EXTERNAL_VARIABLE));

    index++;
  }

  yr_compiler_set_error_extra_info(compiler, identifier);
  compiler->last_result = ERROR_UNDEFINED_IDENTIFIER;

  return NULL;
}
// Searches the strings of the rule currently being compiled for the first one
// whose identifier equals the given one.
//
// Returns the matching string, or NULL when none is found; in that case the
// identifier is stored as extra error information and the compiler's
// last_result is set to ERROR_UNDEFINED_STRING.
YR_STRING* yr_parser_lookup_string(
    yyscan_t yyscanner,
    const char* identifier)
{
  YR_COMPILER* compiler = yyget_extra(yyscanner);
  YR_STRING* candidate;

  for (candidate = compiler->current_rule_strings;
       !STRING_IS_NULL(candidate);
       candidate = yr_arena_next_address(
           compiler->strings_arena,
           candidate,
           sizeof(YR_STRING)))
  {
    if (strcmp(candidate->identifier, identifier) == 0)
      return candidate;
  }

  yr_compiler_set_error_extra_info(compiler, identifier);
  compiler->last_result = ERROR_UNDEFINED_STRING;

  return NULL;
}
// Emits an OP_PUSH instruction for every string of the current rule whose
// identifier matches the given one.
//
// A string matches when its identifier is equal to `identifier`, or when the
// first position at which the two differ holds a '*' in `identifier` (so
// "$a*" matches "$a", "$a1", "$abc", ...). Strings chained to another one are
// skipped. Each matching string is marked as referenced and loses its
// STRING_GFLAGS_FIXED_OFFSET flag.
//
// Returns compiler->last_result. It is set to ERROR_UNDEFINED_STRING (with
// the identifier as extra error information) when nothing matched, and to the
// emission error when an OP_PUSH could not be written.
int yr_parser_emit_pushes_for_strings(
    yyscan_t yyscanner,
    const char* identifier)
{
  YR_COMPILER* compiler = yyget_extra(yyscanner);
  YR_STRING* string = compiler->current_rule->strings;

  const char* string_identifier;
  const char* target_identifier;

  int matching = 0;
  int result;

  while (!STRING_IS_NULL(string))
  {
    // Don't generate pushes for strings chained to another one, we are
    // only interested in non-chained strings or the head of the chain.

    if (string->chained_to == NULL)
    {
      string_identifier = string->identifier;
      target_identifier = identifier;

      // Skip the prefix both identifiers have in common.
      while (*target_identifier != '\0' &&
             *string_identifier != '\0' &&
             *target_identifier == *string_identifier)
      {
        target_identifier++;
        string_identifier++;
      }

      if ((*target_identifier == '\0' && *string_identifier == '\0') ||
           *target_identifier == '*')
      {
        result = yr_parser_emit_with_arg_reloc(
            yyscanner,
            OP_PUSH,
            PTR_TO_INT64(string),
            NULL,
            NULL);

        // Do not carry on with a code arena that failed to accept the
        // instruction; report the failure instead of dropping it.
        if (result != ERROR_SUCCESS)
        {
          compiler->last_result = result;
          return compiler->last_result;
        }

        string->g_flags |= STRING_GFLAGS_REFERENCED;
        string->g_flags &= ~STRING_GFLAGS_FIXED_OFFSET;
        matching++;
      }
    }

    string = (YR_STRING*) yr_arena_next_address(
        compiler->strings_arena,
        string,
        sizeof(YR_STRING));
  }

  if (matching == 0)
  {
    yr_compiler_set_error_extra_info(compiler, identifier);
    compiler->last_result = ERROR_UNDEFINED_STRING;
  }

  return compiler->last_result;
}
// Handles an import statement for the module named by module_name.
//
// If an object with that name already exists in the objects table for the
// current namespace nothing is done. Otherwise a structure object is created
// and added to the table, the module's declarations are run on it, the module
// name is written to the sz arena and an OP_IMPORT instruction referencing
// that name is emitted.
//
// Returns ERROR_SUCCESS or the first error found, which is also stored in
// compiler->last_result. For ERROR_UNKNOWN_MODULE the module name is stored
// as extra error information.
int yr_parser_reduce_import(
    yyscan_t yyscanner,
    SIZED_STRING* module_name)
{
  YR_COMPILER* compiler = yyget_extra(yyscanner);
  YR_OBJECT* module_object;
  char* stored_name;

  module_object = (YR_OBJECT*) yr_hash_table_lookup(
      compiler->objects_table,
      module_name->c_string,
      compiler->current_namespace->name);

  // The module was imported before, there is nothing left to do.
  if (module_object != NULL)
    return ERROR_SUCCESS;

  compiler->last_result = yr_object_create(
      OBJECT_TYPE_STRUCTURE,
      module_name->c_string,
      NULL,
      &module_object);

  if (compiler->last_result != ERROR_SUCCESS)
    return compiler->last_result;

  compiler->last_result = yr_hash_table_add(
      compiler->objects_table,
      module_name->c_string,
      compiler->current_namespace->name,
      module_object);

  if (compiler->last_result != ERROR_SUCCESS)
    return compiler->last_result;

  compiler->last_result = yr_modules_do_declarations(
      module_name->c_string,
      module_object);

  if (compiler->last_result == ERROR_UNKNOWN_MODULE)
    yr_compiler_set_error_extra_info(compiler, module_name->c_string);

  if (compiler->last_result != ERROR_SUCCESS)
    return compiler->last_result;

  compiler->last_result = yr_arena_write_string(
      compiler->sz_arena,
      module_name->c_string,
      &stored_name);

  if (compiler->last_result != ERROR_SUCCESS)
    return compiler->last_result;

  compiler->last_result = yr_parser_emit_with_arg_reloc(
      yyscanner,
      OP_IMPORT,
      PTR_TO_INT64(stored_name),
      NULL,
      NULL);

  return compiler->last_result;
}
// Writes TEXT to OUT replacing the characters that are not allowed verbatim
// inside an XML attribute value (&, <, >, " and ') by their predefined
// entities. A NULL text writes nothing.
static void yyerror_write_xml_escaped(FILE *out, const char *text)
{
  if (text == NULL)
    return;

  for (; *text != '\0'; text++) {
    switch (*text) {
      case '&':  fputs("&amp;", out);  break;
      case '<':  fputs("&lt;", out);   break;
      case '>':  fputs("&gt;", out);   break;
      case '"':  fputs("&quot;", out); break;
      case '\'': fputs("&apos;", out); break;
      default:   fputc(*text, out);    break;
    }
  }
}

// Parser error callback: reports ERROR together with the scanner's current
// line number.
//
// When xml_output is set the report is an <issue .../> element printed on
// stdout; the error text goes into the "issue" attribute and is therefore
// XML-escaped so that messages containing quotes or angle brackets still
// yield well-formed output. Otherwise a plain message is printed on stderr,
// prefixed with the scanner's extra data interpreted as a C string.
void yyerror(yyscan_t scanner, const char *error)
{
  if (xml_output) {
    printf("\t\t<issue line=\"%d\" char=\"\" issue=\"", yyget_lineno(scanner));
    yyerror_write_xml_escaped(stdout, error);
    printf("\" evidence=\"\"/>\n");
  } else {
    fprintf(stderr, "%s: %s at line %d\n",
            (char *) yyget_extra(scanner), error, yyget_lineno(scanner));
  }
}
// Writes a single one-byte instruction into the compiler's code arena.
//
// instruction_address is handed to yr_arena_write_data as the place where the
// address of the written instruction is reported. Returns whatever
// yr_arena_write_data returns.
int yr_parser_emit(
    yyscan_t yyscanner,
    int8_t instruction,
    int8_t** instruction_address)
{
  void** written_at = (void**) instruction_address;

  return yr_arena_write_data(
      yyget_extra(yyscanner)->code_arena,
      &instruction,
      sizeof(instruction),
      written_at);
}
// Second phase of a rule declaration: validates the rule's strings and emits
// the OP_MATCH_RULE instruction for the rule.
//
// Fails with ERROR_UNREFERENCED_STRING (extra info: the string identifier)
// when a non-chained string was never referenced, and with
// ERROR_TOO_MANY_STRINGS (extra info: the rule identifier) when the rule has
// more strings than the YR_CONFIG_MAX_STRINGS_PER_RULE setting. The returned
// value is also stored in compiler->last_result.
int yr_parser_reduce_rule_declaration_phase_2(
    yyscan_t yyscanner,
    YR_RULE* rule)
{
  YR_COMPILER* compiler = yyget_extra(yyscanner);
  YR_STRING* current;

  uint32_t string_limit;
  uint32_t string_count = 0;

  yr_get_configuration(
      YR_CONFIG_MAX_STRINGS_PER_RULE,
      (void*) &string_limit);

  for (current = rule->strings;
       !STRING_IS_NULL(current);
       current = (YR_STRING*) yr_arena_next_address(
           compiler->strings_arena,
           current,
           sizeof(YR_STRING)))
  {
    // Only the heading fragment of a chain (chained_to == NULL) has to be
    // referenced; the remaining fragments are never marked as referenced.
    if (current->chained_to == NULL && !STRING_IS_REFERENCED(current))
    {
      yr_compiler_set_error_extra_info(compiler, current->identifier);
      compiler->last_result = ERROR_UNREFERENCED_STRING;
      return compiler->last_result;
    }

    string_count++;

    if (string_count > string_limit)
    {
      yr_compiler_set_error_extra_info(compiler, rule->identifier);
      compiler->last_result = ERROR_TOO_MANY_STRINGS;
      return compiler->last_result;
    }
  }

  compiler->last_result = yr_parser_emit_with_arg_reloc(
      yyscanner,
      OP_MATCH_RULE,
      rule,
      NULL,
      NULL);

  return compiler->last_result;
}
// Writes a one-byte instruction followed by its 64-bit argument into the
// compiler's code arena.
//
// instruction_address is handed to yr_arena_write_data for the instruction
// write. The argument is written only if the instruction was written
// successfully. Returns ERROR_SUCCESS or the first arena error.
int yr_parser_emit_with_arg(
    yyscan_t yyscanner,
    int8_t instruction,
    int64_t argument,
    int8_t** instruction_address)
{
  int status = yr_arena_write_data(
      yyget_extra(yyscanner)->code_arena,
      &instruction,
      sizeof(int8_t),
      (void**) instruction_address);

  if (status != ERROR_SUCCESS)
    return status;

  return yr_arena_write_data(
      yyget_extra(yyscanner)->code_arena,
      &argument,
      sizeof(int64_t),
      NULL);
}
// Writes a one-byte instruction followed by a pointer argument into the
// compiler's code arena and registers the argument slot with
// yr_arena_make_ptr_relocatable.
//
// The pointer is stored through a zero-initialised DECLARE_REFERENCE object,
// so the whole slot is written with defined contents. instruction_address is
// handed to yr_arena_write_data for the instruction write. When
// argument_address is not NULL it receives the address of the argument slot
// (NULL if the argument was never written), regardless of the result.
//
// Returns ERROR_SUCCESS or the first error reported by the arena functions.
int yr_parser_emit_with_arg_reloc(
    yyscan_t yyscanner,
    uint8_t instruction,
    void* argument,
    uint8_t** instruction_address,
    void** argument_address)
{
  int64_t* slot = NULL;
  int status;

  DECLARE_REFERENCE(void*, ptr) reference;

  memset(&reference, 0, sizeof(reference));
  reference.ptr = argument;

  status = yr_arena_write_data(
      yyget_extra(yyscanner)->code_arena,
      &instruction,
      sizeof(uint8_t),
      (void**) instruction_address);

  if (status == ERROR_SUCCESS)
  {
    status = yr_arena_write_data(
        yyget_extra(yyscanner)->code_arena,
        &reference,
        sizeof(reference),
        (void**) &slot);

    if (status == ERROR_SUCCESS)
    {
      status = yr_arena_make_ptr_relocatable(
          yyget_extra(yyscanner)->code_arena,
          slot,
          0,
          EOL);
    }
  }

  if (argument_address != NULL)
    *argument_address = (void*) slot;

  return status;
}
// Writes a one-byte instruction followed by a double argument into the
// compiler's code arena.
//
// instruction_address and argument_address are handed to yr_arena_write_data
// for the instruction and argument writes respectively. The argument is
// written only if the instruction was written successfully. Returns
// ERROR_SUCCESS or the first arena error.
int yr_parser_emit_with_arg_double(
    yyscan_t yyscanner,
    uint8_t instruction,
    double argument,
    uint8_t** instruction_address,
    double** argument_address)
{
  int status = yr_arena_write_data(
      yyget_extra(yyscanner)->code_arena,
      &instruction,
      sizeof(uint8_t),
      (void**) instruction_address);

  if (status != ERROR_SUCCESS)
    return status;

  return yr_arena_write_data(
      yyget_extra(yyscanner)->code_arena,
      &argument,
      sizeof(double),
      (void**) argument_address);
}
// Looks for the given identifier among the identifiers of the loops that are
// currently open (the first loop_depth entries of compiler->loop_identifier).
//
// Returns the index of the first entry equal to the identifier, or -1 when
// there is none.
int yr_parser_lookup_loop_variable(
    yyscan_t yyscanner,
    const char* identifier)
{
  YR_COMPILER* compiler = yyget_extra(yyscanner);
  int level = 0;

  while (level < compiler->loop_depth)
  {
    if (strcmp(identifier, compiler->loop_identifier[level]) == 0)
      return level;

    level++;
  }

  return -1;
}
// Emits the given EXT_* instruction for the external variable named by
// identifier.
//
// EXT_BOOL is accepted for any external variable, EXT_INT only for integer
// variables and EXT_STR only for fixed-string variables; any other
// combination sets ERROR_INCORRECT_VARIABLE_TYPE with the variable identifier
// as extra error information. When the variable does not exist the error set
// by yr_parser_lookup_external_variable is left in place.
//
// Returns compiler->last_result.
int yr_parser_reduce_external(
    yyscan_t yyscanner,
    const char* identifier,
    int8_t instruction)
{
  YR_COMPILER* compiler = yyget_extra(yyscanner);
  YR_EXTERNAL_VARIABLE* variable;
  int compatible;

  variable = yr_parser_lookup_external_variable(yyscanner, identifier);

  if (variable == NULL)
    return compiler->last_result;

  compatible =
      instruction == EXT_BOOL ||
      (instruction == EXT_INT &&
       variable->type == EXTERNAL_VARIABLE_TYPE_INTEGER) ||
      (instruction == EXT_STR &&
       variable->type == EXTERNAL_VARIABLE_TYPE_FIXED_STRING);

  if (compatible)
  {
    // In every accepted case the emitted opcode is the requested one.
    compiler->last_result = yr_parser_emit_with_arg_reloc(
        yyscanner,
        instruction,
        PTR_TO_UINT64(variable),
        NULL);
  }
  else
  {
    yr_compiler_set_error_extra_info(compiler, variable->identifier);
    compiler->last_result = ERROR_INCORRECT_VARIABLE_TYPE;
  }

  return compiler->last_result;
}
// Creates a YR_META entry in the metas arena for a metadata declaration.
//
// The identifier, and the string value when one is given, are copied into the
// sz arena; a NULL string leaves the entry's string pointer NULL. The integer
// value and the type are stored as given.
//
// Returns the new entry, or NULL on failure with the error code left in
// compiler->last_result.
YR_META* yr_parser_reduce_meta_declaration(
    yyscan_t yyscanner,
    int32_t type,
    const char* identifier,
    const char* string,
    int32_t integer)
{
  YR_COMPILER* compiler = yyget_extra(yyscanner);
  YR_META* entry;

  compiler->last_result = yr_arena_allocate_struct(
      compiler->metas_arena,
      sizeof(YR_META),
      (void**) &entry,
      offsetof(YR_META, identifier),
      offsetof(YR_META, string),
      EOL);

  if (compiler->last_result != ERROR_SUCCESS)
    return NULL;

  compiler->last_result = yr_arena_write_string(
      compiler->sz_arena,
      identifier,
      &entry->identifier);

  if (compiler->last_result != ERROR_SUCCESS)
    return NULL;

  if (string == NULL)
  {
    entry->string = NULL;
  }
  else
  {
    compiler->last_result = yr_arena_write_string(
        compiler->sz_arena,
        string,
        &entry->string);

    if (compiler->last_result != ERROR_SUCCESS)
      return NULL;
  }

  entry->integer = integer;
  entry->type = type;

  return entry;
}
// Creates a YR_META entry in the metas arena for a metadata declaration and
// reports it through *meta.
//
// The identifier, and the string value when one is given, are copied into the
// sz arena; a NULL string leaves the entry's string pointer NULL. The integer
// value and the type are stored as given.
//
// Returns ERROR_SUCCESS, or the error of the first arena operation that
// failed (propagated by FAIL_ON_ERROR).
int yr_parser_reduce_meta_declaration(
    yyscan_t yyscanner,
    int32_t type,
    const char* identifier,
    const char* string,
    int64_t integer,
    YR_META** meta)
{
  YR_COMPILER* compiler = yyget_extra(yyscanner);
  YR_META* entry;

  FAIL_ON_ERROR(yr_arena_allocate_struct(
      compiler->metas_arena,
      sizeof(YR_META),
      (void**) meta,
      offsetof(YR_META, identifier),
      offsetof(YR_META, string),
      EOL));

  // The allocation succeeded, so *meta now designates the new entry.
  entry = *meta;

  FAIL_ON_ERROR(yr_arena_write_string(
      compiler->sz_arena,
      identifier,
      (char**) &entry->identifier));

  if (string == NULL)
  {
    entry->string = NULL;
  }
  else
  {
    FAIL_ON_ERROR(yr_arena_write_string(
        compiler->sz_arena,
        string,
        &entry->string));
  }

  entry->integer = integer;
  entry->type = type;

  return ERROR_SUCCESS;
}
// Emits a PUSH instruction for every string of the rule being compiled whose
// identifier matches the given one, and marks those strings as referenced.
//
// A string matches when its identifier is equal to `identifier`, or when the
// first position at which the two differ holds a '*' in `identifier`.
void yr_parser_emit_pushes_for_strings(
    yyscan_t yyscanner,
    const char* identifier)
{
  YR_COMPILER* compiler = yyget_extra(yyscanner);
  YR_STRING* current;

  const char* name_cursor;
  const char* pattern_cursor;

  for (current = compiler->current_rule_strings;
       !STRING_IS_NULL(current);
       current = yr_arena_next_address(
           compiler->strings_arena,
           current,
           sizeof(YR_STRING)))
  {
    name_cursor = current->identifier;
    pattern_cursor = identifier;

    // Advance over the prefix shared by both identifiers.
    while (*pattern_cursor != '\0' &&
           *name_cursor != '\0' &&
           *pattern_cursor == *name_cursor)
    {
      pattern_cursor++;
      name_cursor++;
    }

    if (*pattern_cursor != '*' &&
        !(*pattern_cursor == '\0' && *name_cursor == '\0'))
    {
      // Neither an exact match nor a wildcard match.
      continue;
    }

    yr_parser_emit_with_arg_reloc(
        yyscanner,
        PUSH,
        PTR_TO_UINT64(current),
        NULL);

    current->g_flags |= STRING_GFLAGS_REFERENCED;
  }
}
// Emits the code for a string identifier used in a condition together with
// the given string instruction.
//
// For the anonymous identifier "$" (valid only inside a loop) a PUSH_M for
// the innermost loop's variable slot is emitted followed by the instruction;
// unless the instruction is SFOUND, every string of the current rule loses
// its STRING_GFLAGS_SINGLE_MATCH flag. Outside a loop
// ERROR_MISPLACED_ANONYMOUS_STRING is set.
//
// For a named string a PUSH of the string is emitted followed by the
// instruction, the string is marked as referenced, and it loses
// STRING_GFLAGS_SINGLE_MATCH unless the instruction is SFOUND. An unknown
// name leaves the error set by yr_parser_lookup_string.
//
// Returns compiler->last_result.
int yr_parser_reduce_string_identifier(
    yyscan_t yyscanner,
    const char* identifier,
    int8_t instruction)
{
  YR_COMPILER* compiler = yyget_extra(yyscanner);
  YR_STRING* target;

  if (strcmp(identifier, "$") != 0)
  {
    // Named string.
    target = yr_parser_lookup_string(yyscanner, identifier);

    if (target == NULL)
      return compiler->last_result;

    yr_parser_emit_with_arg_reloc(
        yyscanner,
        PUSH,
        PTR_TO_UINT64(target),
        NULL);

    if (instruction != SFOUND)
      target->g_flags &= ~STRING_GFLAGS_SINGLE_MATCH;

    yr_parser_emit(yyscanner, instruction, NULL);

    target->g_flags |= STRING_GFLAGS_REFERENCED;

    return compiler->last_result;
  }

  // Anonymous string: only meaningful inside a loop.
  if (compiler->loop_depth <= 0)
  {
    compiler->last_result = ERROR_MISPLACED_ANONYMOUS_STRING;
    return compiler->last_result;
  }

  yr_parser_emit_with_arg(
      yyscanner,
      PUSH_M,
      LOOP_LOCAL_VARS * (compiler->loop_depth - 1),
      NULL);

  yr_parser_emit(yyscanner, instruction, NULL);

  if (instruction != SFOUND)
  {
    for (target = compiler->current_rule_strings;
         !STRING_IS_NULL(target);
         target = yr_arena_next_address(
             compiler->strings_arena,
             target,
             sizeof(YR_STRING)))
    {
      target->g_flags &= ~STRING_GFLAGS_SINGLE_MATCH;
    }
  }

  return compiler->last_result;
}
// Completes the declaration of a rule: validates it, allocates its YR_RULE in
// the rules arena, emits the RULE_POP instruction and registers the rule in
// the rules table of the current namespace.
//
// Errors (also stored in compiler->last_result):
//   ERROR_DUPLICATE_RULE_IDENTIFIER  a rule with this identifier already
//                                    exists in the current namespace.
//   ERROR_UNREFERENCED_STRING        a non-chained string of the rule was
//                                    never referenced.
//   any error returned by the arena, emit or hash table functions used.
//
// On success the compiler's current_rule_flags and current_rule_strings are
// reset for the next rule.
int yr_parser_reduce_rule_declaration(
    yyscan_t yyscanner,
    int32_t flags,
    const char* identifier,
    char* tags,
    YR_STRING* strings,
    YR_META* metas)
{
  YR_COMPILER* compiler = yyget_extra(yyscanner);

  YR_RULE* rule;
  YR_STRING* string;

  if (yr_hash_table_lookup(
        compiler->rules_table,
        identifier,
        compiler->current_namespace->name) != NULL)
  {
    // A rule with the same identifier already exists, return the
    // appropriate error.

    yr_compiler_set_error_extra_info(compiler, identifier);
    compiler->last_result = ERROR_DUPLICATE_RULE_IDENTIFIER;
    return compiler->last_result;
  }

  // Check for unreferenced (unused) strings.

  string = compiler->current_rule_strings;

  while (!STRING_IS_NULL(string))
  {
    // Only the heading fragment in a chain of strings (the one with
    // chained_to == NULL) must be referenced. All other fragments
    // are never marked as referenced.

    if (!STRING_IS_REFERENCED(string) &&
        string->chained_to == NULL)
    {
      yr_compiler_set_error_extra_info(compiler, string->identifier);
      compiler->last_result = ERROR_UNREFERENCED_STRING;
      break;
    }

    string = yr_arena_next_address(
        compiler->strings_arena,
        string,
        sizeof(YR_STRING));
  }

  // This also stops on an error that was pending before this call.
  if (compiler->last_result != ERROR_SUCCESS)
    return compiler->last_result;

  compiler->last_result = yr_arena_allocate_struct(
      compiler->rules_arena,
      sizeof(YR_RULE),
      (void**) &rule,
      offsetof(YR_RULE, identifier),
      offsetof(YR_RULE, tags),
      offsetof(YR_RULE, strings),
      offsetof(YR_RULE, metas),
      offsetof(YR_RULE, ns),
      EOL);

  if (compiler->last_result != ERROR_SUCCESS)
    return compiler->last_result;

  compiler->last_result = yr_arena_write_string(
      compiler->sz_arena,
      identifier,
      &rule->identifier);

  if (compiler->last_result != ERROR_SUCCESS)
    return compiler->last_result;

  compiler->last_result = yr_parser_emit_with_arg_reloc(
      yyscanner,
      RULE_POP,
      PTR_TO_UINT64(rule),
      NULL);

  if (compiler->last_result != ERROR_SUCCESS)
    return compiler->last_result;

  rule->g_flags = flags | compiler->current_rule_flags;
  rule->tags = tags;
  rule->strings = strings;
  rule->metas = metas;
  rule->ns = compiler->current_namespace;

  compiler->current_rule_flags = 0;
  compiler->current_rule_strings = NULL;

  // Keep the result of the registration: a rule that could not be added to
  // the table must not be reported as successfully declared.
  compiler->last_result = yr_hash_table_add(
      compiler->rules_table,
      identifier,
      compiler->current_namespace->name,
      (void*) rule);

  return compiler->last_result;
}
// Creates the YR_STRING for a string declaration and adds it to the
// Aho-Corasick automaton.
//
// yyscanner   scanner whose extra data is the YR_COMPILER in use.
// flags       STRING_GFLAGS_* describing the declared string.
// identifier  string identifier ("$" marks an anonymous string).
// str         the string's text; compiled as a hex string or regular
//             expression when the corresponding flag is set, otherwise used
//             verbatim.
//
// Returns the new string, or NULL on failure with the error code stored in
// compiler->last_result. A warning is reported through
// compiler->error_report_function when the shortest atom extracted from the
// string is shorter than 2 bytes.
YR_STRING* yr_parser_reduce_string_declaration(
    yyscan_t yyscanner,
    int32_t flags,
    const char* identifier,
    SIZED_STRING* str)
{
  int i;
  int error_offset;
  int min_atom_length;
  char* file_name;
  char message[512];

  YR_STRING* string;
  YR_AC_MATCH* new_match;
  ATOM_TREE* atom_tree;
  YR_ATOM_LIST_ITEM* atom;
  YR_ATOM_LIST_ITEM* atom_list = NULL;
  RE* re = NULL;

  uint8_t* literal_string;

  int literal_string_len;
  int max_string_len;

  YR_COMPILER* compiler = yyget_extra(yyscanner);

  // Allocate the string structure and copy its identifier.
  compiler->last_result = yr_arena_allocate_struct(
      compiler->strings_arena,
      sizeof(YR_STRING),
      (void**) &string,
      offsetof(YR_STRING, identifier),
      offsetof(YR_STRING, string),
      EOL);

  if (compiler->last_result != ERROR_SUCCESS)
    return NULL;

  compiler->last_result = yr_arena_write_string(
      compiler->sz_arena,
      identifier,
      &string->identifier);

  if (compiler->last_result != ERROR_SUCCESS)
    return NULL;

  if (strcmp(identifier,"$") == 0)
    flags |= STRING_GFLAGS_ANONYMOUS;

  // Strings that are not wide are treated as ascii.
  if (!(flags & STRING_GFLAGS_WIDE))
    flags |= STRING_GFLAGS_ASCII;

  // The STRING_GFLAGS_SINGLE_MATCH flag indicates that finding
  // a single match for the string is enough. This is true in
  // most cases, except when the string count (#) and string offset (@)
  // operators are used. All strings are marked STRING_FLAGS_SINGLE_MATCH
  // initially, and unmarked later if required.

  flags |= STRING_GFLAGS_SINGLE_MATCH;

  string->g_flags = flags;

  memset(string->matches, 0, sizeof(string->matches));

  if (flags & STRING_GFLAGS_HEXADECIMAL ||
      flags & STRING_GFLAGS_REGEXP)
  {
    // Hex strings and regular expressions are compiled into an RE first.
    if (flags & STRING_GFLAGS_HEXADECIMAL)
      compiler->last_result = yr_re_compile_hex(
          str->c_string, &re);
    else
      compiler->last_result = yr_re_compile(
          str->c_string, &re);

    if (compiler->last_result != ERROR_SUCCESS)
    {
      // NOTE(review): re->error_message is read after a failed compilation;
      // presumably the compile functions still hand back an RE in that
      // case - confirm, otherwise this dereferences NULL.
      snprintf(
          message,
          sizeof(message),
          "invalid %s in string \"%s\": %s",
          (flags & STRING_GFLAGS_HEXADECIMAL) ?
              "hex string" : "regular expression",
          identifier,
          re->error_message);

      yr_compiler_set_error_extra_info(compiler, message);
      string = NULL;
      goto _exit;
    }

    // Carry the properties detected by the RE compiler over to the string.
    if (re->flags & RE_FLAGS_START_ANCHORED)
      string->g_flags |= STRING_GFLAGS_START_ANCHORED;

    if (re->flags & RE_FLAGS_END_ANCHORED)
      string->g_flags |= STRING_GFLAGS_END_ANCHORED;

    if (re->flags & RE_FLAGS_FAST_HEX_REGEXP)
      string->g_flags |= STRING_GFLAGS_FAST_HEX_REGEXP;

    if (re->flags & RE_FLAGS_LITERAL_STRING)
    {
      // The RE is just a literal: handle it like a text string.
      string->g_flags |= STRING_GFLAGS_LITERAL;
      literal_string = re->literal_string;
      literal_string_len = re->literal_string_len;

      compiler->last_result = yr_atoms_extract_from_string(
          literal_string, literal_string_len, string->g_flags, &atom_list);
    }
    else
    {
      compiler->last_result = yr_re_emit_code(
          re, compiler->re_code_arena);

      if (compiler->last_result != ERROR_SUCCESS)
      {
        string = NULL;
        goto _exit;
      }

      compiler->last_result = yr_atoms_extract_from_re(
          re, string->g_flags, &atom_list);
    }
  }
  else
  {
    // Plain text string.
    string->g_flags |= STRING_GFLAGS_LITERAL;
    literal_string = (uint8_t*) str->c_string;
    literal_string_len = str->length;

    compiler->last_result = yr_atoms_extract_from_string(
        literal_string, literal_string_len, string->g_flags, &atom_list);
  }

  // Covers the atom extraction performed in any of the branches above.
  if (compiler->last_result != ERROR_SUCCESS)
  {
    string = NULL;
    goto _exit;
  }

  if (STRING_IS_LITERAL(string))
  {
    // Literal strings keep a copy of their bytes in the sz arena.
    compiler->last_result = yr_arena_write_data(
        compiler->sz_arena,
        literal_string,
        literal_string_len,
        (void*) &string->string);

    if (compiler->last_result != ERROR_SUCCESS)
    {
      string = NULL;
      goto _exit;
    }

    string->length = literal_string_len;
  }

  // Add the string to Aho-Corasick automaton.

  if (atom_list != NULL)
  {
    compiler->last_result = yr_ac_add_string(
        compiler->automaton_arena,
        compiler->automaton,
        string,
        atom_list);
  }
  else
  {
    // No atoms were extracted: attach a match entry for the string directly
    // to the automaton's root.
    compiler->last_result = yr_arena_allocate_struct(
        compiler->automaton_arena,
        sizeof(YR_AC_MATCH),
        (void**) &new_match,
        offsetof(YR_AC_MATCH, string),
        offsetof(YR_AC_MATCH, forward_code),
        offsetof(YR_AC_MATCH, backward_code),
        offsetof(YR_AC_MATCH, next),
        EOL);

    if (compiler->last_result == ERROR_SUCCESS)
    {
      new_match->backtrack = 0;
      new_match->string = string;
      // NOTE(review): re is NULL for plain text strings; presumably an empty
      // atom list can only happen for hex strings and regular expressions -
      // confirm, otherwise this dereferences NULL.
      new_match->forward_code = re->root_node->forward_code;
      new_match->backward_code = NULL;
      new_match->next = compiler->automaton->root->matches;
      compiler->automaton->root->matches = new_match;
    }
  }

  // Find the length of the shortest atom (0 when there are no atoms).
  atom = atom_list;

  if (atom != NULL)
    min_atom_length = MAX_ATOM_LENGTH;
  else
    min_atom_length = 0;

  while (atom != NULL)
  {
    if (atom->atom_length < min_atom_length)
      min_atom_length = atom->atom_length;
    atom = atom->next;
  }

  if (STRING_IS_LITERAL(string))
  {
    // Wide strings are counted as two bytes per character.
    if (STRING_IS_WIDE(string))
      max_string_len = string->length * 2;
    else
      max_string_len = string->length;

    if (max_string_len == min_atom_length)
      string->g_flags |= STRING_GFLAGS_FITS_IN_ATOM;
  }

  // The warning below is attributed to the file on top of the file name
  // stack, if any.
  if (compiler->file_name_stack_ptr > 0)
    file_name = compiler->file_name_stack[compiler->file_name_stack_ptr - 1];
  else
    file_name = NULL;

  if (min_atom_length < 2 && compiler->error_report_function != NULL)
  {
    snprintf(
        message,
        sizeof(message),
        "%s is slowing down scanning%s",
        string->identifier,
        min_atom_length == 0 ? " (critical!)" : "");

    compiler->error_report_function(
        YARA_ERROR_LEVEL_WARNING,
        file_name,
        yyget_lineno(yyscanner),
        message);
  }

  // A failure while adding the string to the automaton ends up here.
  if (compiler->last_result != ERROR_SUCCESS)
    string = NULL;

_exit:

  // Common cleanup: the atom list and the RE are released on every path
  // that reaches this label.
  if (atom_list != NULL)
    yr_atoms_list_destroy(atom_list);

  if (re != NULL)
    yr_re_destroy(re);

  return string;
}
// Creates the YR_STRING (or chain of YR_STRINGs) for a string declaration.
//
// yyscanner   scanner whose extra data is the YR_COMPILER in use.
// flags       STRING_GFLAGS_* describing the declared string.
// identifier  string identifier ("$" marks an anonymous string).
// str         the string's text and its SIZED_STRING_FLAGS_*.
//
// Hex strings and regular expressions are compiled and split at their
// chaining points; every fragment is written with _yr_parser_write_string
// and the fragments are linked through chained_to. Text strings are written
// as a single string.
//
// Returns the first string written, or NULL on failure with the error code
// stored in compiler->last_result. A warning is reported through
// compiler->error_report_function when the shortest atom of the string is
// shorter than 2 bytes.
YR_STRING* yr_parser_reduce_string_declaration(
    yyscan_t yyscanner,
    int32_t flags,
    const char* identifier,
    SIZED_STRING* str)
{
  int min_atom_length;
  int min_atom_length_aux;

  int32_t min_gap;
  int32_t max_gap;

  char* file_name;
  char message[512];

  YR_COMPILER* compiler = yyget_extra(yyscanner);
  YR_STRING* string = NULL;
  YR_STRING* aux_string;
  YR_STRING* prev_string;

  RE* re = NULL;
  RE* remainder_re;

  if (strcmp(identifier,"$") == 0)
    flags |= STRING_GFLAGS_ANONYMOUS;

  // Strings that are not wide are treated as ascii.
  if (!(flags & STRING_GFLAGS_WIDE))
    flags |= STRING_GFLAGS_ASCII;

  // Translate the flags attached to the sized string into string flags.
  if (str->flags & SIZED_STRING_FLAGS_NO_CASE)
    flags |= STRING_GFLAGS_NO_CASE;

  if (str->flags & SIZED_STRING_FLAGS_DOT_ALL)
    flags |= STRING_GFLAGS_REGEXP_DOT_ALL;

  // The STRING_GFLAGS_SINGLE_MATCH flag indicates that finding
  // a single match for the string is enough. This is true in
  // most cases, except when the string count (#) and string offset (@)
  // operators are used. All strings are marked STRING_FLAGS_SINGLE_MATCH
  // initially, and unmarked later if required.

  flags |= STRING_GFLAGS_SINGLE_MATCH;

  if (flags & STRING_GFLAGS_HEXADECIMAL ||
      flags & STRING_GFLAGS_REGEXP)
  {
    if (flags & STRING_GFLAGS_HEXADECIMAL)
      compiler->last_result = yr_re_compile_hex(
          str->c_string, &re);
    else
      compiler->last_result = yr_re_compile(
          str->c_string, &re);

    if (compiler->last_result != ERROR_SUCCESS)
    {
      // NOTE(review): re->error_message is read after a failed compilation;
      // presumably the compile functions still hand back an RE in that
      // case - confirm, otherwise this dereferences NULL.
      snprintf(
          message,
          sizeof(message),
          "invalid %s \"%s\": %s",
          (flags & STRING_GFLAGS_HEXADECIMAL) ?
              "hex string" : "regular expression",
          identifier,
          re->error_message);

      yr_compiler_set_error_extra_info(
          compiler, message);

      goto _exit;
    }

    if (re->flags & RE_FLAGS_FAST_HEX_REGEXP)
      flags |= STRING_GFLAGS_FAST_HEX_REGEXP;

    // Split off the first fragment; re is overwritten with it and
    // remainder_re receives whatever is left (NULL when nothing is left,
    // judging by the checks below).
    compiler->last_result = yr_re_split_at_chaining_point(
        re, &re, &remainder_re, &min_gap, &max_gap);

    if (compiler->last_result != ERROR_SUCCESS)
      goto _exit;

    compiler->last_result = _yr_parser_write_string(
        identifier,
        flags,
        compiler,
        NULL,
        re,
        &string,
        &min_atom_length);

    if (compiler->last_result != ERROR_SUCCESS)
      goto _exit;

    if (remainder_re != NULL)
    {
      // The string was split, so the first fragment is part of a chain.
      string->g_flags |= STRING_GFLAGS_CHAIN_TAIL | STRING_GFLAGS_CHAIN_PART;
      string->chain_gap_min = min_gap;
      string->chain_gap_max = max_gap;
    }

    // Use "aux_string" from now on, we want to keep the value of "string"
    // because it will returned.

    aux_string = string;

    while (remainder_re != NULL)
    {
      // Destroy regexp pointed by 're' before yr_re_split_at_jmp
      // overwrites 're' with another value.

      yr_re_destroy(re);

      // NOTE(review): if this split fails, _exit destroys re again unless
      // yr_re_split_at_chaining_point has already overwritten it - confirm.
      compiler->last_result = yr_re_split_at_chaining_point(
          remainder_re, &re, &remainder_re, &min_gap, &max_gap);

      if (compiler->last_result != ERROR_SUCCESS)
        goto _exit;

      prev_string = aux_string;

      compiler->last_result = _yr_parser_write_string(
          identifier,
          flags,
          compiler,
          NULL,
          re,
          &aux_string,
          &min_atom_length_aux);

      if (compiler->last_result != ERROR_SUCCESS)
        goto _exit;

      // The warning below is based on the shortest atom of all fragments.
      if (min_atom_length_aux < min_atom_length)
        min_atom_length = min_atom_length_aux;

      aux_string->g_flags |= STRING_GFLAGS_CHAIN_PART;
      aux_string->chain_gap_min = min_gap;
      aux_string->chain_gap_max = max_gap;

      // Link the previously written fragment to the one just written.
      prev_string->chained_to = aux_string;
    }
  }
  else
  {
    // Text string: written as a single string straight from str.
    compiler->last_result = _yr_parser_write_string(
        identifier,
        flags,
        compiler,
        str,
        NULL,
        &string,
        &min_atom_length);

    if (compiler->last_result != ERROR_SUCCESS)
      goto _exit;
  }

  // The warning below is attributed to the file on top of the file name
  // stack, if any.
  if (compiler->file_name_stack_ptr > 0)
    file_name = compiler->file_name_stack[compiler->file_name_stack_ptr - 1];
  else
    file_name = NULL;

  if (min_atom_length < 2 && compiler->error_report_function != NULL)
  {
    snprintf(
        message,
        sizeof(message),
        "%s is slowing down scanning%s",
        string->identifier,
        min_atom_length == 0 ? " (critical!)" : "");

    compiler->error_report_function(
        YARA_ERROR_LEVEL_WARNING,
        file_name,
        yyget_lineno(yyscanner),
        message);
  }

_exit:

  // Common cleanup: release the RE still held in re, if any.
  if (re != NULL)
    yr_re_destroy(re);

  if (compiler->last_result != ERROR_SUCCESS)
    return NULL;

  return string;
}
// First phase of a rule declaration: creates the YR_RULE, emits the
// OP_INIT_RULE instruction with its arguments and registers the rule.
//
// yyscanner   scanner whose extra data is the YR_COMPILER in use.
// flags       initial value for the rule's g_flags.
// identifier  rule identifier.
// rule        receives the new rule; set to NULL before anything else is
//             done.
//
// Returns ERROR_SUCCESS on success. Returns ERROR_DUPLICATED_IDENTIFIER (with
// the identifier as extra error information) when the identifier is already
// present in the rules table of the current namespace or in the objects
// table, ERROR_INSUFFICIENT_MEMORY when the fixup entry cannot be allocated,
// or the error of the first arena/emit/hash table operation that failed.
int yr_parser_reduce_rule_declaration_phase_1(
    yyscan_t yyscanner,
    int32_t flags,
    const char* identifier,
    YR_RULE** rule)
{
  YR_COMPILER* compiler = yyget_extra(yyscanner);
  YR_INIT_RULE_ARGS* args;
  YR_FIXUP* jump_fixup;
  int already_defined;

  *rule = NULL;

  // The identifier must be free both as a rule name in this namespace and
  // as an object name. The second table is consulted only when the first
  // lookup finds nothing.
  already_defined = yr_hash_table_lookup(
      compiler->rules_table,
      identifier,
      compiler->current_namespace->name) != NULL;

  if (!already_defined)
  {
    already_defined = yr_hash_table_lookup(
        compiler->objects_table,
        identifier,
        NULL) != NULL;
  }

  if (already_defined)
  {
    yr_compiler_set_error_extra_info(compiler, identifier);
    return ERROR_DUPLICATED_IDENTIFIER;
  }

  FAIL_ON_ERROR(yr_arena_allocate_struct(
      compiler->rules_arena,
      sizeof(YR_RULE),
      (void**) rule,
      offsetof(YR_RULE, identifier),
      offsetof(YR_RULE, tags),
      offsetof(YR_RULE, strings),
      offsetof(YR_RULE, metas),
      offsetof(YR_RULE, ns),
      EOL));

  (*rule)->g_flags = flags;
  (*rule)->ns = compiler->current_namespace;
  (*rule)->num_atoms = 0;

#ifdef PROFILING_ENABLED
  (*rule)->time_cost = 0;

  memset(
      (*rule)->time_cost_per_thread,
      0,
      sizeof((*rule)->time_cost_per_thread));
#endif

  FAIL_ON_ERROR(yr_arena_write_string(
      compiler->sz_arena,
      identifier,
      (char**) &(*rule)->identifier));

  FAIL_ON_ERROR(yr_parser_emit(
      yyscanner,
      OP_INIT_RULE,
      NULL));

  // The arguments of OP_INIT_RULE live in the code arena right after the
  // instruction.
  FAIL_ON_ERROR(yr_arena_allocate_struct(
      compiler->code_arena,
      sizeof(YR_INIT_RULE_ARGS),
      (void**) &args,
      offsetof(YR_INIT_RULE_ARGS, rule),
      offsetof(YR_INIT_RULE_ARGS, jmp_addr),
      EOL));

  args->rule = *rule;

  // jmp_addr is the address to jump to in order to skip the rule's code. It
  // is not known until the code for the rule's condition has been emitted,
  // so it starts as NULL and is filled in later through the fixup entry
  // pushed below.
  args->jmp_addr = NULL;

  jump_fixup = (YR_FIXUP*) yr_malloc(sizeof(YR_FIXUP));

  if (jump_fixup == NULL)
    return ERROR_INSUFFICIENT_MEMORY;

  // Push the fixup on top of the compiler's fixup stack.
  jump_fixup->address = (void*) &(args->jmp_addr);
  jump_fixup->next = compiler->fixup_stack_head;
  compiler->fixup_stack_head = jump_fixup;

  // A new rule starts here, so the strings of the previous one are dropped
  // from the strings table.
  yr_hash_table_clean(compiler->strings_table, NULL);

  FAIL_ON_ERROR(yr_hash_table_add(
      compiler->rules_table,
      identifier,
      compiler->current_namespace->name,
      (void*) *rule));

  compiler->current_rule = *rule;

  return ERROR_SUCCESS;
}
// Handles an import statement for the module named by module_name.
//
// The name is validated first. If an object with that name already exists in
// the objects table for the current namespace nothing else is done. Otherwise
// a structure object is created and added to the table, the module's
// declarations are run on it, the module name is written to the sz arena and
// an OP_IMPORT instruction referencing that name is emitted.
//
// Returns ERROR_SUCCESS, ERROR_INVALID_MODULE_NAME, or the first error
// reported by the operations above. For ERROR_INVALID_MODULE_NAME and
// ERROR_UNKNOWN_MODULE the module name is stored as extra error information.
int yr_parser_reduce_import(
    yyscan_t yyscanner,
    SIZED_STRING* module_name)
{
  YR_COMPILER* compiler = yyget_extra(yyscanner);
  YR_OBJECT* module_object;
  char* stored_name;
  int declarations_status;

  if (!_yr_parser_valid_module_name(module_name))
  {
    yr_compiler_set_error_extra_info(compiler, module_name->c_string);
    return ERROR_INVALID_MODULE_NAME;
  }

  module_object = (YR_OBJECT*) yr_hash_table_lookup(
      compiler->objects_table,
      module_name->c_string,
      compiler->current_namespace->name);

  // The module was imported before, there is nothing left to do.
  if (module_object != NULL)
    return ERROR_SUCCESS;

  FAIL_ON_ERROR(yr_object_create(
      OBJECT_TYPE_STRUCTURE,
      module_name->c_string,
      NULL,
      &module_object));

  FAIL_ON_ERROR(yr_hash_table_add(
      compiler->objects_table,
      module_name->c_string,
      compiler->current_namespace->name,
      module_object));

  declarations_status = yr_modules_do_declarations(
      module_name->c_string,
      module_object);

  if (declarations_status != ERROR_SUCCESS)
  {
    // Tell the user which module could not be found.
    if (declarations_status == ERROR_UNKNOWN_MODULE)
      yr_compiler_set_error_extra_info(compiler, module_name->c_string);

    return declarations_status;
  }

  FAIL_ON_ERROR(yr_arena_write_string(
      compiler->sz_arena,
      module_name->c_string,
      &stored_name));

  FAIL_ON_ERROR(yr_parser_emit_with_arg_reloc(
      yyscanner,
      OP_IMPORT,
      stored_name,
      NULL,
      NULL));

  return ERROR_SUCCESS;
}
// Second phase of a rule declaration: validates the rule's strings, emits
// OP_MATCH_RULE followed by an OP_NOP, and resolves the fixup on top of the
// compiler's fixup stack with the address of that OP_NOP.
//
// Returns ERROR_UNREFERENCED_STRING (extra info: the string identifier) when
// a non-chained string was never referenced, ERROR_TOO_MANY_STRINGS (extra
// info: the rule identifier) when the rule has more strings than the
// YR_CONFIG_MAX_STRINGS_PER_RULE setting, or the result of emitting the
// instructions. The fixup is popped only when the string checks pass.
int yr_parser_reduce_rule_declaration_phase_2(
    yyscan_t yyscanner,
    YR_RULE* rule)
{
  YR_COMPILER* compiler = yyget_extra(yyscanner);
  YR_STRING* current;
  YR_FIXUP* pending_fixup;

  uint32_t string_limit;
  uint32_t string_count = 0;
  uint8_t* skip_target = NULL;

  int status;

  yr_get_configuration(
      YR_CONFIG_MAX_STRINGS_PER_RULE,
      (void*) &string_limit);

  // Warn when the rule produced more atoms than
  // YR_ATOMS_PER_RULE_WARNING_THRESHOLD.
  if (rule->num_atoms > YR_ATOMS_PER_RULE_WARNING_THRESHOLD)
  {
    yywarning(
        yyscanner,
        "rule %s is slowing down scanning",
        rule->identifier);
  }

  for (current = rule->strings;
       !STRING_IS_NULL(current);
       current = (YR_STRING*) yr_arena_next_address(
           compiler->strings_arena,
           current,
           sizeof(YR_STRING)))
  {
    // Only the heading fragment of a chain (chained_to == NULL) has to be
    // referenced; the remaining fragments are never marked as referenced.
    if (current->chained_to == NULL && !STRING_IS_REFERENCED(current))
    {
      yr_compiler_set_error_extra_info(compiler, current->identifier);
      return ERROR_UNREFERENCED_STRING;
    }

    string_count++;

    if (string_count > string_limit)
    {
      yr_compiler_set_error_extra_info(compiler, rule->identifier);
      return ERROR_TOO_MANY_STRINGS;
    }
  }

  status = yr_parser_emit_with_arg_reloc(
      yyscanner,
      OP_MATCH_RULE,
      rule,
      NULL,
      NULL);

  // A do-nothing instruction is emitted only to learn its address, which
  // becomes the destination of the OP_INIT_RULE skip jump. The address of
  // OP_MATCH_RULE plus one cannot be used instead because the instruction
  // that follows OP_MATCH_RULE is not guaranteed to be in the same arena
  // page, and there is no reliable way of getting the address of the next
  // instruction.
  if (status == ERROR_SUCCESS)
    status = yr_parser_emit(yyscanner, OP_NOP, &skip_target);

  // Resolve and pop the fixup on top of the stack.
  pending_fixup = compiler->fixup_stack_head;
  *(void**)(pending_fixup->address) = (void*) skip_target;
  compiler->fixup_stack_head = pending_fixup->next;
  yr_free(pending_fixup);

  return status;
}
int yr_parser_reduce_string_identifier( yyscan_t yyscanner, const char* identifier, uint8_t instruction, uint64_t at_offset) { YR_STRING* string; YR_COMPILER* compiler = yyget_extra(yyscanner); if (strcmp(identifier, "$") == 0) // is an anonymous string ? { if (compiler->loop_for_of_mem_offset >= 0) // inside a loop ? { yr_parser_emit_with_arg( yyscanner, OP_PUSH_M, compiler->loop_for_of_mem_offset, NULL, NULL); yr_parser_emit(yyscanner, instruction, NULL); string = compiler->current_rule->strings; while(!STRING_IS_NULL(string)) { if (instruction != OP_FOUND) string->g_flags &= ~STRING_GFLAGS_SINGLE_MATCH; if (instruction == OP_FOUND_AT) { // Avoid overwriting any previous fixed offset if (string->fixed_offset == UNDEFINED) string->fixed_offset = at_offset; // If a previous fixed offset was different, disable // the STRING_GFLAGS_FIXED_OFFSET flag because we only // have room to store a single fixed offset value if (string->fixed_offset != at_offset) string->g_flags &= ~STRING_GFLAGS_FIXED_OFFSET; } else { string->g_flags &= ~STRING_GFLAGS_FIXED_OFFSET; } string = (YR_STRING*) yr_arena_next_address( compiler->strings_arena, string, sizeof(YR_STRING)); } } else { // Anonymous strings not allowed outside of a loop compiler->last_result = ERROR_MISPLACED_ANONYMOUS_STRING; } } else { string = yr_parser_lookup_string(yyscanner, identifier); if (string != NULL) { yr_parser_emit_with_arg_reloc( yyscanner, OP_PUSH, PTR_TO_INT64(string), NULL, NULL); if (instruction != OP_FOUND) string->g_flags &= ~STRING_GFLAGS_SINGLE_MATCH; if (instruction == OP_FOUND_AT) { // Avoid overwriting any previous fixed offset if (string->fixed_offset == UNDEFINED) string->fixed_offset = at_offset; // If a previous fixed offset was different, disable // the STRING_GFLAGS_FIXED_OFFSET flag because we only // have room to store a single fixed offset value if (string->fixed_offset == UNDEFINED || string->fixed_offset != at_offset) { string->g_flags &= ~STRING_GFLAGS_FIXED_OFFSET; } } else { 
string->g_flags &= ~STRING_GFLAGS_FIXED_OFFSET; } yr_parser_emit(yyscanner, instruction, NULL); string->g_flags |= STRING_GFLAGS_REFERENCED; } } return compiler->last_result; }
//
// yr_parser_reduce_rule_declaration_phase_1
//
// First phase of a rule declaration. Allocates a new YR_RULE in the rules
// arena, stores its identifier, emits the OP_INIT_RULE instruction for it,
// registers it in the rules table and makes it the compiler's current rule.
//
// Args:
//    yyscanner   - Scanner whose extra data is the YR_COMPILER in use.
//    flags       - Value stored in the rule's g_flags.
//    identifier  - Rule identifier; must not collide with another rule or
//                  object in the current namespace.
//
// Returns:
//    The new rule, or NULL on failure. compiler->last_result always holds
//    the corresponding error code (ERROR_DUPLICATED_IDENTIFIER for a name
//    collision, or the code returned by the failing helper).
//
YR_RULE* yr_parser_reduce_rule_declaration_phase_1(
    yyscan_t yyscanner,
    int32_t flags,
    const char* identifier)
{
  YR_COMPILER* compiler = yyget_extra(yyscanner);
  YR_RULE* rule = NULL;

  if (yr_hash_table_lookup(
        compiler->rules_table,
        identifier,
        compiler->current_namespace->name) != NULL ||
      yr_hash_table_lookup(
        compiler->objects_table,
        identifier,
        compiler->current_namespace->name) != NULL)
  {
    // A rule or variable with the same identifier already exists, return the
    // appropriate error.

    yr_compiler_set_error_extra_info(compiler, identifier);
    compiler->last_result = ERROR_DUPLICATED_IDENTIFIER;
    return NULL;
  }

  // The offsets passed after the output pointer are the pointer fields of
  // YR_RULE handed to the arena allocator, terminated by EOL.

  compiler->last_result = yr_arena_allocate_struct(
      compiler->rules_arena,
      sizeof(YR_RULE),
      (void**) &rule,
      offsetof(YR_RULE, identifier),
      offsetof(YR_RULE, tags),
      offsetof(YR_RULE, strings),
      offsetof(YR_RULE, metas),
      offsetof(YR_RULE, ns),
      EOL);

  if (compiler->last_result != ERROR_SUCCESS)
    return NULL;

  rule->g_flags = flags;
  rule->ns = compiler->current_namespace;

  #ifdef PROFILING_ENABLED
  rule->clock_ticks = 0;
  #endif

  compiler->last_result = yr_arena_write_string(
      compiler->sz_arena,
      identifier,
      (char**) &rule->identifier);

  if (compiler->last_result != ERROR_SUCCESS)
    return NULL;

  compiler->last_result = yr_parser_emit_with_arg_reloc(
      yyscanner,
      OP_INIT_RULE,
      PTR_TO_INT64(rule),
      NULL,
      NULL);

  if (compiler->last_result == ERROR_SUCCESS)
    compiler->last_result = yr_hash_table_add(
        compiler->rules_table,
        identifier,
        compiler->current_namespace->name,
        (void*) rule);

  // A failure while emitting OP_INIT_RULE or registering the rule must be
  // reported like any earlier failure; returning the rule here would hide
  // the error from callers that only test the returned pointer.

  if (compiler->last_result != ERROR_SUCCESS)
    return NULL;

  // Clean strings_table as we are starting to parse a new rule.
  yr_hash_table_clean(compiler->strings_table, NULL);

  compiler->current_rule = rule;

  return rule;
}
//
// yr_parser_reduce_operation
//
// Emits the code for the binary operator `op` applied to two operands whose
// code has already been generated.
//
//  * Two numeric operands (integer/float in any combination): if their
//    types differ an OP_INT_TO_DBL is emitted for the integer one, then the
//    opcode for `op` is emitted (integer flavor only if both are integers).
//  * Two string operands: the string opcode for `op` is emitted, or
//    ERROR_WRONG_TYPE is raised if strings don't support `op`.
//  * Anything else is a type mismatch (ERROR_WRONG_TYPE).
//
// Returns:
//    compiler->last_result.
//
int yr_parser_reduce_operation(
    yyscan_t yyscanner,
    const char* op,
    EXPRESSION left_operand,
    EXPRESSION right_operand)
{
  YR_COMPILER* compiler = yyget_extra(yyscanner);

  int left_is_number =
      left_operand.type == EXPRESSION_TYPE_INTEGER ||
      left_operand.type == EXPRESSION_TYPE_FLOAT;

  int right_is_number =
      right_operand.type == EXPRESSION_TYPE_INTEGER ||
      right_operand.type == EXPRESSION_TYPE_FLOAT;

  int both_are_strings =
      left_operand.type == EXPRESSION_TYPE_STRING &&
      right_operand.type == EXPRESSION_TYPE_STRING;

  if (left_is_number && right_is_number)
  {
    int result_type;

    if (left_operand.type != right_operand.type)
    {
      // One operand is double and the other is integer, cast the integer
      // to double. The argument is 2 when the integer is the left operand
      // and 1 when it is the right one.

      int operand_to_cast = 1;

      if (left_operand.type == EXPRESSION_TYPE_INTEGER)
        operand_to_cast = 2;

      compiler->last_result = yr_parser_emit_with_arg(
          yyscanner,
          OP_INT_TO_DBL,
          operand_to_cast,
          NULL,
          NULL);
    }

    // Note that when no cast was needed this tests whatever value
    // last_result already had on entry.

    if (compiler->last_result != ERROR_SUCCESS)
      return compiler->last_result;

    if (left_operand.type == EXPRESSION_TYPE_INTEGER &&
        right_operand.type == EXPRESSION_TYPE_INTEGER)
    {
      result_type = EXPRESSION_TYPE_INTEGER;
    }
    else
    {
      result_type = EXPRESSION_TYPE_FLOAT;
    }

    compiler->last_result = yr_parser_emit(
        yyscanner,
        _yr_parser_operator_to_opcode(op, result_type),
        NULL);

    return compiler->last_result;
  }

  if (both_are_strings)
  {
    int string_opcode = _yr_parser_operator_to_opcode(
        op, EXPRESSION_TYPE_STRING);

    if (string_opcode == OP_ERROR)
    {
      yr_compiler_set_error_extra_info_fmt(
          compiler, "strings don't support \"%s\" operation", op);

      compiler->last_result = ERROR_WRONG_TYPE;
    }
    else
    {
      compiler->last_result = yr_parser_emit(
          yyscanner,
          string_opcode,
          NULL);
    }

    return compiler->last_result;
  }

  // Operands of different, incompatible kinds.

  yr_compiler_set_error_extra_info(compiler, "type mismatch");
  compiler->last_result = ERROR_WRONG_TYPE;

  return compiler->last_result;
}
//
// yr_parser_reduce_string_declaration
//
// Handles the declaration of a string inside a rule. Computes the final
// flags for the string, parses it as a hex string or regular expression if
// required, splits it into a chain of strings at its chaining points, writes
// the resulting string(s) and registers the identifier in the compiler's
// strings table.
//
// Args:
//    yyscanner     - Scanner whose extra data is the YR_COMPILER in use.
//    string_flags  - STRING_GFLAGS_XXX flags given by the string modifiers.
//    identifier    - String identifier ("$" for anonymous strings).
//    str           - The string's content and SIZED_STRING_FLAGS_XXX flags.
//
// Returns:
//    The new string (the head of the chain if the string was split), or
//    NULL on failure, in which case compiler->last_result holds the error
//    code.
//
YR_STRING* yr_parser_reduce_string_declaration(
    yyscan_t yyscanner,
    int32_t string_flags,
    const char* identifier,
    SIZED_STRING* str)
{
  int min_atom_quality;
  int min_atom_quality_aux;
  int re_flags = 0;

  int32_t min_gap;
  int32_t max_gap;

  char message[512];

  YR_COMPILER* compiler = yyget_extra(yyscanner);
  YR_STRING* string = NULL;
  YR_STRING* aux_string;
  YR_STRING* prev_string;

  RE* re = NULL;

  // Must start as NULL: it is inspected at _exit even on paths that never
  // split a regular expression.
  RE* remainder_re = NULL;

  RE_ERROR re_error;

  // Determine if a string with the same identifier was already defined
  // by searching for the identifier in strings_table.

  string = yr_hash_table_lookup(
      compiler->strings_table,
      identifier,
      NULL);

  if (string != NULL)
  {
    compiler->last_result = ERROR_DUPLICATED_STRING_IDENTIFIER;
    yr_compiler_set_error_extra_info(compiler, identifier);
    goto _exit;
  }

  // Empty strings are not allowed.

  if (str->length == 0)
  {
    compiler->last_result = ERROR_EMPTY_STRING;
    yr_compiler_set_error_extra_info(compiler, identifier);
    goto _exit;
  }

  if (str->flags & SIZED_STRING_FLAGS_NO_CASE)
    string_flags |= STRING_GFLAGS_NO_CASE;

  if (str->flags & SIZED_STRING_FLAGS_DOT_ALL)
    re_flags |= RE_FLAGS_DOT_ALL;

  if (strcmp(identifier,"$") == 0)
    string_flags |= STRING_GFLAGS_ANONYMOUS;

  // Strings are ASCII unless declared as wide.

  if (!(string_flags & STRING_GFLAGS_WIDE))
    string_flags |= STRING_GFLAGS_ASCII;

  if (string_flags & STRING_GFLAGS_NO_CASE)
    re_flags |= RE_FLAGS_NO_CASE;

  // The STRING_GFLAGS_SINGLE_MATCH flag indicates that finding
  // a single match for the string is enough. This is true in
  // most cases, except when the string count (#) and string offset (@)
  // operators are used. All strings are marked STRING_GFLAGS_SINGLE_MATCH
  // initially, and unmarked later if required.

  string_flags |= STRING_GFLAGS_SINGLE_MATCH;

  // The STRING_GFLAGS_FIXED_OFFSET indicates that the string doesn't
  // need to be searched all over the file because the user is using the
  // "at" operator. The string must be searched at a fixed offset in the
  // file. All strings are marked STRING_GFLAGS_FIXED_OFFSET initially,
  // and unmarked later if required.

  string_flags |= STRING_GFLAGS_FIXED_OFFSET;

  if (string_flags & STRING_GFLAGS_HEXADECIMAL ||
      string_flags & STRING_GFLAGS_REGEXP)
  {
    if (string_flags & STRING_GFLAGS_HEXADECIMAL)
      compiler->last_result = yr_re_parse_hex(
          str->c_string, re_flags, &re, &re_error);
    else
      compiler->last_result = yr_re_parse(
          str->c_string, re_flags, &re, &re_error);

    if (compiler->last_result != ERROR_SUCCESS)
    {
      snprintf(
          message,
          sizeof(message),
          "invalid %s \"%s\": %s",
          (string_flags & STRING_GFLAGS_HEXADECIMAL) ?
              "hex string" : "regular expression",
          identifier,
          re_error.message);

      yr_compiler_set_error_extra_info(
          compiler, message);

      goto _exit;
    }

    if (re->flags & RE_FLAGS_FAST_HEX_REGEXP)
      string_flags |= STRING_GFLAGS_FAST_HEX_REGEXP;

    // Regular expressions in the strings section can't mix greedy and ungreedy
    // quantifiers like .* and .*?. That's because these regular expressions can
    // be matched forwards and/or backwards depending on the atom found, and we
    // need the regexp to be all-greedy or all-ungreedy to be able to properly
    // calculate the length of the match.

    if ((re->flags & RE_FLAGS_GREEDY) &&
        (re->flags & RE_FLAGS_UNGREEDY))
    {
      compiler->last_result = ERROR_INVALID_REGULAR_EXPRESSION;

      yr_compiler_set_error_extra_info(compiler,
          "greedy and ungreedy quantifiers can't be mixed in a regular "
          "expression");

      goto _exit;
    }

    if (re->flags & RE_FLAGS_GREEDY)
      string_flags |= STRING_GFLAGS_GREEDY_REGEXP;

    if (yr_re_contains_dot_star(re))
    {
      snprintf(
        message,
        sizeof(message),
        "%s contains .*, consider using .{N} with a reasonable value for N",
        identifier);

      yywarning(yyscanner, message);
    }

    // Split off the first fragment. 're' keeps the head of the expression
    // and 'remainder_re' receives whatever follows the chaining point (NULL
    // if the expression doesn't need to be split).

    compiler->last_result = yr_re_split_at_chaining_point(
        re, &re, &remainder_re, &min_gap, &max_gap);

    if (compiler->last_result != ERROR_SUCCESS)
      goto _exit;

    compiler->last_result = _yr_parser_write_string(
        identifier,
        string_flags,
        compiler,
        NULL,
        re,
        &string,
        &min_atom_quality);

    if (compiler->last_result != ERROR_SUCCESS)
      goto _exit;

    if (remainder_re != NULL)
    {
      string->g_flags |= STRING_GFLAGS_CHAIN_TAIL | STRING_GFLAGS_CHAIN_PART;
      string->chain_gap_min = min_gap;
      string->chain_gap_max = max_gap;
    }

    // Use "aux_string" from now on, we want to keep the value of "string"
    // because it will be returned.

    aux_string = string;

    while (remainder_re != NULL)
    {
      // Destroy regexp pointed by 're' before
      // yr_re_split_at_chaining_point overwrites 're' with another value.
      // The pointer is cleared so that it can't be destroyed a second time
      // at _exit if the split fails without overwriting it.

      yr_re_destroy(re);
      re = NULL;

      compiler->last_result = yr_re_split_at_chaining_point(
          remainder_re, &re, &remainder_re, &min_gap, &max_gap);

      if (compiler->last_result != ERROR_SUCCESS)
        goto _exit;

      prev_string = aux_string;

      compiler->last_result = _yr_parser_write_string(
          identifier,
          string_flags,
          compiler,
          NULL,
          re,
          &aux_string,
          &min_atom_quality_aux);

      if (compiler->last_result != ERROR_SUCCESS)
        goto _exit;

      // The quality of the whole chain is that of its worst fragment.

      if (min_atom_quality_aux < min_atom_quality)
        min_atom_quality = min_atom_quality_aux;

      aux_string->g_flags |= STRING_GFLAGS_CHAIN_PART;
      aux_string->chain_gap_min = min_gap;
      aux_string->chain_gap_max = max_gap;

      prev_string->chained_to = aux_string;

      // prev_string is now chained to aux_string, a string chained
      // to another one can't have a fixed offset, only the head of the
      // string chain can have a fixed offset.

      prev_string->g_flags &= ~STRING_GFLAGS_FIXED_OFFSET;
    }
  }
  else
  {
    compiler->last_result = _yr_parser_write_string(
        identifier,
        string_flags,
        compiler,
        str,
        NULL,
        &string,
        &min_atom_quality);

    if (compiler->last_result != ERROR_SUCCESS)
      goto _exit;
  }

  compiler->last_result = yr_hash_table_add(
      compiler->strings_table,
      identifier,
      NULL,
      string);

  if (compiler->last_result != ERROR_SUCCESS)
    goto _exit;

  if (min_atom_quality < 3 && compiler->callback != NULL)
  {
    snprintf(
        message,
        sizeof(message),
        "%s is slowing down scanning%s",
        string->identifier,
        min_atom_quality < 2 ? " (critical!)" : "");

    yywarning(yyscanner, message);
  }

_exit:

  // On error paths a pending remainder may still be alive; release it too.
  // The comparison with 're' guards against both pointers designating the
  // same regexp, and is done before 're' is destroyed.

  if (remainder_re != NULL && remainder_re != re)
    yr_re_destroy(remainder_re);

  if (re != NULL)
    yr_re_destroy(re);

  if (compiler->last_result != ERROR_SUCCESS)
    return NULL;

  return string;
}