Beispiel #1
0
int yr_parser_emit_with_arg_reloc(
    yyscan_t yyscanner,
    int8_t instruction,
    int64_t argument,
    int8_t** instruction_address)
{
  void* ptr;

  int result = yr_arena_write_data(
      yyget_extra(yyscanner)->code_arena,
      &instruction,
      sizeof(int8_t),
      (void**) instruction_address);

  if (result == ERROR_SUCCESS)
    result = yr_arena_write_data(
        yyget_extra(yyscanner)->code_arena,
        &argument,
        sizeof(int64_t),
        &ptr);

  if (result == ERROR_SUCCESS)
    result = yr_arena_make_relocatable(
        yyget_extra(yyscanner)->code_arena,
        ptr,
        0,
        EOL);

  return result;
}
Beispiel #2
0
// rl_input puts upto MAX characters into BUF with the number put in
// BUF placed in *RESULT.
static void lexer_input (char *buf, unsigned long int *result, int max, void* yyscanner) {
    if (yyget_in(yyscanner) != rl_instream) { //not stdin so read as usual
        while ( (*result = read( fileno(yyget_in(yyscanner)), buf, max )) < 0 ) {
            if (errno != EINTR) {
                std::cerr << "read() in flex scanner failed" << std::endl;
                exit (1);
            }
        }
        return;
    }

    if (rl_len == 0) { //Do we need a new string?
        if (rl_start) {
            free(rl_start);
        }
        unsigned short scopes = yyget_extra(yyscanner)->indents.size();
        std::string prompt = "sugar";
        if(yyget_extra(yyscanner)->pendingEndInstr){
            prompt += "*";
        }
        if(scopes > 1){
            prompt += "("+std::to_string(scopes)+")";
        }
        prompt += "> ";
#if SHELL_USE_COLOR
        prompt = "\x1b[33m"+prompt+"\x1b[0m";
#endif
        rl_start = readline (prompt.c_str());
        if (rl_start == NULL) { //end of file
            *result = 0;
            rl_len = 0;
            return;
        }
        rl_line = rl_start;
        rl_len = strlen (rl_line)+1;
        if (rl_len != 1) {
            add_history (rl_line);
        }
        rl_line[rl_len-1] = '\n';
        fflush (stdout);
    }

    if (rl_len <= max) {
        strncpy (buf, rl_line, rl_len);
        *result = rl_len;
        rl_len = 0;
    }
    else {
        strncpy (buf, rl_line, max);
        *result = max;
        rl_line += max;
        rl_len -= max;
    }
}
Beispiel #3
0
YR_STRING* yr_parser_lookup_string(
    yyscan_t yyscanner,
    const char* identifier)
{
  YR_STRING* string;
  YR_COMPILER* compiler = yyget_extra(yyscanner);

  string = compiler->current_rule_strings;

  while(!STRING_IS_NULL(string))
  {
    // If some string $a gets fragmented into multiple chained
    // strings, all those fragments have the same $a identifier
    // but we are interested in the heading fragment, which is
    // that with chained_to == NULL

    if (strcmp(string->identifier, identifier) == 0 &&
        string->chained_to == NULL)
    {
      return string;
    }

    string = yr_arena_next_address(
        compiler->strings_arena,
        string,
        sizeof(YR_STRING));
  }

  yr_compiler_set_error_extra_info(compiler, identifier);
  compiler->last_result = ERROR_UNDEFINED_STRING;

  return NULL;
}
Beispiel #4
0
YR_EXTERNAL_VARIABLE* yr_parser_lookup_external_variable(
    yyscan_t yyscanner,
    const char* identifier)
{
  YR_EXTERNAL_VARIABLE* external;
  YR_COMPILER* compiler = yyget_extra(yyscanner);
  int i;

  external = (YR_EXTERNAL_VARIABLE*) yr_arena_base_address(
      compiler->externals_arena);

  for (i = 0; i < compiler->externals_count; i++)
  {
    if (strcmp(external->identifier, identifier) == 0)
      return external;

    external = yr_arena_next_address(
        compiler->externals_arena,
        external,
        sizeof(YR_EXTERNAL_VARIABLE));
  }

  yr_compiler_set_error_extra_info(compiler, identifier);
  compiler->last_result = ERROR_UNDEFINED_IDENTIFIER;

  return NULL;
}
Beispiel #5
0
YR_STRING* yr_parser_lookup_string(
    yyscan_t yyscanner,
    const char* identifier)
{
  YR_STRING* string;
  YR_COMPILER* compiler = yyget_extra(yyscanner);

  string = compiler->current_rule_strings;

  while(!STRING_IS_NULL(string))
  {
    if (strcmp(string->identifier, identifier) == 0)
      return string;

    string = yr_arena_next_address(
        compiler->strings_arena,
        string,
        sizeof(YR_STRING));
  }

  yr_compiler_set_error_extra_info(compiler, identifier);
  compiler->last_result = ERROR_UNDEFINED_STRING;

  return NULL;
}
Beispiel #6
0
int yr_parser_emit_pushes_for_strings(
    yyscan_t yyscanner,
    const char* identifier)
{
  YR_COMPILER* compiler = yyget_extra(yyscanner);
  YR_STRING* string = compiler->current_rule->strings;

  const char* string_identifier;
  const char* target_identifier;

  int matching = 0;

  while(!STRING_IS_NULL(string))
  {
    // Don't generate pushes for strings chained to another one, we are
    // only interested in non-chained strings or the head of the chain.

    if (string->chained_to == NULL)
    {
      string_identifier = string->identifier;
      target_identifier = identifier;

      while (*target_identifier != '\0' &&
             *string_identifier != '\0' &&
             *target_identifier == *string_identifier)
      {
        target_identifier++;
        string_identifier++;
      }

      if ((*target_identifier == '\0' && *string_identifier == '\0') ||
           *target_identifier == '*')
      {
        yr_parser_emit_with_arg_reloc(
            yyscanner,
            OP_PUSH,
            PTR_TO_INT64(string),
            NULL,
            NULL);

        string->g_flags |= STRING_GFLAGS_REFERENCED;
        string->g_flags &= ~STRING_GFLAGS_FIXED_OFFSET;
        matching++;
      }
    }

    string = (YR_STRING*) yr_arena_next_address(
        compiler->strings_arena,
        string,
        sizeof(YR_STRING));
  }

  if (matching == 0)
  {
    yr_compiler_set_error_extra_info(compiler, identifier);
    compiler->last_result = ERROR_UNDEFINED_STRING;
  }

  return compiler->last_result;
}
Beispiel #7
0
int yr_parser_reduce_import(
    yyscan_t yyscanner,
    SIZED_STRING* module_name)
{
  YR_COMPILER* compiler = yyget_extra(yyscanner);
  YR_OBJECT* module_structure;

  char* name;

  module_structure = (YR_OBJECT*) yr_hash_table_lookup(
      compiler->objects_table,
      module_name->c_string,
      compiler->current_namespace->name);

  // if module already imported, do nothing

  if (module_structure != NULL)
    return ERROR_SUCCESS;

  compiler->last_result = yr_object_create(
      OBJECT_TYPE_STRUCTURE,
      module_name->c_string,
      NULL,
      &module_structure);

  if (compiler->last_result == ERROR_SUCCESS)
    compiler->last_result = yr_hash_table_add(
        compiler->objects_table,
        module_name->c_string,
        compiler->current_namespace->name,
        module_structure);

  if (compiler->last_result == ERROR_SUCCESS)
  {
    compiler->last_result = yr_modules_do_declarations(
        module_name->c_string,
        module_structure);

    if (compiler->last_result == ERROR_UNKNOWN_MODULE)
      yr_compiler_set_error_extra_info(compiler, module_name->c_string);
  }

  if (compiler->last_result == ERROR_SUCCESS)
    compiler->last_result = yr_arena_write_string(
        compiler->sz_arena,
        module_name->c_string,
        &name);

  if (compiler->last_result == ERROR_SUCCESS)
    compiler->last_result = yr_parser_emit_with_arg_reloc(
        yyscanner,
        OP_IMPORT,
        PTR_TO_INT64(name),
        NULL,
        NULL);

  return compiler->last_result;
}
Beispiel #8
0
void yyerror(yyscan_t scanner, const char *error)
{
	if(xml_output) {
		printf("\t\t<issue line=\"%d\" char=\"\" issue=\"%s\" evidence=\"\"/>\n",
		       yyget_lineno(scanner), error);
	} else {
		fprintf(stderr, "%s: %s at line %d\n",
			(char *) yyget_extra(scanner),
			error, yyget_lineno(scanner));
	}
}
Beispiel #9
0
int yr_parser_emit(
    yyscan_t yyscanner,
    int8_t instruction,
    int8_t** instruction_address)
{
  return yr_arena_write_data(
      yyget_extra(yyscanner)->code_arena,
      &instruction,
      sizeof(int8_t),
      (void**) instruction_address);
}
Beispiel #10
0
int yr_parser_reduce_rule_declaration_phase_2(
    yyscan_t yyscanner,
    YR_RULE* rule)
{
  uint32_t max_strings_per_rule;
  uint32_t strings_in_rule = 0;

  YR_COMPILER* compiler = yyget_extra(yyscanner);

  // Check for unreferenced (unused) strings.

  YR_STRING* string = rule->strings;

  yr_get_configuration(
      YR_CONFIG_MAX_STRINGS_PER_RULE,
      (void*) &max_strings_per_rule);

  while (!STRING_IS_NULL(string))
  {
    // Only the heading fragment in a chain of strings (the one with
    // chained_to == NULL) must be referenced. All other fragments
    // are never marked as referenced.

    if (!STRING_IS_REFERENCED(string) &&
        string->chained_to == NULL)
    {
      yr_compiler_set_error_extra_info(compiler, string->identifier);
      compiler->last_result = ERROR_UNREFERENCED_STRING;
      return compiler->last_result;
    }

    strings_in_rule++;

    if (strings_in_rule > max_strings_per_rule) {
      yr_compiler_set_error_extra_info(compiler, rule->identifier);
      compiler->last_result = ERROR_TOO_MANY_STRINGS;
      return compiler->last_result;
    }

    string = (YR_STRING*) yr_arena_next_address(
        compiler->strings_arena,
        string,
        sizeof(YR_STRING));
  }

  compiler->last_result = yr_parser_emit_with_arg_reloc(
      yyscanner,
      OP_MATCH_RULE,
      rule,
      NULL,
      NULL);

  return compiler->last_result;
}
Beispiel #11
0
int yr_parser_emit_with_arg(
    yyscan_t yyscanner,
    int8_t instruction,
    int64_t argument,
    int8_t** instruction_address)
{
  int result = yr_arena_write_data(
      yyget_extra(yyscanner)->code_arena,
      &instruction,
      sizeof(int8_t),
      (void**) instruction_address);

  if (result == ERROR_SUCCESS)
    result = yr_arena_write_data(
        yyget_extra(yyscanner)->code_arena,
        &argument,
        sizeof(int64_t),
        NULL);

  return result;
}
Beispiel #12
0
int yr_parser_emit_with_arg_reloc(
    yyscan_t yyscanner,
    uint8_t instruction,
    void* argument,
    uint8_t** instruction_address,
    void** argument_address)
{
  int64_t* ptr = NULL;
  int result;

  DECLARE_REFERENCE(void*, ptr) arg;

  memset(&arg, 0, sizeof(arg));
  arg.ptr = argument;

  result = yr_arena_write_data(
      yyget_extra(yyscanner)->code_arena,
      &instruction,
      sizeof(uint8_t),
      (void**) instruction_address);

  if (result == ERROR_SUCCESS)
    result = yr_arena_write_data(
        yyget_extra(yyscanner)->code_arena,
        &arg,
        sizeof(arg),
        (void**) &ptr);

  if (result == ERROR_SUCCESS)
    result = yr_arena_make_ptr_relocatable(
        yyget_extra(yyscanner)->code_arena,
        ptr,
        0,
        EOL);

  if (argument_address != NULL)
    *argument_address = (void*) ptr;

  return result;
}
Beispiel #13
0
int yr_parser_emit_with_arg_double(
    yyscan_t yyscanner,
    uint8_t instruction,
    double argument,
    uint8_t** instruction_address,
    double** argument_address)
{
  int result = yr_arena_write_data(
      yyget_extra(yyscanner)->code_arena,
      &instruction,
      sizeof(uint8_t),
      (void**) instruction_address);

  if (result == ERROR_SUCCESS)
    result = yr_arena_write_data(
        yyget_extra(yyscanner)->code_arena,
        &argument,
        sizeof(double),
        (void**) argument_address);

  return result;
}
Beispiel #14
0
int yr_parser_lookup_loop_variable(
    yyscan_t yyscanner,
    const char* identifier)
{
  YR_COMPILER* compiler = yyget_extra(yyscanner);
  int i;

  for (i = 0; i < compiler->loop_depth; i++)
  {
    if (strcmp(identifier, compiler->loop_identifier[i]) == 0)
      return i;
  }

  return -1;
}
Beispiel #15
0
int yr_parser_reduce_external(
  yyscan_t yyscanner,
  const char* identifier,
  int8_t instruction)
{
  YR_COMPILER* compiler = yyget_extra(yyscanner);
  YR_EXTERNAL_VARIABLE* external;

  external = yr_parser_lookup_external_variable(yyscanner, identifier);

  if (external != NULL)
  {
    if (instruction == EXT_BOOL)
    {
      compiler->last_result = yr_parser_emit_with_arg_reloc(
          yyscanner,
          EXT_BOOL,
          PTR_TO_UINT64(external),
          NULL);
    }
    else if (instruction == EXT_INT &&
             external->type == EXTERNAL_VARIABLE_TYPE_INTEGER)
    {
      compiler->last_result = yr_parser_emit_with_arg_reloc(
          yyscanner,
          EXT_INT,
          PTR_TO_UINT64(external),
          NULL);
    }
    else if (instruction == EXT_STR &&
             external->type == EXTERNAL_VARIABLE_TYPE_FIXED_STRING)
    {
      compiler->last_result = yr_parser_emit_with_arg_reloc(
          yyscanner,
          EXT_STR,
          PTR_TO_UINT64(external),
          NULL);
    }
    else
    {
      yr_compiler_set_error_extra_info(compiler, external->identifier);
      compiler->last_result = ERROR_INCORRECT_VARIABLE_TYPE;
    }
  }

  return compiler->last_result;
}
Beispiel #16
0
YR_META* yr_parser_reduce_meta_declaration(
    yyscan_t yyscanner,
    int32_t type,
    const char* identifier,
    const char* string,
    int32_t integer)
{
  YR_COMPILER* compiler = yyget_extra(yyscanner);
  YR_META* meta;

  compiler->last_result = yr_arena_allocate_struct(
      compiler->metas_arena,
      sizeof(YR_META),
      (void**) &meta,
      offsetof(YR_META, identifier),
      offsetof(YR_META, string),
      EOL);

  if (compiler->last_result != ERROR_SUCCESS)
    return NULL;

  compiler->last_result = yr_arena_write_string(
      compiler->sz_arena,
      identifier,
      &meta->identifier);

  if (compiler->last_result != ERROR_SUCCESS)
    return NULL;

  if (string != NULL)
    compiler->last_result = yr_arena_write_string(
        compiler->sz_arena,
        string,
        &meta->string);
  else
    meta->string = NULL;

  if (compiler->last_result != ERROR_SUCCESS)
    return NULL;

  meta->integer = integer;
  meta->type = type;

  return meta;
}
Beispiel #17
0
int yr_parser_reduce_meta_declaration(
    yyscan_t yyscanner,
    int32_t type,
    const char* identifier,
    const char* string,
    int64_t integer,
    YR_META** meta)
{
  YR_COMPILER* compiler = yyget_extra(yyscanner);

  FAIL_ON_ERROR(yr_arena_allocate_struct(
      compiler->metas_arena,
      sizeof(YR_META),
      (void**) meta,
      offsetof(YR_META, identifier),
      offsetof(YR_META, string),
      EOL));

  FAIL_ON_ERROR(yr_arena_write_string(
      compiler->sz_arena,
      identifier,
      (char**) &(*meta)->identifier));

  if (string != NULL)
  {
    FAIL_ON_ERROR(yr_arena_write_string(
        compiler->sz_arena,
        string,
        &(*meta)->string));
  }
  else
  {
    (*meta)->string = NULL;
  }

  (*meta)->integer = integer;
  (*meta)->type = type;

  return ERROR_SUCCESS;
}
Beispiel #18
0
void yr_parser_emit_pushes_for_strings(
    yyscan_t yyscanner,
    const char* identifier)
{
  YR_COMPILER* compiler = yyget_extra(yyscanner);
  YR_STRING* string = compiler->current_rule_strings;
  const char* string_identifier;
  const char* target_identifier;

  while(!STRING_IS_NULL(string))
  {
    string_identifier = string->identifier;
    target_identifier = identifier;

    while (*target_identifier != '\0' &&
           *string_identifier != '\0' &&
           *target_identifier == *string_identifier)
    {
      target_identifier++;
      string_identifier++;
    }

    if ((*target_identifier == '\0' && *string_identifier == '\0') ||
         *target_identifier == '*')
    {
      yr_parser_emit_with_arg_reloc(
          yyscanner,
          PUSH,
          PTR_TO_UINT64(string),
          NULL);

      string->g_flags |= STRING_GFLAGS_REFERENCED;
    }

    string = yr_arena_next_address(
        compiler->strings_arena,
        string,
        sizeof(YR_STRING));
  }
}
Beispiel #19
0
int yr_parser_reduce_string_identifier(
    yyscan_t yyscanner,
    const char* identifier,
    int8_t instruction)
{
  YR_STRING* string;
  YR_COMPILER* compiler = yyget_extra(yyscanner);

  if (strcmp(identifier, "$") == 0)
  {
    if (compiler->loop_depth > 0)
    {
      yr_parser_emit_with_arg(
          yyscanner,
          PUSH_M,
          LOOP_LOCAL_VARS * (compiler->loop_depth - 1),
          NULL);

      yr_parser_emit(yyscanner, instruction, NULL);

      if (instruction != SFOUND)
      {
        string = compiler->current_rule_strings;

        while(!STRING_IS_NULL(string))
        {
          string->g_flags &= ~STRING_GFLAGS_SINGLE_MATCH;
          string = yr_arena_next_address(
              compiler->strings_arena,
              string,
              sizeof(YR_STRING));
        }
      }
    }
    else
    {
      compiler->last_result = ERROR_MISPLACED_ANONYMOUS_STRING;
    }
  }
  else
  {
    string = yr_parser_lookup_string(yyscanner, identifier);

    if (string != NULL)
    {
      yr_parser_emit_with_arg_reloc(
          yyscanner,
          PUSH,
          PTR_TO_UINT64(string),
          NULL);

      if (instruction != SFOUND)
        string->g_flags &= ~STRING_GFLAGS_SINGLE_MATCH;

      yr_parser_emit(yyscanner, instruction, NULL);

      string->g_flags |= STRING_GFLAGS_REFERENCED;
    }
  }

  return compiler->last_result;
}
Beispiel #20
0
int yr_parser_reduce_rule_declaration(
    yyscan_t yyscanner,
    int32_t flags,
    const char* identifier,
    char* tags,
    YR_STRING* strings,
    YR_META* metas)
{
  YR_COMPILER* compiler = yyget_extra(yyscanner);
  YR_RULE* rule;
  YR_STRING* string;

  if (yr_hash_table_lookup(
        compiler->rules_table,
        identifier,
        compiler->current_namespace->name) != NULL)
  {
    // A rule with the same identifier already exists, return the
    // appropriate error.

    yr_compiler_set_error_extra_info(compiler, identifier);
    compiler->last_result = ERROR_DUPLICATE_RULE_IDENTIFIER;
    return compiler->last_result;
  }

  // Check for unreferenced (unused) strings.

  string = compiler->current_rule_strings;

  while(!STRING_IS_NULL(string))
  {
    // Only the heading fragment in a chain of strings (the one with
    // chained_to == NULL) must be referenced. All other fragments
    // are never marked as referenced.

    if (!STRING_IS_REFERENCED(string) &&
        string->chained_to == NULL)
    {
      yr_compiler_set_error_extra_info(compiler, string->identifier);
      compiler->last_result = ERROR_UNREFERENCED_STRING;
      break;
    }

    string = yr_arena_next_address(
        compiler->strings_arena,
        string,
        sizeof(YR_STRING));
  }

  if (compiler->last_result != ERROR_SUCCESS)
    return compiler->last_result;

  compiler->last_result = yr_arena_allocate_struct(
      compiler->rules_arena,
      sizeof(YR_RULE),
      (void**) &rule,
      offsetof(YR_RULE, identifier),
      offsetof(YR_RULE, tags),
      offsetof(YR_RULE, strings),
      offsetof(YR_RULE, metas),
      offsetof(YR_RULE, ns),
      EOL);

  if (compiler->last_result != ERROR_SUCCESS)
    return compiler->last_result;

  compiler->last_result = yr_arena_write_string(
      compiler->sz_arena,
      identifier,
      &rule->identifier);

  if (compiler->last_result != ERROR_SUCCESS)
    return compiler->last_result;

  compiler->last_result = yr_parser_emit_with_arg_reloc(
      yyscanner,
      RULE_POP,
      PTR_TO_UINT64(rule),
      NULL);

  if (compiler->last_result != ERROR_SUCCESS)
    return compiler->last_result;

  rule->g_flags = flags | compiler->current_rule_flags;
  rule->tags = tags;
  rule->strings = strings;
  rule->metas = metas;
  rule->ns = compiler->current_namespace;

  compiler->current_rule_flags = 0;
  compiler->current_rule_strings = NULL;

  yr_hash_table_add(
      compiler->rules_table,
      identifier,
      compiler->current_namespace->name,
      (void*) rule);

  return compiler->last_result;
}
Beispiel #21
0
YR_STRING* yr_parser_reduce_string_declaration(
    yyscan_t yyscanner,
    int32_t flags,
    const char* identifier,
    SIZED_STRING* str)
{
  int i;
  int error_offset;
  int min_atom_length;
  char* file_name;
  char message[512];

  YR_STRING* string;
  YR_AC_MATCH* new_match;
  ATOM_TREE* atom_tree;
  YR_ATOM_LIST_ITEM* atom;
  YR_ATOM_LIST_ITEM* atom_list = NULL;
  RE* re = NULL;

  uint8_t* literal_string;

  int literal_string_len;
  int max_string_len;

  YR_COMPILER* compiler = yyget_extra(yyscanner);

  compiler->last_result = yr_arena_allocate_struct(
      compiler->strings_arena,
      sizeof(YR_STRING),
      (void**) &string,
      offsetof(YR_STRING, identifier),
      offsetof(YR_STRING, string),
      EOL);

  if (compiler->last_result != ERROR_SUCCESS)
    return NULL;

  compiler->last_result = yr_arena_write_string(
      compiler->sz_arena,
      identifier,
      &string->identifier);

  if (compiler->last_result != ERROR_SUCCESS)
    return NULL;

  if (strcmp(identifier,"$") == 0)
    flags |= STRING_GFLAGS_ANONYMOUS;

  if (!(flags & STRING_GFLAGS_WIDE))
    flags |= STRING_GFLAGS_ASCII;

  // The STRING_GFLAGS_SINGLE_MATCH flag indicates that finding
  // a single match for the string is enough. This is true in
  // most cases, except when the string count (#) and string offset (@)
  // operators are used. All strings are marked STRING_FLAGS_SINGLE_MATCH
  // initially, and unmarked later if required.

  flags |= STRING_GFLAGS_SINGLE_MATCH;

  string->g_flags = flags;

  memset(string->matches, 0, sizeof(string->matches));

  if (flags & STRING_GFLAGS_HEXADECIMAL ||
      flags & STRING_GFLAGS_REGEXP)
  {
    if (flags & STRING_GFLAGS_HEXADECIMAL)
      compiler->last_result = yr_re_compile_hex(
          str->c_string, &re);
    else
      compiler->last_result = yr_re_compile(
          str->c_string, &re);

    if (compiler->last_result != ERROR_SUCCESS)
    {
      snprintf(
          message,
          sizeof(message),
          "invalid %s in string \"%s\": %s",
          (flags & STRING_GFLAGS_HEXADECIMAL) ?
              "hex string" : "regular expression",
          identifier,
          re->error_message);

      yr_compiler_set_error_extra_info(compiler, message);
      string = NULL;
      goto _exit;
    }

    if (re->flags & RE_FLAGS_START_ANCHORED)
      string->g_flags |= STRING_GFLAGS_START_ANCHORED;

    if (re->flags & RE_FLAGS_END_ANCHORED)
      string->g_flags |= STRING_GFLAGS_END_ANCHORED;

    if (re->flags & RE_FLAGS_FAST_HEX_REGEXP)
      string->g_flags |= STRING_GFLAGS_FAST_HEX_REGEXP;

    if (re->flags & RE_FLAGS_LITERAL_STRING)
    {
      string->g_flags |= STRING_GFLAGS_LITERAL;
      literal_string = re->literal_string;
      literal_string_len = re->literal_string_len;

      compiler->last_result = yr_atoms_extract_from_string(
          literal_string, literal_string_len, string->g_flags, &atom_list);
    }
    else
    {
      compiler->last_result = yr_re_emit_code(
          re, compiler->re_code_arena);

      if (compiler->last_result != ERROR_SUCCESS)
      {
        string = NULL;
        goto _exit;
      }

      compiler->last_result = yr_atoms_extract_from_re(
          re, string->g_flags, &atom_list);
    }
  }
  else
  {
    string->g_flags |= STRING_GFLAGS_LITERAL;
    literal_string = (uint8_t*) str->c_string;
    literal_string_len = str->length;

    compiler->last_result  = yr_atoms_extract_from_string(
        literal_string, literal_string_len, string->g_flags, &atom_list);
  }

  if (compiler->last_result != ERROR_SUCCESS)
  {
    string = NULL;
    goto _exit;
  }

  if (STRING_IS_LITERAL(string))
  {
    compiler->last_result = yr_arena_write_data(
        compiler->sz_arena,
        literal_string,
        literal_string_len,
        (void*) &string->string);

    if (compiler->last_result != ERROR_SUCCESS)
    {
      string = NULL;
      goto _exit;
    }

    string->length = literal_string_len;
  }

  // Add the string to Aho-Corasick automaton.

  if (atom_list != NULL)
  {
    compiler->last_result = yr_ac_add_string(
      compiler->automaton_arena,
      compiler->automaton,
      string,
      atom_list);
  }
  else
  {
    compiler->last_result = yr_arena_allocate_struct(
        compiler->automaton_arena,
        sizeof(YR_AC_MATCH),
        (void**) &new_match,
        offsetof(YR_AC_MATCH, string),
        offsetof(YR_AC_MATCH, forward_code),
        offsetof(YR_AC_MATCH, backward_code),
        offsetof(YR_AC_MATCH, next),
        EOL);

    if (compiler->last_result == ERROR_SUCCESS)
    {
      new_match->backtrack = 0;
      new_match->string = string;
      new_match->forward_code = re->root_node->forward_code;
      new_match->backward_code = NULL;
      new_match->next = compiler->automaton->root->matches;
      compiler->automaton->root->matches = new_match;
    }
  }

  atom = atom_list;

  if (atom != NULL)
    min_atom_length = MAX_ATOM_LENGTH;
  else
    min_atom_length = 0;

  while (atom != NULL)
  {
    if (atom->atom_length < min_atom_length)
      min_atom_length = atom->atom_length;
    atom = atom->next;
  }

  if (STRING_IS_LITERAL(string))
  {
    if (STRING_IS_WIDE(string))
      max_string_len = string->length * 2;
    else
      max_string_len = string->length;

    if (max_string_len == min_atom_length)
      string->g_flags |= STRING_GFLAGS_FITS_IN_ATOM;
  }

  if (compiler->file_name_stack_ptr > 0)
    file_name = compiler->file_name_stack[compiler->file_name_stack_ptr - 1];
  else
    file_name = NULL;

  if (min_atom_length < 2 && compiler->error_report_function != NULL)
  {
    snprintf(
        message,
        sizeof(message),
        "%s is slowing down scanning%s",
        string->identifier,
        min_atom_length == 0 ? " (critical!)" : "");

    compiler->error_report_function(
        YARA_ERROR_LEVEL_WARNING,
        file_name,
        yyget_lineno(yyscanner),
        message);
  }

  if (compiler->last_result != ERROR_SUCCESS)
    string = NULL;

_exit:

  if (atom_list != NULL)
    yr_atoms_list_destroy(atom_list);

  if (re != NULL)
    yr_re_destroy(re);

  return string;
}
Beispiel #22
0
YR_STRING* yr_parser_reduce_string_declaration(
    yyscan_t yyscanner,
    int32_t flags,
    const char* identifier,
    SIZED_STRING* str)
{
  int min_atom_length;
  int min_atom_length_aux;

  int32_t min_gap;
  int32_t max_gap;

  char* file_name;
  char message[512];

  YR_COMPILER* compiler = yyget_extra(yyscanner);
  YR_STRING* string = NULL;
  YR_STRING* aux_string;
  YR_STRING* prev_string;

  RE* re = NULL;
  RE* remainder_re;

  if (strcmp(identifier,"$") == 0)
    flags |= STRING_GFLAGS_ANONYMOUS;

  if (!(flags & STRING_GFLAGS_WIDE))
    flags |= STRING_GFLAGS_ASCII;

  if (str->flags & SIZED_STRING_FLAGS_NO_CASE)
    flags |= STRING_GFLAGS_NO_CASE;

  if (str->flags & SIZED_STRING_FLAGS_DOT_ALL)
    flags |= STRING_GFLAGS_REGEXP_DOT_ALL;

  // The STRING_GFLAGS_SINGLE_MATCH flag indicates that finding
  // a single match for the string is enough. This is true in
  // most cases, except when the string count (#) and string offset (@)
  // operators are used. All strings are marked STRING_FLAGS_SINGLE_MATCH
  // initially, and unmarked later if required.

  flags |= STRING_GFLAGS_SINGLE_MATCH;

  if (flags & STRING_GFLAGS_HEXADECIMAL ||
      flags & STRING_GFLAGS_REGEXP)
  {
    if (flags & STRING_GFLAGS_HEXADECIMAL)
      compiler->last_result = yr_re_compile_hex(
          str->c_string, &re);
    else
      compiler->last_result = yr_re_compile(
          str->c_string, &re);

    if (compiler->last_result != ERROR_SUCCESS)
    {
      snprintf(
          message,
          sizeof(message),
          "invalid %s \"%s\": %s",
          (flags & STRING_GFLAGS_HEXADECIMAL) ?
              "hex string" : "regular expression",
          identifier,
          re->error_message);

      yr_compiler_set_error_extra_info(
          compiler, message);

      goto _exit;
    }

    if (re->flags & RE_FLAGS_FAST_HEX_REGEXP)
      flags |= STRING_GFLAGS_FAST_HEX_REGEXP;

    compiler->last_result = yr_re_split_at_chaining_point(
        re, &re, &remainder_re, &min_gap, &max_gap);

    if (compiler->last_result != ERROR_SUCCESS)
      goto _exit;

    compiler->last_result = _yr_parser_write_string(
        identifier,
        flags,
        compiler,
        NULL,
        re,
        &string,
        &min_atom_length);

    if (compiler->last_result != ERROR_SUCCESS)
      goto _exit;

    if (remainder_re != NULL)
    {
      string->g_flags |= STRING_GFLAGS_CHAIN_TAIL | STRING_GFLAGS_CHAIN_PART;
      string->chain_gap_min = min_gap;
      string->chain_gap_max = max_gap;
    }

    // Use "aux_string" from now on, we want to keep the value of "string"
    // because it will returned.

    aux_string = string;

    while (remainder_re != NULL)
    {
      // Destroy regexp pointed by 're' before yr_re_split_at_jmp
      // overwrites 're' with another value.

      yr_re_destroy(re);

      compiler->last_result = yr_re_split_at_chaining_point(
          remainder_re, &re, &remainder_re, &min_gap, &max_gap);

      if (compiler->last_result != ERROR_SUCCESS)
        goto _exit;

      prev_string = aux_string;

      compiler->last_result = _yr_parser_write_string(
          identifier,
          flags,
          compiler,
          NULL,
          re,
          &aux_string,
          &min_atom_length_aux);

      if (compiler->last_result != ERROR_SUCCESS)
        goto _exit;

      if (min_atom_length_aux < min_atom_length)
        min_atom_length = min_atom_length_aux;

      aux_string->g_flags |= STRING_GFLAGS_CHAIN_PART;
      aux_string->chain_gap_min = min_gap;
      aux_string->chain_gap_max = max_gap;

      prev_string->chained_to = aux_string;
    }
  }
  else
  {
    compiler->last_result = _yr_parser_write_string(
        identifier,
        flags,
        compiler,
        str,
        NULL,
        &string,
        &min_atom_length);

    if (compiler->last_result != ERROR_SUCCESS)
      goto _exit;
  }

  if (compiler->file_name_stack_ptr > 0)
    file_name = compiler->file_name_stack[compiler->file_name_stack_ptr - 1];
  else
    file_name = NULL;

  if (min_atom_length < 2 && compiler->error_report_function != NULL)
  {
    snprintf(
        message,
        sizeof(message),
        "%s is slowing down scanning%s",
        string->identifier,
        min_atom_length == 0 ? " (critical!)" : "");

    compiler->error_report_function(
        YARA_ERROR_LEVEL_WARNING,
        file_name,
        yyget_lineno(yyscanner),
        message);
  }

_exit:

  if (re != NULL)
    yr_re_destroy(re);

  if (compiler->last_result != ERROR_SUCCESS)
    return NULL;

  return string;
}
Beispiel #23
0
int yr_parser_reduce_rule_declaration_phase_1(
    yyscan_t yyscanner,
    int32_t flags,
    const char* identifier,
    YR_RULE** rule)
{
  YR_FIXUP *fixup;
  YR_INIT_RULE_ARGS *init_rule_args;
  YR_COMPILER* compiler = yyget_extra(yyscanner);

  *rule = NULL;

  if (yr_hash_table_lookup(
        compiler->rules_table,
        identifier,
        compiler->current_namespace->name) != NULL ||
      yr_hash_table_lookup(
        compiler->objects_table,
        identifier,
        NULL) != NULL)
  {
    // A rule or variable with the same identifier already exists, return the
    // appropriate error.

    yr_compiler_set_error_extra_info(compiler, identifier);
    return ERROR_DUPLICATED_IDENTIFIER;
  }

  FAIL_ON_ERROR(yr_arena_allocate_struct(
      compiler->rules_arena,
      sizeof(YR_RULE),
      (void**) rule,
      offsetof(YR_RULE, identifier),
      offsetof(YR_RULE, tags),
      offsetof(YR_RULE, strings),
      offsetof(YR_RULE, metas),
      offsetof(YR_RULE, ns),
      EOL))

  (*rule)->g_flags = flags;
  (*rule)->ns = compiler->current_namespace;
  (*rule)->num_atoms = 0;

  #ifdef PROFILING_ENABLED
  (*rule)->time_cost = 0;

  memset(
      (*rule)->time_cost_per_thread, 0, sizeof((*rule)->time_cost_per_thread));
  #endif

  FAIL_ON_ERROR(yr_arena_write_string(
      compiler->sz_arena,
      identifier,
      (char**) &(*rule)->identifier));

  FAIL_ON_ERROR(yr_parser_emit(
      yyscanner,
      OP_INIT_RULE,
      NULL));

  FAIL_ON_ERROR(yr_arena_allocate_struct(
      compiler->code_arena,
      sizeof(YR_INIT_RULE_ARGS),
      (void**) &init_rule_args,
      offsetof(YR_INIT_RULE_ARGS, rule),
      offsetof(YR_INIT_RULE_ARGS, jmp_addr),
      EOL));

  init_rule_args->rule = *rule;

  // jmp_addr holds the address to jump to when we want to skip the code for
  // the rule. It is iniatialized as NULL at this point because we don't know
  // the address until emmiting the code for the rule's condition. The address
  // is set in yr_parser_reduce_rule_declaration_phase_2.
  init_rule_args->jmp_addr = NULL;

  // Create a fixup entry for the jump and push it in the stack
  fixup = (YR_FIXUP*) yr_malloc(sizeof(YR_FIXUP));

  if (fixup == NULL)
    return ERROR_INSUFFICIENT_MEMORY;

  fixup->address = (void*) &(init_rule_args->jmp_addr);
  fixup->next = compiler->fixup_stack_head;
  compiler->fixup_stack_head = fixup;

  // Clean strings_table as we are starting to parse a new rule.
  yr_hash_table_clean(compiler->strings_table, NULL);

  FAIL_ON_ERROR(yr_hash_table_add(
      compiler->rules_table,
      identifier,
      compiler->current_namespace->name,
      (void*) *rule));

  compiler->current_rule = *rule;

  return ERROR_SUCCESS;
}
Beispiel #24
0
int yr_parser_reduce_import(
    yyscan_t yyscanner,
    SIZED_STRING* module_name)
{
  int result;

  YR_COMPILER* compiler = yyget_extra(yyscanner);
  YR_OBJECT* module_structure;

  char* name;

  if (!_yr_parser_valid_module_name(module_name))
  {
    yr_compiler_set_error_extra_info(compiler, module_name->c_string);
    return ERROR_INVALID_MODULE_NAME;
  }

  module_structure = (YR_OBJECT*) yr_hash_table_lookup(
      compiler->objects_table,
      module_name->c_string,
      compiler->current_namespace->name);

  // if module already imported, do nothing

  if (module_structure != NULL)
    return ERROR_SUCCESS;

  FAIL_ON_ERROR(yr_object_create(
      OBJECT_TYPE_STRUCTURE,
      module_name->c_string,
      NULL,
      &module_structure));

  FAIL_ON_ERROR(yr_hash_table_add(
      compiler->objects_table,
      module_name->c_string,
      compiler->current_namespace->name,
      module_structure));

  result = yr_modules_do_declarations(
      module_name->c_string,
      module_structure);

  if (result == ERROR_UNKNOWN_MODULE)
    yr_compiler_set_error_extra_info(compiler, module_name->c_string);

  if (result != ERROR_SUCCESS)
    return result;

  FAIL_ON_ERROR(yr_arena_write_string(
      compiler->sz_arena,
      module_name->c_string,
      &name));

  FAIL_ON_ERROR(yr_parser_emit_with_arg_reloc(
        yyscanner,
        OP_IMPORT,
        name,
        NULL,
        NULL));

  return ERROR_SUCCESS;
}
Beispiel #25
0
int yr_parser_reduce_rule_declaration_phase_2(
    yyscan_t yyscanner,
    YR_RULE* rule)
{
  uint32_t max_strings_per_rule;
  uint32_t strings_in_rule = 0;
  uint8_t* nop_inst_addr = NULL;

  int result;

  YR_FIXUP *fixup;
  YR_STRING* string;
  YR_COMPILER* compiler = yyget_extra(yyscanner);

  yr_get_configuration(
      YR_CONFIG_MAX_STRINGS_PER_RULE,
      (void*) &max_strings_per_rule);

  // Show warning if the rule is generating too many atoms. The warning is
  // shown if the number of atoms is greater than 20 times the maximum number
  // of strings allowed for a rule, as 20 is minimum number of atoms generated
  // for a string using *nocase*, *ascii* and *wide* modifiers simultaneosly.

  if (rule->num_atoms > YR_ATOMS_PER_RULE_WARNING_THRESHOLD)
  {
    yywarning(
        yyscanner,
        "rule %s is slowing down scanning",
        rule->identifier);
  }

  // Check for unreferenced (unused) strings.
  string = rule->strings;

  while (!STRING_IS_NULL(string))
  {
    // Only the heading fragment in a chain of strings (the one with
    // chained_to == NULL) must be referenced. All other fragments
    // are never marked as referenced.

    if (!STRING_IS_REFERENCED(string) &&
        string->chained_to == NULL)
    {
      yr_compiler_set_error_extra_info(compiler, string->identifier);
      return ERROR_UNREFERENCED_STRING;
    }

    strings_in_rule++;

    if (strings_in_rule > max_strings_per_rule)
    {
      yr_compiler_set_error_extra_info(compiler, rule->identifier);
      return ERROR_TOO_MANY_STRINGS;
    }

    string = (YR_STRING*) yr_arena_next_address(
        compiler->strings_arena,
        string,
        sizeof(YR_STRING));
  }

  result = yr_parser_emit_with_arg_reloc(
      yyscanner,
      OP_MATCH_RULE,
      rule,
      NULL,
      NULL);

  // Generate a do-nothing instruction (NOP) in order to get its address
  // and use it as the destination for the OP_INIT_RULE skip jump. We can not
  // simply use the address of the OP_MATCH_RULE instruction +1 because we
  // can't be sure that the instruction following the OP_MATCH_RULE is going to
  // be in the same arena page. As we don't have a reliable way of getting the
  // address of the next instruction we generate the OP_NOP.

  if (result == ERROR_SUCCESS)
    result = yr_parser_emit(yyscanner, OP_NOP, &nop_inst_addr);

  fixup = compiler->fixup_stack_head;
  *(void**)(fixup->address) = (void*) nop_inst_addr;
  compiler->fixup_stack_head = fixup->next;
  yr_free(fixup);

  return result;
}
Beispiel #26
0
int yr_parser_reduce_string_identifier(
    yyscan_t yyscanner,
    const char* identifier,
    uint8_t instruction,
    uint64_t at_offset)
{
  YR_STRING* string;
  YR_COMPILER* compiler = yyget_extra(yyscanner);

  if (strcmp(identifier, "$") == 0) // is an anonymous string ?
  {
    if (compiler->loop_for_of_mem_offset >= 0) // inside a loop ?
    {
      yr_parser_emit_with_arg(
          yyscanner,
          OP_PUSH_M,
          compiler->loop_for_of_mem_offset,
          NULL,
          NULL);

      yr_parser_emit(yyscanner, instruction, NULL);

      string = compiler->current_rule->strings;

      while(!STRING_IS_NULL(string))
      {
        if (instruction != OP_FOUND)
          string->g_flags &= ~STRING_GFLAGS_SINGLE_MATCH;

        if (instruction == OP_FOUND_AT)
        {
          // Avoid overwriting any previous fixed offset

          if (string->fixed_offset == UNDEFINED)
            string->fixed_offset = at_offset;

          // If a previous fixed offset was different, disable
          // the STRING_GFLAGS_FIXED_OFFSET flag because we only
          // have room to store a single fixed offset value

          if (string->fixed_offset != at_offset)
            string->g_flags &= ~STRING_GFLAGS_FIXED_OFFSET;
        }
        else
        {
          string->g_flags &= ~STRING_GFLAGS_FIXED_OFFSET;
        }

        string = (YR_STRING*) yr_arena_next_address(
            compiler->strings_arena,
            string,
            sizeof(YR_STRING));
      }
    }
    else
    {
      // Anonymous strings not allowed outside of a loop
      compiler->last_result = ERROR_MISPLACED_ANONYMOUS_STRING;
    }
  }
  else
  {
    string = yr_parser_lookup_string(yyscanner, identifier);

    if (string != NULL)
    {
      yr_parser_emit_with_arg_reloc(
          yyscanner,
          OP_PUSH,
          PTR_TO_INT64(string),
          NULL,
          NULL);

      if (instruction != OP_FOUND)
        string->g_flags &= ~STRING_GFLAGS_SINGLE_MATCH;

      if (instruction == OP_FOUND_AT)
      {
        // Avoid overwriting any previous fixed offset

        if (string->fixed_offset == UNDEFINED)
          string->fixed_offset = at_offset;

        // If a previous fixed offset was different, disable
        // the STRING_GFLAGS_FIXED_OFFSET flag because we only
        // have room to store a single fixed offset value

        if (string->fixed_offset == UNDEFINED ||
            string->fixed_offset != at_offset)
        {
          string->g_flags &= ~STRING_GFLAGS_FIXED_OFFSET;
        }
      }
      else
      {
        string->g_flags &= ~STRING_GFLAGS_FIXED_OFFSET;
      }

      yr_parser_emit(yyscanner, instruction, NULL);

      string->g_flags |= STRING_GFLAGS_REFERENCED;
    }
  }

  return compiler->last_result;
}
Beispiel #27
0
YR_RULE* yr_parser_reduce_rule_declaration_phase_1(
    yyscan_t yyscanner,
    int32_t flags,
    const char* identifier)
{
  YR_COMPILER* compiler = yyget_extra(yyscanner);
  YR_RULE* rule = NULL;

  if (yr_hash_table_lookup(
        compiler->rules_table,
        identifier,
        compiler->current_namespace->name) != NULL ||
      yr_hash_table_lookup(
        compiler->objects_table,
        identifier,
        compiler->current_namespace->name) != NULL)
  {
    // A rule or variable with the same identifier already exists, return the
    // appropriate error.

    yr_compiler_set_error_extra_info(compiler, identifier);
    compiler->last_result = ERROR_DUPLICATED_IDENTIFIER;
    return NULL;
  }

  compiler->last_result = yr_arena_allocate_struct(
      compiler->rules_arena,
      sizeof(YR_RULE),
      (void**) &rule,
      offsetof(YR_RULE, identifier),
      offsetof(YR_RULE, tags),
      offsetof(YR_RULE, strings),
      offsetof(YR_RULE, metas),
      offsetof(YR_RULE, ns),
      EOL);

  if (compiler->last_result != ERROR_SUCCESS)
    return NULL;

  rule->g_flags = flags;
  rule->ns = compiler->current_namespace;

  #ifdef PROFILING_ENABLED
  rule->clock_ticks = 0;
  #endif

  compiler->last_result = yr_arena_write_string(
      compiler->sz_arena,
      identifier,
      (char**) &rule->identifier);

  if (compiler->last_result != ERROR_SUCCESS)
    return NULL;

  compiler->last_result = yr_parser_emit_with_arg_reloc(
      yyscanner,
      OP_INIT_RULE,
      PTR_TO_INT64(rule),
      NULL,
      NULL);

  if (compiler->last_result == ERROR_SUCCESS)
    compiler->last_result = yr_hash_table_add(
        compiler->rules_table,
        identifier,
        compiler->current_namespace->name,
        (void*) rule);

  // Clean strings_table as we are starting to parse a new rule.
  yr_hash_table_clean(compiler->strings_table, NULL);

  compiler->current_rule = rule;
  return rule;
}
Beispiel #28
0
int yr_parser_reduce_operation(
    yyscan_t yyscanner,
    const char* op,
    EXPRESSION left_operand,
    EXPRESSION right_operand)
{
  YR_COMPILER* compiler = yyget_extra(yyscanner);

  if ((left_operand.type == EXPRESSION_TYPE_INTEGER ||
       left_operand.type == EXPRESSION_TYPE_FLOAT) &&
      (right_operand.type == EXPRESSION_TYPE_INTEGER ||
       right_operand.type == EXPRESSION_TYPE_FLOAT))
  {
    if (left_operand.type != right_operand.type)
    {
      // One operand is double and the other is integer,
      // cast the integer to double

      compiler->last_result = yr_parser_emit_with_arg(
          yyscanner,
          OP_INT_TO_DBL,
          (left_operand.type == EXPRESSION_TYPE_INTEGER) ? 2 : 1,
          NULL,
          NULL);
    }

    if (compiler->last_result == ERROR_SUCCESS)
    {
      int expression_type = EXPRESSION_TYPE_FLOAT;

      if (left_operand.type == EXPRESSION_TYPE_INTEGER &&
          right_operand.type == EXPRESSION_TYPE_INTEGER)
      {
        expression_type = EXPRESSION_TYPE_INTEGER;
      }

      compiler->last_result = yr_parser_emit(
          yyscanner,
          _yr_parser_operator_to_opcode(op, expression_type),
          NULL);
    }

  }
  else if (left_operand.type == EXPRESSION_TYPE_STRING &&
           right_operand.type == EXPRESSION_TYPE_STRING)
  {
    int opcode = _yr_parser_operator_to_opcode(op, EXPRESSION_TYPE_STRING);

    if (opcode != OP_ERROR)
    {
      compiler->last_result = yr_parser_emit(
          yyscanner,
          opcode,
          NULL);
    }
    else
    {
      yr_compiler_set_error_extra_info_fmt(
          compiler, "strings don't support \"%s\" operation", op);

      compiler->last_result = ERROR_WRONG_TYPE;
    }
  }
  else
  {
    yr_compiler_set_error_extra_info(compiler, "type mismatch");
    compiler->last_result = ERROR_WRONG_TYPE;
  }

  return compiler->last_result;
}
Beispiel #29
0
YR_STRING* yr_parser_reduce_string_declaration(
    yyscan_t yyscanner,
    int32_t string_flags,
    const char* identifier,
    SIZED_STRING* str)
{
  int min_atom_quality;
  int min_atom_quality_aux;
  int re_flags = 0;

  int32_t min_gap;
  int32_t max_gap;

  char message[512];

  YR_COMPILER* compiler = yyget_extra(yyscanner);
  YR_STRING* string = NULL;
  YR_STRING* aux_string;
  YR_STRING* prev_string;

  RE* re = NULL;
  RE* remainder_re;

  RE_ERROR re_error;

  // Determine if a string with the same identifier was already defined
  // by searching for the identifier in string_table.

  string = yr_hash_table_lookup(
      compiler->strings_table,
      identifier,
      NULL);

  if (string != NULL)
  {
    compiler->last_result = ERROR_DUPLICATED_STRING_IDENTIFIER;
    yr_compiler_set_error_extra_info(compiler, identifier);
    goto _exit;
  }

  // Empty strings are now allowed

  if (str->length == 0)
  {
    compiler->last_result = ERROR_EMPTY_STRING;
    yr_compiler_set_error_extra_info(compiler, identifier);
    goto _exit;
  }

  if (str->flags & SIZED_STRING_FLAGS_NO_CASE)
    string_flags |= STRING_GFLAGS_NO_CASE;

  if (str->flags & SIZED_STRING_FLAGS_DOT_ALL)
    re_flags |= RE_FLAGS_DOT_ALL;

  if (strcmp(identifier,"$") == 0)
    string_flags |= STRING_GFLAGS_ANONYMOUS;

  if (!(string_flags & STRING_GFLAGS_WIDE))
    string_flags |= STRING_GFLAGS_ASCII;

  if (string_flags & STRING_GFLAGS_NO_CASE)
    re_flags |= RE_FLAGS_NO_CASE;

  // The STRING_GFLAGS_SINGLE_MATCH flag indicates that finding
  // a single match for the string is enough. This is true in
  // most cases, except when the string count (#) and string offset (@)
  // operators are used. All strings are marked STRING_FLAGS_SINGLE_MATCH
  // initially, and unmarked later if required.

  string_flags |= STRING_GFLAGS_SINGLE_MATCH;

  // The STRING_GFLAGS_FIXED_OFFSET indicates that the string doesn't
  // need to be searched all over the file because the user is using the
  // "at" operator. The string must be searched at a fixed offset in the
  // file. All strings are marked STRING_GFLAGS_FIXED_OFFSET initially,
  // and unmarked later if required.

  string_flags |= STRING_GFLAGS_FIXED_OFFSET;

  if (string_flags & STRING_GFLAGS_HEXADECIMAL ||
      string_flags & STRING_GFLAGS_REGEXP)
  {
    if (string_flags & STRING_GFLAGS_HEXADECIMAL)
      compiler->last_result = yr_re_parse_hex(
          str->c_string, re_flags, &re, &re_error);
    else
      compiler->last_result = yr_re_parse(
          str->c_string, re_flags, &re, &re_error);

    if (compiler->last_result != ERROR_SUCCESS)
    {
      snprintf(
          message,
          sizeof(message),
          "invalid %s \"%s\": %s",
          (string_flags & STRING_GFLAGS_HEXADECIMAL) ?
              "hex string" : "regular expression",
          identifier,
          re_error.message);

      yr_compiler_set_error_extra_info(
          compiler, message);

      goto _exit;
    }

    if (re->flags & RE_FLAGS_FAST_HEX_REGEXP)
      string_flags |= STRING_GFLAGS_FAST_HEX_REGEXP;

    // Regular expressions in the strings section can't mix greedy and ungreedy
    // quantifiers like .* and .*?. That's because these regular expressions can
    // be matched forwards and/or backwards depending on the atom found, and we
    // need the regexp to be all-greedy or all-ungreedy to be able to properly
    // calculate the length of the match.

    if ((re->flags & RE_FLAGS_GREEDY) &&
        (re->flags & RE_FLAGS_UNGREEDY))
    {
      compiler->last_result = ERROR_INVALID_REGULAR_EXPRESSION;

      yr_compiler_set_error_extra_info(compiler,
          "greedy and ungreedy quantifiers can't be mixed in a regular "
          "expression");

      goto _exit;
    }

    if (re->flags & RE_FLAGS_GREEDY)
      string_flags |= STRING_GFLAGS_GREEDY_REGEXP;

    if (yr_re_contains_dot_star(re))
    {
      snprintf(
        message,
        sizeof(message),
        "%s contains .*, consider using .{N} with a reasonable value for N",
        identifier);

        yywarning(yyscanner, message);
    }

    compiler->last_result = yr_re_split_at_chaining_point(
        re, &re, &remainder_re, &min_gap, &max_gap);

    if (compiler->last_result != ERROR_SUCCESS)
      goto _exit;

    compiler->last_result = _yr_parser_write_string(
        identifier,
        string_flags,
        compiler,
        NULL,
        re,
        &string,
        &min_atom_quality);

    if (compiler->last_result != ERROR_SUCCESS)
      goto _exit;

    if (remainder_re != NULL)
    {
      string->g_flags |= STRING_GFLAGS_CHAIN_TAIL | STRING_GFLAGS_CHAIN_PART;
      string->chain_gap_min = min_gap;
      string->chain_gap_max = max_gap;
    }

    // Use "aux_string" from now on, we want to keep the value of "string"
    // because it will returned.

    aux_string = string;

    while (remainder_re != NULL)
    {
      // Destroy regexp pointed by 're' before yr_re_split_at_jmp
      // overwrites 're' with another value.

      yr_re_destroy(re);

      compiler->last_result = yr_re_split_at_chaining_point(
          remainder_re, &re, &remainder_re, &min_gap, &max_gap);

      if (compiler->last_result != ERROR_SUCCESS)
        goto _exit;

      prev_string = aux_string;

      compiler->last_result = _yr_parser_write_string(
          identifier,
          string_flags,
          compiler,
          NULL,
          re,
          &aux_string,
          &min_atom_quality_aux);

      if (compiler->last_result != ERROR_SUCCESS)
        goto _exit;

      if (min_atom_quality_aux < min_atom_quality)
        min_atom_quality = min_atom_quality_aux;

      aux_string->g_flags |= STRING_GFLAGS_CHAIN_PART;
      aux_string->chain_gap_min = min_gap;
      aux_string->chain_gap_max = max_gap;

      prev_string->chained_to = aux_string;

      // prev_string is now chained to aux_string, an string chained
      // to another one can't have a fixed offset, only the head of the
      // string chain can have a fixed offset.

      prev_string->g_flags &= ~STRING_GFLAGS_FIXED_OFFSET;
    }
  }
  else
  {
    compiler->last_result = _yr_parser_write_string(
        identifier,
        string_flags,
        compiler,
        str,
        NULL,
        &string,
        &min_atom_quality);

    if (compiler->last_result != ERROR_SUCCESS)
      goto _exit;
  }

  compiler->last_result = yr_hash_table_add(
      compiler->strings_table,
      identifier,
      NULL,
      string);

  if (compiler->last_result != ERROR_SUCCESS)
    goto _exit;

  if (min_atom_quality < 3 && compiler->callback != NULL)
  {
    snprintf(
        message,
        sizeof(message),
        "%s is slowing down scanning%s",
        string->identifier,
        min_atom_quality < 2 ? " (critical!)" : "");

    yywarning(yyscanner, message);
  }

_exit:

  if (re != NULL)
    yr_re_destroy(re);

  if (compiler->last_result != ERROR_SUCCESS)
    return NULL;

  return string;
}