Example #1
0
int yr_ac_create_automaton(
    YR_ARENA* arena,
    YR_AC_AUTOMATON** automaton)
{
  int result;
  YR_AC_STATE* root_state;

  result = yr_arena_allocate_struct(
      arena,
      sizeof(YR_AC_AUTOMATON),
      (void**) automaton,
      offsetof(YR_AC_AUTOMATON, root),
      EOL);

  if (result != ERROR_SUCCESS)
    return result;

  result = yr_arena_allocate_struct(
      arena,
      sizeof(YR_AC_TABLE_BASED_STATE),
      (void**) &root_state,
      offsetof(YR_AC_TABLE_BASED_STATE, failure),
      offsetof(YR_AC_TABLE_BASED_STATE, matches),
      EOL);

  if (result != ERROR_SUCCESS)
    return result;

  (*automaton)->root = root_state;

  root_state->depth = 0;
  root_state->matches = NULL;

  return result;
}
Example #2
0
File: compiler.c Project: c4nc/yara
int _yr_compiler_set_namespace(
    YR_COMPILER* compiler,
    const char* namespace_)
{
  YR_NAMESPACE* ns;

  char* ns_name;
  int result;
  int i;
  int found;

  ns = (YR_NAMESPACE*) yr_arena_base_address(compiler->namespaces_arena);
  found = FALSE;

  for (i = 0; i < compiler->namespaces_count; i++)
  {
    if (strcmp(ns->name, namespace_) == 0)
    {
      found = TRUE;
      break;
    }

    ns = (YR_NAMESPACE*) yr_arena_next_address(
        compiler->namespaces_arena,
        ns,
        sizeof(YR_NAMESPACE));
  }

  if (!found)
  {
    result = yr_arena_write_string(
        compiler->sz_arena,
        namespace_,
        &ns_name);

    if (result == ERROR_SUCCESS)
      result = yr_arena_allocate_struct(
          compiler->namespaces_arena,
          sizeof(YR_NAMESPACE),
          (void**) &ns,
          offsetof(YR_NAMESPACE, name),
          EOL);

    if (result != ERROR_SUCCESS)
      return result;

    ns->name = ns_name;

    for (i = 0; i < MAX_THREADS; i++)
      ns->t_flags[i] = 0;

    compiler->namespaces_count++;
  }

  compiler->current_namespace = ns;
  return ERROR_SUCCESS;
}
Example #3
0
int yr_compiler_define_string_variable(
    YR_COMPILER* compiler,
    const char* identifier,
    const char* value)
{
  YR_OBJECT* object;
  YR_EXTERNAL_VARIABLE* external;

  char* id = NULL;
  char* val = NULL;

  compiler->last_result = ERROR_SUCCESS;

  FAIL_ON_COMPILER_ERROR(yr_arena_write_string(
      compiler->sz_arena,
      identifier,
      &id));

  FAIL_ON_COMPILER_ERROR(yr_arena_write_string(
      compiler->sz_arena,
      value,
      &val));

  FAIL_ON_COMPILER_ERROR(yr_arena_allocate_struct(
      compiler->externals_arena,
      sizeof(YR_EXTERNAL_VARIABLE),
      (void**) &external,
      offsetof(YR_EXTERNAL_VARIABLE, identifier),
      offsetof(YR_EXTERNAL_VARIABLE, string),
      EOL));

  external->type = EXTERNAL_VARIABLE_TYPE_STRING;
  external->identifier = id;
  external->integer = 0;
  external->string = val;

  FAIL_ON_COMPILER_ERROR(yr_object_from_external_variable(
      external,
      &object));

  FAIL_ON_COMPILER_ERROR(yr_hash_table_add(
      compiler->objects_table,
      external->identifier,
      NULL,
      (void*) object));

  return compiler->last_result;
}
Example #4
0
YR_META* yr_parser_reduce_meta_declaration(
    yyscan_t yyscanner,
    int32_t type,
    const char* identifier,
    const char* string,
    int32_t integer)
{
  YR_COMPILER* compiler = yyget_extra(yyscanner);
  YR_META* meta;

  compiler->last_result = yr_arena_allocate_struct(
      compiler->metas_arena,
      sizeof(YR_META),
      (void**) &meta,
      offsetof(YR_META, identifier),
      offsetof(YR_META, string),
      EOL);

  if (compiler->last_result != ERROR_SUCCESS)
    return NULL;

  compiler->last_result = yr_arena_write_string(
      compiler->sz_arena,
      identifier,
      &meta->identifier);

  if (compiler->last_result != ERROR_SUCCESS)
    return NULL;

  if (string != NULL)
    compiler->last_result = yr_arena_write_string(
        compiler->sz_arena,
        string,
        &meta->string);
  else
    meta->string = NULL;

  if (compiler->last_result != ERROR_SUCCESS)
    return NULL;

  meta->integer = integer;
  meta->type = type;

  return meta;
}
Example #5
0
File: compiler.c Project: c4nc/yara
YR_API int yr_compiler_define_float_variable(
    YR_COMPILER* compiler,
    const char* identifier,
    double value)
{
  YR_EXTERNAL_VARIABLE* external;
  YR_OBJECT* object;

  char* id;

  compiler->last_result = ERROR_SUCCESS;

  FAIL_ON_COMPILER_ERROR(yr_arena_write_string(
      compiler->sz_arena,
      identifier,
      &id));

  FAIL_ON_COMPILER_ERROR(yr_arena_allocate_struct(
      compiler->externals_arena,
      sizeof(YR_EXTERNAL_VARIABLE),
      (void**) &external,
      offsetof(YR_EXTERNAL_VARIABLE, identifier),
      EOL));

  external->type = EXTERNAL_VARIABLE_TYPE_FLOAT;
  external->identifier = id;
  external->value.f = value;

  FAIL_ON_COMPILER_ERROR(yr_object_from_external_variable(
      external,
      &object));

  FAIL_ON_COMPILER_ERROR(yr_hash_table_add(
      compiler->objects_table,
      external->identifier,
      NULL,
      (void*) object));

  return ERROR_SUCCESS;
}
Example #6
0
int yr_parser_reduce_meta_declaration(
    yyscan_t yyscanner,
    int32_t type,
    const char* identifier,
    const char* string,
    int64_t integer,
    YR_META** meta)
{
  YR_COMPILER* compiler = yyget_extra(yyscanner);

  FAIL_ON_ERROR(yr_arena_allocate_struct(
      compiler->metas_arena,
      sizeof(YR_META),
      (void**) meta,
      offsetof(YR_META, identifier),
      offsetof(YR_META, string),
      EOL));

  FAIL_ON_ERROR(yr_arena_write_string(
      compiler->sz_arena,
      identifier,
      (char**) &(*meta)->identifier));

  if (string != NULL)
  {
    FAIL_ON_ERROR(yr_arena_write_string(
        compiler->sz_arena,
        string,
        &(*meta)->string));
  }
  else
  {
    (*meta)->string = NULL;
  }

  (*meta)->integer = integer;
  (*meta)->type = type;

  return ERROR_SUCCESS;
}
Example #7
0
File: compiler.c Project: c4nc/yara
int _yr_compiler_compile_rules(
  YR_COMPILER* compiler)
{
  YARA_RULES_FILE_HEADER* rules_file_header = NULL;
  YR_ARENA* arena = NULL;
  YR_RULE null_rule;
  YR_EXTERNAL_VARIABLE null_external;
  YR_AC_TABLES tables;

  int8_t halt = OP_HALT;
  int result;

  // Write halt instruction at the end of code.
  yr_arena_write_data(
      compiler->code_arena,
      &halt,
      sizeof(int8_t),
      NULL);

  // Write a null rule indicating the end.
  memset(&null_rule, 0xFA, sizeof(YR_RULE));
  null_rule.g_flags = RULE_GFLAGS_NULL;

  yr_arena_write_data(
      compiler->rules_arena,
      &null_rule,
      sizeof(YR_RULE),
      NULL);

  // Write a null external the end.
  memset(&null_external, 0xFA, sizeof(YR_EXTERNAL_VARIABLE));
  null_external.type = EXTERNAL_VARIABLE_TYPE_NULL;

  yr_arena_write_data(
      compiler->externals_arena,
      &null_external,
      sizeof(YR_EXTERNAL_VARIABLE),
      NULL);

  // Write Aho-Corasick automaton to arena.
  result = yr_ac_compile(
      compiler->automaton,
      compiler->automaton_arena,
      &tables);

  if (result == ERROR_SUCCESS)
    result = yr_arena_create(1024, 0, &arena);

  if (result == ERROR_SUCCESS)
    result = yr_arena_allocate_struct(
        arena,
        sizeof(YARA_RULES_FILE_HEADER),
        (void**) &rules_file_header,
        offsetof(YARA_RULES_FILE_HEADER, rules_list_head),
        offsetof(YARA_RULES_FILE_HEADER, externals_list_head),
        offsetof(YARA_RULES_FILE_HEADER, code_start),
        offsetof(YARA_RULES_FILE_HEADER, match_table),
        offsetof(YARA_RULES_FILE_HEADER, transition_table),
        EOL);

  if (result == ERROR_SUCCESS)
  {
    rules_file_header->rules_list_head = (YR_RULE*) yr_arena_base_address(
        compiler->rules_arena);

    rules_file_header->externals_list_head = (YR_EXTERNAL_VARIABLE*)
		yr_arena_base_address(compiler->externals_arena);

    rules_file_header->code_start = (uint8_t*) yr_arena_base_address(
        compiler->code_arena);

    rules_file_header->match_table = tables.matches;
    rules_file_header->transition_table = tables.transitions;
  }

  if (result == ERROR_SUCCESS)
  {
    result = yr_arena_append(
        arena,
        compiler->code_arena);
  }

  if (result == ERROR_SUCCESS)
  {
    compiler->code_arena = NULL;
    result = yr_arena_append(
        arena,
        compiler->re_code_arena);
  }

  if (result == ERROR_SUCCESS)
  {
    compiler->re_code_arena = NULL;
    result = yr_arena_append(
        arena,
        compiler->rules_arena);
  }

  if (result == ERROR_SUCCESS)
  {
    compiler->rules_arena = NULL;
    result = yr_arena_append(
        arena,
        compiler->strings_arena);
  }

  if (result == ERROR_SUCCESS)
  {
    compiler->strings_arena = NULL;
    result = yr_arena_append(
        arena,
        compiler->externals_arena);
  }

  if (result == ERROR_SUCCESS)
  {
    compiler->externals_arena = NULL;
    result = yr_arena_append(
        arena,
        compiler->namespaces_arena);
  }

  if (result == ERROR_SUCCESS)
  {
    compiler->namespaces_arena = NULL;
    result = yr_arena_append(
        arena,
        compiler->metas_arena);
  }

  if (result == ERROR_SUCCESS)
  {
    compiler->metas_arena = NULL;
    result = yr_arena_append(
        arena,
        compiler->sz_arena);
  }

  if (result == ERROR_SUCCESS)
  {
    compiler->sz_arena = NULL;
    result = yr_arena_append(
        arena,
        compiler->automaton_arena);
  }

  if (result == ERROR_SUCCESS)
  {
    compiler->automaton_arena = NULL;
    result = yr_arena_append(
        arena,
        compiler->matches_arena);
  }

  if (result == ERROR_SUCCESS)
  {
    compiler->matches_arena = NULL;
    compiler->compiled_rules_arena = arena;
    result = yr_arena_coalesce(arena);
  }
  else
  {
    yr_arena_destroy(arena);
  }

  return result;
}
Example #8
0
int yr_ac_add_string(
    YR_ARENA* arena,
    YR_AC_AUTOMATON* automaton,
    YR_STRING* string,
    YR_ATOM_LIST_ITEM* atom)
{
  int result = ERROR_SUCCESS;
  int i;

  YR_AC_STATE* state;
  YR_AC_STATE* next_state;
  YR_AC_MATCH* new_match;

  // For each atom create the states in the automaton.

  while (atom != NULL)
  {
    state = automaton->root;

    for(i = 0; i < atom->atom_length; i++)
    {
      next_state = yr_ac_next_state(
          state, atom->atom[i]);

      if (next_state == NULL)
      {
        next_state = _yr_ac_create_state(
            arena,
            state,
            atom->atom[i]);

        if (next_state == NULL)
          return ERROR_INSUFICIENT_MEMORY;
      }

      state = next_state;
    }

    result = yr_arena_allocate_struct(
        arena,
        sizeof(YR_AC_MATCH),
        (void**) &new_match,
        offsetof(YR_AC_MATCH, string),
        offsetof(YR_AC_MATCH, forward_code),
        offsetof(YR_AC_MATCH, backward_code),
        offsetof(YR_AC_MATCH, next),
        EOL);

    if (result == ERROR_SUCCESS)
    {
      new_match->backtrack = state->depth + atom->backtrack;
      new_match->string = string;
      new_match->forward_code = atom->forward_code;
      new_match->backward_code = atom->backward_code;
      new_match->next = state->matches;
      state->matches = new_match;
    }
    else
    {
      break;
    }

    atom = atom->next;
  }

  return result;
}
Example #9
0
YR_AC_STATE* _yr_ac_create_state(
    YR_ARENA* arena,
    YR_AC_STATE* state,
    uint8_t input)
{
  int result;
  YR_AC_STATE* new_state;
  YR_AC_LIST_BASED_STATE* list_based_state;
  YR_AC_TABLE_BASED_STATE* table_based_state;
  YR_AC_STATE_TRANSITION* new_transition;

  if (state->depth < MAX_TABLE_BASED_STATES_DEPTH)
  {
    result = yr_arena_allocate_struct(
        arena,
        sizeof(YR_AC_TABLE_BASED_STATE),
        (void**) &new_state,
        offsetof(YR_AC_TABLE_BASED_STATE, failure),
        offsetof(YR_AC_TABLE_BASED_STATE, matches),
        EOL);
  }
  else
  {
    result = yr_arena_allocate_struct(
        arena,
        sizeof(YR_AC_LIST_BASED_STATE),
        (void**) &new_state,
        offsetof(YR_AC_LIST_BASED_STATE, failure),
        offsetof(YR_AC_LIST_BASED_STATE, matches),
        offsetof(YR_AC_LIST_BASED_STATE, transitions),
        EOL);
  }

  if (result != ERROR_SUCCESS)
    return NULL;

  if (state->depth <= MAX_TABLE_BASED_STATES_DEPTH)
  {
    result = yr_arena_make_relocatable(
        arena,
        state,
        offsetof(YR_AC_TABLE_BASED_STATE, transitions[input]),
        EOL);

    if (result != ERROR_SUCCESS)
      return NULL;

    table_based_state = (YR_AC_TABLE_BASED_STATE*) state;
    table_based_state->transitions[input].state = new_state;
  }
  else
  {
    result = yr_arena_allocate_struct(
        arena,
        sizeof(YR_AC_STATE_TRANSITION),
        (void**) &new_transition,
        offsetof(YR_AC_STATE_TRANSITION, state),
        offsetof(YR_AC_STATE_TRANSITION, next),
        EOL);

    if (result != ERROR_SUCCESS)
      return NULL;

    list_based_state = (YR_AC_LIST_BASED_STATE*) state;

    new_transition->input = input;
    new_transition->state = new_state;
    new_transition->next = list_based_state->transitions;
    list_based_state->transitions = new_transition;
  }

  new_state->depth = state->depth + 1;

  return new_state;
}
Example #10
0
int yr_parser_reduce_rule_declaration(
    yyscan_t yyscanner,
    int32_t flags,
    const char* identifier,
    char* tags,
    YR_STRING* strings,
    YR_META* metas)
{
  YR_COMPILER* compiler = yyget_extra(yyscanner);
  YR_RULE* rule;
  YR_STRING* string;

  if (yr_hash_table_lookup(
        compiler->rules_table,
        identifier,
        compiler->current_namespace->name) != NULL)
  {
    // A rule with the same identifier already exists, return the
    // appropriate error.

    yr_compiler_set_error_extra_info(compiler, identifier);
    compiler->last_result = ERROR_DUPLICATE_RULE_IDENTIFIER;
    return compiler->last_result;
  }

  // Check for unreferenced (unused) strings.

  string = compiler->current_rule_strings;

  while(!STRING_IS_NULL(string))
  {
    // Only the heading fragment in a chain of strings (the one with
    // chained_to == NULL) must be referenced. All other fragments
    // are never marked as referenced.

    if (!STRING_IS_REFERENCED(string) &&
        string->chained_to == NULL)
    {
      yr_compiler_set_error_extra_info(compiler, string->identifier);
      compiler->last_result = ERROR_UNREFERENCED_STRING;
      break;
    }

    string = yr_arena_next_address(
        compiler->strings_arena,
        string,
        sizeof(YR_STRING));
  }

  if (compiler->last_result != ERROR_SUCCESS)
    return compiler->last_result;

  compiler->last_result = yr_arena_allocate_struct(
      compiler->rules_arena,
      sizeof(YR_RULE),
      (void**) &rule,
      offsetof(YR_RULE, identifier),
      offsetof(YR_RULE, tags),
      offsetof(YR_RULE, strings),
      offsetof(YR_RULE, metas),
      offsetof(YR_RULE, ns),
      EOL);

  if (compiler->last_result != ERROR_SUCCESS)
    return compiler->last_result;

  compiler->last_result = yr_arena_write_string(
      compiler->sz_arena,
      identifier,
      &rule->identifier);

  if (compiler->last_result != ERROR_SUCCESS)
    return compiler->last_result;

  compiler->last_result = yr_parser_emit_with_arg_reloc(
      yyscanner,
      RULE_POP,
      PTR_TO_UINT64(rule),
      NULL);

  if (compiler->last_result != ERROR_SUCCESS)
    return compiler->last_result;

  rule->g_flags = flags | compiler->current_rule_flags;
  rule->tags = tags;
  rule->strings = strings;
  rule->metas = metas;
  rule->ns = compiler->current_namespace;

  compiler->current_rule_flags = 0;
  compiler->current_rule_strings = NULL;

  yr_hash_table_add(
      compiler->rules_table,
      identifier,
      compiler->current_namespace->name,
      (void*) rule);

  return compiler->last_result;
}
Example #11
0
int _yr_parser_write_string(
    const char* identifier,
    int flags,
    YR_COMPILER* compiler,
    SIZED_STRING* str,
    RE* re,
    YR_STRING** string,
    int* min_atom_length)
{
  SIZED_STRING* literal_string;
  YR_AC_MATCH* new_match;

  YR_ATOM_LIST_ITEM* atom;
  YR_ATOM_LIST_ITEM* atom_list = NULL;

  int result;
  int max_string_len;
  int free_literal = FALSE;

  *string = NULL;

  result = yr_arena_allocate_struct(
      compiler->strings_arena,
      sizeof(YR_STRING),
      (void**) string,
      offsetof(YR_STRING, identifier),
      offsetof(YR_STRING, string),
      offsetof(YR_STRING, chained_to),
      EOL);

  if (result != ERROR_SUCCESS)
    return result;

  result = yr_arena_write_string(
      compiler->sz_arena,
      identifier,
      &(*string)->identifier);

  if (result != ERROR_SUCCESS)
    return result;

  if (flags & STRING_GFLAGS_HEXADECIMAL ||
      flags & STRING_GFLAGS_REGEXP)
  {
    literal_string = yr_re_extract_literal(re);

    if (literal_string != NULL)
    {
      flags |= STRING_GFLAGS_LITERAL;
      free_literal = TRUE;
    }
  }
  else
  {
    literal_string = str;
    flags |= STRING_GFLAGS_LITERAL;
  }

  (*string)->g_flags = flags;
  (*string)->chained_to = NULL;

  memset((*string)->matches, 0,
         sizeof((*string)->matches));

  memset((*string)->unconfirmed_matches, 0,
         sizeof((*string)->unconfirmed_matches));

  if (flags & STRING_GFLAGS_LITERAL)
  {
    (*string)->length = literal_string->length;

    result = yr_arena_write_data(
        compiler->sz_arena,
        literal_string->c_string,
        literal_string->length,
        (void*) &(*string)->string);

    if (result == ERROR_SUCCESS)
    {
      result = yr_atoms_extract_from_string(
          (uint8_t*) literal_string->c_string,
          literal_string->length,
          flags,
          &atom_list);
    }
  }
  else
  {
    result = yr_re_emit_code(re, compiler->re_code_arena);

    if (result == ERROR_SUCCESS)
      result = yr_atoms_extract_from_re(re, flags, &atom_list);
  }

  if (result == ERROR_SUCCESS)
  {
    // Add the string to Aho-Corasick automaton.

    if (atom_list != NULL)
    {
      result = yr_ac_add_string(
          compiler->automaton_arena,
          compiler->automaton,
          *string,
          atom_list);
    }
    else
    {
      result = yr_arena_allocate_struct(
          compiler->automaton_arena,
          sizeof(YR_AC_MATCH),
          (void**) &new_match,
          offsetof(YR_AC_MATCH, string),
          offsetof(YR_AC_MATCH, forward_code),
          offsetof(YR_AC_MATCH, backward_code),
          offsetof(YR_AC_MATCH, next),
          EOL);

      if (result == ERROR_SUCCESS)
      {
        new_match->backtrack = 0;
        new_match->string = *string;
        new_match->forward_code = re->root_node->forward_code;
        new_match->backward_code = NULL;
        new_match->next = compiler->automaton->root->matches;
        compiler->automaton->root->matches = new_match;
      }
    }
  }

  atom = atom_list;

  if (atom != NULL)
    *min_atom_length = MAX_ATOM_LENGTH;
  else
    *min_atom_length = 0;

  while (atom != NULL)
  {
    if (atom->atom_length < *min_atom_length)
      *min_atom_length = atom->atom_length;
    atom = atom->next;
  }

  if (flags & STRING_GFLAGS_LITERAL)
  {
    if (flags & STRING_GFLAGS_WIDE)
      max_string_len = (*string)->length * 2;
    else
      max_string_len = (*string)->length;

    if (max_string_len == *min_atom_length)
      (*string)->g_flags |= STRING_GFLAGS_FITS_IN_ATOM;
  }

  if (free_literal)
    yr_free(literal_string);

  if (atom_list != NULL)
    yr_atoms_list_destroy(atom_list);

  return result;
}
Example #12
0
YR_RULE* yr_parser_reduce_rule_declaration_phase_1(
    yyscan_t yyscanner,
    int32_t flags,
    const char* identifier)
{
  YR_COMPILER* compiler = yyget_extra(yyscanner);
  YR_RULE* rule = NULL;

  if (yr_hash_table_lookup(
        compiler->rules_table,
        identifier,
        compiler->current_namespace->name) != NULL ||
      yr_hash_table_lookup(
        compiler->objects_table,
        identifier,
        compiler->current_namespace->name) != NULL)
  {
    // A rule or variable with the same identifier already exists, return the
    // appropriate error.

    yr_compiler_set_error_extra_info(compiler, identifier);
    compiler->last_result = ERROR_DUPLICATED_IDENTIFIER;
    return NULL;
  }

  compiler->last_result = yr_arena_allocate_struct(
      compiler->rules_arena,
      sizeof(YR_RULE),
      (void**) &rule,
      offsetof(YR_RULE, identifier),
      offsetof(YR_RULE, tags),
      offsetof(YR_RULE, strings),
      offsetof(YR_RULE, metas),
      offsetof(YR_RULE, ns),
      EOL);

  if (compiler->last_result != ERROR_SUCCESS)
    return NULL;

  rule->g_flags = flags;
  rule->ns = compiler->current_namespace;

  #ifdef PROFILING_ENABLED
  rule->clock_ticks = 0;
  #endif

  compiler->last_result = yr_arena_write_string(
      compiler->sz_arena,
      identifier,
      (char**) &rule->identifier);

  if (compiler->last_result != ERROR_SUCCESS)
    return NULL;

  compiler->last_result = yr_parser_emit_with_arg_reloc(
      yyscanner,
      OP_INIT_RULE,
      PTR_TO_INT64(rule),
      NULL,
      NULL);

  if (compiler->last_result == ERROR_SUCCESS)
    compiler->last_result = yr_hash_table_add(
        compiler->rules_table,
        identifier,
        compiler->current_namespace->name,
        (void*) rule);

  // Clean strings_table as we are starting to parse a new rule.
  yr_hash_table_clean(compiler->strings_table, NULL);

  compiler->current_rule = rule;
  return rule;
}
Example #13
0
int _yr_parser_write_string(
    const char* identifier,
    int flags,
    YR_COMPILER* compiler,
    SIZED_STRING* str,
    RE* re,
    YR_STRING** string,
    int* min_atom_quality)
{
  SIZED_STRING* literal_string;
  YR_AC_MATCH* new_match;
  YR_ATOM_LIST_ITEM* atom_list = NULL;

  int result;
  int max_string_len;
  int free_literal = FALSE;

  *string = NULL;

  result = yr_arena_allocate_struct(
      compiler->strings_arena,
      sizeof(YR_STRING),
      (void**) string,
      offsetof(YR_STRING, identifier),
      offsetof(YR_STRING, string),
      offsetof(YR_STRING, chained_to),
      EOL);

  if (result != ERROR_SUCCESS)
    return result;

  result = yr_arena_write_string(
      compiler->sz_arena,
      identifier,
      &(*string)->identifier);

  if (result != ERROR_SUCCESS)
    return result;

  if (flags & STRING_GFLAGS_HEXADECIMAL ||
      flags & STRING_GFLAGS_REGEXP)
  {
    literal_string = yr_re_extract_literal(re);

    if (literal_string != NULL)
    {
      flags |= STRING_GFLAGS_LITERAL;
      free_literal = TRUE;
    }
    else
    {
      // Non-literal strings can't be marked as fixed offset because once we
      // find a string atom in the scanned data we don't know the offset where
      // the string should start, as the non-literal strings can contain
      // variable-length portions.

      flags &= ~STRING_GFLAGS_FIXED_OFFSET;
    }
  }
  else
  {
    literal_string = str;
    flags |= STRING_GFLAGS_LITERAL;
  }

  (*string)->g_flags = flags;
  (*string)->chained_to = NULL;
  (*string)->fixed_offset = UNDEFINED;

  #ifdef PROFILING_ENABLED
  (*string)->clock_ticks = 0;
  #endif

  memset((*string)->matches, 0,
         sizeof((*string)->matches));

  memset((*string)->unconfirmed_matches, 0,
         sizeof((*string)->unconfirmed_matches));

  if (flags & STRING_GFLAGS_LITERAL)
  {
    (*string)->length = (uint32_t) literal_string->length;

    result = yr_arena_write_data(
        compiler->sz_arena,
        literal_string->c_string,
        literal_string->length + 1,   // +1 to include terminating NULL
        (void**) &(*string)->string);

    if (result == ERROR_SUCCESS)
    {
      result = yr_atoms_extract_from_string(
          (uint8_t*) literal_string->c_string,
          (int32_t) literal_string->length,
          flags,
          &atom_list);
    }
  }
  else
  {
    result = yr_re_emit_code(re, compiler->re_code_arena);

    if (result == ERROR_SUCCESS)
      result = yr_atoms_extract_from_re(re, flags, &atom_list);
  }

  if (result == ERROR_SUCCESS)
  {
    // Add the string to Aho-Corasick automaton.

    if (atom_list != NULL)
    {
      result = yr_ac_add_string(
          compiler->automaton_arena,
          compiler->automaton,
          *string,
          atom_list);
    }
    else
    {
      result = yr_arena_allocate_struct(
          compiler->automaton_arena,
          sizeof(YR_AC_MATCH),
          (void**) &new_match,
          offsetof(YR_AC_MATCH, string),
          offsetof(YR_AC_MATCH, forward_code),
          offsetof(YR_AC_MATCH, backward_code),
          offsetof(YR_AC_MATCH, next),
          EOL);

      if (result == ERROR_SUCCESS)
      {
        new_match->backtrack = 0;
        new_match->string = *string;
        new_match->forward_code = re->root_node->forward_code;
        new_match->backward_code = NULL;
        new_match->next = compiler->automaton->root->matches;
        compiler->automaton->root->matches = new_match;
      }
    }
  }

  *min_atom_quality = yr_atoms_min_quality(atom_list);

  if (flags & STRING_GFLAGS_LITERAL)
  {
    if (flags & STRING_GFLAGS_WIDE)
      max_string_len = (*string)->length * 2;
    else
      max_string_len = (*string)->length;

    if (max_string_len <= MAX_ATOM_LENGTH)
      (*string)->g_flags |= STRING_GFLAGS_FITS_IN_ATOM;
  }

  if (free_literal)
    yr_free(literal_string);

  if (atom_list != NULL)
    yr_atoms_list_destroy(atom_list);

  return result;
}
Example #14
0
int yr_parser_reduce_rule_declaration_phase_1(
    yyscan_t yyscanner,
    int32_t flags,
    const char* identifier,
    YR_RULE** rule)
{
  YR_FIXUP *fixup;
  YR_INIT_RULE_ARGS *init_rule_args;
  YR_COMPILER* compiler = yyget_extra(yyscanner);

  *rule = NULL;

  if (yr_hash_table_lookup(
        compiler->rules_table,
        identifier,
        compiler->current_namespace->name) != NULL ||
      yr_hash_table_lookup(
        compiler->objects_table,
        identifier,
        NULL) != NULL)
  {
    // A rule or variable with the same identifier already exists, return the
    // appropriate error.

    yr_compiler_set_error_extra_info(compiler, identifier);
    return ERROR_DUPLICATED_IDENTIFIER;
  }

  FAIL_ON_ERROR(yr_arena_allocate_struct(
      compiler->rules_arena,
      sizeof(YR_RULE),
      (void**) rule,
      offsetof(YR_RULE, identifier),
      offsetof(YR_RULE, tags),
      offsetof(YR_RULE, strings),
      offsetof(YR_RULE, metas),
      offsetof(YR_RULE, ns),
      EOL))

  (*rule)->g_flags = flags;
  (*rule)->ns = compiler->current_namespace;
  (*rule)->num_atoms = 0;

  #ifdef PROFILING_ENABLED
  (*rule)->time_cost = 0;

  memset(
      (*rule)->time_cost_per_thread, 0, sizeof((*rule)->time_cost_per_thread));
  #endif

  FAIL_ON_ERROR(yr_arena_write_string(
      compiler->sz_arena,
      identifier,
      (char**) &(*rule)->identifier));

  FAIL_ON_ERROR(yr_parser_emit(
      yyscanner,
      OP_INIT_RULE,
      NULL));

  FAIL_ON_ERROR(yr_arena_allocate_struct(
      compiler->code_arena,
      sizeof(YR_INIT_RULE_ARGS),
      (void**) &init_rule_args,
      offsetof(YR_INIT_RULE_ARGS, rule),
      offsetof(YR_INIT_RULE_ARGS, jmp_addr),
      EOL));

  init_rule_args->rule = *rule;

  // jmp_addr holds the address to jump to when we want to skip the code for
  // the rule. It is iniatialized as NULL at this point because we don't know
  // the address until emmiting the code for the rule's condition. The address
  // is set in yr_parser_reduce_rule_declaration_phase_2.
  init_rule_args->jmp_addr = NULL;

  // Create a fixup entry for the jump and push it in the stack
  fixup = (YR_FIXUP*) yr_malloc(sizeof(YR_FIXUP));

  if (fixup == NULL)
    return ERROR_INSUFFICIENT_MEMORY;

  fixup->address = (void*) &(init_rule_args->jmp_addr);
  fixup->next = compiler->fixup_stack_head;
  compiler->fixup_stack_head = fixup;

  // Clean strings_table as we are starting to parse a new rule.
  yr_hash_table_clean(compiler->strings_table, NULL);

  FAIL_ON_ERROR(yr_hash_table_add(
      compiler->rules_table,
      identifier,
      compiler->current_namespace->name,
      (void*) *rule));

  compiler->current_rule = *rule;

  return ERROR_SUCCESS;
}
Example #15
0
static int _yr_parser_write_string(
    const char* identifier,
    int flags,
    YR_COMPILER* compiler,
    SIZED_STRING* str,
    RE_AST* re_ast,
    YR_STRING** string,
    int* min_atom_quality,
    int* num_atom)
{
  SIZED_STRING* literal_string;
  YR_ATOM_LIST_ITEM* atom;
  YR_ATOM_LIST_ITEM* atom_list = NULL;

  int c, result;
  int max_string_len;
  bool free_literal = false;

  *string = NULL;

  result = yr_arena_allocate_struct(
      compiler->strings_arena,
      sizeof(YR_STRING),
      (void**) string,
      offsetof(YR_STRING, identifier),
      offsetof(YR_STRING, string),
      offsetof(YR_STRING, chained_to),
      offsetof(YR_STRING, rule),
      EOL);

  if (result != ERROR_SUCCESS)
    return result;

  result = yr_arena_write_string(
      compiler->sz_arena,
      identifier,
      &(*string)->identifier);

  if (result != ERROR_SUCCESS)
    return result;

  if (flags & STRING_GFLAGS_HEXADECIMAL ||
      flags & STRING_GFLAGS_REGEXP)
  {
    literal_string = yr_re_ast_extract_literal(re_ast);

    if (literal_string != NULL)
    {
      flags |= STRING_GFLAGS_LITERAL;
      free_literal = true;
    }
    else
    {
      // Non-literal strings can't be marked as fixed offset because once we
      // find a string atom in the scanned data we don't know the offset where
      // the string should start, as the non-literal strings can contain
      // variable-length portions.

      flags &= ~STRING_GFLAGS_FIXED_OFFSET;
    }
  }
  else
  {
    literal_string = str;
    flags |= STRING_GFLAGS_LITERAL;
  }

  (*string)->g_flags = flags;
  (*string)->chained_to = NULL;
  (*string)->fixed_offset = UNDEFINED;
  (*string)->rule = compiler->current_rule;

  memset((*string)->matches, 0,
         sizeof((*string)->matches));

  memset((*string)->unconfirmed_matches, 0,
         sizeof((*string)->unconfirmed_matches));

  if (flags & STRING_GFLAGS_LITERAL)
  {
    (*string)->length = (uint32_t) literal_string->length;

    result = yr_arena_write_data(
        compiler->sz_arena,
        literal_string->c_string,
        literal_string->length + 1,   // +1 to include terminating NULL
        (void**) &(*string)->string);

    if (result == ERROR_SUCCESS)
    {
      result = yr_atoms_extract_from_string(
          &compiler->atoms_config,
          (uint8_t*) literal_string->c_string,
          (int32_t) literal_string->length,
          flags,
          &atom_list,
          min_atom_quality);
    }
  }
  else
  {
    // Emit forwards code
    result = yr_re_ast_emit_code(re_ast, compiler->re_code_arena, false);

    // Emit backwards code
    if (result == ERROR_SUCCESS)
      result = yr_re_ast_emit_code(re_ast, compiler->re_code_arena, true);

    if (result == ERROR_SUCCESS)
      result = yr_atoms_extract_from_re(
          &compiler->atoms_config,
          re_ast,
          flags,
          &atom_list,
          min_atom_quality);
  }

  if (result == ERROR_SUCCESS)
  {
    // Add the string to Aho-Corasick automaton.
    result = yr_ac_add_string(
        compiler->automaton,
        *string,
        atom_list,
        compiler->matches_arena);
  }

  if (flags & STRING_GFLAGS_LITERAL)
  {
    if (flags & STRING_GFLAGS_WIDE)
      max_string_len = (*string)->length * 2;
    else
      max_string_len = (*string)->length;

    if (max_string_len <= YR_MAX_ATOM_LENGTH)
      (*string)->g_flags |= STRING_GFLAGS_FITS_IN_ATOM;
  }

  atom = atom_list;
  c = 0;

  while (atom != NULL)
  {
    atom = atom->next;
    c++;
  }

  (*num_atom) += c;

  if (free_literal)
    yr_free(literal_string);

  if (atom_list != NULL)
    yr_atoms_list_destroy(atom_list);

  return result;
}
Example #16
0
YR_STRING* yr_parser_reduce_string_declaration(
    yyscan_t yyscanner,
    int32_t flags,
    const char* identifier,
    SIZED_STRING* str)
{
  int i;
  int error_offset;
  int min_atom_length;
  char* file_name;
  char message[512];

  YR_STRING* string;
  YR_AC_MATCH* new_match;
  ATOM_TREE* atom_tree;
  YR_ATOM_LIST_ITEM* atom;
  YR_ATOM_LIST_ITEM* atom_list = NULL;
  RE* re = NULL;

  uint8_t* literal_string;

  int literal_string_len;
  int max_string_len;

  YR_COMPILER* compiler = yyget_extra(yyscanner);

  compiler->last_result = yr_arena_allocate_struct(
      compiler->strings_arena,
      sizeof(YR_STRING),
      (void**) &string,
      offsetof(YR_STRING, identifier),
      offsetof(YR_STRING, string),
      EOL);

  if (compiler->last_result != ERROR_SUCCESS)
    return NULL;

  compiler->last_result = yr_arena_write_string(
      compiler->sz_arena,
      identifier,
      &string->identifier);

  if (compiler->last_result != ERROR_SUCCESS)
    return NULL;

  if (strcmp(identifier,"$") == 0)
    flags |= STRING_GFLAGS_ANONYMOUS;

  if (!(flags & STRING_GFLAGS_WIDE))
    flags |= STRING_GFLAGS_ASCII;

  // The STRING_GFLAGS_SINGLE_MATCH flag indicates that finding
  // a single match for the string is enough. This is true in
  // most cases, except when the string count (#) and string offset (@)
  // operators are used. All strings are marked STRING_FLAGS_SINGLE_MATCH
  // initially, and unmarked later if required.

  flags |= STRING_GFLAGS_SINGLE_MATCH;

  string->g_flags = flags;

  memset(string->matches, 0, sizeof(string->matches));

  if (flags & STRING_GFLAGS_HEXADECIMAL ||
      flags & STRING_GFLAGS_REGEXP)
  {
    if (flags & STRING_GFLAGS_HEXADECIMAL)
      compiler->last_result = yr_re_compile_hex(
          str->c_string, &re);
    else
      compiler->last_result = yr_re_compile(
          str->c_string, &re);

    if (compiler->last_result != ERROR_SUCCESS)
    {
      snprintf(
          message,
          sizeof(message),
          "invalid %s in string \"%s\": %s",
          (flags & STRING_GFLAGS_HEXADECIMAL) ?
              "hex string" : "regular expression",
          identifier,
          re->error_message);

      yr_compiler_set_error_extra_info(compiler, message);
      string = NULL;
      goto _exit;
    }

    if (re->flags & RE_FLAGS_START_ANCHORED)
      string->g_flags |= STRING_GFLAGS_START_ANCHORED;

    if (re->flags & RE_FLAGS_END_ANCHORED)
      string->g_flags |= STRING_GFLAGS_END_ANCHORED;

    if (re->flags & RE_FLAGS_FAST_HEX_REGEXP)
      string->g_flags |= STRING_GFLAGS_FAST_HEX_REGEXP;

    if (re->flags & RE_FLAGS_LITERAL_STRING)
    {
      string->g_flags |= STRING_GFLAGS_LITERAL;
      literal_string = re->literal_string;
      literal_string_len = re->literal_string_len;

      compiler->last_result = yr_atoms_extract_from_string(
          literal_string, literal_string_len, string->g_flags, &atom_list);
    }
    else
    {
      compiler->last_result = yr_re_emit_code(
          re, compiler->re_code_arena);

      if (compiler->last_result != ERROR_SUCCESS)
      {
        string = NULL;
        goto _exit;
      }

      compiler->last_result = yr_atoms_extract_from_re(
          re, string->g_flags, &atom_list);
    }
  }
  else
  {
    string->g_flags |= STRING_GFLAGS_LITERAL;
    literal_string = (uint8_t*) str->c_string;
    literal_string_len = str->length;

    compiler->last_result  = yr_atoms_extract_from_string(
        literal_string, literal_string_len, string->g_flags, &atom_list);
  }

  if (compiler->last_result != ERROR_SUCCESS)
  {
    string = NULL;
    goto _exit;
  }

  if (STRING_IS_LITERAL(string))
  {
    compiler->last_result = yr_arena_write_data(
        compiler->sz_arena,
        literal_string,
        literal_string_len,
        (void*) &string->string);

    if (compiler->last_result != ERROR_SUCCESS)
    {
      string = NULL;
      goto _exit;
    }

    string->length = literal_string_len;
  }

  // Add the string to Aho-Corasick automaton.

  if (atom_list != NULL)
  {
    compiler->last_result = yr_ac_add_string(
      compiler->automaton_arena,
      compiler->automaton,
      string,
      atom_list);
  }
  else
  {
    compiler->last_result = yr_arena_allocate_struct(
        compiler->automaton_arena,
        sizeof(YR_AC_MATCH),
        (void**) &new_match,
        offsetof(YR_AC_MATCH, string),
        offsetof(YR_AC_MATCH, forward_code),
        offsetof(YR_AC_MATCH, backward_code),
        offsetof(YR_AC_MATCH, next),
        EOL);

    if (compiler->last_result == ERROR_SUCCESS)
    {
      new_match->backtrack = 0;
      new_match->string = string;
      new_match->forward_code = re->root_node->forward_code;
      new_match->backward_code = NULL;
      new_match->next = compiler->automaton->root->matches;
      compiler->automaton->root->matches = new_match;
    }
  }

  atom = atom_list;

  if (atom != NULL)
    min_atom_length = MAX_ATOM_LENGTH;
  else
    min_atom_length = 0;

  while (atom != NULL)
  {
    if (atom->atom_length < min_atom_length)
      min_atom_length = atom->atom_length;
    atom = atom->next;
  }

  if (STRING_IS_LITERAL(string))
  {
    if (STRING_IS_WIDE(string))
      max_string_len = string->length * 2;
    else
      max_string_len = string->length;

    if (max_string_len == min_atom_length)
      string->g_flags |= STRING_GFLAGS_FITS_IN_ATOM;
  }

  if (compiler->file_name_stack_ptr > 0)
    file_name = compiler->file_name_stack[compiler->file_name_stack_ptr - 1];
  else
    file_name = NULL;

  if (min_atom_length < 2 && compiler->error_report_function != NULL)
  {
    snprintf(
        message,
        sizeof(message),
        "%s is slowing down scanning%s",
        string->identifier,
        min_atom_length == 0 ? " (critical!)" : "");

    compiler->error_report_function(
        YARA_ERROR_LEVEL_WARNING,
        file_name,
        yyget_lineno(yyscanner),
        message);
  }

  if (compiler->last_result != ERROR_SUCCESS)
    string = NULL;

_exit:

  if (atom_list != NULL)
    yr_atoms_list_destroy(atom_list);

  if (re != NULL)
    yr_re_destroy(re);

  return string;
}