예제 #1
0
파일: object.c 프로젝트: nonmoun/yara
void yr_object_destroy(
    YR_OBJECT* object)
{
  YR_STRUCTURE_MEMBER* member;
  YR_STRUCTURE_MEMBER* next_member;
  YR_ARRAY_ITEMS* array_items;

  RE* re;
  int i;
  char* str;

  switch(object->type)
  {
    case OBJECT_TYPE_STRUCTURE:
      member = ((YR_OBJECT_STRUCTURE*) object)->members;

      while (member != NULL)
      {
        next_member = member->next;
        yr_object_destroy(member->object);
        yr_free(member);
        member = next_member;
      }
      break;

    case OBJECT_TYPE_STRING:
      str = ((YR_OBJECT_STRING*) object)->value;
      if (str != NULL)
        yr_free(str);
      break;

    case OBJECT_TYPE_REGEXP:
      re = ((YR_OBJECT_REGEXP*) object)->value;
      if (re != NULL)
        yr_re_destroy(re);
      break;

    case OBJECT_TYPE_ARRAY:
      array_items = ((YR_OBJECT_ARRAY*) object)->items;

      for (i = 0; i < array_items->count; i++)
        if (array_items->objects[i] != NULL)
          yr_object_destroy(array_items->objects[i]);

      yr_free(array_items);
      break;

    case OBJECT_TYPE_FUNCTION:
      yr_object_destroy(((YR_OBJECT_FUNCTION*) object)->return_obj);
      break;
  }

  yr_free((void*) object->identifier);
  yr_free(object);
}
예제 #2
0
파일: re.c 프로젝트: mikalv/yara
int yr_re_compile(
    const char* re_string,
    int flags,
    YR_ARENA* code_arena,
    RE** re,
    RE_ERROR* error)
{
  RE* compiled_re;
  YR_ARENA* arena;

  *re = NULL;

  FAIL_ON_ERROR(yr_re_parse(re_string, flags, &compiled_re, error));

  if (code_arena == NULL)
  {
    FAIL_ON_ERROR_WITH_CLEANUP(
        yr_arena_create(
            RE_MAX_CODE_SIZE,
            ARENA_FLAGS_FIXED_SIZE,
            &arena),
        yr_re_destroy(compiled_re));

    compiled_re->code_arena = arena;
  }
  else
  {
    arena = code_arena;
  }

  FAIL_ON_ERROR_WITH_CLEANUP(
      yr_re_emit_code(compiled_re, arena),
      yr_re_destroy(compiled_re));

  *re = compiled_re;

  return ERROR_SUCCESS;
}
예제 #3
0
YR_STRING* yr_parser_reduce_string_declaration(
    yyscan_t yyscanner,
    int32_t flags,
    const char* identifier,
    SIZED_STRING* str)
{
  int min_atom_length;
  int min_atom_length_aux;

  int32_t min_gap;
  int32_t max_gap;

  char* file_name;
  char message[512];

  YR_COMPILER* compiler = yyget_extra(yyscanner);
  YR_STRING* string = NULL;
  YR_STRING* aux_string;
  YR_STRING* prev_string;

  RE* re = NULL;
  RE* remainder_re;

  if (strcmp(identifier,"$") == 0)
    flags |= STRING_GFLAGS_ANONYMOUS;

  if (!(flags & STRING_GFLAGS_WIDE))
    flags |= STRING_GFLAGS_ASCII;

  if (str->flags & SIZED_STRING_FLAGS_NO_CASE)
    flags |= STRING_GFLAGS_NO_CASE;

  if (str->flags & SIZED_STRING_FLAGS_DOT_ALL)
    flags |= STRING_GFLAGS_REGEXP_DOT_ALL;

  // The STRING_GFLAGS_SINGLE_MATCH flag indicates that finding
  // a single match for the string is enough. This is true in
  // most cases, except when the string count (#) and string offset (@)
  // operators are used. All strings are marked STRING_FLAGS_SINGLE_MATCH
  // initially, and unmarked later if required.

  flags |= STRING_GFLAGS_SINGLE_MATCH;

  if (flags & STRING_GFLAGS_HEXADECIMAL ||
      flags & STRING_GFLAGS_REGEXP)
  {
    if (flags & STRING_GFLAGS_HEXADECIMAL)
      compiler->last_result = yr_re_compile_hex(
          str->c_string, &re);
    else
      compiler->last_result = yr_re_compile(
          str->c_string, &re);

    if (compiler->last_result != ERROR_SUCCESS)
    {
      snprintf(
          message,
          sizeof(message),
          "invalid %s \"%s\": %s",
          (flags & STRING_GFLAGS_HEXADECIMAL) ?
              "hex string" : "regular expression",
          identifier,
          re->error_message);

      yr_compiler_set_error_extra_info(
          compiler, message);

      goto _exit;
    }

    if (re->flags & RE_FLAGS_FAST_HEX_REGEXP)
      flags |= STRING_GFLAGS_FAST_HEX_REGEXP;

    compiler->last_result = yr_re_split_at_chaining_point(
        re, &re, &remainder_re, &min_gap, &max_gap);

    if (compiler->last_result != ERROR_SUCCESS)
      goto _exit;

    compiler->last_result = _yr_parser_write_string(
        identifier,
        flags,
        compiler,
        NULL,
        re,
        &string,
        &min_atom_length);

    if (compiler->last_result != ERROR_SUCCESS)
      goto _exit;

    if (remainder_re != NULL)
    {
      string->g_flags |= STRING_GFLAGS_CHAIN_TAIL | STRING_GFLAGS_CHAIN_PART;
      string->chain_gap_min = min_gap;
      string->chain_gap_max = max_gap;
    }

    // Use "aux_string" from now on, we want to keep the value of "string"
    // because it will returned.

    aux_string = string;

    while (remainder_re != NULL)
    {
      // Destroy regexp pointed by 're' before yr_re_split_at_jmp
      // overwrites 're' with another value.

      yr_re_destroy(re);

      compiler->last_result = yr_re_split_at_chaining_point(
          remainder_re, &re, &remainder_re, &min_gap, &max_gap);

      if (compiler->last_result != ERROR_SUCCESS)
        goto _exit;

      prev_string = aux_string;

      compiler->last_result = _yr_parser_write_string(
          identifier,
          flags,
          compiler,
          NULL,
          re,
          &aux_string,
          &min_atom_length_aux);

      if (compiler->last_result != ERROR_SUCCESS)
        goto _exit;

      if (min_atom_length_aux < min_atom_length)
        min_atom_length = min_atom_length_aux;

      aux_string->g_flags |= STRING_GFLAGS_CHAIN_PART;
      aux_string->chain_gap_min = min_gap;
      aux_string->chain_gap_max = max_gap;

      prev_string->chained_to = aux_string;
    }
  }
  else
  {
    compiler->last_result = _yr_parser_write_string(
        identifier,
        flags,
        compiler,
        str,
        NULL,
        &string,
        &min_atom_length);

    if (compiler->last_result != ERROR_SUCCESS)
      goto _exit;
  }

  if (compiler->file_name_stack_ptr > 0)
    file_name = compiler->file_name_stack[compiler->file_name_stack_ptr - 1];
  else
    file_name = NULL;

  if (min_atom_length < 2 && compiler->error_report_function != NULL)
  {
    snprintf(
        message,
        sizeof(message),
        "%s is slowing down scanning%s",
        string->identifier,
        min_atom_length == 0 ? " (critical!)" : "");

    compiler->error_report_function(
        YARA_ERROR_LEVEL_WARNING,
        file_name,
        yyget_lineno(yyscanner),
        message);
  }

_exit:

  if (re != NULL)
    yr_re_destroy(re);

  if (compiler->last_result != ERROR_SUCCESS)
    return NULL;

  return string;
}
예제 #4
0
YR_STRING* yr_parser_reduce_string_declaration(
    yyscan_t yyscanner,
    int32_t string_flags,
    const char* identifier,
    SIZED_STRING* str)
{
  int min_atom_quality;
  int min_atom_quality_aux;
  int re_flags = 0;

  int32_t min_gap;
  int32_t max_gap;

  char message[512];

  YR_COMPILER* compiler = yyget_extra(yyscanner);
  YR_STRING* string = NULL;
  YR_STRING* aux_string;
  YR_STRING* prev_string;

  RE* re = NULL;
  RE* remainder_re;

  RE_ERROR re_error;

  // Determine if a string with the same identifier was already defined
  // by searching for the identifier in string_table.

  string = yr_hash_table_lookup(
      compiler->strings_table,
      identifier,
      NULL);

  if (string != NULL)
  {
    compiler->last_result = ERROR_DUPLICATED_STRING_IDENTIFIER;
    yr_compiler_set_error_extra_info(compiler, identifier);
    goto _exit;
  }

  // Empty strings are now allowed

  if (str->length == 0)
  {
    compiler->last_result = ERROR_EMPTY_STRING;
    yr_compiler_set_error_extra_info(compiler, identifier);
    goto _exit;
  }

  if (str->flags & SIZED_STRING_FLAGS_NO_CASE)
    string_flags |= STRING_GFLAGS_NO_CASE;

  if (str->flags & SIZED_STRING_FLAGS_DOT_ALL)
    re_flags |= RE_FLAGS_DOT_ALL;

  if (strcmp(identifier,"$") == 0)
    string_flags |= STRING_GFLAGS_ANONYMOUS;

  if (!(string_flags & STRING_GFLAGS_WIDE))
    string_flags |= STRING_GFLAGS_ASCII;

  if (string_flags & STRING_GFLAGS_NO_CASE)
    re_flags |= RE_FLAGS_NO_CASE;

  // The STRING_GFLAGS_SINGLE_MATCH flag indicates that finding
  // a single match for the string is enough. This is true in
  // most cases, except when the string count (#) and string offset (@)
  // operators are used. All strings are marked STRING_FLAGS_SINGLE_MATCH
  // initially, and unmarked later if required.

  string_flags |= STRING_GFLAGS_SINGLE_MATCH;

  // The STRING_GFLAGS_FIXED_OFFSET indicates that the string doesn't
  // need to be searched all over the file because the user is using the
  // "at" operator. The string must be searched at a fixed offset in the
  // file. All strings are marked STRING_GFLAGS_FIXED_OFFSET initially,
  // and unmarked later if required.

  string_flags |= STRING_GFLAGS_FIXED_OFFSET;

  if (string_flags & STRING_GFLAGS_HEXADECIMAL ||
      string_flags & STRING_GFLAGS_REGEXP)
  {
    if (string_flags & STRING_GFLAGS_HEXADECIMAL)
      compiler->last_result = yr_re_parse_hex(
          str->c_string, re_flags, &re, &re_error);
    else
      compiler->last_result = yr_re_parse(
          str->c_string, re_flags, &re, &re_error);

    if (compiler->last_result != ERROR_SUCCESS)
    {
      snprintf(
          message,
          sizeof(message),
          "invalid %s \"%s\": %s",
          (string_flags & STRING_GFLAGS_HEXADECIMAL) ?
              "hex string" : "regular expression",
          identifier,
          re_error.message);

      yr_compiler_set_error_extra_info(
          compiler, message);

      goto _exit;
    }

    if (re->flags & RE_FLAGS_FAST_HEX_REGEXP)
      string_flags |= STRING_GFLAGS_FAST_HEX_REGEXP;

    // Regular expressions in the strings section can't mix greedy and ungreedy
    // quantifiers like .* and .*?. That's because these regular expressions can
    // be matched forwards and/or backwards depending on the atom found, and we
    // need the regexp to be all-greedy or all-ungreedy to be able to properly
    // calculate the length of the match.

    if ((re->flags & RE_FLAGS_GREEDY) &&
        (re->flags & RE_FLAGS_UNGREEDY))
    {
      compiler->last_result = ERROR_INVALID_REGULAR_EXPRESSION;

      yr_compiler_set_error_extra_info(compiler,
          "greedy and ungreedy quantifiers can't be mixed in a regular "
          "expression");

      goto _exit;
    }

    if (re->flags & RE_FLAGS_GREEDY)
      string_flags |= STRING_GFLAGS_GREEDY_REGEXP;

    if (yr_re_contains_dot_star(re))
    {
      snprintf(
        message,
        sizeof(message),
        "%s contains .*, consider using .{N} with a reasonable value for N",
        identifier);

        yywarning(yyscanner, message);
    }

    compiler->last_result = yr_re_split_at_chaining_point(
        re, &re, &remainder_re, &min_gap, &max_gap);

    if (compiler->last_result != ERROR_SUCCESS)
      goto _exit;

    compiler->last_result = _yr_parser_write_string(
        identifier,
        string_flags,
        compiler,
        NULL,
        re,
        &string,
        &min_atom_quality);

    if (compiler->last_result != ERROR_SUCCESS)
      goto _exit;

    if (remainder_re != NULL)
    {
      string->g_flags |= STRING_GFLAGS_CHAIN_TAIL | STRING_GFLAGS_CHAIN_PART;
      string->chain_gap_min = min_gap;
      string->chain_gap_max = max_gap;
    }

    // Use "aux_string" from now on, we want to keep the value of "string"
    // because it will returned.

    aux_string = string;

    while (remainder_re != NULL)
    {
      // Destroy regexp pointed by 're' before yr_re_split_at_jmp
      // overwrites 're' with another value.

      yr_re_destroy(re);

      compiler->last_result = yr_re_split_at_chaining_point(
          remainder_re, &re, &remainder_re, &min_gap, &max_gap);

      if (compiler->last_result != ERROR_SUCCESS)
        goto _exit;

      prev_string = aux_string;

      compiler->last_result = _yr_parser_write_string(
          identifier,
          string_flags,
          compiler,
          NULL,
          re,
          &aux_string,
          &min_atom_quality_aux);

      if (compiler->last_result != ERROR_SUCCESS)
        goto _exit;

      if (min_atom_quality_aux < min_atom_quality)
        min_atom_quality = min_atom_quality_aux;

      aux_string->g_flags |= STRING_GFLAGS_CHAIN_PART;
      aux_string->chain_gap_min = min_gap;
      aux_string->chain_gap_max = max_gap;

      prev_string->chained_to = aux_string;

      // prev_string is now chained to aux_string, an string chained
      // to another one can't have a fixed offset, only the head of the
      // string chain can have a fixed offset.

      prev_string->g_flags &= ~STRING_GFLAGS_FIXED_OFFSET;
    }
  }
  else
  {
    compiler->last_result = _yr_parser_write_string(
        identifier,
        string_flags,
        compiler,
        str,
        NULL,
        &string,
        &min_atom_quality);

    if (compiler->last_result != ERROR_SUCCESS)
      goto _exit;
  }

  compiler->last_result = yr_hash_table_add(
      compiler->strings_table,
      identifier,
      NULL,
      string);

  if (compiler->last_result != ERROR_SUCCESS)
    goto _exit;

  if (min_atom_quality < 3 && compiler->callback != NULL)
  {
    snprintf(
        message,
        sizeof(message),
        "%s is slowing down scanning%s",
        string->identifier,
        min_atom_quality < 2 ? " (critical!)" : "");

    yywarning(yyscanner, message);
  }

_exit:

  if (re != NULL)
    yr_re_destroy(re);

  if (compiler->last_result != ERROR_SUCCESS)
    return NULL;

  return string;
}
예제 #5
0
파일: parser.c 프로젝트: bushido/yara
YR_STRING* yr_parser_reduce_string_declaration(
    yyscan_t yyscanner,
    int32_t flags,
    const char* identifier,
    SIZED_STRING* str)
{
  int i;
  int error_offset;
  int min_atom_length;
  char* file_name;
  char message[512];

  YR_STRING* string;
  YR_AC_MATCH* new_match;
  ATOM_TREE* atom_tree;
  YR_ATOM_LIST_ITEM* atom;
  YR_ATOM_LIST_ITEM* atom_list = NULL;
  RE* re = NULL;

  uint8_t* literal_string;

  int literal_string_len;
  int max_string_len;

  YR_COMPILER* compiler = yyget_extra(yyscanner);

  compiler->last_result = yr_arena_allocate_struct(
      compiler->strings_arena,
      sizeof(YR_STRING),
      (void**) &string,
      offsetof(YR_STRING, identifier),
      offsetof(YR_STRING, string),
      EOL);

  if (compiler->last_result != ERROR_SUCCESS)
    return NULL;

  compiler->last_result = yr_arena_write_string(
      compiler->sz_arena,
      identifier,
      &string->identifier);

  if (compiler->last_result != ERROR_SUCCESS)
    return NULL;

  if (strcmp(identifier,"$") == 0)
    flags |= STRING_GFLAGS_ANONYMOUS;

  if (!(flags & STRING_GFLAGS_WIDE))
    flags |= STRING_GFLAGS_ASCII;

  // The STRING_GFLAGS_SINGLE_MATCH flag indicates that finding
  // a single match for the string is enough. This is true in
  // most cases, except when the string count (#) and string offset (@)
  // operators are used. All strings are marked STRING_FLAGS_SINGLE_MATCH
  // initially, and unmarked later if required.

  flags |= STRING_GFLAGS_SINGLE_MATCH;

  string->g_flags = flags;

  memset(string->matches, 0, sizeof(string->matches));

  if (flags & STRING_GFLAGS_HEXADECIMAL ||
      flags & STRING_GFLAGS_REGEXP)
  {
    if (flags & STRING_GFLAGS_HEXADECIMAL)
      compiler->last_result = yr_re_compile_hex(
          str->c_string, &re);
    else
      compiler->last_result = yr_re_compile(
          str->c_string, &re);

    if (compiler->last_result != ERROR_SUCCESS)
    {
      snprintf(
          message,
          sizeof(message),
          "invalid %s in string \"%s\": %s",
          (flags & STRING_GFLAGS_HEXADECIMAL) ?
              "hex string" : "regular expression",
          identifier,
          re->error_message);

      yr_compiler_set_error_extra_info(compiler, message);
      string = NULL;
      goto _exit;
    }

    if (re->flags & RE_FLAGS_START_ANCHORED)
      string->g_flags |= STRING_GFLAGS_START_ANCHORED;

    if (re->flags & RE_FLAGS_END_ANCHORED)
      string->g_flags |= STRING_GFLAGS_END_ANCHORED;

    if (re->flags & RE_FLAGS_FAST_HEX_REGEXP)
      string->g_flags |= STRING_GFLAGS_FAST_HEX_REGEXP;

    if (re->flags & RE_FLAGS_LITERAL_STRING)
    {
      string->g_flags |= STRING_GFLAGS_LITERAL;
      literal_string = re->literal_string;
      literal_string_len = re->literal_string_len;

      compiler->last_result = yr_atoms_extract_from_string(
          literal_string, literal_string_len, string->g_flags, &atom_list);
    }
    else
    {
      compiler->last_result = yr_re_emit_code(
          re, compiler->re_code_arena);

      if (compiler->last_result != ERROR_SUCCESS)
      {
        string = NULL;
        goto _exit;
      }

      compiler->last_result = yr_atoms_extract_from_re(
          re, string->g_flags, &atom_list);
    }
  }
  else
  {
    string->g_flags |= STRING_GFLAGS_LITERAL;
    literal_string = (uint8_t*) str->c_string;
    literal_string_len = str->length;

    compiler->last_result  = yr_atoms_extract_from_string(
        literal_string, literal_string_len, string->g_flags, &atom_list);
  }

  if (compiler->last_result != ERROR_SUCCESS)
  {
    string = NULL;
    goto _exit;
  }

  if (STRING_IS_LITERAL(string))
  {
    compiler->last_result = yr_arena_write_data(
        compiler->sz_arena,
        literal_string,
        literal_string_len,
        (void*) &string->string);

    if (compiler->last_result != ERROR_SUCCESS)
    {
      string = NULL;
      goto _exit;
    }

    string->length = literal_string_len;
  }

  // Add the string to Aho-Corasick automaton.

  if (atom_list != NULL)
  {
    compiler->last_result = yr_ac_add_string(
      compiler->automaton_arena,
      compiler->automaton,
      string,
      atom_list);
  }
  else
  {
    compiler->last_result = yr_arena_allocate_struct(
        compiler->automaton_arena,
        sizeof(YR_AC_MATCH),
        (void**) &new_match,
        offsetof(YR_AC_MATCH, string),
        offsetof(YR_AC_MATCH, forward_code),
        offsetof(YR_AC_MATCH, backward_code),
        offsetof(YR_AC_MATCH, next),
        EOL);

    if (compiler->last_result == ERROR_SUCCESS)
    {
      new_match->backtrack = 0;
      new_match->string = string;
      new_match->forward_code = re->root_node->forward_code;
      new_match->backward_code = NULL;
      new_match->next = compiler->automaton->root->matches;
      compiler->automaton->root->matches = new_match;
    }
  }

  atom = atom_list;

  if (atom != NULL)
    min_atom_length = MAX_ATOM_LENGTH;
  else
    min_atom_length = 0;

  while (atom != NULL)
  {
    if (atom->atom_length < min_atom_length)
      min_atom_length = atom->atom_length;
    atom = atom->next;
  }

  if (STRING_IS_LITERAL(string))
  {
    if (STRING_IS_WIDE(string))
      max_string_len = string->length * 2;
    else
      max_string_len = string->length;

    if (max_string_len == min_atom_length)
      string->g_flags |= STRING_GFLAGS_FITS_IN_ATOM;
  }

  if (compiler->file_name_stack_ptr > 0)
    file_name = compiler->file_name_stack[compiler->file_name_stack_ptr - 1];
  else
    file_name = NULL;

  if (min_atom_length < 2 && compiler->error_report_function != NULL)
  {
    snprintf(
        message,
        sizeof(message),
        "%s is slowing down scanning%s",
        string->identifier,
        min_atom_length == 0 ? " (critical!)" : "");

    compiler->error_report_function(
        YARA_ERROR_LEVEL_WARNING,
        file_name,
        yyget_lineno(yyscanner),
        message);
  }

  if (compiler->last_result != ERROR_SUCCESS)
    string = NULL;

_exit:

  if (atom_list != NULL)
    yr_atoms_list_destroy(atom_list);

  if (re != NULL)
    yr_re_destroy(re);

  return string;
}