Beispiel #1
0
int _yr_scan_verify_literal_match(
    YR_SCAN_CONTEXT* context,
    YR_AC_MATCH* ac_match,
    uint8_t* data,
    size_t data_size,
    size_t data_base,
    size_t offset)
{
  int flags = 0;
  int forward_matches = 0;

  CALLBACK_ARGS callback_args;
  YR_STRING* string = ac_match->string;

  if (STRING_FITS_IN_ATOM(string))
  {
    forward_matches = ac_match->backtrack;
  }
  else if (STRING_IS_NO_CASE(string))
  {
    if (STRING_IS_ASCII(string))
    {
      forward_matches = _yr_scan_icompare(
          data + offset,
          data_size - offset,
          string->string,
          string->length);
    }

    if (STRING_IS_WIDE(string) && forward_matches == 0)
    {
      forward_matches = _yr_scan_wicompare(
          data + offset,
          data_size - offset,
          string->string,
          string->length);
    }
  }
  else
  {
    if (STRING_IS_ASCII(string))
    {
      forward_matches = _yr_scan_compare(
          data + offset,
          data_size - offset,
          string->string,
          string->length);
    }

    if (STRING_IS_WIDE(string) && forward_matches == 0)
    {
      forward_matches = _yr_scan_wcompare(
          data + offset,
          data_size - offset,
          string->string,
          string->length);
    }
  }

  if (forward_matches == 0)
    return ERROR_SUCCESS;

  if (forward_matches == string->length * 2)
    flags |= RE_FLAGS_WIDE;

  if (STRING_IS_NO_CASE(string))
    flags |= RE_FLAGS_NO_CASE;

  callback_args.context = context;
  callback_args.string = string;
  callback_args.data = data;
  callback_args.data_size = data_size;
  callback_args.data_base = data_base;
  callback_args.forward_matches = forward_matches;
  callback_args.full_word = STRING_IS_FULL_WORD(string);

  FAIL_ON_ERROR(_yr_scan_match_callback(
      data + offset, 0, flags, &callback_args));

  return ERROR_SUCCESS;
}
Beispiel #2
0
int _yr_scan_verify_re_match(
    YR_SCAN_CONTEXT* context,
    YR_AC_MATCH* ac_match,
    uint8_t* data,
    size_t data_size,
    size_t data_base,
    size_t offset)
{
  CALLBACK_ARGS callback_args;
  RE_EXEC_FUNC exec;

  int forward_matches = -1;
  int backward_matches = -1;
  int flags = 0;

  if (STRING_IS_GREEDY_REGEXP(ac_match->string))
    flags |= RE_FLAGS_GREEDY;

  if (STRING_IS_FAST_HEX_REGEXP(ac_match->string))
    exec = _yr_scan_fast_hex_re_exec;
  else
    exec = yr_re_exec;

  if (STRING_IS_ASCII(ac_match->string))
  {
    forward_matches = exec(
        ac_match->forward_code,
        data + offset,
        data_size - offset,
        offset > 0 ? flags | RE_FLAGS_NOT_AT_START : flags,
        NULL,
        NULL);
  }

  if (STRING_IS_WIDE(ac_match->string) && forward_matches == -1)
  {
    flags |= RE_FLAGS_WIDE;
    forward_matches = exec(
        ac_match->forward_code,
        data + offset,
        data_size - offset,
        offset > 0 ? flags | RE_FLAGS_NOT_AT_START : flags,
        NULL,
        NULL);
  }

  switch(forward_matches)
  {
    case -1:
      return ERROR_SUCCESS;
    case -2:
      return ERROR_INSUFICIENT_MEMORY;
    case -3:
      return ERROR_TOO_MANY_MATCHES;
    case -4:
      return ERROR_TOO_MANY_RE_FIBERS;
    case -5:
      return ERROR_INTERNAL_FATAL_ERROR;
  }

  if (forward_matches == 0 && ac_match->backward_code == NULL)
    return ERROR_SUCCESS;

  callback_args.string = ac_match->string;
  callback_args.context = context;
  callback_args.data = data;
  callback_args.data_size = data_size;
  callback_args.data_base = data_base;
  callback_args.forward_matches = forward_matches;
  callback_args.full_word = STRING_IS_FULL_WORD(ac_match->string);

  if (ac_match->backward_code != NULL)
  {
    backward_matches = exec(
        ac_match->backward_code,
        data + offset,
        offset,
        flags | RE_FLAGS_BACKWARDS | RE_FLAGS_EXHAUSTIVE,
        _yr_scan_match_callback,
        (void*) &callback_args);

    switch(backward_matches)
    {
      case -2:
        return ERROR_INSUFICIENT_MEMORY;
      case -3:
        return ERROR_TOO_MANY_MATCHES;
      case -4:
        return ERROR_TOO_MANY_RE_FIBERS;
      case -5:
        return ERROR_INTERNAL_FATAL_ERROR;
    }
  }
  else
  {
    FAIL_ON_ERROR(_yr_scan_match_callback(
        data + offset, 0, flags, &callback_args));
  }

  return ERROR_SUCCESS;
}
Beispiel #3
0
int _yr_scan_verify_literal_match(
    YR_AC_MATCH* ac_match,
    uint8_t* data,
    size_t data_size,
    size_t offset,
    YR_ARENA* matches_arena)
{
  int flags = 0;
  int forward_matches = 0;

  CALLBACK_ARGS callback_args;
  YR_STRING* string = ac_match->string;

  if (STRING_FITS_IN_ATOM(string))
  {
    if (STRING_IS_WIDE(string))
      forward_matches = string->length * 2;
    else
      forward_matches = string->length;
  }
  else if (STRING_IS_NO_CASE(string))
  {
    flags |= RE_FLAGS_NO_CASE;

    if (STRING_IS_ASCII(string))
    {
      forward_matches = _yr_scan_icompare(
          data + offset,
          data_size - offset,
          string->string,
          string->length);
    }

    if (STRING_IS_WIDE(string) && forward_matches == 0)
    {
      flags |= RE_FLAGS_WIDE;
      forward_matches = _yr_scan_wicompare(
          data + offset,
          data_size - offset,
          string->string,
          string->length);
    }
  }
  else
  {
    if (STRING_IS_ASCII(string))
    {
      forward_matches = _yr_scan_compare(
          data + offset,
          data_size - offset,
          string->string,
          string->length);
    }

    if (STRING_IS_WIDE(string) && forward_matches == 0)
    {
      flags |= RE_FLAGS_WIDE;
      forward_matches = _yr_scan_wcompare(
          data + offset,
          data_size - offset,
          string->string,
          string->length);
    }
  }

  if (forward_matches > 0)
  {
    if (STRING_IS_FULL_WORD(string))
    {
      if (flags & RE_FLAGS_WIDE)
      {
        if (offset >= 2 &&
            *(data + offset - 1) == 0 &&
            isalnum(*(data + offset - 2)))
          return ERROR_SUCCESS;

        if (offset + forward_matches + 1 < data_size &&
            *(data + offset + forward_matches + 1) == 0 &&
            isalnum(*(data + offset + forward_matches)))
          return ERROR_SUCCESS;
      }
      else
      {
        if (offset >= 1 &&
            isalnum(*(data + offset - 1)))
          return ERROR_SUCCESS;

        if (offset + forward_matches < data_size &&
            isalnum(*(data + offset + forward_matches)))
          return ERROR_SUCCESS;
      }
    }

    callback_args.string = string;
    callback_args.data = data;
    callback_args.data_size = data_size;
    callback_args.matches_arena = matches_arena;
    callback_args.forward_matches = forward_matches;
    callback_args.full_word = STRING_IS_FULL_WORD(string);
    callback_args.tidx = yr_get_tidx();

    FAIL_ON_ERROR(_yr_scan_match_callback(
        data + offset, 0, flags, &callback_args));
  }

  return ERROR_SUCCESS;
}
Beispiel #4
0
int _yr_scan_verify_re_match(
    YR_AC_MATCH* ac_match,
    uint8_t* data,
    size_t data_size,
    size_t offset,
    YR_ARENA* matches_arena)
{
  CALLBACK_ARGS callback_args;
  RE_EXEC_FUNC exec;

  int forward_matches = -1;
  int backward_matches = -1;
  int flags = 0;

  if (STRING_IS_FAST_HEX_REGEXP(ac_match->string))
    exec = _yr_scan_fast_hex_re_exec;
  else
    exec = yr_re_exec;

  if (STRING_IS_NO_CASE(ac_match->string))
    flags |= RE_FLAGS_NO_CASE;

  if (STRING_IS_HEX(ac_match->string) ||
      STRING_IS_REGEXP_DOT_ALL(ac_match->string))
    flags |= RE_FLAGS_DOT_ALL;

  if (STRING_IS_ASCII(ac_match->string))
  {
    forward_matches = exec(
        ac_match->forward_code,
        data + offset,
        data_size - offset,
        flags,
        NULL,
        NULL);
  }

  if (STRING_IS_WIDE(ac_match->string) && forward_matches == -1)
  {
    flags |= RE_FLAGS_WIDE;
    forward_matches = exec(
        ac_match->forward_code,
        data + offset,
        data_size - offset,
        flags,
        NULL,
        NULL);
  }

  switch(forward_matches)
  {
    case -1:
      return ERROR_SUCCESS;
    case -2:
      return ERROR_INSUFICIENT_MEMORY;
    case -3:
      return ERROR_INTERNAL_FATAL_ERROR;
  }

  if (forward_matches == 0 && ac_match->backward_code == NULL)
    return ERROR_SUCCESS;

  callback_args.string = ac_match->string;
  callback_args.data = data;
  callback_args.data_size = data_size;
  callback_args.matches_arena = matches_arena;
  callback_args.forward_matches = forward_matches;
  callback_args.full_word = STRING_IS_FULL_WORD(ac_match->string);
  callback_args.tidx = yr_get_tidx();

  if (ac_match->backward_code != NULL)
  {
    backward_matches = exec(
        ac_match->backward_code,
        data + offset,
        offset,
        flags | RE_FLAGS_BACKWARDS | RE_FLAGS_EXHAUSTIVE,
        _yr_scan_match_callback,
        (void*) &callback_args);

    if (backward_matches == -2)
      return ERROR_INSUFICIENT_MEMORY;

    if (backward_matches == -3)
      return ERROR_INTERNAL_FATAL_ERROR;
  }
  else
  {
    FAIL_ON_ERROR(_yr_scan_match_callback(
        data + offset, 0, flags, &callback_args));
  }

  return ERROR_SUCCESS;
}
Beispiel #5
0
static int _yr_scan_verify_re_match(
    YR_SCAN_CONTEXT* context,
    YR_AC_MATCH* ac_match,
    const uint8_t* data,
    size_t data_size,
    uint64_t data_base,
    size_t offset)
{
  CALLBACK_ARGS callback_args;
  RE_EXEC_FUNC exec;

  int forward_matches = -1;
  int backward_matches = -1;
  int flags = 0;

  if (STRING_IS_GREEDY_REGEXP(ac_match->string))
    flags |= RE_FLAGS_GREEDY;

  if (STRING_IS_NO_CASE(ac_match->string))
    flags |= RE_FLAGS_NO_CASE;

  if (STRING_IS_DOT_ALL(ac_match->string))
    flags |= RE_FLAGS_DOT_ALL;

  if (STRING_IS_FAST_REGEXP(ac_match->string))
    exec = yr_re_fast_exec;
  else
    exec = yr_re_exec;

  if (STRING_IS_ASCII(ac_match->string))
  {
    FAIL_ON_ERROR(exec(
        context,
        ac_match->forward_code,
        data + offset,
        data_size - offset,
        offset,
        flags,
        NULL,
        NULL,
        &forward_matches));
  }

  if (STRING_IS_WIDE(ac_match->string) && forward_matches == -1)
  {
    flags |= RE_FLAGS_WIDE;
    FAIL_ON_ERROR(exec(
        context,
        ac_match->forward_code,
        data + offset,
        data_size - offset,
        offset,
        flags,
        NULL,
        NULL,
        &forward_matches));
  }

  if (forward_matches == -1)
    return ERROR_SUCCESS;

  if (forward_matches == 0 && ac_match->backward_code == NULL)
    return ERROR_SUCCESS;

  callback_args.string = ac_match->string;
  callback_args.context = context;
  callback_args.data = data;
  callback_args.data_size = data_size;
  callback_args.data_base = data_base;
  callback_args.forward_matches = forward_matches;
  callback_args.full_word = STRING_IS_FULL_WORD(ac_match->string);

  if (ac_match->backward_code != NULL)
  {
    FAIL_ON_ERROR(exec(
        context,
        ac_match->backward_code,
        data + offset,
        data_size - offset,
        offset,
        flags | RE_FLAGS_BACKWARDS | RE_FLAGS_EXHAUSTIVE,
        _yr_scan_match_callback,
        (void*) &callback_args,
        &backward_matches));
  }
  else
  {
    FAIL_ON_ERROR(_yr_scan_match_callback(
        data + offset, 0, flags, &callback_args));
  }

  return ERROR_SUCCESS;
}
Beispiel #6
0
YR_STRING* yr_parser_reduce_string_declaration(
    yyscan_t yyscanner,
    int32_t flags,
    const char* identifier,
    SIZED_STRING* str)
{
  int i;
  int error_offset;
  int min_atom_length;
  char* file_name;
  char message[512];

  YR_STRING* string;
  YR_AC_MATCH* new_match;
  ATOM_TREE* atom_tree;
  YR_ATOM_LIST_ITEM* atom;
  YR_ATOM_LIST_ITEM* atom_list = NULL;
  RE* re = NULL;

  uint8_t* literal_string;

  int literal_string_len;
  int max_string_len;

  YR_COMPILER* compiler = yyget_extra(yyscanner);

  compiler->last_result = yr_arena_allocate_struct(
      compiler->strings_arena,
      sizeof(YR_STRING),
      (void**) &string,
      offsetof(YR_STRING, identifier),
      offsetof(YR_STRING, string),
      EOL);

  if (compiler->last_result != ERROR_SUCCESS)
    return NULL;

  compiler->last_result = yr_arena_write_string(
      compiler->sz_arena,
      identifier,
      &string->identifier);

  if (compiler->last_result != ERROR_SUCCESS)
    return NULL;

  if (strcmp(identifier,"$") == 0)
    flags |= STRING_GFLAGS_ANONYMOUS;

  if (!(flags & STRING_GFLAGS_WIDE))
    flags |= STRING_GFLAGS_ASCII;

  // The STRING_GFLAGS_SINGLE_MATCH flag indicates that finding
  // a single match for the string is enough. This is true in
  // most cases, except when the string count (#) and string offset (@)
  // operators are used. All strings are marked STRING_FLAGS_SINGLE_MATCH
  // initially, and unmarked later if required.

  flags |= STRING_GFLAGS_SINGLE_MATCH;

  string->g_flags = flags;

  memset(string->matches, 0, sizeof(string->matches));

  if (flags & STRING_GFLAGS_HEXADECIMAL ||
      flags & STRING_GFLAGS_REGEXP)
  {
    if (flags & STRING_GFLAGS_HEXADECIMAL)
      compiler->last_result = yr_re_compile_hex(
          str->c_string, &re);
    else
      compiler->last_result = yr_re_compile(
          str->c_string, &re);

    if (compiler->last_result != ERROR_SUCCESS)
    {
      snprintf(
          message,
          sizeof(message),
          "invalid %s in string \"%s\": %s",
          (flags & STRING_GFLAGS_HEXADECIMAL) ?
              "hex string" : "regular expression",
          identifier,
          re->error_message);

      yr_compiler_set_error_extra_info(compiler, message);
      string = NULL;
      goto _exit;
    }

    if (re->flags & RE_FLAGS_START_ANCHORED)
      string->g_flags |= STRING_GFLAGS_START_ANCHORED;

    if (re->flags & RE_FLAGS_END_ANCHORED)
      string->g_flags |= STRING_GFLAGS_END_ANCHORED;

    if (re->flags & RE_FLAGS_FAST_HEX_REGEXP)
      string->g_flags |= STRING_GFLAGS_FAST_HEX_REGEXP;

    if (re->flags & RE_FLAGS_LITERAL_STRING)
    {
      string->g_flags |= STRING_GFLAGS_LITERAL;
      literal_string = re->literal_string;
      literal_string_len = re->literal_string_len;

      compiler->last_result = yr_atoms_extract_from_string(
          literal_string, literal_string_len, string->g_flags, &atom_list);
    }
    else
    {
      compiler->last_result = yr_re_emit_code(
          re, compiler->re_code_arena);

      if (compiler->last_result != ERROR_SUCCESS)
      {
        string = NULL;
        goto _exit;
      }

      compiler->last_result = yr_atoms_extract_from_re(
          re, string->g_flags, &atom_list);
    }
  }
  else
  {
    string->g_flags |= STRING_GFLAGS_LITERAL;
    literal_string = (uint8_t*) str->c_string;
    literal_string_len = str->length;

    compiler->last_result  = yr_atoms_extract_from_string(
        literal_string, literal_string_len, string->g_flags, &atom_list);
  }

  if (compiler->last_result != ERROR_SUCCESS)
  {
    string = NULL;
    goto _exit;
  }

  if (STRING_IS_LITERAL(string))
  {
    compiler->last_result = yr_arena_write_data(
        compiler->sz_arena,
        literal_string,
        literal_string_len,
        (void*) &string->string);

    if (compiler->last_result != ERROR_SUCCESS)
    {
      string = NULL;
      goto _exit;
    }

    string->length = literal_string_len;
  }

  // Add the string to Aho-Corasick automaton.

  if (atom_list != NULL)
  {
    compiler->last_result = yr_ac_add_string(
      compiler->automaton_arena,
      compiler->automaton,
      string,
      atom_list);
  }
  else
  {
    compiler->last_result = yr_arena_allocate_struct(
        compiler->automaton_arena,
        sizeof(YR_AC_MATCH),
        (void**) &new_match,
        offsetof(YR_AC_MATCH, string),
        offsetof(YR_AC_MATCH, forward_code),
        offsetof(YR_AC_MATCH, backward_code),
        offsetof(YR_AC_MATCH, next),
        EOL);

    if (compiler->last_result == ERROR_SUCCESS)
    {
      new_match->backtrack = 0;
      new_match->string = string;
      new_match->forward_code = re->root_node->forward_code;
      new_match->backward_code = NULL;
      new_match->next = compiler->automaton->root->matches;
      compiler->automaton->root->matches = new_match;
    }
  }

  atom = atom_list;

  if (atom != NULL)
    min_atom_length = MAX_ATOM_LENGTH;
  else
    min_atom_length = 0;

  while (atom != NULL)
  {
    if (atom->atom_length < min_atom_length)
      min_atom_length = atom->atom_length;
    atom = atom->next;
  }

  if (STRING_IS_LITERAL(string))
  {
    if (STRING_IS_WIDE(string))
      max_string_len = string->length * 2;
    else
      max_string_len = string->length;

    if (max_string_len == min_atom_length)
      string->g_flags |= STRING_GFLAGS_FITS_IN_ATOM;
  }

  if (compiler->file_name_stack_ptr > 0)
    file_name = compiler->file_name_stack[compiler->file_name_stack_ptr - 1];
  else
    file_name = NULL;

  if (min_atom_length < 2 && compiler->error_report_function != NULL)
  {
    snprintf(
        message,
        sizeof(message),
        "%s is slowing down scanning%s",
        string->identifier,
        min_atom_length == 0 ? " (critical!)" : "");

    compiler->error_report_function(
        YARA_ERROR_LEVEL_WARNING,
        file_name,
        yyget_lineno(yyscanner),
        message);
  }

  if (compiler->last_result != ERROR_SUCCESS)
    string = NULL;

_exit:

  if (atom_list != NULL)
    yr_atoms_list_destroy(atom_list);

  if (re != NULL)
    yr_re_destroy(re);

  return string;
}