Example #1
0
int yr_atoms_extract_from_re(
    YR_ATOMS_CONFIG* config,
    RE_AST* re_ast,
    int flags,
    YR_ATOM_LIST_ITEM** atoms,
    int* min_atom_quality)
{
  YR_ATOM_TREE* atom_tree = (YR_ATOM_TREE*) yr_malloc(sizeof(YR_ATOM_TREE));

  YR_ATOM_LIST_ITEM* wide_atoms;
  YR_ATOM_LIST_ITEM* case_insensitive_atoms;

  if (atom_tree == NULL)
    return ERROR_INSUFFICIENT_MEMORY;

  atom_tree->root_node = _yr_atoms_tree_node_create(ATOM_TREE_OR);

  if (atom_tree->root_node == NULL)
  {
    _yr_atoms_tree_destroy(atom_tree);
    return ERROR_INSUFFICIENT_MEMORY;
  }

  FAIL_ON_ERROR_WITH_CLEANUP(
      _yr_atoms_extract_from_re(config, re_ast, atom_tree->root_node),
      _yr_atoms_tree_destroy(atom_tree));

  // Initialize atom list
  *atoms = NULL;

  // Choose the atoms that will be used.
  FAIL_ON_ERROR_WITH_CLEANUP(
      _yr_atoms_choose(config, atom_tree->root_node, atoms, min_atom_quality),
      _yr_atoms_tree_destroy(atom_tree));

  _yr_atoms_tree_destroy(atom_tree);

  FAIL_ON_ERROR_WITH_CLEANUP(
      _yr_atoms_expand_wildcards(*atoms),
      {
        yr_atoms_list_destroy(*atoms);
        *atoms = NULL;
      });
Example #2
0
File: atoms.c Project: rednaga/yara
static int _yr_atoms_choose(
    ATOM_TREE_NODE* node,
    YR_ATOM_LIST_ITEM** chosen_atoms,
    int* atoms_quality)
{
  ATOM_TREE_NODE* child;
  YR_ATOM_LIST_ITEM* item;
  YR_ATOM_LIST_ITEM* tail;

  int i, quality;
  int max_quality = YR_MIN_ATOM_QUALITY;
  int min_quality = YR_MAX_ATOM_QUALITY;

  *chosen_atoms = NULL;

  switch (node->type)
  {
  case ATOM_TREE_LEAF:

    item = (YR_ATOM_LIST_ITEM*) yr_malloc(sizeof(YR_ATOM_LIST_ITEM));

    if (item == NULL)
      return ERROR_INSUFFICIENT_MEMORY;

    for (i = 0; i < node->atom_length; i++)
      item->atom[i] = node->atom[i];

    item->atom_length = node->atom_length;
    item->forward_code = node->forward_code;
    item->backward_code = node->backward_code;
    item->backtrack = 0;
    item->next = NULL;

    *chosen_atoms = item;
    *atoms_quality = _yr_atoms_quality(node->atom, node->atom_length);
    break;

  case ATOM_TREE_OR:

    child = node->children_head;

    while (child != NULL)
    {
      FAIL_ON_ERROR(_yr_atoms_choose(child, &item, &quality));

      if (quality > max_quality)
      {
        max_quality = quality;
        yr_atoms_list_destroy(*chosen_atoms);
        *chosen_atoms = item;
      }
      else
      {
        yr_atoms_list_destroy(item);
      }

      child = child->next_sibling;
    }

    *atoms_quality = max_quality;
    break;

  case ATOM_TREE_AND:

    child = node->children_head;

    while (child != NULL)
    {
      FAIL_ON_ERROR(_yr_atoms_choose(child, &item, &quality));

      if (quality < min_quality)
        min_quality = quality;

      if (item != NULL)
      {
        tail = item;
        while (tail->next != NULL)
          tail = tail->next;

        tail->next = *chosen_atoms;
        *chosen_atoms = item;
      }

      child = child->next_sibling;
    }

    *atoms_quality = min_quality;
    break;
  }

  return ERROR_SUCCESS;
}
Example #3
0
File: atoms.c Project: rednaga/yara
int yr_atoms_extract_from_re(
    RE_AST* re_ast,
    int flags,
    YR_ATOM_LIST_ITEM** atoms)
{
  ATOM_TREE* atom_tree = (ATOM_TREE*) yr_malloc(sizeof(ATOM_TREE));
  ATOM_TREE_NODE* temp;
  YR_ATOM_LIST_ITEM* wide_atoms;
  YR_ATOM_LIST_ITEM* case_insensitive_atoms;
  YR_ATOM_LIST_ITEM* triplet_atoms;

  int min_atom_quality = YR_MIN_ATOM_QUALITY;

  if (atom_tree == NULL)
    return ERROR_INSUFFICIENT_MEMORY;

  atom_tree->root_node = _yr_atoms_tree_node_create(ATOM_TREE_OR);

  if (atom_tree->root_node == NULL)
  {
    _yr_atoms_tree_destroy(atom_tree);
    return ERROR_INSUFFICIENT_MEMORY;
  }

  atom_tree->current_leaf = NULL;

  atom_tree->root_node = _yr_atoms_extract_from_re_node(
      re_ast->root_node, atom_tree, atom_tree->root_node);

  if (atom_tree->root_node == NULL)
  {
    _yr_atoms_tree_destroy(atom_tree);
    return ERROR_INSUFFICIENT_MEMORY;
  }

  if (atom_tree->current_leaf != NULL)
    _yr_atoms_tree_node_append(atom_tree->root_node, atom_tree->current_leaf);

  if (atom_tree->root_node->children_head ==
      atom_tree->root_node->children_tail)
  {
    // The root OR node has a single child, there's no need for the OR node so
    // we proceed to destroy it and use its child as root.

    temp = atom_tree->root_node;
    atom_tree->root_node = atom_tree->root_node->children_head;
    yr_free(temp);
  }

  // Initialize atom list
  *atoms = NULL;

  if (atom_tree->root_node != NULL)
  {
    // Choose the atoms that will be used.
    FAIL_ON_ERROR_WITH_CLEANUP(
        _yr_atoms_choose(atom_tree->root_node, atoms, &min_atom_quality),
        _yr_atoms_tree_destroy(atom_tree));
  }

  _yr_atoms_tree_destroy(atom_tree);

  if (min_atom_quality <= 2)
  {
    // Chosen atoms contain low quality ones, let's try infering some higher
    // quality atoms.

    FAIL_ON_ERROR_WITH_CLEANUP(
        yr_atoms_extract_triplets(re_ast->root_node, &triplet_atoms),
        {
          yr_atoms_list_destroy(*atoms);
          yr_atoms_list_destroy(triplet_atoms);
          *atoms = NULL;
        });
Example #4
0
int _yr_parser_write_string(
    const char* identifier,
    int flags,
    YR_COMPILER* compiler,
    SIZED_STRING* str,
    RE* re,
    YR_STRING** string,
    int* min_atom_length)
{
  SIZED_STRING* literal_string;
  YR_AC_MATCH* new_match;

  YR_ATOM_LIST_ITEM* atom;
  YR_ATOM_LIST_ITEM* atom_list = NULL;

  int result;
  int max_string_len;
  int free_literal = FALSE;

  *string = NULL;

  result = yr_arena_allocate_struct(
      compiler->strings_arena,
      sizeof(YR_STRING),
      (void**) string,
      offsetof(YR_STRING, identifier),
      offsetof(YR_STRING, string),
      offsetof(YR_STRING, chained_to),
      EOL);

  if (result != ERROR_SUCCESS)
    return result;

  result = yr_arena_write_string(
      compiler->sz_arena,
      identifier,
      &(*string)->identifier);

  if (result != ERROR_SUCCESS)
    return result;

  if (flags & STRING_GFLAGS_HEXADECIMAL ||
      flags & STRING_GFLAGS_REGEXP)
  {
    literal_string = yr_re_extract_literal(re);

    if (literal_string != NULL)
    {
      flags |= STRING_GFLAGS_LITERAL;
      free_literal = TRUE;
    }
  }
  else
  {
    literal_string = str;
    flags |= STRING_GFLAGS_LITERAL;
  }

  (*string)->g_flags = flags;
  (*string)->chained_to = NULL;

  memset((*string)->matches, 0,
         sizeof((*string)->matches));

  memset((*string)->unconfirmed_matches, 0,
         sizeof((*string)->unconfirmed_matches));

  if (flags & STRING_GFLAGS_LITERAL)
  {
    (*string)->length = literal_string->length;

    result = yr_arena_write_data(
        compiler->sz_arena,
        literal_string->c_string,
        literal_string->length,
        (void*) &(*string)->string);

    if (result == ERROR_SUCCESS)
    {
      result = yr_atoms_extract_from_string(
          (uint8_t*) literal_string->c_string,
          literal_string->length,
          flags,
          &atom_list);
    }
  }
  else
  {
    result = yr_re_emit_code(re, compiler->re_code_arena);

    if (result == ERROR_SUCCESS)
      result = yr_atoms_extract_from_re(re, flags, &atom_list);
  }

  if (result == ERROR_SUCCESS)
  {
    // Add the string to Aho-Corasick automaton.

    if (atom_list != NULL)
    {
      result = yr_ac_add_string(
          compiler->automaton_arena,
          compiler->automaton,
          *string,
          atom_list);
    }
    else
    {
      result = yr_arena_allocate_struct(
          compiler->automaton_arena,
          sizeof(YR_AC_MATCH),
          (void**) &new_match,
          offsetof(YR_AC_MATCH, string),
          offsetof(YR_AC_MATCH, forward_code),
          offsetof(YR_AC_MATCH, backward_code),
          offsetof(YR_AC_MATCH, next),
          EOL);

      if (result == ERROR_SUCCESS)
      {
        new_match->backtrack = 0;
        new_match->string = *string;
        new_match->forward_code = re->root_node->forward_code;
        new_match->backward_code = NULL;
        new_match->next = compiler->automaton->root->matches;
        compiler->automaton->root->matches = new_match;
      }
    }
  }

  atom = atom_list;

  if (atom != NULL)
    *min_atom_length = MAX_ATOM_LENGTH;
  else
    *min_atom_length = 0;

  while (atom != NULL)
  {
    if (atom->atom_length < *min_atom_length)
      *min_atom_length = atom->atom_length;
    atom = atom->next;
  }

  if (flags & STRING_GFLAGS_LITERAL)
  {
    if (flags & STRING_GFLAGS_WIDE)
      max_string_len = (*string)->length * 2;
    else
      max_string_len = (*string)->length;

    if (max_string_len == *min_atom_length)
      (*string)->g_flags |= STRING_GFLAGS_FITS_IN_ATOM;
  }

  if (free_literal)
    yr_free(literal_string);

  if (atom_list != NULL)
    yr_atoms_list_destroy(atom_list);

  return result;
}
Example #5
0
int _yr_parser_write_string(
    const char* identifier,
    int flags,
    YR_COMPILER* compiler,
    SIZED_STRING* str,
    RE* re,
    YR_STRING** string,
    int* min_atom_quality)
{
  SIZED_STRING* literal_string;
  YR_AC_MATCH* new_match;
  YR_ATOM_LIST_ITEM* atom_list = NULL;

  int result;
  int max_string_len;
  int free_literal = FALSE;

  *string = NULL;

  result = yr_arena_allocate_struct(
      compiler->strings_arena,
      sizeof(YR_STRING),
      (void**) string,
      offsetof(YR_STRING, identifier),
      offsetof(YR_STRING, string),
      offsetof(YR_STRING, chained_to),
      EOL);

  if (result != ERROR_SUCCESS)
    return result;

  result = yr_arena_write_string(
      compiler->sz_arena,
      identifier,
      &(*string)->identifier);

  if (result != ERROR_SUCCESS)
    return result;

  if (flags & STRING_GFLAGS_HEXADECIMAL ||
      flags & STRING_GFLAGS_REGEXP)
  {
    literal_string = yr_re_extract_literal(re);

    if (literal_string != NULL)
    {
      flags |= STRING_GFLAGS_LITERAL;
      free_literal = TRUE;
    }
    else
    {
      // Non-literal strings can't be marked as fixed offset because once we
      // find a string atom in the scanned data we don't know the offset where
      // the string should start, as the non-literal strings can contain
      // variable-length portions.

      flags &= ~STRING_GFLAGS_FIXED_OFFSET;
    }
  }
  else
  {
    literal_string = str;
    flags |= STRING_GFLAGS_LITERAL;
  }

  (*string)->g_flags = flags;
  (*string)->chained_to = NULL;
  (*string)->fixed_offset = UNDEFINED;

  #ifdef PROFILING_ENABLED
  (*string)->clock_ticks = 0;
  #endif

  memset((*string)->matches, 0,
         sizeof((*string)->matches));

  memset((*string)->unconfirmed_matches, 0,
         sizeof((*string)->unconfirmed_matches));

  if (flags & STRING_GFLAGS_LITERAL)
  {
    (*string)->length = (uint32_t) literal_string->length;

    result = yr_arena_write_data(
        compiler->sz_arena,
        literal_string->c_string,
        literal_string->length + 1,   // +1 to include terminating NULL
        (void**) &(*string)->string);

    if (result == ERROR_SUCCESS)
    {
      result = yr_atoms_extract_from_string(
          (uint8_t*) literal_string->c_string,
          (int32_t) literal_string->length,
          flags,
          &atom_list);
    }
  }
  else
  {
    result = yr_re_emit_code(re, compiler->re_code_arena);

    if (result == ERROR_SUCCESS)
      result = yr_atoms_extract_from_re(re, flags, &atom_list);
  }

  if (result == ERROR_SUCCESS)
  {
    // Add the string to Aho-Corasick automaton.

    if (atom_list != NULL)
    {
      result = yr_ac_add_string(
          compiler->automaton_arena,
          compiler->automaton,
          *string,
          atom_list);
    }
    else
    {
      result = yr_arena_allocate_struct(
          compiler->automaton_arena,
          sizeof(YR_AC_MATCH),
          (void**) &new_match,
          offsetof(YR_AC_MATCH, string),
          offsetof(YR_AC_MATCH, forward_code),
          offsetof(YR_AC_MATCH, backward_code),
          offsetof(YR_AC_MATCH, next),
          EOL);

      if (result == ERROR_SUCCESS)
      {
        new_match->backtrack = 0;
        new_match->string = *string;
        new_match->forward_code = re->root_node->forward_code;
        new_match->backward_code = NULL;
        new_match->next = compiler->automaton->root->matches;
        compiler->automaton->root->matches = new_match;
      }
    }
  }

  *min_atom_quality = yr_atoms_min_quality(atom_list);

  if (flags & STRING_GFLAGS_LITERAL)
  {
    if (flags & STRING_GFLAGS_WIDE)
      max_string_len = (*string)->length * 2;
    else
      max_string_len = (*string)->length;

    if (max_string_len <= MAX_ATOM_LENGTH)
      (*string)->g_flags |= STRING_GFLAGS_FITS_IN_ATOM;
  }

  if (free_literal)
    yr_free(literal_string);

  if (atom_list != NULL)
    yr_atoms_list_destroy(atom_list);

  return result;
}
Example #6
0
int yr_atoms_extract_from_re(
    RE* re,
    int flags,
    YR_ATOM_LIST_ITEM** atoms)
{
  ATOM_TREE* atom_tree = yr_malloc(sizeof(ATOM_TREE));
  ATOM_TREE_NODE* temp;
  YR_ATOM_LIST_ITEM* wide_atoms;
  YR_ATOM_LIST_ITEM* case_insentive_atoms;
  YR_ATOM_LIST_ITEM* triplet_atoms;

  int min_atom_quality = 0;

  atom_tree->root_node = _yr_atoms_tree_node_create(ATOM_TREE_OR);;
  atom_tree->current_leaf = NULL;

  atom_tree->root_node = _yr_atoms_extract_from_re_node(
      re->root_node, atom_tree, atom_tree->root_node);

  if (atom_tree->current_leaf != NULL)
    _yr_atoms_tree_node_append(atom_tree->root_node, atom_tree->current_leaf);

  if (atom_tree->root_node->children_head ==
      atom_tree->root_node->children_tail)
  {
    // The root OR node has a single child, there's no need for the OR node so
    // we proceed to destroy it and use its child as root.

    temp = atom_tree->root_node;
    atom_tree->root_node = atom_tree->root_node->children_head;
    yr_free(temp);
  }

  // Choose the atoms that will be used.
  min_atom_quality = _yr_atoms_choose(atom_tree->root_node, atoms);

  _yr_atoms_tree_destroy(atom_tree);

  if (min_atom_quality <= 2)
  {
    // Choosen atoms contain low quality ones, let's try infering some higher
    // quality atoms.

    yr_atoms_extract_triplets(re->root_node, &triplet_atoms);

    if (min_atom_quality < _yr_atoms_min_quality(triplet_atoms))
    {
      yr_atoms_list_destroy(*atoms);
      *atoms = triplet_atoms;
    }
    else
    {
      yr_atoms_list_destroy(triplet_atoms);
    }
  }

  if (flags & STRING_GFLAGS_WIDE)
  {
    FAIL_ON_ERROR(_yr_atoms_wide(
        *atoms, &wide_atoms));

    if (flags & STRING_GFLAGS_ASCII)
    {
      *atoms = _yr_atoms_list_concat(*atoms, wide_atoms);
    }
    else
    {
      yr_atoms_list_destroy(*atoms);
      *atoms = wide_atoms;
    }
  }

  if (flags & STRING_GFLAGS_NO_CASE)
  {
    FAIL_ON_ERROR(_yr_atoms_case_insentive(
        *atoms, &case_insentive_atoms));

    *atoms = _yr_atoms_list_concat(*atoms, case_insentive_atoms);
  }

  return ERROR_SUCCESS;
}
Example #7
0
int _yr_atoms_choose(
    ATOM_TREE_NODE* node,
    YR_ATOM_LIST_ITEM** choosen_atoms)
{
  ATOM_TREE_NODE* child;
  YR_ATOM_LIST_ITEM* item;
  YR_ATOM_LIST_ITEM* tail;

  int i, quality;
  int max_quality = 0;
  int min_quality = 10000;

  *choosen_atoms = NULL;

  if (node == NULL)
    return 0;

  switch (node->type)
  {
  case ATOM_TREE_LEAF:

    item = yr_malloc(sizeof(YR_ATOM_LIST_ITEM));

    for (i = 0; i < node->atom_length; i++)
      item->atom[i] = node->atom[i];

    item->atom_length = node->atom_length;
    item->forward_code = node->forward_code;
    item->backward_code = node->backward_code;
    item->backtrack = 0;
    item->next = NULL;

    *choosen_atoms = item;

    return _yr_atoms_quality(node->atom, node->atom_length);

  case ATOM_TREE_OR:

    child = node->children_head;

    while (child != NULL)
    {
      quality = _yr_atoms_choose(child, &item);

      if (quality > max_quality)
      {
        max_quality = quality;
        yr_atoms_list_destroy(*choosen_atoms);
        *choosen_atoms = item;
      }
      else
      {
        yr_atoms_list_destroy(item);
      }

      child = child->next_sibling;
    }

    return max_quality;

  case ATOM_TREE_AND:

    child = node->children_head;

    while (child != NULL)
    {
      quality = _yr_atoms_choose(child, &item);

      if (quality < min_quality)
        min_quality = quality;

      tail = item;
      while (tail->next != NULL)
        tail = tail->next;

      tail->next = *choosen_atoms;
      *choosen_atoms = item;

      child = child->next_sibling;
    }

    return min_quality;
  }

  return 0;
}
Example #8
0
int yr_atoms_extract_from_string(
    uint8_t* string,
    int string_length,
    int flags,
    YR_ATOM_LIST_ITEM** atoms)
{
  YR_ATOM_LIST_ITEM* item;
  YR_ATOM_LIST_ITEM* case_insentive_atoms;
  YR_ATOM_LIST_ITEM* wide_atoms;

  int max_quality;
  int quality;
  int i, j, length;

  item = yr_malloc(sizeof(YR_ATOM_LIST_ITEM));

  if (item == NULL)
    return ERROR_INSUFICIENT_MEMORY;

  item->forward_code = NULL;
  item->backward_code = NULL;
  item->next = NULL;
  item->backtrack = 0;

  length = min(string_length, MAX_ATOM_LENGTH);

  for (i = 0; i < length; i++)
    item->atom[i] = string[i];

  item->atom_length = i;

  max_quality = _yr_atoms_quality(string, length);

  for (i = MAX_ATOM_LENGTH; i < string_length; i++)
  {
    quality = _yr_atoms_quality(
        string + i - MAX_ATOM_LENGTH + 1, MAX_ATOM_LENGTH);

    if (quality > max_quality)
    {
      for (j = 0; j < MAX_ATOM_LENGTH; j++)
        item->atom[j] = string[i + j - MAX_ATOM_LENGTH + 1];

      item->backtrack = i - MAX_ATOM_LENGTH + 1;
      max_quality = quality;
    }
  }

  if (flags & STRING_GFLAGS_WIDE)
  {
    FAIL_ON_ERROR(_yr_atoms_wide(
        item, &wide_atoms));

    if (flags & STRING_GFLAGS_ASCII)
    {
      item = _yr_atoms_list_concat(item, wide_atoms);
    }
    else
    {
      yr_atoms_list_destroy(item);
      item = wide_atoms;
    }
  }

  if (flags & STRING_GFLAGS_NO_CASE)
  {
    FAIL_ON_ERROR(_yr_atoms_case_insentive(
        item, &case_insentive_atoms));

    item = _yr_atoms_list_concat(item, case_insentive_atoms);
  }

  *atoms = item;
  return ERROR_SUCCESS;
}
Example #9
0
static int _yr_parser_write_string(
    const char* identifier,
    int flags,
    YR_COMPILER* compiler,
    SIZED_STRING* str,
    RE_AST* re_ast,
    YR_STRING** string,
    int* min_atom_quality,
    int* num_atom)
{
  SIZED_STRING* literal_string;
  YR_ATOM_LIST_ITEM* atom;
  YR_ATOM_LIST_ITEM* atom_list = NULL;

  int c, result;
  int max_string_len;
  bool free_literal = false;

  *string = NULL;

  result = yr_arena_allocate_struct(
      compiler->strings_arena,
      sizeof(YR_STRING),
      (void**) string,
      offsetof(YR_STRING, identifier),
      offsetof(YR_STRING, string),
      offsetof(YR_STRING, chained_to),
      offsetof(YR_STRING, rule),
      EOL);

  if (result != ERROR_SUCCESS)
    return result;

  result = yr_arena_write_string(
      compiler->sz_arena,
      identifier,
      &(*string)->identifier);

  if (result != ERROR_SUCCESS)
    return result;

  if (flags & STRING_GFLAGS_HEXADECIMAL ||
      flags & STRING_GFLAGS_REGEXP)
  {
    literal_string = yr_re_ast_extract_literal(re_ast);

    if (literal_string != NULL)
    {
      flags |= STRING_GFLAGS_LITERAL;
      free_literal = true;
    }
    else
    {
      // Non-literal strings can't be marked as fixed offset because once we
      // find a string atom in the scanned data we don't know the offset where
      // the string should start, as the non-literal strings can contain
      // variable-length portions.

      flags &= ~STRING_GFLAGS_FIXED_OFFSET;
    }
  }
  else
  {
    literal_string = str;
    flags |= STRING_GFLAGS_LITERAL;
  }

  (*string)->g_flags = flags;
  (*string)->chained_to = NULL;
  (*string)->fixed_offset = UNDEFINED;
  (*string)->rule = compiler->current_rule;

  memset((*string)->matches, 0,
         sizeof((*string)->matches));

  memset((*string)->unconfirmed_matches, 0,
         sizeof((*string)->unconfirmed_matches));

  if (flags & STRING_GFLAGS_LITERAL)
  {
    (*string)->length = (uint32_t) literal_string->length;

    result = yr_arena_write_data(
        compiler->sz_arena,
        literal_string->c_string,
        literal_string->length + 1,   // +1 to include terminating NULL
        (void**) &(*string)->string);

    if (result == ERROR_SUCCESS)
    {
      result = yr_atoms_extract_from_string(
          &compiler->atoms_config,
          (uint8_t*) literal_string->c_string,
          (int32_t) literal_string->length,
          flags,
          &atom_list,
          min_atom_quality);
    }
  }
  else
  {
    // Emit forwards code
    result = yr_re_ast_emit_code(re_ast, compiler->re_code_arena, false);

    // Emit backwards code
    if (result == ERROR_SUCCESS)
      result = yr_re_ast_emit_code(re_ast, compiler->re_code_arena, true);

    if (result == ERROR_SUCCESS)
      result = yr_atoms_extract_from_re(
          &compiler->atoms_config,
          re_ast,
          flags,
          &atom_list,
          min_atom_quality);
  }

  if (result == ERROR_SUCCESS)
  {
    // Add the string to Aho-Corasick automaton.
    result = yr_ac_add_string(
        compiler->automaton,
        *string,
        atom_list,
        compiler->matches_arena);
  }

  if (flags & STRING_GFLAGS_LITERAL)
  {
    if (flags & STRING_GFLAGS_WIDE)
      max_string_len = (*string)->length * 2;
    else
      max_string_len = (*string)->length;

    if (max_string_len <= YR_MAX_ATOM_LENGTH)
      (*string)->g_flags |= STRING_GFLAGS_FITS_IN_ATOM;
  }

  atom = atom_list;
  c = 0;

  while (atom != NULL)
  {
    atom = atom->next;
    c++;
  }

  (*num_atom) += c;

  if (free_literal)
    yr_free(literal_string);

  if (atom_list != NULL)
    yr_atoms_list_destroy(atom_list);

  return result;
}
Example #10
0
YR_STRING* yr_parser_reduce_string_declaration(
    yyscan_t yyscanner,
    int32_t flags,
    const char* identifier,
    SIZED_STRING* str)
{
  int i;
  int error_offset;
  int min_atom_length;
  char* file_name;
  char message[512];

  YR_STRING* string;
  YR_AC_MATCH* new_match;
  ATOM_TREE* atom_tree;
  YR_ATOM_LIST_ITEM* atom;
  YR_ATOM_LIST_ITEM* atom_list = NULL;
  RE* re = NULL;

  uint8_t* literal_string;

  int literal_string_len;
  int max_string_len;

  YR_COMPILER* compiler = yyget_extra(yyscanner);

  compiler->last_result = yr_arena_allocate_struct(
      compiler->strings_arena,
      sizeof(YR_STRING),
      (void**) &string,
      offsetof(YR_STRING, identifier),
      offsetof(YR_STRING, string),
      EOL);

  if (compiler->last_result != ERROR_SUCCESS)
    return NULL;

  compiler->last_result = yr_arena_write_string(
      compiler->sz_arena,
      identifier,
      &string->identifier);

  if (compiler->last_result != ERROR_SUCCESS)
    return NULL;

  if (strcmp(identifier,"$") == 0)
    flags |= STRING_GFLAGS_ANONYMOUS;

  if (!(flags & STRING_GFLAGS_WIDE))
    flags |= STRING_GFLAGS_ASCII;

  // The STRING_GFLAGS_SINGLE_MATCH flag indicates that finding
  // a single match for the string is enough. This is true in
  // most cases, except when the string count (#) and string offset (@)
  // operators are used. All strings are marked STRING_FLAGS_SINGLE_MATCH
  // initially, and unmarked later if required.

  flags |= STRING_GFLAGS_SINGLE_MATCH;

  string->g_flags = flags;

  memset(string->matches, 0, sizeof(string->matches));

  if (flags & STRING_GFLAGS_HEXADECIMAL ||
      flags & STRING_GFLAGS_REGEXP)
  {
    if (flags & STRING_GFLAGS_HEXADECIMAL)
      compiler->last_result = yr_re_compile_hex(
          str->c_string, &re);
    else
      compiler->last_result = yr_re_compile(
          str->c_string, &re);

    if (compiler->last_result != ERROR_SUCCESS)
    {
      snprintf(
          message,
          sizeof(message),
          "invalid %s in string \"%s\": %s",
          (flags & STRING_GFLAGS_HEXADECIMAL) ?
              "hex string" : "regular expression",
          identifier,
          re->error_message);

      yr_compiler_set_error_extra_info(compiler, message);
      string = NULL;
      goto _exit;
    }

    if (re->flags & RE_FLAGS_START_ANCHORED)
      string->g_flags |= STRING_GFLAGS_START_ANCHORED;

    if (re->flags & RE_FLAGS_END_ANCHORED)
      string->g_flags |= STRING_GFLAGS_END_ANCHORED;

    if (re->flags & RE_FLAGS_FAST_HEX_REGEXP)
      string->g_flags |= STRING_GFLAGS_FAST_HEX_REGEXP;

    if (re->flags & RE_FLAGS_LITERAL_STRING)
    {
      string->g_flags |= STRING_GFLAGS_LITERAL;
      literal_string = re->literal_string;
      literal_string_len = re->literal_string_len;

      compiler->last_result = yr_atoms_extract_from_string(
          literal_string, literal_string_len, string->g_flags, &atom_list);
    }
    else
    {
      compiler->last_result = yr_re_emit_code(
          re, compiler->re_code_arena);

      if (compiler->last_result != ERROR_SUCCESS)
      {
        string = NULL;
        goto _exit;
      }

      compiler->last_result = yr_atoms_extract_from_re(
          re, string->g_flags, &atom_list);
    }
  }
  else
  {
    string->g_flags |= STRING_GFLAGS_LITERAL;
    literal_string = (uint8_t*) str->c_string;
    literal_string_len = str->length;

    compiler->last_result  = yr_atoms_extract_from_string(
        literal_string, literal_string_len, string->g_flags, &atom_list);
  }

  if (compiler->last_result != ERROR_SUCCESS)
  {
    string = NULL;
    goto _exit;
  }

  if (STRING_IS_LITERAL(string))
  {
    compiler->last_result = yr_arena_write_data(
        compiler->sz_arena,
        literal_string,
        literal_string_len,
        (void*) &string->string);

    if (compiler->last_result != ERROR_SUCCESS)
    {
      string = NULL;
      goto _exit;
    }

    string->length = literal_string_len;
  }

  // Add the string to Aho-Corasick automaton.

  if (atom_list != NULL)
  {
    compiler->last_result = yr_ac_add_string(
      compiler->automaton_arena,
      compiler->automaton,
      string,
      atom_list);
  }
  else
  {
    compiler->last_result = yr_arena_allocate_struct(
        compiler->automaton_arena,
        sizeof(YR_AC_MATCH),
        (void**) &new_match,
        offsetof(YR_AC_MATCH, string),
        offsetof(YR_AC_MATCH, forward_code),
        offsetof(YR_AC_MATCH, backward_code),
        offsetof(YR_AC_MATCH, next),
        EOL);

    if (compiler->last_result == ERROR_SUCCESS)
    {
      new_match->backtrack = 0;
      new_match->string = string;
      new_match->forward_code = re->root_node->forward_code;
      new_match->backward_code = NULL;
      new_match->next = compiler->automaton->root->matches;
      compiler->automaton->root->matches = new_match;
    }
  }

  atom = atom_list;

  if (atom != NULL)
    min_atom_length = MAX_ATOM_LENGTH;
  else
    min_atom_length = 0;

  while (atom != NULL)
  {
    if (atom->atom_length < min_atom_length)
      min_atom_length = atom->atom_length;
    atom = atom->next;
  }

  if (STRING_IS_LITERAL(string))
  {
    if (STRING_IS_WIDE(string))
      max_string_len = string->length * 2;
    else
      max_string_len = string->length;

    if (max_string_len == min_atom_length)
      string->g_flags |= STRING_GFLAGS_FITS_IN_ATOM;
  }

  if (compiler->file_name_stack_ptr > 0)
    file_name = compiler->file_name_stack[compiler->file_name_stack_ptr - 1];
  else
    file_name = NULL;

  if (min_atom_length < 2 && compiler->error_report_function != NULL)
  {
    snprintf(
        message,
        sizeof(message),
        "%s is slowing down scanning%s",
        string->identifier,
        min_atom_length == 0 ? " (critical!)" : "");

    compiler->error_report_function(
        YARA_ERROR_LEVEL_WARNING,
        file_name,
        yyget_lineno(yyscanner),
        message);
  }

  if (compiler->last_result != ERROR_SUCCESS)
    string = NULL;

_exit:

  if (atom_list != NULL)
    yr_atoms_list_destroy(atom_list);

  if (re != NULL)
    yr_re_destroy(re);

  return string;
}
Example #11
0
static int _yr_atoms_choose(
    YR_ATOMS_CONFIG* config,
    YR_ATOM_TREE_NODE* node,
    YR_ATOM_LIST_ITEM** chosen_atoms,
    int* atoms_quality)
{
  YR_ATOM_TREE_NODE* child;
  YR_ATOM_LIST_ITEM* item;
  YR_ATOM_LIST_ITEM* tail;

  int shift, quality;

  int max_quality = YR_MIN_ATOM_QUALITY;
  int min_quality = YR_MAX_ATOM_QUALITY;

  *chosen_atoms = NULL;
  *atoms_quality = YR_MIN_ATOM_QUALITY;

  switch (node->type)
  {
  case ATOM_TREE_LEAF:

    item = (YR_ATOM_LIST_ITEM*) yr_malloc(sizeof(YR_ATOM_LIST_ITEM));

    if (item == NULL)
      return ERROR_INSUFFICIENT_MEMORY;

    memcpy(&item->atom, &node->atom, sizeof(YR_ATOM));

    shift = _yr_atoms_trim(&item->atom);

    if (item->atom.length > 0)
    {
      item->forward_code = node->re_nodes[shift]->forward_code;
      item->backward_code = node->re_nodes[shift]->backward_code;
      item->backtrack = 0;
      item->next = NULL;

      *chosen_atoms = item;
      *atoms_quality = config->get_atom_quality(config, &item->atom);
    }
    else
    {
      yr_free(item);
    }

    break;

  case ATOM_TREE_OR:

    // The choosen nodes are those coming from the highest quality child.

    child = node->children_head;

    while (child != NULL)
    {
      FAIL_ON_ERROR(_yr_atoms_choose(config, child, &item, &quality));

      if (quality > max_quality)
      {
        max_quality = quality;
        yr_atoms_list_destroy(*chosen_atoms);
        *chosen_atoms = item;
      }
      else
      {
        yr_atoms_list_destroy(item);
      }

      if (max_quality == YR_MAX_ATOM_QUALITY)
        break;

      child = child->next_sibling;
    }

    *atoms_quality = max_quality;
    break;

  case ATOM_TREE_AND:

    // The choosen nodes are the concatenation of the the nodes choosen from
    // all the children.

    child = node->children_head;

    while (child != NULL)
    {
      FAIL_ON_ERROR(_yr_atoms_choose(config, child, &item, &quality));

      if (quality < min_quality)
        min_quality = quality;

      if (item != NULL)
      {
        tail = item;
        while (tail->next != NULL)
          tail = tail->next;

        tail->next = *chosen_atoms;
        *chosen_atoms = item;
      }

      child = child->next_sibling;
    }

    *atoms_quality = min_quality;
    break;
  }

  return ERROR_SUCCESS;
}