Example #1
0
File: atoms.c Project: rednaga/yara
int yr_atoms_extract_from_re(
    RE_AST* re_ast,
    int flags,
    YR_ATOM_LIST_ITEM** atoms)
{
  ATOM_TREE* atom_tree = (ATOM_TREE*) yr_malloc(sizeof(ATOM_TREE));
  ATOM_TREE_NODE* temp;
  YR_ATOM_LIST_ITEM* wide_atoms;
  YR_ATOM_LIST_ITEM* case_insensitive_atoms;
  YR_ATOM_LIST_ITEM* triplet_atoms;

  int min_atom_quality = YR_MIN_ATOM_QUALITY;

  if (atom_tree == NULL)
    return ERROR_INSUFFICIENT_MEMORY;

  atom_tree->root_node = _yr_atoms_tree_node_create(ATOM_TREE_OR);

  if (atom_tree->root_node == NULL)
  {
    _yr_atoms_tree_destroy(atom_tree);
    return ERROR_INSUFFICIENT_MEMORY;
  }

  atom_tree->current_leaf = NULL;

  atom_tree->root_node = _yr_atoms_extract_from_re_node(
      re_ast->root_node, atom_tree, atom_tree->root_node);

  if (atom_tree->root_node == NULL)
  {
    _yr_atoms_tree_destroy(atom_tree);
    return ERROR_INSUFFICIENT_MEMORY;
  }

  if (atom_tree->current_leaf != NULL)
    _yr_atoms_tree_node_append(atom_tree->root_node, atom_tree->current_leaf);

  if (atom_tree->root_node->children_head ==
      atom_tree->root_node->children_tail)
  {
    // The root OR node has a single child, there's no need for the OR node so
    // we proceed to destroy it and use its child as root.

    temp = atom_tree->root_node;
    atom_tree->root_node = atom_tree->root_node->children_head;
    yr_free(temp);
  }

  // Initialize atom list
  *atoms = NULL;

  if (atom_tree->root_node != NULL)
  {
    // Choose the atoms that will be used.
    FAIL_ON_ERROR_WITH_CLEANUP(
        _yr_atoms_choose(atom_tree->root_node, atoms, &min_atom_quality),
        _yr_atoms_tree_destroy(atom_tree));
  }

  _yr_atoms_tree_destroy(atom_tree);

  if (min_atom_quality <= 2)
  {
    // Chosen atoms contain low quality ones, let's try infering some higher
    // quality atoms.

    FAIL_ON_ERROR_WITH_CLEANUP(
        yr_atoms_extract_triplets(re_ast->root_node, &triplet_atoms),
        {
          yr_atoms_list_destroy(*atoms);
          yr_atoms_list_destroy(triplet_atoms);
          *atoms = NULL;
        });
Example #2
0
File: atoms.c Project: rednaga/yara
//
// _yr_atoms_extract_from_re_node
//
// Recursively walks a regular expression AST node and records the atoms
// found on it in the atom tree.
//
// Consecutive literals are accumulated in atom_tree->current_leaf. The leaf
// stores at most MAX_ATOM_LENGTH bytes; once it is full, the window formed by
// the last MAX_ATOM_LENGTH literal nodes replaces the stored atom only if
// _yr_atoms_quality() rates it strictly higher. Any node that can't extend a
// literal run flushes the pending leaf into a tree node through the
// append_current_leaf_to_node macro.
//
// Args:
//    re_node: RE AST node to process.
//    atom_tree: Tree being built; its current_leaf is read and updated.
//    current_node: Tree node that receives the atoms found under re_node.
//
// Returns:
//    current_node on success, NULL if a tree node could not be created. This
//    function never frees current_node, so on failure the caller still owns
//    it. Subtrees created here for an alternation and not yet attached to
//    current_node are destroyed before returning NULL.
//
static ATOM_TREE_NODE* _yr_atoms_extract_from_re_node(
  RE_NODE* re_node,
  ATOM_TREE* atom_tree,
  ATOM_TREE_NODE* current_node)
{
  ATOM_TREE_NODE* left_node;
  ATOM_TREE_NODE* right_node;
  ATOM_TREE_NODE* and_node;
  ATOM_TREE_NODE* current_leaf;
  ATOM_TREE_NODE* result;
  ATOM_TREE_NODE* temp;

  int quality;
  int new_quality;
  int i;

  uint8_t new_atom[MAX_ATOM_LENGTH];

  switch(re_node->type)
  {
    case RE_NODE_LITERAL:

      if (atom_tree->current_leaf == NULL)
      {
        // First literal of a new run, start a fresh leaf anchored at this
        // node's code.
        atom_tree->current_leaf = _yr_atoms_tree_node_create(ATOM_TREE_LEAF);

        if (atom_tree->current_leaf == NULL)
          return NULL;

        atom_tree->current_leaf->forward_code = re_node->forward_code;
        atom_tree->current_leaf->backward_code = re_node->backward_code;

        assert(atom_tree->current_leaf->forward_code != NULL);
        assert(atom_tree->current_leaf->backward_code != NULL);
      }

      current_leaf = atom_tree->current_leaf;

      if (current_leaf->atom_length < MAX_ATOM_LENGTH)
      {
        // There's still room in the atom, just append the literal.
        current_leaf->atom[current_leaf->atom_length] =
            (uint8_t) re_node->value;
        current_leaf->recent_nodes[current_leaf->atom_length] = re_node;
        current_leaf->atom_length++;
      }
      else
      {
        // The atom is full. Slide the window of recent literal nodes one
        // position and build the candidate atom it describes.
        for (i = 1; i < MAX_ATOM_LENGTH; i++)
          current_leaf->recent_nodes[i - 1] = current_leaf->recent_nodes[i];

        current_leaf->recent_nodes[MAX_ATOM_LENGTH - 1] = re_node;

        for (i = 0; i < MAX_ATOM_LENGTH; i++)
          new_atom[i] = (uint8_t) current_leaf->recent_nodes[i]->value;

        quality = _yr_atoms_quality(
            current_leaf->atom,
            MAX_ATOM_LENGTH);

        new_quality = _yr_atoms_quality(
            new_atom,
            MAX_ATOM_LENGTH);

        if (new_quality > quality)
        {
          // The candidate is better, keep it and re-anchor the leaf at the
          // first node of the window.
          for (i = 0; i < MAX_ATOM_LENGTH; i++)
            current_leaf->atom[i] = new_atom[i];

          current_leaf->forward_code =
              current_leaf->recent_nodes[0]->forward_code;

          current_leaf->backward_code =
              current_leaf->recent_nodes[0]->backward_code;

          assert(current_leaf->forward_code != NULL);
          assert(current_leaf->backward_code != NULL);
        }
      }

      return current_node;

    case RE_NODE_CONCAT:

      current_node = _yr_atoms_extract_from_re_node(
          re_node->left, atom_tree, current_node);

      if (current_node == NULL)
        return NULL;

      current_node = _yr_atoms_extract_from_re_node(
          re_node->right, atom_tree, current_node);

      return current_node;

    case RE_NODE_ALT:

      append_current_leaf_to_node(current_node);

      left_node = _yr_atoms_tree_node_create(ATOM_TREE_OR);

      if (left_node == NULL)
        return NULL;

      // Keep the result apart from left_node: on failure the recursive call
      // returns NULL without freeing the node it received, so overwriting
      // left_node would leak it.
      result = _yr_atoms_extract_from_re_node(
          re_node->left, atom_tree, left_node);

      if (result == NULL)
      {
        _yr_atoms_tree_node_destroy(left_node);
        return NULL;
      }

      left_node = result;

      append_current_leaf_to_node(left_node);

      if (left_node->children_head == NULL)
      {
        // No atoms in the left alternative, nothing is added for this node.
        _yr_atoms_tree_node_destroy(left_node);
        return current_node;
      }

      if (left_node->children_head == left_node->children_tail)
      {
        // A single child, the wrapping OR node is unnecessary.
        temp = left_node;
        left_node = left_node->children_head;
        yr_free(temp);
      }

      right_node = _yr_atoms_tree_node_create(ATOM_TREE_OR);

      if (right_node == NULL)
      {
        // left_node isn't attached to the tree yet, release it here.
        _yr_atoms_tree_node_destroy(left_node);
        return NULL;
      }

      result = _yr_atoms_extract_from_re_node(
          re_node->right, atom_tree, right_node);

      if (result == NULL)
      {
        _yr_atoms_tree_node_destroy(left_node);
        _yr_atoms_tree_node_destroy(right_node);
        return NULL;
      }

      right_node = result;

      append_current_leaf_to_node(right_node);

      if (right_node->children_head == NULL)
      {
        // No atoms in the right alternative, discard both sides.
        _yr_atoms_tree_node_destroy(left_node);
        _yr_atoms_tree_node_destroy(right_node);
        return current_node;
      }

      if (right_node->children_head == right_node->children_tail)
      {
        temp = right_node;
        right_node = right_node->children_head;
        yr_free(temp);
      }

      and_node = _yr_atoms_tree_node_create(ATOM_TREE_AND);

      if (and_node == NULL)
      {
        // Neither side has been linked anywhere, release both.
        _yr_atoms_tree_node_destroy(left_node);
        _yr_atoms_tree_node_destroy(right_node);
        return NULL;
      }

      // Both alternatives hang from an AND node as its two children.
      and_node->children_head = left_node;
      and_node->children_tail = right_node;
      left_node->next_sibling = right_node;

      _yr_atoms_tree_node_append(current_node, and_node);

      return current_node;

    case RE_NODE_RANGE:

      // With a lower bound of zero the sub-expression may not appear at all,
      // so the literal run before it ends here.
      if (re_node->start == 0)
        append_current_leaf_to_node(current_node);

      // The sub-expression is processed once per mandatory repetition.
      for (i = 0; i < re_node->start; i++)
      {
        current_node = _yr_atoms_extract_from_re_node(
            re_node->left, atom_tree, current_node);

        if (current_node == NULL)
          return NULL;
      }

      // A variable number of repetitions ends the literal run as well.
      if (re_node->start != re_node->end)
        append_current_leaf_to_node(current_node);

      return current_node;

    case RE_NODE_PLUS:

      current_node = _yr_atoms_extract_from_re_node(
          re_node->left, atom_tree, current_node);

      if (current_node == NULL)
        return NULL;

      append_current_leaf_to_node(current_node);
      return current_node;

    case RE_NODE_ANY:
    case RE_NODE_RANGE_ANY:
    case RE_NODE_STAR:
    case RE_NODE_CLASS:
    case RE_NODE_MASKED_LITERAL:
    case RE_NODE_WORD_CHAR:
    case RE_NODE_NON_WORD_CHAR:
    case RE_NODE_SPACE:
    case RE_NODE_NON_SPACE:
    case RE_NODE_DIGIT:
    case RE_NODE_NON_DIGIT:
    case RE_NODE_EMPTY:
    case RE_NODE_ANCHOR_START:
    case RE_NODE_ANCHOR_END:
    case RE_NODE_WORD_BOUNDARY:
    case RE_NODE_NON_WORD_BOUNDARY:

      // None of these extend the atom, they only flush the pending leaf.
      append_current_leaf_to_node(current_node);
      return current_node;

    default:
      assert(FALSE);
  }

  return NULL;
}
Example #3
0
//
// yr_atoms_extract_from_re
//
// Builds an atom tree for a regular expression, chooses atoms from it and
// returns them in *atoms. If the chosen atoms have a minimum quality of 2 or
// less, the atoms produced by yr_atoms_extract_triplets() are used instead
// whenever their minimum quality is higher.
//
// Args:
//    re: Regular expression whose root_node is walked.
//    flags: String flags. STRING_GFLAGS_WIDE adds wide variants of the atoms
//           (replacing the original ones unless STRING_GFLAGS_ASCII is also
//           set); STRING_GFLAGS_NO_CASE appends case-insensitive variants.
//    atoms: Receives the resulting list. It is set to NULL before anything
//           else is done.
//
// Returns:
//    ERROR_SUCCESS, ERROR_INSUFFICIENT_MEMORY if the atom tree could not be
//    built, or the error propagated by FAIL_ON_ERROR from the wide /
//    case-insensitive helpers.
//
int yr_atoms_extract_from_re(
    RE* re,
    int flags,
    YR_ATOM_LIST_ITEM** atoms)
{
  ATOM_TREE* atom_tree = yr_malloc(sizeof(ATOM_TREE));
  ATOM_TREE_NODE* extracted;
  ATOM_TREE_NODE* temp;
  YR_ATOM_LIST_ITEM* wide_atoms;
  YR_ATOM_LIST_ITEM* case_insensitive_atoms;
  YR_ATOM_LIST_ITEM* triplet_atoms;

  int min_atom_quality = 0;

  // Initialize atom list, so it has a defined value on every path.
  *atoms = NULL;

  if (atom_tree == NULL)
    return ERROR_INSUFFICIENT_MEMORY;

  atom_tree->root_node = _yr_atoms_tree_node_create(ATOM_TREE_OR);
  atom_tree->current_leaf = NULL;

  if (atom_tree->root_node == NULL)
  {
    yr_free(atom_tree);
    return ERROR_INSUFFICIENT_MEMORY;
  }

  // The result is kept apart from root_node so that the root is still
  // reachable, and gets destroyed with the tree, if extraction fails.
  extracted = _yr_atoms_extract_from_re_node(
      re->root_node, atom_tree, atom_tree->root_node);

  if (extracted == NULL)
  {
    _yr_atoms_tree_destroy(atom_tree);
    return ERROR_INSUFFICIENT_MEMORY;
  }

  atom_tree->root_node = extracted;

  if (atom_tree->current_leaf != NULL)
    _yr_atoms_tree_node_append(atom_tree->root_node, atom_tree->current_leaf);

  if (atom_tree->root_node->children_head ==
      atom_tree->root_node->children_tail)
  {
    // The root OR node has a single child, there's no need for the OR node so
    // we proceed to destroy it and use its child as root. If it has no
    // children at all the root becomes NULL.

    temp = atom_tree->root_node;
    atom_tree->root_node = atom_tree->root_node->children_head;
    yr_free(temp);
  }

  if (atom_tree->root_node != NULL)
  {
    // Choose the atoms that will be used.
    min_atom_quality = _yr_atoms_choose(atom_tree->root_node, atoms);
  }

  _yr_atoms_tree_destroy(atom_tree);

  if (min_atom_quality <= 2)
  {
    // Chosen atoms contain low quality ones, let's try inferring some higher
    // quality atoms.

    yr_atoms_extract_triplets(re->root_node, &triplet_atoms);

    if (min_atom_quality < _yr_atoms_min_quality(triplet_atoms))
    {
      yr_atoms_list_destroy(*atoms);
      *atoms = triplet_atoms;
    }
    else
    {
      yr_atoms_list_destroy(triplet_atoms);
    }
  }

  // NOTE(review): when FAIL_ON_ERROR returns below, *atoms is left as is;
  // confirm whether callers release it on error.

  if (flags & STRING_GFLAGS_WIDE)
  {
    FAIL_ON_ERROR(_yr_atoms_wide(
        *atoms, &wide_atoms));

    if (flags & STRING_GFLAGS_ASCII)
    {
      *atoms = _yr_atoms_list_concat(*atoms, wide_atoms);
    }
    else
    {
      yr_atoms_list_destroy(*atoms);
      *atoms = wide_atoms;
    }
  }

  if (flags & STRING_GFLAGS_NO_CASE)
  {
    FAIL_ON_ERROR(_yr_atoms_case_insentive(
        *atoms, &case_insensitive_atoms));

    *atoms = _yr_atoms_list_concat(*atoms, case_insensitive_atoms);
  }

  return ERROR_SUCCESS;
}