Exemplo n.º 1
0
int yr_atoms_extract_from_re(
    YR_ATOMS_CONFIG* config,
    RE_AST* re_ast,
    int flags,
    YR_ATOM_LIST_ITEM** atoms,
    int* min_atom_quality)
{
  YR_ATOM_TREE* atom_tree = (YR_ATOM_TREE*) yr_malloc(sizeof(YR_ATOM_TREE));

  YR_ATOM_LIST_ITEM* wide_atoms;
  YR_ATOM_LIST_ITEM* case_insensitive_atoms;

  if (atom_tree == NULL)
    return ERROR_INSUFFICIENT_MEMORY;

  atom_tree->root_node = _yr_atoms_tree_node_create(ATOM_TREE_OR);

  if (atom_tree->root_node == NULL)
  {
    _yr_atoms_tree_destroy(atom_tree);
    return ERROR_INSUFFICIENT_MEMORY;
  }

  FAIL_ON_ERROR_WITH_CLEANUP(
      _yr_atoms_extract_from_re(config, re_ast, atom_tree->root_node),
      _yr_atoms_tree_destroy(atom_tree));

  // Initialize atom list
  *atoms = NULL;

  // Choose the atoms that will be used.
  FAIL_ON_ERROR_WITH_CLEANUP(
      _yr_atoms_choose(config, atom_tree->root_node, atoms, min_atom_quality),
      _yr_atoms_tree_destroy(atom_tree));

  _yr_atoms_tree_destroy(atom_tree);

  FAIL_ON_ERROR_WITH_CLEANUP(
      _yr_atoms_expand_wildcards(*atoms),
      {
        yr_atoms_list_destroy(*atoms);
        *atoms = NULL;
      });
Exemplo n.º 2
0
Arquivo: atoms.c Projeto: rednaga/yara
//
// _yr_atoms_extract_from_re_node
//
// Recursively walks the regular expression tree rooted at re_node and builds
// the corresponding atom tree below current_node.
//
// Consecutive RE_NODE_LITERAL nodes are accumulated in atom_tree->current_leaf,
// which holds at most MAX_ATOM_LENGTH bytes. When more literals than that are
// found in a row, the leaf keeps whichever MAX_ATOM_LENGTH-byte window scores
// higher according to _yr_atoms_quality. Any node that breaks a run of literals
// flushes the pending leaf with append_current_leaf_to_node (a macro defined
// elsewhere in the file; presumably it appends atom_tree->current_leaf to the
// given node and resets it to NULL -- confirm against its definition).
//
// Args:
//    RE_NODE* re_node              - Node of the regular expression to process.
//    ATOM_TREE* atom_tree          - Tree being built; its current_leaf field
//                                    is read and updated.
//    ATOM_TREE_NODE* current_node  - Node where new leaves are appended.
//
// Returns:
//    current_node on success, NULL if some atom tree node could not be
//    created. Nodes created by this call that were not yet attached to
//    current_node are destroyed before returning NULL.
//

static ATOM_TREE_NODE* _yr_atoms_extract_from_re_node(
  RE_NODE* re_node,
  ATOM_TREE* atom_tree,
  ATOM_TREE_NODE* current_node)
{
  ATOM_TREE_NODE* left_node;
  ATOM_TREE_NODE* right_node;
  ATOM_TREE_NODE* and_node;
  ATOM_TREE_NODE* current_leaf;
  ATOM_TREE_NODE* temp;

  int quality;
  int new_quality;
  int i;

  uint8_t new_atom[MAX_ATOM_LENGTH];

  switch(re_node->type)
  {
    case RE_NODE_LITERAL:

      // Start a new leaf if there is no run of literals in progress.
      if (atom_tree->current_leaf == NULL)
      {
        atom_tree->current_leaf = _yr_atoms_tree_node_create(ATOM_TREE_LEAF);

        if (atom_tree->current_leaf == NULL)
          return NULL;

        atom_tree->current_leaf->forward_code = re_node->forward_code;
        atom_tree->current_leaf->backward_code = re_node->backward_code;

        assert(atom_tree->current_leaf->forward_code != NULL);
        assert(atom_tree->current_leaf->backward_code != NULL);
      }

      current_leaf = atom_tree->current_leaf;

      if (current_leaf->atom_length < MAX_ATOM_LENGTH)
      {
        // There is still room in the atom, just append the literal.
        current_leaf->atom[current_leaf->atom_length] =
            (uint8_t) re_node->value;
        current_leaf->recent_nodes[current_leaf->atom_length] = re_node;
        current_leaf->atom_length++;
      }
      else
      {
        // The atom is full. Slide the window of recent nodes one position
        // to include the new literal and build a candidate atom from it.
        for (i = 1; i < MAX_ATOM_LENGTH; i++)
          current_leaf->recent_nodes[i - 1] = current_leaf->recent_nodes[i];

        current_leaf->recent_nodes[MAX_ATOM_LENGTH - 1] = re_node;

        for (i = 0; i < MAX_ATOM_LENGTH; i++)
          new_atom[i] = (uint8_t) current_leaf->recent_nodes[i]->value;

        quality = _yr_atoms_quality(
            current_leaf->atom,
            MAX_ATOM_LENGTH);

        new_quality = _yr_atoms_quality(
            new_atom,
            MAX_ATOM_LENGTH);

        // Replace the stored atom only if the candidate is strictly better.
        // The code pointers must follow the first node of the chosen window.
        if (new_quality > quality)
        {
          for (i = 0; i < MAX_ATOM_LENGTH; i++)
            current_leaf->atom[i] = new_atom[i];

          current_leaf->forward_code =
              current_leaf->recent_nodes[0]->forward_code;

          current_leaf->backward_code =
              current_leaf->recent_nodes[0]->backward_code;

          assert(current_leaf->forward_code != NULL);
          assert(current_leaf->backward_code != NULL);
        }
      }

      return current_node;

    case RE_NODE_CONCAT:

      current_node = _yr_atoms_extract_from_re_node(
          re_node->left, atom_tree, current_node);

      if (current_node == NULL)
        return NULL;

      current_node = _yr_atoms_extract_from_re_node(
          re_node->right, atom_tree, current_node);

      return current_node;

    case RE_NODE_ALT:

      append_current_leaf_to_node(current_node);

      // Keep the freshly created node in temp so that it can be released if
      // the recursive call fails and returns NULL.
      temp = _yr_atoms_tree_node_create(ATOM_TREE_OR);

      if (temp == NULL)
        return NULL;

      left_node = _yr_atoms_extract_from_re_node(
          re_node->left, atom_tree, temp);

      if (left_node == NULL)
      {
        _yr_atoms_tree_node_destroy(temp);
        return NULL;
      }

      append_current_leaf_to_node(left_node);

      // If the left alternative yields no atoms the whole alternation is
      // dropped; the right alternative is not visited.
      if (left_node->children_head == NULL)
      {
        _yr_atoms_tree_node_destroy(left_node);
        return current_node;
      }

      // An OR node with a single child is redundant, use the child directly.
      if (left_node->children_head == left_node->children_tail)
      {
        temp = left_node;
        left_node = left_node->children_head;
        yr_free(temp);
      }

      temp = _yr_atoms_tree_node_create(ATOM_TREE_OR);

      if (temp == NULL)
      {
        // left_node is not attached to the tree yet, release it here.
        _yr_atoms_tree_node_destroy(left_node);
        return NULL;
      }

      right_node = _yr_atoms_extract_from_re_node(
          re_node->right, atom_tree, temp);

      if (right_node == NULL)
      {
        _yr_atoms_tree_node_destroy(temp);
        _yr_atoms_tree_node_destroy(left_node);
        return NULL;
      }

      append_current_leaf_to_node(right_node);

      if (right_node->children_head == NULL)
      {
        _yr_atoms_tree_node_destroy(left_node);
        _yr_atoms_tree_node_destroy(right_node);
        return current_node;
      }

      if (right_node->children_head == right_node->children_tail)
      {
        temp = right_node;
        right_node = right_node->children_head;
        yr_free(temp);
      }

      and_node = _yr_atoms_tree_node_create(ATOM_TREE_AND);

      if (and_node == NULL)
      {
        // Neither subtree has been attached to the tree, release both.
        _yr_atoms_tree_node_destroy(left_node);
        _yr_atoms_tree_node_destroy(right_node);
        return NULL;
      }

      and_node->children_head = left_node;
      and_node->children_tail = right_node;
      left_node->next_sibling = right_node;

      _yr_atoms_tree_node_append(current_node, and_node);

      return current_node;

    case RE_NODE_RANGE:

      // With a minimum of zero repetitions the operand may not appear at
      // all, so it can't extend the pending leaf.
      if (re_node->start == 0)
        append_current_leaf_to_node(current_node);

      // The operand appears at least re_node->start times in a row.
      for (i = 0; i < re_node->start; i++)
      {
        current_node = _yr_atoms_extract_from_re_node(
            re_node->left, atom_tree, current_node);

        if (current_node == NULL)
          return NULL;
      }

      // With a variable number of repetitions whatever follows is not
      // guaranteed to be contiguous with the pending leaf.
      if (re_node->start != re_node->end)
        append_current_leaf_to_node(current_node);

      return current_node;

    case RE_NODE_PLUS:

      // The operand appears at least once, then the leaf is flushed.
      current_node = _yr_atoms_extract_from_re_node(
          re_node->left, atom_tree, current_node);

      if (current_node == NULL)
        return NULL;

      append_current_leaf_to_node(current_node);
      return current_node;

    case RE_NODE_ANY:
    case RE_NODE_RANGE_ANY:
    case RE_NODE_STAR:
    case RE_NODE_CLASS:
    case RE_NODE_MASKED_LITERAL:
    case RE_NODE_WORD_CHAR:
    case RE_NODE_NON_WORD_CHAR:
    case RE_NODE_SPACE:
    case RE_NODE_NON_SPACE:
    case RE_NODE_DIGIT:
    case RE_NODE_NON_DIGIT:
    case RE_NODE_EMPTY:
    case RE_NODE_ANCHOR_START:
    case RE_NODE_ANCHOR_END:
    case RE_NODE_WORD_BOUNDARY:
    case RE_NODE_NON_WORD_BOUNDARY:

      // None of these contribute bytes to an atom, they only end the
      // current run of literals.
      append_current_leaf_to_node(current_node);
      return current_node;

    default:
      assert(FALSE);
  }

  return NULL;
}
Exemplo n.º 3
0
Arquivo: atoms.c Projeto: rednaga/yara
int yr_atoms_extract_from_re(
    RE_AST* re_ast,
    int flags,
    YR_ATOM_LIST_ITEM** atoms)
{
  ATOM_TREE* atom_tree = (ATOM_TREE*) yr_malloc(sizeof(ATOM_TREE));
  ATOM_TREE_NODE* temp;
  YR_ATOM_LIST_ITEM* wide_atoms;
  YR_ATOM_LIST_ITEM* case_insensitive_atoms;
  YR_ATOM_LIST_ITEM* triplet_atoms;

  int min_atom_quality = YR_MIN_ATOM_QUALITY;

  if (atom_tree == NULL)
    return ERROR_INSUFFICIENT_MEMORY;

  atom_tree->root_node = _yr_atoms_tree_node_create(ATOM_TREE_OR);

  if (atom_tree->root_node == NULL)
  {
    _yr_atoms_tree_destroy(atom_tree);
    return ERROR_INSUFFICIENT_MEMORY;
  }

  atom_tree->current_leaf = NULL;

  atom_tree->root_node = _yr_atoms_extract_from_re_node(
      re_ast->root_node, atom_tree, atom_tree->root_node);

  if (atom_tree->root_node == NULL)
  {
    _yr_atoms_tree_destroy(atom_tree);
    return ERROR_INSUFFICIENT_MEMORY;
  }

  if (atom_tree->current_leaf != NULL)
    _yr_atoms_tree_node_append(atom_tree->root_node, atom_tree->current_leaf);

  if (atom_tree->root_node->children_head ==
      atom_tree->root_node->children_tail)
  {
    // The root OR node has a single child, there's no need for the OR node so
    // we proceed to destroy it and use its child as root.

    temp = atom_tree->root_node;
    atom_tree->root_node = atom_tree->root_node->children_head;
    yr_free(temp);
  }

  // Initialize atom list
  *atoms = NULL;

  if (atom_tree->root_node != NULL)
  {
    // Choose the atoms that will be used.
    FAIL_ON_ERROR_WITH_CLEANUP(
        _yr_atoms_choose(atom_tree->root_node, atoms, &min_atom_quality),
        _yr_atoms_tree_destroy(atom_tree));
  }

  _yr_atoms_tree_destroy(atom_tree);

  if (min_atom_quality <= 2)
  {
    // Chosen atoms contain low quality ones, let's try infering some higher
    // quality atoms.

    FAIL_ON_ERROR_WITH_CLEANUP(
        yr_atoms_extract_triplets(re_ast->root_node, &triplet_atoms),
        {
          yr_atoms_list_destroy(*atoms);
          yr_atoms_list_destroy(triplet_atoms);
          *atoms = NULL;
        });
Exemplo n.º 4
0
//
// yr_atoms_extract_from_re
//
// Extracts the atoms for a regular expression. An atom tree is built from the
// regular expression, the atoms to be used are chosen from it, and if the
// chosen ones are of low quality an alternative list of triplet atoms is tried.
// Wide and case-insensitive variants are added according to flags.
//
// Args:
//    RE* re                      - Regular expression.
//    int flags                   - STRING_GFLAGS_WIDE, STRING_GFLAGS_ASCII and
//                                  STRING_GFLAGS_NO_CASE are examined.
//    YR_ATOM_LIST_ITEM** atoms   - Receives the resulting list of atoms. It is
//                                  set to NULL before any work is done.
//
// Returns:
//    ERROR_SUCCESS on success, ERROR_INSUFFICIENT_MEMORY if the atom tree
//    can't be allocated or built, or the error reported by _yr_atoms_wide or
//    _yr_atoms_case_insentive.
//

int yr_atoms_extract_from_re(
    RE* re,
    int flags,
    YR_ATOM_LIST_ITEM** atoms)
{
  ATOM_TREE* atom_tree = yr_malloc(sizeof(ATOM_TREE));
  ATOM_TREE_NODE* temp;
  YR_ATOM_LIST_ITEM* wide_atoms;
  YR_ATOM_LIST_ITEM* case_insensitive_atoms;
  YR_ATOM_LIST_ITEM* triplet_atoms;

  int min_atom_quality = 0;

  // Initialize atom list, so that the output is well defined even if the
  // function fails or the regular expression produces no atoms at all.
  *atoms = NULL;

  if (atom_tree == NULL)
    return ERROR_INSUFFICIENT_MEMORY;

  atom_tree->current_leaf = NULL;
  atom_tree->root_node = _yr_atoms_tree_node_create(ATOM_TREE_OR);

  if (atom_tree->root_node == NULL)
  {
    _yr_atoms_tree_destroy(atom_tree);
    return ERROR_INSUFFICIENT_MEMORY;
  }

  atom_tree->root_node = _yr_atoms_extract_from_re_node(
      re->root_node, atom_tree, atom_tree->root_node);

  if (atom_tree->root_node == NULL)
  {
    _yr_atoms_tree_destroy(atom_tree);
    return ERROR_INSUFFICIENT_MEMORY;
  }

  // Append the leaf that was still being built when the traversal ended.
  if (atom_tree->current_leaf != NULL)
    _yr_atoms_tree_node_append(atom_tree->root_node, atom_tree->current_leaf);

  if (atom_tree->root_node->children_head ==
      atom_tree->root_node->children_tail)
  {
    // The root OR node has a single child, there's no need for the OR node so
    // we proceed to destroy it and use its child as root. This condition also
    // holds when the root has no children at all, in which case the root
    // becomes NULL.

    temp = atom_tree->root_node;
    atom_tree->root_node = atom_tree->root_node->children_head;
    yr_free(temp);
  }

  // Choose the atoms that will be used. With an empty tree there is nothing
  // to choose from, the list stays empty and the quality stays at 0.
  if (atom_tree->root_node != NULL)
    min_atom_quality = _yr_atoms_choose(atom_tree->root_node, atoms);

  _yr_atoms_tree_destroy(atom_tree);

  if (min_atom_quality <= 2)
  {
    // Chosen atoms contain low quality ones, let's try inferring some higher
    // quality atoms.

    yr_atoms_extract_triplets(re->root_node, &triplet_atoms);

    if (min_atom_quality < _yr_atoms_min_quality(triplet_atoms))
    {
      yr_atoms_list_destroy(*atoms);
      *atoms = triplet_atoms;
    }
    else
    {
      yr_atoms_list_destroy(triplet_atoms);
    }
  }

  if (flags & STRING_GFLAGS_WIDE)
  {
    FAIL_ON_ERROR(_yr_atoms_wide(
        *atoms, &wide_atoms));

    if (flags & STRING_GFLAGS_ASCII)
    {
      // Both the plain and the wide versions of the atoms are required.
      *atoms = _yr_atoms_list_concat(*atoms, wide_atoms);
    }
    else
    {
      // Only the wide version is required.
      yr_atoms_list_destroy(*atoms);
      *atoms = wide_atoms;
    }
  }

  if (flags & STRING_GFLAGS_NO_CASE)
  {
    FAIL_ON_ERROR(_yr_atoms_case_insentive(
        *atoms, &case_insensitive_atoms));

    *atoms = _yr_atoms_list_concat(*atoms, case_insensitive_atoms);
  }

  return ERROR_SUCCESS;
}
Exemplo n.º 5
0
//
// _yr_atoms_extract_from_re
//
// Builds the atom tree for a regular expression. The tree of RE_NODEs is
// traversed iteratively with an explicit stack, and ATOM_TREE_LEAF,
// ATOM_TREE_AND and ATOM_TREE_OR nodes are appended below appending_node.
//
// Every item in the stack carries two fields: re_node, the RE_NODE that must
// be processed when the item is popped (or NULL if there's none), and
// new_appending_node, which if not NULL becomes the node where leaves are
// appended from that point on. Setting the appending node, even to the same
// one that is already in use, flushes the atom being built into a new leaf.
//
// Args:
//    YR_ATOMS_CONFIG* config            - Its get_atom_quality callback is
//                                         used for scoring candidate atoms.
//    RE_AST* re_ast                     - Regular expression to traverse.
//    YR_ATOM_TREE_NODE* appending_node  - Node that receives the resulting
//                                         leaves and subtrees.
//
// Returns:
//    ERROR_SUCCESS on success. ERROR_INSUFFICIENT_MEMORY if the nodes for an
//    alternation can't be created. Errors from yr_stack_create and
//    yr_stack_push are propagated by the FAIL_ON_ERROR macros. A failure while
//    creating a leaf is reported through FAIL_ON_NULL_WITH_CLEANUP (defined
//    elsewhere; presumably it returns ERROR_INSUFFICIENT_MEMORY -- confirm).
//

static int _yr_atoms_extract_from_re(
    YR_ATOMS_CONFIG* config,
    RE_AST* re_ast,
    YR_ATOM_TREE_NODE* appending_node)
{
  YR_STACK* stack;
  RE_NODE* re_node;

  YR_ATOM atom;
  YR_ATOM best_atom;

  struct STACK_ITEM si;

  int i, shift;
  int quality;
  int best_quality = -1;
  int n = 0;

  YR_ATOM_TREE_NODE* and_node;
  YR_ATOM_TREE_NODE* left_node;
  YR_ATOM_TREE_NODE* right_node;

  // The RE_NODEs most recently visited that can conform an atom (ie:
  // RE_NODE_LITERAL, RE_NODE_MASKED_LITERAL and RE_NODE_ANY). The number of
  // items in this array is n.
  RE_NODE* recent_re_nodes[YR_MAX_ATOM_LENGTH];

  // The RE_NODEs corresponding to the best atom found so far for the current
  // appending node.
  RE_NODE* best_atom_re_nodes[YR_MAX_ATOM_LENGTH];

  // This holds the ATOM_TREE_OR node where leaves (ATOM_TREE_LEAF) are
  // currently being appended.
  YR_ATOM_TREE_NODE* current_appending_node = NULL;

  // This holds the ATOM_TREE_LEAF node whose atom is currently being updated.
  YR_ATOM_TREE_NODE* leaf = NULL;

  FAIL_ON_ERROR(yr_stack_create(1024, sizeof(si), &stack));

  // This first item pushed in the stack is the last one to be popped out, its
  // sole purpose is forcing that any pending leaf is appended to the tree
  // once all the RE_NODEs have been processed.
  si.re_node = NULL;
  si.new_appending_node = appending_node;

  FAIL_ON_ERROR_WITH_CLEANUP(
      yr_stack_push(stack, (void*) &si),
      yr_stack_destroy(stack));

  // Start processing the root node.
  si.re_node = re_ast->root_node;

  // Leaf nodes are initially appended to the node passed in the appending_node,
  // argument which is the root ATOM_TREE_OR node that is empty at this point.
  si.new_appending_node = appending_node;

  FAIL_ON_ERROR_WITH_CLEANUP(
      yr_stack_push(stack, (void*) &si),
      yr_stack_destroy(stack));

  while (yr_stack_pop(stack, (void*) &si))
  {
    // Change the appending node if the item popped from the stack says so.
    if (si.new_appending_node != NULL)
    {
      // Before changing the appending node let's append any pending leaf to
      // the current appending node.
      if (n > 0)
      {
        // Score the atom made of the most recent nodes, the leaf gets either
        // this one or the best one found while the window was sliding.
        make_atom_from_re_nodes(atom, n, recent_re_nodes);
        shift = _yr_atoms_trim(&atom);
        quality = config->get_atom_quality(config, &atom);

        FAIL_ON_NULL_WITH_CLEANUP(
            leaf = _yr_atoms_tree_node_create(ATOM_TREE_LEAF),
            yr_stack_destroy(stack));

        if (quality > best_quality)
        {
          // shift is the value returned by _yr_atoms_trim, the nodes are
          // copied starting at that offset so that they stay aligned with
          // the trimmed atom.
          memcpy(&leaf->atom, &atom, sizeof(atom));
          memcpy(
              &leaf->re_nodes,
              &recent_re_nodes[shift],
              sizeof(recent_re_nodes) - shift * sizeof(recent_re_nodes[0]));
        }
        else
        {
          memcpy(&leaf->atom, &best_atom, sizeof(best_atom));
          memcpy(
              &leaf->re_nodes,
              &best_atom_re_nodes,
              sizeof(best_atom_re_nodes));
        }

        _yr_atoms_tree_node_append(current_appending_node, leaf);
        n = 0;
      }

      current_appending_node = si.new_appending_node;

      // The next atom starts from scratch and best_atom is overwritten while
      // it grows, so the quality of the atom that was just flushed must not
      // be carried over. Otherwise a stale value would prevent better atoms
      // from being chosen for the following leaves.
      best_quality = -1;
    }

    if (si.re_node != NULL)
    {
      switch(si.re_node->type)
      {
        case RE_NODE_LITERAL:
        case RE_NODE_MASKED_LITERAL:
        case RE_NODE_ANY:

          if (n < YR_MAX_ATOM_LENGTH)
          {
            // The atom is not full yet, append the node to both the recent
            // nodes and the best atom, which are the same at this point.
            recent_re_nodes[n] = si.re_node;
            best_atom_re_nodes[n] = si.re_node;
            best_atom.bytes[n] = (uint8_t) si.re_node->value;
            best_atom.mask[n] = (uint8_t) si.re_node->mask;
            best_atom.length = ++n;
          }
          else if (best_quality < YR_MAX_ATOM_QUALITY)
          {
            // The atom is full. Score the current window before sliding it,
            // and remember it if it's the best one so far. Nothing is done
            // once an atom with the maximum quality has been found.
            make_atom_from_re_nodes(atom, n, recent_re_nodes);
            shift = _yr_atoms_trim(&atom);
            quality = config->get_atom_quality(config, &atom);

            if (quality > best_quality)
            {
              for (i = 0; i < atom.length; i++)
              {
                best_atom.bytes[i] = atom.bytes[i];
                best_atom.mask[i] = atom.mask[i];
                best_atom_re_nodes[i] = recent_re_nodes[i + shift];
              }

              best_atom.length = atom.length;
              best_quality = quality;
            }

            // Slide the window one position, discarding the oldest node.
            for (i = 1; i < YR_MAX_ATOM_LENGTH; i++)
              recent_re_nodes[i - 1] = recent_re_nodes[i];

            recent_re_nodes[YR_MAX_ATOM_LENGTH - 1] = si.re_node;
          }

          break;

        case RE_NODE_CONCAT:

          re_node = si.re_node->children_tail;

          // Push children right to left, they are popped left to right.
          while (re_node != NULL)
          {
            si.new_appending_node = NULL;
            si.re_node = re_node;

            FAIL_ON_ERROR_WITH_CLEANUP(
                yr_stack_push(stack, &si),
                yr_stack_destroy(stack));

            re_node = re_node->prev_sibling;
          }

          break;

        case RE_NODE_ALT:

          // Create ATOM_TREE_AND node with two ATOM_TREE_OR children nodes.
          and_node = _yr_atoms_tree_node_create(ATOM_TREE_AND);
          left_node = _yr_atoms_tree_node_create(ATOM_TREE_OR);
          right_node = _yr_atoms_tree_node_create(ATOM_TREE_OR);

          if (and_node == NULL || left_node == NULL || right_node == NULL)
          {
            _yr_atoms_tree_node_destroy(and_node);
            _yr_atoms_tree_node_destroy(left_node);
            _yr_atoms_tree_node_destroy(right_node);

            yr_stack_destroy(stack);

            return ERROR_INSUFFICIENT_MEMORY;
          }

          and_node->children_head = left_node;
          and_node->children_tail = right_node;
          left_node->next_sibling = right_node;

          // Add the ATOM_TREE_AND as children of the current node.
          _yr_atoms_tree_node_append(current_appending_node, and_node);

          re_node = si.re_node;

          // This item is popped after both alternatives, it restores the
          // appending node that was in use before the alternation.
          si.new_appending_node = current_appending_node;
          si.re_node = NULL;

          FAIL_ON_ERROR_WITH_CLEANUP(
              yr_stack_push(stack, &si),
              yr_stack_destroy(stack));

          // RE_NODE_ALT nodes has only two children, so children_head is the
          // left one, and children_tail is right one.
          si.new_appending_node = right_node;
          si.re_node = re_node->children_tail;

          FAIL_ON_ERROR_WITH_CLEANUP(
              yr_stack_push(stack, &si),
              yr_stack_destroy(stack));

          si.new_appending_node = left_node;
          si.re_node = re_node->children_head;

          FAIL_ON_ERROR_WITH_CLEANUP(
              yr_stack_push(stack, &si),
              yr_stack_destroy(stack));

          break;

        case RE_NODE_PLUS:

          re_node = si.re_node;

          // This item is popped after the child, forcing a flush of the
          // pending leaf.
          si.new_appending_node = current_appending_node;
          si.re_node = NULL;

          FAIL_ON_ERROR_WITH_CLEANUP(
              yr_stack_push(stack, &si),
              yr_stack_destroy(stack));

          si.new_appending_node = NULL;
          // RE_NODE_PLUS nodes has a single child, which is children_head.
          si.re_node = re_node->children_head;

          FAIL_ON_ERROR_WITH_CLEANUP(
              yr_stack_push(stack, &si),
              yr_stack_destroy(stack));

          break;

        case RE_NODE_RANGE:

          re_node = si.re_node;

          // This item is popped after the repetitions of the child, forcing
          // a flush of the pending leaf.
          si.new_appending_node = current_appending_node;
          si.re_node = NULL;

          FAIL_ON_ERROR_WITH_CLEANUP(
              yr_stack_push(stack, &si),
              yr_stack_destroy(stack));

          si.new_appending_node = NULL;
          // RE_NODE_RANGE nodes has a single child, which is children_head.
          si.re_node = re_node->children_head;

          // In a regexp like /a{10,20}/ the optimal atom is 'aaaa' (assuming
          // that YR_MAX_ATOM_LENGTH = 4) because the 'a' character must appear
          // at least 10 times in the matching string. Each call in the loop
          // will append one 'a' to the atom, so YR_MAX_ATOM_LENGTH iterations
          // are enough.

          for (i = 0; i < yr_min(re_node->start, YR_MAX_ATOM_LENGTH); i++)
          {
            FAIL_ON_ERROR_WITH_CLEANUP(
                yr_stack_push(stack, &si),
                yr_stack_destroy(stack));
          }

          break;

        case RE_NODE_RANGE_ANY:
        case RE_NODE_STAR:
        case RE_NODE_CLASS:
        case RE_NODE_WORD_CHAR:
        case RE_NODE_NON_WORD_CHAR:
        case RE_NODE_SPACE:
        case RE_NODE_NON_SPACE:
        case RE_NODE_DIGIT:
        case RE_NODE_NON_DIGIT:
        case RE_NODE_EMPTY:
        case RE_NODE_ANCHOR_START:
        case RE_NODE_ANCHOR_END:
        case RE_NODE_WORD_BOUNDARY:
        case RE_NODE_NON_WORD_BOUNDARY:

          // These nodes don't contribute to atoms. Pushing an item that sets
          // the appending node to the one already in use flushes the pending
          // leaf in the next iteration.
          si.new_appending_node = current_appending_node;
          si.re_node = NULL;

          FAIL_ON_ERROR_WITH_CLEANUP(
              yr_stack_push(stack, &si),
              yr_stack_destroy(stack));

          break;

        default:
          assert(false);
      }
    }
  }

  yr_stack_destroy(stack);

  return ERROR_SUCCESS;
}