Ejemplo n.º 1
0
Archivo: atoms.c Proyecto: rednaga/yara
//
// _yr_atoms_tree_node_destroy
//
// Frees an atom tree node. For ATOM_TREE_AND and ATOM_TREE_OR nodes every
// child reachable through children_head / next_sibling is destroyed first
// (recursively); nodes of any other type are freed without looking at their
// children. Passing NULL is allowed and does nothing.
//
static void _yr_atoms_tree_node_destroy(
    ATOM_TREE_NODE* node)
{
  ATOM_TREE_NODE* cursor;
  ATOM_TREE_NODE* following;

  if (node != NULL)
  {
    if (node->type == ATOM_TREE_AND || node->type == ATOM_TREE_OR)
    {
      for (cursor = node->children_head; cursor != NULL; cursor = following)
      {
        // Grab the sibling link before the child's memory is released.
        following = cursor->next_sibling;
        _yr_atoms_tree_node_destroy(cursor);
      }
    }

    yr_free(node);
  }
}
Ejemplo n.º 2
0
Archivo: atoms.c Proyecto: rednaga/yara
//
// _yr_atoms_extract_from_re_node
//
// Recursively walks the regular expression subtree rooted at re_node and
// builds the corresponding atom subtree under current_node.
//
// Consecutive RE_NODE_LITERAL nodes are accumulated in
// atom_tree->current_leaf. Any other construct ends the literal run by
// invoking append_current_leaf_to_node (a macro defined elsewhere;
// presumably it attaches atom_tree->current_leaf to the given node and
// clears it -- confirm against its definition).
//
// Args:
//    re_node: Regular expression node to process.
//    atom_tree: Atom tree being built; its current_leaf field holds the leaf
//               currently receiving literals.
//    current_node: Atom tree node that receives the leaves and AND nodes
//                  produced for re_node.
//
// Returns:
//    current_node on success, NULL if a node could not be created or the
//    node type is not recognized. This function never frees the node it
//    receives in current_node, not even on failure, so on a NULL return that
//    node is still owned by the caller.
//
static ATOM_TREE_NODE* _yr_atoms_extract_from_re_node(
  RE_NODE* re_node,
  ATOM_TREE* atom_tree,
  ATOM_TREE_NODE* current_node)
{
  ATOM_TREE_NODE* left_node;
  ATOM_TREE_NODE* right_node;
  ATOM_TREE_NODE* and_node;
  ATOM_TREE_NODE* current_leaf;
  ATOM_TREE_NODE* temp;

  int quality;
  int new_quality;
  int i;

  uint8_t new_atom[MAX_ATOM_LENGTH];

  switch(re_node->type)
  {
    case RE_NODE_LITERAL:

      // Start a new leaf if there is no literal run in progress. The code
      // pointers of the first literal are the initial ones for the atom.
      if (atom_tree->current_leaf == NULL)
      {
        atom_tree->current_leaf = _yr_atoms_tree_node_create(ATOM_TREE_LEAF);

        if (atom_tree->current_leaf == NULL)
          return NULL;

        atom_tree->current_leaf->forward_code = re_node->forward_code;
        atom_tree->current_leaf->backward_code = re_node->backward_code;

        assert(atom_tree->current_leaf->forward_code != NULL);
        assert(atom_tree->current_leaf->backward_code != NULL);
      }

      current_leaf = atom_tree->current_leaf;

      if (current_leaf->atom_length < MAX_ATOM_LENGTH)
      {
        // There is still room in the atom, just append the literal.
        current_leaf->atom[current_leaf->atom_length] =
            (uint8_t) re_node->value;
        current_leaf->recent_nodes[current_leaf->atom_length] = re_node;
        current_leaf->atom_length++;
      }
      else
      {
        // The atom is full. Slide the window of recent nodes one position
        // and build a candidate atom from the last MAX_ATOM_LENGTH literals.
        for (i = 1; i < MAX_ATOM_LENGTH; i++)
          current_leaf->recent_nodes[i - 1] = current_leaf->recent_nodes[i];

        current_leaf->recent_nodes[MAX_ATOM_LENGTH - 1] = re_node;

        for (i = 0; i < MAX_ATOM_LENGTH; i++)
          new_atom[i] = (uint8_t) current_leaf->recent_nodes[i]->value;

        quality = _yr_atoms_quality(
            current_leaf->atom,
            MAX_ATOM_LENGTH);

        new_quality = _yr_atoms_quality(
            new_atom,
            MAX_ATOM_LENGTH);

        // Keep the candidate only if it is strictly better than the atom
        // already stored in the leaf. The code pointers are taken from the
        // first node of the new window.
        if (new_quality > quality)
        {
          for (i = 0; i < MAX_ATOM_LENGTH; i++)
            current_leaf->atom[i] = new_atom[i];

          current_leaf->forward_code = \
              current_leaf->recent_nodes[0]->forward_code;

          current_leaf->backward_code = \
              current_leaf->recent_nodes[0]->backward_code;

          assert(current_leaf->forward_code != NULL);
          assert(current_leaf->backward_code != NULL);
        }
      }

      return current_node;

    case RE_NODE_CONCAT:

      current_node = _yr_atoms_extract_from_re_node(
          re_node->left, atom_tree, current_node);

      if (current_node == NULL)
        return NULL;

      current_node = _yr_atoms_extract_from_re_node(
          re_node->right, atom_tree, current_node);

      return current_node;

    case RE_NODE_ALT:

      append_current_leaf_to_node(current_node);

      // Each alternative is collected under its own ATOM_TREE_OR node. The
      // node is kept in temp while recursing: on failure the recursive call
      // returns NULL without freeing the node it was given, so it must be
      // destroyed here or it would leak.
      temp = _yr_atoms_tree_node_create(ATOM_TREE_OR);

      if (temp == NULL)
        return NULL;

      left_node = _yr_atoms_extract_from_re_node(
          re_node->left, atom_tree, temp);

      if (left_node == NULL)
      {
        _yr_atoms_tree_node_destroy(temp);
        return NULL;
      }

      append_current_leaf_to_node(left_node);

      // The left alternative produced nothing, the whole alternation is
      // dropped and current_node is returned untouched.
      if (left_node->children_head == NULL)
      {
        _yr_atoms_tree_node_destroy(left_node);
        return current_node;
      }

      // An OR node with a single child is replaced by that child.
      if (left_node->children_head == left_node->children_tail)
      {
        temp = left_node;
        left_node = left_node->children_head;
        yr_free(temp);
      }

      temp = _yr_atoms_tree_node_create(ATOM_TREE_OR);

      if (temp == NULL)
      {
        // left_node is not attached to the tree yet, release it here.
        _yr_atoms_tree_node_destroy(left_node);
        return NULL;
      }

      right_node = _yr_atoms_extract_from_re_node(
          re_node->right, atom_tree, temp);

      if (right_node == NULL)
      {
        _yr_atoms_tree_node_destroy(left_node);
        _yr_atoms_tree_node_destroy(temp);
        return NULL;
      }

      append_current_leaf_to_node(right_node);

      if (right_node->children_head == NULL)
      {
        _yr_atoms_tree_node_destroy(left_node);
        _yr_atoms_tree_node_destroy(right_node);
        return current_node;
      }

      if (right_node->children_head == right_node->children_tail)
      {
        temp = right_node;
        right_node = right_node->children_head;
        yr_free(temp);
      }

      and_node = _yr_atoms_tree_node_create(ATOM_TREE_AND);

      if (and_node == NULL)
      {
        // Neither subtree has been linked to current_node yet.
        _yr_atoms_tree_node_destroy(left_node);
        _yr_atoms_tree_node_destroy(right_node);
        return NULL;
      }

      and_node->children_head = left_node;
      and_node->children_tail = right_node;
      left_node->next_sibling = right_node;

      _yr_atoms_tree_node_append(current_node, and_node);

      return current_node;

    case RE_NODE_RANGE:

      // With a lower bound of zero the repeated expression may not appear
      // at all, so the literal run in progress ends here.
      if (re_node->start == 0)
        append_current_leaf_to_node(current_node);

      // The repeated expression is processed once per mandatory repetition.
      for (i = 0; i < re_node->start; i++)
      {
        current_node = _yr_atoms_extract_from_re_node(
            re_node->left, atom_tree, current_node);

        if (current_node == NULL)
          return NULL;
      }

      // A variable number of repetitions ends the literal run.
      if (re_node->start != re_node->end)
        append_current_leaf_to_node(current_node);

      return current_node;

    case RE_NODE_PLUS:

      current_node = _yr_atoms_extract_from_re_node(
          re_node->left, atom_tree, current_node);

      if (current_node == NULL)
        return NULL;

      append_current_leaf_to_node(current_node);
      return current_node;

    case RE_NODE_ANY:
    case RE_NODE_RANGE_ANY:
    case RE_NODE_STAR:
    case RE_NODE_CLASS:
    case RE_NODE_MASKED_LITERAL:
    case RE_NODE_WORD_CHAR:
    case RE_NODE_NON_WORD_CHAR:
    case RE_NODE_SPACE:
    case RE_NODE_NON_SPACE:
    case RE_NODE_DIGIT:
    case RE_NODE_NON_DIGIT:
    case RE_NODE_EMPTY:
    case RE_NODE_ANCHOR_START:
    case RE_NODE_ANCHOR_END:
    case RE_NODE_WORD_BOUNDARY:
    case RE_NODE_NON_WORD_BOUNDARY:

      // None of these contribute bytes to an atom, they only end the
      // literal run in progress.
      append_current_leaf_to_node(current_node);
      return current_node;

    default:
      assert(FALSE);
  }

  return NULL;
}
Ejemplo n.º 3
0
Archivo: atoms.c Proyecto: rednaga/yara
//
// _yr_atoms_tree_destroy
//
// Destroys every node reachable from atom_tree->root_node and then frees the
// ATOM_TREE structure itself. Passing NULL is allowed and does nothing, the
// same way _yr_atoms_tree_node_destroy accepts a NULL node.
//
static void _yr_atoms_tree_destroy(
    ATOM_TREE* atom_tree)
{
  if (atom_tree == NULL)
    return;

  _yr_atoms_tree_node_destroy(atom_tree->root_node);
  yr_free(atom_tree);
}
Ejemplo n.º 4
0
//
// _yr_atoms_extract_from_re
//
// Walks the regular expression AST iteratively, using an explicit stack
// instead of recursion, and appends the atoms found to the atom tree node
// passed in appending_node.
//
// Runs of RE_NODE_LITERAL, RE_NODE_MASKED_LITERAL and RE_NODE_ANY nodes are
// collected in a sliding window of at most YR_MAX_ATOM_LENGTH nodes. The best
// window seen for the run (according to config->get_atom_quality) becomes an
// ATOM_TREE_LEAF when the run ends. A run ends whenever an item whose
// new_appending_node is not NULL is popped from the stack.
//
// Args:
//    config: Configuration providing the get_atom_quality callback.
//    re_ast: Regular expression AST whose root_node is processed.
//    appending_node: Atom tree node that receives the leaves and the
//                    ATOM_TREE_AND nodes created for alternations.
//
// Returns:
//    ERROR_SUCCESS on success, ERROR_INSUFFICIENT_MEMORY if the nodes for an
//    alternation can't be created, or whatever error the FAIL_ON_* macros
//    propagate from the stack operations and node creation. The stack is
//    destroyed on every exit path once it has been created. Nodes already
//    appended to appending_node are left in place on error.
//
static int _yr_atoms_extract_from_re(
    YR_ATOMS_CONFIG* config,
    RE_AST* re_ast,
    YR_ATOM_TREE_NODE* appending_node)
{
  YR_STACK* stack;
  RE_NODE* re_node;

  YR_ATOM atom;

  // Zero-initialized because it is copied as a whole with memcpy when a leaf
  // is created.
  YR_ATOM best_atom = {0};

  struct STACK_ITEM si;

  int i, shift;
  int quality;
  int best_quality = -1;
  int n = 0;

  YR_ATOM_TREE_NODE* and_node;
  YR_ATOM_TREE_NODE* left_node;
  YR_ATOM_TREE_NODE* right_node;

  // The RE_NODEs most recently visited that can conform an atom (ie:
  // RE_NODE_LITERAL, RE_NODE_MASKED_LITERAL and RE_NODE_ANY). The number of
  // items in this array is n.
  RE_NODE* recent_re_nodes[YR_MAX_ATOM_LENGTH] = {NULL};

  // The RE_NODEs corresponding to the best atom found so far for the current
  // appending node.
  RE_NODE* best_atom_re_nodes[YR_MAX_ATOM_LENGTH] = {NULL};

  // This holds the ATOM_TREE_OR node where leaves (ATOM_TREE_LEAF) are
  // currently being appended.
  YR_ATOM_TREE_NODE* current_appending_node = NULL;

  // This holds the ATOM_TREE_LEAF node whose atom is currently being updated.
  YR_ATOM_TREE_NODE* leaf = NULL;

  FAIL_ON_ERROR(yr_stack_create(1024, sizeof(si), &stack));

  // This first item pushed in the stack is the last one to be popped out, its
  // sole purpose is forcing that any pending leaf is appended to its
  // appending node before the loop ends.
  si.re_node = NULL;
  si.new_appending_node = appending_node;

  FAIL_ON_ERROR_WITH_CLEANUP(
      yr_stack_push(stack, (void*) &si),
      yr_stack_destroy(stack));

  // Start processing the root node.
  si.re_node = re_ast->root_node;

  // Leaf nodes are initially appended to the node passed in the appending_node,
  // argument which is the root ATOM_TREE_OR node that is empty at this point.
  si.new_appending_node = appending_node;

  FAIL_ON_ERROR_WITH_CLEANUP(
      yr_stack_push(stack, (void*) &si),
      yr_stack_destroy(stack));

  while (yr_stack_pop(stack, (void*) &si))
  {
    // Change the appending node if the item popped from the stack says so.
    if (si.new_appending_node != NULL)
    {
      // Before changing the appending node let's append any pending leaf to
      // the current appending node.
      if (n > 0)
      {
        make_atom_from_re_nodes(atom, n, recent_re_nodes);
        shift = _yr_atoms_trim(&atom);
        quality = config->get_atom_quality(config, &atom);

        FAIL_ON_NULL_WITH_CLEANUP(
            leaf = _yr_atoms_tree_node_create(ATOM_TREE_LEAF),
            yr_stack_destroy(stack));

        // Use the atom in the current window if it beats the best one
        // recorded for this run, otherwise use the recorded one.
        if (quality > best_quality)
        {
          memcpy(&leaf->atom, &atom, sizeof(atom));
          memcpy(
              &leaf->re_nodes,
              &recent_re_nodes[shift],
              sizeof(recent_re_nodes) - shift * sizeof(recent_re_nodes[0]));
        }
        else
        {
          memcpy(&leaf->atom, &best_atom, sizeof(best_atom));
          memcpy(
              &leaf->re_nodes,
              &best_atom_re_nodes,
              sizeof(best_atom_re_nodes));
        }

        _yr_atoms_tree_node_append(current_appending_node, leaf);
        n = 0;
      }

      current_appending_node = si.new_appending_node;

      // The next run of literals starts from scratch. Without this reset the
      // quality recorded for a previous run would be compared against the
      // windows of the next one, rejecting better atoms or, once it reaches
      // YR_MAX_ATOM_QUALITY, disabling the sliding window altogether.
      best_quality = -1;
    }

    if (si.re_node != NULL)
    {
      switch(si.re_node->type)
      {
        case RE_NODE_LITERAL:
        case RE_NODE_MASKED_LITERAL:
        case RE_NODE_ANY:

          if (n < YR_MAX_ATOM_LENGTH)
          {
            // The window is not full yet, the best atom so far is simply the
            // window itself.
            recent_re_nodes[n] = si.re_node;
            best_atom_re_nodes[n] = si.re_node;
            best_atom.bytes[n] = (uint8_t) si.re_node->value;
            best_atom.mask[n] = (uint8_t) si.re_node->mask;
            best_atom.length = ++n;
          }
          else if (best_quality < YR_MAX_ATOM_QUALITY)
          {
            // The window is full. Evaluate it before sliding it one position,
            // and remember it if it is the best seen in this run.
            make_atom_from_re_nodes(atom, n, recent_re_nodes);
            shift = _yr_atoms_trim(&atom);
            quality = config->get_atom_quality(config, &atom);

            if (quality > best_quality)
            {
              for (i = 0; i < atom.length; i++)
              {
                best_atom.bytes[i] = atom.bytes[i];
                best_atom.mask[i] = atom.mask[i];
                best_atom_re_nodes[i] = recent_re_nodes[i + shift];
              }

              best_atom.length = atom.length;
              best_quality = quality;
            }

            for (i = 1; i < YR_MAX_ATOM_LENGTH; i++)
              recent_re_nodes[i - 1] = recent_re_nodes[i];

            recent_re_nodes[YR_MAX_ATOM_LENGTH - 1] = si.re_node;
          }

          break;

        case RE_NODE_CONCAT:

          re_node = si.re_node->children_tail;

          // Push children right to left, they are popped left to right.
          while (re_node != NULL)
          {
            si.new_appending_node = NULL;
            si.re_node = re_node;

            FAIL_ON_ERROR_WITH_CLEANUP(
                yr_stack_push(stack, &si),
                yr_stack_destroy(stack));

            re_node = re_node->prev_sibling;
          }

          break;

        case RE_NODE_ALT:

          // Create ATOM_TREE_AND node with two ATOM_TREE_OR children nodes.
          and_node = _yr_atoms_tree_node_create(ATOM_TREE_AND);
          left_node = _yr_atoms_tree_node_create(ATOM_TREE_OR);
          right_node = _yr_atoms_tree_node_create(ATOM_TREE_OR);

          if (and_node == NULL || left_node == NULL || right_node == NULL)
          {
            _yr_atoms_tree_node_destroy(and_node);
            _yr_atoms_tree_node_destroy(left_node);
            _yr_atoms_tree_node_destroy(right_node);

            yr_stack_destroy(stack);

            return ERROR_INSUFFICIENT_MEMORY;
          }

          and_node->children_head = left_node;
          and_node->children_tail = right_node;
          left_node->next_sibling = right_node;

          // Add the ATOM_TREE_AND as children of the current node.
          _yr_atoms_tree_node_append(current_appending_node, and_node);

          re_node = si.re_node;

          // Pushed first so it is popped last: once both alternatives are
          // done the appending node goes back to the current one.
          si.new_appending_node = current_appending_node;
          si.re_node = NULL;

          FAIL_ON_ERROR_WITH_CLEANUP(
              yr_stack_push(stack, &si),
              yr_stack_destroy(stack));

          // RE_NODE_ALT nodes has only two children, so children_head is the
          // left one, and children_tail is right one.
          si.new_appending_node = right_node;
          si.re_node = re_node->children_tail;

          FAIL_ON_ERROR_WITH_CLEANUP(
              yr_stack_push(stack, &si),
              yr_stack_destroy(stack));

          si.new_appending_node = left_node;
          si.re_node = re_node->children_head;

          FAIL_ON_ERROR_WITH_CLEANUP(
              yr_stack_push(stack, &si),
              yr_stack_destroy(stack));

          break;

        case RE_NODE_PLUS:

          re_node = si.re_node;

          // This item is popped right after the child has been processed and
          // forces the pending leaf to be appended.
          si.new_appending_node = current_appending_node;
          si.re_node = NULL;

          FAIL_ON_ERROR_WITH_CLEANUP(
              yr_stack_push(stack, &si),
              yr_stack_destroy(stack));

          si.new_appending_node = NULL;
          // RE_NODE_PLUS nodes has a single child, which is children_head.
          si.re_node = re_node->children_head;

          FAIL_ON_ERROR_WITH_CLEANUP(
              yr_stack_push(stack, &si),
              yr_stack_destroy(stack));

          break;

        case RE_NODE_RANGE:

          re_node = si.re_node;

          // Popped after all the repetitions pushed below, it forces the
          // pending leaf to be appended.
          si.new_appending_node = current_appending_node;
          si.re_node = NULL;

          FAIL_ON_ERROR_WITH_CLEANUP(
              yr_stack_push(stack, &si),
              yr_stack_destroy(stack));

          si.new_appending_node = NULL;
          // RE_NODE_RANGE nodes has a single child, which is children_head.
          si.re_node = re_node->children_head;

          // In a regexp like /a{10,20}/ the optimal atom is 'aaaa' (assuming
          // that YR_MAX_ATOM_LENGTH = 4) because the 'a' character must appear
          // at least 10 times in the matching string. Each call in the loop
          // will append one 'a' to the atom, so YR_MAX_ATOM_LENGTH iterations
          // are enough.

          for (i = 0; i < yr_min(re_node->start, YR_MAX_ATOM_LENGTH); i++)
          {
            FAIL_ON_ERROR_WITH_CLEANUP(
                yr_stack_push(stack, &si),
                yr_stack_destroy(stack));
          }

          break;

        case RE_NODE_RANGE_ANY:
        case RE_NODE_STAR:
        case RE_NODE_CLASS:
        case RE_NODE_WORD_CHAR:
        case RE_NODE_NON_WORD_CHAR:
        case RE_NODE_SPACE:
        case RE_NODE_NON_SPACE:
        case RE_NODE_DIGIT:
        case RE_NODE_NON_DIGIT:
        case RE_NODE_EMPTY:
        case RE_NODE_ANCHOR_START:
        case RE_NODE_ANCHOR_END:
        case RE_NODE_WORD_BOUNDARY:
        case RE_NODE_NON_WORD_BOUNDARY:

          // These nodes don't contribute to atoms. Pushing an item with the
          // current appending node makes the next iteration append the
          // pending leaf, if any.
          si.new_appending_node = current_appending_node;
          si.re_node = NULL;

          FAIL_ON_ERROR_WITH_CLEANUP(
              yr_stack_push(stack, &si),
              yr_stack_destroy(stack));

          break;

        default:
          assert(false);
      }
    }
  }

  yr_stack_destroy(stack);

  return ERROR_SUCCESS;
}