int yr_atoms_extract_from_re( RE_AST* re_ast, int flags, YR_ATOM_LIST_ITEM** atoms) { ATOM_TREE* atom_tree = (ATOM_TREE*) yr_malloc(sizeof(ATOM_TREE)); ATOM_TREE_NODE* temp; YR_ATOM_LIST_ITEM* wide_atoms; YR_ATOM_LIST_ITEM* case_insensitive_atoms; YR_ATOM_LIST_ITEM* triplet_atoms; int min_atom_quality = YR_MIN_ATOM_QUALITY; if (atom_tree == NULL) return ERROR_INSUFFICIENT_MEMORY; atom_tree->root_node = _yr_atoms_tree_node_create(ATOM_TREE_OR); if (atom_tree->root_node == NULL) { _yr_atoms_tree_destroy(atom_tree); return ERROR_INSUFFICIENT_MEMORY; } atom_tree->current_leaf = NULL; atom_tree->root_node = _yr_atoms_extract_from_re_node( re_ast->root_node, atom_tree, atom_tree->root_node); if (atom_tree->root_node == NULL) { _yr_atoms_tree_destroy(atom_tree); return ERROR_INSUFFICIENT_MEMORY; } if (atom_tree->current_leaf != NULL) _yr_atoms_tree_node_append(atom_tree->root_node, atom_tree->current_leaf); if (atom_tree->root_node->children_head == atom_tree->root_node->children_tail) { // The root OR node has a single child, there's no need for the OR node so // we proceed to destroy it and use its child as root. temp = atom_tree->root_node; atom_tree->root_node = atom_tree->root_node->children_head; yr_free(temp); } // Initialize atom list *atoms = NULL; if (atom_tree->root_node != NULL) { // Choose the atoms that will be used. FAIL_ON_ERROR_WITH_CLEANUP( _yr_atoms_choose(atom_tree->root_node, atoms, &min_atom_quality), _yr_atoms_tree_destroy(atom_tree)); } _yr_atoms_tree_destroy(atom_tree); if (min_atom_quality <= 2) { // Chosen atoms contain low quality ones, let's try infering some higher // quality atoms. FAIL_ON_ERROR_WITH_CLEANUP( yr_atoms_extract_triplets(re_ast->root_node, &triplet_atoms), { yr_atoms_list_destroy(*atoms); yr_atoms_list_destroy(triplet_atoms); *atoms = NULL; });
//
// _yr_atoms_extract_from_re_node
//
// Recursively walks the regular expression node "re_node" and records atom
// candidates in "atom_tree". Consecutive literals are accumulated in
// atom_tree->current_leaf; any node that breaks a run of literals hands the
// pending leaf over to the tree through append_current_leaf_to_node.
//
// NOTE(review): append_current_leaf_to_node is a macro defined outside this
// block. It is presumably a no-op when there is no pending leaf and clears
// atom_tree->current_leaf after appending it -- confirm against its
// definition.
//
// Args:
//    RE_NODE* re_node              - Regular expression node to process.
//    ATOM_TREE* atom_tree          - Tree holding the leaf under construction.
//    ATOM_TREE_NODE* current_node  - Tree node receiving the new children.
//
// Returns:
//    "current_node" on success. NULL if a tree node could not be created (or
//    if the node type is unknown). This function never releases the
//    "current_node" it receives, so on failure the caller still owns it.
//
static ATOM_TREE_NODE* _yr_atoms_extract_from_re_node(
    RE_NODE* re_node,
    ATOM_TREE* atom_tree,
    ATOM_TREE_NODE* current_node)
{
  ATOM_TREE_NODE* left_node;
  ATOM_TREE_NODE* right_node;
  ATOM_TREE_NODE* and_node;
  ATOM_TREE_NODE* or_node;
  ATOM_TREE_NODE* current_leaf;
  ATOM_TREE_NODE* temp;

  int quality;
  int new_quality;
  int i;

  uint8_t new_atom[MAX_ATOM_LENGTH];

  switch (re_node->type)
  {
    case RE_NODE_LITERAL:

      if (atom_tree->current_leaf == NULL)
      {
        // First literal of a new run: start a fresh leaf whose code
        // pointers are those of this literal.
        atom_tree->current_leaf = _yr_atoms_tree_node_create(ATOM_TREE_LEAF);

        if (atom_tree->current_leaf == NULL)
          return NULL;

        atom_tree->current_leaf->forward_code = re_node->forward_code;
        atom_tree->current_leaf->backward_code = re_node->backward_code;

        assert(atom_tree->current_leaf->forward_code != NULL);
        assert(atom_tree->current_leaf->backward_code != NULL);
      }

      current_leaf = atom_tree->current_leaf;

      if (current_leaf->atom_length < MAX_ATOM_LENGTH)
      {
        // There is still room in the atom, simply append the literal.
        current_leaf->atom[current_leaf->atom_length] =
            (uint8_t) re_node->value;

        current_leaf->recent_nodes[current_leaf->atom_length] = re_node;
        current_leaf->atom_length++;
      }
      else
      {
        // The atom is full. Slide the window of recent literal nodes one
        // position forward and keep the new window only if it yields a
        // strictly better atom than the one currently stored.
        for (i = 1; i < MAX_ATOM_LENGTH; i++)
          current_leaf->recent_nodes[i - 1] = current_leaf->recent_nodes[i];

        current_leaf->recent_nodes[MAX_ATOM_LENGTH - 1] = re_node;

        for (i = 0; i < MAX_ATOM_LENGTH; i++)
          new_atom[i] = (uint8_t) current_leaf->recent_nodes[i]->value;

        quality = _yr_atoms_quality(current_leaf->atom, MAX_ATOM_LENGTH);
        new_quality = _yr_atoms_quality(new_atom, MAX_ATOM_LENGTH);

        if (new_quality > quality)
        {
          for (i = 0; i < MAX_ATOM_LENGTH; i++)
            current_leaf->atom[i] = new_atom[i];

          // The atom now starts at a different literal, so the code
          // pointers must follow the first node of the window.
          current_leaf->forward_code =
              current_leaf->recent_nodes[0]->forward_code;

          current_leaf->backward_code =
              current_leaf->recent_nodes[0]->backward_code;

          assert(current_leaf->forward_code != NULL);
          assert(current_leaf->backward_code != NULL);
        }
      }

      return current_node;

    case RE_NODE_CONCAT:

      current_node = _yr_atoms_extract_from_re_node(
          re_node->left, atom_tree, current_node);

      if (current_node == NULL)
        return NULL;

      current_node = _yr_atoms_extract_from_re_node(
          re_node->right, atom_tree, current_node);

      return current_node;

    case RE_NODE_ALT:

      append_current_leaf_to_node(current_node);

      // Left branch. The OR node is kept in "or_node" so that it can still
      // be released if the recursive call fails and returns NULL.
      or_node = _yr_atoms_tree_node_create(ATOM_TREE_OR);

      if (or_node == NULL)
        return NULL;

      left_node = _yr_atoms_extract_from_re_node(
          re_node->left, atom_tree, or_node);

      if (left_node == NULL)
      {
        _yr_atoms_tree_node_destroy(or_node);
        return NULL;
      }

      append_current_leaf_to_node(left_node);

      if (left_node->children_head == NULL)
      {
        // The left branch produced no atoms at all, so the alternation
        // contributes nothing to the tree.
        _yr_atoms_tree_node_destroy(left_node);
        return current_node;
      }

      if (left_node->children_head == left_node->children_tail)
      {
        // Single child: drop the redundant OR node and use the child.
        temp = left_node;
        left_node = left_node->children_head;
        yr_free(temp);
      }

      // Right branch. From here on every failure path must also release
      // the left subtree, which is not attached to the tree yet.
      or_node = _yr_atoms_tree_node_create(ATOM_TREE_OR);

      if (or_node == NULL)
      {
        _yr_atoms_tree_node_destroy(left_node);
        return NULL;
      }

      right_node = _yr_atoms_extract_from_re_node(
          re_node->right, atom_tree, or_node);

      if (right_node == NULL)
      {
        _yr_atoms_tree_node_destroy(or_node);
        _yr_atoms_tree_node_destroy(left_node);
        return NULL;
      }

      append_current_leaf_to_node(right_node);

      if (right_node->children_head == NULL)
      {
        // The right branch produced no atoms, discard both branches.
        _yr_atoms_tree_node_destroy(left_node);
        _yr_atoms_tree_node_destroy(right_node);
        return current_node;
      }

      if (right_node->children_head == right_node->children_tail)
      {
        // Single child: drop the redundant OR node and use the child.
        temp = right_node;
        right_node = right_node->children_head;
        yr_free(temp);
      }

      and_node = _yr_atoms_tree_node_create(ATOM_TREE_AND);

      if (and_node == NULL)
      {
        _yr_atoms_tree_node_destroy(left_node);
        _yr_atoms_tree_node_destroy(right_node);
        return NULL;
      }

      // Both branches become the two children of a new AND node.
      and_node->children_head = left_node;
      and_node->children_tail = right_node;
      left_node->next_sibling = right_node;

      _yr_atoms_tree_node_append(current_node, and_node);

      return current_node;

    case RE_NODE_RANGE:

      // With a lower bound of zero the repeated expression may be absent,
      // so the pending leaf is closed before it.
      if (re_node->start == 0)
      {
        append_current_leaf_to_node(current_node);
      }

      // The repeated expression is processed once per mandatory repetition.
      for (i = 0; i < re_node->start; i++)
      {
        current_node = _yr_atoms_extract_from_re_node(
            re_node->left, atom_tree, current_node);

        if (current_node == NULL)
          return NULL;
      }

      // When the bounds differ the number of repetitions is not fixed, so
      // the pending leaf is closed after the mandatory ones.
      if (re_node->start != re_node->end)
      {
        append_current_leaf_to_node(current_node);
      }

      return current_node;

    case RE_NODE_PLUS:

      current_node = _yr_atoms_extract_from_re_node(
          re_node->left, atom_tree, current_node);

      if (current_node == NULL)
        return NULL;

      append_current_leaf_to_node(current_node);

      return current_node;

    case RE_NODE_ANY:
    case RE_NODE_RANGE_ANY:
    case RE_NODE_STAR:
    case RE_NODE_CLASS:
    case RE_NODE_MASKED_LITERAL:
    case RE_NODE_WORD_CHAR:
    case RE_NODE_NON_WORD_CHAR:
    case RE_NODE_SPACE:
    case RE_NODE_NON_SPACE:
    case RE_NODE_DIGIT:
    case RE_NODE_NON_DIGIT:
    case RE_NODE_EMPTY:
    case RE_NODE_ANCHOR_START:
    case RE_NODE_ANCHOR_END:
    case RE_NODE_WORD_BOUNDARY:
    case RE_NODE_NON_WORD_BOUNDARY:

      // None of these node types adds a byte to the atom; they only end
      // the current run of literals.
      append_current_leaf_to_node(current_node);
      return current_node;

    default:
      assert(FALSE);
  }

  return NULL;
}
//
// yr_atoms_extract_from_re
//
// Extracts the atoms for a regular expression. An atom tree is built from
// the expression, atoms are chosen from it, and when the chosen atoms are of
// low quality (minimum quality <= 2) they are compared against the atoms
// returned by yr_atoms_extract_triplets, keeping whichever list has the
// higher minimum quality.
//
// Args:
//    RE* re                     - The regular expression.
//    int flags                  - String flags. STRING_GFLAGS_WIDE adds (or,
//                                 without STRING_GFLAGS_ASCII, substitutes)
//                                 the wide atoms; STRING_GFLAGS_NO_CASE
//                                 appends the case-insensitive atoms.
//    YR_ATOM_LIST_ITEM** atoms  - Receives the resulting atom list.
//
// Returns:
//    ERROR_SUCCESS on success, ERROR_INSUFFICIENT_MEMORY if the atom tree
//    could not be built, or whatever error FAIL_ON_ERROR returns for the
//    wide / case-insensitive helpers.
//
int yr_atoms_extract_from_re(
    RE* re,
    int flags,
    YR_ATOM_LIST_ITEM** atoms)
{
  ATOM_TREE* atom_tree = yr_malloc(sizeof(ATOM_TREE));
  ATOM_TREE_NODE* root;
  ATOM_TREE_NODE* temp;

  YR_ATOM_LIST_ITEM* wide_atoms;
  YR_ATOM_LIST_ITEM* case_insensitive_atoms;
  YR_ATOM_LIST_ITEM* triplet_atoms;

  int min_atom_quality = 0;

  if (atom_tree == NULL)
    return ERROR_INSUFFICIENT_MEMORY;

  atom_tree->current_leaf = NULL;
  atom_tree->root_node = _yr_atoms_tree_node_create(ATOM_TREE_OR);

  if (atom_tree->root_node == NULL)
  {
    // Nothing but the tree structure itself has been allocated so far.
    yr_free(atom_tree);
    return ERROR_INSUFFICIENT_MEMORY;
  }

  // The result goes into a local first: on failure NULL is returned and
  // atom_tree->root_node must still reference the original root so that the
  // tree can be destroyed.
  root = _yr_atoms_extract_from_re_node(
      re->root_node, atom_tree, atom_tree->root_node);

  if (root == NULL)
  {
    _yr_atoms_tree_destroy(atom_tree);
    return ERROR_INSUFFICIENT_MEMORY;
  }

  atom_tree->root_node = root;

  if (atom_tree->current_leaf != NULL)
    _yr_atoms_tree_node_append(atom_tree->root_node, atom_tree->current_leaf);

  if (atom_tree->root_node->children_head ==
      atom_tree->root_node->children_tail)
  {
    // The root OR node has a single child, there's no need for the OR node so
    // we proceed to destroy it and use its child as root.
    // NOTE(review): this condition also holds when the root has no children
    // at all, leaving a NULL root for _yr_atoms_choose -- confirm that
    // _yr_atoms_choose accepts a NULL node.
    temp = atom_tree->root_node;
    atom_tree->root_node = atom_tree->root_node->children_head;
    yr_free(temp);
  }

  // Choose the atoms that will be used.
  min_atom_quality = _yr_atoms_choose(atom_tree->root_node, atoms);

  _yr_atoms_tree_destroy(atom_tree);

  if (min_atom_quality <= 2)
  {
    // Chosen atoms contain low quality ones, let's try inferring some higher
    // quality atoms.
    yr_atoms_extract_triplets(re->root_node, &triplet_atoms);

    if (min_atom_quality < _yr_atoms_min_quality(triplet_atoms))
    {
      yr_atoms_list_destroy(*atoms);
      *atoms = triplet_atoms;
    }
    else
    {
      yr_atoms_list_destroy(triplet_atoms);
    }
  }

  if (flags & STRING_GFLAGS_WIDE)
  {
    FAIL_ON_ERROR(_yr_atoms_wide(*atoms, &wide_atoms));

    if (flags & STRING_GFLAGS_ASCII)
    {
      // Both encodings requested: keep the original atoms and add the
      // wide ones.
      *atoms = _yr_atoms_list_concat(*atoms, wide_atoms);
    }
    else
    {
      // Only the wide encoding requested: the original atoms are replaced.
      yr_atoms_list_destroy(*atoms);
      *atoms = wide_atoms;
    }
  }

  if (flags & STRING_GFLAGS_NO_CASE)
  {
    FAIL_ON_ERROR(_yr_atoms_case_insentive(*atoms, &case_insensitive_atoms));

    *atoms = _yr_atoms_list_concat(*atoms, case_insensitive_atoms);
  }

  return ERROR_SUCCESS;
}