//
// _yr_atoms_triplets_generate
//
// Builds every 3-byte atom matching the sequence <first> <middle> <last>,
// where first and last are literal nodes and middle is either an
// RE_NODE_ANY node (256 possible bytes) or an RE_NODE_MASKED_LITERAL node
// (16 possible bytes, one per value of the unknown nibble).
//
// Args:
//    RE_NODE* first             - Literal node supplying the first byte and
//                                 the forward/backward code pointers.
//    RE_NODE* middle            - RE_NODE_ANY or RE_NODE_MASKED_LITERAL node.
//    RE_NODE* last              - Literal node supplying the third byte.
//    YR_ATOM_LIST_ITEM** atoms  - In/out list head; new atoms are pushed at
//                                 the front.
//
// Returns:
//    ERROR_SUCCESS, or ERROR_INSUFFICIENT_MEMORY if an allocation fails. On
//    failure the list in *atoms is destroyed and *atoms is set to NULL, so
//    the caller never receives a partially built list.
//

static int _yr_atoms_triplets_generate(
    RE_NODE* first,
    RE_NODE* middle,
    RE_NODE* last,
    YR_ATOM_LIST_ITEM** atoms)
{
  YR_ATOM_LIST_ITEM* atom;

  int is_masked = (middle->type == RE_NODE_MASKED_LITERAL);
  int count = 256;
  int shift = 0;
  int i;

  if (is_masked)
  {
    // Only one nibble is unknown. With mask 0xF0 the enumerated values go
    // into the low nibble (no shift), otherwise into the high nibble.
    count = 16;
    shift = (middle->mask == 0xF0) ? 0 : 4;
  }

  for (i = 0; i < count; i++)
  {
    atom = (YR_ATOM_LIST_ITEM*) yr_malloc(sizeof(YR_ATOM_LIST_ITEM));

    if (atom == NULL)
    {
      // Release the atoms created so far instead of handing back a
      // truncated list.
      yr_atoms_list_destroy(*atoms);
      *atoms = NULL;

      return ERROR_INSUFFICIENT_MEMORY;
    }

    atom->atom[0] = (uint8_t) first->value;

    if (is_masked)
      atom->atom[1] = (uint8_t)(middle->value | (i << shift));
    else
      atom->atom[1] = (uint8_t) i;

    atom->atom[2] = (uint8_t) last->value;

    atom->atom_length = 3;
    atom->forward_code = first->forward_code;
    atom->backward_code = first->backward_code;
    atom->backtrack = 0;

    atom->next = *atoms;
    *atoms = atom;
  }

  return ERROR_SUCCESS;
}


//
// yr_atoms_extract_triplets
//
// Walks down the left spine of a chain of RE_NODE_CONCAT nodes looking for
// a <literal> <any byte | masked literal> <literal> sequence. For the first
// such sequence found it generates all the 3-byte atoms the sequence can
// match.
//
// Args:
//    RE_NODE* re_node           - Node where the search starts.
//    YR_ATOM_LIST_ITEM** atoms  - Receives the generated list. Set to NULL
//                                 when no suitable sequence exists or when
//                                 an allocation fails.
//
// Returns:
//    ERROR_SUCCESS (also when nothing was found) or
//    ERROR_INSUFFICIENT_MEMORY.
//

int yr_atoms_extract_triplets(
    RE_NODE* re_node,
    YR_ATOM_LIST_ITEM** atoms)
{
  RE_NODE* left_child;
  RE_NODE* left_grand_child;
  RE_NODE* first;

  // Iterating instead of recursing keeps stack usage constant regardless of
  // how long the concatenation chain is.
  for (;;)
  {
    *atoms = NULL;

    if (re_node->type != RE_NODE_CONCAT)
      return ERROR_SUCCESS;

    left_child = re_node->left;

    if (left_child->type != RE_NODE_CONCAT)
      return ERROR_SUCCESS;

    left_grand_child = left_child->left;
    first = NULL;

    if (re_node->right->type == RE_NODE_LITERAL &&
        (left_child->right->type == RE_NODE_ANY ||
         left_child->right->type == RE_NODE_MASKED_LITERAL))
    {
      // The first byte is either the left grandchild itself, or, when the
      // grandchild is another concatenation, its right operand.
      if (left_grand_child->type == RE_NODE_LITERAL)
      {
        first = left_grand_child;
      }
      else if (left_grand_child->type == RE_NODE_CONCAT &&
               left_grand_child->right->type == RE_NODE_LITERAL)
      {
        first = left_grand_child->right;
      }
    }

    if (first != NULL)
    {
      return _yr_atoms_triplets_generate(
          first, left_child->right, re_node->right, atoms);
    }

    // No triplet ends at this node, try one level further to the left.
    re_node = left_child;
  }
}
//
// yr_atoms_extract_from_re
//
// Extracts the atoms that will be used to search for a regular expression.
// An atom tree is built from the regexp's node tree and the best atoms are
// chosen from it. If the chosen atoms have a minimum quality of 2 or less,
// 3-byte atoms inferred by yr_atoms_extract_triplets are used instead when
// their minimum quality is higher. Finally the list is extended according
// to flags:
//
//    STRING_GFLAGS_WIDE     - wide atoms replace the list, or are appended
//                             to it if STRING_GFLAGS_ASCII is also set.
//    STRING_GFLAGS_NO_CASE  - case-insensitive atoms are appended.
//
// Args:
//    RE* re                     - Regular expression to extract atoms from.
//    int flags                  - Combination of STRING_GFLAGS_xxx flags.
//    YR_ATOM_LIST_ITEM** atoms  - Receives the resulting atom list.
//
// Returns:
//    ERROR_SUCCESS, ERROR_INSUFFICIENT_MEMORY, or whatever error the wide /
//    case-insensitive helpers report through FAIL_ON_ERROR. When triplet
//    extraction fails, *atoms is destroyed and set to NULL before returning.
//

int yr_atoms_extract_from_re(
    RE* re,
    int flags,
    YR_ATOM_LIST_ITEM** atoms)
{
  ATOM_TREE* atom_tree = yr_malloc(sizeof(ATOM_TREE));
  ATOM_TREE_NODE* temp;

  YR_ATOM_LIST_ITEM* wide_atoms;
  YR_ATOM_LIST_ITEM* case_insentive_atoms;
  YR_ATOM_LIST_ITEM* triplet_atoms;

  int min_atom_quality = 0;
  int result;

  if (atom_tree == NULL)
    return ERROR_INSUFFICIENT_MEMORY;

  atom_tree->root_node = _yr_atoms_tree_node_create(ATOM_TREE_OR);

  // NOTE(review): presumably _yr_atoms_tree_node_create returns NULL when it
  // cannot allocate the node; confirm against its definition.
  if (atom_tree->root_node == NULL)
  {
    yr_free(atom_tree);
    return ERROR_INSUFFICIENT_MEMORY;
  }

  atom_tree->current_leaf = NULL;

  atom_tree->root_node = _yr_atoms_extract_from_re_node(
      re->root_node, atom_tree, atom_tree->root_node);

  if (atom_tree->current_leaf != NULL)
    _yr_atoms_tree_node_append(atom_tree->root_node, atom_tree->current_leaf);

  if (atom_tree->root_node->children_head ==
      atom_tree->root_node->children_tail)
  {
    // The root OR node has a single child, there's no need for the OR node so
    // we proceed to destroy it and use its child as root.

    temp = atom_tree->root_node;
    atom_tree->root_node = atom_tree->root_node->children_head;
    yr_free(temp);
  }

  // Choose the atoms that will be used.
  min_atom_quality = _yr_atoms_choose(atom_tree->root_node, atoms);

  _yr_atoms_tree_destroy(atom_tree);

  if (min_atom_quality <= 2)
  {
    // Chosen atoms contain low quality ones, let's try inferring some higher
    // quality atoms.

    result = yr_atoms_extract_triplets(re->root_node, &triplet_atoms);

    if (result != ERROR_SUCCESS)
    {
      // Don't carry on with a possibly incomplete triplet list: release
      // everything and report the failure.
      yr_atoms_list_destroy(triplet_atoms);
      yr_atoms_list_destroy(*atoms);
      *atoms = NULL;

      return result;
    }

    if (min_atom_quality < _yr_atoms_min_quality(triplet_atoms))
    {
      yr_atoms_list_destroy(*atoms);
      *atoms = triplet_atoms;
    }
    else
    {
      yr_atoms_list_destroy(triplet_atoms);
    }
  }

  if (flags & STRING_GFLAGS_WIDE)
  {
    FAIL_ON_ERROR(_yr_atoms_wide(
        *atoms, &wide_atoms));

    if (flags & STRING_GFLAGS_ASCII)
    {
      *atoms = _yr_atoms_list_concat(*atoms, wide_atoms);
    }
    else
    {
      yr_atoms_list_destroy(*atoms);
      *atoms = wide_atoms;
    }
  }

  if (flags & STRING_GFLAGS_NO_CASE)
  {
    FAIL_ON_ERROR(_yr_atoms_case_insentive(
        *atoms, &case_insentive_atoms));

    *atoms = _yr_atoms_list_concat(*atoms, case_insentive_atoms);
  }

  return ERROR_SUCCESS;
}