/**
 * lst_stringset_free - destroy a string set together with its members.
 * @set: set to destroy; a NULL pointer is accepted and ignored.
 *
 * Every member is unlinked from the set's list and handed to
 * lst_string_free(); afterwards the set structure itself is freed.
 */
void
lst_stringset_free(LST_StringSet *set)
{
    LST_String *string;

    if (!set)
        return;

    while ((string = set->members.lh_first) != NULL) {
        /* Unlink via the saved pointer rather than the list-head
         * expression: LIST_REMOVE() evaluates its element argument more
         * than once and rewrites the head while doing so, so the head
         * expression would name a different element (or NULL) part-way
         * through the macro. */
        LIST_REMOVE(string, set);
        lst_string_free(string);
    }

    free(set);
}
/*
 * Callback for tree traversal.
 *
 * Annotates `node` with an array of counters, one per string in p->tree,
 * to keep track of which strings have leaves in the node's subtree:
 *   - a leaf gets a single 1 at the index of the string that its incoming
 *     edge refers to;
 *   - an inner node gets the element-wise sum of its children's arrays, so
 *     every child must already carry an annotation when its parent is
 *     visited (this is asserted).
 *
 * If the number of distinct strings with a non-zero counter is at least
 * p->min_occ, and lst_node_get_string_length(node) is at least p->min_len,
 * the node's substring is stored in the p->substrings dictionary, mapped to
 * the object returned by subtree_occurrences().
 *
 * Always returns True.
 *
 * NOTE(review): if building the Python objects fails, the entry is skipped
 * and True is still returned; any Python exception raised by the failing
 * call is left pending -- confirm the caller checks for it.
 */
int annotate(LST_Node *node, params_t *p)
{
    annotation_t *annotation;
    annotation_t *child_annotation;
    LST_Edge *edge;
    LST_Node *child;
    int *resized;
    int i;

    if (node->annotation == NULL) {
        /* first visit: allocate the annotation; calloc() already zeroes
         * the counters */
        node->annotation = malloc(sizeof(annotation_t));
        assert(node->annotation != NULL);
        annotation = (annotation_t*)node->annotation;
        annotation->strings = calloc(p->tree->numstrings, sizeof(int));
        assert(annotation->strings != NULL);
    } else {
        /* reallocate in case the number of strings has changed; keep the
         * old pointer until the new one is known to be valid */
        annotation = (annotation_t*)node->annotation;
        resized = realloc(annotation->strings,
                          p->tree->numstrings * sizeof(int));
        assert(resized != NULL);
        annotation->strings = resized;

        /* counters are recomputed from scratch on every visit */
        memset(annotation->strings, 0, p->tree->numstrings * sizeof(int));
    }

    if (lst_node_is_leaf(node)) {
        int string_idx = lst_stree_get_string_index(p->tree->tree,
                                                    node->up_edge->range.string);

        /* the index is used as an array subscript below; refuse anything
         * outside the allocated counters */
        assert(string_idx >= 0 && string_idx < p->tree->numstrings);
        annotation->strings[string_idx] = 1;
        return True;
    }

    /* incorporate the children's annotations */
    for (edge = node->kids.lh_first; edge; edge = edge->siblings.le_next) {
        child = edge->dst_node;
        assert(child->annotation != NULL);
        child_annotation = (annotation_t*)child->annotation;
        for (i = 0; i < p->tree->numstrings; i++) {
            annotation->strings[i] += child_annotation->strings[i];
        }
    }

    /* how many distinct strings contain this substring? */
    int string_count = 0;
    for (i = 0; i < p->tree->numstrings; i++) {
        if (annotation->strings[i]) {
            string_count++;
        }
    }

    /* if this node's substring occurs in at least min_occ strings,
     * and is at least min_len long, report it. */
    if (string_count >= p->min_occ) {
        int len = lst_node_get_string_length(node);
        if (len >= p->min_len) {
            /* add a dictionary mapping the strings that this
             * substring occurred in to the number of times
             * it occurred in each string. */
            PyObject *pydict = subtree_occurrences(node, p->tree->numstrings);
            LST_String *s = lst_node_get_string(node, len);
            PyObject *pystring = NULL;

            if (s != NULL) {
                /* the last item of the string is left out of the key */
                pystring = PyString_FromStringAndSize(s->data, s->num_items-1);
            }

            /* only insert when both key and value were actually created */
            if (pydict != NULL && pystring != NULL) {
                PyDict_SetItem(p->substrings, pystring, pydict);
            }

            /* the dictionary holds its own references; drop ours
             * (X-variants because either object may be NULL here) */
            Py_XDECREF(pystring);
            Py_XDECREF(pydict);
            if (s != NULL) {
                lst_string_free(s);
            }
        }
    }

    return True;
}