Ejemplo n.º 1
0
/*
 * Convert the trie stored in `self` into an Aho-Corasick automaton by
 * assigning a fail link to every node, visiting nodes in FIFO (breadth-first)
 * order with a temporary queue.
 *
 * Parameters:
 *   self - the Automaton object (accessed through the local `automaton` macro)
 *   args - not used
 *
 * Returns:
 *   False - when automaton->kind is not TRIE; nothing is modified;
 *   None  - on success; kind is set to AHOCORASICK and version is incremented;
 *   NULL  - with MemoryError set when a queue item cannot be allocated; kind
 *           and version are left untouched, but fail links assigned before
 *           the failure remain in place.
 */
static PyObject*
automaton_make_automaton(PyObject* self, PyObject* args) {
#define automaton ((Automaton*)self)
	if (automaton->kind != TRIE)
		Py_RETURN_FALSE;

	AutomatonQueueItem* item;
	List queue;
	int i;

	list_init(&queue);

	// 1. setup nodes at 1-st level
	ASSERT(automaton->root);

	for (i=0; i < 256; i++) {
		TrieNode* child = trienode_get_next(automaton->root, i);
		if (child == NULL)
			// loop on root - implicit (see automaton_next)
			continue;

		// fail edges go to root
		child->fail = automaton->root;

		item = (AutomatonQueueItem*)list_item_new(sizeof(AutomatonQueueItem));
		if (item == NULL)
			goto no_mem;

		item->node = child;
		list_append(&queue, (ListItem*)item);
	}

	// 2. make links
	TrieNode* node;
	TrieNode* child;
	TrieNode* state;
	size_t k;
	while (true) {
		item = (AutomatonQueueItem*)list_pop_first(&queue);
		if (item == NULL)
			break;

		// the queue item is only a carrier for the node pointer
		node = item->node;
		memfree(item);

		const size_t n = node->n;
		for (k=0; k < n; k++) {
			child = node->next[k];
			ASSERT(child);

			// check the allocation BEFORE touching the item
			item = (AutomatonQueueItem*)list_item_new(sizeof(AutomatonQueueItem));
			if (item == NULL)
				goto no_mem;

			item->node = child;
			list_append(&queue, (ListItem*)item);

			// walk the parent's fail chain until some state has an edge
			// labelled with child's letter, or the root is reached
			state = node->fail;
			ASSERT(state);
			while (state != automaton->root and
				   not trienode_get_next(state, child->letter)) {

				state = state->fail;
				ASSERT(state);
			}

			// no matching edge even at the root: fall back to the root itself
			child->fail = trienode_get_next(state, child->letter);
			if (child->fail == NULL)
				child->fail = automaton->root;

			ASSERT(child->fail);
		}
	}

	automaton->kind = AHOCORASICK;
	automaton->version += 1;
	list_delete(&queue);
	Py_RETURN_NONE;

no_mem:
	// release whatever is still queued, then report MemoryError
	list_delete(&queue);
	PyErr_NoMemory();
	return NULL;
#undef automaton
}
Ejemplo n.º 2
0
/*
 * Advance the DAWG iterator by one result: pop pending (node, letter, depth)
 * items from iter->stack, push their successors, and return the first word
 * that ends at a node flagged `eow` and satisfies the iterator's match type.
 *
 * NOTE(review): `iter` is not declared inside this function; presumably it is
 * a file-level macro casting `self` to the iterator struct -- confirm.
 *
 * Returns:
 *   - a new object built from iter->buffer (PyUnicode when DAWG_UNICODE is
 *     defined, PyBytes otherwise);
 *   - NULL with ValueError set when iter->version differs from
 *     iter->dawg->version;
 *   - NULL with MemoryError set when a stack item cannot be allocated;
 *   - NULL with no exception set when the stack is empty or the popped item
 *     has no node (end of iteration).
 *
 * Every popped stack item is released with memfree() once its fields have
 * been read.
 */
static PyObject*
DAWGIterator_next(PyObject* self) {
	if (UNLIKELY(iter->version != iter->dawg->version)) {
		PyErr_SetString(PyExc_ValueError, "underlaying graph has changed, iterator is not valid anymore");
		return NULL;
	}

	while (true) {
		StackItem* item = (StackItem*)list_pop_first(&iter->stack);
		if (item == NULL)
			return NULL; /* Stop iteration */

		if (item->node == NULL) {
			memfree(item);
			return NULL; /* Stop iteration */
		}

		const size_t index = item->depth;
		if (iter->matchtype != MATCH_AT_LEAST_PREFIX and index > iter->pattern_length) {
			// deeper than the pattern allows for this match type: drop it
			memfree(item);
			continue;
		}

		// take everything needed out of the item, then release it so that no
		// exit path below can leak it
		iter->state = item->node;
		iter->buffer[index] = item->letter;
		memfree(item);

		bool output;
		switch (iter->matchtype) {
			case MATCH_EXACT_LENGTH:
				output = (index == iter->pattern_length);
				break;

			case MATCH_AT_MOST_PREFIX:
				output = (index <= iter->pattern_length);
				break;

			case MATCH_AT_LEAST_PREFIX:
			default:
				output = (index >= iter->pattern_length);
				break;

		}

		if ((index >= iter->pattern_length) or
		    (iter->use_wildcard and iter->pattern[index] == iter->wildcard)) {

			// past the pattern, or a wildcard position: follow every edge
			const int n = iter->state->n;
			int i;
			for (i=0; i < n; i++) {
				StackItem* new_item = (StackItem*)list_item_new(sizeof(StackItem));
				if (not new_item) {
					PyErr_NoMemory();
					return NULL;
				}

				new_item->node  = iter->state->next[i].child;
				new_item->letter= iter->state->next[i].letter;
				new_item->depth = index + 1;
				list_push_front(&iter->stack, (ListItem*)new_item);
			}
		}
		else {
			// process single letter
			const DAWG_LETTER_TYPE ch = iter->pattern[index];
			DAWGNode* node = dawgnode_get_child(iter->state, ch);

			if (node) {
				StackItem* new_item = (StackItem*)list_item_new(sizeof(StackItem));
				if (UNLIKELY(new_item == NULL)) {
					PyErr_NoMemory();
					return NULL;
				}

				new_item->node  = node;
				new_item->letter= ch;
				new_item->depth = index + 1;
				list_push_front(&iter->stack, (ListItem*)new_item);
			}
		}

		// the word occupies buffer[1..index]; slot 0 is skipped
		if (output and iter->state->eow) {
#ifdef DAWG_UNICODE
			return PyUnicode_FromUnicode(iter->buffer + 1, index);
#else
			return PyBytes_FromStringAndSize((char*)(iter->buffer + 1), index);
#endif
		}
	}
}