Example #1
0
/*
 * find_all(word, callback[, start[, end]])
 *
 * Feeds word[start:end] through the automaton one letter at a time and calls
 * `callback(index, value)` for every node reached through the fail links of
 * the current state, for as long as those nodes are flagged `eow`.
 *
 * args[0]  string to scan (decoded by pymod_get_string_from_tuple)
 * args[1]  callable; TypeError is raised otherwise
 * args[2], args[3]  optional range, parsed by pymod_parse_start_end
 *          (0 and wordlen are handed to it, presumably as the defaults)
 *
 * Returns None on success, and also immediately when the automaton kind is
 * not AHOCORASICK.  Returns NULL when argument parsing fails or when the
 * callback call fails.
 *
 * NOTE(review): if pymod_get_string_from_tuple hands back a new reference,
 * py_word is never released on any return path -- confirm against the helper.
 */
static PyObject*
automaton_find_all(PyObject* self, PyObject* args) {
#define automaton ((Automaton*)self)
	ssize_t wordlen;
	ssize_t start;
	ssize_t end;
	ssize_t i;
	TRIE_LETTER_TYPE* word;
	PyObject* py_word;
	PyObject* callback;
	PyObject* callback_ret;
	TrieNode* state;
	TrieNode* tmp;

	if (automaton->kind != AHOCORASICK) {
		Py_RETURN_NONE;
	}

	// arg 1: the text to scan
	py_word = pymod_get_string_from_tuple(args, 0, &word, &wordlen);
	if (py_word == NULL) {
		return NULL;
	}

	// arg 2: the callback (PyTuple_GetItem sets the error itself when missing)
	callback = PyTuple_GetItem(args, 1);
	if (callback == NULL) {
		return NULL;
	}

	if (!PyCallable_Check(callback)) {
		PyErr_SetString(PyExc_TypeError, "second argument isn't callable");
		return NULL;
	}

	// args 3 and 4: optional start/end
	if (pymod_parse_start_end(args, 2, 3, 0, wordlen, &start, &end)) {
		return NULL;
	}

	state = automaton->root;
	for (i = start; i < end; i++) {
		state = ahocorasick_next(state, automaton->root, word[i]);

		// report the current state and follow its fail links while they are eow
		for (tmp = state; tmp != NULL && tmp->eow; tmp = tmp->fail) {
			// "n" is the format unit for a Py_ssize_t-sized index; the previous
			// "i" read the ssize_t argument as a C int (varargs type mismatch).
			if (automaton->store == STORE_ANY) {
				callback_ret = PyObject_CallFunction(callback, "nO", i, tmp->output.object);
			}
			else {
				callback_ret = PyObject_CallFunction(callback, "ni", i, tmp->output.integer);
			}

			// propagate whatever exception the callback raised
			if (callback_ret == NULL) {
				return NULL;
			}

			Py_DECREF(callback_ret);
		}
	}
#undef automaton

	Py_RETURN_NONE;
}
/*
 * Produces the next item of a search iterator.
 *
 * `iter` is not declared in this function; presumably it is a macro defined
 * elsewhere that views `self` as the iterator structure -- confirm.
 *
 * Each round first asks automaton_build_output() for a result:
 *   OutputValue -> the object stored in `output` is returned,
 *   OutputError -> NULL is returned,
 *   OutputNone  -> the next input element is consumed and the round repeats.
 *
 * Returns NULL with ValueError set when the iterator and automaton versions
 * differ.  Returns NULL without setting an exception when the index reaches
 * `end` (StopIteration).
 */
static PyObject*
automaton_search_iter_next(PyObject* self) {
	PyObject* output;

	if (iter->version != iter->automaton->version) {
		PyErr_SetString(PyExc_ValueError, "underlaying automaton has changed, iterator is not valid anymore");
		return NULL;
	}

	for (;;) {
		switch (automaton_build_output(self, &output)) {
			case OutputValue:
				return output;

			case OutputError:
				return NULL;

			case OutputNone:
				// nothing to report for the current state, consume more input
				break;
		}

		iter->index += 1;
		if (!(iter->index < iter->end)) {
			return NULL;	// StopIteration
		}

		// process single char
		iter->state = ahocorasick_next(
						iter->state,
						iter->automaton->root,
						iter->data[iter->index]
						);

		ASSERT(iter->state);

		// the new state becomes the candidate examined at the top of the loop
		iter->output = iter->state;
	}
}
/*
 * Produces the next (position, value) tuple of a search iterator.
 *
 * `iter` is not declared in this function; presumably it is a macro defined
 * elsewhere that views `self` as the iterator structure -- confirm.
 *
 * If a node flagged `eow` is already pending in iter->output, it is reported
 * right away.  Otherwise input is consumed from index + 1 up to `end` until
 * the automaton reaches a state flagged `eow`.  After a tuple is built,
 * iter->output moves to the node's fail link.
 *
 * Returns NULL with ValueError set when the iterator and automaton versions
 * differ or when `store` holds an unexpected value.  Returns NULL without
 * setting an exception when the input is exhausted (StopIteration).
 */
static PyObject*
automaton_search_iter_next(PyObject* self) {
	if (iter->version != iter->automaton->version) {
		PyErr_SetString(PyExc_ValueError, "underlaying automaton has changed, iterator is not valid anymore");
		return NULL;
	}

// NOTE(review): NEXT is never #undef'd, so it stays visible to whatever
// follows in this file -- confirm whether that is intended.
#define NEXT(byte) ahocorasick_next(iter->state, iter->automaton->root, (byte))

	if (!(iter->output && iter->output->eow)) {
		// no pending output: scan forward for the next eow state
		int matched = 0;

		for (iter->index += 1; iter->index < iter->end; iter->index += 1) {
			// process single char
			iter->state = NEXT(iter->data[iter->index]);

			ASSERT(iter->state);

			if (iter->state->eow) {
				// the index is left on the matching position
				iter->output = iter->state;
				matched = 1;
				break;
			}
		}

		if (!matched) {
			return NULL;	// StopIteration
		}
	}

	{
		TrieNode* match = iter->output;
		PyObject* result;

		// NOTE(review): the "i" format unit expects C int arguments; the types
		// of index, shift and output.integer are not visible here -- confirm.
		switch (iter->automaton->store) {
			case STORE_ANY:
				result = Py_BuildValue("iO",
							iter->index + iter->shift,
							match->output.object);
				break;

			case STORE_LENGTH:
			case STORE_INTS:
				result = Py_BuildValue("ii",
							iter->index + iter->shift,
							match->output.integer);
				break;

			default:
				PyErr_SetString(PyExc_ValueError, "inconsistent internal state!");
				return NULL;
		}

		// queue the next candidate for the following call
		iter->output = match->fail;

		// yield value
		return result;
	}
}