예제 #1
0
파일: hunpos_proc.c 프로젝트: Oghma/speect
/*
 * Store a tag as the "POS" string feature of one item in an item array.
 *
 * data  - interpreted as an array of SItem pointers
 * n     - index of the item inside that array (not range-checked here)
 * tag   - string written to the item's "POS" feature
 * error - set to 1 when SItemSetString reports an error; not written
 *         to otherwise
 */
static void set_tag(void* data, int n, const char * tag, int* error)
{
	SItem** items = data;
	s_erc rc;

	SItemSetString(items[n], "POS", tag, &rc);

	/* nothing to report when the call succeeded */
	if (!S_CHK_ERR(&rc, S_CONTERR,
		       "set_tag",
		       "Call to \"SItemSetString\" failed"))
		return;

	*error = 1;
}
예제 #2
0
/*
 * Run a feature processor on every syllable of a word and store the
 * string it yields as the syllable's "stress" feature.
 *
 * proc  - feature processor executed on each syllable
 * word  - word item; its daughters in the "SylStructure" relation are
 *         the syllables that get visited
 * error - Speect error code; cleared on entry and set by any failing call
 *
 * Processing stops at the first failing call. The object returned by
 * the feature processor is deleted after each syllable and again
 * (if still set) on the way out.
 */
static void s_compute_stresses ( const SFeatProcessor* proc, SItem* word, s_erc *error )
{
	S_CLR_ERR(error);
	SItem *wordAsSylStructure = SItemAs(word, "SylStructure", error);
	if (S_CHK_ERR(error, S_CONTERR,
		      "s_compute_stresses",
		      "Call to \"SItemAs\" failed"))
		return;

	SItem *syllable = SItemDaughter(wordAsSylStructure, error);
	if (S_CHK_ERR(error, S_CONTERR,
		      "s_compute_stresses",
		      "Call to \"SItemDaughter\" failed"))
		return;

	SObject* result = NULL;
	while (syllable != NULL)
	{
		/* the error messages below name the function that is really
		 * called, so a log entry points at the right place */
		result = SFeatProcessorRun ( proc, syllable, error);
		if (S_CHK_ERR(error, S_CONTERR,
					  "s_compute_stresses",
					  "Call to \"SFeatProcessorRun\" failed"))
			goto s_compute_stresses_cleanup;

		const char* resultString = SObjectGetString ( result, error );
		if (S_CHK_ERR(error, S_CONTERR,
					  "s_compute_stresses",
					  "Call to \"SObjectGetString\" failed"))
			goto s_compute_stresses_cleanup;

		SItemSetString ( syllable, "stress", resultString, error );
		if (S_CHK_ERR(error, S_CONTERR,
					  "s_compute_stresses",
					  "Call to \"SItemSetString\" failed"))
			goto s_compute_stresses_cleanup;

		syllable = SItemNext ( syllable, error );
		if (S_CHK_ERR(error, S_CONTERR,
					  "s_compute_stresses",
					  "Call to \"SItemNext\" failed"))
			goto s_compute_stresses_cleanup;

		/* done with this syllable's result object */
		if(result != NULL) {
			S_DELETE(result, "s_compute_stresses", error);
		}
	}
s_compute_stresses_cleanup:
	/* reached on success and on failure; releases a result that an
	 * early exit from the loop left behind */
	if(result != NULL) {
		S_DELETE(result, "s_compute_stresses", error);
	}

}
예제 #3
0
/*
 * Build the "Phrase" and "Sentence" relations of an utterance from its
 * "Word" relation.
 *
 * Every word becomes a daughter of the current phrase item. A phrase
 * item starts out named "NB"; when the token following the word's token
 * has an "IsPunctuation" feature greater than zero, the word is the last
 * word of its token, and s_strpbrk finds a match between that following
 * token's name and the end-punctuation symbols, the phrase is renamed
 * "BB" and a new phrase item is started for the next word. Each new
 * phrase item is created together with a new sentence item that takes
 * the phrase as its daughter.
 *
 * self  - utterance processor; passed to s_get_phrasing_symbols
 * utt   - utterance; must already contain a "Word" relation
 * error - Speect error code; cleared on entry and set on failure
 *
 * On failure, whichever of the "Sentence" and "Phrase" relations was
 * already created is deleted from the utterance again.
 */
static void Run(const SUttProcessor *self, SUtterance *utt,
				s_erc *error)
{
	const SRelation *wordRel;
	SItem *wordItem;
	const SItem *tokenItem;
	s_bool is_present;
	const char *end_punc;
	SRelation *phraseRelation = NULL;
	SItem *phraseItem = NULL;
	const char *post_punc;
	SRelation *sentenceRelation = NULL;
	SItem *sentenceItem = NULL;


	S_CLR_ERR(error);

	/* we require the word relation */
	is_present = SUtteranceRelationIsPresent(utt, "Word", error);
	if (S_CHK_ERR(error, S_CONTERR,
				  "Run",
				  "Call to \"SUtteranceRelationIsPresent\" failed"))
		goto quit_error;

	if (!is_present)
	{
		S_CTX_ERR(error, S_FAILURE,
				  "Run",
				  "Failed to find 'Word' relation in utterance");
		goto quit_error;
	}

	wordRel = SUtteranceGetRelation(utt, "Word", error);
	if (S_CHK_ERR(error, S_CONTERR,
				  "Run",
				  "Call to \"SUtteranceGetRelation\" failed"))
		goto quit_error;

	/* get phrasing symbols */
	s_get_phrasing_symbols(self, &end_punc, error);
	if (S_CHK_ERR(error, S_CONTERR,
				  "Run",
				  "Call to \"s_get_phrasing_symbols\" failed"))
		goto quit_error;

	/* create Phrase relation */
	phraseRelation = SUtteranceNewRelation(utt, "Phrase", error);
	if (S_CHK_ERR(error, S_CONTERR,
				  "Run",
				  "Call to \"SUtteranceNewRelation\" failed"))
		goto quit_error;

	/* create Sentence relation */
	sentenceRelation = SUtteranceNewRelation(utt, "Sentence", error);
	if (S_CHK_ERR(error, S_CONTERR,
				  "Run",
				  "Call to \"SUtteranceNewRelation\" failed"))
		goto quit_error;

	/* start at the first item in the word relation, cast away
	 * const, we want to add daughter items
	 */
	wordItem = (SItem*)SRelationHead(wordRel, error);
	if (S_CHK_ERR(error, S_CONTERR,
				  "Run",
				  "Call to \"SRelationHead\" failed"))
		goto quit_error;

	while (wordItem != NULL)
	{
		SItem *lastWordInToken;
		SItem *wordAsToken;


		if (phraseItem == NULL)
		{
			/* No current phrase (first pass, or the previous phrase
			 * was closed with a break): append a new sentence item
			 * and a new phrase item, both with NULL shared content.
			 */
			sentenceItem = SRelationAppend(sentenceRelation, NULL, error);
			if (S_CHK_ERR(error, S_CONTERR,
						  "Run",
						  "Call to \"SRelationAppend\" failed"))
				goto quit_error;

			/* for now sentence items map one-to-one onto phrase items */
			phraseItem = SRelationAppend(phraseRelation, NULL, error);
			if (S_CHK_ERR(error, S_CONTERR,
						  "Run",
						  "Call to \"SRelationAppend\" failed"))
				goto quit_error;

			/* add an item name, NB, no break */
			SItemSetString(phraseItem, "name", "NB", error);
			if (S_CHK_ERR(error, S_CONTERR,
						  "Run",
						  "Call to \"SItemSetString\" failed"))
				goto quit_error;

			SItemAddDaughter(sentenceItem, phraseItem, error);
			if (S_CHK_ERR(error, S_CONTERR,
						  "Run",
						  "Call to \"SItemAddDaughter\" failed"))
				goto quit_error;
		}

		/* Create a daughter item for the phrase item. Shared content
		 * is the word item.
		 */
		SItemAddDaughter(phraseItem, wordItem, error);
		if (S_CHK_ERR(error, S_CONTERR,
					  "Run",
					  "Call to \"SItemAddDaughter\" failed"))
			goto quit_error;

		/* get word as in Token relation */
		wordAsToken = SItemAs(wordItem, "Token", error);
		if (S_CHK_ERR(error, S_CONTERR,
					  "Run",
					  "Failed to get word item's as in Token relation"))
			goto quit_error;

		/*
		 * get word's token which is the parent of wordAsToken.
		 */
		tokenItem = SItemParent(wordAsToken, error);
		if (S_CHK_ERR(error, S_CONTERR,
					  "Run",
					  "Failed to get word item's token item"))
			goto quit_error;

		/* get last word in token */
		lastWordInToken = SItemLastDaughter(tokenItem, error);
		if (S_CHK_ERR(error, S_CONTERR,
			"Run",
		"Failed to get last daughter of token item"))
			goto quit_error;

		/* check if the next token is punctuation; from here on
		 * tokenItem refers to the token AFTER the word's own token */
		is_present = FALSE;
		tokenItem = SItemNext(tokenItem, error);
		if (S_CHK_ERR(error, S_CONTERR,
					  "Run",
					  "Call to \"SItemNext\" failed"))
			goto quit_error;
		if (tokenItem != NULL)
		{
			is_present = SItemFeatureIsPresent(tokenItem, "IsPunctuation", error);
			if (S_CHK_ERR(error, S_CONTERR,
						"Run",
						"Call to \"SItemFeatureIsPresent\" failed"))
				goto quit_error;
			if (is_present)
			{
				/* the feature must also be positive to count */
				sint32 value = SItemGetInt(tokenItem, "IsPunctuation", error);
				if (S_CHK_ERR(error, S_CONTERR,
							"Run",
							"Call to \"SItemGetInt\" failed"))
					goto quit_error;
				is_present = (value > 0);
			}
		}

		/* a break is only considered after the last word of a token */
		if ((is_present) && (wordAsToken == lastWordInToken))
		{
			char *ptr;


			post_punc = SItemGetName(tokenItem, error);
			if (S_CHK_ERR(error, S_CONTERR,
						  "Run",
						  "Call to \"SItemGetName\" failed"))
				goto quit_error;

			/* check if it is in the end_punc list */
			ptr = s_strpbrk(post_punc, end_punc, error);
			if (S_CHK_ERR(error, S_CONTERR,
						  "Run",
						  "Call to \"s_strpbrk\" failed"))
				goto quit_error;

			if (ptr != NULL)
			{
				/* add a phrase break */
				SItemSetString(phraseItem, "name", "BB", error);
				if (S_CHK_ERR(error, S_CONTERR,
							  "Run",
							  "Call to \"SItemSetString\" failed"))
					goto quit_error;

				/* set to NULL so that a new phrase item is created */
				phraseItem = NULL;
			}
		}

		wordItem = SItemNext(wordItem, error);
		if (S_CHK_ERR(error, S_CONTERR,
					  "Run",
					  "Call to \"SItemNext\" failed"))
			goto quit_error;
	}

	/* here all is OK */
	return;

	/* error clean-up code: remove every relation this function added,
	 * so a failed run does not leave a half-built "Sentence" relation
	 * behind next to the deleted "Phrase" relation */
quit_error:
	if (sentenceRelation != NULL)
	{
		SUtteranceDelRelation(utt, "Sentence", error);
		S_CHK_ERR(error, S_CONTERR,
				  "Run",
				  "Call to \"SUtteranceDelRelation\" failed");
	}

	if (phraseRelation != NULL)
	{
		SUtteranceDelRelation(utt, "Phrase", error);
		S_CHK_ERR(error, S_CONTERR,
				  "Run",
				  "Call to \"SUtteranceDelRelation\" failed");
	}
}
예제 #4
0
/*
 * Tag the tokens of every phrase in the utterance's "Phrase" relation
 * with a CRFSuite model and store each predicted label as the token's
 * "POS" feature.
 *
 * self  - utterance processor; cast to SCrfSuiteUttProc to read
 *         model_file
 * utt   - utterance holding the "Phrase" relation
 * error - Speect error code; set by failing Speect calls
 *
 * Every exit path goes through exit_cleanup, so the model, its
 * interfaces, the label buffer and the current instance are released
 * no matter where processing stops.
 *
 * NOTE(review): as in the original code, a failing CRFSuite call (or a
 * failed allocation) ends the function without setting `error` --
 * confirm whether callers need these failures reported.
 */
static void Run(const SUttProcessor *self, SUtterance *utt,
		s_erc *error)
{
	SCrfSuiteUttProc *crfsuiteProc = (SCrfSuiteUttProc*)self;

	/* All of these are handed out by CRFSuite / create_phrase_instance;
	 * pre-allocating them only leaked the placeholder memory. */
	crfsuite_model_t * ptr_model = NULL;
	crfsuite_tagger_t * ptr_tagger = NULL;
	crfsuite_dictionary_t * ptr_attrs = NULL;
	crfsuite_dictionary_t * ptr_labels = NULL;
	crfsuite_instance_t * instance = NULL;
	int *output = NULL;
	const SRelation* phrase;
	SItem* itrPhrase;

	/* Initialize model object */
	if ( crfsuite_create_instance_from_file( crfsuiteProc->model_file,
	                                              (void**)&ptr_model
	                                            ) != 0 ) {
		goto exit_cleanup;
	}

	if ( ptr_model == NULL )
		goto exit_cleanup;

	phrase = SUtteranceGetRelation(utt, "Phrase", error);
	if (S_CHK_ERR(error, S_CONTERR,
			  "Run",
			  "Call to \"SUtteranceGetRelation\" failed"))
		goto exit_cleanup;

	itrPhrase = SRelationHead( phrase, error );
	if (S_CHK_ERR(error, S_CONTERR,
			  "Run",
			  "Call to \"SRelationHead\" failed"))
		goto exit_cleanup;

	/* The three interfaces below do not depend on the phrase, so they
	 * are obtained once instead of once per loop iteration. */

	/* Obtain the dictionary interface representing the labels in the model. */
	if ( ptr_model->get_labels(ptr_model, &ptr_labels) != 0) {
		goto exit_cleanup;
	}

	/* Obtain the dictionary interface representing the attributes in the model. */
	if ( ptr_model->get_attrs(ptr_model, &ptr_attrs) != 0) {
		goto exit_cleanup;
	}

	/* Obtain the tagger interface. */
	if ( ptr_model->get_tagger(ptr_model, &ptr_tagger) != 0) {
		goto exit_cleanup;
	}

	while ( itrPhrase != NULL )
	{
		const SItem* tokenTMP;
		SItem* token;
		const SItem* lastToken;
		floatval_t score = 0;
		int n_items;
		int i = 0;

		instance = create_phrase_instance ( itrPhrase, ptr_attrs, ptr_labels, error );
		if (S_CHK_ERR(error, S_CONTERR,
				  "Run",
				  "Call to \"create_phrase_instance\" failed"))
			goto exit_cleanup;

		if ( instance == NULL )
			goto exit_cleanup;

		/* one label id per item; allocate at least one element so an
		 * empty instance is not mistaken for an allocation failure */
		n_items = instance->num_items;
		output = calloc(n_items > 0 ? (size_t)n_items : 1, sizeof *output);
		if ( output == NULL )
			goto exit_cleanup;

		/* Set the instance to the tagger. */
		if ( ptr_tagger->set(ptr_tagger, instance) != 0) {
			goto exit_cleanup;
		}

		/* Obtain the viterbi label sequence. */
		if (ptr_tagger->viterbi(ptr_tagger, output, &score) != 0) {
			goto exit_cleanup;
		}

		/* Extract the output and insert in the POS attribute */

		tokenTMP = SItemPathToItem ( itrPhrase, "daughter.R:Token", error );
		if (S_CHK_ERR(error, S_CONTERR,
				  "Run",
				  "Call to \"SItemPathToItem\" failed"))
			goto exit_cleanup;

		token = SItemParent (tokenTMP, error);
		if (S_CHK_ERR(error, S_CONTERR,
				  "Run",
				  "Call to \"SItemParent\" failed"))
			goto exit_cleanup;

		/* first token of the NEXT phrase; marks where this one ends */
		lastToken = SItemPathToItem ( itrPhrase, "n.daughter.R:Token.parent", error );
		if (S_CHK_ERR(error, S_CONTERR,
				  "Run",
				  "Call to \"SItemPathToItem\" failed"))
			goto exit_cleanup;

		/* i < n_items keeps the read of output[i] inside the buffer */
		while ( token != NULL && token != lastToken && i < n_items )
		{
			/* to_string stores a pointer in str; no buffer is needed */
			const char * str = NULL;

			if ( ptr_labels->to_string (ptr_labels, output[i], &str) != 0 )
				goto exit_cleanup;

			i += 1;

			SItemSetString (token, "POS", str, error);

			/* hand the label string back to the dictionary */
			ptr_labels->free (ptr_labels, str);

			if (S_CHK_ERR(error, S_CONTERR,
					  "Run",
					  "Call to \"SItemSetString\" failed"))
				goto exit_cleanup;

			token = SItemNext(token, error);
			if (S_CHK_ERR(error, S_CONTERR,
					  "Run",
					  "Call to \"SItemNext\" failed"))
				goto exit_cleanup;
		}

		free(output);
		output = NULL;

		/* assumes create_phrase_instance returns heap memory obtained
		 * with malloc, as the original cleanup's free(instance) did --
		 * TODO confirm */
		crfsuite_instance_finish(instance);
		free(instance);
		instance = NULL;

		itrPhrase = SItemNext(itrPhrase, error);
		if (S_CHK_ERR(error, S_CONTERR,
				  "Run",
				  "Call to \"SItemNext\" failed"))
			goto exit_cleanup;
	}

	/* here all is OK; fall through to release the CRFSuite objects */

exit_cleanup:
	free ( output );

	if ( instance != NULL )
	{
		crfsuite_instance_finish ( instance );
		free ( instance );
	}

	/* CRFSuite interfaces are released through their own release()
	 * member, not with free() */
	if ( ptr_tagger != NULL )
		ptr_tagger->release ( ptr_tagger );

	if ( ptr_attrs != NULL )
		ptr_attrs->release ( ptr_attrs );

	if ( ptr_labels != NULL )
		ptr_labels->release ( ptr_labels );

	if ( ptr_model != NULL )
		ptr_model->release ( ptr_model );

}
예제 #5
0
static char *s_add_token_to_relation(SRelation *tokenRel, const SToken *token,
									 const char *token_string, s_erc *error)
{
	SItem *tokenItem;
	const char *tmp;
	char *post_punc;


	S_CLR_ERR(error);

	/* create item, NULL shared content */
	tokenItem = SRelationAppend(tokenRel, NULL, error);
	if (S_CHK_ERR(error, S_CONTERR,
				  "s_add_token_to_relation",
				  "Call to \"SRelationAppend\" failed"))
		return NULL;

	/* item's name is the token string */
	SItemSetName(tokenItem, token_string, error);
	if (S_CHK_ERR(error, S_CONTERR,
				  "s_add_token_to_relation",
				  "Call to \"SItemSetName\" failed"))
		return NULL;

	/* get white-space */
	tmp = STokenGetWhitespace(token, error);
	if (S_CHK_ERR(error, S_CONTERR,
				  "s_add_token_to_relation",
				  "Call to \"STokenGetWhitespace\" failed"))
		return NULL;

	if (tmp != NULL)
	{
		SItemSetString(tokenItem, "whitespace", tmp, error);
		if (S_CHK_ERR(error, S_CONTERR,
					  "s_add_token_to_relation",
					  "Call to \"SItemSetString\" failed"))
			return NULL;
	}

	/* get pre-punctuation */
	tmp = STokenGetPrePunc(token, error);
	if (S_CHK_ERR(error, S_CONTERR,
				  "s_add_token_to_relation",
				  "Call to  \"STokenGetPrePunc\" failed"))
		return NULL;

	if (tmp != NULL)
	{
		SItemSetString(tokenItem, "prepunc", tmp, error);
		if (S_CHK_ERR(error, S_CONTERR,
					  "s_add_token_to_relation",
					  "Call to \"SItemSetString\" failed"))
			return NULL;
	}

	/* get post-punctuation */
	tmp = STokenGetPostPunc(token, error);
	if (S_CHK_ERR(error, S_CONTERR,
				  "s_add_token_to_relation",
				  "Call to \"STokenGetPostPunc\" failed"))
		return NULL;

	if (tmp != NULL)
	{
		SItemSetString(tokenItem, "postpunc", tmp, error);
		if (S_CHK_ERR(error, S_CONTERR,
					  "s_add_token_to_relation",
					  "Call to \"SItemSetString\" failed"))
			return NULL;
	}

	post_punc = s_strdup(tmp, error);
	if (S_CHK_ERR(error, S_CONTERR,
				  "s_add_token_to_relation",
				  "Call to \"s_strdup\" failed"))
		return NULL;

	return post_punc; /* return the post punctuation */
}
예제 #6
0
/*
 * Annotate every phone of a word with its position in the syllable and,
 * where the phoneset defines one, its duration class.
 *
 * For each syllable (daughter of the word in the "SylStructure"
 * relation) and each phone (daughter of the syllable):
 *   - "syllablepart" is set to "nucleus" for a phone that has the
 *     "vowel" feature, to "onset" for other phones seen before the
 *     first vowel of the syllable, and to "coda" for other phones seen
 *     after it;
 *   - "duration" is set to "+" when the phone has "duration_long",
 *     otherwise to "-" when it has "duration_short"; it is left
 *     untouched when the phone has neither.
 *
 * word  - word item whose syllables and phones are annotated
 * error - Speect error code, set by any failing call; processing stops
 *         at the first failure
 *
 * The phoneset is read from the "phoneset" data of the word's voice.
 */
static void s_compute_phonetic_features (SItem* word, s_erc *error )
{
	SItem *syllable;
	SItem * phone;
	/* only ever points at string literals, hence const */
	const char* position_in_syllable_string = NULL;

	/* Extract Phoneset from Voice*/
	const SVoice* voice = SItemVoice (word, error);
	if (S_CHK_ERR(error, S_CONTERR,
				  "s_compute_phonetic_features",
				  "Call to \"SItemVoice\" failed"))
		return;

	const SPhoneset* phoneset = (SPhoneset*)SVoiceGetData(voice, "phoneset", error);
	if (S_CHK_ERR(error, S_CONTERR,
				  "s_compute_phonetic_features",
				  "Call to \"SVoiceGetData\" failed"))
		return;
	SItem *wordAsSylStructure = SItemAs(word, "SylStructure", error);
	if (S_CHK_ERR(error, S_CONTERR,
		      "s_compute_phonetic_features",
		      "Call to \"SItemAs\" failed"))
		return;

	syllable = SItemDaughter(wordAsSylStructure, error);
	if (S_CHK_ERR(error, S_CONTERR,
		      "s_compute_phonetic_features",
		      "Call to \"SItemDaughter\" failed"))
		return;

	while (syllable != NULL)
	{
		phone = SItemDaughter(syllable, error);
		if (S_CHK_ERR(error, S_CONTERR,
			      "s_compute_phonetic_features",
			      "Call to \"SItemDaughter\" failed"))
			return;

		/* reset per syllable: phones before the first vowel are onset */
		s_bool nucleusFound = FALSE;
		while (phone != NULL)
		{
			const char* phone_value = SItemGetName(phone, error);
			if (S_CHK_ERR(error, S_CONTERR,
				      "s_compute_phonetic_features",
				      "Call to \"SItemGetName\" failed"))
				return;
			s_bool isVowel = S_PHONESET_CALL(phoneset, phone_has_feature)
				(phoneset,
				 phone_value,
				 "vowel",
				 error);
			if (S_CHK_ERR(error, S_CONTERR,
				      "s_compute_phonetic_features",
				      "Call to \"phone_has_feature\" failed"))
				return;

			if( isVowel )
			{
				nucleusFound = TRUE;
				position_in_syllable_string = "nucleus";
			}
			else
			{
				if( nucleusFound == TRUE )
					position_in_syllable_string = "coda";
				else
					position_in_syllable_string = "onset";
			}

			SItemSetString ( phone, "syllablepart", position_in_syllable_string, error );
			if (S_CHK_ERR(error, S_CONTERR,
						  "s_compute_phonetic_features",
						  "Call to \"SItemSetString\" failed"))
				return;

			s_bool hasLong = S_PHONESET_CALL(phoneset, phone_has_feature)
				(phoneset,
				 phone_value,
				 "duration_long",
				 error);
			if (S_CHK_ERR(error, S_CONTERR,
				      "s_compute_phonetic_features",
				      "Call to \"phone_has_feature\" failed"))
				return;

			s_bool hasShort = S_PHONESET_CALL(phoneset, phone_has_feature)
				(phoneset,
				 phone_value,
				 "duration_short",
				 error);
			if (S_CHK_ERR(error, S_CONTERR,
				      "s_compute_phonetic_features",
				      "Call to \"phone_has_feature\" failed"))
				return;

			/* long wins over short; NULL means no duration feature */
			const char * feat = NULL;
			if( hasLong )
			{
				feat = "+";
			}
			else if( hasShort )
			{
				feat = "-";
			}

			if(feat != NULL)
			{
				SItemSetString ( phone, "duration", feat, error );
				if (S_CHK_ERR(error, S_CONTERR,
					      "s_compute_phonetic_features",
					      "Call to \"SItemSetString\" failed"))
					return;
			}

			phone = SItemNext ( phone, error);
			if (S_CHK_ERR(error, S_CONTERR,
				      "s_compute_phonetic_features",
				      "Call to \"SItemNext\" failed"))
				return;
		}

		syllable = SItemNext (syllable, error);
		if (S_CHK_ERR(error, S_CONTERR,
			      "s_compute_phonetic_features",
			      "Call to \"SItemNext\" failed"))
			return;
	}

}