static const char* setSentenceType(const SItem *phrase, SMap *puncMap, s_erc *error) { S_CLR_ERR(error); const char* result = NULL; /* types: "decl, "excl", "interrog" */ /* stop at sentence's last token */ const SItem *wordFromCurrentPhrase = SItemPathToItem(phrase, "daughtern", error); if (S_CHK_ERR(error, S_CONTERR, "setSentenceType", "Call to \"SItemPathToItem\" failed")) return NULL; SItem *wordAsToken = SItemAs(wordFromCurrentPhrase, "Token", error); if (S_CHK_ERR(error, S_CONTERR, "setSentenceType", "Call to \"SItemAs\" failed")) return NULL; SItem *tokenItem = SItemParent(wordAsToken, error); tokenItem = SItemNext(tokenItem, error); const char *punctStr = SItemGetName(tokenItem, error); if (S_CHK_ERR(error, S_CONTERR, "setSentenceType", "Call to \"SItemGetName\" failed")) return NULL; s_bool found= SMapObjectPresent(puncMap, punctStr, error); if (S_CHK_ERR(error, S_CONTERR, "setSentenceType", "Call to \"SMapObjectPresent\" failed")) return NULL; result = punctStr; if( found == TRUE) { result = SMapGetString ( puncMap, punctStr, error); if (S_CHK_ERR(error, S_CONTERR, "setSentenceType", "Call to \"SMapGetString\" failed")) return NULL; } else { result = NULL; } return result; }
static SObject *Run(const SFeatProcessor *self, const SItem *item, s_erc *error) { SObject *extractedFeat = NULL; const SItem *itemInSylStructRel; const SItem *syllableItem; const SItem *itr; sint32 count; S_CLR_ERR(error); if (item == NULL) return NULL; itemInSylStructRel = SItemAs(item, "SylStructure", error); if (S_CHK_ERR(error, S_CONTERR, "Run", "Call to \"SItemAs\" failed")) goto quit_error; if (itemInSylStructRel == NULL) { extractedFeat = SObjectSetInt(0, error); if (S_CHK_ERR(error, S_CONTERR, "Run", "Call to \"SObjectSetInt\" failed")) goto quit_error; return extractedFeat; } syllableItem = SItemParent(itemInSylStructRel, error); if (S_CHK_ERR(error, S_CONTERR, "Run", "Call to \"SItemParent\" failed")) goto quit_error; if (syllableItem == NULL) { extractedFeat = SObjectSetInt(0, error); if (S_CHK_ERR(error, S_CONTERR, "Run", "Call to \"SObjectSetInt\" failed")) goto quit_error; return extractedFeat; } itr = SItemDaughter(syllableItem, error); if (S_CHK_ERR(error, S_CONTERR, "Run", "Call to \"SItemDaughter\" failed")) goto quit_error; count = 0; while (itr != NULL) { s_bool is_equal; is_equal = SItemEqual(itr, itemInSylStructRel, error); if (S_CHK_ERR(error, S_CONTERR, "Run", "Call to \"SItemEqual\" failed")) goto quit_error; if (is_equal) break; count++; itr = SItemNext(itr, error); if (S_CHK_ERR(error, S_CONTERR, "Run", "Call to \"SItemNext\" failed")) goto quit_error; } extractedFeat = SObjectSetInt(count, error); if (S_CHK_ERR(error, S_CONTERR, "Run", "Call to \"SObjectSetInt\" failed")) goto quit_error; /* all OK here */ return extractedFeat; /* error cleanup */ quit_error: if (extractedFeat != NULL) S_DELETE(extractedFeat, "Run", error); return NULL; S_UNUSED(self); }
/* setSentenceType should be made out of two parts: * 1) the first section searchs for the last punctuation element of the sentence * -> if it is a '.' --> set "decl" type (where should I set this feature value?) * -> if it is a '!' --> set "excl" type (where should I set this feature value?) * -> if it is a '?' --> set "interrog" type (where should I set this feature value?) * 2) if the first part decides for "interrog" type, there should be other controls * to establish the sentence's complete type * */ static char* setSentenceType(const SItem *phrase, SMap *prosSymbols, s_erc *error) { S_CLR_ERR(error); char* result = "decl"; /* types: "decl, "excl", "interrog" */ /* stop at sentence's last token */ const SItem *wordFromCurrentPhrase = SItemPathToItem(phrase, "daughter", error); if (S_CHK_ERR(error, S_CONTERR, "setSentenceType", "Call to \"SItemPathToItem\" failed")) return NULL; SItem *wordAsToken = SItemAs(wordFromCurrentPhrase, "Token", error); if (S_CHK_ERR(error, S_CONTERR, "setSentenceType", "Call to \"SItemAs\" failed")) return NULL; SItem *tokenItem = SItemParent(wordAsToken, error); SItem *firstTokenItem = tokenItem; s_bool isPunct = SItemFeatureIsPresent(tokenItem, "IsPunctuation", error); if (S_CHK_ERR(error, S_CONTERR, "setSentenceType", "Call to \"SItemFeatureIsPresent\" failed")) return NULL; s_bool isFinalPunct = FALSE; while (isFinalPunct == FALSE) { isPunct = SItemFeatureIsPresent(tokenItem, "IsPunctuation", error); if (S_CHK_ERR(error, S_CONTERR, "setSentenceType", "Call to \"SItemFeatureIsPresent\" failed")) return NULL; if (isPunct) { const char *punctStr = SItemGetName(tokenItem, error); if (S_CHK_ERR(error, S_CONTERR, "setSentenceType", "Call to \"SItemGetName\" failed")) return NULL; if (s_strcmp(punctStr, ".", error) == 0) { isFinalPunct = TRUE; result = "decl"; } else if (s_strcmp(punctStr, "!", error) == 0) { isFinalPunct = TRUE; result = "excl"; } else if (s_strcmp(punctStr, "?", error) == 0) { isFinalPunct = TRUE; const char *posValueStr = NULL; char *posValueStr_filtered = NULL; s_bool currPosInCurrList; s_bool have_symbols = FALSE; SMap* valueMap = NULL; have_symbols = SMapObjectPresent(prosSymbols, "firstPosInQuestionW", error); if (S_CHK_ERR(error, S_CONTERR, "SetSentenceType", "Call to \"SMapObjectPresent\" failed")) goto quit_error; if (have_symbols) { valueMap = S_CAST(SMapGetObject(prosSymbols, "firstPosInQuestionW", error), SMap, error); if (S_CHK_ERR(error, S_CONTERR, "SetSentenceType", "Call to \"SMapGetObject\" failed")) goto quit_error; } else goto quit_error; posValueStr = SItemGetString(firstTokenItem, "POS", error); if (S_CHK_ERR(error, S_CONTERR, "SetSentenceType", "Call to \"SItemGetString\" failed")) goto quit_error; /* filter the current POS tag, remember to free the memory * pointed to by 'posValueStr_filtered' pointer */ posValueStr_filtered = filterPosTag(posValueStr, error); if (S_CHK_ERR(error, S_CONTERR, "SetSentenceType", "Call to \"filterPosTag\" failed")) goto quit_error; currPosInCurrList = searchStringMap(valueMap, posValueStr_filtered, error); if (currPosInCurrList == TRUE) { result = "interrogW"; } else { result = "interrog"; } quit_error: if (posValueStr_filtered) { S_FREE(posValueStr_filtered); } break; } } tokenItem = SItemNext(tokenItem, error); if (S_CHK_ERR(error, S_CONTERR, "setSentenceType", "Call to \"SItemNext\" failed")) return NULL; if(tokenItem == NULL) { isFinalPunct = TRUE; } } return result; }
static SObject *Run(const SFeatProcessor *self, const SItem *item, s_erc *error) { SObject *extractedFeat = NULL; const SItem *syllWord; const SItem *itemInSylStructRel; const SItem *wordInPhrase; const SItem *phrase; const SItem *itr; sint32 count; S_CLR_ERR(error); if (item == NULL) return NULL; /* syllable as in SylStructure */ itemInSylStructRel = SItemAs(item, "SylStructure", error); if (S_CHK_ERR(error, S_CONTERR, "Run", "Call to \"SItemAs\" failed")) goto quit_error; if (itemInSylStructRel == NULL) goto quit_null; /* syllable's word */ syllWord = SItemParent(itemInSylStructRel, error); if (S_CHK_ERR(error, S_CONTERR, "Run", "Call to \"SItemParent\" failed")) goto quit_error; if (syllWord == NULL) goto quit_null; /* as in phrase structure */ wordInPhrase = SItemAs(syllWord, "Phrase", error); if (S_CHK_ERR(error, S_CONTERR, "Run", "Call to \"SItemAs\" failed")) goto quit_error; if (wordInPhrase == NULL) goto quit_null; /* get phrase */ phrase = SItemParent(wordInPhrase, error); if (S_CHK_ERR(error, S_CONTERR, "Run", "Call to \"SItemParent\" failed")) goto quit_error; if (phrase == NULL) goto quit_null; itr = SItemLastDaughter(phrase, error); if (S_CHK_ERR(error, S_CONTERR, "Run", "Call to \"SItemLastDaughter\" failed")) goto quit_error; count = 0; while (itr != NULL) { s_bool is_equal; is_equal = SItemEqual(itr, syllWord, error); if (S_CHK_ERR(error, S_CONTERR, "Run", "Call to \"SItemEqual\" failed")) goto quit_error; if (is_equal) break; count += word_num_syls(itr, error); if (S_CHK_ERR(error, S_CONTERR, "Run", "Call to \"word_num_syls\" failed")) goto quit_error; itr = SItemPrev(itr, error); if (S_CHK_ERR(error, S_CONTERR, "Run", "Call to \"SItemPrev\" failed")) goto quit_error; } count += syll_pos_word_rev(item, error); if (S_CHK_ERR(error, S_CONTERR, "Run", "Call to \"syll_pos_word_rev\" failed")) goto quit_error; extractedFeat = SObjectSetInt(count, error); if (S_CHK_ERR(error, S_CONTERR, "Run", "Call to \"SObjectSetInt\" failed")) goto quit_error; /* all OK here */ return extractedFeat; /* error cleanup */ quit_error: if (extractedFeat != NULL) S_DELETE(extractedFeat, "Run", error); return NULL; /* return 0 */ quit_null: extractedFeat = SObjectSetInt(0, error); if (S_CHK_ERR(error, S_CONTERR, "Run", "Call to \"SObjectSetInt\" failed")) goto quit_error; return extractedFeat; S_UNUSED(self); }
static sint32 syll_pos_word_rev(const SItem *item, s_erc *error) { const SItem *itemInSylStructRel; const SItem *wordItem; const SItem *itr; sint32 count; S_CLR_ERR(error); if (item == NULL) return 0; itemInSylStructRel = SItemAs(item, "SylStructure", error); if (S_CHK_ERR(error, S_CONTERR, "Run", "Call to \"SItemAs\" failed")) return 0; if (itemInSylStructRel == NULL) return 0; wordItem = SItemParent(itemInSylStructRel, error); if (S_CHK_ERR(error, S_CONTERR, "Run", "Call to \"SItemParent\" failed")) return 0; if (wordItem == NULL) return 0; itr = SItemLastDaughter(wordItem, error); if (S_CHK_ERR(error, S_CONTERR, "Run", "Call to \"SItemLastDaughter\" failed")) return 0; count = 0; while (itr != NULL) { s_bool is_equal; is_equal = SItemEqual(itr, itemInSylStructRel, error); if (S_CHK_ERR(error, S_CONTERR, "Run", "Call to \"SItemEqual\" failed")) return 0; if (is_equal) break; count++; itr = SItemPrev(itr, error); if (S_CHK_ERR(error, S_CONTERR, "Run", "Call to \"SItemPrev\" failed")) return 0; } /* all OK here */ return count; }
static SObject *Run(const SFeatProcessor *self, const SItem *item, s_erc *error) { SObject *extractedFeat = NULL; const SItem *itemInSentenceRel; const SItem *itr; sint32 count; S_CLR_ERR(error); if (item == NULL) return NULL; itemInSentenceRel = SItemAs(item, "Sentence", error); if (S_CHK_ERR(error, S_CONTERR, "Run", "Call to \"SItemRelation\" failed")) goto quit_error; SItem * sentenceItem = SItemParent (itemInSentenceRel, error); if (S_CHK_ERR(error, S_CONTERR, "Run", "Call to \"SItemParent\" failed")) goto quit_error; itr = SItemLastDaughter (sentenceItem, error); if (S_CHK_ERR(error, S_CONTERR, "Run", "Call to \"SRelationHead\" failed")) goto quit_error; count = 0; while (itr != NULL) { s_bool is_equal; is_equal = SItemEqual(itr, itemInSentenceRel, error); if (S_CHK_ERR(error, S_CONTERR, "Run", "Call to \"SItemEqual\" failed")) goto quit_error; if (is_equal) break; count++; itr = SItemPrev(itr, error); if (S_CHK_ERR(error, S_CONTERR, "Run", "Call to \"SItemPrev\" failed")) goto quit_error; } extractedFeat = SObjectSetInt(count, error); if (S_CHK_ERR(error, S_CONTERR, "Run", "Call to \"SObjectSetInt\" failed")) goto quit_error; /* all OK here */ return extractedFeat; /* error cleanup */ quit_error: if (extractedFeat != NULL) S_DELETE(extractedFeat, "Run", error); return NULL; S_UNUSED(self); }
static void Run(const SUttProcessor *self, SUtterance *utt, s_erc *error) { const SRelation *wordRel; SItem *wordItem; const SItem *tokenItem; s_bool is_present; const char *end_punc; SRelation *phraseRelation = NULL; SItem *phraseItem = NULL; const char *post_punc; SRelation *sentenceRelation = NULL; SItem *sentenceItem = NULL; S_CLR_ERR(error); /* we require the word relation */ is_present = SUtteranceRelationIsPresent(utt, "Word", error); if (S_CHK_ERR(error, S_CONTERR, "Run", "Call to \"SUtteranceRelationIsPresent\" failed")) goto quit_error; if (!is_present) { S_CTX_ERR(error, S_FAILURE, "Run", "Failed to find 'Word' relation in utterance"); goto quit_error; } wordRel = SUtteranceGetRelation(utt, "Word", error); if (S_CHK_ERR(error, S_CONTERR, "Run", "Call to \"SUtteranceGetRelation\" failed")) goto quit_error; /* get phrasing symbols */ s_get_phrasing_symbols(self, &end_punc, error); if (S_CHK_ERR(error, S_CONTERR, "Run", "Call to \"s_get_phrasing_symbols\" failed")) goto quit_error; /* create Phrase relation */ phraseRelation = SUtteranceNewRelation(utt, "Phrase", error); if (S_CHK_ERR(error, S_CONTERR, "Run", "Call to \"SUtteranceNewRelation\" failed")) goto quit_error; /* create Sentence relation */ sentenceRelation = SUtteranceNewRelation(utt, "Sentence", error); if (S_CHK_ERR(error, S_CONTERR, "Run", "Call to \"SUtteranceNewRelation\" failed")) goto quit_error; /* start at the first item in the word relation, cast away * const, we want to add daughter items */ wordItem = (SItem*)SRelationHead(wordRel, error); if (S_CHK_ERR(error, S_CONTERR, "Run", "Call to \"SRelationHead\" failed")) goto quit_error; while (wordItem != NULL) { SItem *lastWordInToken; SItem *wordAsToken; if (phraseItem == NULL) { /* if phrase item is NULL, create a new phrase item (NULL * shared content) that is appended to phrase * relation. Will happen in first pass. */ sentenceItem = SRelationAppend(sentenceRelation, NULL, error); if (S_CHK_ERR(error, S_CONTERR, "Run", "Call to \"SRelationAppend\" failed")) goto quit_error; /* Added on top a sentence item, for now is one on one with the phrase item * */ phraseItem = SRelationAppend(phraseRelation, NULL, error); if (S_CHK_ERR(error, S_CONTERR, "Run", "Call to \"SRelationAppend\" failed")) goto quit_error; /* add an item name, NB, no break */ SItemSetString(phraseItem, "name", "NB", error); if (S_CHK_ERR(error, S_CONTERR, "Run", "Call to \"SItemSetString\" failed")) goto quit_error; SItemAddDaughter(sentenceItem, phraseItem, error); if (S_CHK_ERR(error, S_CONTERR, "Run", "Call to \"SItemAddDaughter\" failed")) goto quit_error; } /* Create a daughter item for the phrase item. Shared content * is the word item. */ SItemAddDaughter(phraseItem, wordItem, error); if (S_CHK_ERR(error, S_CONTERR, "Run", "Call to \"SItemAddDaughter\" failed")) goto quit_error; /* get word as in Token relation */ wordAsToken = SItemAs(wordItem, "Token", error); if (S_CHK_ERR(error, S_CONTERR, "Run", "Failed to get word item's as in Token relation")) goto quit_error; /* * get word's token which is the parent of wordAsToken. */ tokenItem = SItemParent(wordAsToken, error); if (S_CHK_ERR(error, S_CONTERR, "Run", "Failed to get word item's token item")) goto quit_error; /* get last word in token */ lastWordInToken = SItemLastDaughter(tokenItem, error); if (S_CHK_ERR(error, S_CONTERR, "Run", "Failed to get last daughter of token item")) goto quit_error; /* check if the next token is punctuation */ is_present = FALSE; tokenItem = SItemNext(tokenItem, error); if (S_CHK_ERR(error, S_CONTERR, "Run", "Call to \"SItemNext\" failed")) goto quit_error; if (tokenItem != NULL) { is_present = SItemFeatureIsPresent(tokenItem, "IsPunctuation", error); if (S_CHK_ERR(error, S_CONTERR, "Run", "Call to \"SItemFeatureIsPresent\" failed")) goto quit_error; if (is_present) { sint32 value = SItemGetInt(tokenItem, "IsPunctuation", error); if (S_CHK_ERR(error, S_CONTERR, "Run", "Call to \"SItemGetInt\" failed")) goto quit_error; is_present = (value > 0); } } if ((is_present) && (wordAsToken == lastWordInToken)) { char *ptr; post_punc = SItemGetName(tokenItem, error); if (S_CHK_ERR(error, S_CONTERR, "Run", "Call to \"SItemGetName\" failed")) goto quit_error; /* check if it is in the end_punc list */ ptr = s_strpbrk(post_punc, end_punc, error); if (S_CHK_ERR(error, S_CONTERR, "Run", "Call to \"s_strpbrk\" failed")) goto quit_error; if (ptr != NULL) { /* add a phrase break */ SItemSetString(phraseItem, "name", "BB", error); if (S_CHK_ERR(error, S_CONTERR, "Run", "Call to \"SItemSetString\" failed")) goto quit_error; /* set to NULL so that a new phrase item is created */ phraseItem = NULL; } } wordItem = SItemNext(wordItem, error); if (S_CHK_ERR(error, S_CONTERR, "Run", "Call to \"SItemNext\" failed")) goto quit_error; } /* here all is OK */ return; /* error clean-up code */ quit_error: if (phraseRelation != NULL) { SUtteranceDelRelation(utt, "Phrase", error); S_CHK_ERR(error, S_CONTERR, "Run", "Call to \"SUtteranceDelRelation\" failed"); } }
static void Run(const SUttProcessor *self, SUtterance *utt, s_erc *error) { SCrfSuiteUttProc *crfsuiteProc = (SCrfSuiteUttProc*)self; crfsuite_model_t * ptr_model = malloc (sizeof(crfsuite_tagger_t)); crfsuite_tagger_t * ptr_tagger = malloc (sizeof(crfsuite_tagger_t)); crfsuite_dictionary_t * ptr_attrs = malloc (sizeof(crfsuite_dictionary_t)); crfsuite_dictionary_t * ptr_labels = malloc (sizeof(crfsuite_dictionary_t)); crfsuite_instance_t * instance = malloc (sizeof(crfsuite_instance_t)); /* Initialize model object */ if ( crfsuite_create_instance_from_file( crfsuiteProc->model_file, (void**)&ptr_model ) != 0 ) { goto exit_cleanup; } const SRelation* phrase = SUtteranceGetRelation(utt, "Phrase", error); if (S_CHK_ERR(error, S_CONTERR, "Run", "Call to \"SUtteranceGetRelation\" failed")) return; SItem* itrPhrase = SRelationHead( phrase, error ); if (S_CHK_ERR(error, S_CONTERR, "Run", "Call to \"SRelationHead\" failed")) return; while ( itrPhrase != NULL ) { /* Obtain the dictionary interface representing the labels in the model. */ if ( ptr_model->get_labels(ptr_model, &ptr_labels) != 0) { goto exit_cleanup; } /* Obtain the dictionary interface representing the attributes in the model. */ if ( ptr_model->get_attrs(ptr_model, &ptr_attrs) != 0) { goto exit_cleanup; } /* Obtain the tagger interface. */ if ( ptr_model->get_tagger(ptr_model, &ptr_tagger) != 0) { goto exit_cleanup; } instance = create_phrase_instance ( itrPhrase, ptr_attrs, ptr_labels, error ); int *output = calloc(sizeof(int), instance->num_items); floatval_t score = 0; /* Set the instance to the tagger. */ if ( ptr_tagger->set(ptr_tagger, instance) != 0) { goto exit_cleanup; } /* Obtain the viterbi label sequence. */ if (ptr_tagger->viterbi(ptr_tagger, output, &score) != 0) { goto exit_cleanup; } /* Extract the output and insert in the POS attribute */ const SItem* tokenTMP = SItemPathToItem ( itrPhrase, "daughter.R:Token", error ); if (S_CHK_ERR(error, S_CONTERR, "Run", "Call to \"SItemPathToItem\" failed")) return; SItem* token = SItemParent (tokenTMP, error); if (S_CHK_ERR(error, S_CONTERR, "Run", "Call to \"SItemParent\" failed")) return; const SItem* lastToken = SItemPathToItem ( itrPhrase, "n.daughter.R:Token.parent", error ); if (S_CHK_ERR(error, S_CONTERR, "Run", "Call to \"SItemPathToItem\" failed")) return; int i = 0; while ( token != NULL && token != lastToken ) { const char * str = malloc (sizeof (char)*16); ptr_labels->to_string (ptr_labels, output[i], &str); i += 1; SItemSetString (token, "POS", str, error); if (S_CHK_ERR(error, S_CONTERR, "Run", "Call to \"SItemSetString\" failed")) return; token = SItemNext(token, error); if (S_CHK_ERR(error, S_CONTERR, "Run", "Call to \"SItemNext\" failed")) return; } free(output); crfsuite_instance_finish(instance); itrPhrase = SItemNext(itrPhrase, error); if (S_CHK_ERR(error, S_CONTERR, "Run", "Call to \"SItemNext\" failed")) return; } /* here all is OK */ S_UNUSED(utt); exit_cleanup: if ( ptr_model != NULL ) free ( ptr_model ); if ( ptr_tagger != NULL ) free ( ptr_tagger ); if ( ptr_attrs != NULL ) free ( ptr_attrs ); if ( ptr_labels != NULL ) free ( ptr_labels ); if ( instance != NULL ) free ( instance ); }