// add morphological analysis to CFSArray containing the sentence void addAnalysis(CLinguistic& linguistic, CDisambiguator& disambiguator, CFSArray<CFSVar>& words, const bool disambiguate) { //CFSVar &words=Data["words"]; CFSArray<CPTWord> PTWords; for (INTPTR ip=0; ip<words.GetSize(); ip++) { PTWords.AddItem(words[ip]["text"].GetWString()); } // perform analysis and optional disambiguation CFSArray<CMorphInfos> MorphResults=linguistic.AnalyzeSentense(PTWords); if (disambiguate) { MorphResults=disambiguator.Disambiguate(MorphResults); } // collect the analysis results ASSERT(PTWords.GetSize()==MorphResults.GetSize()); for (INTPTR ip=0; ip<words.GetSize(); ip++) { const CFSArray<CMorphInfo> &Analysis=MorphResults[ip].m_MorphInfo; CFSVar VarAnalysis; VarAnalysis.Cast(CFSVar::VAR_ARRAY); for (INTPTR ipRes=0; ipRes<Analysis.GetSize(); ipRes++) { const CMorphInfo &Analysis1=Analysis[ipRes]; CFSVar VarAnalysis1; VarAnalysis1["root"]=Analysis1.m_szRoot; VarAnalysis1["ending"]=Analysis1.m_szEnding; VarAnalysis1["clitic"]=Analysis1.m_szClitic; VarAnalysis1["partofspeech"]=CFSWString(Analysis1.m_cPOS); VarAnalysis1["form"]=Analysis1.m_szForm; VarAnalysis[ipRes]=VarAnalysis1; } words[ip]["analysis"]=VarAnalysis; } }
void SubKeys(const CFSAString szExcept, const CFSVar &Data) { for (INTPTR ip=0; ip<Data.GetSize(); ip++) { CFSAString szKey=Data.GetKey(ip); if (szKey==szExcept) continue; m_Writer.Key(szKey); m_Writer.Val(Data[szKey]); } }
// Convert the array stored under data["text"] into a StringVector.
//
// Every element of the array is read with GetAString() and appended, in order,
// to the returned vector.
StringVector convertStringVectorOutput(CFSVar& data) {
    // Bind by reference instead of copying the whole array; the lookup itself is
    // still performed on the non-const `data`, exactly as before.
    const CFSVar& text = data["text"];
    // INTPTR is the index type used with GetSize() elsewhere, so no narrowing to int.
    const INTPTR count = text.GetSize();

    StringVector words;
    words.reserve(count);
    for (INTPTR idx=0 ; idx<count ; ++idx) {
        words.push_back(std::string(text[idx].GetAString()));
    }
    return words;
}
// convert StringVector input to CFSArray input required by vabamorf base library. CFSArray<CFSVar> convertInput(StringVector const& sentence) { CFSArray<CFSVar> data(sentence.size()); for (size_t i=0 ; i<sentence.size() ; ++i) { CFSVar wordData; wordData.Cast(CFSVar::VAR_MAP); wordData["text"] = sentence[i].c_str(); data.AddItem(wordData); } return data; }
void CJSONWriter::Val(const CFSVar &Var) { switch (Var.GetType()) { case CFSVar::VAR_EMPTY: NullVal(); break; case CFSVar::VAR_INT: IntVal(Var.GetInt()); break; case CFSVar::VAR_FLOAT: FloatVal(Var.GetFloat()); break; case CFSVar::VAR_BOOL: BoolVal(Var.GetBool()); break; case CFSVar::VAR_STRING: StringVal(Var.GetAString()); break; case CFSVar::VAR_MAP: ObjectStart(); for (INTPTR ip=0; ip<Var.GetSize(); ip++) { CFSAString szKey=Var.GetKey(ip); Key(szKey); Val(Var[szKey]); } ObjectEnd(); break; case CFSVar::VAR_ARRAY: ArrayStart(); for (INTPTR ip=0; ip<Var.GetSize(); ip++) { Val(Var[ip]); } ArrayEnd(); break; } }
// convert output to wrapper format std::vector<SpellingResults> convertSpellingOutput(CFSArray<CFSVar>& words) { std::vector<SpellingResults> results; results.reserve(words.GetSize()); for (int widx=0 ; widx < words.GetSize() ; ++widx) { CFSVar word = words[widx]; std::string text = std::string(word["text"].GetAString()); CFSVar suggestions = word["suggestions"]; StringVector suggestStrings; suggestStrings.reserve(suggestions.GetSize()); for (int sidx=0 ; sidx < suggestions.GetSize() ; ++sidx) { CFSVar suggestion = suggestions[sidx]; suggestStrings.push_back(std::string(suggestion.GetAString())); } results.push_back(SpellingResults(text, word["spelling"].GetInt(), suggestStrings)); } return results; }
// convert vabamorf base library output to WordAnalysis instances, which as easier to wrap. std::vector<WordAnalysis> convertOutput(CFSArray<CFSVar>& words) { std::vector<WordAnalysis> results; results.reserve(words.GetSize()); for (int widx=0 ; widx < words.GetSize() ; ++widx) { CFSVar word = words[widx]; CFSVar analysis = word["analysis"]; AnalysisVector vec; for (int aidx=0 ; aidx < analysis.GetSize() ; ++aidx) { CFSVar a = analysis[aidx]; vec.push_back(Analysis(a["root"].GetAString(), a["ending"].GetAString(), a["clitic"].GetAString(), a["partofspeech"].GetAString(), a["form"].GetAString())); } results.push_back(WordAnalysis(std::string(word["text"].GetAString()), vec)); } return results; }
// synthesize words based on lemma, pos and form void synthesizeWord(CLinguistic& linguistic, CFSVar &Data) { const CFSVar &Word=Data; CMorphInfo Input; Input.m_szRoot=Word["lemma"].GetWString(); Input.m_cPOS=Word["partofspeech"].GetWString()[0]; if (!Input.m_cPOS) { Input.m_cPOS='*'; } Input.m_szForm=Word["form"].GetWString(); CFSWString szHint=Word["hint"].GetWString(); CFSArray<CMorphInfo> Result=linguistic.Synthesize(Input, szHint); if (Result.GetSize()) { CFSVar Text; Text.Cast(CFSVar::VAR_ARRAY); for (INTPTR ipRes=0; ipRes<Result.GetSize(); ipRes++) { Text[ipRes]=Result[ipRes].m_szRoot+Result[ipRes].m_szEnding+Result[ipRes].m_szClitic; } Data["text"]=Text; } }
// spellcheck the words and add suggestions void addSuggestions(CLinguistic& linguistic, CFSArray<CFSVar>& words, const bool suggest) { for (INTPTR ip=0; ip<words.GetSize(); ip++) { CFSVar &Word=words[ip]; CPTWord PTWord=Word["text"].GetWString(); PTWord.RemoveHyphens(); PTWord.RemovePunctuation(); PTWord.Trim(); if (PTWord.m_szWord.IsEmpty() || linguistic.SpellWord(PTWord.m_szWord)==SPL_NOERROR) { Word["spelling"]=true; } else { Word["spelling"]=false; if (suggest) { CFSWStringArray Suggestions=linguistic.Suggest(PTWord.m_szWord); CFSVar VarSuggestions; VarSuggestions.Cast(CFSVar::VAR_ARRAY); for (INTPTR ipRes=0; ipRes<Suggestions.GetSize(); ipRes++) { VarSuggestions[ipRes]=Suggestions[ipRes]; } Word["suggestions"]=VarSuggestions; } } } }
void OnValReadEnd(const CFSAString &szKey, CFSVar &Data) { if (szKey.IsEmpty()) { SubKeys("paragraphs", Data); m_Writer.ObjectEnd(); } else if (szKey=="/paragraphs") { m_Writer.ArrayEnd(); m_iCollectData++; } else if (KeyMatch(szKey, "/paragraphs/%d")) { SubKeys("sentences", Data); m_Writer.ObjectEnd(); m_iCollectData--; } else if (KeyMatch(szKey, "/paragraphs/%d/sentences")) { m_Writer.ArrayEnd(); m_iCollectData++; } else if (KeyMatch(szKey, "/paragraphs/%d/sentences/%d")) { if (Data.KeyExist("words")) { CFSVar &Words=Data["words"]; CFSArray<CMorphInfos> WordsAnalysis; for (INTPTR ip=0; ip<Words.GetSize(); ip++) { const CFSVar &Word=Words[ip]; CMorphInfos Analysis; Analysis.m_szWord=Word["text"].GetWString(); const CFSVar &VarAnalysis=Word["analysis"]; for (INTPTR ip2=0; ip2<VarAnalysis.GetSize(); ip2++) { const CFSVar &VarAnalysis1=VarAnalysis[ip2]; CMorphInfo Analysis1; Analysis1.m_szRoot=VarAnalysis1["root"].GetWString(); Analysis1.m_szEnding=VarAnalysis1["ending"].GetWString(); Analysis1.m_szClitic=VarAnalysis1["clitic"].GetWString(); Analysis1.m_cPOS=VarAnalysis1["partofspeech"].GetWString()[0]; Analysis1.m_szForm=VarAnalysis1["form"].GetWString(); Analysis.m_MorphInfo.AddItem(Analysis1); } WordsAnalysis.AddItem(Analysis); } WordsAnalysis=m_Disambiguator.Disambiguate(WordsAnalysis); RT_ASSERT(Words.GetSize()==WordsAnalysis.GetSize()); for (INTPTR ip=0; ip<Words.GetSize(); ip++) { const CMorphInfos &Analysis=WordsAnalysis[ip]; CFSVar VarAnalysis; VarAnalysis.Cast(CFSVar::VAR_ARRAY); for (INTPTR ipRes=0; ipRes<Analysis.m_MorphInfo.GetSize(); ipRes++) { const CMorphInfo &Analysis1=Analysis.m_MorphInfo[ipRes]; CFSVar VarAnalysis1; VarAnalysis1["root"]=Analysis1.m_szRoot; VarAnalysis1["ending"]=Analysis1.m_szEnding; VarAnalysis1["clitic"]=Analysis1.m_szClitic; VarAnalysis1["partofspeech"]=CFSWString(Analysis1.m_cPOS); VarAnalysis1["form"]=Analysis1.m_szForm; VarAnalysis[ipRes]=VarAnalysis1; } Words[ip]["analysis"]=VarAnalysis; } } m_Writer.Val(Data); 
m_iCollectData--; } }
// Read one JSON value at the current input position and return it.
//
// szKeyPath is the path of the value being read; child values are read
// recursively with "/<index>" (arrays) or "/<key>" (maps) appended.
// OnValReadStart() is called before and OnValReadEnd() after the value is read.
// Children are stored in the returned container only while m_iCollectData>0;
// they are still parsed (and the callbacks still fire) otherwise.
// At end of input an empty value is returned.
//
// Throws CJSONException on premature EOF inside a container, a missing ','
// between elements, a missing or malformed map key, a missing ':' after a key,
// or a character that cannot start a value.
CFSVar CJSONReader::ReadVal(const CFSAString &szKeyPath)
{
    OnValReadStart(szKeyPath);
    CFSVar Data;

    if (!m_cCh) {
        // End of input: Data stays empty. This is tested first so the EOF case
        // cannot be taken for a number by the FSStrChr() test below
        // (NOTE(review): a strchr-like FSStrChr would match the terminating NUL
        // -- confirm against its implementation).
    } else if (m_cCh=='[') {
        Data.Cast(CFSVar::VAR_ARRAY);
        GetChar(true);
        INTPTR ipPos=0;
        for (;;) {
            if (m_cCh==0) {
                throw CJSONException(FSTSTR("Unexpected EOF"));
            } else if (m_cCh==']') {
                GetChar(true);
                break;
            } else if (ipPos>0) {
                // Every element after the first must be preceded by a comma.
                if (m_cCh==',') {
                    GetChar(true);
                } else {
                    throw CJSONException(FSTSTR("Missing ',' in array"));
                }
            }

            CFSAString szKey;
            szKey.Format("%zd", ipPos);
            CFSVar Data1=ReadVal(szKeyPath+"/"+szKey);
            if (m_iCollectData>0) {
                Data[ipPos]=Data1;
            }
            ipPos++;
        }
    } else if (m_cCh=='{') {
        Data.Cast(CFSVar::VAR_MAP);
        GetChar(true);
        INTPTR ipPos=0;
        for (;;) {
            if (m_cCh==0) {
                throw CJSONException(FSTSTR("Unexpected EOF"));
            } else if (m_cCh=='}') {
                GetChar(true);
                break;
            } else if (ipPos>0) {
                // Every entry after the first must be preceded by a comma.
                if (m_cCh==',') {
                    GetChar(true);
                } else {
                    throw CJSONException(FSTSTR("Missing ',' in map"));
                }
            }

            // Keys may be quoted (single or double quotes) or bare text starting with a letter.
            CFSAString szKey;
            if (m_cCh=='\"' || m_cCh=='\'') {
                szKey=ReadString();
            } else if (FSIsLetter(m_cCh)) {
                szKey=ReadText();
            } else {
                throw CJSONException(FSTSTR("Expected key"));
            }

            if (m_cCh==':') {
                GetChar(true);
            } else {
                throw CJSONException(FSTSTR("Expected ':'"));
            }

            CFSVar Data1=ReadVal(szKeyPath+"/"+szKey);
            if (m_iCollectData>0) {
                Data[szKey]=Data1;
            }
            ipPos++;
        }
    } else if (m_cCh=='\"' || m_cCh=='\'') {
        Data=ReadString();
    } else if ((m_cCh>='0' && m_cCh<='9') || FSStrChr("-+.", m_cCh)) {
        Data=ReadNumber();
    } else if (FSIsLetter(m_cCh)) {
        Data=ReadConst();
    } else {
        throw CJSONException(FSTSTR("Unknown value type"));
    }

    OnValReadEnd(szKeyPath, Data);
    return Data;
}