Esempio n. 1
0
// Add morphological analysis to the CFSArray containing the sentence.
//
// linguistic    - analyzer that produces the candidate analyses.
// disambiguator - applied to the analyzer output when `disambiguate` is true.
// words         - word records of one sentence; the "text" entry of every
//                 record is read and its "analysis" entry is (over)written
//                 with an array of maps holding the keys "root", "ending",
//                 "clitic", "partofspeech" and "form".
// disambiguate  - whether to run the disambiguator over the raw analyses.
void addAnalysis(CLinguistic& linguistic, CDisambiguator& disambiguator, CFSArray<CFSVar>& words, const bool disambiguate) {
    // build the analyzer input from the "text" entry of every word
    CFSArray<CPTWord> PTWords;
    for (INTPTR ip=0; ip<words.GetSize(); ip++) {
        PTWords.AddItem(words[ip]["text"].GetWString());
    }
    // perform analysis and optional disambiguation
    CFSArray<CMorphInfos> MorphResults=linguistic.AnalyzeSentense(PTWords);
    if (disambiguate) {
        MorphResults=disambiguator.Disambiguate(MorphResults);
    }
    // collect the analysis results
    ASSERT(PTWords.GetSize()==MorphResults.GetSize());
    // NOTE(review): ASSERT may be compiled out in release builds, so bound the
    // loop by both sizes; MorphResults is then never indexed past its end even
    // if the analyzer returns fewer entries than there are words.
    const INTPTR ipCount=(MorphResults.GetSize()<words.GetSize() ? MorphResults.GetSize() : words.GetSize());
    for (INTPTR ip=0; ip<ipCount; ip++) {
        const CFSArray<CMorphInfo> &Analysis=MorphResults[ip].m_MorphInfo;
        CFSVar VarAnalysis;
        // cast up front so a word without analyses still gets an (empty) array
        VarAnalysis.Cast(CFSVar::VAR_ARRAY);
        for (INTPTR ipRes=0; ipRes<Analysis.GetSize(); ipRes++) {
            const CMorphInfo &Analysis1=Analysis[ipRes];
            CFSVar VarAnalysis1;
            VarAnalysis1["root"]=Analysis1.m_szRoot;
            VarAnalysis1["ending"]=Analysis1.m_szEnding;
            VarAnalysis1["clitic"]=Analysis1.m_szClitic;
            VarAnalysis1["partofspeech"]=CFSWString(Analysis1.m_cPOS);
            VarAnalysis1["form"]=Analysis1.m_szForm;
            VarAnalysis[ipRes]=VarAnalysis1;
        }
        words[ip]["analysis"]=VarAnalysis;
    }
}
Esempio n. 2
0
	void SubKeys(const CFSAString szExcept, const CFSVar &Data) {
		for (INTPTR ip=0; ip<Data.GetSize(); ip++) {
			CFSAString szKey=Data.GetKey(ip);
			if (szKey==szExcept) continue;
			m_Writer.Key(szKey);
			m_Writer.Val(Data[szKey]);
		}
	}
Esempio n. 3
0
// Collect the entries stored under data["text"] into a StringVector,
// preserving their order.
StringVector convertStringVectorOutput(CFSVar& data) {
    CFSVar forms = data["text"];
    StringVector collected;
    collected.reserve(forms.GetSize());
    int pos = 0;
    while (pos < forms.GetSize()) {
        collected.push_back(std::string(forms[pos].GetAString()));
        ++pos;
    }
    return collected;
}
Esempio n. 4
0
// Build the CFSArray input required by the vabamorf base library from a
// StringVector: one map record per word, with the word stored under "text".
CFSArray<CFSVar> convertInput(StringVector const& sentence) {
    const size_t wordCount = sentence.size();
    // the word count is passed to the array constructor, as in the original
    CFSArray<CFSVar> records(wordCount);
    for (size_t pos = 0; pos != wordCount; ++pos) {
        CFSVar record;
        record.Cast(CFSVar::VAR_MAP);
        record["text"] = sentence[pos].c_str();
        records.AddItem(record);
    }
    return records;
}
Esempio n. 5
0
void CJSONWriter::Val(const CFSVar &Var) {
	switch (Var.GetType()) {
		case CFSVar::VAR_EMPTY:
			NullVal();
		break;
		case CFSVar::VAR_INT:
			IntVal(Var.GetInt());
		break;
		case CFSVar::VAR_FLOAT:
			FloatVal(Var.GetFloat());
		break;
		case CFSVar::VAR_BOOL:
			BoolVal(Var.GetBool());
		break;
		case CFSVar::VAR_STRING:
			StringVal(Var.GetAString());
		break;
		case CFSVar::VAR_MAP:
			ObjectStart();
			for (INTPTR ip=0; ip<Var.GetSize(); ip++) {
				CFSAString szKey=Var.GetKey(ip);
				Key(szKey);
				Val(Var[szKey]);
			}
			ObjectEnd();
		break;
		case CFSVar::VAR_ARRAY:
			ArrayStart();
			for (INTPTR ip=0; ip<Var.GetSize(); ip++) {
				Val(Var[ip]);
			}
			ArrayEnd();
		break;
	}
}
Esempio n. 6
0
// Convert spell-checked word records into the SpellingResults objects used by
// the wrapper. For every record the "text", "spelling" and "suggestions"
// entries are read.
std::vector<SpellingResults> convertSpellingOutput(CFSArray<CFSVar>& words) {
    std::vector<SpellingResults> converted;
    converted.reserve(words.GetSize());
    for (int w = 0; w < words.GetSize(); ++w) {
        // work on a copy of the record, so lookups never touch the caller's data
        CFSVar entry = words[w];
        CFSVar proposals = entry["suggestions"];
        StringVector proposalStrings;
        proposalStrings.reserve(proposals.GetSize());
        for (int s = 0; s < proposals.GetSize(); ++s) {
            CFSVar proposal = proposals[s];
            proposalStrings.push_back(std::string(proposal.GetAString()));
        }
        std::string text = std::string(entry["text"].GetAString());
        converted.push_back(SpellingResults(text, entry["spelling"].GetInt(), proposalStrings));
    }
    return converted;
}
Esempio n. 7
0
// Convert vabamorf base library output to WordAnalysis instances, which are
// easier to wrap. Each word record contributes its "text" plus one Analysis
// per element of its "analysis" array.
std::vector<WordAnalysis> convertOutput(CFSArray<CFSVar>& words) {
    std::vector<WordAnalysis> converted;
    converted.reserve(words.GetSize());
    for (int w = 0; w < words.GetSize(); ++w) {
        // work on a copy of the record, so lookups never touch the caller's data
        CFSVar entry = words[w];
        CFSVar candidates = entry["analysis"];
        AnalysisVector parsed;
        for (int c = 0; c < candidates.GetSize(); ++c) {
            CFSVar candidate = candidates[c];
            Analysis item(candidate["root"].GetAString(),
                          candidate["ending"].GetAString(),
                          candidate["clitic"].GetAString(),
                          candidate["partofspeech"].GetAString(),
                          candidate["form"].GetAString());
            parsed.push_back(item);
        }
        converted.push_back(WordAnalysis(std::string(entry["text"].GetAString()), parsed));
    }
    return converted;
}
Esempio n. 8
0
// Synthesize word forms from the "lemma", "partofspeech", "form" and "hint"
// entries of Data. When the synthesizer returns at least one result, the
// generated forms (root + ending + clitic) are stored as an array under
// Data["text"]; otherwise Data is left untouched.
void synthesizeWord(CLinguistic& linguistic, CFSVar &Data) {
    // read through a const view so lookups cannot modify Data
    const CFSVar &Request=Data;

    CMorphInfo Query;
    Query.m_szRoot=Request["lemma"].GetWString();
    Query.m_szForm=Request["form"].GetWString();
    Query.m_cPOS=Request["partofspeech"].GetWString()[0];
    if (!Query.m_cPOS) {
        // no part of speech given: fall back to '*'
        Query.m_cPOS='*';
    }
    CFSWString szHint=Request["hint"].GetWString();

    CFSArray<CMorphInfo> Forms=linguistic.Synthesize(Query, szHint);
    if (!Forms.GetSize()) {
        return;
    }

    CFSVar Text;
    Text.Cast(CFSVar::VAR_ARRAY);
    for (INTPTR ipForm=0; ipForm<Forms.GetSize(); ipForm++) {
        CMorphInfo &Form=Forms[ipForm];
        Text[ipForm]=Form.m_szRoot+Form.m_szEnding+Form.m_szClitic;
    }
    Data["text"]=Text;
}
Esempio n. 9
0
// Spell-check every word record and store the verdict under "spelling".
// For misspelled words, an array of suggestions is stored under "suggestions"
// when `suggest` is true. Hyphens and punctuation are removed and the word is
// trimmed before checking; a word that is empty afterwards counts as correct.
void addSuggestions(CLinguistic& linguistic, CFSArray<CFSVar>& words, const bool suggest) {
    for (INTPTR ipWord=0; ipWord<words.GetSize(); ipWord++) {
        CFSVar &Entry=words[ipWord];
        CPTWord Cleaned=Entry["text"].GetWString();
        Cleaned.RemoveHyphens();
        Cleaned.RemovePunctuation();
        Cleaned.Trim();

        const bool bCorrect=Cleaned.m_szWord.IsEmpty() || linguistic.SpellWord(Cleaned.m_szWord)==SPL_NOERROR;
        Entry["spelling"]=bCorrect;
        if (bCorrect || !suggest) {
            continue;
        }

        CFSWStringArray Proposals=linguistic.Suggest(Cleaned.m_szWord);
        CFSVar VarProposals;
        VarProposals.Cast(CFSVar::VAR_ARRAY);
        for (INTPTR ipProp=0; ipProp<Proposals.GetSize(); ipProp++) {
            VarProposals[ipProp]=Proposals[ipProp];
        }
        Entry["suggestions"]=VarProposals;
    }
}
Esempio n. 10
0
	void OnValReadEnd(const CFSAString &szKey, CFSVar &Data) {
		if (szKey.IsEmpty()) {
			SubKeys("paragraphs", Data);
			m_Writer.ObjectEnd();
		} else if (szKey=="/paragraphs") {
			m_Writer.ArrayEnd();
			m_iCollectData++;
		} else if (KeyMatch(szKey, "/paragraphs/%d")) {
			SubKeys("sentences", Data);
			m_Writer.ObjectEnd();
			m_iCollectData--;
		} else if (KeyMatch(szKey, "/paragraphs/%d/sentences")) {
			m_Writer.ArrayEnd();
			m_iCollectData++;
		} else if (KeyMatch(szKey, "/paragraphs/%d/sentences/%d")) {
			if (Data.KeyExist("words")) {

				CFSVar &Words=Data["words"];
				CFSArray<CMorphInfos> WordsAnalysis;
				for (INTPTR ip=0; ip<Words.GetSize(); ip++) {
					const CFSVar &Word=Words[ip];
					CMorphInfos Analysis;
					Analysis.m_szWord=Word["text"].GetWString();
					const CFSVar &VarAnalysis=Word["analysis"];
					for (INTPTR ip2=0; ip2<VarAnalysis.GetSize(); ip2++) {
						const CFSVar &VarAnalysis1=VarAnalysis[ip2];
						CMorphInfo Analysis1;
						Analysis1.m_szRoot=VarAnalysis1["root"].GetWString();
						Analysis1.m_szEnding=VarAnalysis1["ending"].GetWString();
						Analysis1.m_szClitic=VarAnalysis1["clitic"].GetWString();
						Analysis1.m_cPOS=VarAnalysis1["partofspeech"].GetWString()[0];
						Analysis1.m_szForm=VarAnalysis1["form"].GetWString();
						Analysis.m_MorphInfo.AddItem(Analysis1);
					}
					WordsAnalysis.AddItem(Analysis);
				}

				WordsAnalysis=m_Disambiguator.Disambiguate(WordsAnalysis);
				RT_ASSERT(Words.GetSize()==WordsAnalysis.GetSize());

				for (INTPTR ip=0; ip<Words.GetSize(); ip++) {
					const CMorphInfos &Analysis=WordsAnalysis[ip];
					CFSVar VarAnalysis;
					VarAnalysis.Cast(CFSVar::VAR_ARRAY);
					for (INTPTR ipRes=0; ipRes<Analysis.m_MorphInfo.GetSize(); ipRes++) {
						const CMorphInfo &Analysis1=Analysis.m_MorphInfo[ipRes];
						CFSVar VarAnalysis1;
						VarAnalysis1["root"]=Analysis1.m_szRoot;
						VarAnalysis1["ending"]=Analysis1.m_szEnding;
						VarAnalysis1["clitic"]=Analysis1.m_szClitic;
						VarAnalysis1["partofspeech"]=CFSWString(Analysis1.m_cPOS);
						VarAnalysis1["form"]=Analysis1.m_szForm;
						VarAnalysis[ipRes]=VarAnalysis1;
					}
					Words[ip]["analysis"]=VarAnalysis;
				}

			}
			m_Writer.Val(Data);
			m_iCollectData--;
		}
	}
Esempio n. 11
0
// Read one value starting at the current character m_cCh and return it.
//
// szKeyPath - path of the value being read; child values are read with
//             szKeyPath + "/" + <array index or map key>.
//
// OnValReadStart(szKeyPath) is called before reading and
// OnValReadEnd(szKeyPath, Data) after it. Child values are stored into the
// returned container only while m_iCollectData>0 (checked after each child
// has been read). At end of input (m_cCh==0) an empty value is returned.
//
// Throws CJSONException on end of input inside an array or map, a missing
// ',' or ':', a missing key, or an unrecognized value start character.
CFSVar CJSONReader::ReadVal(const CFSAString &szKeyPath)
{
	OnValReadStart(szKeyPath);
	CFSVar Data;

	if (!m_cCh) {
		// End of input: leave Data empty. Tested first so the 0 character is
		// never handed to the FSStrChr lookup in the number test below
		// (NOTE(review): a strchr-like search would match the terminator).
	} else if (m_cCh=='[') {
		Data.Cast(CFSVar::VAR_ARRAY);
		GetChar(true);
		INTPTR ipPos=0;
		for (;;) {
			if (m_cCh==0) {
				throw CJSONException(FSTSTR("Unexpected EOF"));
			} else if (m_cCh==']') {
				GetChar(true);
				break;
			} else if (ipPos>0) {
				// every element after the first must be preceded by ','
				if (m_cCh==',') {
					GetChar(true);
				} else {
					throw CJSONException(FSTSTR("Missing ',' in array"));
				}
			}

			// the element index doubles as the last component of the key path
			CFSAString szKey;
			szKey.Format("%zd", ipPos);
			CFSVar Data1=ReadVal(szKeyPath+"/"+szKey);
			if (m_iCollectData>0) {
				Data[ipPos]=Data1;
			}
			ipPos++;
		}
	} else if (m_cCh=='{') {
		Data.Cast(CFSVar::VAR_MAP);
		GetChar(true);
		INTPTR ipPos=0;
		for (;;) {
			if (m_cCh==0) {
				throw CJSONException(FSTSTR("Unexpected EOF"));
			} else if (m_cCh=='}') {
				GetChar(true);
				break;
			} else if (ipPos>0) {
				// every member after the first must be preceded by ','
				if (m_cCh==',') {
					GetChar(true);
				} else {
					throw CJSONException(FSTSTR("Missing ',' in map"));
				}
			}

			// keys may be quoted (single or double quotes) or bare text
			// starting with a letter
			CFSAString szKey;
			if (m_cCh=='\"' || m_cCh=='\'') {
				szKey=ReadString();
			} else if (FSIsLetter(m_cCh)) {
				szKey=ReadText();
			} else {
				throw CJSONException(FSTSTR("Expected key"));
			}
			if (m_cCh==':') {
				GetChar(true);
			} else {
				throw CJSONException(FSTSTR("Expected ':'"));
			}
			CFSVar Data1=ReadVal(szKeyPath+"/"+szKey);
			if (m_iCollectData>0) {
				Data[szKey]=Data1;
			}
			ipPos++;
		}
	} else if (m_cCh=='\"' || m_cCh=='\'') {
		Data=ReadString();
	} else if ((m_cCh>='0' && m_cCh<='9') || FSStrChr("-+.", m_cCh)) {
		Data=ReadNumber();
	} else if (FSIsLetter(m_cCh)) {
		Data=ReadConst();
	} else {
		throw CJSONException(FSTSTR("Unknown value type"));
	}

	OnValReadEnd(szKeyPath, Data);
	return Data;
}