Exemple #1
0
// add morphological analysis to CFSArray containing the sentence
void addAnalysis(CLinguistic& linguistic, CDisambiguator& disambiguator, CFSArray<CFSVar>& words, const bool disambiguate) {
    //CFSVar &words=Data["words"];
    CFSArray<CPTWord> PTWords;
    for (INTPTR ip=0; ip<words.GetSize(); ip++) {
        PTWords.AddItem(words[ip]["text"].GetWString());
    }
    // perform analysis and optional disambiguation
    CFSArray<CMorphInfos> MorphResults=linguistic.AnalyzeSentense(PTWords);
    if (disambiguate) {
        MorphResults=disambiguator.Disambiguate(MorphResults);
    }
    // collect the analysis results
    ASSERT(PTWords.GetSize()==MorphResults.GetSize());
    for (INTPTR ip=0; ip<words.GetSize(); ip++) {
        const CFSArray<CMorphInfo> &Analysis=MorphResults[ip].m_MorphInfo;
        CFSVar VarAnalysis;
        VarAnalysis.Cast(CFSVar::VAR_ARRAY);
        for (INTPTR ipRes=0; ipRes<Analysis.GetSize(); ipRes++) {
            const CMorphInfo &Analysis1=Analysis[ipRes];
            CFSVar VarAnalysis1;
            VarAnalysis1["root"]=Analysis1.m_szRoot;
            VarAnalysis1["ending"]=Analysis1.m_szEnding;
            VarAnalysis1["clitic"]=Analysis1.m_szClitic;
            VarAnalysis1["partofspeech"]=CFSWString(Analysis1.m_cPOS);
            VarAnalysis1["form"]=Analysis1.m_szForm;
            VarAnalysis[ipRes]=VarAnalysis1;
        }
        words[ip]["analysis"]=VarAnalysis;
    }
}
Exemple #2
0
// convert StringVector input to CFSArray input required by vabamorf base library.
CFSArray<CFSVar> convertInput(StringVector const& sentence) {
    CFSArray<CFSVar> data(sentence.size());
    for (size_t i=0 ; i<sentence.size() ; ++i) {
        CFSVar wordData;
        wordData.Cast(CFSVar::VAR_MAP);
        wordData["text"] = sentence[i].c_str();
        data.AddItem(wordData);
    }
    return data;
}
Exemple #3
0
// synthesize words based on lemma, pos and form
void synthesizeWord(CLinguistic& linguistic, CFSVar &Data) {
    const CFSVar &Word=Data;

    CMorphInfo Input;
    Input.m_szRoot=Word["lemma"].GetWString();
    Input.m_cPOS=Word["partofspeech"].GetWString()[0];
    if (!Input.m_cPOS) {
        Input.m_cPOS='*';
    }
    Input.m_szForm=Word["form"].GetWString();
    CFSWString szHint=Word["hint"].GetWString();

    CFSArray<CMorphInfo> Result=linguistic.Synthesize(Input, szHint);
    if (Result.GetSize()) {
        CFSVar Text;
        Text.Cast(CFSVar::VAR_ARRAY);
        for (INTPTR ipRes=0; ipRes<Result.GetSize(); ipRes++) {
            Text[ipRes]=Result[ipRes].m_szRoot+Result[ipRes].m_szEnding+Result[ipRes].m_szClitic;
        }
        Data["text"]=Text;
    }
}
Exemple #4
0
// spellcheck the words and add suggestions
void addSuggestions(CLinguistic& linguistic, CFSArray<CFSVar>& words, const bool suggest) {
    for (INTPTR ip=0; ip<words.GetSize(); ip++) {
        CFSVar &Word=words[ip];
        CPTWord PTWord=Word["text"].GetWString();
        PTWord.RemoveHyphens();
        PTWord.RemovePunctuation();
        PTWord.Trim();
        if (PTWord.m_szWord.IsEmpty() || linguistic.SpellWord(PTWord.m_szWord)==SPL_NOERROR) {
            Word["spelling"]=true;
        } else {
            Word["spelling"]=false;
            if (suggest) {
                CFSWStringArray Suggestions=linguistic.Suggest(PTWord.m_szWord);
                CFSVar VarSuggestions;
                VarSuggestions.Cast(CFSVar::VAR_ARRAY);
                for (INTPTR ipRes=0; ipRes<Suggestions.GetSize(); ipRes++) {
                    VarSuggestions[ipRes]=Suggestions[ipRes];
                }
                Word["suggestions"]=VarSuggestions;
            }
        }
    }
}
Exemple #5
0
	void OnValReadEnd(const CFSAString &szKey, CFSVar &Data) {
		if (szKey.IsEmpty()) {
			SubKeys("paragraphs", Data);
			m_Writer.ObjectEnd();
		} else if (szKey=="/paragraphs") {
			m_Writer.ArrayEnd();
			m_iCollectData++;
		} else if (KeyMatch(szKey, "/paragraphs/%d")) {
			SubKeys("sentences", Data);
			m_Writer.ObjectEnd();
			m_iCollectData--;
		} else if (KeyMatch(szKey, "/paragraphs/%d/sentences")) {
			m_Writer.ArrayEnd();
			m_iCollectData++;
		} else if (KeyMatch(szKey, "/paragraphs/%d/sentences/%d")) {
			if (Data.KeyExist("words")) {

				CFSVar &Words=Data["words"];
				CFSArray<CMorphInfos> WordsAnalysis;
				for (INTPTR ip=0; ip<Words.GetSize(); ip++) {
					const CFSVar &Word=Words[ip];
					CMorphInfos Analysis;
					Analysis.m_szWord=Word["text"].GetWString();
					const CFSVar &VarAnalysis=Word["analysis"];
					for (INTPTR ip2=0; ip2<VarAnalysis.GetSize(); ip2++) {
						const CFSVar &VarAnalysis1=VarAnalysis[ip2];
						CMorphInfo Analysis1;
						Analysis1.m_szRoot=VarAnalysis1["root"].GetWString();
						Analysis1.m_szEnding=VarAnalysis1["ending"].GetWString();
						Analysis1.m_szClitic=VarAnalysis1["clitic"].GetWString();
						Analysis1.m_cPOS=VarAnalysis1["partofspeech"].GetWString()[0];
						Analysis1.m_szForm=VarAnalysis1["form"].GetWString();
						Analysis.m_MorphInfo.AddItem(Analysis1);
					}
					WordsAnalysis.AddItem(Analysis);
				}

				WordsAnalysis=m_Disambiguator.Disambiguate(WordsAnalysis);
				RT_ASSERT(Words.GetSize()==WordsAnalysis.GetSize());

				for (INTPTR ip=0; ip<Words.GetSize(); ip++) {
					const CMorphInfos &Analysis=WordsAnalysis[ip];
					CFSVar VarAnalysis;
					VarAnalysis.Cast(CFSVar::VAR_ARRAY);
					for (INTPTR ipRes=0; ipRes<Analysis.m_MorphInfo.GetSize(); ipRes++) {
						const CMorphInfo &Analysis1=Analysis.m_MorphInfo[ipRes];
						CFSVar VarAnalysis1;
						VarAnalysis1["root"]=Analysis1.m_szRoot;
						VarAnalysis1["ending"]=Analysis1.m_szEnding;
						VarAnalysis1["clitic"]=Analysis1.m_szClitic;
						VarAnalysis1["partofspeech"]=CFSWString(Analysis1.m_cPOS);
						VarAnalysis1["form"]=Analysis1.m_szForm;
						VarAnalysis[ipRes]=VarAnalysis1;
					}
					Words[ip]["analysis"]=VarAnalysis;
				}

			}
			m_Writer.Val(Data);
			m_iCollectData--;
		}
	}
Exemple #6
0
CFSVar CJSONReader::ReadVal(const CFSAString &szKeyPath)
{
	OnValReadStart(szKeyPath);
	CFSVar Data;

	if (m_cCh=='[') {
		Data.Cast(CFSVar::VAR_ARRAY);
		GetChar(true);
		INTPTR ipPos=0;
		for (;;) {
			if (m_cCh==0) {
				throw CJSONException(FSTSTR("Unexpetcted EOF"));
			} else if (m_cCh==']') {
				GetChar(true);
				break;
			} else if (ipPos>0) {
				if (m_cCh==',') {
					GetChar(true);
				} else {
					throw CJSONException(FSTSTR("Missing ',' in array"));
				}
			}

			CFSAString szKey;
			szKey.Format("%zd", ipPos);
			CFSVar Data1=ReadVal(szKeyPath+"/"+szKey);
			if (m_iCollectData>0) {
				Data[ipPos]=Data1;
			}
			ipPos++;
		}
	} else if (m_cCh=='{') {
		Data.Cast(CFSVar::VAR_MAP);
		GetChar(true);
		INTPTR ipPos=0;
		for (;;) {
			if (m_cCh==0) {
				throw CJSONException(FSTSTR("Unexpetcted EOF"));
			} else if (m_cCh=='}') {
				GetChar(true);
				break;
			} else if (ipPos>0) {
				if (m_cCh==',') {
					GetChar(true);
				} else {
					throw CJSONException(FSTSTR("Missing ',' in map"));
				}
			}

			CFSAString szKey;
			if (m_cCh=='\"' || m_cCh=='\'') {
				szKey=ReadString();
			} else if (FSIsLetter(m_cCh)) {
				szKey=ReadText();
			} else {
				throw CJSONException(FSTSTR("Expected key"));
			}
			if (m_cCh==':') {
				GetChar(true);
			} else {
				throw CJSONException(FSTSTR("Expected ':'"));
			}
			CFSVar Data1=ReadVal(szKeyPath+"/"+szKey);
			if (m_iCollectData>0) {
				Data[szKey]=Data1;
			}
			ipPos++;
		}
	} else if (m_cCh=='\"' || m_cCh=='\'') {
		Data=ReadString();
	} else if ((m_cCh>='0' && m_cCh<='9') || FSStrChr("-+.", m_cCh)) {
		Data=ReadNumber();
	} else if (FSIsLetter(m_cCh)) {
		Data=ReadConst();
	} else if (!m_cCh) {
	} else {
		throw CJSONException(FSTSTR("Unknown value type"));
	}

	OnValReadEnd(szKeyPath, Data);
	return Data;
}