Пример #1
0
bool CLemmatizer::CreateParadigmCollection(bool bNorm, string& InputWordStr, bool capital, vector<CFormInfo>& Result) const
{
	FilterSrc(InputWordStr);
	vector<CAutomAnnotationInner>	FindResults;
	bool bFound = LemmatizeWord(InputWordStr, capital, m_bUsePrediction, FindResults, true);
		
	if (FindResults.empty())
	{
		// the word cannot be predicted or m_bUsePrediction is false
		return false;
	};
	
	AssignWeightIfNeed(FindResults);

	for (size_t i = 0; i < FindResults.size(); i++)
	{
		const CAutomAnnotationInner& A = FindResults[i];
		// if bNorm, then  ignore words which are not lemma
		if (   bNorm && (A.m_ItemNo!=0)) continue;
		
		CFormInfo P;
		P.Create(this, A, InputWordStr, bFound);

		Result.push_back(P);
	}

	return true;
}
Пример #2
0
// Loads a plain list of words from the file "<path>predictroots.txt",
// looks each word up in the dictionary and stores the generated word forms
// in m_PredictEndRoots.
//
//   path - prefix that is concatenated directly with "predictroots.txt";
//          no path separator is inserted here.
//
// For every non-empty line (after Trim and RmlMakeUpper for m_Language) the
// word is searched with FindWord. For each hit a CFormInfo is created, and
// for every form whose flexia number has not been seen yet *for this word*
// a CPredictEndRoot is appended, holding the base number, the lemma info,
// the flexia number and the word form reversed.
// After the whole file is read, m_PredictEndRoots is sorted.
//
// If the file cannot be opened the function returns silently and
// m_PredictEndRoots is left unsorted and unchanged.
void CMorphDictionary::LoadFrequentRoots(string path)
{
	// Scope guard: closes the stream on every exit path, including
	// exceptions thrown by string/vector/set allocation or by the helpers
	// called inside the loop.
	struct CFileGuard
	{
		FILE* m_fp;
		explicit CFileGuard(FILE* fp) : m_fp(fp) {}
		~CFileGuard() { if (m_fp) fclose(m_fp); }
	private:
		// not copyable: a copy would close the same stream twice
		CFileGuard(const CFileGuard&);
		CFileGuard& operator=(const CFileGuard&);
	};

	const string load_path = path + "predictroots.txt";

	{
		CFileGuard File(fopen(load_path.c_str(), "r"));
		if (!File.m_fp) return;

		// fgets reads at most sizeof(buffer)-1 characters per call, so a
		// longer line is processed in several pieces.
		char buffer[1000];
		while (fgets(buffer, static_cast<int>(sizeof(buffer)), File.m_fp))
		{
			string WordStr = buffer;
			Trim(WordStr);
			if (WordStr.empty()) continue;
			RmlMakeUpper(WordStr, m_Language);

			vector<CFindWordNode> FindResults;
			if (!FindWord(WordStr, true, false, false, FindResults)) continue;

			// flexia numbers already emitted for the current word
			set<size_t> UsedFlexia;
			for (size_t i = 0; i < FindResults.size(); i++)
			{
				const CFindWordNode& Node = FindResults[i];

				CFormInfo P;
				P.Create(this, Node.m_nBase, Node.m_LemmaInfo, Node.m_nFlex);

				for (size_t j = 0; j < P.GetCount(); j++)
				{
					const size_t FlexNo = P.GetFlexNoOfForm(j);

					// insert() reports whether the key was new, so one
					// lookup replaces the former find() + insert() pair
					if (!UsedFlexia.insert(FlexNo).second) continue;

					CPredictEndRoot R;
					R.m_BaseNo = Node.m_nBase;
					R.m_LemmaInfo = Node.m_LemmaInfo;
					R.m_EndRoot = P.GetWordForm(j);
					// the word form is stored reversed
					reverse(R.m_EndRoot.begin(), R.m_EndRoot.end());
					R.m_FlexNo = FlexNo;
					m_PredictEndRoots.push_back(R);
				}
			}
		}
	}	// the file is closed here, before sorting

	sort(m_PredictEndRoots.begin(), m_PredictEndRoots.end());
}