예제 #1
0
/**
 * Analyzes a word with the VFST transducer.
 *
 * A lower-cased (CT_ALL_LOWER) private copy of the word is fed to the
 * transducer, and every output the transducer yields becomes one Analysis
 * carrying a WEIGHT attribute (the probability derived from the output's
 * log weight, formatted with 9 significant digits).
 *
 * @param word            the word to analyze; exactly wlen characters are read
 * @param wlen            number of characters in word
 * @param fullMorphology  when true, the raw transducer output is additionally
 *                        attached to each analysis as FSTOUTPUT
 * @return a list allocated with new (never null) holding Analysis objects
 *         allocated with new. The list is empty when wlen exceeds
 *         LIBVOIKKO_MAX_WORD_CHARS or when the transducer rejects the input.
 *         Nothing in this function keeps a reference to the list, so the
 *         caller is presumably responsible for releasing it.
 */
list<Analysis *> * VfstAnalyzer::analyze(const wchar_t * word, size_t wlen, bool fullMorphology) {
	if (wlen > LIBVOIKKO_MAX_WORD_CHARS) {
		return new list<Analysis *>();
	}
	
	// The scratch copy is owned by a std::wstring so that it is released on
	// every exit path, including an exception thrown while building the
	// analyses below. &buffer[0] is a valid writable pointer even for wlen == 0.
	std::wstring lowerBuffer(word, wlen);
	wchar_t * wordLowerUcs4 = &lowerBuffer[0];
	voikko_set_case(CT_ALL_LOWER, wordLowerUcs4, wlen);
	
	list<Analysis *> * analysisList = new list<Analysis *>();
	if (transducer->prepare(configuration, wordLowerUcs4, wlen)) {
		int analysisCount = 0;
		// Initialised so that a defined value is read even if next() were to
		// report success without storing a weight.
		int16_t weight = 0;
		// The counter is incremented before the comparison, so at most
		// MAX_ANALYSIS_COUNT - 1 analyses are collected.
		while (++analysisCount < MAX_ANALYSIS_COUNT && transducer->next(configuration, outputBuffer, BUFFER_SIZE, &weight)) {
			Analysis * analysis = new Analysis();
			if (fullMorphology) {
				analysis->addAttribute(Analysis::Key::FSTOUTPUT, StringUtils::copy(outputBuffer));
			}
			stringstream ss;
			ss << setprecision(9) << logWeightToProb(weight);
			string weightStr = ss.str();
			analysis->addAttribute(Analysis::Key::WEIGHT, StringUtils::ucs4FromUtf8(weightStr.c_str()));
			analysisList->push_back(analysis);
		}
	}
	
	return analysisList;
}
예제 #2
0
/**
 * Analyzes a UTF-8 encoded word with the HFST transducer.
 *
 * Each result returned by the transducer lookup is split at its first '+':
 * the part before it is treated as the lemma and the part from the '+'
 * onwards (inclusive) as the tag string. A result without any '+' is
 * treated as a lemma with an empty tag string.
 *
 * @param word            NUL-terminated word to analyze
 * @param fullMorphology  when true, the lemma is attached to each analysis
 *                        as BASEFORM; the tag string is always attached as
 *                        FSTOUTPUT
 * @return a list allocated with new (never null) holding Analysis objects
 *         allocated with new, in the order the lookup queue yields them.
 *         The list is empty when strlen(word) exceeds
 *         LIBVOIKKO_MAX_WORD_CHARS or the lookup produces no results.
 *         Nothing in this function keeps a reference to the list, so the
 *         caller is presumably responsible for releasing it.
 */
list<Analysis *> * HfstAnalyzer::analyze(const char * word, bool fullMorphology) {
	//cerr << "HfstAnalyzer::analyze (" << string(word) << ")" << endl;
	size_t wlen = strlen(word);
	if (wlen > LIBVOIKKO_MAX_WORD_CHARS) {
		return new list<Analysis *>();
	}
	list<Analysis *> * analysisList = new list<Analysis *>();

	// lookup() is given a mutable char buffer, so the word is copied into a
	// std::string and its NUL-terminated storage is passed. The string owns
	// the memory and frees it when this function returns; the previous
	// new[] buffer was never deleted and leaked on every call.
	std::string writable(word, wlen);

	hfst_ospell::AnalysisQueue q = t->lookup(&writable[0]);

	while(q.size() > 0) {
		const string analysis = q.top().first;
		const string::size_type plusPos = analysis.find('+');
		// Everything from the first '+' to the end. When there is no '+',
		// tags stays empty instead of substr() throwing std::out_of_range;
		// substr(plusPos) also avoids the old length()-1 count, which cut
		// off the final character when the '+' was at index 0.
		string tags;
		if (plusPos != string::npos) {
			tags = analysis.substr(plusPos);
		}
		Analysis * a = new Analysis();
		if (fullMorphology) {
			// With plusPos == npos this yields the whole analysis string.
			string lemma = analysis.substr(0, plusPos);
			a->addAttribute(Analysis::Key::BASEFORM,  StringUtils::ucs4FromUtf8(lemma.c_str()));
		}
		a->addAttribute(Analysis::Key::FSTOUTPUT,  StringUtils::ucs4FromUtf8(tags.c_str()));
		analysisList->push_back(a);
		q.pop();
	}

	return analysisList;
}