/**
 * Runs the weighted transducer over a lower-cased copy of the given word and
 * collects one Analysis object per result produced by the transducer.
 *
 * @param word           input characters; wlen characters are copied from it
 * @param wlen           number of characters in word
 * @param fullMorphology when true, a copy of the raw transducer output is
 *                       attached to each analysis as FSTOUTPUT
 * @return a list allocated with new (this function does not free it). The list
 *         is empty when wlen exceeds LIBVOIKKO_MAX_WORD_CHARS, when the
 *         transducer rejects the input in prepare(), or when it yields no
 *         results.
 */
list<Analysis *> * VfstAnalyzer::analyze(const wchar_t * word, size_t wlen, bool fullMorphology) {
	if (wlen > LIBVOIKKO_MAX_WORD_CHARS) {
		return new list<Analysis *>();
	}

	// The transducer is fed a lower-cased private copy; the caller's buffer is left untouched.
	wchar_t * lowered = new wchar_t[wlen];
	memcpy(lowered, word, wlen * sizeof(wchar_t));
	voikko_set_case(CT_ALL_LOWER, lowered, wlen);

	list<Analysis *> * results = new list<Analysis *>();
	if (!transducer->prepare(configuration, lowered, wlen)) {
		delete[] lowered;
		return results;
	}

	int16_t weight;
	// The counter starts at 1 and must stay below MAX_ANALYSIS_COUNT before
	// each call to next(), which bounds the number of collected analyses.
	for (int round = 1;
	     round < MAX_ANALYSIS_COUNT && transducer->next(configuration, outputBuffer, BUFFER_SIZE, &weight);
	     ++round) {
		Analysis * current = new Analysis();
		if (fullMorphology) {
			current->addAttribute(Analysis::Key::FSTOUTPUT, StringUtils::copy(outputBuffer));
		}

		// The weight is converted with logWeightToProb() and stored as text
		// with a precision of 9.
		stringstream formatter;
		formatter << setprecision(9) << logWeightToProb(weight);
		string probabilityText = formatter.str();
		current->addAttribute(Analysis::Key::WEIGHT, StringUtils::ucs4FromUtf8(probabilityText.c_str()));

		results->push_back(current);
	}

	delete[] lowered;
	return results;
}
/**
 * Looks the word up in the HFST transducer and converts every result string
 * into an Analysis object.
 *
 * Each result string is split at its first '+':
 *  - the text before the '+' is the base form (stored as BASEFORM only when
 *    fullMorphology is true);
 *  - the text from the '+' to the end of the string is stored as FSTOUTPUT.
 * A result without any '+' is treated as a bare base form with an empty
 * FSTOUTPUT value instead of raising std::out_of_range.
 *
 * @param word           NUL-terminated input string; it is passed through
 *                       StringUtils::ucs4FromUtf8-style handling downstream, so
 *                       presumably UTF-8 (confirm against callers)
 * @param fullMorphology when true, the BASEFORM attribute is added
 * @return a list allocated with new (this function does not free it); empty
 *         when strlen(word) exceeds LIBVOIKKO_MAX_WORD_CHARS or the lookup
 *         yields nothing.
 */
list<Analysis *> * HfstAnalyzer::analyze(const char * word, bool fullMorphology) {
	//cerr << "HfstAnalyzer::analyze (" << string(word) << ")" << endl;
	size_t wlen = strlen(word);
	if (wlen > LIBVOIKKO_MAX_WORD_CHARS) {
		return new list<Analysis *>();
	}

	// lookup() is handed a non-const char *, so give it a private mutable copy.
	// A std::string owns the storage and releases it on every exit path; the
	// previous new[] buffer was never freed. The copy stays alive until the
	// function returns, i.e. for as long as the result queue is in use.
	std::string writable(word);
	hfst_ospell::AnalysisQueue q = t->lookup(&writable[0]);

	list<Analysis *> * analysisList = new list<Analysis *>();
	while (q.size() > 0) {
		const string analysis = q.top().first;
		q.pop();

		// Locate the separator once. npos means "no tags at all".
		const string::size_type plusPos = analysis.find('+');
		const string tags = (plusPos == string::npos) ? string() : analysis.substr(plusPos);

		Analysis * a = new Analysis();
		if (fullMorphology) {
			// substr(0, npos) yields the whole string, which is the wanted
			// base form when there is no separator.
			const string lemma = analysis.substr(0, plusPos);
			a->addAttribute(Analysis::Key::BASEFORM, StringUtils::ucs4FromUtf8(lemma.c_str()));
		}
		a->addAttribute(Analysis::Key::FSTOUTPUT, StringUtils::ucs4FromUtf8(tags.c_str()));
		analysisList->push_back(a);
	}
	return analysisList;
}