void EnchantSpellingAlternativesPrivate::setEnchantSpellingAlternatives( Token* token, MorphoSyntacticData* tokenData, FsaStringsPool& sp) { // try to find simple Uncapitalization MORPHOLOGINIT; // FIXME Conditions below could be process unit parameters const LimaString& tokenStr=token->stringForm(); if (token->status().getAlphaCapital() == T_CAPITAL || token->status().getAlphaCapital() == T_CAPITAL_1ST || token->status().getAlphaCapital() == T_CAPITAL_SMALL || token->status().isAlphaConcatAbbrev() || token->status().isAlphaHyphen() || token->status().isAlphaPossessive() || tokenStr.toUpper() == tokenStr) { return; } std::vector<std::string> suggestions = m_enchantDictionary->suggest(tokenStr.toUtf8().constData()); for (std::vector<std::string>::const_iterator it = suggestions.begin(); it != suggestions.end();it++) { LimaString correction = LimaString::fromUtf8((*it).c_str()); // FIXME Conditions below could be process unit parameters if ( correction.size() > 1 && correction != tokenStr ) { DictionaryEntry* entry = new DictionaryEntry(m_dictionary->getEntry(correction)); MorphoSyntacticDataHandler lingInfosHandler(*tokenData, SPELLING_ALTERNATIVE); if (!entry->isEmpty()) { LINFO << "EnchantSpellingAlternativesPrivate::setEnchantSpellingAlternatives correcting" << tokenStr << "into" << correction; // add orthographic alternative to Token; StringsPoolIndex idx=sp[correction]; token->addOrthographicAlternatives(idx); if (entry->hasLingInfos()) { entry->parseLingInfos(&lingInfosHandler); } } else { delete entry; } } } }
void OrthographicAlternatives::createAlternative( Token* srcToken, MorphoSyntacticData* tokenData, LimaString& str, AnalysisDict::AbstractAnalysisDictionary* dictionary, StringsPool& sp) { MORPHOLOGINIT; LDEBUG << "OrthographicAlternatives::createAlternative" << str; DictionaryEntry* dicoEntry = new DictionaryEntry(dictionary->getEntry(str)); if (!dicoEntry->isEmpty()) { // add orthographic alternative to Token; StringsPoolIndex infl=sp[str]; Token* altToken=new Token(infl,str,srcToken->position(),srcToken->length(),new TStatus(*(srcToken->status()))); altToken->setDictionaryEntry(dicoEntry); srcToken->addOrthographicAlternative(altToken); tokenData->appendLingInfo(infl,dicoEntry,ORTHOGRAPHIC_ALTERNATIVE,sp); // if entry has other accented forms, // keep them ("PARIS" -> "paris" -> "Paris") if (dicoEntry->hasAccented()) { dicoEntry->reset(); Lima::LimaString alternativeStr = dicoEntry->nextAccented(); while (alternativeStr.size() != 0) { // give it its simple word entry into dictionary DictionaryEntry* altDicoEntry = new DictionaryEntry(dictionary->getEntry(alternativeStr)); StringsPoolIndex infl2=sp[alternativeStr]; tokenData->appendLingInfo(infl2,altDicoEntry,ORTHOGRAPHIC_ALTERNATIVE,sp); // add orthographic alternative to Token Token* altToken2=new Token(infl2,alternativeStr,srcToken->position(),srcToken->length(),new TStatus(*(srcToken->status()))); altToken2->setDictionaryEntry(altDicoEntry); srcToken->addOrthographicAlternative(altToken2); alternativeStr = dicoEntry->nextAccented(); } } } else { delete dicoEntry; } }