/**
 * Creates a new dicNode that represents a space insertion at the end of the input dicNode. Also
 * incorporates the unigram / bigram score for the ending word into the new dicNode.
 */
void Suggest::createNextWordDicNode(DicTraverseSession *traverseSession, DicNode *dicNode,
        const bool spaceSubstitution) const {
    const WordAttributes wordAttributes = traverseSession->getDictionaryStructurePolicy()
            ->getWordAttributesInContext(dicNode->getPrevWordIds(), dicNode->getWordId(),
                    traverseSession->getMultiBigramMap());
    if (SuggestionsOutputUtils::shouldBlockWord(traverseSession->getSuggestOptions(),
            dicNode, wordAttributes, false /* isLastWord */)) {
        return;
    }

    if (!TRAVERSAL->isGoodToTraverseNextWord(dicNode, wordAttributes.getProbability())) {
        return;
    }

    // Create a non-cached node here.
    DicNode newDicNode;
    DicNodeUtils::initAsRootWithPreviousWord(
            traverseSession->getDictionaryStructurePolicy(), dicNode, &newDicNode);
    const CorrectionType correctionType = spaceSubstitution ?
            CT_NEW_WORD_SPACE_SUBSTITUTION : CT_NEW_WORD_SPACE_OMISSION;
    Weighting::addCostAndForwardInputIndex(WEIGHTING, correctionType, traverseSession,
            dicNode, &newDicNode, traverseSession->getMultiBigramMap());
    if (newDicNode.getCompoundDistance() < static_cast<float>(MAX_VALUE_FOR_WEIGHTING)) {
        // newDicNode is worth continuing to traverse.
        // CAVEAT: This pruning is important for speed. Remove this when we can afford not to
        // prune here because here is not the right place to do pruning. Pruning should take
        // place only in DicNodePriorityQueue.
        traverseSession->getDicTraverseCache()->copyPushNextActive(&newDicNode);
    }
}
// Returns the probability of wordId in the context of prevWordIds, or NOT_A_PROBABILITY when
// any id is invalid or the entry is blacklisted or marked not-a-word.
int Ver4PatriciaTriePolicy::getProbabilityOfWord(const WordIdArrayView prevWordIds,
        const int wordId) const {
    if (wordId == NOT_A_WORD_ID || prevWordIds.contains(NOT_A_WORD_ID)) {
        return NOT_A_PROBABILITY;
    }
    const WordAttributes wordAttributes =
            mBuffers->getLanguageModelDictContent()->getWordAttributes(prevWordIds, wordId,
                    true /* mustMatchAllPrevWords */, mHeaderPolicy);
    if (wordAttributes.isBlacklisted() || wordAttributes.isNotAWord()) {
        return NOT_A_PROBABILITY;
    }
    return wordAttributes.getProbability();
}
// Returns the maximum probability among dictionary words that exactly match the given code
// points (allowing case errors, accent errors, and digraphs), or NOT_A_PROBABILITY if there
// is no such word.
/* static */ int DictionaryUtils::getMaxProbabilityOfExactMatches(
        const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy,
        const CodePointArrayView codePoints) {
    std::vector<DicNode> current;
    std::vector<DicNode> next;

    // No ngram context.
    NgramContext emptyNgramContext;
    WordIdArray<MAX_PREV_WORD_COUNT_FOR_N_GRAM> prevWordIdArray;
    const WordIdArrayView prevWordIds = emptyNgramContext.getPrevWordIds(
            dictionaryStructurePolicy, &prevWordIdArray, false /* tryLowerCaseSearch */);
    current.emplace_back();
    DicNodeUtils::initAsRoot(dictionaryStructurePolicy, prevWordIds, &current.front());
    for (const int codePoint : codePoints) {
        // The base-lower input is used to ignore case errors and accent errors.
        const int baseLowerCodePoint = CharUtils::toBaseLowerCase(codePoint);
        for (const DicNode &dicNode : current) {
            if (dicNode.isInDigraph() && dicNode.getNodeCodePoint() == baseLowerCodePoint) {
                next.emplace_back(dicNode);
                next.back().advanceDigraphIndex();
                continue;
            }
            processChildDicNodes(dictionaryStructurePolicy, baseLowerCodePoint, &dicNode,
                    &next);
        }
        current.clear();
        current.swap(next);
    }
    int maxProbability = NOT_A_PROBABILITY;
    for (const DicNode &dicNode : current) {
        if (!dicNode.isTerminalDicNode()) {
            continue;
        }
        const WordAttributes wordAttributes =
                dictionaryStructurePolicy->getWordAttributesInContext(dicNode.getPrevWordIds(),
                        dicNode.getWordId(), nullptr /* multiBigramMap */);
        // dicNode can contain case errors, accent errors, intentional omissions or digraphs.
        maxProbability = std::max(maxProbability, wordAttributes.getProbability());
    }
    return maxProbability;
}
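// The loop above advances a frontier of DicNodes one input code point at a time: "next" is
// grown from "current", then the two vectors are swapped. Below is a minimal, self-contained
// sketch of the same frontier-expansion pattern over a plain character trie. The Node type,
// the trie contents, and the probability values are hypothetical stand-ins for the LatinIME
// structures; the sketch only illustrates the traversal technique.

#include <algorithm>
#include <cctype>
#include <map>
#include <string>
#include <vector>

namespace {

struct Node {
    int probability = -1;           // -1 plays the role of NOT_A_PROBABILITY.
    std::map<char, Node> children;  // Outgoing edges keyed by lowercased character.
};

// Consumes one input character per level, keeping only nodes reachable from the frontier,
// and returns the best probability among nodes that survive the whole input.
int maxProbabilityOfExactMatchesSketch(const Node &root, const std::string &input) {
    std::vector<const Node *> current{&root};
    std::vector<const Node *> next;
    for (const char c : input) {
        const char lower = static_cast<char>(std::tolower(static_cast<unsigned char>(c)));
        for (const Node *node : current) {
            const auto it = node->children.find(lower);
            if (it != node->children.end()) {
                next.push_back(&it->second);
            }
        }
        current.clear();
        current.swap(next);
    }
    int maxProbability = -1;
    for (const Node *node : current) {
        maxProbability = std::max(maxProbability, node->probability);
    }
    return maxProbability;
}

}  // namespace

int main() {
    Node root;
    root.children['h'].children['i'].probability = 120;  // "hi" is a terminal word.
    // Case differences are ignored, mirroring the toBaseLowerCase() handling above.
    return maxProbabilityOfExactMatchesSketch(root, "Hi") == 120 ? 0 : 1;
}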
/* static */ bool SuggestionsOutputUtils::shouldBlockWord(
        const SuggestOptions *const suggestOptions, const DicNode *const terminalDicNode,
        const WordAttributes wordAttributes, const bool isLastWord) {
    const bool currentWordExactMatch =
            ErrorTypeUtils::isExactMatch(terminalDicNode->getContainedErrorTypes());
    // When we have to block offensive words, non-exact matched offensive words should not be
    // output.
    const bool shouldBlockOffensiveWords = suggestOptions->blockOffensiveWords();

    const bool isBlockedOffensiveWord = shouldBlockOffensiveWords
            && wordAttributes.isPossiblyOffensive();

    // This function is called in two situations:
    //
    // 1) At the end of a search, in which case terminalDicNode will point to the last DicNode
    //    of the search, and isLastWord will be true.
    //        "f**k"
    //             |
    //             \ terminalDicNode (isLastWord=true, currentWordExactMatch=true)
    //    In this case, if the current word is an exact match, we will always let the word
    //    through, even if the user is blocking offensive words (it's exactly what they typed!)
    //
    // 2) In the middle of the search, when we hit a terminal node, to decide whether or not
    //    to start a new search at root, to try to match the rest of the input. In this case,
    //    terminalDicNode will point to the terminal node we just hit, and isLastWord will be
    //    false.
    //        "fuckvthis"
    //             |
    //             \ terminalDicNode (isLastWord=false, currentWordExactMatch=true)
    //
    //    In this case, we should NOT allow the match through (correcting "fuckthis" to
    //    "f**k this" when offensive words are blocked would be a bad idea).
    //
    // In the case of a multi-word correction where the offensive word is typed last (eg.
    // for the input "allfuck"), this function will be called with isLastWord==true, but
    // currentWordExactMatch==false. So we are OK in this case as well.
    //        "allfuck"
    //               |
    //               \ terminalDicNode (isLastWord=true, currentWordExactMatch=false)
    if (isLastWord && currentWordExactMatch) {
        return false;
    } else {
        return isBlockedOffensiveWord;
    }
}
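// The branches above reduce to a small truth table over three booleans. The standalone sketch
// below reimplements just that predicate so the cases described in the comment can be
// exercised in isolation; shouldBlockWordSketch is a hypothetical helper, not part of the
// SuggestionsOutputUtils API.

#include <cassert>

// An exact match typed as the last word is never blocked; otherwise a word is blocked exactly
// when offensive words are being filtered and the word is possibly offensive.
bool shouldBlockWordSketch(const bool blockOffensiveWords, const bool isPossiblyOffensive,
        const bool isLastWord, const bool currentWordExactMatch) {
    if (isLastWord && currentWordExactMatch) {
        return false;  // Case 1: the user typed exactly this word; always let it through.
    }
    return blockOffensiveWords && isPossiblyOffensive;
}

int main() {
    // Case 1: "f**k" typed exactly as the last word -> never blocked, even when filtering.
    assert(!shouldBlockWordSketch(true, true, true, true));
    // Case 2: offensive word hit in the middle of the search ("fuckvthis") -> blocked.
    assert(shouldBlockWordSketch(true, true, false, true));
    // Multi-word correction ending in an offensive word ("allfuck") -> blocked, because the
    // last word is not an exact match of the whole input.
    assert(shouldBlockWordSketch(true, true, true, false));
    // Filtering disabled -> nothing is blocked.
    assert(!shouldBlockWordSketch(false, true, false, true));
    return 0;
}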
// Builds a WordProperty for the given word: its unigram attributes plus all related ngram and
// shortcut entries.
const WordProperty Ver4PatriciaTriePolicy::getWordProperty(
        const CodePointArrayView wordCodePoints) const {
    const int wordId = getWordId(wordCodePoints, false /* forceLowerCaseSearch */);
    if (wordId == NOT_A_WORD_ID) {
        AKLOGE("getWordProperty is called for invalid word.");
        return WordProperty();
    }
    const LanguageModelDictContent *const languageModelDictContent =
            mBuffers->getLanguageModelDictContent();
    // Fetch ngram information.
    std::vector<NgramProperty> ngrams;
    int ngramTargetCodePoints[MAX_WORD_LENGTH];
    int ngramPrevWordsCodePoints[MAX_PREV_WORD_COUNT_FOR_N_GRAM][MAX_WORD_LENGTH];
    int ngramPrevWordsCodePointCount[MAX_PREV_WORD_COUNT_FOR_N_GRAM];
    bool ngramPrevWordIsBeginningOfSentence[MAX_PREV_WORD_COUNT_FOR_N_GRAM];
    for (const auto entry : languageModelDictContent->exportAllNgramEntriesRelatedToWord(
            mHeaderPolicy, wordId)) {
        const int codePointCount = getCodePointsAndReturnCodePointCount(entry.getTargetWordId(),
                MAX_WORD_LENGTH, ngramTargetCodePoints);
        const WordIdArrayView prevWordIds = entry.getPrevWordIds();
        for (size_t i = 0; i < prevWordIds.size(); ++i) {
            ngramPrevWordsCodePointCount[i] = getCodePointsAndReturnCodePointCount(
                    prevWordIds[i], MAX_WORD_LENGTH, ngramPrevWordsCodePoints[i]);
            ngramPrevWordIsBeginningOfSentence[i] = languageModelDictContent
                    ->getProbabilityEntry(prevWordIds[i]).representsBeginningOfSentence();
            if (ngramPrevWordIsBeginningOfSentence[i]) {
                ngramPrevWordsCodePointCount[i] = CharUtils::removeBeginningOfSentenceMarker(
                        ngramPrevWordsCodePoints[i], ngramPrevWordsCodePointCount[i]);
            }
        }
        const NgramContext ngramContext(ngramPrevWordsCodePoints, ngramPrevWordsCodePointCount,
                ngramPrevWordIsBeginningOfSentence, prevWordIds.size());
        const ProbabilityEntry ngramProbabilityEntry = entry.getProbabilityEntry();
        const HistoricalInfo *const historicalInfo = ngramProbabilityEntry.getHistoricalInfo();
        // TODO: Output flags in WordAttributes.
        ngrams.emplace_back(ngramContext,
                CodePointArrayView(ngramTargetCodePoints, codePointCount).toVector(),
                entry.getWordAttributes().getProbability(), *historicalInfo);
    }
    // Fetch shortcut information.
    std::vector<UnigramProperty::ShortcutProperty> shortcuts;
    int shortcutPos = getShortcutPositionOfWord(wordId);
    if (shortcutPos != NOT_A_DICT_POS) {
        int shortcutTarget[MAX_WORD_LENGTH];
        const ShortcutDictContent *const shortcutDictContent =
                mBuffers->getShortcutDictContent();
        bool hasNext = true;
        while (hasNext) {
            int shortcutTargetLength = 0;
            int shortcutProbability = NOT_A_PROBABILITY;
            shortcutDictContent->getShortcutEntryAndAdvancePosition(MAX_WORD_LENGTH,
                    shortcutTarget, &shortcutTargetLength, &shortcutProbability, &hasNext,
                    &shortcutPos);
            shortcuts.emplace_back(
                    CodePointArrayView(shortcutTarget, shortcutTargetLength).toVector(),
                    shortcutProbability);
        }
    }
    const WordAttributes wordAttributes = languageModelDictContent->getWordAttributes(
            WordIdArrayView(), wordId, true /* mustMatchAllPrevWords */, mHeaderPolicy);
    const ProbabilityEntry probabilityEntry =
            languageModelDictContent->getProbabilityEntry(wordId);
    const HistoricalInfo *const historicalInfo = probabilityEntry.getHistoricalInfo();
    const UnigramProperty unigramProperty(probabilityEntry.representsBeginningOfSentence(),
            wordAttributes.isNotAWord(), wordAttributes.isBlacklisted(),
            wordAttributes.isPossiblyOffensive(), wordAttributes.getProbability(),
            *historicalInfo, std::move(shortcuts));
    return WordProperty(wordCodePoints.toVector(), unigramProperty, ngrams);
}
/* static */ void SuggestionsOutputUtils::outputSuggestionsOfDicNode(
        const Scoring *const scoringPolicy, DicTraverseSession *traverseSession,
        const DicNode *const terminalDicNode, const float weightOfLangModelVsSpatialModel,
        const bool boostExactMatches, const bool forceCommitMultiWords,
        const bool outputSecondWordFirstLetterInputIndex,
        SuggestionResults *const outSuggestionResults) {
    if (DEBUG_GEO_FULL) {
        terminalDicNode->dump("OUT:");
    }
    const float doubleLetterCost =
            scoringPolicy->getDoubleLetterDemotionDistanceCost(terminalDicNode);
    const float compoundDistance =
            terminalDicNode->getCompoundDistance(weightOfLangModelVsSpatialModel)
                    + doubleLetterCost;
    const WordAttributes wordAttributes = traverseSession->getDictionaryStructurePolicy()
            ->getWordAttributesInContext(terminalDicNode->getPrevWordIds(),
                    terminalDicNode->getWordId(), nullptr /* multiBigramMap */);
    const bool isExactMatch =
            ErrorTypeUtils::isExactMatch(terminalDicNode->getContainedErrorTypes());
    const bool isExactMatchWithIntentionalOmission =
            ErrorTypeUtils::isExactMatchWithIntentionalOmission(
                    terminalDicNode->getContainedErrorTypes());
    // TODO: Decide whether the word should be auto-corrected or not here.
    const bool isAppropriateForAutoCorrection = !ErrorTypeUtils::isMissingExplicitAccent(
            terminalDicNode->getContainedErrorTypes());
    const int outputTypeFlags =
            (wordAttributes.isPossiblyOffensive() ?
                    Dictionary::KIND_FLAG_POSSIBLY_OFFENSIVE : 0)
            | ((isExactMatch && boostExactMatches) ? Dictionary::KIND_FLAG_EXACT_MATCH : 0)
            | (isExactMatchWithIntentionalOmission ?
                    Dictionary::KIND_FLAG_EXACT_MATCH_WITH_INTENTIONAL_OMISSION : 0)
            | (isAppropriateForAutoCorrection ?
                    Dictionary::KIND_FLAG_APPROPRIATE_FOR_AUTOCORRECTION : 0);
    // Entries that are blacklisted or do not represent a word should not be output.
    const bool isValidWord = !(wordAttributes.isBlacklisted() || wordAttributes.isNotAWord());
    const bool shouldBlockThisWord = shouldBlockWord(traverseSession->getSuggestOptions(),
            terminalDicNode, wordAttributes, true /* isLastWord */);
    // Increase output score of top typing suggestion to ensure autocorrection.
    // TODO: Better integration with java side autocorrection logic.
    const int finalScore = scoringPolicy->calculateFinalScore(
            compoundDistance, traverseSession->getInputSize(),
            terminalDicNode->getContainedErrorTypes(),
            (forceCommitMultiWords && terminalDicNode->hasMultipleWords()),
            boostExactMatches, wordAttributes.getProbability() == 0);
    // Don't output invalid or blocked offensive words. However, we still need to submit their
    // shortcuts if any.
    if (isValidWord && !shouldBlockThisWord) {
        int codePoints[MAX_WORD_LENGTH];
        terminalDicNode->outputResult(codePoints);
        const int indexToPartialCommit = outputSecondWordFirstLetterInputIndex ?
                terminalDicNode->getSecondWordFirstInputIndex(
                        traverseSession->getProximityInfoState(0)) :
                NOT_AN_INDEX;
        outSuggestionResults->addSuggestion(codePoints,
                terminalDicNode->getTotalNodeCodePointCount(), finalScore,
                Dictionary::KIND_CORRECTION | outputTypeFlags, indexToPartialCommit,
                computeFirstWordConfidence(terminalDicNode));
    }

    // Output shortcuts.
    // Shortcut is not supported for multiple words suggestions.
    // TODO: Check shortcuts during traversal for multiple words suggestions.
    if (!terminalDicNode->hasMultipleWords()) {
        BinaryDictionaryShortcutIterator shortcutIt =
                traverseSession->getDictionaryStructurePolicy()->getShortcutIterator(
                        terminalDicNode->getWordId());
        const bool sameAsTyped = scoringPolicy->sameAsTyped(traverseSession, terminalDicNode);
        outputShortcuts(&shortcutIt, finalScore, sameAsTyped, outSuggestionResults);
    }
}
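// outputTypeFlags above packs several independent word properties into one int by OR-ing bit
// flags, and addSuggestion later ORs that into the base kind. The short sketch below shows the
// same composition in isolation; the constant values are hypothetical, chosen only to be
// distinct bits, and merely stand in for Dictionary::KIND_CORRECTION and the
// Dictionary::KIND_FLAG_* constants.

#include <cstdio>

// Hypothetical stand-ins for the real kind and flag constants.
constexpr int KIND_CORRECTION = 0x01;
constexpr int KIND_FLAG_POSSIBLY_OFFENSIVE = 0x10000;
constexpr int KIND_FLAG_EXACT_MATCH = 0x20000;
constexpr int KIND_FLAG_APPROPRIATE_FOR_AUTOCORRECTION = 0x40000;

int main() {
    const bool isPossiblyOffensive = false;
    const bool isExactMatch = true;
    const bool isAppropriateForAutoCorrection = true;

    // Each property contributes its bit or 0, and the results are OR-ed together, mirroring
    // how outputSuggestionsOfDicNode builds outputTypeFlags.
    const int outputTypeFlags =
            (isPossiblyOffensive ? KIND_FLAG_POSSIBLY_OFFENSIVE : 0)
            | (isExactMatch ? KIND_FLAG_EXACT_MATCH : 0)
            | (isAppropriateForAutoCorrection ? KIND_FLAG_APPROPRIATE_FOR_AUTOCORRECTION : 0);
    const int kind = KIND_CORRECTION | outputTypeFlags;

    // Consumers can test an individual property with a bitwise AND.
    std::printf("exact match: %s\n", (kind & KIND_FLAG_EXACT_MATCH) ? "yes" : "no");
    return 0;
}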