bool DynamicPatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos, const HeaderPolicy *const headerPolicy, BufferWithExtendableBuffer *const bufferToWrite, int *const outUnigramCount, int *const outBigramCount) { DynamicPatriciaTrieReadingHelper readingHelper(mBuffer, mBigramPolicy, mShortcutPolicy); readingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos); DynamicPatriciaTrieGcEventListeners ::TraversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted traversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted( headerPolicy, this, mBuffer, mNeedsToDecay); if (!readingHelper.traverseAllPtNodesInPostorderDepthFirstManner( &traversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted)) { return false; } if (mNeedsToDecay && traversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted .getValidUnigramCount() > ForgettingCurveUtils::MAX_UNIGRAM_COUNT_AFTER_GC) { // TODO: Remove more unigrams. } readingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos); DynamicPatriciaTrieGcEventListeners::TraversePolicyToUpdateBigramProbability traversePolicyToUpdateBigramProbability(mBigramPolicy); if (!readingHelper.traverseAllPtNodesInPostorderDepthFirstManner( &traversePolicyToUpdateBigramProbability)) { return false; } if (mNeedsToDecay && traversePolicyToUpdateBigramProbability.getValidBigramEntryCount() > ForgettingCurveUtils::MAX_BIGRAM_COUNT_AFTER_GC) { // TODO: Remove more bigrams. } // Mapping from positions in mBuffer to positions in bufferToWrite. DictPositionRelocationMap dictPositionRelocationMap; readingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos); DynamicPatriciaTrieGcEventListeners::TraversePolicyToPlaceAndWriteValidPtNodesToBuffer traversePolicyToPlaceAndWriteValidPtNodesToBuffer(this, bufferToWrite, &dictPositionRelocationMap); if (!readingHelper.traverseAllPtNodesInPtNodeArrayLevelPreorderDepthFirstManner( &traversePolicyToPlaceAndWriteValidPtNodesToBuffer)) { return false; } // Create policy instance for the GCed dictionary. DynamicShortcutListPolicy newDictShortcutPolicy(bufferToWrite); DynamicBigramListPolicy newDictBigramPolicy(headerPolicy, bufferToWrite, &newDictShortcutPolicy, mNeedsToDecay); // Create reading helper for the GCed dictionary. DynamicPatriciaTrieReadingHelper newDictReadingHelper(bufferToWrite, &newDictBigramPolicy, &newDictShortcutPolicy); newDictReadingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos); DynamicPatriciaTrieGcEventListeners::TraversePolicyToUpdateAllPositionFields traversePolicyToUpdateAllPositionFields(this, &newDictBigramPolicy, bufferToWrite, &dictPositionRelocationMap); if (!newDictReadingHelper.traverseAllPtNodesInPtNodeArrayLevelPreorderDepthFirstManner( &traversePolicyToUpdateAllPositionFields)) { return false; } *outUnigramCount = traversePolicyToUpdateAllPositionFields.getUnigramCount(); *outBigramCount = traversePolicyToUpdateAllPositionFields.getBigramCount(); return true; }
int Ver4PatriciaTriePolicy::getNextWordAndNextToken(const int token, int *const outCodePoints, int *const outCodePointCount) { *outCodePointCount = 0; if (token == 0) { mTerminalPtNodePositionsForIteratingWords.clear(); DynamicPtReadingHelper::TraversePolicyToGetAllTerminalPtNodePositions traversePolicy( &mTerminalPtNodePositionsForIteratingWords); DynamicPtReadingHelper readingHelper(&mNodeReader, &mPtNodeArrayReader); readingHelper.initWithPtNodeArrayPos(getRootPosition()); readingHelper.traverseAllPtNodesInPostorderDepthFirstManner(&traversePolicy); } const int terminalPtNodePositionsVectorSize = static_cast<int>(mTerminalPtNodePositionsForIteratingWords.size()); if (token < 0 || token >= terminalPtNodePositionsVectorSize) { AKLOGE("Given token %d is invalid.", token); return 0; } const int terminalPtNodePos = mTerminalPtNodePositionsForIteratingWords[token]; const PtNodeParams ptNodeParams = mNodeReader.fetchPtNodeParamsInBufferFromPtNodePos(terminalPtNodePos); *outCodePointCount = getCodePointsAndReturnCodePointCount(ptNodeParams.getTerminalId(), MAX_WORD_LENGTH, outCodePoints); const int nextToken = token + 1; if (nextToken >= terminalPtNodePositionsVectorSize) { // All words have been iterated. mTerminalPtNodePositionsForIteratingWords.clear(); return 0; } return nextToken; }
bool DynamicPatriciaTriePolicy::addUnigramWord(const int *const word, const int length, const int probability) { if (!mBuffer->isUpdatable()) { AKLOGI("Warning: addUnigramWord() is called for non-updatable dictionary."); return false; } if (mBufferWithExtendableBuffer.getTailPosition() >= MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS) { AKLOGE("The dictionary is too large to dynamically update."); return false; } DynamicPatriciaTrieReadingHelper readingHelper(&mBufferWithExtendableBuffer, getBigramsStructurePolicy(), getShortcutsStructurePolicy()); readingHelper.initWithPtNodeArrayPos(getRootPosition()); DynamicPatriciaTrieWritingHelper writingHelper(&mBufferWithExtendableBuffer, &mBigramListPolicy, &mShortcutListPolicy, mHeaderPolicy.isDecayingDict()); bool addedNewUnigram = false; if (writingHelper.addUnigramWord(&readingHelper, word, length, probability, &addedNewUnigram)) { if (addedNewUnigram) { mUnigramCount++; } return true; } else { return false; } }
bool DynamicPatriciaTrieWritingHelper::markNodeAsMovedAndSetPosition( const DynamicPatriciaTrieNodeReader *const originalNode, const int movedPos, const int bigramLinkedNodePos) { int pos = originalNode->getHeadPos(); const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(pos); const uint8_t *const dictBuf = mBuffer->getBuffer(usesAdditionalBuffer); if (usesAdditionalBuffer) { pos -= mBuffer->getOriginalBufferSize(); } // Read original flags const PatriciaTrieReadingUtils::NodeFlags originalFlags = PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(dictBuf, &pos); const PatriciaTrieReadingUtils::NodeFlags updatedFlags = DynamicPatriciaTrieReadingUtils::updateAndGetFlags(originalFlags, true /* isMoved */, false /* isDeleted */); int writingPos = originalNode->getHeadPos(); // Update flags. if (!DynamicPatriciaTrieWritingUtils::writeFlagsAndAdvancePosition(mBuffer, updatedFlags, &writingPos)) { return false; } // Update moved position, which is stored in the parent offset field. if (!DynamicPatriciaTrieWritingUtils::writeParentPosOffsetAndAdvancePosition( mBuffer, movedPos, originalNode->getHeadPos(), &writingPos)) { return false; } // Update bigram linked node position, which is stored in the children position field. int childrenPosFieldPos = originalNode->getChildrenPosFieldPos(); if (!DynamicPatriciaTrieWritingUtils::writeChildrenPositionAndAdvancePosition( mBuffer, bigramLinkedNodePos, &childrenPosFieldPos)) { return false; } if (originalNode->hasChildren()) { // Update children's parent position. DynamicPatriciaTrieReadingHelper readingHelper(mBuffer, mBigramPolicy, mShortcutPolicy); const DynamicPatriciaTrieNodeReader *const nodeReader = readingHelper.getNodeReader(); readingHelper.initWithPtNodeArrayPos(originalNode->getChildrenPos()); while (!readingHelper.isEnd()) { int parentOffsetFieldPos = nodeReader->getHeadPos() + DynamicPatriciaTrieWritingUtils::NODE_FLAG_FIELD_SIZE; if (!DynamicPatriciaTrieWritingUtils::writeParentPosOffsetAndAdvancePosition( mBuffer, bigramLinkedNodePos, nodeReader->getHeadPos(), &parentOffsetFieldPos)) { // Parent offset cannot be written because of a bug or a broken dictionary; thus, // we give up to update dictionary. return false; } readingHelper.readNextSiblingNode(); } } return true; }
int Ver4PatriciaTriePolicy::getCodePointsAndReturnCodePointCount(const int wordId, const int maxCodePointCount, int *const outCodePoints) const { DynamicPtReadingHelper readingHelper(&mNodeReader, &mPtNodeArrayReader); const int ptNodePos = mBuffers->getTerminalPositionLookupTable()->getTerminalPtNodePosition(wordId); readingHelper.initWithPtNodePos(ptNodePos); const int codePointCount = readingHelper.getCodePointsAndReturnCodePointCount( maxCodePointCount, outCodePoints); if (readingHelper.isError()) { mIsCorrupted = true; AKLOGE("Dictionary reading error in getCodePointsAndProbabilityAndReturnCodePointCount()."); } return codePointCount; }
void DynamicPatriciaTriePolicy::createAndGetAllChildNodes(const DicNode *const dicNode, DicNodeVector *const childDicNodes) const { if (!dicNode->hasChildren()) { return; } DynamicPatriciaTrieReadingHelper readingHelper(&mBufferWithExtendableBuffer, getBigramsStructurePolicy(), getShortcutsStructurePolicy()); readingHelper.initWithPtNodeArrayPos(dicNode->getChildrenPos()); const DynamicPatriciaTrieNodeReader *const nodeReader = readingHelper.getNodeReader(); while (!readingHelper.isEnd()) { childDicNodes->pushLeavingChild(dicNode, nodeReader->getHeadPos(), nodeReader->getChildrenPos(), nodeReader->getProbability(), nodeReader->isTerminal() && !nodeReader->isDeleted(), nodeReader->hasChildren(), nodeReader->isBlacklisted() || nodeReader->isNotAWord(), nodeReader->getCodePointCount(), readingHelper.getMergedNodeCodePoints()); readingHelper.readNextSiblingNode(); } }
int DynamicPatriciaTriePolicy::getTerminalNodePositionOfWord(const int *const inWord, const int length, const bool forceLowerCaseSearch) const { int searchCodePoints[length]; for (int i = 0; i < length; ++i) { searchCodePoints[i] = forceLowerCaseSearch ? CharUtils::toLowerCase(inWord[i]) : inWord[i]; } DynamicPatriciaTrieReadingHelper readingHelper(&mBufferWithExtendableBuffer, getBigramsStructurePolicy(), getShortcutsStructurePolicy()); readingHelper.initWithPtNodeArrayPos(getRootPosition()); const DynamicPatriciaTrieNodeReader *const nodeReader = readingHelper.getNodeReader(); while (!readingHelper.isEnd()) { const int matchedCodePointCount = readingHelper.getPrevTotalCodePointCount(); if (readingHelper.getTotalCodePointCount() > length || !readingHelper.isMatchedCodePoint(0 /* index */, searchCodePoints[matchedCodePointCount])) { // Current node has too many code points or its first code point is different from // target code point. Skip this node and read the next sibling node. readingHelper.readNextSiblingNode(); continue; } // Check following merged node code points. const int nodeCodePointCount = nodeReader->getCodePointCount(); for (int j = 1; j < nodeCodePointCount; ++j) { if (!readingHelper.isMatchedCodePoint( j, searchCodePoints[matchedCodePointCount + j])) { // Different code point is found. The given word is not included in the dictionary. return NOT_A_DICT_POS; } } // All characters are matched. if (length == readingHelper.getTotalCodePointCount()) { // Terminal position is found. return nodeReader->getHeadPos(); } if (!nodeReader->hasChildren()) { return NOT_A_DICT_POS; } // Advance to the children nodes. readingHelper.readChildNode(); } // If we already traversed the tree further than the word is long, there means // there was no match (or we would have found it). return NOT_A_DICT_POS; }
int Ver4PatriciaTriePolicy::getWordId(const CodePointArrayView wordCodePoints, const bool forceLowerCaseSearch) const { DynamicPtReadingHelper readingHelper(&mNodeReader, &mPtNodeArrayReader); readingHelper.initWithPtNodeArrayPos(getRootPosition()); const int ptNodePos = readingHelper.getTerminalPtNodePositionOfWord(wordCodePoints.data(), wordCodePoints.size(), forceLowerCaseSearch); if (readingHelper.isError()) { mIsCorrupted = true; AKLOGE("Dictionary reading error in createAndGetAllChildDicNodes()."); } if (ptNodePos == NOT_A_DICT_POS) { return NOT_A_WORD_ID; } const PtNodeParams ptNodeParams = mNodeReader.fetchPtNodeParamsInBufferFromPtNodePos(ptNodePos); if (ptNodeParams.isDeleted()) { return NOT_A_WORD_ID; } return ptNodeParams.getTerminalId(); }
int DynamicPatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount( const int ptNodePos, const int maxCodePointCount, int *const outCodePoints, int *const outUnigramProbability) const { // This method traverses parent nodes from the terminal by following parent pointers; thus, // node code points are stored in the buffer in the reverse order. int reverseCodePoints[maxCodePointCount]; DynamicPatriciaTrieReadingHelper readingHelper(&mBufferWithExtendableBuffer, getBigramsStructurePolicy(), getShortcutsStructurePolicy()); // First, read the terminal node and get its probability. readingHelper.initWithPtNodePos(ptNodePos); if (!readingHelper.isValidTerminalNode()) { // Node at the ptNodePos is not a valid terminal node. *outUnigramProbability = NOT_A_PROBABILITY; return 0; } // Store terminal node probability. *outUnigramProbability = readingHelper.getNodeReader()->getProbability(); // Then, following parent node link to the dictionary root and fetch node code points. while (!readingHelper.isEnd()) { if (readingHelper.getTotalCodePointCount() > maxCodePointCount) { // The ptNodePos is not a valid terminal node position in the dictionary. *outUnigramProbability = NOT_A_PROBABILITY; return 0; } // Store node code points to buffer in the reverse order. readingHelper.fetchMergedNodeCodePointsInReverseOrder( readingHelper.getPrevTotalCodePointCount(), reverseCodePoints); // Follow parent node toward the root node. readingHelper.readParentNode(); } if (readingHelper.isError()) { // The node position or the dictionary is invalid. *outUnigramProbability = NOT_A_PROBABILITY; return 0; } // Reverse the stored code points to output them. const int codePointCount = readingHelper.getTotalCodePointCount(); for (int i = 0; i < codePointCount; ++i) { outCodePoints[i] = reverseCodePoints[codePointCount - i - 1]; } return codePointCount; }
void Ver4PatriciaTriePolicy::createAndGetAllChildDicNodes(const DicNode *const dicNode, DicNodeVector *const childDicNodes) const { if (!dicNode->hasChildren()) { return; } DynamicPtReadingHelper readingHelper(&mNodeReader, &mPtNodeArrayReader); readingHelper.initWithPtNodeArrayPos(dicNode->getChildrenPtNodeArrayPos()); while (!readingHelper.isEnd()) { const PtNodeParams ptNodeParams = readingHelper.getPtNodeParams(); if (!ptNodeParams.isValid()) { break; } const bool isTerminal = ptNodeParams.isTerminal() && !ptNodeParams.isDeleted(); const int wordId = isTerminal ? ptNodeParams.getTerminalId() : NOT_A_WORD_ID; childDicNodes->pushLeavingChild(dicNode, ptNodeParams.getChildrenPos(), wordId, ptNodeParams.getCodePointArrayView()); readingHelper.readNextSiblingNode(ptNodeParams); } if (readingHelper.isError()) { mIsCorrupted = true; AKLOGE("Dictionary reading error in createAndGetAllChildDicNodes()."); } }
bool Ver4PatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos, const HeaderPolicy *const headerPolicy, Ver4DictBuffers *const buffersToWrite, int *const outUnigramCount, int *const outBigramCount) { Ver4PatriciaTrieNodeReader ptNodeReader(mBuffers->getTrieBuffer(), mBuffers->getProbabilityDictContent(), headerPolicy); Ver4PtNodeArrayReader ptNodeArrayReader(mBuffers->getTrieBuffer()); Ver4BigramListPolicy bigramPolicy(mBuffers->getMutableBigramDictContent(), mBuffers->getTerminalPositionLookupTable(), headerPolicy); Ver4ShortcutListPolicy shortcutPolicy(mBuffers->getMutableShortcutDictContent(), mBuffers->getTerminalPositionLookupTable()); Ver4PatriciaTrieNodeWriter ptNodeWriter(mBuffers->getWritableTrieBuffer(), mBuffers, headerPolicy, &ptNodeReader, &ptNodeArrayReader, &bigramPolicy, &shortcutPolicy); DynamicPtReadingHelper readingHelper(&ptNodeReader, &ptNodeArrayReader); readingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos); DynamicPtGcEventListeners ::TraversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted traversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted( &ptNodeWriter); if (!readingHelper.traverseAllPtNodesInPostorderDepthFirstManner( &traversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted)) { return false; } const int unigramCount = traversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted .getValidUnigramCount(); const int maxUnigramCount = headerPolicy->getMaxUnigramCount(); if (headerPolicy->isDecayingDict() && unigramCount > maxUnigramCount) { if (!truncateUnigrams(&ptNodeReader, &ptNodeWriter, maxUnigramCount)) { AKLOGE("Cannot remove unigrams. current: %d, max: %d", unigramCount, maxUnigramCount); return false; } } readingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos); DynamicPtGcEventListeners::TraversePolicyToUpdateBigramProbability traversePolicyToUpdateBigramProbability(&ptNodeWriter); if (!readingHelper.traverseAllPtNodesInPostorderDepthFirstManner( &traversePolicyToUpdateBigramProbability)) { return false; } const int bigramCount = traversePolicyToUpdateBigramProbability.getValidBigramEntryCount(); const int maxBigramCount = headerPolicy->getMaxBigramCount(); if (headerPolicy->isDecayingDict() && bigramCount > maxBigramCount) { if (!truncateBigrams(maxBigramCount)) { AKLOGE("Cannot remove bigrams. current: %d, max: %d", bigramCount, maxBigramCount); return false; } } // Mapping from positions in mBuffer to positions in bufferToWrite. PtNodeWriter::DictPositionRelocationMap dictPositionRelocationMap; readingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos); Ver4PatriciaTrieNodeWriter ptNodeWriterForNewBuffers(buffersToWrite->getWritableTrieBuffer(), buffersToWrite, headerPolicy, &ptNodeReader, &ptNodeArrayReader, &bigramPolicy, &shortcutPolicy); DynamicPtGcEventListeners::TraversePolicyToPlaceAndWriteValidPtNodesToBuffer traversePolicyToPlaceAndWriteValidPtNodesToBuffer(&ptNodeWriterForNewBuffers, buffersToWrite->getWritableTrieBuffer(), &dictPositionRelocationMap); if (!readingHelper.traverseAllPtNodesInPtNodeArrayLevelPreorderDepthFirstManner( &traversePolicyToPlaceAndWriteValidPtNodesToBuffer)) { return false; } // Create policy instances for the GCed dictionary. Ver4PatriciaTrieNodeReader newPtNodeReader(buffersToWrite->getTrieBuffer(), buffersToWrite->getProbabilityDictContent(), headerPolicy); Ver4PtNodeArrayReader newPtNodeArrayreader(buffersToWrite->getTrieBuffer()); Ver4BigramListPolicy newBigramPolicy(buffersToWrite->getMutableBigramDictContent(), buffersToWrite->getTerminalPositionLookupTable(), headerPolicy); Ver4ShortcutListPolicy newShortcutPolicy(buffersToWrite->getMutableShortcutDictContent(), buffersToWrite->getTerminalPositionLookupTable()); Ver4PatriciaTrieNodeWriter newPtNodeWriter(buffersToWrite->getWritableTrieBuffer(), buffersToWrite, headerPolicy, &newPtNodeReader, &newPtNodeArrayreader, &newBigramPolicy, &newShortcutPolicy); // Re-assign terminal IDs for valid terminal PtNodes. TerminalPositionLookupTable::TerminalIdMap terminalIdMap; if(!buffersToWrite->getMutableTerminalPositionLookupTable()->runGCTerminalIds( &terminalIdMap)) { return false; } // Run GC for probability dict content. if (!buffersToWrite->getMutableProbabilityDictContent()->runGC(&terminalIdMap, mBuffers->getProbabilityDictContent())) { return false; } // Run GC for bigram dict content. if(!buffersToWrite->getMutableBigramDictContent()->runGC(&terminalIdMap, mBuffers->getBigramDictContent(), outBigramCount)) { return false; } // Run GC for shortcut dict content. if(!buffersToWrite->getMutableShortcutDictContent()->runGC(&terminalIdMap, mBuffers->getShortcutDictContent())) { return false; } DynamicPtReadingHelper newDictReadingHelper(&newPtNodeReader, &newPtNodeArrayreader); newDictReadingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos); DynamicPtGcEventListeners::TraversePolicyToUpdateAllPositionFields traversePolicyToUpdateAllPositionFields(&newPtNodeWriter, &dictPositionRelocationMap); if (!newDictReadingHelper.traverseAllPtNodesInPtNodeArrayLevelPreorderDepthFirstManner( &traversePolicyToUpdateAllPositionFields)) { return false; } newDictReadingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos); TraversePolicyToUpdateAllPtNodeFlagsAndTerminalIds traversePolicyToUpdateAllPtNodeFlagsAndTerminalIds(&newPtNodeWriter, &terminalIdMap); if (!newDictReadingHelper.traverseAllPtNodesInPostorderDepthFirstManner( &traversePolicyToUpdateAllPtNodeFlagsAndTerminalIds)) { return false; } *outUnigramCount = traversePolicyToUpdateAllPositionFields.getUnigramCount(); return true; }
bool Ver4PatriciaTriePolicy::addUnigramEntry(const CodePointArrayView wordCodePoints, const UnigramProperty *const unigramProperty) { if (!mBuffers->isUpdatable()) { AKLOGI("Warning: addUnigramEntry() is called for non-updatable dictionary."); return false; } if (mDictBuffer->getTailPosition() >= MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS) { AKLOGE("The dictionary is too large to dynamically update. Dictionary size: %d", mDictBuffer->getTailPosition()); return false; } if (wordCodePoints.size() > MAX_WORD_LENGTH) { AKLOGE("The word is too long to insert to the dictionary, length: %zd", wordCodePoints.size()); return false; } for (const auto &shortcut : unigramProperty->getShortcuts()) { if (shortcut.getTargetCodePoints()->size() > MAX_WORD_LENGTH) { AKLOGE("One of shortcut targets is too long to insert to the dictionary, length: %zd", shortcut.getTargetCodePoints()->size()); return false; } } DynamicPtReadingHelper readingHelper(&mNodeReader, &mPtNodeArrayReader); readingHelper.initWithPtNodeArrayPos(getRootPosition()); bool addedNewUnigram = false; int codePointsToAdd[MAX_WORD_LENGTH]; int codePointCountToAdd = wordCodePoints.size(); memmove(codePointsToAdd, wordCodePoints.data(), sizeof(int) * codePointCountToAdd); if (unigramProperty->representsBeginningOfSentence()) { codePointCountToAdd = CharUtils::attachBeginningOfSentenceMarker(codePointsToAdd, codePointCountToAdd, MAX_WORD_LENGTH); } if (codePointCountToAdd <= 0) { return false; } const CodePointArrayView codePointArrayView(codePointsToAdd, codePointCountToAdd); if (mUpdatingHelper.addUnigramWord(&readingHelper, codePointArrayView, unigramProperty, &addedNewUnigram)) { if (addedNewUnigram && !unigramProperty->representsBeginningOfSentence()) { mEntryCounters.incrementNgramCount(NgramType::Unigram); } if (unigramProperty->getShortcuts().size() > 0) { // Add shortcut target. const int wordId = getWordId(codePointArrayView, false /* forceLowerCaseSearch */); if (wordId == NOT_A_WORD_ID) { AKLOGE("Cannot find word id to add shortcut target."); return false; } const int wordPos = mBuffers->getTerminalPositionLookupTable()->getTerminalPtNodePosition(wordId); for (const auto &shortcut : unigramProperty->getShortcuts()) { if (!mUpdatingHelper.addShortcutTarget(wordPos, CodePointArrayView(*shortcut.getTargetCodePoints()), shortcut.getProbability())) { AKLOGE("Cannot add new shortcut target. PtNodePos: %d, length: %zd, " "probability: %d", wordPos, shortcut.getTargetCodePoints()->size(), shortcut.getProbability()); return false; } } } return true; } else { return false; } }
bool Ver4PatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos, const HeaderPolicy *const headerPolicy, Ver4DictBuffers *const buffersToWrite, MutableEntryCounters *const outEntryCounters) { Ver4PatriciaTrieNodeReader ptNodeReader(mBuffers->getTrieBuffer()); Ver4PtNodeArrayReader ptNodeArrayReader(mBuffers->getTrieBuffer()); Ver4ShortcutListPolicy shortcutPolicy(mBuffers->getMutableShortcutDictContent(), mBuffers->getTerminalPositionLookupTable()); Ver4PatriciaTrieNodeWriter ptNodeWriter(mBuffers->getWritableTrieBuffer(), mBuffers, &ptNodeReader, &ptNodeArrayReader, &shortcutPolicy); if (!mBuffers->getMutableLanguageModelDictContent()->updateAllProbabilityEntriesForGC( headerPolicy, outEntryCounters)) { AKLOGE("Failed to update probabilities in language model dict content."); return false; } if (headerPolicy->isDecayingDict()) { const EntryCounts &maxEntryCounts = headerPolicy->getMaxNgramCounts(); if (!mBuffers->getMutableLanguageModelDictContent()->truncateEntries( outEntryCounters->getEntryCounts(), maxEntryCounts, headerPolicy, outEntryCounters)) { AKLOGE("Failed to truncate entries in language model dict content."); return false; } } DynamicPtReadingHelper readingHelper(&ptNodeReader, &ptNodeArrayReader); readingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos); DynamicPtGcEventListeners ::TraversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted traversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted( &ptNodeWriter); if (!readingHelper.traverseAllPtNodesInPostorderDepthFirstManner( &traversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted)) { return false; } // Mapping from positions in mBuffer to positions in bufferToWrite. PtNodeWriter::DictPositionRelocationMap dictPositionRelocationMap; readingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos); Ver4PatriciaTrieNodeWriter ptNodeWriterForNewBuffers(buffersToWrite->getWritableTrieBuffer(), buffersToWrite, &ptNodeReader, &ptNodeArrayReader, &shortcutPolicy); DynamicPtGcEventListeners::TraversePolicyToPlaceAndWriteValidPtNodesToBuffer traversePolicyToPlaceAndWriteValidPtNodesToBuffer(&ptNodeWriterForNewBuffers, buffersToWrite->getWritableTrieBuffer(), &dictPositionRelocationMap); if (!readingHelper.traverseAllPtNodesInPtNodeArrayLevelPreorderDepthFirstManner( &traversePolicyToPlaceAndWriteValidPtNodesToBuffer)) { return false; } // Create policy instances for the GCed dictionary. Ver4PatriciaTrieNodeReader newPtNodeReader(buffersToWrite->getTrieBuffer()); Ver4PtNodeArrayReader newPtNodeArrayreader(buffersToWrite->getTrieBuffer()); Ver4ShortcutListPolicy newShortcutPolicy(buffersToWrite->getMutableShortcutDictContent(), buffersToWrite->getTerminalPositionLookupTable()); Ver4PatriciaTrieNodeWriter newPtNodeWriter(buffersToWrite->getWritableTrieBuffer(), buffersToWrite, &newPtNodeReader, &newPtNodeArrayreader, &newShortcutPolicy); // Re-assign terminal IDs for valid terminal PtNodes. TerminalPositionLookupTable::TerminalIdMap terminalIdMap; if(!buffersToWrite->getMutableTerminalPositionLookupTable()->runGCTerminalIds( &terminalIdMap)) { return false; } // Run GC for language model dict content. if (!buffersToWrite->getMutableLanguageModelDictContent()->runGC(&terminalIdMap, mBuffers->getLanguageModelDictContent())) { return false; } // Run GC for shortcut dict content. if(!buffersToWrite->getMutableShortcutDictContent()->runGC(&terminalIdMap, mBuffers->getShortcutDictContent())) { return false; } DynamicPtReadingHelper newDictReadingHelper(&newPtNodeReader, &newPtNodeArrayreader); newDictReadingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos); DynamicPtGcEventListeners::TraversePolicyToUpdateAllPositionFields traversePolicyToUpdateAllPositionFields(&newPtNodeWriter, &dictPositionRelocationMap); if (!newDictReadingHelper.traverseAllPtNodesInPtNodeArrayLevelPreorderDepthFirstManner( &traversePolicyToUpdateAllPositionFields)) { return false; } newDictReadingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos); TraversePolicyToUpdateAllPtNodeFlagsAndTerminalIds traversePolicyToUpdateAllPtNodeFlagsAndTerminalIds(&newPtNodeWriter, &terminalIdMap); if (!newDictReadingHelper.traverseAllPtNodesInPostorderDepthFirstManner( &traversePolicyToUpdateAllPtNodeFlagsAndTerminalIds)) { return false; } return true; }