bool DynamicPatriciaTriePolicy::addUnigramWord(const int *const word, const int length, const int probability) { if (!mBuffer->isUpdatable()) { AKLOGI("Warning: addUnigramWord() is called for non-updatable dictionary."); return false; } if (mBufferWithExtendableBuffer.getTailPosition() >= MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS) { AKLOGE("The dictionary is too large to dynamically update."); return false; } DynamicPatriciaTrieReadingHelper readingHelper(&mBufferWithExtendableBuffer, getBigramsStructurePolicy(), getShortcutsStructurePolicy()); readingHelper.initWithPtNodeArrayPos(getRootPosition()); DynamicPatriciaTrieWritingHelper writingHelper(&mBufferWithExtendableBuffer, &mBigramListPolicy, &mShortcutListPolicy, mHeaderPolicy.isDecayingDict()); bool addedNewUnigram = false; if (writingHelper.addUnigramWord(&readingHelper, word, length, probability, &addedNewUnigram)) { if (addedNewUnigram) { mUnigramCount++; } return true; } else { return false; } }
int Ver4PatriciaTriePolicy::getNextWordAndNextToken(const int token, int *const outCodePoints, int *const outCodePointCount) { *outCodePointCount = 0; if (token == 0) { mTerminalPtNodePositionsForIteratingWords.clear(); DynamicPtReadingHelper::TraversePolicyToGetAllTerminalPtNodePositions traversePolicy( &mTerminalPtNodePositionsForIteratingWords); DynamicPtReadingHelper readingHelper(&mNodeReader, &mPtNodeArrayReader); readingHelper.initWithPtNodeArrayPos(getRootPosition()); readingHelper.traverseAllPtNodesInPostorderDepthFirstManner(&traversePolicy); } const int terminalPtNodePositionsVectorSize = static_cast<int>(mTerminalPtNodePositionsForIteratingWords.size()); if (token < 0 || token >= terminalPtNodePositionsVectorSize) { AKLOGE("Given token %d is invalid.", token); return 0; } const int terminalPtNodePos = mTerminalPtNodePositionsForIteratingWords[token]; const PtNodeParams ptNodeParams = mNodeReader.fetchPtNodeParamsInBufferFromPtNodePos(terminalPtNodePos); *outCodePointCount = getCodePointsAndReturnCodePointCount(ptNodeParams.getTerminalId(), MAX_WORD_LENGTH, outCodePoints); const int nextToken = token + 1; if (nextToken >= terminalPtNodePositionsVectorSize) { // All words have been iterated. mTerminalPtNodePositionsForIteratingWords.clear(); return 0; } return nextToken; }
// Returns the position of a node expressed relative to a root.
//
// bone_index:         index of the node whose position is requested.
// root_index:         index forwarded to getNodePositionFromBone(); only used
//                     when with_root_rotation is false.
// with_root_rotation: when true, the result is getNodePosition(bone_index)
//                     minus getRootPosition(); when false, the result is
//                     getNodePositionFromBone(bone_index, root_index).
klVec3 Pose::getNodePositionFromRoot(int bone_index, int root_index, bool with_root_rotation)
{
    if (!with_root_rotation) {
        klVec3 relative;
        relative = getNodePositionFromBone(bone_index, root_index);
        return relative;
    }

    klVec3 offset;
    offset = getNodePosition(bone_index);
    offset -= getRootPosition();
    return offset;
}
// Writes the dictionary to filePath through the writing helper's GC path.
//
// Returns false without writing when the buffers are not updatable. When the
// writing helper fails, the failure is logged, mIsCorrupted is set and false
// is returned. Returns true on success.
bool Ver4PatriciaTriePolicy::flushWithGC(const char *const filePath) {
    if (!mBuffers->isUpdatable()) {
        AKLOGI("Warning: flushWithGC() is called for non-updatable dictionary.");
        return false;
    }
    if (mWritingHelper.writeToDictFileWithGC(getRootPosition(), filePath)) {
        return true;
    }
    AKLOGE("Cannot flush the dictionary to file with GC.");
    mIsCorrupted = true;
    return false;
}
void DynamicPatriciaTriePolicy::flushWithGC(const char *const filePath) { if (!mBuffer->isUpdatable()) { AKLOGI("Warning: flushWithGC() is called for non-updatable dictionary."); return; } const bool needsToDecay = mHeaderPolicy.isDecayingDict() && (mNeedsToDecayForTesting || ForgettingCurveUtils::needsToDecay( false /* mindsBlockByDecay */, mUnigramCount, mBigramCount, &mHeaderPolicy)); DynamicBigramListPolicy bigramListPolicyForGC(&mHeaderPolicy, &mBufferWithExtendableBuffer, &mShortcutListPolicy, needsToDecay); DynamicPatriciaTrieWritingHelper writingHelper(&mBufferWithExtendableBuffer, &bigramListPolicyForGC, &mShortcutListPolicy, needsToDecay); writingHelper.writeToDictFileWithGC(getRootPosition(), filePath, &mHeaderPolicy); mNeedsToDecayForTesting = false; }
int DynamicPatriciaTriePolicy::getTerminalNodePositionOfWord(const int *const inWord, const int length, const bool forceLowerCaseSearch) const { int searchCodePoints[length]; for (int i = 0; i < length; ++i) { searchCodePoints[i] = forceLowerCaseSearch ? CharUtils::toLowerCase(inWord[i]) : inWord[i]; } DynamicPatriciaTrieReadingHelper readingHelper(&mBufferWithExtendableBuffer, getBigramsStructurePolicy(), getShortcutsStructurePolicy()); readingHelper.initWithPtNodeArrayPos(getRootPosition()); const DynamicPatriciaTrieNodeReader *const nodeReader = readingHelper.getNodeReader(); while (!readingHelper.isEnd()) { const int matchedCodePointCount = readingHelper.getPrevTotalCodePointCount(); if (readingHelper.getTotalCodePointCount() > length || !readingHelper.isMatchedCodePoint(0 /* index */, searchCodePoints[matchedCodePointCount])) { // Current node has too many code points or its first code point is different from // target code point. Skip this node and read the next sibling node. readingHelper.readNextSiblingNode(); continue; } // Check following merged node code points. const int nodeCodePointCount = nodeReader->getCodePointCount(); for (int j = 1; j < nodeCodePointCount; ++j) { if (!readingHelper.isMatchedCodePoint( j, searchCodePoints[matchedCodePointCount + j])) { // Different code point is found. The given word is not included in the dictionary. return NOT_A_DICT_POS; } } // All characters are matched. if (length == readingHelper.getTotalCodePointCount()) { // Terminal position is found. return nodeReader->getHeadPos(); } if (!nodeReader->hasChildren()) { return NOT_A_DICT_POS; } // Advance to the children nodes. readingHelper.readChildNode(); } // If we already traversed the tree further than the word is long, there means // there was no match (or we would have found it). return NOT_A_DICT_POS; }
int Ver4PatriciaTriePolicy::getWordId(const CodePointArrayView wordCodePoints, const bool forceLowerCaseSearch) const { DynamicPtReadingHelper readingHelper(&mNodeReader, &mPtNodeArrayReader); readingHelper.initWithPtNodeArrayPos(getRootPosition()); const int ptNodePos = readingHelper.getTerminalPtNodePositionOfWord(wordCodePoints.data(), wordCodePoints.size(), forceLowerCaseSearch); if (readingHelper.isError()) { mIsCorrupted = true; AKLOGE("Dictionary reading error in createAndGetAllChildDicNodes()."); } if (ptNodePos == NOT_A_DICT_POS) { return NOT_A_WORD_ID; } const PtNodeParams ptNodeParams = mNodeReader.fetchPtNodeParamsInBufferFromPtNodePos(ptNodePos); if (ptNodeParams.isDeleted()) { return NOT_A_WORD_ID; } return ptNodeParams.getTerminalId(); }
bool Ver4PatriciaTriePolicy::addUnigramEntry(const CodePointArrayView wordCodePoints, const UnigramProperty *const unigramProperty) { if (!mBuffers->isUpdatable()) { AKLOGI("Warning: addUnigramEntry() is called for non-updatable dictionary."); return false; } if (mDictBuffer->getTailPosition() >= MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS) { AKLOGE("The dictionary is too large to dynamically update. Dictionary size: %d", mDictBuffer->getTailPosition()); return false; } if (wordCodePoints.size() > MAX_WORD_LENGTH) { AKLOGE("The word is too long to insert to the dictionary, length: %zd", wordCodePoints.size()); return false; } for (const auto &shortcut : unigramProperty->getShortcuts()) { if (shortcut.getTargetCodePoints()->size() > MAX_WORD_LENGTH) { AKLOGE("One of shortcut targets is too long to insert to the dictionary, length: %zd", shortcut.getTargetCodePoints()->size()); return false; } } DynamicPtReadingHelper readingHelper(&mNodeReader, &mPtNodeArrayReader); readingHelper.initWithPtNodeArrayPos(getRootPosition()); bool addedNewUnigram = false; int codePointsToAdd[MAX_WORD_LENGTH]; int codePointCountToAdd = wordCodePoints.size(); memmove(codePointsToAdd, wordCodePoints.data(), sizeof(int) * codePointCountToAdd); if (unigramProperty->representsBeginningOfSentence()) { codePointCountToAdd = CharUtils::attachBeginningOfSentenceMarker(codePointsToAdd, codePointCountToAdd, MAX_WORD_LENGTH); } if (codePointCountToAdd <= 0) { return false; } const CodePointArrayView codePointArrayView(codePointsToAdd, codePointCountToAdd); if (mUpdatingHelper.addUnigramWord(&readingHelper, codePointArrayView, unigramProperty, &addedNewUnigram)) { if (addedNewUnigram && !unigramProperty->representsBeginningOfSentence()) { mEntryCounters.incrementNgramCount(NgramType::Unigram); } if (unigramProperty->getShortcuts().size() > 0) { // Add shortcut target. 
const int wordId = getWordId(codePointArrayView, false /* forceLowerCaseSearch */); if (wordId == NOT_A_WORD_ID) { AKLOGE("Cannot find word id to add shortcut target."); return false; } const int wordPos = mBuffers->getTerminalPositionLookupTable()->getTerminalPtNodePosition(wordId); for (const auto &shortcut : unigramProperty->getShortcuts()) { if (!mUpdatingHelper.addShortcutTarget(wordPos, CodePointArrayView(*shortcut.getTargetCodePoints()), shortcut.getProbability())) { AKLOGE("Cannot add new shortcut target. PtNodePos: %d, length: %zd, " "probability: %d", wordPos, shortcut.getTargetCodePoints()->size(), shortcut.getProbability()); return false; } } } return true; } else { return false; } }