// Remove a bigram relation from word0Pos to word1Pos. bool DynamicPatriciaTrieWritingHelper::removeBigramWords(const int word0Pos, const int word1Pos) { DynamicPatriciaTrieNodeReader nodeReader(mBuffer, mBigramPolicy, mShortcutPolicy); nodeReader.fetchNodeInfoInBufferFromPtNodePos(word0Pos); if (nodeReader.getBigramsPos() == NOT_A_DICT_POS) { return false; } return mBigramPolicy->removeBigram(nodeReader.getBigramsPos(), word1Pos); }
int DynamicPatriciaTriePolicy::getBigramsPositionOfPtNode(const int ptNodePos) const { if (ptNodePos == NOT_A_DICT_POS) { return NOT_A_DICT_POS; } DynamicPatriciaTrieNodeReader nodeReader(&mBufferWithExtendableBuffer, getBigramsStructurePolicy(), getShortcutsStructurePolicy()); nodeReader.fetchNodeInfoInBufferFromPtNodePos(ptNodePos); if (nodeReader.isDeleted()) { return NOT_A_DICT_POS; } return nodeReader.getBigramsPos(); }
int DynamicPatriciaTriePolicy::getUnigramProbabilityOfPtNode(const int ptNodePos) const { if (ptNodePos == NOT_A_DICT_POS) { return NOT_A_PROBABILITY; } DynamicPatriciaTrieNodeReader nodeReader(&mBufferWithExtendableBuffer, getBigramsStructurePolicy(), getShortcutsStructurePolicy()); nodeReader.fetchNodeInfoInBufferFromPtNodePos(ptNodePos); if (nodeReader.isDeleted() || nodeReader.isBlacklisted() || nodeReader.isNotAWord()) { return NOT_A_PROBABILITY; } return getProbability(nodeReader.getProbability(), NOT_A_PROBABILITY); }
bool DynamicPatriciaTrieWritingHelper::addBigramWords(const int word0Pos, const int word1Pos, const int probability, bool *const outAddedNewBigram) { int mMergedNodeCodePoints[MAX_WORD_LENGTH]; DynamicPatriciaTrieNodeReader nodeReader(mBuffer, mBigramPolicy, mShortcutPolicy); nodeReader.fetchNodeInfoInBufferFromPtNodePosAndGetNodeCodePoints(word0Pos, MAX_WORD_LENGTH, mMergedNodeCodePoints); // Move node to add bigram entry. const int newNodePos = mBuffer->getTailPosition(); if (!markNodeAsMovedAndSetPosition(&nodeReader, newNodePos, newNodePos)) { return false; } int writingPos = newNodePos; // Write a new PtNode using original PtNode's info to the tail of the dictionary in mBuffer. if (!writePtNodeToBufferByCopyingPtNodeInfo(mBuffer, &nodeReader, nodeReader.getParentPos(), mMergedNodeCodePoints, nodeReader.getCodePointCount(), nodeReader.getProbability(), &writingPos)) { return false; } nodeReader.fetchNodeInfoInBufferFromPtNodePos(newNodePos); if (nodeReader.getBigramsPos() != NOT_A_DICT_POS) { // Insert a new bigram entry into the existing bigram list. int bigramListPos = nodeReader.getBigramsPos(); return mBigramPolicy->addNewBigramEntryToBigramList(word1Pos, probability, &bigramListPos, outAddedNewBigram); } else { // The PtNode doesn't have a bigram list. *outAddedNewBigram = true; // First, Write a bigram entry at the tail position of the PtNode. if (!mBigramPolicy->writeNewBigramEntry(word1Pos, probability, &writingPos)) { return false; } // Then, Mark as the PtNode having bigram list in the flags. const PatriciaTrieReadingUtils::NodeFlags updatedFlags = PatriciaTrieReadingUtils::createAndGetFlags(nodeReader.isBlacklisted(), nodeReader.isNotAWord(), nodeReader.getProbability() != NOT_A_PROBABILITY, nodeReader.getShortcutPos() != NOT_A_DICT_POS, true /* hasBigrams */, nodeReader.getCodePointCount() > 1, CHILDREN_POSITION_FIELD_SIZE); writingPos = newNodePos; // Write updated flags into the moved PtNode's flags field. return DynamicPatriciaTrieWritingUtils::writeFlagsAndAdvancePosition(mBuffer, updatedFlags, &writingPos); } }
// Returns whether the dictionary updating was succeeded or not. bool DynamicPatriciaTrieWritingHelper::reallocatePtNodeAndAddNewPtNodes( const DynamicPatriciaTrieNodeReader *const reallocatingPtNode, const int *const reallocatingPtNodeCodePoints, const int overlappingCodePointCount, const int probabilityOfNewPtNode, const int *const newNodeCodePoints, const int newNodeCodePointCount) { // When addsExtraChild is true, split the reallocating PtNode and add new child. // Reallocating PtNode: abcde, newNode: abcxy. // abc (1st, not terminal) __ de (2nd) // \_ xy (extra child, terminal) // Otherwise, this method makes 1st part terminal and write probabilityOfNewPtNode. // Reallocating PtNode: abcde, newNode: abc. // abc (1st, terminal) __ de (2nd) const bool addsExtraChild = newNodeCodePointCount > overlappingCodePointCount; const int firstPartOfReallocatedPtNodePos = mBuffer->getTailPosition(); int writingPos = firstPartOfReallocatedPtNodePos; // Write the 1st part of the reallocating node. The children position will be updated later // with actual children position. const int newProbability = addsExtraChild ? NOT_A_PROBABILITY : probabilityOfNewPtNode; if (!writePtNodeToBuffer(mBuffer, reallocatingPtNode->getParentPos(), reallocatingPtNodeCodePoints, overlappingCodePointCount, newProbability, &writingPos)) { return false; } const int actualChildrenPos = writingPos; // Create new children PtNode array. const size_t newPtNodeCount = addsExtraChild ? 2 : 1; if (!DynamicPatriciaTrieWritingUtils::writePtNodeArraySizeAndAdvancePosition(mBuffer, newPtNodeCount, &writingPos)) { return false; } // Write the 2nd part of the reallocating node. const int secondPartOfReallocatedPtNodePos = writingPos; if (!writePtNodeToBufferByCopyingPtNodeInfo(mBuffer, reallocatingPtNode, firstPartOfReallocatedPtNodePos, reallocatingPtNodeCodePoints + overlappingCodePointCount, reallocatingPtNode->getCodePointCount() - overlappingCodePointCount, reallocatingPtNode->getProbability(), &writingPos)) { return false; } if (addsExtraChild) { if (!writePtNodeToBuffer(mBuffer, firstPartOfReallocatedPtNodePos, newNodeCodePoints + overlappingCodePointCount, newNodeCodePointCount - overlappingCodePointCount, probabilityOfNewPtNode, &writingPos)) { return false; } } if (!DynamicPatriciaTrieWritingUtils::writeForwardLinkPositionAndAdvancePosition(mBuffer, NOT_A_DICT_POS /* forwardLinkPos */, &writingPos)) { return false; } // Update original reallocatingPtNode as moved. if (!markNodeAsMovedAndSetPosition(reallocatingPtNode, firstPartOfReallocatedPtNodePos, secondPartOfReallocatedPtNodePos)) { return false; } // Load node info. Information of the 1st part will be fetched. DynamicPatriciaTrieNodeReader nodeReader(mBuffer, mBigramPolicy, mShortcutPolicy); nodeReader.fetchNodeInfoInBufferFromPtNodePos(firstPartOfReallocatedPtNodePos); // Update children position. int childrenPosFieldPos = nodeReader.getChildrenPosFieldPos(); if (!DynamicPatriciaTrieWritingUtils::writeChildrenPositionAndAdvancePosition(mBuffer, actualChildrenPos, &childrenPosFieldPos)) { return false; } return true; }