// This function will take the position of a character array within a CharGroup, // and check it actually like-matches the word in inWord starting at startInputIndex, // that is, it matches it with case and accents squashed. // The function returns true if there was a full match, false otherwise. // The function will copy on-the-fly the characters in the CharGroup to outNewWord. // It will also place the end position of the array in outPos; in outInputIndex, // it will place the index of the first char AFTER the match if there was a match, // and the initial position if there was not. It makes sense because if there was // a match we want to continue searching, but if there was not, we want to go to // the next CharGroup. // In and out parameters may point to the same location. This function takes care // not to use any input parameters after it wrote into its outputs. static inline bool testCharGroupForContinuedLikeness(const uint8_t flags, const uint8_t* const root, const int startPos, const uint16_t* const inWord, const int startInputIndex, int32_t* outNewWord, int* outInputIndex, int* outPos) { const bool hasMultipleChars = (0 != (UnigramDictionary::FLAG_HAS_MULTIPLE_CHARS & flags)); int pos = startPos; int32_t character = BinaryFormat::getCharCodeAndForwardPointer(root, &pos); int32_t baseChar = toBaseLowerCase(character); const uint16_t wChar = toBaseLowerCase(inWord[startInputIndex]); if (baseChar != wChar) { *outPos = hasMultipleChars ? 
BinaryFormat::skipOtherCharacters(root, pos) : pos; *outInputIndex = startInputIndex; return false; } int inputIndex = startInputIndex; outNewWord[inputIndex] = character; if (hasMultipleChars) { character = BinaryFormat::getCharCodeAndForwardPointer(root, &pos); while (NOT_A_CHARACTER != character) { baseChar = toBaseLowerCase(character); if (toBaseLowerCase(inWord[++inputIndex]) != baseChar) { *outPos = BinaryFormat::skipOtherCharacters(root, pos); *outInputIndex = startInputIndex; return false; } outNewWord[inputIndex] = character; character = BinaryFormat::getCharCodeAndForwardPointer(root, &pos); } } *outInputIndex = inputIndex + 1; *outPos = pos; return true; }
bool BigramDictionary::checkFirstCharacter(unsigned short *word) { // Checks whether this word starts with same character or neighboring characters of // what user typed. int *inputCodes = mInputCodes; int maxAlt = MAX_ALTERNATIVES; const unsigned short firstBaseChar = toBaseLowerCase(*word); while (maxAlt > 0) { if (toBaseLowerCase(*inputCodes) == firstBaseChar) { return true; } inputCodes++; maxAlt--; } return false; }
// Handles a node that is in the middle of passing through multiple chars: there is no need
// to traverse its children, the node itself is the only candidate.
//
// dicNode is pushed onto childDicNodes as a passing child when its typed code point either
// matches the input at pointIndex (per isMatchedNodeCodePoint, honoring exactOnly) or, in
// its base-lowercase form, is an intentional-omission code point. Otherwise nothing is
// pushed.
/* static */ void DicNodeUtils::createAndGetPassingChildNode(DicNode *dicNode,
        const ProximityInfoState *pInfoState, const int pointIndex, const bool exactOnly,
        DicNodeVector *childDicNodes) {
    const int typedCodePoint = dicNode->getNodeTypedCodePoint();
    const int foldedCodePoint = toBaseLowerCase(typedCodePoint);
    const bool matchesInput =
            isMatchedNodeCodePoint(pInfoState, pointIndex, exactOnly, typedCodePoint);

    // Neither a match nor an intentionally omitted character: nothing to pass along.
    if (!matchesInput && !isIntentionalOmissionCodePoint(foldedCodePoint)) {
        return;
    }
    childDicNodes->pushPassingChild(dicNode);
}