// Reorders a run of Indic text, one syllable at a time, from logical order
// into the order in which it is written to outChars / glyphStorage.
//
// Parameters:
//   chars          - input text
//   charCount      - number of elements in chars
//   scriptCode     - used to look up the script's IndicClassTable
//   outChars       - destination handed to the IndicReorderingOutput
//   glyphStorage   - glyph storage handed to the IndicReorderingOutput
//   outMPreFixups  - receives the MPreFixups object allocated here (NULL when
//                    the script does not set SF_MPRE_FIXUP)
//   success        - in/out error code; nothing is done if it already
//                    indicates failure
//
// Returns output.getOutputIndex() after the last syllable has been written,
// or 0 if success indicated failure on entry or the MPreFixups allocation
// failed.
//
// NOTE(review): the class table returned by getScriptClassTable() is
// dereferenced without a NULL check; confirm that it cannot return NULL for
// the script codes passed in by callers.
le_int32 IndicReordering::reorder(const LEUnicode *chars, le_int32 charCount, le_int32 scriptCode,
                                  LEUnicode *outChars, LEGlyphStorage &glyphStorage,
                                  MPreFixups **outMPreFixups, LEErrorCode& success)
{
    if (LE_FAILURE(success)) {
        return 0;
    }

    MPreFixups *mpreFixups = NULL;
    const IndicClassTable *classTable = IndicClassTable::getScriptClassTable(scriptCode);

    if (classTable->scriptFlags & SF_MPRE_FIXUP) {
        mpreFixups = new MPreFixups(charCount);
        if (mpreFixups == NULL) {
            success = LE_MEMORY_ALLOCATION_ERROR;
            return 0;
        }
    }

    IndicReorderingOutput output(outChars, glyphStorage, mpreFixups);
    le_int32 i, prev = 0;
    le_bool lastInWord = FALSE;

    while (prev < charCount) {
        le_int32 syllable = findSyllable(classTable, chars, prev, charCount);
        le_int32 matra, markStart = syllable;

        output.reset();

        // Peel a trailing stress mark, then a trailing vowel modifier, off
        // the end of the syllable; markStart ends up just past the last
        // character that is neither.
        if (classTable->isStressMark(chars[markStart - 1])) {
            markStart -= 1;
            output.noteStressMark(classTable, chars[markStart], markStart, tagArray1);
        }

        if (markStart != prev && classTable->isVowelModifier(chars[markStart - 1])) {
            markStart -= 1;
            output.noteVowelModifier(classTable, chars[markStart], markStart, tagArray1);
        }

        matra = markStart - 1;

        // Note the matras that precede the marks. The scan must stay inside
        // the current syllable: when the syllable consists only of marks,
        // markStart == prev and matra == prev - 1, which is either before the
        // start of the text (prev == 0) or inside the previous syllable.
        while (matra >= prev &&
               output.noteMatra(classTable, chars[matra], matra, tagArray1, !lastInWord) &&
               matra != prev) {
            matra -= 1;
        }

        lastInWord = TRUE;

        switch (classTable->getCharClass(chars[prev]) & CF_CLASS_MASK) {
        case CC_RESERVED:
            lastInWord = FALSE;
            /* fall through */

        case CC_INDEPENDENT_VOWEL:
        case CC_ZERO_WIDTH_MARK:
            // Copied through unchanged.
            for (i = prev; i < syllable; i += 1) {
                output.writeChar(chars[i], i, tagArray1);
            }

            break;

        case CC_AL_LAKUNA:
        case CC_NUKTA:
            // A syllable starting with one of these gets a dotted circle
            // written in front of it.
            output.writeChar(C_DOTTED_CIRCLE, prev, tagArray1);
            output.writeChar(chars[prev], prev, tagArray1);
            break;

        case CC_VIRAMA:
            // A lone virama is illegal unless it follows a
            // MALAYALAM_VOWEL_SIGN_U. Such a usage is called
            // "samvruthokaram".
            // At the very start of the text there is no preceding character
            // to inspect, so the virama is treated as illegal.
            if (prev == 0 || chars[prev - 1] != C_MALAYALAM_VOWEL_SIGN_U) {
                output.writeChar(C_DOTTED_CIRCLE, prev, tagArray1);
            }

            output.writeChar(chars[prev], prev, tagArray1);
            break;

        case CC_DEPENDENT_VOWEL:
        case CC_SPLIT_VOWEL_PIECE_1:
        case CC_SPLIT_VOWEL_PIECE_2:
        case CC_SPLIT_VOWEL_PIECE_3:
        case CC_VOWEL_MODIFIER:
        case CC_STRESS_MARK:
            // Syllable starts with a dependent sign: a dotted circle is
            // written where the base would be, with the noted marks emitted
            // around it in the script-dependent order.
            output.writeMpre();

            output.writeChar(C_DOTTED_CIRCLE, prev, tagArray1);

            output.writeMbelow();
            output.writeSMbelow();
            output.writeMabove();

            if ((classTable->scriptFlags & SF_MATRAS_AFTER_BASE) != 0) {
                output.writeMpost();
            }

            if ((classTable->scriptFlags & SF_REPH_AFTER_BELOW) != 0) {
                output.writeVMabove();
                output.writeSMabove(); // FIXME: there are no SM's in these scripts...
            }

            if ((classTable->scriptFlags & SF_MATRAS_AFTER_BASE) == 0) {
                output.writeMpost();
            }

            output.writeLengthMark();
            output.writeAlLakuna();

            if ((classTable->scriptFlags & SF_REPH_AFTER_BELOW) == 0) {
                output.writeVMabove();
                output.writeSMabove();
            }

            output.writeVMpost();
            break;

        case CC_INDEPENDENT_VOWEL_2:
        case CC_INDEPENDENT_VOWEL_3:
        case CC_CONSONANT:
        case CC_CONSONANT_WITH_NUKTA:
        {
            le_uint32 length = markStart - prev;
            le_int32  lastConsonant = markStart - 1;
            le_int32  baseLimit = prev;

            // Check for REPH at front of syllable
            if (length > 2 && classTable->isReph(chars[prev]) &&
                classTable->isVirama(chars[prev + 1]) && chars[prev + 2] != C_SIGN_ZWNJ) {
                baseLimit += 2;

                // Check for eyelash RA, if the script supports it
                if ((classTable->scriptFlags & SF_EYELASH_RA) != 0 &&
                    chars[baseLimit] == C_SIGN_ZWJ) {
                    if (length > 3) {
                        baseLimit += 1;
                    } else {
                        baseLimit -= 2;
                    }
                }
            }

            // Skip back over trailing non-consonants to the last consonant.
            while (lastConsonant > baseLimit && !classTable->isConsonant(chars[lastConsonant])) {
                lastConsonant -= 1;
            }

            IndicClassTable::CharClass charClass = CC_RESERVED;
            IndicClassTable::CharClass nextClass = CC_RESERVED;
            le_int32 baseConsonant = lastConsonant;
            le_int32 postBase = lastConsonant + 1;
            le_int32 postBaseLimit = classTable->scriptFlags & SF_POST_BASE_LIMIT_MASK;
            le_bool  seenVattu = FALSE;
            le_bool  seenBelowBaseForm = FALSE;
            le_bool  hasNukta = FALSE;
            le_bool  hasBelowBaseForm = FALSE;
            le_bool  hasPostBaseForm = FALSE;

            if (postBase < markStart && classTable->isNukta(chars[postBase])) {
                charClass = CC_NUKTA;
                postBase += 1;
            }

            // Walk backwards from the last consonant to find the base
            // consonant, tracking where the post-base consonants begin.
            while (baseConsonant > baseLimit) {
                nextClass = charClass;
                hasNukta  = IndicClassTable::isNukta(nextClass);
                charClass = classTable->getCharClass(chars[baseConsonant]);

                hasBelowBaseForm = IndicClassTable::hasBelowBaseForm(charClass) && !hasNukta;
                hasPostBaseForm  = IndicClassTable::hasPostBaseForm(charClass)  && !hasNukta;

                if (IndicClassTable::isConsonant(charClass)) {
                    if (postBaseLimit == 0 || seenVattu ||
                        (baseConsonant > baseLimit && !classTable->isVirama(chars[baseConsonant - 1])) ||
                        !(hasBelowBaseForm || hasPostBaseForm)) {
                        break;
                    }

                    // consonants with nuktas are never vattus
                    seenVattu = IndicClassTable::isVattu(charClass) && !hasNukta;

                    // consonants with nuktas never have below- or post-base forms
                    if (hasPostBaseForm) {
                        if (seenBelowBaseForm) {
                            break;
                        }

                        postBase = baseConsonant;
                    } else if (hasBelowBaseForm) {
                        seenBelowBaseForm = TRUE;
                    }

                    postBaseLimit -= 1;
                }

                baseConsonant -= 1;
            }

            // Write Mpre
            output.writeMpre();

            // Write eyelash RA
            // NOTE: baseLimit == prev + 3 iff eyelash RA present...
            if (baseLimit == prev + 3) {
                output.writeChar(chars[prev], prev, tagArray2);
                output.writeChar(chars[prev + 1], prev + 1, tagArray2);
                output.writeChar(chars[prev + 2], prev + 2, tagArray2);
            }

            // write any pre-base consonants
            le_bool supressVattu = TRUE;

            for (i = baseLimit; i < baseConsonant; i += 1) {
                LEUnicode ch = chars[i];
                // Don't put 'blwf' on first consonant.
                FeatureMask features = (i == baseLimit? tagArray2 : tagArray1);

                charClass = classTable->getCharClass(ch);
                nextClass = classTable->getCharClass(chars[i + 1]);
                hasNukta  = IndicClassTable::isNukta(nextClass);

                if (IndicClassTable::isConsonant(charClass)) {
                    if (IndicClassTable::isVattu(charClass) && !hasNukta && supressVattu) {
                        features = tagArray4;
                    }

                    supressVattu = IndicClassTable::isVattu(charClass) && !hasNukta;
                } else if (IndicClassTable::isVirama(charClass) && chars[i + 1] == C_SIGN_ZWNJ) {
                    features = tagArray4;
                }

                output.writeChar(ch, i, features);
            }

            // bcSpan is one past the base consonant plus any nukta, and (when
            // the base is also the last consonant) a following virama or
            // al-lakuna and ZWNJ.
            le_int32 bcSpan = baseConsonant + 1;

            if (bcSpan < markStart && classTable->isNukta(chars[bcSpan])) {
                bcSpan += 1;
            }

            if (baseConsonant == lastConsonant && bcSpan < markStart &&
                (classTable->isVirama(chars[bcSpan]) || classTable->isAlLakuna(chars[bcSpan]))) {
                bcSpan += 1;

                if (bcSpan < markStart && chars[bcSpan] == C_SIGN_ZWNJ) {
                    bcSpan += 1;
                }
            }

            // note the base consonant for post-GSUB fixups
            output.noteBaseConsonant();

            // write base consonant
            for (i = baseConsonant; i < bcSpan; i += 1) {
                output.writeChar(chars[i], i, tagArray4);
            }

            if ((classTable->scriptFlags & SF_MATRAS_AFTER_BASE) != 0) {
                output.writeMbelow();
                output.writeSMbelow(); // FIXME: there are no SMs in these scripts...
                output.writeMabove();
                output.writeMpost();
            }

            // write below-base consonants
            if (baseConsonant != lastConsonant) {
                for (i = bcSpan + 1; i < postBase; i += 1) {
                    output.writeChar(chars[i], i, tagArray1);
                }

                if (postBase > lastConsonant) {
                    // write halant that was after base consonant
                    output.writeChar(chars[bcSpan], bcSpan, tagArray1);
                }
            }

            // write Mbelow, SMbelow, Mabove
            if ((classTable->scriptFlags & SF_MATRAS_AFTER_BASE) == 0) {
                output.writeMbelow();
                output.writeSMbelow();
                output.writeMabove();
            }

            if ((classTable->scriptFlags & SF_REPH_AFTER_BELOW) != 0) {
                if (baseLimit == prev + 2) {
                    output.writeChar(chars[prev], prev, tagArray0);
                    output.writeChar(chars[prev + 1], prev + 1, tagArray0);
                }

                output.writeVMabove();
                output.writeSMabove(); // FIXME: there are no SM's in these scripts...
            }

            // write post-base consonants
            // FIXME: does this put the right tags on post-base consonants?
            if (baseConsonant != lastConsonant) {
                if (postBase <= lastConsonant) {
                    for (i = postBase; i <= lastConsonant; i += 1) {
                        output.writeChar(chars[i], i, tagArray3);
                    }

                    // write halant that was after base consonant
                    output.writeChar(chars[bcSpan], bcSpan, tagArray1);
                }

                // write the trailing halant, if there is one
                if (lastConsonant < matra && classTable->isVirama(chars[matra])) {
                    output.writeChar(chars[matra], matra, tagArray4);
                }
            }

            // write Mpost
            if ((classTable->scriptFlags & SF_MATRAS_AFTER_BASE) == 0) {
                output.writeMpost();
            }

            output.writeLengthMark();
            output.writeAlLakuna();

            // write reph
            if ((classTable->scriptFlags & SF_REPH_AFTER_BELOW) == 0) {
                if (baseLimit == prev + 2) {
                    output.writeChar(chars[prev], prev, tagArray0);
                    output.writeChar(chars[prev + 1], prev + 1, tagArray0);
                }

                output.writeVMabove();
                output.writeSMabove();
            }

            output.writeVMpost();

            break;
        }

        default:
            break;
        }

        prev = syllable;
    }

    // Hand the fixups object (possibly NULL) back to the caller.
    *outMPreFixups = mpreFixups;

    return output.getOutputIndex();
}
// Reorders a run of Khmer text, one syllable at a time, and tags each output
// character with the feature mask it should be shaped with.
//
// Parameters:
//   chars         - input text
//   charCount     - number of elements in chars
//   (scriptCode)  - unused
//   outChars      - destination handed to the ReorderingOutput
//   glyphStorage  - glyph storage handed to the ReorderingOutput
//
// Returns output.getOutputIndex() after the last syllable has been written.
le_int32 KhmerReordering::reorder(const LEUnicode *chars, le_int32 charCount, le_int32 /*scriptCode*/,
                                  LEUnicode *outChars, LEGlyphStorage &glyphStorage)
{
    const KhmerClassTable *classTable = KhmerClassTable::getKhmerClassTable();

    ReorderingOutput output(outChars, glyphStorage);
    KhmerClassTable::CharClass charClass;
    le_int32 i, prev = 0, coengRo;

    // This loop only exits when we reach the end of a run, which may contain
    // several syllables.
    while (prev < charCount) {
        le_int32 syllable = findSyllable(classTable, chars, prev, charCount);

        // write a pre vowel or the pre part of a split vowel first
        // and look out for coeng + ro. RO is the only vowel of type 2, and
        // therefore the only one that requires saving space before the base.
        coengRo = -1;  // There is no Coeng Ro, if found this value will change
        for (i = prev; i < syllable; i += 1) {
            charClass = classTable->getCharClass(chars[i]);

            // if a split vowel, write the pre part. In Khmer the pre part
            // is the same for all split vowels, same glyph as pre vowel C_VOWEL_E
            if (charClass & KhmerClassTable::CF_SPLIT_VOWEL) {
                output.writeChar(C_VOWEL_E, i, tagPref);
                break; // there can be only one vowel
            }

            // if a vowel with pos before write it out
            if (charClass & KhmerClassTable::CF_POS_BEFORE) {
                output.writeChar(chars[i], i, tagPref);
                break; // there can be only one vowel
            }

            // look for coeng + ro and remember position
            // works because coeng + ro is always in front of a vowel (if there is a vowel)
            // and because CC_CONSONANT2 is enough to identify it, as it is the only consonant
            // with this flag
            if ( (charClass & KhmerClassTable::CF_COENG) && (i + 1 < syllable) &&
                 ( (classTable->getCharClass(chars[i + 1]) & KhmerClassTable::CF_CLASS_MASK) == KhmerClassTable::CC_CONSONANT2) ) {
                coengRo = i;
            }
        }

        // write coeng + ro if found
        if (coengRo > -1) {
            output.writeChar(C_COENG, coengRo, tagPref);
            output.writeChar(C_RO, coengRo + 1, tagPref);
        }

        // shall we add a dotted circle?
        // If in the position in which the base should be (first char in the string) there is
        // a character that has the Dotted circle flag (a character that cannot be a base)
        // then write a dotted circle
        if (classTable->getCharClass(chars[prev]) & KhmerClassTable::CF_DOTTED_CIRCLE) {
            output.writeChar(C_DOTTED_CIRCLE, prev, tagDefault);
        }

        // copy what is left to the output, skipping before vowels and
        // coeng Ro if they are present
        for (i = prev; i < syllable; i += 1) {
            charClass = classTable->getCharClass(chars[i]);

            // skip a before vowel, it was already processed
            if (charClass & KhmerClassTable::CF_POS_BEFORE) {
                continue;
            }

            // skip coeng + ro, it was already processed
            if (i == coengRo) {
                i += 1;
                continue;
            }

            switch (charClass & KhmerClassTable::CF_POS_MASK) {
                case KhmerClassTable::CF_POS_ABOVE :
                    output.writeChar(chars[i], i, tagAbvf);
                    break;

                case KhmerClassTable::CF_POS_AFTER :
                    output.writeChar(chars[i], i, tagPstf);
                    break;

                case KhmerClassTable::CF_POS_BELOW :
                    output.writeChar(chars[i], i, tagBlwf);
                    break;

                default:
                    // assign the correct flags to a coeng consonant
                    // Consonants of type 3 are tagged as Post forms and those type 1 as below forms
                    if ( (charClass & KhmerClassTable::CF_COENG) && i + 1 < syllable ) {
                        if ( (classTable->getCharClass(chars[i + 1]) & KhmerClassTable::CF_CLASS_MASK)
                              == KhmerClassTable::CC_CONSONANT3) {
                            output.writeChar(chars[i], i, tagPstf);
                            i += 1;
                            output.writeChar(chars[i], i, tagPstf);
                        }
                        else {
                            output.writeChar(chars[i], i, tagBlwf);
                            i += 1;
                            output.writeChar(chars[i], i, tagBlwf);
                        }
                        break;
                    }

                    // if a shifter is followed by an above vowel change the shifter to below form,
                    // an above vowel can have two possible positions i + 1 or i + 3
                    // (position i+1 corresponds to unicode 3, position i+3 to Unicode 4)
                    // and there is an extra rule for C_VOWEL_AA + C_SIGN_NIKAHIT also for two
                    // different positions, right after the shifter or after a vowel (Unicode 4)
                    //
                    // C_VOWEL_AA and C_SIGN_NIKAHIT are compared against the characters
                    // themselves rather than against their masked character classes: the
                    // other C_* names in this function (C_VOWEL_E, C_COENG, C_RO,
                    // C_DOTTED_CIRCLE) are written out as characters, whereas classes are
                    // spelled KhmerClassTable::CC_*.
                    // NOTE(review): confirm C_VOWEL_AA / C_SIGN_NIKAHIT are code-point
                    // constants where they are defined.
                    if ( (charClass & KhmerClassTable::CF_SHIFTER) && (i + 1 < syllable) ) {
                        if (classTable->getCharClass(chars[i + 1]) & KhmerClassTable::CF_ABOVE_VOWEL ) {
                            output.writeChar(chars[i], i, tagBlwf);
                            break;
                        }
                        if (i + 2 < syllable &&
                            chars[i + 1] == C_VOWEL_AA &&
                            chars[i + 2] == C_SIGN_NIKAHIT ) {
                            output.writeChar(chars[i], i, tagBlwf);
                            break;
                        }
                        if (i + 3 < syllable &&
                            (classTable->getCharClass(chars[i + 3]) & KhmerClassTable::CF_ABOVE_VOWEL) ) {
                            output.writeChar(chars[i], i, tagBlwf);
                            break;
                        }
                        if (i + 4 < syllable &&
                            chars[i + 3] == C_VOWEL_AA &&
                            chars[i + 4] == C_SIGN_NIKAHIT ) {
                            output.writeChar(chars[i], i, tagBlwf);
                            break;
                        }
                    }

                    // default - any other characters
                    output.writeChar(chars[i], i, tagDefault);
                    break;
            } // switch
        } // for

        prev = syllable; // move the pointer to the start of next syllable
    }

    return output.getOutputIndex();
}
// Processes a run of Indic text syllable by syllable: copies each syllable to
// the output (preceded by a dotted circle when it starts with an invalid
// character), then adjusts the per-character feature masks and finally calls
// output.decomposeReorderMatras() for the syllable.
//
// Parameters:
//   chars         - input text
//   charCount     - number of elements in chars
//   scriptCode    - used to look up the script's IndicClassTable
//   outChars      - destination handed to the IndicReorderingOutput
//   glyphStorage  - glyph storage handed to the IndicReorderingOutput
//
// Returns output.getOutputIndex() after the last syllable has been processed.
//
// NOTE(review): the class table returned by getScriptClassTable() is
// dereferenced without a NULL check; confirm that it cannot return NULL for
// the script codes passed in by callers.
le_int32 IndicReordering::v2process(const LEUnicode *chars, le_int32 charCount, le_int32 scriptCode,
                                    LEUnicode *outChars, LEGlyphStorage &glyphStorage)
{
    const IndicClassTable *classTable = IndicClassTable::getScriptClassTable(scriptCode);

    DynamicProperties dynProps[INDIC_BLOCK_SIZE];
    IndicReordering::getDynamicProperties(dynProps,classTable);

    IndicReorderingOutput output(outChars, glyphStorage, NULL);
    // inv_count is the running number of dotted circles inserted so far; it
    // is added to an input index wherever an output feature slot is addressed.
    le_int32 i, firstConsonant, baseConsonant, secondConsonant, inv_count = 0, beginSyllable = 0;
    //le_bool lastInWord = FALSE;

    while (beginSyllable < charCount) {
        le_int32 nextSyllable = findSyllable(classTable, chars, beginSyllable, charCount);

        output.reset();

        // Find the First Consonant
        for ( firstConsonant = beginSyllable ; firstConsonant < nextSyllable ; firstConsonant++ ) {
            if ( classTable->isConsonant(chars[firstConsonant]) ) {
                break;
            }
        }

        // Find the base consonant

        baseConsonant = nextSyllable - 1;
        secondConsonant = firstConsonant;

        // TODO: Use Dynamic Properties for hasBelowBaseForm and hasPostBaseForm()

        while ( baseConsonant > firstConsonant ) {
            if ( classTable->isConsonant(chars[baseConsonant]) &&
                 !classTable->hasBelowBaseForm(chars[baseConsonant]) &&
                 !classTable->hasPostBaseForm(chars[baseConsonant]) ) {
                break;
            }
            else {
                if ( classTable->isConsonant(chars[baseConsonant]) ) {
                    secondConsonant = baseConsonant;
                }
                baseConsonant--;
            }
        }

        // If the syllable starts with Ra + Halant ( in a script that has Reph ) and has more than one
        // consonant, Ra is excluded from candidates for base consonants

        if ( classTable->isReph(chars[beginSyllable]) &&
             beginSyllable+1 < nextSyllable && classTable->isVirama(chars[beginSyllable+1]) &&
             secondConsonant != firstConsonant) {
            baseConsonant = secondConsonant;
        }

        // Populate the output
        for ( i = beginSyllable ; i < nextSyllable ; i++ ) {

            // Handle invalid combinations

            if ( classTable->isVirama(chars[beginSyllable]) ||
                 classTable->isMatra(chars[beginSyllable]) ||
                 classTable->isVowelModifier(chars[beginSyllable]) ||
                 classTable->isNukta(chars[beginSyllable]) ) {
                output.writeChar(C_DOTTED_CIRCLE,beginSyllable,basicShapingFormsMask);
                inv_count++;
            }
            output.writeChar(chars[i],i, basicShapingFormsMask);

        }

        // Adjust features and set syllable structure bits

        for ( i = beginSyllable ; i < nextSyllable ; i++ ) {

            FeatureMask outMask = output.getFeatures(i+inv_count);
            FeatureMask saveMask = outMask;

            // Since reph can only validly occur at the beginning of a syllable
            // We only apply it to the first 2 characters in the syllable, to keep it from
            // conflicting with other features ( i.e. rkrf )

            // TODO : Use the dynamic property for determining isREPH
            if ( i == beginSyllable && i < baseConsonant && classTable->isReph(chars[i]) &&
                 i+1 < nextSyllable && classTable->isVirama(chars[i+1])) {
                outMask |= rphfFeatureMask;
                outMask |= rephConsonantMask;
                output.setFeatures(i+1+inv_count,outMask);
            }

            if ( i == baseConsonant ) {
                outMask |= baseConsonantMask;
            }

            if ( classTable->isMatra(chars[i])) {
                outMask |= matraMask;
                if ( classTable->hasAboveBaseForm(chars[i])) {
                    outMask |= aboveBasePosition;
                } else if ( classTable->hasBelowBaseForm(chars[i])) {
                    outMask |= belowBasePosition;
                }
            }

            // Don't apply half form to virama that stands alone at the end of a syllable
            // to prevent half forms from forming when syllable ends with virama

            if ( classTable->isVirama(chars[i]) && (i+1 == nextSyllable) ) {
                outMask ^= halfFeatureMask;
                // i > 0: a virama that is the very first character of the
                // text has no preceding character to inspect.
                if ( i > 0 && classTable->isConsonant(chars[i-1]) ) {
                    FeatureMask tmp = output.getFeatures(i-1+inv_count);
                    tmp ^= halfFeatureMask;
                    output.setFeatures(i-1+inv_count,tmp);
                }
            }

            // Only store the mask back if something actually changed.
            if ( outMask != saveMask ) {
                output.setFeatures(i+inv_count,outMask);
            }
        }

        output.decomposeReorderMatras(classTable,beginSyllable,nextSyllable,inv_count);

        beginSyllable = nextSyllable;
    }

    return output.getOutputIndex();
}