/*
 * Compacts the properties vectors in pv through upvec_compactToUTrie2Handler,
 * then freezes the trie collected in the handler context with 16-bit values.
 *
 * pv          the properties vectors to compact
 * pErrorCode  in/out ICU error code; it is passed to both the compaction
 *             and the freeze call and is inspected only once, afterwards
 *
 * Returns the frozen trie, or NULL if *pErrorCode indicates a failure
 * (in which case the trie held by the context is closed first).
 */
U_CAPI UTrie2 * U_EXPORT2
upvec_compactToUTrie2WithRowIndexes(UPropsVectors *pv, UErrorCode *pErrorCode) {
    UPVecToUTrie2Context context={ NULL };

    upvec_compact(pv, upvec_compactToUTrie2Handler, &context, pErrorCode);

    /*
     * Called unconditionally; presumably utrie2_freeze() does nothing when
     * *pErrorCode already signals a failure - confirm against utrie2.h.
     */
    utrie2_freeze(context.trie, UTRIE2_16_VALUE_BITS, pErrorCode);

    if(U_FAILURE(*pErrorCode)) {
        /* do not hand a partially built trie back to the caller */
        utrie2_close(context.trie);
        return NULL;
    }
    return context.trie;
}
/*
 * Reads the additional UCD property files one after another, presets the
 * East Asian Width defaults in the properties vectors (pv), parses
 * EastAsianWidth.txt on top of them, and finally compacts pv into the
 * trie stored in newTrie.
 *
 * filename    path buffer that is handed to every parse helper
 * suffix      passed through unchanged to every parse helper
 * pErrorCode  in/out ICU error code
 *
 * Failures of the East Asian Width presets or of the trie construction
 * are reported on stderr and terminate the process via exit().
 */
U_CFUNC void
generateAdditionalProperties(char *filename, const char *suffix, UErrorCode *pErrorCode) {
    /*
     * East Asian Width presets, see
     * http://www.unicode.org/reports/tr11/#Unassigned
     * 7.1 Unassigned and Private Use characters
     *
     * - All unassigned characters are by default classified as
     *   non-East Asian neutral (N==0 - nothing to do),
     *   except for the range U+20000 to U+2FFFD, since all code positions
     *   from U+20000 to U+2FFFD are intended for CJK ideographs (W).
     * - All Private Use characters are by default classified as
     *   ambiguous (A), since their definition depends on context.
     */
    static const struct {
        int32_t start, end;     /* inclusive code point range */
        uint32_t value;         /* East Asian Width value, already shifted */
    } eaDefaults[]={
        { 0xe000,   0xf8ff,   (uint32_t)(U_EA_AMBIGUOUS<<UPROPS_EA_SHIFT) },
        { 0xf0000,  0xffffd,  (uint32_t)(U_EA_AMBIGUOUS<<UPROPS_EA_SHIFT) },
        { 0x100000, 0x10fffd, (uint32_t)(U_EA_AMBIGUOUS<<UPROPS_EA_SHIFT) },
        { 0x20000,  0x2fffd,  (uint32_t)(U_EA_WIDE<<UPROPS_EA_SHIFT) }
    };

    char *nameStart;
    int32_t i;

    /*
     * Points at the end of the string currently in filename; presumably
     * the parse helpers write each file name there - confirm in the helpers.
     */
    nameStart=filename+uprv_strlen(filename);

    /* process various UCD .txt files */

    /* add Han numeric types & values */
    parseMultiFieldFile(filename, nameStart, "DerivedNumericValues", suffix, 2, numericLineFn, pErrorCode);

    parseTwoFieldFile(filename, nameStart, "DerivedAge", suffix, ageLineFn, pErrorCode);

    /*
     * Scripts - UTR 24 says:
     * Section 2:
     *   "Common - For characters that may be used
     *             within multiple scripts,
     *             or any unassigned code points."
     *
     * Section 4:
     *   "The value COMMON is the default value,
     *    given to all code points that are not
     *    explicitly mentioned in the data file."
     *
     * COMMON==USCRIPT_COMMON==0 - no preset needed
     */
    parseSingleEnumFile(filename, nameStart, suffix, &scriptSingleEnum, pErrorCode);

    parseSingleEnumFile(filename, nameStart, suffix, &blockSingleEnum, pErrorCode);

    parseBinariesFile(filename, nameStart, suffix, &propListBinaries, pErrorCode);

    parseBinariesFile(filename, nameStart, suffix, &derCorePropsBinaries, pErrorCode);

    parseSingleEnumFile(filename, nameStart, suffix, &graphemeClusterBreakSingleEnum, pErrorCode);

    parseSingleEnumFile(filename, nameStart, suffix, &wordBreakSingleEnum, pErrorCode);

    parseSingleEnumFile(filename, nameStart, suffix, &sentenceBreakSingleEnum, pErrorCode);

    /*
     * LineBreak-4.0.0.txt:
     *  - All code points, assigned and unassigned, that are not listed
     *    explicitly are given the value "XX".
     *
     * XX==U_LB_UNKNOWN==0 - no preset needed
     */
    parseSingleEnumFile(filename, nameStart, suffix, &lineBreakSingleEnum, pErrorCode);

    /*
     * NOTE(review): this unconditionally clears whatever the parse calls
     * above left in *pErrorCode; presumably those helpers report and exit
     * on their own failures - confirm.
     */
    *pErrorCode=U_ZERO_ERROR;

    /* apply the East Asian Width presets in table order */
    for(i=0; i<(int32_t)(sizeof(eaDefaults)/sizeof(eaDefaults[0])); ++i) {
        upvec_setValue(pv, eaDefaults[i].start, eaDefaults[i].end, 0,
                       eaDefaults[i].value, UPROPS_EA_MASK, pErrorCode);
    }
    if(U_FAILURE(*pErrorCode)) {
        fprintf(stderr, "genprops: unable to set default East Asian Widths: %s\n", u_errorName(*pErrorCode));
        exit(*pErrorCode);
    }

    /* parse EastAsianWidth.txt */
    parseSingleEnumFile(filename, nameStart, suffix, &eawSingleEnum, pErrorCode);

    /* compact the properties vectors and keep the resulting trie in newTrie */
    {
        UPVecToUTrieContext trieContext={
            NULL, 50000 /* capacity */,
            0,
            TRUE /* latin1Linear */
        };

        upvec_compact(pv, upvec_compactToUTrieHandler, &trieContext, pErrorCode);
        if(U_FAILURE(*pErrorCode)) {
            fprintf(stderr, "genprops error: unable to build trie for additional properties: %s\n",
                    u_errorName(*pErrorCode));
            exit(*pErrorCode);
        }
        newTrie=trieContext.newTrie;
    }
}