/*
 * Print every mapping of a table to a stream via ucm_printMapping().
 *
 * table     - table whose mappings are printed
 * f         - output stream passed through to ucm_printMapping()
 * byUnicode - if true, mappings are visited in the storage order of
 *             table->mappings; otherwise they are visited in the order
 *             given by the index array table->reverseMap
 */
U_CAPI void U_EXPORT2
ucm_printTable(UCMTable *table, FILE *f, UBool byUnicode) {
    UCMapping *entries=table->mappings;
    int32_t total=table->mappingsLength;
    int32_t idx;

    if(!byUnicode) {
        /* indirect order: reverseMap[idx] is an index into the mappings array */
        const int32_t *order=table->reverseMap;

        for(idx=0; idx<total; ++idx) {
            ucm_printMapping(table, entries+order[idx], f);
        }
        return;
    }

    /* direct order: walk the mappings array front to back */
    for(idx=0; idx<total; ++idx) {
        ucm_printMapping(table, entries+idx, f);
    }
}
/*
 * Classify each mapping of ucm->base and flag those that do not belong there.
 *
 * - For SI/SO tables (isSISO), single-byte mappings of 0x0e or 0x0f are
 *   reported on stderr and flagged UCM_REMOVE_MAPPING.
 * - Otherwise ucm_mappingType() decides: a negative result is reported as an
 *   error, a positive result flags the mapping UCM_MOVE_TO_EXT, and zero
 *   leaves the mapping untouched.
 *
 * Returns FALSE if any mapping was reported as an error. If anything was
 * flagged, the flagged mappings are moved with ucm_moveMappings() and the
 * result of ucm_checkBaseExt() is returned. Otherwise the base table is
 * sorted and TRUE is returned.
 */
U_CAPI UBool U_EXPORT2
ucm_separateMappings(UCMFile *ucm, UBool isSISO) {
    UCMTable *base=ucm->base;
    UCMapping *cur=base->mappings;
    UCMapping *end=cur+base->mappingsLength;
    UBool mustMove=FALSE;
    UBool valid=TRUE;
    int32_t kind;

    for(; cur<end; ++cur) {
        if(isSISO && cur->bLen==1 && (cur->b.bytes[0]==0xe || cur->b.bytes[0]==0xf)) {
            fprintf(stderr, "warning: removing illegal mapping from an SI/SO-stateful table\n");
            ucm_printMapping(base, cur, stderr);
            cur->moveFlag|=UCM_REMOVE_MAPPING;
            mustMove=TRUE;
            continue;
        }

        kind=ucm_mappingType(
                &ucm->states, cur,
                UCM_GET_CODE_POINTS(base, cur), UCM_GET_BYTES(base, cur));
        if(kind>0) {
            /* flagged for the extension table */
            cur->moveFlag|=UCM_MOVE_TO_EXT;
            mustMove=TRUE;
        } else if(kind<0) {
            /* illegal byte sequence */
            printMapping(cur, UCM_GET_CODE_POINTS(base, cur), UCM_GET_BYTES(base, cur), stderr);
            valid=FALSE;
        }
    }

    if(!valid) {
        return FALSE;
    }

    if(!mustMove) {
        /* nothing was flagged: only sort the base table */
        ucm_sortTable(ucm->base);
        return TRUE;
    }

    ucm_moveMappings(ucm->base, ucm->ext);
    return ucm_checkBaseExt(&ucm->states, ucm->base, ucm->ext, ucm->ext, FALSE);
}
/*
 * Check the byte sequence of every mapping in a table against baseStates.
 *
 * Each mapping's bytes are passed to ucm_countChars(); a result below 1
 * marks the mapping as invalid and it is printed to stderr.
 * All mappings are checked even after a failure.
 *
 * Returns TRUE if no mapping was reported, FALSE otherwise.
 */
U_CAPI UBool U_EXPORT2
ucm_checkValidity(UCMTable *table, UCMStates *baseStates) {
    UCMapping *cur=table->mappings;
    UCMapping *end=cur+table->mappingsLength;
    UBool allValid=TRUE;
    int32_t chars;

    for(; cur<end; ++cur) {
        chars=ucm_countChars(baseStates, UCM_GET_BYTES(table, cur), cur->bLen);
        if(chars>=1) {
            continue;
        }

        /* report the offending mapping and keep scanning */
        ucm_printMapping(table, cur, stderr);
        allValid=FALSE;
    }

    return allValid;
}
/*
 * Walk the base and extension tables in parallel, in reverseMap order,
 * comparing mappings by their byte sequences with compareBytes().
 * Only mappings with f==0 or f==3 take part; when intersectBase==2,
 * single-byte base mappings are skipped as well.
 *
 * Depending on the comparison, base mappings get flagged UCM_MOVE_TO_EXT,
 * extension mappings get flagged UCM_REMOVE_MAPPING, or an error is printed
 * to stderr.
 *
 * Returns a bit set of NEEDS_MOVE (something was flagged) and
 * HAS_ERRORS (an error was printed).
 */
static uint8_t
checkBaseExtBytes(UCMStates *baseStates, UCMTable *base, UCMTable *ext,
                  UBool moveToExt, UBool intersectBase) {
    const int32_t *baseOrder=base->reverseMap;
    const int32_t *extOrder=ext->reverseMap;
    const int32_t baseCount=base->mappingsLength;
    const int32_t extCount=ext->mappingsLength;
    const UBool siso=(UBool)(baseStates->outputType==MBCS_OUTPUT_2_SISO);
    UCMapping *bm, *em;
    int32_t bi=0, ei=0;
    int32_t order;
    uint8_t flags=0;

    for(;;) {
        /*
         * Find the next relevant base mapping.
         * When comparing a base against a DBCS extension (intersectBase==2),
         * SBCS base mappings are left alone.
         */
        for(;;) {
            if(bi==baseCount) {
                return flags;
            }
            bm=base->mappings+baseOrder[bi];
            if(!(intersectBase==2 && bm->bLen==1) && (bm->f==0 || bm->f==3)) {
                break;
            }
            ++bi;
        }

        /* find the next relevant extension mapping */
        for(;;) {
            if(ei==extCount) {
                return flags;
            }
            em=ext->mappings+extOrder[ei];
            if(em->f==0 || em->f==3) {
                break;
            }
            ++ei;
        }

        order=compareBytes(base, bm, ext, em, TRUE);
        if(order>0) {
            /* the extension mapping sorts first: advance only that side */
            ++ei;
            continue;
        }

        if(order<0) {
            if(intersectBase) {
                /* mapping in base but not in ext, move it */
                bm->moveFlag|=UCM_MOVE_TO_EXT;
                flags|=NEEDS_MOVE;
            } else if(
                /*
                 * Is the base input a proper prefix of the extension input?
                 * For SI/SO tables, a single byte is never a prefix because
                 * it occurs in a separate single-byte state.
                 */
                bm->bLen<em->bLen &&
                (!siso || bm->bLen>1) &&
                0==uprv_memcmp(UCM_GET_BYTES(base, bm), UCM_GET_BYTES(ext, em), bm->bLen)
            ) {
                if(moveToExt) {
                    /* mark this mapping to be moved to the extension table */
                    bm->moveFlag|=UCM_MOVE_TO_EXT;
                    flags|=NEEDS_MOVE;
                } else {
                    fprintf(stderr,
                            "ucm error: the base table contains a mapping whose input sequence\n"
                            "           is a prefix of the input sequence of an extension mapping\n");
                    ucm_printMapping(base, bm, stderr);
                    ucm_printMapping(ext, em, stderr);
                    flags|=HAS_ERRORS;
                }
            }
        } else {
            /*
             * Same input bytes on both sides.
             * Same output too: the extension mapping is redundant, remove it.
             * Different output: move the base mapping when intersecting,
             * otherwise report an error.
             */
            if( bm->f==em->f && bm->uLen==em->uLen &&
                0==uprv_memcmp(UCM_GET_CODE_POINTS(base, bm), UCM_GET_CODE_POINTS(ext, em), 4*bm->uLen)
            ) {
                em->moveFlag|=UCM_REMOVE_MAPPING;
                flags|=NEEDS_MOVE;
            } else if(intersectBase) {
                bm->moveFlag|=UCM_MOVE_TO_EXT;
                flags|=NEEDS_MOVE;
            } else {
                fprintf(stderr,
                        "ucm error: the base table contains a mapping whose input sequence\n"
                        "           is the same as the input sequence of an extension mapping\n"
                        "           but it maps differently\n");
                ucm_printMapping(base, bm, stderr);
                ucm_printMapping(ext, em, stderr);
                flags|=HAS_ERRORS;
            }
        }

        /* order<=0: the base mapping has been dealt with */
        ++bi;
    }
}
/*
 * Walk the base and extension tables in parallel, in the storage order of
 * their mappings arrays, comparing mappings by their code point sequences
 * with compareUnicode().
 * Only mappings with f in 0..2 or f==4 take part.
 *
 * Depending on the comparison, base mappings get flagged UCM_MOVE_TO_EXT,
 * extension mappings get flagged UCM_REMOVE_MAPPING, or an error is printed
 * to stderr.
 *
 * baseStates is unused here.
 *
 * Returns a bit set of NEEDS_MOVE (something was flagged) and
 * HAS_ERRORS (an error was printed).
 */
static uint8_t
checkBaseExtUnicode(UCMStates *baseStates, UCMTable *base, UCMTable *ext,
                    UBool moveToExt, UBool intersectBase) {
    UCMapping *bm=base->mappings;
    UCMapping *baseEnd=bm+base->mappingsLength;
    UCMapping *em=ext->mappings;
    UCMapping *extEnd=em+ext->mappingsLength;
    int32_t order;
    uint8_t flags=0;

    (void)baseStates;

    for(;;) {
        /* find the next relevant base mapping */
        while(bm!=baseEnd && !((0<=bm->f && bm->f<=2) || bm->f==4)) {
            ++bm;
        }
        if(bm==baseEnd) {
            return flags;
        }

        /* find the next relevant extension mapping */
        while(em!=extEnd && !((0<=em->f && em->f<=2) || em->f==4)) {
            ++em;
        }
        if(em==extEnd) {
            return flags;
        }

        order=compareUnicode(base, bm, ext, em);
        if(order>0) {
            /* the extension mapping sorts first: advance only that side */
            ++em;
            continue;
        }

        if(order<0) {
            if(intersectBase && (intersectBase!=2 || bm->bLen>1)) {
                /*
                 * mapping in base but not in ext, move it
                 *
                 * if ext is DBCS, move DBCS mappings here
                 * and check SBCS ones for Unicode prefix below
                 */
                bm->moveFlag|=UCM_MOVE_TO_EXT;
                flags|=NEEDS_MOVE;
            } else if(
                /* is the base input a proper prefix of the extension input? */
                bm->uLen<em->uLen &&
                0==uprv_memcmp(UCM_GET_CODE_POINTS(base, bm), UCM_GET_CODE_POINTS(ext, em), 4*bm->uLen)
            ) {
                if(moveToExt) {
                    /* mark this mapping to be moved to the extension table */
                    bm->moveFlag|=UCM_MOVE_TO_EXT;
                    flags|=NEEDS_MOVE;
                } else {
                    fprintf(stderr,
                            "ucm error: the base table contains a mapping whose input sequence\n"
                            "           is a prefix of the input sequence of an extension mapping\n");
                    ucm_printMapping(base, bm, stderr);
                    ucm_printMapping(ext, em, stderr);
                    flags|=HAS_ERRORS;
                }
            }
        } else {
            /*
             * Same input code points on both sides.
             * Same output too: the extension mapping is redundant, remove it.
             * Different output: move the base mapping when intersecting,
             * otherwise report an error.
             */
            if( bm->f==em->f && bm->bLen==em->bLen &&
                0==uprv_memcmp(UCM_GET_BYTES(base, bm), UCM_GET_BYTES(ext, em), bm->bLen)
            ) {
                em->moveFlag|=UCM_REMOVE_MAPPING;
                flags|=NEEDS_MOVE;
            } else if(intersectBase) {
                bm->moveFlag|=UCM_MOVE_TO_EXT;
                flags|=NEEDS_MOVE;
            } else {
                fprintf(stderr,
                        "ucm error: the base table contains a mapping whose input sequence\n"
                        "           is the same as the input sequence of an extension mapping\n"
                        "           but it maps differently\n");
                ucm_printMapping(base, bm, stderr);
                ucm_printMapping(ext, em, stderr);
                flags|=HAS_ERRORS;
            }
        }

        /* order<=0: the base mapping has been dealt with */
        ++bm;
    }
}
/*
 * Recursive fromUTable generator; the counterpart of generateToUTable().
 *
 * One invocation writes one section for the mappings map[start..limit),
 * which are accessed through table->reverseMap and keyed on the UChar at
 * position unitIndex of each mapping's input.
 *
 * Differences from generateToUTable():
 * - the section is split over two parallel arrays, one with the input
 *   UChars (fromUTableUChars) and one with the result values
 *   (fromUTableValues)
 * - sections are always compact (one entry per distinct unit), intended
 *   for access via binary search
 *
 * Section layout: entry 0 is a header (UChars: entry count; values:
 * defaultValue), followed by one entry per distinct unit.
 *
 * Returns FALSE after printing an error if two mappings have the same
 * input sequence, or if a recursive call fails; TRUE otherwise.
 *
 * NOTE(review): keys/values are still written after recursive calls that
 * allocate from the same pools; this presumes utm_allocN() does not relocate
 * earlier allocations - confirm.
 */
static UBool
generateFromUTable(CnvExtData *extData, UCMTable *table,
                   int32_t start, int32_t limit, int32_t unitIndex,
                   uint32_t defaultValue) {
    UCMapping *all=table->mappings;
    int32_t *order=table->reverseMap;
    UCMapping *cur;
    UChar *units;
    UChar *keys;
    uint32_t *values;
    UChar32 first, unit, last;
    int32_t pos, slot, entries, from, to;
    uint32_t subDefault;

    /* step 1: count the distinct units at unitIndex; remember the first one */
    cur=all+order[start];
    units=(UChar *)UCM_GET_CODE_POINTS(table, cur);
    first=units[unitIndex];
    last=first;
    entries=1;
    for(pos=start+1; pos<limit; ++pos) {
        cur=all+order[pos];
        units=(UChar *)UCM_GET_CODE_POINTS(table, cur);
        unit=units[unitIndex];
        if(unit!=last) {
            last=unit;
            ++entries;
        }
    }

    /* step 2: allocate 1 header entry + one entry per distinct unit */
    keys=(UChar *)utm_allocN(extData->fromUTableUChars, 1+entries);
    values=(uint32_t *)utm_allocN(extData->fromUTableValues, 1+entries);

    /* write the header, then let keys/values point at the first item */
    keys[0]=(UChar)entries;
    values[0]=defaultValue;
    ++keys;
    ++values;

    /*
     * step 3: temporarily store, for each distinct unit, the reverseMap
     * index at which its run of mappings starts
     */
    last=first-1; /* differs from first so that the first mapping opens a run */
    slot=0;
    for(pos=start; pos<limit; ++pos) {
        cur=all+order[pos];
        units=(UChar *)UCM_GET_CODE_POINTS(table, cur);
        unit=units[unitIndex];
        if(unit!=last) {
            last=unit;
            keys[slot]=(UChar)unit;
            values[slot]=(uint32_t)pos;
            ++slot;
        }
    }
    /* here slot==entries */

    /* step 4: replace each temporary start index with the real value */
    to=(int32_t)(values[0]);
    for(slot=0; slot<entries; ++slot) {
        from=to;
        if((slot+1)<entries) {
            to=(int32_t)(values[slot+1]);
        } else {
            to=limit;
        }

        /* is there a mapping whose input ends exactly at this unit? */
        subDefault=0;
        cur=all+order[from];
        if(cur->uLen==unitIndex+1) {
            /* it is not passed on to the recursive call */
            ++from;

            if(from<to && all[order[from]].uLen==unitIndex+1) {
                /* two mappings with the same input sequence */
                fprintf(stderr, "error: multiple mappings from same Unicode code points\n");
                ucm_printMapping(table, cur, stderr);
                ucm_printMapping(table, all+order[from], stderr);
                return FALSE;
            }

            subDefault=getFromUBytesValue(extData, table, cur);
        }

        if(from==to) {
            /* no longer inputs: store the result for the sequence ending here */
            values[slot]=subDefault;
            continue;
        }

        /* longer inputs exist: store the index of the subsection written next */
        values[slot]=(uint32_t)utm_countItems(extData->fromUTableValues);
        if(!generateFromUTable(extData, table, from, to, unitIndex+1, subDefault)) {
            return FALSE;
        }
    }
    return TRUE;
}
/*
 * Recursive toUTable generator core function.
 *
 * Preconditions:
 * - start<limit (There is at least one mapping.)
 * - The mappings are sorted lexically. (Access is through the reverseMap.)
 * - All mappings between start and limit have input sequences that share
 *   the same prefix of unitIndex length, and therefore all of these sequences
 *   are at least unitIndex+1 long.
 * - There are only relevant mappings available through the reverseMap,
 *   see reduceToUMappings().
 *
 * One function invocation generates one section table:
 * a header word (entry count shifted by UCNV_EXT_TO_U_BYTE_SHIFT, combined
 * with defaultValue) followed by count entries (byte value shifted by
 * UCNV_EXT_TO_U_BYTE_SHIFT, combined with a result value or subsection index).
 *
 * Steps:
 * 1. Count the number of unique unit values and get the low/high unit values
 *    that occur at unitIndex.
 * 2. Allocate the section table with possible optimization for linear access.
 * 3. Write temporary version of the section table with start indexes of
 *    subsections, each corresponding to one unit value at unitIndex.
 * 4. Iterate through the table once more, and depending on the subsection length:
 *    0: write 0 as a result value (unused byte in linear-access section table)
 *   >0: if there is one mapping with an input unit sequence of unitIndex+1
 *       then defaultValue=compute the mapping result for this whole sequence
 *       else defaultValue=0
 *
 *       recurse into the subsection
 *
 * Returns FALSE after printing an error if the section has too many entries
 * for the header, if two mappings have the same input bytes, or if a
 * recursive call fails; TRUE otherwise.
 *
 * NOTE(review): section is still written after recursive calls that allocate
 * from the same pool; this presumes utm_allocN() does not relocate earlier
 * allocations - confirm.
 */
static UBool
generateToUTable(CnvExtData *extData, UCMTable *table,
                 int32_t start, int32_t limit, int32_t unitIndex,
                 uint32_t defaultValue) {
    UCMapping *mappings, *m;
    int32_t *map;
    int32_t i, j, uniqueCount, count, subStart, subLimit;

    uint8_t *bytes;
    int32_t low, high, prev;

    uint32_t *section;

    mappings=table->mappings;
    map=table->reverseMap;

    /* step 1: examine the input units; set low, high, uniqueCount */
    m=mappings+map[start];
    bytes=UCM_GET_BYTES(table, m);
    low=bytes[unitIndex];
    uniqueCount=1;

    prev=high=low;
    for(i=start+1; i<limit; ++i) {
        m=mappings+map[i];
        bytes=UCM_GET_BYTES(table, m);
        high=bytes[unitIndex];

        if(high!=prev) {
            prev=high;
            ++uniqueCount;
        }
    }

    /* step 2: allocate the section; set count, section */
    count=(high-low)+1;
    if(count<0x100 && (unitIndex==0 || uniqueCount>=(3*count)/4)) {
        /*
         * for the root table and for fairly full tables:
         * allocate for direct, linear array access
         * by keeping count, to write an entry for each unit value
         * from low to high
         *
         * exception: a full range of 0x100 units (low==0, high==0xff) uses
         * the compact form instead, because the header stores count in the
         * same field that holds a single byte value in the entries
         * NOTE(review): assumes that field is 8 bits wide
         * (UCNV_EXT_TO_U_BYTE_SHIFT==24 in a uint32_t) - confirm
         */
    } else {
        count=uniqueCount;
    }

    if(count>=0x100) {
        /* even the compact form has more entries than the header can record */
        fprintf(stderr, "error: toUnicode extension table section overflow: %ld section entries\n", (long)count);
        return FALSE;
    }

    /* allocate the section: 1 entry for the header + count for the items */
    section=(uint32_t *)utm_allocN(extData->toUTable, 1+count);

    /* write the section header */
    *section++=((uint32_t)count<<UCNV_EXT_TO_U_BYTE_SHIFT)|defaultValue;

    /* step 3: write temporary section table with subsection starts */
    prev=low-1; /* just before low to prevent empty subsections before low */
    j=0; /* section table index */
    for(i=start; i<limit; ++i) {
        m=mappings+map[i];
        bytes=UCM_GET_BYTES(table, m);
        high=bytes[unitIndex];

        if(high!=prev) {
            /* start of a new subsection for unit high */
            if(count>uniqueCount) {
                /* write empty subsections for unused units in a linear table */
                while(++prev<high) {
                    section[j++]=((uint32_t)prev<<UCNV_EXT_TO_U_BYTE_SHIFT)|(uint32_t)i;
                }
            } else {
                prev=high;
            }

            /* write the entry with the subsection start */
            section[j++]=((uint32_t)high<<UCNV_EXT_TO_U_BYTE_SHIFT)|(uint32_t)i;
        }
    }
    /* assert(j==count) */

    /* step 4: recurse and write results */
    subLimit=UCNV_EXT_TO_U_GET_VALUE(section[0]);
    for(j=0; j<count; ++j) {
        subStart=subLimit;
        subLimit= (j+1)<count ? UCNV_EXT_TO_U_GET_VALUE(section[j+1]) : limit;

        /* remove the subStart temporary value */
        section[j]&=~UCNV_EXT_TO_U_VALUE_MASK;

        if(subStart==subLimit) {
            /* leave the value zero: empty subsection for unused unit in a linear table */
            continue;
        }

        /* see if there is exactly one input unit sequence of length unitIndex+1 */
        defaultValue=0;
        m=mappings+map[subStart];
        if(m->bLen==unitIndex+1) {
            /* do not include this in the recursive generateToUTable() call */
            ++subStart;

            if(subStart<subLimit && mappings[map[subStart]].bLen==unitIndex+1) {
                /* print error for multiple same-input-sequence mappings */
                fprintf(stderr, "error: multiple mappings from same bytes\n");
                ucm_printMapping(table, m, stderr);
                ucm_printMapping(table, mappings+map[subStart], stderr);
                return FALSE;
            }

            defaultValue=getToUnicodeValue(extData, table, m);
        }

        if(subStart==subLimit) {
            /* write the result for the input sequence ending here */
            section[j]|=defaultValue;
        } else {
            /* write the index to the subsection table */
            section[j]|=(uint32_t)utm_countItems(extData->toUTable);

            /* recurse */
            if(!generateToUTable(extData, table, subStart, subLimit, unitIndex+1, defaultValue)) {
                return FALSE;
            }
        }
    }
    return TRUE;
}