U_CAPI UBool U_EXPORT2 ucm_checkBaseExt(UCMStates *baseStates, UCMTable *base, UCMTable *ext, UCMTable *moveTarget, UBool intersectBase) { uint8_t result; /* if we have an extension table, we must always use precision flags */ if(base->flagsType&UCM_FLAGS_IMPLICIT) { fprintf(stderr, "ucm error: the base table contains mappings without precision flags\n"); return FALSE; } if(ext->flagsType&UCM_FLAGS_IMPLICIT) { fprintf(stderr, "ucm error: extension table contains mappings without precision flags\n"); return FALSE; } /* checking requires both tables to be sorted */ ucm_sortTable(base); ucm_sortTable(ext); /* check */ result= checkBaseExtUnicode(baseStates, base, ext, (UBool)(moveTarget!=NULL), intersectBase)| checkBaseExtBytes(baseStates, base, ext, (UBool)(moveTarget!=NULL), intersectBase); if(result&HAS_ERRORS) { return FALSE; } if(result&NEEDS_MOVE) { ucm_moveMappings(ext, NULL); ucm_moveMappings(base, moveTarget); ucm_sortTable(base); ucm_sortTable(ext); if(moveTarget!=NULL) { ucm_sortTable(moveTarget); } } return TRUE; }
U_CAPI UBool U_EXPORT2 ucm_separateMappings(UCMFile *ucm, UBool isSISO) { UCMTable *table; UCMapping *m, *mLimit; int32_t type; UBool needsMove, isOK; table=ucm->base; m=table->mappings; mLimit=m+table->mappingsLength; needsMove=FALSE; isOK=TRUE; for(; m<mLimit; ++m) { if(isSISO && m->bLen==1 && (m->b.bytes[0]==0xe || m->b.bytes[0]==0xf)) { fprintf(stderr, "warning: removing illegal mapping from an SI/SO-stateful table\n"); ucm_printMapping(table, m, stderr); m->moveFlag|=UCM_REMOVE_MAPPING; needsMove=TRUE; continue; } type=ucm_mappingType( &ucm->states, m, UCM_GET_CODE_POINTS(table, m), UCM_GET_BYTES(table, m)); if(type<0) { /* illegal byte sequence */ printMapping(m, UCM_GET_CODE_POINTS(table, m), UCM_GET_BYTES(table, m), stderr); isOK=FALSE; } else if(type>0) { m->moveFlag|=UCM_MOVE_TO_EXT; needsMove=TRUE; } } if(!isOK) { return FALSE; } if(needsMove) { ucm_moveMappings(ucm->base, ucm->ext); return ucm_checkBaseExt(&ucm->states, ucm->base, ucm->ext, ucm->ext, FALSE); } else { ucm_sortTable(ucm->base); return TRUE; } }
static void createConverter(ConvData *data, const char *converterName, UErrorCode *pErrorCode) { ConvData baseData; UBool dataIsBase; UConverterStaticData *staticData; UCMStates *states, *baseStates; if(U_FAILURE(*pErrorCode)) { return; } initConvData(data); dataIsBase=readFile(data, converterName, pErrorCode); if(U_FAILURE(*pErrorCode)) { return; } staticData=&data->staticData; states=&data->ucm->states; if(dataIsBase) { /* * Build a normal .cnv file with a base table * and an optional extension table. */ data->cnvData=MBCSOpen(data->ucm); if(data->cnvData==NULL) { *pErrorCode=U_MEMORY_ALLOCATION_ERROR; } else if(!data->cnvData->isValid(data->cnvData, staticData->subChar, staticData->subCharLen) ) { fprintf(stderr, " the substitution character byte sequence is illegal in this codepage structure!\n"); *pErrorCode=U_INVALID_TABLE_FORMAT; } else if(staticData->subChar1!=0 && !data->cnvData->isValid(data->cnvData, &staticData->subChar1, 1) ) { fprintf(stderr, " the subchar1 byte is illegal in this codepage structure!\n"); *pErrorCode=U_INVALID_TABLE_FORMAT; } else if( data->ucm->ext->mappingsLength>0 && !ucm_checkBaseExt(states, data->ucm->base, data->ucm->ext, data->ucm->ext, FALSE) ) { *pErrorCode=U_INVALID_TABLE_FORMAT; } else if(data->ucm->base->flagsType&UCM_FLAGS_EXPLICIT) { /* sort the table so that it can be turned into UTF-8-friendly data */ ucm_sortTable(data->ucm->base); } if(U_SUCCESS(*pErrorCode)) { if( /* add the base table after ucm_checkBaseExt()! */ !data->cnvData->addTable(data->cnvData, data->ucm->base, &data->staticData) ) { *pErrorCode=U_INVALID_TABLE_FORMAT; } else { /* * addTable() may have requested moving more mappings to the extension table * if they fit into the base toUnicode table but not into the * base fromUnicode table. * (Especially for UTF-8-friendly fromUnicode tables.) * Such mappings will have the MBCS_FROM_U_EXT_FLAG set, which causes them * to be excluded from the extension toUnicode data. * See MBCSOkForBaseFromUnicode() for which mappings do not fit into * the base fromUnicode table. */ ucm_moveMappings(data->ucm->base, data->ucm->ext); ucm_sortTable(data->ucm->ext); if(data->ucm->ext->mappingsLength>0) { /* prepare the extension table, if there is one */ data->extData=CnvExtOpen(data->ucm); if(data->extData==NULL) { *pErrorCode=U_MEMORY_ALLOCATION_ERROR; } else if( !data->extData->addTable(data->extData, data->ucm->ext, &data->staticData) ) { *pErrorCode=U_INVALID_TABLE_FORMAT; } } } } } else { /* Build an extension-only .cnv file. */ char baseFilename[500]; char *basename; initConvData(&baseData); /* assemble a path/filename for data->ucm->baseName */ uprv_strcpy(baseFilename, converterName); basename=(char *)findBasename(baseFilename); uprv_strcpy(basename, data->ucm->baseName); uprv_strcat(basename, ".ucm"); /* read the base table */ dataIsBase=readFile(&baseData, baseFilename, pErrorCode); if(U_FAILURE(*pErrorCode)) { return; } else if(!dataIsBase) { fprintf(stderr, "error: the <icu:base> file \"%s\" is not a base table file\n", baseFilename); *pErrorCode=U_INVALID_TABLE_FORMAT; } else { /* prepare the extension table */ data->extData=CnvExtOpen(data->ucm); if(data->extData==NULL) { *pErrorCode=U_MEMORY_ALLOCATION_ERROR; } else { /* fill in gaps in extension file header fields */ UCMapping *m, *mLimit; uint8_t fallbackFlags; baseStates=&baseData.ucm->states; if(states->conversionType==UCNV_DBCS) { staticData->minBytesPerChar=(int8_t)(states->minCharLength=2); } else if(states->minCharLength==0) { staticData->minBytesPerChar=(int8_t)(states->minCharLength=baseStates->minCharLength); } if(states->maxCharLength<states->minCharLength) { staticData->maxBytesPerChar=(int8_t)(states->maxCharLength=baseStates->maxCharLength); } if(staticData->subCharLen==0) { uprv_memcpy(staticData->subChar, baseData.staticData.subChar, 4); staticData->subCharLen=baseData.staticData.subCharLen; } /* * do not copy subChar1 - * only use what is explicitly specified * because it cannot be unset in the extension file header */ /* get the fallback flags */ fallbackFlags=0; for(m=baseData.ucm->base->mappings, mLimit=m+baseData.ucm->base->mappingsLength; m<mLimit && fallbackFlags!=3; ++m ) { if(m->f==1) { fallbackFlags|=1; } else if(m->f==3) { fallbackFlags|=2; } } if(fallbackFlags&1) { staticData->hasFromUnicodeFallback=TRUE; } if(fallbackFlags&2) { staticData->hasToUnicodeFallback=TRUE; } if(1!=ucm_countChars(baseStates, staticData->subChar, staticData->subCharLen)) { fprintf(stderr, " the substitution character byte sequence is illegal in this codepage structure!\n"); *pErrorCode=U_INVALID_TABLE_FORMAT; } else if(staticData->subChar1!=0 && 1!=ucm_countChars(baseStates, &staticData->subChar1, 1)) { fprintf(stderr, " the subchar1 byte is illegal in this codepage structure!\n"); *pErrorCode=U_INVALID_TABLE_FORMAT; } else if( !ucm_checkValidity(data->ucm->ext, baseStates) || !ucm_checkBaseExt(baseStates, baseData.ucm->base, data->ucm->ext, data->ucm->ext, FALSE) ) { *pErrorCode=U_INVALID_TABLE_FORMAT; } else { if(states->maxCharLength>1) { /* * When building a normal .cnv file with a base table * for an MBCS (not SBCS) table with explicit precision flags, * the MBCSAddTable() function marks some mappings for moving * to the extension table. * They fit into the base toUnicode table but not into the * base fromUnicode table. * (Note: We do have explicit precision flags because they are * required for extension table generation, and * ucm_checkBaseExt() verified it.) * * We do not call MBCSAddTable() here (we probably could) * so we need to do the analysis before building the extension table. * We assume that MBCSAddTable() will build a UTF-8-friendly table. * Redundant mappings in the extension table are ok except they cost some size. * * Do this after ucm_checkBaseExt(). */ const MBCSData *mbcsData=MBCSGetDummy(); int32_t needsMove=0; for(m=baseData.ucm->base->mappings, mLimit=m+baseData.ucm->base->mappingsLength; m<mLimit; ++m ) { if(!MBCSOkForBaseFromUnicode(mbcsData, m->b.bytes, m->bLen, m->u, m->f)) { m->f|=MBCS_FROM_U_EXT_FLAG; m->moveFlag=UCM_MOVE_TO_EXT; ++needsMove; } } if(needsMove!=0) { ucm_moveMappings(baseData.ucm->base, data->ucm->ext); ucm_sortTable(data->ucm->ext); } } if(!data->extData->addTable(data->extData, data->ucm->ext, &data->staticData)) { *pErrorCode=U_INVALID_TABLE_FORMAT; } } } } cleanupConvData(&baseData); } }