/*
 * Parse one mapping line; the line must not be empty.
 *
 * The line is read in three steps:
 *   1. One or more code points written as <UXXXX>; from the second one on,
 *      each may be preceded by a '+'. Each value must be a Unicode scalar
 *      value (0..d7ff or e000..10ffff). At most UCNV_EXT_MAX_UCHARS code
 *      points are accepted, and a multi-code point sequence must also fit
 *      into UCNV_EXT_MAX_UCHARS UTF-16 code units.
 *   2. After optional whitespace, a byte sequence parsed by ucm_parseBytes().
 *   3. Everything up to the first '|' is skipped; the character after it
 *      must be a digit 0..4 (the fallback indicator). If the line ends
 *      without a '|', the indicator is recorded as -1.
 *
 * @param m          Receives uLen, bLen and f on success. m->u is written
 *                   only when there is exactly one code point, and
 *                   m->b.bytes only when there are at most 4 bytes; for
 *                   longer sequences the data is available only in the
 *                   codePoints/bytes output arrays.
 * @param codePoints Output array for the parsed code points.
 * @param bytes      Output array handed to ucm_parseBytes().
 * @param line       NUL-terminated input line; echoed in error messages.
 * @return TRUE on success. FALSE on a syntax error; an error message is
 *         printed to stderr for every failure detected here (a negative
 *         result from ucm_parseBytes() is passed on without a further
 *         message -- presumably that function reports its own errors).
 */
U_CAPI UBool U_EXPORT2
ucm_parseMappingLine(UCMapping *m,
                     UChar32 codePoints[UCNV_EXT_MAX_UCHARS],
                     uint8_t bytes[UCNV_EXT_MAX_BYTES],
                     const char *line) {
    const char *s;
    char *end;
    unsigned long cpValue;
    int32_t u16Length;
    int8_t uLen, bLen, f;

    s=line;
    uLen=bLen=0;

    /* parse code points */
    for(;;) {
        /* skip an optional plus sign */
        if(uLen>0 && *s=='+') {
            ++s;
        }
        if(*s!='<') {
            break;
        }

        /*
         * Keep the parsed number in an unsigned long until it has been
         * range-checked: narrowing it to UChar32 first could wrap an
         * oversized value (e.g. <U100000041>) into the valid range.
         */
        if( s[1]!='U' ||
            (cpValue=uprv_strtoul(s+2, &end, 16), end)==s+2 ||
            *end!='>'
        ) {
            fprintf(stderr, "ucm error: Unicode code point must be formatted as <UXXXX> (1..6 hex digits) - \"%s\"\n", line);
            return FALSE;
        }
        if(cpValue>0x10ffff || U_IS_SURROGATE((UChar32)cpValue)) {
            fprintf(stderr, "ucm error: Unicode code point must be 0..d7ff or e000..10ffff - \"%s\"\n", line);
            return FALSE;
        }

        if(uLen==UCNV_EXT_MAX_UCHARS) {
            fprintf(stderr, "ucm error: too many code points on \"%s\"\n", line);
            return FALSE;
        }
        codePoints[uLen++]=(UChar32)cpValue;
        s=end+1;
    }

    if(uLen==0) {
        fprintf(stderr, "ucm error: no Unicode code points on \"%s\"\n", line);
        return FALSE;
    } else if(uLen==1) {
        m->u=codePoints[0];
    } else {
        /*
         * Only the UTF-16 length is needed: convert into a NULL/0-capacity
         * destination and tolerate the resulting buffer overflow status.
         */
        UErrorCode errorCode=U_ZERO_ERROR;
        u_strFromUTF32(NULL, 0, &u16Length, codePoints, uLen, &errorCode);
        if( (U_FAILURE(errorCode) && errorCode!=U_BUFFER_OVERFLOW_ERROR) ||
            u16Length>UCNV_EXT_MAX_UCHARS
        ) {
            fprintf(stderr, "ucm error: too many UChars on \"%s\"\n", line);
            return FALSE;
        }
    }

    s=u_skipWhitespace(s);

    /* parse bytes */
    bLen=ucm_parseBytes(bytes, line, &s);

    if(bLen<0) {
        return FALSE;
    } else if(bLen==0) {
        fprintf(stderr, "ucm error: no bytes on \"%s\"\n", line);
        return FALSE;
    } else if(bLen<=4) {
        /* short byte sequences are stored inline in the mapping */
        uprv_memcpy(m->b.bytes, bytes, bLen);
    }

    /* skip everything until the fallback indicator, even the start of a comment */
    for(;;) {
        if(*s==0) {
            f=-1; /* no fallback indicator */
            break;
        } else if(*s=='|') {
            f=(int8_t)(s[1]-'0');
            /* the unsigned comparison also rejects characters below '0' */
            if((uint8_t)f>4) {
                fprintf(stderr, "ucm error: fallback indicator must be |0..|4 - \"%s\"\n", line);
                return FALSE;
            }
            break;
        }
        ++s;
    }

    m->uLen=uLen;
    m->bLen=bLen;
    m->f=f;
    return TRUE;
}
/*
 * Read the header lines of a converter source file and fill in
 * data->staticData.
 *
 * Lines are read from convFile until the file ends or a line equal to
 * "CHARMAP" is found. Each line first goes to ucm_parseHeaderLine(); when
 * that returns true the line needs no further handling here. Otherwise the
 * key/value pair is examined and these keys are handled (all others are
 * ignored):
 *   code_set_name - copied to staticData->name; platform and codepage are
 *                   derived from it via getPlatformAndCCSIDFromName()
 *   subchar       - 1..4 bytes, stored in subChar/subCharLen
 *   subchar1      - exactly one byte, stored in subChar1
 *
 * Afterwards the min/max bytes per character and the conversion type are
 * copied from data->ucm->states. For a table without a base name, fields
 * that are still zero/empty are filled from the prototype static data for
 * that conversion type.
 *
 * @param data          Conversion data; data->ucm is read and updated,
 *                      data->staticData is written.
 * @param convFile      Input stream positioned at the start of the header.
 * @param converterName Not used by this function.
 * @param pErrorCode    In/out error code. Nothing is done if it already
 *                      indicates a failure. Set to U_INVALID_TABLE_FORMAT
 *                      (with a message on stderr) for an over-long
 *                      <code_set_name>, an illegal <subchar>/<subchar1>,
 *                      a missing conversion type, or a <subchar1> on an
 *                      unsuitable converter type.
 */
static void
readHeader(ConvData *data,
           FileStream* convFile,
           const char* converterName,
           UErrorCode *pErrorCode) {
    char line[1024];
    char *s, *key, *value;
    const UConverterStaticData *prototype;
    UConverterStaticData *staticData;

    if(U_FAILURE(*pErrorCode)) {
        return;
    }

    staticData=&data->staticData;
    staticData->platform=UCNV_IBM;
    staticData->subCharLen=0;

    while(T_FileStream_readLine(convFile, line, sizeof(line))) {
        /* basic parsing and handling of state-related items */
        if(ucm_parseHeaderLine(data->ucm, line, &key, &value)) {
            continue;
        }

        /* stop at the beginning of the mapping section */
        if(uprv_strcmp(line, "CHARMAP")==0) {
            break;
        }

        /* collect the information from the header field, ignore unknown keys */
        if(uprv_strcmp(key, "code_set_name")==0) {
            if(*value!=0) {
                /*
                 * The value comes from a 1024-byte line buffer while the
                 * name field is a much smaller fixed-size buffer inside the
                 * static data, so the length must be checked before
                 * copying. The scan stops as soon as the value is known
                 * not to fit (including its terminating NUL).
                 */
                size_t nameLength=0;
                while(nameLength<sizeof(staticData->name) && value[nameLength]!=0) {
                    ++nameLength;
                }
                if(nameLength>=sizeof(staticData->name)) {
                    fprintf(stderr, "error: <code_set_name> too long: %s\n", value);
                    *pErrorCode=U_INVALID_TABLE_FORMAT;
                    return;
                }

                uprv_strcpy((char *)staticData->name, value);
                getPlatformAndCCSIDFromName(value, &staticData->platform, &staticData->codepage);
            }
        } else if(uprv_strcmp(key, "subchar")==0) {
            uint8_t bytes[UCNV_EXT_MAX_BYTES];
            int8_t length;

            s=value;
            length=ucm_parseBytes(bytes, line, (const char **)&s);
            /* the whole value must be consumed and yield 1..4 bytes */
            if(1<=length && length<=4 && *s==0) {
                staticData->subCharLen=length;
                uprv_memcpy(staticData->subChar, bytes, length);
            } else {
                fprintf(stderr, "error: illegal <subchar> %s\n", value);
                *pErrorCode=U_INVALID_TABLE_FORMAT;
                return;
            }
        } else if(uprv_strcmp(key, "subchar1")==0) {
            uint8_t bytes[UCNV_EXT_MAX_BYTES];

            s=value;
            /* the whole value must be consumed and yield exactly one byte */
            if(1==ucm_parseBytes(bytes, line, (const char **)&s) && *s==0) {
                staticData->subChar1=bytes[0];
            } else {
                fprintf(stderr, "error: illegal <subchar1> %s\n", value);
                *pErrorCode=U_INVALID_TABLE_FORMAT;
                return;
            }
        }
    }

    /* copy values from the UCMFile to the static data */
    staticData->maxBytesPerChar=(int8_t)data->ucm->states.maxCharLength;
    staticData->minBytesPerChar=(int8_t)data->ucm->states.minCharLength;
    staticData->conversionType=data->ucm->states.conversionType;

    if(staticData->conversionType==UCNV_UNSUPPORTED_CONVERTER) {
        fprintf(stderr, "ucm error: missing conversion type (<uconv_class>)\n");
        *pErrorCode=U_INVALID_TABLE_FORMAT;
        return;
    }

    /*
     * Now that we know the type, copy any 'default' values from the table.
     * We need not check the type any further because the parser only
     * recognizes what we have prototypes for.
     *
     * For delta (extension-only) tables, copy values from the base file
     * instead, see createConverter().
     */
    if(data->ucm->baseName[0]==0) {
        prototype=ucnv_converterStaticData[staticData->conversionType];
        if(prototype!=NULL) {
            if(staticData->name[0]==0) {
                uprv_strcpy((char *)staticData->name, prototype->name);
            }

            if(staticData->codepage==0) {
                staticData->codepage=prototype->codepage;
            }

            if(staticData->platform==0) {
                staticData->platform=prototype->platform;
            }

            if(staticData->minBytesPerChar==0) {
                staticData->minBytesPerChar=prototype->minBytesPerChar;
            }

            if(staticData->maxBytesPerChar==0) {
                staticData->maxBytesPerChar=prototype->maxBytesPerChar;
            }

            if(staticData->subCharLen==0) {
                staticData->subCharLen=prototype->subCharLen;
                if(prototype->subCharLen>0) {
                    uprv_memcpy(staticData->subChar, prototype->subChar, prototype->subCharLen);
                }
            }
        }
    }

    /* a negative output type means "not set": default it from the maximum character length */
    if(data->ucm->states.outputType<0) {
        data->ucm->states.outputType=(int8_t)data->ucm->states.maxCharLength-1;
    }

    /* subchar1 is accepted only for MBCS or EBCDIC_STATEFUL tables whose minimum length is 1 */
    if( staticData->subChar1!=0 &&
            (staticData->minBytesPerChar>1 ||
                (staticData->conversionType!=UCNV_MBCS &&
                 staticData->conversionType!=UCNV_EBCDIC_STATEFUL))
    ) {
        fprintf(stderr, "error: <subchar1> defined for a type other than MBCS or EBCDIC_STATEFUL\n");
        *pErrorCode=U_INVALID_TABLE_FORMAT;
    }
}