/*
 * Copy a string into the block's store and return a pointer to the copy.
 *
 * block   string store; its `top` offset is advanced past the new string
 * s       characters to copy
 * length  number of chars in s, or negative to use uprv_strlen(s)
 *
 * The space reserved is length+1 (for the terminating NUL) rounded up to an
 * even number of bytes, so block->top always advances by an even amount.
 * The function prints a message and exits the process if the store is full
 * or if the string is not an invariant-character string.
 */
static char *
allocString(StringBlock *block, const char *s, int32_t length) {
    char *dest;
    uint32_t newTop;

    if(length<0) {
        length=(int32_t)uprv_strlen(s);
    }

    /* string + NUL, rounded up to the next even size */
    newTop=block->top + (uint32_t)((length + 1 + 1) & ~1);
    if(newTop >= block->max) {
        fprintf(stderr, "%s:%d: error: out of memory\n", path, lineNum);
        exit(U_MEMORY_ALLOCATION_ERROR);
    }

    /* the new string starts at the current top of the store */
    dest=block->store + block->top;
    uprv_memcpy(dest, s, length);
    dest[length]=0; /* terminating NUL */
    if(!(length & 1)) {
        /* even length: string+NUL is odd, so one padding byte follows */
        dest[length + 1]=0;
    }

    /* validate only now, so that the NUL-terminated copy can be printed */
    if(!uprv_isInvariantString(dest, length)) {
        fprintf(stderr, "%s:%d: error: the name %s contains not just invariant characters\n", path, lineNum, dest);
        exit(U_INVALID_TABLE_FORMAT);
    }

    /* commit the reservation */
    block->top=newTop;
    return dest;
}
/* test invariant-character handling */ static void TestInvariant() { /* all invariant graphic chars and some control codes (not \n!) */ const char invariantChars[]= "\t\r \"%&'()*+,-./" "0123456789:;<=>?" "ABCDEFGHIJKLMNOPQRSTUVWXYZ_" "abcdefghijklmnopqrstuvwxyz"; const UChar invariantUChars[]={ 9, 0xd, 0x20, 0x22, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5a, 0x5f, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0 }; const char variantChars[]="\n!#$@[\\]^`{|}~"; const UChar variantUChars[]={ 0x0a, 0x21, 0x23, 0x24, 0x40, 0x5b, 0x5c, 0x5d, 0x5e, 0x60, 0x7b, 0x7c, 0x7d, 0x7e, 0 }; const UChar nonASCIIUChars[]={ 0x80, 0xa0, 0x900, 0xff51 }; UChar us[120]; char cs[120]; int32_t i, length; /* make sure that all invariant characters convert both ways */ length=sizeof(invariantChars); u_charsToUChars(invariantChars, us, length); if(u_strcmp(us, invariantUChars)!=0) { log_err("u_charsToUChars(invariantChars) failed\n"); } u_UCharsToChars(invariantUChars, cs, length); if(strcmp(cs, invariantChars)!=0) { log_err("u_UCharsToChars(invariantUChars) failed\n"); } /* * make sure that variant characters convert from source code literals to Unicode * but not back to char * */ length=sizeof(variantChars); u_charsToUChars(variantChars, us, length); if(u_strcmp(us, variantUChars)!=0) { log_err("u_charsToUChars(variantChars) failed\n"); } #ifdef NDEBUG /* * Test u_UCharsToChars(variantUChars) only in release mode because it will * cause an assertion failure in debug builds. 
*/ u_UCharsToChars(variantUChars, cs, length); for(i=0; i<length; ++i) { if(cs[i]!=0) { log_err("u_UCharsToChars(variantUChars) converted the %d-th character to %02x instead of 00\n", i, cs[i]); } } #endif /* * Verify that invariant characters roundtrip from Unicode to the * default converter and back. */ { UConverter *cnv; UErrorCode errorCode; errorCode=U_ZERO_ERROR; cnv=ucnv_open(NULL, &errorCode); if(U_FAILURE(errorCode)) { log_err("unable to open the default converter\n"); } else { length=ucnv_fromUChars(cnv, cs, sizeof(cs), invariantUChars, -1, &errorCode); if(U_FAILURE(errorCode)) { log_err("ucnv_fromUChars(invariantUChars) failed - %s\n", u_errorName(errorCode)); } else if(length!=sizeof(invariantChars)-1 || strcmp(cs, invariantChars)!=0) { log_err("ucnv_fromUChars(invariantUChars) failed\n"); } errorCode=U_ZERO_ERROR; length=ucnv_toUChars(cnv, us, LENGTHOF(us), invariantChars, -1, &errorCode); if(U_FAILURE(errorCode)) { log_err("ucnv_toUChars(invariantChars) failed - %s\n", u_errorName(errorCode)); } else if(length!=LENGTHOF(invariantUChars)-1 || u_strcmp(us, invariantUChars)!=0) { log_err("ucnv_toUChars(invariantChars) failed\n"); } ucnv_close(cnv); } } /* API tests */ if(!uprv_isInvariantString(invariantChars, -1)) { log_err("uprv_isInvariantString(invariantChars) failed\n"); } if(!uprv_isInvariantUString(invariantUChars, -1)) { log_err("uprv_isInvariantUString(invariantUChars) failed\n"); } if(!uprv_isInvariantString(invariantChars+strlen(invariantChars), 1)) { log_err("uprv_isInvariantString(\"\\0\") failed\n"); } for(i=0; i<(sizeof(variantChars)-1); ++i) { if(uprv_isInvariantString(variantChars+i, 1)) { log_err("uprv_isInvariantString(variantChars[%d]) failed\n", i); } if(uprv_isInvariantUString(variantUChars+i, 1)) { log_err("uprv_isInvariantUString(variantUChars[%d]) failed\n", i); } } for(i=0; i<LENGTHOF(nonASCIIUChars); ++i) { if(uprv_isInvariantUString(nonASCIIUChars+i, 1)) { log_err("uprv_isInvariantUString(nonASCIIUChars[%d]) failed\n", i); } } 
}
int main(int argc, char* argv[]) { ConvData data; UErrorCode err = U_ZERO_ERROR, localError; char outFileName[UCNV_MAX_FULL_FILE_NAME_LENGTH]; const char* destdir, *arg; size_t destdirlen; char* dot = NULL, *outBasename; char cnvName[UCNV_MAX_FULL_FILE_NAME_LENGTH]; char cnvNameWithPkg[UCNV_MAX_FULL_FILE_NAME_LENGTH]; UVersionInfo icuVersion; UBool printFilename; err = U_ZERO_ERROR; U_MAIN_INIT_ARGS(argc, argv); /* Set up the ICU version number */ u_getVersion(icuVersion); uprv_memcpy(&dataInfo.dataVersion, &icuVersion, sizeof(UVersionInfo)); /* preset then read command line options */ options[OPT_DESTDIR].value=u_getDataDirectory(); argc=u_parseArgs(argc, argv, LENGTHOF(options), options); /* error handling, printing usage message */ if(argc<0) { fprintf(stderr, "error in command line argument \"%s\"\n", argv[-argc]); } else if(argc<2) { argc=-1; } if(argc<0 || options[OPT_HELP_H].doesOccur || options[OPT_HELP_QUESTION_MARK].doesOccur) { FILE *stdfile=argc<0 ? stderr : stdout; fprintf(stdfile, "usage: %s [-options] files...\n" "\tread .ucm codepage mapping files and write .cnv files\n" "options:\n" "\t-h or -? or --help this usage text\n" "\t-V or --version show a version message\n" "\t-c or --copyright include a copyright notice\n" "\t-d or --destdir destination directory, followed by the path\n" "\t-v or --verbose Turn on verbose output\n", argv[0]); fprintf(stdfile, "\t --small Generate smaller .cnv files. They will be\n" "\t significantly smaller but may not be compatible with\n" "\t older versions of ICU and will require heap memory\n" "\t allocation when loaded.\n" "\t --ignore-siso-check Use SI/SO other than 0xf/0xe.\n"); return argc<0 ? 
U_ILLEGAL_ARGUMENT_ERROR : U_ZERO_ERROR; } if(options[OPT_VERSION].doesOccur) { printf("makeconv version %hu.%hu, ICU tool to read .ucm codepage mapping files and write .cnv files\n", dataInfo.formatVersion[0], dataInfo.formatVersion[1]); printf("%s\n", U_COPYRIGHT_STRING); exit(0); } /* get the options values */ haveCopyright = options[OPT_COPYRIGHT].doesOccur; destdir = options[OPT_DESTDIR].value; VERBOSE = options[OPT_VERBOSE].doesOccur; SMALL = options[OPT_SMALL].doesOccur; if (options[OPT_IGNORE_SISO_CHECK].doesOccur) { IGNORE_SISO_CHECK = TRUE; } if (destdir != NULL && *destdir != 0) { uprv_strcpy(outFileName, destdir); destdirlen = uprv_strlen(destdir); outBasename = outFileName + destdirlen; if (*(outBasename - 1) != U_FILE_SEP_CHAR) { *outBasename++ = U_FILE_SEP_CHAR; ++destdirlen; } } else { destdirlen = 0; outBasename = outFileName; } #if DEBUG { int i; printf("makeconv: processing %d files...\n", argc - 1); for(i=1; i<argc; ++i) { printf("%s ", argv[i]); } printf("\n"); fflush(stdout); } #endif err = U_ZERO_ERROR; printFilename = (UBool) (argc > 2 || VERBOSE); for (++argv; --argc; ++argv) { arg = getLongPathname(*argv); /* Check for potential buffer overflow */ if(strlen(arg) > UCNV_MAX_FULL_FILE_NAME_LENGTH) { fprintf(stderr, "%s\n", u_errorName(U_BUFFER_OVERFLOW_ERROR)); return U_BUFFER_OVERFLOW_ERROR; } /*produces the right destination path for display*/ if (destdirlen != 0) { const char *basename; /* find the last file sepator */ basename = findBasename(arg); uprv_strcpy(outBasename, basename); } else { uprv_strcpy(outFileName, arg); } /*removes the extension if any is found*/ dot = uprv_strrchr(outBasename, '.'); if (dot) { *dot = '\0'; } /* the basename without extension is the converter name */ uprv_strcpy(cnvName, outBasename); /*Adds the target extension*/ uprv_strcat(outBasename, CONVERTER_FILE_EXTENSION); #if DEBUG printf("makeconv: processing %s ...\n", arg); fflush(stdout); #endif localError = U_ZERO_ERROR; initConvData(&data); 
createConverter(&data, arg, &localError); if (U_FAILURE(localError)) { /* if an error is found, print out an error msg and keep going */ fprintf(stderr, "Error creating converter for \"%s\" file for \"%s\" (%s)\n", outFileName, arg, u_errorName(localError)); if(U_SUCCESS(err)) { err = localError; } } else { /* Insure the static data name matches the file name */ /* Changed to ignore directory and only compare base name LDH 1/2/08*/ char *p; p = strrchr(cnvName, U_FILE_SEP_CHAR); /* Find last file separator */ if(p == NULL) /* OK, try alternate */ { p = strrchr(cnvName, U_FILE_ALT_SEP_CHAR); if(p == NULL) { p=cnvName; /* If no separators, no problem */ } } else { p++; /* If found separtor, don't include it in compare */ } if(uprv_stricmp(p,data.staticData.name)) { fprintf(stderr, "Warning: %s%s claims to be '%s'\n", cnvName, CONVERTER_FILE_EXTENSION, data.staticData.name); } uprv_strcpy((char*)data.staticData.name, cnvName); if(!uprv_isInvariantString((char*)data.staticData.name, -1)) { fprintf(stderr, "Error: A converter name must contain only invariant characters.\n" "%s is not a valid converter name.\n", data.staticData.name); if(U_SUCCESS(err)) { err = U_INVALID_TABLE_FORMAT; } } uprv_strcpy(cnvNameWithPkg, cnvName); localError = U_ZERO_ERROR; writeConverterData(&data, cnvNameWithPkg, destdir, &localError); if(U_FAILURE(localError)) { /* if an error is found, print out an error msg and keep going*/ fprintf(stderr, "Error writing \"%s\" file for \"%s\" (%s)\n", outFileName, arg, u_errorName(localError)); if(U_SUCCESS(err)) { err = localError; } } else if (printFilename) { puts(outBasename); } } fflush(stdout); fflush(stderr); cleanupConvData(&data); } return err; }