static void
createConverter(ConvData *data, const char *converterName, UErrorCode *pErrorCode) {
    ConvData baseData;
    UBool dataIsBase;

    UConverterStaticData *staticData;
    UCMStates *states, *baseStates;

    if(U_FAILURE(*pErrorCode)) {
        return;
    }

    initConvData(data);

    dataIsBase=readFile(data, converterName, pErrorCode);
    if(U_FAILURE(*pErrorCode)) {
        return;
    }

    staticData=&data->staticData;
    states=&data->ucm->states;

    if(dataIsBase) {
        /*
         * Build a normal .cnv file with a base table
         * and an optional extension table.
         */
        data->cnvData=MBCSOpen(data->ucm);
        if(data->cnvData==NULL) {
            *pErrorCode=U_MEMORY_ALLOCATION_ERROR;

        } else if(!data->cnvData->isValid(data->cnvData,
                            staticData->subChar, staticData->subCharLen)
        ) {
            fprintf(stderr, "       the substitution character byte sequence is illegal in this codepage structure!\n");
            *pErrorCode=U_INVALID_TABLE_FORMAT;

        } else if(staticData->subChar1!=0 &&
                    !data->cnvData->isValid(data->cnvData, &staticData->subChar1, 1)
        ) {
            fprintf(stderr, "       the subchar1 byte is illegal in this codepage structure!\n");
            *pErrorCode=U_INVALID_TABLE_FORMAT;

        } else if(
            data->ucm->ext->mappingsLength>0 &&
            !ucm_checkBaseExt(states, data->ucm->base, data->ucm->ext, data->ucm->ext, FALSE)
        ) {
            *pErrorCode=U_INVALID_TABLE_FORMAT;
        } else if(data->ucm->base->flagsType&UCM_FLAGS_EXPLICIT) {
            /* sort the table so that it can be turned into UTF-8-friendly data */
            ucm_sortTable(data->ucm->base);
        }

        if(U_SUCCESS(*pErrorCode)) {
            if(
                /* add the base table after ucm_checkBaseExt()! */
                !data->cnvData->addTable(data->cnvData, data->ucm->base, &data->staticData)
            ) {
                *pErrorCode=U_INVALID_TABLE_FORMAT;
            } else {
                /*
                 * addTable() may have requested moving more mappings to the extension table
                 * if they fit into the base toUnicode table but not into the
                 * base fromUnicode table.
                 * (Especially for UTF-8-friendly fromUnicode tables.)
                 * Such mappings will have the MBCS_FROM_U_EXT_FLAG set, which causes them
                 * to be excluded from the extension toUnicode data.
                 * See MBCSOkForBaseFromUnicode() for which mappings do not fit into
                 * the base fromUnicode table.
                 */
                ucm_moveMappings(data->ucm->base, data->ucm->ext);
                ucm_sortTable(data->ucm->ext);
                if(data->ucm->ext->mappingsLength>0) {
                    /* prepare the extension table, if there is one */
                    data->extData=CnvExtOpen(data->ucm);
                    if(data->extData==NULL) {
                        *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
                    } else if(
                        !data->extData->addTable(data->extData, data->ucm->ext, &data->staticData)
                    ) {
                        *pErrorCode=U_INVALID_TABLE_FORMAT;
                    }
                }
            }
        }
    } else {
        /* Build an extension-only .cnv file. */
        char baseFilename[500];
        char *basename;

        initConvData(&baseData);

        /* assemble a path/filename for data->ucm->baseName */
        uprv_strcpy(baseFilename, converterName);
        basename=(char *)findBasename(baseFilename);
        uprv_strcpy(basename, data->ucm->baseName);
        uprv_strcat(basename, ".ucm");

        /* read the base table */
        dataIsBase=readFile(&baseData, baseFilename, pErrorCode);
        if(U_FAILURE(*pErrorCode)) {
            return;
        } else if(!dataIsBase) {
            fprintf(stderr, "error: the <icu:base> file \"%s\" is not a base table file\n", baseFilename);
            *pErrorCode=U_INVALID_TABLE_FORMAT;
        } else {
            /* prepare the extension table */
            data->extData=CnvExtOpen(data->ucm);
            if(data->extData==NULL) {
                *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
            } else {
                /* fill in gaps in extension file header fields */
                UCMapping *m, *mLimit;
                uint8_t fallbackFlags;

                baseStates=&baseData.ucm->states;
                if(states->conversionType==UCNV_DBCS) {
                    staticData->minBytesPerChar=(int8_t)(states->minCharLength=2);
                } else if(states->minCharLength==0) {
                    staticData->minBytesPerChar=(int8_t)(states->minCharLength=baseStates->minCharLength);
                }
                if(states->maxCharLength<states->minCharLength) {
                    staticData->maxBytesPerChar=(int8_t)(states->maxCharLength=baseStates->maxCharLength);
                }

                if(staticData->subCharLen==0) {
                    uprv_memcpy(staticData->subChar, baseData.staticData.subChar, 4);
                    staticData->subCharLen=baseData.staticData.subCharLen;
                }
                /*
                 * do not copy subChar1 -
                 * only use what is explicitly specified
                 * because it cannot be unset in the extension file header
                 */

                /* get the fallback flags */
                fallbackFlags=0;
                for(m=baseData.ucm->base->mappings, mLimit=m+baseData.ucm->base->mappingsLength;
                    m<mLimit && fallbackFlags!=3;
                    ++m
                ) {
                    if(m->f==1) {
                        fallbackFlags|=1;
                    } else if(m->f==3) {
                        fallbackFlags|=2;
                    }
                }

                if(fallbackFlags&1) {
                    staticData->hasFromUnicodeFallback=TRUE;
                }
                if(fallbackFlags&2) {
                    staticData->hasToUnicodeFallback=TRUE;
                }

                if(1!=ucm_countChars(baseStates, staticData->subChar, staticData->subCharLen)) {
                    fprintf(stderr, "       the substitution character byte sequence is illegal in this codepage structure!\n");
                    *pErrorCode=U_INVALID_TABLE_FORMAT;

                } else if(staticData->subChar1!=0 && 1!=ucm_countChars(baseStates, &staticData->subChar1, 1)) {
                    fprintf(stderr, "       the subchar1 byte is illegal in this codepage structure!\n");
                    *pErrorCode=U_INVALID_TABLE_FORMAT;

                } else if(
                    !ucm_checkValidity(data->ucm->ext, baseStates) ||
                    !ucm_checkBaseExt(baseStates, baseData.ucm->base, data->ucm->ext, data->ucm->ext, FALSE)
                ) {
                    *pErrorCode=U_INVALID_TABLE_FORMAT;
                } else {
                    if(states->maxCharLength>1) {
                        /*
                         * When building a normal .cnv file with a base table
                         * for an MBCS (not SBCS) table with explicit precision flags,
                         * the MBCSAddTable() function marks some mappings for moving
                         * to the extension table.
                         * They fit into the base toUnicode table but not into the
                         * base fromUnicode table.
                         * (Note: We do have explicit precision flags because they are
                         * required for extension table generation, and
                         * ucm_checkBaseExt() verified it.)
                         *
                         * We do not call MBCSAddTable() here (we probably could)
                         * so we need to do the analysis before building the extension table.
                         * We assume that MBCSAddTable() will build a UTF-8-friendly table.
                         * Redundant mappings in the extension table are ok except they cost some size.
                         *
                         * Do this after ucm_checkBaseExt().
                         */
                        const MBCSData *mbcsData=MBCSGetDummy();
                        int32_t needsMove=0;
                        for(m=baseData.ucm->base->mappings, mLimit=m+baseData.ucm->base->mappingsLength;
                            m<mLimit;
                            ++m
                        ) {
                            if(!MBCSOkForBaseFromUnicode(mbcsData, m->b.bytes, m->bLen, m->u, m->f)) {
                                m->f|=MBCS_FROM_U_EXT_FLAG;
                                m->moveFlag=UCM_MOVE_TO_EXT;
                                ++needsMove;
                            }
                        }

                        if(needsMove!=0) {
                            ucm_moveMappings(baseData.ucm->base, data->ucm->ext);
                            ucm_sortTable(data->ucm->ext);
                        }
                    }
                    if(!data->extData->addTable(data->extData, data->ucm->ext, &data->staticData)) {
                        *pErrorCode=U_INVALID_TABLE_FORMAT;
                    }
                }
            }
        }

        cleanupConvData(&baseData);
    }
}
int main(int argc, char* argv[])
{
    ConvData data;
    UErrorCode err = U_ZERO_ERROR, localError;
    char outFileName[UCNV_MAX_FULL_FILE_NAME_LENGTH];
    const char* destdir, *arg;
    size_t destdirlen;
    char* dot = NULL, *outBasename;
    char cnvName[UCNV_MAX_FULL_FILE_NAME_LENGTH];
    char cnvNameWithPkg[UCNV_MAX_FULL_FILE_NAME_LENGTH];
    UVersionInfo icuVersion;
    UBool printFilename;

    err = U_ZERO_ERROR;

    U_MAIN_INIT_ARGS(argc, argv);

    /* Set up the ICU version number */
    u_getVersion(icuVersion);
    uprv_memcpy(&dataInfo.dataVersion, &icuVersion, sizeof(UVersionInfo));

    /* preset then read command line options */
    options[OPT_DESTDIR].value=u_getDataDirectory();
    argc=u_parseArgs(argc, argv, LENGTHOF(options), options);

    /* error handling, printing usage message */
    if(argc<0) {
        fprintf(stderr,
            "error in command line argument \"%s\"\n",
            argv[-argc]);
    } else if(argc<2) {
        argc=-1;
    }
    if(argc<0 || options[OPT_HELP_H].doesOccur || options[OPT_HELP_QUESTION_MARK].doesOccur) {
        FILE *stdfile=argc<0 ? stderr : stdout;
        fprintf(stdfile,
            "usage: %s [-options] files...\n"
            "\tread .ucm codepage mapping files and write .cnv files\n"
            "options:\n"
            "\t-h or -? or --help  this usage text\n"
            "\t-V or --version     show a version message\n"
            "\t-c or --copyright   include a copyright notice\n"
            "\t-d or --destdir     destination directory, followed by the path\n"
            "\t-v or --verbose     Turn on verbose output\n",
            argv[0]);
        fprintf(stdfile,
            "\t      --small       Generate smaller .cnv files. They will be\n"
            "\t                    significantly smaller but may not be compatible with\n"
            "\t                    older versions of ICU and will require heap memory\n"
            "\t                    allocation when loaded.\n"
            "\t      --ignore-siso-check         Use SI/SO other than 0xf/0xe.\n");
        return argc<0 ? U_ILLEGAL_ARGUMENT_ERROR : U_ZERO_ERROR;
    }

    if(options[OPT_VERSION].doesOccur) {
        printf("makeconv version %hu.%hu, ICU tool to read .ucm codepage mapping files and write .cnv files\n",
               dataInfo.formatVersion[0], dataInfo.formatVersion[1]);
        printf("%s\n", U_COPYRIGHT_STRING);
        exit(0);
    }

    /* get the options values */
    haveCopyright = options[OPT_COPYRIGHT].doesOccur;
    destdir = options[OPT_DESTDIR].value;
    VERBOSE = options[OPT_VERBOSE].doesOccur;
    SMALL = options[OPT_SMALL].doesOccur;

    if (options[OPT_IGNORE_SISO_CHECK].doesOccur) {
        IGNORE_SISO_CHECK = TRUE;
    }

    if (destdir != NULL && *destdir != 0) {
        uprv_strcpy(outFileName, destdir);
        destdirlen = uprv_strlen(destdir);
        outBasename = outFileName + destdirlen;
        if (*(outBasename - 1) != U_FILE_SEP_CHAR) {
            *outBasename++ = U_FILE_SEP_CHAR;
            ++destdirlen;
        }
    } else {
        destdirlen = 0;
        outBasename = outFileName;
    }

#if DEBUG
    {
      int i;
      printf("makeconv: processing %d files...\n", argc - 1);
      for(i=1; i<argc; ++i) {
        printf("%s ", argv[i]);
      }
      printf("\n");
      fflush(stdout);
    }
#endif

    err = U_ZERO_ERROR;
    printFilename = (UBool) (argc > 2 || VERBOSE);
    for (++argv; --argc; ++argv)
    {
        arg = getLongPathname(*argv);

        /* Check for potential buffer overflow */
        if(strlen(arg) > UCNV_MAX_FULL_FILE_NAME_LENGTH)
        {
            fprintf(stderr, "%s\n", u_errorName(U_BUFFER_OVERFLOW_ERROR));
            return U_BUFFER_OVERFLOW_ERROR;
        }

        /*produces the right destination path for display*/
        if (destdirlen != 0)
        {
            const char *basename;

            /* find the last file sepator */
            basename = findBasename(arg);
            uprv_strcpy(outBasename, basename);
        }
        else
        {
            uprv_strcpy(outFileName, arg);
        }

        /*removes the extension if any is found*/
        dot = uprv_strrchr(outBasename, '.');
        if (dot)
        {
            *dot = '\0';
        }

        /* the basename without extension is the converter name */
        uprv_strcpy(cnvName, outBasename);

        /*Adds the target extension*/
        uprv_strcat(outBasename, CONVERTER_FILE_EXTENSION);

#if DEBUG
        printf("makeconv: processing %s  ...\n", arg);
        fflush(stdout);
#endif
        localError = U_ZERO_ERROR;
        initConvData(&data);
        createConverter(&data, arg, &localError);

        if (U_FAILURE(localError))
        {
            /* if an error is found, print out an error msg and keep going */
            fprintf(stderr, "Error creating converter for \"%s\" file for \"%s\" (%s)\n", outFileName, arg,
                u_errorName(localError));
            if(U_SUCCESS(err)) {
                err = localError;
            }
        }
        else
        {
            /* Insure the static data name matches the  file name */
            /* Changed to ignore directory and only compare base name
             LDH 1/2/08*/
            char *p;
            p = strrchr(cnvName, U_FILE_SEP_CHAR); /* Find last file separator */

            if(p == NULL)            /* OK, try alternate */
            {
                p = strrchr(cnvName, U_FILE_ALT_SEP_CHAR);
                if(p == NULL)
                {
                    p=cnvName; /* If no separators, no problem */
                }
            }
            else
            {
                p++;   /* If found separtor, don't include it in compare */
            }
            if(uprv_stricmp(p,data.staticData.name))
            {
                fprintf(stderr, "Warning: %s%s claims to be '%s'\n",
                    cnvName,  CONVERTER_FILE_EXTENSION,
                    data.staticData.name);
            }

            uprv_strcpy((char*)data.staticData.name, cnvName);

            if(!uprv_isInvariantString((char*)data.staticData.name, -1)) {
                fprintf(stderr,
                    "Error: A converter name must contain only invariant characters.\n"
                    "%s is not a valid converter name.\n",
                    data.staticData.name);
                if(U_SUCCESS(err)) {
                    err = U_INVALID_TABLE_FORMAT;
                }
            }

            uprv_strcpy(cnvNameWithPkg, cnvName);

            localError = U_ZERO_ERROR;
            writeConverterData(&data, cnvNameWithPkg, destdir, &localError);

            if(U_FAILURE(localError))
            {
                /* if an error is found, print out an error msg and keep going*/
                fprintf(stderr, "Error writing \"%s\" file for \"%s\" (%s)\n", outFileName, arg,
                    u_errorName(localError));
                if(U_SUCCESS(err)) {
                    err = localError;
                }
            }
            else if (printFilename)
            {
                puts(outBasename);
            }
        }
        fflush(stdout);
        fflush(stderr);

        cleanupConvData(&data);
    }

    return err;
}
Example #3
0
extern int
main(int argc, char *argv[]) {
    const char *pname, *sourcePath, *destPath, *inFilename, *outFilename, *outComment;
    char outType;
    UBool isHelp, isModified, isPackage;
    int result = 0;

    Package *pkg, *listPkg, *addListPkg;

    U_MAIN_INIT_ARGS(argc, argv);

    /* get the program basename */
    pname=findBasename(argv[0]);

    argc=u_parseArgs(argc, argv, LENGTHOF(options), options);
    isHelp=options[OPT_HELP_H].doesOccur || options[OPT_HELP_QUESTION_MARK].doesOccur;
    if(isHelp) {
        printUsage(pname, TRUE);
        return U_ZERO_ERROR;
    }
    if(argc<2 || 3<argc) {
        printUsage(pname, FALSE);
        return U_ILLEGAL_ARGUMENT_ERROR;
    }

    pkg=new Package;
    if(pkg==NULL) {
        fprintf(stderr, "icupkg: not enough memory\n");
        return U_MEMORY_ALLOCATION_ERROR;
    }
    isModified=FALSE;

    if(options[OPT_SOURCEDIR].doesOccur) {
        sourcePath=options[OPT_SOURCEDIR].value;
    } else {
        // work relative to the current working directory
        sourcePath=NULL;
    }
    if(options[OPT_DESTDIR].doesOccur) {
        destPath=options[OPT_DESTDIR].value;
    } else {
        // work relative to the current working directory
        destPath=NULL;
    }

    if(0==strcmp(argv[1], "new")) {
        inFilename=NULL;
        isPackage=TRUE;
    } else {
        inFilename=argv[1];
        if(isPackageName(inFilename)) {
            pkg->readPackage(inFilename);
            isPackage=TRUE;
        } else {
            /* swap a single file (icuswap replacement) rather than work on a package */
            pkg->addFile(sourcePath, inFilename);
            isPackage=FALSE;
        }
    }

    if(argc>=3) {
        outFilename=argv[2];
        if(0!=strcmp(argv[1], argv[2])) {
            isModified=TRUE;
        }
    } else if(isPackage) {
        outFilename=NULL;
    } else /* !isPackage */ {
        outFilename=inFilename;
        isModified=(UBool)(sourcePath!=destPath);
    }

    /* parse the output type option */
    if(options[OPT_OUT_TYPE].doesOccur) {
        const char *type=options[OPT_OUT_TYPE].value;
        if(type[0]==0 || type[1]!=0) {
            /* the type must be exactly one letter */
            printUsage(pname, FALSE);
            return U_ILLEGAL_ARGUMENT_ERROR;
        }
        outType=type[0];
        switch(outType) {
        case 'l':
        case 'b':
        case 'e':
            break;
        default:
            printUsage(pname, FALSE);
            return U_ILLEGAL_ARGUMENT_ERROR;
        }

        /*
         * Set the isModified flag if the output type differs from the
         * input package type.
         * If we swap a single file, just assume that we are modifying it.
         * The Package class does not give us access to the item and its type.
         */
        isModified|=(UBool)(!isPackage || outType!=pkg->getInType());
    } else if(isPackage) {
        outType=pkg->getInType(); // default to input type
    } else /* !isPackage: swap single file */ {
        outType=0; /* tells extractItem() to not swap */
    }

    if(options[OPT_WRITEPKG].doesOccur) {
        isModified=TRUE;
    }

    if(!isPackage) {
        /*
         * icuswap tool replacement: Only swap a single file.
         * Check that irrelevant options are not set.
         */
        if( options[OPT_COMMENT].doesOccur ||
            options[OPT_COPYRIGHT].doesOccur ||
            options[OPT_MATCHMODE].doesOccur ||
            options[OPT_REMOVE_LIST].doesOccur ||
            options[OPT_ADD_LIST].doesOccur ||
            options[OPT_EXTRACT_LIST].doesOccur ||
            options[OPT_LIST_ITEMS].doesOccur
        ) {
            printUsage(pname, FALSE);
            return U_ILLEGAL_ARGUMENT_ERROR;
        }
        if(isModified) {
            pkg->extractItem(destPath, outFilename, 0, outType);
        }

        delete pkg;
        return result;
    }

    /* Work with a package. */

    if(options[OPT_COMMENT].doesOccur) {
        outComment=options[OPT_COMMENT].value;
    } else if(options[OPT_COPYRIGHT].doesOccur) {
        outComment=U_COPYRIGHT_STRING;
    } else {
        outComment=NULL;
    }

    if(options[OPT_MATCHMODE].doesOccur) {
        if(0==strcmp(options[OPT_MATCHMODE].value, "noslash")) {
            pkg->setMatchMode(Package::MATCH_NOSLASH);
        } else {
            printUsage(pname, FALSE);
            return U_ILLEGAL_ARGUMENT_ERROR;
        }
    }

    /* remove items */
    if(options[OPT_REMOVE_LIST].doesOccur) {
        listPkg=readList(NULL, options[OPT_REMOVE_LIST].value, FALSE);
        if(listPkg!=NULL) {
            pkg->removeItems(*listPkg);
            delete listPkg;
            isModified=TRUE;
        } else {
            printUsage(pname, FALSE);
            return U_ILLEGAL_ARGUMENT_ERROR;
        }
    }

    /*
     * add items
     * use a separate Package so that its memory and items stay around
     * as long as the main Package
     */
    addListPkg=NULL;
    if(options[OPT_ADD_LIST].doesOccur) {
        addListPkg=readList(sourcePath, options[OPT_ADD_LIST].value, TRUE);
        if(addListPkg!=NULL) {
            pkg->addItems(*addListPkg);
            // delete addListPkg; deferred until after writePackage()
            isModified=TRUE;
        } else {
            printUsage(pname, FALSE);
            return U_ILLEGAL_ARGUMENT_ERROR;
        }
    }

    /* extract items */
    if(options[OPT_EXTRACT_LIST].doesOccur) {
        listPkg=readList(NULL, options[OPT_EXTRACT_LIST].value, FALSE);
        if(listPkg!=NULL) {
            pkg->extractItems(destPath, *listPkg, outType);
            delete listPkg;
        } else {
            printUsage(pname, FALSE);
            return U_ILLEGAL_ARGUMENT_ERROR;
        }
    }

    /* list items */
    if(options[OPT_LIST_ITEMS].doesOccur) {
        int32_t i;
        if (options[OPT_LIST_FILE].doesOccur) {
            FileStream *out;
            out = T_FileStream_open(options[OPT_LIST_FILE].value, "w");
            if (out != NULL) {
                for(i=0; i<pkg->getItemCount(); ++i) {
                    T_FileStream_writeLine(out, pkg->getItem(i)->name);
                    T_FileStream_writeLine(out, "\n");
                }
                T_FileStream_close(out);
            } else {
                return U_ILLEGAL_ARGUMENT_ERROR;
            }
        } else {
            for(i=0; i<pkg->getItemCount(); ++i) {
                fprintf(stdout, "%s\n", pkg->getItem(i)->name);
            }
        }
    }

    /* check dependencies between items */
    if(!pkg->checkDependencies()) {
        /* some dependencies are not fulfilled */
        return U_MISSING_RESOURCE_ERROR;
    }

    /* write the output .dat package if there are any modifications */
    if(isModified) {
        char outFilenameBuffer[1024]; // for auto-generated output filename, if necessary

        if(outFilename==NULL || outFilename[0]==0) {
            if(inFilename==NULL || inFilename[0]==0) {
                fprintf(stderr, "icupkg: unable to auto-generate an output filename if there is no input filename\n");
                exit(U_ILLEGAL_ARGUMENT_ERROR);
            }

            /*
             * auto-generate a filename:
             * copy the inFilename,
             * and if the last basename character matches the input file's type,
             * then replace it with the output file's type
             */
            char suffix[6]="?.dat";
            char *s;

            suffix[0]=pkg->getInType();
            strcpy(outFilenameBuffer, inFilename);
            s=strchr(outFilenameBuffer, 0);
            if((s-outFilenameBuffer)>5 && 0==memcmp(s-5, suffix, 5)) {
                *(s-5)=outType;
            }
            outFilename=outFilenameBuffer;
        }
        result = writePackageDatFile(outFilename, outComment, NULL, NULL, pkg, outType);
    }

    delete addListPkg;
    delete pkg;
    return result;
}