U_CAPI UBool U_EXPORT2 ucbuf_autodetect_fs(FileStream* in, const char** cp, UConverter** conv, int32_t* signatureLength, UErrorCode* error){ char start[8]; int32_t numRead; UChar target[1]={ 0 }; UChar* pTarget; const char* pStart; /* read a few bytes */ numRead=T_FileStream_read(in, start, sizeof(start)); *cp = ucnv_detectUnicodeSignature(start, numRead, signatureLength, error); /* unread the bytes beyond what was consumed for U+FEFF */ T_FileStream_rewind(in); if (*signatureLength > 0) { numRead = T_FileStream_read(in, start, *signatureLength); } if(*cp==NULL){ *conv =NULL; return FALSE; } /* open the converter for the detected Unicode charset */ *conv = ucnv_open(*cp,error); /* convert and ignore initial U+FEFF, and the buffer overflow */ pTarget = target; pStart = start; ucnv_toUnicode(*conv, &pTarget, target+1, &pStart, start+*signatureLength, NULL, FALSE, error); *signatureLength = (int32_t)(pStart - start); if(*error==U_BUFFER_OVERFLOW_ERROR) { *error=U_ZERO_ERROR; } /* verify that we successfully read exactly U+FEFF */ if(U_SUCCESS(*error) && (pTarget!=(target+1) || target[0]!=0xfeff)) { *error=U_INTERNAL_PROGRAM_ERROR; } return TRUE; }
/* rewind the buf and file stream */ U_CAPI void U_EXPORT2 ucbuf_rewind(UCHARBUF* buf,UErrorCode* error){ if(error==NULL || U_FAILURE(*error)){ return; } if(buf){ buf->currentPos=buf->buffer; buf->bufLimit=buf->buffer; T_FileStream_rewind(buf->in); buf->remaining=T_FileStream_size(buf->in)-buf->signatureLength; ucnv_resetToUnicode(buf->conv); if(buf->signatureLength>0) { UChar target[1]={ 0 }; UChar* pTarget; char start[8]; const char* pStart; int32_t numRead; /* read the signature bytes */ numRead=T_FileStream_read(buf->in, start, buf->signatureLength); /* convert and ignore initial U+FEFF, and the buffer overflow */ pTarget = target; pStart = start; ucnv_toUnicode(buf->conv, &pTarget, target+1, &pStart, start+numRead, NULL, FALSE, error); if(*error==U_BUFFER_OVERFLOW_ERROR) { *error=U_ZERO_ERROR; } /* verify that we successfully read exactly U+FEFF */ if(U_SUCCESS(*error) && (numRead!=buf->signatureLength || pTarget!=(target+1) || target[0]!=0xfeff)) { *error=U_INTERNAL_PROGRAM_ERROR; } } } }
static void TestFileStream(void){ int32_t c = 0; int32_t c1=0; UErrorCode status = U_ZERO_ERROR; const char* testdatapath = loadTestData(&status); char* fileName = (char*) malloc(uprv_strlen(testdatapath) +10); FileStream* stream = NULL; /* these should not be closed */ FileStream* pStdin = T_FileStream_stdin(); FileStream* pStdout = T_FileStream_stdout(); FileStream* pStderr = T_FileStream_stderr(); const char* testline = "This is a test line"; int32_t bufLen = (int32_t)strlen(testline)+10; char* buf = (char*) malloc(bufLen); int32_t retLen = 0; if(pStdin==NULL){ log_err("failed to get T_FileStream_stdin()"); } if(pStdout==NULL){ log_err("failed to get T_FileStream_stdout()"); } if(pStderr==NULL){ log_err("failed to get T_FileStream_stderr()"); } uprv_strcpy(fileName,testdatapath); uprv_strcat(fileName,".dat"); stream = T_FileStream_open(fileName, "r"); if(stream==NULL){ log_data_err("T_FileStream_open failed to open %s\n",fileName); } else { if(!T_FileStream_file_exists(fileName)){ log_data_err("T_FileStream_file_exists failed to verify existence of %s \n",fileName); } retLen=T_FileStream_read(stream,&c,1); if(retLen==0){ log_data_err("T_FileStream_read failed to read from %s \n",fileName); } retLen=0; T_FileStream_rewind(stream); T_FileStream_read(stream,&c1,1); if(c!=c1){ log_data_err("T_FileStream_rewind failed to rewind %s \n",fileName); } T_FileStream_rewind(stream); c1 = T_FileStream_peek(stream); if(c!=c1){ log_data_err("T_FileStream_peek failed to peekd %s \n",fileName); } c = T_FileStream_getc(stream); T_FileStream_ungetc(c,stream); if(c!= T_FileStream_getc(stream)){ log_data_err("T_FileStream_ungetc failed to d %s \n",fileName); } if(T_FileStream_size(stream)<=0){ log_data_err("T_FileStream_size failed to d %s \n",fileName); } if(T_FileStream_error(stream)){ log_data_err("T_FileStream_error shouldn't have an error %s\n",fileName); } if(!T_FileStream_error(NULL)){ log_err("T_FileStream_error didn't get an error %s\n",fileName); } T_FileStream_putc(stream, 0x20); if(!T_FileStream_error(stream)){ /* Warning writing to a read-only file may not consistently fail on all platforms (e.g. HP-UX, FreeBSD, MacOSX) */ log_verbose("T_FileStream_error didn't get an error when writing to a readonly file %s\n",fileName); } T_FileStream_close(stream); } /* test writing function */ stream=NULL; uprv_strcpy(fileName,testdatapath); uprv_strcat(fileName,".tmp"); stream = T_FileStream_open(fileName,"w+"); if(stream == NULL){ log_data_err("Could not open %s for writing\n",fileName); } else { c= '$'; T_FileStream_putc(stream,c); T_FileStream_rewind(stream); if(c != T_FileStream_getc(stream)){ log_data_err("T_FileStream_putc failed %s\n",fileName); } T_FileStream_rewind(stream); T_FileStream_writeLine(stream,testline); T_FileStream_rewind(stream); T_FileStream_readLine(stream,buf,bufLen); if(uprv_strncmp(testline, buf,uprv_strlen(buf))!=0){ log_data_err("T_FileStream_writeLine failed %s\n",fileName); } T_FileStream_rewind(stream); T_FileStream_write(stream,testline,(int32_t)strlen(testline)); T_FileStream_rewind(stream); retLen = T_FileStream_read(stream, buf, bufLen); if(uprv_strncmp(testline, buf,retLen)!=0){ log_data_err("T_FileStream_write failed %s\n",fileName); } T_FileStream_close(stream); } if(!T_FileStream_remove(fileName)){ log_data_err("T_FileStream_remove failed to delete %s\n",fileName); } free(fileName); free(buf); }
U_CAPI void U_EXPORT2 writeCCode(const char *filename, const char *destdir, const char *optName, const char *optFilename, char *outFilePath) { uint32_t column = MAX_COLUMN; char buffer[4096], entry[64]; FileStream *in, *out; size_t i, length; in=T_FileStream_open(filename, "rb"); if(in==NULL) { fprintf(stderr, "genccode: unable to open input file %s\n", filename); exit(U_FILE_ACCESS_ERROR); } if(optName != NULL) { /* prepend 'icudt28_' */ strcpy(entry, optName); strcat(entry, "_"); } else { entry[0] = 0; } getOutFilename(filename, destdir, buffer, entry+uprv_strlen(entry), ".c", optFilename); if (outFilePath != NULL) { uprv_strcpy(outFilePath, buffer); } out=T_FileStream_open(buffer, "w"); if(out==NULL) { fprintf(stderr, "genccode: unable to open output file %s\n", buffer); exit(U_FILE_ACCESS_ERROR); } /* turn dashes or dots in the entry name into underscores */ length=uprv_strlen(entry); for(i=0; i<length; ++i) { if(entry[i]=='-' || entry[i]=='.') { entry[i]='_'; } } #if U_PLATFORM == U_PF_OS400 /* TODO: Fix this once the compiler implements this feature. Keep in sync with udatamem.c This is here because this platform can't currently put const data into the read-only pages of an object or shared library (service program). Only strings are allowed in read-only pages, so we use char * strings to store the data. In order to prevent the beginning of the data from ever matching the magic numbers we must still use the initial double. [grhoten 4/24/2003] */ sprintf(buffer, "#ifndef IN_GENERATED_CCODE\n" "#define IN_GENERATED_CCODE\n" "#define U_DISABLE_RENAMING 1\n" "#include \"unicode/umachine.h\"\n" "#endif\n" "U_CDECL_BEGIN\n" "const struct {\n" " double bogus;\n" " const char *bytes; \n" "} %s={ 0.0, \n", entry); T_FileStream_writeLine(out, buffer); for(;;) { length=T_FileStream_read(in, buffer, sizeof(buffer)); if(length==0) { break; } for(i=0; i<length; ++i) { column = write8str(out, (uint8_t)buffer[i], column); } } T_FileStream_writeLine(out, "\"\n};\nU_CDECL_END\n"); #else /* Function renaming shouldn't be done in data */ sprintf(buffer, "#ifndef IN_GENERATED_CCODE\n" "#define IN_GENERATED_CCODE\n" "#define U_DISABLE_RENAMING 1\n" "#include \"unicode/umachine.h\"\n" "#endif\n" "U_CDECL_BEGIN\n" "const struct {\n" " double bogus;\n" " uint8_t bytes[%ld]; \n" "} %s={ 0.0, {\n", (long)T_FileStream_size(in), entry); T_FileStream_writeLine(out, buffer); for(;;) { length=T_FileStream_read(in, buffer, sizeof(buffer)); if(length==0) { break; } for(i=0; i<length; ++i) { column = write8(out, (uint8_t)buffer[i], column); } } T_FileStream_writeLine(out, "\n}\n};\nU_CDECL_END\n"); #endif if(T_FileStream_error(in)) { fprintf(stderr, "genccode: file read error while generating from file %s\n", filename); exit(U_FILE_ACCESS_ERROR); } if(T_FileStream_error(out)) { fprintf(stderr, "genccode: file write error while generating from file %s\n", filename); exit(U_FILE_ACCESS_ERROR); } T_FileStream_close(out); T_FileStream_close(in); }
U_CAPI void U_EXPORT2 writeAssemblyCode(const char *filename, const char *destdir, const char *optEntryPoint, const char *optFilename, char *outFilePath) { uint32_t column = MAX_COLUMN; char entry[64]; uint32_t buffer[1024]; char *bufferStr = (char *)buffer; FileStream *in, *out; size_t i, length; in=T_FileStream_open(filename, "rb"); if(in==NULL) { fprintf(stderr, "genccode: unable to open input file %s\n", filename); exit(U_FILE_ACCESS_ERROR); } getOutFilename(filename, destdir, bufferStr, entry, ".S", optFilename); out=T_FileStream_open(bufferStr, "w"); if(out==NULL) { fprintf(stderr, "genccode: unable to open output file %s\n", bufferStr); exit(U_FILE_ACCESS_ERROR); } if (outFilePath != NULL) { uprv_strcpy(outFilePath, bufferStr); } #ifdef WINDOWS_WITH_GNUC /* Need to fix the file seperator character when using MinGW. */ swapFileSepChar(outFilePath, U_FILE_SEP_CHAR, '/'); #endif if(optEntryPoint != NULL) { uprv_strcpy(entry, optEntryPoint); uprv_strcat(entry, "_dat"); } /* turn dashes or dots in the entry name into underscores */ length=uprv_strlen(entry); for(i=0; i<length; ++i) { if(entry[i]=='-' || entry[i]=='.') { entry[i]='_'; } } sprintf(bufferStr, assemblyHeader[assemblyHeaderIndex].header, entry, entry, entry, entry, entry, entry, entry, entry); T_FileStream_writeLine(out, bufferStr); T_FileStream_writeLine(out, assemblyHeader[assemblyHeaderIndex].beginLine); for(;;) { length=T_FileStream_read(in, buffer, sizeof(buffer)); if(length==0) { break; } if (length != sizeof(buffer)) { /* pad with extra 0's when at the end of the file */ for(i=0; i < (length % sizeof(uint32_t)); ++i) { buffer[length+i] = 0; } } for(i=0; i<(length/sizeof(buffer[0])); i++) { column = write32(out, buffer[i], column); } } T_FileStream_writeLine(out, "\n"); sprintf(bufferStr, assemblyHeader[assemblyHeaderIndex].footer, entry, entry, entry, entry, entry, entry, entry, entry); T_FileStream_writeLine(out, bufferStr); if(T_FileStream_error(in)) { fprintf(stderr, "genccode: file read error while generating from file %s\n", filename); exit(U_FILE_ACCESS_ERROR); } if(T_FileStream_error(out)) { fprintf(stderr, "genccode: file write error while generating from file %s\n", filename); exit(U_FILE_ACCESS_ERROR); } T_FileStream_close(out); T_FileStream_close(in); }
UXMLElement * UXMLParser::parseFile(const char *filename, UErrorCode &errorCode) { char bytes[4096], charsetBuffer[100]; FileStream *f; const char *charset, *pb; UnicodeString src; UConverter *cnv; UChar *buffer, *pu; int32_t fileLength, bytesLength, length, capacity; UBool flush; if(U_FAILURE(errorCode)) { return NULL; } f=T_FileStream_open(filename, "rb"); if(f==NULL) { errorCode=U_FILE_ACCESS_ERROR; return NULL; } bytesLength=T_FileStream_read(f, bytes, (int32_t)sizeof(bytes)); if(bytesLength<(int32_t)sizeof(bytes)) { // we have already read the entire file fileLength=bytesLength; } else { // get the file length fileLength=T_FileStream_size(f); } /* * get the charset: * 1. Unicode signature * 2. treat as ISO-8859-1 and read XML encoding="charser" * 3. default to UTF-8 */ charset=ucnv_detectUnicodeSignature(bytes, bytesLength, NULL, &errorCode); if(U_SUCCESS(errorCode) && charset!=NULL) { // open converter according to Unicode signature cnv=ucnv_open(charset, &errorCode); } else { // read as Latin-1 and parse the XML declaration and encoding cnv=ucnv_open("ISO-8859-1", &errorCode); if(U_FAILURE(errorCode)) { // unexpected error opening Latin-1 converter goto exit; } buffer=src.getBuffer(bytesLength); if(buffer==NULL) { // unexpected failure to reserve some string capacity errorCode=U_MEMORY_ALLOCATION_ERROR; goto exit; } pb=bytes; pu=buffer; ucnv_toUnicode( cnv, &pu, buffer+src.getCapacity(), &pb, bytes+bytesLength, NULL, TRUE, &errorCode); src.releaseBuffer(U_SUCCESS(errorCode) ? (int32_t)(pu-buffer) : 0); ucnv_close(cnv); cnv=NULL; if(U_FAILURE(errorCode)) { // unexpected error in conversion from Latin-1 src.remove(); goto exit; } // parse XML declaration if(mXMLDecl.reset(src).lookingAt(0, errorCode)) { int32_t declEnd=mXMLDecl.end(errorCode); // go beyond <?xml int32_t pos=src.indexOf((UChar)x_l)+1; mAttrValue.reset(src); while(pos<declEnd && mAttrValue.lookingAt(pos, errorCode)) { // loop runs once per attribute on this element. UnicodeString attName = mAttrValue.group(1, errorCode); UnicodeString attValue = mAttrValue.group(2, errorCode); // Trim the quotes from the att value. These are left over from the original regex // that parsed the attribue, which couldn't conveniently strip them. attValue.remove(0,1); // one char from the beginning attValue.truncate(attValue.length()-1); // and one from the end. if(attName==UNICODE_STRING("encoding", 8)) { length=attValue.extract(0, 0x7fffffff, charsetBuffer, (int32_t)sizeof(charsetBuffer)); charset=charsetBuffer; break; } pos = mAttrValue.end(2, errorCode); } if(charset==NULL) { // default to UTF-8 charset="UTF-8"; } cnv=ucnv_open(charset, &errorCode); } } if(U_FAILURE(errorCode)) { // unable to open the converter goto exit; } // convert the file contents capacity=fileLength; // estimated capacity src.getBuffer(capacity); src.releaseBuffer(0); // zero length flush=FALSE; for(;;) { // convert contents of bytes[bytesLength] pb=bytes; for(;;) { length=src.length(); buffer=src.getBuffer(capacity); if(buffer==NULL) { // unexpected failure to reserve some string capacity errorCode=U_MEMORY_ALLOCATION_ERROR; goto exit; } pu=buffer+length; ucnv_toUnicode( cnv, &pu, buffer+src.getCapacity(), &pb, bytes+bytesLength, NULL, FALSE, &errorCode); src.releaseBuffer(U_SUCCESS(errorCode) ? (int32_t)(pu-buffer) : 0); if(errorCode==U_BUFFER_OVERFLOW_ERROR) { errorCode=U_ZERO_ERROR; capacity=(3*src.getCapacity())/2; // increase capacity by 50% } else { break; } } if(U_FAILURE(errorCode)) { break; // conversion error } if(flush) { break; // completely converted the file } // read next block bytesLength=T_FileStream_read(f, bytes, (int32_t)sizeof(bytes)); if(bytesLength==0) { // reached end of file, convert once more to flush the converter flush=TRUE; } }; exit: ucnv_close(cnv); T_FileStream_close(f); if(U_SUCCESS(errorCode)) { return parse(src, errorCode); } else { return NULL; } }
/* fill the uchar buffer */ static UCHARBUF* ucbuf_fillucbuf( UCHARBUF* buf,UErrorCode* error){ UChar* pTarget=NULL; UChar* target=NULL; const char* source=NULL; char carr[MAX_IN_BUF] = {'\0'}; char* cbuf = carr; int32_t inputRead=0; int32_t outputWritten=0; int32_t offset=0; const char* sourceLimit =NULL; int32_t cbufSize=0; pTarget = buf->buffer; /* check if we arrived here without exhausting the buffer*/ if(buf->currentPos<buf->bufLimit){ offset = (int32_t)(buf->bufLimit-buf->currentPos); memmove(buf->buffer,buf->currentPos,offset* sizeof(UChar)); } #if DEBUG memset(pTarget+offset,0xff,sizeof(UChar)*(MAX_IN_BUF-offset)); #endif if(buf->isBuffered){ cbufSize = MAX_IN_BUF; /* read the file */ inputRead=T_FileStream_read(buf->in,cbuf,cbufSize-offset); buf->remaining-=inputRead; }else{ cbufSize = T_FileStream_size(buf->in); cbuf = (char*)uprv_malloc(cbufSize); if (cbuf == NULL) { *error = U_MEMORY_ALLOCATION_ERROR; return NULL; } inputRead= T_FileStream_read(buf->in,cbuf,cbufSize); buf->remaining-=inputRead; } /* just to be sure...*/ if ( 0 == inputRead ) buf->remaining = 0; target=pTarget; /* convert the bytes */ if(buf->conv){ /* set the callback to stop */ UConverterToUCallback toUOldAction ; void* toUOldContext; void* toUNewContext=NULL; ucnv_setToUCallBack(buf->conv, UCNV_TO_U_CALLBACK_STOP, toUNewContext, &toUOldAction, (const void**)&toUOldContext, error); /* since state is saved in the converter we add offset to source*/ target = pTarget+offset; source = cbuf; sourceLimit = source + inputRead; ucnv_toUnicode(buf->conv,&target,target+(buf->bufCapacity-offset), &source,sourceLimit,NULL, (UBool)(buf->remaining==0),error); if(U_FAILURE(*error)){ char context[CONTEXT_LEN+1]; char preContext[CONTEXT_LEN+1]; char postContext[CONTEXT_LEN+1]; int8_t len = CONTEXT_LEN; int32_t start=0; int32_t stop =0; int32_t pos =0; /* use erro1 to preserve the error code */ UErrorCode error1 =U_ZERO_ERROR; if( buf->showWarning==TRUE){ fprintf(stderr,"\n###WARNING: Encountered abnormal bytes while" " converting input stream to target encoding: %s\n", u_errorName(*error)); } /* now get the context chars */ ucnv_getInvalidChars(buf->conv,context,&len,&error1); context[len]= 0 ; /* null terminate the buffer */ pos = (int32_t)(source - cbuf - len); /* for pre-context */ start = (pos <=CONTEXT_LEN)? 0 : (pos - (CONTEXT_LEN-1)); stop = pos-len; memcpy(preContext,cbuf+start,stop-start); /* null terminate the buffer */ preContext[stop-start] = 0; /* for post-context */ start = pos+len; stop = (int32_t)(((pos+CONTEXT_LEN)<= (sourceLimit-cbuf) )? (pos+(CONTEXT_LEN-1)) : (sourceLimit-cbuf)); memcpy(postContext,source,stop-start); /* null terminate the buffer */ postContext[stop-start] = 0; if(buf->showWarning ==TRUE){ /* print out the context */ fprintf(stderr,"\tPre-context: %s\n",preContext); fprintf(stderr,"\tContext: %s\n",context); fprintf(stderr,"\tPost-context: %s\n", postContext); } /* reset the converter */ ucnv_reset(buf->conv); /* set the call back to substitute * and restart conversion */ ucnv_setToUCallBack(buf->conv, UCNV_TO_U_CALLBACK_SUBSTITUTE, toUNewContext, &toUOldAction, (const void**)&toUOldContext, &error1); /* reset source and target start positions */ target = pTarget+offset; source = cbuf; /* re convert */ ucnv_toUnicode(buf->conv,&target,target+(buf->bufCapacity-offset), &source,sourceLimit,NULL, (UBool)(buf->remaining==0),&error1); } outputWritten = (int32_t)(target - pTarget); #if DEBUG { int i; target = pTarget; for(i=0;i<numRead;i++){ /* printf("%c", (char)(*target++));*/ } } #endif }else{ u_charsToUChars(cbuf,target+offset,inputRead); outputWritten=((buf->remaining>cbufSize)? cbufSize:inputRead+offset); } buf->currentPos = pTarget; buf->bufLimit=pTarget+outputWritten; *buf->bufLimit=0; /*NUL terminate*/ if(cbuf!=carr){ uprv_free(cbuf); } return buf; }
int main(int argc, char* argv[]) { UErrorCode status = U_ZERO_ERROR; const char *arg = NULL; const char *outputDir = NULL; /* NULL = no output directory, use current */ const char *inputDir = NULL; const char *encoding = ""; int i; U_MAIN_INIT_ARGS(argc, argv); argc = u_parseArgs(argc, argv, (int32_t)(sizeof(options)/sizeof(options[0])), options); /* error handling, printing usage message */ if(argc<0) { fprintf(stderr, "%s: error in command line argument \"%s\"\n", argv[0], argv[-argc]); } else if(argc<2) { argc = -1; } if(options[WRITE_POOL_BUNDLE].doesOccur && options[USE_POOL_BUNDLE].doesOccur) { fprintf(stderr, "%s: cannot combine --writePoolBundle and --usePoolBundle\n", argv[0]); argc = -1; } if(options[FORMAT_VERSION].doesOccur) { const char *s = options[FORMAT_VERSION].value; if(uprv_strlen(s) != 1 || (s[0] != '1' && s[0] != '2')) { fprintf(stderr, "%s: unsupported --formatVersion %s\n", argv[0], s); argc = -1; } else if(s[0] == '1' && (options[WRITE_POOL_BUNDLE].doesOccur || options[USE_POOL_BUNDLE].doesOccur) ) { fprintf(stderr, "%s: cannot combine --formatVersion 1 with --writePoolBundle or --usePoolBundle\n", argv[0]); argc = -1; } else { setFormatVersion(s[0] - '0'); } } if(options[VERSION].doesOccur) { fprintf(stderr, "%s version %s (ICU version %s).\n" "%s\n", argv[0], GENRB_VERSION, U_ICU_VERSION, U_COPYRIGHT_STRING); return U_ZERO_ERROR; } if(argc<0 || options[HELP1].doesOccur || options[HELP2].doesOccur) { /* * Broken into chunks because the C89 standard says the minimum * required supported string length is 509 bytes. */ fprintf(stderr, "Usage: %s [OPTIONS] [FILES]\n" "\tReads the list of resource bundle source files and creates\n" "\tbinary version of reosurce bundles (.res files)\n", argv[0]); fprintf(stderr, "Options:\n" "\t-h or -? or --help this usage text\n" "\t-q or --quiet do not display warnings\n" "\t-v or --verbose print extra information when processing files\n" "\t-V or --version prints out version number and exits\n" "\t-c or --copyright include copyright notice\n"); fprintf(stderr, "\t-e or --encoding encoding of source files\n" "\t-d of --destdir destination directory, followed by the path, defaults to %s\n" "\t-s or --sourcedir source directory for files followed by path, defaults to %s\n" "\t-i or --icudatadir directory for locating any needed intermediate data files,\n" "\t followed by path, defaults to %s\n", u_getDataDirectory(), u_getDataDirectory(), u_getDataDirectory()); fprintf(stderr, "\t-j or --write-java write a Java ListResourceBundle for ICU4J, followed by optional encoding\n" "\t defaults to ASCII and \\uXXXX format.\n"); /* This option is deprecated and should not be used ever. "\t-p or --package-name For ICU4J: package name for writing the ListResourceBundle for ICU4J,\n" "\t defaults to com.ibm.icu.impl.data\n"); */ fprintf(stderr, "\t-b or --bundle-name bundle name for writing the ListResourceBundle for ICU4J,\n" "\t defaults to LocaleElements\n" "\t-x or --write-xliff write an XLIFF file for the resource bundle. Followed by\n" "\t an optional output file name.\n" "\t-k or --strict use pedantic parsing of syntax\n" /*added by Jing*/ "\t-l or --language for XLIFF: language code compliant with BCP 47.\n"); fprintf(stderr, "\t-C or --noBinaryCollation do not generate binary collation image;\n" "\t makes .res file smaller but collator instantiation much slower;\n" "\t maintains ability to get tailoring rules\n" "\t-R or --omitCollationRules do not include collation (tailoring) rules;\n" "\t makes .res file smaller and maintains collator instantiation speed\n" "\t but tailoring rules will not be available (they are rarely used)\n"); fprintf(stderr, "\t --formatVersion write a .res file compatible with the requested formatVersion (single digit);\n" "\t for example, --formatVersion 1\n"); fprintf(stderr, "\t --writePoolBundle write a pool.res file with all of the keys of all input bundles\n" "\t --usePoolBundle [path-to-pool.res] point to keys from the pool.res keys pool bundle if they are available there;\n" "\t makes .res files smaller but dependent on the pool bundle\n" "\t (--writePoolBundle and --usePoolBundle cannot be combined)\n"); return argc < 0 ? U_ILLEGAL_ARGUMENT_ERROR : U_ZERO_ERROR; } if(options[VERBOSE].doesOccur) { setVerbose(TRUE); } if(options[QUIET].doesOccur) { setShowWarning(FALSE); } if(options[STRICT].doesOccur) { setStrict(TRUE); } if(options[COPYRIGHT].doesOccur){ setIncludeCopyright(TRUE); } if(options[SOURCEDIR].doesOccur) { inputDir = options[SOURCEDIR].value; } if(options[DESTDIR].doesOccur) { outputDir = options[DESTDIR].value; } /* This option is deprecated and should never be used. if(options[PACKAGE_NAME].doesOccur) { gPackageName = options[PACKAGE_NAME].value; if(!strcmp(gPackageName, "ICUDATA")) { gPackageName = U_ICUDATA_NAME; } if(gPackageName[0] == 0) { gPackageName = NULL; } }*/ if(options[ENCODING].doesOccur) { encoding = options[ENCODING].value; } if(options[ICUDATADIR].doesOccur) { u_setDataDirectory(options[ICUDATADIR].value); } /* Initialize ICU */ u_init(&status); if (U_FAILURE(status) && status != U_FILE_ACCESS_ERROR) { /* Note: u_init() will try to open ICU property data. * failures here are expected when building ICU from scratch. * ignore them. */ fprintf(stderr, "%s: can not initialize ICU. status = %s\n", argv[0], u_errorName(status)); exit(1); } status = U_ZERO_ERROR; if(options[WRITE_JAVA].doesOccur) { write_java = TRUE; outputEnc = options[WRITE_JAVA].value; } if(options[BUNDLE_NAME].doesOccur) { bundleName = options[BUNDLE_NAME].value; } if(options[WRITE_XLIFF].doesOccur) { write_xliff = TRUE; if(options[WRITE_XLIFF].value != NULL){ xliffOutputFileName = options[WRITE_XLIFF].value; } } initParser(options[NO_BINARY_COLLATION].doesOccur, options[NO_COLLATION_RULES].doesOccur); /*added by Jing*/ if(options[LANGUAGE].doesOccur) { language = options[LANGUAGE].value; } if(options[WRITE_POOL_BUNDLE].doesOccur) { newPoolBundle = bundle_open(NULL, TRUE, &status); if(U_FAILURE(status)) { fprintf(stderr, "unable to create an empty bundle for the pool keys: %s\n", u_errorName(status)); return status; } else { const char *poolResName = "pool.res"; char *nameWithoutSuffix = uprv_malloc(uprv_strlen(poolResName) + 1); if (nameWithoutSuffix == NULL) { fprintf(stderr, "out of memory error\n"); return U_MEMORY_ALLOCATION_ERROR; } uprv_strcpy(nameWithoutSuffix, poolResName); *uprv_strrchr(nameWithoutSuffix, '.') = 0; newPoolBundle->fLocale = nameWithoutSuffix; } } if(options[USE_POOL_BUNDLE].doesOccur) { const char *poolResName = "pool.res"; FileStream *poolFile; int32_t poolFileSize; int32_t indexLength; /* * TODO: Consolidate inputDir/filename handling from main() and processFile() * into a common function, and use it here as well. * Try to create toolutil functions for dealing with dir/filenames and * loading ICU data files without udata_open(). * Share code with icupkg? * Also, make_res_filename() seems to be unused. Review and remove. */ if (options[USE_POOL_BUNDLE].value!=NULL) { uprv_strcpy(theCurrentFileName, options[USE_POOL_BUNDLE].value); uprv_strcat(theCurrentFileName, U_FILE_SEP_STRING); } else if (inputDir) { uprv_strcpy(theCurrentFileName, inputDir); uprv_strcat(theCurrentFileName, U_FILE_SEP_STRING); } else { *theCurrentFileName = 0; } uprv_strcat(theCurrentFileName, poolResName); poolFile = T_FileStream_open(theCurrentFileName, "rb"); if (poolFile == NULL) { fprintf(stderr, "unable to open pool bundle file %s\n", theCurrentFileName); return 1; } poolFileSize = T_FileStream_size(poolFile); if (poolFileSize < 32) { fprintf(stderr, "the pool bundle file %s is too small\n", theCurrentFileName); return 1; } poolBundle.fBytes = (uint8_t *)uprv_malloc((poolFileSize + 15) & ~15); if (poolFileSize > 0 && poolBundle.fBytes == NULL) { fprintf(stderr, "unable to allocate memory for the pool bundle file %s\n", theCurrentFileName); return U_MEMORY_ALLOCATION_ERROR; } else { UDataSwapper *ds; const DataHeader *header; int32_t bytesRead = T_FileStream_read(poolFile, poolBundle.fBytes, poolFileSize); int32_t keysBottom; if (bytesRead != poolFileSize) { fprintf(stderr, "unable to read the pool bundle file %s\n", theCurrentFileName); return 1; } /* * Swap the pool bundle so that a single checked-in file can be used. * The swapper functions also test that the data looks like * a well-formed .res file. */ ds = udata_openSwapperForInputData(poolBundle.fBytes, bytesRead, U_IS_BIG_ENDIAN, U_CHARSET_FAMILY, &status); if (U_FAILURE(status)) { fprintf(stderr, "udata_openSwapperForInputData(pool bundle %s) failed: %s\n", theCurrentFileName, u_errorName(status)); return status; } ures_swap(ds, poolBundle.fBytes, bytesRead, poolBundle.fBytes, &status); udata_closeSwapper(ds); if (U_FAILURE(status)) { fprintf(stderr, "ures_swap(pool bundle %s) failed: %s\n", theCurrentFileName, u_errorName(status)); return status; } header = (const DataHeader *)poolBundle.fBytes; if (header->info.formatVersion[0]!=2) { fprintf(stderr, "invalid format of pool bundle file %s\n", theCurrentFileName); return U_INVALID_FORMAT_ERROR; } poolBundle.fKeys = (const char *)header + header->dataHeader.headerSize; poolBundle.fIndexes = (const int32_t *)poolBundle.fKeys + 1; indexLength = poolBundle.fIndexes[URES_INDEX_LENGTH] & 0xff; if (indexLength <= URES_INDEX_POOL_CHECKSUM) { fprintf(stderr, "insufficient indexes[] in pool bundle file %s\n", theCurrentFileName); return U_INVALID_FORMAT_ERROR; } keysBottom = (1 + indexLength) * 4; poolBundle.fKeys += keysBottom; poolBundle.fKeysLength = (poolBundle.fIndexes[URES_INDEX_KEYS_TOP] * 4) - keysBottom; poolBundle.fChecksum = poolBundle.fIndexes[URES_INDEX_POOL_CHECKSUM]; } for (i = 0; i < poolBundle.fKeysLength; ++i) { if (poolBundle.fKeys[i] == 0) { ++poolBundle.fKeysCount; } } T_FileStream_close(poolFile); setUsePoolBundle(TRUE); } if(options[INCLUDE_UNIHAN_COLL].doesOccur) { gIncludeUnihanColl = TRUE; } if((argc-1)!=1) { printf("genrb number of files: %d\n", argc - 1); } /* generate the binary files */ for(i = 1; i < argc; ++i) { status = U_ZERO_ERROR; arg = getLongPathname(argv[i]); if (inputDir) { uprv_strcpy(theCurrentFileName, inputDir); uprv_strcat(theCurrentFileName, U_FILE_SEP_STRING); } else { *theCurrentFileName = 0; } uprv_strcat(theCurrentFileName, arg); if (isVerbose()) { printf("Processing file \"%s\"\n", theCurrentFileName); } processFile(arg, encoding, inputDir, outputDir, gPackageName, &status); } uprv_free(poolBundle.fBytes); if(options[WRITE_POOL_BUNDLE].doesOccur) { char outputFileName[256]; bundle_write(newPoolBundle, outputDir, NULL, outputFileName, sizeof(outputFileName), &status); bundle_close(newPoolBundle, &status); if(U_FAILURE(status)) { fprintf(stderr, "unable to write the pool bundle: %s\n", u_errorName(status)); } } /* Dont return warnings as a failure */ if (U_SUCCESS(status)) { return 0; } return status; }
int main(int argc, char* argv[]) { UErrorCode status = U_ZERO_ERROR; const char *arg = NULL; const char *outputDir = NULL; /* NULL = no output directory, use current */ const char *inputDir = NULL; const char *encoding = ""; int i; UBool illegalArg = FALSE; U_MAIN_INIT_ARGS(argc, argv); options[JAVA_PACKAGE].value = "com.ibm.icu.impl.data"; options[BUNDLE_NAME].value = "LocaleElements"; argc = u_parseArgs(argc, argv, UPRV_LENGTHOF(options), options); /* error handling, printing usage message */ if(argc<0) { fprintf(stderr, "%s: error in command line argument \"%s\"\n", argv[0], argv[-argc]); illegalArg = TRUE; } else if(argc<2) { illegalArg = TRUE; } if(options[WRITE_POOL_BUNDLE].doesOccur && options[USE_POOL_BUNDLE].doesOccur) { fprintf(stderr, "%s: cannot combine --writePoolBundle and --usePoolBundle\n", argv[0]); illegalArg = TRUE; } if(options[FORMAT_VERSION].doesOccur) { const char *s = options[FORMAT_VERSION].value; if(uprv_strlen(s) != 1 || (s[0] < '1' && '3' < s[0])) { fprintf(stderr, "%s: unsupported --formatVersion %s\n", argv[0], s); illegalArg = TRUE; } else if(s[0] == '1' && (options[WRITE_POOL_BUNDLE].doesOccur || options[USE_POOL_BUNDLE].doesOccur) ) { fprintf(stderr, "%s: cannot combine --formatVersion 1 with --writePoolBundle or --usePoolBundle\n", argv[0]); illegalArg = TRUE; } else { setFormatVersion(s[0] - '0'); } } if((options[JAVA_PACKAGE].doesOccur || options[BUNDLE_NAME].doesOccur) && !options[WRITE_JAVA].doesOccur) { fprintf(stderr, "%s error: command line argument --java-package or --bundle-name " "without --write-java\n", argv[0]); illegalArg = TRUE; } if(options[VERSION].doesOccur) { fprintf(stderr, "%s version %s (ICU version %s).\n" "%s\n", argv[0], GENRB_VERSION, U_ICU_VERSION, U_COPYRIGHT_STRING); if(!illegalArg) { return U_ZERO_ERROR; } } if(illegalArg || options[HELP1].doesOccur || options[HELP2].doesOccur) { /* * Broken into chunks because the C89 standard says the minimum * required supported string length is 509 bytes. */ fprintf(stderr, "Usage: %s [OPTIONS] [FILES]\n" "\tReads the list of resource bundle source files and creates\n" "\tbinary version of resource bundles (.res files)\n", argv[0]); fprintf(stderr, "Options:\n" "\t-h or -? or --help this usage text\n" "\t-q or --quiet do not display warnings\n" "\t-v or --verbose print extra information when processing files\n" "\t-V or --version prints out version number and exits\n" "\t-c or --copyright include copyright notice\n"); fprintf(stderr, "\t-e or --encoding encoding of source files\n" "\t-d of --destdir destination directory, followed by the path, defaults to %s\n" "\t-s or --sourcedir source directory for files followed by path, defaults to %s\n" "\t-i or --icudatadir directory for locating any needed intermediate data files,\n" "\t followed by path, defaults to %s\n", u_getDataDirectory(), u_getDataDirectory(), u_getDataDirectory()); fprintf(stderr, "\t-j or --write-java write a Java ListResourceBundle for ICU4J, followed by optional encoding\n" "\t defaults to ASCII and \\uXXXX format.\n" "\t --java-package For --write-java: package name for writing the ListResourceBundle,\n" "\t defaults to com.ibm.icu.impl.data\n"); fprintf(stderr, "\t-b or --bundle-name For --write-java: root resource bundle name for writing the ListResourceBundle,\n" "\t defaults to LocaleElements\n" "\t-x or --write-xliff write an XLIFF file for the resource bundle. Followed by\n" "\t an optional output file name.\n" "\t-k or --strict use pedantic parsing of syntax\n" /*added by Jing*/ "\t-l or --language for XLIFF: language code compliant with BCP 47.\n"); fprintf(stderr, "\t-C or --noBinaryCollation do not generate binary collation image;\n" "\t makes .res file smaller but collator instantiation much slower;\n" "\t maintains ability to get tailoring rules\n" "\t-R or --omitCollationRules do not include collation (tailoring) rules;\n" "\t makes .res file smaller and maintains collator instantiation speed\n" "\t but tailoring rules will not be available (they are rarely used)\n"); fprintf(stderr, "\t --formatVersion write a .res file compatible with the requested formatVersion (single digit);\n" "\t for example, --formatVersion 1\n"); fprintf(stderr, "\t --writePoolBundle write a pool.res file with all of the keys of all input bundles\n" "\t --usePoolBundle [path-to-pool.res] point to keys from the pool.res keys pool bundle if they are available there;\n" "\t makes .res files smaller but dependent on the pool bundle\n" "\t (--writePoolBundle and --usePoolBundle cannot be combined)\n"); return illegalArg ? U_ILLEGAL_ARGUMENT_ERROR : U_ZERO_ERROR; } if(options[VERBOSE].doesOccur) { setVerbose(TRUE); } if(options[QUIET].doesOccur) { setShowWarning(FALSE); } if(options[STRICT].doesOccur) { setStrict(TRUE); } if(options[COPYRIGHT].doesOccur){ setIncludeCopyright(TRUE); } if(options[SOURCEDIR].doesOccur) { inputDir = options[SOURCEDIR].value; } if(options[DESTDIR].doesOccur) { outputDir = options[DESTDIR].value; } if(options[ENCODING].doesOccur) { encoding = options[ENCODING].value; } if(options[ICUDATADIR].doesOccur) { u_setDataDirectory(options[ICUDATADIR].value); } /* Initialize ICU */ u_init(&status); if (U_FAILURE(status) && status != U_FILE_ACCESS_ERROR) { /* Note: u_init() will try to open ICU property data. * failures here are expected when building ICU from scratch. * ignore them. */ fprintf(stderr, "%s: can not initialize ICU. status = %s\n", argv[0], u_errorName(status)); exit(1); } status = U_ZERO_ERROR; if(options[WRITE_JAVA].doesOccur) { write_java = TRUE; outputEnc = options[WRITE_JAVA].value; } if(options[WRITE_XLIFF].doesOccur) { write_xliff = TRUE; if(options[WRITE_XLIFF].value != NULL){ xliffOutputFileName = options[WRITE_XLIFF].value; } } initParser(); /*added by Jing*/ if(options[LANGUAGE].doesOccur) { language = options[LANGUAGE].value; } LocalPointer<SRBRoot> newPoolBundle; if(options[WRITE_POOL_BUNDLE].doesOccur) { newPoolBundle.adoptInsteadAndCheckErrorCode(new SRBRoot(NULL, TRUE, status), status); if(U_FAILURE(status)) { fprintf(stderr, "unable to create an empty bundle for the pool keys: %s\n", u_errorName(status)); return status; } else { const char *poolResName = "pool.res"; char *nameWithoutSuffix = static_cast<char *>(uprv_malloc(uprv_strlen(poolResName) + 1)); if (nameWithoutSuffix == NULL) { fprintf(stderr, "out of memory error\n"); return U_MEMORY_ALLOCATION_ERROR; } uprv_strcpy(nameWithoutSuffix, poolResName); *uprv_strrchr(nameWithoutSuffix, '.') = 0; newPoolBundle->fLocale = nameWithoutSuffix; } } if(options[USE_POOL_BUNDLE].doesOccur) { const char *poolResName = "pool.res"; FileStream *poolFile; int32_t poolFileSize; int32_t indexLength; /* * TODO: Consolidate inputDir/filename handling from main() and processFile() * into a common function, and use it here as well. * Try to create toolutil functions for dealing with dir/filenames and * loading ICU data files without udata_open(). * Share code with icupkg? * Also, make_res_filename() seems to be unused. Review and remove. */ CharString poolFileName; if (options[USE_POOL_BUNDLE].value!=NULL) { poolFileName.append(options[USE_POOL_BUNDLE].value, status); } else if (inputDir) { poolFileName.append(inputDir, status); } poolFileName.appendPathPart(poolResName, status); if (U_FAILURE(status)) { return status; } poolFile = T_FileStream_open(poolFileName.data(), "rb"); if (poolFile == NULL) { fprintf(stderr, "unable to open pool bundle file %s\n", poolFileName.data()); return 1; } poolFileSize = T_FileStream_size(poolFile); if (poolFileSize < 32) { fprintf(stderr, "the pool bundle file %s is too small\n", poolFileName.data()); return 1; } poolBundle.fBytes = new uint8_t[(poolFileSize + 15) & ~15]; if (poolFileSize > 0 && poolBundle.fBytes == NULL) { fprintf(stderr, "unable to allocate memory for the pool bundle file %s\n", poolFileName.data()); return U_MEMORY_ALLOCATION_ERROR; } UDataSwapper *ds; const DataHeader *header; int32_t bytesRead = T_FileStream_read(poolFile, poolBundle.fBytes, poolFileSize); if (bytesRead != poolFileSize) { fprintf(stderr, "unable to read the pool bundle file %s\n", poolFileName.data()); return 1; } /* * Swap the pool bundle so that a single checked-in file can be used. * The swapper functions also test that the data looks like * a well-formed .res file. */ ds = udata_openSwapperForInputData(poolBundle.fBytes, bytesRead, U_IS_BIG_ENDIAN, U_CHARSET_FAMILY, &status); if (U_FAILURE(status)) { fprintf(stderr, "udata_openSwapperForInputData(pool bundle %s) failed: %s\n", poolFileName.data(), u_errorName(status)); return status; } ures_swap(ds, poolBundle.fBytes, bytesRead, poolBundle.fBytes, &status); udata_closeSwapper(ds); if (U_FAILURE(status)) { fprintf(stderr, "ures_swap(pool bundle %s) failed: %s\n", poolFileName.data(), u_errorName(status)); return status; } header = (const DataHeader *)poolBundle.fBytes; if (header->info.formatVersion[0] < 2) { fprintf(stderr, "invalid format of pool bundle file %s\n", poolFileName.data()); return U_INVALID_FORMAT_ERROR; } const int32_t *pRoot = (const int32_t *)( (const char *)header + header->dataHeader.headerSize); poolBundle.fIndexes = pRoot + 1; indexLength = poolBundle.fIndexes[URES_INDEX_LENGTH] & 0xff; if (indexLength <= URES_INDEX_POOL_CHECKSUM) { fprintf(stderr, "insufficient indexes[] in pool bundle file %s\n", poolFileName.data()); return U_INVALID_FORMAT_ERROR; } int32_t keysBottom = 1 + indexLength; int32_t keysTop = poolBundle.fIndexes[URES_INDEX_KEYS_TOP]; poolBundle.fKeys = (const char *)(pRoot + keysBottom); poolBundle.fKeysLength = (keysTop - keysBottom) * 4; poolBundle.fChecksum = poolBundle.fIndexes[URES_INDEX_POOL_CHECKSUM]; for (i = 0; i < poolBundle.fKeysLength; ++i) { if (poolBundle.fKeys[i] == 0) { ++poolBundle.fKeysCount; } } // 16BitUnits[] begins with strings-v2. // The strings-v2 may optionally be terminated by what looks like // an explicit string length that exceeds the number of remaining 16-bit units. int32_t stringUnitsLength = (poolBundle.fIndexes[URES_INDEX_16BIT_TOP] - keysTop) * 2; if (stringUnitsLength >= 2 && getFormatVersion() >= 3) { poolBundle.fStrings = new PseudoListResource(NULL, status); if (poolBundle.fStrings == NULL) { fprintf(stderr, "unable to allocate memory for the pool bundle strings %s\n", poolFileName.data()); return U_MEMORY_ALLOCATION_ERROR; } // The PseudoListResource constructor call did not allocate further memory. assert(U_SUCCESS(status)); const UChar *p = (const UChar *)(pRoot + keysTop); int32_t remaining = stringUnitsLength; do { int32_t first = *p; int8_t numCharsForLength; int32_t length; if (!U16_IS_TRAIL(first)) { // NUL-terminated numCharsForLength = 0; for (length = 0; length < remaining && p[length] != 0; ++length) {} } else if (first < 0xdfef) { numCharsForLength = 1; length = first & 0x3ff; } else if (first < 0xdfff && remaining >= 2) { numCharsForLength = 2; length = ((first - 0xdfef) << 16) | p[1]; } else if (first == 0xdfff && remaining >= 3) { numCharsForLength = 3; length = ((int32_t)p[1] << 16) | p[2]; } else { break; // overrun } // Check for overrun before changing remaining, // so that it is always accurate after the loop body. if ((numCharsForLength + length) >= remaining || p[numCharsForLength + length] != 0) { break; // overrun or explicitly terminated } int32_t poolStringIndex = stringUnitsLength - remaining; // Maximum pool string index when suffix-sharing the last character. int32_t maxStringIndex = poolStringIndex + numCharsForLength + length - 1; if (maxStringIndex >= RES_MAX_OFFSET) { // pool string index overrun break; } p += numCharsForLength; remaining -= numCharsForLength; if (length != 0) { StringResource *sr = new StringResource(poolStringIndex, numCharsForLength, p, length, status); if (sr == NULL) { fprintf(stderr, "unable to allocate memory for a pool bundle string %s\n", poolFileName.data()); return U_MEMORY_ALLOCATION_ERROR; } poolBundle.fStrings->add(sr); poolBundle.fStringIndexLimit = maxStringIndex + 1; // The StringResource constructor did not allocate further memory. assert(U_SUCCESS(status)); } p += length + 1; remaining -= length + 1; } while (remaining > 0); if (poolBundle.fStrings->fCount == 0) { delete poolBundle.fStrings; poolBundle.fStrings = NULL; } } T_FileStream_close(poolFile); setUsePoolBundle(TRUE); if (isVerbose() && poolBundle.fStrings != NULL) { printf("number of shared strings: %d\n", (int)poolBundle.fStrings->fCount); int32_t length = poolBundle.fStringIndexLimit + 1; // incl. last NUL printf("16-bit units for strings: %6d = %6d bytes\n", (int)length, (int)length * 2); } } if(!options[FORMAT_VERSION].doesOccur && getFormatVersion() == 3 && poolBundle.fStrings == NULL && !options[WRITE_POOL_BUNDLE].doesOccur) { // If we just default to formatVersion 3 // but there are no pool bundle strings to share // and we do not write a pool bundle, // then write formatVersion 2 which is just as good. setFormatVersion(2); } if(options[INCLUDE_UNIHAN_COLL].doesOccur) { puts("genrb option --includeUnihanColl ignored: \n" "CLDR 26/ICU 54 unihan data is small, except\n" "the ucadata-unihan.icu version of the collation root data\n" "is about 300kB larger than the ucadata-implicithan.icu version."); } if((argc-1)!=1) { printf("genrb number of files: %d\n", argc - 1); } /* generate the binary files */ for(i = 1; i < argc; ++i) { status = U_ZERO_ERROR; arg = getLongPathname(argv[i]); CharString theCurrentFileName; if (inputDir) { theCurrentFileName.append(inputDir, status); } theCurrentFileName.appendPathPart(arg, status); if (U_FAILURE(status)) { break; } gCurrentFileName = theCurrentFileName.data(); if (isVerbose()) { printf("Processing file \"%s\"\n", theCurrentFileName.data()); } processFile(arg, encoding, inputDir, outputDir, NULL, newPoolBundle.getAlias(), options[NO_BINARY_COLLATION].doesOccur, status); } poolBundle.close(); if(U_SUCCESS(status) && options[WRITE_POOL_BUNDLE].doesOccur) { char outputFileName[256]; newPoolBundle->write(outputDir, NULL, outputFileName, sizeof(outputFileName), status); if(U_FAILURE(status)) { fprintf(stderr, "unable to write the pool bundle: %s\n", u_errorName(status)); } } u_cleanup(); /* Dont return warnings as a failure */ if (U_SUCCESS(status)) { return 0; } return status; }
U_CAPI void U_EXPORT2 createCommonDataFile(const char *destDir, const char *name, const char *entrypointName, const char *type, const char *source, const char *copyRight, const char *dataFile, uint32_t max_size, UBool sourceTOC, UBool verbose, char *gencmnFileName) { static char buffer[4096]; char *line; char *linePtr; char *s = NULL; UErrorCode errorCode=U_ZERO_ERROR; uint32_t i, fileOffset, basenameOffset, length, nread; FileStream *in, *file; line = (char *)uprv_malloc(sizeof(char) * LINE_BUFFER_SIZE); if (line == NULL) { fprintf(stderr, "gencmn: unable to allocate memory for line buffer of size %d\n", LINE_BUFFER_SIZE); exit(U_MEMORY_ALLOCATION_ERROR); } linePtr = line; maxSize = max_size; if (destDir == NULL) { destDir = u_getDataDirectory(); } if (name == NULL) { name = COMMON_DATA_NAME; } if (type == NULL) { type = DATA_TYPE; } if (source == NULL) { source = "."; } if (dataFile == NULL) { in = T_FileStream_stdin(); } else { in = T_FileStream_open(dataFile, "r"); if(in == NULL) { fprintf(stderr, "gencmn: unable to open input file %s\n", dataFile); exit(U_FILE_ACCESS_ERROR); } } if (verbose) { if(sourceTOC) { printf("generating %s_%s.c (table of contents source file)\n", name, type); } else { printf("generating %s.%s (common data file with table of contents)\n", name, type); } } /* read the list of files and get their lengths */ while((s != NULL && *s != 0) || (s=T_FileStream_readLine(in, (line=linePtr), LINE_BUFFER_SIZE))!=NULL) { /* remove trailing newline characters and parse space separated items */ if (s != NULL && *s != 0) { line=s; } else { s=line; } while(*s!=0) { if(*s==' ') { *s=0; ++s; break; } else if(*s=='\r' || *s=='\n') { *s=0; break; } ++s; } /* check for comment */ if (*line == '#') { continue; } /* add the file */ #if (U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR) { char *t; while((t = uprv_strchr(line,U_FILE_ALT_SEP_CHAR))) { *t = U_FILE_SEP_CHAR; } } #endif addFile(getLongPathname(line), name, source, sourceTOC, verbose); } uprv_free(linePtr); if(in!=T_FileStream_stdin()) { T_FileStream_close(in); } if(fileCount==0) { fprintf(stderr, "gencmn: no files listed in %s\n", dataFile == NULL ? "<stdin>" : dataFile); return; } /* sort the files by basename */ qsort(files, fileCount, sizeof(File), compareFiles); if(!sourceTOC) { UNewDataMemory *out; /* determine the offsets of all basenames and files in this common one */ basenameOffset=4+8*fileCount; fileOffset=(basenameOffset+(basenameTotal+15))&~0xf; for(i=0; i<fileCount; ++i) { files[i].fileOffset=fileOffset; fileOffset+=(files[i].fileSize+15)&~0xf; files[i].basenameOffset=basenameOffset; basenameOffset+=files[i].basenameLength; } /* create the output file */ out=udata_create(destDir, type, name, &dataInfo, copyRight == NULL ? U_COPYRIGHT_STRING : copyRight, &errorCode); if(U_FAILURE(errorCode)) { fprintf(stderr, "gencmn: udata_create(-d %s -n %s -t %s) failed - %s\n", destDir, name, type, u_errorName(errorCode)); exit(errorCode); } /* write the table of contents */ udata_write32(out, fileCount); for(i=0; i<fileCount; ++i) { udata_write32(out, files[i].basenameOffset); udata_write32(out, files[i].fileOffset); } /* write the basenames */ for(i=0; i<fileCount; ++i) { udata_writeString(out, files[i].basename, files[i].basenameLength); } length=4+8*fileCount+basenameTotal; /* copy the files */ for(i=0; i<fileCount; ++i) { /* pad to 16-align the next file */ length&=0xf; if(length!=0) { udata_writePadding(out, 16-length); } if (verbose) { printf("adding %s (%ld byte%s)\n", files[i].pathname, (long)files[i].fileSize, files[i].fileSize == 1 ? "" : "s"); } /* copy the next file */ file=T_FileStream_open(files[i].pathname, "rb"); if(file==NULL) { fprintf(stderr, "gencmn: unable to open listed file %s\n", files[i].pathname); exit(U_FILE_ACCESS_ERROR); } for(nread = 0;;) { length=T_FileStream_read(file, buffer, sizeof(buffer)); if(length <= 0) { break; } nread += length; udata_writeBlock(out, buffer, length); } T_FileStream_close(file); length=files[i].fileSize; if (nread != files[i].fileSize) { fprintf(stderr, "gencmn: unable to read %s properly (got %ld/%ld byte%s)\n", files[i].pathname, (long)nread, (long)files[i].fileSize, files[i].fileSize == 1 ? "" : "s"); exit(U_FILE_ACCESS_ERROR); } } /* pad to 16-align the last file (cleaner, avoids growing .dat files in icuswap) */ length&=0xf; if(length!=0) { udata_writePadding(out, 16-length); } /* finish */ udata_finish(out, &errorCode); if(U_FAILURE(errorCode)) { fprintf(stderr, "gencmn: udata_finish() failed - %s\n", u_errorName(errorCode)); exit(errorCode); } } else { /* write a .c source file with the table of contents */ char *filename; FileStream *out; /* create the output filename */ filename=s=buffer; uprv_strcpy(filename, destDir); s=filename+uprv_strlen(filename); if(s>filename && *(s-1)!=U_FILE_SEP_CHAR) { *s++=U_FILE_SEP_CHAR; } uprv_strcpy(s, name); if(*(type)!=0) { s+=uprv_strlen(s); *s++='_'; uprv_strcpy(s, type); } s+=uprv_strlen(s); uprv_strcpy(s, ".c"); /* open the output file */ out=T_FileStream_open(filename, "w"); if (gencmnFileName != NULL) { uprv_strcpy(gencmnFileName, filename); } if(out==NULL) { fprintf(stderr, "gencmn: unable to open .c output file %s\n", filename); exit(U_FILE_ACCESS_ERROR); } /* write the source file */ sprintf(buffer, "/*\n" " * ICU common data table of contents for %s.%s\n" " * Automatically generated by icu/source/tools/gencmn/gencmn .\n" " */\n\n" "#include \"unicode/utypes.h\"\n" "#include \"unicode/udata.h\"\n" "\n" "/* external symbol declarations for data (%d files) */\n", name, type, fileCount); T_FileStream_writeLine(out, buffer); sprintf(buffer, "extern const char\n %s%s[]", symPrefix?symPrefix:"", files[0].pathname); T_FileStream_writeLine(out, buffer); for(i=1; i<fileCount; ++i) { sprintf(buffer, ",\n %s%s[]", symPrefix?symPrefix:"", files[i].pathname); T_FileStream_writeLine(out, buffer); } T_FileStream_writeLine(out, ";\n\n"); sprintf( buffer, "U_EXPORT struct {\n" " uint16_t headerSize;\n" " uint8_t magic1, magic2;\n" " UDataInfo info;\n" " char padding[%lu];\n" " uint32_t count, reserved;\n" " struct {\n" " const char *name;\n" " const void *data;\n" " } toc[%lu];\n" "} U_EXPORT2 %s_dat = {\n" " 32, 0xda, 0x27, {\n" " %lu, 0,\n" " %u, %u, %u, 0,\n" " {0x54, 0x6f, 0x43, 0x50},\n" " {1, 0, 0, 0},\n" " {0, 0, 0, 0}\n" " },\n" " \"\", %lu, 0, {\n", (unsigned long)32-4-sizeof(UDataInfo), (unsigned long)fileCount, entrypointName, (unsigned long)sizeof(UDataInfo), U_IS_BIG_ENDIAN, U_CHARSET_FAMILY, U_SIZEOF_UCHAR, (unsigned long)fileCount ); T_FileStream_writeLine(out, buffer); sprintf(buffer, " { \"%s\", %s%s }", files[0].basename, symPrefix?symPrefix:"", files[0].pathname); T_FileStream_writeLine(out, buffer); for(i=1; i<fileCount; ++i) { sprintf(buffer, ",\n { \"%s\", %s%s }", files[i].basename, symPrefix?symPrefix:"", files[i].pathname); T_FileStream_writeLine(out, buffer); } T_FileStream_writeLine(out, "\n }\n};\n"); T_FileStream_close(out); uprv_free(symPrefix); } }
extern int main(int argc, char* argv[]) { static char buffer[4096]; char line[512]; FileStream *in, *file; char *s; UErrorCode errorCode=U_ZERO_ERROR; uint32_t i, fileOffset, basenameOffset, length, nread; UBool sourceTOC, verbose; const char *entrypointName = NULL; U_MAIN_INIT_ARGS(argc, argv); /* preset then read command line options */ options[4].value=u_getDataDirectory(); options[6].value=COMMON_DATA_NAME; options[7].value=DATA_TYPE; options[10].value="."; argc=u_parseArgs(argc, argv, sizeof(options)/sizeof(options[0]), options); /* error handling, printing usage message */ if(argc<0) { fprintf(stderr, "error in command line argument \"%s\"\n", argv[-argc]); } else if(argc<2) { argc=-1; } if(argc<0 || options[0].doesOccur || options[1].doesOccur) { FILE *where = argc < 0 ? stderr : stdout; /* * Broken into chucks because the C89 standard says the minimum * required supported string length is 509 bytes. */ fprintf(where, "%csage: %s [ -h, -?, --help ] [ -v, --verbose ] [ -c, --copyright ] [ -C, --comment comment ] [ -d, --destdir dir ] [ -n, --name filename ] [ -t, --type filetype ] [ -S, --source tocfile ] [ -e, --entrypoint name ] maxsize listfile\n", argc < 0 ? 'u' : 'U', *argv); if (options[0].doesOccur || options[1].doesOccur) { fprintf(where, "\n" "Read the list file (default: standard input) and create a common data\n" "file from specified files. Omit any files larger than maxsize, if maxsize > 0.\n"); fprintf(where, "\n" "Options:\n" "\t-h, -?, --help this usage text\n" "\t-v, --verbose verbose output\n" "\t-c, --copyright include the ICU copyright notice\n" "\t-C, --comment comment include a comment string\n" "\t-d, --destdir dir destination directory\n"); fprintf(where, "\t-n, --name filename output filename, without .type extension\n" "\t (default: " COMMON_DATA_NAME ")\n" "\t-t, --type filetype type of the destination file\n" "\t (default: \"" DATA_TYPE "\")\n" "\t-S, --source tocfile write a .c source file with the table of\n" "\t contents\n" "\t-e, --entrypoint name override the c entrypoint name\n" "\t (default: \"<name>_<type>\")\n"); } return argc<0 ? U_ILLEGAL_ARGUMENT_ERROR : U_ZERO_ERROR; } sourceTOC=options[8].doesOccur; verbose = options[2].doesOccur; maxSize=(uint32_t)uprv_strtoul(argv[1], NULL, 0); if(argc==2) { in=T_FileStream_stdin(); } else { in=T_FileStream_open(argv[2], "r"); if(in==NULL) { fprintf(stderr, "gencmn: unable to open input file %s\n", argv[2]); exit(U_FILE_ACCESS_ERROR); } } if (verbose) { if(sourceTOC) { printf("generating %s_%s.c (table of contents source file)\n", options[6].value, options[7].value); } else { printf("generating %s.%s (common data file with table of contents)\n", options[6].value, options[7].value); } } /* read the list of files and get their lengths */ while(T_FileStream_readLine(in, line, sizeof(line))!=NULL) { /* remove trailing newline characters */ s=line; while(*s!=0) { if(*s=='\r' || *s=='\n') { *s=0; break; } ++s; } /* check for comment */ if (*line == '#') { continue; } /* add the file */ #if (U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR) { char *t; while((t = uprv_strchr(line,U_FILE_ALT_SEP_CHAR))) { *t = U_FILE_SEP_CHAR; } } #endif addFile(getLongPathname(line), sourceTOC, verbose); } if(in!=T_FileStream_stdin()) { T_FileStream_close(in); } if(fileCount==0) { fprintf(stderr, "gencmn: no files listed in %s\n", argc==2 ? "<stdin>" : argv[2]); return 0; } /* sort the files by basename */ qsort(files, fileCount, sizeof(File), compareFiles); if(!sourceTOC) { UNewDataMemory *out; /* determine the offsets of all basenames and files in this common one */ basenameOffset=4+8*fileCount; fileOffset=(basenameOffset+(basenameTotal+15))&~0xf; for(i=0; i<fileCount; ++i) { files[i].fileOffset=fileOffset; fileOffset+=(files[i].fileSize+15)&~0xf; files[i].basenameOffset=basenameOffset; basenameOffset+=files[i].basenameLength; } /* create the output file */ out=udata_create(options[4].value, options[7].value, options[6].value, &dataInfo, options[3].doesOccur ? U_COPYRIGHT_STRING : options[5].value, &errorCode); if(U_FAILURE(errorCode)) { fprintf(stderr, "gencmn: udata_create(-d %s -n %s -t %s) failed - %s\n", options[4].value, options[6].value, options[7].value, u_errorName(errorCode)); exit(errorCode); } /* write the table of contents */ udata_write32(out, fileCount); for(i=0; i<fileCount; ++i) { udata_write32(out, files[i].basenameOffset); udata_write32(out, files[i].fileOffset); } /* write the basenames */ for(i=0; i<fileCount; ++i) { udata_writeString(out, files[i].basename, files[i].basenameLength); } length=4+8*fileCount+basenameTotal; /* copy the files */ for(i=0; i<fileCount; ++i) { /* pad to 16-align the next file */ length&=0xf; if(length!=0) { udata_writePadding(out, 16-length); } if (verbose) { printf("adding %s (%ld byte%s)\n", files[i].pathname, (long)files[i].fileSize, files[i].fileSize == 1 ? "" : "s"); } /* copy the next file */ file=T_FileStream_open(files[i].pathname, "rb"); if(file==NULL) { fprintf(stderr, "gencmn: unable to open listed file %s\n", files[i].pathname); exit(U_FILE_ACCESS_ERROR); } for(nread = 0;;) { length=T_FileStream_read(file, buffer, sizeof(buffer)); if(length <= 0) { break; } nread += length; udata_writeBlock(out, buffer, length); } T_FileStream_close(file); length=files[i].fileSize; if (nread != files[i].fileSize) { fprintf(stderr, "gencmn: unable to read %s properly (got %ld/%ld byte%s)\n", files[i].pathname, (long)nread, (long)files[i].fileSize, files[i].fileSize == 1 ? "" : "s"); exit(U_FILE_ACCESS_ERROR); } } /* pad to 16-align the last file (cleaner, avoids growing .dat files in icuswap) */ length&=0xf; if(length!=0) { udata_writePadding(out, 16-length); } /* finish */ udata_finish(out, &errorCode); if(U_FAILURE(errorCode)) { fprintf(stderr, "gencmn: udata_finish() failed - %s\n", u_errorName(errorCode)); exit(errorCode); } } else { /* write a .c source file with the table of contents */ char *filename; FileStream *out; /* create the output filename */ filename=s=buffer; uprv_strcpy(filename, options[4].value); s=filename+uprv_strlen(filename); if(s>filename && *(s-1)!=U_FILE_SEP_CHAR) { *s++=U_FILE_SEP_CHAR; } uprv_strcpy(s, options[6].value); if(*(options[7].value)!=0) { s+=uprv_strlen(s); *s++='_'; uprv_strcpy(s, options[7].value); } s+=uprv_strlen(s); uprv_strcpy(s, ".c"); /* open the output file */ out=T_FileStream_open(filename, "w"); if(out==NULL) { fprintf(stderr, "gencmn: unable to open .c output file %s\n", filename); exit(U_FILE_ACCESS_ERROR); } /* If an entrypoint is specified, use it. */ if(options[9].doesOccur) { entrypointName = options[9].value; } else { entrypointName = options[6].value; } /* write the source file */ sprintf(buffer, "/*\n" " * ICU common data table of contents for %s.%s ,\n" " * Automatically generated by icu/source/tools/gencmn/gencmn .\n" " */\n\n" "#include \"unicode/utypes.h\"\n" "#include \"unicode/udata.h\"\n" "\n" "/* external symbol declarations for data */\n", options[6].value, options[7].value); T_FileStream_writeLine(out, buffer); sprintf(buffer, "extern const char\n %s%s[]", symPrefix?symPrefix:"", files[0].pathname); T_FileStream_writeLine(out, buffer); for(i=1; i<fileCount; ++i) { sprintf(buffer, ",\n %s%s[]", symPrefix?symPrefix:"", files[i].pathname); T_FileStream_writeLine(out, buffer); } T_FileStream_writeLine(out, ";\n\n"); sprintf( buffer, "U_EXPORT struct {\n" " uint16_t headerSize;\n" " uint8_t magic1, magic2;\n" " UDataInfo info;\n" " char padding[%lu];\n" " uint32_t count, reserved;\n" " struct {\n" " const char *name;\n" " const void *data;\n" " } toc[%lu];\n" "} U_EXPORT2 %s_dat = {\n" " 32, 0xda, 0x27, {\n" " %lu, 0,\n" " %u, %u, %u, 0,\n" " {0x54, 0x6f, 0x43, 0x50},\n" " {1, 0, 0, 0},\n" " {0, 0, 0, 0}\n" " },\n" " \"\", %lu, 0, {\n", (unsigned long)32-4-sizeof(UDataInfo), (unsigned long)fileCount, entrypointName, (unsigned long)sizeof(UDataInfo), U_IS_BIG_ENDIAN, U_CHARSET_FAMILY, U_SIZEOF_UCHAR, (unsigned long)fileCount ); T_FileStream_writeLine(out, buffer); sprintf(buffer, " { \"%s\", %s%s }", files[0].basename, symPrefix?symPrefix:"", files[0].pathname); T_FileStream_writeLine(out, buffer); for(i=1; i<fileCount; ++i) { sprintf(buffer, ",\n { \"%s\", %s%s }", files[i].basename, symPrefix?symPrefix:"", files[i].pathname); T_FileStream_writeLine(out, buffer); } T_FileStream_writeLine(out, "\n }\n};\n"); T_FileStream_close(out); uprv_free(symPrefix); } return 0; }