// --------------------------------------------------------------------------- // XMLASCIITranscoder: Implementation of the transcoder API // --------------------------------------------------------------------------- XMLSize_t XMLASCIITranscoder::transcodeFrom( const XMLByte* const srcData , const XMLSize_t srcCount , XMLCh* const toFill , const XMLSize_t maxChars , XMLSize_t& bytesEaten , unsigned char* const charSizes) { // // Calculate the max chars we can do here. Its the lesser of the // max output chars and the source byte count. // const XMLSize_t countToDo = srcCount < maxChars ? srcCount : maxChars; // // Now loop through that many source chars and just cast each one // over to the XMLCh format. Check each source that its really a // valid ASCI char. // const XMLByte* srcPtr = srcData; XMLCh* outPtr = toFill; XMLSize_t countDone = 0; for (; countDone < countToDo; countDone++) { // Do the optimistic work up front if (*srcPtr < 0x80) { *outPtr++ = XMLCh(*srcPtr++); continue; } // // We got non source encoding char. If we got more than 32 chars, // the just break out. We'll come back here later to hit this again // and give an error much closer to the real source position. // if (countDone > 32) break; XMLCh tmpBuf[17]; XMLString::binToText((unsigned int)*srcPtr, tmpBuf, 16, 16, getMemoryManager()); ThrowXMLwithMemMgr2 ( TranscodingException , XMLExcepts::Trans_Unrepresentable , tmpBuf , getEncodingName() , getMemoryManager() ); } // Set the bytes we ate bytesEaten = countDone; // Set the char sizes to the fixed size memset(charSizes, 1, countDone); // Return the chars we transcoded return countDone; }
unsigned int XMLASCIITranscoder::transcodeTo(const XMLCh* const srcData , const unsigned int srcCount , XMLByte* const toFill , const unsigned int maxBytes , unsigned int& charsEaten , const UnRepOpts options) { // If debugging, make sure that the block size is legal #if defined(XERCES_DEBUG) checkBlockSize(maxBytes); #endif // // Calculate the max chars we can do here. Its the lesser of the // max output chars and the source byte count. // const unsigned int countToDo = srcCount < maxBytes ? srcCount : maxBytes; const XMLCh* srcPtr = srcData; XMLByte* outPtr = toFill; for (unsigned int index = 0; index < countToDo; index++) { // If its legal, do it and jump back to the top if (*srcPtr < 0x80) { *outPtr++ = XMLByte(*srcPtr++); continue; } // // Its not representable so use a replacement char. According to // the options, either throw or use the replacement. // if (options == UnRep_Throw) { XMLCh tmpBuf[17]; XMLString::binToText((unsigned int)*srcPtr, tmpBuf, 16, 16, getMemoryManager()); ThrowXMLwithMemMgr2 ( TranscodingException , XMLExcepts::Trans_Unrepresentable , tmpBuf , getEncodingName() , getMemoryManager() ); } // Use the replacement char *outPtr++ = 0x1A; srcPtr++; } // Set the chars we ate charsEaten = countToDo; // Return the byte we transcoded return countToDo; }
//
//  Narrows XMLCh units to single bytes.
//
//  Processes min(srcCount, maxBytes) units. A unit below 256 is stored as
//  is. Any other unit either raises a TranscodingException
//  (Trans_Unrepresentable) when options is UnRep_Throw, or is stored as
//  the replacement byte 0x1A.
//
//  charsEaten and the return value are both the number of units processed.
//
XMLSize_t
XML88591Transcoder::transcodeTo(const   XMLCh* const    srcData
                                , const XMLSize_t       srcCount
                                ,       XMLByte* const  toFill
                                , const XMLSize_t       maxBytes
                                ,       XMLSize_t&      charsEaten
                                , const UnRepOpts       options)
{
    // One output byte per input unit, so the smaller count bounds the work
    const XMLSize_t limit = (srcCount < maxBytes) ? srcCount : maxBytes;

    for (XMLSize_t pos = 0; pos < limit; ++pos)
    {
        const XMLCh unit = srcData[pos];

        if (unit < 256)
        {
            // Representable: a simple downcast does the job
            toFill[pos] = XMLByte(unit);
        }
        else
        {
            // Not representable: throw or substitute, as the caller asked
            if (options == UnRep_Throw)
            {
                XMLCh tmpBuf[17];
                XMLString::binToText((unsigned int)unit, tmpBuf, 16, 16, getMemoryManager());
                ThrowXMLwithMemMgr2
                (
                    TranscodingException
                    , XMLExcepts::Trans_Unrepresentable
                    , tmpBuf
                    , getEncodingName()
                    , getMemoryManager()
                );
            }
            toFill[pos] = 0x1A;
        }
    }

    charsEaten = limit;
    return limit;
}
// --------------------------------------------------------------------------- // XMLASCIITranscoder390: Implementation of the transcoder API // --------------------------------------------------------------------------- unsigned int XMLASCIITranscoder390::transcodeFrom( const XMLByte* const srcData , const unsigned int srcCount , XMLCh* const toFill , const unsigned int maxChars , unsigned int& bytesEaten , unsigned char* const charSizes) { // If debugging, make sure that the block size is legal #if defined(XERCES_DEBUG) checkBlockSize(maxChars); #endif // // Calculate the max chars we can do here. Its the lesser of the // max output chars and the source byte count. // const unsigned int countToDo = srcCount < maxChars ? srcCount : maxChars; // // Now loop through that many source chars and just cast each one // over to the XMLCh format. Check each source that its really a // valid ASCI char. // const XMLByte* srcPtr = srcData; XMLCh* outPtr = toFill; unsigned int countDone = countToDo; int flag = 0; // if flag is set to 1, an non-ASCII character is encountered TROTASC(srcPtr, toFill, &countDone, padding_temp.gFromTable, 0xFFFF, &flag); if (flag == 1 && countDone < 32){ XMLCh tmpBuf[17]; XMLString::binToText((unsigned int)*srcPtr, tmpBuf, 16, 16, getMemoryManager()); ThrowXMLwithMemMgr2 ( TranscodingException , XMLExcepts::Trans_Unrepresentable , tmpBuf , getEncodingName() , getMemoryManager() ); }//end if // Set the bytes we ate bytesEaten = countDone; // Set the char sizes to the fixed size memset(charSizes, 1, countDone); // Return the chars we transcoded return countDone; }
//
//  Converts XMLCh text to the target encoding with the mUnicodeToText
//  converter.
//
//  The converter context is cleared first, then srcCount units are offered
//  to TECConvertText with toFill/maxBytes as the output buffer.
//
//  charsEaten receives the consumed byte count divided by sizeof(XMLCh);
//  the return value is the produced byte count.
//
//  Throws TranscodingException (Trans_Unrepresentable) when the conversion
//  reports kTECUnmappableElementErr and options is UnRep_Throw. Every other
//  status is ignored.
//
XMLSize_t
MacOSTranscoder::transcodeTo(const  XMLCh* const    srcData
                            , const XMLSize_t       srcCount
                            ,       XMLByte* const  toFill
                            , const XMLSize_t       maxBytes
                            ,       XMLSize_t&      charsEaten
                            , const UnRepOpts       options)
{
    //  We cannot know whether this call continues a larger run of text,
    //  so start from a clean converter state every time.
    TECClearConverterContextInfo(mUnicodeToText);

    ByteCount   consumedBytes = 0;
    ByteCount   producedBytes = 0;

    const OSStatus result = TECConvertText(
        mUnicodeToText,
        (ConstTextPtr) srcData,
        srcCount * sizeof(XMLCh),   // inputBufferLength
        &consumedBytes,             // actualInputLength
        (TextPtr) toFill,           // outputBuffer
        maxBytes,                   // outputBufferLength
        &producedBytes);            // actualOutputLength

    // These status codes are expected in normal operation
    const bool ignorable =
           (result == kTECUsedFallbacksStatus)
        || (result == kTECOutputBufferFullStatus)
        || (result == kTECPartialCharErr);

    std::size_t charsConsumed = consumedBytes / sizeof(XMLCh);

    // The only status acted upon is an unmappable element, and only on request
    if (!ignorable
    &&  (result != noErr)
    &&  (result == kTECUnmappableElementErr)
    &&  (options == UnRep_Throw))
    {
        XMLCh tmpBuf[17];
        XMLString::binToText(srcData[charsConsumed], tmpBuf, 16, 16);
        ThrowXML2
        (
            TranscodingException
            , XMLExcepts::Trans_Unrepresentable
            , tmpBuf
            , getEncodingName()
        );
    }

    charsEaten = charsConsumed;
    return producedBytes;
}
unsigned int CygwinTranscoder::transcodeTo(const XMLCh* const srcData , const unsigned int srcCount , XMLByte* const toFill , const unsigned int maxBytes , unsigned int& charsEaten , const UnRepOpts options) { // Get pointers to the start and end of each buffer const XMLCh* srcPtr = srcData; const XMLCh* srcEnd = srcData + srcCount; XMLByte* outPtr = toFill; XMLByte* outEnd = toFill + maxBytes; // // Now loop until we either get our max chars, or cannot get a whole // character from the input buffer. // // NOTE: We have to use a loop for this unfortunately because the // conversion API is too dumb to tell us how many chars it converted if // it couldn't do the whole source. // BOOL usedDef; while ((outPtr < outEnd) && (srcPtr < srcEnd)) { // // Do one char and see if it made it. const unsigned int bytesStored = ::WideCharToMultiByte ( fWinCP , WC_COMPOSITECHECK | WC_SEPCHARS , (LPCWSTR)srcPtr , 1 , (char*)outPtr , outEnd - outPtr , 0 , &usedDef ); // If we didn't transcode anything, then we are done if (!bytesStored) break; // // If the defaault char was used and the options indicate that // this isn't allowed, then throw. // if (usedDef && (options == UnRep_Throw)) { XMLCh tmpBuf[17]; XMLString::binToText((unsigned int)*srcPtr, tmpBuf, 16, 16, getMemoryManager()); ThrowXMLwithMemMgr2 ( TranscodingException , XMLExcepts::Trans_Unrepresentable , tmpBuf , getEncodingName() , getMemoryManager() ); } // Update our pointers outPtr += bytesStored; srcPtr++; } // Update the chars eaten charsEaten = srcPtr - srcData; // And return the bytes we stored return outPtr - toFill; }
// --------------------------------------------------------------------------- // CygwinTranscoder: The virtual transcoder API // --------------------------------------------------------------------------- unsigned int CygwinTranscoder::transcodeFrom( const XMLByte* const srcData , const unsigned int srcCount , XMLCh* const toFill , const unsigned int maxChars , unsigned int& bytesEaten , unsigned char* const charSizes) { // Get temp pointers to the in and out buffers, and the chars sizes one XMLCh* outPtr = toFill; const XMLByte* inPtr = srcData; unsigned char* sizesPtr = charSizes; // Calc end pointers for each of them XMLCh* outEnd = toFill + maxChars; const XMLByte* inEnd = srcData + srcCount; // // Now loop until we either get our max chars, or cannot get a whole // character from the input buffer. // bytesEaten = 0; while ((outPtr < outEnd) && (inPtr < inEnd)) { // // If we are looking at a leading byte of a multibyte sequence, // then we are going to eat 2 bytes, else 1. // const unsigned int toEat = ::IsDBCSLeadByteEx(fWinCP, *inPtr) ? 2 : 1; // Make sure a whol char is in the source if (inPtr + toEat > inEnd) break; // Try to translate this next char and check for an error const unsigned int converted = ::MultiByteToWideChar ( fWinCP , MB_PRECOMPOSED | MB_ERR_INVALID_CHARS , (const char*)inPtr , toEat , (LPWSTR)outPtr , 1 ); if (converted != 1) { if (toEat == 1) { XMLCh tmpBuf[17]; XMLString::binToText((unsigned int)(*inPtr), tmpBuf, 16, 16, getMemoryManager()); ThrowXMLwithMemMgr2 ( TranscodingException , XMLExcepts::Trans_BadSrcCP , tmpBuf , getEncodingName() , getMemoryManager() ); } else { ThrowXMLwithMemMgr(TranscodingException, XMLExcepts::Trans_BadSrcSeq, getMemoryManager()); } } // Update the char sizes array for this round *sizesPtr++ = toEat; // And update the bytes eaten count bytesEaten += toEat; // And update our in/out ptrs inPtr += toEat; outPtr++; } // Return the chars we output return (outPtr - toFill); }
unsigned int XMLUTF8Transcoder::transcodeTo( const XMLCh* const srcData , const unsigned int srcCount , XMLByte* const toFill , const unsigned int maxBytes , unsigned int& charsEaten , const UnRepOpts options) { // Watch for pathological scenario. Shouldn't happen, but... if (!srcCount || !maxBytes) return 0; // // Get pointers to our start and end points of the input and output // buffers. // const XMLCh* srcPtr = srcData; const XMLCh* srcEnd = srcPtr + srcCount; XMLByte* outPtr = toFill; XMLByte* outEnd = toFill + maxBytes; while (srcPtr < srcEnd) { // // Tentatively get the next char out. We have to get it into a // 32 bit value, because it could be a surrogate pair. // XMLUInt32 curVal = *srcPtr; // // If its a leading surrogate, then lets see if we have the trailing // available. If not, then give up now and leave it for next time. // unsigned int srcUsed = 1; if ((curVal >= 0xD800) && (curVal <= 0xDBFF)) { if (srcPtr + 1 >= srcEnd) break; // Create the composite surrogate pair curVal = ((curVal - 0xD800) << 10) + ((*(srcPtr + 1) - 0xDC00) + 0x10000); // And indicate that we ate another one srcUsed++; } // Figure out how many bytes we need unsigned int encodedBytes; if (curVal < 0x80) encodedBytes = 1; else if (curVal < 0x800) encodedBytes = 2; else if (curVal < 0x10000) encodedBytes = 3; else if (curVal < 0x110000) encodedBytes = 4; else { // If the options say to throw, then throw if (options == UnRep_Throw) { XMLCh tmpBuf[17]; XMLString::binToText(curVal, tmpBuf, 16, 16, getMemoryManager()); ThrowXMLwithMemMgr2 ( TranscodingException , XMLExcepts::Trans_Unrepresentable , tmpBuf , getEncodingName() , getMemoryManager() ); } // Else, use the replacement character *outPtr++ = chSpace; srcPtr += srcUsed; continue; } // // If we cannot fully get this char into the output buffer, // then leave it for the next time. // if (outPtr + encodedBytes > outEnd) break; // We can do it, so update the source index srcPtr += srcUsed; // // And spit out the bytes. 
We spit them out in reverse order // here, so bump up the output pointer and work down as we go. // outPtr += encodedBytes; switch(encodedBytes) { case 6 : *--outPtr = XMLByte((curVal | 0x80UL) & 0xBFUL); curVal >>= 6; case 5 : *--outPtr = XMLByte((curVal | 0x80UL) & 0xBFUL); curVal >>= 6; case 4 : *--outPtr = XMLByte((curVal | 0x80UL) & 0xBFUL); curVal >>= 6; case 3 : *--outPtr = XMLByte((curVal | 0x80UL) & 0xBFUL); curVal >>= 6; case 2 : *--outPtr = XMLByte((curVal | 0x80UL) & 0xBFUL); curVal >>= 6; case 1 : *--outPtr = XMLByte ( curVal | gFirstByteMark[encodedBytes] ); } // Add the encoded bytes back in again to indicate we've eaten them outPtr += encodedBytes; } // Fill in the chars we ate charsEaten = (srcPtr - srcData); // And return the bytes we filled in return (outPtr - toFill); }
extern int main(int argc, char* argv[]) { const char *encoding = NULL; const char *outputDir = NULL; /* NULL = no output directory, use current */ const char *inputDir = "."; int tostdout = 0; int prbom = 0; const char *pname; UResourceBundle *bundle = NULL; UErrorCode status = U_ZERO_ERROR; int32_t i = 0; UConverter *converter; const char* arg; /* Get the name of tool. */ pname = uprv_strrchr(*argv, U_FILE_SEP_CHAR); #if U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR if (!pname) { pname = uprv_strrchr(*argv, U_FILE_ALT_SEP_CHAR); } #endif if (!pname) { pname = *argv; } else { ++pname; } /* error handling, printing usage message */ argc=u_parseArgs(argc, argv, sizeof(options)/sizeof(options[0]), options); /* error handling, printing usage message */ if(argc<0) { fprintf(stderr, "%s: error in command line argument \"%s\"\n", pname, argv[-argc]); } if(argc<0 || options[0].doesOccur || options[1].doesOccur) { fprintf(argc < 0 ? stderr : stdout, "%csage: %s [ -h, -?, --help ] [ -V, --version ]\n" " [ -v, --verbose ] [ -e, --encoding encoding ] [ --bom ]\n" " [ -t, --truncate [ size ] ]\n" " [ -s, --sourcedir source ] [ -d, --destdir destination ]\n" " [ -i, --icudatadir directory ] [ -c, --to-stdout ]\n" " [ -A, --suppressAliases]\n" " bundle ...\n", argc < 0 ? 'u' : 'U', pname); return argc<0 ? 
U_ILLEGAL_ARGUMENT_ERROR : U_ZERO_ERROR; } if(options[10].doesOccur) { fprintf(stderr, "%s version %s (ICU version %s).\n" "%s\n", pname, DERB_VERSION, U_ICU_VERSION, U_COPYRIGHT_STRING); return U_ZERO_ERROR; } if(options[2].doesOccur) { encoding = options[2].value; } if (options[3].doesOccur) { tostdout = 1; } if(options[4].doesOccur) { trunc = TRUE; if(options[4].value != NULL) { truncsize = atoi(options[4].value); /* user defined printable size */ } else { truncsize = DERB_DEFAULT_TRUNC; /* we'll use default omitting size */ } } else { trunc = FALSE; } if(options[5].doesOccur) { verbose = TRUE; } if (options[6].doesOccur) { outputDir = options[6].value; } if(options[7].doesOccur) { inputDir = options[7].value; /* we'll use users resources */ } if (options[8].doesOccur) { prbom = 1; } if (options[9].doesOccur) { u_setDataDirectory(options[9].value); } if (options[11].doesOccur) { suppressAliases = TRUE; } converter = ucnv_open(encoding, &status); if (U_FAILURE(status)) { fprintf(stderr, "%s: couldn't create %s converter for encoding\n", pname, encoding ? encoding : ucnv_getDefaultName()); return 2; } ucnv_setFromUCallBack(converter, UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_C, 0, 0, &status); if (U_FAILURE(status)) { fprintf(stderr, "%s: couldn't configure converter for encoding\n", pname); return 3; } defaultConverter = ucnv_open(0, &status); if (U_FAILURE(status)) { fprintf(stderr, "%s: couldn't create %s converter for encoding\n", ucnv_getDefaultName(), pname); return 2; } for (i = 1; i < argc; ++i) { static const UChar sp[] = { 0x0020 }; /* " " */ char infile[4096]; /* XXX Sloppy. 
*/ char locale[64]; const char *thename = 0, *p, *q; UBool fromICUData = FALSE; arg = getLongPathname(argv[i]); if (verbose) { printf("processing bundle \"%s\"\n", argv[i]); } p = uprv_strrchr(arg, U_FILE_SEP_CHAR); #if U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR if (p == NULL) { p = uprv_strrchr(arg, U_FILE_ALT_SEP_CHAR); } #endif if (!p) { p = arg; } else { p++; } q = uprv_strrchr(p, '.'); if (!q) { for (q = p; *q; ++q) ; } uprv_strncpy(locale, p, q - p); locale[q - p] = 0; if (!(fromICUData = !uprv_strcmp(inputDir, "-"))) { UBool absfilename = *arg == U_FILE_SEP_CHAR; #ifdef U_WINDOWS if (!absfilename) { absfilename = (uprv_strlen(arg) > 2 && isalpha(arg[0]) && arg[1] == ':' && arg[2] == U_FILE_SEP_CHAR); } #endif if (absfilename) { thename = arg; } else { q = uprv_strrchr(arg, U_FILE_SEP_CHAR); #if U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR if (q == NULL) { q = uprv_strrchr(arg, U_FILE_ALT_SEP_CHAR); } #endif uprv_strcpy(infile, inputDir); if(q != NULL) { uprv_strcat(infile, U_FILE_SEP_STRING); strncat(infile, arg, q-arg); } thename = infile; } } status = U_ZERO_ERROR; if (thename) { bundle = ures_openDirect(thename, locale, &status); } else { bundle = ures_open(fromICUData ? 
0 : inputDir, locale, &status); } if (status == U_ZERO_ERROR) { FILE *out; const char *filename = 0; const char *ext = 0; if (!locale || !tostdout) { filename = uprv_strrchr(arg, U_FILE_SEP_CHAR); #if U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR if (!filename) { filename = uprv_strrchr(arg, U_FILE_ALT_SEP_CHAR); } #endif if (!filename) { filename = arg; } else { ++filename; } ext = uprv_strrchr(arg, '.'); if (!ext) { ext = filename + uprv_strlen(filename); } } if (tostdout) { out = stdout; #if defined(U_WINDOWS) || defined(U_CYGWIN) if (setmode(fileno(out), O_BINARY) == -1) { fprintf(stderr, "%s: couldn't set standard output to binary mode\n", pname); return 4; } #endif } else { char thefile[4096], *tp; int32_t len; if (outputDir) { uprv_strcpy(thefile, outputDir); uprv_strcat(thefile, U_FILE_SEP_STRING); } else { *thefile = 0; } uprv_strcat(thefile, filename); tp = thefile + uprv_strlen(thefile); len = (int32_t)uprv_strlen(ext); if (len) { tp -= len - 1; } else { *tp++ = '.'; } uprv_strcpy(tp, "txt"); out = fopen(thefile, "w"); if (!out) { fprintf(stderr, "%s: couldn't create %s\n", pname, thefile); return 4; } } if (prbom) { /* XXX: Should be done only for UTFs */ static const UChar bom[] = { 0xFEFF }; printString(out, converter, bom, (int32_t)(sizeof(bom)/sizeof(*bom))); } printCString(out, converter, "// -*- Coding: ", -1); printCString(out, converter, encoding ? 
encoding : getEncodingName(ucnv_getDefaultName()), -1); printCString(out, converter, "; -*-\n//\n", -1); printCString(out, converter, "// This file was dumped by derb(8) from ", -1); if (thename) { printCString(out, converter, thename, -1); } else if (fromICUData) { printCString(out, converter, "the ICU internal ", -1); printCString(out, converter, locale, -1); printCString(out, converter, " locale", -1); } printCString(out, converter, "\n// derb(8) by Vladimir Weinstein and Yves Arrouye\n\n", -1); if (locale) { printCString(out, converter, locale, -1); } else { printCString(out, converter, filename, (int32_t)(ext - filename)); printString(out, converter, sp, (int32_t)(sizeof(sp)/sizeof(*sp))); } printOutBundle(out, converter, bundle, 0, pname, &status); if (out != stdout) { fclose(out); } } else { reportError(pname, &status, "opening resource file"); } ures_close(bundle); } ucnv_close(defaultConverter); ucnv_close(converter); return 0; }
unsigned int XML256TableTranscoder390::transcodeTo( const XMLCh* const srcData , const unsigned int srcCount , XMLByte* const toFill , const unsigned int maxBytes , unsigned int& charsEaten , const UnRepOpts options) { // If debugging, make sure that the block size is legal #if defined(XERCES_DEBUG) checkBlockSize(maxBytes); #endif // // Calculate the max chars we can do here. Its the lesser of the // max output chars and the number of chars in the source. // const unsigned int countToDo = srcCount < maxBytes ? srcCount : maxBytes; // // Loop through the count we have to do and map each char via the // lookup table. // const XMLCh* srcPtr = srcData; const XMLCh* endPtr = (srcPtr + countToDo); XMLByte* outPtr = toFill; XMLByte nextOut; while (srcPtr < endPtr) { // // Get the next src char out to a temp, then do a binary search // of the 'to' table for this entry. // if ((nextOut = xlatOneTo(*srcPtr))) { *outPtr++ = nextOut; srcPtr++; continue; } // // Its not representable so, according to the options, either // throw or use the replacement. // if (options == UnRep_Throw) { XMLCh tmpBuf[17]; XMLString::binToText((unsigned int)*srcPtr, tmpBuf, 16, 16, getMemoryManager()); ThrowXMLwithMemMgr2 ( TranscodingException , XMLExcepts::Trans_Unrepresentable , tmpBuf , getEncodingName() , getMemoryManager() ); } // Eat the source char and use the replacement char srcPtr++; *outPtr++ = 0x3F; } // Set the chars eaten charsEaten = countToDo; // Return the bytes we transcoded return countToDo; }
unsigned int ICUTranscoder::transcodeTo( const XMLCh* const srcData , const unsigned int srcCount , XMLByte* const toFill , const unsigned int maxBytes , unsigned int& charsEaten , const UnRepOpts options) { // // Get a pointer to the buffer to transcode. If UChar and XMLCh are // the same size here, then use the original. Else, create a temp // one and put a janitor on it. // const UChar* srcPtr; UChar* tmpBufPtr = 0; if (sizeof(XMLCh) == sizeof(UChar)) { srcPtr = (const UChar*)srcData; } else { tmpBufPtr = convertToUChar(srcData, srcCount, getMemoryManager()); srcPtr = tmpBufPtr; } ArrayJanitor<UChar> janTmpBuf(tmpBufPtr, getMemoryManager()); // // Set the appropriate callback so that it will either fail or use // the rep char. Remember the old one so we can put it back. // UErrorCode err = U_ZERO_ERROR; UConverterFromUCallback oldCB = NULL; #if (U_ICU_VERSION_MAJOR_NUM < 2) void* orgContent; #else const void* orgContent; #endif ucnv_setFromUCallBack ( fConverter , (options == UnRep_Throw) ? UCNV_FROM_U_CALLBACK_STOP : UCNV_FROM_U_CALLBACK_SUBSTITUTE , NULL , &oldCB , &orgContent , &err ); // // Ok, lets transcode as many chars as we we can in one shot. The // ICU API gives enough info not to have to do this one char by char. 
// XMLByte* startTarget = toFill; const UChar* startSrc = srcPtr; err = U_ZERO_ERROR; ucnv_fromUnicode ( fConverter , (char**)&startTarget , (char*)(startTarget + maxBytes) , &startSrc , srcPtr + srcCount , 0 , false , &err ); // Rememember the status before we possibly overite the error code const bool res = (err == U_ZERO_ERROR); // Put the old handler back err = U_ZERO_ERROR; UConverterFromUCallback orgAction = NULL; ucnv_setFromUCallBack(fConverter, oldCB, NULL, &orgAction, &orgContent, &err); if (!res) { XMLCh tmpBuf[17]; XMLString::binToText((unsigned int)*startSrc, tmpBuf, 16, 16, getMemoryManager()); ThrowXMLwithMemMgr2 ( TranscodingException , XMLExcepts::Trans_Unrepresentable , tmpBuf , getEncodingName() , getMemoryManager() ); } // Fill in the chars we ate from the input charsEaten = startSrc - srcPtr; // Return the chars we stored return startTarget - toFill; }
// --------------------------------------------------------------------------- // ICUTranscoder: The virtual transcoder API // --------------------------------------------------------------------------- unsigned int ICUTranscoder::transcodeFrom(const XMLByte* const srcData , const unsigned int srcCount , XMLCh* const toFill , const unsigned int maxChars , unsigned int& bytesEaten , unsigned char* const charSizes) { // If debugging, insure the block size is legal #if defined(XERCES_DEBUG) checkBlockSize(maxChars); #endif // Set up pointers to the start and end of the source buffer const XMLByte* startSrc = srcData; const XMLByte* endSrc = srcData + srcCount; // // And now do the target buffer. This works differently according to // whether XMLCh and UChar are the same size or not. // UChar* startTarget; if (sizeof(XMLCh) == sizeof(UChar)) startTarget = (UChar*)toFill; else startTarget = (UChar*) getMemoryManager()->allocate ( maxChars * sizeof(UChar) );//new UChar[maxChars]; UChar* orgTarget = startTarget; // // Transoode the buffer. Buffer overflow errors are normal, occuring // when the raw input buffer holds more characters than will fit in // the Unicode output buffer. // UErrorCode err = U_ZERO_ERROR; ucnv_toUnicode ( fConverter , &startTarget , startTarget + maxChars , (const char**)&startSrc , (const char*)endSrc , (fFixed ? 
0 : (int32_t*)fSrcOffsets) , false , &err ); if ((err != U_ZERO_ERROR) && (err != U_BUFFER_OVERFLOW_ERROR)) { if (orgTarget != (UChar*)toFill) getMemoryManager()->deallocate(orgTarget);//delete [] orgTarget; if (fFixed) { XMLCh tmpBuf[17]; XMLString::binToText((unsigned int)(*startTarget), tmpBuf, 16, 16, getMemoryManager()); ThrowXMLwithMemMgr2 ( TranscodingException , XMLExcepts::Trans_BadSrcCP , tmpBuf , getEncodingName() , getMemoryManager() ); } else { ThrowXMLwithMemMgr(TranscodingException, XMLExcepts::Trans_BadSrcSeq, getMemoryManager()); } } // Calculate the bytes eaten and store in caller's param bytesEaten = startSrc - srcData; // And the characters decoded const unsigned int charsDecoded = startTarget - orgTarget; // // Translate the array of char offsets into an array of character // sizes, which is what the transcoder interface semantics requires. // If its fixed, then we can optimize it. // if (fFixed) { const unsigned char fillSize = (unsigned char)ucnv_getMaxCharSize(fConverter); memset(charSizes, fillSize, maxChars); } else { // // We have to convert the series of offsets into a series of // sizes. If just one char was decoded, then its the total bytes // eaten. Otherwise, do a loop and subtract out each element from // its previous element. // if (charsDecoded == 1) { charSizes[0] = (unsigned char)bytesEaten; } else { // ICU does not return an extra element to allow us to figure // out the last char size, so we have to compute it from the // total bytes used. unsigned int index; for (index = 0; index < charsDecoded - 1; index++) { charSizes[index] = (unsigned char)(fSrcOffsets[index + 1] - fSrcOffsets[index]); } if( charsDecoded > 0 ) { charSizes[charsDecoded - 1] = (unsigned char)(bytesEaten - fSrcOffsets[charsDecoded - 1]); } } } // // If XMLCh and UChar are not the same size, then we need to copy over // the temp buffer to the new one. 
// if (sizeof(UChar) != sizeof(XMLCh)) { XMLCh* outPtr = toFill; startTarget = orgTarget; for (unsigned int index = 0; index < charsDecoded; index++) *outPtr++ = XMLCh(*startTarget++); // And delete the temp buffer getMemoryManager()->deallocate(orgTarget);//delete [] orgTarget; } // Return the chars we put into the target buffer return charsDecoded; }
/*
 * Entry point: dumps each resource bundle named on the command line as text.
 *
 * After option parsing, every remaining argument is processed in turn: the
 * locale name is the basename of the argument up to its last '.', the
 * bundle is opened with ures_openDirect (when a path was worked out) or
 * ures_open, and a header followed by the bundle contents is written either
 * to the stdout UFILE or to "<basename>.txt", placed in outputDir when one
 * was given.
 *
 * Return values visible below: U_ILLEGAL_ARGUMENT_ERROR for a bad command
 * line, U_ZERO_ERROR after printing usage or version, 3 when -e is combined
 * with -c or when the output converter cannot be configured, 4 when the
 * output file cannot be created, the failing UErrorCode when building one
 * of the CharString values fails, and 0 otherwise. A bundle that fails to
 * open is reported through reportError and the loop moves on.
 */
extern int
main(int argc, char* argv[]) {
    const char *encoding = NULL;
    const char *outputDir = NULL; /* NULL = no output directory, use current */
    const char *inputDir = ".";
    int tostdout = 0;
    int prbom = 0;

    const char *pname;

    UResourceBundle *bundle = NULL;
    int32_t i = 0;

    const char* arg;

    /* Get the name of tool. */
    pname = uprv_strrchr(*argv, U_FILE_SEP_CHAR);
#if U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR
    if (!pname) {
        pname = uprv_strrchr(*argv, U_FILE_ALT_SEP_CHAR);
    }
#endif
    if (!pname) {
        pname = *argv;
    } else {
        ++pname;
    }

    /* parse the options; argc is replaced by what u_parseArgs returns */
    argc=u_parseArgs(argc, argv, UPRV_LENGTHOF(options), options);

    /* error handling, printing usage message */
    if(argc<0) {
        fprintf(stderr,
            "%s: error in command line argument \"%s\"\n", pname,
            argv[-argc]);
    }
    if(argc<0 || options[0].doesOccur || options[1].doesOccur) {
        fprintf(argc < 0 ? stderr : stdout,
            "%csage: %s [ -h, -?, --help ] [ -V, --version ]\n"
            " [ -v, --verbose ] [ -e, --encoding encoding ] [ --bom ]\n"
            " [ -t, --truncate [ size ] ]\n"
            " [ -s, --sourcedir source ] [ -d, --destdir destination ]\n"
            " [ -i, --icudatadir directory ] [ -c, --to-stdout ]\n"
            " [ -A, --suppressAliases]\n"
            " bundle ...\n", argc < 0 ? 'u' : 'U',
            pname);
        return argc<0 ? U_ILLEGAL_ARGUMENT_ERROR : U_ZERO_ERROR;
    }

    /* version request: print and leave */
    if(options[10].doesOccur) {
        fprintf(stderr,
                "%s version %s (ICU version %s).\n"
                "%s\n",
                pname, DERB_VERSION, U_ICU_VERSION, U_COPYRIGHT_STRING);
        return U_ZERO_ERROR;
    }
    if(options[2].doesOccur) {
        encoding = options[2].value;
    }

    if (options[3].doesOccur) {
        /* an explicit encoding and writing to stdout are mutually exclusive */
        if(options[2].doesOccur) {
            fprintf(stderr, "%s: Error: don't specify an encoding (-e) when writing to stdout (-c).\n", pname);
            return 3;
        }
        tostdout = 1;
    }

    if(options[4].doesOccur) {
        opt_truncate = TRUE;
        if(options[4].value != NULL) {
            truncsize = atoi(options[4].value); /* user defined printable size */
        } else {
            truncsize = DERB_DEFAULT_TRUNC; /* we'll use default omitting size */
        }
    } else {
        opt_truncate = FALSE;
    }

    if(options[5].doesOccur) {
        verbose = TRUE;
    }

    if (options[6].doesOccur) {
        outputDir = options[6].value;
    }

    if(options[7].doesOccur) {
        inputDir = options[7].value; /* we'll use users resources */
    }

    if (options[8].doesOccur) {
        prbom = 1;
    }

    if (options[9].doesOccur) {
        u_setDataDirectory(options[9].value);
    }

    if (options[11].doesOccur) {
        suppressAliases = TRUE;
    }

    /* flush plain stderr first: from here on messages go through ustderr */
    fflush(stderr); // use ustderr now.
    ustderr = u_finit(stderr, NULL, NULL);

    for (i = 1; i < argc; ++i) {
        static const UChar sp[] = { 0x0020 }; /* " " */

        arg = getLongPathname(argv[i]);

        if (verbose) {
            u_fprintf(ustderr, "processing bundle \"%s\"\n", argv[i]);
        }

        /* the locale name is the basename up to its last '.', if any */
        icu::CharString locale;
        UErrorCode status = U_ZERO_ERROR;
        {
            const char *p = findBasename(arg);
            const char *q = uprv_strrchr(p, '.');
            if (q == NULL) {
                locale.append(p, status);
            } else {
                locale.append(p, (int32_t)(q - p), status);
            }
        }
        if (U_FAILURE(status)) {
            return status;
        }

        /*
         * Work out what to open. An inputDir of "-" means no path is built
         * and ures_open is called with a null package below. Otherwise the
         * argument is used as is when it looks absolute, or inputDir is
         * extended with the directory part of the argument.
         */
        icu::CharString infile;
        const char *thename = NULL;
        UBool fromICUData = !uprv_strcmp(inputDir, "-");
        if (!fromICUData) {
            UBool absfilename = *arg == U_FILE_SEP_CHAR;
#if U_PLATFORM_HAS_WIN32_API
            /* also accept the "X:<sep>" drive-letter form */
            if (!absfilename) {
                absfilename = (uprv_strlen(arg) > 2 && isalpha(arg[0])
                    && arg[1] == ':' && arg[2] == U_FILE_SEP_CHAR);
            }
#endif
            if (absfilename) {
                thename = arg;
            } else {
                const char *q = uprv_strrchr(arg, U_FILE_SEP_CHAR);
#if U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR
                if (q == NULL) {
                    q = uprv_strrchr(arg, U_FILE_ALT_SEP_CHAR);
                }
#endif
                infile.append(inputDir, status);
                if(q != NULL) {
                    infile.appendPathPart(icu::StringPiece(arg, (int32_t)(q - arg)), status);
                }
                if (U_FAILURE(status)) {
                    return status;
                }
                thename = infile.data();
            }
        }
        if (thename) {
            bundle = ures_openDirect(thename, locale.data(), &status);
        } else {
            bundle = ures_open(fromICUData ? 0 : inputDir, locale.data(), &status);
        }
        if (U_SUCCESS(status)) {
            UFILE *out = NULL;

            const char *filename = 0;
            const char *ext = 0;

            /*
             * filename/ext are needed to name the output file, and also to
             * stand in for an empty locale name in the header below.
             */
            if (locale.isEmpty() || !tostdout) {
                filename = findBasename(arg);
                ext = uprv_strrchr(filename, '.');
                if (!ext) {
                    /* no extension: point ext at the terminating NUL */
                    ext = uprv_strchr(filename, 0);
                }
            }

            if (tostdout) {
                out = u_get_stdout();
            } else {
                /* [outputDir/]basename with its extension replaced by .txt */
                icu::CharString thefile;
                if (outputDir) {
                    thefile.append(outputDir, status);
                }
                thefile.appendPathPart(filename, status);
                if (*ext) {
                    thefile.truncate(thefile.length() - (int32_t)uprv_strlen(ext));
                }
                thefile.append(".txt", status);
                if (U_FAILURE(status)) {
                    return status;
                }

                out = u_fopen(thefile.data(), "w", NULL, encoding);
                if (!out) {
                    u_fprintf(ustderr, "%s: couldn't create %s\n", pname, thefile.data());
                    u_fclose(ustderr);
                    return 4;
                }
            }

            // now, set the callback.
            ucnv_setFromUCallBack(u_fgetConverter(out), UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_C, 0, 0, &status);
            if (U_FAILURE(status)) {
                u_fprintf(ustderr, "%s: couldn't configure converter for encoding\n", pname);
                u_fclose(ustderr);
                if(!tostdout) {
                    u_fclose(out);
                }
                return 3;
            }

            if (prbom) { /* XXX: Should be done only for UTFs */
                u_fputc(0xFEFF, out);
            }
            /* header: encoding marker, where the data came from, credits */
            u_fprintf(out, "// -*- Coding: %s; -*-\n//\n", encoding ? encoding : getEncodingName(ucnv_getDefaultName()));
            u_fprintf(out, "// This file was dumped by derb(8) from ");
            if (thename) {
                u_fprintf(out, "%s", thename);
            } else if (fromICUData) {
                u_fprintf(out, "the ICU internal %s locale", locale.data());
            }

            u_fprintf(out, "\n// derb(8) by Vladimir Weinstein and Yves Arrouye\n\n");

            /* the locale name, or the bare file name plus a space if empty */
            if (!locale.isEmpty()) {
                u_fprintf(out, "%s", locale.data());
            } else {
                u_fprintf(out, "%.*s%.*S", (int32_t)(ext - filename), filename, UPRV_LENGTHOF(sp), sp);
            }
            printOutBundle(out, bundle, 0, pname, &status);

            /* the stdout UFILE is left open */
            if (!tostdout) {
                u_fclose(out);
            }
        }
        else {
            reportError(pname, &status, "opening resource file");
        }

        ures_close(bundle);
    }

    return 0;
}
// Builds a one-line summary of the form
// "<width>x<height> pixels, <encoding name> encoding".
std::string ImageFormat::toString() const
{
    std::ostringstream text;

    // Dimensions first ...
    text << getWidth();
    text << "x";
    text << getHeight();
    text << " pixels, ";

    // ... then the encoding
    text << getEncodingName();
    text << " encoding";

    return text.str();
}