/**
 * Copies a NUL-terminated UChar string into a char buffer, converting it with
 * the default converter (strncpy-style: at most n bytes are written).
 *
 * The result is NUL-terminated only when the converted text occupies fewer
 * than n bytes. If the default converter cannot be obtained, or the conversion
 * fails with anything other than U_BUFFER_OVERFLOW_ERROR, s1 is set to the
 * empty string.
 *
 * @param s1   Destination buffer with room for at least n bytes.
 * @param ucs2 Source string; at most n UChars of it are converted.
 * @param n    Capacity of s1 in bytes. When n <= 0 nothing is written at all.
 * @return s1.
 */
U_CAPI char* U_EXPORT2
u_austrncpy(char *s1, const UChar *ucs2, int32_t n)
{
    char *target = s1;
    UErrorCode err = U_ZERO_ERROR;
    UConverter *cnv;

    if(n <= 0) {
        /* No room for even a terminator: the failure paths below must not write to s1. */
        return s1;
    }

    cnv = u_getDefaultConverter(&err);
    if(U_SUCCESS(err) && cnv != NULL) {
        ucnv_reset(cnv);
        ucnv_fromUnicode(cnv,
                         &target,
                         s1+n,
                         &ucs2,
                         ucs2+u_ustrnlen(ucs2, n),
                         NULL,
                         TRUE,
                         &err);
        ucnv_reset(cnv); /* be good citizens */
        u_releaseDefaultConverter(cnv);
        if(U_FAILURE(err) && (err != U_BUFFER_OVERFLOW_ERROR) ) {
            *s1 = 0; /* failure */
        }
        if(target < (s1+n)) {
            /* U_BUFFER_OVERFLOW_ERROR isn't an err, just means no termination will happen. */
            *target = 0;  /* terminate */
        }
    } else {
        *s1 = 0;
    }
    return s1;
}
/**
 * Converts a NUL-terminated char string to a heap-allocated UChar string,
 * using the converter returned by ucnv_open(NULL, ...).
 *
 * The result lives inside a StringStruct that is pushed onto the singly linked
 * list headed by *freeHook (through the struct's `link` field), so the caller
 * can release every conversion by walking that list.
 *
 * The text is first converted into a 2000-UChar stack buffer. If that
 * overflows or is left unterminated, it is converted again straight into the
 * heap block, which is sized from the length the first pass reported.
 *
 * @param src      Source string; NULL yields NULL.
 * @param freeHook Head of the allocation list; must not be NULL.
 * @return Pointer to the converted string (dest->str), or NULL if the
 *         converter cannot be opened, allocation fails, or conversion fails.
 */
static UChar* toUChar(const char *src, void **freeHook) {
    int32_t       numUChars;
    int32_t       destSize;
    UChar         stackBuf[2000 + sizeof(void *)/sizeof(UChar)];
    StringStruct *dest;
    UConverter   *cnv;
    UErrorCode    status = U_ZERO_ERROR;

    if (src == NULL) {
        return NULL;
    }

    cnv = ucnv_open(NULL, &status);
    if (U_FAILURE(status) || cnv == NULL) {
        return NULL;
    }
    ucnv_reset(cnv);
    numUChars = ucnv_toUChars(cnv, stackBuf, 2000, src, -1, &status);

    destSize = (numUChars+1) * sizeof(UChar) + sizeof(struct StringStruct);
    dest = (StringStruct *)malloc(destSize);
    if (dest != NULL) {
        if (status == U_BUFFER_OVERFLOW_ERROR || status == U_STRING_NOT_TERMINATED_WARNING) {
            /*
             * ICU functions return immediately when handed a failing status,
             * so the overflow from the first pass must be cleared before
             * converting again into the correctly sized buffer.
             */
            status = U_ZERO_ERROR;
            ucnv_toUChars(cnv, dest->str, numUChars+1, src, -1, &status);
            if (U_FAILURE(status)) {
                free(dest);
                dest = NULL;
            }
        } else if (status == U_ZERO_ERROR) {
            u_strcpy(dest->str, stackBuf);
        } else {
            free(dest);
            dest = NULL;
        }
    }

    ucnv_reset(cnv); /* be good citizens */
    ucnv_close(cnv);
    if (dest == NULL) {
        return NULL;
    }

    /* Push the new block onto the caller's list of allocations. */
    dest->link = (StringStruct*)(*freeHook);
    *freeHook = dest;
    return dest->str;
}
/**
 * Converts a range of an ICU string into an FString using ICUConverter.
 *
 * @param Source           ICU string to read from.
 * @param SourceStartIndex Offset, in UChars, of the first unit to convert.
 * @param SourceLen        Number of UChars to convert.
 * @param Destination      Receives the converted, null-terminated text; emptied when Source is empty.
 */
void FStringConverter::ConvertString(const icu::UnicodeString& Source, const int32 SourceStartIndex, const int32 SourceLen, FString& Destination)
{
	// An empty source simply produces an empty destination
	if (!(Source.length() > 0))
	{
		Destination.Empty();
		return;
	}

	UErrorCode ICUStatus = U_ZERO_ERROR;
	ucnv_reset(ICUConverter);

	// The FString's own character array doubles as the conversion target
	TArray<TCHAR>& ScratchBuffer = Destination.GetCharArray();

	// Grow the array to the worst-case size the converter could need
	const int32_t CapacityInBytes = UCNV_GET_MAX_BYTES_FOR_STRING(SourceLen, ucnv_getMaxCharSize(ICUConverter));
	const int32 CapacityInTCHARs = CapacityInBytes / sizeof(TCHAR);
	ScratchBuffer.SetNumUninitialized(CapacityInTCHARs);

	// Convert straight into the array
	const int32_t WrittenBytes = ucnv_fromUChars(
		ICUConverter,
		reinterpret_cast<char*>(ScratchBuffer.GetData()),
		CapacityInBytes,
		Source.getBuffer() + SourceStartIndex,
		SourceLen,
		&ICUStatus);

	// Terminate the text and trim the array to text + terminator without reallocating
	const int32 WrittenTCHARs = WrittenBytes / sizeof(TCHAR);
	ScratchBuffer[WrittenTCHARs] = 0;
	ScratchBuffer.SetNum(WrittenTCHARs + 1, /*bAllowShrinking*/false); // the array size includes null

	check(U_SUCCESS(ICUStatus));
}
void FStringConverter::ConvertString(const TCHAR* Source, const int32 SourceStartIndex, const int32 SourceLen, icu::UnicodeString& Destination, const bool ShouldNullTerminate) { if (SourceLen > 0) { UErrorCode ICUStatus = U_ZERO_ERROR; ucnv_reset(ICUConverter); // Get the internal buffer of the string, we're going to use it as scratch space const int32_t DestinationCapacityUChars = SourceLen * 2; UChar* InternalStringBuffer = Destination.getBuffer(DestinationCapacityUChars); // Perform the conversion into the string buffer const int32_t SourceSizeBytes = SourceLen * sizeof(TCHAR); const int32_t DestinationLength = ucnv_toUChars(ICUConverter, InternalStringBuffer, DestinationCapacityUChars, reinterpret_cast<const char*>(Source + SourceStartIndex), SourceSizeBytes, &ICUStatus); // Optionally null terminate the string if (ShouldNullTerminate) { InternalStringBuffer[DestinationLength] = 0; } // Size it back down to the correct size and release our lock on the string buffer Destination.releaseBuffer(DestinationLength); check(U_SUCCESS(ICUStatus)); } else { Destination.remove(); } }
void TextCodecICU::releaseICUConverter() const { if (m_converterICU) { UConverter*& cachedConverter = cachedConverterICU(); if (cachedConverter) ucnv_close(cachedConverter); ucnv_reset(m_converterICU); cachedConverter = m_converterICU; m_converterICU = 0; } }
/**
 * Rewinds a UFILE: flushes it, resets its converter and moves the read
 * position back to the start.
 *
 * When the UFILE wraps a FILE*, that stream is rewound and both str.fPos and
 * str.fLimit are pointed at fUCBuffer. Otherwise only str.fPos is moved back
 * to str.fBuffer.
 *
 * @param file The UFILE to rewind.
 */
U_CAPI void
u_frewind(UFILE *file)
{
    u_fflush(file);
    ucnv_reset(file->fConverter);

    if (!file->fFile) {
        /* No underlying FILE*: just go back to the start of the buffer. */
        file->str.fPos = file->str.fBuffer;
        return;
    }

    rewind(file->fFile);
    file->str.fPos   = file->fUCBuffer;
    file->str.fLimit = file->fUCBuffer;
}
/**
 * Gives a converter back after use.
 *
 * If the global slot gDefaultConverter is empty, the converter is reset and
 * stored there (the slot is checked again while holding the global mutex).
 * Otherwise, or if the slot was filled in the meantime, the converter is
 * closed.
 *
 * @param converter Converter to release; NULL is accepted.
 */
U_CAPI void U_EXPORT2
u_releaseDefaultConverter(UConverter *converter)
{
    UConverter *leftover = converter; /* closed at the end unless the slot takes it */

    if(NULL == gDefaultConverter) {
        if(NULL != converter) {
            ucnv_reset(converter);
        }

        umtx_lock(NULL);
        if(NULL == gDefaultConverter) {
            gDefaultConverter = converter;
            leftover = NULL;
        }
        umtx_unlock(NULL);
    }

    if(NULL != leftover) {
        ucnv_close(leftover);
    }
}
/*
 * ucbuf_fillucbuf: refills buf->buffer with UChars produced from buf->in.
 *
 * Steps, in the order the code performs them:
 *  - UChars not yet consumed (buf->currentPos up to buf->bufLimit) are moved
 *    to the front of buf->buffer; `offset` is their count.
 *  - Raw bytes are read from buf->in. When buf->isBuffered is set they go into
 *    the on-stack array `carr` (at most MAX_IN_BUF - offset bytes); otherwise
 *    a heap buffer of T_FileStream_size(buf->in) bytes is allocated and filled.
 *    buf->remaining is decreased by the number of bytes read, and forced to 0
 *    when nothing was read.
 *  - If buf->conv is set, the to-Unicode callback is set to
 *    UCNV_TO_U_CALLBACK_STOP and the bytes are converted to just after the
 *    retained UChars. On failure the surrounding byte context is collected
 *    (and printed to stderr when buf->showWarning is TRUE), the converter is
 *    reset, the callback is switched to UCNV_TO_U_CALLBACK_SUBSTITUTE and the
 *    same bytes are converted again. The second pass reports into the local
 *    `error1`; *error keeps the code from the first pass.
 *  - If buf->conv is not set, the bytes are passed through u_charsToUChars.
 *  - buf->currentPos is set to the start of the buffer, buf->bufLimit to the
 *    end of the produced data, and a NUL is stored at *buf->bufLimit.
 *  - The heap byte buffer, if one was allocated, is freed.
 *
 * Returns buf, or NULL with *error = U_MEMORY_ALLOCATION_ERROR when the heap
 * byte buffer cannot be allocated.
 *
 * NOTE(review): the callback installed by the retry path is not switched back
 *   inside this function - confirm that callers expect this.
 * NOTE(review): the pre-/post-context memcpy lengths (stop-start) come from
 *   pointer arithmetic with no visible range check - confirm they cannot go
 *   negative or exceed CONTEXT_LEN.
 * NOTE(review): the second `#if DEBUG` block uses `numRead`, which is not
 *   declared in this function.
 */
/* fill the uchar buffer */ static UCHARBUF* ucbuf_fillucbuf( UCHARBUF* buf,UErrorCode* error){ UChar* pTarget=NULL; UChar* target=NULL; const char* source=NULL; char carr[MAX_IN_BUF] = {'\0'}; char* cbuf = carr; int32_t inputRead=0; int32_t outputWritten=0; int32_t offset=0; const char* sourceLimit =NULL; int32_t cbufSize=0; pTarget = buf->buffer; /* check if we arrived here without exhausting the buffer*/ if(buf->currentPos<buf->bufLimit){ offset = (int32_t)(buf->bufLimit-buf->currentPos); memmove(buf->buffer,buf->currentPos,offset* sizeof(UChar)); } #if DEBUG memset(pTarget+offset,0xff,sizeof(UChar)*(MAX_IN_BUF-offset)); #endif if(buf->isBuffered){ cbufSize = MAX_IN_BUF; /* read the file */ inputRead=T_FileStream_read(buf->in,cbuf,cbufSize-offset); buf->remaining-=inputRead; }else{ cbufSize = T_FileStream_size(buf->in); cbuf = (char*)uprv_malloc(cbufSize); if (cbuf == NULL) { *error = U_MEMORY_ALLOCATION_ERROR; return NULL; } inputRead= T_FileStream_read(buf->in,cbuf,cbufSize); buf->remaining-=inputRead; } /* just to be sure...*/ if ( 0 == inputRead ) buf->remaining = 0; target=pTarget; /* convert the bytes */ if(buf->conv){ /* set the callback to stop */ UConverterToUCallback toUOldAction ; void* toUOldContext; void* toUNewContext=NULL; ucnv_setToUCallBack(buf->conv, UCNV_TO_U_CALLBACK_STOP, toUNewContext, &toUOldAction, (const void**)&toUOldContext, error); /* since state is saved in the converter we add offset to source*/ target = pTarget+offset; source = cbuf; sourceLimit = source + inputRead; ucnv_toUnicode(buf->conv,&target,target+(buf->bufCapacity-offset), &source,sourceLimit,NULL, (UBool)(buf->remaining==0),error); if(U_FAILURE(*error)){ char context[CONTEXT_LEN+1]; char preContext[CONTEXT_LEN+1]; char postContext[CONTEXT_LEN+1]; int8_t len = CONTEXT_LEN; int32_t start=0; int32_t stop =0; int32_t pos =0; /* use erro1 to preserve the error code */ UErrorCode error1 =U_ZERO_ERROR; if( buf->showWarning==TRUE){ fprintf(stderr,"\n###WARNING: Encountered 
abnormal bytes while" " converting input stream to target encoding: %s\n", u_errorName(*error)); } /* now get the context chars */ ucnv_getInvalidChars(buf->conv,context,&len,&error1); context[len]= 0 ; /* null terminate the buffer */ pos = (int32_t)(source - cbuf - len); /* for pre-context */ start = (pos <=CONTEXT_LEN)? 0 : (pos - (CONTEXT_LEN-1)); stop = pos-len; memcpy(preContext,cbuf+start,stop-start); /* null terminate the buffer */ preContext[stop-start] = 0; /* for post-context */ start = pos+len; stop = (int32_t)(((pos+CONTEXT_LEN)<= (sourceLimit-cbuf) )? (pos+(CONTEXT_LEN-1)) : (sourceLimit-cbuf)); memcpy(postContext,source,stop-start); /* null terminate the buffer */ postContext[stop-start] = 0; if(buf->showWarning ==TRUE){ /* print out the context */ fprintf(stderr,"\tPre-context: %s\n",preContext); fprintf(stderr,"\tContext: %s\n",context); fprintf(stderr,"\tPost-context: %s\n", postContext); } /* reset the converter */ ucnv_reset(buf->conv); /* set the call back to substitute * and restart conversion */ ucnv_setToUCallBack(buf->conv, UCNV_TO_U_CALLBACK_SUBSTITUTE, toUNewContext, &toUOldAction, (const void**)&toUOldContext, &error1); /* reset source and target start positions */ target = pTarget+offset; source = cbuf; /* re convert */ ucnv_toUnicode(buf->conv,&target,target+(buf->bufCapacity-offset), &source,sourceLimit,NULL, (UBool)(buf->remaining==0),&error1); } outputWritten = (int32_t)(target - pTarget); #if DEBUG { int i; target = pTarget; for(i=0;i<numRead;i++){ /* printf("%c", (char)(*target++));*/ } } #endif }else{ u_charsToUChars(cbuf,target+offset,inputRead); outputWritten=((buf->remaining>cbufSize)? cbufSize:inputRead+offset); } buf->currentPos = pTarget; buf->bufLimit=pTarget+outputWritten; *buf->bufLimit=0; /*NUL terminate*/ if(cbuf!=carr){ uprv_free(cbuf); } return buf; }
/*
 * Loads a binary grammar of revision 10043 or later from `input` into `grammar`.
 *
 * What the code reads, in order:
 *  - 4 magic bytes that must be 'C','G','3','B', then the revision number.
 *  - A header bitfield `fields`. Bits 0, 2 and 13 set grammar->has_dep,
 *    sub_readings_ltr and has_relations. The other bits gate optional
 *    sections: bit 1 mapping prefix, bit 3 single tags, bit 5 preferred
 *    targets, bit 6 parentheses pairs, bit 7 anchors, bit 8 sets, bit 9
 *    delimiters, bit 10 soft delimiters, bit 11 contextual tests, bit 12 rules.
 *  - Each tag, set and rule record begins with its own `fields` bitfield (a
 *    local that shadows the header one inside the loop) selecting which
 *    members of that record are present.
 *  - 32-bit integers are passed through ntohl. Strings are stored as a length
 *    followed by bytes, which are converted with a "UTF-8" ICU converter into
 *    gbuffers[0] (the mapping prefix goes directly into grammar->mapping_prefix).
 *  - Varstring tags record set numbers in `tag_varsets` while tags are read;
 *    the Set pointers are attached only after all sets have been loaded.
 *  - Rules refer to contextual tests by 1-based index into contexts_list; for
 *    dep_target a stored value of 0 means none.
 *  - Finally templates are bound through deferred_tmpls/templates, the
 *    converter is closed and the dummy set is allocated.
 *
 * grammar->is_binary is set to true after the revision check and back to
 * false just before returning.
 *
 * Calls CG3Quit(1) when the stream or grammar is missing, the magic bytes or
 * revision do not match, or a tag regex fails to compile. Returns 0 otherwise.
 *
 * NOTE(review): neither the result of ucnv_open nor `err` is ever inspected.
 * NOTE(review): string lengths read from the file are used directly as read
 *   sizes into cbuffers[0] with no visible bounds check.
 * NOTE(review): sets_by_contents.find(...)->second, contexts_list[u32tmp - 1],
 *   single_tags_list[...] and sets_list[...] are used without checking that
 *   the key/index read from the file is valid.
 */
int BinaryGrammar::readBinaryGrammar_10043(std::istream& input) { if (!input) { u_fprintf(ux_stderr, "Error: Input is null - cannot read from nothing!\n"); CG3Quit(1); } if (!grammar) { u_fprintf(ux_stderr, "Error: No grammar provided - cannot continue!\n"); CG3Quit(1); } uint32_t fields = 0; uint32_t u32tmp = 0; int32_t i32tmp = 0; uint8_t u8tmp = 0; UErrorCode err = U_ZERO_ERROR; UConverter* conv = ucnv_open("UTF-8", &err); if (fread_throw(&cbuffers[0][0], 1, 4, input) != 4) { std::cerr << "Error: Error reading first 4 bytes from grammar!" << std::endl; CG3Quit(1); } if (cbuffers[0][0] != 'C' || cbuffers[0][1] != 'G' || cbuffers[0][2] != '3' || cbuffers[0][3] != 'B') { u_fprintf(ux_stderr, "Error: Grammar does not begin with magic bytes - cannot load as binary!\n"); CG3Quit(1); } fread_throw(&u32tmp, sizeof(uint32_t), 1, input); u32tmp = (uint32_t)ntohl(u32tmp); if (u32tmp < 10043) { u_fprintf(ux_stderr, "Error: Grammar revision is %u, but this loader requires %u or later!\n", u32tmp, 10043); CG3Quit(1); } grammar->is_binary = true; fread_throw(&u32tmp, sizeof(uint32_t), 1, input); fields = (uint32_t)ntohl(u32tmp); grammar->has_dep = (fields & (1 << 0)) != 0; grammar->sub_readings_ltr = (fields & (1 << 2)) != 0; grammar->has_relations = (fields & (1 << 13)) != 0; if (fields & (1 << 1)) { ucnv_reset(conv); fread_throw(&u32tmp, sizeof(uint32_t), 1, input); u32tmp = (uint32_t)ntohl(u32tmp); fread_throw(&cbuffers[0][0], 1, u32tmp, input); i32tmp = ucnv_toUChars(conv, &grammar->mapping_prefix, 1, &cbuffers[0][0], u32tmp, &err); } // Keep track of which sets that the varstring tags used; we can't just assign them as sets are not loaded yet typedef std::map<uint32_t, uint32Vector> tag_varsets_t; tag_varsets_t tag_varsets; u32tmp = 0; if (fields & (1 << 3)) { fread_throw(&u32tmp, sizeof(uint32_t), 1, input); u32tmp = (uint32_t)ntohl(u32tmp); } uint32_t num_single_tags = u32tmp; grammar->single_tags_list.resize(num_single_tags); for (uint32_t i = 0; i < num_single_tags; 
i++) { Tag* t = grammar->allocateTag(); t->type |= T_GRAMMAR; uint32_t fields = 0; fread_throw(&u32tmp, sizeof(uint32_t), 1, input); fields = (uint32_t)ntohl(u32tmp); if (fields & (1 << 0)) { fread_throw(&u32tmp, sizeof(uint32_t), 1, input); t->number = (uint32_t)ntohl(u32tmp); } if (fields & (1 << 1)) { fread_throw(&u32tmp, sizeof(uint32_t), 1, input); t->hash = (uint32_t)ntohl(u32tmp); } if (fields & (1 << 2)) { fread_throw(&u32tmp, sizeof(uint32_t), 1, input); t->plain_hash = (uint32_t)ntohl(u32tmp); } if (fields & (1 << 3)) { fread_throw(&u32tmp, sizeof(uint32_t), 1, input); t->seed = (uint32_t)ntohl(u32tmp); } if (fields & (1 << 4)) { fread_throw(&u32tmp, sizeof(uint32_t), 1, input); t->type = (uint32_t)ntohl(u32tmp); } if (fields & (1 << 5)) { fread_throw(&u32tmp, sizeof(uint32_t), 1, input); t->comparison_hash = (uint32_t)ntohl(u32tmp); } if (fields & (1 << 6)) { fread_throw(&u32tmp, sizeof(uint32_t), 1, input); t->comparison_op = (C_OPS)ntohl(u32tmp); } if (fields & (1 << 7)) { fread_throw(&i32tmp, sizeof(int32_t), 1, input); t->comparison_val = (int32_t)ntohl(i32tmp); if (t->comparison_val <= std::numeric_limits<int32_t>::min()) { t->comparison_val = NUMERIC_MIN; } if (t->comparison_val >= std::numeric_limits<int32_t>::max()) { t->comparison_val = NUMERIC_MAX; } } if (fields & (1 << 8)) { fread_throw(&u32tmp, sizeof(uint32_t), 1, input); u32tmp = (uint32_t)ntohl(u32tmp); if (u32tmp) { ucnv_reset(conv); fread_throw(&cbuffers[0][0], 1, u32tmp, input); i32tmp = ucnv_toUChars(conv, &gbuffers[0][0], CG3_BUFFER_SIZE - 1, &cbuffers[0][0], u32tmp, &err); t->tag = &gbuffers[0][0]; } } if (fields & (1 << 9)) { fread_throw(&u32tmp, sizeof(uint32_t), 1, input); u32tmp = (uint32_t)ntohl(u32tmp); if (u32tmp) { ucnv_reset(conv); fread_throw(&cbuffers[0][0], 1, u32tmp, input); i32tmp = ucnv_toUChars(conv, &gbuffers[0][0], CG3_BUFFER_SIZE - 1, &cbuffers[0][0], u32tmp, &err); UParseError pe; UErrorCode status = U_ZERO_ERROR; if (t->type & T_CASE_INSENSITIVE) { t->regexp = 
uregex_open(&gbuffers[0][0], i32tmp, UREGEX_CASE_INSENSITIVE, &pe, &status); } else { t->regexp = uregex_open(&gbuffers[0][0], i32tmp, 0, &pe, &status); } if (status != U_ZERO_ERROR) { u_fprintf(ux_stderr, "Error: uregex_open returned %s trying to parse tag %S - cannot continue!\n", u_errorName(status), t->tag.c_str()); CG3Quit(1); } } } if (fields & (1 << 10)) { fread_throw(&u32tmp, sizeof(uint32_t), 1, input); uint32_t num = (uint32_t)ntohl(u32tmp); t->allocateVsSets(); t->vs_sets->reserve(num); tag_varsets[t->number].reserve(num); for (size_t i = 0; i < num; ++i) { fread_throw(&u32tmp, sizeof(uint32_t), 1, input); u32tmp = (uint32_t)ntohl(u32tmp); tag_varsets[t->number].push_back(u32tmp); } } if (fields & (1 << 11)) { fread_throw(&u32tmp, sizeof(uint32_t), 1, input); uint32_t num = (uint32_t)ntohl(u32tmp); t->allocateVsNames(); t->vs_names->reserve(num); for (size_t i = 0; i < num; ++i) { fread_throw(&u32tmp, sizeof(uint32_t), 1, input); u32tmp = (uint32_t)ntohl(u32tmp); if (u32tmp) { ucnv_reset(conv); fread_throw(&cbuffers[0][0], 1, u32tmp, input); i32tmp = ucnv_toUChars(conv, &gbuffers[0][0], CG3_BUFFER_SIZE - 1, &cbuffers[0][0], u32tmp, &err); t->vs_names->push_back(&gbuffers[0][0]); } } } grammar->single_tags[t->hash] = t; grammar->single_tags_list[t->number] = t; if (t->tag.size() == 1 && t->tag[0] == '*') { grammar->tag_any = t->hash; } } u32tmp = 0; if (fields & (1 << 5)) { fread_throw(&u32tmp, sizeof(uint32_t), 1, input); u32tmp = (uint32_t)ntohl(u32tmp); } uint32_t num_pref_targets = u32tmp; for (uint32_t i = 0; i < num_pref_targets; i++) { fread_throw(&u32tmp, sizeof(uint32_t), 1, input); u32tmp = (uint32_t)ntohl(u32tmp); grammar->preferred_targets.push_back(u32tmp); } u32tmp = 0; if (fields & (1 << 6)) { fread_throw(&u32tmp, sizeof(uint32_t), 1, input); u32tmp = (uint32_t)ntohl(u32tmp); } uint32_t num_par_pairs = u32tmp; for (uint32_t i = 0; i < num_par_pairs; i++) { fread_throw(&u32tmp, sizeof(uint32_t), 1, input); uint32_t left = 
(uint32_t)ntohl(u32tmp); fread_throw(&u32tmp, sizeof(uint32_t), 1, input); uint32_t right = (uint32_t)ntohl(u32tmp); grammar->parentheses[left] = right; grammar->parentheses_reverse[right] = left; } u32tmp = 0; if (fields & (1 << 7)) { fread_throw(&u32tmp, sizeof(uint32_t), 1, input); u32tmp = (uint32_t)ntohl(u32tmp); } uint32_t num_par_anchors = u32tmp; for (uint32_t i = 0; i < num_par_anchors; i++) { fread_throw(&u32tmp, sizeof(uint32_t), 1, input); uint32_t left = (uint32_t)ntohl(u32tmp); fread_throw(&u32tmp, sizeof(uint32_t), 1, input); uint32_t right = (uint32_t)ntohl(u32tmp); grammar->anchors[left] = right; } u32tmp = 0; if (fields & (1 << 8)) { fread_throw(&u32tmp, sizeof(uint32_t), 1, input); u32tmp = (uint32_t)ntohl(u32tmp); } uint32_t num_sets = u32tmp; grammar->sets_list.resize(num_sets); for (uint32_t i = 0; i < num_sets; i++) { Set* s = grammar->allocateSet(); uint32_t fields = 0; fread_throw(&u32tmp, sizeof(uint32_t), 1, input); fields = (uint32_t)ntohl(u32tmp); if (fields & (1 << 0)) { fread_throw(&u32tmp, sizeof(uint32_t), 1, input); s->number = (uint32_t)ntohl(u32tmp); } if (fields & (1 << 1)) { fread_throw(&u32tmp, sizeof(uint32_t), 1, input); s->hash = (uint32_t)ntohl(u32tmp); } if (fields & (1 << 2)) { fread_throw(&u8tmp, sizeof(uint8_t), 1, input); s->type = u8tmp; } if (fields & (1 << 3)) { fread_throw(&u32tmp, sizeof(uint32_t), 1, input); u32tmp = (uint32_t)ntohl(u32tmp); if (u32tmp) { trie_unserialize(s->trie, input, *grammar, u32tmp); } fread_throw(&u32tmp, sizeof(uint32_t), 1, input); u32tmp = (uint32_t)ntohl(u32tmp); if (u32tmp) { trie_unserialize(s->trie_special, input, *grammar, u32tmp); } } if (fields & (1 << 4)) { fread_throw(&u32tmp, sizeof(uint32_t), 1, input); u32tmp = (uint32_t)ntohl(u32tmp); uint32_t num_set_ops = u32tmp; for (uint32_t j = 0; j < num_set_ops; j++) { fread_throw(&u32tmp, sizeof(uint32_t), 1, input); u32tmp = (uint32_t)ntohl(u32tmp); s->set_ops.push_back(u32tmp); } } if (fields & (1 << 5)) { fread_throw(&u32tmp, 
sizeof(uint32_t), 1, input); u32tmp = (uint32_t)ntohl(u32tmp); uint32_t num_sets = u32tmp; for (uint32_t j = 0; j < num_sets; j++) { fread_throw(&u32tmp, sizeof(uint32_t), 1, input); u32tmp = (uint32_t)ntohl(u32tmp); s->sets.push_back(u32tmp); } } if (fields & (1 << 6)) { fread_throw(&u32tmp, sizeof(uint32_t), 1, input); u32tmp = (uint32_t)ntohl(u32tmp); if (u32tmp) { ucnv_reset(conv); fread_throw(&cbuffers[0][0], 1, u32tmp, input); i32tmp = ucnv_toUChars(conv, &gbuffers[0][0], CG3_BUFFER_SIZE - 1, &cbuffers[0][0], u32tmp, &err); s->setName(&gbuffers[0][0]); } } grammar->sets_by_contents[s->hash] = s; grammar->sets_list[s->number] = s; } // Actually assign sets to the varstring tags now that sets are loaded for (auto iter : tag_varsets) { Tag* t = grammar->single_tags_list[iter.first]; for (auto uit : iter.second) { Set* s = grammar->sets_list[uit]; t->vs_sets->push_back(s); } } if (fields & (1 << 9)) { fread_throw(&u32tmp, sizeof(uint32_t), 1, input); u32tmp = (uint32_t)ntohl(u32tmp); grammar->delimiters = grammar->sets_by_contents.find(u32tmp)->second; } if (fields & (1 << 10)) { fread_throw(&u32tmp, sizeof(uint32_t), 1, input); u32tmp = (uint32_t)ntohl(u32tmp); grammar->soft_delimiters = grammar->sets_by_contents.find(u32tmp)->second; } u32tmp = 0; if (fields & (1 << 11)) { fread_throw(&u32tmp, sizeof(uint32_t), 1, input); u32tmp = (uint32_t)ntohl(u32tmp); } uint32_t num_contexts = u32tmp; contexts_list.resize(num_contexts); for (uint32_t i = 0; i < num_contexts; i++) { ContextualTest* t = readContextualTest_10043(input); grammar->contexts[t->hash] = t; contexts_list[i] = t; } u32tmp = 0; if (fields & (1 << 12)) { fread_throw(&u32tmp, sizeof(uint32_t), 1, input); u32tmp = (uint32_t)ntohl(u32tmp); } uint32_t num_rules = u32tmp; grammar->rule_by_number.resize(num_rules); for (uint32_t i = 0; i < num_rules; i++) { Rule* r = grammar->allocateRule(); uint32_t fields = 0; fread_throw(&u32tmp, sizeof(uint32_t), 1, input); fields = (uint32_t)ntohl(u32tmp); if (fields & 
(1 << 0)) { fread_throw(&i32tmp, sizeof(int32_t), 1, input); r->section = (int32_t)ntohl(i32tmp); } if (fields & (1 << 1)) { fread_throw(&u32tmp, sizeof(uint32_t), 1, input); r->type = (KEYWORDS)ntohl(u32tmp); } if (fields & (1 << 2)) { fread_throw(&u32tmp, sizeof(uint32_t), 1, input); r->line = (uint32_t)ntohl(u32tmp); } if (fields & (1 << 3)) { fread_throw(&u32tmp, sizeof(uint32_t), 1, input); r->flags = (uint32_t)ntohl(u32tmp); } if (fields & (1 << 4)) { fread_throw(&u32tmp, sizeof(uint32_t), 1, input); u32tmp = (uint32_t)ntohl(u32tmp); if (u32tmp) { ucnv_reset(conv); fread_throw(&cbuffers[0][0], 1, u32tmp, input); i32tmp = ucnv_toUChars(conv, &gbuffers[0][0], CG3_BUFFER_SIZE - 1, &cbuffers[0][0], u32tmp, &err); r->setName(&gbuffers[0][0]); } } if (fields & (1 << 5)) { fread_throw(&u32tmp, sizeof(uint32_t), 1, input); r->target = (uint32_t)ntohl(u32tmp); } if (fields & (1 << 6)) { fread_throw(&u32tmp, sizeof(uint32_t), 1, input); r->wordform = grammar->single_tags_list[(uint32_t)ntohl(u32tmp)]; } if (fields & (1 << 7)) { fread_throw(&u32tmp, sizeof(uint32_t), 1, input); r->varname = (uint32_t)ntohl(u32tmp); } if (fields & (1 << 8)) { fread_throw(&u32tmp, sizeof(uint32_t), 1, input); r->varvalue = (uint32_t)ntohl(u32tmp); } if (fields & (1 << 9)) { fread_throw(&u32tmp, sizeof(uint32_t), 1, input); u32tmp = (uint32_t)ntohl(u32tmp); int32_t v = u32tmp; if (u32tmp & (1 << 31)) { u32tmp &= ~(1 << 31); v = u32tmp; v = -v; } r->sub_reading = v; } if (fields & (1 << 10)) { fread_throw(&u32tmp, sizeof(uint32_t), 1, input); r->childset1 = (uint32_t)ntohl(u32tmp); } if (fields & (1 << 11)) { fread_throw(&u32tmp, sizeof(uint32_t), 1, input); r->childset2 = (uint32_t)ntohl(u32tmp); } if (fields & (1 << 12)) { fread_throw(&u32tmp, sizeof(uint32_t), 1, input); r->maplist = grammar->sets_list[(uint32_t)ntohl(u32tmp)]; } if (fields & (1 << 13)) { fread_throw(&u32tmp, sizeof(uint32_t), 1, input); r->sublist = grammar->sets_list[(uint32_t)ntohl(u32tmp)]; } if (fields & (1 << 14)) 
{ fread_throw(&u32tmp, sizeof(uint32_t), 1, input); r->number = (uint32_t)ntohl(u32tmp); } fread_throw(&u32tmp, sizeof(uint32_t), 1, input); u32tmp = (uint32_t)ntohl(u32tmp); if (u32tmp) { r->dep_target = contexts_list[u32tmp - 1]; } fread_throw(&u32tmp, sizeof(uint32_t), 1, input); u32tmp = (uint32_t)ntohl(u32tmp); uint32_t num_dep_tests = u32tmp; for (uint32_t j = 0; j < num_dep_tests; j++) { fread_throw(&u32tmp, sizeof(uint32_t), 1, input); u32tmp = (uint32_t)ntohl(u32tmp); ContextualTest* t = contexts_list[u32tmp - 1]; r->addContextualTest(t, r->dep_tests); } fread_throw(&u32tmp, sizeof(uint32_t), 1, input); u32tmp = (uint32_t)ntohl(u32tmp); uint32_t num_tests = u32tmp; for (uint32_t j = 0; j < num_tests; j++) { fread_throw(&u32tmp, sizeof(uint32_t), 1, input); u32tmp = (uint32_t)ntohl(u32tmp); ContextualTest* t = contexts_list[u32tmp - 1]; r->addContextualTest(t, r->tests); } grammar->rule_by_number[r->number] = r; } // Bind the named templates to where they are used for (auto it : deferred_tmpls) { auto tmt = templates.find(it.second); it.first->tmpl = tmt->second; } ucnv_close(conv); // Create the dummy set grammar->allocateDummySet(); grammar->is_binary = false; return 0; }
// Encodes the string into a newly allocated array of bytes using the given text encoding.
//
// A null terminator, as wide as one code unit of the target encoding, is appended unless the
// string already ends with a null character. For E_UTF16 and E_UTF32 room for a BOM is reserved.
//
// eEncoding:     The encoding of the resultant bytes.
// uOutputLength: Receives the number of bytes in the returned array, including the terminator.
//                It is set to zero when nothing is returned.
// Returns:       An array allocated with new[] that the caller must release with delete[], or
//                null when the string is empty or the conversion fails.
i8_q* QStringUnicode::ToBytes(const EQTextEncoding &eEncoding, unsigned int &uOutputLength) const
{
    i8_q* pOutputBytes = null_q;
    uOutputLength = 0;

    const unsigned int CHARACTERS_COUNT = m_strString.countChar32(); // It does not include the final null character

    if(CHARACTERS_COUNT > 0)
    {
        UErrorCode errorCode = U_ZERO_ERROR;
        UConverter* pConverter = QStringUnicode::GetConverter(eEncoding);
        const unsigned int CODE_UNITS_COUNT = m_strString.length(); // It does not include the final null character

        // Depending on whether the string is already null-terminated or not, a null terminator will be added at the
        // end of the resultant array of bytes. char32At expects a code unit offset, not a code point index, so the
        // last character is the one that contains the last code unit
        const unsigned int ADD_NULL_TERMINATION = m_strString.char32At(CODE_UNITS_COUNT - 1) == 0 ? 0 : 1;

        // By default, it is assigned as if it was to be encoded in ASCII or ISO 8859-1 (8-bits per character)
        int32_t nRequiredLengthBytes = CHARACTERS_COUNT + ADD_NULL_TERMINATION;

        // Width in bytes of the null terminator for the chosen encoding (zero when the encoding is not listed below)
        unsigned int uTerminatorSize = 0;

        // Output size calculation
        switch(eEncoding)
        {
        case EQTextEncoding::E_ASCII:
        case EQTextEncoding::E_ISO88591:
            uTerminatorSize = sizeof(i8_q);
            break;
        case EQTextEncoding::E_UTF8:
            // It is not possible to know in advance how much memory the UTF-8 will require
            // (each character could be represented by 1, 2, 3 or 4 8-bits code units) so we reserve the maximum
            nRequiredLengthBytes = sizeof(i32_q) * (CHARACTERS_COUNT + ADD_NULL_TERMINATION);
            uTerminatorSize = sizeof(i8_q);
            break;
        case EQTextEncoding::E_UTF16:
            // We already know the number of 16 bits code units. A BOM character is added at the beginning
            nRequiredLengthBytes = sizeof(i16_q) * (CODE_UNITS_COUNT + 1 + ADD_NULL_TERMINATION);
            uTerminatorSize = sizeof(i16_q);
            break;
        case EQTextEncoding::E_UTF16BE:
        case EQTextEncoding::E_UTF16LE:
            // We already know the number of 16 bits code units
            nRequiredLengthBytes = sizeof(i16_q) * (CODE_UNITS_COUNT + ADD_NULL_TERMINATION);
            uTerminatorSize = sizeof(i16_q);
            break;
        case EQTextEncoding::E_UTF32:
            // The width of UTF32 characters is always 32 bits. A BOM character is added at the beginning
            nRequiredLengthBytes = sizeof(i32_q) * (CHARACTERS_COUNT + 1 + ADD_NULL_TERMINATION);
            uTerminatorSize = sizeof(i32_q);
            break;
        case EQTextEncoding::E_UTF32BE:
        case EQTextEncoding::E_UTF32LE:
            // The width of UTF32 characters is always 32 bits
            nRequiredLengthBytes = sizeof(i32_q) * (CHARACTERS_COUNT + ADD_NULL_TERMINATION);
            uTerminatorSize = sizeof(i32_q);
            break;
        }

        // Conversion from native encoding (UTF16) to input encoding
        const UChar* pBuffer = m_strString.getBuffer();
        pOutputBytes = new char[nRequiredLengthBytes];

        ucnv_reset(pConverter);
        const int32_t nWrittenBytes = ucnv_fromUChars(pConverter, pOutputBytes, nRequiredLengthBytes, pBuffer, CODE_UNITS_COUNT, &errorCode);

        // On failure (including overflow) the returned length does not describe what is in the buffer, so it
        // must not be used to place the terminator
        if(U_FAILURE(errorCode))
        {
            delete[] pOutputBytes;
            return null_q;
        }

        uOutputLength = nWrittenBytes;

        // If it was necessary to add a null terminator...
        if(ADD_NULL_TERMINATION == 1)
        {
            // The last character has to be set to zero (ICU adds only 1 byte at the end as the null terminator)
            // and it has to be added to the output length
            memset(&pOutputBytes[uOutputLength], 0, uTerminatorSize);
            uOutputLength += uTerminatorSize;
        }
    }

    return pOutputBytes;
}
void charsetConverter_icu::convert (utility::inputStream& in, utility::outputStream& out, status* st) { UErrorCode err = U_ZERO_ERROR; ucnv_reset(m_from); ucnv_reset(m_to); if (st) new (st) status(); // From buffers byte_t cpInBuffer[16]; // stream data put here const size_t outSize = ucnv_getMinCharSize(m_from) * sizeof(cpInBuffer) * sizeof(UChar); std::vector <UChar> uOutBuffer(outSize); // Unicode chars end up here // To buffers // converted (char) data end up here const size_t cpOutBufferSz = ucnv_getMaxCharSize(m_to) * outSize; std::vector <char> cpOutBuffer(cpOutBufferSz); // Tell ICU what to do when encountering an illegal byte sequence if (m_options.silentlyReplaceInvalidSequences) { // Set replacement chars for when converting from Unicode to codepage icu::UnicodeString substString(m_options.invalidSequence.c_str()); ucnv_setSubstString(m_to, substString.getTerminatedBuffer(), -1, &err); if (U_FAILURE(err)) throw exceptions::charset_conv_error("[ICU] Error when setting substitution string."); } else { // Tell ICU top stop (and return an error) on illegal byte sequences ucnv_setToUCallBack (m_from, UCNV_TO_U_CALLBACK_STOP, UCNV_SUB_STOP_ON_ILLEGAL, NULL, NULL, &err); if (U_FAILURE(err)) throw exceptions::charset_conv_error("[ICU] Error when setting ToU callback."); ucnv_setFromUCallBack (m_to, UCNV_FROM_U_CALLBACK_STOP, UCNV_SUB_STOP_ON_ILLEGAL, NULL, NULL, &err); if (U_FAILURE(err)) throw exceptions::charset_conv_error("[ICU] Error when setting FromU callback."); } // Input data available while (!in.eof()) { // Read input data into buffer size_t inLength = in.read(cpInBuffer, sizeof(cpInBuffer)); // Beginning of read data const char* source = reinterpret_cast <const char*>(&cpInBuffer[0]); const char* sourceLimit = source + inLength; // end + 1 UBool flush = in.eof(); // is this last run? 
UErrorCode toErr; // Loop until all source has been processed do { // Set up target pointers UChar* target = &uOutBuffer[0]; UChar* targetLimit = &target[0] + outSize; toErr = U_ZERO_ERROR; ucnv_toUnicode(m_from, &target, targetLimit, &source, sourceLimit, NULL, flush, &toErr); if (st) st->inputBytesRead += (source - reinterpret_cast <const char*>(&cpInBuffer[0])); if (toErr != U_BUFFER_OVERFLOW_ERROR && U_FAILURE(toErr)) { if (toErr == U_INVALID_CHAR_FOUND || toErr == U_TRUNCATED_CHAR_FOUND || toErr == U_ILLEGAL_CHAR_FOUND) { // Error will be thrown later (*) } else { throw exceptions::charset_conv_error("[ICU] Error converting to Unicode from " + m_source.getName()); } } // The Unicode source is the buffer just written and the limit // is where the previous conversion stopped (target is moved in the conversion) const UChar* uSource = &uOutBuffer[0]; UChar* uSourceLimit = &target[0]; UErrorCode fromErr; // Loop until converted chars are fully written do { char* cpTarget = &cpOutBuffer[0]; const char* cpTargetLimit = &cpOutBuffer[0] + cpOutBufferSz; fromErr = U_ZERO_ERROR; // Write converted bytes (Unicode) to destination codepage ucnv_fromUnicode(m_to, &cpTarget, cpTargetLimit, &uSource, uSourceLimit, NULL, flush, &fromErr); if (st) { // Decrement input bytes count by the number of input bytes in error char errBytes[16]; int8_t errBytesLen = sizeof(errBytes); UErrorCode errBytesErr = U_ZERO_ERROR; ucnv_getInvalidChars(m_from, errBytes, &errBytesLen, &errBytesErr); st->inputBytesRead -= errBytesLen; st->outputBytesWritten += cpTarget - &cpOutBuffer[0]; } // (*) If an error occurred while converting from input charset, throw it now if (toErr == U_INVALID_CHAR_FOUND || toErr == U_TRUNCATED_CHAR_FOUND || toErr == U_ILLEGAL_CHAR_FOUND) { throw exceptions::illegal_byte_sequence_for_charset(); } if (fromErr != U_BUFFER_OVERFLOW_ERROR && U_FAILURE(fromErr)) { if (fromErr == U_INVALID_CHAR_FOUND || fromErr == U_TRUNCATED_CHAR_FOUND || fromErr == U_ILLEGAL_CHAR_FOUND) { 
throw exceptions::illegal_byte_sequence_for_charset(); } else { throw exceptions::charset_conv_error("[ICU] Error converting from Unicode to " + m_dest.getName()); } } // Write to destination stream out.write(&cpOutBuffer[0], (cpTarget - &cpOutBuffer[0])); } while (fromErr == U_BUFFER_OVERFLOW_ERROR); } while (toErr == U_BUFFER_OVERFLOW_ERROR); } }