/** * Formats a int64_t number into a base 10 string representation, and NULL terminates it. * @param number The number to format * @param outputStr The string to output to. Must be at least MAX_DIGITS+2 in length (21), * to hold the longest int64_t value. * @return the number of digits written, not including the sign. */ static int32_t formatBase10(int64_t number, char *outputStr) { // The number is output backwards, starting with the LSD. // Fill the buffer from the far end. After the number is complete, // slide the string contents to the front. const int32_t MAX_IDX = MAX_DIGITS+2; int32_t destIdx = MAX_IDX; outputStr[--destIdx] = 0; int64_t n = number; if (number < 0) { // Negative numbers are slightly larger than a postive outputStr[--destIdx] = (char)(-(n % 10) + kZero); n /= -10; } do { outputStr[--destIdx] = (char)(n % 10 + kZero); n /= 10; } while (n > 0); if (number < 0) { outputStr[--destIdx] = '-'; } // Slide the number to the start of the output str U_ASSERT(destIdx >= 0); int32_t length = MAX_IDX - destIdx; uprv_memmove(outputStr, outputStr+MAX_IDX-length, length); return length; }
/**
 * Moves count UChars from src to dest; the two regions may overlap.
 * Nothing is moved when count is zero or negative.
 *
 * @param dest  The destination buffer.
 * @param src   The source buffer.
 * @param count The number of UChars to move.
 * @return dest
 */
U_CAPI UChar * U_EXPORT2
u_memmove(UChar *dest, const UChar *src, int32_t count) {
    if(count > 0) {
        /* widen before multiplying so that a large count cannot overflow int */
        uprv_memmove(dest, src, (size_t)count*U_SIZEOF_UCHAR);
    }
    return dest;
}
/**
 * Appends a tag to the text already collected in a buffer. When the buffer
 * is not empty, an underscore separator is written first. The caller must
 * guarantee that the buffer has room for the separator and the tag, and
 * that the tag is not a zero-length string.
 *
 * @param tag The tag to add.
 * @param tagLength The length of the tag.
 * @param buffer The output buffer.
 * @param bufferLength On input, the number of chars already in the buffer;
 *                     on output, the number of chars after appending.
 **/
static void U_CALLCONV
appendTag(
    const char* tag,
    int32_t tagLength,
    char* buffer,
    int32_t* bufferLength) {

    int32_t used = *bufferLength;

    /* a separator is only needed between tags, never in front of the first */
    if (used > 0) {
        buffer[used++] = '_';
    }

    uprv_memmove(buffer + used, tag, tagLength);

    *bufferLength = used + tagLength;
}
/*
 * Insertion sort over an array of fixed-size items.
 * For every item, the insertion point within the already-sorted prefix is
 * located with uprv_stableBinarySearch(); the items between the insertion
 * point and the item are then shifted up by one slot.
 *
 * array    - start of the item array
 * length   - number of items
 * itemSize - size of one item in bytes
 * cmp      - comparison function, passed through to the binary search
 * context  - opaque pointer, passed through to the binary search
 * pv       - scratch space for one item (at least itemSize bytes)
 */
static void
doInsertionSort(char *array, int32_t length, int32_t itemSize,
                UComparator *cmp, const void *context, void *pv) {
    int32_t sorted=1; /* array[0..sorted[ is in order */

    while(sorted<length) {
        char *current=array+sorted*itemSize;
        int32_t found=uprv_stableBinarySearch(array, sorted, current, itemSize, cmp, context);
        /*
         * negative result: bitwise complement of the insertion point;
         * otherwise: index of an equal item, insert one past it
         */
        int32_t target= found<0 ? ~found : found+1;

        if(target<sorted) {
            char *slot=array+target*itemSize;

            uprv_memcpy(pv, current, itemSize); /* save array[sorted] */
            uprv_memmove(slot+itemSize, slot, (sorted-target)*itemSize);
            uprv_memcpy(slot, pv, itemSize); /* array[target]=saved item */
        }

        ++sorted;
    }
}
/*
 * On WIN32, replace a "short" file name at the end of pathname with its long
 * form, keeping the directory prefix of the original pathname.
 * On other platforms, and whenever no replacement is made, pathname itself is
 * returned.
 *
 * When a replacement is made, the returned pointer refers to a function-static
 * buffer: it is overwritten by the next call and must not be freed.
 * If prefix plus long name do not fit into that buffer, the original
 * pathname is returned unchanged.
 */
U_CAPI const char * U_EXPORT2
getLongPathname(const char *pathname) {
#ifdef WIN32
    /* anticipate problems with "short" pathnames */
    static WIN32_FIND_DATA info;
    HANDLE file=FindFirstFile(pathname, &info);
    if(file!=INVALID_HANDLE_VALUE) {
        if(info.cAlternateFileName[0]!=0) {
            /* this file has a short name, get and use the long one */
            const char *basename=findBasename(pathname);
            if(basename==pathname) {
                /* no directory prefix: the long name alone is the result */
                pathname=info.cFileName;
            } else {
                size_t prefixLength=(size_t)(basename-pathname);
                size_t nameSize=uprv_strlen(info.cFileName)+1; /* including the NUL */

                /* only build the combined name if it fits into cFileName[] */
                if(prefixLength+nameSize<=sizeof(info.cFileName)) {
                    /* prepend the long filename with the original path */
                    uprv_memmove(info.cFileName+prefixLength, info.cFileName, nameSize);
                    uprv_memcpy(info.cFileName, pathname, prefixLength);
                    pathname=info.cFileName;
                }
                /* else: too long, keep the caller's pathname */
            }
        }
        FindClose(file);
    }
#endif
    return pathname;
}
/*
 * Private function used for buffering input.
 *
 * Moves any unread UChars to the front of f->fUCBuffer, reads more bytes
 * from f->fFile, converts them behind the retained UChars, and resets
 * f->str.fPos/f->str.fLimit to describe the refilled data.
 * Does nothing for string-backed UFILEs (f->fFile == NULL), and does not
 * read again from file number 0 while unread data remains.
 */
void
ufile_fill_uchar_buffer(UFILE *f)
{
    char                inBytes[UFILE_CHARBUFFER_SIZE];
    u_localized_string *window;
    const char         *inPos;
    const char         *inEnd;
    UChar              *outPos;
    UErrorCode          convStatus;
    int32_t             pending;      /* unread UChars still in the buffer */
    int32_t             freeUnits;    /* UChars of space behind the unread data */
    int32_t             divisor;
    int32_t             byteBudget;   /* max. codepage bytes to read */
    int32_t             got;          /* bytes actually read */

    /* There is nothing to do. It's a string. */
    if (f->fFile == NULL) {
        return;
    }

    window = &f->str;
    pending = (int32_t)(window->fLimit - window->fPos);

    /* Don't read from stdin too many times. There is still some data. */
    if (pending > 0 && f->fFileno == 0) {
        return;
    }

    /* shift the unread data to the start of the buffer */
    if (pending != 0) {
        uprv_memmove(f->fUCBuffer, window->fPos, pending * sizeof(UChar));
    }

    /* how many UChars are free, and how many codepage bytes may fill them */
    freeUnits = UFILE_UCHARBUFFER_SIZE - pending;
    if (f->fConverter != NULL) {
        divisor = 2*ucnv_getMinCharSize(f->fConverter);
    } else {
        /* weiv: without a converter the invariant conversion is used, charwidth = 1 */
        divisor = 1;
    }
    byteBudget = freeUnits / divisor;

    /* read the raw bytes */
    if (f->fFileno != 0) {
        /* A normal file */
        got = (int32_t)fread(inBytes,
                             sizeof(char),
                             ufmt_min(byteBudget, UFILE_CHARBUFFER_SIZE),
                             f->fFile);
    } else {
        /* Special case. Read from stdin one line at a time. */
        char *line = fgets(inBytes, ufmt_min(byteBudget, UFILE_CHARBUFFER_SIZE), f->fFile);
        if (line) {
            got = (int32_t)uprv_strlen(inBytes);
        } else {
            got = (int32_t)0;
        }
    }

    /* set up the conversion */
    convStatus = U_ZERO_ERROR;
    inPos  = inBytes;
    inEnd  = inBytes + got;
    outPos = f->fUCBuffer + pending;

    if (f->fConverter == NULL) {
        /* weiv: do the invariant conversion */
        u_charsToUChars(inPos, outPos, got);
        outPos += got;
    } else {
        /* We have a valid converter: perform the conversion */
        ucnv_toUnicode(f->fConverter,
                       &outPos,
                       f->fUCBuffer + UFILE_UCHARBUFFER_SIZE,
                       &inPos,
                       inEnd,
                       NULL,
                       (UBool)(feof(f->fFile) != 0),
                       &convStatus);
    }

    /* update the pointers into our array */
    window->fPos   = window->fBuffer;
    window->fLimit = outPos;
}
/**
 * Create a tag string from the supplied parameters. The lang, script and region
 * parameters may be NULL pointers. If they are, their corresponding length parameters
 * must be less than or equal to 0.
 *
 * If any of the language, script or region parameters are empty, and the alternateTags
 * parameter is not NULL, it will be parsed for potential language, script and region tags
 * to be used when constructing the new tag. If the alternateTags parameter is NULL, or
 * it contains no language tag, the default tag for the unknown language is used.
 *
 * If trailing data is supplied and does not start with '@', it is separated from
 * the preceding tags by one '_' (or by two if no region was appended).
 *
 * If the length of the new string exceeds the capacity of the output buffer,
 * the function copies as many bytes to the output buffer as it can, and returns
 * the error U_BUFFER_OVERFLOW_ERROR. The returned length always counts the
 * separators and the trailing data, whether or not they fit.
 *
 * If an illegal argument is provided, the function returns the error
 * U_ILLEGAL_ARGUMENT_ERROR.
 *
 * Note that this function can return the warning U_STRING_NOT_TERMINATED_WARNING if
 * the tag string fits in the output buffer, but the null terminator doesn't.
 *
 * @param lang The language tag to use.
 * @param langLength The length of the language tag.
 * @param script The script tag to use.
 * @param scriptLength The length of the script tag.
 * @param region The region tag to use.
 * @param regionLength The length of the region tag.
 * @param trailing Any trailing data to append to the new tag.
 * @param trailingLength The length of the trailing data.
 * @param alternateTags A string containing any alternate tags.
 * @param tag The output buffer.
 * @param tagCapacity The capacity of the output buffer.
 * @param err A pointer to a UErrorCode for error reporting.
 * @return The length of the tag string, which may be greater than tagCapacity, or -1 on error.
 **/
static int32_t U_CALLCONV
createTagStringWithAlternates(
    const char* lang,
    int32_t langLength,
    const char* script,
    int32_t scriptLength,
    const char* region,
    int32_t regionLength,
    const char* trailing,
    int32_t trailingLength,
    const char* alternateTags,
    char* tag,
    int32_t tagCapacity,
    UErrorCode* err) {

    if (U_FAILURE(*err)) {
        goto error;
    }
    else if (tag == NULL ||
             tagCapacity <= 0 ||
             langLength >= ULOC_LANG_CAPACITY ||
             scriptLength >= ULOC_SCRIPT_CAPACITY ||
             regionLength >= ULOC_COUNTRY_CAPACITY) {
        goto error;
    }
    else {
        /**
         * ULOC_FULLNAME_CAPACITY will provide enough capacity
         * that we can build a string that contains the language,
         * script and region code without worrying about overrunning
         * the user-supplied buffer.
         **/
        char tagBuffer[ULOC_FULLNAME_CAPACITY];
        int32_t tagLength = 0;
        int32_t capacityRemaining = tagCapacity;
        UBool regionAppended = FALSE;

        if (langLength > 0) {
            appendTag(
                lang,
                langLength,
                tagBuffer,
                &tagLength);
        }
        else if (alternateTags == NULL) {
            /*
             * Append the value for an unknown language, if
             * we found no language.
             */
            appendTag(
                unknownLanguage,
                (int32_t)uprv_strlen(unknownLanguage),
                tagBuffer,
                &tagLength);
        }
        else {
            /*
             * Parse the alternateTags string for the language.
             */
            char alternateLang[ULOC_LANG_CAPACITY];
            int32_t alternateLangLength = sizeof(alternateLang);

            alternateLangLength =
                uloc_getLanguage(
                    alternateTags,
                    alternateLang,
                    alternateLangLength,
                    err);
            if(U_FAILURE(*err) ||
                alternateLangLength >= ULOC_LANG_CAPACITY) {
                goto error;
            }
            else if (alternateLangLength == 0) {
                /*
                 * Append the value for an unknown language, if
                 * we found no language.
                 */
                appendTag(
                    unknownLanguage,
                    (int32_t)uprv_strlen(unknownLanguage),
                    tagBuffer,
                    &tagLength);
            }
            else {
                appendTag(
                    alternateLang,
                    alternateLangLength,
                    tagBuffer,
                    &tagLength);
            }
        }

        if (scriptLength > 0) {
            appendTag(
                script,
                scriptLength,
                tagBuffer,
                &tagLength);
        }
        else if (alternateTags != NULL) {
            /*
             * Parse the alternateTags string for the script.
             */
            char alternateScript[ULOC_SCRIPT_CAPACITY];

            const int32_t alternateScriptLength =
                uloc_getScript(
                    alternateTags,
                    alternateScript,
                    sizeof(alternateScript),
                    err);

            if (U_FAILURE(*err) ||
                alternateScriptLength >= ULOC_SCRIPT_CAPACITY) {
                goto error;
            }
            else if (alternateScriptLength > 0) {
                appendTag(
                    alternateScript,
                    alternateScriptLength,
                    tagBuffer,
                    &tagLength);
            }
        }

        if (regionLength > 0) {
            appendTag(
                region,
                regionLength,
                tagBuffer,
                &tagLength);

            regionAppended = TRUE;
        }
        else if (alternateTags != NULL) {
            /*
             * Parse the alternateTags string for the region.
             */
            char alternateRegion[ULOC_COUNTRY_CAPACITY];

            const int32_t alternateRegionLength =
                uloc_getCountry(
                    alternateTags,
                    alternateRegion,
                    sizeof(alternateRegion),
                    err);
            if (U_FAILURE(*err) ||
                alternateRegionLength >= ULOC_COUNTRY_CAPACITY) {
                goto error;
            }
            else if (alternateRegionLength > 0) {
                appendTag(
                    alternateRegion,
                    alternateRegionLength,
                    tagBuffer,
                    &tagLength);

                regionAppended = TRUE;
            }
        }

        {
            const int32_t toCopy =
                tagLength >= tagCapacity ? tagCapacity : tagLength;

            /**
             * Copy the partial tag from our internal buffer to the supplied
             * target.
             **/
            uprv_memcpy(
                tag,
                tagBuffer,
                toCopy);

            capacityRemaining -= toCopy;
        }

        if (trailingLength > 0) {
            if (*trailing != '@') {
                /*
                 * One separator goes in front of the trailing data; an extra
                 * separator is required if no region was appended.
                 * Each separator is always counted in tagLength, but only
                 * stored while there is room, so that the returned length is
                 * correct for preflighting even when the buffer overflows.
                 * While capacityRemaining > 0, tagLength is a valid index
                 * into tag[].
                 */
                int32_t separators = regionAppended ? 1 : 2;

                while (separators-- > 0) {
                    if (capacityRemaining > 0) {
                        tag[tagLength] = '_';
                        --capacityRemaining;
                    }
                    ++tagLength;
                }
            }

            if (capacityRemaining > 0) {
                /*
                 * Copy the trailing data into the supplied buffer.  Use uprv_memmove, since we
                 * don't know if the user-supplied buffers overlap.
                 */
                const int32_t toCopy =
                    trailingLength >= capacityRemaining ? capacityRemaining : trailingLength;

                uprv_memmove(
                    &tag[tagLength],
                    trailing,
                    toCopy);
            }

            /* only a positive trailing length contributes to the result */
            tagLength += trailingLength;
        }

        return u_terminateChars(
                    tag,
                    tagCapacity,
                    tagLength,
                    err);
    }

error:

    /**
     * An overflow indicates the locale ID passed in
     * is ill-formed.  If we got here, and there was
     * no previous error, it's an implicit overflow.
     **/
    if (*err ==  U_BUFFER_OVERFLOW_ERROR ||
        U_SUCCESS(*err)) {
        *err = U_ILLEGAL_ARGUMENT_ERROR;
    }

    return -1;
}
static void storeMappingData(){ int32_t pos = -1; const UHashElement* element = NULL; ValueStruct* value = NULL; int32_t codepoint = 0; int32_t elementCount = 0; int32_t writtenElementCount = 0; int32_t mappingLength = 1; /* minimum mapping length */ int32_t oldMappingLength = 0; uint16_t trieWord =0; int32_t limitIndex = 0; if (hashTable == NULL) { return; } elementCount = uhash_count(hashTable); /*initialize the mapping data */ mappingData = (uint16_t*) uprv_calloc(mappingDataCapacity, U_SIZEOF_UCHAR); while(writtenElementCount < elementCount){ while( (element = uhash_nextElement(hashTable, &pos))!=NULL){ codepoint = element->key.integer; value = (ValueStruct*)element->value.pointer; /* store the start of indexes */ if(oldMappingLength != mappingLength){ /* Assume that index[] is used according to the enums defined */ if(oldMappingLength <=_SPREP_MAX_INDEX_TOP_LENGTH){ indexes[_SPREP_NORM_CORRECTNS_LAST_UNI_VERSION+mappingLength] = currentIndex; } if(oldMappingLength <= _SPREP_MAX_INDEX_TOP_LENGTH && mappingLength == _SPREP_MAX_INDEX_TOP_LENGTH +1){ limitIndex = currentIndex; } oldMappingLength = mappingLength; } if(value->length == mappingLength){ uint32_t savedTrieWord = 0; trieWord = currentIndex << 2; /* turn on the 2nd bit to signal that the following bits contain an index */ trieWord += 0x02; if(trieWord > _SPREP_TYPE_THRESHOLD){ fprintf(stderr,"trieWord cannot contain value greater than 0x%04X.\n",_SPREP_TYPE_THRESHOLD); exit(U_ILLEGAL_CHAR_FOUND); } /* figure out if the code point has type already stored */ savedTrieWord= utrie_get32(sprepTrie,codepoint,NULL); if(savedTrieWord!=0){ if((savedTrieWord- _SPREP_TYPE_THRESHOLD) == USPREP_PROHIBITED){ /* turn on the first bit in trie word */ trieWord += 0x01; }else{ /* * the codepoint has value something other than prohibited * and a mapping .. error! 
*/ fprintf(stderr,"Type for codepoint \\U%08X already set!.\n", (int)codepoint); exit(U_ILLEGAL_ARGUMENT_ERROR); } } /* now set the value in the trie */ if(!utrie_set32(sprepTrie,codepoint,trieWord)){ fprintf(stderr,"Could not set the value for code point.\n"); exit(U_ILLEGAL_ARGUMENT_ERROR); } /* written the trie word for the codepoint... increment the count*/ writtenElementCount++; /* sanity check are we exceeding the max number allowed */ if(currentIndex+value->length+1 > _SPREP_MAX_INDEX_VALUE){ fprintf(stderr, "Too many entries in the mapping table %i. Maximum allowed is %i\n", currentIndex+value->length, _SPREP_MAX_INDEX_VALUE); exit(U_INDEX_OUTOFBOUNDS_ERROR); } /* copy the mapping data */ if(currentIndex+value->length+1 <= mappingDataCapacity){ /* write the length */ if(mappingLength > _SPREP_MAX_INDEX_TOP_LENGTH ){ /* the cast here is safe since we donot expect the length to be > 65535 */ mappingData[currentIndex++] = (uint16_t) mappingLength; } /* copy the contents to mappindData array */ uprv_memmove(mappingData+currentIndex, value->mapping, value->length*U_SIZEOF_UCHAR); currentIndex += value->length; }else{ /* realloc */ UChar* newMappingData = (uint16_t*) uprv_malloc(U_SIZEOF_UCHAR * mappingDataCapacity*2); if(newMappingData == NULL){ fprintf(stderr, "Could not realloc the mapping data!\n"); exit(U_MEMORY_ALLOCATION_ERROR); } uprv_memmove(newMappingData, mappingData, U_SIZEOF_UCHAR * mappingDataCapacity); mappingDataCapacity *= 2; uprv_free(mappingData); mappingData = newMappingData; /* write the length */ if(mappingLength > _SPREP_MAX_INDEX_TOP_LENGTH ){ /* the cast here is safe since we donot expect the length to be > 65535 */ mappingData[currentIndex++] = (uint16_t) mappingLength; } /* continue copying */ uprv_memmove(mappingData+currentIndex, value->mapping, value->length*U_SIZEOF_UCHAR); currentIndex += value->length; } } } mappingLength++; pos = -1; } /* set the last length for range check */ if(mappingLength <= _SPREP_MAX_INDEX_TOP_LENGTH){ 
indexes[_SPREP_NORM_CORRECTNS_LAST_UNI_VERSION+mappingLength] = currentIndex+1; }else{ indexes[_SPREP_FOUR_UCHARS_MAPPING_INDEX_START] = limitIndex; } }
/**
 * Reference implementation of the IDNA ToUnicode operation for one label.
 *
 * If src carries the ACE prefix, the prefix is removed, the rest is decoded
 * from Punycode, and the result is verified by converting it back with
 * idnaref_toASCII() and comparing case-insensitively with the input.
 * Otherwise src is (optionally) checked against the STD3 ASCII rules and
 * copied to dest unchanged.
 *
 * @param src           The input label.
 * @param srcLength     The number of UChars in src, or -1 if NUL-terminated.
 * @param dest          The output buffer; may be NULL only if destCapacity is 0.
 * @param destCapacity  The capacity of dest in UChars.
 * @param options       Bit set of IDNAREF_ALLOW_UNASSIGNED / IDNAREF_USE_STD3_RULES.
 * @param parseError    Receives error position information where available.
 * @param status        ICU error code; must not be NULL. Nothing is done if it
 *                      already indicates a failure.
 * @return 0 for illegal arguments or a preset failure; otherwise the value
 *         of u_terminateUChars() for the required output length.
 */
U_CFUNC int32_t
idnaref_toUnicode(const UChar* src, int32_t srcLength,
                  UChar* dest, int32_t destCapacity,
                  int32_t options,
                  UParseError* parseError,
                  UErrorCode* status){

    if(status == NULL || U_FAILURE(*status)){
        return 0;
    }
    if((src == NULL) || (srcLength < -1) || (destCapacity<0) || (!dest && destCapacity > 0)){
        *status = U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }

    UChar b1Stack[MAX_LABEL_BUFFER_SIZE], b2Stack[MAX_LABEL_BUFFER_SIZE], b3Stack[MAX_LABEL_BUFFER_SIZE];

    //initialize pointers to stack buffers
    UChar  *b1 = b1Stack, *b2 = b2Stack, *b1Prime=NULL, *b3=b3Stack;
    int32_t b1Len, b2Len, b1PrimeLen, b3Len,
            b1Capacity = MAX_LABEL_BUFFER_SIZE,
            b2Capacity = MAX_LABEL_BUFFER_SIZE,
            b3Capacity = MAX_LABEL_BUFFER_SIZE,
            reqLength=0;
    // UParseError parseError;

    NamePrepTransform* prep = TestIDNA::getInstance(*status);
    b1Len = 0;
    UBool* caseFlags = NULL;

    //get the options
    UBool allowUnassigned   = (UBool)((options & IDNAREF_ALLOW_UNASSIGNED) != 0);
    UBool useSTD3ASCIIRules = (UBool)((options & IDNAREF_USE_STD3_RULES) != 0);

    UBool srcIsASCII = TRUE;
    UBool srcIsLDH = TRUE;
    int32_t failPos =0;

    if(U_FAILURE(*status)){
        goto CLEANUP;
    }

    // step 1: find out if all the codepoints in src are ASCII
    // NOTE(review): the NUL-terminated branch tests isLDHChar() for every
    // code unit, the explicit-length branch only for ASCII code units
    // ("else if"); kept as is, confirm which one is intended.
    if(srcLength==-1){
        srcLength = 0;
        for(;src[srcLength]!=0;){
            if(src[srcLength]> 0x7f){
                srcIsASCII = FALSE;
            }if(prep->isLDHChar(src[srcLength])==FALSE){
                // here we do not assemble surrogates
                // since we know that LDH code points
                // are in the ASCII range only
                srcIsLDH = FALSE;
                failPos = srcLength;
            }
            srcLength++;
        }
    }else{
        for(int32_t j=0; j<srcLength; j++){
            if(src[j]> 0x7f){
                srcIsASCII = FALSE;
            }else if(prep->isLDHChar(src[j])==FALSE){
                // here we do not assemble surrogates
                // since we know that LDH code points
                // are in the ASCII range only
                srcIsLDH = FALSE;
                failPos = j;
            }
        }
    }

    if(srcIsASCII == FALSE){
        // step 2: process the string
        b1Len = prep->process(src,srcLength,b1,b1Capacity,allowUnassigned, parseError, *status);
        if(*status == U_BUFFER_OVERFLOW_ERROR){
            // redo processing of string
            /* we do not have enough room so grow the buffer*/
            b1 = (UChar*) uprv_malloc(b1Len * U_SIZEOF_UCHAR);
            if(b1==NULL){
                *status = U_MEMORY_ALLOCATION_ERROR;
                goto CLEANUP;
            }

            *status = U_ZERO_ERROR; // reset error

            b1Len = prep->process(src,srcLength,b1, b1Len,allowUnassigned, parseError, *status);
        }
        //bail out on error
        if(U_FAILURE(*status)){
            goto CLEANUP;
        }
    }else{

        // copy everything to b1
        if(srcLength < b1Capacity){
            uprv_memmove(b1,src, srcLength * U_SIZEOF_UCHAR);
        }else{
            /* we do not have enough room so grow the buffer*/
            b1 = (UChar*) uprv_malloc(srcLength * U_SIZEOF_UCHAR);
            if(b1==NULL){
                *status = U_MEMORY_ALLOCATION_ERROR;
                goto CLEANUP;
            }
            uprv_memmove(b1,src, srcLength * U_SIZEOF_UCHAR);
        }
        b1Len = srcLength;
    }

    //step 3: verify ACE Prefix
    if(startsWithPrefix(src,srcLength)){

        //step 4: Remove the ACE Prefix
        b1Prime = b1 + ACE_PREFIX_LENGTH;
        b1PrimeLen  = b1Len - ACE_PREFIX_LENGTH;

        //step 5: Decode using punycode
        b2Len = convertFromPuny(b1Prime,b1PrimeLen, b2, b2Capacity, *status);
        //b2Len = u_strFromPunycode(b2, b2Capacity,b1Prime,b1PrimeLen, caseFlags, status);

        if(*status == U_BUFFER_OVERFLOW_ERROR){
            // redo processing of string
            /* we do not have enough room so grow the buffer*/
            b2 = (UChar*) uprv_malloc(b2Len * U_SIZEOF_UCHAR);
            if(b2==NULL){
                *status = U_MEMORY_ALLOCATION_ERROR;
                goto CLEANUP;
            }

            *status = U_ZERO_ERROR; // reset error

            b2Len =  convertFromPuny(b1Prime,b1PrimeLen, b2, b2Len, *status);
            //b2Len = u_strFromPunycode(b2, b2Len,b1Prime,b1PrimeLen,caseFlags, status);
        }

        //step 6:Apply toASCII
        b3Len = idnaref_toASCII(b2,b2Len,b3,b3Capacity,options,parseError, status);

        if(*status == U_BUFFER_OVERFLOW_ERROR){
            // redo processing of string
            /* we do not have enough room so grow the buffer*/
            b3 = (UChar*) uprv_malloc(b3Len * U_SIZEOF_UCHAR);
            if(b3==NULL){
                *status = U_MEMORY_ALLOCATION_ERROR;
                goto CLEANUP;
            }

            *status = U_ZERO_ERROR; // reset error

            b3Len =  idnaref_toASCII(b2,b2Len,b3,b3Len, options, parseError, status);
        }
        //bail out on error
        if(U_FAILURE(*status)){
            goto CLEANUP;
        }

        //step 7: verify
        if(compareCaseInsensitiveASCII(b1, b1Len, b3, b3Len) !=0){
            *status = U_IDNA_VERIFICATION_ERROR;
            goto CLEANUP;
        }

        //step 8: return output of step 5
        reqLength = b2Len;
        if(b2Len <= destCapacity) {
            uprv_memmove(dest, b2, b2Len * U_SIZEOF_UCHAR);
        }
    }else{
        // verify that STD3 ASCII rules are satisfied
        if(useSTD3ASCIIRules == TRUE){
            if( srcIsLDH == FALSE /* source contains some non-LDH characters */
                || src[0] ==  HYPHEN || src[srcLength-1] == HYPHEN){
                *status = U_IDNA_STD3_ASCII_RULES_ERROR;

                /* populate the parseError struct */
                if(srcIsLDH==FALSE){
                    // failPos is always set the index of failure
                    uprv_syntaxError(src,failPos, srcLength,parseError);
                }else if(src[0] == HYPHEN){
                    // fail position is 0
                    uprv_syntaxError(src,0,srcLength,parseError);
                }else{
                    // the last index in the source is always length-1
                    uprv_syntaxError(src, (srcLength>0) ? srcLength-1 : srcLength, srcLength,parseError);
                }

                goto CLEANUP;
            }
        }
        //copy the source to destination
        if(srcLength <= destCapacity){
            uprv_memmove(dest,src,srcLength * U_SIZEOF_UCHAR);
        }
        reqLength = srcLength;
    }

CLEANUP:

    // release every buffer that was replaced by a heap allocation
    if(b1 != b1Stack){
        uprv_free(b1);
    }
    if(b2 != b2Stack){
        uprv_free(b2);
    }
    if(b3 != b3Stack){
        uprv_free(b3);
    }
    uprv_free(caseFlags);

    // delete prep;

    return u_terminateUChars(dest, destCapacity, reqLength, status);
}
int32_t NamePrepTransform::process( const UChar* src, int32_t srcLength, UChar* dest, int32_t destCapacity, UBool allowUnassigned, UParseError* parseError, UErrorCode& status ){ // check error status if(U_FAILURE(status)){ return 0; } //check arguments if(src==NULL || srcLength<-1 || (dest==NULL && destCapacity!=0)) { status=U_ILLEGAL_ARGUMENT_ERROR; return 0; } UChar b1Stack[MAX_BUFFER_SIZE]; UChar *b1 = b1Stack; int32_t b1Len,b1Capacity = MAX_BUFFER_SIZE; int32_t b1Index = 0; UCharDirection direction=U_CHAR_DIRECTION_COUNT, firstCharDir=U_CHAR_DIRECTION_COUNT; UBool leftToRight=FALSE, rightToLeft=FALSE; b1Len = map(src,srcLength, b1, b1Capacity,allowUnassigned,parseError, status); if(status == U_BUFFER_OVERFLOW_ERROR){ // redo processing of string /* we do not have enough room so grow the buffer*/ if(!u_growBufferFromStatic(b1Stack,&b1,&b1Capacity,b1Len,0)){ status = U_MEMORY_ALLOCATION_ERROR; goto CLEANUP; } status = U_ZERO_ERROR; // reset error b1Len = map(src,srcLength, b1, b1Len,allowUnassigned, parseError, status); } if(U_FAILURE(status)){ goto CLEANUP; } for(; b1Index<b1Len; ){ UChar32 ch = 0; U16_NEXT(b1, b1Index, b1Len, ch); if(prohibited.contains(ch) && ch!=0x0020){ status = U_IDNA_PROHIBITED_ERROR; goto CLEANUP; } direction = u_charDirection(ch); if(firstCharDir==U_CHAR_DIRECTION_COUNT){ firstCharDir = direction; } if(direction == U_LEFT_TO_RIGHT){ leftToRight = TRUE; } if(direction == U_RIGHT_TO_LEFT || direction == U_RIGHT_TO_LEFT_ARABIC){ rightToLeft = TRUE; } } // satisfy 2 if( leftToRight == TRUE && rightToLeft == TRUE){ status = U_IDNA_CHECK_BIDI_ERROR; goto CLEANUP; } //satisfy 3 if( rightToLeft == TRUE && !((firstCharDir == U_RIGHT_TO_LEFT || firstCharDir == U_RIGHT_TO_LEFT_ARABIC) && (direction == U_RIGHT_TO_LEFT || direction == U_RIGHT_TO_LEFT_ARABIC)) ){ status = U_IDNA_CHECK_BIDI_ERROR; return FALSE; } if(b1Len <= destCapacity){ uprv_memmove(dest,b1, b1Len*U_SIZEOF_UCHAR); } CLEANUP: if(b1!=b1Stack){ uprv_free(b1); } return 
u_terminateUChars(dest, destCapacity, b1Len, &status); }
/*
 * Common driver for case folding and lowercasing/uppercasing/titlecasing.
 *
 * Validates the arguments, redirects the output into a temporary buffer
 * when src and dest overlap, dispatches on toWhichCase, and finally copies
 * the temporary result (if one was used) into dest.
 *
 * Returns 0 for illegal arguments or a preset failure, otherwise the value
 * of u_terminateUChars() for the mapped length.
 */
static int32_t
caseMap(UChar *dest, int32_t destCapacity,
        const UChar *src, int32_t srcLength,
        UBreakIterator *titleIter,
        const char *locale,
        uint32_t options,
        int32_t toWhichCase,
        UErrorCode *pErrorCode) {
    UChar buffer[300];
    UChar *temp;
    const UCaseProps *csp;
    int32_t destLength;
    UBool ownTitleIter;
    UBool overlapping;

    /* check argument values */
    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
        return 0;
    }
    if( destCapacity<0 ||
        (dest==NULL && destCapacity>0) ||
        src==NULL ||
        srcLength<-1
    ) {
        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }

    csp=ucase_getSingleton(pErrorCode);
    if(U_FAILURE(*pErrorCode)) {
        return 0;
    }

    /* a length of -1 means NUL-terminated */
    if(srcLength==-1) {
        srcLength=u_strlen(src);
    }

    /* does either buffer start inside the other one? */
    overlapping=FALSE;
    if(dest!=NULL) {
        if(src>=dest && src<(dest+destCapacity)) {
            overlapping=TRUE;
        } else if(dest>=src && dest<(src+srcLength)) {
            overlapping=TRUE;
        }
    }

    if(!overlapping) {
        /* write straight into the destination */
        temp=dest;
    } else if(destCapacity<=(sizeof(buffer)/U_SIZEOF_UCHAR)) {
        /* overlap, and the stack buffer is large enough */
        temp=buffer;
    } else {
        /* overlap, and a heap buffer is needed */
        temp=(UChar *)uprv_malloc(destCapacity*U_SIZEOF_UCHAR);
        if(temp==NULL) {
            *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
            return 0;
        }
    }

    ownTitleIter=FALSE;
    destLength=0;

    if(toWhichCase==FOLD_CASE) {
        destLength=ustr_foldCase(csp, temp, destCapacity, src, srcLength,
                                 options, pErrorCode);
    } else {
        UCaseContext csc={ NULL };
        int32_t locCache;

        csc.p=(void *)src;
        csc.limit=srcLength;
        locCache=0;

        /* the internal functions require locale!=NULL */
        if(locale==NULL) {
            locale=uloc_getDefault();
        }

        if(toWhichCase==TO_LOWER) {
            destLength=_caseMap(csp, ucase_toFullLower,
                                temp, destCapacity,
                                src, &csc,
                                0, srcLength,
                                locale, &locCache, pErrorCode);
        } else if(toWhichCase==TO_UPPER) {
            destLength=_caseMap(csp, ucase_toFullUpper,
                                temp, destCapacity,
                                src, &csc,
                                0, srcLength,
                                locale, &locCache, pErrorCode);
        } else /* if(toWhichCase==TO_TITLE) */ {
#if UCONFIG_NO_BREAK_ITERATION
            *pErrorCode=U_UNSUPPORTED_ERROR;
#else
            /* open a word iterator if the caller did not supply one */
            if(titleIter==NULL) {
                titleIter=ubrk_open(UBRK_WORD, locale,
                                    src, srcLength,
                                    pErrorCode);
                ownTitleIter=(UBool)U_SUCCESS(*pErrorCode);
            }
            if(U_SUCCESS(*pErrorCode)) {
                destLength=_toTitle(csp, temp, destCapacity,
                                    src, &csc, srcLength,
                                    titleIter, locale, &locCache, pErrorCode);
            }
#endif
        }
    }

    if(temp!=dest) {
        /* copy as much of the result as fits into the destination buffer */
        int32_t copyLength=destLength;
        if(copyLength>destCapacity) {
            copyLength=destCapacity;
        }
        if(copyLength>0) {
            uprv_memmove(dest, temp, copyLength*U_SIZEOF_UCHAR);
        }
        if(temp!=buffer) {
            uprv_free(temp);
        }
    }

#if !UCONFIG_NO_BREAK_ITERATION
    /* close the iterator only if it was opened here */
    if(ownTitleIter) {
        ubrk_close(titleIter);
    }
#endif

    return u_terminateUChars(dest, destCapacity, destLength, pErrorCode);
}
/**
 * Moves count UChars from src to dest; the two regions may overlap.
 * Nothing is moved when count is zero or negative (a negative count must
 * not be turned into a huge unsigned byte length).
 *
 * @param dest  The destination buffer.
 * @param src   The source buffer.
 * @param count The number of UChars to move.
 * @return dest
 */
U_CAPI UChar* U_EXPORT2
u_memmove(UChar* dest, const UChar* src, int32_t count) {
    if(count > 0) {
        /* widen before multiplying so that a large count cannot overflow int */
        uprv_memmove(dest, src, (size_t)count * U_SIZEOF_UCHAR);
    }
    return dest;
}
/*
 * continue partial match with new input
 * never called for simple, single-character conversion
 *
 * Tries to match the bytes saved in cnv->preToU[] followed by the new input.
 * - no match (0):      the first saved character goes to the error buffer,
 *                      the rest is kept for replay, U_INVALID_CHAR_FOUND is set
 * - partial match (<0): the newly consumed input is appended to preToU[]
 * - full match (>0):   consumed input is skipped or unused preToU[] bytes are
 *                      kept for replay, and the result is written
 */
U_CFUNC void
ucnv_extContinueMatchToU(UConverter *cnv,
                         UConverterToUnicodeArgs *pArgs, int32_t srcIndex,
                         UErrorCode *pErrorCode) {
    uint32_t result;
    int32_t matched, leftover;

    matched=ucnv_extMatchToU(cnv->sharedData->mbcs.extIndexes, (int8_t)UCNV_SISO_STATE(cnv),
                             cnv->preToU, cnv->preToULength,
                             pArgs->source, (int32_t)(pArgs->sourceLimit-pArgs->source),
                             &result,
                             cnv->useFallback, pArgs->flush);

    if(matched==0) {
        /*
         * no match
         *
         * We need to split the previous input into two parts:
         *
         * 1. The first codepage character is unmappable - that's how we got into
         *    trying the extension data in the first place.
         *    We need to move it from the preToU buffer
         *    to the error buffer, set an error code,
         *    and prepare the rest of the previous input for 2.
         *
         * 2. The rest of the previous input must be converted once we
         *    come back from the callback for the first character.
         *    At that time, we have to try again from scratch to convert
         *    these input characters.
         *    The replay will be handled by the ucnv.c conversion code.
         */

        /* move the first codepage character to the error field */
        uprv_memcpy(cnv->toUBytes, cnv->preToU, cnv->preToUFirstLength);
        cnv->toULength=cnv->preToUFirstLength;

        /* move the rest up inside the buffer */
        leftover=cnv->preToULength-cnv->preToUFirstLength;
        if(leftover>0) {
            uprv_memmove(cnv->preToU, cnv->preToU+cnv->preToUFirstLength, leftover);
        }

        /* a negative length marks preToU for replay */
        cnv->preToULength=(int8_t)-leftover;

        /* set the error code for unassigned */
        *pErrorCode=U_INVALID_CHAR_FOUND;
        return;
    }

    if(matched<0) {
        /* save state for partial match */
        const char *in=pArgs->source;
        int32_t k=cnv->preToULength;

        /* just _append_ the newly consumed input to preToU[] */
        matched=-matched;
        while(k<matched) {
            cnv->preToU[k]=*in++;
            ++k;
        }
        pArgs->source=in; /* same as *src=srcLimit; because we reached the end of input */
        cnv->preToULength=(int8_t)matched;
        return;
    }

    /* matched>0: a complete match */
    leftover=cnv->preToULength-matched;
    if(leftover<=0) {
        /* all of preToU[] was used: advance src pointer for the consumed input */
        pArgs->source+=-leftover;
        cnv->preToULength=0;
    } else {
        /* the match did not use all of preToU[] - keep the rest for replay */
        uprv_memmove(cnv->preToU, cnv->preToU+matched, leftover);
        cnv->preToULength=(int8_t)-leftover;
    }

    /* write result */
    ucnv_extWriteToU(cnv, cnv->sharedData->mbcs.extIndexes,
                     result,
                     &pArgs->target, pArgs->targetLimit,
                     &pArgs->offsets, srcIndex,
                     pErrorCode);
}
/*
 * Compacts the properties vectors in place: sorts the rows, reduces them to
 * an array of unique value vectors (without the start/limit columns), and
 * reports to the handler, for each range, the index of its value vector.
 *
 * pv         - the properties vectors; dereferenced without a NULL check
 * handler    - callback, must not be NULL (else U_ILLEGAL_ARGUMENT_ERROR)
 * context    - opaque pointer passed through to the handler
 * pErrorCode - ICU error code; dereferenced without a NULL check. Nothing is
 *              done if it already indicates a failure.
 *
 * Returns immediately if pv is already compacted. The function also returns
 * early, with pv left partially processed, when the sort or a handler call
 * sets a failure code.
 */
U_CAPI void U_EXPORT2
upvec_compact(UPropsVectors *pv, UPVecCompactHandler *handler, void *context, UErrorCode *pErrorCode) {
    uint32_t *row;
    int32_t i, columns, valueColumns, rows, count;
    UChar32 start, limit;

    /* argument checking */
    if(U_FAILURE(*pErrorCode)) {
        return;
    }
    if(handler==NULL) {
        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
        return;
    }
    if(pv->isCompacted) {
        return;
    }

    /* Set the flag now: Sorting and compacting destroys the builder data structure. */
    pv->isCompacted=TRUE;

    rows=pv->rows;
    columns=pv->columns;
    valueColumns=columns-2; /* not counting start & limit */

    /* sort the properties vectors to find unique vector values */
    uprv_sortArray(pv->v, rows, columns*4,
                   upvec_compareRows, pv, FALSE, pErrorCode);
    if(U_FAILURE(*pErrorCode)) {
        return;
    }

    /*
     * Find and set the special values.
     * This has to do almost the same work as the compaction below,
     * to find the indexes where the special-value rows will move.
     *
     * count starts at -valueColumns so that the first row always counts as
     * a new vector and gets index 0.
     */
    row=pv->v;
    count=-valueColumns;
    for(i=0; i<rows; ++i) {
        start=(UChar32)row[0];

        /*
         * count a new values vector if it is different from the current one;
         * row-valueColumns is where the previous row's values start
         * (previous row start + 2)
         */
        if(count<0 || 0!=uprv_memcmp(row+2, row-valueColumns, valueColumns*4)) {
            count+=valueColumns;
        }

        /* only rows for special code points are reported in this pass */
        if(start>=UPVEC_FIRST_SPECIAL_CP) {
            handler(context, start, start, count, row+2, valueColumns, pErrorCode);
            if(U_FAILURE(*pErrorCode)) {
                return;
            }
        }

        row+=columns;
    }

    /* count is at the beginning of the last vector, add valueColumns to include that last vector */
    count+=valueColumns;

    /* Call the handler once more to signal the start of delivering real values. */
    handler(context, UPVEC_START_REAL_VALUES_CP, UPVEC_START_REAL_VALUES_CP,
            count, row-valueColumns, valueColumns, pErrorCode);
    if(U_FAILURE(*pErrorCode)) {
        return;
    }

    /*
     * Move vector contents up to a contiguous array with only unique
     * vector values, and call the handler function for each vector.
     *
     * This destroys the Properties Vector structure and replaces it
     * with an array of just vector values.
     */
    row=pv->v;
    count=-valueColumns;
    for(i=0; i<rows; ++i) {
        /* fetch these first before memmove() may overwrite them */
        start=(UChar32)row[0];
        limit=(UChar32)row[1];

        /* add a new values vector if it is different from the current one */
        if(count<0 || 0!=uprv_memcmp(row+2, pv->v+count, valueColumns*4)) {
            count+=valueColumns;
            uprv_memmove(pv->v+count, row+2, valueColumns*4);
        }

        /* only rows for non-special code points are reported in this pass; the range end is inclusive */
        if(start<UPVEC_FIRST_SPECIAL_CP) {
            handler(context, start, limit-1, count, pv->v+count, valueColumns, pErrorCode);
            if(U_FAILURE(*pErrorCode)) {
                return;
            }
        }

        row+=columns;
    }

    /* count is at the beginning of the last vector, add one to include that last vector */
    pv->rows=count/valueColumns+1;
}
/*
 * Sets (value & mask) into the given value column for all code points in
 * start..end (inclusive), leaving the bits outside of mask unchanged.
 * Rows that only partially overlap the range are split first, which may
 * require growing the rows array.
 *
 * pv         - the properties vectors; must not be NULL or compacted
 * start, end - inclusive code point range, 0<=start<=end<=UPVEC_MAX_CP
 * column     - value column index, 0<=column<pv->columns-2
 * value      - the bits to set
 * mask       - which bits of the column are replaced
 * pErrorCode - ICU error code; dereferenced without a NULL check. Set to
 *              U_ILLEGAL_ARGUMENT_ERROR for bad arguments,
 *              U_NO_WRITE_PERMISSION if pv is compacted,
 *              U_INTERNAL_PROGRAM_ERROR if the rows array cannot grow further,
 *              U_MEMORY_ALLOCATION_ERROR if the allocation fails.
 */
U_CAPI void U_EXPORT2
upvec_setValue(UPropsVectors *pv,
               UChar32 start, UChar32 end,
               int32_t column,
               uint32_t value, uint32_t mask,
               UErrorCode *pErrorCode) {
    uint32_t *firstRow, *lastRow;
    int32_t columns;
    UChar32 limit;
    UBool splitFirstRow, splitLastRow;

    /* argument checking */
    if(U_FAILURE(*pErrorCode)) {
        return;
    }
    if( pv==NULL ||
        start<0 || start>end || end>UPVEC_MAX_CP ||
        column<0 || column>=(pv->columns-2)
    ) {
        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
        return;
    }
    if(pv->isCompacted) {
        *pErrorCode=U_NO_WRITE_PERMISSION;
        return;
    }
    /* rows store an exclusive range limit */
    limit=end+1;

    /* initialize */
    columns=pv->columns;
    column+=2; /* skip range start and limit columns */
    value&=mask;

    /* find the rows whose ranges overlap with the input range */

    /* find the first and last rows, always successful */
    firstRow=_findRow(pv, start);
    lastRow=_findRow(pv, end);

    /*
     * Rows need to be split if they partially overlap with the
     * input range (only possible for the first and last rows)
     * and if their value differs from the input value.
     */
    splitFirstRow= (UBool)(start!=(UChar32)firstRow[0] && value!=(firstRow[column]&mask));
    splitLastRow= (UBool)(limit!=(UChar32)lastRow[1] && value!=(lastRow[column]&mask));

    /* split first/last rows if necessary */
    if(splitFirstRow || splitLastRow) {
        int32_t count, rows;

        rows=pv->rows;
        /* grow the array if the one or two additional rows do not fit */
        if((rows+splitFirstRow+splitLastRow)>pv->maxRows) {
            uint32_t *newVectors;
            int32_t newMaxRows;

            if(pv->maxRows<UPVEC_MEDIUM_ROWS) {
                newMaxRows=UPVEC_MEDIUM_ROWS;
            } else if(pv->maxRows<UPVEC_MAX_ROWS) {
                newMaxRows=UPVEC_MAX_ROWS;
            } else {
                /* Implementation bug, or UPVEC_MAX_ROWS too low. */
                *pErrorCode=U_INTERNAL_PROGRAM_ERROR;
                return;
            }
            newVectors=(uint32_t *)uprv_malloc(newMaxRows*columns*4);
            if(newVectors==NULL) {
                *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
                return;
            }
            uprv_memcpy(newVectors, pv->v, rows*columns*4);
            /* rebase the row pointers into the new array before the old one is freed */
            firstRow=newVectors+(firstRow-pv->v);
            lastRow=newVectors+(lastRow-pv->v);
            uprv_free(pv->v);
            pv->v=newVectors;
            pv->maxRows=newMaxRows;
        }

        /* count the number of row cells to move after the last row, and move them */
        count = (int32_t)((pv->v+rows*columns)-(lastRow+columns));
        if(count>0) {
            uprv_memmove(
                lastRow+(1+splitFirstRow+splitLastRow)*columns,
                lastRow+columns,
                count*4);
        }
        pv->rows=rows+splitFirstRow+splitLastRow;

        /* split the first row, and move the firstRow pointer to the second part */
        if(splitFirstRow) {
            /* copy all affected rows up one and move the lastRow pointer */
            count = (int32_t)((lastRow-firstRow)+columns);
            uprv_memmove(firstRow+columns, firstRow, count*4);
            lastRow+=columns;

            /* split the range and move the firstRow pointer */
            firstRow[1]=firstRow[columns]=(uint32_t)start;
            firstRow+=columns;
        }

        /* split the last row */
        if(splitLastRow) {
            /* copy the last row data */
            uprv_memcpy(lastRow+columns, lastRow, columns*4);

            /* split the range: lastRow now ends at limit, its copy starts there */
            lastRow[1]=lastRow[columns]=(uint32_t)limit;
        }
    }

    /* set the "row last seen" to the last row for the range */
    pv->prevRow=(int32_t)((lastRow-(pv->v))/columns);

    /* set the input value in all remaining rows */
    /* from here on, both pointers address the value column cell of their row */
    firstRow+=column;
    lastRow+=column;
    mask=~mask;
    for(;;) {
        *firstRow=(*firstRow&mask)|value;
        if(firstRow==lastRow) {
            break;
        }
        firstRow+=columns;
    }
}
/*
 * Runs a string case mapping function with argument checking and
 * overlap handling.
 *
 * When src and dest overlap, the mapper writes into a temporary buffer
 * (on the stack if destCapacity permits, otherwise on the heap) whose
 * contents are copied into dest afterwards.
 *
 * Returns 0 for illegal arguments or a preset failure, otherwise the value
 * of u_terminateUChars() for the length reported by the mapper.
 */
U_CFUNC int32_t
ustrcase_map(const UCaseMap *csm,
             UChar *dest, int32_t destCapacity,
             const UChar *src, int32_t srcLength,
             UStringCaseMapper *stringCaseMapper,
             UErrorCode *pErrorCode) {
    UChar buffer[300];
    UChar *temp;
    int32_t destLength;

    /* check argument values */
    if(U_FAILURE(*pErrorCode)) {
        return 0;
    }
    if( destCapacity<0 ||
        (dest==NULL && destCapacity>0) ||
        src==NULL ||
        srcLength<-1
    ) {
        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }

    /* a length of -1 means NUL-terminated */
    if(srcLength==-1) {
        srcLength=u_strlen(src);
    }

    /* by default the mapper writes straight into the destination */
    temp=dest;

    /* unless source and destination overlap */
    if( dest!=NULL &&
        ((src>=dest && src<(dest+destCapacity)) ||
         (dest>=src && dest<(src+srcLength)))
    ) {
        if(destCapacity<=UPRV_LENGTHOF(buffer)) {
            /* the stack buffer is large enough */
            temp=buffer;
        } else {
            /* allocate a buffer */
            temp=(UChar *)uprv_malloc(destCapacity*U_SIZEOF_UCHAR);
            if(temp==NULL) {
                *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
                return 0;
            }
        }
    }

    destLength=stringCaseMapper(csm, temp, destCapacity, src, srcLength, pErrorCode);

    if(temp!=dest) {
        /* copy as much of the result as fits into the destination buffer */
        int32_t copyLength=destLength;
        if(copyLength>destCapacity) {
            copyLength=destCapacity;
        }
        if(copyLength>0) {
            uprv_memmove(dest, temp, copyLength*U_SIZEOF_UCHAR);
        }
        if(temp!=buffer) {
            uprv_free(temp);
        }
    }

    return u_terminateUChars(dest, destCapacity, destLength, pErrorCode);
}
/*
 * Common driver for the case mapping entry points.
 *
 * Validates the arguments, redirects the output into a scratch buffer when
 * src and dest overlap, dispatches on toWhichCase (FOLD_CASE, TO_LOWER,
 * TO_UPPER; any other value is handled as title casing) and finally copies
 * the scratch buffer back into dest.
 *
 * Returns the result of u_terminateUChars() for the mapped length, or 0 when
 * pErrorCode is NULL/already failing, an argument is illegal, or the scratch
 * buffer cannot be allocated.
 */
static int32_t
caseMap(const UCaseMap *csm,
        UChar *dest, int32_t destCapacity,
        const UChar *src, int32_t srcLength,
        int32_t toWhichCase,
        UErrorCode *pErrorCode) {
    UChar stackBuffer[300];
    UChar *out;
    int32_t resultLength=0;

    /* nothing to do without a usable error code */
    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
        return 0;
    }

    /* reject inconsistent buffer descriptions */
    if(src==NULL || srcLength<-1 || destCapacity<0 || (dest==NULL && destCapacity>0)) {
        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }

    /* resolve a NUL-terminated source */
    if(srcLength==-1) {
        srcLength=u_strlen(src);
    }

    /* write straight into dest unless it overlaps with src */
    out=dest;
    if(dest!=NULL) {
        if((src>=dest && src<(dest+destCapacity)) ||
           (dest>=src && dest<(src+srcLength))) {
            if(destCapacity<=(sizeof(stackBuffer)/U_SIZEOF_UCHAR)) {
                /* small enough for the on-stack scratch space */
                out=stackBuffer;
            } else {
                /* too large: use a heap block of the full destination capacity */
                out=(UChar *)uprv_malloc(destCapacity*U_SIZEOF_UCHAR);
                if(out==NULL) {
                    *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
                    return 0;
                }
            }
        }
    }

    if(toWhichCase!=FOLD_CASE) {
        /* context-sensitive mappings iterate over the source via a UCaseContext */
        UCaseContext csc={ NULL };
        csc.p=(void *)src;
        csc.limit=srcLength;

        if(toWhichCase==TO_LOWER) {
            resultLength=_caseMap(csm, ucase_toFullLower,
                                  out, destCapacity,
                                  src, &csc,
                                  0, srcLength,
                                  pErrorCode);
        } else if(toWhichCase==TO_UPPER) {
            resultLength=_caseMap(csm, ucase_toFullUpper,
                                  out, destCapacity,
                                  src, &csc,
                                  0, srcLength,
                                  pErrorCode);
        } else /* title casing */ {
#if UCONFIG_NO_BREAK_ITERATION
            *pErrorCode=U_UNSUPPORTED_ERROR;
#else
            /* UCaseMap is actually non-const in toTitle() APIs. */
            resultLength=_toTitle((UCaseMap *)csm, out, destCapacity,
                                  src, &csc, srcLength,
                                  pErrorCode);
#endif
        }
    } else {
        resultLength=ustr_foldCase(csm->csp,
                                   out, destCapacity,
                                   src, srcLength,
                                   csm->options,
                                   pErrorCode);
    }

    if(out!=dest) {
        /* move the mapped text from the scratch buffer into dest, clipped to its capacity */
        int32_t copyLength=resultLength;
        if(copyLength>destCapacity) {
            copyLength=destCapacity;
        }
        if(copyLength>0) {
            uprv_memmove(dest, out, copyLength*U_SIZEOF_UCHAR);
        }
        if(out!=stackBuffer) {
            uprv_free(out);
        }
    }

    return u_terminateUChars(dest, destCapacity, resultLength, pErrorCode);
}
/**
 * Decodes a Punycode string into UTF-16.
 *
 * The basic code points before the last delimiter are copied verbatim; the
 * remainder is decoded as a sequence of generalized variable-length integers,
 * each of which inserts one code point into the output.
 *
 * The function keeps counting the required length when the output does not
 * fit into dest (insertions are simply skipped), so it can be used for
 * preflighting.
 *
 * @param src          Punycode input; must not be NULL.
 * @param srcLength    Length of src in UChars, or -1 if NUL-terminated.
 * @param dest         Output buffer; may be NULL only if destCapacity==0.
 * @param destCapacity Capacity of dest in UChars.
 * @param caseFlags    Optional (may be NULL). When given, receives one flag
 *                     per output code unit, derived from whether the relevant
 *                     input character is an uppercase basic code point.
 * @param pErrorCode   In/out error code. Set to U_INVALID_CHAR_FOUND for
 *                     non-basic input characters or non-digits, and to
 *                     U_ILLEGAL_CHAR_FOUND for truncated input, integer
 *                     overflow, or a decoded value that is not a valid
 *                     non-surrogate code point.
 * @return The result of u_terminateUChars() for the decoded length, or 0 on error.
 */
U_CFUNC int32_t
u_strFromPunycode(const UChar *src, int32_t srcLength,
                  UChar *dest, int32_t destCapacity,
                  UBool *caseFlags,
                  UErrorCode *pErrorCode) {
    int32_t n, destLength, i, bias, basicLength, j, in, oldi, w, k, digit, t,
            destCPCount, firstSupplementaryIndex, cpLength;
    UChar b;

    /* argument checking */
    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
        return 0;
    }

    if(src==NULL || srcLength<-1 || (dest==NULL && destCapacity!=0)) {
        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }

    if(srcLength==-1) {
        srcLength=u_strlen(src);
    }

    /*
     * Handle the basic code points:
     * Let basicLength be the number of input code points
     * before the last delimiter, or 0 if there is none,
     * then copy the first basicLength code points to the output.
     *
     * The two following loops iterate backward.
     */
    for(j=srcLength; j>0;) {
        if(src[--j]==DELIMITER) {
            break;
        }
    }
    destLength=basicLength=destCPCount=j;
    U_ASSERT(destLength>=0);

    while(j>0) {
        b=src[--j];
        if(!IS_BASIC(b)) {
            *pErrorCode=U_INVALID_CHAR_FOUND;
            return 0;
        }

        /* only store what fits; the length is still counted for preflighting */
        if(j<destCapacity) {
            dest[j]=(UChar)b;

            if(caseFlags!=NULL) {
                caseFlags[j]=IS_BASIC_UPPERCASE(b);
            }
        }
    }

    /* Initialize the state: */
    n=INITIAL_N;
    i=0;
    bias=INITIAL_BIAS;
    firstSupplementaryIndex=1000000000;

    /*
     * Main decoding loop:
     * Start just after the last delimiter if any
     * basic code points were copied; start at the beginning otherwise.
     */
    for(in=basicLength>0 ? basicLength+1 : 0; in<srcLength; /* no op */) {
        /*
         * in is the index of the next character to be consumed, and
         * destCPCount is the number of code points in the output array.
         *
         * Decode a generalized variable-length integer into delta,
         * which gets added to i.  The overflow checking is easier
         * if we increase i as we go, then subtract off its starting
         * value at the end to obtain delta.
         */
        for(oldi=i, w=1, k=BASE; /* no condition */; k+=BASE) {
            if(in>=srcLength) {
                *pErrorCode=U_ILLEGAL_CHAR_FOUND;
                return 0;
            }

            /*
             * Punycode digits are ASCII. Reject wider code units explicitly:
             * narrowing a 16-bit UChar to 8 bits for the table lookup would
             * otherwise let e.g. U+0161 pass as the digit 'a' (0x61).
             */
            b=src[in++];
            if(b>0x7f) {
                *pErrorCode=U_INVALID_CHAR_FOUND;
                return 0;
            }
            digit=basicToDigit[(uint8_t)b];
            if(digit<0) {
                *pErrorCode=U_INVALID_CHAR_FOUND;
                return 0;
            }
            if(digit>(0x7fffffff-i)/w) {
                /* integer overflow */
                *pErrorCode=U_ILLEGAL_CHAR_FOUND;
                return 0;
            }

            i+=digit*w;
            /** RAM: comment out the old code for conformance with draft-ietf-idn-punycode-03.txt
            t=k-bias;
            if(t<TMIN) {
                t=TMIN;
            } else if(t>TMAX) {
                t=TMAX;
            }
            */
            t=k-bias;
            if(t<TMIN) {
                t=TMIN;
            } else if(k>=(bias+TMAX)) {
                t=TMAX;
            }
            if(digit<t) {
                break;
            }

            if(w>0x7fffffff/(BASE-t)) {
                /* integer overflow */
                *pErrorCode=U_ILLEGAL_CHAR_FOUND;
                return 0;
            }
            w*=BASE-t;
        }

        /*
         * Modification from sample code:
         * Increments destCPCount here,
         * where needed instead of in for() loop tail.
         */
        ++destCPCount;
        bias=adaptBias(i-oldi, destCPCount, (UBool)(oldi==0));

        /*
         * i was supposed to wrap around from (incremented) destCPCount to 0,
         * incrementing n each time, so we'll fix that now:
         */
        if(i/destCPCount>(0x7fffffff-n)) {
            /* integer overflow */
            *pErrorCode=U_ILLEGAL_CHAR_FOUND;
            return 0;
        }

        n+=i/destCPCount;
        i%=destCPCount;
        /* not needed for Punycode: */
        /* if (decode_digit(n) <= BASE) return punycode_invalid_input; */

        if(n>0x10ffff || U_IS_SURROGATE(n)) {
            /* Unicode code point overflow */
            *pErrorCode=U_ILLEGAL_CHAR_FOUND;
            return 0;
        }

        /* Insert n at position i of the output: */
        cpLength=U16_LENGTH(n);
        if(dest!=NULL && ((destLength+cpLength)<=destCapacity)) {
            int32_t codeUnitIndex;

            /*
             * Handle indexes when supplementary code points are present.
             *
             * In almost all cases, there will be only BMP code points before i
             * and even in the entire string.
             * This is handled with the same efficiency as with UTF-32.
             *
             * Only the rare cases with supplementary code points are handled
             * more slowly - but not too bad since this is an insertion anyway.
             */
            if(i<=firstSupplementaryIndex) {
                codeUnitIndex=i;
                if(cpLength>1) {
                    firstSupplementaryIndex=codeUnitIndex;
                } else {
                    ++firstSupplementaryIndex;
                }
            } else {
                codeUnitIndex=firstSupplementaryIndex;
                U16_FWD_N(dest, codeUnitIndex, destLength, i-codeUnitIndex);
            }

            /* use the UChar index codeUnitIndex instead of the code point index i */
            if(codeUnitIndex<destLength) {
                /* open a gap of cpLength code units at the insertion point */
                uprv_memmove(dest+codeUnitIndex+cpLength,
                             dest+codeUnitIndex,
                             (destLength-codeUnitIndex)*U_SIZEOF_UCHAR);
                if(caseFlags!=NULL) {
                    uprv_memmove(caseFlags+codeUnitIndex+cpLength,
                                 caseFlags+codeUnitIndex,
                                 destLength-codeUnitIndex);
                }
            }
            if(cpLength==1) {
                /* BMP, insert one code unit */
                dest[codeUnitIndex]=(UChar)n;
            } else {
                /* supplementary character, insert two code units */
                dest[codeUnitIndex]=U16_LEAD(n);
                dest[codeUnitIndex+1]=U16_TRAIL(n);
            }
            if(caseFlags!=NULL) {
                /* Case of last character determines uppercase flag: */
                caseFlags[codeUnitIndex]=IS_BASIC_UPPERCASE(src[in-1]);
                if(cpLength==2) {
                    caseFlags[codeUnitIndex+1]=FALSE;
                }
            }
        }
        destLength+=cpLength;
        U_ASSERT(destLength>=0);
        ++i;
    }

    return u_terminateUChars(dest, destCapacity, destLength, pErrorCode);
}
/**
 * Reference implementation of the IDNA ToASCII operation for a single label.
 *
 * The label is first run through the NamePrepTransform obtained from
 * TestIDNA::getInstance(). If the prepared label is all ASCII it is copied to
 * dest unchanged; otherwise it is Punycode-encoded via convertToPuny() and
 * written to dest behind the ACE prefix.
 *
 * @param src          Input label; must not be NULL.
 * @param srcLength    Length of src in UChars, or -1 (passed through to the
 *                     prepare step).
 * @param dest         Output buffer; may be NULL only if destCapacity==0.
 * @param destCapacity Capacity of dest in UChars; must be >=0.
 * @param options      Bit set; IDNAREF_ALLOW_UNASSIGNED and
 *                     IDNAREF_USE_STD3_RULES are examined.
 * @param parseError   Passed through to the prepare step.
 * @param status       In/out error code. The function returns 0 immediately if
 *                     it is NULL or already a failure.
 * @return The result of u_terminateUChars() for the required length.
 */
U_CFUNC int32_t
idnaref_toASCII(const UChar* src, int32_t srcLength,
                UChar* dest, int32_t destCapacity,
                int32_t options,
                UParseError* parseError,
                UErrorCode* status){

    if(status == NULL || U_FAILURE(*status)){
        return 0;
    }

    if((src == NULL) || (srcLength < -1) || (destCapacity<0) || (!dest && destCapacity > 0)){
        *status = U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }
    UChar b1Stack[MAX_LABEL_BUFFER_SIZE], b2Stack[MAX_LABEL_BUFFER_SIZE];
    //initialize pointers to stack buffers
    UChar  *b1 = b1Stack, *b2 = b2Stack;
    int32_t b1Len, b2Len,
            b1Capacity = MAX_LABEL_BUFFER_SIZE,
            b2Capacity = MAX_LABEL_BUFFER_SIZE ,
            reqLength=0;

    //get the options
    UBool allowUnassigned   = (UBool)((options & IDNAREF_ALLOW_UNASSIGNED) != 0);
    UBool useSTD3ASCIIRules = (UBool)((options & IDNAREF_USE_STD3_RULES) != 0);

    UBool* caseFlags = NULL;

    // assume the source contains all ascii codepoints
    UBool srcIsASCII  = TRUE;
    // assume the source contains all LDH codepoints
    UBool srcIsLDH = TRUE;
    int32_t j=0;

//    UParseError parseError;
    // step 2
    NamePrepTransform* prep = TestIDNA::getInstance(*status);

    if(U_FAILURE(*status)){
        goto CLEANUP;
    }

    b1Len = prep->process(src,srcLength,b1, b1Capacity,allowUnassigned,parseError,*status);

    if(*status == U_BUFFER_OVERFLOW_ERROR){
        // redo processing of string
        /* we do not have enough room so grow the buffer*/
        b1 = (UChar*) uprv_malloc(b1Len * U_SIZEOF_UCHAR);
        if(b1==NULL){
            *status = U_MEMORY_ALLOCATION_ERROR;
            goto CLEANUP;
        }

        *status = U_ZERO_ERROR; // reset error

        b1Len = prep->process(src,srcLength,b1, b1Len,allowUnassigned, parseError, *status);
    }
    // error bail out
    if(U_FAILURE(*status)){
        goto CLEANUP;
    }

    // step 3 & 4
    for( j=0;j<b1Len;j++){
        if(b1[j] > 0x7F){
            srcIsASCII = FALSE;
        }else if(prep->isLDHChar(b1[j])==FALSE){
            // the char is in the ASCII range but is not an LDH character
            srcIsLDH = FALSE;
        }
    }

    if(useSTD3ASCIIRules == TRUE){
        // verify 3a and 3b
        // The hyphen checks are only meaningful for a non-empty label; without
        // the b1Len guard an empty result would read b1[0] and b1[-1].
        if( srcIsLDH == FALSE /* source contains some non-LDH characters */
            || (b1Len > 0 && (b1[0] ==  HYPHEN || b1[b1Len-1] == HYPHEN))){
            *status = U_IDNA_STD3_ASCII_RULES_ERROR;
            goto CLEANUP;
        }
    }
    if(srcIsASCII){
        if(b1Len <= destCapacity){
            uprv_memmove(dest, b1, b1Len * U_SIZEOF_UCHAR);
            reqLength = b1Len;
        }else{
            // does not fit: report the required length through u_terminateUChars()
            reqLength = b1Len;
            goto CLEANUP;
        }
    }else{
        // step 5 : verify the sequence does not begin with ACE prefix
        if(!startsWithPrefix(b1,b1Len)){

            //step 6: encode the sequence with punycode
            //caseFlags = (UBool*) uprv_malloc(b1Len * sizeof(UBool));

            b2Len = convertToPuny(b1,b1Len, b2,b2Capacity,*status);
            //b2Len = u_strToPunycode(b2,b2Capacity,b1,b1Len, caseFlags, status);
            if(*status == U_BUFFER_OVERFLOW_ERROR){
                // redo processing of string
                /* we do not have enough room so grow the buffer*/
                b2 = (UChar*) uprv_malloc(b2Len * U_SIZEOF_UCHAR);
                if(b2 == NULL){
                    *status = U_MEMORY_ALLOCATION_ERROR;
                    goto CLEANUP;
                }

                *status = U_ZERO_ERROR; // reset error

                b2Len = convertToPuny(b1, b1Len, b2, b2Len, *status);
                //b2Len = u_strToPunycode(b2,b2Len,b1,b1Len, caseFlags, status);

            }
            //error bail out
            if(U_FAILURE(*status)){
                goto CLEANUP;
            }
            reqLength = b2Len+ACE_PREFIX_LENGTH;

            if(reqLength > destCapacity){
                *status = U_BUFFER_OVERFLOW_ERROR;
                goto CLEANUP;
            }
            //Step 7: prepend the ACE prefix
            uprv_memcpy(dest,ACE_PREFIX,ACE_PREFIX_LENGTH * U_SIZEOF_UCHAR);
            //Step 6: copy the contents in b2 into dest
            uprv_memcpy(dest+ACE_PREFIX_LENGTH, b2, b2Len * U_SIZEOF_UCHAR);

        }else{
            *status = U_IDNA_ACE_PREFIX_ERROR;
            goto CLEANUP;
        }
    }

    if(reqLength > MAX_LABEL_LENGTH){
        *status = U_IDNA_LABEL_TOO_LONG_ERROR;
    }

CLEANUP:
    // release only buffers that were moved to the heap
    if(b1 != b1Stack){
        uprv_free(b1);
    }
    if(b2 != b2Stack){
        uprv_free(b2);
    }
    uprv_free(caseFlags);

//    delete prep;

    return u_terminateUChars(dest, destCapacity, reqLength, status);
}
static int32_t _internal_toASCII(const UChar* src, int32_t srcLength, UChar* dest, int32_t destCapacity, int32_t options, UStringPrepProfile* nameprep, UParseError* parseError, UErrorCode* status) { // TODO Revisit buffer handling. The label should not be over 63 ASCII characters. ICU4J may need to be updated too. UChar b1Stack[MAX_LABEL_BUFFER_SIZE], b2Stack[MAX_LABEL_BUFFER_SIZE]; //initialize pointers to stack buffers UChar *b1 = b1Stack, *b2 = b2Stack; int32_t b1Len=0, b2Len, b1Capacity = MAX_LABEL_BUFFER_SIZE, b2Capacity = MAX_LABEL_BUFFER_SIZE , reqLength=0; int32_t namePrepOptions = ((options & UIDNA_ALLOW_UNASSIGNED) != 0) ? USPREP_ALLOW_UNASSIGNED: 0; UBool* caseFlags = NULL; // the source contains all ascii codepoints UBool srcIsASCII = TRUE; // assume the source contains all LDH codepoints UBool srcIsLDH = TRUE; int32_t j=0; //get the options UBool useSTD3ASCIIRules = (UBool)((options & UIDNA_USE_STD3_RULES) != 0); int32_t failPos = -1; if(srcLength == -1){ srcLength = u_strlen(src); } if(srcLength > b1Capacity){ b1 = (UChar*) uprv_malloc(srcLength * U_SIZEOF_UCHAR); if(b1==NULL){ *status = U_MEMORY_ALLOCATION_ERROR; goto CLEANUP; } b1Capacity = srcLength; } // step 1 for( j=0;j<srcLength;j++){ if(src[j] > 0x7F){ srcIsASCII = FALSE; } b1[b1Len++] = src[j]; } // step 2 is performed only if the source contains non ASCII if(srcIsASCII == FALSE){ // step 2 b1Len = usprep_prepare(nameprep, src, srcLength, b1, b1Capacity, namePrepOptions, parseError, status); if(*status == U_BUFFER_OVERFLOW_ERROR){ // redo processing of string // we do not have enough room so grow the buffer if(b1 != b1Stack){ uprv_free(b1); } b1 = (UChar*) uprv_malloc(b1Len * U_SIZEOF_UCHAR); if(b1==NULL){ *status = U_MEMORY_ALLOCATION_ERROR; goto CLEANUP; } *status = U_ZERO_ERROR; // reset error b1Len = usprep_prepare(nameprep, src, srcLength, b1, b1Len, namePrepOptions, parseError, status); } } // error bail out if(U_FAILURE(*status)){ goto CLEANUP; } if(b1Len == 0){ *status = 
U_IDNA_ZERO_LENGTH_LABEL_ERROR; goto CLEANUP; } // for step 3 & 4 srcIsASCII = TRUE; for( j=0;j<b1Len;j++){ // check if output of usprep_prepare is all ASCII if(b1[j] > 0x7F){ srcIsASCII = FALSE; }else if(isLDHChar(b1[j])==FALSE){ // if the char is in ASCII range verify that it is an LDH character srcIsLDH = FALSE; failPos = j; } } if(useSTD3ASCIIRules == TRUE){ // verify 3a and 3b // 3(a) Verify the absence of non-LDH ASCII code points; that is, the // absence of 0..2C, 2E..2F, 3A..40, 5B..60, and 7B..7F. // 3(b) Verify the absence of leading and trailing hyphen-minus; that // is, the absence of U+002D at the beginning and end of the // sequence. if( srcIsLDH == FALSE /* source at this point should not contain anyLDH characters */ || b1[0] == HYPHEN || b1[b1Len-1] == HYPHEN){ *status = U_IDNA_STD3_ASCII_RULES_ERROR; /* populate the parseError struct */ if(srcIsLDH==FALSE){ // failPos is always set the index of failure uprv_syntaxError(b1,failPos, b1Len,parseError); }else if(b1[0] == HYPHEN){ // fail position is 0 uprv_syntaxError(b1,0,b1Len,parseError); }else{ // the last index in the source is always length-1 uprv_syntaxError(b1, (b1Len>0) ? b1Len-1 : b1Len, b1Len,parseError); } goto CLEANUP; } } // Step 4: if the source is ASCII then proceed to step 8 if(srcIsASCII){ if(b1Len <= destCapacity){ uprv_memmove(dest, b1, b1Len * U_SIZEOF_UCHAR); reqLength = b1Len; }else{ reqLength = b1Len; goto CLEANUP; } }else{ // step 5 : verify the sequence does not begin with ACE prefix if(!startsWithPrefix(b1,b1Len)){ //step 6: encode the sequence with punycode // do not preserve the case flags for now! 
// TODO: Preserve the case while implementing the RFE // caseFlags = (UBool*) uprv_malloc(b1Len * sizeof(UBool)); // uprv_memset(caseFlags,TRUE,b1Len); b2Len = u_strToPunycode(b1,b1Len,b2,b2Capacity,caseFlags, status); if(*status == U_BUFFER_OVERFLOW_ERROR){ // redo processing of string /* we do not have enough room so grow the buffer*/ b2 = (UChar*) uprv_malloc(b2Len * U_SIZEOF_UCHAR); if(b2 == NULL){ *status = U_MEMORY_ALLOCATION_ERROR; goto CLEANUP; } *status = U_ZERO_ERROR; // reset error b2Len = u_strToPunycode(b1,b1Len,b2,b2Len,caseFlags, status); } //error bail out if(U_FAILURE(*status)){ goto CLEANUP; } // TODO : Reconsider while implementing the case preserve RFE // convert all codepoints to lower case ASCII // toASCIILower(b2,b2Len); reqLength = b2Len+ACE_PREFIX_LENGTH; if(reqLength > destCapacity){ *status = U_BUFFER_OVERFLOW_ERROR; goto CLEANUP; } //Step 7: prepend the ACE prefix uprv_memcpy(dest,ACE_PREFIX,ACE_PREFIX_LENGTH * U_SIZEOF_UCHAR); //Step 6: copy the contents in b2 into dest uprv_memcpy(dest+ACE_PREFIX_LENGTH, b2, b2Len * U_SIZEOF_UCHAR); }else{ *status = U_IDNA_ACE_PREFIX_ERROR; //position of failure is 0 uprv_syntaxError(b1,0,b1Len,parseError); goto CLEANUP; } } // step 8: verify the length of label if(reqLength > MAX_LABEL_LENGTH){ *status = U_IDNA_LABEL_TOO_LONG_ERROR; } CLEANUP: if(b1 != b1Stack){ uprv_free(b1); } if(b2 != b2Stack){ uprv_free(b2); } uprv_free(caseFlags); return u_terminateUChars(dest, destCapacity, reqLength, status); }
/**
 * Reference implementation of IDNToUnicode: applies idnaref_toUnicode() to
 * every label of a domain name and joins the results with FULL_STOP.
 *
 * Labels are located with getNextSeparator(). The per-label result is staged
 * in a scratch buffer that starts on the stack and is replaced by a heap block
 * whenever a label does not fit.
 *
 * @param src          Input domain name; must not be NULL.
 * @param srcLength    Length of src in UChars, or -1 if NUL-terminated.
 * @param dest         Output buffer; may be NULL only if destCapacity==0.
 * @param destCapacity Capacity of dest in UChars; must be >=0.
 * @param options      Passed through to idnaref_toUnicode().
 * @param parseError   Passed through to idnaref_toUnicode().
 * @param status       In/out error code. The function returns 0 immediately if
 *                     it is NULL or already a failure.
 * @return The result of u_terminateUChars() for the required length.
 */
U_CFUNC int32_t
idnaref_IDNToUnicode(  const UChar* src, int32_t srcLength,
                       UChar* dest, int32_t destCapacity,
                       int32_t options,
                       UParseError* parseError,
                       UErrorCode* status){

    if(status == NULL || U_FAILURE(*status)){
        return 0;
    }
    if((src == NULL) || (srcLength < -1) || (destCapacity<0) || (!dest && destCapacity > 0)){
        *status = U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }

    int32_t reqLength = 0;

    UBool done = FALSE;

    NamePrepTransform* prep = TestIDNA::getInstance(*status);

    //initialize pointers to stack buffers
    UChar b1Stack[MAX_LABEL_BUFFER_SIZE];
    UChar  *b1 = b1Stack;
    int32_t b1Len, labelLen;
    UChar* delimiter = (UChar*)src;
    UChar* labelStart = (UChar*)src;
    int32_t remainingLen = srcLength;
    int32_t b1Capacity = MAX_LABEL_BUFFER_SIZE;

    //get the options
//      UBool allowUnassigned   = (UBool)((options & IDNAREF_ALLOW_UNASSIGNED) != 0);
//      UBool useSTD3ASCIIRules = (UBool)((options & IDNAREF_USE_STD3_RULES) != 0);

    if(U_FAILURE(*status)){
        goto CLEANUP;
    }

    if(srcLength == -1){
        // NUL-terminated input: stop when the delimiter pointer reaches the terminator
        for(;;){

            if(*delimiter == 0){
                break;
            }

            labelLen = getNextSeparator(labelStart, -1, prep, &delimiter, &done, status);

            b1Len = idnaref_toUnicode(labelStart, labelLen, b1, b1Capacity,
                                      options, parseError, status);

            if(*status == U_BUFFER_OVERFLOW_ERROR){
                // redo processing of string
                /* we do not have enough room so grow the buffer*/
                // release a heap buffer left over from an earlier label before
                // replacing it, otherwise it would leak
                if(b1 != b1Stack){
                    uprv_free(b1);
                }
                b1 = (UChar*) uprv_malloc(b1Len * U_SIZEOF_UCHAR);
                if(b1==NULL){
                    *status = U_MEMORY_ALLOCATION_ERROR;
                    goto CLEANUP;
                }
                // remember the real size so that later labels can use all of it
                b1Capacity = b1Len;

                *status = U_ZERO_ERROR; // reset error

                b1Len = idnaref_toUnicode( labelStart, labelLen, b1, b1Len,
                                           options, parseError, status);

            }

            if(U_FAILURE(*status)){
                goto CLEANUP;
            }
            int32_t tempLen = (reqLength + b1Len );
            // copy to dest
            if( tempLen< destCapacity){
                uprv_memmove(dest+reqLength, b1, b1Len * U_SIZEOF_UCHAR);
            }

            reqLength = tempLen;
            // add the label separator
            if(done == FALSE){
                if(reqLength < destCapacity){
                    dest[reqLength] = FULL_STOP;
                }
                reqLength++;
            }

            labelStart = delimiter;
        }
    }else{
        // counted input: stop when the delimiter pointer reaches the end of src
        for(;;){

            if(delimiter == src+srcLength){
                break;
            }

            labelLen = getNextSeparator(labelStart, remainingLen, prep, &delimiter, &done, status);

            b1Len = idnaref_toUnicode( labelStart,labelLen, b1, b1Capacity,
                                       options, parseError, status);

            if(*status == U_BUFFER_OVERFLOW_ERROR){
                // redo processing of string
                /* we do not have enough room so grow the buffer*/
                // release a heap buffer left over from an earlier label before
                // replacing it, otherwise it would leak
                if(b1 != b1Stack){
                    uprv_free(b1);
                }
                b1 = (UChar*) uprv_malloc(b1Len * U_SIZEOF_UCHAR);
                if(b1==NULL){
                    *status = U_MEMORY_ALLOCATION_ERROR;
                    goto CLEANUP;
                }
                // remember the real size so that later labels can use all of it
                b1Capacity = b1Len;

                *status = U_ZERO_ERROR; // reset error

                b1Len = idnaref_toUnicode( labelStart, labelLen, b1, b1Len,
                                           options, parseError, status);

            }

            if(U_FAILURE(*status)){
                goto CLEANUP;
            }
            int32_t tempLen = (reqLength + b1Len );
            // copy to dest
            if( tempLen< destCapacity){
                uprv_memmove(dest+reqLength, b1, b1Len * U_SIZEOF_UCHAR);
            }

            reqLength = tempLen;

            // add the label separator
            if(done == FALSE){
                if(reqLength < destCapacity){
                    dest[reqLength] = FULL_STOP;
                }
                reqLength++;
            }

            labelStart = delimiter;
            remainingLen = srcLength - (delimiter - src);
        }
    }

CLEANUP:

    if(b1 != b1Stack){
        uprv_free(b1);
    }

//    delete prep;

    return u_terminateUChars(dest, destCapacity, reqLength, status);
}
static int32_t _internal_toUnicode(const UChar* src, int32_t srcLength, UChar* dest, int32_t destCapacity, int32_t options, UStringPrepProfile* nameprep, UParseError* parseError, UErrorCode* status) { //get the options //UBool useSTD3ASCIIRules = (UBool)((options & UIDNA_USE_STD3_RULES) != 0); int32_t namePrepOptions = ((options & UIDNA_ALLOW_UNASSIGNED) != 0) ? USPREP_ALLOW_UNASSIGNED: 0; // TODO Revisit buffer handling. The label should not be over 63 ASCII characters. ICU4J may need to be updated too. UChar b1Stack[MAX_LABEL_BUFFER_SIZE], b2Stack[MAX_LABEL_BUFFER_SIZE], b3Stack[MAX_LABEL_BUFFER_SIZE]; //initialize pointers to stack buffers UChar *b1 = b1Stack, *b2 = b2Stack, *b1Prime=NULL, *b3=b3Stack; int32_t b1Len, b2Len, b1PrimeLen, b3Len, b1Capacity = MAX_LABEL_BUFFER_SIZE, b2Capacity = MAX_LABEL_BUFFER_SIZE, b3Capacity = MAX_LABEL_BUFFER_SIZE, reqLength=0; b1Len = 0; UBool* caseFlags = NULL; UBool srcIsASCII = TRUE; /*UBool srcIsLDH = TRUE; int32_t failPos =0;*/ // step 1: find out if all the codepoints in src are ASCII if(srcLength==-1){ srcLength = 0; for(;src[srcLength]!=0;){ if(src[srcLength]> 0x7f){ srcIsASCII = FALSE; }/*else if(isLDHChar(src[srcLength])==FALSE){ // here we do not assemble surrogates // since we know that LDH code points // are in the ASCII range only srcIsLDH = FALSE; failPos = srcLength; }*/ srcLength++; } }else if(srcLength > 0){ for(int32_t j=0; j<srcLength; j++){ if(src[j]> 0x7f){ srcIsASCII = FALSE; }/*else if(isLDHChar(src[j])==FALSE){ // here we do not assemble surrogates // since we know that LDH code points // are in the ASCII range only srcIsLDH = FALSE; failPos = j; }*/ } }else{ return 0; } if(srcIsASCII == FALSE){ // step 2: process the string b1Len = usprep_prepare(nameprep, src, srcLength, b1, b1Capacity, namePrepOptions, parseError, status); if(*status == U_BUFFER_OVERFLOW_ERROR){ // redo processing of string /* we do not have enough room so grow the buffer*/ b1 = (UChar*) uprv_malloc(b1Len * U_SIZEOF_UCHAR); 
if(b1==NULL){ *status = U_MEMORY_ALLOCATION_ERROR; goto CLEANUP; } *status = U_ZERO_ERROR; // reset error b1Len = usprep_prepare(nameprep, src, srcLength, b1, b1Len, namePrepOptions, parseError, status); } //bail out on error if(U_FAILURE(*status)){ goto CLEANUP; } }else{ //just point src to b1 b1 = (UChar*) src; b1Len = srcLength; } // The RFC states that // <quote> // ToUnicode never fails. If any step fails, then the original input // is returned immediately in that step. // </quote> //step 3: verify ACE Prefix if(startsWithPrefix(b1,b1Len)){ //step 4: Remove the ACE Prefix b1Prime = b1 + ACE_PREFIX_LENGTH; b1PrimeLen = b1Len - ACE_PREFIX_LENGTH; //step 5: Decode using punycode b2Len = u_strFromPunycode(b1Prime, b1PrimeLen, b2, b2Capacity, caseFlags,status); if(*status == U_BUFFER_OVERFLOW_ERROR){ // redo processing of string /* we do not have enough room so grow the buffer*/ b2 = (UChar*) uprv_malloc(b2Len * U_SIZEOF_UCHAR); if(b2==NULL){ *status = U_MEMORY_ALLOCATION_ERROR; goto CLEANUP; } *status = U_ZERO_ERROR; // reset error b2Len = u_strFromPunycode(b1Prime, b1PrimeLen, b2, b2Len, caseFlags, status); } //step 6:Apply toASCII b3Len = uidna_toASCII(b2, b2Len, b3, b3Capacity, options, parseError, status); if(*status == U_BUFFER_OVERFLOW_ERROR){ // redo processing of string /* we do not have enough room so grow the buffer*/ b3 = (UChar*) uprv_malloc(b3Len * U_SIZEOF_UCHAR); if(b3==NULL){ *status = U_MEMORY_ALLOCATION_ERROR; goto CLEANUP; } *status = U_ZERO_ERROR; // reset error b3Len = uidna_toASCII(b2,b2Len,b3,b3Len,options,parseError, status); } //bail out on error if(U_FAILURE(*status)){ goto CLEANUP; } //step 7: verify if(compareCaseInsensitiveASCII(b1, b1Len, b3, b3Len) !=0){ // Cause the original to be returned. 
*status = U_IDNA_VERIFICATION_ERROR; goto CLEANUP; } //step 8: return output of step 5 reqLength = b2Len; if(b2Len <= destCapacity) { uprv_memmove(dest, b2, b2Len * U_SIZEOF_UCHAR); } } else{ // See the start of this if statement for why this is commented out. // verify that STD3 ASCII rules are satisfied /*if(useSTD3ASCIIRules == TRUE){ if( srcIsLDH == FALSE // source contains some non-LDH characters || src[0] == HYPHEN || src[srcLength-1] == HYPHEN){ *status = U_IDNA_STD3_ASCII_RULES_ERROR; // populate the parseError struct if(srcIsLDH==FALSE){ // failPos is always set the index of failure uprv_syntaxError(src,failPos, srcLength,parseError); }else if(src[0] == HYPHEN){ // fail position is 0 uprv_syntaxError(src,0,srcLength,parseError); }else{ // the last index in the source is always length-1 uprv_syntaxError(src, (srcLength>0) ? srcLength-1 : srcLength, srcLength,parseError); } goto CLEANUP; } }*/ // just return the source //copy the source to destination if(srcLength <= destCapacity){ uprv_memmove(dest,src,srcLength * U_SIZEOF_UCHAR); } reqLength = srcLength; } CLEANUP: if(b1 != b1Stack && b1!=src){ uprv_free(b1); } if(b2 != b2Stack){ uprv_free(b2); } uprv_free(caseFlags); // The RFC states that // <quote> // ToUnicode never fails. If any step fails, then the original input // is returned immediately in that step. // </quote> // So if any step fails lets copy source to destination if(U_FAILURE(*status)){ //copy the source to destination if(dest && srcLength <= destCapacity){ // srcLength should have already been set earlier. U_ASSERT(srcLength >= 0); uprv_memmove(dest,src,srcLength * U_SIZEOF_UCHAR); } reqLength = srcLength; *status = U_ZERO_ERROR; } return u_terminateUChars(dest, destCapacity, reqLength, status); }