Пример #1
0
/**
 * Check whether two identifiers are visually confusable and, if they are,
 * report which classes of confusability apply.
 *
 * See section 4 of UAX 39 for the algorithm for checking whether two strings
 * are confusable, and for definitions of the types (single, whole,
 * mixed-script) of confusables.
 *
 * @param sc     The spoof checker; validated through SpoofImpl::validateThis().
 * @param id1    The first identifier.
 * @param id2    The second identifier.
 * @param status In/out ICU error code. Set to U_INVALID_STATE_ERROR when none
 *               of the USPOOF_CONFUSABLE bits is enabled in the checker.
 * @return 0 when the identifiers' skeletons differ or when an error occurred;
 *         otherwise the combination of USPOOF_SINGLE_SCRIPT_CONFUSABLE,
 *         USPOOF_MIXED_SCRIPT_CONFUSABLE and USPOOF_WHOLE_SCRIPT_CONFUSABLE
 *         that applies, limited to the checks enabled in the checker.
 */
U_CAPI int32_t U_EXPORT2
uspoof_areConfusableUnicodeString(const USpoofChecker *sc,
                                  const icu::UnicodeString &id1,
                                  const icu::UnicodeString &id2,
                                  UErrorCode *status) {
    const SpoofImpl *This = SpoofImpl::validateThis(sc, *status);
    if (U_FAILURE(*status)) {
        return 0;
    }

    // We only care about a few of the check flags.  Ignore the others.
    // If no tests relevant to this function have been specified, return an error.
    // TODO:  is this really the right thing to do?  It's probably an error on the caller's part,
    //        but logically we would just return 0 (no error).
    if ((This->fChecks & USPOOF_CONFUSABLE) == 0) {
        *status = U_INVALID_STATE_ERROR;
        return 0;
    }

    // Compute the skeletons and check for confusability.
    UnicodeString id1Skeleton;
    uspoof_getSkeletonUnicodeString(sc, 0 /* deprecated */, id1, id1Skeleton, status);
    UnicodeString id2Skeleton;
    uspoof_getSkeletonUnicodeString(sc, 0 /* deprecated */, id2, id2Skeleton, status);
    if (U_FAILURE(*status)) { return 0; }
    if (id1Skeleton != id2Skeleton) {
        return 0;
    }

    // If we get here, the strings are confusable.  Now we just need to set the flags for the appropriate classes
    // of confusables according to UTS 39 section 4.
    // Start by computing the resolved script sets of id1 and id2.
    ScriptSet id1RSS;
    This->getResolvedScriptSet(id1, id1RSS, *status);
    ScriptSet id2RSS;
    This->getResolvedScriptSet(id2, id2RSS, *status);
    // Do not classify based on script sets that could not be computed.
    if (U_FAILURE(*status)) {
        return 0;
    }

    // Turn on all applicable flags
    int32_t result = 0;
    if (id1RSS.intersects(id2RSS)) {
        result |= USPOOF_SINGLE_SCRIPT_CONFUSABLE;
    } else {
        result |= USPOOF_MIXED_SCRIPT_CONFUSABLE;
        if (!id1RSS.isEmpty() && !id2RSS.isEmpty()) {
            result |= USPOOF_WHOLE_SCRIPT_CONFUSABLE;
        }
    }

    // Turn off flags that the user doesn't want
    if ((This->fChecks & USPOOF_SINGLE_SCRIPT_CONFUSABLE) == 0) {
        result &= ~USPOOF_SINGLE_SCRIPT_CONFUSABLE;
    }
    if ((This->fChecks & USPOOF_MIXED_SCRIPT_CONFUSABLE) == 0) {
        result &= ~USPOOF_MIXED_SCRIPT_CONFUSABLE;
    }
    if ((This->fChecks & USPOOF_WHOLE_SCRIPT_CONFUSABLE) == 0) {
        result &= ~USPOOF_WHOLE_SCRIPT_CONFUSABLE;
    }

    return result;
}
Пример #2
0
/**
 * Decide how Apache should proceed with a request guarded by the SPEP.
 *
 * Flow, as implemented below:
 *  1. If the SPEP has not started, answer HTTP_SERVICE_UNAVAILABLE.
 *  2. If the request carries SPEP token cookies and one of them verifies as a
 *     session, optionally export the session attributes into the subprocess
 *     environment, then either let the request through (policy enforcement
 *     disabled) or map the authorization decision to DECLINED / HTTP_FORBIDDEN /
 *     HTTP_INTERNAL_SERVER_ERROR.
 *  3. Otherwise clear the configured logout cookies, apply the lazy-init rules
 *     (which may return DECLINED), and finally redirect to the SSO endpoint
 *     with HTTP_MOVED_TEMPORARILY.
 *
 * @param req The Apache request being processed; its pool is used for all
 *            string allocations made here.
 * @return An Apache handler status / HTTP status code as described above.
 */
int spep::apache::RequestHandler::handleRequestInner( request_rec *req )
{
	if( !this->_spep->isStarted() )
	{
		return HTTP_SERVICE_UNAVAILABLE;
	}
	
	spep::SPEPConfigData *spepConfigData = this->_spep->getSPEPConfigData();
	
	// Rebuild "path?query" from the parsed URI and unescape it in place; this is the
	// resource string used for authorization and lazy-init matching.
	// NOTE(review): parsed_uri.path is not checked for NULL before use — confirm it is always set.
	char *properURI = apr_pstrdup( req->pool, req->parsed_uri.path );
	if( req->parsed_uri.query != NULL )
	{
		properURI = apr_psprintf( req->pool, "%s?%s", properURI, req->parsed_uri.query );
	}
	
	ap_unescape_url( properURI );
	
	// Collect every value sent for the SPEP token cookie.
	Cookies cookies( req );
	std::vector<std::string> cookieValues;
	cookies.getCookieValuesByName( cookieValues, spepConfigData->getTokenName() );
	if( !cookieValues.empty() )
	{
		std::string sessionID;
		spep::PrincipalSession principalSession;
		bool validSession = false;

		// SPEP cookie was found, validate using one of the values and use that to proceed.
		for (std::vector<std::string>::iterator cookieValueIterator = cookieValues.begin();
			cookieValueIterator != cookieValues.end(); ++cookieValueIterator) {

			sessionID = *cookieValueIterator;
			try {
				principalSession = this->_spep->getAuthnProcessor()->verifySession( sessionID );
				validSession = true;
				break;
			} catch( std::exception& e ) {
				// Verification failure is deliberately ignored: try the next cookie value.
			}
		}
		
		if( validSession )
		{
			// If attribute querying is not disabled...
			if( !this->_spep->getSPEPConfigData()->disableAttributeQuery() )
			{
				// Put attributes into the environment.
				
				std::string usernameAttribute = spepConfigData->getUsernameAttribute();
				
				std::string attributeValueSeparator = spepConfigData->getAttributeValueSeparator();
				std::string attributeNamePrefix = spepConfigData->getAttributeNamePrefix();
				for( spep::PrincipalSession::AttributeMapType::iterator attributeIterator = principalSession.getAttributeMap().begin();
					attributeIterator != principalSession.getAttributeMap().end();
					++attributeIterator )
				{
					
					std::string name = spep::UnicodeStringConversion::toString( attributeIterator->first );
					std::string envName = attributeNamePrefix + name;
					
					// Join all values of this attribute with the configured separator.
					std::stringstream valueStream;
					bool first = true;
					for( std::vector<UnicodeString>::iterator attributeValueIterator = attributeIterator->second.begin(); 
						attributeValueIterator != attributeIterator->second.end(); 
						++attributeValueIterator )
					{
						std::string value = spep::UnicodeStringConversion::toString( *attributeValueIterator );
						
						if( first )
						{
							valueStream << value;
							first = false;
						}
						else
						{
							valueStream << attributeValueSeparator << value;
						}
					}
					
					std::string envValue = valueStream.str();
					
					// Insert the attribute name/value pair into the subprocess environment.
					apr_table_set( req->subprocess_env, envName.c_str(), envValue.c_str() );
					
					// The attribute configured as the username also becomes the request's user.
					if( name.compare( usernameAttribute ) == 0 )
					{
#ifndef APACHE1
						req->user = apr_pstrdup( req->pool, envValue.c_str() );
#else
						req->connection->user = apr_pstrdup( req->pool, envValue.c_str() );
#endif
					}
				}
			}
			
			if( this->_spep->getSPEPConfigData()->disablePolicyEnforcement() )
			{
				// No need to perform authorization, just let them in.
				return DECLINED;
			}
			
			// Perform authorization on the URI requested.
			spep::PolicyEnforcementProcessorData pepData;
			pepData.setESOESessionID( principalSession.getESOESessionID() );
			pepData.setResource( properURI );
			
			this->_spep->getPolicyEnforcementProcessor()->makeAuthzDecision( pepData );
			spep::Decision authzDecision( pepData.getDecision() );
			
			// Re-verify the session after the authorization call. If it no longer verifies,
			// the decision is discarded and control falls through to the unauthenticated
			// path below.
			// NOTE(review): presumably guards against the session being terminated while the
			// authorization decision was made — confirm.
			validSession = false;
			try
			{
				principalSession = this->_spep->getAuthnProcessor()->verifySession( sessionID );
				validSession = true;
			}
			catch( std::exception& e )
			{
			}
			
			if( validSession )
			{
				if( authzDecision == spep::Decision::PERMIT )
				{
					return DECLINED;
				}
				else if( authzDecision == spep::Decision::DENY )
				{
					return HTTP_FORBIDDEN;
				}
				else if( authzDecision == spep::Decision::ERROR )
				{
					return HTTP_INTERNAL_SERVER_ERROR;
				}
				else
				{
					// Any other decision value is treated as an error as well.
					return HTTP_INTERNAL_SERVER_ERROR;
				}
			}
		}
	}
	
	// If we get to this stage, the session has not been authenticated. We proceed to clear the
	// cookies configured by the SPEP to be cleared upon logout, since this is potentially the
	// first time they have come back to the SPEP since logging out.
	
	bool requireSend = false;
	const std::vector<std::string>& logoutClearCookies = this->_spep->getSPEPConfigData()->getLogoutClearCookies();
	for( std::vector<std::string>::const_iterator logoutClearCookieIterator = logoutClearCookies.begin();
		logoutClearCookieIterator != logoutClearCookies.end();
		++logoutClearCookieIterator )
	{
		// Throw the configured string into a stringstream
		std::stringstream ss( *logoutClearCookieIterator );
		
		// Split into name, domain, path. Doc says that stringstream operator>> won't throw
		std::string cookieNameString, cookieDomainString, cookiePathString;
		ss >> cookieNameString >> cookieDomainString >> cookiePathString;

		// Default to NULL, and then check if they were specified
		const char *cookieName = NULL, *cookieDomain = NULL, *cookiePath = NULL;
		// No cookie name, no clear.
		if( cookieNameString.length() == 0 )
		{
			continue;
		}
		
		// If the user sent this cookie.
		// NOTE(review): this local `cookies` shadows the function-level `cookies` object, so
		// addCookie() below is applied to a different object than the one sendCookies() is
		// called on after the loop — confirm against the Cookies implementation that the
		// cleared cookies are actually sent.
		// NOTE(review): the lookup uses the SPEP token name, not cookieNameString, although
		// the comment above suggests the configured cookie itself should be tested — confirm.
		Cookies cookies( req );
		std::vector<std::string> cookieValues;
		cookies.getCookieValuesByName( cookieValues, spepConfigData->getTokenName() );
		if( !cookieValues.empty() ) {
			cookieName = cookieNameString.c_str();
			
			if( cookieDomainString.length() > 0 )
			{
				cookieDomain = cookieDomainString.c_str();
			}
			
			if( cookiePathString.length() > 0 )
			{
				cookiePath = cookiePathString.c_str();
			}
			
			// Set the cookie to an empty value.
			cookies.addCookie( req, cookieName, "", cookiePath, cookieDomain, false );
			
			// Flag that we need to send the cookies, because we have set at least one.
			requireSend = true;
		}
	}
	
	if( requireSend )
	{
		cookies.sendCookies( req );
	}
	
	// Lazy init code.
	// Only applies when the global ESOE cookie is absent. A request is let through (DECLINED)
	// when the URI matches a lazy-init resource and the default is "deny", or when it matches
	// none and the default is "permit"; in every other case we fall through to the redirect.
	if( spepConfigData->isLazyInit() )
	{
		
		std::string globalESOECookieName( spepConfigData->getGlobalESOECookieName() );
		Cookies cookies( req );
		std::vector<std::string> cookieValues;
		cookies.getCookieValuesByName( cookieValues, globalESOECookieName );
		if( cookieValues.empty() ) {
			bool matchedLazyInitResource = false;
			UnicodeString properURIUnicode( spep::UnicodeStringConversion::toUnicodeString( properURI ) );
			
			std::vector<UnicodeString>::const_iterator lazyInitResourceIterator;
			for( lazyInitResourceIterator = spepConfigData->getLazyInitResources().begin();
				lazyInitResourceIterator != spepConfigData->getLazyInitResources().end();
				++lazyInitResourceIterator )
			{
				// TODO Opportunity for caching of compiled regex patterns is here.
				UParseError parseError;
				UErrorCode errorCode = U_ZERO_ERROR;
				// Perform the regular expression matching here.
				UBool result = RegexPattern::matches( *lazyInitResourceIterator, properURIUnicode, parseError, errorCode );
				
				if ( U_FAILURE( errorCode ) )
				{
					// TODO throw u_errorName( errorCode )
					return HTTP_INTERNAL_SERVER_ERROR;
				}
				
				// FALSE is defined by ICU. This line for portability.
				if (result != FALSE)
				{
					matchedLazyInitResource = true;
					break;
				}
			}
			
			if( matchedLazyInitResource )
			{
				if( !spepConfigData->isLazyInitDefaultPermit() )
				{
					return DECLINED;
				}
			}
			else
			{
				if( spepConfigData->isLazyInitDefaultPermit() )
				{
					return DECLINED;
				}
			}
		}
	}
	
	// Milliseconds between 1970-01-01 and the current local time; appended to the redirect
	// URL as the "ts" parameter.
	boost::posix_time::ptime epoch( boost::gregorian::date( 1970, 1, 1 ) );
	boost::posix_time::time_duration timestamp = boost::posix_time::microsec_clock::local_time() - epoch;
	boost::posix_time::time_duration::tick_type currentTimeMillis = timestamp.total_milliseconds();
	
	// Parse the configured service host so its host part can be compared with the request's.
	// NOTE(review): the result of apr_uri_parse() is not checked, and uri->hostinfo is passed
	// to strcmp() below without a NULL check — confirm the service host always parses.
	apr_uri_t *uri = static_cast<apr_uri_t*>( apr_pcalloc( req->pool, sizeof(apr_uri_t) ) );
	apr_uri_parse( req->pool, this->_spep->getSPEPConfigData()->getServiceHost().c_str(), uri );
	
	const char *hostname = apr_table_get( req->headers_in, "Host" );
	if( hostname == NULL )
	{
		hostname = req->server->server_hostname;
	}
	
	const char *format = NULL;
	const char *base64RequestURI = NULL;
	// If we can't determine our own hostname, just fall through to the service host.
	// If the service host was requested obviously we want that.
	if( hostname == NULL || std::strcmp( uri->hostinfo, hostname ) == 0 )
	{
		// Join the service hostname and requested URI to form the return URL
		char *returnURL = apr_psprintf( req->pool, "%s%s", 
				this->_spep->getSPEPConfigData()->getServiceHost().c_str(), req->unparsed_uri );
		
		// Base64 encode this so that the HTTP redirect doesn't corrupt it.
		base64RequestURI = ap_pbase64encode( req->pool, returnURL );
		
		// Create the format string for building the redirect URL.
		format = apr_psprintf( req->pool, "%s%s", this->_spep->getSPEPConfigData()->getServiceHost().c_str(), 
					this->_spep->getSPEPConfigData()->getSSORedirect().c_str() );
	}
	else
	{
		base64RequestURI = ap_pbase64encode( req->pool, req->unparsed_uri );
		// getSSORedirect() will only give us a temporary.. dup it into the pool so we don't lose it when we leave this scope.
		format = apr_pstrdup( req->pool, this->_spep->getSPEPConfigData()->getSSORedirect().c_str() );
	}
	
	// The configured SSO redirect is used as a printf-style format with the base64 request
	// URI as its single argument.
	// NOTE(review): this relies on the configuration containing exactly one "%s" and no
	// other conversion specifiers — confirm the configuration is validated elsewhere.
	char *redirectURL = apr_psprintf( req->pool, format, base64RequestURI );
	
	std::stringstream timestampParameter;
	if( strchr( redirectURL, '?' ) != NULL )
	{
		// Query string already exists.. append the timestamp as another parameter
		timestampParameter << "&ts=" << currentTimeMillis;
		redirectURL = apr_psprintf( req->pool, "%s%s", redirectURL, timestampParameter.str().c_str() );
	}
	else
	{
		// No query string. Add one with the timestamp as a parameter.
		timestampParameter << "?ts=" << currentTimeMillis;
		redirectURL = apr_psprintf( req->pool, "%s%s", redirectURL, timestampParameter.str().c_str() );
	}
	
	// redirectURL lives in the request pool, so the non-copying setn variant is used.
	apr_table_setn( req->headers_out, "Location", redirectURL );
	return HTTP_MOVED_TEMPORARILY;
	
}
Пример #3
0
/**
 * Write this package to a file, converting it to the requested output type.
 *
 * The header, the item names and each item's data are swapped IN PLACE to the
 * output properties before being written, and every item name is replaced by a
 * newly allocated "<package name><tree separator><name>" string. The in-memory
 * package is therefore modified by this call.
 *
 * Any failure prints a message to stderr and terminates the process via exit().
 *
 * @param filename Path of the file to create; the package name used as item
 *                 prefix is extracted from it.
 * @param outType  Output type character, translated by makeTypeProps() /
 *                 makeTypeEnum() into charset family and endianness.
 * @param comment  If not NULL, replaces the comment stored in the data header;
 *                 if NULL the existing header is written unchanged.
 */
void
Package::writePackage(const char *filename, char outType, const char *comment) {
    char prefix[MAX_PKG_NAME_LENGTH+4];
    UDataOffsetTOCEntry entry;
    UDataSwapper *dsLocalToOut, *ds[TYPE_COUNT];
    FILE *file;
    Item *pItem;
    char *name;
    UErrorCode errorCode;
    int32_t i, length, prefixLength, maxItemLength, basenameOffset, offset, outInt32;
    uint8_t outCharset;
    UBool outIsBigEndian;

    extractPackageName(filename, prefix, MAX_PKG_NAME_LENGTH);

    // if there is an explicit comment, then use it, else use what's in the current header
    if(comment!=NULL) {
        /* get the header size minus the current comment */
        DataHeader *pHeader;
        // note: this local `length` shadows the function-level `length` inside this block
        int32_t length;

        pHeader=(DataHeader *)header;
        headerLength=4+pHeader->info.size;
        length=(int32_t)strlen(comment);
        if((int32_t)(headerLength+length)>=(int32_t)sizeof(header)) {
            fprintf(stderr, "icupkg: comment too long\n");
            exit(U_BUFFER_OVERFLOW_ERROR);
        }
        // copy the comment including its terminating NUL
        memcpy(header+headerLength, comment, length+1);
        headerLength+=length;
        if(headerLength&0xf) {
            /* NUL-pad the header to a multiple of 16 */
            length=(headerLength+0xf)&~0xf;
            memset(header+headerLength, 0, length-headerLength);
            headerLength=length;
        }
        pHeader->dataHeader.headerSize=(uint16_t)headerLength;
    }

    makeTypeProps(outType, outCharset, outIsBigEndian);

    // open (TYPE_COUNT-2) swappers
    // one is a no-op for local type==outType
    // one type (TYPE_LE) is bogus
    errorCode=U_ZERO_ERROR;
    i=makeTypeEnum(outType);
    ds[TYPE_B]= i==TYPE_B ? NULL : udata_openSwapper(TRUE, U_ASCII_FAMILY, outIsBigEndian, outCharset, &errorCode);
    ds[TYPE_L]= i==TYPE_L ? NULL : udata_openSwapper(FALSE, U_ASCII_FAMILY, outIsBigEndian, outCharset, &errorCode);
    ds[TYPE_LE]=NULL;
    ds[TYPE_E]= i==TYPE_E ? NULL : udata_openSwapper(TRUE, U_EBCDIC_FAMILY, outIsBigEndian, outCharset, &errorCode);
    if(U_FAILURE(errorCode)) {
        fprintf(stderr, "icupkg: udata_openSwapper() failed - %s\n", u_errorName(errorCode));
        exit(errorCode);
    }
    // route swapper diagnostics to stderr
    for(i=0; i<TYPE_COUNT; ++i) {
        if(ds[i]!=NULL) {
            ds[i]->printError=printPackageError;
            ds[i]->printErrorContext=stderr;
        }
    }

    // swapper from this platform's properties to the output ones;
    // NULL when they are the same and no swapping is needed
    dsLocalToOut=ds[makeTypeEnum(U_CHARSET_FAMILY, U_IS_BIG_ENDIAN)];

    // create the file and write its contents
    file=fopen(filename, "wb");
    if(file==NULL) {
        fprintf(stderr, "icupkg: unable to create file \"%s\"\n", filename);
        exit(U_FILE_ACCESS_ERROR);
    }

    // swap and write the header
    if(dsLocalToOut!=NULL) {
        udata_swapDataHeader(dsLocalToOut, header, headerLength, header, &errorCode);
        if(U_FAILURE(errorCode)) {
            fprintf(stderr, "icupkg: udata_swapDataHeader(local to out) failed - %s\n", u_errorName(errorCode));
            exit(errorCode);
        }
    }
    length=(int32_t)fwrite(header, 1, headerLength, file);
    if(length!=headerLength) {
        fprintf(stderr, "icupkg: unable to write complete header to file \"%s\"\n", filename);
        exit(U_FILE_ACCESS_ERROR);
    }

    // prepare and swap the package name with a tree separator
    // for prepending to item names
    strcat(prefix, U_TREE_ENTRY_SEP_STRING);
    prefixLength=(int32_t)strlen(prefix);
    if(dsLocalToOut!=NULL) {
        dsLocalToOut->swapInvChars(dsLocalToOut, prefix, prefixLength, prefix, &errorCode);
        if(U_FAILURE(errorCode)) {
            fprintf(stderr, "icupkg: swapInvChars(output package name) failed - %s\n", u_errorName(errorCode));
            exit(errorCode);
        }

        // swap and sort the item names (sorting needs to be done in the output charset)
        dsLocalToOut->swapInvChars(dsLocalToOut, inStrings, inStringTop, inStrings, &errorCode);
        if(U_FAILURE(errorCode)) {
            fprintf(stderr, "icupkg: swapInvChars(item names) failed - %s\n", u_errorName(errorCode));
            exit(errorCode);
        }
        sortItems();
    }

    // create the output item names in sorted order, with the package name prepended to each
    for(i=0; i<itemCount; ++i) {
        length=(int32_t)strlen(items[i].name);
        name=allocString(FALSE, length+prefixLength);
        memcpy(name, prefix, prefixLength);
        // copies the original name including its terminating NUL
        memcpy(name+prefixLength, items[i].name, length+1);
        items[i].name=name;
    }

    // calculate offsets for item names and items, pad to 16-align items
    // align only the first item; each item's length is a multiple of 16
    // (4 bytes for the item count plus 8 bytes per table-of-contents entry)
    basenameOffset=4+8*itemCount;
    offset=basenameOffset+outStringTop;
    if((length=(offset&15))!=0) {
        length=16-length;
        // fill the alignment gap with 0xaa bytes
        // NOTE(review): presumably allocString(FALSE, n) reserves n+1 bytes, which is why
        // length-1 is requested for `length` bytes of padding — confirm against allocString()
        memset(allocString(FALSE, length-1), 0xaa, length);
        offset+=length;
    }

    // write the table of contents
    // first the itemCount
    outInt32=itemCount;
    if(dsLocalToOut!=NULL) {
        dsLocalToOut->swapArray32(dsLocalToOut, &outInt32, 4, &outInt32, &errorCode);
        if(U_FAILURE(errorCode)) {
            fprintf(stderr, "icupkg: swapArray32(item count) failed - %s\n", u_errorName(errorCode));
            exit(errorCode);
        }
    }
    length=(int32_t)fwrite(&outInt32, 1, 4, file);
    if(length!=4) {
        fprintf(stderr, "icupkg: unable to write complete item count to file \"%s\"\n", filename);
        exit(U_FILE_ACCESS_ERROR);
    }

    // then write the item entries (and collect the maxItemLength)
    maxItemLength=0;
    for(i=0; i<itemCount; ++i) {
        // name offset: position of this item's name within outStrings, after the ToC
        entry.nameOffset=(uint32_t)(basenameOffset+(items[i].name-outStrings));
        entry.dataOffset=(uint32_t)offset;
        if(dsLocalToOut!=NULL) {
            dsLocalToOut->swapArray32(dsLocalToOut, &entry, 8, &entry, &errorCode);
            if(U_FAILURE(errorCode)) {
                fprintf(stderr, "icupkg: swapArray32(item entry %ld) failed - %s\n", (long)i, u_errorName(errorCode));
                exit(errorCode);
            }
        }
        length=(int32_t)fwrite(&entry, 1, 8, file);
        if(length!=8) {
            fprintf(stderr, "icupkg: unable to write complete item entry %ld to file \"%s\"\n", (long)i, filename);
            exit(U_FILE_ACCESS_ERROR);
        }

        length=items[i].length;
        if(length>maxItemLength) {
            maxItemLength=length;
        }
        // the next item's data starts right after this one's
        offset+=length;
    }

    // write the item names
    length=(int32_t)fwrite(outStrings, 1, outStringTop, file);
    if(length!=outStringTop) {
        fprintf(stderr, "icupkg: unable to write complete item names to file \"%s\"\n", filename);
        exit(U_FILE_ACCESS_ERROR);
    }

    // write the items
    for(pItem=items, i=0; i<itemCount; ++pItem, ++i) {
        int32_t type=makeTypeEnum(pItem->type);
        if(ds[type]!=NULL) {
            // swap each item from its platform properties to the desired ones
            // (in place: input and output buffer are both pItem->data)
            udata_swap(
                ds[type],
                pItem->data, pItem->length, pItem->data,
                &errorCode);
            if(U_FAILURE(errorCode)) {
                fprintf(stderr, "icupkg: udata_swap(item %ld) failed - %s\n", (long)i, u_errorName(errorCode));
                exit(errorCode);
            }
        }
        length=(int32_t)fwrite(pItem->data, 1, pItem->length, file);
        if(length!=pItem->length) {
            fprintf(stderr, "icupkg: unable to write complete item %ld to file \"%s\"\n", (long)i, filename);
            exit(U_FILE_ACCESS_ERROR);
        }
    }

    if(ferror(file)) {
        fprintf(stderr, "icupkg: unable to write complete file \"%s\"\n", filename);
        exit(U_FILE_ACCESS_ERROR);
    }

    // NOTE(review): the result of fclose() is not checked, so a failure while flushing
    // buffered data at close time would go unreported
    fclose(file);
    // entries that were never opened are NULL and are passed to udata_closeSwapper() as such
    for(i=0; i<TYPE_COUNT; ++i) {
        udata_closeSwapper(ds[i]);
    }
}
Пример #4
0
/*
 * Load the StringPrep data identified by path/name/type and attach it to
 * `profile`.
 *
 * The data file is opened and its trie unserialized outside the mutex; the
 * result is installed into the profile under usprepMutex only if the profile
 * has no data yet. If the profile already has data, the freshly opened copy is
 * closed again before returning.
 *
 * Returns TRUE (profile->isDataLoaded) on success and FALSE on failure, with
 * *errorCode describing the failure. U_INVALID_FORMAT_ERROR is reported when
 * the version check at the end of this function rejects the data.
 */
static UBool U_CALLCONV
loadData(UStringPrepProfile* profile, 
         const char* path, 
         const char* name, 
         const char* type, 
         UErrorCode* errorCode) {
    if(errorCode==NULL || U_FAILURE(*errorCode)) {
        return 0;
    }

    /* open and parse the data outside the mutex block */
    //TODO: change the path
    UDataMemory *openedData=udata_openChoice(path, type, name, isSPrepAcceptable, NULL, errorCode);
    if(U_FAILURE(*errorCode)) {
        return FALSE;
    }

    /* the file starts with the index array, immediately followed by the serialized trie */
    const int32_t *indexes=(const int32_t *)udata_getMemory(openedData);
    const uint8_t *trieBytes=(const uint8_t *)(indexes+_SPREP_INDEX_TOP);

    UTrie localTrie={ 0,0,0,0,0,0,0 };
    utrie_unserialize(&localTrie, trieBytes, indexes[_SPREP_INDEX_TRIE_SIZE], errorCode);
    localTrie.getFoldingOffset=getSPrepFoldingOffset;

    if(U_FAILURE(*errorCode)) {
        udata_close(openedData);
        return FALSE;
    }

    /* in the mutex block, publish the data unless the profile already has some */
    umtx_lock(&usprepMutex);
    if(profile->sprepData!=NULL) {
        /* already set: work from the installed data instead of ours */
        indexes=(const int32_t *)udata_getMemory(profile->sprepData);
    } else {
        profile->sprepData=openedData;
        openedData=NULL; /* ownership moved to the profile */
        uprv_memcpy(&profile->indexes, indexes, sizeof(profile->indexes));
        uprv_memcpy(&profile->sprepTrie, &localTrie, sizeof(UTrie));
    }
    umtx_unlock(&usprepMutex);

    /* the mapping data follows the trie */
    profile->mappingData=(uint16_t *)((uint8_t *)(indexes+_SPREP_INDEX_TOP)+profile->indexes[_SPREP_INDEX_TRIE_SIZE]);

    /* pack each four-part version into one integer so versions compare numerically */
    UVersionInfo normUnicodeVersion;
    u_getUnicodeVersion(normUnicodeVersion);
    const int32_t normUniVer =
        (normUnicodeVersion[0] << 24) + (normUnicodeVersion[1] << 16) +
        (normUnicodeVersion[2] << 8 ) + (normUnicodeVersion[3]);
    const int32_t sprepUniVer =
        (dataVersion[0] << 24) + (dataVersion[1] << 16) +
        (dataVersion[2] << 8 ) + (dataVersion[3]);
    const int32_t normCorrVer = profile->indexes[_SPREP_NORM_CORRECTNS_LAST_UNI_VERSION];

    if(U_FAILURE(*errorCode)){
        udata_close(openedData);
        return FALSE;
    }

    /*
     * Reject the data when normalization is turned on and the Unicode version of
     * the normalization data is older than both the SPREP file's Unicode version
     * and the NormalizationCorrections version.
     */
    const UBool normalizationOn = (profile->indexes[_SPREP_OPTIONS] & _SPREP_NORMALIZATION_ON) > 0;
    if(normUniVer < sprepUniVer && normUniVer < normCorrVer && normalizationOn){
        *errorCode = U_INVALID_FORMAT_ERROR;
        udata_close(openedData);
        return FALSE;
    }

    profile->isDataLoaded = TRUE;

    /* still non-NULL only if the profile already had data: close the extra copy */
    if(openedData!=NULL) {
        udata_close(openedData);
    }

    return profile->isDataLoaded;
}
Пример #5
0
int main(int /* argc*/ , const char * /*argv*/ []) {
    UErrorCode status = U_ZERO_ERROR;
    int diffs = 0;
    int gbaddiffs =0;
    setup(status);
    if(U_FAILURE(status)) return 1;

    int expected = PROVIDER_COUNT;

    for(int l=0;l<LOCALE_COUNT;l++) {
        printf("\n");
        uint8_t oldBytes[200];
        int32_t oldLen = -1;
        for(int v=0;v<=expected;v++) {

            // Construct the locale ID
            char locID[200];
            strcpy(locID, locale[l]);
            if((v!=expected)) { // -1 = no version
                strcat(locID, "@sp=icu");
                strcat(locID, provider_version[v]);
            }
            
            printf("%-28s =  ", locID);
            
            UErrorCode subStatus = U_ZERO_ERROR;
            uint8_t bytes[200];
            uint8_t bytesb[200];
#define USE_CXX 0

#if USE_CXX
            Collator *col = Collator::createInstance(Locale(locID),subStatus);
            if(U_FAILURE(subStatus)) {
                printf("ERR: %s\n", u_errorName(subStatus));
                continue;
            }
            int32_t len = col->getSortKey(stuff, -1, bytes, 200);
#else
#if 1
            char xbuf2[200];
            strcpy(xbuf2,"X/");
            strcat(xbuf2,locID);
            strcat(xbuf2,"/");
            //printf(" -> %s\n", xbuf2);
            UCollator *col = ucol_openFromShortString(xbuf2, FALSE,NULL, &subStatus);
#else
            UCollator *col = ucol_open(locID, &subStatus);
#endif
            if(U_FAILURE(subStatus)) {
                printf("ERR: %s\n", u_errorName(subStatus));
                continue;
            }
            

            char xbuf3[200];
            {
              int32_t def = ucol_getShortDefinitionString(col,locID/*NULL*/,xbuf3,200,&subStatus);
              if(U_FAILURE(subStatus)) {
                printf("Err getting short string name: %s\n", u_errorName(subStatus));
              } else {
                printf(" --> %s\n", xbuf3);
              }              
            }

            int32_t len = ucol_getSortKey(col, stuff, -1, bytes, 200);
#endif

            printf("     ");

            int tdiffs=0;

            for(int i=0;i<len;i++) {
	      if(i<oldLen&&bytes[i]!=oldBytes[i]) {
                diffs++;
                printf("*");
              } else {
                printf(" ");
              }
              printf("%02X", (0xFF&bytes[i]));
            }
            printf("\n");

            char xbuf4[200];
            UCollator *col2 = ucol_openFromShortString(xbuf3, FALSE, NULL, &subStatus);
            if(U_FAILURE(subStatus)) {
              printf("Err opening from new short string : %s\n", u_errorName(subStatus));
              continue;
            } else {
              int32_t def4 = ucol_getShortDefinitionString(col,locID/*NULL*/,xbuf4,200,&subStatus);
              if(strcmp(xbuf4,xbuf3)) {
                printf(" --> reopened = %s (%s)\n", xbuf4, u_errorName(subStatus));
              }
            }
            int32_t len2 = ucol_getSortKey(col2, stuff, -1, bytesb, 200);

            int baddiffs=0;
            for(int i=0;i<len;i++) {
	      if(i<len&&bytes[i]!=bytesb[i]) {
                  baddiffs++;
                  printf("!");
                 } else {
                   // printf(" ");
                 }
                // printf("%02X", (0xFF&bytesb[i]));
            }
            if(baddiffs>0) {
              printf(" - ERR! Diffs from %s in %d places\n", xbuf2,baddiffs);
              gbaddiffs+=baddiffs;
            } else {
              //printf("  OK.\n");
            }
            //            printf("\n");

            

#if USE_CXX
            delete col;
#else
            ucol_close(col);
#endif

            oldLen = len;
            memcpy(oldBytes, bytes, len);
        }
    }

    if(diffs==0) {
#if (U_ICU_VERSION_MAJOR_NUM < 49)
      printf("ERROR: 0 differences found between platforms. ICU " U_ICU_VERSION " does not support collator plugins properly (not until 49)\n");
#else
      printf("ERROR: 0 differences found between platforms.. are the platforms installed? Try 'icuinfo -L'\n");
#endif
      return 1;
    } else {
      printf("%d differences found among provider versions!\n", diffs);
    }

    if(gbaddiffs>0) {
      printf("ERROR: %d diffs found between a collator and it's reopened (from shortstring) variant.\n", gbaddiffs);
      return 2;
    } else {
      printf("Collator and reopened (shortstring) are OK.\n");
    }

    printf("Success!\n");
    
    return 0;
}
Пример #6
0
// Reports whether error_code, interpreted as an ICU UErrorCode, denotes a
// failure according to U_FAILURE.
bool f_intl_is_failure(int64 error_code) {
  const UErrorCode icu_code = (UErrorCode)error_code;
  return U_FAILURE(icu_code) ? true : false;
}
Пример #7
0
// Converts a UTF-8 domain name with ICU's IDNA functions.
//
// domain    - the domain name, as UTF-8 bytes.
// errorcode - assigned the ICU error code when a step fails.
//             NOTE(review): taken by value here; whether the assignment is
//             visible to the caller depends on Variant's semantics — confirm.
// mode      - INTL_IDN_TO_ASCII selects uidna_IDNToASCII; any other value
//             selects uidna_IDNToUnicode.
//
// Returns the converted name as a UTF-8 String, or false on failure.
Variant php_intl_idn_to(CStrRef domain, Variant errorcode, int mode) {
  const int32_t option = 0;
  UChar* ustring = NULL;
  int ustring_len = 0;
  UErrorCode status;
  char     *converted_utf8 = NULL;
  int32_t   converted_utf8_len;
  UChar*    converted = NULL;
  int32_t   converted_ret_len;

  // Convert the string to UTF-16
  status = U_ZERO_ERROR;
  intl_convert_utf8_to_utf16(&ustring, &ustring_len,
      (char*)domain.data(), domain.size(), &status);
  if (U_FAILURE(status)) {
    free(ustring);
    errorcode = status;
    return false;
  }

  // Call the appropriate IDN function, starting with a buffer as long as the
  // input (at least one UChar).
  int converted_len = (ustring_len > 1) ? ustring_len : 1;
  for (;;) {
    UParseError parse_error;
    status = U_ZERO_ERROR;
    converted = (UChar*)malloc(sizeof(UChar)*converted_len);
    // If the malloc failed, bail out
    if (!converted) {
      free(ustring);
      errorcode = U_MEMORY_ALLOCATION_ERROR;
      return false;
    }
    if (mode == INTL_IDN_TO_ASCII) {
      converted_ret_len = uidna_IDNToASCII(ustring,
          ustring_len, converted, converted_len,
          option, &parse_error, &status);
    } else {
      converted_ret_len = uidna_IDNToUnicode(ustring,
          ustring_len, converted, converted_len,
          option, &parse_error, &status);
    }
    if (status != U_BUFFER_OVERFLOW_ERROR)
      break;

    // The buffer was too small. On overflow ICU returns the length the result
    // needs, so retry with exactly that instead of blindly doubling.
    free(converted);
    converted = NULL;
    if (converted_ret_len <= converted_len) {
      // An overflow was reported without a larger required length; retrying
      // with the same size could loop forever, so give up.
      free(ustring);
      errorcode = status;
      return false;
    }
    converted_len = converted_ret_len;
  }
  free(ustring);
  if (U_FAILURE(status)) {
    free(converted);
    errorcode = status;
    return false;
  }

  // Convert the string back to UTF-8
  status = U_ZERO_ERROR;
  intl_convert_utf16_to_utf8(&converted_utf8, &converted_utf8_len,
      converted, converted_ret_len, &status);
  free(converted);
  if (U_FAILURE(status)) {
    free(converted_utf8);
    errorcode = status;
    return false;
  }

  // Return the string; AttachString is passed so the String takes over the
  // buffer instead of copying it.
  return String(converted_utf8, converted_utf8_len, AttachString);
}
Пример #8
0
// Initializes this PluralRules object from the `locales` and `options`
// arguments: resolves the locale, reads the "type" and digit options, and
// opens the ICU number format and plural rules used afterwards. Returns early
// (leaving m_initializedPluralRules unset) whenever an exception is pending
// or an ICU object cannot be opened.
void IntlPluralRules::initializePluralRules(ExecState& exec, JSValue locales, JSValue optionsValue)
{
    VM& vm = exec.vm();
    auto scope = DECLARE_THROW_SCOPE(vm);

    // ECMA-402 13.1.1 InitializePluralRules (pluralRules, locales, options)
    // https://tc39.github.io/ecma402/#sec-initializepluralrules
    Vector<String> requestedLocales = canonicalizeLocaleList(exec, locales);
    RETURN_IF_EXCEPTION(scope, void());

    // An undefined options argument is replaced by an empty object built on
    // the null-prototype structure; anything else is converted with toObject.
    JSObject* options;
    if (!optionsValue.isUndefined()) {
        options = optionsValue.toObject(&exec);
        RETURN_IF_EXCEPTION(scope, void());
    } else
        options = constructEmptyObject(&exec, exec.lexicalGlobalObject()->nullPrototypeObjectStructure());

    // Locale resolution takes the localeMatcher option as its only input option.
    HashMap<String, String> resolveOptions;
    String matcher = intlStringOption(exec, options, vm.propertyNames->localeMatcher, { "lookup", "best fit" }, "localeMatcher must be either \"lookup\" or \"best fit\"", "best fit");
    RETURN_IF_EXCEPTION(scope, void());
    resolveOptions.add(vm.propertyNames->localeMatcher.string(), matcher);

    // The candidate set is the NumberFormat available-locales list.
    const HashSet<String> supportedLocales = exec.jsCallee()->globalObject(vm)->intlNumberFormatAvailableLocales();
    HashMap<String, String> resolution = resolveLocale(exec, supportedLocales, requestedLocales, resolveOptions, nullptr, 0, IntlPRInternal::localeData);
    m_locale = resolution.get(vm.propertyNames->locale.string());
    if (m_locale.isEmpty()) {
        throwTypeError(&exec, scope, "failed to initialize PluralRules due to invalid locale"_s);
        return;
    }

    // "type" option: ordinal or (default) cardinal.
    String requestedType = intlStringOption(exec, options, Identifier::fromString(&vm, "type"), { "cardinal", "ordinal" }, "type must be \"cardinal\" or \"ordinal\"", "cardinal");
    RETURN_IF_EXCEPTION(scope, void());
    if (requestedType == "ordinal")
        m_type = UPLURAL_TYPE_ORDINAL;
    else
        m_type = UPLURAL_TYPE_CARDINAL;

    // Integer and fraction digit options. Each value is stored in its member
    // only after the exception check for that read has passed.
    unsigned integerDigits = intlNumberOption(exec, options, Identifier::fromString(&vm, "minimumIntegerDigits"), 1, 21, 1);
    RETURN_IF_EXCEPTION(scope, void());
    m_minimumIntegerDigits = integerDigits;

    unsigned minFractionFallback = 0;
    unsigned minFraction = intlNumberOption(exec, options, Identifier::fromString(&vm, "minimumFractionDigits"), 0, 20, minFractionFallback);
    RETURN_IF_EXCEPTION(scope, void());
    m_minimumFractionDigits = minFraction;

    // The maximum may not fall below the minimum; its fallback is the larger
    // of the minimum and 3.
    unsigned maxFractionFallback = std::max(minFraction, 3u);
    unsigned maxFraction = intlNumberOption(exec, options, Identifier::fromString(&vm, "maximumFractionDigits"), minFraction, 20, maxFractionFallback);
    RETURN_IF_EXCEPTION(scope, void());
    m_maximumFractionDigits = maxFraction;

    // Significant-digit options are read raw first, to learn whether either
    // one was supplied at all.
    JSValue rawMinSignificant = options->get(&exec, Identifier::fromString(&vm, "minimumSignificantDigits"));
    RETURN_IF_EXCEPTION(scope, void());

    JSValue rawMaxSignificant = options->get(&exec, Identifier::fromString(&vm, "maximumSignificantDigits"));
    RETURN_IF_EXCEPTION(scope, void());

    if (!rawMinSignificant.isUndefined() || !rawMaxSignificant.isUndefined()) {
        unsigned minSignificant = intlNumberOption(exec, options, Identifier::fromString(&vm, "minimumSignificantDigits"), 1, 21, 1);
        RETURN_IF_EXCEPTION(scope, void());
        unsigned maxSignificant = intlNumberOption(exec, options, Identifier::fromString(&vm, "maximumSignificantDigits"), minSignificant, 21, 21);
        RETURN_IF_EXCEPTION(scope, void());
        m_minimumSignificantDigits = minSignificant;
        m_maximumSignificantDigits = maxSignificant;
    }

    // Open the ICU decimal number format for the resolved locale.
    UErrorCode status = U_ZERO_ERROR;
    m_numberFormat = std::unique_ptr<UNumberFormat, UNumberFormatDeleter>(unum_open(UNUM_DECIMAL, nullptr, 0, m_locale.utf8().data(), nullptr, &status));
    if (U_FAILURE(status)) {
        throwTypeError(&exec, scope, "failed to initialize PluralRules"_s);
        return;
    }

    // Significant-digit settings, when present, are applied instead of the
    // integer/fraction digit settings.
    if (!m_minimumSignificantDigits) {
        unum_setAttribute(m_numberFormat.get(), UNUM_MIN_INTEGER_DIGITS, m_minimumIntegerDigits);
        unum_setAttribute(m_numberFormat.get(), UNUM_MIN_FRACTION_DIGITS, m_minimumFractionDigits);
        unum_setAttribute(m_numberFormat.get(), UNUM_MAX_FRACTION_DIGITS, m_maximumFractionDigits);
    } else {
        unum_setAttribute(m_numberFormat.get(), UNUM_SIGNIFICANT_DIGITS_USED, true);
        unum_setAttribute(m_numberFormat.get(), UNUM_MIN_SIGNIFICANT_DIGITS, m_minimumSignificantDigits.value());
        unum_setAttribute(m_numberFormat.get(), UNUM_MAX_SIGNIFICANT_DIGITS, m_maximumSignificantDigits.value());
    }

    // Open the ICU plural rules for the resolved locale and requested type.
    status = U_ZERO_ERROR;
    m_pluralRules = std::unique_ptr<UPluralRules, UPluralRulesDeleter>(uplrules_openForType(m_locale.utf8().data(), m_type, &status));
    if (U_FAILURE(status)) {
        throwTypeError(&exec, scope, "failed to initialize PluralRules"_s);
        return;
    }

    m_initializedPluralRules = true;
}
Пример #9
0
/*
 * Lua binding: icu_breakpoints(text, locale).
 *
 * Converts the UTF-8 `text` to UTF-16, runs ICU word and line break
 * iterators over it, and pushes one table per boundary onto the Lua stack.
 * Each table has the fields:
 *   type    - "line" if the position is a line boundary, otherwise "word"
 *   index   - UTF-8 length of the text that precedes the boundary
 *   subtype - "soft" or "hard" (line boundaries only)
 *   token   - the UTF-8 text between the previous boundary and this one
 * Returns the number of tables pushed. Raises a Lua error if the text cannot
 * be converted, memory runs out, or a break iterator cannot be opened; all
 * native resources are released before the error is raised because
 * luaL_error does not return.
 */
int icu_breakpoints(lua_State *L) {
  const char* input = luaL_checkstring(L, 1);
  int input_l = strlen(input);
  const char* locale = luaL_checkstring(L, 2);
  UChar *buffer;
  int32_t l = 0, breakcount = 0;
  UErrorCode err = U_ZERO_ERROR;

  /* Preflight to learn the UTF-16 length. With a zero-capacity destination
     the call normally reports a buffer overflow, which is expected here;
     any other failure means the input itself could not be converted. */
  u_strFromUTF8(NULL, 0, &l, input, input_l, &err);
  if (U_FAILURE(err) && err != U_BUFFER_OVERFLOW_ERROR) {
    return luaL_error(L, "Text conversion failure: %s", u_errorName(err));
  }
  err = U_ZERO_ERROR;
  /* One spare unit so an empty input never requests a zero-byte block. */
  buffer = malloc(((size_t)l + 1) * sizeof(UChar));
  if (buffer == NULL) {
    return luaL_error(L, "Text conversion failure: out of memory");
  }
  u_strFromUTF8(buffer, l, &l, input, input_l, &err);
  if (U_FAILURE(err)) {
    free(buffer);
    return luaL_error(L, "Text conversion failure: %s", u_errorName(err));
  }

  UBreakIterator* wordbreaks, *linebreaks;
  int32_t i, previous;
  wordbreaks = ubrk_open(UBRK_WORD, locale, buffer, l, &err);
  if(U_FAILURE(err)) {
    free(buffer);
    return luaL_error(L, "Word break parser failure: %s", u_errorName(err));
  }

  linebreaks = ubrk_open(UBRK_LINE, locale, buffer, l, &err);
  if(U_FAILURE(err)) {
    ubrk_close(wordbreaks);
    free(buffer);
    return luaL_error(L, "Line break parser failure: %s", u_errorName(err));
  }

  previous = 0;
  i = 0;
  while (i <= l) {
    int32_t type;
    if (!ubrk_isBoundary(linebreaks, i) && !ubrk_isBoundary(wordbreaks,i)) {
      i++; continue;
    }
    /* Room for the result table plus one key/value pair at a time. */
    lua_checkstack(L, 3);
    /* At some kind of boundary */
    lua_newtable(L);
    lua_pushstring(L, "type");
    lua_pushstring(L, ubrk_isBoundary(linebreaks,i) ? "line" : "word");
    lua_settable(L, -3);

    /* Preflight only: the UTF-8 length of the first i UTF-16 units is the
       offset of this boundary in the original input. */
    int32_t utf8_index = 0;
    err = U_ZERO_ERROR;
    u_strToUTF8(NULL, 0, &utf8_index, buffer, i, &err);
    assert(U_SUCCESS(err) || err == U_BUFFER_OVERFLOW_ERROR);

    lua_pushstring(L, "index");
    lua_pushinteger(L, utf8_index);
    lua_settable(L, -3);

    if (ubrk_isBoundary(linebreaks, i)) {
      lua_pushstring(L, "subtype");
      type = ubrk_getRuleStatus(linebreaks);
      if (type >= UBRK_LINE_SOFT && type < UBRK_LINE_SOFT_LIMIT) {
        lua_pushstring(L, "soft");
      } else {
        lua_pushstring(L, "hard");
      }
      lua_settable(L, -3);
    }
    lua_pushstring(L, "token");
    lua_pushlstring(L, input+previous, utf8_index-previous);

    lua_settable(L, -3);

    previous = utf8_index;
    breakcount++;
    i++;
  }
  ubrk_close(wordbreaks);
  ubrk_close(linebreaks);
  /* The iterators were opened on buffer, so it is released only after they
     are closed. */
  free(buffer);
  return breakcount;
}
Пример #10
0
/*
 * Regression test for a buffer reallocation crash in ucol_getSortKey when
 * French secondary sorting is used with a large buffer. Opens a collator from
 * the short string "LFR_AN_CX_EX_FO_HX_NX_S3", builds a sort key for the fixed
 * UTF-16 text below into a 4096-byte buffer, and reports an error if the key
 * does not fit. The collator is closed on every path that opened it.
 */
static void TestGetSortKey() {
    /* This is meant to test a buffer reallocation crash while using
    French secondary sorting with a large buffer.
    The fact that Japanese characters are used is irrelevant. */
    static const UChar pucUTF16[] = {
        0x3049,0x30b9,0x3088,0xfffd,0xfffd,0x308f,0xfffd,0x3042,
        0xfffd,0xfffd,0x305e,0xfffd,0x30b6,0x30bb,0x305b,0x30b1,
        0x3050,0x30af,0x304e,0x30bd,0xfffd,0x30c6,0xfffd,0xfffd,
        0x30e1,0xfffd,0xfffd,0x30d9,0xfffd,0x3092,0x3075,0x304a,
        0x3074,0x3070,0x30f5,0x30c4,0x306e,0x30df,0x3053,0xfffd,
        0x30a6,0x30b6,0x30e0,0xfffd,0x30bc,0x30ef,0x3087,0x30cc,
        0x305f,0x30de,0xfffd,0x3090,0x3063,0x30dc,0x30b6,0x30b9,
        0x30d2,0x3072,0x3061,0xfffd,0xfffd,0xfffd,0x307b,0x3092,
        0x30a5,0x30a9,0x30b1,0x30e7,0xfffd,0xfffd,0xfffd,0xfffd,
        0xfffd,0x305e,0xfffd,0x30c7,0x30ae,0x305b,0x308b,0x30c0,
        0x30f5,0xfffd,0xfffd,0xfffd,0x307d,0x304e,0xfffd,0xfffd,
        0x30c0,0x30c8,0x306f,0x307a,0x30dd,0x30e4,0x3084,0xfffd,
        0x308c,0x30f1,0xfffd,0x30c6,0xfffd,0x307a,0xfffd,0x3052,
        0x3056,0x305d,0x30b7,0xfffd,0x305b,0x30b0,0x30b9,0xfffd,
        0x30b2,0x306d,0x3044,0xfffd,0x3073,0xfffd,0x30be,0x30cf,
        0x3080,0xfffd,0x30a8,0x30f5,0x30a5,0x30c7,0x307c,0xfffd,
        0x30d1,0x305f,0x30b2,0xfffd,0x3053,0x30ca,0xfffd,0x30dd,
        0x3058,0x30c0,0x305d,0x30e1,0xfffd,0x30bb,0x305f,0x30d1,
        0x30f2,0x3058,0x3086,0x30ce,0x30db,0x30cb,0x30e9,0xfffd,
        0x308c,0xfffd,0xfffd,0x30af,0x30c4,0x3076,0x304c,0x30f5,
        0x30e8,0x308c,0xfffd,0x30e2,0x3073,0x30a3,0x304e,0x30ea,
        0xfffd,0x304f,0xfffd,0x306c,0x3044,0xfffd,0xfffd,0x30c9,
        0xfffd,0x30f5,0xfffd,0xfffd,0xfffd,0x30eb,0x30a8,0xfffd,
        0x306d,0x307d,0x30d8,0x3069,0xfffd,0xfffd,0x3086,0x30a9,
        0xfffd,0x3076,0x30e9,0x30cc,0x3074,0x30e0,0xfffd,0xfffd,
        0xfffd,0x30f0,0x3086,0x30ac,0x3076,0x3068,0x30c7,0xfffd,
        0x30b7,0x30d2,0x3048,0x308e,0x30e8,0x30d9,0x30ce,0x30d0,
        0x308b,0x30ee,0x30e6,0x3079,0x30f3,0x30af,0xfffd,0x3079,
        0xfffd,0xfffd,0x30ca,0x30bf,0xfffd,0x30b5,0xfffd,0xfffd,
        0x3093,0xfffd,0x30ba,0xfffd,0x3076,0x3047,0x304a,0xfffd,
        0xfffd,0x3086,0xfffd,0x3081,0xfffd,0x30f6,0x3066,0xfffd,
        0xfffd,0x30b6,0x30ef,0x30e2,0x30bf,0xfffd,0x3053,0x304a,
        0xfffd,0xfffd,0x304a,0x30e8,0xfffd,0x30e2,0xfffd,0xfffd,
        0x305c,0x3081,0x30c6,0xfffd,0x3091,0x3046,0x306a,0x3059,
        0xfffd,0xfffd,0x30dd,0x30d1,0x308a,0x30ee,0xfffd,0xfffd,
        0x308a,0x3042,0x30da,0xfffd,0x3064,0x30ef,0x305c,0x306b,
        0xfffd,0x30ca,0x3085,0x3067,0x30ea,0x30c2,0x30c8,0xfffd,
        0x30f5,0xfffd,0xfffd,0xfffd,0x30ca,0xfffd,0x3050,0x30f1,
        0x3050,0x3053,0x3072,0xfffd,0xfffd,0xfffd,0x3074,0xfffd,
        0x304b,0x30dd,0x306d,0xfffd,0x3049,0x30a1,0x30cc,0x30de,
        0x30ae,0x307b,0x308a,0xfffd,0x3065,0xfffd,0xfffd,0x30c0,
        0xfffd,0x3048,0x30dc,0x304f,0x3085,0x3059,0x304b,0x30d3,
        0x30eb,0x30a4,0x3073,0xfffd,0x30ba,0x308f,0x30a7,0x30c3,
        0x3074,0x30cf,0x306c,0x3053,0x30c0,0xfffd,0x3066,0xfffd,
        0x308f,0xfffd,0x30b5,0xfffd,0x3092,0x30c4,0xfffd,0x30d6,
        0x3056,0x30ad,0x30d2,0x30ba,0xfffd,0x30e6,0x304c,0x3088,
        0x30b6,0x3048,0x3077,0x30d1,0xfffd,0x3050,0xfffd,0x3042,
        0xfffd,0xfffd,0x308f,0xfffd,0x30c1,0xfffd,0x3074,0x3061,
        0x3056,0x30e5,0xfffd,0xfffd,0x3057,0xfffd,0xfffd,0xfffd,
        0xfffd,0x30bd,0x30b3,0x30ee,0xfffd,0x30f2,0x3084,0x3050,
        0xfffd,0x30e7,0xfffd,0xfffd,0x3060,0x3049,0x30f2,0x30ad,
        0x30bf,0x30f1,0x30a2,0xfffd,0x30af,0xfffd,0x3060,0x30a1,
        0x30e9,0x30c3,0xfffd,0x3072,0x3093,0x3070,0xfffd,0x308f,
        0x3060,0xfffd,0x3067,0x306f,0x3082,0x308b,0x3051,0xfffd,
        0x3058,0xfffd,0xfffd,0x30a8,0x3051,0x3054,0x30ad,0x30f0,
        0x3053,0xfffd,0x30e1,0x30d7,0x308d,0x307f,0x30be,0x30b0,
        0xfffd,0x30db,0xfffd,0x30d1,0xfffd,0x3054,0x30a5,0xfffd,
        0x306a,0xfffd,0x305c,0xfffd,0x3052,0x3088,0xfffd,0x306e,
        0xfffd,0x30a9,0x30a1,0x30b4,0x3083,0x30bd,0xfffd,0xfffd,
        0x306a,0x3070,0x30cd,0xfffd,0x3072,0x30ed,0x30c6,0x30be,
        0x30c4,0x305e,0x30b3,0x30e1,0x308a,0xfffd,0x305b,0xfffd,
        0x3042,0x3088,0xfffd,0x304c,0xfffd,0x3089,0x3071,0xfffd,
        0xfffd,0x30c6,0x3062,0x3079,0xfffd,0x304b,0x304a,0xfffd,
        0x30ad,0x3045,0x3045,0x3087,0xfffd,0x306a,0x308b,0x0000,
        0x30bd,0x3065,0x30b8,0x3086,0x30d3,0x3076,0xfffd,0xfffd,
        0x308f,0x3053,0x307c,0x3053,0x3084,0x30ae,0x30c4,0x3045,
        0x30a8,0x30d0,0x30e1,0x308c,0x30e6,0x30b7,0xfffd,0xfffd,
        0xfffd,0x3046,0x305f,0xfffd,0x3086,0x30ab,0xfffd,0xfffd,
        0x30c8,0xfffd,0x30a1,0x3052,0x3059,0xfffd,0x30a4,0xfffd,
        0xfffd,0x308c,0x3085,0x30ab,0x30b5,0x3091,0x30bf,0x30e3,
        0xfffd,0xfffd,0x3087,0xfffd,0x30f6,0x3051,0x30bd,0x3092,
        0x3063,0xfffd,0x30a9,0x3063,0x306e,0xfffd,0xfffd,0xfffd,
        0x306c,0xfffd,0x307e,0x30ad,0x3077,0x30c2,0x30e9,0x30d5,
        0xfffd,0xfffd,0x30c6,0x305c,0xfffd,0xfffd,0x3089,0xfffd,
        0x3048,0x30cb,0x308c,0xfffd,0xfffd,0x3044,0xfffd,0x3080,
        0x3063,0x3079,0xfffd,0x308a,0x30cb,0x3042,0x3057,0xfffd,
        0x307c,0x30c1,0x30a8,0x30cf,0xfffd,0x3083,0xfffd,0xfffd,
        0x306c,0xfffd,0x305e,0x3092,0xfffd,0x30dc,0x30b0,0x3081,
        0x30e3,0x30f0,0x304e,0x30cc,0x308e,0x30c4,0x30ad
    };

    UErrorCode status = U_ZERO_ERROR;
    UCollator *pCollator;
    int32_t lenActualSortKey;
    uint8_t pucSortKey[4096];
    static const int32_t LENSORTKEY = (int32_t)sizeof(pucSortKey);

    ucol_prepareShortStringOpen("LFR_AN_CX_EX_FO_HX_NX_S3", 0, NULL, &status);

    pCollator = ucol_openFromShortString("LFR_AN_CX_EX_FO_HX_NX_S3", 0, NULL, &status);

    if (U_FAILURE(status)) {
        log_data_err("error opening collator -> %s. (Are you missing data?)\n", u_errorName(status));
        return;
    }

    /* The text contains an embedded 0x0000, so its length is passed
       explicitly rather than relying on NUL termination. */
    lenActualSortKey = ucol_getSortKey(pCollator,
        (const UChar *)pucUTF16,
        sizeof(pucUTF16) / sizeof(pucUTF16[0]),
        pucSortKey,
        LENSORTKEY);

    if (lenActualSortKey > LENSORTKEY) {
        log_err("sort key too big for original buffer. Got: %d Expected: %d\n", lenActualSortKey, LENSORTKEY);
    }
    /* If the test didn't crash, then the test succeeded.
       Close the collator on the failure path as well so it is not leaked. */
    ucol_close(pCollator);
}
Пример #11
0
//--------------------------------------------------------------------------
//
//    Assignment Operator
//
//    Clears this pattern, re-initializes it, and then copies the state of
//    `other` into it. Any failure along the way is recorded in
//    fDeferredStatus and stops the copy at that point; the (possibly
//    partially copied) object is still returned.
//
//--------------------------------------------------------------------------
RegexPattern &RegexPattern::operator = (const RegexPattern &other) {
    // Assigning an object to itself changes nothing.
    if (&other == this) {
        return *this;
    }

    // Drop the current contents, then start again from the default state.
    zap();
    init();

    // The status is copied first: a source that is already in a failed state
    // passes on only its status.
    fDeferredStatus   = other.fDeferredStatus;
    if (U_FAILURE(fDeferredStatus)) {
        return *this;
    }

    // Pattern text. When the source owns a UnicodeString, copy it and open a
    // UText over the copy; otherwise clone the source's UText.
    if (other.fPatternString != NULL) {
        fPatternString = new UnicodeString(*(other.fPatternString));
        if (fPatternString != NULL) {
            fPattern = utext_openConstUnicodeString(NULL, fPatternString, &fDeferredStatus);
        } else {
            fDeferredStatus = U_MEMORY_ALLOCATION_ERROR;
        }
    } else {
        fPatternString = NULL;
        fPattern = utext_clone(fPattern, other.fPattern, FALSE, TRUE, &fDeferredStatus);
    }
    if (U_FAILURE(fDeferredStatus)) {
        return *this;
    }

    // Plain value members.
    fFlags            = other.fFlags;
    fLiteralText      = other.fLiteralText;
    fMinMatchLen      = other.fMinMatchLen;
    fFrameSize        = other.fFrameSize;
    fDataSize         = other.fDataSize;
    fStaticSets       = other.fStaticSets;
    fStaticSets8      = other.fStaticSets8;

    // Start-of-match optimization data. The two initial-character sets are
    // copied by value into the objects this pattern already points at.
    fStartType        = other.fStartType;
    fInitialStringIdx = other.fInitialStringIdx;
    fInitialStringLen = other.fInitialStringLen;
    *fInitialChars    = *other.fInitialChars;
    fInitialChar      = other.fInitialChar;
    *fInitialChars8   = *other.fInitialChars8;
    fNeedsAltInput    = other.fNeedsAltInput;

    // Compiled pattern and group map hold plain values; assign() copies them.
    fCompiledPat->assign(*other.fCompiledPat, fDeferredStatus);
    fGroupMap->assign(*other.fGroupMap, fDeferredStatus);

    // Unicode sets: every set from index 1 upward is duplicated, and the
    // matching 8-bit set entry is copied alongside it.
    //    Note:  init() already added an empty element zero to fSets
    const int32_t  setCount = other.fSets->size();
    fSets8 = new Regex8BitSet[setCount];
    if (fSets8 == NULL) {
        fDeferredStatus = U_MEMORY_ALLOCATION_ERROR;
        return *this;
    }
    for (int32_t setIdx = 1; setIdx < setCount; ++setIdx) {
        if (U_FAILURE(fDeferredStatus)) {
            return *this;
        }
        UnicodeSet *original  = (UnicodeSet *)other.fSets->elementAt(setIdx);
        UnicodeSet *duplicate = new UnicodeSet(*original);
        if (duplicate == NULL) {
            fDeferredStatus = U_MEMORY_ALLOCATION_ERROR;
            break;
        }
        fSets->addElement(duplicate, fDeferredStatus);
        fSets8[setIdx] = other.fSets8[setIdx];
    }

    // Named capture groups: each name is copied into a new UnicodeString key
    // and stored with the same integer value. Iteration ends at the end of
    // the source map or at the first recorded failure.
    int32_t cursor = UHASH_FIRST;
    for (;;) {
        const UHashElement *entry = uhash_nextElement(other.fNamedCaptureMap, &cursor);
        if (entry == NULL || U_FAILURE(fDeferredStatus)) {
            break;
        }
        const UnicodeString *sourceName = (const UnicodeString *)entry->key.pointer;
        UnicodeString *nameCopy = new UnicodeString(*sourceName);
        int32_t groupValue = entry->value.integer;
        if (nameCopy != NULL) {
            uhash_puti(fNamedCaptureMap, nameCopy, groupValue, &fDeferredStatus);
        } else {
            fDeferredStatus = U_MEMORY_ALLOCATION_ERROR;
        }
    }
    return *this;
}
Пример #12
0
// Returns the fNumerics set of a check result as a USet, or NULL when
// `status` holds a failure after the handle has been validated.
U_CAPI const USet* U_EXPORT2
uspoof_getCheckResultNumerics(const USpoofCheckResult *checkResult, UErrorCode *status) {
    const CheckResult* result = CheckResult::validateThis(checkResult, *status);
    if (U_SUCCESS(*status)) {
        return result->fNumerics.toUSet();
    }
    return NULL;
}
Пример #13
0
// Returns the restriction level stored in a check result, or
// USPOOF_UNRESTRICTIVE when `status` holds a failure after the handle has
// been validated.
U_CAPI URestrictionLevel U_EXPORT2
uspoof_getCheckResultRestrictionLevel(const USpoofCheckResult *checkResult, UErrorCode *status) {
    const CheckResult* result = CheckResult::validateThis(checkResult, *status);
    if (U_SUCCESS(*status)) {
        return result->fRestrictionLevel;
    }
    return USPOOF_UNRESTRICTIVE;
}
Пример #14
0
// Returns the fChecks value stored in a check result, or 0 when `status`
// holds a failure after the handle has been validated.
U_CAPI int32_t U_EXPORT2
uspoof_getCheckResultChecks(const USpoofCheckResult *checkResult, UErrorCode *status) {
    const CheckResult* result = CheckResult::validateThis(checkResult, *status);
    if (U_SUCCESS(*status)) {
        return result->fChecks;
    }
    return 0;
}
Пример #15
0
// Creates the dictionary-based break engine for the script of character `c`.
// Returns NULL when the script lookup fails, when no dictionary matcher is
// available for the script and break type, when the script has no engine in
// the switch below, or when constructing the engine reports a failure.
const LanguageBreakEngine *
ICULanguageBreakFactory::loadEngineFor(UChar32 c, int32_t breakType) {
    UErrorCode status = U_ZERO_ERROR;
    UScriptCode code = uscript_getScript(c, &status);
    if (U_FAILURE(status)) {
        return NULL;
    }

    DictionaryMatcher *matcher = loadDictionaryMatcherFor(code, breakType);
    if (matcher == NULL) {
        return NULL;
    }

    // Pick the engine by script; each engine is constructed with the matcher.
    const LanguageBreakEngine *engine = NULL;
    switch(code) {
    case USCRIPT_THAI:
        engine = new ThaiBreakEngine(matcher, status);
        break;
    case USCRIPT_LAO:
        engine = new LaoBreakEngine(matcher, status);
        break;
    case USCRIPT_MYANMAR:
        engine = new BurmeseBreakEngine(matcher, status);
        break;
    case USCRIPT_KHMER:
        engine = new KhmerBreakEngine(matcher, status);
        break;

#if !UCONFIG_NO_NORMALIZATION
        // CJK not available w/o normalization
    case USCRIPT_HANGUL:
        engine = new CjkBreakEngine(matcher, kKorean, status);
        break;

    // use same BreakEngine and dictionary for both Chinese and Japanese
    case USCRIPT_HIRAGANA:
    case USCRIPT_KATAKANA:
    case USCRIPT_HAN:
        engine = new CjkBreakEngine(matcher, kChineseJapanese, status);
        break;
#if 0
            // TODO: Have to get some characters with script=common handled
            // by CjkBreakEngine (e.g. U+309B). Simply subjecting
            // them to CjkBreakEngine does not work. The engine has to
            // special-case them.
            case USCRIPT_COMMON:
            {
                UBlockCode block = ublock_getCode(code);
                if (block == UBLOCK_HIRAGANA || block == UBLOCK_KATAKANA)
                   engine = new CjkBreakEngine(dict, kChineseJapanese, status);
                break;
            }
#endif
#endif

    default:
        break;
    }

    // No engine was created: the matcher is still ours to delete.
    if (engine == NULL) {
        delete matcher;
        return NULL;
    }
    // An engine was created but reported a failure: delete the engine only.
    if (U_FAILURE(status)) {
        delete engine;
        return NULL;
    }
    return engine;
}
Пример #16
0
/*
 * makeconv entry point: reads each .ucm codepage mapping file named on the
 * command line and writes the corresponding converter file.
 *
 * The return value is an ICU error code used as the process exit status:
 * U_ZERO_ERROR on success (or after printing help on request),
 * U_ILLEGAL_ARGUMENT_ERROR for bad arguments, otherwise the first error hit.
 * Errors while creating or writing a single converter are reported and
 * processing continues with the next file; errors while building an output
 * file name abort immediately.
 */
int main(int argc, char* argv[])
{
    ConvData data;
    char cnvName[UCNV_MAX_FULL_FILE_NAME_LENGTH];

    U_MAIN_INIT_ARGS(argc, argv);

    /* Set up the ICU version number */
    UVersionInfo icuVersion;
    u_getVersion(icuVersion);
    uprv_memcpy(&dataInfo.dataVersion, &icuVersion, sizeof(UVersionInfo));

    /* preset then read command line options */
    options[OPT_DESTDIR].value=u_getDataDirectory();
    argc=u_parseArgs(argc, argv, UPRV_LENGTHOF(options), options);

    /* error handling, printing usage message */
    if(argc<0) {
        /* a negative count is the negated index of the offending argument */
        fprintf(stderr,
            "error in command line argument \"%s\"\n",
            argv[-argc]);
    } else if(argc<2) {
        /* no input files: treated as a usage error */
        argc=-1;
    }
    if(argc<0 || options[OPT_HELP_H].doesOccur || options[OPT_HELP_QUESTION_MARK].doesOccur) {
        FILE *stdfile=argc<0 ? stderr : stdout;
        fprintf(stdfile,
            "usage: %s [-options] files...\n"
            "\tread .ucm codepage mapping files and write .cnv files\n"
            "options:\n"
            "\t-h or -? or --help  this usage text\n"
            "\t-V or --version     show a version message\n"
            "\t-c or --copyright   include a copyright notice\n"
            "\t-d or --destdir     destination directory, followed by the path\n"
            "\t-v or --verbose     Turn on verbose output\n"
            "\t-q or --quiet       do not display warnings and progress\n",
            argv[0]);
        fprintf(stdfile,
            "\t      --small       Generate smaller .cnv files. They will be\n"
            "\t                    significantly smaller but may not be compatible with\n"
            "\t                    older versions of ICU and will require heap memory\n"
            "\t                    allocation when loaded.\n"
            "\t      --ignore-siso-check         Use SI/SO other than 0xf/0xe.\n");
        return argc<0 ? U_ILLEGAL_ARGUMENT_ERROR : U_ZERO_ERROR;
    }

    if(options[OPT_VERSION].doesOccur) {
        printf("makeconv version %u.%u, ICU tool to read .ucm codepage mapping files and write .cnv files\n",
               dataInfo.formatVersion[0], dataInfo.formatVersion[1]);
        printf("%s\n", U_COPYRIGHT_STRING);
        exit(0);
    }

    /* get the options values */
    haveCopyright = options[OPT_COPYRIGHT].doesOccur;
    const char *destdir = options[OPT_DESTDIR].value;
    VERBOSE = options[OPT_VERBOSE].doesOccur;
    QUIET = options[OPT_QUIET].doesOccur;
    SMALL = options[OPT_SMALL].doesOccur;

    if (options[OPT_IGNORE_SISO_CHECK].doesOccur) {
        IGNORE_SISO_CHECK = TRUE;
    }

    /* outFileName starts with the destination directory (if any); the part
       after outBasenameStart is rebuilt for every input file */
    icu::CharString outFileName;
    UErrorCode err = U_ZERO_ERROR;
    if (destdir != NULL && *destdir != 0) {
        outFileName.append(destdir, err).ensureEndsWithFileSeparator(err);
        if (U_FAILURE(err)) {
            return err;
        }
    }
    int32_t outBasenameStart = outFileName.length();

#if DEBUG
    {
      int i;
      printf("makeconv: processing %d files...\n", argc - 1);
      for(i=1; i<argc; ++i) {
        printf("%s ", argv[i]);
      }
      printf("\n");
      fflush(stdout);
    }
#endif

    UBool printFilename = (UBool) (argc > 2 || VERBOSE);
    /* walk the remaining arguments; argv[0] is skipped */
    for (++argv; --argc; ++argv)
    {
        UErrorCode localError = U_ZERO_ERROR;
        const char *arg = getLongPathname(*argv);

        /*produces the right destination path for display*/
        outFileName.truncate(outBasenameStart);
        if (outBasenameStart != 0)
        {
            /* find the last file separator */
            const char *basename = findBasename(arg);
            outFileName.append(basename, localError);
        }
        else
        {
            outFileName.append(arg, localError);
        }
        if (U_FAILURE(localError)) {
            return localError;
        }

        /*removes the extension if any is found*/
        int32_t lastDotIndex = outFileName.lastIndexOf('.');
        if (lastDotIndex >= outBasenameStart) {
            outFileName.truncate(lastDotIndex);
        }

        /* the basename without extension is the converter name */
        if ((outFileName.length() - outBasenameStart) >= UPRV_LENGTHOF(cnvName)) {
            fprintf(stderr, "converter name %s too long\n", outFileName.data() + outBasenameStart);
            return U_BUFFER_OVERFLOW_ERROR;
        }
        uprv_strcpy(cnvName, outFileName.data() + outBasenameStart);

        /*Adds the target extension*/
        outFileName.append(CONVERTER_FILE_EXTENSION, localError);
        if (U_FAILURE(localError)) {
            return localError;
        }

#if DEBUG
        printf("makeconv: processing %s  ...\n", arg);
        fflush(stdout);
#endif
        initConvData(&data);
        createConverter(&data, arg, &localError);

        if (U_FAILURE(localError))
        {
            /* if an error is found, print out an error msg and keep going */
            fprintf(stderr, "Error creating converter for \"%s\" file for \"%s\" (%s)\n",
                    outFileName.data(), arg, u_errorName(localError));
            if(U_SUCCESS(err)) {
                err = localError;
            }
        }
        else
        {
            /* Ensure the static data name matches the file name. */
            /* Changed to ignore directory and only compare base name
             LDH 1/2/08*/
            /* Look for the last primary separator, then the last alternate
               one. Whichever is found, the comparison starts just after it;
               with no separator the whole name is compared. */
            char *p = strrchr(cnvName, U_FILE_SEP_CHAR);
            if(p == NULL)
            {
                p = strrchr(cnvName, U_FILE_ALT_SEP_CHAR);
            }
            p = (p == NULL) ? cnvName : p + 1;

            if(uprv_stricmp(p,data.staticData.name) && !QUIET)
            {
                fprintf(stderr, "Warning: %s%s claims to be '%s'\n",
                    cnvName,  CONVERTER_FILE_EXTENSION,
                    data.staticData.name);
            }

            /* the file name wins over the name declared inside the file */
            uprv_strcpy((char*)data.staticData.name, cnvName);

            if(!uprv_isInvariantString((char*)data.staticData.name, -1)) {
                fprintf(stderr,
                    "Error: A converter name must contain only invariant characters.\n"
                    "%s is not a valid converter name.\n",
                    data.staticData.name);
                if(U_SUCCESS(err)) {
                    err = U_INVALID_TABLE_FORMAT;
                }
            }

            localError = U_ZERO_ERROR;
            writeConverterData(&data, cnvName, destdir, &localError);

            if(U_FAILURE(localError))
            {
                /* if an error is found, print out an error msg and keep going*/
                fprintf(stderr, "Error writing \"%s\" file for \"%s\" (%s)\n", outFileName.data(), arg,
                    u_errorName(localError));
                if(U_SUCCESS(err)) {
                    err = localError;
                }
            }
            else if (printFilename)
            {
                puts(outFileName.data() + outBasenameStart);
            }
        }
        fflush(stdout);
        fflush(stderr);

        cleanupConvData(&data);
    }

    return err;
}
Пример #17
0
/*
 * Sorts `arr` by ICU collation sort keys.
 *
 * A sort key is generated for every element (non-string elements are keyed
 * as the empty string), the keys are sorted with collator_cmp_sort_keys, and
 * the values are appended to a fresh array in that order, which is then
 * assigned to `arr`.
 *
 * Returns true on success, and also when `arr` is not an array or is empty
 * (nothing to do). Returns false, with m_errcode set, when a string cannot be
 * converted to UTF-16 or when a working buffer cannot be allocated.
 */
bool c_Collator::t_sortwithsortkeys(Variant arr) {
  INSTANCE_METHOD_INJECTION_BUILTIN(Collator, Collator::sortwithsortkeys);
  char*       sortKeyBuf = NULL; /* buffer to store sort keys */
  int32_t     sortKeyBufSize = DEF_SORT_KEYS_BUF_SIZE; /* buffer size */
  ptrdiff_t   sortKeyBufOffset = 0; /* pos in buffer to store sort key */
  int32_t     sortKeyLen = 0; /* the length of currently processing key */
  int32_t     bufLeft = 0;
  int32_t     bufIncrement = 0;

  /* buffer to store 'indexes' which will be passed to 'qsort' */
  collator_sort_key_index_t* sortKeyIndxBuf = NULL;
  int32_t     sortKeyIndxBufSize   = DEF_SORT_KEYS_INDX_BUF_SIZE;
  int32_t     sortKeyIndxSize      = sizeof( collator_sort_key_index_t );

  int32_t     sortKeyCount         = 0;
  int32_t     j                    = 0;

  /* tmp buffer to hold current processing string in utf-16 */
  UChar*      utf16_buf            = NULL;
  /* the size of utf16_buf, in bytes */
  int         utf16_buf_size       = DEF_UTF16_BUF_SIZE;
  /* length of converted string, in UTF-16 code units */
  int         utf16_len            = 0;

  m_errcode.clear();
  s_intl_error->m_error.clear();

  /*
   * Sort specified array.
   */
  if (!arr.isArray()) {
    return true;
  }
  Array hash = arr.toArray();
  if (hash.size() == 0) {
    return true;
  }

  /* Create buffers */
  sortKeyBuf     = (char*)calloc(sortKeyBufSize, sizeof(char));
  sortKeyIndxBuf = (collator_sort_key_index_t*)malloc(sortKeyIndxBufSize);
  utf16_buf      = (UChar*)malloc(utf16_buf_size);
  if (!sortKeyBuf || !sortKeyIndxBuf || !utf16_buf) {
    /* free(NULL) is a no-op, so whichever allocations succeeded are freed */
    m_errcode.code = U_MEMORY_ALLOCATION_ERROR;
    m_errcode.custom_error_message = "Sort with sort keys failed";
    free(utf16_buf);
    free(sortKeyIndxBuf);
    free(sortKeyBuf);
    return false;
  }

  /* Iterate through input hash and create a sort key for each value. */
  for (ssize_t pos = hash->iter_begin(); pos != ArrayData::invalid_index;
       pos = hash->iter_advance(pos)) {
    /* Convert current hash item from UTF-8 to UTF-16LE and save the result
     * to utf16_buf.
     * NOTE(review): the helper's length argument is assumed to be an in/out
     * value counted in UTF-16 code units - confirm against
     * intl_convert_utf8_to_utf16. The byte size is therefore converted to
     * code units so the buffer's capacity is never overstated. */
    utf16_len = utf16_buf_size / (int)sizeof(UChar);
    /* Process string values only. */
    Variant val(hash->getValue(pos));
    if (val.isString()) {
      String str = val.toString();
      intl_convert_utf8_to_utf16(&utf16_buf, &utf16_len, str.data(),
                                 str.size(), &(m_errcode.code));
      if (U_FAILURE(m_errcode.code)) {
        m_errcode.custom_error_message = "Sort with sort keys failed";
        if (utf16_buf) {
          free(utf16_buf);
        }
        free(sortKeyIndxBuf);
        free(sortKeyBuf);
        return false;
      }
    } else {
      /* Set empty string */
      utf16_len = 0;
      utf16_buf[utf16_len] = 0;
    }

    /* The buffer now holds utf16_len code units, so it is at least that
     * large; record any growth in bytes without assuming extra room. */
    if (utf16_len * (int)sizeof(UChar) > utf16_buf_size) {
      utf16_buf_size = utf16_len * (int)sizeof(UChar);
    }

    /* Get sort key, reallocating the buffer if needed. */
    bufLeft = sortKeyBufSize - sortKeyBufOffset;

    sortKeyLen = ucol_getSortKey(m_ucoll,
                    utf16_buf,
                    utf16_len,
                    (uint8_t*)sortKeyBuf + sortKeyBufOffset,
                    bufLeft);

    /* check for sortKeyBuf overflow, increasing its size of the buffer if
       needed */
    if (sortKeyLen > bufLeft) {
      bufIncrement = ( sortKeyLen > DEF_SORT_KEYS_BUF_INCREMENT ) ?
        sortKeyLen : DEF_SORT_KEYS_BUF_INCREMENT;
      sortKeyBufSize += bufIncrement;
      bufLeft += bufIncrement;
      /* keep the old pointer until realloc is known to have succeeded */
      char* grownKeyBuf = (char*)realloc(sortKeyBuf, sortKeyBufSize);
      if (!grownKeyBuf) {
        m_errcode.code = U_MEMORY_ALLOCATION_ERROR;
        m_errcode.custom_error_message = "Sort with sort keys failed";
        free(utf16_buf);
        free(sortKeyIndxBuf);
        free(sortKeyBuf);
        return false;
      }
      sortKeyBuf = grownKeyBuf;
      sortKeyLen = ucol_getSortKey(m_ucoll, utf16_buf, utf16_len,
                                   (uint8_t*)sortKeyBuf + sortKeyBufOffset,
                                   bufLeft);
    }

    /* check sortKeyIndxBuf overflow, increasing its size of the buffer if
       needed */
    if ((sortKeyCount + 1) * sortKeyIndxSize > sortKeyIndxBufSize) {
      bufIncrement = (sortKeyIndxSize > DEF_SORT_KEYS_INDX_BUF_INCREMENT) ?
        sortKeyIndxSize : DEF_SORT_KEYS_INDX_BUF_INCREMENT;
      sortKeyIndxBufSize += bufIncrement;
      collator_sort_key_index_t* grownIndxBuf =
        (collator_sort_key_index_t*)realloc(sortKeyIndxBuf,
                                            sortKeyIndxBufSize);
      if (!grownIndxBuf) {
        m_errcode.code = U_MEMORY_ALLOCATION_ERROR;
        m_errcode.custom_error_message = "Sort with sort keys failed";
        free(utf16_buf);
        free(sortKeyIndxBuf);
        free(sortKeyBuf);
        return false;
      }
      sortKeyIndxBuf = grownIndxBuf;
    }
    /* Store the key's offset for now: sortKeyBuf may still move when it is
       reallocated, so real pointers are computed after the loop. */
    sortKeyIndxBuf[sortKeyCount].key = (char*)sortKeyBufOffset;
    sortKeyIndxBuf[sortKeyCount].valPos = pos;
    sortKeyBufOffset += sortKeyLen;
    ++sortKeyCount;
  }

  /* update ptrs to point to valid keys. */
  for( j = 0; j < sortKeyCount; j++ )
    sortKeyIndxBuf[j].key = sortKeyBuf + (ptrdiff_t)sortKeyIndxBuf[j].key;

  /* sort it */
  zend_qsort(sortKeyIndxBuf, sortKeyCount, sortKeyIndxSize,
             collator_cmp_sort_keys, NULL);

  /* for resulting hash we'll assign new hash keys rather then reordering */
  Array sortedHash = Array::Create();

  for (j = 0; j < sortKeyCount; j++) {
    sortedHash.append(hash->getValue(sortKeyIndxBuf[j].valPos));
  }

  /* Save sorted hash into return variable. */
  arr = sortedHash;

  if (utf16_buf)
    free(utf16_buf);

  free(sortKeyIndxBuf);
  free(sortKeyBuf);

  return true;
}
Пример #18
0
/*
 * Parse the header section of a .ucm file and fill in data->staticData.
 *
 * Lines are read from convFile until a "CHARMAP" line or the end of input.
 * Lines for which ucm_parseHeaderLine() returns true are skipped; of the
 * remaining keys only code_set_name, subchar and subchar1 are acted upon,
 * everything else is ignored.  Afterwards the byte-length limits and the
 * conversion type are copied from data->ucm->states, and, for files that do
 * not name a base file, fields that are still zero are filled in from the
 * matching entry of ucnv_converterStaticData[].
 *
 * On an illegal <subchar>/<subchar1>, a missing conversion type, or a
 * <subchar1> given for an unsuitable converter type, a message is printed to
 * stderr and *pErrorCode is set to U_INVALID_TABLE_FORMAT.
 * Does nothing if *pErrorCode already indicates a failure on entry.
 */
static void
readHeader(ConvData *data,
           FileStream* convFile,
           UErrorCode *pErrorCode) {
    char lineBuf[1024];
    char *cursor, *key, *value;
    const UConverterStaticData *defaults;
    UConverterStaticData *sd;

    if(U_FAILURE(*pErrorCode)) {
        return;
    }

    sd=&data->staticData;
    sd->platform=UCNV_IBM;
    sd->subCharLen=0;

    for(;;) {
        if(!T_FileStream_readLine(convFile, lineBuf, sizeof(lineBuf))) {
            break; /* no more input */
        }

        /* lines accepted by the header-line parser need no further handling here */
        if(ucm_parseHeaderLine(data->ucm, lineBuf, &key, &value)) {
            continue;
        }

        /* the mapping section starts here, so the header is finished */
        if(0==uprv_strcmp(lineBuf, "CHARMAP")) {
            break;
        }

        if(0==uprv_strcmp(key, "code_set_name")) {
            /* an empty name leaves the current values untouched */
            if(*value!=0) {
                uprv_strcpy((char *)sd->name, value);
                getPlatformAndCCSIDFromName(value, &sd->platform, &sd->codepage);
            }
            continue;
        }

        if(0==uprv_strcmp(key, "subchar")) {
            uint8_t bytes[UCNV_EXT_MAX_BYTES];
            int8_t length;

            cursor=value;
            length=ucm_parseBytes(bytes, lineBuf, (const char **)&cursor);
            /* need 1..4 bytes and nothing left over after them */
            if(length<1 || length>4 || *cursor!=0) {
                fprintf(stderr, "error: illegal <subchar> %s\n", value);
                *pErrorCode=U_INVALID_TABLE_FORMAT;
                return;
            }
            sd->subCharLen=length;
            uprv_memcpy(sd->subChar, bytes, length);
            continue;
        }

        if(0==uprv_strcmp(key, "subchar1")) {
            uint8_t bytes[UCNV_EXT_MAX_BYTES];

            cursor=value;
            /* need exactly one byte and nothing left over after it */
            if(1!=ucm_parseBytes(bytes, lineBuf, (const char **)&cursor) || *cursor!=0) {
                fprintf(stderr, "error: illegal <subchar1> %s\n", value);
                *pErrorCode=U_INVALID_TABLE_FORMAT;
                return;
            }
            sd->subChar1=bytes[0];
            continue;
        }

        /* any other key is ignored */
    }

    /* take over what the UCMFile state data provides */
    sd->conversionType=data->ucm->states.conversionType;
    sd->minBytesPerChar=(int8_t)data->ucm->states.minCharLength;
    sd->maxBytesPerChar=(int8_t)data->ucm->states.maxCharLength;

    if(sd->conversionType==UCNV_UNSUPPORTED_CONVERTER) {
        fprintf(stderr, "ucm error: missing conversion type (<uconv_class>)\n");
        *pErrorCode=U_INVALID_TABLE_FORMAT;
        return;
    }

    /*
     * With the type known, pick up 'default' values from the prototype table.
     * The type is not checked again because the parser only recognizes
     * types that have prototypes.
     *
     * Delta (extension-only) tables are skipped here; they take their
     * values from the base file instead, see createConverter().
     */
    defaults=NULL;
    if(data->ucm->baseName[0]==0) {
        defaults=ucnv_converterStaticData[sd->conversionType];
    }
    if(defaults!=NULL) {
        if(sd->name[0]==0) {
            uprv_strcpy((char *)sd->name, defaults->name);
        }
        if(sd->codepage==0) {
            sd->codepage=defaults->codepage;
        }
        if(sd->platform==0) {
            sd->platform=defaults->platform;
        }
        if(sd->minBytesPerChar==0) {
            sd->minBytesPerChar=defaults->minBytesPerChar;
        }
        if(sd->maxBytesPerChar==0) {
            sd->maxBytesPerChar=defaults->maxBytesPerChar;
        }
        if(sd->subCharLen==0) {
            sd->subCharLen=defaults->subCharLen;
            if(defaults->subCharLen>0) {
                uprv_memcpy(sd->subChar, defaults->subChar, defaults->subCharLen);
            }
        }
    }

    /* a negative output type is replaced by one derived from the maximum char length */
    if(data->ucm->states.outputType<0) {
        data->ucm->states.outputType=(int8_t)data->ucm->states.maxCharLength-1;
    }

    /* subchar1 is rejected for multi-byte-minimum tables and for non-MBCS/EBCDIC_STATEFUL types */
    if(sd->subChar1!=0) {
        if( sd->minBytesPerChar>1 ||
            (sd->conversionType!=UCNV_MBCS &&
             sd->conversionType!=UCNV_EBCDIC_STATEFUL)
        ) {
            fprintf(stderr, "error: <subchar1> defined for a type other than MBCS or EBCDIC_STATEFUL\n");
            *pErrorCode=U_INVALID_TABLE_FORMAT;
        }
    }
}
Пример #19
0
/**
 * Normalize a UTF-8 string with ICU's unorm_normalize().
 *
 * The input is converted to UTF-16, normalized into a buffer sized from the
 * input length (times 3 for the decomposing forms NFD/NFKD), retried once
 * with the exact size reported by ICU if that buffer was too small, and
 * finally converted back to UTF-8.
 *
 * @param cls    unused in this function body
 * @param input  UTF-8 text to normalize
 * @param form   one of UNORM_NONE, UNORM_NFC, UNORM_NFKC, UNORM_NFD,
 *               UNORM_NFKD; anything else is rejected
 * @return the normalized string, or null on failure; on every failure path
 *         the error code and a message are stored in s_intl_error
 */
Variant c_Normalizer::ti_normalize(const char* cls , CStrRef input,
                                   int64 form /* = q_Normalizer_FORM_C */) {
  STATIC_METHOD_INJECTION_BUILTIN(Normalizer, Normalizer::normalize);
  s_intl_error->m_error.clear();

  /* Decomposing forms may grow the text, so reserve extra room for them. */
  int expansion_factor = 1;
  switch(form) {
  case UNORM_NONE:
  case UNORM_NFC:
  case UNORM_NFKC:
    break;
  case UNORM_NFD:
  case UNORM_NFKD:
    expansion_factor = 3;
    break;
  default:
    s_intl_error->m_error.code = U_ILLEGAL_ARGUMENT_ERROR;
    s_intl_error->m_error.custom_error_message =
      "normalizer_normalize: illegal normalization form";
    return null;
  }

  /* First convert the string to UTF-16. */
  UChar* uinput = NULL; int uinput_len = 0;
  UErrorCode status = U_ZERO_ERROR;
  intl_convert_utf8_to_utf16(&uinput, &uinput_len, input.data(), input.size(),
                             &status);

  if (U_FAILURE(status)) {
    s_intl_error->m_error.code = status;
    s_intl_error->m_error.custom_error_message =
        "Error converting string to UTF-16.";
    free(uinput);
    return null;
  }

  /* Allocate memory for the destination buffer for normalization */
  int uret_len = uinput_len * expansion_factor;
  UChar *uret_buf = (UChar*)malloc((uret_len + 1) * sizeof(UChar));

  /* normalize */
  int size_needed = unorm_normalize(uinput, uinput_len,
                                    (UNormalizationMode)form, (int32_t) 0,
                                    uret_buf, uret_len, &status);

  /* Bail out if an unexpected error occurred.
   * (U_BUFFER_OVERFLOW_ERROR means that the target buffer is not large
   * enough; that case is handled by the retry below.)
   * (U_STRING_NOT_TERMINATED_WARNING usually means that the input string
   * is empty).
   */
  if (U_FAILURE(status) &&
      status != U_BUFFER_OVERFLOW_ERROR &&
      status != U_STRING_NOT_TERMINATED_WARNING) {
    /* Record the failure like every other error path in this function does,
     * so callers querying the intl error state see why null was returned. */
    s_intl_error->m_error.code = status;
    s_intl_error->m_error.custom_error_message = "Error normalizing string";
    free(uret_buf);
    free(uinput);
    return null;
  }

  if (size_needed > uret_len) {
    /* The first buffer was too small: allocate a fresh one of the exact
     * size reported by ICU instead of growing the old one in place. */
    free(uret_buf);
    uret_buf = (UChar*)malloc((size_needed + 1) * sizeof(UChar));
    uret_len = size_needed;

    /* clear the overflow status left by the first attempt */
    status = U_ZERO_ERROR;

    /* try normalize again */
    size_needed = unorm_normalize( uinput, uinput_len,
                                   (UNormalizationMode)form, (int32_t) 0,
                                   uret_buf, uret_len, &status);

    /* Bail out if an unexpected error occurred. */
    if (U_FAILURE(status)) {
      /* Set error messages. */
      s_intl_error->m_error.code = status;
      s_intl_error->m_error.custom_error_message = "Error normalizing string";
      free(uret_buf);
      free(uinput);
      return null;
    }
  }

  free(uinput);

  /* the buffer we actually used */
  uret_len = size_needed;

  /* Convert normalized string from UTF-16 to UTF-8. */
  char* ret_buf = NULL; int ret_len = 0;
  intl_convert_utf16_to_utf8(&ret_buf, &ret_len, uret_buf, uret_len, &status);
  free(uret_buf);
  if (U_FAILURE(status)) {
    s_intl_error->m_error.code = status;
    s_intl_error->m_error.custom_error_message =
      "normalizer_normalize: error converting normalized text UTF-8";
    return null;
  }

  /* AttachString: the returned String is constructed directly from ret_buf */
  return String(ret_buf, ret_len, AttachString);
}
Пример #20
0
/*
 * Read one .ucm file into data: open a UCMFile, parse the header with
 * readHeader(), read the base mapping table and, if another "CHARMAP"
 * section follows, the extension table as well.
 *
 * Returns TRUE if a base table was read, FALSE for an extension table.
 * FALSE is also returned whenever the function stops early because
 * *pErrorCode indicates a failure (already on entry, file cannot be opened,
 * header or base table cannot be read).
 *
 * The input file is closed on every path after it has been opened.
 */
static UBool
readFile(ConvData *data, const char* converterName,
         UErrorCode *pErrorCode) {
    char line[1024];
    char *end;
    FileStream *convFile;

    UCMStates *baseStates;
    UBool dataIsBase;

    if(U_FAILURE(*pErrorCode)) {
        return FALSE;
    }

    data->ucm=ucm_open();

    convFile=T_FileStream_open(converterName, "r");
    if(convFile==NULL) {
        *pErrorCode=U_FILE_ACCESS_ERROR;
        return FALSE;
    }

    readHeader(data, convFile, pErrorCode);
    if(U_FAILURE(*pErrorCode)) {
        /* do not leak the open file when the header is bad */
        T_FileStream_close(convFile);
        return FALSE;
    }

    /* a file without a base name carries its own state table */
    if(data->ucm->baseName[0]==0) {
        dataIsBase=TRUE;
        baseStates=&data->ucm->states;
        ucm_processStates(baseStates, IGNORE_SISO_CHECK);
    } else {
        dataIsBase=FALSE;
        baseStates=NULL;
    }

    /* read the base table */
    ucm_readTable(data->ucm, convFile, dataIsBase, baseStates, pErrorCode);
    if(U_FAILURE(*pErrorCode)) {
        /* do not leak the open file when the table is bad */
        T_FileStream_close(convFile);
        return FALSE;
    }

    /* read an extension table if there is one */
    while(T_FileStream_readLine(convFile, line, sizeof(line))) {
        /* strip trailing line ends and white space */
        end=uprv_strchr(line, 0);
        while(line<end &&
              (*(end-1)=='\n' || *(end-1)=='\r' || *(end-1)==' ' || *(end-1)=='\t')) {
            --end;
        }
        *end=0;

        if(line[0]=='#' || u_skipWhitespace(line)==end) {
            continue; /* ignore empty and comment lines */
        }

        if(0==uprv_strcmp(line, "CHARMAP")) {
            /* read the extension table */
            ucm_readTable(data->ucm, convFile, FALSE, baseStates, pErrorCode);
        } else {
            fprintf(stderr, "unexpected text after the base mapping table\n");
        }
        /* only the first significant line after the base table is looked at */
        break;
    }

    T_FileStream_close(convFile);

    if(data->ucm->base->flagsType==UCM_FLAGS_MIXED || data->ucm->ext->flagsType==UCM_FLAGS_MIXED) {
        fprintf(stderr, "error: some entries have the mapping precision (with '|'), some do not\n");
        *pErrorCode=U_INVALID_TABLE_FORMAT;
    }

    return dataIsBase;
}
Пример #21
0
/*
PAL Function:
GetLocaleInfoString

Obtains string locale information.
Returns 1 for success, 0 otherwise
*/
extern "C" int32_t
GetLocaleInfoString(const UChar* localeName, LocaleStringData localeStringData, UChar* value, int32_t valueLength)
{
    UErrorCode status = U_ZERO_ERROR;
    char locale[ULOC_FULLNAME_CAPACITY];
    GetLocale(localeName, locale, ULOC_FULLNAME_CAPACITY, false, &status);

    if (U_FAILURE(status))
    {
        return UErrorCodeToBool(U_ILLEGAL_ARGUMENT_ERROR);
    }

    switch (localeStringData)
    {
        case LocalizedDisplayName:
            uloc_getDisplayName(locale, uloc_getDefault(), value, valueLength, &status);
            break;
        case EnglishDisplayName:
            uloc_getDisplayName(locale, ULOC_ENGLISH, value, valueLength, &status);
            break;
        case NativeDisplayName:
            uloc_getDisplayName(locale, locale, value, valueLength, &status);
            break;
        case LocalizedLanguageName:
            uloc_getDisplayLanguage(locale, uloc_getDefault(), value, valueLength, &status);
            break;
        case EnglishLanguageName:
            uloc_getDisplayLanguage(locale, ULOC_ENGLISH, value, valueLength, &status);
            break;
        case NativeLanguageName:
            uloc_getDisplayLanguage(locale, locale, value, valueLength, &status);
            break;
        case EnglishCountryName:
            uloc_getDisplayCountry(locale, ULOC_ENGLISH, value, valueLength, &status);
            break;
        case NativeCountryName:
            uloc_getDisplayCountry(locale, locale, value, valueLength, &status);
            break;
        case ListSeparator:
        // fall through
        case ThousandSeparator:
            status = GetLocaleInfoDecimalFormatSymbol(locale, UNUM_GROUPING_SEPARATOR_SYMBOL, value, valueLength);
            break;
        case DecimalSeparator:
            status = GetLocaleInfoDecimalFormatSymbol(locale, UNUM_DECIMAL_SEPARATOR_SYMBOL, value, valueLength);
            break;
        case Digits:
            status = GetDigitSymbol(locale, status, UNUM_ZERO_DIGIT_SYMBOL, 0, value, valueLength);
            // symbols UNUM_ONE_DIGIT to UNUM_NINE_DIGIT are contiguous
            for (int32_t symbol = UNUM_ONE_DIGIT_SYMBOL; symbol <= UNUM_NINE_DIGIT_SYMBOL; symbol++)
            {
                int charIndex = symbol - UNUM_ONE_DIGIT_SYMBOL + 1;
                status = GetDigitSymbol(
                    locale, status, static_cast<UNumberFormatSymbol>(symbol), charIndex, value, valueLength);
            }
            break;
        case MonetarySymbol:
            status = GetLocaleInfoDecimalFormatSymbol(locale, UNUM_CURRENCY_SYMBOL, value, valueLength);
            break;
        case Iso4217MonetarySymbol:
            status = GetLocaleInfoDecimalFormatSymbol(locale, UNUM_INTL_CURRENCY_SYMBOL, value, valueLength);
            break;
        case MonetaryDecimalSeparator:
            status = GetLocaleInfoDecimalFormatSymbol(locale, UNUM_MONETARY_SEPARATOR_SYMBOL, value, valueLength);
            break;
        case MonetaryThousandSeparator:
            status =
                GetLocaleInfoDecimalFormatSymbol(locale, UNUM_MONETARY_GROUPING_SEPARATOR_SYMBOL, value, valueLength);
            break;
        case AMDesignator:
            status = GetLocaleInfoAmPm(locale, true, value, valueLength);
            break;
        case PMDesignator:
            status = GetLocaleInfoAmPm(locale, false, value, valueLength);
            break;
        case PositiveSign:
            status = GetLocaleInfoDecimalFormatSymbol(locale, UNUM_PLUS_SIGN_SYMBOL, value, valueLength);
            break;
        case NegativeSign:
            status = GetLocaleInfoDecimalFormatSymbol(locale, UNUM_MINUS_SIGN_SYMBOL, value, valueLength);
            break;
        case Iso639LanguageName:
            status = GetLocaleIso639LanguageName(locale, value, valueLength);
            break;
        case Iso3166CountryName:
            status = GetLocaleIso3166CountryName(locale, value, valueLength);
            break;
        case NaNSymbol:
            status = GetLocaleInfoDecimalFormatSymbol(locale, UNUM_NAN_SYMBOL, value, valueLength);
            break;
        case PositiveInfinitySymbol:
            status = GetLocaleInfoDecimalFormatSymbol(locale, UNUM_INFINITY_SYMBOL, value, valueLength);
            break;
        case ParentName:
        {
            // ICU supports lang[-script][-region][-variant] so up to 4 parents
            // including invariant locale
            char localeNameTemp[ULOC_FULLNAME_CAPACITY];

            uloc_getParent(locale, localeNameTemp, ULOC_FULLNAME_CAPACITY, &status);
            if (U_SUCCESS(status))
            {
                status = u_charsToUChars_safe(localeNameTemp, value, valueLength);
                if (U_SUCCESS(status))
                {
                    FixupLocaleName(value, valueLength);
                }
            }
            break;
        }
        case PercentSymbol:
            status = GetLocaleInfoDecimalFormatSymbol(locale, UNUM_PERCENT_SYMBOL, value, valueLength);
            break;
        case PerMilleSymbol:
            status = GetLocaleInfoDecimalFormatSymbol(locale, UNUM_PERMILL_SYMBOL, value, valueLength);
            break;
        default:
            status = U_UNSUPPORTED_ERROR;
            break;
    };

    return UErrorCodeToBool(status);
}
Пример #22
0
/*
 * Read the .ucm file converterName into data and build the converter data
 * for it.
 *
 * If the file contains a base table, MBCS data is opened and validated, the
 * base table is added, and an extension table is built as well when there
 * are extension mappings.
 *
 * Otherwise (extension-only file) the base file named by
 * data->ucm->baseName is read from a path derived from converterName (its
 * part located by findBasename() is replaced by "<baseName>.ucm"), missing
 * header fields are filled in from the base file, and only extension data
 * is built.
 *
 * Errors are reported through *pErrorCode (U_MEMORY_ALLOCATION_ERROR or
 * U_INVALID_TABLE_FORMAT, or whatever readFile() set).  Does nothing if
 * *pErrorCode already indicates a failure on entry.
 *
 * NOTE(review): baseFilename is a fixed 500-byte buffer filled with
 * unchecked uprv_strcpy()/uprv_strcat() calls; very long paths would
 * overflow it.
 */
static void
createConverter(ConvData *data, const char *converterName, UErrorCode *pErrorCode) {
    ConvData baseData;
    UBool dataIsBase;

    UConverterStaticData *staticData;
    UCMStates *states, *baseStates;

    if(U_FAILURE(*pErrorCode)) {
        return;
    }

    initConvData(data);

    dataIsBase=readFile(data, converterName, pErrorCode);
    if(U_FAILURE(*pErrorCode)) {
        return;
    }

    staticData=&data->staticData;
    states=&data->ucm->states;

    if(dataIsBase) {
        /*
         * Build a normal .cnv file with a base table
         * and an optional extension table.
         */
        data->cnvData=MBCSOpen(data->ucm);
        if(data->cnvData==NULL) {
            *pErrorCode=U_MEMORY_ALLOCATION_ERROR;

        } else if(!data->cnvData->isValid(data->cnvData,
                            staticData->subChar, staticData->subCharLen)
        ) {
            fprintf(stderr, "       the substitution character byte sequence is illegal in this codepage structure!\n");
            *pErrorCode=U_INVALID_TABLE_FORMAT;

        } else if(staticData->subChar1!=0 &&
                    !data->cnvData->isValid(data->cnvData, &staticData->subChar1, 1)
        ) {
            fprintf(stderr, "       the subchar1 byte is illegal in this codepage structure!\n");
            *pErrorCode=U_INVALID_TABLE_FORMAT;

        } else if(
            data->ucm->ext->mappingsLength>0 &&
            !ucm_checkBaseExt(states, data->ucm->base, data->ucm->ext, data->ucm->ext, FALSE)
        ) {
            *pErrorCode=U_INVALID_TABLE_FORMAT;
        } else if(data->ucm->base->flagsType&UCM_FLAGS_EXPLICIT) {
            /* sort the table so that it can be turned into UTF-8-friendly data */
            ucm_sortTable(data->ucm->base);
        }

        if(U_SUCCESS(*pErrorCode)) {
            if(
                /* add the base table after ucm_checkBaseExt()! */
                !data->cnvData->addTable(data->cnvData, data->ucm->base, &data->staticData)
            ) {
                *pErrorCode=U_INVALID_TABLE_FORMAT;
            } else {
                /*
                 * addTable() may have requested moving more mappings to the extension table
                 * if they fit into the base toUnicode table but not into the
                 * base fromUnicode table.
                 * (Especially for UTF-8-friendly fromUnicode tables.)
                 * Such mappings will have the MBCS_FROM_U_EXT_FLAG set, which causes them
                 * to be excluded from the extension toUnicode data.
                 * See MBCSOkForBaseFromUnicode() for which mappings do not fit into
                 * the base fromUnicode table.
                 */
                ucm_moveMappings(data->ucm->base, data->ucm->ext);
                ucm_sortTable(data->ucm->ext);
                if(data->ucm->ext->mappingsLength>0) {
                    /* prepare the extension table, if there is one */
                    data->extData=CnvExtOpen(data->ucm);
                    if(data->extData==NULL) {
                        *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
                    } else if(
                        !data->extData->addTable(data->extData, data->ucm->ext, &data->staticData)
                    ) {
                        *pErrorCode=U_INVALID_TABLE_FORMAT;
                    }
                }
            }
        }
    } else {
        /* Build an extension-only .cnv file. */
        char baseFilename[500];
        char *basename;

        initConvData(&baseData);

        /* assemble a path/filename for data->ucm->baseName */
        uprv_strcpy(baseFilename, converterName);
        basename=(char *)findBasename(baseFilename);
        uprv_strcpy(basename, data->ucm->baseName);
        uprv_strcat(basename, ".ucm");

        /* read the base table */
        dataIsBase=readFile(&baseData, baseFilename, pErrorCode);
        if(U_FAILURE(*pErrorCode)) {
            /*
             * Nothing more to build; fall through to cleanupConvData(&baseData)
             * below so that whatever readFile() allocated for the base file
             * is released instead of leaked.
             */
        } else if(!dataIsBase) {
            fprintf(stderr, "error: the <icu:base> file \"%s\" is not a base table file\n", baseFilename);
            *pErrorCode=U_INVALID_TABLE_FORMAT;
        } else {
            /* prepare the extension table */
            data->extData=CnvExtOpen(data->ucm);
            if(data->extData==NULL) {
                *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
            } else {
                /* fill in gaps in extension file header fields */
                UCMapping *m, *mLimit;
                uint8_t fallbackFlags;

                baseStates=&baseData.ucm->states;
                if(states->conversionType==UCNV_DBCS) {
                    staticData->minBytesPerChar=(int8_t)(states->minCharLength=2);
                } else if(states->minCharLength==0) {
                    staticData->minBytesPerChar=(int8_t)(states->minCharLength=baseStates->minCharLength);
                }
                if(states->maxCharLength<states->minCharLength) {
                    staticData->maxBytesPerChar=(int8_t)(states->maxCharLength=baseStates->maxCharLength);
                }

                if(staticData->subCharLen==0) {
                    uprv_memcpy(staticData->subChar, baseData.staticData.subChar, 4);
                    staticData->subCharLen=baseData.staticData.subCharLen;
                }
                /*
                 * do not copy subChar1 -
                 * only use what is explicitly specified
                 * because it cannot be unset in the extension file header
                 */

                /*
                 * get the fallback flags:
                 * bit 0 is set if any base mapping has f==1, bit 1 if any has f==3;
                 * the scan stops early once both bits are set
                 */
                fallbackFlags=0;
                for(m=baseData.ucm->base->mappings, mLimit=m+baseData.ucm->base->mappingsLength;
                    m<mLimit && fallbackFlags!=3;
                    ++m
                ) {
                    if(m->f==1) {
                        fallbackFlags|=1;
                    } else if(m->f==3) {
                        fallbackFlags|=2;
                    }
                }

                if(fallbackFlags&1) {
                    staticData->hasFromUnicodeFallback=TRUE;
                }
                if(fallbackFlags&2) {
                    staticData->hasToUnicodeFallback=TRUE;
                }

                if(1!=ucm_countChars(baseStates, staticData->subChar, staticData->subCharLen)) {
                    fprintf(stderr, "       the substitution character byte sequence is illegal in this codepage structure!\n");
                    *pErrorCode=U_INVALID_TABLE_FORMAT;

                } else if(staticData->subChar1!=0 && 1!=ucm_countChars(baseStates, &staticData->subChar1, 1)) {
                    fprintf(stderr, "       the subchar1 byte is illegal in this codepage structure!\n");
                    *pErrorCode=U_INVALID_TABLE_FORMAT;

                } else if(
                    !ucm_checkValidity(data->ucm->ext, baseStates) ||
                    !ucm_checkBaseExt(baseStates, baseData.ucm->base, data->ucm->ext, data->ucm->ext, FALSE)
                ) {
                    *pErrorCode=U_INVALID_TABLE_FORMAT;
                } else {
                    if(states->maxCharLength>1) {
                        /*
                         * When building a normal .cnv file with a base table
                         * for an MBCS (not SBCS) table with explicit precision flags,
                         * the MBCSAddTable() function marks some mappings for moving
                         * to the extension table.
                         * They fit into the base toUnicode table but not into the
                         * base fromUnicode table.
                         * (Note: We do have explicit precision flags because they are
                         * required for extension table generation, and
                         * ucm_checkBaseExt() verified it.)
                         *
                         * We do not call MBCSAddTable() here (we probably could)
                         * so we need to do the analysis before building the extension table.
                         * We assume that MBCSAddTable() will build a UTF-8-friendly table.
                         * Redundant mappings in the extension table are ok except they cost some size.
                         *
                         * Do this after ucm_checkBaseExt().
                         */
                        const MBCSData *mbcsData=MBCSGetDummy();
                        int32_t needsMove=0;
                        for(m=baseData.ucm->base->mappings, mLimit=m+baseData.ucm->base->mappingsLength;
                            m<mLimit;
                            ++m
                        ) {
                            if(!MBCSOkForBaseFromUnicode(mbcsData, m->b.bytes, m->bLen, m->u, m->f)) {
                                m->f|=MBCS_FROM_U_EXT_FLAG;
                                m->moveFlag=UCM_MOVE_TO_EXT;
                                ++needsMove;
                            }
                        }

                        if(needsMove!=0) {
                            ucm_moveMappings(baseData.ucm->base, data->ucm->ext);
                            ucm_sortTable(data->ucm->ext);
                        }
                    }
                    if(!data->extData->addTable(data->extData, data->ucm->ext, &data->staticData)) {
                        *pErrorCode=U_INVALID_TABLE_FORMAT;
                    }
                }
            }
        }

        /* the base file data is only needed while building; release it on every path */
        cleanupConvData(&baseData);
    }
}
Пример #23
0
/*
 * Return the StringPrep profile identified by (path, name), taking it from
 * SHARED_DATA_HASHTABLE when present and loading it with loadData()
 * otherwise.
 *
 * The returned profile has had its refCount incremented (or set to 1 when
 * it was newly inserted).  The hash table is only touched while
 * usprepMutex is held; loading and allocation happen outside the lock, so
 * the table is checked a second time before a freshly loaded profile is
 * inserted.
 *
 * Returns NULL with *status set on failure.
 */
static UStringPrepProfile*
usprep_getProfile(const char* path,
                  const char* name,
                  UErrorCode *status){

    initCache(status);
    if(U_FAILURE(*status)){
        return NULL;
    }

    /*
     * Lookup key that merely borrows the caller's strings: const is cast
     * away to avoid malloc, strcpy and free calls for a key that is only
     * used for fetching from the hash table.
     */
    UStringPrepKey lookupKey;
    lookupKey.name = (char*) name;
    lookupKey.path = (char*) path;

    /* first try: is the profile already in the cache? */
    UStringPrepProfile* entry = NULL;
    umtx_lock(&usprepMutex);
    entry = (UStringPrepProfile*) (uhash_get(SHARED_DATA_HASHTABLE,&lookupKey));
    if(entry != NULL) {
        entry->refCount++;
    }
    umtx_unlock(&usprepMutex);

    if(entry != NULL) {
        return entry;
    }

    /* cache miss: load the data ourselves and then try to publish it */
    LocalMemory<UStringPrepProfile> loaded;
    if(loaded.allocateInsteadAndReset() == NULL) {
        *status = U_MEMORY_ALLOCATION_ERROR;
        return NULL;
    }

    if(!loadData(loaded.getAlias(), path, name, _SPREP_DATA_TYPE, status) || U_FAILURE(*status) ){
        return NULL;
    }

    /* decode the option bits stored with the data */
    loaded->doNFKC = (UBool)((loaded->indexes[_SPREP_OPTIONS] & _SPREP_NORMALIZATION_ON) > 0);
    loaded->checkBiDi = (UBool)((loaded->indexes[_SPREP_OPTIONS] & _SPREP_CHECK_BIDI_ON) > 0);
    if(loaded->checkBiDi) {
        loaded->bdp = ubidi_getSingleton();
    }

    /* allocate an owned key (struct plus string storage) before taking the lock */
    LocalMemory<UStringPrepKey> ownedKey;
    LocalMemory<char> ownedName;
    LocalMemory<char> ownedPath;
    UBool allocated = (UBool)(ownedKey.allocateInsteadAndReset() != NULL &&
                              ownedName.allocateInsteadAndCopy(uprv_strlen(name)+1) != NULL);
    if(allocated && path != NULL) {
        allocated = (UBool)(ownedPath.allocateInsteadAndCopy(uprv_strlen(path)+1) != NULL);
    }
    if(!allocated) {
        *status = U_MEMORY_ALLOCATION_ERROR;
        usprep_unload(loaded.getAlias());
        return NULL;
    }

    umtx_lock(&usprepMutex);
    entry = (UStringPrepProfile*) (uhash_get(SHARED_DATA_HASHTABLE,&lookupKey));
    if(entry == NULL) {
        /* still absent: hand our key and profile over to the hash table */
        ownedKey->name = ownedName.orphan();
        uprv_strcpy(ownedKey->name, name);
        if(path != NULL){
            ownedKey->path = ownedPath.orphan();
            uprv_strcpy(ownedKey->path, path);
        }

        entry = loaded.orphan();
        entry->refCount = 1;
        uhash_put(SHARED_DATA_HASHTABLE, ownedKey.orphan(), entry, status);
    } else {
        // Another thread already inserted the same key/value: use its entry and unload ours
        entry->refCount++;
        usprep_unload(loaded.getAlias());
    }
    umtx_unlock(&usprepMutex);

    return entry;
}
Пример #24
0
/* Instead of having a separate pass for 'special' patterns, reintegrate the two
 * so we don't get bitten by preflight bugs again.  We can be reasonably efficient
 * without two separate code paths, this code isn't that performance-critical.
 *
 * This code is general enough to deal with patterns that have a prefix or swap the
 * language and remainder components, since we gave developers enough rope to do such
 * things if they futz with the pattern data.  But since we don't give them a way to
 * specify a pattern for arbitrary combinations of components, there's not much use in
 * that.  I don't think our data includes such patterns, the only variable I know of is
 * whether there is a space before the open paren, or not.  Oh, and zh uses different
 * chars than the standard open/close paren (which ja and ko use, btw).
 */
U_CAPI int32_t U_EXPORT2
uloc_getDisplayName(const char *locale,
                    const char *displayLocale,
                    UChar *dest, int32_t destCapacity,
                    UErrorCode *pErrorCode)
{
    static const UChar defaultSeparator[9] = { 0x007b, 0x0030, 0x007d, 0x002c, 0x0020, 0x007b, 0x0031, 0x007d, 0x0000 }; /* "{0}, {1}" */
    static const UChar sub0[4] = { 0x007b, 0x0030, 0x007d , 0x0000 } ; /* {0} */
    static const UChar sub1[4] = { 0x007b, 0x0031, 0x007d , 0x0000 } ; /* {1} */
    static const int32_t subLen = 3;
    static const UChar defaultPattern[10] = {
        0x007b, 0x0030, 0x007d, 0x0020, 0x0028, 0x007b, 0x0031, 0x007d, 0x0029, 0x0000
    }; /* {0} ({1}) */
    static const int32_t defaultPatLen = 9;
    static const int32_t defaultSub0Pos = 0;
    static const int32_t defaultSub1Pos = 5;

    int32_t length; /* of formatted result */

    const UChar *separator;
    int32_t sepLen = 0;
    const UChar *pattern;
    int32_t patLen = 0;
    int32_t sub0Pos, sub1Pos;

    UChar formatOpenParen         = 0x0028; // (
    UChar formatReplaceOpenParen  = 0x005B; // [
    UChar formatCloseParen        = 0x0029; // )
    UChar formatReplaceCloseParen = 0x005D; // ]

    UBool haveLang = TRUE; /* assume true, set false if we find we don't have
                              a lang component in the locale */
    UBool haveRest = TRUE; /* assume true, set false if we find we don't have
                              any other component in the locale */
    UBool retry = FALSE; /* set true if we need to retry, see below */

    int32_t langi = 0; /* index of the language substitution (0 or 1), virtually always 0 */

    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
        return 0;
    }

    if(destCapacity<0 || (destCapacity>0 && dest==NULL)) {
        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }

    {
        UErrorCode status = U_ZERO_ERROR;
        UResourceBundle* locbundle=ures_open(U_ICUDATA_LANG, displayLocale, &status);
        UResourceBundle* dspbundle=ures_getByKeyWithFallback(locbundle, _kLocaleDisplayPattern,
                                                             NULL, &status);

        separator=ures_getStringByKeyWithFallback(dspbundle, _kSeparator, &sepLen, &status);
        pattern=ures_getStringByKeyWithFallback(dspbundle, _kPattern, &patLen, &status);

        ures_close(dspbundle);
        ures_close(locbundle);
    }

    /* If we couldn't find any data, then use the defaults */
    if(sepLen == 0) {
       separator = defaultSeparator;
    }
    /* #10244: Even though separator is now a pattern, it is awkward to handle it as such
     * here since we are trying to build the display string in place in the dest buffer,
     * and to handle it as a pattern would entail having separate storage for the
     * substrings that need to be combined (the first of which may be the result of
     * previous such combinations). So for now we continue to treat the portion between
     * {0} and {1} as a string to be appended when joining substrings, ignoring anything
     * that is before {0} or after {1} (no existing separator pattern has any such thing).
     * This is similar to how pattern is handled below.
     */
    {
        UChar *p0=u_strstr(separator, sub0);
        UChar *p1=u_strstr(separator, sub1);
        if (p0==NULL || p1==NULL || p1<p0) {
            *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
            return 0;
        }
        separator = (const UChar *)p0 + subLen;
        sepLen = p1 - separator;
    }

    if(patLen==0 || (patLen==defaultPatLen && !u_strncmp(pattern, defaultPattern, patLen))) {
        pattern=defaultPattern;
        patLen=defaultPatLen;
        sub0Pos=defaultSub0Pos;
        sub1Pos=defaultSub1Pos;
        // use default formatOpenParen etc. set above
    } else { /* non-default pattern */
        UChar *p0=u_strstr(pattern, sub0);
        UChar *p1=u_strstr(pattern, sub1);
        if (p0==NULL || p1==NULL) {
            *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
            return 0;
        }
        sub0Pos=p0-pattern;
        sub1Pos=p1-pattern;
        if (sub1Pos < sub0Pos) { /* a very odd pattern */
            int32_t t=sub0Pos; sub0Pos=sub1Pos; sub1Pos=t;
            langi=1;
        }
        if (u_strchr(pattern, 0xFF08) != NULL) {
            formatOpenParen         = 0xFF08; // fullwidth (
            formatReplaceOpenParen  = 0xFF3B; // fullwidth [
            formatCloseParen        = 0xFF09; // fullwidth )
            formatReplaceCloseParen = 0xFF3D; // fullwidth ]
        }
    }

    /* We loop here because there is one case in which after the first pass we could need to
     * reextract the data.  If there's initial padding before the first element, we put in
     * the padding and then write that element.  If it turns out there's no second element,
     * we didn't need the padding.  If we do need the data (no preflight), and the first element
     * would have fit but for the padding, we need to reextract.  In this case (only) we
     * adjust the parameters so padding is not added, and repeat.
     */
    do {
        UChar* p=dest;
        int32_t patPos=0; /* position in the pattern, used for non-substitution portions */
        int32_t langLen=0; /* length of language substitution */
        int32_t langPos=0; /* position in output of language substitution */
        int32_t restLen=0; /* length of 'everything else' substitution */
        int32_t restPos=0; /* position in output of 'everything else' substitution */
        UEnumeration* kenum = NULL; /* keyword enumeration */

        /* prefix of pattern, extremely likely to be empty */
        if(sub0Pos) {
            if(destCapacity >= sub0Pos) {
                while (patPos < sub0Pos) {
                    *p++ = pattern[patPos++];
                }
            } else {
                patPos=sub0Pos;
            }
            length=sub0Pos;
        } else {
            length=0;
        }

        for(int32_t subi=0,resti=0;subi<2;) { /* iterate through patterns 0 and 1*/
            UBool subdone = FALSE; /* set true when ready to move to next substitution */

            /* prep p and cap for calls to get display components, pin cap to 0 since
               they complain if cap is negative */
            int32_t cap=destCapacity-length;
            if (cap <= 0) {
                cap=0;
            } else {
                p=dest+length;
            }

            if (subi == langi) { /* {0}*/
                if(haveLang) {
                    langPos=length;
                    langLen=uloc_getDisplayLanguage(locale, displayLocale, p, cap, pErrorCode);
                    length+=langLen;
                    haveLang=langLen>0;
                }
                subdone=TRUE;
            } else { /* {1} */
                if(!haveRest) {
                    subdone=TRUE;
                } else {
                    int32_t len; /* length of component (plus other stuff) we just fetched */
                    switch(resti++) {
                        case 0:
                            restPos=length;
                            len=uloc_getDisplayScriptInContext(locale, displayLocale, p, cap, pErrorCode);
                            break;
                        case 1:
                            len=uloc_getDisplayCountry(locale, displayLocale, p, cap, pErrorCode);
                            break;
                        case 2:
                            len=uloc_getDisplayVariant(locale, displayLocale, p, cap, pErrorCode);
                            break;
                        case 3:
                            kenum = uloc_openKeywords(locale, pErrorCode);
                            U_FALLTHROUGH;
                        default: {
                            const char* kw=uenum_next(kenum, &len, pErrorCode);
                            if (kw == NULL) {
                                uenum_close(kenum);
                                len=0; /* mark that we didn't add a component */
                                subdone=TRUE;
                            } else {
                                /* incorporating this behavior into the loop made it even more complex,
                                   so just special case it here */
                                len = uloc_getDisplayKeyword(kw, displayLocale, p, cap, pErrorCode);
                                if(len) {
                                    if(len < cap) {
                                        p[len]=0x3d; /* '=', assume we'll need it */
                                    }
                                    len+=1;

                                    /* adjust for call to get keyword */
                                    cap-=len;
                                    if(cap <= 0) {
                                        cap=0;
                                    } else {
                                        p+=len;
                                    }
                                }
                                /* reset for call below */
                                if(*pErrorCode == U_BUFFER_OVERFLOW_ERROR) {
                                    *pErrorCode=U_ZERO_ERROR;
                                }
                                int32_t vlen = uloc_getDisplayKeywordValue(locale, kw, displayLocale,
                                                                           p, cap, pErrorCode);
                                if(len) {
                                    if(vlen==0) {
                                        --len; /* remove unneeded '=' */
                                    }
                                    /* restore cap and p to what they were at start */
                                    cap=destCapacity-length;
                                    if(cap <= 0) {
                                        cap=0;
                                    } else {
                                        p=dest+length;
                                    }
                                }
                                len+=vlen; /* total we added for key + '=' + value */
                            }
                        } break;
                    } /* end switch */

                    if (len>0) {
                        /* we addeed a component, so add separator and write it if there's room. */
                        if(len+sepLen<=cap) {
                            const UChar * plimit = p + len;
                            for (; p < plimit; p++) {
                                if (*p == formatOpenParen) {
                                    *p = formatReplaceOpenParen;
                                } else if (*p == formatCloseParen) {
                                    *p = formatReplaceCloseParen;
                                }
                            }
                            for(int32_t i=0;i<sepLen;++i) {
                                *p++=separator[i];
                            }
                        }
                        length+=len+sepLen;
                    } else if(subdone) {
                        /* remove separator if we added it */
                        if (length!=restPos) {
                            length-=sepLen;
                        }
                        restLen=length-restPos;
                        haveRest=restLen>0;
                    }
                }
            }

            if(*pErrorCode == U_BUFFER_OVERFLOW_ERROR) {
                *pErrorCode=U_ZERO_ERROR;
            }

            if(subdone) {
                if(haveLang && haveRest) {
                    /* append internal portion of pattern, the first time,
                       or last portion of pattern the second time */
                    int32_t padLen;
                    patPos+=subLen;
                    padLen=(subi==0 ? sub1Pos : patLen)-patPos;
                    if(length+padLen < destCapacity) {
                        p=dest+length;
                        for(int32_t i=0;i<padLen;++i) {
                            *p++=pattern[patPos++];
                        }
                    } else {
                        patPos+=padLen;
                    }
                    length+=padLen;
                } else if(subi==0) {
                    /* don't have first component, reset for second component */
                    sub0Pos=0;
                    length=0;
                } else if(length>0) {
                    /* true length is the length of just the component we got. */
                    length=haveLang?langLen:restLen;
                    if(dest && sub0Pos!=0) {
                        if (sub0Pos+length<=destCapacity) {
                            /* first component not at start of result,
                               but we have full component in buffer. */
                            u_memmove(dest, dest+(haveLang?langPos:restPos), length);
                        } else {
                            /* would have fit, but didn't because of pattern prefix. */
                            sub0Pos=0; /* stops initial padding (and a second retry,
                                          so we won't end up here again) */
                            retry=TRUE;
                        }
                    }
                }

                ++subi; /* move on to next substitution */
            }
        }
    } while(retry);

    return u_terminateUChars(dest, destCapacity, length, pErrorCode);
}
// populatePower10 grabs data for a particular power of 10 from CLDR.
// The loaded data is stored in result.
static void populatePower10(const UResourceBundle* power10Bundle, CDFLocaleStyleData* result, UErrorCode& status) {
  if (U_FAILURE(status)) {
    return;
  }
  char* endPtr = NULL;
  double power10 = uprv_strtod(ures_getKey(power10Bundle), &endPtr);
  if (*endPtr != 0) {
    status = U_INTERNAL_PROGRAM_ERROR;
    return;
  }
  int32_t log10Value = computeLog10(power10, FALSE);
  // Silently ignore divisors that are too big.
  if (log10Value == MAX_DIGITS) {
    return;
  }
  int32_t size = ures_getSize(power10Bundle);
  int32_t numZeros = 0;
  UBool otherVariantDefined = FALSE;
  UResourceBundle* variantBundle = NULL;
  // Iterate over all the plural variants for the power of 10
  for (int32_t i = 0; i < size; ++i) {
    variantBundle = ures_getByIndex(power10Bundle, i, variantBundle, &status);
    if (U_FAILURE(status)) {
      ures_close(variantBundle);
      return;
    }
    const char* variant = ures_getKey(variantBundle);
    int32_t resLen;
    const UChar* formatStrP = ures_getString(variantBundle, &resLen, &status);
    if (U_FAILURE(status)) {
      ures_close(variantBundle);
      return;
    }
    UnicodeString formatStr(false, formatStrP, resLen);
    if (uprv_strcmp(variant, gOther) == 0) {
      otherVariantDefined = TRUE;
    }
    int32_t nz = populatePrefixSuffix(
        variant, log10Value, formatStr, result->unitsByVariant, status);
    if (U_FAILURE(status)) {
      ures_close(variantBundle);
      return;
    }
    if (nz != numZeros) {
      // We expect all format strings to have the same number of 0's
      // left of the decimal point.
      if (numZeros != 0) {
        status = U_INTERNAL_PROGRAM_ERROR;
        ures_close(variantBundle);
        return;
      }
      numZeros = nz;
    }
  }
  ures_close(variantBundle);
  // We expect to find an OTHER variant for each power of 10.
  if (!otherVariantDefined) {
    status = U_INTERNAL_PROGRAM_ERROR;
    return;
  }
  double divisor = power10;
  for (int32_t i = 1; i < numZeros; ++i) {
    divisor /= 10.0;
  }
  result->divisors[log10Value] = divisor;
}
Пример #26
0
/**
 * Writes into dest the display form, for displayLocale, of the value that
 * `keyword` has in `locale`.
 *
 * For the currency keyword the name is looked up in the currency data of
 * displayLocale; if no name is found the raw keyword value is copied instead.
 * For every other keyword the lookup is delegated to _getStringOrCopyKey()
 * with the raw value as the substitute.
 *
 * @param locale        locale ID whose keyword value is wanted
 * @param keyword       keyword name
 * @param displayLocale locale in which the value should be displayed
 * @param dest          output buffer; may be NULL only if destCapacity is 0
 * @param destCapacity  capacity of dest in UChars; must not be negative
 * @param status        in/out error code; nothing is done if it is NULL or
 *                      already a failure
 * @return the length of the display string. In the currency branch, if dest
 *         is too small, U_BUFFER_OVERFLOW_ERROR is set and the needed length
 *         is returned. Returns 0 on argument errors and when the keyword
 *         value does not fit the internal buffer.
 */
U_CAPI int32_t U_EXPORT2
uloc_getDisplayKeywordValue(   const char* locale,
                               const char* keyword,
                               const char* displayLocale,
                               UChar* dest,
                               int32_t destCapacity,
                               UErrorCode* status){


    char keywordValue[ULOC_FULLNAME_CAPACITY*4];
    int32_t capacity = ULOC_FULLNAME_CAPACITY*4;
    int32_t keywordValueLen =0;

    /* argument checking */
    if(status==NULL || U_FAILURE(*status)) {
        return 0;
    }

    if(destCapacity<0 || (destCapacity>0 && dest==NULL)) {
        *status=U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }

    /* get the keyword value */
    keywordValue[0]=0;
    keywordValueLen = uloc_getKeywordValue(locale, keyword, keywordValue, capacity, status);

    /*
     * keywordValue is used below as a NUL-terminated C string (as a resource
     * key and as the substitute string). Under ICU's output-string convention
     * a value that exactly fills the buffer is reported with
     * U_STRING_NOT_TERMINATED_WARNING and a longer one with
     * U_BUFFER_OVERFLOW_ERROR; in both cases the buffer has no terminator.
     * Treat both as an overflow and stop before the buffer is read.
     */
    if(*status == U_STRING_NOT_TERMINATED_WARNING) {
        *status = U_BUFFER_OVERFLOW_ERROR;
    }
    if(*status == U_BUFFER_OVERFLOW_ERROR) {
        return 0;
    }

    /*
     * if the keyword is equal to currency .. then to get the display name
     * we need to do the fallback ourselves
     */
    if(uprv_stricmp(keyword, _kCurrency)==0){

        int32_t dispNameLen = 0;
        const UChar *dispName = NULL;

        UResourceBundle *bundle     = ures_open(U_ICUDATA_CURR, displayLocale, status);
        UResourceBundle *currencies = ures_getByKey(bundle, _kCurrencies, NULL, status);
        UResourceBundle *currency   = ures_getByKeyWithFallback(currencies, keywordValue, NULL, status);

        dispName = ures_getStringByIndex(currency, UCURRENCY_DISPLAY_NAME_INDEX, &dispNameLen, status);

        /*close the bundles */
        ures_close(currency);
        ures_close(currencies);
        ures_close(bundle);

        if(U_FAILURE(*status)){
            if(*status == U_MISSING_RESOURCE_ERROR){
                /* we just want to write the value over if nothing is available */
                *status = U_USING_DEFAULT_WARNING;
            }else{
                return 0;
            }
        }

        /* now copy the dispName over if not NULL */
        if(dispName != NULL){
            if(dispNameLen <= destCapacity){
                /* skip the copy for an empty name: dest may be NULL when destCapacity is 0 */
                if(dispNameLen > 0){
                    uprv_memcpy(dest, dispName, dispNameLen * U_SIZEOF_UCHAR);
                }
                return u_terminateUChars(dest, destCapacity, dispNameLen, status);
            }else{
                *status = U_BUFFER_OVERFLOW_ERROR;
                return dispNameLen;
            }
        }else{
            /* we have not found the display name for the value .. just copy over */
            if(keywordValueLen <= destCapacity){
                u_charsToUChars(keywordValue, dest, keywordValueLen);
                return u_terminateUChars(dest, destCapacity, keywordValueLen, status);
            }else{
                 *status = U_BUFFER_OVERFLOW_ERROR;
                return keywordValueLen;
            }
        }


    }else{

        /* all other keywords: look the value up under the keyword's type table,
           with the raw value itself as the substitute */
        return _getStringOrCopyKey(U_ICUDATA_LANG, displayLocale,
                                   _kTypes, keyword,
                                   keywordValue,
                                   keywordValue,
                                   dest, destCapacity,
                                   status);
    }
}
Пример #27
0
/*
 * Reads an ICU .dat package file into this Package object.
 *
 * The file is loaded with readFile(), its data header is swapped to the
 * local platform form, and the data format ("CmnD", format version 1) is
 * verified. The table of contents is then read: the item name strings are
 * swapped into the string store, and items[] is filled with each item's
 * name (without the package prefix), data pointer, length and platform type.
 * If the input type letter differs from U_ICUDATA_TYPE_LETTER[0] the items
 * are re-sorted for the local charset.
 *
 * Every error is reported on stderr and ends the process via exit().
 *
 * @param filename path of the .dat package to read
 */
void
Package::readPackage(const char *filename) {
    UDataSwapper *ds;
    const UDataInfo *pInfo;
    UErrorCode errorCode;

    const uint8_t *inBytes;

    int32_t length, offset, i;
    int32_t itemLength, typeEnum;
    char type;

    const UDataOffsetTOCEntry *inEntries;

    extractPackageName(filename, inPkgName, (int32_t)sizeof(inPkgName));

    /* read the file */
    inData=readFile(NULL, filename, inLength, type);
    length=inLength;

    /*
     * swap the header - even if the swapping itself is a no-op
     * because it tells us the header length
     */
    errorCode=U_ZERO_ERROR;
    makeTypeProps(type, inCharset, inIsBigEndian);
    ds=udata_openSwapper(inIsBigEndian, inCharset, U_IS_BIG_ENDIAN, U_CHARSET_FAMILY, &errorCode);
    if(U_FAILURE(errorCode)) {
        fprintf(stderr, "icupkg: udata_openSwapper(\"%s\") failed - %s\n",
                filename, u_errorName(errorCode));
        exit(errorCode);
    }

    ds->printError=printPackageError;
    ds->printErrorContext=stderr;

    /* never hand the swapper more bytes than the file actually has */
    headerLength=sizeof(header);
    if(length<headerLength) {
        headerLength=length;
    }
    headerLength=udata_swapDataHeader(ds, inData, headerLength, header, &errorCode);
    if(U_FAILURE(errorCode)) {
        exit(errorCode);
    }

    /* check data format and format version */
    pInfo=(const UDataInfo *)((const char *)inData+4);
    if(!(
        pInfo->dataFormat[0]==0x43 &&   /* dataFormat="CmnD" */
        pInfo->dataFormat[1]==0x6d &&
        pInfo->dataFormat[2]==0x6e &&
        pInfo->dataFormat[3]==0x44 &&
        pInfo->formatVersion[0]==1
    )) {
        fprintf(stderr, "icupkg: data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as an ICU .dat package\n",
                pInfo->dataFormat[0], pInfo->dataFormat[1],
                pInfo->dataFormat[2], pInfo->dataFormat[3],
                pInfo->formatVersion[0]);
        exit(U_UNSUPPORTED_ERROR);
    }
    inIsBigEndian=(UBool)pInfo->isBigEndian;
    inCharset=pInfo->charsetFamily;

    /* the item count follows the header, then the ToC entries */
    inBytes=(const uint8_t *)inData+headerLength;
    inEntries=(const UDataOffsetTOCEntry *)(inBytes+4);

    /* check that the itemCount fits, then the ToC table, then at least the header of the last item */
    length-=headerLength;
    if(length<4) {
        /* itemCount does not fit */
        offset=0x7fffffff;
    } else {
        itemCount=udata_readInt32(ds, *(const int32_t *)inBytes);
        if(itemCount==0) {
            offset=4;
        } else if(itemCount<0 || itemCount>(length-4)/8) {
            /*
             * Corrupt (negative) item count, or the ToC table does not fit.
             * The division form is the same test as length<(4+8*itemCount)
             * for valid counts, but cannot overflow for a huge itemCount,
             * and a negative count must not reach inEntries[itemCount-1].
             */
            offset=0x7fffffff;
        } else {
            /* offset of the last item plus at least 20 bytes for its header */
            offset=20+(int32_t)ds->readUInt32(inEntries[itemCount-1].dataOffset);
        }
    }
    if(length<offset) {
        fprintf(stderr, "icupkg: too few bytes (%ld after header) for a .dat package\n",
                        (long)length);
        exit(U_INDEX_OUTOFBOUNDS_ERROR);
    }
    /* do not modify the package length variable until the last item's length is set */

    if(itemCount>0) {
        char prefix[MAX_PKG_NAME_LENGTH+4];
        char *s, *inItemStrings;
        int32_t inPkgNameLength, prefixLength, stringsOffset;

        if(itemCount>MAX_FILE_COUNT) {
            fprintf(stderr, "icupkg: too many items, maximum is %d\n", MAX_FILE_COUNT);
            exit(U_BUFFER_OVERFLOW_ERROR);
        }

        /* swap the item name strings */
        /* the strings start after the ToC and run up to the first item's data */
        stringsOffset=4+8*itemCount;
        itemLength=(int32_t)(ds->readUInt32(inEntries[0].dataOffset))-stringsOffset;

        // don't include padding bytes at the end of the item names
        while(itemLength>0 && inBytes[stringsOffset+itemLength-1]!=0) {
            --itemLength;
        }

        if((inStringTop+itemLength)>STRING_STORE_SIZE) {
            fprintf(stderr, "icupkg: total length of item name strings too long\n");
            exit(U_BUFFER_OVERFLOW_ERROR);
        }

        inItemStrings=inStrings+inStringTop;
        ds->swapInvChars(ds, inBytes+stringsOffset, itemLength, inItemStrings, &errorCode);
        if(U_FAILURE(errorCode)) {
            fprintf(stderr, "icupkg failed to swap the input .dat package item name strings\n");
            exit(U_INVALID_FORMAT_ERROR);
        }
        inStringTop+=itemLength;

        // reset the Item entries
        memset(items, 0, itemCount*sizeof(Item));

        inPkgNameLength=strlen(inPkgName);
        memcpy(prefix, inPkgName, inPkgNameLength);
        prefixLength=inPkgNameLength;

        /*
         * Get the common prefix of the items.
         * New-style ICU .dat packages use tree separators ('/') between package names,
         * tree names, and item names,
         * while old-style ICU .dat packages (before multi-tree support)
         * use an underscore ('_') between package and item names.
         */
        offset=(int32_t)ds->readUInt32(inEntries[0].nameOffset)-stringsOffset;
        s=inItemStrings+offset;
        if( (int32_t)strlen(s)>=(inPkgNameLength+2) &&
            0==memcmp(s, inPkgName, inPkgNameLength) &&
            s[inPkgNameLength]=='_'
        ) {
            // old-style .dat package
            prefix[prefixLength++]='_';
        } else {
            // new-style .dat package
            prefix[prefixLength++]=U_TREE_ENTRY_SEP_CHAR;
            // if it turns out to not contain U_TREE_ENTRY_SEP_CHAR
            // then the test in the loop below will fail
        }
        prefix[prefixLength]=0;

        /* read the ToC table */
        for(i=0; i<itemCount; ++i) {
            // skip the package part of the item name, error if it does not match the actual package name
            // or if nothing follows the package name
            offset=(int32_t)ds->readUInt32(inEntries[i].nameOffset)-stringsOffset;
            s=inItemStrings+offset;
            if(0!=strncmp(s, prefix, prefixLength) || s[prefixLength]==0) {
                fprintf(stderr, "icupkg: input .dat item name \"%s\" does not start with \"%s\"\n",
                        s, prefix);
                exit(U_UNSUPPORTED_ERROR);
            }
            items[i].name=s+prefixLength;

            // set the item's data
            items[i].data=(uint8_t *)inBytes+ds->readUInt32(inEntries[i].dataOffset);
            if(i>0) {
                // an item's length is the distance to the next item's data
                items[i-1].length=(int32_t)(items[i].data-items[i-1].data);

                // set the previous item's platform type
                typeEnum=getTypeEnumForInputData(items[i-1].data, items[i-1].length, &errorCode);
                if(typeEnum<0 || U_FAILURE(errorCode)) {
                    fprintf(stderr, "icupkg: not an ICU data file: item \"%s\" in \"%s\"\n", items[i-1].name, filename);
                    exit(U_INVALID_FORMAT_ERROR);
                }
                items[i-1].type=makeTypeLetter(typeEnum);
            }
            items[i].isDataOwned=FALSE;
        }
        // set the last item's length
        items[itemCount-1].length=length-ds->readUInt32(inEntries[itemCount-1].dataOffset);

        // set the last item's platform type
        typeEnum=getTypeEnumForInputData(items[itemCount-1].data, items[itemCount-1].length, &errorCode);
        if(typeEnum<0 || U_FAILURE(errorCode)) {
            // name the last item explicitly rather than via the loop variable's final value
            fprintf(stderr, "icupkg: not an ICU data file: item \"%s\" in \"%s\"\n", items[itemCount-1].name, filename);
            exit(U_INVALID_FORMAT_ERROR);
        }
        items[itemCount-1].type=makeTypeLetter(typeEnum);

        if(type!=U_ICUDATA_TYPE_LETTER[0]) {
            // sort the item names for the local charset
            sortItems();
        }
    }

    udata_closeSwapper(ds);
}
Пример #28
0
/*
 * Native half of java.text.Bidi: runs the bidi algorithm over
 * text[tStart .. tStart+length) with base level dir and, when embs is not
 * NULL, the embedding levels starting at embs[eStart]. On success the
 * direction, paragraph level, run table and the positions of whitespace in
 * runs whose level parity differs from the paragraph level are handed to
 * resetBidi(). Nothing is reported if opening the UBiDi object, pinning the
 * text, or ubidi_setPara() fails.
 */
JNIEXPORT void JNICALL Java_java_text_Bidi_nativeBidiChars
  (JNIEnv *env, jclass cls, jobject jbidi, jcharArray text, jint tStart, jbyteArray embs, jint eStart, jint length, jint dir)
{
    UErrorCode err = U_ZERO_ERROR;
    UBiDi* bidi = ubidi_openSized(length, length, &err);
    jchar *chars;
    jbyte *embBytes = 0;
    uint8_t *embLevels = 0;
    jint resDir;
    jint resLevel;
    jint runCount = 0;
    jintArray runsArray = 0;
    jintArray cwsArray = 0;

    if (U_FAILURE(err)) {
        /* bidi is not closed on this path */
        return;
    }

    chars = (jchar*)(*env)->GetPrimitiveArrayCritical(env, text, NULL);
    if (!chars) {
        ubidi_close(bidi);
        return;
    }

    /* explicit embeddings are optional; without them NULL is passed to setPara */
    if (embs != NULL) {
        embBytes = (jbyte*)(*env)->GetPrimitiveArrayCritical(env, embs, NULL);
        if (embBytes) {
            embLevels = (uint8_t*)(embBytes + eStart);
        }
    }
    ubidi_setPara(bidi, chars + tStart, length, (UBiDiLevel)dir, embLevels, &err);

    /* the pinned arrays were only read, so release them without copy-back */
    if (embBytes) {
        (*env)->ReleasePrimitiveArrayCritical(env, embs, embBytes, JNI_ABORT);
    }
    (*env)->ReleasePrimitiveArrayCritical(env, text, chars, JNI_ABORT);

    if (U_FAILURE(err)) {
        ubidi_close(bidi);
        return;
    }

    resDir = (jint)ubidi_getDirection(bidi);
    resLevel = (jint)ubidi_getParaLevel(bidi);

    /* run and whitespace tables are only built for mixed-direction text */
    if (resDir == UBIDI_MIXED) {
        runCount = (jint)ubidi_countRuns(bidi, &err);
        if (!U_FAILURE(err) && runCount) {
            jint* runs = (jint*)calloc(runCount * 2, sizeof(jint));
            if (runs) {
                const DirProp *dp;
                int32_t limit = 0;
                UBiDiLevel level;
                jint *cursor = runs;
                jint wsCount = 0;
                jint pos = 0;

                /* fill the table with (limit, level) pairs, one per logical run */
                while (limit < length) {
                    ubidi_getLogicalRun(bidi, limit, &limit, &level);
                    cursor[0] = (jint)limit;
                    cursor[1] = (jint)level;
                    cursor += 2;
                }

                dp = bidi->dirProps;

                /* first pass: count WS characters in runs whose level parity
                   differs from the paragraph level */
                cursor = runs;
                do {
                    if ((cursor[1] ^ resLevel) & 0x1) {
                        for (; pos < cursor[0]; ++pos) {
                            if (dp[pos] == WS) {
                                ++wsCount;
                            }
                        }
                    } else {
                        pos = cursor[0];
                    }
                    cursor += 2;
                } while (pos < length);

                cwsArray = (*env)->NewIntArray(env, wsCount);
                if (cwsArray) {
                    jint* cws = (jint*)(*env)->GetPrimitiveArrayCritical(env, cwsArray, NULL);
                    if (cws) {
                        /* second pass: same walk, now recording the positions */
                        jint filled = 0;
                        pos = 0;
                        cursor = runs;
                        do {
                            if ((cursor[1] ^ resLevel) & 0x1) {
                                for (; pos < cursor[0]; ++pos) {
                                    if (dp[pos] == WS) {
                                        cws[filled++] = pos;
                                    }
                                }
                            } else {
                                pos = cursor[0];
                            }
                            cursor += 2;
                        } while (pos < length);
                        (*env)->ReleasePrimitiveArrayCritical(env, cwsArray, cws, 0);
                    }
                }

                runsArray = (*env)->NewIntArray(env, runCount * 2);
                if (runsArray) {
                    (*env)->SetIntArrayRegion(env, runsArray, 0, runCount * 2, runs);
                }
                free(runs);
            }
        }
    }

    resetBidi(env, cls, jbidi, resDir, resLevel, length, runsArray, cwsArray);
    ubidi_close(bidi);
}
Пример #29
0
/**
 * Exercises both CollationElementIterator::setText() overloads
 * (UnicodeString and CharacterIterator), including the empty-string case.
 *
 * Two iterators are created over the fixture strings test1 and test2.
 * iter2 is advanced a few elements, then re-pointed at test1 and compared
 * against iter1 with assertEqual(). Finally both overloads are checked to
 * yield no collation elements for an empty string.
 *
 * The test owns iter1, iter2 and chariter and deletes them before returning.
 */
void CollationIteratorTest::TestSetText(/* char* par */)
{
    CollationElementIterator *iter1 = en_us->createCollationElementIterator(test1);
    CollationElementIterator *iter2 = en_us->createCollationElementIterator(test2);
    UErrorCode status = U_ZERO_ERROR;

    // Run through the second iterator just to exercise it
    int32_t c = iter2->next(status);
    int32_t i = 0;

    while ( ++i < 10 && c != CollationElementIterator::NULLORDER)
    {
        if (U_FAILURE(status))
        {
            errln("iter2->next() returned an error.");
            delete iter2;
            delete iter1;
            // Bail out: both iterators have just been destroyed, so falling
            // through would dereference freed objects and delete them a
            // second time at the end of the test.
            return;
        }

        c = iter2->next(status);
    }

    // Now set it to point to the same string as the first iterator
    iter2->setText(test1, status);

    if (U_FAILURE(status))
    {
        errln("call to iter2->setText(test1) failed.");
    }
    else
    {
        assertEqual(*iter1, *iter2);
    }
    iter1->reset();
    // Now use the overloaded setText(CharacterIterator&, UErrorCode&) function to set the text
    CharacterIterator* chariter = new StringCharacterIterator(test1);
    iter2->setText(*chariter, status);
    if (U_FAILURE(status))
    {
        errln("call to iter2->setText(chariter(test1)) failed.");
    }
    else
    {
        assertEqual(*iter1, *iter2);
    }
   
    // test for an empty string (UnicodeString overload)
    UnicodeString empty("");
    iter1->setText(empty, status);
    if (U_FAILURE(status) 
        || iter1->next(status) != (int32_t)UCOL_NULLORDER) {
        errln("Empty string should have no CEs.");
    }
    // Same check through the CharacterIterator overload; chariter was
    // created above as a StringCharacterIterator, so the downcast is valid.
    ((StringCharacterIterator *)chariter)->setText(empty);
    iter1->setText(*chariter, status);
    if (U_FAILURE(status) 
        || iter1->next(status) != (int32_t)UCOL_NULLORDER) {
        errln("Empty string should have no CEs.");
    }
    delete chariter;
    delete iter2;
    delete iter1;
}
Пример #30
0
/*
 * Callback helper: converts the UChars in [*source, sourceLimit) with the
 * converter in args and writes the result to args->target.
 *
 * - Does nothing if *err already holds a failure on entry.
 * - If args->offsets is set, one entry equal to offsetIndex is stored for
 *   every byte that was appended to args->target.
 * - If the first conversion reports U_BUFFER_OVERFLOW_ERROR, a second
 *   conversion is run into the unused tail of the converter's
 *   charErrorBuffer. *err is set to U_INTERNAL_PROGRAM_ERROR when that
 *   buffer is already full on entry, or is full or overflowed afterwards.
 *
 * Because this feeds data back through the same converter, recursion is
 * possible: the caller must have changed the callback or the data, or the
 * conversion can loop forever.
 */
U_CAPI void  U_EXPORT2
ucnv_cbFromUWriteUChars(UConverterFromUnicodeArgs *args,
                             const UChar** source,
                             const UChar*  sourceLimit,
                             int32_t offsetIndex,
                             UErrorCode * err)
{
    char *targetBefore;
    char *spillPos;
    const char *spillEnd;
    UErrorCode spillStatus = U_ZERO_ERROR;
    int8_t pendingLen;

    if (U_FAILURE(*err)) {
        return;
    }

    /* Remember where the output started so the written bytes can be counted. */
    targetBefore = args->target;

    ucnv_fromUnicode(args->converter,
                     &args->target,
                     args->targetLimit,
                     source,
                     sourceLimit,
                     NULL,   /* no offsets */
                     FALSE,  /* no flush */
                     err);

    if (args->offsets) {
        /* One offset entry per byte produced by the call above. */
        for (; targetBefore != args->target; ++targetBefore) {
            *(args->offsets)++ = offsetIndex;
        }
    }

    /*
     * Only a target overflow needs more work. (A stricter test,
     * "*source < sourceLimit && args->target >= args->targetLimit",
     * was once suggested by S. Hrcek but is not applied.)
     */
    if (*err != U_BUFFER_OVERFLOW_ERROR) {
        return;
    }

    /* Continue into the fixed-size charErrorBuffer, after its current contents. */
    pendingLen = args->converter->charErrorBufferLength;
    spillPos = (char *)(args->converter->charErrorBuffer + pendingLen);
    spillEnd = (char *)(args->converter->charErrorBuffer +
        sizeof(args->converter->charErrorBuffer));

    if (spillPos >= spillEnd) {
        /* No free slot left in the error buffer. */
        *err = U_INTERNAL_PROGRAM_ERROR;
        return;
    }

    /*
     * Report the error buffer as empty for the duration of the nested call,
     * so the converter does not try to flush the buffer onto itself.
     * If the converter needs the error buffer this time, there is no room.
     */
    args->converter->charErrorBufferLength = 0;

    ucnv_fromUnicode(args->converter,
                     &spillPos,
                     spillEnd,
                     source,
                     sourceLimit,
                     NULL,
                     FALSE,
                     &spillStatus);

    /* The new length is simply how far the write position has advanced. */
    args->converter->charErrorBufferLength = (int8_t)(
        spillPos - (char*)args->converter->charErrorBuffer);

    if ((spillPos >= spillEnd) || (spillStatus == U_BUFFER_OVERFLOW_ERROR)) {
        /* The callback produced more data than the error buffer can hold. */
        *err = U_INTERNAL_PROGRAM_ERROR;
    }

    /*
     * Any other failure left in spillStatus (invalid/truncated/illegal
     * input) is intentionally not propagated: *err keeps reporting
     * U_BUFFER_OVERFLOW_ERROR, which has to take precedence.
     */
}