示例#1
0
static int icu_normalizer_isnormalized(lua_State *L) {
	UNormalizationMode mode;
	UErrorCode status;
	
	icu4lua_checkustring(L,1,NORMALIZER_UV_USTRING_META);
	mode = modes[luaL_checkoption(L,2,DEFAULT_MODE_OPTION,modeNames)];

	status = U_ZERO_ERROR;
	lua_pushboolean(L,
		unorm_isNormalized(icu4lua_trustustring(L,1), (int32_t)icu4lua_ustrlen(L,1), mode, &status));
	if (U_FAILURE(status)) {
		lua_pushstring(L, u_errorName(status));
		return lua_error(L);
	}
	return 1;
}
示例#2
0
U_CAPI int32_t U_EXPORT2
uspoof_getSkeleton(const USpoofChecker *sc,
                   uint32_t type,
                   const UChar *s,  int32_t length,
                   UChar *dest, int32_t destCapacity,
                   UErrorCode *status) {

    // TODO:  this function could be sped up a bit
    //        Skip the input normalization when not needed, work from callers data.
    //        Put the initial skeleton straight into the caller's destination buffer.
    //        It probably won't need normalization.
    //        But these would make the structure more complicated.  

    const SpoofImpl *This = SpoofImpl::validateThis(sc, *status);
    if (U_FAILURE(*status)) {
        return 0;
    }
    if (length<-1 || destCapacity<0 || (destCapacity==0 && dest!=NULL) ||
        (type & ~(USPOOF_SINGLE_SCRIPT_CONFUSABLE | USPOOF_ANY_CASE)) != 0) {
        *status = U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }

   int32_t tableMask = 0;
   switch (type) {
      case 0:
        tableMask = USPOOF_ML_TABLE_FLAG;
        break;
      case USPOOF_SINGLE_SCRIPT_CONFUSABLE:
        tableMask = USPOOF_SL_TABLE_FLAG;
        break;
      case USPOOF_ANY_CASE:
        tableMask = USPOOF_MA_TABLE_FLAG;
        break;
      case USPOOF_SINGLE_SCRIPT_CONFUSABLE | USPOOF_ANY_CASE:
        tableMask = USPOOF_SA_TABLE_FLAG;
        break;
      default:
        *status = U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }

    // NFD transform of the user supplied input
    
    UChar nfdStackBuf[USPOOF_STACK_BUFFER_SIZE];
    UChar *nfdInput = nfdStackBuf;
    int32_t normalizedLen = unorm_normalize(
        s, length, UNORM_NFD, 0, nfdInput, USPOOF_STACK_BUFFER_SIZE, status);
    if (*status == U_BUFFER_OVERFLOW_ERROR) {
        nfdInput = (UChar *)uprv_malloc((normalizedLen+1)*sizeof(UChar));
        if (nfdInput == NULL) {
            *status = U_MEMORY_ALLOCATION_ERROR;
            return 0;
        }
        *status = U_ZERO_ERROR;
        normalizedLen = unorm_normalize(s, length, UNORM_NFD, 0,
                                        nfdInput, normalizedLen+1, status);
    }
    if (U_FAILURE(*status)) {
        if (nfdInput != nfdStackBuf) {
            uprv_free(nfdInput);
        }
        return 0;
    }

    // buffer to hold the Unicode defined skeleton mappings for a single code point
    UChar buf[USPOOF_MAX_SKELETON_EXPANSION];

    // Apply the skeleton mapping to the NFD normalized input string
    // Accumulate the skeleton, possibly unnormalized, in a UnicodeString.
    int32_t inputIndex = 0;
    UnicodeString skelStr;
    while (inputIndex < normalizedLen) {
        UChar32 c;
        U16_NEXT(nfdInput, inputIndex, normalizedLen, c);
        int32_t replaceLen = This->confusableLookup(c, tableMask, buf);
        skelStr.append(buf, replaceLen);
    }

    if (nfdInput != nfdStackBuf) {
        uprv_free(nfdInput);
    }
    
    const UChar *result = skelStr.getBuffer();
    int32_t  resultLen  = skelStr.length();
    UChar   *normedResult = NULL;

    // Check the skeleton for NFD, normalize it if needed.
    // Unnormalized results should be very rare.
    if (!unorm_isNormalized(result, resultLen, UNORM_NFD, status)) {
        normalizedLen = unorm_normalize(result, resultLen, UNORM_NFD, 0, NULL, 0, status);
        normedResult = static_cast<UChar *>(uprv_malloc((normalizedLen+1)*sizeof(UChar)));
        if (normedResult == NULL) {
            *status = U_MEMORY_ALLOCATION_ERROR;
            return 0;
        }
        *status = U_ZERO_ERROR;
        unorm_normalize(result, resultLen, UNORM_NFD, 0, normedResult, normalizedLen+1, status);
        result = normedResult;
        resultLen = normalizedLen;
    }

    // Copy the skeleton to the caller's buffer
    if (U_SUCCESS(*status)) {
        if (destCapacity == 0 || resultLen > destCapacity) {
            *status = resultLen>destCapacity ? U_BUFFER_OVERFLOW_ERROR : U_STRING_NOT_TERMINATED_WARNING;
        } else {
            u_memcpy(dest, result, resultLen);
            if (destCapacity > resultLen) {
                dest[resultLen] = 0;
            } else {
                *status = U_STRING_NOT_TERMINATED_WARNING;
            }
        }
     }       
     uprv_free(normedResult);
     return resultLen;
}
示例#3
0
文件: text_icu.c 项目: bos/text-icu
UBool __hs_unorm_isNormalized(const UChar *src, int32_t srcLength,
			      UNormalizationMode mode,
			      UErrorCode *pErrorCode)
{
    return unorm_isNormalized(src, srcLength, mode, pErrorCode);
}