Exemple #1
0
/*
 * Tcl_UniCharIsAlnum --
 *
 *	Reports whether the character c is alphanumeric.
 *
 *	Values below 0x7f are classified with the C library's isalnum();
 *	every other value is looked up in Core Foundation's predefined
 *	alphanumeric character set.
 *
 * Results:
 *	Non-zero (true) if c is alphanumeric, false otherwise.
 */
bool Tcl_UniCharIsAlnum(Tcl_UniChar c)
{
    if (c >= 0x7f) {
        return CFCharacterSetIsCharacterMember(
                CFCharacterSetGetPredefined(kCFCharacterSetAlphaNumeric), c);
    }
    return isalnum(c);
}
Exemple #2
0
/*--
Function:
  PAL_iswblank

Returns TRUE if c is a Win32 "blank" character.

The answer comes from Core Foundation's whitespace-and-newline set, with
two groups of hard-coded exceptions where the code treats Windows and
Core Foundation as disagreeing.
--*/
int 
__cdecl 
PAL_iswblank(wchar_16 c)
{
    static CFCharacterSetRef sSpaceAndNewlineSet;

    // The predefined set is fetched on the first call and reused afterwards.
    if (!sSpaceAndNewlineSet)
    {
        sSpaceAndNewlineSet = CFCharacterSetGetPredefined(
                                            kCFCharacterSetWhitespaceAndNewline);
    }

    // Blank on Windows, but not members of the Core Foundation set.
    if (c == 0x0085 || c == 0x1680 || c == 0x202f || c == 0xfeff)
    {
        return TRUE;
    }

    // Members of the Core Foundation set, but not blank on Windows.
    if (c == 0x2028 || c == 0x2029)
    {
        return FALSE;
    }

    // Everything else: defer to Core Foundation.
    return CFCharacterSetIsCharacterMember(sSpaceAndNewlineSet, c);
}
Exemple #3
0
/*
 * Tcl_UniCharIsDigit --
 *
 *	Reports whether the character c is a decimal digit.
 *
 *	Values below 0x7f are classified with the C library's isdigit();
 *	every other value is looked up in Core Foundation's predefined
 *	decimal-digit character set.
 *
 * Results:
 *	Non-zero (true) if c is a digit, false otherwise.
 */
bool Tcl_UniCharIsDigit(Tcl_UniChar c)
{
    if (c >= 0x7f) {
        return CFCharacterSetIsCharacterMember(
                CFCharacterSetGetPredefined(kCFCharacterSetDecimalDigit), c);
    }
    return isdigit(c);
}
Exemple #4
0
/*
 * Tcl_UniCharIsAlpha --
 *
 *	Reports whether the character c is a letter.
 *
 *	Values below 0x7f are classified with the C library's isalpha();
 *	every other value is looked up in Core Foundation's predefined
 *	letter character set.
 *
 * Results:
 *	Non-zero (true) if c is a letter, false otherwise.
 */
bool Tcl_UniCharIsAlpha(Tcl_UniChar c)
{
    if (c >= 0x7f) {
        return CFCharacterSetIsCharacterMember(
                CFCharacterSetGetPredefined(kCFCharacterSetLetter), c);
    }
    return isalpha(c);
}
Exemple #5
0
/*
 * Tcl_UniCharIsSpace --
 *
 *	Reports whether the character c is white space.
 *
 *	Values below 0x7f are classified with the C library's isspace();
 *	every other value is looked up in Core Foundation's predefined
 *	whitespace-and-newline character set.
 *
 * Results:
 *	Non-zero (true) if c is white space, false otherwise.
 */
bool Tcl_UniCharIsSpace(Tcl_UniChar c)
{
    if (c >= 0x7f) {
        return CFCharacterSetIsCharacterMember(
                CFCharacterSetGetPredefined(kCFCharacterSetWhitespaceAndNewline), c);
    }
    return isspace(c);
}
Exemple #6
0
/*++
Function:
  PAL_iswlower

Reports whether c is a lowercase character.

With HAVE_CFSTRING the character is looked up in Core Foundation's
predefined lowercase-letter set; otherwise the C1_LOWER flag of the
character's Unicode data record is consulted.  If no record can be
retrieved the result is FALSE.

Returns TRUE if c is lowercase, FALSE otherwise.
--*/
int
__cdecl
PAL_iswlower( wchar_16 c )
{
    BOOL bRetVal = FALSE;
#if HAVE_CFSTRING
    static CFCharacterSetRef sLowercaseSet;

    /* The predefined set is fetched on the first call and reused afterwards. */
    if (!sLowercaseSet)
    {
        sLowercaseSet = CFCharacterSetGetPredefined(
                                        kCFCharacterSetLowercaseLetter);
    }
    ENTRY( "iswlower (c=%d)\n", c );
    bRetVal = CFCharacterSetIsCharacterMember(sLowercaseSet, c);
#else   /* HAVE_CFSTRING */
    UnicodeDataRec dataRec;

    ENTRY("iswlower (c=%d)\n", c);

    if (GetUnicodeData(c, &dataRec))
    {
        if (dataRec.C1_TYPE_FLAGS & C1_LOWER)
        {
            bRetVal = TRUE;
        }
    }
    else
    {
        /* No record available: leave the result at FALSE. */
        TRACE( "Unable to retrive unicode data for the character %c.\n", c );
    }
#endif  /* HAVE_CFSTRING */
    LOGEXIT( "iswlower returns %s.\n", bRetVal == TRUE ? "TRUE" : "FALSE" );
    return bRetVal;
}
Exemple #7
0
/*--
Function:
  PAL_iswpunct

Returns TRUE if c is a punctuation character.

A character counts as punctuation when it belongs to either Core
Foundation's predefined punctuation set or its predefined symbol set.
--*/
int 
__cdecl 
PAL_iswpunct(wchar_16 c)
{
    static CFCharacterSetRef sPunctuationSet = NULL;
    static CFCharacterSetRef sSymbolSet = NULL;

    // Both predefined sets are fetched on first use and reused afterwards.
    if (!sPunctuationSet)
    {
        sPunctuationSet = CFCharacterSetGetPredefined(kCFCharacterSetPunctuation);
    }
    if (!sSymbolSet)
    {
        sSymbolSet = CFCharacterSetGetPredefined(kCFCharacterSetSymbol);
    }

    if (CFCharacterSetIsCharacterMember(sPunctuationSet, c))
    {
        return 1;
    }
    return CFCharacterSetIsCharacterMember(sSymbolSet, c) != 0;
}
static void
CFStringTruncateToUTF8Length(CFMutableStringRef str, ssize_t utf8LengthLimit)
    // Shortens str in place so that its UTF-8 encoding occupies at most
    // utf8LengthLimit bytes (the terminating null byte is not counted).
    // The cut never lands inside a UTF-16 surrogate pair, and any
    // whitespace/newline characters left at the end of the shortened
    // string are removed as well.
    //
    // utf8LengthLimit must be non-negative; that is what guarantees the
    // first loop stops at or before the empty prefix.
{
    const CFIndex       originalLen = CFStringGetLength(str);
    CFIndex             shortLen = originalLen;
    CFCharacterSetRef   whiteCharSet;

    // Phase 1: measure ever-shorter prefixes until one fits the byte budget.
    for (;;) {
        CFIndex utf8Length;
        CFIndex convertedLen;

        // With a NULL buffer CFStringGetBytes only reports, via utf8Length,
        // how many bytes the prefix would need (no trailing null byte).
        // The return value is the number of UTF-16 units actually converted,
        // which the assert expects to be shortLen or shortLen - 1.
        convertedLen = CFStringGetBytes(str, CFRangeMake(0, shortLen), kCFStringEncodingUTF8, 0, false, NULL, 0, &utf8Length);
        assert( (convertedLen == shortLen) || (convertedLen == (shortLen - 1)) );
        shortLen = convertedLen;

        if (utf8Length <= utf8LengthLimit) {
            break;
        }
        shortLen -= 1;
    }

    whiteCharSet = CFCharacterSetGetPredefined(kCFCharacterSetWhitespaceAndNewline);
    assert(whiteCharSet != NULL);

    // Phase 2: back up over whitespace/newline characters at the end of the prefix.
    while ( (shortLen != 0)
            && CFCharacterSetIsCharacterMember(whiteCharSet, CFStringGetCharacterAtIndex(str, shortLen - 1)) ) {
        shortLen -= 1;
    }

    // Phase 3: drop everything after the prefix that survived.
    CFStringDelete(str, CFRangeMake(shortLen, originalLen - shortLen));
}
/**
 * Rate how natural the boundary between two adjacent strings is.
 * The last character of `one` and the first character of `two` are
 * classified, and each additional property satisfied by either side
 * raises the score by one:
 *   0 - both characters are alphanumeric
 *   1 - at least one is non-alphanumeric
 *   2 - ... and at least one is whitespace/newline
 *   3 - ... and at least one is a control character (line break)
 *   4 - ... and `one` ends with, or `two` starts with, a blank line
 *   5 - either string is empty (an edge)
 * @param one First CFStringRef.
 * @param two Second CFStringRef.
 * @return The score, from 0 (worst) to 5 (best).
 */
CFIndex diff_cleanupSemanticScore(CFStringRef one, CFStringRef two) {
  static Boolean firstRun = true;
  static CFCharacterSetRef alphaNumericSet = NULL;
  static CFCharacterSetRef whiteSpaceSet = NULL;
  static CFCharacterSetRef controlSet = NULL;
  static regex_t blankLineEndRegEx;
  static regex_t blankLineStartRegEx;

  // One-time setup: fetch the character sets and compile both regexes.
  if (firstRun) {
    int status;

    alphaNumericSet = CFCharacterSetGetPredefined(kCFCharacterSetAlphaNumeric);
    whiteSpaceSet = CFCharacterSetGetPredefined(kCFCharacterSetWhitespaceAndNewline);
    controlSet = CFCharacterSetGetPredefined(kCFCharacterSetControl);

    status = regcomp(&blankLineEndRegEx, "\n\r?\n$", REG_EXTENDED | REG_NOSUB);
    check(status == 0);
    status = regcomp(&blankLineStartRegEx, "^\r?\n\r?\n", REG_EXTENDED | REG_NOSUB);
    check(status == 0);

    firstRun = false;
  }

  // Edges are the best.
  if (CFStringGetLength(one) == 0 || CFStringGetLength(two) == 0) {
    return 5;
  }

  // Each port of this function behaves slightly differently because each
  // language defines things like 'whitespace' a little differently.  The
  // function is largely cosmetic, so native facilities are used as-is.
  const UniChar tail = CFStringGetCharacterAtIndex(one, CFStringGetLength(one) - 1);
  const UniChar head = CFStringGetCharacterAtIndex(two, 0);

  // Both sides alphanumeric: no points.
  if (CFCharacterSetIsCharacterMember(alphaNumericSet, tail)
      && CFCharacterSetIsCharacterMember(alphaNumericSet, head)) {
    return 0;
  }

  // One point for non-alphanumeric; stop here unless a side is whitespace.
  if (!CFCharacterSetIsCharacterMember(whiteSpaceSet, tail)
      && !CFCharacterSetIsCharacterMember(whiteSpaceSet, head)) {
    return 1;
  }

  // Two points for whitespace; stop here unless a side is a line break.
  if (!CFCharacterSetIsCharacterMember(controlSet, tail)
      && !CFCharacterSetIsCharacterMember(controlSet, head)) {
    return 2;
  }

  // Three points for line breaks; stop here unless a blank line is adjacent.
  if (!diff_regExMatch(one, &blankLineEndRegEx)
      && !diff_regExMatch(two, &blankLineStartRegEx)) {
    return 3;
  }

  // Four points for blank lines.
  return 4;
}
Exemple #10
0
/*--
Function:
  PAL_iswcntrl

Returns TRUE if c is a control character, i.e. a member of Core
Foundation's predefined control character set.
--*/
int 
__cdecl 
PAL_iswcntrl(wchar_16 c)
{
    static CFCharacterSetRef sControlSet;

    // The predefined set is fetched on the first call and reused afterwards.
    if (!sControlSet)
    {
        sControlSet = CFCharacterSetGetPredefined(kCFCharacterSetControl);
    }
    return CFCharacterSetIsCharacterMember(sControlSet, c);
}
Exemple #11
0
/*--
Function:
  PAL_iswspace

Reports whether c is a space character.

With HAVE_CFSTRING the answer comes from Core Foundation's predefined
whitespace set, adjusted by hard-coded exceptions where the code treats
Windows and Core Foundation as disagreeing.  Otherwise the C1_SPACE bit
reported by GetStringTypeExW is returned; if that call fails the function
returns -1.
--*/
int 
__cdecl 
PAL_iswspace(wchar_16 c)
{
    int ret;
#if HAVE_CFSTRING
    static CFCharacterSetRef sSpaceSet;

    // The predefined set is fetched on the first call and reused afterwards.
    if (!sSpaceSet)
    {
        sSpaceSet = CFCharacterSetGetPredefined(kCFCharacterSetWhitespace);
    }
    ENTRY("PAL_iswspace (c=%C)\n", c);
    if ((c >= 0x000a && c <= 0x000d) || c == 0x0085 || c == 0x1680)
    {
        // Space characters according to Windows, but not according to
        // Core Foundation.
        ret = TRUE;
    }
    else if (c >= 0x2000 && c <= 0x200b)
    {
        // Space characters according to Core Foundation, but not on
        // Windows.
        ret = FALSE;
    }
    else
    {
        ret = CFCharacterSetIsCharacterMember(sSpaceSet, c);
    }
#else   // HAVE_CFSTRING
    WORD Info;

    ENTRY("PAL_iswspace (c=%C)\n", c);

    ret = GetStringTypeExW(LOCALE_USER_DEFAULT, CT_CTYPE1, (WCHAR*)&c, 1, &Info);
    if (!ret)
    {
        ASSERT("GetStringTypeExW failed to get information for %#X!\n", c);
        return -1;
    }

    // Non-zero exactly when the C1_SPACE bit is set for c.
    ret = (Info & C1_SPACE);
#endif  // HAVE_CFSTRING
    LOGEXIT("PAL_iswspace returns int %d\n", ret);
    return ret;
}
Exemple #12
0
/*++
Function:

    iswdigit

Reports whether c is a decimal digit.

With HAVE_COREFOUNDATION the character is looked up in Core Foundation's
predefined decimal-digit set; otherwise the C1_DIGIT flag of the
character's Unicode data record is consulted.  If no record exists the
result is 0.

Returns non-zero if c is a digit, 0 otherwise.
--*/
int
__cdecl
PAL_iswdigit( wchar_16 c )
{
    UINT nRetVal = 0;
#if HAVE_COREFOUNDATION
    static CFCharacterSetRef sDigitSet;

    /* The predefined set is fetched on the first call and reused afterwards. */
    if (!sDigitSet)
    {
        sDigitSet = CFCharacterSetGetPredefined(
                                        kCFCharacterSetDecimalDigit);
    }
    PERF_ENTRY(iswdigit);
    ENTRY("PAL_iswdigit (c=%d)\n", c);
    nRetVal = CFCharacterSetIsCharacterMember(sDigitSet, c);
#else   /* HAVE_COREFOUNDATION */
    UnicodeDataRec dataRec;

    PERF_ENTRY(iswdigit);
    ENTRY("PAL_iswdigit (c=%d)\n", c);

    if (!GetUnicodeData(c, &dataRec))
    {
        /* No record available: the result stays 0. */
        TRACE( "No corresonding unicode record for character %d.\n", c );
    }
    else
    {
        nRetVal = (dataRec.C1_TYPE_FLAGS & C1_DIGIT) ? 1 : 0;
    }
#endif  /* HAVE_COREFOUNDATION */
    LOGEXIT("PAL_iswdigit returning %d\n", nRetVal);
    PERF_EXIT(iswdigit);
    return nRetVal;
}
Exemple #13
0
/*++
Function:
  PAL_iswupper

Reports whether c is an uppercase character.

With HAVE_COREFOUNDATION the character is looked up in Core Foundation's
predefined uppercase-letter set; otherwise the C1_UPPER flag of the
character's Unicode data record is consulted.  If no record can be
retrieved the result is FALSE.

Returns TRUE if c is uppercase, FALSE otherwise.
--*/
int
__cdecl
PAL_iswupper( wchar_16 c )
{
    BOOL bRetVal = FALSE;
#if HAVE_COREFOUNDATION
    static CFCharacterSetRef sUppercaseSet;

    /* The predefined set is fetched on the first call and reused afterwards. */
    if (!sUppercaseSet)
    {
        sUppercaseSet = CFCharacterSetGetPredefined(
                                        kCFCharacterSetUppercaseLetter);
    }
    PERF_ENTRY(iswupper);
    ENTRY( "iswupper (c=%d)\n", c );
    bRetVal = CFCharacterSetIsCharacterMember(sUppercaseSet, c);
#else   /* HAVE_COREFOUNDATION */
    UnicodeDataRec dataRec;

    PERF_ENTRY(iswupper);
    ENTRY( "iswupper (c=%d)\n", c );

    if (GetUnicodeData(c, &dataRec))
    {
        if (dataRec.C1_TYPE_FLAGS & C1_UPPER)
        {
            bRetVal = TRUE;
        }
    }
    else
    {
        /* No record available: leave the result at FALSE. */
        TRACE( "Unable to retrieve unicode data for the character %c.\n", c );
    }
#endif  /* HAVE_COREFOUNDATION */
    LOGEXIT( "iswupper returns %s.\n", bRetVal == TRUE ? "TRUE" : "FALSE" );
    PERF_EXIT(iswupper);
    return bRetVal;
}
/* Extract the value of the "max-age" directive from a Cache-Control header.

   There has got to be an easier way to do this.  For now we based this code
   on CFNetwork/Connection/URLResponse.cpp.

   The header is walked one comma-separated component at a time.  The first
   component that starts (case-insensitively) with "max-age", followed by
   optional whitespace and an '=', supplies the result: everything after the
   first '=' (leading whitespace skipped) up to the end of the trimmed
   component.

   cacheControlHeader: the header value to parse.
   Returns a string created with CFStringCreateWithSubstring, which the
   caller is responsible for releasing, or NULL if no max-age value was
   found. */
static CFStringRef copyParseMaxAge(CFStringRef cacheControlHeader) {
    /* The format of the cache control header is a comma-separated list, but
       each list element could be a key-value pair, with the value quoted and
       possibly containing a comma. */
    CFStringInlineBuffer inlineBuf;
    CFRange componentRange;
    CFIndex length = CFStringGetLength(cacheControlHeader);
    bool done = false;
    CFCharacterSetRef whitespaceSet = CFCharacterSetGetPredefined(kCFCharacterSetWhitespace);
    CFStringRef maxAgeValue = NULL;

    CFStringInitInlineBuffer(cacheControlHeader, &inlineBuf, CFRangeMake(0, length));
    componentRange.location = 0;

    while (!done) {
        bool inQuotes = false;
        bool foundComponentStart = false;
        CFIndex charIndex = componentRange.location;
        CFIndex componentEnd = -1;
        CFRange maxAgeRg;
        componentRange.length = 0;

        /* Scan to the next comma that is not inside quotes, tracking the
           first and last non-whitespace characters along the way. */
        while (charIndex < length) {
            UniChar ch = CFStringGetCharacterFromInlineBuffer(&inlineBuf, charIndex);
            if (!inQuotes && ch == ',') {
                componentRange.length = charIndex - componentRange.location;
                break;
            }
            if (!CFCharacterSetIsCharacterMember(whitespaceSet, ch)) {
                if (!foundComponentStart) {
                    foundComponentStart = true;
                    componentRange.location = charIndex;
                } else {
                    componentEnd = charIndex;
                }
                if (ch == '\"') {
                    inQuotes = !inQuotes;
                }
            }
            charIndex ++;
        }

        if (componentEnd == -1) {
            componentRange.length = charIndex - componentRange.location;
        } else {
            componentRange.length = componentEnd - componentRange.location + 1;
        }

        if (charIndex == length) {
            /* Fell off the end; this is the last component. */
            done = true;
        }

        /* componentRange should now contain the range of the current
           component; trimmed of any whitespace. */

        /* We want to look for a max-age value. */
        if (!maxAgeValue && CFStringFindWithOptions(cacheControlHeader, CFSTR("max-age"), componentRange, kCFCompareCaseInsensitive | kCFCompareAnchored, &maxAgeRg)) {
            CFIndex equalIdx;
            CFIndex maxCompRg = componentRange.location + componentRange.length;
            for (equalIdx = maxAgeRg.location + maxAgeRg.length; equalIdx < maxCompRg; equalIdx ++) {
                UniChar equalCh = CFStringGetCharacterFromInlineBuffer(&inlineBuf, equalIdx);
                if (equalCh == '=') {
                    /* Parse out max-age value: skip whitespace after '='. */
                    equalIdx ++;
                    while (equalIdx < maxCompRg && CFCharacterSetIsCharacterMember(whitespaceSet, CFStringGetCharacterAtIndex(cacheControlHeader, equalIdx))) {
                        equalIdx ++;
                    }
                    if (equalIdx < maxCompRg) {
                        maxAgeValue = CFStringCreateWithSubstring(kCFAllocatorDefault, cacheControlHeader, CFRangeMake(equalIdx, maxCompRg-equalIdx));
                    }
                    /* Only the first '=' delimits the value.  Without this
                       break the scan kept going, and a later '=' in the same
                       component (e.g. "max-age=1=2") created a second string,
                       leaking the first and returning the wrong substring. */
                    break;
                } else if (!CFCharacterSetIsCharacterMember(whitespaceSet, equalCh)) {
                    /* Not a valid max-age header; break out doing nothing */
                    break;
                }
            }
        }

        if (maxAgeValue) {
            /* Found what we came for; no need to look at later components. */
            done = true;
        }
        if (!done) {
            /* Advance to the next component; + 1 to get past the comma. */
            componentRange.location = charIndex + 1;
        }
    }

    return maxAgeValue;
}
/**
 * Rate how natural the boundary between two adjacent strings is, based on
 * the last character of `one` and the first character of `two`:
 *   6 - either string is empty (an edge)
 *   5 - a blank line sits at the boundary
 *   4 - a line break sits at the boundary
 *   3 - `one` ends in a non-alphanumeric, non-whitespace character and
 *       `two` starts with whitespace (end of sentence)
 *   2 - either side is whitespace
 *   1 - either side is non-alphanumeric
 *   0 - none of the above
 * @param one First CFStringRef.
 * @param two Second CFStringRef.
 * @return The score, from 0 (worst) to 6 (best).
 */
CFIndex diff_cleanupSemanticScore(CFStringRef one, CFStringRef two) {
  static Boolean firstRun = true;
  static CFCharacterSetRef alphaNumericSet = NULL;
  static CFCharacterSetRef whiteSpaceSet = NULL;
  static CFCharacterSetRef controlSet = NULL;
  static regex_t blankLineEndRegEx;
  static regex_t blankLineStartRegEx;

  // One-time setup: fetch the character sets and compile the boundary regexes.
  if (firstRun) {
    int status;

    alphaNumericSet = CFCharacterSetGetPredefined(kCFCharacterSetAlphaNumeric);
    whiteSpaceSet = CFCharacterSetGetPredefined(kCFCharacterSetWhitespaceAndNewline);
    controlSet = CFCharacterSetGetPredefined(kCFCharacterSetControl);

    status = regcomp(&blankLineEndRegEx, "\n\r?\n$", REG_EXTENDED | REG_NOSUB);
    check(status == 0);
    status = regcomp(&blankLineStartRegEx, "^\r?\n\r?\n", REG_EXTENDED | REG_NOSUB);
    check(status == 0);

    firstRun = false;
  }

  // Edges are the best.
  if (CFStringGetLength(one) == 0 || CFStringGetLength(two) == 0) {
    return 6;
  }

  // Each port of this function behaves slightly differently because each
  // language defines things like 'whitespace' a little differently.  The
  // function is largely cosmetic, so native facilities are used as-is.
  const UniChar tail = CFStringGetCharacterAtIndex(one, CFStringGetLength(one) - 1);
  const UniChar head = CFStringGetCharacterAtIndex(two, 0);

  // Each property is only tested when the previous, weaker one holds.
  const Boolean tailNonAlnum = !CFCharacterSetIsCharacterMember(alphaNumericSet, tail);
  const Boolean headNonAlnum = !CFCharacterSetIsCharacterMember(alphaNumericSet, head);
  const Boolean tailSpace = tailNonAlnum && CFCharacterSetIsCharacterMember(whiteSpaceSet, tail);
  const Boolean headSpace = headNonAlnum && CFCharacterSetIsCharacterMember(whiteSpaceSet, head);
  const Boolean tailBreak = tailSpace && CFCharacterSetIsCharacterMember(controlSet, tail);
  const Boolean headBreak = headSpace && CFCharacterSetIsCharacterMember(controlSet, head);
  const Boolean tailBlank = tailBreak && diff_regExMatch(one, &blankLineEndRegEx);
  const Boolean headBlank = headBreak && diff_regExMatch(two, &blankLineStartRegEx);

  // Apply the rules from weakest to strongest so the strongest match wins.
  CFIndex score = 0;
  if (tailNonAlnum || headNonAlnum) {
    // One point for non-alphanumeric.
    score = 1;
  }
  if (tailSpace || headSpace) {
    // Two points for whitespace.
    score = 2;
  }
  if (tailNonAlnum && !tailSpace && headSpace) {
    // Three points for end of sentences.
    score = 3;
  }
  if (tailBreak || headBreak) {
    // Four points for line breaks.
    score = 4;
  }
  if (tailBlank || headBlank) {
    // Five points for blank lines.
    score = 5;
  }
  return score;
}