void SyntaxHighlighter::Highlight(const std::wstring& s, SyntaxHighlighter::CallbackType highlight)
{
    if (s.empty())
        return;

    const int length = int(s.length());

    for (auto i = s.begin(); i != s.end(); ++i)
    {
        if (!u_isblank(*i))
        {
            int wlen = int(i - s.begin());
            if (wlen)
                highlight(0, wlen, LeadingWhitespace);
            break;
        }
    }

    for (auto i = s.rbegin(); i != s.rend(); ++i)
    {
        if (!u_isblank(*i))
        {
            int wlen = int(i - s.rbegin());
            if (wlen)
                highlight(length - wlen, length, LeadingWhitespace);
            break;
        }
    }

    for (auto i = s.begin(); i != s.end(); ++i)
    {
        if (*i == '\\')
        {
            int pos = int(i - s.begin());
            if (++i == s.end())
                break;
            // TODO: highlight full syntax, incl. octals: http://en.cppreference.com/w/cpp/language/escape
            switch (*i)
            {
                case '0':
                case 'n':
                case 'r':
                case 't':
                case '"':
                case '\\':
                    highlight(pos, pos + 2, Escape);
                    break;
                default:
                    break;
            }
        }
    }
}
Beispiel #2
0
bool icu_regex_traits::isctype(char_type c, char_class_type f) const
{
   // Single-bit mask built from the value u_charType() returns for c.
   const char_class_type m = char_class_type(1u << u_charType(c));

   // c matches when its category bit is requested directly in f, or when
   // one of the special class bits is set in f and the associated predicate
   // holds.  The clauses are tried top to bottom and evaluation stops at
   // the first one that succeeds.
   return ((m & f) != 0)
      || (((f & mask_blank) != 0) && u_isblank(c))
      || (((f & mask_space) != 0) && u_isspace(c))
      || (((f & mask_xdigit) != 0) && (u_digit(c, 16) >= 0))
      || (((f & mask_unicode) != 0) && (c >= 0x100))
      || (((f & mask_underscore) != 0) && (c == '_'))
      || (((f & mask_any) != 0) && (c <= 0x10FFFF))
      || (((f & mask_ascii) != 0) && (c <= 0x7F))
      || (((f & mask_vertical) != 0)
            && (::boost::re_detail::is_separator(c)
               || (c == static_cast<char_type>('\v'))
               || (m == U_GC_ZL_MASK)
               || (m == U_GC_ZP_MASK)))
      || (((f & mask_horizontal) != 0)
            && !::boost::re_detail::is_separator(c)
            && u_isspace(c)
            && (c != static_cast<char_type>('\v')));
}
Beispiel #3
0
/** \brief Tells whether \p ln is a blank string.
 *
 *  A string is blank when every character before its terminating NUL
 *  satisfies \c u_isblank (spaces and/or tabs); an empty string therefore
 *  counts as blank.
 *
 *  \param ln  NUL-terminated string to inspect
 *  \return \c 1 if \p ln is blank, \c 0 as soon as a non-blank is found
 */
__INLINE__ int u_isblank_str(const char *ln)
{
    const char *cur = ln;

    while(*cur != '\0')
    {
        if(!u_isblank(*cur))
            return 0;
        ++cur;
    }

    return 1;
}
Beispiel #4
0
/** \brief Removes leading and trailing blanks (spaces and tabs) from \p s
 *
 *  The string is modified in place: trailing blanks are cut off by writing
 *  a new terminator, then the remaining text is shifted to the start of the
 *  buffer if any leading blanks were found.
 *
 *  \param s  NUL-terminated, writable string; a \c NULL pointer or an empty
 *            string is left untouched
 */
void u_trim(char *s)
{
    char *start, *end;
    size_t len;

    if(!s)
        return;

    len = strlen(s);
    if(len == 0)
        return; /* nothing to trim */

    /* trim trailing blanks; 'end' always points one past the last kept
     * character, so no pointer before the start of 's' is ever formed */
    end = s + len;
    while(end > s && u_isblank(end[-1]))
        --end;
    *end = 0;

    /* trim leading blanks */
    start = s;
    while(*start && u_isblank(*start))
        ++start;

    /* shift the kept text (plus its terminator) to the front; when
     * start > s at least one non-blank was found, hence end > start */
    if(start > s)
        memmove(s, start, (size_t)(end - start) + 1);
}
Beispiel #5
0
/*
 * Tests whether `codepoint` belongs to at least one of the character classes
 * selected by the `flags` bitmask (enum_cclass_* values).
 *
 * Returns 1 on a match and 0 otherwise.
 *
 * Built with PARROT_HAS_ICU: each requested class is checked with the
 * corresponding ICU predicate; `interp` is unused.
 *
 * Built without ICU: codepoints below 256 are looked up in
 * Parrot_iso_8859_1_typetable.  Above that only a hand-written subset is
 * supported (the "any" class, an approximation of word/alphabetic, and
 * fixed tables for whitespace, numeric and newline).  If no match was found
 * and `flags` asks for any class other than whitespace, numeric or newline,
 * Parrot_ex_throw_from_c_noargs is called with EXCEPTION_LIBRARY_ERROR.
 */
static int
u_iscclass(PARROT_INTERP, UINTVAL codepoint, INTVAL flags)
{
    ASSERT_ARGS(u_iscclass)
#if PARROT_HAS_ICU
    UNUSED(interp);
            /* XXX which one
               return u_charDigitValue(codepoint);
               */
    /* One test per class bit; the first requested class that matches wins. */
    if ((flags & enum_cclass_uppercase)    && u_isupper(codepoint))  return 1;
    if ((flags & enum_cclass_lowercase)    && u_islower(codepoint))  return 1;
    if ((flags & enum_cclass_alphabetic)   && u_isalpha(codepoint))  return 1;
    if ((flags & enum_cclass_numeric)      && u_isdigit(codepoint))  return 1;
    if ((flags & enum_cclass_hexadecimal)  && u_isxdigit(codepoint)) return 1;
    if ((flags & enum_cclass_whitespace)   && u_isspace(codepoint))  return 1;
    if ((flags & enum_cclass_printing)     && u_isprint(codepoint))  return 1;
    if ((flags & enum_cclass_graphical)    && u_isgraph(codepoint))  return 1;
    if ((flags & enum_cclass_blank)        && u_isblank(codepoint))  return 1;
    if ((flags & enum_cclass_control)      && u_iscntrl(codepoint))  return 1;
    if ((flags & enum_cclass_alphanumeric) && u_isalnum(codepoint))  return 1;
    /* "word" = alphanumeric or underscore */
    if ((flags & enum_cclass_word)         &&
        (u_isalnum(codepoint) || codepoint == '_'))                  return 1;
    /* NOTE(review): UCHAR_LINE_BREAK appears to be an enumerated (int)
     * property in ICU rather than a binary one, so the
     * u_hasBinaryProperty() call below may never return true -- confirm
     * against the ICU uchar.h documentation. */
    if ((flags & enum_cclass_newline)      &&
        (codepoint == 0x2028 || codepoint == 0x2029 ||
         u_hasBinaryProperty(codepoint, UCHAR_LINE_BREAK)))          return 1;

    return 0;
#else
    /* Codepoints below 256: table lookup, any overlap with flags matches. */
    if (codepoint < 256)
        return (Parrot_iso_8859_1_typetable[codepoint] & flags) ? 1 : 0;

    /* Note: exact comparison, not a bit test -- only taken when "any" is
     * the sole class requested. */
    if (flags == enum_cclass_any)
        return 1;

    /* All codepoints from u+0100 to u+02af are alphabetic, so we
     * cheat on the WORD and ALPHABETIC properties to include these
     * (and incorrectly exclude all others).  This is a stopgap until
     * ICU is everywhere, or we have better non-ICU unicode support. */
    /* Exact comparison again: combined flag sets skip this shortcut. */
    if (flags == enum_cclass_word || flags == enum_cclass_alphabetic)
        return (codepoint < 0x2b0);

    if (flags & enum_cclass_whitespace) {
        /* from http://www.unicode.org/Public/UNIDATA/PropList.txt */
        switch (codepoint) {
          case 0x1680: case 0x180e: case 0x2000: case 0x2001:
          case 0x2002: case 0x2003: case 0x2004: case 0x2005:
          case 0x2006: case 0x2007: case 0x2008: case 0x2009:
          case 0x200a: case 0x2028: case 0x2029: case 0x202f:
          case 0x205f: case 0x3000:
            return 1;
          default:
            break;
        }
    }

    if (flags & enum_cclass_numeric) {
        /* from http://www.unicode.org/Public/UNIDATA/UnicodeData.txt */
        /* Each line below covers one contiguous block of ten codepoints. */
        if (codepoint >= 0x0660 && codepoint <= 0x0669) return 1;
        if (codepoint >= 0x06f0 && codepoint <= 0x06f9) return 1;
        if (codepoint >= 0x07c0 && codepoint <= 0x07c9) return 1;
        if (codepoint >= 0x0966 && codepoint <= 0x096f) return 1;
        if (codepoint >= 0x09e6 && codepoint <= 0x09ef) return 1;
        if (codepoint >= 0x0a66 && codepoint <= 0x0a6f) return 1;
        if (codepoint >= 0x0ae6 && codepoint <= 0x0aef) return 1;
        if (codepoint >= 0x0b66 && codepoint <= 0x0b6f) return 1;
        if (codepoint >= 0x0be6 && codepoint <= 0x0bef) return 1;
        if (codepoint >= 0x0c66 && codepoint <= 0x0c6f) return 1;
        if (codepoint >= 0x0ce6 && codepoint <= 0x0cef) return 1;
        if (codepoint >= 0x0d66 && codepoint <= 0x0d6f) return 1;
        if (codepoint >= 0x0e50 && codepoint <= 0x0e59) return 1;
        if (codepoint >= 0x0ed0 && codepoint <= 0x0ed9) return 1;
        if (codepoint >= 0x0f20 && codepoint <= 0x0f29) return 1;
        if (codepoint >= 0x1040 && codepoint <= 0x1049) return 1;
        if (codepoint >= 0x17e0 && codepoint <= 0x17e9) return 1;
        if (codepoint >= 0x1810 && codepoint <= 0x1819) return 1;
        if (codepoint >= 0x1946 && codepoint <= 0x194f) return 1;
        if (codepoint >= 0x19d0 && codepoint <= 0x19d9) return 1;
        if (codepoint >= 0x1b50 && codepoint <= 0x1b59) return 1;
        if (codepoint >= 0xff10 && codepoint <= 0xff19) return 1;
    }

    if (flags & enum_cclass_newline) {
        /* from http://www.unicode.org/Public/UNIDATA/extracted/DerivedLineBreak.txt
         * Line_Break=Mandatory_Break*/
        if (codepoint == 0x2028 || codepoint == 0x2029) return 1;
    }

    /* Nothing matched.  If the caller asked for a class this fallback has
     * no data for, report that instead of silently answering "no". */
    if (flags & ~(enum_cclass_whitespace | enum_cclass_numeric | enum_cclass_newline))
        Parrot_ex_throw_from_c_noargs(interp, EXCEPTION_LIBRARY_ERROR,
            "no ICU lib loaded");

    return 0;
#endif
}
Beispiel #6
0
// Adapter with a (property, code point, which) signature that answers with
// u_isblank(c); the property descriptor and the property selector are not
// consulted.
static UBool isPOSIX_blank(const BinaryProperty &/*prop*/, UChar32 c, UProperty /*which*/) {
    const UBool blank = u_isblank(c);
    return blank;
}
Beispiel #7
0
/*
 * Returns whether code point c has the binary property `which`.
 *
 * `which` is first range-checked against [UCHAR_BINARY_START,
 * UCHAR_BINARY_LIMIT); anything outside yields FALSE.  For a known property
 * the binProps[] entry decides how it is evaluated:
 *   - mask != 0: the property is a stored bit, tested in the properties
 *     vector column given by `column`;
 *   - mask == 0: `column` holds a UPROPS_SRC_* selector naming the
 *     service (case, normalization, bidi, POSIX-style character classes,
 *     ...) that computes the answer.
 *
 * Every path that does not return explicitly falls through to the final
 * "return FALSE".  That includes the normalization-based branches when the
 * library is built with UCONFIG_NO_NORMALIZATION, because their bodies are
 * compiled out.
 */
U_CAPI UBool U_EXPORT2
u_hasBinaryProperty(UChar32 c, UProperty which) {
    /* c is range-checked in the functions that are called from here */
    if(which<UCHAR_BINARY_START || UCHAR_BINARY_LIMIT<=which) {
        /* not a known binary property */
    } else {
        uint32_t mask=binProps[which].mask;
        int32_t column=binProps[which].column;
        if(mask!=0) {
            /* systematic, directly stored properties */
            return (u_getUnicodeProperties(c, column)&mask)!=0;
        } else {
            /* computed properties: column is a UPROPS_SRC_* selector here */
            if(column==UPROPS_SRC_CASE) {
                return ucase_hasBinaryProperty(c, which);
            } else if(column==UPROPS_SRC_NORM) {
#if !UCONFIG_NO_NORMALIZATION
                /* normalization properties from unorm.icu */
                switch(which) {
                case UCHAR_SEGMENT_STARTER:
                    return unorm_isCanonSafeStart(c);
                default:
                    break;
                }
#endif
            } else if(column==UPROPS_SRC_NFC) {
#if !UCONFIG_NO_NORMALIZATION
                UErrorCode errorCode=U_ZERO_ERROR;
                switch(which) {
                case UCHAR_FULL_COMPOSITION_EXCLUSION: {
                    // By definition, Full_Composition_Exclusion is the same as NFC_QC=No.
                    const Normalizer2Impl *impl=Normalizer2Factory::getNFCImpl(errorCode);
                    return U_SUCCESS(errorCode) && impl->isCompNo(impl->getNorm16(c));
                    // (the break below is unreachable: it follows a return)
                    break;
                }
                default: {
                    // UCHAR_NF[CD]_INERT properties
                    // The property value is turned into a UNormalizationMode by its
                    // offset from UCHAR_NFD_INERT.
                    const Normalizer2 *norm2=Normalizer2Factory::getInstance(
                        (UNormalizationMode)(which-UCHAR_NFD_INERT+UNORM_NFD), errorCode);
                    return U_SUCCESS(errorCode) && norm2->isInert(c);
                }
                }
#endif
            } else if(column==UPROPS_SRC_NFKC) {
#if !UCONFIG_NO_NORMALIZATION
                // UCHAR_NFK[CD]_INERT properties
                // Same offset-based mode mapping as in the NFC branch above.
                UErrorCode errorCode=U_ZERO_ERROR;
                const Normalizer2 *norm2=Normalizer2Factory::getInstance(
                    (UNormalizationMode)(which-UCHAR_NFD_INERT+UNORM_NFD), errorCode);
                return U_SUCCESS(errorCode) && norm2->isInert(c);
#endif
            } else if(column==UPROPS_SRC_NFKC_CF) {
                // currently only for UCHAR_CHANGES_WHEN_NFKC_CASEFOLDED
#if !UCONFIG_NO_NORMALIZATION
                UErrorCode errorCode=U_ZERO_ERROR;
                const Normalizer2Impl *kcf=Normalizer2Factory::getNFKC_CFImpl(errorCode);
                if(U_SUCCESS(errorCode)) {
                    // Run c through the NFKC_CF data and report whether the
                    // result differs from the one-code-point input string.
                    UnicodeString src(c);
                    UnicodeString dest;
                    {
                        // The ReorderingBuffer must be in a block because its destructor
                        // needs to release dest's buffer before we look at its contents.
                        ReorderingBuffer buffer(*kcf, dest);
                        // Small destCapacity for NFKC_CF(c).
                        if(buffer.init(5, errorCode)) {
                            const UChar *srcArray=src.getBuffer();
                            kcf->compose(srcArray, srcArray+src.length(), FALSE,
                                         TRUE, buffer, errorCode);
                        }
                    }
                    return U_SUCCESS(errorCode) && dest!=src;
                }
                // on failure to load the data: fall through to return FALSE
#endif
            } else if(column==UPROPS_SRC_BIDI) {
                /* bidi/shaping properties */
                const UBiDiProps *bdp=GET_BIDI_PROPS();
                if(bdp!=NULL) {
                    switch(which) {
                    case UCHAR_BIDI_MIRRORED:
                        return ubidi_isMirrored(bdp, c);
                    case UCHAR_BIDI_CONTROL:
                        return ubidi_isBidiControl(bdp, c);
                    case UCHAR_JOIN_CONTROL:
                        return ubidi_isJoinControl(bdp, c);
                    default:
                        break;
                    }
                }
                /* else return FALSE below */
            } else if(column==UPROPS_SRC_CHAR) {
                /* POSIX-style classes answered by the u_is...() character functions */
                switch(which) {
                case UCHAR_POSIX_BLANK:
                    return u_isblank(c);
                case UCHAR_POSIX_GRAPH:
                    return u_isgraphPOSIX(c);
                case UCHAR_POSIX_PRINT:
                    return u_isprintPOSIX(c);
                case UCHAR_POSIX_XDIGIT:
                    return u_isxdigit(c);
                default:
                    break;
                }
            } else if(column==UPROPS_SRC_CHAR_AND_PROPSVEC) {
                switch(which) {
                case UCHAR_POSIX_ALNUM:
                    return u_isalnumPOSIX(c);
                default:
                    break;
                }
            } else if(column==UPROPS_SRC_CASE_AND_NORM) {
#if !UCONFIG_NO_NORMALIZATION
                UChar nfdBuffer[4];
                const UChar *nfd;
                int32_t nfdLength;
                UErrorCode errorCode=U_ZERO_ERROR;
                const Normalizer2Impl *nfcImpl=Normalizer2Factory::getNFCImpl(errorCode);
                if(U_FAILURE(errorCode)) {
                    return FALSE;
                }
                switch(which) {
                case UCHAR_CHANGES_WHEN_CASEFOLDED:
                    /*
                     * Work on the decomposition of c when it has one.  After
                     * this block c is either a single code point (>=0) to be
                     * folded directly, or U_SENTINEL, meaning the multi-code
                     * point string nfd[0..nfdLength) must be folded instead.
                     */
                    nfd=nfcImpl->getDecomposition(c, nfdBuffer, nfdLength);
                    if(nfd!=NULL) {
                        /* c has a decomposition */
                        if(nfdLength==1) {
                            c=nfd[0];  /* single BMP code point */
                        } else if(nfdLength<=U16_MAX_LENGTH) {
                            int32_t i=0;
                            U16_NEXT(nfd, i, nfdLength, c);
                            if(i==nfdLength) {
                                /* single supplementary code point */
                            } else {
                                c=U_SENTINEL;
                            }
                        } else {
                            c=U_SENTINEL;
                        }
                    } else if(c<0) {
                        return FALSE;  /* protect against bad input */
                    }
                    errorCode=U_ZERO_ERROR;
                    if(c>=0) {
                        /* single code point */
                        /* presumably ucase_toFullFolding() returns a negative value
                         * when folding leaves c unchanged -- confirm in ucase.h */
                        const UCaseProps *csp=ucase_getSingleton(&errorCode);
                        const UChar *resultString;
                        return (UBool)(ucase_toFullFolding(csp, c, &resultString, U_FOLD_CASE_DEFAULT)>=0);
                    } else {
                        /* guess some large but stack-friendly capacity */
                        /* fold the whole decomposition and compare it with the unfolded text */
                        UChar dest[2*UCASE_MAX_STRING_LENGTH];
                        int32_t destLength;
                        destLength=u_strFoldCase(dest, LENGTHOF(dest), nfd, nfdLength, U_FOLD_CASE_DEFAULT, &errorCode);
                        return (UBool)(U_SUCCESS(errorCode) && 0!=u_strCompare(nfd, nfdLength, dest, destLength, FALSE));
                    }
                default:
                    break;
                }
#endif
            }
        }
    }
    /* unknown property, unsupported combination, or data unavailable */
    return FALSE;
}
Beispiel #8
0
bool isblank(uint32_t codepoint)
{
    return u_isblank(static_cast<UChar32>(codepoint));
}