// Generates non-fatal failures and returns false if regex is invalid; // otherwise returns true. bool ValidateRegex(const char* regex) { if (regex == NULL) { // TODO([email protected]): fix the source file location in the // assertion failures to match where the regex is used in user // code. ADD_FAILURE() << "NULL is not a valid simple regular expression."; return false; } bool is_valid = true; // True iff ?, *, or + can follow the previous atom. bool prev_repeatable = false; for (int i = 0; regex[i]; i++) { if (regex[i] == '\\') { // An escape sequence i++; if (regex[i] == '\0') { ADD_FAILURE() << FormatRegexSyntaxError(regex, i - 1) << "'\\' cannot appear at the end."; return false; } if (!IsValidEscape(regex[i])) { ADD_FAILURE() << FormatRegexSyntaxError(regex, i - 1) << "invalid escape sequence \"\\" << regex[i] << "\"."; is_valid = false; } prev_repeatable = true; } else { // Not an escape sequence. const char ch = regex[i]; if (ch == '^' && i > 0) { ADD_FAILURE() << FormatRegexSyntaxError(regex, i) << "'^' can only appear at the beginning."; is_valid = false; } else if (ch == '$' && regex[i + 1] != '\0') { ADD_FAILURE() << FormatRegexSyntaxError(regex, i) << "'$' can only appear at the end."; is_valid = false; } else if (IsInSet(ch, "()[]{}|")) { ADD_FAILURE() << FormatRegexSyntaxError(regex, i) << "'" << ch << "' is unsupported."; is_valid = false; } else if (IsRepeat(ch) && !prev_repeatable) { ADD_FAILURE() << FormatRegexSyntaxError(regex, i) << "'" << ch << "' can only follow a repeatable token."; is_valid = false; } prev_repeatable = !IsInSet(ch, "^$?*+"); } } return is_valid; }
bool CISpellAffix::AddToCharset ( char * szRangeL, char * szRangeU ) { if ( !szRangeL || !szRangeU ) return false; int iLengthL = strlen ( szRangeL ); int iLengthU = strlen ( szRangeU ); bool bSetL = ( iLengthL>0 && szRangeL[0]=='[' && szRangeL[iLengthL-1]==']' ); bool bSetR = ( iLengthU>0 && szRangeU[0]=='[' && szRangeU[iLengthU-1]==']' ); if ( bSetL!=bSetR ) return false; if ( bSetL ) { szRangeL [iLengthL - 1] = '\0'; szRangeL = szRangeL + 1; szRangeU [iLengthU - 1] = '\0'; szRangeU = szRangeU + 1; BYTE uMinL, uMaxL; if ( !GetSetMinMax ( szRangeL, uMinL, uMaxL ) ) return false; BYTE uMinU, uMaxU; if ( !GetSetMinMax ( szRangeU, uMinU, uMaxU ) ) return false; if ( ( uMaxU - uMinU )!=( uMaxL - uMinL ) ) return false; for ( BYTE i=0; i<=( uMaxL - uMinL ); ++i ) if ( IsInSet ( uMinL + i, szRangeL ) && IsInSet ( uMinU + i, szRangeU ) ) AddCharPair ( uMinL + i, uMinU + i ); } else { if ( iLengthL > 4 || iLengthU > 4 ) return false; const char * szL = szRangeL; const char * szU = szRangeU; AddCharPair ( GetWordchar(szL), GetWordchar(szU) ); } m_bUseDictConversion = true; return true; }
/// Tests the beginning of a word against this rule's condition string.
///
/// The condition (m_sCondition, m_iCondLen bytes) is read left to right.
/// Each element is either a literal character, which must equal the current
/// word character, or a bracketed class "[...]", whose contents are handed
/// to IsInSet together with the current word character. Every element
/// consumes exactly one word character.
///
/// Note that the number of word characters examined is bounded by the
/// member m_iWordLen, not by a length taken from sWord.
///
/// @param sWord  word to test
/// @return false on the first mismatch or on a '[' with no closing ']';
///         true if the condition or the word is exhausted without a mismatch.
bool CISpellAffixRule::CheckPrefix ( const CSphString & sWord ) const
{
	int iCondI = 0;
	for ( int i = 0; iCondI < m_iCondLen && i < m_iWordLen; ++i )
	{
		if ( m_sCondition.cstr()[iCondI]!='[' )
		{
			// literal condition character
			if ( m_sCondition.cstr()[iCondI]!=sWord.cstr()[i] )
				return false;

			++iCondI;
		} else
		{
			// character class: locate the matching ']'
			int iRangeEnd = -1;
			for ( int j=iCondI; j<m_iCondLen && iRangeEnd==-1; ++j )
				if ( m_sCondition.cstr()[j]==']' )
					iRangeEnd = j;

			if ( iRangeEnd==-1 )
				return false;

			if ( !IsInSet ( sWord.cstr () [i], m_sCondition.SubString ( iCondI + 1, iRangeEnd - iCondI - 1 ).cstr () ) )
				return false;

			// Step over the whole class, as CheckSuffix does in the opposite
			// direction. Without this the index stays on '[' and the same class
			// is re-applied to every remaining word character.
			iCondI = iRangeEnd + 1;
		}
	}

	return true;
}
/// Tests the end of a word against this rule's condition string.
///
/// Both the condition (m_sCondition, m_iCondLen bytes) and the word
/// (bounded by the member m_iWordLen) are walked from their last position
/// towards the first. A condition element is either a literal character
/// or a bracketed class "[...]" checked through IsInSet; each element
/// consumes exactly one word character.
///
/// @param sWord  word to test
/// @return false on the first mismatch or on a ']' with no opening '[';
///         true if the condition or the word is exhausted without a mismatch.
bool CISpellAffixRule::CheckSuffix ( const CSphString & sWord ) const
{
	const char * szCond = m_sCondition.cstr();

	int iCondPos = m_iCondLen-1;
	int iWordPos = m_iWordLen-1;

	while ( iCondPos>=0 && iWordPos>=0 )
	{
		if ( szCond[iCondPos]==']' )
		{
			// character class: scan backwards for its opening bracket
			int iOpen = iCondPos;
			while ( iOpen>=0 && szCond[iOpen]!='[' )
				--iOpen;

			if ( iOpen<0 )
				return false;

			if ( !IsInSet ( sWord.cstr () [iWordPos], m_sCondition.SubString ( iOpen + 1, iCondPos - iOpen - 1 ).cstr () ) )
				return false;

			// continue with the element that precedes the class
			iCondPos = iOpen - 1;
		} else
		{
			// literal condition character
			if ( szCond[iCondPos]!=sWord.cstr()[iWordPos] )
				return false;

			--iCondPos;
		}

		--iWordPos;
	}

	return true;
}
// Search for the collection which contains a specific type static _cmsParametricCurvesCollection *GetParametricCurveByType(int Type, int* index) { _cmsParametricCurvesCollection* c; int Position; for (c = ParametricCurves; c != NULL; c = c ->Next) { Position = IsInSet(Type, c); if (Position != -1) { if (index != NULL) *index = Position; return c; } } return NULL; }
// Returns true iff "\\c" is a supported escape sequence. bool IsValidEscape(char c) { return (IsAsciiPunct(c) || IsInSet(c, "dDfnrsStvwW")); }
// Returns the result of testing `ch` against the set made of space,
// form feed, newline, carriage return, horizontal tab and vertical tab.
bool IsAsciiWhiteSpace(char ch) {
  const char* const kWhiteSpaceChars = " \f\n\r\t\v";
  const bool is_space = IsInSet(ch, kWhiteSpaceChars);
  return is_space;
}
// Returns the result of testing `ch` against the repetition
// operators '?', '*' and '+'.
bool IsRepeat(char ch) {
  const char* const kRepeatOperators = "?*+";
  const bool is_repeat = IsInSet(ch, kRepeatOperators);
  return is_repeat;
}
// Returns the result of testing `ch` against the fixed list of
// punctuation characters spelled out below.
bool IsAsciiPunct(char ch) {
  const char* const kPunctChars = "^-!\"#$%&'()*+,./:;<=>?@[\\]_`{|}~";
  const bool is_punct = IsInSet(ch, kPunctChars);
  return is_punct;
}