/// Diagnose a digit separator that sits at the edge of a digit sequence.
///
/// \param TokLoc         location of the start of the token; diagnostics are
///                       positioned relative to it.
/// \param Pos            position inside the token spelling to inspect.
/// \param IsAfterDigits  CSK_AfterDigits inspects the character just before
///                       \p Pos; any other value inspects the character at
///                       \p Pos.
///
/// Nothing is inspected when the character of interest would lie outside the
/// token (before ThisTokBegin, or at ThisTokEnd).
void NumericLiteralParser::checkSeparator(SourceLocation TokLoc,
                                          const char *Pos,
                                          CheckSeparatorKind IsAfterDigits) {
  const bool LookBehind = (IsAfterDigits == CSK_AfterDigits);

  // There is no character to look at on the relevant side of the token edge.
  if (Pos == (LookBehind ? ThisTokBegin : ThisTokEnd))
    return;

  const char *Suspect = LookBehind ? Pos - 1 : Pos;
  if (!isDigitSeparator(*Suspect))
    return;

  PP.Diag(PP.AdvanceToTokenCharacter(TokLoc, Suspect - ThisTokBegin),
          diag::err_digit_separator_not_between_digits)
      << IsAfterDigits;
}
/// decimal integer: [0-9] [0-9_]* /// binary integer: [01] [01_]* [bB] /// binary integer: "0b" [01_]+ /// octal integer: [0-7] [0-7_]* [qQoO] /// hex integer: [0-9] [0-9a-fA-F_]* [hH] /// hex integer: [$] [0-9] [0-9a-fA-F_]* /// hex integer: "0x" [0-9a-fA-F_]+ /// /// decimal float: [0-9]+ [.] [0-9]* ([eE] [-+]? [0-9]+)? /// decimal float: [0-9]+ [eE] [-+]? [0-9]+ /// hex float: "0x" [0-9a-fA-F_]* [.] [0-9a-fA-F]* ([pP] [-+]? [0-9]+)? /// hex float: "0x" [0-9a-fA-F_]+ [pP] [-+]? [0-9]+ /// NasmNumericParser::NasmNumericParser(llvm::StringRef str, SourceLocation loc, Preprocessor& pp) : NumericParser(str) { // This routine assumes that the range begin/end matches the regex for // integer and FP constants, and assumes that the byte at "*end" is both // valid and not part of the regex. Because of this, it doesn't have to // check for 'overscan' in various places. assert(!isalnum(*m_digits_end) && *m_digits_end != '.' && *m_digits_end != '_' && "Lexer didn't maximally munch?"); const char* s = str.begin(); bool float_ok = false; // Look for key radix flags (prefixes and suffixes) if (*s == '$') { m_radix = 16; ++s; } else if (m_digits_end[-1] == 'b' || m_digits_end[-1] == 'B') { m_radix = 2; --m_digits_end; } else if (m_digits_end[-1] == 'q' || m_digits_end[-1] == 'Q' || m_digits_end[-1] == 'o' || m_digits_end[-1] == 'O') { m_radix = 8; --m_digits_end; } else if (m_digits_end[-1] == 'h' || m_digits_end[-1] == 'H') { m_radix = 16; --m_digits_end; } else if (*s == '0' && (s[1] == 'x' || s[1] == 'X') && (isxdigit(s[2]) || s[2] == '.')) { m_radix = 16; float_ok = true; // C99-style hex floating point s += 2; } else if (*s == '0' && (s[1] == 'b' || s[1] == 'B') && (s[2] == '0' || s[2] == '1')) { m_radix = 2; s += 2; } else { // Otherwise it's a decimal or float m_radix = 10; float_ok = true; } m_digits_begin = s; switch (m_radix) { case 2: s = SkipBinaryDigits(s); break; case 8: s = SkipOctalDigits(s); break; case 10: s = SkipDigits(s); break; case 16: s = 
SkipHexDigits(s); break; } if (s == m_digits_end) { // Done. } else if (isxdigit(*s) && (!float_ok || (*s != 'e' && *s != 'E'))) { unsigned int err; switch (m_radix) { case 2: err = diag::err_invalid_binary_digit; break; case 8: err = diag::err_invalid_octal_digit; break; case 10: err = diag::err_invalid_decimal_digit; break; case 16: default: assert(false && "unexpected radix"); err = diag::err_invalid_decimal_digit; break; } pp.Diag(pp.AdvanceToTokenCharacter(loc, s-str.begin()), err) << std::string(s, s+1); m_had_error = true; return; } else if (*s == '.' && float_ok) { ++s; m_is_float = true; if (m_radix == 16) s = SkipHexDigits(s); else s = SkipDigits(s); } if (float_ok && ((m_radix == 10 && (*s == 'e' || *s == 'E')) || (m_radix == 16 && (*s == 'p' || *s == 'P')))) { // Float exponent const char* exponent = s; ++s; m_is_float = true; if (*s == '+' || *s == '-') // sign ++s; const char* first_non_digit = SkipDigits(s); if (first_non_digit == s) { pp.Diag(pp.AdvanceToTokenCharacter(loc, exponent-str.begin()), diag::err_exponent_has_no_digits); m_had_error = true; return; } s = first_non_digit; } // Report an error if there are any. if (s != m_digits_end) { pp.Diag(pp.AdvanceToTokenCharacter(loc, s-str.begin()), m_is_float ? diag::err_invalid_suffix_float_constant : diag::err_invalid_suffix_integer_constant) << std::string(s, str.end()); m_had_error = true; return; } }
/// decimal integer: [1-9] [0-9]* /// binary integer: "0" [bB] [01]+ /// octal integer: "0" [0-7]* /// hex integer: "0" [xX] [0-9a-fA-F]+ /// /// float: "0" [a-zA-Z except bB or xX] /// [-+]? [0-9]* ([.] [0-9]*)? ([eE] [-+]? [0-9]+)? /// GasNumericParser::GasNumericParser(llvm::StringRef str, SourceLocation loc, Preprocessor& pp, bool force_float) : NumericParser(str) { // This routine assumes that the range begin/end matches the regex for // integer and FP constants, and assumes that the byte at "*end" is both // valid and not part of the regex. Because of this, it doesn't have to // check for 'overscan' in various places. assert(!isalnum(*m_digits_end) && *m_digits_end != '.' && "Lexer didn't maximally munch?"); const char* s = str.begin(); // Look for key radix prefixes if (force_float) { // forced decimal float; skip the prefix if present m_radix = 10; m_is_float = true; if (*s == '0' && isalpha(s[1])) s += 2; } else if (*s == '0' && (s[1] == 'x' || s[1] == 'X')) { m_radix = 16; s += 2; } else if (*s == '0' && (s[1] == 'b' || s[1] == 'B')) { m_radix = 2; s += 2; } else if (*s == '0' && isalpha(s[1])) { // it's a decimal float; skip the prefix m_radix = 10; s += 2; m_is_float = true; } else if (*s == '0') { // It's an octal integer m_radix = 8; } else { // Otherwise it's a decimal m_radix = 10; } m_digits_begin = s; switch (m_radix) { case 2: s = SkipBinaryDigits(s); break; case 8: s = SkipOctalDigits(s); break; case 10: s = SkipDigits(s); break; case 16: s = SkipHexDigits(s); break; } if (s == m_digits_end) { // Done. 
} else if (isxdigit(*s) && (!m_is_float || (*s != 'e' && *s != 'E'))) { unsigned int err; switch (m_radix) { case 2: err = diag::err_invalid_binary_digit; break; case 8: err = diag::err_invalid_octal_digit; break; case 10: err = diag::err_invalid_decimal_digit; break; case 16: default: assert(false && "unexpected radix"); err = diag::err_invalid_decimal_digit; break; } pp.Diag(pp.AdvanceToTokenCharacter(loc, s-str.begin()), err) << std::string(s, s+1); m_had_error = true; return; } else if (m_is_float) { if (*s == '-' || *s == '+') { ++s; s = SkipDigits(s); } if (*s == '.') { ++s; s = SkipDigits(s); } if (*s == 'e' || *s == 'E') { // Float exponent const char* exponent = s; ++s; if (*s == '+' || *s == '-') // sign ++s; const char* first_non_digit = SkipDigits(s); if (first_non_digit == s) { pp.Diag(pp.AdvanceToTokenCharacter(loc, exponent-str.begin()), diag::err_exponent_has_no_digits); m_had_error = true; return; } s = first_non_digit; } } // Report an error if there are any. if (s != m_digits_end) { pp.Diag(pp.AdvanceToTokenCharacter(loc, s-str.begin()), m_is_float ? diag::err_invalid_suffix_float_constant : diag::err_invalid_suffix_integer_constant) << std::string(s, str.end()); m_had_error = true; return; } }
/// ParseNumberStartingWithZero - This method is called when the first character /// of the number is found to be a zero. This means it is either an octal /// number (like '04') or a hex number ('0x123a') a binary number ('0b1010') or /// a floating point number (01239.123e4). Eat the prefix, determining the /// radix etc. void NumericLiteralParser::ParseNumberStartingWithZero(SourceLocation TokLoc) { assert(s[0] == '0' && "Invalid method call"); s++; int c1 = s[0]; // Handle a hex number like 0x1234. if ((c1 == 'x' || c1 == 'X') && (isHexDigit(s[1]) || s[1] == '.')) { s++; assert(s < ThisTokEnd && "didn't maximally munch?"); radix = 16; DigitsBegin = s; s = SkipHexDigits(s); bool noSignificand = (s == DigitsBegin); if (s == ThisTokEnd) { // Done. } else if (*s == '.') { s++; saw_period = true; const char *floatDigitsBegin = s; checkSeparator(TokLoc, s, CSK_BeforeDigits); s = SkipHexDigits(s); noSignificand &= (floatDigitsBegin == s); } if (noSignificand) { PP.Diag(PP.AdvanceToTokenCharacter(TokLoc, s - ThisTokBegin), diag::err_hexconstant_requires_digits); hadError = true; return; } // A binary exponent can appear with or with a '.'. If dotted, the // binary exponent is required. 
if (*s == 'p' || *s == 'P') { checkSeparator(TokLoc, s, CSK_AfterDigits); const char *Exponent = s; s++; saw_exponent = true; if (*s == '+' || *s == '-') s++; // sign const char *first_non_digit = SkipDigits(s); if (first_non_digit == s) { PP.Diag(PP.AdvanceToTokenCharacter(TokLoc, Exponent-ThisTokBegin), diag::err_exponent_has_no_digits); hadError = true; return; } checkSeparator(TokLoc, s, CSK_BeforeDigits); s = first_non_digit; if (!PP.getLangOpts().HexFloats) PP.Diag(TokLoc, diag::ext_hexconstant_invalid); } else if (saw_period) { PP.Diag(PP.AdvanceToTokenCharacter(TokLoc, s-ThisTokBegin), diag::err_hexconstant_requires_exponent); hadError = true; } return; } // Handle simple binary numbers 0b01010 if ((c1 == 'b' || c1 == 'B') && (s[1] == '0' || s[1] == '1')) { // 0b101010 is a C++1y / GCC extension. PP.Diag(TokLoc, PP.getLangOpts().CPlusPlus14 ? diag::warn_cxx11_compat_binary_literal : PP.getLangOpts().CPlusPlus ? diag::ext_binary_literal_cxx14 : diag::ext_binary_literal); ++s; assert(s < ThisTokEnd && "didn't maximally munch?"); radix = 2; DigitsBegin = s; s = SkipBinaryDigits(s); if (s == ThisTokEnd) { // Done. } else if (isHexDigit(*s)) { PP.Diag(PP.AdvanceToTokenCharacter(TokLoc, s-ThisTokBegin), diag::err_invalid_binary_digit) << StringRef(s, 1); hadError = true; } // Other suffixes will be diagnosed by the caller. return; } // For now, the radix is set to 8. If we discover that we have a // floating point constant, the radix will change to 10. Octal floating // point constants are not permitted (only decimal and hexadecimal). radix = 8; DigitsBegin = s; s = SkipOctalDigits(s); if (s == ThisTokEnd) return; // Done, simple octal number like 01234 // If we have some other non-octal digit that *is* a decimal digit, see if // this is part of a floating point number like 094.123 or 09e1. if (isDigit(*s)) { const char *EndDecimal = SkipDigits(s); if (EndDecimal[0] == '.' 
|| EndDecimal[0] == 'e' || EndDecimal[0] == 'E') { s = EndDecimal; radix = 10; } } // If we have a hex digit other than 'e' (which denotes a FP exponent) then // the code is using an incorrect base. if (isHexDigit(*s) && *s != 'e' && *s != 'E') { PP.Diag(PP.AdvanceToTokenCharacter(TokLoc, s-ThisTokBegin), diag::err_invalid_octal_digit) << StringRef(s, 1); hadError = true; return; } if (*s == '.') { s++; radix = 10; saw_period = true; checkSeparator(TokLoc, s, CSK_BeforeDigits); s = SkipDigits(s); // Skip suffix. } if (*s == 'e' || *s == 'E') { // exponent checkSeparator(TokLoc, s, CSK_AfterDigits); const char *Exponent = s; s++; radix = 10; saw_exponent = true; if (*s == '+' || *s == '-') s++; // sign const char *first_non_digit = SkipDigits(s); if (first_non_digit != s) { checkSeparator(TokLoc, s, CSK_BeforeDigits); s = first_non_digit; } else { PP.Diag(PP.AdvanceToTokenCharacter(TokLoc, Exponent-ThisTokBegin), diag::err_exponent_has_no_digits); hadError = true; return; } } }
/// integer-constant: [C99 6.4.4.1] /// decimal-constant integer-suffix /// octal-constant integer-suffix /// hexadecimal-constant integer-suffix /// binary-literal integer-suffix [GNU, C++1y] /// user-defined-integer-literal: [C++11 lex.ext] /// decimal-literal ud-suffix /// octal-literal ud-suffix /// hexadecimal-literal ud-suffix /// binary-literal ud-suffix [GNU, C++1y] /// decimal-constant: /// nonzero-digit /// decimal-constant digit /// octal-constant: /// 0 /// octal-constant octal-digit /// hexadecimal-constant: /// hexadecimal-prefix hexadecimal-digit /// hexadecimal-constant hexadecimal-digit /// hexadecimal-prefix: one of /// 0x 0X /// binary-literal: /// 0b binary-digit /// 0B binary-digit /// binary-literal binary-digit /// integer-suffix: /// unsigned-suffix [long-suffix] /// unsigned-suffix [long-long-suffix] /// long-suffix [unsigned-suffix] /// long-long-suffix [unsigned-sufix] /// nonzero-digit: /// 1 2 3 4 5 6 7 8 9 /// octal-digit: /// 0 1 2 3 4 5 6 7 /// hexadecimal-digit: /// 0 1 2 3 4 5 6 7 8 9 /// a b c d e f /// A B C D E F /// binary-digit: /// 0 /// 1 /// unsigned-suffix: one of /// u U /// long-suffix: one of /// l L /// long-long-suffix: one of /// ll LL /// /// floating-constant: [C99 6.4.4.2] /// TODO: add rules... /// NumericLiteralParser::NumericLiteralParser(StringRef TokSpelling) : ThisTokBegin(TokSpelling.begin()), ThisTokEnd(TokSpelling.end()) { // This routine assumes that the range begin/end matches the regex for integer // and FP constants (specifically, the 'pp-number' regex), and assumes that // the byte at "*end" is both valid and not part of the regex. Because of // this, it doesn't have to check for 'overscan' in various places. 
assert(!isPreprocessingNumberBody(*ThisTokEnd) && "didn't maximally munch?"); s = DigitsBegin = ThisTokBegin; saw_exponent = false; saw_period = false; saw_ud_suffix = false; isLong = false; isUnsigned = false; isLongLong = false; isFloat = false; isImaginary = false; MicrosoftInteger = 0; hadError = false; SourceLocation TokLoc; if (*s == '0') { // parse radix ParseNumberStartingWithZero(TokLoc); if (hadError) return; } else { // the first digit is non-zero radix = 10; s = SkipDigits(s); if (s == ThisTokEnd) { // Done. } else if (isHexDigit(*s) && !(*s == 'e' || *s == 'E')) { PP.Diag(PP.AdvanceToTokenCharacter(TokLoc, s - ThisTokBegin), diag::err_invalid_decimal_digit) << StringRef(s, 1); hadError = true; return; } else if (*s == '.') { checkSeparator(TokLoc, s, CSK_AfterDigits); s++; saw_period = true; checkSeparator(TokLoc, s, CSK_BeforeDigits); s = SkipDigits(s); } if ((*s == 'e' || *s == 'E')) { // exponent checkSeparator(TokLoc, s, CSK_AfterDigits); const char *Exponent = s; s++; saw_exponent = true; if (*s == '+' || *s == '-') s++; // sign checkSeparator(TokLoc, s, CSK_BeforeDigits); const char *first_non_digit = SkipDigits(s); if (first_non_digit != s) { s = first_non_digit; } else { PP.Diag(PP.AdvanceToTokenCharacter(TokLoc, Exponent - ThisTokBegin), diag::err_exponent_has_no_digits); hadError = true; return; } } } SuffixBegin = s; checkSeparator(TokLoc, s, CSK_AfterDigits); // Parse the suffix. At this point we can classify whether we have an FP or // integer constant. bool isFPConstant = isFloatingLiteral(); const char *ImaginarySuffixLoc = nullptr; // Loop over all of the characters of the suffix. If we see something bad, // we break out of the loop. for (; s != ThisTokEnd; ++s) { switch (*s) { case 'f': // FP Suffix for "float" case 'F': if (!isFPConstant) break; // Error for integer constant. if (isFloat || isLong) break; // FF, LF invalid. isFloat = true; continue; // Success. 
case 'u': case 'U': if (isFPConstant) break; // Error for floating constant. if (isUnsigned) break; // Cannot be repeated. isUnsigned = true; continue; // Success. case 'l': case 'L': if (isLong || isLongLong) break; // Cannot be repeated. if (isFloat) break; // LF invalid. // Check for long long. The L's need to be adjacent and the same case. if (s[1] == s[0]) { assert(s + 1 < ThisTokEnd && "didn't maximally munch?"); if (isFPConstant) break; // long long invalid for floats. isLongLong = true; ++s; // Eat both of them. } else { isLong = true; } continue; // Success. case 'i': case 'I': if (PP.getLangOpts().MicrosoftExt) { if (isLong || isLongLong || MicrosoftInteger) break; if (!isFPConstant) { // Allow i8, i16, i32, i64, and i128. switch (s[1]) { case '8': s += 2; // i8 suffix MicrosoftInteger = 8; break; case '1': if (s[2] == '6') { s += 3; // i16 suffix MicrosoftInteger = 16; } else if (s[2] == '2' && s[3] == '8') { s += 4; // i128 suffix MicrosoftInteger = 128; } break; case '3': if (s[2] == '2') { s += 3; // i32 suffix MicrosoftInteger = 32; } break; case '6': if (s[2] == '4') { s += 3; // i64 suffix MicrosoftInteger = 64; } break; default: break; } } if (MicrosoftInteger) { assert(s <= ThisTokEnd && "didn't maximally munch?"); break; } } // "i", "if", and "il" are user-defined suffixes in C++1y. if (*s == 'i' && PP.getLangOpts().CPlusPlus14) break; // fall through. case 'j': case 'J': if (isImaginary) break; // Cannot be repeated. isImaginary = true; ImaginarySuffixLoc = s; continue; // Success. } // If we reached here, there was an error or a ud-suffix. break; } if (s != ThisTokEnd) { // FIXME: Don't bother expanding UCNs if !tok.hasUCN(). expandUCNs(UDSuffixBuf, StringRef(SuffixBegin, ThisTokEnd - SuffixBegin)); if (isValidUDSuffix(PP.getLangOpts(), UDSuffixBuf)) { // Any suffix pieces we might have parsed are actually part of the // ud-suffix. 
isLong = false; isUnsigned = false; isLongLong = false; isFloat = false; isImaginary = false; MicrosoftInteger = 0; saw_ud_suffix = true; return; } // Report an error if there are any. PP.Diag(PP.AdvanceToTokenCharacter(TokLoc, SuffixBegin - ThisTokBegin), isFPConstant ? diag::err_invalid_suffix_float_constant : diag::err_invalid_suffix_integer_constant) << StringRef(SuffixBegin, ThisTokEnd-SuffixBegin); hadError = true; return; } if (isImaginary) { PP.Diag(PP.AdvanceToTokenCharacter(TokLoc, ImaginarySuffixLoc - ThisTokBegin), diag::ext_imaginary_constant); } }