Example #1
0
/// Diagnose a digit separator that is not placed between two digits.
///
/// \param TokLoc        location passed to AdvanceToTokenCharacter to compute
///                      where the diagnostic is reported.
/// \param Pos           position inside the token spelling to examine.
/// \param IsAfterDigits with CSK_AfterDigits the character just before \p Pos
///                      is examined (nothing is done when \p Pos is the start
///                      of the token); otherwise the character at \p Pos is
///                      examined (nothing is done when \p Pos is the end of
///                      the token).
void NumericLiteralParser::checkSeparator(SourceLocation TokLoc,
		const char *Pos,
		CheckSeparatorKind IsAfterDigits) {
	const char *Candidate = Pos;

	if (IsAfterDigits == CSK_AfterDigits) {
		// There is no preceding character at the very start of the token.
		if (Candidate == ThisTokBegin)
			return;
		Candidate = Pos - 1;
	} else {
		// There is no following character at the very end of the token.
		if (Candidate == ThisTokEnd)
			return;
	}

	if (!isDigitSeparator(*Candidate))
		return;

	PP.Diag(PP.AdvanceToTokenCharacter(TokLoc, Candidate - ThisTokBegin),
			diag::err_digit_separator_not_between_digits)
			<< IsAfterDigits;
}
/// Parse a NASM-syntax numeric literal, determining its radix and whether it
/// is an integer or a floating point constant.
///
/// Recognized forms:
///
/// decimal integer: [0-9] [0-9_]*
/// binary integer: [01] [01_]* [bB]
/// binary integer: "0b" [01_]+
/// octal integer: [0-7] [0-7_]* [qQoO]
/// hex integer: [0-9] [0-9a-fA-F_]* [hH]
/// hex integer: [$] [0-9] [0-9a-fA-F_]*
/// hex integer: "0x" [0-9a-fA-F_]+
///
/// decimal float: [0-9]+ [.] [0-9]* ([eE] [-+]? [0-9]+)?
/// decimal float: [0-9]+ [eE] [-+]? [0-9]+
/// hex float: "0x" [0-9a-fA-F_]* [.] [0-9a-fA-F]* ([pP] [-+]? [0-9]+)?
/// hex float: "0x" [0-9a-fA-F_]+ [pP] [-+]? [0-9]+
///
/// Radix selection order: '$' prefix, "0x" prefix, radix suffix letter
/// (b/q/o/h), "0b" prefix; anything else is decimal.
///
/// @param str  spelling of the literal; the byte just past its end must be
///             readable (see the assertion below)
/// @param loc  source location used as the base when positioning diagnostics
/// @param pp   preprocessor through which diagnostics are reported
///
/// On a malformed literal a diagnostic is emitted, m_had_error is set and the
/// constructor returns early.  Otherwise m_radix, m_digits_begin,
/// m_digits_end and m_is_float describe the literal.
NasmNumericParser::NasmNumericParser(llvm::StringRef str,
                                     SourceLocation loc,
                                     Preprocessor& pp)
    : NumericParser(str)
{
    // This routine assumes that the range begin/end matches the regex for
    // integer and FP constants, and assumes that the byte at "*end" is both
    // valid and not part of the regex.  Because of this, it doesn't have to
    // check for 'overscan' in various places.
    //
    // Note: the <cctype> classifiers have undefined behavior for negative
    // arguments, so every char is converted to unsigned char first.
    assert(!isalnum(static_cast<unsigned char>(*m_digits_end)) &&
           *m_digits_end != '.' && *m_digits_end != '_' &&
           "Lexer didn't maximally munch?");

    const char* s = str.begin();
    bool float_ok = false;

    // Look for key radix flags (prefixes and suffixes)
    if (*s == '$')
    {
        m_radix = 16;
        ++s;
    }
    else if (*s == '0' && (s[1] == 'x' || s[1] == 'X') &&
             (isxdigit(static_cast<unsigned char>(s[2])) || s[2] == '.'))
    {
        // The "0x" prefix must be tested before the radix suffixes: 'b'/'B'
        // is itself a hex digit, so a literal such as 0x1b would otherwise be
        // mistaken for a binary number with a 'b' suffix and rejected.
        m_radix = 16;
        float_ok = true;    // C99-style hex floating point
        s += 2;
    }
    else if (m_digits_end[-1] == 'b' || m_digits_end[-1] == 'B')
    {
        m_radix = 2;
        --m_digits_end;     // the suffix letter is not a digit
    }
    else if (m_digits_end[-1] == 'q' || m_digits_end[-1] == 'Q' ||
             m_digits_end[-1] == 'o' || m_digits_end[-1] == 'O')
    {
        m_radix = 8;
        --m_digits_end;
    }
    else if (m_digits_end[-1] == 'h' || m_digits_end[-1] == 'H')
    {
        m_radix = 16;
        --m_digits_end;
    }
    else if (*s == '0' && (s[1] == 'b' || s[1] == 'B') &&
             (s[2] == '0' || s[2] == '1'))
    {
        m_radix = 2;
        s += 2;
    }
    else
    {
        // Otherwise it's a decimal or float
        m_radix = 10;
        float_ok = true;
    }

    m_digits_begin = s;

    // Consume the run of digits that are valid in the selected radix.
    switch (m_radix)
    {
        case 2:     s = SkipBinaryDigits(s); break;
        case 8:     s = SkipOctalDigits(s); break;
        case 10:    s = SkipDigits(s); break;
        case 16:    s = SkipHexDigits(s); break;
    }

    if (s == m_digits_end)
    {
        // Done.
    }
    else if (isxdigit(static_cast<unsigned char>(*s)) &&
             (!float_ok || (*s != 'e' && *s != 'E')))
    {
        // A hex digit that is not valid in the selected radix.  'e'/'E' is
        // let through when a float is allowed, as it may start an exponent.
        unsigned int err;
        switch (m_radix)
        {
            case 2: err = diag::err_invalid_binary_digit; break;
            case 8: err = diag::err_invalid_octal_digit; break;
            case 10: err = diag::err_invalid_decimal_digit; break;
            case 16:
            default:
                // Radix 16 is not expected here: the hex digits were skipped
                // above.
                assert(false && "unexpected radix");
                err = diag::err_invalid_decimal_digit;
                break;
        }
        pp.Diag(pp.AdvanceToTokenCharacter(loc, s-str.begin()), err)
            << std::string(s, s+1);
        m_had_error = true;
        return;
    }
    else if (*s == '.' && float_ok)
    {
        // Fractional part.
        ++s;
        m_is_float = true;
        if (m_radix == 16)
            s = SkipHexDigits(s);
        else
            s = SkipDigits(s);
    }

    if (float_ok &&
        ((m_radix == 10 && (*s == 'e' || *s == 'E')) ||
         (m_radix == 16 && (*s == 'p' || *s == 'P'))))
    {
        // Float exponent
        const char* exponent = s;
        ++s;
        m_is_float = true;
        if (*s == '+' || *s == '-') // sign
            ++s;
        const char* first_non_digit = SkipDigits(s);
        if (first_non_digit == s)
        {
            pp.Diag(pp.AdvanceToTokenCharacter(loc, exponent-str.begin()),
                    diag::err_exponent_has_no_digits);
            m_had_error = true;
            return;
        }
        s = first_non_digit;
    }

    // Report an error if there are any.
    if (s != m_digits_end)
    {
        pp.Diag(pp.AdvanceToTokenCharacter(loc, s-str.begin()),
                m_is_float ? diag::err_invalid_suffix_float_constant :
                             diag::err_invalid_suffix_integer_constant)
            << std::string(s, str.end());
        m_had_error = true;
        return;
    }
}
/// Parse a GAS-syntax numeric literal, determining its radix and whether it
/// is an integer or a floating point constant.
///
/// Recognized forms:
///
/// decimal integer: [1-9] [0-9]*
/// binary integer: "0" [bB] [01]+
/// octal integer: "0" [0-7]*
/// hex integer: "0" [xX] [0-9a-fA-F]+
///
/// float: "0" [a-zA-Z except bB or xX]
///        [-+]? [0-9]* ([.] [0-9]*)? ([eE] [-+]? [0-9]+)?
///
/// @param str          spelling of the literal; the byte just past its end
///                     must be readable (see the assertion below)
/// @param loc          source location used as the base when positioning
///                     diagnostics
/// @param pp           preprocessor through which diagnostics are reported
/// @param force_float  when true the literal is always treated as a decimal
///                     float, and a leading "0" + letter prefix is skipped if
///                     present
///
/// On a malformed literal a diagnostic is emitted, m_had_error is set and the
/// constructor returns early.  Otherwise m_radix, m_digits_begin and
/// m_is_float describe the literal.
GasNumericParser::GasNumericParser(llvm::StringRef str,
                                   SourceLocation loc,
                                   Preprocessor& pp,
                                   bool force_float)
    : NumericParser(str)
{
    // This routine assumes that the range begin/end matches the regex for
    // integer and FP constants, and assumes that the byte at "*end" is both
    // valid and not part of the regex.  Because of this, it doesn't have to
    // check for 'overscan' in various places.
    //
    // Note: the <cctype> classifiers have undefined behavior for negative
    // arguments, so every char is converted to unsigned char first.
    assert(!isalnum(static_cast<unsigned char>(*m_digits_end)) &&
           *m_digits_end != '.' && "Lexer didn't maximally munch?");

    const char* s = str.begin();

    // Look for key radix prefixes
    if (force_float)
    {
        // forced decimal float; skip the prefix if present
        m_radix = 10;
        m_is_float = true;
        if (*s == '0' && isalpha(static_cast<unsigned char>(s[1])))
            s += 2;
    }
    else if (*s == '0' && (s[1] == 'x' || s[1] == 'X'))
    {
        m_radix = 16;
        s += 2;
    }
    else if (*s == '0' && (s[1] == 'b' || s[1] == 'B'))
    {
        m_radix = 2;
        s += 2;
    }
    else if (*s == '0' && isalpha(static_cast<unsigned char>(s[1])))
    {
        // it's a decimal float; skip the prefix
        m_radix = 10;
        s += 2;
        m_is_float = true;
    }
    else if (*s == '0')
    {
        // It's an octal integer
        m_radix = 8;
    }
    else
    {
        // Otherwise it's a decimal
        m_radix = 10;
    }

    m_digits_begin = s;

    // Consume the run of digits that are valid in the selected radix.
    switch (m_radix)
    {
        case 2:     s = SkipBinaryDigits(s); break;
        case 8:     s = SkipOctalDigits(s); break;
        case 10:    s = SkipDigits(s); break;
        case 16:    s = SkipHexDigits(s); break;
    }

    if (s == m_digits_end)
    {
        // Done.
    }
    else if (isxdigit(static_cast<unsigned char>(*s)) &&
             (!m_is_float || (*s != 'e' && *s != 'E')))
    {
        // A hex digit that is not valid in the selected radix.  'e'/'E' is
        // let through for floats, as it may start an exponent.
        unsigned int err;
        switch (m_radix)
        {
            case 2: err = diag::err_invalid_binary_digit; break;
            case 8: err = diag::err_invalid_octal_digit; break;
            case 10: err = diag::err_invalid_decimal_digit; break;
            case 16:
            default:
                // Radix 16 is not expected here: the hex digits were skipped
                // above.
                assert(false && "unexpected radix");
                err = diag::err_invalid_decimal_digit;
                break;
        }
        pp.Diag(pp.AdvanceToTokenCharacter(loc, s-str.begin()), err)
            << std::string(s, s+1);
        m_had_error = true;
        return;
    }
    else if (m_is_float)
    {
        // Optional sign followed by the integer part.
        if (*s == '-' || *s == '+')
        {
            ++s;
            s = SkipDigits(s);
        }
        // Optional fractional part.
        if (*s == '.')
        {
            ++s;
            s = SkipDigits(s);
        }

        if (*s == 'e' || *s == 'E')
        {
            // Float exponent
            const char* exponent = s;
            ++s;
            if (*s == '+' || *s == '-') // sign
                ++s;
            const char* first_non_digit = SkipDigits(s);
            if (first_non_digit == s)
            {
                pp.Diag(pp.AdvanceToTokenCharacter(loc, exponent-str.begin()),
                        diag::err_exponent_has_no_digits);
                m_had_error = true;
                return;
            }
            s = first_non_digit;
        }
    }

    // Report an error if there are any.
    if (s != m_digits_end)
    {
        pp.Diag(pp.AdvanceToTokenCharacter(loc, s-str.begin()),
                m_is_float ? diag::err_invalid_suffix_float_constant :
                             diag::err_invalid_suffix_integer_constant)
            << std::string(s, str.end());
        m_had_error = true;
        return;
    }
}
Example #4
0
/// ParseNumberStartingWithZero - This method is called when the first character
/// of the number is found to be a zero.  This means it is either an octal
/// number (like '04') or a hex number ('0x123a') a binary number ('0b1010') or
/// a floating point number (01239.123e4).  Eat the prefix, determining the
/// radix etc.
///
/// \param TokLoc location used as the base when positioning diagnostics.
///
/// Updates the parser state: 's' is advanced past the digits (and any
/// fraction/exponent), 'radix' and 'DigitsBegin' are set, and 'saw_period' /
/// 'saw_exponent' are set when the corresponding parts are seen.  On a
/// malformed number a diagnostic is emitted and 'hadError' is set.
void NumericLiteralParser::ParseNumberStartingWithZero(SourceLocation TokLoc) {
	assert(s[0] == '0' && "Invalid method call");
	s++;

	// Character right after the leading zero; selects the radix prefix.
	int c1 = s[0];

	// Handle a hex number like 0x1234.
	if ((c1 == 'x' || c1 == 'X') && (isHexDigit(s[1]) || s[1] == '.')) {
		s++;
		assert(s < ThisTokEnd && "didn't maximally munch?");
		radix = 16;
		DigitsBegin = s;
		s = SkipHexDigits(s);
		// True while no hex digit has been seen on either side of the '.'.
		bool noSignificand = (s == DigitsBegin);
		if (s == ThisTokEnd) {
			// Done.
		} else if (*s == '.') {
			s++;
			saw_period = true;
			const char *floatDigitsBegin = s;
			checkSeparator(TokLoc, s, CSK_BeforeDigits);
			s = SkipHexDigits(s);
			noSignificand &= (floatDigitsBegin == s);
		}

		if (noSignificand) {
			PP.Diag(PP.AdvanceToTokenCharacter(TokLoc, s - ThisTokBegin),
					diag::err_hexconstant_requires_digits);
			hadError = true;
			return;
		}

		// A binary exponent can appear with or without a '.'. If dotted, the
		// binary exponent is required.
		if (*s == 'p' || *s == 'P') {
			checkSeparator(TokLoc, s, CSK_AfterDigits);
			const char *Exponent = s;
			s++;
			saw_exponent = true;
			if (*s == '+' || *s == '-')  s++; // sign
			const char *first_non_digit = SkipDigits(s);
			if (first_non_digit == s) {
				PP.Diag(PP.AdvanceToTokenCharacter(TokLoc, Exponent-ThisTokBegin),
						diag::err_exponent_has_no_digits);
				hadError = true;
				return;
			}
			checkSeparator(TokLoc, s, CSK_BeforeDigits);
			s = first_non_digit;

			// Diagnose hex floats when the HexFloats language option is off.
			if (!PP.getLangOpts().HexFloats)
				PP.Diag(TokLoc, diag::ext_hexconstant_invalid);
		} else if (saw_period) {
			PP.Diag(PP.AdvanceToTokenCharacter(TokLoc, s-ThisTokBegin),
					diag::err_hexconstant_requires_exponent);
			hadError = true;
		}
		return;
	}

	// Handle simple binary numbers 0b01010
	if ((c1 == 'b' || c1 == 'B') && (s[1] == '0' || s[1] == '1')) {
		// 0b101010 is a C++1y / GCC extension.
		// The diagnostic chosen depends on the CPlusPlus14 / CPlusPlus
		// language options.
		PP.Diag(TokLoc,
				PP.getLangOpts().CPlusPlus14
				? diag::warn_cxx11_compat_binary_literal
						: PP.getLangOpts().CPlusPlus
						  ? diag::ext_binary_literal_cxx14
								  : diag::ext_binary_literal);
		++s;
		assert(s < ThisTokEnd && "didn't maximally munch?");
		radix = 2;
		DigitsBegin = s;
		s = SkipBinaryDigits(s);
		if (s == ThisTokEnd) {
			// Done.
		} else if (isHexDigit(*s)) {
			PP.Diag(PP.AdvanceToTokenCharacter(TokLoc, s-ThisTokBegin),
					diag::err_invalid_binary_digit) << StringRef(s, 1);
			hadError = true;
		}
		// Other suffixes will be diagnosed by the caller.
		return;
	}

	// For now, the radix is set to 8. If we discover that we have a
	// floating point constant, the radix will change to 10. Octal floating
	// point constants are not permitted (only decimal and hexadecimal).
	radix = 8;
	DigitsBegin = s;
	s = SkipOctalDigits(s);
	if (s == ThisTokEnd)
		return; // Done, simple octal number like 01234

	// If we have some other non-octal digit that *is* a decimal digit, see if
	// this is part of a floating point number like 094.123 or 09e1.
	if (isDigit(*s)) {
		const char *EndDecimal = SkipDigits(s);
		if (EndDecimal[0] == '.' || EndDecimal[0] == 'e' || EndDecimal[0] == 'E') {
			s = EndDecimal;
			radix = 10;
		}
	}

	// If we have a hex digit other than 'e' (which denotes a FP exponent) then
	// the code is using an incorrect base.
	if (isHexDigit(*s) && *s != 'e' && *s != 'E') {
		PP.Diag(PP.AdvanceToTokenCharacter(TokLoc, s-ThisTokBegin),
				diag::err_invalid_octal_digit) << StringRef(s, 1);
		hadError = true;
		return;
	}

	if (*s == '.') {
		s++;
		radix = 10;
		saw_period = true;
		checkSeparator(TokLoc, s, CSK_BeforeDigits);
		s = SkipDigits(s); // Skip the digits of the fractional part.
	}
	if (*s == 'e' || *s == 'E') { // exponent
		checkSeparator(TokLoc, s, CSK_AfterDigits);
		const char *Exponent = s;
		s++;
		radix = 10;
		saw_exponent = true;
		if (*s == '+' || *s == '-')  s++; // sign
		const char *first_non_digit = SkipDigits(s);
		if (first_non_digit != s) {
			checkSeparator(TokLoc, s, CSK_BeforeDigits);
			s = first_non_digit;
		} else {
			// An exponent marker must be followed by at least one digit.
			PP.Diag(PP.AdvanceToTokenCharacter(TokLoc, Exponent-ThisTokBegin),
					diag::err_exponent_has_no_digits);
			hadError = true;
			return;
		}
	}
}
Example #5
0
/// Parse the spelling of a numeric literal: determine the radix, locate the
/// digits, and classify the suffix (unsigned/long/long long/float/imaginary,
/// Microsoft iN suffixes, or a user-defined suffix).  On a malformed literal a
/// diagnostic is emitted and 'hadError' is set.
///
///       integer-constant: [C99 6.4.4.1]
///         decimal-constant integer-suffix
///         octal-constant integer-suffix
///         hexadecimal-constant integer-suffix
///         binary-literal integer-suffix [GNU, C++1y]
///       user-defined-integer-literal: [C++11 lex.ext]
///         decimal-literal ud-suffix
///         octal-literal ud-suffix
///         hexadecimal-literal ud-suffix
///         binary-literal ud-suffix [GNU, C++1y]
///       decimal-constant:
///         nonzero-digit
///         decimal-constant digit
///       octal-constant:
///         0
///         octal-constant octal-digit
///       hexadecimal-constant:
///         hexadecimal-prefix hexadecimal-digit
///         hexadecimal-constant hexadecimal-digit
///       hexadecimal-prefix: one of
///         0x 0X
///       binary-literal:
///         0b binary-digit
///         0B binary-digit
///         binary-literal binary-digit
///       integer-suffix:
///         unsigned-suffix [long-suffix]
///         unsigned-suffix [long-long-suffix]
///         long-suffix [unsigned-suffix]
///         long-long-suffix [unsigned-suffix]
///       nonzero-digit:
///         1 2 3 4 5 6 7 8 9
///       octal-digit:
///         0 1 2 3 4 5 6 7
///       hexadecimal-digit:
///         0 1 2 3 4 5 6 7 8 9
///         a b c d e f
///         A B C D E F
///       binary-digit:
///         0
///         1
///       unsigned-suffix: one of
///         u U
///       long-suffix: one of
///         l L
///       long-long-suffix: one of
///         ll LL
///
///       floating-constant: [C99 6.4.4.2]
///         TODO: add rules...
///
/// \param TokSpelling the characters of the literal; the byte just past its
///                    end must be readable (see the assertion below).
NumericLiteralParser::NumericLiteralParser(StringRef TokSpelling)
: ThisTokBegin(TokSpelling.begin()), ThisTokEnd(TokSpelling.end()) {

	// This routine assumes that the range begin/end matches the regex for integer
	// and FP constants (specifically, the 'pp-number' regex), and assumes that
	// the byte at "*end" is both valid and not part of the regex.  Because of
	// this, it doesn't have to check for 'overscan' in various places.
	assert(!isPreprocessingNumberBody(*ThisTokEnd) && "didn't maximally munch?");

	// Reset all parse state before scanning.
	s = DigitsBegin = ThisTokBegin;
	saw_exponent = false;
	saw_period = false;
	saw_ud_suffix = false;
	isLong = false;
	isUnsigned = false;
	isLongLong = false;
	isFloat = false;
	isImaginary = false;
	MicrosoftInteger = 0;
	hadError = false;

	// NOTE(review): TokLoc is default-constructed here rather than supplied by
	// the caller, so diagnostic locations derived from it may not point at the
	// real token -- confirm this is intended.
	SourceLocation TokLoc;

	if (*s == '0') { // parse radix
		ParseNumberStartingWithZero(TokLoc);
		if (hadError)
			return;
	} else { // the first digit is non-zero
		radix = 10;
		s = SkipDigits(s);
		if (s == ThisTokEnd) {
			// Done.
		} else if (isHexDigit(*s) && !(*s == 'e' || *s == 'E')) {
			// A hex digit other than the exponent marker is not a valid
			// decimal digit.
			PP.Diag(PP.AdvanceToTokenCharacter(TokLoc, s - ThisTokBegin),
					diag::err_invalid_decimal_digit) << StringRef(s, 1);
			hadError = true;
			return;
		} else if (*s == '.') {
			checkSeparator(TokLoc, s, CSK_AfterDigits);
			s++;
			saw_period = true;
			checkSeparator(TokLoc, s, CSK_BeforeDigits);
			s = SkipDigits(s);
		}
		if ((*s == 'e' || *s == 'E')) { // exponent
			checkSeparator(TokLoc, s, CSK_AfterDigits);
			const char *Exponent = s;
			s++;
			saw_exponent = true;
			if (*s == '+' || *s == '-')  s++; // sign
			checkSeparator(TokLoc, s, CSK_BeforeDigits);
			const char *first_non_digit = SkipDigits(s);
			if (first_non_digit != s) {
				s = first_non_digit;
			} else {
				// An exponent marker must be followed by at least one digit.
				PP.Diag(PP.AdvanceToTokenCharacter(TokLoc, Exponent - ThisTokBegin),
						diag::err_exponent_has_no_digits);
				hadError = true;
				return;
			}
		}
	}

	// Everything from here to the end of the token is the suffix.
	SuffixBegin = s;
	checkSeparator(TokLoc, s, CSK_AfterDigits);

	// Parse the suffix.  At this point we can classify whether we have an FP or
	// integer constant.
	bool isFPConstant = isFloatingLiteral();
	const char *ImaginarySuffixLoc = nullptr;

	// Loop over all of the characters of the suffix.  If we see something bad,
	// we break out of the loop.
	// Inside the switch, 'continue' accepts the character and moves on, while
	// 'break' leaves the switch and reaches the loop-terminating 'break' below.
	for (; s != ThisTokEnd; ++s) {
		switch (*s) {
		case 'f':      // FP Suffix for "float"
		case 'F':
			if (!isFPConstant) break;  // Error for integer constant.
			if (isFloat || isLong) break; // FF, LF invalid.
			isFloat = true;
			continue;  // Success.
		case 'u':
		case 'U':
			if (isFPConstant) break;  // Error for floating constant.
			if (isUnsigned) break;    // Cannot be repeated.
			isUnsigned = true;
			continue;  // Success.
		case 'l':
		case 'L':
			if (isLong || isLongLong) break;  // Cannot be repeated.
			if (isFloat) break;               // LF invalid.

			// Check for long long.  The L's need to be adjacent and the same case.
			if (s[1] == s[0]) {
				assert(s + 1 < ThisTokEnd && "didn't maximally munch?");
				if (isFPConstant) break;        // long long invalid for floats.
				isLongLong = true;
				++s;  // Eat both of them.
			} else {
				isLong = true;
			}
			continue;  // Success.
		case 'i':
		case 'I':
			if (PP.getLangOpts().MicrosoftExt) {
				if (isLong || isLongLong || MicrosoftInteger)
					break;

				if (!isFPConstant) {
					// Allow i8, i16, i32, i64, and i128.
					switch (s[1]) {
					case '8':
						s += 2; // i8 suffix
						MicrosoftInteger = 8;
						break;
					case '1':
						if (s[2] == '6') {
							s += 3; // i16 suffix
							MicrosoftInteger = 16;
						} else if (s[2] == '2' && s[3] == '8') {
							s += 4; // i128 suffix
							MicrosoftInteger = 128;
						}
						break;
					case '3':
						if (s[2] == '2') {
							s += 3; // i32 suffix
							MicrosoftInteger = 32;
						}
						break;
					case '6':
						if (s[2] == '4') {
							s += 3; // i64 suffix
							MicrosoftInteger = 64;
						}
						break;
					default:
						break;
					}
				}
				if (MicrosoftInteger) {
					// 's' already points past the iN suffix; stop scanning.
					// Anything left over is handled after the loop.
					assert(s <= ThisTokEnd && "didn't maximally munch?");
					break;
				}
			}
			// "i", "if", and "il" are user-defined suffixes in C++1y.
			if (*s == 'i' && PP.getLangOpts().CPlusPlus14)
				break;
			// fall through.
		case 'j':
		case 'J':
			if (isImaginary) break;   // Cannot be repeated.
			isImaginary = true;
			ImaginarySuffixLoc = s;
			continue;  // Success.
		}
		// If we reached here, there was an error or a ud-suffix.
		break;
	}

	// Leftover characters: either a valid user-defined suffix or an error.
	if (s != ThisTokEnd) {
		// FIXME: Don't bother expanding UCNs if !tok.hasUCN().
		expandUCNs(UDSuffixBuf, StringRef(SuffixBegin, ThisTokEnd - SuffixBegin));
		if (isValidUDSuffix(PP.getLangOpts(), UDSuffixBuf)) {
			// Any suffix pieces we might have parsed are actually part of the
			// ud-suffix.
			isLong = false;
			isUnsigned = false;
			isLongLong = false;
			isFloat = false;
			isImaginary = false;
			MicrosoftInteger = 0;

			saw_ud_suffix = true;
			return;
		}

		// Report an error if there are any.
		PP.Diag(PP.AdvanceToTokenCharacter(TokLoc, SuffixBegin - ThisTokBegin),
				isFPConstant ? diag::err_invalid_suffix_float_constant :
						diag::err_invalid_suffix_integer_constant)
    				  << StringRef(SuffixBegin, ThisTokEnd-SuffixBegin);
		hadError = true;
		return;
	}

	// Imaginary constants are diagnosed as an extension.
	if (isImaginary) {
		PP.Diag(PP.AdvanceToTokenCharacter(TokLoc,
				ImaginarySuffixLoc - ThisTokBegin),
				diag::ext_imaginary_constant);
	}
}