// Renders a scaled 64-bit integer as decimal text.
//
// value  - the unscaled integer.
// scale  - power of ten applied to value: a positive scale appends that many
//          zeroes, a negative scale places the decimal point that many digits
//          from the right (padding with leading zeroes when needed).
// target - string receiving the text.
// append - when true the text is appended to target, otherwise it replaces
//          target's contents.
//
// A zero value always produces "0", whatever the scale. A scale outside
// [-25, 25] trips fb_assert and leaves target untouched.
void exactNumericToStr(SINT64 value, int scale, Firebird::string& target, bool append)
{
	if (value == 0)
	{
		if (append)
			target.append("0", 1);
		else
			target.assign("0", 1);

		return;
	}

	const int MAX_SCALE = 25;
	const int MAX_BUFFER = 50;

	if (scale < -MAX_SCALE || scale > MAX_SCALE)
	{
		fb_assert(false);
		return;	// throw exception here?
	}

	const bool negative = value < 0;
	const bool fractional = scale < 0;	// is a decimal separator required?

	// The text is produced right to left, starting from the terminator.
	char text[MAX_BUFFER];
	char* const textEnd = text + MAX_BUFFER - 1;
	char* head = textEnd;
	*head = '\0';

	// A positive scale contributes trailing zeroes.
	for (int zeroes = scale; zeroes > 0; --zeroes)
		*--head = '0';

	// Take the magnitude without negating MIN_SINT64 directly.
	FB_UINT64 magnitude = negative ? FB_UINT64(-(value + 1)) + 1 : value;

	// Number of fraction digits still to be written before the separator.
	int fractionLeft = fractional ? -scale : 0;
	bool separatorWritten = false;

	while (magnitude != 0)
	{
		*--head = static_cast<char>(magnitude % 10) + '0';
		magnitude /= 10;

		if (fractional && --fractionLeft == 0)
		{
			*--head = '.';
			separatorWritten = true;
		}
	}

	if (fractional)
	{
		if (!separatorWritten)
		{
			// Fewer digits than fraction positions: we need 0.{0+}M
			for (; fractionLeft > 0; --fractionLeft)
				*--head = '0';

			*--head = '.';
			*--head = '0';
		}
		else if (fractionLeft == 0)
		{
			// The separator was the last thing written: turn .M into 0.M
			*--head = '0';
		}
		// otherwise integer digits already precede the separator (N.M)
	}

	if (negative)
		*--head = '-';

	const FB_SIZE_T len = FB_SIZE_T(textEnd - head);

	if (append)
		target.append(head, len);
	else
		target.assign(head, len);
}
int Parser::yylexAux() { thread_db* tdbb = JRD_get_thread_data(); MemoryPool& pool = *tdbb->getDefaultPool(); SSHORT c = lex.ptr[-1]; UCHAR tok_class = classes(c); char string[MAX_TOKEN_LEN]; // Depending on tok_class of token, parse token lex.last_token = lex.ptr - 1; if (tok_class & CHR_INTRODUCER) { // The Introducer (_) is skipped, all other idents are copied // to become the name of the character set. char* p = string; for (; lex.ptr < lex.end && classes(*lex.ptr) & CHR_IDENT; lex.ptr++) { if (lex.ptr >= lex.end) return -1; check_copy_incr(p, UPPER7(*lex.ptr), string); } check_bound(p, string); if (p - string > MAX_SQL_IDENTIFIER_LEN) yyabandon(-104, isc_dyn_name_longer); *p = 0; // make a string value to hold the name, the name is resolved in pass1_constant. yylval.metaNamePtr = FB_NEW(pool) MetaName(pool, string, p - string); return INTRODUCER; } // parse a quoted string, being sure to look for double quotes if (tok_class & CHR_QUOTE) { StrMark mark; mark.pos = lex.last_token - lex.start; char* buffer = string; size_t buffer_len = sizeof(string); const char* buffer_end = buffer + buffer_len - 1; char* p; for (p = buffer; ; ++p) { if (lex.ptr >= lex.end) { if (buffer != string) gds__free (buffer); yyerror("unterminated string"); return -1; } // Care about multi-line constants and identifiers if (*lex.ptr == '\n') { lex.lines++; lex.line_start = lex.ptr + 1; } // *lex.ptr is quote - if next != quote we're at the end if ((*lex.ptr == c) && ((++lex.ptr == lex.end) || (*lex.ptr != c))) break; if (p > buffer_end) { char* const new_buffer = (char*) gds__alloc (2 * buffer_len); // FREE: at outer block if (!new_buffer) // NOMEM: { if (buffer != string) gds__free (buffer); return -1; } memcpy (new_buffer, buffer, buffer_len); if (buffer != string) gds__free (buffer); buffer = new_buffer; p = buffer + buffer_len; buffer_len = 2 * buffer_len; buffer_end = buffer + buffer_len - 1; } *p = *lex.ptr++; } if (c == '"') { stmt_ambiguous = true; // string delimited by double 
quotes could be // either a string constant or a SQL delimited // identifier, therefore marks the SQL statement as ambiguous if (client_dialect == SQL_DIALECT_V6_TRANSITION) { if (buffer != string) gds__free (buffer); yyabandon (-104, isc_invalid_string_constant); } else if (client_dialect >= SQL_DIALECT_V6) { if (p - buffer >= MAX_TOKEN_LEN) { if (buffer != string) gds__free (buffer); yyabandon(-104, isc_token_too_long); } else if (p > &buffer[MAX_SQL_IDENTIFIER_LEN]) { if (buffer != string) gds__free (buffer); yyabandon(-104, isc_dyn_name_longer); } else if (p - buffer == 0) { if (buffer != string) gds__free (buffer); yyabandon(-104, isc_dyn_zero_len_id); } Attachment* attachment = tdbb->getAttachment(); MetaName name(attachment->nameToMetaCharSet(tdbb, MetaName(buffer, p - buffer))); yylval.metaNamePtr = FB_NEW(pool) MetaName(pool, name); if (buffer != string) gds__free (buffer); return SYMBOL; } } yylval.intlStringPtr = newIntlString(Firebird::string(buffer, p - buffer)); if (buffer != string) gds__free (buffer); mark.length = lex.ptr - lex.last_token; mark.str = yylval.intlStringPtr; strMarks.put(mark.str, mark); return STRING; } /* * Check for a numeric constant, which starts either with a digit or with * a decimal point followed by a digit. * * This code recognizes the following token types: * * NUMBER: string of digits which fits into a 32-bit integer * * NUMBER64BIT: string of digits whose value might fit into an SINT64, * depending on whether or not there is a preceding '-', which is to * say that "9223372036854775808" is accepted here. * * SCALEDINT: string of digits and a single '.', where the digits * represent a value which might fit into an SINT64, depending on * whether or not there is a preceding '-'. * * FLOAT: string of digits with an optional '.', and followed by an "e" * or "E" and an optionally-signed exponent. 
* * NOTE: we swallow leading or trailing blanks, but we do NOT accept * embedded blanks: * * Another note: c is the first character which need to be considered, * ptr points to the next character. */ fb_assert(lex.ptr <= lex.end); // Hexadecimal string constant. This is treated the same as a // string constant, but is defined as: X'bbbb' // // Where the X is a literal 'x' or 'X' character, followed // by a set of nibble values in single quotes. The nibble // can be 0-9, a-f, or A-F, and is converted from the hex. // The number of nibbles should be even. // // The resulting value is stored in a string descriptor and // returned to the parser as a string. This can be stored // in a character or binary item. if ((c == 'x' || c == 'X') && lex.ptr < lex.end && *lex.ptr == '\'') { bool hexerror = false; // Remember where we start from, to rescan later. // Also we'll need to know the length of the buffer. const char* hexstring = ++lex.ptr; int charlen = 0; // Time to scan the string. Make sure the characters are legal, // and find out how long the hex digit string is. for (;;) { if (lex.ptr >= lex.end) // Unexpected EOS { hexerror = true; break; } c = *lex.ptr; if (c == '\'') // Trailing quote, done { ++lex.ptr; // Skip the quote break; } if (!(classes(c) & CHR_HEX)) // Illegal character { hexerror = true; break; } ++charlen; // Okay, just count 'em ++lex.ptr; // and advance... } hexerror = hexerror || (charlen & 1); // IS_ODD(charlen) // If we made it this far with no error, then convert the string. if (!hexerror) { // Figure out the length of the actual resulting hex string. // Allocate a second temporary buffer for it. Firebird::string temp; // Re-scan over the hex string we got earlier, converting // adjacent bytes into nibble values. Every other nibble, // write the saved byte to the temp space. At the end of // this, the temp.space area will contain the binary // representation of the hex constant. 
UCHAR byte = 0; for (int i = 0; i < charlen; i++) { c = UPPER7(hexstring[i]); // Now convert the character to a nibble if (c >= 'A') c = (c - 'A') + 10; else c = (c - '0'); if (i & 1) // nibble? { byte = (byte << 4) + (UCHAR) c; temp.append(1, (char) byte); } else byte = c; } yylval.intlStringPtr = newIntlString(temp, "BINARY"); return STRING; } // if (!hexerror)... // If we got here, there was a parsing error. Set the // position back to where it was before we messed with // it. Then fall through to the next thing we might parse. c = *lex.last_token; lex.ptr = lex.last_token + 1; } if ((c == 'q' || c == 'Q') && lex.ptr + 3 < lex.end && *lex.ptr == '\'') { StrMark mark; mark.pos = lex.last_token - lex.start; char endChar = *++lex.ptr; switch (endChar) { case '{': endChar = '}'; break; case '(': endChar = ')'; break; case '[': endChar = ']'; break; case '<': endChar = '>'; break; } while (++lex.ptr + 1 < lex.end) { if (*lex.ptr == endChar && *++lex.ptr == '\'') { yylval.intlStringPtr = newIntlString( Firebird::string(lex.last_token + 3, lex.ptr - lex.last_token - 4)); ++lex.ptr; mark.length = lex.ptr - lex.last_token; mark.str = yylval.intlStringPtr; strMarks.put(mark.str, mark); return STRING; } } // If we got here, there was a parsing error. Set the // position back to where it was before we messed with // it. Then fall through to the next thing we might parse. c = *lex.last_token; lex.ptr = lex.last_token + 1; } // Hexadecimal numeric constants - 0xBBBBBB // // where the '0' and the 'X' (or 'x') are literal, followed // by a set of nibbles, using 0-9, a-f, or A-F. Odd numbers // of nibbles assume a leading '0'. The result is converted // to an integer, and the result returned to the caller. The // token is identified as a NUMBER if it's a 32-bit or less // value, or a NUMBER64INT if it requires a 64-bit number. 
if (c == '0' && lex.ptr + 1 < lex.end && (*lex.ptr == 'x' || *lex.ptr == 'X') && (classes(lex.ptr[1]) & CHR_HEX)) { bool hexerror = false; // Remember where we start from, to rescan later. // Also we'll need to know the length of the buffer. ++lex.ptr; // Skip the 'X' and point to the first digit const char* hexstring = lex.ptr; int charlen = 0; // Time to scan the string. Make sure the characters are legal, // and find out how long the hex digit string is. for (;;) { if (lex.ptr >= lex.end) // Unexpected EOS { hexerror = true; break; } c = *lex.ptr; if (!(classes(c) & CHR_HEX)) // End of digit string break; ++charlen; // Okay, just count 'em ++lex.ptr; // and advance... if (charlen > 16) // Too many digits... { hexerror = true; break; } } // we have a valid hex token. Now give it back, either as // an NUMBER or NUMBER64BIT. if (!hexerror) { // if charlen > 8 (something like FFFF FFFF 0, w/o the spaces) // then we have to return a NUMBER64BIT. We'll make a string // node here, and let make.cpp worry about converting the // string to a number and building the node later. if (charlen > 8) { char cbuff[32]; cbuff[0] = 'X'; strncpy(&cbuff[1], hexstring, charlen); cbuff[charlen + 1] = '\0'; char* p = &cbuff[1]; while (*p != '\0') { if ((*p >= 'a') && (*p <= 'f')) *p = UPPER(*p); p++; } yylval.stringPtr = newString(cbuff); return NUMBER64BIT; } else { // we have an integer value. we'll return NUMBER. // but we have to make a number value to be compatible // with existing code. // See if the string length is odd. If so, // we'll assume a leading zero. Then figure out the length // of the actual resulting hex string. Allocate a second // temporary buffer for it. bool nibble = (charlen & 1); // IS_ODD(temp.length) // Re-scan over the hex string we got earlier, converting // adjacent bytes into nibble values. Every other nibble, // write the saved byte to the temp space. At the end of // this, the temp.space area will contain the binary // representation of the hex constant. 
UCHAR byte = 0; SINT64 value = 0; for (int i = 0; i < charlen; i++) { c = UPPER(hexstring[i]); // Now convert the character to a nibble if (c >= 'A') c = (c - 'A') + 10; else c = (c - '0'); if (nibble) { byte = (byte << 4) + (UCHAR) c; nibble = false; value = (value << 8) + byte; } else { byte = c; nibble = true; } } yylval.int32Val = (SLONG) value; return NUMBER; } // integer value } // if (!hexerror)... // If we got here, there was a parsing error. Set the // position back to where it was before we messed with // it. Then fall through to the next thing we might parse. c = *lex.last_token; lex.ptr = lex.last_token + 1; } // headecimal numeric constants if ((tok_class & CHR_DIGIT) || ((c == '.') && (lex.ptr < lex.end) && (classes(*lex.ptr) & CHR_DIGIT))) { // The following variables are used to recognize kinds of numbers. bool have_error = false; // syntax error or value too large bool have_digit = false; // we've seen a digit bool have_decimal = false; // we've seen a '.' bool have_exp = false; // digit ... [eE] bool have_exp_sign = false; // digit ... [eE] {+-] bool have_exp_digit = false; // digit ... [eE] ... digit FB_UINT64 number = 0; FB_UINT64 limit_by_10 = MAX_SINT64 / 10; for (--lex.ptr; lex.ptr < lex.end; lex.ptr++) { c = *lex.ptr; if (have_exp_digit && (! (classes(c) & CHR_DIGIT))) // First non-digit after exponent and digit terminates the token. break; if (have_exp_sign && (! (classes(c) & CHR_DIGIT))) { // only digits can be accepted after "1E-" have_error = true; break; } if (have_exp) { // We've seen e or E, but nothing beyond that. if ( ('-' == c) || ('+' == c) ) have_exp_sign = true; else if ( classes(c) & CHR_DIGIT ) // We have a digit: we haven't seen a sign yet, but it's too late now. have_exp_digit = have_exp_sign = true; else { // end of the token have_error = true; break; } } else if ('.' 
== c) { if (!have_decimal) have_decimal = true; else { have_error = true; break; } } else if (classes(c) & CHR_DIGIT) { // Before computing the next value, make sure there will be no overflow. have_digit = true; if (number >= limit_by_10) { // possibility of an overflow if ((number > limit_by_10) || (c > '8')) { have_error = true; break; } } number = number * 10 + (c - '0'); } else if ( (('E' == c) || ('e' == c)) && have_digit ) have_exp = true; else // Unexpected character: this is the end of the number. break; } // We're done scanning the characters: now return the right kind // of number token, if any fits the bill. if (!have_error) { fb_assert(have_digit); if (have_exp_digit) { yylval.stringPtr = newString( Firebird::string(lex.last_token, lex.ptr - lex.last_token)); lex.last_token_bk = lex.last_token; lex.line_start_bk = lex.line_start; lex.lines_bk = lex.lines; return FLOAT_NUMBER; } if (!have_exp) { // We should return some kind (scaled-) integer type // except perhaps in dialect 1. if (!have_decimal && (number <= MAX_SLONG)) { yylval.int32Val = (SLONG) number; //printf ("parse.y %p %d\n", yylval.legacyStr, number); return NUMBER; } else { /* We have either a decimal point with no exponent or a string of digits whose value exceeds MAX_SLONG: the returned type depends on the client dialect, so warn of the difference if the client dialect is SQL_DIALECT_V6_TRANSITION. */ if (SQL_DIALECT_V6_TRANSITION == client_dialect) { /* Issue a warning about the ambiguity of the numeric * numeric literal. There are multiple calls because * the message text exceeds the 119-character limit * of our message database. 
*/ ERRD_post_warning(Arg::Warning(isc_dsql_warning_number_ambiguous) << Arg::Str(Firebird::string(lex.last_token, lex.ptr - lex.last_token))); ERRD_post_warning(Arg::Warning(isc_dsql_warning_number_ambiguous1)); } yylval.stringPtr = newString(Firebird::string(lex.last_token, lex.ptr - lex.last_token)); lex.last_token_bk = lex.last_token; lex.line_start_bk = lex.line_start; lex.lines_bk = lex.lines; if (client_dialect < SQL_DIALECT_V6_TRANSITION) return FLOAT_NUMBER; if (have_decimal) return SCALEDINT; return NUMBER64BIT; } } // else if (!have_exp) } // if (!have_error) // we got some kind of error or overflow, so don't recognize this // as a number: just pass it through to the next part of the lexer. } // Restore the status quo ante, before we started our unsuccessful // attempt to recognize a number. lex.ptr = lex.last_token; c = *lex.ptr++; // We never touched tok_class, so it doesn't need to be restored. // end of number-recognition code if (tok_class & CHR_LETTER) { char* p = string; check_copy_incr(p, UPPER (c), string); for (; lex.ptr < lex.end && classes(*lex.ptr) & CHR_IDENT; lex.ptr++) { if (lex.ptr >= lex.end) return -1; check_copy_incr(p, UPPER (*lex.ptr), string); } check_bound(p, string); *p = 0; if (p > &string[MAX_SQL_IDENTIFIER_LEN]) yyabandon(-104, isc_dyn_name_longer); MetaName str(string, p - string); KeywordVersion* keyVer = keywordsMap->get(str); if (keyVer && parser_version >= keyVer->version && (keyVer->keyword != COMMENT || lex.prev_keyword == -1)) { yylval.metaNamePtr = keyVer->str; lex.last_token_bk = lex.last_token; lex.line_start_bk = lex.line_start; lex.lines_bk = lex.lines; return keyVer->keyword; } yylval.metaNamePtr = FB_NEW(pool) MetaName(pool, str); lex.last_token_bk = lex.last_token; lex.line_start_bk = lex.line_start; lex.lines_bk = lex.lines; return SYMBOL; } // Must be punctuation -- test for double character punctuation if (lex.last_token + 1 < lex.end && !isspace(UCHAR(lex.last_token[1]))) { Firebird::string 
str(lex.last_token, 2); KeywordVersion* keyVer = keywordsMap->get(str); if (keyVer && keyVer && parser_version >= keyVer->version) { ++lex.ptr; return keyVer->keyword; } } // Single character punctuation are simply passed on return (UCHAR) c; }