/* Skips a comment that begins at the current character.
 * Line comments are introduced by two slashes and last until the next \n
 * (no other newline character terminates them). A '!' in next_c (the "#!"
 * shebang form) is skipped the same way; this function itself does not
 * check where in the file it occurs.
 * Block comments open with slash-star and close with star-slash. Unlike in
 * C/C++ they nest, so a depth counter is kept.
 * If next_c is none of '/', '!' or '*', nothing is consumed. */
static void scanComments (lexerState *lexer)
{
	int depth;

	/* Line comment: "//" or "#!" */
	if (lexer->next_c == '/' || lexer->next_c == '!')
	{
		advanceNChar(lexer, 2);
		for (; lexer->cur_c != EOF && lexer->cur_c != '\n'; advanceChar(lexer))
			;
		return;
	}

	if (lexer->next_c != '*')
		return;

	/* Block comment: step over the opener, then track nesting depth */
	advanceNChar(lexer, 2);
	depth = 1;
	while (depth > 0 && lexer->cur_c != EOF)
	{
		if (lexer->cur_c == '/' && lexer->next_c == '*')
		{
			/* nested opener */
			depth++;
			advanceNChar(lexer, 2);
		}
		else if (lexer->cur_c == '*' && lexer->next_c == '/')
		{
			/* closer */
			depth--;
			advanceNChar(lexer, 2);
		}
		else
		{
			advanceChar(lexer);
		}
	}
}
/* proceed one character per cycle */ static inline void loop() { byte encodedChar; byte charNumSymbols; /* read next char */ char messageChar = message[messageIdx]; /* handle special cases and validate input */ if (messageChar == ' ') { /* space */ delay(wordSpaceLength_ms - charSpaceLength_ms - blinkSpaceLength_ms); advanceChar(); return; } else if (messageChar < '0' || (messageChar > '9' && messageChar < 'A') || messageChar > 'Z') { /* Invalid char! Skip. */ advanceChar(); return; } /* unpack current character encoding */ encodedChar = morseEncoding[messageChar - '0']; charNumSymbols = encodedChar >> 5; /* top three bits */ /* iterate over symbols in char frorm right to left */ while (charNumSymbols--) { if (encodedChar & 1) dash(); else dot(); delay(blinkSpaceLength_ms); encodedChar >>= 1; } /* terminate char */ delay(charSpaceLength_ms - blinkSpaceLength_ms); advanceChar(); }
/* Double-quoted strings. Only two escapes matter here: \" (which must not
 * terminate the string) and \\ (whose second backslash must not be mistaken
 * for the start of a \" escape, as in the literal "\\"). For \" only the
 * quote is stored; for \\ both backslashes are stored.
 * Strings may last past the end of the line, so be careful not to store too
 * much of them (see MAX_STRING_LENGTH). The only place we look at their
 * contents is in the function definitions, and there the valid strings are
 * things like "C" and "Rust" */
static void scanString (lexerState *lexer)
{
	vStringClear(lexer->token_str);
	/* step over the first character (the opening quote) without storing it */
	advanceChar(lexer);
	while (lexer->cur_c != EOF && lexer->cur_c != '"')
	{
		if (lexer->cur_c == '\\')
		{
			if (lexer->next_c == '\\')
			{
				/* Escaped backslash: store the first half here and let the
				 * common code below store the second half, so that the pair
				 * is consumed as a unit and a following quote still closes
				 * the string. */
				if (vStringLength(lexer->token_str) < MAX_STRING_LENGTH)
					vStringPut(lexer->token_str, (char) lexer->cur_c);
				advanceChar(lexer);
			}
			else if (lexer->next_c == '"')
			{
				/* Escaped quote: drop the backslash, keep the quote */
				advanceChar(lexer);
			}
		}
		if (vStringLength(lexer->token_str) < MAX_STRING_LENGTH)
			vStringPut(lexer->token_str, (char) lexer->cur_c);
		advanceChar(lexer);
	}
	/* step over the closing quote */
	advanceChar(lexer);
}
/* Collects an identifier into lexer->token_str.
 * The current character is stored unconditionally; the characters after it
 * are appended for as long as isIdentifierContinue() accepts them and EOF
 * has not been reached. */
static void scanIdentifier (lexerState *lexer)
{
	vStringClear(lexer->token_str);

	/* first character is taken without any check */
	vStringPut(lexer->token_str, (char) lexer->cur_c);
	advanceChar(lexer);

	while (lexer->cur_c != EOF && isIdentifierContinue(lexer->cur_c))
	{
		vStringPut(lexer->token_str, (char) lexer->cur_c);
		advanceChar(lexer);
	}
}
/* Skips a comment that begins at the current character; the kind of comment
 * is selected by the character that follows it (next_c):
 *   '/'  line comment, lasts until the next \n (potentially after a \r)
 *   '!'  "#!" shebang, also skipped up to the next \n, unless the sequence is
 *        exactly #![ which is an attribute: then only the two leading
 *        characters are consumed
 *   '*'  block comment, opened by slash-star and closed by star-slash;
 *        unlike in C/C++ these nest
 * Any other next_c leaves the lexer untouched. */
static void scanComments (lexerState *lexer)
{
	int depth;

	switch (lexer->next_c)
	{
		case '/':
			advanceNChar(lexer, 2);
			while (lexer->cur_c != EOF && lexer->cur_c != '\n')
				advanceChar(lexer);
			break;

		case '!':
			advanceNChar(lexer, 2);
			/* "#![" starts an attribute, not a comment: stop right here */
			if (lexer->cur_c != '[')
			{
				while (lexer->cur_c != EOF && lexer->cur_c != '\n')
					advanceChar(lexer);
			}
			break;

		case '*':
			advanceNChar(lexer, 2);
			/* depth counts the currently open (nested) block comments */
			for (depth = 1; depth > 0 && lexer->cur_c != EOF; )
			{
				if (lexer->cur_c == '/' && lexer->next_c == '*')
				{
					depth++;
					advanceNChar(lexer, 2);
				}
				else if (lexer->cur_c == '*' && lexer->next_c == '/')
				{
					depth--;
					advanceNChar(lexer, 2);
				}
				else
				{
					advanceChar(lexer);
				}
			}
			break;

		default:
			break;
	}
}
void consume(int match, int len) { Token instance; instance.match = match; instance.start = this->nextIndex; // Record where this token starts instance.lineStart = this->linePosition; instance.colStart = this->charPosition; for (int i=0; i < len; i++) advanceChar(); // Record where this token ends instance.end = this->nextIndex; instance.lineEnd = this->linePosition; instance.colEnd = this->charPosition; // Update precedingIndent if this is the first whitespace on a line if (this->precedingIndent == -1) { if (instance.match == tok_Whitespace) this->precedingIndent = len; else this->precedingIndent = 0; } // NEWLINE token is a special case if (instance.match == tok_Newline) { instance.lineEnd = instance.lineStart; instance.colEnd = instance.colStart + 1; this->precedingIndent = -1; } instance.precedingIndent = this->precedingIndent; ca_assert(instance.lineStart >= 0); ca_assert(instance.lineEnd >= 0); ca_assert(instance.colStart >= 0); ca_assert(instance.colEnd >= 0); ca_assert(instance.lineStart <= instance.lineEnd); ca_assert((instance.colEnd > instance.colStart) || instance.lineStart < instance.lineEnd); results->push_back(instance); }
/* Raw strings look like this: r"" or r##""## where the number of
 * hashes must match.
 * On a properly terminated string, token_str ends up holding only the text
 * between the quotes (capped at MAX_STRING_LENGTH); the closing quote and
 * hashes are removed again even for an empty string or when the cap was
 * reached. If no opening quote follows the hashes, the function returns
 * with token_str empty. */
static void scanRawString (lexerState *lexer)
{
	size_t num_initial_hashes = 0;

	vStringClear(lexer->token_str);
	/* step over the first character (the raw-string prefix) */
	advanceChar(lexer);

	/* Count how many leading hashes there are */
	while (lexer->cur_c == '#')
	{
		num_initial_hashes++;
		advanceChar(lexer);
	}
	if (lexer->cur_c != '"')
		return;
	advanceChar(lexer);

	while (lexer->cur_c != EOF)
	{
		if (lexer->cur_c == '"')
		{
			/* Possible terminator. Remember how much real content has been
			 * stored so far, so the delimiter can be dropped again exactly,
			 * whatever the buffer state is. */
			size_t content_length = vStringLength(lexer->token_str);
			size_t num_trailing_hashes = 0;

			if (vStringLength(lexer->token_str) < MAX_STRING_LENGTH)
				vStringPut(lexer->token_str, (char) lexer->cur_c);
			advanceChar(lexer);

			/* Count the trailing hashes, but never consume more than the
			 * number of leading ones. They are stored because the quote may
			 * turn out to be part of the string contents. */
			while (lexer->cur_c == '#' && num_trailing_hashes < num_initial_hashes)
			{
				num_trailing_hashes++;
				if (vStringLength(lexer->token_str) < MAX_STRING_LENGTH)
					vStringPut(lexer->token_str, (char) lexer->cur_c);
				advanceChar(lexer);
			}

			if (num_trailing_hashes == num_initial_hashes)
			{
				/* Real terminator: strip the quote and hashes again */
				if (vStringLength(lexer->token_str) > content_length)
				{
					lexer->token_str->length = content_length;
					lexer->token_str->buffer[lexer->token_str->length] = '\0';
				}
				break;
			}
		}
		else
		{
			if (vStringLength(lexer->token_str) < MAX_STRING_LENGTH)
				vStringPut(lexer->token_str, (char) lexer->cur_c);
			advanceChar(lexer);
		}
	}
}
/* Advances the parser one token, optionally skipping whitespace
 * (otherwise it is concatenated and returned as a single whitespace token).
 * Comments count as whitespace. Whitespace is needed to properly render
 * function signatures. Unrecognized token starts are stored literally,
 * e.g. the token may be equal to the character '#'.
 * The resulting token is stored in lexer->cur_token and also returned. */
static int advanceToken (lexerState *lexer, boolean skip_whitspace)
{
	boolean have_whitespace = FALSE;

	lexer->line = getSourceLineNumber();
	lexer->pos = getInputFilePosition();

	/* Phase 1: swallow any run of whitespace and comments */
	for (;;)
	{
		if (lexer->cur_c == EOF)
			break;

		if (isWhitespace(lexer->cur_c))
		{
			scanWhitespace(lexer);
			have_whitespace = TRUE;
			continue;
		}

		/* "//" or slash-star */
		if (lexer->cur_c == '/' && (lexer->next_c == '/' || lexer->next_c == '*'))
		{
			scanComments(lexer);
			have_whitespace = TRUE;
			continue;
		}

		/* Something else follows: report the whitespace if it was asked for */
		if (have_whitespace && !skip_whitspace)
		{
			lexer->cur_token = TOKEN_WHITESPACE;
			return lexer->cur_token;
		}
		break;
	}

	/* Phase 2: the real token starts here, so record its position */
	lexer->line = getSourceLineNumber();
	lexer->pos = getInputFilePosition();

	if (lexer->cur_c == EOF)
	{
		lexer->cur_token = TOKEN_EOF;
	}
	else if (lexer->cur_c == '"')
	{
		scanString(lexer);
		lexer->cur_token = TOKEN_STRING;
	}
	else if (lexer->cur_c == 'r' && (lexer->next_c == '#' || lexer->next_c == '"'))
	{
		scanRawString(lexer);
		lexer->cur_token = TOKEN_STRING;
	}
	else if (lexer->cur_c == '\'')
	{
		/* character literals and lifetimes are both reported as strings */
		scanCharacterOrLifetime(lexer);
		lexer->cur_token = TOKEN_STRING;
	}
	else if (isIdentifierStart(lexer->cur_c))
	{
		scanIdentifier(lexer);
		lexer->cur_token = TOKEN_IDENT;
	}
	/* These shift tokens aren't too important for tag-generation per se,
	 * but they confuse the skipUntil code which tracks the <> pairs. */
	else if (lexer->cur_c == '>' && lexer->next_c == '>')
	{
		advanceNChar(lexer, 2);
		lexer->cur_token = TOKEN_RSHIFT;
	}
	else if (lexer->cur_c == '<' && lexer->next_c == '<')
	{
		advanceNChar(lexer, 2);
		lexer->cur_token = TOKEN_LSHIFT;
	}
	else if (lexer->cur_c == '-' && lexer->next_c == '>')
	{
		advanceNChar(lexer, 2);
		lexer->cur_token = TOKEN_RARROW;
	}
	else
	{
		/* Anything else is its own token: the character value itself */
		int literal = lexer->cur_c;
		advanceChar(lexer);
		lexer->cur_token = literal;
	}

	return lexer->cur_token;
}
/* Consumes characters for as long as isWhitespace() accepts the current
 * one; returns with cur_c on the first character that it rejects. */
static void scanWhitespace (lexerState *lexer)
{
	for (;;)
	{
		if (!isWhitespace(lexer->cur_c))
			return;
		advanceChar(lexer);
	}
}
/* Appends the current character to lexerState::token_str, unless the string
 * has already reached MAX_STRING_LENGTH, and then moves on to the next
 * character of the input. The advance happens whether or not the character
 * was stored. */
static void advanceAndStoreChar (lexerState *lexer)
{
	const int has_room = vStringLength(lexer->token_str) < MAX_STRING_LENGTH;

	if (has_room)
	{
		vStringPut(lexer->token_str, (char) lexer->cur_c);
	}
	advanceChar(lexer);
}
/* Reads N characters from the file by calling advanceChar() N times.
 * A zero or negative N reads nothing (a negative count used to keep the
 * loop running until the counter overflowed). */
static void advanceNChar (lexerState *lexer, int n)
{
	while (n > 0)
	{
		advanceChar(lexer);
		n--;
	}
}
// return next token ExtQualModuleNames::tokenType ExtQualModuleNames::scanner() { currentToken_ = ""; if (atEnd()) { return SCANEOF; } while (!atEnd()) { const char cc = returnAdvanceChar(); if (isspace((unsigned char)cc)) { // For VS2003... continue; // do nothing } else if (isalpha(cc)) { // regular identifier currentToken_ += cc; while (isalnum(currentChar()) || currentChar() == '_') { currentToken_ += currentChar(); advanceChar(); } // convert id to internal format (ie, uppercase it) NAString id(currentToken_.c_str()); if (ToInternalIdentifier(id) != 0) { *mxCUMptr << FAIL << DgSqlCode(-2215) << DgString0(currentToken_.c_str()); } currentToken_ = id.data(); return checkReserved(); } currentToken_ += cc; switch (cc) { case '{' : case '}' : case ',' : case '.' : case '=' : return tokenType(cc); case '"': // "delimited identifier" specified by \"([^\"]|"\"\"")*\" while (!atEnd()) { const char c1 = returnAdvanceChar(); currentToken_ += c1; if (c1 == '"') { if (currentChar() == '"') { currentToken_ += currentChar(); } else { // end of delimited identifier // convert delimited id to internal format NAString id(currentToken_.c_str()); if (ToInternalIdentifier(id, FALSE, TRUE) != 0) { *mxCUMptr << FAIL << DgSqlCode(-2209) << DgString0(currentToken_.c_str()); } currentToken_ = id.data(); return ID; } } } *mxCUMptr << FAIL << DgSqlCode(-2210); // unterminated string return ID; default: advanceChar(); *mxCUMptr << FAIL << DgSqlCode(-2211) << DgString0(currentToken_.c_str()); return SCANERROR; } } return SCANEOF; }