/*
 * Read the next token from stream `f`.
 *
 * Whitespace and '%' comments are skipped. Names, strings, hex strings and
 * numbers are lexed by their helpers using `buf`; anything that is not
 * handled by an explicit case is lexed as a name and mapped through
 * pdf_token_from_keyword(). Returns the token code, or PDF_TOK_EOF once the
 * stream is exhausted.
 *
 * Stray ')' and '>' characters only produce a warning and lexing carries on.
 */
int
pdf_lex(fz_stream *f, pdf_lexbuf *buf)
{
	while (1)
	{
		int c = fz_read_byte(f);
		switch (c)
		{
		case EOF:
			return PDF_TOK_EOF;
		case IS_WHITE:
			lex_white(f);
			break;
		case '%':
			lex_comment(f);
			break;
		case '/':
			lex_name(f, buf);
			return PDF_TOK_NAME;
		case '(':
			return lex_string(f, buf);
		case ')':
			fz_warn(f->ctx, "lexical error (unexpected ')')");
			continue;
		case '<':
			c = fz_read_byte(f);
			if (c == '<')
			{
				return PDF_TOK_OPEN_DICT;
			}
			else
			{
				fz_unread_byte(f);
				return lex_hex_string(f, buf);
			}
		case '>':
			c = fz_read_byte(f);
			if (c == '>')
			{
				return PDF_TOK_CLOSE_DICT;
			}
			fz_warn(f->ctx, "lexical error (unexpected '>')");
			/*
			 * The byte after the stray '>' has already been consumed.
			 * Report EOF right away, otherwise push the byte back so it
			 * is lexed normally instead of being silently dropped.
			 */
			if (c == EOF)
			{
				return PDF_TOK_EOF;
			}
			fz_unread_byte(f);
			continue;
		case '[':
			return PDF_TOK_OPEN_ARRAY;
		case ']':
			return PDF_TOK_CLOSE_ARRAY;
		case '{':
			return PDF_TOK_OPEN_BRACE;
		case '}':
			return PDF_TOK_CLOSE_BRACE;
		case IS_NUMBER:
			return lex_number(f, buf, c);
		default: /* isregular: !isdelim && !iswhite && c != EOF */
			fz_unread_byte(f);
			lex_name(f, buf);
			return pdf_token_from_keyword(buf->scratch);
		}
	}
}
// Produce the next token from the character stream `cs`.
//
// Spaces are skipped. A newline is reported as an error token. The single
// character operators + - * / % ( ) are lexed via lex_char(), and a leading
// digit hands over to lex_number(). Any other character is consumed, reported
// on stdout and returned as an error token. If only spaces remain, an error
// token tagged "eof" is returned.
//
// Precondition: the stream is not already at end of input.
Token Lexer::lex()
{
    assert(!cs.eof());

    while (!cs.eof()) {
        // Take the location *before* consuming anything. The order in which
        // function arguments are evaluated is unspecified, so writing
        // lex_char(cs.location(), ..., cs.get(), ...) could record the
        // position after the character had already been read.
        auto start = cs.location();

        switch (cs.peek()) {
        case '\n':
            std::cout << "Error: unexpected newline \n";
            cs.get();
            return Token(-1, error_tok, "error");

        // A space: consume it and keep scanning.
        case ' ':
            cs.get();
            break;

        // Single character tokens.
        case '+': return lex_char(start, plus_tok, cs.get(), 1);
        case '-': return lex_char(start, minus_tok, cs.get(), 1);
        case '*': return lex_char(start, star_tok, cs.get(), 1);
        case '/': return lex_char(start, fslash_tok, cs.get(), 1);
        case '%': return lex_char(start, mod_tok, cs.get(), 1);
        case '(': return lex_char(start, lparen_tok, cs.get(), 1);
        case ')': return lex_char(start, rparen_tok, cs.get(), 1);

        default:
            // Only numbers are recognised here; everything else is an error.
            if (is_digit(cs.peek()))
                return lex_number(start, cs);
            else {
                std::cout << "ERROR: unrecognized token: " << cs.get() << '\n';
                return Token(-1, error_tok, "error");
            }
        }
    }

    // Handles trailing spaces which are not followed by any token.
    return Token(-2, error_tok, "eof");
}
// Read the next token from `stream`.
//
// Leading whitespace is skipped. Parentheses are returned directly; for any
// other character the character is pushed back and the matching helper
// (lex_name, lex_number or lex_operator) lexes the whole token.
//
// Returns an end_of_file_token when the stream is exhausted.
// Throws std::runtime_error for a character that starts no known token (the
// character has been pushed back onto the stream at that point).
Token extract_next_token(std::istream& stream)
{
    // The <cctype> classifiers have undefined behaviour for negative values
    // other than EOF, so every char is converted to unsigned char first.
    char c;
    while (stream.get(c) && std::isspace(static_cast<unsigned char>(c))) {
    }

    if (!stream) return {end_of_file_token, ""};

    if (c == '(') return {open_paren_token, "("};
    if (c == ')') return {close_paren_token, ")"};

    stream.unget();

    const auto uc = static_cast<unsigned char>(c);
    if (std::isalpha(uc)) return lex_name(stream);
    if (std::isdigit(uc)) return lex_number(stream);
    if (isoperator(c)) return lex_operator(stream);

    throw std::runtime_error{"unrecognised character"};
}
// Tokenise the NUL-terminated buffer `input`.
//
// `filename` is only used to label locations in tokens and in errors.
// The returned list holds one Token per lexeme, in input order, followed by
// a final Token::END_OF_FILE token.
//
// Throws StaticError (carrying the location where the offending token began)
// for unterminated strings, bad escape sequences, unterminated multi-line
// comments, malformed text blocks and characters that start no token.
//
// Bookkeeping: `c` is the scan cursor, `line_number` the current 1-based
// line and `line_start` the first character of that line, so a column is
// computed as `c - line_start + 1`. Every branch must leave `c` on the LAST
// character of what it consumed, because the outer for loop performs the
// final ++c.
std::list<Token> jsonnet_lex(const std::string &filename, const char *input)
{
    unsigned long line_number = 1;
    const char *line_start = input;

    std::list<Token> r;

    const char *c = input;

    for ( ; *c!='\0' ; ++c) {
        // Location of the first character of the token about to be lexed.
        Location begin(line_number, c - line_start + 1);
        Token::Kind kind;
        std::string data;

        switch (*c) {

            // Skip non-\n whitespace
            case ' ': case '\t': case '\r':
            continue;

            // Skip \n and maintain line numbers
            case '\n':
            line_number++;
            line_start = c+1;
            continue;

            // Single-character punctuation; these tokens carry no data.
            case '{': kind = Token::BRACE_L; break;
            case '}': kind = Token::BRACE_R; break;
            case '[': kind = Token::BRACKET_L; break;
            case ']': kind = Token::BRACKET_R; break;
            case ':': kind = Token::COLON; break;
            case ',': kind = Token::COMMA; break;
            case '$': kind = Token::DOLLAR; break;
            case '.': kind = Token::DOT; break;
            case '(': kind = Token::PAREN_L; break;
            case ')': kind = Token::PAREN_R; break;
            case ';': kind = Token::SEMICOLON; break;

            // Special cases for unary operators.
            case '!':
            kind = Token::OPERATOR;
            // "!=" is lexed here as a single two-character operator.
            if (*(c+1) == '=') {
                c++;
                data = "!=";
            } else {
                data = "!";
            }
            break;

            case '~': kind = Token::OPERATOR; data = "~"; break;
            case '+': kind = Token::OPERATOR; data = "+"; break;
            case '-': kind = Token::OPERATOR; data = "-"; break;

            // Numeric literals.
            case '0': case '1': case '2': case '3': case '4':
            case '5': case '6': case '7': case '8': case '9':
            kind = Token::NUMBER;
            // NOTE(review): lex_number is defined elsewhere; presumably it
            // advances `c` to the last character of the number -- confirm.
            data = lex_number(c, filename, begin);
            break;

            // String literals.
            case '"': {
                c++;
                // Scan up to the closing quote, decoding escapes into `data`.
                for (; ; ++c) {
                    if (*c == '\0') {
                        throw StaticError(filename, begin, "Unterminated string");
                    }
                    if (*c == '"') {
                        break;
                    }
                    switch (*c) {
                        case '\\':
                        switch (*(++c)) {
                            case '"': data += *c; break;
                            case '\\': data += *c; break;
                            case '/': data += *c; break;
                            case 'b': data += '\b'; break;
                            case 'f': data += '\f'; break;
                            case 'n': data += '\n'; break;
                            case 'r': data += '\r'; break;
                            case 't': data += '\t'; break;

                            case 'u': {
                                ++c;  // Consume the 'u'.
                                unsigned long codepoint = 0;
                                // Expect 4 hex digits.
                                for (unsigned i=0 ; i<4 ; ++i) {
                                    auto x = (unsigned char)(c[i]);
                                    unsigned digit;
                                    if (x == '\0') {
                                        auto msg = "Unterminated string";
                                        throw StaticError(filename, begin, msg);
                                    } else if (x == '"') {
                                        auto msg = "Truncated unicode escape sequence in "
                                                   "string literal.";
                                        throw StaticError(filename, begin, msg);
                                    } else if (x >= '0' && x <= '9') {
                                        digit = x - '0';
                                    } else if (x >= 'a' && x <= 'f') {
                                        digit = x - 'a' + 10;
                                    } else if (x >= 'A' && x <= 'F') {
                                        digit = x - 'A' + 10;
                                    } else {
                                        std::stringstream ss;
                                        ss << "Malformed unicode escape character, "
                                           << "should be hex: '" << x << "'";
                                        throw StaticError(filename, begin, ss.str());
                                    }
                                    codepoint *= 16;
                                    codepoint += digit;
                                }

                                // Append the code point to `data` via the
                                // encode_utf8 helper (defined elsewhere).
                                encode_utf8(codepoint, data);

                                // Leave us on the last char, ready for the ++c at
                                // the outer for loop.
                                c += 3;
                            } break;

                            case '\0': {
                                auto msg = "Truncated escape sequence in string literal.";
                                throw StaticError(filename, begin, msg);
                            }

                            default: {
                                std::stringstream ss;
                                ss << "Unknown escape sequence in string literal: '" << *c << "'";
                                throw StaticError(filename, begin, ss.str());
                            }
                        }
                        break;

                        // Treat as a regular letter, but maintain line/column counters.
                        case '\n':
                        line_number++;
                        line_start = c+1;
                        data += *c;
                        break;

                        default:
                        // Just a regular letter.
                        data += *c;
                    }
                }
                kind = Token::STRING;
            }
            break;

            // Keywords
            default:
            if (is_identifier_first(*c)) {
                std::string id;
                for (; *c != '\0' ; ++c) {
                    if (!is_identifier(*c)) {
                        break;
                    }
                    id += *c;
                }
                // Step back onto the last identifier character.
                --c;
                if (id == "assert") {
                    kind = Token::ASSERT;
                } else if (id == "else") {
                    kind = Token::ELSE;
                } else if (id == "error") {
                    kind = Token::ERROR;
                } else if (id == "false") {
                    kind = Token::FALSE;
                } else if (id == "for") {
                    kind = Token::FOR;
                } else if (id == "function") {
                    kind = Token::FUNCTION;
                } else if (id == "if") {
                    kind = Token::IF;
                } else if (id == "import") {
                    kind = Token::IMPORT;
                } else if (id == "importstr") {
                    kind = Token::IMPORTSTR;
                } else if (id == "in") {
                    kind = Token::IN;
                } else if (id == "local") {
                    kind = Token::LOCAL;
                } else if (id == "null") {
                    kind = Token::NULL_LIT;
                } else if (id == "self") {
                    kind = Token::SELF;
                } else if (id == "super") {
                    kind = Token::SUPER;
                } else if (id == "tailstrict") {
                    kind = Token::TAILSTRICT;
                } else if (id == "then") {
                    kind = Token::THEN;
                } else if (id == "true") {
                    kind = Token::TRUE;
                } else {
                    // Not a keyword, must be an identifier.
                    kind = Token::IDENTIFIER;
                    data = id;
                }
            } else if (is_symbol(*c)) {

                // Single line C++ style comment
                if (*c == '/' && *(c+1) == '/') {
                    while (*c != '\0' && *c != '\n') {
                        ++c;
                    }
                    // Leaving it on the \n allows processing of \n on next iteration,
                    // i.e. managing of the line & column counter.
                    c--;
                    continue;
                }

                // Single line # comment
                if (*c == '#') {
                    while (*c != '\0' && *c != '\n') {
                        ++c;
                    }
                    // Leaving it on the \n allows processing of \n on next iteration,
                    // i.e. managing of the line & column counter.
                    c--;
                    continue;
                }

                // Multi-line comment.
                if (*c == '/' && *(c+1) == '*') {
                    c += 2;  // Avoid matching /*/: skip the /* before starting the search for */.
                    while (*c != '\0' && !(*c == '*' && *(c+1) == '/')) {
                        if (*c == '\n') {
                            // Just keep track of the line / column counters.
                            line_number++;
                            line_start = c+1;
                        }
                        ++c;
                    }
                    if (*c == '\0') {
                        auto msg = "Multi-line comment has no terminating */.";
                        throw StaticError(filename, begin, msg);
                    }
                    // Leave the counter on the closing /.
                    c++;
                    continue;
                }

                // Text block
                if (*c == '|' && *(c+1) == '|' && *(c+2) == '|' && *(c+3) == '\n') {
                    std::stringstream block;
                    c += 4; // Skip the "|||\n"
                    line_number++;
                    line_start = c;
                    const char *first_line = c;
                    // NOTE(review): whitespace_check is defined elsewhere; it is
                    // used here as "number of leading whitespace characters of the
                    // line at `c` that match the first line's indent, 0 if none" --
                    // confirm against its definition.
                    int ws_chars = whitespace_check(first_line, c);
                    if (ws_chars == 0) {
                        auto msg = "Text block's first line must start with whitespace.";
                        throw StaticError(filename, begin, msg);
                    }
                    while (true) {
                        assert(ws_chars > 0);
                        // Read up to the \n
                        for (c = &c[ws_chars]; *c != '\n' ; ++c) {
                            if (*c == '\0')
                                throw StaticError(filename, begin, "Unexpected EOF");
                            block << *c;
                        }
                        // Add the \n
                        block << '\n';
                        ++c;
                        line_number++;
                        line_start = c;
                        // Examine next line
                        ws_chars = whitespace_check(first_line, c);
                        if (ws_chars == 0) {
                            // End of text block
                            // Skip over any whitespace
                            while (*c == ' ' || *c == '\t') ++c;
                            // Expect |||
                            if (!(*c == '|' && *(c+1) == '|' && *(c+2) == '|')) {
                                auto msg = "Text block not terminated with |||";
                                throw StaticError(filename, begin, msg);
                            }
                            c += 2;  // Leave on the last |
                            data = block.str();
                            kind = Token::STRING;
                            break;
                        }
                    }
                    break;  // Out of the switch.
                }

                // Any other run of symbol characters forms one operator token.
                for (; *c != '\0' ; ++c) {
                    if (!is_symbol(*c)) {
                        break;
                    }
                    data += *c;
                }
                // Step back onto the last symbol character.
                --c;
                kind = Token::OPERATOR;
            } else {
                std::stringstream ss;
                ss << "Could not lex the character ";
                auto uc = (unsigned char)(*c);
                // The test is on the plain char, the printed code on the
                // unsigned value.
                if (*c < 32)
                    ss << "code " << unsigned(uc);
                else
                    ss << "'" << *c << "'";
                throw StaticError(filename, begin, ss.str());
            }
            break;
        }

        // `c` sits on the last character of the token, so this is its end.
        Location end(line_number, c - line_start + 1);
        r.push_back(Token(kind, data, LocationRange(filename, begin, end)));
    }

    // Terminating token located at the position of the NUL.
    Location end(line_number, c - line_start + 1);
    r.push_back(Token(Token::END_OF_FILE, "", LocationRange(filename, end, end)));
    return r;
}
/*
** Scan and return the next token.
**
** The string buffer is reset on entry. Whitespace, newlines and comments are
** skipped. For numbers, names and strings the value is stored through tv.
** Single-character tokens are returned as their own character code.
*/
static int llex(LexState *ls, TValue *tv)
{
  lj_str_resetbuf(&ls->sb);
  for (;;) {
    if (lj_char_isident(ls->current)) {
      GCstr *word;
      if (lj_char_isdigit(ls->current)) {
	/* A leading digit always starts a numeric literal. */
	lex_number(ls, tv);
	return TK_number;
      }
      /* Otherwise collect an identifier or reserved word. */
      save_and_next(ls);
      while (lj_char_isident(ls->current)) {
	save_and_next(ls);
      }
      word = lj_parse_keepstr(ls, ls->sb.buf, ls->sb.n);
      setstrV(ls->L, tv, word);
      return (word->reserved > 0) ? TK_OFS + word->reserved : TK_name;
    }
    switch (ls->current) {
    case '\n':
    case '\r':
      inclinenumber(ls);
      continue;
    case ' ':
    case '\t':
    case '\v':
    case '\f':
      next(ls);
      continue;
    case '-':
      next(ls);
      if (ls->current != '-')
	return '-';
      /* Two dashes: a comment follows. */
      next(ls);
      if (ls->current == '[') {
	int level = skip_sep(ls);
	lj_str_resetbuf(&ls->sb);  /* `skip_sep' may dirty the buffer */
	if (level >= 0) {
	  /* Long comment: read it like a long string and discard it. */
	  read_long_string(ls, NULL, level);
	  lj_str_resetbuf(&ls->sb);
	  continue;
	}
      }
      /* Short comment: skip to end of line or end of stream. */
      while (!(currIsNewline(ls) || ls->current == END_OF_STREAM)) {
	next(ls);
      }
      continue;
    case '[': {
      int level = skip_sep(ls);
      if (level >= 0) {
	read_long_string(ls, tv, level);
	return TK_string;
      }
      if (level == -1)
	return '[';
      lj_lex_error(ls, TK_string, LJ_ERR_XLDELIM);
      continue;
      }
    case '=':
      next(ls);
      if (ls->current == '=') {
	next(ls);
	return TK_eq;
      }
      return '=';
    case '<':
      next(ls);
      if (ls->current == '=') {
	next(ls);
	return TK_le;
      }
      return '<';
    case '>':
      next(ls);
      if (ls->current == '=') {
	next(ls);
	return TK_ge;
      }
      return '>';
    case '~':
      next(ls);
      if (ls->current == '=') {
	next(ls);
	return TK_ne;
      }
      return '~';
    case ':':
      next(ls);
      if (ls->current == ':') {
	next(ls);
	return TK_label;
      }
      return ':';
    case '"':
    case '\'':
      read_string(ls, ls->current, tv);
      return TK_string;
    case '.':
      save_and_next(ls);
      if (ls->current == '.') {
	next(ls);
	if (ls->current == '.') {
	  next(ls);
	  return TK_dots;   /* ... */
	}
	return TK_concat;   /* .. */
      }
      if (lj_char_isdigit(ls->current)) {
	/* A dot followed by a digit starts a number. */
	lex_number(ls, tv);
	return TK_number;
      }
      return '.';
    case END_OF_STREAM:
      return TK_eof;
    default: {
      /* Single-char tokens (+ - / ...). */
      int ch = ls->current;
      next(ls);
      return ch;
      }
    }
  }
}
/* Lex a token into pfile->cur_token, which is also incremented, to
   get diagnostics pointing to the correct location.

   Does not handle issues such as token lookahead, multiple-include
   optimization, directives, skipping etc.  This function is only
   suitable for use by _cpp_lex_token, and in special cases like
   lex_expansion_token which doesn't care for any of these issues.

   When meeting a newline, returns CPP_EOF if parsing a directive,
   otherwise returns to the start of the token buffer if permissible.
   Returns the location of the lexed token.

   Control flow: the function restarts at one of three labels instead
   of looping.  `fresh_line' is re-entered after a newline,
   `update_tokens_line' after a discarded comment, and `skipped_white'
   after horizontal whitespace.  */
cpp_token *
_cpp_lex_direct (cpp_reader *pfile)
{
  cppchar_t c;
  cpp_buffer *buffer;
  const unsigned char *comment_start;
  /* Claim the next token slot; it may be re-pointed at the start of
     the base run below when tokens need not be kept.  */
  cpp_token *result = pfile->cur_token++;

 fresh_line:
  result->flags = 0;
  buffer = pfile->buffer;
  if (buffer->need_line)
    {
      if (!_cpp_get_fresh_line (pfile))
        {
          /* No further line is available: produce an EOF token.  */
          result->type = CPP_EOF;
          if (!pfile->state.in_directive)
            {
              /* Tell the compiler the line number of the EOF token.  */
              result->line = pfile->line;
              result->flags = BOL;
            }
          return result;
        }
      if (!pfile->keep_tokens)
        {
          /* Tokens are not being kept, so reuse the base token run
             from its beginning.  */
          pfile->cur_run = &pfile->base_run;
          result = pfile->base_run.base;
          pfile->cur_token = result + 1;
        }
      result->flags = BOL;
      if (pfile->state.parsing_args == 2)
        result->flags |= PREV_WHITE;
    }
  /* Re-read pfile->buffer after the fresh-line handling above.  */
  buffer = pfile->buffer;
 update_tokens_line:
  result->line = pfile->line;

 skipped_white:
  if (buffer->cur >= buffer->notes[buffer->cur_note].pos
      && !pfile->overlaid_buffer)
    {
      /* A pending line note has been reached; processing it may
         change pfile->line, so refresh the token's line.  */
      _cpp_process_line_notes (pfile, false);
      result->line = pfile->line;
    }
  c = *buffer->cur++;
  result->col = CPP_BUF_COLUMN (buffer, buffer->cur);

  /* From here on buffer->cur points one past C, so the token itself
     starts at buffer->cur - 1.  */
  switch (c)
    {
    case ' ': case '\t': case '\f': case '\v': case '\0':
      result->flags |= PREV_WHITE;
      skip_whitespace (pfile, c);
      goto skipped_white;

    case '\n':
      pfile->line++;
      buffer->need_line = true;
      goto fresh_line;

    case '0': case '1': case '2': case '3': case '4':
    case '5': case '6': case '7': case '8': case '9':
      result->type = CPP_NUMBER;
      lex_number (pfile, &result->val.str);
      break;

    case 'L':
      /* 'L' may introduce wide characters or strings.  */
      if (*buffer->cur == '\'' || *buffer->cur == '"')
        {
          lex_string (pfile, result, buffer->cur - 1);
          break;
        }
      /* Fall through.  */

    /* Identifier start characters ('L' is handled just above).  */
    case '_':
    case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
    case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
    case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
    case 's': case 't': case 'u': case 'v': case 'w': case 'x':
    case 'y': case 'z':
    case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
    case 'G': case 'H': case 'I': case 'J': case 'K':
    case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
    case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
    case 'Y': case 'Z':
      result->type = CPP_NAME;
      result->val.node = lex_identifier (pfile, buffer->cur - 1);

      /* Convert named operators to their proper types.  */
      if (result->val.node->flags & NODE_OPERATOR)
        {
          result->flags |= NAMED_OP;
          result->type = result->val.node->directive_index;
        }
      break;

    case '\'':
    case '"':
      lex_string (pfile, result, buffer->cur - 1);
      break;

    case '/':
      /* A potential block or line comment.  */
      comment_start = buffer->cur;
      c = *buffer->cur;

      if (c == '*')
        {
          if (_cpp_skip_block_comment (pfile))
            cpp_error (pfile, CPP_DL_ERROR, "unterminated comment");
        }
      else if (c == '/' && (CPP_OPTION (pfile, cplusplus_comments)
                            || CPP_IN_SYSTEM_HEADER (pfile)))
        {
          /* Warn about comments only if pedantically GNUC89, and not
             in system headers.  */
          if (CPP_OPTION (pfile, lang) == CLK_GNUC89 && CPP_PEDANTIC (pfile)
              && ! buffer->warned_cplusplus_comments)
            {
              cpp_error (pfile, CPP_DL_PEDWARN,
                         "C++ style comments are not allowed in ISO C90");
              cpp_error (pfile, CPP_DL_PEDWARN,
                         "(this will be reported only once per input file)");
              buffer->warned_cplusplus_comments = 1;
            }

          if (skip_line_comment (pfile) && CPP_OPTION (pfile, warn_comments))
            cpp_error (pfile, CPP_DL_WARNING, "multi-line comment");
        }
      else if (c == '=')
        {
          buffer->cur++;
          result->type = CPP_DIV_EQ;
          break;
        }
      else
        {
          result->type = CPP_DIV;
          break;
        }

      /* Only the two comment branches above reach this point.  */
      if (!pfile->state.save_comments)
        {
          /* The comment is dropped; it counts as preceding whitespace
             for the token lexed next.  */
          result->flags |= PREV_WHITE;
          goto update_tokens_line;
        }

      /* Save the comment as a token in its own right.  */
      save_comment (pfile, result, comment_start, c);
      break;

    case '<':
      if (pfile->state.angled_headers)
        {
          /* In this state '<' is handed to lex_string rather than being
             treated as an operator.  */
          lex_string (pfile, result, buffer->cur - 1);
          break;
        }

      /* NOTE(review): IF_NEXT_IS is a macro defined elsewhere; from its
         uses it appears to pick the first token type (consuming one
         character) when the next character matches, else the second
         -- confirm against its definition.  */
      result->type = CPP_LESS;
      if (*buffer->cur == '=')
        buffer->cur++, result->type = CPP_LESS_EQ;
      else if (*buffer->cur == '<')
        {
          buffer->cur++;
          IF_NEXT_IS ('=', CPP_LSHIFT_EQ, CPP_LSHIFT);
        }
      else if (*buffer->cur == '?' && CPP_OPTION (pfile, cplusplus))
        {
          buffer->cur++;
          IF_NEXT_IS ('=', CPP_MIN_EQ, CPP_MIN);
        }
      else if (CPP_OPTION (pfile, digraphs))
        {
          /* Digraphs "<:" and "<%".  */
          if (*buffer->cur == ':')
            {
              buffer->cur++;
              result->flags |= DIGRAPH;
              result->type = CPP_OPEN_SQUARE;
            }
          else if (*buffer->cur == '%')
            {
              buffer->cur++;
              result->flags |= DIGRAPH;
              result->type = CPP_OPEN_BRACE;
            }
        }
      break;

    case '>':
      result->type = CPP_GREATER;
      if (*buffer->cur == '=')
        buffer->cur++, result->type = CPP_GREATER_EQ;
      else if (*buffer->cur == '>')
        {
          buffer->cur++;
          IF_NEXT_IS ('=', CPP_RSHIFT_EQ, CPP_RSHIFT);
        }
      else if (*buffer->cur == '?' && CPP_OPTION (pfile, cplusplus))
        {
          buffer->cur++;
          IF_NEXT_IS ('=', CPP_MAX_EQ, CPP_MAX);
        }
      break;

    case '%':
      result->type = CPP_MOD;
      if (*buffer->cur == '=')
        buffer->cur++, result->type = CPP_MOD_EQ;
      else if (CPP_OPTION (pfile, digraphs))
        {
          /* Digraphs "%:", "%:%:" and "%>".  */
          if (*buffer->cur == ':')
            {
              buffer->cur++;
              result->flags |= DIGRAPH;
              result->type = CPP_HASH;
              if (*buffer->cur == '%' && buffer->cur[1] == ':')
                buffer->cur += 2, result->type = CPP_PASTE;
            }
          else if (*buffer->cur == '>')
            {
              buffer->cur++;
              result->flags |= DIGRAPH;
              result->type = CPP_CLOSE_BRACE;
            }
        }
      break;

    case '.':
      result->type = CPP_DOT;
      if (ISDIGIT (*buffer->cur))
        {
          /* A dot followed by a digit starts a number.  */
          result->type = CPP_NUMBER;
          lex_number (pfile, &result->val.str);
        }
      else if (*buffer->cur == '.' && buffer->cur[1] == '.')
        buffer->cur += 2, result->type = CPP_ELLIPSIS;
      else if (*buffer->cur == '*' && CPP_OPTION (pfile, cplusplus))
        buffer->cur++, result->type = CPP_DOT_STAR;
      break;

    case '+':
      result->type = CPP_PLUS;
      if (*buffer->cur == '+')
        buffer->cur++, result->type = CPP_PLUS_PLUS;
      else if (*buffer->cur == '=')
        buffer->cur++, result->type = CPP_PLUS_EQ;
      break;

    case '-':
      result->type = CPP_MINUS;
      if (*buffer->cur == '>')
        {
          buffer->cur++;
          result->type = CPP_DEREF;
          if (*buffer->cur == '*' && CPP_OPTION (pfile, cplusplus))
            buffer->cur++, result->type = CPP_DEREF_STAR;
        }
      else if (*buffer->cur == '-')
        buffer->cur++, result->type = CPP_MINUS_MINUS;
      else if (*buffer->cur == '=')
        buffer->cur++, result->type = CPP_MINUS_EQ;
      break;

    case '&':
      result->type = CPP_AND;
      if (*buffer->cur == '&')
        buffer->cur++, result->type = CPP_AND_AND;
      else if (*buffer->cur == '=')
        buffer->cur++, result->type = CPP_AND_EQ;
      break;

    case '|':
      result->type = CPP_OR;
      if (*buffer->cur == '|')
        buffer->cur++, result->type = CPP_OR_OR;
      else if (*buffer->cur == '=')
        buffer->cur++, result->type = CPP_OR_EQ;
      break;

    case ':':
      result->type = CPP_COLON;
      if (*buffer->cur == ':' && CPP_OPTION (pfile, cplusplus))
        buffer->cur++, result->type = CPP_SCOPE;
      else if (*buffer->cur == '>' && CPP_OPTION (pfile, digraphs))
        {
          /* Digraph ":>".  */
          buffer->cur++;
          result->flags |= DIGRAPH;
          result->type = CPP_CLOSE_SQUARE;
        }
      break;

    case '*': IF_NEXT_IS ('=', CPP_MULT_EQ, CPP_MULT); break;
    case '=': IF_NEXT_IS ('=', CPP_EQ_EQ, CPP_EQ); break;
    case '!': IF_NEXT_IS ('=', CPP_NOT_EQ, CPP_NOT); break;
    case '^': IF_NEXT_IS ('=', CPP_XOR_EQ, CPP_XOR); break;
    case '#': IF_NEXT_IS ('#', CPP_PASTE, CPP_HASH); break;

    case '?': result->type = CPP_QUERY; break;
    case '~': result->type = CPP_COMPL; break;
    case ',': result->type = CPP_COMMA; break;
    case '(': result->type = CPP_OPEN_PAREN; break;
    case ')': result->type = CPP_CLOSE_PAREN; break;
    case '[': result->type = CPP_OPEN_SQUARE; break;
    case ']': result->type = CPP_CLOSE_SQUARE; break;
    case '{': result->type = CPP_OPEN_BRACE; break;
    case '}': result->type = CPP_CLOSE_BRACE; break;
    case ';': result->type = CPP_SEMICOLON; break;

      /* @ is a punctuator in Objective-C.  */
    case '@': result->type = CPP_ATSIGN; break;

    case '$':
    case '\\':
      {
        /* Step back onto the character so forms_identifier_p can
           examine it; undo the step if no identifier starts here.  */
        const uchar *base = --buffer->cur;

        if (forms_identifier_p (pfile, true))
          {
            result->type = CPP_NAME;
            result->val.node = lex_identifier (pfile, base);
            break;
          }
        buffer->cur++;
      }
      /* Not an identifier: fall through and emit the single character
         as CPP_OTHER.  */

    default:
      create_literal (pfile, result, buffer->cur - 1, 1, CPP_OTHER);
      break;
    }

  return result;
}
/*
 * Fetch the next token from stream `f`, using `buf` as scratch space for
 * names, strings and numbers.
 *
 * Whitespace and '%' comments are skipped. A stray ')' or '>' is reported
 * with a warning and lexing resumes. Returns PDF_TOK_EOF at end of stream.
 */
pdf_token
pdf_lex(fz_stream *f, pdf_lexbuf *buf)
{
	for (;;)
	{
		int c = fz_read_byte(f);

		switch (c)
		{
		case EOF:
			return PDF_TOK_EOF;

		case IS_WHITE:
			lex_white(f);
			break;

		case '%':
			lex_comment(f);
			break;

		case '/':
			lex_name(f, buf);
			return PDF_TOK_NAME;

		case '(':
			return lex_string(f, buf);

		case ')':
			fz_warn(f->ctx, "lexical error (unexpected ')')");
			break;

		case '<':
			/* "<<" opens a dictionary; a single '<' starts a hex string. */
			c = fz_read_byte(f);
			if (c != '<')
			{
				fz_unread_byte(f);
				return lex_hex_string(f, buf);
			}
			return PDF_TOK_OPEN_DICT;

		case '>':
			c = fz_read_byte(f);
			if (c == '>')
				return PDF_TOK_CLOSE_DICT;
			fz_warn(f->ctx, "lexical error (unexpected '>')");
			if (c == EOF)
				return PDF_TOK_EOF;
			/* Give the byte after the stray '>' back to the stream. */
			fz_unread_byte(f);
			break;

		case '[':
			return PDF_TOK_OPEN_ARRAY;
		case ']':
			return PDF_TOK_CLOSE_ARRAY;
		case '{':
			return PDF_TOK_OPEN_BRACE;
		case '}':
			return PDF_TOK_CLOSE_BRACE;

		case IS_NUMBER:
			/* cf. https://code.google.com/p/sumatrapdf/issues/detail?id=2231 */
			{
				int number_tok = lex_number(f, buf, c);

				/* Discard number characters left directly after the number. */
				for (;;)
				{
					c = fz_peek_byte(f);
					switch (c)
					{
					default:
						return number_tok;
					case IS_NUMBER:
						fz_warn(f->ctx, "ignoring invalid character after number: '%c'", c);
						fz_read_byte(f);
						break;
					}
				}
			}

		default: /* isregular: !isdelim && !iswhite && c != EOF */
			fz_unread_byte(f);
			lex_name(f, buf);
			return pdf_token_from_keyword(buf->scratch);
		}
	}
}
int yylex(void) { int ch; for(;;) { ch = getchar(); switch( ch ) { case EOF: return 0; case '\n': ++yylineno; continue; case '\t': case ' ': continue; case '{': do { ch = getchar(); } while( ch != '}' && ch != EOF ); continue; case ':': /* ASSIGN or COLON */ ch = getchar(); switch( ch ) { case '=': return lex_log_token(T_ASSIGN); default: ungetc(ch, stdin); return lex_log_token(T_COLON); } case ',': return lex_log_token(T_COMMA); case '.': /* RCON or DOT */ ch = getchar(); ungetc(ch, stdin); if( isdigit(ch) ) return lex_log_token(lex_number('.')); else return lex_log_token(T_DOT); case '[': return lex_log_token(T_LBRACK); case ']': return lex_log_token(T_RBRACK); case '(': return lex_log_token(T_LPAREN); case ')': return lex_log_token(T_RPAREN); case '<': /* NE, LE, or LT */ ch = getchar(); switch( ch ) { case '>': return lex_log_token(T_NE); case '=': return lex_log_token(T_LE); default: ungetc(ch, stdin); return lex_log_token(T_LT); } case '=': return lex_log_token(T_EQ); case '>': /* GE or GT */ ch = getchar(); switch( ch ) { case '=': return lex_log_token(T_GE); default: ungetc(ch, stdin); return lex_log_token(T_GT); } case '-': return lex_log_token(T_MINUS); case '*': return lex_log_token(T_MUL); case '+': return lex_log_token(T_PLUS); case '/': return lex_log_token(T_RDIV); case ';': return lex_log_token(T_SEMI); case '&': return lex_log_token(T_AMPER); case '^': return lex_log_token(T_CARET); default: if( isalpha(ch) || ch == '_' ) return lex_log_token(lex_alpha_ident(ch)); else if( isdigit(ch) ) return lex_log_token(lex_number(ch)); else lexerror("Illegal character %03o", ch); } } }
int cmd_t_test (struct lexer *lexer, struct dataset *ds) { bool ok; const struct dictionary *dict = dataset_dict (ds); struct tt tt; int mode_count = 0; /* Variables pertaining to the paired mode */ const struct variable **v1 = NULL; size_t n_v1; const struct variable **v2 = NULL; size_t n_v2; size_t n_pairs = 0; vp *pairs = NULL; /* One sample mode */ double testval = SYSMIS; /* Independent samples mode */ const struct variable *gvar; union value gval0; union value gval1; bool cut = false; tt.wv = dict_get_weight (dict); tt.dict = dict; tt.confidence = 0.95; tt.exclude = MV_ANY; tt.missing_type = MISS_ANALYSIS; tt.n_vars = 0; tt.vars = NULL; tt.mode = MODE_undef; lex_match (lexer, T_EQUALS); for (; lex_token (lexer) != T_ENDCMD; ) { lex_match (lexer, T_SLASH); if (lex_match_id (lexer, "TESTVAL")) { mode_count++; tt.mode = MODE_SINGLE; lex_match (lexer, T_EQUALS); lex_force_num (lexer); testval = lex_number (lexer); lex_get (lexer); } else if (lex_match_id (lexer, "GROUPS")) { mode_count++; cut = false; tt.mode = MODE_INDEP; lex_match (lexer, T_EQUALS); if (NULL == (gvar = parse_variable (lexer, dict))) goto parse_failed; if (lex_match (lexer, T_LPAREN)) { value_init (&gval0, var_get_width (gvar)); parse_value (lexer, &gval0, gvar); cut = true; if (lex_match (lexer, T_COMMA)) { value_init (&gval1, var_get_width (gvar)); parse_value (lexer, &gval1, gvar); cut = false; } lex_force_match (lexer, T_RPAREN); } else { value_init (&gval0, 0); value_init (&gval1, 0); gval0.f = 1.0; gval1.f = 2.0; cut = false; } if ( cut == true && var_is_alpha (gvar)) { msg (SE, _("When applying GROUPS to a string variable, two " "values must be specified.")); goto parse_failed; } } else if (lex_match_id (lexer, "PAIRS")) { bool with = false; bool paired = false; if (tt.n_vars > 0) { msg (SE, _("VARIABLES subcommand may not be used with PAIRS.")); goto parse_failed; } mode_count++; tt.mode = MODE_PAIRED; lex_match (lexer, T_EQUALS); if (!parse_variables_const (lexer, dict, &v1, &n_v1, 
PV_NO_DUPLICATE | PV_NUMERIC)) goto parse_failed; if ( lex_match (lexer, T_WITH)) { with = true; if (!parse_variables_const (lexer, dict, &v2, &n_v2, PV_NO_DUPLICATE | PV_NUMERIC)) goto parse_failed; if (lex_match (lexer, T_LPAREN) && lex_match_id (lexer, "PAIRED") && lex_match (lexer, T_RPAREN)) { paired = true; if (n_v1 != n_v2) { msg (SE, _("PAIRED was specified but the number of variables " "preceding WITH (%zu) did not match the number " "following (%zu)."), n_v1, n_v2); goto parse_failed; } } } { int i; if ( !with ) n_pairs = (n_v1 * (n_v1 - 1)) / 2.0; else if ( paired ) n_pairs = n_v1; else n_pairs = n_v1 * n_v2; pairs = xcalloc (n_pairs, sizeof *pairs); if ( with) { int x = 0; if (paired) { for (i = 0 ; i < n_v1; ++i) { vp *pair = &pairs[i]; (*pair)[0] = v1[i]; (*pair)[1] = v2[i]; } } else { for (i = 0 ; i < n_v1; ++i) { int j; for (j = 0 ; j < n_v2; ++j) { vp *pair = &pairs[x++]; (*pair)[0] = v1[i]; (*pair)[1] = v2[j]; } } } } else { int x = 0; for (i = 0 ; i < n_v1; ++i) { int j; for (j = i + 1 ; j < n_v1; ++j) { vp *pair = &pairs[x++]; (*pair)[0] = v1[i]; (*pair)[1] = v1[j]; } } } } } else if (lex_match_id (lexer, "VARIABLES")) { if ( tt.mode == MODE_PAIRED) { msg (SE, _("VARIABLES subcommand may not be used with PAIRS.")); goto parse_failed; } lex_match (lexer, T_EQUALS); if (!parse_variables_const (lexer, dict, &tt.vars, &tt.n_vars, PV_NO_DUPLICATE | PV_NUMERIC)) goto parse_failed; } else if ( lex_match_id (lexer, "MISSING")) { lex_match (lexer, T_EQUALS); while (lex_token (lexer) != T_ENDCMD && lex_token (lexer) != T_SLASH) { if (lex_match_id (lexer, "INCLUDE")) { tt.exclude = MV_SYSTEM; } else if (lex_match_id (lexer, "EXCLUDE")) { tt.exclude = MV_ANY; } else if (lex_match_id (lexer, "LISTWISE")) { tt.missing_type = MISS_LISTWISE; } else if (lex_match_id (lexer, "ANALYSIS")) { tt.missing_type = MISS_ANALYSIS; } else { lex_error (lexer, NULL); goto parse_failed; } lex_match (lexer, T_COMMA); } } else if (lex_match_id (lexer, "CRITERIA")) { lex_match 
(lexer, T_EQUALS); if ( lex_force_match_id (lexer, "CIN")) if ( lex_force_match (lexer, T_LPAREN)) { lex_force_num (lexer); tt.confidence = lex_number (lexer); lex_get (lexer); lex_force_match (lexer, T_RPAREN); } } else { lex_error (lexer, NULL); goto parse_failed; } } if ( mode_count != 1) { msg (SE, _("Exactly one of TESTVAL, GROUPS and PAIRS subcommands " "must be specified.")); goto parse_failed; } if (tt.n_vars == 0 && tt.mode != MODE_PAIRED) { lex_sbc_missing ("VARIABLES"); goto parse_failed; } /* Deal with splits etc */ { struct casereader *group; struct casegrouper *grouper = casegrouper_create_splits (proc_open (ds), dict); while (casegrouper_get_next_group (grouper, &group)) { if ( tt.mode == MODE_SINGLE) { if ( tt.missing_type == MISS_LISTWISE ) group = casereader_create_filter_missing (group, tt.vars, tt.n_vars, tt.exclude, NULL, NULL); one_sample_run (&tt, testval, group); } else if ( tt.mode == MODE_PAIRED) { if ( tt.missing_type == MISS_LISTWISE ) { group = casereader_create_filter_missing (group, v1, n_v1, tt.exclude, NULL, NULL); group = casereader_create_filter_missing (group, v2, n_v2, tt.exclude, NULL, NULL); } paired_run (&tt, n_pairs, pairs, group); } else /* tt.mode == MODE_INDEP */ { if ( tt.missing_type == MISS_LISTWISE ) { group = casereader_create_filter_missing (group, tt.vars, tt.n_vars, tt.exclude, NULL, NULL); group = casereader_create_filter_missing (group, &gvar, 1, tt.exclude, NULL, NULL); } indep_run (&tt, gvar, cut, &gval0, &gval1, group); } } ok = casegrouper_destroy (grouper); ok = proc_commit (ds) && ok; } free (pairs); free (v1); free (v2); free (tt.vars); return ok ? CMD_SUCCESS : CMD_FAILURE; parse_failed: return CMD_FAILURE; }
bool Lexer::lex_file(char * filename) { FILE * fp = fopen(filename, "r"); if (fp == NULL) { REPORT_ERROR("Failed to open file [%s].", filename); return false; } current_filename = filename; char cur; while (true) { cur = peek_char(fp); while (cur && isspace(cur)) { next_char(fp); cur = peek_char(fp); } if (!cur) { break; } if (isalpha(cur)) { if (!lex_identifier(fp)) { fclose(fp); return false; } continue; } else if (isdigit(cur)) { if (!lex_number(fp)) { fclose(fp); return false; } continue; } else if (cur == '\'') { if (!lex_char_literal(fp)) { fclose(fp); return false; } continue; } else if (cur == '\"') { if (!lex_string_literal(fp)) { fclose(fp); return false; } continue; } else if (cur == '\0') { assert(0); } else { if (!lex_symbol(fp)) { fclose(fp); return false; } continue; } } fclose(fp); return true; }
/*
 * Build a rev_list from one parsed CVS file.
 *
 * The trunk is generated first and registered as head "master"; its number
 * is the lowest trunk revision found in the file, or "1.1" if the file has
 * no trunk revision. Every branch listed on every version is then added as
 * an unnamed head, and the list is run through the post-processing passes
 * below before being returned.
 */
rev_list *
rev_list_cvs (cvs_file *cvs)
{
    rev_list	*list = calloc (1, sizeof (rev_list));
    cvs_number	trunk_number;
    rev_commit	*trunk;
    rev_commit	*branch;
    cvs_version	*ver;
    cvs_branch	*br;
    rev_ref	*head;
    cvs_version	*first_trunk = NULL;

    build_branches();

    /*
     * Locate first revision on trunk branch
     */
    for (ver = cvs->versions; ver; ver = ver->next) {
	if (!cvs_is_trunk (&ver->number))
	    continue;
	if (first_trunk &&
	    cvs_number_compare (&ver->number, &first_trunk->number) >= 0)
	    continue;
	first_trunk = ver;
    }

    /*
     * Generate trunk branch
     */
    trunk_number = first_trunk ? first_trunk->number : lex_number ("1.1");
    trunk = rev_branch_cvs (cvs, &trunk_number);
    if (!trunk)
	fprintf(stderr, "warning - no master branch generated\n");
    else {
	head = rev_list_add_head (list, trunk, atom ("master"), 2);
	head->number = trunk_number;
    }

    /*
     * Search for other branches
     */
#if DEBUG
    printf ("building branches for %s\n", cvs->name);
#endif

    for (ver = cvs->versions; ver; ver = ver->next) {
	br = ver->branches;
	while (br) {
	    branch = rev_branch_cvs (cvs, &br->number);
	    rev_list_add_head (list, branch, NULL, 0);
	    br = br->next;
	}
    }

    /* Post-processing passes; the order is kept as in the original. */
    rev_list_patch_vendor_branch (list, cvs);
    rev_list_graft_branches (list, cvs);
    rev_list_set_refs (list, cvs);
    rev_list_sort_heads (list, cvs);
    rev_list_set_tail (list);
    rev_list_free_dead_files (list);
    rev_list_validate (list);
    return list;
}
/*
 * Read the next token from stream `f`.
 *
 * The token code is stored in *tok. For names, strings and numbers the text
 * is written into `buf` (capacity `n`) and its length is stored in *sl.
 * Whitespace and '%' comments are skipped.
 *
 * Returns fz_okay on success. A stray ')' or a '>' that is not followed by
 * another '>' sets *tok to PDF_TOK_ERROR and returns a thrown "lexical
 * error".
 */
fz_error
pdf_lex(int *tok, fz_stream *f, char *buf, int n, int *sl)
{
	for (;;)
	{
		int c = fz_read_byte(f);

		switch (c)
		{
		case EOF:
			*tok = PDF_TOK_EOF;
			return fz_okay;

		case IS_WHITE:
			lex_white(f);
			break;

		case '%':
			lex_comment(f);
			break;

		case '/':
			lex_name(f, buf, n);
			*sl = strlen(buf);
			*tok = PDF_TOK_NAME;
			return fz_okay;

		case '(':
			*sl = lex_string(f, buf, n);
			*tok = PDF_TOK_STRING;
			return fz_okay;

		case ')':
			goto cleanuperror;

		case '<':
			/* "<<" opens a dictionary; a single '<' starts a hex string. */
			c = fz_read_byte(f);
			if (c == '<')
			{
				*tok = PDF_TOK_OPEN_DICT;
				return fz_okay;
			}
			fz_unread_byte(f);
			*sl = lex_hex_string(f, buf, n);
			*tok = PDF_TOK_STRING;
			return fz_okay;

		case '>':
			c = fz_read_byte(f);
			if (c != '>')
				goto cleanuperror;
			*tok = PDF_TOK_CLOSE_DICT;
			return fz_okay;

		case '[':
			*tok = PDF_TOK_OPEN_ARRAY;
			return fz_okay;

		case ']':
			*tok = PDF_TOK_CLOSE_ARRAY;
			return fz_okay;

		case '{':
			*tok = PDF_TOK_OPEN_BRACE;
			return fz_okay;

		case '}':
			*tok = PDF_TOK_CLOSE_BRACE;
			return fz_okay;

		case IS_NUMBER:
			/* lex_number re-reads the first character itself. */
			fz_unread_byte(f);
			*sl = lex_number(f, buf, n, tok);
			return fz_okay;

		default: /* isregular: !isdelim && !iswhite && c != EOF */
			fz_unread_byte(f);
			lex_name(f, buf, n);
			*sl = strlen(buf);
			*tok = pdf_token_from_keyword(buf);
			return fz_okay;
		}
	}

cleanuperror:
	/* Single exit for lexical errors. */
	*tok = PDF_TOK_ERROR;
	return fz_throw("lexical error");
}
int cmd_reliability (struct lexer *lexer, struct dataset *ds) { const struct dictionary *dict = dataset_dict (ds); struct reliability reliability; reliability.n_variables = 0; reliability.variables = NULL; reliability.model = MODEL_ALPHA; reliability.exclude = MV_ANY; reliability.summary = 0; reliability.wv = dict_get_weight (dict); reliability.total_start = 0; lex_match (lexer, T_SLASH); if (!lex_force_match_id (lexer, "VARIABLES")) { goto error; } lex_match (lexer, T_EQUALS); if (!parse_variables_const (lexer, dict, &reliability.variables, &reliability.n_variables, PV_NO_DUPLICATE | PV_NUMERIC)) goto error; if (reliability.n_variables < 2) msg (MW, _("Reliability on a single variable is not useful.")); { int i; struct cronbach *c; /* Create a default Scale */ reliability.n_sc = 1; reliability.sc = xzalloc (sizeof (struct cronbach) * reliability.n_sc); ds_init_cstr (&reliability.scale_name, "ANY"); c = &reliability.sc[0]; c->n_items = reliability.n_variables; c->items = xzalloc (sizeof (struct variable*) * c->n_items); for (i = 0 ; i < c->n_items ; ++i) c->items[i] = reliability.variables[i]; } while (lex_token (lexer) != T_ENDCMD) { lex_match (lexer, T_SLASH); if (lex_match_id (lexer, "SCALE")) { struct const_var_set *vs; if ( ! lex_force_match (lexer, T_LPAREN)) goto error; if ( ! lex_force_string (lexer) ) goto error; ds_init_substring (&reliability.scale_name, lex_tokss (lexer)); lex_get (lexer); if ( ! 
lex_force_match (lexer, T_RPAREN)) goto error; lex_match (lexer, T_EQUALS); vs = const_var_set_create_from_array (reliability.variables, reliability.n_variables); if (!parse_const_var_set_vars (lexer, vs, &reliability.sc->items, &reliability.sc->n_items, 0)) { const_var_set_destroy (vs); goto error; } const_var_set_destroy (vs); } else if (lex_match_id (lexer, "MODEL")) { lex_match (lexer, T_EQUALS); if (lex_match_id (lexer, "ALPHA")) { reliability.model = MODEL_ALPHA; } else if (lex_match_id (lexer, "SPLIT")) { reliability.model = MODEL_SPLIT; reliability.split_point = -1; if ( lex_match (lexer, T_LPAREN)) { lex_force_num (lexer); reliability.split_point = lex_number (lexer); lex_get (lexer); lex_force_match (lexer, T_RPAREN); } } else goto error; } else if (lex_match_id (lexer, "SUMMARY")) { lex_match (lexer, T_EQUALS); if (lex_match_id (lexer, "TOTAL")) { reliability.summary |= SUMMARY_TOTAL; } else if (lex_match (lexer, T_ALL)) { reliability.summary = 0xFFFF; } else goto error; } else if (lex_match_id (lexer, "MISSING")) { lex_match (lexer, T_EQUALS); while (lex_token (lexer) != T_ENDCMD && lex_token (lexer) != T_SLASH) { if (lex_match_id (lexer, "INCLUDE")) { reliability.exclude = MV_SYSTEM; } else if (lex_match_id (lexer, "EXCLUDE")) { reliability.exclude = MV_ANY; } else { lex_error (lexer, NULL); goto error; } } } else { lex_error (lexer, NULL); goto error; } } if ( reliability.model == MODEL_SPLIT) { int i; const struct cronbach *s; if ( reliability.split_point >= reliability.n_variables) { msg (ME, _("The split point must be less than the number of variables")); goto error; } reliability.n_sc += 2 ; reliability.sc = xrealloc (reliability.sc, sizeof (struct cronbach) * reliability.n_sc); s = &reliability.sc[0]; reliability.sc[1].n_items = (reliability.split_point == -1) ? 
s->n_items / 2 : reliability.split_point; reliability.sc[2].n_items = s->n_items - reliability.sc[1].n_items; reliability.sc[1].items = xzalloc (sizeof (struct variable *) * reliability.sc[1].n_items); reliability.sc[2].items = xzalloc (sizeof (struct variable *) * reliability.sc[2].n_items); for (i = 0; i < reliability.sc[1].n_items ; ++i) reliability.sc[1].items[i] = s->items[i]; while (i < s->n_items) { reliability.sc[2].items[i - reliability.sc[1].n_items] = s->items[i]; i++; } } if ( reliability.summary & SUMMARY_TOTAL) { int i; const int base_sc = reliability.n_sc; reliability.total_start = base_sc; reliability.n_sc += reliability.sc[0].n_items ; reliability.sc = xrealloc (reliability.sc, sizeof (struct cronbach) * reliability.n_sc); for (i = 0 ; i < reliability.sc[0].n_items; ++i ) { int v_src; int v_dest = 0; struct cronbach *s = &reliability.sc[i + base_sc]; s->n_items = reliability.sc[0].n_items - 1; s->items = xzalloc (sizeof (struct variable *) * s->n_items); for (v_src = 0 ; v_src < reliability.sc[0].n_items ; ++v_src) { if ( v_src != i) s->items[v_dest++] = reliability.sc[0].items[v_src]; } } } if ( ! run_reliability (ds, &reliability)) goto error; free (reliability.variables); return CMD_SUCCESS; error: free (reliability.variables); return CMD_FAILURE; }
/** Split Jsonnet source text into tokens.
 *
 * Whitespace and comments are not tokens; they are gathered as "fodder" and attached to the
 * token that follows them.  The returned list always ends with an END_OF_FILE token, which
 * carries any trailing comment fodder.
 *
 * \param filename Name used in the locations of tokens and errors.
 * \param input NUL-terminated source text.
 * \returns The tokens in source order.
 * \throws StaticError on an unterminated string or multi-line comment, a malformed text block
 *         (including "|||" that is not directly followed by a newline), or a character that
 *         cannot start any token.
 */
Tokens jsonnet_lex(const std::string &filename, const char *input)
{
    unsigned long line_number = 1;
    const char *line_start = input;

    Tokens r;

    const char *c = input;

    Fodder fodder;
    bool fresh_line = true;  // Are we tokenizing from the beginning of a new line?

    while (*c != '\0') {
        Token::Kind kind;
        std::string data;
        std::string string_block_indent;
        std::string string_block_term_indent;

        unsigned new_lines, indent;
        lex_ws(c, new_lines, indent, line_start, line_number);

        // If it's the end of the file, discard final whitespace.
        if (*c == '\0')
            break;

        if (new_lines > 0) {
            // Otherwise store whitespace in fodder.
            unsigned blanks = new_lines - 1;
            fodder.emplace_back(FodderElement::LINE_END, blanks, indent, EMPTY);
            fresh_line = true;
        }

        Location begin(line_number, c - line_start + 1);

        switch (*c) {
            // The following operators should never be combined with subsequent symbols.
            case '{':
                kind = Token::BRACE_L;
                c++;
                break;

            case '}':
                kind = Token::BRACE_R;
                c++;
                break;

            case '[':
                kind = Token::BRACKET_L;
                c++;
                break;

            case ']':
                kind = Token::BRACKET_R;
                c++;
                break;

            case ',':
                kind = Token::COMMA;
                c++;
                break;

            case '.':
                kind = Token::DOT;
                c++;
                break;

            case '(':
                kind = Token::PAREN_L;
                c++;
                break;

            case ')':
                kind = Token::PAREN_R;
                c++;
                break;

            case ';':
                kind = Token::SEMICOLON;
                c++;
                break;

            // Numeric literals.
            case '0':
            case '1':
            case '2':
            case '3':
            case '4':
            case '5':
            case '6':
            case '7':
            case '8':
            case '9':
                kind = Token::NUMBER;
                data = lex_number(c, filename, begin);
                break;

            // Double-quoted string literals.
            case '"': {
                c++;
                for (;; ++c) {
                    if (*c == '\0') {
                        throw StaticError(filename, begin, "Unterminated string");
                    }
                    if (*c == '"') {
                        break;
                    }
                    // Keep the backslash and the character it escapes together, so an
                    // escaped quote does not end the literal.
                    if (*c == '\\' && *(c+1) != '\0') {
                        data += *c;
                        ++c;
                    }
                    if (*c == '\n') {
                        // Maintain line/column counters.
                        line_number++;
                        line_start = c+1;
                    }
                    data += *c;
                }
                c++;  // Advance beyond the ".
                kind = Token::STRING_DOUBLE;
            }
            break;

            // Single-quoted string literals.
            case '\'': {
                c++;
                for (;; ++c) {
                    if (*c == '\0') {
                        throw StaticError(filename, begin, "Unterminated string");
                    }
                    if (*c == '\'') {
                        break;
                    }
                    // Same escape handling as for double-quoted strings.
                    if (*c == '\\' && *(c+1) != '\0') {
                        data += *c;
                        ++c;
                    }
                    if (*c == '\n') {
                        // Maintain line/column counters.
                        line_number++;
                        line_start = c+1;
                    }
                    data += *c;
                }
                c++;  // Advance beyond the '.
                kind = Token::STRING_SINGLE;
            }
            break;

            // Keywords, identifiers, comments, text blocks and operators.
            default:
            if (is_identifier_first(*c)) {
                std::string id;
                for (; is_identifier(*c); ++c)
                    id += *c;
                if (id == "assert") {
                    kind = Token::ASSERT;
                } else if (id == "else") {
                    kind = Token::ELSE;
                } else if (id == "error") {
                    kind = Token::ERROR;
                } else if (id == "false") {
                    kind = Token::FALSE;
                } else if (id == "for") {
                    kind = Token::FOR;
                } else if (id == "function") {
                    kind = Token::FUNCTION;
                } else if (id == "if") {
                    kind = Token::IF;
                } else if (id == "import") {
                    kind = Token::IMPORT;
                } else if (id == "importstr") {
                    kind = Token::IMPORTSTR;
                } else if (id == "in") {
                    kind = Token::IN;
                } else if (id == "local") {
                    kind = Token::LOCAL;
                } else if (id == "null") {
                    kind = Token::NULL_LIT;
                } else if (id == "self") {
                    kind = Token::SELF;
                } else if (id == "super") {
                    kind = Token::SUPER;
                } else if (id == "tailstrict") {
                    kind = Token::TAILSTRICT;
                } else if (id == "then") {
                    kind = Token::THEN;
                } else if (id == "true") {
                    kind = Token::TRUE;
                } else {
                    // Not a keyword, must be an identifier.
                    kind = Token::IDENTIFIER;
                }
                data = id;

            } else if (is_symbol(*c) || *c == '#') {

                // Single line C++ and Python style comments.
                if (*c == '#' || (*c == '/' && *(c+1) == '/')) {
                    std::vector<std::string> comment(1);
                    unsigned blanks;
                    unsigned indent;
                    lex_until_newline(c, comment[0], blanks, indent, line_start, line_number);
                    auto kind = fresh_line ? FodderElement::PARAGRAPH : FodderElement::LINE_END;
                    fodder.emplace_back(kind, blanks, indent, comment);
                    fresh_line = true;
                    continue;  // We've not got a token, just fodder, so keep scanning.
                }

                // Multi-line C style comment.
                if (*c == '/' && *(c+1) == '*') {

                    unsigned margin = c - line_start;

                    const char *initial_c = c;
                    c += 2;  // Avoid matching /*/: skip the /* before starting the search for */.

                    while (!(*c == '*' && *(c+1) == '/')) {
                        if (*c == '\0') {
                            auto msg = "Multi-line comment has no terminating */.";
                            throw StaticError(filename, begin, msg);
                        }
                        if (*c == '\n') {
                            // Just keep track of the line / column counters.
                            line_number++;
                            line_start = c+1;
                        }
                        ++c;
                    }
                    c += 2;  // Move the pointer to the char after the closing '/'.

                    std::string comment(initial_c, c - initial_c);  // Includes the "/*" and "*/".

                    // Lex whitespace after comment
                    unsigned new_lines_after, indent_after;
                    lex_ws(c, new_lines_after, indent_after, line_start, line_number);
                    std::vector<std::string> lines;
                    if (comment.find('\n') == std::string::npos) {
                        // Comment looks like /* foo */
                        lines.push_back(comment);
                        fodder.emplace_back(FodderElement::INTERSTITIAL, 0, 0, lines);
                        if (new_lines_after > 0) {
                            fodder.emplace_back(FodderElement::LINE_END, new_lines_after - 1,
                                                indent_after, EMPTY);
                            fresh_line = true;
                        }
                    } else {
                        lines = line_split(comment, margin);
                        assert(lines[0][0] == '/');
                        // Little hack to support PARAGRAPHs with * down the LHS:
                        // Add a space to lines that start with a '*'
                        bool all_star = true;
                        for (auto &l : lines) {
                            if (l[0] != '*')
                                all_star = false;
                        }
                        if (all_star) {
                            for (auto &l : lines) {
                                if (l[0] == '*')
                                    l = " " + l;
                            }
                        }
                        if (new_lines_after == 0) {
                            // Ensure a line end after the paragraph.
                            new_lines_after = 1;
                            indent_after = 0;
                        }
                        if (!fresh_line)
                            // Ensure a line end before the comment.
                            fodder.emplace_back(FodderElement::LINE_END, 0, 0, EMPTY);
                        fodder.emplace_back(FodderElement::PARAGRAPH, new_lines_after - 1,
                                            indent_after, lines);
                        fresh_line = true;
                    }
                    continue;  // We've not got a token, just fodder, so keep scanning.
                }

                // Text block
                if (*c == '|' && *(c+1) == '|' && *(c+2) == '|') {
                    // "|||" can never begin an operator (the operator scan below stops on
                    // it), so when no newline follows there is nothing valid to lex here.
                    // Fail now: otherwise an empty operator token is produced without
                    // consuming any input and the outer loop never terminates.
                    if (*(c+3) != '\n') {
                        auto msg = "Text block syntax requires new line after |||.";
                        throw StaticError(filename, begin, msg);
                    }
                    std::stringstream block;
                    c += 4;  // Skip the "|||\n"
                    line_number++;
                    // Skip any blank lines at the beginning of the block.
                    while (*c == '\n') {
                        line_number++;
                        ++c;
                        block << '\n';
                    }
                    line_start = c;
                    const char *first_line = c;
                    int ws_chars = whitespace_check(first_line, c);
                    string_block_indent = std::string(first_line, ws_chars);
                    if (ws_chars == 0) {
                        auto msg = "Text block's first line must start with whitespace.";
                        throw StaticError(filename, begin, msg);
                    }
                    while (true) {
                        assert(ws_chars > 0);
                        // Read up to the \n
                        for (c = &c[ws_chars]; *c != '\n'; ++c) {
                            if (*c == '\0')
                                throw StaticError(filename, begin, "Unexpected EOF");
                            block << *c;
                        }
                        // Add the \n
                        block << '\n';
                        ++c;
                        line_number++;
                        line_start = c;
                        // Skip any blank lines
                        while (*c == '\n') {
                            line_number++;
                            ++c;
                            block << '\n';
                        }
                        // Examine next line
                        ws_chars = whitespace_check(first_line, c);
                        if (ws_chars == 0) {
                            // End of text block
                            // Skip over any whitespace
                            while (*c == ' ' || *c == '\t') {
                                string_block_term_indent += *c;
                                ++c;
                            }
                            // Expect |||
                            if (!(*c == '|' && *(c+1) == '|' && *(c+2) == '|')) {
                                auto msg = "Text block not terminated with |||";
                                throw StaticError(filename, begin, msg);
                            }
                            c += 3;  // Leave after the last |
                            data = block.str();
                            kind = Token::STRING_BLOCK;
                            break;  // Out of the while loop.
                        }
                    }

                    break;  // Out of the switch.
                }

                const char *operator_begin = c;
                for (; is_symbol(*c); ++c) {
                    // Not allowed // in operators
                    if (*c == '/' && *(c+1) == '/') break;
                    // Not allowed /* in operators
                    if (*c == '/' && *(c+1) == '*') break;
                    // Not allowed ||| in operators
                    if (*c == '|' && *(c+1) == '|' && *(c+2) == '|') break;
                }
                // Not allowed to end with a + - ~ ! unless a single char.
                // So, wind it back if we need to (but not too far).
                while (c > operator_begin + 1
                       && (*(c-1) == '+' || *(c-1) == '-' || *(c-1) == '~' || *(c-1) == '!')) {
                    c--;
                }
                data += std::string(operator_begin, c);
                if (data == "$") {
                    kind = Token::DOLLAR;
                    data = "";
                } else {
                    kind = Token::OPERATOR;
                }
            } else {
                std::stringstream ss;
                ss << "Could not lex the character ";
                auto uc = (unsigned char)(*c);
                if (*c < 32)
                    ss << "code " << unsigned(uc);
                else
                    ss << "'" << *c << "'";
                throw StaticError(filename, begin, ss.str());
            }
        }

        Location end(line_number, c - line_start);
        r.emplace_back(kind, fodder, data, string_block_indent, string_block_term_indent,
                       LocationRange(filename, begin, end));
        fodder.clear();
        fresh_line = false;
    }

    Location end(line_number, c - line_start + 1);
    r.emplace_back(Token::END_OF_FILE, fodder, "", "", "", LocationRange(filename, end, end));
    return r;
}