C++ (Cpp) is_identifier_first Examples

Programming Language: C++ (Cpp)

Method/Function: is_identifier_first

Examples at hotexamples.com: 3

C++ (Cpp) is_identifier_first - 3 examples found. These are the top-rated real-world C++ (Cpp) examples of is_identifier_first extracted from open source projects. You can rate the examples to help us improve their quality.

Example #1

Show file

File: lexer.cpp Project: johnboiles/jsonnet

// Lex a NUL-terminated source buffer into a list of tokens.
//
// Scans `input` one character at a time while tracking the current line number and the start
// of the current line, so that every token is emitted with a LocationRange (filename, begin,
// end).  Whitespace and comments are skipped without producing tokens.  The returned list
// always ends with a Token::END_OF_FILE token.
//
// Token data: OPERATOR, NUMBER, STRING and IDENTIFIER tokens carry text in `data`; punctuation
// and keyword tokens are emitted with empty data.
//
// \param filename Name recorded in locations and in StaticError reports.
// \param input NUL-terminated text to lex.
// \returns The tokens in source order, terminated by END_OF_FILE.
// \throws StaticError for unterminated strings, comments or text blocks, malformed escape
//         sequences, and characters that cannot start any token.
std::list<Token> jsonnet_lex(const std::string &filename, const char *input)
{
    // Line that c is currently on (starts at 1).
    unsigned long line_number = 1;
    // First character of the current line; columns are computed as offsets from it.
    const char *line_start = input;

    std::list<Token> r;

    const char *c = input;

    // Invariant: every case below leaves c on the LAST character of the lexeme it consumed.
    // The ++c in this loop header is what steps onto the next lexeme.  A `continue` skips the
    // token emission at the bottom of the loop body (used for whitespace and comments).
    for ( ; *c!='\0' ; ++c) {
        // Start position of the lexeme: column is the offset from line_start plus one.
        Location begin(line_number, c - line_start + 1);
        Token::Kind kind;
        std::string data;

        switch (*c) {

            // Skip non-\n whitespace
            case ' ': case '\t': case '\r':
            continue;

            // Skip \n and maintain line numbers
            case '\n':
            line_number++;
            line_start = c+1;
            continue;

            // Single-character punctuation: only the kind is recorded, data stays empty.
            case '{':
            kind = Token::BRACE_L;
            break;

            case '}':
            kind = Token::BRACE_R;
            break;

            case '[':
            kind = Token::BRACKET_L;
            break;

            case ']':
            kind = Token::BRACKET_R;
            break;

            case ':':
            kind = Token::COLON;
            break;

            case ',':
            kind = Token::COMMA;
            break;

            case '$':
            kind = Token::DOLLAR;
            break;

            case '.':
            kind = Token::DOT;
            break;

            case '(':
            kind = Token::PAREN_L;
            break;

            case ')':
            kind = Token::PAREN_R;
            break;

            case ';':
            kind = Token::SEMICOLON;
            break;

            // Special cases for unary operators.
            // These are always lexed on their own rather than by the general operator scan in
            // the default case; the one exception is "!=" which is recognised by peeking at
            // the next character.
            case '!':
            kind = Token::OPERATOR;
            if (*(c+1) == '=') {
                c++;
                data = "!=";
            } else {
                data = "!";
            }
            break;

            case '~':
            kind = Token::OPERATOR;
            data = "~";
            break;

            case '+':
            kind = Token::OPERATOR;
            data = "+";

            break;
            case '-':
            kind = Token::OPERATOR;
            data = "-";
            break;

            // Numeric literals.
            // NOTE(review): lex_number is given c, filename and begin; presumably it takes c by
            // reference and leaves it on the last character of the number -- confirm against
            // its definition.
            case '0': case '1': case '2': case '3': case '4':
            case '5': case '6': case '7': case '8': case '9':
            kind = Token::NUMBER;
            data = lex_number(c, filename, begin);
            break;

            // String literals.
            // Escape sequences are decoded here, so `data` holds the string's value rather
            // than its source text.  The loop exits with c on the closing quote.
            case '"': {
                c++;
                for (; ; ++c) {
                    if (*c == '\0') {
                        throw StaticError(filename, begin, "Unterminated string");
                    }
                    if (*c == '"') {
                        break;
                    }
                    switch (*c) {
                        case '\\':
                        // Advance onto the character that follows the backslash.
                        switch (*(++c)) {
                            case '"':
                            data += *c;
                            break;

                            case '\\':
                            data += *c;
                            break;

                            case '/':
                            data += *c;
                            break;

                            case 'b':
                            data += '\b';
                            break;

                            case 'f':
                            data += '\f';
                            break;

                            case 'n':
                            data += '\n';
                            break;

                            case 'r':
                            data += '\r';
                            break;

                            case 't':
                            data += '\t';
                            break;

                            case 'u': {
                                ++c;  // Consume the 'u'.
                                unsigned long codepoint = 0;
                                // Expect 4 hex digits.
                                // The digits are read via c[i] without moving c; reaching the
                                // NUL or the closing quote early throws before reading further.
                                for (unsigned i=0 ; i<4 ; ++i) {
                                    auto x = (unsigned char)(c[i]);
                                    unsigned digit;
                                    if (x == '\0') {
                                        auto msg = "Unterminated string";
                                        throw StaticError(filename, begin, msg);
                                    } else if (x == '"') {
                                        auto msg = "Truncated unicode escape sequence in "
                                                   "string literal.";
                                        throw StaticError(filename, begin, msg);
                                    } else if (x >= '0' && x <= '9') {
                                        digit = x - '0';
                                    } else if (x >= 'a' && x <= 'f') {
                                        digit = x - 'a' + 10;
                                    } else if (x >= 'A' && x <= 'F') {
                                        digit = x - 'A' + 10;
                                    } else {
                                        std::stringstream ss;
                                        ss << "Malformed unicode escape character, "
                                           << "should be hex: '" << x << "'";
                                        throw StaticError(filename, begin, ss.str());
                                    }
                                    codepoint *= 16;
                                    codepoint += digit;
                                }

                                // Appends the code point to data (presumably as UTF-8 bytes,
                                // going by the helper's name -- confirm).
                                encode_utf8(codepoint, data);

                                // Leave us on the last char, ready for the ++c at
                                // the outer for loop.
                                c += 3;
                            }
                            break;

                            // The input ended immediately after the backslash.
                            case '\0': {
                                auto msg = "Truncated escape sequence in string literal.";
                                throw StaticError(filename, begin, msg);
                            }

                            default: {
                                std::stringstream ss;
                                ss << "Unknown escape sequence in string literal: '" << *c << "'";
                                throw StaticError(filename, begin, ss.str());
                            }
                        }
                        break;

                        // Treat as a regular letter, but maintain line/column counters.
                        case '\n':
                        line_number++;
                        line_start = c+1;
                        data += *c;
                        break;

                        default:
                        // Just a regular letter.
                        data += *c;
                    }
                }
                kind = Token::STRING;
            }
            break;

            // Keywords
            // Also identifiers, comments, text blocks and multi-character operators:
            // everything not matched by an explicit case above ends up here.
            default:
            if (is_identifier_first(*c)) {
                std::string id;
                for (; *c != '\0' ; ++c) {
                    if (!is_identifier(*c)) {
                        break;
                    }
                    id += *c;
                }
                // The scan stopped one past the identifier; step back onto its last character.
                --c;
                // Keywords get their own kind and leave data empty; only plain identifiers
                // keep their spelling in data.
                if (id == "assert") {
                    kind = Token::ASSERT;
                } else if (id == "else") {
                    kind = Token::ELSE;
                } else if (id == "error") {
                    kind = Token::ERROR;
                } else if (id == "false") {
                    kind = Token::FALSE;
                } else if (id == "for") {
                    kind = Token::FOR;
                } else if (id == "function") {
                    kind = Token::FUNCTION;
                } else if (id == "if") {
                    kind = Token::IF;
                } else if (id == "import") {
                    kind = Token::IMPORT;
                } else if (id == "importstr") {
                    kind = Token::IMPORTSTR;
                } else if (id == "in") {
                    kind = Token::IN;
                } else if (id == "local") {
                    kind = Token::LOCAL;
                } else if (id == "null") {
                    kind = Token::NULL_LIT;
                } else if (id == "self") {
                    kind = Token::SELF;
                } else if (id == "super") {
                    kind = Token::SUPER;
                } else if (id == "tailstrict") {
                    kind = Token::TAILSTRICT;
                } else if (id == "then") {
                    kind = Token::THEN;
                } else if (id == "true") {
                    kind = Token::TRUE;
                } else {
                    // Not a keyword, must be an identifier.
                    kind = Token::IDENTIFIER;
                    data = id;
                }
            } else if (is_symbol(*c)) {

                // Single line C++ style comment
                if (*c == '/' && *(c+1) == '/') {
                    while (*c != '\0' && *c != '\n') {
                        ++c;
                    }
                    // Leaving it on the \n allows processing of \n on next iteration,
                    // i.e. managing of the line & column counter.
                    c--;
                    continue;
                }

                // Single line # comment
                if (*c == '#') {
                    while (*c != '\0' && *c != '\n') {
                        ++c;
                    }
                    // Leaving it on the \n allows processing of \n on next iteration,
                    // i.e. managing of the line & column counter.
                    c--;
                    continue;
                }

                // Multi-line comment.
                if (*c == '/' && *(c+1) == '*') {
                    c += 2;  // Avoid matching /*/: skip the /* before starting the search for */.
                    while (*c != '\0' && !(*c == '*' && *(c+1) == '/')) {
                        if (*c == '\n') {
                            // Just keep track of the line / column counters.
                            line_number++;
                            line_start = c+1;
                        }
                        ++c;
                    }
                    if (*c == '\0') {
                        auto msg = "Multi-line comment has no terminating */.";
                        throw StaticError(filename, begin, msg);
                    }
                    // Leave the counter on the closing /.
                    c++;
                    continue;
                }
                // Text block
                // Recognised only when "|||" is immediately followed by a newline.  The &&
                // chain short-circuits, so no character beyond the terminating NUL is read.
                if (*c == '|' && *(c+1) == '|' && *(c+2) == '|' && *(c+3) == '\n') {
                    std::stringstream block;
                    c += 4; // Skip the "|||\n"
                    line_number++;
                    line_start = c;
                    const char *first_line = c;
                    // NOTE(review): whitespace_check is opaque here; it appears to return how
                    // many leading characters of the line at c match the leading whitespace of
                    // first_line, with 0 meaning "not indented like the block" -- confirm.
                    int ws_chars = whitespace_check(first_line, c);
                    if (ws_chars == 0) {
                        auto msg = "Text block's first line must start with whitespace.";
                        throw StaticError(filename, begin, msg);
                    }
                    while (true) {
                        assert(ws_chars > 0);
                        // Read up to the \n
                        // (starting after the ws_chars characters of indentation, which are
                        // not copied into the block).
                        for (c = &c[ws_chars]; *c != '\n' ; ++c) {
                            if (*c == '\0')
                                throw StaticError(filename, begin, "Unexpected EOF");
                            block << *c;
                        }
                        // Add the \n
                        block << '\n';
                        ++c;
                        line_number++;
                        line_start = c;
                        // Examine next line
                        ws_chars = whitespace_check(first_line, c);
                        if (ws_chars == 0) {
                            // End of text block
                            // Skip over any whitespace
                            while (*c == ' ' || *c == '\t') ++c;
                            // Expect |||
                            if (!(*c == '|' && *(c+1) == '|' && *(c+2) == '|')) {
                                auto msg = "Text block not terminated with |||";
                                throw StaticError(filename, begin, msg);
                            }
                            c += 2;  // Leave on the last |
                            data = block.str();
                            kind = Token::STRING;
                            break;
                        }
                    }

                    break;  // Out of the switch.
                }

                // General operator: the longest run of symbol characters starting at c.
                for (; *c != '\0' ; ++c) {
                    if (!is_symbol(*c)) {
                        break;
                    }
                    data += *c;
                }
                // Step back onto the last symbol character for the outer ++c.
                --c;
                kind = Token::OPERATOR;
            } else {
                std::stringstream ss;
                ss << "Could not lex the character ";
                auto uc = (unsigned char)(*c);
                // NOTE(review): the test uses the plain char *c, not uc.  Where char is signed,
                // bytes >= 0x80 are negative and are therefore also reported by numeric code --
                // confirm that this is intended.
                if (*c < 32)
                    ss << "code " << unsigned(uc);
                else
                    ss << "'" << *c << "'";
                throw StaticError(filename, begin, ss.str());
            }
            break;
        }

        // c is on the token's last character here, so this is an inclusive end position.
        Location end(line_number, c - line_start + 1);
        r.push_back(Token(kind, data, LocationRange(filename, begin, end)));
    }

    // The END_OF_FILE token has an empty range located at the terminating NUL.
    Location end(line_number, c - line_start + 1);
    r.push_back(Token(Token::END_OF_FILE, "", LocationRange(filename, end, end)));
    return r;
}

Example #2

Show file

File: lexer.cpp Project: johnboiles/jsonnet

// Reports whether c is accepted inside an identifier: true when c satisfies
// is_identifier_first, otherwise whatever is_number reports for it.
static bool is_identifier(char c)
{
    if (is_identifier_first(c)) {
        return true;
    }
    return is_number(c);
}

Example #3

Show file

File: lexer.cpp Project: fx19880617/jsonnet

// Lex a NUL-terminated source buffer into tokens, keeping comments and line breaks as fodder.
//
// Whitespace and comments are not tokens, but they are not thrown away either: they are
// collected as FodderElements and attached to the next token emitted (or to the final
// END_OF_FILE token).  String literals are stored as written in the source, i.e. escape
// sequences are copied verbatim including the backslash.  For text blocks the indentation of
// the first line and of the terminating "|||" are recorded alongside the token.
//
// Unlike a lexer that advances in a loop header, every case here moves c PAST the lexeme it
// consumed, so the end location is computed without the usual "+ 1".
//
// \param filename Name recorded in locations and in StaticError reports.
// \param input NUL-terminated text to lex.
// \returns The tokens in source order, terminated by an END_OF_FILE token.
// \throws StaticError for unterminated strings, comments or text blocks, for "|||" that is not
//         followed by a newline, and for characters that cannot start any token.
Tokens jsonnet_lex(const std::string &filename, const char *input)
{
    // Line that c is currently on (starts at 1).
    unsigned long line_number = 1;
    // First character of the current line; columns are computed as offsets from it.
    const char *line_start = input;

    Tokens r;

    const char *c = input;

    // Comments / line ends seen since the last token; handed to the next token emitted.
    Fodder fodder;
    bool fresh_line = true;  // Are we tokenizing from the beginning of a new line?

    while (*c!='\0') {
        Token::Kind kind;
        std::string data;
        std::string string_block_indent;
        std::string string_block_term_indent;

        // NOTE(review): lex_ws is opaque here; it appears to skip whitespace starting at c and
        // report how many newlines were crossed and the indentation reached, updating
        // line_start / line_number as it goes -- confirm against its definition.
        unsigned new_lines, indent;
        lex_ws(c, new_lines, indent, line_start, line_number);

        // If it's the end of the file, discard final whitespace.
        if (*c == '\0')
            break;

        if (new_lines > 0) {
            // Otherwise store whitespace in fodder.
            unsigned blanks = new_lines - 1;
            fodder.emplace_back(FodderElement::LINE_END, blanks, indent, EMPTY);
            fresh_line = true;
        }

        // Start position of the lexeme: column is the offset from line_start plus one.
        Location begin(line_number, c - line_start + 1);

        switch (*c) {

            // The following operators should never be combined with subsequent symbols.
            case '{':
            kind = Token::BRACE_L;
            c++;
            break;

            case '}':
            kind = Token::BRACE_R;
            c++;
            break;

            case '[':
            kind = Token::BRACKET_L;
            c++;
            break;

            case ']':
            kind = Token::BRACKET_R;
            c++;
            break;

            case ',':
            kind = Token::COMMA;
            c++;
            break;

            case '.':
            kind = Token::DOT;
            c++;
            break;

            case '(':
            kind = Token::PAREN_L;
            c++;
            break;

            case ')':
            kind = Token::PAREN_R;
            c++;
            break;

            case ';':
            kind = Token::SEMICOLON;
            c++;
            break;

            // Numeric literals.
            // NOTE(review): presumably lex_number advances c past the number -- confirm.
            case '0': case '1': case '2': case '3': case '4':
            case '5': case '6': case '7': case '8': case '9':
            kind = Token::NUMBER;
            data = lex_number(c, filename, begin);
            break;

            // Double-quoted string literals.
            case '"': {
                c++;
                for (; ; ++c) {
                    if (*c == '\0') {
                        throw StaticError(filename, begin, "Unterminated string");
                    }
                    if (*c == '"') {
                        break;
                    }
                    // Copy the backslash and step onto the escaped character so that an
                    // escaped quote does not end the string.  Nothing is decoded here.
                    if (*c == '\\' && *(c+1) != '\0') {
                        data += *c;
                        ++c;
                    }
                    if (*c == '\n') {
                        // Maintain line/column counters.
                        line_number++;
                        line_start = c+1;
                    }
                    data += *c;
                }
                c++;  // Advance beyond the ".
                kind = Token::STRING_DOUBLE;
            }
            break;

            // Single-quoted string literals (same scan as above with a different delimiter).
            case '\'': {
                c++;
                for (; ; ++c) {
                    if (*c == '\0') {
                        throw StaticError(filename, begin, "Unterminated string");
                    }
                    if (*c == '\'') {
                        break;
                    }
                    if (*c == '\\' && *(c+1) != '\0') {
                        data += *c;
                        ++c;
                    }
                    if (*c == '\n') {
                        // Maintain line/column counters.
                        line_number++;
                        line_start = c+1;
                    }
                    data += *c;
                }
                c++;  // Advance beyond the '.
                kind = Token::STRING_SINGLE;
            }
            break;

            // Keywords, identifiers, comments, text blocks and general operators.
            default:
            if (is_identifier_first(*c)) {
                std::string id;
                for (; is_identifier(*c); ++c)
                    id += *c;
                if (id == "assert") {
                    kind = Token::ASSERT;
                } else if (id == "else") {
                    kind = Token::ELSE;
                } else if (id == "error") {
                    kind = Token::ERROR;
                } else if (id == "false") {
                    kind = Token::FALSE;
                } else if (id == "for") {
                    kind = Token::FOR;
                } else if (id == "function") {
                    kind = Token::FUNCTION;
                } else if (id == "if") {
                    kind = Token::IF;
                } else if (id == "import") {
                    kind = Token::IMPORT;
                } else if (id == "importstr") {
                    kind = Token::IMPORTSTR;
                } else if (id == "in") {
                    kind = Token::IN;
                } else if (id == "local") {
                    kind = Token::LOCAL;
                } else if (id == "null") {
                    kind = Token::NULL_LIT;
                } else if (id == "self") {
                    kind = Token::SELF;
                } else if (id == "super") {
                    kind = Token::SUPER;
                } else if (id == "tailstrict") {
                    kind = Token::TAILSTRICT;
                } else if (id == "then") {
                    kind = Token::THEN;
                } else if (id == "true") {
                    kind = Token::TRUE;
                } else {
                    // Not a keyword, must be an identifier.
                    kind = Token::IDENTIFIER;
                }
                // Keywords keep their spelling in data too.
                data = id;

            } else if (is_symbol(*c) || *c == '#') {

                // Single line C++ and Python style comments.
                if (*c == '#' || (*c == '/' && *(c+1) == '/')) {
                    std::vector<std::string> comment(1);
                    unsigned blanks;
                    unsigned indent;
                    lex_until_newline(c, comment[0], blanks, indent, line_start, line_number);
                    // A comment on its own line is a PARAGRAPH; one trailing a token is a
                    // LINE_END.
                    auto fodder_kind =
                        fresh_line ? FodderElement::PARAGRAPH : FodderElement::LINE_END;
                    fodder.emplace_back(fodder_kind, blanks, indent, comment);
                    fresh_line = true;
                    continue;  // We've not got a token, just fodder, so keep scanning.
                }

                // Multi-line C style comment.
                if (*c == '/' && *(c+1) == '*') {

                    // Column offset at which the comment starts on its line.
                    unsigned margin = c - line_start;

                    const char *initial_c = c;
                    c += 2;  // Avoid matching /*/: skip the /* before starting the search for */.

                    while (!(*c == '*' && *(c+1) == '/')) {
                        if (*c == '\0') {
                            auto msg = "Multi-line comment has no terminating */.";
                            throw StaticError(filename, begin, msg);
                        }
                        if (*c == '\n') {
                            // Just keep track of the line / column counters.
                            line_number++;
                            line_start = c+1;
                        }
                        ++c;
                    }
                    c += 2;  // Move the pointer to the char after the closing '/'.

                    std::string comment(initial_c, c - initial_c);  // Includes the "/*" and "*/".

                    // Lex whitespace after comment
                    unsigned new_lines_after, indent_after;
                    lex_ws(c, new_lines_after, indent_after, line_start, line_number);
                    std::vector<std::string> lines;
                    if (comment.find('\n') == std::string::npos) {
                        // Comment looks like /* foo */
                        lines.push_back(comment);
                        fodder.emplace_back(FodderElement::INTERSTITIAL, 0, 0, lines);
                        if (new_lines_after > 0) {
                            fodder.emplace_back(FodderElement::LINE_END, new_lines_after - 1,
                                                indent_after, EMPTY);
                            fresh_line = true;
                        }
                    } else {
                        lines = line_split(comment, margin);
                        assert(lines[0][0] == '/');
                        // Little hack to support PARAGRAPHs with * down the LHS:
                        // Add a space to lines that start with a '*'
                        // NOTE(review): lines[0] starts with '/' (asserted above), so this loop
                        // always clears all_star and the padding below looks unreachable.
                        // Left unchanged because fixing it would alter the fodder produced --
                        // confirm whether the first line was meant to be skipped.
                        bool all_star = true;
                        for (auto &l : lines) {
                            if (l[0] != '*')
                                all_star = false;
                        }
                        if (all_star) {
                            for (auto &l : lines) {
                                if (l[0] == '*') l = " " + l;
                            }
                        }
                        if (new_lines_after == 0) {
                            // Ensure a line end after the paragraph.
                            new_lines_after = 1;
                            indent_after = 0;
                        }
                        if (!fresh_line)
                            // Ensure a line end before the comment.
                            fodder.emplace_back(FodderElement::LINE_END, 0, 0, EMPTY);
                        fodder.emplace_back(FodderElement::PARAGRAPH, new_lines_after - 1,
                                            indent_after, lines);
                        fresh_line = true;
                    }
                    continue;  // We've not got a token, just fodder, so keep scanning.
                }

                // Text block
                // Recognised only when "|||" is immediately followed by a newline.  The &&
                // chain short-circuits, so no character beyond the terminating NUL is read.
                if (*c == '|' && *(c+1) == '|' && *(c+2) == '|' && *(c+3) == '\n') {
                    std::stringstream block;
                    c += 4; // Skip the "|||\n"
                    line_number++;
                    // Skip any blank lines at the beginning of the block.
                    while (*c == '\n') {
                        line_number++;
                        ++c;
                        block << '\n';
                    }
                    line_start = c;
                    const char *first_line = c;
                    // NOTE(review): whitespace_check is opaque here; it appears to return how
                    // many leading characters of the line at c match the leading whitespace of
                    // first_line, with 0 meaning "not indented like the block" -- confirm.
                    int ws_chars = whitespace_check(first_line, c);
                    string_block_indent = std::string(first_line, ws_chars);
                    if (ws_chars == 0) {
                        auto msg = "Text block's first line must start with whitespace.";
                        throw StaticError(filename, begin, msg);
                    }
                    while (true) {
                        assert(ws_chars > 0);
                        // Read up to the \n
                        // (starting after the ws_chars characters of indentation, which are
                        // not copied into the block).
                        for (c = &c[ws_chars]; *c != '\n' ; ++c) {
                            if (*c == '\0')
                                throw StaticError(filename, begin, "Unexpected EOF");
                            block << *c;
                        }
                        // Add the \n
                        block << '\n';
                        ++c;
                        line_number++;
                        // Skip any blank lines
                        while (*c == '\n') {
                            line_number++;
                            ++c;
                            block << '\n';
                        }
                        // Only now is c at the start of the line examined next.  Setting
                        // line_start before the blank-line skip left it pointing at an earlier
                        // line, which skewed the columns reported for the terminating "|||".
                        line_start = c;
                        // Examine next line
                        ws_chars = whitespace_check(first_line, c);
                        if (ws_chars == 0) {
                            // End of text block
                            // Skip over any whitespace
                            while (*c == ' ' || *c == '\t') {
                                string_block_term_indent += *c;
                                ++c;
                            }
                            // Expect |||
                            if (!(*c == '|' && *(c+1) == '|' && *(c+2) == '|')) {
                                auto msg = "Text block not terminated with |||";
                                throw StaticError(filename, begin, msg);
                            }
                            c += 3;  // Leave after the last |
                            data = block.str();
                            kind = Token::STRING_BLOCK;
                            break;  // Out of the while loop.
                        }
                    }

                    break;  // Out of the switch.
                }

                // "|||" that does not open a text block (no newline directly after it).  The
                // operator scan below refuses to consume "|||", so without this check c would
                // never advance and the outer loop would emit empty operator tokens forever.
                if (*c == '|' && *(c+1) == '|' && *(c+2) == '|') {
                    auto msg = "Text block syntax requires new line after |||.";
                    throw StaticError(filename, begin, msg);
                }

                // General operator: the longest run of symbol characters that does not run
                // into the start of a comment or of a text block.
                const char *operator_begin = c;
                for (; is_symbol(*c) ; ++c) {
                    // Not allowed // in operators
                    if (*c == '/' && *(c+1) == '/') break;
                    // Not allowed /* in operators
                    if (*c == '/' && *(c+1) == '*') break;
                    // Not allowed ||| in operators
                    if (*c == '|' && *(c+1) == '|' && *(c+2) == '|') break;
                }
                // Not allowed to end with a + - ~ ! unless a single char.
                // So, wind it back if we need to (but not too far).
                while (c > operator_begin + 1
                       && (*(c-1) == '+' || *(c-1) == '-' || *(c-1) == '~' || *(c-1) == '!')) {
                    c--;
                }
                data += std::string(operator_begin, c);
                // A lone "$" is its own token kind and carries no data.
                if (data == "$") {
                    kind = Token::DOLLAR;
                    data = "";
                } else {
                    kind = Token::OPERATOR;
                }
            } else {
                std::stringstream ss;
                ss << "Could not lex the character ";
                auto uc = (unsigned char)(*c);
                // NOTE(review): the test uses the plain char *c, not uc.  Where char is signed,
                // bytes >= 0x80 are negative and are therefore also reported by numeric code --
                // confirm that this is intended.
                if (*c < 32)
                    ss << "code " << unsigned(uc);
                else
                    ss << "'" << *c << "'";
                throw StaticError(filename, begin, ss.str());
            }
        }

        // c is one past the token here, so no "+ 1" is needed for an inclusive end column.
        Location end(line_number, c - line_start);
        r.emplace_back(kind, fodder, data, string_block_indent, string_block_term_indent,
                       LocationRange(filename, begin, end));
        fodder.clear();
        fresh_line = false;
    }

    // Any fodder still pending (e.g. trailing comments) is attached to END_OF_FILE.
    Location end(line_number, c - line_start + 1);
    r.emplace_back(Token::END_OF_FILE, fodder, "", "", "", LocationRange(filename, end, end));
    return r;
}