/*
 * Emit the decimal representation of `val`, most significant digit first,
 * by handing each digit character to format_char().
 *
 * buf, end : forwarded unchanged to format_char() for every digit
 *            (presumably the write cursor and the buffer limit - confirm
 *            against format_char's definition).
 * val      : value to print; 0 is printed as the single digit '0'.
 *
 * Returns whatever the last format_char() call returned.
 */
static char* format_unsigned( char* buf, char* end, unsigned val )
{
    char digits[16];   /* digits are produced least significant first */
    int  count = 0;

    /* do/while so that val == 0 still yields exactly one '0' digit */
    do {
        digits[count++] = (char)('0' + val % 10);
        val /= 10;
    } while (val != 0);

    /* replay the collected digits in reverse, i.e. in reading order */
    while (count > 0) {
        buf = format_char(buf, end, digits[--count]);
    }
    return buf;
}
{ int shift = 4*ndigits; static const char hex[16] = "0123456789abcdef"; while (shift >= 0) { buf = format_char(buf, end, hex[(val >> shift) & 15]); shift -= 4; } return buf; } static char* format_ip4( char* buf, char* end, uint32_t ip ) { buf = format_unsigned( buf, end, (unsigned)(ip >> 24) ); buf = format_char( buf, end, '.'); buf = format_unsigned( buf, end, (unsigned)((ip >> 16) & 255)); buf = format_char( buf, end, '.'); buf = format_unsigned( buf, end, (unsigned)((ip >> 8) & 255)); buf = format_char( buf, end, '.'); buf = format_unsigned( buf, end, (unsigned)(ip & 255)); return buf; } static char* format_ip6( char* buf, char* end, const uint8_t* ip6 ) { int nn; for (nn = 0; nn < 8; nn++) { int val = (ip6[0] << 16) | ip6[1]; ip6 += 2;
/*
 * NEWLINE_TOKEN_HACK(returned_token)
 *
 * Helper for lex_once(): consumes any spaces/tabs that follow the operator
 * just read and, if the next character is then '#' or '\n', stores
 * `returned_token` into the local `token`. Otherwise `token` is left as it
 * was. Relies on the locals `lexer` and `token` of the expansion site.
 */
#define NEWLINE_TOKEN_HACK(returned_token) {\
    int col_c = Stream_peek(&lexer->stream); \
    while (col_c == ' ' || col_c == '\t') { \
        Stream_pop(&lexer->stream); \
        col_c = Stream_peek(&lexer->stream); \
    } \
    if (col_c == '#' || col_c == '\n') { \
        token = (returned_token); \
    } \
}

/*
 * Produce the next token from lexer->stream.
 *
 * Order of business:
 *   1. If dedents are still owed (lexer->unscanned_dedents > 0), pay one out
 *      as DEDENT.
 *   2. If the end was already reached (lexer->is_end), return TOKEN_EOF.
 *   3. If the previous token was a NEWLINE (lexer->is_newline_phase), measure
 *      the indentation of the next non-blank, non-comment line and compare it
 *      with the top of the indent stack: deeper -> push and return INDENT;
 *      shallower -> pop and queue DEDENTs; equal -> continue lexing.
 *   4. Otherwise read characters until something other than whitespace,
 *      a comment or a line continuation has been consumed, and return the
 *      token code for it.
 *
 * Returns a token code, or TOKEN_ERROR after reporting through error().
 * TEST_ERROR() is defined elsewhere; presumably it returns early when the
 * stream yields an error value - confirm against its definition.
 */
static inline int lex_once(Lexer *lexer) {
    if (lexer->unscanned_dedents > 0) {
        lexer->unscanned_dedents--;
        return DEDENT;
    } else if (lexer->is_end) {
        return TOKEN_EOF;
    } else if (lexer->is_newline_phase) {
        lexer->is_newline_phase = 0;

        // Count leading indent characters; blank lines and comment-only
        // lines are swallowed, and every '\n' restarts the count.
        int next_indent_level = 0;
        int lookahead = Stream_peek(&lexer->stream);
        while (lookahead == ' ' || lookahead == '\n' || lookahead == '\t' || lookahead == '#') {
            Stream_pop(&lexer->stream);
            if (lookahead == '#') {
                // Skip the comment body; the terminating '\n' is left for
                // the outer loop so it resets the indent count.
                int comment_c = Stream_peek(&lexer->stream);
                while (comment_c != '\n' && comment_c != CHAR_EOF) {
                    Stream_pop(&lexer->stream);
                    comment_c = Stream_peek(&lexer->stream);
                }
            } else if (lookahead == '\n') {
                next_indent_level = 0;
            } else {
                next_indent_level++;
                // The first indent character ever seen fixes the indent
                // style; any later indent character must match it.
                if (lexer->repr_indent_char == 0) {
                    lexer->repr_indent_char = lookahead;
                } else if (lexer->repr_indent_char != lookahead) {
                    error(lexer, LEXERR_MIXED_SPACES_AND_TABS, "Don't mix spaces and tabs to indicate indents");
                    return TOKEN_ERROR;
                }
            }
            lookahead = Stream_peek(&lexer->stream);
        }
        TEST_ERROR(lookahead);

        if (lookahead == CHAR_EOF) {
            // Input ended while measuring indentation: flush the indent
            // stack, mark the end, and let the re-entry hand out whatever
            // is pending (DEDENTs first, then TOKEN_EOF).
            flush_ind_stack(lexer);
            lexer->is_end = 1;
            Stream_clear_record(&lexer->stream);
            return lex_once(lexer);
        }

        // Check for an empty stack before peeking, so the top is never read
        // from a stack that has no entries.
        if (ind_stack_empty(lexer)) {
            error(lexer, LEXERR_FATAL_ERROR, "FATAL INDENT ERROR");
            return TOKEN_ERROR;
        }

        int t = ind_stack_peek(lexer);
        if (t < next_indent_level) {
            ind_stack_push(lexer, next_indent_level);
            return INDENT;
        } else if (t > next_indent_level) {
            // Pop until the stack top is no deeper than the new level,
            // queuing one DEDENT per popped entry.
            while (t > next_indent_level) {
                ind_stack_pop(lexer);
                t = ind_stack_peek(lexer);
                lexer->unscanned_dedents++;
            }
            // The new level must coincide with an enclosing level.
            if (t != next_indent_level) {
                error(lexer, LEXERR_INDENT_MISMATCH, "Indentation level mismatch");
                return TOKEN_ERROR;
            }
        }
        Stream_clear_record(&lexer->stream);
        // Re-enter: returns a queued DEDENT if any, otherwise the next token.
        return lex_once(lexer);
    }

    int skip;   // set when the consumed input produced no token
    int token;
    do {
        int c = Stream_pop(&lexer->stream);
        TEST_ERROR(c);
        token = 0;
        skip = 0;
        switch (c) {
        case CHAR_EOF:
            flush_ind_stack(lexer);
            if (lexer->bracket_depth > 0) {
                error(lexer, LEXERR_BRACKET_MISMATCH, "Expected closing bracket/parenthisis");
                return TOKEN_ERROR;
            } else {
                // End of input closes the current line with a NEWLINE.
                token = NEWLINE;
            }
            lexer->is_end = 1;
            break;

        case ' ':
        case '\t':
            skip = 1;
            Stream_clear_record(&lexer->stream);
            break;

        case '\n':
            if (lexer->bracket_depth > 0) {
                // Inside brackets a line break is plain whitespace.
                skip = 1;
                Stream_clear_record(&lexer->stream);
            } else {
                token = NEWLINE;
                lexer->is_newline_phase = 1;
            }
            break;

        case '#': {
            // Comment: drop everything up to (not including) '\n' or EOF.
            int comment_c = Stream_peek(&lexer->stream);
            while (comment_c != '\n' && comment_c != CHAR_EOF) {
                Stream_pop(&lexer->stream);
                comment_c = Stream_peek(&lexer->stream);
            }
            skip = 1;
            Stream_clear_record(&lexer->stream);
        }
            break;

        case '-': {
            int lookahead = Stream_peek(&lexer->stream);
            if (lookahead == '>') {
                Stream_pop(&lexer->stream);
                token = ARROW;
            } else if (lookahead == '-') {
                Stream_pop(&lexer->stream);
                // "--" is only accepted when it ends the line.
                NEWLINE_TOKEN_HACK(DMINUS_NEWLINE);
                if (token != DMINUS_NEWLINE) {
                    error(lexer, LEXERR_INVALID_CHARACTER, "Expected newline after '--'");
                    return TOKEN_ERROR;
                }
            } else {
                token = MINUS;
            }
            break;
        }

        case ':':
            if (Stream_peek(&lexer->stream) == '=') {
                Stream_pop(&lexer->stream);
                token = DEFASSIGN;
            } else if (lexer->bracket_depth == 0) {
                // Outside brackets, a ':' that ends the line gets its own token.
                NEWLINE_TOKEN_HACK(COLUMN_NEWLINE);
                if (token != COLUMN_NEWLINE)
                    token = COLUMN;
            } else
                token = COLUMN;
            break;

        case '>':
            if (Stream_peek(&lexer->stream) == '=') {
                Stream_pop(&lexer->stream);
                token = GTE;
            } else if (lexer->bracket_depth == 0) {
                // Outside brackets, a '>' that ends the line gets its own token.
                NEWLINE_TOKEN_HACK(GT_NEWLINE);
                if (token != GT_NEWLINE)
                    token = GT;
            } else {
                token = GT;
            }
            break;

        case '<':
            if (Stream_peek(&lexer->stream) == '=') {
                Stream_pop(&lexer->stream);
                token = LTE;
            } else {
                token = LT;
            }
            break;

        case '.':
            // ".5" style float: a dot immediately followed by digits.
            if (Stream_peek(&lexer->stream) >= '0' && Stream_peek(&lexer->stream) <= '9') {
                while (Stream_peek(&lexer->stream) >= '0' && Stream_peek(&lexer->stream) <= '9') {
                    Stream_pop(&lexer->stream);
                }
                token = FLOAT;
            } else {
                token = DOT;
            }
            break;

        case ',':
            token = COMMA;
            break;
        case '+':
            token = PLUS;
            break;
        case '/':
            token = SLASH;
            break;
        case '%':
            token = PERCENT;
            break;

        case '!':
            if (Stream_peek(&lexer->stream) == '=') {
                Stream_pop(&lexer->stream);
                token = NEQ;
            } else
                token = BANG;
            break;

        // Operators that have a doubled form: "**", "&&", "||", "==".
        case '*':
            if (Stream_peek(&lexer->stream) == c) {
                Stream_pop(&lexer->stream);
                token = DSTAR;
            } else
                token = STAR;
            break;
        case '&':
            if (Stream_peek(&lexer->stream) == c) {
                Stream_pop(&lexer->stream);
                token = DAMP;
            } else
                token = AMP;
            break;
        case '|':
            if (Stream_peek(&lexer->stream) == c) {
                Stream_pop(&lexer->stream);
                token = DPIPE;
            } else
                token = PIPE;
            break;
        case '=':
            if (Stream_peek(&lexer->stream) == c) {
                Stream_pop(&lexer->stream);
                token = EQ;
            } else
                token = ASSIGN;
            break;

        case '\\':
            // Line continuation: the backslash must be the last character
            // of the line (or of the input). The line break and any
            // spaces/tabs that follow it are swallowed.
            if (Stream_peek(&lexer->stream) == '\n' || Stream_peek(&lexer->stream) == CHAR_EOF) {
                Stream_pop(&lexer->stream);
                while (Stream_peek(&lexer->stream) == ' ' || Stream_peek(&lexer->stream) == '\t') {
                    Stream_pop(&lexer->stream);
                }
                skip = 1;
                Stream_clear_record(&lexer->stream);
            } else {
                error(lexer, LEXERR_INVALID_AFTER_BACKSLASH, "Unexpected character after line continuation character");
                return TOKEN_ERROR;
            }
            break;

        // '[' and '(' share one depth counter; while it is non-zero,
        // newlines are treated as whitespace (see case '\n').
        case '[':
            lexer->bracket_depth++;
            token = LBRKT;
            break;
        case '(':
            lexer->bracket_depth++;
            token = LPAR;
            break;
        case ']':
            if (lexer->bracket_depth == 0) {
                error(lexer, LEXERR_BRACKET_MISMATCH, "Openning bracket not found");
                return TOKEN_ERROR;
            }
            lexer->bracket_depth--;
            token = RBRKT;
            break;
        case ')':
            if (lexer->bracket_depth == 0) {
                error(lexer, LEXERR_BRACKET_MISMATCH, "Openning parenthisis not found");
                return TOKEN_ERROR;
            }
            lexer->bracket_depth--;
            token = RPAR;
            break;

        default:
            if (c >= '0' && c <= '9') {
                // digits, optionally followed by '.' and more digits
                while (Stream_peek(&lexer->stream) >= '0' && Stream_peek(&lexer->stream) <= '9') {
                    Stream_pop(&lexer->stream);
                }
                if (Stream_peek(&lexer->stream) != '.') {
                    token = INTEGER;
                } else {
                    Stream_pop(&lexer->stream); // consume dot
                    while (Stream_peek(&lexer->stream) >= '0' && Stream_peek(&lexer->stream) <= '9') {
                        Stream_pop(&lexer->stream);
                    }
                    token = FLOAT;
                }
            } else if (c == '\'' || c == '\"') {
                if (Stream_peek(&lexer->stream) == c) {
                    Stream_pop(&lexer->stream);
                    if (Stream_peek(&lexer->stream) == c) {
                        // three identical quotes open a long string
                        Stream_pop(&lexer->stream);
                        if (!consume_string(lexer, c, 1))
                            return TOKEN_ERROR;
                        token = LONGSTRING;
                    } else {
                        // "" or ''
                        token = STRING;
                    }
                } else {
                    if (!consume_string(lexer, c, 0))
                        return TOKEN_ERROR;
                    token = STRING;
                }
            } else if (c == '$' || c == '@' || (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '_') {
                // letter   [$@a-zA-Z_]
                // exletter [$@a-zA-Z0-9_!?]
                // pattern: {letter}{exletter}*
                //
                // Kept as int, like every other lookahead in this function:
                // narrowing the peeked value to char could make an
                // out-of-range value alias an identifier character.
                int next = Stream_peek(&lexer->stream);
                while (next == '$' || next == '@' ||
                       (next >= 'a' && next <= 'z') ||
                       (next >= 'A' && next <= 'Z') ||
                       (next >= '0' && next <= '9') ||
                       next == '_' || next == '!' || next == '?') {
                    Stream_pop(&lexer->stream);
                    next = Stream_peek(&lexer->stream);
                }
                token = NAME;
            } else {
                char fmt[5];
                char msg[128];
                format_char(c, fmt);
                snprintf(msg, sizeof msg, "Invalid character: '%s' [line %d, col %d]", fmt, lexer->stream.curline, lexer->stream.curcol - 1);
                error(lexer, LEXERR_INVALID_CHARACTER, msg);
                return TOKEN_ERROR;
            }
            break;
        }
    } while (skip);

    return token;
}