/**
 * Chunkify tokens.
 *
 * Reads the next token from the stream into t->token (NUL-terminated) and
 * records its starting line/column in t->token_line / t->token_column.
 *
 * Returns the token length in characters, EOF at end of input, or a
 * negative error code:
 *   -2  token longer than MAX_TOKEN_LEN
 *   -4  EOF inside a character literal
 *   -5  unterminated character literal
 */
int tokenize_next_internal(tokenize_t *t)
{
    int c;
    int pos = 0;  // output char pos

skip_white:
    c = tokenize_next_char(t);
    if (c == EOF)
        return EOF;
    if (isspace(c))
        goto skip_white;

    // a token is starting. mark its position.
    t->token_line = t->current_line;
    t->token_column = t->current_column;

    // is a character literal?
    if (c == '\'') {
        t->token[pos++] = c;
        c = tokenize_next_char(t);
        // NOTE(review): assumes unescape() propagates EOF unchanged so the
        // check below still catches end-of-input mid-escape -- confirm.
        if (c == '\\')
            c = unescape(tokenize_next_char(t));
        if (c == EOF)
            return -4;
        t->token[pos++] = c;
        c = tokenize_next_char(t);
        if (c != '\'')
            return -5;
        t->token[pos++] = c;
        goto end_tok;
    }

    // is a string literal?
    if (c == '\"') {
        int escape = 0;
        // add the initial quote
        t->token[pos++] = c;
        // keep reading until close quote
        while (1) {
            if (pos >= MAX_TOKEN_LEN)
                return -2;
            c = tokenize_next_char(t);
            if (c == EOF)
                goto end_tok;
            if (escape) {
                escape = 0;
                // BUGFIX: store the unescaped character.  Previously it was
                // computed and then discarded by the continue, silently
                // dropping every escaped character from the token.
                t->token[pos++] = unescape(c);
                continue;
            }
            if (c == '\"') {
                t->token[pos++] = c;
                goto end_tok;
            }
            if (c == '\\') {
                escape = 1;
                continue;
            }
            t->token[pos++] = c;
        }
        // (previous dead "goto end_tok" removed: the loop above only exits
        // via goto end_tok or an error return)
    }

    // is an operator?
    if (strchr(op_chars, c) != NULL) {
        while (strchr(op_chars, c) != NULL) {
            if (pos >= MAX_TOKEN_LEN)
                return -2;
            t->token[pos++] = c;
            c = tokenize_next_char(t);
        }
        // push back the first non-operator character for the next call
        tokenize_ungetc(t, c);
        goto end_tok;
    }

    // otherwise, all tokens are alpha-numeric blobs
in_tok:
    if (pos >= MAX_TOKEN_LEN)
        return -2;
    t->token[pos++] = c;
    // single-character tokens terminate the blob immediately
    if (strchr(single_char_toks, c) != NULL)
        goto end_tok;
    c = tokenize_next_char(t);
    // a following single-char token or operator ends this blob; push it back
    if (strchr(single_char_toks, c) != NULL || strchr(op_chars, c) != NULL) {
        tokenize_ungetc(t, c);
        goto end_tok;
    }
    if (!isspace(c) && c != EOF)
        goto in_tok;

end_tok:
    // NOTE(review): assumes t->token holds at least MAX_TOKEN_LEN + 1 bytes,
    // since pos may equal MAX_TOKEN_LEN here -- confirm buffer size.
    t->token[pos] = 0;
    return pos;
}
/**
 * Chunkify tokens.
 *
 * Reads the next token from the stream into t->token (NUL-terminated),
 * classifies it in t->token_type (LCM_TOK_EOF, LCM_TOK_COMMENT, or
 * LCM_TOK_OTHER; LCM_TOK_INVALID only on error paths), and records its
 * starting line/column in t->token_line / t->token_column.
 *
 * Returns the token length in characters, EOF at end of input, or a
 * negative error code:
 *   -4  EOF inside a character literal
 *   -5  unterminated character literal
 *   TOK_ERR_MEMORY_INSUFFICIENT  token buffer growth failed
 */
int tokenize_next_internal(tokenize_t *t)
{
    int c;
    int pos = 0;  // output char pos

    t->token_type = LCM_TOK_INVALID;

    // Repeatedly read characters until EOF or a non-whitespace character is
    // reached.
    do {
        c = tokenize_next_char(t);
        if (c == EOF) {
            t->token_type = LCM_TOK_EOF;
            return EOF;
        }
    } while (isspace(c));

    // a token is starting. mark its position.
    t->token_line = t->current_line;
    t->token_column = t->current_column;

    // is a character literal?
    if (c == '\'') {
        // NOTE(review): no ensure_token_capacity() here; assumes the initial
        // token buffer always has room for 3 chars + NUL -- confirm.
        t->token[pos++] = c;
        c = tokenize_next_char(t);
        if (c == '\\')
            c = unescape(tokenize_next_char(t));
        if (c == EOF)
            return -4;
        t->token[pos++] = c;
        c = tokenize_next_char(t);
        if (c != '\'')
            return -5;
        t->token[pos++] = c;
        t->token_type = LCM_TOK_OTHER;
        goto end_tok;
    }

    // is a string literal?
    if (c == '\"') {
        int escape = 0;
        // BUGFIX: set the token type before the loop.  It was previously
        // assigned after the while(1) loop, which is unreachable (the loop
        // only exits via goto), so string tokens came back LCM_TOK_INVALID.
        t->token_type = LCM_TOK_OTHER;
        // add the initial quote
        t->token[pos++] = c;
        // keep reading until close quote
        while (1) {
            if (!ensure_token_capacity(t, pos))
                return TOK_ERR_MEMORY_INSUFFICIENT;
            c = tokenize_next_char(t);
            if (c == EOF)
                goto end_tok;
            if (escape) {
                escape = 0;
                // BUGFIX: store the unescaped character.  Previously it was
                // computed and then discarded by the continue, silently
                // dropping every escaped character from the token.
                t->token[pos++] = unescape(c);
                continue;
            }
            if (c == '\"') {
                t->token[pos++] = c;
                goto end_tok;
            }
            if (c == '\\') {
                escape = 1;
                continue;
            }
            t->token[pos++] = c;
        }
    }

    // is an operator?
    if (strchr(op_chars, c) != NULL) {
        while (strchr(op_chars, c) != NULL) {
            if (!ensure_token_capacity(t, pos))
                return TOK_ERR_MEMORY_INSUFFICIENT;
            t->token[pos++] = c;
            c = tokenize_next_char(t);
        }
        t->token_type = LCM_TOK_OTHER;
        // push back the first non-operator character for the next call
        tokenize_ungetc(t, c);
        goto end_tok;
    }

    // Is a comment?
    if (c == '/') {
        if (!ensure_token_capacity(t, pos))
            return TOK_ERR_MEMORY_INSUFFICIENT;
        t->token[pos++] = c;
        c = tokenize_next_char(t);
        if (c == EOF) {
            // lone '/' at end of input is just an operator token
            t->token_type = LCM_TOK_OTHER;
            goto end_tok;
        }

        // Extended comment '/* ... */'
        if (c == '*')
            return tokenize_extended_comment(t);

        // Single-line comment
        if (c == '/') {
            t->token_type = LCM_TOK_COMMENT;
            c = tokenize_next_char(t);
            // Strip out leading '/' characters
            while (c == '/')
                c = tokenize_next_char(t);
            // Strip out leading whitespace (spaces only; tabs are kept --
            // NOTE(review): confirm this is intentional).
            while (c != EOF && c == ' ')
                c = tokenize_next_char(t);
            // Discard the stored '/' prefix; the token is the comment text
            // alone.
            pos = 0;
            // Place the rest of the line into a comment token.
            while (c != EOF && c != '\n') {
                if (!ensure_token_capacity(t, pos))
                    return TOK_ERR_MEMORY_INSUFFICIENT;
                t->token[pos++] = c;
                c = tokenize_next_char(t);
            }
            // push back the newline (or EOF) for the next call
            tokenize_ungetc(t, c);
            goto end_tok;
        }

        // If the '/' is not followed by a '*' or a '/', then treat it like an
        // operator
        t->token_type = LCM_TOK_OTHER;
        tokenize_ungetc(t, c);
        goto end_tok;
    }

    // otherwise, all tokens are alpha-numeric blobs
    t->token_type = LCM_TOK_OTHER;  // hoisted out of the loop (was re-set each pass)
    do {
        if (!ensure_token_capacity(t, pos))
            return TOK_ERR_MEMORY_INSUFFICIENT;
        t->token[pos++] = c;
        // single-character tokens terminate the blob immediately
        if (strchr(single_char_toks, c) != NULL)
            goto end_tok;
        c = tokenize_next_char(t);
        // a following single-char token or operator ends this blob; push it back
        if (strchr(single_char_toks, c) != NULL || strchr(op_chars, c) != NULL) {
            tokenize_ungetc(t, c);
            goto end_tok;
        }
    } while (!isspace(c) && c != EOF);

end_tok:
    // NOTE(review): assumes ensure_token_capacity(t, pos) guarantees room for
    // the terminating NUL at index pos -- confirm its contract.
    t->token[pos] = 0;
    return pos;
}