bool Lexer::lex_identifier(FILE * fp) { Token token; token.filename = current_filename; token.line_number = current_line; token.column_number = current_column; token.type = TOKEN_IDENT; char cur = peek_char(fp); char buf[256]; int i = 0; while (cur && isalnum(cur)) { if (!(i < 256)) { REPORT_TOKEN_ERROR("Identifier exceeds limit of 256 characters.", token); return false; } next_char(fp); // eat current char buf[i] = cur; i++; cur = peek_char(fp); } make_token_text(&token, buf, i); tokens.push_back(token); return true; }
/* This function is a bit messy unfortunately since it does efficient in-place parsing of "words" * with escape codes. When escape codes are encountered, it collapses them to the actual character * value in-place in memory. The token data generated from this operation points to the word within * the stream data memory. */ void parse_escaped_word_item(struct tml_stream *stream, struct tml_token *token) { char *word_start = &stream->data[stream->index]; char *p = word_start; bool shift_necessary = false; /* scan the word, collapsing escape codes in-place if necessary */ int ch = peek_char(stream); while (ch != ' ' && ch != '\t' && ch != -1 && ch != TML_DIVIDER_CHAR && ch != TML_OPEN_CHAR && ch != TML_CLOSE_CHAR) { if (ch == TML_ESCAPE_CHAR) { /* substitute 2-character escape code with the character it represents */ next_char(stream); ch = peek_char(stream); if (ch == -1) break; *p = translate_escape_code(ch); shift_necessary = true; } else if (shift_necessary) { /* shift character to the left collapsed position */ *p = (ch = peek_char(stream)); } /* go on to the next potential character */ p++; next_char(stream); ch = peek_char(stream); } /* return a reference to the data slice */ token->type = TML_TOKEN_ITEM; token->value = word_start; token->value_size = (p - word_start); }
struct Token get_number(char c) { struct Token token; BOOLEAN isReal = FALSE; int i = 1; token.tokenCode = NUMBER; token.literalValue.valString[0] = c; while(char_table[peek_char()] == DIGIT || peek_char() == '.' || peek_char() == 'e'|| peek_char() == '-') { c = get_char(); token.literalValue.valString[i] = c; isReal = (c == '.' || c == 'e' || c == '-')? TRUE : isReal; i++; } for(i; i<MAX_TOKEN_STRING_LENGTH; i++) { token.literalValue.valString[i] = '\0'; } if(isReal) { token.literalType = REAL_LIT; } else { token.literalType = INTEGER_LIT; token.literalValue.valInt = atoi(token.literalValue.valString); } return token; }
static bool get_args (const char **s, const char *keyw, uint32 *args, uint count) { if (!count) return true; if (peek_char (s) != '(') { ScriptError("%s(%d args) expected", keyw, count); return false; } (*s)++; while (count--) { if (!get_expression (s, args, 0, count ? 0 : PAREN_EXPECT | PAREN_EAT)) { error: ScriptError("not enough arguments to function %s", keyw); return false; } if (!count) break; if (peek_char (s) != ',') goto error; (*s)++; args++; } return true; }
static object read_character(FILE *in) { int c = fgetc(in); switch (c) { case EOF: error("Unexpected EOF -- read", nil); break; case 's': case 'S': if (tolower(peek_char(in)) == 'p') { expect_string(in, "pace"); peek_char_expect_delimiter(in); return make_character(' '); } break; case 'n': case 'N': if (tolower(peek_char(in)) == 'e') { expect_string(in, "ewline"); peek_char_expect_delimiter(in); return make_character('\n'); } break; } peek_char_expect_delimiter(in); return make_character(c); }
// Advance past any run of spaces and tabs at the current position.
// Returns true if at least one character was skipped.
static CMARK_INLINE bool skip_spaces(subject *subj) {
  bool skipped = false;

  for (;;) {
    unsigned char c = peek_char(subj);
    if (c != ' ' && c != '\t') {
      break;
    }
    advance(subj);
    skipped = true;
  }

  return skipped;
}
// Parse reference. Assumes string begins with '[' character. // Modify refmap if a reference is encountered. // Return 0 if no reference found, otherwise position of subject // after reference is parsed. int cmark_parse_reference_inline(cmark_strbuf *input, cmark_reference_map *refmap) { subject subj; cmark_chunk lab; cmark_chunk url; cmark_chunk title; int matchlen = 0; int beforetitle; subject_from_buf(&subj, input, NULL); // parse label: if (!link_label(&subj, &lab)) return 0; // colon: if (peek_char(&subj) == ':') { advance(&subj); } else { return 0; } // parse link url: spnl(&subj); matchlen = scan_link_url(&subj.input, subj.pos); if (matchlen) { url = cmark_chunk_dup(&subj.input, subj.pos, matchlen); subj.pos += matchlen; } else { return 0; } // parse optional link_title beforetitle = subj.pos; spnl(&subj); matchlen = scan_link_title(&subj.input, subj.pos); if (matchlen) { title = cmark_chunk_dup(&subj.input, subj.pos, matchlen); subj.pos += matchlen; } else { subj.pos = beforetitle; title = cmark_chunk_literal(""); } // parse final spaces and newline: while (peek_char(&subj) == ' ') { advance(&subj); } if (peek_char(&subj) == '\n') { advance(&subj); } else if (peek_char(&subj) != 0) { return 0; } // insert reference into refmap cmark_reference_create(refmap, &lab, &url, &title); return subj.pos; }
// Parse zero or more space characters, including at most one newline. static void spnl(subject* subj) { bool seen_newline = false; while (peek_char(subj) == ' ' || (!seen_newline && (seen_newline = peek_char(subj) == '\n'))) { advance(subj); } }
// Skip "garbage" in front of the next token: whitespace, `// ...` line
// comments and `/* ... */` block comments.
//
// On return the input is positioned at the first character that is not
// whitespace and does not start a comment, or at end of input. A lone '/'
// (one not followed by '/' or '*') is pushed back with restore_char() so the
// caller sees it again. An unterminated comment consumes the rest of the
// input.
//
// This is a loop rather than a self-call per whitespace character or
// comment, so stack usage no longer grows with the amount skipped.
// Characters are held in an int so that a value returned by read_char() is
// not narrowed before it is compared with EOF or passed to isspace().
void analyzer::drop_garbage()
{
    for (;;) {
        if (peek_char() == EOF)
            return;

        int c = read_char();
        if (isspace(c))
            continue;

        if (c != '/') {
            // ordinary character: not garbage, give it back
            restore_char();
            return;
        }

        if (peek_char() == EOF) {
            // a '/' as the very last character is not a comment
            restore_char();
            return;
        }

        int cc = read_char();
        if (cc == '/') {
            // line comment: drop everything up to and including the newline
            for (;;) {
                cc = read_char();
                if (cc == EOF)
                    return;
                if (cc == '\n')
                    break;
            }
            continue; // more garbage may follow the comment
        }

        if (cc == '*') {
            // block comment: drop everything up to and including "*/"
            c = read_char();
            for (;;) {
                if (c == EOF)
                    return;
                cc = read_char();
                if (cc == EOF)
                    return;
                if (c == '*' && cc == '/')
                    break;
                c = cc;
            }
            continue; // more garbage may follow the comment
        }

        // '/' followed by something else: give both characters back
        restore_char(2);
        return;
    }
}
// Consume an optional '\r' followed by an optional '\n'.
// Returns true if either character was consumed, or if the subject is at
// end of input.
static CMARK_INLINE bool skip_line_end(subject *subj) {
  bool consumed = false;

  if (peek_char(subj) == '\r') {
    advance(subj);
    consumed = true;
  }
  if (peek_char(subj) == '\n') {
    advance(subj);
    consumed = true;
  }

  if (consumed) {
    return true;
  }
  return is_eof(subj);
}
static char * read_expression (struct parsebuf *p) { int start; int end; skip_whitespace (p); if (peek_char (p) == '"') { /* Read as a quoted string. The quotation marks are not included in the expression value. */ /* Skip opening quotation mark. */ read_char (p); start = p->pos; while (has_more (p) && peek_char (p) != '"') read_char (p); end = p->pos; /* Skip the terminating quotation mark. */ read_char (p); } else if (peek_char (p) == '(') { /* Read as a parenthesized string -- for tuples/coordinates. */ /* The parentheses are included in the expression value. */ int c; start = p->pos; do { c = read_char (p); } while (c != -1 && c != ')'); end = p->pos; } else if (has_more (p)) { /* Read as a single word -- for numeric values or words without whitespace. */ start = p->pos; while (has_more (p) && ! is_whitespace (peek_char (p))) read_char (p); end = p->pos; } else { /* The end of the theme file has been reached. */ grub_error (GRUB_ERR_IO, "%s:%d:%d expression expected in theme file", p->filename, p->line_num, p->col_num); return 0; } return grub_new_substring (p->buf, start, end); }
// Assumes we have a period at the current position. static cmark_node *handle_period(subject *subj, bool smart) { advance(subj); if (smart && peek_char(subj) == '.') { advance(subj); if (peek_char(subj) == '.') { advance(subj); return make_str(subj->mem, cmark_chunk_literal(ELLIPSES)); } else { return make_str(subj->mem, cmark_chunk_literal("..")); } } else { return make_str(subj->mem, cmark_chunk_literal(".")); } }
// Assumes we have a hyphen at the current position. static cmark_node* handle_hyphen(subject* subj, bool smart) { advance(subj); if (smart && peek_char(subj) == '-') { advance(subj); if (peek_char(subj) == '-') { advance(subj); return make_str(cmark_chunk_literal(EMDASH)); } else { return make_str(cmark_chunk_literal(ENDASH)); } } else { return make_str(cmark_chunk_literal("-")); } }
struct Token get_special(char c) { struct Token tokenOneChar, tokenTwoChar, tokenFinal; TokenCode codeOneChar, codeTwoChar, codeFinal; tokenOneChar.literalType = STRING_LIT; tokenTwoChar.literalType = STRING_LIT; tokenOneChar.literalValue.valString[0] = c; tokenOneChar.literalValue.valString[1] = '\0'; tokenTwoChar.literalValue.valString[0] = c; tokenTwoChar.literalValue.valString[1] = peek_char(); tokenTwoChar.literalValue.valString[2] = '\0'; codeOneChar = is_reserved_word(tokenOneChar.literalValue.valString); codeTwoChar = is_reserved_word(tokenTwoChar.literalValue.valString); if(codeTwoChar != NO_TOKEN) { get_char(); codeFinal = codeTwoChar; tokenFinal = tokenTwoChar; } else { codeFinal = codeOneChar; tokenFinal = tokenOneChar; } tokenFinal.tokenCode = codeFinal; return tokenFinal; }
// Consume one character: return the character under the cursor and move
// both the read position and the column counter forward by one.
char lexer::next_char()
{
    const char consumed = peek_char();

    ++current;
    ++column;

    return consumed;
}
struct Token* get_token() { struct Token newToken; struct Token* retToken; char c = '\0'; retToken = (struct Token*)malloc(sizeof(struct Token)); c = skip_blanks(); if(peek_char() == '{') { c = skip_comment(); } if(char_table[c] == LETTER) { newToken = get_word(c); } else if(char_table[c] == DIGIT) { newToken = get_number(c); } else if(char_table[c] == QUOTE) { newToken = get_string(c); } else if(c == EOF) { newToken.literalValue.valString[0] = '.'; newToken.literalType = INTEGER_LIT; newToken.tokenCode = END_OF_FILE; } else if(char_table[c] == SPECIAL) { newToken = get_special(c); } memcpy(retToken, &newToken, sizeof(struct Token)); return retToken; }
// Interpret one line of scripting language; returns false on QUIT bool scrInterpret(const char *str, uint lineno) { ScriptLine = lineno; const char *x = str; if (! peek_char(&x)) return true; // Output command being executed to the log. Output(C_LOG "HaRET(%d)# %s", lineno, str); char tok[MAX_CMDLEN]; get_token(&x, tok, sizeof(tok), 1); // Okay, now see what keyword is this :) for (int i = 0; i < commands_count; i++) { regCommand *hc = regCommand::cast(commands_start[i]); if (hc && IsToken(tok, hc->name)) { hc->func(tok, x); return true; } } if (IsToken(tok, "Q|UIT")) return false; Output(C_ERROR "Unknown keyword: `%s'", tok); return true; }
struct tml_token tml_stream_pop(struct tml_stream *stream) { struct tml_token token; token.value = NULL; token.value_size = 0; for (;;) { int ch = peek_char(stream); if (ch == ' ' || ch == '\t' || ch == '\r' || ch == '\n') { next_char(stream); continue; } token.offset = stream->index; if (ch == TML_OPEN_CHAR) { next_char(stream); token.type = TML_TOKEN_OPEN; return token; } else if (ch == TML_CLOSE_CHAR) { next_char(stream); token.type = TML_TOKEN_CLOSE; return token; } else if (ch == TML_DIVIDER_CHAR) { next_char(stream); if (peek_char(stream) == TML_DIVIDER_CHAR) { skip_to_next_line(stream); continue; } else { token.type = TML_TOKEN_DIVIDER; return token; } } else if (ch == -1) { token.type = TML_TOKEN_EOF; return token; } else { parse_word_item(stream, &token); return token; } } }
static grub_err_t read_property (struct parsebuf *p) { char *name; /* Read the property name. */ name = read_identifier (p); if (! name) { advance_to_next_line (p); return grub_errno; } /* Skip whitespace before separator. */ skip_whitespace (p); /* Read separator. */ if (read_char (p) != ':') { grub_error (GRUB_ERR_IO, "%s:%d:%d missing separator after property name `%s'", p->filename, p->line_num, p->col_num, name); goto done; } /* Skip whitespace after separator. */ skip_whitespace (p); /* Get the value based on its type. */ if (peek_char (p) == '"') { /* String value (e.g., '"My string"'). */ char *value = read_expression (p); if (! value) { grub_error (GRUB_ERR_IO, "%s:%d:%d missing property value", p->filename, p->line_num, p->col_num); goto done; } /* If theme_set_string results in an error, grub_errno will be returned below. */ theme_set_string (p->view, name, value, p->theme_dir, p->filename, p->line_num, p->col_num); grub_free (value); } else { grub_error (GRUB_ERR_IO, "%s:%d:%d property value invalid; " "enclose literal values in quotes (\")", p->filename, p->line_num, p->col_num); goto done; } done: grub_free (name); return grub_errno; }
// Assumes we have a hyphen at the current position. static cmark_node *handle_hyphen(subject *subj, bool smart) { cmark_strbuf buf = CMARK_BUF_INIT(NULL); int startpos = subj->pos; int en_count = 0; int em_count = 0; int numhyphens; int i; advance(subj); if (!smart || peek_char(subj) != '-') { return make_str(subj->mem, cmark_chunk_literal("-")); } while (smart && peek_char(subj) == '-') { advance(subj); } numhyphens = subj->pos - startpos; buf.mem = subj->mem; if (numhyphens % 3 == 0) { // if divisible by 3, use all em dashes em_count = numhyphens / 3; } else if (numhyphens % 2 == 0) { // if divisible by 2, use all en dashes en_count = numhyphens / 2; } else if (numhyphens % 3 == 2) { // use one en dash at end en_count = 1; em_count = (numhyphens - 2) / 3; } else { // use two en dashes at the end en_count = 2; em_count = (numhyphens - 4) / 3; } for (i = em_count; i > 0; i--) { cmark_strbuf_puts(&buf, EMDASH); } for (i = en_count; i > 0; i--) { cmark_strbuf_puts(&buf, ENDASH); } return make_str(subj->mem, cmark_chunk_buf_detach(&buf)); }
/*
 * Consume characters up to and including the first '\n' or '\r'.
 * At end of input (-1) next_char() is still called once before returning.
 */
void skip_to_next_line(struct tml_stream *stream)
{
    int ch;

    do {
        ch = peek_char(stream);
        next_char(stream);
    } while (ch != '\n' && ch != '\r' && ch != -1);
}
// Parse a link label. Returns 1 if successful. // Note: unescaped brackets are not allowed in labels. // The label begins with `[` and ends with the first `]` character // encountered. Backticks in labels do not start code spans. static int link_label(subject* subj, cmark_chunk *raw_label) { bufsize_t startpos = subj->pos; int length = 0; unsigned char c; // advance past [ if (peek_char(subj) == '[') { advance(subj); } else { return 0; } while ((c = peek_char(subj)) && c != '[' && c != ']') { if (c == '\\') { advance(subj); length++; if (cmark_ispunct(peek_char(subj))) { advance(subj); length++; } } else { advance(subj); length++; } if (length > MAX_LINK_LABEL_LENGTH) { goto noMatch; } } if (c == ']') { // match found *raw_label = cmark_chunk_dup(&subj->input, startpos + 1, subj->pos - (startpos + 1)); cmark_chunk_trim(raw_label); advance(subj); // advance past ] return 1; } noMatch: subj->pos = startpos; // rewind return 0; }
// Try to process a backtick code span that began with a // span of ticks of length openticklength length (already // parsed). Return 0 if you don't find matching closing // backticks, otherwise return the position in the subject // after the closing backticks. static int scan_to_closing_backticks(subject* subj, int openticklength) { // read non backticks unsigned char c; while ((c = peek_char(subj)) && c != '`') { advance(subj); } if (is_eof(subj)) { return 0; // did not find closing ticks, return 0 } int numticks = 0; while (peek_char(subj) == '`') { advance(subj); numticks++; } if (numticks != openticklength) { return(scan_to_closing_backticks(subj, openticklength)); } return (subj->pos); }
// Take characters while a predicate holds, and return a string. static CMARK_INLINE cmark_chunk take_while(subject *subj, int (*f)(int)) { unsigned char c; bufsize_t startpos = subj->pos; bufsize_t len = 0; while ((c = peek_char(subj)) && (*f)(c)) { advance(subj); len++; } return cmark_chunk_dup(&subj->input, startpos, len); }
// Scan ***, **, or * and return number scanned, or 0. // Advances position. static int scan_delims(subject* subj, unsigned char c, bool * can_open, bool * can_close) { int numdelims = 0; int before_char_pos; int32_t after_char = 0; int32_t before_char = 0; int len; bool left_flanking, right_flanking; if (subj->pos == 0) { before_char = 10; } else { before_char_pos = subj->pos - 1; // walk back to the beginning of the UTF_8 sequence: while (peek_at(subj, before_char_pos) >> 6 == 2 && before_char_pos > 0) { before_char_pos -= 1; } len = utf8proc_iterate(subj->input.data + before_char_pos, subj->pos - before_char_pos, &before_char); if (len == -1) { before_char = 10; } } while (peek_char(subj) == c) { numdelims++; advance(subj); } len = utf8proc_iterate(subj->input.data + subj->pos, subj->input.len - subj->pos, &after_char); if (len == -1) { after_char = 10; } left_flanking = numdelims > 0 && !utf8proc_is_space(after_char) && !(utf8proc_is_punctuation(after_char) && !utf8proc_is_space(before_char) && !utf8proc_is_punctuation(before_char)); right_flanking = numdelims > 0 && !utf8proc_is_space(before_char) && !(utf8proc_is_punctuation(before_char) && !utf8proc_is_space(after_char) && !utf8proc_is_punctuation(after_char)); if (c == '_') { *can_open = left_flanking && !right_flanking; *can_close = right_flanking && !left_flanking; } else { *can_open = left_flanking; *can_close = right_flanking; } return numdelims; }
// Try to process a backtick code span that began with a // span of ticks of length openticklength length (already // parsed). Return 0 if you don't find matching closing // backticks, otherwise return the position in the subject // after the closing backticks. static bufsize_t scan_to_closing_backticks(subject *subj, bufsize_t openticklength) { bool found = false; if (openticklength > MAXBACKTICKS) { // we limit backtick string length because of the array subj->backticks: return 0; } if (subj->scanned_for_backticks && subj->backticks[openticklength] <= subj->pos) { // return if we already know there's no closer return 0; } while (!found) { // read non backticks unsigned char c; bufsize_t numticks = 0; while ((c = peek_char(subj)) && c != '`') { advance(subj); } if (is_eof(subj)) { break; } while (peek_char(subj) == '`') { advance(subj); numticks++; } // store position of ender if (numticks <= MAXBACKTICKS) { subj->backticks[numticks] = subj->pos - numticks; } if (numticks == openticklength) { return (subj->pos); } } // got through whole input without finding closer subj->scanned_for_backticks = true; return 0; }
// Take characters while a predicate holds, and return a string. static inline cmark_chunk take_while(subject* subj, int (*f)(int)) { unsigned char c; int startpos = subj->pos; int len = 0; while ((c = peek_char(subj)) && (*f)(c)) { advance(subj); len++; } return cmark_chunk_dup(&subj->input, startpos, len); }
// Advance past consecutive ASCII whitespace characters, stopping at the
// first non-whitespace character or at end of input.
void lexer::skip_whitespace()
{
    while (!eof_char() && is_ascii_whitespace(peek_char())) {
        advance_char();
    }
}
// Parse backslash-escape or just a backslash, returning an inline. static cmark_node *handle_backslash(subject *subj) { advance(subj); unsigned char nextchar = peek_char(subj); if (cmark_ispunct( nextchar)) { // only ascii symbols and newline can be escaped advance(subj); return make_str(subj->mem, cmark_chunk_dup(&subj->input, subj->pos - 1, 1)); } else if (!is_eof(subj) && skip_line_end(subj)) { return make_linebreak(subj->mem); } else { return make_str(subj->mem, cmark_chunk_literal("\\")); } }
static struct buffer get_ident(struct buffer buff, const char **ident, size_t *len, const char **error) { char c = '\0'; if (is_eot(buff)) { raise_unexpected_eof(buff.offset, error); return buff; } *ident = buff.data + buff.offset; *len = 0; peek_char(buff, &c, error); if (*error) goto clean; if (!isalpha((int)c)) return buff; buff.offset++; *len = *len + 1; peek_char(buff, &c, error); while (!*error) { if (!(isalpha((int)c) || isdigit((int)c) || c == '_')) break; buff.offset++; *len = *len + 1; peek_char(buff, &c, error); } clean: return buff; }