/*
 * Parse one item of the form "a", "a-b" or "a-b:c" and append the numbers
 * it denotes to il.
 *
 * The numbers appended are a, a+c, a+2c, ... for as long as they are
 * smaller than b. A lone "a" is handled as b = a + 1, so only a itself is
 * appended. The stride c defaults to 1.
 *
 * Returns 1 on success and 0 on failure (a number could not be parsed, the
 * stride is zero, or int_list_add refused a number).
 */
static int parse_range(char_stream_t cs, int_list_t il) {
  int a = parse_int(cs);
  if (a == -1) return 0; /* NG */
  int b = a + 1;
  int c = 1; /* stride */
  int x;
  if (cur_char(cs) == '-') {
    next_char(cs);
    b = parse_int(cs);
    if (b == -1) return 0; /* NG */
    if (cur_char(cs) == ':') {
      next_char(cs);
      c = parse_int(cs);
      if (c == -1) return 0; /* NG */
      if (c == 0) {
        /* a zero stride would never advance x: the loop below would keep
           adding the same number until the list is full and then report
           a misleading "too many numbers" error */
        parse_error(cs, "stride must not be zero");
        return 0;
      }
    }
  }
  for (x = a; x < b; x += c) {
    if (!int_list_add(il, x)) {
      parse_error(cs, "myth_parse_cpu_list: too many numbers in MYTH_CPU_LIST\n");
      return 0;
    }
  }
  return 1; /* OK */
}
// Consume character data from the current position up to (but not
// including) the next '<', or up to the end of the stream, and pass it to
// the handler.
//
// A span without '&' is reported directly from the stream with the second
// handler argument set to false. As soon as an '&' is seen, the text read so
// far is copied into a cell buffer, the rest of the span is processed by
// characters_with_encoded_char(), and the buffer content is reported with
// the second argument set to true (or an empty pstring with false when the
// buffer ends up empty).
void sax_parser<_Handler,_Config>::characters()
{
    const size_t start_pos = m_pos;
    const char* const start = m_char;

    while (has_char())
    {
        const char c = cur_char();

        if (c == '<')
            break;

        if (c == '&')
        {
            // Text span with one or more encoded characters. Parse using
            // cell buffer.
            cell_buffer& buf = get_cell_buffer();
            buf.reset();
            buf.append(start, m_pos - start_pos);
            characters_with_encoded_char(buf);

            if (buf.empty())
            {
                m_handler.characters(pstring(), false);
                return;
            }

            m_handler.characters(pstring(buf.get(), buf.size()), true);
            return;
        }

        next();
    }

    // Nothing was consumed; there is no text to report.
    if (m_pos <= start_pos)
        return;

    pstring val(m_content + start_pos, m_pos - start_pos);
    m_handler.characters(val, false);
}
// Continue parsing a text span that contains encoded characters, appending
// the result to buf.
//
// Must be called with the current character being '&'. Each '&' is handed
// to parse_encoded_char(); the plain text between encoded characters is
// appended to buf verbatim. Parsing stops at the next '<' (which is left
// unconsumed) or when the stream runs out of characters.
void parser_base::characters_with_encoded_char(cell_buffer& buf)
{
    assert(cur_char() == '&');
    parse_encoded_char(buf);

    // Start of the plain-text run that has not been appended to buf yet.
    size_t first = m_pos;

    while (has_char())
    {
        if (cur_char() == '&')
        {
            // Flush the plain text collected so far before decoding.
            if (m_pos > first)
                buf.append(m_content+first, m_pos-first);

            parse_encoded_char(buf);
            first = m_pos;

            // The encoded character may have been the very last thing in
            // the stream. Re-check before looking at the current character
            // again; otherwise we would read (and possibly step) past the
            // end of the content.
            if (!has_char())
                break;
        }

        if (cur_char() == '<')
            break;

        // A '&' right after an encoded character is handled by the next
        // iteration without advancing.
        if (cur_char() != '&')
            next();
    }

    // Flush the trailing plain text, if any.
    if (m_pos > first)
        buf.append(m_content+first, m_pos-first);
}
/*
 * Lex a quoted "ascii number" literal such as 'abcd' into a LEX_NUMBER lexem.
 *
 * The current character is skipped as the opening quote. Every following
 * character up to the closing quote contributes one byte to the value, the
 * first character ending up in the most significant position. Between one
 * and four characters are accepted; anything else, or a missing closing
 * quote, is reported through parse_error.
 */
static void parse_ascii_number(struct context_t *ctx, struct lexem_t *lexem)
{
    locate_lexem(lexem, ctx);
    /* skip ' */
    advance(ctx, 1);
    /* we expect n<=4 character and then ' */
    int len = 0;
    uint32_t value = 0;
    while(!eof(ctx) && cur_char(ctx) != '\'')
    {
        /* Convert through unsigned char: if the character type is signed,
         * a byte >= 0x80 would otherwise be sign-extended and set all the
         * upper bits of the accumulated value. */
        value = value << 8 | (unsigned char)cur_char(ctx);
        len++;
        advance(ctx, 1);
    }
    if(eof(ctx) || cur_char(ctx) != '\'')
        parse_error(ctx, "Unterminated ascii number literal\n");
    if(len == 0 || len > 4)
        parse_error(ctx, "Invalid ascii number literal length: only 1 to 4 characters allowed\n");
    /* skip ' */
    advance(ctx, 1);
    lexem->type = LEX_NUMBER;
    lexem->num = value;
}
// Reads a number into the token buffer, starting at the current character.
//
// When the input starts with "0x" those two characters are copied to the
// token and the remaining characters are accepted as long as is_hex_dec()
// holds; otherwise characters are accepted as long as is_dec() holds.
// Always returns s_info->num_token.
int Scanner::read_num() {
  clear_token();

  const bool hex_prefix = (cur_char() == '0' && next_char() == 'x');
  if (hex_prefix) {
    // Copy the "0x" prefix into the token.
    for (int i = 0; i < 2; ++i) {
      push_char(cur_char());
      go_ahead();
    }
  }

  for (;;) {
    const char c = cur_char();
    if (hex_prefix && !is_hex_dec(c)) {
      break;
    }
    if (!hex_prefix && !is_dec(c)) {
      break;
    }
    push_char(cur_char());
    go_ahead();
  }

  return s_info->num_token;
}
// Reads a symbol into the token buffer: characters are copied for as long
// as is_symbody() accepts the current character. Always returns
// s_info->sym_token.
int Scanner::read_sym() {
  clear_token();
  for (;;) {
    if (!is_symbody(cur_char())) {
      break;
    }
    push_char(cur_char());
    go_ahead();
  }
  return s_info->sym_token;
}
// Moves to the next character and keeps the line counter up to date.
//
// The position is not moved when the current character is 0. The line
// counter is incremented when the character at the resulting position is a
// newline, i.e. the newline is counted on arrival rather than on departure.
void Scanner::go_ahead() {
  if (cur_char() != 0) {
    ++cur_;
  }
  if (cur_char() == '\n') {
    ++ln_;
  }
}
void parser_base::literal(const char*& p, size_t& len, char quote) { assert(cur_char() == quote); next(); skip_to(p, len, quote); if (cur_char() != quote) throw css::parse_error("literal: end quote has never been reached."); }
// Finish parsing a number written in exponent notation.
//
// Must be called with the current character being 'e' or 'E'. The exponent
// is read with parse_long_or_throw(), base is scaled by ten to that power,
// the result is passed to the handler and trailing blanks are skipped.
//
// Throws json::parse_error when nothing follows the exponent marker.
void json_parser<_Handler>::number_with_exp(double base)
{
    assert(cur_char() == 'e' || cur_char() == 'E');
    next();

    if (has_char())
    {
        long exponent = parse_long_or_throw();
        base *= std::pow(10.0, exponent);
        m_handler.number(base);
        skip_blanks();
        return;
    }

    throw json::parse_error("number_with_exp: illegal exponent value.", offset());
}
// Skip over blank characters, starting with the current one. Stops at the
// first character for which is_blank() is false, or as soon as the stream
// has no more characters.
void parser_base::blank()
{
    for (char c = cur_char(); is_blank(c); c = cur_char())
    {
        next();
        if (!has_char())
            return;
    }
}
/*
 * Lex an identifier (a run of alphanumeric characters and underscores)
 * starting at the current position into a LEX_IDENTIFIER lexem.
 *
 * lexem->str receives a freshly allocated, NUL-terminated copy of the
 * identifier text.
 */
static void parse_identifier(struct context_t *ctx, struct lexem_t *lexem)
{
    locate_lexem(lexem, ctx);
    /* remember position */
    char *old = ctx->ptr;
    /* isalnum() requires a value representable as unsigned char (or EOF);
     * passing a negative char is undefined behaviour, hence the cast. */
    while(!eof(ctx) && (isalnum((unsigned char)cur_char(ctx)) || cur_char(ctx) == '_'))
        advance(ctx, 1);
    lexem->type = LEX_IDENTIFIER;
    int len = ctx->ptr - old;
    lexem->str = xmalloc(len + 1);
    lexem->str[len] = 0;
    memcpy(lexem->str, old, len);
}
/*
 * Read the body of a double-quoted string starting at the current position
 * and hand every resulting character to emit_fn(user, c).
 *
 * The function does not consume an opening quote itself. Reading stops at
 * the closing '"', which is consumed. The escape sequences \\, \' and \"
 * produce the escaped character; any other escape is reported through
 * parse_error, as is a string that is not terminated before end of input.
 */
static void __parse_string(struct context_t *ctx, void *user, void (*emit_fn)(void *user, char c))
{
    for(;;)
    {
        if(eof(ctx) || cur_char(ctx) == '"')
            break;

        if(cur_char(ctx) != '\\')
        {
            /* ordinary character */
            emit_fn(user, cur_char(ctx));
            advance(ctx, 1);
            continue;
        }

        /* escape sequence: skip the backslash and look at what follows */
        advance(ctx, 1);
        if(eof(ctx))
            parse_error(ctx, "Unfinished string\n");
        switch(cur_char(ctx))
        {
            case '\\':
                emit_fn(user, '\\');
                break;
            case '\'':
                emit_fn(user, '\'');
                break;
            case '\"':
                emit_fn(user, '\"');
                break;
            default:
                parse_error(ctx, "Unknown escape sequence \\%c\n", cur_char(ctx));
                break;
        }
        advance(ctx, 1);
    }

    if(eof(ctx) || cur_char(ctx) != '"')
        parse_error(ctx, "Unfinished string\n");
    /* skip the closing quote */
    advance(ctx, 1);
}
/*
 * Get a non-negative decimal number from the stream, or return -1.
 *
 * Digits are consumed for as long as the current character is one. -1 is
 * returned, after reporting through parse_error, when there is no digit at
 * the current position or when the number does not fit in an int.
 */
static int parse_int(char_stream_t cs) {
  /* largest int value, derived locally so that nothing beyond what this
     function already uses has to be included */
  const int int_max = (int)(~0u >> 1);
  int x = 0;
  int n_digits = 0;
  /* isdigit() is only defined for values representable as unsigned char
     (or EOF), so do not hand it a possibly negative character */
  while (isdigit((unsigned char)cur_char(cs))) {
    int d = cur_char(cs) - '0';
    /* x * 10 + d would overflow (undefined for signed int) */
    if (x > (int_max - d) / 10) {
      parse_error(cs, "number too large");
      return -1;
    }
    n_digits++;
    x = x * 10 + d;
    next_char(cs);
  }
  if (n_digits == 0) {
    parse_error(cs, "expected a digit");
    return -1;
  }
  return x;
}
/*
 * Parse a comma-separated list of ranges (each handled by parse_range) that
 * must extend to the terminating '\0' of the stream.
 *
 * set_ok_pos is called after every successfully parsed range. Returns 1 on
 * success; returns 0 as soon as a range fails to parse, or when something
 * other than ',' or the terminator follows a range.
 */
static int parse_range_list(char_stream_t cs, int_list_t il) {
  for (;;) {
    if (!parse_range(cs, il)) return 0; /* NG */
    set_ok_pos(cs);
    if (cur_char(cs) != ',') break;
    next_char(cs); /* skip ',' and go for the next range */
  }
  if (cur_char(cs) == '\0') return 1; /* OK */
  next_char(cs);
  parse_error(cs, "junk at the end of CPU list");
  return 0; /* NG */
}
int Scanner::GetToken(int *sub) { *sub = 0; skip_non_token(); // char c = cur_char(); if (is_dec(c)) { return read_num(); } // struct OperatorTableEntry *op = lookup_op(); if (op) { int r; r = read_op(op); *sub = op->sub_op; return r; } // if (c == '\"') { return read_str(); } if (is_symhead(c)) { return read_sym(); } return -1; }
// Parse a double-quoted value starting at the current character.
//
// When decode is true and the value contains an '&', the text read so far
// is copied into a cell buffer, the rest is delegated to
// value_with_encoded_char() and true is returned. Otherwise str is set to
// the span between the quotes as it appears in the stream, the closing
// quote is skipped and false is returned.
//
// Throws malformed_xml_error when the current character is not '"'.
bool parser_base::value(pstring& str, bool decode)
{
    if (cur_char() != '"')
        throw malformed_xml_error("value must be quoted");

    char c = next_char_checked();
    const size_t start_pos = m_pos;
    const char* const start = m_char;

    while (c != '"')
    {
        if (decode && c == '&')
        {
            // This value contains one or more encoded characters.
            cell_buffer& buf = get_cell_buffer();
            buf.reset();
            buf.append(start, m_pos - start_pos);
            value_with_encoded_char(buf, str);
            return true;
        }

        c = next_char_checked();
    }

    str = pstring(start, m_pos - start_pos);

    // Skip the closing quote.
    next();
    return false;
}
// Tells whether the current position starts a comment, i.e. the current
// character is '/' and the one reported by next_char() is '*' or '/'.
bool Scanner::is_comment_start() {
  if (cur_char() != '/') {
    return false;
  }
  const char following = next_char();
  return following == '*' || following == '/';
}
// Parse the body of an XML comment, starting at the current character,
// through its terminating '-->'.
//
// The scan covers at most remains() characters and stops at the first pair
// of consecutive hyphens. That pair must be immediately followed by '>';
// otherwise malformed_xml_error is thrown. On success the closing '>' is
// skipped.
void parser_base::comment()
{
    // Parse until we reach '-->'.
    size_t len = remains();
    assert(len > 3);
    char c = cur_char();
    size_t i = 0;
    // True when the previously examined character was a hyphen.
    bool hyphen = false;
    // NOTE(review): next_char() is the only stepping operation in this
    // loop, so it presumably advances the position and returns the new
    // current character - confirm against its definition.
    for (; i < len; ++i, c = next_char())
    {
        if (c == '-')
        {
            if (!hyphen)
                // first hyphen.
                hyphen = true;
            else
                // second hyphen.
                break;
        }
        else
            hyphen = false;
    }

    // Either the scan ran (almost) to the end without finding '--', or the
    // '--' that was found is not followed by '>'.
    if (len - i < 2 || next_char() != '>')
        throw malformed_xml_error("'--' should not occur in comment other than in the closing tag.");

    // Skip the closing '>'.
    next();
}
uint8_t parser_base::parse_uint8() { // 0 - 255 int val = 0; size_t len = 0; for (; has_char() && len <= 3; next()) { char c = cur_char(); if (!is_numeric(c)) break; ++len; val *= 10; val += c - '0'; } if (!len) throw css::parse_error("parse_uint8: no digit encountered."); int maxval = std::numeric_limits<uint8_t>::max(); if (val > maxval) val = maxval; return static_cast<uint8_t>(val); }
// Parse a single attribute of the form name="value" (optionally with a
// namespace prefix on the name) and pass it to the handler.
//
// attr.transient records what value() returned; when it is true the value
// lives in a temporary buffer, so the buffer position is advanced to keep
// it from being reused.
//
// Throws sax::malformed_xml_error when the name is not followed by '='.
void sax_parser<_Handler,_Config>::attribute()
{
    sax::parser_attribute attr;

    attribute_name(attr.ns, attr.name);

#if ORCUS_DEBUG_SAX_PARSER
    std::ostringstream os;
    os << "sax_parser::attribute: ns='" << attr.ns << "', name='" << attr.name << "'";
#endif

    char c = cur_char();
    if (c != '=')
    {
        std::ostringstream os;
        os << "Attribute must begin with 'name=..'. (ns='" << attr.ns << "', name='" << attr.name << "')";
        throw sax::malformed_xml_error(os.str());
    }

    // Skip the '='.
    next_check();

    attr.transient = value(attr.value, true);
    if (attr.transient)
        // Value is stored in a temporary buffer. Push a new buffer.
        inc_buffer_pos();

#if ORCUS_DEBUG_SAX_PARSER
    os << " value='" << attr.value << "'" << endl;
    cout << os.str();
#endif

    m_handler.attribute(attr);
}
// Parse the content of a CDATA section, starting at the current character,
// through its terminating ']]>'.
//
// The text in front of the terminator is passed to the handler's
// characters() with the second argument set to false, and the closing '>'
// is skipped. Throws sax::malformed_xml_error when no ']]>' is found within
// the remaining characters.
void sax_parser<_Handler,_Config>::cdata()
{
    size_t len = remains();
    assert(len > 3);

    // Parse until we reach ']]>'.
    const char* p0 = m_char;
    // i counts the characters examined so far; match counts the
    // consecutive ']' characters seen immediately before the current one,
    // capped at 2.
    size_t i = 0, match = 0;
    // NOTE(review): next_char() is the only stepping operation in this
    // loop, so it presumably advances the position and returns the new
    // current character - confirm against its definition.
    for (char c = cur_char(); i < len; ++i, c = next_char())
    {
        if (c == ']')
        {
            // Be aware that we may encounter a series of more than two ']'
            // characters, in which case we'll only count the last two.

            if (match == 0)
                // First ']'
                ++match;
            else if (match == 1)
                // Second ']'
                ++match;
        }
        else if (c == '>' && match == 2)
        {
            // Found ']]>'.
            // i is the offset of the '>', so the two ']' in front of it are
            // excluded from the reported text.
            size_t cdata_len = i - 2;
            m_handler.characters(pstring(p0, cdata_len), false);
            next();
            return;
        }
        else
            match = 0;
    }
    throw sax::malformed_xml_error("malformed CDATA section.");
}
// Parse the root value, which has to be either an array or an object.
// Anything else at the current position is reported through
// json::parse_error::throw_with().
void json_parser<_Handler>::root_value()
{
    const char c = cur_char();

    if (c == '[')
    {
        array();
        return;
    }

    if (c == '{')
    {
        object();
        return;
    }

    json::parse_error::throw_with(
        "root_value: either '[' or '{' was expected, but '", cur_char(), "' was found.", offset());
}
// Parse a number starting at the current character, which must be a digit
// or '-'.
//
// The mantissa is read with parse_double_or_throw(). When it is followed by
// 'e' or 'E' the rest is left to number_with_exp(); otherwise the value is
// passed to the handler and trailing blanks are skipped.
void json_parser<_Handler>::number()
{
    assert(is_numeric(cur_char()) || cur_char() == '-');

    double val = parse_double_or_throw();

    const char c = cur_char();
    if (c == 'e' || c == 'E')
    {
        number_with_exp(val);
        return;
    }

    m_handler.number(val);
    skip_blanks();
}
void parser_base::comment() { assert(cur_char() == '*'); // Parse until we reach either EOF or '*/'. bool has_star = false; for (next(); has_char(); next()) { char c = cur_char(); if (has_star && c == '/') { next(); return; } has_star = (c == '*'); } // EOF reached. }
// Parse an opening element, starting at the first character of its name,
// through the closing '>' or '/>'.
//
// Attributes are parsed one at a time until the end of the tag is reached.
// For '<element>' the nesting level is raised and start_element() is
// called; for the self-closing form '<element/>' start_element() is
// immediately followed by end_element() and the nesting level is left
// alone. The buffer position is reset after start_element() in both cases.
//
// Throws sax::malformed_xml_error when '/' is not followed by '>'.
void sax_parser<_Handler,_Config>::element_open(const char* begin_pos)
{
    assert(is_alpha(cur_char()));

    sax::parser_element elem;
    element_name(elem, begin_pos);

    for (;;)
    {
        blank();

        switch (cur_char())
        {
            case '/':
            {
                // Self-closing element: <element/>
                if (next_char() != '>')
                    throw sax::malformed_xml_error("expected '/>' to self-close the element.");

                next();
                elem.end_pos = m_char;
                m_handler.start_element(elem);
                reset_buffer_pos();
                m_handler.end_element(elem);
#if ORCUS_DEBUG_SAX_PARSER
                cout << "element_open: ns='" << elem.ns << "', name='" << elem.name << "' (self-closing)" << endl;
#endif
                return;
            }
            case '>':
            {
                // End of opening element: <element>
                next();
                elem.end_pos = m_char;
                nest_up();
                m_handler.start_element(elem);
                reset_buffer_pos();
#if ORCUS_DEBUG_SAX_PARSER
                cout << "element_open: ns='" << elem.ns << "', name='" << elem.name << "'" << endl;
#endif
                return;
            }
            default:
                // Anything else is taken as the start of an attribute.
                attribute();
        }
    }
}
// Advance until the current character equals c or the input runs out.
//
// p is set to the position at which the scan started and len to the number
// of characters stepped over. The matching character itself is not
// consumed and is not counted in len.
void parser_base::skip_to(const char*&p, size_t& len, char c)
{
    p = mp_char;
    len = 0;

    while (has_char())
    {
        if (cur_char() == c)
            return;

        next();
        ++len;
    }
}
// Parse an attribute name that may carry a namespace prefix ("ns:name").
//
// A name is read into attr_name first. When it is followed by ':' that
// name is moved to attr_ns and a second name is read into attr_name;
// otherwise attr_ns is left untouched.
void parser_base::attribute_name(pstring& attr_ns, pstring& attr_name)
{
    name(attr_name);

    if (cur_char() != ':')
        return;

    // Attribute name is namespaced.
    attr_ns = attr_name;
    next_check();
    name(attr_name);
}
// Parse a closing element. Must be called with the current character being
// the '/' of '</name>'.
//
// The nesting level is lowered before the name is parsed. After the closing
// '>' has been skipped, end_element() is called, and the root element is
// flagged as no longer open once the nesting level is back to zero.
//
// Throws sax::malformed_xml_error when the name is not followed by '>'.
void sax_parser<_Handler,_Config>::element_close(const char* begin_pos)
{
    assert(cur_char() == '/');
    nest_down();
    next_check();

    sax::parser_element elem;
    element_name(elem, begin_pos);

    const bool closed = (cur_char() == '>');
    if (!closed)
        throw sax::malformed_xml_error("expected '>' to close the element.");

    next();
    elem.end_pos = m_char;
    m_handler.end_element(elem);
#if ORCUS_DEBUG_SAX_PARSER
    cout << "element_close: ns='" << elem.ns << "', name='" << elem.name << "'" << endl;
#endif

    if (m_nest_level)
        return;

    m_root_elem_open = false;
}
// Skips one comment starting at the current position; does nothing when
// the current position is not the start of a comment.
//
// A "//" comment is skipped through the end of the line (including the
// newline). A "/* ... */" comment is skipped through the closing "*/"; when
// the comment is never closed, everything up to the end of input is
// skipped.
void Scanner::skip_comment() {
  if (!is_comment_start()) {
    return ;
  }
  char c = next_char();
  if (c == '/') {
    // Line comment: run to the newline or the end of input, then step over
    // the newline.
    while (cur_char() != '\n' && cur_char() != 0) {
      go_ahead();
    }
    go_ahead();
  } else {
    // Block comment: step over the opening "/*".
    go_ahead();
    go_ahead();
    // Stop on a current character of 0 as well, which the line-comment
    // branch above already treats as the end of input. go_ahead() does not
    // move past such a character, so without this check an unterminated
    // comment would spin here forever.
    while (cur_char() != 0 && (cur_char() != '*' || next_char() != '/')) {
      go_ahead();
    }
    // Step over the closing "*/".
    go_ahead();
    go_ahead();
  }
}
// Skips everything that is not part of a token: characters accepted by
// is_space() and comments, in any order and any number, until the current
// position is neither.
void Scanner::skip_non_token() {
  for (;;) {
    if (is_space(cur_char())) {
      go_ahead();
      continue;
    }
    if (!is_comment_start()) {
      return ;
    }
    skip_comment();
  }
}