/**
 * Tries to match the remainder of a directive keyword in the input.
 *
 * The stream is advanced before every comparison, so the character that is
 * current on entry is never compared against @a cmd; callers pass the
 * keyword without the character they have already looked at.
 *
 * @param cmd  NUL-terminated text expected to follow the current character.
 *
 * @returns    true when every character of @a cmd matched and the character
 *             after it satisfied is_space(); that separator is consumed as
 *             well. false as soon as a mismatch is seen, leaving the
 *             mismatching character in current_.
 */
bool tokenizer::skip_command(char const *cmd)
{
	while (*cmd != '\0') {
		next_char_fast();
		if (current_ != *cmd) {
			return false;
		}
		++cmd;
	}

	// The keyword must be followed by a separator, which is swallowed too.
	next_char_fast();
	if (is_space(current_)) {
		next_char_fast();
		return true;
	}
	return false;
}
/**
 * Builds a tokenizer reading from @a in.
 *
 * Line counting starts at 1, the text domain starts out as PACKAGE and the
 * file name and current token start out empty.
 *
 * @param in  Stream the tokenizer reads from.
 */
tokenizer::tokenizer(std::istream& in) :
	current_(EOF),
	lineno_(1),
	startlineno_(0),
	textdomain_(PACKAGE),
	file_(),
	token_(),
	in_(in)
{
	// Advance once so the first call to next_token() has a character to
	// inspect (presumably this loads the first input character into current_).
	next_char_fast();
}
void tokenizer::skip_comment() { next_char_fast(); if (current_ == '\n' || current_ == EOF) return; std::string *dst = nullptr; if (current_ == 't') { if (!skip_command("extdomain")) goto fail; dst = &textdomain_; } else if (current_ == 'l') { if (!skip_command("ine")) goto fail; lineno_ = 0; while (is_num(current_)) { lineno_ = lineno_ * 10 + (current_ - '0'); next_char_fast(); } if (!is_space(current_)) goto fail; next_char_fast(); dst = &file_; } else { fail: while (current_ != '\n' && current_ != EOF) { next_char_fast(); } return; } dst->clear(); while (current_ != '\n' && current_ != EOF) { *dst += current_; next_char_fast(); } }
/**
 * Builds a tokenizer reading from @a in.
 *
 * Besides initialising the members, this fills the char_types_ lookup table
 * for the character codes 0..127, makes the underlying stream throw on
 * badbit, and advances the input once.
 *
 * @param in  Stream the tokenizer reads from.
 */
tokenizer::tokenizer(std::istream& in) :
	current_(EOF),
	lineno_(1),
	startlineno_(0),
	textdomain_(PACKAGE),
	file_(),
	token_(),
	in_(in)
{
	// Classify every 7-bit character code once, so later lookups are a
	// plain table access. Codes in no class get 0.
	for (int code = 0; code < 128; ++code) {
		const bool blank = (code == ' ' || code == '\t');
		const bool digit = (code >= '0' && code <= '9');
		const bool letter = (code >= 'a' && code <= 'z')
			|| (code >= 'A' && code <= 'Z')
			|| code == '_';

		int kind = 0;
		if (letter) {
			kind = TOK_ALPHA;
		} else if (digit) {
			kind = TOK_NUMERIC;
		} else if (blank) {
			kind = TOK_SPACE;
		}
		char_types_[code] = kind;
	}

	// Report unrecoverable stream errors as exceptions.
	in_.stream().exceptions(std::ios_base::badbit);

	// Advance once so the first call to next_token() has a character to
	// inspect (presumably this loads the first input character into current_).
	next_char_fast();
}
/**
 * Advances the input by one character while keeping lineno_ up to date.
 *
 * The line counter is bumped when the character being left behind is a
 * newline; the check is marked UNLIKELY as a hint for the common case.
 */
void next_char()
{
	if (UNLIKELY(current_ == '\n')) {
		++lineno_;
	}
	next_char_fast();
}
/**
 * Reads the next token from the input into token_ and returns it.
 *
 * Leading blanks and inlined comments are dropped first, and a '#' comment
 * is skipped up to its line end. startlineno_ records the line on which the
 * token starts. The token is then classified from its first character:
 *  - EOF                          -> token::END
 *  - "<<" ... ">>"                -> token::QSTRING, contents taken verbatim
 *  - '"' ... '"'                  -> token::QSTRING, a doubled quote yields
 *                                    one quote character
 *  - one of [ ] / newline = , +   -> token whose type is the character itself
 *  - '_' not followed by an alphanumeric character -> likewise
 *  - a run of is_alnum() characters -> token::STRING
 *  - anything else                -> token::MISC holding that one character
 *
 * A quoted string that runs into EOF is reported as
 * token::UNTERMINATED_QSTRING.
 *
 * @returns  Reference to the internal token, valid until the next call.
 */
const token &tokenizer::next_token()
{
#if DEBUG_TOKENIZER
	previous_token_ = token_;
#endif
	token_.value.clear();

	// Character code that introduces an inlined comment; it is handed to
	// skip_comment() wherever it shows up.
	const int inlined_comment_mark = 254;

	auto drop_blanks = [this]() {
		while (is_space(current_)) {
			next_char_fast();
		}
	};

	// Dump spaces and inlined comments.
	drop_blanks();
	while (current_ == inlined_comment_mark) {
		skip_comment();
		// Skip the line end.
		next_char_fast();
		drop_blanks();
	}

	if (current_ == '#') {
		skip_comment();
	}

	startlineno_ = lineno_;

	switch (current_) {
	case EOF:
		token_.type = token::END;
		break;

	case '<':
		if (peek_char() == '<') {
			// "<<" opens a string that runs up to the matching ">>".
			token_.type = token::QSTRING;
			next_char_fast();
			while (true) {
				next_char();
				if (current_ == EOF) {
					token_.type = token::UNTERMINATED_QSTRING;
					break;
				}
				if (current_ == '>' && peek_char() == '>') {
					next_char_fast();
					break;
				}
				token_.value += current_;
			}
		} else {
			// A lone '<' is just a miscellaneous character.
			token_.type = token::MISC;
			token_.value += current_;
		}
		break;

	case '"':
		token_.type = token::QSTRING;
		while (true) {
			next_char();
			if (current_ == EOF) {
				token_.type = token::UNTERMINATED_QSTRING;
				break;
			}
			if (current_ == '"') {
				if (peek_char() != '"') {
					break;
				}
				// Doubled quote: step onto the second one, which is then
				// appended below as a literal quote character.
				next_char_fast();
			}
			if (current_ == inlined_comment_mark) {
				skip_comment();
				// The next_char() at the top of the loop counts the line
				// end that skip_comment() stopped on; cancel that out.
				--lineno_;
				continue;
			}
			token_.value += current_;
		}
		break;

	case '[':
	case ']':
	case '/':
	case '\n':
	case '=':
	case ',':
	case '+':
		// Punctuation: the token type is the character code itself.
		token_.type = token::token_type(current_);
		token_.value = current_;
		break;

	case '_':
		if (!is_alnum(peek_char())) {
			token_.type = token::token_type(current_);
			token_.value = current_;
			break;
		}
		// An underscore that starts a word is handled like any other
		// word character.
		// no break

	default:
		if (is_alnum(current_)) {
			token_.type = token::STRING;
			do {
				token_.value += current_;
				next_char_fast();
				// Inlined comments inside a word are dropped silently.
				while (current_ == inlined_comment_mark) {
					skip_comment();
					next_char_fast();
				}
			} while (is_alnum(current_));
		} else {
			token_.type = token::MISC;
			token_.value += current_;
			next_char();
		}
		// The input has already been moved past the token here.
		return token_;
	}

	// All other cases leave current_ on the last character of the token.
	if (current_ != EOF) {
		next_char();
	}

	return token_;
}