bool XmlReader::parseXmlHeader () { eatChar ('?'); char * name = getNextToken (); if (name == NULL || strcmp (name, "xml") != 0) { free (name); MESSAGE ("Error: XML must start with a valid xml declaration <?xml ?> !'\n"); m_failure = true; return false; } free (name); XmlAttribute * attr = parseAttribute (); while (attr != NULL) { if (m_iconv == NULL && strcmp (attr->m_name, "encoding") == 0 && strcmp (attr->m_value, "UTF-8") != 0) { // Setup caracter conversion as encoding is not UTF-8 m_iconv = iconv_open ("UTF-8", attr->m_value); if (m_iconv == (iconv_t)-1) { // unsupported target encoding MESSAGE ("Error: unsupported encoding in xml declaration: %s\n", attr->m_value); return false; } } delete attr; attr = parseAttribute (); } if (eatChar ('?') && eatChar ('>')) { return true; } MESSAGE ("Error: <?xml declaration must end with a valid '?>' value !'\n"); return false; }
// return a string or NULL is next thing is not a string (i.e. first char is not a '"') char * XmlReader::getString () { int utf = 0; skipSpaces (); char quote = '"'; if (!eatChar (quote)) { quote = '\''; // single quoted string ? if (!eatChar (quote)) { return NULL; } } int curPos = 0; // m_sb.setLength (0); char c = getChar (); while ( c != '\0' && c != quote ) { if (c == '\\') { c = getNextChar (); switch (c) { case '"': c = '"'; break; case '\\': c = '\\'; break; case '/' : c = '/'; break; case 'b' : c = '\b'; break; case 'f' : c = '\f'; break; case 'n' : c = '\n'; break; case 'r' : c = '\r'; break; case 't' : c = '\t'; break; case 'u' : utf = 0; utf += getHexDigit (getNextChar ()) << 12; utf += getHexDigit (getNextChar ()) << 8; utf += getHexDigit (getNextChar ()) << 4; utf += getHexDigit (getNextChar ()); c = (char)utf; break; } } else if (c == '&') { c = getHtmlChar (); } else if (c == '\n') { m_nbLines++; } setSb (curPos++, c); c = getNextChar (); } if (eatChar (quote)) { setSb (curPos, 0); return toUTF8 (m_sb, curPos); } else { return NULL; } }
// Produces the next token from the input.
//
// Whitespace is skipped first; a '\0' at the current position yields
// Token::Eof. Otherwise the sub-parsers are tried in a fixed order
// (keyword, symbol, identifier, numeric) and the first result other than
// Token::None is returned. If none of them matches, a single character is
// consumed into value.character and Token::Character is returned.
Token Lexer::parseNext() {
    skipWhitespace();

    if (curChar() == '\0') {
        return Token::Eof;
    }

    // Each later parser only runs when every earlier one reported None.
    Token tok = parseKeyword();
    if (tok == Token::None) {
        tok = parseSymbol();
    }
    if (tok == Token::None) {
        tok = parseIdentifier();
    }
    if (tok == Token::None) {
        tok = parseNumeric();
    }
    if (tok != Token::None) {
        return tok;
    }

    // Nothing recognised it: hand the raw character back to the caller.
    value.character = eatChar();
    return Token::Character;
}
void eatIgnored() { bool stop(false); while (!stop){ while (isIgnored(pickChar())) eatChar(); if (_ignore_flag == ignoreFlag::None) return; stop = true; if (_ignore_flag == ignoreFlag::CppC) { if (pickString(sizeof("//") - 1) == "//") { eatString(sizeof("//") - 1); while (stream_cursor != stream_buffer.end() || pickChar() != '\n') eatChar(); stop = false; } if (pickString(sizeof("/*") - 1) == "/*") {eatString(sizeof("/*") - 1); while (pickString(sizeof("*/")-1) != "*/") eatChar(); eatString(sizeof("/*") - 1); stop = false; } } } }
// Tries to read a numeric literal at the current position.
//
// Returns Token::None without consuming anything when the current character
// is neither a digit nor '.'. Otherwise value.string is cleared and filled
// with the consumed digits and at most one '.', and Token::Numeric is
// returned as soon as a character that does not belong to the number (or a
// second '.') is reached.
Token Lexer::parseNumeric() {
    bool dotFound = false;

    // <cctype> classifiers require a value representable as unsigned char;
    // passing a negative char (any byte >= 0x80 on signed-char platforms)
    // is undefined behaviour, hence the casts below.
    if (!(isdigit(static_cast<unsigned char>(curChar())) || curChar() == '.')) {
        return Token::None;
    }

    value.string.clear();

    do {
        if (isdigit(static_cast<unsigned char>(curChar()))) {
            value.string += eatChar();
        } else if (!dotFound && curChar() == '.') {
            dotFound = true;
            value.string += eatChar();
        } else {
            return Token::Numeric;
        }
    } while (true);
}
inline std::string getNamedString(std::istream& inStream, const std::string& inName){ findNextToken(inStream); std::string token; std::getline(inStream, token, ' '); if(token != inName) fatalError(inName + " variable wanted."); // '"' must be here eatChar(inStream, '"'); //read name std::string value; std::getline(inStream, value, '"'); return value; }
// Tries to read an identifier at the current position.
//
// An identifier starts with an alphabetic character or '_' and continues
// with alphanumeric characters or '_'. Returns Token::None without consuming
// anything when the current character cannot start an identifier; otherwise
// value.string is cleared, filled with the identifier text, and
// Token::Identifier is returned.
Token Lexer::parseIdentifier() {
    // <cctype> classifiers require a value representable as unsigned char;
    // passing a negative char (any byte >= 0x80 on signed-char platforms)
    // is undefined behaviour, hence the casts below.
    if (!(isalpha(static_cast<unsigned char>(curChar())) || curChar() == '_'))
        return Token::None;

    value.string.clear();

    do {
        if (isalnum(static_cast<unsigned char>(curChar())) || curChar() == '_')
            value.string += eatChar();
        else {
            return Token::Identifier;
        }
    } while (true);
}