// Parses one WBXML document from `reader` and reports it to `contentHandler`.
//
// Header handling, as implemented here:
//   * the version byte must be 0x03;
//   * the public identifier must be 0, i.e. it is given as a string-table index,
//     and that index must itself be 0;
//   * the charset must be 0x6a;
//   * the string table is read entry by entry and registered with the reader.
// After the header, tokens are dispatched until the reader runs out of bytes.
//
// @param reader   byte source; also stores the string table (putStrT/getStrT).
// @param verbose  when true, a trace of the recognised tokens is written to cout.
// @return true if the whole document was consumed, every handler callback
//         succeeded and all opened elements were closed; false otherwise.
//
// Fixes relative to the previous revision:
//   * ENTITY codes are read into a real `unsigned int` instead of through a
//     reference cast of a `wchar_t`, which overran the variable where wchar_t
//     is narrower than unsigned int.
//   * Extension/OPAQUE tokens outside of any element are rejected instead of
//     calling top() on an empty stack.
//   * A DTD string without a space makes parse() return false rather than
//     terminating the process with exit(1).
bool Parser::parse(IReader& reader, bool verbose) {
    debug = verbose;
    tagPage = 0;
    attributePage = 0;

    // ---- version ----
    if (!reader.readByte(version)) return false;
    if (version != 0x03) return false;
    if (debug) cout << " : Version";

    // ---- public identifier (only the "string table reference" form is accepted) ----
    if (!reader.readMultiByteInt(publicIdentifierId)) return false;
    if (debug) cout << " : PublicIdentifierId";
    if (publicIdentifierId != 0) return false;

    unsigned int dtdIndex = 0;
    if (!reader.readMultiByteInt(dtdIndex)) return false;
    if (debug) cout << " : dtdIndex";
    if (dtdIndex != 0) return false;

    // ---- charset ----
    if (!reader.readMultiByteInt(charSet)) return false;
    if (charSet != 0x6a) return false;
    if (debug) cout << " : CharSet";

    // ---- string table ----
    unsigned int len;
    if (!reader.readMultiByteInt(len)) return false;
    if (debug) cout << " : StringTable Len";

    unsigned int offset = 0;
    while (offset < len) {
        wstring entry;
        unsigned short utflen;
        if (!reader.readUTF(entry, utflen)) return false;

        unsigned char terminator;
        if (!reader.readByte(terminator)) return false; // skip null termination
        if (debug) cout << " : null termination";

        reader.putStrT(offset, entry);
        // Each entry occupies: 2 (presumably readUTF's length prefix — confirm
        // against IReader) + utflen payload bytes + 1 terminator.
        offset += utflen + 2 + 1;
    }

    if (!contentHandler.startDocument()) return false;

    // ---- DTD lookup: string-table entry of the form "<name> <dtd>" ----
    if (publicIdentifierId == 0) {
        wstring dtdPair;
        if (!reader.getStrT(dtdIndex, dtdPair)) return false;
        if (debug) cout << " : dtdPair";

        const size_t space = dtdPair.find(L' ');
        if (space == wstring::npos) {
            cout << endl << "DTD specifier does not contain 'space'" << endl;
            return false; // malformed input is reported like every other parse error
        }
        wstring name = dtdPair.substr(0, space);
        wstring dtd = dtdPair.substr(space + 1);

        istream* resolverStream = resolver.resolveEntity(name, L"", dtd);
        if (resolverStream != NULL) {
            // FIXME, use the resolver stream
            // NOTE(review): ownership of resolverStream is not visible here — confirm
            // whether the caller is expected to release it.
            cout << endl << "Entity Resolver NOT Used." << endl;
        }
    }

    // ---- body: one token per iteration until the reader is exhausted ----
    unsigned char id;
    while (reader.readByte(id)) {
        switch (id) {
        case WBXML::SWITCH_PAGE:
            if (debug) cout << " : Switch Page";
            if (!reader.readByte(tagPage)) return false;
            if (debug) cout << " : TagPage: " << (unsigned int)tagPage;
            break;

        case WBXML::END: {
            // An END without a matching open element is a malformed document.
            if (stack.empty()) return false;
            unsigned int tagID = stack.top();
            stack.pop();
            if (debug) cout << " : </" << tagHandler.getTag(tagID) << ">";
            if (!contentHandler.endElement(tagID)) return false;
            break;
        }

        case WBXML::ENTITY: {
            if (debug) cout << " : Entity";
            // Read into a properly typed integer first; the value is narrowed to
            // wchar_t afterwards (truncating where wchar_t is 16 bits wide).
            unsigned int entityCode = 0;
            if (!reader.readMultiByteInt(entityCode)) return false;
            if (debug) cout << " : " << entityCode;
            wchar_t entityChar = static_cast<wchar_t>(entityCode);
            if (!contentHandler.characters(&entityChar, 0, 1)) return false;
            break;
        }

        case WBXML::STR_I: {
            if (debug) cout << " : STR_I";
            wstring s;
            if (!reader.readStrI(s)) return false;
            if (!contentHandler.characters(s, 0, s.length())) return false;
            break;
        }

        case WBXML::EXT_I_0:
        case WBXML::EXT_I_1:
        case WBXML::EXT_I_2:
        case WBXML::EXT_T_0:
        case WBXML::EXT_T_1:
        case WBXML::EXT_T_2:
        case WBXML::EXT_0:
        case WBXML::EXT_1:
        case WBXML::EXT_2:
        case WBXML::OPAQUE: {
            // The extension is attributed to the innermost open element, so there
            // has to be one; top() on an empty stack is undefined behaviour.
            if (stack.empty()) return false;
            MutableAttributes result;
            vector<wstring> value;
            if (!handleExtensions(reader, id, stack.top(), -1, result, value)) return false;
            break;
        }

        case WBXML::PI:
            if (debug) cout << " : PI";
            return false; // processing instructions are not supported

        case WBXML::STR_T: {
            if (debug) cout << " : STR_T";
            wstring s;
            unsigned int pos;
            if (!reader.readStrT(s, pos)) return false;
            if (!contentHandler.characters(s, 0, s.size())) return false;
            break;
        }

        default:
            // Anything else is treated as an element start tag.
            if (!readElement(reader, id)) return false;
            break;
        }
    }

    if (!stack.empty()) {
        // Input ended while elements were still open.
        cout << endl << "Stack not empty " << stack.size();
        return false;
    }

    return contentHandler.endDocument();
}
// FIXME, no handling of arrays yet bool DefaultExtensionHandler::opaque(unsigned int len, IReader& reader, unsigned int tagID, int attributeID, MutableAttributes& atts, vector<wstring>& value) { // read type unsigned char type; if (!reader.readByte(type)) return false; if (debug) cout << " : Type: "<< atts.getTypeName((IAttributes::Types)type); len--; switch (type) { case IAttributes::BOOLEAN: if (len == 1) { bool b; if (!reader.readBoolean(b)) return false; atts.set(attributeID, b); len--; } else { // boolean[] array = new boolean[len]; // for (int i=0; i<array.length; i++) { // array[i] = in.readByte() != 0; // len--; // } // atts.set(attributeID, array); } break; case IAttributes::BYTE: if (len == 1) { unsigned char b; if (!reader.readByte(b)) return false; atts.set(attributeID, b); len--; } else { // byte[] array = new byte[len]; // for (int i=0; i<array.length; i++) { // array[i] = in.readByte(); // len--; // } // atts.set(attributeID, array); } break; case IAttributes::CHAR: if (len == 2) { wchar_t c; if (!reader.readChar(c)) return false; atts.set(attributeID, c); len-=2; } else { // char[] array = new char[len/2]; // for (int i=0; i<array.length; i++) { // array[i] = in.readChar(); // len-=2; // } // atts.set(attributeID, array); } break; case IAttributes::DOUBLE: if (len == 8) { double d; if (!reader.readDouble(d)) return false; atts.set(attributeID, d); len -= 8; } else { // double[] array = new double[len/8]; // for (int i=0; i<array.length; i++) { // array[i] = in.readDouble(); // len-=8; // } // atts.set(attributeID, array); } break; case IAttributes::FLOAT: if (len == 4) { float f; if (!reader.readFloat(f)) return false; atts.set(attributeID, f); len -= 4; } else { // float[] array = new float[len/4]; // for (int i=0; i<array.length; i++) { // array[i] = in.readFloat(); // len-=4; // } // atts.set(attributeID, array); } break; case IAttributes::INT: if (len == 4) { int i; if (!reader.readInt(i)) return false; atts.set(attributeID, i); len -= 4; } 
else { // int[] array = new int[len/4]; // for (int i=0; i<array.length; i++) { // array[i] = in.readInt(); // len-=4; // } // atts.set(attributeID, array); } break; case IAttributes::LONG: if (len == 8) { int64 l; if (!reader.readLong(l)) return false; atts.set(attributeID, l); len -= 8; } else { // long[] array = new long[len/8]; // for (int i=0; i<array.length; i++) { // array[i] = in.readLong(); // len-=8; // } // atts.set(attributeID, array); } break; case IAttributes::SHORT: if (len == 2) { short s; if (!reader.readShort(s)) return false; atts.set(attributeID, s); len -= 2; } else { // short[] array = new short[len/2]; // for (int i=0; i<array.length; i++) { // array[i] = in.readShort(); // len-=2; // } // atts.set(attributeID, array); } break; case IAttributes::STRING: { vector<wstring> vs; while (len != 0) { wstring s; unsigned short utflen; if (!reader.readUTF(s, utflen)) return false; unsigned char b; if (!reader.readByte(b)) return false; // skip NULL termination vs.push_back(s); len -= utflen + 2+ 1; } if (vs.size() == 1) { atts.set(attributeID, vs[0]); } else { // String[] array = new String[vs.size()]; // array = (String[])vs.toArray(array); // atts.set(attributeID, array); } } break; default: cerr << endl << "ERROR No opaque handler for attributeID: "<< attributeID << " part of tagID: "<< tagID << endl; return false; } if (debug) cout << " : Value"; if (len != 0) { cerr << endl << "Skipping "<< len << " unused OPAQUE bytes..."<< endl; while (len != 0) { unsigned char b; if (!reader.readByte(b)) return false; len--; } } return true; }
bool Parser::readAttr(IReader& reader, unsigned int tagID, MutableAttributes& atts) { int intResult; bool hasIntResult = false; wchar_t charResult; int hasCharResult = false; unsigned char id; if (!reader.readByte(id)) return false; int attributeID = -1; while (id != WBXML::END) { // attribute start while (id == WBXML::SWITCH_PAGE) { if (debug) cout << " : Switch Page"; if (!reader.readByte(attributePage)) return false; if (debug) cout << " : AttributePage: " << (unsigned int)attributePage; if (!reader.readByte(id)) return false; } attributeID = getAttributeId(id); if (debug) cout << " : AttributeID (0x" << hex << attributeID << "): " << tagHandler.getAttribute(attributeID); vector<wstring> value; // attribute value(s) if (!reader.readByte(id)) return false; while (id > 128|| id == WBXML::SWITCH_PAGE || id == WBXML::ENTITY || id == WBXML::STR_I || id == WBXML::STR_T|| (id >= WBXML::EXT_I_0 && id <= WBXML::EXT_I_2)|| (id >= WBXML::EXT_T_0 && id <= WBXML::EXT_T_2)) { switch (id) { case WBXML::SWITCH_PAGE: if (debug) cout << " : Switch Page"; if (!reader.readByte(attributePage)) return false; if (debug) cout << " : AttributePage: " << (unsigned int)attributePage; break; case WBXML::ENTITY: if (debug) cout << " : Entity"; if (!reader.readMultiByteInt((unsigned int&)charResult)) return false; if (debug) cout << " : " << charResult; hasCharResult = true; value.push_back(&charResult); break; case WBXML::STR_I: { if (debug) cout << " : STR_I"; wstring s; if (!reader.readStrI(s)) return false; value.push_back(s); break; } case WBXML::EXT_I_0: case WBXML::EXT_I_1: case WBXML::EXT_I_2: case WBXML::EXT_T_0: case WBXML::EXT_T_1: case WBXML::EXT_T_2: case WBXML::EXT_0: case WBXML::EXT_1: case WBXML::EXT_2: case WBXML::OPAQUE: if (!handleExtensions(reader, id, tagID, attributeID, atts, value)) return false; break; case WBXML::STR_T: { if (debug) cout << " : STR_T"; wstring s; unsigned int pos; if (!reader.readStrT(s, pos)) return false; value.push_back(s); break; } default: 
intResult = getAttributeId(id); hasIntResult = true; wstringstream s; s << intResult; value.push_back(s.str()); break; } if (!reader.readByte(id)) return false; } switch (value.size()) { case 0: // already handled break; case 1: if (hasIntResult) { atts.set(attributeID, intResult); } else if (hasCharResult) { atts.set(attributeID, charResult); } else { atts.set(attributeID, value[0]); } break; default: atts.set(attributeID, value); break; } attributeID = -1; } if (debug) cout << " : AttributeEnd"; return true; }