// basic function to check if the header is ok bool DosWordParser::checkHeader(WPSHeader *header, bool /*strict*/) { RVNGInputStreamPtr input = getInput(); if (!input || !checkFilePosition(0x100)) { WPS_DEBUG_MSG(("DosWordParser::checkHeader: file is too short\n")); return false; } input->seek(DosWordParserInternal::HEADER_B_ASV, librevenge::RVNG_SEEK_SET); if (libwps::readU8(input) & 2) { WPS_DEBUG_MSG(("DosWordParser::checkHeader: file is autosaved\n")); return false; } input->seek(DosWordParserInternal::HEADER_B_VERSION, librevenge::RVNG_SEEK_SET); uint8_t ver = libwps::readU8(input); switch (ver) { case 0: WPS_DEBUG_MSG(("DosWordParser::checkHeader: version 4.0 or earlier\n")); header->setMajorVersion(4); break; case 3: WPS_DEBUG_MSG(("DosWordParser::checkHeader: version 5 OEM\n")); header->setMajorVersion(5); break; case 4: WPS_DEBUG_MSG(("DosWordParser::checkHeader: version 5\n")); header->setMajorVersion(5); break; case 7: WPS_DEBUG_MSG(("DosWordParser::checkHeader: version 5.5\n")); header->setMajorVersion(5); break; case 9: WPS_DEBUG_MSG(("DosWordParser::checkHeader: version 6.0\n")); header->setMajorVersion(6); break; default: WPS_DEBUG_MSG(("DosWordParser::checkHeader: unknown version %u\n", ver)); break; } input->seek(DosWordParserInternal::HEADER_W_CODEPAGE, librevenge::RVNG_SEEK_SET); uint16_t codepage = libwps::readU16(input); if (!codepage) header->setNeedEncoding(true); return true; }
bool LotusParser::checkFilePosition(long pos) { if (m_state->m_eof < 0) { RVNGInputStreamPtr input = getInput(); long actPos = input->tell(); input->seek(0, librevenge::RVNG_SEEK_END); m_state->m_eof=input->tell(); input->seek(actPos, librevenge::RVNG_SEEK_SET); } return pos <= m_state->m_eof; }
unsigned long getLength(const RVNGInputStreamPtr &input) { checkStream(input); const long begin = input->tell(); if (0 > begin) throw SeekFailedException(); long end = begin; if (0 == input->seek(0, librevenge::RVNG_SEEK_END)) end = input->tell(); else { // RVNG_SEEK_END does not work. Use the harder way. while (!input->isEnd()) { readU8(input); ++end; } } seek(input, begin); assert(begin <= end); return static_cast<unsigned long>(end - begin); }
/** Moves \a input by \a pos bytes relative to its current position.
 * Throws SeekFailedException if the underlying seek reports an error.
 */
void seekRelative(const RVNGInputStreamPtr &input, const long pos)
{
	checkStream(input);

	const bool failed = (input->seek(pos, librevenge::RVNG_SEEK_CUR) != 0);
	if (failed)
		throw SeekFailedException();
}
/** Moves \a input to the absolute position \a pos.
 * Throws SeekFailedException if the underlying seek reports an error.
 */
void seek(const RVNGInputStreamPtr &input, const unsigned long pos)
{
	checkStream(input);

	const long target = static_cast<long>(pos);
	const bool failed = (input->seek(target, librevenge::RVNG_SEEK_SET) != 0);
	if (failed)
		throw SeekFailedException();
}
bool LotusParser::readFMTFontName() { libwps::DebugStream f; RVNGInputStreamPtr input = getInput(); long pos = input->tell(); int type = (int) libwps::read16(input); if (type!=0xae) { WPS_DEBUG_MSG(("LotusParser::readFMTFontName: not a font name definition\n")); return false; } long sz = (long) libwps::readU16(input); long endPos=pos+4+sz; f << "Entries(FontFMTName):"; if (sz < 2) { WPS_DEBUG_MSG(("LotusParser::readFMTFontName: the zone is too short\n")); f << "###"; ascii().addPos(pos); ascii().addNote(f.str().c_str()); return true; } int id=(int) libwps::readU8(input); f << "id=" << id << ","; bool nameOk=true; std::string name(""); for (long i=1; i<sz; ++i) { char c=(char) libwps::readU8(input); if (!c) break; if (nameOk && !(c==' ' || (c>='0'&&c<='9') || (c>='a'&&c<='z') || (c>='A'&&c<='Z'))) { nameOk=false; WPS_DEBUG_MSG(("LotusParser::readFMTFontName: find odd character in name\n")); f << "#"; } name += c; } f << name << ","; if (input->tell()!=endPos) { WPS_DEBUG_MSG(("LotusParser::readFMTFontName: find extra data\n")); f << "###extra"; input->seek(endPos, librevenge::RVNG_SEEK_SET); } ascii().addPos(pos); ascii().addNote(f.str().c_str()); return true; }
/** Returns the encoding to use for the file.
 *
 * A non-zero code page stored at HEADER_W_CODEPAGE takes precedence over
 * \a encoding; if the resulting encoding is still UNKNOWN, CP_437 is used.
 */
libwps_tools_win::Font::Type DosWordParser::getFileEncoding(libwps_tools_win::Font::Type encoding)
{
	RVNGInputStreamPtr input = getInput();

	input->seek(DosWordParserInternal::HEADER_W_CODEPAGE, librevenge::RVNG_SEEK_SET);
	uint16_t const codepage = libwps::readU16(input);
	WPS_DEBUG_MSG(("DosWordParser::getFileEncoding: codepage %u\n", codepage));

	libwps_tools_win::Font::Type result = encoding;
	if (codepage != 0)
		result = libwps_tools_win::Font::getTypeForOEM(codepage);

	if (result != libwps_tools_win::Font::UNKNOWN)
		return result;
	return libwps_tools_win::Font::CP_437;
}
// try to read a data : which can be an item, a list or unknown zone bool readBlockData(RVNGInputStreamPtr input, long endPos, FileData &dt, std::string &error) { std::string saveError = error; long actPos = input->tell(); dt.m_recursData.resize(0); if (actPos+2 > endPos) // to short { error += FileData::createErrorString(input, endPos); return false; } dt.m_value = libwps::readU16(input); // normally 0, but who know ... dt.m_beginOffset = dt.m_endOffset = -1; int prevId = -1; bool ok = true; while (input->tell() != endPos) { FileData child; if (!readData(input, endPos, child, error)) { ok = false; break; } if (child.isBad()) continue; if (prevId > child.id()) { ok = false; break; } prevId = child.id(); dt.m_recursData.push_back(child); } if (ok) return true; if (dt.m_type == -1) dt.m_type = 0x80; dt.m_beginOffset = actPos; dt.m_endOffset = endPos; dt.m_input = input; error = saveError; input->seek(endPos, librevenge::RVNG_SEEK_SET); return false; }
//////////////////////////////////////////////////////////// // generic //////////////////////////////////////////////////////////// bool LotusParser::readMacFontName(long endPos) { libwps::DebugStream f; RVNGInputStreamPtr input = getInput(); const int vers=version(); long pos = input->tell(); long sz=endPos-pos; f << "Entries(MacFontName):"; if ((vers<=1 && sz<7) || (vers>1 && sz!=42)) { WPS_DEBUG_MSG(("LotusParser::readMacFontName: the zone size seems bad\n")); f << "###"; ascii().addPos(pos-6); ascii().addNote(f.str().c_str()); return true; } if (vers<=1) { // seems only to exist in a lotus mac file, so revert the default encoding to MacRoman if undef if (m_state->m_fontType==libwps_tools_win::Font::UNKNOWN) m_state->m_fontType=libwps_tools_win::Font::MAC_ROMAN; int id=(int) libwps::readU16(input); f << "FN" << id << ","; int val=(int) libwps::readU16(input); // always 0? if (val) f << "f0=" << val << ","; val=(int) libwps::read16(input); // find -1, 30 (Geneva), 60 (Helvetica) if (val) f << "f1=" << val << ","; librevenge::RVNGString name(""); bool nameOk=true; for (int i=0; i<sz-6; ++i) { char c=(char) libwps::readU8(input); if (!c) break; if (nameOk && !(c==' ' || (c>='0'&&c<='9') || (c>='a'&&c<='z') || (c>='A'&&c<='Z'))) { nameOk=false; WPS_DEBUG_MSG(("LotusParser::readMacFontName: find odd character in name\n")); f << "#"; } name.append(c); } f << name.cstr() << ","; if (m_state->m_fontsMap.find(id)!=m_state->m_fontsMap.end()) { WPS_DEBUG_MSG(("LotusParser::readMacFontName: a font with id=%d already exists\n", id)); f << "###id,"; } else if (nameOk && !name.empty()) { libwps_tools_win::Font::Type encoding=name!="Symbol" ? 
libwps_tools_win::Font::MAC_ROMAN : libwps_tools_win::Font::MAC_SYMBOL; LotusParserInternal::Font font(encoding); font.m_name=name; m_state->m_fontsMap.insert(std::map<int, LotusParserInternal::Font>::value_type(id,font)); } ascii().addPos(pos-6); ascii().addNote(f.str().c_str()); return true; } for (int i=0; i<4; ++i) { int val=(int) libwps::read8(input); // 0|1 if (val) f << "fl" << i << "=" << val << ","; } for (int i=0; i<2; ++i) // f1=0|1288 { int val=(int) libwps::read16(input); if (val) f << "f" << i << "=" << val << ","; } std::string name(""); for (int i=0; i<8; ++i) { char c=(char) libwps::read8(input); if (!c) break; name+=c; } f << name << ","; input->seek(pos+16, librevenge::RVNG_SEEK_SET); if (input->tell()!=endPos) { ascii().addDelimiter(input->tell(),'|'); input->seek(endPos, librevenge::RVNG_SEEK_SET); } ascii().addPos(pos-6); ascii().addNote(f.str().c_str()); return true; }
bool LotusParser::readChartDefinition() { libwps::DebugStream f; RVNGInputStreamPtr input = getInput(); long pos = input->tell(); long type = (long) libwps::read16(input); if (type != 0x11) { WPS_DEBUG_MSG(("LotusParser::readChartDefinition: not a chart name\n")); return false; } long sz = (long) libwps::readU16(input); long endPos=pos+4+sz; f << "Entries(ChartDef):"; if (sz != 0xB2) { WPS_DEBUG_MSG(("LotusParser::readChartDefinition: chart name is too short\n")); f << "###"; ascii().addPos(pos); ascii().addNote(f.str().c_str()); return true; } f << "id=" << (int) libwps::readU8(input) << ","; std::string name(""); for (int i=0; i<16; ++i) { char c=(char) libwps::readU8(input); if (!c) break; name += c; } if (!name.empty()) f << name << ","; input->seek(pos+4+17, librevenge::RVNG_SEEK_SET); for (int i=0; i<43; ++i) // small number { int val=(int) libwps::read8(input); if (val) f << "f" << i << "=" << val << ","; } ascii().addPos(pos); ascii().addNote(f.str().c_str()); pos=input->tell(); f.str(""); f << "ChartDef-A:"; for (int i=0; i<28; ++i) // small number expect f24=0|4|64 { int val=(int) libwps::read8(input); if (val) f << "f" << i << "=" << val << ","; } for (int i=0; i<9; ++i) // small number expect g0=1|2, g1=g2=g3=1|14|20, g4=g5=0|1 { int val=(int) libwps::read16(input); if (val) f << "g" << i << "=" << val << ","; } ascii().addPos(pos); ascii().addNote(f.str().c_str()); pos=input->tell(); f.str(""); f << "ChartDef-B:"; if (input->tell()!=endPos) input->seek(endPos, librevenge::RVNG_SEEK_SET); ascii().addPos(pos); ascii().addNote(f.str().c_str()); return true; }
/** Reads a data zone: a record of type 0x1b which contains a sub-typed block.
 *
 * Layout read here: a 16-bit record type (must be 0x1b), a 16-bit size sz
 * (must be at least 2), then a 16-bit sub-type which selects how the
 * remaining sz-2 bytes are decoded. Some sub-types are decoded here (their
 * content is only written in the debug file); the others are forwarded to the
 * style manager, the spreadsheet parser, the graph parser or to other
 * LotusParser readers, which receive the end position of the zone.
 *
 * \return false, with the input restored to the beginning of the record, if
 *         the record type is not 0x1b or if sz<2. Otherwise true: the input
 *         is then always moved to the end of the zone, even when the
 *         sub-type is unknown or its content seems bad.
 */
bool LotusParser::readDataZone()
{
	libwps::DebugStream f;
	RVNGInputStreamPtr input = getInput();
	long pos = input->tell();
	int type = (int) libwps::readU16(input);
	long sz = (long) libwps::readU16(input);
	long endPos=pos+4+sz;
	if (type!=0x1b || sz<2)
	{
		WPS_DEBUG_MSG(("LotusParser::readDataZone: the zone seems odd\n"));
		input->seek(pos, librevenge::RVNG_SEEK_SET);
		return false;
	}
	// from now on, type is the sub-type of the zone
	type = (int) libwps::readU16(input);
	f << "Entries(Data" << std::hex << type << std::dec << "E):";
	// isParsed: the zone was recognized; needWriteInAscii: the debug note must be written here
	bool isParsed=false, needWriteInAscii=false;
	// sz is now the size of the data which follow the sub-type
	sz-=2;
	int val;
	switch (type)
	{
	//
	// mac windows
	//
	case 0x7d2:
	{
		f.str("");
		f << "Entries(WindowsMacDef):";
		if (sz<26)
		{
			WPS_DEBUG_MSG(("LotusParser::readDataZone: the windows definition seems bad\n"));
			f << "###";
			break;
		}
		val=(int) libwps::readU8(input);
		if (val) f << "id=" << val << ",";
		val=(int) libwps::read8(input); // find 0|2
		if (val) f << "f0=" << val << ",";
		int dim[4];
		for (int i=0; i<4; ++i)
		{
			dim[i]=(int) libwps::read16(input);
			val=(int) libwps::read16(input);
			if (!val) continue;
			if (i)
				f << "num[split]=" << val << ",";
			else
				f << "dim" << i << "[h]=" << val << ",";
		}
		f << "dim=" << Box2i(Vec2i(dim[0],dim[1]),Vec2i(dim[2],dim[3])) << ",";
		for (int i=0; i<8; ++i) // small value or 100
		{
			val=(int) libwps::read8(input);
			if (val) f << "f" << i+1 << "=" << val << ",";
		}
		isParsed=needWriteInAscii=true;
		// the 26 first bytes are the fixed part, the remainder is read as a string
		int remain=int(sz-26);
		if (remain<=1) break;
		std::string name("");
		for (int i=0; i<remain; ++i)
			name+=(char) libwps::readU8(input);
		f << name << ",";
		break;
	}
	case 0x7d3:
	{
		f.str("");
		f << "Entries(WindowsMacSplit):";
		if (sz<24)
		{
			WPS_DEBUG_MSG(("LotusParser::readDataZone: the windows split seems bad\n"));
			f << "###";
			break;
		}
		val=(int) libwps::readU8(input);
		if (val) f << "id=" << val << ",";
		val=(int) libwps::readU8(input);
		if (val) f << "split[id]=" << val << ",";
		for (int i=0; i<3; ++i) // 0 or 1
		{
			val=(int) libwps::read8(input);
			if (val) f << "f" << i+1 << "=" << val << ",";
		}
		int dim[4];
		for (int i=0; i<4; ++i)
		{
			val=(int) libwps::read16(input);
			dim[i]=(int) libwps::read16(input);
			if (val) f << "dim" << i <<"[h]=" << val << ",";
		}
		f << "dim=" << Box2i(Vec2i(dim[0],dim[1]),Vec2i(dim[2],dim[3])) << ",";
		for (int i=0; i<3; ++i)
		{
			// only the values which differ from these ones are written
			static int const expected[]= {0,-1,25};
			val=(int) libwps::read8(input);
			if (val!=expected[i]) f << "g" << i << "=" << val << ",";
		}
		isParsed=needWriteInAscii=true;
		break;
	}
	case 0x7d4:
	{
		f.str("");
		f << "Entries(WindowsMacUnkn0)";
		if (sz<5)
		{
			WPS_DEBUG_MSG(("LotusParser::readDataZone: the windows unkn0 seems bad\n"));
			f << "###";
			break;
		}
		for (int i=0; i<4; ++i) // always 2,1,1,2 ?
		{
			val=(int) libwps::read8(input);
			if (val) f << "f" << i << "=" << val << ",";
		}
		isParsed=needWriteInAscii=true;
		int remain=int(sz-4);
		if (remain<=1) break;
		std::string name("");
		for (int i=0; i<remain; ++i) // always LMBCS 1.2?
			name+=(char) libwps::readU8(input);
		f << name << ",";
		break;
	}
	case 0x7d5: // frequently followed by Lotus13 block and SheetRow, ...
		f.str("");
		f << "Entries(SheetBegin):";
		if (sz!=11)
		{
			WPS_DEBUG_MSG(("LotusParser::readDataZone: the sheet begin zone seems bad\n"));
			f << "###";
			break;
		}
		// time to update the style manager state
		m_styleManager->updateState();
		val=(int) libwps::readU8(input);
		if (val) f << "sheet[id]=" << val << ",";
		// then always 0a3fff00ffff508451ff ?
		isParsed=needWriteInAscii=true;
		break;
	case 0x7d7:
		isParsed=m_spreadsheetParser->readRowSizes(endPos);
		break;
	case 0x7d8:
	case 0x7d9:
	{
		f.str("");
		// each break is stored in one byte for 0x7d8 and in two bytes for 0x7d9
		int dataSz=type==0x7d8 ? 1 : 2;
		if (type==0x7d8)
			f << "Entries(ColMacBreak):";
		else
			f << "Entries(RowMacBreak):";
		if (sz<4 || (sz%dataSz))
		{
			WPS_DEBUG_MSG(("LotusParser::readDataZone: the page mac break seems bad\n"));
			f << "###";
			break;
		}
		val=(int) libwps::readU8(input);
		if (val) f << "sheet[id]=" << val << ",";
		val=(int) libwps::readU8(input); // always 0
		if (val) f << "f0=" << val << ",";
		f << "break=[";
		int N=int((sz-2)/dataSz);
		for (int i=0; i<N; ++i)
		{
			if (dataSz==1)
				f << (int) libwps::readU8(input) << ",";
			else
				f << libwps::readU16(input) << ",";
		}
		f << "],";
		isParsed=needWriteInAscii=true;
		break;
	}

	//
	// selection
	//
	case 0xbb8:
		f.str("");
		f << "Entries(MacSelect):";
		if (sz!=18)
		{
			WPS_DEBUG_MSG(("LotusParser::readDataZone: the mac selection seems bad\n"));
			f << "###";
			break;
		}
		for (int i=0; i<3; ++i) // f0=0, f1=f2=1
		{
			val=(int) libwps::read16(input);
			if (val) f << "f" << i << "=" << val << ",";
		}
		for (int i=0; i<3; ++i)
		{
			int row=(int) libwps::readU16(input);
			int sheet=(int) libwps::readU8(input);
			int col=(int) libwps::readU8(input);
			f << "C" << col << "-" << row;
			if (sheet) f << "[" << sheet << "],";
			else f << ",";
		}
		isParsed=needWriteInAscii=true;
		break;

	//
	// style
	//
	case 0xfa0:
		isParsed=m_styleManager->readFontStyle(endPos);
		break;
	case 0xfaa:
		isParsed=m_styleManager->readLineStyle(endPos);
		break;
	case 0xfb4:
		isParsed=m_styleManager->readColorStyle(endPos);
		break;
	case 0xfbe:
		isParsed=m_styleManager->readFormatStyle(endPos);
		break;
	case 0xfc8:
		isParsed=m_styleManager->readGraphicStyle(endPos);
		break;
	case 0xfd2:
		isParsed=m_styleManager->readCellStyle(endPos);
		break;
	case 0xfdc:
		isParsed=readMacFontName(endPos);
		break;
	// 0xfd2: id, ..., colorid

	//
	// graphic
	//
	case 0x2328:
		isParsed=m_graphParser->readZoneBegin(endPos);
		break;
	case 0x2332: // line
	case 0x2346: // rect, rectoval, rect
	case 0x2350: // arac
	case 0x2352: // rect shadow
	case 0x23f0: // frame
		isParsed=m_graphParser->readZoneData(endPos, type);
		break;
	case 0x23fa: // textbox data
		isParsed=m_graphParser->readTextBoxData(endPos);
		break;

	//
	// mac pict
	//
	case 0x240e:
		isParsed=m_graphParser->readPictureDefinition(endPos);
		break;
	case 0x2410:
		isParsed=m_graphParser->readPictureData(endPos);
		break;

	//
	// mac printer
	//
	case 0x2af8:
		isParsed=readDocumentInfoMac(endPos);
		break;
	case 0x2afa:
		f.str("");
		f << "Entries(PrinterMacUnkn1):";
		if (sz!=3)
		{
			WPS_DEBUG_MSG(("LotusParser::readDataZone: the printer unkn1 seems bad\n"));
			f << "###";
			break;
		}
		for (int i=0; i<3; ++i)
		{
			val=(int) libwps::readU8(input);
			static int const expected[]= {0x1f, 0xe0, 0/*or 1*/};
			if (val!=expected[i])
				f << "f" << i << "=" << val << ",";
		}
		isParsed=needWriteInAscii=true;
		break;
	case 0x2afb:
	{
		f.str("");
		f << "Entries(PrinterMacName):";
		if (sz<3)
		{
			WPS_DEBUG_MSG(("LotusParser::readDataZone: the printername seems bad\n"));
			f << "###";
			break;
		}
		val=(int) libwps::read16(input);
		if (val!=20) f << "f0=" << val << ",";
		std::string name("");
		// NOTE(review): sz no longer counts the sub-type and only 2 bytes have been
		// read at this point, so starting at 4 reads at most sz-4 characters - confirm
		// that this is intended
		for (int i=4; i<sz; ++i)
		{
			char c=(char) libwps::readU8(input);
			if (!c) break;
			name+=c;
		}
		f << name << ",";
		isParsed=needWriteInAscii=true;
		break;
	}
	case 0x2afc:
		f.str("");
		f << "Entries(PrintMacInfo):";
		if (sz<120)
		{
			WPS_DEBUG_MSG(("LotusParser::readDataZone: the printinfo seems bad\n"));
			f << "###";
			break;
		}
		isParsed=needWriteInAscii=true;
		break;

	//
	// 4268, 4269
	//
	default:
		break;
	}
	// write the note here if nobody recognized the zone or if it was decoded in this function
	if (!isParsed || needWriteInAscii)
	{
		ascii().addPos(pos);
		ascii().addNote(f.str().c_str());
	}
	// mark the unread part of the zone, then always jump to the end of the zone
	if (input->tell()!=endPos)
		ascii().addDelimiter(input->tell(),'|');
	input->seek(endPos, librevenge::RVNG_SEEK_SET);
	return true;
}
////////////////////////////////////////////////////////////
// read data
////////////////////////////////////////////////////////////

/** Reads the FDP page defined by \a entry and appends its FODs to \a fods.
 *
 * Steps performed here:
 * - the number of FODs (cfod) is read: for version()<5 it is stored in one
 *   byte at the last position of the page, else in two bytes at the
 *   beginning of the page (followed by a 16-bit value which is only written
 *   in the debug file),
 * - cfod+1 text limits are read (32-bit each); the first cfod ones create
 *   the new FODs, the last one is only kept to compute the result,
 * - cfod offsets (bfprop) are read; a non-zero offset gives the position in
 *   the page of the property definition of the corresponding FOD,
 * - each distinct definition is parsed by calling \a parser (if it is not
 *   null), the id that it returns being stored in the FOD.
 *
 * \param entry the zone to read; its type ("FDPC" or "FDPP") selects the type
 *        of the created FODs
 * \param fods the list which receives the new FODs
 * \param parser the method used to parse a property definition
 * \return false if the entry or its content seems bad; otherwise, the result
 *         of m_textPositions.end() > (the last text limit read)
 *
 * \note when an error is found after the text limits are read, the FODs
 *       already appended to \a fods are not removed.
 */
bool WPSTextParser::readFDP(WPSEntry const &entry, std::vector<DataFOD> &fods, WPSTextParser::FDPParser parser)
{
	RVNGInputStreamPtr input = getInput();
	if (entry.length() <= 0 || entry.begin() <= 0)
	{
		WPS_DEBUG_MSG(("WPSTextParser::readFDP: warning: FDP entry unintialized\n"));
		return false;
	}

	entry.setParsed();
	long page_offset = entry.begin();
	long length = entry.length();
	long endPage = entry.end();

	// version()<5: the counter and the offsets use 1 byte and the header 4 bytes,
	// else they use 2 bytes and the header 8 bytes
	bool smallFDP = version() < 5;
	int deplSize = smallFDP ? 1 : 2;
	int headerSize = smallFDP ? 4 : 8;

	if (length < headerSize)
	{
		WPS_DEBUG_MSG(("WPSTextParser::readFDP: warning: FDP offset=0x%lx, length=0x%lx\n", (unsigned long) page_offset, (unsigned long) length));
		return false;
	}

	libwps::DebugStream f, f2;
	if (smallFDP)
	{
		// the counter is the last byte of the page, so the usable page ends one byte sooner
		endPage--;
		input->seek(endPage, librevenge::RVNG_SEEK_SET);
	}
	else
		input->seek(page_offset, librevenge::RVNG_SEEK_SET);
	uint16_t cfod = deplSize == 1 ? (uint16_t) libwps::readU8(m_input) : libwps::readU16(m_input);

	f << "FDP: N="<<(int) cfod;
	if (smallFDP) input->seek(page_offset, librevenge::RVNG_SEEK_SET);
	else f << ", unk=" << libwps::read16(m_input);

	// each FOD needs a 4-byte limit and an offset of deplSize bytes
	if (headerSize+(4+deplSize)*static_cast<long>(cfod) > length)
	{
		WPS_DEBUG_MSG(("WPSTextParser::readFDP: error: cfod = %i (0x%X)\n", cfod, (unsigned) cfod));
		return false;
	}

	// the FODs created by this call are stored in fods[firstFod..]
	int firstFod = int(fods.size());
	long lastLimit = firstFod ? fods.back().m_pos : 0;

	long lastReadPos = 0L;

	DataFOD::Type type = DataFOD::ATTR_UNKN;
	if (entry.hasType("FDPC")) type = DataFOD::ATTR_TEXT;
	else if (entry.hasType("FDPP")) type = DataFOD::ATTR_PARAG;
	else
	{
		WPS_DEBUG_MSG(("WPSTextParser::readFDP: FDP error: unknown type = '%s'\n", entry.type().c_str()));
	}

	/* Read array of fcLim of FODs.  The fcLim refers to the offset of the
	   last character covered by the formatting. */
	for (int i = 0; i <= cfod; ++i)
	{
		DataFOD fod;
		fod.m_type = type;
		fod.m_pos = (long) libwps::readU32(m_input);
		// a null limit is replaced by the beginning of the text zone
		if (fod.m_pos == 0) fod.m_pos=m_textPositions.begin();

		/* check that fcLim is not too large */
		if (fod.m_pos > m_textPositions.end())
		{
			WPS_DEBUG_MSG(("WPSTextParser::readFDP: error: length of 'text selection' %ld > "
			               "total text length %ld\n", fod.m_pos, m_textPositions.end()));
			return false;
		}

		/* check that pos is monotonic */
		if (lastLimit > fod.m_pos)
		{
			WPS_DEBUG_MSG(("WPSTextParser::readFDP: error: character position list must "
			               "be monotonic, but found %ld, %ld\n", lastLimit, fod.m_pos));
			return false;
		}

		lastLimit = fod.m_pos;

		if (i != cfod)
			fods.push_back(fod);
		else // ignore the last text position
			lastReadPos = fod.m_pos;
	}

	std::vector<DataFOD>::iterator fods_iter;
	/* Read array of bfprop of FODs.  The bfprop is the offset where
	   the FPROP is located. */
	f << ", Tpos:defP=(";
	for (fods_iter = fods.begin() + firstFod; fods_iter!= fods.end(); ++fods_iter)
	{
		unsigned depl = deplSize == 1 ? libwps::readU8(m_input) : libwps::readU16(m_input);
		/* check size of bfprop  */
		// a non-zero offset must point after the header and the two arrays, and inside the page
		if ((depl < unsigned(headerSize+(4+deplSize)*cfod) && depl > 0) ||
		        page_offset+long(depl) > endPage)
		{
			WPS_DEBUG_MSG(("WPSTextParser::readFDP: error: pos of bfprop is bad "
			               "%u (0x%X)\n", depl, depl));
			return false;
		}

		if (depl)
			(*fods_iter).m_defPos = long(depl) + page_offset;
	}
	ascii().addPos(input->tell());

	// maps the position of a definition which was already parsed to its id
	std::map<long,int> mapPtr;
	bool smallSzInProp = smallFDP;
	for (fods_iter = fods.begin() + firstFod; fods_iter!= fods.end(); ++fods_iter)
	{
		long pos = (*fods_iter).m_defPos;
		f << std::hex << (*fods_iter).m_pos << std::dec << ":";
		if (pos == 0)
		{
			// no definition for this FOD
			f << "_, ";
			continue;
		}

		std::map<long,int>::iterator it= mapPtr.find(pos);
		if (it != mapPtr.end())
		{
			// same definition as a previous FOD: reuse its id
			(*fods_iter).m_id = mapPtr[pos];
			f << entry.type() << (*fods_iter).m_id << ", ";
			continue;
		}

		input->seek(pos, librevenge::RVNG_SEEK_SET);
		int szProp = smallSzInProp ? libwps::readU8(m_input) : libwps::readU16(m_input);
		// a size stored in one byte does not count the size byte itself
		if (smallSzInProp) szProp++;
		if (szProp == 0)
		{
			WPS_DEBUG_MSG(("WPSTextParser::readFDP: error: 0 == szProp at file offset 0x%lx\n", (unsigned long)(input->tell()-1)));
			return false;
		}

		long endPos = pos+szProp;
		if (endPos > endPage)
		{
			WPS_DEBUG_MSG(("WPSTextParser::readFDP: error: cch = %d, too large\n", szProp));
			return false;
		}

		// id is only read if the parser succeeds
		int id;
		std::string mess;
		if (parser &&(this->*parser)(endPos, id, mess))
		{
			(*fods_iter).m_id = mapPtr[pos] = id;

			f2.str("");
			f2 << entry.type() << id <<":" << mess;
			ascii().addPos(pos);
			ascii().addNote(f2.str().c_str());
			pos = input->tell();
		}

		f << entry.type() << (*fods_iter).m_id << ", ";

		// the definition was not parsed, or not up to its end: mark the position
		if (pos != endPos)
		{
			f2.str("");
			f2 << entry.type() << "###";
			ascii().addPos(pos);
			ascii().addNote(f2.str().c_str());
		}
	}
	f << "), lstPos=" << std::hex << lastReadPos << std::dec << ", ";

	ascii().addPos(page_offset);
	ascii().addNote(f.str().c_str());

	/* go to end of page */
	input->seek(endPage, librevenge::RVNG_SEEK_SET);

	return m_textPositions.end() > lastReadPos;
}
/** Reads the zone which begins at the current position.
 *
 * A zone begins with a 4-byte header: an id (1 byte), a type (1 byte) and
 * the size sz of the data (2 bytes). The zone is refused if type>0x2a or if
 * it does not fit in the file. For type 0, the id selects how the data are
 * decoded: some zones are decoded here (their content is only written in the
 * debug file), the others are forwarded to a dedicated reader. The input is
 * moved back to the beginning of the zone before the dispatch, so a dedicated
 * reader is called with the input on the zone header. The zones with another
 * type are only accepted (and skipped) when version()>2.
 *
 * \return true if the zone is accepted: the input is then at the end of the
 *         zone. false otherwise (bad header, end-of-file zone, unexpected
 *         size, failure of a dedicated reader...): the input is then moved
 *         back to the beginning of the zone.
 */
bool LotusParser::readZone()
{
	libwps::DebugStream f;
	RVNGInputStreamPtr input = getInput();
	long pos = input->tell();
	int id = (int) libwps::readU8(input);
	int type = (int) libwps::readU8(input);
	long sz = (long) libwps::readU16(input);
	long endPos=pos+4+sz;
	if ((type>0x2a) || sz<0 || !checkFilePosition(endPos))
	{
		input->seek(pos, librevenge::RVNG_SEEK_SET);
		return false;
	}
	f << "Entries(Lotus";
	if (type) f << std::hex << type << std::dec << "A";
	f << std::hex << id << std::dec << "E):";
	// ok: the zone is valid; isParsed: the zone was recognized;
	// needWriteInAscii: the debug note must be written here
	bool ok = true, isParsed = false, needWriteInAscii = false;
	int val;
	// go back to the header: the cases which decode the data here seek to pos+4
	input->seek(pos, librevenge::RVNG_SEEK_SET);
	switch (type)
	{
	case 0:
		switch (id)
		{
		case 0:
		{
			if (sz!=26)
			{
				ok=false;
				break;
			}
			input->seek(pos+4, librevenge::RVNG_SEEK_SET);
			f.str("");
			f << "Entries(BOF):";
			val=(int) libwps::readU16(input);
			m_state->m_inMainContentBlock=false;
			if (val==0x8007)
				f << "FMT,";
			else if (val>=0x1000 && val <= 0x1005)
			{
				m_state->m_inMainContentBlock=true;
				f << "version=" << (val-0x1000) << ",";
			}
			else
				f << "#version=" << std::hex << val << std::dec << ",";
			for (int i=0; i<4; ++i) // f0=4, f3 a small number
			{
				val=(int) libwps::read16(input);
				if (val)
					f << "f" << i << "=" << val << ",";
			}
			val=(int) libwps::readU8(input);
			if (m_state->m_inMainContentBlock)
			{
				m_spreadsheetParser->setLastSpreadsheetId(val);
				m_state->m_maxSheet=val;
			}
			if (val && m_state->m_inMainContentBlock) f << "max[sheet]=" << val << ",";
			else if (val) f << "max[fmt]=" << val << ",";

			for (int i=0; i<7; ++i) // g0/g1=0..fd, g2=0|4, g3=0|5|7|1e|20|30, g4=0|8c|3d, g5=1|10, g6=2|a
			{
				val=(int) libwps::readU8(input);
				if (val)
					f << "g" << i << "=" << std::hex << val << std::dec << ",";
			}
			isParsed=needWriteInAscii=true;
			break;
		}
		case 0x1: // EOF
			ok = false;
			break;
		case 0x3:
			if (sz!=6)
			{
				ok=false;
				break;
			}
			input->seek(pos+4, librevenge::RVNG_SEEK_SET);
			for (int i=0; i<3; ++i) // f0=1, f2=1|32
			{
				val=(int) libwps::read16(input);
				if (val)
					f << "f" << i << "=" << val << ",";
			}
			isParsed=needWriteInAscii=true;
			break;
		case 0x4:
			if (sz!=28)
			{
				ok=false;
				break;
			}
			input->seek(pos+4, librevenge::RVNG_SEEK_SET);
			for (int i=0; i<2; ++i) // f0=1|3, f1=1
			{
				val=(int) libwps::read8(input);
				if (val!=1)
					f << "f" << i << "=" << val << ",";
			}
			for (int i=0; i<2; ++i) // f2=1-3, f1=0|1
			{
				val=(int) libwps::read16(input);
				if (val)
					f << "f" << i+1 << "=" << val << ",";
			}
			isParsed=needWriteInAscii=true;
			break;
		case 0x5:
		{
			f.str("");
			f << "Entries(SheetUnknA):";
			if (sz!=16)
			{
				ok=false;
				break;
			}
			input->seek(pos+4, librevenge::RVNG_SEEK_SET);
			val=(int) libwps::readU8(input);
			if (val)
				f << "sheet[id]=" << val << ",";
			val=(int) libwps::read8(input); // always 0?
			if (val)
				f << "f0=" << val << ",";
			isParsed=needWriteInAscii=true;
			break;
		}
		case 0x6: // one by sheet
			f.str("");
			f << "Entries(SheetUnknB):";
			if (sz!=5)
			{
				ok=false;
				break;
			}
			input->seek(pos+4, librevenge::RVNG_SEEK_SET);
			val=(int) libwps::readU8(input);
			if (val)
				f << "sheet[id]=" << val << ",";
			for (int i=0; i<4; ++i) // f0=0, f2=0|1, f3=7-9
			{
				val=(int) libwps::read8(input); // always 0?
				if (val)
					f << "f" << i << "=" << val << ",";
			}
			isParsed=needWriteInAscii=true;
			break;
		case 0x7:
			ok=isParsed=m_spreadsheetParser->readColumnSizes();
			break;
		case 0x9:
			ok=isParsed=m_spreadsheetParser->readCellName();
			break;
		case 0xa:
			ok=isParsed=readLinkZone();
			break;
		case 0xb: // 0,1,-1
		case 0x1e: // always with 0
		case 0x21:
			if (sz!=1)
			{
				ok=false;
				break;
			}
			input->seek(pos+4, librevenge::RVNG_SEEK_SET);
			val=(int) libwps::read8(input);
			if (val==1)
				f << "true,";
			else if (val)
				f << "val=" << val << ",";
			break;
		case 0xc: // find 0 or 4 int with value 0|1|ff
			input->seek(pos+4, librevenge::RVNG_SEEK_SET);
			for (int i=0; i<sz; ++i)
			{
				val=(int) libwps::read8(input);
				if (val==1) f << "f" << i << ",";
				else if (val) f << "f" << i << "=" << val << ",";
			}
			isParsed=needWriteInAscii=true;
			break;
		case 0xe:
			if (sz<30)
			{
				ok=false;
				break;
			}
			input->seek(pos+4, librevenge::RVNG_SEEK_SET);
			for (int i=0; i<30; ++i) // f7=0|f, f8=0|60, f9=0|54, f17=80, f18=0|ff, f19=3f|40, f26=0|f8, f27=80|ff, f28=b|c,f29=40
			{
				val=(int) libwps::read8(input);
				if (val)
					f << "f" << i << "=" << val << ",";
			}
			if (sz>=32)
			{
				val=(int) libwps::read16(input); // always 1?
				if (val!=1)
					f << "f30=" << val << ",";
			}
			isParsed=needWriteInAscii=true;
			break;
		case 0xf:
			if (sz<0x56)
			{
				ok=false;
				break;
			}
			input->seek(pos+4, librevenge::RVNG_SEEK_SET);
			val=(int) libwps::read8(input); // 1|2
			if (val!=1) f << "f0=" << val << ",";
			// three strings, each stored in a field of 16 bytes
			for (int i=0; i<3; ++i)
			{
				long actPos=input->tell();
				std::string name("");
				for (int j=0; j<16; ++j)
				{
					char c=(char) libwps::readU8(input);
					if (!c) break;
					name += c;
				}
				if (!name.empty())
					f << "str" << i << "=" << name << ",";
				input->seek(actPos+16, librevenge::RVNG_SEEK_SET);
			}
			for (int i=0; i<17; ++i) // f2=f11=1,f15=0|1, f16=0|2, f17=0|1|2
			{
				val=(int) libwps::read8(input);
				if (val)
					f << "f" << i+1 << "=" << val << ",";
			}
			for (int i=0; i<10; ++i) // g0=0|1,g1=Ø|1, g2=4|a, g3=4c|50|80, g4=g5=0|2, g6=42, g7=41|4c, g8=3c|42|59
			{
				val=(int) libwps::read16(input);
				if (val)
					f << "g" << i << "=" << val << ",";
			}
			isParsed=needWriteInAscii=true;
			break;
		case 0x10: // CHECKME
		{
			if (sz<3)
			{
				ok=false;
				break;
			}
			f.str("");
			f << "Entries(Macro):";
			input->seek(pos+4, librevenge::RVNG_SEEK_SET);
			for (int i=0; i<2; ++i)
			{
				val=(int) libwps::readU8(input);
				if (val)
					f << "f" << i << "=" << val << ",";
			}
			std::string data("");
			for (int i=2; i<sz; ++i)
			{
				char c=(char) libwps::readU8(input);
				if (!c) break;
				data += c;
			}
			if (!data.empty())
				f << "data=" << data << ",";
			// one unread byte before the end of the zone is accepted
			if (input->tell()!=endPos && input->tell()+1!=endPos)
			{
				WPS_DEBUG_MSG(("LotusParser::readZone: the string zone %d seems too short\n", id));
				f << "###";
			}
			isParsed=needWriteInAscii=true;
			break;
		}
		case 0x11:
			ok=isParsed=readChartDefinition();
			break;
		case 0x12:
			ok=isParsed=readChartName();
			break;
		case 0x13:
			isParsed=m_spreadsheetParser->readRowFormats();
			break;
		case 0x15:
		case 0x1d:
			if (sz!=4)
			{
				WPS_DEBUG_MSG(("LotusParser::readZone: size of zone%d seems bad\n", id));
				f << "###";
				break;
			}
			input->seek(pos+4, librevenge::RVNG_SEEK_SET);
			val=(int) libwps::read16(input); // small number 6-c maybe a style
			if (val) f << "f0=" << val << ",";
			for (int i=0; i<2; ++i) // zone15: f1=3, f2=2-5, zone 1d: always 0
			{
				val=(int) libwps::readU8(input);
				if (val)
					f << "f" << i+1 << "=" << val << ",";
			}
			isParsed=needWriteInAscii=true;
			break;
		case 0x16: // the cell text
		case 0x17: // double10 cell
		case 0x18: // uint16 double cell
		case 0x19: // double10+formula
		case 0x1a: // text formula result cell
		case 0x25: // uint32 double cell
		case 0x26: // comment cell
		case 0x27: // double8 cell
		case 0x28: // double8+formula
			ok=isParsed=m_spreadsheetParser->readCell();
			break;
		case 0x1b:
			isParsed=readDataZone();
			break;
		case 0x1c: // always 00002d000000
			if (sz!=6)
			{
				WPS_DEBUG_MSG(("LotusParser::readZone: size of zone%d seems bad\n", id));
				f << "###";
				break;
			}
			input->seek(pos+4, librevenge::RVNG_SEEK_SET);
			for (int i=0; i<6; ++i) // some int
			{
				val=(int) libwps::readU8(input);
				if (val)
					f << "f" << i << "=" << std::hex << val << std::dec << ",";
			}
			isParsed=needWriteInAscii=true;
			break;
		case 0x1f:
			isParsed=ok=m_spreadsheetParser->readColumnDefinition();
			break;
		case 0x23:
			isParsed=ok=m_spreadsheetParser->readSheetName();
			break;
		// case 13: big structure

		//
		// format:
		//
		case 0xae:
			isParsed=readFMTFontName();
			break;
		default:
			input->seek(pos+4, librevenge::RVNG_SEEK_SET);
			break;
		}
		break;
	default:
		// checkme: maybe <5 is ok
		if (version()<=2)
		{
			ok=false;
			break;
		}
		break;
	}
	if (!ok)
	{
		input->seek(pos, librevenge::RVNG_SEEK_SET);
		return false;
	}
	// mark the unread part of the zone, then always jump to the end of the zone
	if (sz && input->tell()!=pos && input->tell()!=endPos)
		ascii().addDelimiter(input->tell(),'|');
	input->seek(endPos, librevenge::RVNG_SEEK_SET);
	// write the note here if nobody recognized the zone or if it was decoded in this function
	if (!isParsed || needWriteInAscii)
	{
		ascii().addPos(pos);
		ascii().addNote(f.str().c_str());
	}
	return true;
}
bool LotusParser::readZones() { RVNGInputStreamPtr input = getInput(); // reset data m_styleManager->cleanState(); m_graphParser->cleanState(); m_spreadsheetParser->cleanState(); input->seek(0, librevenge::RVNG_SEEK_SET); bool mainDataRead=false; // data, format and ? for (int wh=0; wh<2; ++wh) { if (input->isEnd()) break; while (readZone()) ; // // look for ending // long pos = input->tell(); if (!checkFilePosition(pos+4)) break; int type = (int) libwps::readU16(input); // 1 int length = (int) libwps::readU16(input); if (type==1 && length==0) { ascii().addPos(pos); ascii().addNote("Entries(EOF)"); if (!mainDataRead) mainDataRead=m_state->m_inMainContentBlock; // end of block, look for other blocks continue; } input->seek(pos, librevenge::RVNG_SEEK_SET); break; } while (!input->isEnd()) { long pos=input->tell(); int id = (int) libwps::readU8(input); int type = (int) libwps::readU8(input); long sz = (long) libwps::readU16(input); if ((type>0x2a) || sz<0 || !checkFilePosition(pos+4+sz)) { input->seek(pos, librevenge::RVNG_SEEK_SET); break; } libwps::DebugStream f; f << "Entries(UnknZon" << std::hex << id << "):"; ascii().addPos(pos); ascii().addNote(f.str().c_str()); input->seek(pos+4+sz, librevenge::RVNG_SEEK_SET); } if (!input->isEnd()) { ascii().addPos(input->tell()); ascii().addNote("Entries(Unknown)"); } return mainDataRead || m_spreadsheetParser->hasSomeSpreadsheetData(); }
//////////////////////////////////////////////////////////// // low level //////////////////////////////////////////////////////////// // read the header //////////////////////////////////////////////////////////// bool LotusParser::checkHeader(WPSHeader *header, bool strict) { *m_state = LotusParserInternal::State(m_state->m_fontType); libwps::DebugStream f; RVNGInputStreamPtr input = getInput(); if (!checkFilePosition(12)) { WPS_DEBUG_MSG(("LotusParser::checkHeader: file is too short\n")); return false; } input->seek(0,librevenge::RVNG_SEEK_SET); int firstOffset = (int) libwps::readU8(input); int type = (int) libwps::read8(input); int val=(int) libwps::read16(input); f << "FileHeader:"; if (firstOffset == 0 && type == 0 && val==0x1a) { m_state->m_version=1; f << "DOS,"; } else { WPS_DEBUG_MSG(("LotusParser::checkHeader: find unexpected first data\n")); return false; } val=(int) libwps::readU16(input); if (val>=0x1000 && val<=0x1002) { WPS_DEBUG_MSG(("LotusParser::checkHeader: find lotus123 file\n")); m_state->m_version=(val-0x1000)+1; f << "lotus123[" << m_state->m_version << "],"; } #ifdef DEBUG else if (val>0x1002 && val<=0x1005) { WPS_DEBUG_MSG(("LotusParser::checkHeader: find lotus123 file\n")); m_state->m_version=(val-0x1000)+1; f << "lotus123[" << m_state->m_version << "],"; } else if (val==0x8007) { WPS_DEBUG_MSG(("LotusParser::checkHeader: find lotus file format, sorry parsing this file is only implemented for debugging, not output will be created\n")); f << "lotus123[FMT],"; } #endif else { WPS_DEBUG_MSG(("LotusParser::checkHeader: unknown lotus 123 header\n")); return false; } input->seek(0, librevenge::RVNG_SEEK_SET); if (strict) { for (int i=0; i < 4; ++i) { if (!readZone()) return false; } } ascii().addPos(0); ascii().addNote(f.str().c_str()); if (header) { header->setMajorVersion(uint8_t(100+m_state->m_version)); header->setCreator(libwps::WPS_LOTUS); header->setKind(libwps::WPS_SPREADSHEET); header->setNeedEncoding(true); } return true; }
bool LotusParser::readLinkZone() { libwps::DebugStream f; RVNGInputStreamPtr input = getInput(); long pos = input->tell(); int type = (int) libwps::read16(input); if (type!=0xa) { WPS_DEBUG_MSG(("LotusParser::readLinkZone: not a link definition\n")); return false; } long sz = (long) libwps::readU16(input); f << "Entries(Link):"; if (sz < 19) { WPS_DEBUG_MSG(("LotusParser::readLinkZone: the zone is too short\n")); f << "###"; ascii().addPos(pos); ascii().addNote(f.str().c_str()); return true; } type=(int) libwps::read8(input); if (type==0) f << "chart,"; else if (type==1) f << "file,"; else { WPS_DEBUG_MSG(("LotusParser::readLinkZone: find unknown type\n")); f << "##type=" << type << ","; ascii().addPos(pos); ascii().addNote(f.str().c_str()); return true; } // maybe too int f << "ID=" << std::hex << libwps::readU16(input) << std::dec << ","; f << "id=" << (int) libwps::readU8(input) << ","; std::string name(""); for (int i=0; i<14; ++i) { char c=(char) libwps::readU8(input); if (!c) break; name += c; } f << "\"" << name << "\","; input->seek(pos+4+18, librevenge::RVNG_SEEK_SET); switch (type) { case 0: if (sz<26) { WPS_DEBUG_MSG(("LotusParser::readLinkZone: the chart zone seems too short\n")); f << "###"; break; } for (int i=0; i<2; ++i) { int row=(int) libwps::readU16(input); int table=(int) libwps::readU8(input); int col=(int) libwps::readU8(input); f << "C" << col << "-" << row; if (table) f << "[" << table << "]"; if (i==0) f << "<->"; else f << ","; } break; case 1: name=""; for (int i=18; i<sz; ++i) { char c=(char) libwps::readU8(input); if (!c) break; name += c; } f << "link=" << name << ","; break; default: WPS_DEBUG_MSG(("LotusParser::readLinkZone: find unknown type\n")); f << "###"; break; } if (input->tell()!=pos+4+sz && input->tell()+1!=pos+4+sz) { WPS_DEBUG_MSG(("LotusParser::readLinkZone: the zone seems too short\n")); f << "##"; ascii().addDelimiter(input->tell(), '|'); } ascii().addPos(pos); ascii().addNote(f.str().c_str()); return true; }
// try to read an item bool readData(RVNGInputStreamPtr input, long endPos, FileData &dt, std::string &/*error*/) { long actPos = input->tell(); dt = FileData(); if (actPos >= endPos) return false; long val = (long) libwps::readU16(input); dt.m_type = int((val & 0xFF00)>>8); dt.m_id = (val & 0xFF); if (dt.m_type & 5) { dt.m_type = -1; return false; } dt.m_value = 0; // what is the meaning of dt.m_type & 0xF // maybe : // 0x1/0x4 -> never seem // 0x2 -> set for the main child ? // 0x8 -> signed/unsigned ? set/unset for bool ? switch (dt.m_type>>4) { case 0: return true; case 1: if (actPos+4 > endPos) break; if (dt.m_type == 0x12) { dt.m_value = libwps::readU8(input); input->seek(1, librevenge::RVNG_SEEK_CUR); } else dt.m_value = libwps::readU16(input); return true; case 2: { if (dt.m_type == 0x2a) // special case : STR4 + long { if (actPos+10 > endPos) break; for (int i = 0; i < 4; i++) dt.m_text += (char) libwps::readU8(input); dt.m_value = libwps::read32(input); return true; } if (actPos+6 > endPos) break; dt.m_value = libwps::read32(input); return true; } case 8: { if (actPos+4 > endPos) break; long extraSize = (long) libwps::readU16(input); long newEndPos = actPos+2+extraSize; if ((extraSize%2) || newEndPos > endPos) break; // can either be a list of data or a structured list, so we stored the information dt.m_beginOffset = actPos+4; dt.m_endOffset = newEndPos; dt.m_input = input; input->seek(newEndPos, librevenge::RVNG_SEEK_SET); return true; } default: break; } dt.m_type = -1; return false; }