/*!
 * Read a PalmDoc (PDB) stream record by record and append its text to the
 * document as paragraphs.
 *
 * The PDB header is checked against DOC_TYPE / DOC_CREATOR; if it does not
 * match (or cannot be read at all) a single empty paragraph is appended and
 * UT_OK is returned.  Otherwise every record after record 0 is read into
 * m_buf, passed through _uncompress() when record 0 reports version 2, and
 * converted byte by byte through m_Mbtowc.  CR, LF and CRLF each end a
 * paragraph.
 *
 * \param pdfp  Input stream positioned anywhere; this function seeks itself.
 * \return UT_OK on completion.  The X_ReturnNoMemIfError() call sites may
 *         return early (presumably with an out-of-memory error -- the macro
 *         is defined elsewhere).
 */
UT_Error IE_Imp_PalmDoc::_parseFile(GsfInput * pdfp)
{
	UT_GrowBuf gbBlock(1024);
	bool bEatLF = false;
	bool bEmptyFile = true;
	UT_UCSChar c;
	UT_UCS4Char wc;
	pdb_header header;
	doc_record0 rec0;
	bool bCompressed = false;
	int num_records, rec_num;
	DWord file_size, offset;

	// A failed/short read leaves `header` uninitialised, so it must not be
	// inspected in that case; treat it the same as a non-DOC file.
	const bool bHaveHeader =
		(gsf_input_read( pdfp, PDB_HEADER_SIZE, reinterpret_cast<guint8*>(&header) ) != NULL);

	if (!bHaveHeader
		|| strncmp( header.type,    DOC_TYPE,    sizeof(header.type) )
		|| strncmp( header.creator, DOC_CREATOR, sizeof(header.creator) ))
	{
		UT_DEBUGMSG(("This is not a DOC file!\n"));

		// Create an empty paragraph.
		X_ReturnNoMemIfError(appendStrux(PTX_Block, NULL));
		return UT_OK;
	}

	// Record 0 carries the document metadata, not text, hence the "- 1".
	num_records = _swap_Word( header.numRecords ) - 1;

	gsf_input_seek( pdfp, PDB_HEADER_SIZE, G_SEEK_SET );
	GET_DWord( pdfp, offset );
	gsf_input_seek( pdfp, offset, G_SEEK_SET );

	// Only look at rec0 if it was actually read.
	if ( gsf_input_read( pdfp, sizeof(rec0), reinterpret_cast<guint8*>(&rec0) ) != NULL
		&& _swap_Word( rec0.version ) == 2 )
	{
		bCompressed = true;
	}

	gsf_input_seek( pdfp, 0, G_SEEK_END );
	file_size = gsf_input_tell( pdfp );

	for (rec_num = 1; rec_num <= num_records; ++rec_num )
	{
		DWord next_offset;

		// A record ends where the next one starts; the last one ends at EOF.
		gsf_input_seek( pdfp, PDB_HEADER_SIZE + PDB_RECORD_HEADER_SIZE * rec_num, G_SEEK_SET);
		GET_DWord( pdfp, offset );
		if( rec_num < num_records )
		{
			gsf_input_seek( pdfp, PDB_HEADER_SIZE + PDB_RECORD_HEADER_SIZE * (rec_num + 1), G_SEEK_SET);
			GET_DWord( pdfp, next_offset );
		}
		else
			next_offset = file_size;

		gsf_input_seek( pdfp, offset, G_SEEK_SET );

		// The offsets come straight from the file and cannot be trusted:
		// guard against unsigned underflow (next_offset < offset) and never
		// read more than the BUFFER_SIZE bytes that are zero-filled below.
		DWord rec_len = (next_offset > offset) ? (next_offset - offset) : 0;
		if (rec_len > static_cast<DWord>(BUFFER_SIZE))
			rec_len = static_cast<DWord>(BUFFER_SIZE);

		// be overly cautious here
		_zero_fill (m_buf->buf, BUFFER_SIZE);
		gsf_input_read(pdfp, rec_len, m_buf->buf);
		m_buf->position = rec_len;

		if ( bCompressed )
			_uncompress( m_buf );

		m_buf->position = 0;

		while ( (m_buf->position) < (m_buf->len) )
		{
			// don't copy over null chars
			if (m_buf->buf[m_buf->position] == '\0')
			{
				++m_buf->position;
				continue;
			}

			if( !m_Mbtowc.mbtowc( wc, m_buf->buf[m_buf->position] ) )
			{
				// No character produced for this byte.  Advance anyway:
				// otherwise the very same byte is presented again on the
				// next iteration and the loop may never terminate.
				++m_buf->position;
				continue;
			}

			c = static_cast<UT_UCSChar>(wc);
			switch (c)
			{
			case static_cast<UT_UCSChar>('\r'):
			case static_cast<UT_UCSChar>('\n'):
				if ((c == static_cast<UT_UCSChar>('\n')) && bEatLF)
				{
					// LF directly after CR: already handled as one break.
					bEatLF = false;
					break;
				}

				if (c == static_cast<UT_UCSChar>('\r'))
				{
					bEatLF = true;
				}

				// we interpret either CRLF, CR, or LF as a paragraph break.

				// start a paragraph and emit any text that we
				// have accumulated.
				X_ReturnNoMemIfError(appendStrux(PTX_Block, NULL));
				bEmptyFile = false;
				if (gbBlock.getLength() > 0)
				{
					X_ReturnNoMemIfError(appendSpan(reinterpret_cast<const UT_UCSChar*>(gbBlock.getPointer(0)), gbBlock.getLength()));
					gbBlock.truncate(0);
				}
				break;

			default:
				bEatLF = false;
				X_ReturnNoMemIfError(gbBlock.ins(gbBlock.getLength(),reinterpret_cast<const UT_GrowBufElement *>(&c),1));
				break;
			}

			++m_buf->position;
		}
	}

	if (gbBlock.getLength() > 0 || bEmptyFile)
	{
		// if we have text left over (without final CR/LF),
		// or if we read an empty file,
		// create a paragraph and emit the text now.
		X_ReturnNoMemIfError(appendStrux(PTX_Block, NULL));
		if (gbBlock.getLength() > 0)
			X_ReturnNoMemIfError(appendSpan(reinterpret_cast<const UT_UCSChar *>(gbBlock.getPointer(0)), gbBlock.getLength()));
	}

	return UT_OK;
}
/*!
 * Load the text of a PalmDoc (PDB) file into `data`.
 *
 * Opens \a fname, verifies the PDB header against DOC_TYPE / DOC_CREATOR and
 * then reads every record after record 0 into m_buf, running uncompress()
 * on it when record 0 reports version 2.  All non-NUL bytes of each record
 * are appended to `data`.  The main window progress bar is advanced once per
 * record.
 *
 * On failure to open the file, or when the header is missing or does not
 * identify a PalmDoc document, a warning box is shown and the function
 * returns without touching `data`.
 *
 * \param fname  Path of the file to import.
 */
void PdbIm::loadFile(QString fname)
{
	FILE *m_pdfp = fopen(fname.toLocal8Bit(), "rb");
	pdb_header m_header;
	DWord file_size, offset;
	doc_record0 m_rec0;

	if (!m_pdfp)
	{
		QMessageBox::warning(ScCore->primaryMainWindow(), QObject::tr("PDB Import", "PDB Importer"),
			"<qt>" + QObject::tr("Could not open file %1", "PDB Importer").arg(fname) + "</qt>",
			QMessageBox::Ok, QMessageBox::NoButton);
		return;
	}

	// A short read leaves m_header uninitialised; treat that like a file
	// that is not a PalmDoc document instead of comparing garbage.
	const bool headerRead = (fread( &m_header, PDB_HEADER_SIZE, 1, m_pdfp ) == 1);

	if (!headerRead
		|| strncmp(m_header.type,    DOC_TYPE,    sizeof(m_header.type) )
		|| strncmp(m_header.creator, DOC_CREATOR, sizeof(m_header.creator)))
	{
		// Release the handle before bailing out (it used to leak here).
		fclose(m_pdfp);
		QMessageBox::warning(ScCore->primaryMainWindow(), QObject::tr("PDB Import", "PDB Importer"),
			"<qt>" + QObject::tr("This file is not recognized as a PDB document. Please, report this as a bug if you are sure it is one.", "PDB Importer") + "</qt>",
			QMessageBox::Ok, QMessageBox::NoButton);
		return;
	}

	// progressbar
	// Record 0 carries the document metadata, not text, hence the "- 1".
	int num_records = swap_Word( m_header.numRecords ) - 1;
	ScCore->primaryMainWindow()->mainWindowProgressBar->setMaximum(num_records);

	fseek(m_pdfp, PDB_HEADER_SIZE, SEEK_SET);
	GET_DWord(m_pdfp, offset);
	fseek(m_pdfp, offset, SEEK_SET);

	// Only look at m_rec0 if it was actually read.
	if (fread(&m_rec0, sizeof(m_rec0), 1, m_pdfp) == 1
		&& swap_Word( m_rec0.version ) == 2 )
	{
		bCompressed = true;
	}

	fseek( m_pdfp, 0, SEEK_END );
	file_size = ftell( m_pdfp );

	for (int rec_num = 1; rec_num <= num_records; ++rec_num )
	{
		DWord next_offset;

		ScCore->primaryMainWindow()->mainWindowProgressBar->setValue(rec_num);

		// A record ends where the next one starts; the last one ends at EOF.
		fseek( m_pdfp, PDB_HEADER_SIZE + PDB_RECORD_HEADER_SIZE * rec_num, SEEK_SET);
		GET_DWord( m_pdfp, offset );
		if( rec_num < num_records )
		{
			fseek( m_pdfp, PDB_HEADER_SIZE + PDB_RECORD_HEADER_SIZE * (rec_num + 1), SEEK_SET);
			GET_DWord( m_pdfp, next_offset );
		}
		else
			next_offset = file_size;

		fseek( m_pdfp, offset, SEEK_SET );

		// The offsets come straight from the file and cannot be trusted:
		// guard against unsigned underflow (next_offset < offset) and never
		// read more than the BUFFER_SIZE bytes that are zero-filled below.
		DWord rec_len = (next_offset > offset) ? (next_offset - offset) : 0;
		if (rec_len > static_cast<DWord>(BUFFER_SIZE))
			rec_len = static_cast<DWord>(BUFFER_SIZE);

		// be overly cautious here
		_zero_fill (m_buf->buf, BUFFER_SIZE);
		m_buf->position = fread( m_buf->buf, 1, rec_len, m_pdfp );

		if ( bCompressed )
			uncompress( m_buf );

		m_buf->position = 0;
		while ( (m_buf->position) < (m_buf->len) )
		{
			// NUL bytes are skipped rather than copied into the text.
			if (m_buf->buf[m_buf->position] == '\0')
			{
				++m_buf->position;
				continue;
			}
			data += m_buf->buf[m_buf->position];
			++m_buf->position;
		}
	}

	// Done with the file; the handle was previously never closed.
	fclose(m_pdfp);
}