UT_Error IE_Imp_PalmDoc::_parseFile(GsfInput * pdfp)
{
	UT_GrowBuf gbBlock(1024);
	bool bEatLF = false;
	bool bEmptyFile = true;
	UT_UCSChar c;
	UT_UCS4Char wc;

	pdb_header	header;
	doc_record0	rec0;
	bool		bCompressed = false;
	int		num_records, rec_num;
	DWord		file_size, offset;

	gsf_input_read( pdfp, PDB_HEADER_SIZE, (guint8*)&header);
	if (strncmp( header.type,    DOC_TYPE,    sizeof(header.type) ) ||
	    strncmp( header.creator, DOC_CREATOR, sizeof(header.creator) ))
        {
		UT_DEBUGMSG(("This is not a DOC file!\n"));

		// Create an empty paragraph.
		X_ReturnNoMemIfError(appendStrux(PTX_Block, NULL));
		return UT_OK;
	}

	num_records = _swap_Word( header.numRecords ) - 1;

	gsf_input_seek( pdfp, PDB_HEADER_SIZE, G_SEEK_SET );
	GET_DWord( pdfp, offset );
	gsf_input_seek( pdfp, offset, G_SEEK_SET );
	gsf_input_read( pdfp, sizeof(rec0), (guint8*)&rec0);

	if ( _swap_Word( rec0.version ) == 2 )
		bCompressed = true;

	gsf_input_seek( pdfp, 0, G_SEEK_END );
	file_size = gsf_input_tell( pdfp );

	for (rec_num = 1; rec_num <= num_records; ++rec_num )
	{
		DWord next_offset;

		gsf_input_seek( pdfp, PDB_HEADER_SIZE + PDB_RECORD_HEADER_SIZE * rec_num, G_SEEK_SET);
		GET_DWord( pdfp, offset );
		if( rec_num < num_records )
		{
			gsf_input_seek( pdfp, PDB_HEADER_SIZE + PDB_RECORD_HEADER_SIZE * (rec_num + 1), G_SEEK_SET);
			GET_DWord( pdfp, next_offset );
		}
		else
			next_offset = file_size;

		gsf_input_seek( pdfp, offset, G_SEEK_SET );

		// be overly cautious here
		_zero_fill (m_buf->buf, BUFFER_SIZE);
		gsf_input_read(pdfp, next_offset - offset, m_buf->buf);
		m_buf->position = next_offset - offset;

		if ( bCompressed )
			_uncompress( m_buf );

		m_buf->position = 0;

		while ( (m_buf->position) < (m_buf->len) )
		{
		  // don't copy over null chars
		        if (m_buf->buf[m_buf->position] == '\0')
			  {
			    ++m_buf->position;
			    continue;
			  }
			if( !m_Mbtowc.mbtowc( wc, m_buf->buf[m_buf->position] ) )
		 	   continue;
			c = static_cast<UT_UCSChar>(wc);
			switch (c)
			{
			case static_cast<UT_UCSChar>('\r'):
			case static_cast<UT_UCSChar>('\n'):
			
				if ((c == static_cast<UT_UCSChar>('\n')) && bEatLF)
				{
					bEatLF = false;
					break;
				}

				if (c == static_cast<UT_UCSChar>('\r'))
				{
					bEatLF = true;
				}
		
				// we interprete either CRLF, CR, or LF as a paragraph break.
		
				// start a paragraph and emit any text that we
				// have accumulated.
				X_ReturnNoMemIfError(appendStrux(PTX_Block, NULL));
				bEmptyFile = false;
				if (gbBlock.getLength() > 0)
				{
					X_ReturnNoMemIfError(appendSpan(reinterpret_cast<const UT_UCSChar*>(gbBlock.getPointer(0)), gbBlock.getLength()));
					gbBlock.truncate(0);
				}
				break;

			default:
				bEatLF = false;
				X_ReturnNoMemIfError(gbBlock.ins(gbBlock.getLength(),reinterpret_cast<const UT_GrowBufElement *>(&c),1));
				break;
			}

			++m_buf->position;
		} 

	}
	if (gbBlock.getLength() > 0 || bEmptyFile)
	{
		// if we have text left over (without final CR/LF),
		// or if we read an empty file,
		// create a paragraph and emit the text now.
		X_ReturnNoMemIfError(appendStrux(PTX_Block, NULL));
		if (gbBlock.getLength() > 0)
			X_ReturnNoMemIfError(appendSpan(reinterpret_cast<const UT_UCSChar *>(gbBlock.getPointer(0)), gbBlock.getLength()));
	}

	return UT_OK;
}
Ejemplo n.º 2
0
void PdbIm::loadFile(QString fname)
{
	FILE *m_pdfp = fopen(fname.toLocal8Bit(), "rb");
	pdb_header m_header;
	DWord file_size, offset;
	doc_record0 m_rec0;

	if (!m_pdfp)
	{
		QMessageBox::warning(ScCore->primaryMainWindow(), QObject::tr("PDB Import", "PDB Importer"),
							 "<qt>" + QObject::tr("Could not open file %1", "PDB Importer").arg(fname) + "</qt>",
							 QMessageBox::Ok, QMessageBox::NoButton);
		return;
	}
	fread( &m_header, PDB_HEADER_SIZE, 1, m_pdfp );
	if (strncmp(m_header.type, DOC_TYPE, sizeof(m_header.type) ) ||
		strncmp( m_header.creator, DOC_CREATOR, sizeof(m_header.creator)))
	{
		QMessageBox::warning(ScCore->primaryMainWindow(), QObject::tr("PDB Import", "PDB Importer"),
							 "<qt>" + QObject::tr("This file is not recognized as a PDB document. Please, report this as a bug if you are sure it is one.", "PDB Importer") + "</qt>",
							 QMessageBox::Ok, QMessageBox::NoButton);
		return;
	}

	// progressbar
	int num_records = swap_Word( m_header.numRecords ) - 1;
	ScCore->primaryMainWindow()->mainWindowProgressBar->setMaximum(num_records);
	fseek(m_pdfp, PDB_HEADER_SIZE, SEEK_SET);
	GET_DWord(m_pdfp, offset);
	fseek(m_pdfp, offset, SEEK_SET);
	fread(&m_rec0, sizeof(m_rec0), 1, m_pdfp);

	if (swap_Word( m_rec0.version ) == 2 )
		bCompressed = true;

	fseek( m_pdfp, 0, SEEK_END );
	file_size = ftell( m_pdfp );
	for (int rec_num = 1; rec_num <= num_records; ++rec_num )
	{
		DWord next_offset;

		ScCore->primaryMainWindow()->mainWindowProgressBar->setValue(rec_num);
		fseek( m_pdfp, PDB_HEADER_SIZE + PDB_RECORD_HEADER_SIZE * rec_num, SEEK_SET);
		GET_DWord( m_pdfp, offset );
		if( rec_num < num_records )
		{
			fseek( m_pdfp, PDB_HEADER_SIZE + PDB_RECORD_HEADER_SIZE * (rec_num + 1), SEEK_SET);
			GET_DWord( m_pdfp, next_offset );
		}
		else
			next_offset = file_size;
		fseek( m_pdfp, offset, SEEK_SET );
		// be overly cautious here
		_zero_fill (m_buf->buf, BUFFER_SIZE);
		m_buf->position = fread( m_buf->buf, 1, next_offset - offset, m_pdfp );

		if ( bCompressed )
			uncompress( m_buf );

		m_buf->position = 0;
		while ( (m_buf->position) < (m_buf->len) )
		{
			if (m_buf->buf[m_buf->position] == '\0')
			{
				++m_buf->position;
				continue;
			}
			data += m_buf->buf[m_buf->position];
			++m_buf->position;
		} 
	}
}