Ejemplo n.º 1
0
/**
 * Read a stream record by record and hand every record to process_item().
 *
 * Each iteration reads an 8-byte record header. The record type is taken
 * with getshort() at offset 2 of the header and the record length with
 * getulong() at offset 4. When end of input is detected,
 * process_item(DOCUMENT_END, 0, input) is called before returning. A short
 * header read or a negative length ends the function without that call.
 *
 * @param input    stream the records are read from
 * @param filename not used by this function
 */
void do_ppt(FILE *input,char *filename) {
	enum { HEADER_BYTES = 8 };
	unsigned char header[HEADER_BYTES];
	int got;
	int type;
	long length;

	(void) filename;
	for (;;) {
		got = catdoc_read(header, 1, HEADER_BYTES, input);

		/* End of input is checked before the byte count so that a
		 * stream ending on a record boundary is closed properly. */
		if (catdoc_eof(input)) {
			process_item(DOCUMENT_END, 0, input);
			return;
		}
		if (got < HEADER_BYTES) {
			return;
		}

		type = getshort(header, 2);
		length = getulong(header, 4);
		if (length < 0) {
			return;
		}
		process_item(type, length, input);
	}
}
Ejemplo n.º 2
0
/**
 * Locate the BOF record of a spreadsheet stream and pass every following
 * record to process_item().
 *
 * Phase 1 scans for a BOF record: two 16-bit values (record type, record
 * length) are read; if the type is one of the recognised BOF codes
 * (0x0809, 0x0409, 0x0209, 0x0009) the global biff_version is set and the
 * remainder of the BOF payload is consumed, otherwise 126 bytes are skipped
 * and the scan continues.
 *
 * Phase 2 reads (type, length, payload) triples into the global rec buffer
 * and calls process_item() for each one. After an MSEOF record, processing
 * continues only if the next record is a BOF.
 *
 * Side effects: sets date_shift and biff_version, calls
 * CleanUpFormatIdxUsed(), prints diagnostics to stderr, and calls exit(1)
 * when end of input is reached before any BOF record was found.
 *
 * @param input    stream to read from
 * @param filename name used in diagnostic messages only
 */
void do_table(FILE *input,char *filename) {
	long rectype;
	long reclen,build_year=0,build_rel=0,offset=0;
	int eof_flag=0;
	int itemsread=1;
	date_shift=25569.0; /* Windows 1900 date system */
	CleanUpFormatIdxUsed();
	while (itemsread) {
		/* Check both header reads: after a failed read rec still holds
		 * stale bytes that must not be interpreted as a record header. */
		itemsread=catdoc_read(rec,2,1,input);
		if (itemsread == 0)
			break;
		biff_version=getshort(rec,0);
		itemsread=catdoc_read(rec,2,1,input);
		if (itemsread == 0)
			break;
		reclen=getshort(rec,0);
		if ( biff_version == 0x0809 || biff_version == 0x0409 ||
				 biff_version == 0x0209 || biff_version == 0x0009 ) {
			if (reclen==8 || reclen==16) {
				if (biff_version == 0x0809 ) {
					itemsread=catdoc_read(rec,4,1,input);
					if (itemsread == 0)
						break;
					build_year=getshort(rec+2,0);
					build_rel=getshort(rec,0);
					(void) build_rel;
					if(build_year > 5 ) {
						catdoc_read(rec,8,1,input);
						biff_version=8;
						offset=12;
					}
					else {
						biff_version=7;
						offset=4;
					}
				} else if (biff_version == 0x0209 ) {
					biff_version=3;
					offset=2;
				} else if (biff_version == 0x0409 ) {
					offset=2;
					biff_version=4;
				} else {
					biff_version=2;
				}
				/* offset bytes of the payload were already consumed above.
				 * A declared length smaller than that (reclen 8 with
				 * offset 12) would make the remaining length negative,
				 * so treat it as a malformed BOF instead of reading. */
				if (reclen < offset) {
					fprintf(stderr,"%s: Invalid BOF record\n",filename);
					return;
				}
				itemsread=catdoc_read(rec,reclen-offset,1,input);
				break;
			} else {
				fprintf(stderr,"%s: Invalid BOF record\n",filename);
				return;
			}
		} else {
			/* Not a BOF: skip ahead and try again. */
			itemsread=catdoc_read(rec,126,1,input);
		}
	}
	if (catdoc_eof(input)) {
		fprintf(stderr,"%s: No BOF record found\n",filename);
		exit(1);
	}
	while(itemsread){
		unsigned char hdr[2];

		itemsread = catdoc_read(hdr, 2, 1, input);
		if (catdoc_eof(input)) {
			process_item(MSEOF,0,NULL);
			return;
		}

		if(itemsread == 0)
			break;

		rectype=getshort(hdr,0);
		itemsread = catdoc_read(hdr, 2, 1, input);
		if(itemsread == 0)
			break;
		reclen=getshort(hdr,0);
		/* Payload is only loaded when it fits; the terminator written at
		 * rec[reclen] assumes rec has at least MAX_MS_RECSIZE elements
		 * (TODO confirm against the declaration of rec).
		 * NOTE(review): for reclen >= MAX_MS_RECSIZE the payload is neither
		 * read nor skipped, yet process_item() is still called with the
		 * previous contents of rec - verify this is intended. */
		if (reclen > 0 && reclen < MAX_MS_RECSIZE){
			itemsread = catdoc_read(rec, 1, reclen, input);
			rec[reclen] = '\0';
		}
		/* After an MSEOF only a new BOF keeps the loop going. */
		if(eof_flag) {
			if (rectype != BOF) {
				break;
			}
		}
		process_item(rectype,reclen,rec);
		if (rectype == MSEOF) {
			eof_flag=1;
		} else {
			eof_flag=0;
		}
	}
	return;
}
Ejemplo n.º 3
0
/**
 * Read characters through get_unicode_char() and emit them paragraph by
 * paragraph via output_paragraph().
 *
 * Characters are accumulated in the file-level buffer until a line feed
 * (0x000A) is stored, the buffer is nearly full, end of input is reached,
 * or the accumulated text is discarded. Control characters below 32 are
 * translated or dropped according to the table in the loop body.
 *
 * @param f    stream to read from
 * @param stop reading ends once the offset maintained by get_unicode_char()
 *             is no longer below this value
 * @return always 0
 */
int process_file(FILE *f,long stop) {
	int bufptr;             /* index of the last element stored in buffer, -1 if empty */
	int tabmode=0;          /* set when the previous character was 0x0007 */
	long offset=0;
	int hyperlink_mode = 0; /* set between 0x0013 and 0x0014 */
	unsigned short c;
	/* Now we are starting to read with get_unicode_char */
	while (!catdoc_eof(f) && offset<stop) {
		bufptr = -1;
		do {
			int unichar = get_unicode_char(f,&offset,stop);
			/* A negative result carries no character. `continue` jumps to
			 * the loop condition, so an empty buffer (bufptr == -1) ends
			 * the inner loop here. */
			if (unichar < 0)
				continue;
			c = unichar;
			/* Following symbols below 32 are allowed inside paragraph:
			   0x0002 - footnote mark
			   0x0007 - table separator (converted to tabmode)
			   0x0009 - Horizontal tab ( printed as is)
			   0x000B - hard return
			   0x000C - page break
			   0x000D - return - marks an end of paragraph
			   0x001E - IS2 for some reason means short defis in Word.
			   0x001F - soft hyphen in Word
			   0x0013 - start embedded hyperlink
			   0x0014 - separate hyperlink URL from text
			   0x0015 - end embedded hyperlink
			   */
			if (tabmode) {
				/* Previous character was 0x0007: a second 0x0007 is stored
				 * as 0x1E and consumed; anything else stores 0x1C first and
				 * is then handled normally below.
				 * (presumably 0x1E/0x1C are row/cell separators for the
				 * output stage - TODO confirm) */
				tabmode=0;
				if (c==0x007) {
					buffer[++bufptr]=0x1E;
					continue;
				} else {
					buffer[++bufptr]=0x1C;
				}  
			}   	 
			if (c<32) {
				switch (c) {
					case 0x007:
						/* Defer the decision until the next character is seen. */
						tabmode = 1;
						break;
					case 0x000D:
					case 0x000B:
						/* Both kinds of return become a line feed, which
						 * terminates the inner loop. */
						buffer[++bufptr]=0x000A;
						break;
					case 0x000C:
						buffer[++bufptr]=c;
						break;
					case 0x001E:
						buffer[++bufptr]='-';
						break;
					case 0x0002: break; /* footnote mark is dropped */

					case 0x001F:
								 buffer[++bufptr]=0xAD;/* translate to Unicode
														  soft hyphen */
								 break;						  
					case 0x0009:
								 buffer[++bufptr]=c;
								 break;
					case 0x0013:
								 hyperlink_mode=1;
								 buffer[++bufptr]=' ';
								 break;
					case 0x0014:
								 hyperlink_mode = 0;
								 /*fall through */
					case 0x0015:
								 /* just treat hyperlink separators as
								  * space */
								 buffer[++bufptr]=' ';
								 break;
					case 0x0001: if (hyperlink_mode) 
									 	break; /* ignored while inside a hyperlink */
								 /* else fall through */
					default:
								 /* Any other control char - discard what was
								  * accumulated; bufptr == -1 also ends the
								  * inner loop. */
								 bufptr=-1;
				}
			} else if (c != 0xfeff) {
				/* skip ZERO-WIDTH-UNBREAKABLE-SPACE. Output anything
				 * else*/
				buffer[++bufptr]=c;
			}
		/* NOTE(review): an iteration entered with bufptr == PARAGRAPH_BUFFER-3
		 * can store two elements in the tabmode path, and the terminator below
		 * stores one more - confirm buffer has at least PARAGRAPH_BUFFER+1
		 * elements. */
		} while (bufptr >=0 && bufptr<PARAGRAPH_BUFFER-2 && !catdoc_eof(f) && buffer[bufptr]!=0x000a);
		/* Only buffers holding at least two elements are emitted; a buffer
		 * with a single stored element (bufptr == 0) is dropped. */
		if (bufptr>0) {
			buffer[++bufptr]=0;
			output_paragraph(buffer);
		}
	}
	return 0;
}
Ejemplo n.º 4
0
void copy_out (FILE *f,char *header) {
	char *buf=(char *)buffer;
	int count,i;
	long offset;

	if (get_unicode_char == get_word8_char) {
		/* non-word file and -u specified. Trying to guess which kind of
		 * unicode is used
		 */
		if ((unsigned char)header[0]==0xFE && (unsigned char)header[1]==0xFF) {
			get_unicode_char = get_utf16msb;
			fputs(convert_char(header[2]<<8|header[3]),output_file);
			fputs(convert_char(header[4]<<8|header[5]),output_file);
			fputs(convert_char(header[6]<<8|header[7]),output_file);
		} else if ((unsigned char)header[0]!=0xFF ||
				(unsigned char)header[1]!=0xFE) {
			int c,j,d;
			/* if it is not utf16, assume it is UTF8. We are told -u,
			 * aren't we */
			get_unicode_char = get_utf8;
			i=0;
			while (i<8) {
				c=(unsigned char)header[i++];		
				if (c >=0x80) {
					if ( c<0xE0) {
						c=(c & 0x1F);
						count =1;
					} else {
						c=(c & 0xF);
						count = 2;
					}
					for (j=0;j<count;j++) {
						if (i<7) {
							d=(unsigned char) header[i++];
						} else {
							d=fgetc(f);
						}
						c=c<<6 | (d & 0x3F);
					}
				}
				fputs (convert_char(c),output_file);
			}
		} else {
			get_unicode_char = get_utf16lsb;
			fputs(convert_char(header[3]<<8|header[2]),output_file);
			fputs(convert_char(header[5]<<8|header[4]),output_file);
			fputs(convert_char(header[7]<<8|header[6]),output_file);
		}	    
		while (!catdoc_eof(f)) {
			i=get_unicode_char(f,&offset,0x7FFFFFFF); 
			if (i!=EOF) fputs(convert_char(i),output_file);
		}    
	} else {
		for (i=0;i<8;i++) {
			fputs(convert_char(to_unicode(source_charset,(unsigned char)header[i])),output_file);
		}			 
		/* Assuming 8-bit input text */
		while ((count = catdoc_read(buf,1,PARAGRAPH_BUFFER,f))) {
			for (i=0;i<count;i++) {
				fputs(convert_char(to_unicode(source_charset,
								(unsigned char)buf[i])),output_file);
			}		       
		}
	} 
}