Esempio n. 1
0
/*********************************************************************
 * Determines the format of the input file from its leading signature
 * bytes and dispatches to the matching handler:
 *   - no signature checking requested -> process_file()
 *   - Windows Write signature         -> process_file() (8-bit text)
 *   - RTF signature                   -> parse_rtf()
 *   - ZIP signature                   -> error message, exit(1)
 *   - old Word signature              -> parse_word_header()
 *   - OLE signature                   -> parse_word_header() on the
 *                                        "WordDocument" stream
 *   - anything else                   -> copy_out() (plain text)
 *
 * f is the input stream, positioned at its start.
 *
 * Returns the handler's result; 0 after copy_out(); 69 when an OLE
 * container yields no readable "WordDocument" stream. Non-zero means
 * error.
 ********************************************************************/
int analyze_format(FILE *f) {
	/* Zero-filled so that a short read never leaves indeterminate bytes
	 * behind for the signature comparisons or for copy_out() */
	unsigned char buffer[129] = {0};
	long offset=0;
	size_t extra;
	FILE *new_file, *ole_file;
	int ret_code=69;

	if (!signature_check) {
		/* forced parsing */
		/* no autodetect possible. Assume 8-bit if not overridden on
		 * command line */
		if (!get_unicode_char)
			get_unicode_char=get_8bit_char;
		return process_file(f,LONG_MAX);
	}
	catdoc_read(buffer,4,1,f);
	buffer[4]=0;
	if (strncmp((char *)buffer,write_sign,2)==0) {
		printf("[Windows Write file. Some garbage expected]\n");
		get_unicode_char=get_8bit_char;
		return process_file(f,LONG_MAX);
	} else if (strncmp((char *)buffer,rtf_sign,4)==0) {
		return parse_rtf(f);
	} else if (strncmp((char *)buffer, zip_sign,4) == 0) {
		fprintf(stderr,"This file looks like ZIP archive or Office 2007 "
		"or later file.\nNot supported by catdoc\n");
		exit(1);
	} else if (strncmp((char *)buffer,old_word_sign,2)==0) {
		/* Complete the 128-byte header; if the file is shorter the
		 * remainder of the header stays zero */
		fread(buffer+4,1,124,f);
		return parse_word_header(buffer,f,128,0);
	}
	/* Fetch the second half of the 8-byte OLE signature. The read
	 * overwrites the terminator stored at buffer[4], so put a new one
	 * right after the bytes actually obtained: copy_out() below is
	 * handed the buffer as a C string. */
	extra=fread(buffer+4,1,4,f);
	buffer[4+extra]=0;
	if (strncmp((char *)buffer,ole_sign,8)==0) {
		if ((new_file=ole_init(f, buffer, 8)) != NULL) {
			set_ole_func();
			while((ole_file=ole_readdir(new_file)) != NULL) {
				int res=ole_open(ole_file);
				if (res >= 0) {
					if (strcmp(((oleEntry*)ole_file)->name , "WordDocument") == 0) {
						/* Clear the signature bytes so that a stream
						 * shorter than 128 bytes yields a zero-padded
						 * header rather than stale data */
						memset(buffer,0,sizeof buffer);
						offset=catdoc_read(buffer, 1, 128, ole_file);
						ret_code=parse_word_header(buffer,ole_file,-offset,offset);
					}
				}
				ole_close(ole_file);
			}
			set_std_func();
			ole_finish();
		} else {
			fprintf(stderr,"Broken OLE file. Try using -b switch\n");
			exit(1);
		}
	} else {
		/* No known signature: treat as plain text, passing along the
		 * bytes that were already consumed */
		copy_out(f,(char *)buffer);
		return 0;
	}

	return ret_code;
}
Esempio n. 2
0
/** 
 * Initializes ole structure
 * 
 * @param f (FILE *) compound document file, positioned at bufSize
 *           byte. Might be pipe or socket 
 * @param buffer (void *) bytes already read from f
 * @param bufSize number of bytes already read from f should be less
 *                than 512 
 * 
 * @return 
 */
FILE* ole_init(FILE *f, void *buffer, size_t bufSize)  {
	unsigned char oleBuf[BBD_BLOCK_SIZE];
	unsigned char *tmpBuf;
	FILE *newfile;
	int ret=0, i;
	long int sbdMaxLen, sbdCurrent, propMaxLen, propCurrent, mblock, msat_size;
	oleEntry *tEntry;

	/* deleting old data (if it was allocated) */
	ole_finish();
	
	if (fseek(f,0,SEEK_SET) == -1) {
		if ( errno == ESPIPE ) {
			/* We got non-seekable file, create temp file */
			if((newfile=tmpfile()) == NULL) {
				perror("Can't create tmp file");
				return NULL;
			}
			if (bufSize > 0) {
				ret=fwrite(buffer, 1, bufSize, newfile);
				if(ret != bufSize) {
					perror("Can't write to tmp file");
					return NULL;
				}
			}
			
			while(!feof(f)){
				ret=fread(oleBuf,1,BBD_BLOCK_SIZE,f);
				fwrite(oleBuf, 1, ret, newfile);
			}
			fseek(newfile,0,SEEK_SET);
		} else {
			perror("Can't seek in file");
			return NULL;
		}
	} else {
		newfile=f;
	}	
	fseek(newfile,0,SEEK_END);
	fileLength=ftell(newfile);
/* 	fprintf(stderr, "fileLength=%ld\n", fileLength); */
	fseek(newfile,0,SEEK_SET);
	ret=fread(oleBuf,1,BBD_BLOCK_SIZE,newfile);
	if ( ret != BBD_BLOCK_SIZE ) {
		return NULL;
	}
	if (strncmp(oleBuf,ole_sign,8) != 0) {
		return NULL;
	}
 	sectorSize = 1<<getshort(oleBuf,0x1e);
	shortSectorSize=1<<getshort(oleBuf,0x20);
	
/* Read BBD into memory */
	bbdNumBlocks = getulong(oleBuf,0x2c);
	if((BBD=malloc(bbdNumBlocks*sectorSize)) == NULL ) {
		return NULL;
	}
	
	if((tmpBuf=malloc(MSAT_ORIG_SIZE)) == NULL ) {
		return NULL;
	}
	memcpy(tmpBuf,oleBuf+0x4c,MSAT_ORIG_SIZE);
	mblock=getlong(oleBuf,0x44);
	msat_size=getlong(oleBuf,0x48);

/* 	fprintf(stderr, "msat_size=%ld\n", msat_size); */

	i=0;
	while((mblock >= 0) && (i < msat_size)) {
		unsigned char *newbuf;
/* 		fprintf(stderr, "i=%d mblock=%ld\n", i, mblock); */
		if ((newbuf=realloc(tmpBuf, sectorSize*(i+1)+MSAT_ORIG_SIZE)) != NULL) {
			tmpBuf=newbuf;
		} else {
			perror("MSAT realloc error");
			free(tmpBuf);
			ole_finish();
			return NULL;
		}
		
		fseek(newfile, 512+mblock*sectorSize, SEEK_SET);
		if(fread(tmpBuf+MSAT_ORIG_SIZE+(sectorSize-4)*i,
						 1, sectorSize, newfile) != sectorSize) {
			fprintf(stderr, "Error read MSAT!\n");
			ole_finish();
			return NULL;
		}

		i++;
		mblock=getlong(tmpBuf, MSAT_ORIG_SIZE+(sectorSize-4)*i);
	}
	
/* 	fprintf(stderr, "bbdNumBlocks=%ld\n", bbdNumBlocks); */
	for(i=0; i< bbdNumBlocks; i++) {
		long int bbdSector=getlong(tmpBuf,4*i);
		
		if (bbdSector >= fileLength/sectorSize || bbdSector < 0) {
			fprintf(stderr, "Bad BBD entry!\n");
			ole_finish();
			return NULL;
		}
		fseek(newfile, 512+bbdSector*sectorSize, SEEK_SET);
		if ( fread(BBD+i*sectorSize, 1, sectorSize, newfile) != sectorSize ) {
			fprintf(stderr, "Can't read BBD!\n");
			free(tmpBuf);
			ole_finish();
			return NULL;
		}
	}
	free(tmpBuf);
	
/* Read SBD into memory */
	sbdLen=0;
	sbdMaxLen=10;
	sbdCurrent = sbdStart = getlong(oleBuf,0x3c);
	if (sbdStart > 0) {
		if((SBD=malloc(sectorSize*sbdMaxLen)) == NULL ) {
			ole_finish();
			return NULL;
		}
		while(1) {
			fseek(newfile, 512+sbdCurrent*sectorSize, SEEK_SET);
			fread(SBD+sbdLen*sectorSize, 1, sectorSize, newfile);
			sbdLen++;
			if (sbdLen >= sbdMaxLen) {
				unsigned char *newSBD;
				
				sbdMaxLen+=5;
				if ((newSBD=realloc(SBD, sectorSize*sbdMaxLen)) != NULL) {
					SBD=newSBD;
				} else {
					perror("SBD realloc error");
					ole_finish();
					return NULL;
				}
			}
			sbdCurrent = getlong(BBD, sbdCurrent*4);
			if(sbdCurrent < 0 ||
				sbdCurrent >= fileLength/sectorSize)
				break;
		}
		sbdNumber = (sbdLen*sectorSize)/shortSectorSize;
/*   		fprintf(stderr, "sbdLen=%ld sbdNumber=%ld\n",sbdLen, sbdNumber); */
	} else {
		SBD=NULL;
	}
/* Read property catalog into memory */
	propLen = 0;
	propMaxLen = 5;
	propCurrent = propStart = getlong(oleBuf,0x30);
	if (propStart >= 0) {
		if((properties=malloc(propMaxLen*sectorSize)) == NULL ) {
			ole_finish();
			return NULL;
		}
		while(1) {
/*  			fprintf(stderr, "propCurrent=%ld\n",propCurrent); */
			fseek(newfile, 512+propCurrent*sectorSize, SEEK_SET);
			fread(properties+propLen*sectorSize,
				  1, sectorSize, newfile);
			propLen++;
			if (propLen >= propMaxLen) {
				unsigned char *newProp;
				
				propMaxLen+=5;
				if ((newProp=realloc(properties, propMaxLen*sectorSize)) != NULL)
					properties=newProp;
				else {
					perror("Properties realloc error");
					ole_finish();
					return NULL;
				}
			}
			
			propCurrent = getlong(BBD, propCurrent*4);
			if(propCurrent < 0 ||
			   propCurrent >= fileLength/sectorSize ) {
				break;
			}
		}
/*  		fprintf(stderr, "propLen=%ld\n",propLen); */
		propNumber = (propLen*sectorSize)/PROP_BLOCK_SIZE;
		propCurNumber = 0;
	} else {
		ole_finish();
		properties = NULL;
		return NULL;
	}
	
	
/* Find Root Entry */
	while((tEntry=(oleEntry*)ole_readdir(newfile)) != NULL) {
		if (tEntry->type == oleRootDir ) {
			rootEntry=tEntry;
			break;
		}
		ole_close((FILE*)tEntry);
	}
	propCurNumber = 0;
	fseek(newfile, 0, SEEK_SET);
	if (!rootEntry) {
		fprintf(stderr,"Broken OLE structure. Cannot find root entry in this file!\n");		ole_finish();
		return NULL;
	}	
	return newfile;
}
Esempio n. 3
0
/**
 * Program entry point: reads the configuration, parses the command
 * line, sets up charset conversion and substitution maps, then runs
 * do_ppt() on the "PowerPoint Document" stream of every OLE file named
 * on the command line (or on standard input when no file is given).
 *
 * Options: -l list charsets and exit, -s <charset> source charset,
 * -d <charset> destination charset, -V print version and exit.
 *
 * @param argc number of command-line arguments
 * @param argv command-line arguments
 *
 * @return 0; fatal setup errors and unreadable input files terminate
 *         the program through exit(1) instead
 */
int main(int argc, char *argv[]) {
	FILE *input;
	FILE *new_file, *ole_file;
	char *filename =NULL;
	short int *tmp_charset;
	int c;
	int i;

	read_config_file(SYSTEMRC);
#ifdef USERRC
	{
		/* per-user configuration, looked up relative to $HOME */
		char *tempname=find_file(strdup(USERRC),getenv("HOME"));
		if (tempname) {
			read_config_file(tempname);
			free(tempname);
		}
	}
#endif
#ifdef HAVE_LANGINFO
	get_locale_charset();
#endif

	check_charset(&dest_csname,dest_csname);

	/* NOTE(review): "p:" is accepted by getopt but has no case below,
	 * so -p ends up in the default branch (help, exit 1) -- confirm
	 * whether that is intended */
	while ((c=getopt(argc,argv,"Vls:d:p:"))!=-1) {
		switch(c)  {
		case 'l':
			list_charsets(); exit(0);
		case 's':
			check_charset(&source_csname,optarg);
			source_charset=read_charset(source_csname);
			break;
		case 'd':
			check_charset(&dest_csname,optarg);
			break;
		case 'V': printf("Catdoc Version %s\n",CATDOC_VERSION);
			exit(0);
		default:
			help();
			exit(1);
		}
	}
	/* If we are using system strftime, we need to set LC_TIME locale
	 * category unless chosen charset is not same as system locale
	 */
#if defined(HAVE_LANGINFO) && defined(HAVE_STRFTIME) && !defined(__TURBOC__)
	set_time_locale();
#endif
	/* charset conversion init*/
	input_buffer=malloc(FILE_BUFFER);
	if (!input_buffer) {
		fprintf(stderr,"Cannot allocate input buffer\n");
		exit(1);
	}
	if (strcmp(dest_csname,"utf-8")) {
		tmp_charset=read_charset(dest_csname);
		if (!tmp_charset) {
			fprintf(stderr,"Cannot load target charset %s\n",dest_csname);
			exit(1);
		}
		target_charset=make_reverse_map(tmp_charset);
		free(tmp_charset);
	} else {
		/* utf-8 output needs no reverse map */
		target_charset=NULL;
	}
	spec_chars=read_substmap(stradd("ascii",SPEC_EXT));
	if (!spec_chars) {
		fprintf(stderr,"Cannod read substitution map ascii%s\n",
						SPEC_EXT);
		exit(1);
	}
	replacements=read_substmap(stradd("ascii",REPL_EXT));
	if (!replacements) {
		fprintf(stderr,"Cannod read substitution map ascii%s\n",
						REPL_EXT);
		exit(1);
	}
	if (optind>=argc) {
		/* No file arguments: show help on a terminal, otherwise read
		 * standard input.
		 * NOTE(review): unlike named files, stdin is handed to do_ppt()
		 * without going through ole_init() -- confirm this is intended */
		if (isatty(fileno(stdin))) {
			help();
			exit(0);
		}
		do_ppt(stdin,"STDIN");
		exit (0);
	}
	for (i=optind;i<argc;i++) {
		filename = argv[i];
		input=fopen(filename,"rb");
		if (!input) {
			perror(filename);
			exit(1);
		}
		if ((new_file=ole_init(input, NULL, 0)) != NULL) {
			set_ole_func();
			while((ole_file=ole_readdir(new_file)) != NULL) {
				int res=ole_open(ole_file);
				if (res >= 0) {
					if (strcasecmp(((oleEntry*)ole_file)->name , "PowerPoint Document") == 0) {
						do_ppt(ole_file,filename);
					}
				}
				ole_close(ole_file);
			}
			set_std_func();
			ole_finish();
			fclose(new_file);
			/* ole_init() may hand back a temporary copy instead of the
			 * stream we opened; close ours as well in that case */
			if (new_file != input) {
				fclose(input);
			}
		} else {
			fprintf(stderr, "%s is not OLE file or Error\n", filename);
			/* do not leak one stream per rejected file */
			fclose(input);
		}
	}
	return 0;
}