/********************************************************************* * Determines format of input file and calls parse_word_header or * process_file if * it is word processor file or copy_out if it is plain text file * return not 0 when error ********************************************************************/ int analyze_format(FILE *f) { unsigned char buffer[129]; long offset=0; FILE *new_file, *ole_file; int ret_code=69; if (!signature_check) { /* forced parsing */ /* no autodetect possible. Assume 8-bit if not overriden on * command line */ if (!get_unicode_char) get_unicode_char=get_8bit_char; return process_file(f,LONG_MAX); } catdoc_read(buffer,4,1,f); buffer[4]=0; if (strncmp((char *)&buffer,write_sign,2)==0) { printf("[Windows Write file. Some garbage expected]\n"); get_unicode_char=get_8bit_char; return process_file(f,LONG_MAX); } else if (strncmp((char *)&buffer,rtf_sign,4)==0) { return parse_rtf(f); } else if (strncmp((char *)&buffer, zip_sign,4) == 0) { fprintf(stderr,"This file looks like ZIP archive or Office 2007 " "or later file.\nNot supported by catdoc\n"); exit(1); } else if (strncmp((char *)&buffer,old_word_sign,2)==0) { fread(buffer+4,1,124,f); return parse_word_header(buffer,f,128,0); } fread(buffer+4,1,4,f); if (strncmp((char *)&buffer,ole_sign,8)==0) { if ((new_file=ole_init(f, buffer, 8)) != NULL) { set_ole_func(); while((ole_file=ole_readdir(new_file)) != NULL) { int res=ole_open(ole_file); if (res >= 0) { if (strcmp(((oleEntry*)ole_file)->name , "WordDocument") == 0) { offset=catdoc_read(buffer, 1, 128, ole_file); ret_code=parse_word_header(buffer,ole_file,-offset,offset); } } ole_close(ole_file); } set_std_func(); ole_finish(); } else { fprintf(stderr,"Broken OLE file. Try using -b switch\n"); exit(1); } } else { copy_out(f,(char *)&buffer); return 0; } return ret_code; }
/** * Initializes ole structure * * @param f (FILE *) compound document file, positioned at bufSize * byte. Might be pipe or socket * @param buffer (void *) bytes already read from f * @param bufSize number of bytes already read from f should be less * than 512 * * @return */ FILE* ole_init(FILE *f, void *buffer, size_t bufSize) { unsigned char oleBuf[BBD_BLOCK_SIZE]; unsigned char *tmpBuf; FILE *newfile; int ret=0, i; long int sbdMaxLen, sbdCurrent, propMaxLen, propCurrent, mblock, msat_size; oleEntry *tEntry; /* deleting old data (if it was allocated) */ ole_finish(); if (fseek(f,0,SEEK_SET) == -1) { if ( errno == ESPIPE ) { /* We got non-seekable file, create temp file */ if((newfile=tmpfile()) == NULL) { perror("Can't create tmp file"); return NULL; } if (bufSize > 0) { ret=fwrite(buffer, 1, bufSize, newfile); if(ret != bufSize) { perror("Can't write to tmp file"); return NULL; } } while(!feof(f)){ ret=fread(oleBuf,1,BBD_BLOCK_SIZE,f); fwrite(oleBuf, 1, ret, newfile); } fseek(newfile,0,SEEK_SET); } else { perror("Can't seek in file"); return NULL; } } else { newfile=f; } fseek(newfile,0,SEEK_END); fileLength=ftell(newfile); /* fprintf(stderr, "fileLength=%ld\n", fileLength); */ fseek(newfile,0,SEEK_SET); ret=fread(oleBuf,1,BBD_BLOCK_SIZE,newfile); if ( ret != BBD_BLOCK_SIZE ) { return NULL; } if (strncmp(oleBuf,ole_sign,8) != 0) { return NULL; } sectorSize = 1<<getshort(oleBuf,0x1e); shortSectorSize=1<<getshort(oleBuf,0x20); /* Read BBD into memory */ bbdNumBlocks = getulong(oleBuf,0x2c); if((BBD=malloc(bbdNumBlocks*sectorSize)) == NULL ) { return NULL; } if((tmpBuf=malloc(MSAT_ORIG_SIZE)) == NULL ) { return NULL; } memcpy(tmpBuf,oleBuf+0x4c,MSAT_ORIG_SIZE); mblock=getlong(oleBuf,0x44); msat_size=getlong(oleBuf,0x48); /* fprintf(stderr, "msat_size=%ld\n", msat_size); */ i=0; while((mblock >= 0) && (i < msat_size)) { unsigned char *newbuf; /* fprintf(stderr, "i=%d mblock=%ld\n", i, mblock); */ if ((newbuf=realloc(tmpBuf, sectorSize*(i+1)+MSAT_ORIG_SIZE)) != NULL) { tmpBuf=newbuf; } else { perror("MSAT realloc error"); free(tmpBuf); ole_finish(); return NULL; } fseek(newfile, 512+mblock*sectorSize, SEEK_SET); if(fread(tmpBuf+MSAT_ORIG_SIZE+(sectorSize-4)*i, 1, sectorSize, newfile) != sectorSize) { fprintf(stderr, "Error read MSAT!\n"); ole_finish(); return NULL; } i++; mblock=getlong(tmpBuf, MSAT_ORIG_SIZE+(sectorSize-4)*i); } /* fprintf(stderr, "bbdNumBlocks=%ld\n", bbdNumBlocks); */ for(i=0; i< bbdNumBlocks; i++) { long int bbdSector=getlong(tmpBuf,4*i); if (bbdSector >= fileLength/sectorSize || bbdSector < 0) { fprintf(stderr, "Bad BBD entry!\n"); ole_finish(); return NULL; } fseek(newfile, 512+bbdSector*sectorSize, SEEK_SET); if ( fread(BBD+i*sectorSize, 1, sectorSize, newfile) != sectorSize ) { fprintf(stderr, "Can't read BBD!\n"); free(tmpBuf); ole_finish(); return NULL; } } free(tmpBuf); /* Read SBD into memory */ sbdLen=0; sbdMaxLen=10; sbdCurrent = sbdStart = getlong(oleBuf,0x3c); if (sbdStart > 0) { if((SBD=malloc(sectorSize*sbdMaxLen)) == NULL ) { ole_finish(); return NULL; } while(1) { fseek(newfile, 512+sbdCurrent*sectorSize, SEEK_SET); fread(SBD+sbdLen*sectorSize, 1, sectorSize, newfile); sbdLen++; if (sbdLen >= sbdMaxLen) { unsigned char *newSBD; sbdMaxLen+=5; if ((newSBD=realloc(SBD, sectorSize*sbdMaxLen)) != NULL) { SBD=newSBD; } else { perror("SBD realloc error"); ole_finish(); return NULL; } } sbdCurrent = getlong(BBD, sbdCurrent*4); if(sbdCurrent < 0 || sbdCurrent >= fileLength/sectorSize) break; } sbdNumber = (sbdLen*sectorSize)/shortSectorSize; /* fprintf(stderr, "sbdLen=%ld sbdNumber=%ld\n",sbdLen, sbdNumber); */ } else { SBD=NULL; } /* Read property catalog into memory */ propLen = 0; propMaxLen = 5; propCurrent = propStart = getlong(oleBuf,0x30); if (propStart >= 0) { if((properties=malloc(propMaxLen*sectorSize)) == NULL ) { ole_finish(); return NULL; } while(1) { /* fprintf(stderr, "propCurrent=%ld\n",propCurrent); */ fseek(newfile, 512+propCurrent*sectorSize, SEEK_SET); fread(properties+propLen*sectorSize, 1, sectorSize, newfile); propLen++; if (propLen >= propMaxLen) { unsigned char *newProp; propMaxLen+=5; if ((newProp=realloc(properties, propMaxLen*sectorSize)) != NULL) properties=newProp; else { perror("Properties realloc error"); ole_finish(); return NULL; } } propCurrent = getlong(BBD, propCurrent*4); if(propCurrent < 0 || propCurrent >= fileLength/sectorSize ) { break; } } /* fprintf(stderr, "propLen=%ld\n",propLen); */ propNumber = (propLen*sectorSize)/PROP_BLOCK_SIZE; propCurNumber = 0; } else { ole_finish(); properties = NULL; return NULL; } /* Find Root Entry */ while((tEntry=(oleEntry*)ole_readdir(newfile)) != NULL) { if (tEntry->type == oleRootDir ) { rootEntry=tEntry; break; } ole_close((FILE*)tEntry); } propCurNumber = 0; fseek(newfile, 0, SEEK_SET); if (!rootEntry) { fprintf(stderr,"Broken OLE structure. Cannot find root entry in this file!\n"); ole_finish(); return NULL; } return newfile; }
/** * * * @param argc * @param argv * * @return */ int main(int argc, char *argv[]) { FILE *input; FILE *new_file, *ole_file; char *filename =NULL; short int *tmp_charset; int c; int i; char *tempname; read_config_file(SYSTEMRC); #ifdef USERRC tempname=find_file(strdup(USERRC),getenv("HOME")); if (tempname) { read_config_file(tempname); free(tempname); } #endif #ifdef HAVE_LANGINFO get_locale_charset(); #endif check_charset(&dest_csname,dest_csname); while ((c=getopt(argc,argv,"Vls:d:p:"))!=-1) { switch(c) { case 'l': list_charsets(); exit(0); case 's': check_charset(&source_csname,optarg); source_charset=read_charset(source_csname); break; case 'd': check_charset(&dest_csname,optarg); break; case 'V': printf("Catdoc Version %s\n",CATDOC_VERSION); exit(0); default: help(); exit(1); } } /* If we are using system strftime, we need to set LC_TIME locale * category unless choosen charset is not same as system locale */ #if defined(HAVE_LANGINFO) && defined(HAVE_STRFTIME) && !defined(__TURB0C__) set_time_locale(); #endif /* charset conversion init*/ input_buffer=malloc(FILE_BUFFER); if (strcmp(dest_csname,"utf-8")) { tmp_charset=read_charset(dest_csname); if (!tmp_charset) { fprintf(stderr,"Cannot load target charset %s\n",dest_csname); exit(1); } target_charset=make_reverse_map(tmp_charset); free(tmp_charset); } else { target_charset=NULL; } spec_chars=read_substmap(stradd("ascii",SPEC_EXT)); if (!spec_chars) { fprintf(stderr,"Cannod read substitution map ascii%s\n", SPEC_EXT); exit(1); } replacements=read_substmap(stradd("ascii",REPL_EXT)); if (!replacements) { fprintf(stderr,"Cannod read substitution map ascii%s\n", REPL_EXT); exit(1); } if (optind>=argc) { if (isatty(fileno(stdin))) { help(); exit(0); } do_ppt(stdin,"STDIN"); exit (0); } for (i=optind;i<argc;i++) { filename = argv[i]; input=fopen(filename,"rb"); if (!input) { perror(filename); exit(1); } if ((new_file=ole_init(input, NULL, 0)) != NULL) { set_ole_func(); while((ole_file=ole_readdir(new_file)) != NULL) { int res=ole_open(ole_file); /* fprintf(stderr, "name = %s\n", ((oleEntry*)ole_file)->name); */ if (res >= 0) { if (strcasecmp(((oleEntry*)ole_file)->name , "PowerPoint Document") == 0) { do_ppt(ole_file,filename); } } ole_close(ole_file); } set_std_func(); ole_finish(); fclose(new_file); } else { fprintf(stderr, "%s is not OLE file or Error\n", filename); } } return 0; }