/*******************************************************************
 * NAME :        int main(int argc, char **argv)
 *
 * DESCRIPTION : Main entry point. Loads the main dictionary,
 *               processes the given document and then saves the
 *               main dictionary again.
 *
 * PARAMETERS :  argv[1]  document to process (required)
 *               argv[2]  dictionary file (optional; "dict.txt"
 *                        is used when it is omitted)
 *
 * RETURNS :     1 when the document argument is missing,
 *               0 otherwise.
 *******************************************************************/
int main(int argc, char **argv)
{
    char *dict;

    /* Check the number of arguments. */
    if (argc < 2)
    {
        printf("spellchecker.c: nro de argumentos erroneo. Deben ser <documento> [<diccionario>].\n");
        return (1);
    }

    /* Use the dictionary named on the command line if there is one, */
    /* otherwise fall back to the default dictionary.                */
    dict = (argc >= 3) ? argv[2] : "dict.txt";

    dict_load(dict);            /* Load the main dictionary. */
    process_document(argv[1]);  /* Process the document. */
    dict_save(dict);            /* Save the main dictionary. */

    printf("El documento %s ha sido procesado. Resultados en out.txt\n", argv[1]);

    /* Report success explicitly instead of falling off the end of main,
     * which leaves the exit status undefined under pre-C99 rules. */
    return 0;
}
/*
 * Entry point: runs process_document() on the input file and sends its
 * output to the output file.
 *
 * Usage: <program> <infilename> <outfilename>
 *
 * Returns 2 on a usage error, 3 when the output file cannot be opened or
 * cannot be written completely, and otherwise the status returned by
 * process_document().
 */
int
main(int argc, char **argv)
{
    FILE *outfp;
    int ret = 0;

    if (argc != 3)
    {
        fprintf(stderr, "usage: %s <infilename> <outfilename>\n", argv[0]);
        return(2);
    }

    if ((outfp = fopen(argv[2], "w")) == NULL)
    {
        fprintf(stderr, "Error: couldn't open output file '%s'\n", argv[2]);
        return(3);
    }

    /* The input file name is passed twice: once as the file to open and
     * once as the name used in error messages. No in-memory data is
     * supplied, hence the two trailing zeros. */
    ret = process_document(outfp, argv[1], argv[1], 0, 0);

    /* fclose() flushes the remaining buffered output, so a failure here
     * means the output file is incomplete. Report it, but keep an earlier
     * error status if there is one. */
    if (fclose(outfp) != 0)
    {
        fprintf(stderr, "Error: couldn't write output file '%s'\n", argv[2]);
        if (ret == 0)
            ret = 3;
    }

    return ret;
}
/*
 * print_document -- run process_document() with the print_doc_* callbacks
 * and send the result to the text file outfname.
 *
 * outfname       name of the output file; opened with fopen(outfname, "wt").
 * msg_func       optional callback (may be NULL). It is called as
 *                msg_func(0, info.num_page) before processing starts and,
 *                if no error message was set, once more at the end as
 *                msg_func(-1, info.num_page) when process_document()
 *                returned non-zero, or msg_func(-2, info.num_page) when it
 *                returned zero. The pointer is also stored in info.msg_func.
 * save_extraseg  when non-zero, PRINT_BUFFER_SIZE bytes at info.buffer are
 *                written to TEMP_FILE_NAME before processing and read back
 *                from it afterwards.
 *
 * Returns nothing. When a step fails, the matching err_* text is shown via
 * helptitle() and stopmsg(1, msg). The temporary file, if it was opened, is
 * closed and removed on every path.
 */
void print_document(char *outfname, int (*msg_func)(int,int), int save_extraseg )
   {
   static FCODE err_no_temp[] = "Unable to create temporary file.\n";
   static FCODE err_no_out[] = "Unable to create output file.\n";
   static FCODE err_badwrite[] = "Error writing temporary file.\n";
   static FCODE err_badread[] = "Error reading temporary file.\nSystem may be corrupt!\nSave your image and re-start FRACTINT!\n";

   PRINT_DOC_INFO info;
   int success = 0;       /* result of process_document(); stays 0 if it is never reached */
   int temp_file = -1;    /* descriptor of the temporary file, -1 while it is not open */
   char far *msg = NULL;  /* error text to display, NULL while no error has occurred */
   int dummy; /* to quiet compiler */

   /* Far pointer built from extraseg with offset 0. */
   info.buffer = MK_FP(extraseg, 0);

   /* help_seek((long)sizeof(int)+sizeof(long)); Strange -- should be 8 -- CWM */
   help_seek(8L); /* indeed it should - Bert */

   /* Read two int-sized fields from help_file; the read() results are not checked. */
   dummy = read(help_file, (char *)&info.num_contents, sizeof(int));
   dummy = read(help_file, (char *)&info.num_page, sizeof(int));

   info.cnum = info.tnum = -1;
   /* NOTE(review): presumably the help_file offset just past the header, the
    * num_topic long entries and the num_label int pairs -- confirm against
    * the help file layout. */
   info.content_pos = sizeof(long)+4*sizeof(int) + num_topic*sizeof(long) + num_label*2*sizeof(int);
   info.msg_func = msg_func;

   if ( msg_func != NULL )
      msg_func(0, info.num_page); /* initialize */

   if ( save_extraseg )
      {
      /* Save the current contents of info.buffer to a freshly created temporary file. */
      if ( (temp_file=open(TEMP_FILE_NAME, O_RDWR|O_CREAT|O_TRUNC|O_BINARY, S_IREAD|S_IWRITE)) == -1 )
         {
         msg = err_no_temp;
         goto ErrorAbort;
         }

      if ( farwrite(temp_file, info.buffer, PRINT_BUFFER_SIZE) != PRINT_BUFFER_SIZE )
         {
         msg = err_badwrite;
         goto ErrorAbort;
         }
      }

   if ( (info.file = fopen(outfname, "wt")) == NULL )
      {
      msg = err_no_out;
      goto ErrorAbort;
      }

   /* Initial output state handed to the callbacks through info. */
   info.margin = PAGE_INDENT;
   info.start_of_line = 1;
   info.spaces = 0;

   success = process_document((PD_FUNC)print_doc_get_info, (PD_FUNC)print_doc_output, &info);
   /* NOTE(review): the fclose() result is ignored, so a failed final flush goes unreported. */
   fclose(info.file);

   if ( save_extraseg )
      {
      /* Rewind the temporary file and read the saved bytes back into info.buffer. */
      if ( lseek(temp_file, 0L, SEEK_SET) != 0L )
         {
         msg = err_badread;
         goto ErrorAbort;
         }

      if ( farread(temp_file, info.buffer, PRINT_BUFFER_SIZE) != PRINT_BUFFER_SIZE )
         {
         msg = err_badread;
         goto ErrorAbort;
         }
      }

   /* Common exit: reached by falling through on success and by goto on failure. */
ErrorAbort:
   if (temp_file != -1)
      {
      close(temp_file);
      remove(TEMP_FILE_NAME);
      temp_file = -1;
      }

   if ( msg != NULL )
      {
      helptitle();
      stopmsg(1, msg);
      }

   /* No error message: pass the final status to the callback, if there is one. */
   else if ( msg_func != NULL )
      msg_func((success) ? -1 : -2, info.num_page );
   }
/* Open a named physical or virtual file, extract the text from it, search for document or page attachments, and process these recursively. Either filename must be supplied for physical files, or data+length from which a virtual file will be created. The caller cannot create the PVF file since we create a new TET object here in case an exception happens with the embedded document - the caller can happily continue with his TET object even in case of an exception here. */ static int process_document(FILE *outfp, const char *filename, const char *realname, const unsigned char *data, int length) { TET *tet; if ((tet = TET_new()) == (TET *) 0) { fprintf(stderr, "extractor: out of memory\n"); return(4); } TET_TRY (tet) { const char *pvfname = "/pvf/attachment"; int doc; int file, filecount; int page, pagecount; const unsigned char *attdata; int attlength; int objtype; /* Construct a PVF file if data instead of a filename was provided */ if (!filename) { TET_create_pvf(tet, pvfname, 0, data, length, ""); filename = pvfname; } TET_set_option(tet, globaloptlist); doc = TET_open_document(tet, filename, 0, docoptlist); if (doc == -1) { fprintf(stderr, "Error %d in %s() (source: attachment '%s'): %s\n", TET_get_errnum(tet), TET_get_apiname(tet), realname, TET_get_errmsg(tet)); TET_EXIT_TRY(tet); TET_delete(tet); return(5); } /* -------------------- Extract the document's own page contents */ extract_text(tet, doc, outfp); /* -------------------- Process all document-level file attachments */ /* Get the number of document-level file attachments. 
*/ filecount = (int) TET_pcos_get_number(tet, doc, "length:names/EmbeddedFiles"); for (file = 0; file < filecount; file++) { const char *attname; /* fetch the name of the file attachment; check for Unicode file * name (a PDF 1.7 feature) */ objtype = (int) TET_pcos_get_number(tet, doc, "type:names/EmbeddedFiles[%d]/UF", file); if (objtype == pcos_ot_string) { attname = TET_pcos_get_string(tet, doc, "names/EmbeddedFiles[%d]/UF", file); } else { /* fetch the name of the file attachment */ objtype = (int) TET_pcos_get_number(tet, doc, "type:names/EmbeddedFiles[%d]/F", file); if (objtype == pcos_ot_string) { attname = TET_pcos_get_string(tet, doc, "names/EmbeddedFiles[%d]/F", file); } else { attname = "(unnamed)"; } } fprintf(outfp, "\n----- File attachment '%s':\n", attname); /* fetch the contents of the file attachment and process it */ objtype = (int) TET_pcos_get_number(tet, doc, "type:names/EmbeddedFiles[%d]/EF/F", file); if (objtype == pcos_ot_stream) { attdata = TET_pcos_get_stream(tet, doc, &attlength, "", "names/EmbeddedFiles[%d]/EF/F", file); (void) process_document(outfp, 0, attname, attdata, attlength); } } /* -------------------- Process all page-level file attachments */ pagecount = (int) TET_pcos_get_number(tet, doc, "length:pages"); /* Check all pages for annotations of type FileAttachment */ for (page = 0; page < pagecount; page++) { int annot, annotcount; annotcount = (int) TET_pcos_get_number(tet, doc, "length:pages[%d]/Annots", page); for (annot = 0; annot < annotcount; annot++) { const char *val; char attname[128]; val = TET_pcos_get_string(tet, doc, "pages[%d]/Annots[%d]/Subtype", page, annot); sprintf(attname, "page %d, annotation %d", page+1, annot+1); if (!strcmp(val, "FileAttachment")) { /* fetch the contents of the attachment and process it */ objtype = (int) TET_pcos_get_number(tet, doc, "type:pages[%d]/Annots[%d]/FS/EF/F", page, annot); if (objtype == pcos_ot_stream) { attdata = TET_pcos_get_stream(tet, doc, &attlength, "", 
"pages[%d]/Annots[%d]/FS/EF/F", page, annot); (void) process_document(outfp, 0, attname, attdata, attlength); } } } } TET_close_document(tet, doc); /* If there was no PVF file deleting it won't do any harm */ TET_delete_pvf(tet, pvfname, 0); } TET_CATCH (tet) { fprintf(stderr, "Error %d in %s() (source: attachment '%s'): %s\n", TET_get_errnum(tet), TET_get_apiname(tet), realname, TET_get_errmsg(tet)); } TET_delete(tet); return(0); }