예제 #1
0
/*******************************************************************
* NAME :            int main(int argc, char **argv)
*
* DESCRIPTION :     Main entry point. Loads the main dictionary,
*                   processes the given document and then saves the
*                   main dictionary.
*
* ARGUMENTS :       argv[1]  document to process (required)
*                   argv[2]  dictionary file (optional; "dict.txt"
*                            is used when it is omitted)
*
* RETURNS :         0 once the document has been processed,
*                   1 when the document argument is missing.
*******************************************************************/
int main(int argc, char **argv) {
    char *dict;

    /* The document name is mandatory; the dictionary is optional. */
    if (argc < 2) {
        /* Diagnostics belong on stderr so they never mix with regular output. */
        fprintf(stderr, "spellchecker.c: nro de argumentos erroneo. Deben ser <documento> [<diccionario>].\n");
        return 1;
    }

    /* Use the dictionary given on the command line, otherwise the default one. */
    dict = (argc >= 3) ? argv[2] : "dict.txt";

    dict_load(dict);              /* load the main dictionary */
    process_document(argv[1]);    /* process the document */
    dict_save(dict);              /* save the main dictionary */
    printf("El documento %s ha sido procesado. Resultados en out.txt\n", argv[1]);

    /* Explicit success status instead of falling off the end of main. */
    return 0;
}
예제 #2
0
/*
 * Entry point: processes the input file named by argv[1] and writes the
 * result to the output file named by argv[2].
 *
 * Exit status:
 *   2  wrong number of command line arguments
 *   3  the output file could not be opened for writing
 *   6  processing reported success but closing (flushing) the output
 *      file failed
 *   otherwise the value returned by process_document()
 */
int main(int argc, char **argv)
{
    FILE *outfp;
    int ret = 0;

    if (argc != 3)
    {
        fprintf(stderr, "usage: %s <infilename> <outfilename>\n", argv[0]);
        return(2);
    }

    if ((outfp = fopen(argv[2], "w")) == NULL)
    {
        fprintf(stderr, "Error: couldn't open output file '%s'\n", argv[2]);
        return(3);
    }

    /* The input is a file on disk, so no in-memory data is supplied:
     * null data pointer and zero length. The file name is passed twice,
     * once as the file to open and once as the name used in messages.
     */
    ret = process_document(outfp, argv[1], argv[1], NULL, 0);

    /* fclose() flushes buffered output; a failure here means the output
     * file is incomplete, so it must not be reported as success.
     */
    if (fclose(outfp) != 0)
    {
        fprintf(stderr, "Error: couldn't write output file '%s'\n", argv[2]);
        if (ret == 0)
            ret = 6;
    }

    return ret;
}
예제 #3
0
void print_document(char *outfname, int (*msg_func)(int,int), int save_extraseg )
   {
   static FCODE err_no_temp[]  = "Unable to create temporary file.\n";
   static FCODE err_no_out[]   = "Unable to create output file.\n";
   static FCODE err_badwrite[] = "Error writing temporary file.\n";
   static FCODE err_badread[]  = "Error reading temporary file.\nSystem may be corrupt!\nSave your image and re-start FRACTINT!\n";

   PRINT_DOC_INFO info;
   int            success   = 0;
   int            temp_file = -1;
   char      far *msg = NULL;
   int            dummy; /* to quiet compiler */

   info.buffer = MK_FP(extraseg, 0);

/*   help_seek((long)sizeof(int)+sizeof(long));         Strange -- should be 8 -- CWM */
   help_seek(8L);                               /* indeed it should - Bert */
   dummy = read(help_file, (char *)&info.num_contents, sizeof(int));
   dummy = read(help_file, (char *)&info.num_page, sizeof(int));

   info.cnum = info.tnum = -1;
   info.content_pos = sizeof(long)+4*sizeof(int) + num_topic*sizeof(long) + num_label*2*sizeof(int);
   info.msg_func = msg_func;

   if ( msg_func != NULL )
      msg_func(0, info.num_page);   /* initialize */

   if ( save_extraseg )
      {
      if ( (temp_file=open(TEMP_FILE_NAME, O_RDWR|O_CREAT|O_TRUNC|O_BINARY, S_IREAD|S_IWRITE)) == -1 )
         {
         msg = err_no_temp;
         goto ErrorAbort;
         }

      if ( farwrite(temp_file, info.buffer, PRINT_BUFFER_SIZE) != PRINT_BUFFER_SIZE )
         {
         msg = err_badwrite;
         goto ErrorAbort;
         }
      }

   if ( (info.file = fopen(outfname, "wt")) == NULL )
      {
      msg = err_no_out;
      goto ErrorAbort;
      }

   info.margin = PAGE_INDENT;
   info.start_of_line = 1;
   info.spaces = 0;

   success = process_document((PD_FUNC)print_doc_get_info,
                              (PD_FUNC)print_doc_output,   &info);
   fclose(info.file);

   if ( save_extraseg )
      {
      if ( lseek(temp_file, 0L, SEEK_SET) != 0L )
         {
         msg = err_badread;
         goto ErrorAbort;
         }

      if ( farread(temp_file, info.buffer, PRINT_BUFFER_SIZE) != PRINT_BUFFER_SIZE )
         {
         msg = err_badread;
         goto ErrorAbort;
         }
      }

ErrorAbort:
   if (temp_file != -1)
      {
      close(temp_file);
      remove(TEMP_FILE_NAME);
      temp_file = -1;
      }

   if ( msg != NULL )
      {
      helptitle();
      stopmsg(1, msg);
      }

   else if ( msg_func != NULL )
      msg_func((success) ? -1 : -2, info.num_page );
   }
예제 #4
0
/* Open a named physical or virtual file, extract the text from it,
   search for document or page attachments, and process these recursively.
   Either filename must be supplied for physical files, or data+length
   from which a virtual file will be created.
   The caller cannot create the PVF file since we create a new TET object
   here in case an exception happens with the embedded document - the
   caller can happily continue with his TET object even in case of an
   exception here.

   Parameters:
     outfp     stream that receives the extracted text and the
               "File attachment" separator lines
     filename  name of the file to open, or a null pointer to have a
               virtual file built from data/length
     realname  name used in error messages only
     data      attachment contents (used only when filename is null)
     length    number of bytes in data

   Returns 4 if no TET object could be created, 5 if the document could
   not be opened, and 0 otherwise. Note that 0 is also returned after an
   exception was caught and reported by the TET_CATCH block below.
*/
static int
process_document(FILE *outfp, const char *filename, const char *realname,
	const unsigned char *data, int length)
{
    TET *tet;

    /* Every invocation (including recursive ones) works on its own
     * TET object.
     */
    if ((tet = TET_new()) == (TET *) 0)
    {
        fprintf(stderr, "extractor: out of memory\n");
        return(4);
    }

    TET_TRY (tet)
    {
	const char *pvfname = "/pvf/attachment";
        int doc;
	int file, filecount;
	int page, pagecount;
	const unsigned char *attdata;
	int attlength;
	int objtype;

	/* Construct a PVF file if data instead of a filename was provided */
	if (!filename)
	{
	    TET_create_pvf(tet, pvfname, 0, data, length, "");
	    filename = pvfname;
	}

        TET_set_option(tet, globaloptlist);

        doc = TET_open_document(tet, filename, 0, docoptlist);

        if (doc == -1)
        {
	    fprintf(stderr,
		"Error %d in %s() (source: attachment '%s'): %s\n",
		TET_get_errnum(tet), TET_get_apiname(tet),
		realname, TET_get_errmsg(tet));
            /* Leave the try block explicitly before returning from inside
             * it (presumably required by the TET_TRY macro protocol -
             * confirm against the TET documentation).
             */
            TET_EXIT_TRY(tet);
            TET_delete(tet);
            return(5);
        }

	/* -------------------- Extract the document's own page contents */
	extract_text(tet, doc, outfp);

	/* -------------------- Process all document-level file attachments */

	/* Get the number of document-level file attachments.  */
	filecount = (int) TET_pcos_get_number(tet, doc,
		"length:names/EmbeddedFiles");

	for (file = 0; file < filecount; file++)
	{
	    const char *attname;

	    /* fetch the name of the file attachment; check for Unicode file
             * name (a PDF 1.7 feature)
             */
            objtype = (int) TET_pcos_get_number(tet, doc,
                    "type:names/EmbeddedFiles[%d]/UF", file);

            if (objtype == pcos_ot_string)
            {
                attname = TET_pcos_get_string(tet, doc,
                    "names/EmbeddedFiles[%d]/UF", file);
            }
            else {
                /* fetch the name of the file attachment */
                objtype = (int) TET_pcos_get_number(tet, doc,
                        "type:names/EmbeddedFiles[%d]/F", file);

                if (objtype == pcos_ot_string)
                {
                    attname = TET_pcos_get_string(tet, doc,
                                "names/EmbeddedFiles[%d]/F", file);
                }
                else
                {
                    /* neither UF nor F holds a string: use a placeholder */
                    attname = "(unnamed)";
                }
            }

	    fprintf(outfp, "\n----- File attachment '%s':\n", attname);

	    /* fetch the contents of the file attachment and process it */
	    objtype = (int) TET_pcos_get_number(tet, doc,
		    "type:names/EmbeddedFiles[%d]/EF/F", file);

	    if (objtype == pcos_ot_stream)
	    {
		attdata = TET_pcos_get_stream(tet, doc, &attlength, "",
			"names/EmbeddedFiles[%d]/EF/F", file);

		/* Recurse with a null filename so that the attachment is
		 * opened from memory; the result is deliberately ignored so
		 * a failing attachment does not stop the outer document.
		 * NOTE(review): attname may still point at the string
		 * returned by TET_pcos_get_string() above although further
		 * TET calls were made on the same object - confirm that the
		 * returned string stays valid that long.
		 */
		(void) process_document(outfp, 0, attname, attdata, attlength);
	    }
	}

	/* -------------------- Process all page-level file attachments */

	pagecount = (int) TET_pcos_get_number(tet, doc, "length:pages");

	/* Check all pages for annotations of type FileAttachment */
	for (page = 0; page < pagecount; page++)
	{
	    int annot, annotcount;

	    annotcount = (int) TET_pcos_get_number(tet, doc,
	                    "length:pages[%d]/Annots", page);

	    for (annot = 0; annot < annotcount; annot++)
	    {
		const char *val;
		char attname[128];

                val = TET_pcos_get_string(tet, doc,
			"pages[%d]/Annots[%d]/Subtype", page, annot);

		/* Page-level attachments are labelled by their 1-based page
		 * and annotation numbers; the label is passed as realname
		 * to the recursive call.
		 */
		sprintf(attname, "page %d, annotation %d", page+1, annot+1);

                if (!strcmp(val, "FileAttachment"))
		{
		    /* fetch the contents of the attachment and process it */
		    objtype = (int) TET_pcos_get_number(tet, doc,
			"type:pages[%d]/Annots[%d]/FS/EF/F", page, annot);

		    if (objtype == pcos_ot_stream)
		    {
			attdata = TET_pcos_get_stream(tet, doc, &attlength, "",
			    "pages[%d]/Annots[%d]/FS/EF/F", page, annot);

			/* result ignored, as for document-level attachments */
			(void) process_document(outfp, 0,
				attname, attdata, attlength);
		    }
                }
            }
	}

	TET_close_document(tet, doc);

	/* If there was no PVF file deleting it won't do any harm */
	TET_delete_pvf(tet, pvfname, 0);
    }

    TET_CATCH (tet)
    {
	/* Report the failure and fall through to the common cleanup below. */
	fprintf(stderr,
	    "Error %d in %s() (source: attachment '%s'): %s\n",
	    TET_get_errnum(tet), TET_get_apiname(tet),
	    realname, TET_get_errmsg(tet));
    }

    TET_delete(tet);

    /* NOTE(review): 0 is returned here even when the TET_CATCH block ran. */
    return(0);
}