void *process_doc( void *in ) { struct process_phash *data = (struct process_phash *)in; int thread_id = data->thread_id; thread_active[thread_id] = 1; // belt & braces o_log(INFORMATION, "[%d] Calculating pHash for docid = %d", thread_id, data->docid ); // Generate the pHash for this image. o_log(DEBUGM, "Calculating pHash for %s", data->filename ); unsigned long long hash = getImagePhash_fn( data->filename ); // Save the result back to the DB savePhash( data->docid, hash ); // Cleanup and go home free(data->filename); free(data); thread_active[thread_id] = 0; return NULL; }
char *uploadfile(char *filename, char *lookForSimilar, char *lang) { #ifndef CAN_MAGIC o_log(ERROR, "Unable to determin the file type, aborting."); return NULL; #else int width = 0, height = 0, itype = PLACE_HOLDER; char *final_filename, *ocrText = NULL, *tmp; #ifdef CAN_PDF char *thumbext = NULL; #else #ifdef CAN_READODF char *thumbext = NULL; #endif /* CAN_READODF */ #endif /* CAN_PDF */ char *docid; char *ftype; char *datafile; char *thumbfile = NULL; PIX *pix; datafile = o_printf("/tmp/%s.dat", filename); magic_t cookie = magic_open(MAGIC_MIME_TYPE); magic_load( cookie, NULL ); const char *t = magic_file( cookie, datafile ); ftype = o_strdup( t ); o_log( ERROR, "Uploaded file looks to be of type: %s", ftype ); magic_close( cookie ); // -------------------------------------- if( 0 == strcmp("application/pdf", ftype) ) { itype = PDF_FILETYPE; #ifdef CAN_PDF thumbfile = o_printf("/tmp/%s.thumb", filename); ocrText = parse_pdf( datafile, thumbfile ); // pdf_plug.cc [create thumbnail and return body text] thumbext = o_strdup("jpg"); #endif /* CAN_PDF */ o_log( INFORMATION, "Processed PDF"); } // -------------------------------------- else if( 0 == strcmp("application/vnd.oasis.opendocument.text", ftype) ) { itype = ODF_FILETYPE; #ifdef CAN_READODF thumbfile = o_printf("/tmp/%s.thumb", filename); get_odf_Thumb( datafile, thumbfile ); ocrText = get_odf_Text( datafile ); // odf_plug.c thumbext = o_strdup("png"); #endif /* CAN_READODF */ o_log( INFORMATION, "Processed ODF doc"); } // -------------------------------------- else if( 0 == strcmp("image/jpeg", ftype) ) { itype = JPG_FILETYPE; #ifdef CAN_OCR PIX *pix_l; if ( ( pix_l = pixRead( datafile ) ) == NULL) { o_log(ERROR, "Could not load the image data into a PIX"); return NULL; } int depth; pixGetDimensions( pix_l, &width, &height, &depth ); o_log(INFORMATION, "Convertion process: Loaded (depth: %d)", depth ); pix = pixScaleRGBToGrayFast( pix_l, 1, COLOR_GREEN ); pixDestroy( &pix_l ); if (pix == NULL ) { o_log(ERROR,"Conversion process failed pixScaleRGBToGrayFast! skip ocr"); } else { o_log(INFORMATION, "Convertion process: Reduced depth to %d", pixGetDepth(pix)); ocrText = getTextFromImage(pix, 0, "eng"); } #endif /* CAN_OCR */ o_log( INFORMATION, "Processed JPG doc"); } // -------------------------------------- else { free( ftype ); free( datafile ); o_log(ERROR, "unknown file type."); return NULL; } free( ftype ); // Set a default OCR text string if( ocrText == NULL ) { ocrText = o_strdup( getString("LOCAL_ocr_default_text", lang ) ); } // Save the record to the DB o_log(DEBUGM, "Saving doc import record"); docid = addNewFileDoc(itype, width, height, ocrText); // ocrText get freed in this method // Move the main datafile to the file store location final_filename = o_printf("%s/scans/%s", BASE_DIR, docid); // none image imported docs, are stored with no "_x" postfix. if( itype == JPG_FILETYPE ) { conCat(&final_filename, "_1"); } addFileExt(&final_filename, itype); fcopy(datafile, final_filename); o_log( DEBUGM, "Moved data file"); // The original file will be unlinked by the HTTPD process free(datafile); // Move any thumbnail image to the file store location if( thumbfile ) { free(final_filename); // This currently holds the main PDG or ODF file. final_filename = o_printf("%s/scans/%s_thumb.%s", BASE_DIR, docid, thumbext); // any thumbnails are postfixed with "_thumb" fcopy(thumbfile, final_filename); o_log( DEBUGM, "Moved thumbnail file"); unlink(thumbfile); free(thumbfile); free(thumbext); #ifdef CAN_PHASH o_log( DEBUGM, "About to perform pHash on file"); unsigned long long hash = getImagePhash_fn( final_filename ); savePhash( atoi(docid), hash ); #endif /* CAN_PHASH */ } else { #ifdef CAN_PHASH o_log( DEBUGM, "About to perform pHash on pix"); unsigned long long hash = getImagePhash_px( pix ); savePhash( atoi(docid), hash ); #endif /* CAN_PHASH */ pixDestroy( &pix ); } free(final_filename); // Should we look for a similar doc, on opening? char *findSim = ""; #ifdef CAN_PHASH if( lookForSimilar != (void *)NULL ) { findSim = "&findSimilar=1"; } #endif /* CAN_PHASH */ // Open the document for editing. tmp = o_printf("<html><HEAD><META HTTP-EQUIV=\"refresh\" CONTENT=\"0;URL=/opendias/docDetail.html?docid=%s%s\"></HEAD><body></body></html>", docid, findSim); free(docid); return tmp; #endif /* CAN_MAGIC */ }