Exemplo n.º 1
0
void *process_doc( void *in ) {

  struct process_phash *data = (struct process_phash *)in;
  int thread_id = data->thread_id;
  thread_active[thread_id] = 1; // belt & braces
  o_log(INFORMATION, "[%d] Calculating pHash for docid = %d", thread_id, data->docid );

  // Generate the pHash for this image.
  o_log(DEBUGM, "Calculating pHash for %s", data->filename );
  unsigned long long hash = getImagePhash_fn( data->filename );

  // Save the result back to the DB
  savePhash( data->docid, hash );

  // Cleanup and go home
  free(data->filename);
  free(data);
  thread_active[thread_id] = 0;
  return NULL;
}
Exemplo n.º 2
0
char *internalDoScanningOperation(char *uuid, char *lang) {

  int request_resolution = 0;
  int docid;
  int current_page = 0;
  int total_requested_pages;
  double totbytes = 0;
  SANE_Status status;
  SANE_Handle *openDeviceHandle;
  SANE_Byte *raw_image;
  SANE_Parameters pars;
  char *docid_s;
  char *total_requested_pages_s;
  char *devName;
  char *outFilename;
  char *raw_image_format;
  char *header;

  o_log(DEBUGM, "doScanningOperation: sane initialized uuid(%s)",(char *)uuid);
  updateScanProgress(uuid, SCAN_WAITING_ON_SCANNER, 0);

  // Open the device
  devName = getScanParam(uuid, SCAN_PARAM_DEVNAME);
  o_log(DEBUGM, "sane_open of \"%s\"",devName);
  status = sane_open ((SANE_String_Const) devName, (SANE_Handle)&openDeviceHandle);
  if(status != SANE_STATUS_GOOD) {
    handleSaneErrors("Cannot open device ", devName, status, 0);
    updateScanProgress(uuid, SCAN_ERRO_FROM_SCANNER, status);
    free(devName);
    return 0;
  }
  free(devName);

  /* ========================================================== */
  if ( ! setOptions( (char *)uuid, openDeviceHandle, &request_resolution ) )
    return 0;
  o_log(DEBUGM, "sane_start: setOptions returned request_resolution %d\n",request_resolution);

  int timeout = 5;
  while( 0 < timeout ) {
    status = sane_start (openDeviceHandle);
    if(status == SANE_STATUS_GOOD) {  
      break;
    }
    else {
      if(status == SANE_STATUS_DEVICE_BUSY ) {  
        // BUSY signal could be the scanner just having a 
        // bit of lag - specially network connected devices
        timeout--;
        if ( timeout == 0 ) {
          handleSaneErrors("Cannot start scanning", "even after trying several time", status, 0);
          updateScanProgress(uuid, SCAN_ERRO_FROM_SCANNER, status);
          return 0;
        }
        else {
          o_log(WARNING, "Device reports not ready to 'start', waiting 500ms. Will try another %d times", timeout);
          usleep(500 * 1000); // 500ms or 0.5sec
        }
      }
      else {
        handleSaneErrors("Cannot start scanning", "", status, 0);
        updateScanProgress(uuid, SCAN_ERRO_FROM_SCANNER, status);
        return 0;
      }
    }
  }

  // Get scanning params (from the scanner)
  if( request_resolution == 0 ) {
    o_log(DEBUGM, "Resolution did not get set in scanner setup.");
    updateScanProgress(uuid, SCAN_INTERNAL_ERROR, 10004);
    return 0;
  }

  o_log(DEBUGM, "Get scanning params");
  status = sane_get_parameters (openDeviceHandle, &pars);
  o_log(INFORMATION, "Scanner Parm : stat=%s form=%d,lf=%d,bpl=%d,pixpl=%d,lin=%d,dep=%d",
    sane_strstatus (status),
    pars.format, pars.last_frame,
    pars.bytes_per_line, pars.pixels_per_line,
    pars.lines, pars.depth);

  switch (pars.format) {
    case SANE_FRAME_GRAY:
      o_log(DEBUGM, "Expecting Gray data (1 channel only).");
      raw_image_format = o_strdup( "P5" );
      break;
    case SANE_FRAME_RGB:
      o_log(DEBUGM, "Expecting RGB data (3 channels).");
      raw_image_format = o_strdup( "P6" );
      break;
    default:
      o_log(DEBUGM, "backend returns three frames speratly. We do not currently support this.");
      updateScanProgress(uuid, SCAN_INTERNAL_ERROR, 10003);
      return 0;
      break;
  }
  
  header = o_printf ("%s\n# SANE data follows\n%d %d\n%d\n", 
    raw_image_format, pars.pixels_per_line, pars.lines,
    (pars.depth <= 8) ? 255 : 65535);
  free( raw_image_format );


  // Save Record
  //
  docid_s = getScanParam(uuid, SCAN_PARAM_DOCID);
  total_requested_pages_s = getScanParam(uuid, SCAN_PARAM_REQUESTED_PAGES);
  total_requested_pages = atoi(total_requested_pages_s);
  free(total_requested_pages_s);
  if( docid_s == NULL ) {
    o_log(DEBUGM, "Saving record");
    updateScanProgress(uuid, SCAN_DB_WORKING, 0);

    docid_s = addNewScannedDoc(pars.lines, pars.pixels_per_line, request_resolution, total_requested_pages); 
    setScanParam(uuid, SCAN_PARAM_DOCID, docid_s);
    setScanParam(uuid, SCAN_PARAM_ON_PAGE, "1");
    current_page = 1;
  }
  else {
    char *current_page_s = getScanParam(uuid, SCAN_PARAM_ON_PAGE);
    current_page = atoi(current_page_s);
    free(current_page_s);

    current_page++;

    current_page_s = itoa(current_page, 10);
    setScanParam(uuid, SCAN_PARAM_ON_PAGE, current_page_s);
    free(current_page_s);
  }
  docid = atoi(docid_s);
  free(docid_s);

  totbytes = (double)((pars.bytes_per_line * pars.lines));

  /* ========================================================== */
  raw_image = collectData( (char *)uuid, openDeviceHandle, totbytes, pars.bytes_per_line, header );
  o_log(INFORMATION, "Scanning done.");

  o_log(DEBUGM, "sane_cancel");
  sane_cancel(openDeviceHandle);

  o_log(DEBUGM, "sane_close");
  sane_close(openDeviceHandle);


  // Convert Raw into JPEG
  //
  updateScanProgress(uuid, SCAN_CONVERTING_FORMAT, 0);
  PIX *pix;
  if ( ( pix = pixReadMem( raw_image, (pars.bytes_per_line*pars.lines)+strlen(header) ) ) == NULL) {
    o_log(ERROR, "Could not load the image data into a PIX");
  }
  updateScanProgress(uuid, SCAN_CONVERTING_FORMAT, 55);
  o_log(INFORMATION, "Convertion process: Loaded (depth: %d)", pixGetDepth(pix));
  free(raw_image);
  free(header);

  outFilename = o_printf("%s/scans/%d_%d.jpg", BASE_DIR, docid, current_page);
  pixWrite(outFilename, pix, IFF_JFIF_JPEG);
  free(outFilename);
  updateScanProgress(uuid, SCAN_CONVERTING_FORMAT, 100);
  o_log(INFORMATION, "Conversion process: Complete");




  // Do OCR - on this page
  // - OCR libs just wants the raw data and not the image header
  ocrImage( uuid, docid, current_page, request_resolution, pix, lang );


#ifdef CAN_PHASH
  // Calulate the pHash, so we can compare images later
  if( current_page == 1 ) {
    updateScanProgress(uuid, SCAN_CALULATING_PHASH, 0);
    unsigned long long hash = getImagePhash_px( pix );
    savePhash( docid, hash );
  }
#endif /* CAN_PHASH */
  pixDestroy( &pix );


  // cleaup && What should we do next
  //
  o_log(DEBUGM, "mostly done.");
  if(current_page >= total_requested_pages)
    updateScanProgress(uuid, SCAN_FINISHED, docid);
  else
    updateScanProgress(uuid, SCAN_WAITING_ON_NEW_PAGE, ++current_page);

  o_log(DEBUGM, "Page scan done.");

  return o_strdup("OK"); 
}
Exemplo n.º 3
0
char *uploadfile(char *filename, char *lookForSimilar, char *lang) {

#ifndef CAN_MAGIC
  o_log(ERROR, "Unable to determin the file type, aborting.");
  return NULL;
#else

  int width = 0, height = 0, itype = PLACE_HOLDER;
  char *final_filename, *ocrText = NULL, *tmp;
#ifdef CAN_PDF
  char *thumbext = NULL;
#else
#ifdef CAN_READODF
  char *thumbext = NULL;
#endif /* CAN_READODF */
#endif /* CAN_PDF */
  char *docid;
  char *ftype;
  char *datafile;
  char *thumbfile = NULL;
  PIX *pix;

  datafile = o_printf("/tmp/%s.dat", filename);
  magic_t cookie = magic_open(MAGIC_MIME_TYPE);
  magic_load( cookie, NULL );
  const char *t = magic_file( cookie, datafile );
  ftype = o_strdup( t );
  o_log( ERROR, "Uploaded file looks to be of type: %s", ftype );
  magic_close( cookie );

  // --------------------------------------
  if( 0 == strcmp("application/pdf", ftype) ) {
    itype = PDF_FILETYPE;
#ifdef CAN_PDF
    thumbfile = o_printf("/tmp/%s.thumb", filename);
    ocrText = parse_pdf( datafile, thumbfile ); // pdf_plug.cc [create thumbnail and return body text] 
    thumbext = o_strdup("jpg");
#endif /* CAN_PDF */
    o_log( INFORMATION, "Processed PDF");
  }

  // --------------------------------------
  else if( 0 == strcmp("application/vnd.oasis.opendocument.text", ftype) ) {
    itype = ODF_FILETYPE;
#ifdef CAN_READODF
    thumbfile = o_printf("/tmp/%s.thumb", filename);
    get_odf_Thumb( datafile, thumbfile );
    ocrText = get_odf_Text( datafile ); // odf_plug.c 
    thumbext = o_strdup("png");
#endif /* CAN_READODF */
    o_log( INFORMATION, "Processed ODF doc");
  }

  // --------------------------------------
  else if( 0 == strcmp("image/jpeg", ftype) ) {
    itype = JPG_FILETYPE;
#ifdef CAN_OCR
    PIX *pix_l;
    if ( ( pix_l = pixRead( datafile ) ) == NULL) {
      o_log(ERROR, "Could not load the image data into a PIX");
      return NULL;
    }
    int depth;
    pixGetDimensions( pix_l, &width, &height, &depth );
    o_log(INFORMATION, "Convertion process: Loaded (depth: %d)", depth );
    pix = pixScaleRGBToGrayFast( pix_l, 1, COLOR_GREEN );
    pixDestroy( &pix_l );
    if (pix == NULL ) {
      o_log(ERROR,"Conversion process failed pixScaleRGBToGrayFast! skip ocr");
    }
    else {
      o_log(INFORMATION, "Convertion process: Reduced depth to %d", pixGetDepth(pix));
      ocrText = getTextFromImage(pix, 0, "eng");
    }
#endif /* CAN_OCR */
    o_log( INFORMATION, "Processed JPG doc");
  }

  // --------------------------------------
  else {
    free( ftype );
    free( datafile );
    o_log(ERROR, "unknown file type.");
    return NULL;
  }
  free( ftype );

  // Set a default OCR text string
  if( ocrText == NULL ) {
    ocrText = o_strdup( getString("LOCAL_ocr_default_text", lang ) );
  }

  // Save the record to the DB
  o_log(DEBUGM, "Saving doc import record");
  docid = addNewFileDoc(itype, width, height, ocrText); // ocrText get freed in this method

  // Move the main datafile to the file store location
  final_filename = o_printf("%s/scans/%s", BASE_DIR, docid); // none image imported docs, are stored with no "_x" postfix.
  if( itype == JPG_FILETYPE ) {
    conCat(&final_filename, "_1");
  }
  addFileExt(&final_filename, itype);

  fcopy(datafile, final_filename);
  o_log( DEBUGM, "Moved data file");
  // The original file will be unlinked by the HTTPD process
  free(datafile);

  // Move any thumbnail image to the file store location
  if( thumbfile ) {
    free(final_filename); // This currently holds the main PDG or ODF file.
    final_filename = o_printf("%s/scans/%s_thumb.%s", BASE_DIR, docid, thumbext); // any thumbnails are postfixed with "_thumb"
    fcopy(thumbfile, final_filename);
    o_log( DEBUGM, "Moved thumbnail file");
    unlink(thumbfile);
    free(thumbfile);
    free(thumbext);

#ifdef CAN_PHASH
    o_log( DEBUGM, "About to perform pHash on file");
    unsigned long long hash = getImagePhash_fn( final_filename );
    savePhash( atoi(docid), hash );
#endif /* CAN_PHASH */
  }
  else {
#ifdef CAN_PHASH
    o_log( DEBUGM, "About to perform pHash on pix");
    unsigned long long hash = getImagePhash_px( pix );
    savePhash( atoi(docid), hash );
#endif /* CAN_PHASH */
    pixDestroy( &pix );
  }
  free(final_filename);

  // Should we look for a similar doc, on opening?
  char *findSim = "";
#ifdef CAN_PHASH
  if( lookForSimilar != (void *)NULL ) {
    findSim = "&findSimilar=1";
  }
#endif /*  CAN_PHASH */

  // Open the document for editing.
  tmp = o_printf("<html><HEAD><META HTTP-EQUIV=\"refresh\" CONTENT=\"0;URL=/opendias/docDetail.html?docid=%s%s\"></HEAD><body></body></html>", docid, findSim);
  free(docid);

  return tmp;
#endif /* CAN_MAGIC */
}