Beispiel #1
0
/**
 * Serializes an image as a complete PDF image object
 * ("<objnum> 0 obj ... stream ... endstream endobj").
 *
 * The compressed image data is produced by Leptonica: a 4-samples-per-pixel
 * PNG is first blended against a uniform 0xffffff00 background and
 * Flate-encoded; every other input goes through l_generateCIDataForPdf().
 *
 * @param pix              Image to embed.
 * @param filename         Path of the image file; used to detect the file
 *                         format and passed to l_generateCIDataForPdf().
 * @param objnum           PDF object number written into the object header.
 * @param pdf_object       Out: buffer allocated with new char[] holding the
 *                         object bytes (not NUL-terminated). Set to nullptr
 *                         on failure. Presumably released by the caller with
 *                         delete[] -- confirm against the call site.
 * @param pdf_object_size  Out: number of bytes in *pdf_object; 0 on failure.
 * @return true on success; false if an output pointer or filename is null,
 *         compression fails, the encoding type or samples-per-pixel value is
 *         unsupported, or a header does not fit in its formatting buffer.
 */
bool TessPDFRenderer::imageToPDFObj(Pix *pix,
                                    char *filename,
                                    long int objnum,
                                    char **pdf_object,
                                    long int *pdf_object_size) {
  // snprintf() returns int; storing it in a size_t turns a negative (error)
  // return into a huge value, so the ">= sizeof(buf)" checks below reject
  // both truncation and formatting errors.
  size_t n;
  char b0[kBasicBufSize];
  char b1[kBasicBufSize];
  char b2[kBasicBufSize];
  if (!pdf_object_size || !pdf_object)
    return false;
  *pdf_object = nullptr;
  *pdf_object_size = 0;
  if (!filename)
    return false;

  L_Compressed_Data *cid = nullptr;
  const int kJpegQuality = 85;

  int format, sad;
  findFileFormat(filename, &format);
  if (pixGetSpp(pix) == 4 && format == IFF_PNG) {
    // RGBA PNG: flatten the alpha channel onto a uniform background and
    // compress the temporary image instead of the original file.
    Pix *p1 = pixAlphaBlendUniform(pix, 0xffffff00);
    sad = pixGenerateCIData(p1, L_FLATE_ENCODE, 0, 0, &cid);
    pixDestroy(&p1);
  } else {
    sad = l_generateCIDataForPdf(filename, pix, kJpegQuality, &cid);
  }

  if (sad || !cid) {
    l_CIDataDestroy(&cid);
    return false;
  }

  // Map the Leptonica encoding type to the PDF stream filter name.
  // Only the CCITT G4 filter needs the extra /K DecodeParms entry.
  const char *group4 = "";
  const char *filter;
  switch (cid->type) {
    case L_FLATE_ENCODE:
      filter = "/FlateDecode";
      break;
    case L_JPEG_ENCODE:
      filter = "/DCTDecode";
      break;
    case L_G4_ENCODE:
      filter = "/CCITTFaxDecode";
      group4 = "    /K -1\n";
      break;
    case L_JP2K_ENCODE:
      filter = "/JPXDecode";
      break;
    default:
      l_CIDataDestroy(&cid);
      return false;
  }

  // Maybe someday we will accept RGBA but today is not that day.
  // It requires creating an /SMask for the alpha channel.
  // http://stackoverflow.com/questions/14220221
  const char *colorspace;
  if (cid->ncolors > 0) {
    // Colormapped image: emit an indexed colorspace with the hex palette.
    n = snprintf(b0, sizeof(b0),
                 "  /ColorSpace [ /Indexed /DeviceRGB %d %s ]\n",
                 cid->ncolors - 1, cid->cmapdatahex);
    if (n >= sizeof(b0)) {
      l_CIDataDestroy(&cid);
      return false;
    }
    colorspace = b0;
  } else {
    switch (cid->spp) {
      case 1:
        colorspace = "  /ColorSpace /DeviceGray\n";
        break;
      case 3:
        colorspace = "  /ColorSpace /DeviceRGB\n";
        break;
      default:
        l_CIDataDestroy(&cid);
        return false;
    }
  }

  // PDF DecodeParms predictor: 1 = no prediction, 14 = PNG prediction.
  int predictor = (cid->predictor) ? 14 : 1;

  // IMAGE
  // The length is cast to unsigned long, so it must be printed with %lu
  // (the previous %ld mismatched the argument type).
  n = snprintf(b1, sizeof(b1),
               "%ld 0 obj\n"
               "<<\n"
               "  /Length %lu\n"
               "  /Subtype /Image\n",
               objnum, static_cast<unsigned long>(cid->nbytescomp));
  if (n >= sizeof(b1)) {
    l_CIDataDestroy(&cid);
    return false;
  }

  n = snprintf(b2, sizeof(b2),
               "  /Width %d\n"
               "  /Height %d\n"
               "  /BitsPerComponent %d\n"
               "  /Filter %s\n"
               "  /DecodeParms\n"
               "  <<\n"
               "    /Predictor %d\n"
               "    /Colors %d\n"
               "%s"
               "    /Columns %d\n"
               "    /BitsPerComponent %d\n"
               "  >>\n"
               ">>\n"
               "stream\n",
               cid->w, cid->h, cid->bps, filter, predictor, cid->spp,
               group4, cid->w, cid->bps);
  if (n >= sizeof(b2)) {
    l_CIDataDestroy(&cid);
    return false;
  }

  const char *b3 =
      "endstream\n"
      "endobj\n";

  size_t b1_len = strlen(b1);
  size_t b2_len = strlen(b2);
  size_t b3_len = strlen(b3);
  size_t colorspace_len = strlen(colorspace);

  // Layout: header | colorspace | dictionary tail + "stream" | data | trailer.
  *pdf_object_size =
      b1_len + colorspace_len + b2_len + cid->nbytescomp + b3_len;
  *pdf_object = new char[*pdf_object_size];

  char *p = *pdf_object;
  memcpy(p, b1, b1_len);
  p += b1_len;
  memcpy(p, colorspace, colorspace_len);
  p += colorspace_len;
  memcpy(p, b2, b2_len);
  p += b2_len;
  memcpy(p, cid->datacomp, cid->nbytescomp);
  p += cid->nbytescomp;
  memcpy(p, b3, b3_len);
  l_CIDataDestroy(&cid);
  return true;
}
Beispiel #2
0
/**
 * Serializes an image as a complete PDF image object
 * ("<objnum> 0 obj ... stream ... endstream endobj").
 *
 * @param pix              Image to embed. NOTE: when it is a PNG with 4
 *                         samples per pixel, its spp field is overwritten
 *                         with 3 and not restored.
 * @param filename         Path of the image file; used to detect the file
 *                         format and passed to l_generateCIDataForPdf().
 * @param objnum           PDF object number written into the object header.
 * @param pdf_object       Out: buffer allocated with new char[] holding the
 *                         object bytes (not NUL-terminated). NULL on
 *                         failure. Presumably released by the caller with
 *                         delete[] -- confirm against the call site.
 * @param pdf_object_size  Out: number of bytes in *pdf_object; 0 on failure.
 * @return true on success; false if an output pointer or filename is NULL,
 *         compression fails, the encoding type or samples-per-pixel value is
 *         unsupported, a header does not fit in its formatting buffer, or
 *         the output buffer could not be allocated.
 */
bool TessPDFRenderer::imageToPDFObj(Pix *pix,
                                    char *filename,
                                    long int objnum,
                                    char **pdf_object,
                                    long int *pdf_object_size) {
    // snprintf() returns int; storing it in a size_t turns a negative
    // (error) return into a huge value, so the ">= sizeof(buf)" checks
    // below reject both truncation and formatting errors.
    size_t n;
    char b0[kBasicBufSize];
    char b1[kBasicBufSize];
    char b2[kBasicBufSize];
    if (!pdf_object_size || !pdf_object)
        return false;
    *pdf_object = NULL;
    *pdf_object_size = 0;
    if (!filename)
        return false;

    L_COMP_DATA *cid = NULL;
    const int kJpegQuality = 85;

    // TODO(jbreiden) Leptonica 1.71 doesn't correctly handle certain
    // types of PNG files, especially if there are 2 samples per pixel.
    // We can get rid of this logic after Leptonica 1.72 is released and
    // has propagated everywhere. Bug discussion as follows.
    // https://code.google.com/p/tesseract-ocr/issues/detail?id=1300
    int format, sad;
    findFileFormat(filename, &format);
    if (pixGetSpp(pix) == 4 && format == IFF_PNG) {
        // Mutates the caller's image: spp stays at 3 after this call.
        pixSetSpp(pix, 3);
        sad = pixGenerateCIData(pix, L_FLATE_ENCODE, 0, 0, &cid);
    } else {
        sad = l_generateCIDataForPdf(filename, pix, kJpegQuality, &cid);
    }

    if (sad || !cid) {
        l_CIDataDestroy(&cid);
        return false;
    }

    // Map the Leptonica encoding type to the PDF stream filter name.
    // Only the CCITT G4 filter needs the extra /K DecodeParms entry.
    const char *group4 = "";
    const char *filter;
    switch (cid->type) {
    case L_FLATE_ENCODE:
        filter = "/FlateDecode";
        break;
    case L_JPEG_ENCODE:
        filter = "/DCTDecode";
        break;
    case L_G4_ENCODE:
        filter = "/CCITTFaxDecode";
        group4 = "    /K -1\n";
        break;
    case L_JP2K_ENCODE:
        filter = "/JPXDecode";
        break;
    default:
        l_CIDataDestroy(&cid);
        return false;
    }

    // Maybe someday we will accept RGBA but today is not that day.
    // It requires creating an /SMask for the alpha channel.
    // http://stackoverflow.com/questions/14220221
    const char *colorspace;
    if (cid->ncolors > 0) {
        // Colormapped image: emit an indexed colorspace with the palette.
        n = snprintf(b0, sizeof(b0),
                     "  /ColorSpace [ /Indexed /DeviceRGB %d %s ]\n",
                     cid->ncolors - 1, cid->cmapdatahex);
        if (n >= sizeof(b0)) {
            l_CIDataDestroy(&cid);
            return false;
        }
        colorspace = b0;
    } else {
        switch (cid->spp) {
        case 1:
            colorspace = "  /ColorSpace /DeviceGray\n";
            break;
        case 3:
            colorspace = "  /ColorSpace /DeviceRGB\n";
            break;
        default:
            l_CIDataDestroy(&cid);
            return false;
        }
    }

    // PDF DecodeParms predictor: 1 = no prediction, 14 = PNG prediction.
    int predictor = (cid->predictor) ? 14 : 1;

    // IMAGE
    // The length is cast to unsigned long, so it must be printed with %lu
    // (the previous %ld mismatched the argument type).
    n = snprintf(b1, sizeof(b1),
                 "%ld 0 obj\n"
                 "<<\n"
                 "  /Length %lu\n"
                 "  /Subtype /Image\n",
                 objnum, (unsigned long) cid->nbytescomp);
    if (n >= sizeof(b1)) {
        l_CIDataDestroy(&cid);
        return false;
    }

    n = snprintf(b2, sizeof(b2),
                 "  /Width %d\n"
                 "  /Height %d\n"
                 "  /BitsPerComponent %d\n"
                 "  /Filter %s\n"
                 "  /DecodeParms\n"
                 "  <<\n"
                 "    /Predictor %d\n"
                 "    /Colors %d\n"
                 "%s"
                 "    /Columns %d\n"
                 "    /BitsPerComponent %d\n"
                 "  >>\n"
                 ">>\n"
                 "stream\n",
                 cid->w, cid->h, cid->bps, filter, predictor, cid->spp,
                 group4, cid->w, cid->bps);
    if (n >= sizeof(b2)) {
        l_CIDataDestroy(&cid);
        return false;
    }

    const char *b3 =
        "endstream\n"
        "endobj\n";

    size_t b1_len = strlen(b1);
    size_t b2_len = strlen(b2);
    size_t b3_len = strlen(b3);
    size_t colorspace_len = strlen(colorspace);

    // Layout: header | colorspace | dictionary tail + "stream" | data |
    // trailer.
    *pdf_object_size =
        b1_len + colorspace_len + b2_len + cid->nbytescomp + b3_len;
    *pdf_object = new char[*pdf_object_size];
    // Check the allocated buffer itself (the old code re-tested the
    // already-validated pdf_object argument). A standard throwing new never
    // yields NULL, so this only matters where allocation can return NULL.
    if (!*pdf_object) {
        *pdf_object_size = 0;
        l_CIDataDestroy(&cid);
        return false;
    }

    char *p = *pdf_object;
    memcpy(p, b1, b1_len);
    p += b1_len;
    memcpy(p, colorspace, colorspace_len);
    p += colorspace_len;
    memcpy(p, b2, b2_len);
    p += b2_len;
    memcpy(p, cid->datacomp, cid->nbytescomp);
    p += cid->nbytescomp;
    memcpy(p, b3, b3_len);
    l_CIDataDestroy(&cid);
    return true;
}
Beispiel #3
0
/**
 * Serializes an in-memory image as a complete PDF image object
 * ("<objnum> 0 obj ... stream ... endstream endobj").
 *
 * The encoding is chosen by selectDefaultPdfEncoding() and the compressed
 * data is produced by pixGenerateCIData().
 *
 * @param pix              Image to embed.
 * @param objnum           PDF object number written into the object header.
 * @param pdf_object       Out: buffer allocated with new char[] holding the
 *                         object bytes (not NUL-terminated). NULL on
 *                         failure. Presumably released by the caller with
 *                         delete[] -- confirm against the call site.
 * @param pdf_object_size  Out: number of bytes in *pdf_object; 0 on failure.
 * @return true on success; false if an output pointer is NULL, encoding
 *         selection or compression fails, the encoding type or
 *         samples-per-pixel value is unsupported, a header does not fit in
 *         its formatting buffer, or the output buffer could not be
 *         allocated.
 */
bool TessPDFRenderer::pixToPDFObj(Pix *pix, long int objnum,
                                  char **pdf_object,
                                  long int *pdf_object_size) {
  if (!pdf_object_size || !pdf_object)
    return false;
  *pdf_object = NULL;
  *pdf_object_size = 0;
  char b0[kBasicBufSize];
  char b1[kBasicBufSize * 2];
  L_COMP_DATA *cid = NULL;
  int encoding_type;
  const int kJpegQuality = 85;
  if (selectDefaultPdfEncoding(pix, &encoding_type) != 0)
    return false;
  if (pixGenerateCIData(pix, encoding_type, kJpegQuality, 0, &cid) != 0 ||
      !cid) {
    l_CIDataDestroy(&cid);
    return false;
  }

  // From here on cid owns the compressed data and must be destroyed on
  // every exit path (it was previously leaked on all of them).
  const char *filter;
  switch (encoding_type) {
    case L_FLATE_ENCODE:
      filter = "/FlateDecode";
      break;
    case L_JPEG_ENCODE:
      filter = "/DCTDecode";
      break;
    case L_G4_ENCODE:
      filter = "/CCITTFaxDecode";
      break;
    default:
      l_CIDataDestroy(&cid);
      return false;
  }

  const char *colorspace;
  if (cid->ncolors > 0) {
    // Colormapped image: emit an indexed colorspace with the hex palette.
    // A truncated palette would corrupt the PDF, so treat it as failure.
    int n = snprintf(b0, sizeof(b0), "[ /Indexed /DeviceRGB %d %s ]",
                     cid->ncolors - 1, cid->cmapdatahex);
    if (n < 0 || static_cast<size_t>(n) >= sizeof(b0)) {
      l_CIDataDestroy(&cid);
      return false;
    }
    colorspace = b0;
  } else {
    switch (cid->spp) {
      case 1:
        colorspace = "/DeviceGray";
        break;
      case 3:
        colorspace = "/DeviceRGB";
        break;
      default:
        l_CIDataDestroy(&cid);
        return false;
    }
  }

  // NOTE(review): "/K -1" is emitted for every filter although it is a
  // CCITTFaxDecode parameter; kept as-is to preserve the existing output --
  // confirm whether it should be limited to L_G4_ENCODE.
  int header_len = snprintf(b1, sizeof(b1),
           "%ld 0 obj\n"
           "<<\n"
           "  /Length %lu\n"
           "  /Subtype /Image\n"
           "  /ColorSpace %s\n"
           "  /Width %d\n"
           "  /Height %d\n"
           "  /BitsPerComponent %d\n"
           "  /Filter %s\n"
           "  /DecodeParms\n"
           "  <<\n"
           "    /K -1\n"
           "    /Columns %d\n"
           "  >>\n"
           ">>\n"
           "stream\n",
           objnum, (unsigned long) cid->nbytescomp, colorspace,
           cid->w, cid->h, cid->bps, filter, cid->w);
  if (header_len < 0 || static_cast<size_t>(header_len) >= sizeof(b1)) {
    l_CIDataDestroy(&cid);
    return false;
  }
  size_t b1_len = static_cast<size_t>(header_len);

  // Constant trailer; no formatting needed.
  const char *b2 =
      "\n"
      "endstream\n"
      "endobj\n";
  size_t b2_len = strlen(b2);

  // Layout: header (through "stream\n") | compressed data | trailer.
  *pdf_object_size = b1_len + cid->nbytescomp + b2_len;
  *pdf_object = new char[*pdf_object_size];
  // Check the allocated buffer itself (the old code re-tested the
  // already-validated pdf_object argument). A standard throwing new never
  // yields NULL, so this only matters where allocation can return NULL.
  if (!*pdf_object) {
    *pdf_object_size = 0;
    l_CIDataDestroy(&cid);
    return false;
  }
  memcpy(*pdf_object, b1, b1_len);
  memcpy(*pdf_object + b1_len, cid->datacomp, cid->nbytescomp);
  memcpy(*pdf_object + b1_len + cid->nbytescomp, b2, b2_len);

  l_CIDataDestroy(&cid);
  return true;
}