bool TessPDFRenderer::imageToPDFObj(Pix *pix, char *filename, long int objnum, char **pdf_object, long int *pdf_object_size) { size_t n; char b0[kBasicBufSize]; char b1[kBasicBufSize]; char b2[kBasicBufSize]; if (!pdf_object_size || !pdf_object) return false; *pdf_object = nullptr; *pdf_object_size = 0; if (!filename) return false; L_Compressed_Data *cid = nullptr; const int kJpegQuality = 85; int format, sad; findFileFormat(filename, &format); if (pixGetSpp(pix) == 4 && format == IFF_PNG) { Pix *p1 = pixAlphaBlendUniform(pix, 0xffffff00); sad = pixGenerateCIData(p1, L_FLATE_ENCODE, 0, 0, &cid); pixDestroy(&p1); } else { sad = l_generateCIDataForPdf(filename, pix, kJpegQuality, &cid); } if (sad || !cid) { l_CIDataDestroy(&cid); return false; } const char *group4 = ""; const char *filter; switch(cid->type) { case L_FLATE_ENCODE: filter = "/FlateDecode"; break; case L_JPEG_ENCODE: filter = "/DCTDecode"; break; case L_G4_ENCODE: filter = "/CCITTFaxDecode"; group4 = " /K -1\n"; break; case L_JP2K_ENCODE: filter = "/JPXDecode"; break; default: l_CIDataDestroy(&cid); return false; } // Maybe someday we will accept RGBA but today is not that day. // It requires creating an /SMask for the alpha channel. // http://stackoverflow.com/questions/14220221 const char *colorspace; if (cid->ncolors > 0) { n = snprintf(b0, sizeof(b0), " /ColorSpace [ /Indexed /DeviceRGB %d %s ]\n", cid->ncolors - 1, cid->cmapdatahex); if (n >= sizeof(b0)) { l_CIDataDestroy(&cid); return false; } colorspace = b0; } else { switch (cid->spp) { case 1: colorspace = " /ColorSpace /DeviceGray\n"; break; case 3: colorspace = " /ColorSpace /DeviceRGB\n"; break; default: l_CIDataDestroy(&cid); return false; } } int predictor = (cid->predictor) ? 14 : 1; // IMAGE n = snprintf(b1, sizeof(b1), "%ld 0 obj\n" "<<\n" " /Length %ld\n" " /Subtype /Image\n", objnum, (unsigned long) cid->nbytescomp); if (n >= sizeof(b1)) { l_CIDataDestroy(&cid); return false; } n = snprintf(b2, sizeof(b2), " /Width %d\n" " /Height %d\n" " /BitsPerComponent %d\n" " /Filter %s\n" " /DecodeParms\n" " <<\n" " /Predictor %d\n" " /Colors %d\n" "%s" " /Columns %d\n" " /BitsPerComponent %d\n" " >>\n" ">>\n" "stream\n", cid->w, cid->h, cid->bps, filter, predictor, cid->spp, group4, cid->w, cid->bps); if (n >= sizeof(b2)) { l_CIDataDestroy(&cid); return false; } const char *b3 = "endstream\n" "endobj\n"; size_t b1_len = strlen(b1); size_t b2_len = strlen(b2); size_t b3_len = strlen(b3); size_t colorspace_len = strlen(colorspace); *pdf_object_size = b1_len + colorspace_len + b2_len + cid->nbytescomp + b3_len; *pdf_object = new char[*pdf_object_size]; char *p = *pdf_object; memcpy(p, b1, b1_len); p += b1_len; memcpy(p, colorspace, colorspace_len); p += colorspace_len; memcpy(p, b2, b2_len); p += b2_len; memcpy(p, cid->datacomp, cid->nbytescomp); p += cid->nbytescomp; memcpy(p, b3, b3_len); l_CIDataDestroy(&cid); return true; }
bool TessPDFRenderer::imageToPDFObj(Pix *pix, char *filename, long int objnum, char **pdf_object, long int *pdf_object_size) { size_t n; char b0[kBasicBufSize]; char b1[kBasicBufSize]; char b2[kBasicBufSize]; if (!pdf_object_size || !pdf_object) return false; *pdf_object = NULL; *pdf_object_size = 0; if (!filename) return false; L_COMP_DATA *cid = NULL; const int kJpegQuality = 85; // TODO(jbreiden) Leptonica 1.71 doesn't correctly handle certain // types of PNG files, especially if there are 2 samples per pixel. // We can get rid of this logic after Leptonica 1.72 is released and // has propagated everywhere. Bug discussion as follows. // https://code.google.com/p/tesseract-ocr/issues/detail?id=1300 int format, sad; findFileFormat(filename, &format); if (pixGetSpp(pix) == 4 && format == IFF_PNG) { pixSetSpp(pix, 3); sad = pixGenerateCIData(pix, L_FLATE_ENCODE, 0, 0, &cid); } else { sad = l_generateCIDataForPdf(filename, pix, kJpegQuality, &cid); } if (sad || !cid) { l_CIDataDestroy(&cid); return false; } const char *group4 = ""; const char *filter; switch (cid->type) { case L_FLATE_ENCODE: filter = "/FlateDecode"; break; case L_JPEG_ENCODE: filter = "/DCTDecode"; break; case L_G4_ENCODE: filter = "/CCITTFaxDecode"; group4 = " /K -1\n"; break; case L_JP2K_ENCODE: filter = "/JPXDecode"; break; default: l_CIDataDestroy(&cid); return false; } // Maybe someday we will accept RGBA but today is not that day. // It requires creating an /SMask for the alpha channel. // http://stackoverflow.com/questions/14220221 const char *colorspace; if (cid->ncolors > 0) { n = snprintf(b0, sizeof(b0), " /ColorSpace [ /Indexed /DeviceRGB %d %s ]\n", cid->ncolors - 1, cid->cmapdatahex); if (n >= sizeof(b0)) { l_CIDataDestroy(&cid); return false; } colorspace = b0; } else { switch (cid->spp) { case 1: colorspace = " /ColorSpace /DeviceGray\n"; break; case 3: colorspace = " /ColorSpace /DeviceRGB\n"; break; default: l_CIDataDestroy(&cid); return false; } } int predictor = (cid->predictor) ? 14 : 1; // IMAGE n = snprintf(b1, sizeof(b1), "%ld 0 obj\n" "<<\n" " /Length %ld\n" " /Subtype /Image\n", objnum, (unsigned long) cid->nbytescomp); if (n >= sizeof(b1)) { l_CIDataDestroy(&cid); return false; } n = snprintf(b2, sizeof(b2), " /Width %d\n" " /Height %d\n" " /BitsPerComponent %d\n" " /Filter %s\n" " /DecodeParms\n" " <<\n" " /Predictor %d\n" " /Colors %d\n" "%s" " /Columns %d\n" " /BitsPerComponent %d\n" " >>\n" ">>\n" "stream\n", cid->w, cid->h, cid->bps, filter, predictor, cid->spp, group4, cid->w, cid->bps); if (n >= sizeof(b2)) { l_CIDataDestroy(&cid); return false; } const char *b3 = "endstream\n" "endobj\n"; size_t b1_len = strlen(b1); size_t b2_len = strlen(b2); size_t b3_len = strlen(b3); size_t colorspace_len = strlen(colorspace); *pdf_object_size = b1_len + colorspace_len + b2_len + cid->nbytescomp + b3_len; *pdf_object = new char[*pdf_object_size]; if (!pdf_object) { l_CIDataDestroy(&cid); return false; } char *p = *pdf_object; memcpy(p, b1, b1_len); p += b1_len; memcpy(p, colorspace, colorspace_len); p += colorspace_len; memcpy(p, b2, b2_len); p += b2_len; memcpy(p, cid->datacomp, cid->nbytescomp); p += cid->nbytescomp; memcpy(p, b3, b3_len); l_CIDataDestroy(&cid); return true; }
bool TessPDFRenderer::pixToPDFObj(Pix *pix, long int objnum, char **pdf_object, long int *pdf_object_size) { if (!pdf_object_size || !pdf_object) return false; *pdf_object = NULL; *pdf_object_size = 0; char b0[kBasicBufSize]; char b1[kBasicBufSize * 2]; char b2[kBasicBufSize]; L_COMP_DATA *cid; int encoding_type; const int kJpegQuality = 85; if (selectDefaultPdfEncoding(pix, &encoding_type) != 0) return false; if (pixGenerateCIData(pix, encoding_type, kJpegQuality, 0, &cid) != 0) return false; const char *filter; switch(encoding_type) { case L_FLATE_ENCODE: filter = "/FlateDecode"; break; case L_JPEG_ENCODE: filter = "/DCTDecode"; break; case L_G4_ENCODE: filter = "/CCITTFaxDecode"; break; default: return false; } const char *colorspace; if (cid->ncolors > 0) { snprintf(b0, sizeof(b0), "[ /Indexed /DeviceRGB %d %s ]", cid->ncolors - 1, cid->cmapdatahex); colorspace = b0; } else { switch (cid->spp) { case 1: colorspace = "/DeviceGray"; break; case 3: colorspace = "/DeviceRGB"; break; default: return false; } } snprintf(b1, sizeof(b1), "%ld 0 obj\n" "<<\n" " /Length %lu\n" " /Subtype /Image\n" " /ColorSpace %s\n" " /Width %d\n" " /Height %d\n" " /BitsPerComponent %d\n" " /Filter %s\n" " /DecodeParms\n" " <<\n" " /K -1\n" " /Columns %d\n" " >>\n" ">>\n" "stream\n", objnum, (unsigned long) cid->nbytescomp, colorspace, cid->w, cid->h, cid->bps, filter, cid->w); size_t b1_len = strlen(b1); snprintf(b2, sizeof(b2), "\n" "endstream\n" "endobj\n"); size_t b2_len = strlen(b2); *pdf_object_size = b1_len + cid->nbytescomp + b2_len; *pdf_object = new char[*pdf_object_size]; if (!pdf_object) return false; memcpy(*pdf_object, b1, b1_len); memcpy(*pdf_object + b1_len, cid->datacomp, cid->nbytescomp); memcpy(*pdf_object + b1_len + cid->nbytescomp, b2, b2_len); return true; }