/*
 *  testcomp()
 *
 *      Input:  filename (file that was written and is now verified)
 *              pix (image that is compared with the one read back)
 *              comptype (file format the file is expected to have)
 *      Return: 0 if the file has format comptype and reads back equal
 *              to pix; 1 otherwise or on error
 *
 *  Notes:
 *      (1) Diagnostics are written to stderr.
 *      (2) If the file cannot be opened, or its format cannot be
 *          determined, the function reports that and returns 1 without
 *          attempting the image comparison.
 */
static l_int32
testcomp(const char  *filename,
         PIX         *pix,
         l_int32      comptype)
{
    l_int32  format, sameformat, sameimage;
    FILE    *fp;
    PIX     *pixt;

    if ((fp = lept_fopen(filename, "rb")) == NULL) {
        fprintf(stderr, "File %s could not be opened\n", filename);
        return 1;
    }

        /* Do not read 'format' unless the detection call succeeded */
    if (findFileFormatStream(fp, &format)) {
        fprintf(stderr, "Format of file %s not determined; expected %d\n",
                filename, comptype);
        lept_fclose(fp);
        return 1;
    }
    lept_fclose(fp);

    sameformat = TRUE;
    if (format != comptype) {
        fprintf(stderr, "File %s has format %d, not comptype %d\n",
                filename, format, comptype);
        sameformat = FALSE;
    }

        /* A failed read counts as a mismatch; sameimage is preset so
         * that it is defined even if pixEqual() does not write it. */
    sameimage = FALSE;
    if ((pixt = pixRead(filename)) != NULL) {
        pixEqual(pix, pixt, &sameimage);
        pixDestroy(&pixt);
    }
    if (!sameimage)
        fprintf(stderr, "Write/read fail for file %s with format %d\n",
                filename, format);

    return (!sameformat || !sameimage);
}
/*! * fileFormatIsTiff() * * Input: fp (file stream) * Return: 1 if file is tiff; 0 otherwise or on error */ l_int32 fileFormatIsTiff(FILE *fp) { l_int32 format; PROCNAME("fileFormatIsTiff"); if (!fp) return ERROR_INT("stream not defined", procName, 0); findFileFormatStream(fp, &format); if (format == IFF_TIFF || format == IFF_TIFF_PACKBITS || format == IFF_TIFF_RLE || format == IFF_TIFF_G3 || format == IFF_TIFF_G4 || format == IFF_TIFF_LZW || format == IFF_TIFF_ZIP) return 1; else return 0; }
/*!
 *  findFileFormat()
 *
 *      Input:  filename
 *              &format (<return>)
 *      Return: 0 if OK, 1 on error or if format is not recognized
 *
 *  Notes:
 *      (1) *pformat is set to IFF_UNKNOWN before the file is opened,
 *          so it holds that value on every early error return.
 *      (2) The file is opened, handed to findFileFormatStream(), and
 *          closed again; the stream call's status is returned as-is.
 */
l_int32
findFileFormat(const char  *filename,
               l_int32     *pformat)
{
    l_int32  status;
    FILE    *stream;

    PROCNAME("findFileFormat");

    if (pformat == NULL)
        return ERROR_INT("&format not defined", procName, 1);
    *pformat = IFF_UNKNOWN;
    if (filename == NULL)
        return ERROR_INT("filename not defined", procName, 1);

    stream = fopenReadStream(filename);
    if (stream == NULL)
        return ERROR_INT("image file not found", procName, 1);

    status = findFileFormatStream(stream, pformat);
    fclose(stream);
    return status;
}
/*!
 *  pixReadHeader()
 *
 *      Input:  filename (with full pathname or in local directory)
 *              &format (<optional return> file format)
 *              &w, &h (<optional returns> width and height)
 *              &bps <optional return> bits/sample
 *              &spp <optional return> samples/pixel (1, 3 or 4)
 *              &iscmap (<optional return> 1 if cmap exists; 0 otherwise)
 *      Return: 0 if OK, 1 on error
 *
 *  Notes:
 *      (1) For jpeg, png, tiff, pnm, jp2k, webp and spix this calls the
 *          format-specific header reader.  For bmp and gif, we cheat
 *          and read the entire file into a pix, from which we extract
 *          the "header" information.
 *      (2) All optional return values are zeroed on entry, so they are
 *          0 whenever the function returns 1.
 *      (3) A format that is detected but has no header reader here is
 *          reported as an error; nothing is returned for it.
 */
l_int32
pixReadHeader(const char  *filename,
              l_int32     *pformat,
              l_int32     *pw,
              l_int32     *ph,
              l_int32     *pbps,
              l_int32     *pspp,
              l_int32     *piscmap)
{
    l_int32  format, ret, w, h, d, bps, spp, iscmap;
    l_int32  type;  /* ignored */
    FILE    *fp;
    PIX     *pix;

    PROCNAME("pixReadHeader");

    if (pw) *pw = 0;
    if (ph) *ph = 0;
    if (pbps) *pbps = 0;
    if (pspp) *pspp = 0;
    if (piscmap) *piscmap = 0;
    if (pformat) *pformat = 0;
    iscmap = 0;  /* init to false */
    if (!filename)
        return ERROR_INT("filename not defined", procName, 1);

    if ((fp = fopenReadStream(filename)) == NULL)
        return ERROR_INT("image file not found", procName, 1);
        /* Preset so the switch sees a defined value even if the
         * detection call fails without writing the format. */
    format = IFF_UNKNOWN;
    findFileFormatStream(fp, &format);
    fclose(fp);

    switch (format)
    {
    case IFF_BMP:  /* cheating: reading the entire file */
        if ((pix = pixRead(filename)) == NULL)
            return ERROR_INT( "bmp: pix not read", procName, 1);
        pixGetDimensions(pix, &w, &h, &d);
        if (pixGetColormap(pix))
            iscmap = 1;
        pixDestroy(&pix);
        bps = (d == 32) ? 8 : d;
        spp = (d == 32) ? 3 : 1;
        break;

    case IFF_JFIF_JPEG:
        ret = readHeaderJpeg(filename, &w, &h, &spp, NULL, NULL);
        bps = 8;
        if (ret)
            return ERROR_INT( "jpeg: no header info returned", procName, 1);
        break;

    case IFF_PNG:
        ret = readHeaderPng(filename, &w, &h, &bps, &spp, &iscmap);
        if (ret)
            return ERROR_INT( "png: no header info returned", procName, 1);
        break;

    case IFF_TIFF:
    case IFF_TIFF_PACKBITS:
    case IFF_TIFF_RLE:
    case IFF_TIFF_G3:
    case IFF_TIFF_G4:
    case IFF_TIFF_LZW:
    case IFF_TIFF_ZIP:
            /* Reading page 0 by default; possibly redefine format */
        ret = readHeaderTiff(filename, 0, &w, &h, &bps, &spp, NULL, &iscmap,
                             &format);
        if (ret)
            return ERROR_INT( "tiff: no header info returned", procName, 1);
        break;

    case IFF_PNM:
        ret = readHeaderPnm(filename, &w, &h, &d, &type, &bps, &spp);
        if (ret)
            return ERROR_INT( "pnm: no header info returned", procName, 1);
        break;

    case IFF_GIF:  /* cheating: reading the entire file */
        if ((pix = pixRead(filename)) == NULL)
            return ERROR_INT( "gif: pix not read", procName, 1);
        pixGetDimensions(pix, &w, &h, &d);
        pixDestroy(&pix);
        iscmap = 1;  /* always colormapped; max 256 colors */
        spp = 1;
        bps = d;
        break;

    case IFF_JP2:
            /* The status was previously dropped, letting a failed
             * read fall through and report success. */
        ret = readHeaderJp2k(filename, &w, &h, &bps, &spp);
        if (ret)
            return ERROR_INT( "jp2k: no header info returned", procName, 1);
        break;

    case IFF_WEBP:
        if (readHeaderWebP(filename, &w, &h, &spp))
            return ERROR_INT( "webp: no header info returned", procName, 1);
        bps = 8;
        break;

    case IFF_SPIX:
        ret = readHeaderSpix(filename, &w, &h, &bps, &spp, &iscmap);
        if (ret)
            return ERROR_INT( "spix: no header info returned", procName, 1);
        break;

    case IFF_UNKNOWN:
        L_ERROR("unknown format in file %s\n", procName, filename);
        return 1;

    default:
            /* Without this, w, h, bps and spp would be copied out
             * uninitialized for any format not listed above. */
        return ERROR_INT("format has no header reader", procName, 1);
    }

    if (pw) *pw = w;
    if (ph) *ph = h;
    if (pbps) *pbps = bps;
    if (pspp) *pspp = spp;
    if (piscmap) *piscmap = iscmap;
    if (pformat) *pformat = format;
    return 0;
}
/*!
 *  pixReadStream()
 *
 *      Input:  fp (file stream)
 *              hint (bitwise OR of L_HINT_* values for jpeg; use 0 for no hint)
 *      Return: pix if OK; null on error
 *
 *  Notes:
 *      (1) The hint only applies to jpeg.  The jpeg branch is currently
 *          commented out below, so the hint is not used and a jpeg
 *          stream is rejected as an unsupported format.
 *      (2) On success the detected format is recorded on the pix with
 *          pixSetInputFormat().
 */
PIX *
pixReadStream(FILE    *fp,
              l_int32  hint)
{
    l_int32  format;
    PIX     *pix;

    PROCNAME("pixReadStream");

    if (!fp)
        return (PIX *)ERROR_PTR("stream not defined", procName, NULL);
    pix = NULL;

        /* Preset so the switch sees a defined value even if the
         * detection call fails without writing the format. */
    format = IFF_UNKNOWN;
    findFileFormatStream(fp, &format);
    switch (format)
    {
    case IFF_BMP:
        if ((pix = pixReadStreamBmp(fp)) == NULL )
            return (PIX *)ERROR_PTR( "bmp: no pix returned", procName, NULL);
        break;

        /* Disabled jpeg branch, kept for reference.  Re-enabling it
         * also requires restoring the locals it uses:
         *     l_int32 ret;  l_uint8 *comment;                        */
//    case IFF_JFIF_JPEG:
//        if ((pix = pixReadStreamJpeg(fp, READ_24_BIT_COLOR, 1, NULL, hint))
//                == NULL)
//            return (PIX *)ERROR_PTR( "jpeg: no pix returned", procName, NULL);
//        ret = fgetJpegComment(fp, &comment);
//        if (!ret && comment)
//            pixSetText(pix, (char *)comment);
//        FREE(comment);
//        break;

    case IFF_PNG:
        if ((pix = pixReadStreamPng(fp)) == NULL)
            return (PIX *)ERROR_PTR("png: no pix returned", procName, NULL);
        break;

    case IFF_TIFF:
    case IFF_TIFF_PACKBITS:
    case IFF_TIFF_RLE:
    case IFF_TIFF_G3:
    case IFF_TIFF_G4:
    case IFF_TIFF_LZW:
    case IFF_TIFF_ZIP:
        if ((pix = pixReadStreamTiff(fp, 0)) == NULL)  /* page 0 by default */
            return (PIX *)ERROR_PTR("tiff: no pix returned", procName, NULL);
        break;

    case IFF_PNM:
        if ((pix = pixReadStreamPnm(fp)) == NULL)
            return (PIX *)ERROR_PTR("pnm: no pix returned", procName, NULL);
        break;

    case IFF_GIF:
        if ((pix = pixReadStreamGif(fp)) == NULL)
            return (PIX *)ERROR_PTR("gif: no pix returned", procName, NULL);
        break;

    case IFF_JP2:
        if ((pix = pixReadStreamJp2k(fp, 1, NULL, 0)) == NULL)
            return (PIX *)ERROR_PTR("jp2: no pix returned", procName, NULL);
        break;

    case IFF_WEBP:
        if ((pix = pixReadStreamWebP(fp)) == NULL)
            return (PIX *)ERROR_PTR("webp: no pix returned", procName, NULL);
        break;

    case IFF_SPIX:
        if ((pix = pixReadStreamSpix(fp)) == NULL)
            return (PIX *)ERROR_PTR("spix: no pix returned", procName, NULL);
        break;

    case IFF_UNKNOWN:
        return (PIX *)ERROR_PTR( "Unknown format: no pix returned",
                                procName, NULL);

    default:
            /* Previously a recognized-but-unhandled format (including
             * jpeg) fell out of the switch and returned NULL silently. */
        return (PIX *)ERROR_PTR( "format not supported: no pix returned",
                                procName, NULL);
    }

    if (pix)
        pixSetInputFormat(pix, format);
    return pix;
}
// TODO(jbreiden) I hear that you can pull the flate stream out // of a PNG file and, by mentioning the predictor in the PDF object, // make most of them work without transcoding. If so that's a big win // versus what we do now. Try it out. bool TessPDFRenderer::fileToPDFObj(char *filename, long int objnum, char **pdf_object, long int *pdf_object_size) { char b1[kBasicBufSize]; char b2[kBasicBufSize]; if (!pdf_object_size || !pdf_object) return false; *pdf_object = NULL; *pdf_object_size = 0; if (!filename) return false; FILE *fp = fopen(filename, "rb"); if (!fp) return false; int format; findFileFormatStream(fp, &format); if (format != IFF_JFIF_JPEG) { fclose(fp); return false; } fseek(fp, 0, SEEK_END); long int jpeg_size = ftell(fp); fseek(fp, 0, SEEK_SET); int spp, cmyk, w, h; freadHeaderJpeg(fp, &w, &h, &spp, NULL, &cmyk); const char *colorspace; switch (spp) { case 1: colorspace = "/DeviceGray"; break; case 3: colorspace = "/DeviceRGB"; break; case 4: if (cmyk) colorspace = "/DeviceCMYK"; else return false; break; default: return false; } // IMAGE snprintf(b1, sizeof(b1), "%ld 0 obj\n" "<<\n" " /Length %ld\n" " /Subtype /Image\n" " /ColorSpace %s\n" " /Width %d\n" " /Height %d\n" " /BitsPerComponent 8\n" " /Filter /DCTDecode\n" ">>\n" "stream\n", objnum, jpeg_size, colorspace, w, h); size_t b1_len = strlen(b1); snprintf(b2, sizeof(b2), "\n" "endstream\n" "endobj\n"); size_t b2_len = strlen(b2); *pdf_object_size = b1_len + jpeg_size + b2_len; *pdf_object = new char[*pdf_object_size]; if (!pdf_object) return false; memcpy(*pdf_object, b1, b1_len); if (static_cast<int>(fread(*pdf_object + b1_len, 1, jpeg_size, fp)) != jpeg_size) { delete[] pdf_object; return false; } memcpy(*pdf_object + b1_len + jpeg_size, b2, b2_len); fclose(fp); return true; }