/**
 * Emits the PDF objects for the page currently loaded in `api`.
 *
 * Three objects are written, in this order:
 *   1. the /Page dictionary (object number obj_),
 *   2. the Flate-compressed content stream produced by GetPDFTextObjects,
 *   3. the image object produced by imageToPDFObj.
 *
 * @param api  Source of the input image, its name and its Y resolution.
 * @return false when there is no input image, the resolution is not
 *         positive, a formatted header does not fit in the scratch buffer,
 *         compression fails, or imageToPDFObj reports failure; true otherwise.
 */
bool TessPDFRenderer::AddImageHandler(TessBaseAPI *api) {
  size_t n;
  char buf[kBasicBufSize];
  Pix *pix = api->GetInputImage();
  char *filename = (char *)api->GetInputName();
  int ppi = api->GetSourceYResolution();
  if (!pix || ppi <= 0)
    return false;

  // Page size expressed in 1/72-inch units, as used by /MediaBox below.
  double width = pixGetWidth(pix) * 72.0 / ppi;
  double height = pixGetHeight(pix) * 72.0 / ppi;

  // PAGE
  n = snprintf(buf, sizeof(buf),
               "%ld 0 obj\n"
               "<<\n"
               " /Type /Page\n"
               " /Parent %ld 0 R\n"
               " /MediaBox [0 0 %.2f %.2f]\n"
               " /Contents %ld 0 R\n"
               " /Resources\n"
               " <<\n"
               " /XObject << /Im1 %ld 0 R >>\n"
               " /ProcSet [ /PDF /Text /ImageB /ImageI /ImageC ]\n"
               " /Font << /f-0-0 %ld 0 R >>\n"
               " >>\n"
               ">>\n"
               "endobj\n",
               obj_,
               2L,  // Pages object
               width, height,
               obj_ + 1,  // Contents object
               obj_ + 2,  // Image object
               3L);       // Type0 Font
  // A negative snprintf result wraps to a huge size_t, so this single
  // comparison catches both truncation and encoding errors.
  if (n >= sizeof(buf))
    return false;
  pages_.push_back(obj_);
  AppendPDFObject(buf);

  // CONTENTS
  char *pdftext = GetPDFTextObjects(api, width, height);
  long pdftext_len = strlen(pdftext);
  unsigned char *pdftext_casted = reinterpret_cast<unsigned char *>(pdftext);
  // Initialised so it is never read indeterminate if compression fails.
  size_t len = 0;
  unsigned char *comp_pdftext =
      zlibCompress(pdftext_casted, pdftext_len, &len);
  if (!comp_pdftext) {
    // Compression failed: nothing valid to append, release the text and stop.
    delete[] pdftext;
    return false;
  }
  long comp_pdftext_len = len;
  n = snprintf(buf, sizeof(buf),
               "%ld 0 obj\n"
               "<<\n"
               " /Length %ld /Filter /FlateDecode\n"
               ">>\n"
               "stream\n",
               obj_, comp_pdftext_len);
  if (n >= sizeof(buf)) {
    delete[] pdftext;
    lept_free(comp_pdftext);
    return false;
  }
  AppendString(buf);
  // objsize accumulates the byte count of header + stream data + trailer,
  // which is then handed to AppendPDFObjectDIY.
  long objsize = strlen(buf);
  AppendData(reinterpret_cast<char *>(comp_pdftext), comp_pdftext_len);
  objsize += comp_pdftext_len;
  lept_free(comp_pdftext);
  delete[] pdftext;

  const char *b2 =
      "endstream\n"
      "endobj\n";
  AppendString(b2);
  objsize += strlen(b2);
  AppendPDFObjectDIY(objsize);

  // IMAGE
  char *pdf_object;
  if (!imageToPDFObj(pix, filename, obj_, &pdf_object, &objsize)) {
    return false;
  }
  AppendData(pdf_object, objsize);
  AppendPDFObjectDIY(objsize);
  delete[] pdf_object;
  return true;
}
/**
 * Appends a complete PDF object given as a NUL-terminated string.
 *
 * The string's length is first reported through AppendPDFObjectDIY
 * (presumably the per-object bookkeeping; confirm against its definition),
 * then the text itself is written with AppendString.
 *
 * @param data  NUL-terminated object text; an empty string is accepted.
 */
void TessPDFRenderer::AppendPDFObject(const char *data) {
  const size_t object_length = strlen(data);
  AppendPDFObjectDIY(object_length);
  AppendString(data);
}
bool TessPDFRenderer::BeginDocumentHandler() { char buf[kBasicBufSize]; size_t n; n = snprintf(buf, sizeof(buf), "%%PDF-1.5\n" "%%%c%c%c%c\n", 0xDE, 0xAD, 0xBE, 0xEB); if (n >= sizeof(buf)) return false; AppendPDFObject(buf); // CATALOG n = snprintf(buf, sizeof(buf), "1 0 obj\n" "<<\n" " /Type /Catalog\n" " /Pages %ld 0 R\n" ">>\n" "endobj\n", 2L); if (n >= sizeof(buf)) return false; AppendPDFObject(buf); // We are reserving object #2 for the /Pages // object, which I am going to create and write // at the end of the PDF file. AppendPDFObject(""); // TYPE0 FONT n = snprintf(buf, sizeof(buf), "3 0 obj\n" "<<\n" " /BaseFont /GlyphLessFont\n" " /DescendantFonts [ %ld 0 R ]\n" " /Encoding /Identity-H\n" " /Subtype /Type0\n" " /ToUnicode %ld 0 R\n" " /Type /Font\n" ">>\n" "endobj\n", 4L, // CIDFontType2 font 6L // ToUnicode ); if (n >= sizeof(buf)) return false; AppendPDFObject(buf); // CIDFONTTYPE2 n = snprintf(buf, sizeof(buf), "4 0 obj\n" "<<\n" " /BaseFont /GlyphLessFont\n" " /CIDToGIDMap %ld 0 R\n" " /CIDSystemInfo\n" " <<\n" " /Ordering (Identity)\n" " /Registry (Adobe)\n" " /Supplement 0\n" " >>\n" " /FontDescriptor %ld 0 R\n" " /Subtype /CIDFontType2\n" " /Type /Font\n" " /DW %d\n" ">>\n" "endobj\n", 5L, // CIDToGIDMap 7L, // Font descriptor 1000 / kCharWidth); if (n >= sizeof(buf)) return false; AppendPDFObject(buf); // CIDTOGIDMAP const int kCIDToGIDMapSize = 2 * (1 << 16); unsigned char *cidtogidmap = new unsigned char[kCIDToGIDMapSize]; for (int i = 0; i < kCIDToGIDMapSize; i++) { cidtogidmap[i] = (i % 2) ? 
1 : 0; } size_t len; unsigned char *comp = zlibCompress(cidtogidmap, kCIDToGIDMapSize, &len); delete[] cidtogidmap; n = snprintf(buf, sizeof(buf), "5 0 obj\n" "<<\n" " /Length %ld /Filter /FlateDecode\n" ">>\n" "stream\n", len); if (n >= sizeof(buf)) { lept_free(comp); return false; } AppendString(buf); long objsize = strlen(buf); AppendData(reinterpret_cast<char *>(comp), len); objsize += len; lept_free(comp); const char *endstream_endobj = "endstream\n" "endobj\n"; AppendString(endstream_endobj); objsize += strlen(endstream_endobj); AppendPDFObjectDIY(objsize); const char *stream = "/CIDInit /ProcSet findresource begin\n" "12 dict begin\n" "begincmap\n" "/CIDSystemInfo\n" "<<\n" " /Registry (Adobe)\n" " /Ordering (UCS)\n" " /Supplement 0\n" ">> def\n" "/CMapName /Adobe-Identify-UCS def\n" "/CMapType 2 def\n" "1 begincodespacerange\n" "<0000> <FFFF>\n" "endcodespacerange\n" "1 beginbfrange\n" "<0000> <FFFF> <0000>\n" "endbfrange\n" "endcmap\n" "CMapName currentdict /CMap defineresource pop\n" "end\n" "end\n"; // TOUNICODE n = snprintf(buf, sizeof(buf), "6 0 obj\n" "<< /Length %lu >>\n" "stream\n" "%s" "endstream\n" "endobj\n", (unsigned long) strlen(stream), stream); if (n >= sizeof(buf)) return false; AppendPDFObject(buf); // FONT DESCRIPTOR const int kCharHeight = 2; // Effect: highlights are half height n = snprintf(buf, sizeof(buf), "7 0 obj\n" "<<\n" " /Ascent %d\n" " /CapHeight %d\n" " /Descent -1\n" // Spec says must be negative " /Flags 5\n" // FixedPitch + Symbolic " /FontBBox [ 0 0 %d %d ]\n" " /FontFile2 %ld 0 R\n" " /FontName /GlyphLessFont\n" " /ItalicAngle 0\n" " /StemV 80\n" " /Type /FontDescriptor\n" ">>\n" "endobj\n", 1000 / kCharHeight, 1000 / kCharHeight, 1000 / kCharWidth, 1000 / kCharHeight, 8L // Font data ); if (n >= sizeof(buf)) return false; AppendPDFObject(buf); n = snprintf(buf, sizeof(buf), "%s/pdf.ttf", datadir_); if (n >= sizeof(buf)) return false; FILE *fp = fopen(buf, "rb"); if (!fp) { tprintf("Can not open file \"%s\"!\n", buf); 
return false; } fseek(fp, 0, SEEK_END); long int size = ftell(fp); fseek(fp, 0, SEEK_SET); char *buffer = new char[size]; if (fread(buffer, 1, size, fp) != size) { fclose(fp); delete[] buffer; return false; } fclose(fp); // FONTFILE2 n = snprintf(buf, sizeof(buf), "8 0 obj\n" "<<\n" " /Length %ld\n" " /Length1 %ld\n" ">>\n" "stream\n", size, size); if (n >= sizeof(buf)) { delete[] buffer; return false; } AppendString(buf); objsize = strlen(buf); AppendData(buffer, size); delete[] buffer; objsize += size; AppendString(endstream_endobj); objsize += strlen(endstream_endobj); AppendPDFObjectDIY(objsize); return true; }
bool TessPDFRenderer::BeginDocumentHandler() { char buf[kBasicBufSize]; snprintf(buf, sizeof(buf), "%%PDF-1.5\n" "%%%c%c%c%c\n", 0xDE, 0xAD, 0xBE, 0xEB); AppendPDFObject(buf); // CATALOG snprintf(buf, sizeof(buf), "1 0 obj\n" "<<\n" " /Type /Catalog\n" " /Pages %ld 0 R\n" ">>\n" "endobj\n", 2L); AppendPDFObject(buf); // We are reserving object #2 for the /Pages // object, which I am going to create and write // at the end of the PDF file. AppendPDFObject(""); // TYPE0 FONT snprintf(buf, sizeof(buf), "3 0 obj\n" "<<\n" " /BaseFont /GlyphLessFont\n" " /DescendantFonts [ %ld 0 R ]\n" " /Encoding /Identity-H\n" " /Subtype /Type0\n" " /ToUnicode %ld 0 R\n" " /Type /Font\n" ">>\n" "endobj\n", 4L, // CIDFontType2 font 5L // ToUnicode ); AppendPDFObject(buf); // CIDFONTTYPE2 snprintf(buf, sizeof(buf), "4 0 obj\n" "<<\n" " /BaseFont /GlyphLessFont\n" " /CIDToGIDMap /Identity\n" " /CIDSystemInfo\n" " <<\n" " /Ordering (Identity)\n" " /Registry (Adobe)\n" " /Supplement 0\n" " >>\n" " /FontDescriptor %ld 0 R\n" " /Subtype /CIDFontType2\n" " /Type /Font\n" " /DW %d\n" ">>\n" "endobj\n", 6L, // Font descriptor 1000 / kCharWidth); AppendPDFObject(buf); const char *stream = "/CIDInit /ProcSet findresource begin\n" "12 dict begin\n" "begincmap\n" "/CIDSystemInfo\n" "<<\n" " /Registry (Adobe)\n" " /Ordering (UCS)\n" " /Supplement 0\n" ">> def\n" "/CMapName /Adobe-Identify-UCS def\n" "/CMapType 2 def\n" "1 begincodespacerange\n" "<0000> <FFFF>\n" "endcodespacerange\n" "1 beginbfrange\n" "<0000> <FFFF> <0000>\n" "endbfrange\n" "endcmap\n" "CMapName currentdict /CMap defineresource pop\n" "end\n" "end\n"; // TOUNICODE snprintf(buf, sizeof(buf), "5 0 obj\n" "<< /Length %lu >>\n" "stream\n" "%s" "endstream\n" "endobj\n", (unsigned long) strlen(stream), stream); AppendPDFObject(buf); // FONT DESCRIPTOR const int kCharHeight = 2; // Effect: highlights are half height snprintf(buf, sizeof(buf), "6 0 obj\n" "<<\n" " /Ascent %d\n" " /CapHeight %d\n" " /Descent -1\n" // Spec says must be 
negative " /Flags 5\n" // FixedPitch + Symbolic " /FontBBox [ 0 0 %d %d ]\n" " /FontFile2 %ld 0 R\n" " /FontName /GlyphLessFont\n" " /ItalicAngle 0\n" " /StemV 80\n" " /Type /FontDescriptor\n" ">>\n" "endobj\n", 1000 / kCharHeight, 1000 / kCharHeight, 1000 / kCharWidth, 1000 / kCharHeight, 7L // Font data ); AppendPDFObject(buf); snprintf(buf, sizeof(buf), "%s/pdf.ttf", datadir_); FILE *fp = fopen(buf, "rb"); if (!fp) return false; fseek(fp, 0, SEEK_END); long int size = ftell(fp); fseek(fp, 0, SEEK_SET); char *buffer = new char[size]; fread(buffer, 1, size, fp); fclose(fp); // FONTFILE2 snprintf(buf, sizeof(buf), "7 0 obj\n" "<<\n" " /Length %ld\n" " /Length1 %ld\n" ">>\n" "stream\n", size, size); AppendString(buf); size_t objsize = strlen(buf); AppendData(buffer, size); objsize += size; snprintf(buf, sizeof(buf), "endstream\n" "endobj\n"); AppendString(buf); objsize += strlen(buf); AppendPDFObjectDIY(objsize); return true; }