Ejemplo n.º 1
0
// Emits the three PDF objects that make up one page: the /Page dictionary,
// the Flate-compressed content stream built by GetPDFTextObjects(), and the
// image object built by imageToPDFObj().
//
// Returns false when the API has no input image or reports a non-positive
// resolution, when a formatted header does not fit into the scratch buffer,
// when compression of the content stream fails, or when the image cannot be
// converted. Returns true once all three objects have been appended.
bool TessPDFRenderer::AddImageHandler(TessBaseAPI * api) {
    size_t n;
    char buf[kBasicBufSize];
    Pix *pix = api->GetInputImage();
    char *filename = (char *) api->GetInputName();
    int ppi = api->GetSourceYResolution();
    if (!pix || ppi <= 0)
        return false;
    // Scale the pixel dimensions by 72/ppi to obtain the /MediaBox size.
    double width = pixGetWidth(pix) * 72.0 / ppi;
    double height = pixGetHeight(pix) * 72.0 / ppi;

    // PAGE
    n = snprintf(buf, sizeof(buf),
                 "%ld 0 obj\n"
                 "<<\n"
                 "  /Type /Page\n"
                 "  /Parent %ld 0 R\n"
                 "  /MediaBox [0 0 %.2f %.2f]\n"
                 "  /Contents %ld 0 R\n"
                 "  /Resources\n"
                 "  <<\n"
                 "    /XObject << /Im1 %ld 0 R >>\n"
                 "    /ProcSet [ /PDF /Text /ImageB /ImageI /ImageC ]\n"
                 "    /Font << /f-0-0 %ld 0 R >>\n"
                 "  >>\n"
                 ">>\n"
                 "endobj\n",
                 obj_,
                 2L,            // Pages object
                 width,
                 height,
                 obj_ + 1,      // Contents object
                 obj_ + 2,      // Image object
                 3L);           // Type0 Font
    // A negative snprintf result wraps to a huge size_t, so this single test
    // rejects both truncation and formatting errors.
    if (n >= sizeof(buf)) return false;
    pages_.push_back(obj_);
    AppendPDFObject(buf);

    // CONTENTS
    char *pdftext = GetPDFTextObjects(api, width, height);
    long pdftext_len = strlen(pdftext);
    unsigned char *pdftext_casted = reinterpret_cast<unsigned char *>(pdftext);
    size_t len;
    unsigned char *comp_pdftext =
        zlibCompress(pdftext_casted, pdftext_len, &len);
    // zlibCompress yields NULL on failure; 'len' is then not meaningful, so
    // bail out before either value is used.
    if (!comp_pdftext) {
        delete[] pdftext;
        return false;
    }
    long comp_pdftext_len = len;
    n = snprintf(buf, sizeof(buf),
                 "%ld 0 obj\n"
                 "<<\n"
                 "  /Length %ld /Filter /FlateDecode\n"
                 ">>\n"
                 "stream\n", obj_, comp_pdftext_len);
    if (n >= sizeof(buf)) {
        delete[] pdftext;
        lept_free(comp_pdftext);
        return false;
    }
    // The object is written in pieces, so its total size is accumulated by
    // hand and reported through AppendPDFObjectDIY() afterwards.
    AppendString(buf);
    long objsize = strlen(buf);
    AppendData(reinterpret_cast<char *>(comp_pdftext), comp_pdftext_len);
    objsize += comp_pdftext_len;
    lept_free(comp_pdftext);
    delete[] pdftext;
    const char *b2 =
        "endstream\n"
        "endobj\n";
    AppendString(b2);
    objsize += strlen(b2);
    AppendPDFObjectDIY(objsize);

    // IMAGE
    char *pdf_object;
    if (!imageToPDFObj(pix, filename, obj_, &pdf_object, &objsize)) {
        return false;
    }
    AppendData(pdf_object, objsize);
    AppendPDFObjectDIY(objsize);
    delete[] pdf_object;
    return true;
}
Ejemplo n.º 2
0
// Appends a complete, NUL-terminated PDF object: its byte length is first
// reported through AppendPDFObjectDIY(), then the text itself is written
// with AppendString().
void TessPDFRenderer::AppendPDFObject(const char *data) {
    const size_t object_length = strlen(data);
    AppendPDFObjectDIY(object_length);
    AppendString(data);
}
Ejemplo n.º 3
0
// Writes the fixed preamble of the PDF: the header lines, the catalog
// (object 1), a placeholder for the /Pages object (object 2), and the font
// objects 3-8 (Type0 font, CIDFontType2, CIDToGIDMap stream, ToUnicode CMap,
// font descriptor and the embedded "pdf.ttf" font file read from datadir_).
//
// Returns false if any formatted object does not fit into the scratch
// buffer, if compressing the CIDToGIDMap fails, or if the font file cannot
// be opened, measured or read completely. Returns true otherwise.
bool TessPDFRenderer::BeginDocumentHandler() {
    char buf[kBasicBufSize];
    size_t n;

    // Header: version line followed by a comment line holding four
    // non-ASCII bytes.
    n = snprintf(buf, sizeof(buf),
                 "%%PDF-1.5\n"
                 "%%%c%c%c%c\n",
                 0xDE, 0xAD, 0xBE, 0xEB);
    // A negative snprintf result wraps to a huge size_t, so this test covers
    // both truncation and formatting errors (same pattern used below).
    if (n >= sizeof(buf)) return false;
    AppendPDFObject(buf);

    // CATALOG
    n = snprintf(buf, sizeof(buf),
                 "1 0 obj\n"
                 "<<\n"
                 "  /Type /Catalog\n"
                 "  /Pages %ld 0 R\n"
                 ">>\n"
                 "endobj\n",
                 2L);
    if (n >= sizeof(buf)) return false;
    AppendPDFObject(buf);

    // We are reserving object #2 for the /Pages
    // object, which I am going to create and write
    // at the end of the PDF file.
    AppendPDFObject("");

    // TYPE0 FONT
    n = snprintf(buf, sizeof(buf),
                 "3 0 obj\n"
                 "<<\n"
                 "  /BaseFont /GlyphLessFont\n"
                 "  /DescendantFonts [ %ld 0 R ]\n"
                 "  /Encoding /Identity-H\n"
                 "  /Subtype /Type0\n"
                 "  /ToUnicode %ld 0 R\n"
                 "  /Type /Font\n"
                 ">>\n"
                 "endobj\n",
                 4L,         // CIDFontType2 font
                 6L          // ToUnicode
                );
    if (n >= sizeof(buf)) return false;
    AppendPDFObject(buf);

    // CIDFONTTYPE2
    n = snprintf(buf, sizeof(buf),
                 "4 0 obj\n"
                 "<<\n"
                 "  /BaseFont /GlyphLessFont\n"
                 "  /CIDToGIDMap %ld 0 R\n"
                 "  /CIDSystemInfo\n"
                 "  <<\n"
                 "     /Ordering (Identity)\n"
                 "     /Registry (Adobe)\n"
                 "     /Supplement 0\n"
                 "  >>\n"
                 "  /FontDescriptor %ld 0 R\n"
                 "  /Subtype /CIDFontType2\n"
                 "  /Type /Font\n"
                 "  /DW %d\n"
                 ">>\n"
                 "endobj\n",
                 5L,         // CIDToGIDMap
                 7L,         // Font descriptor
                 1000 / kCharWidth);
    if (n >= sizeof(buf)) return false;
    AppendPDFObject(buf);

    // CIDTOGIDMAP
    // 65536 two-byte entries, each filled with the bytes 0x00 0x01.
    const int kCIDToGIDMapSize = 2 * (1 << 16);
    unsigned char *cidtogidmap = new unsigned char[kCIDToGIDMapSize];
    for (int i = 0; i < kCIDToGIDMapSize; i++) {
        cidtogidmap[i] = (i % 2) ? 1 : 0;
    }
    size_t len;
    unsigned char *comp =
        zlibCompress(cidtogidmap, kCIDToGIDMapSize, &len);
    delete[] cidtogidmap;
    // zlibCompress yields NULL on failure; 'len' is then not meaningful.
    if (!comp) return false;
    // 'len' is a size_t; convert explicitly so the argument matches %ld.
    n = snprintf(buf, sizeof(buf),
                 "5 0 obj\n"
                 "<<\n"
                 "  /Length %ld /Filter /FlateDecode\n"
                 ">>\n"
                 "stream\n", static_cast<long>(len));
    if (n >= sizeof(buf)) {
        lept_free(comp);
        return false;
    }
    // The stream object is written in pieces, so its size is summed by hand
    // and reported through AppendPDFObjectDIY().
    AppendString(buf);
    long objsize = strlen(buf);
    AppendData(reinterpret_cast<char *>(comp), len);
    objsize += len;
    lept_free(comp);
    const char *endstream_endobj =
        "endstream\n"
        "endobj\n";
    AppendString(endstream_endobj);
    objsize += strlen(endstream_endobj);
    AppendPDFObjectDIY(objsize);

    const char *stream =
        "/CIDInit /ProcSet findresource begin\n"
        "12 dict begin\n"
        "begincmap\n"
        "/CIDSystemInfo\n"
        "<<\n"
        "  /Registry (Adobe)\n"
        "  /Ordering (UCS)\n"
        "  /Supplement 0\n"
        ">> def\n"
        "/CMapName /Adobe-Identify-UCS def\n"
        "/CMapType 2 def\n"
        "1 begincodespacerange\n"
        "<0000> <FFFF>\n"
        "endcodespacerange\n"
        "1 beginbfrange\n"
        "<0000> <FFFF> <0000>\n"
        "endbfrange\n"
        "endcmap\n"
        "CMapName currentdict /CMap defineresource pop\n"
        "end\n"
        "end\n";

    // TOUNICODE
    n = snprintf(buf, sizeof(buf),
                 "6 0 obj\n"
                 "<< /Length %lu >>\n"
                 "stream\n"
                 "%s"
                 "endstream\n"
                 "endobj\n", (unsigned long) strlen(stream), stream);
    if (n >= sizeof(buf)) return false;
    AppendPDFObject(buf);

    // FONT DESCRIPTOR
    const int kCharHeight = 2;  // Effect: highlights are half height
    n = snprintf(buf, sizeof(buf),
                 "7 0 obj\n"
                 "<<\n"
                 "  /Ascent %d\n"
                 "  /CapHeight %d\n"
                 "  /Descent -1\n"       // Spec says must be negative
                 "  /Flags 5\n"          // FixedPitch + Symbolic
                 "  /FontBBox  [ 0 0 %d %d ]\n"
                 "  /FontFile2 %ld 0 R\n"
                 "  /FontName /GlyphLessFont\n"
                 "  /ItalicAngle 0\n"
                 "  /StemV 80\n"
                 "  /Type /FontDescriptor\n"
                 ">>\n"
                 "endobj\n",
                 1000 / kCharHeight,
                 1000 / kCharHeight,
                 1000 / kCharWidth,
                 1000 / kCharHeight,
                 8L      // Font data
                );
    if (n >= sizeof(buf)) return false;
    AppendPDFObject(buf);

    // Load the whole font file into memory.
    n = snprintf(buf, sizeof(buf), "%s/pdf.ttf", datadir_);
    if (n >= sizeof(buf)) return false;
    FILE *fp = fopen(buf, "rb");
    if (!fp) {
        tprintf("Can not open file \"%s\"!\n", buf);
        return false;
    }
    if (fseek(fp, 0, SEEK_END) != 0) {
        fclose(fp);
        return false;
    }
    long int size = ftell(fp);
    // ftell reports -1 on failure; that must never reach new[] or fread.
    if (size < 0 || fseek(fp, 0, SEEK_SET) != 0) {
        fclose(fp);
        return false;
    }
    char *buffer = new char[size];
    if (fread(buffer, 1, size, fp) != static_cast<size_t>(size)) {
        fclose(fp);
        delete[] buffer;
        return false;
    }
    fclose(fp);
    // FONTFILE2
    n = snprintf(buf, sizeof(buf),
                 "8 0 obj\n"
                 "<<\n"
                 "  /Length %ld\n"
                 "  /Length1 %ld\n"
                 ">>\n"
                 "stream\n", size, size);
    if (n >= sizeof(buf)) {
        delete[] buffer;
        return false;
    }
    AppendString(buf);
    objsize = strlen(buf);
    AppendData(buffer, size);
    delete[] buffer;
    objsize += size;
    AppendString(endstream_endobj);
    objsize += strlen(endstream_endobj);
    AppendPDFObjectDIY(objsize);
    return true;
}
Ejemplo n.º 4
0
// Writes the fixed preamble of the PDF: the header lines, the catalog
// (object 1), a placeholder for the /Pages object (object 2), and the font
// objects 3-7 (Type0 font, CIDFontType2, ToUnicode CMap, font descriptor and
// the embedded "pdf.ttf" font file read from datadir_).
//
// Returns false if any formatted object does not fit into the scratch
// buffer, or if the font file cannot be opened, measured or read completely.
// Returns true otherwise.
bool TessPDFRenderer::BeginDocumentHandler() {
  char buf[kBasicBufSize];
  // snprintf results are stored as size_t: a negative (error) result wraps
  // to a huge value, so "n >= sizeof(buf)" rejects errors and truncation.
  size_t n;

  // Header: version line followed by a comment line holding four
  // non-ASCII bytes.
  n = snprintf(buf, sizeof(buf),
               "%%PDF-1.5\n"
               "%%%c%c%c%c\n",
               0xDE, 0xAD, 0xBE, 0xEB);
  if (n >= sizeof(buf)) return false;
  AppendPDFObject(buf);

  // CATALOG
  n = snprintf(buf, sizeof(buf),
               "1 0 obj\n"
               "<<\n"
               "  /Type /Catalog\n"
               "  /Pages %ld 0 R\n"
               ">>\n"
               "endobj\n", 2L);
  if (n >= sizeof(buf)) return false;
  AppendPDFObject(buf);

  // We are reserving object #2 for the /Pages
  // object, which I am going to create and write
  // at the end of the PDF file.
  AppendPDFObject("");

  // TYPE0 FONT
  n = snprintf(buf, sizeof(buf),
               "3 0 obj\n"
               "<<\n"
               "  /BaseFont /GlyphLessFont\n"
               "  /DescendantFonts [ %ld 0 R ]\n"
               "  /Encoding /Identity-H\n"
               "  /Subtype /Type0\n"
               "  /ToUnicode %ld 0 R\n"
               "  /Type /Font\n"
               ">>\n"
               "endobj\n",
               4L,          // CIDFontType2 font
               5L           // ToUnicode
               );
  if (n >= sizeof(buf)) return false;
  AppendPDFObject(buf);

  // CIDFONTTYPE2
  n = snprintf(buf, sizeof(buf),
               "4 0 obj\n"
               "<<\n"
               "  /BaseFont /GlyphLessFont\n"
               "  /CIDToGIDMap /Identity\n"
               "  /CIDSystemInfo\n"
               "  <<\n"
               "     /Ordering (Identity)\n"
               "     /Registry (Adobe)\n"
               "     /Supplement 0\n"
               "  >>\n"
               "  /FontDescriptor %ld 0 R\n"
               "  /Subtype /CIDFontType2\n"
               "  /Type /Font\n"
               "  /DW %d\n"
               ">>\n"
               "endobj\n",
               6L,         // Font descriptor
               1000 / kCharWidth);
  if (n >= sizeof(buf)) return false;
  AppendPDFObject(buf);

  const char *stream =
      "/CIDInit /ProcSet findresource begin\n"
      "12 dict begin\n"
      "begincmap\n"
      "/CIDSystemInfo\n"
      "<<\n"
      "  /Registry (Adobe)\n"
      "  /Ordering (UCS)\n"
      "  /Supplement 0\n"
      ">> def\n"
      "/CMapName /Adobe-Identify-UCS def\n"
      "/CMapType 2 def\n"
      "1 begincodespacerange\n"
      "<0000> <FFFF>\n"
      "endcodespacerange\n"
      "1 beginbfrange\n"
      "<0000> <FFFF> <0000>\n"
      "endbfrange\n"
      "endcmap\n"
      "CMapName currentdict /CMap defineresource pop\n"
      "end\n"
      "end\n";

  // TOUNICODE
  n = snprintf(buf, sizeof(buf),
               "5 0 obj\n"
               "<< /Length %lu >>\n"
               "stream\n"
               "%s"
               "endstream\n"
               "endobj\n", (unsigned long) strlen(stream), stream);
  if (n >= sizeof(buf)) return false;
  AppendPDFObject(buf);

  // FONT DESCRIPTOR
  const int kCharHeight = 2;  // Effect: highlights are half height
  n = snprintf(buf, sizeof(buf),
               "6 0 obj\n"
               "<<\n"
               "  /Ascent %d\n"
               "  /CapHeight %d\n"
               "  /Descent -1\n"       // Spec says must be negative
               "  /Flags 5\n"          // FixedPitch + Symbolic
               "  /FontBBox  [ 0 0 %d %d ]\n"
               "  /FontFile2 %ld 0 R\n"
               "  /FontName /GlyphLessFont\n"
               "  /ItalicAngle 0\n"
               "  /StemV 80\n"
               "  /Type /FontDescriptor\n"
               ">>\n"
               "endobj\n",
               1000 / kCharHeight,
               1000 / kCharHeight,
               1000 / kCharWidth,
               1000 / kCharHeight,
               7L      // Font data
               );
  if (n >= sizeof(buf)) return false;
  AppendPDFObject(buf);

  // Load the whole font file into memory.
  n = snprintf(buf, sizeof(buf), "%s/pdf.ttf", datadir_);
  if (n >= sizeof(buf)) return false;
  FILE *fp = fopen(buf, "rb");
  if (!fp)
    return false;
  if (fseek(fp, 0, SEEK_END) != 0) {
    fclose(fp);
    return false;
  }
  long int size = ftell(fp);
  // ftell reports -1 on failure; that must never reach new[] or fread.
  if (size < 0 || fseek(fp, 0, SEEK_SET) != 0) {
    fclose(fp);
    return false;
  }
  char *buffer = new char[size];
  // A short read would embed uninitialised bytes in the PDF, so fail instead.
  if (fread(buffer, 1, size, fp) != static_cast<size_t>(size)) {
    fclose(fp);
    delete[] buffer;
    return false;
  }
  fclose(fp);
  // FONTFILE2
  n = snprintf(buf, sizeof(buf),
               "7 0 obj\n"
               "<<\n"
               "  /Length %ld\n"
               "  /Length1 %ld\n"
               ">>\n"
               "stream\n", size, size);
  if (n >= sizeof(buf)) {
    delete[] buffer;
    return false;
  }
  // The stream object is written in pieces, so its size is summed by hand
  // and reported through AppendPDFObjectDIY().
  AppendString(buf);
  size_t objsize  = strlen(buf);
  AppendData(buffer, size);
  // The font data has been handed to AppendData; release it so the buffer
  // is not leaked.
  delete[] buffer;
  objsize += size;
  const char *endstream_endobj =
      "endstream\n"
      "endobj\n";
  AppendString(endstream_endobj);
  objsize += strlen(endstream_endobj);
  AppendPDFObjectDIY(objsize);
  return true;
}