bool TessPDFRenderer::EndDocumentHandler() {
    size_t n;
    char buf[kBasicBufSize];

    // We reserved the /Pages object number early, so that the /Page
    // objects could refer to their parent. We finally have enough
    // information to go fill it in. Using lower level calls to manipulate
    // the offset record in two spots, because we are placing objects
    // out of order in the file.

    // PAGES
    const long int kPagesObjectNumber = 2;
    offsets_[kPagesObjectNumber] = offsets_.back();  // manipulation #1
    n = snprintf(buf, sizeof(buf),
                 "%ld 0 obj\n"
                 "<<\n"
                 "  /Type /Pages\n"
                 "  /Kids [ ", kPagesObjectNumber);
    if (n >= sizeof(buf)) return false;
    AppendString(buf);
    size_t pages_objsize = strlen(buf);
    for (size_t i = 0; i < pages_.size(); i++) {
        n = snprintf(buf, sizeof(buf),
                     "%ld 0 R ", pages_[i]);
        if (n >= sizeof(buf)) return false;
        AppendString(buf);
        pages_objsize += strlen(buf);
    }
    n = snprintf(buf, sizeof(buf),
                 "]\n"
                 "  /Count %d\n"
                 ">>\n"
                 "endobj\n", pages_.size());
    if (n >= sizeof(buf)) return false;
    AppendString(buf);
    pages_objsize += strlen(buf);
    offsets_.back() += pages_objsize;    // manipulation #2

    // INFO
    char *datestr = l_getFormattedDate();
    n = snprintf(buf, sizeof(buf),
                 "%ld 0 obj\n"
                 "<<\n"
                 "  /Producer (Tesseract %s)\n"
                 "  /CreationDate (D:%s)\n"
                 "  /Title (%s)"
                 ">>\n"
                 "endobj\n", obj_, TESSERACT_VERSION_STR, datestr, title());
    lept_free(datestr);
    if (n >= sizeof(buf)) return false;
    AppendPDFObject(buf);
    n = snprintf(buf, sizeof(buf),
                 "xref\n"
                 "0 %ld\n"
                 "0000000000 65535 f \n", obj_);
    if (n >= sizeof(buf)) return false;
    AppendString(buf);
    for (int i = 1; i < obj_; i++) {
        n = snprintf(buf, sizeof(buf), "%010ld 00000 n \n", offsets_[i]);
        if (n >= sizeof(buf)) return false;
        AppendString(buf);
    }
    n = snprintf(buf, sizeof(buf),
                 "trailer\n"
                 "<<\n"
                 "  /Size %ld\n"
                 "  /Root %ld 0 R\n"
                 "  /Info %ld 0 R\n"
                 ">>\n"
                 "startxref\n"
                 "%ld\n"
                 "%%%%EOF\n",
                 obj_,
                 1L,               // catalog
                 obj_ - 1,         // info
                 offsets_.back());
    if (n >= sizeof(buf)) return false;
    AppendString(buf);
    return true;
}
Example #2
0
bool TessPDFRenderer::EndDocumentHandler() {
  size_t n;
  char buf[kBasicBufSize];

  // We reserved the /Pages object number early, so that the /Page
  // objects could refer to their parent. We finally have enough
  // information to go fill it in. Using lower level calls to manipulate
  // the offset record in two spots, because we are placing objects
  // out of order in the file.

  // PAGES
  const long int kPagesObjectNumber = 2;
  offsets_[kPagesObjectNumber] = offsets_.back();  // manipulation #1
  n = snprintf(buf, sizeof(buf),
               "%ld 0 obj\n"
               "<<\n"
               "  /Type /Pages\n"
               "  /Kids [ ", kPagesObjectNumber);
  if (n >= sizeof(buf)) return false;
  AppendString(buf);
  size_t pages_objsize  = strlen(buf);
  for (size_t i = 0; i < pages_.unsigned_size(); i++) {
    n = snprintf(buf, sizeof(buf),
                 "%ld 0 R ", pages_[i]);
    if (n >= sizeof(buf)) return false;
    AppendString(buf);
    pages_objsize += strlen(buf);
  }
  n = snprintf(buf, sizeof(buf),
               "]\n"
               "  /Count %d\n"
               ">>\n"
               "endobj\n", pages_.size());
  if (n >= sizeof(buf)) return false;
  AppendString(buf);
  pages_objsize += strlen(buf);
  offsets_.back() += pages_objsize;    // manipulation #2

  // INFO
  STRING utf16_title = "FEFF";  // byte_order_marker
  std::vector<char32> unicodes = UNICHAR::UTF8ToUTF32(title());
  char utf16[kMaxBytesPerCodepoint];
  for (char32 code : unicodes) {
    if (CodepointToUtf16be(code, utf16)) {
      utf16_title += utf16;
    }
  }

  char* datestr = l_getFormattedDate();
  n = snprintf(buf, sizeof(buf),
               "%ld 0 obj\n"
               "<<\n"
               "  /Producer (Tesseract %s)\n"
               "  /CreationDate (D:%s)\n"
               "  /Title <%s>\n"
               ">>\n"
               "endobj\n",
               obj_, tesseract::TessBaseAPI::Version(),
               datestr, utf16_title.c_str());
  lept_free(datestr);
  if (n >= sizeof(buf)) return false;
  AppendPDFObject(buf);
  n = snprintf(buf, sizeof(buf),
               "xref\n"
               "0 %ld\n"
               "0000000000 65535 f \n", obj_);
  if (n >= sizeof(buf)) return false;
  AppendString(buf);
  for (int i = 1; i < obj_; i++) {
    n = snprintf(buf, sizeof(buf), "%010ld 00000 n \n", offsets_[i]);
    if (n >= sizeof(buf)) return false;
    AppendString(buf);
  }
  n = snprintf(buf, sizeof(buf),
               "trailer\n"
               "<<\n"
               "  /Size %ld\n"
               "  /Root %ld 0 R\n"
               "  /Info %ld 0 R\n"
               ">>\n"
               "startxref\n"
               "%ld\n"
               "%%%%EOF\n",
               obj_,
               1L,               // catalog
               obj_ - 1,         // info
               offsets_.back());
  if (n >= sizeof(buf)) return false;
  AppendString(buf);
  return true;
}