Пример #1
0
/* Convert Unicode/PdfDocEncoding string into utf-8 */
char *
pdf_to_utf8(fz_context *ctx, pdf_obj *src)
{
    unsigned char *srcptr = (unsigned char *) pdf_to_str_buf(src);
    char *dstptr, *dst;
    int srclen = pdf_to_str_len(src);
    int dstlen = 0;
    int ucs;
    int i;

    if (srclen >= 2 && srcptr[0] == 254 && srcptr[1] == 255)
    {
        for (i = 2; i + 1 < srclen; i += 2)
        {
            ucs = srcptr[i] << 8 | srcptr[i+1];
            dstlen += fz_runelen(ucs);
        }

        dstptr = dst = fz_malloc(ctx, dstlen + 1);

        for (i = 2; i + 1 < srclen; i += 2)
        {
            ucs = srcptr[i] << 8 | srcptr[i+1];
            dstptr += fz_runetochar(dstptr, ucs);
        }
    }
    else if (srclen >= 2 && srcptr[0] == 255 && srcptr[1] == 254)
    {
        for (i = 2; i + 1 < srclen; i += 2)
        {
            ucs = srcptr[i] | srcptr[i+1] << 8;
            dstlen += fz_runelen(ucs);
        }

        dstptr = dst = fz_malloc(ctx, dstlen + 1);

        for (i = 2; i + 1 < srclen; i += 2)
        {
            ucs = srcptr[i] | srcptr[i+1] << 8;
            dstptr += fz_runetochar(dstptr, ucs);
        }
    }
    else
    {
        for (i = 0; i < srclen; i++)
            dstlen += fz_runelen(pdf_doc_encoding[srcptr[i]]);

        dstptr = dst = fz_malloc(ctx, dstlen + 1);

        for (i = 0; i < srclen; i++)
        {
            ucs = pdf_doc_encoding[srcptr[i]];
            dstptr += fz_runetochar(dstptr, ucs);
        }
    }

    *dstptr = '\0';
    return dst;
}
Пример #2
0
char *
fz_utf8_from_wchar(const wchar_t *s)
{
	const wchar_t *src = s;
	char *d;
	char *dst;
	int len = 1;

	while (*src)
	{
		len += fz_runelen(*src++);
	}

	d = malloc(len);
	if (d != NULL)
	{
		dst = d;
		src = s;
		while (*src)
		{
			dst += fz_runetochar(dst, *src++);
		}
		*dst = 0;
	}
	return d;
}
Пример #3
0
static void format_alpha_number(fz_context *ctx, char *buf, int size, int n, int alpha, int omega)
{
    int base = omega - alpha + 1;
    int tmp[40];
    int i, c;

    if (alpha > 256) /* to skip final-s for greek */
        --base;

    /* Bijective base-26 (base-24 for greek) numeration */
    i = 0;
    while (n > 0)
    {
        --n;
        c = n % base + alpha;
        if (alpha > 256 && c > alpha + 16) /* skip final-s for greek */
            ++c;
        tmp[i++] = c;
        n /= base;
    }

    while (i > 0)
        buf += fz_runetochar(buf, tmp[--i]);
    *buf++ = '.';
    *buf++ = ' ';
    *buf = 0;
}
Пример #4
0
static void
process_block (fz_context *ctx, FILE *memstream, fz_stext_block *block)
{
  fz_stext_line *line;
  /* printf ("in process block, block->len = %d\n", block->len); */
  for (line = block->lines; line - block->lines < block->len;
       line++)
    {
      fz_stext_span *span;
      for (span = line->first_span; span; span = span->next)
	{
	  fz_stext_char *ch;
	    
	  if (span != line->first_span)
	      putc (' ', memstream);
	  for (ch = span->text; ch < span->text + span->len; ch++)
	    {
	      char utf[4];
	      int len = fz_runetochar (utf, ch->c);
	      if (utf[0] == '\0')
		{
		  pdfout_warn (ctx, "process_block: skipping null character");
		  continue;
		}
	      fwrite (utf, len, 1, memstream);
	    }
	}
      putc ('\n', memstream);
    }
}
Пример #5
0
void docopy(pdfapp_t *app, Atom copy_target)
{
	unsigned short copyucs2[16 * 1024];
	char *latin1 = copylatin1;
	char *utf8 = copyutf8;
	unsigned short *ucs2;
	int ucs;

	pdfapp_oncopy(&gapp, copyucs2, 16 * 1024);

	for (ucs2 = copyucs2; ucs2[0] != 0; ucs2++)
	{
		ucs = ucs2[0];

		utf8 += fz_runetochar(utf8, ucs);

		if (ucs < 256)
			*latin1++ = ucs;
		else
			*latin1++ = '?';
	}

	*utf8 = 0;
	*latin1 = 0;

	XSetSelectionOwner(xdpy, copy_target, xwin, copytime);

	justcopied = 1;
}
Пример #6
0
void
fz_print_text_page(fz_context *ctx, fz_output *out, fz_text_page *page)
{
	fz_text_block *block;
	fz_text_line *line;
	fz_text_span *span;
	fz_text_char *ch;
	char utf[10];
	int i, n;

	for (block = page->blocks; block < page->blocks + page->len; block++)
	{
		for (line = block->lines; line < block->lines + block->len; line++)
		{
			for (span = line->spans; span < line->spans + line->len; span++)
			{
				for (ch = span->text; ch < span->text + span->len; ch++)
				{
					n = fz_runetochar(utf, ch->c);
					for (i = 0; i < n; i++)
						fz_printf(out, "%c", utf[i]);
				}
			}
			fz_printf(out, "\n");
		}
		fz_printf(out, "\n");
	}
}
Пример #7
0
void
fz_print_stext_page_as_text(fz_context *ctx, fz_output *out, fz_stext_page *page)
{
	fz_stext_block *block;
	fz_stext_line *line;
	fz_stext_char *ch;
	char utf[10];
	int i, n;

	for (block = page->first_block; block; block = block->next)
	{
		if (block->type == FZ_STEXT_BLOCK_TEXT)
		{
			for (line = block->u.t.first_line; line; line = line->next)
			{
				for (ch = line->first_char; ch; ch = ch->next)
				{
					n = fz_runetochar(utf, ch->c);
					for (i = 0; i < n; i++)
						fz_write_byte(ctx, out, utf[i]);
				}
				fz_write_string(ctx, out, "\n");
			}
			fz_write_string(ctx, out, "\n");
		}
	}
}
void fz_write_buffer_rune(fz_context *ctx, fz_buffer *buf, int c)
{
	char data[10];
	int len = fz_runetochar(data, c);
	if (buf->len + len > buf->cap)
		fz_ensure_buffer(ctx, buf, buf->len + len);
	memcpy(buf->data + buf->len, data, len);
	buf->len += len;
	buf->unused_bits = 0;
}
Пример #9
0
void
fz_print_text_page(fz_context *ctx, fz_output *out, fz_text_page *page)
{
	int block_n;

	for (block_n = 0; block_n < page->len; block_n++)
	{
		switch (page->blocks[block_n].type)
		{
		case FZ_PAGE_BLOCK_TEXT:
		{
			fz_text_block *block = page->blocks[block_n].u.text;
			fz_text_line *line;
			fz_text_char *ch;
			char utf[10];
			int i, n;

			for (line = block->lines; line < block->lines + block->len; line++)
			{
				fz_text_span *span;
				for (span = line->first_span; span; span = span->next)
				{
					for (ch = span->text; ch < span->text + span->len; ch++)
					{
						n = fz_runetochar(utf, ch->c);
						for (i = 0; i < n; i++)
							fz_printf(out, "%c", utf[i]);
					}
				}
				fz_printf(out, "\n");
			}
			fz_printf(out, "\n");
			break;
		}
		case FZ_PAGE_BLOCK_IMAGE:
			break;
		}
	}
}
Пример #10
0
static int ui_input_key(struct input *input)
{
	switch (ui.key)
	{
	case KEY_LEFT:
		if (ui.mod == GLFW_MOD_CONTROL + GLFW_MOD_SHIFT)
		{
			input->q = prev_word(input->q, input->text);
		}
		else if (ui.mod == GLFW_MOD_CONTROL)
		{
			if (input->p != input->q)
				input->p = input->q = input->p < input->q ? input->p : input->q;
			else
				input->p = input->q = prev_word(input->q, input->text);
		}
		else if (ui.mod == GLFW_MOD_SHIFT)
		{
			if (input->q > input->text)
				input->q = prev_char(input->q, input->text);
		}
		else if (ui.mod == 0)
		{
			if (input->p != input->q)
				input->p = input->q = input->p < input->q ? input->p : input->q;
			else if (input->q > input->text)
				input->p = input->q = prev_char(input->q, input->text);
		}
		break;
	case KEY_RIGHT:
		if (ui.mod == GLFW_MOD_CONTROL + GLFW_MOD_SHIFT)
		{
			input->q = next_word(input->q, input->end);
		}
		else if (ui.mod == GLFW_MOD_CONTROL)
		{
			if (input->p != input->q)
				input->p = input->q = input->p > input->q ? input->p : input->q;
			else
				input->p = input->q = next_word(input->q, input->end);
		}
		else if (ui.mod == GLFW_MOD_SHIFT)
		{
			if (input->q < input->end)
				input->q = next_char(input->q);
		}
		else if (ui.mod == 0)
		{
			if (input->p != input->q)
				input->p = input->q = input->p > input->q ? input->p : input->q;
			else if (input->q < input->end)
				input->p = input->q = next_char(input->q);
		}
		break;
	case KEY_UP:
	case KEY_HOME:
		if (ui.mod == GLFW_MOD_CONTROL + GLFW_MOD_SHIFT)
		{
			input->q = input->text;
		}
		else if (ui.mod == GLFW_MOD_CONTROL)
		{
			input->p = input->q = input->text;
		}
		else if (ui.mod == GLFW_MOD_SHIFT)
		{
			input->q = input->text;
		}
		else if (ui.mod == 0)
		{
			input->p = input->q = input->text;
		}
		break;
	case KEY_DOWN:
	case KEY_END:
		if (ui.mod == GLFW_MOD_CONTROL + GLFW_MOD_SHIFT)
		{
			input->q = input->end;
		}
		else if (ui.mod == GLFW_MOD_CONTROL)
		{
			input->p = input->q = input->end;
		}
		else if (ui.mod == GLFW_MOD_SHIFT)
		{
			input->q = input->end;
		}
		else if (ui.mod == 0)
		{
			input->p = input->q = input->end;
		}
		break;
	case KEY_DELETE:
		if (input->p != input->q)
			ui_input_delete_selection(input);
		else if (input->p < input->end)
		{
			char *np = next_char(input->p);
			memmove(input->p, np, input->end - np);
			input->end -= np - input->p;
			*input->end = 0;
			input->q = input->p;
		}
		break;
	case KEY_ESCAPE:
		return -1;
	case KEY_ENTER:
		return 1;
	case KEY_BACKSPACE:
		if (input->p != input->q)
			ui_input_delete_selection(input);
		else if (input->p > input->text)
		{
			char *pp = prev_char(input->p, input->text);
			memmove(pp, input->p, input->end - input->p);
			input->end -= input->p - pp;
			*input->end = 0;
			input->q = input->p = pp;
		}
		break;
	case KEY_CTL_A:
		input->p = input->q = input->text;
		break;
	case KEY_CTL_E:
		input->p = input->q = input->end;
		break;
	case KEY_CTL_W:
		if (input->p != input->q)
			ui_input_delete_selection(input);
		else
		{
			input->p = prev_word(input->p, input->text);
			ui_input_delete_selection(input);
		}
		break;
	case KEY_CTL_U:
		input->p = input->q = input->end = input->text;
		break;
	case KEY_CTL_C:
	case KEY_CTL_X:
		if (input->p != input->q)
		{
			char buf[sizeof input->text];
			char *p = input->p < input->q ? input->p : input->q;
			char *q = input->p > input->q ? input->p : input->q;
			memmove(buf, p, q - p);
			buf[q-p] = 0;
			glfwSetClipboardString(window, buf);
			if (ui.key == KEY_CTL_X)
				ui_input_delete_selection(input);
		}
		break;
	case KEY_CTL_V:
		{
			const char *buf = glfwGetClipboardString(window);
			if (buf)
				ui_input_paste(input, buf, (int)strlen(buf));
		}
		break;
	default:
		if (ui.key >= 32)
		{
			int cat = ucdn_get_general_category(ui.key);
			if (ui.key == ' ' || (cat >= UCDN_GENERAL_CATEGORY_LL && cat < UCDN_GENERAL_CATEGORY_ZL))
			{
				char buf[8];
				int n = fz_runetochar(buf, ui.key);
				ui_input_paste(input, buf, n);
			}
		}
		break;
	}
	return 0;
}
Пример #11
0
std::string PDFDocument::GetPageText(int page, int line_sep) {
  // 1. Init MuPDF structures.
  pdf_page* page_struct = GetPage(page);
  fz_stext_sheet* text_sheet = fz_new_stext_sheet(_fz_context);

  // 2. Render page.
#if MUPDF_VERSION >= 10010
  fz_stext_options stext_options = { 0 };
  // See #elif MUPDF_VERSION >= 10009 block below.
  fz_stext_page* text_page = fz_new_stext_page_from_page(
      _fz_context, &(page_struct->super), text_sheet, &stext_options);
#elif MUPDF_VERSION >= 10009
  // The function below is a wrapper around fz_run_page that uses a fresh
  // device. We can't use pdf_run_page to gather the text for us.
  // These notes are also left in here in case MuPDF's API changes again.
  fz_stext_page* text_page = fz_new_stext_page_from_page(
      _fz_context, &(page_struct->super), text_sheet);
#else
  fz_stext_page* text_page = fz_new_text_page(_fz_context);
  fz_device* dev = fz_new_stext_device(_fz_context, text_sheet, text_page);
  // I've no idea what fz_{begin,end}_page do, but without them pdf_run_page
  // segfaults :-/
  fz_begin_page(_fz_context, dev, &fz_infinite_rect, &fz_identity);
  pdf_run_page(
      _fz_context, _pdf_document, page_struct, dev, &fz_identity, nullptr);
  fz_end_page(_fz_context, dev);
#endif

  // 3. Build text.
  std::string r;
  for (fz_page_block* page_block = text_page->blocks;
       page_block < text_page->blocks + text_page->len;
       ++page_block) {
    assert(page_block != nullptr);
    if (page_block->type != FZ_PAGE_BLOCK_TEXT) {
      continue;
    }
    fz_stext_block* const text_block = page_block->u.text;
    assert(text_block != nullptr);
    for (fz_stext_line* text_line = text_block->lines;
         text_line < text_block->lines + text_block->len;
         ++text_line) {
      assert(text_line != nullptr);
      for (fz_stext_span* text_span = text_line->first_span;
           text_span != nullptr;
           text_span = text_span->next) {
        for (int i = 0; i < text_span->len; ++i) {
          const int c = text_span->text[i].c;
          // A single UTF-8 character cannot take more than 4 bytes, but let's
          // go for 8.
          char buffer[8];
          const int num_bytes = fz_runetochar(buffer, c);
          assert(num_bytes <= static_cast<int>(sizeof(buffer)));
          buffer[num_bytes] = '\0';
          r += buffer;
        }
      }
      if (!isspace(r.back())) {
        r += line_sep;
      }
    }
  }

  // 4. Clean up.
  fz_drop_stext_page(_fz_context, text_page);
  fz_drop_stext_sheet(_fz_context, text_sheet);

  return r;
}
Пример #12
0
/* Convert Unicode/PdfDocEncoding string into utf-8 */
char *
pdf_to_utf8(pdf_document *doc, pdf_obj *src)
{
    fz_context *ctx = doc->ctx;
    fz_buffer *strmbuf = NULL;
    unsigned char *srcptr;
    char *dstptr, *dst;
    int srclen;
    int dstlen = 0;
    int ucs;
    int i;

    fz_var(strmbuf);
    fz_try(ctx)
    {
        if (pdf_is_string(src))
        {
            srcptr = (unsigned char *) pdf_to_str_buf(src);
            srclen = pdf_to_str_len(src);
        }
        else if (pdf_is_stream(doc, pdf_to_num(src), pdf_to_gen(src)))
        {
            strmbuf = pdf_load_stream(doc, pdf_to_num(src), pdf_to_gen(src));
            srclen = fz_buffer_storage(ctx, strmbuf, (unsigned char **)&srcptr);
        }
        else
        {
            srclen = 0;
        }

        if (srclen >= 2 && srcptr[0] == 254 && srcptr[1] == 255)
        {
            for (i = 2; i + 1 < srclen; i += 2)
            {
                ucs = srcptr[i] << 8 | srcptr[i+1];
                dstlen += fz_runelen(ucs);
            }

            dstptr = dst = fz_malloc(ctx, dstlen + 1);

            for (i = 2; i + 1 < srclen; i += 2)
            {
                ucs = srcptr[i] << 8 | srcptr[i+1];
                dstptr += fz_runetochar(dstptr, ucs);
            }
        }
        else if (srclen >= 2 && srcptr[0] == 255 && srcptr[1] == 254)
        {
            for (i = 2; i + 1 < srclen; i += 2)
            {
                ucs = srcptr[i] | srcptr[i+1] << 8;
                dstlen += fz_runelen(ucs);
            }

            dstptr = dst = fz_malloc(ctx, dstlen + 1);

            for (i = 2; i + 1 < srclen; i += 2)
            {
                ucs = srcptr[i] | srcptr[i+1] << 8;
                dstptr += fz_runetochar(dstptr, ucs);
            }
        }
        else
        {
            for (i = 0; i < srclen; i++)
                dstlen += fz_runelen(pdf_doc_encoding[srcptr[i]]);

            dstptr = dst = fz_malloc(ctx, dstlen + 1);

            for (i = 0; i < srclen; i++)
            {
                ucs = pdf_replace_undefined(pdf_doc_encoding[srcptr[i]]);
                dstptr += fz_runetochar(dstptr, ucs);
            }
        }
    }
    fz_always(ctx)
    {
        fz_drop_buffer(ctx, strmbuf);
    }
    fz_catch(ctx)
    {
        fz_rethrow(ctx);
    }

    *dstptr = '\0';
    return dst;
}
Пример #13
0
/* Convert Unicode/PdfDocEncoding string into utf-8 */
char *
pdf_to_utf8(fz_context *ctx, pdf_document *doc, pdf_obj *src)
{
	fz_buffer *stmbuf = NULL;
	unsigned char *srcptr;
	char *dstptr, *dst;
	size_t srclen;
	size_t dstlen = 0;
	int ucs;
	size_t i;

	fz_var(stmbuf);
	fz_try(ctx)
	{
		if (pdf_is_string(ctx, src))
		{
			srcptr = (unsigned char *) pdf_to_str_buf(ctx, src);
			srclen = pdf_to_str_len(ctx, src);
		}
		else if (pdf_is_stream(ctx, src))
		{
			stmbuf = pdf_load_stream(ctx, doc, pdf_to_num(ctx, src), pdf_to_gen(ctx, src));
			srclen = fz_buffer_storage(ctx, stmbuf, (unsigned char **)&srcptr);
		}
		else
		{
			srclen = 0;
		}

		/* UTF-16BE */
		if (srclen >= 2 && srcptr[0] == 254 && srcptr[1] == 255)
		{
			i = 2;
			while (i + 2 <= srclen)
			{
				/* skip language escape codes */
				if (i + 6 <= srclen &&
					srcptr[i+0] == 0 && srcptr[i+1] == 27 &&
					srcptr[i+4] == 0 && srcptr[i+5] == 27)
				{
					i += 6;
				}
				else if (i + 8 <= srclen &&
					srcptr[i+0] == 0 && srcptr[i+1] == 27 &&
					srcptr[i+6] == 0 && srcptr[i+7] == 27)
				{
					i += 8;
				}
				else
				{
					i += rune_from_utf16be(&ucs, srcptr + i, srcptr + srclen);
					dstlen += fz_runelen(ucs);
				}
			}

			dstptr = dst = fz_malloc(ctx, dstlen + 1);

			i = 2;
			while (i + 2 <= srclen)
			{
				/* skip language escape codes */
				if (i + 6 <= srclen &&
					srcptr[i+0] == 0 && srcptr[i+1] == 27 &&
					srcptr[i+4] == 0 && srcptr[i+5] == 27)
				{
					i += 6;
				}
				else if (i + 8 <= srclen &&
					srcptr[i+0] == 0 && srcptr[i+1] == 27 &&
					srcptr[i+6] == 0 && srcptr[i+7] == 27)
				{
					i += 8;
				}
				else
				{
					i += rune_from_utf16be(&ucs, srcptr + i, srcptr + srclen);
					dstptr += fz_runetochar(dstptr, ucs);
				}
			}
		}

		/* PDFDocEncoding */
		else
		{
			for (i = 0; i < srclen; i++)
				dstlen += fz_runelen(pdf_doc_encoding[srcptr[i]]);

			dstptr = dst = fz_malloc(ctx, dstlen + 1);

			for (i = 0; i < srclen; i++)
			{
				ucs = pdf_doc_encoding[srcptr[i]];
				dstptr += fz_runetochar(dstptr, ucs);
			}
		}
	}
	fz_always(ctx)
	{
		fz_drop_buffer(ctx, stmbuf);
	}
	fz_catch(ctx)
	{
		fz_rethrow(ctx);
	}

	*dstptr = '\0';
	return dst;
}
Пример #14
0
/*
	Write a UTF-8 encoded unicode character.
*/
void
fz_write_rune(fz_context *ctx, fz_output *out, int rune)
{
	char data[10];
	fz_write_data(ctx, out, data, fz_runetochar(data, rune));
}
Пример #15
0
std::string PDFDocument::GetPageText(int page, int line_sep) {
  // 1. Init MuPDF structures.
  pdf_page* page_struct = GetPage(page);

#if MUPDF_VERSION < 10012
  fz_stext_sheet* text_sheet = fz_new_stext_sheet(_fz_context);
#endif

  // 2. Render page.
#if MUPDF_VERSION >= 10012
  fz_stext_options stext_options = {0};
  // See #elif MUPDF_VERSION >= 10009 block below.
  fz_stext_page* text_page = fz_new_stext_page_from_page(
      _fz_context, &(page_struct->super), &stext_options);
#elif MUPDF_VERSION >= 10010
  fz_stext_options stext_options = {0};
  // See #elif MUPDF_VERSION >= 10009 block below.
  fz_stext_page* text_page = fz_new_stext_page_from_page(
      _fz_context, &(page_struct->super), text_sheet, &stext_options);
#elif MUPDF_VERSION >= 10009
  // The function below is a wrapper around fz_run_page that uses a fresh
  // device. We can't use pdf_run_page to gather the text for us.
  // These notes are also left in here in case MuPDF's API changes again.
  fz_stext_page* text_page = fz_new_stext_page_from_page(
      _fz_context, &(page_struct->super), text_sheet);
#else
  fz_stext_page* text_page = fz_new_text_page(_fz_context);
  fz_device* dev = fz_new_stext_device(_fz_context, text_sheet, text_page);
  // I've no idea what fz_{begin,end}_page do, but without them pdf_run_page
  // segfaults :-/
  fz_begin_page(_fz_context, dev, &fz_infinite_rect, &fz_identity);
  pdf_run_page(
      _fz_context, _pdf_document, page_struct, dev, &fz_identity, nullptr);
  fz_end_page(_fz_context, dev);
#endif

  // 3. Build text.
  std::string r;
#if MUPDF_VERSION >= 10012
  for (fz_stext_block* text_block = text_page->first_block;
       text_block != nullptr; text_block = text_block->next) {
    if (text_block->type != FZ_STEXT_BLOCK_TEXT) {
      continue;
    }
    for (fz_stext_line* text_line = text_block->u.t.first_line;
         text_line != nullptr; text_line = text_line->next) {
      for (fz_stext_char* text_char = text_line->first_char;
           text_char != nullptr; text_char = text_char->next) {
        {
          const int c = text_char->c;
#else
  for (fz_page_block* page_block = text_page->blocks;
       page_block < text_page->blocks + text_page->len; ++page_block) {
    assert(page_block != nullptr);
    if (page_block->type != FZ_PAGE_BLOCK_TEXT) {
      continue;
    }
    fz_stext_block* const text_block = page_block->u.text;
    assert(text_block != nullptr);
    for (fz_stext_line* text_line = text_block->lines;
         text_line < text_block->lines + text_block->len; ++text_line) {
      assert(text_line != nullptr);
      for (fz_stext_span* text_span = text_line->first_span;
           text_span != nullptr; text_span = text_span->next) {
        for (int i = 0; i < text_span->len; ++i) {
          const int c = text_span->text[i].c;
#endif
          // A single UTF-8 character cannot take more than 4 bytes, but let's
          // go for 8.
          char buffer[8];
          const int num_bytes = fz_runetochar(buffer, c);
          assert(num_bytes <= static_cast<int>(sizeof(buffer)));
          buffer[num_bytes] = '\0';
          r += buffer;
        }
      }
      if (!isspace(r.back())) {
        r += line_sep;
      }
    }
  }

  // 4. Clean up.
  fz_drop_stext_page(_fz_context, text_page);
#if MUPDF_VERSION < 10012
  fz_drop_stext_sheet(_fz_context, text_sheet);
#endif

  return r;
}

PDFDocument::PDFOutlineItem::~PDFOutlineItem() {}

PDFDocument::PDFOutlineItem::PDFOutlineItem(fz_outline* src) {
  if (src == nullptr) {
    _dest_page = -1;
  } else {
    _title = src->title;
#if MUPDF_VERSION >= 10010
    _dest_page = src->page;
#else
    _dest_page = src->dest.ld.gotor.page;
#endif
  }
}

int PDFDocument::PDFOutlineItem::GetDestPage() const { return _dest_page; }

PDFDocument::PDFOutlineItem* PDFDocument::PDFOutlineItem::Build(
    fz_context* ctx, fz_outline* src) {
  PDFOutlineItem* root = nullptr;
  std::vector<std::unique_ptr<OutlineItem>> items;
  BuildRecursive(src, &items);
  fz_drop_outline(ctx, src);
  if (items.empty()) {
    return nullptr;
  } else if (items.size() == 1) {
    root = dynamic_cast<PDFOutlineItem*>(items[0].release());
  } else {
    root = new PDFOutlineItem(nullptr);
    root->_title = DEFAULT_ROOT_OUTLINE_ITEM_TITLE;
    root->_children.swap(items);
  }
  return root;
}

void PDFDocument::PDFOutlineItem::BuildRecursive(
    fz_outline* src,
    std::vector<std::unique_ptr<Document::OutlineItem>>* output) {
  assert(output != nullptr);
  for (fz_outline* i = src; i != nullptr; i = i->next) {
    PDFOutlineItem* item = new PDFOutlineItem(i);
    if (i->down != nullptr) {
      BuildRecursive(i->down, &(item->_children));
    }
    output->push_back(std::unique_ptr<Document::OutlineItem>(item));
  }
}

PDFDocument::PDFPageCache::PDFPageCache(int cache_size, PDFDocument* parent)
    : Cache<int, pdf_page*>(cache_size), _parent(parent) {}

PDFDocument::PDFPageCache::~PDFPageCache() { Clear(); }

pdf_page* PDFDocument::PDFPageCache::Load(const int& page) {
  std::unique_lock<std::mutex> lock(_mutex);
  return pdf_load_page(_parent->_fz_context, _parent->_pdf_document, page);
}

void PDFDocument::PDFPageCache::Discard(
    const int& page, pdf_page* const& page_struct) {
  std::unique_lock<std::mutex> lock(_mutex);
  pdf_drop_page(_parent->_fz_context, _parent->_pdf_document, page_struct);
}