Пример #1
0
static void
fz_text_begin_page(fz_context *ctx, fz_device *dev, const fz_rect *mediabox, const fz_matrix *ctm)
{
	fz_text_device *tdev = (fz_text_device*)dev;

	if (tdev->page->len)
	{
		tdev->page->next = fz_new_text_page(ctx);
		tdev->page = tdev->page->next;
	}

	tdev->page->mediabox = *mediabox;
	fz_transform_rect(&tdev->page->mediabox, ctm);
}
Пример #2
0
std::shared_ptr<std::vector<std::shared_ptr<RectFloat>>> MuPDFDoc::SearchText(const char* searchText)
{
	fz_text_sheet *sheet = nullptr;
	fz_text_page *text = nullptr;
	fz_device *dev  = nullptr;
	PageCache *pageCache = &m_pages[m_currentPage];
	fz_var(sheet);
	fz_var(text);
	fz_var(dev);
	std::shared_ptr<std::vector<std::shared_ptr<RectFloat>>> hints(new std::vector<std::shared_ptr<RectFloat>>());
	fz_try(m_context)
	{
		int hitCount = 0;
		fz_matrix ctm = CalcConvertMatrix();
		fz_rect mbrect = fz_transform_rect(ctm, pageCache->mediaBox);
		sheet = fz_new_text_sheet(m_context);
		text = fz_new_text_page(m_context, mbrect);
		dev = fz_new_text_device(m_context, sheet, text);
		fz_run_page(m_document, pageCache->page, dev, ctm, nullptr);
		fz_free_device(dev);
		dev = nullptr;
		int len = TextLen(text);
		for (int pos = 0; pos < len; pos++)
		{
			fz_bbox rr = fz_empty_bbox;
			int n = Match(text, searchText, pos);
			for (int i = 0; i < n; i++)
				rr = fz_union_bbox(rr, BBoxCharAt(text, pos + i));

			if (!fz_is_empty_bbox(rr) && hitCount < MAX_SEARCH_HITS)
			{
				hints->push_back(std::shared_ptr<RectFloat>(new RectFloat(rr.x0, rr.y0, rr.x1, rr.y1)));
				if (++hitCount >= MAX_SEARCH_HITS)
					break;
			}
		}
	}
	fz_always(m_context)
	{
		fz_free_text_page(m_context, text);
		fz_free_text_sheet(m_context, sheet);
		fz_free_device(dev);
	}
	fz_catch(m_context)
	{
		return std::shared_ptr<std::vector<std::shared_ptr<RectFloat>>>(nullptr);
	}
	return hints;
}
Пример #3
0
fz_text_page *
fz_new_text_page_from_page(fz_context *ctx, fz_page *page, fz_text_sheet *sheet)
{
	fz_text_page *text;
	fz_device *dev;

	text = fz_new_text_page(ctx);
	fz_try(ctx)
	{
		dev = fz_new_text_device(ctx, sheet, text);
		fz_run_page(ctx, page, dev, &fz_identity, NULL);
	}
	fz_always(ctx)
	{
		fz_drop_device(ctx, dev);
	}
	fz_catch(ctx)
	{
		fz_drop_text_page(ctx, text);
		fz_rethrow(ctx);
	}

	return text;
}
Пример #4
0
static void drawpage(fz_context *ctx, fz_document *doc, int pagenum)
{
	fz_page *page;
	fz_display_list *list = NULL;
	fz_device *dev = NULL;
	int start;
	fz_cookie cookie = { 0 };
	int needshot = 0;

	fz_var(list);
	fz_var(dev);

	if (showtime)
	{
		start = gettime();
	}

	fz_try(ctx)
	{
		page = fz_load_page(doc, pagenum - 1);
	}
	fz_catch(ctx)
	{
		fz_rethrow_message(ctx, "cannot load page %d in file '%s'", pagenum, filename);
	}

	if (mujstest_file)
	{
		pdf_document *inter = pdf_specifics(doc);
		pdf_widget *widget = NULL;

		if (inter)
			widget = pdf_first_widget(inter, (pdf_page *)page);

		if (widget)
		{
			fprintf(mujstest_file, "GOTO %d\n", pagenum);
			needshot = 1;
		}
		for (;widget; widget = pdf_next_widget(widget))
		{
			fz_rect rect;
			int w, h, len;
			int type = pdf_widget_get_type(widget);

			pdf_bound_widget(widget, &rect);
			w = (rect.x1 - rect.x0);
			h = (rect.y1 - rect.y0);
			++mujstest_count;
			switch (type)
			{
			default:
				fprintf(mujstest_file, "%% UNKNOWN %0.2f %0.2f %0.2f %0.2f\n", rect.x0, rect.y0, rect.x1, rect.y1);
				break;
			case PDF_WIDGET_TYPE_PUSHBUTTON:
				fprintf(mujstest_file, "%% PUSHBUTTON %0.2f %0.2f %0.2f %0.2f\n", rect.x0, rect.y0, rect.x1, rect.y1);
				break;
			case PDF_WIDGET_TYPE_CHECKBOX:
				fprintf(mujstest_file, "%% CHECKBOX %0.2f %0.2f %0.2f %0.2f\n", rect.x0, rect.y0, rect.x1, rect.y1);
				break;
			case PDF_WIDGET_TYPE_RADIOBUTTON:
				fprintf(mujstest_file, "%% RADIOBUTTON %0.2f %0.2f %0.2f %0.2f\n", rect.x0, rect.y0, rect.x1, rect.y1);
				break;
			case PDF_WIDGET_TYPE_TEXT:
			{
				int maxlen = pdf_text_widget_max_len(inter, widget);
				int texttype = pdf_text_widget_content_type(inter, widget);

				/* If height is low, assume a single row, and base
				 * the width off that. */
				if (h < 10)
				{
					w = (w+h-1) / (h ? h : 1);
					h = 1;
				}
				/* Otherwise, if width is low, work off height */
				else if (w < 10)
				{
					h = (w+h-1) / (w ? w : 1);
					w = 1;
				}
				else
				{
					w = (w+9)/10;
					h = (h+9)/10;
				}
				len = w*h;
				if (len < 2)
					len = 2;
				if (len > maxlen)
					len = maxlen;
				fprintf(mujstest_file, "%% TEXT %0.2f %0.2f %0.2f %0.2f\n", rect.x0, rect.y0, rect.x1, rect.y1);
				switch (texttype)
				{
				default:
				case PDF_WIDGET_CONTENT_UNRESTRAINED:
					fprintf(mujstest_file, "TEXT %d ", mujstest_count);
					escape_string(mujstest_file, len-3, lorem);
					fprintf(mujstest_file, "\n");
					break;
				case PDF_WIDGET_CONTENT_NUMBER:
					fprintf(mujstest_file, "TEXT %d\n", mujstest_count);
					break;
				case PDF_WIDGET_CONTENT_SPECIAL:
#ifdef __MINGW32__
					fprintf(mujstest_file, "TEXT %I64d\n", 46702919800LL + mujstest_count);
#else
					fprintf(mujstest_file, "TEXT %lld\n", 46702919800LL + mujstest_count);
#endif
					break;
				case PDF_WIDGET_CONTENT_DATE:
					fprintf(mujstest_file, "TEXT Jun %d 1979\n", 1 + ((13 + mujstest_count) % 30));
					break;
				case PDF_WIDGET_CONTENT_TIME:
					++mujstest_count;
					fprintf(mujstest_file, "TEXT %02d:%02d\n", ((mujstest_count/60) % 24), mujstest_count % 60);
					break;
				}
				break;
			}
			case PDF_WIDGET_TYPE_LISTBOX:
				fprintf(mujstest_file, "%% LISTBOX %0.2f %0.2f %0.2f %0.2f\n", rect.x0, rect.y0, rect.x1, rect.y1);
				break;
			case PDF_WIDGET_TYPE_COMBOBOX:
				fprintf(mujstest_file, "%% COMBOBOX %0.2f %0.2f %0.2f %0.2f\n", rect.x0, rect.y0, rect.x1, rect.y1);
				break;
			}
			fprintf(mujstest_file, "CLICK %0.2f %0.2f\n", (rect.x0+rect.x1)/2, (rect.y0+rect.y1)/2);
		}
	}

	if (uselist)
	{
		fz_try(ctx)
		{
			list = fz_new_display_list(ctx);
			dev = fz_new_list_device(ctx, list);
			fz_run_page(doc, page, dev, &fz_identity, &cookie);
		}
		fz_always(ctx)
		{
			fz_free_device(dev);
			dev = NULL;
		}
		fz_catch(ctx)
		{
			fz_drop_display_list(ctx, list);
			fz_free_page(doc, page);
			fz_rethrow_message(ctx, "cannot draw page %d in file '%s'", pagenum, filename);
		}
	}

	if (showxml)
	{
		fz_try(ctx)
		{
			dev = fz_new_trace_device(ctx);
			if (list)
				fz_run_display_list(list, dev, &fz_identity, &fz_infinite_rect, &cookie);
			else
				fz_run_page(doc, page, dev, &fz_identity, &cookie);
		}
		fz_always(ctx)
		{
			fz_free_device(dev);
			dev = NULL;
		}
		fz_catch(ctx)
		{
			fz_drop_display_list(ctx, list);
			fz_free_page(doc, page);
			fz_rethrow(ctx);
		}
	}

	if (showtext)
	{
		fz_text_page *text = NULL;

		fz_var(text);

		fz_try(ctx)
		{
			text = fz_new_text_page(ctx);
			dev = fz_new_text_device(ctx, sheet, text);
			if (showtext == TEXT_HTML)
				fz_disable_device_hints(dev, FZ_IGNORE_IMAGE);
			if (list)
				fz_run_display_list(list, dev, &fz_identity, &fz_infinite_rect, &cookie);
			else
				fz_run_page(doc, page, dev, &fz_identity, &cookie);
			fz_free_device(dev);
			dev = NULL;
			if (showtext == TEXT_XML)
			{
				fz_print_text_page_xml(ctx, out, text);
			}
			else if (showtext == TEXT_HTML)
			{
				fz_analyze_text(ctx, sheet, text);
				fz_print_text_page_html(ctx, out, text);
			}
			else if (showtext == TEXT_PLAIN)
			{
				fz_print_text_page(ctx, out, text);
				fz_printf(out, "\f\n");
			}
		}
		fz_always(ctx)
		{
			fz_free_device(dev);
			dev = NULL;
			fz_free_text_page(ctx, text);
		}
		fz_catch(ctx)
		{
			fz_drop_display_list(ctx, list);
			fz_free_page(doc, page);
			fz_rethrow(ctx);
		}
	}

	if (showmd5 || showtime)
		printf("page %s %d", filename, pagenum);

	if (pdfout)
	{
		fz_matrix ctm;
		fz_rect bounds, tbounds;
		pdf_page *newpage;

		fz_bound_page(doc, page, &bounds);
		fz_rotate(&ctm, rotation);
		tbounds = bounds;
		fz_transform_rect(&tbounds, &ctm);

		newpage = pdf_create_page(pdfout, bounds, 72, 0);

		fz_try(ctx)
		{
			dev = pdf_page_write(pdfout, newpage);
			if (list)
				fz_run_display_list(list, dev, &ctm, &tbounds, &cookie);
			else
				fz_run_page(doc, page, dev, &ctm, &cookie);
			fz_free_device(dev);
			dev = NULL;
		}
		fz_always(ctx)
		{
			fz_free_device(dev);
			dev = NULL;
		}
		fz_catch(ctx)
		{
			fz_drop_display_list(ctx, list);
			fz_free_page(doc, page);
			fz_rethrow(ctx);
		}
		pdf_insert_page(pdfout, newpage, INT_MAX);
		pdf_free_page(pdfout, newpage);
	}

	if (output && output_format == OUT_SVG)
	{
		float zoom;
		fz_matrix ctm;
		fz_rect bounds, tbounds;
		char buf[512];
		FILE *file;
		fz_output *out;

		if (!strcmp(output, "-"))
			file = stdout;
		else
		{
			sprintf(buf, output, pagenum);
			file = fopen(buf, "wb");
			if (file == NULL)
				fz_throw(ctx, FZ_ERROR_GENERIC, "cannot open file '%s': %s", buf, strerror(errno));
		}

		out = fz_new_output_with_file(ctx, file);

		fz_bound_page(doc, page, &bounds);
		zoom = resolution / 72;
		fz_pre_rotate(fz_scale(&ctm, zoom, zoom), rotation);
		tbounds = bounds;
		fz_transform_rect(&tbounds, &ctm);

		fz_try(ctx)
		{
			dev = fz_new_svg_device(ctx, out, tbounds.x1-tbounds.x0, tbounds.y1-tbounds.y0);
			if (list)
				fz_run_display_list(list, dev, &ctm, &tbounds, &cookie);
			else
				fz_run_page(doc, page, dev, &ctm, &cookie);
			fz_free_device(dev);
			dev = NULL;
		}
		fz_always(ctx)
		{
			fz_free_device(dev);
			dev = NULL;
			fz_close_output(out);
			if (file != stdout)
				fclose(file);
		}
		fz_catch(ctx)
		{
			fz_drop_display_list(ctx, list);
			fz_free_page(doc, page);
			fz_rethrow(ctx);
		}
	}
Пример #5
0
static void drawpage(fz_context *ctx, fz_document *doc, int pagenum)
{
	fz_page *page;
	fz_display_list *list = NULL;
	fz_device *dev = NULL;
	int start;
	fz_cookie cookie = { 0 };

	fz_var(list);
	fz_var(dev);

	if (showtime)
	{
		start = gettime();
	}

	fz_try(ctx)
	{
		page = fz_load_page(doc, pagenum - 1);
	}
	fz_catch(ctx)
	{
		fz_rethrow_message(ctx, "cannot load page %d in file '%s'", pagenum, filename);
	}

	if (uselist)
	{
		fz_try(ctx)
		{
			list = fz_new_display_list(ctx);
			dev = fz_new_list_device(ctx, list);
			fz_run_page(doc, page, dev, &fz_identity, &cookie);
		}
		fz_always(ctx)
		{
			fz_free_device(dev);
			dev = NULL;
		}
		fz_catch(ctx)
		{
			fz_drop_display_list(ctx, list);
			fz_free_page(doc, page);
			fz_rethrow_message(ctx, "cannot draw page %d in file '%s'", pagenum, filename);
		}
	}

	if (showxml)
	{
		fz_try(ctx)
		{
			dev = fz_new_trace_device(ctx);
			if (list)
				fz_run_display_list(list, dev, &fz_identity, &fz_infinite_rect, &cookie);
			else
				fz_run_page(doc, page, dev, &fz_identity, &cookie);
		}
		fz_always(ctx)
		{
			fz_free_device(dev);
			dev = NULL;
		}
		fz_catch(ctx)
		{
			fz_drop_display_list(ctx, list);
			fz_free_page(doc, page);
			fz_rethrow(ctx);
		}
	}

	if (showtext)
	{
		fz_text_page *text = NULL;

		fz_var(text);

		fz_try(ctx)
		{
			text = fz_new_text_page(ctx);
			dev = fz_new_text_device(ctx, sheet, text);
			if (showtext == TEXT_HTML)
				fz_disable_device_hints(dev, FZ_IGNORE_IMAGE);
			if (list)
				fz_run_display_list(list, dev, &fz_identity, &fz_infinite_rect, &cookie);
			else
				fz_run_page(doc, page, dev, &fz_identity, &cookie);
			fz_free_device(dev);
			dev = NULL;
			if (showtext == TEXT_XML)
			{
				fz_print_text_page_xml(ctx, out, text);
			}
			else if (showtext == TEXT_HTML)
			{
				fz_analyze_text(ctx, sheet, text);
				fz_print_text_page_html(ctx, out, text);
			}
			else if (showtext == TEXT_PLAIN)
			{
				fz_print_text_page(ctx, out, text);
				fz_printf(out, "\f\n");
			}
		}
		fz_always(ctx)
		{
			fz_free_device(dev);
			dev = NULL;
			fz_free_text_page(ctx, text);
		}
		fz_catch(ctx)
		{
			fz_drop_display_list(ctx, list);
			fz_free_page(doc, page);
			fz_rethrow(ctx);
		}
	}

	if (showmd5 || showtime || showfeatures)
		printf("page %s %d", filename, pagenum);

	if (showfeatures)
	{
		int iscolor;
		dev = fz_new_test_device(ctx, &iscolor, 0.02f);
		fz_try(ctx)
		{
			if (list)
				fz_run_display_list(list, dev, &fz_identity, &fz_infinite_rect, NULL);
			else
				fz_run_page(doc, page, dev, &fz_identity, &cookie);
		}
		fz_always(ctx)
		{
			fz_free_device(dev);
			dev = NULL;
		}
		fz_catch(ctx)
		{
			fz_rethrow(ctx);
		}
		printf(" %s", iscolor ? "color" : "grayscale");
	}

	if (pdfout)
	{
		fz_matrix ctm;
		fz_rect bounds, tbounds;
		pdf_page *newpage;

		fz_bound_page(doc, page, &bounds);
		fz_rotate(&ctm, rotation);
		tbounds = bounds;
		fz_transform_rect(&tbounds, &ctm);

		newpage = pdf_create_page(pdfout, bounds, 72, 0);

		fz_try(ctx)
		{
			dev = pdf_page_write(pdfout, newpage);
			if (list)
				fz_run_display_list(list, dev, &ctm, &tbounds, &cookie);
			else
				fz_run_page(doc, page, dev, &ctm, &cookie);
			fz_free_device(dev);
			dev = NULL;
		}
		fz_always(ctx)
		{
			fz_free_device(dev);
			dev = NULL;
		}
		fz_catch(ctx)
		{
			fz_drop_display_list(ctx, list);
			fz_free_page(doc, page);
			fz_rethrow(ctx);
		}
		pdf_insert_page(pdfout, newpage, INT_MAX);
		pdf_free_page(pdfout, newpage);
	}

	if (output && output_format == OUT_SVG)
	{
		float zoom;
		fz_matrix ctm;
		fz_rect bounds, tbounds;
		char buf[512];
		FILE *file;
		fz_output *out;

		if (!strcmp(output, "-"))
			file = stdout;
		else
		{
			sprintf(buf, output, pagenum);
			file = fopen(buf, "wb");
			if (file == NULL)
				fz_throw(ctx, FZ_ERROR_GENERIC, "cannot open file '%s': %s", buf, strerror(errno));
		}

		out = fz_new_output_with_file(ctx, file);

		fz_bound_page(doc, page, &bounds);
		zoom = resolution / 72;
		fz_pre_rotate(fz_scale(&ctm, zoom, zoom), rotation);
		tbounds = bounds;
		fz_transform_rect(&tbounds, &ctm);

		fz_try(ctx)
		{
			dev = fz_new_svg_device(ctx, out, tbounds.x1-tbounds.x0, tbounds.y1-tbounds.y0);
			if (list)
				fz_run_display_list(list, dev, &ctm, &tbounds, &cookie);
			else
				fz_run_page(doc, page, dev, &ctm, &cookie);
			fz_free_device(dev);
			dev = NULL;
		}
		fz_always(ctx)
		{
			fz_free_device(dev);
			dev = NULL;
			fz_close_output(out);
			if (file != stdout)
				fclose(file);
		}
		fz_catch(ctx)
		{
			fz_drop_display_list(ctx, list);
			fz_free_page(doc, page);
			fz_rethrow(ctx);
		}
	}
Пример #6
0
std::string PDFDocument::GetPageText(int page, int line_sep) {
  // 1. Init MuPDF structures.
  pdf_page* page_struct = GetPage(page);
  fz_stext_sheet* text_sheet = fz_new_stext_sheet(_fz_context);

  // 2. Render page.
#if MUPDF_VERSION >= 10010
  fz_stext_options stext_options = { 0 };
  // See #elif MUPDF_VERSION >= 10009 block below.
  fz_stext_page* text_page = fz_new_stext_page_from_page(
      _fz_context, &(page_struct->super), text_sheet, &stext_options);
#elif MUPDF_VERSION >= 10009
  // The function below is a wrapper around fz_run_page that uses a fresh
  // device. We can't use pdf_run_page to gather the text for us.
  // These notes are also left in here in case MuPDF's API changes again.
  fz_stext_page* text_page = fz_new_stext_page_from_page(
      _fz_context, &(page_struct->super), text_sheet);
#else
  fz_stext_page* text_page = fz_new_text_page(_fz_context);
  fz_device* dev = fz_new_stext_device(_fz_context, text_sheet, text_page);
  // I've no idea what fz_{begin,end}_page do, but without them pdf_run_page
  // segfaults :-/
  fz_begin_page(_fz_context, dev, &fz_infinite_rect, &fz_identity);
  pdf_run_page(
      _fz_context, _pdf_document, page_struct, dev, &fz_identity, nullptr);
  fz_end_page(_fz_context, dev);
#endif

  // 3. Build text.
  std::string r;
  for (fz_page_block* page_block = text_page->blocks;
       page_block < text_page->blocks + text_page->len;
       ++page_block) {
    assert(page_block != nullptr);
    if (page_block->type != FZ_PAGE_BLOCK_TEXT) {
      continue;
    }
    fz_stext_block* const text_block = page_block->u.text;
    assert(text_block != nullptr);
    for (fz_stext_line* text_line = text_block->lines;
         text_line < text_block->lines + text_block->len;
         ++text_line) {
      assert(text_line != nullptr);
      for (fz_stext_span* text_span = text_line->first_span;
           text_span != nullptr;
           text_span = text_span->next) {
        for (int i = 0; i < text_span->len; ++i) {
          const int c = text_span->text[i].c;
          // A single UTF-8 character cannot take more than 4 bytes, but let's
          // go for 8.
          char buffer[8];
          const int num_bytes = fz_runetochar(buffer, c);
          assert(num_bytes <= static_cast<int>(sizeof(buffer)));
          buffer[num_bytes] = '\0';
          r += buffer;
        }
      }
      if (!isspace(r.back())) {
        r += line_sep;
      }
    }
  }

  // 4. Clean up.
  fz_drop_stext_page(_fz_context, text_page);
  fz_drop_stext_sheet(_fz_context, text_sheet);

  return r;
}
Пример #7
0
static void drawpage(fz_context *ctx, fz_document *doc, int pagenum)
{
	fz_page *page;
	fz_display_list *list = NULL;
	fz_device *dev = NULL;
	int start;
	fz_cookie cookie = { 0 };
	int needshot = 0;

	fz_var(list);
	fz_var(dev);

	if (showtime)
	{
		start = gettime();
	}

	fz_try(ctx)
	{
		page = fz_load_page(doc, pagenum - 1);
	}
	fz_catch(ctx)
	{
		fz_throw(ctx, "cannot load page %d in file '%s'", pagenum, filename);
	}

	if (mujstest_file)
	{
		fz_interactive *inter = fz_interact(doc);
		fz_widget *widget = NULL;

		if (inter)
			widget = fz_first_widget(inter, page);

		if (widget)
		{
			fprintf(mujstest_file, "GOTO %d\n", pagenum);
			needshot = 1;
		}
		for (;widget; widget = fz_next_widget(inter, widget))
		{
			fz_rect rect = fz_widget_bbox(widget);
			int w = (rect.x1-rect.x0);
			int h = (rect.y1-rect.y0);
			int len;
			int type = fz_widget_get_type(widget);

			++mujstest_count;
			switch (type)
			{
			default:
				fprintf(mujstest_file, "%% UNKNOWN %0.2f %0.2f %0.2f %0.2f\n", rect.x0, rect.y0, rect.x1, rect.y1);
				break;
			case FZ_WIDGET_TYPE_PUSHBUTTON:
				fprintf(mujstest_file, "%% PUSHBUTTON %0.2f %0.2f %0.2f %0.2f\n", rect.x0, rect.y0, rect.x1, rect.y1);
				break;
			case FZ_WIDGET_TYPE_CHECKBOX:
				fprintf(mujstest_file, "%% CHECKBOX %0.2f %0.2f %0.2f %0.2f\n", rect.x0, rect.y0, rect.x1, rect.y1);
				break;
			case FZ_WIDGET_TYPE_RADIOBUTTON:
				fprintf(mujstest_file, "%% RADIOBUTTON %0.2f %0.2f %0.2f %0.2f\n", rect.x0, rect.y0, rect.x1, rect.y1);
				break;
			case FZ_WIDGET_TYPE_TEXT:
			{
				int maxlen = fz_text_widget_max_len(inter, widget);
				int texttype = fz_text_widget_content_type(inter, widget);

				/* If height is low, assume a single row, and base
				 * the width off that. */
				if (h < 10)
				{
					w = (w+h-1) / (h ? h : 1);
					h = 1;
				}
				/* Otherwise, if width is low, work off height */
				else if (w < 10)
				{
					h = (w+h-1) / (w ? w : 1);
					w = 1;
				}
				else
				{
					w = (w+9)/10;
					h = (h+9)/10;
				}
				len = w*h;
				if (len < 2)
					len = 2;
				if (len > maxlen)
					len = maxlen;
				fprintf(mujstest_file, "%% TEXT %0.2f %0.2f %0.2f %0.2f\n", rect.x0, rect.y0, rect.x1, rect.y1);
				switch (texttype)
				{
				default:
				case FZ_WIDGET_CONTENT_UNRESTRAINED:
					fprintf(mujstest_file, "TEXT %d ", mujstest_count);
					escape_string(mujstest_file, len-3, lorem);
					fprintf(mujstest_file, "\n");
					break;
				case FZ_WIDGET_CONTENT_NUMBER:
					fprintf(mujstest_file, "TEXT %d\n", mujstest_count);
					break;
				case FZ_WIDGET_CONTENT_SPECIAL:
					fprintf(mujstest_file, "TEXT %lld\n", 46702919800LL + mujstest_count);
					break;
				case FZ_WIDGET_CONTENT_DATE:
					fprintf(mujstest_file, "TEXT Jun %d 1979\n", 1 + ((13 + mujstest_count) % 30));
					break;
				case FZ_WIDGET_CONTENT_TIME:
					++mujstest_count;
					fprintf(mujstest_file, "TEXT %02d:%02d\n", ((mujstest_count/60) % 24), mujstest_count % 60);
					break;
				}
				break;
			}
			case FZ_WIDGET_TYPE_LISTBOX:
				fprintf(mujstest_file, "%% LISTBOX %0.2f %0.2f %0.2f %0.2f\n", rect.x0, rect.y0, rect.x1, rect.y1);
				break;
			case FZ_WIDGET_TYPE_COMBOBOX:
				fprintf(mujstest_file, "%% COMBOBOX %0.2f %0.2f %0.2f %0.2f\n", rect.x0, rect.y0, rect.x1, rect.y1);
				break;
			}
			fprintf(mujstest_file, "CLICK %0.2f %0.2f\n", (rect.x0+rect.x1)/2, (rect.y0+rect.y1)/2);
		}
	}

	if (uselist)
	{
		fz_try(ctx)
		{
			list = fz_new_display_list(ctx);
			dev = fz_new_list_device(ctx, list);
			fz_run_page(doc, page, dev, fz_identity, &cookie);
		}
		fz_always(ctx)
		{
			fz_free_device(dev);
			dev = NULL;
		}
		fz_catch(ctx)
		{
			fz_free_display_list(ctx, list);
			fz_free_page(doc, page);
			fz_throw(ctx, "cannot draw page %d in file '%s'", pagenum, filename);
		}
	}

	if (showxml)
	{
		fz_try(ctx)
		{
			dev = fz_new_trace_device(ctx);
			fz_printf(out, "<page number=\"%d\">\n", pagenum);
			if (list)
				fz_run_display_list(list, dev, fz_identity, fz_infinite_rect, &cookie);
			else
				fz_run_page(doc, page, dev, fz_identity, &cookie);
			fz_printf(out, "</page>\n");
		}
		fz_always(ctx)
		{
			fz_free_device(dev);
			dev = NULL;
		}
		fz_catch(ctx)
		{
			fz_free_display_list(ctx, list);
			fz_free_page(doc, page);
			fz_rethrow(ctx);
		}
	}

	if (showtext)
	{
		fz_text_page *text = NULL;

		fz_var(text);

		fz_try(ctx)
		{
			text = fz_new_text_page(ctx, fz_bound_page(doc, page));
			dev = fz_new_text_device(ctx, sheet, text);
			if (list)
				fz_run_display_list(list, dev, fz_identity, fz_infinite_rect, &cookie);
			else
				fz_run_page(doc, page, dev, fz_identity, &cookie);
			fz_free_device(dev);
			dev = NULL;
			if (showtext == TEXT_XML)
			{
				fz_print_text_page_xml(ctx, out, text);
			}
			else if (showtext == TEXT_HTML)
			{
				fz_print_text_page_html(ctx, out, text);
			}
			else if (showtext == TEXT_PLAIN)
			{
				fz_print_text_page(ctx, out, text);
				fz_printf(out, "\f\n");
			}
		}
		fz_always(ctx)
		{
			fz_free_device(dev);
			dev = NULL;
			fz_free_text_page(ctx, text);
		}
		fz_catch(ctx)
		{
			fz_free_display_list(ctx, list);
			fz_free_page(doc, page);
			fz_rethrow(ctx);
		}
	}

	if (showmd5 || showtime)
		printf("page %s %d", filename, pagenum);

#ifdef GDI_PLUS_BMP_RENDERER
	// hack: use -G0 to "enable GDI+" when saving as TGA
	if (output && (strstr(output, ".bmp") || strstr(output, ".tga") && !gamma_value))
		drawbmp(ctx, doc, page, list, pagenum);
	else
#endif
	if (output || showmd5 || showtime)
	{
		float zoom;
		fz_matrix ctm;
		fz_rect bounds, tbounds;
		fz_bbox ibounds;
		fz_pixmap *pix = NULL;
		int w, h;

		fz_var(pix);

		bounds = fz_bound_page(doc, page);
		zoom = resolution / 72;
		ctm = fz_scale(zoom, zoom);
		ctm = fz_concat(ctm, fz_rotate(rotation));
		tbounds = fz_transform_rect(ctm, bounds);
		ibounds = fz_round_rect(tbounds); /* convert to integers */

		/* Make local copies of our width/height */
		w = width;
		h = height;

		/* If a resolution is specified, check to see whether w/h are
		 * exceeded; if not, unset them. */
		if (res_specified)
		{
			int t;
			t = ibounds.x1 - ibounds.x0;
			if (w && t <= w)
				w = 0;
			t = ibounds.y1 - ibounds.y0;
			if (h && t <= h)
				h = 0;
		}

		/* Now w or h will be 0 unless they need to be enforced. */
		if (w || h)
		{
			float scalex = w / (tbounds.x1 - tbounds.x0);
			float scaley = h / (tbounds.y1 - tbounds.y0);

			if (fit)
			{
				if (w == 0)
					scalex = 1.0f;
				if (h == 0)
					scaley = 1.0f;
			}
			else
			{
				if (w == 0)
					scalex = scaley;
				if (h == 0)
					scaley = scalex;
			}
			if (!fit)
			{
				if (scalex > scaley)
					scalex = scaley;
				else
					scaley = scalex;
			}
			ctm = fz_concat(ctm, fz_scale(scalex, scaley));
			tbounds = fz_transform_rect(ctm, bounds);
		}
		ibounds = fz_round_rect(tbounds);

		/* TODO: banded rendering and multi-page ppm */

		fz_try(ctx)
		{
			pix = fz_new_pixmap_with_bbox(ctx, colorspace, ibounds);

			if (savealpha)
				fz_clear_pixmap(ctx, pix);
			else
				fz_clear_pixmap_with_value(ctx, pix, 255);

			dev = fz_new_draw_device(ctx, pix);
			if (list)
				fz_run_display_list(list, dev, ctm, tbounds, &cookie);
			else
				fz_run_page(doc, page, dev, ctm, &cookie);
			fz_free_device(dev);
			dev = NULL;

			if (invert)
				fz_invert_pixmap(ctx, pix);
			if (gamma_value != 1)
				fz_gamma_pixmap(ctx, pix, gamma_value);

			if (savealpha)
				fz_unmultiply_pixmap(ctx, pix);

			if (output)
			{
				char buf[512];
				sprintf(buf, output, pagenum);
				if (strstr(output, ".pgm") || strstr(output, ".ppm") || strstr(output, ".pnm"))
					fz_write_pnm(ctx, pix, buf);
				else if (strstr(output, ".pam"))
					fz_write_pam(ctx, pix, buf, savealpha);
				else if (strstr(output, ".png"))
					fz_write_png(ctx, pix, buf, savealpha);
				else if (strstr(output, ".pbm")) {
					fz_bitmap *bit = fz_halftone_pixmap(ctx, pix, NULL);
					fz_write_pbm(ctx, bit, buf);
					fz_drop_bitmap(ctx, bit);
				}
				/* SumatraPDF: support TGA as output format */
				else if (strstr(output, ".tga"))
					fz_write_tga(ctx, pix, buf, savealpha);
			}

			if (showmd5)
			{
				unsigned char digest[16];
				int i;

				fz_md5_pixmap(pix, digest);
				printf(" ");
				for (i = 0; i < 16; i++)
					printf("%02x", digest[i]);
			}
		}
		fz_always(ctx)
		{
			fz_free_device(dev);
			dev = NULL;
			fz_drop_pixmap(ctx, pix);
		}
		fz_catch(ctx)
		{
			fz_free_display_list(ctx, list);
			fz_free_page(doc, page);
			fz_rethrow(ctx);
		}
	}
Пример #8
0
DrPage * DrPDFExtractor::ExtractPage(unsigned int pageno)
{
    fz_page * page = fz_load_page(m_doc, pageno);
    if (page == NULL) {
        return NULL;
    }

    DrPage * dpage = new DrPage;
    dpage->SetPageNo(pageno);
    
    std::list<DrChar *> charlist;
    std::list<DrPhrase *> phraselist;
    std::list<DrLine *> linelist;
    std::list<DrZone *> &zonelist = dpage->m_zonelist;
    
    
    fz_matrix transform;
    fz_rotate(&transform,0);
    fz_pre_scale(&transform, 1.0f, 1.0f);
    
    fz_rect bounds;
    fz_bound_page(m_doc, page, &bounds);
    fz_transform_rect(&bounds, &transform);
    fz_irect bbox;
    fz_round_rect(&bbox, &bounds);
    fz_matrix ttransform = transform;
    fz_pixmap *pix = fz_new_pixmap_with_bbox(m_ctx, fz_device_rgb(m_ctx), &bbox);
    fz_clear_pixmap_with_value(m_ctx, pix, 0xff);
    
    fz_device * dev = fz_new_draw_device(m_ctx,pix);
    fz_run_page(m_doc, page, dev, &transform, NULL);
    fz_free_device(dev);

	fz_text_sheet * sheet = fz_new_text_sheet(m_ctx);
	fz_text_page * tpage = fz_new_text_page(m_ctx);
    fz_device * cdev = fz_new_text_device(m_ctx, sheet, tpage);
    fz_run_page(m_doc, page, cdev, &ttransform, NULL);
    
    
    ExtractChars(charlist,tpage);
    fz_free_device(cdev);
    
	//    DrThumbnail * thumb = new DrThumbnail(m_ctx,pix,pageno);
	//   dpage->m_thumbnail = thumb;

    
    DrTextGrouper::TextGroup(phraselist, charlist);
    
    
    std::list<DrPhrase *>::iterator itphrase = phraselist.begin();
    while (itphrase != phraselist.end()) {
        if ((*itphrase)->IsSpacePhrase()) {
            delete *itphrase;
            itphrase = phraselist.erase(itphrase);
        }
        else itphrase++;
    }
    
    DrTextGrouper::TextGroup(linelist, phraselist);
    DrTextGrouper::TextGroup(zonelist, linelist);
    dpage->CalculatePageBox();
        //    fz_free_text_sheet(m_ctx, tsheet);
        //    fz_free_text_page(m_ctx, tpage);
    fz_free_page(m_doc, page);
    return dpage;
}
Пример #9
0
static void drawpage(fz_context *ctx, fz_document *doc, int pagenum)
{
	fz_page *page;
	fz_link *links;
	fz_display_list *list = NULL;
	fz_device *dev = NULL;
	int start;

	fz_var(list);
	fz_var(dev);

	if (showtime)
	{
		start = gettime();
	}

	fz_try(ctx)
	{
		page = fz_load_page(doc, pagenum - 1);
	}
	fz_catch(ctx)
	{
		fz_throw(ctx, "cannot load page %d in file '%s'", pagenum, filename);
	}

	fz_try(ctx)
	{
		links = fz_load_links(doc, page);
	}
	fz_catch(ctx)
	{
		fz_throw(ctx, "cannot load links for page %d in file '%s'", pagenum, filename);
	}

	if (uselist)
	{
		fz_try(ctx)
		{
			list = fz_new_display_list(ctx);
			dev = fz_new_list_device(ctx, list);
			fz_run_page(doc, page, dev, fz_identity, NULL);
		}
		fz_catch(ctx)
		{
			fz_free_device(dev);
			fz_free_display_list(ctx, list);
			fz_free_page(doc, page);
			fz_throw(ctx, "cannot draw page %d in file '%s'", pagenum, filename);
		}
		fz_free_device(dev);
		dev = NULL;
	}

	if (showjson)
	{
		fz_text_page *text = NULL;
		fz_var(text);
		fz_try(ctx)
		{
			text = fz_new_text_page(ctx, fz_bound_page(doc, page));
			dev = fz_new_text_device(ctx, sheet, text);
			if (list)
				fz_run_display_list(list, dev, fz_identity, fz_infinite_bbox, NULL);
			else
				fz_run_page(doc, page, dev, fz_identity, NULL);
			fz_free_device(dev);
			dev = NULL;
			fz_print_text_page_json(ctx, stdout, text, links);
			printf("\f\n");
		}
		fz_catch(ctx)
		{
			fz_free_device(dev);
			fz_free_text_page(ctx, text);
			fz_free_display_list(ctx, list);
			fz_free_page(doc, page);
			fz_rethrow(ctx);
		}
		fz_free_text_page(ctx, text);
	}



	if (showmd5 || showtime)
		printf("page %s %d", filename, pagenum);

	if (output || showmd5 || showtime)
	{
		float zoom;
		fz_matrix ctm;
		fz_rect bounds, bounds2;
		fz_bbox bbox;
		fz_pixmap *pix = NULL;
		int w, h;

		fz_var(pix);

		bounds = fz_bound_page(doc, page);
		zoom = resolution / 72;
		ctm = fz_scale(zoom, zoom);
		ctm = fz_concat(ctm, fz_rotate(rotation));
		bounds2 = fz_transform_rect(ctm, bounds);
		bbox = fz_round_rect(bounds2);
		/* Make local copies of our width/height */
		w = width;
		h = height;
		/* If a resolution is specified, check to see whether w/h are
		 * exceeded; if not, unset them. */
		if (res_specified)
		{
			int t;
			t = bbox.x1 - bbox.x0;
			if (w && t <= w)
				w = 0;
			t = bbox.y1 - bbox.y0;
			if (h && t <= h)
				h = 0;
		}
		/* Now w or h will be 0 unless then need to be enforced. */
		if (w || h)
		{
			float scalex = w/(bounds2.x1-bounds2.x0);
			float scaley = h/(bounds2.y1-bounds2.y0);

			if (fit)
			{
				if (w == 0)
					scalex = 1.0f;
				if (h == 0)
					scaley = 1.0f;
			}
			else
			{
				if (w == 0)
					scalex = scaley;
				if (h == 0)
					scaley = scalex;
			}
			if (!fit)
			{
				if (scalex > scaley)
					scalex = scaley;
				else
					scaley = scalex;
			}
			ctm = fz_concat(ctm, fz_scale(scalex, scaley));
			bounds2 = fz_transform_rect(ctm, bounds);
		}
		bbox = fz_round_rect(bounds2);

		/* TODO: banded rendering and multi-page ppm */

		fz_try(ctx)
		{
			pix = fz_new_pixmap_with_bbox(ctx, colorspace, bbox);

			if (savealpha)
				fz_clear_pixmap(ctx, pix);
			else
				fz_clear_pixmap_with_value(ctx, pix, 255);

			dev = fz_new_draw_device(ctx, pix);
			if (list)
				fz_run_display_list(list, dev, ctm, bbox, NULL);
			else
				fz_run_page(doc, page, dev, ctm, NULL);
			fz_free_device(dev);
			dev = NULL;

			if (invert)
				fz_invert_pixmap(ctx, pix);
			if (gamma_value != 1)
				fz_gamma_pixmap(ctx, pix, gamma_value);

			if (savealpha)
				fz_unmultiply_pixmap(ctx, pix);

			if (output)
			{
				char buf[512];
				sprintf(buf, output, pagenum);
				if (strstr(output, ".pgm") || strstr(output, ".ppm") || strstr(output, ".pnm"))
					fz_write_pnm(ctx, pix, buf);
				else if (strstr(output, ".pam"))
					fz_write_pam(ctx, pix, buf, savealpha);
				else if (strstr(output, ".png"))
					fz_write_png(ctx, pix, buf, savealpha);
				else if (strstr(output, ".pbm")) {
					fz_bitmap *bit = fz_halftone_pixmap(ctx, pix, NULL);
					fz_write_pbm(ctx, bit, buf);
					fz_drop_bitmap(ctx, bit);
				}
			}

			if (showmd5)
			{
				unsigned char digest[16];
				int i;

				fz_md5_pixmap(pix, digest);
				printf(" ");
				for (i = 0; i < 16; i++)
					printf("%02x", digest[i]);
			}

			fz_drop_pixmap(ctx, pix);
		}
		fz_catch(ctx)
		{
			fz_free_device(dev);
			fz_drop_pixmap(ctx, pix);
			fz_free_display_list(ctx, list);
			fz_free_page(doc, page);
			fz_rethrow(ctx);
		}
	}
Пример #10
0
std::string PDFDocument::GetPageText(int page, int line_sep) {
  // 1. Init MuPDF structures.
  pdf_page* page_struct = GetPage(page);

#if MUPDF_VERSION < 10012
  fz_stext_sheet* text_sheet = fz_new_stext_sheet(_fz_context);
#endif

  // 2. Render page.
#if MUPDF_VERSION >= 10012
  fz_stext_options stext_options = {0};
  // See #elif MUPDF_VERSION >= 10009 block below.
  fz_stext_page* text_page = fz_new_stext_page_from_page(
      _fz_context, &(page_struct->super), &stext_options);
#elif MUPDF_VERSION >= 10010
  fz_stext_options stext_options = {0};
  // See #elif MUPDF_VERSION >= 10009 block below.
  fz_stext_page* text_page = fz_new_stext_page_from_page(
      _fz_context, &(page_struct->super), text_sheet, &stext_options);
#elif MUPDF_VERSION >= 10009
  // The function below is a wrapper around fz_run_page that uses a fresh
  // device. We can't use pdf_run_page to gather the text for us.
  // These notes are also left in here in case MuPDF's API changes again.
  fz_stext_page* text_page = fz_new_stext_page_from_page(
      _fz_context, &(page_struct->super), text_sheet);
#else
  fz_stext_page* text_page = fz_new_text_page(_fz_context);
  fz_device* dev = fz_new_stext_device(_fz_context, text_sheet, text_page);
  // I've no idea what fz_{begin,end}_page do, but without them pdf_run_page
  // segfaults :-/
  fz_begin_page(_fz_context, dev, &fz_infinite_rect, &fz_identity);
  pdf_run_page(
      _fz_context, _pdf_document, page_struct, dev, &fz_identity, nullptr);
  fz_end_page(_fz_context, dev);
#endif

  // 3. Build text.
  std::string r;
#if MUPDF_VERSION >= 10012
  for (fz_stext_block* text_block = text_page->first_block;
       text_block != nullptr; text_block = text_block->next) {
    if (text_block->type != FZ_STEXT_BLOCK_TEXT) {
      continue;
    }
    for (fz_stext_line* text_line = text_block->u.t.first_line;
         text_line != nullptr; text_line = text_line->next) {
      for (fz_stext_char* text_char = text_line->first_char;
           text_char != nullptr; text_char = text_char->next) {
        {
          const int c = text_char->c;
#else
  for (fz_page_block* page_block = text_page->blocks;
       page_block < text_page->blocks + text_page->len; ++page_block) {
    assert(page_block != nullptr);
    if (page_block->type != FZ_PAGE_BLOCK_TEXT) {
      continue;
    }
    fz_stext_block* const text_block = page_block->u.text;
    assert(text_block != nullptr);
    for (fz_stext_line* text_line = text_block->lines;
         text_line < text_block->lines + text_block->len; ++text_line) {
      assert(text_line != nullptr);
      for (fz_stext_span* text_span = text_line->first_span;
           text_span != nullptr; text_span = text_span->next) {
        for (int i = 0; i < text_span->len; ++i) {
          const int c = text_span->text[i].c;
#endif
          // A single UTF-8 character cannot take more than 4 bytes, but let's
          // go for 8.
          char buffer[8];
          const int num_bytes = fz_runetochar(buffer, c);
          assert(num_bytes <= static_cast<int>(sizeof(buffer)));
          buffer[num_bytes] = '\0';
          r += buffer;
        }
      }
      if (!isspace(r.back())) {
        r += line_sep;
      }
    }
  }

  // 4. Clean up.
  fz_drop_stext_page(_fz_context, text_page);
#if MUPDF_VERSION < 10012
  fz_drop_stext_sheet(_fz_context, text_sheet);
#endif

  return r;
}

PDFDocument::PDFOutlineItem::~PDFOutlineItem() {}

PDFDocument::PDFOutlineItem::PDFOutlineItem(fz_outline* src) {
  if (src == nullptr) {
    _dest_page = -1;
  } else {
    _title = src->title;
#if MUPDF_VERSION >= 10010
    _dest_page = src->page;
#else
    _dest_page = src->dest.ld.gotor.page;
#endif
  }
}

int PDFDocument::PDFOutlineItem::GetDestPage() const { return _dest_page; }

PDFDocument::PDFOutlineItem* PDFDocument::PDFOutlineItem::Build(
    fz_context* ctx, fz_outline* src) {
  PDFOutlineItem* root = nullptr;
  std::vector<std::unique_ptr<OutlineItem>> items;
  BuildRecursive(src, &items);
  fz_drop_outline(ctx, src);
  if (items.empty()) {
    return nullptr;
  } else if (items.size() == 1) {
    root = dynamic_cast<PDFOutlineItem*>(items[0].release());
  } else {
    root = new PDFOutlineItem(nullptr);
    root->_title = DEFAULT_ROOT_OUTLINE_ITEM_TITLE;
    root->_children.swap(items);
  }
  return root;
}

void PDFDocument::PDFOutlineItem::BuildRecursive(
    fz_outline* src,
    std::vector<std::unique_ptr<Document::OutlineItem>>* output) {
  assert(output != nullptr);
  for (fz_outline* i = src; i != nullptr; i = i->next) {
    PDFOutlineItem* item = new PDFOutlineItem(i);
    if (i->down != nullptr) {
      BuildRecursive(i->down, &(item->_children));
    }
    output->push_back(std::unique_ptr<Document::OutlineItem>(item));
  }
}

PDFDocument::PDFPageCache::PDFPageCache(int cache_size, PDFDocument* parent)
    : Cache<int, pdf_page*>(cache_size), _parent(parent) {}

PDFDocument::PDFPageCache::~PDFPageCache() { Clear(); }

pdf_page* PDFDocument::PDFPageCache::Load(const int& page) {
  std::unique_lock<std::mutex> lock(_mutex);
  return pdf_load_page(_parent->_fz_context, _parent->_pdf_document, page);
}

void PDFDocument::PDFPageCache::Discard(
    const int& page, pdf_page* const& page_struct) {
  std::unique_lock<std::mutex> lock(_mutex);
  pdf_drop_page(_parent->_fz_context, _parent->_pdf_document, page_struct);
}
Пример #11
0
JNIEXPORT jobjectArray JNICALL
Java_com_artifex_mupdf_MuPDFCore_searchPage(JNIEnv * env, jobject thiz, jstring jtext)
{
	jclass         rectClass;
	jmethodID      ctor;
	jobjectArray   arr;
	jobject        rect;
	fz_text_sheet *sheet = NULL;
	fz_text_page  *text = NULL;
	fz_device     *dev  = NULL;
	float          zoom;
	fz_matrix      ctm;
	int            pos;
	int            len;
	int            i, n;
	int            hit_count = 0;
	const char    *str;
	page_cache    *pc = &pages[current];

	rectClass = (*env)->FindClass(env, "android/graphics/RectF");
	if (rectClass == NULL) return NULL;
	ctor = (*env)->GetMethodID(env, rectClass, "<init>", "(FFFF)V");
	if (ctor == NULL) return NULL;
	str = (*env)->GetStringUTFChars(env, jtext, NULL);
	if (str == NULL) return NULL;

	fz_var(sheet);
	fz_var(text);
	fz_var(dev);

	fz_try(ctx)
	{
		fz_rect rect;

		if (hit_bbox == NULL)
			hit_bbox = fz_malloc_array(ctx, MAX_SEARCH_HITS, sizeof(*hit_bbox));

		zoom = resolution / 72;
		ctm = fz_scale(zoom, zoom);
		rect = fz_transform_rect(ctm, pc->media_box);
		sheet = fz_new_text_sheet(ctx);
		text = fz_new_text_page(ctx, rect);
		dev  = fz_new_text_device(ctx, sheet, text);
		fz_run_page(doc, pc->page, dev, ctm, NULL);
		fz_free_device(dev);
		dev = NULL;

		len = textlen(text);
		for (pos = 0; pos < len; pos++)
		{
			fz_bbox rr = fz_empty_bbox;
			n = match(text, str, pos);
			for (i = 0; i < n; i++)
				rr = fz_union_bbox(rr, bboxcharat(text, pos + i));

			if (!fz_is_empty_bbox(rr) && hit_count < MAX_SEARCH_HITS)
				hit_bbox[hit_count++] = rr;
		}
	}
	fz_always(ctx)
	{
		fz_free_text_page(ctx, text);
		fz_free_text_sheet(ctx, sheet);
		fz_free_device(dev);
	}
	fz_catch(ctx)
	{
		jclass cls;
		(*env)->ReleaseStringUTFChars(env, jtext, str);
		cls = (*env)->FindClass(env, "java/lang/OutOfMemoryError");
		if (cls != NULL)
			(*env)->ThrowNew(env, cls, "Out of memory in MuPDFCore_searchPage");
		(*env)->DeleteLocalRef(env, cls);

		return NULL;
	}

	(*env)->ReleaseStringUTFChars(env, jtext, str);

	arr = (*env)->NewObjectArray(env,
					hit_count,
					rectClass,
					NULL);
	if (arr == NULL) return NULL;

	for (i = 0; i < hit_count; i++) {
		rect = (*env)->NewObject(env, rectClass, ctor,
				(float) (hit_bbox[i].x0),
				(float) (hit_bbox[i].y0),
				(float) (hit_bbox[i].x1),
				(float) (hit_bbox[i].y1));
		if (rect == NULL)
			return NULL;
		(*env)->SetObjectArrayElement(env, arr, i, rect);
		(*env)->DeleteLocalRef(env, rect);
	}

	return arr;
}