Example #1
0
std::shared_ptr<std::vector<std::shared_ptr<RectFloat>>> MuPDFDoc::SearchText(const char* searchText)
{
	fz_text_sheet *sheet = nullptr;
	fz_text_page *text = nullptr;
	fz_device *dev  = nullptr;
	PageCache *pageCache = &m_pages[m_currentPage];
	fz_var(sheet);
	fz_var(text);
	fz_var(dev);
	std::shared_ptr<std::vector<std::shared_ptr<RectFloat>>> hints(new std::vector<std::shared_ptr<RectFloat>>());
	fz_try(m_context)
	{
		int hitCount = 0;
		fz_matrix ctm = CalcConvertMatrix();
		fz_rect mbrect = fz_transform_rect(ctm, pageCache->mediaBox);
		sheet = fz_new_text_sheet(m_context);
		text = fz_new_text_page(m_context, mbrect);
		dev = fz_new_text_device(m_context, sheet, text);
		fz_run_page(m_document, pageCache->page, dev, ctm, nullptr);
		fz_free_device(dev);
		dev = nullptr;
		int len = TextLen(text);
		for (int pos = 0; pos < len; pos++)
		{
			fz_bbox rr = fz_empty_bbox;
			int n = Match(text, searchText, pos);
			for (int i = 0; i < n; i++)
				rr = fz_union_bbox(rr, BBoxCharAt(text, pos + i));

			if (!fz_is_empty_bbox(rr) && hitCount < MAX_SEARCH_HITS)
			{
				hints->push_back(std::shared_ptr<RectFloat>(new RectFloat(rr.x0, rr.y0, rr.x1, rr.y1)));
				if (++hitCount >= MAX_SEARCH_HITS)
					break;
			}
		}
	}
	fz_always(m_context)
	{
		fz_free_text_page(m_context, text);
		fz_free_text_sheet(m_context, sheet);
		fz_free_device(dev);
	}
	fz_catch(m_context)
	{
		return std::shared_ptr<std::vector<std::shared_ptr<RectFloat>>>(nullptr);
	}
	return hints;
}
Example #2
0
fz_buffer *
fz_new_buffer_from_page(fz_context *ctx, fz_page *page, const fz_rect *sel, int crlf)
{
	fz_text_sheet *sheet;
	fz_text_page *text;
	fz_buffer *buf;

	sheet = fz_new_text_sheet(ctx);
	fz_try(ctx)
	{
		text = fz_new_text_page_from_page(ctx, page, sheet);
		buf = fz_new_buffer_from_text_page(ctx, text, sel, crlf);
	}
	fz_always(ctx)
		fz_drop_text_sheet(ctx, sheet);
	fz_catch(ctx)
		fz_rethrow(ctx);
	fz_drop_text_page(ctx, text);
	return buf;
}
Example #3
0
int
fz_search_page(fz_context *ctx, fz_page *page, const char *needle, fz_rect *hit_bbox, int hit_max)
{
	fz_text_sheet *sheet;
	fz_text_page *text;
	int count;

	sheet = fz_new_text_sheet(ctx);
	fz_try(ctx)
	{
		text = fz_new_text_page_from_page(ctx, page, sheet);
		count = fz_search_text_page(ctx, text, needle, hit_bbox, hit_max);
	}
	fz_always(ctx)
		fz_drop_text_sheet(ctx, sheet);
	fz_catch(ctx)
		fz_rethrow(ctx);
	fz_drop_text_page(ctx, text);
	return count;
}
DrPage * DrPDFExtractor::ExtractPage(unsigned int pageno)
{
    fz_page * page = fz_load_page(m_doc, pageno);
    if (page == NULL) {
        return NULL;
    }

    DrPage * dpage = new DrPage;
    dpage->SetPageNo(pageno);
    
    std::list<DrChar *> charlist;
    std::list<DrPhrase *> phraselist;
    std::list<DrLine *> linelist;
    std::list<DrZone *> &zonelist = dpage->m_zonelist;
    
    
    fz_matrix transform;
    fz_rotate(&transform,0);
    fz_pre_scale(&transform, 1.0f, 1.0f);
    
    fz_rect bounds;
    fz_bound_page(m_doc, page, &bounds);
    fz_transform_rect(&bounds, &transform);
    fz_irect bbox;
    fz_round_rect(&bbox, &bounds);
    fz_matrix ttransform = transform;
    fz_pixmap *pix = fz_new_pixmap_with_bbox(m_ctx, fz_device_rgb(m_ctx), &bbox);
    fz_clear_pixmap_with_value(m_ctx, pix, 0xff);
    
    fz_device * dev = fz_new_draw_device(m_ctx,pix);
    fz_run_page(m_doc, page, dev, &transform, NULL);
    fz_free_device(dev);

	fz_text_sheet * sheet = fz_new_text_sheet(m_ctx);
	fz_text_page * tpage = fz_new_text_page(m_ctx);
    fz_device * cdev = fz_new_text_device(m_ctx, sheet, tpage);
    fz_run_page(m_doc, page, cdev, &ttransform, NULL);
    
    
    ExtractChars(charlist,tpage);
    fz_free_device(cdev);
    
	//    DrThumbnail * thumb = new DrThumbnail(m_ctx,pix,pageno);
	//   dpage->m_thumbnail = thumb;

    
    DrTextGrouper::TextGroup(phraselist, charlist);
    
    
    std::list<DrPhrase *>::iterator itphrase = phraselist.begin();
    while (itphrase != phraselist.end()) {
        if ((*itphrase)->IsSpacePhrase()) {
            delete *itphrase;
            itphrase = phraselist.erase(itphrase);
        }
        else itphrase++;
    }
    
    DrTextGrouper::TextGroup(linelist, phraselist);
    DrTextGrouper::TextGroup(zonelist, linelist);
    dpage->CalculatePageBox();
        //    fz_free_text_sheet(m_ctx, tsheet);
        //    fz_free_text_page(m_ctx, tpage);
    fz_free_page(m_doc, page);
    return dpage;
}
JNIEXPORT jobjectArray JNICALL
Java_com_artifex_mupdf_MuPDFCore_searchPage(JNIEnv * env, jobject thiz, jstring jtext)
{
	jclass         rectClass;
	jmethodID      ctor;
	jobjectArray   arr;
	jobject        rect;
	fz_text_sheet *sheet = NULL;
	fz_text_page  *text = NULL;
	fz_device     *dev  = NULL;
	float          zoom;
	fz_matrix      ctm;
	int            pos;
	int            len;
	int            i, n;
	int            hit_count = 0;
	const char    *str;
	page_cache    *pc = &pages[current];

	rectClass = (*env)->FindClass(env, "android/graphics/RectF");
	if (rectClass == NULL) return NULL;
	ctor = (*env)->GetMethodID(env, rectClass, "<init>", "(FFFF)V");
	if (ctor == NULL) return NULL;
	str = (*env)->GetStringUTFChars(env, jtext, NULL);
	if (str == NULL) return NULL;

	fz_var(sheet);
	fz_var(text);
	fz_var(dev);

	fz_try(ctx)
	{
		fz_rect rect;

		if (hit_bbox == NULL)
			hit_bbox = fz_malloc_array(ctx, MAX_SEARCH_HITS, sizeof(*hit_bbox));

		zoom = resolution / 72;
		ctm = fz_scale(zoom, zoom);
		rect = fz_transform_rect(ctm, pc->media_box);
		sheet = fz_new_text_sheet(ctx);
		text = fz_new_text_page(ctx, rect);
		dev  = fz_new_text_device(ctx, sheet, text);
		fz_run_page(doc, pc->page, dev, ctm, NULL);
		fz_free_device(dev);
		dev = NULL;

		len = textlen(text);
		for (pos = 0; pos < len; pos++)
		{
			fz_bbox rr = fz_empty_bbox;
			n = match(text, str, pos);
			for (i = 0; i < n; i++)
				rr = fz_union_bbox(rr, bboxcharat(text, pos + i));

			if (!fz_is_empty_bbox(rr) && hit_count < MAX_SEARCH_HITS)
				hit_bbox[hit_count++] = rr;
		}
	}
	fz_always(ctx)
	{
		fz_free_text_page(ctx, text);
		fz_free_text_sheet(ctx, sheet);
		fz_free_device(dev);
	}
	fz_catch(ctx)
	{
		jclass cls;
		(*env)->ReleaseStringUTFChars(env, jtext, str);
		cls = (*env)->FindClass(env, "java/lang/OutOfMemoryError");
		if (cls != NULL)
			(*env)->ThrowNew(env, cls, "Out of memory in MuPDFCore_searchPage");
		(*env)->DeleteLocalRef(env, cls);

		return NULL;
	}

	(*env)->ReleaseStringUTFChars(env, jtext, str);

	arr = (*env)->NewObjectArray(env,
					hit_count,
					rectClass,
					NULL);
	if (arr == NULL) return NULL;

	for (i = 0; i < hit_count; i++) {
		rect = (*env)->NewObject(env, rectClass, ctor,
				(float) (hit_bbox[i].x0),
				(float) (hit_bbox[i].y0),
				(float) (hit_bbox[i].x1),
				(float) (hit_bbox[i].y1));
		if (rect == NULL)
			return NULL;
		(*env)->SetObjectArrayElement(env, arr, i, rect);
		(*env)->DeleteLocalRef(env, rect);
	}

	return arr;
}