std::shared_ptr<std::vector<std::shared_ptr<RectFloat>>> MuPDFDoc::SearchText(const char* searchText) { fz_text_sheet *sheet = nullptr; fz_text_page *text = nullptr; fz_device *dev = nullptr; PageCache *pageCache = &m_pages[m_currentPage]; fz_var(sheet); fz_var(text); fz_var(dev); std::shared_ptr<std::vector<std::shared_ptr<RectFloat>>> hints(new std::vector<std::shared_ptr<RectFloat>>()); fz_try(m_context) { int hitCount = 0; fz_matrix ctm = CalcConvertMatrix(); fz_rect mbrect = fz_transform_rect(ctm, pageCache->mediaBox); sheet = fz_new_text_sheet(m_context); text = fz_new_text_page(m_context, mbrect); dev = fz_new_text_device(m_context, sheet, text); fz_run_page(m_document, pageCache->page, dev, ctm, nullptr); fz_free_device(dev); dev = nullptr; int len = TextLen(text); for (int pos = 0; pos < len; pos++) { fz_bbox rr = fz_empty_bbox; int n = Match(text, searchText, pos); for (int i = 0; i < n; i++) rr = fz_union_bbox(rr, BBoxCharAt(text, pos + i)); if (!fz_is_empty_bbox(rr) && hitCount < MAX_SEARCH_HITS) { hints->push_back(std::shared_ptr<RectFloat>(new RectFloat(rr.x0, rr.y0, rr.x1, rr.y1))); if (++hitCount >= MAX_SEARCH_HITS) break; } } } fz_always(m_context) { fz_free_text_page(m_context, text); fz_free_text_sheet(m_context, sheet); fz_free_device(dev); } fz_catch(m_context) { return std::shared_ptr<std::vector<std::shared_ptr<RectFloat>>>(nullptr); } return hints; }
/*
 * Build a buffer from the text of a page.
 *
 * A temporary text sheet and text page are created for the page, the text
 * page is handed to fz_new_buffer_from_text_page together with sel and crlf,
 * and both temporaries are released before returning.
 *
 * ctx:  context used for all allocations and error handling.
 * page: page to extract text from.
 * sel:  forwarded unchanged to fz_new_buffer_from_text_page.
 * crlf: forwarded unchanged to fz_new_buffer_from_text_page.
 *
 * Returns the buffer produced by fz_new_buffer_from_text_page. Any error
 * raised while extracting or converting is rethrown to the caller after the
 * temporaries have been released.
 */
fz_buffer *
fz_new_buffer_from_page(fz_context *ctx, fz_page *page, const fz_rect *sel, int crlf)
{
    fz_text_sheet *sheet;
    fz_text_page *text = NULL;
    fz_buffer *buf = NULL;

    /* text is assigned inside fz_try and read in fz_always. */
    fz_var(text);

    sheet = fz_new_text_sheet(ctx);
    fz_try(ctx)
    {
        text = fz_new_text_page_from_page(ctx, page, sheet);
        buf = fz_new_buffer_from_text_page(ctx, text, sel, crlf);
    }
    fz_always(ctx)
    {
        /*
         * Release the text page on every path. Previously it was only
         * dropped after a successful run, so an error raised by
         * fz_new_buffer_from_text_page leaked it. text is still NULL here
         * if creating the text page itself failed.
         */
        fz_drop_text_page(ctx, text);
        fz_drop_text_sheet(ctx, sheet);
    }
    fz_catch(ctx)
        fz_rethrow(ctx);

    return buf;
}
/*
 * Search the text of a page for needle.
 *
 * A temporary text sheet and text page are created for the page, the search
 * itself is delegated to fz_search_text_page, and both temporaries are
 * released before returning.
 *
 * ctx:      context used for all allocations and error handling.
 * page:     page to search.
 * needle:   text to search for; forwarded to fz_search_text_page.
 * hit_bbox: output array forwarded to fz_search_text_page.
 * hit_max:  forwarded to fz_search_text_page (presumably the capacity of
 *           hit_bbox; confirm against fz_search_text_page).
 *
 * Returns the value returned by fz_search_text_page. Any error raised while
 * extracting or searching is rethrown to the caller after the temporaries
 * have been released.
 */
int
fz_search_page(fz_context *ctx, fz_page *page, const char *needle, fz_rect *hit_bbox, int hit_max)
{
    fz_text_sheet *sheet;
    fz_text_page *text = NULL;
    int count = 0;

    /* text is assigned inside fz_try and read in fz_always. */
    fz_var(text);

    sheet = fz_new_text_sheet(ctx);
    fz_try(ctx)
    {
        text = fz_new_text_page_from_page(ctx, page, sheet);
        count = fz_search_text_page(ctx, text, needle, hit_bbox, hit_max);
    }
    fz_always(ctx)
    {
        /*
         * Release the text page on every path. Previously it was only
         * dropped after a successful run, so an error raised by
         * fz_search_text_page leaked it. text is still NULL here if
         * creating the text page itself failed.
         */
        fz_drop_text_page(ctx, text);
        fz_drop_text_sheet(ctx, sheet);
    }
    fz_catch(ctx)
        fz_rethrow(ctx);

    return count;
}
/**
 * Loads one page of the document, extracts its characters through a MuPDF
 * text device and groups them into phrases, lines and zones.
 *
 * The resulting zones are written straight into the m_zonelist of a DrPage
 * allocated with new; that DrPage is returned to the caller.
 *
 * @param pageno  Page index handed to fz_load_page and stored on the DrPage.
 * @return The newly allocated DrPage, or NULL when fz_load_page returns NULL.
 */
DrPage * DrPDFExtractor::ExtractPage(unsigned int pageno)
{
    fz_page * page = fz_load_page(m_doc, pageno);
    if (page == NULL)
    {
        return NULL;
    }

    DrPage * dpage = new DrPage;
    dpage->SetPageNo(pageno);

    std::list<DrChar *> charlist;
    std::list<DrPhrase *> phraselist;
    std::list<DrLine *> linelist;
    // Zones are grouped directly into the page's own list.
    std::list<DrZone *> &zonelist = dpage->m_zonelist;

    // Page transform: rotation 0, scale 1.0 in both directions.
    fz_matrix transform;
    fz_rotate(&transform, 0);
    fz_pre_scale(&transform, 1.0f, 1.0f);

    fz_rect bounds;
    fz_bound_page(m_doc, page, &bounds);
    fz_transform_rect(&bounds, &transform);

    fz_irect bbox;
    fz_round_rect(&bbox, &bounds);

    fz_matrix ttransform = transform;

    // Render the page into an RGB pixmap cleared to 0xff.
    fz_pixmap *pix = fz_new_pixmap_with_bbox(m_ctx, fz_device_rgb(m_ctx), &bbox);
    fz_clear_pixmap_with_value(m_ctx, pix, 0xff);
    fz_device * dev = fz_new_draw_device(m_ctx, pix);
    fz_run_page(m_doc, page, dev, &transform, NULL);
    fz_free_device(dev);

    // Run the page a second time through a text device to collect characters.
    fz_text_sheet * sheet = fz_new_text_sheet(m_ctx);
    fz_text_page * tpage = fz_new_text_page(m_ctx);
    fz_device * cdev = fz_new_text_device(m_ctx, sheet, tpage);
    fz_run_page(m_doc, page, cdev, &ttransform, NULL);
    ExtractChars(charlist, tpage);
    fz_free_device(cdev);

    // DrThumbnail * thumb = new DrThumbnail(m_ctx,pix,pageno);
    // dpage->m_thumbnail = thumb;

    // The rendered pixmap has no remaining user while the thumbnail code above
    // is disabled, so release it here; it was previously leaked on every call.
    // NOTE(review): if the thumbnail is re-enabled and DrThumbnail keeps pix,
    // revisit this drop.
    fz_drop_pixmap(m_ctx, pix);

    DrTextGrouper::TextGroup(phraselist, charlist);

    // Discard phrases that consist only of spacing before building lines.
    std::list<DrPhrase *>::iterator itphrase = phraselist.begin();
    while (itphrase != phraselist.end())
    {
        if ((*itphrase)->IsSpacePhrase())
        {
            delete *itphrase;
            itphrase = phraselist.erase(itphrase);
        }
        else
        {
            ++itphrase;
        }
    }

    DrTextGrouper::TextGroup(linelist, phraselist);
    DrTextGrouper::TextGroup(zonelist, linelist);
    dpage->CalculatePageBox();

    // NOTE(review): sheet and tpage are never freed. The calls below were
    // already disabled (and refer to "tsheet", which is not declared here).
    // Confirm that ExtractChars copies everything it needs out of tpage before
    // enabling fz_free_text_page(m_ctx, tpage) / fz_free_text_sheet(m_ctx, sheet).
    // fz_free_text_sheet(m_ctx, tsheet);
    // fz_free_text_page(m_ctx, tpage);

    fz_free_page(m_doc, page);
    return dpage;
}
/*
 * JNI entry point for MuPDFCore.searchPage.
 *
 * Runs the current page (pages[current]) through a text device scaled by
 * resolution / 72, scans every character position with match(), and stores
 * the merged bounding box of each match in hit_bbox (a buffer declared
 * outside this function and allocated here on first use). At most
 * MAX_SEARCH_HITS boxes are kept.
 *
 * env:   JNI environment.
 * thiz:  calling Java object; not used.
 * jtext: search string.
 *
 * Returns a new array of android.graphics.RectF, one element per stored hit.
 * Returns NULL if a JNI lookup, string access or object creation fails, or if
 * MuPDF raises an error; in the latter case a java.lang.OutOfMemoryError is
 * thrown (when that class can be found).
 */
JNIEXPORT jobjectArray JNICALL
Java_com_artifex_mupdf_MuPDFCore_searchPage(JNIEnv * env, jobject thiz, jstring jtext)
{
    page_cache *cache = &pages[current];
    fz_text_sheet *styles = NULL;
    fz_text_page *page_text = NULL;
    fz_device *text_dev = NULL;
    jclass rect_cls;
    jmethodID rect_ctor;
    jobjectArray result;
    const char *needle;
    int found = 0;
    int idx;

    /* Resolve RectF and its (float, float, float, float) constructor first. */
    rect_cls = (*env)->FindClass(env, "android/graphics/RectF");
    if (rect_cls == NULL)
        return NULL;

    rect_ctor = (*env)->GetMethodID(env, rect_cls, "<init>", "(FFFF)V");
    if (rect_ctor == NULL)
        return NULL;

    needle = (*env)->GetStringUTFChars(env, jtext, NULL);
    if (needle == NULL)
        return NULL;

    /* Assigned inside fz_try, read in fz_always. */
    fz_var(styles);
    fz_var(page_text);
    fz_var(text_dev);

    fz_try(ctx)
    {
        fz_rect bounds;
        fz_matrix ctm;
        float zoom;
        int text_len;
        int start;

        /* The hit buffer is created once and then reused. */
        if (hit_bbox == NULL)
            hit_bbox = fz_malloc_array(ctx, MAX_SEARCH_HITS, sizeof(*hit_bbox));

        zoom = resolution / 72;
        ctm = fz_scale(zoom, zoom);
        bounds = fz_transform_rect(ctm, cache->media_box);

        styles = fz_new_text_sheet(ctx);
        page_text = fz_new_text_page(ctx, bounds);
        text_dev = fz_new_text_device(ctx, styles, page_text);
        fz_run_page(doc, cache->page, text_dev, ctm, NULL);

        /* Done with the device; clear it so fz_always frees nothing twice. */
        fz_free_device(text_dev);
        text_dev = NULL;

        text_len = textlen(page_text);
        for (start = 0; start < text_len; start++)
        {
            fz_bbox hit = fz_empty_bbox;
            int matched = match(page_text, needle, start);
            int k;

            /* Merge the boxes of every character in this match. */
            for (k = 0; k < matched; k++)
                hit = fz_union_bbox(hit, bboxcharat(page_text, start + k));

            /* Skip non-matches, and anything beyond the buffer capacity. */
            if (fz_is_empty_bbox(hit) || found >= MAX_SEARCH_HITS)
                continue;

            hit_bbox[found++] = hit;
        }
    }
    fz_always(ctx)
    {
        fz_free_text_page(ctx, page_text);
        fz_free_text_sheet(ctx, styles);
        fz_free_device(text_dev);
    }
    fz_catch(ctx)
    {
        jclass oom_cls;

        (*env)->ReleaseStringUTFChars(env, jtext, needle);
        oom_cls = (*env)->FindClass(env, "java/lang/OutOfMemoryError");
        if (oom_cls != NULL)
            (*env)->ThrowNew(env, oom_cls, "Out of memory in MuPDFCore_searchPage");
        (*env)->DeleteLocalRef(env, oom_cls);

        return NULL;
    }

    (*env)->ReleaseStringUTFChars(env, jtext, needle);

    result = (*env)->NewObjectArray(env, found, rect_cls, NULL);
    if (result == NULL)
        return NULL;

    /* Wrap each stored box in a RectF and place it in the result array. */
    for (idx = 0; idx < found; idx++)
    {
        jobject jrect = (*env)->NewObject(env, rect_cls, rect_ctor,
                (float) (hit_bbox[idx].x0),
                (float) (hit_bbox[idx].y0),
                (float) (hit_bbox[idx].x1),
                (float) (hit_bbox[idx].y1));
        if (jrect == NULL)
            return NULL;

        (*env)->SetObjectArrayElement(env, result, idx, jrect);
        (*env)->DeleteLocalRef(env, jrect);
    }

    return result;
}