/**
 * JNI entry point: collects the words of one DjVu page into a Java list.
 *
 * Polls ddjvu_document_get_pagetext() at "word" detail until it stops returning
 * miniexp_dummy, then passes the expression to djvu_get_djvu_words() together
 * with the search pattern.
 *
 * @param jenv          JNI environment of the calling thread.
 * @param cls           Calling Java class (not used here).
 * @param docHandle     Native ddjvu_document_t pointer carried in a jlong.
 * @param pageNumber    Page index handed unchanged to ddjvu_document_get_pagetext().
 * @param contextHandle Handle forwarded to waitAndHandleMessages() while waiting.
 * @param pattern       Search pattern forwarded to djvu_get_djvu_words().
 * @return the list created through SearchHelper, or NULL when the page text is
 *         nil / not a list or the helper failed to initialise.
 */
extern "C" jobject Java_org_ebookdroid_droids_djvu_codec_DjvuPage_getPageText(JNIEnv *jenv, jclass cls, jlong docHandle, jint pageNumber, jlong contextHandle, jstring pattern)
{
    ddjvu_document_t* const doc = (ddjvu_document_t*) docHandle;

    miniexp_t r = miniexp_nil;
    while ((r = ddjvu_document_get_pagetext(doc, pageNumber, "word")) == miniexp_dummy)
    {
        waitAndHandleMessages(jenv, contextHandle);
    }

    if (r == miniexp_nil || !miniexp_consp(r))
    {
        // DEBUG("getPageText(%d): no text on page", pageNumber);
        // The document keeps the expression alive until it is released explicitly.
        ddjvu_miniexp_release(doc, r);
        return NULL;
    }
    // DEBUG("getPageText(%d): text on page found", pageNumber);

    SearchHelper h(jenv);
    if (!h.valid)
    {
        DEBUG("getPageText(%d): JNI helper initialization failed", pageNumber);
        ddjvu_miniexp_release(doc, r);
        return NULL;
    }

    jobject arrayList = h.arr.create();
    djvu_get_djvu_words(h, arrayList, r, pattern);

    // NOTE(review): assumes djvu_get_djvu_words() copies what it needs into Java
    // objects and keeps no pointer into the expression - confirm.
    ddjvu_miniexp_release(doc, r);

    return arrayList;
}
// Returns the text that loadText() extracts for `rect` on this page, with
// leading and trailing whitespace trimmed. The parent's mutex is held for the
// whole call.
QString Model::DjVuPage::text(const QRectF& rect) const
{
    QMutexLocker mutexLocker(&m_parent->m_mutex);

    // miniexp_dummy means the text is not available yet: service the message
    // queue and ask again.
    miniexp_t pageTextExp = ddjvu_document_get_pagetext(m_parent->m_document, m_index, "word");

    while(pageTextExp == miniexp_dummy)
    {
        clearMessageQueue(m_parent->m_context, true);

        pageTextExp = ddjvu_document_get_pagetext(m_parent->m_document, m_index, "word");
    }

    // Scale the requested rectangle by resolution / 72 before handing it to loadText().
    const QTransform scaling = QTransform::fromScale(m_resolution / 72.0, m_resolution / 72.0);
    const QRect scaledRect = scaling.mapRect(rect).toRect();

    const QString extracted = loadText(pageTextExp, scaledRect, m_size.height());

    // The expression stays protected by the document until released.
    ddjvu_miniexp_release(m_parent->m_document, pageTextExp);

    return extracted.trimmed();
}
void guFolderInspector::extractIsbnsFromDjvu(QString fileName, QList<QString> &ISBNList) { ctx = ddjvu_context_create("lgUploader"); //ddjvu_document_create_by_filename doc = ddjvu_document_create_by_filename_utf8(ctx, fileName.toUtf8(), 1); while (! ddjvu_document_decoding_done(doc)); int numOfPages = ddjvu_document_get_pagenum(doc); //количество страниц - строго! //QByteArray b = QFile::encodeName(fileName); //doc = ddjvu_document_create_by_filename(ctx, b, 1); if(!doc) qDebug() << "error create doc"; if(!ctx) qDebug() << "error create context"; const char *lvl = "page"; //начало перебора по страницам isbnMethods find; int numOfSearchPages = 15; //qDebug() << "num of pages " << numOfPages; if(numOfPages < numOfSearchPages) { numOfSearchPages = numOfPages; } for (int pageCount = 0 ; pageCount < numOfSearchPages ; pageCount++) { miniexp_t r = miniexp_nil; while ((r = ddjvu_document_get_pagetext(doc, pageCount ,lvl))==miniexp_dummy); r = miniexp_nth(5, r); //if ( r == miniexp_nil ) // qDebug() << "r = null"; const char *pageDumpArr = miniexp_to_str( r ); QString pageDump( QString::fromUtf8( pageDumpArr )); find.findIsbns(pageDump, ISBNList); //qDebug() << fileName << " content: \n" << pageDump ; } //конец перебора по страницам if (doc) ddjvu_document_release(doc); //освобождаем контекст документа if (ctx) ddjvu_context_release(ctx); //освобождаем контескт приложени¤ (возможно стоит оставить) }
/*
 * JNI: returns the text of a page that extractText() collects for a rectangle.
 *
 * The rectangle (startX, startY, width, height) is flipped vertically against
 * the page height before it is handed to extractText().
 *
 * Returns NULL when the page has no text or its page info cannot be obtained;
 * otherwise a Java string built from the collected bytes.
 */
JNIEXPORT jstring JNICALL Java_universe_constellation_orion_viewer_djvu_DjvuDocument_getText(JNIEnv *env, jobject thiz, int pageNumber, int startX, int startY, int width, int height)
{
    LOGI("==================Start Text Extraction==============");

    miniexp_t pagetext;
    while ((pagetext = ddjvu_document_get_pagetext(doc, pageNumber, 0)) == miniexp_dummy) {
        //handle_ddjvu_messages(ctx, TRUE);
    }

    if (miniexp_nil == pagetext) {
        return NULL;
    }

    ddjvu_status_t status;
    ddjvu_pageinfo_t info;
    while ((status = ddjvu_document_get_pageinfo(doc, pageNumber, &info)) < DDJVU_JOB_OK) {
        //nothing
    }
    if (status >= DDJVU_JOB_FAILED) {
        /* info was not filled in; do not compute a rectangle from it. */
        LOGI("Cannot get page info for page %d", pageNumber);
        ddjvu_miniexp_release(doc, pagetext);
        return NULL;
    }

    LOGI("Extraction rectangle=[%d,%d,%d,%d]", startX, startY, width, height);

    Arraylist values = arraylist_create();

    int h = info.height;
    /* Flip the vertical axis: y is measured from the opposite edge of the page. */
    fz_bbox target = {startX, h - startY - height, startX + width, h - startY};
    LOGI("Extraction irectangle=[%d,%d,%d,%d]", target.x0, target.y0, target.x1, target.y1);

    extractText(pagetext, values, &target);

    /* The expression stays protected by the document until released. */
    /* NOTE(review): assumes extractText() copies into `values` and keeps no pointer into pagetext - confirm. */
    ddjvu_miniexp_release(doc, pagetext);

    /* Append the terminating zero so the buffer can be used as a C string. */
    arraylist_add(values, 0);

    LOGI("Data: %s", arraylist_getData(values));
    jstring result = (*env)->NewStringUTF(env, arraylist_getData(values));
    arraylist_free(values);

    return result;
}
/*
 * JNI: hands the word-level text of a page to miniexp_get_text(), together
 * with the caller's stringBuilder and positionList objects.
 *
 * Looks up ArrayList.add(Object) and the RectF(float, float, float, float)
 * constructor, obtains the page height, and delegates to miniexp_get_text().
 *
 * Returns 0 when the page has no text, a Java lookup fails or the page info
 * cannot be obtained; otherwise whatever miniexp_get_text() returns.
 */
JNIEXPORT jboolean JNICALL Java_universe_constellation_orion_viewer_djvu_DjvuDocument_getPageText(JNIEnv *env, jobject thiz, jint pageNumber, jobject stringBuilder, jobject positionList)
{
    jboolean result = 0;
    jclass listClass;
    jclass rectFClass;
    jmethodID addToList;
    jmethodID ctor;
    ddjvu_status_t status;
    ddjvu_pageinfo_t dinfo;
    int state = -1;
    miniexp_t pagetext;

    LOGI("Start Page Text Extraction %i", pageNumber);

    while ((pagetext = ddjvu_document_get_pagetext(doc, pageNumber, "word")) == miniexp_dummy) {
        //handle_ddjvu_messages(ctx, TRUE);
    }

    if (pagetext == NULL) {
        LOGI("no text on page %i", pageNumber);
        return 0;
    }

    /* Each lookup is checked before the next JNI call is made. */
    listClass = (*env)->FindClass(env, "java/util/ArrayList");
    if (listClass == NULL) goto done;
    addToList = (*env)->GetMethodID(env, listClass, "add", "(Ljava/lang/Object;)Z");
    if (addToList == NULL) goto done;

    rectFClass = (*env)->FindClass(env, "android/graphics/RectF");
    if (rectFClass == NULL) goto done;
    ctor = (*env)->GetMethodID(env, rectFClass, "<init>", "(FFFF)V");
    if (ctor == NULL) goto done;

    /* Wait until the page info is available; dinfo is only valid on success. */
    while ((status = ddjvu_document_get_pageinfo(doc, pageNumber, &dinfo)) < DDJVU_JOB_OK) {
        //nothing
    }
    if (status >= DDJVU_JOB_FAILED) {
        LOGI("Cannot get page info for page %i", pageNumber);
        goto done;
    }

    result = miniexp_get_text(env, pagetext, stringBuilder, positionList, &state, rectFClass, ctor, addToList, dinfo.height);

done:
    /* The expression stays protected by the document until released. */
    ddjvu_miniexp_release(doc, pagetext);
    return result;
}
void dopage(int pageno) { miniexp_t r = miniexp_nil; const char *lvl = (detail) ? detail : "page"; while ((r = ddjvu_document_get_pagetext(doc,pageno-1,lvl))==miniexp_dummy) handle(TRUE); if (detail) { miniexp_io_t io; miniexp_io_init(&io); io.p_print7bits = &escape; miniexp_pprint_r(&io, r, 72); } else if ((r = miniexp_nth(5, r)) && miniexp_stringp(r)) { const char *s = miniexp_to_str(r); if (! escape) fputs(s, stdout); else { unsigned char c; while ((c = *(unsigned char*)s++)) { bool esc = false; if (c == '\\' || c >= 0x7f) esc = true; /* non-ascii */ if (c < 0x20 && !strchr("\013\035\037\012", c)) esc = true; /* non-printable other than separators */ if (esc) printf("\\%03o", c); else putc(c, stdout); } } fputs("\n\f", stdout); } }
// Searches the page's word-level text for `text` and returns the rectangles
// produced by findText(). The query is split on runs of non-word characters
// and empty parts are dropped before matching.
QList< QRectF > DjVuPage::search(const QString& text, bool matchCase, bool wholeWords) const
{
    LOCK_PAGE

    miniexp_t pageTextExp = miniexp_nil;

    {
        LOCK_PAGE_GLOBAL

        // miniexp_dummy means "not ready yet": service the message queue and retry.
        pageTextExp = ddjvu_document_get_pagetext(m_parent->m_document, m_index, "word");

        while(pageTextExp == miniexp_dummy)
        {
            clearMessageQueue(m_parent->m_context, true);

            pageTextExp = ddjvu_document_get_pagetext(m_parent->m_document, m_index, "word");
        }
    }

    // Scale of 72 / resolution, handed to findText() along with the page size.
    const QTransform transform = QTransform::fromScale(72.0 / m_resolution, 72.0 / m_resolution);

    const QRegExp nonWordRun(QLatin1String("\\W+"));
    const QStringList words = text.split(nonWordRun, QString::SkipEmptyParts);

    const QList< QRectF > results = findText(pageTextExp, m_size, transform, words, matchCase, wholeWords);

    {
        LOCK_PAGE_GLOBAL

        // The expression stays protected by the document until released.
        ddjvu_miniexp_release(m_parent->m_document, pageTextExp);
    }

    return results;
}
// Returns the text that loadText() extracts for `rect` on this page, with
// whitespace normalised by QString::simplified().
QString DjVuPage::text(const QRectF& rect) const
{
    LOCK_PAGE

    miniexp_t pageTextExp = miniexp_nil;

    {
        LOCK_PAGE_GLOBAL

        // miniexp_dummy means "not ready yet": service the message queue and retry.
        while((pageTextExp = ddjvu_document_get_pagetext(m_parent->m_document, m_index, "word")) == miniexp_dummy)
        {
            clearMessageQueue(m_parent->m_context, true);
        }
    }

    // Scale the requested rectangle by resolution / 72 before handing it to loadText().
    const QTransform transform = QTransform::fromScale(m_resolution / 72.0, m_resolution / 72.0);

    // simplified() is idempotent, so a single pass is enough.
    const QString text = loadText(pageTextExp, m_size, transform.mapRect(rect)).simplified();

    {
        LOCK_PAGE_GLOBAL

        // The expression stays protected by the document until released.
        ddjvu_miniexp_release(m_parent->m_document, pageTextExp);
    }

    return text;
}
// Searches the page's word-level text for `text` and returns one rectangle per
// match, scaled by 72 / resolution.
//
// The text expression is walked breadth-first. Consecutive "word" nodes are
// compared against `text` starting at a running offset; whitespace in `text`
// is skipped between words. When the whole of `text` has been consumed, the
// union of the matched word boxes is recorded. Any mismatch discards the
// partial match and starts over from the beginning of `text`.
QList< QRectF > Model::DjVuPage::search(const QString& text, bool matchCase) const
{
    QMutexLocker mutexLocker(&m_parent->m_mutex);

    // miniexp_dummy means "not ready yet": service the message queue and retry.
    miniexp_t pageTextExp = ddjvu_document_get_pagetext(m_parent->m_document, m_index, "word");

    while(pageTextExp == miniexp_dummy)
    {
        clearMessageQueue(m_parent->m_context, true);

        pageTextExp = ddjvu_document_get_pagetext(m_parent->m_document, m_index, "word");
    }

    const Qt::CaseSensitivity sensitivity = matchCase ? Qt::CaseSensitive : Qt::CaseInsensitive;

    QList< QRectF > results;

    QList< miniexp_t > pending;
    pending.append(pageTextExp);

    QRectF matchRect;   // union of the word boxes matched so far
    int offset = 0;     // position in `text` where the next word must start

    while(!pending.isEmpty())
    {
        miniexp_t node = pending.takeFirst();

        const int nodeLength = miniexp_length(node);

        // Only nodes with a leading symbol and at least six elements are considered.
        if(nodeLength < 6 || !miniexp_symbolp(miniexp_nth(0, node)))
        {
            continue;
        }

        if(qstrncmp(miniexp_to_name(miniexp_nth(0, node)), "word", 4) != 0)
        {
            // Not a word: queue everything from element 5 onwards.
            for(int childN = 5; childN < nodeLength; ++childN)
            {
                pending.append(miniexp_nth(childN, node));
            }

            continue;
        }

        const QString word = QString::fromUtf8(miniexp_to_str(miniexp_nth(5, node)));

        if(text.indexOf(word, offset, sensitivity) != offset)
        {
            // Mismatch: drop the partial match.
            matchRect = QRectF();
            offset = 0;

            continue;
        }

        const int xmin = miniexp_to_int(miniexp_nth(1, node));
        const int ymin = miniexp_to_int(miniexp_nth(2, node));
        const int xmax = miniexp_to_int(miniexp_nth(3, node));
        const int ymax = miniexp_to_int(miniexp_nth(4, node));

        // The vertical coordinate is flipped against the page height.
        matchRect = matchRect.united(QRectF(xmin, m_size.height() - ymax, xmax - xmin, ymax - ymin));

        offset += word.length();

        while(offset < text.length() && text.at(offset).isSpace())
        {
            ++offset;
        }

        if(offset == text.length())
        {
            // The complete query has been matched.
            results.append(matchRect);

            matchRect = QRectF();
            offset = 0;
        }
    }

    // The expression stays protected by the document until released.
    ddjvu_miniexp_release(m_parent->m_document, pageTextExp);

    const QTransform transform = QTransform::fromScale(72.0 / m_resolution, 72.0 / m_resolution);

    for(QList< QRectF >::iterator result = results.begin(); result != results.end(); ++result)
    {
        *result = transform.mapRect(*result);
    }

    return results;
}
/* * Return a table like following: * { * -- a line entry * 1 = { * 1 = {word="This", x0=377, y0=4857, x1=2427, y1=5089}, * 2 = {word="is", x0=377, y0=4857, x1=2427, y1=5089}, * 3 = {word="Word", x0=377, y0=4857, x1=2427, y1=5089}, * 4 = {word="List", x0=377, y0=4857, x1=2427, y1=5089}, * x0 = 377, y0 = 4857, x1 = 2427, y1 = 5089, * }, * * -- an other line entry * 2 = { * 1 = {word="This", x0=377, y0=4857, x1=2427, y1=5089}, * 2 = {word="is", x0=377, y0=4857, x1=2427, y1=5089}, * x0 = 377, y0 = 4857, x1 = 2427, y1 = 5089, * }, * } */ static int getPageText(lua_State *L) { DjvuDocument *doc = (DjvuDocument*) luaL_checkudata(L, 1, "djvudocument"); int pageno = luaL_checkint(L, 2); /* get page height for coordinates transform */ ddjvu_pageinfo_t info; ddjvu_status_t r; while ((r=ddjvu_document_get_pageinfo( doc->doc_ref, pageno-1, &info))<DDJVU_JOB_OK) { handle(L, doc->context, TRUE); } if (r>=DDJVU_JOB_FAILED) return luaL_error(L, "cannot get page #%d information", pageno); /* start retrieving page text */ miniexp_t sexp, se_line, se_word; int i = 1, j = 1, counter_l = 1, counter_w=1, nr_line = 0, nr_word = 0; const char *word = NULL; while ((sexp = ddjvu_document_get_pagetext(doc->doc_ref, pageno-1, "word")) == miniexp_dummy) { handle(L, doc->context, True); } /* throuw page info and obtain lines info, after this, sexp's entries * are lines. 
*/ sexp = miniexp_cdr(sexp); /* get number of lines in a page */ nr_line = miniexp_length(sexp); /* table that contains all the lines */ lua_newtable(L); counter_l = 1; for(i = 1; i <= nr_line; i++) { /* retrive one line entry */ se_line = miniexp_nth(i, sexp); nr_word = miniexp_length(se_line); if (nr_word == 0) { continue; } /* subtable that contains words in a line */ lua_pushnumber(L, counter_l); lua_newtable(L); counter_l++; /* set line position */ lua_pushstring(L, "x0"); lua_pushnumber(L, miniexp_to_int(miniexp_nth(1, se_line))); lua_settable(L, -3); lua_pushstring(L, "y1"); lua_pushnumber(L, info.height - miniexp_to_int(miniexp_nth(2, se_line))); lua_settable(L, -3); lua_pushstring(L, "x1"); lua_pushnumber(L, miniexp_to_int(miniexp_nth(3, se_line))); lua_settable(L, -3); lua_pushstring(L, "y0"); lua_pushnumber(L, info.height - miniexp_to_int(miniexp_nth(4, se_line))); lua_settable(L, -3); /* now loop through each word in the line */ counter_w = 1; for(j = 1; j <= nr_word; j++) { /* retrive one word entry */ se_word = miniexp_nth(j, se_line); /* check to see whether the entry is empty */ word = miniexp_to_str(miniexp_nth(5, se_word)); if (!word) { continue; } /* create table that contains info for a word */ lua_pushnumber(L, counter_w); lua_newtable(L); counter_w++; /* set word info */ lua_pushstring(L, "x0"); lua_pushnumber(L, miniexp_to_int(miniexp_nth(1, se_word))); lua_settable(L, -3); lua_pushstring(L, "y1"); lua_pushnumber(L, info.height - miniexp_to_int(miniexp_nth(2, se_word))); lua_settable(L, -3); lua_pushstring(L, "x1"); lua_pushnumber(L, miniexp_to_int(miniexp_nth(3, se_word))); lua_settable(L, -3); lua_pushstring(L, "y0"); lua_pushnumber(L, info.height - miniexp_to_int(miniexp_nth(4, se_word))); lua_settable(L, -3); lua_pushstring(L, "word"); lua_pushstring(L, word); lua_settable(L, -3); /* set word entry to line subtable */ lua_settable(L, -3); } /* end of for (j) */ /* set line entry to page text table */ lua_settable(L, -3); } /* end of for 
(i) */ return 1; }