extern "C" jobject Java_org_ebookdroid_droids_djvu_codec_DjvuPage_getPageText(JNIEnv *jenv, jclass cls,
                                                                                 jlong docHandle, jint pageNumber, jlong contextHandle, jstring pattern)
{
    miniexp_t r = miniexp_nil;

    while ((r = ddjvu_document_get_pagetext((ddjvu_document_t*) docHandle, pageNumber, "word")) == miniexp_dummy)
    {
        waitAndHandleMessages(jenv, contextHandle);
    }

    if (r == miniexp_nil || !miniexp_consp(r))
    {
        // DEBUG("getPageLinks(%d): no text on page", pageNumber);
        return NULL;
    }

    // DEBUG("getPageLinks(%d): text on page found", pageNumber);

    SearchHelper h(jenv);

    if (!h.valid)
    {
        DEBUG("getPageLinks(%d): JNI helper initialization failed", pageNumber);
        return NULL;
    }

    jobject arrayList = h.arr.create();

    djvu_get_djvu_words(h, arrayList, r, pattern);

    return arrayList;
}
Ejemplo n.º 2
0
QString Model::DjVuPage::text(const QRectF& rect) const
{
    QMutexLocker mutexLocker(&m_parent->m_mutex);

    miniexp_t pageTextExp;

    while(true)
    {
        pageTextExp = ddjvu_document_get_pagetext(m_parent->m_document, m_index, "word");

        if(pageTextExp == miniexp_dummy)
        {
            clearMessageQueue(m_parent->m_context, true);
        }
        else
        {
            break;
        }
    }

    const QString text = loadText(pageTextExp, QTransform::fromScale(m_resolution / 72.0, m_resolution / 72.0).mapRect(rect).toRect(), m_size.height());

    ddjvu_miniexp_release(m_parent->m_document, pageTextExp);

    return text.trimmed();
}
void guFolderInspector::extractIsbnsFromDjvu(QString fileName, QList<QString> &ISBNList)
{
    ctx = ddjvu_context_create("lgUploader"); //ddjvu_document_create_by_filename
    doc = ddjvu_document_create_by_filename_utf8(ctx, fileName.toUtf8(), 1);
    while (! ddjvu_document_decoding_done(doc));
    int  numOfPages = ddjvu_document_get_pagenum(doc); //количество страниц - строго!
    //QByteArray b = QFile::encodeName(fileName);
    //doc = ddjvu_document_create_by_filename(ctx, b, 1);
    if(!doc)
        qDebug() << "error create doc";
    if(!ctx)
        qDebug() << "error create context";


    const char *lvl = "page";
    //начало перебора по страницам
    isbnMethods find;
    int numOfSearchPages = 15;
    //qDebug() << "num of pages " << numOfPages;
    if(numOfPages < numOfSearchPages)
    {
        numOfSearchPages = numOfPages;
    }
    for (int pageCount = 0 ; pageCount < numOfSearchPages ; pageCount++)
    {

        miniexp_t r = miniexp_nil;
        while ((r = ddjvu_document_get_pagetext(doc, pageCount ,lvl))==miniexp_dummy);
        r = miniexp_nth(5, r);
        //if ( r == miniexp_nil )
        //    qDebug() << "r = null";
        const char *pageDumpArr = miniexp_to_str( r );
        QString pageDump( QString::fromUtf8( pageDumpArr ));

        find.findIsbns(pageDump, ISBNList);

        //qDebug() << fileName << " content: \n" <<  pageDump ;
    }
    //конец перебора по страницам
    if (doc)
        ddjvu_document_release(doc); //освобождаем контекст документа
    if (ctx)
        ddjvu_context_release(ctx); //освобождаем контескт приложени¤ (возможно стоит оставить)

}
Ejemplo n.º 4
0
JNIEXPORT jstring JNICALL
Java_universe_constellation_orion_viewer_djvu_DjvuDocument_getText(JNIEnv *env, jobject thiz, int pageNumber,
		int startX, int startY, int width, int height)
{

    LOGI("==================Start Text Extraction==============");

    miniexp_t pagetext;
    while ((pagetext=ddjvu_document_get_pagetext(doc,pageNumber,0))==miniexp_dummy) {
      //handle_ddjvu_messages(ctx, TRUE);
    }

    if (miniexp_nil == pagetext) {
        return NULL;
    }
//
    ddjvu_status_t status;
    ddjvu_pageinfo_t info;
    while ((status = ddjvu_document_get_pageinfo(doc, pageNumber, &info)) < DDJVU_JOB_OK) {
        //nothing
    }

	LOGI("Extraction rectangle=[%d,%d,%d,%d]", startX, startY, width, height);

	Arraylist values = arraylist_create();

    int w = info.width;
    int h = info.height;
    fz_bbox target = {startX, h - startY - height, startX + width, h - startY};
    LOGI("Extraction irectangle=[%d,%d,%d,%d]", target.x0, target.y0, target.x1, target.y1);

	extractText(pagetext, values, &target);

	arraylist_add(values, 0);

    LOGI("Data: %s", arraylist_getData(values));
    jstring result = (*env)->NewStringUTF(env, arraylist_getData(values));
    arraylist_free(values);

    return result;
}
Ejemplo n.º 5
0
JNIEXPORT jboolean JNICALL Java_universe_constellation_orion_viewer_djvu_DjvuDocument_getPageText(JNIEnv *env, jobject thiz,
                                                                                  jint pageNumber, jobject stringBuilder, jobject positionList)
{
    LOGI("Start Page Text Extraction %i", pageNumber);

    miniexp_t pagetext;
    while ((pagetext=ddjvu_document_get_pagetext(doc, pageNumber, "word"))==miniexp_dummy) {
      //handle_ddjvu_messages(ctx, TRUE);
    }

    if (pagetext == NULL) {
        LOGI("no text on page %i", pageNumber);
        return 0;
    }

    jclass listClass;
    jmethodID addToList;

    listClass = (*env)->FindClass(env, "java/util/ArrayList");
    if (listClass == NULL) return 0;

    addToList = (*env)->GetMethodID(env, listClass, "add", "(Ljava/lang/Object;)Z");
    if (addToList == NULL) return 0;

    jclass rectFClass;
    jmethodID ctor;

    rectFClass = (*env)->FindClass(env, "android/graphics/RectF");
    if (rectFClass == NULL) return 0;

    ctor = (*env)->GetMethodID(env, rectFClass, "<init>", "(FFFF)V");
    if (ctor == NULL) return 0;

    int state = -1;
    ddjvu_pageinfo_t dinfo;
    ddjvu_document_get_pageinfo(doc, pageNumber, &dinfo);

    return miniexp_get_text(env, pagetext, stringBuilder, positionList, &state, rectFClass, ctor, addToList, dinfo.height);
}
Ejemplo n.º 6
0
void
dopage(int pageno)
{
  miniexp_t r = miniexp_nil;
  const char *lvl = (detail) ? detail : "page";
  while ((r = ddjvu_document_get_pagetext(doc,pageno-1,lvl))==miniexp_dummy)
    handle(TRUE);
  if (detail)
    {
      miniexp_io_t io;
      miniexp_io_init(&io);
      io.p_print7bits = &escape;
      miniexp_pprint_r(&io, r, 72);
    }
  else if ((r = miniexp_nth(5, r)) && miniexp_stringp(r))
    {
      const char *s = miniexp_to_str(r); 
      if (! escape)
        fputs(s, stdout);
      else
        {
          unsigned char c;
          while ((c = *(unsigned char*)s++))
            {
              bool esc = false;
              if (c == '\\' || c >= 0x7f)
                esc = true; /* non-ascii */
              if (c < 0x20 && !strchr("\013\035\037\012", c))
                esc = true; /* non-printable other than separators */
              if (esc)
                printf("\\%03o", c);
              else
                putc(c, stdout);
            }
        }
      fputs("\n\f", stdout);
    }
}
Ejemplo n.º 7
0
QList< QRectF > DjVuPage::search(const QString& text, bool matchCase, bool wholeWords) const
{
    LOCK_PAGE

    miniexp_t pageTextExp = miniexp_nil;

    {
        LOCK_PAGE_GLOBAL

        while(true)
        {
            pageTextExp = ddjvu_document_get_pagetext(m_parent->m_document, m_index, "word");

            if(pageTextExp == miniexp_dummy)
            {
                clearMessageQueue(m_parent->m_context, true);
            }
            else
            {
                break;
            }
        }
    }

    const QTransform transform = QTransform::fromScale(72.0 / m_resolution, 72.0 / m_resolution);
    const QStringList words = text.split(QRegExp(QLatin1String("\\W+")), QString::SkipEmptyParts);

    const QList< QRectF > results = findText(pageTextExp, m_size, transform, words, matchCase, wholeWords);

    {
        LOCK_PAGE_GLOBAL

        ddjvu_miniexp_release(m_parent->m_document, pageTextExp);
    }

    return results;
}
Ejemplo n.º 8
0
QString DjVuPage::text(const QRectF& rect) const
{
    LOCK_PAGE

    miniexp_t pageTextExp = miniexp_nil;

    {
        LOCK_PAGE_GLOBAL

        while(true)
        {
            pageTextExp = ddjvu_document_get_pagetext(m_parent->m_document, m_index, "word");

            if(pageTextExp == miniexp_dummy)
            {
                clearMessageQueue(m_parent->m_context, true);
            }
            else
            {
                break;
            }
        }
    }

    const QTransform transform = QTransform::fromScale(m_resolution / 72.0, m_resolution / 72.0);

    const QString text = loadText(pageTextExp, m_size, transform.mapRect(rect)).simplified();

    {
        LOCK_PAGE_GLOBAL

        ddjvu_miniexp_release(m_parent->m_document, pageTextExp);
    }

    return text.simplified();
}
Ejemplo n.º 9
0
QList< QRectF > Model::DjVuPage::search(const QString& text, bool matchCase) const
{
    QMutexLocker mutexLocker(&m_parent->m_mutex);

    miniexp_t pageTextExp;

    while(true)
    {
        pageTextExp = ddjvu_document_get_pagetext(m_parent->m_document, m_index, "word");

        if(pageTextExp == miniexp_dummy)
        {
            clearMessageQueue(m_parent->m_context, true);
        }
        else
        {
            break;
        }
    }

    QList< miniexp_t > words;
    QList< QRectF > results;

    words.append(pageTextExp);

    QRectF rect;
    int index = 0;

    while(!words.isEmpty())
    {
        miniexp_t textExp = words.takeFirst();

        const int textLength = miniexp_length(textExp);

        if(textLength >= 6 && miniexp_symbolp(miniexp_nth(0, textExp)))
        {
            if(qstrncmp(miniexp_to_name(miniexp_nth(0, textExp)), "word", 4) == 0)
            {
                const QString word = QString::fromUtf8(miniexp_to_str(miniexp_nth(5, textExp)));

                if(text.indexOf(word, index, matchCase ? Qt::CaseSensitive : Qt::CaseInsensitive) == index)
                {
                    const int xmin = miniexp_to_int(miniexp_nth(1, textExp));
                    const int ymin = miniexp_to_int(miniexp_nth(2, textExp));
                    const int xmax = miniexp_to_int(miniexp_nth(3, textExp));
                    const int ymax = miniexp_to_int(miniexp_nth(4, textExp));

                    rect = rect.united(QRectF(xmin, m_size.height() - ymax, xmax - xmin, ymax - ymin));

                    index += word.length();

                    while(text.length() > index && text.at(index).isSpace())
                    {
                        ++index;
                    }

                    if(text.length() == index)
                    {
                        results.append(rect);

                        rect = QRectF();
                        index = 0;
                    }
                }
                else
                {
                    rect = QRectF();
                    index = 0;
                }
            }
            else
            {
                for(int textN = 5; textN < textLength; ++textN)
                {
                    words.append(miniexp_nth(textN, textExp));
                }
            }
        }
    }

    ddjvu_miniexp_release(m_parent->m_document, pageTextExp);

    QTransform transform = QTransform::fromScale(72.0 / m_resolution, 72.0 / m_resolution);

    for(int index = 0; index < results.size(); ++index)
    {
        results[index] = transform.mapRect(results[index]);
    }

    return results;
}
Ejemplo n.º 10
0
/*
 * Return a table like following:
 * {
 *    -- a line entry
 *    1 = {
 *       1 = {word="This", x0=377, y0=4857, x1=2427, y1=5089},
 *       2 = {word="is", x0=377, y0=4857, x1=2427, y1=5089},
 *       3 = {word="Word", x0=377, y0=4857, x1=2427, y1=5089},
 *       4 = {word="List", x0=377, y0=4857, x1=2427, y1=5089},
 *       x0 = 377, y0 = 4857, x1 = 2427, y1 = 5089,
 *    },
 *
 *    -- an other line entry
 *    2 = {
 *       1 = {word="This", x0=377, y0=4857, x1=2427, y1=5089},
 *       2 = {word="is", x0=377, y0=4857, x1=2427, y1=5089},
 *       x0 = 377, y0 = 4857, x1 = 2427, y1 = 5089,
 *    },
 * }
 */
static int getPageText(lua_State *L) {
	DjvuDocument *doc = (DjvuDocument*) luaL_checkudata(L, 1, "djvudocument");
	int pageno = luaL_checkint(L, 2);

	/* get page height for coordinates transform */
	ddjvu_pageinfo_t info;
	ddjvu_status_t r;
	while ((r=ddjvu_document_get_pageinfo(
				   doc->doc_ref, pageno-1, &info))<DDJVU_JOB_OK) {
		handle(L, doc->context, TRUE);
	}
	if (r>=DDJVU_JOB_FAILED)
		return luaL_error(L, "cannot get page #%d information", pageno);

	/* start retrieving page text */
	miniexp_t sexp, se_line, se_word;
	int i = 1, j = 1, counter_l = 1, counter_w=1,
		nr_line = 0, nr_word = 0;
	const char *word = NULL;

	while ((sexp = ddjvu_document_get_pagetext(doc->doc_ref, pageno-1, "word"))
				== miniexp_dummy) {
		handle(L, doc->context, True);
	}

	/* throuw page info and obtain lines info, after this, sexp's entries
	 * are lines. */
	sexp = miniexp_cdr(sexp);
	/* get number of lines in a page */
	nr_line = miniexp_length(sexp);
	/* table that contains all the lines */
	lua_newtable(L);

	counter_l = 1;
	for(i = 1; i <= nr_line; i++) {
		/* retrive one line entry */
		se_line = miniexp_nth(i, sexp);
		nr_word = miniexp_length(se_line);
		if (nr_word == 0) {
			continue;
		}

		/* subtable that contains words in a line */
		lua_pushnumber(L, counter_l);
		lua_newtable(L);
		counter_l++;

		/* set line position */
		lua_pushstring(L, "x0");
		lua_pushnumber(L, miniexp_to_int(miniexp_nth(1, se_line)));
		lua_settable(L, -3);

		lua_pushstring(L, "y1");
		lua_pushnumber(L,
				info.height - miniexp_to_int(miniexp_nth(2, se_line)));
		lua_settable(L, -3);

		lua_pushstring(L, "x1");
		lua_pushnumber(L, miniexp_to_int(miniexp_nth(3, se_line)));
		lua_settable(L, -3);

		lua_pushstring(L, "y0");
		lua_pushnumber(L,
				info.height - miniexp_to_int(miniexp_nth(4, se_line)));
		lua_settable(L, -3);

		/* now loop through each word in the line */
		counter_w = 1;
		for(j = 1; j <= nr_word; j++) {
			/* retrive one word entry */
			se_word = miniexp_nth(j, se_line);
			/* check to see whether the entry is empty */
			word = miniexp_to_str(miniexp_nth(5, se_word));
			if (!word) {
				continue;
			}

			/* create table that contains info for a word */
			lua_pushnumber(L, counter_w);
			lua_newtable(L);
			counter_w++;

			/* set word info */
			lua_pushstring(L, "x0");
			lua_pushnumber(L, miniexp_to_int(miniexp_nth(1, se_word)));
			lua_settable(L, -3);

			lua_pushstring(L, "y1");
			lua_pushnumber(L,
					info.height - miniexp_to_int(miniexp_nth(2, se_word)));
			lua_settable(L, -3);

			lua_pushstring(L, "x1");
			lua_pushnumber(L, miniexp_to_int(miniexp_nth(3, se_word)));
			lua_settable(L, -3);

			lua_pushstring(L, "y0");
			lua_pushnumber(L,
					info.height - miniexp_to_int(miniexp_nth(4, se_word)));
			lua_settable(L, -3);

			lua_pushstring(L, "word");
			lua_pushstring(L, word);
			lua_settable(L, -3);

			/* set word entry to line subtable */
			lua_settable(L, -3);
		} /* end of for (j) */

		/* set line entry to page text table */
		lua_settable(L, -3);
	} /* end of for (i) */

	return 1;
}