//sumatrapdf code int extractText(miniexp_t item, Arraylist list, fz_bbox * target) { miniexp_t type = miniexp_car(item); if (!miniexp_symbolp(type)) return 0; item = miniexp_cdr(item); if (!miniexp_numberp(miniexp_car(item))) return 0; int x0 = miniexp_to_int(miniexp_car(item)); item = miniexp_cdr(item); if (!miniexp_numberp(miniexp_car(item))) return 0; int y0 = miniexp_to_int(miniexp_car(item)); item = miniexp_cdr(item); if (!miniexp_numberp(miniexp_car(item))) return 0; int x1 = miniexp_to_int(miniexp_car(item)); item = miniexp_cdr(item); if (!miniexp_numberp(miniexp_car(item))) return 0; int y1 = miniexp_to_int(miniexp_car(item)); item = miniexp_cdr(item); //RectI rect = RectI::FromXY(x0, y0, x1, y1); fz_bbox rect = {x0 , y0 , x1 , y1}; miniexp_t str = miniexp_car(item); if (miniexp_stringp(str) && !miniexp_cdr(item)) { fz_bbox inters = fz_intersect_bbox(rect, *target); //LOGI("Start text extraction: rectangle=[%d,%d,%d,%d] %s", rect.x0, rect.y0, rect.x1, rect.y1, content); if (!fz_is_empty_bbox(inters)) { const char *content = miniexp_to_str(str); while (*content) { arraylist_add(list, *content++); } // if (value) { // size_t len = str::Len(value); // // TODO: split the rectangle into individual parts per glyph // for (size_t i = 0; i < len; i++) // coords.Append(RectI(rect.x, rect.y, rect.dx, rect.dy)); // extracted.AppendAndFree(value); // } if (miniexp_symbol("word") == type) { arraylist_add(list, ' '); //coords.Append(RectI(rect.x + rect.dx, rect.y, 2, rect.dy)); } else if (miniexp_symbol("char") != type) { arraylist_add(list, '\n'); // extracted.Append(lineSep); // for (size_t i = 0; i < str::Len(lineSep); i++) // coords.Append(RectI()); } } item = miniexp_cdr(item); } while (miniexp_consp(str)) { extractText(str, list, target); item = miniexp_cdr(item); str = miniexp_car(item); } return !item; }
// Converts `sexp` to an int when it is a miniexp number.
//
// On success writes the value to `*number` and returns TRUE. Returns FALSE
// and leaves `*number` untouched when `sexp` is not a number.
// `number` is dereferenced without a null check.
static gboolean number_from_miniexp(miniexp_t sexp, int *number)
{
    if (!miniexp_numberp(sexp))
        return FALSE;

    *number = miniexp_to_int(sexp);
    return TRUE;
}
// Takes one integer off the front of the list `*r`.
//
// If the first element of `*r` is a number, stores it in `*x`, advances `*r`
// to the rest of the list and returns 1. Otherwise returns 0 and leaves both
// `*r` and `*x` unchanged.
static int miniexp_get_int(miniexp_t * r, int * x)
{
    miniexp_t first = miniexp_car(*r);
    if (miniexp_numberp(first)) {
        *x = miniexp_to_int(first);
        *r = miniexp_cdr(*r);
        return 1;
    }
    return 0;
}
// Extracts an int from `expression` into `*integer`.
//
// Returns true and writes the value only when `integer` is non-NULL and
// `expression` is a miniexp number; in every other case returns false and
// writes nothing.
static bool exp_to_int(miniexp_t expression, int* integer)
{
    // The NULL test comes first so `expression` is never inspected without
    // somewhere to store the result.
    if (integer != NULL && miniexp_numberp(expression)) {
        *integer = miniexp_to_int(expression);
        return true;
    }
    return false;
}
// Recursively walks the s-expression `expr` and appends one box object to
// `list` for every string it contains (or, when `pattern` is non-null, for
// every string whose lower-cased form contains `pattern`).
//
// `expr` is read as (symbol n0 n1 n2 n3 item...). Anything else at the head
// (not a list, head not a symbol, a non-number among the first four
// arguments) makes the function return without adding anything. Each
// remaining item is either a string, which is turned into a box carrying the
// node's four numbers and the text, or a nested list, which is processed
// recursively. Items of any other kind are skipped.
//
// NOTE(review): only the text is lower-cased before matching, so `pattern`
// is presumably expected to be lower-case already - confirm with callers.
void djvu_get_djvu_words(SearchHelper& h, jobject list, miniexp_t expr, jstring pattern)
{
    if (!miniexp_consp(expr)) {
        return;
    }

    miniexp_t node = miniexp_car(expr);
    if (!miniexp_symbolp(node)) {
        return;
    }
    expr = miniexp_cdr(expr);

    // Read up to four leading numbers. If the list ends early the item loop
    // below has nothing to visit, so a partially filled array is never read.
    int coords[4];
    int filled = 0;
    while (filled < 4 && miniexp_consp(expr)) {
        node = miniexp_car(expr);
        expr = miniexp_cdr(expr);
        if (!miniexp_numberp(node)) {
            return;
        }
        coords[filled++] = miniexp_to_int(node);
    }

    for (; miniexp_consp(expr); expr = miniexp_cdr(expr)) {
        node = miniexp_car(expr);

        if (!miniexp_stringp(node)) {
            if (miniexp_consp(node)) {
                djvu_get_djvu_words(h, list, node, pattern);
            }
            continue;
        }

        const char* text = miniexp_to_str(node);
        // DEBUG("%d, %d, %d, %d: %s", coords[0], coords[1], coords[2], coords[3], text);
        jstring txt = h.str.toString(text);

        // Without a pattern every string is accepted.
        bool add = true;
        if (pattern) {
            jstring lowered = h.str.toLowerCase(txt);
            add = h.str.indexOf(lowered, pattern) >= 0;
            h.str.release(lowered);
        }

        if (!add) {
            h.str.release(txt);
            continue;
        }

        // NOTE(review): neither `txt` nor `box` is released on this path;
        // whether that leaks references depends on the helper
        // implementations, which are not visible here - confirm.
        jobject box = h.box.create();
        h.box.setRect(box, coords);
        h.box.setText(box, txt);
        h.arr.add(list, box);
    }
}