virtual void Visit(const WCHAR *name, const WCHAR *url, int level) { if (!url || IsExternalUrl(url)) return; ScopedMem<WCHAR> plainUrl(str::ToPlainUrl(url)); if (added.FindI(plainUrl) != -1) return; ScopedMem<char> urlUtf8(str::conv::ToUtf8(plainUrl)); size_t pageHtmlLen; ScopedMem<unsigned char> pageHtml(doc->GetData(urlUtf8, &pageHtmlLen)); if (!pageHtml) return; html.AppendFmt("<pagebreak page_path=\"%s\" page_marker />", urlUtf8); html.AppendAndFree(doc->ToUtf8(pageHtml, ExtractHttpCharset((const char *)pageHtml.Get(), pageHtmlLen))); added.Append(plainUrl.StealData()); }
bool DjVuEngineImpl::ExtractPageText(miniexp_t item, const WCHAR *lineSep, str::Str<WCHAR>& extracted, Vec<RectI>& coords) { miniexp_t type = miniexp_car(item); if (!miniexp_symbolp(type)) return false; item = miniexp_cdr(item); if (!miniexp_numberp(miniexp_car(item))) return false; int x0 = miniexp_to_int(miniexp_car(item)); item = miniexp_cdr(item); if (!miniexp_numberp(miniexp_car(item))) return false; int y0 = miniexp_to_int(miniexp_car(item)); item = miniexp_cdr(item); if (!miniexp_numberp(miniexp_car(item))) return false; int x1 = miniexp_to_int(miniexp_car(item)); item = miniexp_cdr(item); if (!miniexp_numberp(miniexp_car(item))) return false; int y1 = miniexp_to_int(miniexp_car(item)); item = miniexp_cdr(item); RectI rect = RectI::FromXY(x0, y0, x1, y1); miniexp_t str = miniexp_car(item); if (miniexp_stringp(str) && !miniexp_cdr(item)) { if (type != miniexp_symbol("char") && type != miniexp_symbol("word") || coords.Count() > 0 && rect.y < coords.Last().y - coords.Last().dy * 0.8) { AppendNewline(extracted, coords, lineSep); } const char *content = miniexp_to_str(str); WCHAR *value = str::conv::FromUtf8(content); if (value) { size_t len = str::Len(value); // TODO: split the rectangle into individual parts per glyph for (size_t i = 0; i < len; i++) coords.Append(RectI(rect.x, rect.y, rect.dx, rect.dy)); extracted.AppendAndFree(value); } if (miniexp_symbol("word") == type) { extracted.Append(' '); coords.Append(RectI(rect.x + rect.dx, rect.y, 2, rect.dy)); } item = miniexp_cdr(item); } while (miniexp_consp(str)) { ExtractPageText(str, lineSep, extracted, coords); item = miniexp_cdr(item); str = miniexp_car(item); } return !item; }