// Callers delete the ToC tree, so we re-create it from prerecorded // values (which is faster than re-creating it from html every time) DocTocItem *ChmModel::GetTocTree() { DocTocItem *root = NULL, **nextChild = &root; Vec<DocTocItem *> levels; int idCounter = 0; for (ChmTocTraceItem *ti = tocTrace->IterStart(); ti; ti = tocTrace->IterNext()) { ChmTocItem *item = new ChmTocItem(ti->title, ti->pageNo, ti->url); item->id = ++idCounter; // append the item at the correct level CrashIf(ti->level < 1); if ((size_t)ti->level <= levels.Count()) { levels.RemoveAt(ti->level, levels.Count() - ti->level); levels.Last()->AddSibling(item); } else { (*nextChild) = item; levels.Append(item); } nextChild = &item->child; } if (root) root->OpenSingleNode(); return root; }
virtual const unsigned char *GetDataForUrl(const WCHAR *url, size_t *len) { ScopedCritSec scope(&docAccess); ScopedMem<WCHAR> plainUrl(url::GetFullPath(url)); ScopedMem<char> urlUtf8(str::conv::ToUtf8(plainUrl)); data.Append(doc->GetData(urlUtf8, len)); return data.Last(); }
// Collects the text content of the document's html: text tokens outside of
// head, script and style elements are whitespace-trimmed, entity-resolved
// and separated by single spaces; closing a non-inline tag turns a trailing
// space into a "\r\n" line break. The result is converted from UTF-8.
static WCHAR *ExtractHtmlText(EpubDoc *doc)
{
    size_t dataLen;
    const char *html = doc->GetTextData(&dataLen);

    str::Str<char> out(dataLen / 2);
    HtmlPullParser parser(html, dataLen);
    // tags that have been opened but not closed yet (innermost last)
    Vec<HtmlTag> openTags;

    for (HtmlToken *tok = parser.Next(); tok != NULL && !tok->IsError(); tok = parser.Next()) {
        if (tok->IsText() && !(openTags.Contains(Tag_Head) ||
                               openTags.Contains(Tag_Script) ||
                               openTags.Contains(Tag_Style))) {
            // trim whitespace (TODO: also normalize within text?)
            while (tok->sLen > 0 && str::IsWs(*tok->s)) {
                tok->s++;
                tok->sLen--;
            }
            while (tok->sLen > 0 && str::IsWs(tok->s[tok->sLen - 1])) {
                tok->sLen--;
            }
            if (tok->sLen > 0) {
                out.AppendAndFree(ResolveHtmlEntities(tok->s, tok->sLen));
                out.Append(' ');
            }
        }
        else if (tok->IsStartTag()) {
            // TODO: force-close tags similar to HtmlFormatter.cpp's AutoCloseOnOpen?
            if (!IsTagSelfClosing(tok->tag))
                openTags.Append(tok->tag);
        }
        else if (tok->IsEndTag()) {
            const bool endsWithSpace = out.Size() > 0 && out.Last() == ' ';
            if (!IsInlineTag(tok->tag) && endsWithSpace) {
                out.Pop();
                out.Append("\r\n");
            }
            // when closing a tag that is somewhere on the stack, everything
            // opened after it is dropped together with the tag itself;
            // an end tag without a matching start tag leaves the stack alone
            if (openTags.Contains(tok->tag)) {
                while (openTags.Last() != tok->tag) {
                    openTags.Pop();
                }
                openTags.Pop();
            }
        }
    }

    return str::conv::FromUtf8(out.Get());
}
bool DjVuEngineImpl::ExtractPageText(miniexp_t item, const WCHAR *lineSep, str::Str<WCHAR>& extracted, Vec<RectI>& coords) { miniexp_t type = miniexp_car(item); if (!miniexp_symbolp(type)) return false; item = miniexp_cdr(item); if (!miniexp_numberp(miniexp_car(item))) return false; int x0 = miniexp_to_int(miniexp_car(item)); item = miniexp_cdr(item); if (!miniexp_numberp(miniexp_car(item))) return false; int y0 = miniexp_to_int(miniexp_car(item)); item = miniexp_cdr(item); if (!miniexp_numberp(miniexp_car(item))) return false; int x1 = miniexp_to_int(miniexp_car(item)); item = miniexp_cdr(item); if (!miniexp_numberp(miniexp_car(item))) return false; int y1 = miniexp_to_int(miniexp_car(item)); item = miniexp_cdr(item); RectI rect = RectI::FromXY(x0, y0, x1, y1); miniexp_t str = miniexp_car(item); if (miniexp_stringp(str) && !miniexp_cdr(item)) { if (type != miniexp_symbol("char") && type != miniexp_symbol("word") || coords.Count() > 0 && rect.y < coords.Last().y - coords.Last().dy * 0.8) { AppendNewline(extracted, coords, lineSep); } const char *content = miniexp_to_str(str); WCHAR *value = str::conv::FromUtf8(content); if (value) { size_t len = str::Len(value); // TODO: split the rectangle into individual parts per glyph for (size_t i = 0; i < len; i++) coords.Append(RectI(rect.x, rect.y, rect.dx, rect.dy)); extracted.AppendAndFree(value); } if (miniexp_symbol("word") == type) { extracted.Append(' '); coords.Append(RectI(rect.x + rect.dx, rect.y, 2, rect.dy)); } item = miniexp_cdr(item); } while (miniexp_consp(str)) { ExtractPageText(str, lineSep, extracted, coords); item = miniexp_cdr(item); str = miniexp_car(item); } return !item; }
virtual void Visit(const WCHAR *name, const WCHAR *url, int level) { int pageNo = CreatePageNoForURL(url); ChmTocItem *item = new ChmTocItem(str::Dup(name), pageNo, str::Dup(url)); item->id = ++idCounter; item->open = level == 1; // append the item at the correct level CrashIf(level < 1); if (!*root) { *root = item; lastItems.Append(*root); } else if ((size_t)level <= lastItems.Count()) { lastItems.RemoveAt(level, lastItems.Count() - level); lastItems.Last() = lastItems.Last()->next = item; } else { lastItems.Last()->child = item; lastItems.Append(item); } }
// don't emit multiple spaces and don't emit spaces // at the beginning of the line static bool CanEmitElasticSpace(float currX, float NewLineX, float maxCurrX, Vec<DrawInstr>& currLineInstr) { if (NewLineX == currX || 0 == currLineInstr.size()) return false; // prevent elastic spaces from being flushed to the // beginning of the next line if (currX > maxCurrX) return false; DrawInstr& di = currLineInstr.Last(); // don't add a space if only an anchor would be in between them if (InstrAnchor == di.type && currLineInstr.size() > 1) di = currLineInstr.at(currLineInstr.size() - 2); return (InstrElasticSpace != di.type) && (InstrFixedSpace != di.type); }
// TODO: quite possibly the real logic for generating "click" events is // more complicated // (x, y) is in the coordinates of the root window LRESULT EventMgr::OnLButtonUp(WPARAM keys, int x, int y, bool& wasHandled) { Vec<CtrlAndOffset> controls; uint16 wantedInputMask = bit::FromBit<uint16>(Control::WantsMouseClickBit); size_t count = CollectWindowsAt(wndRoot, x, y, wantedInputMask, &controls); if (0 == count) return 0; // TODO: should this take z-order into account? Control *c = controls.Last().c; c->MapRootToMyPos(x, y); NotifyClicked(c, x, y); NotifyNamedEventClicked(c, x, y); return 0; }
// adapted from DisplayModel::NextZoomStep float ChmModel::GetNextZoomStep(float towardsLevel) const { float currZoom = GetZoomVirtual(); if (gGlobalPrefs->zoomIncrement > 0) { if (currZoom < towardsLevel) return min(currZoom * (gGlobalPrefs->zoomIncrement / 100 + 1), towardsLevel); if (currZoom > towardsLevel) return max(currZoom / (gGlobalPrefs->zoomIncrement / 100 + 1), towardsLevel); return currZoom; } Vec<float> *zoomLevels = gGlobalPrefs->zoomLevels; CrashIf(zoomLevels->Count() != 0 && (zoomLevels->At(0) < ZOOM_MIN || zoomLevels->Last() > ZOOM_MAX)); CrashIf(zoomLevels->Count() != 0 && zoomLevels->At(0) > zoomLevels->Last()); const float FUZZ = 0.01f; float newZoom = towardsLevel; if (currZoom < towardsLevel) { for (size_t i = 0; i < zoomLevels->Count(); i++) { if (zoomLevels->At(i) - FUZZ > currZoom) { newZoom = zoomLevels->At(i); break; } } } else if (currZoom > towardsLevel) { for (size_t i = zoomLevels->Count(); i > 0; i--) { if (zoomLevels->At(i - 1) + FUZZ < currZoom) { newZoom = zoomLevels->At(i - 1); break; } } } return newZoom; }
// TODO: optimize by getting both mouse over and mouse move windows in one call // x, y is a position in the root window LRESULT EventMgr::OnMouseMove(WPARAM keys, int x, int y, bool& wasHandled) { Vec<CtrlAndOffset> windows; Control *c; uint16 wantedInputMask = bit::FromBit<uint16>(Control::WantsMouseOverBit); size_t count = CollectWindowsAt(wndRoot, x, y, wantedInputMask, &windows); if (0 == count) { if (currOver) { currOver->SetIsMouseOver(false); currOver->NotifyMouseLeave(); currOver = nullptr; } } else { // TODO: should this take z-order into account ? c = windows.Last().c; if (c != currOver) { if (currOver) { currOver->SetIsMouseOver(false); currOver->NotifyMouseLeave(); } currOver = c; currOver->SetIsMouseOver(true); currOver->NotifyMouseEnter(); } } wantedInputMask = bit::FromBit<uint16>(Control::WantsMouseMoveBit); count = CollectWindowsAt(wndRoot, x, y, wantedInputMask, &windows); if (0 == count) return 0; c = windows.Last().c; c->MapRootToMyPos(x, y); c->NotifyMouseMove(x, y); return 0; }
// Returns the image data for id (normalized against pagePath and
// URL-decoded), loading it from the document and remembering it in images
// on first use. Returns NULL if the document has no data for the url.
ImageData *GetImageData(const char *id, const char *pagePath)
{
    ScopedMem<char> url(NormalizeURL(id, pagePath));
    str::UrlDecodeInPlace(url);

    // reuse an image that has been requested before
    for (size_t idx = 0; idx < images.Count(); idx++) {
        ImageData2 *known = &images.At(idx);
        if (str::Eq(known->id, url))
            return &known->base;
    }

    ImageData2 entry = { 0 };
    entry.base.data = (char *)doc->GetData(url, &entry.base.len);
    if (!entry.base.data)
        return NULL;

    // the entry takes over the url string as its id
    entry.id = url.StealData();
    images.Append(entry);
    return &images.Last().base;
}
// Exercises Vec<T> and str::Str<char> through a sequence of assertions.
// The checks build on each other, so their order matters.
static void VecTest() {
    // basic operations on a Vec<int>: Append, Push, InsertAt, Pop, RemoveAt, Reset
    Vec<int> ints;
    assert(ints.Count() == 0);
    ints.Append(1);
    ints.Push(2);
    ints.InsertAt(0, -1);
    assert(ints.Count() == 3);
    assert(ints.At(0) == -1 && ints.At(1) == 1 && ints.At(2) == 2);
    assert(ints.At(0) == -1 && ints.Last() == 2);
    int last = ints.Pop();
    assert(last == 2);
    assert(ints.Count() == 2);
    ints.Push(3);
    ints.RemoveAt(0);
    assert(ints.Count() == 2);
    assert(ints.At(0) == 1 && ints.At(1) == 3);
    ints.Reset();
    assert(ints.Count() == 0);

    // fill with 0..999, then remove the value 500 (the values behind it move up)
    for (int i = 0; i < 1000; i++) {
        ints.Push(i);
    }
    assert(ints.Count() == 1000 && ints.At(500) == 500);
    ints.Remove(500);
    assert(ints.Count() == 999 && ints.At(500) == 501);
    last = ints.Pop();
    assert(last == 999);
    ints.Append(last);
    assert(ints.AtPtr(501) == &ints.At(501));

    // a copy-constructed Vec has its own storage; assignment copies the content
    {
        Vec<int> ints2(ints);
        assert(ints2.Count() == 999);
        assert(ints.LendData() != ints2.LendData());
        ints.Remove(600);
        assert(ints.Count() < ints2.Count());
        ints2 = ints;
        assert(ints2.Count() == 998);
    }

    // build a string one character at a time, then replace it with Set
    {
        char buf[2] = {'a', '\0'};
        str::Str<char> v(0);
        for (int i = 0; i < 7; i++) {
            v.Append(buf, 1);
            buf[0] = buf[0] + 1;
        }
        char *s = v.LendData();
        assert(str::Eq("abcdefg", s));
        assert(7 == v.Count());
        v.Set("helo");
        assert(4 == v.Count());
        assert(str::Eq("helo", v.LendData()));
    }

    // Append with and without explicit length, RemoveAt of a range, StealData
    {
        str::Str<char> v(128);
        v.Append("boo", 3);
        assert(str::Eq("boo", v.LendData()));
        assert(v.Count() == 3);
        v.Append("fop");
        assert(str::Eq("boofop", v.LendData()));
        assert(v.Count() == 6);
        v.RemoveAt(2, 3);
        assert(v.Count() == 3);
        assert(str::Eq("bop", v.LendData()));
        v.Append('a');
        assert(v.Count() == 4);
        assert(str::Eq("bopa", v.LendData()));
        // the stolen buffer has to be freed here; the string is empty afterwards
        char *s = v.StealData();
        assert(str::Eq("bopa", s));
        free(s);
        assert(v.Count() == 0);
    }

    // append "lambda" 32 times, then remove all but one copy again
    {
        str::Str<char> v(0);
        for (int i = 0; i < 32; i++) {
            assert(v.Count() == i * 6);
            v.Append("lambd", 5);
            if (i % 2 == 0)
                v.Append('a');
            else
                v.Push('a');
        }

        for (int i=1; i<=16; i++) {
            v.RemoveAt((16 - i) * 6, 6);
            assert(v.Count() == (32 - i) * 6);
        }

        v.RemoveAt(0, 6 * 15);
        assert(v.Count() == 6);
        char *s = v.LendData();
        assert(str::Eq(s, "lambda"));
        s = v.StealData();
        assert(str::Eq(s, "lambda"));
        free(s);
        assert(v.Count() == 0);

        v.Append("lambda");
        assert(str::Eq(v.LendData(), "lambda"));
        char c = v.Pop();
        assert(c == 'a');
        assert(str::Eq(v.LendData(), "lambd"));
    }

    VecTestAppendFmt();

    // Vec of pointers: insertion at random positions, iteration, removal by value
    {
        Vec<PointI *> v;
        srand((unsigned int)time(NULL));
        for (int i = 0; i < 128; i++) {
            v.Append(new PointI(i, i));
            size_t pos = rand() % v.Count();
            v.InsertAt(pos, new PointI(i, i));
        }
        assert(v.Count() == 128 * 2);
        // IterIdx has to match the position of the item being iterated
        size_t idx = 0;
        for (PointI **p = v.IterStart(); p; p = v.IterNext()) {
            assert(idx == v.IterIdx());
            ++idx;
        }

        while (v.Count() > 64) {
            size_t pos = rand() % v.Count();
            PointI *f = v.At(pos);
            v.Remove(f);
            delete f;
        }
        DeleteVecMembers(v);
    }

    // Reverse, and Remove of a value that occurs many times
    {
        Vec<int> v;
        v.Append(2);
        for (int i = 0; i < 500; i++)
            v.Append(4);
        v.At(250) = 5;
        v.Reverse();
        assert(v.Count() == 501 && v.At(0) == 4 && v.At(249) == v.At(251) && v.At(250) == 5 && v.At(500) == 2);
        // Remove(4) takes out a single 4 only: the count drops by exactly one
        v.Remove(4);
        v.Reverse();
        assert(v.Count() == 500 && v.At(0) == 2 && v.At(249) == v.At(251) && v.At(250) == 5 && v.At(499) == 4);
    }
}
// see http://itexmac.sourceforge.net/pdfsync.html for the specification int Pdfsync::RebuildIndex() { size_t len; ScopedMem<char> data(file::ReadAll(syncfilepath, &len)); if (!data) return PDFSYNCERR_SYNCFILE_CANNOT_BE_OPENED; // convert the file data into a list of zero-terminated strings str::TransChars(data, "\r\n", "\0\0"); // parse preamble (jobname and version marker) char *line = data; char *dataEnd = data + len; // replace star by spaces (TeX uses stars instead of spaces in filenames) str::TransChars(line, "*/", " \\"); ScopedMem<WCHAR> jobName(str::conv::FromAnsi(line)); jobName.Set(str::Join(jobName, L".tex")); jobName.Set(PrependDir(jobName)); line = Advance0Line(line, dataEnd); UINT versionNumber = 0; if (!line || !str::Parse(line, "version %u", &versionNumber) || versionNumber != 1) return PDFSYNCERR_SYNCFILE_CANNOT_BE_OPENED; // reset synchronizer database srcfiles.Reset(); lines.Reset(); points.Reset(); fileIndex.Reset(); sheetIndex.Reset(); Vec<size_t> filestack; UINT page = 1; sheetIndex.Append(0); // add the initial tex file to the source file stack filestack.Push(srcfiles.Count()); srcfiles.Append(jobName.StealData()); PdfsyncFileIndex findex = { 0 }; fileIndex.Append(findex); PdfsyncLine psline; PdfsyncPoint pspoint; // parse data UINT maxPageNo = engine->PageCount(); while ((line = Advance0Line(line, dataEnd)) != NULL) { if (!line) break; switch (*line) { case 'l': psline.file = filestack.Last(); if (str::Parse(line, "l %u %u %u", &psline.record, &psline.line, &psline.column)) lines.Append(psline); else if (str::Parse(line, "l %u %u", &psline.record, &psline.line)) { psline.column = 0; lines.Append(psline); } // else dbg("Bad 'l' line in the pdfsync file"); break; case 's': if (str::Parse(line, "s %u", &page)) sheetIndex.Append(points.Count()); // else dbg("Bad 's' line in the pdfsync file"); // if (0 == page || page > maxPageNo) // dbg("'s' line with invalid page number in the pdfsync file"); break; case 'p': pspoint.page = page; if (0 == 
page || page > maxPageNo) /* ignore point for invalid page number */; else if (str::Parse(line, "p %u %u %u", &pspoint.record, &pspoint.x, &pspoint.y)) points.Append(pspoint); else if (str::Parse(line, "p* %u %u %u", &pspoint.record, &pspoint.x, &pspoint.y)) points.Append(pspoint); // else dbg("Bad 'p' line in the pdfsync file"); break; case '(': { ScopedMem<WCHAR> filename(str::conv::FromAnsi(line + 1)); // if the filename contains quotes then remove them // TODO: this should never happen!? if (filename[0] == '"' && filename[str::Len(filename) - 1] == '"') filename.Set(str::DupN(filename + 1, str::Len(filename) - 2)); // undecorate the filepath: replace * by space and / by \ str::TransChars(filename, L"*/", L" \\"); // if the file name extension is not specified then add the suffix '.tex' if (str::IsEmpty(path::GetExt(filename))) filename.Set(str::Join(filename, L".tex")); // ensure that the path is absolute if (PathIsRelative(filename)) filename.Set(PrependDir(filename)); filestack.Push(srcfiles.Count()); srcfiles.Append(filename.StealData()); findex.start = findex.end = lines.Count(); fileIndex.Append(findex); } break; case ')': if (filestack.Count() > 1) fileIndex.At(filestack.Pop()).end = lines.Count(); // else dbg("Unbalanced ')' line in the pdfsync file"); break; default: // dbg("Ignoring invalid pdfsync line starting with '%c'", *line); break; } } fileIndex.At(0).end = lines.Count(); assert(filestack.Count() == 1); return Synchronizer::RebuildIndex(); }
// Returns the text of page pageNo, assembled from the page's string
// instructions. Strings that don't continue in reading direction from the
// previous character are preceded by lineSep; a space is inserted between
// strings separated by a space instruction and a positive gap.
// If coords_out is given, it receives a new[]-allocated array containing one
// rectangle per character of the returned text.
WCHAR *EbookEngine::ExtractPageText(int pageNo, WCHAR *lineSep, RectI **coords_out, RenderTarget target)
{
    ScopedCritSec scope(&pagesAccess);

    str::Str<WCHAR> content;
    Vec<RectI> coords;
    // set by space instructions, consumed by the next string
    bool spacePending = false;

    Vec<DrawInstr> *instrs = GetHtmlPage(pageNo);
    for (DrawInstr *instr = instrs->IterStart(); instr; instr = instrs->IterNext()) {
        RectI bbox = GetInstrBbox(instr, pageBorder);

        if (InstrElasticSpace == instr->type || InstrFixedSpace == instr->type) {
            spacePending = true;
            continue;
        }
        if (InstrString != instr->type && InstrRtlString != instr->type)
            continue;
        // right-to-left strings mirror all horizontal comparisons
        const bool rtl = (InstrRtlString == instr->type);

        if (coords.Count() > 0) {
            RectI prev = coords.Last();
            const bool startsNewLine = rtl ? (bbox.BR().x > prev.x)
                                           : (bbox.x < prev.BR().x);
            if (startsNewLine) {
                content.Append(lineSep);
                coords.AppendBlanks(str::Len(lineSep));
                CrashIf(*lineSep && !coords.Last().IsEmpty());
            }
            else if (spacePending) {
                // width of the gap between the previous character and this string
                const int gap = rtl ? (prev.x - bbox.BR().x)
                                    : (bbox.x - prev.BR().x);
                if (gap > 0) {
                    const int gapX = rtl ? bbox.BR().x : (bbox.x - gap);
                    content.Append(' ');
                    coords.Append(RectI(gapX, bbox.y, gap, bbox.dy));
                }
            }
        }
        spacePending = false;

        ScopedMem<WCHAR> text(str::conv::FromHtmlUtf8(instr->str.s, instr->str.len));
        content.Append(text);
        // distribute the string's width evenly over its characters
        const size_t textLen = str::Len(text);
        const double charWidth = 1.0 * bbox.dx / textLen;
        for (size_t k = 0; k < textLen; k++) {
            // for right-to-left strings the first character is the rightmost one
            const size_t slot = rtl ? (textLen - k - 1) : k;
            coords.Append(RectI((int)(bbox.x + slot * charWidth), bbox.y, (int)charWidth, bbox.dy));
        }
    }

    if (coords_out) {
        CrashIf(coords.Count() != content.Count());
        const size_t rectCount = coords.Count();
        *coords_out = new RectI[rectCount];
        memcpy(*coords_out, coords.LendData(), rectCount * sizeof(RectI));
    }
    return content.StealData();
}