// add horizontal line (<hr> in html terms) void HtmlFormatter::EmitHr() { // hr creates an implicit paragraph break FlushCurrLine(true); CrashIf(NewLineX() != currX); RectF bbox(0.f, 0.f, pageDx, lineSpacing); AppendInstr(DrawInstr(InstrLine, bbox)); FlushCurrLine(true); }
// Handles the start and end of a paragraph-like tag.
// isDiv is set by callers that reuse this handler for non-<p> block tags;
// for those, the tag's align attribute is not consulted.
void HtmlFormatter::HandleTagP(HtmlToken* t, bool isDiv) {
    if (t->IsEndTag()) {
        // closing tag: finish the line and drop the paragraph's style
        FlushCurrLine(true);
        RevertStyleChange();
    } else {
        AlignAttr align = CurrStyle()->align;
        StyleRule rule = ComputeStyleRule(t);

        // prefer CSS styling to the align attribute
        if (Align_NotFound != rule.textAlign) {
            align = rule.textAlign;
        } else if (!isDiv) {
            align = GetAlignAttr(t, align);
        }

        float indent = 0;
        if (StyleRule::inherit != rule.textIndentUnit && rule.textIndent > 0) {
            // em is relative to the current font size; every other unit
            // (including pt) is currently taken as-is
            // TODO: take DPI into account for pt
            float factor = 1;
            if (StyleRule::em == rule.textIndentUnit) {
                factor = CurrFont()->GetSize();
            }
            indent = rule.textIndent * factor;
        }

        SetAlignment(align);
        EmitParagraph(indent);
    }

    // both start and end tags are followed by a bit of vertical space
    EmitEmptyLine(0.4f * CurrFont()->GetSize());
}
void HtmlFormatter::HandleTagBr() { // make sure to always emit a line if (IsCurrLineEmpty()) EmitEmptyLine(lineSpacing); else FlushCurrLine(true); }
// Handles tags that open or close a list-like (nested) block:
// ends the current line, adjusts listDepth and restarts the line
// at the x position NewLineX() yields afterwards.
void HtmlFormatter::HandleTagList(HtmlToken* t) {
    FlushCurrLine(true);

    if (t->IsStartTag()) {
        listDepth += 1;
    } else {
        // never let unbalanced end tags push the depth below zero
        const bool canClose = t->IsEndTag() && listDepth > 0;
        if (canClose) {
            listDepth -= 1;
        }
    }

    currX = NewLineX();
}
// Starts a new paragraph: ends the current line and, for left-aligned or
// justified text, inserts `indent` units of fixed space at the line start.
void HtmlFormatter::EmitParagraph(float indent) {
    FlushCurrLine(true);
    CrashIf(NewLineX() != currX);

    // first-line indentation only applies to left-aligned and justified text
    const auto align = CurrStyle()->align;
    const bool wantsIndent = Align_Left == align || Align_Justify == align;
    if (!(indent > 0) || !wantsIndent) {
        return;
    }
    // skip the indent if it cannot fit on a line at all
    if (!EnsureDx(indent)) {
        return;
    }

    AppendInstr(DrawInstr::FixedSpace(indent));
    currX += indent;
}
// Handles <pre>: switches to a left-aligned "Courier New" style while the
// tag is open and toggles the preFormatted flag accordingly.
void HtmlFormatter::HandleTagPre(HtmlToken* t) {
    FlushCurrLine(true);

    if (t->IsStartTag()) {
        // keep the current style flags, only swap the font face
        FontStyle currStyleFlags = static_cast<FontStyle>(CurrFont()->GetStyle());
        SetFont(L"Courier New", currStyleFlags);
        CurrStyle()->align = Align_Left;
        preFormatted = true;
        return;
    }

    if (!t->IsEndTag()) {
        return;
    }
    RevertStyleChange();
    preFormatted = false;
}
// Ends the current page and starts a fresh one, unless flushing the
// current line has already done so.
void HtmlFormatter::ForceNewPage() {
    // FlushCurrLine's result is taken as "a new page was created"
    if (FlushCurrLine(true)) {
        return;
    }

    // queue the finished page for the caller
    UpdateLinkBboxes(currPage);
    pagesToSend.Append(currPage);

    // start the next page at the beginning of a line, without top padding
    EmitNewPage();
    currX = NewLineX();
    currLineTopPadding = 0.f;
}
// the name doesn't quite fit: this handles FB2 tags void Fb2Formatter::HandleHtmlTag(HtmlToken *t) { if (Tag_Title == t->tag || Tag_Subtitle == t->tag) { bool isSubtitle = Tag_Subtitle == t->tag; ScopedMem<char> name(str::Format("h%d", section + (isSubtitle ? 1 : 0))); HtmlToken tok; tok.SetTag(t->type, name, name + str::Len(name)); HandleTagHx(&tok); HandleAnchorAttr(t); if (!isSubtitle && t->IsStartTag()) { char *link = (char *)Allocator::Alloc(textAllocator, 24); sprintf_s(link, 24, FB2_TOC_ENTRY_MARK "%d", ++titleCount); currPage->instructions.Append(DrawInstr::Anchor(link, str::Len(link), RectF(0, currY, pageDx, 0))); } } else if (Tag_Section == t->tag) { if (t->IsStartTag()) section++; else if (t->IsEndTag() && section > 1) section--; FlushCurrLine(true); HandleAnchorAttr(t); } else if (Tag_P == t->tag) { if (!tagNesting.Contains(Tag_Title)) HtmlFormatter::HandleHtmlTag(t); } else if (Tag_Image == t->tag) { HandleTagImg(t); HandleAnchorAttr(t); } else if (Tag_A == t->tag) { HandleTagA(t, "href", "http://www.w3.org/1999/xlink"); HandleAnchorAttr(t, true); } else if (Tag_Pagebreak == t->tag) ForceNewPage(); else if (Tag_Strong == t->tag) HandleTagAsHtml(t, "b"); else if (t->NameIs("emphasis")) HandleTagAsHtml(t, "i"); else if (t->NameIs("epigraph")) HandleTagAsHtml(t, "blockquote"); else if (t->NameIs("empty-line")) { if (!t->IsEndTag()) EmitParagraph(0); } else if (t->NameIs("stylesheet")) HandleTagAsHtml(t, "style"); }
// Handles heading tags (<h1>, <h2>, ...): the second character of the tag
// name selects the font size; headings are bold and get half a font size
// of vertical space before and after.
void HtmlFormatter::HandleTagHx(HtmlToken* t) {
    if (!t->IsEndTag()) {
        EmitParagraph(0);

        // each level above '5' grows the default font size by 10%,
        // each level below shrinks it accordingly
        const int sizeStep = '5' - t->s[1];
        float fontSize = defaultFontSize * pow(1.1f, sizeStep);
        // no extra space before a heading at the very top of a page
        if (currY > 0) {
            currY += fontSize / 2;
        }
        SetFontBasedOn(CurrFont(), FontStyleBold, fontSize);

        // CSS alignment wins over the align attribute; default is left
        StyleRule rule = ComputeStyleRule(t);
        auto align = rule.textAlign;
        if (Align_NotFound == align) {
            align = GetAlignAttr(t, Align_Left);
        }
        CurrStyle()->align = align;
        return;
    }

    // end tag: finish the heading's line, add spacing, restore the style
    FlushCurrLine(true);
    currY += CurrFont()->GetSize() / 2;
    RevertStyleChange();
}
// Return the next parsed page. Returns nullptr if finished parsing. // For simplicity of implementation, we parse xml text node or // xml element at a time. This might cause a creation of one // or more pages, which we remeber and send to the caller // if we detect accumulated pages. HtmlPage* HtmlFormatter::Next(bool skipEmptyPages) { for (;;) { // send out all pages accumulated so far while (pagesToSend.size() > 0) { HtmlPage* ret = pagesToSend.PopAt(0); pageCount++; if (skipEmptyPages && IsEmptyPage(ret)) delete ret; else return ret; } // we can call ourselves recursively to send outstanding // pages after parsing has finished so this is to detect // that case and really end parsing if (finishedParsing) return nullptr; HtmlToken* t = htmlParser->Next(); if (!t || t->IsError()) break; currReparseIdx = t->GetReparsePoint() - htmlParser->Start(); CrashIf(!ValidReparseIdx(currReparseIdx, htmlParser)); if (t->IsTag()) HandleHtmlTag(t); else if (!IgnoreText()) HandleText(t); } // force layout of the last line AutoCloseTags(tagNesting.size()); FlushCurrLine(true); UpdateLinkBboxes(currPage); pagesToSend.Append(currPage); currPage = nullptr; // call ourselves recursively to return accumulated pages finishedParsing = true; return Next(); }
// Mobi-specific tag handling; anything not special-cased here is
// forwarded to the generic HTML handler.
void MobiFormatter::HandleHtmlTag(HtmlToken *t) {
    CrashIf(!t->IsTag());

    if (Tag_P == t->tag || Tag_Blockquote == t->tag) {
        // regular handling plus Mobi-specific spacing
        HtmlFormatter::HandleHtmlTag(t);
        HandleSpacing_Mobi(t);
        return;
    }

    if (Tag_Mbp_Pagebreak == t->tag) {
        ForceNewPage();
        return;
    }

    if (Tag_A == t->tag) {
        HandleAnchorAttr(t);
        // handle internal and external links (prefer internal ones)
        if (!HandleTagA(t, "filepos")) {
            HandleTagA(t);
        }
        return;
    }

    if (Tag_Hr == t->tag) {
        // imitating Kindle: hr is preceded by an empty line
        FlushCurrLine(false);
        EmitEmptyLine(lineSpacing);
        EmitHr();
        return;
    }

    HtmlFormatter::HandleHtmlTag(t);
}
// Lays out an image at the current position, moving it to a new line or
// page and scaling it down as needed to fit the page.
// Returns false (emitting nothing) if the image's size can't be determined
// or is empty; returns true once the image instruction has been appended.
bool HtmlFormatter::EmitImage(ImageData* img) {
    CrashIf(!img->data);

    Size pixelSize = BitmapSizeFromData(img->data, img->len);
    if (pixelSize.Empty()) {
        return false;
    }
    SizeF drawSize(static_cast<REAL>(pixelSize.Width), static_cast<REAL>(pixelSize.Height));

    // move overly large images to a new line (if they don't fit entirely)
    if (!IsCurrLineEmpty()) {
        const bool overflowsX = currX + drawSize.Width > pageDx;
        const bool overflowsY = currY + drawSize.Height > pageDy;
        if (overflowsX || overflowsY) {
            FlushCurrLine(false);
        }
    }

    // move overly large images to a new page
    // (if they don't fit even when scaled down to 75%)
    // note: fitScale is computed from currX as it is here and is not
    // recomputed after any of the page breaks below
    REAL fitScale = std::min((pageDx - currX) / drawSize.Width, pageDy / drawSize.Height);
    if (currY > 0 && currY + drawSize.Height * std::min(fitScale, 0.75f) > pageDy) {
        ForceNewPage();
    }

    // if image is bigger than the available space, scale it down
    const bool exceedsSpace = drawSize.Width > pageDx - currX || drawSize.Height > pageDy - currY;
    if (exceedsSpace) {
        REAL shrink = std::min(fitScale, (pageDy - currY) / drawSize.Height);
        // scale down images that follow right after a line
        // containing a single image as little as possible,
        // as they might be intended to be of the same size
        if (shrink < fitScale && HasPreviousLineSingleImage(currPage->instructions)) {
            ForceNewPage();
            shrink = fitScale;
        }
        if (shrink < 1) {
            // never exceed what is left of the page in either direction
            drawSize.Width = std::min(drawSize.Width * shrink, pageDx - currX);
            drawSize.Height = std::min(drawSize.Height * shrink, pageDy - currY);
        }
    }

    RectF imgBox(PointF(currX, 0), drawSize);
    AppendInstr(DrawInstr::Image(img->data, img->len, imgBox));
    currX += imgBox.Width;
    return true;
}
// ensure there is enough dx space left in the current line // if there isn't, we start a new line // returns false if dx is bigger than pageDx bool HtmlFormatter::EnsureDx(float dx) { if (currX + dx <= pageDx) return true; FlushCurrLine(false); return dx <= pageDx; }
// Central tag dispatcher: updates the tag nesting, routes the tag to its
// handler and finally processes the attributes every tag may carry
// (anchor and reading direction).
void HtmlFormatter::HandleHtmlTag(HtmlToken* t) {
    CrashIf(!t->IsTag());

    UpdateTagNesting(t);

    HtmlTag tag = t->tag;
    if (tag == Tag_P) {
        HandleTagP(t);
    } else if (tag == Tag_Hr) {
        EmitHr();
    } else if (tag == Tag_B || tag == Tag_Strong) {
        ChangeFontStyle(FontStyleBold, t->IsStartTag());
    } else if (tag == Tag_I || tag == Tag_Em) {
        ChangeFontStyle(FontStyleItalic, t->IsStartTag());
    } else if (tag == Tag_U) {
        // leave the underline alone while inside a link
        if (!currLinkIdx) {
            ChangeFontStyle(FontStyleUnderline, t->IsStartTag());
        }
    } else if (tag == Tag_Strike) {
        ChangeFontStyle(FontStyleStrikeout, t->IsStartTag());
    } else if (tag == Tag_Br) {
        HandleTagBr();
    } else if (tag == Tag_Font) {
        HandleTagFont(t);
    } else if (tag == Tag_A) {
        HandleTagA(t);
    } else if (tag == Tag_Blockquote || tag == Tag_Ul || tag == Tag_Ol || tag == Tag_Dd || tag == Tag_Table) {
        // all of these are currently laid out like a list level
        // TODO: implement blockquote and table properly
        // TODO: separate indentation from list depth (for dd)
        HandleTagList(t);
    } else if (tag == Tag_Div) {
        // TODO: implement me
        HandleTagP(t, true);
    } else if (IsTagH(tag)) {
        HandleTagHx(t);
    } else if (tag == Tag_Sup || tag == Tag_Sub || tag == Tag_Span) {
        // TODO: implement me
    } else if (tag == Tag_Center) {
        HandleTagP(t, true);
        if (!t->IsEndTag()) {
            CurrStyle()->align = Align_Center;
        }
    } else if (tag == Tag_Li) {
        // TODO: display bullet/number
        FlushCurrLine(true);
    } else if (tag == Tag_Dt) {
        FlushCurrLine(true);
        ChangeFontStyle(FontStyleBold, t->IsStartTag());
        if (t->IsStartTag()) {
            CurrStyle()->align = Align_Left;
        }
    } else if (tag == Tag_Tr) {
        // display tables row-by-row for now
        FlushCurrLine(true);
        if (t->IsStartTag()) {
            SetAlignment(Align_Left);
        } else if (t->IsEndTag()) {
            RevertStyleChange();
        }
    } else if (tag == Tag_Code || tag == Tag_Tt) {
        if (t->IsStartTag()) {
            SetFont(L"Courier New", (FontStyle)CurrFont()->GetStyle());
        } else if (t->IsEndTag()) {
            RevertStyleChange();
        }
    } else if (tag == Tag_Pre) {
        HandleTagPre(t);
    } else if (tag == Tag_Img) {
        HandleTagImg(t);
    } else if (tag == Tag_Pagebreak) {
        // not really a HTML tag, but many ebook
        // formats use it
        HandleTagPagebreak(t);
    } else if (tag == Tag_Link) {
        HandleTagLink(t);
    } else if (tag == Tag_Style) {
        HandleTagStyle(t);
    } else {
        // TODO: temporary debugging
        // lf("unhandled tag: %d", tag);
    }

    // any tag could contain anchor information
    HandleAnchorAttr(t);
    // any tag could contain a reading direction change
    HandleDirAttr(t);
}