// Handles the start/end of a paragraph-like tag (<p>; callers pass
// isDiv = true for tags such as <div> and <center>).
//
// End tag:   flushes the current line as a paragraph break and reverts the
//            last style change.
// Start tag: picks the alignment (the CSS text-align from the computed style
//            rule wins; otherwise, unless isDiv is set, the tag's align
//            attribute is consulted), computes an indent from the CSS
//            text-indent and hands both to SetAlignment()/EmitParagraph().
// In both cases an empty line of 0.4 * current font size is emitted last.
void HtmlFormatter::HandleTagP(HtmlToken* t, bool isDiv) {
    if (t->IsEndTag()) {
        FlushCurrLine(true);
        RevertStyleChange();
    } else {
        AlignAttr align = CurrStyle()->align;
        StyleRule rule = ComputeStyleRule(t);
        if (Align_NotFound != rule.textAlign) {
            align = rule.textAlign;
        } else if (!isDiv) {
            // prefer CSS styling to align attribute
            align = GetAlignAttr(t, align);
        }

        float indent = 0;
        if (StyleRule::inherit != rule.textIndentUnit && rule.textIndent > 0) {
            // em is relative to the current font size; every other unit
            // (including pt) is currently taken as-is
            // TODO: take DPI into account for pt
            float unitSize = 1;
            if (StyleRule::em == rule.textIndentUnit) {
                unitSize = CurrFont()->GetSize();
            }
            indent = rule.textIndent * unitSize;
        }

        SetAlignment(align);
        EmitParagraph(indent);
    }

    EmitEmptyLine(0.4f * CurrFont()->GetSize());
}
// Handles <font> tags.
//
// An end tag simply reverts the last style change. A start tag derives a new
// font from the current one, optionally overriding:
//  - the face name ("face" attribute; only the first name of a
//    comma-separated list is used),
//  - the size ("size" attribute, 1..7 where 3 is the default font size and
//    each step scales by a factor of 1.2).
void HtmlFormatter::HandleTagFont(HtmlToken* t) {
    if (t->IsEndTag()) {
        RevertStyleChange();
        return;
    }

    AttrInfo* faceAttr = t->GetAttrByName("face");
    const WCHAR* faceName = CurrFont()->GetName();
    if (faceAttr) {
        size_t faceLen = str::Utf8ToWcharBuf(faceAttr->val, faceAttr->valLen, buf, dimof(buf));
        // multiple font names can be comma separated; cut the list at the
        // first comma (a list starting with a comma is ignored entirely)
        if (faceLen > 0 && *buf != ',') {
            str::TransChars(buf, L",", L"\0");
            faceName = buf;
        }
    }

    float fontSize = CurrFont()->GetSize();
    AttrInfo* sizeAttr = t->GetAttrByName("size");
    if (sizeAttr) {
        // the sizes are in the range from 1 (tiny) to 7 (huge)
        int size = 3; // normal size
        str::Parse(sizeAttr->val, sizeAttr->valLen, "%d", &size);
        // sizes can also be relative to the current size
        if (sizeAttr->valLen > 0) {
            if ('-' == *sizeAttr->val || '+' == *sizeAttr->val) {
                size += 3;
            }
        }
        size = limitValue(size, 1, 7);
        float scale = pow(1.2f, size - 3);
        fontSize = defaultFontSize * scale;
    }

    SetFont(faceName, (FontStyle)CurrFont()->GetStyle(), fontSize);
}
// change the current font by adding (if addStyle is true) or removing // a given font style from current font style // TODO: it doesn't corrctly support the case where a style is wrongly nested // like "<b>fo<i>oo</b>bar</i>" - "bar" should be italic but will be bold void HtmlFormatter::ChangeFontStyle(FontStyle fs, bool addStyle) { CrashIf(!ValidStyleForChangeFontStyle(fs)); if (addStyle) SetFontBasedOn(CurrFont(), (FontStyle)(fs | CurrFont()->GetStyle())); else RevertStyleChange(); }
void HtmlFormatter::RevertStyleChange() { if (styleStack.size() > 1) { DrawStyle style = styleStack.Pop(); if (style.font != CurrFont()) AppendInstr(DrawInstr::SetFont(CurrFont())); dirRtl = style.dirRtl; } }
// a text run is a string of consecutive text with uniform style void HtmlFormatter::EmitTextRun(const char* s, const char* end) { currReparseIdx = s - htmlParser->Start(); CrashIf(!ValidReparseIdx(currReparseIdx, htmlParser)); CrashIf(IsSpaceOnly(s, end) && !preFormatted); const char* tmp = ResolveHtmlEntities(s, end, textAllocator); bool resolved = tmp != s; if (resolved) { s = tmp; end = s + str::Len(s); } while (s < end) { // don't update the reparseIdx if s doesn't point into the original source if (!resolved) currReparseIdx = s - htmlParser->Start(); size_t strLen = str::Utf8ToWcharBuf(s, end - s, buf, dimof(buf)); // soft hyphens should not be displayed strLen -= str::RemoveChars(buf, L"\xad"); if (0 == strLen) break; textMeasure->SetFont(CurrFont()); RectF bbox = textMeasure->Measure(buf, strLen); EnsureDx(bbox.Width); if (bbox.Width <= pageDx - currX) { AppendInstr(DrawInstr::Str(s, end - s, bbox, dirRtl)); currX += bbox.Width; break; } size_t lenThatFits = StringLenForWidth(textMeasure, buf, strLen, pageDx - NewLineX()); // try to prevent a break in the middle of a word if (iswalnum(buf[lenThatFits])) { for (size_t len = lenThatFits; len > 0; len--) { if (!iswalnum(buf[len - 1])) { lenThatFits = len; break; } } } textMeasure->SetFont(CurrFont()); bbox = textMeasure->Measure(buf, lenThatFits); CrashIf(bbox.Width > pageDx); // s is UTF-8 and buf is UTF-16, so one // WCHAR doesn't always equal one char // TODO: this usually fails for non-BMP characters (i.e. hardly ever) for (size_t i = lenThatFits; i > 0; i--) { lenThatFits += buf[i - 1] < 0x80 ? 0 : buf[i - 1] < 0x800 ? 1 : 2; } AppendInstr(DrawInstr::Str(s, lenThatFits, bbox, dirRtl)); currX += bbox.Width; s += lenThatFits; } }
void HtmlFormatter::SetFont(const WCHAR* fontName, FontStyle fs, float fontSize) { if (fontSize < 0) { fontSize = CurrFont()->GetSize(); } mui::CachedFont* newFont = mui::GetCachedFont(fontName, fontSize, fs); if (CurrFont() != newFont) { AppendInstr(DrawInstr::SetFont(newFont)); } DrawStyle style = styleStack.Last(); style.font = newFont; styleStack.Append(style); }
// Handles heading tags.
//
// Start tag: starts a new paragraph, switches to a bold font whose size is
//            defaultFontSize * 1.1^('5' - t->s[1]) and sets the alignment
//            (CSS text-align first, then the align attribute, else left).
//            Half the heading font size is added above the heading unless
//            it is at the very top (currY == 0).
// End tag:   flushes the line, adds half the font size below the heading
//            and reverts the style change.
void HtmlFormatter::HandleTagHx(HtmlToken* t) {
    if (!t->IsEndTag()) {
        EmitParagraph(0);
        // presumably t->s[1] is the digit of the "hN" tag name - verify
        float fontSize = defaultFontSize * pow(1.1f, '5' - t->s[1]);
        if (currY > 0) {
            currY += fontSize / 2;
        }
        SetFontBasedOn(CurrFont(), FontStyleBold, fontSize);

        StyleRule rule = ComputeStyleRule(t);
        AlignAttr align = rule.textAlign;
        if (Align_NotFound == align) {
            align = GetAlignAttr(t, Align_Left);
        }
        CurrStyle()->align = align;
        return;
    }

    FlushCurrLine(true);
    currY += CurrFont()->GetSize() / 2;
    RevertStyleChange();
}
// Sets up a formatter for the html in args: creates the pull parser
// (positioned at args->reparseIdx), the text measuring objects, the base
// style (default font, regular style, justified, left-to-right) and the
// derived line/space metrics, then starts the first page.
HtmlFormatter::HtmlFormatter(HtmlFormatterArgs* args)
    : pageDx(args->pageDx),
      pageDy(args->pageDy),
      textAllocator(args->textAllocator),
      currLineReparseIdx(0),
      currX(0),
      currY(0),
      currLineTopPadding(0),
      currLinkIdx(0),
      listDepth(0),
      preFormatted(false),
      dirRtl(false),
      currPage(nullptr),
      finishedParsing(false),
      pageCount(0),
      keepTagNesting(false) {
    // parser, starting at the requested offset
    currReparseIdx = args->reparseIdx;
    htmlParser = new HtmlPullParser(args->htmlStr.data(), args->htmlStr.size());
    htmlParser->SetCurrPosOff(currReparseIdx);
    CrashIf(!ValidReparseIdx(currReparseIdx, htmlParser));

    // text measuring
    gfx = mui::AllocGraphicsForMeasureText();
    textMeasure = CreateTextRender(args->textRenderMethod, gfx, 10, 10);

    // default font and the bottom-most entry of the style stack
    defaultFontName.SetCopy(args->GetFontName());
    defaultFontSize = args->fontSize;

    DrawStyle baseStyle;
    baseStyle.font = mui::GetCachedFont(defaultFontName, defaultFontSize, FontStyleRegular);
    baseStyle.align = Align_Justify;
    baseStyle.dirRtl = false;
    styleStack.Append(baseStyle);
    nextPageStyle = styleStack.Last();

    // metrics derived from the default font
    textMeasure->SetFont(CurrFont());
    lineSpacing = textMeasure->GetCurrFontLineSpacing();
    // note: a heuristic; the smaller of this guess and the measured width wins
    spaceDx = CurrFont()->GetSize() / 2.5f;
    float measuredSpaceDx = GetSpaceDx(textMeasure);
    if (measuredSpaceDx < spaceDx) {
        spaceDx = measuredSpaceDx;
    }

    EmitNewPage();
}
// Handles <pre> tags. The current line is always flushed first.
// A start tag switches to "Courier New" (keeping the current font style),
// forces left alignment and turns preFormatted on; an end tag reverts the
// style change and turns preFormatted off. Tokens that are neither a start
// nor an end tag only cause the flush.
void HtmlFormatter::HandleTagPre(HtmlToken* t) {
    FlushCurrLine(true);

    if (t->IsStartTag()) {
        SetFont(L"Courier New", (FontStyle)CurrFont()->GetStyle());
        CurrStyle()->align = Align_Left;
        preFormatted = true;
        return;
    }

    if (!t->IsEndTag()) {
        return;
    }
    RevertStyleChange();
    preFormatted = false;
}
// returns true if created a new page bool HtmlFormatter::FlushCurrLine(bool isParagraphBreak) { if (IsCurrLineEmpty()) { currX = NewLineX(); currLineTopPadding = 0; // remove all spaces (only keep SetFont, LinkStart and Anchor instructions) for (size_t k = currLineInstr.size(); k > 0; k--) { DrawInstr& i = currLineInstr.at(k - 1); if (InstrFixedSpace == i.type || InstrElasticSpace == i.type) currLineInstr.RemoveAt(k - 1); } return false; } AlignAttr align = CurrStyle()->align; if (isParagraphBreak && (Align_Justify == align)) align = Align_Left; JustifyCurrLine(align); // create a new page if necessary float totalLineDy = CurrLineDy() + currLineTopPadding; bool createdPage = false; if (currY + totalLineDy > pageDy) { // current line too big to fit in current page, // so need to start another page UpdateLinkBboxes(currPage); pagesToSend.Append(currPage); // instructions for each page need to be self-contained // so we have to carry over some state (like current font) CrashIf(!CurrFont()); EmitNewPage(); CrashIf(currLineReparseIdx > INT_MAX); currPage->reparseIdx = (int)currLineReparseIdx; createdPage = true; } SetYPos(currLineInstr, currY + currLineTopPadding); currY += totalLineDy; DrawInstr link; if (currLinkIdx) { link = currLineInstr.at(currLinkIdx - 1); // TODO: this occasionally leads to empty links AppendInstr(DrawInstr(InstrLinkEnd)); } currPage->instructions.Append(currLineInstr.LendData(), currLineInstr.size()); currLineInstr.Reset(); currLineReparseIdx = -1; // mark as not set currLineTopPadding = 0; currX = NewLineX(); if (currLinkIdx) { AppendInstr(DrawInstr::LinkStart(link.str.s, link.str.len)); currLinkIdx = currLineInstr.size(); } nextPageStyle = styleStack.Last(); return createdPage; }
// Applies mobi-specific spacing attributes of a start tag; other tokens are
// ignored.
//
// best I can tell, in mobi <p width="1em" height="3pt"> means that
// the first line of the paragraph is indented by 1em and there's
// 3pt top padding (the same seems to apply for <blockquote>)
void MobiFormatter::HandleSpacing_Mobi(HtmlToken *t) {
    if (!t->IsStartTag()) {
        return;
    }

    AttrInfo *widthAttr = t->GetAttrByName("width");
    if (widthAttr) {
        float lineIndent = ParseSizeAsPixels(widthAttr->val, widthAttr->valLen, CurrFont()->GetSize());
        // there are files with negative width which produces partially
        // invisible text, so don't allow that
        if (lineIndent > 0) {
            // this should replace the previously emitted paragraph/quote block
            EmitParagraph(lineIndent);
        }
    }

    AttrInfo *heightAttr = t->GetAttrByName("height");
    if (heightAttr) {
        // remembered here, used later in FlushCurrLine()
        currLineTopPadding = ParseSizeAsPixels(heightAttr->val, heightAttr->valLen, CurrFont()->GetSize());
    }
}
// Dispatches a single tag token to the handler for its tag type.
// Tag nesting is updated before dispatching; after dispatching (even for
// unhandled tags) the anchor and reading-direction attributes of the token
// are processed.
void HtmlFormatter::HandleHtmlTag(HtmlToken* t) {
    CrashIf(!t->IsTag());

    UpdateTagNesting(t);

    HtmlTag tag = t->tag;
    if (tag == Tag_P) {
        HandleTagP(t);
    } else if (tag == Tag_Hr) {
        EmitHr();
    } else if (tag == Tag_B || tag == Tag_Strong) {
        ChangeFontStyle(FontStyleBold, t->IsStartTag());
    } else if (tag == Tag_I || tag == Tag_Em) {
        ChangeFontStyle(FontStyleItalic, t->IsStartTag());
    } else if (tag == Tag_U) {
        // underline changes are skipped while inside a link
        if (!currLinkIdx) {
            ChangeFontStyle(FontStyleUnderline, t->IsStartTag());
        }
    } else if (tag == Tag_Strike) {
        ChangeFontStyle(FontStyleStrikeout, t->IsStartTag());
    } else if (tag == Tag_Br) {
        HandleTagBr();
    } else if (tag == Tag_Font) {
        HandleTagFont(t);
    } else if (tag == Tag_A) {
        HandleTagA(t);
    } else if (tag == Tag_Blockquote) {
        // TODO: implement me
        HandleTagList(t);
    } else if (tag == Tag_Div) {
        // TODO: implement me
        HandleTagP(t, true);
    } else if (IsTagH(tag)) {
        HandleTagHx(t);
    } else if (tag == Tag_Sup || tag == Tag_Sub || tag == Tag_Span) {
        // TODO: implement me
    } else if (tag == Tag_Center) {
        HandleTagP(t, true);
        if (!t->IsEndTag()) {
            CurrStyle()->align = Align_Center;
        }
    } else if (tag == Tag_Ul || tag == Tag_Ol) {
        HandleTagList(t);
    } else if (tag == Tag_Li) {
        // TODO: display bullet/number
        FlushCurrLine(true);
    } else if (tag == Tag_Dt) {
        FlushCurrLine(true);
        ChangeFontStyle(FontStyleBold, t->IsStartTag());
        if (t->IsStartTag()) {
            CurrStyle()->align = Align_Left;
        }
    } else if (tag == Tag_Dd) {
        // TODO: separate indentation from list depth
        HandleTagList(t);
    } else if (tag == Tag_Table) {
        // TODO: implement me
        HandleTagList(t);
    } else if (tag == Tag_Tr) {
        // display tables row-by-row for now
        FlushCurrLine(true);
        if (t->IsStartTag()) {
            SetAlignment(Align_Left);
        } else if (t->IsEndTag()) {
            RevertStyleChange();
        }
    } else if (tag == Tag_Code || tag == Tag_Tt) {
        if (t->IsStartTag()) {
            SetFont(L"Courier New", (FontStyle)CurrFont()->GetStyle());
        } else if (t->IsEndTag()) {
            RevertStyleChange();
        }
    } else if (tag == Tag_Pre) {
        HandleTagPre(t);
    } else if (tag == Tag_Img) {
        HandleTagImg(t);
    } else if (tag == Tag_Pagebreak) {
        // not really a HTML tag, but many ebook
        // formats use it
        HandleTagPagebreak(t);
    } else if (tag == Tag_Link) {
        HandleTagLink(t);
    } else if (tag == Tag_Style) {
        HandleTagStyle(t);
    } else {
        // TODO: temporary debugging
        // lf("unhandled tag: %d", tag);
    }

    // any tag could contain anchor information
    HandleAnchorAttr(t);
    // any tag could contain a reading direction change
    HandleDirAttr(t);
}