// Looks for a "dir" attribute on the start tag of a non-inline element and
// records whether its value is "RTL" (as decided by AttrInfo::ValIs).
// Reading direction changes are only applied to block elements (for now).
void HtmlFormatter::HandleDirAttr(HtmlToken* t)
{
    if (!t->IsStartTag() || IsInlineTag(t->tag))
        return;

    AttrInfo* dirAttr = t->GetAttrByName("dir");
    if (!dirAttr)
        return;

    // the same flag is written to the current style and to dirRtl
    const bool isRtl = dirAttr->ValIs("RTL");
    CurrStyle()->dirRtl = isRtl;
    dirRtl = isRtl;
}
// Returns the first attribute of this token for which
// AttrInfo::NameIsNS(name, attrNS) holds, or NULL if no attribute matches.
AttrInfo *HtmlToken::GetAttrByNameNS(const char *name, const char *attrNS)
{
    // reset the cursor so that NextAttr() starts from the beginning
    nextAttr = NULL;

    AttrInfo *candidate = NextAttr();
    while (candidate && !candidate->NameIsNS(name, attrNS))
        candidate = NextAttr();
    // either the matching attribute or NULL once NextAttr() is exhausted
    return candidate;
}
// Handles a style start tag: pulls tokens from htmlParser until the matching
// style end tag and hands the text in between to ParseStyleSheet.
// Does nothing for non-start tags, for a "type" attribute other than
// "text/css", or when no closing style tag is found.
void HtmlFormatter::HandleTagStyle(HtmlToken* t)
{
    if (!t->IsStartTag())
        return;

    // a type attribute is optional, but if present it must be "text/css"
    AttrInfo* typeAttr = t->GetAttrByName("type");
    if (typeAttr && !typeAttr->ValIs("text/css"))
        return;

    // NOTE(review): presumably the + 1 skips the '>' terminating the start tag - confirm
    const char* start = t->s + t->sLen + 1;

    // advance until the closing style tag, an error token or Next() returning NULL
    for (;;) {
        if (!t || t->IsError())
            break;
        if (t->IsEndTag() && Tag_Style == t->tag)
            break;
        t = htmlParser->Next();
    }
    if (!t || !t->IsEndTag() || Tag_Style != t->tag)
        return;

    // NOTE(review): presumably the - 2 steps back over the "</" of the end tag - confirm
    const char* end = t->s - 2;
    CrashIf(start > end);
    ParseStyleSheet(start, end - start);
    UpdateTagNesting(t);
}
static void Test00(const char *s, HtmlToken::TokenType expectedType) { HtmlPullParser parser(s, str::Len(s)); HtmlToken *t = parser.Next(); assert(t->type == expectedType); assert(t->NameIs("p")); assert(Tag_P == t->tag); AttrInfo *a = t->GetAttrByName("a1"); assert(a->NameIs("a1")); assert(a->ValIs(">")); a = t->GetAttrByName("foo"); assert(a->NameIs("foo")); assert(a->ValIs("bar")); a = t->GetAttrByName("nope"); assert(!a); t = parser.Next(); assert(!t); }
// Handles a link tag that references a stylesheet: requires rel="stylesheet",
// an optional type attribute which (if present) must be "text/css", and an
// href attribute. The href value is passed to htmlDoc->GetFileData and any
// data returned is handed to ParseStyleSheet.
void HtmlFileFormatter::HandleTagLink(HtmlToken *t)
{
    CrashIf(!htmlDoc);
    if (t->IsEndTag())
        return;

    AttrInfo *relAttr = t->GetAttrByName("rel");
    if (!relAttr || !relAttr->ValIs("stylesheet"))
        return;

    AttrInfo *typeAttr = t->GetAttrByName("type");
    if (typeAttr && !typeAttr->ValIs("text/css"))
        return;

    AttrInfo *hrefAttr = t->GetAttrByName("href");
    if (!hrefAttr)
        return;

    // duplicate exactly valLen characters of the attribute value
    ScopedMem<char> src(str::DupN(hrefAttr->val, hrefAttr->valLen));
    size_t len;
    ScopedMem<char> data(htmlDoc->GetFileData(src, &len));
    if (!data)
        return;
    ParseStyleSheet(data, len);
}
// cf. http://www.w3.org/TR/html4/charset.html#h-5.2.2 static UINT ExtractHttpCharset(const char *html, size_t htmlLen) { if (!strstr(html, "charset=")) return 0; HtmlPullParser parser(html, min(htmlLen, 1024)); HtmlToken *tok; while ((tok = parser.Next()) && !tok->IsError()) { if (tok->tag != Tag_Meta) continue; AttrInfo *attr = tok->GetAttrByName("http-equiv"); if (!attr || !attr->ValIs("Content-Type")) continue; attr = tok->GetAttrByName("content"); ScopedMem<char> mimetype, charset; if (!attr || !str::Parse(attr->val, attr->valLen, "%S;%_charset=%S", &mimetype, &charset)) continue; static struct { const char *name; UINT codepage; } codepages[] = { { "ISO-8859-1", 1252 }, { "Latin1", 1252 }, { "CP1252", 1252 }, { "Windows-1252", 1252 }, { "ISO-8859-2", 28592 }, { "Latin2", 28592 }, { "CP1251", 1251 }, { "Windows-1251", 1251 }, { "KOI8-R", 20866 }, { "shift-jis", 932 }, { "x-euc", 932 }, { "euc-kr", 949 }, { "Big5", 950 }, { "GB2312", 936 }, { "UTF-8", CP_UTF8 }, }; for (int i = 0; i < dimof(codepages); i++) { if (str::EqI(charset, codepages[i].name)) return codepages[i].codepage; } break; } return 0; }