// Returns a copy of |string| in which every '&' that consumeHTMLEntity()
// manages to decode is replaced by the decoded code units.
//
// An '&' that cannot be decoded is emitted as a literal '&'. What happens to
// the text after it depends on |leaveUndecodableEntitiesUntouched|:
//   - true:  the input is rewound to the position just after the '&', so the
//            following characters are copied through by later iterations.
//   - false: the input is left wherever consumeHTMLEntity() stopped. In this
//            mode an entity that decodes to the single code unit 0xFFFD is
//            also reduced to a bare '&', and its consumed text is not
//            restored.
String XSSAuditor::decodeHTMLEntities(const String& string, bool leaveUndecodableEntitiesUntouched)
{
    SegmentedString source(string);
    SegmentedString sourceShadow;
    Vector<UChar> result;

    while (!source.isEmpty()) {
        UChar current = *source;
        source.advance();

        // Anything other than an ampersand is copied through unchanged.
        if (current != '&') {
            result.append(current);
            continue;
        }

        // Snapshot the position right after the '&' so it can be restored
        // if the entity turns out to be undecodable.
        if (leaveUndecodableEntitiesUntouched)
            sourceShadow = source;

        // The out-flag is required by consumeHTMLEntity() but deliberately
        // not inspected: the surrounding loop copies whatever input remains
        // into |result| anyway.
        bool notEnoughCharacters = false;
        Vector<UChar, 16> decodedEntity;
        bool decoded = consumeHTMLEntity(source, decodedEntity, notEnoughCharacters);

        // Emit a bare '&' when decoding failed, or (only when not preserving
        // undecodable entities) when the result is the lone code unit 0xFFFD.
        bool emitBareAmpersand = !decoded
            || (!leaveUndecodableEntitiesUntouched
                && decodedEntity.size() == 1
                && decodedEntity[0] == 0xFFFD);

        if (emitBareAmpersand) {
            result.append('&');
            if (leaveUndecodableEntitiesUntouched)
                source = sourceShadow;
            continue;
        }

        // Successful decode: append every decoded code unit in order.
        for (size_t i = 0; i < decodedEntity.size(); ++i)
            result.append(decodedEntity[i]);
    }

    return String::adopt(result);
}
// Feeds consumeHTMLEntity() an input that is deliberately cut short and
// checks that it reports failure, sets the "not enough characters" flag, and
// leaves the input stream as it was.
TEST(HTMLEntityParserTest, ConsumeHTMLEntityIncomplete)
{
    // Intentionally truncated entity text.
    String truncatedInput("am");
    SegmentedString stream(truncatedInput);
    DecodedHTMLEntity decoded;
    bool ranOutOfInput = false;

    bool consumed = consumeHTMLEntity(stream, decoded, ranOutOfInput);

    EXPECT_TRUE(ranOutOfInput);
    EXPECT_FALSE(consumed);
    // On failure the SegmentedString is expected to be back in its
    // original state.
    EXPECT_EQ(truncatedInput, stream.toString());
}