String XSSAuditor::decodeHTMLEntities(const String& string, bool leaveUndecodableEntitiesUntouched)
{
    SegmentedString source(string);
    SegmentedString sourceShadow;
    Vector<UChar> result;
    
    while (!source.isEmpty()) {
        UChar cc = *source;
        source.advance();
        
        if (cc != '&') {
            result.append(cc);
            continue;
        }
        
        if (leaveUndecodableEntitiesUntouched)
            sourceShadow = source;
        bool notEnoughCharacters = false;
        Vector<UChar, 16> decodedEntity;
        bool success = consumeHTMLEntity(source, decodedEntity, notEnoughCharacters);
        // We ignore notEnoughCharacters because we might as well use this loop
        // to copy the remaining characters into |result|.
        if (!success || (!leaveUndecodableEntitiesUntouched && decodedEntity.size() == 1 && decodedEntity[0] == 0xFFFD)) {
            result.append('&');
            if (leaveUndecodableEntitiesUntouched)
                source = sourceShadow;
        } else {
            Vector<UChar>::const_iterator iter = decodedEntity.begin();
            for (; iter != decodedEntity.end(); ++iter)
                result.append(*iter);
        }
    }
    
    return String::adopt(result);
}
Example #2
0
TEST(HTMLEntityParserTest, ConsumeHTMLEntityIncomplete) {
  String original("am");  // Incomplete by purpose.
  SegmentedString src(original);

  DecodedHTMLEntity entity;
  bool notEnoughCharacters = false;
  bool success = consumeHTMLEntity(src, entity, notEnoughCharacters);
  EXPECT_TRUE(notEnoughCharacters);
  EXPECT_FALSE(success);

  // consumeHTMLEntity should recover the original SegmentedString state if
  // failed.
  EXPECT_EQ(original, src.toString());
}