void mozTXTToHTMLConv::CalculateURLBoundaries(const PRUnichar * aInString, PRInt32 aInStringLength, const PRUint32 pos, const PRUint32 whathasbeendone, const modetype check, const PRUint32 start, const PRUint32 end, nsString& txtURL, nsString& desc, PRInt32& replaceBefore, PRInt32& replaceAfter) { PRUint32 descstart = start; switch(check) { case RFC1738: { descstart = start - 5; desc.Append(&aInString[descstart], end - descstart + 2); // include "<URL:" and ">" replaceAfter = end - pos + 1; } break; case RFC2396E: { descstart = start - 1; desc.Append(&aInString[descstart], end - descstart + 2); // include brackets replaceAfter = end - pos + 1; } break; case freetext: case abbreviated: { descstart = start; desc.Append(&aInString[descstart], end - start + 1); // don't include brackets replaceAfter = end - pos; } break; default: break; } //switch EscapeStr(desc, false); txtURL.Append(&aInString[start], end - start + 1); txtURL.StripWhitespace(); // FIX ME nsAutoString temp2; ScanTXT(&aInString[descstart], pos - descstart, ~kURLs /*prevents loop*/ & whathasbeendone, temp2); replaceBefore = temp2.Length(); return; }
NS_IMETHODIMP mozTXTToHTMLConv::ScanTXT(const PRUnichar *text, PRUint32 whattodo, PRUnichar **_retval) { NS_ENSURE_ARG(text); // FIX ME!!! nsString outString; PRInt32 inLength = nsCRT::strlen(text); // by setting a large capacity up front, we save time // when appending characters to the output string because we don't // need to reallocate and re-copy the characters already in the out String. NS_ASSERTION(inLength, "ScanTXT passed 0 length string"); if (inLength == 0) { *_retval = nsCRT::strdup(text); return NS_OK; } outString.SetCapacity(PRUint32(inLength * growthRate)); ScanTXT(text, inLength, whattodo, outString); *_retval = ToNewUnicode(outString); return *_retval ? NS_OK : NS_ERROR_OUT_OF_MEMORY; }
void mozTXTToHTMLConv::ScanHTML(nsString& aInString, PRUint32 whattodo, nsString &aOutString) { // some common variables we were recalculating // every time inside the for loop... PRInt32 lengthOfInString = aInString.Length(); const PRUnichar * uniBuffer = aInString.get(); #ifdef DEBUG_BenB_Perf PRTime parsing_start = PR_IntervalNow(); #endif // Look for simple entities not included in a tags and scan them. /* Skip all tags ("<[...]>") and content in an a tag ("<a[...]</a>") or in a tag ("<!--[...]-->"). Unescape the rest (text between tags) and pass it to ScanTXT. */ for (PRInt32 i = 0; i < lengthOfInString;) { if (aInString[i] == '<') // html tag { PRUint32 start = PRUint32(i); if (nsCRT::ToLower((char)aInString[PRUint32(i) + 1]) == 'a') // if a tag, skip until </a> { i = aInString.Find("</a>", true, i); if (i == kNotFound) i = lengthOfInString; else i += 4; } else if (aInString[PRUint32(i) + 1] == '!' && aInString[PRUint32(i) + 2] == '-' && aInString[PRUint32(i) + 3] == '-') //if out-commended code, skip until --> { i = aInString.Find("-->", false, i); if (i == kNotFound) i = lengthOfInString; else i += 3; } else // just skip tag (attributes etc.) { i = aInString.FindChar('>', i); if (i == kNotFound) i = lengthOfInString; else i++; } aOutString.Append(&uniBuffer[start], PRUint32(i) - start); } else { PRUint32 start = PRUint32(i); i = aInString.FindChar('<', i); if (i == kNotFound) i = lengthOfInString; nsString tempString; tempString.SetCapacity(PRUint32((PRUint32(i) - start) * growthRate)); UnescapeStr(uniBuffer, start, PRUint32(i) - start, tempString); ScanTXT(tempString.get(), tempString.Length(), whattodo, aOutString); } } #ifdef DEBUG_BenB_Perf printf("ScanHTML time: %d ms\n", PR_IntervalToMilliseconds(PR_IntervalNow() - parsing_start)); #endif }
void mozTXTToHTMLConv::ScanHTML(nsString& aInString, uint32_t whattodo, nsString &aOutString) { // some common variables we were recalculating // every time inside the for loop... int32_t lengthOfInString = aInString.Length(); const char16_t * uniBuffer = aInString.get(); #ifdef DEBUG_BenB_Perf PRTime parsing_start = PR_IntervalNow(); #endif // Look for simple entities not included in a tags and scan them. // Skip all tags ("<[...]>") and content in an a link tag ("<a [...]</a>"), // comment tag ("<!--[...]-->"), style tag, script tag or head tag. // Unescape the rest (text between tags) and pass it to ScanTXT. for (int32_t i = 0; i < lengthOfInString;) { if (aInString[i] == '<') // html tag { int32_t start = i; if (Substring(aInString, i + 1, 2).LowerCaseEqualsASCII("a ")) // if a tag, skip until </a>. // Make sure there's a space after, not to match "abbr". { i = aInString.Find("</a>", true, i); if (i == kNotFound) i = lengthOfInString; else i += 4; } else if (Substring(aInString, i + 1, 3).LowerCaseEqualsASCII("!--")) // if out-commended code, skip until --> { i = aInString.Find("-->", false, i); if (i == kNotFound) i = lengthOfInString; else i += 3; } else if (Substring(aInString, i + 1, 5).LowerCaseEqualsASCII("style") && (aInString.CharAt(i + 6) == ' ' || aInString.CharAt(i + 6) == '>')) // if style tag, skip until </style> { i = aInString.Find("</style>", true, i); if (i == kNotFound) i = lengthOfInString; else i += 8; } else if (Substring(aInString, i + 1, 6).LowerCaseEqualsASCII("script") && (aInString.CharAt(i + 7) == ' ' || aInString.CharAt(i + 7) == '>')) // if script tag, skip until </script> { i = aInString.Find("</script>", true, i); if (i == kNotFound) i = lengthOfInString; else i += 9; } else if (Substring(aInString, i + 1, 4).LowerCaseEqualsASCII("head") && (aInString.CharAt(i + 5) == ' ' || aInString.CharAt(i + 5) == '>')) // if head tag, skip until </head> // Make sure not to match <header>. { i = aInString.Find("</head>", true, i); if (i == kNotFound) i = lengthOfInString; else i += 7; } else // just skip tag (attributes etc.) { i = aInString.FindChar('>', i); if (i == kNotFound) i = lengthOfInString; else i++; } aOutString.Append(&uniBuffer[start], i - start); } else { uint32_t start = uint32_t(i); i = aInString.FindChar('<', i); if (i == kNotFound) i = lengthOfInString; nsString tempString; tempString.SetCapacity(uint32_t((uint32_t(i) - start) * growthRate)); UnescapeStr(uniBuffer, start, uint32_t(i) - start, tempString); ScanTXT(tempString.get(), tempString.Length(), whattodo, aOutString); } } #ifdef DEBUG_BenB_Perf printf("ScanHTML time: %d ms\n", PR_IntervalToMilliseconds(PR_IntervalNow() - parsing_start)); #endif }