C++ (Cpp) ScanTXT 예제들

예제 #1

0

파일 보기

파일: mozTXTToHTMLConv.cpp 프로젝트: marshall/mozilla-central

void
mozTXTToHTMLConv::CalculateURLBoundaries(const PRUnichar * aInString, PRInt32 aInStringLength, 
     const PRUint32 pos, const PRUint32 whathasbeendone,
     const modetype check, const PRUint32 start, const PRUint32 end,
     nsString& txtURL, nsString& desc,
     PRInt32& replaceBefore, PRInt32& replaceAfter)
{
  PRUint32 descstart = start;
  switch(check)
  {
  case RFC1738:
  {
    descstart = start - 5;
    desc.Append(&aInString[descstart], end - descstart + 2);  // include "<URL:" and ">"
    replaceAfter = end - pos + 1;
  } break;
  case RFC2396E:
  {
    descstart = start - 1;
    desc.Append(&aInString[descstart], end - descstart + 2); // include brackets
    replaceAfter = end - pos + 1;
  } break;
  case freetext:
  case abbreviated:
  {
    descstart = start;
    desc.Append(&aInString[descstart], end - start + 1); // don't include brackets  
    replaceAfter = end - pos;
  } break;
  default: break;
  } //switch

  EscapeStr(desc, false);

  txtURL.Append(&aInString[start], end - start + 1);
  txtURL.StripWhitespace();

  // FIX ME
  nsAutoString temp2;
  ScanTXT(&aInString[descstart], pos - descstart, ~kURLs /*prevents loop*/ & whathasbeendone, temp2);
  replaceBefore = temp2.Length();
  return;
}

예제 #2

0

파일 보기

파일: mozTXTToHTMLConv.cpp 프로젝트: marshall/mozilla-central

NS_IMETHODIMP
mozTXTToHTMLConv::ScanTXT(const PRUnichar *text, PRUint32 whattodo,
			   PRUnichar **_retval)
{
  NS_ENSURE_ARG(text);

  // FIX ME!!!
  nsString outString;
  PRInt32 inLength = nsCRT::strlen(text);
  // by setting a large capacity up front, we save time
  // when appending characters to the output string because we don't
  // need to reallocate and re-copy the characters already in the out String.
  NS_ASSERTION(inLength, "ScanTXT passed 0 length string");
  if (inLength == 0) {
    *_retval = nsCRT::strdup(text);
    return NS_OK;
  }

  outString.SetCapacity(PRUint32(inLength * growthRate));
  ScanTXT(text, inLength, whattodo, outString);

  *_retval = ToNewUnicode(outString);
  return *_retval ? NS_OK : NS_ERROR_OUT_OF_MEMORY;
}

예제 #3

0

파일 보기

파일: mozTXTToHTMLConv.cpp 프로젝트: marshall/mozilla-central

void
mozTXTToHTMLConv::ScanHTML(nsString& aInString, PRUint32 whattodo, nsString &aOutString)
{ 
  // some common variables we were recalculating
  // every time inside the for loop...
  PRInt32 lengthOfInString = aInString.Length();
  const PRUnichar * uniBuffer = aInString.get();

#ifdef DEBUG_BenB_Perf
  PRTime parsing_start = PR_IntervalNow();
#endif

  // Look for simple entities not included in a tags and scan them.
  /* Skip all tags ("<[...]>") and content in an a tag ("<a[...]</a>")
     or in a tag ("<!--[...]-->").
     Unescape the rest (text between tags) and pass it to ScanTXT. */
  for (PRInt32 i = 0; i < lengthOfInString;)
  {
    if (aInString[i] == '<')  // html tag
    {
      PRUint32 start = PRUint32(i);
      if (nsCRT::ToLower((char)aInString[PRUint32(i) + 1]) == 'a')
           // if a tag, skip until </a>
      {
        i = aInString.Find("</a>", true, i);
        if (i == kNotFound)
          i = lengthOfInString;
        else
          i += 4;
      }
      else if (aInString[PRUint32(i) + 1] == '!' && aInString[PRUint32(i) + 2] == '-' &&
        aInString[PRUint32(i) + 3] == '-')
          //if out-commended code, skip until -->
      {
        i = aInString.Find("-->", false, i);
        if (i == kNotFound)
          i = lengthOfInString;
        else
          i += 3;

      }
      else  // just skip tag (attributes etc.)
      {
        i = aInString.FindChar('>', i);
        if (i == kNotFound)
          i = lengthOfInString;
        else
          i++;
      }
      aOutString.Append(&uniBuffer[start], PRUint32(i) - start);
    }
    else
    {
      PRUint32 start = PRUint32(i);
      i = aInString.FindChar('<', i);
      if (i == kNotFound)
        i = lengthOfInString;
  
      nsString tempString;     
      tempString.SetCapacity(PRUint32((PRUint32(i) - start) * growthRate));
      UnescapeStr(uniBuffer, start, PRUint32(i) - start, tempString);
      ScanTXT(tempString.get(), tempString.Length(), whattodo, aOutString);
    }
  }

#ifdef DEBUG_BenB_Perf
  printf("ScanHTML time:    %d ms\n", PR_IntervalToMilliseconds(PR_IntervalNow() - parsing_start));
#endif
}

예제 #4

0

파일 보기

파일: mozTXTToHTMLConv.cpp 프로젝트: MekliCZ/positron

void
mozTXTToHTMLConv::ScanHTML(nsString& aInString, uint32_t whattodo, nsString &aOutString)
{ 
  // some common variables we were recalculating
  // every time inside the for loop...
  int32_t lengthOfInString = aInString.Length();
  const char16_t * uniBuffer = aInString.get();

#ifdef DEBUG_BenB_Perf
  PRTime parsing_start = PR_IntervalNow();
#endif

  // Look for simple entities not included in a tags and scan them.
  // Skip all tags ("<[...]>") and content in an a link tag ("<a [...]</a>"),
  // comment tag ("<!--[...]-->"), style tag, script tag or head tag.
  // Unescape the rest (text between tags) and pass it to ScanTXT.
  for (int32_t i = 0; i < lengthOfInString;)
  {
    if (aInString[i] == '<')  // html tag
    {
      int32_t start = i;
      if (Substring(aInString, i + 1, 2).LowerCaseEqualsASCII("a "))
           // if a tag, skip until </a>.
           // Make sure there's a space after, not to match "abbr".
      {
        i = aInString.Find("</a>", true, i);
        if (i == kNotFound)
          i = lengthOfInString;
        else
          i += 4;
      }
      else if (Substring(aInString, i + 1, 3).LowerCaseEqualsASCII("!--"))
          // if out-commended code, skip until -->
      {
        i = aInString.Find("-->", false, i);
        if (i == kNotFound)
          i = lengthOfInString;
        else
          i += 3;
      }
      else if (Substring(aInString, i + 1, 5).LowerCaseEqualsASCII("style") &&
               (aInString.CharAt(i + 6) == ' ' || aInString.CharAt(i + 6) == '>'))
           // if style tag, skip until </style>
      {
        i = aInString.Find("</style>", true, i);
        if (i == kNotFound)
          i = lengthOfInString;
        else
          i += 8;
      }
      else if (Substring(aInString, i + 1, 6).LowerCaseEqualsASCII("script") &&
               (aInString.CharAt(i + 7) == ' ' || aInString.CharAt(i + 7) == '>'))
           // if script tag, skip until </script>
      {
        i = aInString.Find("</script>", true, i);
        if (i == kNotFound)
          i = lengthOfInString;
        else
          i += 9;
      }
      else if (Substring(aInString, i + 1, 4).LowerCaseEqualsASCII("head") &&
               (aInString.CharAt(i + 5) == ' ' || aInString.CharAt(i + 5) == '>'))
           // if head tag, skip until </head>
           // Make sure not to match <header>.
      {
        i = aInString.Find("</head>", true, i);
        if (i == kNotFound)
          i = lengthOfInString;
        else
          i += 7;
      }
      else  // just skip tag (attributes etc.)
      {
        i = aInString.FindChar('>', i);
        if (i == kNotFound)
          i = lengthOfInString;
        else
          i++;
      }
      aOutString.Append(&uniBuffer[start], i - start);
    }
    else
    {
      uint32_t start = uint32_t(i);
      i = aInString.FindChar('<', i);
      if (i == kNotFound)
        i = lengthOfInString;
  
      nsString tempString;     
      tempString.SetCapacity(uint32_t((uint32_t(i) - start) * growthRate));
      UnescapeStr(uniBuffer, start, uint32_t(i) - start, tempString);
      ScanTXT(tempString.get(), tempString.Length(), whattodo, aOutString);
    }
  }

#ifdef DEBUG_BenB_Perf
  printf("ScanHTML time:    %d ms\n", PR_IntervalToMilliseconds(PR_IntervalNow() - parsing_start));
#endif
}