Пример #1
0
void
mozTXTToHTMLConv::ScanHTML(nsString& aInString, uint32_t whattodo, nsString &aOutString)
{ 
  // some common variables we were recalculating
  // every time inside the for loop...
  int32_t lengthOfInString = aInString.Length();
  const char16_t * uniBuffer = aInString.get();

#ifdef DEBUG_BenB_Perf
  PRTime parsing_start = PR_IntervalNow();
#endif

  // Look for simple entities not included in a tags and scan them.
  // Skip all tags ("<[...]>") and content in an a link tag ("<a [...]</a>"),
  // comment tag ("<!--[...]-->"), style tag, script tag or head tag.
  // Unescape the rest (text between tags) and pass it to ScanTXT.
  for (int32_t i = 0; i < lengthOfInString;)
  {
    if (aInString[i] == '<')  // html tag
    {
      int32_t start = i;
      if (Substring(aInString, i + 1, 2).LowerCaseEqualsASCII("a "))
           // if a tag, skip until </a>.
           // Make sure there's a space after, not to match "abbr".
      {
        i = aInString.Find("</a>", true, i);
        if (i == kNotFound)
          i = lengthOfInString;
        else
          i += 4;
      }
      else if (Substring(aInString, i + 1, 3).LowerCaseEqualsASCII("!--"))
          // if out-commended code, skip until -->
      {
        i = aInString.Find("-->", false, i);
        if (i == kNotFound)
          i = lengthOfInString;
        else
          i += 3;
      }
      else if (Substring(aInString, i + 1, 5).LowerCaseEqualsASCII("style") &&
               (aInString.CharAt(i + 6) == ' ' || aInString.CharAt(i + 6) == '>'))
           // if style tag, skip until </style>
      {
        i = aInString.Find("</style>", true, i);
        if (i == kNotFound)
          i = lengthOfInString;
        else
          i += 8;
      }
      else if (Substring(aInString, i + 1, 6).LowerCaseEqualsASCII("script") &&
               (aInString.CharAt(i + 7) == ' ' || aInString.CharAt(i + 7) == '>'))
           // if script tag, skip until </script>
      {
        i = aInString.Find("</script>", true, i);
        if (i == kNotFound)
          i = lengthOfInString;
        else
          i += 9;
      }
      else if (Substring(aInString, i + 1, 4).LowerCaseEqualsASCII("head") &&
               (aInString.CharAt(i + 5) == ' ' || aInString.CharAt(i + 5) == '>'))
           // if head tag, skip until </head>
           // Make sure not to match <header>.
      {
        i = aInString.Find("</head>", true, i);
        if (i == kNotFound)
          i = lengthOfInString;
        else
          i += 7;
      }
      else  // just skip tag (attributes etc.)
      {
        i = aInString.FindChar('>', i);
        if (i == kNotFound)
          i = lengthOfInString;
        else
          i++;
      }
      aOutString.Append(&uniBuffer[start], i - start);
    }
    else
    {
      uint32_t start = uint32_t(i);
      i = aInString.FindChar('<', i);
      if (i == kNotFound)
        i = lengthOfInString;
  
      nsString tempString;     
      tempString.SetCapacity(uint32_t((uint32_t(i) - start) * growthRate));
      UnescapeStr(uniBuffer, start, uint32_t(i) - start, tempString);
      ScanTXT(tempString.get(), tempString.Length(), whattodo, aOutString);
    }
  }

#ifdef DEBUG_BenB_Perf
  printf("ScanHTML time:    %d ms\n", PR_IntervalToMilliseconds(PR_IntervalNow() - parsing_start));
#endif
}
Пример #2
0
void
mozTXTToHTMLConv::ScanHTML(nsString& aInString, PRUint32 whattodo, nsString &aOutString)
{ 
  // some common variables we were recalculating
  // every time inside the for loop...
  PRInt32 lengthOfInString = aInString.Length();
  const PRUnichar * uniBuffer = aInString.get();

#ifdef DEBUG_BenB_Perf
  PRTime parsing_start = PR_IntervalNow();
#endif

  // Look for simple entities not included in a tags and scan them.
  /* Skip all tags ("<[...]>") and content in an a tag ("<a[...]</a>")
     or in a tag ("<!--[...]-->").
     Unescape the rest (text between tags) and pass it to ScanTXT. */
  for (PRInt32 i = 0; i < lengthOfInString;)
  {
    if (aInString[i] == '<')  // html tag
    {
      PRUint32 start = PRUint32(i);
      if (nsCRT::ToLower((char)aInString[PRUint32(i) + 1]) == 'a')
           // if a tag, skip until </a>
      {
        i = aInString.Find("</a>", true, i);
        if (i == kNotFound)
          i = lengthOfInString;
        else
          i += 4;
      }
      else if (aInString[PRUint32(i) + 1] == '!' && aInString[PRUint32(i) + 2] == '-' &&
        aInString[PRUint32(i) + 3] == '-')
          //if out-commended code, skip until -->
      {
        i = aInString.Find("-->", false, i);
        if (i == kNotFound)
          i = lengthOfInString;
        else
          i += 3;

      }
      else  // just skip tag (attributes etc.)
      {
        i = aInString.FindChar('>', i);
        if (i == kNotFound)
          i = lengthOfInString;
        else
          i++;
      }
      aOutString.Append(&uniBuffer[start], PRUint32(i) - start);
    }
    else
    {
      PRUint32 start = PRUint32(i);
      i = aInString.FindChar('<', i);
      if (i == kNotFound)
        i = lengthOfInString;
  
      nsString tempString;     
      tempString.SetCapacity(PRUint32((PRUint32(i) - start) * growthRate));
      UnescapeStr(uniBuffer, start, PRUint32(i) - start, tempString);
      ScanTXT(tempString.get(), tempString.Length(), whattodo, aOutString);
    }
  }

#ifdef DEBUG_BenB_Perf
  printf("ScanHTML time:    %d ms\n", PR_IntervalToMilliseconds(PR_IntervalNow() - parsing_start));
#endif
}
Пример #3
0
// Interpret a line
TABLE *ParseTableLine(char *line, char *prefix, UINT prefix_size, LIST *replace_list)
{
	UINT i, len;
	UINT len_name;
	UINT string_start;
	char *name;
	char *name2;
	UINT name2_size;
	wchar_t *unistr;
	char *str;
	UINT unistr_size, str_size;
	TABLE *t;
	// Validate arguments
	if (line == NULL || prefix == NULL)
	{
		return NULL;
	}
	TrimLeft(line);

	// No line
	len = StrLen(line);
	if (len == 0)
	{
		return NULL;
	}

	// Comment
	if (line[0] == '#' || (line[0] == '/' && line[1] == '/'))
	{
		return NULL;
	}

	// Search to the end position of the name
	len_name = 0;
	for (i = 0;;i++)
	{
		if (line[i] == 0)
		{
			// There is only one token
			return NULL;
		}
		if (line[i] == ' ' || line[i] == '\t')
		{
			break;
		}
		len_name++;
	}

	name = Malloc(len_name + 1);
	StrCpy(name, len_name + 1, line);

	string_start = len_name;
	for (i = len_name;i < len;i++)
	{
		if (line[i] != ' ' && line[i] != '\t')
		{
			break;
		}
		string_start++;
	}
	if (i == len)
	{
		Free(name);
		return NULL;
	}

	// Unescape
	UnescapeStr(&line[string_start]);

	// Convert to Unicode
	unistr_size = CalcUtf8ToUni(&line[string_start], StrLen(&line[string_start]));
	if (unistr_size == 0)
	{
		Free(name);
		return NULL;
	}
	unistr = Malloc(unistr_size);
	Utf8ToUni(unistr, unistr_size, &line[string_start], StrLen(&line[string_start]));

	if (UniInChar(unistr, L'$'))
	{
		// Replace the replacement string
		wchar_t *tmp;
		UINT tmp_size = (UniStrSize(unistr) + 1024) * 2;
		UINT i;

		tmp = Malloc(tmp_size);

		UniStrCpy(tmp, tmp_size, unistr);

		for (i = 0; i < LIST_NUM(replace_list);i++)
		{
			TABLE *r = LIST_DATA(replace_list, i);

			UniReplaceStrEx(tmp, tmp_size, tmp, (wchar_t *)r->name, r->unistr, false);
		}

		unistr = CopyUniStr(tmp);

		Free(tmp);
	}

	// Convert to ANSI
	str_size = CalcUniToStr(unistr);
	if (str_size == 0)
	{
		str_size = 1;
		str = Malloc(1);
		str[0] = 0;
	}
	else
	{
		str = Malloc(str_size);
		UniToStr(str, str_size, unistr);
	}

	if (StrCmpi(name, "PREFIX") == 0)
	{
		// Prefix is specified
		StrCpy(prefix, prefix_size, str);
		Trim(prefix);

		if (StrCmpi(prefix, "$") == 0 || StrCmpi(prefix, "NULL") == 0)
		{
			prefix[0] = 0;
		}

		Free(name);
		Free(str);
		Free(unistr);

		return NULL;
	}

	name2_size = StrLen(name) + StrLen(prefix) + 2;
	name2 = ZeroMalloc(name2_size);

	if (prefix[0] != 0)
	{
		StrCat(name2, name2_size, prefix);
		StrCat(name2, name2_size, "@");
	}

	StrCat(name2, name2_size, name);

	Free(name);

	// Create a TABLE
	t = Malloc(sizeof(TABLE));
	StrUpper(name2);
	t->name = name2;
	t->str = str;
	t->unistr = unistr;

	return t;
}