示例#1
0
void ParseData(MeasureData* measure, const BYTE* rawData, DWORD rawSize, bool utf16Data)
{
	const int UTF16_CODEPAGE = 1200;
	if (measure->codepage == UTF16_CODEPAGE) {
		utf16Data = true;
	}

	const char* error;
	int erroffset;
	int ovector[OVECCOUNT];
	int rc;
	bool doErrorAction = false;

	// Compile the regular expression in the first argument
	pcre16* re = pcre16_compile(
		(PCRE_SPTR16)measure->regExp.c_str(),
		PCRE_UTF16, &error, &erroffset, nullptr);
	if (re != nullptr)
	{
		// Compilation succeeded: match the subject in the second argument
		std::wstring buffer;
		auto data = (const WCHAR*)rawData;
		DWORD dataLength = rawSize / 2;
		if (!utf16Data)
		{
			buffer = StringUtil::Widen((LPCSTR)rawData, rawSize, measure->codepage);
			data = buffer.c_str();
			dataLength = (DWORD)buffer.length();
		}

		rc = pcre16_exec(re, nullptr, (PCRE_SPTR16)data, dataLength, 0, 0, ovector, OVECCOUNT);
		if (rc >= 0)
		{
			if (rc == 0)
			{
				// The output vector wasn't big enough
				RmLog(measure->rm, LOG_ERROR, L"WebParser: Too many substrings");
			}
			else
			{
				if (measure->stringIndex < rc)
				{
					if (measure->debug != 0)
					{
						for (int i = 0; i < rc; ++i)
						{
							const WCHAR* match = data + ovector[2 * i];
							const int matchLen = min(ovector[2 * i + 1] - ovector[2 * i], 256);
							RmLogF(measure->rm, LOG_DEBUG, L"WebParser: Index %2d: %.*s", i, matchLen, match);
						}
					}

					const WCHAR* match = data + ovector[2 * measure->stringIndex];
					int matchLen = ovector[2 * measure->stringIndex + 1] - ovector[2 * measure->stringIndex];
					EnterCriticalSection(&g_CriticalSection);
					measure->resultString.assign(match, matchLen);
					DecodeReferences(measure->resultString, measure->decodeCharacterReference);
					LeaveCriticalSection(&g_CriticalSection);
				}
				else
				{
					RmLog(measure->rm, LOG_WARNING, L"WebParser: Not enough substrings");

					// Clear the old result
					EnterCriticalSection(&g_CriticalSection);
					measure->resultString.clear();
					if (measure->download)
					{
						if (measure->downloadFile.empty())  // cache mode
						{
							if (!measure->downloadedFile.empty())
							{
								// Delete old downloaded file
								DeleteFile(measure->downloadedFile.c_str());
							}
						}
						measure->downloadedFile.clear();
					}
					LeaveCriticalSection(&g_CriticalSection);
				}

				// Update the references
				std::vector<MeasureData*>::iterator i = g_Measures.begin();
				std::wstring compareStr = L"[";
				compareStr += RmGetMeasureName(measure->rm);
				compareStr += L']';
				for ( ; i != g_Measures.end(); ++i)
				{
					if (measure->skin == (*i)->skin &&
						StringUtil::CaseInsensitiveFind((*i)->url, compareStr) != std::wstring::npos)
					{
						if ((*i)->stringIndex < rc)
						{
							const WCHAR* match = data + ovector[2 * (*i)->stringIndex];
							int matchLen = ovector[2 * (*i)->stringIndex + 1] - ovector[2 * (*i)->stringIndex];
							if (!(*i)->regExp.empty())
							{
								// Change the index and parse the substring
								int index = (*i)->stringIndex;
								(*i)->stringIndex = (*i)->stringIndex2;
								ParseData((*i), (BYTE*)match, matchLen * 2, true);
								(*i)->stringIndex = index;
							}
							else
							{
								// Set the result
								EnterCriticalSection(&g_CriticalSection);

								// Substitude the [measure] with result
								(*i)->resultString = (*i)->url;
								(*i)->resultString.replace(
									StringUtil::CaseInsensitiveFind((*i)->resultString, compareStr),
									compareStr.size(), match, matchLen);
								DecodeReferences((*i)->resultString, (*i)->decodeCharacterReference);

								// Start download threads for the references
								if ((*i)->download)
								{
									// Start the download thread
									unsigned int id;
									HANDLE threadHandle = (HANDLE)_beginthreadex(nullptr, 0, NetworkDownloadThreadProc, (*i), 0, &id);
									if (threadHandle)
									{
										(*i)->dlThreadHandle = threadHandle;
									}
								}

								LeaveCriticalSection(&g_CriticalSection);
							}
						}
						else
						{
							RmLog((*i)->rm, LOG_WARNING, L"WebParser: Not enough substrings");

							// Clear the old result
							EnterCriticalSection(&g_CriticalSection);
							(*i)->resultString.clear();
							if ((*i)->download)
							{
								if ((*i)->downloadFile.empty())  // cache mode
								{
									if (!(*i)->downloadedFile.empty())
									{
										// Delete old downloaded file
										DeleteFile((*i)->downloadedFile.c_str());
									}
								}
								(*i)->downloadedFile.clear();
							}
							LeaveCriticalSection(&g_CriticalSection);
						}
					}
				}
			}
		}
		else
		{
			// Matching failed: handle error cases
			RmLogF(measure->rm, LOG_ERROR, L"WebParser: RegExp matching error (%d)", rc);
			doErrorAction = true;

			EnterCriticalSection(&g_CriticalSection);
			measure->resultString = measure->errorString;

			// Update the references
			std::vector<MeasureData*>::iterator i = g_Measures.begin();
			std::wstring compareStr = L"[";
			compareStr += RmGetMeasureName(measure->rm);
			compareStr += L']';
			for ( ; i != g_Measures.end(); ++i)
			{
				if ((StringUtil::CaseInsensitiveFind((*i)->url, compareStr) != std::wstring::npos) &&
					(measure->skin == (*i)->skin))
				{
					(*i)->resultString = (*i)->errorString;
				}
			}
			LeaveCriticalSection(&g_CriticalSection);
		}

		// Release memory used for the compiled pattern
		pcre16_free(re);
	}
	else
	{
		// Compilation failed.
		RmLogF(measure->rm, LOG_ERROR, L"WebParser: RegExp error at offset %d: %S", erroffset, error);
		doErrorAction = true;
	}

	if (measure->download)
	{
		// Start the download thread
		unsigned int id;
		HANDLE threadHandle = (HANDLE)_beginthreadex(nullptr, 0, NetworkDownloadThreadProc, measure, 0, &id);
		if (threadHandle)
		{
			measure->dlThreadHandle = threadHandle;
		}
	}

	if (doErrorAction && !measure->onRegExpErrAction.empty())
	{
		RmExecute(measure->skin, measure->onRegExpErrAction.c_str());
	}
	else if (!measure->download && !measure->finishAction.empty())
	{
		RmExecute(measure->skin, measure->finishAction.c_str());
	}
}
示例#2
0
void ParseData(MeasureData* measure, LPCSTR parseData, DWORD dwSize)
{
	// Parse the value from the data
	pcre* re;
	const char* error;
	int erroffset;
	int ovector[OVECCOUNT];
	int rc;
	int flags = PCRE_UTF8;

	if (measure->codepage == 0)
	{
		flags = 0;
	}

	// Compile the regular expression in the first argument
	re = pcre_compile(
		StringUtil::NarrowUTF8(measure->regExp).c_str(),	// the pattern
		flags,												// default options
		&error,												// for error message
		&erroffset,											// for error offset
		nullptr);											// use default character tables

	if (re != nullptr)
	{
		// Compilation succeeded: match the subject in the second argument
		std::string utf8Data;

		if (measure->codepage == 1200)		// 1200 = UTF-16LE
		{
			// Must convert the data to utf8
			utf8Data = StringUtil::NarrowUTF8((LPCWSTR)parseData, dwSize / 2);
			parseData = utf8Data.c_str();
			dwSize = (DWORD)utf8Data.length();
		}
		else if (measure->codepage != CP_UTF8 && measure->codepage != 0)		// 0 = CP_ACP
		{
			// Must convert the data to utf8
			utf8Data = ConvertAsciiToUTF8(parseData, dwSize, measure->codepage);
			parseData = utf8Data.c_str();
			dwSize = (DWORD)utf8Data.length();
		}

		rc = pcre_exec(
			re,						// the compiled pattern
			nullptr,				// no extra data - we didn't study the pattern
			parseData,				// the subject string
			dwSize,					// the length of the subject
			0,						// start at offset 0 in the subject
			0,						// default options
			ovector,				// output vector for substring information
			OVECCOUNT);				// number of elements in the output vector

		if (rc >= 0)
		{
			if (rc == 0)
			{
				// The output vector wasn't big enough
				RmLog(measure->rm, LOG_ERROR, L"WebParser: Too many substrings");
			}
			else
			{
				if (measure->stringIndex < rc)
				{
					if (measure->debug != 0)
					{
						for (int i = 0; i < rc; ++i)
						{
							const char* substring_start = parseData + ovector[2 * i];
							int substring_length = ovector[2 * i + 1] - ovector[2 * i];
							substring_length = min(substring_length, 256);

							const std::wstring value = StringUtil::WidenUTF8(substring_start, substring_length);
							RmLogF(measure->rm, LOG_DEBUG, L"WebParser: Index %2d: %s", i, value.c_str());
						}
					}

					const char* substring_start = parseData + ovector[2 * measure->stringIndex];
					int substring_length = ovector[2 * measure->stringIndex + 1] - ovector[2 * measure->stringIndex];

					EnterCriticalSection(&g_CriticalSection);
					measure->resultString = StringUtil::WidenUTF8(substring_start, substring_length);
					DecodeReferences(measure->resultString, measure->decodeCharacterReference);
					LeaveCriticalSection(&g_CriticalSection);
				}
				else
				{
					RmLog(measure->rm, LOG_WARNING, L"WebParser: Not enough substrings");

					// Clear the old result
					EnterCriticalSection(&g_CriticalSection);
					measure->resultString.clear();
					if (measure->download)
					{
						if (measure->downloadFile.empty())  // cache mode
						{
							if (!measure->downloadedFile.empty())
							{
								// Delete old downloaded file
								DeleteFile(measure->downloadedFile.c_str());
							}
						}
						measure->downloadedFile.clear();
					}
					LeaveCriticalSection(&g_CriticalSection);
				}

				// Update the references
				std::vector<MeasureData*>::iterator i = g_Measures.begin();
				std::wstring compareStr = L"[";
				compareStr += RmGetMeasureName(measure->rm);
				compareStr += L']';
				for ( ; i != g_Measures.end(); ++i)
				{
					if (measure->skin == (*i)->skin &&
						(*i)->url.find(compareStr) != std::wstring::npos)
					{
						if ((*i)->stringIndex < rc)
						{
							const char* substring_start = parseData + ovector[2 * (*i)->stringIndex];
							int substring_length = ovector[2 * (*i)->stringIndex + 1] - ovector[2 * (*i)->stringIndex];

							if (!(*i)->regExp.empty())
							{
								// Change the index and parse the substring
								int index = (*i)->stringIndex;
								(*i)->stringIndex = (*i)->stringIndex2;
								ParseData((*i), substring_start, substring_length);
								(*i)->stringIndex = index;
							}
							else
							{
								// Set the result
								EnterCriticalSection(&g_CriticalSection);

								// Substitude the [measure] with result
								std::wstring result = StringUtil::WidenUTF8(substring_start, substring_length);
								(*i)->resultString = (*i)->url;
								(*i)->resultString.replace((*i)->resultString.find(compareStr), compareStr.size(), result);
								DecodeReferences((*i)->resultString, (*i)->decodeCharacterReference);

								// Start download threads for the references
								if ((*i)->download)
								{
									// Start the download thread
									unsigned int id;
									HANDLE threadHandle = (HANDLE)_beginthreadex(nullptr, 0, NetworkDownloadThreadProc, (*i), 0, &id);
									if (threadHandle)
									{
										(*i)->dlThreadHandle = threadHandle;
									}
								}

								LeaveCriticalSection(&g_CriticalSection);
							}
						}
						else
						{
							RmLog((*i)->rm, LOG_WARNING, L"WebParser: Not enough substrings");

							// Clear the old result
							EnterCriticalSection(&g_CriticalSection);
							(*i)->resultString.clear();
							if ((*i)->download)
							{
								if ((*i)->downloadFile.empty())  // cache mode
								{
									if (!(*i)->downloadedFile.empty())
									{
										// Delete old downloaded file
										DeleteFile((*i)->downloadedFile.c_str());
									}
								}
								(*i)->downloadedFile.clear();
							}
							LeaveCriticalSection(&g_CriticalSection);
						}
					}
				}
			}
		}
		else
		{
			// Matching failed: handle error cases
			RmLogF(measure->rm, LOG_ERROR, L"WebParser: RegExp matching error (%d)", rc);

			EnterCriticalSection(&g_CriticalSection);
			measure->resultString = measure->errorString;

			// Update the references
			std::vector<MeasureData*>::iterator i = g_Measures.begin();
			std::wstring compareStr = L"[";
			compareStr += RmGetMeasureName(measure->rm);
			compareStr += L']';
			for ( ; i != g_Measures.end(); ++i)
			{
				if (((*i)->url.find(compareStr) != std::wstring::npos) && (measure->skin == (*i)->skin))
				{
					(*i)->resultString = (*i)->errorString;
				}
			}
			LeaveCriticalSection(&g_CriticalSection);
		}

		// Release memory used for the compiled pattern
		pcre_free(re);
	}
	else
	{
		// Compilation failed.
		RmLogF(measure->rm, LOG_ERROR, L"WebParser: RegExp error at offset %d: %S", erroffset, error);
	}

	if (measure->download)
	{
		// Start the download thread
		unsigned int id;
		HANDLE threadHandle = (HANDLE)_beginthreadex(nullptr, 0, NetworkDownloadThreadProc, measure, 0, &id);
		if (threadHandle)
		{
			measure->dlThreadHandle = threadHandle;
		}
	}
	else
	{
		if (!measure->finishAction.empty())
		{
			RmExecute(measure->skin, measure->finishAction.c_str());
		}
	}
}