void ParseData(MeasureData* measure, const BYTE* rawData, DWORD rawSize, bool utf16Data) { const int UTF16_CODEPAGE = 1200; if (measure->codepage == UTF16_CODEPAGE) { utf16Data = true; } const char* error; int erroffset; int ovector[OVECCOUNT]; int rc; bool doErrorAction = false; // Compile the regular expression in the first argument pcre16* re = pcre16_compile( (PCRE_SPTR16)measure->regExp.c_str(), PCRE_UTF16, &error, &erroffset, nullptr); if (re != nullptr) { // Compilation succeeded: match the subject in the second argument std::wstring buffer; auto data = (const WCHAR*)rawData; DWORD dataLength = rawSize / 2; if (!utf16Data) { buffer = StringUtil::Widen((LPCSTR)rawData, rawSize, measure->codepage); data = buffer.c_str(); dataLength = (DWORD)buffer.length(); } rc = pcre16_exec(re, nullptr, (PCRE_SPTR16)data, dataLength, 0, 0, ovector, OVECCOUNT); if (rc >= 0) { if (rc == 0) { // The output vector wasn't big enough RmLog(measure->rm, LOG_ERROR, L"WebParser: Too many substrings"); } else { if (measure->stringIndex < rc) { if (measure->debug != 0) { for (int i = 0; i < rc; ++i) { const WCHAR* match = data + ovector[2 * i]; const int matchLen = min(ovector[2 * i + 1] - ovector[2 * i], 256); RmLogF(measure->rm, LOG_DEBUG, L"WebParser: Index %2d: %.*s", i, matchLen, match); } } const WCHAR* match = data + ovector[2 * measure->stringIndex]; int matchLen = ovector[2 * measure->stringIndex + 1] - ovector[2 * measure->stringIndex]; EnterCriticalSection(&g_CriticalSection); measure->resultString.assign(match, matchLen); DecodeReferences(measure->resultString, measure->decodeCharacterReference); LeaveCriticalSection(&g_CriticalSection); } else { RmLog(measure->rm, LOG_WARNING, L"WebParser: Not enough substrings"); // Clear the old result EnterCriticalSection(&g_CriticalSection); measure->resultString.clear(); if (measure->download) { if (measure->downloadFile.empty()) // cache mode { if (!measure->downloadedFile.empty()) { // Delete old downloaded file DeleteFile(measure->downloadedFile.c_str()); } } measure->downloadedFile.clear(); } LeaveCriticalSection(&g_CriticalSection); } // Update the references std::vector<MeasureData*>::iterator i = g_Measures.begin(); std::wstring compareStr = L"["; compareStr += RmGetMeasureName(measure->rm); compareStr += L']'; for ( ; i != g_Measures.end(); ++i) { if (measure->skin == (*i)->skin && StringUtil::CaseInsensitiveFind((*i)->url, compareStr) != std::wstring::npos) { if ((*i)->stringIndex < rc) { const WCHAR* match = data + ovector[2 * (*i)->stringIndex]; int matchLen = ovector[2 * (*i)->stringIndex + 1] - ovector[2 * (*i)->stringIndex]; if (!(*i)->regExp.empty()) { // Change the index and parse the substring int index = (*i)->stringIndex; (*i)->stringIndex = (*i)->stringIndex2; ParseData((*i), (BYTE*)match, matchLen * 2, true); (*i)->stringIndex = index; } else { // Set the result EnterCriticalSection(&g_CriticalSection); // Substitude the [measure] with result (*i)->resultString = (*i)->url; (*i)->resultString.replace( StringUtil::CaseInsensitiveFind((*i)->resultString, compareStr), compareStr.size(), match, matchLen); DecodeReferences((*i)->resultString, (*i)->decodeCharacterReference); // Start download threads for the references if ((*i)->download) { // Start the download thread unsigned int id; HANDLE threadHandle = (HANDLE)_beginthreadex(nullptr, 0, NetworkDownloadThreadProc, (*i), 0, &id); if (threadHandle) { (*i)->dlThreadHandle = threadHandle; } } LeaveCriticalSection(&g_CriticalSection); } } else { RmLog((*i)->rm, LOG_WARNING, L"WebParser: Not enough substrings"); // Clear the old result EnterCriticalSection(&g_CriticalSection); (*i)->resultString.clear(); if ((*i)->download) { if ((*i)->downloadFile.empty()) // cache mode { if (!(*i)->downloadedFile.empty()) { // Delete old downloaded file DeleteFile((*i)->downloadedFile.c_str()); } } (*i)->downloadedFile.clear(); } LeaveCriticalSection(&g_CriticalSection); } } } } } else { // Matching failed: handle error cases RmLogF(measure->rm, LOG_ERROR, L"WebParser: RegExp matching error (%d)", rc); doErrorAction = true; EnterCriticalSection(&g_CriticalSection); measure->resultString = measure->errorString; // Update the references std::vector<MeasureData*>::iterator i = g_Measures.begin(); std::wstring compareStr = L"["; compareStr += RmGetMeasureName(measure->rm); compareStr += L']'; for ( ; i != g_Measures.end(); ++i) { if ((StringUtil::CaseInsensitiveFind((*i)->url, compareStr) != std::wstring::npos) && (measure->skin == (*i)->skin)) { (*i)->resultString = (*i)->errorString; } } LeaveCriticalSection(&g_CriticalSection); } // Release memory used for the compiled pattern pcre16_free(re); } else { // Compilation failed. RmLogF(measure->rm, LOG_ERROR, L"WebParser: RegExp error at offset %d: %S", erroffset, error); doErrorAction = true; } if (measure->download) { // Start the download thread unsigned int id; HANDLE threadHandle = (HANDLE)_beginthreadex(nullptr, 0, NetworkDownloadThreadProc, measure, 0, &id); if (threadHandle) { measure->dlThreadHandle = threadHandle; } } if (doErrorAction && !measure->onRegExpErrAction.empty()) { RmExecute(measure->skin, measure->onRegExpErrAction.c_str()); } else if (!measure->download && !measure->finishAction.empty()) { RmExecute(measure->skin, measure->finishAction.c_str()); } }
void ParseData(MeasureData* measure, LPCSTR parseData, DWORD dwSize) { // Parse the value from the data pcre* re; const char* error; int erroffset; int ovector[OVECCOUNT]; int rc; int flags = PCRE_UTF8; if (measure->codepage == 0) { flags = 0; } // Compile the regular expression in the first argument re = pcre_compile( StringUtil::NarrowUTF8(measure->regExp).c_str(), // the pattern flags, // default options &error, // for error message &erroffset, // for error offset nullptr); // use default character tables if (re != nullptr) { // Compilation succeeded: match the subject in the second argument std::string utf8Data; if (measure->codepage == 1200) // 1200 = UTF-16LE { // Must convert the data to utf8 utf8Data = StringUtil::NarrowUTF8((LPCWSTR)parseData, dwSize / 2); parseData = utf8Data.c_str(); dwSize = (DWORD)utf8Data.length(); } else if (measure->codepage != CP_UTF8 && measure->codepage != 0) // 0 = CP_ACP { // Must convert the data to utf8 utf8Data = ConvertAsciiToUTF8(parseData, dwSize, measure->codepage); parseData = utf8Data.c_str(); dwSize = (DWORD)utf8Data.length(); } rc = pcre_exec( re, // the compiled pattern nullptr, // no extra data - we didn't study the pattern parseData, // the subject string dwSize, // the length of the subject 0, // start at offset 0 in the subject 0, // default options ovector, // output vector for substring information OVECCOUNT); // number of elements in the output vector if (rc >= 0) { if (rc == 0) { // The output vector wasn't big enough RmLog(measure->rm, LOG_ERROR, L"WebParser: Too many substrings"); } else { if (measure->stringIndex < rc) { if (measure->debug != 0) { for (int i = 0; i < rc; ++i) { const char* substring_start = parseData + ovector[2 * i]; int substring_length = ovector[2 * i + 1] - ovector[2 * i]; substring_length = min(substring_length, 256); const std::wstring value = StringUtil::WidenUTF8(substring_start, substring_length); RmLogF(measure->rm, LOG_DEBUG, L"WebParser: Index %2d: %s", i, value.c_str()); } } const char* substring_start = parseData + ovector[2 * measure->stringIndex]; int substring_length = ovector[2 * measure->stringIndex + 1] - ovector[2 * measure->stringIndex]; EnterCriticalSection(&g_CriticalSection); measure->resultString = StringUtil::WidenUTF8(substring_start, substring_length); DecodeReferences(measure->resultString, measure->decodeCharacterReference); LeaveCriticalSection(&g_CriticalSection); } else { RmLog(measure->rm, LOG_WARNING, L"WebParser: Not enough substrings"); // Clear the old result EnterCriticalSection(&g_CriticalSection); measure->resultString.clear(); if (measure->download) { if (measure->downloadFile.empty()) // cache mode { if (!measure->downloadedFile.empty()) { // Delete old downloaded file DeleteFile(measure->downloadedFile.c_str()); } } measure->downloadedFile.clear(); } LeaveCriticalSection(&g_CriticalSection); } // Update the references std::vector<MeasureData*>::iterator i = g_Measures.begin(); std::wstring compareStr = L"["; compareStr += RmGetMeasureName(measure->rm); compareStr += L']'; for ( ; i != g_Measures.end(); ++i) { if (measure->skin == (*i)->skin && (*i)->url.find(compareStr) != std::wstring::npos) { if ((*i)->stringIndex < rc) { const char* substring_start = parseData + ovector[2 * (*i)->stringIndex]; int substring_length = ovector[2 * (*i)->stringIndex + 1] - ovector[2 * (*i)->stringIndex]; if (!(*i)->regExp.empty()) { // Change the index and parse the substring int index = (*i)->stringIndex; (*i)->stringIndex = (*i)->stringIndex2; ParseData((*i), substring_start, substring_length); (*i)->stringIndex = index; } else { // Set the result EnterCriticalSection(&g_CriticalSection); // Substitude the [measure] with result std::wstring result = StringUtil::WidenUTF8(substring_start, substring_length); (*i)->resultString = (*i)->url; (*i)->resultString.replace((*i)->resultString.find(compareStr), compareStr.size(), result); DecodeReferences((*i)->resultString, (*i)->decodeCharacterReference); // Start download threads for the references if ((*i)->download) { // Start the download thread unsigned int id; HANDLE threadHandle = (HANDLE)_beginthreadex(nullptr, 0, NetworkDownloadThreadProc, (*i), 0, &id); if (threadHandle) { (*i)->dlThreadHandle = threadHandle; } } LeaveCriticalSection(&g_CriticalSection); } } else { RmLog((*i)->rm, LOG_WARNING, L"WebParser: Not enough substrings"); // Clear the old result EnterCriticalSection(&g_CriticalSection); (*i)->resultString.clear(); if ((*i)->download) { if ((*i)->downloadFile.empty()) // cache mode { if (!(*i)->downloadedFile.empty()) { // Delete old downloaded file DeleteFile((*i)->downloadedFile.c_str()); } } (*i)->downloadedFile.clear(); } LeaveCriticalSection(&g_CriticalSection); } } } } } else { // Matching failed: handle error cases RmLogF(measure->rm, LOG_ERROR, L"WebParser: RegExp matching error (%d)", rc); EnterCriticalSection(&g_CriticalSection); measure->resultString = measure->errorString; // Update the references std::vector<MeasureData*>::iterator i = g_Measures.begin(); std::wstring compareStr = L"["; compareStr += RmGetMeasureName(measure->rm); compareStr += L']'; for ( ; i != g_Measures.end(); ++i) { if (((*i)->url.find(compareStr) != std::wstring::npos) && (measure->skin == (*i)->skin)) { (*i)->resultString = (*i)->errorString; } } LeaveCriticalSection(&g_CriticalSection); } // Release memory used for the compiled pattern pcre_free(re); } else { // Compilation failed. RmLogF(measure->rm, LOG_ERROR, L"WebParser: RegExp error at offset %d: %S", erroffset, error); } if (measure->download) { // Start the download thread unsigned int id; HANDLE threadHandle = (HANDLE)_beginthreadex(nullptr, 0, NetworkDownloadThreadProc, measure, 0, &id); if (threadHandle) { measure->dlThreadHandle = threadHandle; } } else { if (!measure->finishAction.empty()) { RmExecute(measure->skin, measure->finishAction.c_str()); } } }