Exemplo n.º 1
0
// Determines the code page of an already opened file.
//
// A byte-order mark is looked for first: SIGN_UNICODE, SIGN_REVERSEBOM or
// SIGN_UTF8. When one is found the file pointer is left right after it.
// Otherwise the pointer is rewound to the beginning of the file and, if
// bUseHeuristics is set, up to 0x8000 bytes are sampled and passed through
// IsTextUnicode, IsTextUTF8 and finally nsUniversalDetectorEx.
//
// file            - file to inspect; its pointer is moved as described above.
// nCodePage       - receives the detected code page; left untouched when nothing is detected.
// pSignatureFound - optional; receives true only when a byte-order mark was found.
// bUseHeuristics  - enables content-based detection when there is no signature.
//
// Returns true when nCodePage has been set.
bool GetFileFormat(File& file, UINT& nCodePage, bool* pSignatureFound, bool bUseHeuristics)
{
    DWORD dwTemp=0;
    bool bSignatureFound = false;
    bool bDetect=false;

    DWORD Readed = 0;
    if (file.Read(&dwTemp, sizeof(dwTemp), Readed) && Readed > 1 ) // minimum signature size is 2 bytes
    {
        if (LOWORD(dwTemp) == SIGN_UNICODE)
        {
            nCodePage = CP_UNICODE;
            file.SetPointer(2, nullptr, FILE_BEGIN);
            bSignatureFound = true;
        }
        else if (LOWORD(dwTemp) == SIGN_REVERSEBOM)
        {
            nCodePage = CP_REVERSEBOM;
            file.SetPointer(2, nullptr, FILE_BEGIN);
            bSignatureFound = true;
        }
        else if ((dwTemp & 0x00FFFFFF) == SIGN_UTF8)
        {
            nCodePage = CP_UTF8;
            file.SetPointer(3, nullptr, FILE_BEGIN);
            bSignatureFound = true;
        }
    }

    if (bSignatureFound)
    {
        bDetect = true;
    }
    else
    {
        // Always rewind when there is no signature. The probe above may have
        // consumed bytes even when it read too few of them to be tested
        // (e.g. a one-byte file), and the caller must see the data from the start.
        file.SetPointer(0, nullptr, FILE_BEGIN);
    }

    if (!bSignatureFound && bUseHeuristics)
    {
        DWORD Size=0x8000; // BUGBUG. TODO: configurable
        LPVOID Buffer=xf_malloc(Size);

        if (Buffer) // without a sample buffer detection is simply skipped
        {
            DWORD ReadSize = 0;
            bool ReadResult = file.Read(Buffer, Size, ReadSize);
            file.SetPointer(0, nullptr, FILE_BEGIN);

            if (ReadResult && ReadSize)
            {
                int test=
                    IS_TEXT_UNICODE_STATISTICS|
                    IS_TEXT_UNICODE_REVERSE_STATISTICS|
                    IS_TEXT_UNICODE_CONTROLS|
                    IS_TEXT_UNICODE_REVERSE_CONTROLS|
                    IS_TEXT_UNICODE_ILLEGAL_CHARS|
                    IS_TEXT_UNICODE_ODD_LENGTH|
                    IS_TEXT_UNICODE_NULL_BYTES;

                if (IsTextUnicode(Buffer, static_cast<int>(ReadSize), &test))
                {
                    if (!(test&IS_TEXT_UNICODE_ODD_LENGTH) && !(test&IS_TEXT_UNICODE_ILLEGAL_CHARS))
                    {
                        if ((test&IS_TEXT_UNICODE_NULL_BYTES) || (test&IS_TEXT_UNICODE_CONTROLS) || (test&IS_TEXT_UNICODE_REVERSE_CONTROLS))
                        {
                            if ((test&IS_TEXT_UNICODE_CONTROLS) || (test&IS_TEXT_UNICODE_STATISTICS))
                            {
                                nCodePage=CP_UNICODE;
                                bDetect=true;
                            }
                            else if ((test&IS_TEXT_UNICODE_REVERSE_CONTROLS) || (test&IS_TEXT_UNICODE_REVERSE_STATISTICS))
                            {
                                nCodePage=CP_REVERSEBOM;
                                bDetect=true;
                            }
                        }
                    }
                }
                else if (IsTextUTF8(static_cast<LPBYTE>(Buffer), ReadSize))
                {
                    nCodePage=CP_UTF8;
                    bDetect=true;
                }
                else
                {
                    // The detector lives on the stack so it is released on every path.
                    nsUniversalDetectorEx ns;
                    ns.HandleData(static_cast<LPCSTR>(Buffer), ReadSize);
                    ns.DataEnd();
                    int cp = ns.getCodePage();
                    if ( cp >= 0 )
                    {
                        const wchar_t *deprecated = Opt.strNoAutoDetectCP.CPtr();

                        if ( 0 == wcscmp(deprecated, L"-1") )
                        {
                            // "-1": with CPMenuMode on, accept only ACP, OEMCP or code pages marked as favorite.
                            if ( Opt.CPMenuMode )
                            {
                                if ( static_cast<UINT>(cp) != GetACP() && static_cast<UINT>(cp) != GetOEMCP() )
                                {
                                    int selectType = 0;
                                    wchar_t szcp[16];
                                    _snwprintf(szcp, ARRAYSIZE(szcp), L"%d", cp);
                                    GeneralCfg->GetValue(FavoriteCodePagesKey, szcp, &selectType, 0);
                                    if (0 == (selectType & CPST_FAVORITE))
                                        cp = -1;
                                }
                            }
                        }
                        else
                        {
                            // Otherwise the option is a list of code page numbers that must not be auto-detected.
                            while (*deprecated)
                            {
                                while (*deprecated && (*deprecated < L'0' || *deprecated > L'9'))
                                    ++deprecated;

                                // Only separators were left: do not let wcstol turn "nothing" into 0.
                                if (!*deprecated)
                                    break;

                                wchar_t *end = nullptr;
                                int dp = static_cast<int>(wcstol(deprecated, &end, 0));
                                deprecated = end;

                                if (cp == dp)
                                {
                                    cp = -1;
                                    break;
                                }
                            }
                        }
                    }

                    if (cp != -1)
                    {
                        nCodePage = cp;
                        bDetect = true;
                    }
                }
            }

            xf_free(Buffer);
        }
    }

    if (pSignatureFound)
    {
        *pSignatureFound = bSignatureFound;
    }
    return bDetect;
}
Exemplo n.º 2
0
// Fetches "http://<strServer>" and extracts two pieces of information from the response.
//
// strServer - host (and optional path) without the scheme; "http://" is prepended.
// strTitle  - receives the text between <title> and </title> when both tags are present.
//             It is taken from a lower-cased copy of the body, so the title is lower-cased too.
//             NOTE(review): the lower-casing looks accidental - confirm before changing it.
//
// Returns the value of the "Server:" response header (prefixed with "Windows, " when it
// mentions "Microsoft"), "Unknown" when the header is absent, or an empty string when an
// exception was raised while talking to the server.
CString GetServerDetailInfo(CString strServer, CString &strTitle)
{
	strServer.Insert(0, "http://");
	CString strReturn = "Unknown", szAllData, szData;
	CInternetSession ss(_T("session"), 0, INTERNET_OPEN_TYPE_PRECONFIG, NULL, NULL, INTERNET_FLAG_DONT_CACHE | INTERNET_FLAG_RELOAD);// disable caching
	CHttpFile *pF = NULL;
	try
	{
		ss.SetOption(INTERNET_OPTION_CONNECT_TIMEOUT, 10 * 1000);
		pF = (CHttpFile *)ss.OpenURL(strServer, 1, INTERNET_FLAG_EXISTING_CONNECT | INTERNET_FLAG_TRANSFER_ASCII | INTERNET_FLAG_NO_AUTO_REDIRECT);

		// OpenURL may hand back NULL; without a file there is nothing to query.
		if (pF != NULL)
		{
			pF->QueryInfo(HTTP_QUERY_RAW_HEADERS_CRLF, szAllData, 0);
			int nPos1 = szAllData.Find("Server: ", 0);
			if (nPos1 == -1)
			{
				strReturn = "Unknown";
			}
			else
			{
				nPos1 += 8; // length of "Server: "
				int nPos2 = szAllData.Find("\r\n", nPos1);
				if (nPos2 == -1)
				{
					// Header not terminated by CRLF: take everything up to the end.
					strReturn = szAllData.Mid(nPos1);
				}
				else
				{
					strReturn = szAllData.Mid(nPos1, nPos2 - nPos1);
				}
				if (strReturn.Find("Microsoft", 0) != -1)
				{
					strReturn.Insert(0, "Windows, ");
				}
			}

			// Read the whole body; ReadString returns it line by line.
			szAllData.Empty();
			while(pF->ReadString(szData))
				szAllData += szData;

			szAllData.MakeLower();
			nPos1 = szAllData.Find("<title>", 0);
			if (nPos1 != -1)
			{
				int nPos2 = szAllData.Find("</title>", nPos1);
				if (nPos2 != -1)
				{
					// 7 == length of "<title>"
					strTitle = szAllData.Mid(nPos1 + 7, nPos2 - nPos1 - 7);
					if (IsTextUTF8(strTitle.GetBuffer(0), strTitle.GetLength()))
						Utf8ToAnsi(strTitle);
				}
			}
		}
	}
	catch (CException *pEx)
	{
		// MFC throws heap-allocated exception objects; they must be released explicitly.
		pEx->Delete();
		strReturn = "";
	}
	catch(...)
	{
		strReturn = "";
	}
	if (pF != NULL)
	{
		pF->Close();
		delete pF;
		pF = NULL;
	}
	// The session is a local object: close it, but never delete it.
	ss.Close();
	return strReturn;
}
Exemplo n.º 3
0
// Determines the code page of an already opened file.
//
// A byte-order mark is looked for first: SIGN_UNICODE, SIGN_REVERSEBOM or
// SIGN_UTF8. When one is found the file pointer is left right after it.
// Otherwise the pointer is rewound to the beginning of the file and, if
// bUseHeuristics is set, up to 0x8000 bytes are sampled and passed through
// IsTextUnicode, IsTextUTF8 and finally GetCpUsingUniversalDetector.
//
// file            - file to inspect; its pointer is moved as described above.
// nCodePage       - receives the detected code page; left untouched when nothing is detected.
// pSignatureFound - optional; receives true only when a byte-order mark was found.
// bUseHeuristics  - enables content-based detection when there is no signature.
// pPureAscii      - optional; receives the "pure ASCII" flag computed by the heuristics
//                   (true for an empty sample, otherwise whatever IsTextUTF8 reports);
//                   always false when the heuristics did not run.
//
// Returns true when nCodePage has been set.
bool GetFileFormat(
	api::fs::file& file, uintptr_t& nCodePage, bool* pSignatureFound, bool bUseHeuristics, bool* pPureAscii)
{
	DWORD dwTemp = 0;
	bool bSignatureFound = false;
	bool bDetect = false;
	bool bPureAscii = false;

	size_t Readed = 0;
	if (file.Read(&dwTemp, sizeof(dwTemp), Readed) && Readed > 1 ) // minimum signature size is 2 bytes
	{
		if (LOWORD(dwTemp) == SIGN_UNICODE)
		{
			nCodePage = CP_UNICODE;
			file.SetPointer(2, nullptr, FILE_BEGIN);
			bSignatureFound = true;
		}
		else if (LOWORD(dwTemp) == SIGN_REVERSEBOM)
		{
			nCodePage = CP_REVERSEBOM;
			file.SetPointer(2, nullptr, FILE_BEGIN);
			bSignatureFound = true;
		}
		else if ((dwTemp & 0x00FFFFFF) == SIGN_UTF8)
		{
			nCodePage = CP_UTF8;
			file.SetPointer(3, nullptr, FILE_BEGIN);
			bSignatureFound = true;
		}
	}

	if (bSignatureFound)
	{
		bDetect = true;
	}
	else
	{
		// Always rewind when there is no signature. The probe above may have
		// consumed bytes even when it read too few of them to be tested
		// (e.g. a one-byte file), and the caller must see the data from the start.
		file.SetPointer(0, nullptr, FILE_BEGIN);
	}

	if (!bSignatureFound && bUseHeuristics)
	{
		size_t Size = 0x8000; // BUGBUG. TODO: configurable
		char_ptr Buffer(Size);
		size_t ReadSize = 0;
		bool ReadResult = file.Read(Buffer.get(), Size, ReadSize);
		file.SetPointer(0, nullptr, FILE_BEGIN);

		bPureAscii = ReadResult && !ReadSize; // empty file == pure ascii

		if (ReadResult && ReadSize)
		{
			// BUGBUG MSDN documents IS_TEXT_UNICODE_BUFFER_TOO_SMALL but there is no such thing
			if (ReadSize > 1)
			{
				int test = IS_TEXT_UNICODE_UNICODE_MASK | IS_TEXT_UNICODE_REVERSE_MASK | IS_TEXT_UNICODE_NOT_UNICODE_MASK | IS_TEXT_UNICODE_NOT_ASCII_MASK;

				IsTextUnicode(Buffer.get(), static_cast<int>(ReadSize), &test); // return value is ignored, it's ok.

				if (!(test & IS_TEXT_UNICODE_NOT_UNICODE_MASK) && (test & IS_TEXT_UNICODE_NOT_ASCII_MASK))
				{
					if (test & IS_TEXT_UNICODE_UNICODE_MASK)
					{
						nCodePage = CP_UNICODE;
						bDetect = true;
					}
					else if (test & IS_TEXT_UNICODE_REVERSE_MASK)
					{
						nCodePage = CP_REVERSEBOM;
						bDetect = true;
					}
				}

				// IsTextUTF8 also fills bPureAscii; it is not called once UTF-16 has been detected.
				if (!bDetect && IsTextUTF8(Buffer.get(), ReadSize, bPureAscii))
				{
					nCodePage = CP_UTF8;
					bDetect = true;
				}
			}

			if (!bDetect && !bPureAscii)
			{
				int cp = GetCpUsingUniversalDetector(Buffer.get(), ReadSize);
				if ( cp >= 0 )
				{
					if (Global->Opt->strNoAutoDetectCP.Get() == L"-1")
					{
						// "-1": with CPMenuMode on, accept only ACP, OEMCP or code pages marked as favorite.
						if ( Global->Opt->CPMenuMode )
						{
							if ( static_cast<UINT>(cp) != GetACP() && static_cast<UINT>(cp) != GetOEMCP() )
							{
								long long selectType = Codepages().GetFavorite(cp);
								if (0 == (selectType & CPST_FAVORITE))
									cp = -1;
							}
						}
					}
					else
					{
						// Otherwise the option is a list of code pages that must not be auto-detected.
						std::vector<string> BannedCpList;
						split(BannedCpList, Global->Opt->strNoAutoDetectCP, STLF_UNIQUE);

						if (std::find(ALL_CONST_RANGE(BannedCpList), std::to_wstring(cp)) != BannedCpList.cend())
						{
							cp = -1;
						}
					}
				}

				if (cp != -1)
				{
					nCodePage = cp;
					bDetect = true;
				}
			}
		}
	}

	if (pSignatureFound)
		*pSignatureFound = bSignatureFound;

	if (pPureAscii)
		*pPureAscii = bPureAscii;

	return bDetect;
}
Exemplo n.º 4
0
bool OldGetFileFormat(FILE *file, UINT &nCodePage, bool *pSignatureFound, bool bUseHeuristics)
{
    DWORD dwTemp=0;
    bool bSignatureFound = false;
    bool bDetect=false;

    if (fread(&dwTemp, 1, 4, file))
    {
        if (LOWORD(dwTemp) == SIGN_UNICODE)
        {
            nCodePage = CP_UNICODE;
            fseek(file, 2, SEEK_SET);
            bSignatureFound = true;
        }
        else if (LOWORD(dwTemp) == SIGN_REVERSEBOM)
        {
            nCodePage = CP_REVERSEBOM;
            fseek(file, 2, SEEK_SET);
            bSignatureFound = true;
        }
        else if ((dwTemp & 0x00FFFFFF) == SIGN_UTF8)
        {
            nCodePage = CP_UTF8;
            fseek(file, 3, SEEK_SET);
            bSignatureFound = true;
        }
        else
            fseek(file, 0, SEEK_SET);
    }

    if (bSignatureFound)
    {
        bDetect = true;
    }
    else if (bUseHeuristics)
    {
        fseek(file, 0, SEEK_SET);
        size_t sz=0x8000; // BUGBUG. TODO: configurable
        LPVOID Buffer=xf_malloc(sz);
        sz=fread(Buffer,1,sz,file);
        fseek(file,0,SEEK_SET);

        if (sz)
        {
            int test=
                IS_TEXT_UNICODE_STATISTICS|
                IS_TEXT_UNICODE_REVERSE_STATISTICS|
                IS_TEXT_UNICODE_CONTROLS|
                IS_TEXT_UNICODE_REVERSE_CONTROLS|
                IS_TEXT_UNICODE_ILLEGAL_CHARS|
                IS_TEXT_UNICODE_ODD_LENGTH|
                IS_TEXT_UNICODE_NULL_BYTES;

            if (IsTextUnicode(Buffer, (int)sz, &test))
            {
                if (!(test&IS_TEXT_UNICODE_ODD_LENGTH) && !(test&IS_TEXT_UNICODE_ILLEGAL_CHARS))
                {
                    if ((test&IS_TEXT_UNICODE_NULL_BYTES) ||
                            (test&IS_TEXT_UNICODE_CONTROLS) ||
                            (test&IS_TEXT_UNICODE_REVERSE_CONTROLS))
                    {
                        if ((test&IS_TEXT_UNICODE_CONTROLS) || (test&IS_TEXT_UNICODE_STATISTICS))
                        {
                            nCodePage=CP_UNICODE;
                            bDetect=true;
                        }
                        else if ((test&IS_TEXT_UNICODE_REVERSE_CONTROLS) || (test&IS_TEXT_UNICODE_REVERSE_STATISTICS))
                        {
                            nCodePage=CP_REVERSEBOM;
                            bDetect=true;
                        }
                    }
                }
            }
            else if (IsTextUTF8((const LPBYTE)Buffer, sz))
            {
                nCodePage=CP_UTF8;
                bDetect=true;
            }
            else
            {
                nsUniversalDetectorEx *ns = new nsUniversalDetectorEx();
                ns->HandleData((const char*)Buffer,(PRUint32)sz);
                ns->DataEnd();
                int cp = ns->getCodePage();

                if (cp != -1)
                {
                    nCodePage = cp;
                    bDetect = true;
                }

                delete ns;
            }
        }

        xf_free(Buffer);
    }

    if (pSignatureFound)
        *pSignatureFound = bSignatureFound;

    return bDetect;
}
Exemplo n.º 5
0
bool CCharsetDetector::DoIt(unsigned long dwParserType, char* pBuf, unsigned long dwLength)
{
	//I. "��������(elimination logic)����С�������ȷ���ַ���"��http://www.renpeicheng.com/plus/view-2619-1.html��
	//float fTemp = 0.0;
	unsigned long	dwTempIdx	        = 0;
	unsigned long	dwTempOrder	    = 0;
	unsigned long dwSamiNoiseLen   = 0;
	unsigned long dwMicroDvdLen    = 0;
	unsigned long dwSubViewLen     = 0;
	unsigned long dwTimeLineNoiseLen = 0;
	unsigned long	dwTempPostion	= 0;
	bool	    bJISSecondChar	= true;

	Reset();
	if (IsTextUTF8(pBuf, dwLength))
	{
		m_pSMUTF_8Info->dwDetectState = EDetectStateMe;
		m_bDone =  true;
		goto EXIT;
	}
	for (long i = 0; i < dwLength; i++)
	{
		char b = pBuf[i];
		char c = 0x00;
		

		if (5 == dwParserType)// 5 means EParserTypeSmi.
		{
			dwSamiNoiseLen = IsSamiNoise(pBuf, i, dwLength);

			if (dwSamiNoiseLen > 0)
			{
				i += dwSamiNoiseLen - 1;
				continue;
			}
		}
		
		dwMicroDvdLen = IsMicroDvdNoise(pBuf, i, dwLength);
		if (dwMicroDvdLen > 0)
		{
			i += dwMicroDvdLen;
			continue;
		}
		
		//dwTimeLineNoiseLen = ISTimeLineNoise(pBuf, i, dwLength);
		//if (dwTimeLineNoiseLen > 0)
		//{
		//	i+=dwTimeLineNoiseLen - 1;
		//	continue;
		//}

		if (dwLength == (i - 1))
		{
			c = 0x00;
		}
		else
		{
			c = pBuf[i + 1];
		}

		if ((b&0xff) > 0xA0)
		{
			if (i > (dwTempIdx + 1))
			{
				m_bSecondChar = false;
			}
			if (true == m_bSecondChar)
			{
				m_fGb2312	+= m_StatisticsArray[8]->SecondByteFreq()[(unsigned long)((b&0xff) - 0xA0)] * 0.5;
				m_fGbEucKR	+= m_StatisticsArray[4]->SecondByteFreq()[(unsigned long)((b&0xff) - 0xA0)] * 0.5;
				m_fBig5		+= m_StatisticsArray[6]->SecondByteFreq()[(unsigned long)((b&0xff) - 0xA0)] * 0.5;


				//**************************************************************************************
				//	simplified chinese.
				//  first  byte range: 0xb0 -- 0xfe
				//  second byte range: 0xa1 -- 0xfe
				if (i > 0)
				{ 
					if ((unsigned long)(pBuf[i-1]&0xff) >= 0xb0 && (unsigned long)(b&0xff) >= 0xa1)
					{
						m_dwGB2312Count++;
						dwTempOrder = 94*((unsigned long)(pBuf[i-1]&0xff)-(unsigned long)0xb0) + (unsigned long)(b&0xff)- (unsigned long)0xa1;
					}
					else
						dwTempOrder =  -1;
				}
				if (dwTempOrder < GB2312_TABLE_SIZE)
				{
					if (GB2312CharToFreqOrder[dwTempOrder] < 1024)
					{
						m_dwGB2312OccurrenceCharCount++;
					}
				}

				//**************************************************************************************
				//	big5
				//  first  byte range: 0xa4 -- 0xfe
				//  second byte range: 0x40 -- 0x7e , 0xa1 -- 0xfe

				if (i > 0)
				{ 
					if ((unsigned long)(pBuf[i-1]&0xff) >= 0xa4 && (unsigned long)(b&0xff) >= 0xa1)
					{
						m_dwBIG5Count++;
						dwTempOrder = 157 * ((unsigned long)(pBuf[i-1]&0xff)-(unsigned long)0xa4) + (unsigned long)(b&0xff)- (unsigned long)0xa1 + 63;
					}
					else
						dwTempOrder =  -1;
				}
				if (dwTempOrder < BIG5_TABLE_SIZE)
				{
					if (Big5CharToFreqOrder[dwTempOrder] < 1024)
					{
						m_dwBIG5OccurrenceCharCount++;
					}
				}

				//**************************************************************************************
				//	EUC-KR
				//  first  byte range: 0xb0 -- 0xfe
				//  second byte range: 0xa1 -- 0xfe


				if (i > 0)
				{ 
					if ((unsigned long)(pBuf[i-1]&0xff) >= 0xb0 && (unsigned long)(b&0xff) >= 0xa1)
					{
						m_dwEUCKRCount++;
						dwTempOrder = 94 * ((unsigned long)(pBuf[i-1]&0xff)-(unsigned long)0xb0) + (unsigned long)(b&0xff)- (unsigned long)0xa1;
					}
					else
						dwTempOrder =  -1;
				}
				if (dwTempOrder < EUCKR_TABLE_SIZE)
				{
					if (EUCKRCharToFreqOrder[dwTempOrder] < 1024)
					{
						m_dwEUCKROccurrenceCharCount++;
					}
				}
				//**************************************************************************************


				m_dwCharacterCount++;
				m_bSecondChar = false;
			}
			else
			{
				m_fGb2312	+= m_StatisticsArray[8]->SecondByteFreq()[(unsigned long)((b&0xff) - 0xA0)] * 0.5;
				m_fGbEucKR	+= m_StatisticsArray[4]->SecondByteFreq()[(unsigned long)((b&0xff) - 0xA0)] * 0.5;
				m_fBig5		+= m_StatisticsArray[6]->SecondByteFreq()[(unsigned long)((b&0xff) - 0xA0)] * 0.5;

				if (((b&0xff) > 0xb0 && (b&0xff) < 0xca) && (c&0xff) > 0xa0)   //most korean character  16~40(+ A0) qzone 
				{
					m_dwKRCount++;
				}
				else if ((0xa4 == (b&0xff) || 0xa5 == (b&0xff)) && (c&0xff) > 0xa0)//chinese and euc-jp zone for kana
				{
					m_dwZHJPCount++;
				}
				else if ((0xaa == (b&0xff) || 0xab == (b&0xff)) && (c&0xff) > 0xa0)//korean zone for kana
				{
					m_dwKRJPCount++;
				}
				m_bSecondChar = true;
				dwTempIdx	= i;
			}
		}
		else if ((b&0xff) >= 0x40 && true == m_bSecondChar)
		{

			//**************************************************************************************
			//	big5
			//  first  byte range: 0xa4 -- 0xfe
			//  second byte range: 0x40 -- 0x7e , 0xa1 -- 0xfe

			if (i > 0)
			{ 
				if ((unsigned long)(pBuf[i-1]&0xff) >= 0xa4 && (unsigned long)(b&0xff) >= 0x40)
				{
					m_dwBIG5Count++;
					dwTempOrder = 157 * ((unsigned long)(pBuf[i-1]&0xff)-(unsigned long)0xa4) + (unsigned long)(b&0xff)- (unsigned long)0x40;
				}
				else
					dwTempOrder =  -1;
			}
			if (dwTempOrder < BIG5_TABLE_SIZE)
			{
				if (Big5CharToFreqOrder[dwTempOrder] < 1024)
				{
					m_dwBIG5OccurrenceCharCount++;
				}
			}
			//**************************************************************************************
			m_dwBig5OnlyCount++;
			m_bSecondChar = false;
		}
		else if (((b&0xff) == 0x82 &&(c&0xff) <=0xf1 && (c&0xff) >=0x9f) || ((b&0xff) == 0x83 &&(c&0xff) <=0x96 && (c&0xff) >=0x40))
		{
			m_dwJISJPCount++;
		}

		//************************************************************************************************************
		//for sjis encoding
		//  first  byte range: 0x81 -- 0x9f , 0xe0 -- 0xfe
		//  second byte range: 0x40 -- 0x7e,  0x81 -- oxfe

		if (i - dwTempPostion > 1)
		{
			bJISSecondChar	= true;
		}						
		if ((((b&0xff) >= 0x81 &&(b&0xff) <=0x9f) || ((b&0xff) >= 0xe0 &&(b&0xff) <=0xfe)) 
			&& (((c&0xff) >= 0x40 &&(c&0xff) <=0x7e) || ((c&0xff) >= 0x81 &&(c&0xff) <=0xfe))
			&& true == bJISSecondChar)
		{
			m_dwJISCount++;
			bJISSecondChar	= false;
			dwTempPostion	= i;
			if (((b&0xff) >= 0x81 &&(b&0xff) <=0x9f))
			{
				dwTempOrder = 188 * ((unsigned long)(b&0xff) - (unsigned long)0x81) +(unsigned long)(c&0xff) - 0x40;
			}
			else if (((b&0xff) >= 0xe0 &&(b&0xff) <=0xfe))
			{
				dwTempOrder = 188 * ((unsigned long)(b&0xff) - (unsigned long)0xe0 + 31) +(unsigned long)(c&0xff) - 0x40;
			}
			else
			{
				dwTempOrder =  -1;
			}
			if (dwTempOrder < JIS_TABLE_SIZE)
			{
				if (JISCharToFreqOrder[dwTempOrder] < 1024)
				{
					m_dwJISOccurrenceCharCount++;
				}
			}
		}



		/******************************************************
		/#The emphasis of following charset detector algorithm is <<CONFIDENCE>>.
		/#Confidence is a float value, if one's confidence is more than 0.65(found the right charset).
		/#Set the EDetectStateMe, and get out this function.
		******************************************************/

		//************************************************************************************************************
		/*for windows-1252 encoding*/
		if(!m_pSBWIN_1252Info->dwState)
			m_pSBWIN_1252Info->dwState = DetectWIN1252Codepage(&Win1252Model, pBuf[i], m_pSBWIN_1252Info);
		if (EDetectStateMe == m_pSBWIN_1252Info->dwState)
		{
			break;
		}
		//************************************************************************************************************
		//for koir-8 encoding
		//if character is not a symbol or punctuation character
		if (EDetectStateOn == m_pSBKOIR_8Info->dwState)
			m_pSBKOIR_8Info->dwState = DetectSingleByteCodepage(&Koi8rModel, pBuf[i], m_pSBKOIR_8Info);
		if (EDetectStateMe == m_pSBKOIR_8Info->dwState)
		{
			break;
		}
		//************************************************************************************************************
		//for win-1251 encoding
		if(!m_pSBWIN_1251Info->dwState)
			m_pSBWIN_1251Info->dwState = DetectSingleByteCodepage(&Win1251Model, pBuf[i], m_pSBWIN_1251Info);
		if (EDetectStateMe == m_pSBWIN_1251Info->dwState)
		{
			break;
		}
		//************************************************************************************************************
		/*for iso-1859-2 encoding*/
		if(!m_pSBISO8859_2Info->dwState)
			m_pSBISO8859_2Info->dwState = DetectSingleByteCodepage(&Latin2HungarianModel, pBuf[i], m_pSBISO8859_2Info);

		//************************************************************************************************************
		/*for win-1250 encoding*/
		if(!m_pSBWIN_1250Info->dwState)
			m_pSBWIN_1250Info->dwState = DetectSingleByteCodepage(&Win1250HungarianModel, pBuf[i], m_pSBWIN_1250Info);


		/*windows-1250 is very likely to iso-8859-2*/
		if (EDetectStateMe == m_pSBWIN_1250Info->dwState || EDetectStateMe == m_pSBISO8859_2Info->dwState)
		{
			break;
		}

		//************************************************************************************************************
		//for win-1253 encoding
		if(!m_pSBWIN_1253Info->dwState)
			m_pSBWIN_1253Info->dwState = DetectSingleByteCodepage(&Win1253Model, pBuf[i], m_pSBWIN_1253Info);

		//************************************************************************************************************
		//for iso-8859-7 encoding
		if(!m_pSBISO8859_7Info->dwState)
			m_pSBISO8859_7Info->dwState = DetectSingleByteCodepage(&Latin7Model, pBuf[i], m_pSBISO8859_7Info);

		/*windwos-1253 is very likely to iso-8859-7*/
		if (EDetectStateMe == m_pSBISO8859_7Info->dwState || EDetectStateMe == m_pSBWIN_1253Info->dwState)
		{
			break;
		}

		if (!m_pSMUTF_8Info->dwDetectState)
			m_pSMUTF_8Info->dwDetectState = DetectCodingStateMachine(&UTF8SMModel, pBuf[i], m_pSMUTF_8Info);
		
		/*************************************************************************************************************/
		/*====START filter charset.*/
		/*for almost the same win-1250 and iso-1859-2:
		*Because iso-8859-2 may not use characters between 128 with 159, so I count it.*/
		if ( (unsigned char)pBuf[i]>= 128 && (unsigned char)pBuf[i] <= 159)
		{
			m_dwWINvsISO8859Count++;
		}
		/*====END of filter charset.*/

		for (long j = 0; j < m_dwItems; )
		{
			unsigned char st = ICharsetVerifier::getNextState(m_VerifierArray[m_dwItemIdx[j]], b,
				m_State[j]);
			if (st == 2)
			{
				//Bingo to detector the charset.
				if (ECodepageGB18030 == m_VerifierArray[m_dwItemIdx[j]]->Charset() && i < 2000)
				{
					j++;
					continue;
				}
				m_eCodepageType = m_VerifierArray[m_dwItemIdx[j]]->Charset();
				m_bDone = true;
				goto EXIT;
			}
			if (st == 1)
			{
				//filter				
				if (j < m_dwItems)
				{
					m_dwItems--;
					m_dwItemIdx[j] = m_dwItemIdx[m_dwItems];
					m_State[j] = m_State[m_dwItems];
				}
			}
			else
			{
				m_State[j++] = st;
			}
		}

		if (m_dwItems <= 1)
		{
			if ((i <= dwLength - 1) && i < 2000) 
			{
				continue;
			}
			if (1 == m_dwItems)
			{
				m_eCodepageType = m_VerifierArray[m_dwItemIdx[0]]->Charset();
			}
			m_bDone = true;
			goto EXIT;
		}
		long nonUCS2Num = 0;
		long nonUCS2Idx = 0;
		for (long j = 0; j < m_dwItems; j++)
		{
			if (!m_VerifierArray[m_dwItemIdx[j]]->IsUCS2()
				&& !m_VerifierArray[m_dwItemIdx[j]]->IsUCS2())
			{
				nonUCS2Num++;
				nonUCS2Idx = j;
			}
		}

		if (1 == nonUCS2Num)
		{
			if ((i <= dwLength - 1) && i < 2000)
			{
				continue;
			}
			m_eCodepageType = m_VerifierArray[m_dwItemIdx[nonUCS2Idx]]->Charset();
			m_bDone = true;
			goto EXIT;
		}
	}
	//m_bRunSampler = true;
	if (m_bRunSampler)
	{
		Sample(pBuf, dwLength);
		//	m_bRunSampler = false;
	}
EXIT:

	m_bSecondChar = false;
	return m_bDone;
}
Exemplo n.º 6
0
// Dispatches a JSON-RPC message received from the server.
//
// Messages whose "method" is handled here update the application list / UI.
// UI.EndAudioPassThru returns RESULT_SUCCESS right after notifying the UI.
// Any other method is forwarded to the current application, or rejected with
// RESULT_APPLICATION_NOT_REGISTERED when there is none. Messages without a
// "method" member are ignored and reported as RESULT_SUCCESS.
//
// m_pCurApp may be NULL (no application selected yet), so every branch that
// reads it checks first.
Result AppList::recvFromServer(Json::Value jsonObj)
{
    if (jsonObj.isMember("method")) {
        std::string str_method = jsonObj["method"].asString();

        if (str_method == "BasicCommunication.OnAppRegistered") {
            newAppRegistered(jsonObj);
            m_pUIManager->onAppShow(ID_APPLINK);
        }else if (str_method == "BasicCommunication.OnAppUnregistered") {
            int appID = jsonObj["params"]["appID"].asInt();
            m_pUIManager->onAppUnregister(appID);
        }else if (str_method == "VR.VRExitApp") {
            // The announcement names the current app; without one there is nothing to announce.
            if (m_pCurApp != NULL) {
                m_pUIManager->tsSpeak(ID_EXIT, "退出"+ m_pCurApp->m_szAppName);
            }
            m_pUIManager->onAppShow(ID_APPLINK);
        }else if (str_method == "Navigation.StopStream") {
            m_pUIManager->onVideoStreamStop();
            ShowPreviousUI();
        }else if (str_method == "VR.VRSwitchApp") {
//            Example message:
//            {
//               "jsonrpc" : "2.0",
//               "method" : "VR.VRSwitchApp",
//               "params" : {
//                  "appID" : 18467,
//                  "appVRName" : "<app name, possibly non-ASCII>"
//               }
//            }
            std::string strAppVRName = jsonObj["params"]["appVRName"].asString();
            // Convert the name only when it is not UTF-8 already.
            if (!IsTextUTF8((char *)strAppVRName.data(),strAppVRName.size()))
                strAppVRName = string_To_UTF8(strAppVRName);

            m_pUIManager->tsSpeak(ID_SWITCHAPP, strAppVRName);

            int iNewID = jsonObj["params"]["appID"].asInt();
            // Switch when no app is current yet or when a different one is requested.
            if (m_pCurApp == NULL || m_pCurApp->m_iAppID != iNewID) {
                std::vector <AppData *>::iterator i;
                for (i = m_AppDatas.begin(); i != m_AppDatas.end(); ++i) {
                    if (iNewID == (*i)->m_iAppID) {
                        m_pCurApp = *i;
                        m_pUIManager->onAppShow(m_pCurApp->getCurUI());
                        break;
                    }
                }
            }
        }else if (str_method == "UI.SetAppIcon") {
            int iAppId = jsonObj["params"]["appID"].asInt();
            std::vector <AppData *>::iterator Iter = m_AppDatas.begin();
            while (Iter != m_AppDatas.end()) {
                if (iAppId == (*Iter)->m_iAppID) {
                    QUrl iconPathUrl(jsonObj["params"]["syncFileName"]["value"].asString().c_str());
#if defined(WINCE)
                    (*Iter)->m_strAppIconFilePath = ChangeSlash(iconPathUrl.path().toLocal8Bit().data());
#else
                    (*Iter)->m_strAppIconFilePath = ChangeSlash(iconPathUrl.path().toStdString());
#endif
                    // Refresh the app list only while no app is in the foreground.
                    if (m_pCurApp == NULL) {
                        m_pUIManager->onAppShow(ID_APPLINK);
                    }
                    //m_pUIManager->onAppShow(m_pCurApp->getCurUI());
                    break;
                }
                ++Iter;
            }
        }else if (str_method == "UI.EndAudioPassThru") {
            ToSDL->OnVRCancelRecord();
            m_pUIManager->OnEndAudioPassThru();
            return RESULT_SUCCESS;
        }else if (str_method == "BasicCommunication.UpdateDeviceList") {
            // add by fanqiang
            updateDeiveList(jsonObj);
            m_pUIManager->ShowDeviceList();
        }
        else {
            if (m_pCurApp)
                return m_pCurApp->recvFromServer(jsonObj);
            else
                return RESULT_APPLICATION_NOT_REGISTERED;
        }
        return  RESULT_SUCCESS;
    }
    return  RESULT_SUCCESS;
}
Exemplo n.º 7
0
// Determines the code page of an already opened file.
//
// A byte-order mark is looked for first: SIGN_UNICODE, SIGN_REVERSEBOM or
// SIGN_UTF8. When one is found the file pointer is left right after it.
// Otherwise the pointer is rewound to the beginning of the file and, if
// bUseHeuristics is set, up to 0x8000 bytes are sampled and passed through
// IsTextUnicode, IsTextUTF8 and finally GetCpUsingUniversalDetector.
//
// file            - file to inspect; its pointer is moved as described above.
// nCodePage       - receives the detected code page; left untouched when nothing is detected.
// pSignatureFound - optional; receives true only when a byte-order mark was found.
// bUseHeuristics  - enables content-based detection when there is no signature.
//
// Returns true when nCodePage has been set.
bool GetFileFormat(api::File& file, uintptr_t& nCodePage, bool* pSignatureFound, bool bUseHeuristics)
{
	DWORD dwTemp=0;
	bool bSignatureFound = false;
	bool bDetect=false;

	DWORD Readed = 0;
	if (file.Read(&dwTemp, sizeof(dwTemp), Readed) && Readed > 1 ) // minimum signature size is 2 bytes
	{
		if (LOWORD(dwTemp) == SIGN_UNICODE)
		{
			nCodePage = CP_UNICODE;
			file.SetPointer(2, nullptr, FILE_BEGIN);
			bSignatureFound = true;
		}
		else if (LOWORD(dwTemp) == SIGN_REVERSEBOM)
		{
			nCodePage = CP_REVERSEBOM;
			file.SetPointer(2, nullptr, FILE_BEGIN);
			bSignatureFound = true;
		}
		else if ((dwTemp & 0x00FFFFFF) == SIGN_UTF8)
		{
			nCodePage = CP_UTF8;
			file.SetPointer(3, nullptr, FILE_BEGIN);
			bSignatureFound = true;
		}
	}

	if (bSignatureFound)
	{
		bDetect = true;
	}
	else
	{
		// Always rewind when there is no signature. The probe above may have
		// consumed bytes even when it read too few of them to be tested
		// (e.g. a one-byte file), and the caller must see the data from the start.
		file.SetPointer(0, nullptr, FILE_BEGIN);
	}

	if (!bSignatureFound && bUseHeuristics)
	{
		DWORD Size=0x8000; // BUGBUG. TODO: configurable
		char_ptr Buffer(Size);
		DWORD ReadSize = 0;
		bool ReadResult = file.Read(Buffer.get(), Size, ReadSize);
		file.SetPointer(0, nullptr, FILE_BEGIN);

		if (ReadResult && ReadSize)
		{
			int test=
				IS_TEXT_UNICODE_STATISTICS|
				IS_TEXT_UNICODE_REVERSE_STATISTICS|
				IS_TEXT_UNICODE_CONTROLS|
				IS_TEXT_UNICODE_REVERSE_CONTROLS|
				IS_TEXT_UNICODE_ILLEGAL_CHARS|
				IS_TEXT_UNICODE_ODD_LENGTH|
				IS_TEXT_UNICODE_NULL_BYTES;

			if (IsTextUnicode(Buffer.get(), ReadSize, &test))
			{
				if (!(test&IS_TEXT_UNICODE_ODD_LENGTH) && !(test&IS_TEXT_UNICODE_ILLEGAL_CHARS))
				{
					if ((test&IS_TEXT_UNICODE_NULL_BYTES) || (test&IS_TEXT_UNICODE_CONTROLS) || (test&IS_TEXT_UNICODE_REVERSE_CONTROLS))
					{
						if ((test&IS_TEXT_UNICODE_CONTROLS) || (test&IS_TEXT_UNICODE_STATISTICS))
						{
							nCodePage=CP_UNICODE;
							bDetect=true;
						}
						else if ((test&IS_TEXT_UNICODE_REVERSE_CONTROLS) || (test&IS_TEXT_UNICODE_REVERSE_STATISTICS))
						{
							nCodePage=CP_REVERSEBOM;
							bDetect=true;
						}
					}
				}
			}
			else if (IsTextUTF8(Buffer.get(), ReadSize))
			{
				nCodePage=CP_UTF8;
				bDetect=true;
			}
			else
			{
				int cp = GetCpUsingUniversalDetector(Buffer.get(), ReadSize);
				if ( cp >= 0 )
				{
					if (Global->Opt->strNoAutoDetectCP.Get() == L"-1")
					{
						// "-1": with CPMenuMode on, accept only ACP, OEMCP or code pages marked as favorite.
						if ( Global->Opt->CPMenuMode )
						{
							if ( static_cast<UINT>(cp) != GetACP() && static_cast<UINT>(cp) != GetOEMCP() )
							{
								long long selectType = Global->CodePages->GetFavorite(cp);
								if (0 == (selectType & CPST_FAVORITE))
									cp = -1;
							}
						}
					}
					else
					{
						// Otherwise the option is a list of code pages that must not be auto-detected.
						const auto BannedCpList = StringToList(Global->Opt->strNoAutoDetectCP, STLF_UNIQUE);

						if (std::find(ALL_CONST_RANGE(BannedCpList), std::to_wstring(cp)) != BannedCpList.cend())
						{
							cp = -1;
						}
					}
				}

				if (cp != -1)
				{
					nCodePage = cp;
					bDetect = true;
				}
			}
		}
	}

	if (pSignatureFound)
	{
		*pSignatureFound = bSignatureFound;
	}
	return bDetect;
}
Exemplo n.º 8
0
bool FileProcess::LoadClass(std::string strFile, std::string strTable)
{
	tinyxml2::XMLDocument* doc = new tinyxml2::XMLDocument();
	if (NULL == doc)
	{
		return false;
	}
	doc->LoadFile(strFile.c_str());
	auto ff = doc->Value();
	tinyxml2::XMLElement* root = doc->RootElement();
	auto classElement = root->FirstChildElement("Propertys");
	if (classElement)
	{
		auto nodeElement = classElement->FirstChildElement("Property");
		if (nodeElement)
		{
			while (true)
			{
				if (!nodeElement || (std::string)(nodeElement->Attribute("Save")) != "1")
				{
					if (!nodeElement)
					{
						break;
					}
					nodeElement = nodeElement->NextSiblingElement();
					continue;
				}

				std::string strID = nodeElement->Attribute("Id");

				auto chrDesc = nodeElement->Attribute("Desc");
				std::string strDesc = chrDesc;
				auto descLength = strlen(chrDesc);
				if (bConvertIntoUTF8 && IsTextUTF8(chrDesc, descLength))
				{
					if (descLength > 0)
					{
						char* chrArrDesc = new char[descLength];
						Utf8ToGbk((char*)chrDesc, chrArrDesc);
						strDesc = chrArrDesc;
						delete[] chrArrDesc;
					}
				}
				//////////////////////////////////////////////////////////////////////////
				std::string strType = nodeElement->Attribute("Type");

				std::string toWrite = "";
				if (strType == "string")
				{
					toWrite = "ALTER TABLE `" + strTable + "` ADD `" + strID + "` varchar(128) DEFAULT '' COMMENT '" + strDesc + "';";
				}
				else if (strType == "int")
				{
					toWrite = "ALTER TABLE `" + strTable + "` ADD `" + strID + "` bigint(11) DEFAULT '0' COMMENT '" + strDesc + "';";
				}
				else if (strType == "object")
				{
					toWrite = "ALTER TABLE `" + strTable + "` ADD `" + strID + "` varchar(128) DEFAULT '' COMMENT '" + strDesc + "';";
				}
				else if (strType == "float")
				{
					toWrite = "ALTER TABLE `" + strTable + "` ADD `" + strID + "` float(11,3) DEFAULT '0' COMMENT '" + strDesc + "';";
				}
				else
				{
					toWrite = "ALTER TABLE `" + strTable + "` ADD `" + strID + "` varchar(128) DEFAULT '' COMMENT '" + strDesc + "';";
				}
				toWrite += "\n";
				fwrite(toWrite.c_str(), toWrite.length(), 1, mysqlWriter);

				if (!nodeElement)
				{
					break;
				}
				nodeElement = nodeElement->NextSiblingElement();
			}
		}
	}


	auto xRecordsNode = root->FirstChildElement("Records");
	if (xRecordsNode)
	{
		auto nodeElement = xRecordsNode->FirstChildElement("Record");
		if (nodeElement)
		{
			while (true)
			{
				if (!nodeElement || (std::string)(nodeElement->Attribute("Save")) != "1")
				{
					if (!nodeElement)
					{
						break;
					}
					nodeElement = nodeElement->NextSiblingElement();
					continue;
				}

				std::string strID = nodeElement->Attribute("Id");

				auto chrDesc = nodeElement->Attribute("Desc");
				std::string strDesc = chrDesc;
				auto descLength = strlen(chrDesc);
				if (bConvertIntoUTF8 && IsTextUTF8(chrDesc, descLength))
				{
					if (descLength > 0)
					{
						char* chrArrDesc = new char[descLength];
						Utf8ToGbk((char*)chrDesc, chrArrDesc);
						strDesc = chrArrDesc;
						delete[] chrArrDesc;
					}
				}

				std::string toWrite = "ALTER TABLE `" + strTable + "` ADD `" + strID + "` BLOB COMMENT '" + strDesc + "';";
				toWrite += "\n";
				fwrite(toWrite.c_str(), toWrite.length(), 1, mysqlWriter);

				if (nodeElement == classElement->LastChildElement())
				{
					break;
				}
				nodeElement = nodeElement->NextSiblingElement();
			}
		}

	}
	delete doc;
	return true;
}
Exemplo n.º 9
0
bool FileProcess::CreateIniXML(std::string strFile)
{
	std::cout << strFile << std::endl;
	// 打开excel
	MiniExcelReader::ExcelFile* x = new MiniExcelReader::ExcelFile();
	if (!x->open(strFile.c_str()))
	{
		printf("can't open %s\n", strFile.c_str());
		return false;
	}
	////////////////////////////////////////////////////////////////////////////
	// 开始创建xml
	tinyxml2::XMLDocument* iniDoc = new tinyxml2::XMLDocument();
	if (NULL == iniDoc)
	{
		return false;
	}
	//xml声明
	tinyxml2::XMLDeclaration *pDel = iniDoc->NewDeclaration("xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"yes\"");
	if (NULL == pDel)
	{
		return false;
	}

	iniDoc->LinkEndChild(pDel);

	// 写入XML root标签
	tinyxml2::XMLElement* root = iniDoc->NewElement("XML");
	iniDoc->LinkEndChild(root);

	// 读取excel中每一个sheet
	std::vector<MiniExcelReader::Sheet>& sheets = x->sheets();
	std::vector<std::string> vColNames;
	std::vector<std::string> vDataIDs;
	std::map<std::string, std::string> mDataValues;
	int nCurrentCol = 0;

	for (MiniExcelReader::Sheet& sh : sheets)
	{
		std::string strSheetName = sh.getName();

		std::string strUpperSheetName = strSheetName.substr(0, 8);
		transform(strUpperSheetName.begin(), strUpperSheetName.end(), strUpperSheetName.begin(), ::tolower);

		if (strUpperSheetName != "property")
		{
			continue;
		}

		const MiniExcelReader::Range& dim = sh.getDimension();

		for (int c = dim.firstCol; c <= dim.lastCol; c++)
		{
			MiniExcelReader::Cell* cell = sh.getCell(dim.firstRow, c);
			if (cell)
			{
				vColNames.push_back(cell->value);
			}
		}

		if (vDataIDs.size() <= 0)
		{
			for (int r = dim.firstRow + 8; r <= dim.lastRow; r++)
			{
				MiniExcelReader::Cell* cell = sh.getCell(r, dim.firstCol);
				if (cell)
				{
					if (cell->value.length() > 0)
					{
						vDataIDs.push_back(cell->value);
					}
				}
			}
		}

		for (int r = dim.firstRow + 8; r <= vDataIDs.size() + 8; r++)
		{
			std::string testValue = "";
			MiniExcelReader::Cell* cell = sh.getCell(r, dim.firstCol);

			for (int c = dim.firstCol; c <= dim.lastCol; c++)
			{
				std::string name = vColNames[c - 1 + nCurrentCol];
				std::string value = "";
				MiniExcelReader::Cell* cell = sh.getCell(r, c);
				std::string vType = sh.getCell(dim.firstRow + 1, c)->value;
				if (cell)
				{
					std::string valueCell = cell->value;
					transform(valueCell.begin(), valueCell.end(), valueCell.begin(), ::toupper);
					if (valueCell == "TRUE" || valueCell == "FALSE")
					{
						value = valueCell == "TRUE" ? 1 : 0;
					}
					else
					{
						value = cell->value;
						if (value.size() <= 0)
						{
							if (vType == "int" || vType == "float")
							{
								value = "0";
							}
							else
							{
								value = "";
							}
						}
					}
				}
				else
				{
					if (vType == "int" || vType == "float")
					{
						value = "0";
					}
					else
					{
						value = "";
					}
				}
				//check the field is utf8, then convert it into gbk.
				auto descLength = value.size();
				if (bConvertIntoUTF8 && IsTextUTF8(value.c_str(), descLength))
				{
					if (descLength > 0)
					{
						char* chrArrDesc = new char[descLength];
						Utf8ToGbk((char*)value.c_str(), chrArrDesc);
						value = chrArrDesc;
						delete[] chrArrDesc;
					}
				}
				mDataValues.insert(std::pair<string, string>(vDataIDs[r - 9] + name, value));
			}
		}
		nCurrentCol += dim.lastCol;
	}

	int nDataCount = 0;
	if (strFile.find("NPC") > 0 && strFile.find("NPC") < strFile.size())
	{
		int a = 0;
	}
	for (auto strID : vDataIDs)
	{
		auto objectNode = iniDoc->NewElement("Object");
		root->LinkEndChild(objectNode);
		for (auto strColName : vColNames)
		{
			if (strColName == "Id")
			{
				const char* chrID = objectNode->Attribute("Id");
				if (!chrID)
				{
					objectNode->SetAttribute(strColName.c_str(), mDataValues[strID + strColName].c_str());
				}
			}
			else
			{
				objectNode->SetAttribute(strColName.c_str(), mDataValues[strID + strColName].c_str());
			}
			nDataCount++;
		}
	}

	////////////////////////////////////////////////////////////////////////////
	// 保存文件
	int nLastPoint = strFile.find_last_of(".") + 1;
	int nLastSlash = strFile.find_last_of("/") + 1;
	std::string strFileName = strFile.substr(nLastSlash, nLastPoint - nLastSlash - 1);
	std::string strFileExt = strFile.substr(nLastPoint, strFile.length() - nLastPoint);

	std::string strXMLFile = strToolBasePath + strXMLIniPath + strFileName;
	if (nCipher > 0)
	{
		strXMLFile += ".NF";
	}
	else
	{
		strXMLFile += ".xml";
	}

	iniDoc->SetBOM(false);
	iniDoc->SaveFile(strXMLFile.c_str());
	delete iniDoc;
	delete x;
	return true;
}