bool GetFileFormat(File& file, UINT& nCodePage, bool* pSignatureFound, bool bUseHeuristics) { DWORD dwTemp=0; bool bSignatureFound = false; bool bDetect=false; DWORD Readed = 0; if (file.Read(&dwTemp, sizeof(dwTemp), Readed) && Readed > 1 ) // minimum signature size is 2 bytes { if (LOWORD(dwTemp) == SIGN_UNICODE) { nCodePage = CP_UNICODE; file.SetPointer(2, nullptr, FILE_BEGIN); bSignatureFound = true; } else if (LOWORD(dwTemp) == SIGN_REVERSEBOM) { nCodePage = CP_REVERSEBOM; file.SetPointer(2, nullptr, FILE_BEGIN); bSignatureFound = true; } else if ((dwTemp & 0x00FFFFFF) == SIGN_UTF8) { nCodePage = CP_UTF8; file.SetPointer(3, nullptr, FILE_BEGIN); bSignatureFound = true; } else { file.SetPointer(0, nullptr, FILE_BEGIN); } } if (bSignatureFound) { bDetect = true; } else if (bUseHeuristics) { file.SetPointer(0, nullptr, FILE_BEGIN); DWORD Size=0x8000; // BUGBUG. TODO: configurable LPVOID Buffer=xf_malloc(Size); DWORD ReadSize = 0; bool ReadResult = file.Read(Buffer, Size, ReadSize); file.SetPointer(0, nullptr, FILE_BEGIN); if (ReadResult && ReadSize) { int test= IS_TEXT_UNICODE_STATISTICS| IS_TEXT_UNICODE_REVERSE_STATISTICS| IS_TEXT_UNICODE_CONTROLS| IS_TEXT_UNICODE_REVERSE_CONTROLS| IS_TEXT_UNICODE_ILLEGAL_CHARS| IS_TEXT_UNICODE_ODD_LENGTH| IS_TEXT_UNICODE_NULL_BYTES; if (IsTextUnicode(Buffer, ReadSize, &test)) { if (!(test&IS_TEXT_UNICODE_ODD_LENGTH) && !(test&IS_TEXT_UNICODE_ILLEGAL_CHARS)) { if ((test&IS_TEXT_UNICODE_NULL_BYTES) || (test&IS_TEXT_UNICODE_CONTROLS) || (test&IS_TEXT_UNICODE_REVERSE_CONTROLS)) { if ((test&IS_TEXT_UNICODE_CONTROLS) || (test&IS_TEXT_UNICODE_STATISTICS)) { nCodePage=CP_UNICODE; bDetect=true; } else if ((test&IS_TEXT_UNICODE_REVERSE_CONTROLS) || (test&IS_TEXT_UNICODE_REVERSE_STATISTICS)) { nCodePage=CP_REVERSEBOM; bDetect=true; } } } } else if (IsTextUTF8(static_cast<LPBYTE>(Buffer), ReadSize)) { nCodePage=CP_UTF8; bDetect=true; } else { nsUniversalDetectorEx *ns = new nsUniversalDetectorEx(); 
ns->HandleData(static_cast<LPCSTR>(Buffer), ReadSize); ns->DataEnd(); int cp = ns->getCodePage(); if ( cp >= 0 ) { const wchar_t *deprecated = Opt.strNoAutoDetectCP.CPtr(); if ( 0 == wcscmp(deprecated, L"-1") ) { if ( Opt.CPMenuMode ) { if ( static_cast<UINT>(cp) != GetACP() && static_cast<UINT>(cp) != GetOEMCP() ) { int selectType = 0; wchar_t szcp[16]; _snwprintf(szcp, ARRAYSIZE(szcp), L"%d", cp); GeneralCfg->GetValue(FavoriteCodePagesKey, szcp, &selectType, 0); if (0 == (selectType & CPST_FAVORITE)) cp = -1; } } } else { while (*deprecated) { while (*deprecated && (*deprecated < L'0' || *deprecated > L'9')) ++deprecated; int dp = (int)wcstol(deprecated, (wchar_t **)&deprecated, 0); if (cp == dp) { cp = -1; break; } } } } if (cp != -1) { nCodePage = cp; bDetect = true; } delete ns; } } xf_free(Buffer); } if (pSignatureFound) { *pSignatureFound = bSignatureFound; } return bDetect; }
/**
 * Queries an HTTP server and reports the value of its "Server:" response
 * header together with the page title.
 *
 * @param strServer  Host (and optional path); "http://" is prepended.
 * @param strTitle   Receives the text between <title> and </title> when both
 *                   tags are present; untouched otherwise.
 * @return The "Server:" header value (prefixed with "Windows, " when it
 *         contains "Microsoft"), "Unknown" when the header is absent, or an
 *         empty string when the request fails.
 */
CString GetServerDetailInfo(CString strServer, CString &strTitle)
{
    strServer.Insert(0, "http://");
    CString strReturn = "Unknown", szAllData, szData;
    CInternetSession ss(_T("session"), 0, INTERNET_OPEN_TYPE_PRECONFIG, NULL, NULL,
                        INTERNET_FLAG_DONT_CACHE | INTERNET_FLAG_RELOAD); // do not use the cache
    CHttpFile *pF = NULL;

    try
    {
        ss.SetOption(INTERNET_OPTION_CONNECT_TIMEOUT, 10 * 1000);
        pF = static_cast<CHttpFile *>(ss.OpenURL(strServer, 1,
                INTERNET_FLAG_EXISTING_CONNECT | INTERNET_FLAG_TRANSFER_ASCII | INTERNET_FLAG_NO_AUTO_REDIRECT));

        if (pF == NULL)
        {
            // No file object was returned: report it like any other failure.
            strReturn = "";
        }
        else
        {
            pF->QueryInfo(HTTP_QUERY_RAW_HEADERS_CRLF, szAllData, 0);

            int nPos1 = szAllData.Find("Server: ", 0);
            if (nPos1 == -1)
            {
                strReturn = "Unknown";
            }
            else
            {
                nPos1 += 8; // length of "Server: "
                int nPos2 = szAllData.Find("\r\n", nPos1);
                // Take the rest of the block if the header is not terminated.
                strReturn = (nPos2 == -1) ? szAllData.Mid(nPos1)
                                          : szAllData.Mid(nPos1, nPos2 - nPos1);
                if (strReturn.Find("Microsoft", 0) != -1)
                {
                    strReturn.Insert(0, "Windows, ");
                }
            }

            // Read the whole body and look for the page title.
            szAllData.Empty();
            while (pF->ReadString(szData))
                szAllData += szData;

            // NOTE(review): the body is lower-cased before the title is cut
            // out, so the returned title is lower-case as well - confirm that
            // this is intended.
            szAllData.MakeLower();
            nPos1 = szAllData.Find("<title>", 0);
            if (nPos1 != -1)
            {
                int nPos2 = szAllData.Find("</title>", nPos1);
                if (nPos2 != -1)
                {
                    strTitle = szAllData.Mid(nPos1 + 7, nPos2 - nPos1 - 7); // 7 == strlen("<title>")
                    if (IsTextUTF8(strTitle.GetBuffer(0), strTitle.GetLength()))
                        Utf8ToAnsi(strTitle);
                }
            }
        }
    }
    catch (CException *e)
    {
        // MFC throws heap-allocated exception objects; release them.
        e->Delete();
        strReturn = "";
    }
    catch (...)
    {
        strReturn = "";
    }

    if (pF != NULL)
    {
        pF->Close();
        delete pF;
        pF = NULL;
    }

    // ss is a stack object: close it, but never delete it.
    if (ss != NULL)
    {
        ss.Close();
    }

    return strReturn;
}
bool GetFileFormat( api::fs::file& file, uintptr_t& nCodePage, bool* pSignatureFound, bool bUseHeuristics, bool* pPureAscii) { DWORD dwTemp = 0; bool bSignatureFound = false; bool bDetect = false; bool bPureAscii = false; size_t Readed = 0; if (file.Read(&dwTemp, sizeof(dwTemp), Readed) && Readed > 1 ) // minimum signature size is 2 bytes { if (LOWORD(dwTemp) == SIGN_UNICODE) { nCodePage = CP_UNICODE; file.SetPointer(2, nullptr, FILE_BEGIN); bSignatureFound = true; } else if (LOWORD(dwTemp) == SIGN_REVERSEBOM) { nCodePage = CP_REVERSEBOM; file.SetPointer(2, nullptr, FILE_BEGIN); bSignatureFound = true; } else if ((dwTemp & 0x00FFFFFF) == SIGN_UTF8) { nCodePage = CP_UTF8; file.SetPointer(3, nullptr, FILE_BEGIN); bSignatureFound = true; } else { file.SetPointer(0, nullptr, FILE_BEGIN); } } if (bSignatureFound) { bDetect = true; } else if (bUseHeuristics) { file.SetPointer(0, nullptr, FILE_BEGIN); size_t Size = 0x8000; // BUGBUG. TODO: configurable char_ptr Buffer(Size); size_t ReadSize = 0; bool ReadResult = file.Read(Buffer.get(), Size, ReadSize); file.SetPointer(0, nullptr, FILE_BEGIN); bPureAscii = ReadResult && !ReadSize; // empty file == pure ascii if (ReadResult && ReadSize) { // BUGBUG MSDN documents IS_TEXT_UNICODE_BUFFER_TOO_SMALL but there is no such thing if (ReadSize > 1) { int test = IS_TEXT_UNICODE_UNICODE_MASK | IS_TEXT_UNICODE_REVERSE_MASK | IS_TEXT_UNICODE_NOT_UNICODE_MASK | IS_TEXT_UNICODE_NOT_ASCII_MASK; IsTextUnicode(Buffer.get(), static_cast<int>(ReadSize), &test); // return value is ignored, it's ok. 
if (!(test & IS_TEXT_UNICODE_NOT_UNICODE_MASK) && (test & IS_TEXT_UNICODE_NOT_ASCII_MASK)) { if (test & IS_TEXT_UNICODE_UNICODE_MASK) { nCodePage = CP_UNICODE; bDetect = true; } else if (test & IS_TEXT_UNICODE_REVERSE_MASK) { nCodePage = CP_REVERSEBOM; bDetect = true; } } if (!bDetect && IsTextUTF8(Buffer.get(), ReadSize, bPureAscii)) { nCodePage = CP_UTF8; bDetect = true; } } if (!bDetect && !bPureAscii) { int cp = GetCpUsingUniversalDetector(Buffer.get(), ReadSize); if ( cp >= 0 ) { if (Global->Opt->strNoAutoDetectCP.Get() == L"-1") { if ( Global->Opt->CPMenuMode ) { if ( static_cast<UINT>(cp) != GetACP() && static_cast<UINT>(cp) != GetOEMCP() ) { long long selectType = Codepages().GetFavorite(cp); if (0 == (selectType & CPST_FAVORITE)) cp = -1; } } } else { std::vector<string> BannedCpList; split(BannedCpList, Global->Opt->strNoAutoDetectCP, STLF_UNIQUE); if (std::find(ALL_CONST_RANGE(BannedCpList), std::to_wstring(cp)) != BannedCpList.cend()) { cp = -1; } } } if (cp != -1) { nCodePage = cp; bDetect = true; } } } } if (pSignatureFound) *pSignatureFound = bSignatureFound; if (pPureAscii) *pPureAscii = bPureAscii; return bDetect; }
bool OldGetFileFormat(FILE *file, UINT &nCodePage, bool *pSignatureFound, bool bUseHeuristics) { DWORD dwTemp=0; bool bSignatureFound = false; bool bDetect=false; if (fread(&dwTemp, 1, 4, file)) { if (LOWORD(dwTemp) == SIGN_UNICODE) { nCodePage = CP_UNICODE; fseek(file, 2, SEEK_SET); bSignatureFound = true; } else if (LOWORD(dwTemp) == SIGN_REVERSEBOM) { nCodePage = CP_REVERSEBOM; fseek(file, 2, SEEK_SET); bSignatureFound = true; } else if ((dwTemp & 0x00FFFFFF) == SIGN_UTF8) { nCodePage = CP_UTF8; fseek(file, 3, SEEK_SET); bSignatureFound = true; } else fseek(file, 0, SEEK_SET); } if (bSignatureFound) { bDetect = true; } else if (bUseHeuristics) { fseek(file, 0, SEEK_SET); size_t sz=0x8000; // BUGBUG. TODO: configurable LPVOID Buffer=xf_malloc(sz); sz=fread(Buffer,1,sz,file); fseek(file,0,SEEK_SET); if (sz) { int test= IS_TEXT_UNICODE_STATISTICS| IS_TEXT_UNICODE_REVERSE_STATISTICS| IS_TEXT_UNICODE_CONTROLS| IS_TEXT_UNICODE_REVERSE_CONTROLS| IS_TEXT_UNICODE_ILLEGAL_CHARS| IS_TEXT_UNICODE_ODD_LENGTH| IS_TEXT_UNICODE_NULL_BYTES; if (IsTextUnicode(Buffer, (int)sz, &test)) { if (!(test&IS_TEXT_UNICODE_ODD_LENGTH) && !(test&IS_TEXT_UNICODE_ILLEGAL_CHARS)) { if ((test&IS_TEXT_UNICODE_NULL_BYTES) || (test&IS_TEXT_UNICODE_CONTROLS) || (test&IS_TEXT_UNICODE_REVERSE_CONTROLS)) { if ((test&IS_TEXT_UNICODE_CONTROLS) || (test&IS_TEXT_UNICODE_STATISTICS)) { nCodePage=CP_UNICODE; bDetect=true; } else if ((test&IS_TEXT_UNICODE_REVERSE_CONTROLS) || (test&IS_TEXT_UNICODE_REVERSE_STATISTICS)) { nCodePage=CP_REVERSEBOM; bDetect=true; } } } } else if (IsTextUTF8((const LPBYTE)Buffer, sz)) { nCodePage=CP_UTF8; bDetect=true; } else { nsUniversalDetectorEx *ns = new nsUniversalDetectorEx(); ns->HandleData((const char*)Buffer,(PRUint32)sz); ns->DataEnd(); int cp = ns->getCodePage(); if (cp != -1) { nCodePage = cp; bDetect = true; } delete ns; } } xf_free(Buffer); } if (pSignatureFound) *pSignatureFound = bSignatureFound; return bDetect; }
bool CCharsetDetector::DoIt(unsigned long dwParserType, char* pBuf, unsigned long dwLength) { //I. "��������(elimination logic)����С�������ȷ���ַ���"��http://www.renpeicheng.com/plus/view-2619-1.html�� //float fTemp = 0.0; unsigned long dwTempIdx = 0; unsigned long dwTempOrder = 0; unsigned long dwSamiNoiseLen = 0; unsigned long dwMicroDvdLen = 0; unsigned long dwSubViewLen = 0; unsigned long dwTimeLineNoiseLen = 0; unsigned long dwTempPostion = 0; bool bJISSecondChar = true; Reset(); if (IsTextUTF8(pBuf, dwLength)) { m_pSMUTF_8Info->dwDetectState = EDetectStateMe; m_bDone = true; goto EXIT; } for (long i = 0; i < dwLength; i++) { char b = pBuf[i]; char c = 0x00; if (5 == dwParserType)// 5 means EParserTypeSmi. { dwSamiNoiseLen = IsSamiNoise(pBuf, i, dwLength); if (dwSamiNoiseLen > 0) { i += dwSamiNoiseLen - 1; continue; } } dwMicroDvdLen = IsMicroDvdNoise(pBuf, i, dwLength); if (dwMicroDvdLen > 0) { i += dwMicroDvdLen; continue; } //dwTimeLineNoiseLen = ISTimeLineNoise(pBuf, i, dwLength); //if (dwTimeLineNoiseLen > 0) //{ // i+=dwTimeLineNoiseLen - 1; // continue; //} if (dwLength == (i - 1)) { c = 0x00; } else { c = pBuf[i + 1]; } if ((b&0xff) > 0xA0) { if (i > (dwTempIdx + 1)) { m_bSecondChar = false; } if (true == m_bSecondChar) { m_fGb2312 += m_StatisticsArray[8]->SecondByteFreq()[(unsigned long)((b&0xff) - 0xA0)] * 0.5; m_fGbEucKR += m_StatisticsArray[4]->SecondByteFreq()[(unsigned long)((b&0xff) - 0xA0)] * 0.5; m_fBig5 += m_StatisticsArray[6]->SecondByteFreq()[(unsigned long)((b&0xff) - 0xA0)] * 0.5; //************************************************************************************** // simplified chinese. 
// first byte range: 0xb0 -- 0xfe // second byte range: 0xa1 -- 0xfe if (i > 0) { if ((unsigned long)(pBuf[i-1]&0xff) >= 0xb0 && (unsigned long)(b&0xff) >= 0xa1) { m_dwGB2312Count++; dwTempOrder = 94*((unsigned long)(pBuf[i-1]&0xff)-(unsigned long)0xb0) + (unsigned long)(b&0xff)- (unsigned long)0xa1; } else dwTempOrder = -1; } if (dwTempOrder < GB2312_TABLE_SIZE) { if (GB2312CharToFreqOrder[dwTempOrder] < 1024) { m_dwGB2312OccurrenceCharCount++; } } //************************************************************************************** // big5 // first byte range: 0xa4 -- 0xfe // second byte range: 0x40 -- 0x7e , 0xa1 -- 0xfe if (i > 0) { if ((unsigned long)(pBuf[i-1]&0xff) >= 0xa4 && (unsigned long)(b&0xff) >= 0xa1) { m_dwBIG5Count++; dwTempOrder = 157 * ((unsigned long)(pBuf[i-1]&0xff)-(unsigned long)0xa4) + (unsigned long)(b&0xff)- (unsigned long)0xa1 + 63; } else dwTempOrder = -1; } if (dwTempOrder < BIG5_TABLE_SIZE) { if (Big5CharToFreqOrder[dwTempOrder] < 1024) { m_dwBIG5OccurrenceCharCount++; } } //************************************************************************************** // EUC-KR // first byte range: 0xb0 -- 0xfe // second byte range: 0xa1 -- 0xfe if (i > 0) { if ((unsigned long)(pBuf[i-1]&0xff) >= 0xb0 && (unsigned long)(b&0xff) >= 0xa1) { m_dwEUCKRCount++; dwTempOrder = 94 * ((unsigned long)(pBuf[i-1]&0xff)-(unsigned long)0xb0) + (unsigned long)(b&0xff)- (unsigned long)0xa1; } else dwTempOrder = -1; } if (dwTempOrder < EUCKR_TABLE_SIZE) { if (EUCKRCharToFreqOrder[dwTempOrder] < 1024) { m_dwEUCKROccurrenceCharCount++; } } //************************************************************************************** m_dwCharacterCount++; m_bSecondChar = false; } else { m_fGb2312 += m_StatisticsArray[8]->SecondByteFreq()[(unsigned long)((b&0xff) - 0xA0)] * 0.5; m_fGbEucKR += m_StatisticsArray[4]->SecondByteFreq()[(unsigned long)((b&0xff) - 0xA0)] * 0.5; m_fBig5 += m_StatisticsArray[6]->SecondByteFreq()[(unsigned long)((b&0xff) - 0xA0)] * 0.5; if 
(((b&0xff) > 0xb0 && (b&0xff) < 0xca) && (c&0xff) > 0xa0) //most korean character 16~40(+ A0) qzone { m_dwKRCount++; } else if ((0xa4 == (b&0xff) || 0xa5 == (b&0xff)) && (c&0xff) > 0xa0)//chinese and euc-jp zone for kana { m_dwZHJPCount++; } else if ((0xaa == (b&0xff) || 0xab == (b&0xff)) && (c&0xff) > 0xa0)//korean zone for kana { m_dwKRJPCount++; } m_bSecondChar = true; dwTempIdx = i; } } else if ((b&0xff) >= 0x40 && true == m_bSecondChar) { //************************************************************************************** // big5 // first byte range: 0xa4 -- 0xfe // second byte range: 0x40 -- 0x7e , 0xa1 -- 0xfe if (i > 0) { if ((unsigned long)(pBuf[i-1]&0xff) >= 0xa4 && (unsigned long)(b&0xff) >= 0x40) { m_dwBIG5Count++; dwTempOrder = 157 * ((unsigned long)(pBuf[i-1]&0xff)-(unsigned long)0xa4) + (unsigned long)(b&0xff)- (unsigned long)0x40; } else dwTempOrder = -1; } if (dwTempOrder < BIG5_TABLE_SIZE) { if (Big5CharToFreqOrder[dwTempOrder] < 1024) { m_dwBIG5OccurrenceCharCount++; } } //************************************************************************************** m_dwBig5OnlyCount++; m_bSecondChar = false; } else if (((b&0xff) == 0x82 &&(c&0xff) <=0xf1 && (c&0xff) >=0x9f) || ((b&0xff) == 0x83 &&(c&0xff) <=0x96 && (c&0xff) >=0x40)) { m_dwJISJPCount++; } //************************************************************************************************************ //for sjis encoding // first byte range: 0x81 -- 0x9f , 0xe0 -- 0xfe // second byte range: 0x40 -- 0x7e, 0x81 -- oxfe if (i - dwTempPostion > 1) { bJISSecondChar = true; } if ((((b&0xff) >= 0x81 &&(b&0xff) <=0x9f) || ((b&0xff) >= 0xe0 &&(b&0xff) <=0xfe)) && (((c&0xff) >= 0x40 &&(c&0xff) <=0x7e) || ((c&0xff) >= 0x81 &&(c&0xff) <=0xfe)) && true == bJISSecondChar) { m_dwJISCount++; bJISSecondChar = false; dwTempPostion = i; if (((b&0xff) >= 0x81 &&(b&0xff) <=0x9f)) { dwTempOrder = 188 * ((unsigned long)(b&0xff) - (unsigned long)0x81) +(unsigned long)(c&0xff) - 0x40; } else if (((b&0xff) >= 
0xe0 &&(b&0xff) <=0xfe)) { dwTempOrder = 188 * ((unsigned long)(b&0xff) - (unsigned long)0xe0 + 31) +(unsigned long)(c&0xff) - 0x40; } else { dwTempOrder = -1; } if (dwTempOrder < JIS_TABLE_SIZE) { if (JISCharToFreqOrder[dwTempOrder] < 1024) { m_dwJISOccurrenceCharCount++; } } } /****************************************************** /#The emphasis of following charset detector algorithm is <<CONFIDENCE>>. /#Confidence is a float value, if one's confidence is more than 0.65(found the right charset). /#Set the EDetectStateMe, and get out this function. ******************************************************/ //************************************************************************************************************ /*for windows-1252 encoding*/ if(!m_pSBWIN_1252Info->dwState) m_pSBWIN_1252Info->dwState = DetectWIN1252Codepage(&Win1252Model, pBuf[i], m_pSBWIN_1252Info); if (EDetectStateMe == m_pSBWIN_1252Info->dwState) { break; } //************************************************************************************************************ //for koir-8 encoding //if character is not a symbol or punctuation character if (EDetectStateOn == m_pSBKOIR_8Info->dwState) m_pSBKOIR_8Info->dwState = DetectSingleByteCodepage(&Koi8rModel, pBuf[i], m_pSBKOIR_8Info); if (EDetectStateMe == m_pSBKOIR_8Info->dwState) { break; } //************************************************************************************************************ //for win-1251 encoding if(!m_pSBWIN_1251Info->dwState) m_pSBWIN_1251Info->dwState = DetectSingleByteCodepage(&Win1251Model, pBuf[i], m_pSBWIN_1251Info); if (EDetectStateMe == m_pSBWIN_1251Info->dwState) { break; } //************************************************************************************************************ /*for iso-1859-2 encoding*/ if(!m_pSBISO8859_2Info->dwState) m_pSBISO8859_2Info->dwState = DetectSingleByteCodepage(&Latin2HungarianModel, pBuf[i], m_pSBISO8859_2Info); 
//************************************************************************************************************ /*for win-1250 encoding*/ if(!m_pSBWIN_1250Info->dwState) m_pSBWIN_1250Info->dwState = DetectSingleByteCodepage(&Win1250HungarianModel, pBuf[i], m_pSBWIN_1250Info); /*windows-1250 is very likely to iso-8859-2*/ if (EDetectStateMe == m_pSBWIN_1250Info->dwState || EDetectStateMe == m_pSBISO8859_2Info->dwState) { break; } //************************************************************************************************************ //for win-1253 encoding if(!m_pSBWIN_1253Info->dwState) m_pSBWIN_1253Info->dwState = DetectSingleByteCodepage(&Win1253Model, pBuf[i], m_pSBWIN_1253Info); //************************************************************************************************************ //for iso-8859-7 encoding if(!m_pSBISO8859_7Info->dwState) m_pSBISO8859_7Info->dwState = DetectSingleByteCodepage(&Latin7Model, pBuf[i], m_pSBISO8859_7Info); /*windwos-1253 is very likely to iso-8859-7*/ if (EDetectStateMe == m_pSBISO8859_7Info->dwState || EDetectStateMe == m_pSBWIN_1253Info->dwState) { break; } if (!m_pSMUTF_8Info->dwDetectState) m_pSMUTF_8Info->dwDetectState = DetectCodingStateMachine(&UTF8SMModel, pBuf[i], m_pSMUTF_8Info); /*************************************************************************************************************/ /*====START filter charset.*/ /*for almost the same win-1250 and iso-1859-2: *Because iso-8859-2 may not use characters between 128 with 159, so I count it.*/ if ( (unsigned char)pBuf[i]>= 128 && (unsigned char)pBuf[i] <= 159) { m_dwWINvsISO8859Count++; } /*====END of filter charset.*/ for (long j = 0; j < m_dwItems; ) { unsigned char st = ICharsetVerifier::getNextState(m_VerifierArray[m_dwItemIdx[j]], b, m_State[j]); if (st == 2) { //Bingo to detector the charset. 
if (ECodepageGB18030 == m_VerifierArray[m_dwItemIdx[j]]->Charset() && i < 2000) { j++; continue; } m_eCodepageType = m_VerifierArray[m_dwItemIdx[j]]->Charset(); m_bDone = true; goto EXIT; } if (st == 1) { //filter if (j < m_dwItems) { m_dwItems--; m_dwItemIdx[j] = m_dwItemIdx[m_dwItems]; m_State[j] = m_State[m_dwItems]; } } else { m_State[j++] = st; } } if (m_dwItems <= 1) { if ((i <= dwLength - 1) && i < 2000) { continue; } if (1 == m_dwItems) { m_eCodepageType = m_VerifierArray[m_dwItemIdx[0]]->Charset(); } m_bDone = true; goto EXIT; } long nonUCS2Num = 0; long nonUCS2Idx = 0; for (long j = 0; j < m_dwItems; j++) { if (!m_VerifierArray[m_dwItemIdx[j]]->IsUCS2() && !m_VerifierArray[m_dwItemIdx[j]]->IsUCS2()) { nonUCS2Num++; nonUCS2Idx = j; } } if (1 == nonUCS2Num) { if ((i <= dwLength - 1) && i < 2000) { continue; } m_eCodepageType = m_VerifierArray[m_dwItemIdx[nonUCS2Idx]]->Charset(); m_bDone = true; goto EXIT; } } //m_bRunSampler = true; if (m_bRunSampler) { Sample(pBuf, dwLength); // m_bRunSampler = false; } EXIT: m_bSecondChar = false; return m_bDone; }
/**
 * Dispatches a JSON-RPC message received from the server.
 *
 * Messages whose "method" is handled here are processed directly; any other
 * method is forwarded to the current application, if there is one.
 *
 * @param jsonObj  The received message.
 * @return RESULT_SUCCESS for messages handled here or carrying no "method";
 *         the current application's result for forwarded messages;
 *         RESULT_APPLICATION_NOT_REGISTERED when a message has to be
 *         forwarded but no application is current.
 */
Result AppList::recvFromServer(Json::Value jsonObj)
{
    if (jsonObj.isMember("method")) {
        std::string str_method = jsonObj["method"].asString();

        if (str_method == "BasicCommunication.OnAppRegistered") {
            newAppRegistered(jsonObj);
            m_pUIManager->onAppShow(ID_APPLINK);
        } else if (str_method == "BasicCommunication.OnAppUnregistered") {
            int appID = jsonObj["params"]["appID"].asInt();
            m_pUIManager->onAppUnregister(appID);
        } else if (str_method == "VR.VRExitApp") {
            // There may be no current application; only announce the exit
            // when there is one to name.
            if (m_pCurApp)
                m_pUIManager->tsSpeak(ID_EXIT, "退出"+ m_pCurApp->m_szAppName);
            m_pUIManager->onAppShow(ID_APPLINK);
        } else if (str_method == "Navigation.StopStream") {
            m_pUIManager->onVideoStreamStop();
            ShowPreviousUI();
        } else if (str_method == "VR.VRSwitchApp") {
            // Example message:
            // {
            //    "jsonrpc" : "2.0",
            //    "method" : "VR.VRSwitchApp",
            //    "params" : {
            //       "appID" : 18467,
            //       "appVRName" : "<VR name of the application>"
            //    }
            // }
            std::string strAppVRName = jsonObj["params"]["appVRName"].asString();
            if (!IsTextUTF8(const_cast<char *>(strAppVRName.data()), strAppVRName.size()))
                strAppVRName = string_To_UTF8(strAppVRName);
            m_pUIManager->tsSpeak(ID_SWITCHAPP, strAppVRName);

            int iNewID = jsonObj["params"]["appID"].asInt();
            // Switch when nothing is current yet or a different app is requested.
            if (m_pCurApp == NULL || m_pCurApp->m_iAppID != iNewID) {
                std::vector <AppData *>::iterator i;
                for (i = m_AppDatas.begin(); i != m_AppDatas.end(); ++i) {
                    if (iNewID == (*i)->m_iAppID) {
                        m_pCurApp = *i;
                        m_pUIManager->onAppShow(m_pCurApp->getCurUI());
                        break;
                    }
                }
            }
        } else if (str_method == "UI.SetAppIcon") {
            int iAppId = jsonObj["params"]["appID"].asInt();
            std::vector <AppData *>::iterator Iter = m_AppDatas.begin();
            while (Iter != m_AppDatas.end()) {
                if (iAppId == (*Iter)->m_iAppID) {
                    QUrl iconPathUrl(jsonObj["params"]["syncFileName"]["value"].asString().c_str());
#if defined(WINCE)
                    (*Iter)->m_strAppIconFilePath = ChangeSlash(iconPathUrl.path().toLocal8Bit().data());
#else
                    (*Iter)->m_strAppIconFilePath = ChangeSlash(iconPathUrl.path().toStdString());
#endif
                    if (m_pCurApp == NULL) {
                        m_pUIManager->onAppShow(ID_APPLINK);
                    }
                    //m_pUIManager->onAppShow(m_pCurApp->getCurUI());
                    break;
                }
                ++Iter;
            }
        } else if (str_method == "UI.EndAudioPassThru") {
            ToSDL->OnVRCancelRecord();
            m_pUIManager->OnEndAudioPassThru();
            return RESULT_SUCCESS;
        } else if (str_method == "BasicCommunication.UpdateDeviceList") {
            // add by fanqiang
            updateDeiveList(jsonObj);
            m_pUIManager->ShowDeviceList();
        } else {
            // Not handled here: forward to the current application.
            if (m_pCurApp)
                return m_pCurApp->recvFromServer(jsonObj);
            else
                return RESULT_APPLICATION_NOT_REGISTERED;
        }
        return RESULT_SUCCESS;
    }
    return RESULT_SUCCESS;
}
bool GetFileFormat(api::File& file, uintptr_t& nCodePage, bool* pSignatureFound, bool bUseHeuristics) { DWORD dwTemp=0; bool bSignatureFound = false; bool bDetect=false; DWORD Readed = 0; if (file.Read(&dwTemp, sizeof(dwTemp), Readed) && Readed > 1 ) // minimum signature size is 2 bytes { if (LOWORD(dwTemp) == SIGN_UNICODE) { nCodePage = CP_UNICODE; file.SetPointer(2, nullptr, FILE_BEGIN); bSignatureFound = true; } else if (LOWORD(dwTemp) == SIGN_REVERSEBOM) { nCodePage = CP_REVERSEBOM; file.SetPointer(2, nullptr, FILE_BEGIN); bSignatureFound = true; } else if ((dwTemp & 0x00FFFFFF) == SIGN_UTF8) { nCodePage = CP_UTF8; file.SetPointer(3, nullptr, FILE_BEGIN); bSignatureFound = true; } else { file.SetPointer(0, nullptr, FILE_BEGIN); } } if (bSignatureFound) { bDetect = true; } else if (bUseHeuristics) { file.SetPointer(0, nullptr, FILE_BEGIN); DWORD Size=0x8000; // BUGBUG. TODO: configurable char_ptr Buffer(Size); DWORD ReadSize = 0; bool ReadResult = file.Read(Buffer.get(), Size, ReadSize); file.SetPointer(0, nullptr, FILE_BEGIN); if (ReadResult && ReadSize) { int test= IS_TEXT_UNICODE_STATISTICS| IS_TEXT_UNICODE_REVERSE_STATISTICS| IS_TEXT_UNICODE_CONTROLS| IS_TEXT_UNICODE_REVERSE_CONTROLS| IS_TEXT_UNICODE_ILLEGAL_CHARS| IS_TEXT_UNICODE_ODD_LENGTH| IS_TEXT_UNICODE_NULL_BYTES; if (IsTextUnicode(Buffer.get(), ReadSize, &test)) { if (!(test&IS_TEXT_UNICODE_ODD_LENGTH) && !(test&IS_TEXT_UNICODE_ILLEGAL_CHARS)) { if ((test&IS_TEXT_UNICODE_NULL_BYTES) || (test&IS_TEXT_UNICODE_CONTROLS) || (test&IS_TEXT_UNICODE_REVERSE_CONTROLS)) { if ((test&IS_TEXT_UNICODE_CONTROLS) || (test&IS_TEXT_UNICODE_STATISTICS)) { nCodePage=CP_UNICODE; bDetect=true; } else if ((test&IS_TEXT_UNICODE_REVERSE_CONTROLS) || (test&IS_TEXT_UNICODE_REVERSE_STATISTICS)) { nCodePage=CP_REVERSEBOM; bDetect=true; } } } } else if (IsTextUTF8(Buffer.get(), ReadSize)) { nCodePage=CP_UTF8; bDetect=true; } else { int cp = GetCpUsingUniversalDetector(Buffer.get(), ReadSize); if ( cp >= 0 ) { if 
(Global->Opt->strNoAutoDetectCP.Get() == L"-1") { if ( Global->Opt->CPMenuMode ) { if ( static_cast<UINT>(cp) != GetACP() && static_cast<UINT>(cp) != GetOEMCP() ) { long long selectType = Global->CodePages->GetFavorite(cp); if (0 == (selectType & CPST_FAVORITE)) cp = -1; } } } else { const auto BannedCpList = StringToList(Global->Opt->strNoAutoDetectCP, STLF_UNIQUE); if (std::find(ALL_CONST_RANGE(BannedCpList), std::to_wstring(cp)) != BannedCpList.cend()) { cp = -1; } } } if (cp != -1) { nCodePage = cp; bDetect = true; } } } } if (pSignatureFound) { *pSignatureFound = bSignatureFound; } return bDetect; }
bool FileProcess::LoadClass(std::string strFile, std::string strTable) { tinyxml2::XMLDocument* doc = new tinyxml2::XMLDocument(); if (NULL == doc) { return false; } doc->LoadFile(strFile.c_str()); auto ff = doc->Value(); tinyxml2::XMLElement* root = doc->RootElement(); auto classElement = root->FirstChildElement("Propertys"); if (classElement) { auto nodeElement = classElement->FirstChildElement("Property"); if (nodeElement) { while (true) { if (!nodeElement || (std::string)(nodeElement->Attribute("Save")) != "1") { if (!nodeElement) { break; } nodeElement = nodeElement->NextSiblingElement(); continue; } std::string strID = nodeElement->Attribute("Id"); auto chrDesc = nodeElement->Attribute("Desc"); std::string strDesc = chrDesc; auto descLength = strlen(chrDesc); if (bConvertIntoUTF8 && IsTextUTF8(chrDesc, descLength)) { if (descLength > 0) { char* chrArrDesc = new char[descLength]; Utf8ToGbk((char*)chrDesc, chrArrDesc); strDesc = chrArrDesc; delete[] chrArrDesc; } } ////////////////////////////////////////////////////////////////////////// std::string strType = nodeElement->Attribute("Type"); std::string toWrite = ""; if (strType == "string") { toWrite = "ALTER TABLE `" + strTable + "` ADD `" + strID + "` varchar(128) DEFAULT '' COMMENT '" + strDesc + "';"; } else if (strType == "int") { toWrite = "ALTER TABLE `" + strTable + "` ADD `" + strID + "` bigint(11) DEFAULT '0' COMMENT '" + strDesc + "';"; } else if (strType == "object") { toWrite = "ALTER TABLE `" + strTable + "` ADD `" + strID + "` varchar(128) DEFAULT '' COMMENT '" + strDesc + "';"; } else if (strType == "float") { toWrite = "ALTER TABLE `" + strTable + "` ADD `" + strID + "` float(11,3) DEFAULT '0' COMMENT '" + strDesc + "';"; } else { toWrite = "ALTER TABLE `" + strTable + "` ADD `" + strID + "` varchar(128) DEFAULT '' COMMENT '" + strDesc + "';"; } toWrite += "\n"; fwrite(toWrite.c_str(), toWrite.length(), 1, mysqlWriter); if (!nodeElement) { break; } nodeElement = 
nodeElement->NextSiblingElement(); } } } auto xRecordsNode = root->FirstChildElement("Records"); if (xRecordsNode) { auto nodeElement = xRecordsNode->FirstChildElement("Record"); if (nodeElement) { while (true) { if (!nodeElement || (std::string)(nodeElement->Attribute("Save")) != "1") { if (!nodeElement) { break; } nodeElement = nodeElement->NextSiblingElement(); continue; } std::string strID = nodeElement->Attribute("Id"); auto chrDesc = nodeElement->Attribute("Desc"); std::string strDesc = chrDesc; auto descLength = strlen(chrDesc); if (bConvertIntoUTF8 && IsTextUTF8(chrDesc, descLength)) { if (descLength > 0) { char* chrArrDesc = new char[descLength]; Utf8ToGbk((char*)chrDesc, chrArrDesc); strDesc = chrArrDesc; delete[] chrArrDesc; } } std::string toWrite = "ALTER TABLE `" + strTable + "` ADD `" + strID + "` BLOB COMMENT '" + strDesc + "';"; toWrite += "\n"; fwrite(toWrite.c_str(), toWrite.length(), 1, mysqlWriter); if (nodeElement == classElement->LastChildElement()) { break; } nodeElement = nodeElement->NextSiblingElement(); } } } delete doc; return true; }
bool FileProcess::CreateIniXML(std::string strFile) { std::cout << strFile << std::endl; // 打开excel MiniExcelReader::ExcelFile* x = new MiniExcelReader::ExcelFile(); if (!x->open(strFile.c_str())) { printf("can't open %s\n", strFile.c_str()); return false; } //////////////////////////////////////////////////////////////////////////// // 开始创建xml tinyxml2::XMLDocument* iniDoc = new tinyxml2::XMLDocument(); if (NULL == iniDoc) { return false; } //xml声明 tinyxml2::XMLDeclaration *pDel = iniDoc->NewDeclaration("xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"yes\""); if (NULL == pDel) { return false; } iniDoc->LinkEndChild(pDel); // 写入XML root标签 tinyxml2::XMLElement* root = iniDoc->NewElement("XML"); iniDoc->LinkEndChild(root); // 读取excel中每一个sheet std::vector<MiniExcelReader::Sheet>& sheets = x->sheets(); std::vector<std::string> vColNames; std::vector<std::string> vDataIDs; std::map<std::string, std::string> mDataValues; int nCurrentCol = 0; for (MiniExcelReader::Sheet& sh : sheets) { std::string strSheetName = sh.getName(); std::string strUpperSheetName = strSheetName.substr(0, 8); transform(strUpperSheetName.begin(), strUpperSheetName.end(), strUpperSheetName.begin(), ::tolower); if (strUpperSheetName != "property") { continue; } const MiniExcelReader::Range& dim = sh.getDimension(); for (int c = dim.firstCol; c <= dim.lastCol; c++) { MiniExcelReader::Cell* cell = sh.getCell(dim.firstRow, c); if (cell) { vColNames.push_back(cell->value); } } if (vDataIDs.size() <= 0) { for (int r = dim.firstRow + 8; r <= dim.lastRow; r++) { MiniExcelReader::Cell* cell = sh.getCell(r, dim.firstCol); if (cell) { if (cell->value.length() > 0) { vDataIDs.push_back(cell->value); } } } } for (int r = dim.firstRow + 8; r <= vDataIDs.size() + 8; r++) { std::string testValue = ""; MiniExcelReader::Cell* cell = sh.getCell(r, dim.firstCol); for (int c = dim.firstCol; c <= dim.lastCol; c++) { std::string name = vColNames[c - 1 + nCurrentCol]; std::string value = ""; MiniExcelReader::Cell* 
cell = sh.getCell(r, c); std::string vType = sh.getCell(dim.firstRow + 1, c)->value; if (cell) { std::string valueCell = cell->value; transform(valueCell.begin(), valueCell.end(), valueCell.begin(), ::toupper); if (valueCell == "TRUE" || valueCell == "FALSE") { value = valueCell == "TRUE" ? 1 : 0; } else { value = cell->value; if (value.size() <= 0) { if (vType == "int" || vType == "float") { value = "0"; } else { value = ""; } } } } else { if (vType == "int" || vType == "float") { value = "0"; } else { value = ""; } } //check the field is utf8, then convert it into gbk. auto descLength = value.size(); if (bConvertIntoUTF8 && IsTextUTF8(value.c_str(), descLength)) { if (descLength > 0) { char* chrArrDesc = new char[descLength]; Utf8ToGbk((char*)value.c_str(), chrArrDesc); value = chrArrDesc; delete[] chrArrDesc; } } mDataValues.insert(std::pair<string, string>(vDataIDs[r - 9] + name, value)); } } nCurrentCol += dim.lastCol; } int nDataCount = 0; if (strFile.find("NPC") > 0 && strFile.find("NPC") < strFile.size()) { int a = 0; } for (auto strID : vDataIDs) { auto objectNode = iniDoc->NewElement("Object"); root->LinkEndChild(objectNode); for (auto strColName : vColNames) { if (strColName == "Id") { const char* chrID = objectNode->Attribute("Id"); if (!chrID) { objectNode->SetAttribute(strColName.c_str(), mDataValues[strID + strColName].c_str()); } } else { objectNode->SetAttribute(strColName.c_str(), mDataValues[strID + strColName].c_str()); } nDataCount++; } } //////////////////////////////////////////////////////////////////////////// // 保存文件 int nLastPoint = strFile.find_last_of(".") + 1; int nLastSlash = strFile.find_last_of("/") + 1; std::string strFileName = strFile.substr(nLastSlash, nLastPoint - nLastSlash - 1); std::string strFileExt = strFile.substr(nLastPoint, strFile.length() - nLastPoint); std::string strXMLFile = strToolBasePath + strXMLIniPath + strFileName; if (nCipher > 0) { strXMLFile += ".NF"; } else { strXMLFile += ".xml"; } iniDoc->SetBOM(false); 
iniDoc->SaveFile(strXMLFile.c_str()); delete iniDoc; delete x; return true; }