// 取主题列表 BOOL GetThreads(LPCTSTR forumName, LPCTSTR ignoreThread, vector<ThreadInfo>& threads) { CString src = HTTPGet(_T("http://tieba.baidu.com/f?ie=UTF-8&kw=") + EncodeURI(forumName) + _T("&tp=0&pn=") + ignoreThread + _T("&apage=1"), FALSE, &g_stopScanFlag); CStringArray rawThreads; SplitString(rawThreads, src, THREAD_SPLIT); if (rawThreads.GetSize() < 2) { if (src != NET_STOP_TEXT && src != NET_TIMEOUT_TEXT) WriteString(src, _T("forum.txt")); return FALSE; } int iThreads; //// 今日主题 //CString topic = GetStringBetween(rawThreads[0], TOPIC_LEFT, TOPIC_RIGHT); //if (topic != _T("")) //{ // threads.resize(rawThreads.GetSize()); // int pos = topic.Find(TOPIC_TID_LEFT); // threads[0].tid = GetStringBetween(topic, TOPIC_TID_LEFT, TOPIC_TID_RIGHT, pos); // threads[0].reply = GetStringBetween(topic, TOPIC_REPLY_LEFT, TOPIC_REPLY_RIGHT); // threads[0].title = GetStringBetween(topic, TOPIC_TITLE_LEFT, TOPIC_TITLE_RIGHT, pos); // threads[0].preview = GetStringBetween(topic, TOPIC_PREVIEW_LEFT, TOPIC_PREVIEW_RIGHT, pos); // threads[0].author = _T(""); // iThreads = 1; //} //else //{ threads.resize(rawThreads.GetSize() - 1); iThreads = 0; //} // 普通主题 rawThreads[rawThreads.GetSize() - 1] = GetStringBefore(rawThreads[rawThreads.GetSize() - 1], THREAD_END); for (int iRawThreads = 1; iRawThreads < rawThreads.GetSize(); iRawThreads++, iThreads++) { threads[iThreads].tid = GetStringBetween(rawThreads[iRawThreads], THREAD_TID_LEFT, THREAD_TID_RIGHT); threads[iThreads].reply = GetStringBetween(rawThreads[iRawThreads], THREAD_REPLY_LEFT, THREAD_REPLY_RIGHT); threads[iThreads].title = HTMLUnescape(GetStringBetween(rawThreads[iRawThreads], THREAD_TITLE_LEFT, THREAD_TITLE_RIGHT)); threads[iThreads].preview = HTMLUnescape(GetStringBetween(rawThreads[iRawThreads], THREAD_PREVIEW_LEFT, THREAD_PREVIEW_RIGHT)) + _T("\r\n") + GetStringBetween2(rawThreads[iRawThreads], THREAD_MEDIA_LEFT, THREAD_MEDIA_RIGHT); threads[iThreads].authorID = GetStringBetween(rawThreads[iRawThreads], THREAD_AUTHOR_ID_LEFT, THREAD_AUTHOR_ID_RIGHT); threads[iThreads].author = JSUnescape(GetStringBefore(rawThreads[iRawThreads], THREAD_AUTHOR_RIGHT)); //OutputDebugString(_T("\n")); //OutputDebugString(rawThreads[iRawThreads]); //OutputDebugString(_T("\n----------------------------------")); } return TRUE; }
// Wrapper around GetStringBefore that fills in a VString for us inline VBool GetStringBefore(const char *pString,char ch, VString &res) { char *pNew = GetStringBefore(pString,ch); if (pNew) { res = pNew; vStrFree(pNew); return TRUE; } else res.Reset(); return FALSE; }
// 取楼中楼列表 void GetLzls(const CString& tid, const CString& page, vector<PostInfo>& posts, vector<PostInfo>& lzls) { time_t timestamp; time(×tamp); CString url; url.Format(_T("http://tieba.baidu.com/p/totalComment?t=%I64d&tid=%s&fid=%s&pn=%s&see_lz=0"), timestamp, tid, g_forumID, page); CString src = HTTPGet(url, FALSE, &g_stopScanFlag); //WriteString(src, _T("lzl.txt")); CStringArray splitedSrc; // 0楼中楼,1用户 SplitString(splitedSrc, src, _T("\"user_list\":{")); lzls.clear(); if (splitedSrc.GetSize() != 2) return; // 遍历用户采集头像哈希 CStringArray users; SplitString(users, splitedSrc[1], LZL_USER_SPLIT); map<CString, CString> portrait; for (int i = 1; i < users.GetSize(); i++) { CString id = JSUnescape(GetStringBefore(users[i], LZL_USER_NAME_RIGHT)); portrait[id] = GetStringBetween(users[i], LZL_USER_PORTRAIT_LEFT, LZL_USER_PORTRAIT_RIGHT); } // 遍历楼层 int iLzls = 0; for (std::regex_iterator<LPCTSTR> it((LPCTSTR)splitedSrc[0], (LPCTSTR)splitedSrc[0] + splitedSrc[0].GetLength(), LZL_FLOOR_REG), end; it != end; it++) { // 查找该层楼层 CString pid = (*it)[1].str().c_str(); // 该层PID CString floor; for (PostInfo post : posts) if (post.pid == pid) { floor = post.floor; break; } // 遍历该层楼中楼 CStringArray rawLzls; SplitString(rawLzls, (*it)[2].str().c_str(), LZL_SPLIT); lzls.resize(lzls.size() + rawLzls.GetSize() - 1); for (int iRawLzls = 1; iRawLzls < rawLzls.GetSize(); iRawLzls++, iLzls++) { lzls[iLzls].pid = GetStringBetween(rawLzls[iRawLzls], LZL_PID_LEFT, LZL_PID_RIGHT); lzls[iLzls].floor = floor; lzls[iLzls].author = JSUnescape(GetStringBetween(rawLzls[iRawLzls], LZL_AUTHOR_LEFT, LZL_AUTHOR_RIGHT)); lzls[iLzls].authorID = GetStringBetween(rawLzls[iRawLzls], LZL_AUTHOR_ID_LEFT, LZL_AUTHOR_ID_RIGHT); lzls[iLzls].authorPortrait = portrait[lzls[iLzls].author]; lzls[iLzls].content = HTMLUnescape(JSUnescape(GetStringBetween(rawLzls[iRawLzls], LZL_CONTENT_LEFT, LZL_CONTENT_RIGHT))); } } }