bool TextSearch::FindTextInPage(int pageNo) { if (str::IsEmpty(findText)) return false; if (!pageNo) pageNo = findPage; findPage = pageNo; const WCHAR *found; int length; do { if (!anchor) found = GetNextIndex(pageText, findIndex, forward); else if (forward) found = (caseSensitive ? StrStr : StrStrI)(pageText + findIndex, anchor); else found = StrRStrI(pageText, pageText + findIndex, anchor); if (!found) return false; findIndex = (int)(found - pageText) + (forward ? 1 : 0); length = MatchLen(found); } while (length <= 0); int offset = (int)(found - pageText); StartAt(pageNo, offset); SelectUpTo(pageNo, offset + length); findIndex = offset + (forward ? length : 0); // try again if the found text is completely outside the page's mediabox if (result.len == 0) return FindTextInPage(pageNo); return true; }
// 取帖子列表 GetPostsResult GetPosts(const CString& tid, const CString& _src, const CString& page, vector<PostInfo>& posts, vector<PostInfo>& lzls) { CString src = _src != _T("") ? _src : HTTPGet(_T("http://tieba.baidu.com/p/") + tid + _T("?pn=") + page, FALSE, &g_stopScanFlag); if (src == NET_STOP_TEXT) return GET_POSTS_STOP; if (src == NET_TIMEOUT_TEXT) return GET_POSTS_TIMEOUT; CStringArray rawPosts; SplitString(rawPosts, src, POST_SPLIT); if (rawPosts.GetSize() < 2) return GET_POSTS_DELETED; posts.resize(rawPosts.GetSize() - 1); for (int iRawPosts = 1, iPosts = 0; iRawPosts < rawPosts.GetSize(); iRawPosts++, iPosts++) { posts[iPosts].pid = GetStringBetween(rawPosts[iRawPosts], POST_PID_LEFT, POST_PID_RIGHT); posts[iPosts].floor = GetStringBetween(rawPosts[iRawPosts], POST_FLOOR_LEFT, POST_FLOOR_RIGHT); posts[iPosts].author = JSUnescape(GetStringBetween(rawPosts[iRawPosts], POST_AUTHOR_LEFT, POST_AUTHOR_RIGHT)); posts[iPosts].authorID = GetStringBetween(rawPosts[iRawPosts], POST_AUTHOR_ID_LEFT, POST_AUTHOR_ID_RIGHT); posts[iPosts].authorPortrait = GetStringBetween(rawPosts[iRawPosts], POST_AUTHOR_PORTRAIT_LEFT, POST_AUTHOR_PORTRAIT_RIGHT); //posts[iPosts].content = GetStringBetween(rawPosts[iRawPosts], POST_CONTENT_LEFT, POST_CONTENT_RIGHT); int left = rawPosts[iRawPosts].Find(POST_CONTENT_LEFT) + _tcslen(POST_CONTENT_LEFT); left = rawPosts[iRawPosts].Find(_T(">"), left) + 1; // 去掉首空格 while (left < rawPosts[iRawPosts].GetLength() && rawPosts[iRawPosts][left] == _T(' ')) left++; int right = rawPosts[iRawPosts].Find(POST_CONTENT_RIGHT, left + 1); // CString不支持反向查找字符串? posts[iPosts].content = rawPosts[iRawPosts].Mid(left, right - left); LPCTSTR pos = StrRStrI(posts[iPosts].content, NULL, _T("</div>")); if (pos != NULL) { right = ((DWORD)pos - (DWORD)(LPCTSTR)posts[iPosts].content) / sizeof(TCHAR)-1; // 去掉尾空格 while (right >= 0 && posts[iPosts].content[right] == _T(' ')) right--; posts[iPosts].content = posts[iPosts].content.Left(right + 1); } // 签名档 posts[iPosts].content += _T("\r\n") + GetStringBetween2(rawPosts[iRawPosts], POST_SIGN_LEFT, POST_SIGN_RIGHT); //OutputDebugString(_T("\n")); //OutputDebugString(rawPosts[iRawPosts]); //OutputDebugString(_T("\n----------------------------------")); } GetLzls(tid, page, posts, lzls); return GET_POSTS_SUCCESS; }