/**
 * Request the HTTP header for `url` and load it into `header`.
 *
 * Delegates to the requestHeader overload that fills a raw header string,
 * then hands that string to `header.setHeaderStr()`. The header object is
 * updated regardless of the value the delegate returns.
 *
 * @param url     target of the request
 * @param header  receives the raw header text via setHeaderStr()
 * @return whatever the string-based overload returned
 */
int HttpClient::requestHeader(Url &url, HttpHeader &header)
{
    string rawHeader;
    const int status = requestHeader(url, rawHeader);
    header.setHeaderStr(rawHeader);
    return status;
}
int HttpClient::requestWebPage(Url &url, HttpHeader &httpHeader, HttpContent &httpContent) { // construct a request string requestStr; string path = url.getPath(); if (path.empty()) path = "/"; requestStr = "GET " + path + " HTTP/1.0\r\nHost: " + url.getHost() + "\r\nUser-Agent: openSE/1.0 (Ubuntu11.04)\r\nAccept-Language: zh,en-us\r\nAccept-Charset: gb2312,utf-8\r\nConnection: Keep-Alive\r\n\r\n"; cout << "requestStr:\n" << requestStr << endl; // send request: if (url.getHost() != _preHost) { if (_preSockFd != -1) { closesocket(_preSockFd); _preSockFd = -1; } } int sockFd; bool sendSuccess = false; // try to use previous connection if (_preSockFd != -1) { sockFd = _preSockFd; if (rio_writen(sockFd, requestStr.c_str(), requestStr.size()) == -1) { cerr << "use previous connection:rio_writen error !" << endl; closesocket(_preSockFd); _preSockFd = -1; } else sendSuccess = true; } if (!sendSuccess) { // try to creat a new connection sockFd = tcpConnect(url.getIp(), url.getPort()); if (sockFd == -1) { cerr << "tcpConnect error" << endl; return -1; } // try to use new connection if (rio_writen(sockFd, requestStr.c_str(), requestStr.size()) == -1) { cerr << "rio_writen error for requestStr:" << requestStr << endl; closesocket(sockFd); return -1; } } // receive http header string headerStr; headerStr.reserve(1024); if (receiveHeader(sockFd, headerStr, DEFAULT_TIMEOUT_SECONDS) <= 0) { cerr << "receiveHeader error" << endl; closesocket(sockFd); _preSockFd = -1; return -1; } //cout << "headerStr:\n" << headerStr << endl; // parser http header httpHeader.setHeaderStr(headerStr); // check StatusCode int stausCode = httpHeader.getStatusCode(); if (stausCode == -1) { cerr << "not find status code in httpHeader: " << httpHeader.getHeaderStr() << endl; } if (stausCode == 301 || stausCode == 302) { closesocket(sockFd); _preSockFd = -1; string locationUrlStr = httpHeader.getLocation(); if (locationUrlStr.empty()) { cerr << "error location in httpHeader: " << httpHeader.getHeaderStr() 
<< endl; } //locationStr = location; Url locationUrl(locationUrlStr); return requestWebPage(locationUrl, httpHeader, httpContent); } if (stausCode < 200 || stausCode > 299) { closesocket(sockFd); _preSockFd = -1; cerr << "status code beyond [200-300) in httpHeader: " << httpHeader.getHeaderStr() << endl; return -1; } // check content type string contentType = httpHeader.getContentType(); if (contentType.find("image") != string::npos) { closesocket(sockFd); _preSockFd = -1; cerr << "contentType is image in httpHeader: " << httpHeader.getHeaderStr() << endl; return -1; } // check ContentLength int contentLength = httpHeader.getContentLength(); if (contentLength == -1) { //cerr << "contentLength is not finded in httpHeader: " << httpHeader.getHeaderStr() << endl; contentLength = MAX_HTTPCONTENT_SIZE / 10; } if (contentLength == 0) { closesocket(sockFd); _preSockFd = -1; cerr << "contentLength is 0 in httpHeader: " << httpHeader.getHeaderStr() << endl; return -1; } if (contentLength > MAX_HTTPCONTENT_SIZE) { closesocket(sockFd); _preSockFd = -1; cerr << "contentLength > MAX_HTTPCONTENT_SIZE in httpHeader: " << httpHeader.getHeaderStr() << endl; return -1; } // receive content string contentStr; if (receiveContent(sockFd, contentLength, contentStr, DEFAULT_TIMEOUT_SECONDS) == -1) { closesocket(sockFd); _preSockFd = -1; cerr << "receiveContent error for url: " << url.getUrlStr() << endl; //cout << contentStr << endl; return -1; } else _preSockFd = sockFd; //cout << "contentStr:\n" << contentStr << endl; // cout << "content finished,url is:"<<url.getUrlStr()<<endl; // set http content httpContent.setContentStr(contentStr); return 0; }