int HttpClient::requestContent(Url &url, string &contentStr, int expectContentLength) { // construct a request string requestStr; string path = url.getPath(); if (path.empty()) path = "/"; requestStr = "GET " + path + " HTTP/1.0\r\nHost: " + url.getHost() + "\r\nUser-Agent: openSE/1.0 (Ubuntu11.04)\r\nAccept-Language: zh,en-us\r\nAccept-Charset: gb2312,utf-8\r\nConnection: Keep-Alive\r\n\r\n"; cout << "requestStr:\n" << requestStr << endl; // send request: if (url.getHost() != _preHost) { if (_preSockFd != -1) { closesocket(_preSockFd); _preSockFd = -1; } } int sockFd; bool sendSuccess = false; // try to use previous connection if (_preSockFd != -1) { sockFd = _preSockFd; if (rio_writen(sockFd, requestStr.c_str(), requestStr.size()) == -1) { cerr << "use previous connection:rio_writen error !" << endl; closesocket(_preSockFd); _preSockFd = -1; } else sendSuccess = true; } if (!sendSuccess) { // try to creat a new connection sockFd = tcpConnect(url.getIp(), url.getPort()); if (sockFd == -1) { cerr << "tcpConnect error" << endl; return -1; } // try to use new connection if (rio_writen(sockFd, requestStr.c_str(), requestStr.size()) == -1) { cerr << "rio_writen error for requestStr:" << requestStr << endl; closesocket(sockFd); return -1; } } //// receive http header //string headerStr; //headerStr.reserve(1024); //if(receiveHeader(sockFd, headerStr, DEFAULT_TIMEOUT_SECONDS) <= 0){ // cerr << "receiveHeader error" << endl; // closesocket(sockFd); // _preSockFd = -1; // return -1; // } //cout << "headerStr:\n" << headerStr << endl; //// parser http header //httpHeader.setHeaderStr(headerStr); //// check StatusCode //int stausCode = httpHeader.getStatusCode(); //if(stausCode == -1){ // cerr << "not find status code in httpHeader: " << httpHeader.getHeaderStr() << endl; // } //if(stausCode == 301 || stausCode == 302){ // closesocket(sockFd); // _preSockFd = -1; // string locationUrlStr = httpHeader.getLocation(); // if(locationUrlStr.empty()){ // cerr << "error location in httpHeader: " << httpHeader.getHeaderStr() << endl; // } // //locationStr = location; // Url locationUrl(locationUrlStr); // return requestWebPage(locationUrl, httpHeader, httpContent); // } //if(stausCode < 200 || stausCode > 299){ // closesocket(sockFd); // _preSockFd = -1; // cerr << "status code beyond [200-300) in httpHeader: " << httpHeader.getHeaderStr() << endl; // return -1; // } //// check content type //string contentType = httpHeader.getContentType(); //if(contentType.find("image") != string::npos){ // closesocket(sockFd); // _preSockFd = -1; // cerr << "contentType is image in httpHeader: " << httpHeader.getHeaderStr() << endl; // return -1; // } //// check ContentLength //int contentLength = httpHeader.getContentLength(); //if(contentLength == -1){ // //cerr << "contentLength is not finded in httpHeader: " << httpHeader.getHeaderStr() << endl; // contentLength = MAX_HTTPCONTENT_SIZE/10; // } //if(contentLength == 0){ // closesocket(sockFd); // _preSockFd = -1; // cerr << "contentLength is 0 in httpHeader: " << httpHeader.getHeaderStr() << endl; // return -1; // } //if(contentLength > MAX_HTTPCONTENT_SIZE){ // closesocket(sockFd); // _preSockFd = -1; // cerr << "contentLength > MAX_HTTPCONTENT_SIZE in httpHeader: " // << httpHeader.getHeaderStr() << endl; // return -1; // } // receive content if (receiveContent(sockFd, expectContentLength, contentStr, DEFAULT_TIMEOUT_SECONDS) == -1) { closesocket(sockFd); _preSockFd = -1; cerr << "receiveContent error for url: " << url.getUrlStr() << endl; cout << contentStr << endl; return -1; } else _preSockFd = sockFd; cout << "contentStr:\n" << contentStr << endl; // cout << "content finished,url is:"<<url.getUrlStr()<<endl; // set http content return 0; }
int HttpClient::requestHeader(Url &url, string&headerStr) { // construct a request string requestStr; string path = url.getPath(); if (path.empty()) path = "/"; requestStr = "HEAD " + path + " HTTP/1.0\r\nHost: " + url.getHost() + "\r\nUser-Agent: openSE/1.0 (Ubuntu11.04)\r\nAccept-Language: zh,en-us\r\nAccept-Charset: gb2312,utf-8\r\nConnection: Keep-Alive\r\n\r\n"; cout << "requestStr:\n" << requestStr << endl; // send request: if (url.getHost() != _preHost) { if (_preSockFd != -1) { closesocket(_preSockFd); _preSockFd = -1; } } int sockFd; bool sendSuccess = false; // try to use previous connection if (_preSockFd != -1) { sockFd = _preSockFd; if (rio_writen(sockFd, requestStr.c_str(), requestStr.size()) == -1) { cerr << "use previous connection:rio_writen error !" << endl; closesocket(_preSockFd); _preSockFd = -1; } else sendSuccess = true; } if (!sendSuccess) { // try to creat a new connection sockFd = tcpConnect(url.getIp(), url.getPort()); if (sockFd == -1) { cerr << "tcpConnect error" << endl; return -1; } // try to use new connection if (rio_writen(sockFd, requestStr.c_str(), requestStr.size()) == -1) { cerr << "rio_writen error for requestStr:" << requestStr << endl; closesocket(sockFd); return -1; } } headerStr.reserve(1024); int timeoutSeconds = DEFAULT_TIMEOUT_SECONDS; int bytesRead = 0; // set socket to no block type #ifdef _MSC_VER #else int flags; flags = fcntl(sockFd, F_GETFL, 0); if (flags < 0) { cerr << "1.fcntl() error in receiveHeader()< 0" << endl; return -1; } flags |= O_NONBLOCK; if (fcntl(sockFd, F_SETFL, flags) < 0) { cerr << "2.fcntl() error in receiveHeader" << endl; return -1; } #endif int newlines = 0; while (newlines < 2) { fd_set rfds; FD_ZERO(&rfds); FD_SET(sockFd, &rfds); struct timeval tv; tv.tv_sec = timeoutSeconds; tv.tv_usec = 0; int selectRet; if (timeoutSeconds >= 0) // wait DEFAULT_TIMEOUT_SECONDS seconds selectRet = select(sockFd + 1, &rfds, NULL, NULL, &tv); else // wait infinitely selectRet = select(sockFd + 1, &rfds, NULL, NULL, NULL); if (selectRet == 0 && timeoutSeconds < 0) { cerr << "select should wait infinitely" << endl; bytesRead = -1; break; } else if (selectRet == -1) { cerr << "select error" << endl; bytesRead = -1; break; } else if (selectRet == 0) { cerr << "select timeout after " << timeoutSeconds << " seconds" << endl; bytesRead = -1; break; } char c; int ret = recv(sockFd, &c, 1, 0); if (ret <= 0) { cerr << "read error" << endl; bytesRead = -1; break; } headerStr += c; ++bytesRead; if (c == '\r') { /* Ignore CR */ continue; } else if (c == '\n') /* LF is the separator */ newlines++; else newlines = 0; } if (bytesRead <= 0) { cerr << "receiveHeader error" << endl; closesocket(sockFd); _preSockFd = -1; return -1; } return 0; }