Beispiel #1
0
/*
 * Scans a page for question links, requests every question found over the
 * given socket and prints the URL of each question whose reply contains
 * SEARCH_USER.
 *
 * sockfd:   socket descriptor the GET requests are sent on and the replies
 *           are read from.
 * document: buffer whose ptr member holds the NUL-terminated page text.
 *
 * The process is terminated with EXIT_FAILURE when send() fails.
 */
void processPage(int sockfd, t_Buffer* document) {
	
	/* The qid text starts QID_OFFSET bytes after the beginning of
	 * SEARCH_QID_TOKEN and is at most QID_LENGTH characters long. */
	enum { QID_OFFSET = 5, QID_LENGTH = 21 };
	
	char *pos = document->ptr;
	char query_get[1024] = {0};
	char qid[QID_LENGTH + 1] = {0};
	
	t_Buffer *header_q = t_Buffer_new(1024);
	t_Buffer *document_q = t_Buffer_new(1024);
	
	/* Fixed seed: the sequence of pauses between requests is the same on every run. */
	srand(1200);
	
	/* Look for the URL of a question in the page */
	while ( (pos = strstr(pos, SEARCH_QUESTION_TOKEN )) )
	{
		const char *src;
		int skipped = 0;
		int copied = 0;
		int written;
		
		/* Locate the qid that belongs to this question link */
		pos = strstr(pos, SEARCH_QID_TOKEN );
		if ( pos == NULL )
			break;	/* no qid anywhere in the rest of the page */
		
		/* Make sure the text does not end before the qid begins */
		while ( skipped < QID_OFFSET && pos[skipped] != '\0' )
			++skipped;
		if ( skipped < QID_OFFSET )
			break;
		
		/* Copy the qid without reading past the end of the page text */
		src = pos + QID_OFFSET;
		memset(qid, 0, sizeof qid);
		while ( copied < QID_LENGTH && src[copied] != '\0' )
		{
			qid[copied] = src[copied];
			++copied;
		}
		
		written = snprintf( query_get, sizeof query_get,
				"GET %s%s%s HTTP/%s\r\nHost: %s\r\nUser-Agent: Mozilla Firefox 11.0 \r\n\r\n",
				SEARCH_QUESTION_TOKEN, SEARCH_QID_TOKEN, qid,
				"1.1", HOST);
		if ( written < 0 || (size_t)written >= sizeof query_get )
		{
			/* A truncated request would be malformed, so skip this question */
			fprintf(stderr, "processPage: request for qid %s does not fit in the buffer\n", qid);
			continue;
		}
	
		/* Send the request */
		if ( send(sockfd, query_get, (size_t)written, 0) < 0 )
		{
			perror("send()");
			exit(EXIT_FAILURE);
		}
	
		receiveHeader(sockfd, header_q);
		receiveContent(sockfd, document_q, TE_CHUNKED);
		
		/* Report the question only when the reply mentions the user searched for */
		if ( strstr(document_q->ptr, SEARCH_USER) )
			printf("http://%s%s%s%s\n", HOST,SEARCH_QUESTION_TOKEN, SEARCH_QID_TOKEN,qid);
	
		/* Pause 0 to 2 seconds before the next request */
		sleep(rand() % 3);
		
	}
	
	printf("\n");
	
	
	/* Release resources */
	t_Buffer_delete(header_q);
	t_Buffer_delete(document_q);

	
}
Beispiel #2
0
/**
 * Sends a GET request for @p url and hands the reply to receiveContent().
 *
 * The socket cached from the previous successful call is reused when the
 * requested host is the cached host; otherwise, or when writing to the cached
 * socket fails, a new connection is opened with tcpConnect().
 *
 * @param url                  target of the request; its path, host, IP and
 *                             port are read.
 * @param contentStr           passed to receiveContent(), which presumably
 *                             fills it with the reply.
 * @param expectContentLength  forwarded unchanged to receiveContent().
 * @return 0 on success, -1 if connecting, sending or receiving fails.
 */
int HttpClient::requestContent(Url &url, string &contentStr, int expectContentLength) {
    // construct a request
    string path = url.getPath();
    if (path.empty())
        path = "/";
    const string requestStr = "GET " + path + " HTTP/1.0\r\nHost: " + url.getHost()
            + "\r\nUser-Agent: openSE/1.0 (Ubuntu11.04)\r\nAccept-Language: zh,en-us\r\nAccept-Charset: gb2312,utf-8\r\nConnection: Keep-Alive\r\n\r\n";

    cout << "requestStr:\n" << requestStr << endl;

    // A cached connection to another host is of no use for this request.
    if (url.getHost() != _preHost) {
        if (_preSockFd != -1) {
            closesocket(_preSockFd);
            _preSockFd = -1;
        }
    }

    int sockFd = -1;
    bool sendSuccess = false;

    // try to use previous connection
    if (_preSockFd != -1) {
        sockFd = _preSockFd;
        if (rio_writen(sockFd, requestStr.c_str(), requestStr.size()) == -1) {
            cerr << "use previous connection:rio_writen error !" << endl;
            closesocket(_preSockFd);
            _preSockFd = -1;
        } else
            sendSuccess = true;
    }

    if (!sendSuccess) {
        // try to create a new connection
        sockFd = tcpConnect(url.getIp(), url.getPort());
        if (sockFd == -1) {
            cerr << "tcpConnect error" << endl;
            return -1;
        }
        // try to use new connection
        if (rio_writen(sockFd, requestStr.c_str(), requestStr.size()) == -1) {
            cerr << "rio_writen error for requestStr:" << requestStr << endl;
            closesocket(sockFd);
            return -1;
        }
    }

    // NOTE: no response header is read or parsed here (no status code,
    // redirect, content-type or content-length checks); the socket goes
    // straight to receiveContent() with the caller's expected length.

    // receive content
    if (receiveContent(sockFd, expectContentLength, contentStr, DEFAULT_TIMEOUT_SECONDS)
            == -1) {
        closesocket(sockFd);
        _preSockFd = -1;
        cerr << "receiveContent error for url: " << url.getUrlStr() << endl;
        cout << contentStr << endl;
        return -1;
    }

    // Cache the socket together with the host it is connected to, so the
    // host comparison at the top of the next call describes this socket.
    _preSockFd = sockFd;
    _preHost = url.getHost();

    cout << "contentStr:\n" << contentStr << endl;
    return 0;
}