Exemple #1
0
/**
 * Opens `path` for reading and closes it again, to prove it is readable.
 *
 * @param path      Path handed to fopen().
 * @param filename  Name shown in the error message (the name as given by the
 *                  user, which may still carry a "file://" prefix).
 * @throws faustexception with the strerror() text when fopen() fails.
 */
static void checkLocalFileOrThrow(const char* path, const char* filename)
{
    FILE* f = fopen(path, "r");
    if (f) {
        fclose(f);
        return;
    }

    // Capture errno right away: building the message may overwrite it.
    const int openErrno = errno;
    stringstream error;
    error << "ERROR : cannot open file '" << filename << "' : " << strerror(openErrno) << "; for help type \"faust --help\"" << endl;
    throw faustexception(error.str());
}

/**
 * Checks that `filename` designates a source that can be read.
 *
 * Three forms are recognised, tested in this order:
 *   - "file://<path>" : <path> is opened as a local file;
 *   - "http://..."    : the URL is fetched with http_fetch();
 *   - anything else   : `filename` itself is opened as a local file.
 *
 * The scheme is only recognised at the very start of `filename`, because the
 * local path is taken to begin right after the "file://" prefix.
 *
 * @param filename  NUL-terminated file name or URL.
 * @return true when the source could be opened or fetched; this function never
 *         returns false.
 * @throws faustexception describing the failure otherwise.
 */
bool check_url(const char* filename)
{
    static const char kFileScheme[] = "file://";
    static const char kHttpScheme[] = "http://";
    const size_t      fileSchemeLen = sizeof(kFileScheme) - 1;
    const size_t      httpSchemeLen = sizeof(kHttpScheme) - 1;

    char* fileBuf = 0;

    // URL for a local file: open what follows the 'file://' prefix.
    // A failed fopen() is a local error, so it is reported through
    // strerror(errno) rather than through http_strerror().
    if (strncmp(filename, kFileScheme, fileSchemeLen) == 0) {
        checkLocalFileOrThrow(filename + fileSchemeLen, filename);
        return true;
    }

    // http URL: try to fetch it.
    if (strncmp(filename, kHttpScheme, httpSchemeLen) == 0) {
        if (http_fetch(filename, &fileBuf) != -1) {
            // NOTE(review): whatever http_fetch() stored in fileBuf is not
            // released here. If http_fetch() hands ownership of that buffer to
            // the caller this leaks it - confirm and release it with the
            // matching deallocator.
            return true;
        }
        stringstream error;
        error << "ERROR : unable to access URL '" << filename << "' : " << http_strerror() << "; for help type \"faust --help\"" << endl;
        throw faustexception(error.str());
    }

    // Otherwise tries to open as a regular file.
    checkLocalFileOrThrow(filename, filename);
    return true;
}
Exemple #2
0
/*
===============
HTTP_FetchFile

Fetches data from an arbitrary URL in a blocking fashion. Doesn't touch any
global variables and thus doesn't interfere with existing client downloads.

On success, stores the buffer accumulated in the temporary download handle in
*data and returns that handle's final position. On failure, prints the reason,
frees any partially filled buffer with Z_Free(), leaves *data set to NULL and
returns -1.

NOTE(review): on success the caller presumably owns *data and must release it
with Z_Free() - confirm against recv_func and the callers.
===============
*/
ssize_t HTTP_FetchFile(const char *url, void **data) {
    dlhandle_t tmp;
    CURL *curl;
    CURLcode ret;
    // Initialised so the value is never indeterminate, even if
    // curl_easy_getinfo() fails to fill it in.
    long response = 0;

    *data = NULL;

    curl = curl_easy_init();
    if (!curl)
        return -1;

    memset(&tmp, 0, sizeof(tmp));

    // curl_easy_setopt() is variadic: numeric options must be passed as
    // 'long', hence the 1L literals.
    curl_easy_setopt(curl, CURLOPT_ENCODING, "");
    curl_easy_setopt(curl, CURLOPT_NOPROGRESS, 1L);
    curl_easy_setopt(curl, CURLOPT_WRITEDATA, &tmp);
    curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, recv_func);
    curl_easy_setopt(curl, CURLOPT_FAILONERROR, 1L);
    curl_easy_setopt(curl, CURLOPT_PROXY, cl_http_proxy->string);
    curl_easy_setopt(curl, CURLOPT_USERAGENT, com_version->string);
    curl_easy_setopt(curl, CURLOPT_URL, url);

    ret = curl_easy_perform(curl);

    // The HTTP status has to be read before the handle is cleaned up.
    if (ret == CURLE_HTTP_RETURNED_ERROR)
        curl_easy_getinfo(curl, CURLINFO_RESPONSE_CODE, &response);

    curl_easy_cleanup(curl);

    if (ret == CURLE_OK) {
        *data = tmp.buffer;
        return tmp.position;
    }

    Com_EPrintf("[HTTP] Failed to fetch '%s': %s\n",
                url, ret == CURLE_HTTP_RETURNED_ERROR ?
                http_strerror(response) : curl_easy_strerror(ret));
    if (tmp.buffer)
        Z_Free(tmp.buffer);
    return -1;
}
Exemple #3
0
/* Prints the most recent library error, as described by http_strerror(). */
static void print_socket_error(void)
{
    char msg[1024] = {0};

    http_strerror(msg, sizeof(msg));
    printf("socket error: %s \n", msg);
}

/*
 * Example client: sends one multipart/form-data POST to a local server by
 * driving the request step by step (open, write header, write body, write
 * end, read response) and prints the result.
 *
 * Returns 0 once the response has been read and printed, 1 if any step
 * before that failed. Both HTTP sessions are closed on every path.
 */
int main(int argc, char *argv[])
{
    char *url;
    /* Zero-initialised so that printing them with %s is always defined, even
       if the library fails before writing anything into them. */
    char data[1024] = {0}, response[4096] = {0};
    int  ret, size;
    int  status = 1;

    HTTP_INFO hi1, hi2;

    (void)argc;
    (void)argv;

    // Init http session. verify: check the server CA cert.
    http_init(&hi1, FALSE);
    http_init(&hi2, TRUE);

/*
    Disabled example: the same upload done with a single http_post() call.

    url = "https://localhost:8080/upload";
    sprintf(data,
            "--1234567890abcdef\r\n"
            "Content-Disposition: form-data; name=\"upload\"; filename=\"test.txt\"\r\n"
            "Content-Type: text/plain\r\n\r\n"
            "test message\r\n"
            "--1234567890abcdef--\r\n\r\n"
    );

    ret = http_post(&hi1, url, data, response, sizeof(response));

    printf("return code: %d \n", ret);
    printf("return body: %s \n", response);
*/

    url = "https://localhost:8080/upload";

    if(http_open(&hi1, url) < 0)
    {
        print_socket_error();
        goto error;
    }

    snprintf(hi1.request.method, 8, "POST");
    hi1.request.close = FALSE;
    hi1.request.chunked = FALSE;
    snprintf(hi1.request.content_type, 256, "multipart/form-data; boundary=1234567890abcdef");

    size = snprintf(data, sizeof(data),
                    "--1234567890abcdef\r\n"
                    "Content-Disposition: form-data; name=\"upload\"; filename=\"test.txt\"\r\n"
                    "Content-Type: text/plain\r\n\r\n"
                    "test message\r\n"
                    "--1234567890abcdef--\r\n"
                    );
    if(size < 0 || (size_t)size >= sizeof(data))
    {
        printf("request body does not fit in the buffer \n");
        goto error;
    }

    hi1.request.content_length = size;

    if(http_write_header(&hi1) < 0)
    {
        print_socket_error();
        goto error;
    }

    if(http_write(&hi1, data, size) != size)
    {
        print_socket_error();
        goto error;
    }

    // Write end-chunked
    if(http_write_end(&hi1) < 0)
    {
        print_socket_error();
        goto error;
    }

    ret = http_read_chunked(&hi1, response, sizeof(response));

    printf("return code: %d \n", ret);
    printf("return body: %s \n", response);

    status = 0;

/*
    Disabled examples: get/post over http and https, and a chunked post.
    The chunked example needs an extra local declaration: int i;

    // Test a http get method.
    url = "http://httpbin.org/get?message=https_client";

    ret = http_get(&hi1, url, response, sizeof(response));

    printf("return code: %d \n", ret);
    printf("return body: %s \n", response);

    // Test a http post method.

    url = "http://httpbin.org/post";
    sprintf(data, "{\"message\":\"Hello, https_client!\"}");

    ret = http_post(&hi1, url, data, response, sizeof(response));

    printf("return code: %d \n", ret);
    printf("return body: %s \n", response);

    // Test a https get method.

    url = "https://httpbin.org/get?message=https_client";

    ret = http_get(&hi2, url, response, sizeof(response));

    printf("return code: %d \n", ret);
    printf("return body: %s \n", response);

    // Test a https post method.

    url = "https://httpbin.org/post";
    sprintf(data, "{\"message\":\"Hello, https_client!\"}");

    ret = http_post(&hi2, url, data, response, sizeof(response));

    printf("return code: %d \n", ret);
    printf("return body: %s \n", response);

    // Test a https post with the chunked-encoding data.

    url = "https://httpbin.org/post";

    if(http_open_chunked(&hi2, url) == 0)
    {
        size = sprintf(data, "[{\"message\":\"Hello, https_client %d\"},", 0);

        if(http_write_chunked(&hi2, data, size) != size)
        {
            http_strerror(data, 1024);
            printf("socket error: %s \n", data);

            goto error;
        }

        for(i=1; i<4; i++)
        {
            size = sprintf(data, "{\"message\":\"Hello, https_client %d\"},", i);
            if(http_write_chunked(&hi2, data, size) != size)
            {
                http_strerror(data, 1024);
                printf("socket error: %s \n", data);

                goto error;
            }
        }

        size = sprintf(data, "{\"message\":\"Hello, https_client %d\"}]", i);
        if(http_write_chunked(&hi2, data, strlen(data)) != size)
        {
            http_strerror(data, 1024);
            printf("socket error: %s \n", data);

            goto error;
        }

        ret = http_read_chunked(&hi2, response, sizeof(response));

        printf("return code: %d \n", ret);
        printf("return body: %s \n", response);

    }
    else
    {
        http_strerror(data, 1024);
        printf("socket error: %s \n", data);
    }

    error:
*/

error:

    /* Reached on success as well as on failure: both sessions are always
       closed; 'status' tells the two outcomes apart. */
    http_close(&hi1);
    http_close(&hi2);

    return status;
}
Exemple #4
0
// A download finished, find out what it was, whether there were any errors and
// if so, how severe. If none, rename file and other such stuff.
//
// Loops over the messages handed out by curl_multi_info_read() and processes
// every CURLMSG_DONE entry. Returns qfalse (after calling abort_downloads())
// if any processed transfer set fatal_error; otherwise calls
// CL_RequestNextDownload() and returns qtrue.
static qboolean finish_download(void)
{
    int         msgs_in_queue;
    CURLMsg     *msg;
    CURLcode    result;
    dlhandle_t  *dl;
    CURL        *curl;
    long        response;
    double      sec, bytes;
    char        size[16], speed[16];
    char        temp[MAX_OSPATH];
    qboolean    fatal_error = qfalse;
    const char  *err;
    print_type_t level;

    do {
        msg = curl_multi_info_read(curl_multi, &msgs_in_queue);
        if (!msg)
            break;

        // only completed transfers are of interest; note that 'continue' in a
        // do/while jumps to the loop condition below
        if (msg->msg != CURLMSG_DONE)
            continue;

        curl = msg->easy_handle;
        // NOTE(review): the result of find_handle() is used without a NULL
        // check - presumably every easy handle in the multi stack has a
        // matching dlhandle_t; confirm.
        dl = find_handle(curl);

        cls.download.current = NULL;
        cls.download.percent = 0;

        //filelist processing is done on read
        if (dl->file) {
            fclose(dl->file);
            dl->file = NULL;
        }

        curl_handles--;

        result = msg->data.result;

        // Classify the outcome. Only the success path leaves the switch with
        // 'break'; every failure path ends up at the fail1/fail2 labels inside
        // the default case and finishes with 'continue'.
        switch (result) {
            //for some reason curl returns CURLE_OK for a 404...
        case CURLE_HTTP_RETURNED_ERROR:
        case CURLE_OK:
            curl_easy_getinfo(curl, CURLINFO_RESPONSE_CODE, &response);
            if (result == CURLE_OK && response == 200) {
                //success
                break;
            }

            err = http_strerror(response);

            //404 is non-fatal
            if (response == 404) {
                level = PRINT_ALL;
                goto fail1;
            }

            //every other code is treated as fatal
            //not marking download as done since
            //we are falling back to UDP
            level = PRINT_ERROR;
            fatal_error = qtrue;
            goto fail2;

        case CURLE_COULDNT_RESOLVE_HOST:
        case CURLE_COULDNT_CONNECT:
        case CURLE_COULDNT_RESOLVE_PROXY:
            //connection problems are fatal
            err = curl_easy_strerror(result);
            level = PRINT_ERROR;
            fatal_error = qtrue;
            goto fail2;

        default:
            //any other curl error: warn, then fall through fail1 and fail2
            err = curl_easy_strerror(result);
            level = PRINT_WARNING;
fail1:
            //we mark download as done even if it errored
            //to prevent multiple attempts.
            CL_FinishDownload(dl->queue);
fail2:
            //common failure cleanup: report the error, delete the temp file,
            //free the in-memory buffer and detach the handle. 'err' and
            //'level' have been set by whichever path jumped or fell in here.
            Com_LPrintf(level,
                        "[HTTP] %s [%s] [%d remaining file%s]\n",
                        dl->queue->path, err, cls.download.pending,
                        cls.download.pending == 1 ? "" : "s");
            if (dl->path[0]) {
                remove(dl->path);
                dl->path[0] = 0;
            }
            if (dl->buffer) {
                Z_Free(dl->buffer);
                dl->buffer = NULL;
            }
            curl_multi_remove_handle(curl_multi, curl);
            continue;
        }

        //mark as done
        CL_FinishDownload(dl->queue);

        //show some stats
        curl_easy_getinfo(curl, CURLINFO_TOTAL_TIME, &sec);
        curl_easy_getinfo(curl, CURLINFO_SIZE_DOWNLOAD, &bytes);
        //clamp the elapsed time so the speed division below never divides by
        //(nearly) zero
        if (sec < 0.001)
            sec = 0.001;
        Com_FormatSizeLong(size, sizeof(size), bytes);
        Com_FormatSizeLong(speed, sizeof(speed), bytes / sec);

        //FIXME:
        //technically i shouldn't need to do this as curl will auto reuse the
        //existing handle when you change the url. however, the curl_handles goes
        //all weird when reusing a download slot in this way. if you can figure
        //out why, please let me know.
        curl_multi_remove_handle(curl_multi, curl);

        Com_Printf("[HTTP] %s [%s, %s/sec] [%d remaining file%s]\n",
                   dl->queue->path, size, speed, cls.download.pending,
                   cls.download.pending == 1 ? "" : "s");

        //a non-empty dl->path means the data went to a temp file on disk;
        //otherwise the handle is passed to parse_file_list()
        if (dl->path[0]) {
            //rename the temp file
            Q_snprintf(temp, sizeof(temp), "%s/%s", fs_gamedir, dl->queue->path);

            //NOTE(review): the message below prints dl->queue->path as the
            //destination, not the full 'temp' path actually passed to rename()
            if (rename(dl->path, temp))
                Com_EPrintf("[HTTP] Failed to rename '%s' to '%s': %s\n",
                            dl->path, dl->queue->path, strerror(errno));
            dl->path[0] = 0;

            //a pak file is very special...
            if (dl->queue->type == DL_PAK) {
                CL_RestartFilesystem(qfalse);
                rescan_queue();
            }
        } else if (!fatal_error) {
            //fatal_error can only be set here by an earlier message handled in
            //this same loop; in that case the list is not parsed
            parse_file_list(dl);
        }
    } while (msgs_in_queue > 0);

    //fatal error occured, disable HTTP
    if (fatal_error) {
        abort_downloads();
        return qfalse;
    }

    // see if we have more to dl
    CL_RequestNextDownload();
    return qtrue;
}
Exemple #5
0
/*
 * Fetches the web page at `url`, extracts the addresses found in it and
 * prints an HTML report on stdout: the list of extracted addresses, an
 * optional feedback form, and the page itself with the addresses highlighted.
 *
 * When the CONTENT_LENGTH environment variable announces a posted form of at
 * most MAXLEN bytes, the form is read from stdin. It is expected to look like
 *     extraction=<right|wrong>&numRight=<n>&numTotal=<m>
 * and, if the answer is "right" or "wrong", the tagged page is stored with
 * saveTaggedText().
 *
 * url: address of the page to fetch; also echoed into the generated HTML.
 *
 * Returns 0. The process is terminated with exit(0) when the page cannot be
 * fetched or when memory runs out.
 */
int getAddress (char* url) {
	enum {
		MAXLEN   = 1805,               /* largest accepted CONTENT_LENGTH */
		EXTRA    = 11,                 /* strlen("extraction="): offset of the first field's value */
		MAXINPUT = MAXLEN + EXTRA + 2  /* size of the buffer holding the posted form */
	};
	char *stream, *text, *textHighlight, *lenstr;
	char *tokens;
	int *positions; /* position of each token in the text, allocated by convertToken2Text */
	cvector addressVector;
	Address *adr;
	long len;
	char input[MAXINPUT];
	int rightOrWrong = -1, numRight, numTotal; /* -1: no usable user answer */
	int i;
	int ret;

	tokens = (char*)malloc(DEF_BUFF_SIZE);
	if (!tokens) {
		printf("out of memory when convert text to tokens!\n");
		exit(0);
	}

	http_setTimeout(8); /* seconds */

	/* fetch web page */
	ret = httpFetch (url, &stream);
	if (ret == -1) {
		printf("%s\n",http_strerror());
		exit(0);
	}
	assert(stream);

	text = (char*)malloc(ret+2);
	if (!text) {
		printf("out of memory when convert text to tokens!\n");
		exit(0);
	}

	/* strncpy copies at most ret bytes and zero-fills after a NUL; the two
	   explicit terminators below make sure the buffer ends with two '\0',
	   which the flex scanner needs */
	strncpy(text, stream, ret);
	*(text+ret) = '\0';
	*(text+ret+1) = '\0';
	free(stream);

	/* convert text to tokens, remove tags
	and convert back to string: tokens
	and keep all positions in array "positions"
	*/
	convertToken2Text(text, tokens, &positions);

	VectorNew (&addressVector, sizeof (Address),free_address, DEF_ADDRESS_PER_PAGE);
	/* extract address, get position from positions vector
	   and save extracted address, position, country to addressVector */
	extractAddress(tokens, positions, &addressVector);

	/* NOTE(review): url is written into the generated HTML as-is (here and in
	   the form below). If it can contain quotes or markup it should be
	   HTML-escaped first - confirm where url comes from. */

	/* output header */
	printf("<meta http-equiv=\"Content-Type\" content=\"text/html; charset=ISO-8859-1\">\n");
	printf("<table border=1 width=100%%><tr><td><table border=1 bgcolor=#ffffff cellpadding=10 cellspacing=0 width=100%% color=#ffffff><tr><td>\n");
	printf("<font face=arial,sans-serif color=black size=-1>\n");
	printf("<b><a href='%s'>US, UK & Canadian Addresses</a> extracted by <a href='%s'>Geo Extractor</a> from web page</b> <a href='%s'>%s</a></font><br><br>\n",LIST_FILES_URL, HOME_PAGE,url,url);

	/* display extracted address: table header */
	printf("<table width=100%% border=0 cellpadding=0 cellspacing=0><tr><td bgcolor=#3366cc><img width=1 height=1 alt=''></td></tr></table>\n");
	printf("<table width=100%% border=0 cellpadding=0 cellspacing=0 bgcolor=#e5ecf9><tr><td width=10></td><td bgcolor=#e5ecf9 nowrap><br>\n");
	printf("<font face=arial,sans-serif color=black size=-1><b>\n");

	for (i=0; i<addressVector.ItemsCount; i++) {
		adr = (Address*)VectorNth(&addressVector,i);
		printf("%s<br>\n", adr->address);
	}
	printf("</b></font>\n");
	printf("<br></td></tr></table>\n");
	printf("<table width=100%% border=0 cellpadding=0 cellspacing=0><tr><td bgcolor=#3366cc><img width=1 height=1 alt=''></td></tr></table>\n");

	textHighlight = (char*)malloc(DEF_BUFF_SIZE);
	if (!textHighlight) {
		printf("out of memory when highlight text!\n");
		exit(0);
	}

	/* defaults shown in the form when the user posted nothing usable */
	numRight = numTotal = addressVector.ItemsCount;

	/* if there is a user post, we save the user input to get tagged data */
	lenstr = getenv("CONTENT_LENGTH");
	if (lenstr != NULL && sscanf(lenstr,"%ld",&len) == 1 && len > 0 && len <= MAXLEN) {
		tagAddress(text, textHighlight, &addressVector);

		if (fgets(input, (int)len + 1, stdin) != NULL) {
			URLdecode(input);

			/* The posted data is untrusted: only look past the first field
			   name when the decoded string is long enough, and locate the
			   other fields by name instead of by fixed offsets. */
			if (strlen(input) >= (size_t)EXTRA) {
				const char *answer = input + EXTRA;
				const char *field;

				if (strncmp(answer, "right", strlen("right")) == 0) {
					rightOrWrong = 0; /* user input "Right Extraction" */
				}
				if (strncmp(answer, "wrong", strlen("wrong")) == 0) {
					rightOrWrong = 1; /* user input "Wrong Extraction" */
				}

				/* numRight: number of correctly extracted addresses */
				field = strstr(answer, "&numRight=");
				if (field != NULL) {
					numRight = atoi(field + strlen("&numRight="));
				}

				/* numTotal: number of addresses in the page */
				field = strstr(answer, "&numTotal=");
				if (field != NULL) {
					numTotal = atoi(field + strlen("&numTotal="));
				}
			}
		}

		/* the page is saved for either answer; rightOrWrong tells
		   saveTaggedText which one was given */
		if (rightOrWrong == 0 || rightOrWrong == 1) {
			saveTaggedText(url, text, textHighlight, rightOrWrong, numRight, addressVector.ItemsCount, numTotal);
		}
	}

	/* give source text, and addressVector
	   highlight all extracted address in the webpage */
	getHighlight(text, textHighlight, &addressVector);

	/* show user input to let user judge whether extraction is correct */
	if ( SHOW_COLLECT_DATA_INTERFACE) {

		printf("<FORM ACTION=\"%s%s\" METHOD=\"POST\">\n", GEO_URL, url);
		printf("<font face=arial,sans-serif color=black size=-1>\n");

		printf("<P><input name=\"extraction\" type=\"radio\" value=\"right\" ");
		if ((rightOrWrong == 0)||(rightOrWrong == -1)) /* no user input, or user said all addresses are correct */
			printf("checked");
		printf("> All address extracted correctly<br>\n");
		printf("<input name=\"extraction\" type=\"radio\" value=\"wrong\" ");
		if (rightOrWrong == 1) /* user said not all addresses are correct */
			printf("checked");
		printf("> Not all addresses extracted correctly. \n");
		printf("<input type=\"text\" name=\"numRight\" size=\"4\" value=\"%d\"> addresses extracted correctly from total <input type=\"text\" name=\"numTotal\" size=\"4\" value=\"%d\"> addresses<BR>\n", numRight, numTotal);
		printf("<INPUT TYPE=\"SUBMIT\" VALUE=\"Save Webpage\"></font></FORM>\n");
		/* show google search */
		printf("<SCRIPT language=\"JavaScript\">function OnSubmitForm(){ document.g.action =\"%shttp://www.google.com/search?num=100&q=\"+document.g.q.value.replace(\" \",\"%%2B\");}</SCRIPT>\n", GEO_URL);
		printf("<table border=0 align=right><tr><td>\n");
		printf("<form action=\"\" method=\"post\" name=\"g\" onSubmit=\"return OnSubmitForm();\">\n");
		printf("<input size=\"32\" name=\"q\">\n");
		printf("<INPUT TYPE=\"SUBMIT\" name=\"Submit\" VALUE=\"Google\"></form>\n");
		printf("</td></tr></table>\n");

	}
	printf("</td></tr></table></td></tr></table>\n");

	/* the page itself, with the extracted addresses highlighted */
	printf("<hr>\n");
	displayHtmlAbsoluteURL(textHighlight, url);

	VectorDispose(&addressVector);
	free (positions);
	free (text);
	free (tokens);
	free (textHighlight);

	return 0;

}