bool check_url(const char* filename) { char* fileBuf = 0; // Tries to open as an URL for a local file if (strstr(filename, "file://") != 0) { // Tries to open as a regular file after removing 'file://' FILE* f = fopen(&filename[7], "r"); if (f) { fclose(f); return true; } else { stringstream error; error << "ERROR : cannot open file '" << filename << "' : " << http_strerror() << "; for help type \"faust --help\"" << endl; throw faustexception(error.str()); } // Tries to open as a http URL } else if (strstr(filename, "http://") != 0) { if (http_fetch(filename, &fileBuf) != -1) { return true; } else { stringstream error; error << "ERROR : unable to access URL '" << filename << "' : " << http_strerror() << "; for help type \"faust --help\"" << endl; throw faustexception(error.str()); } } else { // Otherwise tries to open as a regular file FILE* f = fopen(filename, "r"); if (f) { fclose(f); return true; } else { stringstream error; error << "ERROR : cannot open file '" << filename << "' : " << strerror(errno) << "; for help type \"faust --help\"" << endl; throw faustexception(error.str()); } } }
/*
===============
HTTP_FetchFile

Fetches data from an arbitrary URL in a blocking fashion. Doesn't touch any
global variables and thus doesn't interfere with existing client downloads.

On success stores the (Z_Malloc'ed, caller-owned) buffer in *data and returns
its length; on failure prints a diagnostic, frees any partial buffer, leaves
*data NULL and returns -1.
===============
*/
ssize_t HTTP_FetchFile(const char *url, void **data)
{
    dlhandle_t  tmp;
    CURL        *curl;
    CURLcode    ret;
    long        response = 0;   // only meaningful after CURLE_HTTP_RETURNED_ERROR

    *data = NULL;

    curl = curl_easy_init();
    if (!curl)
        return -1;

    memset(&tmp, 0, sizeof(tmp));

    // curl_easy_setopt is variadic: options documented as taking 'long'
    // must be passed a long argument, hence the 1L literals below.
    curl_easy_setopt(curl, CURLOPT_ENCODING, "");
    curl_easy_setopt(curl, CURLOPT_NOPROGRESS, 1L);
    curl_easy_setopt(curl, CURLOPT_WRITEDATA, &tmp);
    curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, recv_func);
    curl_easy_setopt(curl, CURLOPT_FAILONERROR, 1L);
    curl_easy_setopt(curl, CURLOPT_PROXY, cl_http_proxy->string);
    curl_easy_setopt(curl, CURLOPT_USERAGENT, com_version->string);
    curl_easy_setopt(curl, CURLOPT_URL, url);

    ret = curl_easy_perform(curl);

    // With FAILONERROR set, HTTP >= 400 yields this code; grab the status.
    if (ret == CURLE_HTTP_RETURNED_ERROR)
        curl_easy_getinfo(curl, CURLINFO_RESPONSE_CODE, &response);

    curl_easy_cleanup(curl);

    if (ret == CURLE_OK) {
        *data = tmp.buffer;     // ownership transfers to the caller
        return tmp.position;
    }

    Com_EPrintf("[HTTP] Failed to fetch '%s': %s\n", url,
                ret == CURLE_HTTP_RETURNED_ERROR ?
                http_strerror(response) : curl_easy_strerror(ret));

    // Discard whatever was received before the failure.
    if (tmp.buffer)
        Z_Free(tmp.buffer);

    return -1;
}
int main(int argc, char *argv[]) { char *url; char data[1024], response[4096]; int i, ret, size; HTTP_INFO hi1, hi2; // Init http session. verify: check the server CA cert. http_init(&hi1, FALSE); http_init(&hi2, TRUE); /* url = "https://localhost:8080/upload"; sprintf(data, "--1234567890abcdef\r\n" "Content-Disposition: form-data; name=\"upload\"; filename=\"test.txt\"\r\n" "Content-Type: text/plain\r\n\r\n" "test message\r\n" "--1234567890abcdef--\r\n\r\n" ); ret = http_post(&hi1, url, data, response, sizeof(response)); printf("return code: %d \n", ret); printf("return body: %s \n", response); */ url = "https://localhost:8080/upload"; if(http_open(&hi1, url) < 0) { http_strerror(data, 1024); printf("socket error: %s \n", data); goto error; } snprintf(hi1.request.method, 8, "POST"); hi1.request.close = FALSE; hi1.request.chunked = FALSE; snprintf(hi1.request.content_type, 256, "multipart/form-data; boundary=1234567890abcdef"); size = sprintf(data, "--1234567890abcdef\r\n" "Content-Disposition: form-data; name=\"upload\"; filename=\"test.txt\"\r\n" "Content-Type: text/plain\r\n\r\n" "test message\r\n" "--1234567890abcdef--\r\n" ); hi1.request.content_length = size; if(http_write_header(&hi1) < 0) { http_strerror(data, 1024); printf("socket error: %s \n", data); goto error; } if(http_write(&hi1, data, size) != size) { http_strerror(data, 1024); printf("socket error: %s \n", data); goto error; } // Write end-chunked if(http_write_end(&hi1) < 0) { http_strerror(data, 1024); printf("socket error: %s \n", data); goto error; } ret = http_read_chunked(&hi1, response, sizeof(response)); printf("return code: %d \n", ret); printf("return body: %s \n", response); /* // Test a http get method. url = "http://httpbin.org/get?message=https_client"; ret = http_get(&hi1, url, response, sizeof(response)); printf("return code: %d \n", ret); printf("return body: %s \n", response); // Test a http post method. 
url = "http://httpbin.org/post"; sprintf(data, "{\"message\":\"Hello, https_client!\"}"); ret = http_post(&hi1, url, data, response, sizeof(response)); printf("return code: %d \n", ret); printf("return body: %s \n", response); // Test a https get method. url = "https://httpbin.org/get?message=https_client"; ret = http_get(&hi2, url, response, sizeof(response)); printf("return code: %d \n", ret); printf("return body: %s \n", response); // Test a https post method. url = "https://httpbin.org/post"; sprintf(data, "{\"message\":\"Hello, https_client!\"}"); ret = http_post(&hi2, url, data, response, sizeof(response)); printf("return code: %d \n", ret); printf("return body: %s \n", response); // Test a https post with the chunked-encoding data. url = "https://httpbin.org/post"; if(http_open_chunked(&hi2, url) == 0) { size = sprintf(data, "[{\"message\":\"Hello, https_client %d\"},", 0); if(http_write_chunked(&hi2, data, size) != size) { http_strerror(data, 1024); printf("socket error: %s \n", data); goto error; } for(i=1; i<4; i++) { size = sprintf(data, "{\"message\":\"Hello, https_client %d\"},", i); if(http_write_chunked(&hi2, data, size) != size) { http_strerror(data, 1024); printf("socket error: %s \n", data); goto error; } } size = sprintf(data, "{\"message\":\"Hello, https_client %d\"}]", i); if(http_write_chunked(&hi2, data, strlen(data)) != size) { http_strerror(data, 1024); printf("socket error: %s \n", data); goto error; } ret = http_read_chunked(&hi2, response, sizeof(response)); printf("return code: %d \n", ret); printf("return body: %s \n", response); } else { http_strerror(data, 1024); printf("socket error: %s \n", data); } error: */ error: http_close(&hi1); http_close(&hi2); return 0; }
// A download finished, find out what it was, whether there were any errors and
// if so, how severe. If none, rename file and other such stuff.
//
// Drains curl_multi's completion queue. For each finished transfer the easy
// handle is mapped back to its dlhandle_t, errors are classified (note the
// deliberate goto/fallthrough chain inside the switch: fail1 additionally
// marks the download done, then falls into fail2 which prints and cleans up),
// and successful files are renamed into place. Returns qfalse (and aborts all
// HTTP downloads) on a fatal error, qtrue otherwise.
static qboolean finish_download(void)
{
    int msgs_in_queue;
    CURLMsg *msg;
    CURLcode result;
    dlhandle_t *dl;
    CURL *curl;
    long response;
    double sec, bytes;
    char size[16], speed[16];
    char temp[MAX_OSPATH];
    qboolean fatal_error = qfalse;
    const char *err;
    print_type_t level;

    do {
        msg = curl_multi_info_read(curl_multi, &msgs_in_queue);
        if (!msg)
            break;

        if (msg->msg != CURLMSG_DONE)
            continue;

        curl = msg->easy_handle;
        dl = find_handle(curl);

        cls.download.current = NULL;
        cls.download.percent = 0;

        //filelist processing is done on read
        if (dl->file) {
            fclose(dl->file);
            dl->file = NULL;
        }

        curl_handles--;

        result = msg->data.result;

        switch (result) {
            //for some reason curl returns CURLE_OK for a 404...
        case CURLE_HTTP_RETURNED_ERROR:
        case CURLE_OK:
            curl_easy_getinfo(curl, CURLINFO_RESPONSE_CODE, &response);
            if (result == CURLE_OK && response == 200) {
                //success
                break;
            }

            err = http_strerror(response);

            //404 is non-fatal
            if (response == 404) {
                level = PRINT_ALL;
                goto fail1;
            }

            //every other code is treated as fatal
            //not marking download as done since
            //we are falling back to UDP
            level = PRINT_ERROR;
            fatal_error = qtrue;
            goto fail2;

        case CURLE_COULDNT_RESOLVE_HOST:
        case CURLE_COULDNT_CONNECT:
        case CURLE_COULDNT_RESOLVE_PROXY:
            //connection problems are fatal
            err = curl_easy_strerror(result);
            level = PRINT_ERROR;
            fatal_error = qtrue;
            goto fail2;

        default:
            err = curl_easy_strerror(result);
            level = PRINT_WARNING;
            // fail1 marks the download done (so it is not retried), then
            // execution continues into fail2 for the shared error cleanup.
fail1:
            //we mark download as done even if it errored
            //to prevent multiple attempts.
            CL_FinishDownload(dl->queue);
fail2:
            Com_LPrintf(level,
                        "[HTTP] %s [%s] [%d remaining file%s]\n",
                        dl->queue->path, err, cls.download.pending,
                        cls.download.pending == 1 ? "" : "s");
            // Discard the partial temp file and any in-memory buffer.
            if (dl->path[0]) {
                remove(dl->path);
                dl->path[0] = 0;
            }
            if (dl->buffer) {
                Z_Free(dl->buffer);
                dl->buffer = NULL;
            }
            curl_multi_remove_handle(curl_multi, curl);
            continue;
        }

        //mark as done
        CL_FinishDownload(dl->queue);

        //show some stats
        curl_easy_getinfo(curl, CURLINFO_TOTAL_TIME, &sec);
        curl_easy_getinfo(curl, CURLINFO_SIZE_DOWNLOAD, &bytes);

        // Avoid division by zero in the speed computation below.
        if (sec < 0.001)
            sec = 0.001;

        Com_FormatSizeLong(size, sizeof(size), bytes);
        Com_FormatSizeLong(speed, sizeof(speed), bytes / sec);

        //FIXME:
        //technically i shouldn't need to do this as curl will auto reuse the
        //existing handle when you change the url. however, the curl_handles goes
        //all weird when reusing a download slot in this way. if you can figure
        //out why, please let me know.
        curl_multi_remove_handle(curl_multi, curl);

        Com_Printf("[HTTP] %s [%s, %s/sec] [%d remaining file%s]\n",
                   dl->queue->path, size, speed, cls.download.pending,
                   cls.download.pending == 1 ? "" : "s");

        if (dl->path[0]) {
            //rename the temp file
            Q_snprintf(temp, sizeof(temp), "%s/%s", fs_gamedir, dl->queue->path);

            if (rename(dl->path, temp))
                Com_EPrintf("[HTTP] Failed to rename '%s' to '%s': %s\n",
                            dl->path, dl->queue->path, strerror(errno));
            dl->path[0] = 0;

            //a pak file is very special...
            if (dl->queue->type == DL_PAK) {
                CL_RestartFilesystem(qfalse);
                rescan_queue();
            }
        } else if (!fatal_error) {
            // Downloads without a temp path are in-memory file lists.
            parse_file_list(dl);
        }
    } while (msgs_in_queue > 0);

    //fatal error occured, disable HTTP
    if (fatal_error) {
        abort_downloads();
        return qfalse;
    }

    // see if we have more to dl
    CL_RequestNextDownload();
    return qtrue;
}
int getAddress (char* url) { char *stream, *text, *textHighlight, *lenstr; char *tokens = (char*)malloc(DEF_BUFF_SIZE); int *positions;// array to record position of each token in the text cvector addressVector; Address *adr; long len; int MAXLEN = 1805; int EXTRA = 11; /* 4 for field name "data", 1 for "=" */ int MAXINPUT = MAXLEN+EXTRA+2; char input[MAXINPUT]; char* data = input, *p; int rightOrWrong = -1, numRight, numTotal; //char* domain_url; int i; http_setTimeout(8);//seconds //fetch web page int ret = httpFetch (url, &stream); if (ret == -1) { printf("%s\n",http_strerror()); exit(0); } //printf("ret: %d, strlen: %d\n",ret, strlen(stream)); assert(stream); text= (char*)malloc(ret+2); if (!text) { printf("out of memory when convert text to tokens!\n"); exit(0); } strncpy(text, stream, ret); //append a '\0' to the end of string to make sure it is end with two '\0' for flex to scan *(text+ret) = '\0'; *(text+ret+1) = '\0'; free(stream); /* convert text to tokens, remove tags and convert back to string: tokens and keep all positions in array "positions" */ convertToken2Text(text, tokens, &positions); // get base domain of given url //e.g. 
given http://www.google.com/address, return http://www.google.com to domain_url /* domain_url = (char*)malloc(strlen(url)+1); strcpy(domain_url, url); for (i=strlen(url); i>0; i--) { if (url[i] == '/') { if (url[i-1] == '/' ) // is "//" break; else // not "//" domain_url[i] = '\0'; } } printf ("<base href=\"%s%s\">\n", GEO_URL, domain_url); free(domain_url); */ VectorNew (&addressVector, sizeof (Address),free_address, DEF_ADDRESS_PER_PAGE); //extract address, //get position from positions vector //and save extracted address, position, country to addressVector extractAddress(tokens, positions, &addressVector); //display the parsed text //printf("tokens: %s\n",tokens); //printf("url: %s\n", url); //printf("domain_url: %s\n", domain_url); //output header printf("<meta http-equiv=\"Content-Type\" content=\"text/html; charset=ISO-8859-1\">\n"); printf("<table border=1 width=100%%><tr><td><table border=1 bgcolor=#ffffff cellpadding=10 cellspacing=0 width=100%% color=#ffffff><tr><td>\n"); printf("<font face=arial,sans-serif color=black size=-1>\n"); printf("<b><a href='%s'>US, UK & Canadian Addresses</a> extracted by <a href='%s'>Geo Extractor</a> from web page</b> <a href='%s'>%s</a></font><br><br>\n",LIST_FILES_URL, HOME_PAGE,url,url); //printf("%s,",textHighlight); //display extracted address //table header printf("<table width=100%% border=0 cellpadding=0 cellspacing=0><tr><td bgcolor=#3366cc><img width=1 height=1 alt=''></td></tr></table>\n"); printf("<table width=100%% border=0 cellpadding=0 cellspacing=0 bgcolor=#e5ecf9><tr><td width=10></td><td bgcolor=#e5ecf9 nowrap><br>\n"); printf("<font face=arial,sans-serif color=black size=-1><b>\n"); for (i=0; i<addressVector.ItemsCount; i++) { adr = (Address*)VectorNth(&addressVector,i); printf("%s<br>\n", adr->address); /*printf("%s, start: %d, end: %d<br>\n",adr->address, adr->start, adr->end); for (j=adr->start; j<=adr->end; j++) printf("%c",*(text+j)); printf("\n"); */ } printf("</b></font>\n"); 
printf("<br></td></tr></table>\n"); printf("<table width=100%% border=0 cellpadding=0 cellspacing=0><tr><td bgcolor=#3366cc><img width=1 height=1 alt=''></td></tr></table>\n"); textHighlight = (char*)malloc(DEF_BUFF_SIZE); numRight=numTotal =addressVector.ItemsCount; /* if there is a user post, we save the user input to get tagged data*/ lenstr = getenv("CONTENT_LENGTH"); if ( !(lenstr == NULL || sscanf(lenstr,"%ld",&len)!=1 || len > MAXLEN) ) { tagAddress(text, textHighlight, &addressVector); fgets(input, len+1, stdin); URLdecode(input); data = input+EXTRA; //printf("posted: %s\n",data); len = strlen("right"); if ( strncmp(data, "right", len)==0 ) { rightOrWrong = 0; //set flag for right or wrong extraction } len = strlen("wrong"); if ( strncmp(data, "wrong", len)==0 ) { rightOrWrong = 1; //user input "Wrong Extraction" } // get user input: numRight, which is number of correct extracted address data += strlen("right") + strlen("&numRight="); p = data; while (*data++ !='&'); *data= '\0'; numRight = atoi(p); //printf("numRight: %d\n", numRight); // get user input numTotal, which is number of total address in the page p = data+strlen("numTotal="); numTotal = atoi(p); //printf("numTotal: %d\n", numTotal); if (rightOrWrong == 0) { //printf("webpage saved to RIGHT folder\n"); saveTaggedText(url, text, textHighlight, rightOrWrong, numRight, addressVector.ItemsCount, numTotal); } if ( rightOrWrong == 1 ) { //printf("webpage saved to WRONG folder\n"); saveTaggedText(url, text, textHighlight, rightOrWrong, numRight, addressVector.ItemsCount, numTotal); } //printf("tagged text: %s\n", textHighlight); } // give source text, and addressVector //highlight all extracted address in the webpage getHighlight(text, textHighlight, &addressVector); /* if there is at least one address extracted, show user input to let user judge where extraction is correct*/ if ( SHOW_COLLECT_DATA_INTERFACE) { printf("<FORM ACTION=\"%s%s\" METHOD=\"POST\">\n", GEO_URL, url); printf("<font 
face=arial,sans-serif color=black size=-1>\n"); printf("<P><input name=\"extraction\" type=\"radio\" value=\"right\" "); if ((rightOrWrong == 0)||(rightOrWrong == -1)) //if no user input or user input: extracted address all correct printf("checked"); printf("> All address extracted correctly<br>\n"); printf("<input name=\"extraction\" type=\"radio\" value=\"wrong\" "); if (rightOrWrong == 1) //user input: extracted address all correct printf("checked"); printf("> Not all addresses extracted correctly. \n"); printf("<input type=\"text\" name=\"numRight\" size=\"4\" value=\"%d\"> addresses extracted correctly from total <input type=\"text\" name=\"numTotal\" size=\"4\" value=\"%d\"> addresses<BR>\n", numRight, numTotal); printf("<INPUT TYPE=\"SUBMIT\" VALUE=\"Save Webpage\"></font></FORM>\n"); //show google search printf("<SCRIPT language=\"JavaScript\">function OnSubmitForm(){ document.g.action =\"%shttp://www.google.com/search?num=100&q=\"+document.g.q.value.replace(\" \",\"%%2B\");}</SCRIPT>\n", GEO_URL); printf("<table border=0 align=right><tr><td>\n"); printf("<form action=\"\" method=\"post\" name=\"g\" onSubmit=\"return OnSubmitForm();\">\n"); printf("<input size=\"32\" name=\"q\">\n"); printf("<INPUT TYPE=\"SUBMIT\" name=\"Submit\" VALUE=\"Google\"></form>\n"); printf("</td></tr></table>\n"); } printf("</td></tr></table></td></tr></table>\n"); //extract address from original html text // extract_address(text); // printf("Original <hr>%s",text); printf("<hr>\n"); //printf("%s",textHighlight); displayHtmlAbsoluteURL(textHighlight, url); VectorDispose(&addressVector); free (positions); free (text); free (tokens); return 0; }