void beforeSend(struct requestValues* reqVal, struct responseValues* respVal, char* path) { int isGET = 0; int isMethodAllowed = 1; char url[URL_LEN + 1] = {}; if (reqVal->request[0] == 'G') { isGET = 1; } else if (reqVal->request[0] != 'H') { isMethodAllowed = 0; } findUrl((const char*)reqVal->request, url); char decUrl[URL_LEN + 1] = {}; decodeUrl(decUrl, (const char*)url); int len = findGetParams(decUrl); respVal->contType[0] = '\0'; int isIndex = 0; if ((isIndex = getContentType((const char*)decUrl, respVal->contType)) == 1) { //4 if (len > 0 && (decUrl[len - 1] != '/')) { decUrl[len] = '/'; decUrl[len + 1] = '\0'; } addSymbs(decUrl, "index.html"); } addSymbs(path, DOCUMENT_ROOT); addSymbs(path, (const char*)decUrl); respVal->isFound = 0; //1 respVal->isAccessDenied = 0; //2 respVal->contLen = 0; //3 struct stat stat_buf; if (stat((const char*)path, &stat_buf) == 0 && findValue((const char*)path, "../", NULL) == 0) { respVal->isFound = 1; if (isMethodAllowed == 1) { respVal->contLen = stat_buf.st_size; } else { respVal->isAccessDenied = 1; } } else if (isIndex == 1) { respVal->isAccessDenied = 1; } if (respVal->isFound == 0 || respVal->isAccessDenied == 1 || isGET == 0) { path[0] = '\0'; } }
std::string prettify(std::string url){ // Finds the name by getting the name after // the last "/" and the before the first "?" // after that size_t last_slash = url.find_last_of('/')+1; size_t first_ques= url.find('?',last_slash)-1; if( first_ques == std::string::npos) first_ques = url.length(); return decodeUrl(url.substr(last_slash,first_ques-last_slash+1)); }
bool HttpParserImpl::parseStartLine(const char* line, const char* line_end) { const char* p_line = line; const char* p_end = line_end; if(!str_buf_.empty()) { str_buf_.append(line, line_end); p_line = str_buf_.c_str(); p_end = p_line + str_buf_.length(); } std::string str; const char* p = std::find(p_line, p_end, ' '); if(p != p_end) { str.assign(p_line, p); p_line = p + 1; } else { return false; } is_request_ = !is_equal(str, "HTTP", 4); if(is_request_) {// request method_.swap(str); p = std::find(p_line, p_end, ' '); if(p != p_end) { url_.assign(p_line, p); p_line = p + 1; } else { return false; } version_.assign(p_line, p_end); decodeUrl(); parseUrl(); } else {// response version_.swap(str); p = std::find(p_line, p_end, ' '); str.assign(p_line, p); status_code_ = atoi(str.c_str()); } clearBuffer(); return true; }
int getSite(char* site, double* ping, int print) { //---- check command line arguments ---- char protocol[0x100]; char hostname[0x100]; int port; char resource[0x100]; int pageSize = -1; if((!site)||(decodeUrl(site,protocol,hostname,&port,resource)==-1)) { fprintf(stderr,"bad argument\n"); return -1; } if(print>0){ fprintf(stderr,"url=%s\n",site); fprintf(stderr,"protocol=%s\n",protocol); fprintf(stderr,"hostname=%s\n",hostname); fprintf(stderr,"port=%d\n",port); fprintf(stderr,"resource=%s\n",resource); } //---- detect proxy ---- const char *connectHost=hostname; int connectPort=port; char proxyHost[0x100]; int proxyPort; if(getHttpProxy(proxyHost,&proxyPort)!=-1) { if(print>0){ fprintf(stderr,"proxyHost=%s\n",proxyHost); fprintf(stderr,"proxyPort=%d\n",proxyPort); } connectHost=proxyHost; connectPort=proxyPort; } int r=-1; char buffer[0x100]; (void)connectHost; // avoid ``unused variable'' warning (void)connectPort; // avoid ``unused variable'' warning (void)r; // avoid ``unused variable'' warning (void)buffer; // avoid ``unused variable'' warning //---- extract destination IP address ---- struct hostent *host=gethostbyname(connectHost); if(!host) { if(print>0)fprintf(stderr,"unknown host %s\n",connectHost); return -1; } in_addr_t ipAddress=*((in_addr_t *)(host->h_addr)); //---- create client socket ---- int clientSocket=socket(PF_INET,SOCK_STREAM,0); if(clientSocket==-1) { perror("socket"); return -1; } // ... connected to the specified destination/port struct sockaddr_in toAddr; toAddr.sin_family=AF_INET; toAddr.sin_port=htons(connectPort); toAddr.sin_addr.s_addr=ipAddress; if(connect(clientSocket,(struct sockaddr *)&toAddr,sizeof(toAddr))==-1) { perror("connect"); return -1; } if(!strcmp(protocol,"http")) //---- handle HTTP protocol ---- { //---- send HTTP request ---- r=sprintf(buffer,"GET %s HTTP/1.1\n" "Host: %s:%d\n" "Connection: close\n" "\n", (proxyPort==-1 ? 
resource : site),hostname,port); double startTime = getTime(); sendAll(clientSocket,buffer,r); //---- receive HTTP reply header ---- int i=0; for(;;) { r=recvLine(clientSocket,buffer,0x100); if(i==0){ *ping = (double)(getTime() - startTime); i++; } if(r==-1) { perror("recvLine"); return -1; } if(print>0){fprintf(stderr,"header: %s",buffer);} if(!strcmp(buffer,"\n")||!strcmp(buffer,"\r\n")) { break; } } //---- receive HTTP reply content ---- for(;;) { r=recv(clientSocket,buffer,0x100,0); pageSize+=r; if(r<=0) { break; } if(print>0)fwrite(buffer,r,1,stdout); } } else if(!strcmp(protocol,"https")) //---- handle HTTPS protocol ---- { //---- initialise SSL context ---- SSL_CTX *ctx; SSL_library_init(); SSL_load_error_strings(); OpenSSL_add_all_algorithms(); ctx=SSL_CTX_new(SSLv23_method()); if(!SSL_CTX_load_verify_locations(ctx,"cacert.pem",NULL)) { fprintf(stderr,"SSL_CTX_load_verify_locations: %s\n", ERR_error_string(ERR_get_error(),NULL)); return -1; } if(proxyPort!=-1) { //---- ask the proxy a connection to the server ---- r=sprintf(buffer,"CONNECT %s:%d HTTP/1.1\n" "Host: %s:%d\n" "\n", hostname,port,hostname,port); sendAll(clientSocket,buffer,r); for(;;) { r=recvLine(clientSocket,buffer,0x100); if(r==-1) { perror("recvLine"); return -1; } fprintf(stderr,"connect header: %s",buffer); if(!strcmp(buffer,"\n")||!strcmp(buffer,"\r\n")) { break; } } } //---- initialise SSL connection over the TCP connection ---- SSL *ssl=SSL_new(ctx); SSL_set_mode(ssl,SSL_MODE_AUTO_RETRY); SSL_set_fd(ssl,clientSocket); r=SSL_connect(ssl); if(r!=1) { fprintf(stderr,"SSL_connect: %s\n", ERR_error_string(ERR_get_error(),NULL)); return -1; } //---- warn if untrusted certificate or bad common-name ---- r=SSL_get_verify_result(ssl); if(r!=X509_V_OK) { fprintf(stderr,"!!! Warning !!! 
Certificate not trusted\n"); } X509 *cert=SSL_get_peer_certificate(ssl); if(!cert) { fprintf(stderr,"SSL_get_peer_certificate: %s\n", ERR_error_string(ERR_get_error(),NULL)); } else { char commonName[0x100]=""; X509_NAME_get_text_by_NID(X509_get_subject_name(cert),NID_commonName, commonName,0x100); if(strcmp(commonName,hostname)) { fprintf(stderr,"!!! Warning !!! Common name `%s' != host name `%s'\n", commonName,hostname); } X509_free(cert); } //---- send HTTP request ---- r=sprintf(buffer,"GET %s HTTP/1.1\n" "Host: %s:%d\n" "Connection: close\n" "\n", resource,hostname,port); sslSendAll(ssl,buffer,r); //---- receive HTTP reply header ---- for(;;) { r=sslRecvLine(ssl,buffer,0x100); if(r==-1) { perror("sslRecvLine"); return -1; } fprintf(stderr,"header: %s",buffer); if(!strcmp(buffer,"\n")||!strcmp(buffer,"\r\n")) { break; } } //---- receive HTTP reply content ---- for(;;) { r=SSL_read(ssl,buffer,0x100); pageSize+=r; if(r<=0) { break; } fwrite(buffer,r,1,stdout); } //---- close SSL resources --- SSL_free(ssl); SSL_CTX_free(ctx); } else { fprintf(stderr,"unsupported protocol %s\n",protocol); return -1; } //---- close client socket ---- if(close(clientSocket)==-1) { perror("close"); return -1; } return ++pageSize; }
/*
 * Builds the article metadata for the file at `path`.
 *
 * The article id and URL are `path` with the leading directoryPath (plus one
 * separator character) removed; the MIME type and namespace are derived from
 * that id. For "text/html" content the file is parsed with Gumbo to find the
 * <title> text and, when `detectRedirects` is true, a redirect target via
 * extractRedirectUrlFromHtml(). A redirect whose target file does not exist
 * under directoryPath marks the article as invalid.
 */
Article::Article(const std::string& path, const bool detectRedirects) {
  invalid = false;

  /* aid and url: path relative to the directory root */
  aid = path.substr(directoryPath.size() + 1);
  url = aid;

  /* mime-type, and the namespace that goes with it */
  mimeType = getMimeTypeForFile(aid);
  ns = getNamespaceForMimeType(mimeType)[0];

  /* Everything below is HTML specific */
  if (mimeType.find("text/html") == std::string::npos) {
    return;
  }

  std::string html = getFileContent(path);
  GumboOutput* output = gumbo_parse(html.c_str());
  GumboNode* root = output->root;

  const bool rootHasChildren =
      root->type == GUMBO_NODE_ELEMENT && root->v.element.children.length >= 2;
  if (rootHasChildren) {
    /* Locate the first <head> element among the root's children */
    const GumboVector& rootChildren = root->v.element.children;
    GumboNode* head = NULL;
    for (unsigned int idx = 0; idx < rootChildren.length && head == NULL; ++idx) {
      GumboNode* candidate = static_cast<GumboNode*>(rootChildren.data[idx]);
      if (candidate->type == GUMBO_NODE_ELEMENT
          && candidate->v.element.tag == GUMBO_TAG_HEAD) {
        head = candidate;
      }
    }

    if (head != NULL) {
      GumboVector* headChildren = &head->v.element.children;

      /* Take the text of every <title> holding exactly one text node;
         all children are visited, so the last such <title> wins */
      for (unsigned int idx = 0; idx < headChildren->length; ++idx) {
        GumboNode* node = static_cast<GumboNode*>(headChildren->data[idx]);
        if (node->type != GUMBO_NODE_ELEMENT
            || node->v.element.tag != GUMBO_TAG_TITLE) {
          continue;
        }
        if (node->v.element.children.length != 1) {
          continue;
        }
        GumboNode* text = static_cast<GumboNode*>(node->v.element.children.data[0]);
        if (text->type != GUMBO_NODE_TEXT) {
          continue;
        }
        title = text->v.text.text;
        stripTitleInvalidChars(title);
      }

      /* Detect if this is a redirection (if no redirects CSV specified) */
      std::string targetUrl;
      try {
        targetUrl = detectRedirects ? extractRedirectUrlFromHtml(headChildren) : "";
      } catch (std::string &error) {
        /* Extraction errors are reported and the article is kept as-is */
        std::cerr << error << std::endl;
      }

      if (!targetUrl.empty()) {
        redirectAid = computeAbsolutePath(aid, decodeUrl(targetUrl));
        const bool targetExists = fileExists(directoryPath + "/" + redirectAid);
        if (!targetExists) {
          redirectAid.clear();
          invalid = true;
        }
      }
    }

    /* No usable <title>: derive one from the file name */
    if (title.empty()) {
      const std::size_t slash = path.rfind("/");
      if (slash == std::string::npos) {
        title = path;
      } else {
        title = path.substr(slash + 1);
        const std::size_t dot = title.rfind(".");
        if (dot != std::string::npos) {
          title = title.substr(0, dot);
        }
      }
      std::replace(title.begin(), title.end(), '_', ' ');
    }
  }

  gumbo_destroy_output(&kGumboDefaultOptions, output);
}
bool cYTFeedParser::decodeVideoInfo(std::string &answer, cYTVideoInfo &vinfo) { bool ret = false; decodeUrl(answer); #if 0 std::string infofile = thumbnail_dir; infofile += "/"; infofile += vinfo.id; infofile += ".txt"; saveToFile(infofile.c_str(), answer); #endif if(answer.find("token=") == std::string::npos) return ret; //FIXME check expire std::vector<std::string> ulist; std::string::size_type fmt = answer.find("url_encoded_fmt_stream_map="); if (fmt != std::string::npos) { fmt = answer.find("=", fmt); splitString(answer, ",", ulist, fmt+1); for (unsigned i = 0; i < ulist.size(); i++) { #if 0 // to decode all params decodeUrl(ulist[i]); printf("URL: %s\n", ulist[i].c_str()); #endif std::map<std::string,std::string> smap; std::vector<std::string> uparams; splitString(ulist[i], "&", uparams); if (uparams.size() < 3) continue; for (unsigned j = 0; j < uparams.size(); j++) { decodeUrl(uparams[j]); #ifdef DEBUG_PARSER printf(" param: %s\n", uparams[j].c_str()); #endif splitString(uparams[j], "=", smap); } #ifdef DEBUG_PARSER printf("=========================================================\n"); #endif cYTVideoUrl yurl; yurl.url = smap["url"]; std::string::size_type ptr = smap["url"].find("signature="); if (ptr != std::string::npos) { ptr = smap["url"].find("=", ptr); smap["url"].erase(0,ptr+1); if((ptr = smap["url"].find("&")) != std::string::npos) yurl.sig = smap["url"].substr(0,ptr); } int id = atoi(smap["itag"].c_str()); if (supportedFormat(id) && !yurl.url.empty() && !yurl.sig.empty()) { yurl.quality = smap["quality"]; yurl.type = smap["type"]; vinfo.formats.insert(yt_urlmap_pair_t(id, yurl)); ret = true; } } } return ret; }
int main (int argc, char **argv) { if (argc < 2) { fprintf(stderr, "usage: webclient URL\n"); return EXIT_FAILURE; } char msg[BUFFER_LEN]; int vsocket; tUrl url = decodeUrl(argv[1]); if (url.status != SUCCESS) { fprintf(stderr, "Wrong format of URL.\n"); return EXIT_FAILURE; } char filename[BUFFER_LEN]; char filechar[BUFFER_LEN]; exctractFileName(url.path, filechar); strcpy(filename, filechar); codeUnsafeChar(url.domain); codeUnsafeChar(url.path); createRequest(msg, url); if (connectToServer(url, &vsocket) == FAILED) { fprintf(stderr, "Failed: connect to server\n"); return EXIT_FAILURE; } if (write(vsocket, msg, strlen(msg)) == FAILED) { fprintf(stderr, "Failed: write request\n"); return EXIT_FAILURE; } char answer[BUFFER_LEN]; if (readHeader(vsocket, answer) == FAILED) { fprintf(stderr, "Failed: read header\n"); return EXIT_FAILURE; } int enconding = selectEnconding(answer); tConn connection; connection = selectStatus(answer); if (connection.status >= 400) { fprintf(stderr, "%d\n",connection.status); return EXIT_FAILURE; } char msg_redir[BUFFER_LEN]; int connect_try = 1; tUrl url_redir; // Redirection while (connection.status == 301 || connection.status == 302) { if (selectLocation(answer, msg_redir) == FAILED) { fprintf(stderr, "Failed: redirection\n"); return EXIT_FAILURE; } url_redir = decodeUrl(msg_redir); if (url_redir.status != SUCCESS) { fprintf(stderr, "Failed: decode URL (redirection %d)\n",connect_try); return EXIT_FAILURE; } exctractFileName(url_redir.path, filename); codeUnsafeChar(url_redir.domain); codeUnsafeChar(url_redir.path); createRequest(msg_redir, url_redir); if (connectToServer(url_redir, &vsocket) == FAILED) { fprintf(stderr, "Failed: connect to server\n"); return EXIT_FAILURE; } if (write(vsocket, msg_redir, strlen(msg_redir)) == FAILED) { fprintf(stderr, "Failed: write request (redirection %d)\n",connect_try); return EXIT_FAILURE; } if (readHeader(vsocket, answer) == FAILED) { fprintf(stderr, "Failed: read header\n"); return EXIT_FAILURE; 
} enconding = selectEnconding(answer); connection = selectStatus(answer); if (connection.status == FAILED) { fprintf(stderr, "%s\n", connection.message); return EXIT_FAILURE; } if (connection.status >= 400) { fprintf(stderr, "%d\n",connection.status); return EXIT_FAILURE; } connect_try++; if (connect_try > 5) { fprintf(stderr, "Multiple (5) redirection."); return EXIT_FAILURE; } } FILE *fw; remove(filename); if ((fw = fopen(filename,"a")) == NULL) { fprintf(stderr, "Failed: open dest file\n"); return EXIT_FAILURE; } // Non-chunked coding if (enconding != CHUNKED) { char ch; while (read(vsocket, &ch, sizeof(char)) > 0) fprintf(fw,"%c",ch); fclose(fw); return SUCCESS; } // Chunked coding char chr1 = '#'; char chr2 = '#'; int i = 0; int chunk = 0; char ch[BUFFER_LEN]; while (read(vsocket, &chr2, sizeof(char)) > 0) { ch[i] = chr2; i++; if (chr1 == '\r' && chr2 == '\n') { ch[i-2] = '\0'; chunk = (int) strtol(ch, NULL, 16); i = 0; break; } chr1 = chr2; } int w = chunk; char p; while (w > 0) { if (read(vsocket, &p, sizeof(char)) == FAILED) { fprintf(stderr, "Failed: read data\n"); return EXIT_FAILURE; } w--; fprintf(fw, "%c", p); } char ch1 = '#', ch2 = '#'; chunk = -1; char chr[BUFFER_LEN]; int n = 0; while (chunk != 0) { if (read(vsocket, &ch2, sizeof(char)) == FAILED) { fprintf(stderr, "Failed: read chunk or \\r\\n\n"); return EXIT_FAILURE; } if (ch1 == '\r' && ch2 == '\n') { while (ch2 != '\r') { if (read(vsocket, &ch2, sizeof(char)) == FAILED) { fprintf(stderr, "Failed: read chunk\n"); return EXIT_FAILURE; } chr[i] = ch2; i++; } ch1 = ch2; chr[i] = '\0'; i = 0; if (read(vsocket, &ch2, sizeof(char)) == FAILED) { fprintf(stderr, "Failed: read chunked data\n"); return EXIT_FAILURE; } if (ch1 == '\r' && ch2 == '\n') { chunk = (int) strtol(chr, NULL, 16); if (chunk == 0) break; n = chunk; while (n > 0) { if (read(vsocket, &p, sizeof(char)) == FAILED) { fprintf(stderr, "Failed: read chunked data\n"); return EXIT_FAILURE; } n--; fprintf(fw, "%c", p); } ch2 = '#'; } } ch1 = 
ch2; } fclose(fw); return EXIT_SUCCESS; }