示例#1
0
文件: sending.c 项目: svr93/highload
// Prepares the response metadata for one request and resolves the file path
// that should be sent back.
//
//   reqVal  - request to inspect; only reqVal->request is read.
//   respVal - receives contType, isFound, isAccessDenied and contLen.
//   path    - buffer that DOCUMENT_ROOT and the decoded URL are appended to
//             (assumes the caller passes it in as an empty string and that
//             addSymbs appends - TODO confirm). It is reset to "" whenever the
//             file must not be sent: not found, access denied, or the request
//             is not a GET.
void beforeSend(struct requestValues* reqVal, 
struct responseValues* respVal, char* path) {

    // The method is recognised from the first request byte only:
    // 'G' counts as GET, 'H' as an allowed non-GET method, anything else
    // is a disallowed method.
    int isGET = (reqVal->request[0] == 'G');
    int isMethodAllowed = isGET || (reqVal->request[0] == 'H');

    // Extract the raw URL from the request, then decode it.
    char url[URL_LEN + 1] = {};
    findUrl((const char*)reqVal->request, url);

    char decUrl[URL_LEN + 1] = {};
    decodeUrl(decUrl, (const char*)url);

    // NOTE(review): len is used below as the length of the path part of
    // decUrl; presumably findGetParams also cuts the query string - confirm.
    int len = findGetParams(decUrl);

    respVal->contType[0] = '\0';
    int isIndex = getContentType((const char*)decUrl, respVal->contType);

    // A result of 1 means the URL should be served through its index.html;
    // make sure the path ends with '/' before appending the file name.
    if (isIndex == 1) {
        if (len > 0 && decUrl[len - 1] != '/') {
            decUrl[len] = '/';
            decUrl[len + 1] = '\0';
        }
        addSymbs(decUrl, "index.html");
    }

    // Build the on-disk path: document root followed by the decoded URL.
    addSymbs(path, DOCUMENT_ROOT);
    addSymbs(path, decUrl);

    // Pessimistic defaults, overwritten below once the file is confirmed.
    respVal->isFound = 0;
    respVal->isAccessDenied = 0;
    respVal->contLen = 0;

    struct stat fileInfo;

    // The file counts as found only if it exists and findValue() reports 0
    // for "../" in the path (the traversal check runs only after stat succeeds).
    if (stat(path, &fileInfo) == 0 &&
    findValue(path, "../", NULL) == 0) {

        respVal->isFound = 1;
        if (!isMethodAllowed) {
            respVal->isAccessDenied = 1;
        } else {
            respVal->contLen = fileInfo.st_size;
        }
    } else if (isIndex == 1) {
        // An index request whose index.html is unavailable is reported
        // as access denied rather than as not found.
        respVal->isAccessDenied = 1;
    }

    // Keep the path only when a body will actually be sent.
    int sendFile = (respVal->isFound != 0) &&
                   (respVal->isAccessDenied != 1) &&
                   (isGET != 0);
    if (!sendFile) {
        path[0] = '\0';
    }
}
// Extracts a display name from a URL: the text after the last '/' and
// before the first '?' that follows it, passed through decodeUrl().
//
// If the URL has no '/', the name starts at the beginning of the string.
// If there is no '?', the name runs to the end of the string.
//
// The "not found" results of the searches are checked before any arithmetic
// is done on them, so a URL such as "?a=b" yields an empty name instead of
// the whole string.
std::string prettify(std::string url){
    const std::string::size_type slash = url.find_last_of('/');
    const std::string::size_type name_start =
        (slash == std::string::npos) ? 0 : slash + 1;

    const std::string::size_type query = url.find('?', name_start);
    const std::string::size_type name_len =
        (query == std::string::npos) ? std::string::npos : query - name_start;

    // name_start never exceeds url.size(), so substr() cannot throw here.
    return decodeUrl(url.substr(name_start, name_len));
}
示例#3
0
// Parses the first line of an HTTP message given as the range [line, line_end).
//
// If str_buf_ already holds data, the new range is appended to it and the
// combined buffer is parsed instead.
//
// A first token that is_equal() matches against "HTTP" marks a response:
// the token becomes version_ and the following token is converted to
// status_code_. Otherwise the line is a request: the tokens become method_,
// url_ and version_, after which decodeUrl() and parseUrl() are invoked.
//
// Returns false (leaving the buffer untouched) when a required space
// separator is missing; returns true after clearBuffer() on success.
bool HttpParserImpl::parseStartLine(const char* line, const char* line_end)
{
    const char* cur = line;
    const char* end = line_end;
    if (!str_buf_.empty()) {
        // Continue from previously buffered data.
        str_buf_.append(line, line_end);
        cur = str_buf_.c_str();
        end = cur + str_buf_.length();
    }

    // First token: request method or response version.
    const char* sep = std::find(cur, end, ' ');
    if (sep == end) {
        return false;
    }
    std::string token(cur, sep);
    cur = sep + 1;

    is_request_ = !is_equal(token, "HTTP", 4);
    if (!is_request_) {
        // Response: "<version> <status> ..."
        version_.swap(token);
        sep = std::find(cur, end, ' ');
        const std::string status(cur, sep);
        status_code_ = atoi(status.c_str());
    } else {
        // Request: "<method> <url> <version>"
        method_.swap(token);
        sep = std::find(cur, end, ' ');
        if (sep == end) {
            return false;
        }
        url_.assign(cur, sep);
        cur = sep + 1;
        version_.assign(cur, end);
        decodeUrl();
        parseUrl();
    }

    clearBuffer();
    return true;
}
示例#4
0
// Downloads the page at `site` over HTTP or HTTPS, optionally through the
// proxy reported by getHttpProxy().
//
//   site  - URL to fetch; split by decodeUrl() into protocol/host/port/resource.
//   ping  - receives the delay between sending the request and the return of
//           the first recvLine() call. Only written on the plain-HTTP path.
//   print - when > 0, diagnostics go to stderr and (on the HTTP path) the body
//           is written to stdout. The HTTPS path prints unconditionally.
//
// Returns -1 on failure. Otherwise returns the page size as accumulated below:
// pageSize starts at -1, every result of the body read call is added
// (including the terminating 0 or -1) and the total is incremented once, so a
// clean end-of-stream yields the exact number of body bytes while a read
// error at the end yields one less (-1 when nothing was received).
//
// Requests are formatted with snprintf() into a buffer large enough for the
// decoded URL parts; a request that still does not fit is rejected instead of
// overflowing. The socket and any SSL objects are released on every error
// path.
int getSite(char* site, double* ping, int print)
{
//---- check command line arguments ----
char protocol[0x100];
char hostname[0x100];
int port;
char resource[0x100];
int pageSize = -1;
if((!site)||(decodeUrl(site,protocol,hostname,&port,resource)==-1))
  { fprintf(stderr,"bad argument\n"); return -1; }
if(print>0){
  fprintf(stderr,"url=%s\n",site);
  fprintf(stderr,"protocol=%s\n",protocol);
  fprintf(stderr,"hostname=%s\n",hostname);
  fprintf(stderr,"port=%d\n",port);
  fprintf(stderr,"resource=%s\n",resource);
}

//---- detect proxy ----
const char *connectHost=hostname;
int connectPort=port;
char proxyHost[0x100];
// -1 means "no proxy"; initialised here because the value is tested below
// even when getHttpProxy() fails and may not have written it.
int proxyPort=-1;
if(getHttpProxy(proxyHost,&proxyPort)!=-1)
  {
    if(print>0){
      fprintf(stderr,"proxyHost=%s\n",proxyHost);
      fprintf(stderr,"proxyPort=%d\n",proxyPort);
    } 
  connectHost=proxyHost;
  connectPort=proxyPort;
  }

int r=-1;
char buffer[0x100];   // receive buffer
char request[0x400];  // outgoing request; larger than buffer because it
                      // holds the resource (or full URL) plus the host name

//---- extract destination IP address ----
struct hostent *host=gethostbyname(connectHost);
if(!host)
  { if(print>0)fprintf(stderr,"unknown host %s\n",connectHost); return -1; }
in_addr_t ipAddress=*((in_addr_t *)(host->h_addr));
//---- create client socket ----
int clientSocket=socket(PF_INET,SOCK_STREAM,0);
if(clientSocket==-1)
  { perror("socket"); return -1; }
// ... connected to the specified destination/port
struct sockaddr_in toAddr;
memset(&toAddr,0,sizeof(toAddr));
toAddr.sin_family=AF_INET;
toAddr.sin_port=htons(connectPort);
toAddr.sin_addr.s_addr=ipAddress;
if(connect(clientSocket,(struct sockaddr *)&toAddr,sizeof(toAddr))==-1)
  { perror("connect"); close(clientSocket); return -1; }

if(!strcmp(protocol,"http")) //---- handle HTTP protocol ----
  {
  //---- send HTTP request ----
  // Through a proxy the full URL is requested, otherwise only the resource.
  r=snprintf(request,sizeof(request),
                   "GET %s HTTP/1.1\n"
                   "Host: %s:%d\n"
                   "Connection: close\n"
                   "\n",
                   (proxyPort==-1 ? resource : site),hostname,port);
  if(r<0||r>=(int)sizeof(request))
    { fprintf(stderr,"request too long\n"); close(clientSocket); return -1; }
  double startTime = getTime();
  sendAll(clientSocket,request,r);

  //---- receive HTTP reply header ----
  int firstLineSeen=0;
  for(;;)
    {
    r=recvLine(clientSocket,buffer,0x100);
    if(!firstLineSeen){
      // Time until the first header line came back.
      *ping = (double)(getTime() - startTime);
      firstLineSeen=1;
    }
    if(r==-1)
      { perror("recvLine"); close(clientSocket); return -1; }
    if(print>0){fprintf(stderr,"header: %s",buffer);}
    // An empty line terminates the header.
    if(!strcmp(buffer,"\n")||!strcmp(buffer,"\r\n")) { break; }
    }

  //---- receive HTTP reply content ----

  for(;;)
    {
    r=recv(clientSocket,buffer,0x100,0);
    pageSize+=r; // the terminating 0 / -1 is counted too, see header comment
    if(r<=0)
      { break; }
    if(print>0)fwrite(buffer,r,1,stdout);
    }

  }
else if(!strcmp(protocol,"https")) //---- handle HTTPS protocol ----
  {
  //---- initialise SSL context ----

  SSL_CTX *ctx;
  SSL_library_init();
  SSL_load_error_strings();
  OpenSSL_add_all_algorithms();
  ctx=SSL_CTX_new(SSLv23_method());
  if(!ctx)
    {
    fprintf(stderr,"SSL_CTX_new: %s\n",
                   ERR_error_string(ERR_get_error(),NULL));
    close(clientSocket);
    return -1;
    }
  if(!SSL_CTX_load_verify_locations(ctx,"cacert.pem",NULL))
    {
    fprintf(stderr,"SSL_CTX_load_verify_locations: %s\n",
                   ERR_error_string(ERR_get_error(),NULL));
    SSL_CTX_free(ctx);
    close(clientSocket);
    return -1;
    }

  if(proxyPort!=-1)
    {
    //---- ask the proxy a connection to the server ----

    r=snprintf(request,sizeof(request),
                     "CONNECT %s:%d HTTP/1.1\n"
                     "Host: %s:%d\n"
                     "\n",
                     hostname,port,hostname,port);
    if(r<0||r>=(int)sizeof(request))
      {
      fprintf(stderr,"request too long\n");
      SSL_CTX_free(ctx);
      close(clientSocket);
      return -1;
      }
    sendAll(clientSocket,request,r);
    for(;;)
      {
      r=recvLine(clientSocket,buffer,0x100);
      if(r==-1)
        {
        perror("recvLine");
        SSL_CTX_free(ctx);
        close(clientSocket);
        return -1;
        }
      fprintf(stderr,"connect header: %s",buffer);
      if(!strcmp(buffer,"\n")||!strcmp(buffer,"\r\n")) { break; }
      }

    }

  //---- initialise SSL connection over the TCP connection ----

  SSL *ssl=SSL_new(ctx);
  if(!ssl)
    {
    fprintf(stderr,"SSL_new: %s\n",
                   ERR_error_string(ERR_get_error(),NULL));
    SSL_CTX_free(ctx);
    close(clientSocket);
    return -1;
    }
  SSL_set_mode(ssl,SSL_MODE_AUTO_RETRY);
  SSL_set_fd(ssl,clientSocket);
  r=SSL_connect(ssl);
  if(r!=1)
    {
    fprintf(stderr,"SSL_connect: %s\n",
                   ERR_error_string(ERR_get_error(),NULL));
    SSL_free(ssl);
    SSL_CTX_free(ctx);
    close(clientSocket);
    return -1;
    }
  //---- warn if untrusted certificate or bad common-name ----
  // Verification problems only produce warnings; the download continues.
  r=SSL_get_verify_result(ssl);
  if(r!=X509_V_OK)
    {
    fprintf(stderr,"!!! Warning !!! Certificate not trusted\n");
    }
  X509 *cert=SSL_get_peer_certificate(ssl);
  if(!cert)
    {
    fprintf(stderr,"SSL_get_peer_certificate: %s\n",
                   ERR_error_string(ERR_get_error(),NULL));
    }
  else
    {
    char commonName[0x100]="";
    X509_NAME_get_text_by_NID(X509_get_subject_name(cert),NID_commonName,
                              commonName,0x100);
    if(strcmp(commonName,hostname))
      {
      fprintf(stderr,"!!! Warning !!! Common name `%s' != host name `%s'\n",
                     commonName,hostname);
      }
    X509_free(cert);
    }
 
  //---- send HTTP request ----

  r=snprintf(request,sizeof(request),
                   "GET %s HTTP/1.1\n"
                   "Host: %s:%d\n"
                   "Connection: close\n"
                   "\n",
                   resource,hostname,port);
  if(r<0||r>=(int)sizeof(request))
    {
    fprintf(stderr,"request too long\n");
    SSL_free(ssl);
    SSL_CTX_free(ctx);
    close(clientSocket);
    return -1;
    }
  sslSendAll(ssl,request,r);

  //---- receive HTTP reply header ----

  for(;;)
    {
    r=sslRecvLine(ssl,buffer,0x100);
    if(r==-1)
      {
      perror("sslRecvLine");
      SSL_free(ssl);
      SSL_CTX_free(ctx);
      close(clientSocket);
      return -1;
      }
    fprintf(stderr,"header: %s",buffer);
    if(!strcmp(buffer,"\n")||!strcmp(buffer,"\r\n")) { break; }
    }

  //---- receive HTTP reply content ----

  for(;;)
    {
    r=SSL_read(ssl,buffer,0x100);
    pageSize+=r; // the terminating 0 / -1 is counted too, see header comment
    if(r<=0)
      { break; }
    fwrite(buffer,r,1,stdout);
    }
  
  //---- close SSL resources ---
 
  SSL_free(ssl);
  SSL_CTX_free(ctx);

  }
else
  {
  fprintf(stderr,"unsupported protocol %s\n",protocol);
  close(clientSocket);
  return -1;
  }

//---- close client socket ----

if(close(clientSocket)==-1)
  { perror("close"); return -1; }

return ++pageSize;
}
示例#5
0
文件: article.cpp 项目: zjzdy/openzim
// Builds an article from a file on disk.
//
//   path            - path of the file; the part after directoryPath plus one
//                     separator character becomes both the article id (aid)
//                     and its url.
//   detectRedirects - when true, the HTML <head> is inspected for a redirect
//                     target via extractRedirectUrlFromHtml().
//
// For files whose mime type contains "text/html" the document is parsed with
// Gumbo to pick up the <title> text and, optionally, a redirect target. A
// redirect whose target file does not exist marks the article as invalid.
// When no title was found, one is derived from the file name.
Article::Article(const std::string& path, const bool detectRedirects) {
  invalid = false;

  // Identity and classification derived from the path.
  aid = path.substr(directoryPath.size()+1);
  url = aid;
  mimeType = getMimeTypeForFile(aid);
  ns = getNamespaceForMimeType(mimeType)[0];

  // Everything below only applies to HTML content.
  if (mimeType.find("text/html") == std::string::npos) {
    return;
  }

  std::string html = getFileContent(path);
  GumboOutput* output = gumbo_parse(html.c_str());
  GumboNode* root = output->root;

  if (root->type == GUMBO_NODE_ELEMENT && root->v.element.children.length >= 2) {
    /* Locate the <head> element among the root's children */
    const GumboVector* rootChildren = &root->v.element.children;
    GumboNode* head = NULL;
    for (unsigned int idx = 0; idx < rootChildren->length && head == NULL; ++idx) {
      GumboNode* candidate = (GumboNode*)(rootChildren->data[idx]);
      if (candidate->type == GUMBO_NODE_ELEMENT &&
          candidate->v.element.tag == GUMBO_TAG_HEAD) {
        head = candidate;
      }
    }

    if (head != NULL) {
      GumboVector* headChildren = &head->v.element.children;

      /* Take the title from every <title> holding exactly one text node;
         a later match overwrites an earlier one */
      for (unsigned int idx = 0; idx < headChildren->length; ++idx) {
        GumboNode* node = (GumboNode*)(headChildren->data[idx]);
        if (node->type != GUMBO_NODE_ELEMENT ||
            node->v.element.tag != GUMBO_TAG_TITLE) {
          continue;
        }
        if (node->v.element.children.length != 1) {
          continue;
        }
        GumboNode* text = (GumboNode*)(node->v.element.children.data[0]);
        if (text->type == GUMBO_NODE_TEXT) {
          title = text->v.text.text;
          stripTitleInvalidChars(title);
        }
      }

      /* Detect if this is a redirection (if no redirects CSV specified) */
      std::string targetUrl;
      if (detectRedirects) {
        try {
          targetUrl = extractRedirectUrlFromHtml(headChildren);
        } catch (std::string &error) {
          std::cerr << error << std::endl;
        }
      }
      if (!targetUrl.empty()) {
        redirectAid = computeAbsolutePath(aid, decodeUrl(targetUrl));
        /* A redirect pointing at a missing file invalidates the article */
        if (!fileExists(directoryPath + "/" + redirectAid)) {
          redirectAid.clear();
          invalid = true;
        }
      }
    }

    /* If no title, then compute one from the filename:
       base name without its last extension, underscores shown as spaces */
    if (title.empty()) {
      const std::size_t slashPos = path.rfind("/");
      if (slashPos == std::string::npos) {
        title = path;
      } else {
        title = path.substr(slashPos+1);
        const std::size_t dotPos = title.rfind(".");
        if (dotPos != std::string::npos) {
          title = title.substr(0, dotPos);
        }
      }
      std::replace(title.begin(), title.end(), '_',  ' ');
    }
  }

  gumbo_destroy_output(&kGumboDefaultOptions, output);
}
bool cYTFeedParser::decodeVideoInfo(std::string &answer, cYTVideoInfo &vinfo)
{
	bool ret = false;
	decodeUrl(answer);
#if 0
	std::string infofile = thumbnail_dir;
	infofile += "/";
	infofile += vinfo.id;
	infofile += ".txt";
	saveToFile(infofile.c_str(), answer);
#endif
	if(answer.find("token=") == std::string::npos)
		return ret;

	//FIXME check expire
	std::vector<std::string> ulist;
	std::string::size_type fmt = answer.find("url_encoded_fmt_stream_map=");
	if (fmt != std::string::npos) {
		fmt = answer.find("=", fmt);
		splitString(answer, ",", ulist, fmt+1);
		for (unsigned i = 0; i < ulist.size(); i++) {
#if 0 // to decode all params
			decodeUrl(ulist[i]);
			printf("URL: %s\n", ulist[i].c_str());
#endif
			std::map<std::string,std::string> smap;
			std::vector<std::string> uparams;
			splitString(ulist[i], "&", uparams);
			if (uparams.size() < 3)
				continue;
			for (unsigned j = 0; j < uparams.size(); j++) {
				decodeUrl(uparams[j]);
#ifdef DEBUG_PARSER
				printf("	param: %s\n", uparams[j].c_str());
#endif
				splitString(uparams[j], "=", smap);
			}
#ifdef DEBUG_PARSER
			printf("=========================================================\n");
#endif
			cYTVideoUrl yurl;
			yurl.url = smap["url"];

			std::string::size_type ptr = smap["url"].find("signature=");
			if (ptr != std::string::npos)
			{
				ptr = smap["url"].find("=", ptr);
				smap["url"].erase(0,ptr+1);

				if((ptr = smap["url"].find("&")) != std::string::npos)
					yurl.sig = smap["url"].substr(0,ptr);
			}

			int id = atoi(smap["itag"].c_str());
			if (supportedFormat(id) && !yurl.url.empty() && !yurl.sig.empty()) {
				yurl.quality = smap["quality"];
				yurl.type = smap["type"];
				vinfo.formats.insert(yt_urlmap_pair_t(id, yurl));
				ret = true;
			}
		}
	}
	return ret;
}
示例#7
0
/*
 * webclient: downloads the resource named by the URL in argv[1] into a file
 * in the current directory, named by exctractFileName() from the URL path.
 *
 * Flow: decode the URL, send the request, read the response header, follow
 * up to five 301/302 redirections, then copy the body to the file. The body
 * is copied byte-for-byte unless selectEnconding() reports CHUNKED, in which
 * case the chunk framing is stripped.
 *
 * Returns EXIT_SUCCESS when the body was written, EXIT_FAILURE otherwise.
 *
 * End-of-stream (read() returning 0) is treated as a failure wherever more
 * data is required, and the buffers that collect chunk-size lines are bounds
 * checked, so a truncated or malformed response ends with an error instead of
 * looping forever or overrunning a buffer.
 */
int main (int argc, char **argv)
{
  if (argc < 2) {
    fprintf(stderr, "usage: webclient URL\n");
    return EXIT_FAILURE;
  }

  char msg[BUFFER_LEN];
  int vsocket;

  tUrl url = decodeUrl(argv[1]);
  if (url.status != SUCCESS) {
    fprintf(stderr, "Wrong format of URL.\n");
    return EXIT_FAILURE;
  }

  char filename[BUFFER_LEN];
  char filechar[BUFFER_LEN];

  /* The file name is taken before the unsafe characters are encoded. */
  exctractFileName(url.path, filechar);
  strcpy(filename, filechar);

  codeUnsafeChar(url.domain);
  codeUnsafeChar(url.path);

  createRequest(msg, url);

  if (connectToServer(url, &vsocket) == FAILED) {
    fprintf(stderr, "Failed: connect to server\n");
    return EXIT_FAILURE;
  }

  if (write(vsocket, msg, strlen(msg)) == FAILED) {
    fprintf(stderr, "Failed: write request\n");
    return EXIT_FAILURE;
  }

  char answer[BUFFER_LEN];
  if (readHeader(vsocket, answer) == FAILED) {
    fprintf(stderr, "Failed: read header\n");
    return EXIT_FAILURE;
  }

  int enconding = selectEnconding(answer);
  tConn connection;
  connection = selectStatus(answer);

  /* Same handling as inside the redirection loop below. */
  if (connection.status == FAILED) {
    fprintf(stderr, "%s\n", connection.message);
    return EXIT_FAILURE;
  }

  if (connection.status >= 400) {
    fprintf(stderr, "%d\n",connection.status);
    return EXIT_FAILURE;
  }

  char msg_redir[BUFFER_LEN];
  int connect_try = 1;
  tUrl url_redir;

  // Redirection
  while (connection.status == 301 || connection.status == 302) {
    if (selectLocation(answer, msg_redir) == FAILED) {
      fprintf(stderr, "Failed: redirection\n");
      return EXIT_FAILURE;
    }

    url_redir = decodeUrl(msg_redir);
    if (url_redir.status != SUCCESS) {
      fprintf(stderr, "Failed: decode URL (redirection %d)\n",connect_try);
      return EXIT_FAILURE;
    }

    /* The output file is named after the redirection target. */
    exctractFileName(url_redir.path, filename);
    codeUnsafeChar(url_redir.domain);
    codeUnsafeChar(url_redir.path);

    createRequest(msg_redir, url_redir);

    /* The previous connection is no longer needed; release it before
       connectToServer() stores a new descriptor in vsocket. */
    close(vsocket);

    if (connectToServer(url_redir, &vsocket) == FAILED) {
      fprintf(stderr, "Failed: connect to server\n");
      return EXIT_FAILURE;
    }

    if (write(vsocket, msg_redir, strlen(msg_redir)) == FAILED) {
      fprintf(stderr, "Failed: write request (redirection %d)\n",connect_try);
      return EXIT_FAILURE;
    }

    if (readHeader(vsocket, answer) == FAILED) {
      fprintf(stderr, "Failed: read header\n");
      return EXIT_FAILURE;
    }

    enconding = selectEnconding(answer);
    connection = selectStatus(answer);

    if (connection.status == FAILED) {
      fprintf(stderr, "%s\n", connection.message);
      return EXIT_FAILURE;
    }

    if (connection.status >= 400) {
      fprintf(stderr, "%d\n",connection.status);
      return EXIT_FAILURE;
    }

    connect_try++;

    if (connect_try > 5) {
      fprintf(stderr, "Multiple (5) redirection.");
      return EXIT_FAILURE;
    }
  }

  /* Start from an empty destination file. */
  FILE *fw;
  remove(filename);
  if ((fw = fopen(filename,"a")) == NULL) {
    fprintf(stderr, "Failed: open dest file\n");
    return EXIT_FAILURE;
  }

  // Non-chunked coding: copy everything until the connection ends
  if (enconding != CHUNKED) {
    char ch;
    while (read(vsocket, &ch, sizeof(char)) > 0)
      fputc(ch, fw);
    fclose(fw);
    return EXIT_SUCCESS;
  }

  // Chunked coding
  char chr1 = '#';
  char chr2 = '#';
  int i = 0;
  int chunk = 0;

  char ch[BUFFER_LEN];

  /* Read the first chunk-size line (hexadecimal, terminated by CRLF). */
  while (read(vsocket, &chr2, sizeof(char)) > 0) {
    if (i >= BUFFER_LEN - 1) {
      fprintf(stderr, "Failed: read chunk\n");
      return EXIT_FAILURE;
    }
    ch[i] = chr2;
    i++;
    if (chr1 == '\r' && chr2 == '\n') {
      ch[i-2] = '\0';
      chunk = (int) strtol(ch, NULL, 16);
      break;
    }
    chr1 = chr2;
  }
  i = 0;

  /* A first chunk of size zero (or no size line at all) means there is no
     body to copy. */
  if (chunk == 0) {
    fclose(fw);
    return EXIT_SUCCESS;
  }

  int w = chunk;
  char p;

  /* Copy the data of the first chunk. */
  while (w > 0) {
    if (read(vsocket, &p, sizeof(char)) <= 0) {
      fprintf(stderr, "Failed: read data\n");
      return EXIT_FAILURE;
    }
    w--;
    fputc(p, fw);
  }

  char ch1 = '#', ch2 = '#';
  chunk = -1;
  char chr[BUFFER_LEN];
  int n = 0;

  /* Remaining chunks: find the CRLF that ends the previous chunk's data,
     read the next size line, copy that many bytes; a size of zero ends the
     body. */
  while (chunk != 0) {
    if (read(vsocket, &ch2, sizeof(char)) <= 0) {
      fprintf(stderr, "Failed: read chunk or \\r\\n\n");
      return EXIT_FAILURE;
    }

    if (ch1 == '\r' && ch2 == '\n') {

      /* Collect the size line up to and including its '\r'; strtol()
         stops at the first non-hex character. */
      while (ch2 != '\r') {
        if (read(vsocket, &ch2, sizeof(char)) <= 0) {
          fprintf(stderr, "Failed: read chunk\n");
          return EXIT_FAILURE;
        }
        if (i >= BUFFER_LEN - 1) {
          fprintf(stderr, "Failed: read chunk\n");
          return EXIT_FAILURE;
        }
        chr[i] = ch2;
        i++;
      }

      ch1 = ch2;
      chr[i] = '\0';
      i = 0;
      if (read(vsocket, &ch2, sizeof(char)) <= 0) {
        fprintf(stderr, "Failed: read chunked data\n");
        return EXIT_FAILURE;
      }

      if (ch1 == '\r' && ch2 == '\n') {
        chunk = (int) strtol(chr, NULL, 16);
        if (chunk == 0)
          break;
        n = chunk;

        while (n > 0) {
          if (read(vsocket, &p, sizeof(char)) <= 0) {
            fprintf(stderr, "Failed: read chunked data\n");
            return EXIT_FAILURE;
          }
          n--;
          fputc(p, fw);
        }

        /* Reset so the data's last byte cannot be mistaken for part of
           the CRLF that follows the chunk. */
        ch2 = '#';
      }
    }
    ch1 = ch2;
  }
  fclose(fw);
  return EXIT_SUCCESS;
}