예제 #1
0
/*
 * ===  FUNCTION  ======================================================================
 *         Name:  is_pchar
 *  Description:  Test whether a character may be part of a pchar as described by
 *                RFC3986 (pchar = unreserved / pct-encoded / sub-delims / ":" / "@").
 *                Returns true when the character is unreserved, a sub-delim, ':'
 *                or '@'; returns false otherwise.  Only a single character is
 *                examined, so a pct-encoded triplet is not recognised here.
 * =====================================================================================
 */
static bool
is_pchar( char c)
{
	return is_unreserved(c) || is_sub_delim(c) || c == ':' || c == '@';
}
예제 #2
0
/*
 * Return true when c is accepted in a password: one of '&', '=', '+', '$',
 * ',' or any character for which is_unreserved() returns true.
 */
static bool is_password(char c)
{
	if (c == '&' || c == '=' || c == '+' || c == '$' || c == ',')
		return true;

	return is_unreserved(c);
}
예제 #3
0
파일: uri.c 프로젝트: jamesyan84/zbase
/************************************************************************
*	Function :	parse_uric
*
*	Parameters :
*		char *in ;	string of characters to scan
*		int max ;	maximum number of characters to examine
*		token *out ; token object that receives the start pointer and
*					 length of the matched run (no characters are copied)
*
*	Description : Scans a run of uric characters starting at in[0]
*		as defined in http://www.ietf.org/rfc/rfc2396.txt (RFC explaining
*		URIs). A character is accepted when it is unreserved, reserved,
*		or the start of an escape for which at least two more characters
*		remain before max.
*
*	Return : int ; number of characters accepted (also stored in out->size)
*
*	Note :
************************************************************************/
int
parse_uric( char *in,
            int max,
            token * out )
{
    int consumed;

    for( consumed = 0; consumed < max; consumed++ ) {
        char *cur = &in[consumed];

        if( is_unreserved( *cur ) || is_reserved( *cur ) ) {
            continue;
        }
        /* An escape is only tested when two more characters are in range. */
        if( ( consumed + 2 < max ) && is_escaped( cur ) ) {
            continue;
        }
        break;
    }

    out->size = consumed;
    out->buff = in;
    return consumed;
}
예제 #4
0
/// Percent-encode `input`.
///
/// Characters accepted by is_unreserved() are copied unchanged; every other
/// byte is emitted as '%' followed by two lowercase hexadecimal digits.
///
/// @param input  text to encode
/// @return the encoded string
std::string uri::encode(const std::experimental::string_view input) {
  static const std::array<char,16> hex =
    {{ '0','1','2','3','4','5','6','7','8','9','a','b','c','d','e','f' }};

  std::string res;
  res.reserve(input.size());

  for (const auto chr : input) {
    if (is_unreserved(chr)) {
      res += chr;
    } else {
      // `char` may be signed: shifting a byte >= 0x80 directly would give a
      // negative index into `hex`. Work on the unsigned byte value instead.
      const auto byte = static_cast<unsigned char>(chr);
      res += '%';
      res += hex[ byte >> 4 ];
      res += hex[ byte & 0xf ];
    }
  }

  return res;
}
예제 #5
0
/*
 * ===  FUNCTION  ======================================================================
 *         Name:  uri_norm_host
 *  Description:  Normalize the host section of the URI if it is set.  The host section
 *                that is normalized here is the reg-name, and not the IP address.
 *                The IP address should be inside the IP section of the structure.
 *
 *                This section attempts to implement section 3.2.2 of RFC3986 with the
 *                exception that userinfo is not checked as HTTP and HTTPS do not
 *                specify the usage of it.
 *
 *                Each "%XX" triplet is passed through norm_pct(); every other
 *                character must be a sub-delim or unreserved and upper case
 *                letters are folded to lower case.
 *
 *       Return:  0 on success (or when no host is set), EILSEQ when the host
 *                contains an illegal character or a truncated escape, ENOMEM
 *                when the scratch buffer cannot be allocated, otherwise the
 *                error reported by norm_pct().  On failure the URI is left
 *                unchanged.
 * =====================================================================================
 */
extern int
uri_norm_host( uriobj_t *uri)
{
	int    err  = 0;
	size_t i, len;
	char  *pct,
	      /* NOTE(review): assumes URI_CP_PT yields a private heap copy -- confirm */
	      *host = URI_CP_PT(uri->uri_host);

	if( ! host ) {
		return 0;
	}
	pct = (char *) malloc(4);
	if( ! pct ) {
		if( host != *uri->uri_host ) {
			free(host);
		}
		return ENOMEM;
	}
	len = strlen(host);
	for(i = 0;i < len; i ++ ) {
		if( host[i] == '%'){
			/* both hex digits must lie inside the string */
			if( (i + 2) >= len ) {
				err = EILSEQ;
				break;
			}
			pct[0] = host[i];
			pct[1] = host[i + 1];
			pct[2] = host[i + 2];
			pct[3] = '\0';
			err = norm_pct(&pct);
			if( err ) {
				break;
			}
			host[i]     = pct[0];
			host[i + 1] = pct[1];
			host[i + 2] = pct[2];
			/* skip the two hex digits; the loop increment steps past the
			 * triplet so that the next character is still examined */
			i += 2;
			continue;
		}
		if(!(is_sub_delim(host[i]) || is_unreserved(host[i]))){
			err = EILSEQ;
			break;
		}
		if( isupper((unsigned char) host[i]) ){
			host[i] = (char) tolower((unsigned char) host[i]);
		}
	}
	if( ! err ) {
		/* release the previous value before installing the normalized one */
		if( *uri->uri_host != host ) {
			free(*uri->uri_host);
		}
		*(uri->uri_host) = host;
	}
	else if( host != *uri->uri_host ) {
		/* the working copy is not handed over on failure */
		free(host);
	}
	free(pct);
	return err;
}
예제 #6
0
파일: uparse.c 프로젝트: bradclawsie/code
// Extract the "#fragment" part that *s points at.
//
// No special destructor needed, fragment is just char *. A url does not need to have
// a fragment, so a NULL return value is not strictly an error: callers must inspect
// *fragment_out_err, which is set to UPARSE_ERROR on failure and NO_UPARSE_ERROR
// otherwise. On success the returned string is heap allocated (strdup) and owned by
// the caller. *s itself is not advanced.
static char *get_fragment(char **s,unsigned int *fragment_out_err) {
    *fragment_out_err = NO_UPARSE_ERROR;
    // Nothing to parse; also guards against *s being NULL before it is read.
    if ((NULL == s) || (NULL == *s) || ('\0' == **s)) {
        return NULL;
    }

    char *c = *s;

    if (FRAGMENT_DELIM != c[0]) {
        fprintf(stderr,"no %c as prefix\n",FRAGMENT_DELIM);
        *fragment_out_err = UPARSE_ERROR;
        return NULL;
    }
    c++;

    if ('\0' == *c) {
        // delimiter present but no fragment text
        return NULL;
    }

    // Validate before allocating so that bad input never needs cleanup.
    for (const char *p = c; *p; p++) {
        if (!is_unreserved(*p)) {
            fprintf(stderr,"'%c' is invalid\n",*p);
            *fragment_out_err = UPARSE_ERROR;
            return NULL;
        }
    }

    char *fragment = strdup(c);
    if (NULL == fragment) {
        fprintf(stderr,"cannot allocate fragment\n");
        *fragment_out_err = UPARSE_ERROR;
        return NULL;
    }

    return fragment;
}
예제 #7
0
/*!
 * \brief Scans a run of uric characters starting at in[0] as defined in
 * http://www.ietf.org/rfc/rfc2396.txt (RFC explaining URIs).
 *
 * A character is accepted when it is unreserved, reserved, or the start of an
 * escape for which at least two more characters remain before \p max.
 *
 * \return Number of characters accepted; the same value is stored in
 * out->size and out->buff is set to \p in.
 */
static size_t parse_uric(
	/*! [in] String of characters. */
	const char *in,
	/*! [in] Maximum limit. */
	size_t max,
	/*! [out] Token object describing the matched characters. */
	token *out)
{
	size_t consumed;

	for (consumed = (size_t)0; consumed < max; consumed++) {
		const char *cur = &in[consumed];

		if (is_unreserved(*cur) || is_reserved(*cur))
			continue;
		/* An escape is only tested when two more characters are in range. */
		if ((consumed + (size_t)2 < max) && is_escaped(cur))
			continue;
		break;
	}

	out->size = consumed;
	out->buff = in;
	return consumed;
}
예제 #8
0
/// Decode a percent-encoded string.
///
/// Each "%XY" triplet is converted with from_hex(X, Y); any other character is
/// copied when is_reserved() or is_unreserved() accepts it. A truncated
/// triplet or an unaccepted character yields decode_error() applied to the
/// text decoded so far.
std::string uri::decode(const std::experimental::string_view input) {
  std::string res;
  res.reserve(input.size());

  const auto len = input.size();
  for (std::size_t pos = 0; pos < len; ++pos) {
    const char cur = input[pos];

    if (cur != '%') {
      if (not (is_reserved(cur) or is_unreserved(cur)))
        return decode_error(std::move(res));
      res += cur;
      continue;
    }

    // Two more characters are required after the '%'.
    if (pos + 2 >= len) return decode_error(std::move(res));

    const uint8_t nibble1 = input[pos + 1];
    const uint8_t nibble2 = input[pos + 2];
    res += static_cast<char>(from_hex(nibble1, nibble2));
    pos += 2;
  }
  return res;
}
예제 #9
0
/* True when c passes is_hnv_unreserved() or is_unreserved(). */
static bool is_hvalue(char c)
{
	if (is_hnv_unreserved(c))
		return true;

	return is_unreserved(c);
}
예제 #10
0
/* True when c passes is_param_unreserved() or is_unreserved(). */
static bool is_paramchar(char c)
{
	if (is_param_unreserved(c))
		return true;

	return is_unreserved(c);
}
예제 #11
0
파일: uparse.c 프로젝트: bradclawsie/code
// Parse the "?key=val&key=val" section that *s points at.
//
// A url doesn't have to have a ?query arg list, so this can return NULL
// and not be an error: callers must inspect *query_out_err, which is
// UPARSE_ERROR on failure and NO_UPARSE_ERROR otherwise.
//
// On success the returned list owns heap copies of every key and value,
// and *s is advanced to the end of the query string (the fragment
// delimiter, if there is one, or the terminating NUL). Empty pairs
// ("&&") are skipped; every non-empty pair must contain the key/value
// delimiter and both parts must consist of unreserved characters only.
static query_arg_list_t *get_query_arg_list(char **s,unsigned int *query_out_err) {
    // Nothing to parse; also guards against *s being NULL before it is read.
    if ((NULL == s) || (NULL == *s) || ('\0' == **s)) {
        *query_out_err = NO_UPARSE_ERROR;
        return NULL;
    }
    char *c = *s;

    *query_out_err = UPARSE_ERROR; // default until end of function

    if (QUERY_DELIM != c[0]) {
        fprintf(stderr,"no %c as prefix\n",QUERY_DELIM);
        return NULL;
    }
    c++;

    // at the end of the loop, t will point to the end of the query string, which
    // may not be the end of the url; it may have a #fragment
    char *t = c;
    unsigned int query_delim_count = 0;
    unsigned int query_pair_count = 0;
    unsigned int query_str_len = 0;
    while ((*t) && (FRAGMENT_DELIM != *t)) {
        if (QUERY_PAIR_DELIM == *t) {
            query_delim_count++;
        }
        t++;
        query_str_len++;
    }
    query_pair_count = query_delim_count + 1;

    // private, writable copy of just the query text for strsep to chop up
    char query_string[query_str_len+1];
    bzero((void *) query_string,(query_str_len+1) * sizeof(char));
    strlcpy(query_string,c,query_str_len+1);

    // stringify the delims for strsep
    char query_pair_delim_str[2];
    snprintf(query_pair_delim_str,2,"%c",QUERY_PAIR_DELIM);
    char query_key_value_delim_str[2];
    snprintf(query_key_value_delim_str,2,"%c",QUERY_KEY_VAL_DELIM);

    // zero-filled so that unused slots are NULL when the list is freed early
    query_key_val_t **query_key_vals =
        (query_key_val_t **) calloc(query_pair_count,sizeof(query_key_val_t *));
    if (NULL == query_key_vals) {
        fprintf(stderr,"could not allocate query_key_val\n");
        return NULL;
    }

    unsigned int i = 0;
    char *pair_tok;
    char *free_query_string = query_string;
    while ((pair_tok = strsep(&free_query_string,query_pair_delim_str)) != NULL) {
        if (0 == strcmp("",pair_tok)) {
            continue; // empty pair, e.g. "a=b&&c=d"
        }
        if (i >= query_pair_count) {
            fprintf(stderr,"loop count %d >= previous pair count %d\n",
                    i,query_pair_count);
            free_query_key_val_t_list(query_key_vals,query_pair_count);
            return NULL;
        }
        char *sep_pair_tok = strdup(pair_tok);
        if (NULL == sep_pair_tok) {
            fprintf(stderr,"could not allocate sep_pair_tok\n");
            free_query_key_val_t_list(query_key_vals,query_pair_count);
            return NULL;
        }
        // strsep advances sep_pair_tok, so remember the allocation start
        char *free_sep_pair_tok = sep_pair_tok;

        // put the key and val in a struct, and set as item i in a list
        // query_pair_count long
        query_key_val_t *query_key_val_tok =
            (query_key_val_t *) malloc(sizeof(query_key_val_t));
        if (NULL == query_key_val_tok) {
            fprintf(stderr,"could not allocate query_key_val_tok\n");
            free_query_key_val_t_list(query_key_vals,query_pair_count);
            free(free_sep_pair_tok);
            return NULL;
        }
        bzero((void *) query_key_val_tok,sizeof(query_key_val_t));

        bool seen_key = false;
        char *kv_tok = NULL;
        char *key = NULL;
        char *val = NULL;

        // make sure sep_pair_tok contains an = to delimit the key and value
        if (NULL == strstr(sep_pair_tok,query_key_value_delim_str)) {
            fprintf(stderr,"could not find '%s' in query pair '%s'\n",
                    query_key_value_delim_str,sep_pair_tok);
            free_query_key_val_t_list(query_key_vals,query_pair_count);
            free_query_key_val_t(query_key_val_tok);
            free(free_sep_pair_tok);
            return NULL;
        }

        // break sep_pair_tok (a=b) into the key (a) and the value (b)
        while ((kv_tok = strsep(&sep_pair_tok,query_key_value_delim_str)) != NULL) {
            if (!seen_key) {
                key = strdup(kv_tok);
                seen_key = true;
            } else {
                // "a=b=c" gets here more than once; the last token wins,
                // so release the previous copy instead of leaking it
                free(val);
                val = strdup(kv_tok);
            }
        }

        if ((NULL == key) || (NULL == val)) {
            fprintf(stderr,"either key or val from %s was null\n",free_sep_pair_tok);
            free_query_key_val_t_list(query_key_vals,query_pair_count);
            free_query_key_val_t(query_key_val_tok);
            free(free_sep_pair_tok);
            free(key);
            free(val);
            return NULL;
        }

        // both the key and the value must consist of unreserved chars only
        char *key_val_list[] = {key,val};
        for (size_t j = 0;j < 2;j++) {
            for (char *test_kv = key_val_list[j];*test_kv;test_kv++) {
                if (!is_unreserved(*test_kv)) {
                    fprintf(stderr,"'%c' is invalid\n",*test_kv);
                    free_query_key_val_t_list(query_key_vals,query_pair_count);
                    free(free_sep_pair_tok);
                    free_query_key_val_t(query_key_val_tok);
                    free(key);
                    free(val);
                    return NULL;
                }
            }
        }

        query_key_val_tok->key = key;
        query_key_val_tok->val = val;
        query_key_vals[i] = query_key_val_tok;
        free(free_sep_pair_tok);
        i++;
    }

    query_arg_list_t *query_arg_list =
        (query_arg_list_t *) malloc(sizeof(query_arg_list_t));
    if (NULL == query_arg_list) {
        fprintf(stderr,"cannot allocate query_arg_list\n");
        free_query_key_val_t_list(query_key_vals,query_pair_count);
        return NULL;
    }
    bzero((void *) query_arg_list,sizeof(query_arg_list_t));

    query_arg_list->count = i;
    query_arg_list->query_key_vals = query_key_vals;
    *s = t; // pointer is now past query arg str, at # fragment delim if there
    *query_out_err = NO_UPARSE_ERROR;
    return query_arg_list;
}
예제 #12
0
/* True when c passes is_unreserved() or is_user_unreserved(). */
static bool is_user(char c)
{
	if (is_unreserved(c))
		return true;

	return is_user_unreserved(c);
}
예제 #13
0
파일: uparse.c 프로젝트: bradclawsie/code
// Parse the path section that *s points at, stopping at the query delimiter
// or the end of the string.
//
// A url does not need to have a path, so this can return NULL without an error
// being thrown: callers must inspect *path_out_err, which is UPARSE_ERROR on
// failure and NO_UPARSE_ERROR otherwise.
//
// On success the returned path_t owns a copy of the path text (path_str) and
// one heap string per non-empty path element; count is the number of elements
// stored. An absent path or a lone "/" yields a single empty element. *s is
// advanced to the character that ended the path.
static path_t *get_path(char **s, unsigned int *path_out_err) {
    // Nothing to parse; also guards against *s being NULL before it is read.
    if ((NULL == s) || (NULL == *s) || ('\0' == **s)) {
        *path_out_err = NO_UPARSE_ERROR; // not an error: url can have no path
        return NULL;
    }

    *path_out_err = UPARSE_ERROR; // default until end of function

    size_t delim_count = 0;
    size_t chars_until_query_delim = 0;
    char *c = *s;
    while (*c) {
        if (PATH_DELIM == *c) {
            delim_count++;
        } else if (QUERY_DELIM == *c) {
            break;
        } else if (!is_unreserved(*c)) {
            fprintf(stderr,"'%c' is invalid\n",*c);
            return NULL;
        }
        chars_until_query_delim++;
        c++;
    }

    char *just_path;
    char path_delim_str[2] = {PATH_DELIM,'\0'}; // "/" (not '/')
    char nonempty_path[chars_until_query_delim+1];

    if (0 == delim_count) {
        // in this case, the path was '', not even '/'. so we create
        // a default empty path of '/' to represent what '' implies
        just_path = path_delim_str;
    } else {
        bzero((void *) nonempty_path,(chars_until_query_delim + 1) * sizeof(char));
        strlcpy(nonempty_path,*s,chars_until_query_delim+1);
        just_path = nonempty_path;
    }

    path_t *path = (path_t *) malloc(sizeof(path_t));
    if (NULL == path) {
        fprintf(stderr,"cannot allocate path\n");
        return NULL;
    }
    bzero((void *) path,sizeof(path_t));
    path->path_str = strdup(just_path);
    if (NULL == path->path_str) {
        fprintf(stderr,"cannot allocate path\n");
        free_path_t(path);
        return NULL;
    }

    // set pointer to where the query delim may be found, now that *s
    // has been copied into just_path
    *s = c;

    // case of single '/' path (like: http://foo.com/?key=val)
    if (strlen(just_path) == 1) {
        path->path_elts = (char **) calloc(1,sizeof(char *));
        if (NULL == path->path_elts) {
            fprintf(stderr,"cannot allocate path\n");
            free_path_t(path);
            return NULL;
        }
        path->path_elts[0] = (char *) calloc(1,sizeof(char));
        if (NULL == path->path_elts[0]) {
            fprintf(stderr,"cannot allocate path\n");
            free_path_t(path);
            return NULL;
        }
        path->count = 1;
        *path_out_err = NO_UPARSE_ERROR;
        return path;
    }

    // zero-filled so that slots never reached below are NULL rather than
    // garbage if the structure has to be freed early
    path->path_elts = (char **) calloc(delim_count,sizeof(char *));
    if (NULL == path->path_elts) {
        fprintf(stderr,"cannot allocate path\n");
        free_path_t(path);
        return NULL;
    }
    path->count = delim_count;

    char *tok;
    size_t i = 0;

    // tok points into the stack buffer nonempty_path and must never be freed
    while (((tok = strsep(&just_path,path_delim_str)) != NULL) && (i < delim_count)) {
        if (0 != strcmp("",tok)) {
            path->path_elts[i] = strdup(tok);
            if (NULL == path->path_elts[i]) {
                fprintf(stderr,"cannot dup %s\n",tok);
                free_path_t(path);
                return NULL;
            }
            i++;
        }
    }

    // empty segments ("//", trailing "/") are skipped, so fewer than
    // delim_count slots may be in use; report only the filled ones
    path->count = i;

    *path_out_err = NO_UPARSE_ERROR;
    return path;
}
예제 #14
0
파일: uparse.c 프로젝트: bradclawsie/code
// Get the host:port section of the url. Doesn't support ipv6, unicode hosts,
// username annotations etc.
// Every url must have a host (but the port is optional). If this returns NULL,
// it is an error and *host_port_out_err is UPARSE_ERROR.
//
// On success the returned host_port_t owns a heap copy of the host name, port
// holds the parsed port (0 when the url carries none), *host_port_out_err is
// NO_UPARSE_ERROR and *s is advanced to the path or query delimiter (or the
// terminating NUL) that ended the section.
static host_port_t *get_host_port(char **s, unsigned int *host_port_out_err) {
    *host_port_out_err = UPARSE_ERROR; // default
    // Also guards against *s being NULL before it is read.
    if ((NULL == s) || (NULL == *s) || ('\0' == **s)) {
        fprintf(stderr,"arg pointer null\n");
        return NULL;
    }

    char *c = *s;
    size_t host_len = 0;
    size_t port_len = 0;
    char *host_start = *s;
    char *port_start = NULL;
    bool seen_host_delim = false;
    while (*c) {
        // normally we would see the PATH_DELIM to break this, but
        // this can be '', so we must also look out for the
        // QUERY_DELIM
        if ((PATH_DELIM == c[0]) || (QUERY_DELIM == c[0])) {
            break;
        } else if (HOST_PORT_DELIM == c[0]) {
            // A second delimiter would move port_start while port_len keeps
            // counting from the first one, so the copy made below could run
            // past the port text. Only one host/port delimiter is legal.
            if (seen_host_delim) {
                fprintf(stderr,"'%c' is invalid\n",*c);
                return NULL;
            }
            seen_host_delim = true;
            c++;
            port_start = c;
        } else {
            // *c is part of the host or port, depending on delimiters seen
            if (!is_unreserved(*c)) {
                fprintf(stderr,"'%c' is invalid\n",*c);
                return NULL;
            }
            // If we have already seen the host/port delimiter (:), then
            // count the chars in the length of the port part. Otherwise,
            // count the chars in the length of the host part.
            if (seen_host_delim) {
                port_len++;
            } else {
                host_len++;
            }
            c++;
        }
    }
    if (0 == host_len) {
        fprintf(stderr,"no host found\n");
        return NULL;
    }
    if (seen_host_delim && (0 == port_len)) {
        fprintf(stderr,"port delimiter seen but no port number string\n");
        return NULL;
    }

    host_port_t *host_port = (host_port_t *) malloc(sizeof(host_port_t));
    if (NULL == host_port) {
        fprintf(stderr,"cannot allocate host_port\n");
        return NULL;
    }
    bzero((void *) host_port,sizeof(host_port_t));

    host_port->host = strndup(host_start,host_len);
    if (NULL == host_port->host) {
        fprintf(stderr,"cannot allocate host_port->host\n");
        free_host_port_t(host_port);
        return NULL;
    }

    host_port->port = 0;
    if ((seen_host_delim) && (port_len > 0)) {
        char *port_chars = strndup(port_start,port_len);
        if (NULL == port_chars) {
            fprintf(stderr,"cannot allocate chars for host_port->port\n");
            free_host_port_t(host_port);
            return NULL;
        }
        // port_end lets us reject trailing garbage such as "80abc"
        char *port_end = NULL;
        long port_long = strtol(port_chars,&port_end,10);
        if ((0 == port_long) || ('\0' != *port_end)) {
            fprintf(stderr,"zero port or cannot convert port_chars %s to long\n",port_chars);
            free_host_port_t(host_port);
            free(port_chars);
            return NULL;
        }
        free(port_chars);
        // '-' is an unreserved char, so "-5" reaches this point; a negative
        // value must not be cast to unsigned below
        if ((port_long < 0) || (MAX_PORT < port_long)) {
            fprintf(stderr,"port %ld out of range\n",port_long);
            free_host_port_t(host_port);
            return NULL;
        }
        host_port->port = (unsigned int) port_long;
    }

    *s = c; // set pointer past host/port
    *host_port_out_err = NO_UPARSE_ERROR;
    return host_port;
}
예제 #15
0
파일: uri.cpp 프로젝트: dnybz/downloader
// Writes a normalized copy of this URI into `other`.
//
// The output buffer of `other` is allocated with room for this URI's length
// plus one; the components are then re-emitted in order (scheme, authority,
// path, query, fragment) and the matching component views of `other` are
// pointed at the freshly written text.
//
// Normalization performed by the code below:
//  - scheme and host are lower-cased with util::to_lower;
//  - a "%XY" triplet whose decoded value is unreserved is replaced by that
//    character, otherwise it is kept with its hex digits upper-cased;
//  - an empty path becomes "/";
//  - dot segments are removed from the path following the A-D steps quoted
//    in the comments inside the path loop.
//
// Returns false only when the allocation of other's buffer fails; true
// otherwise.
//
// NOTE(review): the percent-decoding code reads d[1] and d[2] without a
// bounds check -- presumably the URI was validated when it was parsed; confirm.
bool net::uri::uri::normalize(uri& other) const
{
  if (!other._M_uri.allocate(_M_uri.length() + 1)) {
    return false;
  }

  // `d` walks the source text, `otherd` is the write cursor in the output.
  const char* d = _M_uri.data();
  const char* end = d + _M_uri.length();

  char* otherd = other._M_uri.data();

  // Copy scheme.
  size_t len = _M_scheme.length();
  for (size_t i = 0; i < len; i++) {
    *otherd++ = util::to_lower(*d++);
  }

  other._M_scheme.set(other._M_uri.data(), len);

  // Skip colon.
  d++;

  *otherd++ = ':';

  // If there is authority...
  if (_M_hier_part.host.length() > 0) {
    // Skip the "//" in the source and emit it in the output.
    d += 2;

    *otherd++ = '/';
    *otherd++ = '/';

    // If there is user information...
    if ((len = _M_hier_part.userinfo.length()) > 0) {
      // Save start of the userinfo.
      char* userinfo = otherd;

      // `i` counts consumed source characters; a triplet consumes three
      // (the `i += 2` below plus the `++i` of the loop condition).
      size_t i = 0;
      do {
        if (*d == '%') {
          uint8_t c = (util::hex2dec(d[1]) * 16) + util::hex2dec(d[2]);
          if (is_unreserved(c)) {
            *otherd++ = static_cast<char>(c);
          } else {
            *otherd++ = '%';
            *otherd++ = util::to_upper(d[1]);
            *otherd++ = util::to_upper(d[2]);
          }

          d += 3;
          i += 2;
        } else {
          *otherd++ = *d++;
        }
      } while (++i < len);

      other._M_hier_part.userinfo.set(userinfo, otherd - userinfo);

      // Skip the '@' in the source and emit it in the output.
      d++;
      *otherd++ = '@';
    }

    // IP literal?
    if (_M_hier_part.ip_literal) {
      // Skip '['.
      d++;

      *otherd++ = '[';
    }

    // Save start of the host.
    char* host = otherd;

    // Copy host.
    // Same scheme as for the userinfo, except that literal characters and
    // decoded unreserved characters are lower-cased.
    len = _M_hier_part.host.length();
    size_t i = 0;
    do {
      if (*d == '%') {
        uint8_t c = (util::hex2dec(d[1]) * 16) + util::hex2dec(d[2]);
        if (is_unreserved(c)) {
          *otherd++ = static_cast<char>(util::to_lower(c));
        } else {
          *otherd++ = '%';
          *otherd++ = util::to_upper(d[1]);
          *otherd++ = util::to_upper(d[2]);
        }

        d += 3;
        i += 2;
      } else {
        *otherd++ = util::to_lower(*d++);
      }
    } while (++i < len);

    other._M_hier_part.host.set(host, otherd - host);

    // IP literal?
    if (_M_hier_part.ip_literal) {
      // Skip ']'.
      d++;

      *otherd++ = ']';
    }

    other._M_hier_part.ip_literal = _M_hier_part.ip_literal;

    // If not a standard port...
    // (the test below only checks that the stored port is non-zero)
    if (_M_hier_part.port != 0) {
      *otherd++ = ':';

      // Bound the formatted port by the space left in other's buffer.
      const char* otherend = other._M_uri.data() + other._M_uri.capacity();

      otherd += snprintf(otherd, otherend - otherd, "%u", _M_hier_part.port);
    }

    other._M_hier_part.port = _M_hier_part.port;
  }

  // Save start of the path.
  char* path = otherd;

  // If the path is empty...
  if ((len = _M_hier_part.path.length()) == 0) {
    *otherd++ = '/';
  } else {
    // Remove dot segments.
    // The source cursor is re-seated on the path component; from here on
    // `path` .. `otherd` is the path text emitted so far, and the checks
    // below inspect its tail after every character written.
    // NOTE(review): the "(d == end)" tests compare against the end of the
    // whole URI text, not the end of the path -- confirm that a trailing
    // "/." or "/.." followed by a query or fragment is handled as intended.
    d = _M_hier_part.path.data();

    size_t i = 0;
    do {
      if (*d == '%') {
        uint8_t c = (util::hex2dec(d[1]) * 16) + util::hex2dec(d[2]);
        if (is_unreserved(c)) {
          *otherd++ = static_cast<char>(c);
        } else {
          *otherd++ = '%';
          *otherd++ = util::to_upper(d[1]);
          *otherd++ = util::to_upper(d[2]);
        }

        d += 3;
        i += 2;
      } else {
        *otherd++ = *d++;
      }

      // A. If the input buffer begins with a prefix of "../" or "./",
      //    then remove that prefix from the input buffer; otherwise,
      if (((path + 3 == otherd) &&
           (path[0] == '.') &&
           (path[1] == '.') &&
           (path[2] == '/')) ||
          ((path + 2 == otherd) &&
           (path[0] == '.') &&
           (path[1] == '/'))) {
        otherd = path;

      // B. if the input buffer begins with a prefix of "/./" or "/.",
      //    where "." is a complete path segment, then replace that
      //    prefix with "/" in the input buffer; otherwise,
      } else if ((path + 3 <= otherd) &&
                 (otherd[-3] == '/') &&
                 (otherd[-2] == '.') &&
                 (otherd[-1] == '/')) {
        otherd -= 2;
      } else if ((path + 2 <= otherd) &&
                 (d == end) &&
                 (otherd[-2] == '/') &&
                 (otherd[-1] == '.')) {
        otherd--;

      // C. if the input buffer begins with a prefix of "/../" or "/..",
      //    where ".." is a complete path segment, then replace that
      //    prefix with "/" in the input buffer and remove the last
      //    segment and its preceding "/" (if any) from the output
      //    buffer; otherwise,
      } else if ((path + 4 <= otherd) &&
                 (otherd[-4] == '/') &&
                 (otherd[-3] == '.') &&
                 (otherd[-2] == '.') &&
                 (otherd[-1] == '/')) {
        // Drop "/../", then back up to the previous '/' (or the path start).
        otherd -= 4;
        while (otherd > path) {
          if (*--otherd == '/') {
            break;
          }
        }

        *otherd++ = '/';
      } else if ((path + 3 <= otherd) &&
                 (d == end) &&
                 (otherd[-3] == '/') &&
                 (otherd[-2] == '.') &&
                 (otherd[-1] == '.')) {
        // Same as above for a trailing "/..".
        otherd -= 3;
        while (otherd > path) {
          if (*--otherd == '/') {
            break;
          }
        }

        *otherd++ = '/';

      // D. if the input buffer consists only of "." or "..", then remove
      // that from the input buffer; otherwise,
      } else if (((path + 1 == otherd) &&
                  (d == end) &&
                  (otherd[-1] == '.')) ||
                 ((path + 2 == otherd) &&
                  (d == end) &&
                  (otherd[-2] == '.') &&
                  (otherd[-1] == '.'))) {
        otherd = path;
      }
    } while (++i < len);
  }

  // The path view is only set when some path text was actually emitted.
  if (path < otherd) {
    other._M_hier_part.path.set(path, otherd - path);
  }

  // If there is query...
  if ((len = _M_query.length()) > 0) {
    *otherd++ = '?';

    // Save start of the query.
    char* query = otherd;

    d = _M_query.data();

    size_t i = 0;
    do {
      if (*d == '%') {
        uint8_t c = (util::hex2dec(d[1]) * 16) + util::hex2dec(d[2]);
        if (is_unreserved(c)) {
          *otherd++ = static_cast<char>(c);
        } else {
          *otherd++ = '%';
          *otherd++ = util::to_upper(d[1]);
          *otherd++ = util::to_upper(d[2]);
        }

        d += 3;
        i += 2;
      } else {
        *otherd++ = *d++;
      }
    } while (++i < len);

    other._M_query.set(query, otherd - query);
  }

  // If there is fragment...
  if ((len = _M_fragment.length()) > 0) {
    *otherd++ = '#';

    // Save start of the fragment.
    char* fragment = otherd;

    d = _M_fragment.data();

    size_t i = 0;
    do {
      if (*d == '%') {
        uint8_t c = (util::hex2dec(d[1]) * 16) + util::hex2dec(d[2]);
        if (is_unreserved(c)) {
          *otherd++ = static_cast<char>(c);
        } else {
          *otherd++ = '%';
          *otherd++ = util::to_upper(d[1]);
          *otherd++ = util::to_upper(d[2]);
        }

        d += 3;
        i += 2;
      } else {
        *otherd++ = *d++;
      }
    } while (++i < len);

    other._M_fragment.set(fragment, otherd - fragment);
  }

  // Record how many characters were written in total.
  other._M_uri.length(otherd - other._M_uri.data());

  return true;
}
예제 #16
0
/**
 Decodes URL-encoded data in place. The decoded output is never longer than
 the encoded input, so the result is written back into the same buffer.
 The position inside a "%XY" escape is kept in state, which is read and
 updated through the pointer, so an escape may be split across two calls.
 @return Number of decoded bytes written to data. Negative integer if data is
 not valid URL-encoded sequence.
 */
ssize_t urldecode(decoder_state *state, char *data, size_t size)
{
    size_t rd;
    size_t wr = 0;

    for (rd = 0; rd < size; rd++)
    {
        const char ch = data[rd];

        if (ch == '%')
        {
            /* an escape may only start outside of another escape */
            if (state->state != ST_SYM)
                return -1;
            state->state = ST_PERCENT;
        }
        else if (ch == '+')
        {
            if (state->state != ST_SYM)
                return -1;
            data[wr++] = ' ';
        }
        else if (state->state == ST_PERCENT_AND_SYM)
        {
            /* second hex digit: combine it with the remembered first one */
            const int hi = hexdigit(state->sym);
            const int lo = hexdigit(ch);
            if (hi < 0 || lo < 0)
                return -1;
            data[wr++] = (hi << 4) | lo;
            state->state = ST_SYM;
        }
        else if (state->state == ST_PERCENT)
        {
            /* first hex digit: remember it until the second one arrives */
            state->sym = ch;
            state->state = ST_PERCENT_AND_SYM;
        }
        else if (state->state == ST_SYM)
        {
            if (!is_unreserved(ch))
                return -1;
            data[wr++] = ch;
        }
    }
    return wr;
}
예제 #17
0
/** Built-in preprocessing callback
 *
 * Built-in preprocessing callback to break or not to break URLs according to
 * some rules by Chicago Manual of Style 15th ed.
 * If data is NULL, prohibit break.
 * Otherwise, allow break by rule above.
 *
 * The function works in two passes, selected by the text argument:
 *
 * Pass I (text is not NULL): scans str for the first substring that looks
 * like a URL.  When one is found, str->str and str->len are narrowed to
 * that substring; when none is found, str->str is set to NULL.  NULL is
 * returned in both cases.
 *
 * Pass II (text is NULL): copies str into a new grapheme cluster string,
 * sets a break flag on every cluster but the first one and returns the
 * copy.  If the copy cannot be made, lbobj->errnum is set and NULL is
 * returned.
 */
gcstring_t *linebreak_prep_URIBREAK(linebreak_t * lbobj, void *data,
				    unistr_t * str, unistr_t * text)
{
    gcstring_t *gcstr;
    size_t i;
    unichar_t *ptr;

    /* Pass I */

    if (text != NULL) {
	/*
	 * Search URL in str.
	 * Following code loosely refers RFC3986 but some practical
	 * assumptions are put:
	 *
	 * o Broken pct-encoded sequences (e.g. single "%") are allowed.
	 * o scheme names must end with alphanumeric, must be longer than
	 *   or equal to two octets, and must not contain more than one
	 *   non-alphanumeric ("+", "-" or ".").
	 * o URLs containing neither non-empty path, query part nor fragment
	 *   (e.g. "about:") are omitted: they are treated as ordinal words.
	 */
	/*
	 * ptr marks the start of the current candidate and is reset to NULL
	 * whenever a candidate is rejected with "continue"; it is non-NULL
	 * after the loop only when the loop was left through the final
	 * "break", with i just past the end of the match.
	 */
	for (ptr = NULL, i = 0; i < str->len; ptr = NULL, i++) {
	    int has_double_slash, has_authority, has_empty_path,
		has_no_query, has_no_fragment;
	    size_t alphadigit, nonalphadigit;

	    /* skip non-alpha. */
	    if (!is_alpha(str, i))
		continue;

	    ptr = str->str + i;

	    /* "url:" - case insensitive */
	    if (startswith(str, i, "url:", 4, 0))
		i += 4;

	    /* scheme */
	    if (is_alpha(str, i))
		i++;
	    else
		continue;

	    /* count the remaining scheme characters by class */
	    nonalphadigit = 0;
	    alphadigit = 1;
	    while (1) {
		if (is_alpha(str, i) || is_digit(str, i))
		    alphadigit++;
		else if (is(str, i, '+') || is(str, i, '-') || is(str, i, '.'))
		    nonalphadigit++;
		else
		    break;
		i++;
	    }
	    /* enforce the scheme name restrictions listed above */
	    if (alphadigit < 2 || 1 < nonalphadigit ||
	        ! (is_digit(str, i - 1) || is_alpha(str, i - 1)))
		continue;

	    /* ":" */
	    if (is(str, i, ':'))
		i++;
	    else
		continue;

	    /* hier-part */
	    has_double_slash = 0;
	    has_authority = 0;
	    has_empty_path = 0;
	    has_no_query = 0;
	    has_no_fragment = 0;
	    if (startswith(str, i, "//", 2, 0)) {
		/* "//" */
		has_double_slash = 1;
		i += 2;

		/* authority - FIXME:syntax relaxed */
		if (is(str, i, '[') || is(str, i, ':') || is(str, i, '@') ||
		    is_unreserved(str, i) || is_pct_encoded(str, i) ||
		    is_sub_delim(str, i)) {
		    has_authority = 1;
		    i++;
		    while (is(str, i, '[') || is(str, i, ']') ||
			   is(str, i, ':') || is(str, i, '@') ||
			   is_unreserved(str, i) || is_pct_encoded(str, i) ||
			   is_sub_delim(str, i))
			i++;
		}
	    }

	    /* path */
	    /* choose the path rule from what preceded it */
	    if (has_double_slash) {
		if (has_authority)
		    goto path_abempty;
		else
		    goto path_absolute;
	    } /* else goto path_rootless; */

	    /* path_rootless: */
	    if (is_pchar(str, i)) { /* FIXME:path-noscheme not concerned */
		i++;
		while (is_pchar(str, i))
		    i++;
		goto path_abempty;
	    } else {
		has_empty_path = 1;
		goto path_empty;
	    }

	  path_absolute:
	    /* "//" without an authority: the path must start with one "/" */
	    if (startswith(str, i, "//", 2, 0))
		continue;
	    else if (is(str, i, '/')) {
		i++;
		if (is_pchar(str, i)) {
		    i++;
		    while (is_pchar(str, i))
			i++;
		}
		goto path_abempty;
	    } else
		continue;

	  path_abempty:
	    if (is(str, i, '/')) {
		i++;
		while (is(str, i, '/') || is_pchar(str, i))
		    i++;
	    } /* else goto path_empty; */

	  path_empty:
	    ;

	    /* query */
	    if (is(str, i, '?')) {
		i++;
		while (is(str, i, '/') || is(str, i, '?') || is_pchar(str, i))
		    i++;
	    } else
		has_no_query = 1;

	    /* fragment */
	    if (is(str, i, '#')) {
		i++;
		while (is(str, i, '/') || is(str, i, '?') || is_pchar(str, i))
		    i++;
	    } else
		has_no_fragment = 1;

	    /* scheme only (e.g. "about:"): not treated as a URL */
	    if (has_empty_path && has_no_query && has_no_fragment)
		continue;

	    break;
	}

	/* narrow str to the match, or set str->str to NULL if none */
	if (ptr != NULL)
	    str->len = i - (ptr - str->str);
	str->str = ptr;
	return NULL;
    }

    /* Pass II */

    if ((gcstr = gcstring_newcopy(str, lbobj)) == NULL) {
	lbobj->errnum = errno ? errno : ENOMEM;
	return NULL;
    }

    /* non-break URI. */
    if (data == NULL) {
	for (i = 1; i < gcstr->gclen; i++)
	    gcstr->gcstr[i].flag = LINEBREAK_FLAG_PROHIBIT_BEFORE;
	return gcstr;
    }

    /* break URI. */
    /* a break is allowed right after a leading "url:" prefix */
    if (startswith((unistr_t *) gcstr, 0, "url:", 4, 0)) {
	gcstr->gcstr[4].flag = LINEBREAK_FLAG_ALLOW_BEFORE;
	i = 5;
    } else
	i = 1;
    for (; i < gcstr->gclen; i++) {
	unichar_t u, v;
	/* u: first character of the previous cluster, v: of this cluster */
	u = gcstr->str[gcstr->gcstr[i - 1].idx];
	v = gcstr->str[gcstr->gcstr[i].idx];

	/*
	 * Some rules based on CMoS 15th ed.
	 * 17.11 1.1: [/] ÷ [^/]
	 * 17.11 2:   [-] ×
	 * 6.17 2:   [.] ×
	 * 17.11 1.2: ÷ [-~.,_?#%]
	 * 17.11 1.3: ÷ [=&]
	 * 17.11 1.3: [=&] ÷
	 * Default:  ALL × ALL
	 */
	if (u == '/' && v != '/')
	    gcstr->gcstr[i].flag = LINEBREAK_FLAG_ALLOW_BEFORE;
	else if (u == '-' || u == '.')
	    gcstr->gcstr[i].flag = LINEBREAK_FLAG_PROHIBIT_BEFORE;
	else if (v == '-' || v == '~' || v == '.' || v == ',' ||
		 v == '_' || v == '?' || v == '#' || v == '%' ||
		 u == '=' || v == '=' || u == '&' || v == '&')
	    gcstr->gcstr[i].flag = LINEBREAK_FLAG_ALLOW_BEFORE;
	else
	    gcstr->gcstr[i].flag = LINEBREAK_FLAG_PROHIBIT_BEFORE;
    }

    /* Won't break punctuations at end of matches. */
    /* walks backwards and stops at the first cluster that does not match */
    for (i = gcstr->gclen - 1; 1 <= i; i--) {
	unichar_t u = gcstr->str[gcstr->gcstr[i].idx];
	if (gcstr->gcstr[i].flag == LINEBREAK_FLAG_ALLOW_BEFORE &&
	    (u == '"' || u == '.' || u == ':' || u == ';' || u == ',' ||
	     u == '>'))
	    gcstr->gcstr[i].flag = LINEBREAK_FLAG_PROHIBIT_BEFORE;
	else
	    break;
    }
    return gcstr;
}