Ejemplo n.º 1
0
// Parses `input` into `*url` using a hand-written state machine (one `case`
// per parser state, advancing one character per loop iteration).
//
// Parameters:
//   input  - NUL-terminated string to parse. Only bytes in 0x00..0x7E are
//            accepted; tab, CR and LF are rejected outright.
//   url    - output; cleared with reset_url() before parsing starts.
//   base   - optional base URL used to resolve inputs without a scheme
//            (may be null).
//   errors - optional; when non-null it is set to false once validation
//            passes and to true whenever a recoverable syntax violation is
//            seen (it is left untouched by the early validation returns).
//
// Returns false when the input is empty, contains a rejected byte, has no
// scheme and no usable base, or when host/port parsing fails; true otherwise.
bool parse_url(char const* input, url_t* url, url_t const* base, bool* errors) {
  enum state_t {
    state_scheme_start,
    state_scheme,
    state_no_scheme,
    state_relative_or_authority,
    state_path_or_authority,
    state_relative,
    state_relative_slash,
    state_authority_slashes,
    state_authority_ignore_slashes,
    state_authority,
    state_host,
    state_port,
    state_file,
    state_file_slash,
    state_file_host,
    state_path_start,
    state_path,
    state_non_relative_path,
    state_query,
    state_fragment,
  };

  size_t length = strlen(input);
  if (!length) return false;
  for (size_t i = 0; i < length; ++i) {
    if (input[i] < 0x00 || input[i] > 0x7E) {
      // no unicode support
      return false;
    }
    if (input[i] == 0x0D || input[i] == 0x0A || input[i] == 0x09) {
      // no tab or newline
      return false;
    }
  }

  bool s_errors = false;
  if (errors) {
    *errors = false;
  } else {
    // Point at a local so the rest of the function can write unconditionally.
    errors = &s_errors;
  }

  // Owns the trimmed copy of the input (when trimming is needed) for the
  // lifetime of the parse.
  std::string trimmed;
  if (input[0] <= 0x20 || input[length - 1] <= 0x20) {
    // leading or trailing C0-controls and space
    *errors = true;
    // Bound the scan by `length`: the NUL terminator is also <= 0x20, so an
    // all-whitespace input would otherwise run off the end of the buffer and
    // wrap `length` around.
    while (length && *input <= 0x20) {
      ++input;
      --length;
    }
    while (length && input[length - 1] <= 0x20) {
      --length;
    }
    // The state machine detects end-of-input via `*ptr == 0`, so the trimmed
    // range must itself be NUL-terminated; otherwise the trailing whitespace
    // byte is consumed as data and the final state is never flushed.
    trimmed.assign(input, length);
    input = trimmed.c_str();
  }

  state_t state = state_scheme_start;
  reset_url(url);

  bool at_flag = false;
  bool bracket_flag = false;

  std::string buffer;
  char const* ptr = input;
  // Each iteration handles the character at `ptr`; a state that wants the
  // same character re-examined by the next state decrements `ptr` so that the
  // increment at the bottom of the loop lands on it again.
  while (true) {
    switch (state) {
    case state_scheme_start:
      if (std::isalpha(*ptr)) {
        buffer.push_back(std::tolower(*ptr));
        state = state_scheme;
      } else {
        state = state_no_scheme;
        --ptr;
      }
      break;
    case state_scheme:
      if (std::isalnum(*ptr) || *ptr == '+' || *ptr == '-' || *ptr == '.') {
        buffer.push_back(std::tolower(*ptr));
      } else if (*ptr == ':') {
        url->scheme = buffer;
        url->is_special = (url->scheme == "ftp" || url->scheme == "gopher" || url->scheme == "http" ||
          url->scheme == "https" || url->scheme == "ws" || url->scheme == "wss" || url->scheme == "file");
        buffer.clear();
        if (url->scheme == "file") {
          if (ptr[1] != '/' || ptr[2] != '/') {
            *errors = true;
          }
          state = state_file;
        } else if (url->is_special) {
          if (base && base->scheme == url->scheme) {
            state = state_relative_or_authority;
          } else {
            state = state_authority_slashes;
          }
        } else if (ptr[1] == '/') {
          state = state_path_or_authority;
          ++ptr;
        } else {
          url->non_relative = true;
          url->path.push_back("");
          state = state_non_relative_path;
        }
      } else {
        // Not a scheme after all: restart from the first character.
        buffer.clear();
        ptr = input - 1;
        state = state_no_scheme;
      }
      break;
    case state_no_scheme:
      if (!base || (base->non_relative && *ptr != '#')) {
        return false;
      } else if (base->non_relative && *ptr == '#') {
        copy_url(url, base, c_scheme | c_path | c_query);
        url->non_relative = true;
        state = state_fragment;
      } else if (base->scheme != "file") {
        state = state_relative;
        --ptr;
      } else {
        state = state_file;
        --ptr;
      }
      break;
    case state_relative_or_authority:
      if (ptr[0] == '/' && ptr[1] == '/') {
        state = state_authority_ignore_slashes;
        ++ptr;
      } else {
        *errors = true;
        state = state_relative;
        --ptr;
      }
      break;
    case state_path_or_authority:
      if (*ptr == '/') {
        state = state_authority;
      } else {
        state = state_path;
        --ptr;
      }
      break;
    case state_relative:
      copy_url(url, base, c_scheme);
      switch (*ptr) {
      case 0:
        copy_url(url, base, c_username | c_password | c_host | c_port | c_path | c_query);
        break;
      case '/':
        state = state_relative_slash;
        break;
      case '?':
        copy_url(url, base, c_username | c_password | c_host | c_port | c_path);
        state = state_query;
        break;
      case '#':
        copy_url(url, base, c_username | c_password | c_host | c_port | c_path | c_query);
        state = state_fragment;
        break;
      default:
        if (url->is_special && *ptr == '\\') {
          *errors = true;
          state = state_relative_slash;
        } else {
          copy_url(url, base, c_username | c_password | c_host | c_port | c_path);
          // Drop the base's last path segment; the input replaces it.
          if (url->path.size()) {
            url->path.pop_back();
          }
          state = state_path;
          --ptr;
        }
      }
      break;
    case state_relative_slash:
      if (*ptr == '/' || (url->is_special && *ptr == '\\')) {
        if (*ptr == '\\') {
          *errors = true;
        }
        state = state_authority_ignore_slashes;
      } else {
        copy_url(url, base, c_username | c_password | c_host | c_port);
        state = state_path;
        --ptr;
      }
      break;
    case state_authority_slashes:
      if (ptr[0] == '/' && ptr[1] == '/') {
        state = state_authority_ignore_slashes;
        ++ptr;
      } else {
        *errors = true;
        state = state_authority_ignore_slashes;
        --ptr;
      }
      break;
    case state_authority_ignore_slashes:
      if (*ptr != '/' && *ptr != '\\') {
        state = state_authority;
        --ptr;
      } else {
        *errors = true;
      }
      break;
    case state_authority:
      if (*ptr == '@') {
        *errors = true;
        // A second '@' means the earlier one was part of the credentials.
        if (at_flag) {
          buffer = "%40" + buffer;
        }
        at_flag = true;
        bool password = false;
        for (char chr : buffer) {
          if (chr == ':') {
            password = true;
          } else if (password) {
            percent_encode<encode_userinfo>(url->password, chr);
          } else {
            percent_encode<encode_userinfo>(url->username, chr);
          }
        }
        buffer.clear();
      } else if (*ptr == 0 || *ptr == '/' || *ptr == '?' || *ptr == '#' || (url->is_special && *ptr == '\\')) {
        // Rewind to the start of the buffered text so the host state re-reads it.
        ptr -= buffer.size() + 1;
        buffer.clear();
        state = state_host;
      } else {
        buffer.push_back(*ptr);
      }
      break;
    case state_host:
      if (*ptr == ':' && !bracket_flag) {
        if (url->is_special && buffer.empty()) {
          return false;
        }
        if (!parse_host(buffer, url, errors)) {
          return false;
        }
        buffer.clear();
        state = state_port;
      } else if (*ptr == 0 || *ptr == '/' || *ptr == '?' || *ptr == '#' || (url->is_special && *ptr == '\\')) {
        --ptr;
        if (url->is_special && buffer.empty()) {
          return false;
        }
        if (!parse_host(buffer, url, errors)) {
          return false;
        }
        buffer.clear();
        state = state_path_start;
      } else {
        // Track brackets so a ':' inside "[...]" is not taken as the port separator.
        if (*ptr == '[') {
          bracket_flag = true;
        } else if (*ptr == ']') {
          bracket_flag = false;
        }
        buffer.push_back(*ptr);
      }
      break;
    case state_port:
      if (std::isdigit(*ptr)) {
        buffer.push_back(*ptr);
      } else if (*ptr == 0 || *ptr == '/' || *ptr == '?' || *ptr == '#' || (url->is_special && *ptr == '\\')) {
        if (buffer.size() > 5) return false;
        // `consumed` receives the number of characters sscanf read (%n), so a
        // partially numeric or empty buffer is rejected.
        int consumed = 0, port = 0;
        if (sscanf(buffer.c_str(), "%d%n", &port, &consumed) != 1 || static_cast<size_t>(consumed) != buffer.size() || port > 65535) {
          return false;
        }
        // A port equal to scheme_port() for this scheme is stored as 0.
        url->port = (port == scheme_port(url->scheme.c_str()) ? 0 : port);
        buffer.clear();
        state = state_path_start;
        --ptr;
      } else {
        return false;
      }
      break;
    case state_file:
      url->scheme = "file";
      url->is_special = true;
      switch (*ptr) {
      case 0:
        if (base && base->scheme == "file") {
          copy_url(url, base, c_host | c_path | c_query);
        }
        break;
      case '\\':
        *errors = true;
        // fall through
      case '/':
        state = state_file_slash;
        break;
      case '?':
        if (base && base->scheme == "file") {
          copy_url(url, base, c_host | c_path);
        }
        state = state_query;
        break;
      case '#':
        if (base && base->scheme == "file") {
          copy_url(url, base, c_host | c_path | c_query);
        }
        state = state_fragment;
        break;
      default:
        // The long condition is "remaining input does NOT start with a drive
        // letter" (alpha, then ':' or '|', then '/', '\\', '?' or '#').
        if (base && base->scheme == "file" && (!std::isalpha(ptr[0]) || (ptr[1] != ':' && ptr[1] != '|') ||
          ptr[2] == 0 || (ptr[2] != '/' && ptr[2] != '\\' && ptr[2] != '?' && ptr[2] != '#')))
        {
          copy_url(url, base, c_host | c_path);
          pop_path(url->path);
        } else if (base && base->scheme == "file") {
          return false;
        }
        state = state_path;
        --ptr;
      }
      break;
    case state_file_slash:
      if (*ptr == '/' || *ptr == '\\') {
        if (*ptr == '\\') *errors = true;
        state = state_file_host;
      } else {
        if (base && base->scheme == "file" && base->path.size() && is_normalized_drive(base->path[0])) {
          url->path.push_back(base->path[0]);
        }
        state = state_path;
        --ptr;
      }
      break;
    case state_file_host:
      if (*ptr == 0 || *ptr == '/' || *ptr == '\\' || *ptr == '?' || *ptr == '#') {
        --ptr;
        if (is_drive(buffer)) {
          // Buffer is kept: the path state will consume it as the first segment.
          *errors = true;
          state = state_path;
        } else if (buffer.empty()) {
          state = state_path_start;
        } else {
          if (!parse_host(buffer, url, errors)) {
            return false;
          }
          if (url->host == "localhost") {
            url->host.clear();
          }
          buffer.clear();
          state = state_path_start;
        }
      } else {
        buffer.push_back(*ptr);
      }
      break;
    case state_path_start:
      if (url->is_special && *ptr == '\\') {
        *errors = true;
      }
      state = state_path;
      if (*ptr != '/' && (!url->is_special || *ptr != '\\')) {
        --ptr;
      }
      break;
    case state_path:
      if (*ptr == 0 || *ptr == '/' || (url->is_special && *ptr == '\\') || *ptr == '?' || *ptr == '#') {
        if (url->is_special && *ptr == '\\') {
          *errors = true;
        }
        if (buffer == "..") {
          pop_path(url->path);
          if (*ptr != '/' && (!url->is_special || *ptr != '\\')) {
            url->path.push_back("");
          }
        } else if (buffer == "." && *ptr != '/' && (!url->is_special || *ptr != '\\')) {
          url->path.push_back("");
        } else if (buffer != ".") {
          if (url->scheme == "file" && url->path.empty() && is_drive(buffer)) {
            if (url->host.size()) *errors = true;
            url->host.clear();
            // Normalise the drive separator ("C|" becomes "C:").
            buffer[1] = ':';
          }
          url->path.push_back(buffer);
        }
        buffer.clear();
        if (*ptr == '?') {
          state = state_query;
        } else if (*ptr == '#') {
          state = state_fragment;
        }
      } else {
        if (!url_code_point(*ptr) && *ptr != '%') {
          *errors = true;
        } else if (*ptr == '%' && (!std::isxdigit(ptr[1]) || !std::isxdigit(ptr[2]))) {
          *errors = true;
        }
        // "%2e" / "%2E" is decoded to '.' so dot-segments are recognised above.
        if (*ptr == '%' && ptr[1] == '2' && (ptr[2] == 'e' || ptr[2] == 'E')) {
          buffer.push_back('.');
          ptr += 2;
        } else {
          percent_encode<encode_default>(buffer, *ptr);
        }
      }
      break;
    case state_non_relative_path:
      if (*ptr == '?') {
        state = state_query;
      } else if (*ptr == '#') {
        state = state_fragment;
      } else {
        if (*ptr != 0 && !url_code_point(*ptr) && *ptr != '%') {
          *errors = true;
        } else if (*ptr == '%' && (!std::isxdigit(ptr[1]) || !std::isxdigit(ptr[2]))) {
          *errors = true;
        }
        if (*ptr) {
          percent_encode<encode_simple>(url->path[0], *ptr);
        }
      }
      break;
    case state_query:
      if (*ptr == '#') {
        state = state_fragment;
      } else {
        if (*ptr != 0 && !url_code_point(*ptr) && *ptr != '%') {
          *errors = true;
        } else if (*ptr == '%' && (!std::isxdigit(ptr[1]) || !std::isxdigit(ptr[2]))) {
          *errors = true;
        }
        if (*ptr) {
          percent_encode<encode_query>(url->query, *ptr);
        }
      }
      break;
    case state_fragment:
      if (*ptr != 0 && !url_code_point(*ptr) && *ptr != '%') {
        *errors = true;
      } else if (*ptr == '%' && (!std::isxdigit(ptr[1]) || !std::isxdigit(ptr[2]))) {
        *errors = true;
      }
      if (*ptr) {
        // NOTE(review): fragment characters are appended to url->query here.
        // This looks like it should target a dedicated fragment field, but
        // url_t is not visible from this function - confirm before changing.
        url->query.push_back(*ptr);
      }
      break;
    }

    // Stop once the terminator position has been processed; otherwise advance.
    if (ptr < input + length) {
      ++ptr;
    } else {
      break;
    }
  }

  return true;
}
Ejemplo n.º 2
0
/*
 * Walk callback invoked per (node, minor) pair; `arg` is the caller's
 * struct search_args.  The minor is classified and recorded as one of:
 *   - a bus        (bus_type() != NULL)          -> add_bus()
 *   - a controller (is_ctrl())                   -> add_controller()
 *   - a character-device drive or zvol           -> disk, alias and devpaths
 * Anything else is ignored.
 *
 * Returns DI_WALK_CONTINUE normally, or DI_WALK_TERMINATE after recording
 * a failure in args->dev_walk_status (ENOMEM for failures detected here).
 */
static int
add_devs(di_node_t node, di_minor_t minor, void *arg)
{
	struct search_args	*args;
	int result = DI_WALK_CONTINUE;

	args = (struct search_args *)arg;

	if (dm_debug > 1) {
		/* This is all just debugging code */
		char	*devpath;
		char	dev_name[MAXPATHLEN];

		devpath = di_devfs_path(node);
		(void) snprintf(dev_name, sizeof (dev_name), "%s:%s", devpath,
		    di_minor_name(minor));
		di_devfs_path_free((void *) devpath);

		(void) fprintf(stderr,
		    "INFO: dev: %s, node: %s%d, minor: 0x%x, type: %s\n",
		    dev_name, di_node_name(node), di_instance(node),
		    di_minor_spectype(minor),
		    (di_minor_nodetype(minor) != NULL ?
		    di_minor_nodetype(minor) : "NULL"));
	}

	if (bus_type(node, minor, args->ph) != NULL) {
		if (add_bus(args, node, minor, NULL) == NULL) {
			args->dev_walk_status = ENOMEM;
			result = DI_WALK_TERMINATE;
		}

	} else if (is_ctrl(node, minor)) {
		if (add_controller(args, node, minor) == NULL) {
			args->dev_walk_status = ENOMEM;
			result = DI_WALK_TERMINATE;
		}

	} else if (di_minor_spectype(minor) == S_IFCHR &&
	    (is_drive(minor) || is_zvol(node, minor))) {
		char	*devidstr;
		char	kernel_name[MAXPATHLEN];
		/*
		 * Start as NULL so that a failed create_disk() below is
		 * detectable by the code that follows.
		 */
		disk_t	*diskp = NULL;

		(void) snprintf(kernel_name, sizeof (kernel_name), "%s%d",
		    di_node_name(node), di_instance(node));
		devidstr = get_str_prop(DEVICE_ID_PROP, node);

		args->node = node;
		args->minor = minor;
		/*
		 * Check if we already got this disk and
		 * this is another slice.
		 */
		if (!have_disk(args, devidstr, kernel_name, &diskp)) {
			args->dev_walk_status = 0;
			/*
			 * This is a newly found disk, create the
			 * disk structure.
			 */
			diskp = create_disk(devidstr, kernel_name, args);
			if (diskp == NULL) {
				/*
				 * Do not touch diskp any further; the walk
				 * is terminated at the end of this branch.
				 */
				args->dev_walk_status = ENOMEM;
			} else if (diskp->drv_type != DM_DT_FLOPPY &&
			    args->dev_walk_status == 0) {
				/* add the controller relationship */
				if (add_disk2controller(diskp, args) != 0) {
					args->dev_walk_status = ENOMEM;
				}
			}
		}

		/*
		 * The zvol alias handling needs a valid disk; skip it when
		 * disk creation failed above.
		 */
		if (diskp != NULL && is_zvol(node, minor)) {
			char zvdsk[MAXNAMELEN];
			char *str;
			alias_t *ap;

			if (di_prop_lookup_strings(di_minor_devt(minor),
			    node, "name", &str) == -1)
				return (DI_WALK_CONTINUE);
			(void) snprintf(zvdsk, MAXNAMELEN, "/dev/zvol/rdsk/%s",
			    str);
			if ((ap = find_alias(diskp, kernel_name)) == NULL) {
				if (new_alias(diskp, kernel_name,
				    zvdsk, args) != 0) {
					args->dev_walk_status = ENOMEM;
				}
			} else {
				/*
				 * It is possible that we have already added
				 * this devpath.
				 * Do not add it again. new_devpath will
				 * return a 0 if found, and not add the path.
				 */
				if (new_devpath(ap, zvdsk) != 0) {
					args->dev_walk_status = ENOMEM;
				}
			}
		}

		/* Add the devpaths for the drive. */
		if (args->dev_walk_status == 0) {
			char	*devpath;
			char	slice_path[MAXPATHLEN];
			char	*pattern;

			/*
			 * We will come through here once for each of
			 * the raw slice device names.
			 */
			devpath = di_devfs_path(node);
			(void) snprintf(slice_path,
			    sizeof (slice_path), "%s:%s",
			    devpath, di_minor_name(minor));
			di_devfs_path_free((void *) devpath);

			if (libdiskmgt_str_eq(di_minor_nodetype(minor),
			    DDI_NT_FD)) {
				pattern = DEVLINK_FLOPPY_REGEX;
			} else {
				pattern = DEVLINK_REGEX;
			}

			/* Walk the /dev tree to get the devlinks. */
			(void) di_devlink_walk(args->handle, pattern,
			    slice_path, DI_PRIMARY_LINK, arg, add_devpath);
		}

		if (args->dev_walk_status != 0) {
			result = DI_WALK_TERMINATE;
		}
	}

	return (result);
}