Exemple #1
0
/**
 * Decodes percent-encoded ("%XX") sequences in str.
 *
 * Every '%' followed by two hexadecimal digits is replaced by the byte those
 * digits encode.  A malformed or incomplete escape is never dropped: its
 * characters are copied to the output verbatim.  This covers an escape cut
 * short by the end of the string ("abc%" / "abc%4") and an escape interrupted
 * by another '%' ("%%41" yields "%A": the first '%' is literal, the second
 * starts a valid escape).
 *
 * @param str  the possibly percent-encoded input
 * @return     the decoded string
 */
std::string UrlParser::percent_decode(const std::string &str) const
{
    enum State {
        General,
        FirstPercentEncodedDigit,
        SecondPercentEncodedDigit
    } state = General;

    char first_char = '\0';
    std::string result;
    result.reserve(str.size());    // decoding never grows the string

    for (std::string::const_iterator it = str.begin(); it != str.end(); ++it) {
        const char c = *it;

        if (state == FirstPercentEncodedDigit) {
            if (is_hex_digit(c)) {
                first_char = c;
                state = SecondPercentEncodedDigit;
                continue;
            }
            // Malformed escape: keep the '%' literally, then fall through so
            // that c is handled as an ordinary character (it may itself be
            // the '%' that starts the next escape).
            result += '%';
            state = General;
        } else if (state == SecondPercentEncodedDigit) {
            if (is_hex_digit(c)) {
                result += char(
                        (hex_digit_to_int(first_char) << 4) |
                         hex_digit_to_int(c));
                state = General;
                continue;
            }
            // Only one hex digit followed the '%': emit both verbatim and
            // fall through to re-examine c.
            result += '%';
            result += first_char;
            state = General;
        }

        // General state.
        if (c == '%')
            state = FirstPercentEncodedDigit;
        else
            result += c;
    }

    // The input ended in the middle of an escape: flush what was consumed so
    // that no input characters are lost.
    if (state != General) {
        result += '%';
        if (state == SecondPercentEncodedDigit)
            result += first_char;
    }

    return result;
}
// Decodes the NUL-terminated, URL-encoded string `src` into `dest`.
//
// "%XX" is replaced by the byte built from the two characters following the
// '%' (via hex_digit_to_int), and '+' is replaced by a space.  The sequence
// "%00" is deliberately left undecoded and copied through as the three
// literal characters, so the output never contains an embedded NUL.
// NOTE(review): the two characters after '%' are not checked for being hex
// digits here; the result for e.g. "%zz" depends on hex_digit_to_int.
//
// `max_size` is the size of `dest` in bytes, including room for the
// terminating NUL.  If `max_size` is zero nothing is written at all.
//
// Returns the number of bytes written (excluding the terminator).  When the
// output had to be truncated, the returned value is one larger than the
// number of bytes actually written.
size_t
url_decode_cstr(
	char *dest,
	const char*src,
	size_t max_size
) {
	size_t ret = 0;
#if DEBUG
	char*const dest_check = dest;
#endif

	// A zero-sized buffer has no room even for the terminator, so `dest`
	// must not be touched (this matches url_decode_str).
	if(!max_size--)
		return 0;

	// From here on max_size counts the payload bytes still available; one
	// byte stays reserved for the terminating NUL.
	while(true) {
		const char src_char = *src++;
		if(!src_char) {
			break;
		}

		if(!max_size) {
			// Out of room with input still pending: signal truncation.
			ret++;
			break;
		}

		if((src_char == '%')
			&& src[0]
			&& src[1]
			&& !(src[0] == '0' && src[1] == '0')
		) {
			*dest++ = (hex_digit_to_int(src[0]) << 4)
				+ hex_digit_to_int(src[1]);
			src += 2;
		} else if(src_char == '+') {
			*dest++ = ' ';  // Stupid legacy space encoding.
		} else {
			*dest++ = src_char;
		}

		ret++;
		max_size--;
	}

	*dest = 0;
#if DEBUG
	// NOTE(review): on truncation ret is strlen(dest)+1, so this assertion
	// fires in DEBUG builds -- confirm whether that is intended.
	assert(strlen(dest_check)==ret);
#endif
	return ret;
}
// Length-bounded URL decoder: reads at most `src_len` bytes from `src`
// (stopping early at a NUL byte) and writes the decoded, NUL-terminated
// result to `dest`, which is `max_size` bytes large.
//
// "%XX" becomes the byte built from the two following characters (via
// hex_digit_to_int) and '+' becomes a space.  A '%' without two further
// non-NUL input bytes is copied through unchanged.
//
// Returns the number of bytes written, excluding the terminator; if the
// output was truncated the result is one larger than that.  Returns 0 and
// writes nothing when `max_size` is 0.
size_t
url_decode_str(
	char *dest,
	size_t max_size,
	const char* src,		// Length determined by src_len.
	size_t src_len
) {
	char *out = dest;
	const char *in = src;
	size_t remaining = src_len;
	size_t written = 0;
	size_t room;

	if(max_size == 0)
		return 0;

	// Keep one byte back for the terminating NUL.
	room = max_size - 1;

	for(; remaining != 0; ) {
		const char ch = *in++;
		remaining--;

		if(ch == '\0')
			break;

		if(room == 0) {
			// Input left over but no space: report truncation.
			written++;
			break;
		}

		switch(ch) {
		case '%':
			if((remaining >= 2) && (in[0] != '\0') && (in[1] != '\0')) {
				*out = (hex_digit_to_int(in[0]) << 4)
					+ hex_digit_to_int(in[1]);
				in += 2;
				remaining -= 2;
			} else {
				*out = ch;
			}
			break;
		case '+':
			*out = ' ';  // Legacy encoding of a space.
			break;
		default:
			*out = ch;
			break;
		}

		out++;
		written++;
		room--;
	}

	*out = 0;

	return written;
}
/*
 * Converts the hexadecimal text in `line` to bytes stored in `buf`.
 *
 * line   - NUL-terminated string of hex digit pairs; an odd length is an error
 * buf    - destination buffer
 * maxlen - capacity of `buf` in bytes
 *
 * Returns the write offset after this call, or -1 on malformed input or when
 * the data does not fit into `maxlen` bytes.
 *
 * NOTE: `offset` is static, so it persists across calls: each call appends
 * after the bytes written by earlier calls and the return value is the
 * cumulative byte count.  Presumably this is meant for input that arrives
 * line by line -- verify against the callers.  Nothing in this function
 * resets it.
 */
static int str_to_bin(char *line, void *buf, int maxlen)
{
	static int offset;
	unsigned char *p = buf;
	if (strlen(line) % 2 != 0)
		return -1;
	while (offset < maxlen && *line) {
		uint8_t value;
		/*
		 * Keep the digit in an int: plain char is unsigned on some
		 * platforms, where a negative error value from
		 * hex_digit_to_int() would never compare below zero.
		 * (assumes hex_digit_to_int returns < 0 for a non-hex
		 * character -- TODO confirm)
		 */
		int c = hex_digit_to_int(*line++);
		if (c < 0 || *line == '\0')
			return -1;
		value = (uint8_t)(c << 4);
		c = hex_digit_to_int(*line++);
		if (c < 0)
			return -1;
		value |= (uint8_t)c;
		p[offset++] = value;
	}
	/* Buffer is full but input remains. */
	if (offset == maxlen && *line)
		return -1;
	return offset;
}
Exemple #5
0
/* Split a line into arguments, where every argument can be in the
 * following programming-language REPL-alike form:
 *
 * foo bar "newline are supported\n" and "\xff\x00otherstuff"
 *
 * The number of arguments is stored into *argc, and an array
 * of sds is returned. The caller should sdsfree() all the returned
 * strings and finally zfree() the array itself.
 *
 * Inside double quotes the escapes \n \r \t \b \a and \xHH are decoded and
 * any other backslashed character stands for itself; inside single quotes
 * only \' is an escape. A closing quote must be followed by whitespace or
 * the end of the line.
 *
 * Return value: for an empty or all-blank line NULL is returned with
 * *argc == 0. On a parse error (unbalanced quotes, or a closing quote
 * followed by a non-space character) everything allocated so far is freed
 * and NULL is returned; the cleanup loop leaves *argc at -1 in that case.
 *
 * Note that sdscatrepr() is able to convert back a string into
 * a quoted string in the same format sdssplitargs() is able to parse.
 */
sds *sdssplitargs(char *line, int *argc) {
    char *p = line;
    char *current = NULL;
    char **vector = NULL;

    *argc = 0;
    while(1) {
        /* skip blanks; the cast keeps isspace() defined for bytes >= 0x80
         * on platforms where plain char is signed */
        while(*p && isspace((unsigned char)*p)) p++;
        if (*p) {
            /* get a token */
            int inq=0;  /* set to 1 if we are in "quotes" */
            int insq=0; /* set to 1 if we are in 'single quotes' */
            int done=0;

            if (current == NULL) current = sdsempty();
            while(!done) {
                if (inq) {
                    if (*p == '\\' && *(p+1) == 'x' &&
                                             is_hex_digit(*(p+2)) &&
                                             is_hex_digit(*(p+3)))
                    {
                        unsigned char byte;

                        byte = (hex_digit_to_int(*(p+2))*16)+
                                hex_digit_to_int(*(p+3));
                        current = sdscatlen(current,(char*)&byte,1);
                        /* the shared p++ below consumes the 4th char */
                        p += 3;
                    } else if (*p == '\\' && *(p+1)) {
                        char c;

                        p++;
                        switch(*p) {
                        case 'n': c = '\n'; break;
                        case 'r': c = '\r'; break;
                        case 't': c = '\t'; break;
                        case 'b': c = '\b'; break;
                        case 'a': c = '\a'; break;
                        default: c = *p; break;
                        }
                        current = sdscatlen(current,&c,1);
                    } else if (*p == '"') {
                        /* closing quote must be followed by a space or
                         * nothing at all. */
                        if (*(p+1) && !isspace((unsigned char)*(p+1)))
                            goto err;
                        done=1;
                    } else if (!*p) {
                        /* unterminated quotes */
                        goto err;
                    } else {
                        current = sdscatlen(current,p,1);
                    }
                } else if (insq) {
                    if (*p == '\\' && *(p+1) == '\'') {
                        p++;
                        current = sdscatlen(current,"'",1);
                    } else if (*p == '\'') {
                        /* closing quote must be followed by a space or
                         * nothing at all. */
                        if (*(p+1) && !isspace((unsigned char)*(p+1)))
                            goto err;
                        done=1;
                    } else if (!*p) {
                        /* unterminated quotes */
                        goto err;
                    } else {
                        current = sdscatlen(current,p,1);
                    }
                } else {
                    switch(*p) {
                    case ' ':
                    case '\n':
                    case '\r':
                    case '\t':
                    case '\0':
                        done=1;
                        break;
                    case '"':
                        inq=1;
                        break;
                    case '\'':
                        insq=1;
                        break;
                    default:
                        current = sdscatlen(current,p,1);
                        break;
                    }
                }
                /* never step past the terminating NUL */
                if (*p) p++;
            }
            /* add the token to the vector */
            vector = zrealloc(vector,((*argc)+1)*sizeof(char*));
            vector[*argc] = current;
            (*argc)++;
            current = NULL;
        } else {
            return vector;
        }
    }

err:
    /* release every completed token, then the partially built one */
    while((*argc)--)
        sdsfree(vector[*argc]);
    zfree(vector);
    if (current) sdsfree(current);
    return NULL;
}
Exemple #6
0
/*
 * Splits `line` into whitespace-separated arguments and returns them as an
 * array of dstr; the number of arguments is stored in *argc.
 *
 * An argument may be wrapped in double quotes, inside which the escapes
 * \a \b \n \r \t and \xHH are decoded and any other backslashed character
 * stands for itself, or in single quotes, inside which only \' is an escape.
 * A closing quote must be followed by whitespace or the end of the line.
 *
 * Returns NULL with *argc == 0 for an empty or all-blank line.  On a parse
 * error or a failed RESIZE everything allocated so far is freed and NULL is
 * returned; *argc is then left at the number of arguments that had been
 * parsed (and have already been freed), so it must not be used to index
 * anything.
 */
dstr *dstr_split_args(const char *line, int *argc) {
	const char *p = line;
	dstr current = NULL;
	dstr *argv = NULL;

	*argc = 0;
	for (;;) {
		/* The cast keeps isspace() defined for bytes >= 0x80 on
		 * platforms where plain char is signed. */
		while (*p && isspace((unsigned char)*p))
			++p;
		if (*p) {
			int inq  = 0; /* 1 if in quotes */
			int insq = 0; /* 1 if in single quotes */
			int done = 0;

			if (current == NULL)
				current = dstr_new_len("", 0);
			while (!done) {
				/* FIXME */
				if (inq) {
					if (*p == '\\' && *(p + 1) == 'x' &&
						is_hex_digit(*(p + 2)) && is_hex_digit(*(p + 3))) {
						unsigned char byte = 16 * hex_digit_to_int(*(p + 2)) +
							hex_digit_to_int(*(p + 3));

						/* the shared ++p below consumes the 4th char */
						p += 3;
						current = dstr_cat_len(current, (char *)&byte, 1);
					} else if (*p == '\\' && *(p + 1)) {
						char c;

						++p;
						switch (*p) {
						case 'a':
							c = '\a';
							break;
						case 'b':
							c = '\b';
							break;
						case 'n':
							c = '\n';
							break;
						case 'r':
							c = '\r';
							break;
						case 't':
							c = '\t';
							break;
						default:
							c = *p;
							break;
						}
						current = dstr_cat_len(current, &c, 1);
					} else if (*p == '"') {
						/* closing quote must be followed by a space or not at all */
						if (*(p + 1) && !isspace((unsigned char)*(p + 1)))
							goto err;
						done = 1;
					/* unterminated quotes */
					} else if (*p == '\0')
						goto err;
					else
						current = dstr_cat_len(current, p, 1);
				} else if (insq) {
					if (*p == '\\' && *(p + 1) == '\'') {
						++p;
						current = dstr_cat_len(current, "'", 1);
					} else if (*p == '\'') {
						/* closing quote must be followed by a space or not at all */
						if (*(p + 1) && !isspace((unsigned char)*(p + 1)))
							goto err;
						done = 1;
					/* unterminated quotes */
					} else if (*p == '\0')
						goto err;
					else
						current = dstr_cat_len(current, p, 1);
				} else
					switch (*p) {
					case ' ':
					case '\0':
					case '\n':
					case '\r':
					case '\t':
						done = 1;
						break;
					case '"':
						inq = 1;
						break;
					case '\'':
						insq = 1;
						break;
					default:
						current = dstr_cat_len(current, p, 1);
						break;
					}
				/* never step past the terminating NUL */
				if (*p)
					++p;
			}
			/* append the finished token to the result array */
			if (RESIZE(argv, (*argc + 1) * sizeof (char *)) == NULL)
				goto err;
			argv[*argc] = current;
			++*argc;
			current = NULL;
		} else
			return argv;
	}

err:
	{
		int i;

		/* release every completed token, then the partially built one */
		for (i = 0; i < *argc; ++i)
			dstr_free(argv[i]);
		FREE(argv);
		if (current)
			dstr_free(current);
		return NULL;
	}
}
// Copies `source` to `dest`, replacing C-style backslash escape sequences
// with the characters they denote, and NUL-terminates `dest`.
//
// Handled escapes: \a \b \f \n \r \t \v \\ \? \' \" , octal escapes of one
// to three digits, and \x / \X followed by any number of hex digits.  The
// \u and \U cases are compiled out (#if 0).
//
// Every escape consumes at least two input characters and produces at most
// one output byte, so the output is never longer than the input; `source`
// and `dest` may be the same buffer (see the fast path below).
//
// Malformed input is reported through LOG_STRING(ERROR, errors) and does not
// abort decoding:
//   - a trailing lone backslash ends the output at that point;
//   - \x not followed by a hex digit, and unknown escapes, are dropped
//     without writing anything;
//   - a hex value above 0xFF is reported but still stored, converted to a
//     single char.
//
// Returns the number of bytes written to `dest`, not counting the
// terminating NUL.  `errors` is expected to be NULL (checked with
// GOOGLE_DCHECK below).
int UnescapeCEscapeSequences(const char* source, char* dest,
                             vector<string> *errors) {
  GOOGLE_DCHECK(errors == NULL) << "Error reporting not implemented.";

  char* d = dest;
  const char* p = source;

  // Small optimization for case where source = dest and there's no escaping
  while ( p == d && *p != '\0' && *p != '\\' )
    p++, d++;

  while (*p != '\0') {
    if (*p != '\\') {
      *d++ = *p++;
    } else {
      // Each case leaves p on the LAST character of the escape; the p++
      // after the switch then steps past it.
      switch ( *++p ) {                    // skip past the '\\'
        case '\0':
          LOG_STRING(ERROR, errors) << "String cannot end with \\";
          *d = '\0';
          return d - dest;   // we're done with p
        case 'a':  *d++ = '\a';  break;
        case 'b':  *d++ = '\b';  break;
        case 'f':  *d++ = '\f';  break;
        case 'n':  *d++ = '\n';  break;
        case 'r':  *d++ = '\r';  break;
        case 't':  *d++ = '\t';  break;
        case 'v':  *d++ = '\v';  break;
        case '\\': *d++ = '\\';  break;
        case '?':  *d++ = '\?';  break;    // \?  Who knew?
        case '\'': *d++ = '\'';  break;
        case '"':  *d++ = '\"';  break;
        case '0': case '1': case '2': case '3':  // octal digit: 1 to 3 digits
        case '4': case '5': case '6': case '7': {
          char ch = *p - '0';
          if ( IS_OCTAL_DIGIT(p[1]) )
            ch = ch * 8 + *++p - '0';
          if ( IS_OCTAL_DIGIT(p[1]) )      // safe (and easy) to do this twice
            ch = ch * 8 + *++p - '0';      // now points at last digit
          *d++ = ch;
          break;
        }
        case 'x': case 'X': {
          if (!isxdigit(p[1])) {
            if (p[1] == '\0') {
              LOG_STRING(ERROR, errors) << "String cannot end with \\x";
            } else {
              LOG_STRING(ERROR, errors) <<
                "\\x cannot be followed by non-hex digit: \\" << *p << p[1];
            }
            // Nothing is written; only the 'x' itself gets skipped.
            break;
          }
          unsigned int ch = 0;
          const char *hex_start = p;
          while (isxdigit(p[1]))  // arbitrarily many hex digits
            ch = (ch << 4) + hex_digit_to_int(*++p);
          if (ch > 0xFF)
            LOG_STRING(ERROR, errors) << "Value of " <<
              "\\" << string(hex_start, p+1-hex_start) << " exceeds 8 bits";
          // Stored even when out of range; the assignment narrows to char.
          *d++ = ch;
          break;
        }
#if 0  // TODO(kenton):  Support \u and \U?  Requires runetochar().
        case 'u': {
          // \uhhhh => convert 4 hex digits to UTF-8
          char32 rune = 0;
          const char *hex_start = p;
          for (int i = 0; i < 4; ++i) {
            if (isxdigit(p[1])) {  // Look one char ahead.
              rune = (rune << 4) + hex_digit_to_int(*++p);  // Advance p.
            } else {
              LOG_STRING(ERROR, errors)
                << "\\u must be followed by 4 hex digits: \\"
                <<  string(hex_start, p+1-hex_start);
              break;
            }
          }
          d += runetochar(d, &rune);
          break;
        }
        case 'U': {
          // \Uhhhhhhhh => convert 8 hex digits to UTF-8
          char32 rune = 0;
          const char *hex_start = p;
          for (int i = 0; i < 8; ++i) {
            if (isxdigit(p[1])) {  // Look one char ahead.
              // Don't change rune until we're sure this
              // is within the Unicode limit, but do advance p.
              char32 newrune = (rune << 4) + hex_digit_to_int(*++p);
              if (newrune > 0x10FFFF) {
                LOG_STRING(ERROR, errors)
                  << "Value of \\"
                  << string(hex_start, p + 1 - hex_start)
                  << " exceeds Unicode limit (0x10FFFF)";
                break;
              } else {
                rune = newrune;
              }
            } else {
              LOG_STRING(ERROR, errors)
                << "\\U must be followed by 8 hex digits: \\"
                <<  string(hex_start, p+1-hex_start);
              break;
            }
          }
          d += runetochar(d, &rune);
          break;
        }
#endif
        default:
          // Unknown escape: reported and dropped from the output.
          LOG_STRING(ERROR, errors) << "Unknown escape sequence: \\" << *p;
      }
      p++;                                 // read past letter we escaped
    }
  }
  *d = '\0';
  return d - dest;
}