/**
 * Decodes the percent-encoded ("%XX") sequences found in str.
 *
 * A '%' followed by two hexadecimal digits is replaced by the single byte
 * those digits denote. A '%' that is not followed by two hex digits is not
 * treated as an escape: it is copied to the output unchanged, together with
 * the characters that were consumed while trying to parse it. This also
 * holds for an incomplete escape at the very end of the input ("abc%" or
 * "abc%4"), which is emitted verbatim rather than being dropped.
 *
 * @param str text that may contain percent-encoded bytes
 * @return the decoded text
 */
std::string UrlParser::percent_decode(const std::string &str) const
{
    enum State {
        General,
        FirstPercentEncodedDigit,
        SecondPercentEncodedDigit
    } state = General;

    char first_char = '\0';
    std::string result;
    result.reserve(str.size()); // decoding never produces more bytes than it reads

    for (std::string::const_iterator it = str.begin(); it != str.end(); ++it) {
        const char c = *it;

        switch (state) {
        case General:
            if (c != '%') {
                result += c;
            } else {
                state = FirstPercentEncodedDigit;
                first_char = '\0';
            }
            break;

        case FirstPercentEncodedDigit:
            first_char = c;
            if (is_hex_digit(first_char)) {
                state = SecondPercentEncodedDigit;
            } else {
                // Not an escape after all: keep the text as it was written.
                result += '%';
                result += first_char;
                state = General;
            }
            break;

        case SecondPercentEncodedDigit:
            if (is_hex_digit(c)) {
                result += static_cast<char>(
                    (hex_digit_to_int(first_char) << 4) | hex_digit_to_int(c));
            } else {
                // Second digit is invalid: emit everything consumed so far verbatim.
                result += '%';
                result += first_char;
                result += c;
            }
            state = General;
            break;
        }
    }

    // The input ended in the middle of an escape. Flush what was consumed so
    // that a trailing "%" or "%X" is not silently lost.
    if (state == FirstPercentEncodedDigit) {
        result += '%';
    } else if (state == SecondPercentEncodedDigit) {
        result += '%';
        result += first_char;
    }

    return result;
}
/*
 * Decodes the URL-encoded, NUL-terminated string src into dest.
 *
 * - "%XY", where X and Y are both hexadecimal digits and the pair is not
 *   "00", becomes the single byte with that value. "%00" is left undecoded
 *   so the output never contains an embedded NUL.
 * - '+' becomes a space (legacy form encoding).
 * - Everything else, including a '%' that is not followed by two hex digits,
 *   is copied unchanged.
 *
 * dest       Output buffer; always NUL-terminated when max_size > 0.
 * src        NUL-terminated input.
 * max_size   Size of dest in bytes, including room for the terminator.
 *            When it is 0, dest is not touched at all.
 *
 * Returns the number of bytes stored in dest (terminator excluded). If the
 * output had to be truncated, the value returned is one more than the number
 * of bytes stored, i.e. it equals max_size.
 */
size_t url_decode_cstr(char *dest, const char *src, size_t max_size)
{
    size_t ret = 0;
#if DEBUG
    char *const dest_check = dest;
#endif

    /* A zero-sized buffer cannot even hold the terminator: write nothing. */
    if (max_size == 0)
        return 0;

    max_size--; /* reserve one byte for the terminating NUL */

    for (;;) {
        const char src_char = *src++;

        if (!src_char)
            break;

        if (!max_size) {
            /* Input remains but the buffer is full: the extra increment
             * is how truncation is reported to the caller. */
            ret++;
            break;
        }

        /* isxdigit(0) is false, so the checks below never read past the
         * terminator of src. */
        if (src_char == '%'
            && isxdigit((unsigned char)src[0])
            && isxdigit((unsigned char)src[1])
            && !(src[0] == '0' && src[1] == '0')) {
            *dest++ = (char)((hex_digit_to_int(src[0]) << 4)
                             + hex_digit_to_int(src[1]));
            src += 2;
        } else if (src_char == '+') {
            *dest++ = ' '; /* legacy space encoding */
        } else {
            *dest++ = src_char;
        }

        ret++;
        max_size--;
    }

    *dest = 0;

#if DEBUG
    /* Compare against the bytes actually written; ret is deliberately one
     * larger than that when the output was truncated. */
    assert(strlen(dest_check) == (size_t)(dest - dest_check));
#endif

    return ret;
}
/*
 * Decodes at most src_len bytes of the URL-encoded text at src into dest.
 *
 * - "%XY", where X and Y are both hexadecimal digits and both lie within the
 *   remaining src_len bytes, becomes the single byte with that value. Note
 *   that "%00" is decoded too, producing a NUL byte inside dest.
 * - '+' becomes a space (legacy form encoding).
 * - Everything else, including a '%' that is not followed by two hex digits,
 *   is copied unchanged.
 *
 * Decoding also stops early at the first NUL byte found in src.
 *
 * dest       Output buffer; always NUL-terminated when max_size > 0.
 * max_size   Size of dest in bytes, including room for the terminator.
 *            When it is 0, dest is not touched and 0 is returned.
 * src        Input text; it need not be NUL-terminated.
 * src_len    Number of bytes available at src.
 *
 * Returns the number of bytes stored in dest (terminator excluded). If the
 * output had to be truncated, the value returned is one more than the number
 * of bytes stored, i.e. it equals max_size.
 */
size_t url_decode_str(char *dest, size_t max_size, const char *src, size_t src_len)
{
    size_t ret = 0;

    if (max_size == 0)
        return 0;

    max_size--; /* reserve one byte for the terminating NUL */

    while (src_len--) {
        const char src_char = *src++;

        if (!src_char)
            break;

        if (!max_size) {
            /* Input remains but the buffer is full: the extra increment
             * is how truncation is reported to the caller. */
            ret++;
            break;
        }

        /* src_len now counts the bytes that follow src_char, so ">= 2"
         * guarantees both digits are inside the input. */
        if (src_char == '%'
            && src_len >= 2
            && isxdigit((unsigned char)src[0])
            && isxdigit((unsigned char)src[1])) {
            *dest++ = (char)((hex_digit_to_int(src[0]) << 4)
                             + hex_digit_to_int(src[1]));
            src += 2;
            src_len -= 2;
        } else if (src_char == '+') {
            *dest++ = ' '; /* legacy space encoding */
        } else {
            *dest++ = src_char;
        }

        ret++;
        max_size--;
    }

    *dest = 0;

    return ret;
}
/*
 * Converts the hexadecimal text in line to bytes and appends them to buf.
 *
 * line     NUL-terminated string of hex digits; its length must be even.
 * buf      Destination buffer.
 * maxlen   Capacity of buf in bytes.
 *
 * The write position (offset) is a function-local static, so it persists
 * across calls: each call continues where the previous one stopped and the
 * position is never reset, not even after an error.
 * NOTE(review): presumably this is intentional, to accumulate input that
 * arrives over several lines -- confirm against the callers.
 *
 * Returns the total number of bytes stored in buf so far, or -1 if line has
 * an odd length, contains a character that hex_digit_to_int() rejects with a
 * negative value, or does not fit in the remaining space.
 */
static int str_to_bin(char *line, void *buf, int maxlen)
{
	static int offset;
	unsigned char *p = buf;

	if (strlen(line) % 2 != 0)
		return -1;

	while (offset < maxlen && *line) {
		uint8_t value;
		/* Keep the result in an int: plain char is unsigned on some
		 * platforms, where a negative error value would never be seen
		 * by the "< 0" tests below. */
		int c = hex_digit_to_int(*line++);

		if (c < 0 || *line == '\0')
			return -1;
		value = c << 4;

		c = hex_digit_to_int(*line++);
		if (c < 0)
			return -1;
		value |= c;

		p[offset++] = value;
	}

	/* Buffer full but input remains: report overflow. */
	if (offset == maxlen && *line)
		return -1;

	return offset;
}
/* Split a line into arguments, where every argument can be in the
 * following programming-language REPL-alike form:
 *
 * foo bar "newline are supported\n" and "\xff\x00otherstuff"
 *
 * Inside double quotes the escapes \n \r \t \b \a and \xHH are interpreted,
 * and any other backslashed character stands for itself. Inside single
 * quotes only \' is interpreted.
 *
 * The number of arguments is stored into *argc, and an array
 * of sds is returned. The caller should sdsfree() all the returned
 * strings and finally zfree() the array itself.
 *
 * NULL is returned, with *argc set to 0, when the input is malformed: quotes
 * that are never closed, or a closing quote followed by something other than
 * whitespace or the end of the line. An input holding no arguments at all
 * also yields NULL with *argc set to 0, because the array is only allocated
 * once the first argument is stored.
 *
 * Note that sdscatrepr() is able to convert back a string into
 * a quoted string in the same format sdssplitargs() is able to parse.
 */
sds *sdssplitargs(char *line, int *argc) {
    char *p = line;
    char *current = NULL;
    char **vector = NULL;

    *argc = 0;
    while(1) {
        /* skip blanks; the cast keeps isspace() defined for bytes >= 0x80 */
        while(*p && isspace((unsigned char)*p)) p++;
        if (*p) {
            /* get a token */
            int inq=0;  /* set to 1 if we are in "quotes" */
            int insq=0; /* set to 1 if we are in 'single quotes' */
            int done=0;

            if (current == NULL) current = sdsempty();
            while(!done) {
                if (inq) {
                    if (*p == '\\' && *(p+1) == 'x' &&
                                             is_hex_digit(*(p+2)) &&
                                             is_hex_digit(*(p+3)))
                    {
                        unsigned char byte;

                        byte = (hex_digit_to_int(*(p+2))*16)+
                                hex_digit_to_int(*(p+3));
                        current = sdscatlen(current,(char*)&byte,1);
                        /* the shared p++ at the bottom of the loop skips
                         * the fourth character of the escape */
                        p += 3;
                    } else if (*p == '\\' && *(p+1)) {
                        char c;

                        p++;
                        switch(*p) {
                        case 'n': c = '\n'; break;
                        case 'r': c = '\r'; break;
                        case 't': c = '\t'; break;
                        case 'b': c = '\b'; break;
                        case 'a': c = '\a'; break;
                        default: c = *p; break;
                        }
                        current = sdscatlen(current,&c,1);
                    } else if (*p == '"') {
                        /* closing quote must be followed by a space or
                         * nothing at all. */
                        if (*(p+1) && !isspace((unsigned char)*(p+1))) goto err;
                        done=1;
                    } else if (!*p) {
                        /* unterminated quotes */
                        goto err;
                    } else {
                        current = sdscatlen(current,p,1);
                    }
                } else if (insq) {
                    if (*p == '\\' && *(p+1) == '\'') {
                        p++;
                        current = sdscatlen(current,"'",1);
                    } else if (*p == '\'') {
                        /* closing quote must be followed by a space or
                         * nothing at all. */
                        if (*(p+1) && !isspace((unsigned char)*(p+1))) goto err;
                        done=1;
                    } else if (!*p) {
                        /* unterminated quotes */
                        goto err;
                    } else {
                        current = sdscatlen(current,p,1);
                    }
                } else {
                    switch(*p) {
                    case ' ':
                    case '\n':
                    case '\r':
                    case '\t':
                    case '\0':
                        done=1;
                        break;
                    case '"':
                        inq=1;
                        break;
                    case '\'':
                        insq=1;
                        break;
                    default:
                        current = sdscatlen(current,p,1);
                        break;
                    }
                }
                if (*p) p++;
            }
            /* add the token to the vector */
            vector = zrealloc(vector,((*argc)+1)*sizeof(char*));
            vector[*argc] = current;
            (*argc)++;
            current = NULL;
        } else {
            return vector;
        }
    }

err:
    while((*argc)--)
        sdsfree(vector[*argc]);
    zfree(vector);
    if (current) sdsfree(current);
    /* the loop above leaves the counter at -1; report "no arguments" */
    *argc = 0;
    return NULL;
}
/*
 * Splits line into whitespace-separated arguments and returns them as an
 * array of dstr, storing the number of arguments in *argc.
 *
 * An argument may contain "double quoted" parts, in which the escapes
 * \a \b \n \r \t and \xHH are interpreted and any other backslashed
 * character stands for itself, and 'single quoted' parts, in which only \'
 * is interpreted.
 *
 * Returns NULL with *argc set to 0 when the input is malformed (quotes that
 * are never closed, or a closing quote followed by something other than
 * whitespace or the end of the line) or when growing the array fails. An
 * input holding no arguments also yields NULL with *argc set to 0, because
 * the array is only allocated once the first argument is stored.
 */
dstr *dstr_split_args(const char *line, int *argc) {
    const char *p = line;
    dstr current = NULL;
    dstr *argv = NULL;

    *argc = 0;
    for (;;) {
        /* skip blanks; the cast keeps isspace() defined for bytes >= 0x80 */
        while (*p && isspace((unsigned char)*p))
            ++p;
        if (*p) {
            int inq = 0;  /* 1 if in quotes */
            int insq = 0; /* 1 if in single quotes */
            int done = 0;

            if (current == NULL)
                current = dstr_new_len("", 0);
            while (!done) {
                /* FIXME: marker left by the original author without details */
                if (inq) {
                    if (*p == '\\' && *(p + 1) == 'x' &&
                        is_hex_digit(*(p + 2)) && is_hex_digit(*(p + 3))) {
                        unsigned char byte =
                            16 * hex_digit_to_int(*(p + 2)) + hex_digit_to_int(*(p + 3));

                        /* the shared ++p at the bottom of the loop skips
                         * the fourth character of the escape */
                        p += 3;
                        current = dstr_cat_len(current, (char *)&byte, 1);
                    } else if (*p == '\\' && *(p + 1)) {
                        char c;

                        ++p;
                        switch (*p) {
                        case 'a': c = '\a'; break;
                        case 'b': c = '\b'; break;
                        case 'n': c = '\n'; break;
                        case 'r': c = '\r'; break;
                        case 't': c = '\t'; break;
                        default:  c = *p;   break;
                        }
                        current = dstr_cat_len(current, &c, 1);
                    } else if (*p == '"') {
                        /* closing quote must be followed by a space or not at all */
                        if (*(p + 1) && !isspace((unsigned char)*(p + 1)))
                            goto err;
                        done = 1;
                    } else if (*p == '\0') {
                        /* unterminated quotes */
                        goto err;
                    } else {
                        current = dstr_cat_len(current, p, 1);
                    }
                } else if (insq) {
                    if (*p == '\\' && *(p + 1) == '\'') {
                        ++p;
                        current = dstr_cat_len(current, "'", 1);
                    } else if (*p == '\'') {
                        /* closing quote must be followed by a space or not at all */
                        if (*(p + 1) && !isspace((unsigned char)*(p + 1)))
                            goto err;
                        done = 1;
                    } else if (*p == '\0') {
                        /* unterminated quotes */
                        goto err;
                    } else {
                        current = dstr_cat_len(current, p, 1);
                    }
                } else {
                    switch (*p) {
                    case ' ':
                    case '\0':
                    case '\n':
                    case '\r':
                    case '\t':
                        done = 1;
                        break;
                    case '"':
                        inq = 1;
                        break;
                    case '\'':
                        insq = 1;
                        break;
                    default:
                        current = dstr_cat_len(current, p, 1);
                        break;
                    }
                }
                if (*p)
                    ++p;
            }
            if (RESIZE(argv, (*argc + 1) * sizeof (char *)) == NULL)
                goto err;
            argv[*argc] = current;
            ++*argc;
            current = NULL;
        } else {
            return argv;
        }
    }

err:
    {
        int i;

        /* NOTE(review): RESIZE is defined elsewhere. If it assigns NULL to
         * argv when it fails, the old array cannot be walked any more; the
         * check below at least avoids dereferencing NULL -- confirm the
         * macro's failure behavior. */
        if (argv != NULL)
            for (i = 0; i < *argc; ++i)
                dstr_free(argv[i]);
        FREE(argv);
        if (current)
            dstr_free(current);
        /* nothing is handed back, so do not leave a stale count behind */
        *argc = 0;
        return NULL;
    }
}
int UnescapeCEscapeSequences(const char* source, char* dest, vector<string> *errors) { GOOGLE_DCHECK(errors == NULL) << "Error reporting not implemented."; char* d = dest; const char* p = source; // Small optimization for case where source = dest and there's no escaping while ( p == d && *p != '\0' && *p != '\\' ) p++, d++; while (*p != '\0') { if (*p != '\\') { *d++ = *p++; } else { switch ( *++p ) { // skip past the '\\' case '\0': LOG_STRING(ERROR, errors) << "String cannot end with \\"; *d = '\0'; return d - dest; // we're done with p case 'a': *d++ = '\a'; break; case 'b': *d++ = '\b'; break; case 'f': *d++ = '\f'; break; case 'n': *d++ = '\n'; break; case 'r': *d++ = '\r'; break; case 't': *d++ = '\t'; break; case 'v': *d++ = '\v'; break; case '\\': *d++ = '\\'; break; case '?': *d++ = '\?'; break; // \? Who knew? case '\'': *d++ = '\''; break; case '"': *d++ = '\"'; break; case '0': case '1': case '2': case '3': // octal digit: 1 to 3 digits case '4': case '5': case '6': case '7': { char ch = *p - '0'; if ( IS_OCTAL_DIGIT(p[1]) ) ch = ch * 8 + *++p - '0'; if ( IS_OCTAL_DIGIT(p[1]) ) // safe (and easy) to do this twice ch = ch * 8 + *++p - '0'; // now points at last digit *d++ = ch; break; } case 'x': case 'X': { if (!isxdigit(p[1])) { if (p[1] == '\0') { LOG_STRING(ERROR, errors) << "String cannot end with \\x"; } else { LOG_STRING(ERROR, errors) << "\\x cannot be followed by non-hex digit: \\" << *p << p[1]; } break; } unsigned int ch = 0; const char *hex_start = p; while (isxdigit(p[1])) // arbitrarily many hex digits ch = (ch << 4) + hex_digit_to_int(*++p); if (ch > 0xFF) LOG_STRING(ERROR, errors) << "Value of " << "\\" << string(hex_start, p+1-hex_start) << " exceeds 8 bits"; *d++ = ch; break; } #if 0 // TODO(kenton): Support \u and \U? Requires runetochar(). case 'u': { // \uhhhh => convert 4 hex digits to UTF-8 char32 rune = 0; const char *hex_start = p; for (int i = 0; i < 4; ++i) { if (isxdigit(p[1])) { // Look one char ahead. 
rune = (rune << 4) + hex_digit_to_int(*++p); // Advance p. } else { LOG_STRING(ERROR, errors) << "\\u must be followed by 4 hex digits: \\" << string(hex_start, p+1-hex_start); break; } } d += runetochar(d, &rune); break; } case 'U': { // \Uhhhhhhhh => convert 8 hex digits to UTF-8 char32 rune = 0; const char *hex_start = p; for (int i = 0; i < 8; ++i) { if (isxdigit(p[1])) { // Look one char ahead. // Don't change rune until we're sure this // is within the Unicode limit, but do advance p. char32 newrune = (rune << 4) + hex_digit_to_int(*++p); if (newrune > 0x10FFFF) { LOG_STRING(ERROR, errors) << "Value of \\" << string(hex_start, p + 1 - hex_start) << " exceeds Unicode limit (0x10FFFF)"; break; } else { rune = newrune; } } else { LOG_STRING(ERROR, errors) << "\\U must be followed by 8 hex digits: \\" << string(hex_start, p+1-hex_start); break; } } d += runetochar(d, &rune); break; } #endif default: LOG_STRING(ERROR, errors) << "Unknown escape sequence: \\" << *p; } p++; // read past letter we escaped } } *d = '\0'; return d - dest; }