/*
 * Work out whether output should be treated as UTF-8 and record the answer
 * in utf8env.  The flag is raised when UTF-8 is forced via param.force_utf8,
 * or when is_utf8() accepts the locale environment variables, the locale
 * name returned by setlocale(), or the codeset reported by nl_langinfo()
 * (the last two only when the build provides them).
 */
void check_locale(void)
{
	if(!param.force_utf8)
	{
		/* Check the env vars in proper order: LC_ALL, LC_CTYPE, then LANG. */
		const char *lang = getenv("LC_ALL");
		if(lang == NULL)
			lang = getenv("LC_CTYPE");
		if(lang == NULL)
			lang = getenv("LANG");

		if(is_utf8(lang))
			utf8env = 1;
	}
	else
		utf8env = 1;

#if defined(HAVE_SETLOCALE) && defined(LC_CTYPE)
	/* To query the locale, it has to be set from the environment first. */
	if(!utf8env && is_utf8(setlocale(LC_CTYPE, "")))
		utf8env = 1;
#endif
#if defined(HAVE_NL_LANGINFO) && defined(CODESET)
	/* langinfo is only consulted after the setlocale() attempt above. */
	if(!utf8env && is_utf8(nl_langinfo(CODESET)))
		utf8env = 1;
#endif

	debug1("UTF-8 locale: %i", utf8env);
}
/*
 * Walk the frame chain starting at PyEval_GetFrame() and following f_back.
 * Returns Py_False as soon as an entry is not a frame object or its code
 * object's co_filename / co_name is rejected by is_utf8(); returns Py_True
 * when the chain ends without such an entry.  self and args are not used.
 */
PyObject *
is_frame_utf8(PyObject* self, PyObject* args)
{
    PyFrameObject *cur;

    for (cur = PyEval_GetFrame(); cur != NULL; cur = cur->f_back) {
        if (!PyFrame_Check(cur)
            || !is_utf8(cur->f_code->co_filename)
            || !is_utf8(cur->f_code->co_name)) {
            Py_RETURN_FALSE;
        }
    }

    Py_RETURN_TRUE;
}
/**
 * Store a UTF-8 version of source_str in target_str.
 *
 * If is_utf8() accepts source_str it is copied as-is (as a NUL-terminated
 * string; len is not consulted on this path).  Otherwise the first len bytes
 * are converted from GB18030 to UTF-8 with iconv.
 *
 * @param source_str input bytes
 * @param len        number of input bytes handed to iconv
 * @param target_str receives the result; left untouched on failure
 * @return true on success, false if the converter cannot be opened, memory
 *         cannot be allocated, or the conversion fails
 */
bool to_utf8(const char* source_str, const size_t len, std::string & target_str) {
    if (is_utf8(source_str)) {
        target_str = source_str;
        return true;
    }

    iconv_t cd = iconv_open("UTF-8", "GB18030");
    if ((iconv_t)-1 == cd) {
        return false;
    }

    size_t inlen = len;
    /* Refuse lengths for which 4 * inlen + 1 would wrap around. */
    if (inlen > (static_cast<size_t>(-1) - 1) / 4) {
        iconv_close(cd);
        return false;
    }

    /*
     * Reserve one byte more than iconv is allowed to fill and zero the whole
     * buffer, so the result is NUL-terminated even when every output byte is
     * used, and a zero-length input yields a valid empty string.
     */
    size_t outlen = 4 * inlen;
    char *out = static_cast<char*>(calloc(outlen + 1, 1));
    if (NULL == out) {
        iconv_close(cd);
        return false;
    }

    /* iconv takes a non-const input pointer. */
    char *in = const_cast<char*>(source_str);
    char *pout = out;
    const bool converted = ((size_t)-1 != iconv(cd, &in, &inlen, &pout, &outlen));
    if (converted) {
        target_str = out;
    }

    free(out);
    iconv_close(cd);
    return converted;
}
/*
 * The main conversion routine: ICY text in CP-1252 (or already in UTF-8)
 * to a newly allocated UTF-8 string.  Returns NULL when allocation fails.
 */
char *
icy2utf8(const char *src)
{
	const uint8_t *in = (const uint8_t *)src;
	uint8_t *buf;
	char *result;
	size_t in_len, out_len, pos, t;

	/* Some streams (Apple/iTunes) deliver ICY info in UTF-8 already;
	   hand back a plain copy instead of re-encoding it. */
	if(is_utf8(src))
		return strdup(src);

	/* Include the terminating NUL in the conversion. */
	in_len = strlen(src) + 1;

	/* Allocate conservatively: three output bytes per input byte. */
	buf = malloc(in_len * 3);
	if(buf == NULL)
		return NULL;

	out_len = 0;
	for(pos = 0; pos < in_len; pos++)
	{
		const uint8_t ch = in[pos];

		/* Copy the table entries belonging to this input byte. */
		for(t = tblofs[ch]; t < tblofs[ch + 1]; t++)
			buf[out_len++] = cp1252_utf8[t];
	}

	/* out_len includes the trailing NUL since in_len also does. */
	result = realloc(buf, out_len);
	if(result == NULL)
	{
		free(buf);
		return NULL;
	}

	return result;
}
/*
 * Return the screen length of a plain string.
 *
 * The string is first passed through strip_codes().  When is_utf8() is true
 * and the stripped text validates as UTF-8, the length is the sum of
 * mk_wcwidth() over printable characters, counting 1 for every
 * non-printable one; otherwise it is the byte length of the stripped text.
 */
static int scrlen_str(const char *str)
{
	char *plain;
	const char *p;
	int width;

	g_return_val_if_fail(str != NULL, 0);

	plain = strip_codes(str);
	p = plain;

	if (!is_utf8() || !g_utf8_validate(p, -1, NULL)) {
		width = strlen(p);
	} else {
		width = 0;
		for (; *p != '\0'; p = g_utf8_next_char(p)) {
			gunichar c = g_utf8_get_char(p);

			width += unichar_isprint(c) ? mk_wcwidth(c) : 1;
		}
	}

	g_free(plain);
	return width;
}
/*
 * Check a stream line by line with is_utf8().
 *
 * Reading stops at the first line for which is_utf8() sets an error message;
 * that error is reported through print_utf8_error() with the line number and
 * the byte offset accumulated so far.
 *
 * stream     input to read; NULL is treated as a failure
 * file_path  name passed on to print_utf8_error()
 * quiet, verbose, list_only, invert
 *            passed on unchanged to print_utf8_error()
 *
 * Returns EXIT_SUCCESS when no error message was produced, EXIT_FAILURE
 * otherwise.  The stream is not closed here.
 */
static int is_utf8_readline(FILE *stream, const char *file_path, int quiet, int verbose, int list_only, int invert)
{
    char *string = NULL;
    size_t size = 0;
    ssize_t str_length;
    char *message = NULL;
    int lineno = 1;
    int pos = 0;
    int offset = 0;
    int faulty_bytes = 0;

    /* A caller may hand over an unchecked fopen() result; never pass NULL
       on to getline(). */
    if (stream == NULL)
        return EXIT_FAILURE;

    while ((str_length = getline(&string, &size, stream)) != -1)
    {
        pos = is_utf8((unsigned char*)string, str_length, &message, &faulty_bytes);
        if (message != NULL)
        {
            /* Turn the in-line position into an offset from the start. */
            offset += pos;
            print_utf8_error(file_path, lineno, pos, offset,
                             string, str_length, pos, message, faulty_bytes,
                             quiet, verbose, list_only, invert);
            break;
        }
        offset += str_length;
        lineno += 1;
    }

    /* getline() owns the buffer's allocation; free(NULL) is a no-op. */
    free(string);
    return message == NULL ? EXIT_SUCCESS : EXIT_FAILURE;
}
/*
 * Guess the charset of a line when none is known.
 *
 * If the target charset is UTF-8 and the line already looks like UTF-8
 * (US-ASCII included, being a subset), there is nothing to convert and
 * NULL is returned.
 *
 * In every other case Latin1 is assumed, for historical reasons.
 */
static const char *guess_charset(const struct strbuf *line, const char *target_charset)
{
	if (is_encoding_utf8(target_charset) && is_utf8(line->buf))
		return NULL;

	return "ISO8859-1";
}
/**
 * Get the print path of a file as a UTF-8 string.
 *
 * A previously stored conversion is reused.  Otherwise, when is_utf8()
 * reports true, print_path is returned directly; if not, the result of
 * to_utf8() is stored in the info structure and returned.
 *
 * @param info file information
 * @return utf8 string on success, NULL if couldn't convert.
 */
const char* file_info_get_utf8_print_path(struct file_info* info)
{
	if(info->utf8_print_path != NULL)
		return info->utf8_print_path;

	if(is_utf8())
		return info->print_path;

	info->utf8_print_path = to_utf8(info->print_path);
	return info->utf8_print_path;
}
/*
 * Guess the charset of a line when none is known.
 *
 * With a UTF-8 target (the default) and a line that already looks like
 * UTF-8 (which covers US-ASCII as well), the answer is NULL: there is
 * nothing to do.
 *
 * Anything else is assumed to be Latin1, for historical reasons.
 */
static const char *guess_charset(const char *line, const char *target_charset)
{
	const int already_utf8 = is_encoding_utf8(target_charset) && is_utf8(line);

	return already_utf8 ? NULL : "latin1";
}
/// Convert the UTF-8 range [begin,end) to the encoding named by c_encoding.
///
/// When is_utf8() accepts c_encoding the bytes are copied unchanged;
/// otherwise the conversion is delegated to locale::conv::from_utf<char>.
std::string CPPCMS_API from_utf8(char const *c_encoding,char const *begin,char const *end)
{
	if(!is_utf8(c_encoding))
		return locale::conv::from_utf<char>(begin,end,c_encoding);

	std::string copy(begin,end-begin);
	return copy;
}
/*
 * Command line entry point: validate either the single STRING argument or,
 * when the argument is "-", standard input read in BUFSIZE chunks.
 *
 * Exits with EXIT_SUCCESS when is_utf8() reports no error message and with
 * EXIT_FAILURE on a usage error, a read error, or after printing the first
 * reported problem through pretty_print_error_at().
 *
 * NOTE(review): each chunk read from stdin is checked on its own; whether a
 * multi-byte sequence split across two reads is handled depends on
 * is_utf8() -- confirm.
 */
int main(int ac, char **av)
{
    char buffer[BUFSIZE];
    char *message;
    int nread;
    int pos;

    if (ac != 2)
    {
        fprintf(stderr, "USAGE: %s STRING or - for stdin.\n", av[0]);
        return EXIT_FAILURE;
    }

    /* Plain argument: check it as one string. */
    if (strcmp(av[1], "-") != 0)
    {
        pos = is_utf8((unsigned char*)av[1], strlen(av[1]), &message);
        if (message == NULL)
            return EXIT_SUCCESS;
        pretty_print_error_at(av[1], pos, message);
        return EXIT_FAILURE;
    }

    /* "-": check stdin chunk by chunk until end of input. */
    for (;;)
    {
        nread = read(0, buffer, BUFSIZE);
        if (nread == 0)
            return EXIT_SUCCESS;
        if (nread == -1)
        {
            perror("read");
            return EXIT_FAILURE;
        }
        pos = is_utf8((unsigned char*)buffer, nread, &message);
        if (message != NULL)
        {
            pretty_print_error_at(buffer, pos, message);
            return EXIT_FAILURE;
        }
    }
}
/*
 * Learn a single word with the given confidence.
 *
 * The word must be UTF-8 and the words store must be available.  The word is
 * sanitized, tokenized with vst_tokenize(), the tokens are noise-reduced, and
 * the result is handed to vwt_persist_possibilities() if
 * can_learn_from_tokens() accepts it.
 *
 * Returns VARNAM_ARGS_ERROR for NULL arguments, VARNAM_ERROR when the store
 * is unavailable, the encoding is wrong or the tokens cannot be learned from,
 * a non-zero tokenizer status as-is, and otherwise whatever
 * vwt_persist_possibilities() returns.
 */
static int
varnam_learn_internal(varnam *handle, const char *word, int confidence)
{
    varray *tokens;
    strbuf *sanitized_word;
    int status;

    if (handle == NULL || word == NULL)
        return VARNAM_ARGS_ERROR;

    if (!is_words_store_available(handle))
        return VARNAM_ERROR;

    if (!is_utf8 (word)) {
        set_last_error (handle, "Incorrect encoding. Expected UTF-8 string");
        return VARNAM_ERROR;
    }

    tokens = get_pooled_array (handle);

    /* Strip all leading and trailing special characters from the word */
    sanitized_word = sanitize_word (handle, word);

    status = vst_tokenize (handle,
                           strbuf_to_s (sanitized_word),
                           VARNAM_TOKENIZER_VALUE,
                           VARNAM_MATCH_ALL,
                           tokens);
    if (status)
        return status;

#ifdef _VARNAM_VERBOSE
    printf ("%s\n", "Tokens before reducing noice");
    print_tokens_array (tokens);
#endif

    /* The tokens may hold more data than can be handled; reduce the noise
       so that only the most relevant combinations are learned */
    reduce_noise_in_tokens (tokens);

#ifdef _VARNAM_VERBOSE
    printf ("%s\n", "Tokens after reducing noice");
    print_tokens_array (tokens);
#endif

    if (can_learn_from_tokens (handle, tokens, strbuf_to_s (sanitized_word)))
        return vwt_persist_possibilities (handle, tokens, strbuf_to_s (sanitized_word), confidence);

    return VARNAM_ERROR;
}
/*
 * Channel message hook: reject text that is_utf8() does not accept.
 *
 * The sender receives ERR_CANNOTSENDTOCHAN for the channel and the same
 * numeric is stored in data->approved.  Messages whose approved field is
 * already set are left alone.
 */
static void
block_invalid_utf8_process(hook_data_privmsg_channel *data)
{
	/* don't waste CPU if the message is already blocked, and let valid
	   text through untouched */
	if (data->approved || is_utf8(data->text))
		return;

	sendto_one_numeric(data->source_p, ERR_CANNOTSENDTOCHAN,
			   form_str(ERR_CANNOTSENDTOCHAN),
			   data->chptr->chname,
			   "your message was badly formatted UTF-8 and this network enforces valid UTF-8");
	data->approved = ERR_CANNOTSENDTOCHAN;
}
/*
 * Print text to stdout, wrapping it where necessary.
 *
 * indent is the indentation of the first line, indent2 that of every
 * following line.  A negative indent means that -indent columns have already
 * been consumed, so the first line gets no extra indentation.
 *
 * Returns the column reached when the end of the text is hit.
 */
int print_wrapped_text(const char *text, int indent, int indent2, int width)
{
	int col = indent, assume_utf8 = is_utf8(text);
	const char *line_start = text, *last_space = NULL;

	if (indent < 0) {
		col = -indent;
		last_space = text;
	}

	for (;;) {
		char ch = *text;

		/* Ordinary character: just account for its width. */
		if (ch && !isspace(ch)) {
			if (assume_utf8)
				col += utf8_width(&text, NULL);
			else {
				col++;
				text++;
			}
			continue;
		}

		/* Too wide and there is an earlier break point: wrap there. */
		if (col >= width && last_space) {
			putchar('\n');
			text = line_start = last_space + isspace(*last_space);
			last_space = NULL;
			col = indent = indent2;
			continue;
		}

		/* Otherwise flush what has been collected up to this point. */
		{
			const char *from = line_start;

			if (last_space)
				from = last_space;
			else
				print_spaces(indent);
			fwrite(from, text - from, 1, stdout);
			if (!ch)
				return col;
			else if (ch == '\t')
				col |= 0x07;	/* with the ++ below: next multiple of 8 */
			last_space = text;
			col++;
			text++;
		}
	}
}
/*
 * Read up to 4096 bytes from stdin and print which encoding the is_utf*()
 * detectors recognise first, trying UTF-8, UTF-16 BE/LE and UTF-32 BE/LE in
 * that order, or "Unknown encoding" when none matches.
 */
int main()
{
	/*
	 * The detectors only receive a pointer, so the data has to be
	 * terminated.  Zero-fill the buffer and keep four spare bytes after
	 * the 4096 that fread() may fill, so that short input is not followed
	 * by indeterminate bytes and a full read still ends in zero bytes
	 * (enough for a 32-bit terminator).
	 */
	char to_check[4096 + 4] = {0};

	/* A short or empty read simply leaves the remainder zeroed. */
	(void)fread(to_check, 1, 4096, stdin);

	if (is_utf8(to_check)) {
		printf("UTF-8\n");
	} else if (is_utf16be(to_check)) {
		printf("UTF-16 BE\n");
	} else if (is_utf16le(to_check)) {
		printf("UTF-16 LE\n");
	} else if (is_utf32be(to_check)) {
		printf("UTF-32 BE\n");
	} else if (is_utf32le(to_check)) {
		printf("UTF-32 LE\n");
	} else {
		printf("Unknown encoding\n");
	}
}
/*
 * Check a whole file with is_utf8() by mapping it into memory.
 *
 * When the file cannot be mapped, it is reopened as a stdio stream and
 * checked line by line through is_utf8_readline() instead.
 *
 * file_path  file to check
 * quiet, verbose, list_only, invert
 *            passed on unchanged to the reporting functions
 *
 * Returns EXIT_SUCCESS when is_utf8() produced no error message,
 * EXIT_FAILURE otherwise (including when the fallback stream cannot be
 * opened).  Failures of open()/fstat() go through handle_error().
 */
static int is_utf8_mmap(const char *file_path, int quiet, int verbose, int list_only, int invert)
{
    char *addr;
    struct stat sb;
    int fd;
    int pos = 0;
    char *message;
    int retval = EXIT_SUCCESS;
    int error_column = 1;
    int error_line = 0;
    int faulty_bytes = 0;

    fd = open(file_path, O_RDONLY);
    if (fd == -1)
        handle_error("open", err_open);
    if (fstat(fd, &sb) == -1)           /* To obtain file size */
        handle_error("fstat", err_fstat);
    addr = mmap(NULL, sb.st_size, PROT_READ, MAP_PRIVATE, fd, 0);
    if (addr == MAP_FAILED)
    {
        /* Can't mmap, maybe a pipe or whatever, let's try readline. */
        FILE *stream;

        close(fd);
        stream = fopen(file_path, "r");
        if (stream == NULL)
        {
            perror("fopen");
            return EXIT_FAILURE;
        }
        retval = is_utf8_readline(stream, file_path, quiet, verbose, list_only, invert);
        /* The stream was opened here, so it is closed here as well. */
        fclose(stream);
        return retval;
    }
    pos = is_utf8((unsigned char*)addr, sb.st_size, &message, &faulty_bytes);
    if (message != NULL)
        count_lines(addr, sb.st_size, pos, &error_line, &error_column);
    /* Called whether or not an error was found; message may be NULL. */
    print_utf8_error(file_path, error_line, error_column, pos,
                     addr, sb.st_size, pos, message, faulty_bytes,
                     quiet, verbose, list_only, invert);
    if (message != NULL)
        retval = EXIT_FAILURE;
    munmap(addr, sb.st_size);
err_fstat:
    close(fd);
err_open:
    return retval;
}
static int write_zip_entry(struct archiver_args *args, const unsigned char *sha1, const char *path, size_t pathlen, unsigned int mode, int big_file_threshold, int zip_dir_size, int zip_dir_offset, int zip_dir, int zip_time, int zip_date, int zip_offset, int zip_dir_entries) { struct zip_local_header header; struct zip_dir_header dirent; struct zip_extra_mtime extra; unsigned long attr2 = 0; unsigned long compressed_size = 0; unsigned long crc = 0; unsigned long direntsize = 0; int method = 0; int out = 0; int deflated = 0; int buffer = 0; int stream = 0; unsigned long flags = 0; unsigned long size = 0; crc = crc32(0, NULL, 0); if (!has_only_ascii(path)) { if (is_utf8(path)) flags = LOR(flags,ZIP_UTF8); else warning("Path is not valid UTF-8: %s", path); } if (pathlen > 0xffff) { return error("path too long (%d chars, SHA1: %s): %s", (int)pathlen, sha1_to_hex(sha1), path); } if (S_ISDIR(mode) || S_ISGITLINK(mode)) { method = 0; attr2 = 16; out = NULL; size = 0; compressed_size = 0; buffer = NULL; size = 0; } else if (S_ISREG(mode) || S_ISLNK(mode)) { int type = sha1_object_info(sha1, &size); method = 0; if (S_ISLNK(mode)) { attr2 = ASL(LOR(mode,0777),16); } else if (LAND(mode,0111)) { attr2 = ASL(LOR(mode,0111),16); } if (S_ISREG(mode) && args->compression_level != 0 && size > 0) method = 8; compressed_size = size; if (S_ISREG(mode) && type == OBJ_BLOB && !args->convert && size > big_file_threshold) { stream = open_istream(sha1, &type, &size, NULL); if (!stream) return error("cannot stream blob %s", sha1_to_hex(sha1)); flags |= ZIP_STREAM; out = buffer = NULL; } else { buffer = sha1_file_to_archive(args, path, sha1, mode, &type, &size); if (!buffer) return error("cannot read %s", sha1_to_hex(sha1)); crc = crc32(crc, buffer, size); out = buffer; } } else { return error("unsupported file mode: 0%o (SHA1: %s)", mode, sha1_to_hex(sha1)); } // if (buffer && method == 8) { // deflated = zlib_deflate(buffer, size, args->compression_level, // &compressed_size); // if 
(deflated && compressed_size - 6 < size) { // /* ZLIB --> raw compressed data (see RFC 1950) */ // /* CMF and FLG ... */ // out = deflated + 2; // compressed_size -= 6; /* ... and ADLER32 */ // } else { // method = 0; // compressed_size = size; // } // } // // copy_le16(extra.magic, 0x5455); // copy_le16(extra.extra_size, ZIP_EXTRA_MTIME_PAYLOAD_SIZE); // extra.flags[0] = 1; /* just mtime */ // copy_le32(extra.mtime, args->time); // // /* make sure we have enough free space in the dictionary */ // direntsize = ZIP_DIR_HEADER_SIZE + pathlen + ZIP_EXTRA_MTIME_SIZE; // while (zip_dir_size < zip_dir_offset + direntsize) { // zip_dir_size += ZIP_DIRECTORY_MIN_SIZE; // zip_dir = xrealloc(zip_dir, zip_dir_size); // } // // copy_le32(dirent.magic, 0x02014b50); // copy_le16(dirent.creator_version, // S_ISLNK(mode) || (S_ISREG(mode) && (mode & 0111)) ? 0x0317 : 0); // copy_le16(dirent.version, 10); // copy_le16(dirent.flags, flags); // copy_le16(dirent.compression_method, method); // copy_le16(dirent.mtime, zip_time); // copy_le16(dirent.mdate, zip_date); // set_zip_dir_data_desc(&dirent, size, compressed_size, crc); // copy_le16(dirent.filename_length, pathlen); // copy_le16(dirent.extra_length, ZIP_EXTRA_MTIME_SIZE); // copy_le16(dirent.comment_length, 0); // copy_le16(dirent.disk, 0); // copy_le16(dirent.attr1, 0); // copy_le32(dirent.attr2, attr2); // copy_le32(dirent.offset, zip_offset); // // copy_le32(header.magic, 0x04034b50); // copy_le16(header.version, 10); // copy_le16(header.flags, flags); // copy_le16(header.compression_method, method); // copy_le16(header.mtime, zip_time); // copy_le16(header.mdate, zip_date); // set_zip_header_data_desc(&header, size, compressed_size, crc); // copy_le16(header.filename_length, pathlen); // copy_le16(header.extra_length, ZIP_EXTRA_MTIME_SIZE); // write_or_die(1, &header, ZIP_LOCAL_HEADER_SIZE); // zip_offset += ZIP_LOCAL_HEADER_SIZE; // write_or_die(1, path, pathlen); // zip_offset += pathlen; // write_or_die(1, &extra, 
ZIP_EXTRA_MTIME_SIZE); // zip_offset += ZIP_EXTRA_MTIME_SIZE; // if (stream && method == 0) { // unsigned char buf[STREAM_BUFFER_SIZE]; // ssize_t readlen = 0; // // for (;;) { // readlen = read_istream(stream, buf, sizeof(buf)); // if (readlen <= 0) // break; // crc = crc32(crc, buf, readlen); // write_or_die(1, buf, readlen); // } // close_istream(stream); // if (readlen) // return readlen; // // compressed_size = size; // zip_offset += compressed_size; // // write_zip_data_desc(size, compressed_size, crc); // zip_offset += ZIP_DATA_DESC_SIZE; // // set_zip_dir_data_desc(&dirent, size, compressed_size, crc); // } else if (stream && method == 8) { // int buf; // ssize_t readlen; // git_zstream zstream; // int result; // size_t out_len; // int compressed; // // memset(&zstream, 0, sizeof(zstream)); // git_deflate_init(&zstream, args->compression_level); // // compressed_size = 0; // zstream.next_out = compressed; // zstream.avail_out = sizeof(compressed); // // for (;;) { // readlen = read_istream(stream, buf, sizeof(buf)); // if (readlen <= 0) // break; // crc = crc32(crc, buf, readlen); // // zstream.next_in = buf; // zstream.avail_in = readlen; // result = git_deflate(&zstream, 0); // if (result != Z_OK) // die("deflate error (%d)", result); // out = compressed; // if (!compressed_size) // out += 2; // out_len = zstream.next_out - out; // // if (out_len > 0) { // write_or_die(1, out, out_len); // compressed_size += out_len; // zstream.next_out = compressed; // zstream.avail_out = sizeof(compressed); // } // // } // close_istream(stream); // if (readlen) // return readlen; // // zstream.next_in = buf; // zstream.avail_in = 0; // result = git_deflate(&zstream, Z_FINISH); // if (result != Z_STREAM_END) // die("deflate error (%d)", result); // // git_deflate_end(&zstream); // out = compressed; // if (!compressed_size) // out += 2; // out_len = zstream.next_out - out - 4; // write_or_die(1, out, out_len); // compressed_size += out_len; // zip_offset += 
compressed_size; // // write_zip_data_desc(size, compressed_size, crc); // zip_offset += ZIP_DATA_DESC_SIZE; // // set_zip_dir_data_desc(&dirent, size, compressed_size, crc); // } else if (compressed_size > 0) { // write_or_die(1, out, compressed_size); // zip_offset += compressed_size; // } // // free(deflated); // free(buffer); // // memcpy(zip_dir + zip_dir_offset, &dirent, ZIP_DIR_HEADER_SIZE); // zip_dir_offset += ZIP_DIR_HEADER_SIZE; // memcpy(zip_dir + zip_dir_offset, path, pathlen); // zip_dir_offset += pathlen; // memcpy(zip_dir + zip_dir_offset, &extra, ZIP_EXTRA_MTIME_SIZE); // zip_dir_offset += ZIP_EXTRA_MTIME_SIZE; // zip_dir_entries++; return 0; }
// Returns a new reference. static PyObject* decode_val(DecodeBuffer* input, TType type, PyObject* typeargs, long string_limit, long container_limit) { switch (type) { case T_BOOL: { int8_t v = readByte(input); if (INT_CONV_ERROR_OCCURRED(v)) { return NULL; } switch (v) { case 0: Py_RETURN_FALSE; case 1: Py_RETURN_TRUE; // Don't laugh. This is a potentially serious issue. default: PyErr_SetString(PyExc_TypeError, "boolean out of range"); return NULL; } break; } case T_I08: { int8_t v = readByte(input); if (INT_CONV_ERROR_OCCURRED(v)) { return NULL; } return PyInt_FromLong(v); } case T_I16: { int16_t v = readI16(input); if (INT_CONV_ERROR_OCCURRED(v)) { return NULL; } return PyInt_FromLong(v); } case T_I32: { int32_t v = readI32(input); if (INT_CONV_ERROR_OCCURRED(v)) { return NULL; } return PyInt_FromLong(v); } case T_I64: { int64_t v = readI64(input); if (INT_CONV_ERROR_OCCURRED(v)) { return NULL; } // TODO(dreiss): Find out if we can take this fastpath always when // sizeof(long) == sizeof(long long). if (CHECK_RANGE(v, LONG_MIN, LONG_MAX)) { return PyInt_FromLong((long) v); } return PyLong_FromLongLong(v); } case T_DOUBLE: { double v = readDouble(input); if (v == -1.0 && PyErr_Occurred()) { return false; } return PyFloat_FromDouble(v); } case T_STRING: { Py_ssize_t len = readI32(input); char* buf; if (!readBytes(input, &buf, len)) { return NULL; } if (!check_length_limit(len, string_limit)) { return NULL; } if (is_utf8(typeargs)) return PyUnicode_DecodeUTF8(buf, len, 0); else return PyString_FromStringAndSize(buf, len); } case T_LIST: case T_SET: { SetListTypeArgs parsedargs; int32_t len; PyObject* ret = NULL; int i; bool use_tuple = false; if (!parse_set_list_args(&parsedargs, typeargs)) { return NULL; } if (!checkTypeByte(input, parsedargs.element_type)) { return NULL; } len = readI32(input); if (!check_length_limit(len, container_limit)) { return NULL; } use_tuple = type == T_LIST && parsedargs.immutable; ret = use_tuple ? 
PyTuple_New(len) : PyList_New(len); if (!ret) { return NULL; } for (i = 0; i < len; i++) { PyObject* item = decode_val(input, parsedargs.element_type, parsedargs.typeargs, string_limit, container_limit); if (!item) { Py_DECREF(ret); return NULL; } if (use_tuple) { PyTuple_SET_ITEM(ret, i, item); } else { PyList_SET_ITEM(ret, i, item); } } // TODO(dreiss): Consider biting the bullet and making two separate cases // for list and set, avoiding this post facto conversion. if (type == T_SET) { PyObject* setret; setret = parsedargs.immutable ? PyFrozenSet_New(ret) : PySet_New(ret); Py_DECREF(ret); return setret; } return ret; } case T_MAP: { int32_t len; int i; MapTypeArgs parsedargs; PyObject* ret = NULL; if (!parse_map_args(&parsedargs, typeargs)) { return NULL; } if (!checkTypeByte(input, parsedargs.ktag)) { return NULL; } if (!checkTypeByte(input, parsedargs.vtag)) { return NULL; } len = readI32(input); if (!check_length_limit(len, container_limit)) { return NULL; } ret = PyDict_New(); if (!ret) { goto error; } for (i = 0; i < len; i++) { PyObject* k = NULL; PyObject* v = NULL; k = decode_val(input, parsedargs.ktag, parsedargs.ktypeargs, string_limit, container_limit); if (k == NULL) { goto loop_error; } v = decode_val(input, parsedargs.vtag, parsedargs.vtypeargs, string_limit, container_limit); if (v == NULL) { goto loop_error; } if (PyDict_SetItem(ret, k, v) == -1) { goto loop_error; } Py_DECREF(k); Py_DECREF(v); continue; // Yuck! Destructors, anyone? 
loop_error: Py_XDECREF(k); Py_XDECREF(v); goto error; } if (parsedargs.immutable) { PyObject* thrift = PyImport_ImportModule("thrift.Thrift"); PyObject* cls = NULL; PyObject* arg = NULL; if (!thrift) { goto error; } cls = PyObject_GetAttrString(thrift, "TFrozenDict"); if (!cls) { goto error; } arg = PyTuple_New(1); PyTuple_SET_ITEM(arg, 0, ret); return PyObject_CallObject(cls, arg); } return ret; error: Py_XDECREF(ret); return NULL; } case T_STRUCT: { StructTypeArgs parsedargs; if (!parse_struct_args(&parsedargs, typeargs)) { return NULL; } return decode_struct(input, Py_None, parsedargs.klass, parsedargs.spec, string_limit, container_limit); } case T_STOP: case T_VOID: case T_UTF16: case T_UTF8: case T_U64: default: PyErr_SetString(PyExc_TypeError, "Unexpected TType"); return NULL; } }
static bool output_val(PyObject* output, PyObject* value, TType type, PyObject* typeargs) { /* * Refcounting Strategy: * * We assume that elements of the thrift_spec tuple are not going to be * mutated, so we don't ref count those at all. Other than that, we try to * keep a reference to all the user-created objects while we work with them. * output_val assumes that a reference is already held. The *caller* is * responsible for handling references */ switch (type) { case T_BOOL: { int v = PyObject_IsTrue(value); if (v == -1) { return false; } writeByte(output, (int8_t) v); break; } case T_I08: { int32_t val; if (!parse_pyint(value, &val, INT8_MIN, INT8_MAX)) { return false; } writeByte(output, (int8_t) val); break; } case T_I16: { int32_t val; if (!parse_pyint(value, &val, INT16_MIN, INT16_MAX)) { return false; } writeI16(output, (int16_t) val); break; } case T_I32: { int32_t val; if (!parse_pyint(value, &val, INT32_MIN, INT32_MAX)) { return false; } writeI32(output, val); break; } case T_I64: { int64_t nval = PyLong_AsLongLong(value); if (INT_CONV_ERROR_OCCURRED(nval)) { return false; } if (!CHECK_RANGE(nval, INT64_MIN, INT64_MAX)) { PyErr_SetString(PyExc_OverflowError, "int out of range"); return false; } writeI64(output, nval); break; } case T_DOUBLE: { double nval = PyFloat_AsDouble(value); if (nval == -1.0 && PyErr_Occurred()) { return false; } writeDouble(output, nval); break; } case T_STRING: { Py_ssize_t len = 0; if (is_utf8(typeargs) && PyUnicode_Check(value)) value = PyUnicode_AsUTF8String(value); len = PyString_Size(value); if (!check_ssize_t_32(len)) { return false; } writeI32(output, (int32_t) len); PycStringIO->cwrite(output, PyString_AsString(value), (int32_t) len); break; } case T_LIST: case T_SET: { Py_ssize_t len; SetListTypeArgs parsedargs; PyObject *item; PyObject *iterator; if (!parse_set_list_args(&parsedargs, typeargs)) { return false; } len = PyObject_Length(value); if (!check_ssize_t_32(len)) { return false; } writeByte(output, 
parsedargs.element_type); writeI32(output, (int32_t) len); iterator = PyObject_GetIter(value); if (iterator == NULL) { return false; } while ((item = PyIter_Next(iterator))) { if (!output_val(output, item, parsedargs.element_type, parsedargs.typeargs)) { Py_DECREF(item); Py_DECREF(iterator); return false; } Py_DECREF(item); } Py_DECREF(iterator); if (PyErr_Occurred()) { return false; } break; } case T_MAP: { PyObject *k, *v; Py_ssize_t pos = 0; Py_ssize_t len; MapTypeArgs parsedargs; len = PyDict_Size(value); if (!check_ssize_t_32(len)) { return false; } if (!parse_map_args(&parsedargs, typeargs)) { return false; } writeByte(output, parsedargs.ktag); writeByte(output, parsedargs.vtag); writeI32(output, len); // TODO(bmaurer): should support any mapping, not just dicts while (PyDict_Next(value, &pos, &k, &v)) { // TODO(dreiss): Think hard about whether these INCREFs actually // turn any unsafe scenarios into safe scenarios. Py_INCREF(k); Py_INCREF(v); if (!output_val(output, k, parsedargs.ktag, parsedargs.ktypeargs) || !output_val(output, v, parsedargs.vtag, parsedargs.vtypeargs)) { Py_DECREF(k); Py_DECREF(v); return false; } Py_DECREF(k); Py_DECREF(v); } break; } // TODO(dreiss): Consider breaking this out as a function // the way we did for decode_struct. 
case T_STRUCT: { StructTypeArgs parsedargs; Py_ssize_t nspec; Py_ssize_t i; if (!parse_struct_args(&parsedargs, typeargs)) { return false; } nspec = PyTuple_Size(parsedargs.spec); if (nspec == -1) { return false; } for (i = 0; i < nspec; i++) { StructItemSpec parsedspec; PyObject* spec_tuple; PyObject* instval = NULL; spec_tuple = PyTuple_GET_ITEM(parsedargs.spec, i); if (spec_tuple == Py_None) { continue; } if (!parse_struct_item_spec (&parsedspec, spec_tuple)) { return false; } instval = PyObject_GetAttr(value, parsedspec.attrname); if (!instval) { return false; } if (instval == Py_None) { Py_DECREF(instval); continue; } writeByte(output, (int8_t) parsedspec.type); writeI16(output, parsedspec.tag); if (!output_val(output, instval, parsedspec.type, parsedspec.typeargs)) { Py_DECREF(instval); return false; } Py_DECREF(instval); } writeByte(output, (int8_t)T_STOP); break; } case T_STOP: case T_VOID: case T_UTF16: case T_UTF8: case T_U64: default: PyErr_SetString(PyExc_TypeError, "Unexpected TType"); return false; } return true; }
/// Throw std::invalid_argument unless is_utf8() accepts the bytes of text.
void throw_not_utf8(const std::string& text)
{
    const bool valid = is_utf8(text.c_str(), text.length());
    if (valid) {
        return;
    }
    throw std::invalid_argument("The text is not encoded in UTF8");
}
/// Throw std::invalid_argument unless is_utf8() accepts the NUL-terminated
/// string text.
void throw_not_utf8(const char* text)
{
    const bool valid = is_utf8(text, std::strlen(text));
    if (valid) {
        return;
    }
    throw std::invalid_argument("The text is not encoded in UTF8");
}
/*
 * Append text to buf, wrapping it if necessary.  The variable indent is the
 * indent for the first line, indent2 is the indent for all other lines.
 * If indent is negative, assume that already -indent columns have been
 * consumed (and no extra indent is necessary for the first line).
 *
 * A width of zero or less disables wrapping: the text is only indented and
 * 1 is returned.  Otherwise the return value is the column reached when the
 * end of the text is hit.
 */
int strbuf_add_wrapped_text(struct strbuf *buf, const char *text, int indent, int indent2, int width)
{
	/* w is the current column; the width mode is chosen once, up front. */
	int w = indent, assume_utf8 = is_utf8(text);
	/* bol: start of the pending chunk; space: last break candidate seen. */
	const char *bol = text, *space = NULL;

	if (width <= 0) {
		strbuf_add_indented_text(buf, text, indent, indent2);
		return 1;
	}

	if (indent < 0) {
		w = -indent;
		space = text;
	}

	for (;;) {
		char c;
		size_t skip;

		/* Step over display-mode escape sequences without counting them. */
		while ((skip = display_mode_esc_sequence_len(text)))
			text += skip;

		c = *text;
		if (!c || isspace(c)) {
			/* Flush when it still fits, or when there is nowhere to break. */
			if (w < width || !space) {
				const char *start = bol;
				/* End of text with nothing pending: done. */
				if (!c && text == start)
					return w;
				if (space)
					start = space;
				else
					print_spaces(buf, indent);
				strbuf_write(buf, start, text - start);
				if (!c)
					return w;
				space = text;
				if (c == '\t')
					/* Together with the w++ below: next multiple of 8. */
					w |= 0x07;
				else if (c == '\n') {
					space++;
					if (*space == '\n') {
						/* Two newlines in a row: emit one here, one at new_line. */
						strbuf_write(buf, "\n", 1);
						goto new_line;
					}
					else if (!isalnum(*space))
						/* Newline followed by a non-alphanumeric: keep the break. */
						goto new_line;
					else
						/* Newline followed by an alphanumeric: join with a space. */
						strbuf_write(buf, " ", 1);
				}
				w++;
				text++;
			}
			else {
new_line:
				/* Start a new output line just past the break point. */
				strbuf_write(buf, "\n", 1);
				text = bol = space + isspace(*space);
				space = NULL;
				/* Every line after the first uses indent2. */
				w = indent = indent2;
			}
			continue;
		}
		/* Ordinary character: advance by its width. */
		if (assume_utf8)
			w += utf8_width(&text, NULL);
		else {
			w++;
			text++;
		}
	}
}
/// Convert the UTF-8 string str to the given encoding.
///
/// The string is returned unchanged when is_utf8() accepts the encoding;
/// otherwise the work is delegated to the iterator-range overload.
std::string CPPCMS_API from_utf8(char const *encoding,std::string const &str)
{
	if(!is_utf8(encoding)) {
		char const *first = str.data();
		return from_utf8(encoding,first,first+str.size());
	}
	return str;
}
/// Throw std::invalid_argument unless is_utf8() accepts the first len bytes
/// of text.
void throw_not_utf8(const char* text, unsigned len)
{
    const bool valid = is_utf8(text, len);
    if (valid) {
        return;
    }
    throw std::invalid_argument("The text is not encoded in UTF8");
}
/*
 * Entry point of the commit command.
 *
 * Builds the commit object text (tree, parents, author, committer, optional
 * encoding header, message) in a strbuf, writes it with write_sha1_file(),
 * updates the HEAD ref with a reflog message, removes the merge/squash state
 * files, commits the index files and runs the post-commit hook.
 *
 * Returns 1 when prepare_to_commit() fails and 0 on success; every other
 * failure ends in die().
 */
int cmd_commit(int argc, const char **argv, const char *prefix)
{
	int header_len;
	struct strbuf sb;
	const char *index_file, *reflog_msg;
	char *nl, *p;
	unsigned char commit_sha1[20];
	struct ref_lock *ref_lock;

	git_config(git_commit_config);

	argc = parse_and_validate_options(argc, argv, builtin_commit_usage);

	index_file = prepare_index(argc, argv, prefix);

	/* Set up everything for writing the commit object.  This includes
	   running hooks, writing the trees, and interacting with the user.  */
	if (!prepare_to_commit(index_file, prefix)) {
		rollback_index_files();
		return 1;
	}

	/*
	 * The commit object
	 */
	strbuf_init(&sb, 0);
	strbuf_addf(&sb, "tree %s\n",
		    sha1_to_hex(active_cache_tree->sha1));

	/* Determine parents; each case also picks the reflog message prefix. */
	if (initial_commit) {
		/* No parent lines at all. */
		reflog_msg = "commit (initial)";
	} else if (amend) {
		struct commit_list *c;
		struct commit *commit;

		reflog_msg = "commit (amend)";
		commit = lookup_commit(head_sha1);
		if (!commit || parse_commit(commit))
			die("could not parse HEAD commit");

		/* Reuse the parents of the commit HEAD points at. */
		for (c = commit->parents; c; c = c->next)
			add_parent(&sb, c->item->object.sha1);
	} else if (in_merge) {
		struct strbuf m;
		FILE *fp;

		reflog_msg = "commit (merge)";
		add_parent(&sb, head_sha1);
		strbuf_init(&m, 0);
		fp = fopen(git_path("MERGE_HEAD"), "r");
		if (fp == NULL)
			die("could not open %s for reading: %s",
			    git_path("MERGE_HEAD"), strerror(errno));
		/* One additional parent per line of MERGE_HEAD. */
		while (strbuf_getline(&m, fp, '\n') != EOF) {
			unsigned char sha1[20];
			if (get_sha1_hex(m.buf, sha1) < 0)
				die("Corrupt MERGE_HEAD file (%s)", m.buf);
			add_parent(&sb, sha1);
		}
		fclose(fp);
		strbuf_release(&m);
	} else {
		reflog_msg = "commit";
		strbuf_addf(&sb, "parent %s\n", sha1_to_hex(head_sha1));
	}

	strbuf_addf(&sb, "author %s\n",
		    fmt_ident(author_name, author_email, author_date, IDENT_ERROR_ON_NO_NAME));
	strbuf_addf(&sb, "committer %s\n", git_committer_info(IDENT_ERROR_ON_NO_NAME));
	/* The encoding header is only written for non-UTF-8 encodings. */
	if (!is_encoding_utf8(git_commit_encoding))
		strbuf_addf(&sb, "encoding %s\n", git_commit_encoding);
	strbuf_addch(&sb, '\n');

	/* Finally, get the commit message */
	header_len = sb.len;
	if (strbuf_read_file(&sb, git_path(commit_editmsg), 0) < 0) {
		rollback_index_files();
		die("could not read commit message");
	}

	/* Truncate the message just before the diff, if any. */
	p = strstr(sb.buf, "\ndiff --git a/");
	if (p != NULL)
		strbuf_setlen(&sb, p - sb.buf + 1);

	if (cleanup_mode != CLEANUP_NONE)
		stripspace(&sb, cleanup_mode == CLEANUP_ALL);
	if (sb.len < header_len || message_is_empty(&sb, header_len)) {
		rollback_index_files();
		die("no commit message?  aborting commit.");
	}
	/* Append a NUL byte; it is excluded again from the length written below. */
	strbuf_addch(&sb, '\0');
	/* Warn only; a non-UTF-8 message does not stop the commit. */
	if (is_encoding_utf8(git_commit_encoding) && !is_utf8(sb.buf))
		fprintf(stderr, commit_utf8_warn);

	if (write_sha1_file(sb.buf, sb.len - 1, commit_type, commit_sha1)) {
		rollback_index_files();
		die("failed to write commit object");
	}

	ref_lock = lock_any_ref_for_update("HEAD",
					   initial_commit ? NULL : head_sha1,
					   0);

	/*
	 * Reuse sb for the reflog entry: cut after the first line of the
	 * message, drop the header, and prepend "<reflog_msg>: ".
	 */
	nl = strchr(sb.buf + header_len, '\n');
	if (nl)
		strbuf_setlen(&sb, nl + 1 - sb.buf);
	else
		strbuf_addch(&sb, '\n');
	strbuf_remove(&sb, 0, header_len);
	strbuf_insert(&sb, 0, reflog_msg, strlen(reflog_msg));
	strbuf_insert(&sb, strlen(reflog_msg), ": ", 2);

	/* The lock result is only checked here, after the message is built. */
	if (!ref_lock) {
		rollback_index_files();
		die("cannot lock HEAD ref");
	}
	if (write_ref_sha1(ref_lock, commit_sha1, sb.buf) < 0) {
		rollback_index_files();
		die("cannot update HEAD ref");
	}

	/* Remove the merge/squash state files; the results are not checked. */
	unlink(git_path("MERGE_HEAD"));
	unlink(git_path("MERGE_MSG"));
	unlink(git_path("SQUASH_MSG"));

	if (commit_index_files())
		die ("Repository has been updated, but unable to write\n"
		     "new_index file. Check that disk is not full or quota is\n"
		     "not exceeded, and then \"git reset HEAD\" to recover.");

	rerere();
	run_hook(get_index_file(), "post-commit", NULL);
	if (!quiet)
		print_summary(prefix, commit_sha1);

	return 0;
}