Exemple #1
0
/*
 * Work out whether the environment uses UTF-8 and record the answer in
 * utf8env (it is only ever set to 1 here, never cleared).
 *
 * Sources consulted, in order:
 *   1. param.force_utf8, which short-circuits the environment variables;
 *   2. the first variable set among LC_ALL, LC_CTYPE and LANG;
 *   3. setlocale() and nl_langinfo(), when the build provides them.
 */
void check_locale(void)
{
	if(!param.force_utf8)
	{
		/* Locale variables, highest precedence first. */
		const char *loc = getenv("LC_ALL");

		if(loc == NULL) loc = getenv("LC_CTYPE");
		if(loc == NULL) loc = getenv("LANG");

		if(is_utf8(loc)) utf8env = 1;
	}
	else utf8env = 1;

#if defined(HAVE_SETLOCALE) && defined(LC_CTYPE)
	/* Querying the locale requires adopting the one from the environment. */
	if(!utf8env)
	{
		if(is_utf8(setlocale(LC_CTYPE, ""))) utf8env = 1;
	}
#endif
#if defined(HAVE_NL_LANGINFO) && defined(CODESET)
	/* Asked after setlocale() so that it reflects the locale set above. */
	if(!utf8env)
	{
		if(is_utf8(nl_langinfo(CODESET))) utf8env = 1;
	}
#endif

	debug1("UTF-8 locale: %i", utf8env);
}
/*
 * Walk the frame chain starting at PyEval_GetFrame() and following f_back.
 * Returns Python False as soon as a frame fails PyFrame_Check() or is_utf8()
 * rejects its code object's co_filename or co_name; returns Python True when
 * the end of the chain is reached. self and args are not used.
 */
PyObject *
is_frame_utf8(PyObject* self, PyObject* args)
{
    PyFrameObject *cur;

    for (cur = PyEval_GetFrame(); cur != NULL; cur = cur->f_back) {
        if (!PyFrame_Check(cur)) {
            Py_RETURN_FALSE;
        }
        if (!is_utf8(cur->f_code->co_filename)) {
            Py_RETURN_FALSE;
        }
        if (!is_utf8(cur->f_code->co_name)) {
            Py_RETURN_FALSE;
        }
    }
    Py_RETURN_TRUE;
}
Exemple #3
0
/**
 * Store a UTF-8 version of source_str in target_str.
 *
 * If is_utf8() already accepts source_str it is copied unchanged. Otherwise
 * the first len bytes are converted from GB18030 to UTF-8 with iconv, and the
 * converted text up to its first NUL byte is stored.
 *
 * @param source_str NUL-terminated input text.
 * @param len        number of input bytes handed to iconv.
 * @param target_str receives the result; left untouched on failure.
 * @return true on success; false if the converter cannot be opened, the
 *         output buffer cannot be allocated, or iconv reports an error.
 */
bool to_utf8(const char* source_str, const size_t len, std::string & target_str)
{
    if (is_utf8(source_str)) {
        target_str = source_str;
        return true;
    }

    iconv_t cd = iconv_open("UTF-8", "GB18030");
    if ((iconv_t)-1 == cd) {
        return false;
    }

    size_t inlen = len;
    char *in = const_cast<char*>(source_str);
    size_t outlen = 4 * inlen;

    // One extra zeroed byte that iconv is never told about, so the buffer is
    // NUL-terminated even when len is 0 or the output fills it completely.
    // calloc zero-fills, and the result is checked before anything touches it.
    char *out = static_cast<char*>(calloc(outlen + 1, 1));
    if (NULL == out) {
        iconv_close(cd);
        return false;
    }

    char *pout = out;
    const bool converted = ((size_t)-1 != iconv(cd, &in, &inlen, &pout, &outlen));
    if (converted) {
        target_str = std::string(out);
    }

    free(out);
    iconv_close(cd);
    return converted;
}
Exemple #4
0
/* Main conversion entry point.
   Takes ICY text in CP-1252 (or text that is_utf8() already accepts) and
   returns a newly allocated UTF-8 string, or NULL if allocation fails. */
char *
icy2utf8(const char *src)
{
	const uint8_t *in = (const uint8_t *)src;
	size_t inlen, outlen = 0, pos, t;
	uint8_t *buf;
	char *result;

	/* Some streams (Apple/iTunes) already send UTF-8; hand back a plain
	   copy instead of encoding it a second time. */
	if(is_utf8(src))
		return (strdup(src));

	/* Count the terminating NUL so it goes through the table like any
	   other byte and ends up in the output. */
	inlen = strlen(src) + 1;

	/* Conservative allocation: three output bytes per input byte. */
	buf = malloc(inlen * 3);
	if (buf == NULL)
		return (NULL);

	for (pos = 0; pos < inlen; pos++) {
		const uint8_t ch = in[pos];

		/* tblofs[ch] .. tblofs[ch + 1] delimits this byte's slice of
		   cp1252_utf8. */
		for (t = tblofs[ch]; t < tblofs[ch + 1]; t++)
			buf[outlen++] = cp1252_utf8[t];
	}

	/* Trim the buffer down to the bytes actually written. */
	result = realloc(buf, outlen);
	if (result == NULL) {
		free(buf);
		return (NULL);
	}
	return (result);
}
Exemple #5
0
/*
 * Return the on-screen length of a plain string.
 *
 * The string is first passed through strip_codes(). If is_utf8() is true and
 * the stripped text validates as UTF-8, each character that
 * unichar_isprint() accepts contributes mk_wcwidth() columns and every other
 * character contributes 1; otherwise the result is the byte length of the
 * stripped text. Returns 0 (via g_return_val_if_fail) when str is NULL.
 */
static int scrlen_str(const char *str)
{
	char *plain;
	const char *p;
	int width;

	g_return_val_if_fail(str != NULL, 0);

	plain = strip_codes(str);

	if (!is_utf8() || !g_utf8_validate(plain, -1, NULL)) {
		width = strlen(plain);
	} else {
		width = 0;
		for (p = plain; *p != '\0'; p = g_utf8_next_char(p)) {
			gunichar c = g_utf8_get_char(p);

			width += unichar_isprint(c) ? mk_wcwidth(c) : 1;
		}
	}

	g_free(plain);
	return width;
}
Exemple #6
0
/*
 * Check a stream for UTF-8 validity one line at a time.
 *
 * Each line read with getline() is passed to is_utf8(). When is_utf8() sets
 * an error message, the error is reported through print_utf8_error() with
 * the current line number, the position inside the line and the offset
 * accumulated over the previous lines, and reading stops.
 *
 * Returns EXIT_SUCCESS if no line produced a message, EXIT_FAILURE otherwise.
 */
static int is_utf8_readline(FILE *stream, const char *file_path,
                            int quiet, int verbose, int list_only, int invert)
{
    char *line = NULL;
    size_t capacity = 0;
    ssize_t line_len;
    char *message = NULL;
    int lineno = 1;
    int offset = 0;
    int faulty_bytes = 0;

    for (;;)
    {
        int pos;

        line_len = getline(&line, &capacity, stream);
        if (line_len == -1)
            break;

        pos = is_utf8((unsigned char*)line, line_len, &message, &faulty_bytes);
        if (message == NULL)
        {
            /* Clean line: account for it and move on to the next one. */
            offset += line_len;
            lineno++;
            continue;
        }

        offset += pos;
        print_utf8_error(file_path, lineno, pos, offset,
                         line, line_len, pos, message, faulty_bytes,
                         quiet, verbose, list_only, invert);
        break;
    }

    /* free(NULL) is a no-op, so no guard is needed. */
    free(line);

    if (message != NULL)
        return EXIT_FAILURE;
    return EXIT_SUCCESS;
}
Exemple #7
0
/*
 * Guess the charset of a line that carries no charset information.
 *
 * If the target charset is UTF-8 and the line's buffer already passes
 * is_utf8() (US-ASCII is a subset of UTF-8), NULL is returned: there is
 * nothing to convert.
 *
 * In every other case the line is assumed to be Latin1, for historical
 * reasons, and "ISO8859-1" is returned.
 */
static const char *guess_charset(const struct strbuf *line, const char *target_charset)
{
	if (!is_encoding_utf8(target_charset))
		return "ISO8859-1";

	return is_utf8(line->buf) ? NULL : "ISO8859-1";
}
Exemple #8
0
/**
 * Get the print path of a file as a UTF-8 string.
 *
 * A previously stored info->utf8_print_path is returned as is. Otherwise,
 * when is_utf8() is true, info->print_path itself is returned and nothing is
 * stored; else the result of to_utf8(info->print_path) is stored in
 * info->utf8_print_path and returned.
 *
 * @param info file information
 * @return the UTF-8 path, or whatever to_utf8() returned for the conversion
 *         (presumably NULL when it fails - confirm against to_utf8).
 */
const char* file_info_get_utf8_print_path(struct file_info* info)
{
	if(info->utf8_print_path != NULL) return info->utf8_print_path;

	if(is_utf8()) return info->print_path;

	info->utf8_print_path = to_utf8(info->print_path);
	return info->utf8_print_path;
}
Exemple #9
0
/*
 * Guess the charset of a line that carries no charset information.
 *
 * If the target charset is UTF-8 and the line already passes is_utf8()
 * (US-ASCII is a subset of UTF-8), NULL is returned: there is nothing to
 * convert.
 *
 * In every other case the line is assumed to be Latin1, for historical
 * reasons, and "latin1" is returned.
 */
static const char *guess_charset(const char *line, const char *target_charset)
{
	if (!is_encoding_utf8(target_charset))
		return "latin1";

	return is_utf8(line) ? NULL : "latin1";
}
Exemple #10
0
/// Convert the UTF-8 text in [begin,end) to the encoding named by c_encoding.
/// When is_utf8() reports that the target encoding is itself UTF-8, the bytes
/// are copied unchanged; otherwise locale::conv::from_utf does the conversion.
std::string CPPCMS_API from_utf8(char const *c_encoding,char const *begin,char const *end)
{
	if(!is_utf8(c_encoding))
		return locale::conv::from_utf<char>(begin,end,c_encoding);

	return std::string(begin,end-begin);
}
Exemple #11
0
/*
 * Command-line front end: check either the single argument string or, when
 * the argument is "-", everything read from standard input.
 *
 * Standard input is consumed in chunks of up to BUFSIZE bytes and every
 * chunk is handed to is_utf8() on its own. The first chunk (or the argument)
 * for which is_utf8() sets an error message is reported with
 * pretty_print_error_at() and the program exits with EXIT_FAILURE.
 */
int main(int ac, char **av)
{
    char buffer[BUFSIZE];
    char *message;
    int pos;

    if (ac != 2)
    {
        fprintf(stderr, "USAGE: %s STRING or - for stdin.\n", av[0]);
        return EXIT_FAILURE;
    }

    if (strcmp(av[1], "-") != 0)
    {
        /* Plain argument: validate the string itself. */
        pos = is_utf8((unsigned char*)av[1], strlen(av[1]), &message);
        if (message == NULL)
            return EXIT_SUCCESS;
        pretty_print_error_at(av[1], pos, message);
        return EXIT_FAILURE;
    }

    for (;;)
    {
        int nread = read(0, buffer, BUFSIZE);

        if (nread == 0)
            return EXIT_SUCCESS;    /* end of input, nothing invalid seen */
        if (nread == -1)
        {
            perror("read");
            return EXIT_FAILURE;
        }
        pos = is_utf8((unsigned char*)buffer, nread, &message);
        if (message != NULL)
        {
            pretty_print_error_at(buffer, pos, message);
            return EXIT_FAILURE;
        }
    }
}
Exemple #12
0
/*
 * Learn a single word with the given confidence.
 *
 * Returns VARNAM_ARGS_ERROR when handle or word is NULL, VARNAM_ERROR when
 * the words store is not available, the word is not UTF-8 (last error is
 * set) or the tokens cannot be learned from, the tokenizer's non-zero code
 * when tokenizing fails, and otherwise the result of
 * vwt_persist_possibilities().
 */
static int
varnam_learn_internal(varnam *handle, const char *word, int confidence)
{
    int status;
    varray *token_list;
    strbuf *cleaned;

    if (word == NULL || handle == NULL) {
        return VARNAM_ARGS_ERROR;
    }

    if (!is_words_store_available(handle))
        return VARNAM_ERROR;

    if (!is_utf8 (word)) {
        set_last_error (handle, "Incorrect encoding. Expected UTF-8 string");
        return VARNAM_ERROR;
    }

    token_list = get_pooled_array (handle);

    /* Strip leading and trailing special characters from the word */
    cleaned = sanitize_word (handle, word);

    status = vst_tokenize (handle, strbuf_to_s (cleaned), VARNAM_TOKENIZER_VALUE, VARNAM_MATCH_ALL, token_list);
    if (status != 0) {
        return status;
    }

#ifdef _VARNAM_VERBOSE
    printf ("%s\n", "Tokens before reducing noice");
    print_tokens_array (token_list);
#endif

    /* The tokenizer may return more than is useful; drop the noise so that
       only the most relevant combinations are learned */
    reduce_noise_in_tokens (token_list);

#ifdef _VARNAM_VERBOSE
    printf ("%s\n", "Tokens after reducing noice");
    print_tokens_array (token_list);
#endif

    if (can_learn_from_tokens (handle, token_list, strbuf_to_s (cleaned))) {
        return vwt_persist_possibilities (handle,
                                          token_list,
                                          strbuf_to_s (cleaned),
                                          confidence);
    }

    return VARNAM_ERROR;
}
/*
 * Hook handler for channel messages: reject text that is not valid UTF-8.
 *
 * Nothing is done when data->approved is already non-zero. Otherwise, if
 * is_utf8() rejects data->text, the sender receives an ERR_CANNOTSENDTOCHAN
 * numeric naming the channel and data->approved is set to that numeric.
 */
static void
block_invalid_utf8_process(hook_data_privmsg_channel *data)
{
    /* The short-circuit skips the UTF-8 scan for already-blocked messages. */
    if (!data->approved && !is_utf8(data->text)) {
        sendto_one_numeric(data->source_p,
                           ERR_CANNOTSENDTOCHAN,
                           form_str(ERR_CANNOTSENDTOCHAN),
                           data->chptr->chname,
                           "your message was badly formatted UTF-8 and this network enforces valid UTF-8");
        data->approved = ERR_CANNOTSENDTOCHAN;
    }
}
Exemple #14
0
/*
 * Wrap the text, if necessary, and write it to stdout. The variable indent
 * is the indent for the first line, indent2 is the indent for all other
 * lines.
 * If indent is negative, assume that already -indent columns have been
 * consumed (and no extra indent is necessary for the first line).
 *
 * Returns the column reached (w) when the end of the text is hit.
 */
int print_wrapped_text(const char *text, int indent, int indent2, int width)
{
	/* w: current column; assume_utf8: measure widths with utf8_width(). */
	int w = indent, assume_utf8 = is_utf8(text);
	/*
	 * bol: start of the text of the current output line.
	 * space: position of the last whitespace up to which output was
	 * written, or NULL if nothing has been written on this line yet.
	 */
	const char *bol = text, *space = NULL;

	if (indent < 0) {
		w = -indent;
		/* Pretend something was already written: suppresses the indent. */
		space = text;
	}

	for (;;) {
		char c = *text;
		/* Word boundary: either end of text or a whitespace character. */
		if (!c || isspace(c)) {
			/*
			 * The pending word fits, or it is the first word on the
			 * line and has to be written regardless of its length.
			 */
			if (w < width || !space) {
				const char *start = bol;
				if (space)
					start = space;
				else
					print_spaces(indent);
				/* Write everything from the last break up to here. */
				fwrite(start, text - start, 1, stdout);
				if (!c)
					return w;
				else if (c == '\t')
					/* With the w++ below, advances to the next multiple of 8. */
					w |= 0x07;
				space = text;
				w++;
				text++;
			}
			else {
				/* Does not fit: break the line and rescan from the last break. */
				putchar('\n');
				/* Skip the whitespace character at the break, if there is one. */
				text = bol = space + isspace(*space);
				space = NULL;
				w = indent = indent2;
			}
			continue;
		}
		/* Ordinary character: advance text and account for its width. */
		if (assume_utf8)
			w += utf8_width(&text, NULL);
		else {
			w++;
			text++;
		}
	}
}
Exemple #15
0
/*
 * Read up to 4092 bytes from standard input and print which encoding the
 * is_utf8 / is_utf16* / is_utf32* detectors recognise, trying them in that
 * order. Prints "Unknown encoding" when none of them matches.
 *
 * Returns 0 normally, 1 when reading standard input fails.
 */
int main()
{
    /*
     * The buffer is zero-filled and its last four bytes are never written,
     * so whatever was read is always followed by a terminator wide enough
     * for a 32-bit code unit. The detectors take only a pointer, so they
     * must be able to find the end of the data on their own.
     */
    char to_check[4096] = {0};
    size_t got = fread(to_check, 1, sizeof to_check - 4, stdin);

    if (got == 0 && ferror(stdin)) {
        perror("fread");
        return 1;
    }

    if (is_utf8(to_check)) {
        printf("UTF-8\n");
    } else if (is_utf16be(to_check)) {
        printf("UTF-16 BE\n");
    } else if (is_utf16le(to_check)) {
        printf("UTF-16 LE\n");
    } else if (is_utf32be(to_check)) {
        printf("UTF-32 BE\n");
    } else if (is_utf32le(to_check)) {
        printf("UTF-32 LE\n");
    } else {
        printf("Unknown encoding\n");
    }
    return 0;
}
Exemple #16
0
/*
 * Check a whole file for UTF-8 validity, using one mmap() of its contents.
 *
 * If the file cannot be mapped (for example because it is a pipe), it is
 * reopened as a stdio stream and checked line by line with
 * is_utf8_readline() instead; that stream is closed again before returning.
 *
 * The result of is_utf8() is always passed on to print_utf8_error(); when an
 * error message was set, count_lines() first translates the byte position
 * into a line and column.
 *
 * Returns EXIT_SUCCESS when no error message was produced and EXIT_FAILURE
 * when the content is invalid or the fallback stream cannot be opened.
 * Failures of open()/fstat() go through the handle_error macro, which is
 * defined outside this block (presumably it reports the error and jumps to
 * the given cleanup label - confirm).
 */
static int is_utf8_mmap(const char *file_path, int quiet, int verbose,
                        int list_only, int invert)
{
    char *addr;
    struct stat sb;
    int fd;
    int pos = 0;
    char *message;
    int retval = EXIT_SUCCESS;
    int error_column = 1;
    int error_line = 0;
    int faulty_bytes = 0;

    fd = open(file_path, O_RDONLY);
    if (fd == -1)
        handle_error("open", err_open);
    if (fstat(fd, &sb) == -1)           /* To obtain file size */
        handle_error("fstat", err_fstat);
    addr = mmap(NULL, sb.st_size, PROT_READ, MAP_PRIVATE, fd, 0);
    if (addr == MAP_FAILED)
    {
        /* Can't mmap, maybe a pipe or whatever, let's try readline. */
        FILE *stream;

        close(fd);
        stream = fopen(file_path, "r");
        if (stream == NULL)
        {
            /* Never hand a NULL stream to getline(). */
            perror("fopen");
            return EXIT_FAILURE;
        }
        retval = is_utf8_readline(stream, file_path,
                                  quiet, verbose, list_only, invert);
        fclose(stream);
        return retval;
    }
    pos = is_utf8((unsigned char*)addr, sb.st_size, &message, &faulty_bytes);
    if (message != NULL)
        count_lines(addr, sb.st_size, pos, &error_line, &error_column);
    print_utf8_error(file_path, error_line, error_column, pos,
                     addr, sb.st_size, pos, message, faulty_bytes,
                     quiet, verbose, list_only, invert);
    if (message != NULL)
        retval = EXIT_FAILURE;
    munmap(addr, sb.st_size);
err_fstat:
    close(fd);
err_open:
    return retval;
}
/*
 * Prepare one ZIP archive entry for the object identified by sha1.
 *
 * In its current state only the first half of the function is live: it
 * picks the general-purpose flags, the external attributes (attr2), the
 * compression method and the data source (in-memory buffer or stream) for
 * the entry. Everything that would compress the data and emit the local
 * header, the payload and the central directory record is commented out
 * below, so a successful call writes nothing and returns 0.
 *
 * Returns the result of error() when the path is longer than 0xffff bytes,
 * the blob cannot be streamed or read, or the file mode is unsupported.
 *
 * NOTE(review): LOR, LAND and ASL are macros defined outside this block;
 * from their use they look like stand-ins for the |, & and << operators -
 * confirm. Several locals (out, buffer, stream) are declared int here
 * although they are assigned NULL or the results of open_istream() and
 * sha1_file_to_archive().
 * NOTE(review): with the tail disabled, nothing in this function releases
 * the stream opened by open_istream() or the buffer returned by
 * sha1_file_to_archive().
 */
static int write_zip_entry(struct archiver_args *args,
		const unsigned char *sha1,
		const char *path, size_t pathlen,
		unsigned int mode, int big_file_threshold,
		int zip_dir_size, int zip_dir_offset, int zip_dir,
		int zip_time, int zip_date, int zip_offset, int zip_dir_entries)
{
	struct zip_local_header header;
	struct zip_dir_header dirent;
	struct zip_extra_mtime extra;
	unsigned long attr2 = 0;
	unsigned long compressed_size = 0;
	unsigned long crc = 0;
	unsigned long direntsize = 0;
	int method = 0;
	int out = 0;
	int deflated = 0;
	int buffer = 0;
	int stream = 0;
	unsigned long flags = 0;
	unsigned long size = 0;

	/* Initial CRC value, as returned by crc32() for an empty input. */
	crc = crc32(0, NULL, 0);

	/* Non-ASCII paths: flag them as UTF-8 when valid, warn otherwise. */
	if (!has_only_ascii(path)) {
		if (is_utf8(path))
			flags = LOR(flags,ZIP_UTF8);
		else
			warning("Path is not valid UTF-8: %s", path);
	}

	/* Reject paths whose length does not fit in 16 bits. */
	if (pathlen > 0xffff) {
		return error("path too long (%d chars, SHA1: %s): %s",
				(int)pathlen, sha1_to_hex(sha1), path);
	}

	if (S_ISDIR(mode) || S_ISGITLINK(mode)) {
		/* Directories and gitlinks: stored, no data, attr2 = 16. */
		method = 0;
		attr2 = 16;
		out = NULL;
		size = 0;
		compressed_size = 0;
		buffer = NULL;
		size = 0;
	} else if (S_ISREG(mode) || S_ISLNK(mode)) {
		int type = sha1_object_info(sha1, &size);

		method = 0;
		/* Symlinks and executable files carry mode bits in attr2. */
		if (S_ISLNK(mode)) {
			attr2 = ASL(LOR(mode,0777),16);
		} else if (LAND(mode,0111)) {
			attr2 = ASL(LOR(mode,0111),16);
		}
		/* Method 8 for non-empty regular files when compression is on. */
		if (S_ISREG(mode) && args->compression_level != 0 && size > 0)
			method = 8;
		compressed_size = size;

		if (S_ISREG(mode) && type == OBJ_BLOB && !args->convert &&
				size > big_file_threshold) {
			/* Large unconverted blobs are streamed instead of loaded. */
			stream = open_istream(sha1, &type, &size, NULL);
			if (!stream)
				return error("cannot stream blob %s",
						sha1_to_hex(sha1));
			flags |= ZIP_STREAM;
			out = buffer = NULL;
		} else {
			/* Everything else is read into memory and checksummed now. */
			buffer = sha1_file_to_archive(args, path, sha1, mode,
					&type, &size);
			if (!buffer)
				return error("cannot read %s",
						sha1_to_hex(sha1));
			crc = crc32(crc, buffer, size);
			out = buffer;
		}
	} else {
		return error("unsupported file mode: 0%o (SHA1: %s)", mode,
				sha1_to_hex(sha1));
	}

	/*
	 * Disabled code: compression, header/payload output and central
	 * directory bookkeeping. Kept verbatim.
	 */
//	if (buffer && method == 8) {
//		deflated = zlib_deflate(buffer, size, args->compression_level,
//				&compressed_size);
//		if (deflated && compressed_size - 6 < size) {
//			/* ZLIB --> raw compressed data (see RFC 1950) */
//			/* CMF and FLG ... */
//			out = deflated + 2;
//			compressed_size -= 6;	/* ... and ADLER32 */
//		} else {
//			method = 0;
//			compressed_size = size;
//		}
//	}
//
//	copy_le16(extra.magic, 0x5455);
//	copy_le16(extra.extra_size, ZIP_EXTRA_MTIME_PAYLOAD_SIZE);
//	extra.flags[0] = 1;	/* just mtime */
//	copy_le32(extra.mtime, args->time);
//
//	/* make sure we have enough free space in the dictionary */
//	direntsize = ZIP_DIR_HEADER_SIZE + pathlen + ZIP_EXTRA_MTIME_SIZE;
//	while (zip_dir_size < zip_dir_offset + direntsize) {
//		zip_dir_size += ZIP_DIRECTORY_MIN_SIZE;
//		zip_dir = xrealloc(zip_dir, zip_dir_size);
//	}
//
//	copy_le32(dirent.magic, 0x02014b50);
//	copy_le16(dirent.creator_version,
//			S_ISLNK(mode) || (S_ISREG(mode) && (mode & 0111)) ? 0x0317 : 0);
//	copy_le16(dirent.version, 10);
//	copy_le16(dirent.flags, flags);
//	copy_le16(dirent.compression_method, method);
//	copy_le16(dirent.mtime, zip_time);
//	copy_le16(dirent.mdate, zip_date);
//	set_zip_dir_data_desc(&dirent, size, compressed_size, crc);
//	copy_le16(dirent.filename_length, pathlen);
//	copy_le16(dirent.extra_length, ZIP_EXTRA_MTIME_SIZE);
//	copy_le16(dirent.comment_length, 0);
//	copy_le16(dirent.disk, 0);
//	copy_le16(dirent.attr1, 0);
//	copy_le32(dirent.attr2, attr2);
//	copy_le32(dirent.offset, zip_offset);
//
//	copy_le32(header.magic, 0x04034b50);
//	copy_le16(header.version, 10);
//	copy_le16(header.flags, flags);
//	copy_le16(header.compression_method, method);
//	copy_le16(header.mtime, zip_time);
//	copy_le16(header.mdate, zip_date);
//	set_zip_header_data_desc(&header, size, compressed_size, crc);
//	copy_le16(header.filename_length, pathlen);
//	copy_le16(header.extra_length, ZIP_EXTRA_MTIME_SIZE);
//	write_or_die(1, &header, ZIP_LOCAL_HEADER_SIZE);
//	zip_offset += ZIP_LOCAL_HEADER_SIZE;
//	write_or_die(1, path, pathlen);
//	zip_offset += pathlen;
//	write_or_die(1, &extra, ZIP_EXTRA_MTIME_SIZE);
//	zip_offset += ZIP_EXTRA_MTIME_SIZE;
//	if (stream && method == 0) {
//		unsigned char buf[STREAM_BUFFER_SIZE];
//		ssize_t readlen = 0;
//
//		for (;;) {
//			readlen = read_istream(stream, buf, sizeof(buf));
//			if (readlen <= 0)
//				break;
//			crc = crc32(crc, buf, readlen);
//			write_or_die(1, buf, readlen);
//		}
//		close_istream(stream);
//		if (readlen)
//			return readlen;
//
//		compressed_size = size;
//		zip_offset += compressed_size;
//
//		write_zip_data_desc(size, compressed_size, crc);
//		zip_offset += ZIP_DATA_DESC_SIZE;
//
//		set_zip_dir_data_desc(&dirent, size, compressed_size, crc);
//	} else if (stream && method == 8) {
//		int buf;
//		ssize_t readlen;
//		git_zstream zstream;
//		int result;
//		size_t out_len;
//		int compressed;
//
//		memset(&zstream, 0, sizeof(zstream));
//		git_deflate_init(&zstream, args->compression_level);
//
//		compressed_size = 0;
//		zstream.next_out = compressed;
//		zstream.avail_out = sizeof(compressed);
//
//		for (;;) {
//			readlen = read_istream(stream, buf, sizeof(buf));
//			if (readlen <= 0)
//				break;
//			crc = crc32(crc, buf, readlen);
//
//			zstream.next_in = buf;
//			zstream.avail_in = readlen;
//			result = git_deflate(&zstream, 0);
//			if (result != Z_OK)
//				die("deflate error (%d)", result);
//			out = compressed;
//			if (!compressed_size)
//				out += 2;
//			out_len = zstream.next_out - out;
//
//			if (out_len > 0) {
//				write_or_die(1, out, out_len);
//				compressed_size += out_len;
//				zstream.next_out = compressed;
//				zstream.avail_out = sizeof(compressed);
//			}
//
//		}
//		close_istream(stream);
//		if (readlen)
//			return readlen;
//
//		zstream.next_in = buf;
//		zstream.avail_in = 0;
//		result = git_deflate(&zstream, Z_FINISH);
//		if (result != Z_STREAM_END)
//			die("deflate error (%d)", result);
//
//		git_deflate_end(&zstream);
//		out = compressed;
//		if (!compressed_size)
//			out += 2;
//		out_len = zstream.next_out - out - 4;
//		write_or_die(1, out, out_len);
//		compressed_size += out_len;
//		zip_offset += compressed_size;
//
//		write_zip_data_desc(size, compressed_size, crc);
//		zip_offset += ZIP_DATA_DESC_SIZE;
//
//		set_zip_dir_data_desc(&dirent, size, compressed_size, crc);
//	} else if (compressed_size > 0) {
//		write_or_die(1, out, compressed_size);
//		zip_offset += compressed_size;
//	}
//
//	free(deflated);
//	free(buffer);
//
//	memcpy(zip_dir + zip_dir_offset, &dirent, ZIP_DIR_HEADER_SIZE);
//	zip_dir_offset += ZIP_DIR_HEADER_SIZE;
//	memcpy(zip_dir + zip_dir_offset, path, pathlen);
//	zip_dir_offset += pathlen;
//	memcpy(zip_dir + zip_dir_offset, &extra, ZIP_EXTRA_MTIME_SIZE);
//	zip_dir_offset += ZIP_EXTRA_MTIME_SIZE;
//	zip_dir_entries++;

	return 0;
}
Exemple #18
0
// Decode one value of the given TType from the input buffer.
//
// typeargs carries the type-specific arguments (element/key/value specs,
// struct class and spec, or the value is_utf8() inspects for strings).
// string_limit and container_limit are passed to check_length_limit() for
// strings and for lists/sets/maps respectively, and are forwarded unchanged
// to nested values.
//
// Returns a new reference, or NULL with a Python exception set on failure.
static PyObject*
decode_val(DecodeBuffer* input, TType type, PyObject* typeargs, long string_limit, long container_limit) {
    switch (type) {

    case T_BOOL: {
        int8_t v = readByte(input);
        if (INT_CONV_ERROR_OCCURRED(v)) {
            return NULL;
        }

        switch (v) {
        case 0:
            Py_RETURN_FALSE;
        case 1:
            Py_RETURN_TRUE;
        // Don't laugh.  This is a potentially serious issue.
        default:
            PyErr_SetString(PyExc_TypeError, "boolean out of range");
            return NULL;
        }
        break;
    }
    case T_I08: {
        int8_t v = readByte(input);
        if (INT_CONV_ERROR_OCCURRED(v)) {
            return NULL;
        }

        return PyInt_FromLong(v);
    }
    case T_I16: {
        int16_t v = readI16(input);
        if (INT_CONV_ERROR_OCCURRED(v)) {
            return NULL;
        }
        return PyInt_FromLong(v);
    }
    case T_I32: {
        int32_t v = readI32(input);
        if (INT_CONV_ERROR_OCCURRED(v)) {
            return NULL;
        }
        return PyInt_FromLong(v);
    }

    case T_I64: {
        int64_t v = readI64(input);
        if (INT_CONV_ERROR_OCCURRED(v)) {
            return NULL;
        }
        // TODO(dreiss): Find out if we can take this fastpath always when
        //               sizeof(long) == sizeof(long long).
        if (CHECK_RANGE(v, LONG_MIN, LONG_MAX)) {
            return PyInt_FromLong((long) v);
        }

        return PyLong_FromLongLong(v);
    }

    case T_DOUBLE: {
        double v = readDouble(input);
        if (v == -1.0 && PyErr_Occurred()) {
            // This function returns a pointer: the failure value is NULL.
            return NULL;
        }
        return PyFloat_FromDouble(v);
    }

    case T_STRING: {
        Py_ssize_t len = readI32(input);
        char* buf;
        if (!readBytes(input, &buf, len)) {
            return NULL;
        }
        if (!check_length_limit(len, string_limit)) {
            return NULL;
        }

        // Strings flagged as UTF-8 become unicode objects, others byte strings.
        if (is_utf8(typeargs))
            return PyUnicode_DecodeUTF8(buf, len, 0);
        else
            return PyString_FromStringAndSize(buf, len);
    }

    case T_LIST:
    case T_SET: {
        SetListTypeArgs parsedargs;
        int32_t len;
        PyObject* ret = NULL;
        int i;
        bool use_tuple = false;

        if (!parse_set_list_args(&parsedargs, typeargs)) {
            return NULL;
        }

        if (!checkTypeByte(input, parsedargs.element_type)) {
            return NULL;
        }

        len = readI32(input);
        if (!check_length_limit(len, container_limit)) {
            return NULL;
        }

        // Immutable lists are built as tuples; sets always go through a list.
        use_tuple = type == T_LIST && parsedargs.immutable;
        ret = use_tuple ? PyTuple_New(len) : PyList_New(len);
        if (!ret) {
            return NULL;
        }

        for (i = 0; i < len; i++) {
            PyObject* item = decode_val(input, parsedargs.element_type, parsedargs.typeargs, string_limit, container_limit);
            if (!item) {
                Py_DECREF(ret);
                return NULL;
            }
            // The SET_ITEM macros steal the reference to item.
            if (use_tuple) {
                PyTuple_SET_ITEM(ret, i, item);
            } else  {
                PyList_SET_ITEM(ret, i, item);
            }
        }

        // TODO(dreiss): Consider biting the bullet and making two separate cases
        //               for list and set, avoiding this post facto conversion.
        if (type == T_SET) {
            PyObject* setret;
            setret = parsedargs.immutable ? PyFrozenSet_New(ret) : PySet_New(ret);
            Py_DECREF(ret);
            return setret;
        }
        return ret;
    }

    case T_MAP: {
        int32_t len;
        int i;
        MapTypeArgs parsedargs;
        PyObject* ret = NULL;

        if (!parse_map_args(&parsedargs, typeargs)) {
            return NULL;
        }

        if (!checkTypeByte(input, parsedargs.ktag)) {
            return NULL;
        }
        if (!checkTypeByte(input, parsedargs.vtag)) {
            return NULL;
        }

        len = readI32(input);
        if (!check_length_limit(len, container_limit)) {
            return NULL;
        }

        ret = PyDict_New();
        if (!ret) {
            goto error;
        }

        for (i = 0; i < len; i++) {
            PyObject* k = NULL;
            PyObject* v = NULL;
            k = decode_val(input, parsedargs.ktag, parsedargs.ktypeargs, string_limit, container_limit);
            if (k == NULL) {
                goto loop_error;
            }
            v = decode_val(input, parsedargs.vtag, parsedargs.vtypeargs, string_limit, container_limit);
            if (v == NULL) {
                goto loop_error;
            }
            if (PyDict_SetItem(ret, k, v) == -1) {
                goto loop_error;
            }

            // PyDict_SetItem took its own references.
            Py_DECREF(k);
            Py_DECREF(v);
            continue;

            // Yuck!  Destructors, anyone?
loop_error:
            Py_XDECREF(k);
            Py_XDECREF(v);
            goto error;
        }

        if (parsedargs.immutable) {
            // Wrap the dict in thrift.Thrift.TFrozenDict.
            PyObject* thrift = PyImport_ImportModule("thrift.Thrift");
            PyObject* cls = NULL;
            PyObject* arg = NULL;
            PyObject* frozen = NULL;
            if (!thrift) {
                goto error;
            }
            cls = PyObject_GetAttrString(thrift, "TFrozenDict");
            // The module reference is not needed once the class was looked up.
            Py_DECREF(thrift);
            if (!cls) {
                goto error;
            }
            arg = PyTuple_New(1);
            if (!arg) {
                Py_DECREF(cls);
                goto error;
            }
            // The tuple steals our reference to ret, so dropping the tuple
            // below also drops the dict (unless the new object kept it).
            PyTuple_SET_ITEM(arg, 0, ret);
            frozen = PyObject_CallObject(cls, arg);
            Py_DECREF(cls);
            Py_DECREF(arg);
            return frozen;
        }

        return ret;

error:
        Py_XDECREF(ret);
        return NULL;
    }

    case T_STRUCT: {
        StructTypeArgs parsedargs;
        if (!parse_struct_args(&parsedargs, typeargs)) {
            return NULL;
        }

        return decode_struct(input, Py_None, parsedargs.klass, parsedargs.spec, string_limit, container_limit);
    }

    case T_STOP:
    case T_VOID:
    case T_UTF16:
    case T_UTF8:
    case T_U64:
    default:
        PyErr_SetString(PyExc_TypeError, "Unexpected TType");
        return NULL;
    }
}
Exemple #19
0
/*
 * Serialize one Python value of the given TType into output.
 *
 * typeargs carries the type-specific arguments (element/key/value specs,
 * struct spec, or the value is_utf8() inspects for strings). Containers and
 * structs recurse into output_val for their members.
 *
 * Returns true on success; false on failure, in which case a Python
 * exception is expected to be set by the call that failed.
 */
static bool
output_val(PyObject* output, PyObject* value, TType type, PyObject* typeargs) {
    /*
     * Refcounting Strategy:
     *
     * We assume that elements of the thrift_spec tuple are not going to be
     * mutated, so we don't ref count those at all. Other than that, we try to
     * keep a reference to all the user-created objects while we work with them.
     * output_val assumes that a reference is already held. The *caller* is
     * responsible for handling references
     */

    switch (type) {

    case T_BOOL: {
        int v = PyObject_IsTrue(value);
        if (v == -1) {
            return false;
        }

        writeByte(output, (int8_t) v);
        break;
    }
    case T_I08: {
        int32_t val;

        if (!parse_pyint(value, &val, INT8_MIN, INT8_MAX)) {
            return false;
        }

        writeByte(output, (int8_t) val);
        break;
    }
    case T_I16: {
        int32_t val;

        if (!parse_pyint(value, &val, INT16_MIN, INT16_MAX)) {
            return false;
        }

        writeI16(output, (int16_t) val);
        break;
    }
    case T_I32: {
        int32_t val;

        if (!parse_pyint(value, &val, INT32_MIN, INT32_MAX)) {
            return false;
        }

        writeI32(output, val);
        break;
    }
    case T_I64: {
        int64_t nval = PyLong_AsLongLong(value);

        if (INT_CONV_ERROR_OCCURRED(nval)) {
            return false;
        }

        if (!CHECK_RANGE(nval, INT64_MIN, INT64_MAX)) {
            PyErr_SetString(PyExc_OverflowError, "int out of range");
            return false;
        }

        writeI64(output, nval);
        break;
    }

    case T_DOUBLE: {
        double nval = PyFloat_AsDouble(value);
        if (nval == -1.0 && PyErr_Occurred()) {
            return false;
        }

        writeDouble(output, nval);
        break;
    }

    case T_STRING: {
        Py_ssize_t len = 0;
        /* Owned UTF-8 encoding of a unicode value; NULL when not needed. */
        PyObject* encoded = NULL;

        if (is_utf8(typeargs) && PyUnicode_Check(value)) {
            /* New reference: must be released on every path below. */
            encoded = PyUnicode_AsUTF8String(value);
            if (encoded == NULL) {
                return false;
            }
            value = encoded;
        }
        len = PyString_Size(value);

        if (!check_ssize_t_32(len)) {
            Py_XDECREF(encoded);
            return false;
        }

        writeI32(output, (int32_t) len);
        PycStringIO->cwrite(output, PyString_AsString(value), (int32_t) len);
        Py_XDECREF(encoded);
        break;
    }

    case T_LIST:
    case T_SET: {
        Py_ssize_t len;
        SetListTypeArgs parsedargs;
        PyObject *item;
        PyObject *iterator;

        if (!parse_set_list_args(&parsedargs, typeargs)) {
            return false;
        }

        len = PyObject_Length(value);

        if (!check_ssize_t_32(len)) {
            return false;
        }

        /* Header: element type followed by the element count. */
        writeByte(output, parsedargs.element_type);
        writeI32(output, (int32_t) len);

        iterator =  PyObject_GetIter(value);
        if (iterator == NULL) {
            return false;
        }

        while ((item = PyIter_Next(iterator))) {
            if (!output_val(output, item, parsedargs.element_type, parsedargs.typeargs)) {
                Py_DECREF(item);
                Py_DECREF(iterator);
                return false;
            }
            Py_DECREF(item);
        }

        Py_DECREF(iterator);

        /* PyIter_Next returns NULL both at the end and on error. */
        if (PyErr_Occurred()) {
            return false;
        }

        break;
    }

    case T_MAP: {
        PyObject *k, *v;
        Py_ssize_t pos = 0;
        Py_ssize_t len;

        MapTypeArgs parsedargs;

        len = PyDict_Size(value);
        if (!check_ssize_t_32(len)) {
            return false;
        }

        if (!parse_map_args(&parsedargs, typeargs)) {
            return false;
        }

        /* Header: key type, value type, then the number of pairs. */
        writeByte(output, parsedargs.ktag);
        writeByte(output, parsedargs.vtag);
        writeI32(output, len);

        // TODO(bmaurer): should support any mapping, not just dicts
        while (PyDict_Next(value, &pos, &k, &v)) {
            // TODO(dreiss): Think hard about whether these INCREFs actually
            //               turn any unsafe scenarios into safe scenarios.
            Py_INCREF(k);
            Py_INCREF(v);

            if (!output_val(output, k, parsedargs.ktag, parsedargs.ktypeargs)
                    || !output_val(output, v, parsedargs.vtag, parsedargs.vtypeargs)) {
                Py_DECREF(k);
                Py_DECREF(v);
                return false;
            }
            Py_DECREF(k);
            Py_DECREF(v);
        }
        break;
    }

    // TODO(dreiss): Consider breaking this out as a function
    //               the way we did for decode_struct.
    case T_STRUCT: {
        StructTypeArgs parsedargs;
        Py_ssize_t nspec;
        Py_ssize_t i;

        if (!parse_struct_args(&parsedargs, typeargs)) {
            return false;
        }

        nspec = PyTuple_Size(parsedargs.spec);

        if (nspec == -1) {
            return false;
        }

        for (i = 0; i < nspec; i++) {
            StructItemSpec parsedspec;
            PyObject* spec_tuple;
            PyObject* instval = NULL;

            /* Spec slots holding None are skipped. */
            spec_tuple = PyTuple_GET_ITEM(parsedargs.spec, i);
            if (spec_tuple == Py_None) {
                continue;
            }

            if (!parse_struct_item_spec (&parsedspec, spec_tuple)) {
                return false;
            }

            instval = PyObject_GetAttr(value, parsedspec.attrname);

            if (!instval) {
                return false;
            }

            /* Fields whose value is None are not written at all. */
            if (instval == Py_None) {
                Py_DECREF(instval);
                continue;
            }

            /* Field header: type byte and tag, then the value itself. */
            writeByte(output, (int8_t) parsedspec.type);
            writeI16(output, parsedspec.tag);

            if (!output_val(output, instval, parsedspec.type, parsedspec.typeargs)) {
                Py_DECREF(instval);
                return false;
            }

            Py_DECREF(instval);
        }

        /* A T_STOP byte terminates the field list. */
        writeByte(output, (int8_t)T_STOP);
        break;
    }

    case T_STOP:
    case T_VOID:
    case T_UTF16:
    case T_UTF8:
    case T_U64:
    default:
        PyErr_SetString(PyExc_TypeError, "Unexpected TType");
        return false;

    }

    return true;
}
Exemple #20
0
	/// Throws std::invalid_argument unless is_utf8() accepts the bytes of text.
	void throw_not_utf8(const std::string& text)
	{
		const bool valid = is_utf8(text.c_str(), text.length());
		if (valid)
			return;
		throw std::invalid_argument("The text is not encoded in UTF8");
	}
Exemple #21
0
	/// Throws std::invalid_argument unless is_utf8() accepts the
	/// NUL-terminated string text (its length is taken with std::strlen).
	void throw_not_utf8(const char* text)
	{
		const bool valid = is_utf8(text, std::strlen(text));
		if (valid)
			return;
		throw std::invalid_argument("The text is not encoded in UTF8");
	}
Exemple #22
0
Fichier : utf8.c Projet : samv/git
/*
 * Wrap the text, if necessary, and append it to buf. The variable indent is
 * the indent for the first line, indent2 is the indent for all other lines.
 * If indent is negative, assume that already -indent columns have been
 * consumed (and no extra indent is necessary for the first line).
 *
 * With a width of zero or less nothing is wrapped: the text is handed to
 * strbuf_add_indented_text() and 1 is returned. Otherwise the column
 * reached (w) at the end of the text is returned.
 */
int strbuf_add_wrapped_text(struct strbuf *buf,
		const char *text, int indent, int indent2, int width)
{
	/* w: current column; assume_utf8: measure widths with utf8_width(). */
	int w = indent, assume_utf8 = is_utf8(text);
	/*
	 * bol: start of the text of the current output line.
	 * space: position of the last whitespace up to which output was
	 * written, or NULL if nothing has been written on this line yet.
	 */
	const char *bol = text, *space = NULL;

	if (width <= 0) {
		strbuf_add_indented_text(buf, text, indent, indent2);
		return 1;
	}

	if (indent < 0) {
		w = -indent;
		/* Pretend something was already written: suppresses the indent. */
		space = text;
	}

	for (;;) {
		char c;
		size_t skip;

		/*
		 * Step over whatever display_mode_esc_sequence_len() recognises;
		 * those bytes do not advance the column counter.
		 */
		while ((skip = display_mode_esc_sequence_len(text)))
			text += skip;

		c = *text;
		/* Word boundary: either end of text or a whitespace character. */
		if (!c || isspace(c)) {
			/*
			 * The pending word fits, or it is the first word on the
			 * line and has to be written regardless of its length.
			 */
			if (w < width || !space) {
				const char *start = bol;
				/* End of text with nothing pending on this line. */
				if (!c && text == start)
					return w;
				if (space)
					start = space;
				else
					print_spaces(buf, indent);
				/* Append everything from the last break up to here. */
				strbuf_write(buf, start, text - start);
				if (!c)
					return w;
				space = text;
				if (c == '\t')
					/* With the w++ below, advances to the next multiple of 8. */
					w |= 0x07;
				else if (c == '\n') {
					/* Look at the character following the newline. */
					space++;
					if (*space == '\n') {
						/* Two newlines in a row: emit one here, new_line adds the other. */
						strbuf_write(buf, "\n", 1);
						goto new_line;
					}
					else if (!isalnum(*space))
						/* Next line does not start with a letter or digit: keep the break. */
						goto new_line;
					else
						/* Otherwise a space is written in place of the newline. */
						strbuf_write(buf, " ", 1);
				}
				w++;
				text++;
			}
			else {
new_line:
				/* Start a new output line and rescan from the last break. */
				strbuf_write(buf, "\n", 1);
				/* Skip the whitespace character at the break, if there is one. */
				text = bol = space + isspace(*space);
				space = NULL;
				w = indent = indent2;
			}
			continue;
		}
		/* Ordinary character: advance text and account for its width. */
		if (assume_utf8)
			w += utf8_width(&text, NULL);
		else {
			w++;
			text++;
		}
	}
}
Exemple #23
0
/*
 * Returns a copy of str unchanged when is_utf8(encoding) is true; otherwise
 * forwards the byte range [str.data(), str.data() + str.size()) to the
 * three-argument from_utf8() overload and returns its result.
 */
std::string CPPCMS_API from_utf8(char const *encoding, std::string const &str)
{
	if (!is_utf8(encoding)) {
		char const *first = str.data();
		char const *last = first + str.size();
		return from_utf8(encoding, first, last);
	}
	return str;
}
Exemple #24
0
	// Validates the first len bytes at text with is_utf8(); returns normally
	// when the check passes and throws std::invalid_argument otherwise.
	void throw_not_utf8(const char* text, unsigned len)
	{
		if (is_utf8(text, len)) {
			return;
		}

		throw std::invalid_argument("The text is not encoded in UTF8");
	}
Exemple #25
0
/*
 * Entry point of the "commit" builtin.
 *
 * Builds the text of a commit object in a strbuf (tree line, parent lines,
 * author, committer, optional encoding header and the message read from the
 * edit-message file), writes it with write_sha1_file(), updates HEAD through
 * a ref lock using a one-line reflog message, removes the MERGE_HEAD,
 * MERGE_MSG and SQUASH_MSG files, commits the index files, and finally runs
 * rerere() and the "post-commit" hook.
 *
 * argc/argv are the command-line arguments, prefix is passed through to
 * prepare_index(), prepare_to_commit() and print_summary().
 *
 * Returns 0 on success and 1 when prepare_to_commit() reports failure; all
 * other failures are reported through die().
 */
int cmd_commit(int argc, const char **argv, const char *prefix)
{
	int header_len;
	struct strbuf sb;
	const char *index_file, *reflog_msg;
	char *nl, *p;
	unsigned char commit_sha1[20];
	struct ref_lock *ref_lock;

	git_config(git_commit_config);

	argc = parse_and_validate_options(argc, argv, builtin_commit_usage);

	index_file = prepare_index(argc, argv, prefix);

	/* Set up everything for writing the commit object.  This includes
	   running hooks, writing the trees, and interacting with the user.  */
	if (!prepare_to_commit(index_file, prefix)) {
		rollback_index_files();
		return 1;
	}

	/*
	 * The commit object
	 */
	strbuf_init(&sb, 0);
	strbuf_addf(&sb, "tree %s\n",
		    sha1_to_hex(active_cache_tree->sha1));

	/* Determine parents */
	if (initial_commit) {
		/* No parent lines at all for the first commit. */
		reflog_msg = "commit (initial)";
	} else if (amend) {
		struct commit_list *c;
		struct commit *commit;

		/* Reuse the parents of the commit being amended. */
		reflog_msg = "commit (amend)";
		commit = lookup_commit(head_sha1);
		if (!commit || parse_commit(commit))
			die("could not parse HEAD commit");

		for (c = commit->parents; c; c = c->next)
			add_parent(&sb, c->item->object.sha1);
	} else if (in_merge) {
		struct strbuf m;
		FILE *fp;

		/* head_sha1 first, then one parent per line of MERGE_HEAD. */
		reflog_msg = "commit (merge)";
		add_parent(&sb, head_sha1);
		strbuf_init(&m, 0);
		fp = fopen(git_path("MERGE_HEAD"), "r");
		if (fp == NULL)
			die("could not open %s for reading: %s",
			    git_path("MERGE_HEAD"), strerror(errno));
		while (strbuf_getline(&m, fp, '\n') != EOF) {
			unsigned char sha1[20];
			if (get_sha1_hex(m.buf, sha1) < 0)
				die("Corrupt MERGE_HEAD file (%s)", m.buf);
			add_parent(&sb, sha1);
		}
		fclose(fp);
		strbuf_release(&m);
	} else {
		/* Ordinary commit: head_sha1 is the single parent. */
		reflog_msg = "commit";
		strbuf_addf(&sb, "parent %s\n", sha1_to_hex(head_sha1));
	}

	strbuf_addf(&sb, "author %s\n",
		    fmt_ident(author_name, author_email, author_date, IDENT_ERROR_ON_NO_NAME));
	strbuf_addf(&sb, "committer %s\n", git_committer_info(IDENT_ERROR_ON_NO_NAME));
	/* The encoding header is only written for non-UTF-8 commit encodings. */
	if (!is_encoding_utf8(git_commit_encoding))
		strbuf_addf(&sb, "encoding %s\n", git_commit_encoding);
	strbuf_addch(&sb, '\n');

	/* Finally, get the commit message */
	header_len = sb.len;	/* everything before this offset is header */
	if (strbuf_read_file(&sb, git_path(commit_editmsg), 0) < 0) {
		rollback_index_files();
		die("could not read commit message");
	}

	/* Truncate the message just before the diff, if any. */
	p = strstr(sb.buf, "\ndiff --git a/");
	if (p != NULL)
		strbuf_setlen(&sb, p - sb.buf + 1);

	if (cleanup_mode != CLEANUP_NONE)
		stripspace(&sb, cleanup_mode == CLEANUP_ALL);
	if (sb.len < header_len || message_is_empty(&sb, header_len)) {
		rollback_index_files();
		die("no commit message?  aborting commit.");
	}
	/*
	 * Append an explicit NUL for the is_utf8() check; it is excluded
	 * again from the object by passing sb.len - 1 below.
	 */
	strbuf_addch(&sb, '\0');
	/*
	 * Print the warning through "%s" so that its text is never
	 * interpreted as a printf format string.
	 */
	if (is_encoding_utf8(git_commit_encoding) && !is_utf8(sb.buf))
		fprintf(stderr, "%s", commit_utf8_warn);

	if (write_sha1_file(sb.buf, sb.len - 1, commit_type, commit_sha1)) {
		rollback_index_files();
		die("failed to write commit object");
	}

	ref_lock = lock_any_ref_for_update("HEAD",
					   initial_commit ? NULL : head_sha1,
					   0);

	/*
	 * Turn sb into the reflog message: keep only the first line of the
	 * commit message and prefix it with "<reflog_msg>: ".
	 */
	nl = strchr(sb.buf + header_len, '\n');
	if (nl)
		strbuf_setlen(&sb, nl + 1 - sb.buf);
	else
		strbuf_addch(&sb, '\n');
	strbuf_remove(&sb, 0, header_len);
	strbuf_insert(&sb, 0, reflog_msg, strlen(reflog_msg));
	strbuf_insert(&sb, strlen(reflog_msg), ": ", 2);

	if (!ref_lock) {
		rollback_index_files();
		die("cannot lock HEAD ref");
	}
	if (write_ref_sha1(ref_lock, commit_sha1, sb.buf) < 0) {
		rollback_index_files();
		die("cannot update HEAD ref");
	}

	/* Return values are not checked: these files may not exist. */
	unlink(git_path("MERGE_HEAD"));
	unlink(git_path("MERGE_MSG"));
	unlink(git_path("SQUASH_MSG"));

	if (commit_index_files())
		die ("Repository has been updated, but unable to write\n"
		     "new_index file. Check that disk is not full or quota is\n"
		     "not exceeded, and then \"git reset HEAD\" to recover.");

	rerere();
	run_hook(get_index_file(), "post-commit", NULL);
	if (!quiet)
		print_summary(prefix, commit_sha1);

	return 0;
}