Пример #1
0
idn_result_t
idn_delimitermap_add(idn_delimitermap_t ctx, unsigned long delimiter) {
	idn_result_t r;

	assert(ctx != NULL && ctx->ndelimiters <= ctx->delimiter_size);
	TRACE(("idn_delimitermap_add(delimiter=\\x%04lx)\n", delimiter));

	if (delimiter == 0 || delimiter > UNICODE_MAX ||
	    IS_SURROGATE_HIGH(delimiter) || IS_SURROGATE_LOW(delimiter)) {
		r = idn_invalid_codepoint;
		goto ret;
	}
	    
	if (ctx->ndelimiters == ctx->delimiter_size) {
		unsigned long *new_delimiters;

		new_delimiters = (unsigned long *) realloc(ctx->delimiters,
			sizeof(unsigned long) * ctx->delimiter_size * 2);
		if (new_delimiters == NULL) {
			r = idn_nomemory;
			goto ret;
		}
		ctx->delimiters = new_delimiters;
		ctx->delimiter_size *= 2;
	}

	ctx->delimiters[ctx->ndelimiters] = delimiter;
	ctx->ndelimiters++;
	r = idn_success;

ret:
	TRACE(("idn_delimitermap_add(): %s\n", idn_result_tostring(r)));
	return (r);
}
Пример #2
0
idn_result_t
idn_ucs4_ucs4toutf16(const unsigned long *ucs4, unsigned short *utf16,
		     size_t tolen) {
	unsigned short *utf16p = utf16;
	unsigned long v;
	idn_result_t r;

	TRACE(("idn_ucs4_ucs4toutf16(ucs4=\"%s\", tolen=%d)\n",
	       idn__debug_ucs4xstring(ucs4, 50), (int)tolen));

	while (*ucs4 != '\0') {
		v = *ucs4++;

		if (IS_SURROGATE_LOW(v) || IS_SURROGATE_HIGH(v)) {
			WARNING(("idn_ucs4_ucs4toutf16: UCS4 string contains "
				 "surrogate pair\n"));
			r = idn_invalid_encoding;
			goto ret;
		} else if (v > 0xffff) {
			/* Convert to surrogate pair */
			if (v >= 0x110000) {
				r = idn_invalid_encoding;
				goto ret;
			}
			if (tolen < 2) {
				r = idn_buffer_overflow;
				goto ret;
			}
			*utf16p++ = SURROGATE_HIGH(v);
			*utf16p++ = SURROGATE_LOW(v);
			tolen -= 2;
		} else {
			if (tolen < 1) {
				r = idn_buffer_overflow;
				goto ret;
			}
			*utf16p++ = v;
			tolen--;
		}
	}

	if (tolen < 1) {
		r = idn_buffer_overflow;
		goto ret;
	}
	*utf16p = '\0';

	r = idn_success;
ret:
	if (r == idn_success) {
		TRACE(("idn_ucs4_ucs4toutf16(): success (utf16=\"%s\")\n",
		       idn__debug_utf16xstring(utf16, 50)));
	} else {
		TRACE(("idn_ucs4_ucs4toutf16(): %s\n",
		       idn_result_tostring(r)));
	}
	return (r);
}
Пример #3
0
idn_result_t
idn_ucs4_utf16toucs4(const unsigned short *utf16, unsigned long *ucs4,
		     size_t tolen) {
	unsigned long *ucs4p = ucs4;
	unsigned short v0, v1;
	idn_result_t r;

	TRACE(("idn_ucs4_utf16toucs4(utf16=\"%s\", tolen=%d)\n",
	       idn__debug_utf16xstring(utf16, 50), (int)tolen));

	while (*utf16 != '\0') {
		v0 = *utf16;

		if (tolen < 1) {
			r = idn_buffer_overflow;
			goto ret;
		}

		if (IS_SURROGATE_HIGH(v0)) {
			v1 = *(utf16 + 1);
			if (!IS_SURROGATE_LOW(v1)) {
				WARNING(("idn_ucs4_utf16toucs4: "
					 "corrupted surrogate pair\n"));
				r = idn_invalid_encoding;
				goto ret;
			}
			*ucs4p++ = COMBINE_SURROGATE(v0, v1);
			tolen--;
			utf16 += 2;

		} else {
			*ucs4p++ = v0;
			tolen--;
			utf16++;
			
		}
	}

	if (tolen < 1) {
		r = idn_buffer_overflow;
		goto ret;
	}
	*ucs4p = '\0';

	r = idn_success;
ret:
	if (r == idn_success) {
		TRACE(("idn_ucs4_utf16toucs4(): success (ucs4=\"%s\")\n",
		       idn__debug_ucs4xstring(ucs4, 50)));
	} else {
		TRACE(("idn_ucs4_utf16toucs4(): %s\n",
		       idn_result_tostring(r)));
	}
	return (r);
}
Пример #4
0
idn_result_t
idn_ucs4_ucs4toutf8(const unsigned long *ucs4, char *utf8, size_t tolen) {
	unsigned char *utf8p = (unsigned char *)utf8;
	unsigned long v;
	int width;
	int mask;
	int offset;
	idn_result_t r;

	TRACE(("idn_ucs4_ucs4toutf8(ucs4=\"%s\", tolen=%d)\n",
	       idn__debug_ucs4xstring(ucs4, 50), (int)tolen));

	while (*ucs4 != '\0') {
		v = *ucs4++;
		if (IS_SURROGATE_LOW(v) || IS_SURROGATE_HIGH(v)) {
			WARNING(("idn_ucs4_ucs4toutf8: UCS4 string contains "
				 "surrogate pair\n"));
			r = idn_invalid_encoding;
			goto ret;
		}
		if (v < 0x80) {
			mask = 0;
			width = 1;
		} else if (v < 0x800) {
			mask = 0xc0;
			width = 2;
		} else if (v < 0x10000) {
			mask = 0xe0;
			width = 3;
		} else if (v < 0x200000) {
			mask = 0xf0;
			width = 4;
		} else if (v < 0x4000000) {
			mask = 0xf8;
			width = 5;
		} else if (v < 0x80000000) {
			mask = 0xfc;
			width = 6;
		} else {
			WARNING(("idn_ucs4_ucs4toutf8: invalid character\n"));
			r = idn_invalid_encoding;
			goto ret;
		}

		if (tolen < width) {
			r = idn_buffer_overflow;
			goto ret;
		}
		offset = 6 * (width - 1);
		*utf8p++ = (v >> offset) | mask;
		mask = 0x80;
		while (offset > 0) {
			offset -= 6;
			*utf8p++ = ((v >> offset) & 0x3f) | mask;
		}
		tolen -= width;
	}

	if (tolen < 1) {
		r = idn_buffer_overflow;
		goto ret;
	}
	*utf8p = '\0';

	r = idn_success;
ret:
	if (r == idn_success) {
		TRACE(("idn_ucs4_ucs4toutf8(): success (utf8=\"%s\")\n",
		       idn__debug_xstring(utf8, 50)));
	} else {
		TRACE(("idn_ucs4_ucs4toutf8(): %s\n",
		       idn_result_tostring(r)));
	}
	return (r);
}
Пример #5
0
idn_result_t
idn_ucs4_utf8toucs4(const char *utf8, unsigned long *ucs4, size_t tolen) {
	const unsigned char *utf8p = (const unsigned char *)utf8;
	unsigned long *ucs4p = ucs4;
	unsigned long v, min;
	unsigned char c;
	int width;
	int i;
	idn_result_t r;

	TRACE(("idn_ucs4_utf8toucs4(utf8=\"%s\", tolen=%d)\n",
	       idn__debug_xstring(utf8, 50), (int)tolen));

	while(*utf8p != '\0') {
		c = *utf8p++;
		if (c < 0x80) {
			v = c;
			min = 0;
			width = 1;
		} else if (c < 0xc0) {
			WARNING(("idn_ucs4_utf8toucs4: invalid character\n"));
			r = idn_invalid_encoding;
			goto ret;
		} else if (c < 0xe0) {
			v = c & 0x1f;
			min = 0x80;
			width = 2;
		} else if (c < 0xf0) {
			v = c & 0x0f;
			min = 0x800;
			width = 3;
		} else if (c < 0xf8) {
			v = c & 0x07;
			min = 0x10000;
			width = 4;
		} else if (c < 0xfc) {
			v = c & 0x03;
			min = 0x200000;
			width = 5;
		} else if (c < 0xfe) {
			v = c & 0x01;
			min = 0x4000000;
			width = 6;
		} else {
			WARNING(("idn_ucs4_utf8toucs4: invalid character\n"));
			r = idn_invalid_encoding;
			goto ret;
		}

		for (i = width - 1; i > 0; i--) {
			c = *utf8p++;
			if (c < 0x80 || 0xc0 <= c) {
				WARNING(("idn_ucs4_utf8toucs4: "
					 "invalid character\n"));
				r = idn_invalid_encoding;
				goto ret;
			}
			v = (v << 6) | (c & 0x3f);
		}

	        if (v < min) {
			WARNING(("idn_ucs4_utf8toucs4: invalid character\n"));
			r = idn_invalid_encoding;
			goto ret;
		}
		if (IS_SURROGATE_LOW(v) || IS_SURROGATE_HIGH(v)) {
			WARNING(("idn_ucs4_utf8toucs4: UTF-8 string contains "
				 "surrogate pair\n"));
			r = idn_invalid_encoding;
			goto ret;
		}
		if (tolen < 1) {
			r = idn_buffer_overflow;
			goto ret;
		}
		tolen--;
		*ucs4p++ = v;
	}

	if (tolen < 1) {
		r = idn_buffer_overflow;
		goto ret;
	}
	*ucs4p = '\0';

	r = idn_success;
ret:
	if (r == idn_success) {
		TRACE(("idn_ucs4_utf8toucs4(): success (ucs4=\"%s\")\n",
		       idn__debug_ucs4xstring(ucs4, 50)));
	} else {
		TRACE(("idn_ucs4_utf8toucs4(): %s\n",
		       idn_result_tostring(r)));
	}
	return (r);
}