示例#1
0
/*
 * mz_mbNullConv
 *
 * Pass-through conversion for multibyte encodings: the bytes are copied
 * unchanged, but the output is cut back so that it never ends in the
 * middle of a multibyte character.
 *
 * This routine is designed to replace the following routines:
 *	mz_euc2euc
 *	mz_b52b5
 *	mz_cns2cns
 *	mz_ksc2ksc
 *	mz_sjis2sjis
 *	mz_utf82utf8
 *
 * It should also replace
 *	mz_gb2gb
 * but mz_gb2gb currently also handles HZ to GB.  That functionality has
 * to be moved out of mz_gb2gb first.
 *
 * obj    conversion state; supplies the character-length function (selected
 *        by INTL_GetCCCCvtflag) and the unconverted-bytes buffer.
 * buf    input bytes; only bufsz bytes are read, no terminator is required.
 * bufsz  number of bytes in buf.
 *
 * Returns a buffer obtained from XP_ALLOC that holds the bytes left over
 * from the previous call followed by the new input, truncated after the
 * last complete character and NUL terminated.  Its length is reported
 * through INTL_SetCCCLen.  The bytes of a trailing incomplete character
 * are stored, NUL terminated, in the unconverted buffer of obj.
 * Returns NULL when an argument is invalid, or when the allocation fails
 * (in which case the retval of obj is set to MK_OUT_OF_MEMORY).
 */
PRIVATE unsigned char *
mz_mbNullConv(CCCDataObject obj, const unsigned char *buf, int32 bufsz)
{
	int32			left_over;
	int32			len;
	unsigned char	*p;
	unsigned char	*ret;
	int32			total;
	intl_CharLenFunc	CharLenFunc;
	int				charlen;
	unsigned char	*uncvtbuf;
	int32			uncvtsz;

	/*	Reject nonsense input BEFORE anything is read through obj */
	if ((!obj) || (!buf) || (bufsz < 0))
		return NULL;

	CharLenFunc = intl_char_len_func[INTL_GetCCCCvtflag(obj)];

	/*	Get the unconverted buffer */
	uncvtbuf = INTL_GetCCCUncvtbuf(obj);
	uncvtsz = (int32) strlen((char *)uncvtbuf);

	/*	Allocate the output buffer (+1 for the terminating NUL) */
	total = uncvtsz + bufsz;
	ret = (unsigned char *) XP_ALLOC(total + 1);
	if (!ret)
	{
		INTL_SetCCCRetval(obj, MK_OUT_OF_MEMORY);
		return NULL;
	}

	/*	Copy the unconverted buffer into the output buffer */
	memcpy(ret, uncvtbuf, uncvtsz);
	/*	Append the current input buffer */
	memcpy(ret + uncvtsz, buf, bufsz);

	/*
	 *	Walk through the buffer one character at a time.  The walk stops
	 *	at the first character whose length exceeds the bytes remaining;
	 *	p then points at the start of that incomplete character and
	 *	never moves past the end of the allocation.
	 */
	p = ret;
	len = total;
	left_over = 0;
	while (len > 0)
	{
		charlen = CharLenFunc(*p);
		if (charlen < 1)
			charlen = 1;	/* defensive: a non-positive length would stall the walk */
		if (charlen > len)
		{
			/* only a multibyte character can be longer than len >= 1 */
			left_over = len;
			break;
		}
		p += charlen;
		len -= charlen;
	}

	/*	Save the incomplete trailing character for the next call */
	if (left_over)
		memcpy(uncvtbuf, p, left_over);
	/*	NUL terminate the unconverted buffer (empties it if nothing is left) */
	uncvtbuf[left_over] = '\0';

	/*	NUL terminate the return buffer and report its length */
	INTL_SetCCCLen(obj, total - left_over);
	ret[total - left_over] = 0;

	return ret;
}
示例#2
0
/*
 * mz_hz2gb
 *
 * Convert HZ-escaped text to 8-bit GB.
 *
 * Escapes handled while in ASCII mode:
 *	"~{"                       switch to GB mode
 *	"~~"                       a literal '~'
 *	"~" CR LF, "~" LF, "~" CR  line continuation, dropped from the output
 * While in GB mode:
 *	"~}"                       switch back to ASCII mode
 *	CR LF, LF, CR              copied as is; GB mode ends at a line break
 *	any other byte             emitted with its high bit set
 *
 * obj      conversion state; its unconverted buffer carries the GB mode
 *          across calls as the string "~{".
 * gbbuf    input bytes; only gbbufsz bytes are read.
 * gbbufsz  number of bytes in gbbuf.
 *
 * Returns a NUL-terminated buffer obtained from XP_ALLOC; its length is
 * reported through INTL_SetCCCLen.  Returns NULL when an argument is
 * invalid, or when the allocation fails (the retval of obj is then set to
 * MK_OUT_OF_MEMORY).
 *
 * NOTE(review): an escape sequence split across two calls (a buffer that
 * ends in '~') is not recognised; the lone '~' is treated as an ordinary
 * byte of the current mode, as before.
 */
PRIVATE unsigned char *
mz_hz2gb(CCCDataObject obj, const unsigned char *gbbuf, int32 gbbufsz)
{
	const unsigned char *p;
	unsigned char *q;
	unsigned char *output;
	unsigned char *uncvtbuf;
	int32 uncvtlen;
	int32 i, len;
	int pass;
	int in_gb = 0;		/* non-zero while between "~{" and "~}" */

	if ((!obj) || (!gbbuf) || (gbbufsz < 0))
		return NULL;

	uncvtbuf = INTL_GetCCCUncvtbuf(obj);
	uncvtlen = (int32) strlen((char *)uncvtbuf);

	/* The output is never longer than the input: escapes only remove bytes */
	q = output = (unsigned char *) XP_ALLOC(uncvtlen + gbbufsz + 1);
	if (output == NULL)
	{
		INTL_SetCCCRetval(obj, MK_OUT_OF_MEMORY);
		return NULL;
	}

	/* Pass 0 replays what the previous call left behind, pass 1 the new input */
	for (pass = 0; pass < 2; pass++)
	{
		if (pass == 0)
		{
			p = uncvtbuf;
			len = uncvtlen;
		}
		else
		{
			p = gbbuf;
			len = gbbufsz;
		}

		for (i = 0; i < len; )
		{
			/* Look-ahead bytes, or -1 when they lie beyond this segment */
			int next  = (i + 1 < len) ? p[1] : -1;
			int next2 = (i + 2 < len) ? p[2] : -1;

			if (in_gb)
			{
				if (*p == '~' && next == '}')	/* switch back to ASCII mode */
				{
					p += 2;
					i += 2;
					in_gb = 0;
				}
				else if (*p == 0x0D && next == 0x0A)	/* CR LF */
				{
					*q++ = *p++;	/* 0x0D */
					*q++ = *p++;	/* 0x0A */
					i += 2;
					in_gb = 0;	/* a normal line break ends GB mode */
				}
				else if (*p == 0x0A || *p == 0x0D)	/* lone LF or CR */
				{
					*q++ = *p++;
					i++;
					in_gb = 0;	/* a normal line break ends GB mode */
				}
				else
				{
					/*
					 * Emit the GB byte right away.  Output order is the
					 * same as collecting the run and flushing it at the
					 * terminator, but no pointer has to survive the
					 * switch from one segment to the next.
					 */
					*q++ = (unsigned char) (*p++ | 0x80);
					i++;
				}
			}
			else if (*p == '~' && next == '{')	/* switch to GB mode */
			{
				p += 2;
				i += 2;
				in_gb = 1;
			}
			else if (*p == '~' && next == 0x0D && next2 == 0x0A)	/* line-continuation marker */
			{
				p += 3;
				i += 3;
			}
			else if (*p == '~' && (next == 0x0A || next == 0x0D))	/* line-continuation marker */
			{
				p += 2;
				i += 2;
			}
			else if (*p == '~' && next == '~')	/* ~~ means ~ */
			{
				*q++ = '~';
				p += 2;
				i += 2;
			}
			else
			{
				*q++ = *p++;
				i++;
			}
		}
	}

	*q = '\0';
	INTL_SetCCCLen(obj, (int32) (q - output));

	/*
	 * The unconverted buffer is too small to hold a pending GB run (the
	 * original note puts UNCVTBUF_SIZE at only 8 bytes), so everything
	 * seen so far has already been converted.  Only the mode is carried
	 * over: "~{" when the input stopped inside GB mode, otherwise nothing.
	 * Clearing it in the latter case keeps a stale "~{" from being
	 * replayed by the next call.
	 */
	if (in_gb)
		XP_STRCPY((char *)uncvtbuf, "~{");
	else
		uncvtbuf[0] = '\0';

	return output;
}
示例#3
0
/*
 * mz_sjis2euc
 *
 * Convert Shift-JIS text to EUC.
 *
 * obj        conversion state; its unconverted buffer holds the bytes of a
 *            character that the previous call could not complete.
 * sjisbuf    SJIS bytes to convert; only sjisbufsz bytes are read.
 * sjisbufsz  size of sjisbuf in bytes.
 *
 * Returns a NUL-terminated buffer obtained from XP_ALLOC; its length (not
 * counting the NUL) is reported through INTL_SetCCCLen.  A lead byte whose
 * second byte has not arrived yet is saved in the unconverted buffer.
 * Returns NULL when an argument is invalid, or when the allocation fails
 * (the retval of obj is then set to MK_OUT_OF_MEMORY).
 */
MODULE_PRIVATE unsigned char *
mz_sjis2euc(	CCCDataObject		obj,
			const unsigned char	*sjisbuf,	/* SJIS buf for conversion	*/
			int32				sjisbufsz)	/* SJIS buf size in bytes	*/
{
	unsigned char			*tobuf = NULL;
	int32					tobufsz;
	register unsigned char	*sjisp, *tobufp;	/* current byte in bufs	*/
	register unsigned char	*sjisend;			/* one past last SJIS byte	*/
	register unsigned char	*tobufep;			/* last byte of EUC buf	*/
	unsigned char			*uncvtp;			/* cursor into uncvtbuf	*/
	unsigned char			*uncvtbuf;
	int32					uncvtlen;
	int32					consumed;
	int						pending;			/* still working on uncvtbuf? */

	if ((!obj) || (!sjisbuf) || (sjisbufsz < 0))
		return(NULL);

	uncvtbuf = INTL_GetCCCUncvtbuf(obj);

										/* Allocate a EUC buffer:		*/
		/* In the worst case ( all Half-width Kanas ), the converted	*/
		/* EUC will be 2X the size of the SJIS + 1 for nul byte			*/
	uncvtlen = (int32) strlen((char *)uncvtbuf);
	tobufsz = ((sjisbufsz + uncvtlen) << 1) + 1;
	if ((tobuf = (unsigned char *)XP_ALLOC(tobufsz)) == (unsigned char *)NULL) {
		INTL_SetCCCRetval(obj, MK_OUT_OF_MEMORY);
		return(NULL);
	}
										/* Initialize pointers, etc.	*/
	sjisp = (unsigned char *)sjisbuf;
	sjisend = sjisp + sjisbufsz;		/* exclusive end: stays inside the
										 * array even for empty input	*/

							/* If prev. unconverted chars, append new chars
							 * to them and try to process the combination.
							 * One slot is kept free for the nul so the
							 * terminator is written below the
							 * UNCVTBUF_SIZE bound the loop enforces.
							 */
	pending = (uncvtlen > 0);
	if (pending) {
		uncvtp = uncvtbuf + uncvtlen;
		while (uncvtp < (uncvtbuf + UNCVTBUF_SIZE - 1) &&
													sjisp < sjisend)
			*uncvtp++ = *sjisp++;
		*uncvtp = '\0';					/* nul terminate	*/
		sjisp = uncvtbuf;				/* process unconverted first */
		sjisend = uncvtp;
	}

	tobufp = tobuf;
	tobufep = tobufp + tobufsz - 1;		/* save space for terminating null */

	for (;;) {
									/* While SJIS data && space in EUC buf. */
		while ((sjisp < sjisend) && (tobufp <= tobufep)) {
			if (*sjisp < 0x80) {
										/* ASCII/JIS-Roman 				*/
				*tobufp++ = *sjisp++;

			} else if (*sjisp < 0xA0) {
										/* 1st byte of 2-byte low SJIS. */
				if (sjisp + 1 >= sjisend)	/* No 2nd byte in SJIS buffer?	*/
					break;

				TwoByteSJIS2EUC(sjisp, tobufp, 0x70);

			} else if (*sjisp == 0xA0) {
										/* SJIS half-width space.	*/
										/* Just treat like Roman??	*/
				*tobufp++ = *sjisp++;

			} else if (*sjisp < 0xE0) {
										/* SJIS half-width katakana		*/
				*tobufp++ = SS2;
				*tobufp++ = *sjisp | 0x80;	/* Set 8th bit for EUC & SJIS */
				sjisp++;

			} else if (*sjisp < 0xF0) {
										/* 1st byte of 2-byte high SJIS */
				if (sjisp + 1 >= sjisend)	/* No 2nd byte in SJIS buffer? */
					break;

				TwoByteSJIS2EUC(sjisp, tobufp, 0xB0);
			} else {
										/* User Defined SJIS: copy bytes */
				if (sjisp + 1 >= sjisend)	/* No 2nd byte in SJIS buf?	*/
					break;

				*tobufp++ = *sjisp++;		/* Just copy 2 bytes.	*/
				*tobufp++ = *sjisp++;
			}
		}

		if (!pending)
			break;						/* new data has been processed	*/

										/* sjisp pts to 1st unprocessed char
										 * in uncvtbuf.  Some new chars may
										 * have been processed along with
										 * the unconverted ones, so set up
										 * ptrs not to process them twice.
										 */
		consumed = (int32) (sjisp - uncvtbuf);
		if (consumed < uncvtlen) {
										/* Conversion stopped inside the old
										 * unconverted bytes, i.e. no new
										 * byte could be merged (empty
										 * input, or uncvtbuf already
										 * full).  Rebasing would point in
										 * front of sjisbuf, so leave the
										 * tail pending instead; it is
										 * saved again below.
										 */
			break;
		}
		sjisp = (unsigned char *)sjisbuf + (consumed - uncvtlen);
		sjisend = (unsigned char *)sjisbuf + sjisbufsz;
		uncvtbuf[0] = '\0';				/* No more unconverted chars.	*/
		pending = 0;					/* Process new data				*/
	}

	*tobufp = '\0';						/* null terminate EUC data */
	INTL_SetCCCLen(obj, tobufp - tobuf);			/* length not counting null	*/

	if (sjisp < sjisend) {				/* unconverted SJIS?	*/
										/* Ascending copy: also correct when
										 * the source is the tail of
										 * uncvtbuf itself.  Bounded like
										 * the merge above.
										 */
		uncvtp = uncvtbuf;
		while (sjisp < sjisend &&
							uncvtp < (uncvtbuf + UNCVTBUF_SIZE - 1))
			*uncvtp++ = *sjisp++;
		*uncvtp = '\0';					/* null terminate		*/
	}
	return(tobuf);
}