Пример #1
0
/**
 * @internal Scan a UTF8 buffer and measure how long it would be after
 * conversion to the platform multibyte encoding.
 *
 * ASCII bytes count as one byte each. Every non-ASCII sequence is decoded
 * with decodeUTF8CharN and measured with wctomb; a character wctomb cannot
 * encode is counted as a single byte.
 *
 * @param[in] buf the UTF8 bytes to examine
 * @param[in] nbytes number of bytes in buf
 *
 * @return 0 if the buffer holds no non-ASCII bytes or if an undecodable
 * sequence is met; otherwise the length the buffer would have in the
 * platform encoding
 *
 * @note this relies on the assumption that wide chars are Unicode.
 * If not, the platform will need different support for this
 */
static IDATA
walkUTF8String (const U_8 * buf, IDATA nbytes)
{
  const U_8 *const limit = buf + nbytes;
  const U_8 *pos = buf;
  IDATA platformLength = 0;
  int sawNonAscii = 0;

  /* start from wctomb's initial shift state */
  wctomb (NULL, 0);

  while (pos < limit)
    {
      char scratch[MB_CUR_MAX];
      U_16 codeUnit;
      U_32 consumed;
      int encodedSize;

      /* plain ASCII: one byte in, one byte out */
      if ((*pos & 0x80) != 0x80)
        {
          pos += 1;
          platformLength += 1;
          continue;
        }

      consumed = decodeUTF8CharN (pos, &codeUnit, limit - pos);
      if (consumed == 0)
        {
          /* malformed input: report "nothing to convert" */
          return 0;
        }
      pos += consumed;

      /* ask wctomb how many bytes this character needs */
      encodedSize = wctomb (scratch, (wchar_t) codeUnit);
      /* an un-encodable character is counted as one byte */
      platformLength += (encodedSize == -1) ? 1 : encodedSize;
      sawNonAscii = 1;
    }

  if (!sawNonAscii)
    {
      return 0;
    }
  return platformLength;
}
Пример #2
0
/**
 * Convert a UTF8-encoded text buffer to the platform encoding and return the
 * result in a newly allocated, NUL-terminated buffer.
 *
 * If the input contains no bytes with the high bit set, it is copied
 * unchanged (line feeds are left as they are). Otherwise the input is decoded
 * to 16-bit units, with every LF expanded to CR LF, and converted with
 * WideCharToMultiByte using the code page returned by GetConsoleOutputCP.
 * If the input cannot be decoded, the conversion fails, or a working buffer
 * cannot be allocated, an unconverted copy of the input is returned instead.
 *
 * @param[in] portLibrary The port library
 * @param[in] buf buffer of text to be converted.
 * @param[in] nbytes size of buffer of text to be converted.
 *
 * @return buffer obtained from mem_allocate_memory holding the NUL-terminated
 * text, or NULL if that buffer could not be allocated.
 */
char *VMCALL
hybuf_write_text (struct HyPortLibrary * portLibrary,
                  const char *buf, IDATA nbytes)
{
    IDATA i;
    IDATA newlines = 0;
    int highchars = 0;
    char *outBuf = NULL;

    /* scan the buffer for any characters which need to be converted */
    for (i = 0; i < nbytes; i++)
    {
        if (buf[i] == '\n')
        {
            newlines += 1;
        }
        else if ((U_8) buf[i] & 0x80)
        {
            highchars += 1;
        }
    }

    /* if there are any non-ASCII chars, convert to Unicode and then to the local code page */
    if (highchars)
    {
        /*
         * Every input byte produces at most one 16-bit unit, except LF which
         * produces two (CR LF). The newline count must therefore be part of
         * the size, otherwise the loop below can write past the allocation.
         */
        IDATA wBufSize = (nbytes + newlines) * (IDATA) sizeof (U_16);
        U_16 *wBuf = portLibrary->mem_allocate_memory (portLibrary, wBufSize);
        if (wBuf)
        {
            const U_8 *in = (const U_8 *) buf;
            const U_8 *end = in + nbytes;
            U_16 *out = wBuf;
            int decoded = 1;

            while (in < end)
            {
                if (*in == '\n')
                {
                    *out++ = (U_16) '\r';
                    *out++ = (U_16) '\n';
                    in += 1;
                }
                else
                {
                    U_32 numberU8Consumed =
                        decodeUTF8CharN (in, out, end - in);
                    if (numberU8Consumed == 0)
                    {
                        /* malformed input: give up on conversion entirely */
                        decoded = 0;
                        break;
                    }
                    /* only keep the slot once the decoder has filled it */
                    out += 1;
                    in += numberU8Consumed;
                }
            }

            if (decoded)
            {
                UINT codePage = GetConsoleOutputCP ();
                IDATA wLen = out - wBuf;
                /* first call only measures the converted length */
                IDATA mbLen =
                    WideCharToMultiByte (codePage, 0, wBuf, wLen, NULL, 0, NULL,
                    NULL);
                if (mbLen > 0)
                {
                    char *newBuf =
                        portLibrary->mem_allocate_memory (portLibrary, mbLen + 1);
                    /* if we couldn't allocate the buffer, fall back to a plain copy below */
                    if (newBuf)
                    {
                        IDATA written =
                            WideCharToMultiByte (codePage, 0, wBuf, wLen, newBuf,
                            mbLen, NULL, NULL);
                        if (written > 0)
                        {
                            newBuf[written] = '\0';
                            outBuf = newBuf;
                        }
                        else
                        {
                            /* conversion failed: discard and fall back to a plain copy */
                            portLibrary->mem_free_memory (portLibrary, newBuf);
                        }
                    }
                }
            }
            portLibrary->mem_free_memory (portLibrary, wBuf);
        }
    }

    /* nothing was converted: hand back an unmodified, NUL-terminated copy */
    if (outBuf == NULL)
    {
        outBuf = portLibrary->mem_allocate_memory (portLibrary, nbytes + 1);
        if (outBuf)
        {
            memcpy (outBuf, buf, nbytes);
            outBuf[nbytes] = '\0';
        }
    }
    return outBuf;
}
Пример #3
0
/**
 * Output the buffer onto the stream as text. The buffer is a UTF8-encoded array of chars.
 * It is converted to the appropriate platform encoding.
 *
 * Buffers containing bytes with the high bit set are decoded to 16-bit units
 * (each LF becoming CR LF) and converted with WideCharToMultiByte using the
 * code page returned by GetConsoleOutputCP. Buffers that are pure ASCII only
 * have LF expanded to CR LF. If the input cannot be decoded, the conversion
 * fails, or a working buffer cannot be allocated, the original bytes are
 * written unchanged.
 *
 * @param[in] portLibrary The port library
 * @param[in] fd the file descriptor.
 * @param[in] buf buffer of text to be output.
 * @param[in] nbytes size of buffer of text to be output.
 *
 * @return 0 if file_write reported writing every byte of the (possibly
 * converted) text, otherwise the value returned by file_write.
 */
IDATA VMCALL
hyfile_write_text (struct HyPortLibrary * portLibrary, IDATA fd,
                   const char *buf, IDATA nbytes)
{
  IDATA result;
  IDATA i;
  int newlines = 0, highchars = 0;
  char stackBuf[512];
  char *newBuf = stackBuf;
  IDATA newLen;

  /* scan the buffer for any characters which need to be converted */
  for (i = 0; i < nbytes; i++)
    {
      if (buf[i] == '\n')
        {
          newlines += 1;
        }
      else if ((U_8) buf[i] & 0x80)
        {
          highchars += 1;
        }
    }

  /* if there are any non-ASCII chars, convert to Unicode and then to the local code page */
  if (highchars)
    {
      U_16 wStackBuf[512];
      U_16 *wBuf = wStackBuf;
      /* at most one 16-bit unit per input byte, plus one extra per LF */
      newLen = (nbytes + newlines) * 2;
      if (newLen > sizeof (wStackBuf))
        {
          wBuf = portLibrary->mem_allocate_memory (portLibrary, newLen);
        }
      if (wBuf)
        {
          const U_8 *in = (const U_8 *) buf;
          const U_8 *end = in + nbytes;
          U_16 *out = wBuf;
          int decoded = 1;

          while (in < end)
            {
              if (*in == '\n')
                {
                  *out++ = (U_16) '\r';
                  *out++ = (U_16) '\n';
                  in += 1;
                }
              else
                {
                  U_32 numberU8Consumed =
                    decodeUTF8CharN (in, out, end - in);
                  if (numberU8Consumed == 0)
                    {
                      /* malformed input: skip conversion, write the data the way it was */
                      decoded = 0;
                      break;
                    }
                  /* only keep the slot once the decoder has filled it */
                  out += 1;
                  in += numberU8Consumed;
                }
            }

          if (decoded)
            {
              UINT codePage = GetConsoleOutputCP ();
              IDATA wLen = out - wBuf;
              /* first call only measures the converted length */
              IDATA mbLen =
                WideCharToMultiByte (codePage, 0, wBuf, wLen, NULL, 0, NULL,
                                     NULL);
              if (mbLen > 0)
                {
                  if (mbLen > sizeof (stackBuf))
                    {
                      newBuf =
                        portLibrary->mem_allocate_memory (portLibrary, mbLen);
                      /* if we couldn't allocate the buffer, just output the data the way it was */
                    }
                  if (newBuf)
                    {
                      IDATA written =
                        WideCharToMultiByte (codePage, 0, wBuf, wLen, newBuf,
                                             mbLen, NULL, NULL);
                      /* only switch to the converted text if the conversion succeeded */
                      if (written > 0)
                        {
                          buf = newBuf;
                          nbytes = written;
                        }
                    }
                }
            }
          if (wBuf != wStackBuf)
            {
              portLibrary->mem_free_memory (portLibrary, wBuf);
            }
        }
    }
  else if (newlines)
    {
      /* change any LFs to CRLFs */
      newLen = nbytes + newlines;
      if (newLen > sizeof (stackBuf))
        {
          newBuf = portLibrary->mem_allocate_memory (portLibrary, newLen);
          /* if we couldn't allocate the buffer, just output the data the way it was */
        }
      if (newBuf)
        {
          char *cursor = newBuf;
          for (i = 0; i < nbytes; i++)
            {
              if (buf[i] == '\n')
                {
                  *cursor++ = '\r';
                }
              *cursor++ = buf[i];
            }
          buf = newBuf;
          nbytes = newLen;
        }
    }

  result = portLibrary->file_write (portLibrary, fd, (void *) buf, nbytes);

  /* newBuf is either the stack buffer, a heap buffer, or NULL after a failed allocation */
  if (newBuf != stackBuf && newBuf != NULL)
    {
      portLibrary->mem_free_memory (portLibrary, newBuf);
    }

  return (result == nbytes) ? 0 : result;
}
Пример #4
0
/**
 * Convert a text buffer with iconv and write the converted bytes to fd.
 *
 * The converter is obtained from iconv_get (presumably target code set first,
 * source second, as with iconv_open -- confirm against iconv_get). If no
 * converter is available the input bytes are written unchanged. Input that
 * iconv rejects with EILSEQ is replaced by a "\uXXXX" escape when it decodes
 * as UTF-8, or by '?' when it does not. Conversion starts in a 512-byte stack
 * buffer that is enlarged through growBuffer when iconv reports E2BIG; if
 * growing fails, whatever has been converted so far is written.
 *
 * @param[in] portLibrary The port library
 * @param[in] fd the file descriptor to write to
 * @param[in] buf the text to convert and write
 * @param[in] nbytes number of bytes in buf
 *
 * @return 0 if file_write reported writing every byte it was given, otherwise
 * the value returned by file_write.
 */
static intptr_t
file_write_using_iconv(struct OMRPortLibrary *portLibrary, intptr_t fd, const char *buf, intptr_t nbytes)
{
	intptr_t result = 0;
	char stackBuf[512];
	char *bufStart = NULL;
	uintptr_t outBufLen = sizeof(stackBuf);

	iconv_t converter = J9VM_INVALID_ICONV_DESCRIPTOR;
	size_t inbytesleft = 0;
	size_t outbytesleft = 0;
	char *inbuf = NULL;
	char *outbuf = NULL;
	intptr_t bytesToWrite = 0;

#ifdef J9ZOS390
	/* LIR 1280 (z/OS only) - every failed call to iconv_open() is recorded on the operator console, so don't retry */
	if (FALSE == PPG_file_text_iconv_open_failed) {
		/* iconv_get is not an a2e function, so we need to pass it honest-to-goodness EBCDIC strings */
#pragma convlit(suspend)
#endif

#ifndef OMRZTPF
		converter = iconv_get(portLibrary, J9FILETEXT_ICONV_DESCRIPTOR, nl_langinfo(CODESET), "UTF-8");
#else
		converter = iconv_get(portLibrary, J9FILETEXT_ICONV_DESCRIPTOR, "IBM1047", "ISO8859-1" );
#endif

#ifdef J9ZOS390
#pragma convlit(resume)
		if (J9VM_INVALID_ICONV_DESCRIPTOR == converter) {
			PPG_file_text_iconv_open_failed = TRUE;
		}
	}
#endif

	if (J9VM_INVALID_ICONV_DESCRIPTOR == converter) {
		/* no converter available for this code set. Just dump the UTF-8 chars */
		result = portLibrary->file_write(portLibrary, fd, (void *)buf, nbytes);
		return (result == nbytes) ? 0 : result;
	}

	inbuf = (char *)buf; /* for some reason this argument isn't const */
	outbuf = bufStart = stackBuf;
	inbytesleft = nbytes;
	outbytesleft = sizeof(stackBuf);

	while ((size_t)-1 == iconv(converter, &inbuf, &inbytesleft, &outbuf, &outbytesleft)) {
		/* capture errno before any other call can overwrite it */
		int tmp_errno = errno;

		if (inbytesleft == 0) {
			break;
		}

		if ((outbytesleft == 0) || (tmp_errno == E2BIG)) {
			/* input conversion stopped due to lack of space in the output buffer */

			if (growBuffer(portLibrary, stackBuf, &bufStart, &outbuf, &outbytesleft, &outBufLen) < 0) {
				/* failed to grow buffer, just output what we've got so far */
				break;
			}

		} else if (tmp_errno == EILSEQ) {
			/* input conversion stopped due to an input byte that does not belong to the input code set */

			const char *unicodeFormat = "\\u%04x";
#define J9FILETEXT_ESCAPE_STR_SIZE 6 /* max size of unicode format */
			/*
			 * One byte more than the longest escape: str_printf presumably reserves
			 * room for a terminating NUL like snprintf (confirm), in which case a
			 * 6-byte buffer would lose the last hex digit.
			 */
			char escapedStr[J9FILETEXT_ESCAPE_STR_SIZE + 1];
			char *escapedStrStart = escapedStr;

			uint16_t unicodeC = 0;
			size_t escapedLength = 0;
			size_t utf8Length = decodeUTF8CharN((const uint8_t *)inbuf, &unicodeC, inbytesleft);

			if (utf8Length == 0) {
				/* invalid encoding, including 4-byte UTF-8 */
				utf8Length = 1;
				escapedLength = 1;
				escapedStr[0] = '?';
			} else {
				escapedLength = portLibrary->str_printf(portLibrary, escapedStr, sizeof(escapedStr), unicodeFormat, (uintptr_t)unicodeC);
			}

			/* step over the offending input so the outer loop resumes after it */
			inbytesleft -= utf8Length;
			inbuf += utf8Length;

			if ((size_t)-1 == iconv(converter, &escapedStrStart, &escapedLength, &outbuf, &outbytesleft)) {
				/* not handling EILSEQ here because:
				 *  1. we can't do much if iconv() fails to convert ascii.
				 *  2. inbuf and inbytesleft have been explicitly updated so the while loop will get terminated after converting the rest of the characters.
				 */

				tmp_errno = errno;

				/* if the remaining outbuf is too small, then grow it before storing Unicode string representation */
				if (tmp_errno == E2BIG) {
					if (growBuffer(portLibrary, stackBuf, &bufStart, &outbuf, &outbytesleft, &outBufLen) < 0) {
						/* failed to grow buffer, just output what we've got so far */
						break;
					}
					/*
					 * Retry with the enlarged buffer so the part of the escape that did
					 * not fit is not silently dropped. iconv has already advanced
					 * escapedStrStart/escapedLength past whatever it converted. Still
					 * best-effort: a second failure is ignored, as above.
					 */
					(void)iconv(converter, &escapedStrStart, &escapedLength, &outbuf, &outbytesleft);
				}
			}
		} else {
			/* input conversion stopped due to an incomplete character or shift sequence at the end of the input buffer */
			break;
		}
	}

	iconv_free(portLibrary, J9FILETEXT_ICONV_DESCRIPTOR, converter);

	/* CMVC 152575 - the converted string is not necessarily the same length in bytes as the original string */
	bytesToWrite = outbuf - bufStart;
	result = portLibrary->file_write(portLibrary, fd, (void *)bufStart, bytesToWrite);

	/* bufStart only differs from stackBuf after growBuffer replaced it */
	if (bufStart != stackBuf) {
		portLibrary->mem_free_memory(portLibrary, bufStart);
	}

	return (result == bytesToWrite) ? 0 : result;
}
Пример #5
0
/**
 * Decode a single UTF8-encoded character.
 *
 * Convenience wrapper around decodeUTF8CharN that allows the decoder to look
 * at up to three input bytes.
 *
 * @param[in] input pointer to the first byte of the UTF8 sequence
 * @param[in,out] result receives the decoded unicode character
 *
 * @return the value returned by decodeUTF8CharN: the amount of input
 * consumed (1, 2 or 3) on success, 0 on failure
 */
U_32
decodeUTF8Char (const U_8 * input, U_16 * result)
{
  /* longest UTF8 sequence this decoder accepts, in bytes */
  enum { MAX_SEQUENCE_BYTES = 3 };

  U_32 consumed = decodeUTF8CharN (input, result, MAX_SEQUENCE_BYTES);
  return consumed;
}