/* * cifs_utf16_bytes - how long will a string be after conversion? * @utf16 - pointer to input string * @maxbytes - don't go past this many bytes of input string * @codepage - destination codepage * * Walk a utf16le string and return the number of bytes that the string will * be after being converted to the given charset, not including any null * termination required. Don't walk past maxbytes in the source buffer. */ int cifs_utf16_bytes(const __le16 *from, int maxbytes, const struct nls_table *codepage) { int i; int charlen, outlen = 0; int maxwords = maxbytes / 2; char tmp[NLS_MAX_CHARSET_SIZE]; __u16 ftmp[3]; for (i = 0; i < maxwords; i++) { ftmp[0] = get_unaligned_le16(&from[i]); if (ftmp[0] == 0) break; if (i + 1 < maxwords) ftmp[1] = get_unaligned_le16(&from[i + 1]); else ftmp[1] = 0; if (i + 2 < maxwords) ftmp[2] = get_unaligned_le16(&from[i + 2]); else ftmp[2] = 0; charlen = cifs_mapchar(tmp, ftmp, codepage, NO_MAP_UNI_RSVD); outlen += charlen; } return outlen; }
/*
 * cifs_from_utf16 - convert utf16le string to local charset
 * @to:		destination buffer
 * @from:	source buffer (little-endian UTF-16)
 * @tolen:	destination buffer size (in bytes)
 * @fromlen:	source buffer size (in bytes)
 * @codepage:	codepage to which characters should be converted
 * @mapchar:	should characters be remapped according to the mapchars
 *		option?  Handed unchanged to cifs_mapchar().
 *
 * Convert a little-endian utf16le string (as sent by the server) to a string
 * in the provided codepage. The tolen and fromlen parameters are to ensure
 * that the code doesn't walk off of the end of either buffer (which is always
 * a danger if the alignment of the source buffer is off). Conversion stops
 * at the first zero unit in @from, after @fromlen / 2 units, or as soon as
 * the next character plus the terminator would no longer fit in @to.
 *
 * Returns the length of the destination string in bytes (including the
 * null terminator). If @tolen is too small to hold even the terminator,
 * nothing is written to @to and 0 is returned.
 *
 * Note that some windows versions actually send multiword UTF-16 characters
 * instead of straight UCS-2. This function feeds cifs_mapchar() one 16-bit
 * unit at a time, so such characters won't be translated properly.
 */
int
cifs_from_utf16(char *to, const __le16 *from, int tolen, int fromlen,
		const struct nls_table *codepage, bool mapchar)
{
	int i, charlen, safelen;
	int outlen = 0;
	int nullsize = nls_nullsize(codepage);
	int fromwords = fromlen / 2;
	char tmp[NLS_MAX_CHARSET_SIZE];
	__u16 ftmp;

	/*
	 * Without room for the terminator the loop below would convert
	 * nothing, but the termination step would still write nullsize
	 * bytes and overrun the destination. Bail out before touching it.
	 */
	if (tolen < nullsize)
		return 0;

	/*
	 * because the chars can be of varying widths, we need to take care
	 * not to overflow the destination buffer when we get close to the
	 * end of it. Until we get to this offset, we don't need to check
	 * for overflow however (this assumes cifs_mapchar() never emits more
	 * than NLS_MAX_CHARSET_SIZE bytes per call).
	 */
	safelen = tolen - (NLS_MAX_CHARSET_SIZE + nullsize);

	for (i = 0; i < fromwords; i++) {
		ftmp = get_unaligned_le16(&from[i]);
		if (ftmp == 0)
			break;

		/*
		 * check to see if converting this character might make the
		 * conversion bleed into the null terminator; do a trial
		 * conversion into a scratch buffer to learn its size
		 */
		if (outlen >= safelen) {
			charlen = cifs_mapchar(tmp, ftmp, codepage, mapchar);
			if ((outlen + charlen) > (tolen - nullsize))
				break;
		}

		/* put converted char into 'to' buffer */
		charlen = cifs_mapchar(&to[outlen], ftmp, codepage, mapchar);
		outlen += charlen;
	}

	/* properly null-terminate string */
	for (i = 0; i < nullsize; i++)
		to[outlen++] = 0;

	return outlen;
}
/* * cifs_from_utf16 - convert utf16le string to local charset * @to - destination buffer * @from - source buffer * @tolen - destination buffer size (in bytes) * @fromlen - source buffer size (in bytes) * @codepage - codepage to which characters should be converted * @mapchar - should characters be remapped according to the mapchars option? * * Convert a little-endian utf16le string (as sent by the server) to a string * in the provided codepage. The tolen and fromlen parameters are to ensure * that the code doesn't walk off of the end of the buffer (which is always * a danger if the alignment of the source buffer is off). The destination * string is always properly null terminated and fits in the destination * buffer. Returns the length of the destination string in bytes (including * null terminator). * * Note that some windows versions actually send multiword UTF-16 characters * instead of straight UTF16-2. The linux nls routines however aren't able to * deal with those characters properly. In the event that we get some of * those characters, they won't be translated properly. */ int cifs_from_utf16(char *to, const __le16 *from, int tolen, int fromlen, const struct nls_table *codepage, int map_type) { int i, charlen, safelen; int outlen = 0; int nullsize = nls_nullsize(codepage); int fromwords = fromlen / 2; char tmp[NLS_MAX_CHARSET_SIZE]; __u16 ftmp[3]; /* ftmp[3] = 3array x 2bytes = 6bytes UTF-16 */ /* * because the chars can be of varying widths, we need to take care * not to overflow the destination buffer when we get close to the * end of it. Until we get to this offset, we don't need to check * for overflow however. 
*/ safelen = tolen - (NLS_MAX_CHARSET_SIZE + nullsize); for (i = 0; i < fromwords; i++) { ftmp[0] = get_unaligned_le16(&from[i]); if (ftmp[0] == 0) break; if (i + 1 < fromwords) ftmp[1] = get_unaligned_le16(&from[i + 1]); else ftmp[1] = 0; if (i + 2 < fromwords) ftmp[2] = get_unaligned_le16(&from[i + 2]); else ftmp[2] = 0; /* * check to see if converting this character might make the * conversion bleed into the null terminator */ if (outlen >= safelen) { charlen = cifs_mapchar(tmp, ftmp, codepage, map_type); if ((outlen + charlen) > (tolen - nullsize)) break; } /* put converted char into 'to' buffer */ charlen = cifs_mapchar(&to[outlen], ftmp, codepage, map_type); outlen += charlen; /* charlen (=bytes of UTF-8 for 1 character) * 4bytes UTF-8(surrogate pair) is charlen=4 * (4bytes UTF-16 code) * 7-8bytes UTF-8(IVS) is charlen=3+4 or 4+4 * (2 UTF-8 pairs divided to 2 UTF-16 pairs) */ if (charlen == 4) i++; else if (charlen >= 5) /* 5-6bytes UTF-8 */ i += 2; } /* properly null-terminate string */ for (i = 0; i < nullsize; i++) to[outlen++] = 0; return outlen; }