string_t *convert_string(const string_t *str, enum str_e type, int codepage) { const union cptable *cptable = codepage ? wine_cp_get_table( codepage ) : NULL; string_t *ret = xmalloc(sizeof(*ret)); int res; ret->loc = str->loc; if (!codepage && str->type != type) parser_error( "Current language is Unicode only, cannot convert string" ); if((str->type == str_char) && (type == str_unicode)) { ret->type = str_unicode; ret->size = cptable ? wine_cp_mbstowcs( cptable, 0, str->str.cstr, str->size, NULL, 0 ) : wine_utf8_mbstowcs( 0, str->str.cstr, str->size, NULL, 0 ); ret->str.wstr = xmalloc( (ret->size+1) * sizeof(WCHAR) ); if (cptable) res = wine_cp_mbstowcs( cptable, MB_ERR_INVALID_CHARS, str->str.cstr, str->size, ret->str.wstr, ret->size ); else res = wine_utf8_mbstowcs( MB_ERR_INVALID_CHARS, str->str.cstr, str->size, ret->str.wstr, ret->size ); if (res == -2) parser_error( "Invalid character in string '%.*s' for codepage %u", str->size, str->str.cstr, codepage ); ret->str.wstr[ret->size] = 0; } else if((str->type == str_unicode) && (type == str_char)) { ret->type = str_char; ret->size = cptable ? wine_cp_wcstombs( cptable, 0, str->str.wstr, str->size, NULL, 0, NULL, NULL ) : wine_utf8_wcstombs( 0, str->str.wstr, str->size, NULL, 0 ); ret->str.cstr = xmalloc( ret->size + 1 ); if (cptable) wine_cp_wcstombs( cptable, 0, str->str.wstr, str->size, ret->str.cstr, ret->size, NULL, NULL ); else wine_utf8_wcstombs( 0, str->str.wstr, str->size, ret->str.cstr, ret->size ); ret->str.cstr[ret->size] = 0; } else if(str->type == str_unicode) { ret->type = str_unicode; ret->size = str->size; ret->str.wstr = xmalloc(sizeof(WCHAR)*(ret->size+1)); memcpy( ret->str.wstr, str->str.wstr, ret->size * sizeof(WCHAR) ); ret->str.wstr[ret->size] = 0; } else /* str->type == str_char */ { ret->type = str_char; ret->size = str->size; ret->str.cstr = xmalloc( ret->size + 1 ); memcpy( ret->str.cstr, str->str.cstr, ret->size ); ret->str.cstr[ret->size] = 0; } return ret; }
/* check if the string is valid utf8 despite a different codepage being in use */ int check_valid_utf8( const string_t *str, int codepage ) { unsigned int i; if (!check_utf8) return 0; if (!codepage) return 0; if (!wine_cp_get_table( codepage )) return 0; for (i = 0; i < str->size; i++) { if ((unsigned char)str->str.cstr[i] >= 0xf5) goto done; if ((unsigned char)str->str.cstr[i] >= 0xc2) break; if ((unsigned char)str->str.cstr[i] >= 0x80) goto done; } if (i == str->size) return 0; /* no 8-bit chars at all */ if (wine_utf8_mbstowcs( MB_ERR_INVALID_CHARS, str->str.cstr, str->size, NULL, 0 ) >= 0) return 1; done: check_utf8 = 0; /* at least one 8-bit non-utf8 string found, stop checking */ return 0; }
/* * Fill the input buffer with *one* line of input. * The line is '\n' terminated so that scanning * messages with translation works as expected * (otherwise we cannot pre-translate because the * language is first known one line before the * actual message). */ static int fill_inputbuffer(void) { int n; static const char err_fatalread[] = "Fatal: reading input failed"; static int endian = -1; if(!inputbuffer) { inputbuffer = xmalloc(INPUTBUFFER_SIZE*sizeof(WCHAR)); xlatebuffer = xmalloc(INPUTBUFFER_SIZE); } try_again: if(!unicodein) { char *cptr; cptr = fgets(xlatebuffer, INPUTBUFFER_SIZE, yyin); if(!cptr && ferror(yyin)) xyyerror(err_fatalread); else if(!cptr) return 0; if (codepage_def) n = wine_cp_mbstowcs(codepage_def, 0, xlatebuffer, strlen(xlatebuffer)+1, inputbuffer, INPUTBUFFER_SIZE); else n = wine_utf8_mbstowcs(0, xlatebuffer, strlen(xlatebuffer)+1, inputbuffer, INPUTBUFFER_SIZE); if(n < 0) internal_error(__FILE__, __LINE__, "Could not translate to unicode (%d)\n", n); if(n <= 1) goto try_again; /* Should not happen */ n--; /* Strip added conversion '\0' from input length */ /* * FIXME: * Detect UTF-8 in the first time we read some bytes by * checking the special sequence "FE..." or something like * that. I need to check www.unicode.org for details. */ } else { if(endian == -1) { n = fread(inputbuffer, 1, 8, yyin); if(n != 8) { if(!n && ferror(yyin)) xyyerror(err_fatalread); else xyyerror("Fatal: file to short to determine byteorder (should never happen)\n"); } if(isisochar(inputbuffer[0]) && isisochar(inputbuffer[1]) && isisochar(inputbuffer[2]) && isisochar(inputbuffer[3])) { #ifdef WORDS_BIGENDIAN endian = WMC_BO_BIG; #else endian = WMC_BO_LITTLE; #endif } else if(isisochar(BYTESWAP_WORD(inputbuffer[0])) && isisochar(BYTESWAP_WORD(inputbuffer[1])) && isisochar(BYTESWAP_WORD(inputbuffer[2])) && isisochar(BYTESWAP_WORD(inputbuffer[3]))) { #ifdef WORDS_BIGENDIAN endian = WMC_BO_LITTLE; #else endian = WMC_BO_BIG; #endif } else xyyerror("Fatal: cannot determine file's byteorder\n"); /* FIXME: * Determine the file-endian with the leader-bytes * "FF FE..."; can't remember the exact sequence. */ n /= 2; #ifdef WORDS_BIGENDIAN if(endian == WMC_BO_LITTLE) #else if(endian == WMC_BO_BIG) #endif { inputbuffer[0] = BYTESWAP_WORD(inputbuffer[0]); inputbuffer[1] = BYTESWAP_WORD(inputbuffer[1]); inputbuffer[2] = BYTESWAP_WORD(inputbuffer[2]); inputbuffer[3] = BYTESWAP_WORD(inputbuffer[3]); } } else { int i; n = 0; for(i = 0; i < INPUTBUFFER_SIZE; i++) { int t; t = fread(&inputbuffer[i], 2, 1, yyin); if(!t && ferror(yyin)) xyyerror(err_fatalread); else if(!t && n) break; n++; #ifdef WORDS_BIGENDIAN if(endian == WMC_BO_LITTLE) #else if(endian == WMC_BO_BIG) #endif { if((inputbuffer[i] = BYTESWAP_WORD(inputbuffer[i])) == '\n') break; } else { if(inputbuffer[i] == '\n') break; } } } } if(!n) { mcy_warning("Re-read line (input was or converted to zilch)\n"); goto try_again; /* Should not happen, but could be due to stdin reading and a signal */ } ninputbuffer += n; return 1; }