void wc(wc_info& wci) { char mbc[2]; bool lead_byte = false; char* p = (char*)wci.m_p; for (size_t i = 0; i < wci.m_size; i++) { if (!lead_byte) { if (_ismbblead(p[i])) { // 2バイトコードの可能性があるので、フラグを立てて保留 mbc[0] = p[i]; lead_byte = true; } else { // 1バイトコードなので、カウント+1 ++wci.m_count_byte; ++wci.m_count_char; } } else { if (_ismbbtrail(p[i])) { // 2バイトコード確定。フラグを落としてカウントアップ mbc[1] = p[i]; lead_byte = false; wci.m_count_byte += 2; ++wci.m_count_char; } else { // 異常な文字。カウントせずにフラグだけ戻す lead_byte = false; } } } }
/* * @implemented */ int _mbsbtype( const unsigned char *str, size_t n ) { int lead = 0; const unsigned char *end = str + n; /* Lead bytes can also be trail bytes so we need to analyse the string. * Also we must return _MBC_ILLEGAL for chars past the end of the string */ while (str < end) /* Note: we skip the last byte - will check after the loop */ { if (!*str) return _MBC_ILLEGAL; lead = !lead && _ismbblead(*str); str++; } if (lead) if (_ismbbtrail(*str)) return _MBC_TRAIL; else return _MBC_ILLEGAL; else if (_ismbblead(*str)) return _MBC_LEAD; else return _MBC_SINGLE; }
/* added by H.Banno for Windows & Mac */ static char * fgets_jconf(char *buf, int size, FILE *fp) { int c, prev_c; int pos; if (fp == NULL) return NULL; pos = 0; c = '\0'; prev_c = '\0'; while (1) { if (pos >= size) { pos--; break; } c = fgetc(fp); if (c == EOF) { buf[pos] = '\0'; if (pos <= 0) { return NULL; } else { return buf; } } else if (c == '\n' || c == '\r') { if (c == '\r' && (c = fgetc(fp)) != '\n') { /* for Mac */ ungetc(c, fp); } if (prev_c == '\\') { pos--; } else { break; } } else { buf[pos] = c; pos++; #if defined(_WIN32) && !defined(__CYGWIN32__) if (c == '\\' && (_ismbblead(prev_c) && _ismbbtrail(c))) { c = '\0'; } #endif } prev_c = c; } buf[pos] = '\0'; return buf; }
static void test_mbcp(void) { int mb_orig_max = *p__mb_cur_max; int curr_mbcp = _getmbcp(); unsigned char *mbstring = (unsigned char *)"\xb0\xb1\xb2 \xb3\xb4 \xb5"; /* incorrect string */ unsigned char *mbstring2 = (unsigned char *)"\xb0\xb1\xb2\xb3Q\xb4\xb5"; /* correct string */ unsigned char *mbsonlylead = (unsigned char *)"\xb0\0\xb1\xb2 \xb3"; unsigned char buf[16]; int step; /* _mbtype tests */ /* An SBCS codepage test. The ctype of characters on e.g. CP1252 or CP1250 differs slightly * between versions of Windows. Also Windows 9x seems to ignore the codepage and always uses * CP1252 (or the ACP?) so we test only a few ASCII characters */ _setmbcp(1252); expect_eq(p_mbctype[10], 0, char, "%x"); expect_eq(p_mbctype[50], 0, char, "%x"); expect_eq(p_mbctype[66], _SBUP, char, "%x"); expect_eq(p_mbctype[100], _SBLOW, char, "%x"); expect_eq(p_mbctype[128], 0, char, "%x"); _setmbcp(1250); expect_eq(p_mbctype[10], 0, char, "%x"); expect_eq(p_mbctype[50], 0, char, "%x"); expect_eq(p_mbctype[66], _SBUP, char, "%x"); expect_eq(p_mbctype[100], _SBLOW, char, "%x"); expect_eq(p_mbctype[128], 0, char, "%x"); /* double byte code pages */ test_codepage(932); test_codepage(936); test_codepage(949); test_codepage(950); _setmbcp(936); ok(*p__mb_cur_max == mb_orig_max, "__mb_cur_max shouldn't be updated (is %d != %d)\n", *p__mb_cur_max, mb_orig_max); ok(_ismbblead('\354'), "\354 should be a lead byte\n"); ok(_ismbblead(' ') == FALSE, "' ' should not be a lead byte\n"); ok(_ismbblead(0x1234b0), "0x1234b0 should not be a lead byte\n"); ok(_ismbblead(0x123420) == FALSE, "0x123420 should not be a lead byte\n"); ok(_ismbbtrail('\xb0'), "\xa0 should be a trail byte\n"); ok(_ismbbtrail(' ') == FALSE, "' ' should not be a trail byte\n"); /* _ismbslead */ expect_eq(_ismbslead(mbstring, &mbstring[0]), -1, int, "%d"); expect_eq(_ismbslead(mbstring, &mbstring[1]), FALSE, int, "%d"); expect_eq(_ismbslead(mbstring, &mbstring[2]), -1, int, "%d"); expect_eq(_ismbslead(mbstring, &mbstring[3]), FALSE, int, "%d"); expect_eq(_ismbslead(mbstring, &mbstring[4]), -1, int, "%d"); expect_eq(_ismbslead(mbstring, &mbstring[5]), FALSE, int, "%d"); expect_eq(_ismbslead(mbstring, &mbstring[6]), FALSE, int, "%d"); expect_eq(_ismbslead(mbstring, &mbstring[7]), -1, int, "%d"); expect_eq(_ismbslead(mbstring, &mbstring[8]), FALSE, int, "%d"); expect_eq(_ismbslead(mbsonlylead, &mbsonlylead[0]), -1, int, "%d"); expect_eq(_ismbslead(mbsonlylead, &mbsonlylead[1]), FALSE, int, "%d"); expect_eq(_ismbslead(mbsonlylead, &mbsonlylead[2]), FALSE, int, "%d"); expect_eq(_ismbslead(mbsonlylead, &mbsonlylead[5]), FALSE, int, "%d"); /* _ismbstrail */ expect_eq(_ismbstrail(mbstring, &mbstring[0]), FALSE, int, "%d"); expect_eq(_ismbstrail(mbstring, &mbstring[1]), -1, int, "%d"); expect_eq(_ismbstrail(mbstring, &mbstring[2]), FALSE, int, "%d"); expect_eq(_ismbstrail(mbstring, &mbstring[3]), -1, int, "%d"); expect_eq(_ismbstrail(mbstring, &mbstring[4]), FALSE, int, "%d"); expect_eq(_ismbstrail(mbstring, &mbstring[5]), -1, int, "%d"); expect_eq(_ismbstrail(mbstring, &mbstring[6]), FALSE, int, "%d"); expect_eq(_ismbstrail(mbstring, &mbstring[7]), FALSE, int, "%d"); expect_eq(_ismbstrail(mbstring, &mbstring[8]), -1, int, "%d"); expect_eq(_ismbstrail(mbsonlylead, &mbsonlylead[0]), FALSE, int, "%d"); expect_eq(_ismbstrail(mbsonlylead, &mbsonlylead[1]), -1, int, "%d"); expect_eq(_ismbstrail(mbsonlylead, &mbsonlylead[2]), FALSE, int, "%d"); expect_eq(_ismbstrail(mbsonlylead, &mbsonlylead[3]), FALSE, int, "%d"); expect_eq(_ismbstrail(mbsonlylead, &mbsonlylead[4]), FALSE, int, "%d"); expect_eq(_ismbstrail(mbsonlylead, &mbsonlylead[5]), FALSE, int, "%d"); /* _mbsbtype */ expect_eq(_mbsbtype(mbstring, 0), _MBC_LEAD, int, "%d"); expect_eq(_mbsbtype(mbstring, 1), _MBC_TRAIL, int, "%d"); expect_eq(_mbsbtype(mbstring, 2), _MBC_LEAD, int, "%d"); expect_eq(_mbsbtype(mbstring, 3), _MBC_ILLEGAL, int, "%d"); expect_eq(_mbsbtype(mbstring, 4), _MBC_LEAD, int, "%d"); expect_eq(_mbsbtype(mbstring, 5), _MBC_TRAIL, int, "%d"); expect_eq(_mbsbtype(mbstring, 6), _MBC_SINGLE, int, "%d"); expect_eq(_mbsbtype(mbstring, 7), _MBC_LEAD, int, "%d"); expect_eq(_mbsbtype(mbstring, 8), _MBC_ILLEGAL, int, "%d"); expect_eq(_mbsbtype(mbsonlylead, 0), _MBC_LEAD, int, "%d"); expect_eq(_mbsbtype(mbsonlylead, 1), _MBC_ILLEGAL, int, "%d"); expect_eq(_mbsbtype(mbsonlylead, 2), _MBC_ILLEGAL, int, "%d"); expect_eq(_mbsbtype(mbsonlylead, 3), _MBC_ILLEGAL, int, "%d"); expect_eq(_mbsbtype(mbsonlylead, 4), _MBC_ILLEGAL, int, "%d"); expect_eq(_mbsbtype(mbsonlylead, 5), _MBC_ILLEGAL, int, "%d"); /* _mbsnextc */ expect_eq(_mbsnextc(mbstring), 0xb0b1, int, "%x"); expect_eq(_mbsnextc(&mbstring[2]), 0xb220, int, "%x"); /* lead + invalid tail */ expect_eq(_mbsnextc(&mbstring[3]), 0x20, int, "%x"); /* single char */ /* _mbclen/_mbslen */ expect_eq(_mbclen(mbstring), 2, int, "%d"); expect_eq(_mbclen(&mbstring[2]), 2, int, "%d"); expect_eq(_mbclen(&mbstring[3]), 1, int, "%d"); expect_eq(_mbslen(mbstring2), 4, int, "%d"); expect_eq(_mbslen(mbsonlylead), 0, int, "%d"); /* lead + NUL not counted as character */ expect_eq(_mbslen(mbstring), 4, int, "%d"); /* lead + invalid trail counted */ /* _mbccpy/_mbsncpy */ memset(buf, 0xff, sizeof(buf)); _mbccpy(buf, mbstring); expect_bin(buf, "\xb0\xb1\xff", 3); memset(buf, 0xff, sizeof(buf)); _mbsncpy(buf, mbstring, 1); expect_bin(buf, "\xb0\xb1\xff", 3); memset(buf, 0xff, sizeof(buf)); _mbsncpy(buf, mbstring, 2); expect_bin(buf, "\xb0\xb1\xb2 \xff", 5); memset(buf, 0xff, sizeof(buf)); _mbsncpy(buf, mbstring, 3); expect_bin(buf, "\xb0\xb1\xb2 \xb3\xb4\xff", 7); memset(buf, 0xff, sizeof(buf)); _mbsncpy(buf, mbstring, 4); expect_bin(buf, "\xb0\xb1\xb2 \xb3\xb4 \xff", 8); memset(buf, 0xff, sizeof(buf)); _mbsncpy(buf, mbstring, 5); expect_bin(buf, "\xb0\xb1\xb2 \xb3\xb4 \0\0\xff", 10); memset(buf, 0xff, sizeof(buf)); _mbsncpy(buf, mbsonlylead, 6); expect_bin(buf, "\0\0\0\0\0\0\0\xff", 8); memset(buf, 0xff, sizeof(buf)); _mbsnbcpy(buf, mbstring2, 2); expect_bin(buf, "\xb0\xb1\xff", 3); _mbsnbcpy(buf, mbstring2, 3); expect_bin(buf, "\xb0\xb1\0\xff", 4); _mbsnbcpy(buf, mbstring2, 4); expect_bin(buf, "\xb0\xb1\xb2\xb3\xff", 5); memset(buf, 0xff, sizeof(buf)); _mbsnbcpy(buf, mbsonlylead, 5); expect_bin(buf, "\0\0\0\0\0\xff", 6); /* _mbsinc/mbsdec */ step = _mbsinc(mbstring) - mbstring; ok(step == 2, "_mbsinc adds %d (exp. 2)\n", step); step = _mbsinc(&mbstring[2]) - &mbstring[2]; /* lead + invalid tail */ ok(step == 2, "_mbsinc adds %d (exp. 2)\n", step); step = _mbsninc(mbsonlylead, 1) - mbsonlylead; ok(step == 0, "_mbsninc adds %d (exp. 0)\n", step); step = _mbsninc(mbsonlylead, 2) - mbsonlylead; /* lead + NUL byte + lead + char */ ok(step == 0, "_mbsninc adds %d (exp. 0)\n", step); step = _mbsninc(mbstring2, 0) - mbstring2; ok(step == 0, "_mbsninc adds %d (exp. 2)\n", step); step = _mbsninc(mbstring2, 1) - mbstring2; ok(step == 2, "_mbsninc adds %d (exp. 2)\n", step); step = _mbsninc(mbstring2, 2) - mbstring2; ok(step == 4, "_mbsninc adds %d (exp. 4)\n", step); step = _mbsninc(mbstring2, 3) - mbstring2; ok(step == 5, "_mbsninc adds %d (exp. 5)\n", step); step = _mbsninc(mbstring2, 4) - mbstring2; ok(step == 7, "_mbsninc adds %d (exp. 7)\n", step); step = _mbsninc(mbstring2, 5) - mbstring2; ok(step == 7, "_mbsninc adds %d (exp. 7)\n", step); step = _mbsninc(mbstring2, 17) - mbstring2; ok(step == 7, "_mbsninc adds %d (exp. 7)\n", step); /* functions that depend on locale codepage, not mbcp. * we hope the current locale to be SBCS because setlocale(LC_ALL, ".1252") seems not to work yet * (as of Wine 0.9.43) */ if (*p__mb_cur_max == 1) { expect_eq(mblen((char *)mbstring, 3), 1, int, "%x"); expect_eq(_mbstrlen((char *)mbstring2), 7, int, "%d"); }