/**
 * Sends the one-byte request `req` to the SKK server and returns the reply as a wide string.
 *
 * The reply is accumulated until a received chunk ends with a space (SP) and is then
 * decoded according to `encoding` (0: EUC-JIS-2004, 1: UTF-8; any other value leaves
 * the result empty).
 *
 * @param req request byte to send.
 * @return the decoded reply. Empty when `serv` is false or when send() fails (the
 *         connection is then re-established). If recv() fails part-way, the connection
 *         is re-established and whatever was received so far is still decoded.
 */
std::wstring GetSKKServerInfo(CHAR req)
{
	std::wstring info;

	if(!serv)
	{
		return info;
	}

	if(send(sock, &req, 1, 0) == SOCKET_ERROR)
	{
		// The connection is unusable: drop it and reconnect for the next caller.
		DisconnectSKKServer();
		ConnectSKKServer();
		return info;
	}

	std::string reply;
	CHAR chunk[RECVBUFSIZE];

	for(;;)
	{
		// Zero-fill and read at most size-1 bytes so the chunk is always NUL-terminated.
		ZeroMemory(chunk, sizeof(chunk));
		const int received = recv(sock, chunk, sizeof(chunk) - 1, 0);

		if(received <= 0 || received == SOCKET_ERROR)
		{
			DisconnectSKKServer();
			ConnectSKKServer();
			break;
		}

		reply.append(chunk);

		// A trailing space marks the end of the reply.
		if(chunk[received - 1] == '\x20'/*SP*/)
		{
			break;
		}
	}

	if(encoding == 0)
	{
		info = eucjis2004_string_to_wstring(reply);
	}
	else if(encoding == 1)
	{
		info = utf8_string_to_wstring(reply);
	}

	return info;
}
std::wstring SearchSKKServer(const std::wstring &searchkey) { std::wstring candidate; std::string key; std::string buf; CHAR rbuf[RECVBUFSIZE]; int n; if(!serv) { return candidate; } key.push_back(SKK_REQ); switch(encoding) { case 0: buf = wstring_to_eucjis2004_string(searchkey); if(!buf.empty()) { key += buf; } else { return candidate; } break; case 1: buf = wstring_to_utf8_string(searchkey); if(!buf.empty()) { key += buf; } else { return candidate; } break; default: return candidate; break; } key.push_back('\x20'/*SP*/); if(sock == INVALID_SOCKET) { ConnectSKKServer(); } GetSKKServerInfo(SKK_VER); if(send(sock, key.c_str(), (int)key.size(), 0) == SOCKET_ERROR) { DisconnectSKKServer(); goto end; } buf.clear(); while(true) { ZeroMemory(rbuf, sizeof(rbuf)); n = recv(sock, rbuf, sizeof(rbuf) - 1, 0); if(n == SOCKET_ERROR || n <= 0) { DisconnectSKKServer(); goto end; } buf += rbuf; if(n <= _countof(rbuf) && rbuf[n - 1] == '\n'/*LF*/) { break; } } end: if(buf.size() > 1 && buf.front() == SKK_HIT) { std::string s; std::smatch m; std::regex r; s = buf.substr(1); r.assign("/[^/]+"); while(std::regex_search(s, m, r)) { switch(encoding) { case 0: candidate += eucjis2004_string_to_wstring(m.str()); break; case 1: candidate += utf8_string_to_wstring(m.str()); break; default: break; } s = m.suffix().str(); } } return candidate; }
int ReadSKKDicLine(FILE *fp, WCHAR bom, int &okuri, std::wstring &key, SKKDICCANDIDATES &c, SKKDICOKURIBLOCKS &o) { CHAR buf[READBUFSIZE * sizeof(WCHAR)]; std::string sbuf; WCHAR wbuf[READBUFSIZE]; std::wstring wsbuf; size_t is; void *rp; std::wstring s, fmt; std::wregex re; c.clear(); o.clear(); switch(bom) { case BOM: while((rp = fgetws(wbuf, _countof(wbuf), fp)) != NULL) { wsbuf += wbuf; if(!wsbuf.empty() && wsbuf.back() == L'\n') { break; } } break; default: while((rp = fgets(buf, _countof(buf), fp)) != NULL) { sbuf += buf; if(!sbuf.empty() && sbuf.back() == '\n') { break; } } break; } if(rp == NULL) { return -1; } switch(bom) { case BOM: break; default: wsbuf = eucjis2004_string_to_wstring(sbuf); if(wsbuf.empty()) { return 1; } break; } if(wsbuf.empty()) { return 1; } if(wsbuf.compare(EntriesAri) == 0) { okuri = 1; return 1; } else if(wsbuf.compare(EntriesNasi) == 0) { okuri = 0; return 1; } else { if(L'\0' <= wsbuf.front() && wsbuf.front() <= L'\x20') { return 1; } } if(okuri == -1) { return 1; } s = wsbuf; re.assign(L"[\\x00-\\x19]"); fmt.assign(L""); s = std::regex_replace(s, re, fmt); if(okuri == 1) { //送りありエントリのブロック ParseSKKDicOkuriBlock(s, o); //送りありエントリのブロックを除去 re.assign(L"\\[[^\\[\\]]+?/[^\\[\\]]+?/\\]/"); fmt.assign(L""); s = std::regex_replace(s, re, fmt); } is = s.find_first_of(L'\x20'); if(is == std::wstring::npos) { return 1; } key = s.substr(0, is); is = s.find_first_of(L'/', is); if(is == std::wstring::npos) { return 1; } s = s.substr(is); ParseSKKDicCandiate(s, c); return 0; }
int wmain(int argc, wchar_t* argv[]) { FILE *fpi, *fpo; CHAR buf[BUFSIZE * sizeof(WCHAR)]; LPSTR pb; WCHAR wbuf[BUFSIZE]; LPWSTR pwb; std::string sbuf; std::wstring wsbuf; BOOL ret; UINT line; LPCWSTR rflag = RB, wflag = WccsUTF16LE; int ai, inenc = in_euc, outenc = out_utf16; LPCWSTR infile, outfile; setlocale(LC_ALL, ""); if(argc < 3) { print_usage(); return -1; } for(ai = 1; ai < 3; ai++) { if(wcscmp(argv[ai], L"-e") == 0) { } else if(wcscmp(argv[ai], L"-u") == 0) { inenc = in_utf8; rflag = RB; } else if(wcscmp(argv[ai], L"-w") == 0) { inenc = in_utf16; rflag = RccsUTF16LE; } else if(wcscmp(argv[ai], L"-E") == 0) { outenc = out_euc; wflag = WB; } else if(wcscmp(argv[ai], L"-U") == 0) { outenc = out_utf8; wflag = WB; } else if(wcscmp(argv[ai], L"-W") == 0) { } else { if(argv[ai][0] == L'-') { print_usage(); return -1; } break; } } if(argc < ai + 2) { print_usage(); return -1; } infile = argv[ai]; outfile = argv[ai + 1]; _wfopen_s(&fpi, infile, rflag); if(fpi == nullptr) { fwprintf(stderr, L"error : cannot open %s\n", infile); return -1; } _wfopen_s(&fpo, outfile, wflag); if(fpo == nullptr) { fwprintf(stderr, L"error : cannot open %s\n", outfile); fclose(fpi); return -1; } if(inenc == in_utf16) { for(line = 1; ; line++) { sbuf.clear(); wsbuf.clear(); while((pwb = fgetws(wbuf, _countof(wbuf), fpi)) != nullptr) { wsbuf.append(wbuf); if(!wsbuf.empty() && wsbuf.back() == L'\n') { break; } } if(pwb == nullptr) { break; } ret = TRUE; switch(outenc) { case out_euc: sbuf = wstring_to_eucjis2004_string(wsbuf); if(sbuf.size() > 0) { fwrite(sbuf.c_str(), sbuf.size(), 1, fpo); } else { ret = FALSE; } break; case out_utf16: fwprintf(fpo, L"%s", wsbuf.c_str()); break; case out_utf8: sbuf = wstring_to_utf8_string(wsbuf); if(sbuf.size() > 0) { fwrite(sbuf.c_str(), sbuf.size(), 1, fpo); } else { ret = FALSE; } break; default: break; } if(!ret) { fwprintf(stderr, L"error : cannot convert line %u\n", line); break; } } fclose(fpi); fclose(fpo); } else { for(line = 1; ; line++) { 
sbuf.clear(); wsbuf.clear(); while((pb = fgets(buf, _countof(buf), fpi)) != nullptr) { sbuf.append(buf); if(!sbuf.empty() && sbuf.back() == '\n') { break; } } if(pb == nullptr) { break; } if(inenc == in_utf8 && sbuf.size() >= 3 && sbuf.substr(0, 3) == "\xEF\xBB\xBF") { sbuf.erase(0, 3); } if(sbuf.size() >= 2 && sbuf.substr(sbuf.size() - 2) == "\r\n") { sbuf.erase(sbuf.size() - 2); sbuf.push_back('\n'); } ret = TRUE; switch(inenc) { case in_euc: switch(outenc) { case out_euc: fwrite(sbuf.c_str(), sbuf.size(), 1, fpo); break; case in_utf8: wsbuf = eucjis2004_string_to_wstring(sbuf); if(wsbuf.size() > 0) { sbuf = wstring_to_utf8_string(wsbuf); if(sbuf.size() > 0) { fwrite(sbuf.c_str(), sbuf.size(), 1, fpo); } else { ret = FALSE; } } else { ret = FALSE; } break; case out_utf16: wsbuf = eucjis2004_string_to_wstring(sbuf); if(wsbuf.size() > 0) { fwprintf(fpo, L"%s", wsbuf.c_str()); } else { ret = FALSE; } break; default: break; } break; case in_utf8: switch(outenc) { case out_euc: wsbuf = utf8_string_to_wstring(sbuf); if(wsbuf.size() > 0) { sbuf = wstring_to_eucjis2004_string(wsbuf); if(sbuf.size() > 0) { fwrite(sbuf.c_str(), sbuf.size(), 1, fpo); } else { ret = FALSE; } } else { ret = FALSE; } break; case in_utf8: fwrite(sbuf.c_str(), sbuf.size(), 1, fpo); break; case out_utf16: wsbuf = utf8_string_to_wstring(sbuf); if(wsbuf.size() > 0) { fwprintf(fpo, L"%s", wsbuf.c_str()); } else { ret = FALSE; } break; default: break; } break; default: break; } if(!ret) { fwprintf(stderr, L"error : cannot convert line %u\n", line); break; } } fclose(fpi); fclose(fpo); } return 0; }