/**
 * Turn a raw text buffer into a wide string.
 *
 * When the buffer is at least two bytes long and its first 16-bit value
 * equals UNICODE_BYTE_ORDER_MARK, the data after that first unit is handed
 * to the wcs constructor as wide characters, with a length of
 * (size - 2) / 2 units. Otherwise the whole buffer is wrapped in an mbs and
 * converted through MBSTOWCS.
 *
 * NOTE(review): the BOM branch advances by one wchar_t and counts units as
 * bytes / 2, which is only consistent where wchar_t is 16 bits wide --
 * confirm the supported target platforms.
 *
 * @param buffer  start of the raw text data
 * @param size    length of the data in bytes
 * @return the decoded wide string
 */
wcs getTextUCS2(const void* buffer, size_t size)
{
    // The size test comes first so the 16-bit read never touches a buffer
    // shorter than two bytes.
    const bool starts_with_bom =
        size >= 2 && *(const unsigned short*)buffer == UNICODE_BYTE_ORDER_MARK;

    if (!starts_with_bom)
        return MBSTOWCS(mbs((char*)buffer, (size_t)size));

    const wchar_t* payload = ((const wchar_t*)buffer) + 1;
    const size_t unit_count = (size_t)((size - 2) >> 1);
    return wcs(payload, unit_count);
}
/**
 * Read a lexicon file line by line and fill m_Lexicon.
 *
 * Each line is passed to parseLine(), which yields a multibyte word and its
 * syllable string. Words that are empty or start with '<' are ignored; every
 * other word is converted to wide characters and stored as
 * m_Lexicon[word] = syllables (a later line with the same word overwrites
 * the earlier entry). Progress messages are printed to stdout.
 *
 * @param filename  path of the lexicon file to read
 * @return false if the file cannot be opened, true otherwise
 */
bool DictConv::constructLexicon(const char *filename)
{
    static char buf[4096];
    static char word_buf[2048];
    static TWCHAR wbuf[1024];
    static std::string syls;

    FILE *fp = fopen(filename, "r");
    if (!fp)
        return false;

    printf("Adding id and corresponding words...\n");
    fflush(stdout);

    while (fgets(buf, sizeof(buf), fp) != NULL) {
        if (!parseLine(buf, word_buf, syls))
            continue;

        // Skip empty words and words that start with '<'.
        if (word_buf[0] == '<' || word_buf[0] == 0)
            continue;

        MBSTOWCS(wbuf, word_buf, 1024);
        // word_buf can hold more characters than wbuf has slots; if the
        // conversion fills all 1024 elements no terminator is written, so
        // force one before the buffer is read as a C string.
        // NOTE(review): assumes MBSTOWCS follows mbstowcs() semantics -- confirm.
        wbuf[1023] = 0;

        m_Lexicon[wstring(wbuf)] = syls;
    }

    fclose(fp);

    // size() returns an unsigned size type, so %zu is the matching specifier.
    printf("\n %zu primitive nodes\n", m_Lexicon.size());
    fflush(stdout);
    return true;
}
bool DictConv::convertDictUsingLexicon(const char *ofile, const char *ifile) { static char buf[4096]; static char word_buf[2048]; static TWCHAR wbuf[1024]; unsigned id; unsigned real_id = 99; FILE *ifp = fopen(ifile, "r"); if (!ifp) return false; FILE *ofp = fopen(ofile, "w"); if (!ofp) { fclose(ifp); return false; } printf("Iterate the lines...\n"); fflush(stdout); while (fgets(buf, sizeof(buf), ifp) != NULL) { if (parseLine(buf, word_buf, id)) { if (word_buf[0] != L'<' && word_buf[0] != 0) { fprintf(ofp, "%s %d ", word_buf, ++real_id); MBSTOWCS(wbuf, word_buf, 1024); int i = 0, sz = WCSLEN(wbuf); bool unknown = false; for (; i < sz; i++) { if (i != 0) { fprintf(ofp, "'"); } wstring key(&wbuf[i], size_t(1)); CLexicon::iterator it = m_Lexicon.find(key); if (it != m_Lexicon.end()) { fprintf(ofp, "%s", m_Lexicon[key].c_str()); } else { unknown = true; fprintf(ofp, "XXX"); } } if (unknown) --real_id; fprintf(ofp, "\n"); } else { fprintf(ofp, "%s\n", buf); } } } fclose(ifp); fclose(ofp); return true; }
/**
 * Append the name of each entry in the opened archive to array_filename.
 *
 * Entries are visited with unzGoToFirstFile()/unzGoToNextFile() until either
 * call returns something other than UNZ_OK. An entry whose info query fails
 * is skipped. Each name is converted with MBSTOWCS before being appended.
 * Nothing happens when _handle is not set.
 *
 * @param array_filename  vector that receives the names; existing elements
 *                        are left in place
 */
void UnzipBase::getAllFilenames(vector<wcs>& array_filename) const
{
    if (!_handle)
        return;

    int status = unzGoToFirstFile(_handle);
    while (status == UNZ_OK) {
        char name_buf[4096];

        // Only the file name is requested; the info struct, extra field and
        // comment outputs are all passed as null.
        if (unzGetCurrentFileInfo(_handle, 0, name_buf,
                                  (unsigned long)sizeof(name_buf),
                                  0, 0, 0, 0) == UNZ_OK) {
            // Terminate explicitly in case the name filled the whole buffer.
            name_buf[sizeof(name_buf) - 1] = 0;
            array_filename.push_back(MBSTOWCS(name_buf));
        }

        status = unzGoToNextFile(_handle);
    }
}