// -------------------------------------------------------------------- // main // -------------------------------------------------------------------- int main(int argc, char*argv[]) { parseArgs(argc, argv); struct sigaction a; a.sa_handler = traitementInterrupt; /* fonction à lancer */ sigemptyset(&a.sa_mask); /* rien à masquer */ sigaction(SIGTSTP, &a, NULL); /* pause contrôle-Z */ sigaction(SIGINT, &a, NULL); /* fin contrôle-C */ sigaction(SIGTERM, &a, NULL); /* arrêt */ sigaction(SIGSEGV, &a, NULL); /* segmentation fault ! */ int sock=0; initSocket(sock); sock_=sock; ZFile file; initFile(&file); file_=&file; while(1) { bool close=false; processPacket(sock, &file, false, close); if (close) { file.close(); initFile(&file); file_=&file; } } return(EXIT_SUCCESS); }
// CEGUI resource-provider hook: reads the whole of `filename` into a
// freshly allocated, NUL-terminated buffer and hands it to `output`.
// On open failure `output` is left untouched. Buffer ownership passes
// to the RawDataContainer (released by the matching unload call).
// `resourceGroup` is ignored by this provider.
virtual void loadRawDataContainer (const CEGUI::String &filename, CEGUI::RawDataContainer &output, const CEGUI::String &resourceGroup)
{
    ZFile dataFile;
    if (!dataFile.Open(filename.c_str()))
        return;

    const int byteCount = dataFile.GetSize();
    char *buffer = new char[byteCount + 1];
    dataFile.Read(buffer, byteCount);
    buffer[byteCount] = 0;  // trailing NUL so text consumers stay safe

    output.setData((CEGUI::uint8*)buffer);
    output.setSize(byteCount);
}
// Loads a language model from lmFile. The file is probed for the MITLM
// binary magic number first: binary files are deserialized directly,
// anything else is rewound and parsed as a textual ARPA LM.
void ArpaNgramLM::LoadLM(ZFile &lmFile)
{
    const bool isBinary = (ReadUInt64(lmFile) == MITLMv1);
    if (isBinary) {
        Deserialize(lmFile);
        return;
    }
    // Not binary: the magic-number probe consumed bytes, so rewind.
    lmFile.ReOpen();
    _pModel->LoadLM(_probVectors, _bowVectors, lmFile);
}
// Estimates, for each n-gram history, the probability that the next word
// is emitted in HMM-LDA state 1 (the "topic" state), from a file of
// word/state/topic annotations (one TAB-separated triple per line,
// '#' lines are comments). Results land in topicProbVectors[0..maxSize).
// NOTE(review): counts for an order-j trie match are accumulated at
// [j-1][hist] (the backoff history index), not at [j][index] — confirm
// this addressing is intentional.
void NgramModel::_LoadTopicProbs2(vector<DoubleVector> &topicProbVectors,
                                  ZFile &hmmldaFile, size_t maxSize) const {
    assert(maxSize <= size());
    // Per-order occurrence counts; topicProbVectors first accumulates the
    // state==1 hits and is normalized in place at the end.
    vector<CountVector> countVectors(maxSize);
    topicProbVectors.resize(maxSize);
    for (size_t o = 0; o < maxSize; ++o) {
        countVectors[o].resize(sizes(o), 0);
        topicProbVectors[o].resize(sizes(o), 0);
    }

    // Accumulate counts for each n-gram in words.
    size_t numSentenceWords = 1;
    IndexVector hists(maxSize, 0);  // per-order history index from the previous word
    char line[MAXLINE];
    char wordStr[1024];
    VocabIndex word;
    int state, topic;
    while (hmmldaFile.getLine( line, MAXLINE)) {
        if (line[0] == '#') continue;  // Skip comment lines.
        int numItems = sscanf(line, "%s\t%d\t%d\n", wordStr, &state, &topic);
        if (numItems != 3)
            throw std::invalid_argument("Bad format");
        word = _vocab.Find(wordStr);
        numSentenceWords++;
        NgramIndex hist = 0, index;
        // Walk each order up to the sentence length so far (capped at
        // maxSize - 1), threading history indices through `hists`.
        for (size_t j = 1; j <= std::min(numSentenceWords, maxSize - 1); j++) {
            index = _vectors[j].Find(hist, word);
            if (index == -1) {
                printf("Feature skipped\n");
            } else {
                countVectors[j-1][hist]++;
                if (state == 1)
                    topicProbVectors[j-1][hist]++;
            }
            hist = hists[j];
            hists[j] = index;
        }
        if (word == Vocab::EndOfSentence)
            numSentenceWords = 1;  // sentence boundary: restart the context window
    }

    // Finalize probability computation: hits / total per history.
    for (size_t o = 0; o < maxSize; o++) {
        for (size_t i = 0; i < countVectors[o].length(); i++) {
            if (countVectors[o][i] == 0)
                topicProbVectors[o][i] = 0.0;
            else
                topicProbVectors[o][i] /= countVectors[o][i];
        }
    }
}
//--------------------------------------------------------------------------------- // resolve includes // to avoid accessing the run time compiler calling filesystem functions (not supported on debug) // we must manually replace the includes directives by the file referenced. // this is weird but I can't see other alternative except to load precompile the shaders offline. //--------------------------------------------------------------------------------- char* resolveIncludes(char * src) { const char* pInclude; int srcSize = (int)strlen(src); int offset = 0; while ((pInclude = strstr(src+offset, "#include"))) { char* fstart; char* fend; if ((fstart = strchr((char*)pInclude, '\"')) && (fend = strchr(fstart+1, '\"')) ) { chdir("ZenithDatas"); int includePos = int(pInclude - src); tstring fname(fstart+1, (fend-fstart-1)); ZFile file; if (!file.Open(fname.c_str())) { LOG("can't open include file '%s'\n", fname.c_str()); break; } int includeSize = file.GetSize(); int newSize = srcSize + includeSize - (fend - pInclude); char* newSrc = (char*) new char [newSize]; memcpy(newSrc, src, includePos); file.Read(newSrc + includePos, includeSize); memcpy(newSrc + includePos + includeSize, fend+1, newSize - (includePos + includeSize)); newSrc[newSize-1] = 0; delete [] src; src = newSrc; srcSize = newSize; offset = includePos+1; chdir(".."); } } return src; }
// Populates a replay-file header (magic number, binary version, current
// timestamp, client version quadruple) and writes it to File. Returns
// the result of the underlying write.
static bool WriteHeader(ZFile& File)
{
    const auto Now = static_cast<i64>(time(nullptr));

    REPLAY_HEADER_RG Header;
    Header.Header                = RG_REPLAY_MAGIC_NUMBER;
    Header.ReplayBinaryVersion   = RG_REPLAY_BINARY_VERSION;
    Header.Timestamp             = Now;
    Header.ClientVersionMajor    = RGUNZ_VERSION_MAJOR;
    Header.ClientVersionMinor    = RGUNZ_VERSION_MINOR;
    Header.ClientVersionPatch    = RGUNZ_VERSION_PATCH;
    Header.ClientVersionRevision = RGUNZ_VERSION_REVISION;

    return File.Write(Header);
}
// Serves a static file for the request path: validates the path (absolute,
// no ".." traversal), maps directories to index.html, resolves the MIME
// type from the extension, and fills `rep` with the file contents or a
// stock error reply (bad_request / not_found).
void RequestHandler::staticFile(Request &req, Reply &rep){
    std::string request_path = req.rawpath.str();
    /*if (!urlDecode(req.uri, request_path)){ rep = Reply::stock_reply(Reply::bad_request); return; }*/
    // Request path must be absolute and not contain ".." (prevents
    // escaping doc_root_).
    if (request_path.empty() || request_path[0] != '/'
        || request_path.find("..") != std::string::npos){
        rep = Reply::stock_reply(Reply::bad_request);
        return;
    }
    // If path ends in slash (i.e. is a directory) then add "index.html".
    if (request_path[request_path.size() - 1] == '/'){
        request_path += "index.html";
    }
    // Determine the file extension: text after the last dot, provided the
    // dot belongs to the last path component.
    std::size_t last_slash_pos = request_path.find_last_of("/");
    std::size_t last_dot_pos = request_path.find_last_of(".");
    std::string extension;
    if (last_dot_pos != std::string::npos && last_dot_pos > last_slash_pos){
        extension = request_path.substr(last_dot_pos + 1);
    }
    ZFile staticfl;
    if(!staticfl.open(doc_root_ + request_path)){
        rep = Reply::stock_reply(Reply::not_found);
        return;
    }
    rep.status = Reply::ok;
    rep.content = staticfl.read();
    // BUG FIX: the length must be serialized as a decimal string. Assigning
    // the raw size_t invoked std::string's single-char assignment, emitting
    // one byte (length mod 256) as the header value.
    rep.headers["Content-Length"] = std::to_string(rep.content.length());
    rep.headers["Content-Type"] = MimeTypes::extension_to_type(extension);
    rep.headers["Cache-Control"] = "max-age=3600, must-revalidate";
}
// Loads n-gram counts from countsFile, which may be a binary MITLM counts
// file (detected by magic number) or a plain-text counts file. Binary
// loading supports reset mode only; text loading honours `reset` itself.
void NgramLM::LoadCounts(ZFile &countsFile, bool reset)
{
    if (ReadUInt64(countsFile) != MITLMv1) {
        // Text format: the magic-number probe consumed bytes, so rewind
        // before handing the file to the text parser.
        countsFile.ReOpen();
        _pModel->LoadCounts(_countVectors, countsFile, reset);
        return;
    }

    // Binary MITLM format.
    if (!reset)
        throw std::runtime_error("Not implemented yet.");
    VerifyHeader(countsFile, "NgramCounts");
    _pModel->Deserialize(countsFile);
    SetOrder(_pModel->size() - 1);
    for (size_t o = 0; o <= order(); ++o)
        ReadVector(countsFile, _countVectors[o]);
}
// Loads per-n-gram feature values from featureFile into featureVectors.
// Each input line is whitespace-separated words followed by the feature
// value as the final token; n-grams absent from the model are skipped with
// a warning. featureVectors[o] is indexed by n-gram index at order o.
// maxOrder == 0 (or beyond the model) means "up to the model's top order".
void NgramModel::LoadFeatures(vector<DoubleVector> &featureVectors,
                              ZFile &featureFile, size_t maxOrder) const {
    if (featureFile == NULL)
        throw std::invalid_argument("Invalid file");

    // Allocate space for feature vectors.
    if (maxOrder == 0 || maxOrder > size() - 1)
        maxOrder = size() - 1;
    featureVectors.resize(maxOrder + 1);
    for (size_t i = 0; i <= maxOrder; i++)
        featureVectors[i].reset(sizes(i), 0);  // Initialize to 0.

    // Load feature value for each n-gram in feature file.
    char line[MAXLINE];
    vector<VocabIndex> words(256);
    while (featureFile.getLine(line, MAXLINE)) {
        if (line[0] == '\0' || line[0] == '#') continue;  // blank/comment line
        words.clear();
        char *p = &line[0];  // in-place tokenizer cursor (NUL-splits the line)
        while (true) {
            const char *token = p;
            while (*p != 0 && !isspace(*p)) ++p;
            if (*p == 0) {
                // Last token in line: Set feature value.
                // Resolve the accumulated words to an n-gram index by
                // walking the trie one order at a time.
                NgramIndex index = 0;
                for (size_t i = 1; i <= words.size(); i++)
                    index = _vectors[i].Find(index, words[i - 1]);
                if (index == -1)
                    Logger::Warn(1, "Feature skipped.\n");
                else
                    featureVectors[words.size()][index] = atof(token);
                break;  // Move to next line.
            }
            // Not the last token: Lookup word index and add to words.
            size_t len = p - token;
            *p++ = 0;
            if (len > 0)
                words.push_back(_vocab.Find(token, len));
            if (words.size() > maxOrder)
                break;  // more words than the model's order: skip this line
        }
    }
}
// Serializes every recorded observer command to File as
// (time, sender, payload size, payload). Commands whose serialization
// fails are logged and skipped. Always returns true.
bool WriteReplayEnd(ZFile& File, ZObserverCommandList& ReplayCommandList)
{
    constexpr int BUF_SIZE = 1024;

    // Write the commands
    for (auto* pItem : ReplayCommandList)
    {
        auto* pCommand = pItem->pCommand;
        char CommandBuffer[BUF_SIZE];
        auto Size = pCommand->GetData(CommandBuffer, BUF_SIZE);
        if (Size <= 0)
        {
            MLog("WriteReplayEnd -- Invalid command!\n");
            continue;  // skip unserializable commands
        }

        WRITE(pItem->fTime);
        WRITE(pCommand->m_Sender);
        WRITE(Size);
        File.Write(CommandBuffer, Size);
    }

    return true;
}
// Shows the loading-progress screen for the track szImgName: swaps the
// ".track" extension for ".dds", synchronously loads that DDS into a new
// texture, builds a fullscreen textured rect with the GUI effect, and
// pushes the loading frame onto the GUI stack.
void GuiProgress::Show(const char *szImgName)
{
    // preventive
    mGUI->mMessageboxGui.Hide();  // make sure no message box overlaps us

    // build rect
    tstring backimg = szImgName;
    backimg.Replace(".track",".dds");  // track file -> its background image

    /* blocking texture loading */
    ZTexture *tex = GDD->NewTexture();
    ZFile file;
    if (file.Open(backimg.c_str(), ZOPEN_READ, false))
    {
        // Read the whole DDS into a temporary buffer, hand it to the
        // texture, then free the buffer.
        unsigned char *tmpTex = new unsigned char [file.GetSize()];
        file.Read(tmpTex, file.GetSize());
        tex->LoadDDSFromMemory(tmpTex, file.GetSize() );
        delete [] tmpTex;
    }
    // NOTE(review): on open failure `tex` stays empty but is still attached
    // to the material below — confirm this is the intended fallback.

    // --
    backimg.Replace(".dds","\0");  // strip the extension for display
    backimg.ToLower();
    trckNfo->setText(backimg.c_str());

    // Fullscreen rect (flipped V coordinates) carrying the loading image.
    mLoadingRect = AddRect(0.f, 0.f, 1.f, 1.f, 0.f, 1.f, 1.f, 0.f);
    mLoadingRectTransform = mLoadingRect->GetTransform();
    ZMaterial *mat = mLoadingRect->GetMaterial(0);
    mat->setEffect(guifx);
    mat->addTexture(tex);
    mat->connectEffect(true, false);
    FFxSetParam *paramcolor = mat->getNamedParam("col");
    if (paramcolor)
        paramcolor->setVector(vector4(1.f));  // opaque white tint
    mLoadingRect->SetVisible(true);
    //mLoadingRect->AddRef();

    // Show GUI
    mLoadingfrm->show();
    IncStackCount();

    // transition
    /*
    GDD->ApplyRandomTransition();
    // reset post process things
    GDD->SetPPfocalFactor(0.f);
    GDD->SetSepiaStrength(0.f);
    */
}
// Computes the per-word cross-entropy of the LM over a short evaluation
// corpus. Mirrors NgramModel::LoadEvalCorpus's backoff walk but accumulates
// log probabilities directly instead of counts. Returns 70 as a sentinel
// when the entropy is NaN (e.g. no scorable words).
// NOTE(review): `params` is only logged, never applied here — the model is
// assumed to be already parameterized by the caller.
// BUG FIX: the parameter declaration was corrupted by an HTML-entity
// mangling ("&para;ms" rendered as a pilcrow); restored to `&params`,
// matching the uses of `params` in the body.
double PerplexityOptimizer::ShortCorpusComputeEntropy(ZFile &corpusFile,
                                                      const ParamVector &params)
{
    if (corpusFile == NULL)
        throw std::invalid_argument("Invalid file");

    size_t size = _lm._pModel->size();
    // BitVector vocabMask(size, 1);

    // Accumulate counts of prob/bow for computing perplexity of corpusFilename.
    char line[MAXLINE];
    size_t numOOV = 0;
    vector<VocabIndex> words(256);
    _totLogProb = 0.0;
    _numZeroProbs = 0;
    _numWords = 0;
    while (corpusFile.getLine( line, MAXLINE)) {
        // Skip SGML-style document delimiters.
        if (strncmp(line, "<DOC ", 5) == 0 || strcmp(line, "</DOC>") == 0)
            continue;
        // Logger::Log(0, "Additional Input:%s\n", line);

        // Lookup vocabulary indices for each word in the line.
        words.clear();
        // words.push_back(Vocab::EndOfSentence);
        char *p = &line[0];
        while (*p != 0) {
            while (isspace(*p)) ++p;  // Skip consecutive spaces.
            const char *token = p;
            while (*p != 0 && !isspace(*p)) ++p;
            size_t len = p - token;
            if (*p != 0) *p++ = 0;
            words.push_back(_lm.vocab().Find(token, len));
        }
        // words.push_back(Vocab::EndOfSentence);

        // Add each top order n-gram. The usable order starts at bigrams
        // (capped by the model) and grows by one per in-vocabulary word.
        size_t ngramOrder = std::min((size_t)2, size - 1);
        for (size_t i = 1; i < words.size(); i++) {
            if (words[i] == Vocab::Invalid) {
                // OOV word encountered. Reset order to unigrams.
                ngramOrder = 1;
                numOOV++;
            } else {
                NgramIndex index;
                size_t boOrder = ngramOrder;
                // Back off until the n-gram exists, folding each applied
                // backoff weight into the total log-prob.
                while ((index = _lm._pModel->_Find(&words[i-boOrder+1], boOrder)) == -1) {
                    --boOrder;
                    NgramIndex hist = _lm._pModel->_Find(&words[i - boOrder], boOrder);
                    if (hist != (NgramIndex)-1) {
                        if ((_lm.bows(boOrder))[hist] != 0) {
                            // _bowCountVectors[boOrder][hist]++;
                            _totLogProb += log((_lm.bows(boOrder))[hist]) * 1;
                        }
                    }
                }
                ngramOrder = std::min(ngramOrder + 1, size - 1);
                // _probCountVectors[boOrder][index]++;
                if ((_lm.probs(boOrder))[index] == 0)
                    _numZeroProbs++;  // zero-prob n-grams are excluded from the average
                else
                    _totLogProb += log((_lm.probs(boOrder))[index]) * 1;
                _numWords++;
            }
        }
    }
    // Average negative log-prob over words with nonzero probability.
    double entropy = -_totLogProb / (_numWords - _numZeroProbs);
    // std::cout << -_totLogProb << "\t" << _numWords << "\t" << _numZeroProbs << std::endl;
    if (Logger::GetVerbosity() > 2)
        std::cout << exp(entropy) << "\t" << params << std::endl;
    else
        Logger::Log(2, "%f\n", exp(entropy));
    return std::isnan(entropy) ? 70 : entropy;
}
// Retrieves the file listing for `path` (or the archive root when path is
// NULL/empty) into fileList. On first use, populates the archive database
// by running the 7-zip "L" (list) command with the given password.
void zmodifyer::getInfo( std::vector< std::pair< std::wstring, ZFile* > > & fileList, wchar_t const * path, wchar_t const * password )
{
    if( zdb_->db_.IsEmpty() )
    {
        // Command strings: "L" = list; "-P<password>" supplies the
        // (possibly empty) archive password; then the archive file name.
        UStringVector commandStrings;
        commandStrings.Add(L"L");
        UString pw( L"-P" );
        commandStrings.Add( (pw + ( password ? password : L"") ) );
        commandStrings.Add( file_name_ );
        CArchiveCommandLineOptions options;
        OptionSetting( commandStrings, options );

        // Determine the archive format index.
        CIntVector formatIndices;
        if (!codecs_->FindFormatForArchiveType(options.ArcType, formatIndices))
        {
            throw kUnsupportedArcTypeMessage;
        }

        UInt64 numErrors = 0;
        // Run the list operation; the directory tree is populated in zdb_.
        HRESULT result = ListArchives( codecs_, formatIndices, options.StdInMode,
            options.ArchivePathsSorted, options.ArchivePathsFullSorted,
            options.WildcardCensor.Pairs.Front().Head, options.EnableHeaders,
            options.TechMode,
#ifndef _NO_CRYPTO
            options.PasswordEnabled, options.Password,
#endif
            numErrors, zdb_);
        // NOTE(review): `result` and `numErrors` are ignored — failures fall
        // through to the folder lookup below; confirm this is intended.
    }

    // Resolve the requested folder: the root when no path was given.
    ZFile * zfile = 0;
    std::wstring filePath = path ? path : L"";
    if( filePath.empty() )
    {
        zfile = &zdb_->folder_;
    }
    else
    {
        zfile = zdb_->folder_.find( filePath );
    }
    if( zfile )
    {
        zfile->getList( fileList, filePath );
    }
}
// Computes a normalized document-frequency entropy per n-gram over a corpus
// whose documents are delimited by <DOC ...> / </DOC> markers. Per-document
// counts c contribute c*log(c); the final value per n-gram is
// ((H / -totCount) + log(totCount)) / log(numDocs): ~1.0 when the n-gram is
// spread evenly across documents, 0.0 when concentrated or unseen.
void NgramModel::_LoadEntropy(vector<DoubleVector> &entropyVectors,
                              ZFile &corpusFile, size_t maxSize) const {
    if (corpusFile == NULL)
        throw std::invalid_argument("Invalid file");

    // Resize vectors and allocate counts.
    if (maxSize == 0 || maxSize > size())
        maxSize = size();
    int numDocs = 0;
    vector<CountVector> countVectors(maxSize);     // within-document counts
    vector<CountVector> totCountVectors(maxSize);  // corpus-wide totals
    entropyVectors.resize(maxSize);
    for (size_t o = 0; o < maxSize; ++o) {
        countVectors[o].resize(sizes(o), 0);
        totCountVectors[o].resize(sizes(o), 0);
        entropyVectors[o].resize(sizes(o), 0);
    }

    // Accumulate counts for each n-gram in corpus file.
    char line[MAXLINE];
    vector<VocabIndex> words(256);
    vector<NgramIndex> hists(maxSize);
    while (corpusFile.getLine( line, MAXLINE)) {
        if (strcmp(line, "</DOC>") == 0) {
            // Accumulate frequency.
            // End of document: fold this document's counts into the totals
            // and the entropy numerator, then zero them for the next doc.
            numDocs++;
            for (size_t o = 1; o < countVectors.size(); ++o) {
                for (size_t i = 0; i < countVectors[o].length(); ++i) {
                    int c = countVectors[o][i];
                    if (c > 0) {
                        totCountVectors[o][i] += c;
                        entropyVectors[o][i] += c * log(c);
                        countVectors[o][i] = 0;
                    }
                }
            }
            continue;
        } else if (strncmp(line, "<DOC ", 5) == 0)
            continue;  // document header carries no n-grams

        // Lookup vocabulary indices for each word in the line.
        words.clear();
        words.push_back(Vocab::EndOfSentence);
        char *p = &line[0];
        while (*p != '\0') {
            while (isspace(*p)) ++p;  // Skip consecutive spaces.
            const char *token = p;
            while (*p != 0 && !isspace(*p)) ++p;
            size_t len = p - token;
            if (*p != 0) *p++ = 0;
            words.push_back(_vocab.Find(token, len));
        }
        words.push_back(Vocab::EndOfSentence);

        // Add each order n-gram.
        hists[1] = _vectors[1].Find(0, Vocab::EndOfSentence);
        for (size_t i = 1; i < words.size(); ++i) {
            VocabIndex word = words[i];
            NgramIndex hist = 0;
            // Count the n-gram at each order, threading the history
            // indices for the next word through `hists`.
            for (size_t j = 1; j < std::min(i + 2, maxSize); ++j) {
                if (word != Vocab::Invalid) {
                    NgramIndex index = _vectors[j].Find(hist, word);
                    if (index >= 0)
                        countVectors[j][index]++;
                    else
                        Logger::Warn(1, "DocFreq feature skipped.\n");
                    hist = hists[j];
                    hists[j] = index;
                } else {
                    // OOV word: invalidate histories through this order.
                    hist = hists[j];
                    hists[j] = NgramVector::Invalid;
                }
            }
        }
    }

    // Finalize entropy computation.
    double invLogNumDocs = 1.0 / log((double)numDocs);
    for (size_t o = 1; o < maxSize; o++)
        entropyVectors[o] = CondExpr(
            totCountVectors[o] == 0, 0.0,
            ((entropyVectors[o] / -totCountVectors[o]) +
             log(asDouble(totCountVectors[o]))) * invLogNumDocs);
}
// Builds/extends the model's n-gram trie from a plain-text corpus and
// accumulates the corresponding counts. Each line is one sentence;
// EndOfSentence markers are added at both ends. New words and n-grams are
// added on the fly; count vectors grow by doubling as needed and everything
// is sorted and trimmed to actual size at the end. `reset` zeroes existing
// counts instead of accumulating onto them.
void NgramModel::LoadCorpus(vector<CountVector> &countVectors,
                            ZFile &corpusFile, bool reset) {
    if (corpusFile == NULL)
        throw std::invalid_argument("Invalid file (corpusFile is NULL)");

    // Resize vectors and allocate counts.
    countVectors.resize(size());
    countVectors[0].resize(1, 0);
    for (size_t o = 1; o < size(); ++o) {
        // Capacity: at least 64K entries, rounded up to a power of two.
        size_t capacity = std::max(1ul<<16, nextPowerOf2(_vectors[o].size()));
        if (reset)
            countVectors[o].reset(capacity, 0);
        else
            countVectors[o].resize(capacity, 0);
    }

    // Accumulate counts for each n-gram in corpus file.
    char line[MAXLINE];
    vector<VocabIndex> words(256);
    vector<NgramIndex> hists(size(), -1);
    while (corpusFile.getLine( line, MAXLINE)) {
        // Lookup vocabulary indices for each word in the line.
        words.clear();
        words.push_back(Vocab::EndOfSentence);
        char *p = &line[0];
        while (*p != '\0') {
            while (isspace(*p)) ++p;  // Skip consecutive spaces.
            const char *token = p;
            while (*p != 0 && !isspace(*p)) ++p;
            size_t len = p - token;
            if (*p != 0) *p++ = 0;
            words.push_back(_vocab.Add(token, len));
        }
        words.push_back(Vocab::EndOfSentence);

        // Add each order n-gram.
        hists[1] = _vectors[1].Add(0, Vocab::EndOfSentence);
        for (size_t i = 1; i < words.size(); ++i) {
            VocabIndex word = words[i];
            NgramIndex hist = 0;
            for (size_t j = 1; j < std::min(i + 2, size()); ++j) {
                if (word != Vocab::Invalid && hist != NgramVector::Invalid) {
                    bool newNgram;
                    NgramIndex index = _vectors[j].Add(hist, word, &newNgram);
                    // Double the count vector when a new n-gram lands past
                    // its current length.
                    if (newNgram && (size_t)index >= countVectors[j].length())
                        countVectors[j].resize(countVectors[j].length() * 2, 0);
                    countVectors[j][index]++;
                    hist = hists[j];
                    hists[j] = index;
                } else {
                    // OOV/invalid history: mark this order's history invalid.
                    hist = hists[j];
                    hists[j] = NgramVector::Invalid;
                }
            }
        }
    }

    // Add remaining vocabulary, if necessary.
    if (_vectors[1].size() != _vocab.size()) {
        for (VocabIndex i = 0; i < (VocabIndex)_vocab.size(); ++i)
            _vectors[1].Add(0, i);
        countVectors[1].resize(_vocab.size(), 0);
    }

    // Sort and resize counts to actual size.
    VocabVector vocabMap;
    IndexVector ngramMap(1, 0), boNgramMap;
    _vocab.Sort(vocabMap);
    for (size_t o = 0; o < size(); ++o) {
        boNgramMap.swap(ngramMap);
        if (_vectors[o].Sort(vocabMap, boNgramMap, ngramMap))
            NgramModel::ApplySort(ngramMap, countVectors[o]);
        else
            countVectors[o].resize(ngramMap.length());
    }
    _ComputeBackoffs();
}
// Accumulates, for each n-gram in the model, how many times its probability
// (probCountVectors) and backoff weight (bowCountVectors) would be used when
// scoring the given corpus. Words outside vocabMask (or unknown) count as
// OOV and reset the context to unigrams. Totals are returned through
// outNumOOV / outNumWords.
void NgramModel::LoadEvalCorpus(vector<CountVector> &probCountVectors,
                                vector<CountVector> &bowCountVectors,
                                BitVector &vocabMask, ZFile &corpusFile,
                                size_t &outNumOOV, size_t &outNumWords) const {
    if (corpusFile == NULL)
        throw std::invalid_argument("Invalid file");

    // Allocate count vectors.
    probCountVectors.resize(size());
    bowCountVectors.resize(size() - 1);
    for (size_t i = 0; i < size(); i++)
        probCountVectors[i].reset(_vectors[i].size(), 0);
    for (size_t i = 0; i < size() - 1; i++)
        bowCountVectors[i].reset(_vectors[i].size(), 0);

    // Accumulate counts of prob/bow for computing perplexity of corpusFilename.
    char line[MAXLINE];
    size_t numOOV = 0;
    size_t numWords = 0;
    vector<VocabIndex> words(256);
    while (corpusFile.getLine( line, MAXLINE)) {
        if (strncmp(line, "<DOC ", 5) == 0 || strcmp(line, "</DOC>") == 0)
            continue;  // skip SGML-style document delimiters
        // Logger::Log(0, "Additional Input:%s\n", line);

        // Lookup vocabulary indices for each word in the line.
        words.clear();
        // words.push_back(Vocab::EndOfSentence);
        char *p = &line[0];
        while (*p != 0) {
            while (isspace(*p)) ++p;  // Skip consecutive spaces.
            const char *token = p;
            while (*p != 0 && !isspace(*p)) ++p;
            size_t len = p - token;
            if (*p != 0) *p++ = 0;
            words.push_back(_vocab.Find(token, len));
        }
        // words.push_back(Vocab::EndOfSentence);

        // Add each top order n-gram. The usable order starts at bigrams
        // (capped by the model) and grows by one per in-vocabulary word.
        size_t ngramOrder = std::min((size_t)2, size() - 1);
        for (size_t i = 1; i < words.size(); i++) {
            if (words[i] == Vocab::Invalid || !vocabMask[words[i]]) {
                // OOV word encountered. Reset order to unigrams.
                ngramOrder = 1;
                numOOV++;
            } else {
                NgramIndex index;
                size_t boOrder = ngramOrder;
                // Back off until the n-gram exists, counting each backoff
                // weight that would be applied along the way.
                while ((index = _Find(&words[i-boOrder+1], boOrder)) == -1) {
                    --boOrder;
                    NgramIndex hist = _Find(&words[i - boOrder], boOrder);
                    if (hist != (NgramIndex)-1)
                        bowCountVectors[boOrder][hist]++;
                }
                ngramOrder = std::min(ngramOrder + 1, size() - 1);
                probCountVectors[boOrder][index]++;
                numWords++;
            }
        }
    }
    outNumOOV = numOOV;
    outNumWords = numWords;
}
// Parses a textual ARPA-format language model into the trie, filling
// probVectors (log10 probabilities converted to linear) and bowVectors
// (backoff weights, 1.0 when omitted) per order. The vocabulary and trie
// are extended as needed, then sorted, and probs/bows are remapped to the
// sorted order.
void NgramModel::LoadLM(vector<ProbVector> &probVectors,
                        vector<ProbVector> &bowVectors,
                        ZFile &lmFile) {
    if (lmFile == NULL)
        throw std::invalid_argument("Invalid file");

    // Read ARPA LM header.
    char line[MAXLINE];
    size_t o, len;
    vector<size_t> ngramLengths(1);
    // Scan forward to the "\data\" marker...
    while (lmFile.getLine( line, MAXLINE) && strcmp(line, "\\data\\") != 0)
        /* NOP */;
    // ...then read the "ngram N=len" declarations, in order.
    while (lmFile.getLine( line, MAXLINE)) {
        unsigned int o, len;  // NOTE(review): deliberately(?) shadows the outer o/len
        if (sscanf(line, "ngram %u=%u", &o, &len) != 2) break;
        assert(o == ngramLengths.size());
        ngramLengths.push_back(len);
    }

    // Allocate buffers and read counts.
    _vocab.Reserve(ngramLengths[1]);
    probVectors.resize(size());
    probVectors[0].resize(1, 0.0);
    bowVectors.resize(size() - 1);
    bowVectors[0].resize(1, 0.0);
    for (o = 1; o < size(); o++) {
        ProbVector &probs = probVectors[o];
        // NOTE(review): for the top order this indexes one past
        // bowVectors' length; the reference is unused then (hasBow is
        // false) but confirm it is safe with this vector type.
        ProbVector &bows = bowVectors[o];
        bool hasBow = (o < size() - 1);

        // Preallocate buffer for n-grams.
        _vectors[o].Reserve(ngramLengths[o]);
        probs.reset(ngramLengths[o]);
        if (hasBow) bows.reset(ngramLengths[o]);

        // Expect the "\o-ngrams:" section header.
        lmFile.getLine( line, MAXLINE);
        unsigned int i;
        if (sscanf(line, "\\%u-ngrams:", &i) != 1 || i != o) {
            throw std::invalid_argument("Unexpected file format.");
        }
        while (true) {
            lmFile.getLine( line, MAXLINE);
            size_t lineLen = strlen(line);
            if (line[0] == '\0') break;  // Empty line ends section.
            char *p = &line[0];

            // Read log probability.
            Prob prob = (Prob)pow(10.0, strtod(p, &p));
            p++;  // skip the separator after the number

            // Read i words.
            NgramIndex index = 0;
            const char *token = NULL;
            for (i = 1; i <= o; ++i) {
                token = p;
                while (*p != 0 && !isspace(*p)) ++p;
                len = p - token;
                *p++ = 0;
                VocabIndex vocabIndex = _vocab.Add(token, len);
                if (vocabIndex == Vocab::Invalid) {
                    // Word filtered out of the vocabulary: abandon this
                    // n-gram (and, via the break below, the section).
                    index = NgramVector::Invalid;
                    break;
                }
                index = _vectors[i].Add(index, vocabIndex);
            }
            if (index == NgramVector::Invalid) break;

            // Set probability and backoff weight.
            // NOTE(review): this compares an n-gram index to the VocabIndex
            // constant EndOfSentence — presumably they coincide at order 1;
            // confirm.
            if (index == Vocab::EndOfSentence && o == 1) {
                if (strcmp(token, "<s>") == 0) {
                    // <s> carries no probability (log10 p <= -99), only an
                    // optional backoff weight.
                    assert(prob <= pow(10, -99));
                    bows[index] = (p >= &line[lineLen]) ?
                        (Prob)1 : (Prob)pow(10.0, strtod(p, &p));
                } else {
                    probs[index] = prob;
                    assert(p >= &line[lineLen]);
                }
            } else {
                probs[index] = prob;
                if (hasBow) {
                    // Read optional backoff weight.
                    bows[index] = (p >= &line[lineLen]) ?
                        (Prob)1 : (Prob)pow(10.0, strtod(p, &p));
                }
            }
        }
    }

    // Read ARPA LM footer.
    while (lmFile.getLine( line, MAXLINE) && strcmp(line, "\\end\\") != 0)
        /* NOP */;

    // Sort and resize probs/bows to actual size.
    VocabVector vocabMap;
    IndexVector ngramMap(1, 0), boNgramMap;
    _vocab.Sort(vocabMap);
    for (size_t o = 0; o < size(); ++o) {
        boNgramMap.swap(ngramMap);
        if (_vectors[o].Sort(vocabMap, boNgramMap, ngramMap)) {
            NgramModel::ApplySort(ngramMap, probVectors[o]);
            if (o < bowVectors.size())
                NgramModel::ApplySort(ngramMap, bowVectors[o]);
        } else {
            probVectors[o].resize(ngramMap.length());
            if (o < bowVectors.size())
                bowVectors[o].resize(ngramMap.length());
        }
    }
    _ComputeBackoffs();
}
// Loads n-gram counts from a text counts file. Each line holds the n-gram's
// words followed by the count as the final token; blank and '#' lines are
// skipped. New words and n-grams are added to the model, counts accumulate
// (vectors doubling as needed), and at the end the trie is sorted and the
// count vectors are remapped/trimmed to actual size. `reset` zeroes
// pre-existing count vectors instead of accumulating onto them.
void NgramModel::LoadCounts(vector<CountVector> &countVectors,
                            ZFile &countsFile, bool reset) {
    if (countsFile == NULL)
        throw std::invalid_argument("Invalid file");

    // Resize vectors and allocate counts.
    countVectors.resize(size());
    countVectors[0].resize(1, 0);
    for (size_t o = 1; o < size(); ++o) {
        // Capacity: at least 64K entries, rounded up to a power of two.
        size_t capacity = std::max(1ul<<16, nextPowerOf2(_vectors[o].size()));
        if (reset)
            countVectors[o].reset(capacity, 0);
        else
            countVectors[o].resize(capacity, 0);
    }

    // Accumulate counts for each n-gram in counts file.
    char line[MAXLINE];
    vector<VocabIndex> words(256);
    while (countsFile.getLine( line, MAXLINE)) {
        if (line[0] == '\0' || line[0] == '#') continue;  // blank/comment line
        words.clear();
        char *p = &line[0];  // in-place tokenizer cursor (NUL-splits the line)
        while (words.size() < size()) {
            const char *token = p;
            while (*p != 0 && !isspace(*p)) ++p;
            if (*p == 0) {
                // Last token in line: Add ngram with count
                bool newNgram;
                size_t order = words.size();
                NgramIndex index = 0;
                // Walk/extend the trie along the word sequence.
                for (size_t i = 1; i < order; ++i)
                    index = _vectors[i].Add(index, words[i - 1]);
                index = _vectors[order].Add(index, words[order - 1], &newNgram);
                // Double the count vector when a new n-gram lands past its
                // current length.
                if (newNgram && (size_t)index >= countVectors[order].length())
                    countVectors[order].resize(countVectors[order].length() * 2, 0);
                countVectors[order][index] += atoi(token);
                break;  // Move to next line.
            }
            // Not the last token: Lookup word index and add to words.
            size_t len = p - token;
            *p++ = 0;
            if (len > 0) {
                VocabIndex vocabIndex = _vocab.Add(token, len);
                if (vocabIndex == Vocab::Invalid) break;  // filtered word: drop line
                words.push_back(vocabIndex);
            }
        }
    }

    // Sort and resize counts to actual size.
    VocabVector vocabMap;
    IndexVector ngramMap(1, 0), boNgramMap;
    _vocab.Sort(vocabMap);
    for (size_t o = 0; o < size(); ++o) {
        boNgramMap.swap(ngramMap);
        if (_vectors[o].Sort(vocabMap, boNgramMap, ngramMap))
            NgramModel::ApplySort(ngramMap, countVectors[o]);
        else
            countVectors[o].resize(ngramMap.length());
    }
    _ComputeBackoffs();
}