Code Example #1
File: logger.cpp  Project: BackupTheBerlios/robotm6
// --------------------------------------------------------------------
// main
// --------------------------------------------------------------------
int main(int argc, char* argv[])
{
    parseArgs(argc, argv);

    struct sigaction a;
    a.sa_handler = traitementInterrupt;  /* handler to invoke */
    sigemptyset(&a.sa_mask);             /* nothing extra to mask */
    a.sa_flags = 0;                      /* no special flags */

    sigaction(SIGTSTP, &a, NULL);        /* Ctrl-Z: pause */
    sigaction(SIGINT,  &a, NULL);        /* Ctrl-C: quit */
    sigaction(SIGTERM, &a, NULL);        /* termination */
    sigaction(SIGSEGV, &a, NULL);        /* segmentation fault! */

    int sock = 0;
    initSocket(sock);
    sock_ = sock;

    ZFile file;
    initFile(&file);
    file_ = &file;

    while (1) {
        bool close = false;
        processPacket(sock, &file, false, close);
        if (close) {
            file.close();
            initFile(&file);
            file_ = &file;
        }
    }
    return(EXIT_SUCCESS);
}
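The handler traitementInterrupt and the globals sock_ and file_ are defined elsewhere in logger.cpp and not shown here. A minimal sketch of what they might look like (the bodies below are assumptions for illustration, not the project's actual code):

#include <cstdlib>
#include <unistd.h>

static int    sock_ = -1;    // assumed global: socket descriptor
static ZFile* file_ = NULL;  // assumed global: current log file

// Hypothetical handler: release resources and exit. The real project may
// instead reopen the log on SIGTSTP or record the signal before exiting.
void traitementInterrupt(int sig)
{
    if (file_) file_->close();
    if (sock_ >= 0) close(sock_);
    _exit(EXIT_SUCCESS);
}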
Code Example #2
File: ZCEGui.cpp  Project: pulkomandy/.theRush-
	virtual void loadRawDataContainer (const CEGUI::String &filename,
		CEGUI::RawDataContainer &output, const CEGUI::String &resourceGroup)
	{
		ZFile file;
		if (file.Open(filename.c_str()))
		{
			int fn = file.GetSize();
			char *ptr = new char[fn + 1];
			file.Read(ptr, fn);
			ptr[fn] = 0;  // NUL-terminate so text resources are usable as C strings
			output.setData((CEGUI::uint8*)ptr);
			output.setSize(fn);
		}
	}
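The buffer is allocated with new[] and ownership passes to the RawDataContainer, so the matching unloadRawDataContainer override must free it. A minimal sketch of that counterpart (an assumption; the project's actual override is not shown in this listing):

	virtual void unloadRawDataContainer (CEGUI::RawDataContainer &data)
	{
		// Free the buffer allocated in loadRawDataContainer above.
		delete[] data.getDataPtr();
		data.setData(0);
		data.setSize(0);
	}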
Code Example #3
void
ArpaNgramLM::LoadLM(ZFile &lmFile) {
    if (ReadUInt64(lmFile) == MITLMv1) {
        Deserialize(lmFile);
    } else {
        lmFile.ReOpen();
        _pModel->LoadLM(_probVectors, _bowVectors, lmFile);
    }
}
Code Example #4
void 
NgramModel::_LoadTopicProbs2(vector<DoubleVector> &topicProbVectors,
                             ZFile &hmmldaFile, size_t maxSize) const {
    assert(maxSize <= size());

    vector<CountVector> countVectors(maxSize);
    topicProbVectors.resize(maxSize);
    for (size_t o = 0; o < maxSize; ++o) {
        countVectors[o].resize(sizes(o), 0);
        topicProbVectors[o].resize(sizes(o), 0);
    }

    // Accumulate counts for each n-gram in words.
    size_t      numSentenceWords = 1;
    IndexVector hists(maxSize, 0);
    char        line[MAXLINE];
    char        wordStr[1024];
    VocabIndex  word;
    int         state, topic;
    while (hmmldaFile.getLine( line, MAXLINE)) {
        if (line[0] == '#') continue;  // Skip comment lines.
        int numItems = sscanf(line, "%s\t%d\t%d\n", wordStr, &state, &topic);
        if (numItems != 3) throw std::invalid_argument("Bad format");
        word = _vocab.Find(wordStr);
        numSentenceWords++;

        NgramIndex hist = 0, index;
        for (size_t j = 1; j <= std::min(numSentenceWords, maxSize - 1); j++) {
            index = _vectors[j].Find(hist, word);
            if (index == -1) {
                printf("Feature skipped\n");
            } else {
                countVectors[j-1][hist]++;
                if (state == 1)
                    topicProbVectors[j-1][hist]++;
            }
            hist     = hists[j];
            hists[j] = index;
        }
        if (word == Vocab::EndOfSentence)
            numSentenceWords = 1;
    }

    // Finalize probability computation.
    for (size_t o = 0; o < maxSize; o++) {
        for (size_t i = 0; i < countVectors[o].length(); i++) {
            if (countVectors[o][i] == 0)
                topicProbVectors[o][i] = 0.0;
            else
                topicProbVectors[o][i] /= countVectors[o][i];
        }
    }    
}
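The finalization loop converts the raw tallies into a maximum-likelihood estimate, per n-gram g, of the probability that g was observed in the topic state (state == 1):

    P(topic | g) = count(g with state = 1) / count(g)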
Code Example #5
File: FFxProgram.cpp  Project: pulkomandy/.theRush-
//---------------------------------------------------------------------------------
// resolve includes
// To keep the runtime shader compiler from calling filesystem functions
// (not supported in debug builds), we manually replace each #include
// directive with the contents of the referenced file. This is awkward, but
// the only real alternative would be to precompile the shaders offline.
char* resolveIncludes(char * src)
{
	const char* pInclude;
	int srcSize = (int)strlen(src);
	int offset = 0;
	while ((pInclude = strstr(src+offset, "#include")))
	{
		char* fstart;
		char* fend;
		if ((fstart = strchr((char*)pInclude, '\"')) && (fend = strchr(fstart+1, '\"')) )
		{
			chdir("ZenithDatas");
			int includePos = int(pInclude - src);
			tstring	fname(fstart+1, (fend-fstart-1));
			ZFile file;
			if (!file.Open(fname.c_str()))
			{
				LOG("can't open include file '%s'\n", fname.c_str());
				break;
			}
			int includeSize = file.GetSize();
			int newSize = srcSize + includeSize - (fend - pInclude);

			char* newSrc = new char[newSize];
			memcpy(newSrc, src, includePos);
			file.Read(newSrc + includePos, includeSize);
			memcpy(newSrc + includePos + includeSize, fend+1, newSize - (includePos + includeSize));
			newSrc[newSize-1] = 0;
			delete [] src;

			src = newSrc;
			srcSize = newSize - 1;  // newSize counts the copied NUL terminator
			offset = includePos+1;
			chdir("..");
		}
	}
	return src;
}
Code Example #6
File: ZReplayWrite.cpp  Project: Asunaya/RefinedGunz
static bool WriteHeader(ZFile& File)
{
	REPLAY_HEADER_RG Header;
	Header.Header = RG_REPLAY_MAGIC_NUMBER;
	Header.ReplayBinaryVersion = RG_REPLAY_BINARY_VERSION;
	Header.Timestamp = static_cast<i64>(time(nullptr));
	Header.ClientVersionMajor = RGUNZ_VERSION_MAJOR;
	Header.ClientVersionMinor = RGUNZ_VERSION_MINOR;
	Header.ClientVersionPatch = RGUNZ_VERSION_PATCH;
	Header.ClientVersionRevision = RGUNZ_VERSION_REVISION;

	return File.Write(Header);
}
Code Example #7
File: staticfile.cpp  Project: team3499/team3499.org
void RequestHandler::staticFile(Request &req, Reply &rep){
    std::string request_path = req.rawpath.str();
    /*if (!urlDecode(req.uri, request_path)){
        rep = Reply::stock_reply(Reply::bad_request);
        return;
    }*/

    // Request path must be absolute and not contain "..".
    if (request_path.empty() || request_path[0] != '/' || request_path.find("..") != std::string::npos){
        rep = Reply::stock_reply(Reply::bad_request);
        return;
    }

    // If path ends in slash (i.e. is a directory) then add "index.html".
    if (request_path[request_path.size() - 1] == '/'){
        request_path += "index.html";
    }

    // Determine the file extension.
    std::size_t last_slash_pos = request_path.find_last_of("/");
    std::size_t last_dot_pos = request_path.find_last_of(".");
    std::string extension;
    if (last_dot_pos != std::string::npos && last_dot_pos > last_slash_pos){
        extension = request_path.substr(last_dot_pos + 1);
    }

    ZFile staticfl;
    if (!staticfl.open(doc_root_ + request_path)){
        rep = Reply::stock_reply(Reply::not_found);
        return;
    }
    rep.status = Reply::ok;
    rep.content = staticfl.read();
    rep.headers["Content-Length"] = rep.content.length();
    rep.headers["Content-Type"] = MimeTypes::extension_to_type(extension);
    rep.headers["Cache-Control"] = "max-age=3600, must-revalidate";
}
Code Example #8
void
NgramLM::LoadCounts(ZFile &countsFile, bool reset) {
    if (ReadUInt64(countsFile) == MITLMv1) {
        if (!reset)
            throw std::runtime_error("Not implemented yet.");
        VerifyHeader(countsFile, "NgramCounts");
        _pModel->Deserialize(countsFile);
        SetOrder(_pModel->size() - 1);
        for (size_t o = 0; o <= order(); ++o)
            ReadVector(countsFile, _countVectors[o]);
    } else {
        countsFile.ReOpen();
        _pModel->LoadCounts(_countVectors, countsFile, reset);
    }
}
Code Example #9
void
NgramModel::LoadFeatures(vector<DoubleVector> &featureVectors,
                         ZFile &featureFile, size_t maxOrder) const {
    if (featureFile == NULL) throw std::invalid_argument("Invalid file");

    // Allocate space for feature vectors.
    if (maxOrder == 0 || maxOrder > size() - 1)
        maxOrder = size() - 1;
    featureVectors.resize(maxOrder + 1);
    for (size_t i = 0; i <= maxOrder; i++)
        featureVectors[i].reset(sizes(i), 0);  // Initialize to 0.

    // Load feature value for each n-gram in feature file.
    char                    line[MAXLINE];
    vector<VocabIndex> words(256);
    while (featureFile.getLine(line, MAXLINE)) {
        if (line[0] == '\0' || line[0] == '#') continue;
        words.clear();
        char *p = &line[0];
        while (true) {
            const char *token = p;
            while (*p != 0 && !isspace(*p))  ++p;
            if (*p == 0) {
                // Last token in line: Set feature value.
                NgramIndex index = 0;
                for (size_t i = 1; i <= words.size(); i++)
                    index = _vectors[i].Find(index, words[i - 1]);
                if (index == -1)
                    Logger::Warn(1, "Feature skipped.\n");
                else
                    featureVectors[words.size()][index] = atof(token);
                break;  // Move to next line.
            }

            // Not the last token: Lookup word index and add to words.
            size_t len = p - token;
            *p++ = 0;
            if (len > 0) words.push_back(_vocab.Find(token, len));
            if (words.size() > maxOrder) break;
        }
    }
}
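The expected input is one n-gram per line, whitespace-separated, with the feature value as the last token; empty lines and lines beginning with '#' are skipped. An illustrative input (values made up):

# bigram feature values
the          0.75
the cat      0.31
cat sat      0.02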
Code Example #10
File: ZReplayWrite.cpp  Project: Asunaya/RefinedGunz
bool WriteReplayEnd(ZFile& File, ZObserverCommandList& ReplayCommandList)
{
	// Write the commands
	for (auto* pItem : ReplayCommandList)
	{
		auto* pCommand = pItem->pCommand;

		constexpr int BUF_SIZE = 1024;
		char CommandBuffer[BUF_SIZE];
		auto Size = pCommand->GetData(CommandBuffer, BUF_SIZE);
		if (Size <= 0)
		{
			MLog("WriteReplayEnd -- Invalid command!\n");
			continue;
		}

		WRITE(pItem->fTime);
		WRITE(pCommand->m_Sender);
		WRITE(Size);
		File.Write(CommandBuffer, Size);
	}

	return true;
}
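WRITE is a project macro whose definition is not shown in this listing. Given the templated ZFile::Write(obj) overload used by WriteHeader in Code Example #6, and that this function returns bool, a plausible minimal definition would be (a hypothetical sketch, not the verified source):

// Hypothetical: write one object and bail out of the caller on failure.
#define WRITE(x) do { if (!File.Write(x)) return false; } while (false)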
Code Example #11
File: ZGuiProgress.cpp  Project: pulkomandy/.theRush-
void GuiProgress::Show(const char *szImgName)
{
	// preventive: hide any message box that may still be showing
	mGUI->mMessageboxGui.Hide();
	// build rect

	tstring backimg = szImgName;
	backimg.Replace(".track",".dds");

	// blocking texture loading

	ZTexture *tex = GDD->NewTexture();
	
	ZFile file;
	if (file.Open(backimg.c_str(), ZOPEN_READ, false))
	{
		unsigned char *tmpTex = new unsigned char [file.GetSize()];
		file.Read(tmpTex, file.GetSize());
		tex->LoadDDSFromMemory(tmpTex, file.GetSize() );
		
		delete [] tmpTex;
	}
	// strip the extension and normalize for display
	backimg.Replace(".dds", "");
	backimg.ToLower();

	trckNfo->setText(backimg.c_str());


	mLoadingRect = AddRect(0.f, 0.f, 1.f, 1.f, 0.f, 1.f, 1.f, 0.f);
	mLoadingRectTransform = mLoadingRect->GetTransform();


	ZMaterial *mat = mLoadingRect->GetMaterial(0);
	
	mat->setEffect(guifx);
	mat->addTexture(tex);

	mat->connectEffect(true, false);
	FFxSetParam *paramcolor = mat->getNamedParam("col");
	if (paramcolor)
		paramcolor->setVector(vector4(1.f));

	mLoadingRect->SetVisible(true);
	
	//mLoadingRect->AddRef();

	// Show GUI
	mLoadingfrm->show();

	IncStackCount();
	// transition
/*
	GDD->ApplyRandomTransition();
	// reset post process things
	
	GDD->SetPPfocalFactor(0.f);
	GDD->SetSepiaStrength(0.f);
	*/
}
Code Example #12
double
PerplexityOptimizer::ShortCorpusComputeEntropy(ZFile &corpusFile,
                                               const ParamVector &params) {
    if (corpusFile == NULL) throw std::invalid_argument("Invalid file");
    size_t size = _lm._pModel->size();
//     BitVector vocabMask(size, 1);
    // Accumulate counts of prob/bow for computing perplexity of corpusFilename.
    char                    line[MAXLINE];
    size_t                  numOOV = 0;
    vector<VocabIndex> words(256);
    _totLogProb = 0.0;
    _numZeroProbs = 0;
    _numWords = 0;
    while (corpusFile.getLine( line, MAXLINE)) {
        if (strncmp(line, "<DOC ", 5) == 0 || strcmp(line, "</DOC>") == 0)
            continue;
//      Logger::Log(0, "Additional Input:%s\n", line);
        // Lookup vocabulary indices for each word in the line.
        words.clear();
//         words.push_back(Vocab::EndOfSentence);
        char *p = &line[0];
        while (*p != 0) {
            while (isspace(*p)) ++p;  // Skip consecutive spaces.
            const char *token = p;
            while (*p != 0 && !isspace(*p))  ++p;
            size_t len = p - token;
            if (*p != 0) *p++ = 0;
            words.push_back(_lm.vocab().Find(token, len));
        }
//         words.push_back(Vocab::EndOfSentence);

        // Add each top order n-gram.
        size_t ngramOrder = std::min((size_t)2, size - 1);
        for (size_t i = 1; i < words.size(); i++) {
            if (words[i] == Vocab::Invalid) {
                // OOV word encountered.  Reset order to unigrams.
                ngramOrder = 1;
                numOOV++;
            } else {
                NgramIndex index;
                size_t     boOrder = ngramOrder;
                while ((index = _lm._pModel->_Find(&words[i-boOrder+1], boOrder)) == -1) {
                    --boOrder;
                    NgramIndex hist = _lm._pModel->_Find(&words[i - boOrder], boOrder);
                    if (hist != (NgramIndex)-1) {
                        if ((_lm.bows(boOrder))[hist] != 0) {
//                         _bowCountVectors[boOrder][hist]++;
                          _totLogProb += log((_lm.bows(boOrder))[hist]) * 1;
                        }
                    }
                }
                ngramOrder = std::min(ngramOrder + 1, size - 1);
//                 _probCountVectors[boOrder][index]++;
                if ((_lm.probs(boOrder))[index] == 0)
                    _numZeroProbs++;
                else
                    _totLogProb += log((_lm.probs(boOrder))[index]) * 1;
                _numWords++;
            }
        }
    }
    double entropy = -_totLogProb / (_numWords - _numZeroProbs);
//     std::cout << -_totLogProb << "\t" << _numWords << "\t" << _numZeroProbs << std::endl;
    if (Logger::GetVerbosity() > 2)
        std::cout << exp(entropy) << "\t" << params << std::endl;
    else
        Logger::Log(2, "%f\n", exp(entropy));
    // entropy is NaN when numWords == numZeroProbs (0/0); return an
    // arbitrarily large sentinel entropy instead.
    return std::isnan(entropy) ? 70 : entropy;
}
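The value returned is the average negative log-likelihood per scored word; its exponential, printed above, is the corpus perplexity:

    H = -(1 / (numWords - numZeroProbs)) * sum_i log p(w_i | h_i),    PPL = exp(H)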
Code Example #13
File: zmodifyer.cpp  Project: daoluong/zpack-library
void zmodifyer::getInfo( std::vector< std::pair< std::wstring, ZFile* > > & fileList, wchar_t const * path, wchar_t const * password )
{
	if( zdb_->db_.IsEmpty() )
	{
		// command strings
		UStringVector commandStrings;

		commandStrings.Add(L"L");

		UString pw( L"-P" );

		commandStrings.Add( (pw + ( password ? password : L"") ) );

		commandStrings.Add( file_name_ );

		CArchiveCommandLineOptions options;

		OptionSetting( commandStrings, options );

		// Extract the archive format index.
		CIntVector formatIndices;

		if (!codecs_->FindFormatForArchiveType(options.ArcType, formatIndices))
		{
			throw kUnsupportedArcTypeMessage;
		}

		UInt64 numErrors = 0;

		HRESULT result = ListArchives(
			codecs_,
			formatIndices,
			options.StdInMode,
			options.ArchivePathsSorted,
			options.ArchivePathsFullSorted,
			options.WildcardCensor.Pairs.Front().Head,
			options.EnableHeaders,
			options.TechMode,
#ifndef _NO_CRYPTO
			options.PasswordEnabled,
			options.Password,
#endif
			numErrors, zdb_);
	}

	ZFile * zfile = 0;

	std::wstring filePath = path ? path : L"";

	if( filePath.empty() )
	{
		zfile = &zdb_->folder_;
	}
	else
	{
		zfile = zdb_->folder_.find( filePath );
	}

	if( zfile )
	{
		zfile->getList( fileList, filePath );
	}
}
Code Example #14
void
NgramModel::_LoadEntropy(vector<DoubleVector> &entropyVectors,
                         ZFile &corpusFile, size_t maxSize) const {
    if (corpusFile == NULL) throw std::invalid_argument("Invalid file");

    // Resize vectors and allocate counts.
    if (maxSize == 0 || maxSize > size())
        maxSize = size();
    int numDocs = 0;
    vector<CountVector> countVectors(maxSize);
    vector<CountVector> totCountVectors(maxSize);
    entropyVectors.resize(maxSize);
    for (size_t o = 0; o < maxSize; ++o) {
        countVectors[o].resize(sizes(o), 0);
        totCountVectors[o].resize(sizes(o), 0);
        entropyVectors[o].resize(sizes(o), 0);
    }

    // Accumulate counts for each n-gram in corpus file.
    char line[MAXLINE];
    vector<VocabIndex> words(256);
    vector<NgramIndex> hists(maxSize);
    while (corpusFile.getLine( line, MAXLINE)) {
        if (strcmp(line, "</DOC>") == 0) {
            // Accumulate frequency.
            numDocs++;
            for (size_t o = 1; o < countVectors.size(); ++o) {
                for (size_t i = 0; i < countVectors[o].length(); ++i) {
                    int c = countVectors[o][i];
                    if (c > 0) {
                        totCountVectors[o][i] += c;
                        entropyVectors[o][i] += c * log(c);
                        countVectors[o][i] = 0;
                    }
                }
            }
            continue;
        } else if (strncmp(line, "<DOC ", 5) == 0)
            continue;

        // Lookup vocabulary indices for each word in the line.
        words.clear();
        words.push_back(Vocab::EndOfSentence);
        char *p = &line[0];
        while (*p != '\0') {
            while (isspace(*p)) ++p;  // Skip consecutive spaces.
            const char *token = p;
            while (*p != 0 && !isspace(*p))  ++p;
            size_t len = p - token;
            if (*p != 0) *p++ = 0;
            words.push_back(_vocab.Find(token, len));
        }
        words.push_back(Vocab::EndOfSentence);

        // Add each order n-gram.
        hists[1] = _vectors[1].Find(0, Vocab::EndOfSentence);
        for (size_t i = 1; i < words.size(); ++i) {
            VocabIndex word = words[i];
            NgramIndex hist = 0;
            for (size_t j = 1; j < std::min(i + 2, maxSize); ++j) {
                if (word != Vocab::Invalid) {
                    NgramIndex index = _vectors[j].Find(hist, word);
                    if (index >= 0)
                        countVectors[j][index]++;
                    else
                        Logger::Warn(1, "DocFreq feature skipped.\n");
                    hist     = hists[j];
                    hists[j] = index;
                } else {
                    hist     = hists[j];
                    hists[j] = NgramVector::Invalid;
                }
            }
        }
    }

    // Finalize entropy computation.
    double invLogNumDocs = 1.0 / log((double)numDocs);
    for (size_t o = 1; o < maxSize; o++)
        entropyVectors[o] = CondExpr(
            totCountVectors[o] == 0, 0.0,
            ((entropyVectors[o] / -totCountVectors[o])
             + log(asDouble(totCountVectors[o]))) * invLogNumDocs);
}
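The closed form computed in the finalization step is, for each n-gram g with per-document counts c_d and total C = sum_d c_d, the entropy of g's distribution across the D documents, normalized to [0, 1]:

    H(g) = -(1 / log D) * sum_d (c_d / C) * log(c_d / C)

which expands to the (entropy / -C + log C) / log D expression used in the code.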
Code Example #15
void
NgramModel::LoadCorpus(vector<CountVector> &countVectors,
                       ZFile &corpusFile, bool reset) {
    if (corpusFile == NULL) throw std::invalid_argument("Invalid file (corpusFile is NULL)");

    // Resize vectors and allocate counts.
    countVectors.resize(size());
    countVectors[0].resize(1, 0);
    for (size_t o = 1; o < size(); ++o) {
        size_t capacity = std::max(1ul<<16, nextPowerOf2(_vectors[o].size()));
        if (reset)
            countVectors[o].reset(capacity, 0);
        else
            countVectors[o].resize(capacity, 0);
    }

    // Accumulate counts for each n-gram in corpus file.
    char line[MAXLINE];
    vector<VocabIndex> words(256);
    vector<NgramIndex> hists(size(), -1);
    while (corpusFile.getLine( line, MAXLINE)) {
        // Lookup vocabulary indices for each word in the line.
        words.clear();
        words.push_back(Vocab::EndOfSentence);
        char *p = &line[0];
        while (*p != '\0') {
            while (isspace(*p)) ++p;  // Skip consecutive spaces.
            const char *token = p;
            while (*p != 0 && !isspace(*p))  ++p;
            size_t len = p - token;
            if (*p != 0) *p++ = 0;
            words.push_back(_vocab.Add(token, len));
        }
        words.push_back(Vocab::EndOfSentence);

        // Add each order n-gram.
        hists[1] = _vectors[1].Add(0, Vocab::EndOfSentence);
        for (size_t i = 1; i < words.size(); ++i) {
            VocabIndex word = words[i];
            NgramIndex hist = 0;
            for (size_t j = 1; j < std::min(i + 2, size()); ++j) {
                if (word != Vocab::Invalid && hist != NgramVector::Invalid) {
                    bool       newNgram;
                    NgramIndex index = _vectors[j].Add(hist, word, &newNgram);
                    if (newNgram && (size_t)index >= countVectors[j].length())
                        countVectors[j].resize(countVectors[j].length() * 2, 0);
                    countVectors[j][index]++;
                    hist     = hists[j];
                    hists[j] = index;
                } else {
                    hist     = hists[j];
                    hists[j] = NgramVector::Invalid;
                }
            }
        }
    }

    // Add remaining vocabulary, if necessary.
    if (_vectors[1].size() != _vocab.size()) {
        for (VocabIndex i = 0; i < (VocabIndex)_vocab.size(); ++i)
            _vectors[1].Add(0, i);
        countVectors[1].resize(_vocab.size(), 0);
    }

    // Sort and resize counts to actual size.
    VocabVector vocabMap;
    IndexVector ngramMap(1, 0), boNgramMap;
    _vocab.Sort(vocabMap);
    for (size_t o = 0; o < size(); ++o) {
        boNgramMap.swap(ngramMap);
        if (_vectors[o].Sort(vocabMap, boNgramMap, ngramMap))
            NgramModel::ApplySort(ngramMap, countVectors[o]);
        else
            countVectors[o].resize(ngramMap.length());
    }
    _ComputeBackoffs();
}
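nextPowerOf2 is a small helper not shown in this listing. A typical bit-twiddling implementation, offered as a sketch (the project's own version may differ):

// Round n up to the next power of two; returns n unchanged if it already is one.
inline size_t nextPowerOf2(size_t n) {
    if (n == 0) return 1;
    --n;
    for (size_t shift = 1; shift < sizeof(size_t) * 8; shift <<= 1)
        n |= n >> shift;
    return n + 1;
}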
Code Example #16
void
NgramModel::LoadEvalCorpus(vector<CountVector> &probCountVectors,
                           vector<CountVector> &bowCountVectors,
                           BitVector &vocabMask,
                           ZFile &corpusFile,
                           size_t &outNumOOV,
                           size_t &outNumWords) const {
    if (corpusFile == NULL) throw std::invalid_argument("Invalid file");

    // Allocate count vectors.
    probCountVectors.resize(size());
    bowCountVectors.resize(size() - 1);
    for (size_t i = 0; i < size(); i++)
        probCountVectors[i].reset(_vectors[i].size(), 0);
    for (size_t i = 0; i < size() - 1; i++)
        bowCountVectors[i].reset(_vectors[i].size(), 0);

    // Accumulate counts of prob/bow for computing perplexity of corpusFilename.
    char                    line[MAXLINE];
    size_t                  numOOV = 0;
    size_t                  numWords = 0;
    vector<VocabIndex> words(256);
    while (corpusFile.getLine( line, MAXLINE)) {
        if (strncmp(line, "<DOC ", 5) == 0 || strcmp(line, "</DOC>") == 0)
            continue;
// 	Logger::Log(0, "Additional Input:%s\n", line);
        // Lookup vocabulary indices for each word in the line.
        words.clear();
//         words.push_back(Vocab::EndOfSentence);
        char *p = &line[0];
        while (*p != 0) {
            while (isspace(*p)) ++p;  // Skip consecutive spaces.
            const char *token = p;
            while (*p != 0 && !isspace(*p))  ++p;
            size_t len = p - token;
            if (*p != 0) *p++ = 0;
            words.push_back(_vocab.Find(token, len));
        }
//         words.push_back(Vocab::EndOfSentence);

        // Add each top order n-gram.
        size_t ngramOrder = std::min((size_t)2, size() - 1);
        for (size_t i = 1; i < words.size(); i++) {
            if (words[i] == Vocab::Invalid || !vocabMask[words[i]]) {
                // OOV word encountered.  Reset order to unigrams.
                ngramOrder = 1;
                numOOV++;
            } else {
                NgramIndex index;
                size_t     boOrder = ngramOrder;
                while ((index = _Find(&words[i-boOrder+1], boOrder)) == -1) {
                    --boOrder;
                    NgramIndex hist = _Find(&words[i - boOrder], boOrder);
                    if (hist != (NgramIndex)-1)
                        bowCountVectors[boOrder][hist]++;
                }
                ngramOrder = std::min(ngramOrder + 1, size() - 1);
                probCountVectors[boOrder][index]++;
                numWords++;
            }
        }
    }
    outNumOOV   = numOOV;
    outNumWords = numWords;
}
Code Example #17
void
NgramModel::LoadLM(vector<ProbVector> &probVectors,
                   vector<ProbVector> &bowVectors,
                   ZFile &lmFile) {
    if (lmFile == NULL) throw std::invalid_argument("Invalid file");

    // Read ARPA LM header.
    char           line[MAXLINE];
    size_t         o, len;
    vector<size_t> ngramLengths(1);
    while (lmFile.getLine( line, MAXLINE) && strcmp(line, "\\data\\") != 0)
        /* NOP */;
    while (lmFile.getLine( line, MAXLINE)) {
        unsigned int o, len;
        if (sscanf(line, "ngram %u=%u", &o, &len) != 2)
            break;
        assert(o == ngramLengths.size());
        ngramLengths.push_back(len);
    }

    // Allocate buffers and read counts.
    _vocab.Reserve(ngramLengths[1]);
    probVectors.resize(size());
    probVectors[0].resize(1, 0.0);
    bowVectors.resize(size() - 1);
    bowVectors[0].resize(1, 0.0);
    for (o = 1; o < size(); o++) {
        ProbVector &probs  = probVectors[o];
        ProbVector &bows   = bowVectors[o];
        bool        hasBow = (o < size() - 1);

        // Preallocate buffer for n-grams.
        _vectors[o].Reserve(ngramLengths[o]);
        probs.reset(ngramLengths[o]);
        if (hasBow) bows.reset(ngramLengths[o]);

        lmFile.getLine( line, MAXLINE);
        unsigned int i;
        if (sscanf(line, "\\%u-ngrams:", &i) != 1 || i != o) {
            throw std::invalid_argument("Unexpected file format.");
        }
        while (true) {
            lmFile.getLine( line, MAXLINE);
            size_t lineLen = strlen(line);
            if (line[0] == '\0') break;  // Empty line ends section.
            char *p = &line[0];

            // Read log probability.
            Prob prob = (Prob)pow(10.0, strtod(p, &p)); p++;

            // Read i words.
            NgramIndex index  = 0;
            const char *token = NULL;
            for (i = 1; i <= o; ++i) {
                token = p;
                while (*p != 0 && !isspace(*p)) ++p;
                len = p - token;
                *p++ = 0;
                VocabIndex vocabIndex = _vocab.Add(token, len);
                if (vocabIndex == Vocab::Invalid) {
                    index = NgramVector::Invalid;
                    break;
                }
                index = _vectors[i].Add(index, vocabIndex);
            }
            if (index == NgramVector::Invalid) break;

            // Set probability and backoff weight.
            if (index == Vocab::EndOfSentence && o == 1) {
                if (strcmp(token, "<s>") == 0) {
                    assert(prob <= pow(10, -99));
                    bows[index] = (p >= &line[lineLen]) ?
                        (Prob)1 : (Prob)pow(10.0, strtod(p, &p));
                } else {
                    probs[index] = prob;
                    assert(p >= &line[lineLen]);
                }
            } else {
                probs[index] = prob;
                if (hasBow) {
                    // Read optional backoff weight.
                    bows[index] = (p >= &line[lineLen]) ?
                        (Prob)1 : (Prob)pow(10.0, strtod(p, &p));
                }
            }
        }
    }

    // Read ARPA LM footer.
    while (lmFile.getLine( line, MAXLINE) &&
           strcmp(line, "\\end\\") != 0)  /* NOP */;

    // Sort and resize probs/bows to actual size.
    VocabVector vocabMap;
    IndexVector ngramMap(1, 0), boNgramMap;
    _vocab.Sort(vocabMap);
    for (size_t o = 0; o < size(); ++o) {
        boNgramMap.swap(ngramMap);
        if (_vectors[o].Sort(vocabMap, boNgramMap, ngramMap)) {
            NgramModel::ApplySort(ngramMap, probVectors[o]);
            if (o < bowVectors.size())
                NgramModel::ApplySort(ngramMap, bowVectors[o]);
        } else {
            probVectors[o].resize(ngramMap.length());
            if (o < bowVectors.size())
                bowVectors[o].resize(ngramMap.length());
        }
    }
    _ComputeBackoffs();
}
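For orientation, the ARPA text format this parser consumes looks like the following (abbreviated; the numbers are illustrative log10 probabilities, each optionally followed by a log10 backoff weight):

\data\
ngram 1=4
ngram 2=2

\1-grams:
-1.2041 </s>
-0.3010 <s>     -0.5229
-0.6021 hello   -0.3010
-0.6021 world   -0.3010

\2-grams:
-0.3010 <s> hello
-0.3010 hello world

\end\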
Code Example #18
void
NgramModel::LoadCounts(vector<CountVector> &countVectors,
                       ZFile &countsFile, bool reset) {
    if (countsFile == NULL) throw std::invalid_argument("Invalid file");

    // Resize vectors and allocate counts.
    countVectors.resize(size());
    countVectors[0].resize(1, 0);
    for (size_t o = 1; o < size(); ++o) {
        size_t capacity = std::max(1ul<<16, nextPowerOf2(_vectors[o].size()));
        if (reset)
            countVectors[o].reset(capacity, 0);
        else
            countVectors[o].resize(capacity, 0);
    }

    // Accumulate counts for each n-gram in counts file.
    char                    line[MAXLINE];
    vector<VocabIndex> words(256);
    while (countsFile.getLine( line, MAXLINE)) {
        if (line[0] == '\0' || line[0] == '#') continue;

        words.clear();
        char *p = &line[0];
        while (words.size() < size()) {
            const char *token = p;
            while (*p != 0 && !isspace(*p))  ++p;
            if (*p == 0) {
                // Last token in line: Add ngram with count
                bool       newNgram;
                size_t     order = words.size();
                NgramIndex index = 0;
                for (size_t i = 1; i < order; ++i)
                    index = _vectors[i].Add(index, words[i - 1]);
                index = _vectors[order].Add(index, words[order - 1], &newNgram);
                if (newNgram && (size_t)index >= countVectors[order].length())
                    countVectors[order].resize(countVectors[order].length() * 2,
                                               0);
                countVectors[order][index] += atoi(token);
                break;  // Move to next line.
            }

            // Not the last token: Lookup word index and add to words.
            size_t len = p - token;
            *p++ = 0;
            if (len > 0) {
                VocabIndex vocabIndex = _vocab.Add(token, len);
                if (vocabIndex == Vocab::Invalid) break;
                words.push_back(vocabIndex);
            }
        }
    }

    // Sort and resize counts to actual size.
    VocabVector vocabMap;
    IndexVector ngramMap(1, 0), boNgramMap;
    _vocab.Sort(vocabMap);
    for (size_t o = 0; o < size(); ++o) {
        boNgramMap.swap(ngramMap);
        if (_vectors[o].Sort(vocabMap, boNgramMap, ngramMap))
            NgramModel::ApplySort(ngramMap, countVectors[o]);
        else
            countVectors[o].resize(ngramMap.length());
    }
    _ComputeBackoffs();
}