void LanguageModel::load_finish(){ size_t used_gram_num_ = 0; for (uint32_t i = 0; i < order; i++) { assert(used_ngram_num_per_order[i] == ngram_num_per_order[i]); used_gram_num_ += used_ngram_num_per_order[i]; } assert(used_gram_num == used_gram_num_); assert(vocab_size == used_ngram_num_per_order[0]); auto bos_ = word2id.find(bos); if(bos_ != word2id.end()){ //该语言模型中包含bos,那么将bos的概率设为eos的 word_id_t word_buff [1]; word_buff[0] = bos_->second; const GramNode* bos_gram = gram(word_buff,1); assert(bos_gram != NULL); bos_gram_id = bos_gram->gram_id; auto eos_ = word2id.find(eos); assert(eos_ != word2id.end()); word_buff[0] = eos_->second; const GramNode* eos_gram = gram(word_buff, 1); assert(eos_gram != NULL); prob_buff[bos_gram->gram_id] = prob_buff[eos_gram->gram_id]; } }
//gram[context_len]是P(w|h)中的w prob_t LanguageModel::wordProbBO(word_id_t *word_ids, uint32_t context_len) { //返回P(w|h')的log值 //srilm的计算方法和sunpinyin的不同。 //srilm不需要递归计算 //srilm首先计算最长的P(w|h')。然后加上bow值 const GramNode *n = gram(word_ids, context_len + 1); if (n && prob_buff[n->gram_id] < 2.0) { return prob_buff[n->gram_id]; } assert(context_len != 0 && "Unigram must in Language model"); //查找最长的在语言模型中的h'w的suffix prob_t prob = 3.0; //用大于0的数代表没有找到。 uint32_t i = 0; //i+1是去掉的head的数目。也是需要补的bow的数目 for (; i < context_len; i++) { n = gram(word_ids + i + 1, context_len - i); if (n) { prob = prob_buff[n->gram_id]; if (prob < 2.0) { break; } } } assert(prob < 2.0 && "prob must be found"); for (uint32_t j = 0; j <= i; j++) { n = gram(word_ids + j, context_len - j); assert(n != NULL && "bow must be found"); prob += bow_buff[n->gram_id]; } return prob; }
void FunctionApproximatorGPR::train(const MatrixXd& inputs, const MatrixXd& targets) { if (isTrained()) { cerr << "WARNING: You may not call FunctionApproximatorGPR::train more than once. Doing nothing." << endl; cerr << " (if you really want to retrain, call reTrain function instead)" << endl; return; } assert(inputs.rows() == targets.rows()); assert(inputs.cols()==getExpectedInputDim()); const MetaParametersGPR* meta_parameters_gpr = dynamic_cast<const MetaParametersGPR*>(getMetaParameters()); double max_covar = meta_parameters_gpr->maximum_covariance(); VectorXd sigmas = meta_parameters_gpr->sigmas(); // Compute the gram matrix // In a gram matrix, every input point is itself a center MatrixXd centers = inputs; // Replicate sigmas, because they are the same for each data point/center MatrixXd widths = sigmas.transpose().colwise().replicate(centers.rows()); MatrixXd gram(inputs.rows(),inputs.rows()); bool normalize_activations = false; bool asymmetric_kernels = false; BasisFunction::Gaussian::activations(centers,widths,inputs,gram,normalize_activations,asymmetric_kernels); gram *= max_covar; setModelParameters(new ModelParametersGPR(inputs,targets,gram,max_covar,sigmas)); }
std::vector<std::string> recognize() { ABuffer auChan("auChan"); // waveform data ABuffer feChan("feChan"); // features ABuffer ansChan("ansChan"); // recognition output ASource ain("AIn",&auChan); // auChan connects source to coder ACode acode("ACode",&auChan,&feChan); //feChan connects coder to reco AHmms hset("HmmSet"); // create HMM set called HmmSet ADict dict("ADict"); // create a dictionary called ADict AGram gram("AGram"); // create a grammar called AGram ARMan rman; // create a resource manager rman.StoreHMMs(&hset); // store the resources in rman rman.StoreDict(&dict); rman.StoreGram(&gram); ResourceGroup *group = rman.NewGroup("directions"); group->AddHMMs(&hset); group->AddDict(&dict); group->AddGram(&gram); ARec arec("ARec",&feChan,&ansChan,&rman); // create recogniser ain.Start(); acode.Start(); arec.Start(); arec.SendMessage("start()"); ain.SendMessage("start()"); // Waiting for processing to finish do { usleep(10000); }while(!auChan.IsEmpty() || !feChan.IsEmpty()); // Get the results std::vector<std::string> result; while(!ansChan.IsEmpty()) { APacket p = ansChan.GetPacket(); if(p.GetKind() == PhrasePacket) { APhraseData *pd = (APhraseData *)p.GetData(); result.push_back(pd->word); } } ain.SendMessage("stop()"); acode.SendMessage("terminate()"); arec.SendMessage("terminate()"); ain.SendMessage("terminate()"); // Wait for threads to finish acode.Join(); arec.Join(); ain.Join(); return result; }
/**
 * Look up the n-gram node for a sequence of words given as strings.
 *
 * Each word is mapped to its id with word2Idx(); the resulting id sequence is
 * forwarded to the id-based gram() overload.
 *
 * @param vec  the words of the n-gram, in order
 * @return the result of gram(ids, vec.size()), or NULL when any word has no
 *         id (word2Idx() returned (word_id_t)-1) or when vec holds more than
 *         MAX_ORDER words
 */
const GramNode* LanguageModel::gram(const vector<string> & vec){
    // The id buffer holds MAX_ORDER entries. Reject longer inputs up front
    // rather than writing past the end of the stack array.
    // NOTE(review): assumes no stored n-gram is longer than MAX_ORDER, so
    // "not found" is the right answer here - confirm against the loader.
    if (vec.size() > static_cast<size_t>(MAX_ORDER)) {
        return NULL;
    }

    word_id_t words[MAX_ORDER];
    // Named gram_len rather than order to avoid shadowing the member `order`.
    const uint32_t gram_len = static_cast<uint32_t>(vec.size());
    for (uint32_t i = 0; i < gram_len; i++) {
        words[i] = word2Idx(vec[i]);
        if (words[i] == (word_id_t)-1) {
            // Unknown word: no such n-gram can exist.
            return NULL;
        }
    }
    return gram(words, gram_len);
}
//! Actually performs the computation virtual Decomposition<Scalar> _getDecomposition() const override { Decomposition<Scalar> d(this->getFSpace()); // Compute A^T X^T X A^T // where A = diag(A_1 ... A_n) and X = (X_1 ... X_n) Index size = offsets_A.back(); // Nothing to do if (size == 0) { d.mX = this->getFSpace()->newMatrix(); d.mY.resize(0,0); d.mD.resize(0,1); return d; } ScalarMatrix gram_X = this->getFSpace()->k(fMatrix); ScalarMatrix gram(size, size); for(size_t i = 0; i < combination_matrices.size(); i++) { const ScalarAltMatrix &mAi = combination_matrices[i]; for(size_t j = 0; j <= i; j++) { const ScalarAltMatrix &mAj = combination_matrices[j]; getBlock(gram, offsets_A, i, j) = (mAi.adjoint() * ((Eigen::internal::conj(alphas[i]) * alphas[j]) * getBlock(gram_X, offsets_X, i, j))) * mAj; } } // Direct EVD ScalarMatrix _mY; RealVector _mD; Eigen::SelfAdjointEigenSolver<ScalarMatrix> evd(gram.template selfadjointView<Eigen::Lower>()); kqp::ThinEVD<ScalarMatrix>::run(evd, _mY, _mD); d.mD.swap(_mD); // Y <- A * Y * D^-1/2 (use cwiseAbs to avoid problems with small negative values) ScalarMatrix _mY2 = _mY * d.mD.cwiseAbs().cwiseSqrt().cwiseInverse().asDiagonal(); _mY = _mY2; ScalarMatrix __mY(offsets_X.back(), _mY.cols()); for(size_t i = 0; i < combination_matrices.size(); i++) { const ScalarAltMatrix &mAi = combination_matrices[i]; __mY.block(offsets_X[i], 0, offsets_X[i+1]-offsets_X[i], __mY.cols()) = mAi * (alphas[i] * _mY.block(offsets_A[i], 0, offsets_A[i+1]-offsets_A[i], _mY.cols())); } d.mY.swap(__mY); d.mX = fMatrix; return d; }
int main(int argc, char *argv[]) { APacket p; try { // if (NCInitHTK("TRec.cfg",version)<SUCCESS){ if (InitHTK(argc,argv,version)<SUCCESS){ ReportErrors("Main",0); exit(-1); } printf("TRec: Basic Recogniser Test\n"); ConfParam *cParm[MAXGLOBS]; /* config parameters */ int numParm,i; char ngramFN[100],buf[100]; ngramFN[0] = '\0'; // Read configuration parms for ANGRAM to see if NGram used numParm = GetConfig("ANGRAM", TRUE, cParm, MAXGLOBS); if (numParm>0){ if (GetConfStr(cParm,numParm,"NGRAMFILE",buf)) strcpy(ngramFN,buf); } printf("TRec: HTK initialised: %s\n",ngramFN); // Create Buffers ABuffer auChan("auChan"); ABuffer feChan("feChan"); ABuffer ansChan("ansChan"); printf("TRec: Buffers initialised\n"); // create a resource manager ARMan rman; // Create Audio Source and Coder ASource ain("AIn",&auChan); ACode acode("ACode",&auChan,&feChan); ARec arec("ARec",&feChan,&ansChan,&rman,0); printf("TRec: Components initialised\n"); // create global resources AHmms hset("HmmSet"); // load info in config ADict dict("ADict"); AGram gram("AGram"); rman.StoreHMMs(&hset); rman.StoreDict(&dict); rman.StoreGram(&gram); ResourceGroup *main = rman.NewGroup("main"); main->AddHMMs(&hset); main->AddDict(&dict); main->AddGram(&gram); if (strlen(ngramFN)>0){ ANGram * ngram = new ANGram("ANGram"); rman.StoreNGram(ngram); main->AddNGram(ngram); } #ifdef WITHMON // Create Monitor and Start it AMonitor amon; amon.AddComponent(&ain); amon.AddComponent(&acode); amon.AddComponent(&arec); amon.Start(); #endif // Start components executing ain.Start(); acode.Start(); arec.Start(); arec.SendMessage("usegrp(main)"); arec.SendMessage("start()"); Boolean terminated = FALSE; while (!terminated) { APacket p = ansChan.GetPacket(); if (p.GetKind() == StringPacket){ AStringData * sd = (AStringData *)p.GetData(); if (sd->data.find("TERMINATED") != string::npos) { terminated = TRUE; } } p.Show(); } // Shutdown printf("Waiting for ain\n"); ain.Join(); printf("Waiting for acode\n"); acode.Join(); 
printf("Waiting for arec\n"); arec.Join(); #ifdef WITHMON printf("Waiting for monitor\n"); amon.Terminate(); HJoinMonitor(); #endif return 0; } catch (ATK_Error e){ ReportErrors("ATK",e.i); return 0;} catch (HTK_Error e){ ReportErrors("HTK",e.i); return 0;} }