Esempio n. 1
0
void LanguageModel::load_finish(){
    size_t used_gram_num_ = 0;
    for (uint32_t i = 0; i < order; i++) {
        assert(used_ngram_num_per_order[i] == ngram_num_per_order[i]);
        used_gram_num_ += used_ngram_num_per_order[i];
    }
    assert(used_gram_num == used_gram_num_);
    assert(vocab_size == used_ngram_num_per_order[0]);

    auto bos_ = word2id.find(bos);
    if(bos_ != word2id.end()){
      //该语言模型中包含bos,那么将bos的概率设为eos的
      word_id_t word_buff [1];
      word_buff[0] = bos_->second;
      const GramNode* bos_gram = gram(word_buff,1);
      assert(bos_gram != NULL);

      bos_gram_id = bos_gram->gram_id;
      auto eos_ = word2id.find(eos);
      assert(eos_ != word2id.end());

      word_buff[0] = eos_->second;
      const GramNode* eos_gram = gram(word_buff, 1);
      assert(eos_gram != NULL);

      prob_buff[bos_gram->gram_id] = prob_buff[eos_gram->gram_id];
    }

}
Esempio n. 2
0
//gram[context_len] is the predicted word w in P(w|h)
prob_t LanguageModel::wordProbBO(word_id_t *word_ids, uint32_t context_len) {
    // Returns the log value of P(w|h').
    // srilm computes this differently from sunpinyin:
    // srilm needs no recursion -- it first finds the longest stored
    // P(w|h'), then adds the back-off weights (bow) of the dropped heads.
    const GramNode *node = gram(word_ids, context_len + 1);
    if (node != NULL && prob_buff[node->gram_id] < 2.0) {
        return prob_buff[node->gram_id];
    }
    assert(context_len != 0 && "Unigram must in Language model");

    // Find the longest suffix of h'w that is stored in the model with a
    // real probability (values >= 2.0 mark "no probability stored").
    prob_t prob = 3.0;    // sentinel > 0 meaning "not found yet"
    uint32_t dropped = 0; // dropped+1 heads removed == number of bows to add
    while (dropped < context_len) {
        node = gram(word_ids + dropped + 1, context_len - dropped);
        if (node != NULL) {
            prob = prob_buff[node->gram_id];
            if (prob < 2.0) {
                break;
            }
        }
        dropped++;
    }
    assert(prob < 2.0 && "prob must be found");

    // Add the back-off weight of every history prefix that was dropped.
    for (uint32_t j = 0; j <= dropped; j++) {
        node = gram(word_ids + j, context_len - j);
        assert(node != NULL && "bow must be found");
        prob += bow_buff[node->gram_id];
    }
    return prob;
}
Esempio n. 3
0
void FunctionApproximatorGPR::train(const MatrixXd& inputs, const MatrixXd& targets)
{
  // Train the Gaussian Process Regression approximator on (inputs, targets):
  // build the Gaussian gram matrix over the training inputs (every input is
  // its own center), scale it by the maximum covariance, and store the
  // result as model parameters. May only be called once; use reTrain to
  // train again.
  if (isTrained())  
  {
    cerr << "WARNING: You may not call FunctionApproximatorGPR::train more than once. Doing nothing." << endl;
    cerr << "   (if you really want to retrain, call reTrain function instead)" << endl;
    return;
  }
  
  assert(inputs.rows() == targets.rows());
  assert(inputs.cols()==getExpectedInputDim());

  const MetaParametersGPR* meta_parameters_gpr = 
    dynamic_cast<const MetaParametersGPR*>(getMetaParameters());
  // Fix: dynamic_cast returns NULL when the stored meta-parameters are not
  // MetaParametersGPR; dereferencing that NULL below was undefined behavior.
  assert(meta_parameters_gpr!=NULL && "Meta-parameters must be of type MetaParametersGPR");
  
  double max_covar = meta_parameters_gpr->maximum_covariance();
  VectorXd sigmas = meta_parameters_gpr->sigmas();
  
  
  // Compute the gram matrix
  // In a gram matrix, every input point is itself a center
  MatrixXd centers = inputs;
  // Replicate sigmas, because they are the same for each data point/center
  MatrixXd widths = sigmas.transpose().colwise().replicate(centers.rows()); 

  MatrixXd gram(inputs.rows(),inputs.rows());
  bool normalize_activations = false;
  bool asymmetric_kernels = false;
  BasisFunction::Gaussian::activations(centers,widths,inputs,gram,normalize_activations,asymmetric_kernels);
  
  // Scale the kernel activations to the configured maximum covariance.
  gram *= max_covar;

  setModelParameters(new ModelParametersGPR(inputs,targets,gram,max_covar,sigmas));
  
}
Esempio n. 4
0
std::vector<std::string> recognize() {
	// Runs one ATK recognition pass: audio source -> coder -> recogniser.
	// Blocks until the audio and feature buffers drain, then returns the
	// recognised words in arrival order.
	// NOTE(review): the HMM set / dictionary / grammar load themselves from
	// the HTK configuration -- confirm the config is initialised by the
	// caller before this runs.

	ABuffer auChan("auChan");		// waveform data 
	ABuffer feChan("feChan");		// features 
	ABuffer ansChan("ansChan");		// recognition output 

	ASource ain("AIn",&auChan);			// auChan connects source to coder 
	ACode acode("ACode",&auChan,&feChan); //feChan connects coder to reco 
	
	AHmms hset("HmmSet");	// create HMM set called HmmSet 
	ADict dict("ADict");	// create a dictionary called ADict 
	AGram gram("AGram");	// create a grammar called AGram 
	ARMan rman;				// create a resource manager 
	rman.StoreHMMs(&hset);	// store the resources in rman 
	rman.StoreDict(&dict); 
	rman.StoreGram(&gram); 
	
	// Group the resources so the recogniser can use them as one unit
	ResourceGroup *group = rman.NewGroup("directions"); 
	group->AddHMMs(&hset); 
	group->AddDict(&dict); 
	group->AddGram(&gram); 
	ARec arec("ARec",&feChan,&ansChan,&rman); // create recogniser 
	
	// Launch component threads, then tell recogniser and source to run
	ain.Start(); acode.Start(); arec.Start();
	
	arec.SendMessage("start()");
	ain.SendMessage("start()");

	// Waiting for processing to finish
	// (poll until both upstream buffers are empty)
	do
	{
		usleep(10000);
	}while(!auChan.IsEmpty() || !feChan.IsEmpty());
	
	// Get the results
	// Only phrase packets carry recognised words; other packet kinds are dropped
	std::vector<std::string> result;
	while(!ansChan.IsEmpty())
	{
		APacket p = ansChan.GetPacket();
		if(p.GetKind() == PhrasePacket)
		{
			APhraseData *pd = (APhraseData *)p.GetData();
			result.push_back(pd->word);
		}
	}
	
	// Orderly shutdown: stop the source, then terminate each component
	ain.SendMessage("stop()");
	acode.SendMessage("terminate()");
	arec.SendMessage("terminate()");
	ain.SendMessage("terminate()");
	
	// Wait for threads to finish
	acode.Join();
	arec.Join();
	ain.Join();
	
	return result;
}
Esempio n. 5
0
// Look up the n-gram node for a sequence of word strings.
// Returns NULL when any word is out of vocabulary, or when the sequence is
// longer than MAX_ORDER (such an n-gram cannot exist in the model anyway).
const GramNode* LanguageModel::gram(const vector<string> & vec){
    word_id_t words[MAX_ORDER];
    uint32_t order = vec.size();
    // Fix: guard the fixed-size stack buffer -- an over-long query
    // previously wrote past words[MAX_ORDER].
    if (order > MAX_ORDER) {
        return NULL;
    }
    for (uint32_t i = 0; i < order;i++) {
        words[i] = word2Idx(vec[i]);
        if (words[i] == (word_id_t)-1) {
            return NULL; // unknown word: the n-gram cannot be in the model
        }
    }
    return gram(words,order);
}
Esempio n. 6
0
 //! Actually performs the computation
 //! Builds the Gram matrix of the combined operator, runs a dense
 //! self-adjoint EVD on it, and maps the eigenvectors back into the
 //! original feature-space layout.
 virtual Decomposition<Scalar> _getDecomposition() const override {
     Decomposition<Scalar> d(this->getFSpace());
     // Compute A^T X^T X A^T 
     // where A = diag(A_1 ... A_n) and X = (X_1 ... X_n)
     
     // Total combined size; assumes offsets_A[i] is the column offset of
     // block i and offsets_A.back() the overall size -- TODO confirm.
     Index size = offsets_A.back();
     
     // Nothing to do
     if (size == 0) {
         d.mX = this->getFSpace()->newMatrix();
         d.mY.resize(0,0);
         d.mD.resize(0,1);
         return d;
     }
     
     // Kernel (Gram) matrix of the underlying feature matrix X.
     ScalarMatrix gram_X = this->getFSpace()->k(fMatrix);
     ScalarMatrix gram(size, size);
     
     // Fill only the lower triangle (j <= i) of the combined Gram matrix:
     // block (i,j) = A_i^H * (conj(alpha_i) * alpha_j * K_ij) * A_j.
     for(size_t i = 0; i < combination_matrices.size(); i++) {
         const ScalarAltMatrix &mAi = combination_matrices[i];
         for(size_t j = 0; j <= i; j++) {
             const ScalarAltMatrix &mAj = combination_matrices[j];
             getBlock(gram, offsets_A, i, j) 
                     =  (mAi.adjoint() *  ((Eigen::internal::conj(alphas[i]) * alphas[j]) * getBlock(gram_X, offsets_X, i, j))) * mAj;
         }
     }
     
     
     // Direct EVD
     // selfadjointView<Lower> because only the lower triangle was filled.
     ScalarMatrix _mY;
     RealVector _mD;
     Eigen::SelfAdjointEigenSolver<ScalarMatrix> evd(gram.template selfadjointView<Eigen::Lower>());
     kqp::ThinEVD<ScalarMatrix>::run(evd, _mY, _mD);
     d.mD.swap(_mD);
     
     // Y <- A * Y * D^-1/2 (use cwiseAbs to avoid problems with small negative values)
     ScalarMatrix _mY2 = _mY * d.mD.cwiseAbs().cwiseSqrt().cwiseInverse().asDiagonal();
     _mY = _mY2;
     ScalarMatrix __mY(offsets_X.back(), _mY.cols());
     
     // Expand each eigenvector block back through its combination matrix
     // A_i (scaled by alpha_i) into the X-space row layout.
     for(size_t i = 0; i < combination_matrices.size(); i++) {
         const ScalarAltMatrix &mAi = combination_matrices[i];
         __mY.block(offsets_X[i], 0, offsets_X[i+1]-offsets_X[i], __mY.cols()) = mAi * (alphas[i] * _mY.block(offsets_A[i], 0,  offsets_A[i+1]-offsets_A[i], _mY.cols()));
     }
     
     d.mY.swap(__mY);
     d.mX = fMatrix;
     return d;
 }
Esempio n. 7
0
int main(int argc, char *argv[])
{
   APacket p;

   try {

      // if (NCInitHTK("TRec.cfg",version)<SUCCESS){
      if (InitHTK(argc,argv,version)<SUCCESS){
         ReportErrors("Main",0); exit(-1);
      }
      printf("TRec: Basic Recogniser Test\n");
      ConfParam *cParm[MAXGLOBS];       /* config parameters */
      int numParm,i;
      char ngramFN[100],buf[100];
      ngramFN[0] = '\0';
      // Read configuration parms for ANGRAM to see if NGram used
      numParm = GetConfig("ANGRAM", TRUE, cParm, MAXGLOBS);
      if (numParm>0){
         if (GetConfStr(cParm,numParm,"NGRAMFILE",buf)) strcpy(ngramFN,buf);
      }
      printf("TRec: HTK initialised: %s\n",ngramFN);
      // Create Buffers
      ABuffer auChan("auChan");
      ABuffer feChan("feChan");
      ABuffer ansChan("ansChan");
      printf("TRec: Buffers initialised\n");
      // create a resource manager
      ARMan rman;

      // Create Audio Source and Coder
      ASource ain("AIn",&auChan);
      ACode acode("ACode",&auChan,&feChan);
      ARec  arec("ARec",&feChan,&ansChan,&rman,0);
      printf("TRec: Components initialised\n");

      // create global resources
      AHmms hset("HmmSet"); // load info in config
      ADict dict("ADict");
      AGram gram("AGram");
      rman.StoreHMMs(&hset);
      rman.StoreDict(&dict);
      rman.StoreGram(&gram);

      ResourceGroup *main = rman.NewGroup("main");
      main->AddHMMs(&hset);
      main->AddDict(&dict);
      main->AddGram(&gram);

      if (strlen(ngramFN)>0){
         ANGram * ngram  = new ANGram("ANGram");
         rman.StoreNGram(ngram);
         main->AddNGram(ngram);
      }

#ifdef WITHMON
      // Create Monitor and Start it
      AMonitor amon;
      amon.AddComponent(&ain);
      amon.AddComponent(&acode);
      amon.AddComponent(&arec);
      amon.Start();
#endif

      // Start components executing
      ain.Start();
      acode.Start();
      arec.Start();
      arec.SendMessage("usegrp(main)");
      arec.SendMessage("start()");

      Boolean terminated = FALSE;
      while (!terminated) {
         APacket p = ansChan.GetPacket();
         if (p.GetKind() == StringPacket){
            AStringData * sd = (AStringData *)p.GetData();
            if (sd->data.find("TERMINATED") != string::npos) {
               terminated = TRUE;
            }
         }
         p.Show();
      }
      // Shutdown
      printf("Waiting for ain\n");
      ain.Join();
      printf("Waiting for acode\n");
      acode.Join();
      printf("Waiting for arec\n");
      arec.Join();
#ifdef WITHMON
      printf("Waiting for monitor\n");
      amon.Terminate();
      HJoinMonitor();
#endif
      return 0;
   }
   catch (ATK_Error e){ ReportErrors("ATK",e.i); return 0;}
   catch (HTK_Error e){ ReportErrors("HTK",e.i); return 0;}
}