コード例 #1
0
ファイル: lda_var_em.cpp プロジェクト: rpdodo/mllib
// Fits the LDA model `m` on `train` with variational EM, updating `m` in place.
//
// type:  how the sufficient statistics are initialised before the first
//        M-step: "seeded" calls CorpusInitSS(train, *m), "random" calls
//        RandomInitSS(*m). Any other value leaves `ss` exactly as its
//        constructor produced it.
// train: corpus used for every E-step / M-step.
// test:  corpus passed to the corpus-level Infer() whose result is logged
//        every 10th iteration.
// m:     model to update; it is dereferenced unconditionally, so it must not
//        be null.
//
// The loop always runs em_max_iter_ iterations. Whenever the relative change
// of the summed likelihood is negative, var_max_iter_ is doubled.
void LDA::RunEM(const Str &type, CorpusC &train, CorpusC &test, LdaModel* m) {
  LdaSuffStats ss(*m);
  if (type == "seeded") {
    ss.CorpusInitSS(train, *m);
  } else if (type == "random") {
    ss.RandomInitSS(*m);
  }

  // Initial M-step from the freshly initialised statistics (first argument 0,
  // where the in-loop M-step passes estimate_alpha_).
  LdaMLE(0, ss, m);
  double likelihood_old = 0;
  for (int i = 0; i < em_max_iter_; i++) {
    // E-step: per-document inference. Each iteration writes only its own
    // slot of gamma / phi / likelihood.
    VVReal gamma(train.Len());
    VVVReal phi(train.Len());
    VReal likelihood(train.Len());
    #pragma omp parallel for
    for (size_t d = 0; d < train.Len(); d++) {
      likelihood[d] = Infer(train, d, *m, &gamma[d], &phi[d]);
    }

    // Accumulate the sufficient statistics serially from the per-document
    // results.
    double likelihoods = 0;
    ss.InitSS(*m, 0);
    for (size_t d = 0; d < train.Len(); d++) {
      double gamma_sum = 0;
      for (int k = 0; k < m->num_topics; k++) {
        gamma_sum += gamma[d][k];
      }
      for (int k = 0; k < m->num_topics; k++) {
        ss.alpha_suffstats[k] += DiGamma(gamma[d][k]) - DiGamma(gamma_sum);
      }

      for (size_t n = 0; n < train.ULen(d); n++) {
        for (int k = 0; k < m->num_topics; k++) {
          // The same weighted count feeds both the per-word and the
          // per-topic totals.
          const double weighted = train.Count(d, n) * phi[d][n][k];
          ss.class_word[k][train.Word(d, n)] += weighted;
          ss.class_total[k] += weighted;
        }
      }
      ss.num_docs = ss.num_docs + 1;
      likelihoods += likelihood[d];
    }

    // M-step.
    LdaMLE(estimate_alpha_, ss, m);

    // Relative change of the summed likelihood. There is no previous value
    // on the first pass (likelihood_old == 0), so the ratio is skipped there
    // instead of dividing by zero.
    if (likelihood_old != 0) {
      const double converged = (likelihood_old - likelihoods) / likelihood_old;
      if (converged < 0) {
        var_max_iter_ = var_max_iter_ * 2;
      }
    }
    likelihood_old = likelihoods;

    if (i % 10 == 0) {
      VVReal gamma2;
      VVVReal phi2;
      LOG(INFO) << "em " << i << " perplexity:" << Infer(test, *m, &gamma2, &phi2);
    }
  }
}
コード例 #2
0
ファイル: prooftree.cpp プロジェクト: mehstruslehpy/Documents
// Runs inference on this node and then proves its children recursively,
// left branch before right branch.
void ProofNode::Prove()
{
    // Inference happens first; the child pointers are only examined after
    // Infer() has returned.
    Infer(*this);

    // Descend into whichever branches exist.
    if (_left)
    {
        _left->Prove();
    }
    if (_right)
    {
        _right->Prove();
    }
}
コード例 #3
0
ファイル: var_mgctm.cpp プロジェクト: rpdodo/mllib
// Corpus-level inference: runs the per-document Infer() on every document of
// `test` and returns exp(-S / test.TWordsNum()), where S is the sum of the
// per-document results.
double VarMGCTM::Infer(CorpusC &test, MGCTMC &m) {
  const size_t doc_count = test.Len();
  VReal doc_scores(doc_count);

  // Each iteration uses its own variational state and writes only its own
  // slot of doc_scores.
  #pragma omp parallel for
  for (size_t i = 0; i < doc_count; i++) {
    MGVar scratch;
    doc_scores[i] = Infer(test.docs[i], m, &scratch);
  }

  // Serial reduction in document order.
  double total = 0.0;
  for (size_t i = 0; i < doc_count; i++) {
    total += doc_scores[i];
  }
  return exp(-total / test.TWordsNum());
}
コード例 #4
0
ファイル: rtm_var_em.cpp プロジェクト: lijiankou/mllib-1
/*****
E-step: runs inference over the corpus and folds the result into the
sufficient statistics (collecting them is the reason inference is run).
phi: topic * doc_len
z_bar is resized to TopicNum x corpus length and filled column by column.
*****/
void VarRTM::EStep(CorpusC &cor, RTMC &m, RTMSuffStats* ss) const {
  RTMVar var;
  Infer(cor, m, &var);

  ss->z_bar.resize(m.TopicNum(), cor.Len());
  for (size_t doc = 0; doc < cor.Len(); doc++) {
    for (size_t pos = 0; pos < cor.ULen(doc); pos++) {
      for (int t = 0; t < m.TopicNum(); t++) {
        // Count-weighted phi entry; it goes to both the per-word and the
        // per-topic accumulator.
        const double mass = cor.Count(doc, pos) * var.phi[doc](t, pos);
        ss->topic(t, cor.Word(doc, pos)) += mass;
        ss->topic_sum[t] += mass;
      }
    }
    ss->z_bar.col(doc) = var.z_bar.col(doc);
  }
}
コード例 #5
0
ファイル: var_mgctm.cpp プロジェクト: rpdodo/mllib
// Runs converged_.em_max_iter_ rounds of EM on the member corpus cor_,
// updating the model `m` in place. After every M-step m->pi is logged and
// OutputFile() is called with the model, the per-document eta strings joined
// by newlines, and the iteration index.
//
// `test` is only referenced by the disabled perplexity log at the end of the
// loop body.
void VarMGCTM::RunEM(CorpusC &test, MGCTM* m) {
  MGSS ss;
  ss.CorpusInit(cor_, *m);
  MStep(ss, m);
  LOG(INFO) << m->pi.transpose();

  for (int iter = 0; iter < converged_.em_max_iter_; iter++) {
    // E-step: per-document inference; each iteration writes only its own
    // slot of vars / likelihoods.
    std::vector<MGVar> vars(cor_.Len());
    VReal likelihoods(cor_.Len());
    #pragma omp parallel for
    for (size_t d = 0; d < cor_.Len(); d++) {
      likelihoods[d] = Infer(cor_.docs[d], *m, &vars[d]);
    }

    // Rebuild the sufficient statistics from scratch.
    // NOTE: `likelihood` is accumulated below but not consumed anywhere in
    // this function.
    double likelihood = 0;
    VStr etas(cor_.Len());
    ss.SetZero(m->GTopicNum(), m->LTopicNum1(), m->LTopicNum2(), m->TermNum());
    for (size_t d = 0; d < cor_.Len(); d++) {
      DocC &doc = cor_.docs[d];
      MGVar &var = vars[d];

      // Global topics: each term is weighted by (1 - delta[n]).
      for (size_t n = 0; n < doc.ULen(); n++) {
        for (int k = 0; k < m->GTopicNum(); k++) {
          const double g_weight = doc.Count(n)*var.g_z(k, n)*(1 - var.delta[n]);
          ss.g_topic(k, doc.Word(n)) += g_weight;
          ss.g_topic_sum[k] += g_weight;
        }
      }

      // Local topics of group j: each term is weighted by delta[n] * eta[j].
      // The pi statistic for the same j is accumulated right after.
      for (int j = 0; j < m->LTopicNum1(); j++) {
        for (size_t n = 0; n < doc.ULen(); n++) {
          for (int k = 0; k < m->LTopicNum2(); k++) {
            const double l_weight = doc.Count(n)*var.l_z[j](k, n)*var.delta[n]*var.eta[j];
            ss.l_topic[j](k, doc.Word(n)) += l_weight;
            ss.l_topic_sum(k, j) += l_weight;
          }
        }
        ss.pi[j] += var.eta[j];
      }

      etas[d] = EVecToStr(var.eta);
      likelihood += likelihoods[d];
    }

    // M-step and per-iteration reporting.
    MStep(ss, m);
    LOG(INFO) << m->pi.transpose();
    OutputFile(*m, Join(etas,"\n"), iter);
    // Perplexity logging on the test corpus is currently disabled:
//    LOG(INFO) <<"perplexity: " <<Infer(test,*m);
  }
}
コード例 #6
0
ファイル: lda_var_em.cpp プロジェクト: rpdodo/mllib
// Corpus-level inference. Resizes *ga and *phi to one entry per document,
// fills them through the per-document Infer(), and returns
// exp(-S / cor.TWordsNum()), where S is the sum of the per-document results.
double LDA::Infer(CorpusC &cor, const LdaModel &m, VVReal* ga, VVVReal* phi) const {
  const size_t doc_count = cor.Len();
  ga->resize(doc_count);
  phi->resize(doc_count);
  VReal doc_likelihood(doc_count);

  // Each iteration writes only its own slot of the three containers.
  #pragma omp parallel for
  for (size_t i = 0; i < doc_count; i++) {
    doc_likelihood[i] = Infer(cor, i, m, &ga->at(i), &phi->at(i));
  }

  // Serial reduction in document order.
  double total = 0.0;
  for (size_t i = 0; i < doc_count; i++) {
    total += doc_likelihood[i];
  }
  return exp(-total / cor.TWordsNum());
}
コード例 #7
0
ファイル: ialias.c プロジェクト: NoSuchProcess/OrangeC
/*
 * Updates points-to information for the result of an additive quad.
 *
 * Nothing is done unless the answer operand is address-sized (ISZ_ADDR) and
 * the TEMP_ANS bit is set in head->temps. Otherwise the operands are
 * classified and one of the following cases is applied (C = integer constant,
 * p = non-constant immediate, R = operand flagged as a temp):
 *
 *   opcode is i_add and left is immediate:
 *     C + R : copy R's points-to list with every offset shifted by C
 *     p + R : Infer() from R with a one-element list naming p at offset 0
 *     p + C / C + p : add the address of p at offset C to the answer's list
 *   otherwise, right is immediate:
 *     R + C : as C + R, with C negated when the opcode is not i_add
 *     R + p : as p + R
 *   otherwise, both operands are temps:
 *     R + R : Infer() from the non-pointer temp using the pointer temp's list
 *
 * Immediates of type en_labcon are skipped in the p cases.
 */
static void HandleAdd(QUAD *head)
{
    if ((head->ans->size == ISZ_ADDR) && (head->temps & TEMP_ANS))
    {
        if (head->dc.opcode == i_add && head->dc.left->mode == i_immed)
        {
            if (head->temps & TEMP_RIGHT)
            {
                if (isintconst(head->dc.left->offset))
                {
                    // C + R
                    ALIASLIST *scan = tempInfo[head->dc.right->offset->v.sp->value.i]->pointsto;
                    ALIASLIST *result = NULL;
                    // NOTE(review): `changed` is saved here and restored after the loop,
                    // presumably so that building the scratch `result` list does not
                    // count as a change - confirm against AliasUnion.
                    BOOLEAN xchanged = changed;
                    while (scan)
                    {
                        // same name as the source entry, offset shifted by the constant
                        ALIASADDRESS *addr = LookupAddress(scan->address->name, scan->address->offset + head->dc.left->offset->v.i);
                        ALIASLIST *al = aAlloc(sizeof(ALIASLIST));
                        al->address = addr;
                        AliasUnion(&result, al);
                        scan = scan->next;
                    }
                    changed = xchanged;
                    // merge the shifted list into the answer temp's points-to list
                    AliasUnion(&tempInfo[head->ans->offset->v.sp->value.i]->pointsto, result);
                }
                else 
                {
                    // p + R
                    if (head->dc.left->offset->type != en_labcon) // needed for exports
                    {
                        // one-element list naming p at offset 0, handed to Infer with R
                        ALIASNAME *nm = LookupMem(head->dc.left->offset->v.sp->imvalue);
                        ALIASADDRESS *aa = LookupAddress(nm, 0);
                        ALIASLIST *al = aAlloc(sizeof(ALIASLIST));
                        al->address = aa;
                        Infer(head->ans, head->dc.right, al);
                    }
                }
            }
            else if (head->dc.right->mode == i_immed)
            {
                // both operands are immediates
                if (!isintconst(head->dc.left->offset) && head->dc.left->offset->type != en_labcon)
                {
                    // p + C
                    // NOTE(review): right->offset->v.i is read without an isintconst check
                    // on the right operand - confirm it is always a constant here.
                    ALIASNAME *nm = LookupMem(head->dc.left->offset->v.sp->imvalue);
                    ALIASADDRESS *aa = LookupAddress(nm, head->dc.right->offset->v.i);
                    ALIASLIST *al = aAlloc(sizeof(ALIASLIST));
                    al->address = aa;
                    AliasUnion(&tempInfo[head->ans->offset->v.sp->value.i]->pointsto,al);
                }
                else if (!isintconst(head->dc.right->offset) && head->dc.right->offset->type != en_labcon)
                {
                    // C + p
                    // NOTE(review): this branch is also reached when left is an en_labcon
                    // rather than an integer constant; left->offset->v.i is read either way.
                    ALIASNAME *nm = LookupMem(head->dc.right->offset->v.sp->imvalue);
                    ALIASADDRESS *aa = LookupAddress(nm, head->dc.left->offset->v.i);
                    ALIASLIST *al = aAlloc(sizeof(ALIASLIST));
                    al->address = aa;
                    AliasUnion(&tempInfo[head->ans->offset->v.sp->value.i]->pointsto, al);
                }
            }
        }
        else if (head->dc.right->mode == i_immed)
        {

            // reached when the opcode is not i_add or the left operand is not immediate
            if (head->temps & TEMP_LEFT)
            {
                if (isintconst(head->dc.right->offset))
                {
                    // R+C
                    // the constant is negated for any opcode other than i_add
                    int c = head->dc.opcode == i_add ? head->dc.right->offset->v.i : -head->dc.right->offset->v.i;
                    ALIASLIST *scan = tempInfo[head->dc.left->offset->v.sp->value.i]->pointsto;
                    ALIASLIST *result = NULL;
                    // same save/restore of `changed` as in the C + R case
                    BOOLEAN xchanged = changed;
                    while (scan)
                    {
                        ALIASADDRESS *addr = LookupAddress(scan->address->name, scan->address->offset + c);
                        ALIASLIST *al = aAlloc(sizeof(ALIASLIST));
                        al->address = addr;
                        AliasUnion(&result, al);
                        scan = scan->next;
                    }
                    changed = xchanged;
                    AliasUnion(&tempInfo[head->ans->offset->v.sp->value.i]->pointsto, result);
                }
                else
                {
                    // R + p
                    if (head->dc.right->offset->type != en_labcon) // needed for exports
                    {
                        ALIASNAME *nm = LookupMem(head->dc.right->offset->v.sp->imvalue);
                        ALIASADDRESS *aa = LookupAddress(nm, 0);
                        ALIASLIST *al = aAlloc(sizeof(ALIASLIST));
                        al->address = aa;
                        Infer(head->ans, head->dc.left, al);
                    }
                }
            }
        }
        else if ((head ->temps & (TEMP_LEFT | TEMP_RIGHT)) == (TEMP_LEFT | TEMP_RIGHT))
        {
            // R+R
            ALIASLIST *src;
            IMODE *one = head->dc.left;
            IMODE *two = head->dc.right;
            // if the right operand is address-sized, swap so that `one` refers to it
            if (two->size == ISZ_ADDR)
            {
                IMODE *three = one;
                one = two;
                two = three;
            }
            // nothing is recorded when neither operand is address-sized
            if (one->size == ISZ_ADDR)
            {
                // now one has the pointer, two has something else
                src = tempInfo[one->offset->v.sp->value.i]->pointsto;
                Infer(head->ans, two, src);
            }
        }
    }
}