Example #1
0
/*
 *  likemerge_DIRalpha() -- log-likelihood change from merging topic
 *  k2 into topic k1 under the Dirichlet prior for topics.
 *
 *  For every document with data in k2, the two separate Gamma terms
 *  for k1 and k2 are replaced by a single term on the pooled count.
 *  Returns the log-likelihood difference (merged minus separate).
 *
 *  Fixes: the original used a K&R-style definition with implicit-int
 *  parameters (invalid since C99) and declared an unused local 't'.
 */
double likemerge_DIRalpha(int k1, int k2) {
  /*
   *   Dirichlet for topics
   */
  int i;
  double likelihood = 0;
  for (i=0; i<ddN.DT; i++) {
    if ( ddS.Ndt[i][k2]>0 ) {
      likelihood +=
	gammadiff((int)ddS.Ndt[i][k1]+ddS.Ndt[i][k2], ddP.alphapr[k1], 0.0)
	- gammadiff((int)ddS.Ndt[i][k1], ddP.alphapr[k1], 0.0)
	- gammadiff((int)ddS.Ndt[i][k2], ddP.alphapr[k2], 0.0);
    }
  }
  /*  abort with a diagnostic if the sum went non-finite  */
  yap_infinite(likelihood);
  return likelihood;
}
Example #2
0
//   likemerge_DIRbeta() 
/*
 *  Log-likelihood change for merging topic k2 into topic k1 under
 *  the Dirichlet prior on topic-word counts: per-word Gamma terms
 *  on the pooled counts replace the two separate ones, and the
 *  normaliser on the topic totals is adjusted to match.
 */
static double likemerge_DIRbeta(int k1, int k2) {
  int w;
  double delta = 0;

  /*  per-word terms: only words with data in k2 change  */
  for (w=0; w<ddN.W; w++) {
    if ( ddS.Nwt[w][k2]>0 ) {
      assert(ddP.betapr[w]>0);
      delta += gammadiff((int)ddS.Nwt[w][k1]+ddS.Nwt[w][k2], ddP.betapr[w], 0.0);
      delta -= gammadiff((int)ddS.Nwt[w][k1], ddP.betapr[w], 0.0);
      delta -= gammadiff((int)ddS.Nwt[w][k2], ddP.betapr[w], 0.0);
    }
  }

  /*  normaliser terms on the per-topic totals  */
  delta -= gammadiff((int)ddS.NWt[k1]+ddS.NWt[k2], ddP.betatot, 0.0);
  delta += gammadiff((int)ddS.NWt[k1], ddP.betatot, 0.0);
  delta += gammadiff((int)ddS.NWt[k2], ddP.betatot, 0.0);

  yap_infinite(delta);
  return delta;
}
Example #3
0
/*
 *  likemerge_DIRbeta() -- log-likelihood change from merging topic
 *  k2 into topic k1 under the Dirichlet prior for words.
 *
 *  Per-word Gamma terms on the pooled counts replace the two
 *  separate ones; the normaliser on the per-topic totals (NWt) is
 *  adjusted accordingly.  Returns the log-likelihood difference.
 *
 *  Fixes: K&R-style definition with implicit-int parameters
 *  (invalid since C99), unused local 't', and a redundant second
 *  accumulator ('val' copied into 'likelihood' before return).
 */
double likemerge_DIRbeta(int k1, int k2) {
  int j;
  double likelihood = 0;
  for (j=0; j<ddN.W; j++) {
    if ( ddS.Nwt[j][k2]>0 ) {
      assert(ddP.betapr[j]>0);
      likelihood += gammadiff((int)ddS.Nwt[j][k1]+ddS.Nwt[j][k2], ddP.betapr[j], 0.0)
	- gammadiff((int)ddS.Nwt[j][k1], ddP.betapr[j], 0.0)
	- gammadiff((int)ddS.Nwt[j][k2], ddP.betapr[j], 0.0);
    }
  }
  /*  normaliser terms on the per-topic totals  */
  likelihood -= gammadiff((int)ddS.NWt[k1]+ddS.NWt[k2], ddP.betatot, 0.0)
    - gammadiff((int)ddS.NWt[k1], ddP.betatot, 0.0)
    - gammadiff((int)ddS.NWt[k2], ddP.betatot, 0.0);
  yap_infinite(likelihood);
  return likelihood;
}
Example #4
0
/*
 *  likelihood_PYbeta() -- log-likelihood contribution of the
 *  Pitman-Yor word side: per-topic sum of Stirling-number terms
 *  S_S(Nwt,Twt) over words, plus the concentration/discount
 *  normaliser terms on the per-topic table totals, plus a Gamma
 *  prior on the concentration bwpar.
 */
double likelihood_PYbeta() {
  int i,t;
  double likelihood = 0;
  /*  NOTE(review): lbw/law are computed unconditionally; when
   *  awpar==0, law = log(0) = -inf but is only read in the awpar!=0
   *  branch below, so the value is never used  */
  double lbw = log(ddP.bwpar);
  double law = log(ddP.awpar);
  likelihood += pctl_gammaprior(ddP.bwpar);
  /*
   *    term for k-th node
   */
#ifdef BWPAR0
  /*  topic 0 is excluded when BWPAR0 is defined  */
  for (t=1; t<ddN.T; t++) {
#else
  for (t=0; t<ddN.T; t++) {
#endif
    uint32_t Tw_ = 0;   /*  running total of tables for this topic  */
    for (i=0; i<ddN.W; i++) {
      int tt = ddS.Twt[i][t];
      int nn = ddS.Nwt[i][t];
      if ( nn>0 ) {
        Tw_ += tt;
	likelihood += S_S(ddC.SY,nn,tt);
#if 1
	/*  debugging guard: quit with full diagnostics as soon as the
	 *  accumulator goes non-finite, identifying the offending cell  */
	if ( !finite(likelihood) || isinf(likelihood) || isnan(likelihood)  ) 
	  yap_quit("Like=%lf:  Nwt[%d][%d]=%d  Twt[i][t]=%d S.M=%d S.N=%d\n",
		   likelihood,
		   i, t, (int)ddS.Nwt[i][t],(int)ddS.Twt[i][t],ddC.SY->usedM, ddC.SY->usedN);
#endif
      }
    }
    yap_infinite(likelihood);   
    if ( ddP.awpar==0 ) {
      /*  zero discount: each table contributes log(bwpar)  */
      likelihood += Tw_*lbw;
    } else {
#ifdef L_CACHE
      likelihood += Tw_*law + gcache_value(&ddC.lgbaw, (int)Tw_);
#else
      likelihood += Tw_*law + gammadiff((int)Tw_, ddP.bwpar/ddP.awpar, 0.0);
#endif
    }
    /*  subtract the normaliser on the topic's word-count total  */
#ifdef L_CACHE
    likelihood -= gcache_value(&ddC.lgbw, (int)ddS.NWt[t]);
#else
    likelihood -= gammadiff((int)ddS.NWt[t], ddP.bwpar, 0.0);
#endif
    yap_infinite(likelihood);   
  }  
  yap_infinite(likelihood);   
  return likelihood;
}

/*
 *  Log-likelihood term for the constant (PDP) base distribution over
 *  words: every table held by word w contributes log(betapr[w]).
 */
double likelihood_PYbeta_PDP() {
  int w;
  double ll = 0;

  for (w=0; w<ddN.W; w++)
    if ( ddS.TwT[w]>0 )
      ll += ddS.TwT[w]*log(ddP.betapr[w]);

  //yap_infinite(ll);
  return ll;
}
Example #5
0
/********************************
 *   code for LDA 
 *****************************/
/*
 *  One Gibbs pass over (up to) 'words' words of document 'did'.
 *  Per word: remove the current topic assignment from the stats,
 *  build the unnormalised topic posterior in p[], sample a new
 *  topic and reinstall it.  Hold-out words (GibbsHold) are scored
 *  using the "prob" variants but never resampled.  Returns the
 *  accumulated document score sum of log(Z/tot).
 */
double gibbs_lda(/*
      *  fix==GibbsNone for standard ML training/testing
      *  fix==GibbsHold for word hold-out testing,
                  *       same as GibbsNone but also handles
      *       train and test words differently
      */
     enum GibbsType fix,
     int did,    //  document index
     int words,  //  do this many
     float *p,    //  temp store
     D_MiSi_t *dD
     ) {
  int i, wid, t, mi=0;
  int e;
  double Z, tot;
  double logdoc = 0;
  int logdocinf = 0;          //  flag so the infinity check fires once
  int StartWord = ddD.N_dTcum[did];
  int EndWord = StartWord + words;
  float dtip[ddN.T];
#ifdef MH_STEP
  /*  document-side factors are invariant across words of this doc,
   *  so precompute them once per topic  */
  double doc_side_cache[ddN.T];
  for (t=0; t<ddN.T; t++) 
    doc_side_cache[t] = doc_side_fact(did,t);
#endif

  /*
   *   some of the latent variables are not sampled
   *   are kept in the testing version, uses enum GibbsType
   *      fix = global document setting
   *      fix_doc = settings for word in this doc
   *
   *   NB.   if fix==GibbsNone, then fix_doc==fix
   *         if fix==GibbsHold then fix_doc==GibbsHold or GibbsNone
   */
  enum GibbsType fix_doc = fix;

  if ( PCTL_BURSTY() ) {
    mi = ddM.MI[did];   //  multi-word indicator index for this doc
  }
  e = ddD.e[did];

  for (i=StartWord; i<EndWord; i++) {
#ifdef MH_STEP
    int oldt;
#endif
    if ( fix==GibbsHold ) {
      if ( pctl_hold(i) )
	fix_doc = GibbsHold;  //   this word is a hold out
      else
	fix_doc = GibbsNone;
    }
    // check_m_vte(e);
    wid=ddD.w[i]; 
    /*******************
     *   first we remove affects of this word on the stats
     *******************/
#ifdef MH_STEP
    oldt = 
#endif
      t = Z_t(ddS.z[i]); 
    if ( fix_doc!=GibbsHold ) {
      if ( remove_topic(i, did, (!PCTL_BURSTY()||Z_issetr(ddS.z[i]))?wid:-1, 
                        t, mi, dD) ) {
	goto endword;
      }
    }
    /***********************
     *    get topic probabilities
     ***********************/
    // check_m_vte(e);
#ifdef MU_CACHE
    mu_side_fact_update(e);
#endif
#ifdef PHI_CACHE
    phi_norm_update(wid, e);
    phi_sum_update(wid, e, i);
#endif
    for (t=0, Z=0, tot=0; t<ddN.T; t++) {
#ifdef MH_STEP
      /*  temporarily disable 'back' while building the MH proposal  */
      int saveback = ddP.back;
      if ( fix_doc!=GibbsHold )
        ddP.back = 0;
#endif
      /*
       *   (fix_doc==GibbsHold) =>
       *       doing estimation, not sampling so use prob versions
       *    else
       *        doing sampling so use fact versions
       */
#ifdef MH_STEP
      double tf = (fix_doc==GibbsHold)?doc_side_prob(did,t):
        doc_side_cache[t];
      if ( tf>0 ) {
        double wf = (fix_doc==GibbsHold)?word_side_prob(e, wid, t):
          word_side_fact(e, wid, t);
#else
      double tf = (fix_doc==GibbsHold)?doc_side_prob(did,t):
        doc_side_fact(did,t);
      if ( tf>0 ) {
        double wf = (fix_doc==GibbsHold)?word_side_prob(e, wid, t):
          word_side_fact(e, wid, t);
#endif
        tot += tf;
        if ( PCTL_BURSTY() ) 
          wf = (fix_doc==GibbsHold)?docprob(dD, t, i, mi, wf):
            docfact(dD, t, i, mi, wf, &dtip[t]);
        Z += p[t] = tf * wf;
      } else
        p[t] = 0;
#ifdef MH_STEP
      ddP.back = saveback;
#endif
    }
    if ( fix!=GibbsHold || fix_doc==GibbsHold )
      logdoc += log(Z/tot);
    if ( logdocinf==0 ) 
      if ( !finite(logdoc) ) {
	logdocinf++;
	yap_infinite(logdoc);
      }

    /*******************
     *   now sample t using p[] and install affects of this on the stats;
     *   but note this needs indicator to be set!
     *******************/
    if ( fix_doc!=GibbsHold ) {
      /*
       *  sample and update core stats 
       */
      t = samplet(p, Z, ddN.T, rng_unit(rngp));
#ifdef MH_STEP
      /*  Metropolis-Hastings correction: the proposal used cached /
       *  back==0 factors, so accept or reject against the exact ratio  */
      if ( t != oldt ) {
        double ratio  = p[oldt]/p[t];
        if ( PCTL_BURSTY() ) {
          ratio *= docfact(dD, t, i, mi, word_side_fact(e, wid, t), &dtip[t])
            * doc_side_fact(did,t);
          ratio /= docfact(dD, oldt, i, mi, word_side_fact(e, wid, oldt), &dtip[oldt])
            * doc_side_fact(did,oldt);
        } else {
          ratio *= word_side_fact(e, wid, t) * doc_side_fact(did, t);
          ratio /= word_side_fact(e, wid, oldt) * doc_side_fact(did, oldt);
        }
        if ( ratio<1 && ratio<rng_unit(rngp) )
          t = oldt;
      }
#endif
      Z_sett(ddS.z[i],t);
#ifdef TRACE_WT
      if ( wid==TR_W && t==TR_T )
        yap_message("update_topic(w=%d,t=%d,d=%d,l=%d,z=%d,N=%d,T=%d)\n",
                    wid,t,did,i,ddS.z[i],
                    (int)ddS.m_vte[wid][t][e],(int)ddS.s_vte[wid][t][e]);
#endif
      update_topic(i, did, wid, t, mi, dtip[t], dD);
#ifdef TRACE_WT
      if ( wid==TR_W && t==TR_T )
        yap_message("after update_topic(w=%d,t=%d,d=%d,l=%d,z=%d,N=%d,T=%d)\n",
                    wid,t,did,i,ddS.z[i],
                    (int)ddS.m_vte[wid][t][e],(int)ddS.s_vte[wid][t][e]);
#endif
    }
    endword:
    if ( PCTL_BURSTY() && M_multi(i) ) {
      mi++;
    }
  }
  return logdoc;
}
Example #6
0
/*
 *  compute likelihood ratio difference based on *M
 */
/*
 *  NOTE(review): this definition is incomplete dead code -- the for
 *  loop has an empty body, the function never returns and its closing
 *  brace is missing; the trailing #endif pairs with an #if/#ifdef
 *  that opens before this excerpt, so the block is presumably
 *  conditionally compiled out.  Left byte-identical; confirm against
 *  the full file before removing.
 */
static double merge_like_Tdt_sum(int k1, int k2, merge_alpha_t *M) {
  double *val;
  int d;
  val = dvec(ddN.DT);

  for (d=0; d<ddN.DT; d++) {

}
#endif

/*
 *  merge_like_Tdt() -- document-side log-likelihood ratio for merging
 *  topic k2 into topic k1, evaluated against the proposed merged
 *  statistics in *M.  Covers the per-document Stirling terms, the
 *  table-count normalisers, and the top-level prior term selected by
 *  ddP.PYalpha (H_PDP / H_HDP / default PYP).
 */
static double merge_like_Tdt(int k1, int k2, merge_alpha_t *M) {
  int d;
  double la = 0;
  double lb = log(ddP.bpar);
  int TD_diff = 0;            /*  cumulative table-count change over docs  */
  double likelihood = 0;
  if ( ddP.apar>0 ) la = log(ddP.apar);
  for (d=0; d<ddN.DT; d++) {
    int Td_diff;  /*  total change in T for doc */
    if ( M->Ndt[d]>1 ) {
      /*  replace the two separate Stirling terms with the merged one;
       *  each S_S() pairs a count Ndt[d][k] with its own Tdt[d][k]  */
      likelihood -= S_S(ddC.SX,ddS.Ndt[d][k2],ddS.Tdt[d][k2]);
      likelihood -= S_S(ddC.SX,ddS.Ndt[d][k1],ddS.Tdt[d][k1]);
      likelihood += S_S(ddC.SX,M->Ndt[d],M->Tdt[d]);
      assert(M->Tdt[d]>=1);
      assert(M->Tdt[d]<=M->Ndt[d]);
      assert(ddS.Ndt[d][k2]==0 || ddS.Tdt[d][k2]>0);
      assert(ddS.Ndt[d][k1]==0 || ddS.Tdt[d][k1]>0);
    }
    yap_infinite(likelihood);
    TD_diff += Td_diff = (M->Tdt[d]-ddS.Tdt[d][k2]-ddS.Tdt[d][k1]);
    if ( Td_diff==0 )
      continue;
    if ( ddP.apar==0 ) {
      /*  zero discount: each table change contributes log(bpar)  */
      likelihood += Td_diff*lb;
    } else {
      likelihood += Td_diff*la;
      if ( Td_diff<0 ) 
	likelihood -= 
	  gammadiff(-Td_diff,M->TdT[d]+ddP.bpar/ddP.apar, 0.0);
      else
	likelihood += 
	  gammadiff(Td_diff, M->TdT[d]-Td_diff+ddP.bpar/ddP.apar, 0.0);
    }
    yap_infinite(likelihood);
  }      
  if ( ddP.PYalpha==H_PDP ) {
    /*  merged tables fall under k1's prior; k2's contribution is removed  */
    likelihood += (M->TDt-ddS.TDt[k1])*log(ddP.alphapr[k1])
      - ddS.TDt[k2]*log(ddP.alphapr[k2]);
  } else if ( ddP.PYalpha==H_HDP ) {
    likelihood += lgamma(M->TDTm+M->TDt-TD_diff+ddP.b0) 
      - lgamma(M->TDTm+M->TDt+ddP.b0);
    likelihood -= gammadiff(ddS.TDt[k1], ddP.b0*ddP.alphapr[k1], 0.0);
    likelihood -= gammadiff(ddS.TDt[k2], ddP.b0*ddP.alphapr[k2], 0.0);
    likelihood += gammadiff(M->TDt, ddP.b0*ddP.alphapr[k1], 0.0);
  } else {
    double lga0 = lgamma(1-ddP.a0);
    likelihood += lgamma(M->TDTm+M->TDt-TD_diff+ddP.b0) 
      - lgamma(M->TDTm+M->TDt+ddP.b0);
    /*   because k2 gone to zero, so one less topic */
    likelihood -= log(ddP.b0+ddP.a0*(ddS.TDTnz-1));
    if ( ddS.TDt[k2]>1 )
      likelihood -= lgamma(ddS.TDt[k2]-ddP.a0) - lga0;
    if ( ddS.TDt[k1]>1 )
      likelihood -= lgamma(ddS.TDt[k1]-ddP.a0) - lga0;
    likelihood += lgamma(M->TDt-ddP.a0) - lga0;
  }
  yap_infinite(likelihood);
  return likelihood;
}
Example #7
0
/*
 *  merge_like_Twt() -- word-side log-likelihood ratio for merging
 *  topic k2 into topic k1, evaluated against the proposed merged
 *  statistics in *M.  Mirrors merge_like_Tdt() on the word side.
 *
 *  Fix: the per-word Stirling loop subtracted
 *  S_S(Nwt[i][k1],Twt[i][k2]) twice (copy-paste bug), mixing k1's
 *  count with k2's table count and never removing either topic's own
 *  term; each S_S() must pair Nwt[i][k] with Twt[i][k] of the SAME
 *  topic, as merge_like_Tdt() does.
 */
static double merge_like_Twt(int k1, int k2, merge_beta_t *M) {
  int i, w;
  double likelihood = 0;
#ifndef BWPAR0
  double lbw = log(ddP.bwpar);
#endif
  double law = log(ddP.awpar);
  double TW_diff = 0;
#ifdef BWPAR0
	yap_quit("BWPAR0 unimpleented in merge\n");
#endif
  /*  replace the two per-word Stirling terms with the merged one  */
  for (i=0; i<ddN.W; i++) {
    likelihood -= S_S(ddC.SY,ddS.Nwt[i][k1],ddS.Twt[i][k1]);
    likelihood -= S_S(ddC.SY,ddS.Nwt[i][k2],ddS.Twt[i][k2]);
    likelihood += S_S(ddC.SY,M->Nwt[i],M->Twt[i]);
  }
  if ( ddP.awpar==0 ) {
#ifdef BWPAR0
    /*  NOTE(review): unreachable (yap_quit above); parentheses added
     *  so the table counts, not just M->TWt, multiply the logs  */
    likelihood += (M->TWt-ddS.TWt[k1])*log(ddP_bwpar(k1))
	-ddS.TWt[k2]*log(ddP_bwpar(k2));
#else
    likelihood += (M->TWt-ddS.TWt[k1]-ddS.TWt[k2])*lbw;
#endif
  } else {
    likelihood += (M->TWt-ddS.TWt[k1]-ddS.TWt[k2])*law 
      + gammadiff((int)M->TWt, ddP.bwpar/ddP.awpar, 0.0)
      - gammadiff((int)ddS.TWt[k1], ddP_bwpar(k1)/ddP.awpar, 0.0)
      - gammadiff((int)ddS.TWt[k2], ddP_bwpar(k2)/ddP.awpar, 0.0);
  }
  /*  normaliser terms on the per-topic word-count totals  */
  likelihood += gammadiff((int)ddS.NWt[k1], ddP_bwpar(k1), 0.0);
  likelihood += gammadiff((int)ddS.NWt[k2], ddP_bwpar(k2), 0.0);
  likelihood -= gammadiff((int)M->NWt, ddP.bwpar, 0.0);
  yap_infinite(likelihood);
  if ( ddP.PYbeta==H_PDP ) {
    for (w=0; w<ddN.W; w++) {
      if ( ddS.TwT[w]>0 ) {
	// ???????????????
        likelihood += ddS.TwT[w]*log(ddP.betapr[w]);
      }
    }      
  } else if ( ddP.PYbeta==H_HDP ) {
    yap_quit("merge with PYbeta unimplemented\n");
    /*  NOTE(review): unreachable draft code below -- the loop body
     *  does not use 'w', so the same terms repeat W times; to be
     *  rewritten when the H_HDP merge is implemented  */
    likelihood += lgamma(M->TWTm+M->TWt-TW_diff+ddP.bw0) 
      - lgamma(M->TWTm+M->TWt+ddP.bw0);
    for (w=0; w<ddN.W; w++) {
      // ???????????
      likelihood -= gammadiff(ddS.TWt[k1], ddP.bw0*ddP.betapr[k1], 0.0);
      likelihood -= gammadiff(ddS.TWt[k2], ddP.bw0*ddP.betapr[k2], 0.0);
      likelihood += gammadiff(M->TWt, ddP.bw0*ddP.betapr[k1], 0.0);
    }
  } else {
    double lgaw0 = lgamma(1-ddP.aw0);
    likelihood += lgamma(M->TWTm+M->TWt-TW_diff+ddP.bw0) 
      - lgamma(M->TWTm+M->TWt+ddP.bw0);
    /*   because k2 gone to zero, so one less topic */
    likelihood -= log(ddP.bw0+ddP.aw0*(ddS.TWTnz-1));
    if ( ddS.TWt[k2]>1 )
      likelihood -= lgamma(ddS.TWt[k2]-ddP.aw0) - lgaw0;
    if ( ddS.TWt[k1]>1 )
      likelihood -= lgamma(ddS.TWt[k1]-ddP.aw0) - lgaw0;
    likelihood += lgamma(M->TWt-ddP.aw0) - lgaw0;
  }
  yap_infinite(likelihood);
  return likelihood;
}