/*
 * Populate the count tables from the topic currently assigned to each token,
 * then write the initial model with save_lda(lda, 0).
 *
 * Each lda->tokens[i] is read as {row id for nd, row id for nw, topic}.
 * nd and nw are flat tables with lda->p.k entries per row; nkw has one
 * entry per topic.  The tables are only incremented here, so they are
 * presumably zeroed by whoever allocated them -- confirm at the caller.
 */
static void fullfill_param(Lda *lda)
{
    const int ntopics = lda->p.k;

    for (int t = 0; t < lda->t; t++) {
        const int doc   = lda->tokens[t][0];
        const int word  = lda->tokens[t][1];
        const int topic = lda->tokens[t][2];

        lda->nd[doc * ntopics + topic] += 1;
        lda->nw[word * ntopics + topic] += 1;
        lda->nkw[topic] += 1;
    }

    save_lda(lda, 0);
}
/*
 * Estimate the LDA model by Gibbs sampling over all tokens.
 *
 * After the count tables are populated by fullfill_param(), the sampler
 * runs lda->p.niters sweeps.  In every sweep the tokens are visited in a
 * freshly shuffled order; for each token the current topic is removed from
 * the counts, a new topic is drawn from the cumulative weights
 *
 *     (nd[uid][l] + a) * (nw[vid][l] + b) / (nkw[l] + b * v)
 *
 * and the counts and lda->tokens[id][2] are updated with the draw.
 * The model is written with save_lda() every lda->p.savestep sweeps; a
 * savestep of 0 disables the periodic save.
 *
 * If the scratch buffers cannot be allocated, an error is reported on
 * stderr and the function returns without touching the model.
 */
void est_lda(Lda *lda)
{
    const int k = lda->p.k;

    /* Scratch: visiting order of the tokens and cumulative topic weights. */
    int *order = malloc(sizeof *order * (size_t) lda->t);
    double *prob = malloc(sizeof *prob * (size_t) k);
    if ((order == NULL && lda->t > 0) || (prob == NULL && k > 0)) {
        fprintf(stderr, "est_lda: out of memory\n");
        free(order);
        free(prob);
        return;
    }

    fullfill_param(lda);

    const double vb = lda->p.b * lda->v;

    for (int i = 0; i < lda->t; i++) {
        order[i] = i;
    }

    for (int i = 1; i <= lda->p.niters; i++) {
        fprintf(stderr, "iteration %d estimate begin ... ", i);
        shuffle(order, lda->t);

        for (int j = 0; j < lda->t; j++) {
            const int id  = order[j];
            const int uid = lda->tokens[id][0];
            const int vid = lda->tokens[id][1];
            const int top = lda->tokens[id][2];

            /* Take the token out of the counts before resampling it. */
            lda->nd[uid * k + top] -= 1;
            lda->nw[vid * k + top] -= 1;
            lda->nkw[top] -= 1;

            /* prob[l] holds the running sum of the unnormalised weights. */
            for (int l = 0; l < k; l++) {
                prob[l] = 1.0 * (lda->nd[uid * k + l] + lda->p.a)
                              * (lda->nw[vid * k + l] + lda->p.b)
                              / (lda->nkw[l] + vb);
                if (l > 0) {
                    prob[l] += prob[l - 1];
                }
            }

            double rnd = prob[k - 1] * (0.1 + rand()) / (0.1 + RAND_MAX);

            /*
             * Pick the first topic whose cumulative weight exceeds rnd.
             * The scan stops at k - 1: when rand() returns RAND_MAX (or
             * rounding interferes) rnd equals prob[k - 1] and nothing
             * compares greater, which previously left st == k and wrote
             * past the end of the count tables.
             */
            int st = 0;
            while (st < k - 1 && !(prob[st] > rnd)) {
                st++;
            }

            lda->nd[uid * k + st] += 1;
            lda->nw[vid * k + st] += 1;
            lda->nkw[st] += 1;
            lda->tokens[id][2] = st;
        }

        fprintf(stderr, " done\n");

        /* i % 0 is undefined behaviour, so test savestep first. */
        if (lda->p.savestep != 0 && i % lda->p.savestep == 0) {
            save_lda(lda, i);
        }
    }

    free(order);
    order = NULL;
    free(prob);
    prob = NULL;
}
/*
 * Drive the LDA estimation: populate the model's tables once with
 * fullfill_param(), then run lda->p.niters sampling sweeps through
 * gibbs_sample(), reporting the wall-clock seconds of each sweep on stderr.
 *
 * The model is written with save_lda() every lda->p.savestep sweeps; a
 * savestep of 0 disables the periodic save.
 */
void est_lda(Lda *lda)
{
    // fill the count tables and link their nonzero elements
    fullfill_param(lda);

    // estimation iterations for lda
    for (int n = 1; n <= lda->p.niters; n++) {
        long sec1 = (long) time(NULL);
        gibbs_sample(lda);
        long sec2 = (long) time(NULL);

        fprintf(stderr, "iter %d done, using %ld seconds\n", n, sec2 - sec1);

        // n % 0 is undefined behaviour, so test savestep first
        if (lda->p.savestep != 0 && n % lda->p.savestep == 0) {
            save_lda(lda, n);
        }
    }
}
/*
 * Populate the count tables from the topic currently assigned to each token,
 * link the nonzero entries of every row, and write the initial model with
 * save_lda(lda, 0).
 *
 * nd and nw are flat tables with lda->p.k + 1 entries per row.  Entry 0 of a
 * row acts as the list head; entries 1..k are scanned for nonzero counts
 * (token topic ids are therefore presumably 1-based -- confirm with the
 * loader).  After linking, following .next from entry 0 visits the entries
 * with count > 0 in ascending index order and returns to 0; .prev of entry 0
 * is the last such entry.  A row with no nonzero count has both links of its
 * head set to 0.
 */
static void fullfill_param(Lda *lda)
{
    const int stride = lda->p.k + 1;

    /* Accumulate the counts implied by the current topic assignments. */
    for (int t = 0; t < lda->t; t++) {
        const int doc   = lda->tokens[t][0];
        const int word  = lda->tokens[t][1];
        const int topic = lda->tokens[t][2];

        lda->nd[doc * stride + topic].count += 1;
        lda->nw[word * stride + topic].count += 1;
        lda->nkw[topic] += 1;
    }

    /* Link the nonzero entries of every nd row. */
    for (int row = 0; row < lda->d; row++) {
        const int base = row * stride;
        int tail = 0;   /* last linked entry so far; 0 is the head */

        for (int k = 1; k < stride; k++) {
            if (lda->nd[base + k].count > 0) {
                lda->nd[base + tail].next = k;
                lda->nd[base + k].prev = tail;
                tail = k;
            }
        }
        /* Close the ring back onto the head. */
        lda->nd[base + tail].next = 0;
        lda->nd[base].prev = tail;
    }

    /* Same linking for every nw row. */
    for (int row = 0; row < lda->v; row++) {
        const int base = row * stride;
        int tail = 0;

        for (int k = 1; k < stride; k++) {
            if (lda->nw[base + k].count > 0) {
                lda->nw[base + tail].next = k;
                lda->nw[base + k].prev = tail;
                tail = k;
            }
        }
        lda->nw[base + tail].next = 0;
        lda->nw[base].prev = tail;
    }

    save_lda(lda, 0);
}