/*
 * Train the model in `mf` with per-sample gradient steps on a logistic
 * (sigmoid) prediction, monitoring the value returned by logll().
 *
 * Each pass visits all mf->T samples in a freshly shuffled order.  A pass is
 * accepted only when logll(mf, 0) (logged as "train logll") increases; the
 * learn rate mf->p.a is then multiplied by 0.99.  Otherwise the parameters
 * saved by backup() are restored with recover(), the learn rate is
 * multiplied by 0.8 and the same pass number is retried.
 *
 * While the pass number is <= mf->p.nbias only the bias terms bu/bi are used
 * and updated; afterwards the pu/qi factor vectors (mf->p.k entries per row)
 * take part as well.  mf->p.b scales the shrink term subtracted in every
 * update (presumably an L2 regularisation weight -- confirm with the
 * parameter docs).
 *
 * The function returns nothing; progress and errors are reported on stderr.
 */
void est_mf(MF * mf) {
    fullfill_param(mf);
    double l_logll = logll(mf, 0);
    fprintf(stderr, "iter :0 train logll : %f test logll: %f learn rate : %f\n", l_logll, logll(mf, 1), mf->p.a);

    // visiting order of the samples, reshuffled before every pass
    int *p = malloc(sizeof *p * (size_t)mf->T);
    if (p == NULL && mf->T > 0) {
        fprintf(stderr, "est_mf: cannot allocate the sample order buffer\n");
        return;
    }
    for (int i = 0; i < mf->T; i++) {
        p[i] = i;
    }

    int n = 1;
    while (n <= mf->p.niters) {
        fprintf(stderr, "iter :%d ", n);
        shuffle(p, mf->T);
        backup(mf);
        for (int j = 0; j < mf->T; j++) {
            int id = p[j];
            int uid = mf->u_i[id][0];
            int iid = mf->u_i[id][1];
            int uoff = uid * mf->p.k;
            int ioff = iid * mf->p.k;
            double score = mf->s[id];
            double rx = mf->bu[uid] + mf->bi[iid];
            if (n > mf->p.nbias) {
                for (int k = 0; k < mf->p.k; k++) {
                    rx += mf->pu[uoff + k] * mf->qi[ioff + k];
                }
            }
            // residual between the observed score and the sigmoid prediction
            double e = score - 1.0 / (1.0 + exp(-rx));
            mf->bu[uid] += mf->p.a * (e - mf->p.b * mf->bu[uid]);
            mf->bi[iid] += mf->p.a * (e - mf->p.b * mf->bi[iid]);
            if (n > mf->p.nbias) {
                for (int k = 0; k < mf->p.k; k++) {
                    // keep the old pu value so qi is updated against it
                    double tmp = mf->pu[uoff + k];
                    mf->pu[uoff + k] += mf->p.a * (e * mf->qi[ioff + k] - mf->p.b * mf->pu[uoff + k]);
                    mf->qi[ioff + k] += mf->p.a * (e * tmp - mf->p.b * mf->qi[ioff + k]);
                }
            }
        }

        double c_logll = logll(mf, 0);
        if (c_logll > l_logll) {
            mf->p.a *= 0.99;
            l_logll = c_logll;
            n += 1;
            double v_logll = logll(mf, 1);
            fprintf(stderr, "train logll: %f test logll: %f learn rate : %f\n", c_logll, v_logll, mf->p.a);
            // NOTE(review): n was already advanced, so the snapshot is
            // labelled with the number of the *next* pass -- kept as is.
            // A zero savestep disables snapshots instead of dividing by zero.
            if (mf->p.savestep != 0 && n % mf->p.savestep == 0) {
                save_mf(mf, n);
            }
        } else {
            recover(mf);
            mf->p.a *= 0.8;
            fprintf(stderr, "run failed, try again\n");
            // With a zero (or NaN) learn rate no later pass can improve the
            // objective and n would never advance: stop instead of spinning.
            if (mf->p.a == 0.0 || mf->p.a != mf->p.a) {
                fprintf(stderr, "est_mf: learn rate vanished, stopping at iter %d\n", n);
                break;
            }
        }
    }

    free(p);
    p = NULL;
}
/*
 * Train the model in `mf` with per-sample gradient steps on the squared
 * rating error, monitoring the value returned by rmse().
 *
 * Each pass visits all mf->T samples in a freshly shuffled order.  A pass is
 * accepted only when rmse(mf, 0) (logged as "train rmse") decreases; the
 * learn rate mf->p.a is then multiplied by 0.9.  Otherwise the parameters
 * saved by backup() are restored with recover(), the learn rate is
 * multiplied by 0.8 and the same pass number is retried.
 *
 * The prediction is mu + bu[user] + bi[item], plus the pu/qi dot product
 * once the pass number exceeds mf->p.nbias, clamped to [min_s, max_s].
 * mf->p.b scales the shrink term subtracted in every update (presumably an
 * L2 regularisation weight -- confirm with the parameter docs).
 *
 * The function returns nothing; progress and errors are reported on stderr.
 */
void est_mf(MF * mf) {
    fullfill_param(mf);
    double l_rmse = rmse(mf, 0);
    fprintf(stderr, "iter :0 train rmse : %f test rmse : %f learn rate : %f\n", l_rmse, rmse(mf,1), mf->p.a);

    // visiting order of the samples, reshuffled before every pass
    int *p = malloc(sizeof *p * (size_t)mf->T);
    if (p == NULL && mf->T > 0) {
        fprintf(stderr, "est_mf: cannot allocate the sample order buffer\n");
        return;
    }
    for (int i = 0; i < mf->T; i++) {
        p[i] = i;
    }

    int n = 1;
    while (n <= mf->p.niters) {
        fprintf(stderr, "iter :%d ", n);
        shuffle(p, mf->T);
        backup(mf);
        for (int j = 0; j < mf->T; j++) {
            int id = p[j];
            int uid = mf->u_i[id][0];
            int iid = mf->u_i[id][1];
            int uoff = uid * mf->p.k;
            int ioff = iid * mf->p.k;
            double score = mf->s[id];
            double rscore = mf->mu + mf->bu[uid] + mf->bi[iid];
            if (n > mf->p.nbias) {
                for (int k = 0; k < mf->p.k; k++) {
                    rscore += mf->pu[uoff + k] * mf->qi[ioff + k];
                }
            }
            // clamp the prediction to the allowed score range before
            // measuring the error
            if (rscore > mf->max_s) {
                rscore = mf->max_s;
            }
            if (rscore < mf->min_s) {
                rscore = mf->min_s;
            }
            double e = score - rscore;
            mf->bu[uid] += mf->p.a * (e - mf->p.b * mf->bu[uid]);
            mf->bi[iid] += mf->p.a * (e - mf->p.b * mf->bi[iid]);
            if (n > mf->p.nbias) {
                for (int k = 0; k < mf->p.k; k++) {
                    // keep the old pu value so qi is updated against it
                    double tmp = mf->pu[uoff + k];
                    mf->pu[uoff + k] += mf->p.a * (e * mf->qi[ioff + k] - mf->p.b * mf->pu[uoff + k]);
                    mf->qi[ioff + k] += mf->p.a * (e * tmp - mf->p.b * mf->qi[ioff + k]);
                }
            }
        }

        double c_rmse = rmse(mf, 0);
        if (c_rmse < l_rmse) {
            mf->p.a *= 0.9;
            l_rmse = c_rmse;
            n += 1;
            double v_rmse = rmse(mf, 1);
            fprintf(stderr, "train rmse : %f test rmse : %f learn rate : %f\n", c_rmse, v_rmse, mf->p.a);
            // NOTE(review): n was already advanced, so the snapshot is
            // labelled with the number of the *next* pass -- kept as is.
            // A zero savestep disables snapshots instead of dividing by zero.
            if (mf->p.savestep != 0 && n % mf->p.savestep == 0) {
                save_mf(mf, n);
            }
        } else {
            recover(mf);
            mf->p.a *= 0.8;
            fprintf(stderr, "run failed, try again\n");
            // With a zero (or NaN) learn rate no later pass can lower the
            // error and n would never advance: stop instead of spinning.
            if (mf->p.a == 0.0 || mf->p.a != mf->p.a) {
                fprintf(stderr, "est_mf: learn rate vanished, stopping at iter %d\n", n);
                break;
            }
        }
    }

    free(p);
    p = NULL;
}
/*
 * Run lda->p.niters sampling sweeps over all lda->t tokens.
 *
 * Every token row is {tokens[id][0], tokens[id][1], tokens[id][2]}, used
 * here as document index, word index and current topic.  For each token
 * (visited in shuffled order) its current assignment is removed from the
 * nd / nw / nkw counters, a new topic is drawn with weight
 *     (nd[doc][l] + p.a) * (nw[word][l] + p.b) / (nkw[l] + p.b * v)
 * and the counters and the token row are updated with the drawn topic.
 *
 * save_lda() is called after every sweep whose number is a multiple of
 * lda->p.savestep (never, when savestep is 0).  Progress goes to stderr.
 */
void est_lda(Lda *lda) {
    fullfill_param(lda);

    int *p = malloc(sizeof *p * (size_t)lda->t);
    double *prob = malloc(sizeof *prob * (size_t)lda->p.k);
    if ((p == NULL && lda->t > 0) || (prob == NULL && lda->p.k > 0)) {
        fprintf(stderr, "est_lda: cannot allocate the sampling buffers\n");
        free(p);
        free(prob);
        return;
    }

    int st = 0;
    double vb = lda->p.b * lda->v;
    for (int i = 0; i < lda->t; i++) {
        p[i] = i;
    }

    for (int i = 1; i <= lda->p.niters; i++) {
        fprintf(stderr, "iteration %d estimate begin ... ", i);
        shuffle(p, lda->t);
        for (int j = 0; j < lda->t; j++) {
            int id = p[j];
            int uid = lda->tokens[id][0];
            int vid = lda->tokens[id][1];
            int top = lda->tokens[id][2];

            // take the token out of the counts before resampling it
            lda->nd[uid * lda->p.k + top] -= 1;
            lda->nw[vid * lda->p.k + top] -= 1;
            lda->nkw[top] -= 1;

            // prob[] holds the running (cumulative) sum of topic weights
            for (int l = 0; l < lda->p.k; l++) {
                prob[l] = 1.0 * (lda->nd[uid * lda->p.k + l] + lda->p.a)
                              * (lda->nw[vid * lda->p.k + l] + lda->p.b)
                              / (lda->nkw[l] + vb);
                if (l > 0) {
                    prob[l] += prob[l - 1];
                }
            }

            double rnd = prob[lda->p.k - 1] * (0.1 + rand()) / (0.1 + RAND_MAX);
            for (st = 0; st < lda->p.k; st++) {
                if (prob[st] > rnd) {
                    break;
                }
            }
            // rand() == RAND_MAX makes rnd equal to the total mass, so the
            // strict comparison above matches nothing and st runs off the
            // end; fall back to the last topic instead of indexing past
            // the count arrays.
            if (st >= lda->p.k) {
                st = lda->p.k - 1;
            }

            lda->nd[uid * lda->p.k + st] += 1;
            lda->nw[vid * lda->p.k + st] += 1;
            lda->nkw[st] += 1;
            lda->tokens[id][2] = st;
        }
        fprintf(stderr, " done\n");
        // a zero savestep disables snapshots instead of dividing by zero
        if (lda->p.savestep != 0 && i % lda->p.savestep == 0) {
            save_lda(lda, i);
        }
    }

    free(p);
    p = NULL;
    free(prob);
    prob = NULL;
}
/*
 * Drive the estimation: prepare the model once, then call gibbs_sample()
 * lda->p.niters times, reporting the wall-clock seconds of every sweep on
 * stderr and calling save_lda() whenever the sweep number is a multiple of
 * lda->p.savestep.
 */
void est_lda(Lda *lda) {
    /* Per the original author's note: this fills the theta & phi matrices
     * and links their nonzero elements before any sampling happens. */
    fullfill_param(lda);

    int iter = 1;
    while (iter <= lda->p.niters) {
        long started = time(NULL);
        gibbs_sample(lda);
        long finished = time(NULL);

        long elapsed = finished - started;
        fprintf(stderr, "iter %d done, using %ld seconds\n", iter, elapsed);

        /* periodic snapshot of the model */
        if (0 == iter % lda->p.savestep) {
            save_lda(lda, iter);
        }
        iter += 1;
    }
}
/*
 * Run alda->p.niters sampling sweeps over all alda->T tokens, jointly
 * redrawing the (author, topic) pair of every token.
 *
 * A token row is {tokens[id][0..3]}, used here as document index, word
 * index, current author and current topic.  The candidate authors of a
 * token are the `nda` entries of its document's DocAuthors record; das[ai][0]
 * is the author index and das[ai][1] multiplies that author's weight.
 * The pair (author a, topic l) gets weight
 *     (na[a][l] + p.a) * (nw[word][l] + p.b)
 *         / (nka[a] + p.a * p.k) / (nkw[l] + V * p.b) * das[ai][1]
 * and one pair is drawn from the cumulative weights.
 *
 * Unless PHI is defined at compile time, the word-topic counters nw / nkw
 * are updated together with the author counters na / nka.
 *
 * If no pair can be drawn (e.g. all weights are zero) diagnostics are
 * printed, the model is saved and the process exits with status 2.
 * save_author_lda() is otherwise called after every sweep whose number is a
 * multiple of alda->p.savestep (never, when savestep is 0).
 */
void est_author_lda(AuthorLda * alda){
    fullfill_param(alda);

    int *p = malloc(sizeof *p * (size_t)alda->T);
    double *prob = malloc(sizeof *prob * (size_t)alda->MA * (size_t)alda->p.k);
    if ((p == NULL && alda->T > 0)
        || (prob == NULL && alda->MA > 0 && alda->p.k > 0)) {
        fprintf(stderr, "est_author_lda: cannot allocate the sampling buffers\n");
        free(p);
        free(prob);
        return;
    }

    int st = 0, sa = 0;
    double vb = alda->V * alda->p.b;
    double at = alda->p.a * alda->p.k;
    for (int i = 0; i < alda->T; i++) {
        p[i] = i;
    }

    for (int i = 1; i <= alda->p.niters; i++){
        fprintf(stderr, "iteration %d estimate begin ... ", i);
        shuffle(p, alda->T);
        for (int j = 0; j < alda->T; j++){
            int id  = p[j];
            int uid = alda->tokens[id][0];
            int vid = alda->tokens[id][1];
            int aid = alda->tokens[id][2];
            int tid = alda->tokens[id][3];
            int v_off = vid * alda->p.k;

            DocAuthors * ad = alda->doc_author + uid;
            // Without candidate authors there is nothing to draw from and
            // prob[nda * k - 1] below would read prob[-1]; leave the token
            // and all counters untouched.
            if (ad->nda <= 0) {
                continue;
            }

            // take the token out of the counts before resampling it
            alda->na[aid * alda->p.k + tid] -= 1;
            alda->nka[aid] -= 1;
#ifndef PHI
            alda->nw[vid * alda->p.k + tid] -= 1;
            alda->nkw[tid] -= 1;
#endif

            memset(prob, 0, sizeof(double) * alda->MA * alda->p.k);
            // prob[] is one flat cumulative sum over (author slot, topic)
            for (int ai = 0; ai < ad->nda; ai++) {
                int a = ad->das[ai][0];
                int ai_off = ai * alda->p.k;
                int a_off  = a * alda->p.k;
                for (int l = 0; l < alda->p.k; l++){
                    prob[ai_off + l] = 1.0 * (alda->na[a_off + l] + alda->p.a)
                                           * (alda->nw[v_off + l] + alda->p.b)
                                           / (alda->nka[a] + at)
                                           / (alda->nkw[l] + vb)
                                           * (ad->das[ai][1]);
                    if (ai_off + l > 0){
                        prob[ai_off + l] += prob[ai_off + l - 1];
                    }
                }
            }

            double rnd = 1.0 * prob[ad->nda * alda->p.k - 1] * rand() / (1.0 + RAND_MAX);
            int sampled_index = 0;
            for (; sampled_index < ad->nda * alda->p.k; sampled_index++){
                if (prob[sampled_index] > rnd) {
                    break;
                }
            }

            if (sampled_index == ad->nda * alda->p.k){
                // Drawing failed: dump the cumulative table so the cause can
                // be inspected, snapshot the model and abort.
                fprintf(stderr, "\n[warning] sampled author and topic failed\n");
                fprintf(stderr, "token index :%d \n", id);
                for (int ti = 0; ti < ad->nda; ti++){
                    int tioff = ti * alda->p.k;
                    for (int tk = 0; tk < alda->p.k; tk++){
                        fprintf(stderr, "% e", prob[tioff + tk]);
                    }
                    fprintf(stderr, "\n");
                }
                fprintf(stderr,"rnd: %e\n", rnd);
                save_author_lda(alda,i);
                exit(2);
            }

            // flat index -> (author slot, topic)
            sa = ad->das[sampled_index / alda->p.k][0];
            st = sampled_index % alda->p.k;

            alda->na[sa * alda->p.k + st] += 1;
            alda->nka[sa] += 1;
#ifndef PHI
            alda->nw[vid * alda->p.k + st] += 1;
            alda->nkw[st] += 1;
#endif
            alda->tokens[id][2] = sa;
            alda->tokens[id][3] = st;
        }
        fprintf(stderr, " done\n");
        // a zero savestep disables snapshots instead of dividing by zero
        if (alda->p.savestep != 0 && i % alda->p.savestep == 0){
            save_author_lda(alda, i);
        }
    }

    free(p);
    p = NULL;
    free(prob);
    prob = NULL;
}