Beispiel #1
0
Datei: bmf.c Projekt: nionjo/dm
/*
 * Fit the model with per-sample gradient steps on a logistic prediction.
 *
 * For every training sample the raw score is bu[uid] + bi[iid], plus the dot
 * product pu[uid] . qi[iid] once the pass counter exceeds p.nbias (before
 * that only the two bias terms are used and updated). The raw score is
 * squashed with a sigmoid and e = s[id] - sigmoid(score) drives the updates,
 * with step size p.a and shrinkage coefficient p.b.
 *
 * After each pass the training log-likelihood, logll(mf, 0), is compared with
 * the best value seen so far:
 *   - higher: the pass is accepted, p.a is multiplied by 0.99, the pass
 *     counter advances and save_mf() is called whenever the new counter value
 *     is a multiple of p.savestep;
 *   - otherwise: recover(mf) is called, p.a is multiplied by 0.8 and the same
 *     pass number is attempted again.
 *
 * backup()/recover() presumably snapshot and restore the parameters around a
 * pass -- confirm against their definitions.
 *
 * NOTE(review): the pass counter only advances when the log-likelihood
 * improves, so a run that never improves keeps retrying with an ever smaller
 * learning rate -- confirm this is acceptable.
 *
 * If the sample-order array cannot be allocated, an error is printed to
 * stderr and the function returns without training.
 */
void  est_mf(MF * mf) {
    fullfill_param(mf);
    double l_logll = logll(mf, 0);
    fprintf(stderr, "iter :0 train logll : %f   test logll: %f  learn rate : %f\n", l_logll, logll(mf, 1), mf->p.a);

    /* Visiting order of the T training samples; reshuffled before each pass. */
    int *p = malloc(sizeof *p * mf->T);
    if (p == NULL && mf->T > 0) {
        fprintf(stderr, "est_mf: cannot allocate sample order array\n");
        return;
    }
    for (int i = 0; i < mf->T; i++) p[i] = i;

    int n = 1;
    while (n <= mf->p.niters){
        fprintf(stderr, "iter :%d ", n);
        shuffle(p, mf->T);
        backup(mf);
        for (int j = 0; j < mf->T; j++){
            int id = p[j];
            int uid = mf->u_i[id][0];
            int iid = mf->u_i[id][1];
            int uoff = uid * mf->p.k;
            int ioff = iid * mf->p.k;
            double score = mf->s[id];

            /* Raw score: biases only during the first p.nbias passes. */
            double rx = mf->bu[uid] + mf->bi[iid];
            if (n > mf->p.nbias){
                for (int k = 0; k < mf->p.k; k++){
                    rx += mf->pu[uoff + k] * mf->qi[ioff + k];
                }
            }

            /* Residual between the observed value and the sigmoid output. */
            double e = 1.0/(1.0 + exp(-rx));
            e = score - e;

            mf->bu[uid] += mf->p.a * (e - mf->p.b * mf->bu[uid]);
            mf->bi[iid] += mf->p.a * (e - mf->p.b * mf->bi[iid]);
            if (n > mf->p.nbias){
                for (int k = 0; k < mf->p.k; k++){
                    /* Keep the old pu value so qi is updated from it. */
                    double tmp = mf->pu[uoff + k];
                    mf->pu[uoff + k] += mf->p.a * (e * mf->qi[ioff + k] - mf->p.b * mf->pu[uoff + k]);
                    mf->qi[ioff + k] += mf->p.a * (e * tmp              - mf->p.b * mf->qi[ioff + k]);
                }
            }
        }

        double c_logll = logll(mf, 0);
        if (c_logll > l_logll){
            mf->p.a *= 0.99;
            l_logll = c_logll;
            n += 1;
            double v_logll = logll(mf, 1);
            fprintf(stderr, "train logll: %f   test logll: %f  learn rate : %f\n", c_logll, v_logll, mf->p.a);
            /* A savestep of 0 would be a modulo by zero; treat it as "never save". */
            if (mf->p.savestep != 0 && n % mf->p.savestep == 0){
                save_mf(mf, n);
            }
        }
        else{
            recover(mf);
            mf->p.a *= 0.8;
            fprintf(stderr, "run failed, try again\n");
        }
    }
    free(p); p = NULL;
}
Beispiel #2
0
Datei: pmf.c Projekt: nionjo/dm
/*
 * Fit the model with per-sample gradient steps on a squared-error residual.
 *
 * For every training sample the prediction is mu + bu[uid] + bi[iid], plus
 * the dot product pu[uid] . qi[iid] once the pass counter exceeds p.nbias
 * (before that only the bias terms are used and updated). The prediction is
 * clamped to [min_s, max_s] and e = s[id] - prediction drives the updates,
 * with step size p.a and shrinkage coefficient p.b.
 *
 * After each pass the training error, rmse(mf, 0), is compared with the best
 * value seen so far:
 *   - lower: the pass is accepted, p.a is multiplied by 0.9, the pass counter
 *     advances and save_mf() is called whenever the new counter value is a
 *     multiple of p.savestep;
 *   - otherwise: recover(mf) is called, p.a is multiplied by 0.8 and the same
 *     pass number is attempted again.
 *
 * backup()/recover() presumably snapshot and restore the parameters around a
 * pass -- confirm against their definitions.
 *
 * NOTE(review): the pass counter only advances when the error decreases, so a
 * run that never improves keeps retrying with an ever smaller learning rate
 * -- confirm this is acceptable.
 *
 * If the sample-order array cannot be allocated, an error is printed to
 * stderr and the function returns without training.
 */
void  est_mf(MF * mf) {
    fullfill_param(mf);
    double l_rmse = rmse(mf, 0);
    fprintf(stderr, "iter :0 train rmse : %f   test rmse : %f  learn rate : %f\n", l_rmse, rmse(mf,1), mf->p.a);

    /* Visiting order of the T training samples; reshuffled before each pass. */
    int *p = malloc(sizeof *p * mf->T);
    if (p == NULL && mf->T > 0) {
        fprintf(stderr, "est_mf: cannot allocate sample order array\n");
        return;
    }
    for (int i = 0; i < mf->T; i++) p[i] = i;

    int n = 1;
    while (n <= mf->p.niters){
        fprintf(stderr, "iter :%d ", n);
        shuffle(p, mf->T);
        backup(mf);
        for (int j = 0; j < mf->T; j++){
            int id = p[j];
            int uid = mf->u_i[id][0];
            int iid = mf->u_i[id][1];
            int uoff = uid * mf->p.k;
            int ioff = iid * mf->p.k;
            double score = mf->s[id];

            /* Prediction: mean and biases only during the first p.nbias passes. */
            double rscore = mf->mu + mf->bu[uid] + mf->bi[iid];
            if (n > mf->p.nbias) {
                for (int k = 0; k < mf->p.k; k++){
                    rscore += mf->pu[uoff + k] * mf->qi[ioff + k];
                }
            }

            /* Clamp the prediction to the [min_s, max_s] range before taking the residual. */
            if (rscore > mf->max_s) rscore = mf->max_s;
            if (rscore < mf->min_s) rscore = mf->min_s;
            double e = score - rscore;

            mf->bu[uid] += mf->p.a * (e - mf->p.b * mf->bu[uid]);
            mf->bi[iid] += mf->p.a * (e - mf->p.b * mf->bi[iid]);
            if (n > mf->p.nbias) {
                for (int k = 0; k < mf->p.k; k++){
                    /* Keep the old pu value so qi is updated from it. */
                    double tmp = mf->pu[uoff + k];
                    mf->pu[uoff + k] += mf->p.a * (e * mf->qi[ioff + k] - mf->p.b * mf->pu[uoff + k]);
                    mf->qi[ioff + k] += mf->p.a * (e * tmp              - mf->p.b * mf->qi[ioff + k]);
                }
            }
        }

        double c_rmse = rmse(mf, 0);
        if (c_rmse < l_rmse){
            mf->p.a *= 0.9;
            l_rmse = c_rmse;
            n += 1;
            double v_rmse = rmse(mf, 1);
            fprintf(stderr, "train rmse : %f   test rmse : %f  learn rate : %f\n", c_rmse, v_rmse, mf->p.a);
            /* A savestep of 0 would be a modulo by zero; treat it as "never save". */
            if (mf->p.savestep != 0 && n % mf->p.savestep == 0){
                save_mf(mf, n);
            }
        }
        else{
            recover(mf);
            mf->p.a *= 0.8;
            fprintf(stderr, "run failed, try again\n");
        }
    }
    free(p); p = NULL;
}
Beispiel #3
0
Datei: lda.c Projekt: nionjo/dm
/*
 * Run p.niters sampling sweeps over the lda->t tokens.
 *
 * Each token record is (tokens[id][0], tokens[id][1], tokens[id][2]); the
 * first two index the nd and nw count tables and the third is the token's
 * current topic. For every token, in a freshly shuffled order per sweep:
 *   1. its current topic is removed from the nd, nw and nkw counts;
 *   2. a cumulative weight table over the p.k topics is built from
 *      (nd + p.a) * (nw + p.b) / (nkw + p.b * v);
 *   3. a new topic is drawn from that table using rand();
 *   4. the counts and tokens[id][2] are updated with the drawn topic.
 *
 * save_lda() is called after every sweep whose number is a multiple of
 * p.savestep (a savestep of 0 disables saving).
 *
 * If a work buffer cannot be allocated, an error is printed to stderr and the
 * function returns without sampling.
 */
void est_lda(Lda *lda) {

    fullfill_param(lda);

    /* p: token visiting order; prob: cumulative topic weights for one token. */
    int *p = malloc(sizeof *p * lda->t);
    double *prob = malloc(sizeof *prob * lda->p.k);
    if ((p == NULL && lda->t > 0) || (prob == NULL && lda->p.k > 0)) {
        fprintf(stderr, "est_lda: cannot allocate sampling buffers\n");
        free(p);
        free(prob);
        return;
    }

    double vb = lda->p.b * lda->v;

    for (int i = 0; i < lda->t; i++) p[i] = i;
    for (int i = 1; i <= lda->p.niters; i++) {
        fprintf(stderr, "iteration %d estimate begin ... ", i);
        shuffle(p, lda->t);
        for (int j = 0; j < lda->t; j++) {
            int id = p[j];
            int uid = lda->tokens[id][0];
            int vid = lda->tokens[id][1];
            int top = lda->tokens[id][2];

            /* Take the token out of the counts before resampling it. */
            lda->nd[uid * lda->p.k + top] -= 1;
            lda->nw[vid * lda->p.k + top] -= 1;
            lda->nkw[top] -= 1;

            for (int l = 0; l < lda->p.k; l++) {
                prob[l] = 1.0 * (lda->nd[uid * lda->p.k + l] + lda->p.a) *
                                (lda->nw[vid * lda->p.k + l] + lda->p.b) /
                                (lda->nkw[l] + vb);
                if (l > 0) prob[l] += prob[l - 1];
            }
            double rnd = prob[lda->p.k - 1] * (0.1 + rand()) / (0.1 + RAND_MAX);

            /*
             * Pick the first topic whose cumulative weight exceeds rnd. The
             * scan stops at the last topic so that st always stays inside
             * [0, k-1]: when rand() returns RAND_MAX, rnd equals prob[k-1]
             * and no entry is strictly greater, which previously left
             * st == k and wrote one element past the topic range.
             */
            int st = 0;
            for (; st < lda->p.k - 1; st++) {
                if (prob[st] > rnd) break;
            }

            lda->nd[uid * lda->p.k + st] += 1;
            lda->nw[vid * lda->p.k + st] += 1;
            lda->nkw[st] += 1;

            lda->tokens[id][2] = st;
        }

        fprintf(stderr, " done\n");
        /* A savestep of 0 would be a modulo by zero; treat it as "never save". */
        if (lda->p.savestep != 0 && i % lda->p.savestep == 0) {
            save_lda(lda, i);
        }
    }
    free(p);    p    = NULL;
    free(prob); prob = NULL;
}
Beispiel #4
0
/*
 * Run p.niters rounds of gibbs_sample() on the model, reporting the
 * wall-clock seconds of each round on stderr.
 *
 * save_lda() is called after every round whose number is a multiple of
 * p.savestep (a savestep of 0 disables saving).
 */
void est_lda(Lda *lda) {
    // first fill the theta & phi matrices
    // and link the nonzero elements
    fullfill_param(lda);
    // estimation iterations for lda
    for (int n = 1; n <= lda->p.niters; n++) {
        long sec1 = time(NULL);
        gibbs_sample(lda);
        long sec2 = time(NULL);
        fprintf(stderr, "iter %d done, using %ld seconds\n", n, sec2 - sec1);
        // a savestep of 0 would be a modulo by zero; treat it as "never save"
        if (lda->p.savestep != 0 && n % lda->p.savestep == 0) {
            save_lda(lda, n);
        }
    }
}
Beispiel #5
0
/*
 * Run p.niters sampling sweeps over the alda->T tokens, jointly resampling an
 * (author, topic) pair for every token.
 *
 * Each token record is tokens[id][0..3]: [0] selects the doc_author entry,
 * [1] indexes the nw table, [2] is the current author and [3] the current
 * topic. For every token, in a freshly shuffled order per sweep:
 *   1. the current (author, topic) is removed from na/nka (and from nw/nkw
 *      unless PHI is defined at compile time);
 *   2. for every candidate author das[ai][0] of the token's doc_author entry
 *      and every topic, a cumulative weight is built from
 *      (na + p.a) * (nw + p.b) / (nka + p.a * k) / (nkw + V * p.b),
 *      scaled by das[ai][1];
 *   3. a flat index into that table is drawn with rand() and decoded into
 *      the new author and topic;
 *   4. the counts and tokens[id][2..3] are updated.
 *
 * If no index can be drawn (the cumulative total is not greater than the
 * random threshold, e.g. no candidate authors or zero/NaN weights), the
 * weight table is dumped to stderr, the model is saved with
 * save_author_lda() and the process exits with status 2.
 *
 * save_author_lda() is also called after every sweep whose number is a
 * multiple of p.savestep (a savestep of 0 disables saving).
 *
 * If a work buffer cannot be allocated, an error is printed to stderr and the
 * function returns without sampling.
 */
void est_author_lda(AuthorLda * alda){

    fullfill_param(alda);

    /* Size of the weight table: MA rows of p.k topics. Presumably MA is the
     * largest number of authors any document has -- confirm. */
    size_t prob_bytes = sizeof(double) * alda->MA * alda->p.k;

    int *p = malloc(sizeof *p * alda->T);
    double *prob = malloc(prob_bytes);
    if ((p == NULL && alda->T > 0) || (prob == NULL && prob_bytes > 0)) {
        fprintf(stderr, "est_author_lda: cannot allocate sampling buffers\n");
        free(p);
        free(prob);
        return;
    }

    int st = 0, sa = 0;

    double vb = alda->V   * alda->p.b;
    double at = alda->p.a * alda->p.k;

    for (int i = 0; i < alda->T; i++) {
        p[i] = i;
    }

    for (int i = 1; i <= alda->p.niters; i++){
        fprintf(stderr, "iteration %d estimate begin ... ", i);
        shuffle(p, alda->T);
        for (int j = 0; j < alda->T; j++){
            int id = p[j];
            int uid = alda->tokens[id][0];
            int vid = alda->tokens[id][1];
            int aid = alda->tokens[id][2];
            int tid = alda->tokens[id][3];
            int v_off = vid * alda->p.k;

            /* Take the token out of the counts before resampling it. */
            alda->na[aid * alda->p.k + tid] -= 1;
            alda->nka[aid]                  -= 1;
#ifndef PHI
            alda->nw[vid * alda->p.k + tid] -= 1;
            alda->nkw[tid]                  -= 1;
#endif

            DocAuthors * ad = alda->doc_author + uid;
            /* Number of (candidate author, topic) cells for this token. */
            int n_cand = ad->nda * alda->p.k;

            memset(prob, 0, prob_bytes);
            for (int ai = 0; ai < ad->nda; ai++) {

                int a      = ad->das[ai][0];
                int ai_off = ai * alda->p.k;
                int a_off  = a  * alda->p.k;

                for (int l = 0; l < alda->p.k; l++){
                    prob[ai_off + l] = 1.0 * (alda->na[a_off + l] + alda->p.a) \
                                           * (alda->nw[v_off + l] + alda->p.b) \
                                           / (alda->nka[a]        + at)        \
                                           / (alda->nkw[l]        + vb)        \
                                           * (ad->das[ai][1]);
                    /* Accumulate across authors as well as topics. */
                    if (ai_off + l > 0){
                        prob[ai_off + l] += prob[ai_off + l - 1];
                    }
                }
            }

            /* With no candidate cells there is no last entry to read; use a
             * zero total so the failure path below reports it instead of
             * reading prob[-1]. */
            double total = (n_cand > 0) ? prob[n_cand - 1] : 0.0;
            double rnd = 1.0 * total * rand() / (1.0 + RAND_MAX);
            int sampled_index = 0;
            for (; sampled_index < n_cand; sampled_index++){
                if (prob[sampled_index] > rnd) break;
            }
            if (sampled_index == n_cand){
                // sampling failed: dump the state, save the model and abort
                fprintf(stderr, "\n[warning] sampled author and topic failed\n");
                // and see what is going on
                fprintf(stderr, "token index :%d \n", id);
                for (int ti = 0; ti < ad->nda; ti++){
                    int tioff = ti * alda->p.k;
                    for (int tk = 0; tk < alda->p.k; tk++){
                        fprintf(stderr, "% e", prob[tioff + tk]);
                    }
                    fprintf(stderr, "\n");
                }
                fprintf(stderr,"rnd: %e\n", rnd);
                save_author_lda(alda,i);
                exit(2);
            }else {
                /* Decode the flat index: row -> candidate author, column -> topic. */
                sa = ad->das[sampled_index / alda->p.k][0];
                st = sampled_index % alda->p.k;
            }

            alda->na[sa  * alda->p.k + st] += 1;
            alda->nka[sa]                  += 1;
#ifndef PHI
            alda->nw[vid * alda->p.k + st] += 1;
            alda->nkw[st]                  += 1;
#endif

            alda->tokens[id][2] = sa;
            alda->tokens[id][3] = st;
        }
        fprintf(stderr, " done\n");
        /* A savestep of 0 would be a modulo by zero; treat it as "never save". */
        if (alda->p.savestep != 0 && i % alda->p.savestep == 0){
            save_author_lda(alda, i);
        }
    }
    free(p);    p    = NULL;
    free(prob); prob = NULL;

}