/*
 * Compute model expectations from the "partial" forward/backward lattice.
 *
 *  ctx   context providing the partial alpha/beta scores, the partial scale
 *        factors, the exponentiated state/transition scores and the scratch
 *        vector ctx->row (overwritten here).
 *  mask  flat array addressed as mask[t * L + label]; a non-zero entry marks
 *        the label as enabled at position t.
 *
 * State expectations are overwritten for every label at every position.
 * Transition expectations are accumulated (+=) only for pairs whose source
 * label is enabled at t and whose target label is enabled at t+1, so the
 * caller presumably clears them beforehand (not visible from here).
 */
void crf1dc_partial_marginals(crf1d_context_t *ctx, int *mask)
{
    int t, src, dst;
    const int T = ctx->num_items;
    const int L = ctx->num_labels;

    /*
        State expectations.
            p(t,i) = fwd[t][i] * bwd[t][i] / norm
                   = (1. / C[t]) * fwd'[t][i] * bwd'[t][i]
     */
    for (t = 0; t < T; ++t) {
        floatval_t *alpha = PARTIAL_ALPHA_SCORE(ctx, t);
        floatval_t *beta = PARTIAL_BETA_SCORE(ctx, t);
        floatval_t *mexp = PARTIAL_STATE_MEXP(ctx, t);

        veccopy(mexp, alpha, L);
        vecmul(mexp, beta, L);
        vecscale(mexp, 1. / ctx->partial_scale_factor[t], L);
    }

    /*
        Transition expectations.
            p(t,i,t+1,j)
                = fwd[t][i] * edge[i][j] * state[t+1][j] * bwd[t+1][j] / norm
                = (fwd'[t][i] / (C[0] ... C[t])) * edge[i][j] * state[t+1][j]
                  * (bwd'[t+1][j] / (C[t+1] ... C[T-1])) * (C[0] * ... * C[T-1])
                = fwd'[t][i] * edge[i][j] * state[t+1][j] * bwd'[t+1][j]
        The expectation of a transition (i -> j) is the sum of
        p(t,i,t+1,j) over t.
     */
    for (t = 0; t < T - 1; ++t) {
        floatval_t *alpha = PARTIAL_ALPHA_SCORE(ctx, t);
        floatval_t *emit_next = EXP_STATE_SCORE(ctx, t + 1);
        floatval_t *beta_next = PARTIAL_BETA_SCORE(ctx, t + 1);
        floatval_t *row = ctx->row;
        const int *from_mask = &mask[t * L];
        const int *to_mask = &mask[(t + 1) * L];

        /* row[j] = state[t+1][j] * bwd'[t+1][j] */
        veccopy(row, beta_next, L);
        vecmul(row, emit_next, L);

        for (src = 0; src < L; ++src) {
            floatval_t *edge;
            floatval_t *mexp;

            /* Source label disabled at t: none of its transitions count. */
            if (!from_mask[src]) {
                continue;
            }

            edge = EXP_TRANS_SCORE(ctx, src);
            mexp = PARTIAL_TRANS_MEXP(ctx, src);
            for (dst = 0; dst < L; ++dst) {
                if (to_mask[dst]) {
                    mexp[dst] += alpha[src] * edge[dst] * row[dst];
                }
            }
        }
    }
}
/*
 * Compute model expectations of states and transitions from the scaled
 * forward (alpha) and backward (beta) scores stored in ctx.
 *
 * State expectations are overwritten for each position. Transition
 * expectations are accumulated (+=) into the existing values, so the caller
 * presumably clears them beforehand (not visible from here). The scratch
 * vector ctx->row is overwritten.
 */
void crf1dc_marginals(crf1d_context_t* ctx)
{
    int t, src, dst;
    const int T = ctx->num_items;
    const int L = ctx->num_labels;
    floatval_t *row = ctx->row;

    /*
        State expectations.
            p(t,i) = fwd[t][i] * bwd[t][i] / norm
                   = (1. / C[t]) * fwd'[t][i] * bwd'[t][i]
     */
    for (t = 0; t < T; ++t) {
        floatval_t *alpha = ALPHA_SCORE(ctx, t);
        floatval_t *beta = BETA_SCORE(ctx, t);
        floatval_t *mexp = STATE_MEXP(ctx, t);

        veccopy(mexp, alpha, L);
        vecmul(mexp, beta, L);
        vecscale(mexp, 1. / ctx->scale_factor[t], L);
    }

    /*
        Transition expectations.
            p(t,i,t+1,j)
                = fwd[t][i] * edge[i][j] * state[t+1][j] * bwd[t+1][j] / norm
                = (fwd'[t][i] / (C[0] ... C[t])) * edge[i][j] * state[t+1][j]
                  * (bwd'[t+1][j] / (C[t+1] ... C[T-1])) * (C[0] * ... * C[T-1])
                = fwd'[t][i] * edge[i][j] * state[t+1][j] * bwd'[t+1][j]
        The expectation of a transition (i -> j) is the sum of
        p(t,i,t+1,j) over t.
     */
    for (t = 0; t < T - 1; ++t) {
        floatval_t *alpha = ALPHA_SCORE(ctx, t);
        floatval_t *emit_next = EXP_STATE_SCORE(ctx, t + 1);
        floatval_t *beta_next = BETA_SCORE(ctx, t + 1);

        /* row[j] = state[t+1][j] * bwd'[t+1][j] */
        veccopy(row, beta_next, L);
        vecmul(row, emit_next, L);

        for (src = 0; src < L; ++src) {
            floatval_t *edge = EXP_TRANS_SCORE(ctx, src);
            floatval_t *mexp = TRANS_MEXP(ctx, src);

            for (dst = 0; dst < L; ++dst) {
                mexp[dst] += alpha[src] * edge[dst] * row[dst];
            }
        }
    }
}
/*============================================================================
 * add_mean_maybe_scale
 *
 * Add mean to proj in place (proj, mean: vectors of dim floats).
 *
 * When USE_EV_SCALING is enabled at compile time, the sum is then checked
 * for negative elements. If any is found, the addition is undone, proj is
 * multiplied by 0.9 and the whole step is retried until every element of
 * proj + mean is >= 0. Without USE_EV_SCALING the mean is added exactly once.
 *
 * NOTE(review): nothing bounds the number of retries; if mean itself has a
 * negative element the loop may never finish -- confirm callers rule it out.
 *==========================================================================*/
void add_mean_maybe_scale(float *proj, float *mean, int dim)
{
    assert(proj != NULL);
    assert(mean != NULL);
    assert(dim > 0);

    for (;;) {
        int has_negative = 0;

        vecvecadd(proj, mean, dim, proj);

#if USE_EV_SCALING
        {
            int k;
            for (k = 0; k < dim; k++) {
                if (proj[k] < 0) {
                    has_negative = 1;
                    break;
                }
            }
        }

        if (has_negative) {
            /* Take the mean back out, shrink the projection, try again. */
            vecvecsub(proj, mean, dim, proj);
            if (verbosity > 2) fprintf(err, "Scaling...\n");
            vecmul(proj, .9, dim, proj);
        }
#endif

        if (!has_negative) {
            break;
        }
    }
}
/*
 * Compute the scaled backward (beta) scores for every position of the
 * sequence held in ctx.
 *
 * Reads ctx->scale_factor[0..T-1] (one coefficient per position) together
 * with the exponentiated state and transition scores; writes the beta
 * lattice and overwrites the scratch vector ctx->row.
 *
 * An empty sequence (T <= 0) has no beta scores, so the function returns
 * without touching any buffer instead of indexing position -1.
 */
void crf1dc_beta_score(crf1d_context_t* ctx)
{
    int i, t;
    floatval_t *cur = NULL;
    floatval_t *row = ctx->row;
    const floatval_t *next = NULL, *state = NULL, *trans = NULL;
    const int T = ctx->num_items;
    const int L = ctx->num_labels;

    if (T <= 0) {
        return;
    }

    /* Compute the beta scores at (T-1, *). */
    cur = BETA_SCORE(ctx, T-1);
    vecset(cur, ctx->scale_factor[T-1], L);

    /*
        Compute the beta scores at (t, *).
        The scale factor is addressed by index rather than through a cursor
        that is decremented after each use: such a cursor ends up pointing
        one element before the array, which is undefined behaviour.
     */
    for (t = T-2; 0 <= t; --t) {
        cur = BETA_SCORE(ctx, t);
        next = BETA_SCORE(ctx, t+1);
        state = EXP_STATE_SCORE(ctx, t+1);

        /* row[j] = beta[t+1][j] * state[t+1][j] */
        veccopy(row, next, L);
        vecmul(row, state, L);

        /* Compute the beta score at (t, i). */
        for (i = 0; i < L; ++i) {
            trans = EXP_TRANS_SCORE(ctx, i);
            cur[i] = vecdot(trans, row, L);
        }
        vecscale(cur, ctx->scale_factor[t], L);
    }
}
/*============================================================================
 * Self-test driver for the vector helpers.
 *
 * Every check is an assert(), so a build with NDEBUG verifies nothing.
 * Returns 0 when all checks pass.
 *==========================================================================*/
int main(int argc, char **argv)
{
    (void)argc;
    (void)argv;

    /* vecdot: three-element dot product */
    {
        float a[3] = {1.0, 2.0, 3.0};
        float b[3] = {3.0, 2.0, 1.0};
        assert(vecdot(a, b, 3) == 10.0);
    }

    /* vecdot: only the first element takes part when the length is 1 */
    {
        float a[3] = {10.0};
        float b[3] = {13.0};
        assert(vecdot(a, b, 1) == 130.0);
    }

    /* veclen: 3-4-5 triangle */
    {
        float v[3] = {3.0, 4.0};
        assert(veclen(v, 2) == 5.0);
    }

    /* vecmul: multiply by a scalar, writing over the input */
    {
        float v[5]        = {1.0, 2.0, 3.0, 4.0, 5.0};
        float expected[5] = {2.0, 4.0, 6.0, 8.0, 10.0};
        vecmul(v, 2, 5, v);
        assert(vecveceq(v, expected, 5));
    }

    /* vecdiv: divide by a scalar, writing over the input */
    {
        float v[5]        = {2.0, 4.0, 6.0, 8.0, 10.0};
        float expected[5] = {1.0, 2.0, 3.0, 4.0, 5.0};
        vecdiv(v, 2, 5, v);
        assert(vecveceq(v, expected, 5));
    }

    /* vecvecsub: element-wise difference, writing over the first operand */
    {
        float lhs[5]      = {1.0, 2.0, 3.0, 4.0, 5.0};
        float rhs[5]      = {2.0, 4.0, 6.0, 8.0, 10.0};
        float expected[5] = {-1.0, -2.0, -3.0, -4.0, -5.0};
        vecvecsub(lhs, rhs, 5, lhs);
        assert(vecveceq(lhs, expected, 5));
    }

    /* vecvecadd: element-wise sum, writing over the first operand */
    {
        float lhs[5]      = {1.0, 2.0, 3.0, 4.0, 5.0};
        float rhs[5]      = {2.0, 4.0, 6.0, 8.0, 10.0};
        float expected[5] = {3.0, 6.0, 9.0, 12.0, 15.0};
        vecvecadd(lhs, rhs, 5, lhs);
        assert(vecveceq(lhs, expected, 5));
    }

    return 0;
}
/*
 * Compute the scaled forward (alpha) scores for the sequence held in ctx.
 *
 * For each position t the alpha row is normalised to sum to one; the
 * coefficient used (1 / sum, or 1 when the sum is zero) is stored in
 * ctx->scale_factor[t]. On exit ctx->log_norm holds the logarithm of the
 * normalisation factor derived from those coefficients.
 */
void crf1dc_alpha_score(crf1d_context_t* ctx)
{
    int t, src;
    floatval_t total, coeff;
    floatval_t *alpha = NULL;
    const floatval_t *alpha_prev = NULL, *edge = NULL, *emit = NULL;
    const int T = ctx->num_items;
    const int L = ctx->num_labels;

    /*
        Position 0:
            alpha[0][j] = state[0][j]
     */
    alpha = ALPHA_SCORE(ctx, 0);
    emit = EXP_STATE_SCORE(ctx, 0);
    veccopy(alpha, emit, L);

    total = vecsum(alpha, L);
    coeff = (total != 0.) ? 1. / total : 1.;
    ctx->scale_factor[0] = coeff;
    vecscale(alpha, coeff, L);

    /*
        Positions 1 .. T-1:
            alpha[t][j] = state[t][j] * \sum_{i} alpha[t-1][i] * trans[i][j]
     */
    for (t = 1; t < T; ++t) {
        alpha_prev = ALPHA_SCORE(ctx, t-1);
        alpha = ALPHA_SCORE(ctx, t);
        emit = EXP_STATE_SCORE(ctx, t);

        veczero(alpha, L);
        for (src = 0; src < L; ++src) {
            edge = EXP_TRANS_SCORE(ctx, src);
            vecaadd(alpha, alpha_prev[src], edge, L);
        }
        vecmul(alpha, emit, L);

        total = vecsum(alpha, L);
        coeff = (total != 0.) ? 1. / total : 1.;
        ctx->scale_factor[t] = coeff;
        vecscale(alpha, coeff, L);
    }

    /*
        Logarithm of the normalization factor.
            norm = 1. / (C[0] * C[1] ... * C[T-1])
            log(norm) = - \sum_{t = 0}^{T-1} log(C[t]).
     */
    ctx->log_norm = -vecsumlog(ctx->scale_factor, T);
}
/*
 * For each of the first nev eigenvectors, find the models with the largest
 * non-negative and the smallest non-positive projection (dot product) onto
 * that eigenvector, and write out models derived from those projections.
 *
 *  nev         number of eigenvectors to process (0 < nev <= dim)
 *  M           passed through to undo_scale(); not otherwise used here
 *  models      num_models vectors of dim floats
 *  mean        vector of dim floats added back to each projection
 *  num_models  number of entries in models (> 0)
 *  eigen       eigen[i].rvec is the i-th eigenvector (dim floats)
 *  dim         vector length (> 0)
 *
 * With opt_interp_proj set, a series of models is written along the line
 * from the '+' projection towards the '-' projection (and beyond it);
 * otherwise the two extreme projections themselves are written.
 *
 * If working memory cannot be allocated, a message is printed to stderr.
 * When the two projection buffers cannot be obtained nothing is written at
 * all; when the interpolation scratch buffers cannot be obtained,
 * processing stops after the eigenvectors already handled.
 */
void do_eigen_projections(int nev, float *M, float **models, float *mean, int num_models, eigen_t *eigen, int dim)
{
    /* Interpolation walks INTERP_STEPS steps from '+' to '-', then keeps
     * going for INTERP_OVERSHOOT more steps past the '-' end. */
    enum { INTERP_STEPS = 20, INTERP_OVERSHOOT = 20 };

    int i, m;
    size_t vec_bytes;
    float *newp = NULL;     /* interpolation scratch, allocated on first use */
    float *step = NULL;     /* interpolation scratch, allocated on first use */
    struct
    {
        float *proj;
        float best_dot;
        int   best_dot_index;   /* model index, -1 while none was found */
        char  symbol;
    } proj_model[2];

    assert(models != NULL);
    assert(mean != NULL);
    assert(eigen != NULL);
    assert(dim > 0);
    assert(num_models > 0);
    assert(0 < nev && nev <= dim);

    vec_bytes = (size_t)dim * sizeof(float);

    proj_model[0].proj = (float *)malloc(vec_bytes);
    proj_model[1].proj = (float *)malloc(vec_bytes);
    if (proj_model[0].proj == NULL || proj_model[1].proj == NULL)
    {
        fprintf(stderr, "do_eigen_projections: out of memory\n");
        free(proj_model[0].proj);
        free(proj_model[1].proj);
        return;
    }

    proj_model[0].symbol = '+';
    proj_model[1].symbol = '-';

    if (!opt_multi_out) fprintf(out, "#BEGIN ENSEM\n");

    for (i=0; i < nev; i++)
    {
        /*================================================================
         * Find the models that have the largest and smallest projections
         * onto the current eigenvector.
         *==============================================================*/
        proj_model[0].best_dot = 0;
        proj_model[0].best_dot_index = -1;
        proj_model[1].best_dot = 0;
        proj_model[1].best_dot_index = -1;

        for (m=0; m < num_models; m++)
        {
            float dot = vecdot(models[m], eigen[i].rvec, dim);

            if (dot >= 0 && dot >= proj_model[0].best_dot)
            {
                proj_model[0].best_dot = dot;
                proj_model[0].best_dot_index = m;
                vecmul(eigen[i].rvec, dot, dim, proj_model[0].proj);
            }

            if (dot <= 0 && dot <= proj_model[1].best_dot)
            {
                proj_model[1].best_dot = dot;
                proj_model[1].best_dot_index = m;
                vecmul(eigen[i].rvec, dot, dim, proj_model[1].proj);
            }
        }

        if (opt_interp_proj)
        {
            if (proj_model[0].best_dot_index != -1
            &&  proj_model[1].best_dot_index != -1)
            {
                int j;

                /* The scratch vectors are fully rewritten before each use,
                 * so one pair is shared by all eigenvectors. */
                if (newp == NULL) newp = (float *)malloc(vec_bytes);
                if (step == NULL) step = (float *)malloc(vec_bytes);
                if (newp == NULL || step == NULL)
                {
                    fprintf(stderr, "do_eigen_projections: out of memory\n");
                    break;
                }

                /* step = ('-' projection - '+' projection) / INTERP_STEPS */
                vecvecsub(proj_model[1].proj, proj_model[0].proj, dim, step);
                vecdiv(step, INTERP_STEPS, dim, step);

                for (j=0; j < INTERP_STEPS+INTERP_OVERSHOOT; j++)
                {
                    vecmul(step, j, dim, newp);
                    vecvecadd(proj_model[0].proj, newp, dim, newp);

                    undo_scale(newp, dim, M);
                    vecvecadd(newp, mean, dim, newp);
                    multi_out_write_ev_model((i+1)*100 + j, 'p', 0, 0, newp, dim);
                }
            }
        }
        else
        {
            /*================================================================
             * Write out the new models
             *==============================================================*/
            for (m=0; m < 2; m++)
            {
                if (proj_model[m].best_dot_index != -1)
                {
                    float proj_len_before_scale = veclen(proj_model[m].proj, dim);

                    undo_scale(proj_model[m].proj, dim, M);

                    /* add back mean */
                    vecvecadd(proj_model[m].proj, mean, dim, proj_model[m].proj);

                    multi_out_write_ev_model(i, proj_model[m].symbol,
                                             proj_len_before_scale,
                                             proj_model[m].best_dot,
                                             proj_model[m].proj, dim);
                }
            }
        }
    }

    if (!opt_multi_out) fprintf(out, "#END ENSEM\n");

    free(step);
    free(newp);
    free(proj_model[0].proj);
    free(proj_model[1].proj);
}
/*
 * Compute model expectations of states and transitions using only the
 * scaled forward (alpha) scores, walking the sequence from the last
 * position back to the first.
 *
 * State expectations are overwritten; transition expectations are
 * accumulated (added) into the existing values. The scratch buffers
 * ctx->row (L values) and ctx->adj (L*L values) are overwritten.
 *
 * An empty sequence (T <= 0) has nothing to accumulate, so the function
 * returns immediately instead of addressing position -1.
 */
void crf1dc_marginal_without_beta(crf1d_context_t* ctx)
{
    int i, t;
    floatval_t *prob = NULL;
    floatval_t *row = ctx->row;
    const floatval_t *fwd = NULL;
    const int T = ctx->num_items;
    const int L = ctx->num_labels;

    if (T <= 0) {
        return;
    }

    /*
        Compute marginal probabilities of states at T-1
            p(T-1,j) = fwd'[T-1][j]
     */
    fwd = ALPHA_SCORE(ctx, T-1);
    prob = STATE_MEXP(ctx, T-1);
    veccopy(prob, fwd, L);

    /*
        Repeat the following computation for t = T-1,T-2, ..., 1.
            1) Compute p(t-1,i,t,j) using p(t,j)
            2) Compute p(t,i) using p(t-1,i,t,j)
     */
    for (t = T-1; 0 < t; --t) {
        fwd = ALPHA_SCORE(ctx, t-1);
        prob = STATE_MEXP(ctx, t);

        veczero(ctx->adj, L*L);
        veczero(row, L);

        /*
            Compute adj[i][j] and row[j].
                adj[i][j] = fwd'[t-1][i] * edge[i][j]
                row[j] = \sum_{i} adj[i][j]
         */
        for (i = 0; i < L; ++i) {
            floatval_t *adj = ADJACENCY(ctx, i);
            floatval_t *edge = EXP_TRANS_SCORE(ctx, i);
            vecaadd(adj, fwd[i], edge, L);
            vecadd(row, adj, L);
        }

        /*
            Find z such that z * \sum_{i} adj[i][j] = p(t,j).
            Thus, z = p(t,j) / row[j]; we overwrite row with z.
         */
        vecinv(row, L);
        vecmul(row, prob, L);

        /*
            Apply the partition factor z (row[j]) to adj[i][j].
         */
        for (i = 0; i < L; ++i) {
            floatval_t *adj = ADJACENCY(ctx, i);
            vecmul(adj, row, L);
        }

        /*
            Now that adj[i][j] presents p(t-1,i,t,j),
            accumulate model expectations of transitions.
         */
        for (i = 0; i < L; ++i) {
            floatval_t *adj = ADJACENCY(ctx, i);
            floatval_t *trans_mexp = TRANS_MEXP(ctx, i);
            vecadd(trans_mexp, adj, L);
        }

        /*
            Compute the marginal probability of states at t-1.
                p(t-1,i) = \sum_{j} p(t-1,i,t,j)
         */
        prob = STATE_MEXP(ctx, t-1);
        for (i = 0; i < L; ++i) {
            floatval_t *adj = ADJACENCY(ctx, i);
            prob[i] = vecsum(adj, L);
        }
    }
}