Exemplo n.º 1
0
/*
 * best_q: pick the question in all_q[] that gives the largest weighted
 * entropy change when the models listed in id[] are split into the set
 * that satisfies the question ("yes") and the set that does not ("no").
 *
 * Parameters
 *   mixw         indexed [model][state][stream][density]; accumulated per side.
 *   means, vars  indexed [model][state][stream][component]; only read when
 *                the -ts2cbfn type is ".cont.".
 *   veclen       number of components of each stream (n_stream entries).
 *   n_model      unused here.
 *   n_state, n_stream, n_density
 *                extents of the corresponding mixw dimensions.
 *   stwt         per-state weight applied to each state's contribution.
 *   dfeat        dfeat[model] is the feature vector handed to eval_quest().
 *   n_dfeat      length passed to eval_quest() with each dfeat[] row.
 *   all_q        candidate questions (n_all_q entries).
 *   pset         unused here.
 *   id, n_id     ids of the models belonging to the node being split.
 *   dist         dist[s] is passed to wt_ent_inc() in the ".semi." case.
 *   node_wt_ent  subtracted from the score in the ".cont." case
 *                (weighted entropy of the node, per the caller).
 *   out_best_q   receives &all_q[best], or NULL when no question qualifies.
 *
 * Returns the score of the chosen question, or 0 when *out_best_q is NULL.
 *
 * A question is skipped when it puts every model on one side, or when, for
 * some state, the stream-0 mixing weight mass of either side is exactly zero.
 *
 * All scratch buffers are released on every exit path.
 */
float64
best_q(float32 ****mixw,
       /* ADDITION FOR CONTINUOUS_TREES 21 May 98 */
       float32 ****means,
       float32 ****vars,
       uint32  *veclen,
       /* END ADDITION FOR CONTINUOUS_TREES */
       uint32 n_model,
       uint32 n_state,
       uint32 n_stream,
       uint32 n_density,
       float32 *stwt,
       uint32 **dfeat,
       uint32 n_dfeat,
       quest_t *all_q,
       uint32 n_all_q,
       pset_t *pset,
       uint32 *id,
       uint32 n_id,
       float32 ***dist,
       /* ADDITION FOR CONTINUOUS_TREES 21 May 98 */
       float64 node_wt_ent,  /* Weighted entropy of node */
       /* END ADDITION FOR CONTINUOUS_TREES */
       quest_t **out_best_q)
{
    /* Per-side mixing weight accumulators, [state][stream][density] */
    float32 ***yes_dist;
    float32 ***no_dist;
    /* Per-side weighted moment accumulators; only allocated for ".cont." */
    float32 ***yes_means = NULL;
    float32 ***yes_vars = NULL;
    float32 ***no_means = NULL;
    float32 ***no_vars = NULL;
    /* Accumulators selected for the model currently being added */
    float32 ***side_dist;
    float32 ***side_means;
    float32 ***side_vars;
    float32 varfloor = 0;
    float64 y_ent, n_ent;
    float64 yes_dnom, yes_norm;
    float64 no_dnom, no_norm;
    float64 einc, b_einc = -1.0e+50;
    uint32 n_yes, n_no;
    uint32 i, j, k, q, s, ii;
    uint32 b_q = 0;
    uint32 have_best = 0;
    uint32 continuous, sumveclen = 0;
    char *type;

    type = (char *)cmd_ln_access("-ts2cbfn");
    if (type == NULL)
        E_FATAL("-ts2cbfn is not set; trees can only be built on types .semi. or .cont.\n");
    if (strcmp(type,".semi.")!=0 && strcmp(type,".cont.") != 0)
        E_FATAL("Type %s unsupported; trees can only be built on types .semi. or .cont.\n",type);
    continuous = (strcmp(type,".cont.") == 0) ? 1 : 0;

    if (continuous == 1) {
        varfloor = *(float32 *)cmd_ln_access("-varfloor");
        /* Allocating for sumveclen is overallocation, but it eases coding */
        for (ii = 0, sumveclen = 0; ii < n_stream; ii++)
            sumveclen += veclen[ii];
        yes_means = (float32 ***)ckd_calloc_3d(n_state, n_stream, sumveclen, sizeof(float32));
        yes_vars  = (float32 ***)ckd_calloc_3d(n_state, n_stream, sumveclen, sizeof(float32));
        no_means  = (float32 ***)ckd_calloc_3d(n_state, n_stream, sumveclen, sizeof(float32));
        no_vars   = (float32 ***)ckd_calloc_3d(n_state, n_stream, sumveclen, sizeof(float32));
    }

    yes_dist = (float32 ***)ckd_calloc_3d(n_state, n_stream, n_density, sizeof(float32));
    no_dist  = (float32 ***)ckd_calloc_3d(n_state, n_stream, n_density, sizeof(float32));

    for (q = 0; q < n_all_q; q++) {
        /* Reset every accumulator before scoring this question */
        memset(&yes_dist[0][0][0], 0, sizeof(float32) * n_state * n_stream * n_density);
        memset(&no_dist[0][0][0], 0, sizeof(float32) * n_state * n_stream * n_density);
        if (continuous == 1) {
            memset(&yes_means[0][0][0], 0, sizeof(float32) * n_state * n_stream * sumveclen);
            memset(&yes_vars[0][0][0], 0, sizeof(float32) * n_state * n_stream * sumveclen);
            memset(&no_means[0][0][0], 0, sizeof(float32) * n_state * n_stream * sumveclen);
            memset(&no_vars[0][0][0], 0, sizeof(float32) * n_state * n_stream * sumveclen);
        }

        n_yes = n_no = 0;

        /* Add each model of the node to whichever side the question sends it */
        for (ii = 0; ii < n_id; ii++) {
            i = id[ii];

            if (eval_quest(&all_q[q], dfeat[i], n_dfeat)) {
                side_dist = yes_dist;
                side_means = yes_means;
                side_vars = yes_vars;
                ++n_yes;
            }
            else {
                side_dist = no_dist;
                side_means = no_means;
                side_vars = no_vars;
                ++n_no;
            }

            for (s = 0; s < n_state; s++) {
                for (j = 0; j < n_stream; j++) {
                    for (k = 0; k < n_density; k++) {
                        side_dist[s][j][k] += mixw[i][s][j][k];
                    }
                }
            }

            if (continuous == 1) {
                /* Weighted sums of x and x^2, weighted by the density-0
                   mixing weight; turned into mean/variance below */
                for (s = 0; s < n_state; s++) {
                    for (j = 0; j < n_stream; j++) {
                        for (k = 0; k < veclen[j]; k++) {
                            side_means[s][j][k] += mixw[i][s][j][0] * means[i][s][j][k];
                            side_vars[s][j][k] += mixw[i][s][j][0] * (vars[i][s][j][k] + means[i][s][j][k]*means[i][s][j][k]);
                        }
                    }
                }
            }
        }

        if ((n_yes == 0) || (n_no == 0)) {
            /* no split.  All satisfy or all don't satisfy */
            continue;
        }

        for (s = 0, einc = 0; s < n_state; s++) {
            /* Normalisers are taken from stream 0 only and applied to
               every stream of the state */
            for (k = 0, yes_dnom = 0; k < n_density; k++) {
                yes_dnom += yes_dist[s][0][k];
            }

            if (yes_dnom == 0)
                break;

            yes_norm = 1.0 / yes_dnom;

            for (j = 0; j < n_stream; j++) {
                for (k = 0; k < n_density; k++) {
                    yes_dist[s][j][k] *= yes_norm;
                }
            }

            for (k = 0, no_dnom = 0; k < n_density; k++) {
                no_dnom += no_dist[s][0][k];
            }

            if (no_dnom == 0)
                break;

            no_norm = 1.0 / no_dnom;

            for (j = 0; j < n_stream; j++) {
                for (k = 0; k < n_density; k++) {
                    no_dist[s][j][k] *= no_norm;
                }
            }

            if (continuous == 1) {
                y_ent = 0;
                n_ent = 0;
                for (j = 0; j < n_stream; j++) {
                    /* Both denominators are known non-zero here (see the
                       breaks above), so normalise unconditionally */
                    for (k = 0; k < veclen[j]; k++) {
                        yes_means[s][j][k] *= yes_norm;
                        yes_vars[s][j][k] = yes_vars[s][j][k]*yes_norm -
                                            yes_means[s][j][k]*yes_means[s][j][k];
                        if (yes_vars[s][j][k] < varfloor) yes_vars[s][j][k] = varfloor;
                    }
                    for (k = 0; k < veclen[j]; k++) {
                        no_means[s][j][k] *= no_norm;
                        no_vars[s][j][k] = no_vars[s][j][k]*no_norm -
                                           no_means[s][j][k]*no_means[s][j][k];
                        if (no_vars[s][j][k] < varfloor) no_vars[s][j][k] = varfloor;
                    }
                    y_ent +=  yes_dnom * ent_cont(yes_means[s][j],yes_vars[s][j],veclen[j]);
                    n_ent +=  no_dnom * ent_cont(no_means[s][j],no_vars[s][j],veclen[j]);
                }
                einc += (float64)stwt[s] * (y_ent + n_ent);
            }
            else {
                einc += (float64)stwt[s] * wt_ent_inc(yes_dist[s], yes_dnom,
                                                      no_dist[s], no_dnom,
                                                      dist[s], n_stream, n_density);
            }
        }

        if (s < n_state) {
            /* Ended iteration over states prematurely; assume 'bad' question */
            continue;
        }

        /* In current code this adjustment applies only to continuous HMMs */
        if (continuous == 1) {
            einc -=  node_wt_ent;
        }

        if (einc > b_einc) {
            b_einc = einc;
            b_q = q;
            have_best = 1;
        }
    }

    /* Release scratch space on every path, including "no best question" */
    ckd_free_3d((void ***)yes_dist);
    ckd_free_3d((void ***)no_dist);
    if (continuous == 1) {
        ckd_free_3d((void ***)yes_means);
        ckd_free_3d((void ***)yes_vars);
        ckd_free_3d((void ***)no_means);
        ckd_free_3d((void ***)no_vars);
    }

    if (!have_best) {
        /* No best question */
        *out_best_q = NULL;

        return 0;
    }

    *out_best_q = &all_q[b_q];

    return b_einc;
}
Exemplo n.º 2
0
/*
 * split_node: give node `node_id` of tree `tr` two children.
 *
 * The model ids held by the node are partitioned with the node's own
 * question (node->q): ids for which eval_quest() is true go to the "yes"
 * child, the rest to the "no" child.  The children occupy the next two
 * slots of tr->node[] (tr->n_node is advanced by two), are linked to the
 * parent through y/n/p, and are then initialised with mk_node() and
 * given their own question by set_best_quest(), "yes" child first.
 *
 * The id arrays handed to mk_node() are allocated here with ckd_calloc()
 * and are not freed by this function.
 */
void
split_node(dtree_t *tr,
           uint32 node_id,
           float32 ****mixw,
           float32 ****means,
           float32 ****vars,
           uint32 *veclen,
           uint32 n_model,
           uint32 n_state,
           uint32 n_stream,
           uint32 n_density,
           float32 *stwt,

           quest_t *all_q,
           uint32 n_all_q,
           pset_t *pset,

           uint32 **dfeat,
           uint32 n_dfeat,

           float32 mwfloor)
{
    dtree_node_t *parent = &tr->node[node_id];
    quest_t *quest = (quest_t *)parent->q;
    uint32 *members = parent->id;
    uint32 n_members = parent->n_id;
    uint32 *yes_ids;
    uint32 *no_ids;
    uint32 yes_cnt = 0;
    uint32 no_cnt = 0;
    uint32 yes_node_id;
    uint32 no_node_id;
    uint32 m;

    /* First pass: size the two partitions */
    for (m = 0; m < n_members; m++) {
        if (eval_quest(quest, dfeat[members[m]], n_dfeat))
            yes_cnt++;
        else
            no_cnt++;
    }

#if 0
    fprintf(stderr, "Split: (");
    print_quest(stderr, pset, quest);
    fprintf(stderr, ") %u/%u %.3e\n", yes_cnt, no_cnt, parent->wt_ent_dec);
#endif

    yes_ids = ckd_calloc(yes_cnt, sizeof(uint32));
    no_ids = ckd_calloc(no_cnt, sizeof(uint32));

    /* Second pass: fill the partitions, preserving the order of members[] */
    yes_cnt = 0;
    no_cnt = 0;
    for (m = 0; m < n_members; m++) {
        uint32 model = members[m];

        if (eval_quest(quest, dfeat[model], n_dfeat))
            yes_ids[yes_cnt++] = model;
        else
            no_ids[no_cnt++] = model;
    }

    /* Claim the next two node slots: "yes" first, then "no" */
    yes_node_id = tr->n_node;
    tr->n_node += 1;
    no_node_id = tr->n_node;
    tr->n_node += 1;

    parent->y = &tr->node[yes_node_id];
    parent->n = &tr->node[no_node_id];
    parent->y->p = parent;
    parent->n->p = parent;

    /* Build the "yes" child and choose its question */
    mk_node(parent->y, yes_node_id, yes_ids, yes_cnt,
            mixw, means, vars, veclen,
            n_model, n_state, n_stream, n_density, stwt, mwfloor);

    set_best_quest(parent->y,
                   mixw, means, vars, veclen,
                   n_model, n_state, n_stream, n_density,
                   stwt,
                   all_q, n_all_q, pset,
                   dfeat, n_dfeat,
                   mwfloor);

    /* Build the "no" child and choose its question */
    mk_node(parent->n, no_node_id, no_ids, no_cnt,
            mixw, means, vars, veclen,
            n_model, n_state, n_stream, n_density, stwt, mwfloor);

    set_best_quest(parent->n,
                   mixw, means, vars, veclen,
                   n_model, n_state, n_stream, n_density,
                   stwt,
                   all_q, n_all_q, pset,
                   dfeat, n_dfeat,
                   mwfloor);
}