Beispiel #1
0
static int
acmod_init_feat(acmod_t *acmod)
{
    acmod->fcb =
        feat_init(cmd_ln_str_r(acmod->config, "-feat"),
                  cmn_type_from_str(cmd_ln_str_r(acmod->config,"-cmn")),
                  cmd_ln_boolean_r(acmod->config, "-varnorm"),
                  agc_type_from_str(cmd_ln_str_r(acmod->config, "-agc")),
                  1, cmd_ln_int32_r(acmod->config, "-ceplen"));
    if (acmod->fcb == NULL)
        return -1;

    if (cmd_ln_str_r(acmod->config, "-lda")) {
        E_INFO("Reading linear feature transformation from %s\n",
               cmd_ln_str_r(acmod->config, "-lda"));
        if (feat_read_lda(acmod->fcb,
                          cmd_ln_str_r(acmod->config, "-lda"),
                          cmd_ln_int32_r(acmod->config, "-ldadim")) < 0)
            return -1;
    }

    if (cmd_ln_str_r(acmod->config, "-svspec")) {
        int32 **subvecs;
        E_INFO("Using subvector specification %s\n",
               cmd_ln_str_r(acmod->config, "-svspec"));
        if ((subvecs = parse_subvecs(cmd_ln_str_r(acmod->config, "-svspec"))) == NULL)
            return -1;
        if ((feat_set_subvecs(acmod->fcb, subvecs)) < 0)
            return -1;
    }

    if (cmd_ln_exists_r(acmod->config, "-agcthresh")
        && 0 != strcmp(cmd_ln_str_r(acmod->config, "-agc"), "none")) {
        agc_set_threshold(acmod->fcb->agc_struct,
                          cmd_ln_float32_r(acmod->config, "-agcthresh"));
    }

    if (acmod->fcb->cmn_struct
        && cmd_ln_exists_r(acmod->config, "-cmninit")) {
        char *c, *cc, *vallist;
        int32 nvals;

        vallist = ckd_salloc(cmd_ln_str_r(acmod->config, "-cmninit"));
        c = vallist;
        nvals = 0;
        while (nvals < acmod->fcb->cmn_struct->veclen
               && (cc = strchr(c, ',')) != NULL) {
            *cc = '\0';
            acmod->fcb->cmn_struct->cmn_mean[nvals] = FLOAT2MFCC(atof_c(c));
            c = cc + 1;
            ++nvals;
        }
        if (nvals < acmod->fcb->cmn_struct->veclen && *c != '\0') {
            acmod->fcb->cmn_struct->cmn_mean[nvals] = FLOAT2MFCC(atof_c(c));
        }
        ckd_free(vallist);
    }
    return 0;
}
static void
cmn_prior_shiftwin(cmn_t *cmn)
{
    mfcc_t sf;
    int32 i;

    E_INFO("cmn_prior_update: from < ");
    for (i = 0; i < cmn->veclen; i++)
        E_INFOCONT("%5.2f ", MFCC2FLOAT(cmn->cmn_mean[i]));
    E_INFOCONT(">\n");

    sf = FLOAT2MFCC(1.0) / cmn->nframe;
    for (i = 0; i < cmn->veclen; i++)
        cmn->cmn_mean[i] = cmn->sum[i] / cmn->nframe; /* sum[i] * sf */

    /* Make the accumulation decay exponentially */
    if (cmn->nframe >= CMN_WIN_HWM) {
        sf = CMN_WIN * sf;
        for (i = 0; i < cmn->veclen; i++)
            cmn->sum[i] = MFCCMUL(cmn->sum[i], sf);
        cmn->nframe = CMN_WIN;
    }

    E_INFO("cmn_prior_update: to   < ");
    for (i = 0; i < cmn->veclen; i++)
        E_INFOCONT("%5.2f ", MFCC2FLOAT(cmn->cmn_mean[i]));
    E_INFOCONT(">\n");
}
Beispiel #3
0
Datei: agc.c Projekt: 006/ios_lab
agc_t *agc_init(void)
{
    agc_t *agc;
    agc = ckd_calloc(1, sizeof(*agc));
    agc->noise_thresh = FLOAT2MFCC(2.0);
    
    return agc;
}
Beispiel #4
0
static int32
read_kd_nodes(FILE * fp, kd_tree_t * tree, uint32 maxdepth, int32 maxbbi)
{
    uint32 i, j, in, out;
    int32 ilevel, olevel;

    /* Balanced binary trees, so we have 2^nlevels-1 nodes. */
    if (maxdepth == 0 || maxdepth > tree->n_level)
        maxdepth = tree->n_level;
    in = (1 << tree->n_level) - 1;
    out = (1 << maxdepth) - 1;
    tree->nodes = ckd_calloc(out, sizeof(kd_tree_node_t));

    /* Nodes are read in depth-first ordering. */
    for (j = i = 0; i < in; ++i) {
        float32 split_plane;
        int32 split_comp;

        if (read_tree_int(fp, "NODE", &ilevel, FALSE) < 0)
            break;
        if (read_tree_int(fp, "split_comp", &split_comp, FALSE) < 0)
            return -1;
        if (read_tree_float(fp, "split_plane", &split_plane, FALSE) < 0)
            return -1;
        olevel = ilevel - (tree->n_level - maxdepth);
        if (olevel > 0) {
            /* Only create a node if we are above maxdepth */
            assert(j < out);
            tree->nodes[j].split_comp = split_comp;
            tree->nodes[j].split_plane = FLOAT2MFCC(split_plane);
            /* We only need the BBI list for leafnodes now. */
            if (olevel == 1) {
                if (read_bbi_list(fp, tree->nodes + j, maxbbi) < 0)
                    return -1;
            }
            else {
                if (read_bbi_list(fp, NULL, 0) < 0)
                    return -1;
            }
            /* They are also full trees, hence: */
            if (olevel > 1) {
                tree->nodes[j].left = j + 1;
                tree->nodes[j].right = j + (1 << (olevel - 1));
            }
            ++j;
        }
        else {                  /* Have to read the BBI list anyway. */
            if (read_bbi_list(fp, NULL, 0) < 0)
                return -1;
        }
    }
    E_INFO("Read %d nodes\n", j);

    return 0;
}
Beispiel #5
0
/**
 * Convert a block of float32 to mfcc_t (can be done in-place)
 **/
int32
fe_float_to_mfcc(fe_t * FE,
                 float32 ** input, mfcc_t ** output, int32 nframes)
{
    int32 i;

#ifndef FIXED_POINT
    if ((void *) input == (void *) output)
        return nframes * FE->FEATURE_DIMENSION;
#endif
    for (i = 0; i < nframes * FE->FEATURE_DIMENSION; ++i)
        output[0][i] = FLOAT2MFCC(input[0][i]);

    return i;
}
Beispiel #6
0
/**
 * Convert a block of float32 to mfcc_t (can be done in-place)
 **/
int32
fe_float_to_mfcc(fe_t * fe,
                 float32 ** input, mfcc_t ** output, int32 nframes)
{
    int32 i;

#ifndef FIXED_POINT
    if ((void *) input == (void *) output)
        return nframes * fe->feature_dimension;
#endif
    for (i = 0; i < nframes * fe->feature_dimension; ++i)
        output[0][i] = FLOAT2MFCC(input[0][i]);

    return i;
}
Beispiel #7
0
cmn_t *
cmn_init(int32 veclen)
{
    cmn_t *cmn;
    cmn = (cmn_t *) ckd_calloc(1, sizeof(cmn_t));
    cmn->veclen = veclen;
    cmn->cmn_mean = (mfcc_t *) ckd_calloc(veclen, sizeof(mfcc_t));
    cmn->cmn_var = (mfcc_t *) ckd_calloc(veclen, sizeof(mfcc_t));
    cmn->sum = (mfcc_t *) ckd_calloc(veclen, sizeof(mfcc_t));
    /* A front-end dependent magic number */
    cmn->cmn_mean[0] = FLOAT2MFCC(12.0);
    cmn->nframe = 0;
    E_INFO("mean[0]= %.2f, mean[1..%d]= 0.0\n",
           MFCC2FLOAT(cmn->cmn_mean[0]), veclen - 1);

    return cmn;
}
/*
 * Some of the gaussian density computation can be carried out in advance:
 * 	log(determinant) calculation,
 * 	1/(2*var) in the exponent,
 * NOTE; The density computation is performed in log domain.
 */
static int32
gauden_dist_precompute(gauden_t * g, logmath_t *lmath, float32 varfloor)
{
    int32 i, m, f, d, flen;
    mfcc_t *meanp;
    mfcc_t *varp;
    mfcc_t *detp;
    int32 floored;

    floored = 0;
    /* Allocate space for determinants */
    g->det = (mfcc_t***)ckd_calloc_3d(g->n_mgau, g->n_feat, g->n_density, sizeof(***g->det));

    for (m = 0; m < g->n_mgau; m++) {
        for (f = 0; f < g->n_feat; f++) {
            flen = g->featlen[f];

            /* Determinants for all variance vectors in g->[m][f] */
            for (d = 0, detp = g->det[m][f]; d < g->n_density; d++, detp++) {
                *detp = 0;
                for (i = 0, varp = g->var[m][f][d], meanp = g->mean[m][f][d];
                     i < flen; i++, varp++, meanp++) {
                    float32 *fvarp = (float32 *)varp;

#ifdef FIXED_POINT
                    float32 *fmp = (float32 *)meanp;
                    *meanp = FLOAT2MFCC(*fmp);
#endif
                    if (*fvarp < varfloor) {
                        *fvarp = varfloor;
                        ++floored;
                    }
                    *detp += (mfcc_t)logmath_log(lmath,
                                                 1.0 / sqrt(*fvarp * 2.0 * M_PI));
                    /* Precompute this part of the exponential */
                    *varp = (mfcc_t)logmath_ln_to_log(lmath,
                                                      (1.0 / (*fvarp * 2.0)));
                }
            }
        }
    }

    E_INFO("%d variance values floored\n", floored);

    return 0;
}
Beispiel #9
0
Datei: agc.c Projekt: 006/ios_lab
/* Update estimated max for next utterance */
void
agc_emax_update(agc_t *agc)
{
    if (agc->obs_frame) {            /* Update only if some data observed */
        agc->obs_max_sum += agc->obs_max;
        agc->obs_utt++;

        /* Re-estimate max over past history; decay the history */
        agc->max = agc->obs_max_sum / agc->obs_utt;
        if (agc->obs_utt == 8) {
            agc->obs_max_sum /= 2;
            agc->obs_utt = 4;
        }
    }
    E_INFO("AGCEMax: obs= %.2f, new= %.2f\n", agc->obs_max, agc->max);

    /* Reset the accumulators for the next utterance. */
    agc->obs_frame = 0;
    agc->obs_max = FLOAT2MFCC(-1000.0); /* Less than any real C0 value (hopefully!!) */
}
Beispiel #10
0
static int32
s3_precomp(s2_semi_mgau_t *s, logmath_t *lmath, float32 vFloor)
{
    int feat;

    for (feat = 0; feat < s->n_feat; ++feat) {
        float32 *fmp;
        mfcc_t *mp;
        mfcc_t *vp, *dp;
        int32 vecLen, i;

        vecLen = s->veclen[feat];
        fmp = (float32 *) s->means[feat];
        mp = s->means[feat];
        vp = s->vars[feat];
        dp = s->dets[feat];

        for (i = 0; i < s->n_density; ++i) {
            mfcc_t d;
            int32 j;

            d = 0;
            for (j = 0; j < vecLen; ++j, ++vp, ++mp, ++fmp) {
                float64 fvar;

                *mp = FLOAT2MFCC(*fmp);
                /* Always do these pre-calculations in floating point */
                fvar = *(float32 *) vp;
                if (fvar < vFloor)
                    fvar = vFloor;
                d += (mfcc_t)logmath_log(lmath, 1 / sqrt(fvar * 2.0 * M_PI));
                *vp = (mfcc_t)logmath_ln_to_log(lmath, 1.0 / (2.0 * fvar));
            }
            *dp++ = d;
        }
    }
    return 0;
}
Beispiel #11
0
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif

#include <stdio.h>
#include <string.h>
#include <math.h>

#include "feat.h"
#include "ckd_alloc.h"
#include "test_macros.h"

const mfcc_t data[6][13] = {
	{ FLOAT2MFCC(15.114), FLOAT2MFCC(-1.424), FLOAT2MFCC(-0.953),
	  FLOAT2MFCC(0.186), FLOAT2MFCC(-0.656), FLOAT2MFCC(-0.226),
	  FLOAT2MFCC(-0.105), FLOAT2MFCC(-0.412), FLOAT2MFCC(-0.024),
	  FLOAT2MFCC(-0.091), FLOAT2MFCC(-0.124), FLOAT2MFCC(-0.158), FLOAT2MFCC(-0.197)},
	{ FLOAT2MFCC(14.729), FLOAT2MFCC(-1.313), FLOAT2MFCC(-0.892),
	  FLOAT2MFCC(0.140), FLOAT2MFCC(-0.676), FLOAT2MFCC(-0.089),
	  FLOAT2MFCC(-0.313), FLOAT2MFCC(-0.422), FLOAT2MFCC(-0.058),
	  FLOAT2MFCC(-0.101), FLOAT2MFCC(-0.100), FLOAT2MFCC(-0.128), FLOAT2MFCC(-0.123)},
	{ FLOAT2MFCC(14.502), FLOAT2MFCC(-1.351), FLOAT2MFCC(-1.028),
	  FLOAT2MFCC(-0.189), FLOAT2MFCC(-0.718), FLOAT2MFCC(-0.139),
	  FLOAT2MFCC(-0.121), FLOAT2MFCC(-0.365), FLOAT2MFCC(-0.139),
	  FLOAT2MFCC(-0.154), FLOAT2MFCC(0.041), FLOAT2MFCC(0.009), FLOAT2MFCC(-0.073)},
	{ FLOAT2MFCC(14.557), FLOAT2MFCC(-1.676), FLOAT2MFCC(-0.864),
	  FLOAT2MFCC(0.118), FLOAT2MFCC(-0.445), FLOAT2MFCC(-0.168),
	  FLOAT2MFCC(-0.069), FLOAT2MFCC(-0.503), FLOAT2MFCC(-0.013),
	  FLOAT2MFCC(0.007), FLOAT2MFCC(-0.056), FLOAT2MFCC(-0.075), FLOAT2MFCC(-0.237)},
	{ FLOAT2MFCC(14.665), FLOAT2MFCC(-1.498), FLOAT2MFCC(-0.582),
	  FLOAT2MFCC(0.209), FLOAT2MFCC(-0.487), FLOAT2MFCC(-0.247),
Beispiel #12
0
#include <pocketsphinx.h>
#include <stdio.h>
#include <string.h>

#include "pocketsphinx_internal.h"

#include "test_macros.h"

static const mfcc_t prior[13] = {
    FLOAT2MFCC(33.89),
    FLOAT2MFCC(-1.13),
    FLOAT2MFCC(0.83),
    FLOAT2MFCC(0.49),
    FLOAT2MFCC(-0.65),
    FLOAT2MFCC(0.12),
    FLOAT2MFCC(-0.03),
    FLOAT2MFCC(0.28),
    FLOAT2MFCC(0.41),
    FLOAT2MFCC(0.59),
    FLOAT2MFCC(0.11),
    FLOAT2MFCC(-0.20),
    FLOAT2MFCC(0.17)
};

int
ps_decoder_test(cmd_ln_t *config, char const *sname, char const *expected)
{
    ps_decoder_t *ps;
    mfcc_t **cepbuf;
    FILE *rawfh;
    int16 *buf;
Beispiel #13
0
Datei: agc.c Projekt: 006/ios_lab
float32
agc_get_threshold(agc_t *agc)
{
    return FLOAT2MFCC(agc->noise_thresh);
}
Beispiel #14
0
Datei: agc.c Projekt: 006/ios_lab
void
agc_set_threshold(agc_t *agc, float32 threshold)
{
    agc->noise_thresh = FLOAT2MFCC(threshold);
}
Beispiel #15
0
Datei: agc.c Projekt: 006/ios_lab
void
agc_emax_set(agc_t *agc, float32 m)
{
    agc->max = FLOAT2MFCC(m);
    E_INFO("AGCEMax: max= %.2f\n", m);
}
static int
initialize(int argc,
	   char *argv[])
{
    const char *fdictfn;
    const char *dictfn;
    const char *ts2cbfn;
    uint32 n_ts;
    uint32 n_cb;

    /* define, parse and (partially) validate the command line */
    parse_cmd_ln(argc, argv);

    feat = 
        feat_init(cmd_ln_str("-feat"),
                  cmn_type_from_str(cmd_ln_str("-cmn")),
                  cmd_ln_boolean("-varnorm"),
                  agc_type_from_str(cmd_ln_str("-agc")),
                  1, cmd_ln_int32("-ceplen"));


    if (cmd_ln_str("-lda")) {
        E_INFO("Reading linear feature transformation from %s\n",
               cmd_ln_str("-lda"));
        if (feat_read_lda(feat,
                          cmd_ln_str("-lda"),
                          cmd_ln_int32("-ldadim")) < 0)
            return -1;
    }

    if (cmd_ln_str("-svspec")) {
        int32 **subvecs;
        E_INFO("Using subvector specification %s\n", 
               cmd_ln_str("-svspec"));
        if ((subvecs = parse_subvecs(cmd_ln_str("-svspec"))) == NULL)
            return -1;
        if ((feat_set_subvecs(feat, subvecs)) < 0)
            return -1;
    }

    if (cmd_ln_exists("-agcthresh")
        && 0 != strcmp(cmd_ln_str("-agc"), "none")) {
        agc_set_threshold(feat->agc_struct,
                          cmd_ln_float32("-agcthresh"));
    }

    if (feat->cmn_struct
        && cmd_ln_exists("-cmninit")) {
        char *c, *cc, *vallist;
        int32 nvals;

        vallist = ckd_salloc(cmd_ln_str("-cmninit"));
        c = vallist;
        nvals = 0;
        while (nvals < feat->cmn_struct->veclen
               && (cc = strchr(c, ',')) != NULL) {
            *cc = '\0';
            feat->cmn_struct->cmn_mean[nvals] = FLOAT2MFCC(atof(c));
            c = cc + 1;
            ++nvals;
        }
        if (nvals < feat->cmn_struct->veclen && *c != '\0') {
            feat->cmn_struct->cmn_mean[nvals] = FLOAT2MFCC(atof(c));
        }
        ckd_free(vallist);
    }


    if (cmd_ln_str("-segdir"))
	corpus_set_seg_dir(cmd_ln_str("-segdir"));
    if (cmd_ln_str("-segext"))
	corpus_set_seg_ext(cmd_ln_str("-segext"));

    corpus_set_mfcc_dir(cmd_ln_str("-cepdir"));
    corpus_set_mfcc_ext(cmd_ln_str("-cepext"));

    if (cmd_ln_str("-lsnfn"))
	corpus_set_lsn_filename(cmd_ln_str("-lsnfn"));

    corpus_set_ctl_filename(cmd_ln_str("-ctlfn"));

    if (cmd_ln_int32("-nskip") && cmd_ln_int32("-runlen")) {
        corpus_set_interval(cmd_ln_int32("-nskip"),
			    cmd_ln_int32("-runlen"));
    } else if (cmd_ln_int32("-part") && cmd_ln_int32("-npart")) {
	corpus_set_partition(cmd_ln_int32("-part"),
			     cmd_ln_int32("-npart"));
    }
    

    if (corpus_init() != S3_SUCCESS) {
	return S3_ERROR;
    }
    
    if (cmd_ln_str("-moddeffn")) {
	E_INFO("Reading %s\n", cmd_ln_str("-moddeffn"));
    
	/* Read in the model definitions.  Defines the set of
	   CI phones and context dependent phones.  Defines the
	   transition matrix tying and state level tying. */
	if (model_def_read(&mdef,
			   cmd_ln_str("-moddeffn")) != S3_SUCCESS) {
	    return S3_ERROR;
	}
	
	ts2cbfn = cmd_ln_str("-ts2cbfn");
	if (strcmp(SEMI_LABEL, ts2cbfn) == 0) {
	    mdef->cb = semi_ts2cb(mdef->n_tied_state);
	    n_ts = mdef->n_tied_state;
	    n_cb = 1;
	}
	else if (strcmp(CONT_LABEL, ts2cbfn) == 0) {
	    mdef->cb = cont_ts2cb(mdef->n_tied_state);
	    n_ts = mdef->n_tied_state;
	    n_cb = mdef->n_tied_state;
	}
	else if (strcmp(PTM_LABEL, ts2cbfn) == 0) {
	    mdef->cb = ptm_ts2cb(mdef);
	    n_ts = mdef->n_tied_state;
	    n_cb = mdef->acmod_set->n_ci;
	}
	else if (s3ts2cb_read(ts2cbfn,
			      &mdef->cb,
			      &n_ts,
			      &n_cb) != S3_SUCCESS) {
	    return S3_ERROR;
	}

	dictfn = cmd_ln_str("-dictfn");

	if (dictfn == NULL) {
	    E_FATAL("You must specify a content dictionary using -dictfn\n");
	}

	E_INFO("Reading %s\n", dictfn);
	
	lex = lexicon_read(NULL,	/* no lexicon to start */
			   dictfn,
			   mdef->acmod_set);
	if (lex == NULL)
	    return S3_ERROR;
    
	fdictfn = cmd_ln_str("-fdictfn");

	if (fdictfn) {
	    E_INFO("Reading %s\n", fdictfn);
	    
	    (void)lexicon_read(lex,	/* add filler words content lexicon */
			       fdictfn,
			       mdef->acmod_set);
	}
    }

    return S3_SUCCESS;
}
Beispiel #17
0
/**
 * Read Sphinx-II format mfc file (s2mfc = Sphinx-II format MFC data).
 * If out_mfc is 0, no actual reading will be done, and the number of 
 * frames (plus padding) that would be read is returned.
 * 
 * It's important that normalization is done before padding because
 * frames outside the data we are interested in shouldn't be taken
 * into normalization stats.
 *
 * @return # frames read (plus padding) if successful, -1 if
 * error (e.g., mfc array too small).  
 */
static int32
feat_s2mfc_read_norm_pad(feat_t *fcb, char *file, int32 win,
            		 int32 sf, int32 ef,
            		 mfcc_t ***out_mfc,
            		 int32 maxfr,
            		 int32 cepsize)
{
    FILE *fp;
    int32 n_float32;
    float32 *float_feat;
    struct stat statbuf;
    int32 i, n, byterev;
    int32 start_pad, end_pad;
    mfcc_t **mfc;

    /* Initialize the output pointer to 0, so that any attempts to
       free() it if we fail before allocating it will not segfault! */
    if (out_mfc)
        *out_mfc = 0;
    E_INFO("Reading mfc file: '%s'[%d..%d]\n", file, sf, ef);
    if (ef >= 0 && ef <= sf) {
        E_ERROR("%s: End frame (%d) <= Start frame (%d)\n", file, ef, sf);
        return -1;
    }

    /* Find filesize; HACK!! To get around intermittent NFS failures, use stat_retry */
    if ((stat_retry(file, &statbuf) < 0)
        || ((fp = fopen(file, "rb")) == 0)) {
#ifndef POCKETSPHINX_NET
         E_ERROR("Failed to open file '%s' for reading: %s\n", file, strerror(errno));
#endif
        return -1;
    }

    /* Read #floats in header */
    if (fread_retry(&n_float32, sizeof(int32), 1, fp) != 1) {
        E_ERROR("%s: fread(#floats) failed\n", file);
        fclose(fp);
        return -1;
    }

    /* Check if n_float32 matches file size */
    byterev = 0;
    if ((int32) (n_float32 * sizeof(float32) + 4) != (int32) statbuf.st_size) { /* RAH, typecast both sides to remove compile warning */
        n = n_float32;
        SWAP_INT32(&n);

        if ((int32) (n * sizeof(float32) + 4) != (int32) (statbuf.st_size)) {   /* RAH, typecast both sides to remove compile warning */
            E_ERROR
                ("%s: Header size field: %d(%08x); filesize: %d(%08x)\n",
                 file, n_float32, n_float32, statbuf.st_size,
                 statbuf.st_size);
            fclose(fp);
            return -1;
        }

        n_float32 = n;
        byterev = 1;
    }
    if (n_float32 <= 0) {
        E_ERROR("%s: Header size field (#floats) = %d\n", file, n_float32);
        fclose(fp);
        return -1;
    }

    /* Convert n to #frames of input */
    n = n_float32 / cepsize;
    if (n * cepsize != n_float32) {
        E_ERROR("Header size field: %d; not multiple of %d\n", n_float32,
                cepsize);
        fclose(fp);
        return -1;
    }

    /* Check start and end frames */
    if (sf > 0) {
        if (sf >= n) {
            E_ERROR("%s: Start frame (%d) beyond file size (%d)\n", file,
                    sf, n);
            fclose(fp);
            return -1;
        }
    }
    if (ef < 0)
        ef = n-1;
    else if (ef >= n) {
        E_WARN("%s: End frame (%d) beyond file size (%d), will truncate\n",
               file, ef, n);
        ef = n-1;
    }

    /* Add window to start and end frames */
    sf -= win;
    ef += win;
    if (sf < 0) {
        start_pad = -sf;
        sf = 0;
    }
    else
        start_pad = 0;
    if (ef >= n) {
        end_pad = ef - n + 1;
        ef = n - 1;
    }
    else
        end_pad = 0;

    /* Limit n if indicated by [sf..ef] */
    if ((ef - sf + 1) < n)
        n = (ef - sf + 1);
    if (maxfr > 0 && n + start_pad + end_pad > maxfr) {
        E_ERROR("%s: Maximum output size(%d frames) < actual #frames(%d)\n",
                file, maxfr, n + start_pad + end_pad);
        fclose(fp);
        return -1;
    }

    /* If no output buffer was supplied, then skip the actual data reading. */
    if (out_mfc != 0) {
        /* Position at desired start frame and read actual MFC data */
        mfc = (mfcc_t **)ckd_calloc_2d(n + start_pad + end_pad, cepsize, sizeof(mfcc_t));
        if (sf > 0)
            fseek(fp, sf * cepsize * sizeof(float32), SEEK_CUR);
        n_float32 = n * cepsize;
#ifdef FIXED_POINT
        float_feat = ckd_calloc(n_float32, sizeof(float32));
#else
        float_feat = mfc[start_pad];
#endif
        if (fread_retry(float_feat, sizeof(float32), n_float32, fp) != n_float32) {
            E_ERROR("%s: fread(%dx%d) (MFC data) failed\n", file, n, cepsize);
            ckd_free_2d(mfc);
            fclose(fp);
            return -1;
        }
        if (byterev) {
            for (i = 0; i < n_float32; i++) {
                SWAP_FLOAT32(&float_feat[i]);
            }
        }
#ifdef FIXED_POINT
        for (i = 0; i < n_float32; ++i) {
            mfc[start_pad][i] = FLOAT2MFCC(float_feat[i]);
        }
        ckd_free(float_feat);
#endif

        /* Normalize */
        feat_cmn(fcb, mfc + start_pad, n, 1, 1);
        feat_agc(fcb, mfc + start_pad, n, 1, 1);

        /* Replicate start and end frames if necessary. */
        for (i = 0; i < start_pad; ++i)
            memcpy(mfc[i], mfc[start_pad], cepsize * sizeof(mfcc_t));
        for (i = 0; i < end_pad; ++i)
            memcpy(mfc[start_pad + n + i], mfc[start_pad + n - 1],
                   cepsize * sizeof(mfcc_t));

        *out_mfc = mfc;
    }

    fclose(fp);
    return n + start_pad + end_pad;
}
Beispiel #18
0
void
cmn(cmn_t *cmn, mfcc_t ** mfc, int32 varnorm, int32 n_frame)
{
    mfcc_t *mfcp;
    mfcc_t t;
    int32 i, f;

    oe_assert(mfc != NULL);

    if (n_frame <= 0)
        return;

    /* If cmn->cmn_mean wasn't NULL, we need to zero the contents */
    memset(cmn->cmn_mean, 0, cmn->veclen * sizeof(mfcc_t));

    /* Find mean cep vector for this utterance */
    for (f = 0; f < n_frame; f++) {
        mfcp = mfc[f];
        for (i = 0; i < cmn->veclen; i++) {
            cmn->cmn_mean[i] += mfcp[i];
        }
    }

    for (i = 0; i < cmn->veclen; i++)
        cmn->cmn_mean[i] /= n_frame;

    E_INFO("CMN: ");
    for (i = 0; i < cmn->veclen; i++)
        E_INFOCONT("%5.2f ", MFCC2FLOAT(cmn->cmn_mean[i]));
    E_INFOCONT("\n");
    if (!varnorm) {
        /* Subtract mean from each cep vector */
        for (f = 0; f < n_frame; f++) {
            mfcp = mfc[f];
            for (i = 0; i < cmn->veclen; i++)
                mfcp[i] -= cmn->cmn_mean[i];
        }
    }
    else {
        /* Scale cep vectors to have unit variance along each dimension, and subtract means */
        /* If cmn->cmn_var wasn't NULL, we need to zero the contents */
        memset(cmn->cmn_var, 0, cmn->veclen * sizeof(mfcc_t));

        for (f = 0; f < n_frame; f++) {
            mfcp = mfc[f];

            for (i = 0; i < cmn->veclen; i++) {
                t = mfcp[i] - cmn->cmn_mean[i];
                cmn->cmn_var[i] += MFCCMUL(t, t);
            }
        }
        for (i = 0; i < cmn->veclen; i++)
            /* Inverse Std. Dev, RAH added type case from sqrt */
            cmn->cmn_var[i] = FLOAT2MFCC(sqrt((float64)n_frame / MFCC2FLOAT(cmn->cmn_var[i])));

        for (f = 0; f < n_frame; f++) {
            mfcp = mfc[f];
            for (i = 0; i < cmn->veclen; i++)
                mfcp[i] = MFCCMUL((mfcp[i] - cmn->cmn_mean[i]), cmn->cmn_var[i]);
        }
    }
}
Beispiel #19
0
#include <stdio.h>
#include <string.h>
#include <pocketsphinx.h>

#include <sphinxbase/logmath.h>

#include "acmod.h"
#include "test_macros.h"

static const mfcc_t cmninit[13] = {
	FLOAT2MFCC(41.00),
	FLOAT2MFCC(-5.29),
	FLOAT2MFCC(-0.12),
	FLOAT2MFCC(5.09),
	FLOAT2MFCC(2.48),
	FLOAT2MFCC(-4.07),
	FLOAT2MFCC(-1.37),
	FLOAT2MFCC(-1.78),
	FLOAT2MFCC(-5.08),
	FLOAT2MFCC(-2.05),
	FLOAT2MFCC(-6.45),
	FLOAT2MFCC(-1.42),
	FLOAT2MFCC(1.17)
};

int
main(int argc, char *argv[])
{
    acmod_t *acmod;
    logmath_t *lmath;
    cmd_ln_t *config;
Beispiel #20
0
int32
feat_read_lda(feat_t *feat, const char *ldafile, int32 dim)
{
    FILE *fh;
    int32 byteswap, chksum_present;
    uint32 chksum, i, m, n;
    char **argname, **argval;

    assert(feat);
    if (feat->n_stream != 1) {
        E_ERROR("LDA incompatible with multi-stream features (n_stream = %d)\n",
                feat->n_stream);
        return -1;
    }

    if ((fh = fopen(ldafile, "rb")) == NULL) {
        E_ERROR_SYSTEM("Failed to open transform file '%s' for reading", ldafile);
        return -1;
    }

    if (bio_readhdr(fh, &argname, &argval, &byteswap) < 0) {
        E_ERROR("Failed to read header from transform file '%s'\n", ldafile);
        fclose(fh);
        return -1;
    }

    chksum_present = 0;
    for (i = 0; argname[i]; i++) {
        if (strcmp(argname[i], "version") == 0) {
            if (strcmp(argval[i], MATRIX_FILE_VERSION) != 0)
                E_WARN("%s: Version mismatch: %s, expecting %s\n",
                       ldafile, argval[i], MATRIX_FILE_VERSION);
        }
        else if (strcmp(argname[i], "chksum0") == 0) {
            chksum_present = 1; /* Ignore the associated value */
        }
    }

    bio_hdrarg_free(argname, argval);
    argname = argval = NULL;

    chksum = 0;

    if (feat->lda)
        ckd_free_3d((void ***)feat->lda);

    {
        /* Use a temporary variable to avoid strict-aliasing problems. */
        void ***outlda;

        if (bio_fread_3d(&outlda, sizeof(float32),
                         &feat->n_lda, &m, &n,
                         fh, byteswap, &chksum) < 0) {
            E_ERROR_SYSTEM("%s: bio_fread_3d(lda) failed\n", ldafile);
            fclose(fh);
            return -1;
        }
        feat->lda = (void *)outlda;
    }
    fclose(fh);
    
#ifdef FIXED_POINT
    /* FIXME: This is a fragile hack that depends on mfcc_t and
     * float32 being the same size (which they are, but...) */
    for (i = 0; i < feat->n_lda * m * n; ++i) {
        feat->lda[0][0][i] = FLOAT2MFCC(((float *)feat->lda[0][0])[i]);
    }
#endif

    /* Note that SphinxTrain stores the eigenvectors as row vectors. */
    if (n != feat->stream_len[0])
	E_FATAL("LDA matrix dimension %d doesn't match feature stream size %d\n", n, feat->stream_len[0]);
    
    /* Override dim from file if it is 0 or greater than m. */
    if (dim > m || dim <= 0) {
        dim = m;
    }
    feat->out_dim = dim;

    return 0;
}
Beispiel #21
0
int
main_initialize(int argc,
		char *argv[],
		lexicon_t **out_lex,
		model_def_t **out_omdef,
		model_def_t **out_dmdef,
		feat_t** out_feat)
{
    model_def_t *dmdef = NULL;
    model_def_t *omdef = NULL;
    lexicon_t *lex = NULL;
    feat_t *feat;
    const char *fn;
    uint32 n_ts;
    uint32 n_cb;
    const char *ts2cbfn;

    parse_cmd_ln(argc, argv);

    feat = 
        feat_init(cmd_ln_str("-feat"),
                  cmn_type_from_str(cmd_ln_str("-cmn")),
                  cmd_ln_boolean("-varnorm"),
                  agc_type_from_str(cmd_ln_str("-agc")),
                  1, cmd_ln_int32("-ceplen"));


    if (cmd_ln_str("-lda")) {
        E_INFO("Reading linear feature transformation from %s\n",
               cmd_ln_str("-lda"));
        if (feat_read_lda(feat,
                          cmd_ln_str("-lda"),
                          cmd_ln_int32("-ldadim")) < 0)
            return -1;
    }

    if (cmd_ln_str("-svspec")) {
        int32 **subvecs;
        E_INFO("Using subvector specification %s\n", 
               cmd_ln_str("-svspec"));
        if ((subvecs = parse_subvecs(cmd_ln_str("-svspec"))) == NULL)
            return -1;
        if ((feat_set_subvecs(feat, subvecs)) < 0)
            return -1;
    }

    if (cmd_ln_exists("-agcthresh")
        && 0 != strcmp(cmd_ln_str("-agc"), "none")) {
        agc_set_threshold(feat->agc_struct,
                          cmd_ln_float32("-agcthresh"));
    }

    if (feat->cmn_struct
        && cmd_ln_exists("-cmninit")) {
        char *c, *cc, *vallist;
        int32 nvals;

        vallist = ckd_salloc(cmd_ln_str("-cmninit"));
        c = vallist;
        nvals = 0;
        while (nvals < feat->cmn_struct->veclen
               && (cc = strchr(c, ',')) != NULL) {
            *cc = '\0';
            feat->cmn_struct->cmn_mean[nvals] = FLOAT2MFCC(atof(c));
            c = cc + 1;
            ++nvals;
        }
        if (nvals < feat->cmn_struct->veclen && *c != '\0') {
            feat->cmn_struct->cmn_mean[nvals] = FLOAT2MFCC(atof(c));
        }
        ckd_free(vallist);
    }
    *out_feat = feat;


    if (cmd_ln_str("-omoddeffn")) {
	E_INFO("Reading output model definitions: %s\n", cmd_ln_str("-omoddeffn"));
	
	/* Read in the model definitions.  Defines the set of
	   CI phones and context dependent phones.  Defines the
	   transition matrix tying and state level tying. */
	if (model_def_read(&omdef,
			   cmd_ln_str("-omoddeffn")) != S3_SUCCESS) {
	    return S3_ERROR;
	}

	if (cmd_ln_str("-dmoddeffn")) {
	    E_INFO("Reading dump model definitions: %s\n", cmd_ln_str("-dmoddeffn"));
	
	    if (model_def_read(&dmdef,
			       cmd_ln_str("-dmoddeffn")) != S3_SUCCESS) {
		return S3_ERROR;
	    }
	    setup_d2o_map(dmdef, omdef);
	}
	else {
	    E_INFO("Assuming dump and output model definitions are identical\n");
	}

	ts2cbfn = cmd_ln_str("-ts2cbfn");
	if (ts2cbfn) {
	    if (strcmp(SEMI_LABEL, ts2cbfn) == 0) {
		omdef->cb = semi_ts2cb(omdef->n_tied_state);
		n_ts = omdef->n_tied_state;
		n_cb = 1;
	    }
	    else if (strcmp(CONT_LABEL, ts2cbfn) == 0) {
		omdef->cb = cont_ts2cb(omdef->n_tied_state);
		n_ts = omdef->n_tied_state;
		n_cb = omdef->n_tied_state;
	    }
	    else if (strcmp(PTM_LABEL, ts2cbfn) == 0) {
		omdef->cb = ptm_ts2cb(omdef);
		n_ts = omdef->n_tied_state;
		n_cb = omdef->acmod_set->n_ci;
	    }
	    else if (s3ts2cb_read(cmd_ln_str("-ts2cbfn"),
				  &omdef->cb,
				  &n_ts,
				  &n_cb) != S3_SUCCESS) {
		return S3_ERROR;
	    }
	    
	    if (omdef->n_tied_state != n_ts) {
		E_FATAL("Model definition file n_tied_state = %u, but %u mappings in ts2cb\n",
			omdef->n_tied_state, n_ts);
	    }
	}
    }
    else {
	E_INFO("No mdef files.  Assuming 1-class init\n");
    }

    *out_omdef = omdef;
    *out_dmdef = dmdef;

    fn = cmd_ln_str("-dictfn");
    if (fn) {
	E_INFO("Reading main lexicon: %s\n", fn);
	     

	lex = lexicon_read(NULL,
			   fn,
			   omdef->acmod_set);
	if (lex == NULL)
	    return S3_ERROR;
    }
    
    fn = cmd_ln_str("-fdictfn");
    if (fn) {
	E_INFO("Reading filler lexicon: %s\n", fn);
	(void)lexicon_read(lex,
			   fn,
			   omdef->acmod_set);
    }
    
    *out_lex = lex;
    
    stride = cmd_ln_int32("-stride");

    return S3_SUCCESS;
}
Beispiel #22
0
#include <stdio.h>
#include <string.h>
#include <pocketsphinx.h>

#include <sphinxbase/logmath.h>

#include "acmod.h"
#include "test_macros.h"

static const mfcc_t prior[13] = {
    FLOAT2MFCC(37.03),
    FLOAT2MFCC(-1.01),
    FLOAT2MFCC(0.53),
    FLOAT2MFCC(0.49),
    FLOAT2MFCC(-0.60),
    FLOAT2MFCC(0.14),
    FLOAT2MFCC(-0.05),
    FLOAT2MFCC(0.25),
    FLOAT2MFCC(0.37),
    FLOAT2MFCC(0.58),
    FLOAT2MFCC(0.13),
    FLOAT2MFCC(-0.16),
    FLOAT2MFCC(0.17)
};

int
main(int argc, char *argv[])
{
    acmod_t *acmod;
    logmath_t *lmath;
    cmd_ln_t *config;
Beispiel #23
0
/**
 * For fixed point we are doing the computation in a fixlog domain,
 * so we have to add many processing cases.
 */
void
fe_track_snr(fe_t * fe, int32 *in_speech)
{
    powspec_t *signal;
    powspec_t *gain;
    noise_stats_t *noise_stats;
    powspec_t *mfspec;
    int32 i, num_filts;
    powspec_t lrt, snr;

    if (!(fe->remove_noise || fe->remove_silence)) {
        *in_speech = TRUE;
        return;
    }

    noise_stats = fe->noise_stats;
    mfspec = fe->mfspec;
    num_filts = noise_stats->num_filters;

    signal = (powspec_t *) ckd_calloc(num_filts, sizeof(powspec_t));

    if (noise_stats->undefined) {
        for (i = 0; i < num_filts; i++) {
            noise_stats->power[i] = mfspec[i];
            noise_stats->noise[i] = mfspec[i];
#ifndef FIXED_POINT
            noise_stats->floor[i] = mfspec[i] / noise_stats->max_gain;
            noise_stats->peak[i] = 0.0;       
#else
            noise_stats->floor[i] = mfspec[i] - noise_stats->max_gain;
            noise_stats->peak[i] = MIN_FIXLOG;
#endif
        }
        noise_stats->undefined = FALSE;
    }

    /* Calculate smoothed power */
    for (i = 0; i < num_filts; i++) {
#ifndef FIXED_POINT
        noise_stats->power[i] =
            noise_stats->lambda_power * noise_stats->power[i] + noise_stats->comp_lambda_power * mfspec[i];   
#else
        noise_stats->power[i] = fe_log_add(noise_stats->lambda_power + noise_stats->power[i],
            noise_stats->comp_lambda_power + mfspec[i]);
#endif
    }

    /* Noise estimation and vad decision */
    fe_lower_envelope(noise_stats, noise_stats->power, noise_stats->noise, num_filts);

    lrt = FLOAT2MFCC(-10.0);
    for (i = 0; i < num_filts; i++) {
#ifndef FIXED_POINT
        signal[i] = noise_stats->power[i] - noise_stats->noise[i];
            if (signal[i] < 0)
                signal[i] = 0;
        snr = log(noise_stats->power[i] / noise_stats->noise[i]);

#else
        signal[i] = fe_log_sub(noise_stats->power[i], noise_stats->noise[i]);
        snr = MFCC2FLOAT(noise_stats->power[i] - noise_stats->noise[i]);
#endif    
        if (snr > lrt)
            lrt = snr;
    }

    if (fe->remove_silence && (lrt < fe->vad_threshold))
        *in_speech = FALSE;
    else
	*in_speech = TRUE;

    fe_lower_envelope(noise_stats, signal, noise_stats->floor, num_filts);

    fe_temp_masking(noise_stats, signal, noise_stats->peak, num_filts);

    if (!fe->remove_noise) {
        //no need for further calculations if noise cancellation disabled
        ckd_free(signal);
        return;
    }

    for (i = 0; i < num_filts; i++) {
        if (signal[i] < noise_stats->floor[i])
            signal[i] = noise_stats->floor[i];
    }

    gain = (powspec_t *) ckd_calloc(num_filts, sizeof(powspec_t));
#ifndef FIXED_POINT
    for (i = 0; i < num_filts; i++) {
        if (signal[i] < noise_stats->max_gain * noise_stats->power[i])
            gain[i] = signal[i] / noise_stats->power[i];
        else
            gain[i] = noise_stats->max_gain;
        if (gain[i] < noise_stats->inv_max_gain)
            gain[i] = noise_stats->inv_max_gain;
    }
#else
    for (i = 0; i < num_filts; i++) {
        gain[i] = signal[i] - noise_stats->power[i];
        if (gain[i] > noise_stats->max_gain)
            gain[i] = noise_stats->max_gain;
        if (gain[i] < noise_stats->inv_max_gain)
            gain[i] = noise_stats->inv_max_gain;
    }
#endif

    /* Weight smoothing and time frequency normalization */
    fe_weight_smooth(noise_stats, mfspec, gain, num_filts);

    ckd_free(gain);
    ckd_free(signal);
}