Ejemplo n.º 1
0
static void
feat_s3_cepwin(feat_t * fcb, mfcc_t ** mfc, mfcc_t ** feat)
{
    assert(fcb);
    assert(feat_n_stream(fcb) == 1);

    /* CEP */
    memcpy(feat[0], mfc[ -feat_window_size(fcb)], 
           (1 + 2 * feat_window_size (fcb)) * feat_cepsize(fcb) * sizeof(mfcc_t));
}
Ejemplo n.º 2
0
static void
feat_s3_cep_dcep(feat_t * fcb, mfcc_t ** mfc, mfcc_t ** feat)
{
    mfcc_t *f;
    mfcc_t *w, *_w;
    int32 i;

    assert(fcb);
    assert(feat_n_stream(fcb) == 1);
    assert(feat_stream_len(fcb, 0) == feat_cepsize(fcb) * 2);
    assert(feat_window_size(fcb) == 2);

    /* CEP */
    memcpy(feat[0], mfc[0], feat_cepsize(fcb) * sizeof(mfcc_t));

    /*
     * DCEP: mfc[2] - mfc[-2];
     */
    f = feat[0] + feat_cepsize(fcb);
    w = mfc[2];
    _w = mfc[-2];

    for (i = 0; i < feat_cepsize(fcb); i++)
        f[i] = w[i] - _w[i];
}
Ejemplo n.º 3
0
static int32
feat_s2mfc2feat_block_utt(feat_t * fcb, mfcc_t ** uttcep,
			  int32 nfr, mfcc_t *** ofeat)
{
    mfcc_t **cepbuf;
    int32 i, win, cepsize;

    win = feat_window_size(fcb);
    cepsize = feat_cepsize(fcb);

    /* Copy and pad out the utterance (this requires that the
     * feature computation functions always access the buffer via
     * the frame pointers, which they do)  */
    cepbuf = (mfcc_t**) ckd_calloc(nfr + win * 2, sizeof(mfcc_t *));
    memcpy(cepbuf + win, uttcep, nfr * sizeof(mfcc_t *));

    /* Do normalization before we interpolate on the boundary */    
    feat_cmn(fcb, cepbuf + win, nfr, 1, 1);
    feat_agc(fcb, cepbuf + win, nfr, 1, 1);

    /* Now interpolate */    
    for (i = 0; i < win; ++i) {
        cepbuf[i] = fcb->cepbuf[i];
        memcpy(cepbuf[i], uttcep[0], cepsize * sizeof(mfcc_t));
        cepbuf[nfr + win + i] = fcb->cepbuf[win + i];
        memcpy(cepbuf[nfr + win + i], uttcep[nfr - 1], cepsize * sizeof(mfcc_t));
    }
    /* Compute as usual. */
    feat_compute_utt(fcb, cepbuf, nfr + win * 2, win, ofeat);
    ckd_free(cepbuf);
    return nfr;
}
Ejemplo n.º 4
0
static void
feat_1s_c_d_ld_dd_cep2feat(feat_t * fcb, mfcc_t ** mfc, mfcc_t ** feat)
{
    mfcc_t *f;
    mfcc_t *w, *_w;
    mfcc_t *w1, *w_1, *_w1, *_w_1;
    mfcc_t d1, d2;
    int32 i;

    assert(fcb);
    assert(feat_n_stream(fcb) == 1);
    assert(feat_stream_len(fcb, 0) == feat_cepsize(fcb) * 4);
    assert(feat_window_size(fcb) == FEAT_DCEP_WIN * 2);

    /* CEP */
    memcpy(feat[0], mfc[0], feat_cepsize(fcb) * sizeof(mfcc_t));

    /*
     * DCEP: mfc[w] - mfc[-w], where w = FEAT_DCEP_WIN;
     */
    f = feat[0] + feat_cepsize(fcb);
    w = mfc[FEAT_DCEP_WIN];
    _w = mfc[-FEAT_DCEP_WIN];

    for (i = 0; i < feat_cepsize(fcb); i++)
        f[i] = w[i] - _w[i];

    /*
     * LDCEP: mfc[w] - mfc[-w], where w = FEAT_DCEP_WIN * 2;
     */
    f += feat_cepsize(fcb);
    w = mfc[FEAT_DCEP_WIN * 2];
    _w = mfc[-FEAT_DCEP_WIN * 2];

    for (i = 0; i < feat_cepsize(fcb); i++)
        f[i] = w[i] - _w[i];

    /* 
     * D2CEP: (mfc[w+1] - mfc[-w+1]) - (mfc[w-1] - mfc[-w-1]), 
     * where w = FEAT_DCEP_WIN 
     */
    f += feat_cepsize(fcb);

    w1 = mfc[FEAT_DCEP_WIN + 1];
    _w1 = mfc[-FEAT_DCEP_WIN + 1];
    w_1 = mfc[FEAT_DCEP_WIN - 1];
    _w_1 = mfc[-FEAT_DCEP_WIN - 1];

    for (i = 0; i < feat_cepsize(fcb); i++) {
        d1 = w1[i] - _w1[i];
        d2 = w_1[i] - _w_1[i];

        f[i] = d1 - d2;
    }
}
Ejemplo n.º 5
0
static void
feat_s3_1x39_cep2feat(feat_t * fcb, mfcc_t ** mfc, mfcc_t ** feat)
{
    mfcc_t *f;
    mfcc_t *w, *_w;
    mfcc_t *w1, *w_1, *_w1, *_w_1;
    mfcc_t d1, d2;
    int32 i;

    assert(fcb);
    assert(feat_cepsize(fcb) == 13);
    assert(feat_n_stream(fcb) == 1);
    assert(feat_stream_len(fcb, 0) == 39);
    assert(feat_window_size(fcb) == 3);

    /* CEP; skip C0 */
    memcpy(feat[0], mfc[0] + 1, (feat_cepsize(fcb) - 1) * sizeof(mfcc_t));
    /*
     * DCEP: mfc[2] - mfc[-2];
     */
    f = feat[0] + feat_cepsize(fcb) - 1;
    w = mfc[2] + 1;             /* +1 to skip C0 */
    _w = mfc[-2] + 1;

    for (i = 0; i < feat_cepsize(fcb) - 1; i++)
        f[i] = w[i] - _w[i];

    /* POW: C0, DC0, D2C0 */
    f += feat_cepsize(fcb) - 1;

    f[0] = mfc[0][0];
    f[1] = mfc[2][0] - mfc[-2][0];

    d1 = mfc[3][0] - mfc[-1][0];
    d2 = mfc[1][0] - mfc[-3][0];
    f[2] = d1 - d2;

    /* D2CEP: (mfc[3] - mfc[-1]) - (mfc[1] - mfc[-3]) */
    f += 3;

    w1 = mfc[3] + 1;            /* Final +1 to skip C0 */
    _w1 = mfc[-1] + 1;
    w_1 = mfc[1] + 1;
    _w_1 = mfc[-3] + 1;

    for (i = 0; i < feat_cepsize(fcb) - 1; i++) {
        d1 = w1[i] - _w1[i];
        d2 = w_1[i] - _w_1[i];

        f[i] = d1 - d2;
    }
}
Ejemplo n.º 6
0
static void
feat_copy(feat_t * fcb, mfcc_t ** mfc, mfcc_t ** feat)
{
    int32 win, i, j;

    win = feat_window_size(fcb);

    /* Concatenate input features */
    for (i = -win; i <= win; ++i) {
        uint32 spos = 0;

        for (j = 0; j < feat_n_stream(fcb); ++j) {
            uint32 stream_len;

            /* Unscale the stream length by the window. */
            stream_len = feat_stream_len(fcb, j) / (2 * win + 1);
            memcpy(feat[j] + ((i + win) * stream_len),
                   mfc[i] + spos,
                   stream_len * sizeof(mfcc_t));
            spos += stream_len;
        }
    }
}
Ejemplo n.º 7
0
feat_t *
feat_init(char const *type, cmn_type_t cmn, int32 varnorm,
          agc_type_t agc, int32 breport, int32 cepsize)
{
    feat_t *fcb;

    if (cepsize == 0)
        cepsize = 13;
    if (breport)
        E_INFO
            ("Initializing feature stream to type: '%s', ceplen=%d, CMN='%s', VARNORM='%s', AGC='%s'\n",
             type, cepsize, cmn_type_str[cmn], varnorm ? "yes" : "no", agc_type_str[agc]);

    fcb = (feat_t *) ckd_calloc(1, sizeof(feat_t));
    fcb->refcount = 1;
    fcb->name = (char *) ckd_salloc(type);
    if (strcmp(type, "s2_4x") == 0) {
        /* Sphinx-II format 4-stream feature (Hack!! hardwired constants below) */
        if (cepsize != 13) {
            E_ERROR("s2_4x features require cepsize == 13\n");
            ckd_free(fcb);
            return 0;
        }
        fcb->cepsize = 13;
        fcb->n_stream = 4;
        fcb->stream_len = (uint32 *) ckd_calloc(4, sizeof(uint32));
        fcb->stream_len[0] = 12;
        fcb->stream_len[1] = 24;
        fcb->stream_len[2] = 3;
        fcb->stream_len[3] = 12;
        fcb->out_dim = 51;
        fcb->window_size = 4;
        fcb->compute_feat = feat_s2_4x_cep2feat;
    }
    else if ((strcmp(type, "s3_1x39") == 0) || (strcmp(type, "1s_12c_12d_3p_12dd") == 0)) {
        /* 1-stream cep/dcep/pow/ddcep (Hack!! hardwired constants below) */
        if (cepsize != 13) {
            E_ERROR("s2_4x features require cepsize == 13\n");
            ckd_free(fcb);
            return 0;
        }
        fcb->cepsize = 13;
        fcb->n_stream = 1;
        fcb->stream_len = (uint32 *) ckd_calloc(1, sizeof(uint32));
        fcb->stream_len[0] = 39;
        fcb->out_dim = 39;
        fcb->window_size = 3;
        fcb->compute_feat = feat_s3_1x39_cep2feat;
    }
    else if (strncmp(type, "1s_c_d_dd", 9) == 0) {
        fcb->cepsize = cepsize;
        fcb->n_stream = 1;
        fcb->stream_len = (uint32 *) ckd_calloc(1, sizeof(uint32));
        fcb->stream_len[0] = cepsize * 3;
        fcb->out_dim = cepsize * 3;
        fcb->window_size = FEAT_DCEP_WIN + 1; /* ddcep needs the extra 1 */
        fcb->compute_feat = feat_1s_c_d_dd_cep2feat;
    }
    else if (strncmp(type, "1s_c_d_ld_dd", 12) == 0) {
        fcb->cepsize = cepsize;
        fcb->n_stream = 1;
        fcb->stream_len = (uint32 *) ckd_calloc(1, sizeof(uint32));
        fcb->stream_len[0] = cepsize * 4;
        fcb->out_dim = cepsize * 4;
        fcb->window_size = FEAT_DCEP_WIN * 2;
        fcb->compute_feat = feat_1s_c_d_ld_dd_cep2feat;
    }
    else if (strncmp(type, "cep_dcep", 8) == 0 || strncmp(type, "1s_c_d", 6) == 0) {
        /* 1-stream cep/dcep */
        fcb->cepsize = cepsize;
        fcb->n_stream = 1;
        fcb->stream_len = (uint32 *) ckd_calloc(1, sizeof(uint32));
        fcb->stream_len[0] = feat_cepsize(fcb) * 2;
        fcb->out_dim = fcb->stream_len[0];
        fcb->window_size = 2;
        fcb->compute_feat = feat_s3_cep_dcep;
    }
    else if (strncmp(type, "cep", 3) == 0 || strncmp(type, "1s_c", 4) == 0) {
        /* 1-stream cep */
        fcb->cepsize = cepsize;
        fcb->n_stream = 1;
        fcb->stream_len = (uint32 *) ckd_calloc(1, sizeof(uint32));
        fcb->stream_len[0] = feat_cepsize(fcb);
        fcb->out_dim = fcb->stream_len[0];
        fcb->window_size = 0;
        fcb->compute_feat = feat_s3_cep;
    }
    else if (strncmp(type, "1s_3c", 5) == 0 || strncmp(type, "1s_4c", 5) == 0) {
	/* 1-stream cep with frames concatenated, so called cepwin features */
        if (strncmp(type, "1s_3c", 5) == 0)
            fcb->window_size = 3;
        else
    	    fcb->window_size = 4;

        fcb->cepsize = cepsize;
        fcb->n_stream = 1;
        fcb->stream_len = (uint32 *) ckd_calloc(1, sizeof(uint32));
        fcb->stream_len[0] = feat_cepsize(fcb) * (2 * fcb->window_size + 1);
        fcb->out_dim = fcb->stream_len[0];
        fcb->compute_feat = feat_copy;
    }
    else {
        int32 i, l, k;
        char *strp;
        char *mtype = ckd_salloc(type);
        char *wd = ckd_salloc(type);
        /*
         * Generic definition: Format should be %d,%d,%d,...,%d (i.e.,
         * comma separated list of feature stream widths; #items =
         * #streams).  An optional window size (frames will be
         * concatenated) is also allowed, which can be specified with
         * a colon after the list of feature streams.
         */
        l = strlen(mtype);
        k = 0;
        for (i = 1; i < l - 1; i++) {
            if (mtype[i] == ',') {
                mtype[i] = ' ';
                k++;
            }
            else if (mtype[i] == ':') {
                mtype[i] = '\0';
                fcb->window_size = atoi(mtype + i + 1);
                break;
            }
        }
        k++;                    /* Presumably there are (#commas+1) streams */
        fcb->n_stream = k;
        fcb->stream_len = (uint32 *) ckd_calloc(k, sizeof(uint32));

        /* Scan individual feature stream lengths */
        strp = mtype;
        i = 0;
        fcb->out_dim = 0;
        fcb->cepsize = 0;

#ifndef POCKETSPHINX_NET
        while (sscanf(strp, "%s%n", wd, &l) == 1)
#else
        while (net_sscanf_word(strp, wd, &l) == 1)
#endif		
		{
            strp += l;
            if ((i >= fcb->n_stream)
                || 
#ifndef POCKETSPHINX_NET
				(sscanf(wd, "%d", &(fcb->stream_len[i])) != 1)
#else
				UInt32::TryParse(gcnew String(wd), fcb->stream_len[i])
#endif
                || (fcb->stream_len[i] <= 0))
                E_FATAL("Bad feature type argument\n");
            /* Input size before windowing */
            fcb->cepsize += fcb->stream_len[i];
            if (fcb->window_size > 0)
                fcb->stream_len[i] *= (fcb->window_size * 2 + 1);
            /* Output size after windowing */
            fcb->out_dim += fcb->stream_len[i];
            i++;
        }

        if (i != fcb->n_stream)
            E_FATAL("Bad feature type argument\n");
        if (fcb->cepsize != cepsize)
    	    E_FATAL("Bad feature type argument\n");

        /* Input is already the feature stream */
        fcb->compute_feat = feat_copy;
        ckd_free(mtype);
        ckd_free(wd);
    }

    if (cmn != CMN_NONE)
        fcb->cmn_struct = cmn_init(feat_cepsize(fcb));
    fcb->cmn = cmn;
    fcb->varnorm = varnorm;
    if (agc != AGC_NONE) {
        fcb->agc_struct = agc_init();
        /*
         * No need to check if agc is set to EMAX; agc_emax_set() changes only emax related things
         * Moreover, if agc is not NONE and block mode is used, feat_agc() SILENTLY
         * switches to EMAX
         */
        /* HACK: hardwired initial estimates based on use of CMN (from Sphinx2) */
        agc_emax_set(fcb->agc_struct, (cmn != CMN_NONE) ? 5.0 : 10.0);
    }
    fcb->agc = agc;
    /*
     * Make sure this buffer is large enough to be used in feat_s2mfc2feat_block_utt()
     */
    fcb->cepbuf = (mfcc_t **) ckd_calloc_2d((LIVEBUFBLOCKSIZE < feat_window_size(fcb) * 2) ? feat_window_size(fcb) * 2 : LIVEBUFBLOCKSIZE,
                                            feat_cepsize(fcb),
                                            sizeof(mfcc_t));
    /* This one is actually just an array of pointers to "flatten out"
     * wraparounds. */
    fcb->tmpcepbuf = (mfcc_t**)ckd_calloc(2 * feat_window_size(fcb) + 1,
                                sizeof(*fcb->tmpcepbuf));

    return fcb;
}
Ejemplo n.º 8
0
static void
feat_s2_4x_cep2feat(feat_t * fcb, mfcc_t ** mfc, mfcc_t ** feat)
{
    mfcc_t *f;
    mfcc_t *w, *_w;
    mfcc_t *w1, *w_1, *_w1, *_w_1;
    mfcc_t d1, d2;
    int32 i, j;

    assert(fcb);
    assert(feat_cepsize(fcb) == 13);
    assert(feat_n_stream(fcb) == 4);
    assert(feat_stream_len(fcb, 0) == 12);
    assert(feat_stream_len(fcb, 1) == 24);
    assert(feat_stream_len(fcb, 2) == 3);
    assert(feat_stream_len(fcb, 3) == 12);
    assert(feat_window_size(fcb) == 4);

    /* CEP; skip C0 */
    memcpy(feat[0], mfc[0] + 1, (feat_cepsize(fcb) - 1) * sizeof(mfcc_t));

    /*
     * DCEP(SHORT): mfc[2] - mfc[-2]
     * DCEP(LONG):  mfc[4] - mfc[-4]
     */
    w = mfc[2] + 1;             /* +1 to skip C0 */
    _w = mfc[-2] + 1;

    f = feat[1];
    for (i = 0; i < feat_cepsize(fcb) - 1; i++) /* Short-term */
        f[i] = w[i] - _w[i];

    w = mfc[4] + 1;             /* +1 to skip C0 */
    _w = mfc[-4] + 1;

    for (j = 0; j < feat_cepsize(fcb) - 1; i++, j++)    /* Long-term */
        f[i] = w[j] - _w[j];

    /* D2CEP: (mfc[3] - mfc[-1]) - (mfc[1] - mfc[-3]) */
    w1 = mfc[3] + 1;            /* Final +1 to skip C0 */
    _w1 = mfc[-1] + 1;
    w_1 = mfc[1] + 1;
    _w_1 = mfc[-3] + 1;

    f = feat[3];
    for (i = 0; i < feat_cepsize(fcb) - 1; i++) {
        d1 = w1[i] - _w1[i];
        d2 = w_1[i] - _w_1[i];

        f[i] = d1 - d2;
    }

    /* POW: C0, DC0, D2C0; differences computed as above for rest of cep */
    f = feat[2];
    f[0] = mfc[0][0];
    f[1] = mfc[2][0] - mfc[-2][0];

    d1 = mfc[3][0] - mfc[-1][0];
    d2 = mfc[1][0] - mfc[-3][0];
    f[2] = d1 - d2;
}
Ejemplo n.º 9
0
int32
feat_s2mfc2feat_live(feat_t * fcb, mfcc_t ** uttcep, int32 *inout_ncep,
		     int32 beginutt, int32 endutt, mfcc_t *** ofeat)
{
    int32 win, cepsize, nbufcep;
    int32 i, j, nfeatvec;
    int32 zero = 0;

    /* Avoid having to check this everywhere. */
    if (inout_ncep == 0) inout_ncep = &zero;

    /* Special case for entire utterances. */
    if (beginutt && endutt && *inout_ncep > 0)
        return feat_s2mfc2feat_block_utt(fcb, uttcep, *inout_ncep, ofeat);

    win = feat_window_size(fcb);
    cepsize = feat_cepsize(fcb);

    /* Empty the input buffer on start of utterance. */
    if (beginutt)
        fcb->bufpos = fcb->curpos;

    /* Calculate how much data is in the buffer already. */
    nbufcep = fcb->bufpos - fcb->curpos;
    if (nbufcep < 0)
	nbufcep = fcb->bufpos + LIVEBUFBLOCKSIZE - fcb->curpos;
    /* Add any data that we have to replicate. */
    if (beginutt && *inout_ncep > 0)
        nbufcep += win;
    if (endutt)
        nbufcep += win;

    /* Only consume as much input as will fit in the buffer. */
    if (nbufcep + *inout_ncep > LIVEBUFBLOCKSIZE) {
        /* We also can't overwrite the trailing window, hence the
         * reason why win is subtracted here. */
        *inout_ncep = LIVEBUFBLOCKSIZE - nbufcep - win;
        /* Cancel end of utterance processing. */
        endutt = FALSE;
    }

    /* FIXME: Don't modify the input! */
    feat_cmn(fcb, uttcep, *inout_ncep, beginutt, endutt);
    feat_agc(fcb, uttcep, *inout_ncep, beginutt, endutt);

    /* Replicate first frame into the first win frames if we're at the
     * beginning of the utterance and there was some actual input to
     * deal with.  (FIXME: Not entirely sure why that condition) */
    if (beginutt && *inout_ncep > 0) {
        for (i = 0; i < win; i++) {
            memcpy(fcb->cepbuf[fcb->bufpos++], uttcep[0],
                   cepsize * sizeof(mfcc_t));
            fcb->bufpos %= LIVEBUFBLOCKSIZE;
        }
        /* Move the current pointer past this data. */
        fcb->curpos = fcb->bufpos;
        nbufcep -= win;
    }

    /* Copy in frame data to the circular buffer. */
    for (i = 0; i < *inout_ncep; ++i) {
        memcpy(fcb->cepbuf[fcb->bufpos++], uttcep[i],
               cepsize * sizeof(mfcc_t));
        fcb->bufpos %= LIVEBUFBLOCKSIZE;
	++nbufcep;
    }

    /* Replicate last frame into the last win frames if we're at the
     * end of the utterance (even if there was no input, so we can
     * flush the output). */
    if (endutt) {
        int32 tpos; /* Index of last input frame. */
        if (fcb->bufpos == 0)
            tpos = LIVEBUFBLOCKSIZE - 1;
        else
            tpos = fcb->bufpos - 1;
        for (i = 0; i < win; ++i) {
            memcpy(fcb->cepbuf[fcb->bufpos++], fcb->cepbuf[tpos],
                   cepsize * sizeof(mfcc_t));
            fcb->bufpos %= LIVEBUFBLOCKSIZE;
        }
    }

    /* We have to leave the trailing window of frames. */
    nfeatvec = nbufcep - win;
    if (nfeatvec <= 0)
        return 0; /* Do nothing. */

    for (i = 0; i < nfeatvec; ++i) {
        /* Handle wraparound cases. */
        if (fcb->curpos - win < 0 || fcb->curpos + win >= LIVEBUFBLOCKSIZE) {
            /* Use tmpcepbuf for this case.  Actually, we just need the pointers. */
            for (j = -win; j <= win; ++j) {
                int32 tmppos =
                    (fcb->curpos + j + LIVEBUFBLOCKSIZE) % LIVEBUFBLOCKSIZE;
		fcb->tmpcepbuf[win + j] = fcb->cepbuf[tmppos];
            }
            fcb->compute_feat(fcb, fcb->tmpcepbuf + win, ofeat[i]);
        }
        else {
            fcb->compute_feat(fcb, fcb->cepbuf + fcb->curpos, ofeat[i]);
        }
	/* Move the read pointer forward. */
        ++fcb->curpos;
        fcb->curpos %= LIVEBUFBLOCKSIZE;
    }

    if (fcb->lda)
        feat_lda_transform(fcb, ofeat, nfeatvec);

    if (fcb->subvecs)
        feat_subvec_project(fcb, ofeat, nfeatvec);

    return nfeatvec;
}
Ejemplo n.º 10
0
int32
feat_s2mfc2feat(feat_t * fcb, const char *file, const char *dir, const char *cepext,
                int32 sf, int32 ef, mfcc_t *** feat, int32 maxfr)
{
    char *path;
    char *ps = "/";
    int32 win, nfr;
    int32 file_length, cepext_length, path_length = 0;
    mfcc_t **mfc;

    if (fcb->cepsize <= 0) {
        E_ERROR("Bad cepsize: %d\n", fcb->cepsize);
        return -1;
    }

    if (cepext == 0)
        cepext = "";

    /*
     * Create mfc filename, combining file, dir and extension if
     * necessary
     */

    /*
     * First we decide about the path. If dir is defined, then use
     * it. Otherwise assume the filename already contains the path.
     */
    if (dir == 0) {
        dir = "";
        ps = "";
        /*
         * This is not true but some 3rd party apps
         * may parse the output explicitly checking for this line
         */
        E_INFO("At directory . (current directory)\n");
    }
    else {
        E_INFO("At directory %s\n", dir);
        /*
         * Do not forget the path separator!
         */
        path_length += strlen(dir) + 1;
    }

    /*
     * Include cepext, if it's not already part of the filename.
     */
    file_length = strlen(file);
    cepext_length = strlen(cepext);
    if ((file_length > cepext_length)
        && (strcmp(file + file_length - cepext_length, cepext) == 0)) {
        cepext = "";
        cepext_length = 0;
    }

    /*
     * Do not forget the '\0'
     */
    path_length += file_length + cepext_length + 1;
    path = (char*) ckd_calloc(path_length, sizeof(char));

#ifdef HAVE_SNPRINTF
    /*
     * Paranoia is our best friend...
     */
    while ((file_length = snprintf(path, path_length, "%s%s%s%s", dir, ps, file, cepext)) > path_length) {
        path_length = file_length;
        path = (char*) ckd_realloc(path, path_length * sizeof(char));
    }
#else
#ifndef POCKETSPHINX_NET
    sprintf(path, "%s%s%s%s", dir, ps, file, cepext);
#else
	strcpy(path,dir);
	strcat(path,ps);
	strcat(path,file);
	strcat(path,cepext);
#endif
#endif

    win = feat_window_size(fcb);
    /* Pad maxfr with win, so we read enough raw feature data to
     * calculate the requisite number of dynamic features. */
    if (maxfr >= 0)
        maxfr += win * 2;

    if (feat != 0) {
        /* Read mfc file including window or padding if necessary. */
        nfr = feat_s2mfc_read_norm_pad(fcb, path, win, sf, ef, &mfc, maxfr, fcb->cepsize);
        ckd_free(path);
        if (nfr < 0) {
            ckd_free_2d((void **) mfc);
            return -1;
        }

        /* Actually compute the features */
        feat_compute_utt(fcb, mfc, nfr, win, feat);
        
        ckd_free_2d((void **) mfc);
    }
    else {
        /* Just calculate the number of frames we would need. */
        nfr = feat_s2mfc_read_norm_pad(fcb, path, win, sf, ef, 0, maxfr, fcb->cepsize);
        ckd_free(path);
        if (nfr < 0)
            return nfr;
    }


    return (nfr - win * 2);
}
Ejemplo n.º 11
0
int
acmod_process_cep(acmod_t *acmod,
                  mfcc_t ***inout_cep,
                  int *inout_n_frames,
                  int full_utt)
{
    int32 nfeat, ncep, inptr;
    int orig_n_frames;

    /* If this is a full utterance, process it all at once. */
    if (full_utt)
        return acmod_process_full_cep(acmod, inout_cep, inout_n_frames);

    /* Write to log file. */
    if (acmod->mfcfh)
        acmod_log_mfc(acmod, *inout_cep, *inout_n_frames);

    /* Maximum number of frames we're going to generate. */
    orig_n_frames = ncep = nfeat = *inout_n_frames;

    /* FIXME: This behaviour isn't guaranteed... */
    if (acmod->state == ACMOD_ENDED)
        nfeat += feat_window_size(acmod->fcb);
    else if (acmod->state == ACMOD_STARTED)
        nfeat -= feat_window_size(acmod->fcb);

    /* Clamp number of features to fit available space. */
    if (nfeat > acmod->n_feat_alloc - acmod->n_feat_frame) {
        /* Grow it as needed - we have to grow it at the end of an
         * utterance because we can't return a short read there. */
        if (acmod->grow_feat || acmod->state == ACMOD_ENDED)
            acmod_grow_feat_buf(acmod, acmod->n_feat_alloc + nfeat);
        else
            ncep -= (nfeat - (acmod->n_feat_alloc - acmod->n_feat_frame));
    }

    /* Where to start writing in the feature buffer. */
    if (acmod->grow_feat) {
        /* Grow to avoid wraparound if grow_feat == TRUE. */
        inptr = acmod->feat_outidx + acmod->n_feat_frame;
        while (inptr + nfeat >= acmod->n_feat_alloc)
            acmod_grow_feat_buf(acmod, acmod->n_feat_alloc * 2);
    }
    else {
        inptr = (acmod->feat_outidx + acmod->n_feat_frame) % acmod->n_feat_alloc;
    }


    /* FIXME: we can't split the last frame drop properly to be on the bounary,
     *        so just return
     */
    if (inptr + nfeat > acmod->n_feat_alloc && acmod->state == ACMOD_ENDED) {
        *inout_n_frames -= ncep;
        *inout_cep += ncep;
        return 0;
    }

    /* Write them in two parts if there is wraparound. */
    if (inptr + nfeat > acmod->n_feat_alloc) {
        int32 ncep1 = acmod->n_feat_alloc - inptr;

        /* Make sure we don't end the utterance here. */
        nfeat = feat_s2mfc2feat_live(acmod->fcb, *inout_cep,
                                     &ncep1,
                                     (acmod->state == ACMOD_STARTED),
                                     FALSE,
                                     acmod->feat_buf + inptr);
        if (nfeat < 0)
            return -1;
        /* Move the output feature pointer forward. */
        acmod->n_feat_frame += nfeat;
        assert(acmod->n_feat_frame <= acmod->n_feat_alloc);
        inptr += nfeat;
        inptr %= acmod->n_feat_alloc;
        /* Move the input feature pointers forward. */
        *inout_n_frames -= ncep1;
        *inout_cep += ncep1;
        ncep -= ncep1;
    }

    nfeat = feat_s2mfc2feat_live(acmod->fcb, *inout_cep,
                                 &ncep,
                                 (acmod->state == ACMOD_STARTED),
                                 (acmod->state == ACMOD_ENDED),
                                 acmod->feat_buf + inptr);
    if (nfeat < 0)
        return -1;
    acmod->n_feat_frame += nfeat;
    assert(acmod->n_feat_frame <= acmod->n_feat_alloc);
    /* Move the input feature pointers forward. */
    *inout_n_frames -= ncep;
    *inout_cep += ncep;
    if (acmod->state == ACMOD_STARTED)
        acmod->state = ACMOD_PROCESSING;
    
    return orig_n_frames - *inout_n_frames;
}
Ejemplo n.º 12
0
int
agg_phn_seg(lexicon_t *lex,
	    acmod_set_t *acmod_set,
	    feat_t *fcb,
	    segdmp_type_t type)
{
    uint16 *seg;
    vector_t *mfcc;
    vector_t **feat;
    int32 n_frame;
    uint32 tick_cnt;

    acmod_id_t *phone;
    uint32 *start;
    uint32 *len;
    uint32 n_phone;
    uint32 s;
    char *btw_mark;

    char *trans;
    char **word;
    uint32 n_word;
    int32 mfc_veclen = cmd_ln_int32("-ceplen");

    uint32 n_stream;
    uint32 *veclen;

    tick_cnt = 0;

    n_stream = feat_dimension1(fcb);
    veclen = feat_stream_lengths(fcb);

    while (corpus_next_utt()) {
	if ((++tick_cnt % 500) == 0) {
	    E_INFOCONT("[%u] ", tick_cnt);
	}

	if (corpus_get_sent(&trans) != S3_SUCCESS) {
	    E_FATAL("Unable to read word transcript for %s\n", corpus_utt_brief_name());
	}

	if (corpus_get_seg(&seg, &n_frame) != S3_SUCCESS) {
	    E_FATAL("Unable to read Viterbi state segmentation for %s\n", corpus_utt_brief_name());
	}
	    
	n_word = str2words(trans, NULL, 0);
	word = ckd_calloc(n_word, sizeof(char*));
	str2words(trans, word, n_word);

	phone = mk_phone_list(&btw_mark, &n_phone, word, n_word, lex);
	start = ckd_calloc(n_phone, sizeof(uint32));
	len = ckd_calloc(n_phone, sizeof(uint32));

	/* check to see whether the word transcript and dictionary entries
	   agree with the state segmentation */
	if (ck_seg(acmod_set, phone, n_phone, seg, n_frame, corpus_utt()) != S3_SUCCESS) {
	    free(trans);	/* alloc'ed using strdup, not ckd_*() */
	    free(seg);	/* alloc'ed using malloc in areadshort(), not ckd_*() */
	    ckd_free(word);
	    ckd_free(phone);
	    
	    E_ERROR("ck_seg failed");

	    continue;
	}

	if (cvt2triphone(acmod_set, phone, btw_mark, n_phone) != S3_SUCCESS) {
	    free(trans);	/* alloc'ed using strdup, not ckd_*() */
	    free(seg);		/* alloc'ed using malloc in areadshort(), not ckd_*() */
	    ckd_free(word);
	    ckd_free(phone);

	    E_ERROR("cvt2triphone failed");
	    
	    continue;
	}

	ckd_free(btw_mark);

	if (mk_seg(acmod_set,
		   seg,
		   n_frame,
		   phone,
		   start,
		   len,
		   n_phone) != S3_SUCCESS) {
	    free(trans);
	    free(seg);
	    ckd_free(word);
	    ckd_free(phone);

	    E_ERROR("mk_seg failed");
	    continue;
	}
	
	if (corpus_provides_mfcc()) {
    	        if (corpus_get_generic_featurevec(&mfcc, &n_frame, mfc_veclen) < 0) {
		      E_FATAL("Can't read input features from %s\n", corpus_utt());
		}
		
		if (n_frame < 9) {
		  E_WARN("utt %s too short\n", corpus_utt());
		  if (mfcc) {
		    ckd_free(mfcc[0]);
		    ckd_free(mfcc);
		    mfcc = NULL;
		  }
		  continue;
		}

		feat = feat_array_alloc(fcb, n_frame + feat_window_size(fcb));
	        feat_s2mfc2feat_live(fcb, mfcc, &n_frame, TRUE, TRUE, feat);

		for (s = 0; s < n_phone; s++) {
		    segdmp_add_feat(phone[s],
				    &feat[start[s]],
				    len[s]);
		}

		feat_array_free(feat);
		free(&mfcc[0][0]);
		ckd_free(mfcc);
	}
	else {
	    E_FATAL("No data type specified\n");
	}

	free(trans);	/* alloc'ed using strdup, not ckd_*() */
	free(seg);	/* alloc'ed using malloc in areadshort(), not ckd_*() */
	ckd_free(word);
	ckd_free(phone);
	ckd_free(start);
	ckd_free(len);
    }

    return 0;
}
/*
 * Find Viterbi alignment.
 */
static void align_utt (char *sent,	/* In: Reference transcript */
		       float32 **mfc,	/* In: MFC cepstra for input utterance */
		       int32 nfr,	/* In: #frames of input */
		       char *ctlspec,	/* In: Utt specifiction from control file */
		       char *uttid)	/* In: Utterance id, for logging and other use */
{
    static float32 **feat = NULL;
    static int32 w;
    static int32 topn;
    static gauden_dist_t ***dist;
    static int32 *senscr;
    static s3senid_t *sen_active;
    static int8 *mgau_active;
    static char *s2stsegdir;
    static char *stsegdir;
    static char *phsegdir;
    static char *wdsegdir;
    
    int32 i, s, sid, gid, n_sen_active, best;
    char *arg;
    align_stseg_t *stseg;
    align_phseg_t *phseg;
    align_wdseg_t *wdseg;

    if (! feat) {
	/* One-time allocation of necessary intermediate variables */

	/* Allocate space for a feature vector */
	feat = (float32 **) ckd_calloc (n_feat, sizeof(float32 *));
	for (i = 0; i < n_feat; i++)
	    feat[i] = (float32 *) ckd_calloc (featlen[i], sizeof(float32));
	
	/* Allocate space for top-N codeword density values in a codebook */
	w = feat_window_size ();	/* #MFC vectors needed on either side of current
					   frame to compute one feature vector */
	topn = *((int32 *) cmd_ln_access("-topn"));
	if (topn > g->n_density) {
	    E_ERROR("-topn argument (%d) > #density codewords (%d); set to latter\n",
		   topn, g->n_density);
	    topn = g->n_density;
	}
	dist = (gauden_dist_t ***) ckd_calloc_3d (g->n_mgau, n_feat, topn,
						  sizeof(gauden_dist_t));
	
	/* Space for one frame of senone scores, and per frame active flags */
	senscr = (int32 *) ckd_calloc (sen->n_sen, sizeof(int32));
	sen_active = (s3senid_t *) ckd_calloc (sen->n_sen, sizeof(s3senid_t));
	mgau_active = (int8 *) ckd_calloc (g->n_mgau, sizeof(int8));

	/* Note various output directories */
	s2stsegdir = NULL;
	stsegdir = NULL;
	phsegdir = NULL;
	wdsegdir = NULL;
	if ((arg = (char *) cmd_ln_access ("-s2stsegdir")) != NULL)
	    s2stsegdir = (char *) ckd_salloc (arg);
	if ((arg = (char *) cmd_ln_access ("-stsegdir")) != NULL)
	    stsegdir = (char *) ckd_salloc (arg);
	if ((arg = (char *) cmd_ln_access ("-phsegdir")) != NULL)
	    phsegdir = (char *) ckd_salloc (arg);
	if ((arg = (char *) cmd_ln_access ("-wdsegdir")) != NULL)
	    wdsegdir = (char *) ckd_salloc (arg);
    }
    
/* HACK HACKA HACK BHIKSHA 
    if (nfr <= (w<<1)) {
	E_ERROR("Utterance %s < %d frames (%d); ignored\n", uttid, (w<<1)+1, nfr);
	return;
    }
 END HACK HACKA HACK */
    
    cyctimer_reset_all ();
    counter_reset_all ();
    
    timing_reset (tm_utt);
    timing_start (tm_utt);
    cyctimer_resume (tmr_utt);

    /* AGC and CMN */
    arg = (char *) cmd_ln_access ("-cmn");
    if (strcmp (arg, "current") == 0)
	norm_mean (mfc-4, nfr+8, cepsize); /* -4 HACKA HACK */
    arg = (char *) cmd_ln_access ("-agc");
    if (strcmp (arg, "max") == 0)
	agc_max (mfc, nfr);
    
    if (align_build_sent_hmm (sent) != 0) {
	align_destroy_sent_hmm ();
	cyctimer_pause (tmr_utt);

	E_ERROR("No sentence HMM; no alignment for %s\n", uttid);
	
	return;
    }
    
    align_start_utt (uttid);
    
    /*
     * A feature vector for frame f depends on input MFC vectors [f-w..f+w].  Hence
     * the feature vector corresponding to the first w and last w input frames is
     * undefined.  We define them by simply replicating the first and last true
     * feature vectors (presumably silence regions).
     */
    for (i = 0; i < nfr; i++) {
	cyctimer_resume (tmr_utt);
	
	/* Compute feature vector for current frame from input speech cepstra */
/* HACK HACKA HACK BHIKSHA 
	if (i < w)
	    feat_cep2feat (mfc+w, feat);
	else if (i >= nfr-w)
	    feat_cep2feat (mfc+(nfr-w-1), feat);
	else
END HACK HACKA HACK */
	    feat_cep2feat (mfc+i, feat);

	/*
	 * Evaluate gaussian density codebooks and senone scores for input codeword.
	 * Evaluate only active codebooks and senones.
	 */
	/* Obtain active senone flags */
	cyctimer_resume (tmr_senone);
	align_sen_active (sen_active, sen->n_sen);
	/* Flag all CI senones to active if interpolating */
	if (interp) {
	    for (s = 0; s < mdef->n_ci_sen; s++)
		sen_active[s] = 1;
	}
	/* Turn active flags into list (for faster access) */
	n_sen_active = 0;
	for (s = 0; s < mdef->n_sen; s++) {
	    if (sen_active[s])
		sen_active[n_sen_active++] = s;
	}
	cyctimer_pause (tmr_senone);
	
	/* Flag all active mixture-gaussian codebooks */
	cyctimer_resume (tmr_gauden);
	for (gid = 0; gid < g->n_mgau; gid++)
	    mgau_active[gid] = 0;
	for (s = 0; s < n_sen_active; s++) {
	    sid = sen_active[s];
	    mgau_active[sen->mgau[sid]] = 1;
	}
	
	/* Compute topn gaussian density values (for active codebooks) */
	for (gid = 0; gid < g->n_mgau; gid++)
	    if (mgau_active[gid])
		gauden_dist (g, gid, topn, feat, dist[gid]);
	cyctimer_pause (tmr_gauden);
	
	/* Evaluate active senones */
	cyctimer_resume (tmr_senone);
	best = (int32) 0x80000000;
	for (s = 0; s < n_sen_active; s++) {
	    sid = sen_active[s];
	    senscr[sid] = senone_eval (sen, sid, dist[sen->mgau[sid]], topn);
	    if (best < senscr[sid])
		best = senscr[sid];
	}
	if (interp) {
	    for (s = 0; s < n_sen_active; s++) {
		if ((sid = sen_active[s]) >= mdef->n_ci_sen)
		    interp_cd_ci (interp, senscr, sid, mdef->cd2cisen[sid]);
	    }
	}
	
	/* Normalize senone scores (interpolation above can only lower best score) */
	for (s = 0; s < n_sen_active; s++) {
	    sid = sen_active[s];
	    senscr[sid] -= best;
	}
	senscale[i] = best;
	cyctimer_pause (tmr_senone);
	
	/* Step alignment one frame forward */
	cyctimer_resume (tmr_align);
	align_frame (senscr);
	cyctimer_pause (tmr_align);
	
	cyctimer_pause (tmr_utt);
    }
    timing_stop (tm_utt);

    printf ("\n");

    /* Wind up alignment for this utterance */
    if (align_end_utt (&stseg, &phseg, &wdseg) < 0)
	E_ERROR("Final state not reached; no alignment for %s\n\n", uttid);
    else {
	if (s2stsegdir)
	    write_s2stseg (s2stsegdir, stseg, uttid, ctlspec);
	if (stsegdir)
	    write_stseg (stsegdir, stseg, uttid, ctlspec);
	if (phsegdir)
	    write_phseg (phsegdir, phseg, uttid, ctlspec);
	if (wdsegdir)
	    write_wdseg (wdsegdir, wdseg, uttid, ctlspec);
	if (outsentfp)
	    write_outsent (outsentfp, wdseg, uttid);
    }
    
    align_destroy_sent_hmm ();
    
    cyctimer_print_all_norm (stdout, nfr*0.01, tmr_utt);
    counter_print_all (stdout);

    printf("EXECTIME: %5d frames, %7.2f sec CPU, %6.2f xRT; %7.2f sec elapsed, %6.2f xRT\n",
	   nfr,
	   tm_utt->t_cpu, tm_utt->t_cpu * 100.0 / nfr,
	   tm_utt->t_elapsed, tm_utt->t_elapsed * 100.0 / nfr);

    tot_nfr += nfr;
}
Ejemplo n.º 14
0
/*
 * Find Viterbi alignment.
 */
static void
align_utt(char *sent,           /* In: Reference transcript */
          int32 nfr,            /* In: #frames of input */
          char *ctlspec,        /* In: Utt specifiction from control file */
          char *uttid)
{                               /* In: Utterance id, for logging and other use */
    int32 i;
    align_stseg_t *stseg;
    align_phseg_t *phseg;
    align_wdseg_t *wdseg;
    int32 w;

    w = feat_window_size(kbcore_fcb(kbc));  /* #MFC vectors needed on either side of current
                                   frame to compute one feature vector */
    if (nfr <= (w << 1)) {
        E_ERROR("Utterance %s < %d frames (%d); ignored\n", uttid,
                (w << 1) + 1, nfr);
        return;
    }

    ptmr_reset_all(timers);

    ptmr_reset(&tm_utt);
    ptmr_start(&tm_utt);
    ptmr_reset(&tm_ovrhd);
    ptmr_start(&tm_ovrhd);
    ptmr_start(timers + tmr_utt);


    if (align_build_sent_hmm(sent, cmd_ln_int32_r(kbc->config, "-insert_sil")) != 0) {
        align_destroy_sent_hmm();
        ptmr_stop(timers + tmr_utt);

        E_ERROR("No sentence HMM; no alignment for %s\n", uttid);

        return;
    }

    align_start_utt(uttid);

    for (i = 0; i < nfr; i++) {
        ptmr_start(timers + tmr_utt);

        /* Obtain active senone flags */
        ptmr_start(timers + tmr_gauden);
        ptmr_start(timers + tmr_senone);

        align_sen_active(ascr->sen_active, ascr->n_sen);

        /* Bah, there ought to be a function for this. */
        if (kbc->ms_mgau) {
            ms_cont_mgau_frame_eval(ascr,
				    kbc->ms_mgau,
				    kbc->mdef, feat[i], i);
        }
        else if (kbc->s2_mgau) {
            s2_semi_mgau_frame_eval(kbc->s2_mgau,
				    ascr, fastgmm, feat[i],
				    i);
        }
        else if (kbc->mgau) {
            approx_cont_mgau_ci_eval(kbcore_svq(kbc),
                                     kbcore_gs(kbc),
                                     kbcore_mgau(kbc),
                                     fastgmm,
                                     kbc->mdef,
                                     feat[i][0],
                                     ascr->cache_ci_senscr[0],
                                     &(ascr->cache_best_list[0]), i,
                                     kbcore_logmath(kbc));
            approx_cont_mgau_frame_eval(kbcore_mdef(kbc),
					kbcore_svq(kbc),
					kbcore_gs(kbc),
					kbcore_mgau(kbc),
					fastgmm, ascr,
					feat[i][0], i,
					ascr->
					cache_ci_senscr[0],
					&tm_ovrhd,
					kbcore_logmath(kbc));
        }

        ptmr_stop(timers + tmr_gauden);
        ptmr_stop(timers + tmr_senone);

        /* Step alignment one frame forward */
        ptmr_start(timers + tmr_align);
        align_frame(ascr->senscr);
        ptmr_stop(timers + tmr_align);
        ptmr_stop(timers + tmr_utt);
    }
    ptmr_stop(&tm_utt);
    ptmr_stop(&tm_ovrhd);

    printf("\n");

    /* Wind up alignment for this utterance */
    if (align_end_utt(&stseg, &phseg, &wdseg) < 0)
        E_ERROR("Final state not reached; no alignment for %s\n\n", uttid);
    else {
        if (s2stsegdir)
            write_s2stseg(s2stsegdir, stseg, uttid, ctlspec, cmd_ln_boolean_r(kbc->config, "-s2cdsen"));
        if (stsegdir)
            write_stseg(stsegdir, stseg, uttid, ctlspec);
        if (phsegdir)
            write_phseg(phsegdir, phseg, uttid, ctlspec);
        if (phlabdir)
            write_phlab(phlabdir, phseg, uttid, ctlspec, cmd_ln_int32_r(kbc->config, "-frate"));
        if (wdsegdir)
            write_wdseg(wdsegdir, wdseg, uttid, ctlspec);
        if (outsentfp)
            write_outsent(outsentfp, wdseg, uttid);
        if (outctlfp)
            write_outctl(outctlfp, ctlspec);
    }

    align_destroy_sent_hmm();

    ptmr_print_all(stdout, timers, nfr * 0.1);

    printf
        ("EXECTIME: %5d frames, %7.2f sec CPU, %6.2f xRT; %7.2f sec elapsed, %6.2f xRT\n",
         nfr, tm_utt.t_cpu, tm_utt.t_cpu * 100.0 / nfr, tm_utt.t_elapsed,
         tm_utt.t_elapsed * 100.0 / nfr);

    tot_nfr += nfr;
}
Ejemplo n.º 15
0
int
agg_all_seg(feat_t *fcb,
	    segdmp_type_t type,
	    const char *fn,
	    uint32 stride)
{
    uint32 seq_no;
    vector_t *mfcc = NULL;
    uint32 mfc_veclen = cmd_ln_int32("-ceplen");
    uint32 n_frame;
    uint32 n_out_frame;
    uint32 blksz=0;
    vector_t **feat = NULL;
    uint32 i, j;
    uint32 t;
    uint32 n_stream;
    const uint32 *veclen;
    FILE *fp;
    uint32 ignore = 0;
    long start;
    int32 no_retries=0;
    
    n_stream = feat_dimension1(fcb);
    veclen = feat_stream_lengths(fcb);
    for (i = 0, blksz = 0; i < n_stream; i++)
        blksz += veclen[i];

    fp = open_dmp(fn);

    start = ftell(fp);

    if (s3write(&i, sizeof(uint32), 1, fp, &ignore) != 1) {
	E_ERROR_SYSTEM("Unable to write to dmp file");

	return S3_ERROR;
    }

    for (seq_no = corpus_get_begin(), j = 0, n_out_frame = 0;
	 corpus_next_utt(); seq_no++) {
	    if (mfcc) {
		free(mfcc[0]);
		ckd_free(mfcc);

		mfcc = NULL;
	    }

	    /* get the MFCC data for the utterance */
	    if (corpus_get_generic_featurevec(&mfcc, &n_frame, mfc_veclen) < 0) {
	      E_FATAL("Can't read input features from %s\n", corpus_utt());
	    }

	if ((seq_no % 1000) == 0) {
	    E_INFO("[%u]\n", seq_no);
	}

	    if (feat) {
		feat_array_free(feat);
		feat = NULL;
	    }
	    
	    if (n_frame < 9) {
	      E_WARN("utt %s too short\n", corpus_utt());
	      if (mfcc) {
		ckd_free(mfcc[0]);
		ckd_free(mfcc);
		mfcc = NULL;
	      }
	      continue;
	    }

	    feat = feat_array_alloc(fcb, n_frame + feat_window_size(fcb));
	    feat_s2mfc2feat_live(fcb, mfcc, &n_frame, TRUE, TRUE, feat);

	    for (t = 0; t < n_frame; t++, j++) {
		if ((j % stride) == 0) {
		    while (s3write(&feat[t][0][0],
				   sizeof(float32),
				   blksz,
				   fp, &ignore) != blksz) {
			static int rpt = 0;

			if (!rpt) {
			    E_ERROR_SYSTEM("Unable to write to dmp file");
			    E_INFO("sleeping...\n");
			    no_retries++;
			}
			sleep(3);

			if(no_retries > 10){
			  E_FATAL("Failed to write to a dmp file after 10 retries of getting MFCC(about 30 seconds)\n ");
			}
		    }
		    ++n_out_frame;
		}
	    }
    }

    if (fseek(fp, start, SEEK_SET) < 0) {
	E_ERROR_SYSTEM("Unable to seek to begin of dmp");

	return S3_ERROR;
    }

    E_INFO("Wrote %u frames to %s\n", n_out_frame, fn);

    if (s3write((void *)&n_out_frame, sizeof(uint32), 1, fp, &ignore) != 1) {
	E_ERROR_SYSTEM("Unable to write to dmp file");
	
	return S3_ERROR;
    }

    return S3_SUCCESS;
}
Ejemplo n.º 16
0
Archivo: main.c Proyecto: 10v/cmusphinx
static void decode_utt (void *data, char *uttfile, int32 sf, int32 ef, char *uttid)
{
    kb_t *kb;
    acoustic_t *am;
    int32 featwin, nfr, min_utt_frames, n_vithist;
    char cepfile[4096], latfile[4096];
    vithist_t *finalhist;
    int32 i, f;
    glist_t hyplist;
    FILE *latfp;

    printf ("\n");
    fflush (stdout);
    E_INFO("Utterance %s\n", uttid);
    
    kb = (kb_t *)data;
    am = kb->am;
    featwin = feat_window_size(am->fcb);
    
    /* Build complete cepfile name and read cepstrum data; check for min length */
    ctl_infile (cepfile, cmd_ln_str("-cepdir"), cmd_ln_str("-cepext"), uttfile);

    if ((nfr = s2mfc_read (cepfile, sf, ef, featwin, am->mfc, S3_MAX_FRAMES)) < 0) {
	E_ERROR("%s: MFC read failed\n", uttid);
	return;
    }
    E_INFO("%s: %d frames\n", uttid, nfr-(featwin<<1));
    
    ptmr_reset (kb->tm);
    ptmr_reset (kb->tm_search);
    ptmr_start (kb->tm);
    
    min_utt_frames = (featwin<<1) + 1;
    if (nfr < min_utt_frames) {
	E_ERROR("%s: Utterance shorter than %d frames; ignored\n",
		uttid, min_utt_frames, nfr);
	return;
    }
    
    /* CMN/AGC */
    if (strcmp (cmd_ln_str("-cmn"), "current") == 0)
	cmn (am->mfc, nfr, feat_cepsize(am->fcb));
    if (strcmp (cmd_ln_str("-agc"), "max") == 0)
	agc_max (am->mfc, nfr);
    
    /* Process utterance */
    lextree_vit_start (kb, uttid);
    for (i = featwin, f = 0; i < nfr-featwin; i++, f++) {
	am->senscale[f] = acoustic_eval (am, i);
	
	ptmr_start (kb->tm_search);
	
	lextree_vit_frame (kb, f, uttid);
	printf (" %d,%d,%d", f, glist_count (kb->vithist[f]), glist_count (kb->lextree_active));
	fflush (stdout);
	
	ptmr_stop (kb->tm_search);
    }
    printf ("\n");
    finalhist = lextree_vit_end (kb, f, uttid);
    
    hyplist = vithist_backtrace (finalhist, kb->am->senscale);
    hyp_log (stdout, hyplist, _dict_wordstr, (void *)kb->dict);
    hyp_myfree (hyplist);
    printf ("\n");
    
    /* Log the entire Viterbi word lattice */
    sprintf (latfile, "%s.lat", uttid);
    if ((latfp = fopen(latfile, "w")) == NULL) {
	E_ERROR("fopen(%s,w) failed; using stdout\n", latfile);
	latfp = stdout;
    }
    n_vithist = vithist_log (latfp, kb->vithist, f, _dict_wordstr, (void *)kb->dict);
    if (latfp != stdout)
	fclose (latfp);
    else {
	printf ("\n");
	fflush (stdout);
    }
    
    ptmr_stop (kb->tm);
    if (f > 0) {
	printf("TMR(%s): %5d frames; %.1fs CPU, %.2f xRT; %.1fs CPU(search), %.2f xRT; %.1fs Elapsed, %.2f xRT\n",
	       uttid, f,
	       kb->tm->t_cpu, kb->tm->t_cpu * 100.0 / f,
	       kb->tm_search->t_cpu, kb->tm_search->t_cpu * 100.0 / f,
	       kb->tm->t_elapsed, kb->tm->t_elapsed * 100.0 / f);
	printf("CTR(%s): %5d frames; %d Sen (%.1f/fr); %d HMM (%.1f/fr); %d Words (%.1f/fr)\n",
	       uttid, f,
	       kb->n_sen_eval, ((float64)kb->n_sen_eval) / f,
	       kb->n_hmm_eval, ((float64)kb->n_hmm_eval) / f,
	       n_vithist, ((float64) n_vithist) / f);
    }
    
    /* Cleanup */
    glist_free (kb->lextree_active);
    kb->lextree_active = NULL;
    for (; f >= -1; --f) {	/* I.e., including dummy START_WORD node at frame -1 */
	glist_myfree (kb->vithist[f], sizeof(vithist_t));
	kb->vithist[f] = NULL;
    }
    
    lm_cache_reset (kb->lm);
}