Пример #1
0
static void
feat_s3_cep_dcep(feat_t * fcb, mfcc_t ** mfc, mfcc_t ** feat)
{
    mfcc_t *f;
    mfcc_t *w, *_w;
    int32 i;

    assert(fcb);
    assert(feat_n_stream(fcb) == 1);
    assert(feat_stream_len(fcb, 0) == feat_cepsize(fcb) * 2);
    assert(feat_window_size(fcb) == 2);

    /* CEP */
    memcpy(feat[0], mfc[0], feat_cepsize(fcb) * sizeof(mfcc_t));

    /*
     * DCEP: mfc[2] - mfc[-2];
     */
    f = feat[0] + feat_cepsize(fcb);
    w = mfc[2];
    _w = mfc[-2];

    for (i = 0; i < feat_cepsize(fcb); i++)
        f[i] = w[i] - _w[i];
}
Пример #2
0
static int32
feat_s2mfc2feat_block_utt(feat_t * fcb, mfcc_t ** uttcep,
			  int32 nfr, mfcc_t *** ofeat)
{
    mfcc_t **cepbuf;
    int32 i, win, cepsize;

    win = feat_window_size(fcb);
    cepsize = feat_cepsize(fcb);

    /* Copy and pad out the utterance (this requires that the
     * feature computation functions always access the buffer via
     * the frame pointers, which they do)  */
    cepbuf = (mfcc_t**) ckd_calloc(nfr + win * 2, sizeof(mfcc_t *));
    memcpy(cepbuf + win, uttcep, nfr * sizeof(mfcc_t *));

    /* Do normalization before we interpolate on the boundary */    
    feat_cmn(fcb, cepbuf + win, nfr, 1, 1);
    feat_agc(fcb, cepbuf + win, nfr, 1, 1);

    /* Now interpolate */    
    for (i = 0; i < win; ++i) {
        cepbuf[i] = fcb->cepbuf[i];
        memcpy(cepbuf[i], uttcep[0], cepsize * sizeof(mfcc_t));
        cepbuf[nfr + win + i] = fcb->cepbuf[win + i];
        memcpy(cepbuf[nfr + win + i], uttcep[nfr - 1], cepsize * sizeof(mfcc_t));
    }
    /* Compute as usual. */
    feat_compute_utt(fcb, cepbuf, nfr + win * 2, win, ofeat);
    ckd_free(cepbuf);
    return nfr;
}
Пример #3
0
static int
acmod_log_mfc(acmod_t *acmod,
              mfcc_t **cep, int n_frames)
{
    int i, n;
    int32 *ptr = (int32 *)cep[0];

    n = n_frames * feat_cepsize(acmod->fcb);
    /* Swap bytes. */
    if (!WORDS_BIGENDIAN) {
        for (i = 0; i < (n * sizeof(mfcc_t)); ++i) {
            SWAP_INT32(ptr + i);
        }
    }
    /* Write features. */
    if (fwrite(cep[0], sizeof(mfcc_t), n, acmod->mfcfh) != n) {
        E_ERROR_SYSTEM("Failed to write %d values to log file", n);
    }

    /* Swap them back. */
    if (!WORDS_BIGENDIAN) {
        for (i = 0; i < (n * sizeof(mfcc_t)); ++i) {
            SWAP_INT32(ptr + i);
        }
    }
    return 0;
}
Пример #4
0
static void
feat_1s_c_d_ld_dd_cep2feat(feat_t * fcb, mfcc_t ** mfc, mfcc_t ** feat)
{
    mfcc_t *f;
    mfcc_t *w, *_w;
    mfcc_t *w1, *w_1, *_w1, *_w_1;
    mfcc_t d1, d2;
    int32 i;

    assert(fcb);
    assert(feat_n_stream(fcb) == 1);
    assert(feat_stream_len(fcb, 0) == feat_cepsize(fcb) * 4);
    assert(feat_window_size(fcb) == FEAT_DCEP_WIN * 2);

    /* CEP */
    memcpy(feat[0], mfc[0], feat_cepsize(fcb) * sizeof(mfcc_t));

    /*
     * DCEP: mfc[w] - mfc[-w], where w = FEAT_DCEP_WIN;
     */
    f = feat[0] + feat_cepsize(fcb);
    w = mfc[FEAT_DCEP_WIN];
    _w = mfc[-FEAT_DCEP_WIN];

    for (i = 0; i < feat_cepsize(fcb); i++)
        f[i] = w[i] - _w[i];

    /*
     * LDCEP: mfc[w] - mfc[-w], where w = FEAT_DCEP_WIN * 2;
     */
    f += feat_cepsize(fcb);
    w = mfc[FEAT_DCEP_WIN * 2];
    _w = mfc[-FEAT_DCEP_WIN * 2];

    for (i = 0; i < feat_cepsize(fcb); i++)
        f[i] = w[i] - _w[i];

    /* 
     * D2CEP: (mfc[w+1] - mfc[-w+1]) - (mfc[w-1] - mfc[-w-1]), 
     * where w = FEAT_DCEP_WIN 
     */
    f += feat_cepsize(fcb);

    w1 = mfc[FEAT_DCEP_WIN + 1];
    _w1 = mfc[-FEAT_DCEP_WIN + 1];
    w_1 = mfc[FEAT_DCEP_WIN - 1];
    _w_1 = mfc[-FEAT_DCEP_WIN - 1];

    for (i = 0; i < feat_cepsize(fcb); i++) {
        d1 = w1[i] - _w1[i];
        d2 = w_1[i] - _w_1[i];

        f[i] = d1 - d2;
    }
}
Пример #5
0
static void
feat_s3_cep(feat_t * fcb, mfcc_t ** mfc, mfcc_t ** feat)
{
    assert(fcb);
    assert(feat_n_stream(fcb) == 1);
    assert(feat_window_size(fcb) == 0);

    /* CEP */
    memcpy(feat[0], mfc[0], feat_cepsize(fcb) * sizeof(mfcc_t));
}
Пример #6
0
int
acmod_feat_mismatch(acmod_t *acmod, feat_t *fcb)
{
    /* Feature type needs to be the same. */
    if (0 != strcmp(cmd_ln_str_r(acmod->config, "-feat"), feat_name(fcb)))
        return TRUE;
    /* Input vector dimension needs to be the same. */
    if (cmd_ln_int32_r(acmod->config, "-ceplen") != feat_cepsize(fcb))
        return TRUE;
    /* FIXME: Need to check LDA and stuff too. */
    return FALSE;
}
Пример #7
0
static void
feat_s3_1x39_cep2feat(feat_t * fcb, mfcc_t ** mfc, mfcc_t ** feat)
{
    mfcc_t *f;
    mfcc_t *w, *_w;
    mfcc_t *w1, *w_1, *_w1, *_w_1;
    mfcc_t d1, d2;
    int32 i;

    assert(fcb);
    assert(feat_cepsize(fcb) == 13);
    assert(feat_n_stream(fcb) == 1);
    assert(feat_stream_len(fcb, 0) == 39);
    assert(feat_window_size(fcb) == 3);

    /* CEP; skip C0 */
    memcpy(feat[0], mfc[0] + 1, (feat_cepsize(fcb) - 1) * sizeof(mfcc_t));
    /*
     * DCEP: mfc[2] - mfc[-2];
     */
    f = feat[0] + feat_cepsize(fcb) - 1;
    w = mfc[2] + 1;             /* +1 to skip C0 */
    _w = mfc[-2] + 1;

    for (i = 0; i < feat_cepsize(fcb) - 1; i++)
        f[i] = w[i] - _w[i];

    /* POW: C0, DC0, D2C0 */
    f += feat_cepsize(fcb) - 1;

    f[0] = mfc[0][0];
    f[1] = mfc[2][0] - mfc[-2][0];

    d1 = mfc[3][0] - mfc[-1][0];
    d2 = mfc[1][0] - mfc[-3][0];
    f[2] = d1 - d2;

    /* D2CEP: (mfc[3] - mfc[-1]) - (mfc[1] - mfc[-3]) */
    f += 3;

    w1 = mfc[3] + 1;            /* Final +1 to skip C0 */
    _w1 = mfc[-1] + 1;
    w_1 = mfc[1] + 1;
    _w_1 = mfc[-3] + 1;

    for (i = 0; i < feat_cepsize(fcb) - 1; i++) {
        d1 = w1[i] - _w1[i];
        d2 = w_1[i] - _w_1[i];

        f[i] = d1 - d2;
    }
}
Пример #8
0
feat_t *
feat_init(char const *type, cmn_type_t cmn, int32 varnorm,
          agc_type_t agc, int32 breport, int32 cepsize)
{
    feat_t *fcb;

    if (cepsize == 0)
        cepsize = 13;
    if (breport)
        E_INFO
            ("Initializing feature stream to type: '%s', ceplen=%d, CMN='%s', VARNORM='%s', AGC='%s'\n",
             type, cepsize, cmn_type_str[cmn], varnorm ? "yes" : "no", agc_type_str[agc]);

    fcb = (feat_t *) ckd_calloc(1, sizeof(feat_t));
    fcb->refcount = 1;
    fcb->name = (char *) ckd_salloc(type);
    if (strcmp(type, "s2_4x") == 0) {
        /* Sphinx-II format 4-stream feature (Hack!! hardwired constants below) */
        if (cepsize != 13) {
            E_ERROR("s2_4x features require cepsize == 13\n");
            ckd_free(fcb);
            return 0;
        }
        fcb->cepsize = 13;
        fcb->n_stream = 4;
        fcb->stream_len = (uint32 *) ckd_calloc(4, sizeof(uint32));
        fcb->stream_len[0] = 12;
        fcb->stream_len[1] = 24;
        fcb->stream_len[2] = 3;
        fcb->stream_len[3] = 12;
        fcb->out_dim = 51;
        fcb->window_size = 4;
        fcb->compute_feat = feat_s2_4x_cep2feat;
    }
    else if ((strcmp(type, "s3_1x39") == 0) || (strcmp(type, "1s_12c_12d_3p_12dd") == 0)) {
        /* 1-stream cep/dcep/pow/ddcep (Hack!! hardwired constants below) */
        if (cepsize != 13) {
            E_ERROR("s2_4x features require cepsize == 13\n");
            ckd_free(fcb);
            return 0;
        }
        fcb->cepsize = 13;
        fcb->n_stream = 1;
        fcb->stream_len = (uint32 *) ckd_calloc(1, sizeof(uint32));
        fcb->stream_len[0] = 39;
        fcb->out_dim = 39;
        fcb->window_size = 3;
        fcb->compute_feat = feat_s3_1x39_cep2feat;
    }
    else if (strncmp(type, "1s_c_d_dd", 9) == 0) {
        fcb->cepsize = cepsize;
        fcb->n_stream = 1;
        fcb->stream_len = (uint32 *) ckd_calloc(1, sizeof(uint32));
        fcb->stream_len[0] = cepsize * 3;
        fcb->out_dim = cepsize * 3;
        fcb->window_size = FEAT_DCEP_WIN + 1; /* ddcep needs the extra 1 */
        fcb->compute_feat = feat_1s_c_d_dd_cep2feat;
    }
    else if (strncmp(type, "1s_c_d_ld_dd", 12) == 0) {
        fcb->cepsize = cepsize;
        fcb->n_stream = 1;
        fcb->stream_len = (uint32 *) ckd_calloc(1, sizeof(uint32));
        fcb->stream_len[0] = cepsize * 4;
        fcb->out_dim = cepsize * 4;
        fcb->window_size = FEAT_DCEP_WIN * 2;
        fcb->compute_feat = feat_1s_c_d_ld_dd_cep2feat;
    }
    else if (strncmp(type, "cep_dcep", 8) == 0 || strncmp(type, "1s_c_d", 6) == 0) {
        /* 1-stream cep/dcep */
        fcb->cepsize = cepsize;
        fcb->n_stream = 1;
        fcb->stream_len = (uint32 *) ckd_calloc(1, sizeof(uint32));
        fcb->stream_len[0] = feat_cepsize(fcb) * 2;
        fcb->out_dim = fcb->stream_len[0];
        fcb->window_size = 2;
        fcb->compute_feat = feat_s3_cep_dcep;
    }
    else if (strncmp(type, "cep", 3) == 0 || strncmp(type, "1s_c", 4) == 0) {
        /* 1-stream cep */
        fcb->cepsize = cepsize;
        fcb->n_stream = 1;
        fcb->stream_len = (uint32 *) ckd_calloc(1, sizeof(uint32));
        fcb->stream_len[0] = feat_cepsize(fcb);
        fcb->out_dim = fcb->stream_len[0];
        fcb->window_size = 0;
        fcb->compute_feat = feat_s3_cep;
    }
    else if (strncmp(type, "1s_3c", 5) == 0 || strncmp(type, "1s_4c", 5) == 0) {
	/* 1-stream cep with frames concatenated, so called cepwin features */
        if (strncmp(type, "1s_3c", 5) == 0)
            fcb->window_size = 3;
        else
    	    fcb->window_size = 4;

        fcb->cepsize = cepsize;
        fcb->n_stream = 1;
        fcb->stream_len = (uint32 *) ckd_calloc(1, sizeof(uint32));
        fcb->stream_len[0] = feat_cepsize(fcb) * (2 * fcb->window_size + 1);
        fcb->out_dim = fcb->stream_len[0];
        fcb->compute_feat = feat_copy;
    }
    else {
        int32 i, l, k;
        char *strp;
        char *mtype = ckd_salloc(type);
        char *wd = ckd_salloc(type);
        /*
         * Generic definition: Format should be %d,%d,%d,...,%d (i.e.,
         * comma separated list of feature stream widths; #items =
         * #streams).  An optional window size (frames will be
         * concatenated) is also allowed, which can be specified with
         * a colon after the list of feature streams.
         */
        l = strlen(mtype);
        k = 0;
        for (i = 1; i < l - 1; i++) {
            if (mtype[i] == ',') {
                mtype[i] = ' ';
                k++;
            }
            else if (mtype[i] == ':') {
                mtype[i] = '\0';
                fcb->window_size = atoi(mtype + i + 1);
                break;
            }
        }
        k++;                    /* Presumably there are (#commas+1) streams */
        fcb->n_stream = k;
        fcb->stream_len = (uint32 *) ckd_calloc(k, sizeof(uint32));

        /* Scan individual feature stream lengths */
        strp = mtype;
        i = 0;
        fcb->out_dim = 0;
        fcb->cepsize = 0;

#ifndef POCKETSPHINX_NET
        while (sscanf(strp, "%s%n", wd, &l) == 1)
#else
        while (net_sscanf_word(strp, wd, &l) == 1)
#endif		
		{
            strp += l;
            if ((i >= fcb->n_stream)
                || 
#ifndef POCKETSPHINX_NET
				(sscanf(wd, "%d", &(fcb->stream_len[i])) != 1)
#else
				UInt32::TryParse(gcnew String(wd), fcb->stream_len[i])
#endif
                || (fcb->stream_len[i] <= 0))
                E_FATAL("Bad feature type argument\n");
            /* Input size before windowing */
            fcb->cepsize += fcb->stream_len[i];
            if (fcb->window_size > 0)
                fcb->stream_len[i] *= (fcb->window_size * 2 + 1);
            /* Output size after windowing */
            fcb->out_dim += fcb->stream_len[i];
            i++;
        }

        if (i != fcb->n_stream)
            E_FATAL("Bad feature type argument\n");
        if (fcb->cepsize != cepsize)
    	    E_FATAL("Bad feature type argument\n");

        /* Input is already the feature stream */
        fcb->compute_feat = feat_copy;
        ckd_free(mtype);
        ckd_free(wd);
    }

    if (cmn != CMN_NONE)
        fcb->cmn_struct = cmn_init(feat_cepsize(fcb));
    fcb->cmn = cmn;
    fcb->varnorm = varnorm;
    if (agc != AGC_NONE) {
        fcb->agc_struct = agc_init();
        /*
         * No need to check if agc is set to EMAX; agc_emax_set() changes only emax related things
         * Moreover, if agc is not NONE and block mode is used, feat_agc() SILENTLY
         * switches to EMAX
         */
        /* HACK: hardwired initial estimates based on use of CMN (from Sphinx2) */
        agc_emax_set(fcb->agc_struct, (cmn != CMN_NONE) ? 5.0 : 10.0);
    }
    fcb->agc = agc;
    /*
     * Make sure this buffer is large enough to be used in feat_s2mfc2feat_block_utt()
     */
    fcb->cepbuf = (mfcc_t **) ckd_calloc_2d((LIVEBUFBLOCKSIZE < feat_window_size(fcb) * 2) ? feat_window_size(fcb) * 2 : LIVEBUFBLOCKSIZE,
                                            feat_cepsize(fcb),
                                            sizeof(mfcc_t));
    /* This one is actually just an array of pointers to "flatten out"
     * wraparounds. */
    fcb->tmpcepbuf = (mfcc_t**)ckd_calloc(2 * feat_window_size(fcb) + 1,
                                sizeof(*fcb->tmpcepbuf));

    return fcb;
}
Пример #9
0
static void
feat_s2_4x_cep2feat(feat_t * fcb, mfcc_t ** mfc, mfcc_t ** feat)
{
    mfcc_t *f;
    mfcc_t *w, *_w;
    mfcc_t *w1, *w_1, *_w1, *_w_1;
    mfcc_t d1, d2;
    int32 i, j;

    assert(fcb);
    assert(feat_cepsize(fcb) == 13);
    assert(feat_n_stream(fcb) == 4);
    assert(feat_stream_len(fcb, 0) == 12);
    assert(feat_stream_len(fcb, 1) == 24);
    assert(feat_stream_len(fcb, 2) == 3);
    assert(feat_stream_len(fcb, 3) == 12);
    assert(feat_window_size(fcb) == 4);

    /* CEP; skip C0 */
    memcpy(feat[0], mfc[0] + 1, (feat_cepsize(fcb) - 1) * sizeof(mfcc_t));

    /*
     * DCEP(SHORT): mfc[2] - mfc[-2]
     * DCEP(LONG):  mfc[4] - mfc[-4]
     */
    w = mfc[2] + 1;             /* +1 to skip C0 */
    _w = mfc[-2] + 1;

    f = feat[1];
    for (i = 0; i < feat_cepsize(fcb) - 1; i++) /* Short-term */
        f[i] = w[i] - _w[i];

    w = mfc[4] + 1;             /* +1 to skip C0 */
    _w = mfc[-4] + 1;

    for (j = 0; j < feat_cepsize(fcb) - 1; i++, j++)    /* Long-term */
        f[i] = w[j] - _w[j];

    /* D2CEP: (mfc[3] - mfc[-1]) - (mfc[1] - mfc[-3]) */
    w1 = mfc[3] + 1;            /* Final +1 to skip C0 */
    _w1 = mfc[-1] + 1;
    w_1 = mfc[1] + 1;
    _w_1 = mfc[-3] + 1;

    f = feat[3];
    for (i = 0; i < feat_cepsize(fcb) - 1; i++) {
        d1 = w1[i] - _w1[i];
        d2 = w_1[i] - _w_1[i];

        f[i] = d1 - d2;
    }

    /* POW: C0, DC0, D2C0; differences computed as above for rest of cep */
    f = feat[2];
    f[0] = mfc[0][0];
    f[1] = mfc[2][0] - mfc[-2][0];

    d1 = mfc[3][0] - mfc[-1][0];
    d2 = mfc[1][0] - mfc[-3][0];
    f[2] = d1 - d2;
}
Пример #10
0
int32
feat_s2mfc2feat_live(feat_t * fcb, mfcc_t ** uttcep, int32 *inout_ncep,
		     int32 beginutt, int32 endutt, mfcc_t *** ofeat)
{
    int32 win, cepsize, nbufcep;
    int32 i, j, nfeatvec;
    int32 zero = 0;

    /* Avoid having to check this everywhere. */
    if (inout_ncep == 0) inout_ncep = &zero;

    /* Special case for entire utterances. */
    if (beginutt && endutt && *inout_ncep > 0)
        return feat_s2mfc2feat_block_utt(fcb, uttcep, *inout_ncep, ofeat);

    win = feat_window_size(fcb);
    cepsize = feat_cepsize(fcb);

    /* Empty the input buffer on start of utterance. */
    if (beginutt)
        fcb->bufpos = fcb->curpos;

    /* Calculate how much data is in the buffer already. */
    nbufcep = fcb->bufpos - fcb->curpos;
    if (nbufcep < 0)
	nbufcep = fcb->bufpos + LIVEBUFBLOCKSIZE - fcb->curpos;
    /* Add any data that we have to replicate. */
    if (beginutt && *inout_ncep > 0)
        nbufcep += win;
    if (endutt)
        nbufcep += win;

    /* Only consume as much input as will fit in the buffer. */
    if (nbufcep + *inout_ncep > LIVEBUFBLOCKSIZE) {
        /* We also can't overwrite the trailing window, hence the
         * reason why win is subtracted here. */
        *inout_ncep = LIVEBUFBLOCKSIZE - nbufcep - win;
        /* Cancel end of utterance processing. */
        endutt = FALSE;
    }

    /* FIXME: Don't modify the input! */
    feat_cmn(fcb, uttcep, *inout_ncep, beginutt, endutt);
    feat_agc(fcb, uttcep, *inout_ncep, beginutt, endutt);

    /* Replicate first frame into the first win frames if we're at the
     * beginning of the utterance and there was some actual input to
     * deal with.  (FIXME: Not entirely sure why that condition) */
    if (beginutt && *inout_ncep > 0) {
        for (i = 0; i < win; i++) {
            memcpy(fcb->cepbuf[fcb->bufpos++], uttcep[0],
                   cepsize * sizeof(mfcc_t));
            fcb->bufpos %= LIVEBUFBLOCKSIZE;
        }
        /* Move the current pointer past this data. */
        fcb->curpos = fcb->bufpos;
        nbufcep -= win;
    }

    /* Copy in frame data to the circular buffer. */
    for (i = 0; i < *inout_ncep; ++i) {
        memcpy(fcb->cepbuf[fcb->bufpos++], uttcep[i],
               cepsize * sizeof(mfcc_t));
        fcb->bufpos %= LIVEBUFBLOCKSIZE;
	++nbufcep;
    }

    /* Replicate last frame into the last win frames if we're at the
     * end of the utterance (even if there was no input, so we can
     * flush the output). */
    if (endutt) {
        int32 tpos; /* Index of last input frame. */
        if (fcb->bufpos == 0)
            tpos = LIVEBUFBLOCKSIZE - 1;
        else
            tpos = fcb->bufpos - 1;
        for (i = 0; i < win; ++i) {
            memcpy(fcb->cepbuf[fcb->bufpos++], fcb->cepbuf[tpos],
                   cepsize * sizeof(mfcc_t));
            fcb->bufpos %= LIVEBUFBLOCKSIZE;
        }
    }

    /* We have to leave the trailing window of frames. */
    nfeatvec = nbufcep - win;
    if (nfeatvec <= 0)
        return 0; /* Do nothing. */

    for (i = 0; i < nfeatvec; ++i) {
        /* Handle wraparound cases. */
        if (fcb->curpos - win < 0 || fcb->curpos + win >= LIVEBUFBLOCKSIZE) {
            /* Use tmpcepbuf for this case.  Actually, we just need the pointers. */
            for (j = -win; j <= win; ++j) {
                int32 tmppos =
                    (fcb->curpos + j + LIVEBUFBLOCKSIZE) % LIVEBUFBLOCKSIZE;
		fcb->tmpcepbuf[win + j] = fcb->cepbuf[tmppos];
            }
            fcb->compute_feat(fcb, fcb->tmpcepbuf + win, ofeat[i]);
        }
        else {
            fcb->compute_feat(fcb, fcb->cepbuf + fcb->curpos, ofeat[i]);
        }
	/* Move the read pointer forward. */
        ++fcb->curpos;
        fcb->curpos %= LIVEBUFBLOCKSIZE;
    }

    if (fcb->lda)
        feat_lda_transform(fcb, ofeat, nfeatvec);

    if (fcb->subvecs)
        feat_subvec_project(fcb, ofeat, nfeatvec);

    return nfeatvec;
}
main (int32 argc, char *argv[])
{
    char *str;

#if 0
    ckd_debug(100000);
#endif
    
    E_INFO("%s COMPILED ON: %s, AT: %s\n\n", argv[0], __DATE__, __TIME__);
    
    /* Digest command line argument definitions */
    cmd_ln_define (defn);

    if ((argc == 2) && (strcmp (argv[1], "help") == 0)) {
	cmd_ln_print_definitions();
	exit(1); 
    }

    /* Look for default or specified arguments file */
    str = NULL;
    if ((argc == 2) && (argv[1][0] != '-'))
	str = argv[1];
    else if (argc == 1) {
	str = "s3align.arg";
	E_INFO("Looking for default argument file: %s\n", str);
    }
    if (str) {
	/* Build command line argument list from file */
	if ((argc = load_argfile (str, argv[0], &argv)) < 0) {
	    fprintf (stderr, "Usage:\n");
	    fprintf (stderr, "\t%s argument-list, or\n", argv[0]);
	    fprintf (stderr, "\t%s [argument-file] (default file: s3align.arg)\n\n",
		     argv[0]);
	    cmd_ln_print_definitions();
	    exit(1);
	}
    }
    
    cmdline_parse (argc, argv);
    
    if ((cmd_ln_access("-mdeffn") == NULL) ||
	(cmd_ln_access("-meanfn") == NULL) ||
	(cmd_ln_access("-varfn") == NULL)  ||
	(cmd_ln_access("-mixwfn") == NULL)  ||
	(cmd_ln_access("-tmatfn") == NULL) ||
	(cmd_ln_access("-dictfn") == NULL))
	E_FATAL("Missing -mdeffn, -meanfn, -varfn, -mixwfn, -tmatfn, or -dictfn argument\n");
    
    if ((cmd_ln_access("-ctlfn") == NULL) || (cmd_ln_access("-insentfn") == NULL))
	E_FATAL("Missing -ctlfn or -insentfn argument\n");

    if ((cmd_ln_access ("-s2stsegdir") == NULL) &&
	(cmd_ln_access ("-stsegdir") == NULL) &&
	(cmd_ln_access ("-phsegdir") == NULL) &&
	(cmd_ln_access ("-wdsegdir") == NULL) &&
	(cmd_ln_access ("-outsentfn") == NULL))
	E_FATAL("Missing output file/directory argument(s)\n");
    
    tm_utt = timing_new ();
    
    /*
     * Initialize log(S3-base).  All scores (probs...) computed in log domain to avoid
     * underflow.  At the same time, log base = 1.0001 (1+epsilon) to allow log values
     * to be maintained in int32 variables without significant loss of precision.
     */
    if (cmd_ln_access("-logbase") == NULL)
	logs3_init (1.0001);
    else {
	float32 logbase;
    
	logbase = *((float32 *) cmd_ln_access("-logbase"));
	if (logbase <= 1.0)
	    E_FATAL("Illegal log-base: %e; must be > 1.0\n", logbase);
	if (logbase > 1.1)
	    E_WARN("Logbase %e perhaps too large??\n", logbase);
	logs3_init ((float64) logbase);
    }

    /* Initialize feature stream type */
    feat_init ((char *) cmd_ln_access ("-feat"));
/* BHIKSHA: PASS CEPSIZE TO FEAT_CEPSIZE, 6 Jan 98 */
    cepsize = *((int32 *) cmd_ln_access("-ceplen"));
    cepsize = feat_cepsize (cepsize);
/* END CHANGES BY BHIKSHA */
    
    /* Read in input databases */
    models_init ();
    
    senscale = (int32 *) ckd_calloc (S3_MAX_FRAMES, sizeof(int32));
    
    tmr_utt = cyctimer_new ("U");
    tmr_gauden = cyctimer_new ("G");
    tmr_senone = cyctimer_new ("S");
    tmr_align = cyctimer_new ("A");

    /* Initialize align module */
    align_init ();
    printf ("\n");
    
    tot_nfr = 0;
    
    process_ctlfile ();

    if (tot_nfr > 0) {
	printf ("\n");
	printf("TOTAL FRAMES:       %8d\n", tot_nfr);
	printf("TOTAL CPU TIME:     %11.2f sec, %7.2f xRT\n",
	       tm_utt->t_tot_cpu, tm_utt->t_tot_cpu/(tot_nfr*0.01));
	printf("TOTAL ELAPSED TIME: %11.2f sec, %7.2f xRT\n",
	       tm_utt->t_tot_elapsed, tm_utt->t_tot_elapsed/(tot_nfr*0.01));
    }

#if (! WIN32)
    system ("ps aguxwww | grep s3align");
#endif

    /* Hack!! To avoid hanging problem under Linux */
    if (logfp) {
	fclose (logfp);
	*stdout = orig_stdout;
	*stderr = orig_stderr;
    }
    
    exit(0);
}
Пример #12
0
acoustic_t *acoustic_init (feat_t *f, gauden_t *g, senone_t *s,
                           float64 beam,
                           int32 maxfr)
{
    acoustic_t *am;
    int32 i;

    if (senone_n_mgau(s) != gauden_n_mgau(g)) {
        E_ERROR("#Parent mixture Gaussians mismatch: senone(%d), gauden(%d)\n",
                senone_n_mgau(s), gauden_n_mgau(g));
    }

    if (feat_n_stream(f) != senone_n_stream(s)) {
        E_ERROR("#Feature-streams mismatch: feat(%d), senone(%d)\n",
                feat_n_stream(f), senone_n_stream(s));
    }

    if (feat_n_stream(f) != gauden_n_stream(g)) {
        E_ERROR("#Feature-streams mismatch: feat(%d), gauden(%d)\n",
                feat_n_stream(f), gauden_n_stream(g));
        return NULL;
    }

    for (i = 0; i < feat_n_stream(f); i++) {
        if (feat_stream_len(f, i) != gauden_stream_len(g, i)) {
            E_ERROR("Feature stream(%d) length mismatch: feat(%d), gauden(%d)\n",
                    feat_stream_len(f, i), gauden_stream_len(g, i));
            return NULL;
        }
    }

    if (beam > 1.0) {
        E_ERROR("mgaubeam > 1.0 (%e)\n", beam);
        return NULL;
    }

    am = (acoustic_t *) ckd_calloc (1, sizeof(acoustic_t));

    am->fcb = f;
    am->gau = g;
    am->sen = s;

    am->mgaubeam = (beam == 0.0) ? LOGPROB_ZERO : logs3(beam);
    if (am->mgaubeam > 0)
        am->mgaubeam = 0;
    am->tot_mgau_eval = 0;
    am->tot_dist_valid = 0.0;

    am->dist_valid = (am->mgaubeam <= LOGPROB_ZERO) ? NULL :
                     (int32 *) ckd_calloc (g->max_n_mean, sizeof(int32));

    if (f->compute_feat) {
        /* Input is MFC cepstra; feature vectors computed from that */
        am->mfc = (float32 **) ckd_calloc_2d (maxfr, feat_cepsize(am->fcb),
                                              sizeof(float32));
        am->feat = feat_array_alloc (f, 1);
    } else {
        /* Input is directly feature vectors */
        am->mfc = NULL;
        am->feat = feat_array_alloc (f, maxfr);
    }

    am->dist = (int32 *) ckd_calloc (g->max_n_mean, sizeof(int32));
    am->gauden_active = bitvec_alloc (g->n_mgau);

    am->senscr = (int32 *) ckd_calloc (s->n_sen, sizeof(int32));
    am->senscale = (int32 *) ckd_calloc (maxfr, sizeof(int32));
    am->sen_active = bitvec_alloc (s->n_sen);

    return am;
}
Пример #13
0
static void decode_utt (void *data, char *uttfile, int32 sf, int32 ef, char *uttid)
{
    kb_t *kb;
    acoustic_t *am;
    int32 featwin, nfr, min_utt_frames, n_vithist;
    char cepfile[4096], latfile[4096];
    vithist_t *finalhist;
    int32 i, f;
    glist_t hyplist;
    FILE *latfp;

    printf ("\n");
    fflush (stdout);
    E_INFO("Utterance %s\n", uttid);
    
    kb = (kb_t *)data;
    am = kb->am;
    featwin = feat_window_size(am->fcb);
    
    /* Build complete cepfile name and read cepstrum data; check for min length */
    ctl_infile (cepfile, cmd_ln_str("-cepdir"), cmd_ln_str("-cepext"), uttfile);

    if ((nfr = s2mfc_read (cepfile, sf, ef, featwin, am->mfc, S3_MAX_FRAMES)) < 0) {
	E_ERROR("%s: MFC read failed\n", uttid);
	return;
    }
    E_INFO("%s: %d frames\n", uttid, nfr-(featwin<<1));
    
    ptmr_reset (kb->tm);
    ptmr_reset (kb->tm_search);
    ptmr_start (kb->tm);
    
    min_utt_frames = (featwin<<1) + 1;
    if (nfr < min_utt_frames) {
	E_ERROR("%s: Utterance shorter than %d frames; ignored\n",
		uttid, min_utt_frames, nfr);
	return;
    }
    
    /* CMN/AGC */
    if (strcmp (cmd_ln_str("-cmn"), "current") == 0)
	cmn (am->mfc, nfr, feat_cepsize(am->fcb));
    if (strcmp (cmd_ln_str("-agc"), "max") == 0)
	agc_max (am->mfc, nfr);
    
    /* Process utterance */
    lextree_vit_start (kb, uttid);
    for (i = featwin, f = 0; i < nfr-featwin; i++, f++) {
	am->senscale[f] = acoustic_eval (am, i);
	
	ptmr_start (kb->tm_search);
	
	lextree_vit_frame (kb, f, uttid);
	printf (" %d,%d,%d", f, glist_count (kb->vithist[f]), glist_count (kb->lextree_active));
	fflush (stdout);
	
	ptmr_stop (kb->tm_search);
    }
    printf ("\n");
    finalhist = lextree_vit_end (kb, f, uttid);
    
    hyplist = vithist_backtrace (finalhist, kb->am->senscale);
    hyp_log (stdout, hyplist, _dict_wordstr, (void *)kb->dict);
    hyp_myfree (hyplist);
    printf ("\n");
    
    /* Log the entire Viterbi word lattice */
    sprintf (latfile, "%s.lat", uttid);
    if ((latfp = fopen(latfile, "w")) == NULL) {
	E_ERROR("fopen(%s,w) failed; using stdout\n", latfile);
	latfp = stdout;
    }
    n_vithist = vithist_log (latfp, kb->vithist, f, _dict_wordstr, (void *)kb->dict);
    if (latfp != stdout)
	fclose (latfp);
    else {
	printf ("\n");
	fflush (stdout);
    }
    
    ptmr_stop (kb->tm);
    if (f > 0) {
	printf("TMR(%s): %5d frames; %.1fs CPU, %.2f xRT; %.1fs CPU(search), %.2f xRT; %.1fs Elapsed, %.2f xRT\n",
	       uttid, f,
	       kb->tm->t_cpu, kb->tm->t_cpu * 100.0 / f,
	       kb->tm_search->t_cpu, kb->tm_search->t_cpu * 100.0 / f,
	       kb->tm->t_elapsed, kb->tm->t_elapsed * 100.0 / f);
	printf("CTR(%s): %5d frames; %d Sen (%.1f/fr); %d HMM (%.1f/fr); %d Words (%.1f/fr)\n",
	       uttid, f,
	       kb->n_sen_eval, ((float64)kb->n_sen_eval) / f,
	       kb->n_hmm_eval, ((float64)kb->n_hmm_eval) / f,
	       n_vithist, ((float64) n_vithist) / f);
    }
    
    /* Cleanup */
    glist_free (kb->lextree_active);
    kb->lextree_active = NULL;
    for (; f >= -1; --f) {	/* I.e., including dummy START_WORD node at frame -1 */
	glist_myfree (kb->vithist[f], sizeof(vithist_t));
	kb->vithist[f] = NULL;
    }
    
    lm_cache_reset (kb->lm);
}