Beispiel #1
0
/*
 * Pick the initial model according to the `start` argument.
 *
 *   "rand"  -> random_init(k, corpus->nterms)
 *   "seed"  -> corpus_init(k, corpus)
 *   other   -> read_llna_model(start); the string is handed over unchanged
 *              (presumably a path/prefix of a saved model -- confirm against
 *              read_llna_model).
 *
 * The chosen start mode is echoed on stdout before the model is built.
 * Returns whatever the selected constructor returns.
 */
llna_model* em_initial_model(int k, corpus* corpus, char* start)
{
    printf("starting from %s\n", start);

    if (!strcmp(start, "rand"))
        return random_init(k, corpus->nterms);

    if (!strcmp(start, "seed"))
        return corpus_init(k, corpus);

    /* Neither keyword matched: hand the string to the model reader. */
    return read_llna_model(start);
}
/*
 * One-time program setup driven by the command line.
 *
 * Steps, in order:
 *   1. parse the command line;
 *   2. create the feature module (stored in `feat`, which is declared
 *      outside this function) and apply the optional -lda, -svspec,
 *      -agcthresh and -cmninit settings to it;
 *   3. configure the corpus module (directories, extensions, control and
 *      transcript files, interval or partition) and call corpus_init();
 *   4. if -moddeffn is given, read the model definition into `mdef`, set up
 *      the tied-state-to-codebook mapping and read the dictionaries into
 *      `lex` (both declared outside this function).
 *
 * Returns S3_SUCCESS on success.  LDA and subvector failures return -1; the
 * remaining failures return S3_ERROR.  A missing -ts2cbfn or -dictfn (when
 * -moddeffn is given) is reported through E_FATAL.
 */
static int
initialize(int argc,
           char *argv[])
{
    const char *fdictfn;
    const char *dictfn;
    const char *ts2cbfn;
    uint32 n_ts;
    uint32 n_cb;

    /* define, parse and (partially) validate the command line */
    parse_cmd_ln(argc, argv);

    feat =
        feat_init(cmd_ln_str("-feat"),
                  cmn_type_from_str(cmd_ln_str("-cmn")),
                  cmd_ln_boolean("-varnorm"),
                  agc_type_from_str(cmd_ln_str("-agc")),
                  1, cmd_ln_int32("-ceplen"));

    /* Everything below dereferences or passes on `feat`; stop here rather
       than crash if the feature module could not be created. */
    if (feat == NULL)
        return S3_ERROR;

    if (cmd_ln_str("-lda")) {
        E_INFO("Reading linear feature transformation from %s\n",
               cmd_ln_str("-lda"));
        if (feat_read_lda(feat,
                          cmd_ln_str("-lda"),
                          cmd_ln_int32("-ldadim")) < 0)
            return -1;
    }

    if (cmd_ln_str("-svspec")) {
        int32 **subvecs;
        E_INFO("Using subvector specification %s\n",
               cmd_ln_str("-svspec"));
        if ((subvecs = parse_subvecs(cmd_ln_str("-svspec"))) == NULL)
            return -1;
        if ((feat_set_subvecs(feat, subvecs)) < 0)
            return -1;
    }

    /* Only apply the AGC threshold when AGC is actually enabled. */
    if (cmd_ln_exists("-agcthresh")
        && 0 != strcmp(cmd_ln_str("-agc"), "none")) {
        agc_set_threshold(feat->agc_struct,
                          cmd_ln_float32("-agcthresh"));
    }

    /* -cmninit is a comma-separated list of numbers used to seed the CMN
       mean vector.  At most `veclen` values are consumed; any further
       values in the list are ignored. */
    if (feat->cmn_struct
        && cmd_ln_exists("-cmninit")) {
        char *c, *cc, *vallist;
        int32 nvals;

        /* Work on a private copy because the commas are overwritten. */
        vallist = ckd_salloc(cmd_ln_str("-cmninit"));
        c = vallist;
        nvals = 0;
        while (nvals < feat->cmn_struct->veclen
               && (cc = strchr(c, ',')) != NULL) {
            *cc = '\0';
            feat->cmn_struct->cmn_mean[nvals] = FLOAT2MFCC(atof(c));
            c = cc + 1;
            ++nvals;
        }
        /* Last value: the one not followed by a comma. */
        if (nvals < feat->cmn_struct->veclen && *c != '\0') {
            feat->cmn_struct->cmn_mean[nvals] = FLOAT2MFCC(atof(c));
        }
        ckd_free(vallist);
    }


    if (cmd_ln_str("-segdir"))
        corpus_set_seg_dir(cmd_ln_str("-segdir"));
    if (cmd_ln_str("-segext"))
        corpus_set_seg_ext(cmd_ln_str("-segext"));

    corpus_set_mfcc_dir(cmd_ln_str("-cepdir"));
    corpus_set_mfcc_ext(cmd_ln_str("-cepext"));

    if (cmd_ln_str("-lsnfn"))
        corpus_set_lsn_filename(cmd_ln_str("-lsnfn"));

    corpus_set_ctl_filename(cmd_ln_str("-ctlfn"));

    /* An explicit interval (-nskip/-runlen) takes precedence over a
       partition (-part/-npart). */
    if (cmd_ln_int32("-nskip") && cmd_ln_int32("-runlen")) {
        corpus_set_interval(cmd_ln_int32("-nskip"),
                            cmd_ln_int32("-runlen"));
    } else if (cmd_ln_int32("-part") && cmd_ln_int32("-npart")) {
        corpus_set_partition(cmd_ln_int32("-part"),
                             cmd_ln_int32("-npart"));
    }


    if (corpus_init() != S3_SUCCESS) {
        return S3_ERROR;
    }

    if (cmd_ln_str("-moddeffn")) {
        E_INFO("Reading %s\n", cmd_ln_str("-moddeffn"));

        /* Read in the model definitions.  Defines the set of
           CI phones and context dependent phones.  Defines the
           transition matrix tying and state level tying. */
        if (model_def_read(&mdef,
                           cmd_ln_str("-moddeffn")) != S3_SUCCESS) {
            return S3_ERROR;
        }

        ts2cbfn = cmd_ln_str("-ts2cbfn");

        /* The value is compared with strcmp() below, so it must not be
           NULL. */
        if (ts2cbfn == NULL) {
            E_FATAL("You must specify a tied-state-to-codebook mapping using -ts2cbfn\n");
            return S3_ERROR;    /* not reached if E_FATAL terminates */
        }

        /* -ts2cbfn is either one of three keywords selecting a generated
           mapping, or the name of a mapping file to read. */
        if (strcmp(SEMI_LABEL, ts2cbfn) == 0) {
            mdef->cb = semi_ts2cb(mdef->n_tied_state);
            n_ts = mdef->n_tied_state;
            n_cb = 1;
        }
        else if (strcmp(CONT_LABEL, ts2cbfn) == 0) {
            mdef->cb = cont_ts2cb(mdef->n_tied_state);
            n_ts = mdef->n_tied_state;
            n_cb = mdef->n_tied_state;
        }
        else if (strcmp(PTM_LABEL, ts2cbfn) == 0) {
            mdef->cb = ptm_ts2cb(mdef);
            n_ts = mdef->n_tied_state;
            n_cb = mdef->acmod_set->n_ci;
        }
        else if (s3ts2cb_read(ts2cbfn,
                              &mdef->cb,
                              &n_ts,
                              &n_cb) != S3_SUCCESS) {
            return S3_ERROR;
        }

        dictfn = cmd_ln_str("-dictfn");

        if (dictfn == NULL) {
            E_FATAL("You must specify a content dictionary using -dictfn\n");
        }

        E_INFO("Reading %s\n", dictfn);

        lex = lexicon_read(NULL,        /* no lexicon to start */
                           dictfn,
                           mdef->acmod_set);
        if (lex == NULL)
            return S3_ERROR;

        fdictfn = cmd_ln_str("-fdictfn");

        if (fdictfn) {
            E_INFO("Reading %s\n", fdictfn);

            /* The filler dictionary is optional and its read result is
               deliberately not checked. */
            (void)lexicon_read(lex,     /* add filler words content lexicon */
                               fdictfn,
                               mdef->acmod_set);
        }
    }

    return S3_SUCCESS;
}
Beispiel #3
0
/*
 * One-time program setup driven by the command line.
 *
 * Steps, in order:
 *   1. parse the command line;
 *   2. configure the feature module from -feat, -ceplen and -svspec
 *      (-feat is mandatory) and optionally load an LDA matrix (-ldafn);
 *   3. configure the corpus module (directories, extensions, control and
 *      transcript files, interval or partition) and call corpus_init();
 *   4. if -moddeffn is given, read the model definition, set up the
 *      tied-state-to-codebook mapping and read the dictionaries.
 *
 * out_lex   receives the lexicon, or NULL when -moddeffn was not given.
 * out_mdef  receives the model definition, or NULL when -moddeffn was not
 *           given.
 *
 * Returns S3_SUCCESS on success and S3_ERROR on failure.  On failure the
 * two output pointers are not written.  An unreadable LDA matrix or a
 * missing -dictfn is reported through E_FATAL.
 */
static int
initialize(lexicon_t **out_lex,
           model_def_t **out_mdef,
           int argc,
           char *argv[])
{
    lexicon_t *lex = NULL;
    model_def_t *mdef = NULL;
    const char *fdictfn;
    const char *dictfn;
    const char *ts2cbfn;
    uint32 n_ts;
    uint32 n_cb;

    /* define, parse and (partially) validate the command line */
    parse_cmd_ln(argc, argv);

    if (cmd_ln_access("-feat") != NULL) {
        feat_set(cmd_ln_str("-feat"));
        feat_set_in_veclen(cmd_ln_int32("-ceplen"));
        feat_set_subvecs(cmd_ln_str("-svspec"));
    }
    else {
        E_ERROR("Specify the feature extraction algorithm using -feat\n");

        return S3_ERROR;
    }
    if (cmd_ln_access("-ldafn") != NULL) {
        if (feat_read_lda(cmd_ln_access("-ldafn"), cmd_ln_int32("-ldadim"))) {
            E_FATAL("Failed to read LDA matrix\n");
        }
    }

    if (cmd_ln_access("-segdir"))
        corpus_set_seg_dir(cmd_ln_access("-segdir"));
    if (cmd_ln_access("-segext"))
        corpus_set_seg_ext(cmd_ln_access("-segext"));

    corpus_set_mfcc_dir(cmd_ln_access("-cepdir"));
    corpus_set_mfcc_ext(cmd_ln_access("-cepext"));

    if (cmd_ln_access("-lsnfn"))
        corpus_set_lsn_filename(cmd_ln_access("-lsnfn"));

    corpus_set_ctl_filename(cmd_ln_access("-ctlfn"));

    /* An explicit interval (-nskip/-runlen) takes precedence over a
       partition (-part/-npart).  The tests are on the presence of the
       options, not on their values. */
    if ( cmd_ln_access("-nskip") && cmd_ln_access("-runlen") ) {
        corpus_set_interval(*(int32 *)cmd_ln_access("-nskip"),
                            *(int32 *)cmd_ln_access("-runlen"));
    }
    else if (cmd_ln_access("-part") && cmd_ln_access("-npart")) {
        corpus_set_partition(*(uint32 *)cmd_ln_access("-part"),
                             *(uint32 *)cmd_ln_access("-npart"));
    }


    if (corpus_init() != S3_SUCCESS) {
        return S3_ERROR;
    }

    if (cmd_ln_access("-moddeffn")) {
        /* Cast so the argument type matches the %s conversion. */
        E_INFO("Reading %s\n", (const char *)cmd_ln_access("-moddeffn"));

        /* Read in the model definitions.  Defines the set of
           CI phones and context dependent phones.  Defines the
           transition matrix tying and state level tying. */
        if (model_def_read(&mdef,
                           cmd_ln_access("-moddeffn")) != S3_SUCCESS) {
            return S3_ERROR;
        }

        ts2cbfn = (const char *)cmd_ln_access("-ts2cbfn");

        /* The value is compared with strcmp() below, so it must not be
           NULL. */
        if (ts2cbfn == NULL) {
            E_ERROR("Specify the tied-state-to-codebook mapping using -ts2cbfn\n");

            return S3_ERROR;
        }

        /* -ts2cbfn is either one of three keywords selecting a generated
           mapping, or the name of a mapping file to read. */
        if (strcmp(SEMI_LABEL, ts2cbfn) == 0) {
            mdef->cb = semi_ts2cb(mdef->n_tied_state);
            n_ts = mdef->n_tied_state;
            n_cb = 1;
        }
        else if (strcmp(CONT_LABEL, ts2cbfn) == 0) {
            mdef->cb = cont_ts2cb(mdef->n_tied_state);
            n_ts = mdef->n_tied_state;
            n_cb = mdef->n_tied_state;
        }
        else if (strcmp(PTM_LABEL, ts2cbfn) == 0) {
            mdef->cb = ptm_ts2cb(mdef);
            n_ts = mdef->n_tied_state;
            n_cb = mdef->acmod_set->n_ci;
        }
        else if (s3ts2cb_read(ts2cbfn,
                              &mdef->cb,
                              &n_ts,
                              &n_cb) != S3_SUCCESS) {
            return S3_ERROR;
        }

        dictfn = cmd_ln_access("-dictfn");

        if (dictfn == NULL) {
            E_FATAL("You must specify a content dictionary using -dictfn\n");
        }

        E_INFO("Reading %s\n", dictfn);

        lex = lexicon_read(NULL,        /* no lexicon to start */
                           dictfn,
                           mdef->acmod_set);
        if (lex == NULL)
            return S3_ERROR;

        fdictfn = cmd_ln_access("-fdictfn");

        if (fdictfn) {
            E_INFO("Reading %s\n", fdictfn);

            /* The filler dictionary is optional and its read result is
               deliberately not checked. */
            (void)lexicon_read(lex,     /* add filler words content lexicon */
                               fdictfn,
                               mdef->acmod_set);
        }
    }

    /* Hand the results (possibly NULL) back to the caller. */
    *out_mdef = mdef;
    *out_lex = lex;

    return S3_SUCCESS;
}