Example #1
0
/*********************************************************************
 *
 * Function: corpus_set_partition
 * 
 * Description: 
 *    This function allows one to specify a set R of a partition of
 *    the corpus into S (roughly) equal sized partitions.
 * 
 * Function Inputs: 
 *    uint32 r -
 *	This argument selects the Rth OF_S sets (R runs from 1..OF_S)
 *
 *    uint32 of_s -
 *	The number of total (roughly equal sized) sets in the partition.
 * 
 * Global Inputs: 
 *    None
 * 
 * Return Values: 
 *    S3_SUCCESS - Operation completed successfully
 *    S3_ERROR   - Operation did not complete successfully
 * 
 * Global Outputs: 
 *    None
 * 
 *********************************************************************/
int
corpus_set_partition(uint32 part,
		     uint32 parts)
{
    uint32 run_len;
    uint32 n_skip;
    int lineno = 0;
    lineiter_t* li;

    if (ctl_fp == NULL) {
	E_ERROR("Control file has not been set\n");

	return S3_ERROR;
    }

    for (li = lineiter_start(ctl_fp); li; li = lineiter_next(li)) {
	lineno++;
    }

    fseek(ctl_fp, 0L, SEEK_SET);
    
    li = lineiter_start(ctl_fp);
    lineiter_free(li);
    
    run_len = lineno / parts;

    n_skip = (part - 1) * run_len;

    if (part == parts)
	run_len = UNTIL_EOF;

    return corpus_set_interval(n_skip, run_len);
}
Example #2
0
int
corpus_reset()
{
    lineiter_t* li;
    n_run = UNTIL_EOF;

    assert(ctl_fp);
    fseek(ctl_fp, 0L, SEEK_SET);

    if (transcription_fp)
        fseek(transcription_fp, 0L, SEEK_SET);

    li = lineiter_start_clean(ctl_fp);

    if (li == NULL) {
	E_ERROR("Must be at least one line in the control file\n");
	return S3_ERROR;
    }

    parse_ctl_line(li->buf,
		   &next_ctl_path,
		   &next_ctl_sf,
		   &next_ctl_ef,
		   &next_ctl_utt_id);
    lineiter_free (li);


    corpus_set_interval(sv_n_skip, sv_run_len);

    return S3_SUCCESS;
}
Example #3
0
int
corpus_ckpt_set_interval(const char *fn)
{
    FILE *fp;
    uint32 o, rl;

    fp = fopen(fn, "r");
    if (fp == NULL) {
	E_ERROR_SYSTEM("Can't open ckpt file %s", fn);

	return S3_ERROR;
    }

    if (fscanf(fp, "%u %u", &o, &rl) != 2) {
	E_ERROR("Problems reading ckpt file %s\n", fn);
	fclose(fp);

	return S3_ERROR;
    }

    fclose(fp);

    return corpus_set_interval(o, rl);
}
Example #4
0
static int
initialize(int argc,
	   char *argv[])
{
    const char *fdictfn;
    const char *dictfn;
    const char *ts2cbfn;
    uint32 n_ts;
    uint32 n_cb;

    /* define, parse and (partially) validate the command line */
    parse_cmd_ln(argc, argv);

    feat = 
        feat_init(cmd_ln_str("-feat"),
                  cmn_type_from_str(cmd_ln_str("-cmn")),
                  cmd_ln_boolean("-varnorm"),
                  agc_type_from_str(cmd_ln_str("-agc")),
                  1, cmd_ln_int32("-ceplen"));


    if (cmd_ln_str("-lda")) {
        E_INFO("Reading linear feature transformation from %s\n",
               cmd_ln_str("-lda"));
        if (feat_read_lda(feat,
                          cmd_ln_str("-lda"),
                          cmd_ln_int32("-ldadim")) < 0)
            return -1;
    }

    if (cmd_ln_str("-svspec")) {
        int32 **subvecs;
        E_INFO("Using subvector specification %s\n", 
               cmd_ln_str("-svspec"));
        if ((subvecs = parse_subvecs(cmd_ln_str("-svspec"))) == NULL)
            return -1;
        if ((feat_set_subvecs(feat, subvecs)) < 0)
            return -1;
    }

    if (cmd_ln_exists("-agcthresh")
        && 0 != strcmp(cmd_ln_str("-agc"), "none")) {
        agc_set_threshold(feat->agc_struct,
                          cmd_ln_float32("-agcthresh"));
    }

    if (feat->cmn_struct
        && cmd_ln_exists("-cmninit")) {
        char *c, *cc, *vallist;
        int32 nvals;

        vallist = ckd_salloc(cmd_ln_str("-cmninit"));
        c = vallist;
        nvals = 0;
        while (nvals < feat->cmn_struct->veclen
               && (cc = strchr(c, ',')) != NULL) {
            *cc = '\0';
            feat->cmn_struct->cmn_mean[nvals] = FLOAT2MFCC(atof(c));
            c = cc + 1;
            ++nvals;
        }
        if (nvals < feat->cmn_struct->veclen && *c != '\0') {
            feat->cmn_struct->cmn_mean[nvals] = FLOAT2MFCC(atof(c));
        }
        ckd_free(vallist);
    }


    if (cmd_ln_str("-segdir"))
	corpus_set_seg_dir(cmd_ln_str("-segdir"));
    if (cmd_ln_str("-segext"))
	corpus_set_seg_ext(cmd_ln_str("-segext"));

    corpus_set_mfcc_dir(cmd_ln_str("-cepdir"));
    corpus_set_mfcc_ext(cmd_ln_str("-cepext"));

    if (cmd_ln_str("-lsnfn"))
	corpus_set_lsn_filename(cmd_ln_str("-lsnfn"));

    corpus_set_ctl_filename(cmd_ln_str("-ctlfn"));

    if (cmd_ln_int32("-nskip") && cmd_ln_int32("-runlen")) {
        corpus_set_interval(cmd_ln_int32("-nskip"),
			    cmd_ln_int32("-runlen"));
    } else if (cmd_ln_int32("-part") && cmd_ln_int32("-npart")) {
	corpus_set_partition(cmd_ln_int32("-part"),
			     cmd_ln_int32("-npart"));
    }
    

    if (corpus_init() != S3_SUCCESS) {
	return S3_ERROR;
    }
    
    if (cmd_ln_str("-moddeffn")) {
	E_INFO("Reading %s\n", cmd_ln_str("-moddeffn"));
    
	/* Read in the model definitions.  Defines the set of
	   CI phones and context dependent phones.  Defines the
	   transition matrix tying and state level tying. */
	if (model_def_read(&mdef,
			   cmd_ln_str("-moddeffn")) != S3_SUCCESS) {
	    return S3_ERROR;
	}
	
	ts2cbfn = cmd_ln_str("-ts2cbfn");
	if (strcmp(SEMI_LABEL, ts2cbfn) == 0) {
	    mdef->cb = semi_ts2cb(mdef->n_tied_state);
	    n_ts = mdef->n_tied_state;
	    n_cb = 1;
	}
	else if (strcmp(CONT_LABEL, ts2cbfn) == 0) {
	    mdef->cb = cont_ts2cb(mdef->n_tied_state);
	    n_ts = mdef->n_tied_state;
	    n_cb = mdef->n_tied_state;
	}
	else if (strcmp(PTM_LABEL, ts2cbfn) == 0) {
	    mdef->cb = ptm_ts2cb(mdef);
	    n_ts = mdef->n_tied_state;
	    n_cb = mdef->acmod_set->n_ci;
	}
	else if (s3ts2cb_read(ts2cbfn,
			      &mdef->cb,
			      &n_ts,
			      &n_cb) != S3_SUCCESS) {
	    return S3_ERROR;
	}

	dictfn = cmd_ln_str("-dictfn");

	if (dictfn == NULL) {
	    E_FATAL("You must specify a content dictionary using -dictfn\n");
	}

	E_INFO("Reading %s\n", dictfn);
	
	lex = lexicon_read(NULL,	/* no lexicon to start */
			   dictfn,
			   mdef->acmod_set);
	if (lex == NULL)
	    return S3_ERROR;
    
	fdictfn = cmd_ln_str("-fdictfn");

	if (fdictfn) {
	    E_INFO("Reading %s\n", fdictfn);
	    
	    (void)lexicon_read(lex,	/* add filler words content lexicon */
			       fdictfn,
			       mdef->acmod_set);
	}
    }

    return S3_SUCCESS;
}
Example #5
0
File: main.c Project: 10v/cmusphinx
static int
initialize(lexicon_t **out_lex,
	   model_def_t **out_mdef,
	   int argc,
	   char *argv[])
{
    lexicon_t *lex = NULL;
    model_def_t *mdef = NULL;
    const char *fdictfn;
    const char *dictfn;
    const char *ts2cbfn;
    uint32 n_ts;
    uint32 n_cb;

    /* define, parse and (partially) validate the command line */
    parse_cmd_ln(argc, argv);

    if (cmd_ln_access("-feat") != NULL) {
	feat_set(cmd_ln_str("-feat"));
	feat_set_in_veclen(cmd_ln_int32("-ceplen"));
	feat_set_subvecs(cmd_ln_str("-svspec"));
    }
    else {
	E_ERROR("Specify the feature extraction algorithm using -feat\n");

	return S3_ERROR;
    }
    if (cmd_ln_access("-ldafn") != NULL) {
	if (feat_read_lda(cmd_ln_access("-ldafn"), cmd_ln_int32("-ldadim"))) {
	    E_FATAL("Failed to read LDA matrix\n");
	}
    }

    if (cmd_ln_access("-segdir"))
	corpus_set_seg_dir(cmd_ln_access("-segdir"));
    if (cmd_ln_access("-segext"))
	corpus_set_seg_ext(cmd_ln_access("-segext"));

    corpus_set_mfcc_dir(cmd_ln_access("-cepdir"));
    corpus_set_mfcc_ext(cmd_ln_access("-cepext"));

    if (cmd_ln_access("-lsnfn"))
	corpus_set_lsn_filename(cmd_ln_access("-lsnfn"));

    corpus_set_ctl_filename(cmd_ln_access("-ctlfn"));
    
    if ( cmd_ln_access("-nskip") && cmd_ln_access("-runlen") ) {
	corpus_set_interval(*(int32 *)cmd_ln_access("-nskip"),
			    *(int32 *)cmd_ln_access("-runlen"));
    }
    else if (cmd_ln_access("-part") && cmd_ln_access("-npart")) {
	corpus_set_partition(*(uint32 *)cmd_ln_access("-part"),
			     *(uint32 *)cmd_ln_access("-npart"));
    }
    

    if (corpus_init() != S3_SUCCESS) {
	return S3_ERROR;
    }
    
    if (cmd_ln_access("-moddeffn")) {
	E_INFO("Reading %s\n", cmd_ln_access("-moddeffn"));
    
	/* Read in the model definitions.  Defines the set of
	   CI phones and context dependent phones.  Defines the
	   transition matrix tying and state level tying. */
	if (model_def_read(&mdef,
			   cmd_ln_access("-moddeffn")) != S3_SUCCESS) {
	    return S3_ERROR;
	}
	
	ts2cbfn = (const char *)cmd_ln_access("-ts2cbfn");
	if (strcmp(SEMI_LABEL, ts2cbfn) == 0) {
	    mdef->cb = semi_ts2cb(mdef->n_tied_state);
	    n_ts = mdef->n_tied_state;
	    n_cb = 1;
	}
	else if (strcmp(CONT_LABEL, ts2cbfn) == 0) {
	    mdef->cb = cont_ts2cb(mdef->n_tied_state);
	    n_ts = mdef->n_tied_state;
	    n_cb = mdef->n_tied_state;
	}
	else if (strcmp(PTM_LABEL, ts2cbfn) == 0) {
	    mdef->cb = ptm_ts2cb(mdef);
	    n_ts = mdef->n_tied_state;
	    n_cb = mdef->acmod_set->n_ci;
	}
	else if (s3ts2cb_read(ts2cbfn,
			      &mdef->cb,
			      &n_ts,
			      &n_cb) != S3_SUCCESS) {
	    return S3_ERROR;
	}

	dictfn = cmd_ln_access("-dictfn");

	if (dictfn == NULL) {
	    E_FATAL("You must specify a content dictionary using -dictfn\n");
	}

	E_INFO("Reading %s\n", dictfn);
	
	lex = lexicon_read(NULL,	/* no lexicon to start */
			   dictfn,
			   mdef->acmod_set);
	if (lex == NULL)
	    return S3_ERROR;
    
	fdictfn = cmd_ln_access("-fdictfn");

	if (fdictfn) {
	    E_INFO("Reading %s\n", fdictfn);
	    
	    (void)lexicon_read(lex,	/* add filler words content lexicon */
			       fdictfn,
			       mdef->acmod_set);
	}
    }

    *out_mdef = mdef;
    *out_lex = lex;

    return S3_SUCCESS;
}