/*********************************************************************
 *
 * Function: corpus_set_partition
 *
 * Description:
 *    Restricts processing to one set out of a partition of the
 *    corpus into PARTS (roughly) equal sized sets.  The control
 *    file is scanned once to count its lines; each set is given
 *    (line count / PARTS) entries, except the last one, which runs
 *    until end of file and therefore also absorbs the remainder.
 *
 * Function Inputs:
 *    uint32 part -
 *      Selects which of the PARTS sets to use (runs from 1..PARTS).
 *
 *    uint32 parts -
 *      The number of total (roughly equal sized) sets in the
 *      partition.  Must be non-zero.
 *
 * Global Inputs:
 *    ctl_fp - the control file stream; must already be set.
 *
 * Return Values:
 *    S3_ERROR - the control file has not been set, or PART/PARTS
 *               are out of range.
 *    Otherwise the value returned by corpus_set_interval().
 *
 * Global Outputs:
 *    None written directly; the file position of ctl_fp is changed
 *    and the selected interval is handed to corpus_set_interval().
 *
 *********************************************************************/
int
corpus_set_partition(uint32 part,
                     uint32 parts)
{
    uint32 run_len;
    uint32 n_skip;
    uint32 n_lines = 0;
    lineiter_t *li;

    if (ctl_fp == NULL) {
        E_ERROR("Control file has not been set\n");
        return S3_ERROR;
    }

    /* Guard the arithmetic below: parts == 0 would divide by zero and
     * part == 0 would wrap (part - 1) around to a huge skip count. */
    if (parts == 0 || part == 0 || part > parts) {
        E_ERROR("Invalid partition %u of %u (expected 1 <= part <= parts)\n",
                part, parts);
        return S3_ERROR;
    }

    /* Count the lines of the control file. */
    for (li = lineiter_start(ctl_fp); li; li = lineiter_next(li)) {
        n_lines++;
    }

    fseek(ctl_fp, 0L, SEEK_SET);

    /* NOTE(review): this reads and immediately discards the first line
     * after rewinding; the intent is not evident from this function.
     * Kept so the stream state handed to corpus_set_interval() is
     * unchanged. */
    li = lineiter_start(ctl_fp);
    lineiter_free(li);

    run_len = n_lines / parts;
    n_skip = (part - 1) * run_len;

    /* The last set takes whatever is left over from the integer division. */
    if (part == parts)
        run_len = UNTIL_EOF;

    return corpus_set_interval(n_skip, run_len);
}
int corpus_reset() { lineiter_t* li; n_run = UNTIL_EOF; assert(ctl_fp); fseek(ctl_fp, 0L, SEEK_SET); if (transcription_fp) fseek(transcription_fp, 0L, SEEK_SET); li = lineiter_start_clean(ctl_fp); if (li == NULL) { E_ERROR("Must be at least one line in the control file\n"); return S3_ERROR; } parse_ctl_line(li->buf, &next_ctl_path, &next_ctl_sf, &next_ctl_ef, &next_ctl_utt_id); lineiter_free (li); corpus_set_interval(sv_n_skip, sv_run_len); return S3_SUCCESS; }
/*
 * Read a checkpoint file holding two unsigned integers and apply them
 * as the corpus interval.
 *
 * fn - path of the checkpoint file.
 *
 * The two values are passed, in the order read, as the first and second
 * arguments of corpus_set_interval(), whose result is returned.
 * Returns S3_ERROR when the file cannot be opened or does not contain
 * two unsigned integers.
 */
int corpus_ckpt_set_interval(const char *fn)
{
    uint32 offset, length;
    FILE *ckpt = fopen(fn, "r");

    if (!ckpt) {
        E_ERROR_SYSTEM("Can't open ckpt file %s", fn);
        return S3_ERROR;
    }

    if (fscanf(ckpt, "%u %u", &offset, &length) == 2) {
        fclose(ckpt);
        return corpus_set_interval(offset, length);
    }

    E_ERROR("Problems reading ckpt file %s\n", fn);
    fclose(ckpt);
    return S3_ERROR;
}
static int initialize(int argc, char *argv[]) { const char *fdictfn; const char *dictfn; const char *ts2cbfn; uint32 n_ts; uint32 n_cb; /* define, parse and (partially) validate the command line */ parse_cmd_ln(argc, argv); feat = feat_init(cmd_ln_str("-feat"), cmn_type_from_str(cmd_ln_str("-cmn")), cmd_ln_boolean("-varnorm"), agc_type_from_str(cmd_ln_str("-agc")), 1, cmd_ln_int32("-ceplen")); if (cmd_ln_str("-lda")) { E_INFO("Reading linear feature transformation from %s\n", cmd_ln_str("-lda")); if (feat_read_lda(feat, cmd_ln_str("-lda"), cmd_ln_int32("-ldadim")) < 0) return -1; } if (cmd_ln_str("-svspec")) { int32 **subvecs; E_INFO("Using subvector specification %s\n", cmd_ln_str("-svspec")); if ((subvecs = parse_subvecs(cmd_ln_str("-svspec"))) == NULL) return -1; if ((feat_set_subvecs(feat, subvecs)) < 0) return -1; } if (cmd_ln_exists("-agcthresh") && 0 != strcmp(cmd_ln_str("-agc"), "none")) { agc_set_threshold(feat->agc_struct, cmd_ln_float32("-agcthresh")); } if (feat->cmn_struct && cmd_ln_exists("-cmninit")) { char *c, *cc, *vallist; int32 nvals; vallist = ckd_salloc(cmd_ln_str("-cmninit")); c = vallist; nvals = 0; while (nvals < feat->cmn_struct->veclen && (cc = strchr(c, ',')) != NULL) { *cc = '\0'; feat->cmn_struct->cmn_mean[nvals] = FLOAT2MFCC(atof(c)); c = cc + 1; ++nvals; } if (nvals < feat->cmn_struct->veclen && *c != '\0') { feat->cmn_struct->cmn_mean[nvals] = FLOAT2MFCC(atof(c)); } ckd_free(vallist); } if (cmd_ln_str("-segdir")) corpus_set_seg_dir(cmd_ln_str("-segdir")); if (cmd_ln_str("-segext")) corpus_set_seg_ext(cmd_ln_str("-segext")); corpus_set_mfcc_dir(cmd_ln_str("-cepdir")); corpus_set_mfcc_ext(cmd_ln_str("-cepext")); if (cmd_ln_str("-lsnfn")) corpus_set_lsn_filename(cmd_ln_str("-lsnfn")); corpus_set_ctl_filename(cmd_ln_str("-ctlfn")); if (cmd_ln_int32("-nskip") && cmd_ln_int32("-runlen")) { corpus_set_interval(cmd_ln_int32("-nskip"), cmd_ln_int32("-runlen")); } else if (cmd_ln_int32("-part") && cmd_ln_int32("-npart")) { 
corpus_set_partition(cmd_ln_int32("-part"), cmd_ln_int32("-npart")); } if (corpus_init() != S3_SUCCESS) { return S3_ERROR; } if (cmd_ln_str("-moddeffn")) { E_INFO("Reading %s\n", cmd_ln_str("-moddeffn")); /* Read in the model definitions. Defines the set of CI phones and context dependent phones. Defines the transition matrix tying and state level tying. */ if (model_def_read(&mdef, cmd_ln_str("-moddeffn")) != S3_SUCCESS) { return S3_ERROR; } ts2cbfn = cmd_ln_str("-ts2cbfn"); if (strcmp(SEMI_LABEL, ts2cbfn) == 0) { mdef->cb = semi_ts2cb(mdef->n_tied_state); n_ts = mdef->n_tied_state; n_cb = 1; } else if (strcmp(CONT_LABEL, ts2cbfn) == 0) { mdef->cb = cont_ts2cb(mdef->n_tied_state); n_ts = mdef->n_tied_state; n_cb = mdef->n_tied_state; } else if (strcmp(PTM_LABEL, ts2cbfn) == 0) { mdef->cb = ptm_ts2cb(mdef); n_ts = mdef->n_tied_state; n_cb = mdef->acmod_set->n_ci; } else if (s3ts2cb_read(ts2cbfn, &mdef->cb, &n_ts, &n_cb) != S3_SUCCESS) { return S3_ERROR; } dictfn = cmd_ln_str("-dictfn"); if (dictfn == NULL) { E_FATAL("You must specify a content dictionary using -dictfn\n"); } E_INFO("Reading %s\n", dictfn); lex = lexicon_read(NULL, /* no lexicon to start */ dictfn, mdef->acmod_set); if (lex == NULL) return S3_ERROR; fdictfn = cmd_ln_str("-fdictfn"); if (fdictfn) { E_INFO("Reading %s\n", fdictfn); (void)lexicon_read(lex, /* add filler words content lexicon */ fdictfn, mdef->acmod_set); } } return S3_SUCCESS; }
/*
 * Parse the command line, configure the feature and corpus modules and,
 * when -moddeffn is given, load the model definition and lexicon.
 *
 * out_lex  - receives the lexicon read from -dictfn (plus -fdictfn), or
 *            NULL when -moddeffn was not given.
 * out_mdef - receives the model definition, or NULL when -moddeffn was
 *            not given.
 * argc, argv - the program's command line, passed to parse_cmd_ln().
 *
 * Both output pointers are written only on success.
 *
 * Returns S3_SUCCESS on success and S3_ERROR on failure.  A failed LDA
 * read and a missing -dictfn are reported through E_FATAL.
 */
static int
initialize(lexicon_t **out_lex,
           model_def_t **out_mdef,
           int argc,
           char *argv[])
{
    lexicon_t *lex = NULL;
    model_def_t *mdef = NULL;
    const char *fdictfn;
    const char *dictfn;
    const char *ts2cbfn;
    uint32 n_ts;
    uint32 n_cb;

    /* define, parse and (partially) validate the command line */
    parse_cmd_ln(argc, argv);

    if (cmd_ln_access("-feat") != NULL) {
        feat_set(cmd_ln_str("-feat"));
        feat_set_in_veclen(cmd_ln_int32("-ceplen"));
        feat_set_subvecs(cmd_ln_str("-svspec"));
    }
    else {
        E_ERROR("Specify the feature extraction algorithm using -feat\n");
        return S3_ERROR;
    }

    if (cmd_ln_access("-ldafn") != NULL) {
        if (feat_read_lda(cmd_ln_access("-ldafn"), cmd_ln_int32("-ldadim"))) {
            E_FATAL("Failed to read LDA matrix\n");
        }
    }

    if (cmd_ln_access("-segdir"))
        corpus_set_seg_dir(cmd_ln_access("-segdir"));
    if (cmd_ln_access("-segext"))
        corpus_set_seg_ext(cmd_ln_access("-segext"));

    corpus_set_mfcc_dir(cmd_ln_access("-cepdir"));
    corpus_set_mfcc_ext(cmd_ln_access("-cepext"));

    if (cmd_ln_access("-lsnfn"))
        corpus_set_lsn_filename(cmd_ln_access("-lsnfn"));

    corpus_set_ctl_filename(cmd_ln_access("-ctlfn"));

    /* -nskip/-runlen takes precedence over -part/-npart.  A failure to
     * apply the requested subset is reported rather than silently
     * falling back to the whole corpus. */
    if (cmd_ln_access("-nskip") && cmd_ln_access("-runlen")) {
        if (corpus_set_interval(*(int32 *)cmd_ln_access("-nskip"),
                                *(int32 *)cmd_ln_access("-runlen")) != S3_SUCCESS) {
            E_ERROR("Unable to apply -nskip/-runlen to the corpus\n");
            return S3_ERROR;
        }
    }
    else if (cmd_ln_access("-part") && cmd_ln_access("-npart")) {
        if (corpus_set_partition(*(uint32 *)cmd_ln_access("-part"),
                                 *(uint32 *)cmd_ln_access("-npart")) != S3_SUCCESS) {
            E_ERROR("Unable to apply -part/-npart to the corpus\n");
            return S3_ERROR;
        }
    }

    if (corpus_init() != S3_SUCCESS) {
        return S3_ERROR;
    }

    if (cmd_ln_access("-moddeffn")) {
        E_INFO("Reading %s\n",
               cmd_ln_access("-moddeffn"));

        /* Read in the model definitions.  Defines the set of
           CI phones and context dependent phones.  Defines the
           transition matrix tying and state level tying. */
        if (model_def_read(&mdef,
                           cmd_ln_access("-moddeffn")) != S3_SUCCESS) {
            return S3_ERROR;
        }

        ts2cbfn = (const char *)cmd_ln_access("-ts2cbfn");
        if (ts2cbfn == NULL) {
            /* strcmp() below must not be handed a null pointer. */
            E_ERROR("You must specify a tied-state-to-codebook mapping using -ts2cbfn\n");
            return S3_ERROR;
        }

        /* The three labels select built-in mappings; any other value is
         * passed to s3ts2cb_read() as a file name. */
        if (strcmp(SEMI_LABEL, ts2cbfn) == 0) {
            mdef->cb = semi_ts2cb(mdef->n_tied_state);
            n_ts = mdef->n_tied_state;
            n_cb = 1;
        }
        else if (strcmp(CONT_LABEL, ts2cbfn) == 0) {
            mdef->cb = cont_ts2cb(mdef->n_tied_state);
            n_ts = mdef->n_tied_state;
            n_cb = mdef->n_tied_state;
        }
        else if (strcmp(PTM_LABEL, ts2cbfn) == 0) {
            mdef->cb = ptm_ts2cb(mdef);
            n_ts = mdef->n_tied_state;
            n_cb = mdef->acmod_set->n_ci;
        }
        else if (s3ts2cb_read(ts2cbfn,
                              &mdef->cb,
                              &n_ts,
                              &n_cb) != S3_SUCCESS) {
            return S3_ERROR;
        }

        dictfn = cmd_ln_access("-dictfn");
        if (dictfn == NULL) {
            E_FATAL("You must specify a content dictionary using -dictfn\n");
        }

        E_INFO("Reading %s\n", dictfn);

        lex = lexicon_read(NULL,    /* no lexicon to start */
                           dictfn,
                           mdef->acmod_set);
        if (lex == NULL)
            return S3_ERROR;

        fdictfn = cmd_ln_access("-fdictfn");
        if (fdictfn) {
            E_INFO("Reading %s\n", fdictfn);

            /* The result is deliberately discarded, as in the original. */
            (void)lexicon_read(lex, /* add filler words content lexicon */
                               fdictfn,
                               mdef->acmod_set);
        }
    }

    /* Hand the results back only once everything has succeeded. */
    *out_mdef = mdef;
    *out_lex = lex;

    return S3_SUCCESS;
}