예제 #1
0
/*
 * Rewind the corpus so that iteration starts again from the first
 * control file entry.
 *
 * The run counter is put back to UNTIL_EOF, the control stream (and the
 * transcription stream, when one is open) is repositioned at offset
 * zero, the first control line is parsed into the next_ctl_* look-ahead
 * variables, and the previously saved skip/run interval is re-applied.
 *
 * Returns S3_SUCCESS when a first control line was read, S3_ERROR when
 * the control file yields no line.
 */
int
corpus_reset()
{
    lineiter_t *first_line;

    n_run = UNTIL_EOF;

    /* The control stream is mandatory; the transcription one is optional. */
    assert(ctl_fp);
    fseek(ctl_fp, 0L, SEEK_SET);
    if (transcription_fp != NULL) {
        fseek(transcription_fp, 0L, SEEK_SET);
    }

    first_line = lineiter_start_clean(ctl_fp);
    if (first_line != NULL) {
        /* Prime the look-ahead entry from the first line. */
        parse_ctl_line(first_line->buf,
                       &next_ctl_path,
                       &next_ctl_sf,
                       &next_ctl_ef,
                       &next_ctl_utt_id);
        lineiter_free(first_line);

        corpus_set_interval(sv_n_skip, sv_run_len);

        return S3_SUCCESS;
    }

    E_ERROR("Must be at least one line in the control file\n");
    return S3_ERROR;
}
예제 #2
0
/*********************************************************************
 *
 * Function: corpus_set_ctl_filename
 * 
 * Description: 
 *    This routine sets the control file used to define the corpus.
 *    It opens the control file and parses its first line into the
 *    next_ctl_* look-ahead variables.
 * 
 * Function Inputs: 
 *    const char *ctl_filename -
 * 	This is the file name of the control file.
 *
 * Global Inputs: 
 *    None
 *
 * Return Values: 
 *    S3_SUCCESS -
 *	Indicates the control file was opened and its first line parsed.
 *
 *    S3_ERROR -
 *	Indicates the control file could not be opened, or that it did
 *	not yield a single line.
 *
 * Global Outputs: 
 *    ctl_fp -
 *	Open stream on success; NULL on failure.
 *    next_ctl_path, next_ctl_sf, next_ctl_ef, next_ctl_utt_id -
 *	Filled in by parse_ctl_line() from the first control line.
 *
 * Pre-Conditions: 
 *    ctl_filename argument must be a pointer to a C string.
 * 
 * Post-Conditions: 
 *    On S3_ERROR no stream is left open by this routine.
 * 
 *********************************************************************/
int
corpus_set_ctl_filename(const char *ctl_filename)
{
    lineiter_t *li;

    ctl_fp = fopen(ctl_filename, "rb");
    if (ctl_fp == NULL) {
	E_ERROR_SYSTEM("Unable to open %s for reading",  ctl_filename);
	return S3_ERROR;
    }

    li = lineiter_start_clean(ctl_fp);
    if (li == NULL) {
	E_ERROR("Must be at least one line in the control file\n");

	/* The file is unusable: release the handle instead of leaking it,
	 * and do not leave ctl_fp pointing at a stream we report as bad. */
	fclose(ctl_fp);
	ctl_fp = NULL;
	return S3_ERROR;
    }

    /* Prime the look-ahead entry with the first control line. */
    parse_ctl_line(li->buf,
		   &next_ctl_path,
		   &next_ctl_sf,
		   &next_ctl_ef,
		   &next_ctl_utt_id);
    lineiter_free(li);

    return S3_SUCCESS;
}
예제 #3
0
/*
 * Read the transcription for the current utterance from its own
 * sentence file.
 *
 * The file is obtained through open_file_for_reading(DATA_TYPE_SENT);
 * its first line (as returned by lineiter_start_clean()) is duplicated
 * into *trans.  The caller owns the returned string and must free() it.
 *
 * Returns S3_SUCCESS when *trans was set, S3_ERROR otherwise.  The
 * stream is closed on every path that opened it.
 */
static int
corpus_read_next_sent_file(char **trans)
{
    FILE *fp;
    lineiter_t *li;

    /* open the current file */
    fp = open_file_for_reading(DATA_TYPE_SENT);
    if (fp == NULL) {
        /* NOTE(review): assumes open_file_for_reading() reports failure
         * by returning NULL -- confirm against its definition. */
        E_ERROR("Unable to open sent file %s\n",
                mk_filename(DATA_TYPE_SENT, cur_ctl_path));
        return S3_ERROR;
    }

    li = lineiter_start_clean(fp);
    if (li == NULL) {
        E_ERROR("Unable to read data in sent file %s\n",
                mk_filename(DATA_TYPE_SENT, cur_ctl_path));
        fclose(fp);             /* previously leaked on this path */
        return S3_ERROR;
    }

    *trans = strdup(li->buf);
    lineiter_free(li);
    fclose(fp);

    if (*trans == NULL) {
        E_ERROR("Unable to allocate transcription read from sent file %s\n",
                mk_filename(DATA_TYPE_SENT, cur_ctl_path));
        return S3_ERROR;
    }

    return S3_SUCCESS;
}
예제 #4
0
/*
 * Read a model definition file.
 *
 * Layout expected by this reader:
 *   - one version line that must equal MODEL_DEF_VERSION;
 *   - six "<count> <tag>" lines, in any order, with the tags n_base,
 *     n_tri, n_state_map, n_tied_state, n_tied_ci_state, n_tied_tmat;
 *   - n_base base-phone lines handled by parse_base_line();
 *   - n_tri triphone lines handled by parse_tri_line().
 *
 * Parameters:
 *   out_model_def - receives the newly allocated model definition.
 *   file_name     - path of the file to read.
 *
 * Returns S3_SUCCESS on success, S3_ERROR on any open/format error.
 *
 * NOTE(review): as in the original code, an error detected after the
 * allocations leaves *out_model_def pointing at a partially filled
 * structure that is not released here; callers should not use it when
 * S3_ERROR is returned.
 */
int32
model_def_read(model_def_t **out_model_def,
	       const char *file_name)
{
    lineiter_t *li = NULL;
    uint32 n;
    char tag[32];
    acmod_set_t *acmod_set;
    uint32 i, j;
    acmod_id_t acmod_id;
    uint32 tmat;
    uint32 n_state;
    uint32 n_tri;
    uint32 n_base;
    uint32 n_total_map;
    uint32 n_tied_state;
    uint32 n_tied_ci_state;
    uint32 n_tied_tmat;
    uint32 state[MAX_N_STATE];
    uint32 n_total;
    model_def_t *omd;
    model_def_entry_t *mdef;
    uint32 *all_state;
    uint32 max_tmat;
    uint32 max_state;
    uint32 max_ci_state;
    int32 ret = S3_ERROR;

    FILE *fp;

    fp = fopen(file_name, "r");
    if (fp == NULL) {
	E_WARN_SYSTEM("Unable to open %s for reading",
		      file_name);

	return S3_ERROR;
    }

    li = lineiter_start_clean(fp);

    if (li == NULL) {
	E_ERROR("ERROR not even a version number in %s!?\n",
		file_name);

	goto done;
    }

    if (strcmp(li->buf, MODEL_DEF_VERSION) != 0) {
	E_ERROR("ERROR version(%s) == \"%s\", but expected %s at line %d.\n",
		file_name, li->buf, MODEL_DEF_VERSION, lineiter_lineno(li));

	if (strcmp(li->buf, "0.1") == 0) {
	    E_ERROR("You must add an attribute field to all the model records.  See SPHINX-III File Formats manual\n");
	}

	if (strcmp(li->buf, "0.2") == 0) {
	    E_ERROR("You must add n_tied_state, n_tied_ci_state and n_tied_tmat definitions at the head of the file.  See /net/alf19/usr2/eht/s3/cvtmdef.csh\n");
	}

	goto done;
    }

    n_tri = n_base = n_total_map = n_tied_state = n_tied_ci_state = n_tied_tmat = NO_NUMBER;
    for ( i = 0; i < 6; i++) {
	li = lineiter_next(li);
	if (li == NULL) {
	    E_ERROR("Incomplete count information in %s!?\n",
		    file_name);

	    goto done;
	}

	/* Bound the tag to the buffer size and make sure both fields were
	 * actually read; otherwise tag would be compared uninitialized. */
	if (sscanf(li->buf, "%u %31s", &n, tag) != 2) {
	    E_ERROR("Malformed count line in %s at line %d\n",
		    file_name, lineiter_lineno(li));

	    goto done;
	}

	if (strcmp(tag, "n_base") == 0) {
	    n_base = n;
	}
	else if (strcmp(tag, "n_tri") == 0) {
	    n_tri = n;
	}
	else if (strcmp(tag, "n_state_map") == 0) {
	    n_total_map = n;
	}
	else if (strcmp(tag, "n_tied_state") == 0) {
	    n_tied_state = n;
	}
	else if (strcmp(tag, "n_tied_ci_state") == 0) {
	    n_tied_ci_state = n;
	}
	else if (strcmp(tag, "n_tied_tmat") == 0) {
	    n_tied_tmat = n;
	}
	else {
	    E_ERROR("Unknown tag %s in file at line %d\n",
		    tag, lineiter_lineno(li));

	    goto done;
	}
    }

    /* Move to the first model line; NULL here is only an error if any
     * model is expected, which the loops below check. */
    li = lineiter_next(li);

    *out_model_def = omd = ckd_calloc(1, sizeof(model_def_t));
    omd->acmod_set = acmod_set = acmod_set_new();

    /* give the acmod_set module some storage allocation requirements */
    acmod_set_set_n_ci_hint(acmod_set, n_base);
    acmod_set_set_n_tri_hint(acmod_set, n_tri);

    n_total = n_base + n_tri;

    omd->defn = mdef = ckd_calloc(n_total, sizeof(model_def_entry_t));
    omd->n_total_state = n_total_map;

    /* One shared pool holding the state ids of every model, in order. */
    all_state = ckd_calloc(n_total_map, sizeof(uint32));

    omd->n_tied_ci_state = n_tied_ci_state;
    omd->n_tied_state = n_tied_state;
    omd->n_tied_tmat = n_tied_tmat;

    omd->max_n_state = 0;
    omd->min_n_state = MAX_N_STATE;

    /* Base phone definitions; j is the running offset into all_state. */
    for (i = 0, j = 0, max_state = 0, max_ci_state = 0, max_tmat = 0;
	 i < n_base; i++, j += n_state) {
	if (li == NULL) {
	    E_ERROR("Unexpected end of file in %s after %u of %u model definitions\n",
		    file_name, i, n_total);

	    goto done;
	}

	n_state = MAX_N_STATE;
	if (parse_base_line(li->buf,
			    lineiter_lineno(li),
			    &acmod_id,
			    &tmat,
			    state,
			    &n_state,
			    acmod_set) != S3_SUCCESS) {

	    goto done;
	}

	/* j <= n_total_map holds here, so the subtraction cannot wrap.
	 * Refuse to copy past the end of the all_state pool. */
	if (n_state > n_total_map - j) {
	    E_ERROR("Model at line %d of %s exceeds the declared n_state_map count (%u)\n",
		    lineiter_lineno(li), file_name, n_total_map);

	    goto done;
	}

	mdef[i].p = acmod_id;
	mdef[i].tmat = tmat;
	mdef[i].n_state = n_state;
	mdef[i].state = &all_state[j];

	memcpy((char *)mdef[i].state, (const char *)state,
	       n_state * sizeof(uint32));

	update_totals(omd, &mdef[i]);

	li = lineiter_next(li);
    }

    /* Triphone definitions continue with the same i and j. */
    for (; i < n_total; i++, j += n_state) {
	if (li == NULL) {
	    E_ERROR("Unexpected end of file in %s after %u of %u model definitions\n",
		    file_name, i, n_total);

	    goto done;
	}

	n_state = MAX_N_STATE;

	if (parse_tri_line(li->buf,
			   lineiter_lineno(li),
			   &acmod_id,
			   &tmat,
			   state,
			   &n_state,
			   acmod_set) != S3_SUCCESS) {

	    goto done;
	}

	if (n_state > n_total_map - j) {
	    E_ERROR("Model at line %d of %s exceeds the declared n_state_map count (%u)\n",
		    lineiter_lineno(li), file_name, n_total_map);

	    goto done;
	}

	mdef[i].p = acmod_id;
	mdef[i].tmat = tmat;
	mdef[i].n_state = n_state;
	mdef[i].state = &all_state[j];
	memcpy((char *)mdef[i].state,
	       (const char *)state,
	       n_state * sizeof(uint32));

	update_totals(omd, &mdef[i]);
	li = lineiter_next(li);
    }

    omd->n_defn = n_total;

    assert(j == n_total_map);

    E_INFO("Model definition info:\n");
    E_INFO("%u total models defined (%u base, %u tri)\n", omd->n_defn, n_base, n_tri);
    E_INFO("%u total states\n", omd->n_total_state);
    E_INFO("%u total tied states\n", omd->n_tied_state);
    E_INFO("%u total tied CI states\n", omd->n_tied_ci_state);
    E_INFO("%u total tied transition matrices\n", omd->n_tied_tmat);
    E_INFO("%u max state/model\n", omd->max_n_state);
    E_INFO("%u min state/model\n", omd->min_n_state);

    ret = S3_SUCCESS;

done:
    /* Single exit for everything past a successful fopen().  li may be
     * NULL here; the original code already passed NULL to
     * lineiter_free() on its own error paths. */
    fclose(fp);
    lineiter_free(li);
    return ret;
}
예제 #5
0
파일: dtree.c 프로젝트: Ankit77/cmusphinx
/*
 * Read a decision tree from an already opened stream.
 *
 * Expected input:
 *   - a header line "n_node <count>";
 *   - one record per line:
 *       <node_id> <yes_id|-> <no_id|-> <entropy> <occupancy> [question]
 *     where "-" marks a missing child.  A node with both children is an
 *     internal node: its question is parsed by s3parse_comp_quest() from
 *     the remainder of the line and the entropy value is stored in
 *     wt_ent_dec.  Any other node is a leaf: no question, and the
 *     entropy value is stored in wt_ent.
 *
 * Parameters:
 *   fp     - stream to read from (not closed here).
 *   pset   - phone sets passed through to s3parse_comp_quest().
 *   n_pset - number of entries in pset.
 *
 * Returns the new tree, or NULL when a question failed to parse.
 * Structural format errors are reported through E_FATAL, matching the
 * existing handling of a bad header.
 */
dtree_t *
read_final_tree(FILE *fp,
		pset_t *pset,
		uint32 n_pset)
{
    dtree_t *out;
    dtree_node_t *node;
    uint32  n_node;
    char *s, str[128];
    lineiter_t *ln = NULL;
    int n_scan = 0;		/* %n stores an int */
    uint32 i, node_id, node_id_y, node_id_n;
    comp_quest_t *q;
    float64 ent;
    float32 occ;
    int err;
    int is_internal;

    ln = lineiter_start_clean(fp);

    /* E_FATAL is expected not to return (the original code already relied
     * on that); the returns after it are only a defensive fallback. */
    if (ln == NULL) {
	E_FATAL("Format error; expecting n_node\n");
	return NULL;
    }

    s = ln->buf;
    if (sscanf(s, "%127s%n", str, &n_scan) != 1 ||
	strcmp(str, "n_node") != 0 ||
	sscanf(s + n_scan, "%u", &n_node) != 1) {
	E_FATAL("Format error; expecting n_node\n");
	lineiter_free(ln);
	return NULL;
    }

    out = ckd_calloc(1, sizeof(dtree_t));
    out->n_node = n_node;
    out->node = node = ckd_calloc(n_node, sizeof(dtree_node_t));

    for (i = 0; i < n_node; i++)
	node[i].node_id = i;

    err = FALSE;

    while ((ln = lineiter_next(ln))) {
	s = ln->buf;

	if (sscanf(s, "%u%n", &node_id, &n_scan) != 1)
	    goto malformed;
	s += n_scan;

	if (sscanf(s, "%127s%n", str, &n_scan) != 1)
	    goto malformed;
	s += n_scan;
	if (strcmp(str, "-") == 0) {
	    node_id_y = NO_ID;
	}
	else {
	    node_id_y = atoi(str);
	}

	if (sscanf(s, "%127s%n", str, &n_scan) != 1)
	    goto malformed;
	s += n_scan;
	if (strcmp(str, "-") == 0) {
	    node_id_n = NO_ID;
	}
	else {
	    node_id_n = atoi(str);
	}

	if (sscanf(s, "%le%n", &ent, &n_scan) != 1)
	    goto malformed;
	s += n_scan;

	if (sscanf(s, "%e%n", &occ, &n_scan) != 1)
	    goto malformed;
	s += n_scan;

	/* Internal node == BOTH children present.  The original tested
	 * node_id_y twice and never looked at node_id_n. */
	is_internal = (node_id_y != NO_ID) && (node_id_n != NO_ID);

	/* Every id indexes node[]; reject anything outside the table. */
	if (node_id >= n_node)
	    goto malformed;
	if (is_internal && (node_id_y >= n_node || node_id_n >= n_node))
	    goto malformed;

	if (is_internal) {
	    /* s now points at the question text following the numbers. */
	    q = (comp_quest_t *)ckd_calloc(1, sizeof(comp_quest_t));
	    if (s3parse_comp_quest(pset, n_pset, q, s) != S3_SUCCESS) {
		err = TRUE;
	    }

	    node[node_id].q = q;
	    node[node_id].wt_ent_dec = ent;

	    node[node_id].y = &node[node_id_y];
	    node[node_id].n = &node[node_id_n];
	    node[node_id_y].p = node[node_id_n].p = &node[node_id];
	}
	else {
	    node[node_id].q = NULL;
	    node[node_id].wt_ent = ent;

	    node[node_id].y = NULL;
	    node[node_id].n = NULL;
	}

	node[node_id].occ = occ;
    }

    if (err == TRUE) {
	free_tree(out);
	out = NULL;
    }

    /* ln is NULL once the loop has run to the end of the input. */
    lineiter_free(ln);
    return out;

malformed:
    E_FATAL("Format error; malformed or out-of-range tree node record\n");
    free_tree(out);
    lineiter_free(ln);
    return NULL;
}
예제 #6
0
/*
 * Advance the corpus by one utterance.
 *
 * The look-ahead entry (next_ctl_*) becomes the current entry
 * (cur_ctl_*), the matching transcription line is read when a
 * transcription file is open, and the following control line is parsed
 * into the look-ahead variables.
 *
 * Returns TRUE when a current utterance is available, FALSE when the
 * run count is exhausted or there is no further control entry.
 *
 * NOTE(review): on the early FALSE returns the next_ctl_* pointers are
 * left equal to the cur_ctl_* pointers; a later call frees cur_ctl_*
 * and then copies the same pointers again.  Looks like a latent
 * use-after-free if the function is called again without a reset --
 * confirm against the callers before changing ownership here.
 */
int
corpus_next_utt()
{
    lineiter_t *ctl_line;

    /* Release the previous entry and take over the look-ahead one.
     * free(NULL) is a no-op, so no guards are needed. */
    free(cur_ctl_path);
    cur_ctl_path = next_ctl_path;

    free(cur_ctl_utt_id);
    cur_ctl_utt_id = next_ctl_utt_id;

    cur_ctl_sf = next_ctl_sf;
    cur_ctl_ef = next_ctl_ef;

    /* A bounded run stops once its budget is used up. */
    if (n_run != UNTIL_EOF) {
        if (n_run == 0) {
            return FALSE;
        }
        --n_run;
    }

    ++n_proc;

    /* No path (or an empty one) means there is nothing left to process. */
    if (!cur_ctl_path || cur_ctl_path[0] == '\0') {
        return FALSE;
    }

    /* if a big LSN file exists, position it to the correct line
     * corpus_set_ctl_filename() reads the first line of
     * the control file, so that transcription_fp is one line
     * behind ctl_fp. */
    if (transcription_fp) {
        lineiter_t *lsn_line = lineiter_start_clean(transcription_fp);

        if (!lsn_line) {
            E_FATAL("File length mismatch at line %d in %s\n", n_proc, transcription_filename);
        }

        free(transcription_line);
        transcription_line = strdup(lsn_line->buf);
        lineiter_free(lsn_line);
    }

    /* Refill the look-ahead entry from the next control line, if any. */
    ctl_line = lineiter_start_clean(ctl_fp);
    if (ctl_line == NULL) {
        next_ctl_path = NULL;
        next_ctl_sf = NO_FRAME;
        next_ctl_ef = NO_FRAME;
        next_ctl_utt_id = NULL;
        return TRUE;
    }

    parse_ctl_line(ctl_line->buf,
                   &next_ctl_path,
                   &next_ctl_sf,
                   &next_ctl_ef,
                   &next_ctl_utt_id);
    lineiter_free(ctl_line);

    return TRUE;
}
예제 #7
0
/*
 * Load an ARPA-format language model into a trie-backed n-gram model.
 *
 * Steps: open the (possibly compressed) file, read the n-gram counts,
 * initialise the base model, create the trie and read the unigrams,
 * and, for order > 1, read the higher-order n-grams, fix up the
 * counts and build the trie from them.
 *
 * Returns the base model on success, NULL when the file cannot be
 * opened or when reading the counts or the unigrams fails.
 *
 * NOTE(review): the failure path only releases the model struct with
 * ckd_free(); after ngram_model_init()/lm_trie_create() have run this
 * presumably leaves their allocations behind -- confirm and release
 * them with the matching project destructor.
 */
ngram_model_t *
ngram_model_trie_read_arpa(cmd_ln_t * config,
                           const char *path, logmath_t * lmath)
{
    FILE *arpa_fp;
    lineiter_t *line;
    ngram_model_trie_t *trie_model;
    ngram_model_t *base;
    ngram_raw_t **raw;
    int32 is_pipe;
    uint32 counts[NGRAM_MAX_ORDER];
    uint32 fixed_counts[NGRAM_MAX_ORDER];
    int order;
    int k;

    E_INFO("Trying to read LM in arpa format\n");
    arpa_fp = fopen_comp(path, "r", &is_pipe);
    if (arpa_fp == NULL) {
        E_ERROR("File %s not found\n", path);
        return NULL;
    }

    trie_model = (ngram_model_trie_t *) ckd_calloc(1, sizeof(*trie_model));
    line = lineiter_start_clean(arpa_fp);

    /* Read n-gram counts from file */
    if (read_counts_arpa(&line, counts, &order) == -1)
        goto fail;

    E_INFO("LM of order %d\n", order);
    k = 0;
    while (k < order) {
        E_INFO("#%d-grams: %d\n", k + 1, counts[k]);
        k++;
    }

    base = &trie_model->base;
    ngram_model_init(base, &ngram_model_trie_funcs, lmath, order,
                     (int32) counts[0]);
    base->writable = TRUE;

    trie_model->trie = lm_trie_create(counts[0], order);
    if (read_1grams_arpa(&line, counts[0], base,
                         trie_model->trie->unigrams) < 0)
        goto fail;

    if (order > 1) {
        raw = ngrams_raw_read_arpa(&line, base->lmath, counts, order,
                                   base->wid);
        ngrams_raw_fix_counts(raw, counts, fixed_counts, order);
        for (k = 0; k < order; k++) {
            base->n_counts[k] = fixed_counts[k];
        }
        lm_trie_alloc_ngram(trie_model->trie, fixed_counts, order);
        lm_trie_build(trie_model->trie, raw, counts, order);
        ngrams_raw_free(raw, counts, order);
    }

    lineiter_free(line);
    fclose_comp(arpa_fp, is_pipe);

    return base;

fail:
    /* Shared cleanup for both read failures. */
    ckd_free(trie_model);
    lineiter_free(line);
    fclose_comp(arpa_fp, is_pipe);
    return NULL;
}
예제 #8
0
/*********************************************************************
 *
 * Function: 
 *	topo_read
 * 
 * Description: 
 * 	This routine reads an ASCII transition matrix which may then be
 *	used to determine the topology of the models used in the system.
 *
 *	The file holds a version line (must equal TOPO_FILE_VERSION), a
 *	line with the state count n_state, and then (n_state-1) rows of
 *	n_state values.  Each row is normalized by dividing its entries
 *	by the row sum.
 *
 * Function Inputs: 
 *	float32 ***tmat -
 *	    Receives the newly allocated (n_state-1) x n_state matrix.
 *	uint32 *n_state_pm -
 *	    Receives n_state.
 *	const char *topo_file_name -
 *	    Name of the topology file; must not be NULL.
 * 
 * Global Inputs: 
 *	None
 * 
 * Return Values: 
 *	S3_SUCCESS is returned upon successful completion
 *	S3_ERROR is returned upon an error condition
 * 
 * Global Outputs: 
 *	None
 * 
 * Errors: 
 *	The file cannot be opened, ends before the version or n_state
 *	line, has an unexpected version, or has an n_state that cannot
 *	be parsed or is smaller than 2.
 * 
 * Post-Conditions: 
 *	*tmat and *n_state_pm are written only on S3_SUCCESS.
 * 
 *********************************************************************/
int32
topo_read(float32 ***tmat,
	  uint32 *n_state_pm,
	  const char *topo_file_name)
{
    float32 **out;
    FILE *fp;
    lineiter_t *li = NULL;
    uint32 n_state;
    uint32 i, j;
    float32 row_sum;

    assert(topo_file_name != NULL);

    fp = fopen(topo_file_name, "r");
    if (fp == NULL) {
	E_ERROR_SYSTEM("Unable to open %s for reading\n", topo_file_name);

	goto error;
    }
    
    li = lineiter_start_clean(fp);
    
    if (li == NULL) {
	E_ERROR("EOF encounted while reading version number in %s!?\n", topo_file_name);

	goto error;
    }

    if (strcmp(li->buf, TOPO_FILE_VERSION) != 0) {
	E_ERROR("Topo file version in %s is %s.  Expected %s\n",
		topo_file_name, li->buf, TOPO_FILE_VERSION);

	goto error;
    }

    li = lineiter_next(li);
    if (li == NULL) {
	E_ERROR("EOF encountered while reading n_state in %s!?\n", topo_file_name);

	goto error;
    }

    /* n_state is unsigned, so scan it with %u, and make sure a number
     * was actually read before using it. */
    if (sscanf(li->buf, "%u", &n_state) != 1) {
	E_ERROR("Unable to parse n_state in %s\n", topo_file_name);

	goto error;
    }

    /* n_state == 0 would make n_state-1 wrap around to a huge row count. */
    if (n_state == 0) {
	E_ERROR("n_state is 0 in %s; at least 2 states are required\n", topo_file_name);

	goto error;
    }

    /* Support Request 1504066: robust reading of topo file in
       SphinxTrain
	   
       When user put 
       0.1
       1.0 1.0 1.0 0.0
       1.0 1.0 1.0 0.0
       1.0 1.0 1.0 1.0 
       
       instead of 
       
       0.1
       4
       1.0 1.0 1.0 0.0
       1.0 1.0 1.0 0.0
       1.0 1.0 1.0 1.0
       
       the first matrix row is taken for the n_state line and its
       leading 1.0 is scanned as n_state = 1.  That case is rejected
       here so that no corrupt transition matrix is produced.
    */

    if(n_state==1) {
        E_ERROR("n_state =1, if you are using a transition matrix with more than 1 state, this error might show that there is format issue in your input topology file.  You are recommended to use perl/make_topology.pl to generate the topo file instead.\n");
	goto error;
    }

    out = (float **)ckd_calloc_2d(n_state-1, n_state, sizeof(float32));

    /* The matrix body is read directly from the stream, continuing
     * after the lines consumed through the line iterator.
     * NOTE(review): fscanf results are not checked, so a short or
     * malformed matrix is not detected here and a row summing to zero
     * is divided by zero -- worth validating once the matching 2-D
     * deallocator can be used on the failure path. */
    for (i = 0; i < n_state-1; i++) {
	row_sum = 0.0;
	for (j = 0; j < n_state; j++) {
	    fscanf(fp, "%f", &out[i][j]);
	    row_sum += out[i][j];
	}
	/* Normalize the row so that its entries sum to one. */
	for (j = 0; j < n_state; j++) {
	    out[i][j] /= row_sum;
	}
    }
    
    *tmat = out;
    *n_state_pm = n_state;

    fclose(fp);
    lineiter_free(li);
    return S3_SUCCESS;

error:    
    if (fp) fclose(fp);
    lineiter_free(li);
    return S3_ERROR;
}