Esempio n. 1
0
/*
 * Test driver for the dictionary module.
 *
 * Runs three scenarios in sequence: a dictionary loaded from the files named
 * by "-dict"/"-fdict", a dictionary created without configuration or model
 * definition, and a dictionary grown by 500000 generated words.  Every check
 * goes through the TEST_ASSERT / TEST_EQUAL macros, which are defined outside
 * this block.  argc and argv are not used.  Returns 0 when control reaches
 * the end of the function.
 */
int
main(int argc, char *argv[])
{
	bin_mdef_t *mdef;
	dict_t *dict;
	cmd_ln_t *config;

	int i;
	char buf[100];	/* Scratch space for the generated "word_<i>" strings. */

	/* Configuration naming the main and filler dictionaries under MODELDIR. */
	TEST_ASSERT(config = cmd_ln_init(NULL, NULL, FALSE,
						   "-dict", MODELDIR "/en-us/cmudict-en-us.dict",
						   "-fdict", MODELDIR "/en-us/en-us/noisedict",
						   NULL));

	/* Test dictionary in standard fashion. */
	TEST_ASSERT(mdef = bin_mdef_read(NULL, MODELDIR "/en-us/en-us/mdef"));
	TEST_ASSERT(dict = dict_init(config, mdef, NULL));

	/* Print the IDs for one word and one nonsense string; the values are
	 * only printed, not checked. */
	printf("Word ID (CARNEGIE) = %d\n",
	       dict_wordid(dict, "CARNEGIE"));
	printf("Word ID (ASDFASFASSD) = %d\n",
	       dict_wordid(dict, "ASDFASFASSD"));

	/* Round trip: write the dictionary to _cmu07a.dic and require that
	 * system() running "diff -uw" against the source file returns 0. */
	TEST_EQUAL(0, dict_write(dict, "_cmu07a.dic", NULL));
	TEST_EQUAL(0, system("diff -uw " MODELDIR "/en-us/cmudict-en-us.dict _cmu07a.dic"));

	dict_free(dict);
	bin_mdef_free(mdef);

	/* Now test an empty dictionary. */
	TEST_ASSERT(dict = dict_init(NULL, NULL, NULL));
	printf("Word ID(<s>) = %d\n", dict_wordid(dict, "<s>"));
	/* Words are added with a NULL pronunciation of length 0. */
	TEST_ASSERT(BAD_S3WID != dict_add_word(dict, "FOOBIE", NULL, 0));
	TEST_ASSERT(BAD_S3WID != dict_add_word(dict, "BLETCH", NULL, 0));
	printf("Word ID(FOOBIE) = %d\n", dict_wordid(dict, "FOOBIE"));
	printf("Word ID(BLETCH) = %d\n", dict_wordid(dict, "BLETCH"));
	/* The two added words must count as real words; "</s>" must not. */
	TEST_ASSERT(dict_real_word(dict, dict_wordid(dict, "FOOBIE")));
	TEST_ASSERT(dict_real_word(dict, dict_wordid(dict, "BLETCH")));
	TEST_ASSERT(!dict_real_word(dict, dict_wordid(dict, "</s>")));
	dict_free(dict);

	/* Test to add 500k words. */
	TEST_ASSERT(dict = dict_init(NULL, NULL, NULL));
	for (i = 0; i < 500000; i++) {
	    /* Longest string is "word_499999", well inside buf[100]. */
	    sprintf(buf, "word_%d", i);
    	    TEST_ASSERT(BAD_S3WID != dict_add_word(dict, buf, NULL, 0));
	}
	dict_free(dict);

	cmd_ln_free_r(config);

	return 0;
}
Esempio n. 2
0
/**
 * Add a word and its pronunciation to the decoder at run time.
 *
 * @param ps     Decoder whose dictionary, dict2pid and language model set
 *               are updated.
 * @param word   Word string to add.
 * @param phones Phone names in one string, split with str2words().
 * @param update Non-zero to reinitialize the current search afterwards.
 * @return The dictionary word ID of the new word; -1 if a phone is unknown
 *         or the dictionary / language model rejects the word; otherwise the
 *         negative value returned by ps_search_reinit() when that fails.
 */
int
ps_add_word(ps_decoder_t *ps,
            char const *word,
            char const *phones,
            int update)
{
    int32 wid;
    ngram_model_t *lmset;
    s3cipid_t *pron;
    char **phonestr, *tmp;
    int np, i, rv;

    /* Parse phones into an array of phone IDs.  str2words() is called on a
     * private copy because the caller's string is const. */
    tmp = ckd_salloc(phones);
    np = str2words(tmp, NULL, 0);
    phonestr = ckd_calloc(np, sizeof(*phonestr));
    str2words(tmp, phonestr, np);
    pron = ckd_calloc(np, sizeof(*pron));
    for (i = 0; i < np; ++i) {
        pron[i] = bin_mdef_ciphone_id(ps->acmod->mdef, phonestr[i]);
        if (pron[i] == -1) {
            E_ERROR("Unknown phone %s in phone string %s\n",
                    phonestr[i], tmp);
            ckd_free(phonestr);
            ckd_free(tmp);
            ckd_free(pron);
            return -1;
        }
    }
    /* No longer needed. */
    ckd_free(phonestr);
    ckd_free(tmp);

    /* Add it to the dictionary. */
    wid = dict_add_word(ps->dict, word, pron, np);
    /* The phone ID array is released whether or not the add succeeded. */
    ckd_free(pron);
    if (wid == -1)
        return -1;

    /* Now we also have to add it to dict2pid. */
    dict2pid_add_word(ps->d2p, wid);

    if ((lmset = ps_get_lmset(ps)) != NULL) {
        /* Add it to the LM set (meaning, the current LM).  In a perfect
         * world, this would result in the same WID, but because of the
         * weird way that word IDs are handled, it doesn't. */
        if (ngram_model_add_word(lmset, word, 1.0) == NGRAM_INVALID_WID)
            return -1;
    }

    /* Rebuild the widmap and search tree if requested. */
    if (update) {
        /* The assignment must be parenthesized on its own: "rv = f() < 0"
         * would store the comparison result (0 or 1) instead of the error
         * code, and the caller would see a bogus positive word ID. */
        if ((rv = ps_search_reinit(ps->search, ps->dict, ps->d2p)) < 0)
            return rv;
    }
    return wid;
}
Esempio n. 3
0
File: dict.c Progetto: 10v/cmusphinx
/*
 * Read dictionary entries from fp and add them to d.
 *
 * Each line is split with str2words(); the first token is the word and the
 * remaining tokens are its pronunciation.  Lines starting with '#' and empty
 * lines are skipped, lines with problems are reported and ignored.
 * Always returns 0.
 */
static int32 dict_read (FILE *fp, dict_t *d)
{
    char text[16384];
    s3cipid_t phone_ids[4096];
    char **tok;
    int32 max_tok = 4092;
    int32 line_no = 0;
    int32 n_tok, k;
    s3wid_t wid;

    tok = (char **) ckd_calloc (max_tok, sizeof(char *));

    for (;;) {
	if (fgets (text, sizeof(text), fp) == NULL)
	    break;
	++line_no;

	/* Comment line */
	if ('#' == text[0])
	    continue;

	n_tok = str2words (text, tok, max_tok);
	if (n_tok < 0)
	    E_FATAL("str2words(%s) failed; Increase maxwd from %d\n", text, max_tok);

	/* Empty line */
	if (0 == n_tok)
	    continue;

	/* tok[0] is the word, tok[1..n_tok-1] its phones */
	if (1 == n_tok) {
	    E_ERROR("Line %d: No pronunciation for word %s; ignored\n", line_no, tok[0]);
	    continue;
	}

	/* Map every phone name to its CI-phone id; stop at the first bad one */
	k = 0;
	while (k < n_tok - 1) {
	    phone_ids[k] = dict_ciphone_id (d, tok[k+1]);
	    if (NOT_CIPID(phone_ids[k])) {
		E_ERROR("Line %d: Bad ciphone: %s; word %s ignored\n",
			line_no, tok[k+1], tok[0]);
		break;
	    }
	    k++;
	}

	/* Only a fully converted pronunciation is added */
	if (k != n_tok - 1)
	    continue;

	wid = dict_add_word (d, tok[0], phone_ids, n_tok-1);
	if (NOT_WID(wid))
	    E_ERROR("Line %d: dict_add_word (%s) failed (duplicate?); ignored\n",
		    line_no, tok[0]);
    }

    ckd_free (tok);

    return 0;
}
Esempio n. 4
0
/*
 * Load homophone pairs from a text file into the file-level list `homlist`.
 *
 * Each line must hold two whitespace-separated words.  The first word is
 * added to the file-level dictionary `dict` with a one-phone dummy
 * pronunciation when it is missing; the second must already be present with
 * an ID below `oovbegin`.  Any violation, a malformed line, or a file that
 * cannot be opened is reported through E_FATAL.
 *
 * file: path of the homophones file.
 */
static void homfile_load (char *file)
{
    FILE *fp;
    char line[16380], w1[4096], w2[4096];
    int32 k, n;
    s3wid_t wid1, wid2;
    s3cipid_t ci[1];
    hom_t *h;
    
    E_INFO("Reading homophones file %s\n", file);
    if ((fp = fopen(file, "r")) == NULL)
	E_FATAL("fopen(%s,r) failed\n", file);
    
    ci[0] = (s3cipid_t) 0;	/* Dummy */
    
    n = 0;
    while (fgets (line, sizeof(line), fp) != NULL) {
	/* The field widths keep each token within w1/w2 (4096 bytes including
	 * the terminator); a bare %s could overrun them, since a line may be
	 * up to 16379 characters long. */
	if ((k = sscanf (line, "%4095s %4095s", w1, w2)) == 2) {
	    wid1 = dict_wordid (dict, w1);
	    if (NOT_WID(wid1)) {
		E_INFO("Adding %s to dictionary\n", w1);
		wid1 = dict_add_word (dict, w1, ci, 1);
		if (NOT_WID(wid1))
		    E_FATAL("dict_add_word(%s) failed\n", w1);
	    }
	    
	    wid2 = dict_wordid (dict, w2);
	    if ((NOT_WID(wid2)) || (wid2 >= oovbegin))
		E_FATAL("%s not in dictionary\n", w2);

	    /* Push the new pair on the front of the list. */
	    h = (hom_t *) listelem_alloc (sizeof(hom_t));
	    h->w1 = wid1;
	    h->w2 = wid2;
	    h->next = homlist;
	    homlist = h;
	    
	    n++;
	} else
	    E_FATAL("Bad homophones line: %s\n", line);
    }
    
    E_INFO("%d homophone pairs read\n", n);
    
    fclose (fp);
}
Esempio n. 5
0
/*
 * Convert the words of a text line into dictionary word IDs.
 *
 * dict:      dictionary used for lookups (and for additions when add_oov).
 * line:      input text; scanned token by token, not modified.
 * wid:       output array receiving one ID per word.
 * max_n_wid: capacity of wid.
 * add_oov:   non-zero to add unknown words to the dictionary (with a NULL
 *            pronunciation of length 0); zero makes an unknown word fatal.
 * uttid:     output buffer; cleared first, then passed to is_uttid() for
 *            every token (presumably filled there when the token is an
 *            utterance ID -- confirm against is_uttid()).
 *
 * Scanning stops at the first token for which is_uttid() returns non-zero;
 * a warning is issued if more text follows it.
 *
 * Returns the number of IDs stored, or -n if a further token was found
 * after n == max_n_wid IDs had already been stored.
 */
int32 line2wid (dict_t *dict, char *line, s3wid_t *wid, int32 max_n_wid, int32 add_oov,
		char *uttid)
{
    char *lp, word[1024];
    int32 n, k;
    s3wid_t w;
    s3cipid_t ci[1];
    
    (void) w;	/* Declared by the original code but never needed. */

    uttid[0] = '\0';
    ci[0] = (s3cipid_t) 0;
    (void) ci;
    
    lp = line;
    n = 0;
    /* The field width keeps each token within word[1024]; a bare %s would
     * overrun the buffer on a longer token.  An over-long token is consumed
     * in pieces of at most 1023 characters. */
    while (sscanf (lp, "%1023s%n", word, &k) == 1) {
	lp += k;

	if (n >= max_n_wid)
	    return -n;
	
	if (is_uttid (word, uttid))
	    break;
	
	wid[n] = dict_wordid (dict, word);	/* Up to caller to handle BAD_WIDs */
	if (NOT_WID(wid[n])) {
	    /* OOV word */
	    if (add_oov) {
		E_INFO("Adding %s to dictionary\n", word);
		wid[n] = dict_add_word (dict, word, NULL, 0);
		if (NOT_WID(wid[n]))
		    E_FATAL("dict_add_word(%s) failed for line: %s\n", word, line);
	    } else
		E_FATAL("Unknown word (%s) in line: %s\n", word, line);
	}
	
	n++;
    }
    
    if (sscanf (lp, "%1023s", word) == 1)	/* Check that line really ended */
	E_WARN("Nonempty data ignored after uttid(%s) in line: %s\n", uttid, line);
    
    return n;
}
Esempio n. 6
0
/**
 * Add a word to the dictionary using a pronunciation produced by dict_g2p().
 *
 * @param dict Dictionary to extend; its ngram_g2p_model and mdef are used.
 * @param word Word to add.
 * @return 0 when dict_g2p() yields no phone string, -1 when a phone is not
 *         known to the model definition or dict_add_word() fails, otherwise
 *         the value returned by dict_add_word().
 */
int
dict_add_g2p_word(dict_t * dict, char const *word)
{
    char *g2p_phones, *scratch;
    char **tokens;
    s3cipid_t *ids;
    int n_phones, k;
    int bad_phone = 0;
    int32 result;

    g2p_phones = dict_g2p(word, dict->ngram_g2p_model);
    if (g2p_phones == NULL)
        return 0;

    E_INFO("Adding phone %s for word %s \n",  g2p_phones, word);

    /* Split a private copy of the phone string into tokens. */
    scratch = ckd_salloc(g2p_phones);
    n_phones = str2words(scratch, NULL, 0);
    tokens = ckd_calloc(n_phones, sizeof(*tokens));
    str2words(scratch, tokens, n_phones);

    /* Translate every token into a CI phone ID, stopping at the first
     * unknown one. */
    ids = ckd_calloc(n_phones, sizeof(*ids));
    k = 0;
    while (k < n_phones) {
        ids[k] = bin_mdef_ciphone_id(dict->mdef, tokens[k]);
        if (ids[k] == -1) {
            E_ERROR("Unknown phone %s in phone string %s\n",
                    tokens[k], scratch);
            bad_phone = 1;
            break;
        }
        ++k;
    }

    ckd_free(tokens);
    ckd_free(scratch);

    if (bad_phone) {
        ckd_free(ids);
        ckd_free(g2p_phones);
        return -1;
    }
    ckd_free(g2p_phones);

    /* dict_add_word() reports failure as -1, which is passed through. */
    result = dict_add_word(dict, word, ids, n_phones);
    ckd_free(ids);

    return result;
}
Esempio n. 7
0
/**
 * Add a word and its pronunciation to the decoder at run time.
 *
 * The word is added to the dictionary and dict2pid, then every search in
 * ps->searches is visited: searches named PS_SEARCH_NGRAM get the word added
 * to their language model set, and, when update is non-zero, each search is
 * reinitialized.
 *
 * @param ps     Decoder to update.
 * @param word   Word string to add.
 * @param phones Phone names in one string, split with str2words().
 * @param update Non-zero to reinitialize the searches afterwards.
 * @return The dictionary word ID of the new word; -1 if a phone is unknown
 *         or the dictionary / a language model rejects the word; otherwise
 *         the negative value returned by ps_search_reinit() when that fails.
 */
int
ps_add_word(ps_decoder_t *ps,
            char const *word,
            char const *phones,
            int update)
{
    int32 wid;
    s3cipid_t *pron;
    hash_iter_t *search_it;
    char **phonestr, *tmp;
    int np, i, rv;

    /* Parse phones into an array of phone IDs.  str2words() is called on a
     * private copy because the caller's string is const. */
    tmp = ckd_salloc(phones);
    np = str2words(tmp, NULL, 0);
    phonestr = ckd_calloc(np, sizeof(*phonestr));
    str2words(tmp, phonestr, np);
    pron = ckd_calloc(np, sizeof(*pron));
    for (i = 0; i < np; ++i) {
        pron[i] = bin_mdef_ciphone_id(ps->acmod->mdef, phonestr[i]);
        if (pron[i] == -1) {
            E_ERROR("Unknown phone %s in phone string %s\n",
                    phonestr[i], tmp);
            ckd_free(phonestr);
            ckd_free(tmp);
            ckd_free(pron);
            return -1;
        }
    }
    /* No longer needed. */
    ckd_free(phonestr);
    ckd_free(tmp);

    /* Add it to the dictionary. */
    wid = dict_add_word(ps->dict, word, pron, np);
    /* The phone ID array is released whether or not the add succeeded. */
    ckd_free(pron);
    if (wid == -1)
        return -1;

    /* Now we also have to add it to dict2pid. */
    dict2pid_add_word(ps->d2p, wid);

    /* TODO: we definitely need to refactor this */
    for (search_it = hash_table_iter(ps->searches); search_it;
         search_it = hash_table_iter_next(search_it)) {
        ps_search_t *search = hash_entry_val(search_it->ent);
        if (!strcmp(PS_SEARCH_NGRAM, ps_search_name(search))) {
            ngram_model_t *lmset = ((ngram_search_t *) search)->lmset;
            if (ngram_model_add_word(lmset, word, 1.0) == NGRAM_INVALID_WID) {
                /* Leaving the loop early: the iterator must be released. */
                hash_table_iter_free(search_it);
                return -1;
            }
        }

        /* Rebuild the widmap and search tree if requested. */
        if (update) {
            /* The assignment must be parenthesized on its own: "rv = f() < 0"
             * would store the comparison result (0 or 1) instead of the
             * error code, and the caller would see a bogus positive ID. */
            if ((rv = ps_search_reinit(search, ps->dict, ps->d2p)) < 0) {
                hash_table_iter_free(search_it);
                return rv;
            }
        }
    }

    return wid;
}
Esempio n. 8
0
/**
 * Build a dictionary from the files named in the configuration.
 *
 * @param config Command-line object supplying "-dict", "-fdict" and
 *               "-dictcase"; may be NULL, in which case no file is read.
 * @param mdef   Model definition; when non-NULL it is retained with
 *               bin_mdef_retain() and supplies the silence phone, otherwise
 *               phone ID 0 is used for the built-in words.
 * @return A newly allocated dictionary with refcnt set to 1.
 *
 * The start, finish and silence words are added when the files did not
 * provide them.  A file that cannot be opened, a word count reaching
 * MAX_S3WID, or a silence word outside the filler range is reported through
 * E_FATAL / E_FATAL_SYSTEM.
 */
dict_t *
dict_init(cmd_ln_t *config, bin_mdef_t * mdef)
{
    FILE *fp, *fp2;
    int32 n;
    lineiter_t *li;
    dict_t *d;
    s3cipid_t sil;
    char const *dictfile = NULL, *fillerfile = NULL;

    if (config) {
        dictfile = cmd_ln_str_r(config, "-dict");
        fillerfile = cmd_ln_str_r(config, "-fdict");
    }

    /*
     * First obtain #words in dictionary (for hash table allocation).
     * Reason: The PC NT system doesn't like to grow memory gradually.  Better to allocate
     * all the required memory in one go.
     */
    fp = NULL;
    n = 0;
    if (dictfile) {
        if ((fp = fopen(dictfile, "r")) == NULL)
            E_FATAL_SYSTEM("Failed to open dictionary file '%s' for reading", dictfile);
        /* Every line not starting with '#' counts as one entry. */
        for (li = lineiter_start(fp); li; li = lineiter_next(li)) {
            if (li->buf[0] != '#')
                n++;
        }
        rewind(fp);
    }

    fp2 = NULL;
    if (fillerfile) {
        if ((fp2 = fopen(fillerfile, "r")) == NULL)
            E_FATAL_SYSTEM("Failed to open filler dictionary file '%s' for reading", fillerfile);
        for (li = lineiter_start(fp2); li; li = lineiter_next(li)) {
            if (li->buf[0] != '#')
                n++;
        }
        rewind(fp2);
    }

    /*
     * Allocate dict entries.  HACK!!  Allow some extra entries for words not in file.
     * Also check for type size restrictions.
     */
    d = (dict_t *) ckd_calloc(1, sizeof(dict_t));       /* freed in dict_free() */
    d->refcnt = 1;
    d->max_words =
        (n + S3DICT_INC_SZ < MAX_S3WID) ? n + S3DICT_INC_SZ : MAX_S3WID;
    if (n >= MAX_S3WID)
        E_FATAL("#Words in dictionaries (%d) exceeds limit (%d)\n", n,
                MAX_S3WID);

    /* sizeof yields size_t; it is cast to int so the arguments match the
     * %d conversions (passing size_t to %d is undefined behavior where
     * size_t is wider than int). */
    E_INFO("Allocating %d * %d bytes (%d KiB) for word entries\n",
           d->max_words, (int) sizeof(dictword_t),
           (int) (d->max_words * sizeof(dictword_t) / 1024));
    d->word = (dictword_t *) ckd_calloc(d->max_words, sizeof(dictword_t));      /* freed in dict_free() */
    d->n_word = 0;
    if (mdef)
        d->mdef = bin_mdef_retain(mdef);

    /* Create new hash table for word strings; d->nocase stays at its
     * zero-initialized value unless "-dictcase" is present. */
    if (config && cmd_ln_exists_r(config, "-dictcase"))
        d->nocase = cmd_ln_boolean_r(config, "-dictcase");
    d->ht = hash_table_new(d->max_words, d->nocase);

    /* Digest main dictionary file */
    if (fp) {
        E_INFO("Reading main dictionary: %s\n", dictfile);
        dict_read(fp, d);
        fclose(fp);
        E_INFO("%d words read\n", d->n_word);
    }

    /* Now the filler dictionary file, if it exists */
    d->filler_start = d->n_word;
    if (fillerfile) {
        E_INFO("Reading filler dictionary: %s\n", fillerfile);
        dict_read(fp2, d);
        fclose(fp2);
        E_INFO("%d words read\n", d->n_word - d->filler_start);
    }
    if (mdef)
        sil = bin_mdef_silphone(mdef);
    else
        sil = 0;
    /* Make sure the three distinguished words exist; each one gets the
     * single-phone pronunciation `sil` when it has to be added here. */
    if (dict_wordid(d, S3_START_WORD) == BAD_S3WID) {
        dict_add_word(d, S3_START_WORD, &sil, 1);
    }
    if (dict_wordid(d, S3_FINISH_WORD) == BAD_S3WID) {
        dict_add_word(d, S3_FINISH_WORD, &sil, 1);
    }
    if (dict_wordid(d, S3_SILENCE_WORD) == BAD_S3WID) {
        dict_add_word(d, S3_SILENCE_WORD, &sil, 1);
    }

    d->filler_end = d->n_word - 1;

    /* Initialize distinguished word-ids */
    d->startwid = dict_wordid(d, S3_START_WORD);
    d->finishwid = dict_wordid(d, S3_FINISH_WORD);
    d->silwid = dict_wordid(d, S3_SILENCE_WORD);

    if ((d->filler_start > d->filler_end)
        || (!dict_filler_word(d, d->silwid)))
        E_FATAL("%s must occur (only) in filler dictionary\n",
                S3_SILENCE_WORD);

    /* No check that alternative pronunciations for filler words are in filler range!! */

    return d;
}
Esempio n. 9
0
/**
 * Read dictionary entries from an open file and add them to a dictionary.
 *
 * Each line is split with str2words(); the first token is the word and the
 * remaining tokens are its pronunciation.  Lines starting with "##" or ";;"
 * are treated as comments, empty lines are skipped, and lines with problems
 * are reported and ignored.  The token and phone buffers grow when a line
 * has more tokens than they can hold.
 *
 * @param fp Open dictionary file, read through a lineiter.
 * @param d  Dictionary receiving the words.
 * @return Always 0.
 */
static int32
dict_read(FILE * fp, dict_t * d)
{
    lineiter_t *li;
    char **wptr;
    s3cipid_t *p;
    int32 lineno, nwd;
    s3wid_t w;
    int32 i, maxwd;
    size_t stralloc, phnalloc;

    maxwd = 512;
    p = (s3cipid_t *) ckd_calloc(maxwd + 4, sizeof(*p));
    wptr = (char **) ckd_calloc(maxwd, sizeof(char *)); /* Freed below */

    lineno = 0;
    stralloc = phnalloc = 0;
    for (li = lineiter_start(fp); li; li = lineiter_next(li)) {
        lineno++;
        if (0 == strncmp(li->buf, "##", 2)
            || 0 == strncmp(li->buf, ";;", 2))
            continue;

        if ((nwd = str2words(li->buf, wptr, maxwd)) < 0) {
            /* Increase size of p, wptr. */
            nwd = str2words(li->buf, NULL, 0);
            assert(nwd > maxwd); /* why else would it fail? */
            maxwd = nwd;
            p = (s3cipid_t *) ckd_realloc(p, (maxwd + 4) * sizeof(*p));
            wptr = (char **) ckd_realloc(wptr, maxwd * sizeof(*wptr));
            /* The counting call above fills nothing in, and the enlarged
             * wptr has no valid entries for this line yet, so the line has
             * to be split again into the new array before wptr[] is used.
             * NOTE(review): relies on str2words() leaving li->buf splittable
             * after the failed first attempt -- confirm. */
            nwd = str2words(li->buf, wptr, maxwd);
        }

        if (nwd == 0)           /* Empty line */
            continue;
        /* wptr[0] is the word-string and wptr[1..nwd-1] the pronunciation sequence */
        if (nwd == 1) {
            E_ERROR("Line %d: No pronunciation for word %s; ignored\n",
                    lineno, wptr[0]);
            continue;
        }


        /* Convert pronunciation string to CI-phone-ids */
        for (i = 1; i < nwd; i++) {
            p[i - 1] = dict_ciphone_id(d, wptr[i]);
            if (NOT_S3CIPID(p[i - 1])) {
                E_ERROR("Line %d: Bad ciphone: %s; word %s ignored\n",
                        lineno, wptr[i], wptr[0]);
                break;
            }
        }

        if (i == nwd) {         /* All CI-phones successfully converted to IDs */
            w = dict_add_word(d, wptr[0], p, nwd - 1);
            if (NOT_S3WID(w))
                E_ERROR
                    ("Line %d: dict_add_word (%s) failed (duplicate?); ignored\n",
                     lineno, wptr[0]);
            else {
                /* Running totals, used only for the summary message below. */
                stralloc += strlen(d->word[w].word);
                phnalloc += d->word[w].pronlen * sizeof(s3cipid_t);
            }
        }
    }
    E_INFO("Allocated %d KiB for strings, %d KiB for phones\n",
           (int)stralloc / 1024, (int)phnalloc / 1024);
    ckd_free(p);
    ckd_free(wptr);

    return 0;
}
Esempio n. 10
0
File: comp.c Progetto: Jaxx89/lz78
/* LZ78 compressor.
 *
 * from: path of the file to compress; NULL means read from stdin.
 * to:   path of the compressed file to create.
 * size: size of the dictionary; it also fixes the width in bits (blen) of
 *       every index written to the output.
 *
 * Output: two fields of sizeof(int)*8 bits (dictionary size, then a
 * placeholder for the longest match), the sequence of blen-bit indices, and
 * a final EOFC marker.  After the bit stream is closed the placeholder is
 * overwritten with the real longest match at byte offset sizeof(int).
 *
 * Errors (allocation failure, files that cannot be opened) are reported on
 * stderr and the function returns without compressing. */
void compress(char* from, char* to, int size){

    unsigned char byte=0;   //buffer for the single byte read from the input
    unsigned int tmp=0;     //last byte read, widened for the dictionary calls
    unsigned int itmp=0, size_tmp=0; //itmp is the index to write
    int position=0, father=0; //position in the dictionary
    dictionary* dict;
    int blen=1;     //bit length of the dictionary's indexes
    struct bitio* bit;
    header hdr;     //header of the file
    int result=0;
    int longest_match=1, tmp_longest_match=0;
    FILE* file_read, *file_write;

    dict=malloc(sizeof(dictionary));
    if(dict==NULL) {
        fprintf(stderr, "compress: out of memory\n");
        return;
    }

    //compute how many bits to write for each index in the compressed file
    size_tmp=size;
    while(size_tmp>>=1) blen++;

    fprintf(stderr, "blen: %i\n", blen);

    //initialize the dictionary
    dict_init(dict, size, 256);

    //initialize the bitio structure to write bits instead of bytes
    bit=bit_open(to, "w");
    if(bit==NULL) {
        //continuing would dereference a null stream in bitio_write
        fprintf(stderr, "null bitio\n");
        suppress_dictionary(dict);
        free(dict);
        return;
    }

    //creation of the header of the file
    hdr.dictionary_size=size;
    hdr.longest_match=0;    //prepare the space for the overwriting

    //write the header at the top of the file
    bitio_write(bit, (uint64_t)hdr.dictionary_size, sizeof(int)*8);
    bitio_write(bit, (uint64_t)hdr.longest_match, sizeof(int)*8);

    //open the input in binary mode: the data is processed byte by byte
    if(from==NULL) file_read=stdin;
    else file_read=fopen(from, "rb");
    if(file_read==NULL) {
        fprintf(stderr, "compress: cannot open %s\n", from);
        bitio_close(bit);
        suppress_dictionary(dict);
        free(dict);
        return;
    }

    //first read; the byte goes through an unsigned char so that the value
    //does not depend on the byte order of unsigned int
    result=fread(&byte, 1, 1, file_read);
    if(result!=0) tmp=byte;

    father=tmp;

    //read the file until EOF is reached or an error occurs
    do {

        //read one byte from the file; when nothing is read tmp keeps its
        //previous value and this last iteration still runs with it
        result=fread(&byte, 1, 1, file_read);
        if(result!=0) tmp=byte;

        itmp=father;
        tmp_longest_match++;
        //search if we already have the current string in the dictionary
        position=dict_search(&father, tmp, dict);

        if(position!=0) {
            if(longest_match < tmp_longest_match)
                longest_match = tmp_longest_match;
            tmp_longest_match=0;
            //add the string to the dictionary
            dict_add_word(position, father, tmp, dict);

            bitio_write(bit, (uint64_t)itmp, blen);
            father=tmp;
        }
    //a one-item fread returns 0 only at EOF or on error; stopping on either
    //avoids spinning forever on a read error, where feof() never turns true
    } while(result!=0);
    //write the last position reached
    bitio_write(bit, father, blen);

    //close all the structures writing EOFC on the compressed file
    bitio_write(bit, EOFC, blen);
    bitio_close(bit);

    //write the longest match over the placeholder in the header
    file_write=fopen(to, "r+b");
    if(file_write==NULL) {
        fprintf(stderr, "compress: cannot reopen %s to update the header\n", to);
    } else {
        fseek(file_write, (long)sizeof(int), SEEK_SET);
        hdr.longest_match=longest_match;
        fwrite(&hdr.longest_match, sizeof(int), 1, file_write);
        fclose(file_write);
    }

    //free all other structures
    suppress_dictionary(dict);
    free(dict);
    //stdin belongs to the caller and is left open
    if(file_read!=stdin) fclose(file_read);


    fprintf(stderr, "compression executed\n");
}
Esempio n. 11
0
File: decomp.c Progetto: Jaxx89/lz78
//LZ78 decompressor.
//input is the path of the compressed file, filename is the path of the file
//to create with the decompressed data.
//The dictionary size is read from the first sizeof(int)*8 bits of the input;
//it also fixes the width in bits of every index that follows. Indices are
//read until EOFC is met.
//Errors (files that cannot be opened, allocation failure, an index that
//cannot be resolved) are reported on stderr and stop the decompression.
void decompress(char* input, char* filename) {

    int index_bits, flag, aux, child_root=0, pos;
    bool first_read = true, res_retrieve=false;
    uint64_t read_index=0, prev_current=0;
    uint64_t raw_size=0;        //header field as delivered by bitio_read
    bitio* comp_file;
    dictionary* dict;
    word* sequence = NULL;      //Variable in which store the word in each step
    word* print_seq;
    header hdr;
    int decomp_file;

    comp_file = bit_open(input, "r");       //Preparing my data structures
    if(comp_file == NULL) {
        fprintf(stderr, "decompress: cannot open %s\n", input);
        return;
    }

    //retrieve header; bitio_read takes a uint64_t*, so the value is read into
    //a real uint64_t instead of casting the address of the narrower field
    bitio_read(comp_file, &raw_size, sizeof(int)*8);
    hdr.dictionary_size = (int)raw_size;

    dict = malloc(sizeof(dictionary));
    if(dict == NULL) {
        fprintf(stderr, "decompress: out of memory\n");
        close(comp_file->fd);
        free(comp_file);
        return;
    }
    dict_init(dict, hdr.dictionary_size, 256);


    aux = hdr.dictionary_size;         //Compute the number of bits representing the indexes
    fprintf(stderr, "%i\n", aux);
    index_bits = 1;
    while(aux >>= 1)
        index_bits++;

    //filename is used directly: the former heap copy was one byte too small
    //for the terminator and was never freed
    decomp_file=open(filename, O_CREAT|O_RDWR, S_IWUSR|S_IRUSR|S_IRGRP|S_IROTH);
    if(decomp_file < 0) {
        fprintf(stderr, "decompress: cannot open %s\n", filename);
        close(comp_file->fd);
        free(comp_file);
        suppress_dictionary(dict);
        return;
    }

    bitio_read(comp_file, &read_index, index_bits); //Read the first index

    fprintf(stderr, "%s %i\n", filename, index_bits);

    while(read_index != EOFC) {   //Until the end of compressed file

        //retrieve word from index
        res_retrieve=dict_retrieve_word(dict, read_index, &sequence);

        //critical situation: the index is not in the dictionary yet, so the
        //pending entry is added first and the lookup is repeated
        if(res_retrieve==false) {
            pos=dict_search((int)prev_current, child_root, dict, &flag);
            dict_add_word(pos, (int)prev_current, child_root, dict, &flag);
            dict_retrieve_word(dict, read_index, &sequence);
            first_read=true;
        }

        //an index that still cannot be resolved means the input is not a
        //valid stream; stop instead of dereferencing a null word
        if(sequence == NULL) {
            fprintf(stderr, "decompress: invalid index in %s\n", input);
            break;
        }

        child_root = sequence->symbol;
        print_seq = sequence;   //Write word on file

        while(print_seq != NULL) {
            write(decomp_file, (void*)&(print_seq->symbol), 1);
            print_seq = print_seq->next;
            free(sequence);                             //Deallocate word structure
            sequence = print_seq;
        }

        if(first_read==false) {
            //Add the first symbol of my sequence, as child of the previous current node
            pos=dict_search((int)prev_current, child_root, dict, &flag);
            dict_add_word(pos, (int)prev_current, child_root, dict, &flag);
        }
        first_read = false;
        prev_current = read_index;
        bitio_read(comp_file, &read_index, index_bits);                 //Read the index
    }

    //Closure of all data structures
    close(comp_file->fd);
    free(comp_file);
    close(decomp_file);
    //NOTE(review): dict itself is not freed here; whether suppress_dictionary
    //releases the structure as well is not visible from this function
    suppress_dictionary(dict);

    fprintf(stderr, "decompression executed\n");
}
Esempio n. 12
0
static int32
dict_read(FILE * fp, dict_t * d)
{
    char line[16384], **wptr;
    s3cipid_t p[4096];
    int32 lineno, nwd;
    s3wid_t w;
    int32 i, maxwd;
    s3cipid_t ci;
    int32 ph;

    maxwd = 4092;
    wptr = (char **) ckd_calloc(maxwd, sizeof(char *)); /* Freed below */

    lineno = 0;
    while (fgets(line, sizeof(line), fp) != NULL) {
        lineno++;
        if (line[0] == '#')     /* Comment line */
            continue;

        if ((nwd = str2words(line, wptr, maxwd)) < 0)
            E_FATAL("str2words(%s) failed; Increase maxwd from %d\n", line,
                    maxwd);

        if (nwd == 0)           /* Empty line */
            continue;
        /* wptr[0] is the word-string and wptr[1..nwd-1] the pronunciation sequence */
        if (nwd == 1) {
            E_ERROR("Line %d: No pronunciation for word %s; ignored\n",
                    lineno, wptr[0]);
            continue;
        }
        {char * fin;
	  float proba=0.0;
	  int deca=0;
	  proba=strtod(wptr[1],&fin);
	  if (fin !=wptr[1]) 
	    deca=1;
	  else
	    proba=0.0;
	  
        /* Convert pronunciation string to CI-phone-ids */
        for (i = 1; i < nwd-deca; i++) {
            p[i - 1] = dict_ciphone_id(d, wptr[i+deca]);
            if (NOT_S3CIPID(p[i - 1])) {
                E_ERROR("Line %d: Bad ciphone: %s; word %s ignored\n",
                        lineno, wptr[i], wptr[0]);
                break;
            }
        }

        if (i == nwd-deca) {         /* All CI-phones successfully converted to IDs */
            w = dict_add_word(d, wptr[0], p, nwd - 1-deca);
            if (NOT_S3WID(w))
                E_ERROR
                    ("Line %d: dict_add_word (%s) failed (duplicate?); ignored\n",
                     lineno, wptr[0]);
	    d->word[w].proba=proba;
        }
	}
    }
    

    if (d->lts_rules) {

#if 1                           /* Until we allow user to put in a mapping of the phoneset from LTS to the phoneset from mdef, 
                                   The checking will intrusively stop the recognizer.  */

        for (ci = 0; ci < mdef_n_ciphone(d->mdef); ci++) {

            if (!mdef_is_fillerphone(d->mdef, ci)) {
                for (ph = 0; cmu6_lts_phone_table[ph] != NULL; ph++) {

                    /*        E_INFO("%s %s\n",cmu6_lts_phone_table[ph],mdef_ciphone_str(d->mdef,ci)); */
                    if (!strcmp
                        (cmu6_lts_phone_table[ph],
                         mdef_ciphone_str(d->mdef, ci)))
                        break;
                }
                if (cmu6_lts_phone_table[ph] == NULL) {
                    E_FATAL
                        ("A phone in the model definition doesn't appear in the letter to sound ",
                         "rules. \n This is case we don't recommend user to ",
                         "use the built-in LTS. \n Please kindly turn off ",
                         "-lts_mismatch\n");
                }
            }
        }
#endif
    }