Exemple #1
0
/*
 * Parse an input file into the instruction list L1 and return it.
 *
 * Steps, as performed below:
 *   1. create L1 and a label dictionary sized MAX_LABEL_NUM;
 *   2. create L2, then run first_pass(file, D) and second_pass(L2, D);
 *   3. dump every dictionary entry to stdout;
 *   4. free the dictionary and announce the start of issuing on stderr.
 *
 * L1 and L2 are not declared in this function; they are file-level state
 * shared with the pass functions.
 *
 * Returns L1.
 */
list *parse_file_to_list(FILE *file)
{
    L1 = create_list();
    dictionary *D = create_dict(MAX_LABEL_NUM);

    /*
     * Intended (cleaner) design, kept for reference:
     *
     *   list *L2 = first_pass(L1, D, file); // 1. parse all instructions, remove junk, keep
     *                                       //    labels in a dictionary and save branching
     *                                       //    instructions to L2
     *              second_pass(L2, D);      // 2. parse branching instructions to fill
     *                                       //    their pointers
     *   assert(L1->size == sublist_size(L1->head));
     *
     * The form actually used below is the uglier alternative forced by the
     * static list declarations: it ignores the list "returned" by
     * first_pass() and obscures the control flow.
     */
    L2 = create_list();
    first_pass(file, D);
    second_pass(L2, D);

    /* Print dictionary */
    fprintf(stdout, "\n\nPrinting Labels from the dictionary.\n");
    fprintf(stdout, "There are %d entries:\n", D->entries);
    for (int i = 0; i < D->entries; ++i) {
        /* goto_node is a pointer: it must be printed with %p (as void *),
           passing it to %d is undefined behaviour. */
        fprintf(stdout, "Label: Hash Code %20lu. Next instruction @%p %5s %2d\n",
                D->e[i].hashcode,
                (void *)D->e[i].goto_node,
                getCmdStr(D->e[i].goto_node->instr.instr.r.cmd),
                D->e[i].goto_node->instr.instr.r.dstReg);
    }
    free(D);

    fprintf(stderr,"\n\nStarting the issuing of commands:");
    return L1;
}
void sms(){
    string x[] = {"0-0", "1-1", "2-ABC2", "3-DEF3", "4-GHI4", "5-JKL5", "6-MNO6","7-PQRS7", "8-TUV8", "9-WXYZ9"};
    string inputString = "AD";
    vector<string> z = getVector(x,10);
    unordered_map<string,char> dict;
    unordered_map<char,int> lendict;
    create_dict(dict,lendict,z);
    string res = transform(dict,inputString,lendict);
}
Exemple #3
0
gboolean cong_is_word_misspelt (const gchar* string, const CongWord* word)
{
	if(!dict)
	{
		create_dict();
	}

	return enchant_dict_check(dict, string + word->start_byte_offset,
			word->length_in_bytes);
}
Exemple #4
0
/*
 * Write the low `code_len` bits of `code` to `archf`, most significant bit
 * first, packing them into bytes through the accumulator `*acc`.
 *
 * `*bit_pos` is the position of the next free bit in the accumulator
 * (7 means the accumulator is empty). Every completed byte is written to
 * `archf` and counted in `*archf_size`.
 */
static void lzw_emit_code(FILE *archf, int code, int code_len,
		unsigned char *acc, int *bit_pos, unsigned int *archf_size)
{
	int i, bit;
	for (i = code_len - 1; i >= 0; i--)
	{
		bit = !!(code & (1 << i));
		*acc = (unsigned char)(*acc | (bit << *bit_pos));
		if (--*bit_pos == -1)
		{
			fputc(*acc, archf);
			*acc = 0;
			*bit_pos = 7;
			++*archf_size;
		}
	}
}

/*
 * LZW-compress the stream `orig` into `archf`.
 *
 * orig:       input stream; it is rewound to its start before compression.
 * archf:      output stream receiving the packed codes.
 * orig_size:  not read or written by this function.
 * archf_size: incremented once per byte written to `archf` (the caller is
 *             expected to have initialised it).
 *
 * Side effects: sets the file-level `current_code_len` to 8 and prints the
 * dictionary size plus every entry from index 256 upwards to stdout.
 *
 * Fixes relative to the previous version:
 *   - an empty input no longer reads an uninitialised character;
 *   - the id of a single-character string is computed directly, so a '\0'
 *     byte no longer leaves `prev_id` uninitialised or stale;
 *   - the code of the string still pending at end of input is emitted
 *     (it used to be dropped, losing the tail of the data);
 *   - the bit accumulator is unsigned, so setting bit 7 is well defined.
 *
 * NOTE(review): `dict` and `str` are never released here; no destroy routine
 * is visible in this part of the file.
 */
extern void compress_lzw(FILE *orig, FILE *archf, unsigned int *orig_size, unsigned int *archf_size)
{
	current_code_len = 8;
	dictionary_t *dict = create_dict();
	string_t *str = create_str();
	int i;

	/* Seed the dictionary with every single-character string. */
	for(i = 0; i < CHARS_NUM; i++)
	{
		assign(str, (char)i);
		add_to_dictionary(dict, str);
	}

	int prev_id = -1;	/* id of the longest match so far; -1 = none yet */
	int t, k = 7;
	char c;
	unsigned char prin_c = 0;

	(void)orig_size;

	/* Single-character strings were added in byte order above, so the id
	   of the string "c" is the byte value of c. */
	if (fscanf(orig, "%c", &c) == 1)
		prev_id = (unsigned char)c;
	rewind(orig);
	str->length = 0;

	while (fscanf(orig, "%c", &c) == 1)
	{
		append(str, c);
		t = dict_str_id(dict, str);
		if (t == -1)
		{
			/* New string: remember it, emit the code of its longest
			   known prefix and restart matching from c. */
			add_to_dictionary(dict, str);
			lzw_emit_code(archf, dict->code[prev_id], dict->code_len[prev_id],
					&prin_c, &k, archf_size);
			assign(str, c);
			prev_id = (unsigned char)c;
		}
		else
			prev_id = t;
	}

	/* Emit the code of the match that was still pending at end of input. */
	if (prev_id >= 0)
		lzw_emit_code(archf, dict->code[prev_id], dict->code_len[prev_id],
				&prin_c, &k, archf_size);

	/* Flush a partially filled last byte (unused low bits stay zero). */
	if (k < 7)
	{
		fputc(prin_c, archf);
		++*archf_size;
	}

	printf("%d\n", dict->size);
	for(i = 256; i < dict->size; i++)
		printf("%s %d\n", dict->word[i], dict->word_len[i]);
}
Exemple #5
0
//put to dict from file
/*
 * Read each file in `files` through a fixed 4096-byte buffer and hand the
 * buffered text to create_dict() together with the file's index.
 *
 * When a read fills the whole free part of the buffer, the text after the
 * position reported by find_space_or_punct_from_end() is held back, moved
 * to the front of the buffer and processed together with the next read
 * (presumably so that a word is never split across two create_dict()
 * calls -- confirm against the helpers).
 *
 * count_of_files: number of entries in `files`.
 * files:          file names; files that cannot be opened are skipped.
 *
 * Terminates the process with exit(2) after printing "too long word" when
 * find_space_or_punct_from_end() returns 0 for a full read.
 */
static void work_with_file(size_t count_of_files, char** files) {
    size_t buf_len = 0;
    char buf[4096];
    char *buf_ptr = buf;               /* where the next read is stored */
    size_t buf_size = sizeof(buf);     /* free space starting at buf_ptr */
    for( size_t i = 0; i < count_of_files; ++i ) {
        char *fname = files[i];
        // printf("%s\n", fname);
        FILE *file = fopen(fname, "r");
        if ( !file )
            continue;
        while( (buf_len = fread( buf_ptr, 1, buf_size, file)) != 0 ) {
            size_t last_word_pos = buf_len;
            if ( buf_len == buf_size ) {
                /* Full read: more data may follow, so look for a cut point. */
                last_word_pos = find_space_or_punct_from_end( buf_ptr, buf_len );
                if( !last_word_pos ) {
                    printf("too long word\n");
                    fclose(file);
                    exit(2);
                }
            }
            /* Process from the start of the buffer, i.e. including any part
               carried over from the previous read. */
            create_dict( buf, find_not_space_or_punct_from_end( buf_ptr, last_word_pos ) + buf_ptr - buf, i );
            size_t length_of_part = buf_len - last_word_pos;
            /* Source and destination are in the same buffer and may
               overlap, so memmove (not memcpy) is required. */
            if( length_of_part )
                memmove(buf, buf_ptr + last_word_pos, length_of_part);
            buf_size = sizeof(buf) - length_of_part;
            buf_ptr = buf + length_of_part;
        }
        /* A held-back tail is still pending when the file ended right after
           a full read: process it and reset the buffer for the next file. */
        if( buf != buf_ptr ) {
            create_dict( buf, buf_ptr - buf, i );
            buf_ptr = buf;
            buf_size = sizeof(buf);
        }
        //printf("%lu %s %lu\n", i, fname, words_and_counts_used);
        fclose(file);
    }
}
Exemple #6
0
/*
 * Initialise the tagger from the command line.
 *
 * In order, this routine:
 *   - sets the LC_CTYPE locale and clears the dictionaries and transition
 *     tables;
 *   - requires more than two command-line arguments (otherwise calls
 *     error_exit with a usage message) and parses the options;
 *   - opens the Berkeley DB dictionary read-only when the bdbm option is set;
 *   - reads the mappings and, depending on the options, the reduced tag set,
 *     parse rules, FSM definitions, skip list and unknown word rules;
 *   - creates an empty dictionary / transition table, or reads them from the
 *     named files (including inference rules and unknown-word feature
 *     analysis when requested);
 *   - allocates the re-estimation table, opens the output files, sets the
 *     anchor word and adjusts the dictionary and transitions.
 *
 * argc, argv: the program's command-line arguments, forwarded to
 *             set_up_options().
 *
 * Names such as dict, trans, features, dbp, ret and the *name buffers are
 * not declared in this function; they come from the enclosing scope.
 */
void Tagger::Init(int argc, char * argv[])
{

	/* NOTE(review): "iso_8858_1" looks like a typo for ISO 8859-1 -- confirm.
	   The return value of setlocale is not checked. */
	setlocale(LC_CTYPE, "iso_8858_1");
#ifdef SpecialMalloc
	/* Force fast allocation */
	set_small_allocation(100);
#endif

	/* Clear data structures */
	InitDict(dict);
	InitDict(skip_dict);
	InitTrans(trans);
	InitTrans(c_newtrans);
	odictfile = otranfile = NULL;

	/* Verify command line */
	if (argc <= 2)
	  error_exit("Usage: label corpus options\n");

	/* Form options */
	InitOptions;

	set_up_options(argc, argv, &iterations, &initialise, &dict_size,
				dictname, tranname, odictname, otranname, outname, mapname,
				skipname, reducename, fsmname, grammarname, infername, ukwname,
				ofeaturesname, obadwordname, bdbmname, runkstatname, wunkstatname);

	/* Output is produced unless no_output is set, or when statistics or
	   probability distributions were requested. */
	any_output = !no_output || Option(report_stats) || OutOpt(prob_dist);


	/* Open BDBM dictionary */
	if (Option(bdbm)){
		/* Berkeley DB: first of all need to create the dbp data structure*/
		if((ret = db_create(&dbp, NULL, 0)) != 0) {
			fprintf(stderr, "db_create: %s\n", db_strerror(ret));
			exit (1);
		}
		/* Berkeley DB: Then you open it, readonly  */
		if((ret = dbp->open(dbp,bdbmname, NULL, DB_BTREE, DB_RDONLY, 0777)) != 0) {
			dbp->err(dbp, ret, "%s", bdbmname);
			exit(1);
		} 
	}

	/* Read mappings */
	if (Option(verbose)) printf("Read mappings\n");
	read_mapping(mapname);

	/* Read tag reduction mappings */
	if (Option(reduced_tags))
	{
		if (Option(verbose)) printf("Read reduced tag set\n");
		read_reduce_mapping(reducename);
	}

#ifdef Use_Parser
	/* Read parse rules */
	if (Option(use_parser))
	{
		if (Option(verbose)) printf("Read parse rules\n");
		parser_read_named(grammarname);
	}
#endif
#ifdef Use_FSM
	/* Read FSM definitions */
	if (Option(use_fsm))
	{
		if (Option(verbose)) printf("Read FSMs\n");
		fsm_read_named(fsmname);
	}
#endif

	/* Read skip list */
	if (Option(skip_list))
	{
		if (Option(verbose)) printf("Read skip list\n");
		read_named_dict(skipname, &skip_dict, -1);
	}

	/* Read unknown word rules */
	if (Option(unknown_rules))
	{
		if (Option(verbose)) printf("Read unknown word rules\n");
		read_unknown_rules(ukwname);
	}

	/* Set up dictionary (note: this takes a few seconds).
	   An empty dictionary name means: create a fresh, cleared dictionary of
	   dict_size; otherwise the dictionary is read from the named file. */
	if (dictname[0] == 0)
	{
		create_dict(&dict, dict_size);
		clear_dict(&dict);
	}
	else
	{
		if (Option(verbose)) printf("Read dictionary\n");
		read_named_dict(dictname, &dict, -1);
		if (infername[0] != 0)
		{
			if (Option(verbose)) printf("Read inference rules\n");
			infer_tags((char *)infername, &dict);

		}
	}

	/* Set up transitions (note: this takes a few seconds).
	   An empty transition name means: create a fresh, cleared table;
	   otherwise the transitions are read from the named file. */
	if (tranname[0] == 0)
	{
		create_trans(&trans, tags_all);
		clear_trans_all(&trans);
	}
	else
	{
		if (Option(verbose)) printf("Read transitions\n");
		read_named_ascii_trans(tranname, &trans);

		/* Analyze selected features of lexicon to generate tag probabilities for unknown words. */
		if ( Option(unknown_morph) || Option(unknown_rules))
		{
			/* Initialize feature values */

			Allocate(features->featuretags, sizeof(FeatureTagSt), "features->featuretags: main");
			features->featuretags->next_open_slot = 0;

			features->gamma = trans.gamma;

			/* A limit still at 0 is replaced by its default value. */
			if ( features->maxsuffix == 0 )
			  features->maxsuffix = MinSuffixLen;
			if ( features->maxunkwords == 0 )
			  features->maxunkwords = MAXUNKWORDS;
			if ( features->maxprefcut == 0 )
			  features->maxprefcut = MinPrefixLen;
			if ( features->maxsuffcut == 0 )
			  features->maxsuffcut = MinSuffixLen;

			unknown_word_handling_initialization();
			gather_unigram_freqs( &dict );
		}

		if ( Option(unknown_morph) )
		{
			/* NOTE(review): &dict is passed for two separate parameters, and
			   dbp is passed even when the bdbm option did not open it --
			   confirm both are intended. */
			analyze_features( &dict, ofeaturesname, obadwordname, &trans, dbp, &dict, runkstatname );
		}
	}

	set_special_words(&dict, features );

	/* Create space for re-estimation or training */
	if (Option(reestimate) || Option(training))
	{
		/* gamma is assigned before create_trans is called on c_newtrans */
		c_newtrans.gamma = trans.gamma; /* Share arrays */
		create_trans(&c_newtrans, tags_all);
	}

	/* Open the output dictionary / transition files when names were given */
	if (odictname[0] != 0)
	  odictfile = open_file(odictname, "w");
	if (otranname[0] != 0)
	  otranfile = open_file(otranname, "w");

	/* Set up anchor word */
	set_anchor(&dict);

	adjust_dict(&dict, trans.gamma, FALSE);
	adjust_trans(&trans, NULL);
}