list *parse_file_to_list(FILE *file) { L1 = create_list(); dictionary *D = create_dict(MAX_LABEL_NUM); /* list *L2 = first_pass(L1, D, file); // 1.parse all instructions, remove junk, keep labels to // a dictionary and save branching instructions to L2 second_pass(L2, D); // 2.parse branching instructions to fill their pointers assert(L1->size == sublist_size(L1->head)); */ /* Much uglier alternative due to static list declarations (It even ignores the list "returned" by the first_pass() function and obscures the control flow): */ L2 = create_list(); first_pass(file, D); second_pass(L2, D); //Print dictionary fprintf(stdout, "\n\nPrinting Labels from the dictionary.\n"); fprintf(stdout, "There are %d entries:\n", D->entries); for(int i = 0; i < D->entries; ++i) fprintf(stdout, "Label: Hash Code %20lu. Next instruction @%d %5s %2d\n", D->e[i].hashcode, D->e[i].goto_node, getCmdStr(D->e[i].goto_node->instr.instr.r.cmd), D->e[i].goto_node->instr.instr.r.dstReg); free(D); fprintf(stderr,"\n\nStarting the issuing of commands:"); return L1; }
// Demo driver for the T9-style SMS decoder: builds the keypad lookup tables
// and decodes the fixed input "AD".  (The decoded result is computed but not
// otherwise used — this function exists to exercise the pipeline.)
void sms() {
    string keypad[] = {"0-0",    "1-1",    "2-ABC2", "3-DEF3", "4-GHI4",
                       "5-JKL5", "6-MNO6", "7-PQRS7", "8-TUV8", "9-WXYZ9"};
    string message = "AD";

    vector<string> entries = getVector(keypad, 10);

    unordered_map<string, char> codeToDigit;
    unordered_map<char, int> digitLengths;
    create_dict(codeToDigit, digitLengths, entries);

    string decoded = transform(codeToDigit, message, digitLengths);
}
gboolean cong_is_word_misspelt (const gchar* string, const CongWord* word) { if(!dict) { create_dict(); } return enchant_dict_check(dict, string + word->start_byte_offset, word->length_in_bytes); }
extern void compress_lzw(FILE *orig, FILE *archf, unsigned int *orig_size, unsigned int *archf_size) { current_code_len = 8; dictionary_t *dict = create_dict(); string_t *str = create_str(); int i; for(i = 0; i < CHARS_NUM; i++) { assign(str, (char)i); add_to_dictionary(dict, str); } int prev_id, t, k = 7, code, bit; char c, prin_c = (char)0; fscanf(orig, "%c", &c); for(i = 0; (char)i != c; prev_id = ++i); rewind(orig); str->length = 0; while (!feof(orig)) { if (fscanf(orig, "%c", &c) <= 0) break; append(str, c); t = dict_str_id(dict, str); if (t == -1) { add_to_dictionary(dict, str); code = dict->code[prev_id]; for(i = dict->code_len[prev_id] - 1; i >= 0; i--) { bit = !!(code & (1 << i)); prin_c = prin_c | (bit << k); if (--k == -1) { fprintf(archf, "%c", prin_c); prin_c = (char)0; k = 7; ++*archf_size; } } assign(str, c); for(i = 0; (char)i != c; prev_id = ++i); } else prev_id = t; } if (k < 7) { fprintf(archf, "%c", prin_c); ++*archf_size; } printf("%d\n", dict->size); for(i = 256; i < dict->size; i++) printf("%s %d\n", dict->word[i], dict->word_len[i]); }
/*
 * Read each file in `files` and feed its text, chunk by chunk, into the word
 * dictionary via create_dict(buffer, byte_count, file_index).
 *
 * A 4 KiB buffer is refilled with fread(); when it fills completely, the tail
 * is cut back to the last word boundary so no word is ever split across two
 * create_dict() calls, and the dangling partial word is copied to the front
 * of the buffer before the next read.  Unreadable files are skipped silently;
 * a single word longer than the whole buffer aborts the process (exit(2)),
 * matching the original contract.
 *
 * Fix: the read loop used `while( buf_len = fread(...) )` — an assignment as
 * a bare truth value; now written as an explicit `!= 0` comparison.
 */
static void work_with_file(size_t count_of_files, char** files) {
    size_t buf_len = 0;
    char buf[4096];
    char *buf_ptr = buf;
    size_t buf_size = sizeof(buf);

    for (size_t i = 0; i < count_of_files; ++i) {
        char *fname = files[i];
        FILE *file = fopen(fname, "r");
        if (!file)
            continue; /* skip files we cannot open */

        while ((buf_len = fread(buf_ptr, 1, buf_size, file)) != 0) {
            size_t last_word_pos = buf_len;
            if (buf_len == buf_size) {
                /* Buffer completely full: back up to a word boundary. */
                last_word_pos = find_space_or_punct_from_end(buf_ptr, buf_len);
                if (!last_word_pos) {
                    /* One word fills the entire buffer — unrecoverable. */
                    printf("too long word\n");
                    fclose(file);
                    exit(2);
                }
            }
            create_dict(buf,
                        find_not_space_or_punct_from_end(buf_ptr, last_word_pos)
                            + buf_ptr - buf,
                        i);

            /* Carry the trailing partial word to the buffer's front and
               shrink the next read window accordingly. */
            size_t length_of_part = buf_len - last_word_pos;
            if (length_of_part)
                memcpy(buf, buf_ptr + last_word_pos, length_of_part);
            buf_size = sizeof(buf) - length_of_part;
            buf_ptr = buf + length_of_part;
        }

        if (buf != buf_ptr) {
            /* Flush the partial word left over at end of file. */
            create_dict(buf, buf_ptr - buf, i);
            buf_ptr = buf;
            buf_size = sizeof(buf);
        }
        fclose(file);
    }
}
/* Initialise the tagger from the command line: clear the global tables,
 * parse the options, optionally open a Berkeley-DB lexicon, load the
 * mapping/rule files, then build or load the dictionary and transition
 * matrix and prepare the unknown-word machinery.  Exits the process on a
 * usage error or on a Berkeley-DB failure.  The ordering below matters:
 * options must be parsed before any file is opened, and the dictionary must
 * exist before feature analysis and anchor/adjustment steps. */
void Tagger::Init(int argc, char * argv[]) {
  /* NOTE(review): "iso_8858_1" looks like a typo for "iso_8859_1" (Latin-1);
     setlocale silently returns NULL on unknown locale names — confirm. */
  setlocale(LC_CTYPE, "iso_8858_1");
#ifdef SpecialMalloc
  /* Force fast allocation */
  set_small_allocation(100);
#endif
  /* Clear data structures */
  InitDict(dict);
  InitDict(skip_dict);
  InitTrans(trans);
  InitTrans(c_newtrans);
  odictfile = otranfile = NULL;
  /* Verify command line */
  if (argc <= 2) error_exit("Usage: label corpus options\n");
  /* Form options: fills all of the *name buffers and numeric settings below */
  InitOptions;
  set_up_options(argc, argv, &iterations, &initialise, &dict_size, dictname,
                 tranname, odictname, otranname, outname, mapname, skipname,
                 reducename, fsmname, grammarname, infername, ukwname,
                 ofeaturesname, obadwordname, bdbmname, runkstatname,
                 wunkstatname);
  any_output = !no_output || Option(report_stats) || OutOpt(prob_dist);
  /* Open BDBM dictionary */
  if (Option(bdbm)){
    /* Berkeley DB: first of all need to create the dbp data structure */
    if((ret = db_create(&dbp, NULL, 0)) != 0) {
      fprintf(stderr, "db_create: %s\n", db_strerror(ret));
      exit (1);
    }
    /* Berkeley DB: Then you open it, readonly */
    if((ret = dbp->open(dbp,bdbmname, NULL, DB_BTREE, DB_RDONLY, 0777)) != 0) {
      dbp->err(dbp, ret, "%s", bdbmname);
      exit(1);
    }
  }
  /* Read mappings (always required) */
  if (Option(verbose)) printf("Read mappings\n");
  read_mapping(mapname);
  /* Read tag reduction mappings */
  if (Option(reduced_tags)) {
    if (Option(verbose)) printf("Read reduced tag set\n");
    read_reduce_mapping(reducename);
  }
#ifdef Use_Parser
  /* Read parse rules */
  if (Option(use_parser)) {
    if (Option(verbose)) printf("Read parse rules\n");
    parser_read_named(grammarname);
  }
#endif
#ifdef Use_FSM
  /* Read FSM definitions */
  if (Option(use_fsm)) {
    if (Option(verbose)) printf("Read FSMs\n");
    fsm_read_named(fsmname);
  }
#endif
  /* Read skip list */
  if (Option(skip_list)) {
    if (Option(verbose)) printf("Read skip list\n");
    read_named_dict(skipname, &skip_dict, -1);
  }
  /* Read unknown word rules */
  if (Option(unknown_rules)) {
    if (Option(verbose)) printf("Read unknown word rules\n");
    read_unknown_rules(ukwname);
  }
  /* Set up dictionary [note]: it costs a few seconds.
     Empty dictname means "start from an empty dictionary of dict_size";
     otherwise load the named one and optionally run tag inference. */
  if (dictname[0] == 0) {
    create_dict(&dict, dict_size);
    clear_dict(&dict);
  } else {
    if (Option(verbose)) printf("Read dictionary\n");
    read_named_dict(dictname, &dict, -1);
    if (infername[0] != 0) {
      if (Option(verbose)) printf("Read inference rules\n");
      infer_tags((char *)infername, &dict);
    }
  }
  /* Set up transitions [note] it costs a few seconds.
     Same empty-vs-named scheme as the dictionary above. */
  if (tranname[0] == 0) {
    create_trans(&trans, tags_all);
    clear_trans_all(&trans);
  } else {
    if (Option(verbose)) printf("Read transitions\n");
    read_named_ascii_trans(tranname, &trans);
    /* Analyze selected features of lexicon to generate tag probabilities
       for unknown words. */
    if ( Option(unknown_morph) || Option(unknown_rules)) {
      /* Initialize feature values */
      Allocate(features->featuretags, sizeof(FeatureTagSt),
               "features->featuretags: main");
      features->featuretags->next_open_slot = 0;
      features->gamma = trans.gamma;
      /* Fall back to compile-time limits for any unset feature bound. */
      if ( features->maxsuffix == 0 ) features->maxsuffix = MinSuffixLen;
      if ( features->maxunkwords == 0 ) features->maxunkwords = MAXUNKWORDS;
      if ( features->maxprefcut == 0 ) features->maxprefcut = MinPrefixLen;
      if ( features->maxsuffcut == 0 ) features->maxsuffcut = MinSuffixLen;
      unknown_word_handling_initialization();
      gather_unigram_freqs( &dict );
    }
    if ( Option(unknown_morph) ) {
      analyze_features( &dict, ofeaturesname, obadwordname, &trans, dbp,
                        &dict, runkstatname );
    }
  }
  set_special_words(&dict, features );
  /* Create space for re-estimation or training */
  if (Option(reestimate) || Option(training)) {
    c_newtrans.gamma = trans.gamma; /* Share arrays */
    create_trans(&c_newtrans, tags_all);
  }
  /* Open output files requested on the command line. */
  if (odictname[0] != 0)
    odictfile = open_file(odictname, "w");
  if (otranname[0] != 0)
    otranfile = open_file(otranname, "w");
  /* Set up anchor word and normalise the loaded tables. */
  set_anchor(&dict);
  adjust_dict(&dict, trans.gamma, FALSE);
  adjust_trans(&trans, NULL);
}