/* Dict::initialize: returns true if the map is ready to be searched, false otherwise. */ bool Dict::initialize() { ifstream dictfile(dictfileName.c_str(), ifstream::in); if (dictfile.good()) { loadDict(); } else { cout << "Dictionary file not present. Creating new file. This may take some time..." << endl; createDict(); } if (USE_BLACKLIST) { try { blacklistfile.open(blacklistfileName.c_str(), ifstream::in); // cout << "Loading Blacklist..." << endl; if (blacklistfile.good()) loadBlacklist(); } catch (ifstream::failure e) { cout << "Warning: Blacklist file not present!" << endl; } } return true; }
/*
 * Compresses every file named on the command line with one shared dictionary.
 *
 * Usage: <exe> [FILES] dictionary
 *   argv[1] .. argv[argc-2]  files to compress
 *   argv[argc-1]             dictionary file
 *
 * Returns 1 when fewer than two arguments are supplied, 0 otherwise.
 */
int main(int argc, const char** argv)
{
    const char* const exeName = argv[0];

    if (argc < 3) {
        fprintf(stderr, "wrong arguments\n");
        fprintf(stderr, "usage:\n");
        fprintf(stderr, "%s [FILES] dictionary\n", exeName);
        return 1;
    }

    /* load dictionary only once */
    const char* const dictName = argv[argc-1];
    const ZSTD_CDict* const dictPtr = createDict(dictName);

    int u;
    for (u = 1; u < argc-1; u++) {
        const char* inFilename = argv[u];
        char* const outFilename = createOutFilename(inFilename);
        compress(inFilename, outFilename, dictPtr);
        free(outFilename);
    }

    /* argc-2 is an int, so it is printed with %d rather than %u. */
    printf("All %d files compressed. \n", argc-2);
    return 0;
}
// Creates a new dict via createDict() and fills it from two parallel arrays:
// for each index below `num`, keys[index] is mapped to values[index].
// Each key and value is asserted to be a valid GC object before insertion.
// Returns the populated dict.
Box* JitFragmentWriter::createDictHelper(uint64_t num, Box** keys, Box** values) {
    BoxedDict* result = (BoxedDict*)createDict();

    uint64_t idx = 0;
    while (idx != num) {
        Box* const key = keys[idx];
        assert(gc::isValidGCObject(key));

        Box* const value = values[idx];
        assert(gc::isValidGCObject(value));

        result->d[key] = value;
        ++idx;
    }

    return result;
}
/*
 * doRead(filename): parses a delta rpm in a forked child process and returns
 * the result as a Python object.
 *
 * The delta rpm code does not expect to be used in this way: its error
 * handling consists of 'printf' and 'exit'. The parsing therefore runs in a
 * child process. The child marshals createDict(d) into a pipe and the parent
 * unmarshals it.
 *
 * Returns a new reference on success. On failure returns NULL with a Python
 * exception set: the one raised by PyArg_ParseTuple for bad arguments,
 * SystemError for pipe/fork/fdopen/waitpid failures and for a child that did
 * not exit with status 0.
 */
static PyObject *doRead(PyObject *s, PyObject *args)
{
    char *filename;
    struct deltarpm d;
    PyObject *ret;
    pid_t pid;
    int ipcpipe[2];
    int rc, status;
    FILE *readend;
    FILE *writend;

    /* PyArg_ParseTuple has already set an exception; do not overwrite it. */
    if (!PyArg_ParseTuple(args, "s", &filename))
        return NULL;

    if (pipe(ipcpipe) == -1) {
        PyErr_SetFromErrno(PyExc_SystemError);
        return NULL;
    }

    pid = fork();
    if (pid == -1) {
        /* Record the error before close() can disturb errno. */
        PyErr_SetFromErrno(PyExc_SystemError);
        close(ipcpipe[0]);
        close(ipcpipe[1]);
        return NULL;
    }

    if (pid == 0) {
        /* Child: parse the file and marshal the result into the pipe. */
        close(ipcpipe[0]);
        writend = fdopen(ipcpipe[1], "w");
        if (writend == NULL)
            _exit(1);
        readdeltarpm(filename, &d, NULL);
        PyMarshal_WriteObjectToFile(createDict(d), writend, Py_MARSHAL_VERSION);
        fclose(writend);
        _exit(0);
    }

    /*
     * Parent. Close our copy of the write end first, otherwise the read
     * below would never see EOF if the child dies early.
     */
    close(ipcpipe[1]);

    readend = fdopen(ipcpipe[0], "r");
    if (readend == NULL) {
        PyErr_SetFromErrno(PyExc_SystemError);
        close(ipcpipe[0]);
        waitpid(pid, &status, 0);   /* reap the child; it fails on its write */
        return NULL;
    }

    /*
     * Drain the pipe BEFORE waiting for the child: a child that produces
     * more data than the pipe buffer holds blocks in write() until someone
     * reads, so waiting first could deadlock.
     */
    ret = PyMarshal_ReadObjectFromFile(readend);
    fclose(readend);

    rc = waitpid(pid, &status, 0);
    if (rc == -1) {
        Py_XDECREF(ret);
        PyErr_SetFromErrno(PyExc_SystemError);
        return NULL;
    }
    if (!WIFEXITED(status) || WEXITSTATUS(status) != 0) {
        /* errno carries no information about the child's failure here. */
        Py_XDECREF(ret);
        PyErr_SetString(PyExc_SystemError, "reading the delta rpm failed in the helper process");
        return NULL;
    }

    /* ret is NULL with an exception set if unmarshalling failed. */
    return ret;
}
/*
 * Decompresses every file named on the command line with one shared
 * dictionary.
 *
 * Usage: <exe> [FILES] dictionary
 *   argv[1] .. argv[argc-2]  files to decompress
 *   argv[argc-1]             dictionary file
 *
 * Returns 1 when fewer than two arguments are supplied, 0 otherwise.
 */
int main(int argc, const char** argv)
{
    const char* const exeName = argv[0];

    if (argc < 3) {
        /* Usage errors go to stderr so they do not mix with regular output. */
        fprintf(stderr, "wrong arguments\n");
        fprintf(stderr, "usage:\n");
        fprintf(stderr, "%s [FILES] dictionary\n", exeName);
        return 1;
    }

    /* load dictionary only once */
    const char* const dictName = argv[argc-1];
    const ZSTD_DDict* const dictPtr = createDict(dictName);

    int u;
    for (u = 1; u < argc-1; u++) {
        decompress(argv[u], dictPtr);
    }

    /* argc-2 is an int, so it is printed with %d rather than %u. */
    printf("All %d files decoded. \n", argc-2);
    return 0;
}
/*
 * RunCodeRepair: repeatedly replaces the most frequent pair of each context
 * with a fresh code until 2^code_len codes are in use or no pair is left.
 *
 * input       stream handed to createCRDS()
 * code_len    code length in bits; must be smaller than the bit width of
 *             uint, because the code limit is computed as 1 << code_len
 * cont_len    forwarded to createCRDS()
 * mchar_size  forwarded to createCRDS()
 *
 * Returns the dictionary obtained from createDict() after the replaced pairs
 * have been added and copyCompSeq() has been applied. Returns NULL if the
 * scratch array cannot be allocated.
 */
DICT *RunCodeRepair(FILE *input, uint code_len, uint cont_len, uint mchar_size)
{
  uint i, j;
  CRDS *crds;
  DICT *dict;
  CODE new_code;
  PAIR **mp_ary;
  /* Exact integer power of two; avoids the floating-point round trip of pow(). */
  uint limit = (uint)1 << code_len;
  uint t_num_rules, c_seq_len;
#ifdef DISPLAY
  double comp_ratio;
#endif

  //initialization
#ifdef DISPLAY
  printf("\n");
  printf("Initializing ...\n");
#endif
  crds = createCRDS(input, cont_len, mchar_size);
  dict = createDict(crds, code_len);

#ifdef DISPLAY
  printf("///////////////////////////////////////\n");
  printf(" Input text size = %d (bytes).\n", crds->txt_len);
  printf(" Alphabet size = %d.\n", crds->char_size);
  printf(" # of contexts = %d.\n", crds->num_contexts);
  printf(" Code length = %d (bits).\n", code_len);
  printf(" # of new_code = %d.\n", limit - crds->char_size);
  printf("///////////////////////////////////////\n");
  printf("\n");
  printf("Compressing text ...\n");
#endif

  /* One slot per context plus a trailing NULL sentinel. */
  mp_ary = (PAIR **)malloc(sizeof(PAIR *) * (crds->num_contexts + 1));
  if (mp_ary == NULL) {
    fprintf(stderr, "RunCodeRepair: out of memory\n");
    destructCRDS(crds);
    /* NOTE(review): dict is not released here; no destructor for DICT is
       visible from this function. */
    return NULL;
  }

  new_code = crds->char_size;
  t_num_rules = 0;
  c_seq_len = crds->txt_len;

  //select replaced pairs
  while (new_code < limit) {
    /* The sentinel slot stays NULL; every other slot gets its context's pair. */
    mp_ary[crds->num_contexts] = NULL;
    for (i = 0; i < crds->num_contexts; i++) {
      mp_ary[i] = getMaxPair(crds, i);
    }

    //sort mp_ary by frequencies.
    /* The loops below stop at the first NULL, so this relies on comparePair
       ordering NULL entries after all non-NULL ones (TODO confirm). */
    qsort(mp_ary, crds->num_contexts + 1, sizeof(PAIR *),
          (int(*)(const void *, const void *))comparePair);

    //if mp_ary is empty, then break.
    if (mp_ary[0] == NULL) break;

    //replace pairs
    for (i = 0; mp_ary[i] != NULL; i++) {
      addNewPair(dict, new_code, mp_ary[i]);
      c_seq_len -= replacePairs(crds, mp_ary[i], new_code);
      t_num_rules++;
    }

#ifdef DISPLAY
    comp_ratio = calCompRatio(crds->txt_len, crds->char_size, crds->cont_len,
                              crds->num_contexts, c_seq_len, t_num_rules,
                              code_len, false);
    printf("\r");
    printf("new_code = [%5d], Comp.ratio = %0.3f %%.",new_code, comp_ratio);
    fflush(stdout);
#endif

    //free replaced pairs
    for (i = 0; mp_ary[i] != NULL; i++) {
      destructPair(crds, mp_ary[i]);
    }

    //free unused pairs
    for (i = 0; i < crds->num_contexts; i++) {
      for (j = 1; j < THRESHOLD; j++) {
        deletePQ(crds, j, i);
      }
    }

    new_code++;
  }

#ifdef DISPLAY
  printf("\n");
  calCompRatio(crds->txt_len, crds->char_size, crds->cont_len,
               crds->num_contexts, c_seq_len, t_num_rules, code_len, true);
#endif

  //post processing
  copyCompSeq(crds, dict);
  free(mp_ary);
  destructCRDS(crds);

  return dict;
}
int main(int argc, char **argv) { //Set up variables for parsing and file read/write int fflag = 0; char *fvalue = NULL; int wflag = 0; char *wvalue = NULL; char *filein; int nonopt[2]; nonopt[0] = 0; nonopt[1] = 0; int index; char c; dictword *dictArray; int nonarg = 0; if (argc < 2) { printf("More arguments needed\n"); return 1; } //Parse command line arguments while (1) { c = getopt(argc, argv, "f:w:"); if (c == -1) { break; } switch (c) { case 'f': fflag = 1; fvalue = optarg; break; case 'w': wflag = 1; wvalue = optarg; break; } for (index = optind; index < argc; index++) { filein = argv[index]; nonopt[nonarg] = index; nonarg++; } } //Open files for read/write and/or create dictionary array based //on command line arguments. int num; if (fflag) { dict = fopen(fvalue,"r"); if (dict == NULL) { printf("Dictionary file not found"); return 1; } createDict(&dict, &dictArray, &num); } else { dictArray = malloc(sizeof(dictword)); strncpy(dictArray[0].word,argv[1],99); dictArray[0].count = 0; num = 1; } //If -w flag then create file for write if (wflag) { outfile = fopen(wvalue,"w"); } if (!fflag && !wflag) { infile = fopen(argv[2],"r"); } else { infile = fopen(filein,"r"); } if (infile == NULL) { printf("input file %s not found\n", argv[2]); return 1; } //Use strtok to get individual strings from file char line[61]; char *token; char *saveptr; int count = 0; char *tok = " .’,;?!'—:\"’"; int i; while (fgets(line, 60, infile) != NULL) { token = strtok_r(line, tok, &saveptr); while (token != NULL) { int j; //Compare string with all words in the dictionary for (j=0; j<num; j++) { if (!strcmp(dictArray[j].word,token)) { dictArray[j].count++; } } token = strtok_r(NULL, tok, &saveptr); } } //Read words and count from the dictionary and //format them properly for (i=0; i<num; i++) { char out[100]; strncpy(out,dictArray[i].word,99); strcat(out,","); char temp[20] = {}; snprintf(temp, 19, "%u",dictArray[i].count); strcat(out,temp); strcat(out,"\n"); //Write to file if -w else 
just print to terminal if (!wflag) { printf("%s",out); } else { fprintf(outfile, out); } } //Free memory and close files free(dictArray); fclose(infile); if (wflag) { fclose(outfile); } }
// main function for the encoder.
//
// Parses the command line, opens the input, output and dictionary files,
// writes a header and a shared dictionary to the dictionary file, and then
// loops calling RunRepair() / convertDict() / outputLocalDictionary() /
// EncodeCFG() until feof(input) reports end of file.
//
// Options (all take an argument):
//   -r  input file name            (required)
//   -w  output file name           (required)
//   -d  dictionary file name       (required)
//   -b  block length               (required, non-zero)
//   -l  code word length           (required, non-zero)
//   -c  chunk size                 (required, non-zero, <= block length)
//   -s  shared dictionary size     (optional; raised to ut.size if smaller)
//
// The function never returns normally: every path ends in exit().
int main(int argc, char *argv[])
{
  char *target_filename = NULL;
  //char output_filename[1024];
  char *output_filename = NULL;
  char *dict_filename = NULL;
  unsigned int codewordlength = 0;
  unsigned int shared_dictsize = 0;
  unsigned int chunk_size = 0;
  unsigned long int block_length = 0;
  unsigned int length;          // only referenced by the commented-out code below
  char *rest;                   // end pointer for the strtoul/strtol calls
  FILE *input, *output, *dictfile;
  DICT *dict;
  EDICT *edict;
  USEDCHARTABLE ut;
  int result;
  unsigned int b;               // counter, incremented once per loop iteration
  unsigned char *buf;           // only referenced by the commented-out code below
  unsigned int *buf2 = NULL;    // only referenced by the commented-out code below
  OBITFS seqout, dicout;
  int header_output = 0;        // never read in this function
  uint i;                       // only referenced by the commented-out code below

  /* Parse the options */
  while ((result = getopt(argc, argv, "r:w:b:l:d:s:c:")) != -1) {
    switch (result) {
    case 'r':
      target_filename = optarg;
      break;

    case 'w':
      output_filename = optarg;
      break;

    case 'd':
      dict_filename = optarg;
      break;

    case 'b':
      block_length = strtoul(optarg, &rest, 10);
      // Trailing non-numeric characters are rejected via help().
      if (*rest != '\0') {
        help(argv);
      }
      break;

    case 'c':
      // NOTE(review): this option alone uses strtol; the result is stored in
      // an unsigned int, so a negative argument wraps to a large value.
      chunk_size = strtol(optarg, &rest, 10);
      if (*rest != '\0') {
        help(argv);
      }
      break;

    case 'l':
      codewordlength = strtoul(optarg, &rest, 10);
      if (*rest != '\0') {
        help(argv);
      }
      break;

    case 's':
      shared_dictsize = strtoul(optarg, &rest, 10);
      if (*rest != '\0') {
        help(argv);
      }
      break;

    case '?':
      help(argv);
      break;
    }
  }

  // Check that all required options are present
  if (!(target_filename && output_filename && dict_filename && block_length && codewordlength && chunk_size)) {
    help(argv);
  }

  if (chunk_size > block_length) {
    fprintf(stderr, "chunk length should not exceed block length.\n");
    exit(1);
  }

  // Open the input file
  input = fopen(target_filename, "r");
  if (input == NULL) {
    puts("Input file open error at the beginning.");
    exit(1);
  }

  // Open the compressed data file
  output = fopen(output_filename, "wb");
  if (output == NULL) {
    puts("Output file open error at the beginning.");
    exit(1);
  }

  // Open the dictionary file
  dictfile = fopen(dict_filename, "wb");
  if (!dictfile) {
    puts("Dictionary file open error at the beginning.");
    exit(EXIT_FAILURE);
  }

  // if (NULL == (buf = (unsigned char*)malloc(sizeof(unsigned char) * block_length))) {
  //   || NULL == (buf2 = (unsigned int*)malloc(sizeof(unsigned int) * block_length))) {
  //   puts("malloc fault.");
  //   exit(EXIT_FAILURE);
  // }

  chartable_init(&ut);
  fill_chartable(input, &ut);

  // Seek to the end so that ftello() yields the input size, which is passed
  // to createDict(); then rewind.
  fseeko(input, 0, SEEK_END);
  dict = createDict(ftello(input));
  fseeko(input, 0, SEEK_SET);

  b = 0;
  obitfs_init(&seqout, output);
  obitfs_init(&dicout, dictfile);

  // The shared dictionary size is never smaller than ut.size.
  if (shared_dictsize < ut.size) shared_dictsize = ut.size;

  printf("Generating CFG...");
  fflush(stdout);

  outputHeader(&dicout, dict, (unsigned int) codewordlength, (unsigned int) block_length, &ut);

  // First RunRepair() call (last argument 1); its result feeds the shared
  // dictionary written below.
  dict = RunRepair(dict, input, block_length, shared_dictsize, codewordlength, &ut, chunk_size, 1);
  if (!dict) exit(1);
  edict = convertDict(dict, &ut);
  outputSharedDictionary(&dicout, edict, &ut, codewordlength, shared_dictsize, b);
  CleanEDict(edict);

  // Shrink shared_dictsize when fewer rules were produced than requested.
  if (dict->num_rules < shared_dictsize + CHAR_SIZE - ut.size) shared_dictsize = dict->num_rules + ut.size - CHAR_SIZE;

  // Rewind the input again before the loop below.
  fseeko(input, 0, SEEK_SET);

  // NOTE(review): nothing in this loop reads from `input` directly, so
  // termination relies on RunRepair() consuming the stream until EOF is
  // flagged -- confirm against RunRepair().
  while (!feof(input)) {
    // printf("************ Block #%d ************\n", b);
    // length = fread(buf, sizeof(unsigned char), block_length, input);
    // if (!length) break;
    // for (i = 0; i < length; i++) {
    //   buf2[i] = buf[i];
    // }
    /* for (unsigned int i = 0; i < length; i++) { */
    /*   printf("%u ", buf2[i]); */
    /* } */
    /* puts(""); */

    // NOTE(review): unlike the first call above, this result is not checked
    // for NULL before being passed to convertDict().
    dict = RunRepair(dict, input, block_length, shared_dictsize, codewordlength, &ut, chunk_size, 0);
    edict = convertDict(dict, &ut);
    outputLocalDictionary(&dicout, edict, &ut, codewordlength, shared_dictsize, b);
    EncodeCFG(edict, &seqout, codewordlength);
    CleanEDict(edict);
    b++;
  }

  printf("Finished!\n");
  fflush(stdout);

  if (dict) {
    free(dict->rule);
    free(dict->comp_seq);
    free(dict);
  }

  obitfs_finalize(&seqout);
  obitfs_finalize(&dicout);
  fclose(input);
  fclose(output);
  fclose(dictfile);
  exit(0);
}