Пример #1
0
/* Dict::initialize: returns true if the map is ready to be searched, false otherwise.
 */
bool Dict::initialize()
{

    ifstream dictfile(dictfileName.c_str(), ifstream::in);

    if (dictfile.good())
    {
        loadDict();
    }
    else
    {
        cout << "Dictionary file not present.  Creating new file.  This may take some time..." << endl;
        createDict();
    }
    if (USE_BLACKLIST)
    {
        try
        {
            blacklistfile.open(blacklistfileName.c_str(), ifstream::in);
            // cout << "Loading Blacklist..." << endl;
            if (blacklistfile.good())
                loadBlacklist();

        }
        catch (ifstream::failure e)
        {
            cout << "Warning: Blacklist file not present!" << endl;
        }
    }
    return true;
}
Пример #2
0
/* Entry point: compresses each file named on the command line using a single
 * dictionary, which is given as the LAST argument.
 *
 *   usage: exe [FILES] dictionary
 *
 * Returns 1 when fewer than two arguments follow the program name.
 */
int main(int argc, const char** argv)
{
    const char* const programName = argv[0];

    if (!(argc >= 3)) {
        fprintf(stderr, "wrong arguments\n");
        fprintf(stderr, "usage:\n");
        fprintf(stderr, "%s [FILES] dictionary\n", programName);
        return 1;
    }

    /* The dictionary is created once and shared by every compress() call. */
    const int dictArg = argc - 1;
    const char* const dictionaryPath = argv[dictArg];
    const ZSTD_CDict* const sharedDict = createDict(dictionaryPath);

    /* argv[1] .. argv[argc-2] are the files to compress. */
    int fileArg = 1;
    while (fileArg < dictArg) {
        const char* const srcPath = argv[fileArg];
        char* const dstPath = createOutFilename(srcPath);
        compress(srcPath, dstPath, sharedDict);
        free(dstPath);
        ++fileArg;
    }

    printf("All %u files compressed. \n", argc-2);
    return 0;
}
Пример #3
0
/**
 * Creates a dict via createDict() and stores values[i] under keys[i] for
 * every i in [0, num).
 *
 * In assert-enabled builds each key and value is checked with
 * gc::isValidGCObject before it is stored.
 *
 * @param num     number of entries in both arrays
 * @param keys    array of `num` key objects
 * @param values  array of `num` value objects, parallel to `keys`
 * @return the newly created dict
 */
Box* JitFragmentWriter::createDictHelper(uint64_t num, Box** keys, Box** values) {
    BoxedDict* result = (BoxedDict*)createDict();

    uint64_t idx = 0;
    while (idx != num) {
        assert(gc::isValidGCObject(keys[idx]));
        assert(gc::isValidGCObject(values[idx]));
        result->d[keys[idx]] = values[idx];
        ++idx;
    }

    return result;
}
Пример #4
0
/* doRead: Python-callable function taking one string argument, the path of a
 * delta rpm.  Returns the object that createDict() builds from the parsed
 * delta rpm, or NULL with a Python exception set.
 *
 * The delta rpm code does not expect to be used in this way. Its error
 * handling consists of 'printf' and 'exit'. So, dirty hacks abound:
 * readdeltarpm() runs in a forked child and the result is marshalled back to
 * the parent through a pipe.
 */
static PyObject *doRead(PyObject *s, PyObject *args)
{
  char *filename;
  struct deltarpm d;
  PyObject *ret;
  FILE *readend;
  int pid;
  int rc, status;
  int ipcpipe[2];

  /* PyArg_ParseTuple has already raised the right exception on failure;
   * do not overwrite it with an unrelated errno-based one. */
  if (!PyArg_ParseTuple(args, "s", &filename))
    return NULL;

  if (pipe(ipcpipe) == -1) {
    PyErr_SetFromErrno(PyExc_SystemError);
    return NULL;
  }

  pid = fork();
  if (pid == -1) {
    /* Record errno before close() gets a chance to change it. */
    PyErr_SetFromErrno(PyExc_SystemError);
    close(ipcpipe[0]);
    close(ipcpipe[1]);
    return NULL;
  }

  if (pid == 0) {
    /* Child: parse the file and marshal the result into the pipe. */
    FILE *writend;

    close(ipcpipe[0]);
    writend = fdopen(ipcpipe[1], "w");
    if (writend == NULL)
      _exit(1);

    readdeltarpm(filename, &d, NULL);
    PyMarshal_WriteObjectToFile(createDict(d), writend, Py_MARSHAL_VERSION);
    fclose(writend);
    _exit(0);
  }

  /* Parent.  Close our copy of the write end first: otherwise the read below
   * could never see end-of-file if the child exits without writing. */
  close(ipcpipe[1]);

  readend = fdopen(ipcpipe[0], "r");
  if (readend == NULL) {
    PyErr_SetFromErrno(PyExc_SystemError);
    close(ipcpipe[0]);
    waitpid(pid, &status, 0); /* reap the child so it does not linger as a zombie */
    return NULL;
  }

  /* Drain the pipe BEFORE waiting for the child.  Waiting first deadlocks as
   * soon as the marshalled data exceeds the pipe capacity: the child blocks
   * in write while the parent blocks in waitpid. */
  ret = PyMarshal_ReadObjectFromFile(readend);
  fclose(readend);

  rc = waitpid(pid, &status, 0);
  if (rc == -1) {
    PyErr_SetFromErrno(PyExc_SystemError);
    Py_XDECREF(ret);
    return NULL;
  }
  if (WIFEXITED(status) && WEXITSTATUS(status) != 0) {
    /* The child exited with an error; errno in this process says nothing
     * about why, so report a fixed message.  This also replaces whatever
     * exception the truncated read above may have set. */
    Py_XDECREF(ret);
    PyErr_SetString(PyExc_SystemError, "reading the delta rpm failed in the helper process");
    return NULL;
  }

  /* ret may still be NULL here; the marshal reader has then set the exception. */
  return ret;
}
Пример #5
0
/* Entry point: decodes each file named on the command line using a single
 * dictionary, which is given as the LAST argument.
 *
 *   usage: exe [FILES] dictionary
 *
 * Returns 1 when fewer than two arguments follow the program name.
 */
int main(int argc, const char** argv)
{
    const char* const programName = argv[0];

    if (!(argc >= 3)) {
        printf("wrong arguments\n");
        printf("usage:\n");
        printf("%s [FILES] dictionary\n", programName);
        return 1;
    }

    /* The dictionary is created once and shared by every decompress() call. */
    const int dictArg = argc - 1;
    const char* const dictionaryPath = argv[dictArg];
    const ZSTD_DDict* const sharedDict = createDict(dictionaryPath);

    /* argv[1] .. argv[argc-2] are the files to decode. */
    int fileArg = 1;
    while (fileArg < dictArg) {
        decompress(argv[fileArg], sharedDict);
        ++fileArg;
    }

    printf("All %u files decoded. \n", argc-2);
    return 0;
}
Пример #6
0
/*
 * RunCodeRepair
 *
 * Builds the working structure from `input` (createCRDS) and a dictionary
 * (createDict), then repeatedly, for each new code value from
 * crds->char_size up to 2^code_len - 1:
 *   - asks getMaxPair() for one candidate pair per context,
 *   - sorts the candidates with comparePair,
 *   - registers every non-NULL candidate under the current code
 *     (addNewPair) and substitutes it in the text (replacePairs),
 *   - releases the handled pairs and calls deletePQ() for every context and
 *     every index 1..THRESHOLD-1.
 * The loop stops early when no candidate is left.  Finally the sequence is
 * copied into the dictionary (copyCompSeq) and the working structure is
 * destroyed.
 *
 * Returns the dictionary created by createDict().
 * With DISPLAY defined, progress and statistics are printed to stdout.
 */
DICT *RunCodeRepair(FILE *input, uint code_len, uint cont_len, uint mchar_size)
{
  uint ctx, k;
  CRDS *crds;
  DICT *dict;
  CODE next_code;
  PAIR **candidates;
  uint code_limit = (uint)pow(2, code_len);
  uint rule_total, seq_len;
#ifdef DISPLAY
  double comp_ratio;
#endif

  /* set up the working structure and the dictionary */
#ifdef DISPLAY
  printf("\n");
  printf("Initializing ...\n");
#endif
  crds = createCRDS(input, cont_len, mchar_size);
  dict = createDict(crds, code_len);

#ifdef DISPLAY
  printf("///////////////////////////////////////\n");
  printf(" Input text size = %d (bytes).\n", crds->txt_len);
  printf(" Alphabet size   = %d.\n", crds->char_size);
  printf(" # of contexts   = %d.\n", crds->num_contexts);
  printf(" Code length     = %d (bits).\n", code_len);
  printf(" # of new_code   = %d.\n", code_limit - crds->char_size);
  printf("///////////////////////////////////////\n");
  printf("\n");
  printf("Compressing text ...\n");
#endif

  /* one slot per context plus a trailing NULL sentinel */
  candidates = (PAIR **)malloc(sizeof(PAIR *) * (crds->num_contexts + 1));
  rule_total = 0;
  seq_len = crds->txt_len;

  for (next_code = crds->char_size; next_code < code_limit; next_code++) {
    /* collect one candidate pair per context; the last slot stays NULL */
    for (ctx = 0; ctx < crds->num_contexts; ctx++) {
      candidates[ctx] = getMaxPair(crds, ctx);
    }
    candidates[crds->num_contexts] = NULL;

    /* order the candidates with comparePair */
    qsort(candidates, crds->num_contexts + 1, sizeof(PAIR *),
          (int(*)(const void *, const void *))comparePair);

    /* nothing left to replace */
    if (candidates[0] == NULL) break;

    /* register and substitute every candidate up to the first NULL */
    for (k = 0; candidates[k] != NULL; k++) {
      PAIR *chosen = candidates[k];
      addNewPair(dict, next_code, chosen);
      seq_len -= replacePairs(crds, chosen, next_code);
      rule_total++;
    }

#ifdef DISPLAY
    comp_ratio =
      calCompRatio(crds->txt_len, crds->char_size, crds->cont_len,
                   crds->num_contexts, seq_len, rule_total, code_len, false);
    printf("\r");
    printf("new_code = [%5d], Comp.ratio = %0.3f %%.",next_code, comp_ratio);
    fflush(stdout);
#endif

    /* release the pairs that were just replaced */
    for (k = 0; candidates[k] != NULL; k++) {
      destructPair(crds, candidates[k]);
    }
    /* release the unused pairs of every context */
    for (ctx = 0; ctx < crds->num_contexts; ctx++) {
      for (k = 1; k < THRESHOLD; k++) {
        deletePQ(crds, k, ctx);
      }
    }
  }

#ifdef DISPLAY
  printf("\n");
  calCompRatio(crds->txt_len, crds->char_size, crds->cont_len,
               crds->num_contexts, seq_len, rule_total, code_len, true);
#endif

  /* hand the sequence over to the dictionary and clean up */
  copyCompSeq(crds, dict);
  free(candidates);
  destructCRDS(crds);

  return dict;
}
Пример #7
0
/*
 * Counts how often dictionary words occur in a text file and emits one
 * "word,count" line per dictionary word.
 *
 * Usage:
 *   prog [-w OUTFILE] WORD INFILE        count the single word WORD
 *   prog [-w OUTFILE] -f DICTFILE INFILE count every word that createDict()
 *                                        loads from DICTFILE
 *
 * Without -w the result goes to stdout.  Returns 1 on usage or file errors,
 * 0 otherwise.  Uses the file-scope FILE handles `dict`, `infile` and
 * `outfile`.
 */
int main(int argc, char **argv) {
	int fflag = 0;
	char *fvalue = NULL;
	int wflag = 0;
	char *wvalue = NULL;
	char *word = NULL;
	char *filein = NULL;
	dictword *dictArray = NULL;
	int num = 0;
	int i;
	/* Must be int: getopt() returns -1 at the end of the options, which a
	 * plain char cannot represent where char is unsigned. */
	int c;

	if (argc < 2) {
		printf("More arguments needed\n");
		return 1;
	}

	//Parse command line arguments
	while ((c = getopt(argc, argv, "f:w:")) != -1) {
		switch (c)
		{
			case 'f':
				fflag = 1;
				fvalue = optarg;
				break;
			case 'w':
				wflag = 1;
				wvalue = optarg;
				break;
			default:
				//Unknown options are ignored, as before
				break;
		}
	}

	//Positional arguments start at optind once option parsing is finished.
	//With -f only the input file is needed (the last argument is used);
	//otherwise the word to count comes first and the input file second.
	if (fflag) {
		if (argc - optind < 1) {
			printf("More arguments needed\n");
			return 1;
		}
		filein = argv[argc - 1];
	} else {
		if (argc - optind < 2) {
			printf("More arguments needed\n");
			return 1;
		}
		word = argv[optind];
		filein = argv[optind + 1];
	}

	//Build the dictionary array from the dictionary file or the single word
	if (fflag) {
		dict = fopen(fvalue,"r");
		if (dict == NULL) {
			printf("Dictionary file not found");
			return 1;
		}
		//NOTE(review): dict is not closed here; createDict() receives its
		//address and may take care of that - confirm.
		createDict(&dict, &dictArray, &num);
	} else {
		dictArray = malloc(sizeof(dictword));
		if (dictArray == NULL) {
			printf("Out of memory\n");
			return 1;
		}
		//strncpy does not terminate the copy when the source is too long
		strncpy(dictArray[0].word, word, sizeof(dictArray[0].word) - 1);
		dictArray[0].word[sizeof(dictArray[0].word) - 1] = '\0';
		dictArray[0].count = 0;
		num = 1;
	}

	//If -w flag then create file for write
	if (wflag) {
		outfile = fopen(wvalue,"w");
		if (outfile == NULL) {
			printf("output file %s could not be opened\n", wvalue);
			free(dictArray);
			return 1;
		}
	}

	infile = fopen(filein,"r");
	if (infile == NULL) {
		printf("input file %s not found\n", filein);
		if (wflag) {
			fclose(outfile);
		}
		free(dictArray);
		return 1;
	}

	//Use strtok to get individual strings from file

	char line[61];
	char *token;
	char *saveptr;

	//NOTE(review): '\n' is not a delimiter, so the last word of each line
	//keeps its newline and cannot match a dictionary word - confirm whether
	//that is intended before adding it.
	const char *tok = " .’,;?!'—:\"’";
	while (fgets(line, 60, infile) != NULL)
	{
		token = strtok_r(line, tok, &saveptr);
		while (token !=  NULL)
		{
			int j;
			//Compare string with all words in the dictionary
			for (j=0; j<num; j++) {
				if (!strcmp(dictArray[j].word,token)) {
					dictArray[j].count++;
				}
			}
			token = strtok_r(NULL, tok, &saveptr);
		}
	}

	//Emit "word,count" per dictionary entry: to the file if -w, else to the
	//terminal.  The text is passed as an argument, never as the format
	//string, and no fixed-size scratch buffer is involved.
	FILE *dest = wflag ? outfile : stdout;
	for (i=0; i<num; i++) {
		fprintf(dest, "%.99s,%u\n", dictArray[i].word, dictArray[i].count);
	}

	//Free memory and close files
	free(dictArray);
	fclose(infile);
	if (wflag) {
		fclose(outfile);
	}
	return 0;
}
Пример #8
0
// main function for the encoder
//
// Options (all take an argument):
//   -r  input (target) file name          -w  output file name
//   -d  dictionary file name              -b  block length
//   -c  chunk size                        -l  codeword length
//   -s  shared dictionary size
// -r, -w, -d, -b, -l and -c are required (non-empty / non-zero); -s is not.
// help(argv) is called on malformed or missing options; it is defined
// elsewhere and presumably prints usage and exits - confirm.
//
// Flow: open the three files, build the used-character table from the input,
// create the dictionary sized by the input length, write the header, run
// RunRepair once with last argument 1 and emit the shared dictionary, then
// rewind and run RunRepair with last argument 0 in a loop, emitting a local
// dictionary and the encoded sequence for each iteration until feof(input).
int main(int argc, char *argv[])
{
  char *target_filename = NULL;
  //char output_filename[1024];
  char *output_filename = NULL;
  char *dict_filename = NULL;
  unsigned int codewordlength = 0;
  unsigned int shared_dictsize = 0;
  unsigned int chunk_size = 0;
  unsigned long int block_length = 0;
  unsigned int length;
  char *rest;
  FILE *input, *output, *dictfile;
  DICT *dict;
  EDICT *edict;
  USEDCHARTABLE ut;
  int result;
  unsigned int b;
  unsigned char *buf;
  unsigned int  *buf2 = NULL;
  OBITFS seqout, dicout;
  int header_output = 0;
  uint i;

   /* Parse command-line options */
  while ((result = getopt(argc, argv, "r:w:b:l:d:s:c:")) != -1) {
    switch (result) {
    case 'r':
      target_filename = optarg;
      break;
      
    case 'w':
      output_filename = optarg;
      break;
      
    case 'd':
      dict_filename = optarg;
      break;
      
    case 'b':
      block_length = strtoul(optarg, &rest, 10);
      // trailing non-digit characters make the value invalid
      if (*rest != '\0') {
	help(argv);
      }
      break;

    case 'c':
      chunk_size = strtol(optarg, &rest, 10);
      if (*rest != '\0') {
	help(argv);
      }
      break;

    case 'l':
      codewordlength = strtoul(optarg, &rest, 10);
      if (*rest != '\0') {
	help(argv);
      }
      break;
      
    case 's':
      shared_dictsize = strtoul(optarg, &rest, 10);
      if (*rest != '\0') {
	help(argv);
      }
      break;
      
    case '?':
      help(argv);
      break;
    }
  }

  // Check that all required options were given
  if (!(target_filename && output_filename && dict_filename && block_length && codewordlength && chunk_size)) {
    help(argv);
  }

  if (chunk_size > block_length) {
    fprintf(stderr, "chunk length should not exceed block length.\n");
    exit(1);
  }
  
  // Open the input file
  input  = fopen(target_filename, "r");
  if (input == NULL) {
    puts("Input file open error at the beginning.");
    exit(1);
  }
  
  // Open the compressed-data output file
  output = fopen(output_filename, "wb");
  if (output == NULL) {
    puts("Output file open error at the beginning.");
    exit(1);
  }
  
  // Open the dictionary file
  dictfile = fopen(dict_filename, "wb");
  if (!dictfile) {
    puts("Dictionary file open error at the beginning.");
    exit(EXIT_FAILURE);
  }

  //  if (NULL == (buf = (unsigned char*)malloc(sizeof(unsigned char) * block_length))) { // || NULL == (buf2 = (unsigned int*)malloc(sizeof(unsigned int) * block_length))) {
  //    puts("malloc fault.");
    //    exit(EXIT_FAILURE);
  //  }

  chartable_init(&ut);
  fill_chartable(input, &ut);
  // Seek to the end so that ftello() yields the input size, which is passed
  // to createDict(); then rewind.
  fseeko(input, 0, SEEK_END);
  dict = createDict(ftello(input));
  fseeko(input, 0, SEEK_SET);
  // b counts the iterations of the loop below (starts at 0).
  b = 0;
  obitfs_init(&seqout, output);
  obitfs_init(&dicout, dictfile);
  // The shared dictionary size is raised to at least ut.size.
  if (shared_dictsize < ut.size) shared_dictsize = ut.size;
  printf("Generating CFG..."); fflush(stdout);
  outputHeader(&dicout, dict, (unsigned int) codewordlength, (unsigned int) block_length, &ut);

  // First pass (last argument 1): its result is written as the shared dictionary.
  dict = RunRepair(dict, input, block_length, shared_dictsize, codewordlength, &ut, chunk_size, 1);
  if (!dict) exit(1);
  edict = convertDict(dict, &ut);
  outputSharedDictionary(&dicout, edict, &ut, codewordlength, shared_dictsize, b);
  CleanEDict(edict);
  // Lower shared_dictsize when the first pass produced fewer rules than requested.
  if (dict->num_rules < shared_dictsize + CHAR_SIZE - ut.size) shared_dictsize = dict->num_rules + ut.size - CHAR_SIZE;

  // Second pass (last argument 0): rewind and process the input until feof().
  // NOTE(review): the RunRepair result is not NULL-checked inside this loop,
  // unlike the first call above.
  fseeko(input, 0, SEEK_SET);
  while (!feof(input)) {
    //    printf("************ Block #%d ************\n", b);
    //    length = fread(buf, sizeof(unsigned char), block_length, input);
    //    if (!length) break;
    //    for (i = 0; i < length; i++) {
    //      buf2[i] = buf[i];
    //    }
    /* for (unsigned int i = 0; i < length; i++) { */
    /*   printf("%u ", buf2[i]); */
    /* } */
    /* puts(""); */
    dict = RunRepair(dict, input, block_length, shared_dictsize, codewordlength, &ut, chunk_size, 0);
    edict = convertDict(dict, &ut);
    outputLocalDictionary(&dicout, edict, &ut, codewordlength, shared_dictsize, b);
    EncodeCFG(edict, &seqout, codewordlength);
    CleanEDict(edict);
    b++;
  }

  printf("Finished!\n"); fflush(stdout);
  // Release the dictionary, finalize both bit streams, close the files.
  if (dict) {
    free(dict->rule);
    free(dict->comp_seq);
    free(dict);
  }
  obitfs_finalize(&seqout);
  obitfs_finalize(&dicout);
  fclose(input);
  fclose(output);
  fclose(dictfile);
  exit(0);
}