コード例 #1
0
void Compress(std::string const &inputFileName, std::string const &outputFileName)
{
    EncodeUtilities helper(readFrequencies(inputFileName));
    encode(inputFileName,
           outputFileName,
           helper.GetCodes(),
           helper.GetCodeLengths(),
           helper.GetFrequencies());
}
コード例 #2
0
ファイル: testFreq.cpp プロジェクト: allenyin/ece551
/// Prints every non-zero entry of the frequency table that readFrequencies()
/// produces for the file named on the command line.
///
/// Usage: <program> (input file)
/// Returns EXIT_FAILURE when the argument count is wrong, EXIT_SUCCESS otherwise.
int main(int argc, char ** argv) {
  // Number of table slots inspected. NOTE(review): presumably the 256 byte
  // values plus one extra symbol -- confirm against readFrequencies().
  const unsigned kTableSize = 257;

  if (argc != 2) {
    std::cerr << "Usage: " << argv[0] << " (input file)" << std::endl;
    return EXIT_FAILURE;
  }

  uint64_t * const counts = readFrequencies(argv[1]);
  assert(counts != NULL);

  for (unsigned sym = 0; sym != kTableSize; ++sym) {
    // Only symbols that actually occur are reported.
    if (counts[sym] != 0) {
      printSym(std::cout, sym);
      std::cout << ": " << counts[sym] << std::endl;
    }
  }

  // The table is released with delete[] here, as in the original flow.
  delete[] counts;
  return EXIT_SUCCESS;
}
コード例 #3
0
ファイル: makedict.cpp プロジェクト: crishoj/dep_feat
// Builds a dictionary tree from the lemmas read from `fpin`, optionally feeds
// it frequency data from the files listed in `freq`, and writes the result
// (strings, then lemmas, then nodes) to `fpout`.
//
// Parameters:
//   fpin               - input stream handed to readLemmas().
//   fpout              - output stream that receives all three sections.
//   nice               - true: sections are written as text with fprintf;
//                        false: values are written raw with fwrite and the
//                        binprint / BreadthFirst_printBin helpers.
//   format             - format string forwarded to readLemmas().
//   freq               - list of frequency files, walked with Next(); a null
//                        pointer skips the frequency step entirely.
//   CollapseHomographs - forwarded unchanged to readLemmas().
//
// Returns 0 on every path.
//
// Neither fpin nor fpout is closed here (the fclose calls near the end are
// commented out). The function reads and writes names that are not declared
// in this block (root, strings, STRINGS, LEMMAS, totcnt, g_added, addedcnt,
// notadded, notaddedcnt, notypematch, notypematchcnt, add, addFreq).
int makedict(FILE * fpin,FILE * fpout,bool nice,const char * format,const FreqFile * freq,bool CollapseHomographs)
    {
    // Start from a fresh root node; it is deleted again before returning.
    root = new DictNode("","","",0);
    printf("reading lemmas\n");
    // NOTE(review): `failed` is not initialised here; presumably readLemmas()
    // takes it by reference and always assigns it -- confirm.
    int failed;
    int cnt = readLemmas(fpin,format,add,CollapseHomographs,failed);
    printf("%d lemmas read, %d discarded\n",cnt,failed);
    if(failed)
        printf("(see file \"discarded\")\n");

    // Walk the list of frequency files. The walk stops early (break) at the
    // first entry that lacks either a file name or a format.
    while(freq)
        {
        if(!freq->itsName())
            {
            printf("No file name matching format %s\n",freq->itsFormat());
            break;
            }
        if(!freq->itsFormat())
            {
            printf("No format matching file name %s\n",freq->itsName());
            break;
            }

        // NOTE(review): ffreq is never fclose'd in this function. Confirm
        // whether readFrequencies() closes it; otherwise one handle leaks per
        // frequency file.
        FILE * ffreq = fopen(freq->itsName(),"r");
        if(ffreq)
            {
            printf("reading frequencies from %s with format %s\n",freq->itsName(),freq->itsFormat());
            readFrequencies(ffreq,freq->itsFormat(),addFreq);
            }
        else
            // An unreadable file is reported and skipped; processing continues
            // with the next list entry.
            printf("*** CANNOT OPEN %s\n",freq->itsName());
        freq = freq->Next();
        }
    printf("counting children\n");
    tchildrencount nroot = root->count();
//    printf("nroot %ld\n",nroot);
//    root->print(0,stdout);
//    char woord[1000];

    printf("counting strings\n");
    tcount nstrings = root->strcnt();
    tcount nUniqueStrings = 0;
    printf("compressing strings\n");
    // NOTE(review): presumably compressStrings() fills the file-level
    // `strings` / `STRINGS` buffers that are written and freed below -- confirm.
    tlength stringBufferLen = compressStrings(nstrings,&nUniqueStrings);
    tcount nLemmas = -1; // compensate for root
    printf("counting leafs\n");
    tcount nLeaf = root->LeafCount(&nLemmas);


    tcount nUniqueLemmas = 0;
    printf("compressing leafs\n");
    // NOTE(review): presumably compressLeafs() fills the file-level `LEMMAS`
    // array that is written and freed below -- confirm.
    tcount LemmaBufferLen = compressLeafs(nLeaf,&nUniqueLemmas);
//    printf("LemmaBufferLen %ld\n",LemmaBufferLen);
//    root->print(0,stdout);
/*
    woord[0] = '\0';
    FILE * fpt = fopen("root.txt","w");
    root->print(0,fpt,woord);
    fclose(fpt);
*/
    // Section 1: strings.
    printf("writing strings\n");
    tcount i;
    if(nice)
        {
        // Text form: buffer length on the first line, then one line per
        // unique string.
        fprintf(fpout,"%ld\n",(long)stringBufferLen);
        for(i = 0;i < nUniqueStrings;++i)
            {
//            fprintf(fpout,"%ld %ld %s\n",i,strings[i] - STRINGS,strings[i]);
            // NOTE(review): `%ld` is paired with a tcount and `%d` with the
            // difference strings[i] - STRINGS; verify both specifiers against
            // the real types (the line above shows an earlier `%ld` variant).
            fprintf(fpout,"%ld %d %s\n",i,strings[i] - STRINGS,strings[i]);
            }
        }
    /*
    else if(portable)
        {
        //    printf("stringBufferLen %d\n",stringBufferLen);
        fprintf(fpout,"%d\n",stringBufferLen);
        fwrite(STRINGS,stringBufferLen,1,fpout);
        fprintf(fpout,"\n");
        }*/
    else
        {
        // Raw form: the length variable's bytes, followed by stringBufferLen
        // bytes of STRINGS.
        //    printf("stringBufferLen %d\n",stringBufferLen);
        fwrite(&stringBufferLen,sizeof(stringBufferLen),1,fpout);
        fwrite(STRINGS,stringBufferLen,1,fpout);
        }
    // Section 2: lemmas.
    printf("writing lemmas\n");
//    printf("nUniqueLemmas %ld\n",nUniqueLemmas);
    if(nice)
        {
        // The text form iterates over nUniqueLemmas entries, whereas the raw
        // form below iterates over LemmaBufferLen entries.
        fprintf(fpout,"%ld\n",LemmaBufferLen);
        for(i = 0;i < nUniqueLemmas;++i)
            {
            fprintf(fpout,"%ld ",i);
            LEMMAS[i].print(fpout);
            fprintf(fpout,"\n");
            }
        }
    /*
    else if(portable)
        {
        //    printf("stringBufferLen %d\n",stringBufferLen);
        fprintf(fpout,"%d\n",LemmaBufferLen);
        for(i = 0;i < LemmaBufferLen;++i)
            LEMMAS[i].portableprint(fpout);
        }
        */
    else
        {
        //    printf("LemmaBufferLen %d\n",LemmaBufferLen);
        fwrite(&LemmaBufferLen,sizeof(LemmaBufferLen),1,fpout);
        for(i = 0;i < LemmaBufferLen;++i)
            LEMMAS[i].binprint(fpout);
        }
    // Progress statistics on stdout.
    // NOTE(review): `%lu` is used for tcount values here while `%ld` is used
    // for the same type above -- confirm which matches tcount.
    printf("strings: %lu unique: %lu\n",nstrings,nUniqueStrings);
    printf("flexforms: %lu lemmas: %lu unique: %lu\n",nLeaf,nLemmas,nUniqueLemmas);
    // BreadthFirst_position() is called before any node is written; its return
    // value is used as the node count.
    tcount nnodes = root->BreadthFirst_position(0,nroot);
    // Section 3: nodes.
    printf("writing nodes\n");
    if(nice)
        {
        fprintf(fpout,"nodes %ld\n",nnodes);
        root->BreadthFirst_print(0,nroot,fpout);
        /*
        woord[0] = '\0';
        root->BreadthFirst_print(0,nroot,fpout,woord);
        */
        }
    else
        {
        //    printf("nnodes %d nroot %d\n",nnodes,nroot);
        fwrite(&nnodes,sizeof(nnodes),1,fpout);
        // nroot (tchildrencount) is converted to tchildren before being
        // written, so the on-disk width is sizeof(tchildren).
        tchildren nrootwrite = (tchildren)nroot;
        fwrite(&nrootwrite,sizeof(nrootwrite),1,fpout);
        root->BreadthFirst_printBin(fpout);
        }
//    root->print(0,fpout);
    // Release the tree and the file-level buffers used while writing.
    delete root;
    delete [] strings;
    delete [] STRINGS;
    delete [] LEMMAS;

/*
    fclose(fpin);
    fclose(fpout);
*/
    // Frequency summary, printed only when totcnt is positive (which also
    // keeps the divisions below away from a zero denominator).
    if(totcnt > 0)
        {
        // NOTE(review): the count printed is g_added while the percentage is
        // computed from addedcnt -- confirm that both are intended.
        printf("frequencies added from %d words (%f%% of reference text)\n",g_added,(double)addedcnt*100.0/(double)totcnt);
        printf("frequencies from %ld words are not added because they weren't found in the dictionary (%f%% of reference text)\n",notadded - notypematch,(double)notaddedcnt*100.0/(double)totcnt);
        printf("frequencies from %ld words are not added because the types didn't agree. (%f%% of reference text)\n",notypematch,(double)notypematchcnt*100.0/(double)totcnt);
        }
    return 0;
    }
コード例 #4
0
void main()
{
	vector<OBSTComputation*> *obstComputationVector;
	obstComputationVector = new vector<OBSTComputation*>;
	
	vector<string> *fileNameVector = new vector<string>;
	string fileName;
	char repeat;
	
	cout << "Please Read:" << endl;
	cout << "Datasets must be must be in text files." << endl;
	cout << "The first character in the file must be a space." << endl;
	cout << "After the space comes the first frequency" << endl;
	cout << "followed by a comma, then a space" << endl;
	cout << "The program will stop reading if it finds a zero" << endl;

	do {
		cout << "Enter the name of the text file you want to compute (Ex. dataset1.txt):	";
		cin >> fileName;
		fileNameVector->push_back(fileName);

		//compute lookup table and optimal binary search tree
		obstComputationVector->push_back(new OBSTComputation(readFrequencies(fileName)));

		//display results of all datasets that have been computed
		for (int i = 0; i < obstComputationVector->size(); i++)
		{
			cout << fileNameVector->at(i) << endl;
			displayOBSTInfo(obstComputationVector->at(i));
			cout << endl;
		}

		cout << "Would you like to enter another file? Y = yes, N = no	";
		cin >> repeat;

	} while (repeat == 'Y' || repeat == 'y');

	// garbage collection
	for (int i = 0; i < obstComputationVector->size(); i++)
	{
		delete obstComputationVector->at(i);
	}
	delete obstComputationVector;
	
	/*
	The following code is for testing purposes.
	It will automatically run datasets 1-6 sequentially
	*/
	/*
	string fileName = "dataset";
	string fileExt = ".txt";
	string fullFileName;
	

	const int TOTAL_DATASETS = 6;

	/*
	Read in frequencies from a file 
	and compute optimal binary search trees
	from multiple data sets and store them
	*/
	/*
	vector<vector<int>*> *datasets = new vector<vector<int>*>;
	for (int i = 0; i < TOTAL_DATASETS; i++)
	{
		fullFileName = fileName + intToString(i+1) + fileExt;
		obstComputationVector->push_back(new OBSTComputation(readFrequencies(fullFileName)));
	}
	
	/*
	Display information about the stored
	optimal binary search trees.
	*/
	/*
	for (int i = 0; i < TOTAL_DATASETS; i++)
	{
		cout << fileName + intToString(i + 1) + fileExt << endl;
		displayOBSTInfo(obstComputationVector->at(i));
		cout << endl;

		delete obstComputationVector->at(i);
	}

	delete obstComputationVector;
	
	system("pause");
	*/
}