void Compress(std::string const &inputFileName, std::string const &outputFileName) { EncodeUtilities helper(readFrequencies(inputFileName)); encode(inputFileName, outputFileName, helper.GetCodes(), helper.GetCodeLengths(), helper.GetFrequencies()); }
/// Prints the non-zero symbol counts of a file.
///
/// Expects exactly one command-line argument (the input file). The counts
/// come from readFrequencies(); every index in [0, 257) whose count is
/// non-zero is written to standard output as "<symbol>: <count>", where the
/// symbol text is produced by printSym().
///
/// @return EXIT_SUCCESS on success; EXIT_FAILURE when the argument count is
///         wrong or readFrequencies() returns a null pointer.
int main(int argc, char ** argv) {
  if (argc != 2) {
    std::cerr << "Usage: " << argv[0] << " (input file)" << std::endl;
    return EXIT_FAILURE;
  }

  uint64_t * counts = readFrequencies(argv[1]);
  // A runtime check instead of assert(): assert() is compiled out under
  // NDEBUG, which would leave the loop below dereferencing a null pointer.
  if (counts == NULL) {
    std::cerr << "Could not read frequencies from " << argv[1] << std::endl;
    return EXIT_FAILURE;
  }

  // NOTE(review): 257 presumably covers the 256 byte values plus one extra
  // (e.g. end-of-file) symbol -- confirm against readFrequencies().
  for (unsigned i = 0; i < 257; i++) {
    if (counts[i] == 0) {
      continue;
    }
    printSym(std::cout, i);
    std::cout << ": " << counts[i] << std::endl;
  }

  // The array is released with delete[] here, as in the original code.
  delete[] counts;
  return EXIT_SUCCESS;
}
int makedict(FILE * fpin,FILE * fpout,bool nice,const char * format,const FreqFile * freq,bool CollapseHomographs) { root = new DictNode("","","",0); printf("reading lemmas\n"); int failed; int cnt = readLemmas(fpin,format,add,CollapseHomographs,failed); printf("%d lemmas read, %d discarded\n",cnt,failed); if(failed) printf("(see file \"discarded\")\n"); while(freq) { if(!freq->itsName()) { printf("No file name matching format %s\n",freq->itsFormat()); break; } if(!freq->itsFormat()) { printf("No format matching file name %s\n",freq->itsName()); break; } FILE * ffreq = fopen(freq->itsName(),"r"); if(ffreq) { printf("reading frequencies from %s with format %s\n",freq->itsName(),freq->itsFormat()); readFrequencies(ffreq,freq->itsFormat(),addFreq); } else printf("*** CANNOT OPEN %s\n",freq->itsName()); freq = freq->Next(); } printf("counting children\n"); tchildrencount nroot = root->count(); // printf("nroot %ld\n",nroot); // root->print(0,stdout); // char woord[1000]; printf("counting strings\n"); tcount nstrings = root->strcnt(); tcount nUniqueStrings = 0; printf("compressing strings\n"); tlength stringBufferLen = compressStrings(nstrings,&nUniqueStrings); tcount nLemmas = -1; // compensate for root printf("counting leafs\n"); tcount nLeaf = root->LeafCount(&nLemmas); tcount nUniqueLemmas = 0; printf("compressing leafs\n"); tcount LemmaBufferLen = compressLeafs(nLeaf,&nUniqueLemmas); // printf("LemmaBufferLen %ld\n",LemmaBufferLen); // root->print(0,stdout); /* woord[0] = '\0'; FILE * fpt = fopen("root.txt","w"); root->print(0,fpt,woord); fclose(fpt); */ printf("writing strings\n"); tcount i; if(nice) { fprintf(fpout,"%ld\n",(long)stringBufferLen); for(i = 0;i < nUniqueStrings;++i) { // fprintf(fpout,"%ld %ld %s\n",i,strings[i] - STRINGS,strings[i]); fprintf(fpout,"%ld %d %s\n",i,strings[i] - STRINGS,strings[i]); } } /* else if(portable) { // printf("stringBufferLen %d\n",stringBufferLen); fprintf(fpout,"%d\n",stringBufferLen); 
fwrite(STRINGS,stringBufferLen,1,fpout); fprintf(fpout,"\n"); }*/ else { // printf("stringBufferLen %d\n",stringBufferLen); fwrite(&stringBufferLen,sizeof(stringBufferLen),1,fpout); fwrite(STRINGS,stringBufferLen,1,fpout); } printf("writing lemmas\n"); // printf("nUniqueLemmas %ld\n",nUniqueLemmas); if(nice) { fprintf(fpout,"%ld\n",LemmaBufferLen); for(i = 0;i < nUniqueLemmas;++i) { fprintf(fpout,"%ld ",i); LEMMAS[i].print(fpout); fprintf(fpout,"\n"); } } /* else if(portable) { // printf("stringBufferLen %d\n",stringBufferLen); fprintf(fpout,"%d\n",LemmaBufferLen); for(i = 0;i < LemmaBufferLen;++i) LEMMAS[i].portableprint(fpout); } */ else { // printf("LemmaBufferLen %d\n",LemmaBufferLen); fwrite(&LemmaBufferLen,sizeof(LemmaBufferLen),1,fpout); for(i = 0;i < LemmaBufferLen;++i) LEMMAS[i].binprint(fpout); } printf("strings: %lu unique: %lu\n",nstrings,nUniqueStrings); printf("flexforms: %lu lemmas: %lu unique: %lu\n",nLeaf,nLemmas,nUniqueLemmas); tcount nnodes = root->BreadthFirst_position(0,nroot); printf("writing nodes\n"); if(nice) { fprintf(fpout,"nodes %ld\n",nnodes); root->BreadthFirst_print(0,nroot,fpout); /* woord[0] = '\0'; root->BreadthFirst_print(0,nroot,fpout,woord); */ } else { // printf("nnodes %d nroot %d\n",nnodes,nroot); fwrite(&nnodes,sizeof(nnodes),1,fpout); tchildren nrootwrite = (tchildren)nroot; fwrite(&nrootwrite,sizeof(nrootwrite),1,fpout); root->BreadthFirst_printBin(fpout); } // root->print(0,fpout); delete root; delete [] strings; delete [] STRINGS; delete [] LEMMAS; /* fclose(fpin); fclose(fpout); */ if(totcnt > 0) { printf("frequencies added from %d words (%f%% of reference text)\n",g_added,(double)addedcnt*100.0/(double)totcnt); printf("frequencies from %ld words are not added because they weren't found in the dictionary (%f%% of reference text)\n",notadded - notypematch,(double)notaddedcnt*100.0/(double)totcnt); printf("frequencies from %ld words are not added because the types didn't agree. 
(%f%% of reference text)\n",notypematch,(double)notypematchcnt*100.0/(double)totcnt); } return 0; }
void main() { vector<OBSTComputation*> *obstComputationVector; obstComputationVector = new vector<OBSTComputation*>; vector<string> *fileNameVector = new vector<string>; string fileName; char repeat; cout << "Please Read:" << endl; cout << "Datasets must be must be in text files." << endl; cout << "The first character in the file must be a space." << endl; cout << "After the space comes the first frequency" << endl; cout << "followed by a comma, then a space" << endl; cout << "The program will stop reading if it finds a zero" << endl; do { cout << "Enter the name of the text file you want to compute (Ex. dataset1.txt): "; cin >> fileName; fileNameVector->push_back(fileName); //compute lookup table and optimal binary search tree obstComputationVector->push_back(new OBSTComputation(readFrequencies(fileName))); //display results of all datasets that have been computed for (int i = 0; i < obstComputationVector->size(); i++) { cout << fileNameVector->at(i) << endl; displayOBSTInfo(obstComputationVector->at(i)); cout << endl; } cout << "Would you like to enter another file? Y = yes, N = no "; cin >> repeat; } while (repeat == 'Y' || repeat == 'y'); // garbage collection for (int i = 0; i < obstComputationVector->size(); i++) { delete obstComputationVector->at(i); } delete obstComputationVector; /* The following code is for testing purposes. It will automatically run datasets 1-6 sequentially */ /* string fileName = "dataset"; string fileExt = ".txt"; string fullFileName; const int TOTAL_DATASETS = 6; /* Read in frequencies from a file and compute optimal binary search trees from multiple data sets and store them */ /* vector<vector<int>*> *datasets = new vector<vector<int>*>; for (int i = 0; i < TOTAL_DATASETS; i++) { fullFileName = fileName + intToString(i+1) + fileExt; obstComputationVector->push_back(new OBSTComputation(readFrequencies(fullFileName))); } /* Display information about the stored optimal binary search trees. 
*/ /* for (int i = 0; i < TOTAL_DATASETS; i++) { cout << fileName + intToString(i + 1) + fileExt << endl; displayOBSTInfo(obstComputationVector->at(i)); cout << endl; delete obstComputationVector->at(i); } delete obstComputationVector; system("pause"); */ }