// Build the full index: read the dictionary, build the minimal perfect
// hash (cmph) and the lexicon, then parse the triples file into the
// on-disk index file. Per-phase timings are accumulated in iReportedTime.
Index::Index(IndexConfig &config) {
    tTotal.start();

    char** dict = NULL;
    DictFileManager dictFileManager;

    // Read dictionary file and get the terms in a vector ordered by its code
    int numberOfTerms = dictFileManager.read_dictionary(&dict,
            config.getDictFilePath());

    // Create the cmph (minimal perfect hash over the dictionary terms)
    buildHash.start();
    hash = new Hash(dict, numberOfTerms, config.getDirectory() + "temp.mph");
    iReportedTime["hashTime"] = buildHash.reportTime();

    // Create lexicon sized to the hash's table
    lexicon = new Lexicon(hash->getSize());

    // Open the index file for writing ("wb+" so it can be read back later)
    indexFile = fopen(config.getIndexFilePath().c_str(), "wb+");
    if (indexFile == NULL) {
        // Report on stderr and exit with a failure status; the original
        // printed to stdout and called exit(0), which signals success.
        fprintf(stderr, "Error when opening index file.\n");
        exit(EXIT_FAILURE);
    }

    number_of_documents = config.getDocumentsMaxNumber();

    // Parse triples and build the index file
    buildIndex.start();
    parseTriplesFile(dict, config.getTmpFilePath());
    iReportedTime["indexTime"] = buildIndex.reportTime();

    // Dict is not necessary anymore. NOTE(review): only the outer array is
    // freed here; if read_dictionary allocates each term string separately,
    // those entries leak — confirm its allocation scheme before freeing
    // each dict[i] individually.
    free(dict);
}
// Reads the (term, doc, freq) triples file — assumed sorted by term — and
// writes one posting list per term to indexFile. For each term it writes the
// raw (doc, freq) pairs followed by the Elias-gamma-compressed form of the
// (doc-gap, freq) stream, records the term in the lexicon, and accumulates
// per-document norms/lengths and the average document length.
void Index::parseTriplesFile(char** dict, string filePath) {
    // Open the triples file (binary: three unsigned ints per record)
    FILE *triplesFile = fopen(filePath.c_str(), "rb");
    if (triplesFile == NULL) {
        printf("Error when opening triples file.");
        exit(0);
    }
    EliasGamma gamma;
    vector<unsigned int> serial_buffer;   // interleaved (doc-gap, freq) values for the current term
    size_t pos = 0;                       // byte offset of the current term's posting list
    unsigned int term, doc, freq, last_doc = 0, cterm = 0, docCounter = 0;
    list<pair<unsigned int, unsigned int> > docList;  // (doc, freq) pairs buffered for the current term
    avg_doclen = 0;
    // NOTE(review): the !feof loop relies on the post-EOF iteration keeping
    // the previous term/doc/freq values so the final term is flushed by the
    // "|| feof" condition below; fread return values are never checked, and
    // an empty file would reach the flush with uninitialized term — confirm
    // the triples file is always non-empty.
    while (!feof(triplesFile)) {
        fread(&term, sizeof(unsigned int), 1, triplesFile);
        fread(&doc, sizeof(unsigned int), 1, triplesFile);
        fread(&freq, sizeof(unsigned int), 1, triplesFile);
        if (cterm == 0) {
            // First record: adopt its term as the current one
            // (term codes are 1-based; 0 is used as "unset").
            cterm = term;
        }
        // Term changed (or EOF reached): flush everything buffered for cterm.
        if (cterm != term || feof(triplesFile)) {
            // Get the term's hash position (terms are 1-based codes into dict)
            size_t p = hash->search(dict[cterm - 1]);
            // Gamma-compress the buffered (doc-gap, freq) stream
            vector<unsigned char> compressed_data;
            gammaTime.start();
            gamma.encode(serial_buffer, compressed_data);
            iReportedTime["compressionTime"] += gammaTime.reportTime();
            serial_buffer.clear();
            last_doc = 0;  // gaps restart from 0 for the next term
            // Record the current file position for the new term
            // (taken before the pairs below are written, so it points at
            // the start of this term's on-disk entry).
            pos = ftell(indexFile);
            // Calculate the idf component of tf-idf for this term
            double idf = log2( (double) number_of_documents / (double) docCounter);
            // printf("%u %u %g\n", cterm, docCounter, idf);
            // Adds the entry to the lexicon
            lexicon->add(p, dict[cterm - 1], docList.size(), pos, idf);
            // Write the raw (doc, freq) pairs and fold each posting into the
            // document norm (squared (1+log2(tf))*idf), length, and avg length.
            while (!docList.empty()) {
                flushTime.start();
                fwrite(&docList.front().first, sizeof(unsigned int), 1, indexFile);
                fwrite(&docList.front().second, sizeof(unsigned int), 1, indexFile);
                iReportedTime["flushTime"] += flushTime.reportTime();
                documentNorm[docList.front().first] += pow(
                        (1 + log2(docList.front().second)) * idf, 2);
                documentLen[docList.front().first] += docList.front().second;
                avg_doclen += docList.front().second;
                docList.pop_front();
            }
            docCounter = 0;
            docList.clear();
            // Flush the compressed form to the index file, after the raw
            // pairs. NOTE(review): both representations are written back to
            // back — presumably intentional; verify against the reader.
            flushTime.start();
            for (size_t i = 0; i < compressed_data.size(); i++) {
                fputc(compressed_data[i], indexFile);
            }
            iReportedTime["flushTime"] += flushTime.reportTime();
            // Update cterm for the next run of triples being read
            cterm = term;
        }
        // Buffer the just-read posting: raw pair for the flush loop, and
        // (gap, freq) for the gamma encoder. On the post-EOF iteration this
        // re-buffers stale values, but the loop exits before they are flushed.
        docList.push_back(make_pair(doc, freq));
        docCounter++;
        serial_buffer.push_back(doc - last_doc);
        serial_buffer.push_back(freq);
        last_doc = doc;
    }
    avg_doclen /= number_of_documents;
    fclose(triplesFile);
}
int main(int argc, char** argv){ if ( argc < 2 || argc > 5) { printf("Parameters, usage: <Image_Path> [<loops> [<threads/block-dim> [<se-dim>] ] ]\n"); return -1; } cout << "OpenMP version: " << _OPENMP << endl; cout << "Structured Version. " << endl; Mat imgCV = imread(argv[1], CV_LOAD_IMAGE_GRAYSCALE); int m = imgCV.rows; int n = imgCV.cols; uchar* originalImage = imgCV.data; uchar* processedImage = cloneImg(originalImage,m,n); if(argc >= 3 ) nErosions = atoi(argv[2]); if(argc >= 4) blockDim = nThreads = atoi(argv[3]); if(argc >= 5) seCols = seRows = atoi(argv[4]); const uchar* SE = newImg(seRows, seCols, 1); TimeProfiler t; uchar* multipleProcessedImg = cloneImg(originalImage,m,n); #ifdef BLOCK_EROSION cout << "Structured BLOCKS, Blocks: " << blockDim << "x" << blockDim << endl; cout << "SE-dim: " << seRows << "x" << seCols << endl; t.start(); blockErosion(processedImage, m, n, SE, seRows, blockDim, blockDim, blockDim); t.stop(); cout << "Erosion: " << t << endl; t.start(); blockErosion(processedImage, m, n, SE, seRows, blockDim, blockDim, blockDim); t.stop(); cout << "Erosion: " << t << endl; t.start(); blockErosion(processedImage, m, n, SE, seRows, blockDim, blockDim, blockDim); t.stop(); cout << "Erosion: " << t << endl; t.start(); for( int i = 0; i < nErosions; i++ ) blockErosion(multipleProcessedImg, m, n, SE, seRows, blockDim, blockDim, blockDim); t.stop(); cout << "Erosion IConvBench (erosion x " << nErosions << "): " << t << endl; #else cout << "Structured NO_BLOCK, threads: " << nThreads << endl; cout << "SE-dim: " << seRows << "x" << seCols << endl; t.start(); erosion(processedImage, m, n, SE, seRows, seCols); t.stop(); cout << "Erosion: " << t << endl; t.start(); erosion(processedImage, m, n, SE, seRows, seCols); t.stop(); cout << "Erosion: " << t << endl; t.start(); erosion(processedImage, m, n, SE, seRows, seCols); t.stop(); cout << "Erosion: " << t << endl; t.start(); for( int i = 0; i < nErosions; i++ ) { erosion(multipleProcessedImg, m, n, SE, 
seRows, seCols); } t.stop(); cout << "Erosion IConvBench (erosion x " << nErosions << "): " << t << endl; #endif imshow("Original Image", originalImage, m, n); imshow("Processed Image", processedImage, m, n); cv::waitKey(0); delete[] SE; return 0; }