Beispiel #1
0
/**
 * Builds the index described by `config`.
 *
 * Steps, in order:
 *   1. read the dictionary file into an array of terms,
 *   2. build the hash over those terms (timed as "hashTime"),
 *   3. create a lexicon sized from the hash,
 *   4. open the index file for writing,
 *   5. parse the triples file into the index file (timed as "indexTime").
 *
 * Terminates the process with a failure status if the index file cannot be
 * opened.
 *
 * @param config supplies the dictionary, index and temporary file paths, the
 *               working directory and the maximum number of documents.
 */
Index::Index(IndexConfig &config) {

	tTotal.start();

	char** dict = NULL;
	DictFileManager dictFileManager;

	// Read dictionary file and get the terms in a vector ordered by its code
	int numberOfTerms = dictFileManager.read_dictionary(&dict,
			config.getDictFilePath());

	// Create the cmph
	// (third argument is presumably the path where the hash is stored --
	// TODO confirm against Hash)
	buildHash.start();
	hash = new Hash(dict, numberOfTerms, config.getDirectory() + "temp.mph");
	iReportedTime["hashTime"] = buildHash.reportTime();

	// Create lexicon
	lexicon = new Lexicon(hash->getSize());

	// Open an index file
	indexFile = fopen(config.getIndexFilePath().c_str(), "wb+");
	if (indexFile == NULL) {
		// Fatal: report on stderr and exit with a non-zero status so that
		// callers and scripts can detect the failure.
		fprintf(stderr, "Error when opening index file.");
		exit(EXIT_FAILURE);
	}

	number_of_documents = config.getDocumentsMaxNumber();

	// Parse triples and build the index file
	buildIndex.start();
	parseTriplesFile(dict, config.getTmpFilePath());
	iReportedTime["indexTime"] = buildIndex.reportTime();

	// Dict is not necessary anymore.
	// NOTE(review): only the pointer array is released here; whether the
	// individual term strings need separate releasing depends on how
	// read_dictionary allocates them -- confirm.
	free(dict);
}
Beispiel #2
0
// Compressed
/**
 * Reads the triples file and writes the posting data of every term to
 * indexFile, registering each term in the lexicon.
 *
 * Input format: a sequence of records, each made of three consecutive
 * `unsigned int` values (term, doc, freq). All records of one term must be
 * contiguous, because a term is flushed as soon as a different term id is
 * read. Term ids index `dict` 1-based (`dict[term - 1]`); 0 is used
 * internally as the "no term seen yet" marker.
 *
 * For each term the index file receives, starting at the offset stored in
 * the lexicon: the raw (doc, freq) pairs, followed by the Elias-gamma
 * encoding of the (doc gap, freq) sequence.
 *
 * Also accumulates documentNorm, documentLen and avg_doclen.
 *
 * Reading stops at the first incomplete record (end of file, truncated file
 * or read error). An empty file produces no lexicon entries. Terminates the
 * process with a failure status if the triples file cannot be opened.
 *
 * @param dict     dictionary terms, indexed by term id minus one.
 * @param filePath path of the binary triples file.
 */
void Index::parseTriplesFile(char** dict, string filePath) {

	// Open the triples file
	FILE *triplesFile = fopen(filePath.c_str(), "rb");
	if (triplesFile == NULL) {
		// Fatal: report on stderr and exit with a non-zero status.
		fprintf(stderr, "Error when opening triples file.");
		exit(EXIT_FAILURE);
	}

	EliasGamma gamma;
	vector<unsigned int> serial_buffer;
	size_t pos = 0;
	unsigned int term = 0, doc = 0, freq = 0;
	unsigned int last_doc = 0, cterm = 0, docCounter = 0;

	list<pair<unsigned int, unsigned int> > docList;

	avg_doclen = 0;

	for (;;) {

		// A record only counts when all three fields were read completely.
		// Checking fread (instead of looping on feof) keeps stale or
		// uninitialized values out of the index and also ends the loop on a
		// read error, where feof would never become true.
		const bool haveTriple =
				fread(&term, sizeof(unsigned int), 1, triplesFile) == 1
				&& fread(&doc, sizeof(unsigned int), 1, triplesFile) == 1
				&& fread(&freq, sizeof(unsigned int), 1, triplesFile) == 1;

		// First record: start collecting for its term.
		if (haveTriple && cterm == 0) {
			cterm = term;
		}

		// The current term is complete when the term id changes or the input
		// is exhausted.
		if (!haveTriple || cterm != term) {

			// Nothing is buffered only when no record was read at all;
			// skipping the flush then avoids a bogus lexicon entry and a
			// division by zero in the idf.
			if (docCounter > 0) {

				// Get its hash position
				size_t p = hash->search(dict[cterm - 1]);

				// Compress buffer
				vector<unsigned char> compressed_data;
				gammaTime.start();
				gamma.encode(serial_buffer, compressed_data);
				iReportedTime["compressionTime"] += gammaTime.reportTime();
				serial_buffer.clear();
				last_doc = 0; // doc gaps restart for the next term

				// Record the current file position for the new term
				pos = ftell(indexFile);

				// Calculate idf from the number of documents holding the term
				double idf = log2(
						static_cast<double>(number_of_documents)
								/ static_cast<double>(docCounter));

				// Adds the entry to the lexicon
				lexicon->add(p, dict[cterm - 1], docList.size(), pos, idf);

				// Write the raw postings and update per-document statistics
				while (!docList.empty()) {

					flushTime.start();
					fwrite(&docList.front().first, sizeof(unsigned int), 1,
							indexFile);
					fwrite(&docList.front().second, sizeof(unsigned int), 1,
							indexFile);
					iReportedTime["flushTime"] += flushTime.reportTime();

					// Squared (1 + log2(freq)) * idf weight of this term
					documentNorm[docList.front().first] += pow(
							(1 + log2(docList.front().second)) * idf, 2);

					documentLen[docList.front().first] +=
							docList.front().second;

					avg_doclen += docList.front().second;

					docList.pop_front();
				}

				docCounter = 0;
				docList.clear();

				// Flush the compressed postings to the index file
				flushTime.start();
				for (size_t i = 0; i < compressed_data.size(); i++) {
					fputc(compressed_data[i], indexFile);
				}
				iReportedTime["flushTime"] += flushTime.reportTime();
			}

			if (!haveTriple) {
				break;
			}

			// Start collecting postings for the term just read
			cterm = term;
		}

		docList.push_back(make_pair(doc, freq));
		docCounter++;

		// Store the document id as a gap to the previous one of this term
		serial_buffer.push_back(doc - last_doc);
		serial_buffer.push_back(freq);

		last_doc = doc;
	}

	// Sum of all frequencies divided by the configured number of documents
	avg_doclen /= number_of_documents;

	fclose(triplesFile);
}
int main(int argc, char** argv){

    if ( argc < 2 || argc > 5)
    {
        printf("Parameters, usage: <Image_Path> [<loops> [<threads/block-dim> [<se-dim>] ] ]\n");
        return -1;
    }

    cout << "OpenMP version: " << _OPENMP << endl;
    cout << "Structured Version. " << endl;

    Mat imgCV = imread(argv[1], CV_LOAD_IMAGE_GRAYSCALE);
    int m = imgCV.rows;
    int n = imgCV.cols;
    uchar* originalImage = imgCV.data;
    uchar* processedImage = cloneImg(originalImage,m,n);


    if(argc >= 3 )
        nErosions = atoi(argv[2]);

    if(argc >= 4)
        blockDim = nThreads = atoi(argv[3]);

    if(argc >= 5)
        seCols = seRows = atoi(argv[4]);


    const uchar* SE =  newImg(seRows, seCols, 1);

    TimeProfiler t;


    uchar* multipleProcessedImg = cloneImg(originalImage,m,n);

#ifdef BLOCK_EROSION

    cout << "Structured BLOCKS, Blocks: " << blockDim << "x" << blockDim << endl;
    cout << "SE-dim: " << seRows << "x" << seCols << endl;
    t.start();
    blockErosion(processedImage, m, n, SE, seRows, blockDim, blockDim, blockDim);
    t.stop();
    cout << "Erosion: " << t << endl;
    t.start();
    blockErosion(processedImage, m, n, SE, seRows, blockDim, blockDim, blockDim);
    t.stop();
    cout << "Erosion: " << t << endl;
    t.start();
    blockErosion(processedImage, m, n, SE, seRows, blockDim, blockDim, blockDim);
    t.stop();
    cout << "Erosion: " << t << endl;


    t.start();
    for( int i = 0; i < nErosions; i++ )
        blockErosion(multipleProcessedImg, m, n, SE, seRows, blockDim, blockDim, blockDim);
    t.stop();
    cout << "Erosion IConvBench (erosion x " << nErosions << "): " << t << endl;

#else

    cout << "Structured NO_BLOCK, threads: " << nThreads << endl;
    cout << "SE-dim: " << seRows << "x" << seCols << endl;

    t.start();
    erosion(processedImage, m, n, SE, seRows, seCols);
    t.stop();
    cout << "Erosion: " << t << endl;
    t.start();
    erosion(processedImage, m, n, SE, seRows, seCols);
    t.stop();
    cout << "Erosion: " << t << endl;
    t.start();
    erosion(processedImage, m, n, SE, seRows, seCols);
    t.stop();
    cout << "Erosion: " << t << endl;

    t.start();
    for( int i = 0; i < nErosions; i++ )
    {
        erosion(multipleProcessedImg, m, n, SE, seRows, seCols);
    }
    t.stop();
    cout << "Erosion IConvBench (erosion x " << nErosions << "): " << t << endl;

#endif




    imshow("Original Image", originalImage, m, n);
    imshow("Processed Image", processedImage, m, n);
    cv::waitKey(0);

    delete[] SE;

    return 0;
}