Пример #1
0
// Reads the file `filename` into memory and hands its bytes to the file
// store under the key formed by the decimal string of `docID`.
//
// filename       - path of the file to read.
// docID          - identifier converted to a string and used as the key
//                  passed to fstore.put().
// cryptoduration - forwarded by reference to fstore.put(); presumably the
//                  store adds its encryption time to it (confirm against
//                  the store's implementation).
void SSE::storefile(string filename, uint64_t docID, double& cryptoduration){
	const size_t filesize = readFileSize(filename);

	// The vector owns the buffer, so it is released on every exit path,
	// including when readFile(), lexical_cast or fstore.put() throws.
	std::vector<byte> docBytes(filesize);

	readFile(filename, docBytes.data(), filesize);
	fstore.put(boost::lexical_cast<string>(docID), docBytes.data(), filesize, cryptoduration);
}
Пример #2
0
std::unique_ptr<File> Extractor::readFile() {
	auto fileName = readFileName();
	readFileTimestamp();
	readFileOwnerId();
	readFileGroupId();
	readFileMode();
	auto fileSize = readFileSize();
	readUntilEndOfFileHeader();
	auto fileContent = readFileContent(fileSize);

	return std::make_unique<StringFile>(fileContent, fileName);
}
Пример #3
0
void Extractor::readLookupTable() {
	// In the GNU format, the special file name '/' denotes a lookup table.
	// However, we need to ensure that it is just a standalone '/' because "//"
	// denotes the start of a filename table.
	if (hasLookupTableAt(i)) {
		// The lookup table has the same format as a file. However, as we do
		// not need it, throw it away after reading (i.e. do not store its
		// content).
		++i;
		readFileTimestamp();
		readFileOwnerId();
		readFileGroupId();
		readFileMode();
		auto fileSize = readFileSize();
		readUntilEndOfFileHeader();
		readFileContent(fileSize);
	}
}
Пример #4
0
// Looks up `keyword` through an OnlineSession and appends to `docIDs` every
// retrieved document id whose file is present in the file store.
//
// keyword  - keyword to look up.
// docIDs   - output; ids whose file is present are appended in the order
//            they appear in the retrieved buffer.
// diskTime - passed to updateRead()/updateWrite() and additionally
//            increased by the time spent in the file-store checks below.
//
// Returns false if the session returned zero bytes, true otherwise.
bool SSE::retrieveIndex0(string keyword, vector<docid_t>& docIDs, double& diskTime){
	OnlineSession session;
	// updateRead() fills this in; it is freed with delete[] at the end.
	byte* docIDsBytes = nullptr;

	size_t size = session.updateRead(keyword, docIDsBytes, 0, diskTime);

	cout << "Index0: Number of bytes retrieved are " << size << endl;
	if(size == 0)
		return false;

	cout << "Size is " << size << endl;

	long totalDataDownloaded = 0;
	// The buffer is treated as a packed array of docid_t values. The count
	// and the index are both size_t, so the comparison is never
	// signed/unsigned and the index cannot overflow before reaching the end.
	const size_t docCount = size/sizeof(docid_t);
	const docid_t* retrievedIDs = reinterpret_cast<const docid_t*>(docIDsBytes);
	for(size_t i = 0; i < docCount; i++){
		docid_t docID = retrievedIDs[i];
		uint64_t docIDonDisk = getDocNameHash(boost::lexical_cast<string>(docID));
		CLEAR_BIT(docIDonDisk, 0);

		// Convert once, outside the timed region, so that diskTime only
		// covers the two file-store accesses.
		const string diskName = boost::lexical_cast<string>(docIDonDisk);

		clock_t start = clock();
		bool filepresent = fstore.isFilePresent(diskName);
		// NOTE(review): the size is queried even when the file is reported
		// absent, as in the original code; confirm readFileSize() copes.
		totalDataDownloaded += readFileSize(FILESTORE_PATH + diskName);
		diskTime += (double)(clock()-start)/CLOCKS_PER_SEC;

		if(filepresent){
			docIDs.push_back(docID);
		}
	}

	cout << "Index0: Total data downloaded is " << totalDataDownloaded << endl;
	// NOTE(review): the buffer is written back exactly as it was read; ids
	// whose file is absent are not removed from it.
	session.updateWrite(keyword, docIDsBytes, size, diskTime);
	delete[] docIDsBytes;
	return true;
}
Пример #5
0
int main(){

	string directoryPath = "datasets/email/enron_mail_20110402/maildir/mann-k/inbox/";
	BStore* store = new BStore();

	int numFiles = 12;
	string filenumbers[] = {"1", "2", "3", "4", "5", "6", "7", "8", "9" , "10", "267", "207"};
	for(int i = 0; i < numFiles; i++){
		string filename = directoryPath + filenumbers[i] + ".";
		size_t size = readFileSize(filename);
		byte fileBytes[size];
		readFile(filename, fileBytes, size);

		printhex(fileBytes, size, "FILE BYTES");

		store->add(filename, fileBytes, size);
	}
	double execTime;
	store->finalize(execTime);
	delete store;
	
	return 0;
}
Пример #6
0
// Looks up `keyword` through an OnlineSession and appends every retrieved
// document id to `docIDs`.
//
// keyword  - keyword to look up.
// docIDs   - output; all retrieved ids are appended in buffer order.
// diskTime - passed by reference to session.updateRead().
//
// Returns false if the session returned zero bytes, true otherwise.
bool SSE::retrieveIndex1(string keyword, vector<docid_t>& docIDs, double& diskTime){
	OnlineSession session;
	session.resetDiskAccessTime();
	// updateRead() fills this in; it is freed with delete[] at the end.
	byte* docIDsBytes = nullptr;

	size_t size = session.updateRead(keyword, docIDsBytes, 0, diskTime);
	cout << "Index1: Number of bytes retrieved are " << size << endl;

	if(size == 0)
		return false;

	long totalDataDownloaded = 0;
	// The buffer is treated as a packed array of docid_t values. The count
	// and the index are both size_t, so the comparison is never
	// signed/unsigned and the index cannot overflow before reaching the end.
	const size_t docCount = size/sizeof(docid_t);
	const docid_t* retrievedIDs = reinterpret_cast<const docid_t*>(docIDsBytes);
	for(size_t i = 0; i < docCount; i++){
		docid_t docID = retrievedIDs[i];
		uint64_t docIDonDisk = getDocNameHash(boost::lexical_cast<string>(docID));
		docIDs.push_back(docID);
		// NOTE(review): unlike retrieveIndex0, no FILESTORE_PATH prefix is
		// used here and bit 0 of the hash is not cleared; confirm intended.
		totalDataDownloaded += readFileSize(boost::lexical_cast<string>(docIDonDisk));
	}

	cout << "Index1: Total data downloaded is " << totalDataDownloaded << endl;
	delete[] docIDsBytes;

	return true;
}