/**
 * Reads the file at `filename` into memory and hands its bytes to the file
 * store, keyed by the decimal string form of `docID`.
 *
 * @param filename        Path of the file to read.
 * @param docID           Document identifier; converted with lexical_cast and
 *                        used as the key passed to fstore.put.
 * @param cryptoduration  Forwarded by reference to fstore.put (presumably an
 *                        accumulator for crypto time; confirm against the
 *                        file store implementation).
 */
void SSE::storefile(string filename, uint64_t docID, double& cryptoduration){
    const size_t filesize = readFileSize(filename);

    // The vector owns the buffer, so it is released on every exit path,
    // including when readFile, lexical_cast or fstore.put throws. The previous
    // new[]/delete[] pair leaked the buffer in those cases.
    vector<byte> docBytes(filesize);

    // NOTE(review): for an empty file data() may be a null pointer, whereas
    // new byte[0] yielded a non-null one; confirm that readFile and fstore.put
    // accept (nullptr, 0).
    byte* buffer = docBytes.data();

    readFile(filename, buffer, filesize);
    fstore.put(boost::lexical_cast<string>(docID), buffer, filesize, cryptoduration);
}
std::unique_ptr<File> Extractor::readFile() { auto fileName = readFileName(); readFileTimestamp(); readFileOwnerId(); readFileGroupId(); readFileMode(); auto fileSize = readFileSize(); readUntilEndOfFileHeader(); auto fileContent = readFileContent(fileSize); return std::make_unique<StringFile>(fileContent, fileName); }
void Extractor::readLookupTable() { // In the GNU format, the special file name '/' denotes a lookup table. // However, we need to ensure that it is just a standalone '/' because "//" // denotes the start of a filename table. if (hasLookupTableAt(i)) { // The lookup table has the same format as a file. However, as we do // not need it, throw it away after reading (i.e. do not store its // content). ++i; readFileTimestamp(); readFileOwnerId(); readFileGroupId(); readFileMode(); auto fileSize = readFileSize(); readUntilEndOfFileHeader(); readFileContent(fileSize); } }
bool SSE::retrieveIndex0(string keyword, vector<docid_t>& docIDs, double& diskTime){ OnlineSession session; byte* docIDsBytes; size_t size = session.updateRead(keyword, docIDsBytes, 0, diskTime); cout << "Index0: Number of bytes retrieved are " << size << endl; if(size == 0) return false; cout << "Size is " << size << endl; // byte* updatedDocIDsBytes = new byte[size]; // memset(updatedDocIDsBytes, 0, size); long totalDataDownloaded = 0; int32_t j = 0; for(int32_t i = 0; i < size/sizeof(docid_t); i++){ docid_t docID = *(docid_t*)(&docIDsBytes[i*sizeof(docid_t)]); uint64_t docIDonDisk = getDocNameHash(boost::lexical_cast<string>(docID)); CLEAR_BIT(docIDonDisk, 0); clock_t start = clock(); bool filepresent = fstore.isFilePresent(boost::lexical_cast<string>(docIDonDisk)); totalDataDownloaded += readFileSize(FILESTORE_PATH + boost::lexical_cast<string>(docIDonDisk)); diskTime += (double)(clock()-start)/CLOCKS_PER_SEC; if(filepresent){ docIDs.push_back(docID); // memcpy(&updatedDocIDsBytes[j*sizeof(docid_t)], &docIDsBytes[i*sizeof(docid_t)], sizeof(docid_t)); j++; } } cout << "Index0: Total data downloaded is " << totalDataDownloaded << endl; session.updateWrite(keyword, docIDsBytes, size, diskTime); delete[] docIDsBytes; return true; }
int main(){ string directoryPath = "datasets/email/enron_mail_20110402/maildir/mann-k/inbox/"; BStore* store = new BStore(); int numFiles = 12; string filenumbers[] = {"1", "2", "3", "4", "5", "6", "7", "8", "9" , "10", "267", "207"}; for(int i = 0; i < numFiles; i++){ string filename = directoryPath + filenumbers[i] + "."; size_t size = readFileSize(filename); byte fileBytes[size]; readFile(filename, fileBytes, size); printhex(fileBytes, size, "FILE BYTES"); store->add(filename, fileBytes, size); } double execTime; store->finalize(execTime); delete store; return 0; }
/**
 * Reads the document-ID list stored for `keyword` and appends every ID in it
 * to `docIDs`, logging the sum of the sizes reported by readFileSize.
 *
 * @param keyword   Keyword whose posting list is read via OnlineSession::updateRead.
 * @param docIDs    Output; every retrieved ID is appended.
 * @param diskTime  In/out; forwarded to OnlineSession::updateRead.
 * @return false when zero bytes were retrieved for the keyword, true otherwise.
 */
bool SSE::retrieveIndex1(string keyword, vector<docid_t>& docIDs, double& diskTime){
    OnlineSession session;
    session.resetDiskAccessTime();

    // Start from a null pointer so the buffer can be released safely on every
    // path, even if updateRead leaves the pointer untouched when it finds nothing.
    byte* docIDsBytes = nullptr;
    size_t size = session.updateRead(keyword, docIDsBytes, 0, diskTime);
    cout << "Index1: Number of bytes retrieved are " << size << endl;

    long totalDataDownloaded = 0;

    if(size == 0){
        // Previously this path returned without releasing the buffer.
        // delete[] on a null pointer is a no-op.
        // NOTE(review): assumes any pointer set by updateRead is owned by the
        // caller, as on the normal path below; confirm.
        delete[] docIDsBytes;
        return false;
    }

    // An unsigned index matches the unsigned element count, avoiding a
    // signed/unsigned comparison that breaks for very large lists.
    const size_t numDocIDs = size / sizeof(docid_t);
    for(size_t i = 0; i < numDocIDs; i++){
        docid_t docID = *reinterpret_cast<docid_t*>(&docIDsBytes[i*sizeof(docid_t)]);
        uint64_t docIDonDisk = getDocNameHash(boost::lexical_cast<string>(docID));

        docIDs.push_back(docID);

        // NOTE(review): unlike retrieveIndex0, the name is used without the
        // FILESTORE_PATH prefix and without CLEAR_BIT; confirm this is intended.
        totalDataDownloaded += readFileSize(boost::lexical_cast<string>(docIDonDisk));
    }
    cout << "Index1: Total data downloaded is " << totalDataDownloaded << endl;

    delete[] docIDsBytes;
    return true;
}