Ejemplo n.º 1
0
//--------------------------------------------------------------------
// SortFile
//
// Purpose : Builds a copy of heap file S ordered by an integer attribute.
// Input   : S      - heap file to sort.
//           len    - size in bytes of the record buffer (records are
//                    treated as fixed size).
//           offset - byte offset of the sort key inside a record.
// Method  : Each record's key and RecordID are inserted into a temporary
//           B+-tree. The tree is then scanned, and every RecordID it
//           yields is fetched from S and appended to a new temporary
//           heap file.
// Return  : The new heap file (owned by the caller), or NULL when the
//           scan on S, the B+-tree or the output file cannot be created.
//--------------------------------------------------------------------
HeapFile *SortFile(HeapFile *S, int len, int offset)
{
	Status s;

	Scan *scan = S->OpenScan(s);
	if (s != OK)
	{
		cerr << "ERROR : cannot open scan on the heapfile to sort.\n";
		delete scan;
		return NULL;
	}

	//
	// Scan the HeapFile S and insert every (key, rid) pair into a new B+Tree.
	//

	BTreeFile *btree = new BTreeFile (s, "BTree", ATTR_INT, sizeof(int));
	if (s != OK)
	{
		cerr << "ERROR : cannot create the temporary B+-tree.\n";
		delete btree;
		delete scan;
		return NULL;
	}

	char *recPtr = new char[len];
	int recLen = len;
	RecordID rid;
	while (scan->GetNext(rid, recPtr, recLen) == OK)
	{
		btree->Insert(recPtr + offset, rid);
	}
	delete scan;

	HeapFile *sorted = new HeapFile(NULL, s); // create a temp HeapFile
	if (s != OK)
	{
		cerr << "Cannot create new file for sortedS\n";
		delete sorted;
		sorted = NULL;
	}
	else
	{
		//
		// Walk the B+-Tree and copy the records, in the order the tree
		// scan returns them, into the new HeapFile.
		//
		BTreeFileScan *btreeScan = (BTreeFileScan *)btree->OpenScan(NULL, NULL);

		int key;
		while (btreeScan->GetNext(rid, &key) == OK)
		{
			S->GetRecord (rid, recPtr, recLen);
			sorted->InsertRecord (recPtr, recLen, rid);
		}

		// Release the scan before the tree it iterates over is destroyed.
		delete btreeScan;
	}

	btree->DestroyFile();
	delete btree;
	delete [] recPtr;

	return sorted;
}
Ejemplo n.º 2
0
//--------------------------------------------------------------------
// JoinMethod::SortHeapFile
// 
// Purpose :  Sorts a relation by an integer attribute. 
// Input   :  file - pointer to the HeapFile to be sorted.
//            len  - length of the records in the file. (assume fixed size).
//            offset - offset of the sort attribute from the beginning of the record.
// Method  :  We create a B+-Tree using that attribute (converted to a
//            string key by toString) as the key. Then we scan the
//            B+-Tree and insert the records into a new HeapFile.
//            The HeapFile guarantees that the order of insertion will
//            be the same as the order of scan later.
// Return  :  The new sorted relation/HeapFile (owned by the caller), or
//            NULL if the scan, the B+-Tree or the output file could not
//            be created.
//-------------------------------------------------------------------- 
HeapFile* JoinMethod::SortHeapFile(HeapFile *file, int len, int offset) {

	Status s;

	Scan *scan = file->OpenScan(s);
	if (s != OK) {
		std::cerr << "ERROR : cannot open scan on the heapfile to sort." << std::endl;
		delete scan;
		return NULL;
	}

	//
	// Scan the input HeapFile, create a new B+Tree and insert the records into it.
	// 

	BTreeFile *btree = new BTreeFile (s, "BTree");
	if (s != OK) {
		std::cerr << "ERROR : cannot create the temporary B+-tree." << std::endl;
		delete btree;
		delete scan;
		return NULL;
	}

	char* recPtr = new char[len];
	int recLen = len;
	RecordID rid;

	// Scratch buffer that receives the textual form of each key.
	char* recKey = new char[100];

	while (scan->GetNext(rid, recPtr, recLen) == OK)
	{
		int val = *(int*)(recPtr + offset);
		toString(val, recKey);
		btree->Insert(recKey, rid);
	}
	delete scan;
	delete [] recKey;

	HeapFile *sorted = new HeapFile(NULL, s); // create a temp HeapFile
	if (s != OK)
	{
		std::cerr << "Cannot create new file for sortedS\n";
		delete sorted;
		sorted = NULL;
	}
	else
	{
		// Now scan the B+-Tree and insert the records into a 
		// new (sorted) HeapFile.
		BTreeFileScan* btreeScan = btree->OpenScan(NULL, NULL);

		// NOTE(review): presumably an out-parameter filled by GetNext;
		// initialised so it never holds an indeterminate value.
		char* keyPtr = NULL;
		while (btreeScan->GetNext(rid, keyPtr) == OK)
		{
			file->GetRecord (rid, recPtr, recLen);
			sorted->InsertRecord (recPtr, recLen, rid);
		}

		// Release the scan before the tree it iterates over is destroyed.
		delete btreeScan;
	}

	btree->DestroyFile();
	delete btree;
	delete [] recPtr;

	return sorted;
}
Ejemplo n.º 3
0
//--------------------------------------------------------------------
// IndexNestedLoopJoin
//
// Purpose : Equi-joins relations R and S on an integer attribute using
//           a temporary B+-tree index built on S.
// Input   : specOfR - outer relation (file, recLen, offset of join key).
//           specOfS - inner relation (file, recLen, offset of join key).
// Method  : Every S record's key and RecordID are inserted into a
//           B+-tree. For each R record the tree is searched for the
//           same key, and each match is concatenated with the R record
//           by MakeNewRecord and appended to the result file.
// Return  : A new heap file holding the joined records (owned by the
//           caller), or NULL if the result file or a scan could not be
//           opened. Every resource allocated here is released on both
//           the success and the failure path.
//--------------------------------------------------------------------
HeapFile* IndexNestedLoopJoin(JoinSpec specOfR, JoinSpec specOfS)
{
	Status status = OK;

	// Create a HeapFile for join results
	HeapFile* joinedFile = new HeapFile(NULL, status);
	if (OK != status)
	{
		cerr << "ERROR: cannot create a file for the joined relation.\n";
		delete joinedFile;
		return NULL;
	}

	int recLenR = specOfR.recLen;
	int recLenS = specOfS.recLen;
	int recLenJoined = recLenR + recLenS;

	char* recR = new char[recLenR];
	char* recS = new char[recLenS];
	char* recJoined = new char[recLenJoined];

	RecordID ridR, ridS, ridJoined;

	BTreeFile* bTree = NULL;
	bool succeeded = false;

	// Build the B+-tree index on the inner relation (S)
	Scan* scanS = specOfS.file->OpenScan(status);
	if (OK != status)
	{
		cerr << "ERROR: cannot open scan on the relation S heap file.\n";
		delete scanS;
	}
	else
	{
		bTree = new BTreeFile(status, "IJBT", ATTR_INT, sizeof(int));
		while (OK == scanS->GetNext(ridS, recS, recLenS))
		{
			bTree->Insert(recS + specOfS.offset, ridS);
		}
		delete scanS;

		// Iterate through the outer relation (R) and join
		Scan* scanR = specOfR.file->OpenScan(status);
		if (OK != status)
		{
			cerr << "ERROR: cannot open scan on the relation R heap file.\n";
		}
		else
		{
			while (OK == scanR->GetNext(ridR, recR, recLenR))
			{
				int* joinArgR = (int*)&recR[specOfR.offset];

				// Search the index with the R key as both range bounds.
				BTreeFileScan* bTreeScan = (BTreeFileScan*)bTree->OpenSearchScan(joinArgR, joinArgR);
				int key;
				while (OK == bTreeScan->GetNext(ridS, &key))
				{
					specOfS.file->GetRecord(ridS, recS, recLenS);

					MakeNewRecord(recJoined, recR, recS, recLenR, recLenS);
					joinedFile->InsertRecord(recJoined, recLenJoined, ridJoined);
				}
				delete bTreeScan;
			}
			succeeded = true;
		}
		delete scanR;
	}

	// Release the allocated resources
	if (bTree != NULL)
	{
		// The index is only a temporary helper; presumably DestroyFile
		// removes the named file so a later call does not reopen an
		// index that still holds entries from this one.
		bTree->DestroyFile();
		delete bTree;
	}

	delete[] recR;
	delete[] recS;
	delete[] recJoined;

	if (!succeeded)
	{
		delete joinedFile;
		joinedFile = NULL;
	}

	return joinedFile;
}
Ejemplo n.º 4
0
//---------------------------------------------------------------
// TupleNestedLoops::Execute
//
// Input:   left  - The left relation to join. 
//          right - The right relation to join. 
// Output:  out   - The relation to hold the output. out.file is set
//                  only when the join succeeds.
// Return:  OK if join completed successfully. FAIL otherwise. 
//          
// Purpose: Performs a nested loop join on relations left and right
//          a tuple at a time. left is the outer relation and right
//          is the inner relation: the right relation is rescanned
//          once for every left tuple, and tuples whose integer join
//          attributes are equal are combined with MakeNewRecord.
//          On any failure the scans, record buffers and the
//          temporary output heap file are all released.
//---------------------------------------------------------------
Status TupleNestedLoops::Execute(JoinSpec& left, JoinSpec& right, JoinSpec& out) {
	JoinMethod::Execute(left, right, out);

	//	Create the temporary heapfile
	Status st;
	HeapFile *tmpHeap = new HeapFile(NULL, st);
	if (st != OK) {
		std::cerr << "Failed to create output heapfile." << std::endl;
		delete tmpHeap;
		return FAIL;
	}

	//	Open scan on left relation
	Status leftStatus;
	Scan *leftScan = left.file->OpenScan(leftStatus);
	if (leftStatus != OK) {
		std::cerr << "Failed to open scan on left relation." << std::endl;
		delete leftScan;
		delete tmpHeap;
		return FAIL;
	}

	//	Record buffers are allocated once and reused for every tuple.
	char *leftRec = new char[left.recLen];
	char *rightRec = new char[right.recLen];
	char *joinedRec = new char[out.recLen];

	bool failed = false;

	//	Loop over the left relation
	while (!failed) {
		RecordID leftRid;
		leftStatus = leftScan->GetNext(leftRid, leftRec, left.recLen);
		if (leftStatus == DONE) break;
		if (leftStatus != OK) {
			failed = true;
			break;
		}

		//	The join attribute on left relation
		int *leftJoinValPtr = (int*)(leftRec + left.offset);

		//	Open scan on right relation
		Status rightStatus;
		Scan *rightScan = right.file->OpenScan(rightStatus);
		if (rightStatus != OK) {
			std::cerr << "Failed to open scan on right relation." << std::endl;
			delete rightScan;
			failed = true;
			break;
		}

		//	Loop over right relation
		while (true) {
			RecordID rightRid;
			rightStatus = rightScan->GetNext(rightRid, rightRec, right.recLen);
			if (rightStatus == DONE) break;
			if (rightStatus != OK) {
				failed = true;
				break;
			}

			//	Compare join attribute
			int *rightJoinValPtr = (int*)(rightRec + right.offset);
			if (*leftJoinValPtr == *rightJoinValPtr) {
				//	Create the record and insert into tmpHeap...
				MakeNewRecord(joinedRec, leftRec, rightRec, left, right);
				RecordID insertedRid;
				Status tmpStatus = tmpHeap->InsertRecord(joinedRec, out.recLen, insertedRid);

				if (tmpStatus != OK) {
					std::cerr << "Failed to insert tuple into output heapfile." << std::endl;
					failed = true;
					break;
				}
			}
		}

		delete rightScan;
	}

	delete leftScan;
	delete [] leftRec;
	delete [] rightRec;
	delete [] joinedRec;

	if (failed) {
		delete tmpHeap;
		return FAIL;
	}

	out.file = tmpHeap;
	return OK;
}
Ejemplo n.º 5
0
//--------------------------------------------------------------------
// BlockNestedLoopJoin
//
// Purpose : Equi-joins relations R and S on an integer attribute with a
//           block nested loop join.
// Input   : specOfR - outer relation (file, recLen, offset of join key).
//           specOfS - inner relation (file, recLen, offset of join key).
//           B       - size in bytes of the in-memory block that holds
//                     records of R.
// Method  : R is read B / recLen records at a time into the block. For
//           every block, S is scanned once and each S record is
//           compared with every R record in the block. Matches are
//           concatenated by MakeNewRecord and appended to the result.
// Return  : A new heap file holding the joined records (owned by the
//           caller), or NULL if B cannot hold a single R record or a
//           file/scan could not be opened.
//--------------------------------------------------------------------
HeapFile* BlockNestedLoopJoin(JoinSpec specOfR, JoinSpec specOfS, int B)
{
	Status status = OK;

	int recLenR = specOfR.recLen;
	int recLenS = specOfS.recLen;
	int recLenJoined = recLenR + recLenS;

	// A block that holds no record would never consume R, so the outer
	// loop below could not terminate; a zero record length would divide
	// by zero.
	if (recLenR <= 0 || B < recLenR)
	{
		cerr << "ERROR: block size is too small to hold a record of relation R.\n";
		return NULL;
	}
	const int recordsPerBlock = B / recLenR;

	// Create a HeapFile for join results
	HeapFile* joinedFile = new HeapFile(NULL, status);
	if (OK != status)
	{
		cerr << "ERROR: cannot create a file for the joined relation.\n";
		delete joinedFile;
		return NULL;
	}

	Scan* scanR = specOfR.file->OpenScan(status);
	if (OK != status)
	{
		cerr << "ERROR: cannot open scan on the relation R heap file.\n";
		delete scanR;
		delete joinedFile;
		return NULL;
	}

	char* recBlockR = new char[B]; // Allocate memory for the block
	char* recS = new char[recLenS];
	char* recJoined = new char[recLenJoined];

	RecordID ridR, ridS, ridJoined;

	bool failed = false;
	bool lastBlock = false;
	while (!lastBlock)
	{
		// Fill the block
		int recordsInBlock = 0;
		while (recordsInBlock < recordsPerBlock)
		{
			if (OK != scanR->GetNext(ridR, recBlockR + recordsInBlock * recLenR, recLenR))
			{
				lastBlock = true;
				break;
			}
			recordsInBlock++;
		}

		// An empty block can only be the last one; scanning S against
		// it cannot produce any result.
		if (recordsInBlock == 0)
		{
			break;
		}

		Scan* scanS = specOfS.file->OpenScan(status);
		if (OK != status)
		{
			cerr << "ERROR: cannot open scan on the relation S heap file.\n";
			delete scanS;
			failed = true;
			break;
		}

		while (OK == scanS->GetNext(ridS, recS, recLenS))
		{
			int* joinArgS = (int*)&recS[specOfS.offset];

			for (int idx = 0; idx < recordsInBlock; idx++)
			{
				char* currentRecordPtr = recBlockR + (idx * recLenR);
				int* joinArgR = (int*)(currentRecordPtr + specOfR.offset);

				if (*joinArgR == *joinArgS)
				{
					MakeNewRecord(recJoined, currentRecordPtr, recS, recLenR, recLenS);
					joinedFile->InsertRecord(recJoined, recLenJoined, ridJoined);
				}
			}
		}

		delete scanS;
	}

	// Release the allocated resources
	delete scanR;

	delete[] recBlockR;
	delete[] recS;
	delete[] recJoined;

	if (failed)
	{
		delete joinedFile;
		return NULL;
	}

	return joinedFile;
}
Ejemplo n.º 6
0
//---------------------------------------------------------------
// BlockNestedLoops::Execute
//
// Input:   left  - The left relation to join. 
//          right - The right relation to join. 
// Output:  out   - The relation to hold the output. out.file is set
//                  only when the join succeeds.
// Return:  OK if join completed successfully. FAIL otherwise. 
//          
// Purpose: Performs a block nested loops join on the specified relations. 
// This implementation always uses left as the outer relation and
// concatenates tuples in order <left, right>. Up to blockSize left
// records (blockSize is used here as a record count) are held in
// memory. For every such block the right relation is scanned once, and
// the scan is then moved back to its starting RecordID for the next
// block. Records are compared as arrays of int, indexed by joinAttr.
//---------------------------------------------------------------
Status BlockNestedLoops::Execute(JoinSpec& left, JoinSpec& right, JoinSpec& out) {
	JoinMethod::Execute(left, right, out);

	// With an empty block the left scan would never advance and the
	// loop below could not terminate.
	if (blockSize <= 0) {
		std::cout << "Block size must be positive" << std::endl;
		return FAIL;
	}

	Status s;
	HeapFile* tmpHeap = new HeapFile(NULL, s);
	if (s != OK) {
		std::cout << "Creating new Heap File Failed" << std::endl;
		delete tmpHeap;
		return FAIL;
	}

	Scan * leftScan = left.file->OpenScan(s);
	if (s != OK) {
		std::cout << "Open scan left failed" << std::endl;
		delete leftScan;
		delete tmpHeap;
		return FAIL;
	}

	Scan * rightScan = right.file->OpenScan(s);
	if (s != OK) {
		std::cout << "Open scan right failed" << std::endl;
		delete rightScan;
		delete leftScan;
		delete tmpHeap;
		return FAIL;
	}

	RecordID leftRid, rightRid, rightFirstRid, outRid;

	// array to hold the "block" in memory
	char* blockArray = new char[left.recLen * blockSize];
	int blockArraySize = 0; // number of records currently in the block
	int* rightRec = new int[right.numOfAttr];
	int leftRecLen = left.recLen;
	int rightRecLen = right.recLen;

	char* newRec = new char[left.recLen + right.recLen];

	// Remember where the right scan starts so it can be rewound.
	rightFirstRid = rightScan->currRid;

	bool failed = false;
	bool leftDone = false;

	while (!leftDone && !failed) {
		// fill the block with as many records as possible, reading each
		// record straight into its slot
		blockArraySize = 0;
		while (blockArraySize < blockSize) {
			Status st = leftScan->GetNext(leftRid, blockArray + left.recLen * blockArraySize, leftRecLen);
			if (st == DONE) {
				leftDone = true;
				break;
			}
			if (st != OK) {
				failed = true;
				break;
			}
			blockArraySize++;
		}

		// an empty block (only possible at the end of left) joins with nothing
		if (failed || blockArraySize == 0) {
			break;
		}

		// scan through the right, and scan the block in memory for joins
		Status rightStatus = rightScan->GetNext(rightRid, (char *)rightRec, rightRecLen);
		while (rightStatus == OK && !failed) {
			for (int j = 0; j < blockArraySize; j++) {
				int* leftCurrRec = (int *) (blockArray + left.recLen * j);
				if (leftCurrRec[left.joinAttr] == rightRec[right.joinAttr]) {
					MakeNewRecord(newRec, (char *)leftCurrRec, (char *)rightRec, left, right);
					if (tmpHeap->InsertRecord(newRec, left.recLen + right.recLen, outRid) != OK) {
						failed = true;
						break;
					}
				}
			}
			if (!failed) {
				rightStatus = rightScan->GetNext(rightRid, (char *)rightRec, rightRecLen);
			}
		}

		// anything other than a clean end of scan is an error
		if (!failed && rightStatus != DONE) {
			failed = true;
		}

		// rewind the right scan for the next block
		if (!failed && !leftDone) {
			rightScan->MoveTo(rightFirstRid);
		}
	}

	delete leftScan;
	delete rightScan;
	// these were allocated with new[], so they must be released with delete[]
	delete [] blockArray;
	delete [] rightRec;
	delete [] newRec;

	if (failed) {
		delete tmpHeap;
		return FAIL;
	}

	out.file = tmpHeap;
	return OK;
}
Ejemplo n.º 7
0
//-------------------------------------------------------------------
// Sort::PassZero
//
// Input   : None
// Output  : numTempFiles - incremented once for every run written
//           (NOTE(review): it is not reset here; presumably the caller
//           initialises it - confirm).
// Return  : OK if Pass 0 succeeds, FAIL otherwise
//
// Purpose : Reads the input file into a sort area of
//           MINIBASE_PAGESIZE * _numBufPages bytes. Each time the area
//           is full, or the input ends, the buffered records are sorted
//           with qsort according to _sortType and written to a new temp
//           heap file named by CreateTempFilename(_outFile, 0, run).
//           passZeroRuns counts the runs produced. If the area cannot
//           hold even one record, no run is produced and OK is returned.
//-------------------------------------------------------------------
Status Sort::PassZero(int &numTempFiles) {
	if (_recLength <= 0) return ReturnFAIL("Invalid record length in PassZero function.");

	// Get input file
	Status status;
	HeapFile inputFile(_inFile, status);
	if (status != OK) return ReturnFAIL("Opening input file in PassZero function failed.");

	// Open Scan
	Scan *scan = inputFile.OpenScan(status); 
	if (status != OK) {
		delete scan;
		return ReturnFAIL("Opening scan in PassZero function failed.");
	}

	passZeroRuns = 0;

	// Number of records the sort area can hold at once
	const int areaSize = MINIBASE_PAGESIZE * _numBufPages;
	const int capacity = areaSize / _recLength;
	if (capacity < 1) { // cannot fit even one record: nothing is written
		delete scan;
		return OK;
	}

	// Allocate memory
	char *area = (char *)malloc(areaSize);
	char *recPtr = (char *)malloc(_recLength);
	if (area == NULL || recPtr == NULL) {
		free(area);
		free(recPtr);
		delete scan;
		return ReturnFAIL("Allocating memory in PassZero function failed.");
	}

	RecordID rid;
	int recLen = _recLength;
	int recCounter = 0; // records currently buffered in the sort area
	bool moreInput = true;

	while (moreInput) {
		moreInput = (scan->GetNext(rid,recPtr,recLen) == OK);
		if (moreInput) {
			// add to memory; slots are _recLength bytes wide, which is
			// the stride used by qsort and by the write-out loop
			memcpy(area + recCounter * _recLength, recPtr, _recLength);
			recCounter++;
		}

		// keep filling unless the area is full or the input is exhausted
		// with records still pending
		if (recCounter == 0 || (moreInput && recCounter < capacity)) continue;

		// sort
		switch (_sortType) {
			case attrInteger:
				std::qsort(area,recCounter,_recLength,CompareInt); 
				break;
			case attrString:
				std::qsort(area,recCounter,_recLength,CompareString); 
				break;
			default:
				break;
		}

		// write out
		char *fileName = CreateTempFilename(_outFile,0,passZeroRuns);
		passZeroRuns++;
		HeapFile *tempFile = new HeapFile(fileName,status); 
		bool opened = (status == OK);
		bool written = opened;
		for (int i = 0; written && i < recCounter; i++) { // insert
			written = (tempFile->InsertRecord(area + i * _recLength,_recLength,rid) == OK);
		}
		// NOTE(review): how CreateTempFilename allocates its result is not
		// visible here; the original release form is kept - confirm.
		delete fileName;
		delete tempFile; 

		if (!written) {
			free(area);
			free(recPtr);
			delete scan;
			if (!opened) return ReturnFAIL("Opening temp file in PassZero function failed.");
			return ReturnFAIL("Writing records to temp file in PassZero function failed.");
		}

		numTempFiles++;
		recCounter = 0; // reset
	}

	free(area);
	free(recPtr);
	delete scan;
	return OK;
}