//--------------------------------------------------------------------
// SortFile
//
// Purpose : Sorts a relation by an integer attribute.
// Input   : S      - pointer to the HeapFile to be sorted.
//           len    - size in bytes of the record buffer (records are
//                    assumed to fit in len bytes).
//           offset - byte offset of the integer sort attribute from
//                    the beginning of a record.
// Method  : Every record's key is inserted, together with its
//           RecordID, into a B+-Tree named "BTree".  The tree is then
//           scanned and each record is fetched from S by RecordID and
//           appended to a new HeapFile.  The B+-Tree is destroyed
//           before returning.
// Return  : The new HeapFile (allocated with new; the caller must
//           delete it), or NULL if the input scan, the B+-Tree, the
//           output file or the B+-Tree scan could not be opened.
//--------------------------------------------------------------------
HeapFile *SortFile(HeapFile *S, int len, int offset)
{
    Status s;

    Scan *scan = S->OpenScan(s);
    if (s != OK)
    {
        cerr << "ERROR : cannot open scan on the heapfile to sort.\n";
        delete scan;
        return NULL;
    }

    //
    // Scan the HeapFile S and insert every (key, rid) pair into a new B+-Tree.
    //
    BTreeFile *btree = new BTreeFile(s, "BTree", ATTR_INT, sizeof(int));
    if (s != OK)
    {
        cerr << "ERROR : cannot create the B+-Tree used for sorting.\n";
        delete btree;
        delete scan;
        return NULL;
    }

    char *recPtr = new char[len];
    int recLen = len;
    RecordID rid;

    while (scan->GetNext(rid, recPtr, recLen) == OK)
    {
        btree->Insert(recPtr + offset, rid);
    }
    delete scan;

    HeapFile *sorted = new HeapFile(NULL, s);   // create a temp HeapFile
    if (s != OK)
    {
        cerr << "Cannot create new file for sortedS\n";
        delete sorted;
        btree->DestroyFile();
        delete btree;
        delete [] recPtr;
        return NULL;
    }

    //
    // Now scan the B+-Tree and insert the records into the
    // new (sorted) HeapFile.
    //
    BTreeFileScan *btreeScan = (BTreeFileScan *)btree->OpenScan(NULL, NULL);
    if (btreeScan == NULL)
    {
        cerr << "ERROR : cannot open scan on the B+-Tree used for sorting.\n";
        delete sorted;
        btree->DestroyFile();
        delete btree;
        delete [] recPtr;
        return NULL;
    }

    int key;
    RecordID sortedRid;   // rid assigned by the output file; not used further
    while (btreeScan->GetNext(rid, &key) == OK)
    {
        S->GetRecord(rid, recPtr, recLen);
        sorted->InsertRecord(recPtr, recLen, sortedRid);
    }

    // Release the scan before the tree it reads from is destroyed.
    delete btreeScan;
    btree->DestroyFile();
    delete btree;
    delete [] recPtr;

    return sorted;
}
//-------------------------------------------------------------------- // JoinMethod::SortFile // // Purpose : Sorts a relation by an integer attribute. // Input : file - pointer to the HeapFile to be sorted. // len - length of the records in the file. (assume fixed size). // offset - offset of the sort attribute from the beginning of the record. // Method : We create a B+-Tree using that attribute as the key. Then // we scan the B+-Tree and insert the records into a new // HeapFile. he HeapFile guarantees that the order of // insertion will be the same as the order of scan later. // Return : The new sorted relation/HeapFile. //-------------------------------------------------------------------- HeapFile* JoinMethod::SortHeapFile(HeapFile *file, int len, int offset) { Status s; Scan *scan; scan = file->OpenScan(s); if (s != OK) { std::cerr << "ERROR : cannot open scan on the heapfile to sort." << std::endl; } // // Scan the HeapFile S, create a new B+Tree and insert the records into B+Tree. // BTreeFile *btree; btree = new BTreeFile (s, "BTree"); char* recPtr = new char[len]; int recLen = len; RecordID rid; char* recKey = new char[100]; while (scan->GetNext(rid, recPtr, recLen) == OK) { int* valPtr = (int*)(recPtr+offset); int val = *valPtr; toString(val,recKey); btree->Insert(recKey, rid); } delete scan; delete [] recKey; //std::cout << "created B+ tree!" << std::endl; HeapFile *sorted = new HeapFile(NULL, s); // create a temp HeapFile if (s != OK) { std::cerr << "Cannot create new file for sortedS\n"; } // Now scan the B+-Tree and insert the records into a // new (sorted) HeapFile. BTreeFileScan* btreeScan = btree->OpenScan(NULL, NULL); //int key; char* keyPtr; while (btreeScan->GetNext(rid, keyPtr) == OK) { //std::cout << "scanning " << rid << " " << keyPtr << std::endl; file->GetRecord (rid, recPtr, recLen); sorted->InsertRecord (recPtr, recLen, rid); } btree->DestroyFile(); delete btree; delete btreeScan; delete [] recPtr; return sorted; }
//--------------------------------------------------------------------
// IndexNestedLoopJoin
//
// Purpose : Equi-joins relations R (outer) and S (inner) on an integer
//           attribute using a B+-tree index built on S.
// Input   : specOfR - file, record length and join-attribute offset of R.
//           specOfS - file, record length and join-attribute offset of S.
// Method  : A B+-tree named "IJBT" is built over the join attribute of
//           S.  For every record of R the tree is searched for that
//           record's key and each match is fetched from S, combined
//           with the R record by MakeNewRecord and appended to the
//           result file.  The index is destroyed before returning.
// Return  : The HeapFile holding the joined records (allocated with
//           new; the caller must delete it), or NULL if a file, scan
//           or index could not be opened.
//--------------------------------------------------------------------
HeapFile* IndexNestedLoopJoin(JoinSpec specOfR, JoinSpec specOfS)
{
    Status status = OK;

    // Create a HeapFile for join results
    HeapFile* joinedFile = new HeapFile(NULL, status);
    if (OK != status)
    {
        cerr << "ERROR: cannot create a file for the joined relation.\n";
        delete joinedFile;
        return NULL;
    }

    int recLenR = specOfR.recLen;
    int recLenS = specOfS.recLen;
    int recLenJoined = recLenR + recLenS;

    RecordID ridR, ridS, ridJoined;

    // Build the B+-tree index on the inner relation (S)
    Scan* scanS = specOfS.file->OpenScan(status);
    if (OK != status)
    {
        cerr << "ERROR: cannot open scan on the relation S heap file.\n";
        delete scanS;
        delete joinedFile;
        return NULL;
    }

    BTreeFile* bTree = new BTreeFile(status, "IJBT", ATTR_INT, sizeof(int));
    if (OK != status)
    {
        cerr << "ERROR: cannot create the B+-tree index on the relation S.\n";
        delete bTree;
        delete scanS;
        delete joinedFile;
        return NULL;
    }

    char* recS = new char[recLenS];
    while (OK == scanS->GetNext(ridS, recS, recLenS))
    {
        bTree->Insert(recS + specOfS.offset, ridS);
    }
    delete scanS;

    // Iterate through the outer relation (R) and join
    Scan* scanR = specOfR.file->OpenScan(status);
    if (OK != status)
    {
        cerr << "ERROR: cannot open scan on the relation R heap file.\n";
        delete scanR;
        bTree->DestroyFile();
        delete bTree;
        delete[] recS;
        delete joinedFile;
        return NULL;
    }

    char* recR = new char[recLenR];
    char* recJoined = new char[recLenJoined];

    while (OK == scanR->GetNext(ridR, recR, recLenR))
    {
        // Search the index for exactly this record's key (low == high).
        int* joinArgR = (int*)&recR[specOfR.offset];
        BTreeFileScan* bTreeScan = (BTreeFileScan*)bTree->OpenSearchScan(joinArgR, joinArgR);

        int key;
        while (OK == bTreeScan->GetNext(ridS, &key))
        {
            specOfS.file->GetRecord(ridS, recS, recLenS);
            MakeNewRecord(recJoined, recR, recS, recLenR, recLenS);
            joinedFile->InsertRecord(recJoined, recLenJoined, ridJoined);
        }
        delete bTreeScan;
    }

    // Release the allocated resources.  The index is only needed for
    // this join, so it is destroyed rather than left behind under the
    // fixed name "IJBT".
    delete scanR;
    delete[] recR;
    delete[] recS;
    delete[] recJoined;
    bTree->DestroyFile();
    delete bTree;

    return joinedFile;
}
//---------------------------------------------------------------
// TupleNestedLoops::Execute
//
// Input:   left  - The left relation to join.
//          right - The right relation to join.
// Output:  out   - The relation to hold the output.
// Return:  OK if join completed successfully. FAIL otherwise.
//
// Purpose: Performs a nested loop join on relations left and right
//          a tuple a time. You can assume that left is the outer
//          relation and right is the inner relation.
//
// Notes:   out.file is only assigned on success; on failure every
//          scan, buffer and the partially filled result file is
//          released before returning.
//---------------------------------------------------------------
Status TupleNestedLoops::Execute(JoinSpec& left, JoinSpec& right, JoinSpec& out)
{
    JoinMethod::Execute(left, right, out);

    // Create the temporary heapfile
    Status st;
    HeapFile *tmpHeap = new HeapFile(NULL, st);
    if (st != OK)
    {
        std::cerr << "Failed to create output heapfile." << std::endl;
        delete tmpHeap;
        return FAIL;
    }

    // Open scan on left relation
    Status leftStatus;
    Scan *leftScan = left.file->OpenScan(leftStatus);
    if (leftStatus != OK)
    {
        std::cerr << "Failed to open scan on left relation." << std::endl;
        delete leftScan;
        delete tmpHeap;
        return FAIL;
    }

    // Record buffers are allocated once and reused for every tuple.
    char *leftRec = new char[left.recLen];
    char *rightRec = new char[right.recLen];
    char *joinedRec = new char[out.recLen];

    Status result = OK;

    // Loop over the left relation
    while (result == OK)
    {
        RecordID leftRid;
        leftStatus = leftScan->GetNext(leftRid, leftRec, left.recLen);
        if (leftStatus == DONE)
            break;
        if (leftStatus != OK)
        {
            result = FAIL;
            break;
        }

        // The join attribute on left relation
        int *leftJoinValPtr = (int*)(leftRec + left.offset);

        // Open a fresh scan on the right relation for this left tuple
        Status rightStatus;
        Scan *rightScan = right.file->OpenScan(rightStatus);
        if (rightStatus != OK)
        {
            std::cerr << "Failed to open scan on right relation." << std::endl;
            delete rightScan;
            result = FAIL;
            break;
        }

        // Loop over right relation
        while (true)
        {
            RecordID rightRid;
            rightStatus = rightScan->GetNext(rightRid, rightRec, right.recLen);
            if (rightStatus == DONE)
                break;
            if (rightStatus != OK)
            {
                result = FAIL;
                break;
            }

            // Compare join attribute
            int *rightJoinValPtr = (int*)(rightRec + right.offset);
            if (*leftJoinValPtr != *rightJoinValPtr)
                continue;

            // Create the record and insert into tmpHeap
            MakeNewRecord(joinedRec, leftRec, rightRec, left, right);
            RecordID insertedRid;
            Status tmpStatus = tmpHeap->InsertRecord(joinedRec, out.recLen, insertedRid);
            if (tmpStatus != OK)
            {
                std::cerr << "Failed to insert tuple into output heapfile." << std::endl;
                result = FAIL;
                break;
            }
        }

        delete rightScan;
    }

    delete leftScan;
    delete [] leftRec;
    delete [] rightRec;
    delete [] joinedRec;

    if (result != OK)
    {
        delete tmpHeap;
        return FAIL;
    }

    out.file = tmpHeap;
    return OK;
}
//--------------------------------------------------------------------
// BlockNestedLoopJoin
//
// Purpose : Equi-joins relations R (outer) and S (inner) on an integer
//           attribute, reading R one block at a time.
// Input   : specOfR - file, record length and join-attribute offset of R.
//           specOfS - file, record length and join-attribute offset of S.
//           B       - size in bytes of the in-memory block for R; it
//                     must be able to hold at least one record of R.
// Method  : Up to B / recLen(R) records of R are loaded into the
//           block, then S is scanned once and every S record is
//           compared with every record in the block.  Matches are
//           combined by MakeNewRecord and appended to the result file.
// Return  : The HeapFile holding the joined records (allocated with
//           new; the caller must delete it), or NULL if B is too small
//           or a file or scan could not be opened.
//--------------------------------------------------------------------
HeapFile* BlockNestedLoopJoin(JoinSpec specOfR, JoinSpec specOfS, int B)
{
    Status status = OK;

    int recLenR = specOfR.recLen;
    int recLenS = specOfS.recLen;
    int recLenJoined = recLenR + recLenS;

    // A block that cannot hold a single record would never make progress
    // (and a zero record length would divide by zero below).
    if (recLenR <= 0 || B < recLenR)
    {
        cerr << "ERROR: block size is too small to hold a record of the relation R.\n";
        return NULL;
    }
    const int recordsPerBlock = B / recLenR;

    // Create a HeapFile for join results
    HeapFile* joinedFile = new HeapFile(NULL, status);
    if (OK != status)
    {
        cerr << "ERROR: cannot create a file for the joined relation.\n";
        delete joinedFile;
        return NULL;
    }

    Scan* scanR = specOfR.file->OpenScan(status);
    if (OK != status)
    {
        cerr << "ERROR: cannot open scan on the relation R heap file.\n";
        delete scanR;
        delete joinedFile;
        return NULL;
    }

    char* recBlockR = new char[B];   // Memory for the block of R records
    char* recS = new char[recLenS];
    char* recJoined = new char[recLenJoined];

    RecordID ridR, ridS, ridJoined;

    bool lastBlock = false;
    while (!lastBlock)
    {
        // Fill the block
        int recordsInBlock = 0;
        while (recordsInBlock < recordsPerBlock)
        {
            if (OK != scanR->GetNext(ridR, recBlockR + recordsInBlock * recLenR, recLenR))
            {
                lastBlock = true;
                break;
            }
            recordsInBlock++;
        }

        // R ended exactly on a block boundary: nothing left to join,
        // so do not scan S again for an empty block.
        if (recordsInBlock == 0)
        {
            break;
        }

        Scan* scanS = specOfS.file->OpenScan(status);
        if (OK != status)
        {
            cerr << "ERROR: cannot open scan on the relation S heap file.\n";
            delete scanS;
            delete scanR;
            delete[] recBlockR;
            delete[] recS;
            delete[] recJoined;
            delete joinedFile;
            return NULL;
        }

        while (OK == scanS->GetNext(ridS, recS, recLenS))
        {
            int* joinArgS = (int*)&recS[specOfS.offset];

            for (int idx = 0; idx < recordsInBlock; idx++)
            {
                char* currentRecordPtr = recBlockR + (idx * recLenR);
                int* joinArgR = (int*)(currentRecordPtr + specOfR.offset);

                if (*joinArgR == *joinArgS)
                {
                    MakeNewRecord(recJoined, currentRecordPtr, recS, recLenR, recLenS);
                    joinedFile->InsertRecord(recJoined, recLenJoined, ridJoined);
                }
            }
        }

        delete scanS;
    }

    // Release the allocated resources
    delete scanR;
    delete[] recBlockR;
    delete[] recS;
    delete[] recJoined;

    return joinedFile;
}
//--------------------------------------------------------------- // BlockNestedLoop::Execute // // Input: left - The left relation to join. // right - The right relation to join. // Output: out - The relation to hold the ouptut. // Return: OK if join completed succesfully. FAIL otherwise. // // Purpose: Performs a block nested loops join on the specified relations. // You can find a specification of this algorithm on page 455. You should // choose the smaller of the two relations to be the outer relation, but you // should make sure to concatenate the tuples in order <left, right> when // producing output. The block size can be specified in the constructor, // and is stored in the variable blockSize. //--------------------------------------------------------------- Status BlockNestedLoops::Execute(JoinSpec& left, JoinSpec& right, JoinSpec& out) { JoinMethod::Execute(left, right, out); Status s; HeapFile* tmpHeap = new HeapFile(NULL, s); if (s != OK) { std::cout << "Creating new Heap File Failed" << std::endl; return FAIL; } Scan * leftScan = left.file->OpenScan(s); if (s != OK) { std::cout << "Open scan left failed" << std::endl; return FAIL; } Scan * rightScan = right.file->OpenScan(s); if (s != OK) { std::cout << "Open scan left failed" << std::endl; return FAIL; } RecordID leftRid, rightRid, rightFirstRid, outRid; // array to hold the "block" in memory char* blockArray = new char[left.recLen * blockSize]; int blockArraySize = 0; // size in case of half full block int* leftCurrRec = (int *)blockArray; int* leftRec = new int[left.numOfAttr]; int* rightRec = new int[right.numOfAttr]; int leftRecLen = left.recLen; int rightRecLen = right.recLen; char* newRec = new char[left.recLen + right.recLen]; rightFirstRid = rightScan->currRid; Status st = OK; while (true) { // fill the block with as many records as possible if (blockArraySize < blockSize) { st = leftScan->GetNext(leftRid, (char *)leftRec, leftRecLen); if (st != DONE) { memcpy(blockArray + left.recLen * 
blockArraySize, leftRec, left.recLen); blockArraySize++; continue; } } // scan through the right, and scan the block in memory for joins while (rightScan->GetNext(rightRid, (char *)rightRec, rightRecLen) != DONE) { for (int j = 0; j < blockSize; j++) { if (j >= blockArraySize) { break; } leftCurrRec = (int *) (blockArray + left.recLen * j); if (leftCurrRec[left.joinAttr] == rightRec[right.joinAttr]) { MakeNewRecord(newRec, (char *)leftCurrRec, (char *)rightRec, left, right); tmpHeap->InsertRecord(newRec, left.recLen + right.recLen, outRid); } } } rightScan->MoveTo(rightFirstRid); blockArraySize = 0; if (st == DONE) { break; } } out.file = tmpHeap; //std::cout << "NUM BNL: " << tmpHeap->GetNumOfRecords() << std::endl; delete leftScan; delete rightScan; delete blockArray; delete leftRec; delete rightRec; delete newRec; return OK; }
//------------------------------------------------------------------- // Sort::PassZero // // Input : None // Output : Number of temp files generated after this pass // Return : OK if Pass 0 succeeds, FAIL otherwise //------------------------------------------------------------------- Status Sort::PassZero(int &numTempFiles) { // Get input file Status status; HeapFile inputFile(_inFile, status); if (status != OK) return ReturnFAIL("Opening input file in PassZero function failed."); int numRecords = inputFile.GetNumOfRecords(); int recCounter = 0, globalRecCounter = 0; // Allocate memory int areaSize = MINIBASE_PAGESIZE * _numBufPages; char *area = (char *)malloc(areaSize); char *areaPtr = area; RecordID rid; char *recPtr = (char *)malloc(_recLength); int recLen = _recLength; int numRecForSort = std::min(areaSize/_recLength,numRecords); // number of rec in sorting area at once // Open Scan Scan *scan = inputFile.OpenScan(status); if (status != OK) return ReturnFAIL("Opening scan in PassZero function failed."); // Sort passZeroRuns = 0; if (areaSize >= _recLength) { // can fit at least one record while (scan->GetNext(rid,recPtr,recLen) == OK) { recCounter++; globalRecCounter++; // add to memory if (memcpy(areaPtr,recPtr,recLen) != areaPtr) return ReturnFAIL("Reading records to memory in PassZero function failed."); areaPtr += recLen; areaSize -= recLen; if (areaSize < _recLength || globalRecCounter == numRecords) { // can't fit another rec or all recs have been added // sort switch (_sortType) { case attrInteger: std::qsort(area,recCounter,_recLength,CompareInt); break; case attrString: std::qsort(area,recCounter,_recLength,CompareString); default: break; } // write out char *fileName = CreateTempFilename(_outFile,0,passZeroRuns); passZeroRuns++; HeapFile *tempFile = new HeapFile(fileName,status); if (status != OK) return ReturnFAIL("Opening temp file in PassZero function failed."); areaPtr = area; while (recCounter > 0) { // insert 
tempFile->InsertRecord(areaPtr,_recLength,rid); recCounter--; areaPtr += _recLength; } numTempFiles++; areaSize = MINIBASE_PAGESIZE * _numBufPages; areaPtr = area; // reset delete fileName; delete tempFile; } } } free(area); free(recPtr); delete scan; return OK; }