/**
 * Copies the projected attributes of every qualifying tuple of the relation
 * named by projNames[0].relName into the relation `result`.
 *
 * @param result     name of the relation that receives the output tuples
 * @param projCnt    number of entries in projNames
 * @param projNames  offset/length descriptors of the attributes to copy
 * @param attrDesc   descriptor of the attribute being filtered on; only
 *                   dereferenced when filter is non-NULL
 * @param op         comparison operator passed through to the scan
 * @param filter     comparison value, or NULL to scan without a predicate
 * @param reclen     size in bytes of one output record
 * @return OK when the scan ends with FILEEOF, otherwise the first non-OK
 *         status reported by a file or scan call
 */
const Status ScanSelect(const string & result,
                        const int projCnt,
                        const AttrDesc projNames[],
                        const AttrDesc *attrDesc,
                        const Operator op,
                        const char *filter,
                        const int reclen)
{
    cout << "Doing HeapFileScan Selection using ScanSelect()" << endl;

    Status status;

    // Destination relation.
    InsertFileScan resultRel(result, status);
    if (status != OK)
        return status;

    // Staging buffer that each output tuple is assembled in.
    char outputData[reclen];
    Record outputRec;
    outputRec.data = (void *)outputData;
    outputRec.length = reclen;

    // The input relation is the one the first projected attribute belongs to.
    HeapFileScan hfs(projNames->relName, status);
    if (status != OK)
        return status;

    // With no filter value the scan is started with zeroed attribute
    // parameters; otherwise they come from attrDesc.
    int scanOffset = 0;
    int scanLength = 0;
    Datatype scanType = (Datatype)0;
    if (filter != NULL) {
        scanOffset = attrDesc->attrOffset;
        scanLength = attrDesc->attrLen;
        scanType = (Datatype)attrDesc->attrType;
    }
    status = hfs.startScan(scanOffset, scanLength, scanType, filter, op);
    if (status != OK)
        return status;

    RID rid;
    Record rec;
    for (;;) {
        status = hfs.scanNext(rid);
        if (status != OK)
            break;

        status = hfs.getRecord(rec);
        if (status != OK)
            return status;

        // Pack the projected attributes back to back into the staging buffer.
        char *dest = outputData;
        for (int i = 0; i < projCnt; i++) {
            const AttrDesc &attr = projNames[i];
            memcpy(dest, (char *)rec.data + attr.attrOffset, attr.attrLen);
            dest += attr.attrLen;
        }

        RID outRID;
        status = resultRel.insertRecord(outputRec, outRID);
        if (status != OK)
            return status;
    }

    // Running off the end of the file is the normal way for the loop to end.
    if (status != FILEEOF)
        return status;
    return OK;
}
/**
 * Scans a relation for tuples that match the filter predicate and appends
 * the projected attributes of each match to the relation named result.
 *
 * @param result      name of the relation that receives the output tuples
 * @param projCnt     number of entries in projNames
 * @param projNames[] offset/length descriptors of the attributes to copy
 * @param attrDesc    descriptor of the attribute the predicate applies to;
 *                    may be NULL, in which case the relation of
 *                    projNames[0] is scanned without a predicate
 * @param op          comparison operator handed to the scan
 * @param filter      comparison value handed to the scan
 * @param reclen      size in bytes of one output record
 * @return: OK on success
 *          an error code otherwise (the first non-OK status reported while
 *          opening, scanning, fetching or inserting)
 **/
const Status ScanSelect(const string & result,
                        const int projCnt,
                        const AttrDesc projNames[],
                        const AttrDesc *attrDesc,
                        const Operator op,
                        const char *filter,
                        const int reclen)
{
    Status status;

    // used to keep track of how many total tuples are selected
    int resultTupCnt = 0;

    // open the result table
    InsertFileScan resultRel(result, status);
    if (status != OK)
    {
        return status;
    }

    // staging buffer of reclen bytes that each output tuple is built in
    char outputData[reclen];
    Record outputRec;
    outputRec.data = (void *) outputData;
    outputRec.length = reclen;

    // Work out what to scan. Without an attribute descriptor there is no
    // predicate to apply, so fall back to an unfiltered scan of the relation
    // the first projected attribute belongs to instead of dereferencing NULL.
    string inputRel;
    int scanOffset = 0;
    int scanLength = 0;
    Datatype scanType = STRING;
    const char *scanFilter = NULL;
    if (attrDesc != NULL)
    {
        inputRel = attrDesc->relName;
        scanOffset = attrDesc->attrOffset;
        scanLength = attrDesc->attrLen;
        scanType = (Datatype) attrDesc->attrType;
        scanFilter = filter;
    }
    else
    {
        inputRel = projNames[0].relName;
    }

    // start scan on the input table
    HeapFileScan relScan(inputRel, status);
    if (status != OK)
    {
        return status;
    }

    status = relScan.startScan(scanOffset, scanLength, scanType, scanFilter, op);
    if (status != OK)
    {
        return status;
    }

    // scan the input table
    RID relRID;
    Record relRec;
    while ((status = relScan.scanNext(relRID)) == OK)
    {
        status = relScan.getRecord(relRec);
        if (status != OK)
        {
            return status;
        }

        // we have a match, copy the projected attributes into the output record
        int outputOffset = 0;
        for (int i = 0; i < projCnt; i++)
        {
            memcpy(outputData + outputOffset,
                   (char *) relRec.data + projNames[i].attrOffset,
                   projNames[i].attrLen);
            outputOffset += projNames[i].attrLen;
        }

        // add the new record to the output relation
        RID outRID;
        status = resultRel.insertRecord(outputRec, outRID);
        if (status != OK)
        {
            return status;
        }
        resultTupCnt++;
    }

    // FILEEOF is the normal end of the scan; anything else is a real failure
    // and must not be reported as success.
    if (status != FILEEOF)
    {
        return status;
    }

    // before returning print out the total number of tuples selected
    printf("selected %d result tuples \n", resultTupCnt);
    return OK;
}
/**
 * Selects tuples from a relation and appends the projected attributes of
 * each match to the relation `result`.
 *
 * The textual filter value is converted to the binary representation of the
 * filtered attribute's type (INTEGER / FLOAT) before it is handed to the
 * scan; STRING filters are passed through unchanged.
 *
 * @param result     name of the relation that receives the output tuples
 * @param projCnt    number of entries in projNames
 * @param projNames  offset/length descriptors of the attributes to copy
 * @param attrDesc   descriptor of the filtered attribute, or NULL for an
 *                   unfiltered scan of the relation of projNames[0]
 * @param op         comparison operator handed to the scan
 * @param filter     filter value as text; NULL means "no predicate"
 * @param reclen     size in bytes of one output record
 * @return OK on success, otherwise the first non-OK status reported while
 *         opening, scanning, fetching or inserting
 */
const Status ScanSelect(const string & result,
                        const int projCnt,
                        const AttrDesc projNames[],
                        const AttrDesc *attrDesc,
                        const Operator op,
                        const char *filter,
                        const int reclen)
{
    cout << "Doing HeapFileScan Selection using ScanSelect()" << endl;

    Status status;

    /************* open result table ********/
    InsertFileScan resultRel(result, status);
    if (status != OK)
    {
        return status;
    }

    char outputData[reclen];
    Record outputRec;
    outputRec.data = (void *) outputData;
    outputRec.length = reclen;

    int offset = 0;
    int len = 0;
    Datatype type = STRING;
    const char *actual_filter = NULL;

    // Binary copies of a numeric filter value. They are plain locals declared
    // before the scan object, so they stay alive for as long as the scan can
    // look at them and nothing has to be heap-allocated (the previous
    // calloc'd buffers were never freed).
    // NOTE(review): assumes startScan may keep the pointer it is given --
    // that is not visible from here, hence function-scope lifetime.
    int intFilter = 0;
    float floatFilter = 0;

    if (attrDesc)
    {
        offset = attrDesc->attrOffset;
        len = attrDesc->attrLen;
        type = (Datatype) attrDesc->attrType;

        /************ get the actual filter (predicate) for selection ********/
        // A NULL filter means there is nothing to convert; leaving
        // actual_filter NULL avoids calling atoi/atof on a null pointer.
        if (filter != NULL)
        {
            switch (type)
            {
                case INTEGER:
                    intFilter = atoi(filter);
                    actual_filter = (const char *) &intFilter;
                    break;
                case FLOAT:
                    floatFilter = (float) atof(filter);
                    actual_filter = (const char *) &floatFilter;
                    break;
                case STRING:
                    actual_filter = filter;
                    break;
                default:
                    // unknown type: no predicate is passed on, as before
                    break;
            }
        }
    }

    /********* start scanning ***********/
    string input_rel = projNames[0].relName;
    if (attrDesc)
    {
        input_rel = attrDesc->relName;
    }

    HeapFileScan innerScan(input_rel, status);
    if (status != OK)
    {
        return status;
    }

    status = innerScan.startScan(offset, len, type, actual_filter, op);
    if (status != OK)
    {
        return status;
    }

    RID innerRID;
    while ((status = innerScan.scanNext(innerRID)) == OK)
    {
        Record innerRec;
        status = innerScan.getRecord(innerRec);
        if (status != OK)
        {
            return status;
        }

        // we have a match, copy the projected attributes into the output record
        int outputOffset = 0;
        for (int i = 0; i < projCnt; i++)
        {
            memcpy(outputData + outputOffset,
                   (char *) innerRec.data + projNames[i].attrOffset,
                   projNames[i].attrLen);
            outputOffset += projNames[i].attrLen;
        }

        // add the new record to the output relation
        RID outRID;
        status = resultRel.insertRecord(outputRec, outRID);
        if (status != OK)
        {
            return status;
        }
    } // end scanner

    // FILEEOF is the normal end of the scan; any other status is a failure
    // that the caller needs to see.
    if (status != FILEEOF)
    {
        return status;
    }
    return OK;
}
const Status QU_Hash_Join(const string & result, const int projCnt, const attrInfo projNames[], const attrInfo *attr1, const Operator op, const attrInfo *attr2) { Status status; int resultTupCnt = 0; if (attr1->attrType != attr2->attrType || attr1->attrLen != attr2->attrLen) { return ATTRTYPEMISMATCH; } // go through the projection list and look up each in the // attr cat to get an AttrDesc structure (for offset, length, etc) AttrDesc attrDescArray[projCnt]; for (int i = 0; i < projCnt; i++) { Status status = attrCat->getInfo(projNames[i].relName, projNames[i].attrName, attrDescArray[i]); if (status != OK) { return status; } } // get AttrDesc structure for the first join attribute AttrDesc attrDesc1; status = attrCat->getInfo(attr1->relName, attr1->attrName, attrDesc1); if (status != OK) return status; // get AttrDesc structure for the second join attribute AttrDesc attrDesc2; status = attrCat->getInfo(attr2->relName, attr2->attrName, attrDesc2); if (status != OK) return status; // get output record length from attrdesc structures int reclen = 0; for (int i = 0; i < projCnt; i++) { reclen += attrDescArray[i].attrLen; } // open the result table InsertFileScan resultRel(result, status); if (status != OK) { return status; } char outputData[reclen]; Record outputRec; outputRec.data = (void *) outputData; outputRec.length = reclen; // calculate size of each outer tuple AttrDesc *attrs; int attrCnt; // get attribute data if ((status = attrCat->getRelInfo(attr1->relName, attrCnt, attrs)) != OK) return status; // compute length of each outer tuple int outerTupwidth = 0; for (int i = 0; i < attrCnt; i++) { outerTupwidth = outerTupwidth + attrs[i].attrLen; } free(attrs); int BLOCKSIZE = 4; // hack to set number of pages in each block of the outer table // calculate number of outertuples per page int outerTupsPerPage = (PAGESIZE - DPFIXED)/outerTupwidth; // finally compute number of tuples in blockSize pages int outerTupsPerBlock = BLOCKSIZE * outerTupsPerPage; // open the 
outer table. the outer table actually gets opened // twice. Once as a HeapFile and once as a HeapFileScan. // The heapfilescan is used to scan the outer table. The heapfile // is used to retrieve tuples that match a given inner tuple HeapFile outerTable(string(attrDesc1.relName), status); if (status != OK) return status; // then start scan on outer table HeapFileScan outerScan(string(attrDesc1.relName), status); if (status != OK) return status; status = outerScan.startScan(0, 0, STRING, NULL, EQ); if (status != OK) return status; // scan outer table RID outerRID; Record outerRec; Operator myop; switch(op) { case EQ: myop=EQ; break; case GT: myop=LT; break; case GTE: myop=LTE; break; case LT: myop=GT; break; case LTE: myop=GTE; break; case NE: myop=NE; break; } RID *matchingOuterRids; // actually a variable length array int outerRidCnt; char* innerJoinAttrPtr; joinHashTbl* joinHT; bool endOfOuter = false; while (!endOfOuter) { // allocate and initialize the hash table joinHT = new joinHashTbl ((int) (outerTupsPerBlock * 1.15), attrDesc1); int i=0; // process the next block of the other table while (i < outerTupsPerBlock) { // get next outer tuple if (outerScan.scanNext(outerRID) == OK) { i++; // fetch outer tuple status = outerScan.getRecord(outerRec); ASSERT(status == OK); // insert (RID, joinAttrValue) into hash table. The hashtable code // actually does the job of extracting the join attribute value from tuple) status = joinHT->insert(outerRID, (char *) outerRec.data); ASSERT(status == OK); } else { endOfOuter = true; break; } } //printf("processed next block of outer with %d tuples. 
start scan of inner\n", i); // scan inner table HeapFileScan innerScan(string(attrDesc2.relName), status); if (status != OK) return status; status = innerScan.startScan(0, 0, STRING, NULL, EQ); if (status != OK) return status; RID innerRID; while (innerScan.scanNext(innerRID) == OK) { Record innerRec; status = innerScan.getRecord(innerRec); ASSERT(status == OK); innerJoinAttrPtr = ((char *)innerRec.data) + attrDesc2.attrOffset, // get matching outer rids outerRidCnt = 0; status = joinHT->lookup(innerJoinAttrPtr, outerRidCnt, matchingOuterRids); ASSERT(status == OK); if (outerRidCnt > 0) { // now do the join between the inner tuple and the matching outer tuples (if any) for (int j=0; j < outerRidCnt; j++) { // get each next matching outer record // printf("rid of next matching outer tuple is %d.%d\n", // matchingOuterRids[j].pageNo, matchingOuterRids[j].slotNo); status = outerTable.getRecord(matchingOuterRids[j], outerRec); ASSERT(status == OK); //printf("retrieved matching outer tuple from buffer pool\n"); // produce an output tuple. copy data into the output record // from both tuples int outputOffset = 0; for (int k = 0; k < projCnt; k++) { // copy the data out of the proper input file (inner vs. 
outer) if (0 == strcmp(attrDescArray[k].relName, attrDesc1.relName)) { memcpy(outputData + outputOffset, (char *)outerRec.data + attrDescArray[k].attrOffset, attrDescArray[k].attrLen); } else // get data from the inner record { memcpy(outputData + outputOffset, (char *)innerRec.data + attrDescArray[k].attrOffset, attrDescArray[k].attrLen); } outputOffset += attrDescArray[k].attrLen; } // insert the output tuple into the output relation RID outRID; status = resultRel.insertRecord(outputRec, outRID); ASSERT(status == OK); resultTupCnt++; } free(matchingOuterRids); // release rid vector } } // end scan inner // all done with current block of the outer table innerScan.endScan(); // close the current scan on the inner delete joinHT; // delete the join hashtable } // end scan outer outerScan.endScan(); printf("blockNL Hash join produced %d result tuples \n", resultTupCnt); return OK; }
// implementation of nested loops join goes here const Status QU_NL_Join(const string & result, const int projCnt, const attrInfo projNames[], const attrInfo *attr1, const Operator op, const attrInfo *attr2) { Status status; int resultTupCnt = 0; if (attr1->attrType != attr2->attrType || attr1->attrLen != attr2->attrLen) { return ATTRTYPEMISMATCH; } // go through the projection list and look up each in the // attr cat to get an AttrDesc structure (for offset, length, etc) AttrDesc attrDescArray[projCnt]; for (int i = 0; i < projCnt; i++) { Status status = attrCat->getInfo(projNames[i].relName, projNames[i].attrName, attrDescArray[i]); if (status != OK) { return status; } } // get AttrDesc structure for the first join attribute AttrDesc attrDesc1; status = attrCat->getInfo(attr1->relName, attr1->attrName, attrDesc1); if (status != OK) { return status; } // get AttrDesc structure for the first join attribute AttrDesc attrDesc2; status = attrCat->getInfo(attr2->relName, attr2->attrName, attrDesc2); if (status != OK) { return status; } // get output record length from attrdesc structures int reclen = 0; for (int i = 0; i < projCnt; i++) { reclen += attrDescArray[i].attrLen; } // open the result table InsertFileScan resultRel(result, status); if (status != OK) { return status; } char outputData[reclen]; Record outputRec; outputRec.data = (void *) outputData; outputRec.length = reclen; // start scan on outer table HeapFileScan outerScan(string(attrDesc1.relName), status); if (status != OK) { return status; } status = outerScan.startScan(0, 0, STRING, NULL, EQ); if (status != OK) { return status; } // scan outer table RID outerRID; Record outerRec; Operator myop; switch(op) { case EQ: myop=EQ; break; case GT: myop=LT; break; case GTE: myop=LTE; break; case LT: myop=GT; break; case LTE: myop=GTE; break; case NE: myop=NE; break; } while (outerScan.scanNext(outerRID) == OK) { status = outerScan.getRecord(outerRec); ASSERT(status == OK); // scan inner table HeapFileScan 
innerScan(string(attrDesc2.relName), status); if (status != OK) { return status; } status = innerScan.startScan(attrDesc2.attrOffset, attrDesc2.attrLen, (Datatype) attrDesc2.attrType, ((char *)outerRec.data) + attrDesc1.attrOffset, myop); if (status != OK) { return status; } RID innerRID; while (innerScan.scanNext(innerRID) == OK) { Record innerRec; status = innerScan.getRecord(innerRec); ASSERT(status == OK); // we have a match, copy data into the output record int outputOffset = 0; for (int i = 0; i < projCnt; i++) { // copy the data out of the proper input file (inner vs. outer) if (0 == strcmp(attrDescArray[i].relName, attrDesc1.relName)) { memcpy(outputData + outputOffset, (char *)outerRec.data + attrDescArray[i].attrOffset, attrDescArray[i].attrLen); } else // get data from the inner record { memcpy(outputData + outputOffset, (char *)innerRec.data + attrDescArray[i].attrOffset, attrDescArray[i].attrLen); } outputOffset += attrDescArray[i].attrLen; } // end copy attrs // add the new record to the output relation RID outRID; status = resultRel.insertRecord(outputRec, outRID); ASSERT(status == OK); resultTupCnt++; } // end scan inner } // end scan outer printf("tuple nested join produced %d result tuples \n", resultTupCnt); return OK; }
// implementation of sort merge join goes here const Status QU_SM_Join(const string & result, const int projCnt, const attrInfo projNames[], const attrInfo *attr1, const Operator op, const attrInfo *attr2) { Status status; if (attr1->attrType != attr2->attrType || attr1->attrLen != attr2->attrLen) { return ATTRTYPEMISMATCH; } // go through the projection list and look up each in the // attr cat to get an AttrDesc structure (for offset, length, etc) AttrDesc attrDescArray[projCnt]; for (int i = 0; i < projCnt; i++) { Status status = attrCat->getInfo(projNames[i].relName, projNames[i].attrName, attrDescArray[i]); if (status != OK) { return status; } } // get AttrDesc structure for the first join attribute AttrDesc attrDesc1; status = attrCat->getInfo(attr1->relName, attr1->attrName, attrDesc1); if (status != OK) { return status; } // get AttrDesc structure for the first join attribute AttrDesc attrDesc2; status = attrCat->getInfo(attr2->relName, attr2->attrName, attrDesc2); if (status != OK) { return status; } // get output record length from attrdesc structures int reclen = 0; for (int i = 0; i < projCnt; i++) { reclen += attrDescArray[i].attrLen; } // open sorted scans on both input files SortedFile sorted1(attrDesc1.relName, attrDesc1.attrOffset, attrDesc1.attrLen, (Datatype) attrDesc1.attrType, 1000, status); if (status != OK) { return status; } SortedFile sorted2(attrDesc2.relName, attrDesc2.attrOffset, attrDesc2.attrLen, (Datatype) attrDesc2.attrType, 1000, status); if (status != OK) { return status; } sorted2.setMark(); // prepare output buffer char outputData[reclen]; Record outputRec; outputRec.data = (void *) outputData; outputRec.length = reclen; // open output relation file InsertFileScan resultRel(result, status); if (status != OK) { return status; } // scan the outer file bool firstTime = true; bool endOfInner = false; Record outerRec; while (sorted1.next(outerRec) == OK) { if (!firstTime) { // go back sorted2.gotoMark(); endOfInner = false; } else 
firstTime = false; // go forward until we get a match Record innerRec; bool done = false; while (!done) { if (OK != sorted2.next(innerRec)) { endOfInner = true; break; } if (matchRec(outerRec, innerRec, attrDesc1, attrDesc2) <= 0) done = true; } sorted2.setMark(); while (! endOfInner && matchRec(outerRec, innerRec, attrDesc1, attrDesc2) == 0) { // we have a match, copy data into the output record int outputOffset = 0; for (int i = 0; i < projCnt; i++) { // copy the data out of the proper input file (inner vs. outer) if (0 == strcmp(attrDescArray[i].relName, attrDesc1.relName)) { memcpy(outputData + outputOffset, (char *)outerRec.data + attrDescArray[i].attrOffset, attrDescArray[i].attrLen); } else // get data from the inner record { memcpy(outputData + outputOffset, (char *)innerRec.data + attrDescArray[i].attrOffset, attrDescArray[i].attrLen); } outputOffset += attrDescArray[i].attrLen; } // end copy attrs // add the new record to the output relation RID outRID; status = resultRel.insertRecord(outputRec, outRID); ASSERT(status == OK); // scan to the next entry in the inner sorted file if (OK != sorted2.next(innerRec)) endOfInner = true; } // end scan inner } // end scan outer return OK; }