bool InMemorySort(int start_index, int num_blocks, vector<string> field_names) {// InMemorySort for 2-pass algorithm //count of blocks to sort in main memory vector<Tuple> mem_tuples = mem.getTuples(start_index, num_blocks); sort(mem_tuples.begin(), mem_tuples.end(), Tuple_Comparison(field_names)); for(int index=start_index; index< num_blocks; index++) { mem.getBlock(index)->clear(); // clearing the blocks } bool success = mem.setTuples(start_index, mem_tuples); if(!success) cout<<"Error in InMemorySort"<<endl; return success; }
//Main Memory Sorting //In memory sorting operation based on tuples bool MMSorting(int startMemIndex, int noOfBlocks, vector<string> fieldNames) { //total no of blocks to sort vector<Tuple> tuplesInMem = mem.getTuples(startMemIndex, noOfBlocks); sort(tuplesInMem.begin(), tuplesInMem.end(), CompareTuple(fieldNames)); //clear out any data in the given range of memory block for(int i=startMemIndex; i< noOfBlocks; i++) { mem.getBlock(i)->clear(); } bool success = mem.setTuples(startMemIndex, tuplesInMem); if(!success) cout<<"Error in MMSorting"<<endl; return success; }
// Pairs every tuple of one relation with every tuple of the other and hands
// each pair to join().
//
// The relation with fewer blocks becomes the outer ("small") side. A relation
// named big + "_join" is created whose schema is the fields of the small side
// followed by the fields of the big side. Big-side field names are always
// prefixed with "<big>."; small-side names are prefixed with "<small>." only
// when `multi` is false.
//
// Unless `attributes` is exactly {"*"} or `multi` is true, a second relation
// big + "_joinp" is created holding only the listed attributes; in that case
// big + "_join" is deleted before returning and the "_joinp" name is returned.
//
//   attributes     : attribute names to keep, or a single "*"
//   tableName1/2   : names of the two relations to combine
//   whereCondition : forwarded unchanged to join()
//   multi          : forwarded to join(); also controls small-side prefixing
//
// Returns the name of the relation the caller should read the result from.
string crossJoin(vector<string> attributes, string tableName1, string tableName2, string whereCondition, bool multi) {
    // Largest outer relation (in blocks) that is loaded in a single shot.
    // NOTE(review): presumably the number of main-memory blocks - confirm.
    const int kOnePassBlocks = 10;

    string small, big;
    if (schemaManager.getRelation(tableName1)->getNumOfBlocks() <= schemaManager.getRelation(tableName2)->getNumOfBlocks()) {
        small = tableName1;
        big = tableName2;
    } else {
        small = tableName2;
        big = tableName1;
    }

    Schema smallSchema = schemaManager.getSchema(small);
    Schema bigSchema = schemaManager.getSchema(big);

    // Joined schema: small-side fields first, then big-side fields.
    vector<string> fieldNames;
    vector<enum FIELD_TYPE> fieldTypes;
    const string smallPrefix = multi ? string() : small + ".";
    for (int i = 0; i < smallSchema.getNumOfFields(); i++) {
        fieldNames.push_back(smallPrefix + smallSchema.getFieldName(i));
        fieldTypes.push_back(smallSchema.getFieldType(i));
    }
    for (int i = 0; i < bigSchema.getNumOfFields(); i++) {
        fieldNames.push_back(big + "." + bigSchema.getFieldName(i));
        fieldTypes.push_back(bigSchema.getFieldType(i));
    }

    Schema schema(fieldNames, fieldTypes);
    schemaManager.createRelation(big + "_join", schema);
    Relation *relation1 = schemaManager.getRelation(small);
    Relation *relation2 = schemaManager.getRelation(big);
    int size1 = relation1->getNumOfBlocks(), size2 = relation2->getNumOfBlocks();

    // Optional projection relation restricted to the requested attributes.
    bool proj = false;
    const bool selectAll = (attributes.size() == 1 && attributes[0] == "*");
    if (!(selectAll || multi)) {
        vector<string> projNames;
        vector<enum FIELD_TYPE> projTypes;
        for (size_t i = 0; i < attributes.size(); i++) {
            int offset = schema.getFieldOffset(attributes[i]);
            projNames.push_back(schema.getFieldName(offset));
            projTypes.push_back(schema.getFieldType(attributes[i]));
        }
        Schema projSchema(projNames, projTypes);
        proj = true;
        schemaManager.createRelation(big + "_joinp", projSchema);
    }

    if (size1 <= kOnePassBlocks) {
        // Outer side is small enough: copy all of its tuples out of memory
        // blocks 0..size1-1 once, then stream the big side through block 1.
        // The copy is what makes reusing block 1 safe.
        vector<Tuple> tuples;
        if (size1 > 0) {
            // An empty outer relation has nothing to load; skipping the
            // zero-block request leaves the result empty as expected.
            relation1->getBlocks(0, 0, size1);
            tuples = mainMemory.getTuples(0, size1);
        }
        for (size_t x = 0; x < tuples.size(); x++) {
            for (int i = 0; i < size2; i++) {
                relation2->getBlock(i, 1);
                Block *block = mainMemory.getBlock(1);
                for (int j = 0; j < block->getNumTuples(); j++) {
                    Tuple tuple2 = block->getTuple(j);
                    join(tuples[x], tuple2, small, big, whereCondition, multi, attributes);
                }
            }
        }
    } else {
        // Outer side is read one block at a time into memory block 0; the big
        // side is streamed through memory block 1 for every outer tuple.
        for (int x = 0; x < size1; x++) {
            relation1->getBlock(x, 0);
            Block *block0 = mainMemory.getBlock(0);
            for (int y = 0; y < block0->getNumTuples(); y++) {
                Tuple tuple1 = block0->getTuple(y);
                for (int i = 0; i < size2; i++) {
                    relation2->getBlock(i, 1);
                    Block *block = mainMemory.getBlock(1);
                    for (int j = 0; j < block->getNumTuples(); j++) {
                        Tuple tuple2 = block->getTuple(j);
                        join(tuple1, tuple2, small, big, whereCondition, multi, attributes);
                    }
                }
            }
        }
    }

    string rt = big + "_join";
    if (proj) {
        // With a projection the full-width relation is dropped and the
        // projected one is what gets returned.
        schemaManager.deleteRelation(rt);
        rt = big + "_joinp";
    }
    return rt;
}
// Removes duplicate tuples from relation `tableName` and stores the result in
// a new relation named tableName + "_distinct".
//
// One pass:  when the relation has at most 10 blocks, all of it is loaded into
//            main memory, de-duplicated with getDistinctTuples() and inserted.
// Two pass:  otherwise the relation is read in chunks of up to 10 blocks; each
//            chunk is de-duplicated and appended to a temporary relation
//            tableName + "_dis". The temporary relation is then read back,
//            de-duplicated as a whole, written to the result relation, and
//            deleted.
//
// Relations larger than 10 * 10 blocks are rejected with a message on cerr;
// in that case no relation is created.
//
// Returns tableName + "_distinct" in every case (also after the size error,
// matching the original return value).
string distinct(string tableName) {
    // Number of blocks handled per load.
    // NOTE(review): presumably the number of main-memory blocks - confirm.
    const int kMemBlocks = 10;

    const string resultName = tableName + "_distinct";
    const string tempName = tableName + "_dis";

    Relation *relation = schemaManager.getRelation(tableName);
    Schema schema = relation->getSchema();
    const int size = relation->getNumOfBlocks();
    vector<Tuple> tuples;

    // one-pass
    if (size <= kMemBlocks) {
        if (size > 0) {
            // Nothing to load for an empty relation; skip the zero-block read.
            relation->getBlocks(0, 0, size);
            tuples = mainMemory.getTuples(0, size);
        }
        tuples = getDistinctTuples(tuples);
        schemaManager.createRelation(resultName, schema);
        insertTuples(resultName, tuples);
        return resultName;
    }

    // two-pass
    // The limit has to be tested against the original block count. The old
    // code tested `size` after the first pass had counted it down to <= 0, so
    // the error branch could never run.
    if (size > kMemBlocks * kMemBlocks) {
        cerr << "Table size exceeds the limit size(mainMemory)^2" << endl;
        return resultName;
    }

    // Pass 1: de-duplicate chunk by chunk into the temporary relation.
    schemaManager.createRelation(tempName, schema);
    int index = 0;
    int remaining = size;
    while (remaining > 0) {
        const int loadSize = (remaining < kMemBlocks) ? remaining : kMemBlocks;
        relation->getBlocks(index, 0, loadSize);
        for (int i = 0; i < loadSize; i++) {
            Block *block = mainMemory.getBlock(i);
            for (int j = 0; j < block->getNumTuples(); j++) {
                tuples.push_back(block->getTuple(j));
            }
        }
        tuples = getDistinctTuples(tuples);
        insertTuples(tempName, tuples);
        tuples.clear();
        index += kMemBlocks;
        remaining -= kMemBlocks;
    }

    // Pass 2: read the temporary relation back and de-duplicate it as a whole.
    schemaManager.createRelation(resultName, schema);
    Relation *temp = schemaManager.getRelation(tempName);
    const int tempBlocks = temp->getNumOfBlocks();
    // Round up so a trailing partial group of blocks is not dropped (plain
    // division read nothing at all when there were fewer than 10 blocks).
    const int buckets = (tempBlocks + kMemBlocks - 1) / kMemBlocks;
    for (int i = 0; i < kMemBlocks; i++) {
        for (int j = 0; j < buckets; j++) {
            const int diskBlock = i + kMemBlocks * j;
            // Valid block indices are 0..tempBlocks-1; later j only grow the
            // index, so the rest of this inner loop can be skipped.
            if (diskBlock >= tempBlocks)
                break;
            temp->getBlock(diskBlock, j);
            Block *block = mainMemory.getBlock(j);
            for (int k = 0; k < block->getNumTuples(); k++) {
                tuples.push_back(block->getTuple(k));
            }
        }
    }
    tuples = getDistinctTuples(tuples);
    insertTuples(resultName, tuples);
    tuples.clear();
    schemaManager.deleteRelation(tempName);

    return resultName;
}