/* void addTupleToBlock(Tuple tuple, MainMemory &mem) { //===================Block============================= cout << "===================Block=============================" << endl; // Set up a block in the memory // cout << "Clear the memory block 0" << endl; // Block* block_ptr=mem.getBlock(0); //access to memory block 0 // block_ptr->clear(); //clear the block // A block stores at most 2 tuples in this case // -----------first tuple----------- cout << "Set the tuple at offset 0 of the memory block 0" << endl; block_ptr->setTuple(0,tuple); // You can also use appendTuple() cout << "Now the memory block 0 contains:" << endl; cout << *block_ptr << endl; cout << "The block is full? " << (block_ptr->isFull()==1?"true":"false") << endl; cout << "The block currently has " << block_ptr->getNumTuples() << " tuples" << endl; cout << "The tuple at offset 0 of the block is:" << endl; cout << block_ptr->getTuple(0) << endl << endl; return; } */ void processInsert(string line, vector<string> cmdStr, SchemaManager schema_manager, MainMemory &mem) { string tableName = cmdStr[2]; Relation* relation_ptr = schema_manager.getRelation(tableName); cout << "Inside the INSERT function" << endl; int memory_block_index=0; //====================Tuple============================= cout << "====================Tuple=============================" << endl; // Set up the first tuple Tuple tuple = relation_ptr->createTuple(); //The only way to create a tuple is to call "Relation" printRelation(relation_ptr); vector<string> insertStr = splitString(line, "\()"); vector<string> attr = splitString(insertStr[1], ", "); vector<string> val = splitString(insertStr[3], ", "); /* TODO commented unordered insert giving errors for (int i = 0; i < attr.size(); i++) { if (tuple.getSchema().getFieldType(i)==INT){ cout << "Errored?? 
" << endl; tuple.setField(attr[i], atoi(val[i].c_str())); } else{ tuple.setField(attr[i], val[i]); } } */ for (int i = 0; i < attr.size(); i++) { //cout << "HAHAHHAHHA " << tuple.getField(attr[i])[0] << endl << endl; if (tuple.getSchema().getFieldType(attr[i])==INT){ //if (tuple.getField(attr[i])==INT){ cout << "Errored?? " << endl; tuple.setField(attr[i], atoi(val[i].c_str())); } else{ tuple.setField(attr[i], val[i]); } } // TODO to send file //see that tuple was properly filled printTuple(tuple); cout << "My test function on blocks " << endl; memory_block_index = findBlockForTuple(mem); appendTupleToRelation(relation_ptr, mem, memory_block_index, tuple); //addTupleToBlock(tuple, mem); cout << "After insertion of tuble now the reation stuff is " << endl; printRelation(relation_ptr); // Now write the tuple in a Disk Block. /* vector<string>::iterator it; cout << "Printing new things" << endl; for(it=str.begin(); it!=str.end(); ++it) { cout << *it << endl; } */ return; }
void insertIntoTable(string tableName, vector<string> fieldNames, vector<string> fieldValues) { if(!schemaManager.relationExists(tableName)) { cout<<"Illegal Tablename"<<endl; return; } Relation *relation = schemaManager.getRelation(tableName); Tuple tuple = relation->createTuple(); Schema schema = relation->getSchema(); vector<string>::iterator it,it1; for(it = fieldNames.begin(),it1 = fieldValues.begin();it!=fieldNames.end();it++, it1++) { string str=*it,str1=*it1; str = removeSpaces(str); int type = schema.getFieldType(str); if(!type) { str1 = removeSpaces(str1); if(isNumber(str1)) { tuple.setField(str,stoi(str1)); } else { cout<<"Data type is not supported\n"; return; } } else { regex exp("\\ *\"(.*)\""); cmatch match; if(regex_match(str1.c_str(),match,exp)) { str1 = match[1]; if(str1.length()>20) { cout<<"Data type is not supported\n"; return; } else tuple.setField(str,str1); } else { cout<<"Data type is not supported\n"; return; } } } insertTuple(tableName, tuple); cout<<disk.getDiskIOs()<<endl; }
void processInsert(string line, vector<string> cmdStr, SchemaManager schema_manager, MainMemory &mem) { string tableName = cmdStr[2]; Relation* relation_ptr = schema_manager.getRelation(tableName); cout << "Inside the INSERT function" << endl; //====================Tuple============================= cout << "====================Tuple=============================" << endl; // Set up the first tuple Tuple tuple = relation_ptr->createTuple(); //The only way to create a tuple is to call "Relation" //printRelation(relation_ptr); vector<string> insertStr = splitString(line, "\()"); vector<string> attr = splitString(insertStr[1], ", "); vector<string> val = splitString(insertStr[3], ", "); for (int i = 0; i < attr.size(); i++) { if (tuple.getSchema().getFieldType(i)==INT){ tuple.setField(attr[i], atoi(val[i].c_str())); } else{ tuple.setField(attr[i], val[i]); } } //see that tuple was properly filled printTuple(tuple); // Now write the tuple in a Disk Block. /* vector<string>::iterator it; cout << "Printing new things" << endl; for(it=str.begin(); it!=str.end(); ++it) { cout << *it << endl; } */ return; }
void join(Tuple tuple1, Tuple tuple2, string tableName1, string tableName2, string whereCondition, bool multi, vector<string> attributes) { Relation *relation = schemaManager.getRelation(tableName2+"_join"); Tuple tuple =relation->createTuple(); if(!multi) { for(int i=0;i<tuple1.getNumOfFields();i++) { if(tuple1.getSchema().getFieldType(i) == INT) tuple.setField(tableName1+"."+tuple1.getSchema().getFieldName(i), tuple1.getField(i).integer); else tuple.setField(tableName1+"."+tuple1.getSchema().getFieldName(i), *(tuple1.getField(i).str) ); } } else { for(int i=0;i<tuple1.getNumOfFields();i++) { if(tuple1.getSchema().getFieldType(i) == INT) tuple.setField(tuple1.getSchema().getFieldName(i), tuple1.getField(i).integer); else tuple.setField(tuple1.getSchema().getFieldName(i), *(tuple1.getField(i).str) ); } } for(int i=0;i<tuple2.getNumOfFields();i++) { if(tuple2.getSchema().getFieldType(i) == INT) tuple.setField(tableName2+"."+tuple2.getSchema().getFieldName(i), tuple2.getField(i).integer); else tuple.setField(tableName2+"."+tuple2.getSchema().getFieldName(i), *(tuple2.getField(i).str) ); } if((attributes.size()==1 && attributes[0]=="*") || multi) { if(whereConditionEvaluator(whereCondition, tuple)) insertTuple(tableName2+"_join", tuple); } else { Relation *relation1 = schemaManager.getRelation(tableName2+"_joinp"); Tuple tuplep = relation1->createTuple(); for(int i=0;i<attributes.size();i++) { if(tuplep.getSchema().getFieldType(attributes[i]) == INT) tuplep.setField(attributes[i], tuple.getField(attributes[i]).integer); else tuplep.setField(attributes[i], *(tuple.getField(attributes[i]).str)); } if(whereConditionEvaluator(whereCondition, tuple)) insertTuple(tableName2+"_joinp", tuplep); } }
/**
 * Two-pass natural join of `rptr1` and `rptr2` into a new relation named `r3`.
 *
 * Both inputs are first passed to sortRelation() (which, per the original
 * comments, creates sorted sublists of size < M). One block of every sublist
 * is then kept in main memory: blocks [0, num_sublists_r1) hold r1, the
 * following num_sublists_r2 blocks hold r2, and the last memory block
 * (max_blocks - 1) buffers output tuples. Each round findSmallest() selects a
 * tuple, it is joined against all in-memory tuples of the other relation, and
 * it is then nulled out; consumed blocks are replenished from disk.
 *
 * @param rptr1,rptr2      input relations
 * @param r3               name of the result relation to create
 * @param fields1,fields2  join attributes of r1 / r2 (positionally paired)
 * @param op               comparison operator per attribute pair; r2 columns
 *                         joined with "=" are omitted from the result schema
 * @return the newly created result relation
 *
 * Result columns are named "<relation name>.<field name>".
 */
Relation *twoPassNaturalJoin(Relation *rptr1, Relation *rptr2, string r3, vector<string> fields1, vector<string> fields2, vector<string> op)
{
    Schema s1, s2;
    bool flag = 1;
    vector <string> fields_r1, fields_r2, fields_r3;
    vector<enum FIELD_TYPE> field_type1, field_type2, types_r3;
    Block *block_ptr1, *block_ptr2;
    int num_blocks_r1, num_blocks_r2, max_blocks;

    // Block counts are sampled before sorting, exactly as in the original.
    num_blocks_r1 = rptr1->getNumOfBlocks();
    num_blocks_r2 = rptr2->getNumOfBlocks();
    max_blocks = mem.getMemorySize();
    s1 = rptr1->getSchema();
    s2 = rptr2->getSchema();
    fields_r1 = s1.getFieldNames();
    fields_r2 = s2.getFieldNames();

    for(int i=0; i< static_cast<int>(fields1.size()); i++)
        field_type1.push_back(s1.getFieldType(fields1[i]));
    //Create sorted sublists of size <M
    sortRelation(rptr1, s1, fields1, field_type1);

    for(int i=0; i< static_cast<int>(fields2.size()); i++)
        field_type2.push_back(s2.getFieldType(fields2[i]));
    //Create sorted sublists of size <M
    sortRelation(rptr2, s2, fields2, field_type2);

    // Result schema: every r1 column ...
    for(int i=0; i< static_cast<int>(fields_r1.size()); i++)
    {
        fields_r3.push_back(rptr1->getRelationName() + "." + fields_r1[i]);
        types_r3.push_back(s1.getFieldType(fields_r1[i]));
    }
    // ... plus every r2 column that is not an equality-join attribute.
    for(int i=0; i< static_cast<int>(fields_r2.size()); i++)
    {
        flag = 1;
        for(int k=0; k< static_cast<int>(op.size()); k++)
        {
            if(op[k] == "=" && fields_r2[i] == fields2[k] )
            {
                flag = 0;
                break;
            }
        }
        if(flag)
        {
            fields_r3.push_back(rptr2->getRelationName() + "." + fields_r2[i]);
            types_r3.push_back(s2.getFieldType(fields_r2[i]));
        }
    }

    Schema s3(fields_r3,types_r3);
    Relation *rptr3 = schema_manager.createRelation(r3, s3);
    if(disp)
    {
        cout<<*rptr1<<endl;
        cout<<*rptr2<<endl;
    }

    // Each sublist spans (max_blocks - 1) disk blocks; round the count up.
    int num_sublists_r1 = num_blocks_r1/(max_blocks - 1)+((num_blocks_r1%(max_blocks - 1) > 0)?1:0);
    int num_sublists_r2 = num_blocks_r2/(max_blocks - 1)+((num_blocks_r2%(max_blocks - 1) > 0)?1:0);

    // block_used_*[s]        : tuples still unconsumed in sublist s's memory block
    // disk_block_index_*[s]  : how many blocks of sublist s have been read so far
    vector<int> block_used_r1, block_used_r2, disk_block_index_r1, disk_block_index_r2;
    if(disp)
    {
        cout<<"number of sublists in r1: "<<num_sublists_r1<<endl;
        cout<<"number of sublists in r2: "<<num_sublists_r2<<endl;
    }

    //Get one block from each sublist into main memory
    for(int k = 0; k < num_sublists_r1; k++)
    {
        if(disp)
        {
            cout<<"k: "<<k<<endl;
            cout<<"disk block index: "<<k*(max_blocks-1)<<endl;
        }
        block_ptr1 = mem.getBlock(k);
        rptr1->getBlock(k*(max_blocks-1), k);
        disk_block_index_r1.push_back(1);
        block_used_r1.push_back(block_ptr1->getNumTuples());
    }
    for(int l = 0; l < num_sublists_r2; l++)
    {
        if(disp)
        {
            cout<<"l: "<<l<<endl;
            cout<<"disk block index: "<<l*(max_blocks - 1)<<endl;
        }
        block_ptr2 = mem.getBlock(l+num_sublists_r1);
        rptr2->getBlock(l*(max_blocks-1), l + num_sublists_r1);
        disk_block_index_r2.push_back(1);
        block_used_r2.push_back(block_ptr2->getNumTuples());
    }
    if(disp)
    {
        cout<<"Block Used 1: "<<block_used_r1.size()<<endl;
        cout<<"Block Used 2: "<<block_used_r2.size()<<endl;
    }

    // tuples_r3 mirrors the content of the output block so we know when it is full.
    vector<Tuple> tuples_r3;
    Block *block_r3 = mem.getBlock(max_blocks - 1);
    block_r3->clear();

    bool done = false;
    int block_id, tuple_offset;
    while(!done)
    {
        vector<Tuple> tuples_r1 = mem.getTuples(0, num_sublists_r1);
        vector<Tuple> tuples_r2 = mem.getTuples(num_sublists_r1, num_sublists_r2);
        Tuple tuple = rptr3->createTuple();
        int rel_no = 0;
        // NOTE(review): presumably sets rel_no to 1 or 2 and returns the index
        // of the smallest tuple in that relation's vector - confirm. If it
        // ever leaves rel_no at 0 this loop makes no progress.
        int id = findSmallest(tuples_r1, tuples_r2, fields1, fields2, rel_no);
        if(disp)
            cout<<"id: "<<id<<"rel: "<<rel_no<<endl;

        if(rel_no == 1)
        {
            if(disp)
                cout<<"Smallest Tuple: "<<tuples_r1[id]<<endl;
            for(int m = 0; m < static_cast<int>(tuples_r2.size()); m++)
            {
                if(isJoinTuple(tuples_r1[id], tuples_r2[m], fields1, fields2, op))
                {
                    if(disp)
                    {
                        cout<<"\n********************\nJoining:"<<endl;
                        cout<<tuples_r1[id]<<endl;
                        cout<<tuples_r2[m]<<endl;
                    }
                    for(int l =0; l < static_cast<int>(fields_r1.size()); l++)
                    {
                        if(s3.getFieldType(rptr1->getRelationName() + "." + fields_r1[l]) == INT)
                            tuple.setField(rptr1->getRelationName() + "." + fields_r1[l],tuples_r1[id].getField(fields_r1[l]).integer);
                        else
                            tuple.setField(rptr1->getRelationName() + "." + fields_r1[l],*(tuples_r1[id].getField(fields_r1[l]).str));
                    }
                    for(int l =0; l < static_cast<int>(fields_r2.size()); l++)
                    {
                        int flag = 1;
                        for(int n = 0; n < static_cast<int>(op.size()); n++)
                        {
                            if(op[n] == "=" && fields_r2[l].compare(fields2[n]) == 0)
                            {
                                flag = 0;
                                break;
                            }
                        }
                        if(flag)
                        {
                            if(s3.getFieldType(rptr2->getRelationName() + "." + fields_r2[l]) == INT)
                                tuple.setField(rptr2->getRelationName() + "." + fields_r2[l],tuples_r2[m].getField(fields_r2[l]).integer);
                            else
                                tuple.setField(rptr2->getRelationName() + "." + fields_r2[l],*(tuples_r2[m].getField(fields_r2[l]).str));
                        }
                    }
                    if(disp)
                        cout<<"New Tuple:"<<tuple<<endl;
                    tuples_r3.push_back(tuple);
                    block_r3->appendTuple(tuple);
                    //If main memory block is full, write it to disk and clear that block
                    if(tuples_r3.size() == s3.getTuplesPerBlock())
                    {
                        rptr3->setBlock(rptr3->getNumOfBlocks(),max_blocks - 1);
                        tuples_r3.clear();
                        block_r3->clear();
                    }
                }
            }

            // Map the flat tuple index back to (memory block, offset).
            // NOTE(review): divides by the sublist count rather than the
            // tuples-per-block count - kept as in the original, confirm.
            if(s1.getTuplesPerBlock() == 1)
            {
                block_id = id;
                tuple_offset = 0;
            }
            else
            {
                block_id = id/num_sublists_r1;
                tuple_offset = id%num_sublists_r1;
            }
            block_used_r1[block_id]--;
            block_ptr1 = mem.getBlock(block_id);
            block_ptr1->nullTuple(tuple_offset);

            //If a particular block of tuples has been used in the main memory, replenish from disk
            if(block_used_r1[block_id] == 0)
            {
                if(disp)
                    cout<<"Block list consumed: "<<block_id<<endl;
                //If we have used up all the blocks in this sublist
                if(exhaustedAllSublists(block_used_r1))
                    done = true;
                else
                {
                    cout<<disk_block_index_r1[block_id]<<endl;  // unconditional trace kept from the original
                    if(disk_block_index_r1[block_id] == ((max_blocks-1 < num_blocks_r1) ? (max_blocks - 1) : num_blocks_r1))
                    {
                        // This sublist has no more blocks on disk.
                        continue;
                    }
                    else
                    {
                        block_ptr1->clear();
                        block_ptr1 = mem.getBlock(block_id);
                        rptr1->getBlock(disk_block_index_r1[block_id] + (block_id)*(max_blocks-1), block_id);
                        block_used_r1[block_id] = block_ptr1->getNumTuples();
                        ++disk_block_index_r1[block_id];
                    }
                }
            }
        }
        else if(rel_no == 2)
        {
            if(disp)
                cout<<"Smallest Tuple: "<<tuples_r2[id]<<endl;
            for(int m = 0; m < static_cast<int>(tuples_r1.size()); m++)
            {
                if(isJoinTuple(tuples_r1[m], tuples_r2[id], fields1, fields2, op))
                {
                    if(disp)
                    {
                        cout<<"\n********************\nJoining:"<<endl;
                        cout<<tuples_r1[m]<<endl;
                        cout<<tuples_r2[id]<<endl;
                    }
                    for(int l =0; l < static_cast<int>(fields_r1.size()); l++)
                    {
                        if(s3.getFieldType(rptr1->getRelationName() + "." + fields_r1[l]) == INT)
                            tuple.setField(rptr1->getRelationName() + "." + fields_r1[l],tuples_r1[m].getField(fields_r1[l]).integer);
                        else
                            tuple.setField(rptr1->getRelationName() + "." + fields_r1[l],*(tuples_r1[m].getField(fields_r1[l]).str));
                    }
                    for(int l =0; l < static_cast<int>(fields_r2.size()); l++)
                    {
                        int flag = 1;
                        for(int n = 0; n < static_cast<int>(op.size()); n++)
                        {
                            if(op[n] == "=" && fields_r2[l].compare(fields2[n]) == 0)
                            {
                                flag = 0;
                                break;
                            }
                        }
                        if(flag)
                        {
                            if(s3.getFieldType(rptr2->getRelationName() + "." + fields_r2[l]) == INT)
                                tuple.setField(rptr2->getRelationName() + "." + fields_r2[l],tuples_r2[id].getField(fields_r2[l]).integer);
                            else
                                tuple.setField(rptr2->getRelationName() + "." + fields_r2[l],*(tuples_r2[id].getField(fields_r2[l]).str));
                        }
                    }
                    if(disp)
                        cout<<"New Tuple:"<<tuple<<endl;
                    tuples_r3.push_back(tuple);
                    block_r3->appendTuple(tuple);
                    //If main memory block is full, write it to disk and clear that block
                    if(tuples_r3.size() == s3.getTuplesPerBlock())
                    {
                        rptr3->setBlock(rptr3->getNumOfBlocks(),max_blocks - 1);
                        tuples_r3.clear();
                        block_r3->clear();
                    }
                }
            }

            // Same index mapping as for r1 (see the review note above).
            if(s2.getTuplesPerBlock() == 1)
            {
                block_id = id;
                tuple_offset = 0;
            }
            else
            {
                block_id = id/num_sublists_r2;
                tuple_offset = id%num_sublists_r2;
            }
            block_used_r2[block_id]--;
            block_ptr2 = mem.getBlock(num_sublists_r1 + block_id);
            block_ptr2->nullTuple(tuple_offset);

            //If a particular block of tuples has been used in the main memory, replenish from disk
            if(block_used_r2[block_id] == 0)
            {
                if(disp)
                    cout<<"Block list consumed: "<<block_id<<endl;
                //If we have used up all the blocks in this sublist
                if(exhaustedAllSublists(block_used_r2))
                    done = true;
                else
                {
                    cout<<disk_block_index_r2[block_id]<<endl;  // unconditional trace kept from the original
                    // Fixed: this was `max_blocks-1<< num_blocks_r2` (a left
                    // shift, almost always non-zero), so the bound was always
                    // max_blocks-1 even for relations with fewer blocks. It
                    // now mirrors the r1 branch: min(max_blocks-1, num_blocks_r2).
                    if(disk_block_index_r2[block_id] == ((max_blocks-1 < num_blocks_r2) ? (max_blocks - 1) : num_blocks_r2))
                    {
                        // This sublist has no more blocks on disk.
                        continue;
                    }
                    else
                    {
                        block_ptr2->clear();
                        block_ptr2 = mem.getBlock(num_sublists_r1 + block_id);
                        rptr2->getBlock(disk_block_index_r2[block_id] + (block_id)*(max_blocks-1), num_sublists_r1 + block_id);
                        block_used_r2[block_id] = block_ptr2->getNumTuples();
                        ++disk_block_index_r2[block_id];
                    }
                }
            }
        }

        if(disp)
        {
            cout<<"#r3: "<<rptr3->getNumOfTuples()<<endl;
        }
    }

    //For the last block which might not be full, need to write that to disk too
    if(tuples_r3.size() !=0)
        rptr3->setBlock(rptr3->getNumOfBlocks(),max_blocks - 1);
    if(disp)
    {
        cout<<*rptr3<<endl;
    }
    return rptr3;
}
/**
 * One-pass natural join of `rptr1` and `rptr2` into a new relation `r3`.
 *
 * The relation with fewer blocks (r1 on a tie) is loaded completely into main
 * memory starting at block 0 - it is assumed to fit. The other relation is
 * streamed one block at a time through the next memory block, and the block
 * after that buffers output tuples until it is full.
 *
 * @param rptr1,rptr2      input relations
 * @param r3               name of the result relation to create
 * @param fields1,fields2  join attributes of r1 / r2 (positionally paired)
 * @param op               comparison operator per attribute pair; r2 columns
 *                         joined with "=" are omitted from the result
 * @param r1_name,r2_name  prefixes used to qualify result column names
 *                         ("<name>.<field>")
 * @return the newly created result relation
 */
Relation *onePassNaturalJoin(Relation *rptr1, Relation *rptr2, string r3, vector<string> fields1, vector<string> fields2, vector<string> op, string r1_name, string r2_name)
{
    typedef vector<string>::size_type Index;

    const int num_blocks_r1 = rptr1->getNumOfBlocks();
    const int num_blocks_r2 = rptr2->getNumOfBlocks();
    Schema s1 = rptr1->getSchema();
    Schema s2 = rptr2->getSchema();
    vector<string> fields_r1 = s1.getFieldNames();
    vector<string> fields_r2 = s2.getFieldNames();

    // Result schema. While building it, remember the qualified output name of
    // every column so the inner loops do not have to recompute them.
    vector<string> fields_r3;
    vector<enum FIELD_TYPE> types_r3;
    vector<string> out_names_r1;   // qualified name per r1 column
    vector<string> kept_r2;        // raw names of r2 columns that survive
    vector<string> out_names_r2;   // qualified names of those columns

    for (Index c = 0; c < fields_r1.size(); ++c)
    {
        out_names_r1.push_back(r1_name + "." + fields_r1[c]);
        fields_r3.push_back(out_names_r1.back());
        types_r3.push_back(s1.getFieldType(fields_r1[c]));
    }

    //Find common attributes of both relations
    for (Index c = 0; c < fields_r2.size(); ++c)
    {
        bool is_equi_key = false;
        for (Index o = 0; o < op.size() && !is_equi_key; ++o)
            is_equi_key = (op[o] == "=" && fields_r2[c] == fields2[o]);
        if (is_equi_key)
            continue;

        kept_r2.push_back(fields_r2[c]);
        out_names_r2.push_back(r2_name + "." + fields_r2[c]);
        fields_r3.push_back(out_names_r2.back());
        types_r3.push_back(s2.getFieldType(fields_r2[c]));
    }

    Schema s3(fields_r3, types_r3);
    Relation *rptr3 = schema_manager.createRelation(r3, s3);
    if (disp)
    {
        displayRelationInfo(rptr3);
        cout<<s3<<endl;
    }

    // Decide which side stays resident in memory and which one is streamed.
    const bool stream_r1 = num_blocks_r1 > num_blocks_r2;
    Relation *resident = stream_r1 ? rptr2 : rptr1;
    Relation *streamed = stream_r1 ? rptr1 : rptr2;
    const int resident_blocks = stream_r1 ? num_blocks_r2 : num_blocks_r1;
    const int streamed_blocks = stream_r1 ? num_blocks_r1 : num_blocks_r2;
    const int input_slot = resident_blocks;        // memory block for the streamed side
    const int output_slot = resident_blocks + 1;   // memory block buffering results

    //Get tuples of smaller relation into Main Memory, assuming all can fit in
    resident->getBlocks(0, 0, resident_blocks);
    vector<Tuple> resident_tuples = mem.getTuples(0, resident_blocks);

    Block *input_block = mem.getBlock(input_slot);
    Block *output_block = mem.getBlock(output_slot);
    output_block->clear();

    int buffered = 0;   // number of tuples currently sitting in output_block
    for (int b = 0; b < streamed_blocks; ++b)
    {
        input_block->clear();
        streamed->getBlock(b, input_slot);
        vector<Tuple> streamed_tuples = input_block->getTuples();

        for (Index s = 0; s < streamed_tuples.size(); ++s)
        {
            //Check for holes
            if (streamed_tuples[s].isNull())
                continue;

            for (Index r = 0; r < resident_tuples.size(); ++r)
            {
                //Check for holes
                if (resident_tuples[r].isNull())
                    continue;

                Tuple tuple = rptr3->createTuple();
                // t1 always comes from r1 and t2 from r2, whichever side is streamed.
                Tuple &t1 = stream_r1 ? streamed_tuples[s] : resident_tuples[r];
                Tuple &t2 = stream_r1 ? resident_tuples[r] : streamed_tuples[s];
                if (!isJoinTuple(t1, t2, fields1, fields2, op))
                    continue;

                for (Index c = 0; c < fields_r1.size(); ++c)
                {
                    if (s3.getFieldType(out_names_r1[c]) == INT)
                        tuple.setField(out_names_r1[c], t1.getField(fields_r1[c]).integer);
                    else
                        tuple.setField(out_names_r1[c], *(t1.getField(fields_r1[c]).str));
                }
                for (Index c = 0; c < kept_r2.size(); ++c)
                {
                    if (s3.getFieldType(out_names_r2[c]) == INT)
                        tuple.setField(out_names_r2[c], t2.getField(kept_r2[c]).integer);
                    else
                        tuple.setField(out_names_r2[c], *(t2.getField(kept_r2[c]).str));
                }

                output_block->appendTuple(tuple);
                ++buffered;
                //If main memory block is full, write it to disk and clear that block
                if (buffered == s3.getTuplesPerBlock())
                {
                    rptr3->setBlock(rptr3->getNumOfBlocks(), output_slot);
                    buffered = 0;
                    output_block->clear();
                }
            }
        }
    }

    //For the last block which might not be full, need to write that to disk too
    if (buffered != 0)
        rptr3->setBlock(rptr3->getNumOfBlocks(), output_slot);
    if (disp)
    {
        cout<<*rptr3<<endl;
    }
    return rptr3;
}
/**
 * Block nested-loop join of `rptr1` and `rptr2` into a new relation `r3`.
 *
 * r2 is processed in chunks of up to (max_blocks - 2) blocks held in memory
 * blocks [0, max_blocks - 2). For every chunk, r1 is scanned one block at a
 * time through memory block (max_blocks - 2); matching pairs (as decided by
 * isJoinTuple) are written via the output block (max_blocks - 1). A partially
 * filled output block is flushed at the end of each chunk.
 *
 * @param rptr1,rptr2      input relations
 * @param r3               name of the result relation to create
 * @param fields1,fields2  attributes compared by isJoinTuple
 * @param op               comparison operators passed to isJoinTuple
 * @return the newly created result relation; it contains every column of
 *         both inputs, named "<relation name>.<field name>"
 */
Relation *slowCrossJoin(Relation *rptr1, Relation *rptr2, string r3, vector<string> fields1, vector<string> fields2, vector<string> op)
{
    typedef vector<string>::size_type Index;

    const int num_blocks_r1 = rptr1->getNumOfBlocks();
    const int num_blocks_r2 = rptr2->getNumOfBlocks();
    const int max_blocks = mem.getMemorySize();
    Schema s1 = rptr1->getSchema();
    Schema s2 = rptr2->getSchema();
    vector<string> fields_r1 = s1.getFieldNames();
    vector<string> fields_r2 = s2.getFieldNames();

    if (disp)
    {
        cout<<"#"<<rptr1->getRelationName()<<" ="<<num_blocks_r1<<endl;
        cout<<"#"<<rptr2->getRelationName()<<" ="<<num_blocks_r2<<endl;
    }

    // Result schema: r1's columns first, then r2's, all qualified. The
    // qualified names are reused below: r1 column c is fields_r3[c], r2
    // column c is fields_r3[r1_columns + c].
    vector<string> fields_r3;
    vector<enum FIELD_TYPE> types_r3;
    const Index r1_columns = fields_r1.size();
    for (Index c = 0; c < fields_r1.size(); ++c)
    {
        fields_r3.push_back(rptr1->getRelationName() + "." + fields_r1[c]);
        types_r3.push_back(s1.getFieldType(fields_r1[c]));
    }
    for (Index c = 0; c < fields_r2.size(); ++c)
    {
        fields_r3.push_back(rptr2->getRelationName() + "." + fields_r2[c]);
        types_r3.push_back(s2.getFieldType(fields_r2[c]));
    }

    Schema s3(fields_r3, types_r3);
    Relation *rptr3 = schema_manager.createRelation(r3, s3);
    if (disp)
    {
        cout<<"New Relation: "<<r3<<endl;
        cout<<s3<<endl;
    }

    const int chunk_blocks = max_blocks - 2;   // r2 blocks held at once
    const int input_slot = max_blocks - 2;     // memory block for the r1 scan
    const int output_slot = max_blocks - 1;    // memory block buffering results

    for (int first = 0; first < num_blocks_r2; first += chunk_blocks)
    {
        // Load the next chunk of r2 (the final chunk may be shorter).
        int num_blocks_to_read = num_blocks_r2 - first;
        if (num_blocks_to_read >= chunk_blocks)
            num_blocks_to_read = chunk_blocks;
        rptr2->getBlocks(first, 0, num_blocks_to_read);
        vector<Tuple> chunk_tuples = mem.getTuples(0, num_blocks_to_read);

        Block *input_block = mem.getBlock(input_slot);
        Block *output_block = mem.getBlock(output_slot);
        output_block->clear();
        int buffered = 0;   // number of tuples currently in output_block

        //Read one block of r1 at a time and combine each of its tuples
        //with all the tuples of the current r2 chunk
        for (int b = 0; b < num_blocks_r1; ++b)
        {
            input_block->clear();
            rptr1->getBlock(b, input_slot);
            vector<Tuple> scan_tuples = input_block->getTuples();

            for (Index s = 0; s < scan_tuples.size(); ++s)
            {
                //Check for holes
                if (scan_tuples[s].isNull())
                    continue;

                for (Index r = 0; r < chunk_tuples.size(); ++r)
                {
                    //Check for holes
                    if (chunk_tuples[r].isNull())
                        continue;

                    Tuple tuple = rptr3->createTuple();
                    if (!isJoinTuple(scan_tuples[s], chunk_tuples[r], fields1, fields2, op))
                        continue;

                    for (Index c = 0; c < fields_r1.size(); ++c)
                    {
                        const string &out_name = fields_r3[c];
                        if (s3.getFieldType(out_name) == INT)
                            tuple.setField(out_name, scan_tuples[s].getField(fields_r1[c]).integer);
                        else
                            tuple.setField(out_name, *(scan_tuples[s].getField(fields_r1[c]).str));
                    }
                    for (Index c = 0; c < fields_r2.size(); ++c)
                    {
                        const string &out_name = fields_r3[r1_columns + c];
                        if (s3.getFieldType(out_name) == INT)
                            tuple.setField(out_name, chunk_tuples[r].getField(fields_r2[c]).integer);
                        else
                            tuple.setField(out_name, *(chunk_tuples[r].getField(fields_r2[c]).str));
                    }

                    if (disp)
                        cout<<"New Tuple:"<<tuple<<endl;
                    output_block->appendTuple(tuple);
                    ++buffered;
                    //If main memory block is full, write it to disk and clear that block
                    if (buffered == s3.getTuplesPerBlock())
                    {
                        rptr3->setBlock(rptr3->getNumOfBlocks(), output_slot);
                        buffered = 0;
                        output_block->clear();
                    }
                }
            }
        }

        //For the last block which might not be full, need to write that to disk too
        if (buffered != 0)
            rptr3->setBlock(rptr3->getNumOfBlocks(), output_slot);
        if (disp)
        {
            cout<<*rptr3<<endl;
        }
    }
    return rptr3;
}
/**
 * Executes an INSERT statement against the relation named by words[2].
 *
 * Two forms are handled:
 *   - plain INSERT: the attribute list and value list are taken from the
 *     parenthesised pieces [1] and [3] of `line`; each named attribute is
 *     matched against the relation's unqualified column names and set by
 *     column position. Attributes that match no column are ignored.
 *   - INSERT ... SELECT: the trailing SELECT is evaluated with Select(), all
 *     result tuples are read into memory first (the source and target may be
 *     the same relation), and each is re-created in the target relation,
 *     copying the columns whose unqualified names match.
 *
 * @param words           tokenised statement
 * @param line            raw statement text
 * @param schema_manager  relation catalogue
 * @param mem             main memory used for staging blocks
 * @return the target relation
 *
 * Structural preconditions are enforced with assert(), matching the style
 * already used in this function.
 */
Relation* Insert(vector<string> &words, string &line, SchemaManager &schema_manager, MainMemory &mem){
    assert(words.size() > 2);
    Relation* relation_ptr = schema_manager.getRelation(words[2]);
    assert(relation_ptr);

    vector<string>::iterator it = find(words.begin(), words.end(), "SELECT");
    // no select
    if (it == words.end()){
        // get insert vals
        vector<string> content = splitBy(line, "()");
        assert(content.size() > 3);  // content[1] and content[3] are read below
        vector<string> fields = splitBy(content[1], ", ");
        vector<string> vals = splitBy(content[3], ",");
        //preProcess(vector<string>(1, words[2]), fields, schema_manager);
        preProcess(vector<string>(1, words[2]), vals, schema_manager);
        assert(fields.size() == vals.size());

        Tuple tuple = relation_ptr->createTuple();
        // standard insert doesn't have table names
        vector<string> col_names = nakedFieldNames(relation_ptr);

        // comparing
        const int num_fields = static_cast<int>(fields.size());
        const int num_cols = static_cast<int>(col_names.size());
        for (int i = 0; i < num_fields; i++){
            for (int j = 0; j < num_cols; j++){
                // this is a match
                if (fields[i] == col_names[j]){
                    if (tuple.getSchema().getFieldType(j) == INT){
                        tuple.setField(j, atoi(vals[i].c_str()));
                    }
                    else{
                        tuple.setField(j, vals[i]);
                    }
                    break;
                }
            }
        }
        appendTupleToRelation(relation_ptr, mem, tuple);
    }
    // with SELECT
    else{
        vector<string> SFW(it, words.end());
        Relation* new_relation = Select(SFW, schema_manager, mem);
        assert(new_relation);

        vector<string> new_field_names = nakedFieldNames(new_relation);
        vector<string> field_names = nakedFieldNames(relation_ptr);

        // mapping: index of new_field_names to field_names (-1 = no match)
        vector<int> mapping(new_field_names.size(), -1);
        for (int i = 0; i < static_cast<int>(new_field_names.size()); i++){
            for (int j = 0; j < static_cast<int>(field_names.size()); j++){
                if (new_field_names[i] == field_names[j]){
                    mapping[i] = j;
                    break;
                }
            }
        }

        int new_field_size = new_relation->getSchema().getNumOfFields();

        // warning: new_relation and relation_ptr might be the same!
        // get all tuples from the new_relation in one run
        vector<Tuple> new_tuples;
        for (int i = 0; i < new_relation->getNumOfBlocks(); i++){
            assert(!free_blocks.empty());
            int memory_block_index = free_blocks.front();
            free_blocks.pop();

            // read the relation block by block
            new_relation->getBlock(i, memory_block_index);
            Block* block_ptr = mem.getBlock(memory_block_index);
            assert(block_ptr);
            vector<Tuple> block_tuples = block_ptr->getTuples();
            // Fixed: the warning is about the CURRENT block, so test the
            // tuples just read rather than everything accumulated so far.
            if (block_tuples.empty()){
                cerr<<"Warning: Insert from SFW, No tuples in the current mem block!"<<endl;
            }
            new_tuples.insert(new_tuples.end(), block_tuples.begin(), block_tuples.end());
            free_blocks.push(memory_block_index);
        }

        for (int j = 0; j < static_cast<int>(new_tuples.size()); j++){
            Tuple tuple = relation_ptr->createTuple();
            for (int k = 0; k < new_field_size; k++){
                if (mapping[k] != -1){
                    int idx = mapping[k];
                    assert(idx < relation_ptr->getSchema().getNumOfFields() && idx >= 0);
                    if (tuple.getSchema().getFieldType(idx) == INT){
                        int val = new_tuples[j].getField(k).integer;
                        tuple.setField(field_names[idx], val);
                    }
                    else{
                        string *str = new_tuples[j].getField(k).str;
                        tuple.setField(field_names[idx], *str);
                    }
                }
            }
            appendTupleToRelation(relation_ptr, mem, tuple);
        }
        cout<<*relation_ptr<<endl;
    }
    return relation_ptr;
}
vector<Tuple> Qtree::exec(bool print, string *table_name){ vector<Tuple> ret ; #ifdef DEBUG this->print(0); #endif if(this->type == INS){ vector<Tuple> temp = this->left->exec( false, NULL ) ; if(temp.size() != 0){ Schema sins_from = temp[0].getSchema() ; vector<enum FIELD_TYPE> field_types_from = sins_from.getFieldTypes() ; vector<string> field_names_from = sins_from.getFieldNames() ; if(field_types_from.size() == this->info.size() - 1){ Schema sins_to = p->schema_manager.getSchema( this->info[0] ) ; vector<enum FIELD_TYPE> field_types_to ; vector<union Field> fields ; vector<string>::iterator it0 = this->info.begin() ; vector<enum FIELD_TYPE>::iterator it1 = field_types_from.begin(); vector<string>::iterator it2 = field_names_from.begin(); vector<string> STRv; vector<int> INTv ; string table_n = (*it0) ; vector<string> field_names_to ; it0 ++ ; for( ; it0 != this->info.end() ; it0 ++, it1++){ unsigned long found = it0->rfind('.') ; string s_table ; if(found == std::string::npos){ s_table = string( table_n + "." 
+ (*it0) ) ; }else{ s_table = string( it0->substr( it0->rfind('.') + 1 ) ) ; } if( sins_to.fieldNameExists( *it0 ) ){ field_names_to.push_back(string( *it0) ) ; if(sins_to.getFieldType( *it0) == *it1 ){ }else{ perror( ": Type mismatch"); return ret; } }else{ if(sins_to.fieldNameExists(s_table) ) { field_names_to.push_back(string( s_table ) ) ; if(sins_to.getFieldType( s_table) == *it1 ){ }else{ perror( ": Type mismatch"); return ret; } } else{ perror( "exec: No such field"); } } } for(vector<Tuple>::iterator it_tuple = temp.begin(); it_tuple != temp.end(); it_tuple ++) { for(it1 = field_types_from.begin(), it2 = field_names_from.begin() ; it1 != field_types_from.end() ; it1++, it2++){ if(*it1 == INT){ INTv.push_back( it_tuple->getField( *it2).integer ) ; }else{ STRv.push_back( *(it_tuple->getField( *it2).str) ) ; } } p->insert(table_n, field_names_to, STRv, INTv) ; INTv.clear(); STRv.clear() ; } }else{ perror("Size mismatch"); return ret; } }else{ return ret; } }else if(this->type == TAU){ string table_n; if(this->left->type == TABLE && (output_s.empty() || output_s.top() == NULL) ){ Schema s = p->schema_manager.getSchema( this->left->info[0] ) ; string s_table ; unsigned long found = this->info[0].rfind('.') ; table_n = this->left->info[0] ; if(found == std::string::npos){ s_table = string( table_n + "." 
+ this->info[0] ) ; }else{ s_table = string( this->info[0].substr( this->info[0].rfind('.') + 1 ) ) ; } if( s.fieldNameExists( this->info[0] ) ){ ret = p->SortTwoPass(table_n, this->info[0]) ; }else if(s.fieldNameExists(s_table) ) { ret = p->SortTwoPass(table_n, s_table) ; }else{ perror("No such field"); return ret ; } }else{ vector<Tuple> temp = this->left->exec( false, &table_n ) ; if(table_name != NULL) { (*table_name ) = string( this->info[0] ) ;} if(temp.size() != 0){ Schema s = temp[0].getSchema() ; string temp_table_name = "temp_table" ; while(p->schema_manager.relationExists(temp_table_name) ){ temp_table_name += "-a" ; } p->CreateTable(temp_table_name, temp ) ; temp_relations.push_back( temp_table_name ) ; unsigned long found = this->info[0].rfind('.') ; string s_table ; if(found == std::string::npos){ s_table = string( table_n + "." + this->info[0] ) ; }else{ s_table = string( this->info[0].substr( this->info[0].rfind('.') + 1 ) ) ; } if( s.fieldNameExists( this->info[0] ) ){ ret = p->SortTwoPass(temp_table_name, this->info[0]) ; }else if(s.fieldNameExists(s_table) ) { ret = p->SortTwoPass(temp_table_name, s_table) ; }else{ perror("No such field"); return ret ; } }else{ return ret; } } }else if(this->type == DELTA ){ string table_n; if(this->left->type == TABLE){ table_n = this->left->info[0] ; ret = p->dupTwoPass(table_n) ; }else{ vector<Tuple> temp = this->left->exec( false , &table_n) ; if(table_name != NULL) { (*table_name ) = string( this->info[0] ) ;} if(temp.size() != 0){ Schema s = temp[0].getSchema() ; string temp_table_name = "temp_table" ; while(p->schema_manager.relationExists(temp_table_name) ){ temp_table_name += "-a" ; } p->CreateTable(temp_table_name, temp ); temp_relations.push_back(temp_table_name ) ; ret = p->dupTwoPass(temp_table_name) ; }else{ return ret; } } }else if(this->type == PI ){ string table_n; vector<Tuple> temp = this->left->exec( false, &table_n ) ; if(table_name != NULL) { (*table_name ) = string( this->info[0] ) ;} 
if(temp.size() != 0){ Schema s = temp[0].getSchema() ; vector<string> field_names ; vector<enum FIELD_TYPE> field_types ; for(vector<string>::iterator it= this->info.begin(); it != this->info.end(); it++){ unsigned long found = it->rfind('.') ; string s_table ; if(found == std::string::npos){ s_table = string( table_n + "." + (*it) ) ; }else{ s_table = string( it->substr( it->rfind('.') + 1 ) ) ; } if( s.fieldNameExists( *it ) ){ field_names.push_back(string(*it) ) ; field_types.push_back(s.getFieldType( *it) ) ; }else{ if(s.fieldNameExists(s_table) ) { field_names.push_back(string( s_table ) ) ; field_types.push_back( s.getFieldType( s_table ) ); } else{ perror( "exec: No such field"); } } } string temp_table_name = "temp_table" ; Relation *rlt = NULL; while(p->schema_manager.relationExists(temp_table_name) ){ temp_table_name += "-a" ; } rlt = p->CreateTable(temp_table_name, field_names, field_types) ; temp_relations.push_back(temp_table_name ) ; for(vector<Tuple>::iterator tit = temp.begin(); tit != temp.end(); tit++){ Tuple t = rlt->createTuple() ; for(vector<string>::iterator it = field_names.begin(); it != field_names.end() ; it++){ union Field f= tit->getField(*it) ; if( s.getFieldType(*it) == INT ){ t.setField( *it, f.integer ) ; }else{ t.setField( *it, *(f.str)) ; } } ret.push_back( t ) ; } }else{ return ret; } }else if(this->type == PRODUCT){ vector<string> ptables; vector<Relation *> relations ; map<string, Qexpression *> sigma_operation ; vector<string> commons ; map<string, bool> joined_keys; vector<string>::iterator it = ptables.begin(); ptables.insert(ptables.end(), this->info.begin(), this->info.end() ); if(output_s.empty() ){ }else if(output_s.top()->type == INTEGER || output_s.top()->type == LITERAL ){ Tuple *t = NULL; if(output_s.top()->judge(*t) ){ /* WHERE clasuse always true */ while(! 
output_s.empty() ){ output_s.top()->free() ;output_s.pop();} }else{ /* empty results */ return ret; } }else{ Qexpression *optimized = output_s.top()->optimize_sigma(&sigma_operation) ; output_s.pop(); if(optimized != NULL){ output_s.push(optimized) ;} #ifdef DEBUG for(map<string, Qexpression *>::iterator it = sigma_operation.begin(); it != sigma_operation.end(); it ++){ cout << it->first << "->" << endl; it->second->print(0); } #endif if( ! output_s.empty() ){ optimized = output_s.top()->optimize_join(commons, joined_keys) ; output_s.pop(); if(optimized != NULL){ output_s.push(optimized) ; }else{ while(! output_s.empty() ){output_s.top()->free() ; output_s.pop();} } if(! output_s.empty()){ #ifdef DEBUG output_s.top()->print(0); #endif } } #ifdef DEBUG cerr << "commons: "; for(vector<string>::iterator it = commons.begin(); it != commons.end(); it++){ cerr<< *it << " " ; } cerr << endl ; #endif } vector<string> to_drop ; for(vector<string>::iterator it = ptables.begin(); it != ptables.end(); ){ if(sigma_operation[*it] == NULL){ it++; }else{ Relation *temp_relation; vector<Tuple> tuples = p->singleTableSelect( *it , sigma_operation[*it] ) ; if(tuples.size() != 0){ temp_relation = p->CreateTable( ( *it) + "-SIGMA", tuples) ; }else{ vector<string> field_names = p->schema_manager.getRelation(*it)->getSchema().getFieldNames(); vector<enum FIELD_TYPE> field_types = p->schema_manager.getRelation(*it)->getSchema().getFieldTypes() ; temp_relation = p->CreateTable( (*it) + "-SIGMA" , field_names, field_types ) ; } to_drop.push_back( temp_relation->getRelationName() ) ; it = ptables.erase(it) ;ptables.insert( it, temp_relation->getRelationName() ) ; } } if(ptables.size() == 2){ if(ptables[0] <= ptables[1]){ ret = p->JoinTwoPass(ptables[0], ptables[1], commons ) ; }else{ ret = p->JoinTwoPass(ptables[1], ptables[0], commons ) ; } }else{ ret = p->JoinTables(ptables, commons) ; } for(vector<string>::iterator it = to_drop.begin(); it != to_drop.end(); it++){ p->DropTable(*it) ; } 
if(output_s.empty() ){ }else{ string temp_table_name = "temp_table"; while(p->schema_manager.relationExists(temp_table_name)) { temp_table_name += "-a"; } p->CreateTable( temp_table_name, ret ) ; temp_relations.push_back(temp_table_name) ; ret = p->singleTableSelect(temp_table_name, output_s.top() ) ; } }else if(this->type == TABLE){ if(table_name != NULL) { (*table_name ) = string( this->info[0] ) ;} ret = p->singleTableSelect(this->info[0], output_s.empty() ? NULL : output_s.top() ); }else{ return ret; } if(ret.size() != 0 && print){ vector<string> field_names = ret[0].getSchema( ).getFieldNames() ; cout << "-----------------" << endl ; for(vector<string>::iterator it = field_names.begin(); it != field_names.end(); it++){ cout<< *it << ' ' ; } cout << endl << "-----------------" << endl ; for(vector<Tuple>::iterator it = ret.begin(); it != ret.end(); it ++ ){ cout << (*it) << endl; }cout << "-----------------" << endl ; } return ret; }
string projection(vector<string> attributes, string tableName, string whereCondition) { Relation *relation = schemaManager.getRelation(tableName); Schema tableSchema = relation->getSchema(); vector<string> fieldNames; vector<enum FIELD_TYPE> fieldTypes; vector<string>::iterator it; int flag=-1; bool print=true; for(it=attributes.begin();it!=attributes.end();it++) { for(int i=0;i<tableSchema.getNumOfFields();i++) { string temp = *it; if(tableSchema.getFieldName(i)==temp || tableName+"."+tableSchema.getFieldName(i) == temp) flag=i; } if(flag!=-1) { fieldNames.push_back(tableSchema.getFieldName(flag)); fieldTypes.push_back(tableSchema.getFieldType(flag)); flag = -1; } } if(attributes.size()==1 && attributes[0] == "*") { if(whereCondition.empty()) return tableName; fieldNames = tableSchema.getFieldNames(); fieldTypes = tableSchema.getFieldTypes(); } Schema dupSchema(fieldNames,fieldTypes); Relation *relationDup = schemaManager.createRelation(tableName.append("_dup"), dupSchema); Tuple tuple = relationDup->createTuple(); vector<Tuple>::iterator it1; Block *block = mainMemory.getBlock(9); block->clear(); int index=0; for(int i=0;i<relation->getNumOfBlocks();i++) { relation->getBlock(i,0); vector<Tuple> t = mainMemory.getBlock(0)->getTuples(); for(it1=t.begin();it1!=t.end();it1++) { if(!it1->isNull()){ for(int j=0;j<fieldNames.size();j++) { if(fieldTypes[j]==INT) tuple.setField(fieldNames[j],it1->getField(fieldNames[j]).integer); else tuple.setField(fieldNames[j],*(it1->getField(fieldNames[j]).str)); } bool ttp = whereConditionEvaluator(whereCondition, *it1); if(ttp) { if(!block->isFull()) block->appendTuple(tuple); else { relationDup->setBlock(index,9); index++; block->clear(); block->appendTuple(tuple); } } } } } if(index!=relationDup->getNumOfBlocks()-1) relationDup->setBlock(index, 9); return tableName; }