int MergeJoin::compare(const Tuple & lhs, const std::vector<const Attribute *> & lCols, const Tuple & rhs, const std::vector<const Attribute *> & rCols) { for (int i = 0; i < lCols.size(); i++) { int cmp = 0; const Attribute * lField = lCols[i]; const Attribute * rField = rCols[i]; switch (lField->type()) { case INTEGER: // endianness :( cmp = *(int *)(lhs.m_data + lhs.schema()->offset(lField)) - *(int *)(rhs.m_data + rhs.schema()->offset(rField)); break; case CHAR: case STRING: case BIT: cmp = memcmp(lhs.m_data + lhs.schema()->offset(lField), rhs.m_data + rhs.schema()->offset(rField), lField->size()); break; } if (cmp) { return cmp; } } return 0; };
/**
 * Reads record number m_nRecs from m_block into t.m_data and advances
 * m_nRecs by one.
 *
 * Without a layout (m_layout is NULL) the block is read as consecutive
 * records of t.schema()->rsize() bytes each. With a layout, the record is
 * assembled from the first two partitions: each selected partition
 * contributes p->bytes() bytes taken from p->start() + p->bytes() * m_nRecs,
 * and the pieces are packed back to back in t.m_data.
 *
 * @param t  destination tuple; its schema selects what is read and its
 *           m_data buffer receives the bytes
 */
void TupleStreamReader::read(Tuple & t)
{
    if (m_layout == NULL) {
        // Un-partitioned block: record k starts at k * record size.
        const Schema * schema = t.schema();
        int recordBytes = schema->rsize();
        int source = recordBytes * m_nRecs;
        m_block.get(t.m_data, source, recordBytes);
    } else {
        int written = 0;  // bytes already placed in t.m_data

        // Only two partitions are handled (the original left
        // m_layout->npartitions() commented out). The (part + 1) test below
        // yields the bit values 1 and 2 for exactly those two.
        for (int part = 0; part < 2; ++part) {
            const Partition * p = m_layout->partition(part);

            // All or none: a partition is either copied whole or skipped,
            // depending on the tuple schema's partition mask.
            if ((t.schema()->m_partitions & (part + 1)) != 0) {
                int source = p->start() + p->bytes() * m_nRecs;
                m_block.get(t.m_data + written, source, p->bytes());
                written += p->bytes();
            }
        }
    }

    m_nRecs++;
}
/**
 * Builds a joined record in dst by placing the bytes of s first and the
 * bytes of t directly after them.
 *
 * @param dst  output tuple; dst.m_data must have room for
 *             s.schema()->rsize() + t.schema()->rsize() bytes (not checked)
 * @param s    tuple copied to the front of dst
 * @param t    tuple copied immediately behind s
 */
void MergeJoin::concatenate(Tuple & dst, const Tuple & s, const Tuple & t)
{
    const size_t frontBytes = s.schema()->rsize();
    const size_t backBytes = t.schema()->rsize();

    memcpy(dst.m_data, s.m_data, frontBytes);
    memcpy(dst.m_data + frontBytes, t.m_data, backBytes);

    /* Debug trace, kept disabled:
    std::cerr << "concat(";
    s.dump(std::cerr, '|', ';');
    t.dump(std::cerr, '|', ')');
    std::cerr << "=";
    dst.schema(&m_schema);
    dst.dump(std::cerr);
    */
}
void MergeJoin::create_merge_stack() { Tuple t; t.schema(m_tuple[TLEFT|PROJ].schema()); t.m_data = m_data; // merged_data > left.proj_data m_merge_stack.clear(); // get first item of merge stack for future comparisons. m_tsr[LEFT]->read(t); m_tsr[LEFT]->rewind(1); // rewind to push first item unto stack. while (!m_eof[LEFT] && get_tuple(LEFT, TLEFT|PROJ, false) && compare(t, m_joinCols[TLEFT|PROJ], m_tuple[TLEFT|PROJ], m_joinCols[TLEFT|PROJ]) == 0) { // push item unto merge stack. byte * data = new byte[m_tuple[TLEFT|PROJ].schema()->rsize()]; memcpy(data, m_tuple[LEFT].m_data, m_tuple[TLEFT|PROJ].schema()->rsize()); m_merge_stack.push_back(data); if (m_tsr[LEFT]->isEndOfStream()) { m_consumed[LEFT] = true; m_eof[LEFT] = isEmpty(LEFT); } } // rewind to retrieve next item for subsequent processing. if (!m_eof[LEFT]) m_tsr[LEFT]->rewind(1); }
void MergeJoin::merge() { Tuple t; Tuple merged; merged.m_data = m_data; merged.schema(&m_schema); // if stack is empty return 0; if (m_merge_stack.empty()) { return; } t.m_data = m_merge_stack[0]; t.schema(m_tuple[TLEFT|PROJ].schema()); while (!m_eof[RIGHT] && !m_tsw->isStreamFull() && get_tuple(RIGHT, TRIGHT|PROJ, false)) { // compare tuple from right branch with merge-stack. if (compare(t, m_joinCols[TLEFT|PROJ], m_tuple[TRIGHT|PROJ], m_joinCols[TRIGHT|PROJ]) == 0) { // merge item with all items on stack. for ( ; m_merge_with < m_merge_stack.size() && !m_tsw->isStreamFull(); m_merge_with++) { t.m_data = m_merge_stack[m_merge_with]; concatenate(merged, t, m_tuple[TRIGHT|PROJ]); m_tsw->write(merged); } // determine if concatenation is complete. if (m_merge_with >= m_merge_stack.size()) { m_merge_with = 0; // retrieve next tuple from right branch if (m_tsr[RIGHT]->isEndOfStream()) { m_consumed[RIGHT] = true; m_eof[RIGHT] = isEmpty(RIGHT); } } } else { m_tsr[RIGHT]->rewind(1); } // determine if merge is complete. remove and free data. if (m_eof[RIGHT] || (get_tuple(RIGHT, TRIGHT|PROJ, true) && compare(t, m_joinCols[TLEFT|PROJ], m_tuple[TRIGHT|PROJ], m_joinCols[TRIGHT|PROJ]) != 0)) { std::for_each(m_merge_stack.begin(), m_merge_stack.end(), free); // free valid in gnu++ m_merge_stack.clear(); m_merge_with = 0; break; } } }
/**
 * Evaluates the clause's expression against the given tuple.
 *
 * m_tuple is re-pointed at t's schema and data buffer (no bytes are
 * copied), then m_expression is evaluated. Presumably the expression reads
 * its operands through m_tuple; that wiring is not visible here.
 *
 * @param t  tuple to test; its buffer must stay valid during the call
 * @return the result of m_expression->evaluate()
 */
bool WhereClause::evaluate(const Tuple & t)
{
    // Alias the caller's tuple rather than copying it.
    m_tuple.schema(t.schema());
    m_tuple.m_data = t.m_data;

    const bool verdict = m_expression->evaluate();
    return verdict;
}