uint64_t JoinPartition::writeByteStream(int which, ByteStream &bs) { size_t &offset = (which == 0 ? nextSmallOffset : nextLargeOffset); fstream &fs = (which == 0 ? smallFile : largeFile); const char *filename = (which == 0 ? smallFilename.c_str() : largeFilename.c_str()); fs.open(filename, ios::binary | ios::out | ios::app); int saveErrno = errno; if (!fs) { fs.close(); ostringstream os; os << "Disk join could not open file (write access) " << filename << ": " << strerror(saveErrno) << endl; throw IDBExcept(os.str().c_str(), ERR_DBJ_FILE_IO_ERROR); } uint64_t ret = 0; size_t len = bs.length(); idbassert(len != 0); fs.seekp(offset); if (!useCompression) { ret = len + 4; fs.write((char *) &len, sizeof(len)); fs.write((char *) bs.buf(), len); saveErrno = errno; if (!fs) { fs.close(); ostringstream os; os << "Disk join could not write file " << filename << ": " << strerror(saveErrno) << endl; throw IDBExcept(os.str().c_str(), ERR_DBJ_FILE_IO_ERROR); } totalBytesWritten += sizeof(len) + len; } else { uint64_t maxSize = compressor.maxCompressedSize(len); size_t actualSize; boost::scoped_array<uint8_t> compressed(new uint8_t[maxSize]); compressor.compress((char *) bs.buf(), len, (char *) compressed.get(), &actualSize); ret = actualSize + 4; fs.write((char *) &actualSize, sizeof(actualSize)); fs.write((char *) compressed.get(), actualSize); saveErrno = errno; if (!fs) { fs.close(); ostringstream os; os << "Disk join could not write file " << filename << ": " << strerror(saveErrno) << endl; throw IDBExcept(os.str().c_str(), ERR_DBJ_FILE_IO_ERROR); } totalBytesWritten += sizeof(actualSize) + actualSize; } bs.advance(len); offset = fs.tellp(); fs.close(); return ret; }
void LimitedOrderBy::finalize() { if (fRowGroup.getRowCount() > 0) fDataQueue.push(fData); if (fStart != 0) { uint64_t newSize = fRowsPerRG * fRowGroup.getRowSize(); if (!fRm->getMemory(newSize)) { cerr << IDBErrorInfo::instance()->errorMsg(fErrorCode) << " @" << __FILE__ << ":" << __LINE__; throw IDBExcept(fErrorCode); } fMemSize += newSize; fData.reinit(fRowGroup, fRowsPerRG); fRowGroup.setData(&fData); fRowGroup.resetRowGroup(0); fRowGroup.getRow(0, &fRow0); queue<RGData> tempQueue; uint64_t i = 0; while ((fOrderByQueue.size() > fStart) && (i++ < fCount)) { const OrderByRow& topRow = fOrderByQueue.top(); row1.setData(topRow.fData); copyRow(row1, &fRow0); //memcpy(fRow0.getData(), topRow.fData, fRow0.getSize()); fRowGroup.incRowCount(); fRow0.nextRow(); fOrderByQueue.pop(); if (fRowGroup.getRowCount() >= fRowsPerRG) { tempQueue.push(fData); if (!fRm->getMemory(newSize)) { cerr << IDBErrorInfo::instance()->errorMsg(fErrorCode) << " @" << __FILE__ << ":" << __LINE__; throw IDBExcept(fErrorCode); } fMemSize += newSize; fData.reinit(fRowGroup, fRowsPerRG); //fData.reset(new uint8_t[fRowGroup.getDataSize(fRowsPerRG)]); fRowGroup.setData(&fData); fRowGroup.resetRowGroup(0); fRowGroup.getRow(0, &fRow0); } } if (fRowGroup.getRowCount() > 0) tempQueue.push(fData); fDataQueue = tempQueue; } }
// Extracts the constant arguments of the nth-value function.
//
//   parms[0]  value expression (not examined here)
//   parms[1]  N; only read here when it is a ConstantColumn
//   parms[2]  from first | from last   (must be a ConstantColumn)
//   parms[3]  respect null | ignore null (must be a ConstantColumn)
//
// Throws IDBExcept(ERR_WF_ARG_OUT_OF_RANGE) when a constant N is <= 0.
void WF_nth_value<T>::parseParms(const std::vector<execplan::SRCP>& parms)
{
    // parms[1]: nth value
    ConstantColumn* cc = dynamic_cast<ConstantColumn*>(parms[1].get());

    if (cc != NULL)
    {
        fNthNull = false;
        // row not used, no need to setData.
        fNth = cc->getIntVal(fRow, fNthNull);

        if (fNth <= 0)
        {
            ostringstream oss;
            oss << fNth;
            const string shown = oss.str();
            throw IDBExcept(IDBErrorInfo::instance()->errorMsg(ERR_WF_ARG_OUT_OF_RANGE, shown),
                            ERR_WF_ARG_OUT_OF_RANGE);
        }
    }

    // The null indicator of the two flag arguments is read but not used.
    bool isNull = false;

    // parms[2]: from first | from last
    cc = dynamic_cast<ConstantColumn*>(parms[2].get());
    idbassert(cc != NULL);
    const int64_t fromFirstFlag = cc->getIntVal(fRow, isNull);
    fFromFirst = (fromFirstFlag > 0);

    // parms[3]: respect null | ignore null
    cc = dynamic_cast<ConstantColumn*>(parms[3].get());
    idbassert(cc != NULL);
    const int64_t respectNullsFlag = cc->getIntVal(fRow, isNull);
    fRespectNulls = (respectNullsFlag > 0);
}
void IdbOrderBy::initialize(const RowGroup& rg) { // initialize rows IdbCompare::initialize(rg); uint64_t newSize = fRowsPerRG * rg.getRowSize(); if (!fRm->getMemory(newSize)) { cerr << IDBErrorInfo::instance()->errorMsg(fErrorCode) << " @" << __FILE__ << ":" << __LINE__; throw IDBExcept(fErrorCode); } fMemSize += newSize; fData.reinit(fRowGroup, fRowsPerRG); fRowGroup.setData(&fData); fRowGroup.resetRowGroup(0); fRowGroup.initRow(&fRow0); fRowGroup.getRow(0, &fRow0); // set compare functors fRule.compileRules(fOrderByCond, fRowGroup); fRowGroup.initRow(&row1); fRowGroup.initRow(&row2); if (fDistinct) { fDistinctMap.reset(new DistinctMap_t(10, Hasher(this, getKeyLength()), Eq(this, getKeyLength()))); } }
// Resolves a frame bound whose offset comes from an expression column.
//
// Positions fRow on row `c`, rejects a NULL expression value, calls
// getOffset() (presumably refreshing fOffset from the current row - confirm),
// rejects a negative fOffset, and then delegates to
// FrameBoundConstantRow::getBound.
//
// Throws IDBExcept(ERR_WF_BOUND_OUT_OF_RANGE) for a NULL or negative offset.
int64_t FrameBoundExpressionRow<T>::getBound(int64_t b, int64_t e, int64_t c)
{
    // set row data
    fRow.setData(getPointer(fRowData->at(c)));

    // get expression int value
    const bool exprIsNull = fRow.isNullValue(fExprIdx);

    if (exprIsNull)
    {
        throw IDBExcept(
            IDBErrorInfo::instance()->errorMsg(ERR_WF_BOUND_OUT_OF_RANGE, "NULL"),
            ERR_WF_BOUND_OUT_OF_RANGE);
    }

    getOffset();

    if (fOffset < 0)
    {
        throw IDBExcept(
            IDBErrorInfo::instance()->errorMsg(ERR_WF_BOUND_OUT_OF_RANGE, fOffset),
            ERR_WF_BOUND_OUT_OF_RANGE);
    }

    return FrameBoundConstantRow::getBound(b, e, c);
}
// Factory: builds the WF_sum_avg instantiation matching column type `ct`.
//
//   signed integer / DECIMAL     -> WF_sum_avg<int64_t>
//   unsigned integer / UDECIMAL  -> WF_sum_avg<uint64_t>
//   DOUBLE / UDOUBLE             -> WF_sum_avg<double>
//   FLOAT / UFLOAT               -> WF_sum_avg<float>
//
// Any other type is reported on cerr and raises
// IDBExcept(ERR_WF_INVALID_PARM_TYPE).
boost::shared_ptr<WindowFunctionType> WF_sum_avg<T>::makeFunction(int id, const string& name, int ct)
{
    typedef boost::shared_ptr<WindowFunctionType> FuncPtr;

    switch (ct)
    {
        case CalpontSystemCatalog::TINYINT:
        case CalpontSystemCatalog::SMALLINT:
        case CalpontSystemCatalog::MEDINT:
        case CalpontSystemCatalog::INT:
        case CalpontSystemCatalog::BIGINT:
        case CalpontSystemCatalog::DECIMAL:
            return FuncPtr(new WF_sum_avg<int64_t>(id, name));

        case CalpontSystemCatalog::UTINYINT:
        case CalpontSystemCatalog::USMALLINT:
        case CalpontSystemCatalog::UMEDINT:
        case CalpontSystemCatalog::UINT:
        case CalpontSystemCatalog::UBIGINT:
        case CalpontSystemCatalog::UDECIMAL:
            return FuncPtr(new WF_sum_avg<uint64_t>(id, name));

        case CalpontSystemCatalog::DOUBLE:
        case CalpontSystemCatalog::UDOUBLE:
            return FuncPtr(new WF_sum_avg<double>(id, name));

        case CalpontSystemCatalog::FLOAT:
        case CalpontSystemCatalog::UFLOAT:
            return FuncPtr(new WF_sum_avg<float>(id, name));

        default:
            break;
    }

    // Unsupported argument type.
    string errStr = name + "(" + colType2String[ct] + ")";
    errStr = IDBErrorInfo::instance()->errorMsg(ERR_WF_INVALID_PARM_TYPE, errStr);
    cerr << errStr << endl;
    throw IDBExcept(errStr, ERR_WF_INVALID_PARM_TYPE);
}
// Extracts the constant argument of the percentile function.
//
// parms[0] is the percentile value ("nve").  When it is a ConstantColumn it is
// read into fNve / fNveNull here and, unless NULL, must lie in [0, 1];
// otherwise IDBExcept(ERR_WF_ARG_OUT_OF_RANGE) is thrown.
//
// The first index of fPeer is also appended to fFieldIndex as the
// "within group order by" column.
void WF_percentile<T>::parseParms(const std::vector<execplan::SRCP>& parms)
{
    // parms[0]: nve
    ConstantColumn* cc = dynamic_cast<ConstantColumn*>(parms[0].get());

    if (cc != NULL)
    {
        fNveNull = false;
        // row not used, no need to setData.
        fNve = cc->getDoubleVal(fRow, fNveNull);

        const bool outOfRange = (fNve < 0 || fNve > 1);

        if (!fNveNull && outOfRange)
        {
            ostringstream oss;
            oss << fNve;
            const string shown = oss.str();
            throw IDBExcept(IDBErrorInfo::instance()->errorMsg(ERR_WF_ARG_OUT_OF_RANGE, shown),
                            ERR_WF_ARG_OUT_OF_RANGE);
        }
    }

    // workaround for the within group order by column index
    idbassert(fPeer->fIndex.size() > 0);
    fFieldIndex.push_back(fPeer->fIndex[0]);
}
// Evaluates the nth-value function over the frame [b, e].
//
// Output is produced for every row of the frame when c == WF__BOUND_ALL, and
// for row c only otherwise.  For each output row:
//   * if fFieldIndex[2] != -1, N is re-read from that column of the row
//     (NULL sets fNthNull; a value <= 0 throws ERR_WF_ARG_OUT_OF_RANGE);
//   * the N-th row is located counting from b (fFromFirst) or from e;
//     when fRespectNulls is false, a leading run of NULL values at the
//     starting end is skipped before counting;
//   * the value found there, or NULL when there is no such row inside the
//     frame or N is NULL, is stored through setValue().
//
// The loop stops early, checked every 1000 rows, when fStep is cancelled.
void WF_nth_value<T>::operator()(int64_t b, int64_t e, int64_t c)
{
    int64_t firstOut = b;
    int64_t lastOut = e;

    if (c != WF__BOUND_ALL)
    {
        firstOut = c;
        lastOut = c;
    }

    for (int64_t cur = firstOut; cur <= lastOut; cur++)
    {
        if (cur % 1000 == 0 && fStep->cancelled())
            break;

        int64_t nthIdx = fFieldIndex[2];
        fRow.setData(getPointer(fRowData->at(cur)));

        if (nthIdx != -1)
        {
            fNthNull = fRow.isNullValue(nthIdx);

            if (!fNthNull)
            {
                double nth = 1.0;
                implicit2T(nthIdx, nth, 0);

                if (nth <= 0)
                {
                    ostringstream oss;
                    oss << nth;
                    throw IDBExcept(IDBErrorInfo::instance()->errorMsg(ERR_WF_ARG_OUT_OF_RANGE, oss.str()),
                                    ERR_WF_ARG_OUT_OF_RANGE);
                }

                // prevent integer overflow
                if (nth > e)
                    nth = e + 1;

                fNth = (int64_t) nth;
            }
        }

        bool isNull = true;

        if (!fNthNull && (b + fNth - 1) <= e)
        {
            uint64_t colIn = fFieldIndex[1];
            int64_t target;
            bool inFrame;

            if (fFromFirst)
            {
                int64_t anchor = b;
                fRow.setData(getPointer(fRowData->at(anchor)));

                if (!fRespectNulls && fRow.isNullValue(colIn))
                {
                    // Advance past the leading NULLs; row e itself is never
                    // tested and serves as the last possible anchor.
                    for (++anchor; anchor < e; ++anchor)
                    {
                        fRow.setData(getPointer(fRowData->at(anchor)));

                        if (!fRow.isNullValue(colIn))
                            break;
                    }
                }

                target = anchor + fNth - 1;
                inFrame = (target <= e);
            }
            else // from last
            {
                int64_t anchor = e;
                fRow.setData(getPointer(fRowData->at(anchor)));

                if (!fRespectNulls && fRow.isNullValue(colIn))
                {
                    // Walk back past the trailing NULLs; row b itself is never
                    // tested and serves as the last possible anchor.
                    for (--anchor; anchor > b; --anchor)
                    {
                        fRow.setData(getPointer(fRowData->at(anchor)));

                        if (!fRow.isNullValue(colIn))
                            break;
                    }
                }

                target = anchor - fNth + 1;
                inFrame = (target >= b);
            }

            if (inFrame)
            {
                fRow.setData(getPointer(fRowData->at(target)));
                getValue(colIn, fValue);
                isNull = fRow.isNullValue(colIn);
            }
        }

        T* v = NULL;

        if (!isNull)
            v = &fValue;

        setValue(fRow.getColType(fFieldIndex[0]), b, e, cur, v);
    }
}
void WindowFunctionType::implicit2T(uint64_t i, T& t, int s) { int ct = fRow.getColType(i); int pw = 0; switch (ct) { case CalpontSystemCatalog::TINYINT: case CalpontSystemCatalog::SMALLINT: case CalpontSystemCatalog::MEDINT: case CalpontSystemCatalog::INT: case CalpontSystemCatalog::BIGINT: case CalpontSystemCatalog::DECIMAL: { t = (T) fRow.getIntField(i); pw = s - fRow.getScale(i); // pw is difference of scales, will be in [-18, 18] if (pw > 0) t *= IDB_pow[pw]; else if (pw < 0) t /= IDB_pow[-pw]; break; } case CalpontSystemCatalog::UTINYINT: case CalpontSystemCatalog::USMALLINT: case CalpontSystemCatalog::UMEDINT: case CalpontSystemCatalog::UINT: case CalpontSystemCatalog::UBIGINT: case CalpontSystemCatalog::UDECIMAL: { t = (T) fRow.getUintField(i); pw = s - fRow.getScale(i); // pw is difference of scales, will be in [-18, 18] if (pw > 0) t *= IDB_pow[pw]; else if (pw < 0) t /= IDB_pow[-pw]; break; } case CalpontSystemCatalog::DOUBLE: case CalpontSystemCatalog::UDOUBLE: { if (s == 0) t = (T) fRow.getDoubleField(i); else t = (T) (fRow.getDoubleField(i) * IDB_pow[s]); // s is scale, [0, 18] break; } case CalpontSystemCatalog::FLOAT: case CalpontSystemCatalog::UFLOAT: { if (s == 0) t = (T) fRow.getFloatField(i); else t = (T) (fRow.getFloatField(i) * IDB_pow[s]); // s is scale, [0, 18] break; } case CalpontSystemCatalog::CHAR: case CalpontSystemCatalog::VARCHAR: default: { string errStr = fFunctionName + "(" + colType2String[ct] + ")"; errStr = IDBErrorInfo::instance()->errorMsg(ERR_WF_INVALID_PARM_TYPE, errStr); cerr << errStr << endl; throw IDBExcept(errStr, ERR_WF_INVALID_PARM_TYPE); break; } } }
// Factory: maps a window function name (case-insensitive, looked up in
// windowFunctionId after upper-casing) to the class implementing it and lets
// that class build the instance for argument column type `ct`.
//
// The REGR_* functions, WF__UNDEFINED and any unrecognized id raise
// IDBExcept(ERR_WF_NOT_SUPPORT).
boost::shared_ptr<WindowFunctionType> WindowFunctionType::makeWindowFunction(const string& name, int ct)
{
    int functionId = windowFunctionId[algorithm::to_upper_copy(name)];

    switch (functionId)
    {
        case WF__COUNT_ASTERISK:
        case WF__COUNT:
        case WF__COUNT_DISTINCT:
            return WF_count<int64_t>::makeFunction(functionId, name, ct);

        case WF__MIN:
        case WF__MAX:
            return WF_min_max<int64_t>::makeFunction(functionId, name, ct);

        case WF__SUM:
        case WF__AVG:
        case WF__SUM_DISTINCT:
        case WF__AVG_DISTINCT:
            return WF_sum_avg<int64_t>::makeFunction(functionId, name, ct);

        case WF__STDDEV_POP:
        case WF__STDDEV_SAMP:
        case WF__VAR_POP:
        case WF__VAR_SAMP:
            return WF_stats<int64_t>::makeFunction(functionId, name, ct);

        case WF__ROW_NUMBER:
            return WF_row_number::makeFunction(functionId, name, ct);

        case WF__RANK:
        case WF__DENSE_RANK:
        case WF__PERCENT_RANK:
        case WF__CUME_DIST:
            return WF_ranking::makeFunction(functionId, name, ct);

        case WF__FIRST_VALUE:
        case WF__LAST_VALUE:
        case WF__NTH_VALUE:
            return WF_nth_value<int64_t>::makeFunction(functionId, name, ct);

        case WF__LEAD:
        case WF__LAG:
            return WF_lead_lag<int64_t>::makeFunction(functionId, name, ct);

        case WF__NTILE:
            return WF_ntile::makeFunction(functionId, name, ct);

        case WF__PERCENTILE_CONT:
        case WF__PERCENTILE_DISC:
            return WF_percentile<int64_t>::makeFunction(functionId, name, ct);

        // Known ids without an implementation share the error path below.
        case WF__REGR_SLOPE:
        case WF__REGR_INTERCEPT:
        case WF__REGR_COUNT:
        case WF__REGR_R2:
        case WF__REGR_AVGX:
        case WF__REGR_AVGY:
        case WF__REGR_SXX:
        case WF__REGR_SXY:
        case WF__REGR_SYY:
        case WF__UNDEFINED:
        default:
            break;
    }

    throw IDBExcept(IDBErrorInfo::instance()->errorMsg(ERR_WF_NOT_SUPPORT, name),
                    ERR_WF_NOT_SUPPORT);
}
// Offers one input row to the bounded ORDER BY ... LIMIT queue.
//
//  * With fDistinct set, a row already present in fDistinctMap is dropped.
//  * With fCount == 0 nothing is kept.
//  * While fewer than fStart + fCount rows are held, the row is copied into
//    the current row group and pushed on fOrderByQueue; a full row group is
//    moved to fDataQueue and a new one is started.
//  * Otherwise, if the row sorts before the row on top of the queue, the top
//    row's storage is overwritten with the new row and re-queued.
//
// Throws IDBExcept(fErrorCode) if the resource manager refuses memory for a
// new row group.
void LimitedOrderBy::processRow(const rowgroup::Row& row)
{
    // check if this is a distinct row
    if (fDistinct && fDistinctMap->find(row.getPointer()) != fDistinctMap->end())
        return;

    // @bug5312, limit count is 0, do nothing.
    if (fCount == 0)
        return;

    // if the row count is less than the limit
    if (fOrderByQueue.size() < fStart + fCount)
    {
        copyRow(row, &fRow0);
        OrderByRow newRow(fRow0, fRule);
        fOrderByQueue.push(newRow);

        // add to the distinct map
        if (fDistinct)
            fDistinctMap->insert(fRow0.getPointer());

        fRowGroup.incRowCount();
        fRow0.nextRow();

        if (fRowGroup.getRowCount() >= fRowsPerRG)
        {
            fDataQueue.push(fData);
            uint64_t newSize = fRowsPerRG * fRowGroup.getRowSize();

            if (!fRm->getMemory(newSize))
            {
                cerr << IDBErrorInfo::instance()->errorMsg(fErrorCode)
                     << " @" << __FILE__ << ":" << __LINE__;
                throw IDBExcept(fErrorCode);
            }

            fMemSize += newSize;
            fData.reinit(fRowGroup, fRowsPerRG);
            fRowGroup.setData(&fData);
            fRowGroup.resetRowGroup(0);
            fRowGroup.getRow(0, &fRow0);
        }
    }
    else if (fOrderByCond.size() > 0 && fRule.less(row.getPointer(), fOrderByQueue.top().fData))
    {
        // Reuse the storage of the row being evicted for the incoming row.
        OrderByRow swapRow = fOrderByQueue.top();
        row1.setData(swapRow.fData);

        // Take the entry out of the queue before its bytes change, so the
        // queue never holds an element whose sort key is being rewritten.
        fOrderByQueue.pop();

        if (fDistinct)
        {
            // Remove the EVICTED row from the distinct set while row1 still
            // holds its old contents.  Erasing the incoming row would do
            // nothing (the check at the top proved it is absent) and would
            // leave the evicted row's entry behind with its bytes overwritten.
            fDistinctMap->erase(row1.getPointer());
        }

        copyRow(row, &row1);

        if (fDistinct)
            fDistinctMap->insert(row1.getPointer());

        fOrderByQueue.push(swapRow);
    }
}
// Evaluates PERCENTILE_CONT / PERCENTILE_DISC over the rows b..e and stores
// the same result in every one of those rows.
//
// Outline:
//   1. Optionally re-read the percentile argument (fNve) from row b.
//   2. If the argument is NULL, store NULL in every row and return.
//   3. Scan b..e once to find the first (b1) and last (e1) row whose ordering
//      column is not NULL; for PERCENTILE_DISC also record a rank per row.
//   4. Compute the result (interpolated for CONT, picked from a row for DISC).
//   5. Store the result in every row b..e.
//
// The incoming value of `c` is not used; `c` serves as the loop variable.
// Each loop stops early, checked every 1000 rows, when fStep is cancelled.
// Throws IDBExcept(ERR_WF_ARG_OUT_OF_RANGE) if a non-NULL argument read from
// the row is outside [0, 1].
void WF_percentile<T>::operator()(int64_t b, int64_t e, int64_t c)
{
    // fFieldIndex[1] is the column holding the percentile argument.
    // NOTE(review): -1 presumably means the argument was a constant that was
    // already stored in fNve/fNveNull elsewhere - confirm.
    int64_t idx = fFieldIndex[1];
    fRow.setData(getPointer(fRowData->at(b)));

    if (idx != -1)
    {
        // NOTE(review): this inner test repeats the enclosing one.
        if (idx != -1)
        {
            fNveNull = fRow.isNullValue(idx);
            implicit2T(idx, fNve, 0);

            if (!fNveNull && (fNve < 0 || fNve > 1))
            {
                ostringstream oss;
                oss << fNve;
                throw IDBExcept(IDBErrorInfo::instance()->errorMsg(ERR_WF_ARG_OUT_OF_RANGE, oss.str()), ERR_WF_ARG_OUT_OF_RANGE);
            }
        }
    }

    // NULL argument: the result is NULL for every row.
    if (fNveNull)
    {
        for (c = b; c <= e; c++)
        {
            if (c % 1000 == 0 && fStep->cancelled())
                break;

            fRow.setData(getPointer(fRowData->at(c)));
            setValue(fRow.getColType(fFieldIndex[0]), b, e, c, (T*) NULL);
        }

        return;
    }

    // From here on idx is the column the percentile is computed over.
    idx = fFieldIndex[2];
    int64_t rank = 0;   // rank given to the most recent non-NULL row (DISC only)
    int64_t dups = 0;   // rows so far that tied with their predecessor
    int64_t b1 = -1;    // first row with a non-NULL value, -1 if none seen
    int64_t e1 = -1;    // last row with a non-NULL value, -1 if none seen
    // Per-row rank, indexed by (row - b).  Only written for non-NULL rows and
    // only for PERCENTILE_DISC; all other slots stay uninitialized.
    scoped_array<int64_t> rk(new int64_t[e - b + 1]);

    for (c = b; c <= e; c++)
    {
        if (c % 1000 == 0 && fStep->cancelled())
            break;

        fRow.setData(getPointer(fRowData->at(c)));

        if (fRow.isNullValue(idx))
            continue;  // ignore nulls

        if (b1 == -1)
            b1 = c;

        e1 = c;

        if (fFunctionId == WF__PERCENTILE_DISC)
        {
            // need cume_rank
            // A row for which fPeer reports a match with the previous row
            // keeps the previous rank; otherwise the rank jumps past all the
            // tied rows counted so far.
            if (c != b && fPeer->operator()(getPointer(fRowData->at(c)), getPointer(fRowData->at(c-1))))
            {
                dups++;
            }
            else
            {
                rank++;
                rank += dups;
                dups = 0;
            }

            rk[c-b] = rank;
        }
    }

    T* p = NULL;   // stays NULL when no non-NULL row was found
    T v;
    // PERCENTILE_CONT always reports a DOUBLE; DISC reports the column's type.
    int ct = (fFunctionId == WF__PERCENTILE_CONT) ?
             CalpontSystemCatalog::DOUBLE : fRow.getColType(idx);

    if (b1 != -1)
    {
        double cnt = (e1 - b1 + 1);

        if (fFunctionId == WF__PERCENTILE_CONT)
        {
            // @bug5820, this "rn" is the normalized row number, not the real row number.
            // Using real row number here will introduce a small calculation error in double result.
            double rn = fNve * (cnt - 1);
            double crn = ceil(rn);
            double frn = floor(rn);
            double vd = 0;

            if (crn == rn && rn == frn)
            {
                // rn is a whole number: take that row's value directly.
                fRow.setData(getPointer(fRowData->at((size_t) rn + (size_t) b1)));
                implicit2T(idx, vd, 0);
            }
            else
            {
                // Linear interpolation between the rows just below (frn) and
                // just above (crn) the fractional position rn.
                double cv = 0.0, fv = 0.0;
                fRow.setData(getPointer(fRowData->at((size_t) frn + (size_t) b1)));
                implicit2T(idx, fv, 0);
                fRow.setData(getPointer(fRowData->at((size_t) crn + (size_t) b1)));
                implicit2T(idx, cv, 0);
                vd = (crn - rn) * fv + (rn - frn) * cv;
            }

            // The storage of the double is re-read as a T; no numeric
            // conversion takes place.
            v = *(reinterpret_cast<T*>(&vd));
            p = &v;
        }
        else  // (fFunctionId == WF__PERCENTILE_DISC)
        {
            // Start one rank past the highest rank handed out in the scan.
            int prevRank = ++rank + dups;
            double cumeDist = 1;
            fRow.setData(getPointer(fRowData->at(e1)));

            // Walk back from e1.  Each time the rank changes, compute
            // (prevRank - 1) / cnt and stop as soon as it is below fNve.
            for (c = e1; c >= b1; c--)
            {
                int currRank = rk[c-b];

                if (currRank != prevRank)
                {
                    cumeDist = ((double) (prevRank-1)) / cnt;

                    if (cumeDist < fNve)
                        break;

                    prevRank = currRank;
                }
            }

            // Step forward to the row after the stopping point (b1 when the
            // walk ran off the front) and take its value.
            c++;

            fRow.setData(getPointer(fRowData->at(c)));
            getValue(idx, v);

            p = &v;
        }
    }

    // Store the result (or NULL) in every row.
    for (c = b; c <= e; c++)
    {
        if (c % 1000 == 0 && fStep->cancelled())
            break;

        fRow.setData(getPointer(fRowData->at(c)));
        setValue(ct, b, e, c, p);
    }
}
// Returns true when rows `a` and `b` hold equal values in every column listed
// in fIndex; comparison stops at the first column that differs.
//
// Integer, decimal, date and datetime columns are compared through their
// unsigned field value, strings through getStringField, and floating point
// columns with ==.  Any other column type is reported on cerr and raises
// IDBExcept(ERR_WF_UNKNOWN_COL_TYPE).
bool EqualCompData::operator()(Row::Pointer a, Row::Pointer b)
{
    fRow1.setData(a);
    fRow2.setData(b);

    for (vector<uint64_t>::const_iterator col = fIndex.begin(); col != fIndex.end(); col++)
    {
        bool same = true;
        CalpontSystemCatalog::ColDataType type = fRow1.getColType(*col);

        switch (type)
        {
            case CalpontSystemCatalog::TINYINT:
            case CalpontSystemCatalog::SMALLINT:
            case CalpontSystemCatalog::MEDINT:
            case CalpontSystemCatalog::INT:
            case CalpontSystemCatalog::BIGINT:
            case CalpontSystemCatalog::DECIMAL:
            case CalpontSystemCatalog::UDECIMAL:
            case CalpontSystemCatalog::UTINYINT:
            case CalpontSystemCatalog::USMALLINT:
            case CalpontSystemCatalog::UMEDINT:
            case CalpontSystemCatalog::UINT:
            case CalpontSystemCatalog::UBIGINT:
            case CalpontSystemCatalog::DATE:
            case CalpontSystemCatalog::DATETIME:
                // equal compare. ignore sign and null
                same = (fRow1.getUintField(*col) == fRow2.getUintField(*col));
                break;

            case CalpontSystemCatalog::CHAR:
            case CalpontSystemCatalog::VARCHAR:
                same = (fRow1.getStringField(*col) == fRow2.getStringField(*col));
                break;

            case CalpontSystemCatalog::DOUBLE:
            case CalpontSystemCatalog::UDOUBLE:
                same = (fRow1.getDoubleField(*col) == fRow2.getDoubleField(*col));
                break;

            case CalpontSystemCatalog::FLOAT:
            case CalpontSystemCatalog::UFLOAT:
                same = (fRow1.getFloatField(*col) == fRow2.getFloatField(*col));
                break;

            default:
            {
                uint64_t ec = ERR_WF_UNKNOWN_COL_TYPE;
                cerr << IDBErrorInfo::instance()->errorMsg(ec, type)
                     << " @" << __FILE__ << ":" << __LINE__;
                throw IDBExcept(IDBErrorInfo::instance()->errorMsg(ec, type), ec);
            }
        }

        if (!same)
            return false;
    }

    return true;
}
void JoinPartition::readByteStream(int which, ByteStream *bs) { size_t &offset = (which == 0 ? nextSmallOffset : nextLargeOffset); fstream &fs = (which == 0 ? smallFile : largeFile); const char *filename = (which == 0 ? smallFilename.c_str() : largeFilename.c_str()); size_t len; bs->restart(); fs.open(filename, ios::binary | ios::in); int saveErrno = errno; if (!fs) { fs.close(); ostringstream os; os << "Disk join could not open file (read access) " << filename << ": " << strerror(saveErrno) << endl; throw IDBExcept(os.str().c_str(), ERR_DBJ_FILE_IO_ERROR); } fs.seekg(offset); fs.read((char *) &len, sizeof(len)); saveErrno = errno; if (!fs) { if (fs.eof()) { fs.close(); return; } else { fs.close(); ostringstream os; os << "Disk join could not read file " << filename << ": " << strerror(saveErrno) << endl; throw IDBExcept(os.str().c_str(), ERR_DBJ_FILE_IO_ERROR); } } idbassert(len != 0); totalBytesRead += sizeof(len); if (!useCompression) { bs->needAtLeast(len); fs.read((char *) bs->getInputPtr(), len); saveErrno = errno; if (!fs) { fs.close(); ostringstream os; os << "Disk join could not read file " << filename << ": " << strerror(saveErrno) << endl; throw IDBExcept(os.str().c_str(), ERR_DBJ_FILE_IO_ERROR); } totalBytesRead += len; bs->advanceInputPtr(len); } else { size_t uncompressedSize; boost::scoped_array<char> buf(new char[len]); fs.read(buf.get(), len); saveErrno = errno; if (!fs) { fs.close(); ostringstream os; os << "Disk join could not read file " << filename << ": " << strerror(saveErrno) << endl; throw IDBExcept(os.str().c_str(), ERR_DBJ_FILE_IO_ERROR); } totalBytesRead += len; compressor.getUncompressedSize(buf.get(), len, &uncompressedSize); bs->needAtLeast(uncompressedSize); compressor.uncompress(buf.get(), len, (char *) bs->getInputPtr()); bs->advanceInputPtr(uncompressedSize); } offset = fs.tellg(); fs.close(); }
int64_t JoinPartition::convertToSplitMode() { int i, j; ByteStream bs; RGData rgData; uint32_t hash; uint64_t tmp; int64_t ret = -(int64_t)smallSizeOnDisk; // smallFile gets deleted boost::scoped_array<uint32_t> rowDist(new uint32_t[bucketCount]); uint32_t rowCount = 0; memset(rowDist.get(), 0, sizeof(uint32_t) * bucketCount); fileMode = false; htSizeEstimate = 0; smallSizeOnDisk = 0; buckets.reserve(bucketCount); for (i = 0; i < (int) bucketCount; i++) buckets.push_back(boost::shared_ptr<JoinPartition>(new JoinPartition(*this, false))); RowGroup &rg = smallRG; Row &row = smallRow; nextSmallOffset = 0; while (1) { readByteStream(0, &bs); if (bs.length() == 0) break; rgData.deserialize(bs); rg.setData(&rgData); for (j = 0; j < (int) rg.getRowCount(); j++) { rg.getRow(j, &row); if (antiWithMatchNulls && hasNullJoinColumn(row)) { if (needsAllNullRows || !gotNullRow) { for (j = 0; j < (int) bucketCount; j++) ret += buckets[j]->insertSmallSideRow(row); gotNullRow = true; } continue; } if (typelessJoin) hash = getHashOfTypelessKey(row, smallKeyCols, hashSeed) % bucketCount; else { if (UNLIKELY(row.isUnsigned(smallKeyCols[0]))) tmp = row.getUintField(smallKeyCols[0]); else tmp = row.getIntField(smallKeyCols[0]); hash = hasher((char *) &tmp, 8, hashSeed); hash = hasher.finalize(hash, 8) % bucketCount; } rowCount++; rowDist[hash]++; ret += buckets[hash]->insertSmallSideRow(row); } } boost::filesystem::remove(smallFilename); smallFilename.clear(); for (i = 0; i < (int) bucketCount; i++) if (rowDist[i] == rowCount) throw IDBExcept("All rows hashed to the same bucket", ERR_DBJ_DATA_DISTRIBUTION); rg.setData(&buffer); rg.resetRowGroup(0); rg.getRow(0, &row); return ret; }
void TupleConstantStep::initialize(const JobInfo& jobInfo, const RowGroup* rgIn) { vector<uint> oids, oidsIn = fRowGroupIn.getOIDs(); vector<uint> keys, keysIn = fRowGroupIn.getKeys(); vector<uint> scale, scaleIn = fRowGroupIn.getScale(); vector<uint> precision, precisionIn = fRowGroupIn.getPrecision(); vector<CalpontSystemCatalog::ColDataType> types, typesIn = fRowGroupIn.getColTypes(); vector<uint> pos; pos.push_back(2); if (rgIn) { fRowGroupIn = *rgIn; fRowGroupIn.initRow(&fRowIn); oidsIn = fRowGroupIn.getOIDs(); keysIn = fRowGroupIn.getKeys(); scaleIn = fRowGroupIn.getScale(); precisionIn = fRowGroupIn.getPrecision(); typesIn = fRowGroupIn.getColTypes(); } for (uint64_t i = 0, j = 0; i < jobInfo.deliveredCols.size(); i++) { const ConstantColumn* cc = dynamic_cast<const ConstantColumn*>(jobInfo.deliveredCols[i].get()); if (cc != NULL) { CalpontSystemCatalog::ColType ct = cc->resultType(); if (ct.colDataType == CalpontSystemCatalog::VARCHAR) ct.colWidth++; //Round colWidth up if (ct.colWidth == 3) ct.colWidth = 4; else if (ct.colWidth == 5 || ct.colWidth == 6 || ct.colWidth == 7) ct.colWidth = 8; oids.push_back(-1); keys.push_back(-1); scale.push_back(ct.scale); precision.push_back(ct.precision); types.push_back(ct.colDataType); pos.push_back(pos.back() + ct.colWidth); fIndexConst.push_back(i); } else { // select (select a) from region; if (j >= oidsIn.size() && jobInfo.tableList.empty()) { throw IDBExcept(ERR_NO_FROM); } idbassert(j < oidsIn.size()); oids.push_back(oidsIn[j]); keys.push_back(keysIn[j]); scale.push_back(scaleIn[j]); precision.push_back(precisionIn[j]); types.push_back(typesIn[j]); pos.push_back(pos.back() + fRowGroupIn.getColumnWidth(j)); j++; fIndexMapping.push_back(i); } } fRowGroupOut = RowGroup(oids.size(), pos, oids, keys, types, scale, precision, jobInfo.stringTableThreshold); fRowGroupOut.initRow(&fRowOut); fRowGroupOut.initRow(&fRowConst, true); constructContanstRow(jobInfo); }