bool JoinPartition::getNextPartition(vector<RGData> *smallData, uint64_t *partitionID, JoinPartition **jp) { if (fileMode) { ByteStream bs; RGData rgData; if (nextPartitionToReturn > 0) return false; //cout << "reading the small side" << endl; nextSmallOffset = 0; while (1) { readByteStream(0, &bs); if (bs.length() == 0) break; rgData.deserialize(bs); //smallRG.setData(&rgData); //cout << "read a smallRG with " << smallRG.getRowCount() << " rows" << endl; smallData->push_back(rgData); } nextPartitionToReturn = 1; *partitionID = uniqueID; *jp = this; return true; } bool ret = false; while (!ret && nextPartitionToReturn < bucketCount) { ret = buckets[nextPartitionToReturn]->getNextPartition(smallData, partitionID, jp); if (!ret) nextPartitionToReturn++; } return ret; }
uint64_t UpdatePackageProcessor::fixUpRows(dmlpackage::CalpontDMLPackage& cpackage, DMLResult& result, const uint64_t uniqueId, const uint32_t tableOid) { ByteStream msg, msgBk, emsgBs; RGData rgData; uint32_t qb = 4; msg << qb; boost::scoped_ptr<rowgroup::RowGroup> rowGroup; uint64_t rowsProcessed = 0; uint32_t dbroot = 1; bool metaData = false; oam::OamCache* oamCache = oam::OamCache::makeOamCache(); std::vector<int> fPMs = oamCache->getModuleIds(); std::map<unsigned, bool> pmState; string emsg; string emsgStr; bool err = false; //boost::scoped_ptr<messageqcpp::MessageQueueClient> fExeMgr; //fExeMgr.reset( new messageqcpp::MessageQueueClient("ExeMgr1")); try { for (unsigned i = 0; i < fPMs.size(); i++) { pmState[fPMs[i]] = true; } //timer.start("ExeMgr"); fExeMgr->write(msg); fExeMgr->write(*(cpackage.get_ExecutionPlan())); //cout << "sending to ExeMgr plan with length " << (cpackage.get_ExecutionPlan())->length() << endl; msg.restart(); emsgBs.restart(); msg = fExeMgr->read(); //error handling if (msg.length() == 4) { msg >> qb; if (qb != 0) err = true; } else { qb = 999; err = true; } if (err) { logging::Message::Args args; logging::Message message(2); args.add("Update Failed: ExeMgr Error"); args.add((int)qb); message.format(args); result.result = UPDATE_ERROR; result.message = message; //timer.finish(); return rowsProcessed; } emsgBs = fExeMgr->read(); if (emsgBs.length() == 0) { logging::Message::Args args; logging::Message message(2); args.add("Update Failed: "); args.add("Lost connection to ExeMgr"); message.format(args); result.result = UPDATE_ERROR; result.message = message; //timer.finish(); return rowsProcessed; } emsgBs >> emsgStr; while (true) { if (fRollbackPending) { break; } msg.restart(); msgBk.restart(); msg = fExeMgr->read(); msgBk = msg; if ( msg.length() == 0 ) { cerr << "UpdatePackageProcessor::processPackage::fixupRows" << endl; logging::Message::Args args; logging::Message message(2); args.add("Update Failed: "); args.add("Lost connection to ExeMgr"); message.format(args); result.result = UPDATE_ERROR; result.message = message; //timer.finish(); //return rowsProcessed; break; } else { if (rowGroup.get() == NULL) { //This is mete data, need to send all PMs. metaData = true; //cout << "sending meta data" << endl; //timer.start("Meta"); err = processRowgroup(msgBk, result, uniqueId, cpackage, pmState, metaData, dbroot); rowGroup.reset(new rowgroup::RowGroup()); rowGroup->deserialize(msg); qb = 100; msg.restart(); msg << qb; fExeMgr->write(msg); metaData = false; //timer.stop("Meta"); continue; } rgData.deserialize(msg, true); rowGroup->setData(&rgData); //rowGroup->setData(const_cast<uint8_t*>(msg.buf())); err = (rowGroup->getStatus() != 0); if (err) { //msgBk.advance(rowGroup->getDataSize()); string errorMsg; msg >> errorMsg; logging::Message::Args args; logging::Message message(2); args.add("Update Failed: "); args.add(errorMsg); message.format(args); result.result = UPDATE_ERROR; result.message = message; DMLResult tmpResult; receiveAll( tmpResult, uniqueId, fPMs, pmState, tableOid); /* qb = 100; //@Bug 4358 get rid of broken pipe error. msg.restart(); msg << qb; fExeMgr->write(msg); */ //timer.finish(); //return rowsProcessed; //err = true; break; } if (rowGroup->getRGData() == NULL) { msg.restart(); } if (rowGroup->getRowCount() == 0) //done fetching { //timer.finish(); //need to receive all response err = receiveAll( result, uniqueId, fPMs, pmState, tableOid); //return rowsProcessed; break; } if (rowGroup->getBaseRid() == (uint64_t) (-1)) { continue; // @bug4247, not valid row ids, may from small side outer } dbroot = rowGroup->getDBRoot(); //cout << "dbroot in the rowgroup is " << dbroot << endl; //timer.start("processRowgroup"); err = processRowgroup(msgBk, result, uniqueId, cpackage, pmState, metaData, dbroot); //timer.stop("processRowgroup"); if (err) { //timer.finish(); LoggingID logid( DMLLoggingId, fSessionID, cpackage.get_TxnID()); logging::Message::Args args1; logging::Message msg1(1); args1.add("SQL statement erroring out, need to receive all messages from WES"); msg1.format( args1 ); logging::Logger logger(logid.fSubsysID); logger.logMessage(LOG_TYPE_DEBUG, msg1, logid); DMLResult tmpResult; receiveAll( tmpResult, uniqueId, fPMs, pmState, tableOid); logging::Message::Args args2; logging::Message msg2(1); args2.add("SQL statement erroring out, received all messages from WES"); msg2.format( args2 ); logger.logMessage(LOG_TYPE_DEBUG, msg2, logid); //@Bug 4358 get rid of broken pipe error. /* msg.restart(); msg << qb; fExeMgr->write(msg); return rowsProcessed; */ //err = true; break; } rowsProcessed += rowGroup->getRowCount(); } } if (fRollbackPending) { err = true; // Response to user cerr << "UpdatePackageProcessor::processPackage::fixupRows Rollback Pending" << endl; //@Bug 4994 Cancelled job is not error result.result = JOB_CANCELED; // Log LoggingID logid( DMLLoggingId, fSessionID, cpackage.get_TxnID()); logging::Message::Args args1; logging::Message msg1(1); args1.add("SQL statement canceled by user"); msg1.format( args1 ); logging::Logger logger(logid.fSubsysID); logger.logMessage(LOG_TYPE_DEBUG, msg1, logid); // Clean out the pipe; DMLResult tmpResult; receiveAll( tmpResult, uniqueId, fPMs, pmState, tableOid); } // get stats from ExeMgr if (!err) { qb = 3; msg.restart(); msg << qb; fExeMgr->write(msg); msg = fExeMgr->read(); msg >> result.queryStats; msg >> result.extendedStats; msg >> result.miniStats; result.stats.unserialize(msg); }
int64_t JoinPartition::convertToSplitMode() { int i, j; ByteStream bs; RGData rgData; uint32_t hash; uint64_t tmp; int64_t ret = -(int64_t)smallSizeOnDisk; // smallFile gets deleted boost::scoped_array<uint32_t> rowDist(new uint32_t[bucketCount]); uint32_t rowCount = 0; memset(rowDist.get(), 0, sizeof(uint32_t) * bucketCount); fileMode = false; htSizeEstimate = 0; smallSizeOnDisk = 0; buckets.reserve(bucketCount); for (i = 0; i < (int) bucketCount; i++) buckets.push_back(boost::shared_ptr<JoinPartition>(new JoinPartition(*this, false))); RowGroup &rg = smallRG; Row &row = smallRow; nextSmallOffset = 0; while (1) { readByteStream(0, &bs); if (bs.length() == 0) break; rgData.deserialize(bs); rg.setData(&rgData); for (j = 0; j < (int) rg.getRowCount(); j++) { rg.getRow(j, &row); if (antiWithMatchNulls && hasNullJoinColumn(row)) { if (needsAllNullRows || !gotNullRow) { for (j = 0; j < (int) bucketCount; j++) ret += buckets[j]->insertSmallSideRow(row); gotNullRow = true; } continue; } if (typelessJoin) hash = getHashOfTypelessKey(row, smallKeyCols, hashSeed) % bucketCount; else { if (UNLIKELY(row.isUnsigned(smallKeyCols[0]))) tmp = row.getUintField(smallKeyCols[0]); else tmp = row.getIntField(smallKeyCols[0]); hash = hasher((char *) &tmp, 8, hashSeed); hash = hasher.finalize(hash, 8) % bucketCount; } rowCount++; rowDist[hash]++; ret += buckets[hash]->insertSmallSideRow(row); } } boost::filesystem::remove(smallFilename); smallFilename.clear(); for (i = 0; i < (int) bucketCount; i++) if (rowDist[i] == rowCount) throw IDBExcept("All rows hashed to the same bucket", ERR_DBJ_DATA_DISTRIBUTION); rg.setData(&buffer); rg.resetRowGroup(0); rg.getRow(0, &row); return ret; }