/**
 * Translate a cell position expressed in one dimension layout into the
 * equivalent position in another layout.
 *
 * The source position is first collapsed into a single linear offset, with
 * the first dimension the most significant and the last the fastest varying.
 * That offset is then unfolded against the destination dimensions, starting
 * from the last one.
 *
 * @param srcPos   position in the source layout; indexed once per source dimension
 * @param srcDims  source dimensions (start and length are read)
 * @param dstPos   output position; written by index and never resized here, so
 *                 it must already hold at least dstDims.size() elements
 * @param dstDims  destination dimensions (start and length are read)
 */
inline void convertCoordinates(Coordinates const& srcPos,
                               Dimensions const& srcDims,
                               Coordinates& dstPos,
                               Dimensions const& dstDims)
{
    // Fold the source coordinates into one linear cell number.
    Coordinate linear = 0;
    const size_t srcRank = srcDims.size();
    for (size_t d = 0; d != srcRank; ++d)
    {
        linear *= srcDims[d].getLength();
        linear += srcPos[d] - srcDims[d].getStart();
    }

    // Peel the linear cell number apart again, last dimension first.
    int d = dstDims.size();
    while (--d >= 0)
    {
        dstPos[d] = dstDims[d].getStart() + (linear % dstDims[d].getLength());
        linear /= dstDims[d].getLength();
    }
}
/**
 * Validate the operator arguments against the single input schema and
 * produce the ranking output schema.
 *
 * Parameter 0 (optional) names the attribute to rank; when it is absent the
 * first attribute of the input is used. Any further parameters name
 * dimensions, each of which must exist in the input.
 *
 * @param schemas  input schemas; exactly one is expected (asserted)
 * @param query    not used by this function
 * @return the result of getRankingSchema() for the input and chosen attribute
 * @throws USER_EXCEPTION SCIDB_LE_DLA_ERROR14 when no attribute has the requested name
 * @throws USER_EXCEPTION SCIDB_LE_DLA_ERROR15 when the chosen attribute is the empty indicator
 * @throws USER_EXCEPTION SCIDB_LE_DIMENSION_NOT_EXIST when a named dimension is not found
 */
ArrayDesc inferSchema(std::vector<ArrayDesc> schemas, boost::shared_ptr<Query> query)
{
    ArrayDesc const& input = schemas[0];
    assert(schemas.size() == 1);

    // Pick the attribute name: explicit parameter if given, else the first attribute.
    string attName;
    if (_parameters.size() > 0)
    {
        attName = ((boost::shared_ptr<OperatorParamReference>&)_parameters[0])->getObjectName();
    }
    else
    {
        attName = input.getAttributes()[0].getName();
    }

    // Resolve the name to an attribute id. The scan deliberately does not stop
    // at the first hit, so if several attributes share the name the last one wins.
    bool found = false;
    AttributeID inputAttributeID = 0;
    BOOST_FOREACH(const AttributeDesc& candidate, input.getAttributes())
    {
        if (candidate.getName() == attName)
        {
            inputAttributeID = candidate.getId();
            found = true;
        }
    }
    if (!found)
    {
        throw USER_EXCEPTION(SCIDB_SE_INFER_SCHEMA, SCIDB_LE_DLA_ERROR14);
    }

    // The empty indicator is not allowed as the ranked attribute.
    AttributeDesc rankedAttribute = input.getAttributes()[inputAttributeID];
    if (rankedAttribute.isEmptyIndicator())
    {
        throw USER_EXCEPTION(SCIDB_SE_INFER_SCHEMA, SCIDB_LE_DLA_ERROR15);
    }

    // Every remaining parameter must name a dimension of the input.
    Dimensions dims = input.getDimensions();
    if (_parameters.size() > 1)
    {
        const size_t nDimParams = _parameters.size() - 1;
        // NOTE(review): groupBy is sized here but never read or written afterwards.
        vector<int> groupBy(nDimParams);

        for (size_t p = 0; p < nDimParams; ++p)
        {
            const string& dimName =
                ((boost::shared_ptr<OperatorParamReference>&)_parameters[p + 1])->getObjectName();
            const string& dimAlias =
                ((boost::shared_ptr<OperatorParamReference>&)_parameters[p + 1])->getArrayName();

            bool known = false;
            for (size_t d = 0; d < dims.size() && !known; ++d)
            {
                known = dims[d].hasNameAndAlias(dimName, dimAlias);
            }
            if (!known)
            {
                throw USER_EXCEPTION(SCIDB_SE_INFER_SCHEMA, SCIDB_LE_DIMENSION_NOT_EXIST) << dimName;
            }
        }
    }

    return getRankingSchema(input, inputAttributeID);
}
/**
 * Reset this chunk for a new array descriptor and address, and recompute
 * its per-dimension bounding positions.
 *
 * For each dimension the following are derived from the chunk origin
 * (address.coords) and the dimension descriptor:
 *   - first position including overlap, clamped to the dimension start;
 *   - last position including overlap, clamped to the dimension's end max;
 *   - last position excluding overlap, clamped to the dimension's end max;
 *   - the resulting interval length (last - first + 1, overlap included).
 *
 * @param desc        array descriptor; stored as-is and its dimensions are read
 * @param address     chunk address; stored, and its coords become the
 *                    no-overlap first position
 * @param compMethod  compression method id, stored unchanged
 */
void ConstRLEChunk::initialize(ArrayDesc const * desc, const Address &address, int compMethod)
{
    _hasOverlap = false;
    _compressionMethod = compMethod;
    _arrayDesc = desc;
    _firstPositionNoOlap = address.coords;
    _addr = address;

    // Drop anything left over from a previous initialization.
    _firstPosition.clear();
    _lastPositionNoOlap.clear();
    _lastPosition.clear();
    _chunkIntervals.clear();

    Dimensions const& dims = desc->getDimensions();
    const size_t rank = dims.size();
    for (size_t d = 0; d < rank; ++d)
    {
        // A single dimension with non-zero overlap marks the whole chunk.
        if (dims[d].getChunkOverlap())
        {
            _hasOverlap = true;
        }

        _firstPosition.push_back(
            std::max<Coordinate>(_firstPositionNoOlap[d] - dims[d].getChunkOverlap(),
                                 dims[d].getStart()));

        // NOTE(review): the upper bound adds twice the overlap to the
        // no-overlap origin; confirm this is intended rather than a single overlap.
        _lastPosition.push_back(
            std::min<Coordinate>(_firstPositionNoOlap[d] + dims[d].getChunkInterval()
                                     + 2 * dims[d].getChunkOverlap() - 1,
                                 dims[d].getEndMax()));

        _lastPositionNoOlap.push_back(
            std::min<Coordinate>(_firstPositionNoOlap[d] + dims[d].getChunkInterval() - 1,
                                 dims[d].getEndMax()));

        _chunkIntervals.push_back(_lastPosition[d] - _firstPosition[d] + 1);
    }
}
/**
 * Emit one debug log line per dimension, showing its start-min and end-max.
 *
 * Nothing is done when debug logging is disabled on `logger`.
 *
 * @param prefix  text placed at the beginning of every emitted line
 * @param dims    dimensions to describe
 */
void log4cxx_debug_dimensions(const std::string& prefix, const Dimensions& dims)
{
    if (!logger->isDebugEnabled())
    {
        return;
    }

    const size_t count = dims.size();
    for (size_t d = 0; d < count; ++d)
    {
        LOG4CXX_DEBUG(logger, prefix << " dims[" << d << "] from "
                                     << dims[d].getStartMin() << " to "
                                     << dims[d].getEndMax());
    }
}
/**
 * Compute the bounding box of the output from the bounding box of the input.
 *
 * Parameters come in (dimension reference, value expression) pairs. A
 * dimension named by a pair is removed from the output; every other input
 * dimension keeps its input bounds. A pair matches input dimension i either
 * by base name or positionally, via a name of the form "_<N>" with N == i + 1.
 *
 * @param inputBoundaries  boundaries of the inputs; only element 0 is used
 * @param inputSchemas     schemas of the inputs; only element 0 is used
 * @return an empty boundary (sized to the output schema's dimension count)
 *         when the input is empty or a slice value lies outside the input
 *         box; otherwise the input bounds restricted to the kept dimensions
 */
virtual PhysicalBoundaries getOutputBoundaries(
    std::vector<PhysicalBoundaries> const& inputBoundaries,
    std::vector< ArrayDesc> const& inputSchemas) const
{
    if (inputBoundaries[0].isEmpty())
    {
        return PhysicalBoundaries::createEmpty(_schema.getDimensions().size());
    }

    Coordinates inStart = inputBoundaries[0].getStartCoords();
    Coordinates inEnd = inputBoundaries[0].getEndCoords();
    Dimensions dims = inputSchemas[0].getDimensions();
    const size_t nDims = dims.size();
    const size_t nParams = _parameters.size();

    // Even-numbered parameters carry the names of the sliced dimensions.
    std::vector<std::string> sliceDimName(nParams/2);
    for (size_t p = 0; p < nParams; p += 2)
    {
        sliceDimName[p >> 1] =
            ((std::shared_ptr<OperatorParamReference>&)_parameters[p])->getObjectName();
    }

    Coordinates newStart, newEnd;
    for (size_t i = 0; i < nDims; i++)
    {
        const std::string dimName = dims[i].getBaseName();

        // Search the slice names from last to first; k ends at the matching
        // entry, or at -1 when dimension i is not sliced.
        int k = safe_static_cast<int>(sliceDimName.size()) - 1;
        for (; k >= 0; --k)
        {
            if (sliceDimName[k] == dimName)
            {
                break;
            }
            if (sliceDimName[k][0] == '_'
                && (size_t)atoi(sliceDimName[k].c_str()+1) == i+1)
            {
                break;
            }
        }

        if (k < 0)
        {
            // Dimension i survives into the output unchanged.
            newStart.push_back(inStart[i]);
            newEnd.push_back(inEnd[i]);
            continue;
        }

        // Dimension i is sliced away; the requested value must fall inside the input box.
        Coordinate slice =
            ((std::shared_ptr<OperatorParamPhysicalExpression>&)_parameters[k*2+1])
                ->getExpression()->evaluate().getInt64();
        if (!inputBoundaries[0].isInsideBox(slice,i))
        {
            // The slice value is outside the box, so the result is an empty array.
            return PhysicalBoundaries::createEmpty(_schema.getDimensions().size());
        }
    }

    // The density estimate below was disabled because it only computed a few
    // local values and then discarded them.
    //
    // double resultCells = PhysicalBoundaries::getNumCells(newStart, newEnd);
    // double origCells = inputBoundaries[0].getNumCells();
    // double newDensity = 1.0;
    // if (resultCells > 0.0)
    // {
    //     newDensity = inputBoundaries[0].getDensity() * origCells / resultCells;
    //     newDensity = newDensity > 1.0 ? 1.0 : newDensity;
    // }

    return PhysicalBoundaries(newStart, newEnd);
}
/**
 * Build the output schema of "match" from a pattern array (input 0) and a
 * catalog array (input 1).
 *
 * Output attributes, in order:
 *   1. every pattern attribute, under its own name;
 *   2. every catalog attribute, renamed to "match_<name>";
 *   3. one int64 attribute "match_<dim>" per catalog dimension;
 *   4. the empty-tag indicator attribute.
 * Output dimensions are the pattern dimensions followed by an extra
 * "collision" dimension whose length comes from the operator parameter.
 *
 * Attribute lists are obtained with getAttributes(true) — presumably
 * excluding the empty bitmap; verify against ArrayDesc.
 *
 * @param schemas  input schemas; exactly two are expected (asserted)
 * @param query    query context, used to evaluate the parameter expression
 * @return the "match" array descriptor
 * @throws USER_EXCEPTION SCIDB_LE_DIMENSION_COUNT_MISMATCH when the inputs
 *         have different numbers of dimensions
 * @throws USER_EXCEPTION SCIDB_LE_ARRAYS_NOT_CONFORMANT when a pair of
 *         dimensions differs in start-min, chunk interval or chunk overlap
 * @throws USER_EXCEPTION SCIDB_LE_WRONG_OPERATOR_ARGUMENT2 when the collision
 *         limit is not positive or does not fit in an int32_t
 */
ArrayDesc inferSchema(std::vector< ArrayDesc> schemas, boost::shared_ptr< Query> query)
{
    assert(schemas.size() == 2);
    ArrayDesc const& patternDesc = schemas[0];
    ArrayDesc const& catalogDesc = schemas[1];

    Attributes const& catalogAttributes = catalogDesc.getAttributes(true);
    Dimensions const& catalogDimensions = catalogDesc.getDimensions();
    Attributes const& patternAttributes = patternDesc.getAttributes(true);
    Dimensions resultDimensions = patternDesc.getDimensions();

    const size_t nCatalogDims = catalogDimensions.size();

    // pattern attrs + catalog attrs + one per catalog dimension + empty tag
    Attributes matchAttributes(catalogAttributes.size() + patternAttributes.size()
                               + 1 + nCatalogDims);

    if (nCatalogDims != resultDimensions.size())
    {
        stringstream left, right;
        printDimNames(left, resultDimensions);
        printDimNames(right, catalogDimensions);
        throw USER_EXCEPTION(SCIDB_SE_INFER_SCHEMA, SCIDB_LE_DIMENSION_COUNT_MISMATCH)
            << "match" << left.str() << right.str();
    }

    for (size_t d = 0; d < nCatalogDims; ++d)
    {
        if (catalogDimensions[d].getStartMin() != resultDimensions[d].getStartMin()
            || catalogDimensions[d].getChunkInterval() != resultDimensions[d].getChunkInterval()
            || catalogDimensions[d].getChunkOverlap() != resultDimensions[d].getChunkOverlap())
        {
            // XXX To do: implement requiresRepart() method, remove interval/overlap checks
            // above, use SCIDB_LE_START_INDEX_MISMATCH here.
            throw USER_EXCEPTION(SCIDB_SE_INFER_SCHEMA, SCIDB_LE_ARRAYS_NOT_CONFORMANT);
        }
    }

    // `j` is the next free slot in matchAttributes and doubles as the new attribute id.
    size_t j = 0;

    for (size_t i = 0; i < patternAttributes.size(); ++i)
    {
        AttributeDesc const& attr = patternAttributes[i];
        matchAttributes[j] = AttributeDesc(j, attr.getName(), attr.getType(), attr.getFlags(),
                                           attr.getDefaultCompressionMethod(), attr.getAliases(),
                                           &attr.getDefaultValue(), attr.getDefaultValueExpr());
        ++j;
    }

    for (size_t i = 0; i < catalogAttributes.size(); ++i)
    {
        AttributeDesc const& attr = catalogAttributes[i];
        matchAttributes[j] = AttributeDesc(j, "match_" + attr.getName(), attr.getType(),
                                           attr.getFlags(), attr.getDefaultCompressionMethod(),
                                           attr.getAliases(), &attr.getDefaultValue(),
                                           attr.getDefaultValueExpr());
        ++j;
    }

    for (size_t d = 0; d < nCatalogDims; ++d)
    {
        matchAttributes[j] = AttributeDesc(j, "match_" + catalogDimensions[d].getBaseName(),
                                           TID_INT64, 0, 0);
        ++j;
    }

    matchAttributes[j] = AttributeDesc(j, DEFAULT_EMPTY_TAG_ATTRIBUTE_NAME, TID_INDICATOR,
                                       AttributeDesc::IS_EMPTY_INDICATOR, 0);

    // The collision limit must be positive and must survive the narrowing to 32 bits.
    int64_t maxCollisions =
        evaluate(((boost::shared_ptr<OperatorParamLogicalExpression>&)_parameters[1])->getExpression(),
                 query, TID_INT64).getInt64();
    if (maxCollisions <= 0 || (int32_t)maxCollisions != maxCollisions)
    {
        throw USER_EXCEPTION(SCIDB_SE_INFER_SCHEMA, SCIDB_LE_WRONG_OPERATOR_ARGUMENT2) << "positive";
    }

    resultDimensions.push_back(DimensionDesc("collision", 0, 0, maxCollisions-1, maxCollisions-1,
                                             (uint32_t)maxCollisions, 0));
    return ArrayDesc("match", matchAttributes, resultDimensions);
}
/**
 * Derive the output schema for two or more conformant input arrays.
 *
 * Every further input is compared against the first one:
 *   - the dimension count must be equal, and each dimension must agree in
 *     start, chunk interval and chunk overlap;
 *   - the attribute counts must be equal, or differ by exactly one where the
 *     extra (last) attribute of the longer list is the empty indicator;
 *   - the attributes they have in common must agree in type and flags.
 * The output dimensions are widened to cover all inputs (minimum of the
 * starts, maximum of the ends). The output attribute list is the longest one
 * seen, and the output name is that of the first input.
 *
 * @param schemas  input schemas; at least two are expected (asserted)
 * @param query    not used by this function
 * @return the combined array descriptor
 * @throws USER_EXCEPTION SCIDB_LE_ARRAYS_NOT_CONFORMANT when any check fails
 */
ArrayDesc inferSchema(std::vector< ArrayDesc> schemas, boost::shared_ptr< Query> query)
{
    assert(schemas.size() >= 2);
    assert(_parameters.size() == 0);

    Attributes const& leftAttributes = schemas[0].getAttributes();
    Dimensions const& leftDimensions = schemas[0].getDimensions();

    Attributes const* newAttributes = &leftAttributes;
    Dimensions newDims = leftDimensions;
    const size_t nDims = newDims.size();
    const size_t nLeftAttrs = leftAttributes.size();

    for (size_t s = 1; s < schemas.size(); ++s)
    {
        Attributes const& rightAttributes = schemas[s].getAttributes();
        Dimensions const& rightDimensions = schemas[s].getDimensions();
        const size_t nRightAttrs = rightAttributes.size();

        if (nDims != rightDimensions.size())
        {
            throw USER_EXCEPTION(SCIDB_SE_INFER_SCHEMA, SCIDB_LE_ARRAYS_NOT_CONFORMANT);
        }

        for (size_t d = 0; d < nDims; ++d)
        {
            const bool conformant =
                leftDimensions[d].getStart() == rightDimensions[d].getStart()
                && leftDimensions[d].getChunkInterval() == rightDimensions[d].getChunkInterval()
                && leftDimensions[d].getChunkOverlap() == rightDimensions[d].getChunkOverlap();
            if (!conformant)
            {
                throw USER_EXCEPTION(SCIDB_SE_INFER_SCHEMA, SCIDB_LE_ARRAYS_NOT_CONFORMANT);
            }

            // Grow the accumulated dimension so that it also covers this input.
            DimensionDesc& acc = newDims[d];
            acc = DimensionDesc(acc.getBaseName(),
                                acc.getNamesAndAliases(),
                                min(acc.getStartMin(), rightDimensions[d].getStartMin()),
                                min(acc.getCurrStart(), rightDimensions[d].getCurrStart()),
                                max(acc.getCurrEnd(), rightDimensions[d].getCurrEnd()),
                                max(acc.getEndMax(), rightDimensions[d].getEndMax()),
                                acc.getChunkInterval(),
                                acc.getChunkOverlap());
        }

        // Attribute counts may differ only by a trailing empty indicator on one side.
        if (nLeftAttrs != nRightAttrs)
        {
            const bool leftHasExtraTag =
                nLeftAttrs == nRightAttrs + 1
                && leftAttributes[nLeftAttrs - 1].isEmptyIndicator();
            const bool rightHasExtraTag =
                nLeftAttrs + 1 == nRightAttrs
                && rightAttributes[nRightAttrs - 1].isEmptyIndicator();
            if (!leftHasExtraTag && !rightHasExtraTag)
            {
                throw USER_EXCEPTION(SCIDB_SE_INFER_SCHEMA, SCIDB_LE_ARRAYS_NOT_CONFORMANT);
            }
        }

        // Remember the longest attribute list seen so far.
        if (nRightAttrs > newAttributes->size())
        {
            newAttributes = &rightAttributes;
        }

        const size_t nShared = min(nLeftAttrs, nRightAttrs);
        for (size_t a = 0; a < nShared; ++a)
        {
            if (leftAttributes[a].getType() != rightAttributes[a].getType()
                || leftAttributes[a].getFlags() != rightAttributes[a].getFlags())
            {
                throw USER_EXCEPTION(SCIDB_SE_INFER_SCHEMA, SCIDB_LE_ARRAYS_NOT_CONFORMANT);
            }
        }
    }

    return ArrayDesc(schemas[0].getName(), *newAttributes, newDims);
}
/**
 * Determine the primary instance for a chunk under a persistable
 * partitioning schema.
 *
 *   - psReplication:     ALL_INSTANCE_MASK.
 *   - psHashPartitioned: hashed chunk number modulo the instance count.
 *   - psByRow:           chunk index along dimension 0, divided by the number
 *                        of chunks per instance (rounded up).
 *   - psByCol:           same computation along dimension 1; instance 0 when
 *                        the array has fewer than two dimensions.
 * All other schemas are rejected via ASSERT_EXCEPTION_FALSE.
 *
 * @param ps                  partitioning schema to apply
 * @param chunkPosition       coordinates of the chunk
 * @param dims                dimensions of the array the chunk belongs to
 * @param nInstancesOriginal  number of instances used for the mapping
 * @return the destination instance id, or ALL_INSTANCE_MASK for replication
 */
InstanceID getPrimaryInstanceForChunk(PartitioningSchema ps,
                                      Coordinates const& chunkPosition,
                                      Dimensions const& dims,
                                      size_t nInstancesOriginal)
{
    const size_t instanceCount = nInstancesOriginal; // to make meld diff well. remove after first check-in
    InstanceID destInstanceId;
    switch (ps) {
    //
    // persistable cases: valid cases for ArrayDesc::getPrimaryInstanceForChunk()
    // these do, or soon will, return true from ArrayDesc::isPersistable() and
    // so can have their arrays entered in the catalog, and stored in TEMP and DB-Arrays.
    // Contrast with non-persistable cases further below.
    //
    case psReplication:
    {
        destInstanceId = ALL_INSTANCE_MASK;
        // Must not fall through: the hash-partitioned case below would
        // overwrite the replication mask with a single hashed instance id.
        break;
    }
    case psHashPartitioned:
    {
        destInstanceId = getHashedChunkNumber(dims, chunkPosition) % instanceCount;
        break;
    }
    case psByRow:
    {
        uint64_t dim0Length = dims[0].getLength();
        // (chunk index along dim 0) / ceil(chunks along dim 0 / instanceCount)
        destInstanceId = ((chunkPosition)[0] - dims[0].getStartMin()) / dims[0].getChunkInterval()
            / (((dim0Length + dims[0].getChunkInterval() - 1) / dims[0].getChunkInterval() + instanceCount - 1) / instanceCount);
        break;
    }
    case psByCol:
    {
        uint64_t dim1Length = dims.size() > 1 ? dims[1].getLength() : 0;
        if (dims.size() > 1)
        {
            // (chunk index along dim 1) / ceil(chunks along dim 1 / instanceCount)
            destInstanceId = ((chunkPosition)[1] - dims[1].getStartMin()) / dims[1].getChunkInterval()
                / (((dim1Length + dims[1].getChunkInterval() - 1) / dims[1].getChunkInterval() + instanceCount - 1) / instanceCount);
        }
        else
        {
            destInstanceId = 0; //XXX TODO Tigor ; you wanted a comment because you wanted to look at this line
        }
        break;
    }
    //
    // Non-persistable cases.
    // These are not mappable to instanceIds given the currently persisted array information.
    // They are used as arguments to redistributeXxxx() and their mapping typically
    // involves the use of additional "psData". They are not compatible with this
    // function.
    // TODO: clean up the bracing and verbosity after first checkin
    //       it is "to make meld diff well. remove after first check-in"
    case psScaLAPACK:
    {
        ASSERT_EXCEPTION_FALSE("getPrimaryInstanceForChunk: psScaLAPACK not permitted in stored arrays");
        break; // NOTREACHED
    }
    case psLocalInstance:
    {
        ASSERT_EXCEPTION_FALSE("getPrimaryInstanceForChunk: psLocalInstance not permitted in stored arrays");
        break; // NOTREACHED
    }
    case psGroupby:
    {
        ASSERT_EXCEPTION_FALSE("getPrimaryInstanceForChunk: psGroupby not permitted in stored arrays");
        break; // NOTREACHED
    }
    case psUndefined:
    {
        ASSERT_EXCEPTION_FALSE("getPrimaryInstanceForChunk: psUndefined not permitted in stored arrays");
        break; // NOTREACHED
    }
    case psUninitialized:
    default:
    {
        ASSERT_EXCEPTION_FALSE("getPrimaryInstanceForChunk: internal error, an unknown PartitioningSchema was supplied.");
        break; // NOTREACHED
    }
    }
    // TODO: end region of bracing, tabbing, and verbosity cleanup.
    return destInstanceId;
}
/**
 * Infer the result schema of a cross join between a left (input 0) and a
 * right (input 1) array.
 *
 * Operator parameters are (left dimension, right dimension) pairs naming the
 * dimensions to join on.
 *
 * Result attributes: the left attributes without the left empty indicator,
 * then all right attributes (including the right empty bitmap, if any), then
 * the left empty bitmap when only the left array has one. Each attribute
 * gains its source array's name as an alias.
 *
 * Result dimensions: every left dimension, followed by the right dimensions
 * that are not part of a join pair. A joined left dimension is narrowed to
 * its intersection with the paired right dimension and takes the smaller of
 * the two chunk overlaps. `_fixer` is cleared and then told which input
 * array and dimension each result dimension came from.
 *
 * @param schemas  input schemas; exactly two are expected (asserted)
 * @param query    not used by this function
 * @return the cross join array descriptor
 * @throws USER_EXCEPTION SCIDB_LE_OP_CROSSJOIN_ERROR2 when the parameter count is odd
 * @throws USER_QUERY_EXCEPTION SCIDB_LE_DIMENSION_NOT_EXIST when a named dimension is not found
 * @throws USER_QUERY_EXCEPTION / USER_EXCEPTION SCIDB_LE_OP_CROSSJOIN_ERROR1
 *         when a dimension is named more than once
 * @throws USER_EXCEPTION SCIDB_LE_START_INDEX_MISMATCH when paired dimensions
 *         have different start-min values
 */
ArrayDesc inferSchema(std::vector< ArrayDesc> schemas, std::shared_ptr< Query> query)
{
    // Join-on dimensions are given as (left, right) pairs, so the count must be even.
    if ((_parameters.size() & 1) != 0)
    {
        throw USER_EXCEPTION(SCIDB_SE_OPERATOR, SCIDB_LE_OP_CROSSJOIN_ERROR2);
    }
    assert(schemas.size() == 2);

    ArrayDesc const& leftArrayDesc = schemas[0];
    ArrayDesc const& rightArrayDesc = schemas[1];

    Attributes const& leftAttributes = leftArrayDesc.getAttributes();
    Dimensions leftDimensions = leftArrayDesc.getDimensions();
    Attributes const& rightAttributes = rightArrayDesc.getAttributes();
    Dimensions const& rightDimensions = rightArrayDesc.getDimensions();

    AttributeDesc const* leftBitmap = leftArrayDesc.getEmptyBitmapAttribute();
    AttributeDesc const* rightBitmap = rightArrayDesc.getEmptyBitmapAttribute();

    // When both inputs carry an empty bitmap only one of them is kept.
    size_t totalAttributes = leftAttributes.size() + rightAttributes.size();
    if (leftBitmap && rightBitmap)
    {
        --totalAttributes;
    }

    // ---- Result attributes --------------------------------------------------
    // The right bitmap is preferred when present, otherwise the left one,
    // otherwise the result has none.
    Attributes CrossJoinAttributes(totalAttributes);
    AttributeID j = 0;

    // Clone `attr` into the next free slot under a fresh id and alias it with
    // the name of the array it came from.
    auto appendAttribute = [&CrossJoinAttributes, &j](AttributeDesc const& attr,
                                                     std::string const& sourceArray)
    {
        CrossJoinAttributes[j] = AttributeDesc(
            j, attr.getName(), attr.getType(), attr.getFlags(),
            attr.getDefaultCompressionMethod(), attr.getAliases(),
            &attr.getDefaultValue(), attr.getDefaultValueExpr());
        CrossJoinAttributes[j].addAlias(sourceArray);
        j += 1;
    };

    for (size_t i = 0; i < leftAttributes.size(); ++i)
    {
        if (leftAttributes[i].isEmptyIndicator())
        {
            continue;
        }
        appendAttribute(leftAttributes[i], leftArrayDesc.getName());
    }
    for (size_t i = 0; i < rightAttributes.size(); ++i)
    {
        appendAttribute(rightAttributes[i], rightArrayDesc.getName());
    }
    if (leftBitmap && !rightBitmap)
    {
        appendAttribute(*leftBitmap, leftArrayDesc.getName());
    }

    // ---- Join-on dimension pairs --------------------------------------------
    const size_t nRightDims = rightDimensions.size();
    const size_t nLeftDims = leftDimensions.size();

    // CrossJoinOnDimensions[r] is the left dimension joined with right
    // dimension r, or -1 when r is not joined.
    vector<ssize_t> CrossJoinOnDimensions(nRightDims, -1);
    std::bitset<MAX_NUM_DIMS_SUPPORTED> leftCrossJoinOnMask;
    std::bitset<MAX_NUM_DIMS_SUPPORTED> rightCrossJoinOnMask;

    for (size_t p = 0, np = _parameters.size(); p < np; p += 2)
    {
        std::shared_ptr<OperatorParamDimensionReference> leftDim =
            (std::shared_ptr<OperatorParamDimensionReference>&)_parameters[p];
        std::shared_ptr<OperatorParamDimensionReference> rightDim =
            (std::shared_ptr<OperatorParamDimensionReference>&)_parameters[p+1];

        const string &leftDimName = leftDim->getObjectName();
        const string &rightDimName = rightDim->getObjectName();
        const string &leftDimArray = leftDim->getArrayName();
        const string &rightDimArray = rightDim->getArrayName();

        // Left side: the dimension must exist and must not have been named already.
        ssize_t l = leftArrayDesc.findDimension(leftDimName, leftDimArray);
        if (l < 0)
        {
            throw USER_QUERY_EXCEPTION(SCIDB_SE_INFER_SCHEMA, SCIDB_LE_DIMENSION_NOT_EXIST,
                                       leftDim->getParsingContext())
                << leftDimName << "lefthand" << leftDimensions;
        }
        if (leftCrossJoinOnMask.test(l))
        {
            throw USER_QUERY_EXCEPTION(SCIDB_SE_INFER_SCHEMA, SCIDB_LE_OP_CROSSJOIN_ERROR1,
                                       leftDim->getParsingContext());
        }
        leftCrossJoinOnMask.set(l);

        // Right side: same checks.
        ssize_t r = rightArrayDesc.findDimension(rightDimName, rightDimArray);
        if (r < 0)
        {
            throw USER_QUERY_EXCEPTION(SCIDB_SE_INFER_SCHEMA, SCIDB_LE_DIMENSION_NOT_EXIST,
                                       rightDim->getParsingContext())
                << rightDimName << "righthand" << rightDimensions;
        }
        if (rightCrossJoinOnMask.test(r))
        {
            throw USER_QUERY_EXCEPTION(SCIDB_SE_INFER_SCHEMA, SCIDB_LE_OP_CROSSJOIN_ERROR1,
                                       rightDim->getParsingContext());
        }
        rightCrossJoinOnMask.set(r);

        // Differences in chunk size and overlap are now handled
        // via PhysicalCrossJoin::requiresRedimensionOrRepartition().
        if (leftDimensions[l].getStartMin() != rightDimensions[r].getStartMin())
        {
            ostringstream ss;
            ss << leftDimensions[l] << " != " << rightDimensions[r];
            throw USER_EXCEPTION(SCIDB_SE_INFER_SCHEMA, SCIDB_LE_START_INDEX_MISMATCH) << ss.str();
        }

        // Record the (r --> l) pairing.
        if (CrossJoinOnDimensions[r] >= 0)
        {
            throw USER_EXCEPTION(SCIDB_SE_INFER_SCHEMA, SCIDB_LE_OP_CROSSJOIN_ERROR1);
        }
        CrossJoinOnDimensions[r] = l;
    }

    // ---- Result dimensions --------------------------------------------------
    _fixer.clear();
    j = 0;
    Dimensions CrossJoinDimensions(nLeftDims + nRightDims - _parameters.size()/2);

    // Every left dimension is part of the result.
    for (size_t i = 0; i < nLeftDims; ++i)
    {
        CrossJoinDimensions[j] = leftDimensions[i];
        CrossJoinDimensions[j].addAlias(leftArrayDesc.getName());
        _fixer.takeDimension(j).fromArray(0).fromDimension(i);
        ++j;
    }

    for (size_t i = 0; i < nRightDims; ++i)
    {
        if (CrossJoinOnDimensions[i] >= 0)
        {
            // Joined right dimension: shrink the paired left dimension to the
            // intersection of the two, and keep the smaller chunk overlap.
            DimensionDesc& d = CrossJoinDimensions[CrossJoinOnDimensions[i]];
            DimensionDesc const& right = rightDimensions[i];

            Coordinate newCurrStart = max(d.getCurrStart(), right.getCurrStart());
            Coordinate newCurrEnd = min(d.getCurrEnd(), right.getCurrEnd());
            Coordinate newEndMax = min(d.getEndMax(), right.getEndMax());

            d.setCurrStart(newCurrStart);
            d.setCurrEnd(newCurrEnd);
            d.setEndMax(newEndMax);
            d.setChunkOverlap(min(d.getChunkOverlap(), right.getChunkOverlap()));
            continue;
        }

        // Unjoined right dimension: append it to the result.
        CrossJoinDimensions[j] = rightDimensions[i];
        CrossJoinDimensions[j].addAlias(rightArrayDesc.getName());
        _fixer.takeDimension(j).fromArray(1).fromDimension(i);
        ++j;
    }

    // ---- Result name --------------------------------------------------------
    // The two input names are concatenated, with a "~" between them when
    // either one is a qualified array name.
    const std::string &leftName = leftArrayDesc.getName();
    const std::string &rightName = rightArrayDesc.getName();
    std::string newName = leftName;
    if (ArrayDesc::isQualifiedArrayName(leftName) || ArrayDesc::isQualifiedArrayName(rightName))
    {
        newName += std::string("~");
    }
    newName += rightName;

    return ArrayDesc(newName,
                     CrossJoinAttributes,
                     CrossJoinDimensions,
                     createDistribution(psUndefined),
                     leftArrayDesc.getResidency());
}