/**
 * Run the operator-specific input checks and return the shape of the output.
 * The array named by the single operator parameter must already exist;
 * otherwise SCIDB_LE_ARRAY_DOESNT_EXIST is raised.
 * @param schemas the shapes of the input arrays (exactly one is expected)
 * @param query the query context
 * @return the descriptor of the existing target array
 */
ArrayDesc inferSchema(std::vector< ArrayDesc> schemas, std::shared_ptr< Query> query)
{
    SCIDB_ASSERT(schemas.size() == 1);
    SCIDB_ASSERT(_parameters.size() == 1);

    string arrayNameOrg =
        ((std::shared_ptr<OperatorParamReference>&)_parameters[0])->getObjectName();
    SCIDB_ASSERT(ArrayDesc::isNameUnversioned(arrayNameOrg));

    // NOTE(review): presumably splits a possibly qualified name into its
    // namespace and array parts -- confirm against Query::getNamespaceArrayNames.
    std::string arrayName;
    std::string namespaceName;
    query->getNamespaceArrayNames(arrayNameOrg, namespaceName, arrayName);

    // Fetch the target array's descriptor.
    ArrayDesc targetDesc;
    ArrayID catalogArrayId = query->getCatalogVersion(namespaceName, arrayName);
    bool targetFound = scidb::namespaces::Communicator::getArrayDesc(
        namespaceName, arrayName, catalogArrayId, targetDesc, false);
    if (!targetFound)
    {
        throw USER_EXCEPTION(SCIDB_SE_INFER_SCHEMA, SCIDB_LE_ARRAY_DOESNT_EXIST) << arrayName;
    }

    // The input must conform to the target, ignoring partitioning scheme,
    // chunk overlap and chunk interval.
    ArrayDesc const& inputDesc = schemas[0];
    ArrayDesc::checkConformity(inputDesc, targetDesc,
                               ArrayDesc::IGNORE_PSCHEME |
                               ArrayDesc::IGNORE_OVERLAP |
                               ArrayDesc::IGNORE_INTERVAL); // allows auto-repart()

    SCIDB_ASSERT(targetDesc.getId() == targetDesc.getUAId());
    SCIDB_ASSERT(targetDesc.getName() == arrayName);
    SCIDB_ASSERT(targetDesc.getUAId() > 0);

    return targetDesc;
}
//param desc --> the input array schema inline ArrayDesc createWindowDesc(ArrayDesc const& desc) { //get dimensions for output array Dimensions const& dims = desc.getDimensions(); Dimensions aggrDims(dims.size()); for (size_t i = 0; i < dims.size(); i++) { DimensionDesc const& srcDim = dims[i]; aggrDims[i] = DimensionDesc(srcDim.getBaseName(), srcDim.getNamesAndAliases(), srcDim.getStartMin(), srcDim.getCurrStart(), srcDim.getCurrEnd(), srcDim.getEndMax(), srcDim.getChunkInterval(), 0); } ArrayDesc output(desc.getName(), Attributes(), aggrDims); //get the aggregates, check if they make sense, make attributes for output array //_parameters[0~dims.size()*2-1] --> window boundaries, already get in inferSchema for (size_t i = dims.size()*2; i < _parameters.size(); i++) { boost::shared_ptr<scidb::OperatorParam> param = _parameters[i]; if ( param->getParamType() != PARAM_AGGREGATE_CALL) { throw USER_QUERY_EXCEPTION(SCIDB_SE_INFER_SCHEMA, SCIDB_LE_OP_WINDOW_ERROR5, _parameters[i]->getParsingContext()); } addAggregatedAttribute( (shared_ptr<OperatorParamAggregateCall> &) param, desc, output, true); } if ( desc.getEmptyBitmapAttribute()) //? { AttributeDesc const* eAttr = desc.getEmptyBitmapAttribute(); output.addAttribute(AttributeDesc(output.getAttributes().size(), eAttr->getName(), eAttr->getType(), eAttr->getFlags(), eAttr->getDefaultCompressionMethod())); } return output; }
/**
 * Build the output schema of the window operator from the input schema.
 * Every input dimension is copied with all of its properties except the
 * eighth constructor argument, which is forced to 0. One attribute is added
 * per parameter that follows the first 2*nDims parameters, and the input's
 * empty-bitmap attribute (if any) is appended last.
 * @param desc the input array schema
 * @return the output array schema
 */
inline ArrayDesc createWindowDesc(ArrayDesc const& desc)
{
    Dimensions const& inputDims = desc.getDimensions();
    const size_t nDims = inputDims.size();

    Dimensions windowDims(nDims);
    for (size_t d = 0; d < nDims; ++d)
    {
        DimensionDesc const& src = inputDims[d];
        windowDims[d] = DimensionDesc(src.getBaseName(),
                                      src.getNamesAndAliases(),
                                      src.getStartMin(),
                                      src.getCurrStart(),
                                      src.getCurrEnd(),
                                      src.getEndMax(),
                                      src.getChunkInterval(),
                                      0, // presumably the chunk overlap -- TODO confirm
                                      src.getType(),
                                      src.getFlags(),
                                      src.getMappingArrayName(),
                                      src.getComment(),
                                      src.getFuncMapOffset(),
                                      src.getFuncMapScale());
    }

    ArrayDesc output(desc.getName(), Attributes(), windowDims);

    // Parameters from index 2*nDims onward are treated as aggregate calls
    // without a type check.
    const size_t nParams = _parameters.size();
    for (size_t p = nDims * 2; p < nParams; ++p)
    {
        addAggregatedAttribute((shared_ptr <OperatorParamAggregateCall> &) _parameters[p],
                               desc, output);
    }

    // Carry the empty-bitmap attribute over as the last output attribute.
    AttributeDesc const* emptyTag = desc.getEmptyBitmapAttribute();
    if (emptyTag)
    {
        output.addAttribute(AttributeDesc(output.getAttributes().size(),
                                          emptyTag->getName(),
                                          emptyTag->getType(),
                                          emptyTag->getFlags(),
                                          emptyTag->getDefaultCompressionMethod()));
    }

    return output;
}
//param desc --> the input array schema ArrayDesc createWindowDesc(ArrayDesc const& desc) { //get dimensions for output array Attributes const &attrs = desc.getAttributes(); /* Dimensions aggrDims(dims.size()); for (size_t i = 0; i < dims.size(); i++) { DimensionDesc const& srcDim = dims[i]; aggrDims[i] = DimensionDesc(srcDim.getBaseName(), srcDim.getNamesAndAliases(), srcDim.getStartMin(), srcDim.getCurrStart(), srcDim.getCurrEnd(), srcDim.getEndMax(), srcDim.getChunkInterval(), 0); } */ Attributes newAttributes; size_t n = 0; for (size_t i=desc.getDimensions().size()*2; i < _parameters.size()-1; i=i+2) { const AttributeDesc &attr = attrs[((boost::shared_ptr<OperatorParamReference>&)_parameters[i])->getObjectNo()]; newAttributes.push_back(AttributeDesc(n, attr.getName(), attr.getType(), attr.getFlags(), attr.getDefaultCompressionMethod(), attr.getAliases())); } return ArrayDesc(desc.getName(), newAttributes, desc.getDimensions()); }
/**
 * Validate that the single input array can be mapped onto the destination
 * schema given as the first parameter, and return the resulting schema.
 *
 * Remaining parameters are aggregate calls; each must produce an attribute
 * that exists in the destination. Every destination attribute must match a
 * source/aggregate attribute, a source dimension, or be the empty indicator.
 * Every destination dimension must match a source attribute or a source
 * dimension; otherwise at most one new dimension is accepted, and only when
 * no aggregates are present.
 *
 * @param schemas the shapes of the input arrays (exactly one is expected)
 * @param query the query context
 * @return source array name + destination attributes, dimensions and flags
 */
ArrayDesc inferSchema(std::vector< ArrayDesc> schemas, boost::shared_ptr< Query> query)
{
    assert(schemas.size() == 1);

    ArrayDesc const& srcDesc = schemas[0];
    ArrayDesc dstDesc = ((boost::shared_ptr<OperatorParamSchema>&)_parameters[0])->getSchema();

    // Descriptor holding every attribute that may feed the destination:
    // aggregate results first, then the remaining source attributes.
    ArrayDesc aggregationDesc(srcDesc.getName(), Attributes(), srcDesc.getDimensions());
    vector<string> aggregatedNames;

    // Aggregate calls: each one must correspond to a destination attribute.
    for (size_t p = 1; p < _parameters.size(); ++p)
    {
        addAggregatedAttribute((shared_ptr <OperatorParamAggregateCall>&) _parameters[p],
                               srcDesc,
                               aggregationDesc);
        string aggName =
            aggregationDesc.getAttributes()[aggregationDesc.getAttributes().size()-1].getName();

        bool inDestination = false;
        BOOST_FOREACH(const AttributeDesc &dstAttr, dstDesc.getAttributes())
        {
            if (dstAttr.getName() == aggName)
            {
                inDestination = true;
                break;
            }
        }
        if (!inDestination)
        {
            throw USER_EXCEPTION(SCIDB_SE_INFER_SCHEMA, SCIDB_LE_ATTRIBUTE_DOESNT_EXIST)
                << aggName << dstDesc.getName();
        }
        aggregatedNames.push_back(aggName);
    }

    // Source attributes: skip any whose name is already taken by an aggregate.
    BOOST_FOREACH(const AttributeDesc &srcAttr, srcDesc.getAttributes())
    {
        bool shadowed = false;
        BOOST_FOREACH(const AttributeDesc &aggAttr, aggregationDesc.getAttributes())
        {
            if (aggAttr.getName() == srcAttr.getName())
            {
                shadowed = true;
                break;
            }
        }
        if (shadowed)
        {
            continue;
        }
        aggregationDesc.addAttribute(AttributeDesc(aggregationDesc.getAttributes().size(),
                                                   srcAttr.getName(),
                                                   srcAttr.getType(),
                                                   srcAttr.getFlags(),
                                                   srcAttr.getDefaultCompressionMethod(),
                                                   srcAttr.getAliases(),
                                                   &srcAttr.getDefaultValue(),
                                                   srcAttr.getDefaultValueExpr(),
                                                   srcAttr.getVarSize()));
    }

    // The destination is required to have an empty-bitmap attribute.
    if (!dstDesc.getEmptyBitmapAttribute())
    {
        throw USER_EXCEPTION(SCIDB_SE_INFER_SCHEMA, SCIDB_LE_OP_REDIMENSION_ERROR1);
    }

    // Check every destination attribute against its origin.
    BOOST_FOREACH(const AttributeDesc &dstAttr, dstDesc.getAttributes())
    {
        bool resolved = false;

        // Origin 1: an attribute of the same name (types must match, and a
        // nullable source cannot feed a non-nullable destination).
        BOOST_FOREACH(const AttributeDesc &srcAttr, aggregationDesc.getAttributes())
        {
            if (srcAttr.getName() != dstAttr.getName())
            {
                continue;
            }
            if (srcAttr.getType() != dstAttr.getType())
            {
                throw USER_EXCEPTION(SCIDB_SE_INFER_SCHEMA, SCIDB_LE_WRONG_ATTRIBUTE_TYPE)
                    << srcAttr.getName() << srcAttr.getType() << dstAttr.getType();
            }
            if (!dstAttr.isNullable() && srcAttr.isNullable())
            {
                throw USER_EXCEPTION(SCIDB_SE_INFER_SCHEMA, SCIDB_LE_WRONG_ATTRIBUTE_FLAGS)
                    << srcAttr.getName();
            }
            resolved = true;
            break;
        }
        if (resolved)
        {
            continue;
        }

        // Origin 2: a source dimension (destination must be a flag-less int64).
        BOOST_FOREACH(const DimensionDesc &srcDim, aggregationDesc.getDimensions())
        {
            if (!srcDim.hasNameAndAlias(dstAttr.getName()))
            {
                continue;
            }
            if (dstAttr.getType() != TID_INT64)
            {
                throw USER_EXCEPTION(SCIDB_SE_INFER_SCHEMA,
                                     SCIDB_LE_WRONG_DESTINATION_ATTRIBUTE_TYPE)
                    << dstAttr.getName() << TID_INT64;
            }
            if (dstAttr.getFlags() != 0)
            {
                throw USER_EXCEPTION(SCIDB_SE_INFER_SCHEMA,
                                     SCIDB_LE_WRONG_DESTINATION_ATTRIBUTE_FLAGS)
                    << dstAttr.getName();
            }
            resolved = true;
            break;
        }
        if (resolved)
        {
            continue;
        }

        // No origin: only the empty indicator may appear out of nowhere.
        if (dstAttr.isEmptyIndicator() == false)
        {
            throw USER_EXCEPTION(SCIDB_SE_INFER_SCHEMA,
                                 SCIDB_LE_UNEXPECTED_DESTINATION_ATTRIBUTE)
                << dstAttr.getName();
        }
    }

    // Check every destination dimension against its origin.
    Dimensions outputDims;
    size_t nNewDims = 0;
    BOOST_FOREACH(const DimensionDesc &dstDim, dstDesc.getDimensions())
    {
        if (dstDim.getChunkOverlap() > dstDim.getChunkInterval())
        {
            throw USER_EXCEPTION(SCIDB_SE_INFER_SCHEMA, SCIDB_LE_OVERLAP_CANT_BE_LARGER_CHUNK);
        }

        bool mapped = false;

        // Origin 1: a source attribute; it must not be an aggregate result and
        // must have an integral type other than uint64.
        BOOST_FOREACH(const AttributeDesc &srcAttr, aggregationDesc.getAttributes())
        {
            if (!dstDim.hasNameAndAlias(srcAttr.getName()))
            {
                continue;
            }
            for (size_t k = 0; k < aggregatedNames.size(); ++k)
            {
                if (srcAttr.getName() == aggregatedNames[k])
                {
                    throw USER_EXCEPTION(SCIDB_SE_INFER_SCHEMA, SCIDB_LE_OP_REDIMENSION_ERROR2);
                }
            }
            if (!IS_INTEGRAL(srcAttr.getType()) || srcAttr.getType() == TID_UINT64)
            {
                throw USER_EXCEPTION(SCIDB_SE_INFER_SCHEMA,
                                     SCIDB_LE_WRONG_SOURCE_ATTRIBUTE_TYPE)
                    << srcAttr.getName() << TID_INT64;
            }
            mapped = true;
            break;
        }

        // Origin 2: a source dimension.
        if (!mapped)
        {
            BOOST_FOREACH(const DimensionDesc &srcDim, aggregationDesc.getDimensions())
            {
                if (srcDim.hasNameAndAlias(dstDim.getBaseName()))
                {
                    mapped = true;
                    break;
                }
            }
        }

        // No origin: one synthetic dimension is allowed, and only without aggregates.
        if (!mapped)
        {
            if (nNewDims++ != 0 || !aggregatedNames.empty())
            {
                throw USER_EXCEPTION(SCIDB_SE_INFER_SCHEMA,
                                     SCIDB_LE_UNEXPECTED_DESTINATION_DIMENSION)
                    << dstDim.getBaseName();
            }
        }

        // Whatever its origin, an accepted dimension is taken from the destination as-is.
        outputDims.push_back(dstDim);
    }

    return ArrayDesc(srcDesc.getName(),
                     dstDesc.getAttributes(),
                     outputDims,
                     dstDesc.getFlags());
}
/**
 * Redistribute the shadow array into a persistent DB array and record its
 * boundaries in the system catalog.
 *
 * On the coordinator the shadow array's name is already versioned and is used
 * directly. On a worker a write lock is taken on the (unversioned) shadow
 * array, the next version number is derived from the catalog, and the
 * versioned name is built from it.
 *
 * @param query the query context
 */
void InputArray::redistributeShadowArray(boost::shared_ptr<Query> const& query)
{
    SCIDB_ASSERT(shadowArray);

    // Shadow arrays are stored with the psHashPartitioned scheme (the original
    // note described this as storing them the same way as all other arrays).
    // TODO: revisit this when we allow users to store arrays with specified distributions
    PartitioningSchema distribution = psHashPartitioned;

    ArrayDesc shadowArrayDesc = shadowArray->getArrayDesc();
    string versionedShadowName;

    LOG4CXX_DEBUG(logger, "Redistribute shadow array " << shadowArrayDesc.getName());

    if (query->isCoordinator())
    {
        // coordinator: the name must already carry a version
        versionedShadowName = shadowArrayDesc.getName();
        SCIDB_ASSERT(ArrayDesc::isNameVersioned(versionedShadowName));
    }
    else
    {
        // worker
        string baseShadowName = shadowArrayDesc.getName();
        SCIDB_ASSERT(ArrayDesc::isNameUnversioned(baseShadowName));

        shared_ptr<SystemCatalog::LockDesc> workerLock(
            new SystemCatalog::LockDesc(baseShadowName,
                                        query->getQueryID(),
                                        Cluster::getInstance()->getLocalInstanceId(),
                                        SystemCatalog::LockDesc::WORKER,
                                        SystemCatalog::LockDesc::WR));

        // Register the error handler and the lock-releasing finalizer before
        // attempting to acquire the lock.
        shared_ptr<Query::ErrorHandler> onError(new UpdateErrorHandler(workerLock));
        query->pushErrorHandler(onError);

        Query::Finalizer unlockOnFinish = bind(&UpdateErrorHandler::releaseLock, workerLock, _1);
        query->pushFinalizer(unlockOnFinish);

        SystemCatalog::ErrorChecker queryStillValid = bind(&Query::validate, query);
        if (!SystemCatalog::getInstance()->lockArray(workerLock, queryStillValid))
        {
            throw USER_EXCEPTION(SCIDB_SE_SYSCAT, SCIDB_LE_CANT_INCREMENT_LOCK) << baseShadowName;
        }

        // Next version = last catalog version + 1, or 1 if the array is not in the catalog.
        ArrayDesc catalogDesc;
        bool inCatalog =
            SystemCatalog::getInstance()->getArrayDesc(baseShadowName, catalogDesc, false);
        VersionID version = 1;
        if (inCatalog)
        {
            version = SystemCatalog::getInstance()->getLastVersion(catalogDesc.getId()) + 1;
        }

        workerLock->setArrayVersion(version);
        bool rc = SystemCatalog::getInstance()->updateArrayLock(workerLock);
        SCIDB_ASSERT(rc);

        LOG4CXX_DEBUG(logger, "Use version " << version << " of shadow array " << baseShadowName);
        versionedShadowName = ArrayDesc::makeVersionedName(baseShadowName, version);
    }

    shared_ptr<Array> storedShadow(DBArray::newDBArray(versionedShadowName, query));
    ArrayDesc const& storedDesc = storedShadow->getArrayDesc();

    query->getReplicationContext()->enableInboundQueue(storedDesc.getId(), storedShadow);

    // Collects the coordinates of the chunks written by the redistribution.
    set<Coordinates, CoordinatesLess> writtenChunks;
    redistributeToArray(shadowArray,
                        storedShadow,
                        &writtenChunks,
                        query,
                        distribution,
                        ALL_INSTANCE_MASK,
                        boost::shared_ptr <DistributionMapper>(),
                        0,
                        shared_ptr<PartitioningSchemaData>());

    StorageManager::getInstance().removeDeadChunks(storedDesc, writtenChunks, query);

    query->getReplicationContext()->replicationSync(storedDesc.getId());
    query->getReplicationContext()->removeInboundQueue(storedDesc.getId());

    StorageManager::getInstance().flush();

    PhysicalBoundaries storedBounds =
        PhysicalBoundaries::createFromChunkList(storedShadow, writtenChunks);
    SystemCatalog::getInstance()->updateArrayBoundaries(storedDesc, storedBounds);

    // XXX TODO: add: getInjectedErrorListener().check();
}