/**
 * Validate a redimension into the schema supplied as operator parameter 0 and
 * describe the resulting array.
 *
 * Operator parameters 1..N are passed to addAggregatedAttribute(), which is
 * expected to append one attribute per call to a scratch descriptor. The
 * scratch descriptor then receives every input attribute whose name is not
 * already present, and carries the input dimensions. Each destination
 * attribute and dimension is resolved by name against that scratch descriptor.
 *
 * @param schemas input schemas; exactly one is expected (asserted)
 * @param query   query context; not referenced by this method
 * @return descriptor named "<input name>_redimension" that carries the
 *         destination's attributes, dimensions and flags
 * @throws a user exception in SCIDB_SE_INFER_SCHEMA when the destination has
 *         no empty-bitmap attribute, or when a destination attribute or
 *         dimension cannot be resolved or fails one of the checks below
 */
ArrayDesc inferSchema(std::vector< ArrayDesc> schemas, boost::shared_ptr< Query> query)
{
    assert(schemas.size() == 1);

    ArrayDesc const& inputDesc = schemas[0];
    ArrayDesc targetDesc = ((boost::shared_ptr<OperatorParamSchema>&)_parameters[0])->getSchema();

    // Scratch descriptor: aggregate outputs first, then the remaining input
    // attributes, all over the input dimensions.
    ArrayDesc poolDesc (inputDesc.getName(), Attributes(), inputDesc.getDimensions());
    vector<string> aggregatedNames;

    for (size_t paramNo = 1; paramNo < _parameters.size(); ++paramNo)
    {
        addAggregatedAttribute( (shared_ptr <OperatorParamAggregateCall>&) _parameters[paramNo], inputDesc, poolDesc);

        // Remember the name of whichever attribute is now last in the pool.
        Attributes const& pooledSoFar = poolDesc.getAttributes();
        aggregatedNames.push_back(pooledSoFar[pooledSoFar.size() - 1].getName());
    }

    // Append the input attributes, skipping any whose name is already pooled.
    BOOST_FOREACH(const AttributeDesc &inputAttr, inputDesc.getAttributes())
    {
        bool alreadyPooled = false;
        BOOST_FOREACH(const AttributeDesc &pooled, poolDesc.getAttributes())
        {
            if (pooled.getName() == inputAttr.getName())
            {
                alreadyPooled = true;
            }
        }
        if (alreadyPooled)
        {
            continue;
        }

        poolDesc.addAttribute(AttributeDesc(
            poolDesc.getAttributes().size(),
            inputAttr.getName(),
            inputAttr.getType(),
            inputAttr.getFlags(),
            inputAttr.getDefaultCompressionMethod(),
            inputAttr.getAliases(),
            &inputAttr.getDefaultValue(),
            inputAttr.getDefaultValueExpr(),
            inputAttr.getComment(),
            inputAttr.getVarSize()));
    }

    // The destination must declare an empty-bitmap attribute.
    if (!targetDesc.getEmptyBitmapAttribute())
    {
        throw USER_EXCEPTION(SCIDB_SE_INFER_SCHEMA, SCIDB_LE_OP_REDIMENSION_ERROR1);
    }

    // Resolve every destination attribute: first against pooled attributes,
    // then against input dimensions; only the empty indicator may stay unresolved.
    BOOST_FOREACH(const AttributeDesc &wanted, targetDesc.getAttributes())
    {
        bool resolved = false;

        BOOST_FOREACH(const AttributeDesc &pooled, poolDesc.getAttributes())
        {
            if (pooled.getName() == wanted.getName())
            {
                if (pooled.getType() != wanted.getType())
                {
                    throw USER_EXCEPTION(SCIDB_SE_INFER_SCHEMA, SCIDB_LE_WRONG_ATTRIBUTE_TYPE)
                        << pooled.getName() << pooled.getType() << wanted.getType();
                }
                // A nullable source cannot feed a non-nullable destination.
                if (!wanted.isNullable() && pooled.isNullable())
                {
                    throw USER_EXCEPTION(SCIDB_SE_INFER_SCHEMA, SCIDB_LE_WRONG_ATTRIBUTE_FLAGS)
                        << pooled.getName();
                }
                resolved = true;
                break;
            }
        }

        if (!resolved)
        {
            BOOST_FOREACH(const DimensionDesc &inputDim, poolDesc.getDimensions())
            {
                if (inputDim.hasNameOrAlias(wanted.getName()))
                {
                    // Dimension-to-attribute: both sides must be int64 and the
                    // destination attribute must carry no flags.
                    if (wanted.getType() != TID_INT64)
                    {
                        throw USER_EXCEPTION(SCIDB_SE_INFER_SCHEMA, SCIDB_LE_WRONG_DESTINATION_ATTRIBUTE_TYPE)
                            << wanted.getName() << TID_INT64;
                    }
                    if (inputDim.getType() != TID_INT64)
                    {
                        throw USER_EXCEPTION(SCIDB_SE_INFER_SCHEMA, SCIDB_LE_WRONG_SOURCE_DIMENSION_TYPE)
                            << wanted.getName() << TID_INT64;
                    }
                    if (wanted.getFlags() != 0)
                    {
                        throw USER_EXCEPTION(SCIDB_SE_INFER_SCHEMA, SCIDB_LE_WRONG_DESTINATION_ATTRIBUTE_FLAGS)
                            << wanted.getName();
                    }
                    resolved = true;
                    break;
                }
            }
        }

        if (!resolved && !wanted.isEmptyIndicator())
        {
            throw USER_EXCEPTION(SCIDB_SE_INFER_SCHEMA, SCIDB_LE_UNEXPECTED_DESTINATION_ATTRIBUTE)
                << wanted.getName();
        }
    }

    // Resolve every destination dimension: first against pooled attributes,
    // then against input dimensions; anything else is rejected.
    BOOST_FOREACH(const DimensionDesc &wantedDim, targetDesc.getDimensions())
    {
        if (wantedDim.getChunkOverlap() != 0)
        {
            throw USER_EXCEPTION(SCIDB_SE_INFER_SCHEMA, SCIDB_LE_OP_REDIMENSION_STORE_ERROR3);
        }

        bool resolved = false;

        BOOST_FOREACH(const AttributeDesc &pooled, poolDesc.getAttributes())
        {
            if (wantedDim.hasNameOrAlias(pooled.getName()))
            {
                // An aggregate result may not be used as a dimension.
                for (size_t k = 0; k < aggregatedNames.size(); ++k)
                {
                    if (pooled.getName() == aggregatedNames[k])
                    {
                        throw USER_EXCEPTION(SCIDB_SE_INFER_SCHEMA, SCIDB_LE_OP_REDIMENSION_ERROR2);
                    }
                }
                if (pooled.getType() != TID_INT64)
                {
                    throw USER_EXCEPTION(SCIDB_SE_INFER_SCHEMA, SCIDB_LE_WRONG_SOURCE_ATTRIBUTE_TYPE)
                        << pooled.getName() << TID_INT64;
                }
                if (pooled.getFlags() != 0)
                {
                    throw USER_EXCEPTION(SCIDB_SE_INFER_SCHEMA, SCIDB_LE_WRONG_SOURCE_ATTRIBUTE_FLAGS)
                        << pooled.getName() << TID_INT64;
                }
                if (wantedDim.getType() != TID_INT64)
                {
                    throw USER_EXCEPTION(SCIDB_SE_INFER_SCHEMA, SCIDB_LE_WRONG_DESTINATION_DIMENSION_TYPE)
                        << pooled.getName() << TID_INT64;
                }
                resolved = true;
                break;
            }
        }

        if (!resolved)
        {
            BOOST_FOREACH(const DimensionDesc &inputDim, poolDesc.getDimensions())
            {
                if (inputDim.hasNameOrAlias(wantedDim.getBaseName()))
                {
                    // A dimension carried over must keep type, start, length
                    // and chunk interval.
                    if (wantedDim.getType() != inputDim.getType() ||
                        wantedDim.getStart() != inputDim.getStart() ||
                        wantedDim.getLength() != inputDim.getLength() ||
                        wantedDim.getChunkInterval() != inputDim.getChunkInterval())
                    {
                        throw USER_EXCEPTION(SCIDB_SE_INFER_SCHEMA, SCIDB_LE_DIMENSIONS_DONT_MATCH)
                            << inputDim.getBaseName() << wantedDim.getBaseName();
                    }
                    resolved = true;
                    break;
                }
            }
        }

        if (!resolved)
        {
            throw USER_EXCEPTION(SCIDB_SE_INFER_SCHEMA, SCIDB_LE_UNEXPECTED_DESTINATION_DIMENSION)
                << wantedDim.getBaseName();
        }
    }

    return ArrayDesc(inputDesc.getName()+"_redimension",
                     targetDesc.getAttributes(),
                     targetDesc.getDimensions(),
                     targetDesc.getFlags());
}
/**
 * Validate a redimension into the schema supplied as operator parameter 0 and
 * describe the resulting array.
 *
 * Operator parameters 1..N are passed to addAggregatedAttribute(); the name of
 * the attribute that ends up last in the scratch descriptor after each call
 * must also be an attribute of the destination. The scratch descriptor then
 * receives every input attribute whose name is not already present, and
 * carries the input dimensions. Destination attributes and dimensions are
 * resolved by name against it. At most one destination dimension may match
 * nothing in the input, and only when there are no aggregate calls.
 *
 * @param schemas input schemas; exactly one is expected (asserted)
 * @param query   query context; not referenced by this method
 * @return descriptor with the input array's name, the destination's attributes
 *         and flags, and a copy of the destination's dimensions
 * @throws a user exception in SCIDB_SE_INFER_SCHEMA when any of the checks
 *         below fails
 */
ArrayDesc inferSchema(std::vector< ArrayDesc> schemas, boost::shared_ptr< Query> query)
{
    assert(schemas.size() == 1);

    ArrayDesc const& inputDesc = schemas[0];
    ArrayDesc targetDesc = ((boost::shared_ptr<OperatorParamSchema>&)_parameters[0])->getSchema();

    // Scratch descriptor: aggregate outputs first, then the remaining input
    // attributes, all over the input dimensions.
    ArrayDesc poolDesc (inputDesc.getName(), Attributes(), inputDesc.getDimensions());
    vector<string> aggregatedNames;

    for (size_t paramNo = 1; paramNo < _parameters.size(); ++paramNo)
    {
        addAggregatedAttribute( (shared_ptr <OperatorParamAggregateCall>&) _parameters[paramNo], inputDesc, poolDesc);

        // Name of whichever attribute is now last in the pool.
        Attributes const& pooledSoFar = poolDesc.getAttributes();
        string aggName = pooledSoFar[pooledSoFar.size() - 1].getName();

        // The destination has to declare an attribute of that name.
        bool presentInTarget = false;
        BOOST_FOREACH(const AttributeDesc &wanted, targetDesc.getAttributes())
        {
            if (wanted.getName() == aggName)
            {
                presentInTarget = true;
                break;
            }
        }
        if (!presentInTarget)
        {
            throw USER_EXCEPTION(SCIDB_SE_INFER_SCHEMA, SCIDB_LE_ATTRIBUTE_DOESNT_EXIST)
                << aggName << targetDesc.getName();
        }
        aggregatedNames.push_back(aggName);
    }

    // Append the input attributes, skipping any whose name is already pooled.
    BOOST_FOREACH(const AttributeDesc &inputAttr, inputDesc.getAttributes())
    {
        bool alreadyPooled = false;
        BOOST_FOREACH(const AttributeDesc &pooled, poolDesc.getAttributes())
        {
            if (pooled.getName() == inputAttr.getName())
            {
                alreadyPooled = true;
            }
        }
        if (alreadyPooled)
        {
            continue;
        }

        poolDesc.addAttribute(AttributeDesc(
            poolDesc.getAttributes().size(),
            inputAttr.getName(),
            inputAttr.getType(),
            inputAttr.getFlags(),
            inputAttr.getDefaultCompressionMethod(),
            inputAttr.getAliases(),
            &inputAttr.getDefaultValue(),
            inputAttr.getDefaultValueExpr(),
            inputAttr.getVarSize()));
    }

    // The destination must declare an empty-bitmap attribute.
    if (!targetDesc.getEmptyBitmapAttribute())
    {
        throw USER_EXCEPTION(SCIDB_SE_INFER_SCHEMA, SCIDB_LE_OP_REDIMENSION_ERROR1);
    }

    // Resolve every destination attribute: first against pooled attributes,
    // then against input dimensions; only the empty indicator may stay unresolved.
    BOOST_FOREACH(const AttributeDesc &wanted, targetDesc.getAttributes())
    {
        bool resolved = false;

        BOOST_FOREACH(const AttributeDesc &pooled, poolDesc.getAttributes())
        {
            if (pooled.getName() == wanted.getName())
            {
                if (pooled.getType() != wanted.getType())
                {
                    throw USER_EXCEPTION(SCIDB_SE_INFER_SCHEMA, SCIDB_LE_WRONG_ATTRIBUTE_TYPE)
                        << pooled.getName() << pooled.getType() << wanted.getType();
                }
                // A nullable source cannot feed a non-nullable destination.
                if (!wanted.isNullable() && pooled.isNullable())
                {
                    throw USER_EXCEPTION(SCIDB_SE_INFER_SCHEMA, SCIDB_LE_WRONG_ATTRIBUTE_FLAGS)
                        << pooled.getName();
                }
                resolved = true;
                break;
            }
        }

        if (!resolved)
        {
            BOOST_FOREACH(const DimensionDesc &inputDim, poolDesc.getDimensions())
            {
                if (inputDim.hasNameAndAlias(wanted.getName()))
                {
                    // Dimension-to-attribute: destination must be a flag-free int64.
                    if (wanted.getType() != TID_INT64)
                    {
                        throw USER_EXCEPTION(SCIDB_SE_INFER_SCHEMA, SCIDB_LE_WRONG_DESTINATION_ATTRIBUTE_TYPE)
                            << wanted.getName() << TID_INT64;
                    }
                    if (wanted.getFlags() != 0)
                    {
                        throw USER_EXCEPTION(SCIDB_SE_INFER_SCHEMA, SCIDB_LE_WRONG_DESTINATION_ATTRIBUTE_FLAGS)
                            << wanted.getName();
                    }
                    resolved = true;
                    break;
                }
            }
        }

        if (!resolved && !wanted.isEmptyIndicator())
        {
            throw USER_EXCEPTION(SCIDB_SE_INFER_SCHEMA, SCIDB_LE_UNEXPECTED_DESTINATION_ATTRIBUTE)
                << wanted.getName();
        }
    }

    // Resolve every destination dimension and collect the accepted ones, in
    // destination order, for the returned descriptor.
    Dimensions outputDims;
    size_t nNewDims = 0;
    BOOST_FOREACH(const DimensionDesc &wantedDim, targetDesc.getDimensions())
    {
        if (wantedDim.getChunkOverlap() > wantedDim.getChunkInterval())
        {
            throw USER_EXCEPTION(SCIDB_SE_INFER_SCHEMA, SCIDB_LE_OVERLAP_CANT_BE_LARGER_CHUNK);
        }

        bool resolved = false;

        BOOST_FOREACH(const AttributeDesc &pooled, poolDesc.getAttributes())
        {
            if (wantedDim.hasNameAndAlias(pooled.getName()))
            {
                // An aggregate result may not be used as a dimension.
                for (size_t k = 0; k < aggregatedNames.size(); ++k)
                {
                    if (pooled.getName() == aggregatedNames[k])
                    {
                        throw USER_EXCEPTION(SCIDB_SE_INFER_SCHEMA, SCIDB_LE_OP_REDIMENSION_ERROR2);
                    }
                }
                // Attribute-to-dimension: any integral type except uint64.
                if ( !IS_INTEGRAL(pooled.getType()) || pooled.getType() == TID_UINT64 )
                {
                    throw USER_EXCEPTION(SCIDB_SE_INFER_SCHEMA, SCIDB_LE_WRONG_SOURCE_ATTRIBUTE_TYPE)
                        << pooled.getName() << TID_INT64;
                }
                resolved = true;
                break;
            }
        }

        if (!resolved)
        {
            BOOST_FOREACH(const DimensionDesc &inputDim, poolDesc.getDimensions())
            {
                if (inputDim.hasNameAndAlias(wantedDim.getBaseName()))
                {
                    resolved = true;
                    break;
                }
            }
        }

        if (!resolved)
        {
            // One synthetic dimension is allowed, and only without aggregates.
            // The counter advances on every visit, including a rejected one.
            const bool firstSynthetic = (nNewDims == 0);
            ++nNewDims;
            if (!firstSynthetic || !aggregatedNames.empty())
            {
                throw USER_EXCEPTION(SCIDB_SE_INFER_SCHEMA, SCIDB_LE_UNEXPECTED_DESTINATION_DIMENSION)
                    << wantedDim.getBaseName();
            }
        }

        outputDims.push_back(wantedDim);
    }

    return ArrayDesc(inputDesc.getName(),
                     targetDesc.getAttributes(),
                     outputDims,
                     targetDesc.getFlags());
}
/**
 * Check that the single input array can be inserted into the existing target
 * array named by operator parameter 0, and return the shape of the output.
 *
 * The target is looked up in the system catalog and must already exist and be
 * mutable. Dimensions and attributes are compared position by position, not
 * by name.
 *
 * @param schemas the shapes of the input arrays; exactly one is expected (asserted)
 * @param query   the query context; not referenced by this method
 * @return descriptor carrying the target's name, attributes, dimensions and flags
 * @throws a user exception in SCIDB_SE_INFER_SCHEMA when the target is missing
 *         or immutable, or when dimensions or attributes are incompatible
 */
ArrayDesc inferSchema(std::vector< ArrayDesc> schemas, shared_ptr< Query> query)
{
    assert(schemas.size() == 1);
    assert(_parameters.size() == 1);

    string arrayName = ((shared_ptr<OperatorParamReference>&)_parameters[0])->getObjectName();
    ArrayDesc const& srcDesc = schemas[0];

    // Fetch the target's current schema from the catalog.
    ArrayDesc dstDesc;
    const bool targetExists = SystemCatalog::getInstance()->getArrayDesc(arrayName, dstDesc, false);
    if (!targetExists)
    {
        throw USER_EXCEPTION(SCIDB_SE_INFER_SCHEMA, SCIDB_LE_ARRAY_DOESNT_EXIST) << arrayName;
    }
    if (dstDesc.isImmutable())
    {
        throw USER_EXCEPTION(SCIDB_SE_INFER_SCHEMA, SCIDB_LE_ILLEGAL_OPERATION)
            << "Target of INSERT must be a mutable array";
    }

    // ---- dimensions -------------------------------------------------------
    Dimensions const& srcDims = srcDesc.getDimensions();
    Dimensions const& dstDims = dstDesc.getDimensions();

    // TODO: to be lifted once redimension+insert is supported in one operator;
    // at that point attributes/dimensions must be matched by name, not position.
    if (srcDims.size() != dstDims.size())
    {
        throw USER_EXCEPTION(SCIDB_SE_INFER_SCHEMA, SCIDB_LE_ILLEGAL_OPERATION)
            << "Temporary restriction: target of INSERT must have same dimensions as the source";
    }

    const size_t dimCount = srcDims.size();
    for (size_t d = 0; d < dimCount; ++d)
    {
        if (srcDims[d].getType() != TID_INT64 || dstDims[d].getType() != TID_INT64)
        {
            throw USER_EXCEPTION(SCIDB_SE_INFER_SCHEMA, SCIDB_LE_ILLEGAL_OPERATION)
                << "Temporary restriction: INSERT only supports integer dimensions";
        }

        // Start, chunk interval and overlap must be identical.
        const bool layoutDiffers =
            srcDims[d].getStartMin()      != dstDims[d].getStartMin()      ||
            srcDims[d].getChunkInterval() != dstDims[d].getChunkInterval() ||
            srcDims[d].getChunkOverlap()  != dstDims[d].getChunkOverlap();

        // The source may not extend past the target. A shorter source is
        // accepted only when its length is a whole number of chunks.
        // TODO: shorter sources with a partial last chunk could be allowed,
        // but the "edge chunks" would then have to be detected and rewritten.
        if (layoutDiffers ||
            srcDims[d].getEndMax() > dstDims[d].getEndMax() ||
            (srcDims[d].getEndMax() < dstDims[d].getEndMax() &&
             srcDims[d].getLength() % srcDims[d].getChunkInterval() != 0))
        {
            throw USER_EXCEPTION(SCIDB_SE_INFER_SCHEMA, SCIDB_LE_DIMENSIONS_DONT_MATCH)
                << srcDims[d].getBaseName() << dstDims[d].getBaseName();
        }
    }

    // ---- attributes -------------------------------------------------------
    Attributes const& srcAttrs = srcDesc.getAttributes(true);
    Attributes const& dstAttrs = dstDesc.getAttributes(true);

    if (srcAttrs.size() != dstAttrs.size())
    {
        throw USER_EXCEPTION(SCIDB_SE_INFER_SCHEMA, SCIDB_LE_ILLEGAL_OPERATION)
            << "Temporary restriction: target of INSERT must have same attributes as the source";
    }

    const size_t attrCount = srcAttrs.size();
    for (size_t a = 0; a < attrCount; ++a)
    {
        if (srcAttrs[a].getType() != dstAttrs[a].getType())
        {
            throw USER_EXCEPTION(SCIDB_SE_INFER_SCHEMA, SCIDB_LE_WRONG_ATTRIBUTE_TYPE)
                << srcAttrs[a].getName() << srcAttrs[a].getType() << dstAttrs[a].getType();
        }

        // Nulls cannot be stored into a non-nullable attribute.
        if (!dstAttrs[a].isNullable() && srcAttrs[a].isNullable())
        {
            throw USER_EXCEPTION(SCIDB_SE_INFER_SCHEMA, SCIDB_LE_WRONG_ATTRIBUTE_FLAGS)
                << srcAttrs[a].getName();
        }
    }

    // Note: no array ID is attached to the returned schema. The ID of the new
    // version is only obtained when the operator executes and creates the
    // array; the older store code attached the ID of the previous version
    // here, which is not the version this operator creates.
    return ArrayDesc(arrayName,
                     dstDesc.getAttributes(),
                     dstDesc.getDimensions(),
                     dstDesc.getFlags());
}