ArrayDesc inferSchema(std::vector< ArrayDesc> schemas, boost::shared_ptr< Query> query)
	{
		assert(schemas.size() == 1);

        ArrayDesc const& srcDesc = schemas[0];
        
        ArrayDesc dstDesc = ((boost::shared_ptr<OperatorParamSchema>&)_parameters[0])->getSchema();

        //Compile a desc of all possible attributes (aggregate calls first) and source dimensions
        ArrayDesc aggregationDesc (srcDesc.getName(), Attributes(), srcDesc.getDimensions());
        vector<string> aggregatedNames;

        //add aggregate calls first
        for (size_t i = 1; i < _parameters.size(); i++)
        {
            addAggregatedAttribute( (shared_ptr <OperatorParamAggregateCall>&) _parameters[i], srcDesc, aggregationDesc);
            aggregatedNames.push_back(aggregationDesc.getAttributes()[aggregationDesc.getAttributes().size()-1].getName());
        }

        //add other attributes
        BOOST_FOREACH(const AttributeDesc &srcAttr, srcDesc.getAttributes())
        {
            //if there's an attribute with same name as an aggregate call - skip the attribute
            bool found = false;
            BOOST_FOREACH(const AttributeDesc &aggAttr, aggregationDesc.getAttributes())
            {
                if( aggAttr.getName() == srcAttr.getName())
                {
                    found = true;
                }
            }

            if (!found)
            {
                aggregationDesc.addAttribute(AttributeDesc( aggregationDesc.getAttributes().size(),
                                                            srcAttr.getName(),
                                                            srcAttr.getType(),
                                                            srcAttr.getFlags(),
                                                            srcAttr.getDefaultCompressionMethod(),
                                                            srcAttr.getAliases(),
                                                            &srcAttr.getDefaultValue(),
                                                            srcAttr.getDefaultValueExpr(),
                                                            srcAttr.getComment(),
                                                            srcAttr.getVarSize()));
            }
        }

        //Ensure attributes names uniqueness.        
        if (!dstDesc.getEmptyBitmapAttribute())
            throw USER_EXCEPTION(SCIDB_SE_INFER_SCHEMA, SCIDB_LE_OP_REDIMENSION_ERROR1);
 
        BOOST_FOREACH(const AttributeDesc &dstAttr, dstDesc.getAttributes())
        {
            BOOST_FOREACH(const AttributeDesc &srcAttr, aggregationDesc.getAttributes())
            {
                if (srcAttr.getName() == dstAttr.getName())
                {
                    if (srcAttr.getType() != dstAttr.getType())
                    {
                        throw USER_EXCEPTION(SCIDB_SE_INFER_SCHEMA, SCIDB_LE_WRONG_ATTRIBUTE_TYPE)
                            << srcAttr.getName() << srcAttr.getType() << dstAttr.getType();
                    }
                    if (!dstAttr.isNullable() && srcAttr.isNullable())
                    {
                        throw USER_EXCEPTION(SCIDB_SE_INFER_SCHEMA, SCIDB_LE_WRONG_ATTRIBUTE_FLAGS)
                            << srcAttr.getName();
                    }

                    goto NextAttr;
                }
            }
            BOOST_FOREACH(const DimensionDesc &srcDim, aggregationDesc.getDimensions())
            {
                if (srcDim.hasNameOrAlias(dstAttr.getName()))
                {
                    if (dstAttr.getType() != TID_INT64)
                    {
                        throw USER_EXCEPTION(SCIDB_SE_INFER_SCHEMA, SCIDB_LE_WRONG_DESTINATION_ATTRIBUTE_TYPE)
                            << dstAttr.getName() << TID_INT64;
                    }
                    if (srcDim.getType() != TID_INT64)
                    {
                        throw USER_EXCEPTION(SCIDB_SE_INFER_SCHEMA, SCIDB_LE_WRONG_SOURCE_DIMENSION_TYPE)
                            << dstAttr.getName() << TID_INT64;
                    }
                    if (dstAttr.getFlags() != 0)
                    {
                        throw USER_EXCEPTION(SCIDB_SE_INFER_SCHEMA, SCIDB_LE_WRONG_DESTINATION_ATTRIBUTE_FLAGS)
                            << dstAttr.getName();
                    }

                    goto NextAttr;
                }
            }

            if (dstAttr.isEmptyIndicator() == false)
            {
                throw USER_EXCEPTION(SCIDB_SE_INFER_SCHEMA, SCIDB_LE_UNEXPECTED_DESTINATION_ATTRIBUTE)
                    << dstAttr.getName();
            }
          NextAttr:;
        }
        
        BOOST_FOREACH(const DimensionDesc &dstDim, dstDesc.getDimensions())
        {
            if (dstDim.getChunkOverlap() != 0)
                throw USER_EXCEPTION(SCIDB_SE_INFER_SCHEMA, SCIDB_LE_OP_REDIMENSION_STORE_ERROR3);

            BOOST_FOREACH(const AttributeDesc &srcAttr, aggregationDesc.getAttributes())
            {
                if (dstDim.hasNameOrAlias(srcAttr.getName()))
                {
                    for (size_t i = 0; i< aggregatedNames.size(); i++)
                    {
                        if (srcAttr.getName() == aggregatedNames[i])
                            throw USER_EXCEPTION(SCIDB_SE_INFER_SCHEMA, SCIDB_LE_OP_REDIMENSION_ERROR2);
                    }
                    if (srcAttr.getType() != TID_INT64)
                    {
                        throw USER_EXCEPTION(SCIDB_SE_INFER_SCHEMA, SCIDB_LE_WRONG_SOURCE_ATTRIBUTE_TYPE)
                            << srcAttr.getName() << TID_INT64;
                    }
                    if (srcAttr.getFlags() != 0)
                    {
                        throw USER_EXCEPTION(SCIDB_SE_INFER_SCHEMA, SCIDB_LE_WRONG_SOURCE_ATTRIBUTE_FLAGS)
                           << srcAttr.getName() << TID_INT64;
                    }
                    if (dstDim.getType() != TID_INT64)
                    {
                        throw USER_EXCEPTION(SCIDB_SE_INFER_SCHEMA, SCIDB_LE_WRONG_DESTINATION_DIMENSION_TYPE)
                            << srcAttr.getName() << TID_INT64;
                    }
                    goto NextDim;
                }
            }
            BOOST_FOREACH(const DimensionDesc &srcDim, aggregationDesc.getDimensions())
            {
                if (srcDim.hasNameOrAlias(dstDim.getBaseName()))
                {
                    if (dstDim.getType() != srcDim.getType()                   ||
                        dstDim.getStart() != srcDim.getStart()                 ||
                        dstDim.getLength() != srcDim.getLength()               ||
                        dstDim.getChunkInterval() != srcDim.getChunkInterval())
                    {
                        throw USER_EXCEPTION(SCIDB_SE_INFER_SCHEMA, SCIDB_LE_DIMENSIONS_DONT_MATCH)
                            << srcDim.getBaseName() << dstDim.getBaseName();
                    }
                    goto NextDim;
                }
            }

            throw USER_EXCEPTION(SCIDB_SE_INFER_SCHEMA, SCIDB_LE_UNEXPECTED_DESTINATION_DIMENSION) << dstDim.getBaseName();
          NextDim:;
        }

        return ArrayDesc(srcDesc.getName()+"_redimension", dstDesc.getAttributes(), dstDesc.getDimensions(), dstDesc.getFlags());
	}
Exemplo n.º 2
0
    ArrayDesc inferSchema(std::vector< ArrayDesc> schemas, boost::shared_ptr< Query> query)
	{
		assert(schemas.size() == 1);

        ArrayDesc const& srcDesc = schemas[0];
        ArrayDesc dstDesc = ((boost::shared_ptr<OperatorParamSchema>&)_parameters[0])->getSchema();

        //Compile a desc of all possible attributes (aggregate calls first) and source dimensions
        ArrayDesc aggregationDesc (srcDesc.getName(), Attributes(), srcDesc.getDimensions());
        vector<string> aggregatedNames;

        //add aggregate calls first
        for (size_t i = 1; i < _parameters.size(); i++)
        {
            addAggregatedAttribute( (shared_ptr <OperatorParamAggregateCall>&) _parameters[i], srcDesc, aggregationDesc);
            string aggName =  aggregationDesc.getAttributes()[aggregationDesc.getAttributes().size()-1].getName();
            bool aggFound = false;
            BOOST_FOREACH(const AttributeDesc &dstAttr, dstDesc.getAttributes()) { 
                if (dstAttr.getName() == aggName) { 
                    aggFound = true;
                    break;
                }
            }
            if (!aggFound) { 
                throw USER_EXCEPTION(SCIDB_SE_INFER_SCHEMA, SCIDB_LE_ATTRIBUTE_DOESNT_EXIST) << aggName << dstDesc.getName();
            }
            aggregatedNames.push_back(aggName);
        }

        //add other attributes
        BOOST_FOREACH(const AttributeDesc &srcAttr, srcDesc.getAttributes())
        {
            //if there's an attribute with same name as an aggregate call - skip the attribute
            bool found = false;
            BOOST_FOREACH(const AttributeDesc &aggAttr, aggregationDesc.getAttributes())
            {
                if( aggAttr.getName() == srcAttr.getName())
                {
                    found = true;
                }
            }

            if (!found)
            {
                aggregationDesc.addAttribute(AttributeDesc( aggregationDesc.getAttributes().size(),
                                                            srcAttr.getName(),
                                                            srcAttr.getType(),
                                                            srcAttr.getFlags(),
                                                            srcAttr.getDefaultCompressionMethod(),
                                                            srcAttr.getAliases(),
                                                            &srcAttr.getDefaultValue(),
                                                            srcAttr.getDefaultValueExpr(),
                                                            srcAttr.getVarSize()));
            }
        }

        //Ensure attributes names uniqueness.        
        if (!dstDesc.getEmptyBitmapAttribute())
            throw USER_EXCEPTION(SCIDB_SE_INFER_SCHEMA, SCIDB_LE_OP_REDIMENSION_ERROR1);
 
        BOOST_FOREACH(const AttributeDesc &dstAttr, dstDesc.getAttributes())
        {
            BOOST_FOREACH(const AttributeDesc &srcAttr, aggregationDesc.getAttributes())
            {
                if (srcAttr.getName() == dstAttr.getName())
                {
                    if (srcAttr.getType() != dstAttr.getType())
                    {
                        throw USER_EXCEPTION(SCIDB_SE_INFER_SCHEMA, SCIDB_LE_WRONG_ATTRIBUTE_TYPE)
                            << srcAttr.getName() << srcAttr.getType() << dstAttr.getType();
                    }
                    if (!dstAttr.isNullable() && srcAttr.isNullable())
                    {
                        throw USER_EXCEPTION(SCIDB_SE_INFER_SCHEMA, SCIDB_LE_WRONG_ATTRIBUTE_FLAGS)
                            << srcAttr.getName();
                    }

                    goto NextAttr;
                }
            }
            BOOST_FOREACH(const DimensionDesc &srcDim, aggregationDesc.getDimensions())
            {
                if (srcDim.hasNameAndAlias(dstAttr.getName()))
                {
                    if (dstAttr.getType() != TID_INT64)
                    {
                        throw USER_EXCEPTION(SCIDB_SE_INFER_SCHEMA, SCIDB_LE_WRONG_DESTINATION_ATTRIBUTE_TYPE)
                            << dstAttr.getName() << TID_INT64;
                    }
                    if (dstAttr.getFlags() != 0)
                    {
                        throw USER_EXCEPTION(SCIDB_SE_INFER_SCHEMA, SCIDB_LE_WRONG_DESTINATION_ATTRIBUTE_FLAGS)
                            << dstAttr.getName();
                    }

                    goto NextAttr;
                }
            }

            if (dstAttr.isEmptyIndicator() == false)
            {
                throw USER_EXCEPTION(SCIDB_SE_INFER_SCHEMA, SCIDB_LE_UNEXPECTED_DESTINATION_ATTRIBUTE)
                    << dstAttr.getName();
            }
          NextAttr:;
        }
        
        Dimensions outputDims;
        size_t nNewDims = 0;
        BOOST_FOREACH(const DimensionDesc &dstDim, dstDesc.getDimensions())
        {
            if (dstDim.getChunkOverlap() > dstDim.getChunkInterval())
            {
                throw USER_EXCEPTION(SCIDB_SE_INFER_SCHEMA, SCIDB_LE_OVERLAP_CANT_BE_LARGER_CHUNK);
            }
            BOOST_FOREACH(const AttributeDesc &srcAttr, aggregationDesc.getAttributes())
            {
                if (dstDim.hasNameAndAlias(srcAttr.getName()))
                {
                    for (size_t i = 0; i< aggregatedNames.size(); i++)
                    {
                        if (srcAttr.getName() == aggregatedNames[i])
                            throw USER_EXCEPTION(SCIDB_SE_INFER_SCHEMA, SCIDB_LE_OP_REDIMENSION_ERROR2);
                    }
                    if ( !IS_INTEGRAL(srcAttr.getType())  || srcAttr.getType() == TID_UINT64 )
                    {
                        throw USER_EXCEPTION(SCIDB_SE_INFER_SCHEMA, SCIDB_LE_WRONG_SOURCE_ATTRIBUTE_TYPE)
                            << srcAttr.getName() << TID_INT64;
                    }
                    outputDims.push_back(dstDim);
                    goto NextDim;
                }
            }
            BOOST_FOREACH(const DimensionDesc &srcDim, aggregationDesc.getDimensions())
            {
                if (srcDim.hasNameAndAlias(dstDim.getBaseName()))
                {
                    DimensionDesc outputDim = dstDim;
                    outputDims.push_back(outputDim);
                    goto NextDim;
                }
            }
            //one synthetic dimension allowed
            if (nNewDims++ != 0 || !aggregatedNames.empty() )
            {
                throw USER_EXCEPTION(SCIDB_SE_INFER_SCHEMA, SCIDB_LE_UNEXPECTED_DESTINATION_DIMENSION) << dstDim.getBaseName();
            }
            outputDims.push_back(dstDim);
            NextDim:;
        }

        return ArrayDesc(srcDesc.getName(), dstDesc.getAttributes(), outputDims, dstDesc.getFlags());
	}
    /**
     * Perform operator-specific checks of input and return the shape of the output. Currently,
     * the output array must exist.
     * @param schemas the shapes of the input arrays
     * @param query the query context
     */
    ArrayDesc inferSchema(std::vector< ArrayDesc> schemas, shared_ptr< Query> query)
    {
        assert(schemas.size() == 1);
        assert(_parameters.size() == 1);

        string arrayName = ((shared_ptr<OperatorParamReference>&)_parameters[0])->getObjectName();
        ArrayDesc const& srcDesc = schemas[0];

        //Ensure attributes names uniqueness.
        ArrayDesc dstDesc;
        if (!SystemCatalog::getInstance()->getArrayDesc(arrayName, dstDesc, false))
        {
            throw USER_EXCEPTION(SCIDB_SE_INFER_SCHEMA, SCIDB_LE_ARRAY_DOESNT_EXIST) << arrayName;
        }

        if(dstDesc.isImmutable())
        {
            throw USER_EXCEPTION(SCIDB_SE_INFER_SCHEMA, SCIDB_LE_ILLEGAL_OPERATION)
                    << "Target of INSERT must be a mutable array";
        }

        Dimensions const& srcDims = srcDesc.getDimensions();
        Dimensions const& dstDims = dstDesc.getDimensions();

        if (srcDims.size() != dstDims.size())
        {
            //TODO: this will get lifted when we allow redimension+insert in the same op
            //and when we DO implement redimension+insert - we will need to match attributes/dimensions by name, not position.
            throw USER_EXCEPTION(SCIDB_SE_INFER_SCHEMA, SCIDB_LE_ILLEGAL_OPERATION)
                    << "Temporary restriction: target of INSERT must have same dimensions as the source";
        }

        for (size_t i = 0, n = srcDims.size(); i < n; i++)
        {
            if( srcDims[i].getType() != TID_INT64 || dstDims[i].getType() != TID_INT64)
            {
                throw USER_EXCEPTION(SCIDB_SE_INFER_SCHEMA, SCIDB_LE_ILLEGAL_OPERATION)
                    << "Temporary restriction: INSERT only supports integer dimensions";
            }

            //TODO: we can also allow arrays that are smaller whose length is not evenly divided by chunk interval
            //but then we have to detect "edge chunks" and rewrite them cleverly
            if( srcDims[i].getStartMin() != dstDims[i].getStartMin() ||
                srcDims[i].getChunkInterval() != dstDims[i].getChunkInterval() ||
                srcDims[i].getChunkOverlap() != dstDims[i].getChunkOverlap() ||
                srcDims[i].getEndMax() > dstDims[i].getEndMax() ||
                ( srcDims[i].getEndMax() < dstDims[i].getEndMax() &&
                  srcDims[i].getLength() % srcDims[i].getChunkInterval() != 0))
            {
                throw USER_EXCEPTION(SCIDB_SE_INFER_SCHEMA, SCIDB_LE_DIMENSIONS_DONT_MATCH)
                        << srcDims[i].getBaseName() << dstDims[i].getBaseName();
            }
        }

        Attributes const& srcAttrs = srcDesc.getAttributes(true);
        Attributes const& dstAttrs = dstDesc.getAttributes(true);

        if (srcAttrs.size() != dstAttrs.size())
        {
            throw USER_EXCEPTION(SCIDB_SE_INFER_SCHEMA, SCIDB_LE_ILLEGAL_OPERATION)
                    << "Temporary restriction: target of INSERT must have same attributes as the source";
        }
        for (size_t i = 0, n = srcAttrs.size(); i < n; i++)
        {
            if(srcAttrs[i].getType() != dstAttrs[i].getType())
            {
                throw USER_EXCEPTION(SCIDB_SE_INFER_SCHEMA, SCIDB_LE_WRONG_ATTRIBUTE_TYPE)
                    << srcAttrs[i].getName() << srcAttrs[i].getType() << dstAttrs[i].getType();
            }

            //can't store nulls into a non-nullable attribute
            if(!dstAttrs[i].isNullable() && srcAttrs[i].isNullable())
            {
                throw USER_EXCEPTION(SCIDB_SE_INFER_SCHEMA, SCIDB_LE_WRONG_ATTRIBUTE_FLAGS)
                   << srcAttrs[i].getName();
            }
        }

        //Note: let us NOT add arrayID numbers to the schema - because we do not have our ArrayID yet.
        //We will get our ArrayID when we execute and create the array. Until then - don't bother.
        //Old store code adds the arrayID to the schema - but that's the arrayID of the previous version,
        //not the new version created by the op. A dangerous fallacy - stupid and unnecessary.
        return ArrayDesc(arrayName, dstDesc.getAttributes(), dstDesc.getDimensions(), dstDesc.getFlags());
    }