Beispiel #1
0
/**
 * Construct an InputArray over the given schema and bind a chunk loader to it.
 *
 * @param array                schema handed to the SinglePassArray base; also
 *                             queried for its empty-bitmap attribute and
 *                             attribute count
 * @param format               passed to ChunkLoader::create() to obtain the loader
 * @param query                owning query; asserted to be non-null
 * @param emptyMode            true starts the array in S_Empty, false in S_Normal
 * @param enforceDataIntegrity stored in _enforceDataIntegrity
 * @param maxCnvErrors         stored in maxErrors
 * @param shadowArrayName      when non-empty, a MemArray shadow array is created
 *                             from generateShadowArraySchema(array, shadowArrayName)
 * @param parallel             stored in parallelLoad
 */
InputArray::InputArray(ArrayDesc const& array,
                       string const& format,
                       boost::shared_ptr<Query>& query,
                       bool emptyMode,
                       bool enforceDataIntegrity,
                       int64_t maxCnvErrors,
                       string const& shadowArrayName,
                       bool parallel)
    : SinglePassArray(array)
    , _chunkLoader(ChunkLoader::create(format))
    , _currChunkIndex(0)
    , strVal(TypeLibrary::getType(TID_STRING))
      // Id of the schema's empty-bitmap attribute, or the invalid marker if it has none.
    , emptyTagAttrID(array.getEmptyBitmapAttribute()
                     ? array.getEmptyBitmapAttribute()->getId()
                     : INVALID_ATTRIBUTE_ID)
    , nLoadedCells(0)
    , nLoadedChunks(0)
    , nErrors(0)
    , maxErrors(maxCnvErrors)
    , state(emptyMode ? S_Empty : S_Normal)
    , nAttrs(array.getAttributes(true).size())
    , parallelLoad(parallel)
    , _enforceDataIntegrity(enforceDataIntegrity)
{
    SCIDB_ASSERT(query);
    _query = query;
    myInstanceID = query->getInstanceID();

    // Per the original author's note, a missing loader here would mean
    // inferSchema() let an unsupported format through.
    SCIDB_ASSERT(_chunkLoader);
    _chunkLoader->bind(this, query);

    // The shadow array is optional: nothing more to do without a name for it.
    if (shadowArrayName.empty()) {
        return;
    }
    shadowArray.reset(new MemArray(generateShadowArraySchema(array, shadowArrayName), query));
}
	/**
	 * Build the output schema for the window operator.
	 *
	 * Each input dimension is copied with the last DimensionDesc constructor
	 * argument set to 0 (presumably the chunk overlap -- TODO confirm against
	 * the DimensionDesc constructor). Every operator parameter from index
	 * 2 * <number of dimensions> onwards must be an aggregate call and
	 * contributes one output attribute. The preceding parameters are, per the
	 * original author's note, the window boundaries consumed in inferSchema.
	 * If the input has an empty-bitmap attribute, a matching attribute is
	 * appended as the last output attribute.
	 *
	 * @param desc the input array schema
	 * @return the schema of the window operator's result
	 * @throws USER_QUERY_EXCEPTION (SCIDB_LE_OP_WINDOW_ERROR5) if a parameter
	 *         past the window boundaries is not an aggregate call
	 */
	inline ArrayDesc createWindowDesc(ArrayDesc const& desc)
	{
		Dimensions const& inputDims = desc.getDimensions();
		size_t const nDims = inputDims.size();

		// Clone the dimensions of the input array.
		Dimensions windowDims(nDims);
		for (size_t d = 0; d < nDims; ++d)
		{
			DimensionDesc const& in = inputDims[d];
			windowDims[d] = DimensionDesc(in.getBaseName(),
										  in.getNamesAndAliases(),
										  in.getStartMin(),
										  in.getCurrStart(),
										  in.getCurrEnd(),
										  in.getEndMax(),
										  in.getChunkInterval(),
										  0);
		}

		ArrayDesc output(desc.getName(), Attributes(), windowDims);

		// Parameters after the 2 * nDims leading ones describe the aggregates.
		for (size_t p = nDims * 2; p < _parameters.size(); ++p)
		{
			boost::shared_ptr<scidb::OperatorParam> param = _parameters[p];

			if (param->getParamType() != PARAM_AGGREGATE_CALL)
			{
				throw USER_QUERY_EXCEPTION(SCIDB_SE_INFER_SCHEMA,
										   SCIDB_LE_OP_WINDOW_ERROR5,
										   param->getParsingContext());
			}
			addAggregatedAttribute((shared_ptr<OperatorParamAggregateCall> &) param, desc, output, true);
		}

		// Carry the empty-bitmap attribute over, placed after all aggregates.
		AttributeDesc const* emptyTag = desc.getEmptyBitmapAttribute();
		if (emptyTag)
		{
			output.addAttribute(AttributeDesc(output.getAttributes().size(),
											  emptyTag->getName(),
											  emptyTag->getType(),
											  emptyTag->getFlags(),
											  emptyTag->getDefaultCompressionMethod()));
		}

		return output;
	}
    /**
     * Build the output schema for the window operator.
     *
     * The input dimensions are replicated field by field, except that the
     * eighth DimensionDesc constructor argument is forced to 0 (presumably the
     * chunk overlap -- TODO confirm against the DimensionDesc constructor).
     * Operator parameters from index 2 * <number of dimensions> onwards are
     * handed to addAggregatedAttribute(), each adding an attribute to the
     * result. If the input has an empty-bitmap attribute, an equivalent
     * attribute is appended last.
     *
     * @param desc the input array schema
     * @return the schema of the window operator's result
     */
    inline ArrayDesc createWindowDesc(ArrayDesc const& desc)
    {
        Dimensions const& inputDims = desc.getDimensions();
        size_t const nDims = inputDims.size();

        Dimensions windowDims(nDims);
        for (size_t d = 0; d < nDims; ++d)
        {
            DimensionDesc const& in = inputDims[d];
            windowDims[d] = DimensionDesc(in.getBaseName(),
                                          in.getNamesAndAliases(),
                                          in.getStartMin(),
                                          in.getCurrStart(),
                                          in.getCurrEnd(),
                                          in.getEndMax(),
                                          in.getChunkInterval(),
                                          0,
                                          in.getType(),
                                          in.getFlags(),
                                          in.getMappingArrayName(),
                                          in.getComment(),
                                          in.getFuncMapOffset(),
                                          in.getFuncMapScale());
        }

        ArrayDesc output(desc.getName(), Attributes(), windowDims);

        // Everything after the first 2 * nDims parameters is treated as an aggregate call.
        size_t const nParams = _parameters.size();
        for (size_t p = nDims * 2; p < nParams; ++p)
        {
            addAggregatedAttribute((shared_ptr <OperatorParamAggregateCall> &) _parameters[p], desc, output);
        }

        // Keep the empty-bitmap attribute, positioned after the aggregates.
        AttributeDesc const* emptyTag = desc.getEmptyBitmapAttribute();
        if (emptyTag)
        {
            output.addAttribute(AttributeDesc(output.getAttributes().size(),
                                              emptyTag->getName(),
                                              emptyTag->getType(),
                                              emptyTag->getFlags(),
                                              emptyTag->getDefaultCompressionMethod()));
        }

        return output;
    }
    /**
     * Validate the destination schema given as _parameters[0] against the
     * single input schema and compute the operator's result schema.
     * (The error codes suggest this is the redimension operator.)
     *
     * Steps:
     *  1. Build a working schema made of the aggregate-call results
     *     (_parameters[1..]) followed by every source attribute whose name
     *     does not collide with one already present. Each aggregate result
     *     must exist as an attribute of the destination.
     *  2. Require the destination to have an empty-bitmap attribute.
     *  3. Every destination attribute must match a working-schema attribute
     *     (same type, nullability not narrowed), or a source dimension
     *     (TID_INT64, flags 0), or be the empty indicator.
     *  4. Every destination dimension must match a working-schema attribute
     *     (not an aggregate result; integral type other than TID_UINT64) or a
     *     source dimension. Otherwise it is a new "synthetic" dimension, of
     *     which at most one is allowed and only when there are no aggregates.
     *
     * @param schemas input schemas; exactly one is expected
     * @param query   not used in this body
     * @return ArrayDesc named after the source, with the destination's
     *         attributes, dimensions and flags
     * @throws USER_EXCEPTION (SCIDB_SE_INFER_SCHEMA) on any violated rule
     */
    ArrayDesc inferSchema(std::vector< ArrayDesc> schemas, boost::shared_ptr< Query> query)
    {
        assert(schemas.size() == 1);

        ArrayDesc const& sourceSchema = schemas[0];
        ArrayDesc targetSchema = ((boost::shared_ptr<OperatorParamSchema>&)_parameters[0])->getSchema();

        // Working schema: aggregate results first, then source attributes; source dimensions.
        ArrayDesc candidateSchema(sourceSchema.getName(), Attributes(), sourceSchema.getDimensions());
        vector<string> aggregatedNames;

        // --- 1a. aggregate calls -------------------------------------------------
        for (size_t p = 1; p < _parameters.size(); ++p)
        {
            addAggregatedAttribute((shared_ptr <OperatorParamAggregateCall>&) _parameters[p], sourceSchema, candidateSchema);

            // The attribute just appended is the result of this aggregate call.
            Attributes const& soFar = candidateSchema.getAttributes();
            string aggName = soFar[soFar.size() - 1].getName();

            bool declaredInTarget = false;
            BOOST_FOREACH(const AttributeDesc &dstAttr, targetSchema.getAttributes())
            {
                if (dstAttr.getName() == aggName)
                {
                    declaredInTarget = true;
                    break;
                }
            }
            if (!declaredInTarget)
            {
                throw USER_EXCEPTION(SCIDB_SE_INFER_SCHEMA, SCIDB_LE_ATTRIBUTE_DOESNT_EXIST) << aggName << targetSchema.getName();
            }
            aggregatedNames.push_back(aggName);
        }

        // --- 1b. remaining source attributes -------------------------------------
        BOOST_FOREACH(const AttributeDesc &srcAttr, sourceSchema.getAttributes())
        {
            // A source attribute named like something already collected is skipped.
            bool shadowed = false;
            BOOST_FOREACH(const AttributeDesc &known, candidateSchema.getAttributes())
            {
                if (known.getName() == srcAttr.getName())
                {
                    shadowed = true;
                    break;
                }
            }
            if (shadowed)
            {
                continue;
            }

            candidateSchema.addAttribute(AttributeDesc(candidateSchema.getAttributes().size(),
                                                       srcAttr.getName(),
                                                       srcAttr.getType(),
                                                       srcAttr.getFlags(),
                                                       srcAttr.getDefaultCompressionMethod(),
                                                       srcAttr.getAliases(),
                                                       &srcAttr.getDefaultValue(),
                                                       srcAttr.getDefaultValueExpr(),
                                                       srcAttr.getVarSize()));
        }

        // --- 2. destination must carry an empty bitmap ---------------------------
        if (!targetSchema.getEmptyBitmapAttribute())
        {
            throw USER_EXCEPTION(SCIDB_SE_INFER_SCHEMA, SCIDB_LE_OP_REDIMENSION_ERROR1);
        }

        // --- 3. validate destination attributes ----------------------------------
        BOOST_FOREACH(const AttributeDesc &dstAttr, targetSchema.getAttributes())
        {
            bool resolved = false;

            // First choice: an attribute of the same name in the working schema.
            BOOST_FOREACH(const AttributeDesc &srcAttr, candidateSchema.getAttributes())
            {
                if (!(srcAttr.getName() == dstAttr.getName()))
                {
                    continue;
                }
                if (srcAttr.getType() != dstAttr.getType())
                {
                    throw USER_EXCEPTION(SCIDB_SE_INFER_SCHEMA, SCIDB_LE_WRONG_ATTRIBUTE_TYPE)
                        << srcAttr.getName() << srcAttr.getType() << dstAttr.getType();
                }
                if (srcAttr.isNullable() && !dstAttr.isNullable())
                {
                    throw USER_EXCEPTION(SCIDB_SE_INFER_SCHEMA, SCIDB_LE_WRONG_ATTRIBUTE_FLAGS)
                        << srcAttr.getName();
                }
                resolved = true;
                break;
            }
            if (resolved)
            {
                continue;
            }

            // Second choice: a source dimension turned into an attribute.
            BOOST_FOREACH(const DimensionDesc &srcDim, candidateSchema.getDimensions())
            {
                if (!srcDim.hasNameAndAlias(dstAttr.getName()))
                {
                    continue;
                }
                if (dstAttr.getType() != TID_INT64)
                {
                    throw USER_EXCEPTION(SCIDB_SE_INFER_SCHEMA, SCIDB_LE_WRONG_DESTINATION_ATTRIBUTE_TYPE)
                        << dstAttr.getName() << TID_INT64;
                }
                if (dstAttr.getFlags() != 0)
                {
                    throw USER_EXCEPTION(SCIDB_SE_INFER_SCHEMA, SCIDB_LE_WRONG_DESTINATION_ATTRIBUTE_FLAGS)
                        << dstAttr.getName();
                }
                resolved = true;
                break;
            }
            if (resolved)
            {
                continue;
            }

            // Nothing matched: only the empty indicator may appear from nowhere.
            if (!dstAttr.isEmptyIndicator())
            {
                throw USER_EXCEPTION(SCIDB_SE_INFER_SCHEMA, SCIDB_LE_UNEXPECTED_DESTINATION_ATTRIBUTE)
                    << dstAttr.getName();
            }
        }

        // --- 4. validate destination dimensions and collect them -----------------
        Dimensions outputDims;
        size_t nNewDims = 0;
        BOOST_FOREACH(const DimensionDesc &dstDim, targetSchema.getDimensions())
        {
            if (dstDim.getChunkOverlap() > dstDim.getChunkInterval())
            {
                throw USER_EXCEPTION(SCIDB_SE_INFER_SCHEMA, SCIDB_LE_OVERLAP_CANT_BE_LARGER_CHUNK);
            }

            bool resolved = false;

            // First choice: an attribute of the working schema becomes this dimension.
            BOOST_FOREACH(const AttributeDesc &srcAttr, candidateSchema.getAttributes())
            {
                if (!dstDim.hasNameAndAlias(srcAttr.getName()))
                {
                    continue;
                }
                // Aggregate results cannot be used as dimensions.
                for (size_t k = 0; k < aggregatedNames.size(); ++k)
                {
                    if (srcAttr.getName() == aggregatedNames[k])
                    {
                        throw USER_EXCEPTION(SCIDB_SE_INFER_SCHEMA, SCIDB_LE_OP_REDIMENSION_ERROR2);
                    }
                }
                if ( !IS_INTEGRAL(srcAttr.getType())  || srcAttr.getType() == TID_UINT64 )
                {
                    throw USER_EXCEPTION(SCIDB_SE_INFER_SCHEMA, SCIDB_LE_WRONG_SOURCE_ATTRIBUTE_TYPE)
                        << srcAttr.getName() << TID_INT64;
                }
                resolved = true;
                break;
            }

            // Second choice: a source dimension carried over by name.
            if (!resolved)
            {
                BOOST_FOREACH(const DimensionDesc &srcDim, candidateSchema.getDimensions())
                {
                    if (srcDim.hasNameAndAlias(dstDim.getBaseName()))
                    {
                        resolved = true;
                        break;
                    }
                }
            }

            // Otherwise it is a synthetic dimension: at most one, and never with aggregates.
            if (!resolved)
            {
                const bool firstSynthetic = (nNewDims == 0);
                ++nNewDims;
                if (!firstSynthetic || !aggregatedNames.empty())
                {
                    throw USER_EXCEPTION(SCIDB_SE_INFER_SCHEMA, SCIDB_LE_UNEXPECTED_DESTINATION_DIMENSION) << dstDim.getBaseName();
                }
            }

            // Every accepted destination dimension is emitted unchanged.
            outputDims.push_back(dstDim);
        }

        return ArrayDesc(sourceSchema.getName(), targetSchema.getAttributes(), outputDims, targetSchema.getFlags());
    }
    /**
     * Validate the destination schema given as _parameters[0] against the
     * single input schema and compute the operator's result schema.
     * (The error codes suggest this is the redimension_store operator.)
     *
     * Steps:
     *  1. Build a working schema made of the aggregate-call results
     *     (_parameters[1..]) followed by every source attribute whose name
     *     does not collide with one already present.
     *  2. Require the destination to have an empty-bitmap attribute.
     *  3. Every destination attribute must match a working-schema attribute
     *     (same type, nullability not narrowed), or a TID_INT64 source
     *     dimension (destination attribute TID_INT64, flags 0), or be the
     *     empty indicator.
     *  4. Every destination dimension must have zero chunk overlap and match
     *     either a working-schema attribute (not an aggregate result;
     *     TID_INT64, flags 0, destination dimension TID_INT64) or a source
     *     dimension with identical type, start, length and chunk interval.
     *
     * @param schemas input schemas; exactly one is expected
     * @param query   not used in this body
     * @return ArrayDesc named "<source name>_redimension" with the
     *         destination's attributes, dimensions and flags
     * @throws USER_EXCEPTION (SCIDB_SE_INFER_SCHEMA) on any violated rule
     */
    ArrayDesc inferSchema(std::vector< ArrayDesc> schemas, boost::shared_ptr< Query> query)
    {
        assert(schemas.size() == 1);

        ArrayDesc const& sourceSchema = schemas[0];
        ArrayDesc targetSchema = ((boost::shared_ptr<OperatorParamSchema>&)_parameters[0])->getSchema();

        // Working schema: aggregate results first, then source attributes; source dimensions.
        ArrayDesc candidateSchema(sourceSchema.getName(), Attributes(), sourceSchema.getDimensions());
        vector<string> aggregatedNames;

        // --- 1a. aggregate calls -------------------------------------------------
        for (size_t p = 1; p < _parameters.size(); ++p)
        {
            addAggregatedAttribute((shared_ptr <OperatorParamAggregateCall>&) _parameters[p], sourceSchema, candidateSchema);

            // The attribute just appended is the result of this aggregate call.
            Attributes const& soFar = candidateSchema.getAttributes();
            aggregatedNames.push_back(soFar[soFar.size() - 1].getName());
        }

        // --- 1b. remaining source attributes -------------------------------------
        BOOST_FOREACH(const AttributeDesc &srcAttr, sourceSchema.getAttributes())
        {
            // A source attribute named like something already collected is skipped.
            bool shadowed = false;
            BOOST_FOREACH(const AttributeDesc &known, candidateSchema.getAttributes())
            {
                if (known.getName() == srcAttr.getName())
                {
                    shadowed = true;
                    break;
                }
            }
            if (shadowed)
            {
                continue;
            }

            candidateSchema.addAttribute(AttributeDesc(candidateSchema.getAttributes().size(),
                                                       srcAttr.getName(),
                                                       srcAttr.getType(),
                                                       srcAttr.getFlags(),
                                                       srcAttr.getDefaultCompressionMethod(),
                                                       srcAttr.getAliases(),
                                                       &srcAttr.getDefaultValue(),
                                                       srcAttr.getDefaultValueExpr(),
                                                       srcAttr.getComment(),
                                                       srcAttr.getVarSize()));
        }

        // --- 2. destination must carry an empty bitmap ---------------------------
        if (!targetSchema.getEmptyBitmapAttribute())
        {
            throw USER_EXCEPTION(SCIDB_SE_INFER_SCHEMA, SCIDB_LE_OP_REDIMENSION_ERROR1);
        }

        // --- 3. validate destination attributes ----------------------------------
        BOOST_FOREACH(const AttributeDesc &dstAttr, targetSchema.getAttributes())
        {
            bool resolved = false;

            // First choice: an attribute of the same name in the working schema.
            BOOST_FOREACH(const AttributeDesc &srcAttr, candidateSchema.getAttributes())
            {
                if (!(srcAttr.getName() == dstAttr.getName()))
                {
                    continue;
                }
                if (srcAttr.getType() != dstAttr.getType())
                {
                    throw USER_EXCEPTION(SCIDB_SE_INFER_SCHEMA, SCIDB_LE_WRONG_ATTRIBUTE_TYPE)
                        << srcAttr.getName() << srcAttr.getType() << dstAttr.getType();
                }
                if (srcAttr.isNullable() && !dstAttr.isNullable())
                {
                    throw USER_EXCEPTION(SCIDB_SE_INFER_SCHEMA, SCIDB_LE_WRONG_ATTRIBUTE_FLAGS)
                        << srcAttr.getName();
                }
                resolved = true;
                break;
            }
            if (resolved)
            {
                continue;
            }

            // Second choice: a source dimension turned into an attribute.
            BOOST_FOREACH(const DimensionDesc &srcDim, candidateSchema.getDimensions())
            {
                if (!srcDim.hasNameOrAlias(dstAttr.getName()))
                {
                    continue;
                }
                if (dstAttr.getType() != TID_INT64)
                {
                    throw USER_EXCEPTION(SCIDB_SE_INFER_SCHEMA, SCIDB_LE_WRONG_DESTINATION_ATTRIBUTE_TYPE)
                        << dstAttr.getName() << TID_INT64;
                }
                if (srcDim.getType() != TID_INT64)
                {
                    throw USER_EXCEPTION(SCIDB_SE_INFER_SCHEMA, SCIDB_LE_WRONG_SOURCE_DIMENSION_TYPE)
                        << dstAttr.getName() << TID_INT64;
                }
                if (dstAttr.getFlags() != 0)
                {
                    throw USER_EXCEPTION(SCIDB_SE_INFER_SCHEMA, SCIDB_LE_WRONG_DESTINATION_ATTRIBUTE_FLAGS)
                        << dstAttr.getName();
                }
                resolved = true;
                break;
            }
            if (resolved)
            {
                continue;
            }

            // Nothing matched: only the empty indicator may appear from nowhere.
            if (!dstAttr.isEmptyIndicator())
            {
                throw USER_EXCEPTION(SCIDB_SE_INFER_SCHEMA, SCIDB_LE_UNEXPECTED_DESTINATION_ATTRIBUTE)
                    << dstAttr.getName();
            }
        }

        // --- 4. validate destination dimensions ----------------------------------
        BOOST_FOREACH(const DimensionDesc &dstDim, targetSchema.getDimensions())
        {
            if (dstDim.getChunkOverlap() != 0)
            {
                throw USER_EXCEPTION(SCIDB_SE_INFER_SCHEMA, SCIDB_LE_OP_REDIMENSION_STORE_ERROR3);
            }

            bool resolved = false;

            // First choice: an attribute of the working schema becomes this dimension.
            BOOST_FOREACH(const AttributeDesc &srcAttr, candidateSchema.getAttributes())
            {
                if (!dstDim.hasNameOrAlias(srcAttr.getName()))
                {
                    continue;
                }
                // Aggregate results cannot be used as dimensions.
                for (size_t k = 0; k < aggregatedNames.size(); ++k)
                {
                    if (srcAttr.getName() == aggregatedNames[k])
                    {
                        throw USER_EXCEPTION(SCIDB_SE_INFER_SCHEMA, SCIDB_LE_OP_REDIMENSION_ERROR2);
                    }
                }
                if (srcAttr.getType() != TID_INT64)
                {
                    throw USER_EXCEPTION(SCIDB_SE_INFER_SCHEMA, SCIDB_LE_WRONG_SOURCE_ATTRIBUTE_TYPE)
                        << srcAttr.getName() << TID_INT64;
                }
                if (srcAttr.getFlags() != 0)
                {
                    throw USER_EXCEPTION(SCIDB_SE_INFER_SCHEMA, SCIDB_LE_WRONG_SOURCE_ATTRIBUTE_FLAGS)
                       << srcAttr.getName() << TID_INT64;
                }
                if (dstDim.getType() != TID_INT64)
                {
                    throw USER_EXCEPTION(SCIDB_SE_INFER_SCHEMA, SCIDB_LE_WRONG_DESTINATION_DIMENSION_TYPE)
                        << srcAttr.getName() << TID_INT64;
                }
                resolved = true;
                break;
            }
            if (resolved)
            {
                continue;
            }

            // Second choice: a source dimension, which must be preserved exactly.
            BOOST_FOREACH(const DimensionDesc &srcDim, candidateSchema.getDimensions())
            {
                if (!srcDim.hasNameOrAlias(dstDim.getBaseName()))
                {
                    continue;
                }
                if (dstDim.getType() != srcDim.getType()                   ||
                    dstDim.getStart() != srcDim.getStart()                 ||
                    dstDim.getLength() != srcDim.getLength()               ||
                    dstDim.getChunkInterval() != srcDim.getChunkInterval())
                {
                    throw USER_EXCEPTION(SCIDB_SE_INFER_SCHEMA, SCIDB_LE_DIMENSIONS_DONT_MATCH)
                        << srcDim.getBaseName() << dstDim.getBaseName();
                }
                resolved = true;
                break;
            }

            // This variant allows no dimensions without a source.
            if (!resolved)
            {
                throw USER_EXCEPTION(SCIDB_SE_INFER_SCHEMA, SCIDB_LE_UNEXPECTED_DESTINATION_DIMENSION) << dstDim.getBaseName();
            }
        }

        return ArrayDesc(sourceSchema.getName()+"_redimension", targetSchema.getAttributes(), targetSchema.getDimensions(), targetSchema.getFlags());
    }