Esempio n. 1
0
TupleArray::TupleArray(ArrayDesc const& schema, vector< boost::shared_ptr<Tuple> > const& data, Coordinate offset)
: desc(schema),
  start(schema.getDimensions()[0].getStart() + offset),
  end(start + offset + schema.getDimensions()[0].getLength() - 1),
  tuples(data), chunkSize(schema.getDimensions()[0].getChunkInterval())
{
    desc.cutOverlap();
    if (Coordinate(start + tuples.size()) <= end) {
        end = start + tuples.size() - 1;
    }
}
 void fillUsedPlugins(const ArrayDesc& desc, vector<string>& plugins) const
 {
     for (size_t i = 0; i < desc.getAttributes().size(); i++) {
         const string& libName = TypeLibrary::getTypeLibraries().getObjectLibrary(desc.getAttributes()[i].getType());
         if (libName != "scidb")
             plugins.push_back(libName);
     }
     for (size_t i = 0; i < desc.getDimensions().size(); i++) {
         const string& libName = TypeLibrary::getTypeLibraries().getObjectLibrary(desc.getDimensions()[i].getType());
         if (libName != "scidb")
             plugins.push_back(libName);
     }
 }
Esempio n. 3
0
 SplitArray::SplitArray(ArrayDesc const& desc,
                        const boost::shared_array<char>& src,
                        Coordinates const& from,
                        Coordinates const& till,
                        shared_ptr<Query>const& query)
 : DelegateArray(desc, shared_ptr<Array>(), true),
   _startingChunk(from),
   _from(from),
   _till(till),
   _size(from.size()),
   _src(src),
   _empty(false)
 {
     assert(query);
     _query = query;
     desc.getChunkPositionFor(_startingChunk);
     Dimensions const& dims = desc.getDimensions();
     for (size_t i = 0, n = dims.size(); i < n; i++) { 
         _size[i] = _till[i] - _from[i] + 1;
         if (_size[i] == 0) { 
             _empty = true;
         }
         if (_till[i] > dims[i].getEndMax()) { 
             _till[i] = dims[i].getEndMax();
         }
     }
 }
Esempio n. 4
0
TupleArray::TupleArray(ArrayDesc const& schema, vector< boost::shared_ptr<ConstArrayIterator> > const& arrayIterators, size_t shift, size_t step)
: desc(schema),
  start(schema.getDimensions()[0].getStart()),
  end(schema.getDimensions()[0].getEndMax()),
  chunkSize(schema.getDimensions()[0].getChunkInterval())
{
    if (schema.getDimensions().size() != 1)
        throw USER_EXCEPTION(SCIDB_SE_EXECUTION, SCIDB_LE_MULTIDIMENSIONAL_ARRAY_NOT_ALLOWED);
    append(arrayIterators, shift, step);
    if (start == MIN_COORDINATE || end == MAX_COORDINATE) {
        start = 0;
        end = tuples.size()-1;
    } else if (Coordinate(start + tuples.size()) <= end) {
        end = start + tuples.size() - 1;
    }
}
Esempio n. 5
0
 ArrayDesc InputArray::generateShadowArraySchema(ArrayDesc const& targetArray, string const& shadowArrayName)
 {
     Attributes const& srcAttrs = targetArray.getAttributes(true);
     size_t nAttrs = srcAttrs.size();
     Attributes dstAttrs(nAttrs+2);
     for (size_t i = 0; i < nAttrs; i++) {
         dstAttrs[i] = AttributeDesc(i, srcAttrs[i].getName(), TID_STRING,  AttributeDesc::IS_NULLABLE, 0);
     }
     dstAttrs[nAttrs] = AttributeDesc(nAttrs, "row_offset", TID_INT64, 0, 0);
     dstAttrs[nAttrs+1] = AttributeDesc(nAttrs+1, DEFAULT_EMPTY_TAG_ATTRIBUTE_NAME,
                                        TID_INDICATOR, AttributeDesc::IS_EMPTY_INDICATOR, 0);
     return ArrayDesc(shadowArrayName, dstAttrs, targetArray.getDimensions());
 }
	//param desc --> the input array schema
	ArrayDesc createWindowDesc(ArrayDesc const& desc)
	{
		//get dimensions for output array
		Attributes const &attrs = desc.getAttributes();
		/*
		Dimensions aggrDims(dims.size());
		for (size_t i = 0; i < dims.size(); i++)
		{
			DimensionDesc const& srcDim = dims[i];
			aggrDims[i] = DimensionDesc(srcDim.getBaseName(),
									    srcDim.getNamesAndAliases(),
								   	    srcDim.getStartMin(),
									    srcDim.getCurrStart(),
									    srcDim.getCurrEnd(),
									    srcDim.getEndMax(),
									    srcDim.getChunkInterval(),
									    0);
		}
		*/

		Attributes newAttributes;
		size_t n = 0;
		for (size_t i=desc.getDimensions().size()*2; i < _parameters.size()-1; i=i+2)
		{
			const AttributeDesc &attr = attrs[((boost::shared_ptr<OperatorParamReference>&)_parameters[i])->getObjectNo()]; 
			newAttributes.push_back(AttributeDesc(n, attr.getName(), 
												  attr.getType(), 
												  attr.getFlags(),
												  attr.getDefaultCompressionMethod(),
												  attr.getAliases()));
			

		}
	
		return ArrayDesc(desc.getName(), newAttributes, desc.getDimensions());
	}
	//param desc --> the input array schema
	inline ArrayDesc createWindowDesc(ArrayDesc const& desc)
	{
		//get dimensions for output array
		Dimensions const& dims = desc.getDimensions();
		Dimensions aggrDims(dims.size());
		for (size_t i = 0; i < dims.size(); i++)
		{
			DimensionDesc const& srcDim = dims[i];
			aggrDims[i] = DimensionDesc(srcDim.getBaseName(),
									    srcDim.getNamesAndAliases(),
								   	    srcDim.getStartMin(),
									    srcDim.getCurrStart(),
									    srcDim.getCurrEnd(),
									    srcDim.getEndMax(),
									    srcDim.getChunkInterval(),
									    0);
		}

		ArrayDesc output(desc.getName(), Attributes(), aggrDims);
		
		//get the aggregates, check if they make sense, make attributes for output array	
		//_parameters[0~dims.size()*2-1] --> window boundaries, already get in inferSchema
		for (size_t i = dims.size()*2; i < _parameters.size(); i++)
		{
			boost::shared_ptr<scidb::OperatorParam> param = _parameters[i];
			
			if ( param->getParamType() != PARAM_AGGREGATE_CALL) {
				throw USER_QUERY_EXCEPTION(SCIDB_SE_INFER_SCHEMA,
										   SCIDB_LE_OP_WINDOW_ERROR5,
										   _parameters[i]->getParsingContext());
			}
			addAggregatedAttribute( (shared_ptr<OperatorParamAggregateCall> &) param, desc, output, true);
		}

		if ( desc.getEmptyBitmapAttribute())			//?
		{
			AttributeDesc const* eAttr = desc.getEmptyBitmapAttribute();
			output.addAttribute(AttributeDesc(output.getAttributes().size(), 
						eAttr->getName(),
						eAttr->getType(),
						eAttr->getFlags(),
						eAttr->getDefaultCompressionMethod()));
		}

		return output;
	}
Esempio n. 8
0
FITSInputArray::FITSInputArray(ArrayDesc const& array, string const& filePath, uint32_t hdu, std::shared_ptr<Query>& query)
    : parser(filePath),
      hdu(hdu),
      desc(array),
      dims(array.getDimensions()),
      nDims(dims.size()),
      nAttrs(array.getAttributes(true).size()),
      values(nAttrs),
      chunks(nAttrs),
      chunkIterators(nAttrs),
      chunkIndex(0),
      chunkPos(nDims),
      query(query)
{
    initValueHolders();

    // Most initialization steps are only done later, when the first
    // chunk is requested by an iterator. See getChunkByIndex()
}
    inline ArrayDesc createWindowDesc(ArrayDesc const& desc)
    {
        Dimensions const& dims = desc.getDimensions();
        Dimensions aggDims(dims.size());
        for (size_t i = 0, n = dims.size(); i < n; i++)
        {
            DimensionDesc const& srcDim = dims[i];
            aggDims[i] = DimensionDesc(srcDim.getBaseName(),
                                       srcDim.getNamesAndAliases(),
                                       srcDim.getStartMin(),
                                       srcDim.getCurrStart(),
                                       srcDim.getCurrEnd(),
                                       srcDim.getEndMax(),
                                       srcDim.getChunkInterval(), 
                                       0,
                                       srcDim.getType(),
                                       srcDim.getFlags(),
                                       srcDim.getMappingArrayName(),
                                       srcDim.getComment(),
                                       srcDim.getFuncMapOffset(),
                                       srcDim.getFuncMapScale());
        }

        ArrayDesc output (desc.getName(), Attributes(), aggDims);

        for (size_t i = dims.size() * 2, size = _parameters.size(); i < size; i++)
        {
            addAggregatedAttribute( (shared_ptr <OperatorParamAggregateCall> &) _parameters[i], desc, output);
        }

        if ( desc.getEmptyBitmapAttribute())
        {
            AttributeDesc const* eAtt = desc.getEmptyBitmapAttribute();
            output.addAttribute(AttributeDesc(output.getAttributes().size(), eAtt->getName(),
                eAtt->getType(), eAtt->getFlags(), eAtt->getDefaultCompressionMethod()));
        }

        return output;
    }
    ArrayDesc inferSchema(std::vector< ArrayDesc> schemas, boost::shared_ptr< Query> query)
	{
		assert(schemas.size() == 1);

        ArrayDesc const& srcDesc = schemas[0];
        
        ArrayDesc dstDesc = ((boost::shared_ptr<OperatorParamSchema>&)_parameters[0])->getSchema();

        //Compile a desc of all possible attributes (aggregate calls first) and source dimensions
        ArrayDesc aggregationDesc (srcDesc.getName(), Attributes(), srcDesc.getDimensions());
        vector<string> aggregatedNames;

        //add aggregate calls first
        for (size_t i = 1; i < _parameters.size(); i++)
        {
            addAggregatedAttribute( (shared_ptr <OperatorParamAggregateCall>&) _parameters[i], srcDesc, aggregationDesc);
            aggregatedNames.push_back(aggregationDesc.getAttributes()[aggregationDesc.getAttributes().size()-1].getName());
        }

        //add other attributes
        BOOST_FOREACH(const AttributeDesc &srcAttr, srcDesc.getAttributes())
        {
            //if there's an attribute with same name as an aggregate call - skip the attribute
            bool found = false;
            BOOST_FOREACH(const AttributeDesc &aggAttr, aggregationDesc.getAttributes())
            {
                if( aggAttr.getName() == srcAttr.getName())
                {
                    found = true;
                }
            }

            if (!found)
            {
                aggregationDesc.addAttribute(AttributeDesc( aggregationDesc.getAttributes().size(),
                                                            srcAttr.getName(),
                                                            srcAttr.getType(),
                                                            srcAttr.getFlags(),
                                                            srcAttr.getDefaultCompressionMethod(),
                                                            srcAttr.getAliases(),
                                                            &srcAttr.getDefaultValue(),
                                                            srcAttr.getDefaultValueExpr(),
                                                            srcAttr.getComment(),
                                                            srcAttr.getVarSize()));
            }
        }

        //Ensure attributes names uniqueness.        
        if (!dstDesc.getEmptyBitmapAttribute())
            throw USER_EXCEPTION(SCIDB_SE_INFER_SCHEMA, SCIDB_LE_OP_REDIMENSION_ERROR1);
 
        BOOST_FOREACH(const AttributeDesc &dstAttr, dstDesc.getAttributes())
        {
            BOOST_FOREACH(const AttributeDesc &srcAttr, aggregationDesc.getAttributes())
            {
                if (srcAttr.getName() == dstAttr.getName())
                {
                    if (srcAttr.getType() != dstAttr.getType())
                    {
                        throw USER_EXCEPTION(SCIDB_SE_INFER_SCHEMA, SCIDB_LE_WRONG_ATTRIBUTE_TYPE)
                            << srcAttr.getName() << srcAttr.getType() << dstAttr.getType();
                    }
                    if (!dstAttr.isNullable() && srcAttr.isNullable())
                    {
                        throw USER_EXCEPTION(SCIDB_SE_INFER_SCHEMA, SCIDB_LE_WRONG_ATTRIBUTE_FLAGS)
                            << srcAttr.getName();
                    }

                    goto NextAttr;
                }
            }
            BOOST_FOREACH(const DimensionDesc &srcDim, aggregationDesc.getDimensions())
            {
                if (srcDim.hasNameOrAlias(dstAttr.getName()))
                {
                    if (dstAttr.getType() != TID_INT64)
                    {
                        throw USER_EXCEPTION(SCIDB_SE_INFER_SCHEMA, SCIDB_LE_WRONG_DESTINATION_ATTRIBUTE_TYPE)
                            << dstAttr.getName() << TID_INT64;
                    }
                    if (srcDim.getType() != TID_INT64)
                    {
                        throw USER_EXCEPTION(SCIDB_SE_INFER_SCHEMA, SCIDB_LE_WRONG_SOURCE_DIMENSION_TYPE)
                            << dstAttr.getName() << TID_INT64;
                    }
                    if (dstAttr.getFlags() != 0)
                    {
                        throw USER_EXCEPTION(SCIDB_SE_INFER_SCHEMA, SCIDB_LE_WRONG_DESTINATION_ATTRIBUTE_FLAGS)
                            << dstAttr.getName();
                    }

                    goto NextAttr;
                }
            }

            if (dstAttr.isEmptyIndicator() == false)
            {
                throw USER_EXCEPTION(SCIDB_SE_INFER_SCHEMA, SCIDB_LE_UNEXPECTED_DESTINATION_ATTRIBUTE)
                    << dstAttr.getName();
            }
          NextAttr:;
        }
        
        BOOST_FOREACH(const DimensionDesc &dstDim, dstDesc.getDimensions())
        {
            if (dstDim.getChunkOverlap() != 0)
                throw USER_EXCEPTION(SCIDB_SE_INFER_SCHEMA, SCIDB_LE_OP_REDIMENSION_STORE_ERROR3);

            BOOST_FOREACH(const AttributeDesc &srcAttr, aggregationDesc.getAttributes())
            {
                if (dstDim.hasNameOrAlias(srcAttr.getName()))
                {
                    for (size_t i = 0; i< aggregatedNames.size(); i++)
                    {
                        if (srcAttr.getName() == aggregatedNames[i])
                            throw USER_EXCEPTION(SCIDB_SE_INFER_SCHEMA, SCIDB_LE_OP_REDIMENSION_ERROR2);
                    }
                    if (srcAttr.getType() != TID_INT64)
                    {
                        throw USER_EXCEPTION(SCIDB_SE_INFER_SCHEMA, SCIDB_LE_WRONG_SOURCE_ATTRIBUTE_TYPE)
                            << srcAttr.getName() << TID_INT64;
                    }
                    if (srcAttr.getFlags() != 0)
                    {
                        throw USER_EXCEPTION(SCIDB_SE_INFER_SCHEMA, SCIDB_LE_WRONG_SOURCE_ATTRIBUTE_FLAGS)
                           << srcAttr.getName() << TID_INT64;
                    }
                    if (dstDim.getType() != TID_INT64)
                    {
                        throw USER_EXCEPTION(SCIDB_SE_INFER_SCHEMA, SCIDB_LE_WRONG_DESTINATION_DIMENSION_TYPE)
                            << srcAttr.getName() << TID_INT64;
                    }
                    goto NextDim;
                }
            }
            BOOST_FOREACH(const DimensionDesc &srcDim, aggregationDesc.getDimensions())
            {
                if (srcDim.hasNameOrAlias(dstDim.getBaseName()))
                {
                    if (dstDim.getType() != srcDim.getType()                   ||
                        dstDim.getStart() != srcDim.getStart()                 ||
                        dstDim.getLength() != srcDim.getLength()               ||
                        dstDim.getChunkInterval() != srcDim.getChunkInterval())
                    {
                        throw USER_EXCEPTION(SCIDB_SE_INFER_SCHEMA, SCIDB_LE_DIMENSIONS_DONT_MATCH)
                            << srcDim.getBaseName() << dstDim.getBaseName();
                    }
                    goto NextDim;
                }
            }

            throw USER_EXCEPTION(SCIDB_SE_INFER_SCHEMA, SCIDB_LE_UNEXPECTED_DESTINATION_DIMENSION) << dstDim.getBaseName();
          NextDim:;
        }

        return ArrayDesc(srcDesc.getName()+"_redimension", dstDesc.getAttributes(), dstDesc.getDimensions(), dstDesc.getFlags());
	}
Esempio n. 11
0
inline size_t nCol(const ArrayDesc& desc, bool transpose=false) { return nCol(desc.getDimensions(), transpose); }
    ArrayDesc inferSchema(std::vector< ArrayDesc> schemas, boost::shared_ptr< Query> query)
	{
		assert(schemas.size() == 1);

        ArrayDesc const& srcDesc = schemas[0];
        ArrayDesc dstDesc = ((boost::shared_ptr<OperatorParamSchema>&)_parameters[0])->getSchema();

        //Compile a desc of all possible attributes (aggregate calls first) and source dimensions
        ArrayDesc aggregationDesc (srcDesc.getName(), Attributes(), srcDesc.getDimensions());
        vector<string> aggregatedNames;

        //add aggregate calls first
        for (size_t i = 1; i < _parameters.size(); i++)
        {
            addAggregatedAttribute( (shared_ptr <OperatorParamAggregateCall>&) _parameters[i], srcDesc, aggregationDesc);
            string aggName =  aggregationDesc.getAttributes()[aggregationDesc.getAttributes().size()-1].getName();
            bool aggFound = false;
            BOOST_FOREACH(const AttributeDesc &dstAttr, dstDesc.getAttributes()) { 
                if (dstAttr.getName() == aggName) { 
                    aggFound = true;
                    break;
                }
            }
            if (!aggFound) { 
                throw USER_EXCEPTION(SCIDB_SE_INFER_SCHEMA, SCIDB_LE_ATTRIBUTE_DOESNT_EXIST) << aggName << dstDesc.getName();
            }
            aggregatedNames.push_back(aggName);
        }

        //add other attributes
        BOOST_FOREACH(const AttributeDesc &srcAttr, srcDesc.getAttributes())
        {
            //if there's an attribute with same name as an aggregate call - skip the attribute
            bool found = false;
            BOOST_FOREACH(const AttributeDesc &aggAttr, aggregationDesc.getAttributes())
            {
                if( aggAttr.getName() == srcAttr.getName())
                {
                    found = true;
                }
            }

            if (!found)
            {
                aggregationDesc.addAttribute(AttributeDesc( aggregationDesc.getAttributes().size(),
                                                            srcAttr.getName(),
                                                            srcAttr.getType(),
                                                            srcAttr.getFlags(),
                                                            srcAttr.getDefaultCompressionMethod(),
                                                            srcAttr.getAliases(),
                                                            &srcAttr.getDefaultValue(),
                                                            srcAttr.getDefaultValueExpr(),
                                                            srcAttr.getVarSize()));
            }
        }

        //Ensure attributes names uniqueness.        
        if (!dstDesc.getEmptyBitmapAttribute())
            throw USER_EXCEPTION(SCIDB_SE_INFER_SCHEMA, SCIDB_LE_OP_REDIMENSION_ERROR1);
 
        BOOST_FOREACH(const AttributeDesc &dstAttr, dstDesc.getAttributes())
        {
            BOOST_FOREACH(const AttributeDesc &srcAttr, aggregationDesc.getAttributes())
            {
                if (srcAttr.getName() == dstAttr.getName())
                {
                    if (srcAttr.getType() != dstAttr.getType())
                    {
                        throw USER_EXCEPTION(SCIDB_SE_INFER_SCHEMA, SCIDB_LE_WRONG_ATTRIBUTE_TYPE)
                            << srcAttr.getName() << srcAttr.getType() << dstAttr.getType();
                    }
                    if (!dstAttr.isNullable() && srcAttr.isNullable())
                    {
                        throw USER_EXCEPTION(SCIDB_SE_INFER_SCHEMA, SCIDB_LE_WRONG_ATTRIBUTE_FLAGS)
                            << srcAttr.getName();
                    }

                    goto NextAttr;
                }
            }
            BOOST_FOREACH(const DimensionDesc &srcDim, aggregationDesc.getDimensions())
            {
                if (srcDim.hasNameAndAlias(dstAttr.getName()))
                {
                    if (dstAttr.getType() != TID_INT64)
                    {
                        throw USER_EXCEPTION(SCIDB_SE_INFER_SCHEMA, SCIDB_LE_WRONG_DESTINATION_ATTRIBUTE_TYPE)
                            << dstAttr.getName() << TID_INT64;
                    }
                    if (dstAttr.getFlags() != 0)
                    {
                        throw USER_EXCEPTION(SCIDB_SE_INFER_SCHEMA, SCIDB_LE_WRONG_DESTINATION_ATTRIBUTE_FLAGS)
                            << dstAttr.getName();
                    }

                    goto NextAttr;
                }
            }

            if (dstAttr.isEmptyIndicator() == false)
            {
                throw USER_EXCEPTION(SCIDB_SE_INFER_SCHEMA, SCIDB_LE_UNEXPECTED_DESTINATION_ATTRIBUTE)
                    << dstAttr.getName();
            }
          NextAttr:;
        }
        
        Dimensions outputDims;
        size_t nNewDims = 0;
        BOOST_FOREACH(const DimensionDesc &dstDim, dstDesc.getDimensions())
        {
            if (dstDim.getChunkOverlap() > dstDim.getChunkInterval())
            {
                throw USER_EXCEPTION(SCIDB_SE_INFER_SCHEMA, SCIDB_LE_OVERLAP_CANT_BE_LARGER_CHUNK);
            }
            BOOST_FOREACH(const AttributeDesc &srcAttr, aggregationDesc.getAttributes())
            {
                if (dstDim.hasNameAndAlias(srcAttr.getName()))
                {
                    for (size_t i = 0; i< aggregatedNames.size(); i++)
                    {
                        if (srcAttr.getName() == aggregatedNames[i])
                            throw USER_EXCEPTION(SCIDB_SE_INFER_SCHEMA, SCIDB_LE_OP_REDIMENSION_ERROR2);
                    }
                    if ( !IS_INTEGRAL(srcAttr.getType())  || srcAttr.getType() == TID_UINT64 )
                    {
                        throw USER_EXCEPTION(SCIDB_SE_INFER_SCHEMA, SCIDB_LE_WRONG_SOURCE_ATTRIBUTE_TYPE)
                            << srcAttr.getName() << TID_INT64;
                    }
                    outputDims.push_back(dstDim);
                    goto NextDim;
                }
            }
            BOOST_FOREACH(const DimensionDesc &srcDim, aggregationDesc.getDimensions())
            {
                if (srcDim.hasNameAndAlias(dstDim.getBaseName()))
                {
                    DimensionDesc outputDim = dstDim;
                    outputDims.push_back(outputDim);
                    goto NextDim;
                }
            }
            //one synthetic dimension allowed
            if (nNewDims++ != 0 || !aggregatedNames.empty() )
            {
                throw USER_EXCEPTION(SCIDB_SE_INFER_SCHEMA, SCIDB_LE_UNEXPECTED_DESTINATION_DIMENSION) << dstDim.getBaseName();
            }
            outputDims.push_back(dstDim);
            NextDim:;
        }

        return ArrayDesc(srcDesc.getName(), dstDesc.getAttributes(), outputDims, dstDesc.getFlags());
	}
    shared_ptr< Array > execute(vector< shared_ptr< Array> >& inputArrays, shared_ptr<Query> query)
    {
    	
    	// I maintain the log of the operator in a local file named after Correlation_N.log, N is the instance ID.
    	stringstream logFileName;
    	logFileName << "/home/scidb/preselect_" << query->getInstanceID() << ".log";
    	FILE *logFile;
    	logFile = fopen(logFileName.str().c_str(), "w");
    	
        shared_ptr<Array> originalArray = inputArrays[0];
        shared_ptr<Array> correlationArray = inputArrays[1];
        
        ArrayDesc originalSchema = originalArray->getArrayDesc();
        ArrayDesc corrSchema = correlationArray->getArrayDesc();
        
        Dimensions originalDims = originalSchema.getDimensions();
        Dimensions corrDims = corrSchema.getDimensions();
        DimensionDesc originalDimsP = originalDims[1];
        DimensionDesc corrDimsP = corrDims[0];
        // Note the correlation array doesn't have Y column.
        Coordinate p = corrDimsP.getCurrLength();
        fprintf(logFile, "p = %ld\n # of chunk = %ld\n", p, corrSchema.getNumberOfChunks());
        fflush(logFile);
        shared_ptr<ConstArrayIterator> corrArrayIter = correlationArray->getIterator(0);
        if(! corrArrayIter->end() )
        {
        	correlation *corr = new correlation[p];
	        // The correlation array will always have only 1 chunk (we designed correlation array like this), so no loops here.
	        shared_ptr<ConstChunkIterator> corrChunkIter = corrArrayIter->getChunk().getConstIterator();
			for(Coordinate i=0; i<p; ++i)
			{
				corr[i].id = i+1;
				corr[i].corr = corrChunkIter->getItem().getDouble();
				//fprintf(logFile, "%d, %f\n", corr[i].id, corr[i].corr);
				++(*corrChunkIter);
			}
			//fflush(logFile);
			qsort(corr, p, sizeof(correlation), &comp);
			for(Coordinate i=0; i<p; ++i)
			{
				fprintf(logFile, "%d, %f\n", corr[i].id, corr[i].corr);
			}
			fflush(logFile);
			
			Coordinate d = ((boost::shared_ptr<OperatorParamPhysicalExpression>&)_parameters[0])->getExpression()->evaluate().getInt64();
	        fprintf(logFile, "d=%ld\n", d);
	        stringstream ss;
	        vector<string> names;
	        names.push_back("j");
	        vector<TypeId> types;
	        types.push_back(TID_INT64);
	        for(Coordinate i=0; i<d; ++i)
	        {
	        	ss << "j=" << corr[i].id << " or ";
	        }
	        ss << "j=" << p+1;
	        fprintf(logFile, "%s\n", ss.str().c_str());
	        fflush(logFile);
	        Expression e;
        	e.compile(ss.str(), names, types);
        	fclose(logFile);
        	boost::shared_ptr<scidb::Query> emptyQuery;
	        return boost::shared_ptr<Array>(new FilterArray(_schema, inputArrays[0], boost::make_shared<Expression>(e), emptyQuery, _tileMode));
        }
        else
        {
        	shared_ptr<Array> outputArray(new MemArray(_schema, query));
        	fclose(logFile);
        	return outputArray;
        }
        
    }
Esempio n. 14
0
inline unsigned int chunkCol(const ArrayDesc& desc, bool transpose=false) { return chunkCol(desc.getDimensions(), transpose); }
 ReshapeArray::ReshapeArray(ArrayDesc const& desc, boost::shared_ptr<Array> const& array)
 : DelegateArray(desc, array),
   inDims(array->getArrayDesc().getDimensions()),
   outDims(desc.getDimensions())
 {
 } 
  shared_ptr< Array > execute(vector< shared_ptr< Array> >& inputArrays, shared_ptr<Query> query) {
    shared_ptr<Array> outputArray(new MemArray(_schema, query));
    shared_ptr<Array> inputArray = inputArrays[0];
    ArrayDesc inputSchema = inputArray->getArrayDesc();

    // Get descriptor of two dimensions d and n.
    DimensionDesc dimsN = inputSchema.getDimensions()[0]; 
    DimensionDesc dimsD = inputSchema.getDimensions()[1];
    size_t n = dimsN.getCurrEnd() - dimsN.getCurrStart() + 1;
    // Note: the input data set should have d+1 dimensions (including Y)
    size_t d = dimsD.getCurrEnd() - dimsD.getCurrStart();
    size_t nStart = dimsN.getCurrStart();
    size_t dStart = dimsD.getCurrStart(); 

    // Get chunk size of n.
    size_t nChunkSize = dimsN.getChunkInterval();

    // Helps to accumulate the n and L.
    z_i[0] = 1.0;

    shared_ptr<ConstArrayIterator> inputArrayIter = inputArray->getConstIterator(0);
    Coordinates chunkPosition;

    size_t i, j, k, m;
    while(! inputArrayIter->end() ) {
      shared_ptr<ConstChunkIterator> chunkIter = inputArrayIter->getChunk().getConstIterator();
      chunkPosition = inputArrayIter->getPosition();
      for(i=chunkPosition[0]; i<chunkPosition[0] + nChunkSize; i++) {
        // In case the chunk is partially filled.
        if(i == n + nStart) {
          break;
        }
        for(j=chunkPosition[1], m=1; j<=chunkPosition[1]+d; j++, m++) {
          // In case the chunk is partially filled.
          if(j == d + 1 + dStart) {
            break;
          }
          z_i[m] = chunkIter->getItem().getDouble();
          ++(*chunkIter);
        }
        for(k=0; k<=d+1; ++k) {
        // This operator is not optimized for entries with value zero.
        // TODO: should use fabs(z_i[k]) < 10e-6
//          if(z_i[k] == 0.0) {
//            continue;
//          }
          for(m=0; m<=k; ++m) {
            Gamma[k][m] += z_i[k]*z_i[m];
          }
        }
      }
      ++(*inputArrayIter);
    }

    /**
     * The "logical" instance ID of the instance responsible for coordination of query.
     * COORDINATOR_INSTANCE if instance execute this query itself.
     */
    if(query->getInstancesCount() > 1) {
      if(query->getInstanceID() != 0) {
        // I am not the coordinator, I should send my Gamma matrix out.
        shared_ptr <SharedBuffer> buf ( new MemoryBuffer(NULL, sizeof(double) * (d+3) * (d+2) / 2) );
        double *Gammabuf = static_cast<double*> (buf->getData());
        for(size_t i=0; i<d+2; ++i) {
          for(size_t j=0; j<=i; ++j) {
            *Gammabuf = Gamma[i][j];
            ++Gammabuf;
          }
        }
        BufSend(0, buf, query);
        return outputArray;
      }
      else {
        // I am the coordinator, I should collect Gamma matrix from workers.
        for(InstanceID l = 1; l<query->getInstancesCount(); ++l) {
          shared_ptr<SharedBuffer> buf = BufReceive(l, query);
          double *Gammabuf = static_cast<double*> (buf->getData());
          for(size_t i=0; i<d+2; ++i) {
            for(size_t j=0; j<=i; ++j) {
              Gamma[i][j] += *Gammabuf;
              ++Gammabuf;
            }
          }
        }
      } // end if getInstanceID() != 0
    } //end if InstancesCount() > 1

    return writeGamma(d, query);
  }
  shared_ptr< Array > execute(vector< shared_ptr< Array> >& inputArrays, shared_ptr<Query> query) {
    shared_ptr<Array> outputArray(new MemArray(_schema, query));
    shared_ptr<Array> inputArray = inputArrays[0];
    ArrayDesc inputSchema = inputArray->getArrayDesc();

    // Get descriptor of two dimensions d and n.
    DimensionDesc dimsN = inputSchema.getDimensions()[0]; 
    DimensionDesc dimsD = inputSchema.getDimensions()[1];
    size_t n = dimsN.getCurrLength();
    // Note: the input data set should have d+1 dimensions (including Y)
    size_t d = dimsD.getCurrLength() - 1;
    nlq.N = n;
    nlq.d = d;
    shared_ptr<ConstArrayIterator> inputArrayIter = inputArray->getConstIterator(0);
    Coordinates cellPosition;

    size_t i;
    double value;
    while(! inputArrayIter->end() ) {
      shared_ptr<ConstChunkIterator> chunkIter = inputArrayIter->getChunk().getConstIterator();
      // For each cell in the current chunk.
      // This will skip the empty cells.
      while(! chunkIter->end() ) {
        cellPosition = chunkIter->getPosition();
        value = chunkIter->getItem().getDouble();
        nlq.L[ cellPosition[1] ] += value;
        nlq.Q[ cellPosition[1] ] += value * value;
        ++(*chunkIter);
      }
      ++(*inputArrayIter);
    }

    /**
     * The "logical" instance ID of the instance responsible for coordination of query.
     * COORDINATOR_INSTANCE if instance execute this query itself.
     */
    if(query->getInstancesCount() > 1) {
      if(query->getInstanceID() != 0) {
        // I am not the coordinator, I should send my Gamma matrix out.
        shared_ptr <SharedBuffer> buf ( new MemoryBuffer(NULL, sizeof(double) * (d*2+2) ));
        double *Gammabuf = static_cast<double*> (buf->getData());
        for(i=1; i<=d+1; ++i) {
          *Gammabuf = nlq.L[i];
          ++Gammabuf;
        }
        for(i=1; i<=d+1; ++i) {
          *Gammabuf = nlq.Q[i];
          ++Gammabuf;
        }
        BufSend(0, buf, query);
        return outputArray;
      }
      else {
        // I am the coordinator, I should collect Gamma matrix from workers.
        for(InstanceID l = 1; l<query->getInstancesCount(); ++l) {
          shared_ptr<SharedBuffer> buf = BufReceive(l, query);
          double *Gammabuf = static_cast<double*> (buf->getData());
          for(i=1; i<=d+1; ++i) {
            nlq.L[i] += *Gammabuf;
            ++Gammabuf;
          }
          for(i=1; i<=d+1; ++i) {
            nlq.Q[i] += *Gammabuf;
            ++Gammabuf;
          }
        }
      }// end if getInstanceID() != 0
    }//end if InstancesCount() > 1

    return writeGamma(query);
  }
  shared_ptr< Array > execute(vector< shared_ptr< Array> >& inputArrays, shared_ptr<Query> query)
  {
    shared_ptr<Array> outputArray(new MemArray(_schema, query));
    shared_ptr<Array> inputArray = inputArrays[0];
    ArrayDesc inputSchema = inputArray->getArrayDesc();
    // Get descriptor of two dimensions d and n.
    DimensionDesc dimsN = inputSchema.getDimensions()[0]; 
    DimensionDesc dimsD = inputSchema.getDimensions()[1];
    int64_t n = dimsN.getCurrEnd() - dimsN.getCurrStart() + 1;
    // Note: the input data set should have d+1 dimensions (including Y)
    d = dimsD.getCurrEnd() - dimsD.getCurrStart();
    idY = d+1;
    int64_t nStart = dimsN.getCurrStart();
    int64_t dStart = dimsD.getCurrStart();
    // Get chunk size of n.
    int64_t nChunkSize = dimsN.getChunkInterval();
    k = ((shared_ptr<OperatorParamPhysicalExpression>&)_parameters[0])->getExpression()->evaluate().getInt64();
    if (_parameters.size() == 2) {
      idY = ((shared_ptr<OperatorParamPhysicalExpression>&)_parameters[1])->getExpression()->evaluate().getInt64();
    }

    #ifdef DEBUG
      stringstream ss;
      ss << getenv("HOME") << "/groupdiagdensegamma-instance-" << query->getInstanceID() << ".log";
      log.open(ss.str().c_str(), ios::out);
      log << "n = " << n << endl << "d = " << d << endl << "k = " << k << endl;
      log << "nStart = " << nStart << endl << "dStart = " << dStart << endl;
      log << "nChunkSize = " << nChunkSize << endl;
      log << "idY = " << idY << endl;
    #endif

    shared_ptr<ConstArrayIterator> inputArrayIter = inputArray->getConstIterator(0);
    Coordinates chunkPosition;
    int64_t i, j, k, m, l;
    double value;
    NLQ tmp;
    map<double, struct NLQ>::iterator it;

    while(! inputArrayIter->end() ) {
      shared_ptr<ConstChunkIterator> chunkIter = inputArrayIter->getChunk().getConstIterator();
      chunkPosition = inputArrayIter->getPosition();
      #ifdef DEBUG
        log << "Getting into chunk (" << chunkPosition[0] << ", " << chunkPosition[1] << ")." << endl;
      #endif
      for(i=chunkPosition[0]; i<chunkPosition[0] + nChunkSize; i++) {
        if(i == n + nStart) {
          #ifdef DEBUG
            log << "Reaching row " << i << ", exiting." << endl;
          #endif
          break;
        }
        for(j=chunkPosition[1], m=1; j<=chunkPosition[1]+d; j++, m++) {
          if(j == d + 1 + dStart) {
            #ifdef DEBUG
              log << "Reaching column " << j << ", exiting." << endl;
            #endif
            break;
          }
          value = chunkIter->getItem().getDouble();
          tmp.L[m] = value;
          tmp.Q[m] = value * value;
          ++(*chunkIter);
        }
        double Y = tmp.L[idY];
        it = nlq.find(Y);
        if (it == nlq.end()) {
          #ifdef DEBUG
            log << "Cannot find NLQ entry for class " << Y << ", creating new." << endl;
          #endif
          nlq[Y].N = 1;
          nlq[Y].groupId = Y;
        }
        else {
          nlq[Y].N++;
        }
        for (k=1, l=1; k<=d+1; k++) {
          if (k == idY) {
            continue;
          }
          nlq[Y].L[l] += tmp.L[k];
          nlq[Y].Q[l] += tmp.Q[k];
          l++;
        }
        nlq[Y].L[d+1] += tmp.L[idY];
        nlq[Y].Q[d+1] += tmp.Q[idY];
      }
      ++(*inputArrayIter);
    }

    /**
     * The "logical" instance ID of the instance responsible for coordination of query.
     * COORDINATOR_INSTANCE if instance execute this query itself.
     */
    size_t localClassCount = nlq.size();
    #ifdef DEBUG
      log << "localClassCount = " << localClassCount << endl;
    #endif
    if(query->getInstancesCount() > 1) {
      if(query->getInstanceID() != 0) {
        // I am not the coordinator, I should send my NLQ out.
        #ifdef DEBUG
          log << "I am not the coordinator, I should send my NLQ out." << endl;
        #endif
        shared_ptr <SharedBuffer> buf ( new MemoryBuffer(NULL, sizeof(struct NLQ) * localClassCount ));
        struct NLQ *NLQbuf = static_cast<struct NLQ*> (buf->getData());
        for(it = nlq.begin(); it != nlq.end(); it++) {
          *NLQbuf = it->second;
          ++NLQbuf;
        }
        BufSend(0, buf, query);
        #ifdef DEBUG
          log << "Exiting." << endl;
        #endif
        return outputArray;
      }
      else {
        // I am the coordinator, I should collect NLQ from workers.
        #ifdef DEBUG
          log << "I am the coordinator, I should collect NLQ from workers." << endl;
        #endif
        for(InstanceID l = 1; l<query->getInstancesCount(); ++l) {
          shared_ptr<SharedBuffer> buf = BufReceive(l, query);
          if(! buf) {
            #ifdef DEBUG
              log << "Nothing from instance " << l << ", continue." << endl;
            #endif
            continue;
          }
          int64_t remoteClassCount = buf->getSize() / sizeof(struct NLQ);
          struct NLQ* NLQbuf = static_cast<struct NLQ*> (buf->getData());
          #ifdef DEBUG
            log << "Received " << remoteClassCount << " entries from instance " << l << endl;
          #endif
          for(i=0; i<remoteClassCount; ++i) {
            it = nlq.find(NLQbuf->groupId);
            if( it == nlq.end() ) {
              #ifdef DEBUG
                log << "Cannot find NLQ entry for class " << NLQbuf->groupId << ", creating new." << endl;
              #endif
              nlq[NLQbuf->groupId] = *NLQbuf;
            }
            else {
              it->second.N += NLQbuf->N;
              for(j=1; j<=d+1; ++j) {
                it->second.L[j] += NLQbuf->L[j];
                it->second.Q[j] += NLQbuf->Q[j];
              }
            }
            ++NLQbuf;
          }
          #ifdef DEBUG
            log << "Merge complete." << endl;
          #endif
        }
      }// end if getInstanceID() != 0
    }//end if InstancesCount() > 1

    return writeGamma(query);
  }
    /**
     * Perform operator-specific checks of input and return the shape of the output. Currently,
     * the output array must exist.
     * @param schemas the shapes of the input arrays
     * @param query the query context
     */
    ArrayDesc inferSchema(std::vector< ArrayDesc> schemas, shared_ptr< Query> query)
    {
        assert(schemas.size() == 1);
        assert(_parameters.size() == 1);

        string arrayName = ((shared_ptr<OperatorParamReference>&)_parameters[0])->getObjectName();
        ArrayDesc const& srcDesc = schemas[0];

        //Ensure attributes names uniqueness.
        ArrayDesc dstDesc;
        if (!SystemCatalog::getInstance()->getArrayDesc(arrayName, dstDesc, false))
        {
            throw USER_EXCEPTION(SCIDB_SE_INFER_SCHEMA, SCIDB_LE_ARRAY_DOESNT_EXIST) << arrayName;
        }

        if(dstDesc.isImmutable())
        {
            throw USER_EXCEPTION(SCIDB_SE_INFER_SCHEMA, SCIDB_LE_ILLEGAL_OPERATION)
                    << "Target of INSERT must be a mutable array";
        }

        Dimensions const& srcDims = srcDesc.getDimensions();
        Dimensions const& dstDims = dstDesc.getDimensions();

        if (srcDims.size() != dstDims.size())
        {
            //TODO: this will get lifted when we allow redimension+insert in the same op
            //and when we DO implement redimension+insert - we will need to match attributes/dimensions by name, not position.
            throw USER_EXCEPTION(SCIDB_SE_INFER_SCHEMA, SCIDB_LE_ILLEGAL_OPERATION)
                    << "Temporary restriction: target of INSERT must have same dimensions as the source";
        }

        for (size_t i = 0, n = srcDims.size(); i < n; i++)
        {
            if( srcDims[i].getType() != TID_INT64 || dstDims[i].getType() != TID_INT64)
            {
                throw USER_EXCEPTION(SCIDB_SE_INFER_SCHEMA, SCIDB_LE_ILLEGAL_OPERATION)
                    << "Temporary restriction: INSERT only supports integer dimensions";
            }

            //TODO: we can also allow arrays that are smaller whose length is not evenly divided by chunk interval
            //but then we have to detect "edge chunks" and rewrite them cleverly
            if( srcDims[i].getStartMin() != dstDims[i].getStartMin() ||
                srcDims[i].getChunkInterval() != dstDims[i].getChunkInterval() ||
                srcDims[i].getChunkOverlap() != dstDims[i].getChunkOverlap() ||
                srcDims[i].getEndMax() > dstDims[i].getEndMax() ||
                ( srcDims[i].getEndMax() < dstDims[i].getEndMax() &&
                  srcDims[i].getLength() % srcDims[i].getChunkInterval() != 0))
            {
                throw USER_EXCEPTION(SCIDB_SE_INFER_SCHEMA, SCIDB_LE_DIMENSIONS_DONT_MATCH)
                        << srcDims[i].getBaseName() << dstDims[i].getBaseName();
            }
        }

        Attributes const& srcAttrs = srcDesc.getAttributes(true);
        Attributes const& dstAttrs = dstDesc.getAttributes(true);

        if (srcAttrs.size() != dstAttrs.size())
        {
            throw USER_EXCEPTION(SCIDB_SE_INFER_SCHEMA, SCIDB_LE_ILLEGAL_OPERATION)
                    << "Temporary restriction: target of INSERT must have same attributes as the source";
        }
        for (size_t i = 0, n = srcAttrs.size(); i < n; i++)
        {
            if(srcAttrs[i].getType() != dstAttrs[i].getType())
            {
                throw USER_EXCEPTION(SCIDB_SE_INFER_SCHEMA, SCIDB_LE_WRONG_ATTRIBUTE_TYPE)
                    << srcAttrs[i].getName() << srcAttrs[i].getType() << dstAttrs[i].getType();
            }

            //can't store nulls into a non-nullable attribute
            if(!dstAttrs[i].isNullable() && srcAttrs[i].isNullable())
            {
                throw USER_EXCEPTION(SCIDB_SE_INFER_SCHEMA, SCIDB_LE_WRONG_ATTRIBUTE_FLAGS)
                   << srcAttrs[i].getName();
            }
        }

        //Note: let us NOT add arrayID numbers to the schema - because we do not have our ArrayID yet.
        //We will get our ArrayID when we execute and create the array. Until then - don't bother.
        //Old store code adds the arrayID to the schema - but that's the arrayID of the previous version,
        //not the new version created by the op. A dangerous fallacy - stupid and unnecessary.
        return ArrayDesc(arrayName, dstDesc.getAttributes(), dstDesc.getDimensions(), dstDesc.getFlags());
    }