Example No. 1
    /**
     * Request a lock for all arrays that will be accessed by this operator.
     * Calls requestLock with a write lock on the target array (the array being inserted into).
     * @param query the query context
     */
    void inferArrayAccess(std::shared_ptr<Query>& query)
    {
        LogicalOperator::inferArrayAccess(query);
        SCIDB_ASSERT(_parameters.size() > 0);
        SCIDB_ASSERT(_parameters[0]->getParamType() == PARAM_ARRAY_REF);
        const string& arrayNameOrg = ((std::shared_ptr<OperatorParamReference>&)_parameters[0])->getObjectName();
        SCIDB_ASSERT(ArrayDesc::isNameUnversioned(arrayNameOrg));

        std::string arrayName;
        std::string namespaceName;
        query->getNamespaceArrayNames(arrayNameOrg, namespaceName, arrayName);

        ArrayDesc srcDesc;
        SCIDB_ASSERT(!srcDesc.isTransient());
        scidb::namespaces::Communicator::getArrayDesc(
            namespaceName, arrayName, SystemCatalog::ANY_VERSION, srcDesc);

        const SystemCatalog::LockDesc::LockMode lockMode =
            srcDesc.isTransient() ? SystemCatalog::LockDesc::XCL : SystemCatalog::LockDesc::WR;

        std::shared_ptr<SystemCatalog::LockDesc>  lock(
            make_shared<SystemCatalog::LockDesc>(
                namespaceName,
                arrayName,
                query->getQueryID(),
                Cluster::getInstance()->getLocalInstanceId(),
                SystemCatalog::LockDesc::COORD,
                lockMode));
        std::shared_ptr<SystemCatalog::LockDesc> resLock = query->requestLock(lock);
        SCIDB_ASSERT(resLock);
        SCIDB_ASSERT(resLock->getLockMode() >= SystemCatalog::LockDesc::WR);
    }
Example No. 2
InputArray::InputArray(ArrayDesc const& array,
                       string const& format,
                       boost::shared_ptr<Query>& query,
                       bool emptyMode,
                       bool enforceDataIntegrity,
                       int64_t maxCnvErrors,
                       string const& shadowArrayName,
                       bool parallel)
:     SinglePassArray(array),
      _chunkLoader(ChunkLoader::create(format)),
      _currChunkIndex(0),
      strVal(TypeLibrary::getType(TID_STRING)),
      emptyTagAttrID(array.getEmptyBitmapAttribute() != NULL
                     ? array.getEmptyBitmapAttribute()->getId()
                     : INVALID_ATTRIBUTE_ID),
      nLoadedCells(0),
      nLoadedChunks(0),
      nErrors(0),
      maxErrors(maxCnvErrors),
      state(emptyMode ? S_Empty : S_Normal),
      nAttrs(array.getAttributes(true).size()),
      parallelLoad(parallel),
      _enforceDataIntegrity(enforceDataIntegrity)
    {
        SCIDB_ASSERT(query);
        _query=query;
        myInstanceID = query->getInstanceID();

        SCIDB_ASSERT(_chunkLoader);   // else inferSchema() messed up
        _chunkLoader->bind(this, query);

        if (!shadowArrayName.empty()) {
            shadowArray.reset(new MemArray(generateShadowArraySchema(array, shadowArrayName), query));
        }
    }
Example No. 3
    /**
     * Perform operator-specific checks of input and return the shape of the output. Currently,
     * the output array must exist.
     * @param schemas the shapes of the input arrays
     * @param query the query context
     */
    ArrayDesc inferSchema(std::vector< ArrayDesc> schemas, std::shared_ptr< Query> query)
    {
        SCIDB_ASSERT(schemas.size() == 1);
        SCIDB_ASSERT(_parameters.size() == 1);

        string arrayNameOrg =
            ((std::shared_ptr<OperatorParamReference>&)_parameters[0])->getObjectName();
        SCIDB_ASSERT(ArrayDesc::isNameUnversioned(arrayNameOrg));

        // Resolve the namespace-qualified array name.

        std::string arrayName;
        std::string namespaceName;
        query->getNamespaceArrayNames(arrayNameOrg, namespaceName, arrayName);

        ArrayDesc dstDesc;
        ArrayDesc const& srcDesc = schemas[0];
        ArrayID arrayId = query->getCatalogVersion(namespaceName, arrayName);
        bool fArrayDesc = scidb::namespaces::Communicator::getArrayDesc(
            namespaceName, arrayName, arrayId, dstDesc, false);
        if (!fArrayDesc) {
            throw USER_EXCEPTION(SCIDB_SE_INFER_SCHEMA, SCIDB_LE_ARRAY_DOESNT_EXIST) << arrayName;
        }
        ArrayDesc::checkConformity(srcDesc, dstDesc,
                                   ArrayDesc::IGNORE_PSCHEME |
                                   ArrayDesc::IGNORE_OVERLAP |
                                   ArrayDesc::IGNORE_INTERVAL); // allows auto-repart()

        SCIDB_ASSERT(dstDesc.getId() == dstDesc.getUAId());
        SCIDB_ASSERT(dstDesc.getName() == arrayName);
        SCIDB_ASSERT(dstDesc.getUAId() > 0);
        return dstDesc;
    }
Example No. 4
 ConcatArray::ConcatArray(ArrayDesc const& array, boost::shared_ptr<Array> const& left, boost::shared_ptr<Array> const& right)
 : DelegateArray(array, left),
   leftArray(left->getArrayDesc().getAttributes().size() == array.getAttributes().size() ? left : boost::shared_ptr<Array>(new NonEmptyableArray(left))),
   rightArray(right->getArrayDesc().getAttributes().size() == array.getAttributes().size() ? right : boost::shared_ptr<Array>(new NonEmptyableArray(right))),
   dims(desc.getDimensions())
 {
     Dimensions const& leftDimensions = left->getArrayDesc().getDimensions();
     Dimensions const& rightDimensions = right->getArrayDesc().getDimensions();
     lastLeft = leftDimensions[CONCAT_DIM].getEndMax();
     firstRight = rightDimensions[CONCAT_DIM].getStartMin();
     concatChunkInterval = leftDimensions[CONCAT_DIM].getChunkInterval() + leftDimensions[CONCAT_DIM].getChunkOverlap();
     size_t nDims = leftDimensions.size();
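     // A "simple append" is possible only when the left array has no overlap on the
     // concatenation dimension, its length is an exact multiple of the chunk interval,
     // and both inputs use identical chunk intervals and overlaps in every dimension.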
     if (leftDimensions[CONCAT_DIM].getChunkOverlap() != 0 
         || leftDimensions[CONCAT_DIM].getLength() % leftDimensions[CONCAT_DIM].getChunkInterval() != 0)
     {
         simpleAppend = false;
     }
     else
     {
         simpleAppend = true;            
         for (size_t i = 0; i < nDims; i++) { 
             if (leftDimensions[i].getChunkInterval() != rightDimensions[i].getChunkInterval()
                 || leftDimensions[i].getChunkOverlap() != rightDimensions[i].getChunkOverlap())
             {
                 simpleAppend = false;
                 break;
             }
         }
     }
 }
Example No. 5
RemoteArray::RemoteArray(const ArrayDesc& arrayDesc, QueryID queryId, InstanceID instanceID)
: StreamArray(arrayDesc), _queryId(queryId), _instanceID(instanceID),
  _received(arrayDesc.getAttributes().size()),
  _messages(arrayDesc.getAttributes().size()),
  _requested(arrayDesc.getAttributes().size())
{
}
Example No. 6
 SplitArray::SplitArray(ArrayDesc const& desc,
                        const boost::shared_array<char>& src,
                        Coordinates const& from,
                        Coordinates const& till,
                        shared_ptr<Query>const& query)
 : DelegateArray(desc, shared_ptr<Array>(), true),
   _startingChunk(from),
   _from(from),
   _till(till),
   _size(from.size()),
   _src(src),
   _empty(false)
 {
     assert(query);
     _query = query;
     desc.getChunkPositionFor(_startingChunk);
     Dimensions const& dims = desc.getDimensions();
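     // Compute the extent of the split region in each dimension, flag an empty region,
     // and clamp 'till' to each dimension's upper bound.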
     for (size_t i = 0, n = dims.size(); i < n; i++) { 
         _size[i] = _till[i] - _from[i] + 1;
         if (_size[i] == 0) { 
             _empty = true;
         }
         if (_till[i] > dims[i].getEndMax()) { 
             _till[i] = dims[i].getEndMax();
         }
     }
 }
Example No. 7
 void fillUsedPlugins(const ArrayDesc& desc, vector<string>& plugins) const
 {
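     // Record every attribute type whose type library is not the built-in "scidb" library.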
     for (size_t i = 0; i < desc.getAttributes().size(); i++) {
         const string& libName = TypeLibrary::getTypeLibraries().getObjectLibrary(desc.getAttributes()[i].getType());
         if (libName != "scidb")
             plugins.push_back(libName);
     }
 }
Example No. 8
RemoteMergedArray::RemoteMergedArray(const ArrayDesc& arrayDesc, QueryID queryId, Statistics& statistics):
        MultiStreamArray(Query::getQueryByID(queryId)->getInstancesCount(), arrayDesc),
        _queryId(queryId),
        _received(arrayDesc.getAttributes().size(), vector<Semaphore>(getStreamsCount())),
        _messages(arrayDesc.getAttributes().size(), vector< boost::shared_ptr<MessageDesc> >(getStreamsCount())),
        _nextPositions(arrayDesc.getAttributes().size(), vector<Coordinates>(getStreamsCount())),
        _hasPositions(arrayDesc.getAttributes().size(), vector<bool>(getStreamsCount(), false))
{
    boost::shared_ptr<Query> query = Query::getQueryByID(queryId);
    _localArray = query->getCurrentResultArray();
}
Example No. 9
TupleArray::TupleArray(ArrayDesc const& schema, vector< boost::shared_ptr<Tuple> > const& data, Coordinate offset)
: desc(schema),
  start(schema.getDimensions()[0].getStart() + offset),
  end(start + offset + schema.getDimensions()[0].getLength() - 1),
  tuples(data), chunkSize(schema.getDimensions()[0].getChunkInterval())
{
    desc.cutOverlap();
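    // Trim 'end' when fewer tuples than the dimension's length were supplied.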
    if (Coordinate(start + tuples.size()) <= end) {
        end = start + tuples.size() - 1;
    }
}
Example No. 10
 ArrayDesc InputArray::generateShadowArraySchema(ArrayDesc const& targetArray, string const& shadowArrayName)
 {
     Attributes const& srcAttrs = targetArray.getAttributes(true);
     size_t nAttrs = srcAttrs.size();
     Attributes dstAttrs(nAttrs+2);
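     // One nullable string attribute per target attribute, plus a row_offset attribute
     // and the empty-tag indicator.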
     for (size_t i = 0; i < nAttrs; i++) {
         dstAttrs[i] = AttributeDesc(i, srcAttrs[i].getName(), TID_STRING,  AttributeDesc::IS_NULLABLE, 0);
     }
     dstAttrs[nAttrs] = AttributeDesc(nAttrs, "row_offset", TID_INT64, 0, 0);
     dstAttrs[nAttrs+1] = AttributeDesc(nAttrs+1, DEFAULT_EMPTY_TAG_ATTRIBUTE_NAME,
                                        TID_INDICATOR, AttributeDesc::IS_EMPTY_INDICATOR, 0);
     return ArrayDesc(shadowArrayName, dstAttrs, targetArray.getDimensions());
 }
Example No. 11
ScanRLEArray::ScanRLEArray(ArrayDesc const& arr, std::string path) :
    RLEArray(arr), _dirPath(path), _maxChunkNo(0), logger(log4cxx::Logger::getLogger("scidb.query.ops.ScanRQArray"))
{
    filesystem::path full_path = filesystem::system_complete(filesystem::path(_dirPath));
    if (!filesystem::exists(full_path))
    {
        throw SYSTEM_EXCEPTION(SCIDB_SE_INTERNAL, SCIDB_LE_CANT_OPEN_PATH) << _dirPath;
    }
    if (!filesystem::is_directory(full_path))
    {
        throw SYSTEM_EXCEPTION(SCIDB_SE_INTERNAL, SCIDB_LE_DIRECTORY_EXPECTED) << _dirPath;
    }

    _maxChunkNo = arr.getNumberOfChunks() / arr.getAttributes().size();
}
Example No. 12
TupleArray::TupleArray(ArrayDesc const& schema, vector< boost::shared_ptr<ConstArrayIterator> > const& arrayIterators, size_t shift, size_t step)
: desc(schema),
  start(schema.getDimensions()[0].getStart()),
  end(schema.getDimensions()[0].getEndMax()),
  chunkSize(schema.getDimensions()[0].getChunkInterval())
{
    if (schema.getDimensions().size() != 1)
        throw USER_EXCEPTION(SCIDB_SE_EXECUTION, SCIDB_LE_MULTIDIMENSIONAL_ARRAY_NOT_ALLOWED);
    append(arrayIterators, shift, step);
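    // With an unbounded dimension, renumber the tuples from 0; otherwise trim 'end'
    // to the number of tuples actually appended.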
    if (start == MIN_COORDINATE || end == MAX_COORDINATE) {
        start = 0;
        end = tuples.size()-1;
    } else if (Coordinate(start + tuples.size()) <= end) {
        end = start + tuples.size() - 1;
    }
}
Example No. 13
	// param desc --> the input array schema
	inline ArrayDesc createWindowDesc(ArrayDesc const& desc)
	{
		//get dimensions for output array
		Dimensions const& dims = desc.getDimensions();
		Dimensions aggrDims(dims.size());
		for (size_t i = 0; i < dims.size(); i++)
		{
			DimensionDesc const& srcDim = dims[i];
			aggrDims[i] = DimensionDesc(srcDim.getBaseName(),
									    srcDim.getNamesAndAliases(),
								   	    srcDim.getStartMin(),
									    srcDim.getCurrStart(),
									    srcDim.getCurrEnd(),
									    srcDim.getEndMax(),
									    srcDim.getChunkInterval(),
									    0);
		}

		ArrayDesc output(desc.getName(), Attributes(), aggrDims);
		
		// Get the aggregate calls, validate them, and create attributes for the output array.
		// _parameters[0 .. dims.size()*2-1] hold the window boundaries, already handled in inferSchema.
		for (size_t i = dims.size()*2; i < _parameters.size(); i++)
		{
			boost::shared_ptr<scidb::OperatorParam> param = _parameters[i];
			
			if ( param->getParamType() != PARAM_AGGREGATE_CALL) {
				throw USER_QUERY_EXCEPTION(SCIDB_SE_INFER_SCHEMA,
										   SCIDB_LE_OP_WINDOW_ERROR5,
										   _parameters[i]->getParsingContext());
			}
			addAggregatedAttribute( (shared_ptr<OperatorParamAggregateCall> &) param, desc, output, true);
		}

		if ( desc.getEmptyBitmapAttribute())	// preserve the empty-bitmap attribute, if present
		{
			AttributeDesc const* eAttr = desc.getEmptyBitmapAttribute();
			output.addAttribute(AttributeDesc(output.getAttributes().size(), 
						eAttr->getName(),
						eAttr->getType(),
						eAttr->getFlags(),
						eAttr->getDefaultCompressionMethod()));
		}

		return output;
	}
Example No. 14
const ArrayDesc& LogicalQueryPlanNode::inferTypes(std::shared_ptr< Query> query)
{
    std::vector<ArrayDesc> inputSchemas;
    ArrayDesc outputSchema;
    for (size_t i=0, end=_childNodes.size(); i<end; i++)
    {
        inputSchemas.push_back(_childNodes[i]->inferTypes(query));
    }
    outputSchema = _logicalOperator->inferSchema(inputSchemas, query);
    // FIXME: Maybe wrap the inferSchema method with another one and assign the alias there?
    if (!_logicalOperator->getAliasName().empty())
    {
        outputSchema.addAlias(_logicalOperator->getAliasName());
    }
    _logicalOperator->setSchema(outputSchema);
    LOG4CXX_DEBUG(logger, "Inferred schema for operator " <<
                  _logicalOperator->getLogicalName() << ": " << outputSchema);
    return _logicalOperator->getSchema();
}
Example No. 15
FITSInputArray::FITSInputArray(ArrayDesc const& array, string const& filePath, uint32_t hdu, std::shared_ptr<Query>& query)
    : parser(filePath),
      hdu(hdu),
      desc(array),
      dims(array.getDimensions()),
      nDims(dims.size()),
      nAttrs(array.getAttributes(true).size()),
      values(nAttrs),
      chunks(nAttrs),
      chunkIterators(nAttrs),
      chunkIndex(0),
      chunkPos(nDims),
      query(query)
{
    initValueHolders();

    // Most initialization steps are only done later, when the first
    // chunk is requested by an iterator. See getChunkByIndex()
}
Example No. 16
 MergeArray::MergeArray(ArrayDesc const& desc, vector< boost::shared_ptr<Array> > const& arrays)
 : DelegateArray(desc, arrays[0]),
   inputArrays(arrays)
 {
     for (size_t i = 0; i < inputArrays.size(); i++) { 
         if (inputArrays[i]->getArrayDesc().getAttributes().size() != desc.getAttributes().size()) { 
             inputArrays[i] = boost::shared_ptr<Array>(new NonEmptyableArray(inputArrays[i]));
         }
     }
 }
Example No. 17
    ArrayDesc inferSchema(std::vector<ArrayDesc> schemas, std::shared_ptr<Query> query)
    {
        assert(schemas.size() == 0);
        assert(_parameters.size() == 2);
        assert(((std::shared_ptr<OperatorParam>&)_parameters[0])->getParamType() == PARAM_ARRAY_REF);
        assert(((std::shared_ptr<OperatorParam>&)_parameters[1])->getParamType() == PARAM_ARRAY_REF);

        std::string oldArrayName;
        std::string oldNamespaceName;
        std::string newArrayName;
        std::string newNamespaceName;

        const string &oldArrayNameOrg =
            ((std::shared_ptr<OperatorParamReference>&)_parameters[0])->getObjectName();
        query->getNamespaceArrayNames(oldArrayNameOrg, oldNamespaceName, oldArrayName);

        const string &newArrayNameOrg =
            ((std::shared_ptr<OperatorParamReference>&)_parameters[1])->getObjectName();
        query->getNamespaceArrayNames(newArrayNameOrg, newNamespaceName, newArrayName);

        // Renaming across namespaces is not supported: both names must resolve to the same namespace.
        if(newNamespaceName != oldNamespaceName)
        {
            throw USER_QUERY_EXCEPTION(
                SCIDB_SE_INFER_SCHEMA, SCIDB_LE_CANNOT_RENAME_ACROSS_NAMESPACES,
                _parameters[1]->getParsingContext())
                << ArrayDesc::makeQualifiedArrayName(oldNamespaceName, oldArrayName)
                << ArrayDesc::makeQualifiedArrayName(newNamespaceName, newArrayName);
        }

        if (scidb::namespaces::Communicator::containsArray(newNamespaceName, newArrayName))
        {
            throw USER_QUERY_EXCEPTION(SCIDB_SE_INFER_SCHEMA, SCIDB_LE_ARRAY_ALREADY_EXIST,
                _parameters[1]->getParsingContext()) << newArrayName;
        }
        ArrayDesc arrDesc;
        arrDesc.setDistribution(defaultPartitioning());
        arrDesc.setResidency(query->getDefaultArrayResidency());
        return arrDesc;
    }
Example No. 18
ApplyArray::ApplyArray(ArrayDesc const& desc,
                       boost::shared_ptr<Array> const& array,
                       vector<shared_ptr<Expression> > expressions,
                       const boost::shared_ptr<Query>& query,
                       bool tile) :
   DelegateArray(desc, array),
   _expressions(expressions),
   _attributeNullable(desc.getAttributes().size(), false),
   _runInTileMode(desc.getAttributes().size(), tile),
   _bindingSets(desc.getAttributes().size(), vector<BindInfo>(0))
{
    assert(query);
    _query=query;
    for(size_t i =0; i<expressions.size(); i++)
    {
        _attributeNullable[i] = desc.getAttributes()[i].isNullable();
        if (expressions[i].get())
        {
            _runInTileMode[i] = tile && expressions[i]->supportsTileMode();
            _bindingSets[i]= expressions[i]->getBindings();
        }
    }
}
Example No. 19
	// param desc --> the input array schema
	ArrayDesc createWindowDesc(ArrayDesc const& desc)
	{
		// Get the attributes of the input array.
		Attributes const &attrs = desc.getAttributes();
		/*
		Dimensions aggrDims(dims.size());
		for (size_t i = 0; i < dims.size(); i++)
		{
			DimensionDesc const& srcDim = dims[i];
			aggrDims[i] = DimensionDesc(srcDim.getBaseName(),
									    srcDim.getNamesAndAliases(),
								   	    srcDim.getStartMin(),
									    srcDim.getCurrStart(),
									    srcDim.getCurrEnd(),
									    srcDim.getEndMax(),
									    srcDim.getChunkInterval(),
									    0);
		}
		*/

		Attributes newAttributes;
		size_t n = 0;
		for (size_t i=desc.getDimensions().size()*2; i < _parameters.size()-1; i=i+2)
		{
			const AttributeDesc &attr = attrs[((boost::shared_ptr<OperatorParamReference>&)_parameters[i])->getObjectNo()];
			newAttributes.push_back(AttributeDesc(n++, attr.getName(),   // n++ assigns sequential attribute ids
												  attr.getType(), 
												  attr.getFlags(),
												  attr.getDefaultCompressionMethod(),
												  attr.getAliases()));
			

		}
	
		return ArrayDesc(desc.getName(), newAttributes, desc.getDimensions());
	}
Example No. 20
    inline ArrayDesc createWindowDesc(ArrayDesc const& desc)
    {
        Dimensions const& dims = desc.getDimensions();
        Dimensions aggDims(dims.size());
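        // Copy the input dimensions, dropping any chunk overlap in the output schema.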
        for (size_t i = 0, n = dims.size(); i < n; i++)
        {
            DimensionDesc const& srcDim = dims[i];
            aggDims[i] = DimensionDesc(srcDim.getBaseName(),
                                       srcDim.getNamesAndAliases(),
                                       srcDim.getStartMin(),
                                       srcDim.getCurrStart(),
                                       srcDim.getCurrEnd(),
                                       srcDim.getEndMax(),
                                       srcDim.getChunkInterval(), 
                                       0,
                                       srcDim.getType(),
                                       srcDim.getFlags(),
                                       srcDim.getMappingArrayName(),
                                       srcDim.getComment(),
                                       srcDim.getFuncMapOffset(),
                                       srcDim.getFuncMapScale());
        }

        ArrayDesc output (desc.getName(), Attributes(), aggDims);

        for (size_t i = dims.size() * 2, size = _parameters.size(); i < size; i++)
        {
            addAggregatedAttribute( (shared_ptr <OperatorParamAggregateCall> &) _parameters[i], desc, output);
        }

        if ( desc.getEmptyBitmapAttribute())
        {
            AttributeDesc const* eAtt = desc.getEmptyBitmapAttribute();
            output.addAttribute(AttributeDesc(output.getAttributes().size(), eAtt->getName(),
                eAtt->getType(), eAtt->getFlags(), eAtt->getDefaultCompressionMethod()));
        }

        return output;
    }
Example No. 21
    ArrayDesc inferSchema(std::vector< ArrayDesc> schemas, boost::shared_ptr< Query> query)
	{
		assert(schemas.size() == 1);

        ArrayDesc const& srcDesc = schemas[0];
        ArrayDesc dstDesc = ((boost::shared_ptr<OperatorParamSchema>&)_parameters[0])->getSchema();

        //Compile a desc of all possible attributes (aggregate calls first) and source dimensions
        ArrayDesc aggregationDesc (srcDesc.getName(), Attributes(), srcDesc.getDimensions());
        vector<string> aggregatedNames;

        //add aggregate calls first
        for (size_t i = 1; i < _parameters.size(); i++)
        {
            addAggregatedAttribute( (shared_ptr <OperatorParamAggregateCall>&) _parameters[i], srcDesc, aggregationDesc);
            string aggName =  aggregationDesc.getAttributes()[aggregationDesc.getAttributes().size()-1].getName();
            bool aggFound = false;
            BOOST_FOREACH(const AttributeDesc &dstAttr, dstDesc.getAttributes()) { 
                if (dstAttr.getName() == aggName) { 
                    aggFound = true;
                    break;
                }
            }
            if (!aggFound) { 
                throw USER_EXCEPTION(SCIDB_SE_INFER_SCHEMA, SCIDB_LE_ATTRIBUTE_DOESNT_EXIST) << aggName << dstDesc.getName();
            }
            aggregatedNames.push_back(aggName);
        }

        //add other attributes
        BOOST_FOREACH(const AttributeDesc &srcAttr, srcDesc.getAttributes())
        {
            //if there's an attribute with same name as an aggregate call - skip the attribute
            bool found = false;
            BOOST_FOREACH(const AttributeDesc &aggAttr, aggregationDesc.getAttributes())
            {
                if( aggAttr.getName() == srcAttr.getName())
                {
                    found = true;
                }
            }

            if (!found)
            {
                aggregationDesc.addAttribute(AttributeDesc( aggregationDesc.getAttributes().size(),
                                                            srcAttr.getName(),
                                                            srcAttr.getType(),
                                                            srcAttr.getFlags(),
                                                            srcAttr.getDefaultCompressionMethod(),
                                                            srcAttr.getAliases(),
                                                            &srcAttr.getDefaultValue(),
                                                            srcAttr.getDefaultValueExpr(),
                                                            srcAttr.getVarSize()));
            }
        }

        // The destination schema must provide an empty-bitmap (empty tag) attribute.
        if (!dstDesc.getEmptyBitmapAttribute())
            throw USER_EXCEPTION(SCIDB_SE_INFER_SCHEMA, SCIDB_LE_OP_REDIMENSION_ERROR1);

        // Every destination attribute must match a source attribute (same type, compatible
        // nullability), correspond to a source dimension (as a plain int64), or be the
        // empty indicator.
        BOOST_FOREACH(const AttributeDesc &dstAttr, dstDesc.getAttributes())
        {
            BOOST_FOREACH(const AttributeDesc &srcAttr, aggregationDesc.getAttributes())
            {
                if (srcAttr.getName() == dstAttr.getName())
                {
                    if (srcAttr.getType() != dstAttr.getType())
                    {
                        throw USER_EXCEPTION(SCIDB_SE_INFER_SCHEMA, SCIDB_LE_WRONG_ATTRIBUTE_TYPE)
                            << srcAttr.getName() << srcAttr.getType() << dstAttr.getType();
                    }
                    if (!dstAttr.isNullable() && srcAttr.isNullable())
                    {
                        throw USER_EXCEPTION(SCIDB_SE_INFER_SCHEMA, SCIDB_LE_WRONG_ATTRIBUTE_FLAGS)
                            << srcAttr.getName();
                    }

                    goto NextAttr;
                }
            }
            BOOST_FOREACH(const DimensionDesc &srcDim, aggregationDesc.getDimensions())
            {
                if (srcDim.hasNameAndAlias(dstAttr.getName()))
                {
                    if (dstAttr.getType() != TID_INT64)
                    {
                        throw USER_EXCEPTION(SCIDB_SE_INFER_SCHEMA, SCIDB_LE_WRONG_DESTINATION_ATTRIBUTE_TYPE)
                            << dstAttr.getName() << TID_INT64;
                    }
                    if (dstAttr.getFlags() != 0)
                    {
                        throw USER_EXCEPTION(SCIDB_SE_INFER_SCHEMA, SCIDB_LE_WRONG_DESTINATION_ATTRIBUTE_FLAGS)
                            << dstAttr.getName();
                    }

                    goto NextAttr;
                }
            }

            if (dstAttr.isEmptyIndicator() == false)
            {
                throw USER_EXCEPTION(SCIDB_SE_INFER_SCHEMA, SCIDB_LE_UNEXPECTED_DESTINATION_ATTRIBUTE)
                    << dstAttr.getName();
            }
          NextAttr:;
        }
        
        Dimensions outputDims;
        size_t nNewDims = 0;
        BOOST_FOREACH(const DimensionDesc &dstDim, dstDesc.getDimensions())
        {
            if (dstDim.getChunkOverlap() > dstDim.getChunkInterval())
            {
                throw USER_EXCEPTION(SCIDB_SE_INFER_SCHEMA, SCIDB_LE_OVERLAP_CANT_BE_LARGER_CHUNK);
            }
            BOOST_FOREACH(const AttributeDesc &srcAttr, aggregationDesc.getAttributes())
            {
                if (dstDim.hasNameAndAlias(srcAttr.getName()))
                {
                    for (size_t i = 0; i< aggregatedNames.size(); i++)
                    {
                        if (srcAttr.getName() == aggregatedNames[i])
                            throw USER_EXCEPTION(SCIDB_SE_INFER_SCHEMA, SCIDB_LE_OP_REDIMENSION_ERROR2);
                    }
                    if ( !IS_INTEGRAL(srcAttr.getType())  || srcAttr.getType() == TID_UINT64 )
                    {
                        throw USER_EXCEPTION(SCIDB_SE_INFER_SCHEMA, SCIDB_LE_WRONG_SOURCE_ATTRIBUTE_TYPE)
                            << srcAttr.getName() << TID_INT64;
                    }
                    outputDims.push_back(dstDim);
                    goto NextDim;
                }
            }
            BOOST_FOREACH(const DimensionDesc &srcDim, aggregationDesc.getDimensions())
            {
                if (srcDim.hasNameAndAlias(dstDim.getBaseName()))
                {
                    DimensionDesc outputDim = dstDim;
                    outputDims.push_back(outputDim);
                    goto NextDim;
                }
            }
            //one synthetic dimension allowed
            if (nNewDims++ != 0 || !aggregatedNames.empty() )
            {
                throw USER_EXCEPTION(SCIDB_SE_INFER_SCHEMA, SCIDB_LE_UNEXPECTED_DESTINATION_DIMENSION) << dstDim.getBaseName();
            }
            outputDims.push_back(dstDim);
            NextDim:;
        }

        return ArrayDesc(srcDesc.getName(), dstDesc.getAttributes(), outputDims, dstDesc.getFlags());
	}
Example No. 22
  shared_ptr< Array > execute(vector< shared_ptr< Array> >& inputArrays, shared_ptr<Query> query) {
    shared_ptr<Array> outputArray(new MemArray(_schema, query));
    shared_ptr<Array> inputArray = inputArrays[0];
    ArrayDesc inputSchema = inputArray->getArrayDesc();

    // Get descriptor of two dimensions d and n.
    DimensionDesc dimsN = inputSchema.getDimensions()[0]; 
    DimensionDesc dimsD = inputSchema.getDimensions()[1];
    size_t n = dimsN.getCurrEnd() - dimsN.getCurrStart() + 1;
    // Note: the input data set should have d+1 dimensions (including Y)
    size_t d = dimsD.getCurrEnd() - dimsD.getCurrStart();
    size_t nStart = dimsN.getCurrStart();
    size_t dStart = dimsD.getCurrStart(); 

    // Get chunk size of n.
    size_t nChunkSize = dimsN.getChunkInterval();

    // z_i[0] stays 1.0 so the outer-product accumulation also captures the count (n) and the linear sums (L).
    z_i[0] = 1.0;

    shared_ptr<ConstArrayIterator> inputArrayIter = inputArray->getConstIterator(0);
    Coordinates chunkPosition;

    size_t i, j, k, m;
    while(! inputArrayIter->end() ) {
      shared_ptr<ConstChunkIterator> chunkIter = inputArrayIter->getChunk().getConstIterator();
      chunkPosition = inputArrayIter->getPosition();
      for(i=chunkPosition[0]; i<chunkPosition[0] + nChunkSize; i++) {
        // In case the chunk is partially filled.
        if(i == n + nStart) {
          break;
        }
        for(j=chunkPosition[1], m=1; j<=chunkPosition[1]+d; j++, m++) {
          // In case the chunk is partially filled.
          if(j == d + 1 + dStart) {
            break;
          }
          z_i[m] = chunkIter->getItem().getDouble();
          ++(*chunkIter);
        }
        for(k=0; k<=d+1; ++k) {
        // This operator is not optimized for entries with value zero.
        // TODO: should use fabs(z_i[k]) < 10e-6
//          if(z_i[k] == 0.0) {
//            continue;
//          }
          for(m=0; m<=k; ++m) {
            Gamma[k][m] += z_i[k]*z_i[m];
          }
        }
      }
      ++(*inputArrayIter);
    }

    /*
     * Instance 0 acts as the query coordinator: workers send their partial Gamma
     * matrices to it, and it merges them before writing the result.
     */
    if(query->getInstancesCount() > 1) {
      if(query->getInstanceID() != 0) {
        // I am not the coordinator, I should send my Gamma matrix out.
        shared_ptr <SharedBuffer> buf ( new MemoryBuffer(NULL, sizeof(double) * (d+3) * (d+2) / 2) );
        double *Gammabuf = static_cast<double*> (buf->getData());
        for(size_t i=0; i<d+2; ++i) {
          for(size_t j=0; j<=i; ++j) {
            *Gammabuf = Gamma[i][j];
            ++Gammabuf;
          }
        }
        BufSend(0, buf, query);
        return outputArray;
      }
      else {
        // I am the coordinator, I should collect Gamma matrix from workers.
        for(InstanceID l = 1; l<query->getInstancesCount(); ++l) {
          shared_ptr<SharedBuffer> buf = BufReceive(l, query);
          double *Gammabuf = static_cast<double*> (buf->getData());
          for(size_t i=0; i<d+2; ++i) {
            for(size_t j=0; j<=i; ++j) {
              Gamma[i][j] += *Gammabuf;
              ++Gammabuf;
            }
          }
        }
      } // end if getInstanceID() != 0
    } //end if InstancesCount() > 1

    return writeGamma(d, query);
  }
Example No. 23
 ReshapeArray::ReshapeArray(ArrayDesc const& desc, boost::shared_ptr<Array> const& array)
 : DelegateArray(desc, array),
   inDims(array->getArrayDesc().getDimensions()),
   outDims(desc.getDimensions())
 {
 } 
Example No. 24
    shared_ptr< Array > execute(vector< shared_ptr< Array> >& inputArrays, shared_ptr<Query> query)
    {
    	
    	// Keep a per-instance log for this operator in a local file named preselect_<instanceID>.log.
    	stringstream logFileName;
    	logFileName << "/home/scidb/preselect_" << query->getInstanceID() << ".log";
    	FILE *logFile;
    	logFile = fopen(logFileName.str().c_str(), "w");
    	
        shared_ptr<Array> originalArray = inputArrays[0];
        shared_ptr<Array> correlationArray = inputArrays[1];
        
        ArrayDesc originalSchema = originalArray->getArrayDesc();
        ArrayDesc corrSchema = correlationArray->getArrayDesc();
        
        Dimensions originalDims = originalSchema.getDimensions();
        Dimensions corrDims = corrSchema.getDimensions();
        DimensionDesc originalDimsP = originalDims[1];
        DimensionDesc corrDimsP = corrDims[0];
        // Note: the correlation array does not include the Y column.
        Coordinate p = corrDimsP.getCurrLength();
        fprintf(logFile, "p = %ld\n # of chunk = %ld\n", p, corrSchema.getNumberOfChunks());
        fflush(logFile);
        shared_ptr<ConstArrayIterator> corrArrayIter = correlationArray->getIterator(0);
        if(! corrArrayIter->end() )
        {
        	correlation *corr = new correlation[p];
	        // The correlation array is designed to hold a single chunk, so no chunk loop is needed here.
	        shared_ptr<ConstChunkIterator> corrChunkIter = corrArrayIter->getChunk().getConstIterator();
			for(Coordinate i=0; i<p; ++i)
			{
				corr[i].id = i+1;
				corr[i].corr = corrChunkIter->getItem().getDouble();
				//fprintf(logFile, "%d, %f\n", corr[i].id, corr[i].corr);
				++(*corrChunkIter);
			}
			//fflush(logFile);
			qsort(corr, p, sizeof(correlation), &comp);
			for(Coordinate i=0; i<p; ++i)
			{
				fprintf(logFile, "%d, %f\n", corr[i].id, corr[i].corr);
			}
			fflush(logFile);
			
			Coordinate d = ((boost::shared_ptr<OperatorParamPhysicalExpression>&)_parameters[0])->getExpression()->evaluate().getInt64();
	        fprintf(logFile, "d=%ld\n", d);
	        stringstream ss;
	        vector<string> names;
	        names.push_back("j");
	        vector<TypeId> types;
	        types.push_back(TID_INT64);
	        for(Coordinate i=0; i<d; ++i)
	        {
	        	ss << "j=" << corr[i].id << " or ";
	        }
	        ss << "j=" << p+1;
	        fprintf(logFile, "%s\n", ss.str().c_str());
	        fflush(logFile);
	        Expression e;
        	e.compile(ss.str(), names, types);
        	fclose(logFile);
        	boost::shared_ptr<scidb::Query> emptyQuery;
	        return boost::shared_ptr<Array>(new FilterArray(_schema, inputArrays[0], boost::make_shared<Expression>(e), emptyQuery, _tileMode));
        }
        else
        {
        	shared_ptr<Array> outputArray(new MemArray(_schema, query));
        	fclose(logFile);
        	return outputArray;
        }
        
    }
Example No. 25
 DBArray::DBArray(ArrayDesc const& desc, const boost::shared_ptr<Query>& query) 
 : 
 _desc(desc),
 _query(query)
 {
     _desc.setPartitioningSchema(SystemCatalog::getInstance()->getPartitioningSchema(desc.getId()));
     if (query) { 
         query->sharedLock(getRealName());
     }
 }
Example No. 26
    ArrayDesc inferSchema(std::vector< ArrayDesc> schemas, boost::shared_ptr< Query> query)
	{
		assert(schemas.size() == 1);

        ArrayDesc const& srcDesc = schemas[0];
        
        ArrayDesc dstDesc = ((boost::shared_ptr<OperatorParamSchema>&)_parameters[0])->getSchema();

        //Compile a desc of all possible attributes (aggregate calls first) and source dimensions
        ArrayDesc aggregationDesc (srcDesc.getName(), Attributes(), srcDesc.getDimensions());
        vector<string> aggregatedNames;

        //add aggregate calls first
        for (size_t i = 1; i < _parameters.size(); i++)
        {
            addAggregatedAttribute( (shared_ptr <OperatorParamAggregateCall>&) _parameters[i], srcDesc, aggregationDesc);
            aggregatedNames.push_back(aggregationDesc.getAttributes()[aggregationDesc.getAttributes().size()-1].getName());
        }

        //add other attributes
        BOOST_FOREACH(const AttributeDesc &srcAttr, srcDesc.getAttributes())
        {
            //if there's an attribute with same name as an aggregate call - skip the attribute
            bool found = false;
            BOOST_FOREACH(const AttributeDesc &aggAttr, aggregationDesc.getAttributes())
            {
                if( aggAttr.getName() == srcAttr.getName())
                {
                    found = true;
                }
            }

            if (!found)
            {
                aggregationDesc.addAttribute(AttributeDesc( aggregationDesc.getAttributes().size(),
                                                            srcAttr.getName(),
                                                            srcAttr.getType(),
                                                            srcAttr.getFlags(),
                                                            srcAttr.getDefaultCompressionMethod(),
                                                            srcAttr.getAliases(),
                                                            &srcAttr.getDefaultValue(),
                                                            srcAttr.getDefaultValueExpr(),
                                                            srcAttr.getComment(),
                                                            srcAttr.getVarSize()));
            }
        }

        // The destination schema must provide an empty-bitmap (empty tag) attribute.
        if (!dstDesc.getEmptyBitmapAttribute())
            throw USER_EXCEPTION(SCIDB_SE_INFER_SCHEMA, SCIDB_LE_OP_REDIMENSION_ERROR1);
 
        BOOST_FOREACH(const AttributeDesc &dstAttr, dstDesc.getAttributes())
        {
            BOOST_FOREACH(const AttributeDesc &srcAttr, aggregationDesc.getAttributes())
            {
                if (srcAttr.getName() == dstAttr.getName())
                {
                    if (srcAttr.getType() != dstAttr.getType())
                    {
                        throw USER_EXCEPTION(SCIDB_SE_INFER_SCHEMA, SCIDB_LE_WRONG_ATTRIBUTE_TYPE)
                            << srcAttr.getName() << srcAttr.getType() << dstAttr.getType();
                    }
                    if (!dstAttr.isNullable() && srcAttr.isNullable())
                    {
                        throw USER_EXCEPTION(SCIDB_SE_INFER_SCHEMA, SCIDB_LE_WRONG_ATTRIBUTE_FLAGS)
                            << srcAttr.getName();
                    }

                    goto NextAttr;
                }
            }
            BOOST_FOREACH(const DimensionDesc &srcDim, aggregationDesc.getDimensions())
            {
                if (srcDim.hasNameOrAlias(dstAttr.getName()))
                {
                    if (dstAttr.getType() != TID_INT64)
                    {
                        throw USER_EXCEPTION(SCIDB_SE_INFER_SCHEMA, SCIDB_LE_WRONG_DESTINATION_ATTRIBUTE_TYPE)
                            << dstAttr.getName() << TID_INT64;
                    }
                    if (srcDim.getType() != TID_INT64)
                    {
                        throw USER_EXCEPTION(SCIDB_SE_INFER_SCHEMA, SCIDB_LE_WRONG_SOURCE_DIMENSION_TYPE)
                            << dstAttr.getName() << TID_INT64;
                    }
                    if (dstAttr.getFlags() != 0)
                    {
                        throw USER_EXCEPTION(SCIDB_SE_INFER_SCHEMA, SCIDB_LE_WRONG_DESTINATION_ATTRIBUTE_FLAGS)
                            << dstAttr.getName();
                    }

                    goto NextAttr;
                }
            }

            if (dstAttr.isEmptyIndicator() == false)
            {
                throw USER_EXCEPTION(SCIDB_SE_INFER_SCHEMA, SCIDB_LE_UNEXPECTED_DESTINATION_ATTRIBUTE)
                    << dstAttr.getName();
            }
          NextAttr:;
        }
        
        // Each destination dimension must be driven either by a source attribute
        // (a non-aggregated, non-nullable int64) or by a source dimension with matching
        // start, length, and chunk interval.
        BOOST_FOREACH(const DimensionDesc &dstDim, dstDesc.getDimensions())
        {
            if (dstDim.getChunkOverlap() != 0)
                throw USER_EXCEPTION(SCIDB_SE_INFER_SCHEMA, SCIDB_LE_OP_REDIMENSION_STORE_ERROR3);

            BOOST_FOREACH(const AttributeDesc &srcAttr, aggregationDesc.getAttributes())
            {
                if (dstDim.hasNameOrAlias(srcAttr.getName()))
                {
                    for (size_t i = 0; i< aggregatedNames.size(); i++)
                    {
                        if (srcAttr.getName() == aggregatedNames[i])
                            throw USER_EXCEPTION(SCIDB_SE_INFER_SCHEMA, SCIDB_LE_OP_REDIMENSION_ERROR2);
                    }
                    if (srcAttr.getType() != TID_INT64)
                    {
                        throw USER_EXCEPTION(SCIDB_SE_INFER_SCHEMA, SCIDB_LE_WRONG_SOURCE_ATTRIBUTE_TYPE)
                            << srcAttr.getName() << TID_INT64;
                    }
                    if (srcAttr.getFlags() != 0)
                    {
                        throw USER_EXCEPTION(SCIDB_SE_INFER_SCHEMA, SCIDB_LE_WRONG_SOURCE_ATTRIBUTE_FLAGS)
                           << srcAttr.getName() << TID_INT64;
                    }
                    if (dstDim.getType() != TID_INT64)
                    {
                        throw USER_EXCEPTION(SCIDB_SE_INFER_SCHEMA, SCIDB_LE_WRONG_DESTINATION_DIMENSION_TYPE)
                            << srcAttr.getName() << TID_INT64;
                    }
                    goto NextDim;
                }
            }
            BOOST_FOREACH(const DimensionDesc &srcDim, aggregationDesc.getDimensions())
            {
                if (srcDim.hasNameOrAlias(dstDim.getBaseName()))
                {
                    if (dstDim.getType() != srcDim.getType()                   ||
                        dstDim.getStart() != srcDim.getStart()                 ||
                        dstDim.getLength() != srcDim.getLength()               ||
                        dstDim.getChunkInterval() != srcDim.getChunkInterval())
                    {
                        throw USER_EXCEPTION(SCIDB_SE_INFER_SCHEMA, SCIDB_LE_DIMENSIONS_DONT_MATCH)
                            << srcDim.getBaseName() << dstDim.getBaseName();
                    }
                    goto NextDim;
                }
            }

            throw USER_EXCEPTION(SCIDB_SE_INFER_SCHEMA, SCIDB_LE_UNEXPECTED_DESTINATION_DIMENSION) << dstDim.getBaseName();
          NextDim:;
        }

        return ArrayDesc(srcDesc.getName()+"_redimension", dstDesc.getAttributes(), dstDesc.getDimensions(), dstDesc.getFlags());
	}
Example No. 27
    void InputArray::redistributeShadowArray(boost::shared_ptr<Query> const& query)
    {
        SCIDB_ASSERT(shadowArray);

        //All arrays are currently stored as round-robin. Let's store shadow arrays round-robin as well
        //TODO: revisit this when we allow users to store arrays with specified distributions
        PartitioningSchema ps = psHashPartitioned;
        ArrayDesc shadowArrayDesc = shadowArray->getArrayDesc();
        string shadowArrayVersionName;

        LOG4CXX_DEBUG(logger, "Redistribute shadow array " << shadowArrayDesc.getName());

        if (! query->isCoordinator()) {
            // worker
            string shadowArrayName = shadowArrayDesc.getName();
            SCIDB_ASSERT(ArrayDesc::isNameUnversioned(shadowArrayName));

            shared_ptr<SystemCatalog::LockDesc> lock(new SystemCatalog::LockDesc(shadowArrayName,
                                                                                 query->getQueryID(),
                                                                                 Cluster::getInstance()->getLocalInstanceId(),
                                                                                 SystemCatalog::LockDesc::WORKER,
                                                                                 SystemCatalog::LockDesc::WR));

            shared_ptr<Query::ErrorHandler> ptr(new UpdateErrorHandler(lock));
            query->pushErrorHandler(ptr);

            Query::Finalizer f = bind(&UpdateErrorHandler::releaseLock, lock, _1);
            query->pushFinalizer(f);
            SystemCatalog::ErrorChecker errorChecker = bind(&Query::validate, query);
            if (!SystemCatalog::getInstance()->lockArray(lock, errorChecker)) {
                throw USER_EXCEPTION(SCIDB_SE_SYSCAT, SCIDB_LE_CANT_INCREMENT_LOCK) << shadowArrayName;
            }

            ArrayDesc desc;
            bool arrayExists = SystemCatalog::getInstance()->getArrayDesc(shadowArrayName, desc, false);
            VersionID lastVersion = 0;
            if (arrayExists) {
                lastVersion = SystemCatalog::getInstance()->getLastVersion(desc.getId());
            }
            VersionID version = lastVersion+1;

            lock->setArrayVersion(version);
            bool rc = SystemCatalog::getInstance()->updateArrayLock(lock);
            SCIDB_ASSERT(rc);

            LOG4CXX_DEBUG(logger, "Use version " << version << " of shadow array " << shadowArrayName);
            shadowArrayVersionName = ArrayDesc::makeVersionedName(shadowArrayName, version);
        } else {
            // coordinator
            shadowArrayVersionName = shadowArrayDesc.getName();
            SCIDB_ASSERT(ArrayDesc::isNameVersioned(shadowArrayVersionName));
        }

        shared_ptr<Array> persistentShadowArray(DBArray::newDBArray(shadowArrayVersionName, query));
        ArrayDesc const& dstArrayDesc = persistentShadowArray->getArrayDesc();

        query->getReplicationContext()->enableInboundQueue(dstArrayDesc.getId(),
                                                           persistentShadowArray);

        set<Coordinates, CoordinatesLess> newChunkCoordinates;
        redistributeToArray(shadowArray, persistentShadowArray,  &newChunkCoordinates,
                            query, ps,
                            ALL_INSTANCE_MASK,
                            boost::shared_ptr <DistributionMapper>(),
                            0,
                            shared_ptr<PartitioningSchemaData>());

        // Finalize: drop chunks that are no longer present, wait for replication to
        // settle, flush storage, and record the new array boundaries in the catalog.
        StorageManager::getInstance().removeDeadChunks(dstArrayDesc, newChunkCoordinates, query);
        query->getReplicationContext()->replicationSync(dstArrayDesc.getId());
        query->getReplicationContext()->removeInboundQueue(dstArrayDesc.getId());
        StorageManager::getInstance().flush();
        PhysicalBoundaries bounds = PhysicalBoundaries::createFromChunkList(persistentShadowArray,
                                                                            newChunkCoordinates);
        SystemCatalog::getInstance()->updateArrayBoundaries(dstArrayDesc, bounds);

        // XXX TODO: add: getInjectedErrorListener().check();
    }
Example No. 28
  shared_ptr< Array > execute(vector< shared_ptr< Array> >& inputArrays, shared_ptr<Query> query) {
    shared_ptr<Array> outputArray(new MemArray(_schema, query));
    shared_ptr<Array> inputArray = inputArrays[0];
    ArrayDesc inputSchema = inputArray->getArrayDesc();

    // Get descriptor of two dimensions d and n.
    DimensionDesc dimsN = inputSchema.getDimensions()[0]; 
    DimensionDesc dimsD = inputSchema.getDimensions()[1];
    size_t n = dimsN.getCurrLength();
    // Note: the input data set should have d+1 dimensions (including Y)
    size_t d = dimsD.getCurrLength() - 1;
    nlq.N = n;
    nlq.d = d;
    shared_ptr<ConstArrayIterator> inputArrayIter = inputArray->getConstIterator(0);
    Coordinates cellPosition;

    size_t i;
    double value;
    while(! inputArrayIter->end() ) {
      shared_ptr<ConstChunkIterator> chunkIter = inputArrayIter->getChunk().getConstIterator();
      // For each cell in the current chunk.
      // This will skip the empty cells.
      while(! chunkIter->end() ) {
        cellPosition = chunkIter->getPosition();
        value = chunkIter->getItem().getDouble();
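        // Accumulate the per-column linear (L) and quadratic (Q) sums.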
        nlq.L[ cellPosition[1] ] += value;
        nlq.Q[ cellPosition[1] ] += value * value;
        ++(*chunkIter);
      }
      ++(*inputArrayIter);
    }

    /*
     * Instance 0 acts as the query coordinator: workers send their partial L and Q
     * sums to it, and it merges them before writing the result.
     */
    if(query->getInstancesCount() > 1) {
      if(query->getInstanceID() != 0) {
        // I am not the coordinator, I should send my Gamma matrix out.
        shared_ptr <SharedBuffer> buf ( new MemoryBuffer(NULL, sizeof(double) * (d*2+2) ));
        double *Gammabuf = static_cast<double*> (buf->getData());
        for(i=1; i<=d+1; ++i) {
          *Gammabuf = nlq.L[i];
          ++Gammabuf;
        }
        for(i=1; i<=d+1; ++i) {
          *Gammabuf = nlq.Q[i];
          ++Gammabuf;
        }
        BufSend(0, buf, query);
        return outputArray;
      }
      else {
        // I am the coordinator, I should collect Gamma matrix from workers.
        for(InstanceID l = 1; l<query->getInstancesCount(); ++l) {
          shared_ptr<SharedBuffer> buf = BufReceive(l, query);
          double *Gammabuf = static_cast<double*> (buf->getData());
          for(i=1; i<=d+1; ++i) {
            nlq.L[i] += *Gammabuf;
            ++Gammabuf;
          }
          for(i=1; i<=d+1; ++i) {
            nlq.Q[i] += *Gammabuf;
            ++Gammabuf;
          }
        }
      }// end if getInstanceID() != 0
    }//end if InstancesCount() > 1

    return writeGamma(query);
  }
Example No. 29
  shared_ptr< Array > execute(vector< shared_ptr< Array> >& inputArrays, shared_ptr<Query> query)
  {
    shared_ptr<Array> outputArray(new MemArray(_schema, query));
    shared_ptr<Array> inputArray = inputArrays[0];
    ArrayDesc inputSchema = inputArray->getArrayDesc();
    // Get descriptor of two dimensions d and n.
    DimensionDesc dimsN = inputSchema.getDimensions()[0]; 
    DimensionDesc dimsD = inputSchema.getDimensions()[1];
    int64_t n = dimsN.getCurrEnd() - dimsN.getCurrStart() + 1;
    // Note: the input data set should have d+1 dimensions (including Y)
    d = dimsD.getCurrEnd() - dimsD.getCurrStart();
    idY = d+1;
    int64_t nStart = dimsN.getCurrStart();
    int64_t dStart = dimsD.getCurrStart();
    // Get chunk size of n.
    int64_t nChunkSize = dimsN.getChunkInterval();
    k = ((shared_ptr<OperatorParamPhysicalExpression>&)_parameters[0])->getExpression()->evaluate().getInt64();
    if (_parameters.size() == 2) {
      idY = ((shared_ptr<OperatorParamPhysicalExpression>&)_parameters[1])->getExpression()->evaluate().getInt64();
    }

    #ifdef DEBUG
      stringstream ss;
      ss << getenv("HOME") << "/groupdiagdensegamma-instance-" << query->getInstanceID() << ".log";
      log.open(ss.str().c_str(), ios::out);
      log << "n = " << n << endl << "d = " << d << endl << "k = " << k << endl;
      log << "nStart = " << nStart << endl << "dStart = " << dStart << endl;
      log << "nChunkSize = " << nChunkSize << endl;
      log << "idY = " << idY << endl;
    #endif

    shared_ptr<ConstArrayIterator> inputArrayIter = inputArray->getConstIterator(0);
    Coordinates chunkPosition;
    int64_t i, j, k, m, l;
    double value;
    NLQ tmp;
    map<double, struct NLQ>::iterator it;

    while(! inputArrayIter->end() ) {
      shared_ptr<ConstChunkIterator> chunkIter = inputArrayIter->getChunk().getConstIterator();
      chunkPosition = inputArrayIter->getPosition();
      #ifdef DEBUG
        log << "Getting into chunk (" << chunkPosition[0] << ", " << chunkPosition[1] << ")." << endl;
      #endif
      for(i=chunkPosition[0]; i<chunkPosition[0] + nChunkSize; i++) {
        if(i == n + nStart) {
          #ifdef DEBUG
            log << "Reaching row " << i << ", exiting." << endl;
          #endif
          break;
        }
        for(j=chunkPosition[1], m=1; j<=chunkPosition[1]+d; j++, m++) {
          if(j == d + 1 + dStart) {
            #ifdef DEBUG
              log << "Reaching column " << j << ", exiting." << endl;
            #endif
            break;
          }
          value = chunkIter->getItem().getDouble();
          tmp.L[m] = value;
          tmp.Q[m] = value * value;
          ++(*chunkIter);
        }
        double Y = tmp.L[idY];
        it = nlq.find(Y);
        if (it == nlq.end()) {
          #ifdef DEBUG
            log << "Cannot find NLQ entry for class " << Y << ", creating new." << endl;
          #endif
          nlq[Y].N = 1;
          nlq[Y].groupId = Y;
        }
        else {
          nlq[Y].N++;
        }
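        // Accumulate L and Q for this class, packing the non-Y columns into slots 1..d
        // and the Y column into slot d+1.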
        for (k=1, l=1; k<=d+1; k++) {
          if (k == idY) {
            continue;
          }
          nlq[Y].L[l] += tmp.L[k];
          nlq[Y].Q[l] += tmp.Q[k];
          l++;
        }
        nlq[Y].L[d+1] += tmp.L[idY];
        nlq[Y].Q[d+1] += tmp.Q[idY];
      }
      ++(*inputArrayIter);
    }

    /*
     * Instance 0 acts as the query coordinator: workers send their per-class NLQ
     * entries to it, and it merges them before writing the result.
     */
    size_t localClassCount = nlq.size();
    #ifdef DEBUG
      log << "localClassCount = " << localClassCount << endl;
    #endif
    if(query->getInstancesCount() > 1) {
      if(query->getInstanceID() != 0) {
        // I am not the coordinator, I should send my NLQ out.
        #ifdef DEBUG
          log << "I am not the coordinator, I should send my NLQ out." << endl;
        #endif
        shared_ptr <SharedBuffer> buf ( new MemoryBuffer(NULL, sizeof(struct NLQ) * localClassCount ));
        struct NLQ *NLQbuf = static_cast<struct NLQ*> (buf->getData());
        for(it = nlq.begin(); it != nlq.end(); it++) {
          *NLQbuf = it->second;
          ++NLQbuf;
        }
        BufSend(0, buf, query);
        #ifdef DEBUG
          log << "Exiting." << endl;
        #endif
        return outputArray;
      }
      else {
        // I am the coordinator, I should collect NLQ from workers.
        #ifdef DEBUG
          log << "I am the coordinator, I should collect NLQ from workers." << endl;
        #endif
        for(InstanceID l = 1; l<query->getInstancesCount(); ++l) {
          shared_ptr<SharedBuffer> buf = BufReceive(l, query);
          if(! buf) {
            #ifdef DEBUG
              log << "Nothing from instance " << l << ", continue." << endl;
            #endif
            continue;
          }
          int64_t remoteClassCount = buf->getSize() / sizeof(struct NLQ);
          struct NLQ* NLQbuf = static_cast<struct NLQ*> (buf->getData());
          #ifdef DEBUG
            log << "Received " << remoteClassCount << " entries from instance " << l << endl;
          #endif
          for(i=0; i<remoteClassCount; ++i) {
            it = nlq.find(NLQbuf->groupId);
            if( it == nlq.end() ) {
              #ifdef DEBUG
                log << "Cannot find NLQ entry for class " << NLQbuf->groupId << ", creating new." << endl;
              #endif
              nlq[NLQbuf->groupId] = *NLQbuf;
            }
            else {
              it->second.N += NLQbuf->N;
              for(j=1; j<=d+1; ++j) {
                it->second.L[j] += NLQbuf->L[j];
                it->second.Q[j] += NLQbuf->Q[j];
              }
            }
            ++NLQbuf;
          }
          #ifdef DEBUG
            log << "Merge complete." << endl;
          #endif
        }
      }// end if getInstanceID() != 0
    }//end if InstancesCount() > 1

    return writeGamma(query);
  }
Example No. 30
inline bool hasSingleAttribute(ArrayDesc const& desc)
{
    return desc.getAttributes().size() == 1
        || (desc.getAttributes().size() == 2 && desc.getAttributes()[1].isEmptyIndicator());
}