template<class Group, class Hash>
RowCollection<Group,Hash>::RowCollection(boost::shared_ptr<Query> const& query,
                                         const string& name,
                                         const Attributes& attributes,
                                         size_t chunkSize)
    : _query(query),
      _attributes(attributes),
      _chunkSize(chunkSize),
      _sizeBuffered(0),
      _mode(RowCollectionModeAppend)
{
    assert(!attributes.empty());
    assert(chunkSize >= 2);

    // Allow the unflushed items to occupy at most (CONFIG_MEM_ARRAY_THRESHOLD / 10) bytes.
    _maxSizeBuffered = Config::getInstance()->getOption<size_t>(CONFIG_MEM_ARRAY_THRESHOLD) * MiB / 10;

    // Append the empty tag to the attributes.
    Attributes attributesWithET(attributes);
    attributesWithET.push_back(AttributeDesc(attributes.size(),
                                             DEFAULT_EMPTY_TAG_ATTRIBUTE_NAME,
                                             TID_BOOL,
                                             AttributeDesc::IS_EMPTY_INDICATOR,
                                             0));

    // Build the 2-D schema: one "Row" per group, items laid out along "Column".
    Dimensions dims(2);
    dims[0] = DimensionDesc("Row", 0, MAX_COORDINATE, 1, 0);
    dims[1] = DimensionDesc("Column", 0, MAX_COORDINATE, _chunkSize, 0);
    ArrayDesc schema(name, attributesWithET, dims);

    // Create the backing MemArray.
    _theArray = make_shared<MemArray>(schema, query);

    // Acquire one array iterator per real attribute.
    _arrayIterators.reserve(attributes.size());
    for (size_t t = 0; t < attributes.size(); ++t) {
        _arrayIterators.push_back(_theArray->getIterator(t));
    }
}
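// A minimal standalone sketch (not part of the class) of how the 2-D schema
// above lays data out: each group occupies one "Row" coordinate, and its items
// are appended along "Column" in chunks of chunkSize. The helper name
// cellToChunkOrigin is hypothetical, for illustration only.
static std::pair<Coordinate, Coordinate>
cellToChunkOrigin(Coordinate row, Coordinate item, size_t chunkSize)
{
    // "Row" has chunk interval 1, so the row coordinate is its own chunk start;
    // "Column" has interval chunkSize, so round the item index down to a multiple of it.
    return std::make_pair(row, item - (item % static_cast<Coordinate>(chunkSize)));
}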
ArrayDesc inferSchema(std::vector<ArrayDesc> schemas, boost::shared_ptr<Query> query)
{
    // Construct and check to ensure settings are legit.
    SplitSettings settings(_parameters, true, query);

    vector<AttributeDesc> attributes(1);
    attributes[0] = AttributeDesc((AttributeID)0, "value", TID_STRING, 0, 0);

    vector<DimensionDesc> dimensions(2);
    dimensions[0] = DimensionDesc("source_instance_id", 0, 0, MAX_COORDINATE, MAX_COORDINATE, 1, 0);
    dimensions[1] = DimensionDesc("chunk_no", 0, 0, MAX_COORDINATE, MAX_COORDINATE, 1, 0);

    return ArrayDesc("split", attributes, dimensions);
}
ArrayDesc inferSchema(std::vector<ArrayDesc> schemas, boost::shared_ptr<Query> query)
{
    assert(schemas.size() == 1);

    ArrayDesc const& desc = schemas[0];
    Dimensions const& dims = desc.getDimensions();
    Attributes const& attrs = desc.getAttributes();

    // The optional first parameter names the attribute to aggregate; default to attribute 0.
    AttributeID aid = 0;
    if (_parameters.size() >= 1) {
        aid = ((boost::shared_ptr<OperatorParamReference>&)_parameters[0])->getObjectNo();
    }

    AggregatePtr maxAggregate = AggregateLibrary::getInstance()->createAggregate(
        "max", TypeLibrary::getType(attrs[aid].getType()));

    Attributes aggAttrs(1);
    aggAttrs[0] = AttributeDesc((AttributeID)0,
                                attrs[aid].getName() + "_max",
                                maxAggregate->getResultType().typeId(),
                                AttributeDesc::IS_NULLABLE,
                                0);

    if (_parameters.size() <= 1) {
        // No group-by dimensions: the result is a single cell.
        Dimensions aggDims(1);
        aggDims[0] = DimensionDesc("i", 0, 0, 0, 0, 1, 0);
        return ArrayDesc(desc.getName(), aggAttrs, aggDims);
    } else {
        // The remaining parameters name the group-by dimensions.
        vector<int> groupBy(_parameters.size() - 1);
        for (size_t i = 0; i < groupBy.size(); i++) {
            groupBy[i] = ((boost::shared_ptr<OperatorParamReference>&)_parameters[i + 1])->getObjectNo();
        }
        Dimensions aggDims(groupBy.size());
        for (size_t i = 0, n = aggDims.size(); i < n; i++) {
            DimensionDesc const& srcDim = dims[groupBy[i]];
            aggDims[i] = DimensionDesc(srcDim.getBaseName(),
                                       srcDim.getStartMin(),
                                       srcDim.getCurrStart(),
                                       srcDim.getCurrEnd(),
                                       srcDim.getEndMax(),
                                       i == 0 && groupBy[i] == 0 ? srcDim.getChunkInterval()
                                                                 : srcDim.getCurrLength(),
                                       0,
                                       srcDim.getType(),
                                       srcDim.getFlags(),
                                       srcDim.getMappingArrayName(),
                                       srcDim.getComment(),
                                       srcDim.getFuncMapOffset(),
                                       srcDim.getFuncMapScale());
        }
        return ArrayDesc(desc.getName(), aggAttrs, aggDims);
    }
}
ArrayDesc inferSchema(std::vector<ArrayDesc> schemas, boost::shared_ptr<Query> query)
{
    Attributes outputAttrs;
    outputAttrs.push_back(AttributeDesc(0, "dummy", TID_DOUBLE, AttributeDesc::IS_NULLABLE, 0));

    Dimensions outputDims;
    outputDims.push_back(DimensionDesc("i", 0, 0, 1, 0));

    return ArrayDesc("test_cache", outputAttrs, outputDims);
}
ArrayDesc inferSchema(std::vector<ArrayDesc> schemas, boost::shared_ptr<Query> query)
{
    assert(schemas.size() == 2);

    ArrayDesc const& patternDesc = schemas[0];
    ArrayDesc const& catalogDesc = schemas[1];
    Attributes const& catalogAttributes = catalogDesc.getAttributes(true);
    Dimensions const& catalogDimensions = catalogDesc.getDimensions();
    Attributes const& patternAttributes = patternDesc.getAttributes(true);
    Dimensions resultDimensions = patternDesc.getDimensions();

    // Output carries the pattern attributes, the catalog attributes, the
    // catalog dimensions (as attributes), plus the empty tag.
    size_t totalAttributes = catalogAttributes.size() + patternAttributes.size()
        + 1 + catalogDimensions.size();
    Attributes matchAttributes(totalAttributes);

    if (catalogDimensions.size() != resultDimensions.size()) {
        stringstream left, right;
        printDimNames(left, resultDimensions);
        printDimNames(right, catalogDimensions);
        throw USER_EXCEPTION(SCIDB_SE_INFER_SCHEMA, SCIDB_LE_DIMENSION_COUNT_MISMATCH)
            << "match" << left.str() << right.str();
    }
    for (size_t i = 0, n = catalogDimensions.size(); i < n; i++) {
        if (!(catalogDimensions[i].getStartMin() == resultDimensions[i].getStartMin()
              && catalogDimensions[i].getChunkInterval() == resultDimensions[i].getChunkInterval()
              && catalogDimensions[i].getChunkOverlap() == resultDimensions[i].getChunkOverlap())) {
            // XXX To do: implement requiresRepart() method, remove interval/overlap checks
            // above, use SCIDB_LE_START_INDEX_MISMATCH here.
            throw USER_EXCEPTION(SCIDB_SE_INFER_SCHEMA, SCIDB_LE_ARRAYS_NOT_CONFORMANT);
        }
    }

    size_t j = 0;
    for (size_t i = 0, n = patternAttributes.size(); i < n; i++, j++) {
        AttributeDesc const& attr = patternAttributes[i];
        matchAttributes[j] = AttributeDesc(j, attr.getName(), attr.getType(), attr.getFlags(),
                                           attr.getDefaultCompressionMethod(), attr.getAliases(),
                                           &attr.getDefaultValue(), attr.getDefaultValueExpr());
    }
    for (size_t i = 0, n = catalogAttributes.size(); i < n; i++, j++) {
        AttributeDesc const& attr = catalogAttributes[i];
        matchAttributes[j] = AttributeDesc(j, "match_" + attr.getName(), attr.getType(), attr.getFlags(),
                                           attr.getDefaultCompressionMethod(), attr.getAliases(),
                                           &attr.getDefaultValue(), attr.getDefaultValueExpr());
    }
    for (size_t i = 0, n = catalogDimensions.size(); i < n; i++, j++) {
        matchAttributes[j] = AttributeDesc(j, "match_" + catalogDimensions[i].getBaseName(),
                                           TID_INT64, 0, 0);
    }
    matchAttributes[j] = AttributeDesc(j, DEFAULT_EMPTY_TAG_ATTRIBUTE_NAME, TID_INDICATOR,
                                       AttributeDesc::IS_EMPTY_INDICATOR, 0);

    // The second parameter bounds the number of collisions per cell; it must
    // be positive and fit in 32 bits.
    int64_t maxCollisions = evaluate(
        ((boost::shared_ptr<OperatorParamLogicalExpression>&)_parameters[1])->getExpression(),
        query, TID_INT64).getInt64();
    if (maxCollisions <= 0 || (int32_t)maxCollisions != maxCollisions) {
        throw USER_EXCEPTION(SCIDB_SE_INFER_SCHEMA, SCIDB_LE_WRONG_OPERATOR_ARGUMENT2) << "positive";
    }
    resultDimensions.push_back(DimensionDesc("collision", 0, 0, maxCollisions - 1, maxCollisions - 1,
                                             (uint32_t)maxCollisions, 0));

    return ArrayDesc("match", matchAttributes, resultDimensions);
}
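// A minimal standalone sketch of the collision-count validation above: the
// value must be positive and must survive a round trip through int32_t, i.e.
// fit in 32 bits. The helper name fitsInPositiveInt32 is hypothetical.
static bool fitsInPositiveInt32(int64_t v)
{
    return v > 0 && static_cast<int64_t>(static_cast<int32_t>(v)) == v;
}
// e.g. fitsInPositiveInt32(100) is true; fitsInPositiveInt32(1LL << 40) is false.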
ArrayDesc inferSchema(std::vector<ArrayDesc> schemas, shared_ptr<Query> query)
{
    ArrayDesc const& inputSchema = schemas[0];
    //FastCountSettings settings (_parameters, true, query);

    vector<DimensionDesc> dimensions(1);
    size_t const nInstances = query->getInstancesCount();
    dimensions[0] = DimensionDesc("i", 0, 0, CoordinateBounds::getMax(), CoordinateBounds::getMax(), 1, 0);
    // dimensions[0] = DimensionDesc("i", 0, 0, nInstances-1, nInstances-1, 1, 0);

    vector<AttributeDesc> attributes;
    attributes.push_back(AttributeDesc((AttributeID)0, "count", TID_UINT64, AttributeDesc::IS_NULLABLE, 0));

    return ArrayDesc("fast_count", attributes, dimensions, defaultPartitioning(),
                     inputSchema.getResidency(), false);
}
ArrayDesc inferSchema(vector<ArrayDesc> inputSchemas, shared_ptr<Query> query)
{
    Attributes atts(1);
    atts[0] = AttributeDesc((AttributeID)0, "success", TID_BOOL, 0, CompressorType::NONE);

    Dimensions dims(1);
    dims[0] = DimensionDesc("i", 0, 0, 0, 0, 1, 0);

    //#ifdef CPP11
    return ArrayDesc("", atts, dims, defaultPartitioning(), query->getDefaultArrayResidency());
    //#else
    //return ArrayDesc("", atts, dims);
    //#endif
}
ArrayDesc inferSchema(std::vector<ArrayDesc> schemas, boost::shared_ptr<Query> query)
{
    Attributes atts(1);
    TypeId type = schemas[0].getAttributes()[0].getType();
    AttributeDesc multAttr((AttributeID)0, "matricize", type, 0, 0);
    atts[0] = multAttr;

    Dimensions const& inputDims = schemas[0].getDimensions();
    Coordinate ndims = inputDims.size();
    Coordinate rowmode = 0;
    Coordinate colmode = 1;
    if (_parameters.size() == 1) {
        // The single parameter picks the 1-based mode that becomes the row dimension.
        rowmode = evaluate(((boost::shared_ptr<OperatorParamLogicalExpression>&)_parameters[0])->getExpression(),
                           query, TID_INT64).getInt64();
        if (rowmode < 1 || rowmode > ndims) {
            throw USER_EXCEPTION(SCIDB_SE_INFER_SCHEMA, SCIDB_LE_OP_MATRICIZE_ERROR1);
        }
        // Pick the last mode that is not the row mode as the column mode,
        // then convert both to 0-based indices.
        if (rowmode < ndims)
            colmode = ndims;
        else
            colmode = ndims - 1;
        rowmode -= 1;
        colmode -= 1;
    } else {
        throw USER_EXCEPTION(SCIDB_SE_INFER_SCHEMA, SCIDB_LE_OP_MATRICIZE_ERROR2);
    }

    // Fold every dimension except the row mode into one column dimension:
    // lengths and chunk intervals both multiply.
    Coordinate len = 1;
    Coordinate chunklen = 1;
    for (Coordinate i = ndims - 1; i >= 0; i--) {
        if (i == rowmode)
            continue;
        len *= inputDims[i].getLength();
        chunklen *= inputDims[i].getChunkInterval();
    }

    Dimensions dims(2);
    dims[0] = inputDims[rowmode];
    dims[1] = DimensionDesc(inputDims[rowmode].getBaseName(),
                            1, len, chunklen, 0,
                            inputDims[rowmode].getType(),
                            inputDims[rowmode].getFlags(),
                            inputDims[rowmode].getMappingArrayName(),
                            inputDims[rowmode].getComment());
    return ArrayDesc("Matricize", atts, dims);
}
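// A small standalone sketch of the folding arithmetic above, under the same
// rule (every dimension except the row mode multiplies into the column
// extent). The helper name foldedExtent is hypothetical, for illustration.
static void foldedExtent(std::vector<std::pair<int64_t, int64_t>> const& dims, // (length, chunkInterval)
                         size_t rowmode, int64_t& len, int64_t& chunklen)
{
    len = 1;
    chunklen = 1;
    for (size_t i = 0; i < dims.size(); ++i) {
        if (i == rowmode) continue;   // the row mode keeps its own dimension
        len      *= dims[i].first;
        chunklen *= dims[i].second;
    }
}
// e.g. a 4x5x6 input chunked 2x5x3 with rowmode 0 folds to len = 30, chunklen = 15.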
inline ArrayDesc createWindowDesc(ArrayDesc const& desc)
{
    // Copy the input dimensions, dropping any chunk overlap.
    Dimensions const& dims = desc.getDimensions();
    Dimensions aggDims(dims.size());
    for (size_t i = 0, n = dims.size(); i < n; i++) {
        DimensionDesc const& srcDim = dims[i];
        aggDims[i] = DimensionDesc(srcDim.getBaseName(),
                                   srcDim.getNamesAndAliases(),
                                   srcDim.getStartMin(),
                                   srcDim.getCurrStart(),
                                   srcDim.getCurrEnd(),
                                   srcDim.getEndMax(),
                                   srcDim.getChunkInterval(),
                                   0,
                                   srcDim.getType(),
                                   srcDim.getFlags(),
                                   srcDim.getMappingArrayName(),
                                   srcDim.getComment(),
                                   srcDim.getFuncMapOffset(),
                                   srcDim.getFuncMapScale());
    }

    // The first 2*nDims parameters are the window bounds; the rest are aggregate calls.
    ArrayDesc output(desc.getName(), Attributes(), aggDims);
    for (size_t i = dims.size() * 2, size = _parameters.size(); i < size; i++) {
        addAggregatedAttribute((shared_ptr<OperatorParamAggregateCall>&)_parameters[i], desc, output);
    }

    // Preserve the empty bitmap attribute, if any.
    if (desc.getEmptyBitmapAttribute()) {
        AttributeDesc const* eAtt = desc.getEmptyBitmapAttribute();
        output.addAttribute(AttributeDesc(output.getAttributes().size(), eAtt->getName(),
                                          eAtt->getType(), eAtt->getFlags(),
                                          eAtt->getDefaultCompressionMethod()));
    }
    return output;
}
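// For reference, the parameter layout consumed above, assuming the usual
// window() AFL form of two window-edge values per dimension followed by one
// or more aggregate calls (example only):
//   window(A, 1, 1, 0, 0, sum(val))
// For a 2-D input, parameters 0..3 are the per-dimension window edges and
// parameter 4 is the aggregate call, which is why the loop starts at dims.size() * 2.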
/**
 * Test the chunk limit once.
 * The method sets the chunk limit to the indicated number,
 * then tries to create a chunk of the indicated size and
 * type, using the indicated mode. If "expectFail" is true
 * then the method looks for the "CHUNK_TOO_LARGE" exception,
 * and fails if it does not see it. If "expectFail" is false,
 * the method does the opposite. Before exiting, the method
 * always resets the chunk limit to the original value.
 *
 * @param[in] query
 * @param[in] limit      the desired chunk limit (as a string)
 * @param[in] type       the value type
 * @param[in] count      how many values
 * @param[in] mode       iteration mode
 * @param[in] expectFail is an error expected?
 *
 * @throw SCIDB_SE_INTERNAL::SCIDB_LE_UNITTEST_FAILED
 */
void testOnce_ChunkLimit(std::shared_ptr<Query>& query,
                         string const& limit,
                         TypeId const& type,
                         int count,
                         int mode,
                         bool expectFail)
{
    bool failed = false;

    LOG4CXX_DEBUG(logger, "ChunkLimit UnitTest Attempt [type=" << type << "][count=" << count
                  << "][mode=" << mode << "][expectFail=" << expectFail << "]");

    // Array schema
    vector<AttributeDesc> attributes(1);
    attributes[0] = AttributeDesc((AttributeID)0, "X", type, AttributeDesc::IS_NULLABLE, 0);
    vector<DimensionDesc> dimensions(1);
    dimensions[0] = DimensionDesc(string("dummy_dimension"), 0, count, count, 0);
    ArrayDesc schema("dummy_array", addEmptyTagAttribute(attributes), dimensions,
                     defaultPartitioning(), query->getDefaultArrayResidency());

    // Test array
    std::shared_ptr<MemArray> array(new MemArray(schema, query));

    // Set the chunk size limit.
    std::string oldLimit;
    try {
        oldLimit = Config::getInstance()->setOptionValue("chunk-size-limit-mb", limit);
    } catch (Exception const& e) {
        LOG4CXX_DEBUG(logger, "ChunkLimit UnitTest unexpected exception: "
                      << e.getStringifiedLongErrorCode());
        throw SYSTEM_EXCEPTION(SCIDB_SE_INTERNAL, SCIDB_LE_UNITTEST_FAILED)
            << "UnitTestChunkLimitPhysical" << "setOptionValue";
    }

    // Try to create the chunk.
    try {
        buildRandomArrayChunk(query, *array, type, count, mode);
    } catch (Exception const& x) {
        if (!expectFail) {
            LOG4CXX_DEBUG(logger, "ChunkLimit UnitTest unexpected exception: "
                          << x.getStringifiedLongErrorCode());
            failed = true;
        } else if (x.getLongErrorCode() != SCIDB_LE_CHUNK_TOO_LARGE) {
            LOG4CXX_DEBUG(logger, "ChunkLimit UnitTest incorrect exception: "
                          << x.getStringifiedLongErrorCode());
            failed = true;
        }
    }

    // Set the chunk size limit back.
    try {
        Config::getInstance()->setOptionValue("chunk-size-limit-mb", oldLimit);
    } catch (Exception const& e) {
        LOG4CXX_DEBUG(logger, "ChunkLimit UnitTest unexpected exception: "
                      << e.getStringifiedLongErrorCode());
        throw SYSTEM_EXCEPTION(SCIDB_SE_INTERNAL, SCIDB_LE_UNITTEST_FAILED)
            << "UnitTestChunkLimitPhysical" << "setOptionValue2";
    }

    if (failed) {
        LOG4CXX_DEBUG(logger, "ChunkLimit UnitTest Failed [type=" << type << "][count=" << count
                      << "][mode=" << mode << "][expectFail=" << expectFail << "]");
        throw SYSTEM_EXCEPTION(SCIDB_SE_INTERNAL, SCIDB_LE_UNITTEST_FAILED)
            << "UnitTestChunkLimitPhysical" << "unexpected status";
    } else {
        LOG4CXX_DEBUG(logger, "ChunkLimit UnitTest Success [type=" << type << "][count=" << count
                      << "][mode=" << mode << "][expectFail=" << expectFail << "]");
    }
}
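// The test above restores "chunk-size-limit-mb" by hand on every exit path. A
// hedged alternative sketch (not in the source): an RAII guard that resets the
// option when it leaves scope, relying only on the Config::setOptionValue call
// already used above, which returns the previous value. The class name
// ConfigOptionGuard is hypothetical.
class ConfigOptionGuard
{
public:
    ConfigOptionGuard(std::string const& name, std::string const& value)
        : _name(name),
          _old(Config::getInstance()->setOptionValue(name, value)) {}
    ~ConfigOptionGuard()
    {
        try {
            Config::getInstance()->setOptionValue(_name, _old);
        } catch (...) { /* never throw from a destructor */ }
    }
private:
    std::string _name;
    std::string _old;
};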
ArrayDesc inferSchema(std::vector<ArrayDesc> schemas, boost::shared_ptr<Query> query)
{
    assert(schemas.size() == 2);

    if (!hasSingleAttribute(schemas[0]) || !hasSingleAttribute(schemas[1]))
        throw USER_EXCEPTION(SCIDB_SE_INFER_SCHEMA, SCIDB_LE_OP_MULTIPLY_ERROR2);
    if (schemas[0].getDimensions().size() != 2 || schemas[1].getDimensions().size() != 2)
        throw USER_EXCEPTION(SCIDB_SE_INFER_SCHEMA, SCIDB_LE_OP_MULTIPLY_ERROR3);
    if (schemas[0].getDimensions()[0].getLength() == INFINITE_LENGTH
        || schemas[0].getDimensions()[1].getLength() == INFINITE_LENGTH
        || schemas[1].getDimensions()[0].getLength() == INFINITE_LENGTH
        || schemas[1].getDimensions()[1].getLength() == INFINITE_LENGTH)
        throw USER_EXCEPTION(SCIDB_SE_INFER_SCHEMA, SCIDB_LE_OP_MULTIPLY_ERROR4);

    // The second dimension of each input is the shared dimension; the extents
    // and starts must agree.
    if (schemas[0].getDimensions()[1].getLength() != schemas[1].getDimensions()[1].getLength()
        || schemas[0].getDimensions()[1].getStart() != schemas[1].getDimensions()[1].getStart())
        throw USER_EXCEPTION(SCIDB_SE_INFER_SCHEMA, SCIDB_LE_OP_MULTIPLY_ERROR5);

    // FIXME: This condition needs to go away later.
    if (schemas[0].getDimensions()[1].getChunkInterval() != schemas[1].getDimensions()[1].getChunkInterval())
        throw USER_EXCEPTION(SCIDB_SE_INFER_SCHEMA, SCIDB_LE_OP_MULTIPLY_ERROR6);

    if (schemas[0].getAttributes()[0].getType() != schemas[1].getAttributes()[0].getType())
        throw USER_EXCEPTION(SCIDB_SE_INFER_SCHEMA, SCIDB_LE_OP_MULTIPLY_ERROR7);
    if (schemas[0].getAttributes()[0].isNullable() || schemas[1].getAttributes()[0].isNullable())
        throw USER_EXCEPTION(SCIDB_SE_INFER_SCHEMA, SCIDB_LE_OP_MULTIPLY_ERROR8);

    Attributes atts(1);
    TypeId type = schemas[0].getAttributes()[0].getType();
    AttributeDesc multAttr((AttributeID)0, "multiply", type, 0, 0);
    atts[0] = multAttr;

    // The result is indexed by the first dimension of each input; disambiguate
    // the second name if the base names collide.
    Dimensions dims(2);
    DimensionDesc const& d1 = schemas[0].getDimensions()[0];
    dims[0] = DimensionDesc(d1.getBaseName(),
                            d1.getNamesAndAliases(),
                            d1.getStartMin(), d1.getCurrStart(), d1.getCurrEnd(), d1.getEndMax(),
                            d1.getChunkInterval(), 0,
                            d1.getType(), d1.getFlags(),
                            d1.getMappingArrayName(), d1.getComment(),
                            d1.getFuncMapOffset(), d1.getFuncMapScale());
    DimensionDesc const& d2 = schemas[1].getDimensions()[0];
    dims[1] = DimensionDesc(d1.getBaseName() == d2.getBaseName() ? d1.getBaseName() + "2" : d2.getBaseName(),
                            d2.getNamesAndAliases(),
                            d2.getStartMin(), d2.getCurrStart(), d2.getCurrEnd(), d2.getEndMax(),
                            d2.getChunkInterval(), 0,
                            d2.getType(), d2.getFlags(),
                            d2.getMappingArrayName(), d2.getComment(),
                            d2.getFuncMapOffset(), d2.getFuncMapScale());
    return ArrayDesc("MultiplyRow", atts, dims);
}
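// A worked example of the conformance rules above (hypothetical schemas, for
// illustration only): given
//   A <val:double> [i=0:99,10,0]  [k=0:49,5,0]
//   B <val:double> [j=0:199,10,0] [k=0:49,5,0]
// the shared second dimensions agree in length, start, and chunk interval, so
// the inferred result is MultiplyRow <multiply:double> [i=0:99,10,0] [j=0:199,10,0].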
ArrayDesc inferSchema(std::vector<ArrayDesc> schemas, std::shared_ptr<Query> query)
{
    assert(schemas.size() == 1);

    ArrayDesc const& inputDesc = schemas[0];
    size_t nDims = inputDesc.getDimensions().size();
    Dimensions outDims(nDims);

    // Count how many trailing parameters are of each type.
    size_t numAggregateCalls = 0;
    size_t numChunkSizes = 0;
    for (size_t i = nDims, n = _parameters.size(); i < n; ++i) {
        if (_parameters[i]->getParamType() == PARAM_AGGREGATE_CALL) {
            ++numAggregateCalls;
        } else { // chunk size
            ++numChunkSizes;
        }
    }
    if (numChunkSizes && numChunkSizes != nDims) {
        throw USER_EXCEPTION(SCIDB_SE_INFER_SCHEMA, SCIDB_LE_NUM_CHUNKSIZES_NOT_MATCH_NUM_DIMS)
            << "regrid()";
    }

    // Generate the output dims: each shrinks to ceil(length / blockSize) cells.
    for (size_t i = 0; i < nDims; i++) {
        int64_t blockSize = evaluate(
            ((std::shared_ptr<OperatorParamLogicalExpression>&)_parameters[i])->getExpression(),
            query, TID_INT64).getInt64();
        if (blockSize <= 0) {
            throw USER_QUERY_EXCEPTION(SCIDB_SE_INFER_SCHEMA, SCIDB_LE_OP_REGRID_ERROR1,
                                       _parameters[i]->getParsingContext());
        }

        DimensionDesc const& srcDim = inputDesc.getDimensions()[i];
        int64_t chunkSize = srcDim.getRawChunkInterval();
        if (numChunkSizes) {
            size_t index = i + nDims + numAggregateCalls;
            chunkSize = evaluate(
                ((std::shared_ptr<OperatorParamLogicalExpression>&)_parameters[index])->getExpression(),
                query, TID_INT64).getInt64();
            if (chunkSize <= 0) {
                throw USER_EXCEPTION(SCIDB_SE_INFER_SCHEMA, SCIDB_LE_CHUNK_SIZE_MUST_BE_POSITIVE);
            }
        }

        outDims[i] = DimensionDesc(srcDim.getBaseName(),
                                   srcDim.getNamesAndAliases(),
                                   srcDim.getStartMin(),
                                   srcDim.getStartMin(),
                                   srcDim.getEndMax() == CoordinateBounds::getMax()
                                       ? CoordinateBounds::getMax()
                                       : srcDim.getStartMin() + (srcDim.getLength() + blockSize - 1) / blockSize - 1,
                                   srcDim.getEndMax() == CoordinateBounds::getMax()
                                       ? CoordinateBounds::getMax()
                                       : srcDim.getStartMin() + (srcDim.getLength() + blockSize - 1) / blockSize - 1,
                                   chunkSize,
                                   0);
    }

    // Input and output dimensions are 1-to-1, so...
    _fixer.takeAllDimensions(inputDesc.getDimensions());

    ArrayDesc outSchema(inputDesc.getName(), Attributes(), outDims,
                        defaultPartitioning(), query->getDefaultArrayResidency());
    for (size_t i = nDims, j = nDims + numAggregateCalls; i < j; i++) {
        bool isInOrderAggregation = false;
        addAggregatedAttribute((std::shared_ptr<OperatorParamAggregateCall>&)_parameters[i],
                               inputDesc, outSchema, isInOrderAggregation);
    }

    AttributeDesc et((AttributeID)outSchema.getAttributes().size(),
                     DEFAULT_EMPTY_TAG_ATTRIBUTE_NAME, TID_INDICATOR,
                     AttributeDesc::IS_EMPTY_INDICATOR, 0);
    outSchema.addAttribute(et);
    return outSchema;
}
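// A minimal sketch of the regridded-extent arithmetic above: each output
// dimension spans ceil(length / blockSize) cells, computed with the usual
// integer trick. The helper name regriddedLength is hypothetical.
static int64_t regriddedLength(int64_t length, int64_t blockSize)
{
    return (length + blockSize - 1) / blockSize;   // integer ceiling division
}
// e.g. regriddedLength(10, 3) == 4 : the cells cover {0..2}, {3..5}, {6..8}, {9}.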