/**
 * Infer the output schema of the "max" aggregate over a single input array.
 *
 * The first parameter, when present, references the attribute to aggregate
 * (attribute 0 is used otherwise). Every further parameter references a
 * dimension of the input that is kept in the output.
 *
 * @param schemas exactly one input schema
 * @param query   the query context (not used by this function)
 * @return a schema named after the input, holding one nullable attribute
 *         "<attribute name>_max" and either a single dimension "i" or one
 *         dimension per dimension parameter
 */
ArrayDesc inferSchema(std::vector< ArrayDesc> schemas, boost::shared_ptr< Query> query)
{
    assert(schemas.size() == 1);

    ArrayDesc const& inputDesc = schemas[0];
    Dimensions const& inputDims = inputDesc.getDimensions();
    Attributes const& inputAttrs = inputDesc.getAttributes();

    // Attribute to aggregate: first parameter if supplied, attribute 0 otherwise.
    AttributeID aggregatedId = 0;
    if (_parameters.size() >= 1)
    {
        aggregatedId = ((boost::shared_ptr<OperatorParamReference>&)_parameters[0])->getObjectNo();
    }

    // The result type is whatever the "max" aggregate reports for the attribute type.
    AggregatePtr maxAggregate = AggregateLibrary::getInstance()->createAggregate(
        "max", TypeLibrary::getType(inputAttrs[aggregatedId].getType()));

    Attributes resultAttrs(1);
    resultAttrs[0] = AttributeDesc((AttributeID)0,
                                   inputAttrs[aggregatedId].getName() + "_max",
                                   maxAggregate->getResultType().typeId(),
                                   AttributeDesc::IS_NULLABLE,
                                   0);

    // No dimension parameters: the result lives in a single dimension "i".
    if (_parameters.size() <= 1)
    {
        Dimensions singleDim(1);
        singleDim[0] = DimensionDesc("i", 0, 0, 0, 0, 1, 0);
        return ArrayDesc(inputDesc.getName(), resultAttrs, singleDim);
    }

    // One output dimension per dimension parameter (parameters 1..N).
    size_t const nGroupBy = _parameters.size() - 1;
    Dimensions resultDims(nGroupBy);
    for (size_t pos = 0; pos < nGroupBy; pos++)
    {
        int const srcDimNo = ((boost::shared_ptr<OperatorParamReference>&)_parameters[pos + 1])->getObjectNo();
        DimensionDesc const& srcDim = inputDims[srcDimNo];

        // The source chunk interval is kept only when the first output dimension
        // is also the first input dimension; otherwise the current length is used.
        resultDims[pos] = DimensionDesc(
            srcDim.getBaseName(),
            srcDim.getStartMin(),
            srcDim.getCurrStart(),
            srcDim.getCurrEnd(),
            srcDim.getEndMax(),
            pos == 0 && srcDimNo == 0 ? srcDim.getChunkInterval() : srcDim.getCurrLength(),
            0,
            srcDim.getType(),
            srcDim.getFlags(),
            srcDim.getMappingArrayName(),
            srcDim.getComment(),
            srcDim.getFuncMapOffset(),
            srcDim.getFuncMapScale());
    }
    return ArrayDesc(inputDesc.getName(), resultAttrs, resultDims);
}
/**
 * Infer the output schema of a slice: the input schema with the sliced
 * dimensions removed.
 *
 * Parameters come in pairs; the even-indexed parameter of each pair names the
 * dimension to remove. A name matches a dimension either by its base name or
 * in the positional form "_N", where N is the 1-based dimension number.
 *
 * @param schemas exactly one input schema
 * @param query   the query context (not used by this function)
 * @return the input schema restricted to the dimensions that are not sliced
 * @throws USER_EXCEPTION SCIDB_LE_OP_SLICE_ERROR1 when no dimension would remain
 * @throws USER_QUERY_EXCEPTION SCIDB_LE_DUPLICATE_DIMENSION_NAME when more
 *         dimensions survive than the parameter count allows (a slice name
 *         matched nothing or was repeated)
 */
ArrayDesc inferSchema(std::vector<ArrayDesc> schemas, boost::shared_ptr<Query> query)
{
    assert(schemas.size() == 1);

    ArrayDesc const& schema = schemas[0];
    Dimensions const& dims = schema.getDimensions();
    size_t const nDims = dims.size();
    size_t const nParams = _parameters.size();
    assert((nParams & 1) == 0 || nParams >= nDims*2);

    // Reject before sizing the result: nDims - nSliced would wrap around as
    // size_t when more dimensions are sliced than the input has.
    size_t const nSliced = nParams / 2;
    if (nSliced >= nDims)
    {
        throw USER_EXCEPTION(SCIDB_SE_INFER_SCHEMA, SCIDB_LE_OP_SLICE_ERROR1);
    }
    Dimensions newDims(nDims - nSliced);

    // Collect the dimension name from the first parameter of each pair.
    // Iterating over pairs keeps the index inside sliceDimName even when the
    // parameter count is odd.
    std::vector<std::string> sliceDimName(nSliced);
    for (size_t pair = 0; pair < nSliced; pair++)
    {
        sliceDimName[pair] = ((boost::shared_ptr<OperatorParamReference>&)_parameters[pair * 2])->getObjectName();
    }

    size_t kept = 0;
    for (size_t i = 0; i < nDims; i++)
    {
        const std::string dimName = dims[i].getBaseName();

        // A dimension is sliced when some parameter names it directly or
        // refers to it positionally as "_<i+1>".
        bool sliced = false;
        for (size_t k = 0; k < sliceDimName.size() && !sliced; k++)
        {
            std::string const& candidate = sliceDimName[k];
            sliced = candidate == dimName
                || (candidate[0] == '_' && static_cast<size_t>(atoi(candidate.c_str() + 1)) == i + 1);
        }
        if (sliced)
        {
            continue;
        }

        if (kept >= newDims.size())
        {
            // i counts dimensions, not parameters, so clamp it before using it
            // to pick a parsing context. nParams >= 2 whenever this is reached.
            size_t const ctxParam = i < nParams ? i : nParams - 1;
            throw USER_QUERY_EXCEPTION(SCIDB_SE_INFER_SCHEMA, SCIDB_LE_DUPLICATE_DIMENSION_NAME,
                                       _parameters[ctxParam]->getParsingContext()) << dimName;
        }
        newDims[kept++] = dims[i];
    }
    return ArrayDesc(schema.getName(), schema.getAttributes(), newDims);
}
/**
 * Determine the schema of the output.
 *
 * inferSchema is called on the coordinator instance during query planning and
 * may be called several times while the planner works. It is always called
 * with the same inputs for the same query, so it must behave
 * deterministically; the shape of the output may still vary with the inputs
 * and parameters.
 *
 * @param schemas all of the schemas of the input arrays (if the operator accepts any)
 * @param query   the query context
 * @return the schema of the output, as described above
 */
ArrayDesc inferSchema(vector< ArrayDesc> schemas, shared_ptr< Query> query)
{
    /*
     * A single string attribute: id 0, name "instance_status", type string,
     * no flags, no default compression. An attribute's id is its position in
     * the attributes vector, counted from 0.
     */
    AttributeDesc statusAttribute(0, "instance_status", TID_STRING, 0, 0);
    Attributes attributes(1, statusAttribute);

    /*
     * Append the empty tag attribute. Arrays carrying the empty tag are
     * "emptyable": some of their cells may be empty. Adding it to every
     * constructed array is good practice.
     */
    attributes = addEmptyTagAttribute(attributes);

    /*
     * One dimension, from 0 to "*", with a chunk size of 1. So little data is
     * returned that the chunk size does not matter.
     */
    DimensionDesc instanceDimension("instance_no", 0, MAX_COORDINATE, 1, 0);
    Dimensions dimensions(1, instanceDimension);

    /* The first argument is the name of the returned array. */
    return ArrayDesc("hello_instances", attributes, dimensions);
}
/**
 * Infer the output schema of a projection: the referenced attributes of the
 * input, in parameter order, renumbered from 0.
 *
 * If none of the selected attributes is the empty indicator and the input has
 * an empty bitmap attribute, that attribute is appended as the last one.
 *
 * @param schemas exactly one input schema
 * @param query   the query context (not used by this function)
 * @return a schema with the input's name and dimensions and the projected attributes
 */
ArrayDesc inferSchema(std::vector< ArrayDesc> schemas, std::shared_ptr< Query> query)
{
    assert(schemas.size() == 1);

    size_t const nParams = _parameters.size();

    // Every parameter is expected to be an attribute reference.
    for (size_t p = 0; p < nParams; ++p)
    {
        assert(((std::shared_ptr<OperatorParamReference>&)_parameters[p])->getParamType() == PARAM_ATTRIBUTE_REF);
    }

    const Attributes& inputAttrs = schemas[0].getAttributes();
    Attributes projected;
    bool sawEmptyIndicator = false;

    for (size_t pos = 0; pos < nParams; ++pos)
    {
        const AttributeDesc& src =
            inputAttrs[((std::shared_ptr<OperatorParamReference>&)_parameters[pos])->getObjectNo()];

        // Same description as the source attribute, but with id = output position.
        projected.push_back(AttributeDesc(pos,
                                          src.getName(),
                                          src.getType(),
                                          src.getFlags(),
                                          src.getDefaultCompressionMethod(),
                                          src.getAliases(),
                                          &src.getDefaultValue(),
                                          src.getDefaultValueExpr()));
        if (src.isEmptyIndicator())
        {
            sawEmptyIndicator = true;
        }
    }

    // Carry the input's empty bitmap along unless it was projected explicitly.
    if (!sawEmptyIndicator)
    {
        AttributeDesc const* emptyBitmap = schemas[0].getEmptyBitmapAttribute();
        if (emptyBitmap)
        {
            projected.push_back(AttributeDesc(nParams,
                                              emptyBitmap->getName(),
                                              emptyBitmap->getType(),
                                              emptyBitmap->getFlags(),
                                              emptyBitmap->getDefaultCompressionMethod(),
                                              emptyBitmap->getAliases()));
        }
    }

    return ArrayDesc(schemas[0].getName(), projected, schemas[0].getDimensions(), defaultPartitioning());
}
/**
 * Return the fixed output schema "test_cache": one nullable double attribute
 * "dummy" and one dimension "i". Neither the input schemas nor the query are
 * consulted.
 *
 * @param schemas input schemas (ignored)
 * @param query   the query context (ignored)
 * @return the constant "test_cache" schema
 */
ArrayDesc inferSchema(std::vector< ArrayDesc> schemas, boost::shared_ptr< Query> query)
{
    Attributes attributes(1, AttributeDesc(0, "dummy", TID_DOUBLE, AttributeDesc::IS_NULLABLE, 0));
    Dimensions dimensions(1, DimensionDesc("i",0,0,1,0));
    return ArrayDesc("test_cache", attributes, dimensions);
}
/**
 * Infer the output schema of "match" for a pattern array (schemas[0]) and a
 * catalog array (schemas[1]).
 *
 * Output attributes, in order: the pattern attributes, the catalog attributes
 * prefixed with "match_", one int64 attribute "match_<dimension>" per catalog
 * dimension, and the empty tag. Output dimensions are the pattern dimensions
 * plus a trailing "collision" dimension sized by the second operator parameter.
 *
 * @param schemas the pattern and catalog schemas
 * @param query   the query context, used to evaluate the collision limit
 * @return the schema named "match"
 * @throws USER_EXCEPTION SCIDB_LE_DIMENSION_COUNT_MISMATCH when the inputs
 *         differ in dimension count
 * @throws USER_EXCEPTION SCIDB_LE_ARRAYS_NOT_CONFORMANT when a dimension pair
 *         differs in start, chunk interval or chunk overlap
 * @throws USER_EXCEPTION SCIDB_LE_WRONG_OPERATOR_ARGUMENT2 when the collision
 *         limit is not positive or does not fit in 32 bits
 */
ArrayDesc inferSchema(std::vector< ArrayDesc> schemas, boost::shared_ptr< Query> query)
{
    assert(schemas.size() == 2);

    ArrayDesc const& patternDesc = schemas[0];
    ArrayDesc const& catalogDesc = schemas[1];

    Attributes const& catalogAttrs = catalogDesc.getAttributes(true);
    Dimensions const& catalogDims = catalogDesc.getDimensions();
    Attributes const& patternAttrs = patternDesc.getAttributes(true);
    Dimensions outDims = patternDesc.getDimensions();

    // pattern attributes + catalog attributes + catalog coordinates + empty tag
    size_t const nOutAttrs = catalogAttrs.size() + patternAttrs.size() + 1 + catalogDims.size();
    Attributes outAttrs(nOutAttrs);

    if (catalogDims.size() != outDims.size())
    {
        std::stringstream patternNames;
        std::stringstream catalogNames;
        printDimNames(patternNames, outDims);
        printDimNames(catalogNames, catalogDims);
        throw USER_EXCEPTION(SCIDB_SE_INFER_SCHEMA, SCIDB_LE_DIMENSION_COUNT_MISMATCH)
            << "match" << patternNames.str() << catalogNames.str();
    }

    for (size_t d = 0, nDims = catalogDims.size(); d < nDims; d++)
    {
        DimensionDesc const& catDim = catalogDims[d];
        DimensionDesc const& patDim = outDims[d];
        if (catDim.getStartMin() != patDim.getStartMin()
            || catDim.getChunkInterval() != patDim.getChunkInterval()
            || catDim.getChunkOverlap() != patDim.getChunkOverlap())
        {
            // XXX To do: implement requiresRepart() method, remove interval/overlap checks
            // above, use SCIDB_LE_START_INDEX_MISMATCH here.
            throw USER_EXCEPTION(SCIDB_SE_INFER_SCHEMA, SCIDB_LE_ARRAYS_NOT_CONFORMANT);
        }
    }

    // 'slot' is both the position in outAttrs and the id of the attribute stored there.
    size_t slot = 0;

    for (size_t a = 0; a < patternAttrs.size(); a++, slot++)
    {
        AttributeDesc const& src = patternAttrs[a];
        outAttrs[slot] = AttributeDesc(slot, src.getName(), src.getType(), src.getFlags(),
                                       src.getDefaultCompressionMethod(), src.getAliases(),
                                       &src.getDefaultValue(), src.getDefaultValueExpr());
    }

    for (size_t a = 0; a < catalogAttrs.size(); a++, slot++)
    {
        AttributeDesc const& src = catalogAttrs[a];
        outAttrs[slot] = AttributeDesc(slot, "match_" + src.getName(), src.getType(), src.getFlags(),
                                       src.getDefaultCompressionMethod(), src.getAliases(),
                                       &src.getDefaultValue(), src.getDefaultValueExpr());
    }

    for (size_t d = 0; d < catalogDims.size(); d++, slot++)
    {
        outAttrs[slot] = AttributeDesc(slot, "match_" + catalogDims[d].getBaseName(), TID_INT64, 0, 0);
    }

    outAttrs[slot] = AttributeDesc(slot, DEFAULT_EMPTY_TAG_ATTRIBUTE_NAME, TID_INDICATOR,
                                   AttributeDesc::IS_EMPTY_INDICATOR, 0);

    int64_t maxCollisions = evaluate(
        ((boost::shared_ptr<OperatorParamLogicalExpression>&)_parameters[1])->getExpression(),
        query, TID_INT64).getInt64();

    // The limit must be positive and survive a round trip through int32_t.
    if (maxCollisions <= 0 || (int32_t)maxCollisions != maxCollisions)
    {
        throw USER_EXCEPTION(SCIDB_SE_INFER_SCHEMA, SCIDB_LE_WRONG_OPERATOR_ARGUMENT2) << "positive";
    }

    outDims.push_back(DimensionDesc("collision", 0, 0, maxCollisions-1, maxCollisions-1,
                                    (uint32_t)maxCollisions, 0));
    return ArrayDesc("match", outAttrs, outDims);
}
/*
 * inferSchema tells the query planner the shape of the output array.
 * All operators must define this function.
 *
 * The output has the name, attributes and dimensions of the first input.
 * A SYSTEM_EXCEPTION is raised when the input's first attribute is not a string.
 */
ArrayDesc inferSchema(vector< ArrayDesc> schemas, shared_ptr< Query> query)
{
    ArrayDesc const& input = schemas[0];

    // Only the type of the first attribute is examined.
    bool const firstIsString = input.getAttributes(true)[0].getType() == TID_STRING;
    if (!firstIsString)
    {
        throw SYSTEM_EXCEPTION(SCIDB_SE_INTERNAL, SCIDB_LE_ILLEGAL_OPERATION)
            << "cu requires a single string-valued attribute";
    }

    Attributes attributes(input.getAttributes());
    Dimensions dimensions(input.getDimensions());
    return ArrayDesc(input.getName(), attributes, dimensions);
}
/**
 * Return the fixed output schema of "split": one string attribute "value"
 * over the two dimensions "source_instance_id" and "chunk_no".
 *
 * @param schemas input schemas (not consulted)
 * @param query   the query context, handed to the settings parser
 * @return the schema named "split"
 */
ArrayDesc inferSchema(std::vector< ArrayDesc> schemas, boost::shared_ptr< Query> query)
{
    // Construct and check to ensure settings are legit; the object itself is
    // not used afterwards.
    // NOTE: this comment must stay on its own line -- a trailing "//" comment
    // would swallow any code that follows it on the same line.
    SplitSettings settings (_parameters, true, query);

    std::vector<AttributeDesc> attributes(1);
    attributes[0] = AttributeDesc((AttributeID)0, "value", TID_STRING, 0, 0);

    std::vector<DimensionDesc> dimensions(2);
    dimensions[0] = DimensionDesc("source_instance_id", 0, 0, MAX_COORDINATE, MAX_COORDINATE, 1, 0);
    dimensions[1] = DimensionDesc("chunk_no", 0, 0, MAX_COORDINATE, MAX_COORDINATE, 1, 0);

    return ArrayDesc("split", attributes, dimensions);
}
/**
 * Return the output schema of "fast_count": one nullable uint64 attribute
 * "count" over a single dimension "i", using the default partitioning and
 * the residency of the first input.
 *
 * @param schemas input schemas; only the residency of schemas[0] is used
 * @param query   the query context
 * @return the schema named "fast_count"
 */
ArrayDesc inferSchema(std::vector< ArrayDesc> schemas, shared_ptr< Query> query)
{
    ArrayDesc const& inputSchema = schemas[0];

    // Settings validation is currently disabled:
    //FastCountSettings settings (_parameters, true, query);

    vector<DimensionDesc> dimensions(1);

    // Only referenced by the disabled alternative below.
    size_t const nInstances = query->getInstancesCount();

    dimensions[0] = DimensionDesc("i", 0, 0, CoordinateBounds::getMax(), CoordinateBounds::getMax(), 1, 0);
    // Disabled alternative that bounds the dimension by the instance count:
    // dimensions[0] = DimensionDesc("i", 0, 0, nInstances-1, nInstances-1, 1, 0);

    vector<AttributeDesc> attributes;
    attributes.push_back(AttributeDesc((AttributeID)0, "count", TID_UINT64, AttributeDesc::IS_NULLABLE, 0));

    return ArrayDesc("fast_count", attributes, dimensions, defaultPartitioning(), inputSchema.getResidency(),false);
}
/**
 * Return a fixed, unnamed output schema: one boolean attribute "success"
 * over a single dimension "i", with the default partitioning and the query's
 * default array residency.
 *
 * @param inputSchemas input schemas (not consulted)
 * @param query        the query context, source of the default residency
 * @return the one-attribute "success" schema
 */
ArrayDesc inferSchema(vector<ArrayDesc> inputSchemas, shared_ptr<Query> query)
{
    Attributes atts(1);
    atts[0] = AttributeDesc((AttributeID)0, "success", TID_BOOL, 0, CompressorType::NONE );

    Dimensions dims(1);
    dims[0] = DimensionDesc("i", 0, 0, 0, 0, 1, 0);

    // The conditional-compilation markers are disabled; each one stays on its
    // own line so that it cannot comment out the live return statement.
    //#ifdef CPP11
    return ArrayDesc("", atts, dims, defaultPartitioning(), query->getDefaultArrayResidency());
    //#else
    //return ArrayDesc("", atts, dims);
    //#endif
}
/**
 * Infer the output schema of "Matricize": a two-dimensional array whose first
 * dimension is the selected input dimension and whose second dimension
 * combines all remaining input dimensions.
 *
 * The single operator parameter is the 1-based number of the dimension that
 * becomes the row dimension.
 *
 * @param schemas input schemas; schemas[0] supplies the attribute type and dimensions
 * @param query   the query context, used to evaluate the parameter
 * @return the schema named "Matricize" with one attribute "matricize"
 * @throws USER_EXCEPTION SCIDB_LE_OP_MATRICIZE_ERROR2 unless exactly one
 *         parameter is given
 * @throws USER_EXCEPTION SCIDB_LE_OP_MATRICIZE_ERROR1 when the parameter is
 *         outside 1..number of dimensions
 */
ArrayDesc inferSchema(std::vector< ArrayDesc> schemas, boost::shared_ptr< Query> query)
{
    Attributes atts(1);
    TypeId type = schemas[0].getAttributes()[0].getType();
    atts[0] = AttributeDesc((AttributeID)0, "matricize", type, 0, 0);

    Dimensions const& inputDims = schemas[0].getDimensions();
    Coordinate const ndims = inputDims.size();

    if (_parameters.size() != 1)
    {
        throw USER_EXCEPTION(SCIDB_SE_INFER_SCHEMA, SCIDB_LE_OP_MATRICIZE_ERROR2);
    }

    Coordinate rowmode = evaluate(
        ((boost::shared_ptr<OperatorParamLogicalExpression>&)_parameters[0])->getExpression(),
        query, TID_INT64).getInt64();
    if (rowmode < 1 || rowmode > ndims)
    {
        throw USER_EXCEPTION(SCIDB_SE_INFER_SCHEMA, SCIDB_LE_OP_MATRICIZE_ERROR1);
    }
    rowmode -= 1;   // 1-based on the command line, 0-based as an index

    // The column dimension spans the product of every dimension but the row one.
    Coordinate len = 1;
    Coordinate chunklen = 1;
    for (Coordinate i = ndims - 1; i >= 0; i--)
    {
        if (i == rowmode)
        {
            continue;
        }
        len *= inputDims[i].getLength();
        chunklen *= inputDims[i].getChunkInterval();
    }

    DimensionDesc const& rowDim = inputDims[rowmode];
    Dimensions dims(2);
    dims[0] = rowDim;
    // NOTE(review): the column dimension reuses the row dimension's base name,
    // type, flags, mapping array and comment -- confirm that is intended.
    dims[1] = DimensionDesc(rowDim.getBaseName(), 1, len, chunklen, 0,
                            rowDim.getType(), rowDim.getFlags(),
                            rowDim.getMappingArrayName(), rowDim.getComment());
    return ArrayDesc("Matricize",atts,dims);
}
/**
 * Check that the array named by the second parameter does not exist yet.
 * The operator produces no output array, so a default-constructed schema is
 * returned.
 *
 * @param schemas expected to be empty
 * @param query   the query context (not used by this function)
 * @return an empty ArrayDesc
 * @throws USER_QUERY_EXCEPTION SCIDB_LE_ARRAY_ALREADY_EXIST when the system
 *         catalog already contains an array with the new name
 */
ArrayDesc inferSchema(std::vector<ArrayDesc> schemas, boost::shared_ptr<Query> query)
{
    assert(schemas.empty());
    assert(_parameters.size() == 2);
    assert(((boost::shared_ptr<OperatorParam>&)_parameters[1])->getParamType() == PARAM_ARRAY_REF);

    std::string const& targetName =
        ((boost::shared_ptr<OperatorParamReference>&)_parameters[1])->getObjectName();

    bool const alreadyExists = SystemCatalog::getInstance()->containsArray(targetName);
    if (!alreadyExists)
    {
        return ArrayDesc();
    }

    throw USER_QUERY_EXCEPTION(SCIDB_SE_INFER_SCHEMA, SCIDB_LE_ARRAY_ALREADY_EXIST,
                               _parameters[1]->getParsingContext()) << targetName;
}
/**
 * Infer the output schema of an attribute rename: the input schema with the
 * referenced attributes given new names.
 *
 * Parameters are consumed in pairs: the first of each pair references the
 * attribute to rename, the second carries the new name. Everything else about
 * the attribute, and the array's ids, name and dimensions, is kept.
 *
 * @param schemas exactly one input schema
 * @param query   the query context (not used by this function)
 * @return the input schema with the renamed attributes
 */
ArrayDesc inferSchema(std::vector< ArrayDesc> schemas, boost::shared_ptr< Query> query)
{
    assert(schemas.size() == 1);

    const ArrayDesc& input = schemas[0];
    const Attributes& originalAttrs = input.getAttributes();
    Attributes renamedAttrs = input.getAttributes();

    size_t const nParams = _parameters.size();
    for (size_t p = 0; p < nParams; p += 2)
    {
        boost::shared_ptr<OperatorParamReference>& target =
            (boost::shared_ptr<OperatorParamReference>&)_parameters[p];
        boost::shared_ptr<OperatorParamReference>& replacement =
            (boost::shared_ptr<OperatorParamReference>&)_parameters[p + 1];

        int32_t const attrNo = target->getObjectNo();
        // originalAttrs is never modified, so a reference into it stays valid.
        AttributeDesc const& original = originalAttrs[attrNo];

        renamedAttrs[attrNo] = AttributeDesc(attrNo,
                                             replacement->getObjectName(),
                                             original.getType(),
                                             original.getFlags(),
                                             original.getDefaultCompressionMethod(),
                                             original.getAliases(),
                                             &original.getDefaultValue(),
                                             original.getDefaultValueExpr());
    }

    return ArrayDesc(input.getId(), input.getUAId(), input.getVersionId(),
                     input.getName(), renamedAttrs, input.getDimensions());
}
/**
 * Infer the output schema of "tile_apply": the non-indicator attributes of the
 * input, followed by one new attribute per (name, expression) parameter pair,
 * followed by the input's empty tag when it has one.
 *
 * Each new attribute takes the type of its compiled expression and is
 * nullable exactly when the expression is.
 *
 * @param schemas exactly one input schema
 * @param query   the query context, used to compile the expressions
 * @return a schema with the input's name and dimensions and the extended
 *         attribute list
 * @throws USER_EXCEPTION SCIDB_LE_WRONG_OPERATOR_ARGUMENTS_COUNT2 when the
 *         parameter count is odd
 * @throws USER_EXCEPTION SCIDB_LE_DUPLICATE_ATTRIBUTE_NAME when a new name, or
 *         the empty tag's name, collides with an attribute already in the output
 */
ArrayDesc inferSchema(std::vector< ArrayDesc> schemas, boost::shared_ptr< Query> query)
{
    assert(schemas.size() == 1);

    // Validate the pairing before any parameter is indexed.
    size_t const nParams = _parameters.size();
    if (nParams % 2 != 0)
    {
        throw USER_EXCEPTION(SCIDB_SE_INFER_SCHEMA, SCIDB_LE_WRONG_OPERATOR_ARGUMENTS_COUNT2) << "tile_apply";
    }

    // Copy every input attribute except indicators, renumbering from 0.
    Attributes const& inputAttrs = schemas[0].getAttributes();
    Attributes outAttrs;
    AttributeID nextAttrId = 0;
    for (size_t i = 0; i < inputAttrs.size(); i++)
    {
        AttributeDesc const& attr = inputAttrs[i];
        if (attr.getType() == TID_INDICATOR)
        {
            continue;
        }
        outAttrs.push_back(AttributeDesc(nextAttrId++,
                                         attr.getName(),
                                         attr.getType(),
                                         attr.getFlags(),
                                         attr.getDefaultCompressionMethod(),
                                         attr.getAliases(),
                                         attr.getReserve(),
                                         &attr.getDefaultValue(),
                                         attr.getDefaultValueExpr(),
                                         attr.getVarSize()));
    }

    // One new attribute per (attribute reference, expression) pair.
    for (size_t k = 0; k < nParams; k += 2)
    {
        // Checked per pair, inside the loop, so no index is ever out of range.
        assert(_parameters[k]->getParamType() == PARAM_ATTRIBUTE_REF);
        assert(_parameters[k + 1]->getParamType() == PARAM_LOGICAL_EXPRESSION);

        std::string const& attributeName =
            ((boost::shared_ptr<OperatorParamReference>&)_parameters[k])->getObjectName();

        Expression expr;
        expr.compile(((boost::shared_ptr<OperatorParamLogicalExpression>&)_parameters[k + 1])->getExpression(),
                     query, _properties.tile, TID_VOID, schemas);
        assert(!_properties.tile);

        int const flags = expr.isNullable() ? (int)AttributeDesc::IS_NULLABLE : 0;

        for (size_t j = 0; j < nextAttrId; j++)
        {
            if (outAttrs[j].getName() == attributeName)
            {
                throw USER_EXCEPTION(SCIDB_SE_INFER_SCHEMA, SCIDB_LE_DUPLICATE_ATTRIBUTE_NAME) << attributeName;
            }
        }

        outAttrs.push_back(AttributeDesc(nextAttrId++, attributeName, expr.getType(), flags, 0));
    }

    // Re-append the empty tag last, provided its name is still free.
    AttributeDesc const* emptyTag = schemas[0].getEmptyBitmapAttribute();
    if (emptyTag)
    {
        for (size_t j = 0; j < nextAttrId; j++)
        {
            AttributeDesc const& attr = outAttrs[j];
            if (attr.getName() == emptyTag->getName())
            {
                throw USER_EXCEPTION(SCIDB_SE_INFER_SCHEMA, SCIDB_LE_DUPLICATE_ATTRIBUTE_NAME) << attr.getName();
            }
        }
        outAttrs.push_back(AttributeDesc(nextAttrId,
                                         emptyTag->getName(),
                                         emptyTag->getType(),
                                         emptyTag->getFlags(),
                                         emptyTag->getDefaultCompressionMethod(),
                                         emptyTag->getAliases(),
                                         emptyTag->getReserve(),
                                         &emptyTag->getDefaultValue(),
                                         emptyTag->getDefaultValueExpr(),
                                         emptyTag->getVarSize()));
    }

    return ArrayDesc(schemas[0].getName(), outAttrs, schemas[0].getDimensions());
}
/**
 * Infer the output schema of "MultiplyRow" for two single-attribute,
 * two-dimensional, bounded input arrays.
 *
 * The inputs must agree on the length, start and chunk interval of their
 * second dimension and on their attribute type, and neither attribute may be
 * nullable. The output has one attribute "multiply" of the inputs' type and
 * two dimensions taken from the first dimension of each input, both with
 * overlap 0. If the two base names are equal, the second gets a "2" suffix.
 *
 * @param schemas the two input schemas
 * @param query   the query context (not used by this function)
 * @return the schema named "MultiplyRow"
 * @throws USER_EXCEPTION SCIDB_LE_OP_MULTIPLY_ERROR2 .. ERROR8, one per failed
 *         precondition, checked in the order listed in the body
 */
ArrayDesc inferSchema(std::vector< ArrayDesc> schemas, boost::shared_ptr< Query> query)
{
    assert(schemas.size() == 2);

    if (!hasSingleAttribute(schemas[0]) || !hasSingleAttribute(schemas[1]))
    {
        throw USER_EXCEPTION(SCIDB_SE_INFER_SCHEMA, SCIDB_LE_OP_MULTIPLY_ERROR2);
    }

    Dimensions const& leftDims = schemas[0].getDimensions();
    Dimensions const& rightDims = schemas[1].getDimensions();

    // Both inputs must be matrices.
    if (leftDims.size() != 2 || rightDims.size() != 2)
    {
        throw USER_EXCEPTION(SCIDB_SE_INFER_SCHEMA, SCIDB_LE_OP_MULTIPLY_ERROR3);
    }

    // Every dimension must be bounded.
    if (leftDims[0].getLength() == INFINITE_LENGTH
        || leftDims[1].getLength() == INFINITE_LENGTH
        || rightDims[0].getLength() == INFINITE_LENGTH
        || rightDims[1].getLength() == INFINITE_LENGTH)
    {
        throw USER_EXCEPTION(SCIDB_SE_INFER_SCHEMA, SCIDB_LE_OP_MULTIPLY_ERROR4);
    }

    // The second dimensions must line up in length and start.
    if (leftDims[1].getLength() != rightDims[1].getLength()
        || leftDims[1].getStart() != rightDims[1].getStart())
    {
        throw USER_EXCEPTION(SCIDB_SE_INFER_SCHEMA, SCIDB_LE_OP_MULTIPLY_ERROR5);
    }

    // FIXME: This condition needs to go away later
    if (leftDims[1].getChunkInterval() != rightDims[1].getChunkInterval())
    {
        throw USER_EXCEPTION(SCIDB_SE_INFER_SCHEMA, SCIDB_LE_OP_MULTIPLY_ERROR6);
    }

    Attributes const& leftAttrs = schemas[0].getAttributes();
    Attributes const& rightAttrs = schemas[1].getAttributes();

    if (leftAttrs[0].getType() != rightAttrs[0].getType())
    {
        throw USER_EXCEPTION(SCIDB_SE_INFER_SCHEMA, SCIDB_LE_OP_MULTIPLY_ERROR7);
    }

    if (leftAttrs[0].isNullable() || rightAttrs[0].isNullable())
    {
        throw USER_EXCEPTION(SCIDB_SE_INFER_SCHEMA, SCIDB_LE_OP_MULTIPLY_ERROR8);
    }

    Attributes atts(1);
    TypeId type = leftAttrs[0].getType();
    atts[0] = AttributeDesc((AttributeID)0, "multiply", type, 0, 0);

    Dimensions dims(2);

    // First output dimension: the first dimension of the left input, overlap 0.
    DimensionDesc const& d1 = leftDims[0];
    dims[0] = DimensionDesc(d1.getBaseName(),
                            d1.getNamesAndAliases(),
                            d1.getStartMin(),
                            d1.getCurrStart(),
                            d1.getCurrEnd(),
                            d1.getEndMax(),
                            d1.getChunkInterval(),
                            0,
                            d1.getType(),
                            d1.getFlags(),
                            d1.getMappingArrayName(),
                            d1.getComment(),
                            d1.getFuncMapOffset(),
                            d1.getFuncMapScale());

    // Second output dimension: the first dimension of the right input, renamed
    // with a "2" suffix if its base name would clash with the first one.
    DimensionDesc const& d2 = rightDims[0];
    dims[1] = DimensionDesc(d1.getBaseName() == d2.getBaseName() ? d1.getBaseName() + "2" : d2.getBaseName(),
                            d2.getNamesAndAliases(),
                            d2.getStartMin(),
                            d2.getCurrStart(),
                            d2.getCurrEnd(),
                            d2.getEndMax(),
                            d2.getChunkInterval(),
                            0,
                            d2.getType(),
                            d2.getFlags(),
                            d2.getMappingArrayName(),
                            d2.getComment(),
                            d2.getFuncMapOffset(),
                            d2.getFuncMapScale());

    return ArrayDesc("MultiplyRow",atts,dims);
}