// Wraps an existing vector of tuples as a TupleArray.
//
// schema - array descriptor; only its first dimension (index 0) is consulted here.
// data   - tuples to expose; the vector is copied into `tuples`.
// offset - added to the first dimension's start to obtain `start`.
//
// `end` is initialised as `start + offset + length - 1`. This reads the member
// `start`, so it is only well-defined if `start` is declared before `end` in the
// class -- TODO confirm against the class declaration.
// NOTE(review): `offset` is already included in `start`, so it is counted twice
// in `end` -- confirm this is intended.
//
// The body calls desc.cutOverlap() on the stored descriptor and, when
// `start + tuples.size() <= end`, pulls `end` back to `start + tuples.size() - 1`.
TupleArray::TupleArray(ArrayDesc const& schema, vector< boost::shared_ptr<Tuple> > const& data, Coordinate offset) : desc(schema), start(schema.getDimensions()[0].getStart() + offset), end(start + offset + schema.getDimensions()[0].getLength() - 1), tuples(data), chunkSize(schema.getDimensions()[0].getChunkInterval()) { desc.cutOverlap(); if (Coordinate(start + tuples.size()) <= end) { end = start + tuples.size() - 1; } }
/**
 * Appends to `plugins` the library name of every attribute type and every
 * dimension type of `desc` whose library is not the one named "scidb".
 *
 * Attributes are visited first, then dimensions. Names are appended in visiting
 * order and are not de-duplicated.
 *
 * @param desc     array descriptor whose attribute and dimension types are inspected
 * @param plugins  output list; existing content is kept and new names are appended
 */
void fillUsedPlugins(const ArrayDesc& desc, vector<string>& plugins) const
{
    // Attribute types.
    for (size_t a = 0, nAttrs = desc.getAttributes().size(); a < nAttrs; ++a) {
        const string& libName =
            TypeLibrary::getTypeLibraries().getObjectLibrary(desc.getAttributes()[a].getType());
        if (libName == "scidb") {
            continue;
        }
        plugins.push_back(libName);
    }

    // Dimension types.
    for (size_t d = 0, nDims = desc.getDimensions().size(); d < nDims; ++d) {
        const string& libName =
            TypeLibrary::getTypeLibraries().getObjectLibrary(desc.getDimensions()[d].getType());
        if (libName == "scidb") {
            continue;
        }
        plugins.push_back(libName);
    }
}
/**
 * Constructs a SplitArray over a raw buffer covering the box [from, till].
 *
 * @param desc   descriptor of the resulting array
 * @param src    shared buffer stored in `_src`
 * @param from   first coordinates of the box (also the seed for `_startingChunk`)
 * @param till   last coordinates of the box; clamped per dimension to getEndMax()
 * @param query  query context; must be non-null (asserted)
 *
 * `_size` is computed from the unclamped `till`, before `_till` is clamped.
 * The array is flagged `_empty` when any extent is zero.
 */
SplitArray::SplitArray(ArrayDesc const& desc,
                       const boost::shared_array<char>& src,
                       Coordinates const& from,
                       Coordinates const& till,
                       shared_ptr<Query>const& query)
    : DelegateArray(desc, shared_ptr<Array>(), true),
      _startingChunk(from),
      _from(from),
      _till(till),
      _size(from.size()),
      _src(src),
      _empty(false)
{
    assert(query);
    _query = query;

    // Adjusts _startingChunk in place via the descriptor.
    desc.getChunkPositionFor(_startingChunk);

    Dimensions const& dims = desc.getDimensions();
    size_t const nDims = dims.size();
    for (size_t d = 0; d < nDims; ++d)
    {
        // Extent is taken before clamping _till.
        _size[d] = _till[d] - _from[d] + 1;
        if (_size[d] == 0)
        {
            _empty = true;
        }

        if (_till[d] > dims[d].getEndMax())
        {
            _till[d] = dims[d].getEndMax();
        }
    }
}
/**
 * Builds a one-dimensional TupleArray from a set of array iterators.
 *
 * @param schema          array descriptor; must have exactly one dimension
 * @param arrayIterators  iterators handed to append()
 * @param shift           forwarded to append()
 * @param step            forwarded to append()
 * @throws USER_EXCEPTION(SCIDB_SE_EXECUTION, SCIDB_LE_MULTIDIMENSIONAL_ARRAY_NOT_ALLOWED)
 *         when the schema does not have exactly one dimension
 *
 * After appending, the [start, end] range is tightened to the tuples actually held:
 * an unbounded range is rebased to [0, tuples.size() - 1], otherwise `end` is pulled
 * back when the tuples do not reach it.
 */
TupleArray::TupleArray(ArrayDesc const& schema,
                       vector< boost::shared_ptr<ConstArrayIterator> > const& arrayIterators,
                       size_t shift,
                       size_t step)
    : desc(schema),
      start(schema.getDimensions()[0].getStart()),
      end(schema.getDimensions()[0].getEndMax()),
      chunkSize(schema.getDimensions()[0].getChunkInterval())
{
    if (schema.getDimensions().size() != 1)
    {
        throw USER_EXCEPTION(SCIDB_SE_EXECUTION, SCIDB_LE_MULTIDIMENSIONAL_ARRAY_NOT_ALLOWED);
    }

    append(arrayIterators, shift, step);

    bool const unbounded = (start == MIN_COORDINATE) || (end == MAX_COORDINATE);
    if (unbounded)
    {
        start = 0;
        end = tuples.size()-1;
        return;
    }

    if (Coordinate(start + tuples.size()) <= end)
    {
        end = start + tuples.size() - 1;
    }
}
/**
 * Derives the schema of the shadow array that accompanies `targetArray`.
 *
 * Each attribute returned by targetArray.getAttributes(true) is mirrored by a
 * nullable TID_STRING attribute of the same name. Two attributes follow:
 * an int64 "row_offset" and the empty-tag indicator. Dimensions are taken from
 * the target unchanged.
 *
 * @param targetArray      schema of the array being loaded
 * @param shadowArrayName  name given to the returned descriptor
 * @return descriptor of the shadow array
 */
ArrayDesc InputArray::generateShadowArraySchema(ArrayDesc const& targetArray, string const& shadowArrayName)
{
    Attributes const& srcAttrs = targetArray.getAttributes(true);
    size_t nAttrs = srcAttrs.size();

    // Room for one string attribute per source attribute plus the two trailing ones.
    Attributes dstAttrs(nAttrs+2);

    size_t i = 0;
    while (i < nAttrs)
    {
        dstAttrs[i] = AttributeDesc(i, srcAttrs[i].getName(), TID_STRING, AttributeDesc::IS_NULLABLE, 0);
        ++i;
    }

    dstAttrs[nAttrs] = AttributeDesc(nAttrs, "row_offset", TID_INT64, 0, 0);
    dstAttrs[nAttrs+1] = AttributeDesc(nAttrs+1,
                                       DEFAULT_EMPTY_TAG_ATTRIBUTE_NAME,
                                       TID_INDICATOR,
                                       AttributeDesc::IS_EMPTY_INDICATOR,
                                       0);

    return ArrayDesc(shadowArrayName, dstAttrs, targetArray.getDimensions());
}
//param desc --> the input array schema ArrayDesc createWindowDesc(ArrayDesc const& desc) { //get dimensions for output array Attributes const &attrs = desc.getAttributes(); /* Dimensions aggrDims(dims.size()); for (size_t i = 0; i < dims.size(); i++) { DimensionDesc const& srcDim = dims[i]; aggrDims[i] = DimensionDesc(srcDim.getBaseName(), srcDim.getNamesAndAliases(), srcDim.getStartMin(), srcDim.getCurrStart(), srcDim.getCurrEnd(), srcDim.getEndMax(), srcDim.getChunkInterval(), 0); } */ Attributes newAttributes; size_t n = 0; for (size_t i=desc.getDimensions().size()*2; i < _parameters.size()-1; i=i+2) { const AttributeDesc &attr = attrs[((boost::shared_ptr<OperatorParamReference>&)_parameters[i])->getObjectNo()]; newAttributes.push_back(AttributeDesc(n, attr.getName(), attr.getType(), attr.getFlags(), attr.getDefaultCompressionMethod(), attr.getAliases())); } return ArrayDesc(desc.getName(), newAttributes, desc.getDimensions()); }
//param desc --> the input array schema inline ArrayDesc createWindowDesc(ArrayDesc const& desc) { //get dimensions for output array Dimensions const& dims = desc.getDimensions(); Dimensions aggrDims(dims.size()); for (size_t i = 0; i < dims.size(); i++) { DimensionDesc const& srcDim = dims[i]; aggrDims[i] = DimensionDesc(srcDim.getBaseName(), srcDim.getNamesAndAliases(), srcDim.getStartMin(), srcDim.getCurrStart(), srcDim.getCurrEnd(), srcDim.getEndMax(), srcDim.getChunkInterval(), 0); } ArrayDesc output(desc.getName(), Attributes(), aggrDims); //get the aggregates, check if they make sense, make attributes for output array //_parameters[0~dims.size()*2-1] --> window boundaries, already get in inferSchema for (size_t i = dims.size()*2; i < _parameters.size(); i++) { boost::shared_ptr<scidb::OperatorParam> param = _parameters[i]; if ( param->getParamType() != PARAM_AGGREGATE_CALL) { throw USER_QUERY_EXCEPTION(SCIDB_SE_INFER_SCHEMA, SCIDB_LE_OP_WINDOW_ERROR5, _parameters[i]->getParsingContext()); } addAggregatedAttribute( (shared_ptr<OperatorParamAggregateCall> &) param, desc, output, true); } if ( desc.getEmptyBitmapAttribute()) //? { AttributeDesc const* eAttr = desc.getEmptyBitmapAttribute(); output.addAttribute(AttributeDesc(output.getAttributes().size(), eAttr->getName(), eAttr->getType(), eAttr->getFlags(), eAttr->getDefaultCompressionMethod())); } return output; }
// Constructs a FITS-backed input array.
//
// array    - descriptor copied into `desc`; its dimensions are copied into `dims`.
// filePath - passed to the `parser` member's constructor.
// hdu      - stored in the `hdu` member.
// query    - stored in the `query` member.
//
// `nDims` is taken from dims.size() and `nAttrs` from array.getAttributes(true).size();
// `values`, `chunks` and `chunkIterators` are sized with nAttrs, `chunkPos` with nDims,
// and `chunkIndex` starts at 0.
// NOTE(review): several initialisers read other members (dims, nAttrs, nDims), so they
// depend on the member declaration order in the class -- confirm against the header.
//
// The body only calls initValueHolders(); per the inline comment, the remaining
// initialisation is deferred to getChunkByIndex().
FITSInputArray::FITSInputArray(ArrayDesc const& array, string const& filePath, uint32_t hdu, std::shared_ptr<Query>& query) : parser(filePath), hdu(hdu), desc(array), dims(array.getDimensions()), nDims(dims.size()), nAttrs(array.getAttributes(true).size()), values(nAttrs), chunks(nAttrs), chunkIterators(nAttrs), chunkIndex(0), chunkPos(nDims), query(query) { initValueHolders(); // Most initialization steps are only done later, when the first // chunk is requested by an iterator. See getChunkByIndex() }
/**
 * Builds the output schema of the window operator.
 *
 * Each input dimension is copied, passing 0 as the eighth DimensionDesc argument
 * and carrying over type, flags, mapping array name, comment and functional-mapping
 * offset/scale. Every parameter after the first `2 * nDims` is handed to
 * addAggregatedAttribute(). The input's empty-bitmap attribute, if any, is
 * appended last.
 *
 * @param desc the input array schema
 * @return the output array schema
 */
inline ArrayDesc createWindowDesc(ArrayDesc const& desc)
{
    Dimensions const& dims = desc.getDimensions();
    size_t const nDims = dims.size();

    Dimensions aggDims(nDims);
    for (size_t d = 0; d < nDims; ++d)
    {
        DimensionDesc const& srcDim = dims[d];
        aggDims[d] = DimensionDesc(srcDim.getBaseName(),
                                   srcDim.getNamesAndAliases(),
                                   srcDim.getStartMin(),
                                   srcDim.getCurrStart(),
                                   srcDim.getCurrEnd(),
                                   srcDim.getEndMax(),
                                   srcDim.getChunkInterval(),
                                   0,
                                   srcDim.getType(),
                                   srcDim.getFlags(),
                                   srcDim.getMappingArrayName(),
                                   srcDim.getComment(),
                                   srcDim.getFuncMapOffset(),
                                   srcDim.getFuncMapScale());
    }

    ArrayDesc output (desc.getName(), Attributes(), aggDims);

    // Parameters past the window boundaries are the aggregate calls.
    size_t const nParams = _parameters.size();
    for (size_t p = nDims * 2; p < nParams; ++p)
    {
        addAggregatedAttribute( (shared_ptr <OperatorParamAggregateCall> &) _parameters[p], desc, output);
    }

    AttributeDesc const* eAtt = desc.getEmptyBitmapAttribute();
    if (eAtt)
    {
        output.addAttribute(AttributeDesc(output.getAttributes().size(),
                                          eAtt->getName(),
                                          eAtt->getType(),
                                          eAtt->getFlags(),
                                          eAtt->getDefaultCompressionMethod()));
    }

    return output;
}
/**
 * Validates the destination schema given in _parameters[0] against the single
 * input schema and returns the output schema.
 *
 * Steps, in order:
 *  1. Build `aggregationDesc`: the source dimensions, one attribute per aggregate
 *     call in _parameters[1..], then every source attribute whose name does not
 *     clash with an attribute already added. Aggregate result names are collected
 *     in `aggregatedNames`.
 *  2. The destination must have an empty-bitmap attribute
 *     (else SCIDB_LE_OP_REDIMENSION_ERROR1).
 *  3. Each destination attribute must be one of:
 *       - an attribute of aggregationDesc with the same name: types must be equal,
 *         and a nullable source cannot feed a non-nullable destination;
 *       - a source dimension (by name or alias): the destination attribute must be
 *         TID_INT64 with flags 0 and the source dimension must be TID_INT64;
 *       - the empty indicator.
 *  4. Each destination dimension must have zero chunk overlap
 *     (else SCIDB_LE_OP_REDIMENSION_STORE_ERROR3) and must be one of:
 *       - an attribute of aggregationDesc (by name or alias) that is not an
 *         aggregate result, is TID_INT64 with flags 0, and the destination
 *         dimension is TID_INT64;
 *       - a source dimension whose type, start, length and chunk interval all match.
 *     Anything else raises SCIDB_LE_UNEXPECTED_DESTINATION_DIMENSION.
 *
 * The `goto NextAttr` / `goto NextDim` jumps skip the remaining checks for the
 * current item once a match has been validated.
 *
 * @param schemas  input schemas; exactly one is expected (asserted)
 * @param query    query context; not used in this function
 * @return descriptor named "<source name>_redimension" carrying the destination's
 *         attributes, dimensions and flags
 * @throws USER_EXCEPTION(SCIDB_SE_INFER_SCHEMA, ...) for each failed check above
 */
ArrayDesc inferSchema(std::vector< ArrayDesc> schemas, boost::shared_ptr< Query> query) { assert(schemas.size() == 1); ArrayDesc const& srcDesc = schemas[0]; ArrayDesc dstDesc = ((boost::shared_ptr<OperatorParamSchema>&)_parameters[0])->getSchema(); //Compile a desc of all possible attributes (aggregate calls first) and source dimensions ArrayDesc aggregationDesc (srcDesc.getName(), Attributes(), srcDesc.getDimensions()); vector<string> aggregatedNames; //add aggregate calls first for (size_t i = 1; i < _parameters.size(); i++) { addAggregatedAttribute( (shared_ptr <OperatorParamAggregateCall>&) _parameters[i], srcDesc, aggregationDesc); aggregatedNames.push_back(aggregationDesc.getAttributes()[aggregationDesc.getAttributes().size()-1].getName()); } //add other attributes BOOST_FOREACH(const AttributeDesc &srcAttr, srcDesc.getAttributes()) { //if there's an attribute with same name as an aggregate call - skip the attribute bool found = false; BOOST_FOREACH(const AttributeDesc &aggAttr, aggregationDesc.getAttributes()) { if( aggAttr.getName() == srcAttr.getName()) { found = true; } } if (!found) { aggregationDesc.addAttribute(AttributeDesc( aggregationDesc.getAttributes().size(), srcAttr.getName(), srcAttr.getType(), srcAttr.getFlags(), srcAttr.getDefaultCompressionMethod(), srcAttr.getAliases(), &srcAttr.getDefaultValue(), srcAttr.getDefaultValueExpr(), srcAttr.getComment(), srcAttr.getVarSize())); } } //Ensure attributes names uniqueness. 
if (!dstDesc.getEmptyBitmapAttribute()) throw USER_EXCEPTION(SCIDB_SE_INFER_SCHEMA, SCIDB_LE_OP_REDIMENSION_ERROR1); BOOST_FOREACH(const AttributeDesc &dstAttr, dstDesc.getAttributes()) { BOOST_FOREACH(const AttributeDesc &srcAttr, aggregationDesc.getAttributes()) { if (srcAttr.getName() == dstAttr.getName()) { if (srcAttr.getType() != dstAttr.getType()) { throw USER_EXCEPTION(SCIDB_SE_INFER_SCHEMA, SCIDB_LE_WRONG_ATTRIBUTE_TYPE) << srcAttr.getName() << srcAttr.getType() << dstAttr.getType(); } if (!dstAttr.isNullable() && srcAttr.isNullable()) { throw USER_EXCEPTION(SCIDB_SE_INFER_SCHEMA, SCIDB_LE_WRONG_ATTRIBUTE_FLAGS) << srcAttr.getName(); } goto NextAttr; } } BOOST_FOREACH(const DimensionDesc &srcDim, aggregationDesc.getDimensions()) { if (srcDim.hasNameOrAlias(dstAttr.getName())) { if (dstAttr.getType() != TID_INT64) { throw USER_EXCEPTION(SCIDB_SE_INFER_SCHEMA, SCIDB_LE_WRONG_DESTINATION_ATTRIBUTE_TYPE) << dstAttr.getName() << TID_INT64; } if (srcDim.getType() != TID_INT64) { throw USER_EXCEPTION(SCIDB_SE_INFER_SCHEMA, SCIDB_LE_WRONG_SOURCE_DIMENSION_TYPE) << dstAttr.getName() << TID_INT64; } if (dstAttr.getFlags() != 0) { throw USER_EXCEPTION(SCIDB_SE_INFER_SCHEMA, SCIDB_LE_WRONG_DESTINATION_ATTRIBUTE_FLAGS) << dstAttr.getName(); } goto NextAttr; } } if (dstAttr.isEmptyIndicator() == false) { throw USER_EXCEPTION(SCIDB_SE_INFER_SCHEMA, SCIDB_LE_UNEXPECTED_DESTINATION_ATTRIBUTE) << dstAttr.getName(); } NextAttr:; } BOOST_FOREACH(const DimensionDesc &dstDim, dstDesc.getDimensions()) { if (dstDim.getChunkOverlap() != 0) throw USER_EXCEPTION(SCIDB_SE_INFER_SCHEMA, SCIDB_LE_OP_REDIMENSION_STORE_ERROR3); BOOST_FOREACH(const AttributeDesc &srcAttr, aggregationDesc.getAttributes()) { if (dstDim.hasNameOrAlias(srcAttr.getName())) { for (size_t i = 0; i< aggregatedNames.size(); i++) { if (srcAttr.getName() == aggregatedNames[i]) throw USER_EXCEPTION(SCIDB_SE_INFER_SCHEMA, SCIDB_LE_OP_REDIMENSION_ERROR2); } if (srcAttr.getType() != TID_INT64) { throw 
USER_EXCEPTION(SCIDB_SE_INFER_SCHEMA, SCIDB_LE_WRONG_SOURCE_ATTRIBUTE_TYPE) << srcAttr.getName() << TID_INT64; } if (srcAttr.getFlags() != 0) { throw USER_EXCEPTION(SCIDB_SE_INFER_SCHEMA, SCIDB_LE_WRONG_SOURCE_ATTRIBUTE_FLAGS) << srcAttr.getName() << TID_INT64; } if (dstDim.getType() != TID_INT64) { throw USER_EXCEPTION(SCIDB_SE_INFER_SCHEMA, SCIDB_LE_WRONG_DESTINATION_DIMENSION_TYPE) << srcAttr.getName() << TID_INT64; } goto NextDim; } } BOOST_FOREACH(const DimensionDesc &srcDim, aggregationDesc.getDimensions()) { if (srcDim.hasNameOrAlias(dstDim.getBaseName())) { if (dstDim.getType() != srcDim.getType() || dstDim.getStart() != srcDim.getStart() || dstDim.getLength() != srcDim.getLength() || dstDim.getChunkInterval() != srcDim.getChunkInterval()) { throw USER_EXCEPTION(SCIDB_SE_INFER_SCHEMA, SCIDB_LE_DIMENSIONS_DONT_MATCH) << srcDim.getBaseName() << dstDim.getBaseName(); } goto NextDim; } } throw USER_EXCEPTION(SCIDB_SE_INFER_SCHEMA, SCIDB_LE_UNEXPECTED_DESTINATION_DIMENSION) << dstDim.getBaseName(); NextDim:; } return ArrayDesc(srcDesc.getName()+"_redimension", dstDesc.getAttributes(), dstDesc.getDimensions(), dstDesc.getFlags()); }
inline size_t nCol(const ArrayDesc& desc, bool transpose=false) { return nCol(desc.getDimensions(), transpose); }
/**
 * Validates the destination schema given in _parameters[0] against the single
 * input schema and returns the output schema.
 *
 * Steps, in order:
 *  1. Build `aggregationDesc`: the source dimensions, one attribute per aggregate
 *     call in _parameters[1..], then every source attribute whose name does not
 *     clash with an attribute already added. Each aggregate result name must also
 *     be an attribute of the destination (else SCIDB_LE_ATTRIBUTE_DOESNT_EXIST);
 *     the names are collected in `aggregatedNames`.
 *  2. The destination must have an empty-bitmap attribute
 *     (else SCIDB_LE_OP_REDIMENSION_ERROR1).
 *  3. Each destination attribute must be one of:
 *       - an attribute of aggregationDesc with the same name: types must be equal,
 *         and a nullable source cannot feed a non-nullable destination;
 *       - a source dimension (matched with hasNameAndAlias): the destination
 *         attribute must be TID_INT64 with flags 0;
 *       - the empty indicator.
 *  4. Each destination dimension must not have a chunk overlap larger than its
 *     chunk interval, and is resolved as the first of:
 *       - an attribute of aggregationDesc (matched with hasNameAndAlias) that is
 *         not an aggregate result and has an integral type other than TID_UINT64;
 *       - a source dimension with a matching name;
 *       - a new ("synthetic") dimension, allowed at most once and only when there
 *         are no aggregates (else SCIDB_LE_UNEXPECTED_DESTINATION_DIMENSION).
 *     In every accepted case the destination dimension is appended to `outputDims`.
 *
 * The `goto NextAttr` / `goto NextDim` jumps skip the remaining checks for the
 * current item once a match has been handled.
 *
 * @param schemas  input schemas; exactly one is expected (asserted)
 * @param query    query context; not used in this function
 * @return descriptor named after the source array, carrying the destination's
 *         attributes and flags and the collected `outputDims`
 * @throws USER_EXCEPTION(SCIDB_SE_INFER_SCHEMA, ...) for each failed check above
 */
ArrayDesc inferSchema(std::vector< ArrayDesc> schemas, boost::shared_ptr< Query> query) { assert(schemas.size() == 1); ArrayDesc const& srcDesc = schemas[0]; ArrayDesc dstDesc = ((boost::shared_ptr<OperatorParamSchema>&)_parameters[0])->getSchema(); //Compile a desc of all possible attributes (aggregate calls first) and source dimensions ArrayDesc aggregationDesc (srcDesc.getName(), Attributes(), srcDesc.getDimensions()); vector<string> aggregatedNames; //add aggregate calls first for (size_t i = 1; i < _parameters.size(); i++) { addAggregatedAttribute( (shared_ptr <OperatorParamAggregateCall>&) _parameters[i], srcDesc, aggregationDesc); string aggName = aggregationDesc.getAttributes()[aggregationDesc.getAttributes().size()-1].getName(); bool aggFound = false; BOOST_FOREACH(const AttributeDesc &dstAttr, dstDesc.getAttributes()) { if (dstAttr.getName() == aggName) { aggFound = true; break; } } if (!aggFound) { throw USER_EXCEPTION(SCIDB_SE_INFER_SCHEMA, SCIDB_LE_ATTRIBUTE_DOESNT_EXIST) << aggName << dstDesc.getName(); } aggregatedNames.push_back(aggName); } //add other attributes BOOST_FOREACH(const AttributeDesc &srcAttr, srcDesc.getAttributes()) { //if there's an attribute with same name as an aggregate call - skip the attribute bool found = false; BOOST_FOREACH(const AttributeDesc &aggAttr, aggregationDesc.getAttributes()) { if( aggAttr.getName() == srcAttr.getName()) { found = true; } } if (!found) { aggregationDesc.addAttribute(AttributeDesc( aggregationDesc.getAttributes().size(), srcAttr.getName(), srcAttr.getType(), srcAttr.getFlags(), srcAttr.getDefaultCompressionMethod(), srcAttr.getAliases(), &srcAttr.getDefaultValue(), srcAttr.getDefaultValueExpr(), srcAttr.getVarSize())); } } //Ensure attributes names uniqueness. 
if (!dstDesc.getEmptyBitmapAttribute()) throw USER_EXCEPTION(SCIDB_SE_INFER_SCHEMA, SCIDB_LE_OP_REDIMENSION_ERROR1); BOOST_FOREACH(const AttributeDesc &dstAttr, dstDesc.getAttributes()) { BOOST_FOREACH(const AttributeDesc &srcAttr, aggregationDesc.getAttributes()) { if (srcAttr.getName() == dstAttr.getName()) { if (srcAttr.getType() != dstAttr.getType()) { throw USER_EXCEPTION(SCIDB_SE_INFER_SCHEMA, SCIDB_LE_WRONG_ATTRIBUTE_TYPE) << srcAttr.getName() << srcAttr.getType() << dstAttr.getType(); } if (!dstAttr.isNullable() && srcAttr.isNullable()) { throw USER_EXCEPTION(SCIDB_SE_INFER_SCHEMA, SCIDB_LE_WRONG_ATTRIBUTE_FLAGS) << srcAttr.getName(); } goto NextAttr; } } BOOST_FOREACH(const DimensionDesc &srcDim, aggregationDesc.getDimensions()) { if (srcDim.hasNameAndAlias(dstAttr.getName())) { if (dstAttr.getType() != TID_INT64) { throw USER_EXCEPTION(SCIDB_SE_INFER_SCHEMA, SCIDB_LE_WRONG_DESTINATION_ATTRIBUTE_TYPE) << dstAttr.getName() << TID_INT64; } if (dstAttr.getFlags() != 0) { throw USER_EXCEPTION(SCIDB_SE_INFER_SCHEMA, SCIDB_LE_WRONG_DESTINATION_ATTRIBUTE_FLAGS) << dstAttr.getName(); } goto NextAttr; } } if (dstAttr.isEmptyIndicator() == false) { throw USER_EXCEPTION(SCIDB_SE_INFER_SCHEMA, SCIDB_LE_UNEXPECTED_DESTINATION_ATTRIBUTE) << dstAttr.getName(); } NextAttr:; } Dimensions outputDims; size_t nNewDims = 0; BOOST_FOREACH(const DimensionDesc &dstDim, dstDesc.getDimensions()) { if (dstDim.getChunkOverlap() > dstDim.getChunkInterval()) { throw USER_EXCEPTION(SCIDB_SE_INFER_SCHEMA, SCIDB_LE_OVERLAP_CANT_BE_LARGER_CHUNK); } BOOST_FOREACH(const AttributeDesc &srcAttr, aggregationDesc.getAttributes()) { if (dstDim.hasNameAndAlias(srcAttr.getName())) { for (size_t i = 0; i< aggregatedNames.size(); i++) { if (srcAttr.getName() == aggregatedNames[i]) throw USER_EXCEPTION(SCIDB_SE_INFER_SCHEMA, SCIDB_LE_OP_REDIMENSION_ERROR2); } if ( !IS_INTEGRAL(srcAttr.getType()) || srcAttr.getType() == TID_UINT64 ) { throw USER_EXCEPTION(SCIDB_SE_INFER_SCHEMA, 
SCIDB_LE_WRONG_SOURCE_ATTRIBUTE_TYPE) << srcAttr.getName() << TID_INT64; } outputDims.push_back(dstDim); goto NextDim; } } BOOST_FOREACH(const DimensionDesc &srcDim, aggregationDesc.getDimensions()) { if (srcDim.hasNameAndAlias(dstDim.getBaseName())) { DimensionDesc outputDim = dstDim; outputDims.push_back(outputDim); goto NextDim; } } //one synthetic dimension allowed if (nNewDims++ != 0 || !aggregatedNames.empty() ) { throw USER_EXCEPTION(SCIDB_SE_INFER_SCHEMA, SCIDB_LE_UNEXPECTED_DESTINATION_DIMENSION) << dstDim.getBaseName(); } outputDims.push_back(dstDim); NextDim:; } return ArrayDesc(srcDesc.getName(), dstDesc.getAttributes(), outputDims, dstDesc.getFlags()); }
/**
 * Ranks the entries of the correlation array (inputArrays[1]) and returns a filter
 * over inputArrays[0] that keeps the columns `j` of the `d` best-ranked entries,
 * plus column `p + 1`, where `p` is the current length of the correlation array's
 * first dimension and `d` is the value of _parameters[0].
 *
 * Progress is written to a per-instance log file, /home/scidb/preselect_<instance>.log.
 * Logging is best-effort: if the file cannot be opened the operator still runs.
 *
 * Changes from the earlier version of this function:
 *  - the ranking buffer is a vector, so it is released on every exit path
 *    (it used to be allocated with new[] and never freed);
 *  - a failed fopen() no longer leads to writing through a null FILE*;
 *  - at most `p` ranked entries are read, even when `d` is larger than `p`;
 *  - the log is closed before the filter expression is compiled, so it is not left
 *    open if compilation throws.
 *
 * @param inputArrays  [0] the array to filter, [1] the correlation array
 * @param query        query context (instance ID for the log name; passed to MemArray)
 * @return a FilterArray over inputArrays[0], or an empty MemArray over _schema when
 *         the correlation array has no chunks on this instance
 */
shared_ptr< Array > execute(vector< shared_ptr< Array> >& inputArrays, shared_ptr<Query> query)
{
    // The log of the operator is kept in a local file named preselect_N.log, N is the instance ID.
    stringstream logFileName;
    logFileName << "/home/scidb/preselect_" << query->getInstanceID() << ".log";
    FILE *logFile = fopen(logFileName.str().c_str(), "w");   // may be NULL; checked at each use

    shared_ptr<Array> correlationArray = inputArrays[1];
    ArrayDesc corrSchema = correlationArray->getArrayDesc();
    // Note the correlation array doesn't have Y column.
    DimensionDesc corrDimsP = corrSchema.getDimensions()[0];
    Coordinate p = corrDimsP.getCurrLength();

    if (logFile)
    {
        fprintf(logFile, "p = %ld\n # of chunk = %ld\n", p, corrSchema.getNumberOfChunks());
        fflush(logFile);
    }

    shared_ptr<ConstArrayIterator> corrArrayIter = correlationArray->getIterator(0);
    if (corrArrayIter->end())
    {
        // Nothing to rank on this instance.
        shared_ptr<Array> outputArray(new MemArray(_schema, query));
        if (logFile)
        {
            fclose(logFile);
        }
        return outputArray;
    }

    // Owned by the vector: freed automatically, including when an exception escapes.
    vector<correlation> corr(p > 0 ? static_cast<size_t>(p) : 0);

    // The correlation array will always have only 1 chunk (we designed correlation array like this), so no loops here.
    shared_ptr<ConstChunkIterator> corrChunkIter = corrArrayIter->getChunk().getConstIterator();
    for (Coordinate i = 0; i < p; ++i)
    {
        corr[i].id = i+1;
        corr[i].corr = corrChunkIter->getItem().getDouble();
        ++(*corrChunkIter);
    }

    if (!corr.empty())
    {
        qsort(&corr[0], corr.size(), sizeof(correlation), &comp);
    }

    if (logFile)
    {
        for (Coordinate i = 0; i < p; ++i)
        {
            fprintf(logFile, "%d, %f\n", corr[i].id, corr[i].corr);
        }
        fflush(logFile);
    }

    Coordinate d = ((boost::shared_ptr<OperatorParamPhysicalExpression>&)_parameters[0])->getExpression()->evaluate().getInt64();
    if (logFile)
    {
        fprintf(logFile, "d=%ld\n", d);
    }

    // Only p entries were ranked; never read beyond them.
    Coordinate const nSelected = (d < p) ? d : p;

    stringstream ss;
    vector<string> names;
    names.push_back("j");
    vector<TypeId> types;
    types.push_back(TID_INT64);
    for (Coordinate i = 0; i < nSelected; ++i)
    {
        ss << "j=" << corr[i].id << " or ";
    }
    ss << "j=" << p+1;

    if (logFile)
    {
        fprintf(logFile, "%s\n", ss.str().c_str());
        fflush(logFile);
        // Closed before compile(): nothing is logged afterwards, and this way the
        // handle is not left open if compilation throws.
        fclose(logFile);
    }

    Expression e;
    e.compile(ss.str(), names, types);

    boost::shared_ptr<scidb::Query> emptyQuery;
    return boost::shared_ptr<Array>(new FilterArray(_schema, inputArrays[0], boost::make_shared<Expression>(e), emptyQuery, _tileMode));
}
inline unsigned int chunkCol(const ArrayDesc& desc, bool transpose=false) { return chunkCol(desc.getDimensions(), transpose); }
// Constructs a ReshapeArray that presents `array` under the descriptor `desc`.
//
// desc  - descriptor of the reshaped (output) array; its dimensions are copied
//         into `outDims`.
// array - the input array; it is passed to the DelegateArray base together with
//         `desc`, and its own descriptor's dimensions are copied into `inDims`.
//         It is dereferenced here, so it must not be null.
//
// The constructor body is empty; all work is done in the initializer list.
ReshapeArray::ReshapeArray(ArrayDesc const& desc, boost::shared_ptr<Array> const& array) : DelegateArray(desc, array), inDims(array->getArrayDesc().getDimensions()), outDims(desc.getDimensions()) { }
/**
 * Accumulates this instance's share of the Gamma matrix from a two-dimensional
 * input and merges the per-instance results on instance 0.
 *
 * Reads attribute 0 of inputArrays[0]. From the first dimension it derives the row
 * count n (currEnd - currStart + 1); from the second it derives d as
 * currEnd - currStart, i.e. one less than the number of columns.
 *
 * For each row, z_i[0] is fixed at 1.0 and z_i[1..d+1] are filled from consecutive
 * chunk-iterator items; then Gamma[k][m] += z_i[k] * z_i[m] for 0 <= m <= k <= d+1
 * (lower triangle only). z_i and Gamma are declared outside this function.
 * NOTE(review): the chunk iterator is advanced without checking end(), so the loops
 * appear to assume every cell in the visited range is present -- confirm.
 *
 * When the query spans several instances, every instance whose ID is not 0 packs
 * its lower triangle row by row into a buffer of (d+3)*(d+2)/2 doubles, sends it
 * to instance 0 and returns the empty MemArray created on entry. Instance 0
 * receives one buffer from each other instance and adds it into its own Gamma.
 *
 * @param inputArrays  input arrays; only element 0 is used
 * @param query        query context (instance count and ID; passed to BufSend,
 *                     BufReceive, MemArray and writeGamma)
 * @return writeGamma(d, query) on instance 0 or in a single-instance query;
 *         an empty MemArray over _schema on the other instances
 */
shared_ptr< Array > execute(vector< shared_ptr< Array> >& inputArrays, shared_ptr<Query> query) { shared_ptr<Array> outputArray(new MemArray(_schema, query)); shared_ptr<Array> inputArray = inputArrays[0]; ArrayDesc inputSchema = inputArray->getArrayDesc(); // Get descriptor of two dimensions d and n. DimensionDesc dimsN = inputSchema.getDimensions()[0]; DimensionDesc dimsD = inputSchema.getDimensions()[1]; size_t n = dimsN.getCurrEnd() - dimsN.getCurrStart() + 1; // Note: the input data set should have d+1 dimensions (including Y) size_t d = dimsD.getCurrEnd() - dimsD.getCurrStart(); size_t nStart = dimsN.getCurrStart(); size_t dStart = dimsD.getCurrStart(); // Get chunk size of n. size_t nChunkSize = dimsN.getChunkInterval(); // Helps to accumulate the n and L. z_i[0] = 1.0; shared_ptr<ConstArrayIterator> inputArrayIter = inputArray->getConstIterator(0); Coordinates chunkPosition; size_t i, j, k, m; while(! inputArrayIter->end() ) { shared_ptr<ConstChunkIterator> chunkIter = inputArrayIter->getChunk().getConstIterator(); chunkPosition = inputArrayIter->getPosition(); for(i=chunkPosition[0]; i<chunkPosition[0] + nChunkSize; i++) { // In case the chunk is partially filled. if(i == n + nStart) { break; } for(j=chunkPosition[1], m=1; j<=chunkPosition[1]+d; j++, m++) { // In case the chunk is partially filled. if(j == d + 1 + dStart) { break; } z_i[m] = chunkIter->getItem().getDouble(); ++(*chunkIter); } for(k=0; k<=d+1; ++k) { // This operator is not optimized for entries with value zero. // TODO: should use fabs(z_i[k]) < 10e-6 // if(z_i[k] == 0.0) { // continue; // } for(m=0; m<=k; ++m) { Gamma[k][m] += z_i[k]*z_i[m]; } } } ++(*inputArrayIter); } /** * The "logical" instance ID of the instance responsible for coordination of query. * COORDINATOR_INSTANCE if instance execute this query itself. */ if(query->getInstancesCount() > 1) { if(query->getInstanceID() != 0) { // I am not the coordinator, I should send my Gamma matrix out. 
shared_ptr <SharedBuffer> buf ( new MemoryBuffer(NULL, sizeof(double) * (d+3) * (d+2) / 2) ); double *Gammabuf = static_cast<double*> (buf->getData()); for(size_t i=0; i<d+2; ++i) { for(size_t j=0; j<=i; ++j) { *Gammabuf = Gamma[i][j]; ++Gammabuf; } } BufSend(0, buf, query); return outputArray; } else { // I am the coordinator, I should collect Gamma matrix from workers. for(InstanceID l = 1; l<query->getInstancesCount(); ++l) { shared_ptr<SharedBuffer> buf = BufReceive(l, query); double *Gammabuf = static_cast<double*> (buf->getData()); for(size_t i=0; i<d+2; ++i) { for(size_t j=0; j<=i; ++j) { Gamma[i][j] += *Gammabuf; ++Gammabuf; } } } } // end if getInstanceID() != 0 } //end if InstancesCount() > 1 return writeGamma(d, query); }
/**
 * Accumulates per-column sums (L) and sums of squares (Q) over the cells held by
 * this instance and merges the per-instance results on instance 0.
 *
 * Reads attribute 0 of inputArrays[0]. n is the current length of the first
 * dimension and d is the current length of the second dimension minus one; both
 * are stored in `nlq` (declared outside this function) as nlq.N and nlq.d.
 *
 * Every cell visited by the chunk iterators adds its value to nlq.L[c] and its
 * square to nlq.Q[c], where c is the cell's coordinate along the second dimension.
 * NOTE(review): the exchange below covers indices 1..d+1 only, so the second
 * dimension presumably starts at 1 -- confirm against the expected input schema.
 *
 * When the query spans several instances, every instance whose ID is not 0 sends
 * L[1..d+1] followed by Q[1..d+1] (2*d+2 doubles) to instance 0 and returns the
 * empty MemArray created on entry. Instance 0 receives one buffer from each other
 * instance and adds it element-wise into its own L and Q. nlq.N is not exchanged.
 *
 * @param inputArrays  input arrays; only element 0 is used
 * @param query        query context (instance count and ID; passed to BufSend,
 *                     BufReceive, MemArray and writeGamma)
 * @return writeGamma(query) on instance 0 or in a single-instance query;
 *         an empty MemArray over _schema on the other instances
 */
shared_ptr< Array > execute(vector< shared_ptr< Array> >& inputArrays, shared_ptr<Query> query) { shared_ptr<Array> outputArray(new MemArray(_schema, query)); shared_ptr<Array> inputArray = inputArrays[0]; ArrayDesc inputSchema = inputArray->getArrayDesc(); // Get descriptor of two dimensions d and n. DimensionDesc dimsN = inputSchema.getDimensions()[0]; DimensionDesc dimsD = inputSchema.getDimensions()[1]; size_t n = dimsN.getCurrLength(); // Note: the input data set should have d+1 dimensions (including Y) size_t d = dimsD.getCurrLength() - 1; nlq.N = n; nlq.d = d; shared_ptr<ConstArrayIterator> inputArrayIter = inputArray->getConstIterator(0); Coordinates cellPosition; size_t i; double value; while(! inputArrayIter->end() ) { shared_ptr<ConstChunkIterator> chunkIter = inputArrayIter->getChunk().getConstIterator(); // For each cell in the current chunk. // This will skip the empty cells. while(! chunkIter->end() ) { cellPosition = chunkIter->getPosition(); value = chunkIter->getItem().getDouble(); nlq.L[ cellPosition[1] ] += value; nlq.Q[ cellPosition[1] ] += value * value; ++(*chunkIter); } ++(*inputArrayIter); } /** * The "logical" instance ID of the instance responsible for coordination of query. * COORDINATOR_INSTANCE if instance execute this query itself. */ if(query->getInstancesCount() > 1) { if(query->getInstanceID() != 0) { // I am not the coordinator, I should send my Gamma matrix out. shared_ptr <SharedBuffer> buf ( new MemoryBuffer(NULL, sizeof(double) * (d*2+2) )); double *Gammabuf = static_cast<double*> (buf->getData()); for(i=1; i<=d+1; ++i) { *Gammabuf = nlq.L[i]; ++Gammabuf; } for(i=1; i<=d+1; ++i) { *Gammabuf = nlq.Q[i]; ++Gammabuf; } BufSend(0, buf, query); return outputArray; } else { // I am the coordinator, I should collect Gamma matrix from workers. 
for(InstanceID l = 1; l<query->getInstancesCount(); ++l) { shared_ptr<SharedBuffer> buf = BufReceive(l, query); double *Gammabuf = static_cast<double*> (buf->getData()); for(i=1; i<=d+1; ++i) { nlq.L[i] += *Gammabuf; ++Gammabuf; } for(i=1; i<=d+1; ++i) { nlq.Q[i] += *Gammabuf; ++Gammabuf; } } }// end if getInstanceID() != 0 }//end if InstancesCount() > 1 return writeGamma(query); }
/**
 * Accumulates, per distinct value of the Y column, a row count (N), per-column
 * sums (L) and per-column sums of squares (Q), then merges the per-instance
 * results on instance 0.
 *
 * Reads attribute 0 of inputArrays[0]. n is the number of rows
 * (currEnd - currStart + 1 of the first dimension) and d is currEnd - currStart of
 * the second dimension. _parameters[0] is evaluated into `k`; the Y column index
 * `idY` defaults to d+1 and is overridden by _parameters[1] when two parameters
 * are given. `d`, `idY`, `k`, `nlq` and (under DEBUG) `log` are assigned here
 * without a local declaration, so they are declared outside this function.
 * NOTE(review): the later local declaration `int64_t i, j, k, m, l;` shadows that
 * outer `k` for the rest of the function -- confirm this is intended.
 *
 * For each row the d+1 values are read into tmp.L[1..d+1] (squares into tmp.Q).
 * The row's group key is tmp.L[idY]. The group's entry in the map `nlq` is created
 * with N = 1 on first sight, otherwise N is incremented. The non-Y columns are
 * added to L/Q slots 1..d in order, and the Y column is added to slot d+1.
 * NOTE(review): the chunk iterator is advanced without checking end(), so the loops
 * appear to assume every cell in the visited range is present -- confirm.
 *
 * When the query spans several instances, every instance whose ID is not 0 copies
 * all of its NLQ entries into one buffer, sends it to instance 0 and returns the
 * empty MemArray created on entry. Instance 0 receives from each other instance
 * (skipping an instance when the received buffer pointer is null), derives the
 * entry count from the buffer size, and either inserts each entry or adds its N
 * and L/Q[1..d+1] into the existing entry with the same groupId.
 *
 * Code between #ifdef DEBUG and #endif only writes diagnostics to `log`.
 *
 * @param inputArrays  input arrays; only element 0 is used
 * @param query        query context (instance count and ID; passed to BufSend,
 *                     BufReceive, MemArray and writeGamma)
 * @return writeGamma(query) on instance 0 or in a single-instance query;
 *         an empty MemArray over _schema on the other instances
 */
shared_ptr< Array > execute(vector< shared_ptr< Array> >& inputArrays, shared_ptr<Query> query) { shared_ptr<Array> outputArray(new MemArray(_schema, query)); shared_ptr<Array> inputArray = inputArrays[0]; ArrayDesc inputSchema = inputArray->getArrayDesc(); // Get descriptor of two dimensions d and n. DimensionDesc dimsN = inputSchema.getDimensions()[0]; DimensionDesc dimsD = inputSchema.getDimensions()[1]; int64_t n = dimsN.getCurrEnd() - dimsN.getCurrStart() + 1; // Note: the input data set should have d+1 dimensions (including Y) d = dimsD.getCurrEnd() - dimsD.getCurrStart(); idY = d+1; int64_t nStart = dimsN.getCurrStart(); int64_t dStart = dimsD.getCurrStart(); // Get chunk size of n. int64_t nChunkSize = dimsN.getChunkInterval(); k = ((shared_ptr<OperatorParamPhysicalExpression>&)_parameters[0])->getExpression()->evaluate().getInt64(); if (_parameters.size() == 2) { idY = ((shared_ptr<OperatorParamPhysicalExpression>&)_parameters[1])->getExpression()->evaluate().getInt64(); } #ifdef DEBUG stringstream ss; ss << getenv("HOME") << "/groupdiagdensegamma-instance-" << query->getInstanceID() << ".log"; log.open(ss.str().c_str(), ios::out); log << "n = " << n << endl << "d = " << d << endl << "k = " << k << endl; log << "nStart = " << nStart << endl << "dStart = " << dStart << endl; log << "nChunkSize = " << nChunkSize << endl; log << "idY = " << idY << endl; #endif shared_ptr<ConstArrayIterator> inputArrayIter = inputArray->getConstIterator(0); Coordinates chunkPosition; int64_t i, j, k, m, l; double value; NLQ tmp; map<double, struct NLQ>::iterator it; while(! inputArrayIter->end() ) { shared_ptr<ConstChunkIterator> chunkIter = inputArrayIter->getChunk().getConstIterator(); chunkPosition = inputArrayIter->getPosition(); #ifdef DEBUG log << "Getting into chunk (" << chunkPosition[0] << ", " << chunkPosition[1] << ")." 
<< endl; #endif for(i=chunkPosition[0]; i<chunkPosition[0] + nChunkSize; i++) { if(i == n + nStart) { #ifdef DEBUG log << "Reaching row " << i << ", exiting." << endl; #endif break; } for(j=chunkPosition[1], m=1; j<=chunkPosition[1]+d; j++, m++) { if(j == d + 1 + dStart) { #ifdef DEBUG log << "Reaching column " << j << ", exiting." << endl; #endif break; } value = chunkIter->getItem().getDouble(); tmp.L[m] = value; tmp.Q[m] = value * value; ++(*chunkIter); } double Y = tmp.L[idY]; it = nlq.find(Y); if (it == nlq.end()) { #ifdef DEBUG log << "Cannot find NLQ entry for class " << Y << ", creating new." << endl; #endif nlq[Y].N = 1; nlq[Y].groupId = Y; } else { nlq[Y].N++; } for (k=1, l=1; k<=d+1; k++) { if (k == idY) { continue; } nlq[Y].L[l] += tmp.L[k]; nlq[Y].Q[l] += tmp.Q[k]; l++; } nlq[Y].L[d+1] += tmp.L[idY]; nlq[Y].Q[d+1] += tmp.Q[idY]; } ++(*inputArrayIter); } /** * The "logical" instance ID of the instance responsible for coordination of query. * COORDINATOR_INSTANCE if instance execute this query itself. */ size_t localClassCount = nlq.size(); #ifdef DEBUG log << "localClassCount = " << localClassCount << endl; #endif if(query->getInstancesCount() > 1) { if(query->getInstanceID() != 0) { // I am not the coordinator, I should send my NLQ out. #ifdef DEBUG log << "I am not the coordinator, I should send my NLQ out." << endl; #endif shared_ptr <SharedBuffer> buf ( new MemoryBuffer(NULL, sizeof(struct NLQ) * localClassCount )); struct NLQ *NLQbuf = static_cast<struct NLQ*> (buf->getData()); for(it = nlq.begin(); it != nlq.end(); it++) { *NLQbuf = it->second; ++NLQbuf; } BufSend(0, buf, query); #ifdef DEBUG log << "Exiting." << endl; #endif return outputArray; } else { // I am the coordinator, I should collect NLQ from workers. #ifdef DEBUG log << "I am the coordinator, I should collect NLQ from workers." << endl; #endif for(InstanceID l = 1; l<query->getInstancesCount(); ++l) { shared_ptr<SharedBuffer> buf = BufReceive(l, query); if(! 
buf) { #ifdef DEBUG log << "Nothing from instance " << l << ", continue." << endl; #endif continue; } int64_t remoteClassCount = buf->getSize() / sizeof(struct NLQ); struct NLQ* NLQbuf = static_cast<struct NLQ*> (buf->getData()); #ifdef DEBUG log << "Received " << remoteClassCount << " entries from instance " << l << endl; #endif for(i=0; i<remoteClassCount; ++i) { it = nlq.find(NLQbuf->groupId); if( it == nlq.end() ) { #ifdef DEBUG log << "Cannot find NLQ entry for class " << NLQbuf->groupId << ", creating new." << endl; #endif nlq[NLQbuf->groupId] = *NLQbuf; } else { it->second.N += NLQbuf->N; for(j=1; j<=d+1; ++j) { it->second.L[j] += NLQbuf->L[j]; it->second.Q[j] += NLQbuf->Q[j]; } } ++NLQbuf; } #ifdef DEBUG log << "Merge complete." << endl; #endif } }// end if getInstanceID() != 0 }//end if InstancesCount() > 1 return writeGamma(query); }
/**
 * Perform operator-specific checks of input and return the shape of the output.
 * Currently, the output array must exist.
 *
 * The target array name comes from _parameters[0] and its schema is looked up in
 * the system catalog. The checks, in order:
 *  - the target must exist (else SCIDB_LE_ARRAY_DOESNT_EXIST);
 *  - the target must not be immutable (else SCIDB_LE_ILLEGAL_OPERATION);
 *  - source and target must have the same number of dimensions;
 *  - every source and target dimension must be of type TID_INT64;
 *  - per dimension, start-min, chunk interval and chunk overlap must be equal,
 *    the source end-max must not exceed the target's, and a source that ends
 *    before the target must have a length divisible by its chunk interval
 *    (else SCIDB_LE_DIMENSIONS_DONT_MATCH);
 *  - source and target must have the same number of attributes
 *    (as returned by getAttributes(true));
 *  - per attribute, types must be equal (else SCIDB_LE_WRONG_ATTRIBUTE_TYPE) and a
 *    nullable source cannot feed a non-nullable target
 *    (else SCIDB_LE_WRONG_ATTRIBUTE_FLAGS).
 * Dimensions and attributes are matched by position, not by name.
 *
 * @param schemas the shapes of the input arrays; exactly one is expected (asserted)
 * @param query the query context; not used in this function
 * @return a descriptor with the target's name, attributes, dimensions and flags;
 *         no array ID is attached (see the closing comment in the body)
 * @throws USER_EXCEPTION(SCIDB_SE_INFER_SCHEMA, ...) for each failed check above
 */
ArrayDesc inferSchema(std::vector< ArrayDesc> schemas, shared_ptr< Query> query) { assert(schemas.size() == 1); assert(_parameters.size() == 1); string arrayName = ((shared_ptr<OperatorParamReference>&)_parameters[0])->getObjectName(); ArrayDesc const& srcDesc = schemas[0]; //Ensure attributes names uniqueness. ArrayDesc dstDesc; if (!SystemCatalog::getInstance()->getArrayDesc(arrayName, dstDesc, false)) { throw USER_EXCEPTION(SCIDB_SE_INFER_SCHEMA, SCIDB_LE_ARRAY_DOESNT_EXIST) << arrayName; } if(dstDesc.isImmutable()) { throw USER_EXCEPTION(SCIDB_SE_INFER_SCHEMA, SCIDB_LE_ILLEGAL_OPERATION) << "Target of INSERT must be a mutable array"; } Dimensions const& srcDims = srcDesc.getDimensions(); Dimensions const& dstDims = dstDesc.getDimensions(); if (srcDims.size() != dstDims.size()) { //TODO: this will get lifted when we allow redimension+insert in the same op //and when we DO implement redimension+insert - we will need to match attributes/dimensions by name, not position. 
throw USER_EXCEPTION(SCIDB_SE_INFER_SCHEMA, SCIDB_LE_ILLEGAL_OPERATION) << "Temporary restriction: target of INSERT must have same dimensions as the source"; } for (size_t i = 0, n = srcDims.size(); i < n; i++) { if( srcDims[i].getType() != TID_INT64 || dstDims[i].getType() != TID_INT64) { throw USER_EXCEPTION(SCIDB_SE_INFER_SCHEMA, SCIDB_LE_ILLEGAL_OPERATION) << "Temporary restriction: INSERT only supports integer dimensions"; } //TODO: we can also allow arrays that are smaller whose length is not evenly divided by chunk interval //but then we have to detect "edge chunks" and rewrite them cleverly if( srcDims[i].getStartMin() != dstDims[i].getStartMin() || srcDims[i].getChunkInterval() != dstDims[i].getChunkInterval() || srcDims[i].getChunkOverlap() != dstDims[i].getChunkOverlap() || srcDims[i].getEndMax() > dstDims[i].getEndMax() || ( srcDims[i].getEndMax() < dstDims[i].getEndMax() && srcDims[i].getLength() % srcDims[i].getChunkInterval() != 0)) { throw USER_EXCEPTION(SCIDB_SE_INFER_SCHEMA, SCIDB_LE_DIMENSIONS_DONT_MATCH) << srcDims[i].getBaseName() << dstDims[i].getBaseName(); } } Attributes const& srcAttrs = srcDesc.getAttributes(true); Attributes const& dstAttrs = dstDesc.getAttributes(true); if (srcAttrs.size() != dstAttrs.size()) { throw USER_EXCEPTION(SCIDB_SE_INFER_SCHEMA, SCIDB_LE_ILLEGAL_OPERATION) << "Temporary restriction: target of INSERT must have same attributes as the source"; } for (size_t i = 0, n = srcAttrs.size(); i < n; i++) { if(srcAttrs[i].getType() != dstAttrs[i].getType()) { throw USER_EXCEPTION(SCIDB_SE_INFER_SCHEMA, SCIDB_LE_WRONG_ATTRIBUTE_TYPE) << srcAttrs[i].getName() << srcAttrs[i].getType() << dstAttrs[i].getType(); } //can't store nulls into a non-nullable attribute if(!dstAttrs[i].isNullable() && srcAttrs[i].isNullable()) { throw USER_EXCEPTION(SCIDB_SE_INFER_SCHEMA, SCIDB_LE_WRONG_ATTRIBUTE_FLAGS) << srcAttrs[i].getName(); } } //Note: let us NOT add arrayID numbers to the schema - because we do not have our ArrayID yet. 
//We will get our ArrayID when we execute and create the array. Until then - don't bother. //Old store code adds the arrayID to the schema - but that's the arrayID of the previous version, //not the new version created by the op. A dangerous fallacy - stupid and unnecessary. return ArrayDesc(arrayName, dstDesc.getAttributes(), dstDesc.getDimensions(), dstDesc.getFlags()); }