/**
 * Builds a scan array over the directory named by @p path.
 *
 * The path is resolved with filesystem::system_complete and must refer to an
 * existing directory; otherwise a SYSTEM_EXCEPTION carrying the path as given
 * is thrown (SCIDB_LE_CANT_OPEN_PATH when it does not exist,
 * SCIDB_LE_DIRECTORY_EXPECTED when it exists but is not a directory).
 *
 * @param arr   descriptor of the array being scanned
 * @param path  directory path; stored in _dirPath
 */
ScanRLEArray::ScanRLEArray(ArrayDesc const& arr, std::string path)
    : RLEArray(arr),
      _dirPath(path),
      _maxChunkNo(0),
      logger(log4cxx::Logger::getLogger("scidb.query.ops.ScanRQArray"))
{
    const filesystem::path resolvedDir(
        filesystem::system_complete(filesystem::path(_dirPath)));

    if (filesystem::exists(resolvedDir))
    {
        // The path exists; it still has to be a directory to be usable.
        if (!filesystem::is_directory(resolvedDir))
        {
            throw SYSTEM_EXCEPTION(SCIDB_SE_INTERNAL, SCIDB_LE_DIRECTORY_EXPECTED) << _dirPath;
        }
    }
    else
    {
        throw SYSTEM_EXCEPTION(SCIDB_SE_INTERNAL, SCIDB_LE_CANT_OPEN_PATH) << _dirPath;
    }

    // Total chunk count divided by the attribute count.
    _maxChunkNo = arr.getNumberOfChunks()
                / arr.getAttributes().size();
}
/**
 * Builds a FilterArray over inputArrays[0] that keeps the cells whose "j"
 * value matches one of the first d entries of the sorted correlation list.
 *
 * Steps:
 *  1. Read p correlation values from the first chunk of attribute 0 of
 *     inputArrays[1]; entry i gets id i+1.
 *  2. Sort the entries with qsort using `comp` (defined elsewhere; the
 *     resulting order is whatever that comparator establishes).
 *  3. Evaluate operator parameter 0 as an int64 `d` and compile the filter
 *     expression "j=<id> or ... or j=<p+1>" from the first d sorted ids.
 *     d is clamped to p so the sorted list is never read out of bounds.
 *
 * Diagnostics are written, best effort, to
 * /home/scidb/preselect_<instanceID>.log; if that file cannot be opened the
 * operator runs without logging instead of crashing.
 *
 * @param inputArrays  [0] the array to filter, [1] the correlation array
 * @param query        current query; supplies the instance id and is passed
 *                     to the empty result array
 * @return a FilterArray over inputArrays[0], or an empty MemArray with the
 *         output schema when the correlation array has no chunks
 */
shared_ptr< Array > execute(vector< shared_ptr< Array> >& inputArrays, shared_ptr<Query> query)
{
    // Closes the debug log on every exit path, including exceptions.
    struct ScopedLogFile
    {
        FILE* file;
        explicit ScopedLogFile(FILE* f) : file(f) {}
        ~ScopedLogFile() { if (file) { fclose(file); } }
    };

    // The operator log lives in a local file named preselect_N.log, where N is the instance ID.
    stringstream logFileName;
    logFileName << "/home/scidb/preselect_" << query->getInstanceID() << ".log";
    ScopedLogFile logGuard(fopen(logFileName.str().c_str(), "w"));
    FILE* const logFile = logGuard.file;   // may be null: logging is best effort

    shared_ptr<Array> correlationArray = inputArrays[1];
    ArrayDesc corrSchema = correlationArray->getArrayDesc();
    Dimensions corrDims = corrSchema.getDimensions();
    DimensionDesc corrDimsP = corrDims[0];   // Note the correlation array doesn't have Y column.
    const Coordinate p = corrDimsP.getCurrLength();

    if (logFile)
    {
        fprintf(logFile, "p = %ld\n # of chunk = %ld\n", p, corrSchema.getNumberOfChunks());
        fflush(logFile);
    }

    shared_ptr<ConstArrayIterator> corrArrayIter = correlationArray->getIterator(0);
    if (corrArrayIter->end())
    {
        // No correlation data: return an empty array with the output schema.
        shared_ptr<Array> outputArray(new MemArray(_schema, query));
        return outputArray;
    }

    // Vector storage is released automatically; the former new[] buffer was never freed.
    const size_t entryCount = (p > 0) ? static_cast<size_t>(p) : 0;
    vector<correlation> corr(entryCount);

    // The correlation array will always have only 1 chunk (we designed correlation array like this), so no loops here.
    shared_ptr<ConstChunkIterator> corrChunkIter = corrArrayIter->getChunk().getConstIterator();
    for (Coordinate i = 0; i < p; ++i)
    {
        corr[i].id = i + 1;
        corr[i].corr = corrChunkIter->getItem().getDouble();
        ++(*corrChunkIter);
    }

    if (!corr.empty())
    {
        qsort(&corr[0], corr.size(), sizeof(correlation), &comp);
    }

    if (logFile)
    {
        for (Coordinate i = 0; i < p; ++i)
        {
            fprintf(logFile, "%d, %f\n", corr[i].id, corr[i].corr);
        }
        fflush(logFile);
    }

    const Coordinate d = ((boost::shared_ptr<OperatorParamPhysicalExpression>&)_parameters[0])->getExpression()->evaluate().getInt64();
    if (logFile)
    {
        fprintf(logFile, "d=%ld\n", d);
    }

    // Never select more entries than exist; a larger d used to index past the end of corr.
    const Coordinate selected = (d < p) ? d : p;

    vector<string> names;
    names.push_back("j");
    vector<TypeId> types;
    types.push_back(TID_INT64);

    stringstream ss;
    for (Coordinate i = 0; i < selected; ++i)
    {
        ss << "j=" << corr[i].id << " or ";
    }
    // Final term terminates the "or" chain; it selects j = p+1.
    ss << "j=" << p + 1;

    if (logFile)
    {
        fprintf(logFile, "%s\n", ss.str().c_str());
        fflush(logFile);
    }

    Expression e;
    e.compile(ss.str(), names, types);

    boost::shared_ptr<scidb::Query> emptyQuery;
    return boost::shared_ptr<Array>(new FilterArray(_schema, inputArrays[0], boost::make_shared<Expression>(e), emptyQuery, _tileMode));
}