DataflowPartitioningDescriptor EqualJoin::decideOutputDataflowProperty(const Dataflow& left_dataflow,const Dataflow& right_dataflow)const{ DataflowPartitioningDescriptor ret; // const unsigned l_data_cardinality=left_dataflow.getAggregatedDatasize(); // const unsigned r_datasize=right_dataflow.getAggregatedDatasize(); const unsigned long l_data_cardinality=left_dataflow.getAggregatedDataCardinality(); const unsigned long r_data_cardinality=right_dataflow.getAggregatedDataCardinality(); std::vector<NodeID> all_node_id_list=NodeTracker::getInstance()->getNodeIDList(); /* In the current implementation, all the nodes are involved in the complete_repartition method. * TODO decide the degree of parallelism*/ const unsigned degree_of_parallelism=all_node_id_list.size(); std::vector<DataflowPartition> dataflow_partition_list; for(unsigned i=0;i<degree_of_parallelism;i++){ const NodeID location=all_node_id_list[i]; /* Currently, the join output size cannot be predicted due to the absence of data statistics. * We just use the magic number as following */ // const unsigned cardinality=l_data_cardinality/degree_of_parallelism+r_data_cardinality/degree_of_parallelism; const unsigned long cardinality=l_data_cardinality*r_data_cardinality*predictEqualJoinSelectivity(left_dataflow,right_dataflow)/degree_of_parallelism; DataflowPartition dfp(i,cardinality,location); dataflow_partition_list.push_back(dfp); } ret.setPartitionList(dataflow_partition_list); ret.setPartitionKey(joinkey_pair_list_[0].first); ret.addShadowPartitionKey(joinkey_pair_list_[0].second); PartitionFunction* partition_function=PartitionFunctionFactory::createBoostHashFunction(degree_of_parallelism); ret.setPartitionFunction(partition_function); return ret; }
void main() { int i; struct map_type **points, *result; function *functions; functions = get_functions(); points = get_start_points(); for( i = 0; i < 5; i++ ) { printf("<func %d>\n", i + 1 ); show( "Start X", *( points + i ) ); result = dfp( *( functions + i ), *( points + i ), 0.0000100f, 0.0000100f, 500 ); printf( "Value %.10Lf\n", (long double)( *( functions + i ) )( result ) ); show( "X", result ); printf("</func %d>\n\n", i + 1 ); } free( points ); free( functions ); }
// constructor definition of filter service registration inner class clamd_av_filter::initializer::initializer() { clamd_av_filter_ptr dfp(new clamd_av_filter()); if(FI_CONFIG.use_clamd()) { try { boost::asio::io_service io_service; boost::asio::ip::tcp::resolver resolver(io_service); boost::asio::ip::tcp::resolver::query query(FI_CONFIG.clamd_host(), FI_CONFIG.clamd_port()); boost::asio::ip::tcp::resolver::iterator endpoint_iterator = resolver.resolve(query); boost::asio::ip::tcp::resolver::iterator end; boost::asio::ip::tcp::socket socket(io_service); boost::system::error_code error = boost::asio::error::host_not_found; while (error && endpoint_iterator != end) { socket.close(); socket.connect(*endpoint_iterator++, error); } if (error) throw boost::system::system_error(error); boost::asio::write(socket, boost::asio::buffer("PING\r\n", 6)); char reply[4]; boost::asio::read(socket, boost::asio::buffer(reply, 4)); std::string received_data(reply); // Check that response is OK. socket.close(); if(received_data.substr(0,3) == "PON") { FI_SERVICES->filter_srv().register_filter(filter_code_,dfp); LOG4CXX_DEBUG(debug_logger_, "Clamd AV filter is running"); } else LOG4CXX_ERROR(debug_logger_, "Clamd AV filter is not running"); } catch (std::exception& e) { LOG4CXX_ERROR(debug_logger_, "Clamd AV filter is not running. Exception: " + (std::string)e.what()); } } }
bool LogicalScan::GetOptimalPhysicalPlan(Requirement requirement,PhysicalPlanDescriptor& physical_plan_descriptor, const unsigned & block_size){ Dataflow dataflow=getDataflow(); NetworkTransfer transfer=requirement.requireNetworkTransfer(dataflow); ExpandableBlockStreamProjectionScan::State state; state.block_size_=block_size; state.projection_id_=target_projection_->getProjectionID(); state.schema_=getSchema(dataflow_->attribute_list_); state.sample_rate_=sample_rate_; PhysicalPlan scan=new ExpandableBlockStreamProjectionScan(state); if(transfer==NONE){ physical_plan_descriptor.plan=scan; physical_plan_descriptor.dataflow=dataflow; physical_plan_descriptor.cost+=0; } else{ physical_plan_descriptor.cost+=dataflow.getAggregatedDatasize(); ExpandableBlockStreamExchangeEpoll::State state; state.block_size_=block_size; state.child_=scan;//child_iterator; state.exchange_id_=IDsGenerator::getInstance()->generateUniqueExchangeID(); state.schema_=getSchema(dataflow.attribute_list_); std::vector<NodeID> lower_id_list=getInvolvedNodeID(dataflow.property_.partitioner); state.lower_id_list_=lower_id_list; std::vector<NodeID> upper_id_list; if(requirement.hasRequiredLocations()){ upper_id_list=requirement.getRequiredLocations(); } else{ if(requirement.hasRequiredPartitionFunction()){ /* partition function contains the number of partitions*/ PartitionFunction* partitoin_function=requirement.getPartitionFunction(); upper_id_list=std::vector<NodeID>(NodeTracker::getInstance()->getNodeIDList().begin(),NodeTracker::getInstance()->getNodeIDList().begin()+partitoin_function->getNumberOfPartitions()-1); } else{ //TODO: decide the degree of parallelism upper_id_list=NodeTracker::getInstance()->getNodeIDList(); } } state.upper_id_list_=upper_id_list; state.partition_schema_=partition_schema::set_hash_partition(getIndexInAttributeList(dataflow.attribute_list_,requirement.getPartitionKey())); assert(state.partition_schema_.partition_key_index>=0); BlockStreamIteratorBase* exchange=new ExpandableBlockStreamExchangeEpoll(state); Dataflow new_dataflow; new_dataflow.attribute_list_=dataflow.attribute_list_; new_dataflow.property_.partitioner.setPartitionKey(requirement.getPartitionKey()); new_dataflow.property_.partitioner.setPartitionFunction(PartitionFunctionFactory::createBoostHashFunction(state.upper_id_list_.size())); const unsigned total_size=dataflow.getAggregatedDatasize(); const unsigned degree_of_parallelism=state.upper_id_list_.size(); std::vector<DataflowPartition> dataflow_partition_list; for(unsigned i=0;i<degree_of_parallelism;i++){ const NodeID location=upper_id_list[i]; /* Currently, the join output size cannot be predicted due to the absence of data statistics. * We just use the magic number as following */ const unsigned datasize=total_size/degree_of_parallelism; DataflowPartition dfp(i,datasize,location); dataflow_partition_list.push_back(dfp); } new_dataflow.property_.partitioner.setPartitionList(dataflow_partition_list); physical_plan_descriptor.plan=exchange; physical_plan_descriptor.dataflow=new_dataflow; physical_plan_descriptor.cost+=new_dataflow.getAggregatedDatasize(); } if(requirement.passLimits(physical_plan_descriptor.cost)) return true; else return false; }