/**
 * Decide how the output of this equal join is partitioned.
 *
 * One output partition is created for every node id returned by
 * NodeTracker::getNodeIDList(), and every partition is given the same
 * estimated cardinality:
 *
 *   left cardinality * right cardinality * predicted selectivity / #partitions
 *
 * The descriptor is keyed on the first attribute of the first join-key pair,
 * the second attribute of that pair is added as a shadow partition key, and a
 * boost hash partition function over all partitions is attached.
 *
 * @param left_dataflow  dataflow of the left join input.
 * @param right_dataflow dataflow of the right join input.
 * @return the partitioning descriptor describing the join output.
 *
 * NOTE(review): joinkey_pair_list_[0] is read unconditionally, so this
 * presumably requires at least one join-key pair -- confirm against callers.
 */
DataflowPartitioningDescriptor EqualJoin::decideOutputDataflowProperty(const Dataflow& left_dataflow,const Dataflow& right_dataflow)const{
	DataflowPartitioningDescriptor ret;

	const unsigned long l_data_cardinality=left_dataflow.getAggregatedDataCardinality();
	const unsigned long r_data_cardinality=right_dataflow.getAggregatedDataCardinality();

	std::vector<NodeID> all_node_id_list=NodeTracker::getInstance()->getNodeIDList();

	/* In the current implementation, all the nodes are involved in the complete_repartition method.
	 * TODO decide the degree of parallelism */
	const unsigned degree_of_parallelism=all_node_id_list.size();

	/* The join output size cannot be predicted precisely due to the absence of
	 * data statistics, so a selectivity-based estimate is used instead.
	 *
	 * The estimate is identical for every partition, so it is computed once
	 * rather than once per node. The product of the two cardinalities is formed
	 * in double precision: multiplying them as unsigned long first would wrap
	 * around silently for large inputs before the selectivity is applied. */
	unsigned long cardinality=0;
	if(degree_of_parallelism>0){
		const double estimate=static_cast<double>(l_data_cardinality)
				*static_cast<double>(r_data_cardinality)
				*predictEqualJoinSelectivity(left_dataflow,right_dataflow)
				/degree_of_parallelism;
		const unsigned long max_cardinality=~0UL;
		if(estimate>=static_cast<double>(max_cardinality)){
			/* Converting an out-of-range double to an integer is undefined; saturate instead. */
			cardinality=max_cardinality;
		}
		else if(estimate>0){
			cardinality=static_cast<unsigned long>(estimate);
		}
		/* A non-positive or NaN estimate leaves the cardinality at 0. */
	}

	std::vector<DataflowPartition> dataflow_partition_list;
	dataflow_partition_list.reserve(degree_of_parallelism);
	for(unsigned i=0;i<degree_of_parallelism;i++){
		const NodeID location=all_node_id_list[i];
		DataflowPartition dfp(i,cardinality,location);
		dataflow_partition_list.push_back(dfp);
	}

	ret.setPartitionList(dataflow_partition_list);
	ret.setPartitionKey(joinkey_pair_list_[0].first);
	ret.addShadowPartitionKey(joinkey_pair_list_[0].second);

	PartitionFunction* partition_function=PartitionFunctionFactory::createBoostHashFunction(degree_of_parallelism);
	ret.setPartitionFunction(partition_function);

	return ret;
}
/**
 * Choose which join input should be repartitioned, based on the aggregated
 * data size of the two inputs.
 *
 * @param left_dataflow  dataflow of the left join input.
 * @param right_dataflow dataflow of the right join input.
 * @return right_repartition when the left input is strictly larger than the
 *         right one; left_repartition otherwise (including when both sizes
 *         are equal).
 *
 * NOTE(review): the sizes are held in `unsigned`; if getAggregatedDatasize()
 * returns a wider type the values are narrowed before the comparison -- confirm.
 */
EqualJoin::JoinPolice EqualJoin::decideLeftOrRightRepartition(const Dataflow& left_dataflow,const Dataflow& right_dataflow)const{
	const unsigned size_of_left=left_dataflow.getAggregatedDatasize();
	const unsigned size_of_right=right_dataflow.getAggregatedDatasize();

	/* Repartition the side that is not the larger one. */
	return (size_of_left>size_of_right)?right_repartition:left_repartition;
}
/**
 * Checks whether a jump is a return.
 *
 * \param jump     Valid pointer to a jump.
 * \param dataflow Dataflow information used to look up the value of the
 *                 jump's target address.
 *
 * \return True only if the jump is unconditional, its "then" target has an
 *         address, and the dataflow value of that address reports being a
 *         return address. False otherwise.
 */
bool isReturn(const Jump *jump, const Dataflow &dataflow) {
    assert(jump != nullptr);

    // Short-circuit evaluation keeps the checks in order: the dataflow lookup
    // happens only for unconditional jumps whose "then" target has an address.
    return !jump->isConditional()
        && jump->thenTarget().address()
        && dataflow.getValue(jump->thenTarget().address())->isReturnAddress();
}
/**
 * Checks whether the dataflow value of a term is a return address.
 *
 * \param term     Valid pointer to a term.
 * \param dataflow Dataflow information to query for the term's value.
 *
 * \return Whatever isReturnAddress() reports for the value that the dataflow
 *         information associates with the term.
 */
bool isReturnAddress(const Term *term, const Dataflow &dataflow) {
    assert(term != nullptr);

    // Bind the looked-up value first, then ask it.
    auto &&termValue = dataflow.getValue(term);
    return termValue->isReturnAddress();
}