/**
 * Encodes the condition held by a descriptor when it has a form this encoder
 * handles; otherwise returns with the descriptor unchanged.
 *
 * Nothing is done when the descriptor is already marked done or is delayed,
 * or when it has no attribute column / the column reports dimension -1.
 * OR-tree descriptors are delegated to the tree root and then simplified.
 *
 * @param desc             descriptor to examine and, if eligible, encode in place
 * @param for_rough_query  when true, a subquery-type descriptor on a plain
 *                         column is not encoded
 * @param additional_nulls forwarded to the OR-tree encoder and to the
 *                         ConditionEncoder instance that performs the encoding
 */
void ConditionEncoder::EncodeIfPossible(Descriptor& desc, bool for_rough_query, bool additional_nulls)
{
	MEASURE_FET("ConditionEncoder::EncodeIfPossible(...)");
	if(desc.done || desc.IsDelayed())
		return;
	if(desc.IsType_OrTree()) {
		desc.tree->root->EncodeIfPossible(for_rough_query, additional_nulls);
		desc.Simplify();
		return;
	}
	if(!desc.attr.vc || desc.attr.vc->GetDim() == -1)
		return;

	SingleColumn* vcsc = (desc.attr.vc->IsSingleColumn() ? static_cast<SingleColumn*>(desc.attr.vc) : NULL);

	bool encode_now = false;
	if(desc.IsType_AttrAttr() && IsSimpleEqualityOperator(desc.op) && vcsc) {
		// Special case: simple operator on two compatible numerical columns.
		SingleColumn* vcsc2 = NULL;
		if(desc.val1.vc->IsSingleColumn())
			vcsc2 = static_cast<SingleColumn*>(desc.val1.vc);
		// Both sides must be single columns backed by an RC_TABLE.
		if(vcsc2 == NULL
			|| vcsc->GetVarMap()[0].GetTabPtr()->TableType() != RC_TABLE
			|| vcsc2->GetVarMap()[0].GetTabPtr()->TableType() != RC_TABLE)
			return;
		// Strings and lookups are excluded from the attr-attr special case.
		if(vcsc->Type().IsString() || vcsc->Type().IsLookup()
			|| vcsc2->Type().IsString() || vcsc2->Type().IsLookup())
			return;
		bool is_timestamp1 = (vcsc->Type().GetTypeName() == RC_TIMESTAMP);
		bool is_timestamp2 = (vcsc2->Type().GetTypeName() == RC_TIMESTAMP);
		// Exclude a timestamp compared with something else, i.e. exactly one
		// side being a timestamp. The previous unparenthesized form
		// `a || b && !(a && b)` also rejected timestamp-vs-timestamp, because
		// && binds tighter than ||.
		if(is_timestamp1 != is_timestamp2)
			return;
		// Accept only matching type families: both date/time, both float, or
		// both fixed with the same scale.
		encode_now = (vcsc->Type().IsDateTime() && vcsc2->Type().IsDateTime())
			|| (vcsc->Type().IsFloat() && vcsc2->Type().IsFloat())
			|| (vcsc->Type().IsFixed() && vcsc2->Type().IsFixed()
				&& vcsc->Type().GetScale() == vcsc2->Type().GetScale());
	}
	if(!encode_now) {
		ExpressionColumn* vcec = dynamic_cast<ExpressionColumn*>(desc.attr.vc);
		// Neither an expression column nor a single column on an RC_TABLE: give up.
		if(vcec == NULL && (vcsc == NULL || vcsc->GetVarMap()[0].GetTabPtr()->TableType() != RC_TABLE))
			return;
		if(vcec != NULL) {
			// Expression column: requires ExactlyOneLookup() and either a
			// NULL test or constant operand(s).
			encode_now = (vcec->ExactlyOneLookup()
				&& (desc.op == O_IS_NULL || desc.op == O_NOT_NULL
					|| (desc.val1.vc && desc.val1.vc->IsConst()
						&& (desc.val2.vc == NULL || desc.val2.vc->IsConst()))));
		} else {
			// Plain column: attr-value(s), multi-value or NULL test on an
			// RC_TABLE; subquery descriptors are skipped for rough queries.
			encode_now = (desc.IsType_AttrValOrAttrValVal() || desc.IsType_AttrMultiVal()
					|| desc.op == O_IS_NULL || desc.op == O_NOT_NULL)
				&& desc.attr.vc->GetVarMap()[0].GetTabPtr()->TableType() == RC_TABLE
				&& (!for_rough_query || !desc.IsType_Subquery());
		}
	}
	if(!encode_now)
		return;
	/////////////////////////////////////////////////////////////////////////////////////
	// Encoding itself
	ConditionEncoder ce(additional_nulls);
	ce(desc);
	desc.Simplify();
}
double ParameterizedFilter::EvaluateConditionNonJoinWeight(Descriptor &d, bool for_or) { // Interpretation of weight: // an approximation of logarithm of answer size // (in case of for_or: an approximation of (full_table - answer) size) // 0 -> time is very short (constant). // high weight -> schedule this query to be executed later double eval = 0.0; _uint64 no_distinct, no_distinct2; _uint64 answer_size; if(d.IsTrue() || d.IsFalse()) eval = 0; // constant time else if(d.IsType_AttrValOrAttrValVal()) { // typical condition: attr=val if(!d.encoded) { return log(1 + double(d.attr.vc->NoTuples())) + 5; // +5 as a penalty for complex expression } SingleColumn* col = static_cast<SingleColumn*>(d.attr.vc); answer_size = col->ApproxAnswerSize(d); if(for_or) answer_size = d.attr.vc->NoTuples() - answer_size; _int64 no_in_values = 1; if(d.op == O_IN || d.op == O_NOT_IN) { MultiValColumn* iscol = static_cast<MultiValColumn*>(d.val1.vc); no_in_values = iscol->NoValues(NULL); } eval = log(1 + double(answer_size)); // approximate size of the result if(no_in_values > 1) eval += log(double(no_in_values)) * 0.5; // INs are potentially slower (many comparisons needed) if(col->Type().IsString() && !col->Type().IsLookup()) eval += 0.5; // strings are slower if(col->Type().IsFloat()) eval += 0.1; // floats are slower if(d.op == O_LIKE || d.op == O_NOT_LIKE) eval += 0.2; // these operators need more work } else if(d.IsType_AttrAttr()) { // attr=attr on the same table _uint64 no_obj = d.attr.vc->NoTuples(); // changed to uint64 to prevent negative logarithm for NULL_VALUE_64 if(!d.encoded) return log(1 + double(2 * no_obj)) + 5; // +5 as a penalty for complex expression else if(d.op == O_EQ) { no_distinct = d.attr.vc->GetApproxDistVals(false); if(no_distinct == 0) no_distinct = 1; no_distinct2 = d.val1.vc->GetApproxDistVals(false); if(no_distinct2 == 0) no_distinct2 = 1; if(no_distinct2 > no_distinct) no_distinct = no_distinct2; // find the attribute with smaller abstract classes 
if(for_or) eval = log(1 + (no_obj - double(no_obj) / no_distinct)); else eval = log(1 + double(no_obj) / no_distinct); // size of the smaller abstract class } else { eval = log(1 + double(no_obj) / 2); // other operators filter potentially a half of objects } eval += 1; // add to compensate opening two packs } else if(d.IsType_OrTree() && !d.IsType_Join()) { eval = d.tree->root->EvaluateConditionWeight(this, for_or); } else { // expressions and other types, incl. joins (to be calculated separately) if(d.IsType_IBExpression()) return log(1 + double(d.attr.vc->NoTuples())) + 2; // +2 as a penalty for IB complex expression eval = 99999; } return eval; }