Example #1
0
// Encodes the condition described by `desc` when it has a form this encoder handles,
// then simplifies the descriptor. Returns without touching `desc` otherwise.
//
// Flow:
//  - descriptors that are already done or delayed are skipped;
//  - for an OR-tree the request is forwarded to the tree root and the descriptor is simplified;
//  - attr-attr conditions with a simple equality operator are encoded only for a pair of
//    single columns on RC_TABLE tables with compatible non-string, non-lookup types;
//  - otherwise the attribute must be either an expression column or a single column on
//    an RC_TABLE table, and the condition must match one of the forms checked below.
//
// Parameters:
//  desc             - descriptor to encode; modified in place
//  for_rough_query  - when true, subquery descriptors are not encoded in the single-column path
//  additional_nulls - forwarded to the OR-tree root and to the ConditionEncoder constructor
void ConditionEncoder::EncodeIfPossible(Descriptor& desc, bool for_rough_query, bool additional_nulls)
{
	MEASURE_FET("ConditionEncoder::EncodeIfPossible(...)");
	if(desc.done || desc.IsDelayed())
		return;
	if(desc.IsType_OrTree()) {
		desc.tree->root->EncodeIfPossible(for_rough_query, additional_nulls);
		desc.Simplify();
		return;
	}
	if(!desc.attr.vc || desc.attr.vc->GetDim() == -1)
		return;

	// Non-NULL only when the attribute side is a plain single column.
	SingleColumn* vcsc = (desc.attr.vc->IsSingleColumn() ? static_cast<SingleColumn*>(desc.attr.vc) : NULL);

	bool encode_now = false;
	if(desc.IsType_AttrAttr() && IsSimpleEqualityOperator(desc.op) && vcsc) {
		// special case: simple operator on two compatible numerical columns
		SingleColumn* vcsc2 = NULL;
		if(desc.val1.vc->IsSingleColumn())
			vcsc2 = static_cast<SingleColumn*>(desc.val1.vc);
		if(vcsc2 == NULL ||	vcsc->GetVarMap()[0].GetTabPtr()->TableType() != RC_TABLE || 
			vcsc2->GetVarMap()[0].GetTabPtr()->TableType() != RC_TABLE)
			return;
		if(	vcsc->Type().IsString()  || vcsc->Type().IsLookup()  || 
			vcsc2->Type().IsString() || vcsc2->Type().IsLookup())					// excluding strings
			return;
		bool is_timestamp1 = (vcsc->Type().GetTypeName() == RC_TIMESTAMP);
		bool is_timestamp2 = (vcsc2->Type().GetTypeName() == RC_TIMESTAMP);
		// Excluding timestamps compared with something else, i.e. exactly one side is a timestamp.
		// The former form `a || b && !(a && b)` parsed as `a || (b && !(a && b))` and therefore
		// also rejected the timestamp-vs-timestamp case.
		if(is_timestamp1 != is_timestamp2)
			return;

		// Both sides must belong to the same type family; fixed-point columns
		// additionally need identical scale.
		encode_now =(vcsc->Type().IsDateTime() && vcsc2->Type().IsDateTime()) ||
					(vcsc->Type().IsFloat() && vcsc2->Type().IsFloat()) ||
					(vcsc->Type().IsFixed() && vcsc2->Type().IsFixed() && 
					 vcsc->Type().GetScale() == vcsc2->Type().GetScale());
	}

	if(!encode_now) {
		ExpressionColumn* vcec = dynamic_cast<ExpressionColumn*>(desc.attr.vc);
		// Neither an expression column nor a single column on an RC_TABLE: nothing to encode.
		if(vcec == NULL && (vcsc == NULL || vcsc->GetVarMap()[0].GetTabPtr()->TableType() != RC_TABLE))
			return;
		if(vcec != NULL) {
			// Expression column: requires exactly one lookup and either a NULL test
			// or constant comparison value(s).
			encode_now = (vcec->ExactlyOneLookup() &&
							(desc.op == O_IS_NULL || desc.op == O_NOT_NULL ||
							(desc.val1.vc && desc.val1.vc->IsConst() &&
							(desc.val2.vc == NULL || desc.val2.vc->IsConst()))));
		} else {
			// Single column: attr-value(s), attr-multivalue or NULL tests;
			// subqueries are skipped for rough queries.
			encode_now = (desc.IsType_AttrValOrAttrValVal() ||
						  desc.IsType_AttrMultiVal() || 
						  desc.op == O_IS_NULL || desc.op == O_NOT_NULL )	&&
						  desc.attr.vc->GetVarMap()[0].GetTabPtr()->TableType() == RC_TABLE &&
						  (!for_rough_query || !desc.IsType_Subquery());
		}
	}
	if(!encode_now)
		return;
	/////////////////////////////////////////////////////////////////////////////////////
	// Encoding itself
	ConditionEncoder ce(additional_nulls);
	ce(desc);
	desc.Simplify();
}
// Estimates the weight (relative evaluation cost) of a single non-join condition.
//
// Interpretation of weight:
// an approximation of logarithm of answer size
// (in case of for_or: an approximation of (full_table - answer) size)
// 0 -> time is very short (constant).
// high weight -> schedule this query to be executed later
//
// Parameters:
//  d      - descriptor of the condition to evaluate
//  for_or - when true, the weight approximates the size of the complement of the answer
//
// Returns the weight; 99999 is used for condition types not handled here
// (incl. joins, which are calculated separately).
//
// Tuple counts are converted to _uint64 before being passed to log(), as the attr-attr
// branch does, to prevent negative logarithm for NULL_VALUE_64.
double ParameterizedFilter::EvaluateConditionNonJoinWeight(Descriptor &d, bool for_or)
{
	if(d.IsTrue() || d.IsFalse())
		return 0.0;				// constant time

	double eval = 0.0;
	if(d.IsType_AttrValOrAttrValVal()) {		// typical condition: attr=val
		if(!d.encoded) {
			const _uint64 no_obj = d.attr.vc->NoTuples();
			return log(1 + double(no_obj)) + 5;			// +5 as a penalty for complex expression
		}
		SingleColumn* col = static_cast<SingleColumn*>(d.attr.vc);
		_uint64 answer_size = col->ApproxAnswerSize(d);
		if(for_or) {
			// Complement of the answer; clamped at 0 because the unsigned subtraction
			// would wrap around if the approximation exceeded the number of tuples.
			const _uint64 no_obj = d.attr.vc->NoTuples();
			answer_size = (no_obj > answer_size ? no_obj - answer_size : 0);
		}
		_int64 no_in_values = 1;
		if(d.op == O_IN || d.op == O_NOT_IN) {
			MultiValColumn* iscol = static_cast<MultiValColumn*>(d.val1.vc);
			no_in_values = iscol->NoValues(NULL);
		}
		eval = log(1 + double(answer_size));		// approximate size of the result
		if(no_in_values > 1)
			eval += log(double(no_in_values)) * 0.5;	// INs are potentially slower (many comparisons needed)
		if(col->Type().IsString() && !col->Type().IsLookup())
			eval += 0.5;							// strings are slower
		if(col->Type().IsFloat())
			eval += 0.1;							// floats are slower
		if(d.op == O_LIKE || d.op == O_NOT_LIKE)
			eval += 0.2;							// these operators need more work
	} else if(d.IsType_AttrAttr()) {				// attr=attr on the same table
		const _uint64 no_obj = d.attr.vc->NoTuples();	// uint64 to prevent negative logarithm for NULL_VALUE_64
		if(!d.encoded) {
			// Doubling is done in floating point: 2 * no_obj in 64-bit integer
			// arithmetic wraps around for very large counts.
			return log(1 + 2.0 * double(no_obj)) + 5;		// +5 as a penalty for complex expression
		}
		if(d.op == O_EQ) {
			_uint64 no_distinct = d.attr.vc->GetApproxDistVals(false);
			if(no_distinct == 0)
				no_distinct = 1;
			_uint64 no_distinct2 = d.val1.vc->GetApproxDistVals(false);
			if(no_distinct2 == 0)
				no_distinct2 = 1;
			if(no_distinct2 > no_distinct)
				no_distinct = no_distinct2;					// find the attribute with smaller abstract classes
			if(for_or)
				eval = log(1 + (no_obj - double(no_obj) / no_distinct));
			else
				eval = log(1 + double(no_obj) / no_distinct);	// size of the smaller abstract class
		} else {
			eval = log(1 + double(no_obj) / 2);	// other operators filter potentially a half of objects
		}
		eval += 1;			// add to compensate opening two packs
	} else if(d.IsType_OrTree() && !d.IsType_Join()) {
		eval = d.tree->root->EvaluateConditionWeight(this, for_or);
	} else {	// expressions and other types, incl. joins (to be calculated separately)
		if(d.IsType_IBExpression()) {
			const _uint64 no_obj = d.attr.vc->NoTuples();
			return log(1 + double(no_obj)) + 2;		// +2 as a penalty for IB complex expression
		}
		eval = 99999;
	}
	return eval;
}