Beispiel #1
0
/*
 * Chooses the candidate with the largest gain for subset S.
 *
 * Candidates are the ids -1, -2, ... down to -(nattributes-1). Ids for which
 * isUsed() returns non-zero are skipped; every evaluated candidate has its
 * gain printed to stdout.
 *
 * Returns the id of the candidate whose gain is the largest value strictly
 * greater than zero. When no candidate has a gain above zero, the counts from
 * positiveValues(0, S, ...) and negativeValues(0, S, ...) are compared and
 * 100 is returned if positives outnumber negatives, 101 otherwise
 * (presumably the codes for a positive / negative leaf - confirm with caller).
 */
int getMaxGain(int parent, int S, int values[LENGTH][LENGTH], int nvalues, int nattributes, att * test)
{
	float bestGain = 0;
	int bestCandidate = 0;
	int candidate = -1;

	while (candidate > -nattributes)
	{
		if (isUsed(candidate, test) == 0)
		{
			float candidateGain = getGain(parent, S, candidate, values, nvalues, nattributes, test);
			printf("Gain of %d and %d is %lf\n", S, candidate, candidateGain);
			/* Strict comparison: on ties the earliest candidate is kept. */
			if (candidateGain > bestGain)
			{
				bestGain = candidateGain;
				bestCandidate = candidate;
			}
		}
		candidate--;
	}

	/* bestGain only leaves zero when some candidate beat it. */
	if (bestGain != 0)
	{
		return bestCandidate;
	}

	/* Nothing improved on zero gain: fall back to the majority count. */
	int positives = positiveValues(0, S, values, nvalues, nattributes, test);
	int negatives = negativeValues(0, S, values, nvalues, nattributes, test);
	return (positives > negatives) ? 100 : 101;
}
Beispiel #2
0
/*
 * Computes the two-class entropy of the positive/negative counts associated
 * with `attribute` in the context of `S`.
 *
 * Counts come from positiveValues()/negativeValues() when S == 0; otherwise
 * updatePosNeg() is called and the counts are read from the positivetracker /
 * negativetracker fields of the list node whose `equivalent` is `attribute`.
 *
 * Returns:
 *   0  when either count is zero (this also covers an empty set),
 *   1  when both counts are equal,
 *   -p*log2(p) - n*log2(n) otherwise, with p and n the class fractions
 *      (this case also prints the computation to stdout).
 *   0  is also returned, with a message on stderr, when S != 0 and no list
 *      node matches `attribute` (the previous code dereferenced NULL here).
 */
float getEntropy(int S, int attribute, int values[LENGTH][LENGTH], int nvalues, int nattributes, att * test)
{
	int positive;
	int negative;

	if(S==0)
	{
		positive=positiveValues(S, attribute, values, nvalues, nattributes, test);
		negative=negativeValues(S, attribute, values, nvalues, nattributes, test);
	}
	else
	{
		/* The node is only needed on this path, so look it up here. */
		attnode * curr=test->head;
		while(curr!=NULL && curr->equivalent!=attribute)
		{
			curr=curr->next;
		}
		if(curr==NULL)
		{
			/* No node to read the trackers from: report it instead of crashing. */
			fprintf(stderr, "getEntropy: attribute %d not found in attribute list\n", attribute);
			return 0;
		}
		updatePosNeg(S, attribute, values, nvalues, nattributes, test);
		positive=curr->positivetracker;
		negative=curr->negativetracker;
	}

	/*
	 * Handle the degenerate cases before any arithmetic so that 0/0 and
	 * log2(0) are never evaluated.
	 */
	if(positive==0||negative==0)
	{
		return 0;
	}
	if(positive==negative)
	{
		return 1;
	}

	int total=positive+negative;
	float pos=(float)positive/total;
	float neg=(float)negative/total;
	float logpos=log2(pos);
	float logneg=log2(neg);
	float entropy=(-pos*logpos)-(neg*logneg);
	printf("entropy of %d wrt %d=-%d/%dlg%d/%d - %d/%dlg%d/%d=%lf\n", attribute, S, positive, total, positive, total, negative, total, negative, total, entropy);
	return entropy;
}
/**
 * Writes a histogram of `data` to `filename` as a text file.
 *
 * File layout: `preamble`, then one line per bin, then an empty line and
 * the word "end". A bin line is the value and its hit count, each
 * zero-padded to six digits and separated by " \t".
 *   - Non-negative bins cover 0..max in ascending order.
 *   - Negative bins are only produced when min < 0; they run from min up
 *     to -1 and are written with a leading "-".
 *   - With commentNeg set, every negative line is prefixed with "#", a
 *     "# negative Values following" header is emitted, and the negative
 *     section is written after the non-negative one. Without it, the
 *     negative section comes first.
 *
 * Samples outside [min, max] have no bin and are ignored (previously they
 * indexed past the end of the counting arrays).
 *
 * @param filename   path of the file to create/overwrite
 * @param preamble   text written verbatim at the start of the file
 * @param max        largest value that gets a bin
 * @param min        smallest value that gets a bin
 * @param data       samples to count; must hold at least dataSize entries
 * @param dataSize   number of samples in data
 * @param commentNeg see above
 *
 * NOTE: a failure to open the file is not reported; the writes then have
 * no effect.
 */
void PlotDataFileCreator::createPlottableDatafile(std::string filename, std::string preamble, int max, int min, long* data, int dataSize, bool commentNeg)
{
	const int maxValue = max;
	const int minValue = min;
	// A negative max leaves no non-negative bins at all.
	const int latHitArraySize = (maxValue >= 0) ? maxValue + 1 : 0;
	const long* plotValues = data;
	// NOTE(review): runtime-sized arrays are a compiler extension in C++ and
	// live on the stack; moving to std::vector needs <vector> to be included.
	// The length is kept at >= 1 so the declaration stays valid for max < 0.
	int hits[latHitArraySize > 0 ? latHitArraySize : 1];
	for(int i = 0; i < latHitArraySize; i++)
	{
		hits[i] = 0;
	}

	const int loopLength = dataSize;
	for(int i = 0; i < loopLength; i++)
	{
		const long value = plotValues[i];
		// Values above max have no bin; counting them would write past hits.
		if(value >= 0 && value <= maxValue)
		{
			hits[value]++;
		}
	}

	std::ofstream fs;
	fs.open(filename.c_str());
	fs << preamble;
	std::stringstream positiveValues;
	for(int i = 0; i < latHitArraySize; i++)
	{
		positiveValues << std::setfill('0') << std::setw(6) << i << " \t" << std::setfill('0') << std::setw(6) << hits[i] << '\n';
	}

	std::stringstream negativeValues("");
	if(minValue < 0)
	{
		// Index k holds the count for value -k; index 0 is unused.
		const int negHitArraySize = minValue*(-1) + 1;
		if(commentNeg)
		{
			negativeValues << "# negative Values following" << '\n';
		}
		int negHits[negHitArraySize];
		for(int i = 0; i < negHitArraySize; i++)
		{
			negHits[i] = 0;
		}
		for(int i = 0; i < loopLength; i++)
		{
			const long value = plotValues[i];
			// Values below min have no bin; counting them would write past negHits.
			if(value < 0 && value >= minValue)
			{
				negHits[-value]++;
			}
		}

		// Largest magnitude first, stopping before 0 (already in the other section).
		for(int i = negHitArraySize-1; i > 0; i--)
		{
			if(commentNeg)
			{
				negativeValues << "#";
			}
			negativeValues << "-" << std::setfill('0') << std::setw(6) << i << " \t" << std::setfill('0') << std::setw(6) << negHits[i] << '\n';
		}
	}
	if(commentNeg)
	{
		fs << positiveValues.str();
		fs << negativeValues.str();
	} else {
		fs << negativeValues.str();
		fs << positiveValues.str();
	}
	fs << '\n' << "end" << '\n';
	fs.close();
}
Beispiel #4
0
/*
 * Computes the gain obtained by splitting subset `S` on `attribute`:
 *
 *     gain = getEntropy(parent, S) - sum( count_v / count_S * getEntropy(S, v) )
 *
 * count_S is the positive+negative count of S: taken from positiveValues()/
 * negativeValues() when S == 0, otherwise from the tracker fields of S's list
 * node after updatePosNeg(parent, S, ...).
 * The sum runs over every node `v` that follows the node of `attribute` in
 * the list and has the same `y` as it (presumably the values belonging to
 * that attribute - confirm against how the list is built). Nodes whose
 * entropy is zero contribute nothing and are skipped.
 *
 * Each contributing term and the final gain are printed to stdout.
 *
 * Returns the gain. Returns 0, with a message on stderr, when the node for
 * `S` (only needed if S != 0) or for `attribute` is not in the list; the
 * previous code dereferenced NULL in those cases.
 *
 * NOTE(review): if count_S is 0 while a term is non-zero, the division below
 * yields inf/NaN; that behavior is unchanged.
 */
float getGain(int parent, int S, int attribute, int values[LENGTH][LENGTH], int nvalues, int nattributes, att * test)
{
	attnode * curr;
	attnode * curr2;
	float entropy;
	float entropy2;
	float sum=0;
	int denominator;
	int numerator;
	float gain;
	int positive;
	int negative;

	entropy=getEntropy(parent, S, values, nvalues, nattributes, test);

	if(S==0)
	{
		positive=positiveValues(S, S, values, nvalues, nattributes, test);
		negative=negativeValues(S, S, values, nvalues, nattributes, test);
	}
	else
	{
		/* The trackers of S's own node hold its counts. */
		curr=test->head;
		while(curr!=NULL && curr->equivalent!=S)
		{
			curr=curr->next;
		}
		if(curr==NULL)
		{
			fprintf(stderr, "getGain: subset %d not found in attribute list\n", S);
			return 0;
		}
		updatePosNeg(parent, S, values, nvalues, nattributes, test);
		positive=curr->positivetracker;
		negative=curr->negativetracker;
	}
	denominator=positive+negative;

	/* Locate the attribute being evaluated. */
	curr=test->head;
	while(curr!=NULL && curr->equivalent!=attribute)
	{
		curr=curr->next;
	}
	if(curr==NULL)
	{
		fprintf(stderr, "getGain: attribute %d not found in attribute list\n", attribute);
		return 0;
	}

	/* Only nodes after the attribute's node with a matching y are considered. */
	for(curr2=curr->next; curr2!=NULL; curr2=curr2->next)
	{
		if(curr2->y!=curr->y)
		{
			continue;
		}
		entropy2=getEntropy(S, curr2->equivalent, values, nvalues, nattributes, test);
		if(entropy2==0)
		{
			/* A zero-entropy node adds nothing to the weighted sum. */
			continue;
		}
		if(S==0)
		{
			positive=positiveValues(S, curr2->equivalent, values, nvalues, nattributes, test);
			negative=negativeValues(S, curr2->equivalent, values, nvalues, nattributes, test);
		}
		else
		{
			updatePosNeg(S, curr2->equivalent, values, nvalues, nattributes, test);
			positive=curr2->positivetracker;
			negative=curr2->negativetracker;
		}
		numerator=positive+negative;
		sum+=(float)numerator/denominator*entropy2;
		printf("Gain+=%d/%d * %lf\n", numerator, denominator, entropy2);
	}
	gain=entropy-sum;
	printf("Gain: %lf\n", gain);
	return gain;
}