/* Example #1 */
/* 0 */
/*
This example uses a Generalized Linear Model (GLM), in this case with a LOGIT link function, to fit the
Wisconsin Breast Cancer data set.  Sample output is shown below: the training error for each iteration,
followed by the ideal and actual output for each record.

Actual input count: 9
Actual output count: 1
Iteration #1, Error: inf
Iteration #2, Error: 0.63698368
Iteration #3, Error: 0.37629115
Iteration #4, Error: 0.24127030
Iteration #5, Error: 0.12430888
Iteration #6, Error: 0.03587100
Iteration #7, Error: 0.00312089
Ideal: 0.00, Actual: 0.02
Ideal: 0.00, Actual: 0.91
Ideal: 0.00, Actual: 0.01
Ideal: 0.00, Actual: 0.77
Ideal: 0.00, Actual: 0.02
Ideal: 1.00, Actual: 1.00
Ideal: 0.00, Actual: 0.09
Ideal: 0.00, Actual: 0.01
Ideal: 0.00, Actual: 0.01
...
Ideal: 0.00, Actual: 0.00
Ideal: 1.00, Actual: 0.94
Ideal: 0.00, Actual: 0.00
Ideal: 0.00, Actual: 0.01
Ideal: 0.00, Actual: 0.00
Ideal: 0.00, Actual: 0.00
Ideal: 1.00, Actual: 0.99
Ideal: 1.00, Actual: 0.95
Ideal: 1.00, Actual: 0.98
*/
/* Fit a LOGIT regression model to "breast-cancer-wisconsin.csv" using reweighted least
   squares, printing the error after every iteration and then the ideal versus calculated
   output for every record.  The argIndex/argc/argv parameters are not used here. */
void ExampleGLM(int argIndex, int argc, char **argv) {
	char csvPath[FILENAME_MAX];
	NORM_DATA *norm;
	DATA_SET *data;
	REGRESSION_MODEL *reg;
	double *idealRow, *inputRow;
	double predicted, error;
	unsigned int row, iteration, passColumn;

	LocateFile("breast-cancer-wisconsin.csv",csvPath,FILENAME_MAX);

	/* Describe the columns: one ignored column, nine pass-through columns, and a final
	   replace column.  If you use a different data set, this area will need to be updated.
	   NOTE(review): the replace definition presumably maps the value 4 to 1 and anything
	   else to 0 -- confirm against NormDefReplace. */
	norm = NormCreate();
	NormDefIgnore(norm);
	for(passColumn=0;passColumn<9;passColumn++) {
		NormDefPass(norm);
	}
	NormDefReplace(norm,4,1,0);

	NormAnalyze(norm,csvPath);
	data = NormProcess(norm,csvPath,9,1);

	printf("Actual input count: %i\n", data->inputCount);
	printf("Actual output count: %i\n", data->idealCount);

	reg = RegressionCreate(data->inputCount,LinkLOGIT);

	/* Train until the error drops to 0.01 or 1000 iterations have run. */
	iteration = 0;
	for(;;) {
		iteration++;
		error = RegressionReweightLeastSquares(reg,data);
		printf("Iteration #%i, Error: %.8f\n",iteration,error);

		/* The comparison is written as a negation so that a NaN error ends training. */
		if( iteration >= 1000 || !(error > 0.01) ) {
			break;
		}
	}

	/* Display results */
	row = 0;
	while( row<data->recordCount ) {
		idealRow = DataGetIdeal(data,row);
		inputRow = DataGetInput(data,row);
		predicted = RegressionCalculate(reg,inputRow);
		printf("Ideal: %.2f, Actual: %.2f\n",*idealRow,predicted);
		row++;
	}

	/* Release everything created above. */
	NormDelete(norm);
	DataDelete(data);
	RegressionDelete(reg);
}
/* Build the training set from "iris.csv".  Four range columns (0 to 1) and one one-of-n
   class column are added to the supplied normalization definition, the file is analyzed,
   and the processed data set (4 inputs, 1 output column) is returned.  If you are using a
   data set other than Iris, you will need to update the normalization. */
static DATA_SET *create_iris_training(NORM_DATA *norm) {
	char irisPath[FILENAME_MAX];
	int column;

	LocateFile("iris.csv",irisPath,FILENAME_MAX);

	/* The four leading columns all use the same 0..1 range definition. */
	for(column=0;column<4;column++) {
		NormDefRange(norm,0,1);
	}
	NormDefClass(norm,NORM_CLASS_ONEOFN,0,1);

	NormAnalyze(norm,irisPath);
	return NormProcess(norm,irisPath,4,1);
}
/* Analyze "iris.csv" and print what was found for each column definition: the actual
   low/high values for range columns, or the list of class names otherwise.  The row
   count is printed last.  The argIndex/argc/argv parameters are not used here. */
void ExampleAnalyze(int argIndex, int argc, char **argv) {	
	char filename[FILENAME_MAX];
	NORM_DATA *norm;
	NORM_DATA_ITEM *col;
	NORM_DATA_CLASS *currentClass;

	LocateFile("iris.csv",filename,FILENAME_MAX);

	/* Four range columns followed by one one-of-n class column. */
	norm = NormCreate();
	NormDefRange(norm,0,1);
	NormDefRange(norm,0,1);
	NormDefRange(norm,0,1);
	NormDefRange(norm,0,1);
	NormDefClass(norm,NORM_CLASS_ONEOFN,0,1);
	NormAnalyze(norm,filename);
	
	/* Walk the linked list of column definitions. */
	for(col=norm->firstItem;col!=NULL;col=col->next) {
		if( col->type == NORM_TYPE_RANGE ) {
			/* Pass the low value first so it lines up with the "actualMin" label; the
			   arguments were previously given in high, low order. */
			printf("Column: \"%s\",actualMin=%.2f,actualHigh=%.2f\n",col->name,col->actualLow,col->actualHigh);
		} else {
			/* Non-range column: list every class name attached to it. */
			printf("Column: \"%s\",classes= ",col->name);
			for(currentClass=col->firstClass;currentClass!=NULL;currentClass=currentClass->next) {
				printf("\"%s\";",currentClass->name);
			}
			printf("\n");
		}
	}

	printf("Rows: %i\n",norm->rowCount);

	NormDelete(norm);
}