/* This example uses a Generalized Linear Model (GLM), in this case a LOGIT to fit to the Wisconsin Breast Cancer data set. The output is shown below. The solution coefficients are shown. Actual input count: 9 Actual output count: 1 Iteration #1, Error: inf Iteration #2, Error: 0.63698368 Iteration #3, Error: 0.37629115 Iteration #4, Error: 0.24127030 Iteration #5, Error: 0.12430888 Iteration #6, Error: 0.03587100 Iteration #7, Error: 0.00312089 Ideal: 0.00, Actual: 0.02 Ideal: 0.00, Actual: 0.91 Ideal: 0.00, Actual: 0.01 Ideal: 0.00, Actual: 0.77 Ideal: 0.00, Actual: 0.02 Ideal: 1.00, Actual: 1.00 Ideal: 0.00, Actual: 0.09 Ideal: 0.00, Actual: 0.01 Ideal: 0.00, Actual: 0.01 ... Ideal: 0.00, Actual: 0.00 Ideal: 1.00, Actual: 0.94 Ideal: 0.00, Actual: 0.00 Ideal: 0.00, Actual: 0.01 Ideal: 0.00, Actual: 0.00 Ideal: 0.00, Actual: 0.00 Ideal: 1.00, Actual: 0.99 Ideal: 1.00, Actual: 0.95 Ideal: 1.00, Actual: 0.98 */ void ExampleGLM(int argIndex, int argc, char **argv) { char filename[FILENAME_MAX]; NORM_DATA *norm; DATA_SET *data; REGRESSION_MODEL *reg; double *ideal, actual, *input, error; unsigned int i, iteration; LocateFile("breast-cancer-wisconsin.csv",filename,FILENAME_MAX); /* Normalize the data. If you use a different data set, this area will need to be updated. 
*/ norm = NormCreate(); NormDefIgnore(norm); NormDefPass(norm); NormDefPass(norm); NormDefPass(norm); NormDefPass(norm); NormDefPass(norm); NormDefPass(norm); NormDefPass(norm); NormDefPass(norm); NormDefPass(norm); NormDefReplace(norm,4,1,0); NormAnalyze(norm,filename); data = NormProcess(norm,filename,9,1); printf("Actual input count: %i\n", data->inputCount); printf("Actual output count: %i\n", data->idealCount); reg = RegressionCreate(data->inputCount,LinkLOGIT); iteration = 0; do { iteration++; error = RegressionReweightLeastSquares(reg,data); printf("Iteration #%i, Error: %.8f\n",iteration,error); } while (iteration < 1000 && error > 0.01); /* Display results */ for(i=0;i<data->recordCount;i++) { ideal = DataGetIdeal(data,i); input = DataGetInput(data,i); actual = RegressionCalculate(reg,input); printf("Ideal: %.2f, Actual: %.2f\n",ideal[0],actual); } NormDelete(norm); DataDelete(data); RegressionDelete(reg); }
/*
 * Build the training set from "iris.csv" using the supplied normalization
 * object. Four range columns mapped to [0,1] and one one-of-n class column
 * are registered on norm, the file is analyzed, and the processed data set
 * (4 inputs, 1 ideal column) is returned.
 *
 * If you are using a data set other than Iris, you will need to update the
 * normalization.
 */
static DATA_SET *create_iris_training(NORM_DATA *norm) {
    char path[FILENAME_MAX];
    DATA_SET *training;
    int column;

    LocateFile("iris.csv",path,FILENAME_MAX);

    /* The four measurement columns share the same range definition. */
    for(column=0;column<4;column++) {
        NormDefRange(norm,0,1);
    }
    /* The last column holds the class label. */
    NormDefClass(norm,NORM_CLASS_ONEOFN,0,1);

    NormAnalyze(norm,path);
    training = NormProcess(norm,path,4,1);

    return training;
}
void ExampleAnalyze(int argIndex, int argc, char **argv) { char filename[FILENAME_MAX]; NORM_DATA *norm; NORM_DATA_ITEM *col; NORM_DATA_CLASS *currentClass; LocateFile("iris.csv",filename,FILENAME_MAX); norm = NormCreate(); NormDefRange(norm,0,1); NormDefRange(norm,0,1); NormDefRange(norm,0,1); NormDefRange(norm,0,1); NormDefClass(norm,NORM_CLASS_ONEOFN,0,1); NormAnalyze(norm,filename); col = norm->firstItem; while(col!=NULL) { if( col->type == NORM_TYPE_RANGE ) { printf("Column: \"%s\",actualMin=%.2f,actualHigh=%.2f\n",col->name,col->actualHigh,col->actualLow); } else { printf("Column: \"%s\",classes= ",col->name); currentClass = col->firstClass; while(currentClass!=NULL) { printf("\"%s\";",currentClass->name); currentClass = currentClass->next; } printf("\n"); } col = col->next; } printf("Rows: %i\n",norm->rowCount); NormDelete(norm); }