DataSet* read_dataset(char *filename) { FILE *f; int done = FALSE, i, j, k; double fnlwgt, education_num, capital_gain, capital_loss; double hours_per_week, age; char workclass[40], education[40], marital_status[40], occupation[40]; char relationship[40], race[40], sex[40], native_country[40]; char buffer[240]; DataSet *dset; dset = (DataSet*) malloc(sizeof(DataSet)); if (dset == NULL) { fprintf(stderr, "Could not allocate memory\n"); return NULL; } f = fopen(filename, "r"); if (f == NULL) { fprintf(stderr, "File not found: %s\n", filename); free(dset); return NULL; } // count lines in file to allocate dataset arrays i = 0; while (fgets(buffer, 240, f) != NULL) ++i; if (!feof(f) || ferror(f)) { fprintf(stderr, "IO error while reading from file\n"); free(dset); fclose(f); return NULL; } fseek(f, 0, SEEK_SET); // prepare dataset dset->n_cases = i; dset->input_size = 14; dset->output_size = 2; allocate_dataset_arrays(dset); i = 0; while (!done) { j = fscanf(f, "%lf, %s %lf, %s %lf, %s %s %s %s %s %lf, %lf, %lf, %s %s\n", &age, &workclass, &fnlwgt, education, &education_num, marital_status, occupation, relationship, &race, &sex, &capital_gain, &capital_loss, &hours_per_week, native_country, buffer); /*printf("%3.2lf; %s; %3.2lf; %s; %3.2lf; %s; %s; %s; %s; %s; %3.2lf; %3.2lf; %3.2lf; %s; %s\n", age, workclass, fnlwgt, education, education_num, marital_status, occupation, relationship, race, sex, capital_gain, capital_loss, hours_per_week, native_country, buffer);*/ if (j != 15) done = TRUE; else { dset->input[i][0] = age; dset->input[i][1] = string_to_double_workclass(workclass); dset->input[i][2] = fnlwgt; dset->input[i][3] = string_to_double_education(education); dset->input[i][4] = education_num; dset->input[i][5] = string_to_double_marital_status(marital_status); dset->input[i][6] = string_to_double_occupation(occupation); dset->input[i][7] = string_to_double_relationship(relationship); dset->input[i][8] = string_to_double_race(race); dset->input[i][9] = string_to_double_sex(sex); dset->input[i][10] = capital_gain; dset->input[i][11] = capital_loss; dset->input[i][12] = hours_per_week; dset->input[i][13] = string_to_double_native_country(native_country); if (strstr(buffer, "<=50K")) { dset->output[i][0] = 0.9; dset->output[i][1] = 0.1; } else { dset->output[i][0] = 0.1; dset->output[i][1] = 0.9; } ++i; } } if (i != dset->n_cases) fprintf(stderr, "Error reading dataset: could not read all expected cases. Expected %d, got %d\n", dset->n_cases, i); fclose(f); return dset; }
DataSet* read_dataset(char *filename) { FILE *f; int done = FALSE, i, j, k; double Elevation_quantitative, Aspect_quantitative, Slope_quantitative, Horizontal_Distance_To_Hydrology_quantitative, Vertical_Distance_To_Hydrology_quantitative, Horizontal_Distance_To_Roadways_quantitative, Hillshade_9am_quantitative, Hillshade_Noon_quantitative, Hillshade_3pm_quantitative, Horizontal_Distance_To_Fire_Points_quantitative, Wilderness_Area_1, Wilderness_Area_2, Wilderness_Area_3, Wilderness_Area_4, Soil_Type1, Soil_Type2, Soil_Type3, Soil_Type4, Soil_Type5, Soil_Type6, Soil_Type7, Soil_Type8, Soil_Type9, Soil_Type10, Soil_Type11, Soil_Type12, Soil_Type13, Soil_Type14, Soil_Type15, Soil_Type16, Soil_Type17, Soil_Type18, Soil_Type19, Soil_Type20, Soil_Type21, Soil_Type22, Soil_Type23, Soil_Type24, Soil_Type25, Soil_Type26, Soil_Type27, Soil_Type28, Soil_Type29, Soil_Type30, Soil_Type31, Soil_Type32, Soil_Type33, Soil_Type34, Soil_Type35, Soil_Type36, Soil_Type37, Soil_Type38, Soil_Type39, Soil_Type40, Cover_Type; DataSet *dset; char buffer[140]; dset = (DataSet*) malloc(sizeof(DataSet)); if (dset == NULL) { fprintf(stderr, "Could not allocate memory\n"); return NULL; } f = fopen(filename, "r"); if (f == NULL) { fprintf(stderr, "File not found: %s\n", filename); free(dset); return NULL; } // count lines in file to allocate dataset arrays i = 0; while (fgets(buffer, 140, f) != NULL) ++i; if (!feof(f) || ferror(f)) { fprintf(stderr, "IO error while reading from file\n"); free(dset); fclose(f); return NULL; } fseek(f, 0, SEEK_SET); // prepare dataset dset->n_cases = i; dset->input_size = 54; dset->output_size = 7; allocate_dataset_arrays(dset); i = 0; while (!done) { j = fscanf(f, "%lf, %lf, %lf, %lf, %lf, %lf, %lf, %lf, %lf, %lf, %lf, %lf, %lf, %lf, %lf, %lf, %lf, %lf, %lf, %lf, %lf, %lf, %lf, %lf, %lf, %lf, %lf, %lf, %lf, %lf, %lf, %lf, %lf, %lf, %lf, %lf, %lf, %lf, %lf, %lf, %lf, %lf, %lf, %lf, %lf, %lf, %lf, %lf, %lf, %lf, %lf, %lf, %lf, %lf, %lf \n", &Elevation_quantitative, &Aspect_quantitative, &Slope_quantitative, &Horizontal_Distance_To_Hydrology_quantitative, &Vertical_Distance_To_Hydrology_quantitative, &Horizontal_Distance_To_Roadways_quantitative, &Hillshade_9am_quantitative, &Hillshade_Noon_quantitative, &Hillshade_3pm_quantitative, &Horizontal_Distance_To_Fire_Points_quantitative, &Wilderness_Area_1, &Wilderness_Area_2, &Wilderness_Area_3, &Wilderness_Area_4, &Soil_Type1, &Soil_Type2, &Soil_Type3, &Soil_Type4, &Soil_Type5, &Soil_Type6, &Soil_Type7, &Soil_Type8, &Soil_Type9, &Soil_Type10, &Soil_Type11, &Soil_Type12, &Soil_Type13, &Soil_Type14, &Soil_Type15, &Soil_Type16, &Soil_Type17, &Soil_Type18, &Soil_Type19, &Soil_Type20, &Soil_Type21, &Soil_Type22, &Soil_Type23, &Soil_Type24, &Soil_Type25, &Soil_Type26, &Soil_Type27, &Soil_Type28, &Soil_Type29, &Soil_Type30, &Soil_Type31, &Soil_Type32, &Soil_Type33, &Soil_Type34, &Soil_Type35, &Soil_Type36, &Soil_Type37, &Soil_Type38, &Soil_Type39, &Soil_Type40, &Cover_Type); /*printf("%lf, %f, %lf, %lf, %lf, %lf, %lf, %lf, %lf, %lf, %lf, %lf, %lf, %lf, %lf, %lf, %lf, %lf, %lf, %lf, %lf, %lf, %lf, %lf, %lf, %lf, %lf, %lf, %lf, %lf, %lf, %lf, %lf, %lf, %lf, %lf, %lf, %lf, %lf, %lf, %lf, %lf, %lf, %lf, %lf, %lf, %lf, %lf, %lf, %lf, %lf, %lf, %lf, %lf, %lf\n", Elevation_quantitative, Aspect_quantitative, Slope_quantitative, Horizontal_Distance_To_Hydrology_quantitative, Vertical_Distance_To_Hydrology_quantitative, Horizontal_Distance_To_Roadways_quantitative, Hillshade_9am_quantitative, Hillshade_Noon_quantitative, Hillshade_3pm_quantitative, Horizontal_Distance_To_Fire_Points_quantitative, Wilderness_Area_1, Wilderness_Area_2, Wilderness_Area_3, Wilderness_Area_4, Soil_Type1, Soil_Type2, Soil_Type3, Soil_Type4, Soil_Type5, Soil_Type6, Soil_Type7, Soil_Type8, Soil_Type9, Soil_Type10, Soil_Type11, Soil_Type12, Soil_Type13, Soil_Type14, Soil_Type15, Soil_Type16, Soil_Type17, Soil_Type18, Soil_Type19, Soil_Type20, Soil_Type21, Soil_Type22, Soil_Type23, Soil_Type24, Soil_Type25, Soil_Type26, Soil_Type27, Soil_Type28, Soil_Type29, Soil_Type30, Soil_Type31, Soil_Type32, Soil_Type33, Soil_Type34, Soil_Type35, Soil_Type36, Soil_Type37, Soil_Type38, Soil_Type39, Soil_Type40, Cover_Type); */ if (j != 55) done = TRUE; else { dset->input[i][0] = Elevation_quantitative; dset->input[i][1] = Aspect_quantitative; dset->input[i][2] = Slope_quantitative; dset->input[i][3] = Horizontal_Distance_To_Hydrology_quantitative; dset->input[i][4] = Vertical_Distance_To_Hydrology_quantitative; dset->input[i][5] = Horizontal_Distance_To_Roadways_quantitative; dset->input[i][6] = Hillshade_9am_quantitative; dset->input[i][7] = Hillshade_Noon_quantitative; dset->input[i][8] = Hillshade_3pm_quantitative; dset->input[i][9] = Horizontal_Distance_To_Fire_Points_quantitative; dset->input[i][10] = Wilderness_Area_1; dset->input[i][11] = Wilderness_Area_2; dset->input[i][12] = Wilderness_Area_3; dset->input[i][13] = Wilderness_Area_4; dset->input[i][14] = Soil_Type1; dset->input[i][15] = Soil_Type2; dset->input[i][16] = Soil_Type3; dset->input[i][17] = Soil_Type4; dset->input[i][18] = Soil_Type5; dset->input[i][19] = Soil_Type6; dset->input[i][20] = Soil_Type7; dset->input[i][21] = Soil_Type8; dset->input[i][22] = Soil_Type9; dset->input[i][23] = Soil_Type10; dset->input[i][24] = Soil_Type11; dset->input[i][25] = Soil_Type12; dset->input[i][26] = Soil_Type13; dset->input[i][27] = Soil_Type14; dset->input[i][28] = Soil_Type15; dset->input[i][29] = Soil_Type16; dset->input[i][30] = Soil_Type17; dset->input[i][31] = Soil_Type18; dset->input[i][32] = Soil_Type19; dset->input[i][33] = Soil_Type20; dset->input[i][34] = Soil_Type21; dset->input[i][35] = Soil_Type22; dset->input[i][36] = Soil_Type23; dset->input[i][37] = Soil_Type24; dset->input[i][38] = Soil_Type25; dset->input[i][39] = Soil_Type26; dset->input[i][40] = Soil_Type27; dset->input[i][41] = Soil_Type28; dset->input[i][42] = Soil_Type29; dset->input[i][43] = Soil_Type30; dset->input[i][44] = Soil_Type31; dset->input[i][45] = Soil_Type32; dset->input[i][46] = Soil_Type33; dset->input[i][47] = Soil_Type34; dset->input[i][48] = Soil_Type35; dset->input[i][49] = Soil_Type36; dset->input[i][50] = Soil_Type37; dset->input[i][51] = Soil_Type38; dset->input[i][52] = Soil_Type39; dset->input[i][53] = Soil_Type40; if (Cover_Type==1) { dset->output[i][0] = 0.9; dset->output[i][1] = 0.1; dset->output[i][2] = 0.1; dset->output[i][3] = 0.1; dset->output[i][4] = 0.1; dset->output[i][5] = 0.1; dset->output[i][6] = 0.1; } else if (Cover_Type==2) { dset->output[i][0] = 0.1; dset->output[i][1] = 0.9; dset->output[i][2] = 0.1; dset->output[i][3] = 0.1; dset->output[i][4] = 0.1; dset->output[i][5] = 0.1; dset->output[i][6] = 0.1; } else if (Cover_Type==3) { dset->output[i][0] = 0.1; dset->output[i][1] = 0.1; dset->output[i][2] = 0.9; dset->output[i][3] = 0.1; dset->output[i][4] = 0.1; dset->output[i][5] = 0.1; dset->output[i][6] = 0.1; } else if (Cover_Type==4) { dset->output[i][0] = 0.1; dset->output[i][1] = 0.1; dset->output[i][2] = 0.1; dset->output[i][3] = 0.9; dset->output[i][4] = 0.1; dset->output[i][5] = 0.1; dset->output[i][6] = 0.1; } else if (Cover_Type==5) { dset->output[i][0] = 0.1; dset->output[i][1] = 0.1; dset->output[i][2] = 0.1; dset->output[i][3] = 0.1; dset->output[i][4] = 0.9; dset->output[i][5] = 0.1; dset->output[i][6] = 0.1; } else if (Cover_Type==6) { dset->output[i][0] = 0.1; dset->output[i][1] = 0.1; dset->output[i][2] = 0.1; dset->output[i][3] = 0.1; dset->output[i][4] = 0.1; dset->output[i][5] = 0.9; dset->output[i][6] = 0.1; } else { dset->output[i][0] = 0.1; dset->output[i][1] = 0.1; dset->output[i][2] = 0.1; dset->output[i][3] = 0.1; dset->output[i][4] = 0.1; dset->output[i][5] = 0.1; dset->output[i][6] = 0.9; } ++i; } } // system("pause"); fclose(f); return dset; }