示例#1
0
DataSet* read_dataset(char *filename)
{
    FILE    *f;
    int     done = FALSE, i, j, k;
    double  fnlwgt, education_num, capital_gain, capital_loss;
    double  hours_per_week, age;
    char    workclass[40],  education[40],  marital_status[40], occupation[40];
    char    relationship[40], race[40], sex[40], native_country[40];
    char    buffer[240];
    DataSet *dset;

    dset = (DataSet*) malloc(sizeof(DataSet));
    if (dset == NULL) {
        fprintf(stderr, "Could not allocate memory\n");
        return NULL;
    }

    f = fopen(filename, "r");
    if (f == NULL) {
        fprintf(stderr, "File not found: %s\n", filename);
        free(dset);
        return NULL;
    }

    // count lines in file to allocate dataset arrays
    i = 0;
    while (fgets(buffer, 240, f) != NULL)
        ++i;

    if (!feof(f) || ferror(f)) {
        fprintf(stderr, "IO error while reading from file\n");
        free(dset);
        fclose(f);
        return NULL;
    }
    fseek(f, 0, SEEK_SET);

    // prepare dataset
    dset->n_cases = i;
    dset->input_size = 14;
    dset->output_size = 2;
    allocate_dataset_arrays(dset);

    i = 0;
    while (!done) {
        j = fscanf(f, "%lf, %s %lf, %s %lf, %s %s %s %s %s %lf, %lf, %lf, %s %s\n",
                   &age, &workclass, &fnlwgt, education, &education_num,
                   marital_status, occupation, relationship, &race, &sex,
                   &capital_gain, &capital_loss, &hours_per_week,
                   native_country, buffer);
        /*printf("%3.2lf; %s; %3.2lf; %s; %3.2lf; %s; %s; %s; %s; %s; %3.2lf; %3.2lf; %3.2lf; %s; %s\n", age, workclass, fnlwgt, education, education_num,
                     marital_status, occupation, relationship, race, sex, capital_gain,
                     capital_loss, hours_per_week, native_country, buffer);*/

        if (j != 15)
            done = TRUE;
        else {

            dset->input[i][0] = age;
            dset->input[i][1] = string_to_double_workclass(workclass);
            dset->input[i][2] = fnlwgt;
            dset->input[i][3] = string_to_double_education(education);
            dset->input[i][4] = education_num;
            dset->input[i][5] = string_to_double_marital_status(marital_status);
            dset->input[i][6] = string_to_double_occupation(occupation);
            dset->input[i][7] = string_to_double_relationship(relationship);
            dset->input[i][8] = string_to_double_race(race);
            dset->input[i][9] = string_to_double_sex(sex);
            dset->input[i][10] = capital_gain;
            dset->input[i][11] = capital_loss;
            dset->input[i][12] = hours_per_week;
            dset->input[i][13] = string_to_double_native_country(native_country);

            if (strstr(buffer, "<=50K")) {
                dset->output[i][0] = 0.9;
                dset->output[i][1] = 0.1;
            } else {
                dset->output[i][0] = 0.1;
                dset->output[i][1] = 0.9;
            }
            ++i;

        }

    }

    if (i != dset->n_cases)
        fprintf(stderr, "Error reading dataset: could not read all expected cases. Expected %d, got %d\n",
                dset->n_cases, i);

    fclose(f);
    return dset;
}
示例#2
0
DataSet* read_dataset(char *filename)
{
    FILE    *f;
    int     done = FALSE, i, j, k;

    double Elevation_quantitative, Aspect_quantitative, Slope_quantitative, Horizontal_Distance_To_Hydrology_quantitative,
           Vertical_Distance_To_Hydrology_quantitative, Horizontal_Distance_To_Roadways_quantitative, Hillshade_9am_quantitative, 
           Hillshade_Noon_quantitative, Hillshade_3pm_quantitative, Horizontal_Distance_To_Fire_Points_quantitative, 
           Wilderness_Area_1, Wilderness_Area_2, Wilderness_Area_3, Wilderness_Area_4, 
           Soil_Type1, Soil_Type2, Soil_Type3, Soil_Type4, Soil_Type5, Soil_Type6, Soil_Type7, Soil_Type8, Soil_Type9, Soil_Type10, 
           Soil_Type11, Soil_Type12, Soil_Type13, Soil_Type14, Soil_Type15, Soil_Type16, Soil_Type17, Soil_Type18, Soil_Type19, Soil_Type20, 
           Soil_Type21, Soil_Type22, Soil_Type23, Soil_Type24, Soil_Type25, Soil_Type26, Soil_Type27, Soil_Type28, Soil_Type29, Soil_Type30, 
           Soil_Type31, Soil_Type32, Soil_Type33, Soil_Type34, Soil_Type35, Soil_Type36, Soil_Type37, Soil_Type38, Soil_Type39, Soil_Type40, 
           Cover_Type;
                                             
    DataSet *dset;
    
    char buffer[140];
        
    dset = (DataSet*) malloc(sizeof(DataSet));
    if (dset == NULL) {
        fprintf(stderr, "Could not allocate memory\n");
        return NULL;
    }
          
    f = fopen(filename, "r");
    if (f == NULL) {
        fprintf(stderr, "File not found: %s\n", filename);
        free(dset);
        return NULL;
    }
       
    // count lines in file to allocate dataset arrays
    i = 0;
    while (fgets(buffer, 140, f) != NULL)
        ++i;

    if (!feof(f) || ferror(f)) {
        fprintf(stderr, "IO error while reading from file\n");
        free(dset);
        fclose(f);
        return NULL;
    }
    fseek(f, 0, SEEK_SET);
                
    // prepare dataset
    dset->n_cases = i;
    dset->input_size = 54;
    dset->output_size = 7;
    allocate_dataset_arrays(dset);
            
    i = 0;
    while (!done) {
        j = fscanf(f, "%lf, %lf, %lf, %lf, %lf, %lf, %lf, %lf, %lf, %lf, %lf, %lf, %lf, %lf, %lf, %lf, %lf, %lf, %lf, %lf, %lf, %lf, %lf, %lf, %lf, %lf, %lf, %lf, %lf, %lf, %lf, %lf, %lf, %lf, %lf, %lf, %lf, %lf, %lf, %lf, %lf, %lf, %lf, %lf, %lf, %lf, %lf, %lf, %lf, %lf, %lf, %lf, %lf, %lf, %lf \n", 
                      &Elevation_quantitative, &Aspect_quantitative, &Slope_quantitative, &Horizontal_Distance_To_Hydrology_quantitative, 
                      &Vertical_Distance_To_Hydrology_quantitative, &Horizontal_Distance_To_Roadways_quantitative, &Hillshade_9am_quantitative, 
                      &Hillshade_Noon_quantitative, &Hillshade_3pm_quantitative, &Horizontal_Distance_To_Fire_Points_quantitative, 
                      &Wilderness_Area_1, &Wilderness_Area_2, &Wilderness_Area_3, &Wilderness_Area_4, 
                      &Soil_Type1, &Soil_Type2, &Soil_Type3, &Soil_Type4, &Soil_Type5, &Soil_Type6, &Soil_Type7, &Soil_Type8, &Soil_Type9, &Soil_Type10, 
                      &Soil_Type11, &Soil_Type12, &Soil_Type13, &Soil_Type14, &Soil_Type15, &Soil_Type16, &Soil_Type17, &Soil_Type18, &Soil_Type19, &Soil_Type20, 
                      &Soil_Type21, &Soil_Type22, &Soil_Type23, &Soil_Type24, &Soil_Type25, &Soil_Type26, &Soil_Type27, &Soil_Type28, &Soil_Type29, &Soil_Type30, 
                      &Soil_Type31, &Soil_Type32, &Soil_Type33, &Soil_Type34, &Soil_Type35, &Soil_Type36, &Soil_Type37, &Soil_Type38, &Soil_Type39, &Soil_Type40, 
                      &Cover_Type);

        /*printf("%lf, %f, %lf, %lf, %lf, %lf, %lf, %lf, %lf, %lf, %lf, %lf, %lf, %lf, %lf, %lf, %lf, %lf, %lf, %lf, %lf, %lf, %lf, %lf, %lf, %lf, %lf, %lf, %lf, %lf, %lf, %lf, %lf, %lf, %lf, %lf, %lf, %lf, %lf, %lf, %lf, %lf, %lf, %lf, %lf, %lf, %lf, %lf, %lf, %lf, %lf, %lf, %lf, %lf, %lf\n", 
                      Elevation_quantitative, Aspect_quantitative, Slope_quantitative, Horizontal_Distance_To_Hydrology_quantitative, 
                      Vertical_Distance_To_Hydrology_quantitative, Horizontal_Distance_To_Roadways_quantitative, Hillshade_9am_quantitative, 
                      Hillshade_Noon_quantitative, Hillshade_3pm_quantitative, Horizontal_Distance_To_Fire_Points_quantitative, 
                      Wilderness_Area_1, Wilderness_Area_2, Wilderness_Area_3, Wilderness_Area_4, 
                      Soil_Type1, Soil_Type2, Soil_Type3, Soil_Type4, Soil_Type5, Soil_Type6, Soil_Type7, Soil_Type8, Soil_Type9, Soil_Type10, 
                      Soil_Type11, Soil_Type12, Soil_Type13, Soil_Type14, Soil_Type15, Soil_Type16, Soil_Type17, Soil_Type18, Soil_Type19, Soil_Type20, 
                      Soil_Type21, Soil_Type22, Soil_Type23, Soil_Type24, Soil_Type25, Soil_Type26, Soil_Type27, Soil_Type28, Soil_Type29, Soil_Type30, 
                      Soil_Type31, Soil_Type32, Soil_Type33, Soil_Type34, Soil_Type35, Soil_Type36, Soil_Type37, Soil_Type38, Soil_Type39, Soil_Type40, 
                      Cover_Type);
        */
        if (j != 55)
            done = TRUE;
        else {

            dset->input[i][0] = Elevation_quantitative;
            dset->input[i][1] = Aspect_quantitative;
            dset->input[i][2] = Slope_quantitative;
            dset->input[i][3] = Horizontal_Distance_To_Hydrology_quantitative;
            dset->input[i][4] = Vertical_Distance_To_Hydrology_quantitative;            
            dset->input[i][5] = Horizontal_Distance_To_Roadways_quantitative;            
            dset->input[i][6] = Hillshade_9am_quantitative;            
            dset->input[i][7] = Hillshade_Noon_quantitative;            
            dset->input[i][8] = Hillshade_3pm_quantitative;            
            dset->input[i][9] = Horizontal_Distance_To_Fire_Points_quantitative;            
            dset->input[i][10] = Wilderness_Area_1;                        
            dset->input[i][11] = Wilderness_Area_2;            
            dset->input[i][12] = Wilderness_Area_3;            
            dset->input[i][13] = Wilderness_Area_4;                                    
            dset->input[i][14] = Soil_Type1;                        
            dset->input[i][15] = Soil_Type2;            
            dset->input[i][16] = Soil_Type3;            
            dset->input[i][17] = Soil_Type4;
            dset->input[i][18] = Soil_Type5;                        
            dset->input[i][19] = Soil_Type6;            
            dset->input[i][20] = Soil_Type7; 
            dset->input[i][21] = Soil_Type8;
            dset->input[i][22] = Soil_Type9;            
            dset->input[i][23] = Soil_Type10;
            dset->input[i][24] = Soil_Type11;            
            dset->input[i][25] = Soil_Type12;
            dset->input[i][26] = Soil_Type13;            
            dset->input[i][27] = Soil_Type14;
            dset->input[i][28] = Soil_Type15;            
            dset->input[i][29] = Soil_Type16;
            dset->input[i][30] = Soil_Type17;            
            dset->input[i][31] = Soil_Type18;
            dset->input[i][32] = Soil_Type19;            
            dset->input[i][33] = Soil_Type20;
            dset->input[i][34] = Soil_Type21;            
            dset->input[i][35] = Soil_Type22;
            dset->input[i][36] = Soil_Type23;            
            dset->input[i][37] = Soil_Type24;
            dset->input[i][38] = Soil_Type25;            
            dset->input[i][39] = Soil_Type26;
            dset->input[i][40] = Soil_Type27;            
            dset->input[i][41] = Soil_Type28;
            dset->input[i][42] = Soil_Type29;            
            dset->input[i][43] = Soil_Type30;
            dset->input[i][44] = Soil_Type31;            
            dset->input[i][45] = Soil_Type32;
            dset->input[i][46] = Soil_Type33;            
            dset->input[i][47] = Soil_Type34;
            dset->input[i][48] = Soil_Type35;            
            dset->input[i][49] = Soil_Type36;
            dset->input[i][50] = Soil_Type37;            
            dset->input[i][51] = Soil_Type38;
            dset->input[i][52] = Soil_Type39;            
            dset->input[i][53] = Soil_Type40;        
           
            if (Cover_Type==1) {
                dset->output[i][0] = 0.9;
                dset->output[i][1] = 0.1;
                dset->output[i][2] = 0.1;
                dset->output[i][3] = 0.1;
                dset->output[i][4] = 0.1;
                dset->output[i][5] = 0.1;
                dset->output[i][6] = 0.1;
               
            } else if (Cover_Type==2) {
                dset->output[i][0] = 0.1;
                dset->output[i][1] = 0.9;
                dset->output[i][2] = 0.1;
                dset->output[i][3] = 0.1;
                dset->output[i][4] = 0.1;
                dset->output[i][5] = 0.1;
                dset->output[i][6] = 0.1;
               
            } else if (Cover_Type==3) {
                dset->output[i][0] = 0.1;
                dset->output[i][1] = 0.1;
                dset->output[i][2] = 0.9;
                dset->output[i][3] = 0.1;
                dset->output[i][4] = 0.1;
                dset->output[i][5] = 0.1;
                dset->output[i][6] = 0.1;
               
            } else if (Cover_Type==4) {
                dset->output[i][0] = 0.1;
                dset->output[i][1] = 0.1;
                dset->output[i][2] = 0.1;
                dset->output[i][3] = 0.9;
                dset->output[i][4] = 0.1;
                dset->output[i][5] = 0.1;
                dset->output[i][6] = 0.1;
               
            } else if (Cover_Type==5) {
                dset->output[i][0] = 0.1;
                dset->output[i][1] = 0.1;
                dset->output[i][2] = 0.1;
                dset->output[i][3] = 0.1;
                dset->output[i][4] = 0.9;
                dset->output[i][5] = 0.1;
                dset->output[i][6] = 0.1;
               
            } else if (Cover_Type==6) {
                dset->output[i][0] = 0.1;
                dset->output[i][1] = 0.1;
                dset->output[i][2] = 0.1;
                dset->output[i][3] = 0.1;
                dset->output[i][4] = 0.1;
                dset->output[i][5] = 0.9;
                dset->output[i][6] = 0.1;
               
            } else { 
                dset->output[i][0] = 0.1;
                dset->output[i][1] = 0.1;
                dset->output[i][2] = 0.1;
                dset->output[i][3] = 0.1;
                dset->output[i][4] = 0.1;
                dset->output[i][5] = 0.1;
                dset->output[i][6] = 0.9;
            }
            
            ++i;
        }

    }
//    system("pause");
    fclose(f);                                                                                                       
    return dset;
}