inline void basic_examples() { std::ofstream output("output/basic_examples_output.txt"); cl::tape_serializer<double> serializer(output); add_example(serializer); mult_example(serializer); pow_example(serializer); exponent_example(serializer); cos_sin_example(serializer); }
/* in : - attribute is the attribute to compute the gain of - examples is a tab of examples used to compute entropy - n_ex is explicit out : - the gain ration value */ static double gain_ratio(const struct attribute_t *attribute, const struct example_t *examples, int n_ex) { double ret = 0.0; struct example_t *ex_set = malloc(sizeof(*ex_set) * n_ex); int i, j, index; int index_attr = get_index_attribute(attribute->property, attributes_set, n_attr_set); for(j = 0; j < attribute->l_tab; ++j) { index = 0; /*ex_set = S_v*/ for(i = 0; i < n_ex; ++i) { if(strncmp(attribute->tab_values[j], HIGHER, sizeof(HIGHER)) == 0) { double down, up, v; get_double_from_string(examples[i].tab_values[index_attr], &v); sscanf(attribute->tab_values[j], "%*c %lf %*c%*c %lf", &down, &up); if(down < v && (v > up || v - up < DBL_EPSILON)) add_example(ex_set, &index, &examples[i]); } else if(strncmp(attribute->tab_values[j], LESS_OR_EQUAL, sizeof(LESS_OR_EQUAL)) == 0) { double up, v; get_double_from_string(examples[i].tab_values[index_attr], &v); sscanf(attribute->tab_values[j], "%*c %lf", &up); if(v > up || v - up < DBL_EPSILON) add_example(ex_set, &index, &examples[i]); } else { if(strcmp(examples[i].tab_values[index_attr], attribute->tab_values[j]) == 0) add_example(ex_set, &index, &examples[i]); } } ret += entropy(ex_set, index)/log((double)index/n_ex); } free(ex_set); return ret*(-log(2)); }
/*
	Builds the subset of examples whose attribute at index_attribute is
	string-equal (strcmp) to value.

	The returned array is grown by one element with realloc() for every
	matching example, and add_example() is then called with the array, the
	length pointer and the matching example (presumably it stores the example
	and increments *len_subset - confirm against add_example).

	in :
		- examples is the set of examples the subset is taken from
		- n_ex is the number of elements in examples
		- len_subset points to the length of the subset; it is reset to 0 first
		- index_attribute is the index of the attribute that is compared
		- value is the value that attribute must have
	out :
		- the array of matching examples, or NULL when nothing matched
*/
static struct example_t *create_subset_ex_from_attr(const struct example_t *examples, int n_ex, int *len_subset, int index_attribute, string value)
{
	struct example_t *subset = NULL;
	int k = 0;

	*len_subset = 0;

	while(k < n_ex)
	{
		const struct example_t *candidate = &examples[k++];
		void *grown;

		/* not the requested value: nothing to store */
		if(strcmp(candidate->tab_values[index_attribute], value) != 0)
			continue;

		/* make room for one more element; the old array stays valid if realloc fails */
		grown = realloc(subset, (*len_subset + 1) * sizeof(*subset));
		if(grown == NULL)
			leave_memory_error("create_subset_ex_from_attr"); /* presumably does not return - confirm */
		subset = grown;

		add_example(subset, len_subset, candidate);
	}

	return subset;
}
/* in : - attribute is the attribute to compute the gain of - examples is the set of examples used to compute entropy - n_ex is explicit out : - the (optimized) gain value */ static double gain(const struct attribute_t *attribute, const struct example_t *examples, int n_ex) { double ret = 0.0; struct example_t *ex_set = malloc(sizeof(*ex_set) * n_ex); int i, j, index; int index_attr = get_index_attribute(attribute->property, attributes_set, n_attr_set); for(j = 0; j < attribute->l_tab; ++j) { index = 0; /*ex_set = S_v*/ for(i = 0; i < n_ex; ++i) { if(strcmp(examples[i].tab_values[index_attr], attribute->tab_values[j]) == 0) add_example(ex_set, &index, &examples[i]); } ret += index*entropy(ex_set, index)/n_ex; } free(ex_set); return ret; }