// Reads one type description: a name (via get_next_name) immediately followed
// by its raw argument text (via read_raw_arguments).
//
// On success the concatenation "name + arguments" is stored in *name_and_args
// and true is returned. If read_raw_arguments reports failure, false is
// returned and *name_and_args is left untouched. Any result of get_next_name
// is not inspected here.
bool DataTypeClassNameParser::Parser::read_one(std::string* name_and_args) {
    std::string type_name;
    get_next_name(&type_name);

    std::string raw_args;
    if (read_raw_arguments(&raw_args)) {
        *name_and_args = type_name + raw_args;
        return true;
    }
    return false;
}
/*
 * Read the "| type <name>" annotation that must follow an attribute
 * declaration and store a copy of <name> in attribute->type_name.
 *
 * Characters are consumed from fp until the shared `terminator` is '|',
 * a newline or EOF.  The annotation is accepted only when the terminator
 * is '|', the next name read is exactly "type", and one further name can
 * be read.  Otherwise an error naming the attribute is printed and the
 * program exits with status 1.
 *
 * Relies on `terminator` and `name`, which are not declared in this
 * function (presumably file-scope state updated by get_next_name).
 */
void get_type_name(att_info attribute, FILE *fp)
{
    int has_type = 0;

    /* Skip the rest of the declaration up to '|', end of line or EOF. */
    for (;;)
    {
        if (terminator == '|' || terminator == '\n' || terminator == EOF)
            break;
        terminator = getc(fp);
    }

    /* Same evaluation order as a single short-circuit && chain. */
    if (terminator == '|')
    {
        if (get_next_name(fp, 1, 1))
        {
            if (strcmp(name, "type") == 0)
            {
                if (get_next_name(fp, 1, 1))
                    has_type = 1;
            }
        }
    }

    if (!has_type)
    {
        printf("Attribute %s lacks type info\n", attribute->name);
        exit(1);
    }

    attribute->type_name = copy_string(name);
}
main(int argc, char *argv[]) { extern char *optarg; extern int optind; int o; int verbosity=0; char *filestem=NULL, *filename; FILE *fp, *tp; char **class_names=NULL; int n_class_names; char ***cases; int n_cases; char ***testing_cases; int n_testing_cases; att_info *attributes, attribute; int n_attributes; type_info *types, type; int n_types; int i, j, k; int c; /* Initialise the name char array */ name = (char *) malloc(MAXNAMELENGTH*sizeof(char)); /* Option handling */ while ( (o=getopt(argc,argv,"vf:")) != -1 ) { switch (o) { case 'f': filestem = copy_string(optarg); break; case 'v': verbosity++; break; case '?': printf("Option error in c4 to FOIL conversion\n"); exit(1); } } if(!filestem) { printf("Must enter filestem: -f filestem\n"); exit(1); } /* Extract the class and attribute/type information from the .names file */ filename = (char*) malloc((strlen(filestem)+7)*sizeof(char)); sprintf(filename,"%s.names",filestem); fp = fopen(filename,"r"); if(!fp) { printf("Can't open %s\n", filename); exit(1); } if(verbosity) printf("Reading from %s\n", filename); /* Get class names */ n_class_names = 0; class_names = (char**) malloc(11*sizeof(char*)); do { if(!get_next_name(fp,1,1)) { printf("Problem while reading class names\n"); exit(1); } if(n_class_names&&!(n_class_names%10)) class_names = (char**) realloc((void*)class_names, (n_class_names+11)*sizeof(char*)); class_names[n_class_names++] = copy_string(name); }while(terminator==','); if(verbosity) { printf("Have read class names:"); for(i=0;i<n_class_names;i++) { printf(" %s",class_names[i]); } printf("\n"); } /* Get attribute info */ n_attributes = 0; attributes =(att_info*)malloc(11*sizeof(att_info)); while(get_next_name(fp,1,1)) { if(terminator!=':') { printf("Attribute %s, not followed by :\n", name); exit(1); } if(n_attributes&&(n_attributes%10==0)) attributes = (att_info*) realloc((void*)attributes, (n_attributes+11)*sizeof(att_info)); attributes[n_attributes] = (att_info)malloc(sizeof(struct _att_info)); 
attribute = attributes[n_attributes]; attribute->name = copy_string(name); if(!get_next_name(fp,0,1)) { printf("Information missing for attribute %s\n", name); exit(1); } if(!strcmp(name,"ignore")) { attribute->ignore = 1; attribute->discrete = 0; if(terminator!='\n') SkipComment; } else if(!strcmp(name,"continuous")) { attribute->ignore = 0; attribute->discrete = 0; attribute->discrete_n = 0; attribute->n_values = 0; attribute->values = NULL; get_type_name(attribute,fp); } else if(!strncmp(name,"discrete",8)) { attribute->ignore = 0; attribute->discrete = 1; attribute->discrete_n = atoi(name+8); attribute->n_values = 0; attribute->values=(char**)malloc(attribute->discrete_n* sizeof(char*)); if(attribute->discrete_n && (attribute->values==NULL)) { printf("Problem allocating space for values of %s\n", attribute->name); exit(1); } get_type_name(attribute,fp); } else /* Presume that this is discrete with specified values */ { attribute->ignore = 0; attribute->discrete = 1; attribute->discrete_n = 0; attribute->n_values = 0; attribute->values = (char**)malloc(11*sizeof(char*)); do { if(attribute->n_values&&!(attribute->n_values%10)) attribute->values = (char**) realloc( (void*)attribute->values, (attribute->n_values+11)*sizeof(char*)); attribute->values[attribute->n_values++] = copy_string(name); if(terminator!=',') break; } while(get_next_name(fp,0,1)); get_type_name(attribute,fp); } n_attributes++; } fclose(fp); /* Read the .data file, extracting constant occurrence info */ for(i=0;i<n_attributes;i++) { attribute = attributes[i]; if(attribute->discrete) if(attribute->n_values) attribute->value_occurs_tr = (int*) calloc((size_t)attribute->n_values,sizeof(int)); else attribute->value_occurs_tr = (int*) calloc((size_t)attribute->discrete_n,sizeof(int)); } sprintf(filename,"%s.data",filestem); fp = fopen(filename,"r"); if(!fp) { printf("Can't open %s\n", filename); exit(1); } if(verbosity) printf("Reading from %s\n", filename); cases = 
(char***)malloc(101*sizeof(char**)); n_cases = 0; while(get_next_name(fp,1,attributes[0]->discrete)) { if(n_cases&&!(n_cases%100)) cases = (char***) realloc((void*)cases, (n_cases+101)*sizeof(char**)); cases[n_cases] = (char**)malloc((n_attributes+1)*sizeof(char*)); i = 0; do { attribute = attributes[i]; if(attribute->ignore) { } else if(!strcmp(name,"?")) { cases[n_cases][i] = NULL; } else if(attribute->discrete) { for(j=0;j<attribute->n_values;j++) { if(!strcmp(name,attribute->values[j])) break; } if(j==attribute->n_values) /* value not seen before */ { if(j>=attribute->discrete_n) { printf("%s has extra value %s in data file\n", attribute->name, name); exit(1); } else /* add it in */ { attribute->values[j] = copy_string(name); attribute->n_values++; } } attribute->value_occurs_tr[j]++; cases[n_cases][i] = attribute->values[j]; } else /* continuous attribute with value */ { cases[n_cases][i] = copy_string(name); } i++; } while(get_next_name(fp,1,(i==n_attributes) ? 1 : attributes[i]->discrete) &&(i<n_attributes)); if(i!=n_attributes) { printf("Problem reading line in %s\n", filename); exit(1); } for(j=0;j<n_class_names;j++) { if(!strcmp(name,class_names[j])) break; } if(j==n_class_names) { printf("Undeclared class name %s in data file\n", name); exit(1); } cases[n_cases][i] = class_names[j]; n_cases++; } fclose(fp); /* Read the .test file, extracting constant existence info */ sprintf(filename,"%s.test",filestem); fp = fopen(filename,"r"); if(fp&&verbosity) printf("Reading from %s\n", filename); testing_cases = (char***)malloc(101*sizeof(char**)); n_testing_cases = 0; while(fp&&get_next_name(fp,1,attributes[0]->discrete)) { if(n_testing_cases&&!(n_testing_cases%100)) testing_cases = (char***) realloc((void*)testing_cases, (n_testing_cases+101)*sizeof(char**)); testing_cases[n_testing_cases] = (char**)malloc((n_attributes+1)* sizeof(char*)); i = 0; do { attribute = attributes[i]; if(attribute->ignore) { } else if(!strcmp(name,"?")) { 
testing_cases[n_testing_cases][i] = NULL; } else if(attribute->discrete) { for(j=0;j<attribute->n_values;j++) { if(!strcmp(name,attribute->values[j])) break; } if(j==attribute->n_values) /* value not seen before */ { if(j>=attribute->discrete_n) { printf("%s has extra value %s in data file\n", attribute->name, name); exit(1); } else /* add it in */ { attribute->values[j] = copy_string(name); attribute->n_values++; } } testing_cases[n_testing_cases][i]=attribute->values[j]; } else /* continuous attribute with value */ { testing_cases[n_testing_cases][i] = copy_string(name); } i++; } while(get_next_name(fp,1,(i==n_attributes)? 1 : attributes[i]->discrete) &&(i<n_attributes)); if(i!=n_attributes) { printf("Problem reading line in %s\n", filename); exit(1); } for(j=0;j<n_class_names;j++) { if(!strcmp(name,class_names[j])) break; } if(j==n_class_names) { printf("Undeclared class name %s in data file\n", name); exit(1); } testing_cases[n_testing_cases][i] = class_names[j]; n_testing_cases++; } /* Now combine the attribute information into type information */ n_types = 0; types = (type_info*) malloc(11*sizeof(type_info)); for(i=0;i<n_attributes;i++) { attribute = attributes[i]; if(attribute->ignore) continue; for(j=0;j<n_types;j++) { if(!strcmp(types[j]->name,attribute->type_name)) break; } attribute->type_number = j; if(j==n_types) /* New type */ { if(n_types&&!(n_types%10)) types = (type_info*) realloc((void*)types, (n_types+11)*sizeof(type_info)); types[n_types] = (type_info)malloc(sizeof(struct _type_info)); types[n_types]->name = attribute->type_name; types[n_types]->discrete = attribute->discrete; types[n_types]->n_atts = 0; types[n_types]->atts = (int*)malloc(11*sizeof(int)); types[n_types]->n_values = 0; if(types[n_types]->discrete) { types[n_types]->values = (char**)malloc(11*sizeof(char*)); types[n_types]->value_occurs_tr =(int*)malloc(11*sizeof(int*)); } else { types[n_types]->values = NULL; } n_types++; } type = types[j]; if(type->n_atts&&!(type->n_atts%10)) 
type->atts = (int*) realloc((void*)type->atts, (type->n_atts+11)*sizeof(int)); type->atts[type->n_atts++] = i; if(type->discrete!=attribute->discrete) { printf("Type %s declared to be both discrete and continuous\n", type->name); exit(1); } if(type->discrete) { for(j=0;j<attribute->n_values;j++) { for(k=0;k<type->n_values;k++) { if(!strcmp(attribute->values[j],type->values[k])) break; } if(k==type->n_values) /* New value for this type */ { if(type->n_values&&!(type->n_values%10)) { type->values = (char**) realloc((void*)type->values, (type->n_values+11)*sizeof(char*)); type->value_occurs_tr =(int*)realloc( (void*)type->value_occurs_tr, (type->n_values+11)*sizeof(int*)); } type->values[k] = attribute->values[j]; type->value_occurs_tr[k] = 0; type->n_values++; } type->value_occurs_tr[k] += attribute->value_occurs_tr[j]; } } } /* Check the discrete type names to ensure that none is the prefix of another */ for(i=0;i<n_types;i++) { if(!type->discrete) continue; for(j=i+1;j<n_types;j++) { if(!type->discrete) continue; if((int)strlen(types[i]->name)>(int)strlen(types[j]->name)) { if(!strncmp(types[i]->name,types[j]->name, strlen(types[j]->name))) { printf("Type name %s is prefix of type name %s\n", types[j]->name, types[i]->name); exit(1); } } else { if(!strncmp(types[j]->name,types[i]->name, strlen(types[i]->name))) { printf("Type name %s is prefix of type name %s\n", types[i]->name, types[j]->name); exit(1); } } } } /* Output type info on std out */ if(verbosity) { printf("Type information from names file:\n\n"); for(i=0;i<n_types;i++) { type = types[i]; printf("%s:\n",type->name); if(type->discrete) printf("\tdiscrete\n"); else printf("\tcontinuous\n"); printf("\tattributes:\n"); for(j=0;j<type->n_atts;j++) printf("\t\t%s\n",attributes[type->atts[j]]->name); printf("\tvalues:\n"); for(j=0;j<type->n_values;j++) printf("\t\t%s\n",type->values[j]); } } /* Now write out to the .d file for FOIL */ sprintf(filename,"%s.d",filestem); tp = fopen(filename,"w"); if(verbosity) 
printf("Writing to %s\n", filename); /* First the types complete with constants - note that in the .d file each discrete constant name is augmented by being preceded by its type name to prevent FOIL equating two constants of different types */ for(i=0;i<n_types;i++) { type = types[i]; if(type->discrete) { fprintf(tp,"#%s: ",type->name); k = 0; for(j=0;j<type->n_values;j++) { if(k) fprintf(tp,", "); if(type->value_occurs_tr[j]) fprintf(tp,"*"); /* Theory Constant */ fprintf(tp,"%s%s",type->name, type->values[j]); k++; } if(!k) fprintf(tp,"\n"); /* empty type */ fprintf(tp,".\n"); } else fprintf(tp,"%s: continuous.\n",type->name); } fprintf(tp,"\n"); /* Now the sole relation - is_first_named_class() */ fprintf(tp,"is_%s(", class_names[0]); k = 0; for(i=0;i<n_attributes;i++) { attribute = attributes[i]; if(attribute->ignore) continue; if(k) fprintf(tp,","); fprintf(tp,"%s",attribute->type_name); k++; } if(!k) { printf("All attributes ignored\n"); exit(1); } fprintf(tp,")\n"); /* Now the positive training cases */ for(i=0;i<n_cases;i++) { if(cases[i][n_attributes]!=class_names[0]) continue; k = 0; for(j=0;j<n_attributes;j++) { attribute = attributes[j]; if(attribute->ignore) continue; if(k) fprintf(tp,","); if(!cases[i][j]) /* Missing Value */ { fprintf(tp,"?"); } else if(attribute->discrete) { fprintf(tp,"%s%s",attribute->type_name,cases[i][j]); } else /* attribute is continuous */ { fprintf(tp,"%s",cases[i][j]); } k++; } fprintf(tp,"\n"); } /* Now the negative training cases */ fprintf(tp,";\n"); for(i=0;i<n_cases;i++) { if(cases[i][n_attributes]==class_names[0]) continue; k = 0; for(j=0;j<n_attributes;j++) { attribute = attributes[j]; if(attribute->ignore) continue; if(k) fprintf(tp,","); if(!cases[i][j]) /* Missing Value */ { fprintf(tp,"?"); } else if(attribute->discrete) { fprintf(tp,"%s%s",attribute->type_name,cases[i][j]); } else /* attribute is continuous */ { fprintf(tp,"%s",cases[i][j]); } k++; } fprintf(tp,"\n"); } fprintf(tp,".\n"); /* Now the test cases 
*/ if(!fp) { fclose(tp); exit(0); } fprintf(tp,"\nis_%s\n", class_names[0]); for(i=0;i<n_testing_cases;i++) { k = 0; for(j=0;j<n_attributes;j++) { attribute = attributes[j]; if(attribute->ignore) continue; if(k) fprintf(tp,","); if(!testing_cases[i][j]) /* Missing Value */ { fprintf(tp,"?"); } else if(attribute->discrete) { fprintf(tp,"%s%s",attribute->type_name,testing_cases[i][j]); } else /* attribute is continuous */ { fprintf(tp,"%s",testing_cases[i][j]); } k++; } if(testing_cases[i][n_attributes]==class_names[0]) fprintf(tp,":+\n"); else fprintf(tp,":-\n"); } fprintf(tp,".\n"); fclose(fp); fclose(tp); return 0; /* Changed from exit(0) */ }