/*
 * Returns the number of nodes in the class list that starts at lc,
 * walking the suiv links until NULL is reached. A NULL list yields 0.
 */
int count_classes(PCLASSE lc)
{
    int total = 0;
    PCLASSE node;

    for (node = lc; node != NULL; node = node->suiv)
        total++;

    return total;
}
/*
 * Selects a subset S of the instances of ds and returns it as a new dataset.
 *
 * S starts with one randomly drawn instance for each class label reported
 * by count_classes. Then, repeatedly, every instance outside S is visited
 * in shuffled order and labelled by a majority vote among its (at most)
 * n_neighbors nearest members of S, ranked with squared_dist. An instance
 * whose vote disagrees with its own label ds.y[] is appended to S. The
 * passes stop as soon as one full pass appends nothing.
 *
 * ds          : input dataset; X, y, n_instances and n_features are read.
 * n_neighbors : number of neighbour slots kept per instance.
 *
 * Returns a dataset obtained from alloc_dataset that holds copies of the
 * selected rows and labels. Every scratch buffer allocated here is freed
 * before returning; the returned dataset is not.
 *
 * NOTE(review): allocation results are not checked, and srand(time(NULL))
 * reseeds the global generator on every call.
 */
Dataset cnn_reduce(Dataset ds, int n_neighbors)
{
    int i, j, k, l;
    int n_classes;
    int* class_labels = NULL;
    int* S = malloc(sizeof(int) * ds.n_instances);
    int* S_copy = malloc(sizeof(int) * ds.n_instances);
    int* non_S = malloc(sizeof(int) * ds.n_instances);
    /* last_train_S_size[x]: size of S the last time x was classified
     * correctly; members of S before that position were already offered
     * to x's neighbour slots. */
    int* last_train_S_size = calloc(ds.n_instances, sizeof(int));
    int S_size = 0;
    int non_S_size = 0;
    int S_index;
    /* nearest: n_neighbors slots per instance holding dataset indices,
     * closest first; -1 marks an empty slot. */
    int* nearest = malloc(sizeof(int) * ds.n_instances * n_neighbors);
    int* votes = NULL;
    int neighbor_majority_class;
    int neighbor_majority_class_count;
    bool whole_non_S_classified_correctly = FALSE;
    Dataset ds_reduced;

    fill_int_array(nearest, ds.n_instances * n_neighbors, -1);
    count_classes(ds, &n_classes, &class_labels);
    votes = malloc(sizeof(int) * n_classes);

    /* Add one random instance from each class to S. */
    srand(time(NULL));
    for (i = 0; i < n_classes; i++) {
        while (1) {
            int candidate = rand() % ds.n_instances;
            if (ds.y[candidate] == class_labels[i]) {
                S[S_size++] = candidate;
                break;
            }
        }
    }

    while (!whole_non_S_classified_correctly) {
        whole_non_S_classified_correctly = TRUE;

        /* Copy S to an auxiliary array and sort it; S itself must keep its
         * insertion order because last_train_S_size[] refers to positions
         * in S. */
        memcpy(S_copy, S, sizeof(int) * S_size);
        qsort(S_copy, S_size, sizeof(int), compare_ints);

        /* Find all instances not in S by walking the sorted copy. */
        S_index = 0;
        non_S_size = 0;
        for (i = 0; i < ds.n_instances; i++) {
            if (S_index == S_size || i < S_copy[S_index])
                non_S[non_S_size++] = i;
            else
                S_index++;
        }

        shuffle_ints(non_S_size, non_S);

        for (i = 0; i < non_S_size; i++) {
            int* nearest_for_i = nearest + non_S[i] * n_neighbors;

            /* Offer every member of S added since non_S[i] was last
             * classified to its neighbour slots. */
            for (j = last_train_S_size[non_S[i]]; j < S_size; j++) {
                /* j is a position inside S. The slots are later used to
                 * index ds.y and ds.X, so they must hold the dataset index
                 * S[j], not the position j. */
                int candidate = S[j];

                for (k = 0; k < n_neighbors; k++) {
                    if (nearest_for_i[k] < 0) {
                        nearest_for_i[k] = candidate;
                        break;
                    }
                    if (squared_dist(ds.n_features,
                                     ds.X + ds.n_features * nearest_for_i[k],
                                     ds.X + ds.n_features * non_S[i])
                        > squared_dist(ds.n_features,
                                       ds.X + ds.n_features * non_S[i],
                                       ds.X + ds.n_features * candidate)) {
                        /* Shift the farther neighbours one slot to the
                         * right; the last one falls off. */
                        for (l = n_neighbors - 1; l >= k + 1; l--)
                            nearest_for_i[l] = nearest_for_i[l - 1];
                        nearest_for_i[k] = candidate;
                        break;
                    }
                }
            }

            /* Count votes for non_S[i]; slots are filled left to right, so
             * the first empty slot ends the scan. */
            memset(votes, 0, n_classes * sizeof(int));
            for (j = 0; j < n_neighbors; j++) {
                int current_neighbor = nearest_for_i[j];
                int current_class = -1;

                if (current_neighbor < 0)
                    break;

                for (k = 0; k < n_classes; k++) {
                    if (ds.y[current_neighbor] == class_labels[k]) {
                        current_class = k;
                        break;
                    }
                }
                /* A label missing from class_labels would otherwise write
                 * to votes[-1]. */
                if (current_class >= 0)
                    votes[current_class]++;
            }

            /* Find the majority class of non_S[i]; ties go to the label
             * that comes first in class_labels. */
            neighbor_majority_class = class_labels[0];
            neighbor_majority_class_count = votes[0];
            for (j = 1; j < n_classes; j++) {
                if (votes[j] > neighbor_majority_class_count) {
                    neighbor_majority_class_count = votes[j];
                    neighbor_majority_class = class_labels[j];
                }
            }

            /* Either add non_S[i] to S, or remember the size of S that was
             * used to classify it. */
            if (ds.y[non_S[i]] != neighbor_majority_class) {
                S[S_size++] = non_S[i];
                whole_non_S_classified_correctly = FALSE;
            } else {
                last_train_S_size[non_S[i]] = S_size;
            }
        }
    }

    /* Form a new dataset with only the selected instances. */
    ds_reduced = alloc_dataset(ds.n_features, S_size);
    for (i = 0; i < S_size; i++) {
        memcpy(ds_reduced.X + ds.n_features * i,
               ds.X + ds.n_features * S[i],
               sizeof(flpoint) * ds.n_features);
        ds_reduced.y[i] = ds.y[S[i]];
    }

    free(class_labels);
    free(S);
    free(S_copy);
    free(non_S);
    free(nearest);
    free(last_train_S_size);
    free(votes);

    return ds_reduced;
}
/*
 * Emits code for the expression tree `arbre` through writecode,
 * writecodeln and writecodeiln, recursing into sub-expressions.
 *
 * arbre         : expression node to translate; NULL emits nothing.
 * super_fin_env : variable environment used to resolve identifiers via
 *                 get_var_index; a Bloc node builds its own environment
 *                 for its body.
 *
 * The globals current_method, current_class_name, parent_current_class_name
 * and definedClasses are read to resolve identifiers and method calls.
 * Operators without a case below emit nothing.
 */
void writecode_exp(PARBRE arbre, PATT super_fin_env)
{
    PATT local_fin_env = NULL;

    /* Check for NULL before touching any field of the node. */
    if (arbre == NULL)
        return;

    switch (arbre->op) {
    case New: {
        writecodeln("--NEW");
        int c = 0;
        writecode("ALLOC ");
        /* Count the attributes of the class and of every class reached
         * through name_parent. */
        PCLASSE lc = get_class(arbre->gauche.A->gauche.S);
        while (lc) {
            PATT att = lc->lattributs;
            while (att) {
                c++;
                att = att->suiv;
            }
            lc = get_class(lc->name_parent);
        }
        writecodeiln(1 + c);
        writecodeln("DUPN 1");
        writecode("PUSHG ");
        writecodeiln((get_class(arbre->gauche.A->gauche.S))->index);
        writecodeln("STORE 0");
        writecodeln("--NEWEND");
        break;
    }

    case Bloc: {
        writecodeln("--BLOC");
        local_fin_env = enrichissement_att_environnement(NULL, arbre->droit.lattributs);
        /* Allocate the variables. */
        PATT att = arbre->droit.lattributs;
        int c = 0;
        while (att) {
            c++;
            att = att->suiv;
        }
        writecode("PUSHN ");
        writecodeiln(c);
        writecode_exp(arbre->gauche.A, local_fin_env);
        int i;
        /* One SWAP / POPN 1 pair is emitted per declared variable. */
        for (i = 0; i < c; i++) {
            writecodeln("SWAP");
            writecodeln("POPN 1");
        }
        desenrichissement_att_environnement(local_fin_env);
        writecodeln("--BLOCEND");
        break;
    }

    case Self:
        writecodeln("PUSHL -1");
        break;

    case Id: {
        char *id = arbre->gauche.S;
        int index = get_var_index(super_fin_env, id);
        if (current_method == NULL) {
            /* Main */
            writecode("PUSHL ");
            writecodeiln(index + count_classes(definedClasses));
        } else if (index > -1) {
            writecode("PUSHL ");
            writecodeiln(index);
        } else {
            /* Not a variable of the environment: try a parameter of the
             * current method, then an attribute of the current class. */
            int indexparam = index_param(current_method, id);
            int n = count_params(current_method);
            if (indexparam) {
                writecode("PUSHL -");
                writecodeiln(n + 2 - indexparam);
            } else {
                int indexatt = index_att(current_class_name, id);
                if (indexatt >= 0) {
                    writecodeln("PUSHL -1");
                    writecode("LOAD ");
                    writecodeiln(indexatt + 1);
                }
            }
        }
        break;
    }

    case Fct: {
        writecodeln("--APPEL");
        writecodeln("PUSHN 1"); /* slot for the function's return value */
        /* Put self on top of the stack. */
        writecodeln("PUSHL -1");
        PFONC f = arbre->gauche.F;
        PARG arg = f->largs;
        int c = 0;
        while (arg) {
            writecode_exp(arg->expression, super_fin_env);
            writecodeln("SWAP");
            c++;
            arg = arg->suiv;
        }
        writecodeln("DUPN 1");
        writecodeln("LOAD 0");
        writecode("LOAD ");
        /* NOTE(review): gauche was just read as F (a PFONC) but is handed
         * to check_type as gauche.A here - confirm this is intended. */
        writecodeiln((get_meth_index(check_type(arbre->gauche.A, NULL, super_fin_env), f->name)));
        writecodeln("CALL");
        writecode("POPN ");
        writecodeiln(c + 1); /* pop the receiver and the parameters */
        writecodeln("--APPELEND");
        break;
    }

    case Aff: {
        writecodeln("--AFF");
        writecode_exp(arbre->droit.A, super_fin_env);
        writecodeln("DUPN 1");
        if (arbre->gauche.A->op == '.') {
            /* The target is an object's field. */
            writecode_exp(arbre->gauche.A->gauche.A, super_fin_env);
            /* NOTE(review): the field name is read as droit.S here, while
             * the '.' case below reads droit.A->gauche.S - confirm which
             * shape the parser builds for assignment targets. */
            int indexatt = index_att(check_type(arbre->gauche.A->gauche.A, NULL, super_fin_env),
                                     arbre->gauche.A->droit.S);
            writecodeln("SWAP");
            writecode("STORE ");
            writecodeiln(indexatt + 2);
        } else {
            /* The target is an identifier. */
            int index = get_var_index(super_fin_env, arbre->gauche.A->gauche.S);
            if (current_method == NULL) {
                /* Main */
                writecode("STOREL ");
                writecodeiln(index + count_classes(definedClasses));
            } else if (index > -1) {
                writecode("STOREL ");
                writecodeiln(index);
            } else {
                int indexparam = index_param(current_method, arbre->gauche.A->gauche.S);
                if (indexparam) {
                    int n = count_params(current_method);
                    writecode("STOREL -");
                    writecodeiln(n + 2 - indexparam);
                } else {
                    int indexatt = index_att(current_class_name, arbre->gauche.A->gauche.S);
                    if (indexatt >= 0) {
                        writecodeln("PUSHL -1");
                        writecodeln("SWAP");
                        writecode("STORE ");
                        writecodeiln(indexatt + 1);
                    }
                }
            }
        }
        writecodeln("--AFFEND");
        break;
    }

    case ';':
        /* Nothing at all is emitted when the right operand is missing. */
        if (arbre->droit.A != NULL) {
            writecode_exp(arbre->gauche.A, super_fin_env);
            writecodeln("POPN 1");
            writecode_exp(arbre->droit.A, super_fin_env);
        }
        break;

    case '.': {
        if (arbre->droit.A->op == Id) {
            /* Field access. */
            writecode_exp(arbre->gauche.A, super_fin_env);
            int index = index_att(check_type(arbre->gauche.A, NULL, super_fin_env),
                                  arbre->droit.A->gauche.S);
            writecode("LOAD ");
            writecodeiln(index + 2);
        } else if (arbre->droit.A->op == Fct) {
            /* Method call on a receiver. */
            int imprimer = 0;
            PFONC f = arbre->droit.A->gauche.F;

            /* "imprimer" on a receiver typed "Entier" or "Chaine" is
             * emitted inline as WRITEI / WRITES instead of a call. */
            if (strcmp(f->name, "imprimer") == 0) {
                if (strcmp(check_type(arbre->gauche.A, NULL, super_fin_env), "Entier") == 0) {
                    writecode_exp(arbre->gauche.A, super_fin_env);
                    writecodeln("DUPN 1");
                    writecodeln("WRITEI");
                    imprimer = 1;
                }
                if (strcmp(check_type(arbre->gauche.A, NULL, super_fin_env), "Chaine") == 0) {
                    writecode_exp(arbre->gauche.A, super_fin_env);
                    writecodeln("DUPN 1");
                    writecodeln("WRITES");
                    imprimer = 1;
                }
            }

            if (!imprimer && arbre->gauche.A->op == Super) {
                /* Call through Super: the method is looked up from the
                 * parent class entry pushed with PUSHG. */
                writecodeln("--APPEL");
                writecodeln("PUSHN 1"); /* slot for the function's return value */
                PARG arg = f->largs;
                int c = 0;
                while (arg) {
                    writecode_exp(arg->expression, super_fin_env);
                    c++;
                    arg = arg->suiv;
                }
                writecodeln("PUSHL -1");
                int index = get_class(parent_current_class_name)->index;
                writecode("PUSHG ");
                writecodeiln(index);
                writecode("LOAD ");
                writecodeiln((get_meth_index(check_type(arbre->gauche.A, NULL, super_fin_env), f->name)));
                writecodeln("CALL");
                writecode("POPN ");
                writecodeiln(c + 1); /* pop the receiver and the parameters */
                writecodeln("--APPELEND");
            } else if (!imprimer) {
                /* Ordinary call: the receiver is evaluated first. */
                writecodeln("--APPEL");
                writecodeln("PUSHN 1"); /* slot for the function's return value */
                writecode_exp(arbre->gauche.A, super_fin_env);
                PARG arg = f->largs;
                int c = 0;
                while (arg) {
                    writecode_exp(arg->expression, super_fin_env);
                    writecodeln("SWAP");
                    c++;
                    arg = arg->suiv;
                }
                writecodeln("DUPN 1");
                writecodeln("LOAD 0");
                writecode("LOAD ");
                writecodeiln((get_meth_index(check_type(arbre->gauche.A, NULL, super_fin_env), f->name)));
                writecodeln("CALL");
                writecode("POPN ");
                writecodeiln(c + 1); /* pop the receiver and the parameters */
                writecodeln("--APPELEND");
            }
        }
        break;
    }

    case Cste:
        writecode("PUSHI ");
        writecodeiln(arbre->gauche.E);
        break;

    case String:
        writecode("PUSHS ");
        writecodeln(arbre->gauche.S);
        break;

    case '+':
        writecode_exp(arbre->gauche.A, super_fin_env);
        writecode_exp(arbre->droit.A, super_fin_env);
        writecodeln("ADD");
        break;

    case '-':
        writecode_exp(arbre->gauche.A, super_fin_env);
        writecode_exp(arbre->droit.A, super_fin_env);
        writecodeln("SUB");
        break;

    case '*':
        writecode_exp(arbre->gauche.A, super_fin_env);
        writecode_exp(arbre->droit.A, super_fin_env);
        writecodeln("MUL");
        break;

    case '/':
        writecode_exp(arbre->gauche.A, super_fin_env);
        writecode_exp(arbre->droit.A, super_fin_env);
        writecodeln("DIV");
        break;

    case ITE: {
        /* Condition, then-branch, jump over the else-branch, else-branch. */
        writecode_exp(arbre->gauche.A, super_fin_env);
        int lblelse = newlbl();
        int lblend = newlbl();
        writecode("JZ ");
        writecodeln(lbl(lblelse));
        writecode_exp(arbre->droit.A->gauche.A, super_fin_env);
        writecode("JUMP ");
        writecodeln(lbl(lblend));
        writecode(lbl(lblelse));
        writecodeln(": NOP");
        writecode_exp(arbre->droit.A->droit.A, super_fin_env);
        writecode(lbl(lblend));
        writecodeln(": NOP");
        break;
    }

    case LT:
        writecode_exp(arbre->gauche.A, super_fin_env);
        writecode_exp(arbre->droit.A, super_fin_env);
        writecodeln("INF");
        break;

    case LE:
        writecode_exp(arbre->gauche.A, super_fin_env);
        writecode_exp(arbre->droit.A, super_fin_env);
        writecodeln("INFEQ");
        break;

    case GT:
        writecode_exp(arbre->gauche.A, super_fin_env);
        writecode_exp(arbre->droit.A, super_fin_env);
        writecodeln("SUP");
        break;

    case GE:
        writecode_exp(arbre->gauche.A, super_fin_env);
        writecode_exp(arbre->droit.A, super_fin_env);
        writecodeln("SUPEQ");
        break;

    case EQ:
        writecode_exp(arbre->gauche.A, super_fin_env);
        writecode_exp(arbre->droit.A, super_fin_env);
        writecodeln("EQUAL");
        break;

    case NEQ:
        /* Inequality is emitted as EQUAL followed by NOT. */
        writecode_exp(arbre->gauche.A, super_fin_env);
        writecode_exp(arbre->droit.A, super_fin_env);
        writecodeln("EQUAL");
        writecodeln("NOT");
        break;

    default:
        /* Other operators emit nothing. */
        break;
    }
}
/*
 * Selects a subset of the instances of ds and returns it as a new dataset.
 *
 * The first batch of selected instances comes from
 * find_classes_centroids_in_data (one entry per class label reported by
 * count_classes). Each round merges the current batch into the selected
 * set, refreshes every unselected instance's neighbour slots with the
 * batch, and takes a majority vote among those neighbours. For an instance
 * whose vote disagrees with its label, each of its neighbours records it
 * as "representative" if it is closer than the one recorded so far. The
 * distinct representatives form the next batch; the loop ends when a round
 * produces none.
 *
 * ds          : input dataset; X, y, n_instances and n_features are read.
 * n_neighbors : number of neighbour slots kept per instance.
 *
 * Returns a dataset obtained from alloc_dataset with copies of the selected
 * rows and labels. Scratch buffers allocated here are freed before
 * returning.
 */
Dataset fcnn_reduce(Dataset ds, int n_neighbors)
{
    int n_classes;
    int *class_labels = NULL;
    int *selected = malloc(sizeof(int) * ds.n_instances);
    int *batch = malloc(sizeof(int) * ds.n_instances);
    int *outside = malloc(sizeof(int) * ds.n_instances);
    /* n_neighbors slots per instance, closest first, -1 = empty slot */
    int *nearest = malloc(sizeof(int) * ds.n_instances * n_neighbors);
    int *rep = NULL;
    int *votes = NULL;
    int selected_count = 0;
    int batch_count = 0;
    int outside_count = 0;
    int cursor;
    int inst, pos, b, slot, c, s;
    Dataset ds_reduced;

    count_classes(ds, &n_classes, &class_labels);
    fill_int_array(nearest, ds.n_instances * n_neighbors, -1);

    batch_count = n_classes;
    find_classes_centroids_in_data(ds, n_classes, class_labels, batch);

    rep = malloc(sizeof(int) * ds.n_instances);
    votes = malloc(sizeof(int) * n_classes);

    while (batch_count > 0) {
        /* Append the batch to the selected set and keep the set sorted. */
        memcpy(selected + selected_count, batch, sizeof(int) * batch_count);
        selected_count += batch_count;
        qsort(selected, selected_count, sizeof(int), compare_ints);

        fill_int_array(rep, ds.n_instances, -1);

        /* Collect the instances that are not selected, using the sorted
         * order of the selected set. */
        cursor = 0;
        outside_count = 0;
        for (inst = 0; inst < ds.n_instances; inst++) {
            if (cursor != selected_count && inst >= selected[cursor])
                cursor++;
            else
                outside[outside_count++] = inst;
        }

        for (pos = 0; pos < outside_count; pos++) {
            const int x = outside[pos];
            int *slots = nearest + x * n_neighbors;
            int best = 0;

            /* Offer every member of the batch to x's neighbour slots. */
            for (b = 0; b < batch_count; b++) {
                const int cand = batch[b];

                /* Skip occupied slots whose occupant is not farther from
                 * x than the candidate. */
                slot = 0;
                while (slot < n_neighbors
                       && slots[slot] >= 0
                       && !(squared_dist(ds.n_features,
                                         ds.X + ds.n_features * slots[slot],
                                         ds.X + ds.n_features * x)
                            > squared_dist(ds.n_features,
                                           ds.X + ds.n_features * x,
                                           ds.X + ds.n_features * cand)))
                    slot++;

                if (slot < n_neighbors) {
                    /* An occupied slot means the farther neighbours move
                     * one place to the right, dropping the last one. */
                    if (slots[slot] >= 0)
                        memmove(slots + slot + 1, slots + slot,
                                sizeof(int) * (n_neighbors - 1 - slot));
                    slots[slot] = cand;
                }
            }

            /* Tally the class of every filled slot. */
            memset(votes, 0, sizeof(int) * n_classes);
            for (slot = 0; slot < n_neighbors && slots[slot] >= 0; slot++) {
                int cls = -1;
                for (c = 0; c < n_classes; c++) {
                    if (class_labels[c] == ds.y[slots[slot]]) {
                        cls = c;
                        break;
                    }
                }
                votes[cls]++;
            }

            /* Majority class; on a tie the earliest label wins. */
            for (c = 1; c < n_classes; c++) {
                if (votes[c] > votes[best])
                    best = c;
            }

            if (ds.y[x] == class_labels[best])
                continue;

            /* x is misclassified: it becomes the representative of each of
             * its neighbours that has none yet or a farther one. */
            for (slot = 0; slot < n_neighbors && slots[slot] >= 0; slot++) {
                const int nb = slots[slot];
                if (rep[nb] < 0
                    || squared_dist(ds.n_features,
                                    ds.X + ds.n_features * nb,
                                    ds.X + ds.n_features * x)
                       < squared_dist(ds.n_features,
                                      ds.X + ds.n_features * nb,
                                      ds.X + ds.n_features * rep[nb]))
                    rep[nb] = x;
            }
        }

        /* The next batch is the set of distinct representatives. */
        batch_count = 0;
        for (s = 0; s < selected_count; s++) {
            const int r = rep[selected[s]];
            int seen = 0;

            if (r < 0)
                continue;
            while (seen < batch_count && batch[seen] != r)
                seen++;
            if (seen == batch_count)
                batch[batch_count++] = r;
        }
    }

    /* Copy the selected rows and labels into the result. */
    ds_reduced = alloc_dataset(ds.n_features, selected_count);
    for (s = 0; s < selected_count; s++) {
        memcpy(ds_reduced.X + ds.n_features * s,
               ds.X + ds.n_features * selected[s],
               sizeof(flpoint) * ds.n_features);
        ds_reduced.y[s] = ds.y[selected[s]];
    }

    free(class_labels);
    free(selected);
    free(batch);
    free(outside);
    free(nearest);
    free(rep);
    free(votes);

    return ds_reduced;
}