static void test_count_features( void **state ) { UNUSED( state ); instructions_capabilities *ins_cap = ( instructions_capabilities * ) xcalloc( 1, sizeof( *ins_cap ) ); uint16_t feature_len; feature_len = count_features( ( void * ) ins_cap, sizeof( *ins_cap ) ); assert_int_equal( feature_len, 0 ); // test setting all the attributes to true memset( ins_cap, 1, sizeof( *ins_cap ) ); feature_len = count_features( ( void * ) ins_cap, sizeof( *ins_cap ) ); assert_int_equal( feature_len, 6 ); // test setting the first and last attribute memset( ins_cap, 0, sizeof( *ins_cap ) ); ins_cap->meter = true; ins_cap->goto_table = true; feature_len = count_features( ( void * ) ins_cap, sizeof( *ins_cap ) ); assert_int_equal( feature_len, 2 ); // test some attributes found around the middle of the structure. memset( ins_cap, 0, sizeof( *ins_cap ) ); ins_cap->clear_actions = true; ins_cap->write_actions = true; ins_cap->write_metadata = true; feature_len = count_features( ( void * ) ins_cap, sizeof( *ins_cap ) ); assert_int_equal( feature_len, 3 ); xfree( ins_cap ); }
//counts total number of features int count_features(feature_tree *root){ int count = 0; if (root == NULL){ return 0; } count += 1 + count_features(root->left) + count_features(root->right); return count; }
static void test_assign_action_ids( void **state ) { UNUSED( state ); actions_capabilities *ac_cap = ( actions_capabilities * ) xmalloc( sizeof( *ac_cap ) ); uint16_t feature_len; memset( ac_cap, 1, sizeof( *ac_cap ) ); ac_cap->drop = false; feature_len = count_features( ( void * ) ac_cap, sizeof( *ac_cap ) ); struct ofp_action_header *ac_hdr = ( struct ofp_action_header * ) xmalloc( feature_len * sizeof( *ac_hdr ) ); uint16_t total_len = assign_action_ids( ac_hdr, ac_cap ); for ( uint16_t i = 0; i < feature_len; i++ ) { assert_action_ids( &ac_hdr[ i ], ac_cap ); } assert_int_equal( total_len, feature_len * sizeof( struct ofp_action_header ) ); xfree( ac_hdr ); xfree( ac_cap ); }
static void test_assign_instruction_ids( void **state ) { UNUSED( state ); instructions_capabilities *ins_cap = ( instructions_capabilities * ) xmalloc( sizeof( *ins_cap ) ); uint16_t feature_len; // test setting all the instructions memset( ins_cap, 1, sizeof( *ins_cap ) ); feature_len = count_features( ( void * ) ins_cap, sizeof( *ins_cap ) ); // allocate space for all ofp_instruction struct ofp_instruction *instructions = ( struct ofp_instruction * ) xmalloc( feature_len * sizeof( *instructions ) ); uint16_t total_len = assign_instruction_ids( instructions, ins_cap ); for ( uint16_t i = 0; i < feature_len; i++) { assert_instruction( &instructions[ i ], ins_cap ); } assert_int_equal( total_len, feature_len * sizeof( struct ofp_instruction ) ); xfree( instructions ); xfree( ins_cap ); }
int main (int argc, char **argv) { struct arguments arguments; /* Parse our arguments; every option seen by parse_opt will be reflected in arguments. */ argp_parse (&argp, argc, argv, 0, 0, &arguments); // number of nearest neighbors int k; k = 1; //default is 1 if (sscanf (arguments.args[0], "%i", &k)!=1) {} //omp vars int num_threads; num_threads = 4; if (sscanf(arguments.args[1], "%i", &num_threads)!=1) {} //verbose? int verbose; verbose = arguments.verbose; if (verbose>0 && verbose<130){ verbose = 1; } else{ verbose = 0; } //define a bunch of counters! int i, j, m, n, ii, jj, kk; //number of examples to read in int total_examples = 10000; // int total_examples = 19; //max words per question int num_words = 300; //max word length int max_word_len = 20; //max vocab count // int max_vocab = 200000; //data read in poorly int bad_iter = 0; //Used to split into training and testing data (will train on example_num%train) int train = 10; //Debug int debug = 0; printf("k, Verbose, num_threads = %i, %i, %i\n", k, verbose, num_threads); //Allocate space for data being read in with fgets char *csv_line = malloc(sizeof(char)*1500); //store all data //array of structs //struct.question->array of char* //struct.cat->char* //struct.example_num->int struct data *all_data; all_data = malloc(sizeof(struct data)*total_examples); for (ii=0; ii<total_examples; ii++){ all_data[ii].question = malloc(sizeof(char*)*num_words); for (jj=0; jj<num_words; jj++){ // all_data[ii].question[jj] = malloc(sizeof(char)*max_word_len); all_data[ii].question[jj] = calloc(max_word_len, sizeof(char)); } all_data[ii].cat = malloc(sizeof(char)*max_word_len); } //store numeric version of data for algorithms struct numeric_data *num_data; num_data = malloc(sizeof(struct numeric_data)*total_examples); for (ii=0; ii<total_examples; ii++){ num_data[ii].array_of_features = malloc(sizeof(struct feature_count)*num_words); for (jj=0; jj<num_words; jj++){ num_data[ii].array_of_features[jj].feature_num = 0; num_data[ii].array_of_features[jj].count = 0; } } //store struct which keep track of the k nearest neighbors // struct distance_results results; // results.example_num = 0; // results.distances = calloc(k, sizeof(double)); // results.cat = calloc(k, sizeof(int)); // results.example_nums = calloc(k, sizeof(int)); // //struct used to calculate the mode of the k nearest neighbors // struct mode mod; // mod.count = calloc(k, sizeof(int)); // mod.cat = calloc(k, sizeof(int)); // //store vocabulary list (char** points to array of char* of length 20) // char **word_list; // word_list = malloc(sizeof(char*)*max_vocab); //assumes max_vocab total vocab // for (ii=0; ii<max_vocab; ii++){ // // word_list[ii] = malloc(sizeof(char)*max_word_len); //assumes max word length of 20 // word_list[ii] = calloc(max_word_len, sizeof(char)); //assumes max word length of 20 // } //alternate vocab store tree feature_tree *vocab; vocab = NULL; //store category list char **cat_list; cat_list = malloc(sizeof(char*)*40); //assumes 20 max categories for (ii=0; ii<40; ii++){ cat_list[ii] = malloc(sizeof(char)*max_word_len); strncpy(cat_list[ii], "\0", 1); } //Read in csv file FILE *f = fopen("train_pruned2.csv", "r"); if (f == NULL){ printf("Failed to open file \n"); return -1; } //parse question into individual words, create vocabulary list int vocab_count = 0; int category_count = 1; for (i=0; i<total_examples; i++){ // printf("Iteration = %i\n", i); //line in csv to buffer if (fgets(csv_line, 1500, f) == NULL){ printf("Fgets error!\n"); exit(0); } //csv line to 3 individual parts if (i>0) { char *tok; char *tok_copy; //problem with tok getting overwritten in parse_question // char **parsed_question = malloc(sizeof(char*)*num_words); // printf("CSV_LINE = %s\n", csv_line); tok = strtok(csv_line, "|"); if (tok == NULL){ // all_data[i-bad_iter-1].example_num = -1; bad_iter++; // i--; continue; } sscanf(tok, "%i", &all_data[i-bad_iter-1].example_num); tok = strtok(NULL, "|"); if (tok == NULL){ // all_data[i-bad_iter-1].example_num = -1; bad_iter++; // i--; continue; } tok_copy = (char *)tok; tok = strtok(NULL, "|"); if (tok == NULL){ // all_data[i-bad_iter-1].example_num = -1; bad_iter++; // i--; continue; } strncpy(all_data[i-bad_iter-1].cat, tok, 19); all_data[i-bad_iter-1].cat[max_word_len-1] = 0; char *tok2; tok2 = strtok(tok_copy, " \t"); j = 0; if ((tok2 != NULL) && (strlen(tok2)>3)){ strncpy(all_data[i-bad_iter-1].question[0], tok2, 19); all_data[i-bad_iter-1].question[0][max_word_len-1] = 0; //add to tree if not test data // if (all_data[i-bad_iter-1].example_num % train != 0){ insert_word(&vocab, all_data[i-bad_iter-1].question[0]); j += 1; // } } while (tok2 != NULL){ if (j>=num_words){ break; } tok2 = strtok(NULL, " \t"); if ((tok2 != NULL) && (strlen(tok2)>3)){ strncpy(all_data[i-bad_iter-1].question[j], tok2, 19); all_data[i-bad_iter-1].question[j][max_word_len-1] = 0; //add to tree if not test data // if (all_data[i-bad_iter-1].example_num % train != 0){ insert_word(&vocab, all_data[i-bad_iter-1].question[j]); j++; // } } } //end while // all_data[i-bad_iter-1] = instance; // print_data(&all_data[i-bad_iter-1]); ////add to vocabulary (using array, VERY slow with lots of data) // add_to_word_list(all_data[i-bad_iter-1].question, word_list, &vocab_count); //add to category list add_to_cat_list(all_data[i-bad_iter-1].cat, cat_list, &category_count); } //end if } //end for //close file fclose(f); //assign unique number to each feature //first feature is feature 1, feature 0 is for errors etc. unsigned int mm = 1; number_features(vocab, &mm); //Some of the csv rows aren't read in properly with fgets printf("Bad iterations = %i/%i\n", bad_iter, i); printf("Feature count = %i\n", count_features(vocab)); // print_inorder(vocab); // for (ii=0; ii<40; ii++){ // printf("%s", cat_list[ii]); // } ////turn data into numeric features//// for (i=0; i<total_examples; i++){ num_data[i].example_num = all_data[i].example_num; num_data[i].cat = get_cat_index(cat_list, all_data[i].cat); words_to_num(&num_data[i], &all_data[i], &vocab, num_words); // count_features2(&num_data[i]); } // num_data->array_of_features[0].feature_num = 44; // print_num_data(&num_data[0]); // print_num_data(&num_data[1]); total_examples = total_examples-bad_iter-1; int sadfjh; double av_feature_count = 0; for (ii=0; ii<total_examples; ii++){ sadfjh = count_features2(&num_data[ii]); av_feature_count += sadfjh; // printf("%i ", sadfjh); } // printf("\n av_feature_count %f\n", av_feature_count/(total_examples-bad_iter-1)); // print_num_data(&num_data[4464]); // printf("vocab->right = %s \n", vocab->feature); // print_data(&all_data[0]); // print_data(&all_data[29000]); // printf("%s, %u\n", "1829", get_feature_number(&vocab, "1829")); //find the distance between first example and rest double distance; //range each process will cover int range; // printf("%i, %i\n", range, total_examples); // printf("R, Min, Max = %i, %i, %i\n", rank, rank*range, (rank+1)*range); // struct distance_results results; // results.example_num = 0; // results.distances = calloc(k, sizeof(double)); // results.cat = calloc(k, sizeof(int)); // results.example_nums = calloc(k, sizeof(int)); // //struct used to calculate the mode of the k nearest neighbors // struct mode mod; // mod.count = calloc(k, sizeof(int)); // mod.cat = calloc(k, sizeof(int)); //correct/total/answer int c = 0; int total = 0; int answer; omp_set_dynamic(0); //Explicitly disable dynamic teams omp_set_num_threads(num_threads); //Specify thread count #pragma omp parallel \ private(kk, ii, distance, answer) \ reduction(+:c,total) \ shared(num_data) { //store struct which keep track of the k nearest neighbors struct distance_results results; results.example_num = 0; results.distances = calloc(k, sizeof(double)); results.cat = calloc(k, sizeof(int)); results.example_nums = calloc(k, sizeof(int)); //struct used to calculate the mode of the k nearest neighbors struct mode mod; mod.count = calloc(k, sizeof(int)); mod.cat = calloc(k, sizeof(int)); #pragma omp for for (kk=0; kk<total_examples; kk++){ // printf("Thread = %i, Iter = %i, c = %i, total=%i\n", omp_get_thread_num(), kk, c, total); //only test on test data if (num_data[kk].example_num%train != 0){ continue; } if (num_data[kk].cat == 0){ continue; } results.correct_answer = num_data[kk].cat; results.example_num = num_data[kk].example_num; for (ii=0; ii<k; ii++){ results.distances[ii] = 0; results.cat[ii] = 0; mod.count[ii] = 0; mod.cat[ii] = 0; } // print_num_data(&num_data[kk]); //calc distance to neighbors for (ii=0; ii<total_examples-1; ii++){ //don't calc distance to self if (kk != ii){ //Eliminate bad data (examples with few words tend to have low distances //reguardless of whether they are more similar... if (num_data[ii].total_features >= 40){ distance = get_distance(&num_data[kk], &num_data[ii], num_words); // if (distance < 2){ // continue; // } // printf("%f ", distance); if (num_data[ii].example_num > 0){ add_distance_to_results(&results, distance, k, num_data[ii].cat, num_data[ii].example_num); } } } } answer = calc_nearest_neighbor(&results, &mod, k); if (answer == results.correct_answer){ c += 1; } // printf("\n"); // for (ii=0; ii<k; ii++){ // printf("Distance, cat, example_num1, example_num2 = %2.2f, %i, %i, %i\n", // results.distances[ii], results.cat[ii], results.example_num, results.example_nums[ii]); // } // else{ // } total += 1; if (verbose>0 && debug>0){ printf("Thread = %i, Correct/Total = %i/%i Answer/Correct = %i/%i\n", omp_get_thread_num(), c, total, answer, results.correct_answer); } } //Thread results #pragma omp barrier if (omp_get_thread_num() == 0){ printf("/// Thread Results ///\n"); } #pragma omp barrier printf("Thread = %i, Correct/Total = %i/%i\n", omp_get_thread_num(), c, total); //free distance result free(results.distances); free(results.cat); //free mode struct free(mod.count); free(mod.cat); } printf("/// Final Results ///\n"); printf("Correct/Total = %i/%i\n", c, total); // printf("verbose = %i", verbose); ////free malloc calls//// //free feature tree free_feature_tree(vocab); //free numeric data for (ii=0; ii<total_examples; ii++){ free(num_data[ii].array_of_features); } free(num_data); // //free vocab list // for (ii=0; ii<max_vocab; ii++){ // free(word_list[ii]); // } // free(word_list); //free category list for (ii=0; ii<40; ii++){ free(cat_list[ii]); } free(cat_list); //free all_data list for (ii=0; ii<total_examples; ii++){ for (jj=0; jj<num_words; jj++){ free(all_data[ii].question[jj]); } free(all_data[ii].question); free(all_data[ii].cat); } free(all_data); //free var used to rean in csv free(csv_line); }