//Insert a word into the tree void insert_word(feature_tree **root, char *word){ if (word == NULL){ return; } if (strlen(word)<4){ return; } if (!(*root)){ feature_tree *temp = NULL; temp = (feature_tree *)malloc(sizeof(feature_tree)); temp->feature_num = 0; temp->feature = malloc(sizeof(char)*20); strcpy(temp->feature, word); temp->left = NULL; temp->right = NULL; *root = temp; // printf("root feat = %s\n", root.feature); return; } // printf("%s, %s\n", root->feature, word); //less than goes left if (strcmp(word, (*root)->feature) < 0){ insert_word(&(*root)->left, word); } //greater than goes right else if (strcmp(word, (*root)->feature) > 0){ insert_word(&(*root)->right, word); } }
END_TEST

/*
 * Loads every line of /usr/share/dict/words into the graph, then re-reads the
 * file and asserts that each word can be found again.
 */
START_TEST (ut_graph_load_dictionary)
{
    init_node(graph);

    FILE *fp = fopen("/usr/share/dict/words", "r");
    /* Fail the test cleanly instead of crashing inside fgetws(). */
    ck_assert_msg(fp != NULL, "Can't open /usr/share/dict/words\n");

    wchar_t word[1000];
    while (fgetws(word, 80, fp)) {
        /* Strip the newline only when there is one (last line, long lines). */
        size_t len = wcslen(word);
        if (len > 0 && word[len - 1] == L'\n') {
            word[len - 1] = L'\0';
        }
        insert_word(graph, word);
    }

    rewind(fp);
    while (fgetws(word, 80, fp)) {
        size_t len = wcslen(word);
        if (len > 0 && word[len - 1] == L'\n') {
            word[len - 1] = L'\0';
        }
        ck_assert_msg(find_word(graph, word) == 1, "Can't find word %ls in full dictionary test\n", word);
    }

    fclose(fp);
    clear_node(graph);
}
/*
 * Try to place `word` in the word search `ws`.
 *
 * Starting from a random position, every position reachable through
 * position_iterate() is scored with fit_word_score(); the best-scoring one is
 * remembered and the scan stops early once a score equal to the word length
 * is seen. The word is inserted at the best position if any position scored
 * zero or more.
 *
 * Returns true when the word was inserted, false otherwise.
 */
bool fit_word( word_search_t *ws, wchar_t *word )
{
    position_t cursor, winner;
    int top_score = -1;
    int word_len = wcslen(word);

    position_create_random( ws, &cursor );

    for (;;) {
        int score = fit_word_score( ws, word, &cursor );

        if( score > top_score ) {
            /* Release the previous winner before replacing it. */
            if( top_score >= 0 ) {
                position_free( &winner );
            }
            top_score = score;
            position_copy( &winner, &cursor );

            if( top_score == word_len ) {
                break;
            }
        }

        if( !position_iterate( &cursor ) ) {
            break;
        }
    }

    bool placed = top_score >= 0;
    if( placed ) {
        insert_word( ws, word, &winner );
        position_free( &winner );
    }

    position_free( &cursor );
    return placed;
}
/*
 * Read `filename` line by line and insert each line (without its trailing
 * newline) into `filter`.
 *
 * filter:   destination passed through to insert_word().
 * filename: path of the word list, one word per line.
 *
 * NOTE(review): IF_CNT is a macro defined elsewhere; it is kept in front of
 * the insert_word() call exactly as before. It may read `len` - confirm.
 */
void load_words(unsigned char filter[], char *filename)
{
    FILE *fp = fopen(filename, "r");
    char word[WORD_BUF_SIZE + 1];

    if (!fp) {
        err("[e] %s: can't open file \"%s\"\n", __FUNCTION__, filename);
        /* Never hand a NULL stream to fgets(), whether or not err() returns. */
        return;
    }

    while (fgets(word, WORD_BUF_SIZE, fp)) {
        int len = strlen(word);

        /* A completely filled buffer means the line was probably truncated. */
        if (len == WORD_BUF_SIZE - 1) {
            err("[e] WORD_BUF_SIZE is small\n");
        }

        /*
         * Cut at the newline and take the real word length from that position;
         * the old unconditional len-- was off by one for a final line that has
         * no newline.
         */
        len = strcspn(word, "\n");
        word[len] = 0;

        IF_CNT insert_word(filter, word);
    }

    fclose(fp);
}
/*
 * Word-frequency driver: reads words with get_word() until the sentinel
 * "* no more *" arrives, lower-cases each word, and either lets lookup()
 * account for a known word or appends a new one to the table. On the sentinel
 * the table is sorted, printed, and the program returns 0.
 */
int main (void)
{
    char s[21];            /* a temp word variable */
    char table[200][21];   /* the table of words */
    int ns[200];           /* array of occurrences, parallel to table */
    const int capacity = (int)(sizeof ns / sizeof ns[0]);
    int n;                 /* number of words stored so far */

    /* Every slot starts at one occurrence (the old loop skipped the last). */
    for (n = 0; n < capacity; n++) {
        ns[n] = 1;
    }

    n = 0;
    do {
        get_word(s);                        /* input a word */
        if (!strcmp(s, "* no more *")) {    /* end-of-input sentinel */
            print_table(sort(table, ns, n), n, ns);
            return (0);
        }
        convert_word(s);                    /* make the word lower case */

        /*
         * lookup() is always called first, as before. A new word is stored
         * only while the table has room, so it can no longer be overrun.
         */
        if (!lookup(s, table, n, ns) && n < capacity) {
            insert_word(s, table, n);
            n++;
        }
    } while (1);                            /* leaves via the sentinel only */
}
END_TEST

/*
 * Loads /usr/share/dict/words into the graph (echoing each word), then prints
 * every result returned by search_prefix() for the prefix "app".
 */
START_TEST (ut_graph_load_dictionary_prefix_search)
{
    init_node(graph);

    FILE *fp = fopen("/usr/share/dict/words", "r");
    /* Fail the test cleanly instead of crashing inside fgetws(). */
    ck_assert_msg(fp != NULL, "Can't open /usr/share/dict/words\n");

    wchar_t word[1000];
    while (fgetws(word, 80, fp)) {
        /* Strip the newline only when there is one (last line, long lines). */
        size_t len = wcslen(word);
        if (len > 0 && word[len - 1] == L'\n') {
            word[len - 1] = L'\0';
        }
        printf("%ls\n", word);
        insert_word(graph, word);
    }
    fclose(fp);

    /* NOTE(review): the result list is only walked, never released - confirm
       whether search_prefix() expects the caller to free it. */
    PrefixResult *result = search_prefix(graph, L"app");
    while (result != NULL) {
        printf("%ls\n", result->word);
        result = result->next;
    }

    clear_node(graph);
}
/*
 * Walks the payload list from its tail towards its head and, for each node
 * that has a predecessor, looks at the Outputs array of the last entry in
 * that predecessor's gadget list. Depending on which outputs are set it
 * fills inputs_vector_state / inputs_vector_values, inserts words into the
 * payload through insert_word(), and finally calls process_inputs() on the
 * predecessor's gadget.
 *
 * Works entirely on file-level state (payload, key_instructions, written,
 * inputs_vector_*, store_*); takes no arguments and returns nothing.
 */
void write_data(){
    struct Lnode *actual_node = NULL;
    struct Lnode *next_node = NULL;
    struct Lnode *ptr = NULL;
    int i, *Outputs;
    uint32_t value;

    /* Reset the per-run bookkeeping before walking the payload. */
    clear_vector(inputs_vector_state, 14, 0);
    clear_vector(written, 4, 0);
    inputs_vector_state[7] = 1;
    inputs_vector_values[7] = 11;
    store_count = 0;
    store_values_r3[0] = 0;
    store_values_r3[1] = SH_HEX;
    store_values_r3[2] = BIN_HEX;
    store_values_r4[0] = key_instructions.r_w_addr + 8;
    store_values_r4[1] = key_instructions.r_w_addr + 4;
    store_values_r4[2] = key_instructions.r_w_addr;

    for(actual_node = payload->tail; actual_node != NULL; actual_node = actual_node->prev){
        /* "next" in processing order is the previous node of the list. */
        next_node = actual_node->prev;
        if(next_node == NULL) break;

        /* Advance ptr to the last entry of the gadget list (empty loop body). */
        for(ptr = (GETPOINTER(next_node, payload_gadget_t))->gadget; ptr != NULL && ptr->next != NULL; ptr = ptr->next); //'ret'
        Outputs = (GETPOINTER(ptr, Gadget_t))->Outputs;

        /* Outputs 0..2 are each handled once; written[] remembers which. */
        if(Outputs[0] && !written[0]){
            written[0] = 1;
            inputs_vector_state[0] = 1;
            inputs_vector_values[0] = key_instructions.r_w_addr;
        }
        if(Outputs[1] && !written[1]){
            written[1] = 1;
            inputs_vector_state[1] = 1;
            inputs_vector_values[1] = key_instructions.r_w_addr + 8;
        }
        if(Outputs[2] && !written[2]){
            written[2] = 1;
            inputs_vector_state[2] = 1;
            inputs_vector_values[2] = 0;
        }

        /*
         * For every set output, from index 14 down to 0, insert its value.
         * insert_word() returns the node that becomes actual_node, so the
         * outer loop continues from wherever the insertions leave it.
         * NOTE(review): index 14 is read here while clear_vector() above is
         * given 14 as its size argument - confirm Outputs has 15 entries.
         */
        for(i = 14; i >= 0; i--){
            if(Outputs[i]){
                value = getValue(i);
                actual_node = insert_word(value, actual_node);
            }
        }

        /* The first time output 7 is seen, its input value becomes padding. */
        if(Outputs[7] && !written[3]){
            written[3] = 1;
            inputs_vector_values[7] = WORD_PADDING;
        }

        process_inputs(GETPOINTER(next_node, payload_gadget_t)->gadget);
    }
}
/*
 * Record a fingerprint -> container mapping: store it through
 * db_insert_fingerprint(), add the raw fingerprint bytes to `filter`, mark
 * the index dirty and bump both write counters.
 */
void ddfs_index_update(Fingerprint* finger, ContainerId id)
{
    db_insert_fingerprint(finger, id);

    /* The filter is fed the fingerprint as an opaque byte string. */
    insert_word(filter, (char *)finger, sizeof *finger);

    dirty = TRUE;
    index_write_times += 1;
    index_write_entry_counter += 1;
}
void *words( FILE *infile ) { //dict_t *wd = NULL; char wordbuf[MAXWORD]; while( get_word( wordbuf, MAXWORD, infile ) ) { d = insert_word(d, wordbuf); // add to dict } //return wd; }
/*
 * Test driver for the matrix insert_word(): for each of the ten test cases it
 * loads the input and expected matrices, inserts words[i] at the position and
 * orientation given by positions[i], prints all three matrices and counts the
 * case as a success when the matrices match and the return value equals
 * positions[i][3]. Prints a running and a final success rate.
 */
int main(int argc, char** argv)
{
    int total_test_number = 10;
    int successes = 0;
    char buffer[1024];
    int rows, cols;
    int i = 0;

    printf("\n");

    while (i < total_test_number) {
        char **input_matrix;
        char **expected_matrix;
        int inserted;
        int result;

        printf("Test number %d out of %d\n", i+1, total_test_number);

        /* positions[i][0] selects the orientation: 0 means horizontal. */
        if (positions[i][0] != 0) {
            printf("Insert the word %s vertically in [%d, %d]\n", words[i], positions[i][1], positions[i][2]);
        } else {
            printf("Insert the word %s horizontally in [%d, %d]\n", words[i], positions[i][1], positions[i][2]);
        }

        printf("Input:\n");
        sprintf(buffer, "matrices/matrix%d.txt", i);
        input_matrix = matrixcLoad(&rows, &cols, buffer);
        matrixcPrint(input_matrix, rows, cols);

        printf("Expected:\n");
        sprintf(buffer, "matrices/expected_matrix%d.txt", i);
        expected_matrix = matrixcLoad(&rows, &cols, buffer);
        matrixcPrint(expected_matrix, rows, cols);

        inserted = insert_word(words[i], input_matrix, 12, 14,
                               positions[i][1], positions[i][2], positions[i][0]);

        printf("Result:\n");
        matrixcPrint(input_matrix, rows, cols);

        result = matrixcCmp(input_matrix, expected_matrix, rows, cols);
        if (!(result > 0 && inserted == positions[i][3])) {
            printf("FAILURE\n");
        } else {
            printf("SUCCESS\n");
            successes++;
        }

        printf("Current success rate: %0.02f%%\n", (float)(successes) * 100.0f/ (float)(total_test_number));
        printf("\n*************************************************\n\n");

        matrixcFree(input_matrix, rows);
        matrixcFree(expected_matrix, rows);

        ++i;
    }

    printf("FINAL SUCCESS RATE: %0.02f%%\n", (float)(successes) * 100.0f / (float)(total_test_number));
    return 0;
}
// Feed one character into the iterator.
//
// Whitespace ends the current word: if any characters are buffered,
// insert_word() is called. Any other character is appended to the word
// buffer. The iterator also registers itself as the active instance.
// Returns *this so that assignments can be chained.
FormattedOstreamIterator& FormattedOstreamIterator::operator=(char c)
{
    *active_instance_ = this;

    // std::isspace is only defined for values representable as unsigned char
    // (or EOF); a negative plain char must be converted first.
    if (std::isspace(static_cast<unsigned char>(c))) {
        if (word_buffer_.size() > 0) {
            insert_word();
        }
    } else {
        word_buffer_.push_back(c);
    }

    return *this;
}
/*
 * Recursively insert a word into the trie.
 *
 * a_root: address of the node pointer for the current letter position; a new
 *         node is allocated when *a_root is NULL.
 * a_word: remaining part of the word, expected to consist of lowercase
 *         letters 'a'.. so that (letter - 'a') indexes the child array.
 *
 * When the end of the word is reached the node's `exists` field is set to the
 * global gs_palavra. Insertion stops early (leaving the prefix in place) if a
 * character falls outside the alphabet or if memory runs out.
 */
void insert_word(No** a_root, char* a_word)
{
    int i;
    int index;

    if (*a_root == NULL) { /* base case: create the missing node */
        No *node = (No*)malloc(sizeof(No));
        if (node == NULL) {
            return;
        }
        /* Clear every field so no stale memory is read later. */
        for (i = 0; i < c_alphabet_length; i++) {
            node->sheet[i] = NULL;
        }
        node->line = NULL;
        node->exists = false;
        *a_root = node;
    }

    if (a_word[0] == '\0') { /* end of word: mark this node */
        (*a_root)->exists = gs_palavra;
        return;
    }

    /* 'a' maps to slot 0; reject anything that would index outside sheet[]. */
    index = a_word[0] - 'a';
    if (index < 0 || index >= c_alphabet_length) {
        return;
    }

    insert_word(&(*a_root)->sheet[index], a_word + 1);
}
/*
 * Thread-style entry point: `args` is the FILE * to read from. Every word
 * obtained through get_word() is added to the global dictionary `d`.
 *
 * Always returns NULL; the original had no return statement although the
 * function is declared to return void *.
 */
void * dic_words(void * args)
{
    FILE * infile = (FILE *) args;
    char w_buf[MAXWORD];

    while (get_word( w_buf, MAXWORD, infile ) ) {
        d = insert_word(d, w_buf);
    }

    return NULL;
}
void * words(void * args){ FILE * infile = (FILE *) args; //dict_t *wd = NULL; char wordbuf[MAXWORD]; while(get_word( wordbuf, MAXWORD, infile ) ) { //pthread_mutex_lock(&mtex); d = insert_word(d, wordbuf); // add to dict //pthread_mutex_unlock(&mtex); } }
/*
 * Read words from TData->infile into TData->wordbuf and add each one to
 * TData->dict, holding mutexdict around every read+insert step.
 *
 * Returns the resulting dictionary.
 *
 * The word is inserted only when get_word() reports that it produced one.
 * The original do/while also inserted the buffer contents after the final,
 * unsuccessful read.
 */
dict_t * words( thread_data *TData )
{
    int c;

    do {
        pthread_mutex_lock(&mutexdict);
        c = get_word( TData->wordbuf, MAXWORD, TData->infile );
        if (c) {
            TData->dict = insert_word(TData->dict, TData->wordbuf); // add to dict
        }
        pthread_mutex_unlock(&mutexdict);
    } while (c);

    return TData->dict;
}
/*
 * Prompt for a word on stdin and try to insert it into `dict`, then report
 * whether it was added or already present.
 *
 * NOTE(review): the line is passed to insert_word() including its trailing
 * newline, as before. Ownership of `buffer` after insert_word() is not
 * visible from here, so it is not freed in this function - confirm.
 */
void add(gdsl_bstree_t dict)
{
    char *buffer = NULL;
    size_t len = 0;
    gdsl_constant_t result;

    getchar(); // gets rid of the newline.
    printf("Input a word to add: ");

    /* On EOF or a read error there is no word to insert. */
    if (getline(&buffer, &len, stdin) == -1) {
        return;
    }

    insert_word(dict, buffer, &result);
    if (result != GDSL_INSERTED) {
        printf("That word already exists in the dictionary.\n");
    } else {
        printf("Word added.\n");
    }
}
/*
 * Worker thread body: repeatedly fetch one word under `wordmut` and insert it
 * under `mutex`, until get_word() returns 0. Ends with pthread_exit(NULL).
 */
void* thread_stuff(void* arg)
{
    char word[MAXWORD];
    int got_word;

    for (;;) {
        /* Fetching the next word is serialized separately from inserting. */
        pthread_mutex_lock(&wordmut);
        got_word = get_word(word);
        pthread_mutex_unlock(&wordmut);

        if (!got_word) {
            break;
        }

        pthread_mutex_lock(&mutex);
        insert_word(word);
        pthread_mutex_unlock(&mutex);
    }

    pthread_exit(NULL);
}
/*
 * Thread body: build a private dictionary from the words read from `infile`
 * and hand it back through pthread_exit().
 *
 * The two mutexes used to be uninitialized automatic variables, which is
 * undefined behaviour and, being per-call, protected nothing. They are now
 * function-level statics with a static initializer, so every thread running
 * this function shares the same two locks.
 */
void * words( FILE *infile )
{
    static pthread_mutex_t mutex1 = PTHREAD_MUTEX_INITIALIZER; /* insert step */
    static pthread_mutex_t mutex2 = PTHREAD_MUTEX_INITIALIZER; /* read step */
    dict_t *wd = NULL;
    char wordbuf[MAXWORD];
    int x;

    for (;;) {
        /* Reads are serialized, including the first one. */
        pthread_mutex_lock(&mutex2);
        x = get_word( wordbuf, MAXWORD, infile );
        pthread_mutex_unlock(&mutex2);

        if (!x) {
            break;
        }

        pthread_mutex_lock(&mutex1);
        wd = insert_word(wd, wordbuf); // add to dict
        pthread_mutex_unlock(&mutex1);
    }

    pthread_exit(wd);
}
/* typedef struct aThread { int file; double info; } aThread_t; */

/*
 * Worker thread body: repeatedly fetch one word under `wlock` and insert it
 * under `mlock`, until get_word() returns 0. Ends with pthread_exit(NULL).
 */
void* mythread(void* arg)
{
    char wordbuf[MAXWORD];
    int have_word;

    for (;;) {
        /* Fetching the next word is serialized separately from inserting. */
        pthread_mutex_lock(&wlock);
        have_word = get_word(wordbuf);
        pthread_mutex_unlock(&wlock);

        if (!have_word) {
            break;
        }

        pthread_mutex_lock(&mlock);
        insert_word(wordbuf);
        pthread_mutex_unlock(&mlock);
    }

    pthread_exit(NULL);
}
/*
 * Scan `filename` with a GScanner and pass every identifier token to
 * insert_word() so that it is recorded in `table`.
 *
 * @param filename  path of the file to parse
 * @param table     hash table that receives the words
 */
void parse_file(char* filename, GHashTable* table)
{
    GScanner* scanner = make_scanner(filename);

    for (;;) {
        g_scanner_get_next_token(scanner);

        if (scanner->token == G_TOKEN_EOF) {
            break;
        }
        /* Only identifier tokens that carry a string are of interest. */
        if (scanner->token != G_TOKEN_IDENTIFIER) {
            continue;
        }
        if (scanner->value.v_string == NULL) {
            continue;
        }

        insert_word(scanner->value.v_string, table);
    }

    g_scanner_destroy(scanner);
}
/*
 * Count the occurrences of each word in a fixed sentence and print the
 * resulting dictionary.
 *
 * The first word creates the dictionary; every later word either bumps the
 * counter of its existing entry or is inserted with a count of 1. The
 * original ran the search on the freshly created first entry too, so the
 * first word was counted twice.
 */
int main()
{
    struct dictionary *d = NULL, *tmp;
    char *text = "Ciao a tutti ciao ciao a tutti tutti ciao ciao ciao tutti ciao a a";
    char *word;

    do {
        word = next_word(text);

        if (d == NULL) {
            d = new_dictionary_element(word, 1);
        } else if ((tmp = search_word(d, word)) != NULL) {
            tmp->occurences++;
        } else {
            insert_word(d, word, 1);
        }

        /* Move to the character after the next space, if there is one. */
        text = strchr(text, ' ');
        if (text != NULL) {
            text++;
        }
    } while (text != NULL);

    print_dictionary(d);
    return 0;
}
/*
 * Read "input_file.txt" character by character, collect runs of lowercase
 * letters as words, push each word with insert_word(), then print the results
 * and release the structure.
 *
 * Returns false if the file cannot be opened; otherwise returns whatever
 * func_delete() reports. Exits the process if the initial allocation fails.
 *
 * Fixes relative to the original loop:
 *  - the read result drives the loop, so the last character is no longer
 *    processed twice after a failed read at end of file;
 *  - letters beyond STRING_LENGTH are dropped instead of overrunning the
 *    buffer;
 *  - the word is NUL-terminated before it is handed to insert_word();
 *  - a word that ends exactly at end of file is inserted too.
 */
bool determine_repeating_words(void)
{
    int i = 0;
    struct stack_words *st = NULL;
    FILE *f;
    char symbol;
    bool flag;

    if (!(f = fopen("input_file.txt", "r+t"))) {
        printf("File couldn't be opened. Maybe it doesn't exist\n");
        return false;
    }

    if (!(st = (struct stack_words*)calloc(1, sizeof(struct stack_words))) ||
        !(st->word = (char*)calloc((STRING_LENGTH + 1), sizeof(char)))) {
        printf("Memory is not allocated\n");
        func_delete(st);
        exit(-1);
    }

    while (fscanf(f, "%c", &symbol) == 1) {
        if (symbol >= 'a' && symbol <= 'z') {
            /* The buffer holds STRING_LENGTH letters plus the terminator. */
            if (i < STRING_LENGTH) {
                st->word[i++] = symbol;
            }
        } else if (i != 0) {
            st->word[i] = '\0';
            st = insert_word(st, st->word);
            i = 0;
        }
    }

    /* Flush a word that was still being collected when the file ended. */
    if (i != 0) {
        st->word[i] = '\0';
        st = insert_word(st, st->word);
    }

    print_results(st);
    flag = func_delete(st);
    fclose(f);

    return flag;
}
/*
 * Reset the global dictionary `d`, then read every word from `infile` and add
 * it to `d`. Both the read and the insert step run with `lock` held.
 *
 * Always returns NULL; the original had no return statement although the
 * function is declared to return void *.
 */
void *words( FILE *infile )
{
    d = NULL;
    char wordbuf[MAXWORD];
    int have_words;

    pthread_mutex_lock(&lock);
    have_words = get_word( wordbuf, MAXWORD, infile );
    pthread_mutex_unlock(&lock);

    while( have_words ) {
        pthread_mutex_lock(&lock);
        d = insert_word(d, wordbuf); // add word to dictionary
        pthread_mutex_unlock(&lock);

        pthread_mutex_lock(&lock);
        have_words = get_word( wordbuf, MAXWORD, infile );
        pthread_mutex_unlock(&lock);
    }

    return NULL;
}
void* thread_worker(void* rank) { int words = 1, inword, c; long _self = (long) rank; char* _wb = malloc(sizeof(char) * (MAXWORD+1)); while (words) { inword = 0; pthread_mutex_lock(&guardian); c = fgetc(infile); dbg("[1] pthread_mutex_lock -\n"); while (c != EOF && inword < MAXWORD) { if (inword && !isalpha(c)) { _wb[inword] = '\0'; // terminate the word string break; } if (isalpha(c)) _wb[inword++] = c; c = fgetc(infile); } if (DEBUG) fprintf(outfile, "thread_num: %ld - %s\n", _self, _wb); pthread_mutex_unlock(&guardian); dbg("[1] pthread_mutex_unlock -\n\n"); if (c == EOF) break; pthread_mutex_lock(&guardian); dbg("[2] pthread_mutex_lock -\n"); d = insert_word(d, _wb); // add to dict pthread_mutex_unlock(&guardian); dbg("[2] pthread_mutex_unlock -\n\n"); } free(_wb); return NULL; }
void *words() { char wordbuf[MAXWORD]; int wordIn; pthread_mutex_lock(&fileMutex); wordIn = get_word(wordbuf, MAXWORD); pthread_mutex_unlock(&fileMutex); while(wordIn) { pthread_mutex_lock(&dictMutex); wd = insert_word(wd, wordbuf); // add to dict pthread_mutex_unlock(&dictMutex); pthread_mutex_lock(&fileMutex); wordIn = get_word(wordbuf, MAXWORD); pthread_mutex_unlock(&fileMutex); } pthread_mutex_lock(&dictMutex); First = wd; pthread_mutex_unlock(&dictMutex); pthread_exit(NULL); }
/*
 * Read `filename` line by line, split each line into runs of alphabetic
 * characters and insert every run into the global trie g_root.
 *
 * Returns 0 on success, -1 if the file cannot be opened.
 *
 * insert_word() receives a pointer to the first letter of the word and a
 * pointer to the character that ended it, exactly as before.
 */
int read_file(char *filename)
{
    FILE *fp;
    char line[512];

    fp = fopen(filename, "r");
    if (!fp) {
        printf("Unable to open file %s\n", filename);
        return (-1);
    }

    while (fgets(line, sizeof (line), fp) != NULL) {
        char *ch = line;
        char *start = NULL;     /* first letter of the word being scanned */

        printf("line = %s\n", line);
        g_stats.total_lines++;

        while (*ch != '\n' && *ch != '\0') {
            /* <ctype.h> functions need an unsigned char value. */
            if (!isalpha((unsigned char)*ch)) {
                insert_word(&g_root, start, ch);
                start = NULL;
            } else if (start == NULL) {
                start = ch;
            }
            ch++;
        }

        /*
         * The scan stops at the newline / terminator without inserting the
         * word that ends there, so the last word of every line was lost.
         * Flush it here. The trie insert_word() in this source reads the byte
         * after `end`, so skip the flush when `ch` is the very last byte of
         * the buffer.
         */
        if (start != NULL && ch < line + sizeof (line) - 1) {
            insert_word(&g_root, start, ch);
        }
    }

    fclose(fp);
    return (0);
}
/*
 * Insert the character range [start, end] into the compressed trie at `root`.
 *
 * Leading spaces are skipped. The longest prefix shared with the current
 * node's string is computed with find_longest_match(); depending on how much
 * of the node string and of the word is covered, the node is split and/or the
 * remainder of the word is attached to (or recursively inserted under) the
 * child selected by the next character.
 *
 * Children are indexed by (lowercased letter - 'a'). A character that maps
 * outside 0..MAX_ALPHA is counted in g_stats.total_ignored and the insertion
 * stops. The original only checked the upper bound, so a non-letter produced
 * a negative index into child[].
 *
 * NOTE(review): when an existing node is split, its previous child pointers
 * are not moved to the newly created node - confirm this is intended.
 */
void insert_word(trie_t *root, char *start, char *end)
{
    trie_t *curr;
    int indx;
    int word_len;
    int match_len;
    char next_ch;
    char temp_ch;

    if (!start) {
        return;
    }

    while (*start == ' ' && start++ < end);
    if (start >= end) {
        return;
    }

    curr = root;
    word_len = end - start + 1;
    match_len = find_longest_match(curr, start, end);

    /* Temporarily terminate the word so it can be printed, then restore. */
    temp_ch = start[word_len];
    start[word_len] = '\0';
    printf("longest match in %s for %s is %d\n", curr->str, start, match_len);
    start[word_len] = temp_ch;

    /* Node without a string of its own: go straight to the child. */
    if (curr->len == 0) {
        next_ch = *(start + match_len);
        indx = tolower((unsigned char)next_ch) - 'a';
        if (indx < 0 || indx > MAX_ALPHA) {
            g_stats.total_ignored++;
            return;
        }
        if (curr->child[indx] == NULL) {
            curr->child[indx] = create_new_node(start + match_len, word_len - match_len);
        } else {
            insert_word(curr->child[indx], start + match_len, end);
        }
        return;
    }

    /* Only part of the node string matched: move its tail into a child. */
    if (match_len < curr->len) {
        next_ch = curr->str[match_len];
        indx = tolower((unsigned char)next_ch) - 'a';
        if (indx < 0 || indx > MAX_ALPHA) {
            g_stats.total_ignored++;
            return;
        }
        curr->child[indx] = create_new_node(&curr->str[match_len], curr->len - match_len);
        if (curr->type == LEAF) {
            g_stats.total_leaf--;
        }
        curr->type = NODE;
    }

    curr->str[match_len] = '\0';
    curr->len = match_len;

    if (match_len == word_len) {
        /* The whole word ends at this node. */
        curr->type = LEAF;
        g_stats.total_leaf++;
    } else {
        /* Part of the word is left over: hang it below this node. */
        next_ch = *(start + match_len);
        indx = tolower((unsigned char)next_ch) - 'a';
        if (indx < 0 || indx > MAX_ALPHA) {
            g_stats.total_ignored++;
            return;
        }
        if (curr->child[indx] == NULL) {
            curr->child[indx] = create_new_node(start + match_len, word_len - match_len);
        } else {
            insert_word(curr->child[indx], start + match_len, end);
        }
    }
}
int main(int argc, char *argv[]) { if (argc != 2) { fprintf(stderr, "%s\n", "Usage: t9 [FILE]"); return 1; } FILE *file = fopen(argv[1], "r"); if (!file) { fprintf(stderr, "Error: file not found\n"); return 1; } // insert dictionary into trie Tnode *trie = init_trie(); char line[MAX_LINE_LENGTH]; while (fgets(line, MAX_LINE_LENGTH, file)) { insert_word(trie, line); } fclose(file); // current position in trie Tnode *currentNode = trie; char input[MAX_LINE_LENGTH]; printf("Enter \"exit\" to quit.\n"); printf("Enter Key Sequence (or \"#\" for next word):\n"); printf("> "); while (fgets(input, MAX_LINE_LENGTH, stdin)) { // remove newline char *trim; trim = strchr(input, '\n'); if (trim) { *trim = '\0'; } if (strcmp(input, "exit") == 0 || feof(stdin)) { break; } // check for #, else lookup word if (strcmp(input, "#") == 0) { if (currentNode && currentNode != trie) { printf(" %s\n", currentNode->word); currentNode = currentNode->nodes[8]; } else { printf(" %s\n", "There are no more T9onyms"); } } else { currentNode = lookup_word(trie, input); if (!currentNode || !currentNode->word) { printf(" %s\n", "Not found in current dictionary."); } else { printf(" \'%s\'\n", currentNode->word); currentNode = currentNode->nodes[8]; } } printf("Enter Key Sequence (or \"#\" for next word):\n"); printf("> "); } destroy_trie(trie); return 0; }
void initialize_dictionary(char* a_name_file){ //------------------------------------------------------------- //Retorno: // void // //Argumentos: // char* a_name_file: diretório/nome do arquivo que será aberto // //Descrição da função: // Inicializa a arvore com as palavras do dicionário //------------------------------------------------------------- FILE *lf_file; //lf_file = fopen("Debug/gramatica.txt","r"); if(a_name_file!=NULL) lf_file = fopen(a_name_file,"r"); if(lf_file!=NULL){ char lc_c; char* ls_str; long int li_file_size; gno_root_dictionary = NULL; fseek(lf_file, 0L, SEEK_END);//deslocar o curso para o fim para poder pegar seu tamanho maximo li_file_size = ftell(lf_file);//pegar o tamanho do arquivo fseek(lf_file,0,SEEK_SET);//setar o cursor do arquivo para o ��nicio lc_c=' ';//preenche com qualquer coisa para entao entrar no while while(lc_c!=EOF && ftell(lf_file) < li_file_size){//Loop para pegar o texto no arquivo ls_str=NULL; gs_palavra=NULL; fscanf(lf_file,"%c",&lc_c); while (is_letter(lc_c) && ftell(lf_file) < li_file_size){ gs_palavra = append(gs_palavra,lc_c); lc_c = lower(lc_c); ls_str = append(ls_str,lc_c); fscanf(lf_file,"%c",&lc_c); } if(ls_str!=NULL){ if(!is_letter(lc_c)){ insert_word(&gno_root_dictionary,ls_str); }else{//Necessário devido ao final de texto(código exclui a ultima letra por causa do while) gs_palavra = append(gs_palavra,lc_c); lc_c = lower(lc_c); ls_str = append(ls_str,lc_c); insert_word(&gno_root_dictionary,ls_str); } } } ls_str=NULL; gs_palavra = NULL; free(gs_palavra); free(ls_str); fclose(lf_file); }else{ printf("File (%s) not found!",a_name_file); exit(-1); } }
int main (int argc, char **argv) { struct arguments arguments; /* Parse our arguments; every option seen by parse_opt will be reflected in arguments. */ argp_parse (&argp, argc, argv, 0, 0, &arguments); // number of nearest neighbors int k; k = 1; //default is 1 if (sscanf (arguments.args[0], "%i", &k)!=1) {} //omp vars int num_threads; num_threads = 4; if (sscanf(arguments.args[1], "%i", &num_threads)!=1) {} //verbose? int verbose; verbose = arguments.verbose; if (verbose>0 && verbose<130){ verbose = 1; } else{ verbose = 0; } //define a bunch of counters! int i, j, m, n, ii, jj, kk; //number of examples to read in int total_examples = 10000; // int total_examples = 19; //max words per question int num_words = 300; //max word length int max_word_len = 20; //max vocab count // int max_vocab = 200000; //data read in poorly int bad_iter = 0; //Used to split into training and testing data (will train on example_num%train) int train = 10; //Debug int debug = 0; printf("k, Verbose, num_threads = %i, %i, %i\n", k, verbose, num_threads); //Allocate space for data being read in with fgets char *csv_line = malloc(sizeof(char)*1500); //store all data //array of structs //struct.question->array of char* //struct.cat->char* //struct.example_num->int struct data *all_data; all_data = malloc(sizeof(struct data)*total_examples); for (ii=0; ii<total_examples; ii++){ all_data[ii].question = malloc(sizeof(char*)*num_words); for (jj=0; jj<num_words; jj++){ // all_data[ii].question[jj] = malloc(sizeof(char)*max_word_len); all_data[ii].question[jj] = calloc(max_word_len, sizeof(char)); } all_data[ii].cat = malloc(sizeof(char)*max_word_len); } //store numeric version of data for algorithms struct numeric_data *num_data; num_data = malloc(sizeof(struct numeric_data)*total_examples); for (ii=0; ii<total_examples; ii++){ num_data[ii].array_of_features = malloc(sizeof(struct feature_count)*num_words); for (jj=0; jj<num_words; jj++){ num_data[ii].array_of_features[jj].feature_num = 0; 
num_data[ii].array_of_features[jj].count = 0; } } //store struct which keep track of the k nearest neighbors // struct distance_results results; // results.example_num = 0; // results.distances = calloc(k, sizeof(double)); // results.cat = calloc(k, sizeof(int)); // results.example_nums = calloc(k, sizeof(int)); // //struct used to calculate the mode of the k nearest neighbors // struct mode mod; // mod.count = calloc(k, sizeof(int)); // mod.cat = calloc(k, sizeof(int)); // //store vocabulary list (char** points to array of char* of length 20) // char **word_list; // word_list = malloc(sizeof(char*)*max_vocab); //assumes max_vocab total vocab // for (ii=0; ii<max_vocab; ii++){ // // word_list[ii] = malloc(sizeof(char)*max_word_len); //assumes max word length of 20 // word_list[ii] = calloc(max_word_len, sizeof(char)); //assumes max word length of 20 // } //alternate vocab store tree feature_tree *vocab; vocab = NULL; //store category list char **cat_list; cat_list = malloc(sizeof(char*)*40); //assumes 20 max categories for (ii=0; ii<40; ii++){ cat_list[ii] = malloc(sizeof(char)*max_word_len); strncpy(cat_list[ii], "\0", 1); } //Read in csv file FILE *f = fopen("train_pruned2.csv", "r"); if (f == NULL){ printf("Failed to open file \n"); return -1; } //parse question into individual words, create vocabulary list int vocab_count = 0; int category_count = 1; for (i=0; i<total_examples; i++){ // printf("Iteration = %i\n", i); //line in csv to buffer if (fgets(csv_line, 1500, f) == NULL){ printf("Fgets error!\n"); exit(0); } //csv line to 3 individual parts if (i>0) { char *tok; char *tok_copy; //problem with tok getting overwritten in parse_question // char **parsed_question = malloc(sizeof(char*)*num_words); // printf("CSV_LINE = %s\n", csv_line); tok = strtok(csv_line, "|"); if (tok == NULL){ // all_data[i-bad_iter-1].example_num = -1; bad_iter++; // i--; continue; } sscanf(tok, "%i", &all_data[i-bad_iter-1].example_num); tok = strtok(NULL, "|"); if (tok == NULL){ // 
all_data[i-bad_iter-1].example_num = -1; bad_iter++; // i--; continue; } tok_copy = (char *)tok; tok = strtok(NULL, "|"); if (tok == NULL){ // all_data[i-bad_iter-1].example_num = -1; bad_iter++; // i--; continue; } strncpy(all_data[i-bad_iter-1].cat, tok, 19); all_data[i-bad_iter-1].cat[max_word_len-1] = 0; char *tok2; tok2 = strtok(tok_copy, " \t"); j = 0; if ((tok2 != NULL) && (strlen(tok2)>3)){ strncpy(all_data[i-bad_iter-1].question[0], tok2, 19); all_data[i-bad_iter-1].question[0][max_word_len-1] = 0; //add to tree if not test data // if (all_data[i-bad_iter-1].example_num % train != 0){ insert_word(&vocab, all_data[i-bad_iter-1].question[0]); j += 1; // } } while (tok2 != NULL){ if (j>=num_words){ break; } tok2 = strtok(NULL, " \t"); if ((tok2 != NULL) && (strlen(tok2)>3)){ strncpy(all_data[i-bad_iter-1].question[j], tok2, 19); all_data[i-bad_iter-1].question[j][max_word_len-1] = 0; //add to tree if not test data // if (all_data[i-bad_iter-1].example_num % train != 0){ insert_word(&vocab, all_data[i-bad_iter-1].question[j]); j++; // } } } //end while // all_data[i-bad_iter-1] = instance; // print_data(&all_data[i-bad_iter-1]); ////add to vocabulary (using array, VERY slow with lots of data) // add_to_word_list(all_data[i-bad_iter-1].question, word_list, &vocab_count); //add to category list add_to_cat_list(all_data[i-bad_iter-1].cat, cat_list, &category_count); } //end if } //end for //close file fclose(f); //assign unique number to each feature //first feature is feature 1, feature 0 is for errors etc. 
unsigned int mm = 1; number_features(vocab, &mm); //Some of the csv rows aren't read in properly with fgets printf("Bad iterations = %i/%i\n", bad_iter, i); printf("Feature count = %i\n", count_features(vocab)); // print_inorder(vocab); // for (ii=0; ii<40; ii++){ // printf("%s", cat_list[ii]); // } ////turn data into numeric features//// for (i=0; i<total_examples; i++){ num_data[i].example_num = all_data[i].example_num; num_data[i].cat = get_cat_index(cat_list, all_data[i].cat); words_to_num(&num_data[i], &all_data[i], &vocab, num_words); // count_features2(&num_data[i]); } // num_data->array_of_features[0].feature_num = 44; // print_num_data(&num_data[0]); // print_num_data(&num_data[1]); total_examples = total_examples-bad_iter-1; int sadfjh; double av_feature_count = 0; for (ii=0; ii<total_examples; ii++){ sadfjh = count_features2(&num_data[ii]); av_feature_count += sadfjh; // printf("%i ", sadfjh); } // printf("\n av_feature_count %f\n", av_feature_count/(total_examples-bad_iter-1)); // print_num_data(&num_data[4464]); // printf("vocab->right = %s \n", vocab->feature); // print_data(&all_data[0]); // print_data(&all_data[29000]); // printf("%s, %u\n", "1829", get_feature_number(&vocab, "1829")); //find the distance between first example and rest double distance; //range each process will cover int range; // printf("%i, %i\n", range, total_examples); // printf("R, Min, Max = %i, %i, %i\n", rank, rank*range, (rank+1)*range); // struct distance_results results; // results.example_num = 0; // results.distances = calloc(k, sizeof(double)); // results.cat = calloc(k, sizeof(int)); // results.example_nums = calloc(k, sizeof(int)); // //struct used to calculate the mode of the k nearest neighbors // struct mode mod; // mod.count = calloc(k, sizeof(int)); // mod.cat = calloc(k, sizeof(int)); //correct/total/answer int c = 0; int total = 0; int answer; omp_set_dynamic(0); //Explicitly disable dynamic teams omp_set_num_threads(num_threads); //Specify thread count 
#pragma omp parallel \ private(kk, ii, distance, answer) \ reduction(+:c,total) \ shared(num_data) { //store struct which keep track of the k nearest neighbors struct distance_results results; results.example_num = 0; results.distances = calloc(k, sizeof(double)); results.cat = calloc(k, sizeof(int)); results.example_nums = calloc(k, sizeof(int)); //struct used to calculate the mode of the k nearest neighbors struct mode mod; mod.count = calloc(k, sizeof(int)); mod.cat = calloc(k, sizeof(int)); #pragma omp for for (kk=0; kk<total_examples; kk++){ // printf("Thread = %i, Iter = %i, c = %i, total=%i\n", omp_get_thread_num(), kk, c, total); //only test on test data if (num_data[kk].example_num%train != 0){ continue; } if (num_data[kk].cat == 0){ continue; } results.correct_answer = num_data[kk].cat; results.example_num = num_data[kk].example_num; for (ii=0; ii<k; ii++){ results.distances[ii] = 0; results.cat[ii] = 0; mod.count[ii] = 0; mod.cat[ii] = 0; } // print_num_data(&num_data[kk]); //calc distance to neighbors for (ii=0; ii<total_examples-1; ii++){ //don't calc distance to self if (kk != ii){ //Eliminate bad data (examples with few words tend to have low distances //reguardless of whether they are more similar... 
if (num_data[ii].total_features >= 40){ distance = get_distance(&num_data[kk], &num_data[ii], num_words); // if (distance < 2){ // continue; // } // printf("%f ", distance); if (num_data[ii].example_num > 0){ add_distance_to_results(&results, distance, k, num_data[ii].cat, num_data[ii].example_num); } } } } answer = calc_nearest_neighbor(&results, &mod, k); if (answer == results.correct_answer){ c += 1; } // printf("\n"); // for (ii=0; ii<k; ii++){ // printf("Distance, cat, example_num1, example_num2 = %2.2f, %i, %i, %i\n", // results.distances[ii], results.cat[ii], results.example_num, results.example_nums[ii]); // } // else{ // } total += 1; if (verbose>0 && debug>0){ printf("Thread = %i, Correct/Total = %i/%i Answer/Correct = %i/%i\n", omp_get_thread_num(), c, total, answer, results.correct_answer); } } //Thread results #pragma omp barrier if (omp_get_thread_num() == 0){ printf("/// Thread Results ///\n"); } #pragma omp barrier printf("Thread = %i, Correct/Total = %i/%i\n", omp_get_thread_num(), c, total); //free distance result free(results.distances); free(results.cat); //free mode struct free(mod.count); free(mod.cat); } printf("/// Final Results ///\n"); printf("Correct/Total = %i/%i\n", c, total); // printf("verbose = %i", verbose); ////free malloc calls//// //free feature tree free_feature_tree(vocab); //free numeric data for (ii=0; ii<total_examples; ii++){ free(num_data[ii].array_of_features); } free(num_data); // //free vocab list // for (ii=0; ii<max_vocab; ii++){ // free(word_list[ii]); // } // free(word_list); //free category list for (ii=0; ii<40; ii++){ free(cat_list[ii]); } free(cat_list); //free all_data list for (ii=0; ii<total_examples; ii++){ for (jj=0; jj<num_words; jj++){ free(all_data[ii].question[jj]); } free(all_data[ii].question); free(all_data[ii].cat); } free(all_data); //free var used to rean in csv free(csv_line); }