Exemple #1
0
/*
 * Insert a word into the binary search tree rooted at *root.
 *
 * root: address of the tree's root pointer; updated when the tree is empty.
 * word: NUL-terminated string to insert. NULL pointers and words shorter
 *       than 4 characters are ignored, as are words already in the tree.
 *
 * The tree is ordered with strcmp(): smaller keys go left, larger go right.
 * On allocation failure the tree is left unchanged.
 */
void insert_word(feature_tree **root, char *word){
	feature_tree **link = root;
	feature_tree *node;
	size_t len;
	size_t cap;

	if (root == NULL || word == NULL){
		return;
	}

	len = strlen(word);
	if (len < 4){
		return;
	}

	/* Walk down to the empty slot where the word belongs. */
	while (*link){
		int cmp = strcmp(word, (*link)->feature);
		if (cmp == 0){
			return; /* already present */
		}
		link = (cmp < 0) ? &(*link)->left : &(*link)->right;
	}

	/*
	 * The buffer used to be a fixed 20 bytes, which strcpy() overflowed for
	 * longer words. Keep 20 as the minimum and grow it when the word needs
	 * more room.
	 */
	cap = (len + 1 < 20) ? 20 : len + 1;

	node = malloc(sizeof *node);
	if (node == NULL){
		return;
	}
	node->feature = malloc(cap);
	if (node->feature == NULL){
		free(node);
		return;
	}
	memcpy(node->feature, word, len + 1);
	node->feature_num = 0;
	node->left = NULL;
	node->right = NULL;
	*link = node;
}
Exemple #2
0
END_TEST

/*
 * Loads every line of the system dictionary into the graph, then re-reads
 * the file and asserts that each word can be found again.
 */
START_TEST (ut_graph_load_dictionary)
{
  init_node(graph);
  
  FILE *fp = fopen("/usr/share/dict/words", "r");
  /* Fail the test cleanly instead of passing NULL to fgetws(). */
  ck_assert_msg(fp != NULL, "Can't open /usr/share/dict/words\n");
  wchar_t word[1000];
  size_t len;
  
  while (fgetws(word, 80, fp)) 
  {
    /* Strip the newline only when there is one; the last line may lack it. */
    len = wcslen(word);
    if (len > 0 && word[len-1] == L'\n')
    {
      word[len-1] = L'\0';
    }
    insert_word(graph, word);
  }
  rewind(fp);
 
  while (fgetws(word, 80, fp)) 
  {
    len = wcslen(word);
    if (len > 0 && word[len-1] == L'\n')
    {
      word[len-1] = L'\0';
    }
    ck_assert_msg(find_word(graph, word) == 1, "Can't find word %ls in full dictionary test\n", word);
  }
  fclose(fp);

  clear_node(graph);
}
Exemple #3
0
/*
 * Try to place `word` in the word search `ws`.
 *
 * Starting from a random position, every position produced by
 * position_iterate() is scored with fit_word_score(); the highest-scoring
 * one is kept, and the scan stops early when a score equals the word
 * length. The word is inserted at the best position found.
 *
 * Returns true when some position scored >= 0 and the word was inserted.
 */
bool fit_word( word_search_t *ws, wchar_t *word )
{
	position_t cursor, winner;
	int top_score = -1;
	int word_len = wcslen(word);
	bool placed;

	// Start at a random location looking for places to fit the word
	position_create_random( ws, &cursor );

	for (;;) {
		int score = fit_word_score( ws, word, &cursor );

		if( score > top_score ) {
			// Release the previous best before replacing it.
			if( top_score >= 0 ) {
				position_free( &winner );
			}
			top_score = score;
			position_copy( &winner, &cursor );
			if( top_score == word_len ) {
				break;
			}
		}

		if( !position_iterate( &cursor ) ) {
			break;
		}
	}

	placed = ( top_score >= 0 );
	if( placed ) {
		insert_word( ws, word, &winner );
		position_free( &winner );
	}

	position_free( &cursor );

	return placed;
}
/*
 * Read `filename` line by line and insert each line, with its trailing
 * newline removed, into `filter`.
 *
 * If the file cannot be opened the error is reported through err() and the
 * function returns without touching the filter.
 */
void load_words(unsigned char filter[], char *filename)
{
	FILE *fp = fopen(filename, "r");
	char word[WORD_BUF_SIZE + 1];

	if (!fp) {
		err("[e] %s: can't open file \"%s\"\n", __FUNCTION__, filename);
		/* In case err() returns: never hand a NULL stream to fgets()/fclose(). */
		return;
	}

	while (fgets(word, WORD_BUF_SIZE, fp)) {
		int len = strlen(word);

		/* A completely filled buffer means the line was probably truncated. */
		if (len == WORD_BUF_SIZE - 1) {
			err("[e] WORD_BUF_SIZE is small\n");
		}

		word[strcspn(word, "\n")] = 0;
		len--;

		/* NOTE(review): IF_CNT is a macro defined elsewhere; it may read `len`. */
		IF_CNT
			insert_word(filter, word);
	}

	fclose(fp);
}
Exemple #5
0
/*
 * Word-frequency driver: reads words until the sentinel "* no more *",
 * keeps each distinct lower-cased word in a fixed-size table, then prints
 * the sorted table.
 */
int main (void)
{
	enum { MAX_WORDS = 200, WORD_LEN = 21 };

	char  s[WORD_LEN],                /* a temp word variable */
	      table[MAX_WORDS][WORD_LEN]; /* the table of words   */
	int ns[MAX_WORDS];                /* array of occurrences */
	int n;                            /* count variable       */

	/* Initialise every slot; the old loop stopped one short of the end. */
	for (n = 0; n < MAX_WORDS; n++) ns[n] = 1;
	n = 0;
	do
	{
		get_word(s);    /* input a word from the input */

		if (!strcmp(s, "* no more *"))
		{
			/* End of input: print the sorted table and stop. */
			print_table(sort(table, ns, n), n, ns);
			return(0);
		}
		convert_word(s);   /* make the word lower case */
		if (!lookup(s, table, n, ns))
		{
			/* The word is not in the table yet. */
			if (n >= MAX_WORDS)
			{
				/* Table full: report and skip rather than write past the arrays. */
				fprintf(stderr, "word table full, ignoring \"%s\"\n", s);
				continue;
			}
			insert_word(s, table, n);
			n++;
		}
	} while (1);
	/* Runs until the sentinel word is read. */
}
Exemple #6
0
END_TEST

/*
 * Loads the system dictionary into the graph, then prints every result
 * returned by a prefix search for "app".
 */
START_TEST (ut_graph_load_dictionary_prefix_search)
{
  init_node(graph);

  FILE *fp = fopen("/usr/share/dict/words", "r");
  /* Fail the test cleanly instead of passing NULL to fgetws(). */
  ck_assert_msg(fp != NULL, "Can't open /usr/share/dict/words\n");
  wchar_t word[1000];

  while (fgetws(word, 80, fp))
  {
    /* Strip the newline only when there is one; the last line may lack it. */
    size_t len = wcslen(word);
    if (len > 0 && word[len-1] == L'\n')
    {
      word[len-1] = L'\0';
    }
    printf("%ls\n", word);
    insert_word(graph, word);
  }
  fclose(fp);

  PrefixResult *result = search_prefix(graph, L"app");

  /* NOTE(review): the result list is never released here; confirm who owns it. */
  while(result != NULL)
  {
    printf("%ls\n", result->word);
    result = result->next;
  }

  clear_node(graph);
}
Exemple #7
0
/*
 * Walks the payload list from its tail towards the head and, for each node
 * that has a predecessor, inserts words before processing the predecessor's
 * gadget inputs.
 *
 * Everything operates on file-level state (payload, inputs_vector_*,
 * written, store_*, key_instructions); nothing is returned.
 */
void write_data(){
  struct Lnode *actual_node = NULL;
  struct Lnode *next_node = NULL;
  struct Lnode *ptr = NULL;
  int i, *Outputs;
  uint32_t value;

  /* Reset the input-state vector and the "already written" flags. */
  clear_vector(inputs_vector_state, 14, 0);
  clear_vector(written, 4, 0);

  /* Seed slot 7 and the r3/r4 store tables relative to r_w_addr. */
  inputs_vector_state[7] = 1;
  inputs_vector_values[7] = 11;
  store_count = 0;
  store_values_r3[0] = 0;
  store_values_r3[1] = SH_HEX;
  store_values_r3[2] = BIN_HEX;
  store_values_r4[0] = key_instructions.r_w_addr + 8;
  store_values_r4[1] = key_instructions.r_w_addr + 4;
  store_values_r4[2] = key_instructions.r_w_addr;

  for(actual_node = payload->tail; actual_node != NULL; actual_node = actual_node->prev){
    /* "next" in processing order is the previous node in the list. */
    next_node = actual_node->prev;
    if(next_node == NULL) break;

    /* Advance ptr to the last node of the predecessor's gadget chain. */
    for(ptr = (GETPOINTER(next_node, payload_gadget_t))->gadget;
          ptr != NULL && ptr->next != NULL; ptr = ptr->next); //'ret'
    /* NOTE(review): if the gadget chain is empty, ptr is NULL here — confirm GETPOINTER tolerates that. */
    Outputs = (GETPOINTER(ptr, Gadget_t))->Outputs;

    /* The first time outputs 0, 1 and 2 are seen, fix their input values. */
    if(Outputs[0] && !written[0]){
      written[0] = 1;
      inputs_vector_state[0] = 1;
      inputs_vector_values[0] = key_instructions.r_w_addr;
    }
    if(Outputs[1] && !written[1]){
      written[1] = 1;
      inputs_vector_state[1] = 1;
      inputs_vector_values[1] = key_instructions.r_w_addr + 8;
    }
    if(Outputs[2] && !written[2]){
      written[2] = 1;
      inputs_vector_state[2] = 1;
      inputs_vector_values[2] = 0;
    }

    /*
     * Insert one word per set output, highest index first. insert_word()
     * returns the node that becomes the new cursor, so the outer loop
     * continues from there.
     * NOTE(review): this reads Outputs[0..14] (15 entries) while the state
     * vector above is cleared with a count of 14 — confirm the sizes agree.
     */
    for(i = 14; i >= 0; i--){
      if(Outputs[i]){
        value = getValue(i);
        actual_node = insert_word(value, actual_node);
      }
    }

    /* After output 7 is first seen, slot 7 falls back to padding. */
    if(Outputs[7] && !written[3]){
      written[3] = 1;
      inputs_vector_values[7] = WORD_PADDING;
    }

    process_inputs(GETPOINTER(next_node, payload_gadget_t)->gadget);
  }
}
Exemple #8
0
/*
 * Record a fingerprint -> container mapping: store it in the database, add
 * the raw fingerprint bytes to the filter, mark the index dirty and bump
 * both write counters.
 */
void ddfs_index_update(Fingerprint* finger, ContainerId id) {
	/* Persistent store first, then the in-memory filter. */
	db_insert_fingerprint(finger, id);
	insert_word(filter, (char*) finger, sizeof(Fingerprint));

	dirty = TRUE;
	index_write_times += 1;
	index_write_entry_counter += 1;
}
Exemple #9
0
/*
 * Read words from `infile` with get_word() and insert each one into the
 * file-level dictionary `d`.
 *
 * Always returns NULL; the dictionary is published through `d`.
 */
void *words( FILE *infile ) {
  char wordbuf[MAXWORD];

  while( get_word( wordbuf, MAXWORD, infile ) ) {
    d = insert_word(d, wordbuf); // add to dict
  }

  /* The declared return type is void *, so a value must be returned. */
  return NULL;
}
/*
 * Test driver for the matrix variant of insert_word().
 *
 * For each of the 10 cases it loads an input matrix and the expected
 * matrix from "matrices/", inserts words[i] at the position and direction
 * given by positions[i], prints all three matrices, and counts the case as
 * a success when the matrices compare equal and the return value matches
 * positions[i][3].
 */
int main(int argc, char** argv) {  
  const int total_test_number = 10;
  int successes = 0;
  char buffer[1024];
  int rows, cols;

  printf("\n");
  for(int i = 0; i < total_test_number; ++i) {
    printf("Test number %d out of %d\n", i+1, total_test_number);

    /* positions[i][0] selects the direction: 0 is horizontal. */
    if(positions[i][0] != 0) {
      printf("Insert the word %s vertically in [%d, %d]\n", words[i], positions[i][1], positions[i][2]);
    }
    else {
      printf("Insert the word %s horizontally in [%d, %d]\n", words[i], positions[i][1], positions[i][2]);
    }

    printf("Input:\n");
    sprintf(buffer, "matrices/matrix%d.txt", i);
    char** input_matrix = matrixcLoad(&rows, &cols, buffer);
    matrixcPrint(input_matrix, rows, cols);

    printf("Expected:\n");
    sprintf(buffer, "matrices/expected_matrix%d.txt", i);
    char** expected_matrix = matrixcLoad(&rows, &cols, buffer);
    matrixcPrint(expected_matrix, rows, cols);

    int inserted = insert_word(words[i], input_matrix, 12, 14, positions[i][1], positions[i][2], positions[i][0]);

    printf("Result:\n");
    matrixcPrint(input_matrix, rows, cols);

    int result = matrixcCmp(input_matrix, expected_matrix, rows, cols);
    int passed = (result > 0 && inserted == positions[i][3]);
    if(!passed) {
      printf("FAILURE\n");
    }
    else {
      printf("SUCCESS\n");
      successes++;
    }

    printf("Current success rate: %0.02f%%\n", (float)(successes) * 100.0f/ (float)(total_test_number));
    printf("\n*************************************************\n\n");

    matrixcFree(input_matrix, rows);
    matrixcFree(expected_matrix, rows);
  }

  printf("FINAL SUCCESS RATE: %0.02f%%\n", (float)(successes) * 100.0f / (float)(total_test_number));

  return 0;
}
Exemple #11
0
// Feeds one character to the iterator.
//
// Whitespace ends the current word: a non-empty word buffer is passed to
// insert_word(). Any other character is appended to the word buffer.
// Each call also records this object through active_instance_.
FormattedOstreamIterator& FormattedOstreamIterator::operator=(char c) {
  *active_instance_ = this;
  // std::isspace has undefined behaviour for negative values other than EOF,
  // so convert to unsigned char first (plain char may be signed).
  if (std::isspace(static_cast<unsigned char>(c))) {
    if (!word_buffer_.empty()) {
      insert_word();
    }
  } else {
    word_buffer_.push_back(c);
  }
  return *this;
}
Exemple #12
0
void insert_word(No** a_root, char* a_word){
	//-------------------------------------------------------------
	//Returns:
	//			void;
	//
	//Arguments:
	//			No** a_root: address of the trie node pointer where the next
	//			             letter is inserted; the node is created if NULL;
	//			char* a_word: remaining part of the string to insert;
	//
	//Description:
	//			Recursive insertion into the trie. Each letter selects the
	//			child slot (letter - 'a'). When the string is exhausted the
	//			node's `exists` field is set to the global gs_palavra.
	//			Insertion stops at the first character that does not map to
	//			a valid child slot, and at allocation failure.
	//-------------------------------------------------------------

	int i;
	int li_index;

	if (a_root == NULL || a_word == NULL){
		return;
	}

	if (*a_root == NULL){//Node missing: create and initialise it

		*a_root = (No*)malloc(sizeof(No));
		if (*a_root == NULL){
			return;//Out of memory: leave the trie as it is
		}

		for (i = 0;i<c_alphabet_length;i++){
			(*a_root)->sheet[i]=NULL;//Clear uninitialised child pointers
		}
		(*a_root)->line=NULL;
		(*a_root)->exists=false;
	}

	if (a_word[0]=='\0'){//End of word
		(*a_root)->exists=gs_palavra;
		return;
	}

	//note: 'a' - 'a' = 0, so 'a' uses slot 0
	li_index = a_word[0]-'a';
	if (li_index < 0 || li_index >= c_alphabet_length){
		return;//Character has no child slot; avoid indexing outside sheet[]
	}

	insert_word(&(*a_root)->sheet[li_index],a_word+1);
}
Exemple #13
0
/*
 * Thread entry point: `args` is the FILE * to read from. Every word
 * returned by get_word() is inserted into the file-level dictionary `d`.
 *
 * Always returns NULL.
 */
void * dic_words(void * args){

  FILE * infile = (FILE *) args;
  
  char w_buf[MAXWORD];
  
  while(get_word( w_buf, MAXWORD, infile ) ) {

    d = insert_word(d, w_buf); 
  }

  /* The declared return type is void *, so a value must be returned. */
  return NULL;
}
Exemple #14
0
/*
 * Thread entry point: `args` is the FILE * to read from. Every word
 * returned by get_word() is inserted into the file-level dictionary `d`.
 *
 * NOTE(review): the mutex calls around insert_word() were commented out in
 * the original, so `d` is updated without locking — confirm this is only
 * run by one thread.
 *
 * Always returns NULL.
 */
void * words(void * args){

  FILE * infile = (FILE *) args;
  char wordbuf[MAXWORD];
 
  while(get_word( wordbuf, MAXWORD, infile ) ) {
    d = insert_word(d, wordbuf); // add to dict
  }

  /* The declared return type is void *, so a value must be returned. */
  return NULL;
}
Exemple #15
0
/*
 * Read words from TData->infile into TData->wordbuf and insert each into
 * TData->dict, holding mutexdict across each read + insert pair.
 *
 * Returns the resulting dictionary.
 */
dict_t *
words( thread_data *TData ) {
  
  int c;
  do {
  	pthread_mutex_lock(&mutexdict);
  	
  	c=get_word( TData->wordbuf, MAXWORD, TData->infile );
  	
  	/* Insert only when a word was read. The loop ends on c == 0, and the
  	   old code inserted the stale buffer once more on that last pass. */
  	if (c) {
  	  TData->dict = insert_word(TData->dict, TData->wordbuf); // add to dict
  	}
    pthread_mutex_unlock(&mutexdict);
  }while(c);
  
  return TData->dict;
}
Exemple #16
0
/*
 * Prompt for a word on stdin and insert it into `dict`, reporting whether
 * it was added or already present.
 */
void add(gdsl_bstree_t dict){
	char *buffer=NULL;
	size_t len=0;
	gdsl_constant_t result;
	getchar();//gets rid of the newline.
	printf("Input a word to add: ");
	if(getline(&buffer,&len,stdin)==-1){
		/* EOF or read error: nothing to insert. getline() may still have
		   allocated the buffer, so release it. */
		free(buffer);
		printf("No word was read.\n");
		return;
	}
	/* NOTE(review): the line is inserted with its trailing newline and the
	   buffer is not freed here; presumably insert_word() keeps the pointer —
	   confirm ownership before adding a free(). */
	insert_word(dict,buffer,&result);
	if(result!=GDSL_INSERTED){
		printf("That word already exists in the dictionary.\n");
	}else{
		printf("Word added.\n");
	}
}
Exemple #17
0
/*
 * Worker thread body: repeatedly fetches a word under `wordmut` and inserts
 * it under `mutex`, until get_word() returns 0. Ends with pthread_exit(NULL).
 */
void*
thread_stuff(void* arg){
  char word[MAXWORD];

  for (;;){
    int got;

    /* Reading is serialised by its own lock. */
    pthread_mutex_lock(&wordmut);
    got = get_word(word);
    pthread_mutex_unlock(&wordmut);

    if (got == 0){
      break;
    }

    /* Insertion is serialised by a second lock. */
    pthread_mutex_lock(&mutex);
    insert_word(word);
    pthread_mutex_unlock(&mutex);
  }
  pthread_exit(NULL);
}
Exemple #18
0
/*
 * Thread body: builds a private dictionary from the words read from
 * `infile` and hands it back through pthread_exit().
 *
 * The two mutexes are static and statically initialised so that every
 * thread running this function shares the same, valid locks. They were
 * uninitialised stack variables before, which is undefined behaviour and
 * gives no mutual exclusion between threads.
 */
void *
words( FILE *infile ) {
  static pthread_mutex_t mutex1 = PTHREAD_MUTEX_INITIALIZER; /* guards insert_word() */
  static pthread_mutex_t mutex2 = PTHREAD_MUTEX_INITIALIZER; /* guards get_word()    */
  dict_t *wd = NULL;
  char wordbuf[MAXWORD]; 
  int x;

  /* The first read is guarded like every later one. */
  pthread_mutex_lock (&mutex2);
  x = get_word( wordbuf, MAXWORD, infile);
  pthread_mutex_unlock (&mutex2);

  while( x ) {
    pthread_mutex_lock (&mutex1);
    wd = insert_word(wd, wordbuf); // add to dict
    pthread_mutex_unlock (&mutex1);
    pthread_mutex_lock (&mutex2);
    x = get_word( wordbuf, MAXWORD, infile);
    pthread_mutex_unlock (&mutex2);
  }
  pthread_exit(wd);
}
Exemple #19
0
/*
typedef struct aThread {
  int file;
  double info;
} aThread_t;
*/
/*
 * Worker thread body: reads one word at a time under `wlock` and inserts
 * it under `mlock`; stops when get_word() returns 0 and then calls
 * pthread_exit(NULL).
 */
void* 
mythread(void* arg){

	char wordbuf[MAXWORD];
	int more;

	do {
		/* Fetch the next word while holding the reader lock. */
		pthread_mutex_lock(&wlock);
		more = get_word(wordbuf);
		pthread_mutex_unlock(&wlock);

		if (more != 0) {
			/* Insert it while holding the dictionary lock. */
			pthread_mutex_lock(&mlock);
			insert_word(wordbuf);
			pthread_mutex_unlock(&mlock);
		}
	} while (more != 0);

	pthread_exit(NULL);
}
Exemple #20
0
/*
Parses a given file with a GScanner and stores a word count in the given GHashTable.
Every identifier token with a non-NULL string value is passed to insert_word().
@param {char *} filename - path of the file to parse
@param {GHashTable *} table - hash table in which to store the word counts
*/
void parse_file(char* filename, GHashTable* table) {
  GScanner* scanner = make_scanner(filename);

  for (;;) {
    g_scanner_get_next_token(scanner);
    if (scanner->token == G_TOKEN_EOF) break;

    /* Only identifier tokens carry a word; everything else is skipped. */
    if (scanner->token == G_TOKEN_IDENTIFIER && scanner->value.v_string != NULL) {
      insert_word(scanner->value.v_string, table);
    }
  }
  g_scanner_destroy(scanner);
}
/*
 * Counts the occurrences of each space-separated word of a fixed sentence
 * in a dictionary list and prints the result.
 */
int main(){
	struct dictionary *d = NULL, *tmp;
	char *text = "Ciao a tutti ciao ciao a tutti tutti ciao ciao ciao tutti ciao a a";
	char *word;
	
	do{
		word = next_word(text);
		if(d==NULL){
			/* The first word creates the dictionary with a count of 1. It
			   must not also go through the search-and-increment branch,
			   which used to count it twice. */
			d=new_dictionary_element(word, 1);
		} else if( (tmp = search_word(d, word)) != NULL){
			tmp->occurences++;
		} else {
			insert_word(d, word, 1);
		} 
		/* Advance to the character after the next space, if there is one. */
		text = strchr(text, ' ');
		if(text != NULL)
			text++;
	}while( text != NULL );
	print_dictionary(d);
	return 0;
}
Exemple #22
0
/*
 * Reads "input_file.txt", splits it into runs of lower-case letters a-z and
 * passes each run to insert_word(); then prints the results and releases
 * the stack.
 *
 * Returns false when the file cannot be opened, otherwise the value
 * returned by func_delete(). Exits the process when memory cannot be
 * allocated.
 */
bool determine_repeating_words(void)
{
	int i = 0;
	struct stack_words *st = NULL;
	FILE *f;
	char symbol;
	bool flag;
	if (!(f = fopen("input_file.txt", "r+t")))
	{
		printf("File couldn't be opened. Maybe it doesn't exist\n");
		return false;
	}
	if (!(st = (struct stack_words*)calloc(1, sizeof(struct stack_words))) ||
		!(st->word = (char*)calloc((STRING_LENGTH + 1), sizeof(char))))
	{
		printf("Memory is not allocated\n");
		func_delete(st);
		fclose(f);
		exit(-1);
	}
	/* Loop on the read result: testing feof() first processes the last
	   character twice, because EOF is only flagged after a failed read. */
	while (fscanf(f, "%c", &symbol) == 1)
	{
		if (symbol >= 'a' && symbol <= 'z')
		{
			/* Letters beyond STRING_LENGTH are dropped rather than written
			   past the end of the buffer. */
			if (i < STRING_LENGTH)
			{
				st->word[i++] = symbol;
			}
		}
		else if (i != 0)
		{
			st->word[i] = '\0';
			st = insert_word(st, st->word);
			i = 0;
		}
	}
	/* A word that runs up to end-of-file has no separator after it. */
	if (i != 0)
	{
		st->word[i] = '\0';
		st = insert_word(st, st->word);
	}
	/* As before, only func_delete()'s result is reported to the caller. */
	print_results(st);
	flag = func_delete(st);
	fclose(f);
	return flag;
}
Exemple #23
0
/*
 * Reads words from `infile` and inserts them into the file-level
 * dictionary `d`, taking `lock` around every read and every insert.
 *
 * NOTE(review): `d` is reset to NULL on entry without holding the lock —
 * confirm this function is not started by several threads at once.
 *
 * Always returns NULL.
 */
void *words( FILE *infile ) {
  d = NULL;
  char wordbuf[MAXWORD];

  pthread_mutex_lock(&lock);
  int have_words = get_word( wordbuf, MAXWORD, infile );
  pthread_mutex_unlock(&lock);

  while( have_words ) {
    // add word to dictionary
    pthread_mutex_lock(&lock);
    d = insert_word(d, wordbuf); 
    pthread_mutex_unlock(&lock);

    pthread_mutex_lock(&lock);
    have_words = get_word( wordbuf, MAXWORD, infile );
    pthread_mutex_unlock(&lock);
  }

  /* The declared return type is void *, so a value must be returned. */
  return NULL;
}
Exemple #24
0
/*
 * Worker thread: repeatedly extracts the next run of alphabetic characters
 * from the shared `infile` (under `guardian`) and inserts it into the
 * shared dictionary `d` (also under `guardian`), until end of file.
 *
 * rank: thread number, carried in the pointer value; used only for debug output.
 * Returns NULL.
 */
void*
thread_worker(void* rank) {
	int inword, c, at_eof;
	long thread_id = (long) rank;
	char* word_buf = malloc(sizeof(char) * (MAXWORD+1));

	if (word_buf == NULL) {
		return NULL;
	}

	for (;;) {
		inword = 0;

		pthread_mutex_lock(&guardian);
		c = fgetc(infile);
		dbg("[1] pthread_mutex_lock -\n");
		while (c != EOF && inword < MAXWORD) {
			if (inword && !isalpha(c)) {
				break; // first non-letter after a word ends it
			}

			if (isalpha(c)) word_buf[inword++] = c;
			c = fgetc(infile);
		}
		/* Always terminate. The loop can also stop on EOF or on MAXWORD,
		   and the buffer has MAXWORD+1 bytes so the index is in range. */
		word_buf[inword] = '\0';

		if (DEBUG) fprintf(outfile, "thread_num: %ld - %s\n", thread_id, word_buf);
		pthread_mutex_unlock(&guardian);

		dbg("[1] pthread_mutex_unlock -\n\n");

		at_eof = (c == EOF);
		if (at_eof && inword == 0) break; // nothing left to insert

		pthread_mutex_lock(&guardian);
		dbg("[2] pthread_mutex_lock -\n");

		d = insert_word(d, word_buf);              // add to dict

		pthread_mutex_unlock(&guardian);
		dbg("[2] pthread_mutex_unlock -\n\n");

		/* A word that ended exactly at EOF has now been inserted. */
		if (at_eof) break;
	}

	free(word_buf);
	return NULL;
}
Exemple #25
0
/*
 * Thread body: alternates between reading a word under fileMutex and
 * inserting it into `wd` under dictMutex until get_word() returns 0, then
 * publishes `wd` through `First` and exits the thread.
 */
void *words() {
	
	char wordbuf[MAXWORD];

	for (;;) {
		int got_word;

		/* Fetch the next word while holding the file lock. */
		pthread_mutex_lock(&fileMutex);
		got_word = get_word(wordbuf, MAXWORD);
		pthread_mutex_unlock(&fileMutex);

		if (!got_word) {
			break;
		}

		/* Add it to the dictionary while holding the dictionary lock. */
		pthread_mutex_lock(&dictMutex);
		wd = insert_word(wd, wordbuf); // add to dict
		pthread_mutex_unlock(&dictMutex);
	}

	pthread_mutex_lock(&dictMutex);
	First =  wd;
	pthread_mutex_unlock(&dictMutex);
	pthread_exit(NULL);
}
Exemple #26
0
/*
 * Read `filename` line by line, split each line into runs of alphabetic
 * characters and pass each run to insert_word() as a [start, end) pair,
 * where `end` points at the character that terminated the run.
 *
 * Returns 0 on success, -1 when the file cannot be opened.
 */
int
read_file(char *filename)
{
	FILE	*fp;
	char	line[512];

	fp = fopen(filename, "r");
	if (!fp) {
		printf("Unable to open file %s\n", filename);
		return (-1);
	}

	while (fgets(line, sizeof (line), fp) != NULL) {
		/* read a character at a time and insert it into the trie */
		char	*ch = line;
		char	*start = NULL;

		printf("line = %s\n", line);
		g_stats.total_lines++;
		while (*ch != '\n' && *ch != '\0') {
			/* The unsigned char cast keeps isalpha() defined for bytes >= 0x80. */
			if (!isalpha((unsigned char)*ch)) {
				insert_word(&g_root, start, ch);
				start = NULL;
			} else if (start == NULL) {
				start = ch;
			}
			ch++;
		}

		/*
		 * A word that runs up to the newline (or to end-of-file) has no
		 * delimiter inside the loop, so it was never inserted. Flush it
		 * here. When the buffer is completely full the line was truncated
		 * and `ch` sits on the final byte of `line`; that case is skipped
		 * so the callee is never handed an `end` on the last byte.
		 */
		if (start != NULL && ch < line + sizeof (line) - 1) {
			insert_word(&g_root, start, ch);
		}
	}
	fclose(fp);
	return (0);
}
Exemple #27
0
/*
 * Insert the characters between `start` and `end` into the trie node
 * `root`, splitting the node's string and/or descending into a child as
 * needed.
 *
 * root:  node to insert under; its str/len/type/child fields are updated.
 * start: first character of the word; NULL is ignored.
 * end:   end pointer of the word; the length used is end - start + 1.
 *
 * NOTE(review): the caller visible in this file passes `end` pointing at
 * the delimiter after the word, which would make word_len include that
 * delimiter — confirm the intended convention.
 */
void
insert_word(trie_t *root, char *start, char *end)
{
	trie_t	*curr;
	int	indx;
	int	word_len;
	int	match_len;
	char	next_ch;
	char	temp_ch;

	if (!start) {
		return;
	}

	/* Skip leading spaces; give up if nothing is left before `end`. */
	while (*start == ' ' && start++ < end);
	if (start >= end) {
		return;
	}

	curr = root;
	word_len = end - start + 1;

	match_len = find_longest_match(curr, start, end);
	/* Temporarily NUL-terminate the word in place so it can be printed. */
	/* NOTE(review): start[word_len] is one past `end`; confirm that byte is always inside the caller's buffer. */
	temp_ch = start[word_len];
	start[word_len] = '\0';
	printf("longest match in %s for %s is %d\n", curr->str, start, match_len);
	start[word_len] = temp_ch;

	/*
	 * split based on the length being matched
	 * match len has to be less than/equal to the curr node string
	 * if remaining in the current node, then
	 *	we need to create a new node, copy the remaining str
	 * 	null terminate
	 * if remaining in the current word, then,
	 *	we need to create a new node, copy the remaining str
	 */

	/* Node with an empty string: dispatch straight to the child slot. */
	if (curr->len == 0) {
		next_ch = *(start + match_len);
		indx = tolower(next_ch) - 'a';
		/* NOTE(review): only the upper bound is checked; a character below 'a' gives a negative index. */
		if (indx > MAX_ALPHA) {
			g_stats.total_ignored++;
			return;
		}

		if (curr->child[indx] == NULL) {
			curr->child[indx] = create_new_node(start + match_len,
			    word_len - match_len);
		} else {
			insert_word(curr->child[indx], start + match_len,
			    end);
		}

		return;
	}

	/* Partial match: move the unmatched tail of this node's string into a new child. */
	if (match_len < curr->len) {
		next_ch = curr->str[match_len];
		indx = tolower(next_ch) - 'a';
		if (indx > MAX_ALPHA) {
			g_stats.total_ignored++;
			return;
		}
		/* NOTE(review): this overwrites child[indx] and does not carry over curr's existing children — confirm. */
		curr->child[indx] = create_new_node(&curr->str[match_len],
		    curr->len - match_len);		
		if (curr->type == LEAF) {
			g_stats.total_leaf--;
		}
		curr->type = NODE;
	}

	/* Truncate this node's string to the matched prefix. */
	curr->str[match_len] = '\0';
	curr->len = match_len;

	if (match_len == word_len) {
		/* The whole word matched: this node now ends a word. */
		curr->type = LEAF;
		g_stats.total_leaf++;
	} else {
		/* Part of the word is left: create or descend into the child for its next character. */
		next_ch = *(start + match_len);
		indx = tolower(next_ch) - 'a';
		if (indx > MAX_ALPHA) {
			g_stats.total_ignored++;
			return;
		}

		if (curr->child[indx] == NULL) {
			curr->child[indx] = create_new_node(start + match_len,
			    word_len - match_len);
		} else {
			insert_word(curr->child[indx], start + match_len,
			    end);
		}
	}
}
Exemple #28
0
/*
 * T9 lookup program.
 *
 * Loads every line of the dictionary file named on the command line into a
 * trie, then reads key sequences from stdin: a sequence is looked up in
 * the trie, "#" advances to the next word through nodes[8], and "exit"
 * (or end of input) quits.
 *
 * Returns 0 on normal exit, 1 on a usage error or unreadable file.
 */
int main(int argc, char *argv[]) {
  if (argc != 2) {
    fprintf(stderr, "%s\n", "Usage: t9 [FILE]");
    return 1;
  }

  FILE *dict_file = fopen(argv[1], "r");
  if (dict_file == NULL) {
    fprintf(stderr, "Error: file not found\n");
    return 1;
  }

  // Build the trie from the dictionary, one line per word.
  Tnode *trie = init_trie();
  char line[MAX_LINE_LENGTH];
  while (fgets(line, MAX_LINE_LENGTH, dict_file) != NULL) {
    insert_word(trie, line);
  }

  fclose(dict_file);

  // Node reached by the most recent lookup or "#" step.
  Tnode *cursor = trie;
  char input[MAX_LINE_LENGTH];

  printf("Enter \"exit\" to quit.\n");
  printf("Enter Key Sequence (or \"#\" for next word):\n");
  printf("> ");

  while (fgets(input, MAX_LINE_LENGTH, stdin) != NULL) {
    // Cut the line at its newline, if it has one.
    char *newline = strchr(input, '\n');
    if (newline != NULL) {
      *newline = '\0';
    }

    if (strcmp(input, "exit") == 0 || feof(stdin)) {
      break;
    }

    if (strcmp(input, "#") != 0) {
      // A key sequence: look it up from the root.
      cursor = lookup_word(trie, input);
      if (cursor != NULL && cursor->word != NULL) {
        printf("  \'%s\'\n", cursor->word);
        cursor = cursor->nodes[8];
      } else {
        printf("  %s\n", "Not found in current dictionary.");
      }
    } else if (cursor != NULL && cursor != trie) {
      // "#": print the word at the cursor and step along nodes[8].
      printf("  %s\n", cursor->word);
      cursor = cursor->nodes[8];
    } else {
      printf("  %s\n", "There are no more T9onyms");
    }

    printf("Enter Key Sequence (or \"#\" for next word):\n");
    printf("> ");
  }

  destroy_trie(trie);
  return 0;
}
Exemple #29
0
void initialize_dictionary(char* a_name_file){
	//-------------------------------------------------------------
	//Returns:
	//			void
	//
	//Arguments:
	//			char* a_name_file: directory/name of the file to open
	//
	//Description:
	//			Initialises the dictionary tree with the words of the file.
	//			Each run of letters is collected twice: as written
	//			(gs_palavra) and lower-cased (ls_str); the lower-cased
	//			form is the key passed to insert_word().
	//			Prints a message and exits with -1 when the file cannot
	//			be opened.
	//-------------------------------------------------------------

	//Start from NULL so a NULL file name takes the "not found" path
	//instead of testing an uninitialised pointer.
	FILE *lf_file = NULL;
	char lc_c;
	char* ls_str;
	long int li_file_size;

	if(a_name_file!=NULL)
		lf_file = fopen(a_name_file,"r");

	if(lf_file==NULL){
		printf("File (%s) not found!",a_name_file!=NULL ? a_name_file : "(null)");
		exit(-1);
	}

	gno_root_dictionary = NULL;

	fseek(lf_file, 0L, SEEK_END);//move the cursor to the end to get the size
	li_file_size = ftell(lf_file);//file size
	fseek(lf_file,0,SEEK_SET);//move the cursor back to the start

	lc_c=' ';//any value, just so the loop is entered

	while(lc_c!=EOF && ftell(lf_file) < li_file_size){//Loop over the text of the file
		ls_str=NULL;
		gs_palavra=NULL;
		fscanf(lf_file,"%c",&lc_c);

		while (is_letter(lc_c) && ftell(lf_file) < li_file_size){
			gs_palavra = append(gs_palavra,lc_c);

			lc_c = lower(lc_c);
			ls_str = append(ls_str,lc_c);
			fscanf(lf_file,"%c",&lc_c);
		}
		if(ls_str!=NULL){
			if(is_letter(lc_c)){
				//End of text: the inner loop stopped before appending
				//the last letter, so add it here.
				gs_palavra = append(gs_palavra,lc_c);

				lc_c = lower(lc_c);
				ls_str = append(ls_str,lc_c);
			}
			insert_word(&gno_root_dictionary,ls_str);

			//The key is only read during insertion, so release it here;
			//the old code cleared the pointer before calling free() and
			//leaked every key.
			//NOTE(review): assumes append() returns heap memory - confirm.
			//gs_palavra is not freed: insert_word() stores it in the tree.
			free(ls_str);
		}
	}
	gs_palavra = NULL;
	fclose(lf_file);
}
Exemple #30
0
/*
 * k-nearest-neighbour text classifier driver.
 *
 * Steps:
 *   1. parse the command line (k, thread count, verbosity);
 *   2. read "train_pruned2.csv" ('|'-separated: example number, question,
 *      category), split each question into words longer than 3 characters
 *      and build the vocabulary tree and the category list;
 *   3. convert every example to numeric features;
 *   4. in an OpenMP parallel region, classify every example whose number
 *      is a multiple of `train` from its k nearest neighbours and count
 *      correct answers;
 *   5. print the totals and release all memory.
 *
 * Returns 0 on success, -1 when the CSV file cannot be opened.
 */
int main (int argc, char **argv)
{
    struct arguments arguments;

    /* Parse our arguments; every option seen by parse_opt will
       be reflected in arguments. */
    argp_parse (&argp, argc, argv, 0, 0, &arguments); 

    // number of nearest neighbors (default 1 when the argument is not a number)
    int k = 1;
    if (sscanf (arguments.args[0], "%i", &k)!=1) {
        k = 1;
    }

    // thread count (default 4 when the argument is not a number)
    int num_threads = 4;
    if (sscanf(arguments.args[1], "%i", &num_threads)!=1) {
        num_threads = 4;
    }

    // collapse the verbosity level to 0/1
    int verbose = arguments.verbose;
    if (verbose>0 && verbose<130){
        verbose = 1;
    }
    else{
        verbose = 0;
    }

    // counters
    int i, j, ii, jj, kk;

    // number of examples to read in
    int total_examples = 10000;
    // Number of rows actually allocated. total_examples is reduced after
    // parsing, so the free loops at the end need the original count.
    const int allocated_examples = total_examples;

    // max words per question
    int num_words = 300;

    // max word length
    int max_word_len = 20;

    // number of csv rows that could not be split into three fields
    int bad_iter = 0;

    // examples whose number is a multiple of `train` are used as test data
    int train = 10;

    // extra per-example output when non-zero
    int debug = 0;

    printf("k, Verbose, num_threads = %i, %i, %i\n",
                          k, verbose, num_threads);

    // buffer for one csv line read with fgets
    char *csv_line = malloc(sizeof(char)*1500);

    // All rows are zero-initialised with calloc: the conversion loop below
    // visits every allocated row, including the ones the parser never filled.
    struct data *all_data;
    all_data = calloc(total_examples, sizeof(struct data));
    for (ii=0; ii<total_examples; ii++){
    	all_data[ii].question = malloc(sizeof(char*)*num_words);
    	for (jj=0; jj<num_words; jj++){
    		all_data[ii].question[jj] = calloc(max_word_len, sizeof(char));
    	}
    	all_data[ii].cat = calloc(max_word_len, sizeof(char));
    }

    // numeric version of the data used by the distance computation
    struct numeric_data *num_data;
    num_data = calloc(total_examples, sizeof(struct numeric_data));
    for (ii=0; ii<total_examples; ii++){
    	num_data[ii].array_of_features = malloc(sizeof(struct feature_count)*num_words);
    	for (jj=0; jj<num_words; jj++){
    		num_data[ii].array_of_features[jj].feature_num = 0;
    		num_data[ii].array_of_features[jj].count = 0;
    	}
    }

    // vocabulary stored as a binary search tree
    feature_tree *vocab;
    vocab = NULL;

    // category list: 40 slots, each starting as the empty string
    char **cat_list;
    cat_list = malloc(sizeof(char*)*40);
    for (ii=0; ii<40; ii++){
    	cat_list[ii] = malloc(sizeof(char)*max_word_len);
    	cat_list[ii][0] = '\0';
    }

    // read in the csv file
    FILE *f = fopen("train_pruned2.csv", "r");
    if (f == NULL){
    	printf("Failed to open file \n");
    	return -1;
    }

    // parse each question into individual words and build the vocabulary
    int category_count = 1;

    for (i=0; i<total_examples; i++){

    	// line in csv to buffer
    	if (fgets(csv_line, 1500, f) == NULL){
            printf("Fgets error!\n");
            exit(0);
        }

    	// the first line (i == 0) is skipped; every other line is split
    	// into its 3 fields. Row index i-bad_iter-1 keeps the filled rows
    	// contiguous when a line is rejected.
    	if (i>0)
    	{
			char *tok;
			char *tok_copy; // the question field, tokenised after the category is read

			tok = strtok(csv_line, "|");
			if (tok == NULL){
				bad_iter++;
				continue;
			}
			sscanf(tok, "%i", &all_data[i-bad_iter-1].example_num);

			tok = strtok(NULL, "|");
			if (tok == NULL){
				bad_iter++;
				continue;
			}
			tok_copy = (char *)tok;

			tok = strtok(NULL, "|");
			if (tok == NULL){
				bad_iter++;
				continue;
			}
			strncpy(all_data[i-bad_iter-1].cat, tok, 19);
			all_data[i-bad_iter-1].cat[max_word_len-1] = 0;

			char *tok2;
			tok2 = strtok(tok_copy, " \t");

			j = 0;
			if ((tok2 != NULL) && (strlen(tok2)>3)){
				strncpy(all_data[i-bad_iter-1].question[0], tok2, 19);
		    	all_data[i-bad_iter-1].question[0][max_word_len-1] = 0;

		    	insert_word(&vocab, all_data[i-bad_iter-1].question[0]);
    			j += 1;
			}

			while (tok2 != NULL){
				if (j>=num_words){
					break;
				}
				tok2 = strtok(NULL, " \t");
		        if ((tok2 != NULL) && (strlen(tok2)>3)){
		            strncpy(all_data[i-bad_iter-1].question[j], tok2, 19);
		            all_data[i-bad_iter-1].question[j][max_word_len-1] = 0;

					insert_word(&vocab, all_data[i-bad_iter-1].question[j]);
					j++;
		        }
			} //end while

    		// add to category list
    		add_to_cat_list(all_data[i-bad_iter-1].cat, cat_list, &category_count);

    	} //end if
    } //end for

    // close file
    fclose(f);

    // assign a unique number to each feature
    // first feature is feature 1, feature 0 is for errors etc.
    unsigned int mm = 1;
    number_features(vocab, &mm);

    // some of the csv rows are not split properly
    printf("Bad iterations = %i/%i\n", bad_iter, i);
    printf("Feature count = %i\n", count_features(vocab));

    // turn data into numeric features
    for (i=0; i<total_examples; i++){
    	num_data[i].example_num = all_data[i].example_num;
    	num_data[i].cat = get_cat_index(cat_list, all_data[i].cat);
    	words_to_num(&num_data[i], &all_data[i], &vocab, num_words);
    }

    // from here on only the rows that were actually filled are used
    total_examples = total_examples-bad_iter-1;

    // NOTE(review): the result is unused; the call is kept because
    // count_features2() presumably fills total_features, which is read below.
    for (ii=0; ii<total_examples; ii++){
        (void) count_features2(&num_data[ii]);
    }

    // distance between the example under test and a candidate neighbour
    double distance;

   	// correct / total / answer
   	int c = 0;
    int total = 0;
    int answer;

    omp_set_dynamic(0); //Explicitly disable dynamic teams
    omp_set_num_threads(num_threads); //Specify thread count

    #pragma omp parallel \
            private(kk, ii, distance, answer) \
            reduction(+:c,total) \
            shared(num_data)
    {
        // per-thread record of the k nearest neighbours
        struct distance_results results;
        results.example_num = 0;
        results.distances = calloc(k, sizeof(double));
        results.cat = calloc(k, sizeof(int));
        results.example_nums = calloc(k, sizeof(int));

        // per-thread scratch used to compute the mode of the k neighbours
        struct mode mod;
        mod.count = calloc(k, sizeof(int));
        mod.cat = calloc(k, sizeof(int));
    
        #pragma omp for
        for (kk=0; kk<total_examples; kk++){

        	// only test on test data
        	if (num_data[kk].example_num%train != 0){
        		continue;
        	}

        	if (num_data[kk].cat == 0){
        		continue;
        	}

        	results.correct_answer = num_data[kk].cat;
        	results.example_num = num_data[kk].example_num;
        	for (ii=0; ii<k; ii++){
        		results.distances[ii] = 0;
        		results.cat[ii] = 0;
        		mod.count[ii] = 0;
        		mod.cat[ii] = 0;
        	}

        	// calc distance to neighbors
        	for (ii=0; ii<total_examples-1; ii++){
        		// don't calc distance to self
        		if (kk != ii){
                    // Eliminate bad data (examples with few words tend to have
                    // low distances regardless of whether they are more similar)
                    if (num_data[ii].total_features >= 40){
                        distance = get_distance(&num_data[kk], &num_data[ii], num_words);
                        if (num_data[ii].example_num > 0){
                            add_distance_to_results(&results, distance, k, 
                                                    num_data[ii].cat, num_data[ii].example_num);
                        }
    		    	}
        		}
    	    }

    	    answer = calc_nearest_neighbor(&results, &mod, k);
    	    if (answer == results.correct_answer){
    	    	c += 1;
    	    }
    	    total += 1;

            if (verbose>0 && debug>0){
                printf("Thread = %i, Correct/Total = %i/%i  Answer/Correct = %i/%i\n", 
                    omp_get_thread_num(), c, total, answer, results.correct_answer);
            }
        }

        // per-thread results
        #pragma omp barrier
        if (omp_get_thread_num() == 0){
            printf("/// Thread Results ///\n");
        }
        #pragma omp barrier
        printf("Thread = %i, Correct/Total = %i/%i\n", 
                omp_get_thread_num(), c, total);

        // free distance result (example_nums was previously leaked)
        free(results.distances);
        free(results.cat); 
        free(results.example_nums);

        // free mode struct
        free(mod.count);
        free(mod.cat);
    }

    printf("/// Final Results ///\n");
    printf("Correct/Total = %i/%i\n", c, total);

    //// release everything that was allocated ////
    // free feature tree
    free_feature_tree(vocab);

    // free numeric data (all allocated rows, not just the filled ones)
    for (ii=0; ii<allocated_examples; ii++){
    	free(num_data[ii].array_of_features);
    }
    free(num_data);

    // free category list
    for (ii=0; ii<40; ii++){
        free(cat_list[ii]);  
    }
    free(cat_list);

    // free all_data list (all allocated rows, not just the filled ones)
    for (ii=0; ii<allocated_examples; ii++){
        for (jj=0; jj<num_words; jj++){
            free(all_data[ii].question[jj]);
        }
        free(all_data[ii].question);
        free(all_data[ii].cat);
    }
    free(all_data);

    // free the buffer used to read in the csv
    free(csv_line);

    return 0;
}