/*
 * Scan name[0..length) and register every word found in it.
 *
 * The scan alternates between skipNoise(), which advances past a run of
 * "noise" characters, and getWord(), which yields the end of the next word.
 * A word longer than one character is looked up with findWord(), created
 * with addWord() if the lookup returns NULL, and then passed to countWord().
 * Words of length 0 or 1 are stepped over without being recorded.
 */
static void
parseName(const char *name, int16_t length) {
    int16_t pos, end, len;
    Word *entry;

    /* each iteration resumes at the end of the word handled before it */
    for(pos=0; pos<length; pos=end) {
        /* move past leading "noise", if there is any */
        end=skipNoise(name, pos, length);
        if(end>pos) {
            pos=end;
        }
        if(pos==length) {
            /* nothing but noise was left */
            break;
        }

        /* delimit the next word; only words longer than 1 are recorded */
        end=getWord(name, pos, length);
        len=(int16_t)(end-pos);
        if(len>1) {
            entry=findWord(name+pos, len);
            if(entry==NULL) {
                entry=addWord(name+pos, len);
            }
            countWord(entry);
        }
    }
}
Example #2
0
/*
 * Build a word histogram from a NULL-terminated array of strings.
 *
 * Each entry is stripped in place with g_strstrip(), passed through
 * removePunct(), and copied into a fresh GString. If that copy is
 * non-empty, its text is handed to countWord() together with the map
 * obtained from makeNewMap().
 *
 * Returns the map. The process exits with EXIT_FAILURE if the GString
 * pointer comes back NULL.
 *
 * NOTE(review): neither the removePunct() result nor the GString is
 * released in this function. Whether countWord() keeps the pointer it is
 * given is not visible here - confirm ownership before adding any free.
 */
GHashTable *makeHistogram(gchar **str_array)
{
    GHashTable *histogram = makeNewMap();
    gchar **cursor = str_array;

    while (*cursor != NULL)
    {
        /* trim surrounding whitespace in place before removing punctuation */
        g_strstrip(*cursor);
        gchar *cleaned = removePunct(*cursor);
        GString *key = g_string_new(cleaned);

        if (!key)
        {
            fprintf(stderr, "failled to create a new string key\n");
            exit(EXIT_FAILURE);
        }

        /* entries that end up empty are not counted */
        if (key->str != NULL && key->len != 0)
        {
            countWord(histogram, key->str);
        }

        cursor++;
    }

    return histogram;
}
Example #3
0
/*
 * Check whether the space-separated words of `str` follow `pattern`.
 *
 * Every pattern character is mapped to a slot (character - 'a') and every
 * slot remembers the first word seen for it. The match fails when a slot is
 * later paired with a different word, or when checkPatternUnique() reports
 * that a new word is not unique among the words already stored.
 *
 * @param pattern  NUL-terminated pattern; each character must map to a slot
 *                 in [0, HT_SIZE), otherwise the result is false.
 * @param str      NUL-terminated text whose words are separated by single
 *                 spaces.
 * @return true if the words follow the pattern; false on a mismatch, when
 *         the word count reported by countWord() differs from the pattern
 *         length, on malformed input, or when memory cannot be allocated.
 */
bool wordPattern(char* pattern, char* str) {
	bool isMatched = true;
	int i, index, len, shift=0, offset = 0;
	int pattern_num = 0;
	int p_size = (int) strlen(pattern);
#ifdef DEBUG
	printf("p_size=%d\n", p_size);
#endif

	int num_word = countWord(str);
#ifdef DEBUG
	printf("num_word=%d\n", num_word);
#endif

	if (p_size != num_word)
		return false;

	/* calloc leaves unused slots NULL so cleanup is safe after a partial setup */
	char **HashTable = calloc(HT_SIZE, sizeof *HashTable);
	int *pattern_list = malloc(sizeof *pattern_list * HT_SIZE);
	if (HashTable == NULL || pattern_list == NULL) {
		isMatched = false;
		goto cleanup;
	}

	for (i=0;i<HT_SIZE;i++) {
		HashTable[i] = malloc(sizeof(char) * WORD_LEN);
		if (HashTable[i] == NULL) {
			isMatched = false;
			goto cleanup;
		}
		/* an empty string marks a slot with no word assigned yet */
		HashTable[i][0] = '\0';
		pattern_list[i] = -1;
	}

	for (i=0;i<p_size;i++) {
		index = (int) (pattern[i] - 'a');
#ifdef DEBUG
		printf("index=%d\n", index);
#endif
		/* a character outside the table has no slot; reject it instead of
		 * indexing out of bounds */
		if (index < 0 || index >= HT_SIZE) {
			isMatched = false;
			break;
		}

		if (i == p_size-1) {
			/* last word: runs to the end of str; the +1 covers the terminator */
			len = ((int) strlen(str)) - offset + 1;
		}
		else {
			char *pch = strchr(str+offset,' ');
			if (pch == NULL) {
				/* fewer separators than countWord() implied */
				isMatched = false;
				break;
			}
			shift = (int) (pch-str+1);
			len = shift - offset;
		}
#ifdef DEBUG
		printf("offset=%d len=%d\n", offset, len);
#endif

		/*
		 * NOTE(review): ownership of the string returned by getWord() is not
		 * visible here. If it is heap-allocated it is never released by this
		 * function - confirm and free it on every path if so.
		 */
		char *word = getWord(str, offset, len);
		if (word == NULL) {
			isMatched = false;
			break;
		}
#ifdef DEBUG
		printf("word:%s\n", word);
#endif

		if (HashTable[index][0] == '\0') {
#ifdef DEBUG
			printf("HashTable[%d] is empty!Add %s to hash table\n", index, word);
#endif
			bool isUnique = checkPatternUnique(HashTable, word, pattern_list, pattern_num);
			if (isUnique == true) {
				size_t word_size = strlen(word) + 1;

				/* grow the slot rather than overflow it for long words */
				if (word_size > (size_t) WORD_LEN) {
					char *grown = realloc(HashTable[index], word_size);
					if (grown == NULL) {
						isMatched = false;
						break;
					}
					HashTable[index] = grown;
				}
				memcpy(HashTable[index], word, word_size);
				pattern_list[pattern_num] = index;
				pattern_num++;
			}
			else {
#ifdef DEBUG
				printf("Word %s is not uniqle!\n", word);
#endif
				isMatched = false;
				break;
			}
		}
		else {
#ifdef DEBUG
			printf("HashTable[%d] is not empty!\n", index);
#endif
			if (strcmp(HashTable[index], word) == 0) {
#ifdef DEBUG
				printf("HashTable[%d]:%s is matched to word:%s!\n", index, HashTable[index], word);
#endif
			}
			else {
#ifdef DEBUG
				printf("HashTable[%d]:%s\tword:%s not matched!\n", index, HashTable[index], word);
#endif
				isMatched = false;
				break;
			}
		}

		offset = shift;
	}

cleanup:
	free(pattern_list);

	if (HashTable != NULL) {
		for (i=0;i<HT_SIZE;i++) {
			free(HashTable[i]);
		}
		free(HashTable);
	}

	return isMatched;
}