Esempio n. 1
0
/*
 * Encode everything available on standard input.
 *
 * Resets codeCount (declared outside this function) to 1, sets up a fresh
 * code trie -- pre-populated through initializeTrie() unless Escaped is
 * set -- and then drives getLargestSequence() until it reports EOF.
 * The trie is released before returning.
 */
void encode () {
    Tree codeTrie;

    codeCount = 1;
    createT(&codeTrie);
    if (!Escaped){
        initializeTrie(&codeTrie);
    }

    /* The value returned by each call is the character handed to the next
       call; the loop ends as soon as that value is EOF. */
    for (int current = getchar(); current != EOF; ) {
        current = getLargestSequence(current, &codeTrie);
    }

    freeTrie(&codeTrie);
}
Esempio n. 2
0
/*
 * Small driver for the trie: creates a root, inserts three words and
 * reports whether "sairam" can be found again.
 *
 * Always returns 0.
 */
int main()
{
  /* initializeTrie() receives the handle by value, so it must hold a
     determinate value before the call; start from a null handle.
     NOTE(review): assumes Trie is a pointer type (addwords() dereferences
     it with ->) -- confirm against the typedef. */
  Trie Root = NULL;

  Root = initializeTrie(Root);
  Root = addwords(Root, "sairam");
  Root = addwords(Root, "surfeit");
  Root = addwords(Root, "hello");

  if (!searchWords(Root, "sairam")) {
    printf("does not exist\n");
  } else {
    printf("it does\n");
  }
  return 0;
}
Esempio n. 3
0
/*
 * Insert the word `words` into the trie rooted at `Root`.
 *
 * Every character c selects the child edge (c - 'a') of the current node.
 * A missing child is created with initializeTrie() and the walk then
 * continues into it, so the complete word is added in a single call.
 *
 * The walk stops early, leaving the nodes created so far in place, when
 *   - a character below 'a' is met (it would give a negative edge index), or
 *   - initializeTrie() does not return a node.
 *
 * Parameters:
 *   Root  - root node of the trie; a NULL root is returned untouched.
 *   words - NUL-terminated word to insert; NULL is ignored.
 *
 * Returns Root, so callers can keep writing `Root = addwords(Root, w)`.
 */
Trie addwords(Trie Root ,char *words)
{
	Trie node = Root;

	if (Root == NULL || words == NULL)
		return Root;

	for (; *words != '\0'; ++words) {
		int t = *words - 'a';

		/* NOTE(review): the size of edges[] is not visible here, so only
		   the lower bound is enforced -- presumably 26 entries; confirm
		   and add the matching upper-bound check. */
		if (t < 0)
			break;

		if (node->edges[t] == NULL) {
			node->edges[t] = initializeTrie(node->edges[t]);
			if (node->edges[t] == NULL)
				break;
		}

		/* Always descend, whether the child already existed or not. */
		node = node->edges[t];
	}

	/* NOTE(review): nothing marks the final node as end-of-word; check
	   whether searchWords() needs such a marker. */
	return Root;
}
/*
 * Store `val` together with `idx` in the trie below `node`.
 *
 * Starting at position `pos`, each character val[pos] .. val[n-1] selects
 * the child branch[mapChar(val[pos])]; missing children are allocated and
 * passed to initializeTrie().  The node reached once pos >= n receives its
 * own copy of `val` in `elem` and has `idx` set to idx.
 *
 * If an allocation fails the function returns early: nodes created so far
 * stay in the trie, and the target node's elem/idx are left as they were.
 *
 * Parameters:
 *   node - node to start from; NULL is ignored.
 *   key  - not read by this function.
 *          NOTE(review): the path is derived from `val`, not `key` --
 *          confirm this is intended and that n never exceeds strlen(val).
 *   val  - NUL-terminated string that is stored; NULL is ignored.
 *   pos  - index of the first character still to be consumed.
 *   n    - number of characters that make up the path.
 *   idx  - value written to the target node's idx field.
 */
void insert_i(Trie * node, const char * key, const char * val, int pos, int n, int idx) 
{
	(void) key;

	if (node == NULL || val == NULL)
		return;

	/* Descend one level per remaining character, creating nodes as needed. */
	for (; pos < n; ++pos) {
		int slot = mapChar(val[pos]);
		Trie *child = node->branch[slot];

		if (child == NULL) {
			child = malloc(sizeof *child);
			if (child == NULL)
				return;
			initializeTrie(child);
			node->branch[slot] = child;
		}
		node = child;
	}

	/* Reached the target node.  The buffer needs strlen(val) + 1 bytes so
	   the terminating NUL fits, and it is resized on every insert so a
	   longer value can safely replace a shorter one.
	   NOTE(review): assumes elem is either NULL or was obtained from
	   malloc/realloc -- confirm against initializeTrie(). */
	size_t size = strlen(val) + 1;
	char *copy = realloc(node->elem, size);
	if (copy == NULL)
		return;
	memcpy(copy, val, size);

	node->elem = copy;
	node->idx = idx;
}
Esempio n. 5
0
/*
 * Indexer entry point.
 *
 * Reads one document path per line from INDEX/files.txt.  PDF, DOC and ODT
 * documents are first passed to the matching convert*ToTxt() helper together
 * with the scratch file name "temp.txt"; TXT documents are read directly.
 * Every word of the resulting text is lower-cased, stripped of special
 * symbols and, if it is not a stop word and consists only of the letters
 * a-z, counted in fileLen[] and added to the index with addTerm().
 *
 * Documents are numbered from 1 in list order.  A document that cannot be
 * converted or opened still consumes its id and is counted as an error.
 * Afterwards the index file is written with createIndexFile() and a summary
 * is written to INDEX/STATUS.txt.  If the document list cannot be opened,
 * only the status file is written.
 *
 * Always returns 0.
 */
int main(){
	FILE *files,*fp;
	char temp[MAXIMUM],tempFileName[MAXIMUM],textFileName[MAXIMUM];
	int docId = 1;
	int fileType;
	int totalErrors = 0;
	int len;

	initializeStopWordsArray();
	initializeTrie();

	files = fopen("INDEX/files.txt","r");

	if(files == NULL){
		goto createStatusFile;
	}

	/* One iteration per line (document path) of files.txt. */
	while(readLine(files,tempFileName) != FAIL) {
		int failed = 0;

		printf("%d \n",docId);
		printf("%s\n",tempFileName);

		fileType = checkFileExtension(tempFileName);

		/* Unless the document already is plain text, it is read back
		   from the scratch file handed to the converter. */
		strcpy(textFileName,"temp.txt");
		if(fileType == PDF){
			failed = convertPdfToTxt(tempFileName,textFileName) != SUCCESS;
		}else if(fileType == DOC){
			failed = convertDocToTxt(tempFileName,textFileName) != SUCCESS;
		}else if(fileType == ODT){
			failed = convertOdtToTxt(tempFileName,textFileName) != SUCCESS;
		}else if(fileType == TXT){
			strcpy(textFileName,tempFileName);
		}else if(fileType == 0){
			failed = 1;
		}

		fp = failed ? NULL : fopen(textFileName,"r");

		if(fp == NULL){
			/* Skipped documents keep their id so later ids stay
			   aligned with the line numbers of files.txt. */
			++totalErrors;
			++docId;
			continue;
		}

		/* Add every acceptable word of this document to the index trie. */
		while(readWord(fp,temp) != FAIL) {

			convertToLowerCase(temp);
			removeSpecialSymbols(temp);

			if(temp[0] == '\0' || temp[0] == '\t' || temp[0] == ' ' || temp[0] == '\n')
				continue;

			if(checkForStopWords(temp) != SUCCESS) {
				int k = 0;

				/* Only words made up entirely of a-z are indexed. */
				while(temp[k] >= 'a' && temp[k] <= 'z')
					++k;

				if(temp[k] == '\0') {
					/* NOTE(review): the size of fileLen[] is not
					   visible here -- confirm docId stays in range. */
					fileLen[docId]++;
					addTerm(temp,docId);
				}
			}
		}

		++docId;

		fclose(fp);
		if(fileType != TXT){
			/* Reset the scratch file for the next conversion. */
			system("rm temp.txt");
			system("touch temp.txt");
		}
		printf("...Done\n");
	}

	fclose(files);

	numOfDocs = docId - 1;

	printf("Number of docs : %d\n",numOfDocs);
	printf("Number of Errors : %d\n",totalErrors);

	createIndexFile();

	createStatusFile:
	system("echo \"Index Status\n==================\nIndex created on :\n\"  > INDEX/STATUS.txt");
	system("date >> INDEX/STATUS.txt");
	system("echo \"Directory indexed : \"#// >> INDEX/STATUS.txt");

	/* Build the last command with a bounded write; a truncated command
	   would be malformed, so it is only run when it fits completely. */
	len = snprintf(temp,sizeof temp,"echo \"No of Documents indexed : %d\" >> INDEX/STATUS.txt",numOfDocs);
	if(len > 0 && (size_t)len < sizeof temp){
		system(temp);
	}

	return 0;
}
Esempio n. 6
0
/*
 * Decode the bit stream delivered by getBits() and print the decoded
 * strings with printArray().
 *
 * Header, read in this order:
 *   8 bits - value passed to setMaxBits()
 *   1 bit  - "escaped" flag: when set, setEscaped() is called; otherwise
 *            the trie is pre-populated with initializeTrie()
 *   1 bit  - "prune" flag: when set, setShouldPrune() is called
 *
 * Body: codes of getBitsCount() bits each (8 for the first one) until
 * getBits() returns EOF.  In escaped mode code 0 is followed by one 8-bit
 * literal.  Any other code is looked up with checkForNextCode(); if the
 * lookup yields nothing, the string is built from the previously decoded
 * string and that string's first element.
 *
 * Strings are int arrays terminated by -1.
 *
 * Decoding stops with a message on stderr when memory runs out or when the
 * stream references the previous string before any string was decoded.
 * All buffers and the trie are released on every path.
 */
void decode () {
    int possition = 0;
    int code;
    int codeCount = 1;
    int bitsCount = 8;
    int *lastString = NULL;
    int *nextString = NULL;
    Tree codeTrie;

    createT(&codeTrie);

    /* outputString starts out as the empty string. */
    int *outputString = malloc(sizeof *outputString);
    if (!outputString) {
        fprintf(stderr, "decode: out of memory\n");
        freeTrie(&codeTrie);
        return;
    }
    outputString[0] = -1;

    int getMaxBits = getBits(bitsCount);
    setMaxBits(getMaxBits);

    int Escaped = getBits(1);
    if (Escaped) {
        setEscaped();
    }
    else {
        initializeTrie(&codeTrie);
    }

    int shouldPrune = getBits(1);
    if (shouldPrune) {
        setShouldPrune();
    }

    while ((code = getBits(bitsCount)) != EOF) {
        if (code == 0 && Escaped) {
            /* Escape code: the next 8 bits are a literal element. */
            int singleLetter = getBits(8);
            nextString = malloc(2 * sizeof *nextString);
            if (!nextString) {
                fprintf(stderr, "decode: out of memory\n");
                break;
            }
            nextString[0] = singleLetter;
            nextString[1] = -1;
        }
        else {
            nextString = checkForNextCode(&codeTrie, code);
        }

        if (!nextString) {
            /* Code not available yet: derive the string from the previous
               one.  Without a previous string the input is invalid. */
            if (!lastString) {
                fprintf(stderr, "decode: corrupt input\n");
                break;
            }
            nextString = addLetterToStringNoFree(lastString, lastString[0]);
            if (!nextString) {
                fprintf(stderr, "decode: out of memory\n");
                break;
            }
        }

        printArray(nextString);

        int *output = combine(outputString, nextString, possition);
        free(outputString);
        outputString = output;
        possition = addStringToTrie(&codeTrie, outputString, 0, &codeCount);

        /* Remember a private copy of this string for the next round. */
        int stringLength = arrayLen(nextString);
        free(lastString);
        lastString = malloc((stringLength + 1) * sizeof *lastString);
        if (!lastString) {
            fprintf(stderr, "decode: out of memory\n");
            break;
        }
        for (int i = 0; nextString[i] != -1; i++) {
            lastString[i] = nextString[i];
        }
        lastString[stringLength] = -1;

        bitsCount = getBitsCount();
        free(nextString);
        nextString = NULL;
    }

    /* nextString is only non-NULL here after an early exit from the loop. */
    free(nextString);
    free(lastString);
    freeTrie(&codeTrie);
    free(outputString);
}