Esempio n. 1
0
// Parse the buffer delimited by fileStart and fileEnd as words separated by
// the space character (' ' is the only separator recognised here).
//
// Each word is checked with isGoodWord() and, when accepted, passed to
// addWordToList(); both calls receive a pointer to the word's first character
// and a pointer to its last character.
//
// The position fileEnd is treated as the end of input: a word still open
// there ends at fileEnd - 1, and the byte at fileEnd itself is never read.
void parseFile(void) {
  // Where we are parsing now.
  char * current;
  // First character of the word being scanned; NULL while between words.
  char * wordStart = NULL;
  int    insideWord = 0;

  for(current = fileStart; current <= fileEnd; current++) {
    // Test the position before dereferencing, so that reaching fileEnd
    // never touches memory at (or beyond) the end marker.
    int atEnd = (current == fileEnd);

    if(insideWord == 1) {
      // If we are inside a word, we only care about
      // the end of this word.
      if(atEnd || *current == ' ') {
        if(isGoodWord(wordStart, current-1)) {
          addWordToList(wordStart, current-1);
        }

        insideWord = 0;
        wordStart = NULL;
      }
    }
    else if(!atEnd && *current != ' ') {
      // First non-space character after a gap: a new word begins here.
      wordStart = current;
      insideWord = 1;
    }
  }
}
Esempio n. 2
0
/*
 * Word-frequency driver.
 *
 * Maps the file named by argv[1] into memory and scans it as words separated
 * by ' ' or '\n'. A word is "good" when every character is a letter (A-Z
 * after toupper) or '-'. Good words are passed to addWordToList() as a
 * (first character, last character) pointer pair. The resulting list is
 * sorted with mergeSort() and printed as "word<TAB>occurrences" lines.
 *
 * After a separator, scanning resumes at the next letter or '-'; anything in
 * between is skipped.
 *
 * Exits with status -1 when fewer than two arguments are supplied or when
 * open(), fstat() or mmap() fails. argv[2] is required but not read here.
 */
int main(int argc, char **argv){

	int fd;
	char *fname;
	struct stat finfo;
	char *fdata;
	char *fstart;
	char *fend;
	char *fcurrent;
	validWords *list=NULL;
	if(argc<=2){
		if(argc==2){
			printf("Third argument is missing\n");
			exit(-1);
		} else {
			printf("File location not specified\n");
			exit(-1);
		}
	}

	//reading file name from command line argument
	fname=argv[1];

	//open() returns the file descriptor used for fstat() and mmap() below
	fd=open(fname, O_RDONLY);
	if(fd<0){
		perror("Opening the file failed");
		exit(-1);
	}

	//fetching the file statistics to retrieve the file size
	if(fstat(fd, &finfo) < 0){
		perror("couldn't fetch the stats of the file");
		exit(-1);
	}

	//private, writable mapping of the file, one byte longer than the file.
	//NOTE(review): nothing in this function reads at or past fend; confirm
	//whether addWordToList() relies on the extra byte or on PROT_WRITE.
	fdata = (char *) mmap(0, finfo.st_size + 1, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
	if(fdata==MAP_FAILED){
		perror("couldn't map a memory of file content with process address space");
		exit(-1);
	}

	fstart=fdata;
	fend=fdata+finfo.st_size;

	fcurrent=fstart;

	int c;

	//Reaching to the first valid word. The bounds test comes first so that
	//*fend is never read (it may lie in a page beyond the end of the file).
	while(fcurrent<fend){
		c = toupper((unsigned char)*fcurrent);
		if((c>='A' && c<='Z') || c=='-'){
			break;
		}
		fcurrent++;
	}

	char *wordStartChar=fcurrent;
	char *wordEndChar=NULL;
	int isGoodWord=1;
	//set while a word has been started but not yet terminated by a separator
	int wordPending=0;

	while(fcurrent<fend){
		//cast: passing a negative char value to toupper() is undefined
		c = toupper((unsigned char)*fcurrent);
		if(c==' ' || c=='\n'){
			//separator: the current word ends at the previous character
			if(isGoodWord){
				//add semaphore here
				list = addWordToList(wordStartChar, wordEndChar, list);
				//leaveSemaphore lock here
			}
			wordPending=0;

			//skip ahead to the next letter or '-', never reading *fend
			for(fcurrent++; fcurrent<fend; fcurrent++){
				c = toupper((unsigned char)*fcurrent);
				if((c>='A' && c<='Z') || c=='-'){
					break;
				}
			}
			if(fcurrent>=fend){
				break;
			}
			wordStartChar=fcurrent;
			isGoodWord=1;
		}else if((c<'A' || c>'Z') && c!='-'){
			//any other character disqualifies the word it appears in
			isGoodWord=0;
		}
		wordEndChar=fcurrent;
		wordPending=1;
		fcurrent++;
	}

	//a file that does not end in ' ' or '\n' leaves its last word open;
	//it ends at the last byte of the file and must still be counted
	if(wordPending && isGoodWord){
		list = addWordToList(wordStartChar, wordEndChar, list);
	}

	list=mergeSort(list);
	while(list!=NULL){
		printf("%s\t%d\n",list->word,list->occurences);
		list=list->next;
	}

	return 0;
}
Esempio n. 3
0
/*
 * Search driver.
 *
 * 1. Opens every file named by getCWDtxtFiles() (NumOftxtFiles entries) and
 *    feeds each whitespace-delimited token, after trimWord(), to
 *    addWordToList() together with the 1-based file number.
 * 2. Calls storeBST(), then builds a hash table sized by countWords() from
 *    the word list.
 * 3. Appends one statistics line to Analysis/record.txt.
 * 4. Runs an interactive query loop until the user answers something other
 *    than 'y' or standard input ends. A query ending in '*' is expanded via
 *    makeList()/getNextWord(); any other query is passed through
 *    getCorrectWord() before searching.
 *
 * Files that cannot be opened are reported with perror() and skipped.
 * Returns 0.
 */
int main()
{
    // Performance calculation variables.
    clock_t begin, end;
    double processTime;

    hashTable* hTable = NULL;
    wList* wordList = NULL;

    int i=0;
    char** fileNames = getCWDtxtFiles();
    FILE* file[NumOftxtFiles];

    for(i=0;i<NumOftxtFiles;++i)
    {
        file[i] = fopen(fileNames[i], "r");
        if(file[i] == NULL)
        {
            perror(fileNames[i]);
        }
    }

    // Opened in append mode and closed below; nothing is written to it here.
    FILE* out = fopen("Analysis/output.txt", "a");
    char lol[50] = {'\0'};

    begin = clock();

    for(i=0;i<NumOftxtFiles;++i)
    {
        if(file[i] == NULL)
        {
            continue;
        }
        // Loop on the fscanf() result rather than feof(): feof() only turns
        // true after a read has failed, which would re-process the previous
        // token. The field width keeps long tokens inside lol[50].
        while(fscanf(file[i], "%49s", lol) == 1)
        {
            trimWord(lol);
            if(lol[0] == '\0')
            {
                continue;
            }
            wordList = addWordToList(wordList, lol, i+1);
        }
    }
    storeBST();

    // Performance calculation: covers reading the files and storeBST().
    end  = clock();
    processTime = (double)(end-begin)/CLOCKS_PER_SEC;

    int numWords = countWords(wordList);
    hTable = createHashTable(numWords);
    addToTableFromList(hTable, wordList);

    // Close each stream exactly once, and only if it was opened.
    if(out != NULL)
    {
        fclose(out);
    }

    for(i=0;i<NumOftxtFiles;++i)
    {
        if(file[i] != NULL)
        {
            fclose(file[i]);
        }
    }

    FILE* record = fopen("Analysis/record.txt", "a");
    int emptyCount = countEmpty(hTable);
    if(record != NULL)
    {
        fprintf(record, "WordCount : %d Empty : %d ProcessTime : %f s\n", numWords, emptyCount, processTime);
        fclose(record);
    }
    else
    {
        perror("Analysis/record.txt");
    }

    printf("\nNo in list = %d %d", countWords(wordList), numWords);

    // Driver loop
    char ans = 'y';
    int ch;
    while(ans == 'y'){
        printBannner(processTime);
        printf("\n\t"); printf("__________________________________________________________\n\n");
        printf("\t"); printf("                        Enter the Query                              \n\n");
        printf("\t\t\t\t");
        // Stop when no query can be read (end of input); bound the read to lol[50].
        if(scanf("%49s",lol) != 1){
            break;
        }
        printf("\t"); printf("__________________________________________________________\n");

        size_t queryLen = strlen(lol);
        if(queryLen > 0 && lol[queryLen-1] == '*'){
            makeList(wordList, lol);
            char* word = NULL;
            while((word = getNextWord())!= NULL){
                printf("\n\t\t");printf("%s : \n", word);
                searchText(word, hTable, fileNames, NumOftxtFiles);
            }
        }
        else{
            trimWord(lol);
            char* word = getCorrectWord(lol);
            if(strcmp(word, lol) != 0)
            {
                printf("\n\t\t\t");printf("Corrected Word : %s\n", word);
                searchText(word, hTable, fileNames, NumOftxtFiles);
                printf("\n\t\t\t");printf("Do you want to search for original word? y/n : ");
                char a = 'y';
                // Discard the rest of the query line; also stop at EOF so
                // the loop cannot spin forever once input is exhausted.
                while((ch = getchar()) != '\n' && ch != EOF){
                }
                if(scanf("%c", &a) != 1){
                    // No answer could be read: do not run the extra search.
                    a = 'n';
                }
                if(a == 'y'){
                    searchText(lol, hTable, fileNames, NumOftxtFiles);
                }
            }
            else{
                searchText(lol, hTable, fileNames, NumOftxtFiles);
            }
        }

        printf("\n\n\t\t\t");printf("Search Again? y/n : ");
        // Discard the remainder of the previous input line (EOF-safe).
        while((ch = getchar()) != '\n' && ch != EOF){
        }
        if(scanf("%c", &ans) != 1){
            break;
        }
    }

    return 0;
}