Esempio n. 1
0
// Manual smoke test for the word-chain list: builds a list with
// appendNewWordChain / appendWord, prints every stored word chain by chain,
// and tears the list down while printing.
//
// Always returns 1.
// NOTE(review): a non-zero return from main is reported to the shell as a
// failure; kept as-is because callers/scripts may depend on it.
int main(){

	// create a list and keep adding different words
	SinLL *wordList = CreateSinLL();

	char *word;
	word = calloc(20, sizeof(char));
	strcpy(word, "kitty");

	// testing appendNewWordChain function
	appendNewWordChain(word, wordList);

	char *word2;
	word2 = calloc(20, sizeof(char));
	strcpy(word2, "doggy");

	appendNewWordChain(word2, wordList);

	char *word5;
	word5 = calloc(20, sizeof(char));
	strcpy(word5, "bat");

	// testing appendWord function
	appendWord(word5, wordList);

	char *word4;
	word4 = calloc(20, sizeof(char));
	strcpy(word4, "cat");

	// doing a few more adds
	appendNewWordChain(word4, wordList);

	char *word3;
	word3 = calloc(20, sizeof(char));
	strcpy(word3, "turtle");

	appendWord(word3, wordList);

	WordChainNode *printChain = wordList->head;
	WordChainNode *nextPrintChain;

	// printing the words in the list one by one
	while(printChain){
		WordsLL *words = printChain->words;
		while(words){
			printf("Word is: %s\n", (words->word));
			// NOTE(review): assumes DeleteWordChainNode does not free the
			// word strings itself; otherwise this is a double free -- confirm.
			free(words->word);
			words = words->nextWord;
		}
		printf("\n");

		// Read the successor BEFORE deleting the node: the node must not be
		// touched once it has been handed to DeleteWordChainNode.
		nextPrintChain = printChain->nextWords;
		DeleteWordChainNode(printChain);
		printChain = nextPrintChain;
	}
	// expected results
	printf("Words should be kitty  doggy bat  cat turtle.");
	free(wordList);
	return 1;
}
Esempio n. 2
0
/**
 * Relays a word through the list from `first` to the end of the chain, then
 * again from `last` back to the start, and finally hands the message held by
 * the node where the return trip stopped to appendWord().
 *
 * Each hop copies the message to the neighbouring city, empties it on the
 * city it left, and increments `operations`.
 *
 * @param word  the message to transmit
 *
 * Does nothing when the list is empty (`first` or `last` is NULL).
 */
void LinkedList::transmitMsg(string word){

	// An empty list has no city to carry the message; without this guard the
	// code below would dereference a null pointer.
	if(first == NULL || last == NULL){
		return;
	}

	//Transmit function. Uncomment the DEBUG lines to see the flow during execution.
	City *head = first;
	head->msg = word;
	//cout << "DEBUG: Transmitted " << head->msg << " to " << head->name << endl;
	while(head->next != NULL){
		head->next->msg = head->msg;
		// Empty the message on the city we are leaving. Assigning '\0' would
		// instead store a one-character string containing a NUL byte.
		head->msg.clear();
		head = head->next;
		++operations;
		//cout << "DEBUG: Transmitted " << head->msg << " to " << head->name << endl;

	}
	//Have hit the end of the line... retransmit from last
	//cout << "************* TURNAROUND ***********" << endl;
	head = last;
	head->msg = word;

	while(head->prev != NULL){
		head->prev->msg = head->msg;
		head->msg.clear();
		head = head->prev;
		++operations;
		//cout << "DEBUG: RE-Transmitted " << head->msg << " to " << head->name << endl;
	}
	//Append to final sentence after passing through the last city
	appendWord(head->msg);
}
Esempio n. 3
0
/**
 * Builds a text from a single word.
 *
 * A new shared `word` is constructed from `w` and passed to appendWord().
 *
 * @param w  word used to initialise the text
 */
text::text(const word& w)
{
	shared_ptr <word> single = make_shared <word>(w);

	appendWord(single);
}
Esempio n. 4
0
void text::createFromString(const string& in, const charset& ch)
{
	size_t asciiCount = 0;
	size_t asciiPercent = 0;

	removeAllWords();

	// Check whether there is a recommended encoding for this charset.
	// If so, the whole buffer will be encoded. Else, the number of
	// 7-bit (ASCII) bytes in the input will be used to determine if
	// we need to encode the whole buffer.
	encoding recommendedEnc;
	const bool alwaysEncode = ch.getRecommendedEncoding(recommendedEnc);

	if (!alwaysEncode)
	{
		asciiCount = utility::stringUtils::countASCIIchars(in.begin(), in.end());
		asciiPercent = (in.length() == 0 ? 100 : (100 * asciiCount) / in.length());
	}

	// If there are "too much" non-ASCII chars, encode everything
	if (alwaysEncode || asciiPercent < 60)  // less than 60% ASCII chars
	{
		appendWord(make_shared <word>(in, ch));
	}
	// Else, only encode words which need it
	else
	{
		bool is8bit = false;     // is the current word 8-bit?
		bool prevIs8bit = false; // is previous word 8-bit?
		unsigned int count = 0;  // total number of words

		for (size_t end = in.size(), pos = 0, start = 0 ; ; )
		{
			if (pos == end || parserHelpers::isSpace(in[pos]))
			{
				const string chunk(in.begin() + start, in.begin() + pos);

				if (pos != end)
					++pos;

				if (is8bit)
				{
					if (count && prevIs8bit)
					{
						// No need to create a new encoded word, just append
						// the current word to the previous one.
						shared_ptr <word> w = getWordAt(getWordCount() - 1);
						w->getBuffer() += " " + chunk;
					}
					else
					{
						if (count)
						{
							shared_ptr <word> w = getWordAt(getWordCount() - 1);
							w->getBuffer() += ' ';
						}

						appendWord(make_shared <word>(chunk, ch));

						prevIs8bit = true;
						++count;
					}
				}
				else
				{
					if (count && !prevIs8bit)
					{
						shared_ptr <word> w = getWordAt(getWordCount() - 1);
						w->getBuffer() += " " + chunk;
					}
					else
					{
						appendWord(make_shared <word>
							(chunk, charset(charsets::US_ASCII)));

						prevIs8bit = false;
						++count;
					}
				}

				if (pos == end)
					break;

				is8bit = false;
				start = pos;
			}
			else if (!parserHelpers::isAscii(in[pos]))
			{
				is8bit = true;
				++pos;
			}
			else
			{
				++pos;
			}
		}
	}
}
Esempio n. 5
0
/**
 * Builds a text from a single word.
 *
 * A new `word` object is created from `w` with vmime::create and passed to
 * appendWord().
 *
 * @param w  word used to initialise the text
 */
text::text(const word& w)
{
	appendWord(vmime::create <word>(w));
}
Esempio n. 6
0
int main(int argc, char* argv[]){
	int success;			// contains 1 if removing from SinLL was successful
	int funcSuccess;
	int orNext;			// contains > 0 if the next word in query should be ORed
	int firstAdd;			// contains > 0 if the addition to SinLL is the first addition
	int tempChar;			// used to flush the stdin for too long inputs
	char query[MAX_QUERY_LEN];	// contains string of query
	char *getsSuccess;			// determines if EOF is met.
	int status = 1;
	SinLL *wordList;

	if(argc != 3){		// invalid number of arguments
		fprintf(stderr, ANSI_COLOR_RED "Usage: query [INDEXER OUTPUT FILE] [CRAWLER OUTPUT FILE DIRECTORY]"  ANSI_COLOR_RESET "\n");
		return 0;
	}

	if(!(access(argv[1], F_OK) != -1)){	// invalid file
		fprintf(stderr, ANSI_COLOR_RED "First argument is not a valid file."  ANSI_COLOR_RESET "\n");
                return 0;
	}	

	if(!IsDir(argv[2])){	// invalid "directory"
		fprintf(stderr, ANSI_COLOR_RED "Second argument is not a directory."  ANSI_COLOR_RESET "\n");
		return 0;
	}

	HashTable *invertedIndex;
	invertedIndex = calloc(1, sizeof(HashTable));

	if(!invertedIndex){
		status = 0;
		goto cleanup;
	}

	funcSuccess = readFile(argv[1], invertedIndex);	// recreate inverted index
	if(!funcSuccess){
		status = 0;
                goto cleanup;
	}

	while(1){
		// get the query from user
		fputs("QUERY> ", stdout);
  		fflush(stdout); 
		
		getsSuccess = fgets(query, sizeof(char)*MAX_QUERY_LEN, stdin);
		if(!getsSuccess) break;	// EOF means exiting program

		// this means the user input more than MAX_QUERY_LEN characters to query
		if(getsSuccess[strlen(getsSuccess)-1] != '\n'){
			fprintf(stderr, ANSI_COLOR_RED "Query length is over the maximum 1000 characters!"     ANSI_COLOR_RESET "\n");
			while((tempChar = getchar()) != '\n' && tempChar != EOF){
				/*do nothing*/
			}
			continue;
		}

		// at this stage, the next add is the first add, and we have not seen a 
		// OR yet.
		orNext = 0;
		firstAdd = 1;

		wordList = CreateSinLL();
		if(!wordList) break;

		char *wordP;  
		wordP = strtok(query," ");

		// get all the words from the query
		while(wordP){
			// last word in query will have a \n attached to it, so if 
			// there is a \n at the end of a word, take that out
			if(wordP[strlen(wordP)-1] == '\n'){
				wordP[strlen(wordP)-1] = 0;
			}

			// ignore ANDs.
			if(strcmp(AND, wordP) == 0){ 
				wordP = strtok (NULL, " ");
				continue;
			}
	
			// ignore ORs but make sure you OR the next coming word.
			if(strcmp(OR, wordP) == 0){
				orNext = 1;
				wordP = strtok (NULL, " ");
				continue;			
			}

			// make word lowercase. If this word is the first one, or 
			// the previous word was OR, make a new node in the SinLL 
			// of WordChainList
			NormalizeWord(wordP);
			if(firstAdd){
				funcSuccess = appendNewWordChain(wordP, wordList);
				if(!funcSuccess) break;
				firstAdd = 0;
        	        }
			else if(orNext){
				funcSuccess = appendNewWordChain(wordP, wordList);
				if(!funcSuccess) break;
				orNext = 0;
			}
			// if not the previous two cases, just append the word to 
			// current node.
			else{
				appendWord(wordP, wordList);
			}
			wordP = strtok (NULL, " ");
		}
		
		// first process will AND all the words contained in each WordChainNodes
		// of the list.
		WordChainNode *curWordChain = wordList->head;
		while(curWordChain){		// while there are more nodes
			firstAdd = 1;
			DocNode *tempProcessDocNode;	// contains original DocNodes to AND from index
                	DocNode *processDocNode;	// contains copied version of above.

			WordsLL *wordsProc = curWordChain->words;	// gettng first set of words.
			while(wordsProc){	// while there are more words
	
				// get DocNodes associated with that word from the inverted index and 
				// copy it as to not mess up the inverted index. 
				tempProcessDocNode = DocsFromWordNode(wordsProc->word, invertedIndex);	
				processDocNode = CopyDocs(tempProcessDocNode);
				
				// merge the above DocNodes with the DocNodes saved at the current
				// WordChainNode.
				DocMergedID(&processDocNode, &(curWordChain->docs));			

				// if it is the first add, we want to skip this step. If it isnt the 
				// first add, and the above DocNodes with the ocNodes saved at the current
				// WordChainNode.	
				if(!firstAdd){
					ProcessAND(&processDocNode);
				}

				// Add the processed (ANDed) DocNode chain at the current
				// WordChainNode.
				AddDocNodeChain(curWordChain, processDocNode);
			
				// iterate through to the next word at the current node.
				wordsProc = wordsProc->nextWord;
				firstAdd = 0;
			}
			// move on to the next node. 
			curWordChain = curWordChain->nextWords;
		}
		
		// now we OR each individual WordChainNodes' DocNode lists.
		curWordChain = wordList->head;
		DocNode *curDocs;
		DocNode *nextDocs;
		success = removeTopDoc(wordList, &curDocs);	// gets the DocNode list from the first node
		// if there you fail here, it means that the list is empty/
		if(success){
			success = removeTopDoc(wordList, &nextDocs);	// gets the next DocNode list from
									// the next WordChainNode
			while(success){		// if you fail here, there was only one WordChainNode in the list

				// process the DocNodes together by ORing them
				DocMergedID(&curDocs, &nextDocs);
				ProcessOR(&curDocs);
				// move on to the next DocNodes from the next WordChainNode.
				success = removeTopDoc(wordList, &nextDocs);
			}
		}
		// the list was empty, so found nothing. 
		else{
			printf("Found 0 pages\n"); 
			continue;
		}

		// sort by the rank and print the results.
		SortByRank(&curDocs);
		PrintQueryResult(curDocs, argv[2]);
		free(wordList);		// clean up for next query
	}
	cleanup:
		if(invertedIndex) DeleteHashTable(invertedIndex); 	// final clean up
		if(!status){ 
			fprintf(stderr, ANSI_COLOR_RED "Failed inverted index building."  ANSI_COLOR_RESET "\n");
			return 0;
		}
	return 1;
}