int main(){ // create a list and keep adding different words SinLL *wordList = CreateSinLL(); char *word; word = calloc(20, sizeof(char)); strcpy(word, "kitty"); // testing appendNewWordChain function appendNewWordChain(word, wordList); char *word2; word2 = calloc(20, sizeof(char)); strcpy(word2, "doggy"); appendNewWordChain(word2, wordList); char *word5; word5 = calloc(20, sizeof(char)); strcpy(word5, "bat"); // testing appendWord function appendWord(word5, wordList); char *word4; word4 = calloc(20, sizeof(char)); strcpy(word4, "cat"); // doing a few more adds appendNewWordChain(word4, wordList); char *word3; word3 = calloc(20, sizeof(char)); strcpy(word3, "turtle"); appendWord(word3, wordList); WordChainNode *printChain = wordList->head; WordChainNode *tempPrintChain; // printing the words in the list one by one while(printChain){ WordsLL *words = printChain->words; while(words){ printf("Word is: %s\n", (words->word)); free(words->word); words = words->nextWord; } printf("\n"); tempPrintChain = printChain; DeleteWordChainNode(tempPrintChain); printChain = printChain->nextWords; } // expected results printf("Words should be kitty doggy bat cat turtle."); free(wordList); return 1; }
/*
 * Transmit function.
 *
 * Hands `word` from city to city along the `next` links starting at `first`,
 * then again along the `prev` links starting at `last`. Each hop copies the
 * message to the neighbour, empties it on the sender and bumps `operations`.
 * Whatever message the final city of the return trip holds is passed to
 * appendWord().
 *
 * If the list has no cities (`first` or `last` is NULL) there is nobody to
 * relay through, so the call returns without transmitting or appending.
 *
 * Enable the DEBUG comments to see the flow during execution.
 */
void LinkedList::transmitMsg(string word){
    // Guard against an empty network instead of dereferencing NULL.
    if(first == NULL || last == NULL){
        return;
    }

    City *head = first;
    head->msg = word;
    //cout << "DEBUG: Transmitted " << head->msg << " to " << head->name << endl;

    // Outbound leg: first -> last
    while(head->next != NULL){
        head->next->msg = head->msg;
        // Empty the sender's slot. Assigning '\0' would instead leave a
        // one-character string containing NUL, which is not empty.
        head->msg.clear();
        head = head->next;
        ++operations;
        //cout << "DEBUG: Transmitted " << head->msg << " to " << head->name << endl;
    }

    //Have hit the end of the line... retransmit from last
    //cout << "************* TURNAROUND ***********" << endl;
    head = last;
    head->msg = word;

    // Return leg: last -> first
    while(head->prev != NULL){
        head->prev->msg = head->msg;
        head->msg.clear();
        head = head->prev;
        ++operations;
        //cout << "DEBUG: RE-Transmitted " << head->msg << " to " << head->name << endl;
    }

    //Append to final sentence after passing through the last city
    appendWord(head->msg);
}
/**
 * Constructs a text from a single word.
 *
 * A new shared `word` object is built from `w` with make_shared (presumably a
 * copy of `w` -- confirm against word's constructors) and handed to
 * appendWord().
 *
 * @param w word used to initialise the text
 */
text::text(const word& w) { appendWord(make_shared <word>(w)); }
void text::createFromString(const string& in, const charset& ch) { size_t asciiCount = 0; size_t asciiPercent = 0; removeAllWords(); // Check whether there is a recommended encoding for this charset. // If so, the whole buffer will be encoded. Else, the number of // 7-bit (ASCII) bytes in the input will be used to determine if // we need to encode the whole buffer. encoding recommendedEnc; const bool alwaysEncode = ch.getRecommendedEncoding(recommendedEnc); if (!alwaysEncode) { asciiCount = utility::stringUtils::countASCIIchars(in.begin(), in.end()); asciiPercent = (in.length() == 0 ? 100 : (100 * asciiCount) / in.length()); } // If there are "too much" non-ASCII chars, encode everything if (alwaysEncode || asciiPercent < 60) // less than 60% ASCII chars { appendWord(make_shared <word>(in, ch)); } // Else, only encode words which need it else { bool is8bit = false; // is the current word 8-bit? bool prevIs8bit = false; // is previous word 8-bit? unsigned int count = 0; // total number of words for (size_t end = in.size(), pos = 0, start = 0 ; ; ) { if (pos == end || parserHelpers::isSpace(in[pos])) { const string chunk(in.begin() + start, in.begin() + pos); if (pos != end) ++pos; if (is8bit) { if (count && prevIs8bit) { // No need to create a new encoded word, just append // the current word to the previous one. shared_ptr <word> w = getWordAt(getWordCount() - 1); w->getBuffer() += " " + chunk; } else { if (count) { shared_ptr <word> w = getWordAt(getWordCount() - 1); w->getBuffer() += ' '; } appendWord(make_shared <word>(chunk, ch)); prevIs8bit = true; ++count; } } else { if (count && !prevIs8bit) { shared_ptr <word> w = getWordAt(getWordCount() - 1); w->getBuffer() += " " + chunk; } else { appendWord(make_shared <word> (chunk, charset(charsets::US_ASCII))); prevIs8bit = false; ++count; } } if (pos == end) break; is8bit = false; start = pos; } else if (!parserHelpers::isAscii(in[pos])) { is8bit = true; ++pos; } else { ++pos; } } } }
/**
 * Constructs a text from a single word.
 *
 * The result of vmime::create <word>(w) is handed to appendWord().
 * NOTE(review): vmime::create presumably allocates a new reference-counted
 * word copied from `w` -- confirm against its declaration.
 *
 * @param w word used to initialise the text
 */
text::text(const word& w) { appendWord(vmime::create <word>(w)); }
int main(int argc, char* argv[]){ int success; // contains 1 if removing from SinLL was successful int funcSuccess; int orNext; // contains > 0 if the next word in query should be ORed int firstAdd; // contains > 0 if the addition to SinLL is the first addition int tempChar; // used to flush the stdin for too long inputs char query[MAX_QUERY_LEN]; // contains string of query char *getsSuccess; // determines if EOF is met. int status = 1; SinLL *wordList; if(argc != 3){ // invalid number of arguments fprintf(stderr, ANSI_COLOR_RED "Usage: query [INDEXER OUTPUT FILE] [CRAWLER OUTPUT FILE DIRECTORY]" ANSI_COLOR_RESET "\n"); return 0; } if(!(access(argv[1], F_OK) != -1)){ // invalid file fprintf(stderr, ANSI_COLOR_RED "First argument is not a valid file." ANSI_COLOR_RESET "\n"); return 0; } if(!IsDir(argv[2])){ // invalid "directory" fprintf(stderr, ANSI_COLOR_RED "Second argument is not a directory." ANSI_COLOR_RESET "\n"); return 0; } HashTable *invertedIndex; invertedIndex = calloc(1, sizeof(HashTable)); if(!invertedIndex){ status = 0; goto cleanup; } funcSuccess = readFile(argv[1], invertedIndex); // recreate inverted index if(!funcSuccess){ status = 0; goto cleanup; } while(1){ // get the query from user fputs("QUERY> ", stdout); fflush(stdout); getsSuccess = fgets(query, sizeof(char)*MAX_QUERY_LEN, stdin); if(!getsSuccess) break; // EOF means exiting program // this means the user input more than MAX_QUERY_LEN characters to query if(getsSuccess[strlen(getsSuccess)-1] != '\n'){ fprintf(stderr, ANSI_COLOR_RED "Query length is over the maximum 1000 characters!" ANSI_COLOR_RESET "\n"); while((tempChar = getchar()) != '\n' && tempChar != EOF){ /*do nothing*/ } continue; } // at this stage, the next add is the first add, and we have not seen a // OR yet. 
orNext = 0; firstAdd = 1; wordList = CreateSinLL(); if(!wordList) break; char *wordP; wordP = strtok(query," "); // get all the words from the query while(wordP){ // last word in query will have a \n attached to it, so if // there is a \n at the end of a word, take that out if(wordP[strlen(wordP)-1] == '\n'){ wordP[strlen(wordP)-1] = 0; } // ignore ANDs. if(strcmp(AND, wordP) == 0){ wordP = strtok (NULL, " "); continue; } // ignore ORs but make sure you OR the next coming word. if(strcmp(OR, wordP) == 0){ orNext = 1; wordP = strtok (NULL, " "); continue; } // make word lowercase. If this word is the first one, or // the previous word was OR, make a new node in the SinLL // of WordChainList NormalizeWord(wordP); if(firstAdd){ funcSuccess = appendNewWordChain(wordP, wordList); if(!funcSuccess) break; firstAdd = 0; } else if(orNext){ funcSuccess = appendNewWordChain(wordP, wordList); if(!funcSuccess) break; orNext = 0; } // if not the previous two cases, just append the word to // current node. else{ appendWord(wordP, wordList); } wordP = strtok (NULL, " "); } // first process will AND all the words contained in each WordChainNodes // of the list. WordChainNode *curWordChain = wordList->head; while(curWordChain){ // while there are more nodes firstAdd = 1; DocNode *tempProcessDocNode; // contains original DocNodes to AND from index DocNode *processDocNode; // contains copied version of above. WordsLL *wordsProc = curWordChain->words; // gettng first set of words. while(wordsProc){ // while there are more words // get DocNodes associated with that word from the inverted index and // copy it as to not mess up the inverted index. tempProcessDocNode = DocsFromWordNode(wordsProc->word, invertedIndex); processDocNode = CopyDocs(tempProcessDocNode); // merge the above DocNodes with the DocNodes saved at the current // WordChainNode. DocMergedID(&processDocNode, &(curWordChain->docs)); // if it is the first add, we want to skip this step. 
If it isnt the // first add, and the above DocNodes with the ocNodes saved at the current // WordChainNode. if(!firstAdd){ ProcessAND(&processDocNode); } // Add the processed (ANDed) DocNode chain at the current // WordChainNode. AddDocNodeChain(curWordChain, processDocNode); // iterate through to the next word at the current node. wordsProc = wordsProc->nextWord; firstAdd = 0; } // move on to the next node. curWordChain = curWordChain->nextWords; } // now we OR each individual WordChainNodes' DocNode lists. curWordChain = wordList->head; DocNode *curDocs; DocNode *nextDocs; success = removeTopDoc(wordList, &curDocs); // gets the DocNode list from the first node // if there you fail here, it means that the list is empty/ if(success){ success = removeTopDoc(wordList, &nextDocs); // gets the next DocNode list from // the next WordChainNode while(success){ // if you fail here, there was only one WordChainNode in the list // process the DocNodes together by ORing them DocMergedID(&curDocs, &nextDocs); ProcessOR(&curDocs); // move on to the next DocNodes from the next WordChainNode. success = removeTopDoc(wordList, &nextDocs); } } // the list was empty, so found nothing. else{ printf("Found 0 pages\n"); continue; } // sort by the rank and print the results. SortByRank(&curDocs); PrintQueryResult(curDocs, argv[2]); free(wordList); // clean up for next query } cleanup: if(invertedIndex) DeleteHashTable(invertedIndex); // final clean up if(!status){ fprintf(stderr, ANSI_COLOR_RED "Failed inverted index building." ANSI_COLOR_RESET "\n"); return 0; } return 1; }