C++ (Cpp) StoreWord Exemples

Exemple #1

0

Afficher le fichier

Fichier : display.cpp Projet : zpzjzj/MipsBox

/*
 * Thread body: periodically publishes the host time into simulated memory.
 *
 * While waiting on hRunMutex keeps timing out (75 ms per wait), the current
 * time() value is split into two 32-bit halves and written with StoreWord to
 * SYS_TIME_ADDR (low half) and SYS_TIME_ADDR+4 (high half). The stores are
 * bracketed by hScreenMutex.
 *
 * argv: unused.
 */
void CountTime(void *argv){
	(void)argv;
	time_t currTime;
	while(WaitForSingleObject(hRunMutex, 75L) == WAIT_TIMEOUT){
		currTime = time(&currTime);
		//printf("currTime: %X\n", currTime);
		WaitForSingleObject(hScreenMutex, INFINITE);

		// Widen to 64 bits before splitting: shifting a 32-bit time_t right by
		// 32 is undefined behaviour, so the shift is done on a 64-bit value.
		const unsigned long long stamp = static_cast<unsigned long long>(currTime);
		size_t tmp = static_cast<size_t>(stamp & 0xFFFFFFFFULL);
		StoreWord(tmp, SYS_TIME_ADDR);
		tmp = static_cast<size_t>(stamp >> 32);
		StoreWord(tmp, SYS_TIME_ADDR+4);
		ReleaseMutex(hScreenMutex);
		// NOTE(review): the wait above returned WAIT_TIMEOUT, so this thread
		// does not appear to own hRunMutex here and this release presumably
		// fails; kept to preserve existing behaviour -- confirm and remove.
		ReleaseMutex(hRunMutex);
	}
}

Exemple #2

0

Afficher le fichier

Fichier : display.cpp Projet : zpzjzj/MipsBox

/*
 * Thread body: polls the console keyboard and forwards key presses to the
 * simulated receiver device.
 *
 * A local copy of the receiver control word is loaded once and gets its
 * IO_INTERRUPT_ENABLE bit set. The loop runs until prog_finished is set or
 * the 75 ms wait on hRunMutex stops timing out. Each pending key is written
 * to RECEIVER_DATA; when all three status/control bits tested below are set,
 * an interrupt is raised and the control word (with IO_READY) is stored back.
 *
 * argv: unused.
 */
void IOKey(void *argv){
	reg_type receiver_control = LoadWord(RECEIVER_CONTROL);
	SetBit(&receiver_control, IO_INTERRUPT_ENABLE);

	for(;;){
		if(prog_finished)
			break;
		if(WaitForSingleObject(hRunMutex, 75L) != WAIT_TIMEOUT)
			break;

		if(kbhit()){
			char key = getch();
			// original author's note: the mutex is taken inside StoreByte
			StoreByte(key, RECEIVER_DATA);

			// evaluated left to right with short-circuiting, as before
			const bool raise = bitState(cpo_reg[STATUS], INTERRUPT_ENABLE)
				&& bitState(cpo_reg[STATUS], EXCEPTION_LEVEL)
				&& bitState(receiver_control, IO_INTERRUPT_ENABLE);
			if(raise){
				SetCause(INTERRUPT);
				SetBit(&cpo_reg[CAUSE], 0XD); // set pending bit
				/* original author's note: may need to be rectified */
				SetBit(&receiver_control, IO_READY);
				StoreWord(receiver_control, RECEIVER_CONTROL);
			}
		}

		ReleaseMutex(hRunMutex); // unlock
	}
}

Exemple #3

0

Afficher le fichier

Fichier : spellcheck.cpp Projet : thirupathibandam/ChatScript

/*
 * Look for a position at which `word` can be split into two known words.
 *
 * Returns:
 *   > 0  index in `word` where the second part starts,
 *   0    no split found,
 *   -1   more than one candidate split was found (ambiguous).
 *
 * A leading number followed by a known common word ("5dollars") is handled
 * first; the number part is stored as a number word via StoreWord.
 */
static int SplitWord(char* word)
{
	WORDP D2;
	bool good;
	int breakAt = 0;
	if (IsDigit(*word))
    {
		while (IsDigit(word[++breakAt]) || word[breakAt] == '.'){;} //   find end of number
		// breakAt must fit in number[] below (strncpy plus terminator)
        if (word[breakAt] && breakAt < MAX_WORD_SIZE) // found end of number
		{
			D2 = FindWord(word+breakAt,0,PRIMARY_CASE_ALLOWED);
			if (D2)
			{
				good = (D2->properties & (PART_OF_SPEECH|FOREIGN_WORD)) != 0 || (D2->internalBits & HAS_SUBSTITUTE) != 0; 
				if (good && (D2->systemFlags & AGE_LEARNED))// must be common words we find
				{
					char number[MAX_WORD_SIZE];
					strncpy(number,word,breakAt);
					number[breakAt] = 0;
					StoreWord(number,ADJECTIVE|NOUN|ADJECTIVE_NUMBER|NOUN_NUMBER); 
					return breakAt; // split here
				}
			}
		}
    }

	//  try all combinations of breaking the word into two known words
	breakAt = 0;
	size_t len = strlen(word);
	// the language does not change while we scan, so test it once
	const bool english = !stricmp(language,"english");
	const bool french = !english && !stricmp(language,"french");
	// "k + 1 < len" rather than "k < len-1": the latter wraps around when len is 0
    for (unsigned int k = 1; k + 1 < len; ++k)
    {
        if (k == 1)
        {
            if (english && *word != 'a' && *word != 'A' && *word != 'i' && *word != 'I') continue; //   only a and i are allowed single-letter words
            if (french && *word != 'y' && *word != 'a' && *word != 'A' && !SameUTF(word,"à") && !SameUTF(word, "À") && !SameUTF(word, "ô") && !SameUTF(word,"Ô")) continue; //   in french only y, a and ô are allowed single-letter words
        }
		WORDP D1 = FindWord(word,k,PRIMARY_CASE_ALLOWED);
        if (!D1) continue;
		good = (D1->properties & (PART_OF_SPEECH|FOREIGN_WORD)) != 0 || (D1->internalBits & HAS_SUBSTITUTE) != 0; 
		if (!good || !(D1->systemFlags & AGE_LEARNED)) continue; // must be normal common words we find

        D2 = FindWord(word+k,len-k,PRIMARY_CASE_ALLOWED);
        if (!D2) continue;
        good = (D2->properties & (PART_OF_SPEECH|FOREIGN_WORD)) != 0 || (D2->internalBits & HAS_SUBSTITUTE) != 0;
		if (!good || !(D2->systemFlags & AGE_LEARNED) ) continue; // must be normal common words we find

        if (!breakAt) breakAt = k; // found a split
		else // found multiple places to split... dont know what to do
        {
           breakAt = -1; 
           break;
		}
    }
	return breakAt;
}

Exemple #4

0

Afficher le fichier

Fichier : spellcheck.cpp Projet : geckom/ChatScript

/*
 * Try to correct the i-th word of the current sentence (wordStarts[i]).
 *
 * Attempts, in order:
 *   1. splitting a run-together word ("talkabout") into two words, inserting
 *      the second one into wordStarts,
 *   2. re-splitting this word joined with the next one ("talkab out"),
 *   3. collapsing letters repeated three or more times ("ammmmazing"),
 *   4. SpellFix().
 *
 * Returns the replacement text, the word itself when it must be left alone,
 * or NULL when no correction is proposed. The result of step 3 lives in a
 * static buffer that is overwritten by the next call.
 */
static char* SpellCheck(unsigned int i)
{
    //   on entry we will have passed over words which are KnownWord (including bases) or isInitialWord (all initials)
    //   wordstarts from 1 ... wordCount is the incoming sentence words (original). We are processing the ith word here.
    char* word = wordStarts[i];
	if (!*word) return NULL;
	if (!stricmp(word,loginID) || !stricmp(word,computerID)) return word; //   dont change his/our name ever

	size_t len = strlen(word);
	if (len > 2 && word[len-2] == '\'') return word;	// dont do anything with ' words

    //   test for run togetherness like "talkabout fingers"
    int breakAt = SplitWord(word);
    if (breakAt > 0)//   we found a split, insert 2nd word into word stream
    {
        ++wordCount;
		memmove(wordStarts+i+1,wordStarts+i,sizeof(char*) * (wordCount-i)); // open up a slot for a new word
        wordStarts[i+1] = reuseAllocation(wordStarts[i+1],wordStarts[i]+breakAt); // set this to the second word (shared from within 1st word)
        return FindWord(wordStarts[i],breakAt,PRIMARY_CASE_ALLOWED)->word; //   1st word gets replaced, we added valid word after
    }

	// now imagine partial runtogetherness, like "talkab out fingers"
	if (i < wordCount)
	{
		char* next = wordStarts[i+1];
		// only join when both words plus the terminator fit in tmp
		if (len + strlen(next) < MAX_WORD_SIZE)
		{
			char tmp[MAX_WORD_SIZE];
			strcpy(tmp,word);
			strcat(tmp,next);
			breakAt = SplitWord(tmp);
			if (breakAt > 0) // replace words with the dual pair
			{
				wordStarts[i+1] = reuseAllocation(wordStarts[i+1],StoreWord(tmp+breakAt)->word); // set this to the second word (shared from within 1st word)
				return FindWord(tmp,breakAt,PRIMARY_CASE_ALLOWED)->word; // 1st word gets replaced, we added valid word after
			}
		}
	}

    //   remove any nondigit characters repeated more than once. Dont do this earlier, we want substitutions to have a chance at it first.  ammmmmmazing
	if (len < MAX_WORD_SIZE) // the squeezed copy is never longer than the word, so it fits
	{
		static char word1[MAX_WORD_SIZE];
		char* ptr = word-1; 
		char* ptr1 = word1;
		while (*++ptr)
		{
		   *ptr1 = *ptr;
		   while (ptr[1] == *ptr1 && ptr[2] == *ptr1 && (*ptr1 < '0' || *ptr1 > '9')) ++ptr; // skip double repeats
		   ++ptr1;
		}
		*ptr1 = 0;
		if (FindCanonical(word1,0,true) && !IsUpperCase(*word1)) return word1; // this is a different form of a canonical word so its ok
	}

	//   now use word spell checker (returns NULL when it has no suggestion)
    return SpellFix(word,i,PART_OF_SPEECH); 
}

Exemple #5

0

Afficher le fichier

Fichier : spellcheck.cpp Projet : Sreeram-ganesan/ChatScript

bool SpellCheckSentence()
{
	WORDP D,E;
	fixedSpell = false;
	bool lowercase = false;
	int language = ENGLISH;
	char* lang = GetUserVariable((char*)"$cs_language");
	if (lang && !stricmp(lang,(char*)"spanish")) language = SPANISH;
	
	// check for all uppercase
	for (int i = FindOOBEnd(1) + 1; i <= wordCount; ++i) // skip start of sentence
	{
		char* word = wordStarts[i];
		size_t len = strlen(word);
		for (int j = 0; j < (int)len; ++j) 
		{
			if (IsLowerCase(word[j])) 
			{
				lowercase = true;
				i = j = 1000;
			}
		}
	}
	if (!lowercase && wordCount > 2) // must have several words in uppercase
	{
		for (int i = FindOOBEnd(1); i <= wordCount; ++i)
		{
			char* word = wordStarts[i];
			MakeLowerCase(word);
		}
	}

	int startWord = FindOOBEnd(1);
	for (int i = startWord; i <= wordCount; ++i)
	{
		char* word = wordStarts[i];
		if (!word || !word[1] || *word == '"' ) continue; // illegal or single char or quoted thingy 
		size_t len = strlen(word);

		// dont spell check uppercase not at start or joined word
		if (IsUpperCase(word[0]) && (i != startWord || strchr(word,'_')) && tokenControl & NO_PROPER_SPELLCHECK) continue; 
		//  dont  spell check email or other things with @ or . in them
		if (strchr(word,'@') || strchr(word,'.') || strchr(word,'$')) continue;

		// dont spell check names of json objects or arrays
		if (!strnicmp(word,"ja-",3) || !strnicmp(word,"jo-",3)) continue;

		char* known = ProbableKnownWord(word);
		if (known && !strcmp(known,word)) continue;	 // we know it
		if (known && strcmp(known,word)) 
		{
			char* tokens[2];
			if (!IsUpperCase(*known)) // revised the word to lower case (avoid to upper case like "fields" to "Fields"
			{
				WORDP D = FindWord(known,0,LOWERCASE_LOOKUP);
				if (D) 
				{
					tokens[1] = D->word;
					ReplaceWords(i,1,1,tokens);
					fixedSpell = true;
					continue;
				}
			}
			else // is uppercase a concept member? then revise upwards
			{
				WORDP D = FindWord(known,0,UPPERCASE_LOOKUP);
				if (IsConceptMember(D))
				{
					tokens[1] = D->word;
					ReplaceWords(i,1,1,tokens);
					fixedSpell = true;		
					continue;
				}
			}
		}

		char* p = word -1;
		unsigned char c;
		char* hyphen = 0;
		while ((c = *++p) != 0)
		{ 
			++len;
			if (c == '-') hyphen = p; // note is hyphenated - use trailing
		}
		if (len == 0 || GetTemperatureLetter(word)) continue;	// bad ignore utf word or llegal length - also no composite words
		if (c && c != '@' && c != '.') // illegal word character
		{
			if (IsDigit(word[0]) || len == 1){;} // probable numeric?
			// accidental junk on end of word we do know immedately?
			else if (i > 1 && !IsAlphaUTF8OrDigit(wordStarts[i][len-1]) )
			{
				WORDP entry,canonical;
				char word[MAX_WORD_SIZE];
				strcpy(word,wordStarts[i]);
				word[len-1] = 0;
				uint64 sysflags = 0;
				uint64 cansysflags = 0;
				WORDP revise;
				GetPosData(i,word,revise,entry,canonical,sysflags,cansysflags,true,true); // dont create a non-existent word
				if (entry && entry->properties & PART_OF_SPEECH)
				{
					wordStarts[i] = reuseAllocation(wordStarts[i],entry->word);
					fixedSpell = true;
					continue;	// not a legal word character, leave it alone
				}
			}
		}

		// see if we know the other case
		if (!(tokenControl & (ONLY_LOWERCASE|STRICT_CASING)) || (i == startSentence && !(tokenControl & ONLY_LOWERCASE)))
		{
			WORDP E = FindWord(word,0,SECONDARY_CASE_ALLOWED);
			bool useAlternateCase = false;
			if (E && E->systemFlags & PATTERN_WORD) useAlternateCase = true;
			if (E && E->properties & (PART_OF_SPEECH|FOREIGN_WORD))
			{
				// if the word we find is UPPER case, and this might be a lower case noun plural, don't change case.
				size_t len = strlen(word);
				if (word[len-1] == 's' ) 
				{
					WORDP F = FindWord(word,len-1);
					if (!F || !(F->properties & (PART_OF_SPEECH|FOREIGN_WORD))) useAlternateCase = true;
					else continue;
				}
				else useAlternateCase = true;
			}
			else if (E) // does it have a member concept fact
			{
				if (IsConceptMember(E)) 
				{
					useAlternateCase = true;
					break;
				}
			}
			if (useAlternateCase)
			{
				char* tokens[2];
				tokens[1] = E->word;
				ReplaceWords(i,1,1,tokens);
				fixedSpell = true;
				continue;	
			}
		}
		
		// merge with next token?
		char join[MAX_WORD_SIZE * 3];
		if (i != wordCount && *wordStarts[i+1] != '"' )
		{
			// direct merge as a single word
			strcpy(join,word);
			strcat(join,wordStarts[i+1]);
			WORDP D = FindWord(join,0,(tokenControl & ONLY_LOWERCASE) ?  PRIMARY_CASE_ALLOWED : STANDARD_LOOKUP);

			strcpy(join,word);
			if (!D || !(D->properties & PART_OF_SPEECH) ) // merge these two, except "going to" or wordnet composites of normal words  // merge as a compound word
			{
				strcat(join,(char*)"_");
				strcat(join,wordStarts[i+1]);
				D = FindWord(join,0,(tokenControl & ONLY_LOWERCASE) ?  PRIMARY_CASE_ALLOWED : STANDARD_LOOKUP);
			}

			if (D && D->properties & PART_OF_SPEECH && !(D->properties & AUX_VERB)) // merge these two, except "going to" or wordnet composites of normal words
			{
				WORDP P1 = FindWord(word,0,LOWERCASE_LOOKUP);
				WORDP P2 = FindWord(wordStarts[i+1],0,LOWERCASE_LOOKUP);
				if (!P1 || !P2 || !(P1->properties & PART_OF_SPEECH) || !(P2->properties & PART_OF_SPEECH)) 
				{
					char* tokens[2];
					tokens[1] = D->word;
					ReplaceWords(i,2,1,tokens);
					fixedSpell = true;
					continue;
				}
			}
		}   

		// break apart slashed pair like eat/feed
		char* slash = strchr(word,'/');
		if (slash && slash != word && slash[1]) //   break apart word/word
		{
			if ((wordCount + 2 ) >= REAL_SENTENCE_LIMIT) continue;	// no room
			*slash = 0;
			D = StoreWord(word);
			*slash = '/';
			E = StoreWord(slash+1);
			char* tokens[4];
			tokens[1] = D->word;
			tokens[2] = "/";
			tokens[3] = E->word;
			ReplaceWords(i,1,3,tokens);
			fixedSpell = true;
			--i;
			continue;
		}

		// see if hypenated word should be separate or joined (ignore obvious adjective suffix)
		if (hyphen &&  !stricmp(hyphen,(char*)"-like"))
		{
			StoreWord(word,ADJECTIVE_NORMAL|ADJECTIVE); // accept it as a word
			continue;
		}
		else if (hyphen && (hyphen-word) > 1)
		{
			char test[MAX_WORD_SIZE];
			char first[MAX_WORD_SIZE];

			// test for split
			*hyphen = 0;
			strcpy(test,hyphen+1);
			strcpy(first,word);
			*hyphen = '-';

			WORDP E = FindWord(test,0,LOWERCASE_LOOKUP);
			WORDP D = FindWord(first,0,LOWERCASE_LOOKUP);
			if (*first == 0) 
			{
				wordStarts[i] = AllocateString(wordStarts[i] + 1); // -pieces  want to lose the leading hypen  (2-pieces)
				fixedSpell = true;
			}
			else if (D && E) //   1st word gets replaced, we added another word after
			{
				if ((wordCount + 1 ) >= REAL_SENTENCE_LIMIT) continue;	// no room
				char* tokens[3];
				tokens[1] = D->word;
				tokens[2] = E->word;
				ReplaceWords(i,1,2,tokens);
				fixedSpell = true;
				--i;
			}
			else if (!stricmp(test,(char*)"old") || !stricmp(test,(char*)"olds")) //   break apart 5-year-old
			{
				if ((wordCount + 1 ) >= REAL_SENTENCE_LIMIT) continue;	// no room
				D = StoreWord(first);
				E = StoreWord(test);
				char* tokens[3];
				tokens[1] = D->word;
				tokens[2] = E->word;
				ReplaceWords(i,1,2,tokens);
				fixedSpell = true;
				--i;
			}
			else // remove hyphen entirely?
			{
				strcpy(test,first);
				strcat(test,hyphen+1);
				D = FindWord(test,0,(tokenControl & ONLY_LOWERCASE) ?  PRIMARY_CASE_ALLOWED : STANDARD_LOOKUP);
				if (D) 
				{
					wordStarts[i] = D->word;
					fixedSpell = true;
					--i;
				}
			}
			continue; // ignore hypenated errors that we couldnt solve, because no one mistypes a hypen
		}
		
		// leave uppercase in first position if not adjusted yet... but check for lower case spell error
		if (IsUpperCase(word[0])  && tokenControl & NO_PROPER_SPELLCHECK) 
		{
			char lower[MAX_WORD_SIZE];
			MakeLowerCopy(lower,word);
			WORDP D = FindWord(lower,0,LOWERCASE_LOOKUP);
			if (!D && i == startWord)
			{
				char* okword = SpellFix(lower,i,PART_OF_SPEECH,language); 
				if (okword)
				{
					char* tokens[2];
					WORDP E = StoreWord(okword);
					tokens[1] = E->word;
					ReplaceWords(i,1,1,tokens);
					fixedSpell = true;
				}
			}
			continue; 
		}

		if (*word != '\'' && (!FindCanonical(word, i,true) || IsUpperCase(word[0]))) // dont check quoted or findable words unless they are capitalized
		{
			word = SpellCheck(i,language);

			// dont spell check proper names to improper, if word before or after is lower case originally
			if (word && i != 1 && originalCapState[i] && !IsUpperCase(*word))
			{
				if (!originalCapState[i-1]) return false;
				else if (i != wordCount && !originalCapState[i+1]) return false;
			}

			if (word && !*word) // performed substitution on prior word, restart this one
			{
				fixedSpell = true;
				--i;
				continue;
			}
			if (word) 
			{
				char* tokens[2];
				tokens[1] = word;
				ReplaceWords(i,1,1,tokens);
				fixedSpell = true;
				continue;
			}
		}
    }
	return fixedSpell;
}

Exemple #6

0

Afficher le fichier

Fichier : spellcheck.cpp Projet : Sreeram-ganesan/ChatScript

/*
 * Decide whether `word` is (probably) a word the dictionary already knows.
 *
 * Lookups are tried as-is, in lower case and capitalised; then adjective and
 * adverb forms, verb forms (via GetInfinitive) and simple noun plurals.
 *
 * Returns the dictionary spelling (which may differ in case from `word`),
 * `word` itself for synthesized or interpolated forms, or NULL when the word
 * is not recognised.
 *
 * Side effects: may add entries with StoreWord, and upper-cases the first
 * character of `word` in place when it is accepted as a proper-noun plural.
 */
char* ProbableKnownWord(char* word)
{
	if (strchr(word,' ') || strchr(word,'_')) return word; // not user input, is synthesized
	size_t len = strlen(word);

	// do we know the word as is?
	WORDP D = FindWord(word,0,PRIMARY_CASE_ALLOWED);
	if (D) 
	{
		if (D->properties & FOREIGN_WORD || *D->word == '~' || D->systemFlags & PATTERN_WORD) return D->word;	// we know this word clearly or its a concept set ref emotion
		if (D->properties & PART_OF_SPEECH && !IS_NEW_WORD(D)) return D->word; // old word we know
		if (IsConceptMember(D)) return D->word;
		// are there facts using this word? -- issue with facts because on seeing input second time, having made facts of original, we see original
//		if (GetSubjectNondeadHead(D) || GetObjectNondeadHead(D) || GetVerbNondeadHead(D)) return D->word;
	}
	
	char lower[MAX_WORD_SIZE];
	MakeLowerCopy(lower,word);

	// do we know the word in lower case?
	D = FindWord(word,0,LOWERCASE_LOOKUP);
	if (D) // direct recognition
	{
		if (D->properties & FOREIGN_WORD || *D->word == '~' || D->systemFlags & PATTERN_WORD) return D->word;	// we know this word clearly or its a concept set ref emotion
		if (D->properties & PART_OF_SPEECH && !IS_NEW_WORD(D)) return D->word; // old word we know
		if (IsConceptMember(D)) return D->word;

		// are there facts using this word?
//		if (GetSubjectNondeadHead(D) || GetObjectNondeadHead(D) || GetVerbNondeadHead(D)) return D->word;
	}

	// do we know the word in upper case?
	char upper[MAX_WORD_SIZE];
	MakeLowerCopy(upper,word);
	upper[0] = GetUppercaseData(upper[0]);
	D = FindWord(upper,0,UPPERCASE_LOOKUP);
	if (D) // direct recognition
	{
		if (D->properties & FOREIGN_WORD || *D->word == '~' || D->systemFlags & PATTERN_WORD) return D->word;	// we know this word clearly or its a concept set ref emotion
		if (D->properties & PART_OF_SPEECH && !IS_NEW_WORD(D)) return D->word; // old word we know
		if (IsConceptMember(D)) return D->word;

	// are there facts using this word?
//		if (GetSubjectNondeadHead(D) || GetObjectNondeadHead(D) || GetVerbNondeadHead(D)) return D->word;
	}

	// interpolate to lower case words 
	uint64 expectedBase = 0;
	if (ProbableAdjective(word,len,expectedBase) && expectedBase) return word;
	expectedBase = 0;
	if (ProbableAdverb(word,len,expectedBase) && expectedBase) return word;
	// is it a verb form
	char* verb = GetInfinitive(lower,true); // no new verbs
	if (verb) 
	{
		WORDP D =  StoreWord(lower,0); // verb form recognized
		return D->word;
	}
	
	// is it simple plural of a noun?
	if (len && word[len-1] == 's') // len guard: never read before the start of an empty word
	{
		WORDP E = FindWord(lower,len-1,LOWERCASE_LOOKUP);
		if (E && E->properties & NOUN) 
		{
			E = StoreWord(word,NOUN|NOUN_PLURAL);
			return E->word;	
		}
		E = FindWord(lower,len-1,UPPERCASE_LOOKUP);
		if (E && E->properties & NOUN) 
		{
			// index with an unsigned value: a plain char may be negative for bytes >= 0x80
			*word = toUppercaseData[(unsigned char)*word];
			E = StoreWord(word,NOUN|NOUN_PROPER_PLURAL);
			return E->word;	
		}
	}

	return NULL;
}

Exemple #7

0

Afficher le fichier

Fichier : spellcheck.cpp Projet : thirupathibandam/ChatScript

/*
 * Spell-check the words of the current sentence (wordStarts[FindOOBEnd(1)..wordCount])
 * in place.
 *
 * An all-uppercase sentence is lower-cased first. Then, for each word, in
 * order: backslashes become slashes, ' and " after a number become foot/inch,
 * several kinds of token are skipped (emails, urls, json names, fractions,
 * model numbers, currency), and the remaining words go through known-word
 * case revision, alternate-case lookup, merging with the next token, '/' and
 * hyphen handling, number/unit and run-together splitting, and finally
 * SpellCheck()/SpellFix().
 *
 * Returns fixedSpell, which is set to true by most of the replacements
 * (the capslock and "joined number word" replacements do not set it).
 */
bool SpellCheckSentence()
{
	WORDP D,E;
	fixedSpell = false;
	bool lowercase = false;
	
	// check for all uppercase (capslock)
	for (int i = FindOOBEnd(1); i <= wordCount; ++i) // skip start of sentence
	{
		char* word = wordStarts[i];
		if (!word[1]) continue; // autoconversion of letters to lower case should be ignored (eg A)
		if (!stricmp(word, "the")) continue;
		size_t len = strlen(word);
		for (int j = 0; j < (int)len; ++j) 
		{
			if (IsLowerCase(word[j])) 
			{
				lowercase = true;
				i = j = len+1000; // len might be BIG (oob data) so make sure beyond it)
			}
		}
	}

	if (!lowercase && wordCount > 2) // must have multiple words all in uppercase
	{
		for (int i = FindOOBEnd(1); i <= wordCount; ++i)
		{
			char* word = wordStarts[i];
			char myword[MAX_WORD_SIZE];
			MakeLowerCopy(myword,word);
			if (strcmp(word, myword))
			{
				char* tokens[2];
				tokens[1] = myword;
				ReplaceWords("caplocWord", i, 1, 1, tokens);
				originalCapState[i] = false;
			}
		}
	}

	int startWord = FindOOBEnd(1);
	for (int i = startWord; i <= wordCount; ++i)
	{
		char* word = wordStarts[i];
		char* tokens[2];

		// change any \ to /
		char newword[MAX_WORD_SIZE];
		bool altered = false;
		if (strlen(word) < MAX_WORD_SIZE)
		{
			strcpy(newword, word);
			char* at = newword;
			while ((at = strchr(at,'\\')))
			{
				*at = '/';
				altered = true;
			}
			if (altered) word = wordStarts[i] = StoreWord(newword, AS_IS)->word;
		}

		if (*word == '\'' && !word[1] && i != startWord && IsDigit(*wordStarts[i - 1]) && !stricmp(language, "english")) // fails if not digit bug
		{
			tokens[1] = (char*)"foot";
			ReplaceWords("' as feet", i, 1, 1, tokens);
			fixedSpell = true;
			continue;
		}
		if (*word == '"' && !word[1] && i != startWord && IsDigit(*wordStarts[i - 1]) && !stricmp(language, "english")) // fails if not digit bug
		{
			tokens[1] = (char*)"inch";
			ReplaceWords("\" as inch", i, 1, 1, tokens); // label used to be the copy-pasted "' as feet"
			fixedSpell = true;
			continue;
		}
		if (!word || !word[1] || *word == '"' ) continue; // illegal or single char or quoted thingy 
		size_t len = strlen(word);

		// dont spell check uppercase not at start or joined word
		if (IsUpperCase(word[0]) && (i != startWord || strchr(word,'_')) && tokenControl & NO_PROPER_SPELLCHECK) continue; 
		//  dont  spell check email or other things with @ or . in them
		if (strchr(word,'@') || strchr(word, '&')  || strchr(word,'.') || strchr(word,'$')) continue;

		// dont spell check names of json objects or arrays
		if (!strnicmp(word,"ja-",3) || !strnicmp(word,"jo-",3)) continue;

		// dont spell check web addresses
		if (!strnicmp(word,"http",4) || !strnicmp(word,"www",3)) continue;

		// nor fractions
		if (IsFraction(word))  continue; // fraction?

		// joined number words  like 100dollars
		char* at = word - 1;
		while (IsDigit(*++at) || *at == numberPeriod);
		if (IsDigit(*word) && strlen(at) > 3 && ProbableKnownWord(at))
		{
			char first[MAX_WORD_SIZE];
			strncpy(first, word, (at - word));
			first[at - word] = 0;
			char* tokens[3];
			tokens[1] = first;
			tokens[2] = at;
			ReplaceWords("joined number word", i, 1, 2, tokens);
			continue;
		}

		// nor model numbers
		if (IsModelNumber(word))
		{
			WORDP X = FindWord(word, 0, UPPERCASE_LOOKUP);
			if (X && IsConceptMember(X) && !strcmp(word,X->word)) // X is dereferenced, so it must exist
			{
				char* tokens[2];
				tokens[1] = X->word;
				ReplaceWords("KnownUpperModelNumber", i, 1, 1, tokens);
				fixedSpell = true;
			}
			continue;
		}

		char* number;
		if (GetCurrency((unsigned char*)word, number)) continue; // currency

		if (!stricmp(word, (char*)"am") && i != startWord && 
			(IsDigit(*wordStarts[i-1]) || IsNumber(wordStarts[i-1]) ==REAL_NUMBER) && !stricmp(language,"english")) // fails if not digit bug
		{
			char* tokens[2];
			tokens[1] = (char*)"a.m.";
			ReplaceWords("am as time", i, 1, 1, tokens);
			fixedSpell = true;
			continue;
		}

		char* known = ProbableKnownWord(word);
		if (known && !strcmp(known,word)) continue;	 // we know it
		if (known && strcmp(known,word)) 
		{
			WORDP D = FindWord(known);
			char* tokens[2];
			if ((!D || !(D->internalBits & UPPERCASE_HASH)) && !IsUpperCase(*known)) // revised the word to lower case (avoid to upper case like "fields" to "Fields"
			{
				WORDP X = FindWord(known,0,LOWERCASE_LOOKUP);
				if (X) 
				{
					tokens[1] = X->word;
					ReplaceWords("KnownWord",i,1,1,tokens);
					fixedSpell = true;
					continue;
				}
			}
			else // is uppercase a concept member? then revise upwards
			{
				WORDP X = FindWord(known,0,UPPERCASE_LOOKUP);
				// X must exist: the non-english alternative used to reach X->word with a null X
				if (X && (IsConceptMember(X) || stricmp(language,"english"))) // all german nouns are uppercase
				{
					tokens[1] = X->word;
					ReplaceWords("KnownUpper",i,1,1,tokens);
					fixedSpell = true;		
					continue;
				}
			}
		}

		// NOTE(review): len already holds strlen(word) and is incremented once more
		// per character here, so it ends at twice the length.
		char* p = word -1;
		unsigned char c;
		char* hyphen = 0;
		while ((c = *++p) != 0)
		{ 
			++len;
			if (c == '-') hyphen = p; // note is hyphenated - use trailing
		}
		if (len == 0 || GetTemperatureLetter(word)) continue;	// bad ignore utf word or llegal length - also no composite words
		// NOTE(review): c is always 0 once the loop above ends, so this block is never entered.
		if (c && c != '@' && c != '.') // illegal word character
		{
			if (IsDigit(word[0]) || len == 1){;} // probable numeric?
			// accidental junk on end of word we do know immedately?
			else if (i > 1 && !IsAlphaUTF8OrDigit(wordStarts[i][len-1]) )
			{
				WORDP entry,canonical;
				char word[MAX_WORD_SIZE];
				strcpy(word,wordStarts[i]);
				word[len-1] = 0;
				uint64 sysflags = 0;
				uint64 cansysflags = 0;
				WORDP revise;
				GetPosData(i,word,revise,entry,canonical,sysflags,cansysflags,true,true); // dont create a non-existent word
				if (entry && entry->properties & PART_OF_SPEECH)
				{
					wordStarts[i] = entry->word;
					fixedSpell = true;
					continue;	// not a legal word character, leave it alone
				}
			}
		}

		// see if we know the other case
		if (!(tokenControl & (ONLY_LOWERCASE|STRICT_CASING)) || (i == startSentence && !(tokenControl & ONLY_LOWERCASE)))
		{
			WORDP E = FindWord(word,0,SECONDARY_CASE_ALLOWED);
			bool useAlternateCase = false;
			if (E && E->systemFlags & PATTERN_WORD) useAlternateCase = true;
			if (E && E->properties & (PART_OF_SPEECH|FOREIGN_WORD))
			{
				// if the word we find is UPPER case, and this might be a lower case noun plural, don't change case.
				size_t len = strlen(word);
				if (word[len-1] == 's' ) 
				{
					WORDP F = FindWord(word,len-1);
					if (!F || !(F->properties & (PART_OF_SPEECH|FOREIGN_WORD))) useAlternateCase = true;
					else continue;
				}
				else useAlternateCase = true;
			}
			else if (E) // does it have a member concept fact
			{
				// A "break" used to follow this assignment; it left the whole word loop
				// and made the assignment a dead store. Fall through to the replacement.
				if (IsConceptMember(E)) useAlternateCase = true;
			}
			if (useAlternateCase)
			{
				char* tokens[2];
				tokens[1] = E->word;
				ReplaceWords("Alternatecase",i,1,1,tokens);
				fixedSpell = true;
				continue;	
			}
		}
		
		// merge with next token?
		char join[MAX_WORD_SIZE * 3];
		if (i != wordCount && *wordStarts[i+1] != '"' )
		{
			// direct merge as a single word
			strcpy(join,word);
			strcat(join,wordStarts[i+1]);
			WORDP D = FindWord(join,0,(tokenControl & ONLY_LOWERCASE) ?  PRIMARY_CASE_ALLOWED : STANDARD_LOOKUP);

			strcpy(join,word);
//			if (!D || !(D->properties & PART_OF_SPEECH) ) // merge these two, except "going to" or wordnet composites of normal words  // merge as a compound word
//			{
//				strcat(join,(char*)"_");
//				strcat(join,wordStarts[i+1]);
//				D = FindWord(join,0,(tokenControl & ONLY_LOWERCASE) ?  PRIMARY_CASE_ALLOWED : STANDARD_LOOKUP);
//			}    DONT CREATE _ words, let sequence handle it

			if (D && D->properties & PART_OF_SPEECH && !(D->properties & AUX_VERB)) // merge these two, except "going to" or wordnet composites of normal words
			{
				WORDP P1 = FindWord(word,0,LOWERCASE_LOOKUP);
				WORDP P2 = FindWord(wordStarts[i+1],0,LOWERCASE_LOOKUP);
				if (!P1 || !P2 || !(P1->properties & PART_OF_SPEECH) || !(P2->properties & PART_OF_SPEECH)) 
				{
					char* tokens[2];
					tokens[1] = D->word;
					ReplaceWords("merge",i,2,1,tokens);
					fixedSpell = true;
					continue;
				}
			}
		}   

		// break apart slashed pair like eat/feed
		char* slash = strchr(word,'/');
		if (slash && !slash[1] && len < MAX_WORD_SIZE) // remove trailing slash
		{
			strcpy(newword, word);
			newword[slash - word] = 0;
			word = wordStarts[i] = StoreWord(newword, AS_IS)->word;
		}
		if (slash && slash != word && slash[1]) //   break apart word/word
		{
			if ((wordCount + 2 ) >= REAL_SENTENCE_LIMIT) continue;	// no room
			*slash = 0;
			D = StoreWord(word);
			*slash = '/';
			E = StoreWord(slash+1);
			char* tokens[4];
			tokens[1] = D->word;
			tokens[2] = (char*)"/"; // explicit cast: a string literal is const in C++
			tokens[3] = E->word;
			ReplaceWords("Split",i,1,3,tokens);
			fixedSpell = true;
			--i; // revisit position i, which now holds the first piece
			continue;
		}

		// see if hypenated word should be separate or joined (ignore obvious adjective suffix)
		if (hyphen &&  !stricmp(hyphen,(char*)"-like"))
		{
			StoreWord(word,ADJECTIVE_NORMAL|ADJECTIVE); // accept it as a word
			continue;
		}
		else if (hyphen && (hyphen-word) > 1 && !IsPlaceNumber(word)) // dont break up fifty-second
		{
			char test[MAX_WORD_SIZE];
			char first[MAX_WORD_SIZE];

			// test for split
			*hyphen = 0;
			strcpy(test,hyphen+1);
			strcpy(first,word);
			*hyphen = '-';

			WORDP E = FindWord(test,0,LOWERCASE_LOOKUP);
			WORDP D = FindWord(first,0,LOWERCASE_LOOKUP);
			if (*first == 0) 
			{
				wordStarts[i] = AllocateHeap(wordStarts[i] + 1); // -pieces  want to lose the leading hypen  (2-pieces)
				fixedSpell = true;
			}
			else if (D && E) //   1st word gets replaced, we added another word after
			{
				if ((wordCount + 1 ) >= REAL_SENTENCE_LIMIT) continue;	// no room
				char* tokens[3];
				tokens[1] = D->word;
				tokens[2] = E->word;
				ReplaceWords("Pair",i,1,2,tokens);
				fixedSpell = true;
				--i;
			}
			else if (!stricmp(test,(char*)"old") || !stricmp(test,(char*)"olds")) //   break apart 5-year-old
			{
				if ((wordCount + 1 ) >= REAL_SENTENCE_LIMIT) continue;	// no room
				D = StoreWord(first);
				E = StoreWord(test);
				char* tokens[3];
				tokens[1] = D->word;
				tokens[2] = E->word;
				ReplaceWords("Break old",i,1,2,tokens);
				fixedSpell = true;
				--i;
			}
			else // remove hyphen entirely?
			{
				strcpy(test,first);
				strcat(test,hyphen+1);
				D = FindWord(test,0,(tokenControl & ONLY_LOWERCASE) ?  PRIMARY_CASE_ALLOWED : STANDARD_LOOKUP);
				if (D) 
				{
					wordStarts[i] = D->word;
					fixedSpell = true;
					--i;
				}
			}
			continue; // ignore hypenated errors that we couldnt solve, because no one mistypes a hypen
		}

		// see if number in front of unit split like 10mg
		if (IsDigit(*word))
		{
			char* at = word;
			while (*++at && IsDigit(*at)) {;}
			WORDP E = FindWord(at);
			if (E && strlen(at) > 2 && *at != 'm') // number in front of known word ( but must be longer than 2 char, 5th) but allow mg
			{
				char token1[MAX_WORD_SIZE];
				int len = at - word;
				strncpy(token1,word,len);
				token1[len] = 0;
				D = StoreWord(token1);
				char* tokens[4];
				tokens[1] = D->word;
				tokens[2] = E->word;
				ReplaceWords("Split",i,1,2,tokens);
				fixedSpell = true;
				continue;
			}
		}
		
		// leave uppercase in first position if not adjusted yet... but check for lower case spell error
		if (IsUpperCase(word[0])  && tokenControl & NO_PROPER_SPELLCHECK) 
		{
			char lower[MAX_WORD_SIZE];
			MakeLowerCopy(lower,word);
			WORDP D = FindWord(lower,0,LOWERCASE_LOOKUP);
			if (!D && i == startWord)
			{
				char* okword = SpellFix(lower,i,PART_OF_SPEECH); 
				if (okword)
				{
					char* tokens[2];
					WORDP E = StoreWord(okword);
					tokens[1] = E->word;
					ReplaceWords("Spell",i,1,1,tokens);
					fixedSpell = true;
				}
			}
			continue; 
		}

		// see if smooshed word pair
		size_t len1 = strlen(word);
		int j;
		if (!IsDigit(*word))
		{
			for (j = 1; (size_t)j < len1; ++j)
			{
				WORDP X1 = FindWord(word, j);  // any case
				// the tail is len1 - j characters long; this used to subtract the sentence index i
				WORDP X2 = FindWord(word + j, len1 - j); // any case

				if (X1 && X2 && (X1->word[1] || X1->word[0] == 'i' || X1->word[0] == 'I' || X1->word[0] == 'a'))
				{
					char* tokens[3];
					tokens[1] = X1->word;
					tokens[2] = X2->word;
					ReplaceWords("Split", i, 1, 2, tokens);
					fixedSpell = true;
					break;
				}
			}
			if ((size_t)j != len1) continue; // loop ended early, so a split was applied
		}

		if (*word != '\'' && (!FindCanonical(word, i,true) || IsUpperCase(word[0]))) // dont check quoted or findable words unless they are capitalized
		{
			word = SpellCheck(i);

			// dont spell check proper names to improper, if word before or after is lower case originally
			if (word && i != 1 && originalCapState[i] && !IsUpperCase(*word))
			{
				if (!originalCapState[i-1]) continue;
				else if (i != wordCount && !originalCapState[i+1]) continue;
			}

			if (word && !*word) // performed substitution on prior word, restart this one
			{
				fixedSpell = true;
				--i;
				continue;
			}
			if (word) 
			{
				char* tokens[2];
				tokens[1] = word;
				ReplaceWords("Spell",i,1,1,tokens);
				fixedSpell = true;
				continue;
			}
		}
    }
	return fixedSpell;
}

Exemple #8

0

Afficher le fichier

Fichier : spellcheck.cpp Projet : thirupathibandam/ChatScript

/*
 * SpellFix - choose a dictionary word to replace a token that failed lookup.
 *
 * originalWord  the token as typed.
 * start         position of the token in the sentence (used to look at
 *               wordStarts[start-1] / wordStarts[start+1]); 0 means the call
 *               comes from a stem-spell attempt, which disables stem spelling.
 * posflags      part-of-speech bits a replacement may have. When it equals
 *               PART_OF_SPEECH (no restriction) the neighbouring words are
 *               used to rule out some parts of speech.
 *
 * Returns the ->word of the selected dictionary entry, or NULL when the token
 * is empty, 100+ bytes long, starts with a digit, or no candidate survives.
 * Sets the global `multichoice` to true when more than one candidate tied.
 *
 * Candidates are gathered from the spelling lists for the token's length and
 * for length-1 / length+1, keeping only those with the smallest EditDistance
 * seen so far. Ties are then broken by the COMMONNESS bits in systemFlags.
 * BEEN_HERE is used as a temporary "already collected" mark and is cleared
 * again on every candidate before the function returns.
 */
char* SpellFix(char* originalWord,int start,uint64 posflags)
{
	multichoice = false;
	char word[MAX_WORD_SIZE];
	MakeLowerCopy(word, originalWord);
	char word1[MAX_WORD_SIZE];
	MakeUpperCopy(word1, originalWord); // NOTE(review): word1 is never read below - confirm MakeUpperCopy has no other effect, then drop
	WORDINFO realWordData;
	ComputeWordData(word, &realWordData);
	if (realWordData.bytelen >= 100 || realWordData.bytelen == 0) return NULL;
	if (IsDigit(*originalWord)) return NULL; // number-based words and numbers must be treated elsewhere
	bool hasUnderscore = (strchr(originalWord,'_')) ? true : false;
	bool isUpper = IsUpperCase(originalWord[0]);
	if (IsUpperCase(originalWord[1])) isUpper = false;	// not if all caps
	if (trace == TRACE_SPELLING) Log(STDTRACELOG,(char*)"Spell: %s\r\n",originalWord);

	//   Priority is to a word that looks like what the user typed, because the user probably would have noticed if it didnt and changed it. So add/delete  has priority over tranform
	const unsigned int maxChoices = 4000; // capacity of both candidate arrays
	WORDP choices[maxChoices];
	WORDP bestGuess[maxChoices];
	unsigned int index = 0;
	unsigned int bestGuessindex = 0;
	int min = 35; // allow 2 changes as needed

	uint64  pos = PART_OF_SPEECH;  // all pos allowed
	WORDP D;
	if (posflags == PART_OF_SPEECH && start < wordCount) // see if we can restrict word based on next word
	{
		D = FindWord(wordStarts[start+1],0,PRIMARY_CASE_ALLOWED);
		uint64 flags = (D) ? D->properties : (-1ull); //   if we dont know the word, it could be anything
		if ((flags & PART_OF_SPEECH) == PREPOSITION) pos &= -1 ^ (PREPOSITION | NOUN);   //   prep cannot be preceeded by noun or prep
		if (!(flags & (PREPOSITION | VERB | CONJUNCTION | ADVERB)) && flags & DETERMINER) pos &= -1 ^ (DETERMINER | ADJECTIVE | NOUN | ADJECTIVE_NUMBER | NOUN_NUMBER); //   determiner cannot be preceeded by noun determiner adjective
		if (!(flags & (PREPOSITION | VERB | CONJUNCTION | DETERMINER | ADVERB)) && flags & ADJECTIVE) pos &= -1 ^ (NOUN);
		if (!(flags & (PREPOSITION | NOUN | CONJUNCTION | DETERMINER | ADVERB | ADJECTIVE)) && flags & VERB) pos &= -1 ^ (VERB); //   we know all helper verbs we might be
		if (D && *D->word == '\'' && D->word[1] == 's' ) pos &= NOUN;    //   we can only be a noun if possessive - contracted 's should already be removed by now
	}
	if (posflags == PART_OF_SPEECH && start > 1) // restrict based on previous word
	{
		D = FindWord(wordStarts[start-1],0,PRIMARY_CASE_ALLOWED);
		uint64 flags = (D) ? D->properties : (-1); // if we dont know the word, it could be anything
		if (flags & DETERMINER) pos &= -1 ^ (VERB|CONJUNCTION|PREPOSITION|DETERMINER);
	}
	posflags &= pos; //   if pos types are known and restricted and dont match
	static int range[] = {0,-1,1,-2,2};
	for (unsigned int i = 0; i < 5; ++i)
	{
		if (i >= 3) break; // only same length, one shorter, one longer are actually scanned
		MEANING offset = lengthLists[realWordData.charlen + range[i]];
		if (trace == TRACE_SPELLING) Log(STDTRACELOG,(char*)"\r\n  Begin offset %d\r\n",i);
		while (offset)
		{
			D = Meaning2Word(offset);
			offset = D->spellNode;
			if (PART_OF_SPEECH == posflags  && D->systemFlags & PATTERN_WORD){;} // legal generic match
			else if (!(D->properties & posflags)) continue; // wrong kind of word
			char* under = strchr(D->word,'_');
			// SPELLING lists have no underscore or space words in them
			if (hasUnderscore && !under) continue;	 // require keep any underscore
			if (!hasUnderscore && under) continue;	 // require not have any underscore
			if (isUpper && !(D->internalBits & UPPERCASE_HASH) && start != 1) continue;	// dont spell check to lower a word in upper
			WORDINFO dictWordData;
			ComputeWordData(D->word, &dictWordData);
			int val = EditDistance(dictWordData, realWordData, min);
			if (val <= min) // as good or better
			{
				if (val < min)
				{
					if (trace == TRACE_SPELLING) Log(STDTRACELOG,(char*)"    Better: %s against %s value: %d\r\n",D->word,originalWord,val);
					// The weaker candidates are about to be forgotten. Release their
					// BEEN_HERE marks now: the final cleanup only walks the entries that
					// are still in choices[], so marks left here would survive the call
					// and hide these words from later spell checks.
					for (unsigned int k = 0; k < index; ++k) RemoveInternalFlag(choices[k],BEEN_HERE);
					index = 0;
					min = val;
				}
				else if ( val == min && trace == TRACE_SPELLING) Log(STDTRACELOG,(char*)"    Equal: %s against %s value: %d\r\n",D->word,originalWord,val);

				if (!(D->internalBits & BEEN_HERE))
				{
					// The capacity break below only leaves this list; without this
					// guard the following lists could each append one more entry and
					// write past the end of choices[].
					if (index >= maxChoices) break;
					choices[index++] = D;
					if (index > maxChoices - 2) break;
					AddInternalFlag(D,BEEN_HERE);
				}
			}
		}
	}
	// try endings ing, s, etc
	if (start && !index && !stricmp(language,"english")) // no stem spell if COMING from a stem spell attempt (start == 0) or we have a good guess already
	{
		uint64 flags = 0;
		char* stem = StemSpell(word,start,flags);
		if (stem)
		{
			WORDP X = StoreWord(stem,flags);
			if (X) choices[index++] = X;
		}
	}

	if (!index)  return NULL;
	if (index > 1) multichoice = true;

	// take our guesses, and pick the most common (earliest learned or most frequently used) word
	uint64 commonmin = 0;
	bestGuess[0] = NULL;
	for (unsigned int j = 0; j < index; ++j) RemoveInternalFlag(choices[j],BEEN_HERE);
	if (index == 1)
	{
		if (trace == TRACE_SPELLING) Log(STDTRACELOG,(char*)"    Single best spell: %s\r\n",choices[0]->word);
		return choices[0]->word;	// pick the one
	}
	for (unsigned int j = 0; j < index; ++j)
	{
		uint64 common = choices[j]->systemFlags & COMMONNESS;
		if (common < commonmin) continue;
		// NOTE(review): index is always > 1 here, so proper names are skipped even
		// when every candidate is a proper name (result: NULL) - confirm intent.
		if (choices[j]->internalBits & UPPERCASE_HASH && index > 1) continue;	// ignore proper names for spell better when some other choice exists
		if (common > commonmin) // this one is more common
		{
			commonmin = common;
			bestGuessindex = 0;
		}
		bestGuess[bestGuessindex++] = choices[j];
	}
	if (bestGuessindex)
	{
		if (bestGuessindex > 1) multichoice = true;
		if (trace == TRACE_SPELLING) Log(STDTRACELOG,(char*)"    Pick spell: %s\r\n",bestGuess[0]->word);
		return bestGuess[0]->word;
	}
	return NULL;
}

Exemple #9

0

Afficher le fichier

Fichier : CPU-simulator.cpp Projet : tomerye/CPU-simulator

void StartSimulator()
{
	std::vector<std::string> current_instruction;
	int r0,r1,r2,res,desination;

	Tomasulo tomasulo(
		configuration.addsub_delay,
		configuration.mul_delay,
		configuration.div_delay,
		configuration.instruction_q_depth,
		configuration.addsub_rs,
		configuration.muldiv_rs,
		configuration.load_q_depth,
		configuration.store_q_depth);

	reg[0]=0;//make reg 0 always 0

	while(1)
	{
		
		current_instruction=commands_vector[pc];
		//simulate load instruction
		executetime += LoadWord(PCtoAddress(pc),&desination);
		executetime++;
		if (!tomasulo.isInstQueueFull()) {
			tomasulo.addToQueue(current_instruction);
		}
		tomasulo.doWork();
		DoWork();
		instructioncount++;
		if(current_instruction[1]=="halt")
			return;
		if(current_instruction[1]=="j")
		{
			pc=lables_map[current_instruction[2]];
			continue;
		}
		if(current_instruction[1]=="add")
		{
			r0=GetRegNumberFromString(current_instruction[2]);
			r1=GetRegNumberFromString(current_instruction[3]);
			r2=GetRegNumberFromString(current_instruction[4]);
			reg[r0]=reg[r1]+reg[r2];
			pc++;
			continue;
		}
		if(current_instruction[1]=="sub")
		{
			r0=GetRegNumberFromString(current_instruction[2]);
			r1=GetRegNumberFromString(current_instruction[3]);
			r2=GetRegNumberFromString(current_instruction[4]);
			reg[r0]=reg[r1]-reg[r2];
			pc++;
			continue;
		}
		if(current_instruction[1]=="mul")
		{
			r0=GetRegNumberFromString(current_instruction[2]);
			r1=GetRegNumberFromString(current_instruction[3]);
			r2=GetRegNumberFromString(current_instruction[4]);
			reg[r0]=reg[r1]*reg[r2];
			pc++;
			continue;
		}
		if(current_instruction[1]=="div")
		{
			r0=GetRegNumberFromString(current_instruction[2]);
			r1=GetRegNumberFromString(current_instruction[3]);
			r2=GetRegNumberFromString(current_instruction[4]);
			reg[r0]=reg[r1]/reg[r2];
			pc++;
			continue;
		}
		if(current_instruction[1]=="slt")
		{
			r0=GetRegNumberFromString(current_instruction[2]);
			r1=GetRegNumberFromString(current_instruction[3]);
			r2=GetRegNumberFromString(current_instruction[4]);
			reg[r0]=reg[r1]<reg[r2]?1:0;
			pc++;
			continue;
		}
		if(current_instruction[1]=="addi")
		{
			r0=GetRegNumberFromString(current_instruction[2]);
			r1=GetRegNumberFromString(current_instruction[3]);
			res=MyAtoi(current_instruction[4]);
			reg[r0]=reg[r1]+res;
			pc++;
			continue;
		}
		if(current_instruction[1]=="subi")
		{
			r0=GetRegNumberFromString(current_instruction[2]);
			r1=GetRegNumberFromString(current_instruction[3]);
			res=MyAtoi(current_instruction[4]);
			reg[r0]=reg[r1]-res;
			pc++;
			continue;
		}
		if(current_instruction[1]=="slti")
		{
			r0=GetRegNumberFromString(current_instruction[2]);
			r1=GetRegNumberFromString(current_instruction[3]);
			res=MyAtoi(current_instruction[4]);
			reg[r0]=reg[r1]<res?1:0;
			pc++;
			continue;
		}
		if(current_instruction[1]=="beq")
		{
			r0=GetRegNumberFromString(current_instruction[2]);
			r1=GetRegNumberFromString(current_instruction[3]);

			if(reg[r0]==reg[r1])
			{
				pc=lables_map[current_instruction[4]];
				continue;
			}
			pc++;
			continue;
		}
		if(current_instruction[1]=="bne")
		{
			r0=GetRegNumberFromString(current_instruction[2]);
			r1=GetRegNumberFromString(current_instruction[3]);

			if(reg[r0]!=reg[r1])
			{
				pc=lables_map[current_instruction[4]];
				continue;
			}
			pc++;
			continue;
		}
		if(current_instruction[1]=="lw")
		{
			r0=GetRegNumberFromString(current_instruction[2]);
			res=GetOffset(current_instruction[3]);
			r1=GetRegNumberFromString(current_instruction[3].substr(current_instruction[3].find_first_of(")")+1,std::string::npos));
			executetime += LoadWord((reg[r1]+res)/4,&reg[r0]);
			pc++;
			continue;
		}
		if(current_instruction[1]=="sw")
		{
			r0=GetRegNumberFromString(current_instruction[2]);
			res=GetOffset(current_instruction[3]);
			r1=GetRegNumberFromString(current_instruction[3].substr(current_instruction[3].find_first_of(")")+1,std::string::npos));
			executetime += StoreWord((reg[r1]+res)/4,&reg[r0]);
			pc++;
			continue;
		}

		printf("unknown instruction\n");
		exit(1);
	}
}