void CountTime(void *argv){ time_t currTime; while(WaitForSingleObject(hRunMutex, 75L) == WAIT_TIMEOUT){ currTime = time(&currTime); //printf("currTime: %X\n", currTime); WaitForSingleObject(hScreenMutex, INFINITE); size_t tmp = currTime & 0XFFFFFFFF; StoreWord(tmp, SYS_TIME_ADDR); tmp = currTime >> 32; StoreWord(tmp, SYS_TIME_ADDR+4); ReleaseMutex(hScreenMutex); ReleaseMutex(hRunMutex); } }
void IOKey(void *argv){ reg_type receiver_control = LoadWord(RECEIVER_CONTROL); SetBit(&receiver_control, IO_INTERRUPT_ENABLE); while(!prog_finished && (WaitForSingleObject(hRunMutex, 75L) == WAIT_TIMEOUT)){ if(kbhit()){ char ch = getch(); /* for(size_t i = 0 ; i < 1000; i++) printf("Inside if kbhit() ch = %c\n", ch);*/ StoreByte(ch, RECEIVER_DATA); //muxtex in StoreByte function if(bitState(cpo_reg[STATUS], INTERRUPT_ENABLE) && bitState(cpo_reg[STATUS], EXCEPTION_LEVEL) && bitState(receiver_control, IO_INTERRUPT_ENABLE)){ SetCause(INTERRUPT); SetBit(&cpo_reg[CAUSE], 0XD);//set pending bit /*need to rectify perhaps*/ SetBit(&receiver_control, IO_READY); StoreWord(receiver_control, RECEIVER_CONTROL); /* for(size_t i = 0 ; i < 1000; i++) printf("Inside inside of kbhit() set state cause: %X\n", cpo_reg[CAUSE]);*/ }//end of if }//end of if kbhit ReleaseMutex(hRunMutex);//unlock }//end of while prog_finished }
static int SplitWord(char* word) { WORDP D2; bool good; int breakAt = 0; if (IsDigit(*word)) { while (IsDigit(word[++breakAt]) || word[breakAt] == '.'){;} // find end of number if (word[breakAt]) // found end of number { D2 = FindWord(word+breakAt,0,PRIMARY_CASE_ALLOWED); if (D2) { good = (D2->properties & (PART_OF_SPEECH|FOREIGN_WORD)) != 0 || (D2->internalBits & HAS_SUBSTITUTE) != 0; if (good && (D2->systemFlags & AGE_LEARNED))// must be common words we find { char number[MAX_WORD_SIZE]; strncpy(number,word,breakAt); number[breakAt] = 0; StoreWord(number,ADJECTIVE|NOUN|ADJECTIVE_NUMBER|NOUN_NUMBER); return breakAt; // split here } } } } // try all combinations of breaking the word into two known words breakAt = 0; size_t len = strlen(word); for (unsigned int k = 1; k < len-1; ++k) { if (!stricmp(language,"english") && k == 1 && *word != 'a' && *word != 'A' && *word != 'i' && *word != 'I') continue; // only a and i are allowed single-letter words else if (!stricmp(language,"french") && k == 1 && *word != 'y' && *word != 'a' && *word != 'A' && !SameUTF(word,"à") && !SameUTF(word, "À") && !SameUTF(word, "ô") && !SameUTF(word,"Ô")) continue; // in french only y, a and ô are allowed single-letter words WORDP D1 = FindWord(word,k,PRIMARY_CASE_ALLOWED); if (!D1) continue; good = (D1->properties & (PART_OF_SPEECH|FOREIGN_WORD)) != 0 || (D1->internalBits & HAS_SUBSTITUTE) != 0; if (!good || !(D1->systemFlags & AGE_LEARNED)) continue; // must be normal common words we find D2 = FindWord(word+k,len-k,PRIMARY_CASE_ALLOWED); if (!D2) continue; good = (D2->properties & (PART_OF_SPEECH|FOREIGN_WORD)) != 0 || (D2->internalBits & HAS_SUBSTITUTE) != 0; if (!good || !(D2->systemFlags & AGE_LEARNED) ) continue; // must be normal common words we find if (!breakAt) breakAt = k; // found a split else // found multiple places to split... dont know what to do { breakAt = -1; break; } } return breakAt; }
static char* SpellCheck(unsigned int i) { // on entry we will have passed over words which are KnownWord (including bases) or isInitialWord (all initials) // wordstarts from 1 ... wordCount is the incoming sentence words (original). We are processing the ith word here. char* word = wordStarts[i]; if (!*word) return NULL; if (!stricmp(word,loginID) || !stricmp(word,computerID)) return word; // dont change his/our name ever size_t len = strlen(word); if (len > 2 && word[len-2] == '\'') return word; // dont do anything with ' words // test for run togetherness like "talkabout fingers" int breakAt = SplitWord(word); if (breakAt > 0)// we found a split, insert 2nd word into word stream { ++wordCount; memmove(wordStarts+i+1,wordStarts+i,sizeof(char*) * (wordCount-i)); // open up a slot for a new word wordStarts[i+1] = reuseAllocation(wordStarts[i+1],wordStarts[i]+breakAt); // set this to the second word (shared from within 1st word) return FindWord(wordStarts[i],breakAt,PRIMARY_CASE_ALLOWED)->word; // 1st word gets replaced, we added valid word after } // now imagine partial runtogetherness, like "talkab out fingers" if (i < wordCount) { char tmp[MAX_WORD_SIZE]; strcpy(tmp,word); strcat(tmp,wordStarts[i+1]); breakAt = SplitWord(tmp); if (breakAt > 0) // replace words with the dual pair { wordStarts[i+1] = reuseAllocation(wordStarts[i+1],StoreWord(tmp+breakAt)->word); // set this to the second word (shared from within 1st word) return FindWord(tmp,breakAt,PRIMARY_CASE_ALLOWED)->word; // 1st word gets replaced, we added valid word after } } // remove any nondigit characters repeated more than once. Dont do this earlier, we want substitutions to have a chance at it first. ammmmmmazing static char word1[MAX_WORD_SIZE]; char* ptr = word-1; char* ptr1 = word1; while (*++ptr) { *ptr1 = *ptr; while (ptr[1] == *ptr1 && ptr[2] == *ptr1 && (*ptr1 < '0' || *ptr1 > '9')) ++ptr; // skip double repeats ++ptr1; } *ptr1 = 0; if (FindCanonical(word1,0,true) && !IsUpperCase(*word1)) return word1; // this is a different form of a canonical word so its ok // now use word spell checker char* d = SpellFix(word,i,PART_OF_SPEECH); return (d) ? d : NULL; }
bool SpellCheckSentence() { WORDP D,E; fixedSpell = false; bool lowercase = false; int language = ENGLISH; char* lang = GetUserVariable((char*)"$cs_language"); if (lang && !stricmp(lang,(char*)"spanish")) language = SPANISH; // check for all uppercase for (int i = FindOOBEnd(1) + 1; i <= wordCount; ++i) // skip start of sentence { char* word = wordStarts[i]; size_t len = strlen(word); for (int j = 0; j < (int)len; ++j) { if (IsLowerCase(word[j])) { lowercase = true; i = j = 1000; } } } if (!lowercase && wordCount > 2) // must have several words in uppercase { for (int i = FindOOBEnd(1); i <= wordCount; ++i) { char* word = wordStarts[i]; MakeLowerCase(word); } } int startWord = FindOOBEnd(1); for (int i = startWord; i <= wordCount; ++i) { char* word = wordStarts[i]; if (!word || !word[1] || *word == '"' ) continue; // illegal or single char or quoted thingy size_t len = strlen(word); // dont spell check uppercase not at start or joined word if (IsUpperCase(word[0]) && (i != startWord || strchr(word,'_')) && tokenControl & NO_PROPER_SPELLCHECK) continue; // dont spell check email or other things with @ or . in them if (strchr(word,'@') || strchr(word,'.') || strchr(word,'$')) continue; // dont spell check names of json objects or arrays if (!strnicmp(word,"ja-",3) || !strnicmp(word,"jo-",3)) continue; char* known = ProbableKnownWord(word); if (known && !strcmp(known,word)) continue; // we know it if (known && strcmp(known,word)) { char* tokens[2]; if (!IsUpperCase(*known)) // revised the word to lower case (avoid to upper case like "fields" to "Fields" { WORDP D = FindWord(known,0,LOWERCASE_LOOKUP); if (D) { tokens[1] = D->word; ReplaceWords(i,1,1,tokens); fixedSpell = true; continue; } } else // is uppercase a concept member? then revise upwards { WORDP D = FindWord(known,0,UPPERCASE_LOOKUP); if (IsConceptMember(D)) { tokens[1] = D->word; ReplaceWords(i,1,1,tokens); fixedSpell = true; continue; } } } char* p = word -1; unsigned char c; char* hyphen = 0; while ((c = *++p) != 0) { ++len; if (c == '-') hyphen = p; // note is hyphenated - use trailing } if (len == 0 || GetTemperatureLetter(word)) continue; // bad ignore utf word or llegal length - also no composite words if (c && c != '@' && c != '.') // illegal word character { if (IsDigit(word[0]) || len == 1){;} // probable numeric? // accidental junk on end of word we do know immedately? else if (i > 1 && !IsAlphaUTF8OrDigit(wordStarts[i][len-1]) ) { WORDP entry,canonical; char word[MAX_WORD_SIZE]; strcpy(word,wordStarts[i]); word[len-1] = 0; uint64 sysflags = 0; uint64 cansysflags = 0; WORDP revise; GetPosData(i,word,revise,entry,canonical,sysflags,cansysflags,true,true); // dont create a non-existent word if (entry && entry->properties & PART_OF_SPEECH) { wordStarts[i] = reuseAllocation(wordStarts[i],entry->word); fixedSpell = true; continue; // not a legal word character, leave it alone } } } // see if we know the other case if (!(tokenControl & (ONLY_LOWERCASE|STRICT_CASING)) || (i == startSentence && !(tokenControl & ONLY_LOWERCASE))) { WORDP E = FindWord(word,0,SECONDARY_CASE_ALLOWED); bool useAlternateCase = false; if (E && E->systemFlags & PATTERN_WORD) useAlternateCase = true; if (E && E->properties & (PART_OF_SPEECH|FOREIGN_WORD)) { // if the word we find is UPPER case, and this might be a lower case noun plural, don't change case. size_t len = strlen(word); if (word[len-1] == 's' ) { WORDP F = FindWord(word,len-1); if (!F || !(F->properties & (PART_OF_SPEECH|FOREIGN_WORD))) useAlternateCase = true; else continue; } else useAlternateCase = true; } else if (E) // does it have a member concept fact { if (IsConceptMember(E)) { useAlternateCase = true; break; } } if (useAlternateCase) { char* tokens[2]; tokens[1] = E->word; ReplaceWords(i,1,1,tokens); fixedSpell = true; continue; } } // merge with next token? char join[MAX_WORD_SIZE * 3]; if (i != wordCount && *wordStarts[i+1] != '"' ) { // direct merge as a single word strcpy(join,word); strcat(join,wordStarts[i+1]); WORDP D = FindWord(join,0,(tokenControl & ONLY_LOWERCASE) ? PRIMARY_CASE_ALLOWED : STANDARD_LOOKUP); strcpy(join,word); if (!D || !(D->properties & PART_OF_SPEECH) ) // merge these two, except "going to" or wordnet composites of normal words // merge as a compound word { strcat(join,(char*)"_"); strcat(join,wordStarts[i+1]); D = FindWord(join,0,(tokenControl & ONLY_LOWERCASE) ? PRIMARY_CASE_ALLOWED : STANDARD_LOOKUP); } if (D && D->properties & PART_OF_SPEECH && !(D->properties & AUX_VERB)) // merge these two, except "going to" or wordnet composites of normal words { WORDP P1 = FindWord(word,0,LOWERCASE_LOOKUP); WORDP P2 = FindWord(wordStarts[i+1],0,LOWERCASE_LOOKUP); if (!P1 || !P2 || !(P1->properties & PART_OF_SPEECH) || !(P2->properties & PART_OF_SPEECH)) { char* tokens[2]; tokens[1] = D->word; ReplaceWords(i,2,1,tokens); fixedSpell = true; continue; } } } // break apart slashed pair like eat/feed char* slash = strchr(word,'/'); if (slash && slash != word && slash[1]) // break apart word/word { if ((wordCount + 2 ) >= REAL_SENTENCE_LIMIT) continue; // no room *slash = 0; D = StoreWord(word); *slash = '/'; E = StoreWord(slash+1); char* tokens[4]; tokens[1] = D->word; tokens[2] = "/"; tokens[3] = E->word; ReplaceWords(i,1,3,tokens); fixedSpell = true; --i; continue; } // see if hypenated word should be separate or joined (ignore obvious adjective suffix) if (hyphen && !stricmp(hyphen,(char*)"-like")) { StoreWord(word,ADJECTIVE_NORMAL|ADJECTIVE); // accept it as a word continue; } else if (hyphen && (hyphen-word) > 1) { char test[MAX_WORD_SIZE]; char first[MAX_WORD_SIZE]; // test for split *hyphen = 0; strcpy(test,hyphen+1); strcpy(first,word); *hyphen = '-'; WORDP E = FindWord(test,0,LOWERCASE_LOOKUP); WORDP D = FindWord(first,0,LOWERCASE_LOOKUP); if (*first == 0) { wordStarts[i] = AllocateString(wordStarts[i] + 1); // -pieces want to lose the leading hypen (2-pieces) fixedSpell = true; } else if (D && E) // 1st word gets replaced, we added another word after { if ((wordCount + 1 ) >= REAL_SENTENCE_LIMIT) continue; // no room char* tokens[3]; tokens[1] = D->word; tokens[2] = E->word; ReplaceWords(i,1,2,tokens); fixedSpell = true; --i; } else if (!stricmp(test,(char*)"old") || !stricmp(test,(char*)"olds")) // break apart 5-year-old { if ((wordCount + 1 ) >= REAL_SENTENCE_LIMIT) continue; // no room D = StoreWord(first); E = StoreWord(test); char* tokens[3]; tokens[1] = D->word; tokens[2] = E->word; ReplaceWords(i,1,2,tokens); fixedSpell = true; --i; } else // remove hyphen entirely? { strcpy(test,first); strcat(test,hyphen+1); D = FindWord(test,0,(tokenControl & ONLY_LOWERCASE) ? PRIMARY_CASE_ALLOWED : STANDARD_LOOKUP); if (D) { wordStarts[i] = D->word; fixedSpell = true; --i; } } continue; // ignore hypenated errors that we couldnt solve, because no one mistypes a hypen } // leave uppercase in first position if not adjusted yet... but check for lower case spell error if (IsUpperCase(word[0]) && tokenControl & NO_PROPER_SPELLCHECK) { char lower[MAX_WORD_SIZE]; MakeLowerCopy(lower,word); WORDP D = FindWord(lower,0,LOWERCASE_LOOKUP); if (!D && i == startWord) { char* okword = SpellFix(lower,i,PART_OF_SPEECH,language); if (okword) { char* tokens[2]; WORDP E = StoreWord(okword); tokens[1] = E->word; ReplaceWords(i,1,1,tokens); fixedSpell = true; } } continue; } if (*word != '\'' && (!FindCanonical(word, i,true) || IsUpperCase(word[0]))) // dont check quoted or findable words unless they are capitalized { word = SpellCheck(i,language); // dont spell check proper names to improper, if word before or after is lower case originally if (word && i != 1 && originalCapState[i] && !IsUpperCase(*word)) { if (!originalCapState[i-1]) return false; else if (i != wordCount && !originalCapState[i+1]) return false; } if (word && !*word) // performed substitution on prior word, restart this one { fixedSpell = true; --i; continue; } if (word) { char* tokens[2]; tokens[1] = word; ReplaceWords(i,1,1,tokens); fixedSpell = true; continue; } } } return fixedSpell; }
char* ProbableKnownWord(char* word) { if (strchr(word,' ') || strchr(word,'_')) return word; // not user input, is synthesized size_t len = strlen(word); // do we know the word as is? WORDP D = FindWord(word,0,PRIMARY_CASE_ALLOWED); if (D) { if (D->properties & FOREIGN_WORD || *D->word == '~' || D->systemFlags & PATTERN_WORD) return D->word; // we know this word clearly or its a concept set ref emotion if (D->properties & PART_OF_SPEECH && !IS_NEW_WORD(D)) return D->word; // old word we know if (IsConceptMember(D)) return D->word; // are there facts using this word? -- issue with facts because on seeing input second time, having made facts of original, we see original // if (GetSubjectNondeadHead(D) || GetObjectNondeadHead(D) || GetVerbNondeadHead(D)) return D->word; } char lower[MAX_WORD_SIZE]; MakeLowerCopy(lower,word); // do we know the word in lower case? D = FindWord(word,0,LOWERCASE_LOOKUP); if (D) // direct recognition { if (D->properties & FOREIGN_WORD || *D->word == '~' || D->systemFlags & PATTERN_WORD) return D->word; // we know this word clearly or its a concept set ref emotion if (D->properties & PART_OF_SPEECH && !IS_NEW_WORD(D)) return D->word; // old word we know if (IsConceptMember(D)) return D->word; // are there facts using this word? // if (GetSubjectNondeadHead(D) || GetObjectNondeadHead(D) || GetVerbNondeadHead(D)) return D->word; } // do we know the word in upper case? char upper[MAX_WORD_SIZE]; MakeLowerCopy(upper,word); upper[0] = GetUppercaseData(upper[0]); D = FindWord(upper,0,UPPERCASE_LOOKUP); if (D) // direct recognition { if (D->properties & FOREIGN_WORD || *D->word == '~' || D->systemFlags & PATTERN_WORD) return D->word; // we know this word clearly or its a concept set ref emotion if (D->properties & PART_OF_SPEECH && !IS_NEW_WORD(D)) return D->word; // old word we know if (IsConceptMember(D)) return D->word; // are there facts using this word? // if (GetSubjectNondeadHead(D) || GetObjectNondeadHead(D) || GetVerbNondeadHead(D)) return D->word; } // interpolate to lower case words uint64 expectedBase = 0; if (ProbableAdjective(word,len,expectedBase) && expectedBase) return word; expectedBase = 0; if (ProbableAdverb(word,len,expectedBase) && expectedBase) return word; // is it a verb form char* verb = GetInfinitive(lower,true); // no new verbs if (verb) { WORDP D = StoreWord(lower,0); // verb form recognized return D->word; } // is it simple plural of a noun? if (word[len-1] == 's') { WORDP E = FindWord(lower,len-1,LOWERCASE_LOOKUP); if (E && E->properties & NOUN) { E = StoreWord(word,NOUN|NOUN_PLURAL); return E->word; } E = FindWord(lower,len-1,UPPERCASE_LOOKUP); if (E && E->properties & NOUN) { *word = toUppercaseData[*word]; E = StoreWord(word,NOUN|NOUN_PROPER_PLURAL); return E->word; } } return NULL; }
bool SpellCheckSentence() { WORDP D,E; fixedSpell = false; bool lowercase = false; // check for all uppercase (capslock) for (int i = FindOOBEnd(1); i <= wordCount; ++i) // skip start of sentence { char* word = wordStarts[i]; if (!word[1]) continue; // autoconversion of letters to lower case should be ignored (eg A) if (!stricmp(word, "the")) continue; size_t len = strlen(word); for (int j = 0; j < (int)len; ++j) { if (IsLowerCase(word[j])) { lowercase = true; i = j = len+1000; // len might be BIG (oob data) so make sure beyond it) } } } if (!lowercase && wordCount > 2) // must have multiple words all in uppercase { for (int i = FindOOBEnd(1); i <= wordCount; ++i) { char* word = wordStarts[i]; char myword[MAX_WORD_SIZE]; MakeLowerCopy(myword,word); if (strcmp(word, myword)) { char* tokens[2]; tokens[1] = myword; ReplaceWords("caplocWord", i, 1, 1, tokens); originalCapState[i] = false; } } } int startWord = FindOOBEnd(1); for (int i = startWord; i <= wordCount; ++i) { char* word = wordStarts[i]; char* tokens[2]; // change any \ to / char newword[MAX_WORD_SIZE]; bool altered = false; if (strlen(word) < MAX_WORD_SIZE) { strcpy(newword, word); char* at = newword; while ((at = strchr(at,'\\'))) { *at = '/'; altered = true; } if (altered) word = wordStarts[i] = StoreWord(newword, AS_IS)->word; } if (*word == '\'' && !word[1] && i != startWord && IsDigit(*wordStarts[i - 1]) && !stricmp(language, "english")) // fails if not digit bug { tokens[1] = (char*)"foot"; ReplaceWords("' as feet", i, 1, 1, tokens); fixedSpell = true; continue; } if (*word == '"' && !word[1] && i != startWord && IsDigit(*wordStarts[i - 1]) && !stricmp(language, "english")) // fails if not digit bug { tokens[1] = (char*)"inch"; ReplaceWords("' as feet", i, 1, 1, tokens); fixedSpell = true; continue; } if (!word || !word[1] || *word == '"' ) continue; // illegal or single char or quoted thingy size_t len = strlen(word); // dont spell check uppercase not at start or joined word if (IsUpperCase(word[0]) && (i != startWord || strchr(word,'_')) && tokenControl & NO_PROPER_SPELLCHECK) continue; // dont spell check email or other things with @ or . in them if (strchr(word,'@') || strchr(word, '&') || strchr(word,'.') || strchr(word,'$')) continue; // dont spell check names of json objects or arrays if (!strnicmp(word,"ja-",3) || !strnicmp(word,"jo-",3)) continue; // dont spell check web addresses if (!strnicmp(word,"http",4) || !strnicmp(word,"www",3)) continue; // nor fractions if (IsFraction(word)) continue; // fraction? // joined number words like 100dollars char* at = word - 1; while (IsDigit(*++at) || *at == numberPeriod); if (IsDigit(*word) && strlen(at) > 3 && ProbableKnownWord(at)) { char first[MAX_WORD_SIZE]; strncpy(first, word, (at - word)); first[at - word] = 0; char* tokens[3]; tokens[1] = first; tokens[2] = at; ReplaceWords("joined number word", i, 1, 2, tokens); continue; } // nor model numbers if (IsModelNumber(word)) { WORDP X = FindWord(word, 0, UPPERCASE_LOOKUP); if (IsConceptMember(X) && !strcmp(word,X->word)) { char* tokens[2]; tokens[1] = X->word; ReplaceWords("KnownUpperModelNumber", i, 1, 1, tokens); fixedSpell = true; } continue; } char* number; if (GetCurrency((unsigned char*)word, number)) continue; // currency if (!stricmp(word, (char*)"am") && i != startWord && (IsDigit(*wordStarts[i-1]) || IsNumber(wordStarts[i-1]) ==REAL_NUMBER) && !stricmp(language,"english")) // fails if not digit bug { char* tokens[2]; tokens[1] = (char*)"a.m."; ReplaceWords("am as time", i, 1, 1, tokens); fixedSpell = true; continue; } char* known = ProbableKnownWord(word); if (known && !strcmp(known,word)) continue; // we know it if (known && strcmp(known,word)) { WORDP D = FindWord(known); char* tokens[2]; if ((!D || !(D->internalBits & UPPERCASE_HASH)) && !IsUpperCase(*known)) // revised the word to lower case (avoid to upper case like "fields" to "Fields" { WORDP X = FindWord(known,0,LOWERCASE_LOOKUP); if (X) { tokens[1] = X->word; ReplaceWords("KnownWord",i,1,1,tokens); fixedSpell = true; continue; } } else // is uppercase a concept member? then revise upwards { WORDP X = FindWord(known,0,UPPERCASE_LOOKUP); if (IsConceptMember(X) || stricmp(language,"english")) // all german nouns are uppercase { tokens[1] = X->word; ReplaceWords("KnownUpper",i,1,1,tokens); fixedSpell = true; continue; } } } char* p = word -1; unsigned char c; char* hyphen = 0; while ((c = *++p) != 0) { ++len; if (c == '-') hyphen = p; // note is hyphenated - use trailing } if (len == 0 || GetTemperatureLetter(word)) continue; // bad ignore utf word or llegal length - also no composite words if (c && c != '@' && c != '.') // illegal word character { if (IsDigit(word[0]) || len == 1){;} // probable numeric? // accidental junk on end of word we do know immedately? else if (i > 1 && !IsAlphaUTF8OrDigit(wordStarts[i][len-1]) ) { WORDP entry,canonical; char word[MAX_WORD_SIZE]; strcpy(word,wordStarts[i]); word[len-1] = 0; uint64 sysflags = 0; uint64 cansysflags = 0; WORDP revise; GetPosData(i,word,revise,entry,canonical,sysflags,cansysflags,true,true); // dont create a non-existent word if (entry && entry->properties & PART_OF_SPEECH) { wordStarts[i] = entry->word; fixedSpell = true; continue; // not a legal word character, leave it alone } } } // see if we know the other case if (!(tokenControl & (ONLY_LOWERCASE|STRICT_CASING)) || (i == startSentence && !(tokenControl & ONLY_LOWERCASE))) { WORDP E = FindWord(word,0,SECONDARY_CASE_ALLOWED); bool useAlternateCase = false; if (E && E->systemFlags & PATTERN_WORD) useAlternateCase = true; if (E && E->properties & (PART_OF_SPEECH|FOREIGN_WORD)) { // if the word we find is UPPER case, and this might be a lower case noun plural, don't change case. size_t len = strlen(word); if (word[len-1] == 's' ) { WORDP F = FindWord(word,len-1); if (!F || !(F->properties & (PART_OF_SPEECH|FOREIGN_WORD))) useAlternateCase = true; else continue; } else useAlternateCase = true; } else if (E) // does it have a member concept fact { if (IsConceptMember(E)) { useAlternateCase = true; break; } } if (useAlternateCase) { char* tokens[2]; tokens[1] = E->word; ReplaceWords("Alternatecase",i,1,1,tokens); fixedSpell = true; continue; } } // merge with next token? char join[MAX_WORD_SIZE * 3]; if (i != wordCount && *wordStarts[i+1] != '"' ) { // direct merge as a single word strcpy(join,word); strcat(join,wordStarts[i+1]); WORDP D = FindWord(join,0,(tokenControl & ONLY_LOWERCASE) ? PRIMARY_CASE_ALLOWED : STANDARD_LOOKUP); strcpy(join,word); // if (!D || !(D->properties & PART_OF_SPEECH) ) // merge these two, except "going to" or wordnet composites of normal words // merge as a compound word // { // strcat(join,(char*)"_"); // strcat(join,wordStarts[i+1]); // D = FindWord(join,0,(tokenControl & ONLY_LOWERCASE) ? PRIMARY_CASE_ALLOWED : STANDARD_LOOKUP); // } DONT CREATE _ words, let sequence handle it if (D && D->properties & PART_OF_SPEECH && !(D->properties & AUX_VERB)) // merge these two, except "going to" or wordnet composites of normal words { WORDP P1 = FindWord(word,0,LOWERCASE_LOOKUP); WORDP P2 = FindWord(wordStarts[i+1],0,LOWERCASE_LOOKUP); if (!P1 || !P2 || !(P1->properties & PART_OF_SPEECH) || !(P2->properties & PART_OF_SPEECH)) { char* tokens[2]; tokens[1] = D->word; ReplaceWords("merge",i,2,1,tokens); fixedSpell = true; continue; } } } // break apart slashed pair like eat/feed char* slash = strchr(word,'/'); if (slash && !slash[1] && len < MAX_WORD_SIZE) // remove trailing slash { strcpy(newword, word); newword[slash - word] = 0; word = wordStarts[i] = StoreWord(newword, AS_IS)->word; } if (slash && slash != word && slash[1]) // break apart word/word { if ((wordCount + 2 ) >= REAL_SENTENCE_LIMIT) continue; // no room *slash = 0; D = StoreWord(word); *slash = '/'; E = StoreWord(slash+1); char* tokens[4]; tokens[1] = D->word; tokens[2] = "/"; tokens[3] = E->word; ReplaceWords("Split",i,1,3,tokens); fixedSpell = true; --i; continue; } // see if hypenated word should be separate or joined (ignore obvious adjective suffix) if (hyphen && !stricmp(hyphen,(char*)"-like")) { StoreWord(word,ADJECTIVE_NORMAL|ADJECTIVE); // accept it as a word continue; } else if (hyphen && (hyphen-word) > 1 && !IsPlaceNumber(word)) // dont break up fifty-second { char test[MAX_WORD_SIZE]; char first[MAX_WORD_SIZE]; // test for split *hyphen = 0; strcpy(test,hyphen+1); strcpy(first,word); *hyphen = '-'; WORDP E = FindWord(test,0,LOWERCASE_LOOKUP); WORDP D = FindWord(first,0,LOWERCASE_LOOKUP); if (*first == 0) { wordStarts[i] = AllocateHeap(wordStarts[i] + 1); // -pieces want to lose the leading hypen (2-pieces) fixedSpell = true; } else if (D && E) // 1st word gets replaced, we added another word after { if ((wordCount + 1 ) >= REAL_SENTENCE_LIMIT) continue; // no room char* tokens[3]; tokens[1] = D->word; tokens[2] = E->word; ReplaceWords("Pair",i,1,2,tokens); fixedSpell = true; --i; } else if (!stricmp(test,(char*)"old") || !stricmp(test,(char*)"olds")) // break apart 5-year-old { if ((wordCount + 1 ) >= REAL_SENTENCE_LIMIT) continue; // no room D = StoreWord(first); E = StoreWord(test); char* tokens[3]; tokens[1] = D->word; tokens[2] = E->word; ReplaceWords("Break old",i,1,2,tokens); fixedSpell = true; --i; } else // remove hyphen entirely? { strcpy(test,first); strcat(test,hyphen+1); D = FindWord(test,0,(tokenControl & ONLY_LOWERCASE) ? PRIMARY_CASE_ALLOWED : STANDARD_LOOKUP); if (D) { wordStarts[i] = D->word; fixedSpell = true; --i; } } continue; // ignore hypenated errors that we couldnt solve, because no one mistypes a hypen } // see if number in front of unit split like 10mg if (IsDigit(*word)) { char* at = word; while (*++at && IsDigit(*at)) {;} WORDP E = FindWord(at); if (E && strlen(at) > 2 && *at != 'm') // number in front of known word ( but must be longer than 2 char, 5th) but allow mg { char token1[MAX_WORD_SIZE]; int len = at - word; strncpy(token1,word,len); token1[len] = 0; D = StoreWord(token1); char* tokens[4]; tokens[1] = D->word; tokens[2] = E->word; ReplaceWords("Split",i,1,2,tokens); fixedSpell = true; continue; } } // leave uppercase in first position if not adjusted yet... but check for lower case spell error if (IsUpperCase(word[0]) && tokenControl & NO_PROPER_SPELLCHECK) { char lower[MAX_WORD_SIZE]; MakeLowerCopy(lower,word); WORDP D = FindWord(lower,0,LOWERCASE_LOOKUP); if (!D && i == startWord) { char* okword = SpellFix(lower,i,PART_OF_SPEECH); if (okword) { char* tokens[2]; WORDP E = StoreWord(okword); tokens[1] = E->word; ReplaceWords("Spell",i,1,1,tokens); fixedSpell = true; } } continue; } // see if smooshed word pair size_t len1 = strlen(word); int j; if (!IsDigit(*word)) { for (j = 1; j <= len1 - 1; ++j) { WORDP X1 = FindWord(word, j); // any case WORDP X2 = FindWord(word + j, len1 - i); // any case if (X1 && X2 && (X1->word[1] || X1->word[0] == 'i' || X1->word[0] == 'I' || X1->word[0] == 'a')) { char* tokens[3]; tokens[1] = X1->word; tokens[2] = X2->word; ReplaceWords("Split", i, 1, 2, tokens); fixedSpell = true; break; } } if (j != len1) continue; } if (*word != '\'' && (!FindCanonical(word, i,true) || IsUpperCase(word[0]))) // dont check quoted or findable words unless they are capitalized { word = SpellCheck(i); // dont spell check proper names to improper, if word before or after is lower case originally if (word && i != 1 && originalCapState[i] && !IsUpperCase(*word)) { if (!originalCapState[i-1]) continue; else if (i != wordCount && !originalCapState[i+1]) continue; } if (word && !*word) // performed substitution on prior word, restart this one { fixedSpell = true; --i; continue; } if (word) { char* tokens[2]; tokens[1] = word; ReplaceWords("Spell",i,1,1,tokens); fixedSpell = true; continue; } } } return fixedSpell; }
char* SpellFix(char* originalWord,int start,uint64 posflags) { multichoice = false; char word[MAX_WORD_SIZE]; MakeLowerCopy(word, originalWord); char word1[MAX_WORD_SIZE]; MakeUpperCopy(word1, originalWord); WORDINFO realWordData; ComputeWordData(word, &realWordData); if (realWordData.bytelen >= 100 || realWordData.bytelen == 0) return NULL; if (IsDigit(*originalWord)) return NULL; // number-based words and numbers must be treated elsewhere char letterLow = *word; char letterHigh = *word1; bool hasUnderscore = (strchr(originalWord,'_')) ? true : false; bool isUpper = IsUpperCase(originalWord[0]); if (IsUpperCase(originalWord[1])) isUpper = false; // not if all caps if (trace == TRACE_SPELLING) Log(STDTRACELOG,(char*)"Spell: %s\r\n",originalWord); // Priority is to a word that looks like what the user typed, because the user probably would have noticed if it didnt and changed it. So add/delete has priority over tranform WORDP choices[4000]; WORDP bestGuess[4000]; unsigned int index = 0; unsigned int bestGuessindex = 0; int min = 35; // allow 2 changes as needed uint64 pos = PART_OF_SPEECH; // all pos allowed WORDP D; if (posflags == PART_OF_SPEECH && start < wordCount) // see if we can restrict word based on next word { D = FindWord(wordStarts[start+1],0,PRIMARY_CASE_ALLOWED); uint64 flags = (D) ? D->properties : (-1ull); // if we dont know the word, it could be anything if ((flags & PART_OF_SPEECH) == PREPOSITION) pos &= -1 ^ (PREPOSITION | NOUN); // prep cannot be preceeded by noun or prep if (!(flags & (PREPOSITION | VERB | CONJUNCTION | ADVERB)) && flags & DETERMINER) pos &= -1 ^ (DETERMINER | ADJECTIVE | NOUN | ADJECTIVE_NUMBER | NOUN_NUMBER); // determiner cannot be preceeded by noun determiner adjective if (!(flags & (PREPOSITION | VERB | CONJUNCTION | DETERMINER | ADVERB)) && flags & ADJECTIVE) pos &= -1 ^ (NOUN); if (!(flags & (PREPOSITION | NOUN | CONJUNCTION | DETERMINER | ADVERB | ADJECTIVE)) && flags & VERB) pos &= -1 ^ (VERB); // we know all helper verbs we might be if (D && *D->word == '\'' && D->word[1] == 's' ) pos &= NOUN; // we can only be a noun if possessive - contracted 's should already be removed by now } if (posflags == PART_OF_SPEECH && start > 1) { D = FindWord(wordStarts[start-1],0,PRIMARY_CASE_ALLOWED); uint64 flags = (D) ? D->properties : (-1); // if we dont know the word, it could be anything if (flags & DETERMINER) pos &= -1 ^ (VERB|CONJUNCTION|PREPOSITION|DETERMINER); } posflags &= pos; // if pos types are known and restricted and dont match static int range[] = {0,-1,1,-2,2}; for (unsigned int i = 0; i < 5; ++i) { if (i >= 3) break; MEANING offset = lengthLists[realWordData.charlen + range[i]]; if (trace == TRACE_SPELLING) Log(STDTRACELOG,(char*)"\r\n Begin offset %d\r\n",i); while (offset) { D = Meaning2Word(offset); offset = D->spellNode; if (PART_OF_SPEECH == posflags && D->systemFlags & PATTERN_WORD){;} // legal generic match else if (!(D->properties & posflags)) continue; // wrong kind of word char* under = strchr(D->word,'_'); // SPELLING lists have no underscore or space words in them if (hasUnderscore && !under) continue; // require keep any underscore if (!hasUnderscore && under) continue; // require not have any underscore if (isUpper && !(D->internalBits & UPPERCASE_HASH) && start != 1) continue; // dont spell check to lower a word in upper WORDINFO dictWordData; ComputeWordData(D->word, &dictWordData); int val = EditDistance(dictWordData, realWordData, min); if (val <= min) // as good or better { if (val < min) { if (trace == TRACE_SPELLING) Log(STDTRACELOG,(char*)" Better: %s against %s value: %d\r\n",D->word,originalWord,val); index = 0; min = val; } else if ( val == min && trace == TRACE_SPELLING) Log(STDTRACELOG,(char*)" Equal: %s against %s value: %d\r\n",D->word,originalWord,val); if (!(D->internalBits & BEEN_HERE)) { choices[index++] = D; if (index > 3998) break; AddInternalFlag(D,BEEN_HERE); } } } } // try endings ing, s, etc if (start && !index && !stricmp(language,"english")) // no stem spell if COMING from a stem spell attempt (start == 0) or we have a good guess already { uint64 flags = 0; char* stem = StemSpell(word,start,flags); if (stem) { WORDP X = StoreWord(stem,flags); if (X) choices[index++] = X; } } if (!index) return NULL; if (index > 1) multichoice = true; // take our guesses, and pick the most common (earliest learned or most frequently used) word uint64 commonmin = 0; bestGuess[0] = NULL; for (unsigned int j = 0; j < index; ++j) RemoveInternalFlag(choices[j],BEEN_HERE); if (index == 1) { if (trace == TRACE_SPELLING) Log(STDTRACELOG,(char*)" Single best spell: %s\r\n",choices[0]->word); return choices[0]->word; // pick the one } for (unsigned int j = 0; j < index; ++j) { uint64 common = choices[j]->systemFlags & COMMONNESS; if (common < commonmin) continue; if (choices[j]->internalBits & UPPERCASE_HASH && index > 1) continue; // ignore proper names for spell better when some other choice exists if (common > commonmin) // this one is more common { commonmin = common; bestGuessindex = 0; } bestGuess[bestGuessindex++] = choices[j]; } if (bestGuessindex) { if (bestGuessindex > 1) multichoice = true; if (trace == TRACE_SPELLING) Log(STDTRACELOG,(char*)" Pick spell: %s\r\n",bestGuess[0]->word); return bestGuess[0]->word; } return NULL; }
void StartSimulator() { std::vector<std::string> current_instruction; int r0,r1,r2,res,desination; Tomasulo tomasulo( configuration.addsub_delay, configuration.mul_delay, configuration.div_delay, configuration.instruction_q_depth, configuration.addsub_rs, configuration.muldiv_rs, configuration.load_q_depth, configuration.store_q_depth); reg[0]=0;//make reg 0 always 0 while(1) { current_instruction=commands_vector[pc]; //simulate load instruction executetime += LoadWord(PCtoAddress(pc),&desination); executetime++; if (!tomasulo.isInstQueueFull()) { tomasulo.addToQueue(current_instruction); } tomasulo.doWork(); DoWork(); instructioncount++; if(current_instruction[1]=="halt") return; if(current_instruction[1]=="j") { pc=lables_map[current_instruction[2]]; continue; } if(current_instruction[1]=="add") { r0=GetRegNumberFromString(current_instruction[2]); r1=GetRegNumberFromString(current_instruction[3]); r2=GetRegNumberFromString(current_instruction[4]); reg[r0]=reg[r1]+reg[r2]; pc++; continue; } if(current_instruction[1]=="sub") { r0=GetRegNumberFromString(current_instruction[2]); r1=GetRegNumberFromString(current_instruction[3]); r2=GetRegNumberFromString(current_instruction[4]); reg[r0]=reg[r1]-reg[r2]; pc++; continue; } if(current_instruction[1]=="mul") { r0=GetRegNumberFromString(current_instruction[2]); r1=GetRegNumberFromString(current_instruction[3]); r2=GetRegNumberFromString(current_instruction[4]); reg[r0]=reg[r1]*reg[r2]; pc++; continue; } if(current_instruction[1]=="div") { r0=GetRegNumberFromString(current_instruction[2]); r1=GetRegNumberFromString(current_instruction[3]); r2=GetRegNumberFromString(current_instruction[4]); reg[r0]=reg[r1]/reg[r2]; pc++; continue; } if(current_instruction[1]=="slt") { r0=GetRegNumberFromString(current_instruction[2]); r1=GetRegNumberFromString(current_instruction[3]); r2=GetRegNumberFromString(current_instruction[4]); reg[r0]=reg[r1]<reg[r2]?1:0; pc++; continue; } if(current_instruction[1]=="addi") { r0=GetRegNumberFromString(current_instruction[2]); r1=GetRegNumberFromString(current_instruction[3]); res=MyAtoi(current_instruction[4]); reg[r0]=reg[r1]+res; pc++; continue; } if(current_instruction[1]=="subi") { r0=GetRegNumberFromString(current_instruction[2]); r1=GetRegNumberFromString(current_instruction[3]); res=MyAtoi(current_instruction[4]); reg[r0]=reg[r1]-res; pc++; continue; } if(current_instruction[1]=="slti") { r0=GetRegNumberFromString(current_instruction[2]); r1=GetRegNumberFromString(current_instruction[3]); res=MyAtoi(current_instruction[4]); reg[r0]=reg[r1]<res?1:0; pc++; continue; } if(current_instruction[1]=="beq") { r0=GetRegNumberFromString(current_instruction[2]); r1=GetRegNumberFromString(current_instruction[3]); if(reg[r0]==reg[r1]) { pc=lables_map[current_instruction[4]]; continue; } pc++; continue; } if(current_instruction[1]=="bne") { r0=GetRegNumberFromString(current_instruction[2]); r1=GetRegNumberFromString(current_instruction[3]); if(reg[r0]!=reg[r1]) { pc=lables_map[current_instruction[4]]; continue; } pc++; continue; } if(current_instruction[1]=="lw") { r0=GetRegNumberFromString(current_instruction[2]); res=GetOffset(current_instruction[3]); r1=GetRegNumberFromString(current_instruction[3].substr(current_instruction[3].find_first_of(")")+1,std::string::npos)); executetime += LoadWord((reg[r1]+res)/4,®[r0]); pc++; continue; } if(current_instruction[1]=="sw") { r0=GetRegNumberFromString(current_instruction[2]); res=GetOffset(current_instruction[3]); r1=GetRegNumberFromString(current_instruction[3].substr(current_instruction[3].find_first_of(")")+1,std::string::npos)); executetime += StoreWord((reg[r1]+res)/4,®[r0]); pc++; continue; } printf("unknown instruction\n"); exit(1); } }