void CSuggestor::MultiReplace(const CFSWString &szWord, INTPTR ipStartPos) { if (ipStartPos>0) CheckAndAdd(szWord); INTPTR ipLength=szWord.GetLength(); for (; ipStartPos<ipLength; ipStartPos++){ for (INTPTR ip=0; ip<(INTPTR)(sizeof(ChangeStringsMultiple)/sizeof(__CChangeStrings)); ip++){ if (szWord.ContainsAt(ipStartPos, ChangeStringsMultiple[ip].m_lpszFrom)){ MultiReplace(szWord.Left(ipStartPos)+ChangeStringsMultiple[ip].m_lpszTo+szWord.Mid(ipStartPos+FSStrLen(ChangeStringsMultiple[ip].m_lpszFrom)), ipStartPos+FSStrLen(ChangeStringsMultiple[ip].m_lpszTo)); } } } }
/**
 * Copies s into the result, inserting the syllable delimiter `d` in front of
 * characters that start a new syllable according to three local rules.
 *
 * NOTE(review): the rules look at s.GetAt(pos - 1) and s.GetAt(pos + 1) also
 * at the first/last character; this assumes GetAt tolerates out-of-range
 * indices - TODO confirm.
 */
CFSWString syllabify2(CFSWString s) {
    CFSWString out;
    INTPTR pos = 0;
    while (pos < s.GetLength()) {
        CFSWString cur = s.GetAt(pos);

        // Rule 1: a consonant between two vowels starts a syllable.
        if (is_consonant(cur) && is_vowel(s.GetAt(pos - 1)) && is_vowel(s.GetAt(pos + 1))) {
            out += d;
        }

        // Rule 2: a vowel between two vowels, when it equals the following
        // character (compared in lower case), starts a syllable.
        if (is_vowel(cur) && is_vowel(s.GetAt(pos - 1)) && is_vowel(s.GetAt(pos + 1))
                && cur.ToLower() == s.GetAt(pos + 1)) {
            out += d;
        }

        // Rule 3 (marked "questionable" by the original author): the last
        // consonant of a cluster before a vowel starts a syllable, but only
        // once the output already contains a vowel.
        if (is_consonant(cur) && is_consonant(s.GetAt(pos - 1)) && is_vowel(s.GetAt(pos + 1))
                && has_vowel(out)) {
            out += d;
        }

        out += cur;
        pos++;
    }
    return out;
}
int CSuggestor::CheckAndAdd(const CFSWString &szWord) { if (m_fTimeOut>0 && (CFSTime::Now()-m_TimeStart).GetSeconds()>=m_fTimeOut) return -1; if (szWord.IsEmpty()) return -1; CFSWString szTemp; long lLevel=100; if (SpellWord(szWord, szTemp, &lLevel)==SPL_NOERROR && !szTemp.IsEmpty()){ szTemp=m_Cap.GetCap(szTemp); CFSWString szTemp2; long lLevel2; if (SpellWord(szTemp, szTemp2, &lLevel2)==SPL_NOERROR){ SetLevel(GetLevelGroup(lLevel)); m_Items.AddItem(CSuggestorItem(szTemp, lLevel)); return 0; } } return -1; }
/**
 * Tells whether s counts as a word: it must contain a vowel, keep its length
 * when passed through make_char_string(), and be longer than one character.
 */
bool is_word(CFSWString s) {
    const bool voiced = (has_vowel(s) == true);
    if (!voiced) {
        return false;
    }
    if (s.GetLength() != make_char_string(s).GetLength()) {
        return false;
    }
    return s.GetLength() > 1;
}
void PTWSplitBuffer(const CFSWString &szBuffer, CPTWordArray &Words) { Words.Cleanup(); INTPTR ipStartPos=0; INTPTR ipPos; for (ipPos=0; ipPos<szBuffer.GetLength(); ipPos++) { if (FSIsSpace(szBuffer[ipPos])) { if (ipPos>ipStartPos) { Words.AddItem(CPTWord(szBuffer.Mid(ipStartPos, ipPos-ipStartPos), ipStartPos)); } ipStartPos=ipPos+1; } } if (ipPos>ipStartPos) { Words.AddItem(CPTWord(szBuffer.Mid(ipStartPos, ipPos-ipStartPos), ipStartPos)); } }
/**
 * Rewrites the apostrophe marks in s.
 *
 * A handful of fixed forms are mapped directly. Otherwise the string is
 * scanned right to left: apostrophes are dropped, and while the "marking"
 * state is active every character accepted by can_palat_vr() is upper-cased.
 * The state is switched on by an apostrophe or by a character accepted by
 * must(), and switched off by the first character that can_palat_vr() rejects.
 */
CFSWString palat_vru (CFSWString s) {
    static const wchar_t *const fixed_forms[][2] = {
        { L"är'",  L"ärq"  },
        { L"ar'",  L"arq"  },
        { L"jäl'", L"jälq" },
        { L"jal'", L"jalq" },
        { L"kül'", L"külq" },
        { L"pan'", L"panq" },
        { L"tul'", L"tulq" },
        { L"ol'",  L"olq"  },
    };

    CFSWString res;
    for (size_t k = 0; k < sizeof(fixed_forms) / sizeof(fixed_forms[0]); k++) {
        if (s == fixed_forms[k][0]) {
            res = fixed_forms[k][1];
            return res;
        }
    }

    bool marking = false;
    for (INTPTR pos = s.GetLength() - 1; pos >= 0; pos--) {
        CFSWString ch = s.GetAt(pos);
        if (ch == L"'") {
            // The apostrophe itself is not copied to the result.
            marking = true;
            continue;
        }
        if (must(ch)) {
            marking = true;
        } else if (marking) {
            if (can_palat_vr(ch)) {
                ch = ch.ToUpper();
            } else {
                marking = false;
            }
        }
        res = ch + res;
    }
    return res;
}
/**
 * Maps s to the class symbol used in the shift patterns.
 *
 * "j", "h" and "v" map to themselves; strings containing s/S map to 's',
 * those containing one of "lmnrLN" to 'L', those containing one of "kptfšT"
 * to 'Q'; remaining vowels map to 'V' and consonants to 'C'. Anything else
 * is returned unchanged. The checks are applied in exactly this order.
 */
CFSWString shift_pattern(CFSWString s) {
    if (s == L'j') {
        return L'j';
    }
    if (s == L'h') {
        return L'h';
    }
    if (s == L'v') {
        return L'v';
    }
    if (s.FindOneOf(L"sS") > -1) {
        return L's';
    }
    if (s.FindOneOf(L"lmnrLN") > -1) {
        return L'L';
    }
    if (s.FindOneOf(L"kptfšT") > -1) {
        return L'Q';
    }
    if (is_vowel(s)) {
        return L'V';
    }
    if (is_consonant(s)) {
        return L'C';
    }
    return s;
}
/**
 * Normalises raw input text character by character.
 *
 * Original author's note: "trying to throw all the rubbish away".
 *
 * For every character the first matching rule below decides whether it is
 * copied, replaced or dropped; characters matching no rule are dropped.
 * `pc` is the last character already written to the result and `nc` the next
 * input character. The result is trimmed before it is returned.
 *
 * NOTE(review): pc/nc are read with GetAt() one position outside the string
 * at the edges; assumes GetAt tolerates that - TODO confirm.
 */
CFSWString DealWithText(CFSWString text) {
    CFSWString res;
    text.Trim();
    text.Replace(L"\n\n", L"\n", 1);

    for (INTPTR i = 0; i < text.GetLength(); i++) {
        CFSWString c = text.GetAt(i);
        CFSWString pc = res.GetAt(res.GetLength() - 1);
        CFSWString nc = text.GetAt(i + 1);

        // Letters and digits are always kept.
        if (is_char(c) || is_digit(c)) {
            res += c;
            continue;
        }
        // A hyphen between two letters becomes the separator `sp`.
        if (is_hyphen(c) && is_char(pc) && is_char(nc)) {
            res += sp;
            continue;
        }
        if (is_symbol(c)) {
            res += c;
            continue;
        }
        // Repeated colons / opening brackets collapse to one.
        if (is_colon(c) && !is_colon(pc)) {
            res += c;
            continue;
        }
        if (is_bbracket(c) && !is_bbracket(pc)) {
            res += c;
            continue;
        }
        // A closing bracket directly before sentence-ending punctuation is
        // dropped (the original appended an empty string here).
        if (is_ebracket(c) && is_ending(nc)) {
            continue;
        }
        if (is_ebracket(c) && !is_ebracket(pc)) {
            res += c;
            continue;
        }
        if (is_comma(c) && !is_comma(pc)) {
            res += c;
            continue;
        }
        if (is_fchar(c)) {
            res += replace_fchar(c);
            continue;
        }
        if (is_space(c) && !is_whitespace(pc)) {
            res += c;
            continue;
        }
        // Marked "dubious" by the original author.
        if (is_break(c) && !is_break(pc)) {
            res += c;
            continue;
        }
        if (is_tab(c) && !is_whitespace(pc)) {
            res += c;
            continue;
        }
        if (is_ending(c) && !is_ending(pc) && !is_whitespace(pc)) {
            res += c;
        }
    }

    res.Trim();
    return res;
}
/**
 * Debug dump of an utterance.
 *
 * Word roots, syllables and (space-padded) phones go to stderr via fprintf;
 * the counters and positions of the utterance, phrase, word, syllable and
 * phone levels go to stdout via wprintf, one line per phone.
 */
void print_u(utterance_struct u) {
    for (INTPTR pi = 0; pi < u.phr_vector.GetSize(); pi++) {
        phrase_struct &phr = u.phr_vector[pi];

        for (INTPTR wi = 0; wi < phr.word_vector.GetSize(); wi++) {
            word_struct &word = phr.word_vector[wi];
            fprintf(stderr, "%s\n\n", ccstr(word.mi.m_szRoot));

            for (INTPTR si = 0; si < word.syl_vector.GetSize(); si++) {
                syl_struct &syl = word.syl_vector[si];
                fprintf(stderr, "\t%s\n", ccstr(syl.syl));

                for (INTPTR fi = 0; fi < syl.phone_vector.GetSize(); fi++) {
                    phone_struct p = syl.phone_vector[fi];

                    // Pad the phone name to six characters for alignment.
                    CFSWString padded = p.phone;
                    while (padded.GetLength() < 6) {
                        padded += sp;
                    }
                    fprintf(stderr, "\t\t%s", ccstr(padded));

                    // Utterance-level counters.
                    wprintf(L"%i %i %i %i\t", u.phra_c, u.word_c, u.syl_c, u.phone_c);
                    // Phrase-level counters and position.
                    wprintf(L"%i %i %i [%i]\t", phr.word_c, phr.syl_c, phr.phone_c, phr.utt_p);
                    // Word-level counters and positions.
                    wprintf(L"%i %i [%i %i]\t", word.syl_c, word.phone_c, word.utt_p, word.phr_p);
                    // Syllable-level counter and positions.
                    wprintf(L"%i [%i %i %i]\t", syl.phone_c, syl.utt_p, syl.phr_p, syl.word_p);
                    // Phone positions.
                    wprintf(L"[%i %i %i %i]\t", p.utt_p, p.phr_p, p.word_p, p.syl_p);
                    wprintf(L"{ %i }", syl.stress);
                    wprintf(L"\n");
                }
            }
        }
    }
    wprintf(L"\n");
}
/**
 * Splits the root of w into syllables and stores them in w.syl_vector.
 *
 * The root is first split at '_' into compound parts. Each part is
 * syllabified with word_to_syls(), split at the delimiter `d`, and passed to
 * add_stress2() together with the index of the part. Syllables are numbered
 * consecutively (word_p, starting at 1) across all parts. w.syl_c is reset
 * to 0 here and not counted up.
 */
void do_syls(word_struct &w) {
    w.syl_c = 0;

    // Template syllable: all counters/positions zeroed; only the text and
    // word_p change per syllable.
    syl_struct blank;
    blank.phone_c = 0;
    blank.word_p = 0;
    blank.phr_p = 0;
    blank.utt_p = 0;
    blank.stress = 0; // initial stress value

    CFSArray<CFSWString> parts;
    explode(w.mi.m_szRoot, L"_", parts);

    CFSArray<syl_struct> all_syls, part_syls;
    CFSArray<CFSWString> syl_texts;
    INTPTR next_word_p = 1;

    for (INTPTR part = 0; part < parts.GetSize(); part++) {
        CFSWString syllabified = word_to_syls(parts[part]);
        // Original author's note: "some pattern error", example "paindliKkus".
        // Upper-case K/R/V are folded back to lower case.
        syllabified.Replace(L"K", L"k", 1);
        syllabified.Replace(L"R", L"r", 1);
        syllabified.Replace(L"V", L"v", 1);
        explode(syllabified, d, syl_texts);

        part_syls.Cleanup();
        for (INTPTR k = 0; k < syl_texts.GetSize(); k++) {
            blank.syl = syl_texts[k];
            blank.word_p = next_word_p++;
            part_syls.AddItem(blank);
        }

        add_stress2(part_syls, part);

        for (INTPTR k = 0; k < part_syls.GetSize(); k++) {
            all_syls.AddItem(part_syls[k]);
        }
    }

    w.syl_vector = all_syls;
}
/**
 * Splits text into utterances.
 *
 * An utterance ends at a sentence-ending character that is followed by
 * whitespace and an upper-case character, or at a tab. Every collected
 * utterance is trimmed; trailing sentence-ending characters are removed from
 * the last one, and one trailing ending character is removed from every
 * stored item. A one-character input is stored as is, before that final
 * stripping pass.
 *
 * @param s  Text to split.
 * @return Array of utterances. It may contain empty strings, e.g. when an
 *         utterance consisted of ending characters only.
 */
CFSArray<CFSWString> do_utterances(CFSWString s) {
    CFSWString res = empty_str;
    CFSArray<CFSWString> res_array;

    if (s.GetLength() == 1) {
        res_array.AddItem(s);
    } else {
        for (INTPTR i = 0; i < s.GetLength(); i++) {
            CFSWString c = s.GetAt(i);
            CFSWString nc = s.GetAt(i + 1);
            CFSWString nnc = s.GetAt(i + 2);

            if (is_ending(c) && is_whitespace(nc) && is_upper(nnc)) {
                // Sentence boundary: the ending character itself is dropped.
                res.Trim();
                res_array.AddItem(res);
                res = empty_str;
            } else if (is_tab(c)) {
                // A tab always separates utterances but never yields an empty one.
                if (res.GetLength() > 0) {
                    res.Trim();
                    res_array.AddItem(res);
                    res = empty_str;
                }
            } else {
                res += c;
            }
        }
    }

    res.Trim();
    if (res.GetLength() > 0) {
        // Stop once the string is empty so that GetAt() is never called with -1.
        while (res.GetLength() > 0 && is_ending(res.GetAt(res.GetLength() - 1))) {
            res.Delete(res.GetLength() - 1, 1);
        }
        res_array.AddItem(res);
    }

    for (INTPTR i = 0; i < res_array.GetSize(); i++) {
        if (is_ending(res_array[i].GetAt(res_array[i].GetLength() - 1))) {
            res_array[i].Delete(res_array[i].GetLength() - 1, 1);
        }
    }
    return res_array;
}
INTPTR do_phrases(utterance_struct &u) { phrase_struct p; CFSWString res; p.phone_c = 0; p.syl_c = 0; p.word_c = 0; for (INTPTR i = 0; i < u.s.GetLength(); i++) { CFSWString c = u.s.GetAt(i); CFSWString pc = res.GetAt(res.GetLength() - 1); CFSWString nc = u.s.GetAt(i + 1); CFSWString nnc = u.s.GetAt(i + 2); if ((is_comma(c) || is_colon(c) || is_semicolon(c)) && is_space(nc) && is_char(nnc)) { res.Trim(); if (res.GetLength() > 0) { push_ph_res(u, p, res); } } else if (is_bbracket(c)) { res.Trim(); if (res.GetLength() > 0) { push_ph_res(u, p, res); } p.s = L"sulgudes"; u.phr_vector.AddItem(p); } else if (is_ebracket(c)) { res.Trim(); if (res.GetLength() > 0) { push_ph_res(u, p, res); } } else if (is_space(c)) { // komatud sidesõnad CFSWString tempm = u.s.Mid(i + 1, -1); res.Trim(); if (is_conju(tempm.Left(tempm.Find(sp))) && res.GetLength() > 0) { push_ph_res(u, p, res); } else res += c; } else if (is_bhyphen(c)) { res.Trim(); if (res.GetLength() > 0 && ((is_char(pc) && is_space(nc)) || (is_space(nc) && is_char(nnc)) || (is_space(pc) && is_char(nc)))) { push_ph_res(u, p, res); } else res += c; } else res += c; } if (res.GetLength() > 0) { // if (is_ending(res.GetAt(res.GetLength() - 1))) { // res.Delete(res.GetLength() - 1, 1); // } push_ph_res(u, p, res); } return u.phr_vector.GetSize(); }
/** Returns true when c contains any of the characters "eijäöü". */
bool must (CFSWString c) {
    return c.FindOneOf(L"eijäöü") > -1;
}
/**
 * Helper of do_phrases(): stores the trimmed text in `res` as the text of
 * phrase `p`, appends `p` to u.phr_vector and resets `res` to empty_str.
 */
void push_ph_res(utterance_struct &u, phrase_struct &p, CFSWString &res) {
    res.Trim();
    p.s = res;
    u.phr_vector.AddItem(p);

    // Start collecting the next phrase from scratch.
    res = empty_str;
}
/** Returns true when c contains any of the lower-case consonants "bdfghklmnprstv". */
bool can_palat_vr(CFSWString c) {
    return c.FindOneOf(L"bdfghklmnprstv") > -1;
}
int CSuggestor::Suggest(const CFSWString &szWord, bool bStartSentence){ m_TimeStart=CFSTime::Now(); m_Items.Cleanup(); m_Cap.SetCap(szWord); if (bStartSentence && m_Cap.GetCapMode()==CFSStrCap<CFSWString>::CAP_LOWER) { m_Cap.SetCapMode(CFSStrCap<CFSWString>::CAP_INITIAL); } CFSWString szWordHigh=szWord.ToUpper(); INTPTR ipWordLength=szWordHigh.GetLength(); CFSWString szTemp; INTPTR i, j; long lLevel=100; SetLevel(lLevel); // Case problems & change list i=SpellWord(szWordHigh, szTemp, &lLevel); if ((i==SPL_NOERROR || i==SPL_CHANGEONCE) && !szTemp.IsEmpty()){ SetLevel(GetLevelGroup(lLevel)); m_Items.AddItem(CSuggestorItem(szTemp, lLevel)); } else SetLevel(5); // Abbrevations // !!! Unimplemented // Quotes /* if (ipWordLength>=2 && (szAllQuot.Find(szWordHigh[0])>=0 || szAllQuot.Find(szWordHigh[ipWordLength-1])>=0)) { szTemp=szWordHigh; int iPos; if (szAllQuot.Find(szTemp[0])>=0){ if (szQuotLeft.Find(szTemp[0])>=0) { } else if ((iPos=szQuotRight.Find(szTemp[0]))>=0) { szTemp[0]=szQuotLeft[iPos]; } else if (szDQuotLeft.Find(szTemp[0])>=0) { } else if ((iPos=szDQuotRight.Find(szTemp[0]))>=0) { szTemp[0]=szDQuotLeft[iPos]; } if (szAllQuot.Find(szTemp[ipWordLength-1])>=0) { szTemp[ipWordLength-1]=(szQuotRight+szDQuotRight)[(szQuotLeft+szDQuotLeft).Find(szTemp[0])]; } else{ if (szQuotRight.Find(szTemp[ipWordLength-1])>=0) { } else if ((iPos=szQuotLeft.Find(szTemp[ipWordLength-1]))>=0) { szTemp[ipWordLength-1]=szQuotRight[iPos]; } else if (szDQuotRight.Find(szTemp[ipWordLength-1])>=0) { } else if ((iPos=szDQuotLeft.Find(szTemp[ipWordLength-1]))>=0) { szTemp[ipWordLength-1]=szDQuotRight[iPos]; } } CheckAndAdd(szTemp); }*/ // Add space for (i=1; i<ipWordLength-1; i++){ static CFSWString szPunktuation=FSWSTR(".:,;!?"); if (szPunktuation.Find(szWord[i])>=0){ long lLevel1, lLevel2; CFSWString szTemp1, szTemp2; if (SpellWord(szWord.Left(i+1), szTemp1, &lLevel1)==SPL_NOERROR && SpellWord(szWord.Mid(i+1), szTemp2, &lLevel2)==SPL_NOERROR) { 
m_Items.AddItem(CSuggestorItem(szWord.Left(i+1)+L' '+szWord.Mid(i+1), FSMAX(lLevel1, lLevel2))); } } } // Delete following blocks: le[nnu][nnu]jaam for (i=2; i<=3; i++){ for (j=0; j<ipWordLength-i-i; j++){ if (memcmp((const FSWCHAR *)szWordHigh+j, (const FSWCHAR *)szWordHigh+j+i, i*sizeof(FSWCHAR))==0){ szTemp=szWordHigh.Left(j)+szWordHigh.Mid(j+i); CheckAndAdd(szTemp); } } } // Change following letters: abb -> aab & aab -> abb for (i=1; i<ipWordLength-1; i++){ if (szWordHigh[i]==szWordHigh[i+1]){ szTemp=szWordHigh; szTemp[i]=szTemp[i-1]; if (FSIsLetterEst(szTemp[i])) CheckAndAdd(szTemp); } else if (szWordHigh[i]==szWordHigh[i-1]){ szTemp=szWordHigh; szTemp[i]=szTemp[i+1]; if (FSIsLetterEst(szTemp[i])) CheckAndAdd(szTemp); } } // Exchange letters: van[na]ema -> van[an]ema szTemp=szWordHigh; for (i=1; i<ipWordLength; i++){ if (szTemp[i]!=szTemp[i-1]){ FSWCHAR ch=szTemp[i]; szTemp[i]=szTemp[i-1]; szTemp[i-1]=ch; CheckAndAdd(szTemp); szTemp[i-1]=szTemp[i]; szTemp[i]=ch; } } // Change blocks for (i=0; i<ipWordLength; i++){ for (j=0; j<(INTPTR)(sizeof(ChangeStrings)/sizeof(__CChangeStrings)); j++){ if (szWordHigh.ContainsAt(i, ChangeStrings[j].m_lpszFrom)){ szTemp=szWordHigh.Left(i)+ChangeStrings[j].m_lpszTo+szWordHigh.Mid(i+FSStrLen(ChangeStrings[j].m_lpszFrom)); CheckAndAdd(szTemp); } } } // Change end blocks for (i=0; i<(INTPTR)(sizeof(ChangeStringsEnd)/sizeof(__CChangeStrings)); i++){ if (szWordHigh.EndsWith(ChangeStringsEnd[i].m_lpszFrom)){ szTemp=szWordHigh.Left(ipWordLength-FSStrLen(ChangeStringsEnd[i].m_lpszFrom))+ChangeStringsEnd[i].m_lpszTo; CheckAndAdd(szTemp); } } // Po~o~sas MultiReplace(szWordHigh, 0); // gi/ki: Kylli[gi]le -> Kyllile[gi] for (i=3; i<=6; i++){ if (i>ipWordLength) break; if (memcmp((const FSWCHAR *)szWordHigh+ipWordLength-i, FSWSTR("GI"), 2*sizeof(FSWCHAR))==0){ szTemp=szWordHigh.Left(ipWordLength-i)+szWordHigh.Mid(ipWordLength-i+2)+FSWSTR("GI"); CheckAndAdd(szTemp); 
szTemp=szWordHigh.Left(ipWordLength-i)+szWordHigh.Mid(ipWordLength-i+2)+FSWSTR("KI"); CheckAndAdd(szTemp); } } // Delete letters: van[n]aema -> vanaema szTemp=szWordHigh.Mid(1); CheckAndAdd(szTemp); for (i=0; i<ipWordLength-1; i++){ if (szTemp[i]!=szWordHigh[i]){ szTemp[i]=szWordHigh[i]; CheckAndAdd(szTemp); } } // Change letters from list for (i=0; i<ipWordLength; i++){ const FSWCHAR *lpszTo=__SuggestChangeLetters(szWordHigh[i]); if (!lpszTo) continue; szTemp=szWordHigh; for (; lpszTo[0]; lpszTo++){ szTemp[i]=lpszTo[0]; CheckAndAdd(szTemp); } } // Insert letters to word body for (i=1; i<ipWordLength; i++){ szTemp=szWordHigh.Left(i)+FSWSTR(' ')+szWordHigh.Mid(i); for (j=0; szInsertLetters[j]; j++){ szTemp[i]=szInsertLetters[j]; CheckAndAdd(szTemp); } } // Insert letters to the beginning szTemp=CFSWString(FSWSTR(" "))+szWordHigh; for (i=0; szInsertLettersBeg[i]; i++){ if (szTemp[1]==szInsertLettersBeg[i]) continue; szTemp[0]=szInsertLettersBeg[i]; CheckAndAdd(szTemp); } // Try apostrophe for names if (szWord[0]!=szWordHigh[0] && szWordHigh.Find('\'')<0){ for (i=0; i<5; i++){ if (i>=ipWordLength) break; szTemp=szWordHigh.Left(ipWordLength-i)+L'\''+szWordHigh.Mid(ipWordLength-i); CheckAndAdd(szTemp); } } Order(); RemoveImmoderate(); RemoveDuplicates(); return 0; }
/**
 * Runs the whole front-end pipeline for one utterance.
 *
 * The lower-cased text is split into phrases (do_phrases) and words
 * (phrase2words); each word is syllabified (do_syls) and each syllable is
 * converted to phones (do_phones). While doing so the counters (*_c) and the
 * 1-based positions (*_p) of phrases, words, syllables and phones are filled
 * in. The result of do_label() for the finished structure is returned.
 *
 * @param utt          Utterance text.
 * @param print_label  When true, the structure is dumped with print_u().
 * @param print_utt    When true, the utterance text is written to stderr.
 * @return The array produced by do_label().
 */
CFSArray<CFSWString> do_all(CFSWString utt, bool print_label, bool print_utt) {
    CFSArray<CFSWString> res;
    CFSArray<CPTWord> PTW;
    utterance_struct u;

    u.s = utt.ToLower();
    u.syl_c = 0;
    u.phone_c = 0;
    u.phra_c = do_phrases(u);

    if (print_utt) fprintf(stderr, "%s\n", ccstr(utt));

    // Collect the words of all phrases into one flat list.
    for (INTPTR i = 0; i < u.phr_vector.GetSize(); i++) {
        u.phr_vector[i].utt_p = i;
        phrase2words(u.phr_vector[i], PTW);
    }

    CFSArray<CMorphInfo> words;
    for (INTPTR i = 0; i < PTW.GetSize(); i++) {
        CMorphInfo MI;
        MI.m_szRoot = PTW[i].m_szWord;
        words.AddItem(MI);
    }
    u.word_c = words.GetSize();

    word_struct w;
    INTPTR word_utt_p = 1;   // position of the word within the utterance
    INTPTR syl_utt_p = 1;
    INTPTR phone_utt_p = 1;
    // Index of the next unconsumed entry of `words`. The phrases consume the
    // flat list in order; a running index avoids removing the first array
    // element after every word.
    INTPTR next_word = 0;

    for (INTPTR i = 0; i < u.phr_vector.GetSize(); i++) {
        phrase_struct &phr = u.phr_vector[i];
        phr.utt_p = i + 1;
        INTPTR syl_phr_p = 1;
        INTPTR phone_phr_p = 1;
        INTPTR phrase_pho_c = 1;

        for (INTPTR i1 = 0; i1 < phr.word_c; i1++) {
            w.utt_p = word_utt_p++;
            w.phr_p = i1 + 1;
            w.mi = words[next_word++];
            w.mi.m_szRoot += make_char_string(w.mi.m_szEnding) + w.mi.m_szClitic;
            w.mi.m_szRoot = w.mi.m_szRoot.ToLower();
            do_syls(w);
            phr.word_vector.AddItem(w);

            // Work on the stored copy from here on.
            word_struct &word = phr.word_vector[i1];
            INTPTR phone_word_p = 1;
            INTPTR word_pho_c = 1;

            for (INTPTR i2 = 0; i2 < word.syl_vector.GetSize(); i2++) {
                syl_struct &syl = word.syl_vector[i2];
                u.syl_c++;
                phr.syl_c++;
                word.syl_c++;
                syl.phr_p = syl_phr_p++;
                syl.utt_p = syl_utt_p++;

                INTPTR syl_phone_c = 1;
                do_phones(syl);

                for (INTPTR i3 = 0; i3 < syl.phone_vector.GetSize(); i3++) {
                    u.phone_c++;
                    // The running counters hold the number of phones seen so far.
                    phr.phone_c = phrase_pho_c++;
                    word.phone_c = word_pho_c++;
                    syl.phone_c = syl_phone_c++;

                    phone_struct &p = syl.phone_vector[i3];
                    p.utt_p = phone_utt_p++;
                    p.phr_p = phone_phr_p++;
                    p.word_p = phone_word_p++;
                }
            }
        }
    }

    if (print_label) print_u(u);

    res = do_label(u);
    return res;
}
/** Returns true when c contains any of the upper-case consonants "BDFGHKLMNPRSTV". */
bool can_palat(CFSWString c) {
    return c.FindOneOf(L"BDFGHKLMNPRSTV") > -1;
}
/**
 * First stage of the letter-to-phone conversion.
 *
 * Walks through s, resolving the marker characters and replacing foreign
 * letters:
 *   ']'  upper-cases the preceding character and spreads the upper-casing to
 *        up to two more characters on the left and up to two on the right,
 *        as long as can_palat() accepts them;
 *   '<'  upper-cases the following character;
 *   '?'  is dropped (abnormal stress; to be handled later);
 *   x, y, w, z, c become "ks", "i", "v", "ts", "k";
 *   an 'ü' that follows 'ü' and precedes a vowel becomes "i".
 *
 * @param s  Input; modified in place when a marker upper-cases characters
 *           that have not been consumed yet.
 * @return The converted string (markers removed).
 *
 * NOTE(review): the look-back/look-ahead reads and writes are not range
 * checked; this assumes GetAt()/SetAt() tolerate out-of-range indices -
 * TODO confirm.
 */
CFSWString chars_to_phones_part_I(CFSWString &s) {
    CFSWString res;
    for (INTPTR i = 0; i < s.GetLength(); i++) {
        CFSWString c = s.GetAt(i);
        if (c == L']') {
            CFSWString t = CFSWString(s.GetAt(i - 1)).ToUpper();
            res.SetAt(res.GetLength() - 1, t.GetAt(0));

            // Look back (the original author was not sure this is needed).
            t = CFSWString(s.GetAt(i - 2)).ToUpper();
            if (can_palat(t)) {
                res.SetAt(res.GetLength() - 2, t.GetAt(0));
                t = CFSWString(s.GetAt(i - 3)).ToUpper();
                if (can_palat(t)) {
                    // The character taken from i - 3 belongs three positions
                    // from the end; writing it at length - 2 overwrote the
                    // consonant stored just above.
                    res.SetAt(res.GetLength() - 3, t.GetAt(0));
                }
            }

            // Look ahead.
            t = CFSWString(s.GetAt(i + 1)).ToUpper();
            if (can_palat(t)) {
                s.SetAt(i + 1, t.GetAt(0));
                t = CFSWString(s.GetAt(i + 2)).ToUpper();
                if (can_palat(t)) {
                    s.SetAt(i + 2, t.GetAt(0));
                }
            }
        }
        else if (c == L'<') {
            CFSWString t = CFSWString(s.GetAt(i + 1)).ToUpper();
            s.SetAt(i + 1, t.GetAt(0));
        }
        else if (c == L'?') {
            // Abnormal stress; what to do with it is decided later.
        }
        else if (c == L'x') res += L"ks";
        else if (c == L'y') res += L"i";
        else if (c == L'w') res += L"v";
        else if (c == L'z') res += L"ts";
        else if (c == L'c') {
            res += L"k";
        }
        else if (c == L'ü' && is_vowel(s.GetAt(i + 1)) && s.GetAt(i - 1) == L'ü') res += L"i";
        else res += c;
    }
    return res;
}
/**
 * Converts an 8-bit string to a wide (UCS-2) string.
 *
 * For every code page except PFSCP_HTMLEXT the conversion is delegated to
 * FSStrAtoW(). For PFSCP_HTMLEXT the input must be 7-bit ASCII in which
 * SGML entities ("&name;") are looked up in the table sgml2uc and, when
 * autosgml is set, numeric references ("&#DDDD;" / "&#xHHHH;") are decoded.
 *
 * @param wStr        Output string; emptied first.
 * @param aStr        Input string.
 * @param koodiTabel  Code page of the input.
 * @throws VEAD when the entity table has not been loaded, the input contains
 *         a non-ASCII byte, a numeric reference is malformed or does not fit
 *         into 16 bits, or (unless ignoramp is set) an ampersand does not
 *         start a known entity. With ignoramp set such an ampersand is
 *         copied through literally.
 */
void CONV_HTML_UC2::ConvToUc(
    CFSWString& wStr,
    const CFSAString& aStr,
    const PFSCODEPAGE koodiTabel
    )
    {
    wStr.Empty();

    // Ordinary code pages are handled completely by the library tables.
    if(koodiTabel!=PFSCP_HTMLEXT)
        {
        wStr = FSStrAtoW(aStr, koodiTabel);
        return;
        }
    assert(koodiTabel==PFSCP_HTMLEXT);

    // From here on the entity table read from file is used.
    if(sgml2uc.idxLast<=0)
        throw VEAD(ERR_X_TYKK, ERR_ARGVAL, __FILE__,__LINE__, "$Revision: 557 $",
                   "SGML olemite tabel mallu lugemata");

    int l, n=aStr.GetLength();
    for(l=0; l < n; l++)
        {
        // Only 7-bit ASCII is allowed in the input.
        if((aStr[l] & (~0x7F))!=0)
            throw VEAD(ERR_X_TYKK, ERR_ARGVAL, __FILE__,__LINE__, "$Revision: 557 $",
                       "String peab koosnema ASCII (7bitistest) koodidest", (const char*)aStr+l);

        if(aStr[l]!='&')
            {
            // Not the start of an entity: copy the character as is. This is
            // also the landing point for tolerated stray ampersands.
tryki:      wStr += ((FSWCHAR)(aStr[l])) & 0x7F;
            continue;
            }

        // May start an SGML entity of the form &blah;
        int lSemiPos=(int)aStr.Find(";", l+1);
        if(lSemiPos<0)
            {
            // Ampersand without a terminating semicolon.
            if(ignoramp==true)
                goto tryki;
            throw VEAD(ERR_X_TYKK, ERR_ARGVAL, __FILE__,__LINE__, "$Revision: 557 $",
                       "Ampersandi tagant semi puudu", (const char*)aStr+l);
            }

        if(autosgml==true && aStr[l+1]=='#')
            {
            // Numeric character reference. The digits are parsed by the
            // STRSOUP helpers (earlier hand-written validation loops were
            // already commented out in the original).
            int tmp=0, j=l+2;
            if(aStr[j]=='x' || aStr[j]=='X')
                {
                j++; // skip the hex prefix
                j+=STRSOUP::UnsignedStr2Hex<int, char>(&tmp, ((const char*)aStr)+j);
                }
            else
                {
                j+=STRSOUP::UnsignedStr2Num<int, char>(&tmp, ((const char*)aStr)+j);
                }
            // The number must be followed directly by the semicolon ...
            if(j<=0 || aStr[j]!=';')
                throw VEAD(ERR_X_TYKK, ERR_ARGVAL, __FILE__,__LINE__, "$Revision: 557 $",
                           "Vigane SGML olem", (const char*)aStr+l);
            // ... and must fit into two bytes.
            if(tmp>0xFFFF)
                throw VEAD(ERR_X_TYKK, ERR_ARGVAL, __FILE__,__LINE__, "$Revision: 557 $",
                           "Vigane SGML olem (peab mahtuma 2 baidi peale)", (const char*)aStr+l);
            wStr += (WCHAR)tmp;
            l=lSemiPos;
            continue;
            }

        if(lSemiPos-l+1 > sgml_stringi_max_pikkus)
            {
            // Longer than any entity the table can contain.
            if(ignoramp==true)
                goto tryki;
            throw VEAD(ERR_X_TYKK, ERR_ARGVAL, __FILE__,__LINE__, "$Revision: 557 $",
                       "Puudub SGML olemite tabelist", (const char*)aStr+l);
            }

        // Cut "&bla;" out of the input and look it up.
        CFSAString szSymbol=aStr.Mid(l, lSemiPos-l+1);
        SGML_UC* rec;
        if((rec=sgml2uc.Get(&szSymbol))==NULL)
            {
            // Not found in the table.
            if(ignoramp==true)
                goto tryki;
            throw VEAD(ERR_X_TYKK, ERR_ARGVAL, __FILE__,__LINE__, "$Revision: 557 $",
                       "Puudub SGML olemite tabelist", (const char*)szSymbol);
            }
        wStr += rec->uc;
        l=lSemiPos;
        }
    }
/**
 * Inserts the quantity mark `colon` into a syllabified word.
 *
 * The word is consumed character by character. From a character accepted by
 * is_uvowel() onwards a class pattern is built with shift_pattern(); at the
 * next syllable delimiter `d`, or at the end of the word, the pattern is
 * looked up with pattern_lookup(). The lookup result, offset by the position
 * of that vowel, is where the mark is inserted.
 *
 * Translated note of the original author: the simplification may have to be
 * done in two stages, without LQ and with LQ (see shift_pattern). All of this
 * stems from not having fully understood the quantity-shift rules; there is
 * Mihkla's version and the official version. Thanks to Mihkla for handing
 * over a fishing rod instead of a fish - wrestling with the rod took long.
 */
CFSWString the_shift(CFSWString s) {
    CFSWString res;
    CFSWString code;   // class pattern collected since the last marked vowel
    INTPTR pos = 0;    // output position of that vowel; set before every use
    INTPTR i = 0;      // characters written so far, inserted marks included

    while (s.GetLength() > 0) {
        CFSWString c = s.GetAt(0);
        s.Delete(0, 1);

        if (is_uvowel(c)) {
            c = c.ToLower();
            code += shift_pattern(c);
            res += c;
            pos = i;
        }
        else if (c == d && code.GetLength() > 0) {
            res += c;
            code += c;

            // First try the pattern extended across the syllable boundary by
            // the class of the next character: original form, then simplified.
            CFSWString t_code = code;
            t_code += shift_pattern(s.GetAt(0));
            INTPTR x = pattern_lookup(t_code);
            if (x < 0) {
                t_code = simplify_pattern(t_code);
                x = pattern_lookup(t_code);
            }

            if (x > -1) {
                x += pos;
                if (x > res.GetLength()) {
                    // The mark lands in the next, not yet consumed syllable.
                    s.Insert(x - res.GetLength(), colon);
                } else {
                    res.Insert(x, colon);
                }
                i++;
            }
            else {
                // Fall back to the pattern of this syllable only: original
                // form, then simplified.
                x = pattern_lookup(code);
                if (x < 0) {
                    code = simplify_pattern(code);
                    x = pattern_lookup(code);
                }
                if (x > -1) {
                    res.Insert(x + pos, colon);
                    i++;
                }
            }
            code = empty_str;
        }
        else {
            res += c;
            if (code.GetLength() > 0) {
                code += shift_pattern(c);
            }
        }
        i++;
    }

    // End of the word: a pattern is still pending.
    if (code.GetLength() > 0) {
        code += L"#";
        // Odd special case, present only because of the word "lonksu".
        if ((code.Left(3) == L"VLQ") &&
            ((code.GetAt(3) == L's') || (code.GetAt(3) == L'h') ||
             (code.GetAt(3) == L'v') || (code.GetAt(3) == L'j'))) {
            code = L"VLQC#";
        }

        INTPTR x = pattern_lookup(code);
        if (x < 0) {
            code = simplify_pattern(code);
            x = pattern_lookup(code);
        }
        if (x > -1) {
            res.Insert(x + pos, colon);
        }
        code = empty_str;
    }
    return res;
}
/** Returns true when c contains any of the upper-case consonants "DLNST". */
bool can_palat(CFSWString c) {
    return c.FindOneOf(L"DLNST") > -1;
}
/**
 * Copies s into a std::string one character at a time.
 *
 * NOTE(review): each wide character is converted to char implicitly, so
 * this presumably only round-trips characters that fit into a char -
 * confirm against the callers.
 */
std::string to_stdstring(CFSWString s) {
    std::string out;
    const INTPTR len = s.GetLength();
    for (INTPTR pos = 0; pos < len; ++pos) {
        out.push_back(s.GetAt(pos));
    }
    return out;
}
/**
 * First stage of the letter-to-phone conversion.
 *
 * Translated note of the original author: should "müüa -> müia" live here?
 * Because the quantity-mark shifting rules use phonemes in some places and
 * letters in others, the letter->phoneme conversion has to be done in two
 * parts. Palatalisation is based on the idea that a palatalised phoneme
 * cannot be surrounded by palatalisable phonemes that are not palatalised.
 *
 * Walks through s, resolving the marker characters and replacing foreign
 * letters:
 *   ']'  upper-cases the preceding character and spreads the upper-casing to
 *        up to two more characters on the left and up to two on the right,
 *        as long as can_palat() accepts them;
 *   '<'  upper-cases the following character;
 *   '?'  is dropped (abnormal stress; to be handled later);
 *   x, y, w, z, c become "ks", "i", "v", "ts", "k";
 *   an 'ü' that follows 'ü' and precedes a vowel becomes "i";
 *   q becomes "k", and a vowel right after it is replaced by 'v'.
 *
 * @param s  Input; modified in place when a marker or 'q' rewrites
 *           characters that have not been consumed yet.
 * @return The converted string (markers removed).
 *
 * NOTE(review): the look-back/look-ahead reads and writes are not range
 * checked; this assumes GetAt()/SetAt() tolerate out-of-range indices -
 * TODO confirm.
 */
CFSWString chars_to_phones_part_I(CFSWString &s) {
    CFSWString res;
    for (INTPTR i = 0; i < s.GetLength(); i++) {
        CFSWString c = s.GetAt(i);
        if (c == L']') {
            CFSWString t = CFSWString(s.GetAt(i - 1)).ToUpper();
            res.SetAt(res.GetLength() - 1, t.GetAt(0));

            // Look back (the original author was not sure this is needed).
            t = CFSWString(s.GetAt(i - 2)).ToUpper();
            if (can_palat(t)) {
                res.SetAt(res.GetLength() - 2, t.GetAt(0));
                t = CFSWString(s.GetAt(i - 3)).ToUpper();
                if (can_palat(t)) {
                    // The character taken from i - 3 belongs three positions
                    // from the end; writing it at length - 2 overwrote the
                    // consonant stored just above.
                    res.SetAt(res.GetLength() - 3, t.GetAt(0));
                }
            }

            // Look ahead.
            t = CFSWString(s.GetAt(i + 1)).ToUpper();
            if (can_palat(t)) {
                s.SetAt(i + 1, t.GetAt(0));
                t = CFSWString(s.GetAt(i + 2)).ToUpper();
                if (can_palat(t)) {
                    s.SetAt(i + 2, t.GetAt(0));
                }
            }
        }
        else if (c == L'<') {
            CFSWString t = CFSWString(s.GetAt(i + 1)).ToUpper();
            s.SetAt(i + 1, t.GetAt(0));
        }
        else if (c == L'?') {
            // Abnormal stress; what to do with it is decided later.
        }
        else if (c == L'x') res += L"ks";
        else if (c == L'y') res += L"i";
        else if (c == L'w') res += L"v";
        else if (c == L'z') res += L"ts";
        else if (c == L'c') {
            res += L"k";
        }
        else if (c == L'ü' && is_vowel(s.GetAt(i + 1)) && s.GetAt(i - 1) == L'ü') res += L"i";
        else if (c == L'q') {
            // "q" + vowel is rendered as "kv": the vowel is overwritten in
            // the input so that the next iteration emits 'v'.
            if (is_vowel(s.GetAt(i + 1))) {
                res += L"k";
                s.SetAt(i + 1, L'v');
            }
            else res += L"k";
        }
        else res += c;
    }
    return res;
}