Beispiel #1
0
void CSuggestor::MultiReplace(const CFSWString &szWord, INTPTR ipStartPos)
{
	if (ipStartPos>0) CheckAndAdd(szWord);
	INTPTR ipLength=szWord.GetLength();
	for (; ipStartPos<ipLength; ipStartPos++){
		for (INTPTR ip=0; ip<(INTPTR)(sizeof(ChangeStringsMultiple)/sizeof(__CChangeStrings)); ip++){
			if (szWord.ContainsAt(ipStartPos, ChangeStringsMultiple[ip].m_lpszFrom)){
				MultiReplace(szWord.Left(ipStartPos)+ChangeStringsMultiple[ip].m_lpszTo+szWord.Mid(ipStartPos+FSStrLen(ChangeStringsMultiple[ip].m_lpszFrom)), ipStartPos+FSStrLen(ChangeStringsMultiple[ip].m_lpszTo));
			}
		}
	}
}
Beispiel #2
0
/**
 * Copies s into a new string, inserting the delimiter `d` in front of a
 * character whenever one of three neighbour-based rules matches.
 *
 * Rules (each checked independently, so more than one delimiter may be
 * inserted before the same character):
 *   1. consonant between two vowels;
 *   2. vowel between two vowels whose lower-cased form equals the next character;
 *   3. consonant preceded by a consonant and followed by a vowel, provided the
 *      output built so far already contains a vowel (marked "questionable" by
 *      the original author).
 *
 * NOTE(review): the neighbour look-ups read index -1 at the first character and
 * index GetLength() at the last one; this relies on CFSWString::GetAt
 * tolerating those indices - confirm against the string class.
 *
 * @param s  Input text.
 * @return   s with delimiters inserted.
 */
CFSWString syllabify2(CFSWString s) {
    CFSWString out;
    const INTPTR len = s.GetLength();

    for (INTPTR pos = 0; pos < len; pos++) {
        CFSWString cur = s.GetAt(pos);
        const bool cur_is_consonant = is_consonant(cur);
        const bool cur_is_vowel = is_vowel(cur);

        // Rule 1: V-C-V
        if (cur_is_consonant && is_vowel(s.GetAt(pos - 1)) && is_vowel(s.GetAt(pos + 1))) {
            out += d;
        }
        // Rule 2: V-V-V where the current vowel repeats as the next character
        if (cur_is_vowel && is_vowel(s.GetAt(pos - 1)) && is_vowel(s.GetAt(pos + 1))
                && cur.ToLower() == s.GetAt(pos + 1)) {
            out += d;
        }
        // Rule 3: C-C-V once the output already holds a vowel (questionable)
        if (cur_is_consonant && is_consonant(s.GetAt(pos - 1)) && is_vowel(s.GetAt(pos + 1))
                && has_vowel(out)) {
            out += d;
        }

        out += cur;
    }
    return out;
}
Beispiel #3
0
int CSuggestor::CheckAndAdd(const CFSWString &szWord)
{
	if (m_fTimeOut>0 && (CFSTime::Now()-m_TimeStart).GetSeconds()>=m_fTimeOut) return -1;
	if (szWord.IsEmpty()) return -1;
	CFSWString szTemp;
	long lLevel=100;
	if (SpellWord(szWord, szTemp, &lLevel)==SPL_NOERROR && !szTemp.IsEmpty()){
		szTemp=m_Cap.GetCap(szTemp);
		CFSWString szTemp2; long lLevel2;
		if (SpellWord(szTemp, szTemp2, &lLevel2)==SPL_NOERROR){
			SetLevel(GetLevelGroup(lLevel));
			m_Items.AddItem(CSuggestorItem(szTemp, lLevel)); return 0;
		}
	}
	return -1;
}
Beispiel #4
0
/**
 * Tells whether s qualifies as a word.
 *
 * All three conditions must hold:
 *   - has_vowel(s) reports true;
 *   - make_char_string(s) has the same length as s (presumably meaning s
 *     consists only of characters that helper keeps - confirm);
 *   - s is longer than one character.
 *
 * @param s  Text to test.
 * @return   true when every condition holds, false otherwise.
 */
bool is_word(CFSWString s) {
    const bool contains_vowel = (has_vowel(s) == true);
    if (!contains_vowel) {
        return false;
    }
    if (s.GetLength() != make_char_string(s).GetLength()) {
        return false;
    }
    return s.GetLength() > 1;
}
Beispiel #5
0
void PTWSplitBuffer(const CFSWString &szBuffer, CPTWordArray &Words)
{
	Words.Cleanup();
	INTPTR ipStartPos=0;
	INTPTR ipPos;
	for (ipPos=0; ipPos<szBuffer.GetLength(); ipPos++) {
		if (FSIsSpace(szBuffer[ipPos])) {
			if (ipPos>ipStartPos) {
				Words.AddItem(CPTWord(szBuffer.Mid(ipStartPos, ipPos-ipStartPos), ipStartPos));
			}
			ipStartPos=ipPos+1;
		}
	}
	if (ipPos>ipStartPos) {
		Words.AddItem(CPTWord(szBuffer.Mid(ipStartPos, ipPos-ipStartPos), ipStartPos));
	}
}
Beispiel #6
0
/**
 * Rewrites apostrophe-marked text into a form where the marking is carried by
 * upper-case letters instead.
 *
 * A handful of whole-string forms are mapped through a fixed table. Any other
 * input is scanned from the last character to the first:
 *   - an apostrophe is dropped and switches the "marking" state on;
 *   - a character accepted by must() is kept and also switches the state on;
 *   - while the state is on, characters accepted by can_palat_vr() are
 *     upper-cased; the first character that is not accepted is kept as is and
 *     switches the state off;
 *   - everything else is copied unchanged.
 *
 * @param s  Input text.
 * @return   The rewritten text.
 */
CFSWString palat_vru (CFSWString s) {
    // Whole-string exceptions, checked in this order.
    static const struct {
        const wchar_t *from;
        const wchar_t *to;
    } fixed_forms[] = {
        { L"är'",  L"ärq"  },
        { L"ar'",  L"arq"  },
        { L"jäl'", L"jälq" },
        { L"jal'", L"jalq" },
        { L"kül'", L"külq" },
        { L"pan'", L"panq" },
        { L"tul'", L"tulq" },
        { L"ol'",  L"olq"  }
    };
    const INTPTR fixed_count = (INTPTR) (sizeof (fixed_forms) / sizeof (fixed_forms[0]));

    CFSWString out;
    for (INTPTR k = 0; k < fixed_count; k++) {
        if (s == fixed_forms[k].from) {
            out = fixed_forms[k].to;
            return out;
        }
    }

    bool marking = false;
    for (INTPTR pos = s.GetLength() - 1; pos >= 0; pos--) {
        CFSWString ch = s.GetAt(pos);

        if (ch == L"'") {
            // The apostrophe itself never reaches the output.
            marking = true;
            continue;
        }

        if (must(ch)) {
            marking = true;
        } else if (marking) {
            if (can_palat_vr(ch)) {
                ch = ch.ToUpper();
            } else {
                marking = false;
            }
        }
        out = ch + out;
    }
    return out;
}
Beispiel #7
0
/**
 * Maps a symbol to its class code for the pattern strings built in this file.
 *
 * Checks are made in this order, first match wins:
 *   "j", "h", "v"                 -> returned unchanged
 *   contains one of "sS"          -> 's'
 *   contains one of "lmnrLN"      -> 'L'
 *   contains one of "kptfšT"      -> 'Q'
 *   is_vowel()                    -> 'V'
 *   is_consonant()                -> 'C'
 *   anything else                 -> s itself
 *
 * @param s  Symbol to classify.
 * @return   The class code, or s when no class applies.
 */
CFSWString shift_pattern(CFSWString s) {
    if (s == L'j') {
        return L'j';
    }
    if (s == L'h') {
        return L'h';
    }
    if (s == L'v') {
        return L'v';
    }
    if (s.FindOneOf(L"sS") > -1) {
        return L's';
    }
    if (s.FindOneOf(L"lmnrLN") > -1) {
        return L'L';
    }
    if (s.FindOneOf(L"kptfšT") > -1) {
        return L'Q';
    }
    if (is_vowel(s)) {
        return L'V';
    }
    if (is_consonant(s)) {
        return L'C';
    }
    return s;
}
Beispiel #8
0
/**
 * Cleans raw input text (original author's note: "trying to throw out all the
 * junk").
 *
 * The text is trimmed, one Replace() call turns "\n\n" into "\n", and then
 * every character is either copied, replaced or dropped according to its class
 * and its neighbours:
 *   pc - last character already written to the result,
 *   nc - next character of the input.
 * Characters matching none of the rules are dropped. The result is trimmed
 * before it is returned.
 *
 * NOTE(review): pc is read at index -1 while the result is empty and nc at
 * index GetLength() for the last character; this relies on CFSWString::GetAt
 * tolerating those indices - confirm.
 *
 * @param text  Raw text.
 * @return      Cleaned text.
 */
CFSWString DealWithText(CFSWString text) {
    CFSWString res;
    text.Trim();
    text.Replace(L"\n\n", L"\n", 1);

    for (INTPTR i = 0; i < text.GetLength(); i++) {
        CFSWString c = text.GetAt(i);
        CFSWString pc = res.GetAt(res.GetLength() - 1);
        CFSWString nc = text.GetAt(i + 1);

        // Letters and digits are always kept.
        if (is_char(c) || is_digit(c)) {
            res += c;
            continue;
        }
        // A hyphen between two letters becomes `sp`.
        if (is_hyphen(c) && is_char(pc) && is_char(nc)) {
            res += sp;
            continue;
        }
        // Symbols are kept; colons and opening brackets are kept unless they
        // would repeat the previous output character's class.
        if (is_symbol(c)
                || (is_colon(c) && !is_colon(pc))
                || (is_bbracket(c) && !is_bbracket(pc))) {
            res += c;
            continue;
        }
        // A closing bracket directly before an ending character is dropped
        // (the original appends an empty string here).
        if (is_ebracket(c) && is_ending(nc)) {
            res += L"";
            continue;
        }
        // Closing brackets and commas: kept unless repeated.
        if ((is_ebracket(c) && !is_ebracket(pc))
                || (is_comma(c) && !is_comma(pc))) {
            res += c;
            continue;
        }
        // Characters recognised by is_fchar() are substituted.
        if (is_fchar(c)) {
            res += replace_fchar(c);
            continue;
        }
        // Spaces, breaks (marked "dubious" by the original author), tabs and
        // ending characters: kept only when they do not pile up on each other.
        if ((is_space(c) && !is_whitespace(pc))
                || (is_break(c) && !is_break(pc))
                || (is_tab(c) && !is_whitespace(pc))
                || (is_ending(c) && !is_ending(pc) && !is_whitespace(pc))) {
            res += c;
        }
    }

    res.Trim();
    return res;
}
Beispiel #9
0
/**
 * Debug dump of an utterance structure.
 *
 * Word roots, syllable texts and phone names go to stderr via fprintf();
 * the numeric counters and positions go to stdout via wprintf(). One output
 * line is produced per phone, containing in order: utterance-level counters,
 * phrase-level counters, word-level counters, syllable-level counters, the
 * phone's own positions and the syllable's stress value.
 *
 * @param u  Utterance to print (taken by value).
 */
void print_u(utterance_struct u) {
    for (INTPTR pi = 0; pi < u.phr_vector.GetSize(); pi++) {
        phrase_struct &phr = u.phr_vector[pi];

        for (INTPTR wi = 0; wi < phr.word_vector.GetSize(); wi++) {
            word_struct &wrd = phr.word_vector[wi];
            fprintf(stderr, "%s\n\n", ccstr(wrd.mi.m_szRoot));

            for (INTPTR si = 0; si < wrd.syl_vector.GetSize(); si++) {
                syl_struct &sy = wrd.syl_vector[si];
                fprintf(stderr, "\t%s\n", ccstr(sy.syl));

                for (INTPTR fi = 0; fi < sy.phone_vector.GetSize(); fi++) {
                    phone_struct ph = sy.phone_vector[fi];

                    // Pad the phone name with `sp` to a width of six.
                    CFSWString padded = ph.phone;
                    while (padded.GetLength() < 6) {
                        padded += sp;
                    }
                    fprintf(stderr, "\t\t%s", ccstr(padded));

                    wprintf(L"%i %i %i %i\t", u.phra_c, u.word_c, u.syl_c, u.phone_c);
                    wprintf(L"%i %i %i [%i]\t", phr.word_c, phr.syl_c, phr.phone_c, phr.utt_p);
                    wprintf(L"%i %i [%i %i]\t", wrd.syl_c, wrd.phone_c, wrd.utt_p, wrd.phr_p);
                    wprintf(L"%i [%i %i %i]\t", sy.phone_c, sy.utt_p, sy.phr_p, sy.word_p);
                    wprintf(L"[%i %i %i %i]\t", ph.utt_p, ph.phr_p, ph.word_p, ph.syl_p);
                    wprintf(L"{ %i }", sy.stress);
                    wprintf(L"\n");
                }
            }
        }
    }
    wprintf(L"\n");
}
Beispiel #10
0
/**
 * Builds the syllable list of a word.
 *
 * The word's root (w.mi.m_szRoot) is split at "_" into parts. Each part is
 * syllabified with word_to_syls(), split at the delimiter `d`, turned into
 * syl_struct records and passed to add_stress2() together with the index of
 * the part. The records of all parts are concatenated into w.syl_vector.
 *
 * Each syllable gets a running 1-based position within the whole word
 * (word_p); phone_c, phr_p and utt_p are zeroed and stress starts at 0 before
 * add_stress2() runs. w.syl_c is reset to 0.
 *
 * @param w  Word to process; its syl_c and syl_vector are overwritten.
 */
void do_syls(word_struct &w) {
    CFSArray<syl_struct> all_syls, part_syls;
    CFSArray<CFSWString> syl_texts, parts;

    syl_struct proto;
    proto.phone_c = 0;
    proto.word_p = 0;
    proto.phr_p = 0;
    proto.utt_p = 0;
    proto.stress = 0;

    w.syl_c = 0;
    INTPTR next_word_p = 1;

    explode(w.mi.m_szRoot, L"_", parts);

    for (INTPTR part = 0; part < parts.GetSize(); part++) {
        CFSWString syllabified = word_to_syls(parts[part]);

        // Workaround noted by the original author for a pattern error
        // (example given: "paindliKkus"): lower-case stray K, R and V.
        syllabified.Replace(L"K", L"k", 1);
        syllabified.Replace(L"R", L"r", 1);
        syllabified.Replace(L"V", L"v", 1);

        explode(syllabified, d, syl_texts);

        part_syls.Cleanup();
        for (INTPTR k = 0; k < syl_texts.GetSize(); k++) {
            proto.syl = syl_texts[k];
            proto.stress = 0; // initial stress value
            proto.word_p = next_word_p++;
            part_syls.AddItem(proto);
        }

        add_stress2(part_syls, part);

        for (INTPTR k = 0; k < part_syls.GetSize(); k++) {
            all_syls.AddItem(part_syls[k]);
        }
    }

    w.syl_vector = all_syls;
}
Beispiel #11
0
/**
 * Splits text into utterances.
 *
 * A one-character input is returned as a single utterance. Otherwise the text
 * is scanned left to right and the accumulated text is emitted as an
 * utterance
 *   - at an ending character that is followed by whitespace and then an
 *     upper-case character (the ending character itself is not copied), and
 *   - at a tab, provided something has been accumulated.
 * Whatever remains after the scan is emitted with all of its trailing ending
 * characters removed. Finally one trailing ending character is stripped from
 * every collected utterance.
 *
 * Compared with the previous revision this version never indexes a string at
 * -1 and never reads more than one position past the end of s: the unused
 * "previous character" local was removed, the look-ahead characters are only
 * fetched when the preceding test has succeeded, and the trailing-punctuation
 * passes check the length first. Results are unchanged for every input on
 * which the old code was well defined.
 *
 * @param s  Text to split.
 * @return   The utterances in order of appearance.
 */
CFSArray<CFSWString> do_utterances(CFSWString s) {
    CFSWString res = empty_str;
    CFSArray<CFSWString> res_array;

    if (s.GetLength() == 1)
        res_array.AddItem(s);
    else
        for (INTPTR i = 0; i < s.GetLength(); i++) {
            CFSWString c = s.GetAt(i);

            // Short-circuit evaluation keeps the look-ahead in range: i + 1 is
            // only read for an ending character, and i + 2 only after the
            // character at i + 1 proved to be whitespace.
            if (is_ending(c)
                    && is_whitespace(CFSWString(s.GetAt(i + 1)))
                    && is_upper(CFSWString(s.GetAt(i + 2)))) {
                res.Trim();
                res_array.AddItem(res);
                res = empty_str;
            } else
                if (is_tab(c)) {
                if (res.GetLength() > 0) {
                    res.Trim();
                    res_array.AddItem(res);
                    res = empty_str;
                }
            } else
                res += c;
        }
    res.Trim();

    if (res.GetLength() > 0) {
        // Strip every trailing ending character; stop when nothing is left.
        while (res.GetLength() > 0 && is_ending(res.GetAt(res.GetLength() - 1))) {
            res.Delete(res.GetLength() - 1, 1);
        }

        res_array.AddItem(res);
    }

    // Utterances emitted during the scan may still end in one ending character.
    for (INTPTR i = 0; i < res_array.GetSize(); i++) {
        const INTPTR len = res_array[i].GetLength();
        if (len > 0 && is_ending(res_array[i].GetAt(len - 1)))
            res_array[i].Delete(len - 1, 1);
    }
    return res_array;
}
Beispiel #12
0
/**
 * Splits the utterance text u.s into phrases and appends them to u.phr_vector.
 *
 * The text is scanned character by character; characters accumulate in `res`
 * until a phrase boundary is found, at which point the accumulated text is
 * pushed as a phrase via push_ph_res(). Boundaries recognised:
 *   - comma / colon / semicolon followed by a space and a letter;
 *   - opening bracket (additionally inserts a phrase whose text is the
 *     literal L"sulgudes");
 *   - closing bracket;
 *   - a space whose following word is accepted by is_conju();
 *   - a character accepted by is_bhyphen() with suitable letter/space
 *     neighbours.
 * The boundary character itself is not copied into any phrase.
 *
 * NOTE(review): pc is read at index -1 while `res` is empty, and nc / nnc are
 * read at up to GetLength() + 1; this relies on CFSWString::GetAt tolerating
 * those indices - confirm.
 *
 * @param u  Utterance; u.s is read, u.phr_vector is appended to.
 * @return   Number of items in u.phr_vector after the split.
 */
INTPTR do_phrases(utterance_struct &u) {
    // Template phrase: counters zeroed once, text (p.s) replaced per phrase.
    phrase_struct p;
    CFSWString res;
    p.phone_c = 0;
    p.syl_c = 0;
    p.word_c = 0;

    for (INTPTR i = 0; i < u.s.GetLength(); i++) {
        CFSWString c = u.s.GetAt(i);
        // pc: last character accumulated so far; nc / nnc: next two input characters.
        CFSWString pc = res.GetAt(res.GetLength() - 1);
        CFSWString nc = u.s.GetAt(i + 1);
        CFSWString nnc = u.s.GetAt(i + 2);
        if ((is_comma(c) || is_colon(c) || is_semicolon(c)) && is_space(nc) && is_char(nnc)) {
            res.Trim();
            if (res.GetLength() > 0) {
                push_ph_res(u, p, res);
            }
        } else
            if (is_bbracket(c)) {
               res.Trim();
            if (res.GetLength() > 0) {                
                push_ph_res(u, p, res);
            }
               // Extra phrase carrying the literal text "sulgudes".
               p.s = L"sulgudes";
               u.phr_vector.AddItem(p);

        } else
            if (is_ebracket(c)) {
             res.Trim();
            if (res.GetLength() > 0) {                
                push_ph_res(u, p, res);
            }
        } else
            if (is_space(c)) { // conjunctions that are not preceded by a comma
            // tempm: rest of the input after this space; its first word (up to
            // the next `sp`) is tested with is_conju().
            // NOTE(review): when no further `sp` exists, Find() presumably
            // returns a negative value that is passed to Left() - confirm the
            // resulting behaviour.
            CFSWString tempm = u.s.Mid(i + 1, -1);
            // res is trimmed even when no phrase is pushed.
            res.Trim();
            if (is_conju(tempm.Left(tempm.Find(sp))) && res.GetLength() > 0) {
                push_ph_res(u, p, res);
            } else
                res += c;
        } else
            if (is_bhyphen(c)) {
            res.Trim();
            if (res.GetLength() > 0 && ((is_char(pc) && is_space(nc)) || (is_space(nc) && is_char(nnc)) || (is_space(pc) && is_char(nc)))) {
                push_ph_res(u, p, res);
            } else
                res += c;
        } else
            res += c;
    }

    // Flush whatever is left as the final phrase.
    if (res.GetLength() > 0) {
        //        if (is_ending(res.GetAt(res.GetLength() - 1))) {
        //            res.Delete(res.GetLength() - 1, 1);
        //        }        
        push_ph_res(u, p, res);
    }        
    return u.phr_vector.GetSize();
}
Beispiel #13
0
/**
 * Tells whether c contains at least one of the characters "eijäöü".
 *
 * @param c  Text to test (callers in this file pass single characters).
 * @return   true when any of the listed characters occurs in c.
 */
bool must (CFSWString c) {
	const bool found = (c.FindOneOf(L"eijäöü") > -1);
	return found;
}
Beispiel #14
0
/**
 * Helper of do_phrases(): stores the accumulated text as a new phrase.
 *
 * The text is trimmed, copied into p.s, p is appended to u.phr_vector, and the
 * accumulator is reset to empty_str.
 *
 * @param u    Utterance whose phrase list receives the new phrase.
 * @param p    Phrase template; its text field is overwritten.
 * @param res  Accumulated phrase text; reset on return.
 */
void push_ph_res(utterance_struct &u, phrase_struct &p, CFSWString &res) {
    res.Trim();

    // Append a copy of the template carrying the trimmed text.
    p.s = res;
    u.phr_vector.AddItem(p);

    // Start the next phrase from scratch.
    res = empty_str;
}
Beispiel #15
0
/**
 * Tells whether c contains at least one of the lower-case consonants
 * "bdfghklmnprstv".
 *
 * @param c  Text to test (callers in this file pass single characters).
 * @return   true when any of the listed characters occurs in c.
 */
bool can_palat_vr(CFSWString c) {
    const bool found = (c.FindOneOf(L"bdfghklmnprstv") > -1);
    return found;
}
Beispiel #16
0
/**
 * Generates spelling suggestions for a word.
 *
 * The suggestion list (m_Items) is cleared and refilled. The word is
 * upper-cased and a series of candidate transformations is tried in a fixed
 * order; most candidates are submitted through CheckAndAdd(), which performs
 * the spell check and honours the time limit measured from m_TimeStart.
 * Finally the list is passed through Order(), RemoveImmoderate() and
 * RemoveDuplicates().
 *
 * NOTE(review): several steps index szWord / szWordHigh at position 0 without
 * checking for an empty word - confirm callers never pass one.
 *
 * @param szWord          Word to find suggestions for.
 * @param bStartSentence  When true and the capitalisation mode detected for
 *                        szWord is CAP_LOWER, the mode is switched to
 *                        CAP_INITIAL.
 * @return Always 0.
 */
int CSuggestor::Suggest(const CFSWString &szWord, bool bStartSentence){
	// Start of the time budget checked in CheckAndAdd().
	m_TimeStart=CFSTime::Now();
	m_Items.Cleanup();

	// Record the capitalisation of the input word.
	m_Cap.SetCap(szWord);
	if (bStartSentence && m_Cap.GetCapMode()==CFSStrCap<CFSWString>::CAP_LOWER) {
		m_Cap.SetCapMode(CFSStrCap<CFSWString>::CAP_INITIAL);
	}

	// All candidate generation below works on the upper-cased word.
	CFSWString szWordHigh=szWord.ToUpper();
	INTPTR ipWordLength=szWordHigh.GetLength();
	CFSWString szTemp;
	INTPTR i, j;
	long lLevel=100;
	SetLevel(lLevel);

	// Case problems & change list
	// The speller's own replacement is added directly (not via CheckAndAdd).
	i=SpellWord(szWordHigh, szTemp, &lLevel);
	if ((i==SPL_NOERROR || i==SPL_CHANGEONCE) && !szTemp.IsEmpty()){
		SetLevel(GetLevelGroup(lLevel));
		m_Items.AddItem(CSuggestorItem(szTemp, lLevel));
	}
	else SetLevel(5);
	
	// Abbreviations
	// !!! Unimplemented

	// Quotes (disabled)
/*	if (ipWordLength>=2 && 
		(szAllQuot.Find(szWordHigh[0])>=0 || szAllQuot.Find(szWordHigh[ipWordLength-1])>=0))
	{
		szTemp=szWordHigh;
		int iPos;
		if (szAllQuot.Find(szTemp[0])>=0){
			if (szQuotLeft.Find(szTemp[0])>=0) { }
			else if ((iPos=szQuotRight.Find(szTemp[0]))>=0) { szTemp[0]=szQuotLeft[iPos]; }
			else if (szDQuotLeft.Find(szTemp[0])>=0) { }
			else if ((iPos=szDQuotRight.Find(szTemp[0]))>=0) { szTemp[0]=szDQuotLeft[iPos]; }

			if (szAllQuot.Find(szTemp[ipWordLength-1])>=0) { szTemp[ipWordLength-1]=(szQuotRight+szDQuotRight)[(szQuotLeft+szDQuotLeft).Find(szTemp[0])];
		}
		else{
			if (szQuotRight.Find(szTemp[ipWordLength-1])>=0) { }
			else if ((iPos=szQuotLeft.Find(szTemp[ipWordLength-1]))>=0) { szTemp[ipWordLength-1]=szQuotRight[iPos]; }
			else if (szDQuotRight.Find(szTemp[ipWordLength-1])>=0) { }
			else if ((iPos=szDQuotLeft.Find(szTemp[ipWordLength-1]))>=0) { szTemp[ipWordLength-1]=szDQuotRight[iPos]; }
		}
		CheckAndAdd(szTemp);
	}*/

	// Add space
	// After an inner punctuation mark: if both halves spell correctly, suggest
	// the original-case word with a space inserted (added directly).
	for (i=1; i<ipWordLength-1; i++){
		static CFSWString szPunktuation=FSWSTR(".:,;!?");
		if (szPunktuation.Find(szWord[i])>=0){
			long lLevel1, lLevel2;
			CFSWString szTemp1, szTemp2;
			if (SpellWord(szWord.Left(i+1), szTemp1, &lLevel1)==SPL_NOERROR &&
				SpellWord(szWord.Mid(i+1), szTemp2, &lLevel2)==SPL_NOERROR)
			{
				m_Items.AddItem(CSuggestorItem(szWord.Left(i+1)+L' '+szWord.Mid(i+1), FSMAX(lLevel1, lLevel2)));
			}
		}
	}

	// Delete following blocks: le[nnu][nnu]jaam
	// i is the block length (2 or 3), j the start of the first block.
	for (i=2; i<=3; i++){
		for (j=0; j<ipWordLength-i-i; j++){
			if (memcmp((const FSWCHAR *)szWordHigh+j, (const FSWCHAR *)szWordHigh+j+i, i*sizeof(FSWCHAR))==0){
				szTemp=szWordHigh.Left(j)+szWordHigh.Mid(j+i);
				CheckAndAdd(szTemp);
			}
		}
	}

	// Change following letters: abb -> aab & aab -> abb
	for (i=1; i<ipWordLength-1; i++){
		if (szWordHigh[i]==szWordHigh[i+1]){
			szTemp=szWordHigh;
			szTemp[i]=szTemp[i-1];
			if (FSIsLetterEst(szTemp[i])) CheckAndAdd(szTemp);
		}
		else if (szWordHigh[i]==szWordHigh[i-1]){
			szTemp=szWordHigh;
			szTemp[i]=szTemp[i+1];
			if (FSIsLetterEst(szTemp[i])) CheckAndAdd(szTemp);
		}
	}

	// Exchange letters: van[na]ema -> van[an]ema
	// Swap neighbours in place, test, then swap back.
	szTemp=szWordHigh;
	for (i=1; i<ipWordLength; i++){
		if (szTemp[i]!=szTemp[i-1]){
			FSWCHAR ch=szTemp[i];
			szTemp[i]=szTemp[i-1];
			szTemp[i-1]=ch;
			CheckAndAdd(szTemp);
			szTemp[i-1]=szTemp[i];
			szTemp[i]=ch;
		}
	}

	// Change blocks
	// Single replacement per candidate, from the ChangeStrings table.
	for (i=0; i<ipWordLength; i++){
		for (j=0; j<(INTPTR)(sizeof(ChangeStrings)/sizeof(__CChangeStrings)); j++){
			if (szWordHigh.ContainsAt(i, ChangeStrings[j].m_lpszFrom)){
				szTemp=szWordHigh.Left(i)+ChangeStrings[j].m_lpszTo+szWordHigh.Mid(i+FSStrLen(ChangeStrings[j].m_lpszFrom));
				CheckAndAdd(szTemp);
			}
		}
	}

	// Change end blocks
	for (i=0; i<(INTPTR)(sizeof(ChangeStringsEnd)/sizeof(__CChangeStrings)); i++){
		if (szWordHigh.EndsWith(ChangeStringsEnd[i].m_lpszFrom)){
			szTemp=szWordHigh.Left(ipWordLength-FSStrLen(ChangeStringsEnd[i].m_lpszFrom))+ChangeStringsEnd[i].m_lpszTo;
			CheckAndAdd(szTemp);
		}
	}

	// Po~o~sas
	// Combinations of replacements from the ChangeStringsMultiple table.
	MultiReplace(szWordHigh, 0);

	// gi/ki: Kylli[gi]le -> Kyllile[gi]
	// "GI" found 3..6 characters before the end is moved to the end, as both GI and KI.
	for (i=3; i<=6; i++){
		if (i>ipWordLength) break;
		if (memcmp((const FSWCHAR *)szWordHigh+ipWordLength-i, FSWSTR("GI"), 2*sizeof(FSWCHAR))==0){
			szTemp=szWordHigh.Left(ipWordLength-i)+szWordHigh.Mid(ipWordLength-i+2)+FSWSTR("GI");
			CheckAndAdd(szTemp);
			szTemp=szWordHigh.Left(ipWordLength-i)+szWordHigh.Mid(ipWordLength-i+2)+FSWSTR("KI");
			CheckAndAdd(szTemp);
		}
	}

	// Delete letters: van[n]aema -> vanaema
	// Start with the first letter removed, then move the gap one position to
	// the right per step by restoring the letter at i.
	szTemp=szWordHigh.Mid(1);
	CheckAndAdd(szTemp);
	for (i=0; i<ipWordLength-1; i++){
		if (szTemp[i]!=szWordHigh[i]){
			szTemp[i]=szWordHigh[i];
			CheckAndAdd(szTemp);
		}
	}

	// Change letters from list
	// __SuggestChangeLetters() returns a zero-terminated list of substitutes, or NULL.
	for (i=0; i<ipWordLength; i++){
		const FSWCHAR *lpszTo=__SuggestChangeLetters(szWordHigh[i]);
		if (!lpszTo) continue;
		szTemp=szWordHigh;
		for (; lpszTo[0]; lpszTo++){
			szTemp[i]=lpszTo[0];
			CheckAndAdd(szTemp);
		}
	}
	
	// Insert letters to word body
	// A placeholder space is inserted at i and overwritten with each candidate letter.
	for (i=1; i<ipWordLength; i++){
		szTemp=szWordHigh.Left(i)+FSWSTR(' ')+szWordHigh.Mid(i);
		for (j=0; szInsertLetters[j]; j++){
			szTemp[i]=szInsertLetters[j];
			CheckAndAdd(szTemp);
		}
	}

	// Insert letters to the beginning
	// Letters equal to the current first letter are skipped.
	szTemp=CFSWString(FSWSTR(" "))+szWordHigh;
	for (i=0; szInsertLettersBeg[i]; i++){
		if (szTemp[1]==szInsertLettersBeg[i]) continue;
		szTemp[0]=szInsertLettersBeg[i];
		CheckAndAdd(szTemp);
	}

	// Try apostrophe for names
	// Runs only when the first letter changed on upper-casing and the word has
	// no apostrophe yet; the apostrophe is tried 0..4 characters before the end.
	if (szWord[0]!=szWordHigh[0] && szWordHigh.Find('\'')<0){
		for (i=0; i<5; i++){
			if (i>=ipWordLength) break;
			szTemp=szWordHigh.Left(ipWordLength-i)+L'\''+szWordHigh.Mid(ipWordLength-i);
			CheckAndAdd(szTemp);
		}
	}

	Order();
	RemoveImmoderate();
	RemoveDuplicates();
	return 0;
}
Beispiel #17
0
/**
 * Runs the full pipeline for one utterance: phrases -> words -> syllables ->
 * phones, fills in the counters and positions at every level, and returns the
 * result of do_label().
 *
 * @param utt          Utterance text; it is lower-cased before processing.
 * @param print_label  When true, the filled structure is dumped with print_u().
 * @param print_utt    When true, the original utterance text is written to stderr.
 * @return             The array produced by do_label() for the utterance.
 */
CFSArray<CFSWString> do_all(CFSWString utt, bool print_label, bool print_utt) {
    CFSArray<CFSWString> res;
    CFSArray<CPTWord> PTW;
    utterance_struct u;
    

    u.s = utt.ToLower();
    u.syl_c = 0;
    u.phone_c = 0;
    u.phra_c = do_phrases(u);
    // NOTE(review): word_count is accumulated below but never read.
    INTPTR word_count = 0;
    
    if (print_utt) fprintf(stderr, "%s\n", ccstr(utt));
       
    // Split every phrase into words. phrase2words() presumably appends the
    // words to PTW and sets the phrase's word_c - confirm.
    for (INTPTR i = 0; i < u.phr_vector.GetSize(); i++) {
        // 0-based here; overwritten with a 1-based value in the main loop below.
        u.phr_vector[i].utt_p = i;        
        phrase2words(u.phr_vector[i], PTW);
        word_count += u.phr_vector[i].word_c;        
        
    }
    

    // Wrap each word in a CMorphInfo whose root is the word text.
    CFSArray<CMorphInfo> words;
    for (INTPTR i = 0; i < PTW.GetSize(); i++) {
        CMorphInfo MI;
        MI.m_szRoot = PTW[i].m_szWord;
        words.AddItem(MI);
    }
        
    
    u.word_c = words.GetSize();


    // Reused for every word; do_syls() overwrites its syllable data.
    word_struct w;

    // 1-based running positions within the utterance. Despite its name,
    // utt_phone_c numbers words (it feeds w.utt_p).
    INTPTR utt_phone_c = 1;
    INTPTR syl_utt_p = 1;
    INTPTR phone_utt_p = 1;



    for (INTPTR i = 0; i < u.phr_vector.GetSize(); i++) {
        u.phr_vector[i].utt_p = i + 1;
        // 1-based running positions / counters within this phrase.
        INTPTR syl_phr_p = 1;
        INTPTR phone_phr_p = 1;
        INTPTR phrase_pho_c = 1;
        for (INTPTR i1 = 0; i1 < u.phr_vector[i].word_c; i1++) {
            
            w.utt_p = utt_phone_c++;
            w.phr_p = i1 + 1;
            // `words` is consumed from the front: index 0 is always the next
            // unprocessed word (removed at the end of this iteration).
            w.mi = words[0];
            w.mi.m_szRoot += make_char_string(w.mi.m_szEnding) + w.mi.m_szClitic;
            w.mi.m_szRoot = w.mi.m_szRoot.ToLower();
            do_syls(w);
            u.phr_vector[i].word_vector.AddItem(w);

            // 1-based running position / counter within this word.
            INTPTR phone_word_p = 1;
            INTPTR word_pho_c = 1;
            for (INTPTR i2 = 0; i2 < u.phr_vector[i].word_vector[i1].syl_vector.GetSize(); i2++) {
                u.syl_c++;
                u.phr_vector[i].syl_c++;
                u.phr_vector[i].word_vector[i1].syl_c++;
                u.phr_vector[i].word_vector[i1].syl_vector[i2].phr_p = syl_phr_p++;
                u.phr_vector[i].word_vector[i1].syl_vector[i2].utt_p = syl_utt_p++;
                INTPTR syl_phone_c = 1;

                // Fills the syllable's phone_vector.
                do_phones(u.phr_vector[i].word_vector[i1].syl_vector[i2]);

                for (INTPTR i3 = 0; i3 < u.phr_vector[i].word_vector[i1].syl_vector[i2].phone_vector.GetSize(); i3++) {
                    u.phone_c++;
                    // The *_c fields are overwritten with the running value on
                    // every phone, so they end up holding the final count.
                    u.phr_vector[i].phone_c = phrase_pho_c++;
                    u.phr_vector[i].word_vector[i1].phone_c = word_pho_c++;
                    u.phr_vector[i].word_vector[i1].syl_vector[i2].phone_c = syl_phone_c++;
                    // Copy the phone, set its positions, write it back.
                    phone_struct p = u.phr_vector[i].word_vector[i1].syl_vector[i2].phone_vector[i3];
                    p.utt_p = phone_utt_p++;
                    p.phr_p = phone_phr_p++;
                    p.word_p = phone_word_p++;
                    u.phr_vector[i].word_vector[i1].syl_vector[i2].phone_vector[i3] = p;

                }

            }
            words.RemoveItem(0, 1);
        }
    }




    if (print_label) print_u(u);
    res = do_label(u);
    return res;
}
Beispiel #18
0
/**
 * Tells whether c contains at least one of the upper-case consonants
 * "BDFGHKLMNPRSTV".
 *
 * @param c  Text to test (callers in this file pass single upper-cased characters).
 * @return   true when any of the listed characters occurs in c.
 */
bool can_palat(CFSWString c) {
    const bool found = (c.FindOneOf(L"BDFGHKLMNPRSTV") > -1);
    return found;
}
Beispiel #19
0
/**
 * First stage of the letter-to-phone conversion.
 *
 * Scans s and builds a new string in which
 *   - the marker ']' is removed and the letters around it are upper-cased
 *     (the one directly before it always, further neighbours only when
 *     can_palat() accepts them);
 *   - the marker '<' is removed and the letter after it is upper-cased;
 *   - the marker '?' is removed;
 *   - x, y, w, z, c are rewritten to ks, i, v, ts, k;
 *   - 'ü' preceded by 'ü' and followed by a vowel is rewritten to i;
 *   - every other character is copied.
 *
 * @param s  Input text. Modified in place: characters following a ']' or '<'
 *           marker may be upper-cased before they are reached by the scan.
 * @return   The converted text.
 */
CFSWString chars_to_phones_part_I(CFSWString &s) {
    CFSWString res;
    for (INTPTR i = 0; i < s.GetLength(); i++) {
        CFSWString c = s.GetAt(i);
        if (c == L']') {
            // The letter before the marker has already been copied to res;
            // overwrite it with its upper-case form.
            CFSWString t = CFSWString(s.GetAt(i - 1)).ToUpper();
            res.SetAt(res.GetLength() - 1, t.GetAt(0));
            // Look backwards (original author: not sure whether this is needed).
            t = CFSWString(s.GetAt(i - 2)).ToUpper();
            if (can_palat(t)) {
                res.SetAt(res.GetLength() - 2, t.GetAt(0));
                t = CFSWString(s.GetAt(i - 3)).ToUpper();
                if (can_palat(t)) {
                    // NOTE(review): this writes the character from i - 3 into
                    // the same slot (GetLength() - 2) as the write above;
                    // GetLength() - 3 may have been intended - confirm.
                    res.SetAt(res.GetLength() - 2, t.GetAt(0));
                }
            }
            // Look ahead: upper-case up to two following letters in the input
            // itself, so they are copied in that form on later iterations.
            t = CFSWString(s.GetAt(i + 1)).ToUpper();
            if (can_palat(t)) {
                s.SetAt(i + 1, t.GetAt(0));
                t = CFSWString(s.GetAt(i + 2)).ToUpper();
                if (can_palat(t)) {
                    s.SetAt(i + 2, t.GetAt(0));
                }
            }
        } else
            if (c == L'<') {
            // Upper-case the letter after the marker in the input.
            CFSWString t = CFSWString(s.GetAt(i + 1)).ToUpper();
            s.SetAt(i + 1, t.GetAt(0));
        } else
            if (c == L'?') {
        }// Abnormal stress marker: dropped for now; what to do with it is decided later.
        else
            if (c == L'x') res += L"ks";
        else
            if (c == L'y') res += L"i";
        else
            if (c == L'w') res += L"v";
        else
            if (c == L'z') res += L"ts";
        else
            if (c == L'c') {
            res += L"k";
        } else
            if (c == L'ü' && is_vowel(s.GetAt(i + 1)) && s.GetAt(i - 1) == L'ü')
            res += L"i";
        else
            res += c;
    }
    return res;
}
Beispiel #20
0
/**
 * Converts an 8-bit string to a wide (UCS-2) string.
 *
 * For every code page other than PFSCP_HTMLEXT the conversion is delegated to
 * FSStrAtoW(). For PFSCP_HTMLEXT the input must be 7-bit ASCII; plain
 * characters are copied and "&...;" sequences are decoded:
 *   - "&#NNN;" / "&#xHHH;" numeric references (only when autosgml is set);
 *   - named entities looked up in the sgml2uc table.
 * When ignoramp is set, an ampersand that cannot be decoded as a named entity
 * (no terminating ';', too long, or not in the table) is copied as an ordinary
 * character instead of raising an error. Malformed numeric references always
 * raise an error.
 *
 * @param wStr        Receives the converted text (emptied first).
 * @param aStr        Input text.
 * @param koodiTabel  Code page of the input.
 * @throws VEAD when the entity table has not been loaded, the input contains a
 *         non-ASCII byte, or an entity cannot be decoded (see above).
 */
void CONV_HTML_UC2::ConvToUc(
    CFSWString& wStr,
    const CFSAString& aStr,
    const PFSCODEPAGE koodiTabel
)
{
    wStr.Empty();
    if(koodiTabel!=PFSCP_HTMLEXT) // not the extended-HTML code page
    {
        wStr = FSStrAtoW(aStr, koodiTabel); // plain table-driven conversion; nothing else to do
        return;
    }
    assert(koodiTabel==PFSCP_HTMLEXT); // from here on the entity table read from file is used
    if(sgml2uc.idxLast<=0)
        throw VEAD(ERR_X_TYKK, ERR_ARGVAL, __FILE__,__LINE__, "$Revision: 557 $",
                   "SGML olemite tabel mallu lugemata");
    int l, n=aStr.GetLength();
    for(l=0; l < n; l++)
    {
        if((aStr[l] & (~0x7F))!=0) // must be 7-bit ASCII
            throw VEAD(ERR_X_TYKK, ERR_ARGVAL, __FILE__,__LINE__, "$Revision: 557 $",
                       "String peab koosnema ASCII (7bitistest) koodidest", (const char*)aStr+l);
        if(aStr[l]!='&') // does not start an SGML entity...
        {
// Also the target for undecodable ampersands when ignoramp is set.
tryki:
            wStr += ((FSWCHAR)(aStr[l])) & 0x7F; // ...copied as is
            continue;
        }
        // May start an SGML entity - &blah;
        int lSemiPos=(int)aStr.Find(";", l+1);
        if(lSemiPos<0) // this ampersand has no terminating semicolon
        {
            if(ignoramp==true)
                goto tryki;
            throw VEAD(ERR_X_TYKK, ERR_ARGVAL, __FILE__,__LINE__, "$Revision: 557 $",
                       "Ampersandi tagant semi puudu", (const char*)aStr+l);
        }
        if(autosgml==true && aStr[l+1]=='#') // decode &#[{x|X}]12345; numeric references
        {
            // tmp receives the code point; j walks over the digits.
            int tmp=0, j=l+2;
            if(aStr[j]=='x' || aStr[j]=='X')    // hexadecimal form: &#x12345; and &#X12345;
            {
                j++;
                //if(sscanf(((const char*)aStr)+j, "%x", &tmp)!=1)
                //        throw VEAD(ERR_X_TYKK, ERR_ARGVAL, __FILE__,__LINE__, "$Revision: 557 $",
                //                    "Vigane SGML olem", (const char*)aStr+l);
                //for(; j<lSemiPos; j++)
                //    {
                //    if(strchr("0123456789aAbBcCdDeEfF", aStr[j])==NULL)
                //       throw VEAD(ERR_X_TYKK, ERR_ARGVAL, __FILE__,__LINE__, "$Revision: 557 $",
                //                    "Vigane SGML olem", (const char*)aStr+l);
                //    }
                // The return value is added to j; presumably the number of
                // characters consumed - confirm.
                j+=STRSOUP::UnsignedStr2Hex<int, char>(&tmp, ((const char*)aStr)+j);
                // The digits must be followed directly by the semicolon.
                if(j<=0 || aStr[j]!=';')
                    throw VEAD(ERR_X_TYKK, ERR_ARGVAL, __FILE__,__LINE__, "$Revision: 557 $",
                               "Vigane SGML olem", (const char*)aStr+l);
                // The value must fit into two bytes.
                if(tmp>0xFFFF)
                    throw VEAD(ERR_X_TYKK, ERR_ARGVAL, __FILE__,__LINE__, "$Revision: 557 $",
                               "Vigane SGML olem (peab mahtuma 2 baidi peale)", (const char*)aStr+l);
            }
            else                                // decimal form: &#12345;
            {
                //for(; j<lSemiPos; j++)
                //    {
                //    if(aStr[j]<'0' || aStr[j]>'9')
                //        throw VEAD(ERR_X_TYKK, ERR_ARGVAL, __FILE__,__LINE__, "$Revision: 557 $",
                //                    "Vigane SGML olem (lubatud 0-9)", (const char*)aStr+l);
                //    if((tmp=10*tmp+aStr[j]-'0')>0xFFFF)
                //        throw VEAD(ERR_X_TYKK, ERR_ARGVAL, __FILE__,__LINE__, "$Revision: 557 $",
                //                    "Vigane SGML olem (peab mahtuma 2 baidi peale)", (const char*)aStr+l);
                //    }
                j+=STRSOUP::UnsignedStr2Num<int, char>(&tmp, ((const char*)aStr)+j);
                if(j<=0 || aStr[j]!=';')
                    throw VEAD(ERR_X_TYKK, ERR_ARGVAL, __FILE__,__LINE__, "$Revision: 557 $",
                               "Vigane SGML olem", (const char*)aStr+l);
                if(tmp>0xFFFF)
                    throw VEAD(ERR_X_TYKK, ERR_ARGVAL, __FILE__,__LINE__, "$Revision: 557 $",
                               "Vigane SGML olem (peab mahtuma 2 baidi peale)", (const char*)aStr+l);
            }
            wStr += (WCHAR)tmp;
            // Continue after the semicolon (the loop increment steps past it).
            l=lSemiPos;
            continue;
        }
        if(lSemiPos-l+1 > sgml_stringi_max_pikkus) // longer than any entity in the table
        {
            if(ignoramp==true)
                goto tryki;
            throw VEAD(ERR_X_TYKK, ERR_ARGVAL, __FILE__,__LINE__, "$Revision: 557 $",
                       "Puudub SGML olemite tabelist", (const char*)aStr+l);
        }
        CFSAString szSymbol=aStr.Mid(l, lSemiPos-l+1); // cut "&bla;" out of the input string
        SGML_UC* rec;
        if((rec=sgml2uc.Get(&szSymbol))==NULL) // not found in the lookup table
        {
            if(ignoramp==true)
                goto tryki;
            throw VEAD(ERR_X_TYKK, ERR_ARGVAL, __FILE__,__LINE__, "$Revision: 557 $",
                       "Puudub SGML olemite tabelist", (const char*)szSymbol);
        }
        wStr += rec->uc;
        l=lSemiPos;
    }
}
Beispiel #21
0
/**
 * Inserts the `colon` mark into a syllabified word according to the pattern
 * table queried through pattern_lookup().
 *
 * The input is consumed from the front. A character accepted by is_uvowel()
 * is lower-cased and starts a pattern code built with shift_pattern() from the
 * following characters. At each syllable delimiter `d`, and again at the end
 * of the word, the code is looked up - first as is, then after
 * simplify_pattern() - and on a hit the mark is inserted at the returned
 * offset counted from the position where the code started.
 *
 * @param s  Syllabified word (taken by value; consumed during processing).
 * @return   The word with marks inserted and the accepted vowels lower-cased.
 */
CFSWString the_shift(CFSWString s) {
    /*
            Original author's note (translated): the simplification may have to
            be done in two stages, without LQ and with LQ (see shift_pattern).
            All of this stems from not fully understanding the quantity-shift
            rules; there is Mihkla's version and the official version. Thanks
            to Mihkla, who hands out a fishing rod instead of a fish. I
            wrestled with the rod for a long time.
     */

    CFSWString res;
    // Pattern code collected since the last accepted vowel.
    CFSWString code;
    // Value of i at the character that started the current code.
    INTPTR pos;
    // Counts consumed characters plus marks inserted so far.
    INTPTR i = 0;
    INTPTR x;

    while (s.GetLength() > 0) {
        // Consume the first remaining character.
        CFSWString c = s.GetAt(0);
        s.Delete(0, 1);
        if (is_uvowel(c)) {
            c = c.ToLower();
            code += shift_pattern(c);
            res += c;
            pos = i;
        } else
            if (c == d && code.GetLength() > 0) {
            res += c;
            code += c;
            // t_code: the code extended with the class of the first character
            // of the next syllable.
            CFSWString t_code = code;
            t_code += shift_pattern(s.GetAt(0));

            x = pattern_lookup(t_code); // original pattern, across the syllable boundary
            if (x > -1) {
                x += pos;
                if (x > res.GetLength()) { // the mark falls into the next syllable
                    x = x - res.GetLength();
                    s.Insert(x, colon);
                } else
                    res.Insert(x, colon);
                i++;
            } else {
                t_code = simplify_pattern(t_code);
                x = pattern_lookup(t_code); // simplified pattern, across the syllable boundary
                if (x > -1) {
                    x += pos;
                    if (x > res.GetLength()) { // the mark falls into the next syllable
                        x = x - res.GetLength();
                        s.Insert(x, colon);
                    } else
                        res.Insert(x, colon);
                    i++;
                } else {
                    x = pattern_lookup(code); // original pattern
                    if (x > -1) {
                        x += pos;
                        res.Insert(x, colon);
                        i++;
                    } else {
                        code = simplify_pattern(code);
                        x = pattern_lookup(code); // simplified pattern
                        if (x > -1) {
                            x += pos;
                            res.Insert(x, colon);
                            i++;
                        }
                    }
                }
            }

            code = empty_str;
        } else {
            res += c;
            // Only extend a code that has already been started by a vowel.
            if (code.GetLength() > 0) {
                code += shift_pattern(c);
            }
        }
        i++;
    } //while

    // At the end of the word
    if (code.GetLength() > 0) {
        code += L"#";
        // Odd special case, present only because of the word "lonksu"
        if ((code.Left(3) == L"VLQ") && ((code.GetAt(3) == L's') || (code.GetAt(3) == L'h') || (code.GetAt(3) == L'v') || (code.GetAt(3) == L'j'))) {
            code = L"VLQC#";
        }
        // Note: this declaration shadows the function-level x.
        INTPTR x = pattern_lookup(code);
        if (x > -1) {
            x += pos;
            res.Insert(x, colon);
        } else {
            code = simplify_pattern(code);
            x = pattern_lookup(code);
            if (x > -1) {
                x += pos;
                res.Insert(x, colon);
            }
        }
        code = empty_str;
    }
    return res;
}
Beispiel #22
0
/**
 * Tells whether c contains at least one of the upper-case consonants "DLNST".
 *
 * @param c  Text to test (callers in this file pass single upper-cased characters).
 * @return   true when any of the listed characters occurs in c.
 */
bool can_palat(CFSWString c) {
    const bool found = (c.FindOneOf(L"DLNST") > -1);
    return found;
}
Beispiel #23
0
/**
 * Converts a wide string to std::string one character at a time.
 *
 * Each wide character is narrowed to char, so only characters that fit into a
 * char survive intact; no encoding conversion is performed.
 *
 * @param s  Wide string to convert.
 * @return   Narrow string with one char per input character.
 */
std::string to_stdstring(CFSWString s) {
    const INTPTR len = s.GetLength();

    std::string narrow;
    narrow.reserve(len);
    for (INTPTR pos = 0; pos < len; pos++) {
        narrow.push_back(static_cast<char>(s.GetAt(pos)));
    }
    return narrow;
}
Beispiel #24
0
/**
 * First stage of the letter-to-phone conversion.
 *
 * Scans s and builds a new string in which
 *   - the marker ']' is removed and the letters around it are upper-cased
 *     (the one directly before it always, further neighbours only when
 *     can_palat() accepts them);
 *   - the marker '<' is removed and the letter after it is upper-cased;
 *   - the marker '?' is removed;
 *   - x, y, w, z, c are rewritten to ks, i, v, ts, k;
 *   - 'ü' preceded by 'ü' and followed by a vowel is rewritten to i;
 *   - q is rewritten to k, and a vowel directly after it is replaced by v;
 *   - every other character is copied.
 *
 * @param s  Input text. Modified in place: characters following a ']', '<' or
 *           'q' may be changed before they are reached by the scan.
 * @return   The converted text.
 */
CFSWString chars_to_phones_part_I(CFSWString &s) {
    /*	Original author's notes (translated):
                    "müüa -> müia", "siia"?
                    Because the rules for shifting the quantity mark sometimes
                    work on phonemes and sometimes do not, the letter-to-phoneme
                    conversion has to be done in two parts.
                    Palatalisation is based on the idea that a palatalised
                    phoneme cannot be surrounded by palatalisable phonemes that
                    are not palatalised.
     */
    CFSWString res;
    for (INTPTR i = 0; i < s.GetLength(); i++) {
        CFSWString c = s.GetAt(i);
        if (c == L']') {
            // The letter before the marker has already been copied to res;
            // overwrite it with its upper-case form.
            CFSWString t = CFSWString(s.GetAt(i - 1)).ToUpper();
            res.SetAt(res.GetLength() - 1, t.GetAt(0));
            // Look backwards (original author: not sure whether this is needed).
            t = CFSWString(s.GetAt(i - 2)).ToUpper();
            if (can_palat(t)) {
                res.SetAt(res.GetLength() - 2, t.GetAt(0));
                t = CFSWString(s.GetAt(i - 3)).ToUpper();
                if (can_palat(t)) {
                    // NOTE(review): this writes the character from i - 3 into
                    // the same slot (GetLength() - 2) as the write above;
                    // GetLength() - 3 may have been intended - confirm.
                    res.SetAt(res.GetLength() - 2, t.GetAt(0));
                }
            }
            // Look ahead: upper-case up to two following letters in the input
            // itself, so they are copied in that form on later iterations.
            t = CFSWString(s.GetAt(i + 1)).ToUpper();
            if (can_palat(t)) {
                s.SetAt(i + 1, t.GetAt(0));
                t = CFSWString(s.GetAt(i + 2)).ToUpper();
                if (can_palat(t)) {
                    s.SetAt(i + 2, t.GetAt(0));
                }
            }
        } else
            if (c == L'<') {
            // Upper-case the letter after the marker in the input.
            CFSWString t = CFSWString(s.GetAt(i + 1)).ToUpper();
            s.SetAt(i + 1, t.GetAt(0));
        } else
            if (c == L'?') {
        }// Abnormal stress marker: dropped for now; what to do with it is decided later.
        else
            if (c == L'x') res += L"ks";
        else
            if (c == L'y') res += L"i";
        else
            if (c == L'w') res += L"v";
        else
            if (c == L'z') res += L"ts";
        else
            if (c == L'c') {
            res += L"k";
        } else
            if (c == L'ü' && is_vowel(s.GetAt(i + 1)) && s.GetAt(i - 1) == L'ü')
            res += L"i";
        else
            if (c == L'q') {
            // q always yields k; a vowel right after it is replaced by v in
            // the input, so the pair comes out as "kv".
            if (is_vowel(s.GetAt(i + 1))) {
                res += L"k";
                s.SetAt(i + 1, L'v');
            } else
                res += L"k";
        } else
            res += c;
    }
    return res;
}