Example #1
0
CFSWString DealWithText(CFSWString text) {
    /* Proovin kogu sõnniku minema loopida */
    CFSWString res;
    text.Trim();
    text.Replace(L"\n\n", L"\n", 1);
    text.Replace(L"‘", L"'", 1);
    text.Replace(L"`", L"'", 1);
    text.Replace(L"´", L"'", 1);
    text.Replace(L"’", L"'", 1);

    for (INTPTR i = 0; i < text.GetLength(); i++) {
        CFSWString c = text.GetAt(i);
        CFSWString pc = res.GetAt(res.GetLength() - 1);
        CFSWString nc = text.GetAt(i + 1);
        if (c == L"'") {
            if (is_vowel(pc)) 
                res += L"q";
            else
                res += c;
        }
        else
        if (is_char(c)) res += c;
        else
            if (is_digit(c)) res += c;
        else
            if (is_hyphen(c) && is_char(pc) && is_char(nc)) res += sp;
        else
            if (is_symbol(c)) res += c;
        else
            if (is_colon(c) && !is_colon(pc)) res += c;
        else
            if (is_bbracket(c) && !is_bbracket(pc)) res += c;
        else
            if (is_ebracket(c) && is_ending(nc)) res += L"";
        else
            if (is_ebracket(c) && !is_ebracket(pc)) res += c;
        else
            if (is_comma(c) && !is_comma(pc)) res += c;
        else
            if (is_fchar(c)) res += replace_fchar(c);
        else
            if (is_space(c) && !is_whitespace(pc)) res += c;
        else
            if (is_break(c) && !is_break(pc)) { 
                
                res += c;   
            } //kahtlane
        else
            if (is_tab(c) && !is_whitespace(pc)) res += c;
        else            
            if (is_ending(c) && !is_ending(pc) && !is_whitespace(pc)) res += c;

    }
    res.Trim();        
    return res;

}
Example #2
0
CFSWString chars_to_phones_part_I(CFSWString &s) {
    CFSWString res;
    for (INTPTR i = 0; i < s.GetLength(); i++) {
        CFSWString c = s.GetAt(i);
        if (c == L']') {
            CFSWString t = CFSWString(s.GetAt(i - 1)).ToUpper();
            res.SetAt(res.GetLength() - 1, t.GetAt(0));
            //vaatab taha; pole kindel, et kas on vajalik
            t = CFSWString(s.GetAt(i - 2)).ToUpper();
            if (can_palat(t)) {
                res.SetAt(res.GetLength() - 2, t.GetAt(0));
                t = CFSWString(s.GetAt(i - 3)).ToUpper();
                if (can_palat(t)) {
                    res.SetAt(res.GetLength() - 2, t.GetAt(0));
                }
            }
            //vaatab ette					
            t = CFSWString(s.GetAt(i + 1)).ToUpper();
            if (can_palat(t)) {
                s.SetAt(i + 1, t.GetAt(0));
                t = CFSWString(s.GetAt(i + 2)).ToUpper();
                if (can_palat(t)) {
                    s.SetAt(i + 2, t.GetAt(0));
                }
            }
        } else
            if (c == L'<') {
            CFSWString t = CFSWString(s.GetAt(i + 1)).ToUpper();
            s.SetAt(i + 1, t.GetAt(0));
        } else
            if (c == L'?') {
        }//Ebanormaalne rõhk. Pärast vaatab, mis teha
        else
            if (c == L'x') res += L"ks";
        else
            if (c == L'y') res += L"i";
        else
            if (c == L'w') res += L"v";
        else
            if (c == L'z') res += L"ts";
        else
            if (c == L'c') {
            res += L"k";
        } else
            if (c == L'ü' && is_vowel(s.GetAt(i + 1)) && s.GetAt(i - 1) == L'ü')
            res += L"i";
        else
            res += c;
    }
    return res;
}
Example #3
0
bool is_word(CFSWString s) {
    if ((has_vowel(s) == true) &&
            (s.GetLength() == make_char_string(s).GetLength()) &&
            (s.GetLength() > 1))
        return true;
    return false;
}
Example #4
0
bool is_stressed_syl(CFSWString syl) {
    bool res = false;
    for (INTPTR i = 0; i < syl.GetLength(); i++) {
        if ((syl.GetAt(i) == colon) || ((is_vowel(syl.GetAt(i))) && (is_vowel(syl.GetAt(i + 1)))))
            res = true;
    }
    return res;
}
Example #5
0
INTPTR do_phrases(utterance_struct &u) {
    phrase_struct p;
    CFSWString res;
    p.phone_c = 0;
    p.syl_c = 0;
    p.word_c = 0;

    for (INTPTR i = 0; i < u.s.GetLength(); i++) {
        CFSWString c = u.s.GetAt(i);
        CFSWString pc = res.GetAt(res.GetLength() - 1);
        CFSWString nc = u.s.GetAt(i + 1);
        CFSWString nnc = u.s.GetAt(i + 2);
        if ((is_comma(c) || is_colon(c) || is_semicolon(c)) && is_space(nc) && is_char(nnc)) {
            res.Trim();
            if (res.GetLength() > 0) {
                push_ph_res(u, p, res);
            }
        } else
            if (is_bbracket(c)) {
               res.Trim();
            if (res.GetLength() > 0) {                
                push_ph_res(u, p, res);
            }
               p.s = L"sulgudes";
               u.phr_vector.AddItem(p);

        } else
            if (is_ebracket(c)) {
             res.Trim();
            if (res.GetLength() > 0) {                
                push_ph_res(u, p, res);
            }
        } else
            if (is_space(c)) { // komatud sidesõnad
            CFSWString tempm = u.s.Mid(i + 1, -1);
            res.Trim();
            if (is_conju(tempm.Left(tempm.Find(sp))) && res.GetLength() > 0) {
                push_ph_res(u, p, res);
            } else
                res += c;
        } else
            if (is_bhyphen(c)) {
            res.Trim();
            if (res.GetLength() > 0 && ((is_char(pc) && is_space(nc)) || (is_space(nc) && is_char(nnc)) || (is_space(pc) && is_char(nc)))) {
                push_ph_res(u, p, res);
            } else
                res += c;
        } else
            res += c;
    }

    if (res.GetLength() > 0) {
        //        if (is_ending(res.GetAt(res.GetLength() - 1))) {
        //            res.Delete(res.GetLength() - 1, 1);
        //        }        
        push_ph_res(u, p, res);
    }        
    return u.phr_vector.GetSize();
}
Example #6
0
bool VOTAFAILIST::RidaTrimmitud(CFSWString& rida)
{
    while (Rida(rida) == true)
    {
        rida.Trim();
        if (rida.GetLength() > 0)
            return true;
    }
    return false;
}
Example #7
0
CFSWString simplify_pattern(CFSWString s) {
    CFSWString res;
    for (INTPTR i = 0; i < (s.GetLength()); i++) {
        CFSWString c = s.GetAt(i);
        if (c.FindOneOf(L"jhvsLQ") > -1) res += L"C";
        else
            res += c;
    }
    return res;
}
Example #8
0
void CSuggestor::MultiReplace(const CFSWString &szWord, INTPTR ipStartPos)
{
	if (ipStartPos>0) CheckAndAdd(szWord);
	INTPTR ipLength=szWord.GetLength();
	for (; ipStartPos<ipLength; ipStartPos++){
		for (INTPTR ip=0; ip<(INTPTR)(sizeof(ChangeStringsMultiple)/sizeof(__CChangeStrings)); ip++){
			if (szWord.ContainsAt(ipStartPos, ChangeStringsMultiple[ip].m_lpszFrom)){
				MultiReplace(szWord.Left(ipStartPos)+ChangeStringsMultiple[ip].m_lpszTo+szWord.Mid(ipStartPos+FSStrLen(ChangeStringsMultiple[ip].m_lpszFrom)), ipStartPos+FSStrLen(ChangeStringsMultiple[ip].m_lpszTo));
			}
		}
	}
}
Example #9
0
CFSWString syllabify2(CFSWString s) {
    CFSWString res;

    for (INTPTR i = 0; i < s.GetLength(); i++) {
        CFSWString c = s.GetAt(i);
        if (is_consonant(c) && is_vowel(s.GetAt(i - 1)) && is_vowel(s.GetAt(i + 1)))
            res += d;
        if (is_vowel(c) && is_vowel(s.GetAt(i - 1)) && is_vowel(s.GetAt(i + 1)) && c.ToLower() == s.GetAt(i + 1))
            res += d;
        if (is_consonant(c) && is_consonant(s.GetAt(i - 1)) && is_vowel(s.GetAt(i + 1)) && has_vowel(res)) //küsitav
            res += d;
        res += c;
    }
    return res;
}
Example #10
0
void PTWSplitBuffer(const CFSWString &szBuffer, CPTWordArray &Words)
{
	Words.Cleanup();
	INTPTR ipStartPos=0;
	INTPTR ipPos;
	for (ipPos=0; ipPos<szBuffer.GetLength(); ipPos++) {
		if (FSIsSpace(szBuffer[ipPos])) {
			if (ipPos>ipStartPos) {
				Words.AddItem(CPTWord(szBuffer.Mid(ipStartPos, ipPos-ipStartPos), ipStartPos));
			}
			ipStartPos=ipPos+1;
		}
	}
	if (ipPos>ipStartPos) {
		Words.AddItem(CPTWord(szBuffer.Mid(ipStartPos, ipPos-ipStartPos), ipStartPos));
	}
}
Example #11
0
CFSWString palat_vru (CFSWString s) {
    CFSWString res;
    bool m = false;
    if (s == L"är'") res = L"ärq"; 
    else
    if (s == L"ar'") res = L"arq"; 
    else
    if (s == L"jäl'") res = L"jälq"; 
    else
    if (s == L"jal'") res = L"jalq"; 
    else
    if (s == L"kül'") res = L"külq"; 
    else
    if (s == L"pan'") res = L"panq"; 
    else
    if (s == L"tul'") res = L"tulq"; 
    else
    if (s == L"ol'") res = L"olq"; 
    else
        for (INTPTR i = s.GetLength()-1; i >= 0; i--) {
        CFSWString c = s.GetAt(i);
        if (c == L"'") {
            m = true;
        } else 
            if (must(c)) {
                res = c + res;
                m = true;
            }
            else
        if (m) {
            if (can_palat_vr(c)) {
                c = c.ToUpper();
                res = c + res;
            } else {
                res = c + res;
                m = false;
            }
        } else {
            res = c + res;
        }
        
       
    }    
    return res;
}
Example #12
0
CFSArray<CFSWString> do_utterances(CFSWString s) {
    CFSWString res = empty_str;
    CFSArray<CFSWString> res_array;

    if (s.GetLength() == 1)
        res_array.AddItem(s);
    else
        for (INTPTR i = 0; i < s.GetLength(); i++) {
            CFSWString c = s.GetAt(i);
            CFSWString pc = res.GetAt(res.GetLength() - 1);
            CFSWString nc = s.GetAt(i + 1);
            CFSWString nnc = s.GetAt(i + 2);

            if (is_ending(c) && is_whitespace(nc) && is_upper(nnc)) {
                res.Trim();
                res_array.AddItem(res);
                res = empty_str;
            } else
                if (is_tab(c)) {
                if (res.GetLength() > 0) {
                    res.Trim();
                    res_array.AddItem(res);
                    res = empty_str;
                }
            } else
                res += c;
        }
    res.Trim();
    
    if (res.GetLength() > 0) {
        while (is_ending(res.GetAt(res.GetLength() - 1))) {
            res.Delete(res.GetLength() - 1, 1);
        }
        
        res_array.AddItem(res);
    }

    for (INTPTR i=0; i < res_array.GetSize(); i++) {
        if (is_ending(res_array[i].GetAt(res_array[i].GetLength()-1))) 
            res_array[i].Delete(  res_array[i].GetLength()-1, 1   );

    }
    
    return res_array;
}
Example #13
0
void print_u(utterance_struct u) {
    for (INTPTR i = 0; i < u.phr_vector.GetSize(); i++)
        for (INTPTR i1 = 0; i1 < u.phr_vector[i].word_vector.GetSize(); i1++) {
            fprintf(stderr, "%s\n\n", ccstr(u.phr_vector[i].word_vector[i1].mi.m_szRoot));
            for (INTPTR i2 = 0; i2 < u.phr_vector[i].word_vector[i1].syl_vector.GetSize(); i2++) {
                fprintf(stderr, "\t%s\n", ccstr(u.phr_vector[i].word_vector[i1].syl_vector[i2].syl));

                for (INTPTR i3 = 0; i3 < u.phr_vector[i].word_vector[i1].syl_vector[i2].phone_vector.GetSize(); i3++) {
                    CFSWString w = u.phr_vector[i].word_vector[i1].syl_vector[i2].phone_vector[i3].phone;
                    while (w.GetLength() < 6) w += sp;
                    fprintf(stderr, "\t\t%s", ccstr(w));
                    wprintf(L"%i %i %i %i\t", u.phra_c, u.word_c, u.syl_c, u.phone_c);

                    wprintf(L"%i %i %i [%i]\t", u.phr_vector[i].word_c, u.phr_vector[i].syl_c, u.phr_vector[i].phone_c, u.phr_vector[i].utt_p);

                    wprintf(L"%i %i [%i %i]\t", 
                            u.phr_vector[i].word_vector[i1].syl_c,
                            u.phr_vector[i].word_vector[i1].phone_c,
                            u.phr_vector[i].word_vector[i1].utt_p,
                            u.phr_vector[i].word_vector[i1].phr_p
                            );

                    wprintf(L"%i [%i %i %i]\t", 
                            u.phr_vector[i].word_vector[i1].syl_vector[i2].phone_c,
                            u.phr_vector[i].word_vector[i1].syl_vector[i2].utt_p,
                            u.phr_vector[i].word_vector[i1].syl_vector[i2].phr_p,
                            u.phr_vector[i].word_vector[i1].syl_vector[i2].word_p
                            );

                    phone_struct p = u.phr_vector[i].word_vector[i1].syl_vector[i2].phone_vector[i3];
                    wprintf(L"[%i %i %i %i]\t", p.utt_p, p.phr_p, p.word_p, p.syl_p);

                    wprintf(L"{ %i }", u.phr_vector[i].word_vector[i1].syl_vector[i2].stress);

                    wprintf(L"\n");
                }
            }
        }
    wprintf(L"\n");
}
Example #14
0
std::string to_stdstring(CFSWString s) {
    std::string res = "";
    for (INTPTR i = 0; i < s.GetLength(); i++)
        res += s.GetAt(i);
    return res;
}
Example #15
0
CFSWString chars_to_phones_part_I(CFSWString &s) {
    /*	müüa -> müia siia?
                    Kuna vältemärgi nihutamise reeglid on ehitatud selliselt, et kohati kasutatakse 
                    foneeme ja kohati ei, siis tuleb täht->foneem teisendus teha kahes jaos.
                    Palataliseerimine põhineb ideel, et palataliseeritud foneemi ümbruses ei saa
                    olla palataliseeruvaid mittepalataliseeritud foneeme :D
     */
    CFSWString res;
    for (INTPTR i = 0; i < s.GetLength(); i++) {
        CFSWString c = s.GetAt(i);
        if (c == L']') {
            CFSWString t = CFSWString(s.GetAt(i - 1)).ToUpper();
            res.SetAt(res.GetLength() - 1, t.GetAt(0));
            //vaatab taha; pole kindel, et kas on vajalik
            t = CFSWString(s.GetAt(i - 2)).ToUpper();
            if (can_palat(t)) {
                res.SetAt(res.GetLength() - 2, t.GetAt(0));
                t = CFSWString(s.GetAt(i - 3)).ToUpper();
                if (can_palat(t)) {
                    res.SetAt(res.GetLength() - 2, t.GetAt(0));
                }
            }
            //vaatab ette					
            t = CFSWString(s.GetAt(i + 1)).ToUpper();
            if (can_palat(t)) {
                s.SetAt(i + 1, t.GetAt(0));
                t = CFSWString(s.GetAt(i + 2)).ToUpper();
                if (can_palat(t)) {
                    s.SetAt(i + 2, t.GetAt(0));
                }
            }
        } else
            if (c == L'<') {
            CFSWString t = CFSWString(s.GetAt(i + 1)).ToUpper();
            s.SetAt(i + 1, t.GetAt(0));
        } else
            if (c == L'?') {
        }//Ebanormaalne rõhk. Pärast vaatab, mis teha
        else
            if (c == L'x') res += L"ks";
        else
            if (c == L'y') res += L"i";
        else
            if (c == L'w') res += L"v";
        else
            if (c == L'z') res += L"ts";
        else
            if (c == L'c') {
            res += L"k";
        } else
            if (c == L'ü' && is_vowel(s.GetAt(i + 1)) && s.GetAt(i - 1) == L'ü')
            res += L"i";
        else
            if (c == L'q') {
            if (is_vowel(s.GetAt(i + 1))) {
                res += L"k";
                s.SetAt(i + 1, L'v');
            } else
                res += L"k";
        } else
            res += c;
    }
    return res;
}
Example #16
0
CFSWString the_shift(CFSWString s) {
    /*
            On mingi võimalus, et lihtsustus tuleb teha kahes astmes. LQ-ta ja LQ-ga (vt shift_pattern). Kõik 
            seotud sellega, et pole	vältenihutusreeglitest lõpuni aru saanud. Eksisteerib Mihkla versioon ja 
            ametlik versioon. Tänud	Mihklale, kes kala asemel annab tattninale õnge, see õpetab ujuma.
            Maadlesin õngega pikalt.
     */

    CFSWString res;
    CFSWString code;
    INTPTR pos;
    INTPTR i = 0;
    INTPTR x;

    while (s.GetLength() > 0) {
        CFSWString c = s.GetAt(0);
        s.Delete(0, 1);
        if (is_uvowel(c)) {
            c = c.ToLower();
            code += shift_pattern(c);
            res += c;
            pos = i;
        } else
            if (c == d && code.GetLength() > 0) {
            res += c;
            code += c;
            CFSWString t_code = code;
            t_code += shift_pattern(s.GetAt(0));

            x = pattern_lookup(t_code); //orig üle silbipiiri
            if (x > -1) {
                x += pos;
                if (x > res.GetLength()) { // kui kargab järgmisse silpi
                    x = x - res.GetLength();
                    s.Insert(x, colon);
                } else
                    res.Insert(x, colon);
                i++;
            } else {
                t_code = simplify_pattern(t_code);
                x = pattern_lookup(t_code); //liht üle silbipiiri
                if (x > -1) {
                    x += pos;
                    if (x > res.GetLength()) { // kui kargab järgmisse silpi
                        x = x - res.GetLength();
                        s.Insert(x, colon);
                    } else
                        res.Insert(x, colon);
                    i++;
                } else {
                    x = pattern_lookup(code); //orig 
                    if (x > -1) {
                        x += pos;
                        res.Insert(x, colon);
                        i++;
                    } else {
                        code = simplify_pattern(code);
                        x = pattern_lookup(code); //liht
                        if (x > -1) {
                            x += pos;
                            res.Insert(x, colon);
                            i++;
                        }
                    }
                }
            }

            code = empty_str;
        } else {
            res += c;
            if (code.GetLength() > 0) {
                code += shift_pattern(c);
            }
        }
        i++;
    } //while

    // sõna lõpus
    if (code.GetLength() > 0) {
        code += L"#";
        //imelik koht ainult "lonksu" pärast
        if ((code.Left(3) == L"VLQ") && ((code.GetAt(3) == L's') || (code.GetAt(3) == L'h') || (code.GetAt(3) == L'v') || (code.GetAt(3) == L'j'))) {
            code = L"VLQC#";
        }
        INTPTR x = pattern_lookup(code);
        if (x > -1) {
            x += pos;
            res.Insert(x, colon);
        } else {
            code = simplify_pattern(code);
            x = pattern_lookup(code);
            if (x > -1) {
                x += pos;
                res.Insert(x, colon);
            }
        }
        code = empty_str;
    }
    return res;
}