コード例 #1
0
ファイル: vabamorf.cpp プロジェクト: urdvr/estnltk
// add morphological analysis to CFSArray containing the sentence
void addAnalysis(CLinguistic& linguistic, CDisambiguator& disambiguator, CFSArray<CFSVar>& words, const bool disambiguate) {
    //CFSVar &words=Data["words"];
    CFSArray<CPTWord> PTWords;
    for (INTPTR ip=0; ip<words.GetSize(); ip++) {
        PTWords.AddItem(words[ip]["text"].GetWString());
    }
    // perform analysis and optional disambiguation
    CFSArray<CMorphInfos> MorphResults=linguistic.AnalyzeSentense(PTWords);
    if (disambiguate) {
        MorphResults=disambiguator.Disambiguate(MorphResults);
    }
    // collect the analysis results
    ASSERT(PTWords.GetSize()==MorphResults.GetSize());
    for (INTPTR ip=0; ip<words.GetSize(); ip++) {
        const CFSArray<CMorphInfo> &Analysis=MorphResults[ip].m_MorphInfo;
        CFSVar VarAnalysis;
        VarAnalysis.Cast(CFSVar::VAR_ARRAY);
        for (INTPTR ipRes=0; ipRes<Analysis.GetSize(); ipRes++) {
            const CMorphInfo &Analysis1=Analysis[ipRes];
            CFSVar VarAnalysis1;
            VarAnalysis1["root"]=Analysis1.m_szRoot;
            VarAnalysis1["ending"]=Analysis1.m_szEnding;
            VarAnalysis1["clitic"]=Analysis1.m_szClitic;
            VarAnalysis1["partofspeech"]=CFSWString(Analysis1.m_cPOS);
            VarAnalysis1["form"]=Analysis1.m_szForm;
            VarAnalysis[ipRes]=VarAnalysis1;
        }
        words[ip]["analysis"]=VarAnalysis;
    }
}
コード例 #2
0
ファイル: syls.cpp プロジェクト: ikiissel/synthts_vr
CFSWString chars_to_phones_part_I(CFSWString &s) {
    CFSWString res;
    for (INTPTR i = 0; i < s.GetLength(); i++) {
        CFSWString c = s.GetAt(i);
        if (c == L']') {
            CFSWString t = CFSWString(s.GetAt(i - 1)).ToUpper();
            res.SetAt(res.GetLength() - 1, t.GetAt(0));
            //vaatab taha; pole kindel, et kas on vajalik
            t = CFSWString(s.GetAt(i - 2)).ToUpper();
            if (can_palat(t)) {
                res.SetAt(res.GetLength() - 2, t.GetAt(0));
                t = CFSWString(s.GetAt(i - 3)).ToUpper();
                if (can_palat(t)) {
                    res.SetAt(res.GetLength() - 2, t.GetAt(0));
                }
            }
            //vaatab ette					
            t = CFSWString(s.GetAt(i + 1)).ToUpper();
            if (can_palat(t)) {
                s.SetAt(i + 1, t.GetAt(0));
                t = CFSWString(s.GetAt(i + 2)).ToUpper();
                if (can_palat(t)) {
                    s.SetAt(i + 2, t.GetAt(0));
                }
            }
        } else
            if (c == L'<') {
            CFSWString t = CFSWString(s.GetAt(i + 1)).ToUpper();
            s.SetAt(i + 1, t.GetAt(0));
        } else
            if (c == L'?') {
        }//Ebanormaalne rõhk. Pärast vaatab, mis teha
        else
            if (c == L'x') res += L"ks";
        else
            if (c == L'y') res += L"i";
        else
            if (c == L'w') res += L"v";
        else
            if (c == L'z') res += L"ts";
        else
            if (c == L'c') {
            res += L"k";
        } else
            if (c == L'ü' && is_vowel(s.GetAt(i + 1)) && s.GetAt(i - 1) == L'ü')
            res += L"i";
        else
            res += c;
    }
    return res;
}
コード例 #3
0
ファイル: mklab.cpp プロジェクト: ikiissel/synthts_et
CFSArray<CFSWString> do_all(CFSWString utt, bool print_label, bool print_utt) {
    CFSArray<CFSWString> res;
    CFSArray<CPTWord> PTW;
    utterance_struct u;
    u.s = utt;
    u.syl_c = 0;
    u.phone_c = 0;
    u.phra_c = do_phrases(u);
    INTPTR word_count = 0;
	 if (print_utt) fprintf(stderr, "%s\n", ccstr(utt));    
    for (INTPTR i = 0; i < u.phr_vector.GetSize(); i++) {
        u.phr_vector[i].utt_p = i;
        phrase2words(u.phr_vector[i], PTW);
        word_count += u.phr_vector[i].word_c;
    }

    CFSArray<CMorphInfos> MRs = Disambiguator.Disambiguate(Linguistic.AnalyzeSentense(PTW));

    CFSArray<CMorphInfo> words;

    for (INTPTR i = 0; i < MRs.GetSize(); i++)
        //for (INTPTR i1 = 0; i1 < MRs[i].m_MorphInfo.GetSize(); i1++)
        words.AddItem(MRs[i].m_MorphInfo[0]); //Ühestamistulemuse ühestamise koht


    u.word_c = words.GetSize();


    word_struct w;

    INTPTR utt_phone_c = 1;
    INTPTR syl_utt_p = 1;
    INTPTR phone_utt_p = 1;



    for (INTPTR i = 0; i < u.phr_vector.GetSize(); i++) {
        u.phr_vector[i].utt_p = i + 1;
        INTPTR syl_phr_p = 1;
        INTPTR phone_phr_p = 1;
        INTPTR phrase_pho_c = 1;
        for (INTPTR i1 = 0; i1 < u.phr_vector[i].word_c; i1++) {
            w.utt_p = utt_phone_c++;
            w.phr_p = i1 + 1;
            w.mi = words[0];
            w.mi.m_szRoot += make_char_string(w.mi.m_szEnding) + w.mi.m_szClitic;            
            w.mi.m_szRoot = w.mi.m_szRoot.ToLower();
            // sidesõnad + ei välteta
            if ((CFSWString(w.mi.m_cPOS) == L"J") || (w.mi.m_szRoot == L"<ei")) w.mi.m_szRoot.Replace(L"<", L"", 1);
            do_syls(w);
            u.phr_vector[i].word_vector.AddItem(w);

            INTPTR phone_word_p = 1;
            INTPTR word_pho_c = 1;
            for (INTPTR i2 = 0; i2 < u.phr_vector[i].word_vector[i1].syl_vector.GetSize(); i2++) {
                u.syl_c++;
                u.phr_vector[i].syl_c++;
                u.phr_vector[i].word_vector[i1].syl_c++;
                u.phr_vector[i].word_vector[i1].syl_vector[i2].phr_p = syl_phr_p++;
                u.phr_vector[i].word_vector[i1].syl_vector[i2].utt_p = syl_utt_p++;
                INTPTR syl_phone_c = 1;

                do_phones(u.phr_vector[i].word_vector[i1].syl_vector[i2]);

                for (INTPTR i3 = 0; i3 < u.phr_vector[i].word_vector[i1].syl_vector[i2].phone_vector.GetSize(); i3++) {
                    u.phone_c++;
                    u.phr_vector[i].phone_c = phrase_pho_c++;
                    u.phr_vector[i].word_vector[i1].phone_c = word_pho_c++;
                    u.phr_vector[i].word_vector[i1].syl_vector[i2].phone_c = syl_phone_c++;
                    phone_struct p = u.phr_vector[i].word_vector[i1].syl_vector[i2].phone_vector[i3];
                    p.utt_p = phone_utt_p++;
                    p.phr_p = phone_phr_p++;
                    p.word_p = phone_word_p++;
                    u.phr_vector[i].word_vector[i1].syl_vector[i2].phone_vector[i3] = p;

                }

            }
            words.RemoveItem(0, 1);
        }
    }




    if (print_label) print_u(u);
    res = do_label(u);
    return res;
}
コード例 #4
0
ファイル: suggestor.cpp プロジェクト: Kaljurand/vabamorf
int CSuggestor::Suggest(const CFSWString &szWord, bool bStartSentence){
	m_TimeStart=CFSTime::Now();
	m_Items.Cleanup();

	m_Cap.SetCap(szWord);
	if (bStartSentence && m_Cap.GetCapMode()==CFSStrCap<CFSWString>::CAP_LOWER) {
		m_Cap.SetCapMode(CFSStrCap<CFSWString>::CAP_INITIAL);
	}

	CFSWString szWordHigh=szWord.ToUpper();
	INTPTR ipWordLength=szWordHigh.GetLength();
	CFSWString szTemp;
	INTPTR i, j;
	long lLevel=100;
	SetLevel(lLevel);

	// Case problems & change list
	i=SpellWord(szWordHigh, szTemp, &lLevel);
	if ((i==SPL_NOERROR || i==SPL_CHANGEONCE) && !szTemp.IsEmpty()){
		SetLevel(GetLevelGroup(lLevel));
		m_Items.AddItem(CSuggestorItem(szTemp, lLevel));
	}
	else SetLevel(5);
	
	// Abbrevations
	// !!! Unimplemented

	// Quotes
/*	if (ipWordLength>=2 && 
		(szAllQuot.Find(szWordHigh[0])>=0 || szAllQuot.Find(szWordHigh[ipWordLength-1])>=0))
	{
		szTemp=szWordHigh;
		int iPos;
		if (szAllQuot.Find(szTemp[0])>=0){
			if (szQuotLeft.Find(szTemp[0])>=0) { }
			else if ((iPos=szQuotRight.Find(szTemp[0]))>=0) { szTemp[0]=szQuotLeft[iPos]; }
			else if (szDQuotLeft.Find(szTemp[0])>=0) { }
			else if ((iPos=szDQuotRight.Find(szTemp[0]))>=0) { szTemp[0]=szDQuotLeft[iPos]; }

			if (szAllQuot.Find(szTemp[ipWordLength-1])>=0) { szTemp[ipWordLength-1]=(szQuotRight+szDQuotRight)[(szQuotLeft+szDQuotLeft).Find(szTemp[0])];
		}
		else{
			if (szQuotRight.Find(szTemp[ipWordLength-1])>=0) { }
			else if ((iPos=szQuotLeft.Find(szTemp[ipWordLength-1]))>=0) { szTemp[ipWordLength-1]=szQuotRight[iPos]; }
			else if (szDQuotRight.Find(szTemp[ipWordLength-1])>=0) { }
			else if ((iPos=szDQuotLeft.Find(szTemp[ipWordLength-1]))>=0) { szTemp[ipWordLength-1]=szDQuotRight[iPos]; }
		}
		CheckAndAdd(szTemp);
	}*/

	// Add space
	for (i=1; i<ipWordLength-1; i++){
		static CFSWString szPunktuation=FSWSTR(".:,;!?");
		if (szPunktuation.Find(szWord[i])>=0){
			long lLevel1, lLevel2;
			CFSWString szTemp1, szTemp2;
			if (SpellWord(szWord.Left(i+1), szTemp1, &lLevel1)==SPL_NOERROR &&
				SpellWord(szWord.Mid(i+1), szTemp2, &lLevel2)==SPL_NOERROR)
			{
				m_Items.AddItem(CSuggestorItem(szWord.Left(i+1)+L' '+szWord.Mid(i+1), FSMAX(lLevel1, lLevel2)));
			}
		}
	}

	// Delete following blocks: le[nnu][nnu]jaam
	for (i=2; i<=3; i++){
		for (j=0; j<ipWordLength-i-i; j++){
			if (memcmp((const FSWCHAR *)szWordHigh+j, (const FSWCHAR *)szWordHigh+j+i, i*sizeof(FSWCHAR))==0){
				szTemp=szWordHigh.Left(j)+szWordHigh.Mid(j+i);
				CheckAndAdd(szTemp);
			}
		}
	}

	// Change following letters: abb -> aab & aab -> abb
	for (i=1; i<ipWordLength-1; i++){
		if (szWordHigh[i]==szWordHigh[i+1]){
			szTemp=szWordHigh;
			szTemp[i]=szTemp[i-1];
			if (FSIsLetterEst(szTemp[i])) CheckAndAdd(szTemp);
		}
		else if (szWordHigh[i]==szWordHigh[i-1]){
			szTemp=szWordHigh;
			szTemp[i]=szTemp[i+1];
			if (FSIsLetterEst(szTemp[i])) CheckAndAdd(szTemp);
		}
	}

	// Exchange letters: van[na]ema -> van[an]ema
	szTemp=szWordHigh;
	for (i=1; i<ipWordLength; i++){
		if (szTemp[i]!=szTemp[i-1]){
			FSWCHAR ch=szTemp[i];
			szTemp[i]=szTemp[i-1];
			szTemp[i-1]=ch;
			CheckAndAdd(szTemp);
			szTemp[i-1]=szTemp[i];
			szTemp[i]=ch;
		}
	}

	// Change blocks
	for (i=0; i<ipWordLength; i++){
		for (j=0; j<(INTPTR)(sizeof(ChangeStrings)/sizeof(__CChangeStrings)); j++){
			if (szWordHigh.ContainsAt(i, ChangeStrings[j].m_lpszFrom)){
				szTemp=szWordHigh.Left(i)+ChangeStrings[j].m_lpszTo+szWordHigh.Mid(i+FSStrLen(ChangeStrings[j].m_lpszFrom));
				CheckAndAdd(szTemp);
			}
		}
	}

	// Change end blocks
	for (i=0; i<(INTPTR)(sizeof(ChangeStringsEnd)/sizeof(__CChangeStrings)); i++){
		if (szWordHigh.EndsWith(ChangeStringsEnd[i].m_lpszFrom)){
			szTemp=szWordHigh.Left(ipWordLength-FSStrLen(ChangeStringsEnd[i].m_lpszFrom))+ChangeStringsEnd[i].m_lpszTo;
			CheckAndAdd(szTemp);
		}
	}

	// Po~o~sas
	MultiReplace(szWordHigh, 0);

	// gi/ki: Kylli[gi]le -> Kyllile[gi]
	for (i=3; i<=6; i++){
		if (i>ipWordLength) break;
		if (memcmp((const FSWCHAR *)szWordHigh+ipWordLength-i, FSWSTR("GI"), 2*sizeof(FSWCHAR))==0){
			szTemp=szWordHigh.Left(ipWordLength-i)+szWordHigh.Mid(ipWordLength-i+2)+FSWSTR("GI");
			CheckAndAdd(szTemp);
			szTemp=szWordHigh.Left(ipWordLength-i)+szWordHigh.Mid(ipWordLength-i+2)+FSWSTR("KI");
			CheckAndAdd(szTemp);
		}
	}

	// Delete letters: van[n]aema -> vanaema
	szTemp=szWordHigh.Mid(1);
	CheckAndAdd(szTemp);
	for (i=0; i<ipWordLength-1; i++){
		if (szTemp[i]!=szWordHigh[i]){
			szTemp[i]=szWordHigh[i];
			CheckAndAdd(szTemp);
		}
	}

	// Change letters from list
	for (i=0; i<ipWordLength; i++){
		const FSWCHAR *lpszTo=__SuggestChangeLetters(szWordHigh[i]);
		if (!lpszTo) continue;
		szTemp=szWordHigh;
		for (; lpszTo[0]; lpszTo++){
			szTemp[i]=lpszTo[0];
			CheckAndAdd(szTemp);
		}
	}
	
	// Insert letters to word body
	for (i=1; i<ipWordLength; i++){
		szTemp=szWordHigh.Left(i)+FSWSTR(' ')+szWordHigh.Mid(i);
		for (j=0; szInsertLetters[j]; j++){
			szTemp[i]=szInsertLetters[j];
			CheckAndAdd(szTemp);
		}
	}

	// Insert letters to the beginning
	szTemp=CFSWString(FSWSTR(" "))+szWordHigh;
	for (i=0; szInsertLettersBeg[i]; i++){
		if (szTemp[1]==szInsertLettersBeg[i]) continue;
		szTemp[0]=szInsertLettersBeg[i];
		CheckAndAdd(szTemp);
	}

	// Try apostrophe for names
	if (szWord[0]!=szWordHigh[0] && szWordHigh.Find('\'')<0){
		for (i=0; i<5; i++){
			if (i>=ipWordLength) break;
			szTemp=szWordHigh.Left(ipWordLength-i)+L'\''+szWordHigh.Mid(ipWordLength-i);
			CheckAndAdd(szTemp);
		}
	}

	Order();
	RemoveImmoderate();
	RemoveDuplicates();
	return 0;
}
コード例 #5
0
ファイル: syls.cpp プロジェクト: glensc/synthts_et
CFSWString chars_to_phones_part_I(CFSWString &s) {
    /*	müüa -> müia siia?
                    Kuna vältemärgi nihutamise reeglid on ehitatud selliselt, et kohati kasutatakse 
                    foneeme ja kohati ei, siis tuleb täht->foneem teisendus teha kahes jaos.
                    Palataliseerimine põhineb ideel, et palataliseeritud foneemi ümbruses ei saa
                    olla palataliseeruvaid mittepalataliseeritud foneeme :D
     */
    CFSWString res;
    for (INTPTR i = 0; i < s.GetLength(); i++) {
        CFSWString c = s.GetAt(i);
        if (c == L']') {
            CFSWString t = CFSWString(s.GetAt(i - 1)).ToUpper();
            res.SetAt(res.GetLength() - 1, t.GetAt(0));
            //vaatab taha; pole kindel, et kas on vajalik
            t = CFSWString(s.GetAt(i - 2)).ToUpper();
            if (can_palat(t)) {
                res.SetAt(res.GetLength() - 2, t.GetAt(0));
                t = CFSWString(s.GetAt(i - 3)).ToUpper();
                if (can_palat(t)) {
                    res.SetAt(res.GetLength() - 2, t.GetAt(0));
                }
            }
            //vaatab ette					
            t = CFSWString(s.GetAt(i + 1)).ToUpper();
            if (can_palat(t)) {
                s.SetAt(i + 1, t.GetAt(0));
                t = CFSWString(s.GetAt(i + 2)).ToUpper();
                if (can_palat(t)) {
                    s.SetAt(i + 2, t.GetAt(0));
                }
            }
        } else
            if (c == L'<') {
            CFSWString t = CFSWString(s.GetAt(i + 1)).ToUpper();
            s.SetAt(i + 1, t.GetAt(0));
        } else
            if (c == L'?') {
        }//Ebanormaalne rõhk. Pärast vaatab, mis teha
        else
            if (c == L'x') res += L"ks";
        else
            if (c == L'y') res += L"i";
        else
            if (c == L'w') res += L"v";
        else
            if (c == L'z') res += L"ts";
        else
            if (c == L'c') {
            res += L"k";
        } else
            if (c == L'ü' && is_vowel(s.GetAt(i + 1)) && s.GetAt(i - 1) == L'ü')
            res += L"i";
        else
            if (c == L'q') {
            if (is_vowel(s.GetAt(i + 1))) {
                res += L"k";
                s.SetAt(i + 1, L'v');
            } else
                res += L"k";
        } else
            res += c;
    }
    return res;
}
コード例 #6
0
ファイル: etdisamb.cpp プロジェクト: theranger/vabamorf
	void OnValReadEnd(const CFSAString &szKey, CFSVar &Data) {
		if (szKey.IsEmpty()) {
			SubKeys("paragraphs", Data);
			m_Writer.ObjectEnd();
		} else if (szKey=="/paragraphs") {
			m_Writer.ArrayEnd();
			m_iCollectData++;
		} else if (KeyMatch(szKey, "/paragraphs/%d")) {
			SubKeys("sentences", Data);
			m_Writer.ObjectEnd();
			m_iCollectData--;
		} else if (KeyMatch(szKey, "/paragraphs/%d/sentences")) {
			m_Writer.ArrayEnd();
			m_iCollectData++;
		} else if (KeyMatch(szKey, "/paragraphs/%d/sentences/%d")) {
			if (Data.KeyExist("words")) {

				CFSVar &Words=Data["words"];
				CFSArray<CMorphInfos> WordsAnalysis;
				for (INTPTR ip=0; ip<Words.GetSize(); ip++) {
					const CFSVar &Word=Words[ip];
					CMorphInfos Analysis;
					Analysis.m_szWord=Word["text"].GetWString();
					const CFSVar &VarAnalysis=Word["analysis"];
					for (INTPTR ip2=0; ip2<VarAnalysis.GetSize(); ip2++) {
						const CFSVar &VarAnalysis1=VarAnalysis[ip2];
						CMorphInfo Analysis1;
						Analysis1.m_szRoot=VarAnalysis1["root"].GetWString();
						Analysis1.m_szEnding=VarAnalysis1["ending"].GetWString();
						Analysis1.m_szClitic=VarAnalysis1["clitic"].GetWString();
						Analysis1.m_cPOS=VarAnalysis1["partofspeech"].GetWString()[0];
						Analysis1.m_szForm=VarAnalysis1["form"].GetWString();
						Analysis.m_MorphInfo.AddItem(Analysis1);
					}
					WordsAnalysis.AddItem(Analysis);
				}

				WordsAnalysis=m_Disambiguator.Disambiguate(WordsAnalysis);
				RT_ASSERT(Words.GetSize()==WordsAnalysis.GetSize());

				for (INTPTR ip=0; ip<Words.GetSize(); ip++) {
					const CMorphInfos &Analysis=WordsAnalysis[ip];
					CFSVar VarAnalysis;
					VarAnalysis.Cast(CFSVar::VAR_ARRAY);
					for (INTPTR ipRes=0; ipRes<Analysis.m_MorphInfo.GetSize(); ipRes++) {
						const CMorphInfo &Analysis1=Analysis.m_MorphInfo[ipRes];
						CFSVar VarAnalysis1;
						VarAnalysis1["root"]=Analysis1.m_szRoot;
						VarAnalysis1["ending"]=Analysis1.m_szEnding;
						VarAnalysis1["clitic"]=Analysis1.m_szClitic;
						VarAnalysis1["partofspeech"]=CFSWString(Analysis1.m_cPOS);
						VarAnalysis1["form"]=Analysis1.m_szForm;
						VarAnalysis[ipRes]=VarAnalysis1;
					}
					Words[ip]["analysis"]=VarAnalysis;
				}

			}
			m_Writer.Val(Data);
			m_iCollectData--;
		}
	}