Пример #1
0
/**
 * JNI entry point behind Linguistic.synthesize(info, hint).
 *
 * Reads the "root", "clitic", "pos" and "form" fields of `info`, passes them
 * together with `hint` to linguistic->Synthesize() and returns an Object[]
 * holding one newly constructed Linguistic$MorphInfo per result.
 *
 * Returns NULL when the engine is not available, when a Java class,
 * constructor, array or element object cannot be obtained, or when a C++
 * exception is caught.
 */
JNIEXPORT jobjectArray JNICALL Java_ee_filosoft_vabamorf_Linguistic_synthesize(JNIEnv *env, jobject jobj, jobject info, jstring hint)
{
	FUNCTION_HEADER;
	if (!linguistic) return NULL;

	// Scoped JNI local frame: every local reference created while the guard is
	// alive is released when it goes out of scope (also on early return or on a
	// C++ exception), so the loop below cannot exhaust the local reference table.
	struct LocalFrameGuard {
		JNIEnv *m_pEnv;
		bool m_bPushed;
		LocalFrameGuard(JNIEnv *pEnv, jint iCapacity) : m_pEnv(pEnv), m_bPushed(pEnv->PushLocalFrame(iCapacity) == 0) { }
		~LocalFrameGuard() { if (m_bPushed) m_pEnv->PopLocalFrame(NULL); }
	};

	try {
		jclass MorphInfoClass = env->FindClass("ee/filosoft/vabamorf/Linguistic$MorphInfo");
		jmethodID MorphInfoClassConstructor = (MorphInfoClass ? env->GetMethodID(MorphInfoClass, "<init>", "()V") : 0);
		if (!MorphInfoClassConstructor) return NULL;

		CMorphInfo morphInfo;
		morphInfo.m_szRoot = FSJNIStrtoW(env, FSJNIGetStringField(env, info, "root"));
		morphInfo.m_szClitic = FSJNIStrtoW(env, FSJNIGetStringField(env, info, "clitic"));
		morphInfo.m_cPOS = FSJNIGetCharField(env, info, "pos");
		morphInfo.m_szForm = FSJNIStrtoW(env, FSJNIGetStringField(env, info, "form"));

		CFSArray<CMorphInfo> results = linguistic->Synthesize(morphInfo, FSJNIStrtoW(env, hint));

		// Both calls can fail (with a pending Java exception); do not use NULL results.
		jclass ObjectClass = env->FindClass("java/lang/Object");
		if (!ObjectClass) return NULL;
		jobjectArray synths = env->NewObjectArray(results.GetSize(), ObjectClass, 0);
		if (!synths) return NULL;

		for (INTPTR ip=0; ip<results.GetSize(); ip++) {
			// One frame per element: the element object and the temporary
			// strings created for its fields are dropped after each iteration.
			LocalFrameGuard frame(env, 16);
			if (!frame.m_bPushed) return NULL;

			jobject analyze1=env->NewObject(MorphInfoClass, MorphInfoClassConstructor);
			if (!analyze1) return NULL;
			FSJNISetStringField(env, analyze1, "root", FSJNIWtoStr(env, results[ip].m_szRoot));
			FSJNISetStringField(env, analyze1, "ending", FSJNIWtoStr(env, results[ip].m_szEnding));
			FSJNISetStringField(env, analyze1, "clitic", FSJNIWtoStr(env, results[ip].m_szClitic));
			FSJNISetCharField(env, analyze1, "pos", results[ip].m_cPOS);
			FSJNISetStringField(env, analyze1, "form", FSJNIWtoStr(env, results[ip].m_szForm));
			// The array keeps the element alive after the frame is popped.
			env->SetObjectArrayElement(synths, ip, analyze1);
		}
		return synths;
	} catch(...) {
		return NULL;
	}
}
Пример #2
0
// Analyses one sentence morphologically and writes the readings of every word
// into that word's "analysis" entry.
//   linguistic    - analyser used for AnalyzeSentense()
//   disambiguator - applied to the analyser output only when `disambiguate` is true
//   words         - sentence; each item supplies "text" and receives "analysis"
void addAnalysis(CLinguistic& linguistic, CDisambiguator& disambiguator, CFSArray<CFSVar>& words, const bool disambiguate) {
    const INTPTR wordCount = words.GetSize();

    // Surface forms in sentence order.
    CFSArray<CPTWord> sentence;
    for (INTPTR w = 0; w < wordCount; w++) {
        sentence.AddItem(words[w]["text"].GetWString());
    }

    // Analysis, followed by the optional disambiguation pass.
    CFSArray<CMorphInfos> analysed = linguistic.AnalyzeSentense(sentence);
    if (disambiguate) {
        analysed = disambiguator.Disambiguate(analysed);
    }

    // One result entry is expected per input word.
    ASSERT(sentence.GetSize()==analysed.GetSize());

    for (INTPTR w = 0; w < wordCount; w++) {
        const CFSArray<CMorphInfo> &readings = analysed[w].m_MorphInfo;

        // Always an array, even when the word has no readings.
        CFSVar readingList;
        readingList.Cast(CFSVar::VAR_ARRAY);

        for (INTPTR r = 0; r < readings.GetSize(); r++) {
            const CMorphInfo &reading = readings[r];
            CFSVar item;
            item["root"] = reading.m_szRoot;
            item["ending"] = reading.m_szEnding;
            item["clitic"] = reading.m_szClitic;
            item["partofspeech"] = CFSWString(reading.m_cPOS);
            item["form"] = reading.m_szForm;
            readingList[r] = item;
        }
        words[w]["analysis"] = readingList;
    }
}
Пример #3
0
// Splits the syllable text ss.syl into phones and stores them in ss.phone_vector.
//
// Every character becomes one phone after the symbol mapping below, numbered
// from 1 within the syllable (syl_p); utt_p, phr_p and word_p are left at 0.
// A character for which is_colon() is true does not start a phone: `doq` is
// appended to the previous phone instead.
void do_phones(syl_struct &ss) {
    CFSArray<phone_struct> pv;
    phone_struct p;
    p.utt_p = 0;
    p.phr_p = 0;
    p.word_p = 0;
    p.syl_p = 0;
    INTPTR phone_syl_p = 1;

    for (INTPTR i = 0; i < ss.syl.GetLength(); i++) {
        CFSWString c = ss.syl.GetAt(i);

        // Map letters to the phone symbols used by the labels.
        if ((c == L'š') || (c == L'ž')) c = L"sh";
        else
        if (c == L'q') c = L"kw";
        else
        if (c == L'õ') c = L"q";
        else
        if (c == L'ä') c = L"x";
        else
        if (c == L'ö') c = L"c";
        else
        if (c == L'ü') c = L"y";

        if (is_colon(c)) {
            // Guard against shift errors (original note: see "piirkonda").
            // Checking the array itself, rather than `i > 0`, also covers a
            // syllable that starts with several colon marks, where there is
            // still no previous phone to attach to.
            if (pv.GetSize() > 0)
                pv[pv.GetSize() - 1].phone += doq;
        } else {
            p.phone = c;
            p.syl_p = phone_syl_p++;
            pv.AddItem(p);
        }

    }
    ss.phone_vector = pv;
}
Пример #4
0
// Splits the syllable text ss.syl into phones and stores them in ss.phone_vector.
//
// Every character becomes one phone after the symbol mapping below, numbered
// from 1 within the syllable (syl_p); utt_p, phr_p and word_p are left at 0.
// A character for which is_colon() is true does not start a phone: `doq` is
// appended to the previous phone instead, unless that phone is "j" or "v".
void do_phones(syl_struct &ss) {
    CFSArray<phone_struct> pv;
    phone_struct p;
    p.utt_p = 0;
    p.phr_p = 0;
    p.word_p = 0;
    p.syl_p = 0;
    INTPTR phone_syl_p = 1;
    for (INTPTR i = 0; i < ss.syl.GetLength(); i++) {
        CFSWString c = ss.syl.GetAt(i);

        // Map letters to the phone symbols used by the labels.
        if ((c == L'š') || (c == L'ž')) c = L"sh";
        if (c == L'õ') c = L"q";
        if (c == L'ä') c = L"x";
        if (c == L'ö') c = L"c";
        if (c == L'ü') c = L"y";

        if (is_colon(c)) {
            // 1. Guard against shift errors (original note: see "piirkonda").
            //    Checking the array itself, rather than `i > 0`, also covers a
            //    syllable that starts with several colon marks, where there
            //    is still no previous phone to attach to.
            // 2-3. Original note: in the third quantity degree v and j are so
            //    rare in the speech database that some r-like sound is heard
            //    in the output instead. Anyone with a better database can
            //    remove the second and third condition.
            if ((pv.GetSize() > 0) && (pv[pv.GetSize() - 1].phone != L"j") && (pv[pv.GetSize() - 1].phone != L"v"))
                pv[pv.GetSize() - 1].phone += doq;
        } else {
            p.phone = c;
            p.syl_p = phone_syl_p++;
            pv.AddItem(p);
        }

    }
    ss.phone_vector = pv;
}
Пример #5
0
// Converts each WordAnalysis with convertWordAnalysis() and returns the
// converted items as a CFSArray, in input order.
CFSArray<CMorphInfos> convertDisambInput(std::vector<WordAnalysis> const& words) {
    CFSArray<CMorphInfos> converted;
    for (std::vector<WordAnalysis>::const_iterator it = words.begin(); it != words.end(); ++it) {
        converted.AddItem(convertWordAnalysis(*it));
    }
    return converted;
}
Пример #6
0
// Splits the phrase text p.s into words and normalises it.
//
// The text is cut at characters for which is_space() is true, the resulting
// tokens are expanded by tokens2words(), and then
//   - p.s is rebuilt from the expanded words joined with `sp`,
//   - every expanded word is appended to PTW,
//   - p.word_c is set to the number of expanded words.
void phrase2words(phrase_struct &p, CFSArray<CPTWord> &PTW) {

    CFSWString res;
    CFSArray<CFSWString> word_array;

    // p.s is not modified while it is being scanned, so its end can be fixed up front.
    const INTPTR last = p.s.GetLength() - 1;
    for (INTPTR i = 0; i <= last; i++) {
        CFSWString c = p.s.GetAt(i);
        const bool at_end = (i == last);
        if (is_space(c) || at_end) {
            // The final character always belongs to the token being closed.
            if (at_end) res += c;
            word_array.AddItem(res);
            res = empty_str;
        } else res += c;
    }

    word_array = tokens2words(word_array);
    p.s = empty_str;

    const INTPTR word_count = word_array.GetSize();
    for (INTPTR i = 0; i < word_count; i++) {
        p.s += word_array[i];
        if (i < word_count - 1) p.s += sp;
        PTW.AddItem(word_array[i]); // if lower-casing is wanted, apply .ToLower() here
    }
    p.word_c = word_count;
}
Пример #7
0
// Converts spell-checker output into the wrapper format: one SpellingResults
// per word, built from the word's "text", "spelling" and "suggestions" entries.
std::vector<SpellingResults> convertSpellingOutput(CFSArray<CFSVar>& words) {
    const int wordCount = words.GetSize();

    std::vector<SpellingResults> converted;
    converted.reserve(wordCount);

    for (int w = 0; w < wordCount; ++w) {
        // The entry and its suggestion list are copied, so the key lookups
        // below operate on the copies rather than on the caller's data.
        CFSVar entry = words[w];
        std::string text = std::string(entry["text"].GetAString());
        CFSVar suggestionList = entry["suggestions"];

        const int suggestionCount = suggestionList.GetSize();
        StringVector suggestionTexts;
        suggestionTexts.reserve(suggestionCount);
        for (int s = 0; s < suggestionCount; ++s) {
            CFSVar one = suggestionList[s];
            suggestionTexts.push_back(std::string(one.GetAString()));
        }

        converted.push_back(SpellingResults(text, entry["spelling"].GetInt(), suggestionTexts));
    }
    return converted;
}
Пример #8
0
/**
 * JNI entry point behind Linguistic.analyze(word).
 *
 * Analyses `word` with linguistic->Analyze() and returns an Object[] holding
 * the MorphInfoToJNI() conversion of every result.
 *
 * Returns NULL when the engine is not available, when the result array cannot
 * be created, when a Java exception is pending after converting an element,
 * or when a C++ exception is caught.
 */
JNIEXPORT jobjectArray JNICALL Java_ee_filosoft_vabamorf_Linguistic_analyze(JNIEnv *env, jobject jobj, jstring word)
{
	FUNCTION_HEADER;
	if (!linguistic) return NULL;

	try {
		CFSArray<CMorphInfo> results = linguistic->Analyze(FSJNIStrtoW(env, word));

		// Both calls can fail (with a pending Java exception); do not use NULL results.
		jclass objectClass = env->FindClass("java/lang/Object");
		if (!objectClass) return NULL;
		jobjectArray analyzes = env->NewObjectArray(results.GetSize(), objectClass, 0);
		if (!analyzes) return NULL;

		for (INTPTR ip=0; ip<results.GetSize(); ip++) {
			jobject item = MorphInfoToJNI(env, results[ip]);
			// No further JNI calls are allowed while an exception is pending.
			if (env->ExceptionCheck()) return NULL;
			env->SetObjectArrayElement(analyzes, ip, item);
			// The array now references the element; release the local reference so
			// long result lists do not fill the local reference table.
			// NOTE(review): assumes MorphInfoToJNI returns a local reference - confirm.
			if (item) env->DeleteLocalRef(item);
		}
		return analyzes;
	} catch(...) {
		return NULL;
	}
}
Пример #9
0
// Converts vabamorf base library output to WordAnalysis instances, which are
// easier to wrap: one WordAnalysis per word, carrying the word's "text" and
// an Analysis for every item of its "analysis" list.
std::vector<WordAnalysis> convertOutput(CFSArray<CFSVar>& words) {
    const int wordCount = words.GetSize();

    std::vector<WordAnalysis> converted;
    converted.reserve(wordCount);

    for (int w = 0; w < wordCount; ++w) {
        // The entry and its analysis list are copied, so the key lookups
        // below operate on the copies rather than on the caller's data.
        CFSVar entry = words[w];
        CFSVar readingList = entry["analysis"];

        AnalysisVector readings;
        const int readingCount = readingList.GetSize();
        for (int r = 0; r < readingCount; ++r) {
            CFSVar reading = readingList[r];
            readings.push_back(Analysis(reading["root"].GetAString(),
                                        reading["ending"].GetAString(),
                                        reading["clitic"].GetAString(),
                                        reading["partofspeech"].GetAString(),
                                        reading["form"].GetAString()));
        }

        converted.push_back(WordAnalysis(std::string(entry["text"].GetAString()), readings));
    }
    return converted;
}
Пример #10
0
// Converts each CMorphInfos with convertMorphInfos() and returns the converted
// items as a std::vector, in input order.
std::vector<WordAnalysis> convertDisambOutput(CFSArray<CMorphInfos> const& morphInfos) {
    const int count = morphInfos.GetSize();

    std::vector<WordAnalysis> converted;
    converted.reserve(count);

    int idx = 0;
    while (idx < count) {
        converted.push_back(convertMorphInfos(morphInfos[idx]));
        ++idx;
    }
    return converted;
}
Пример #11
0
// Expands raw tokens into pronounceable words.
//
// Per token, in this order:
//   1. is_abbreviation() > 0 : the words it placed in its output array are used;
//   2. is_word()             : the token is kept as it is;
//   3. otherwise the token is cut up by make_ctype_array() and each piece is
//      handled according to the ctype() of its first character:
//        1 (letters) - kept if is_word(), else exploded and passed through replace_schar()
//        2 (symbols) - exploded and passed through replace_schar()
//        3 (numbers) - int_to_words(), split at `sp`, empty parts dropped
//      Pieces of any other type contribute nothing.
CFSArray<CFSWString> tokens2words(CFSArray<CFSWString> a) {
    CFSArray<CFSWString> temp_array;
    CFSArray<CFSWString> res;

    for (INTPTR tok = 0; tok < a.GetSize(); tok++) { // every token
        CFSWString s = a[tok];

        if (is_abbreviation(s, temp_array) > 0) {
            for (INTPTR k = 0; k < temp_array.GetSize(); k++)
                res.AddItem(temp_array[k]);
            continue;
        }

        if (is_word(s)) {
            res.AddItem(s);
            continue;
        }

        CFSArray<CFSWString> carray;
        make_ctype_array(s, carray);
        for (INTPTR piece = 0; piece < carray.GetSize(); piece++) {
            INTPTR c_type = ctype(carray[piece].GetAt(0));

            // Letters that form a proper word stay together.
            if ((c_type == 1) && is_word(carray[piece])) {
                res.AddItem(carray[piece]);
                continue;
            }

            // Gibberish letter runs and symbols are handled one character at a time.
            if ((c_type == 1) || (c_type == 2)) {
                explode(carray[piece], L"", temp_array);
                for (INTPTR k = 0; k < temp_array.GetSize(); k++)
                    res.AddItem(replace_schar(temp_array[k]));
                continue;
            }

            // Numbers are spelled out; each non-empty word of the result is added.
            if (c_type == 3) {
                CFSWString nr = int_to_words(carray[piece]);
                explode(nr, sp, temp_array);
                for (INTPTR k = 0; k < temp_array.GetSize(); k++) {
                    if (temp_array[k].GetLength() > 0)
                        res.AddItem(temp_array[k]);
                }
            }
        }
    }

    return res;
}
Пример #12
0
/**
 * JNI entry point behind Linguistic.synthesize(info, hint).
 *
 * Converts `info` with JNIToMorphInfo(), clears its ending, runs
 * linguistic->Synthesize() with `hint` and returns an Object[] holding the
 * MorphInfoToJNI() conversion of every result.
 *
 * Returns NULL when the engine is not available, when the result array cannot
 * be created, when a Java exception is pending after converting an element,
 * or when a C++ exception is caught.
 */
JNIEXPORT jobjectArray JNICALL Java_ee_filosoft_vabamorf_Linguistic_synthesize(JNIEnv *env, jobject jobj, jobject info, jstring hint)
{
	FUNCTION_HEADER;
	if (!linguistic) return NULL;

	try {
		CMorphInfo MorphInfo = JNIToMorphInfo(env, info);
		// The ending supplied by the caller is not passed on to Synthesize().
		MorphInfo.m_szEnding.Empty();

		CFSArray<CMorphInfo> results = linguistic->Synthesize(MorphInfo, FSJNIStrtoW(env, hint));

		// Both calls can fail (with a pending Java exception); do not use NULL results.
		jclass objectClass = env->FindClass("java/lang/Object");
		if (!objectClass) return NULL;
		jobjectArray synths = env->NewObjectArray(results.GetSize(), objectClass, 0);
		if (!synths) return NULL;

		for (INTPTR ip=0; ip<results.GetSize(); ip++) {
			jobject item = MorphInfoToJNI(env, results[ip]);
			// No further JNI calls are allowed while an exception is pending.
			if (env->ExceptionCheck()) return NULL;
			env->SetObjectArrayElement(synths, ip, item);
			// The array now references the element; release the local reference so
			// long result lists do not fill the local reference table.
			// NOTE(review): assumes MorphInfoToJNI returns a local reference - confirm.
			if (item) env->DeleteLocalRef(item);
		}
		return synths;
	} catch(...) {
		return NULL;
	}
}
Пример #13
0
// Picks the synthesis result that reproduces the word `s`.
//
// For each candidate, root + ending + clitic is joined; the first candidate
// whose joined form, passed through make_char_string(), equals s.m_szWord is
// returned with its root replaced by the joined form. When none matches, a
// default CMorphInfo whose root is s.m_szWord is returned.
CMorphInfo DisambiguateSynthesisResult(CFSArray<CMorphInfo> MIs, CPTWord s) {
    CFSWString joined;
    CMorphInfo fallback;

    for (INTPTR idx = 0; idx < MIs.GetSize(); idx++) {
        CMorphInfo &candidate = MIs[idx];
        joined = candidate.m_szRoot + candidate.m_szEnding + candidate.m_szClitic;
        if (s.m_szWord == make_char_string(joined)) {
            candidate.m_szRoot = joined;
            return candidate;
        }
    }

    fallback.m_szRoot = s.m_szWord;
    return fallback;
}
Пример #14
0
/**
 * JNI entry point behind Linguistic.analyzeSentence(words).
 *
 * Converts the Java string array `words` to CPTWord items, analyses them with
 * linguistic->AnalyzeSentence() and returns an Object[] holding the
 * MorphInfosToJNI() conversion of every result.
 *
 * Returns NULL when the engine is not available, when `words` is null, when
 * the result array cannot be created, when a Java exception is pending after
 * converting an element, or when a C++ exception is caught.
 */
JNIEXPORT jobjectArray JNICALL Java_ee_filosoft_vabamorf_Linguistic_analyzeSentence(JNIEnv *env, jobject jobj, jobjectArray words)
{
	FUNCTION_HEADER;
	if (!linguistic) return NULL;
	// GetArrayLength must not be called with a null array.
	if (!words) return NULL;

	try {
		CFSArray<CPTWord> ptwords;
		INTPTR ipSize = env->GetArrayLength(words);
		for (INTPTR ip=0; ip<ipSize; ip++) {
			jstring jword = (jstring)env->GetObjectArrayElement(words, ip);
			ptwords.AddItem(FSJNIStrtoW(env, jword));
			// Release per element so long sentences do not fill the local reference table.
			if (jword) env->DeleteLocalRef(jword);
		}

		CFSArray<CMorphInfos> results = linguistic->AnalyzeSentence(ptwords);

		// Both calls can fail (with a pending Java exception); do not use NULL results.
		jclass objectClass = env->FindClass("java/lang/Object");
		if (!objectClass) return NULL;
		jobjectArray analyzes = env->NewObjectArray(results.GetSize(), objectClass, 0);
		if (!analyzes) return NULL;

		for (INTPTR ip = 0; ip < results.GetSize(); ip++) {
			jobject item = MorphInfosToJNI(env, results[ip]);
			// No further JNI calls are allowed while an exception is pending.
			if (env->ExceptionCheck()) return NULL;
			env->SetObjectArrayElement(analyzes, ip, item);
			// The array now references the element.
			// NOTE(review): assumes MorphInfosToJNI returns a local reference - confirm.
			if (item) env->DeleteLocalRef(item);
		}
		return analyzes;
	} catch(...) {
		return NULL;
	}
}
Пример #15
0
/**
 * JNI entry point behind Linguistic.spellWords(words).
 *
 * Converts the Java string array `words` to CPTWord items, checks them with
 * linguistic->SpellWords() and returns an int[] holding one SPLRESULT value
 * per result.
 *
 * Returns NULL when the engine is not available, when `words` is null, when
 * the result array cannot be created, or when a C++ exception is caught.
 */
JNIEXPORT jintArray JNICALL Java_ee_filosoft_vabamorf_Linguistic_spellWords(JNIEnv *env, jobject jobj, jobjectArray words)
{
	FUNCTION_HEADER;
	if (!linguistic) return NULL;
	// GetArrayLength must not be called with a null array.
	if (!words) return NULL;

	try {
		CFSArray<CPTWord> ptwords;
		INTPTR ipSize = env->GetArrayLength(words);
		for (INTPTR ip=0; ip<ipSize; ip++) {
			jstring jword = (jstring)env->GetObjectArrayElement(words, ip);
			ptwords.AddItem(FSJNIStrtoW(env, jword));
			// Release per element so long inputs do not fill the local reference table.
			if (jword) env->DeleteLocalRef(jword);
		}

		CFSArray<SPLRESULT> splresults = linguistic->SpellWords(ptwords);

		// NewIntArray returns NULL (with a pending OutOfMemoryError) on failure.
		jintArray result = env->NewIntArray(splresults.GetSize());
		if (!result) return NULL;

		for (INTPTR ip=0; ip<splresults.GetSize(); ip++) {
			jint result1 = splresults[ip];
			env->SetIntArrayRegion(result, ip, 1, &result1);
		}
		return result;
	} catch (...) {
		return NULL;
	}
}
Пример #16
0
// Assigns stress values to the syllables of one word.
//
// Original note: the most radical stress assignment so far; `wp` tells whether
// this is the first member of a compound word, in which case it carries the
// main stress. In code terms: with wp == 0 the leading/anchor syllable gets
// stress 2, otherwise 1. Other stressed syllables get 1, cleared ones get 0.
// extra_stress() selects the anchor syllable; 0 means the plain alternating pattern.
void add_stress2(CFSArray<syl_struct> &sv, INTPTR wp) {
    INTPTR primary = 2;
    INTPTR secondary = 1;
    INTPTR count = sv.GetSize();
    INTPTR anchor = extra_stress(sv, count);

    // Value for the syllable that may carry the main stress.
    const INTPTR head = (wp == 0) ? primary : secondary;

    if (anchor == 0) {
        // Every even-indexed syllable is stressed; the last one never is
        // (unless the word has a single syllable).
        for (INTPTR k = 0; k < count; k++) {
            if (k % 2 != 0) continue;
            sv[k].stress = (k == 0) ? head : secondary;
        }
        if (count > 1) sv[count - 1].stress = 0;
        return;
    }

    sv[anchor].stress = head;

    // First half: syllables before the anchor.
    if (anchor == 1) {
        sv[0].stress = 0;
    } else {
        for (INTPTR k = anchor - 1; k >= 0; k--) {
            if (k % 2 == 0) sv[k].stress = secondary;
        }
    }

    // The syllable right before an odd anchor (beyond index 1) is cleared.
    if ((anchor > 1) && (anchor % 2 != 0)) {
        sv[anchor - 1].stress = 0;
    }

    // Second half: syllables after the anchor, only when the tail is long enough.
    INTPTR tail = count - anchor;
    if (tail > 3) {
        for (INTPTR k = anchor + 1; k < count; k++) {
            if (k % 2 != 0) sv[k].stress = secondary;
        }
        sv[count - 1].stress = 0;
    }
}
Пример #17
0
// Spell-checks every word and records the verdict under "spelling".
// A word counts as correct when, after removing hyphens and punctuation and
// trimming, it is empty or SpellWord() reports SPL_NOERROR. For incorrect
// words, and only when `suggest` is true, the Suggest() results are stored
// as an array under "suggestions".
void addSuggestions(CLinguistic& linguistic, CFSArray<CFSVar>& words, const bool suggest) {
    for (INTPTR w = 0; w < words.GetSize(); w++) {
        CFSVar &entry = words[w];

        // Normalise the text before checking it.
        CPTWord cleaned=entry["text"].GetWString();
        cleaned.RemoveHyphens();
        cleaned.RemovePunctuation();
        cleaned.Trim();

        const bool correct = cleaned.m_szWord.IsEmpty() || linguistic.SpellWord(cleaned.m_szWord)==SPL_NOERROR;
        entry["spelling"] = correct;

        // Suggestions are produced only for misspelled words, and only on request.
        if (correct || !suggest) {
            continue;
        }

        CFSWStringArray proposals = linguistic.Suggest(cleaned.m_szWord);
        CFSVar proposalList;
        proposalList.Cast(CFSVar::VAR_ARRAY);
        for (INTPTR s = 0; s < proposals.GetSize(); s++) {
            proposalList[s] = proposals[s];
        }
        entry["suggestions"] = proposalList;
    }
}
Пример #18
0
	// Called with the key of a value that has just been read and the data
	// collected for it. Depending on which level of the document the key names
	// (root, "/paragraphs", a paragraph, its "sentences" list, or a sentence),
	// the matching end marker or value is written to m_Writer and
	// m_iCollectData is adjusted. For a sentence that has "words", the words'
	// existing analyses are run through m_Disambiguator and written back
	// before the sentence is emitted.
	// NOTE(review): the exact meaning of m_iCollectData and of SubKeys() is
	// defined outside this block - confirm against the enclosing class.
	void OnValReadEnd(const CFSAString &szKey, CFSVar &Data) {
		if (szKey.IsEmpty()) {
			// Root object finished.
			SubKeys("paragraphs", Data);
			m_Writer.ObjectEnd();
		} else if (szKey=="/paragraphs") {
			// Paragraph list finished.
			m_Writer.ArrayEnd();
			m_iCollectData++;
		} else if (KeyMatch(szKey, "/paragraphs/%d")) {
			// One paragraph finished.
			SubKeys("sentences", Data);
			m_Writer.ObjectEnd();
			m_iCollectData--;
		} else if (KeyMatch(szKey, "/paragraphs/%d/sentences")) {
			// Sentence list of a paragraph finished.
			m_Writer.ArrayEnd();
			m_iCollectData++;
		} else if (KeyMatch(szKey, "/paragraphs/%d/sentences/%d")) {
			// One sentence finished: disambiguate its words, then write it out.
			if (Data.KeyExist("words")) {

				// Step 1: rebuild the disambiguator input from each word's
				// "text" and the items of its "analysis" list.
				CFSVar &Words=Data["words"];
				CFSArray<CMorphInfos> WordsAnalysis;
				for (INTPTR ip=0; ip<Words.GetSize(); ip++) {
					const CFSVar &Word=Words[ip];
					CMorphInfos Analysis;
					Analysis.m_szWord=Word["text"].GetWString();
					const CFSVar &VarAnalysis=Word["analysis"];
					for (INTPTR ip2=0; ip2<VarAnalysis.GetSize(); ip2++) {
						const CFSVar &VarAnalysis1=VarAnalysis[ip2];
						CMorphInfo Analysis1;
						Analysis1.m_szRoot=VarAnalysis1["root"].GetWString();
						Analysis1.m_szEnding=VarAnalysis1["ending"].GetWString();
						Analysis1.m_szClitic=VarAnalysis1["clitic"].GetWString();
						// Only the first character of "partofspeech" is used.
						Analysis1.m_cPOS=VarAnalysis1["partofspeech"].GetWString()[0];
						Analysis1.m_szForm=VarAnalysis1["form"].GetWString();
						Analysis.m_MorphInfo.AddItem(Analysis1);
					}
					WordsAnalysis.AddItem(Analysis);
				}

				// Step 2: disambiguate; one result per input word is required.
				WordsAnalysis=m_Disambiguator.Disambiguate(WordsAnalysis);
				RT_ASSERT(Words.GetSize()==WordsAnalysis.GetSize());

				// Step 3: replace each word's "analysis" with the disambiguated readings.
				for (INTPTR ip=0; ip<Words.GetSize(); ip++) {
					const CMorphInfos &Analysis=WordsAnalysis[ip];
					CFSVar VarAnalysis;
					// Always an array, even when no reading is left.
					VarAnalysis.Cast(CFSVar::VAR_ARRAY);
					for (INTPTR ipRes=0; ipRes<Analysis.m_MorphInfo.GetSize(); ipRes++) {
						const CMorphInfo &Analysis1=Analysis.m_MorphInfo[ipRes];
						CFSVar VarAnalysis1;
						VarAnalysis1["root"]=Analysis1.m_szRoot;
						VarAnalysis1["ending"]=Analysis1.m_szEnding;
						VarAnalysis1["clitic"]=Analysis1.m_szClitic;
						VarAnalysis1["partofspeech"]=CFSWString(Analysis1.m_cPOS);
						VarAnalysis1["form"]=Analysis1.m_szForm;
						VarAnalysis[ipRes]=VarAnalysis1;
					}
					Words[ip]["analysis"]=VarAnalysis;
				}

			}
			// The sentence is written whether or not it had "words".
			m_Writer.Val(Data);
			m_iCollectData--;
		}
	}
Пример #19
0
// Builds the label lines for one utterance.
//
// The lower-cased text is split into phrases (do_phrases) and words
// (phrase2words); every word is then split into syllables (do_syls) and
// phones (do_phones) while the position and count fields of the
// utterance/phrase/word/syllable/phone records are filled in. The finished
// structure is passed to do_label(), whose result is returned.
//   print_label - when true, the utterance structure is passed to print_u()
//   print_utt   - when true, the original text is written to stderr
CFSArray<CFSWString> do_all(CFSWString utt, bool print_label, bool print_utt) {
    CFSArray<CFSWString> res;
    CFSArray<CPTWord> PTW;
    utterance_struct u;
    

    u.s = utt.ToLower();
    u.syl_c = 0;
    u.phone_c = 0;
    u.phra_c = do_phrases(u);
    INTPTR word_count = 0;
    
    if (print_utt) fprintf(stderr, "%s\n", ccstr(utt));
       
    // Split every phrase into words; PTW collects the words of all phrases in order.
    for (INTPTR i = 0; i < u.phr_vector.GetSize(); i++) {
        u.phr_vector[i].utt_p = i;        
        phrase2words(u.phr_vector[i], PTW);
        word_count += u.phr_vector[i].word_c;        
        
    }
    

    // No morphological analysis here: each word is used as its own root.
    CFSArray<CMorphInfo> words;
    for (INTPTR i = 0; i < PTW.GetSize(); i++) {
        CMorphInfo MI;
        MI.m_szRoot = PTW[i].m_szWord;
        words.AddItem(MI);
    }
        
    
    u.word_c = words.GetSize();


    word_struct w;

    // Utterance-wide running positions (1-based).
    INTPTR utt_phone_c = 1;
    INTPTR syl_utt_p = 1;
    INTPTR phone_utt_p = 1;



    for (INTPTR i = 0; i < u.phr_vector.GetSize(); i++) {
        // Phrase positions are 1-based; this replaces the value set in the first loop.
        u.phr_vector[i].utt_p = i + 1;
        // Phrase-wide running positions (1-based).
        INTPTR syl_phr_p = 1;
        INTPTR phone_phr_p = 1;
        INTPTR phrase_pho_c = 1;
        for (INTPTR i1 = 0; i1 < u.phr_vector[i].word_c; i1++) {
            
            w.utt_p = utt_phone_c++;
            w.phr_p = i1 + 1;
            // `words` is consumed from the front: the head is the current word.
            w.mi = words[0];
            w.mi.m_szRoot += make_char_string(w.mi.m_szEnding) + w.mi.m_szClitic;
            w.mi.m_szRoot = w.mi.m_szRoot.ToLower();
            do_syls(w);
            u.phr_vector[i].word_vector.AddItem(w);

            // Word-wide running positions (1-based).
            INTPTR phone_word_p = 1;
            INTPTR word_pho_c = 1;
            for (INTPTR i2 = 0; i2 < u.phr_vector[i].word_vector[i1].syl_vector.GetSize(); i2++) {
                u.syl_c++;
                u.phr_vector[i].syl_c++;
                u.phr_vector[i].word_vector[i1].syl_c++;
                u.phr_vector[i].word_vector[i1].syl_vector[i2].phr_p = syl_phr_p++;
                u.phr_vector[i].word_vector[i1].syl_vector[i2].utt_p = syl_utt_p++;
                INTPTR syl_phone_c = 1;

                do_phones(u.phr_vector[i].word_vector[i1].syl_vector[i2]);

                for (INTPTR i3 = 0; i3 < u.phr_vector[i].word_vector[i1].syl_vector[i2].phone_vector.GetSize(); i3++) {
                    // The phone counts of the enclosing units end up as the number of phones seen so far.
                    u.phone_c++;
                    u.phr_vector[i].phone_c = phrase_pho_c++;
                    u.phr_vector[i].word_vector[i1].phone_c = word_pho_c++;
                    u.phr_vector[i].word_vector[i1].syl_vector[i2].phone_c = syl_phone_c++;
                    // Fill in the phone's positions on a copy and store it back.
                    phone_struct p = u.phr_vector[i].word_vector[i1].syl_vector[i2].phone_vector[i3];
                    p.utt_p = phone_utt_p++;
                    p.phr_p = phone_phr_p++;
                    p.word_p = phone_word_p++;
                    u.phr_vector[i].word_vector[i1].syl_vector[i2].phone_vector[i3] = p;

                }

            }
            // Drop the word just processed so that words[0] is the next one.
            words.RemoveItem(0, 1);
        }
    }




    if (print_label) print_u(u);
    res = do_label(u);
    return res;
}
Пример #20
0
// Command-line entry point: reads a UTF-8 text file, turns it into label
// lines utterance by utterance and synthesises speech with the HTS engine.
//
// Options read from argv:
//   -lex <file>   lexicon opened with Linguistic.Open()        (required)
//   -lexd <file>  lexicon opened with Disambiguator.Open()     (required)
//   -m <file>     *.htsvoice file                              (required)
//   -o <file>     output file                                  (required)
//   -f <file>     input text file                              (required)
//   -s <n>        passed to samplerate() to set fr/fp/alpha
//   -r <x>        speed
//   -ht <x>       half-tone shift
//   -gvw1 <x>, -gvw2 <x>  GV weights of streams 0 and 1
//   -debug        pass print_label=true to do_all()
//   -utt          pass print_utt=true to do_all()
//   -raw          do not write the output header
//   -dur <file>   also write a duration label file
//
// Returns 0 on success. Usage errors return/exit as before; a missing
// required option now ends in PrintUsage() instead of using an
// uninitialised file name, and files that cannot be opened exit with 1.
int main(int argc, char* argv[]) {
    // Only one voice is ever loaded; no heap allocation is needed for the list.
    char *fn_voices[1] = { NULL };
    char* in_fname = NULL;
    char* output_fname = NULL;
    FILE * outfp = NULL;
    char* dur_fname = NULL;
    FILE * durfp = NULL;
    bool print_label = false;
    bool print_utt = false;
    bool write_raw = false;
    bool write_durlabel = false;

    CFSAString LexFileName, LexDFileName;
    HTS_Engine engine;
    double speed = 1.1;
    size_t fr = 48000;
    size_t fp = 240;
    float alpha = 0.55;
    float beta = 0.0;
    float ht = 2.0;
    float th = 0.5;
    float gvw1 = 1.0;
    float gvw2 = 1.2;

    FSCInit();

    // The five required options with their values need at least 11 arguments.
    if (argc < 11) {
        fprintf(stderr, "Viga: liiga vähe parameetreid\n\n");
        return PrintUsage();
    }

    for (int i = 0; i < argc; i++) {
        if (CFSAString("-lex") == argv[i]) {
            if (i + 1 < argc) {
                LexFileName = argv[++i];
            } else {
                return PrintUsage();
            }
        }
        if (CFSAString("-lexd") == argv[i]) {
            if (i + 1 < argc) {
                LexDFileName = argv[++i];
            } else {
                return PrintUsage();
            }
        }
        if (CFSAString("-m") == argv[i]) {
            if (i + 1 < argc) {
                fn_voices[0] = argv[i + 1];
            } else {
                fprintf(stderr, "Viga: puudub *.htsvoice fail\n");
                PrintUsage();
                exit(0);
            }
        }
        if (CFSAString("-o") == argv[i]) {
            if (i + 1 < argc) {
                output_fname = argv[i + 1];
                cfileexists(output_fname);
            } else {
                fprintf(stderr, "Viga: puudb väljundfaili nimi\n");
                PrintUsage();
                exit(0);
            }
        }
        if (CFSAString("-f") == argv[i]) {
            if (i + 1 < argc) {
                in_fname = argv[i + 1];
            } else {
                fprintf(stderr, "Viga: puudb sisendfaili nimi\n");
                PrintUsage();
                exit(0);
            }
        }
        if (CFSAString("-s") == argv[i]) {
            if (i + 1 < argc) {
                samplerate(fr, fp, alpha, atoi(argv[i + 1]));
            }
        }
        if (CFSAString("-r") == argv[i]) {
            if (i + 1 < argc) {
                speed = atof(argv[i + 1]);
            }
        }
        if (CFSAString("-ht") == argv[i]) {
            if (i + 1 < argc) {
                ht = atof(argv[i + 1]);
            }
        }
        if (CFSAString("-gvw1") == argv[i]) {
            if (i + 1 < argc) {
                gvw1 = atof(argv[i + 1]);
            }
        }
        if (CFSAString("-gvw2") == argv[i]) {
            if (i + 1 < argc) {
                gvw2 = atof(argv[i + 1]);
            }
        }
        if (CFSAString("-debug") == argv[i]) {
            print_label = true;
        }
        if (CFSAString("-utt") == argv[i]) {
            print_utt = true;
        }
        if (CFSAString("-raw") == argv[i]) {
            write_raw = true;
        }
        if (CFSAString("-dur") == argv[i]) {
            if (i + 1 < argc) {
                dur_fname = argv[i + 1];
                cfileexists(dur_fname);
                write_durlabel = true;
            } else {
                fprintf(stderr, "Viga: puudb kestustefaili nimi\n");
                PrintUsage();
                exit(0);
            }
        }
    }

    // Enough arguments does not mean the right ones: every required option
    // must actually have been seen before its value is used below.
    if (LexFileName.IsEmpty() || LexDFileName.IsEmpty() ||
            fn_voices[0] == NULL || output_fname == NULL || in_fname == NULL) {
        fprintf(stderr, "Viga: liiga vähe parameetreid\n\n");
        return PrintUsage();
    }

    Linguistic.Open(LexFileName);
    Disambiguator.Open(LexDFileName);

    CFSWString text;
    ReadUTF8Text(text, in_fname);
    HTS_Engine_initialize(&engine);

    if (HTS_Engine_load(&engine, fn_voices, 1) != TRUE) {
        // Report the file name itself rather than its address.
        fprintf(stderr, "Viga: puudub *.htsvoice. %s\n", fn_voices[0]);
        HTS_Engine_clear(&engine);
        exit(1);
    }

    HTS_Engine_set_sampling_frequency(&engine, (size_t) fr);
    HTS_Engine_set_phoneme_alignment_flag(&engine, FALSE);
    HTS_Engine_set_fperiod(&engine, (size_t) fp);
    HTS_Engine_set_alpha(&engine, alpha);
    HTS_Engine_set_beta(&engine, beta);
    HTS_Engine_set_speed(&engine, speed);
    HTS_Engine_add_half_tone(&engine, ht);
    HTS_Engine_set_msd_threshold(&engine, 1, th);
    /*
    HTS_Engine_set_duration_interpolation_weight(&engine, 1, diw);
    HTS_Engine_set_parameter_interpolation_weight(&engine, 0, 0, piw1);
    HTS_Engine_set_parameter_interpolation_weight(&engine, 0, 1, piw2);
    HTS_Engine_set_gv_interpolation_weight(&engine, 0, 0, giw1);
    HTS_Engine_set_gv_interpolation_weight(&engine, 0, 1, giw2);
     */
    HTS_Engine_set_gv_weight(&engine, 0, gvw1);
    HTS_Engine_set_gv_weight(&engine, 1, gvw2);

    text = DealWithText(text);
    CFSArray<CFSWString> res = do_utterances(text);

    INTPTR data_size = 0;
    outfp = fopen(output_fname, "wb");
    if (outfp == NULL) {
        fprintf(stderr, "Viga: väljundfaili ei saa avada: %s\n", output_fname);
        HTS_Engine_clear(&engine);
        exit(1);
    }
    if (write_durlabel) {
        durfp = fopen(dur_fname, "w");
        if (durfp == NULL) {
            fprintf(stderr, "Viga: kestustefaili ei saa avada: %s\n", dur_fname);
            fclose(outfp);
            HTS_Engine_clear(&engine);
            exit(1);
        }
    }

    // Header with a provisional size; it is written again with the real size
    // once all utterances have been synthesised.
    if (!write_raw) HTS_Engine_write_header(&engine, outfp, 1);
    for (INTPTR i = 0; i < res.GetSize(); i++) {

        CFSArray<CFSWString> label = do_all(res[i], print_label, print_utt);

        std::vector<std::string> v;
        v = to_vector(label);

        std::vector<char*> vc;
        fill_char_vector(v, vc);

        size_t n_lines = vc.size();

        if (HTS_Engine_synthesize_from_strings(&engine, &vc[0], n_lines) != TRUE) {
            fprintf(stderr, "Viga: süntees ebaonnestus.\n");
            if (durfp != NULL) fclose(durfp);
            fclose(outfp);
            HTS_Engine_clear(&engine);
            exit(1);
        }

        clean_char_vector(vc);
        data_size += HTS_Engine_engine_speech_size(&engine);
        if (write_durlabel) HTS_Engine_save_durlabel(&engine, durfp);
        HTS_Engine_save_generated_speech(&engine, outfp);

        HTS_Engine_refresh(&engine);

    } //synth loop

    if (!write_raw) HTS_Engine_write_header(&engine, outfp, data_size);
    if (write_durlabel) fclose(durfp);
    fclose(outfp);

    HTS_Engine_clear(&engine);
    Linguistic.Close();

    FSCTerminate();
    return 0;

}
Пример #21
0
// Returns the items of `arr`, each converted with to_stdstring(), as a
// std::vector in the same order.
std::vector<std::string> to_vector(CFSArray<CFSWString> arr) {
    std::vector<std::string> converted;
    INTPTR idx = 0;
    while (idx < arr.GetSize()) {
        converted.push_back(to_stdstring(arr[idx]));
        ++idx;
    }
    return converted;
}
Пример #22
0
// Builds the label lines for one utterance.
//
// The text is split into phrases (do_phrases) and words (phrase2words); the
// words are analysed and disambiguated, and the first reading of every word
// is used. Each word is then split into syllables (do_syls) and phones
// (do_phones) while the position and count fields of the
// utterance/phrase/word/syllable/phone records are filled in. The finished
// structure is passed to do_label(), whose result is returned.
//   print_label - when true, the utterance structure is passed to print_u()
//   print_utt   - when true, the original text is written to stderr
CFSArray<CFSWString> do_all(CFSWString utt, bool print_label, bool print_utt) {
    CFSArray<CFSWString> res;
    CFSArray<CPTWord> PTW;
    utterance_struct u;
    u.s = utt;
    u.syl_c = 0;
    u.phone_c = 0;
    u.phra_c = do_phrases(u);
    INTPTR word_count = 0;
    if (print_utt) fprintf(stderr, "%s\n", ccstr(utt));

    // Split every phrase into words; PTW collects the words of all phrases in order.
    for (INTPTR i = 0; i < u.phr_vector.GetSize(); i++) {
        u.phr_vector[i].utt_p = i;
        phrase2words(u.phr_vector[i], PTW);
        word_count += u.phr_vector[i].word_c;
    }

    CFSArray<CMorphInfos> MRs = Disambiguator.Disambiguate(Linguistic.AnalyzeSentense(PTW));

    CFSArray<CMorphInfo> words;

    // Original note: this is where the disambiguation result is itself
    // disambiguated - the first remaining reading wins.
    for (INTPTR i = 0; i < MRs.GetSize(); i++) {
        if (MRs[i].m_MorphInfo.GetSize() > 0) {
            words.AddItem(MRs[i].m_MorphInfo[0]);
        } else {
            // A word without any reading must still occupy its slot, otherwise
            // `words` falls out of step with the per-phrase word counts.
            // Use the word itself as the root.
            CMorphInfo MI;
            MI.m_szRoot = MRs[i].m_szWord;
            words.AddItem(MI);
        }
    }


    u.word_c = words.GetSize();


    word_struct w;

    // Utterance-wide running positions (1-based).
    INTPTR utt_phone_c = 1;
    INTPTR syl_utt_p = 1;
    INTPTR phone_utt_p = 1;



    for (INTPTR i = 0; i < u.phr_vector.GetSize(); i++) {
        // Phrase positions are 1-based; this replaces the value set in the first loop.
        u.phr_vector[i].utt_p = i + 1;
        // Phrase-wide running positions (1-based).
        INTPTR syl_phr_p = 1;
        INTPTR phone_phr_p = 1;
        INTPTR phrase_pho_c = 1;
        for (INTPTR i1 = 0; i1 < u.phr_vector[i].word_c; i1++) {
            // Never read past the analysed words, even if the analyser returned
            // fewer entries than the phrases announce.
            if (words.GetSize() == 0) break;

            w.utt_p = utt_phone_c++;
            w.phr_p = i1 + 1;
            // `words` is consumed from the front: the head is the current word.
            w.mi = words[0];
            w.mi.m_szRoot += make_char_string(w.mi.m_szEnding) + w.mi.m_szClitic;
            w.mi.m_szRoot = w.mi.m_szRoot.ToLower();
            // Original note: conjunctions and "ei" are not given a quantity
            // degree - the first "<" is removed for POS 'J' and for "<ei".
            if ((CFSWString(w.mi.m_cPOS) == L"J") || (w.mi.m_szRoot == L"<ei")) w.mi.m_szRoot.Replace(L"<", L"", 1);
            do_syls(w);
            u.phr_vector[i].word_vector.AddItem(w);

            // Word-wide running positions (1-based).
            INTPTR phone_word_p = 1;
            INTPTR word_pho_c = 1;
            for (INTPTR i2 = 0; i2 < u.phr_vector[i].word_vector[i1].syl_vector.GetSize(); i2++) {
                u.syl_c++;
                u.phr_vector[i].syl_c++;
                u.phr_vector[i].word_vector[i1].syl_c++;
                u.phr_vector[i].word_vector[i1].syl_vector[i2].phr_p = syl_phr_p++;
                u.phr_vector[i].word_vector[i1].syl_vector[i2].utt_p = syl_utt_p++;
                INTPTR syl_phone_c = 1;

                do_phones(u.phr_vector[i].word_vector[i1].syl_vector[i2]);

                for (INTPTR i3 = 0; i3 < u.phr_vector[i].word_vector[i1].syl_vector[i2].phone_vector.GetSize(); i3++) {
                    // The phone counts of the enclosing units end up as the number of phones seen so far.
                    u.phone_c++;
                    u.phr_vector[i].phone_c = phrase_pho_c++;
                    u.phr_vector[i].word_vector[i1].phone_c = word_pho_c++;
                    u.phr_vector[i].word_vector[i1].syl_vector[i2].phone_c = syl_phone_c++;
                    // Fill in the phone's positions on a copy and store it back.
                    phone_struct p = u.phr_vector[i].word_vector[i1].syl_vector[i2].phone_vector[i3];
                    p.utt_p = phone_utt_p++;
                    p.phr_p = phone_phr_p++;
                    p.word_p = phone_word_p++;
                    u.phr_vector[i].word_vector[i1].syl_vector[i2].phone_vector[i3] = p;

                }

            }
            // Drop the word just processed so that words[0] is the next one.
            words.RemoveItem(0, 1);
        }
    }




    if (print_label) print_u(u);
    res = do_label(u);
    return res;
}