// Fit the model on X by converting it to a corpus once and running the
// configured number of training passes over it.
void LDA<Scalar>::fit(const Eigen::MatrixXi &X) {
    const auto corpus = get_corpus(X);

    size_t pass = 0;
    while (pass < iterations_) {
        partial_fit(corpus);
        ++pass;
    }
}
// Resynthesize one corpus utterance through the trained CRF, write the
// result and the reference concatenation to disk, and log cost/quality
// metrics.  opts.input selects the utterance index.  Returns 0.
int resynthesize(Options& opts) {
  readCoefOptions(opts);
  auto index = util::parse<unsigned>(opts.input);
  auto corpus = get_corpus(opts);
  assert(index < corpus.size());

  std::vector<PhonemeInstance> input = corpus.input(index);
  INFO("Input file: " << alphabet_test.file_data_of(input[0]).file);
  INFO("Total duration: " << get_total_duration(input));
  // Cost of the identity path, i.e. reproducing the input with itself.
  INFO("Original cost: " << concat_cost(input, crf, crf.lambda, input));

  // Find the best path through the CRF automaton for this input.
  std::vector<int> path;
  traverse_automaton<MinPathFindFunctions, CRF, 1>(input, crf, crf.lambda, &path);
  std::vector<PhonemeInstance> output = crf.alphabet().to_phonemes(path);

  SynthPrinter sp(crf.alphabet(), labels_all);
  if(opts.has_opt("verbose"))
    sp.print_synth(path, input);
  sp.print_textgrid(path, input, labels_synth, opts.text_grid);

  // Cost of the selected path, with per-feature stats collected for output.
  CRF::Stats stats;
  INFO("Resynth. cost: " << concat_cost(output, crf, crf.lambda, input, &stats));
  //INFO("Second best cost: " << costs[1]);
  // Same path scored under the baseline model, for comparison.
  auto baselineCost = concat_cost(output, baseline_crf, baseline_crf.lambda, input);
  INFO("Baseline cost: " << baselineCost);
  outputStats(crf.lambda, stats, opts);
  outputPath(opts, output, input);

  // Write the resynthesized signal...
  auto sws = SpeechWaveSynthesis(output, input, crf.alphabet());
  Wave outputSignal = sws.get_resynthesis(opts);
  outputSignal.write(opts.get_opt<std::string>("output", "resynth.wav"));
  // ...and the plain concatenation of the original units as a reference.
  auto sws2 = SpeechWaveSynthesis(input, input, alphabet_test);
  auto concatenation = sws2.get_concatenation();
  concatenation.write(opts.get_opt<std::string>("original", "original.wav"));

  if(opts.has_opt("verbose")) {
    // Objective quality measures of the resynthesis against the reference.
    Comparisons cmp;
    cmp.fill(concatenation, outputSignal);
    INFO("LogSpectrum = " << cmp.LogSpectrum);
    INFO("LogSpectrumCritical = " << cmp.LogSpectrumCritical);
    INFO("SegSNR = " << cmp.SegSNR);
    INFO("MFCC = " << cmp.MFCC);
    INFO("WSS = " << cmp.WSS);
    outputComparisons(opts, cmp, baselineCost);
  }
  return 0;
}
int baseline(const Options& opts) { readCoefOptions(opts); auto index = opts.get_opt<unsigned>("input", 0); auto corpus = get_corpus(opts); std::vector<PhonemeInstance> input = corpus.input(index); INFO("Input file: " << alphabet_test.file_data_of(input[0]).file); INFO("Total duration: " << get_total_duration(input)); std::vector<int> path; traverse_automaton<MinPathFindFunctions>(input, baseline_crf, baseline_crf.lambda, &path); std::vector<PhonemeInstance> output = baseline_crf.alphabet().to_phonemes(path); Wave outputSignal = SpeechWaveSynthesis(output, input, baseline_crf.alphabet()) .get_resynthesis_td(); outputSignal.write(opts.get_opt<std::string>("output", "baseline.wav")); auto sws2 = SpeechWaveSynthesis(input, input, alphabet_test); auto concatenation = sws2.get_concatenation(); concatenation.write(opts.get_opt<std::string>("original", "original.wav")); CRF::Stats stats; INFO("Baseline cost in original: " << concat_cost(output, crf, crf.lambda, input, &stats)); auto baselineCost = concat_cost(output, baseline_crf, baseline_crf.lambda, input); INFO("Baseline cost:" << baselineCost); outputStats(crf.lambda, stats, opts); outputPath(opts, output, input); if(opts.has_opt("verbose")) { auto sws = SpeechWaveSynthesis(input, input, alphabet_test); auto concatenation = sws.get_concatenation(); Comparisons cmp; cmp.fill(concatenation, outputSignal); INFO("LogSpectrum = " << cmp.LogSpectrum); INFO("LogSpectrumCritical = " << cmp.LogSpectrumCritical); INFO("SegSNR = " << cmp.SegSNR); INFO("MFCC = " << cmp.MFCC); INFO("WSS = " << cmp.WSS); outputComparisons(opts, cmp, baselineCost); } return 0; }
int psola(const Options& opts) { auto inputString = opts.get_opt<std::string>("input", ""); auto inputPhonemes = util::split_string(inputString, ','); std::vector<int> input(inputPhonemes.size()); std::transform(inputPhonemes.begin(), inputPhonemes.end(), input.begin(), util::parse<int>); std::vector<PhonemeInstance> phonemeInput; std::vector<PhonemeInstance> phonemeOutput; auto pitchScale = opts.get_opt<double>("pitch-scale", 1.0); auto durationScale = opts.get_opt<double>("duration-scale", 1.0); auto& alphabet = opts.has_opt("synth") ? alphabet_synth : alphabet_test; if(input.size() > 1) { phonemeInput = alphabet.to_phonemes(input); } else { Corpus& corpus = get_corpus(opts); phonemeInput = corpus.input(input[0]); INFO("Input file: " << alphabet.file_data_of(phonemeInput[0]).file); } for(auto p : phonemeInput) { p.end += p.duration * std::abs(1 - durationScale); p.duration += p.duration * std::abs(1 - durationScale); p.pitch_contour[0] += std::log(pitchScale); p.pitch_contour[1] += std::log(pitchScale); phonemeOutput.push_back(p); } auto sws = SpeechWaveSynthesis(phonemeInput, phonemeOutput, alphabet); auto outputSignal = sws.get_resynthesis(opts); auto original = sws.get_concatenation(); original.write(opts.get_opt<std::string>("original", "")); outputSignal.write(opts.get_opt<std::string>("output", "")); return 0; }
// Build an alignment corpus from a listing file whose lines are
// "<source file> <target file>" pairs.  The translation direction is
// inferred from the listing file's name prefix; each pair is loaded via
// get_corpus().  Exits the process on an unrecognized file name or an
// unopenable listing file.
AlignmentCorpus::AlignmentCorpus(string file_name): number_source_words_(0), number_target_words_(0) {
  // Infer the translation direction from the listing file's name prefix.
  if(file_name.find("training_data_file_names_english_to_french",0) == 0){
    translation_type_ = "english_to_french";
  } else if(file_name.find("training_data_file_names_french_to_english",0) == 0){
    translation_type_ = "french_to_english";
  } else if(file_name.find("test_data_file_names_english_to_french.",0) == 0){
    translation_type_ = "english_to_french";
  } else if(file_name.find("test_data_file_names_french_to_english.",0) == 0){
    translation_type_ = "french_to_english";
  } else{
    cout << "A type of error happened! "<< endl;
    exit(1);
  }

  ifstream data_file(file_name.c_str());
  // Fix: the original never checked the open; a missing listing file
  // silently produced an empty corpus.
  if(!data_file.is_open()){
    cout << "Could not open corpus listing file: " << file_name << endl;
    exit(1);
  }

  string data_line, source_file_name, target_file_name;
  string::size_type split_position;
  // Fix: replaced the unbounded while(true) with a stream-checked getline
  // loop and dropped the unused local counter.  The original sentinel
  // (stop at a line with an empty source part) is preserved.
  while(getline(data_file, data_line)){
    split_position = data_line.find(' ', 0);
    source_file_name = data_line.substr(0, split_position);
    if(source_file_name.size() == 0){
      break;
    }
    // Note: if a line has no space, substr(npos + 1) == substr(0), so the
    // whole line is used as the target — same as the original behavior.
    target_file_name = data_line.substr(split_position + 1);
    get_corpus(source_file_name, target_file_name);
  }
  data_file.close();
}
// Transform the documents in X into their variational gamma parameters
// (one column per document), computed in parallel by the worker pool.
typename LDA<Scalar>::MatrixX LDA<Scalar>::transform(const Eigen::MatrixXi& X) {
    // cast the parameters to what is needed
    auto model = std::static_pointer_cast<parameters::ModelParameters<Scalar> >(
        model_parameters_
    );

    // make some room for the transformed data
    MatrixX gammas(model->beta.rows(), X.cols());

    // make a corpus to use
    auto corpus = get_corpus(X);

    // Queue all the documents
    for (size_t i=0; i<corpus->size(); i++) {
        queue_in_.emplace_back(corpus, i);
    }

    // create the thread pool (workers start draining queue_in_)
    create_worker_pool();

    // Extract variational parameters and calculate the doc_e_step.
    // Results may arrive in any order, so each one carries the document
    // index it belongs to and is written to the matching column.
    for (size_t i=0; i<corpus->size(); i++) {
        std::shared_ptr<parameters::Parameters> vp;
        size_t index;
        std::tie(vp, index) = extract_vp_from_queue();
        gammas.col(index) = std::static_pointer_cast<parameters::VariationalParameters<Scalar> >(vp)->gamma;

        // tell the thread safe event dispatcher to process the events from the
        // workers
        process_worker_events();
    }

    // destroy the thread pool
    destroy_worker_pool();

    return gammas;
}
// Run a single training pass over (X, y) by wrapping the raw matrices in a
// corpus and delegating to the corpus-based partial_fit overload.
void LDA<Scalar>::partial_fit(const Eigen::MatrixXi &X, const Eigen::VectorXi &y) {
    auto corpus = get_corpus(X, y);
    partial_fit(corpus);
}
/* Parse one client request line from buf and dispatch it to the matching
 * dump routine, writing the reply to fd.  The first token selects the
 * operation, words[1] is (for most operations) a corpus number, and the
 * remaining tokens are the query words.  `dir` is currently unused here. */
void parse(int fd, wchar_t* buf, char* dir) {
    /* word word */
    CorpusInfo *corpus = NULL;
    wchar_t *ptr;
    int direction = 0;
    nat_boolean_t exact_match = FALSE;
    nat_boolean_t both = FALSE;
    wchar_t words[50][150];
    int i = 0;
    wchar_t *token = NULL;
    char tmp[150];

    /* Strip the trailing newline, then clear the token table. */
    chomp(buf);
    for (i=0;i<50;i++) wcscpy(words[i], L"");
    i = 0;

    if (wcscmp(buf, L"") == 0) return;

#if DEBUG
    LOG("Request was [%s]", buf);
#endif

    /* Split the request on spaces into words[0..i-1].
     * NOTE(review): neither i (max 50 tokens) nor the token length
     * (max 150 wchars) is bounds-checked — a long request overflows
     * words[].  TODO: clamp both. */
    token = wcstok(buf, L" ", &ptr);
    while(token) {
        wcscpy(words[i], token);
        i++;
        token = wcstok(NULL, L" ", &ptr);
    }

    /* NOTE(review): atoi((char*)words[1]) reinterprets a wide string as a
     * narrow one; this appears to rely on the low byte of each wchar_t
     * coming first (little-endian) so "1\0..." parses as "1" — confirm
     * portability. */
    if (wcsncmp(words[0], L"LIST", 4) == 0) {
        /* Enumerate the known corpora. */
        dump_corpora_list(fd, LAST_CORPORA, CORPORA);
        return;
    } else if (wcsncmp(words[0], L"??", 2) == 0) {
        /* Dump the full configuration of corpus words[1]. */
        dump_all_conf(get_corpus(atoi((char*)words[1])), fd);
        return;
    } else if (wcsncmp(words[0], L"?", 1) == 0) {
        /* Dump a single configuration key (words[2]) of corpus words[1]. */
        sprintf(tmp, "%ls", words[2]);
        dump_conf(get_corpus(atoi((char*)words[1])), fd, tmp);
        return;
    } else if (wcsncmp(words[0], L"~>", 2) == 0) {
        /* Dictionary lookup by word, source->target direction. */
        corpus = get_corpus(atoi((char*)words[1]));
        dump_dict_w(fd, words[2], 1, corpus);
        return;
    } else if (wcsncmp(words[0], L"~#>", 3) == 0) {
        /* Dictionary lookup by number, source->target direction. */
        corpus = get_corpus(atoi((char*)words[1]));
        dump_dict_n(fd, words[2], 1, corpus);
        return;
    } else if (wcsncmp(words[0], L"<~", 2) == 0) {
        /* Dictionary lookup by word, target->source direction. */
        corpus = get_corpus(atoi((char*)words[1]));
        dump_dict_w(fd, words[2], -1, corpus);
        return;
    } else if (wcsncmp(words[0], L"<#~", 3) == 0) {
        /* Dictionary lookup by number, target->source direction. */
        corpus = get_corpus(atoi((char*)words[1]));
        dump_dict_n(fd, words[2], -1, corpus);
        return;
    } else if (wcsncmp(words[0], L"<->", 3) == 0) {
        /* Concordance: both languages, fuzzy match. */
        direction = 1; both = TRUE; exact_match = FALSE;
        corpus = get_corpus(atoi((char*)words[1]));
        dump_conc(fd, corpus, direction, both, exact_match, words, i);
    } else if (wcsncmp(words[0], L"<=>", 3) == 0) {
        /* Concordance: both languages, exact match. */
        direction = 1; both = TRUE; exact_match = TRUE;
        corpus = get_corpus(atoi((char*)words[1]));
        dump_conc(fd, corpus, direction, both, exact_match, words, i);
    } else if (wcsncmp(words[0], L"<-", 2) == 0) {
        /* Concordance: target->source, fuzzy match. */
        direction = -1; both = FALSE; exact_match = FALSE;
        corpus = get_corpus(atoi((char*)words[1]));
        dump_conc(fd, corpus, direction, both, exact_match, words, i);
    } else if (wcsncmp(words[0], L"->", 2) == 0) {
        /* Concordance: source->target, fuzzy match. */
        direction = 1; both = FALSE; exact_match = FALSE;
        corpus = get_corpus(atoi((char*)words[1]));
        dump_conc(fd, corpus, direction, both, exact_match, words, i);
    } else if (wcsncmp(words[0], L"<=", 2) == 0) {
        /* Concordance: target->source, exact match. */
        direction = -1; both = FALSE; exact_match = TRUE;
        corpus = get_corpus(atoi((char*)words[1]));
        dump_conc(fd, corpus, direction, both, exact_match, words, i);
    } else if (wcsncmp(words[0], L"=>", 2) == 0) {
        /* Concordance: source->target, exact match. */
        direction = 1; both = FALSE; exact_match = TRUE;
        corpus = get_corpus(atoi((char*)words[1]));
        dump_conc(fd, corpus, direction, both, exact_match, words, i);
    } else if (wcsncmp(words[0], L":>", 2) == 0) {
        /* N-gram dump, source side. */
        direction = 1;
        corpus = get_corpus(atoi((char*)words[1]));
        dump_ngrams(fd, corpus, direction, words, i );
    } else if (wcsncmp(words[0], L"<:", 2) == 0) {
        /* N-gram dump, target side. */
        direction = -1;
        corpus = get_corpus(atoi((char*)words[1]));
        dump_ngrams(fd, corpus, direction, words, i );
    } else if (wcsncmp(words[0], L"GET", 3) == 0) {
        /* The client spoke HTTP; answer as a minimal web server. */
        LOG("Playing http server");
        play(fd);
        return;
    } else {
        ERROR(fd);
    }
}