/*
 * Forward one domain check to testx() using the fixed arguments
 * "iso-8859-15" and "de" (presumably the input encoding and the language;
 * confirm against testx()).
 *
 * The check is only meaningful when a runtime IDN library is configured,
 * so the call is compiled in only if WITH_LIBIDN, WITH_LIBIDN2 or
 * WITH_LIBICU is defined. Otherwise the function body is empty.
 */
static void test_iso(const psl_ctx_t *psl, const char *domain, const char *expected_result)
{
#if defined(WITH_LIBIDN) || defined(WITH_LIBIDN2) || defined(WITH_LIBICU)
    const char *const charset = "iso-8859-15";
    const char *const language = "de";

    testx(psl, domain, charset, language, expected_result);
#endif
}
double cross_validation(Matrix<double> x, Vector<double> y, Kernel *k, double c, int cross = -1) { // 交差検定 int group = cross; if (group <= 0) group = 1 + log2(x.nrows()); int size = x.nrows() / group; double accuracy = 0.0; for (int i = 0; i < group; i++) { size_t si = i * size, sz = size + (i == group - 1 ? (x.nrows() % size) : 0); Matrix<double> samplex(x.nrows() - sz, x.ncols()), testx(sz, x.ncols()); Vector<double> sampley(y.size() - sz), testy(sz); for (size_t j = 0; j < x.nrows(); j++) { if (j < si) { samplex.setRow(j, x.extractRow(j)); sampley[j] = y[j]; } else if (j < si + sz) { testx.setRow(j - si, x.extractRow(j)); testy[j - si] = y[j]; } else { samplex.setRow(j - sz, x.extractRow(j)); sampley[j - sz] = y[j]; } } SVM svm(samplex, sampley, k, c); int pass = 0; for (size_t j = 0; j < sz; j++) if (svm.discriminant(testx.extractRow(j)) * testy[j] > 0.0) pass++; double acc = pass / (double)sz * 100.0; printf("loop #%d :: %f\n", i, acc); accuracy += acc / group; } printf("total accuracy :: %f\n", accuracy); return accuracy; }
/*
 * Forward one domain check to testx() using the fixed arguments "utf-8"
 * and "en" (presumably the input encoding and the language; confirm
 * against testx()).
 */
static void test(const psl_ctx_t *psl, const char *domain, const char *expected_result)
{
    const char *const charset = "utf-8";
    const char *const language = "en";

    testx(psl, domain, charset, language, expected_result);
}
int run_sampler(unsigned epochs,float alpha,unsigned batch_size){ //load sampler string training_path = "PPAttachData/training.lemma"; string param_path = "PPAttachData/wordsketches/"; string vpath = param_path + string("vdistrib"); string x1vpath = param_path + string("x1givenv"); string pvpath = param_path + string("pgivenv"); string x2vppath = param_path + string("x2givenvp"); string px1path = param_path + string("pgivenx1"); string x2x1ppath = param_path + string("x2givenx1p"); DataSampler samp(training_path.c_str(), vpath.c_str(), x1vpath.c_str(), pvpath.c_str(), x2vppath.c_str(), px1path.c_str(), x2x1ppath.c_str()); //load dev and test PPADataEncoder dev_set("PPAttachData/devset.lemma"); PPADataEncoder test_set("PPAttachData/test.lemma"); //load Word vectors Word2vec w2v; vector<string> wvdict; af::array w2v_embeddings; w2v.load_dictionary("PPAttachData/embeddings/deps.words.lemmatized"); //w2v.filter(xdict); //make network vector<string> ydict; samp.getYdictionary(ydict); SymbolicFeedForwardNetwork<string,string> net; net.set_output_layer("loss",new SoftMaxLoss<string>(ydict)); net.add_layer("top",new LinearLayer()); net.add_layer("hidden",new ReLUActivation(400)); net.add_layer("A",new LinearLayer()); net.add_input_layer("lookupA",new LinearLookup<string>(w2v.get_keys(),w2v.get_values(),4,false)); net.connect_layers("loss","top"); net.connect_layers("top","hidden"); net.connect_layers("hidden","A"); net.connect_layers("A","lookupA"); for(int E = 0; E < epochs;++E){ vector<string> ydata; vector<vector<string>> xdata; //af::timer start1 = af::timer::start(); samp.generate_sample(ydata,xdata,batch_size); //printf("elapsed seconds (sampling): %g\n", af::timer::stop(start1)); PPADataEncoder sampdata(ydata,xdata); vector<string> enc_ydata; vector<vector<string>> enc_xdata(1,vector<string>()); sampdata.getYdata(enc_ydata); sampdata.getXdata(enc_xdata[0]); //af::timer start2 = af::timer::start(); net.set_batch_data(enc_ydata,enc_xdata); float loss = 
net.train_one(alpha,true,true); //printf("elapsed seconds (backprop): %g\n", af::timer::stop(start2)); if (E % 20 == 0){ vector<string> devy; vector<vector<string>> devx(1,vector<string>()); dev_set.getYdata(devy); dev_set.getXdata(devx[0]); float acc = net.eval_avg(devy,devx); //auto-eval on dev data cout << "epoch " << E << ", loss= " << loss << ", eval (dev) = " << acc << endl; }else { cout << "epoch" << E <<endl; } } vector<string> testy; vector<vector<string>> testx(1,vector<string>()); test_set.getYdata(testy); test_set.getXdata(testx[0]); float acc = net.eval_avg(testy,testx); cout << "final eval (test) = " << acc << endl; return 0; }