int main(int argc, char **argv) {
  int i;
  if (argc == 1) {
    printf("Concatenate the 1st-order embedding and the 2nd-order embeddings\n\n");
    printf("Options:\n");
    printf("Parameters for training:\n");
    printf("\t-input1 <file>\n");
    printf("\t\tThe 1st-order embeddings\n");
    printf("\t-input2 <file>\n");
    printf("\t\tThe 2nd-order embeddings\n");
    printf("\t-output <file>\n");
    printf("\t\tUse <file> to save the concatenated embeddings\n");
    printf("\t-binary <int>\n");
    printf("\t\tSave the learnt embeddings in binary mode; default is 0 (off)\n");
    printf("\nExamples:\n");
    printf("./concatenate -input1 vec_1st.txt -input2 vec_2nd.txt -output vec_all.txt -binary 1\n\n");
    return 0;
  }
  if ((i = ArgPos((char *)"-input1", argc, argv)) > 0) strcpy(vector_file1, argv[i + 1]);
  if ((i = ArgPos((char *)"-input2", argc, argv)) > 0) strcpy(vector_file2, argv[i + 1]);
  if ((i = ArgPos((char *)"-output", argc, argv)) > 0) strcpy(output_file, argv[i + 1]);
  if ((i = ArgPos((char *)"-binary", argc, argv)) > 0) binary = atoi(argv[i + 1]);
  TrainModel();
  return 0;
}
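/*
 * The command-line parsers above and below all rely on an ArgPos() helper to
 * locate a flag in argv. Its definition is not shown in this section; the
 * following is a minimal sketch consistent with how it is called (the usual
 * word2vec-style helper), not necessarily the exact implementation each tool
 * ships with.
 */
#include <cstdio>
#include <cstring>
#include <cstdlib>

int ArgPos(char *str, int argc, char **argv) {
  for (int a = 1; a < argc; a++) {
    if (!strcmp(str, argv[a])) {
      // Every recognized flag is expected to be followed by a value.
      if (a == argc - 1) {
        printf("Argument missing for %s\n", str);
        exit(1);
      }
      return a;  // index of the flag; the caller reads argv[a + 1]
    }
  }
  return -1;  // flag not present on the command line
}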
void tmcn_word2vec(char *train_file0, char *output_file0, char *binary0, char *dims0,
                   char *threads, char *window0, char *classes0, char *cbow0,
                   char *min_count0, char *iter0) {
  int i;
  layer1_size = atoll(dims0);
  num_threads = atoi(threads);
  window = atoi(window0);
  binary = atoi(binary0);
  classes = atoi(classes0);
  cbow = atoi(cbow0);
  min_count = atoi(min_count0);
  iter = atoll(iter0);
  strcpy(train_file, train_file0);
  strcpy(output_file, output_file0);

  alpha = 0.025;
  starting_alpha = alpha;
  word_count_actual = 0;

  vocab = (struct vocab_word *)calloc(vocab_max_size, sizeof(struct vocab_word));
  vocab_hash = (int *)calloc(vocab_hash_size, sizeof(int));
  expTable = (real *)malloc((EXP_TABLE_SIZE + 1) * sizeof(real));
  for (i = 0; i < EXP_TABLE_SIZE; i++) {
    expTable[i] = exp((i / (real)EXP_TABLE_SIZE * 2 - 1) * MAX_EXP); // Precompute the exp() table
    expTable[i] = expTable[i] / (expTable[i] + 1);                   // Precompute f(x) = x / (x + 1)
  }
  TrainModel();
}
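/*
 * Sketch of how the precomputed table above is typically consulted during
 * training (the standard word2vec pattern): expTable[i] ends up holding
 * sigmoid(x) for x = (i / EXP_TABLE_SIZE * 2 - 1) * MAX_EXP, i.e. x swept over
 * [-MAX_EXP, MAX_EXP). The constants below are the stock word2vec defaults and
 * are an assumption here, not values taken from this section.
 */
#define EXP_TABLE_SIZE 1000
#define MAX_EXP 6
typedef float real;

static real fast_sigmoid(real f, const real *expTable) {
  if (f >= MAX_EXP) return 1;   // saturate instead of reading past the table
  if (f <= -MAX_EXP) return 0;
  // Map f from [-MAX_EXP, MAX_EXP) back onto a table index in [0, EXP_TABLE_SIZE).
  return expTable[(int)((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))];
}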
int main(int argc, char **argv) {
  int i;
  if (argc == 1) {
    printf("WORD2PHRASE tool v0.1a\n\n");
    printf("Options:\n");
    printf("Parameters for training:\n");
    printf("\t-train <file>\n");
    printf("\t\tUse text data from <file> to train the model\n");
    printf("\t-output <file>\n");
    printf("\t\tUse <file> to save the resulting word vectors / word clusters / phrases\n");
    printf("\t-min-count <int>\n");
    printf("\t\tThis will discard words that appear less than <int> times; default is 5\n");
    printf("\t-threshold <float>\n");
    printf("\t\tThe <float> value is the threshold for forming phrases (higher means fewer phrases); default 100\n");
    printf("\t-debug <int>\n");
    printf("\t\tSet the debug mode (default = 2 = more info during training)\n");
    printf("\nExamples:\n");
    printf("./word2phrase -train text.txt -output phrases.txt -threshold 100 -debug 2\n\n");
    return 0;
  }
  if ((i = ArgPos((char *)"-train", argc, argv)) > 0) strcpy(train_file, argv[i + 1]);
  if ((i = ArgPos((char *)"-debug", argc, argv)) > 0) debug_mode = atoi(argv[i + 1]);
  if ((i = ArgPos((char *)"-output", argc, argv)) > 0) strcpy(output_file, argv[i + 1]);
  if ((i = ArgPos((char *)"-min-count", argc, argv)) > 0) min_count = atoi(argv[i + 1]);
  if ((i = ArgPos((char *)"-threshold", argc, argv)) > 0) threshold = atof(argv[i + 1]);
  vocab = (struct vocab_word *)calloc(vocab_max_size, sizeof(struct vocab_word));
  vocab_hash = (int *)calloc(vocab_hash_size, sizeof(int));
  TrainModel();
  return 0;
}
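/*
 * What -threshold is compared against: in the stock word2phrase TrainModel(),
 * a bigram "a_b" is promoted to a phrase when a simple PMI-like score exceeds
 * the threshold. The sketch below reproduces that scoring rule; the names
 * (pa, pb, pab, train_words) mirror the original tool and are assumptions
 * here, not code taken from this section.
 */
typedef float real;

static int keep_as_phrase(long long pa, long long pb, long long pab,
                          long long train_words, long long min_count, real threshold) {
  if (pab < min_count) return 0;  // rare bigrams are discarded outright
  // Higher threshold -> fewer phrases, matching the help text above.
  real score = (real)(pab - min_count) / (real)pa / (real)pb * (real)train_words;
  return score > threshold;
}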
SEXP CatBoostFit_R(SEXP learnPoolParam, SEXP testPoolParam, SEXP fitParamsAsJsonParam) {
    SEXP result = NULL;
    R_API_BEGIN();
    TPoolHandle learnPool = reinterpret_cast<TPoolHandle>(R_ExternalPtrAddr(learnPoolParam));
    auto fitParams = LoadFitParams(fitParamsAsJsonParam);
    TFullModelPtr modelPtr = std::make_unique<TFullModel>();
    TEvalResult evalResult;
    if (testPoolParam != R_NilValue) {
        TPoolHandle testPool = reinterpret_cast<TPoolHandle>(R_ExternalPtrAddr(testPoolParam));
        TrainModel(fitParams, Nothing(), Nothing(), *learnPool, false, *testPool, "", modelPtr.get(), &evalResult);
    } else {
        TrainModel(fitParams, Nothing(), Nothing(), *learnPool, false, TPool(), "", modelPtr.get(), &evalResult);
    }
    result = PROTECT(R_MakeExternalPtr(modelPtr.get(), R_NilValue, R_NilValue));
    R_RegisterCFinalizerEx(result, _Finalizer<TFullModelHandle>, TRUE);
    modelPtr.release();
    R_API_END();
    UNPROTECT(1);
    return result;
}
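/*
 * The trained model above is handed to R as an external pointer and released
 * from the unique_ptr, so the registered finalizer is what eventually frees
 * it. The _Finalizer template is not shown in this section; below is a hedged
 * sketch of the usual shape of such a finalizer, assuming THandle is a raw
 * pointer type (e.g. TFullModel*). It is illustrative, not the package's
 * actual definition.
 */
#include <Rinternals.h>

template <typename THandle>
static void _Finalizer(SEXP ext) {
    void *ptr = R_ExternalPtrAddr(ext);
    if (ptr == nullptr) {
        return;  // already finalized, or the pointer was never set
    }
    delete reinterpret_cast<THandle>(ptr);  // free the C++ object owned by R
    R_ClearExternalPtr(ext);                // guard against double deletion
}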
void LearnEmbeddings(TVVec<TInt, int64>& WalksVV, int& Dimensions, int& WinSize,
                     int& Iter, bool& Verbose, TIntFltVH& EmbeddingsHV) {
  TIntIntH RnmH;
  TIntIntH RnmBackH;
  int64 NNodes = 0;
  // renaming nodes into consecutive numbers
  for (int i = 0; i < WalksVV.GetXDim(); i++) {
    for (int64 j = 0; j < WalksVV.GetYDim(); j++) {
      if (RnmH.IsKey(WalksVV(i, j))) {
        WalksVV(i, j) = RnmH.GetDat(WalksVV(i, j));
      } else {
        RnmH.AddDat(WalksVV(i, j), NNodes);
        RnmBackH.AddDat(NNodes, WalksVV(i, j));
        WalksVV(i, j) = NNodes++;
      }
    }
  }
  TIntV Vocab(NNodes);
  LearnVocab(WalksVV, Vocab);
  TIntV KTable(NNodes);
  TFltV UTable(NNodes);
  TVVec<TFlt, int64> SynNeg;
  TVVec<TFlt, int64> SynPos;
  TRnd Rnd(time(NULL));
  InitPosEmb(Vocab, Dimensions, Rnd, SynPos);
  InitNegEmb(Vocab, Dimensions, SynNeg);
  InitUnigramTable(Vocab, KTable, UTable);
  TFltV ExpTable(TableSize);
  double Alpha = StartAlpha;  // learning rate
#pragma omp parallel for schedule(dynamic)
  for (int i = 0; i < TableSize; i++) {
    double Value = -MaxExp + static_cast<double>(i) / static_cast<double>(ExpTablePrecision);
    ExpTable[i] = TMath::Power(TMath::E, Value);
  }
  int64 WordCntAll = 0;
  // op RS 2016/09/26, collapse does not compile on Mac OS X
  //#pragma omp parallel for schedule(dynamic) collapse(2)
  for (int j = 0; j < Iter; j++) {
#pragma omp parallel for schedule(dynamic)
    for (int64 i = 0; i < WalksVV.GetXDim(); i++) {
      TrainModel(WalksVV, Dimensions, WinSize, Iter, Verbose, KTable, UTable,
                 WordCntAll, ExpTable, Alpha, i, Rnd, SynNeg, SynPos);
    }
  }
  if (Verbose) {
    printf("\n");
    fflush(stdout);
  }
  for (int64 i = 0; i < SynPos.GetXDim(); i++) {
    TFltV CurrV(SynPos.GetYDim());
    for (int j = 0; j < SynPos.GetYDim(); j++) {
      CurrV[j] = SynPos(i, j);
    }
    EmbeddingsHV.AddDat(RnmBackH.GetDat(i), CurrV);
  }
}
int main(int argc, char **argv) {
  int i;
  if (argc == 1) {
    printf("HPLE\n\n");
    printf("Options:\n");
    printf("Parameters for training:\n");
    printf("\t-data <path>\n");
    printf("\t\tData (FIGER / BBN)\n");
    printf("\t-task <path>\n");
    printf("\t\tTask (reduce_label_noise / typing)\n");
    printf("\t-binary <int>\n");
    printf("\t\tSave the resulting vectors in binary mode; default is 0 (off)\n");
    printf("\t-size <int>\n");
    printf("\t\tSet size of embedding; default is 100\n");
    printf("\t-negative <int>\n");
    printf("\t\tNumber of negative examples; default is 5, common values are 5 - 10 (0 = not used)\n");
    printf("\t-iters <int>\n");
    printf("\t\tSet the number of iterations as <int>\n");
    printf("\t-threads <int>\n");
    printf("\t\tUse <int> threads (default 1)\n");
    printf("\t-alpha <float>\n");
    printf("\t\tSet the value of weight decay (default 0.0001)\n");
    printf("\t-lr <float>\n");
    printf("\t\tSet the value of learning rate (default 0.025)\n");
    return 0;
  }
  if ((i = ArgPos((char *)"-data", argc, argv)) > 0) strcpy(data, argv[i + 1]);
  if ((i = ArgPos((char *)"-task", argc, argv)) > 0) strcpy(task, argv[i + 1]);
  if ((i = ArgPos((char *)"-mode", argc, argv)) > 0) mode = argv[i + 1][0];
  if ((i = ArgPos((char *)"-binary", argc, argv)) > 0) binary = atoi(argv[i + 1]);
  if ((i = ArgPos((char *)"-size", argc, argv)) > 0) vector_size = atoi(argv[i + 1]);
  if ((i = ArgPos((char *)"-negative", argc, argv)) > 0) negative = atoi(argv[i + 1]);
  if ((i = ArgPos((char *)"-iters", argc, argv)) > 0) iters = atoi(argv[i + 1]);
  if ((i = ArgPos((char *)"-lr", argc, argv)) > 0) starting_lr = atof(argv[i + 1]);
  if ((i = ArgPos((char *)"-alpha", argc, argv)) > 0) alpha = atof(argv[i + 1]);
  if ((i = ArgPos((char *)"-threads", argc, argv)) > 0) num_threads = atoi(argv[i + 1]);
  sprintf(file_path, "Intermediate/%s/", data);
  sprintf(output_path, "Results/%s/", data);
  lr = starting_lr;
  TrainModel();
  return 0;
}
int main(int argc, char **argv) {
  int i;
  if (argc == 1) {
    printf("RNNLM based on WORD VECTOR estimation toolkit v 0.1b\n\n");
    printf("Options:\n");
    printf("Parameters for training:\n");
    printf("\t-train <file>\n");
    printf("\t\tUse text data from <file> to train the model\n");
    printf("\t-valid <file>\n");
    printf("\t\tUse text data from <file> to perform validation and control learning rate\n");
    printf("\t-test <file>\n");
    printf("\t\tUse text data from <file> to compute logprobs with an existing model\n");
    printf("\t-rnnlm <file>\n");
    printf("\t\tUse <file> to save the resulting language model\n");
    printf("\t-hidden <int>\n");
    printf("\t\tSet size of hidden layer; default is 100\n");
    printf("\t-bptt <int>\n");
    printf("\t\tSet length of BPTT unfolding; default is 3; set to 0 to disable truncation\n");
    printf("\t-bptt-block <int>\n");
    printf("\t\tSet period of BPTT unfolding; default is 10; BPTT is performed each bptt+bptt_block steps\n");
    printf("\t-gen <int>\n");
    printf("\t\tSampling mode; number of sentences to sample, default is 0 (off); enter negative number for interactive mode\n");
    printf("\t-threads <int>\n");
    printf("\t\tUse <int> threads (default 1)\n");
    printf("\t-min-count <int>\n");
    printf("\t\tThis will discard words that appear less than <int> times; default is 0\n");
    printf("\t-alpha <float>\n");
    printf("\t\tSet the starting learning rate; default is 0.1\n");
    printf("\t-maxent-alpha <float>\n");
    printf("\t\tSet the starting learning rate for maxent; default is 0.1\n");
    printf("\t-reject-threshold <float>\n");
    printf("\t\tReject nnet and reload nnet from previous epoch if the relative entropy improvement on the validation set is below this threshold (default 0.997)\n");
    printf("\t-stop <float>\n");
    printf("\t\tStop training when the relative entropy improvement on the validation set is below this threshold (default 1.003); see also -retry\n");
    printf("\t-retry <int>\n");
    printf("\t\tStop training iff N retries with halving learning rate have failed (default 2)\n");
    printf("\t-debug <int>\n");
    printf("\t\tSet the debug mode (default = 2 = more info during training)\n");
    printf("\t-direct <int>\n");
    printf("\t\tSet the size of hash for maxent parameters, in millions (default 0 = maxent off)\n");
    printf("\t-direct-order <int>\n");
    printf("\t\tSet the order of n-gram features to be used in maxent (default 3)\n");
    printf("\t-beta1 <float>\n");
    printf("\t\tL2 regularisation parameter for RNNLM weights (default 1e-6)\n");
    printf("\t-beta2 <float>\n");
    printf("\t\tL2 regularisation parameter for maxent weights (default 1e-6)\n");
    printf("\t-recompute-counts <int>\n");
    printf("\t\tRecompute train words counts, useful for fine-tuning (default = 0 = use counts stored in the vocab file)\n");
    printf("\nExamples:\n");
    printf("./rnnlm -train data.txt -valid valid.txt -rnnlm result.rnnlm -debug 2 -hidden 200\n\n");
    return 0;
  }
  model_file[0] = 0;
  test_file[0] = 0;
  if ((i = ArgPos((char *)"-hidden", argc, argv)) > 0) layer1_size = atoi(argv[i + 1]);
  if ((i = ArgPos((char *)"-train", argc, argv)) > 0) strcpy(train_file, argv[i + 1]);
  if ((i = ArgPos((char *)"-valid", argc, argv)) > 0) strcpy(valid_file, argv[i + 1]);
  if ((i = ArgPos((char *)"-test", argc, argv)) > 0) strcpy(test_file, argv[i + 1]);
  if ((i = ArgPos((char *)"-debug", argc, argv)) > 0) debug_mode = atoi(argv[i + 1]);
  if ((i = ArgPos((char *)"-bptt", argc, argv)) > 0) bptt = atoi(argv[i + 1]);
  if ((i = ArgPos((char *)"-bptt-block", argc, argv)) > 0) bptt_block = atoi(argv[i + 1]);
  if ((i = ArgPos((char *)"-alpha", argc, argv)) > 0) alpha = atof(argv[i + 1]);
  if ((i = ArgPos((char *)"-maxent-alpha", argc, argv)) > 0) maxent_alpha = atof(argv[i + 1]);
  if ((i = ArgPos((char *)"-reject-threshold", argc, argv)) > 0) reject_threshold = atof(argv[i + 1]);
  if ((i = ArgPos((char *)"-stop", argc, argv)) > 0) stop = atof(argv[i + 1]);
  if ((i = ArgPos((char *)"-retry", argc, argv)) > 0) max_retry = atoi(argv[i + 1]);
  if ((i = ArgPos((char *)"-rnnlm", argc, argv)) > 0) {
    strcpy(model_file, argv[i + 1]);
    strcpy(model_file_nnet, argv[i + 1]);
    strcat(model_file_nnet, ".nnet");
  }
  if ((i = ArgPos((char *)"-threads", argc, argv)) > 0) num_threads = atoi(argv[i + 1]);
  if ((i = ArgPos((char *)"-min-count", argc, argv)) > 0) min_count = atoi(argv[i + 1]);
  if ((i = ArgPos((char *)"-direct", argc, argv)) > 0) maxent_hash_size = atoi(argv[i + 1]);
  if ((i = ArgPos((char *)"-direct-order", argc, argv)) > 0) maxent_order = atoi(argv[i + 1]);
  if ((i = ArgPos((char *)"-beta1", argc, argv)) > 0) beta = atof(argv[i + 1]);
  if ((i = ArgPos((char *)"-beta2", argc, argv)) > 0) maxent_beta = atof(argv[i + 1]);
  if ((i = ArgPos((char *)"-gen", argc, argv)) > 0) gen = atoi(argv[i + 1]);
  if ((i = ArgPos((char *)"-recompute-counts", argc, argv)) > 0) recompute_train_counts = atoi(argv[i + 1]);
  vocab = (struct vocab_word *)calloc(vocab_max_size, sizeof(struct vocab_word));
  vocab_hash = (int *)calloc(vocab_hash_size, sizeof(int));
  TrainModel();
  return 0;
}
void Widget::on_pushButton_3_clicked() {
    applySettings();
    TrainModel();
}