コード例 #1
0
ファイル: concatenate.cpp プロジェクト: chentingpc/LINE
// Program entry point for the embedding concatenation tool.
//
// With no command-line arguments the usage text is printed and the program
// exits with status 0. Otherwise each recognised flag is located with
// ArgPos(); when ArgPos() reports a positive index, the argument that follows
// the flag is copied into the matching file-level setting. TrainModel() is
// then called and the program returns 0.
int main(int argc, char **argv) {
  if (argc == 1) {
    // Banner and section headers.
    printf(
        "Concatenate the 1st-order embedding and the 2nd-order embeddings\n\n"
        "Options:\n"
        "Parameters for training:\n");
    // One printf per option: flag line followed by its description.
    printf(
        "\t-input1 <file>\n"
        "\t\tThe 1st-order embeddings\n");
    printf(
        "\t-input2 <file>\n"
        "\t\tThe 2nd-order embeddings\n");
    printf(
        "\t-output <file>\n"
        "\t\tUse <file> to save the concatenated embeddings\n");
    printf(
        "\t-binary <int>\n"
        "\t\tSave the learnt embeddings in binary moded; default is 0 (off)\n");
    printf(
        "\nExamples:\n"
        "./concatenate -input1 vec_1st.txt -input2 vec_2nd.txt -output vec_all.txt -binary 1\n\n");
    return 0;
  }

  // Index of the flag currently being looked up; only values > 0 are used.
  int pos;

  pos = ArgPos((char *)"-input1", argc, argv);
  if (pos > 0) {
    strcpy(vector_file1, argv[pos + 1]);
  }

  pos = ArgPos((char *)"-input2", argc, argv);
  if (pos > 0) {
    strcpy(vector_file2, argv[pos + 1]);
  }

  pos = ArgPos((char *)"-output", argc, argv);
  if (pos > 0) {
    strcpy(output_file, argv[pos + 1]);
  }

  pos = ArgPos((char *)"-binary", argc, argv);
  if (pos > 0) {
    binary = atoi(argv[pos + 1]);
  }

  TrainModel();
  return 0;
}
コード例 #2
0
/*
 * Configure the file-level word2vec settings from string arguments, allocate
 * the vocabulary, vocabulary hash and exp lookup tables, and run TrainModel().
 *
 * train_file0 / output_file0  copied into train_file / output_file
 * dims0, iter0                parsed with atoll into layer1_size / iter
 * binary0, threads, window0,
 * classes0, cbow0, min_count0 parsed with atoi into the matching settings
 *
 * alpha is always reset to 0.025 and word_count_actual to 0.
 *
 * NOTE(review): the calloc/malloc results are used without a NULL check, and
 * nothing here frees them - confirm whether TrainModel() or the caller does.
 */
void tmcn_word2vec(char *train_file0, char *output_file0,
                   char *binary0, char *dims0, char *threads,
                   char *window0, char *classes0, char *cbow0,
                   char *min_count0, char *iter0)
{
	int k;

	/* Input and output paths. */
	strcpy(train_file, train_file0);
	strcpy(output_file, output_file0);

	/* Numeric settings, all delivered as strings. */
	layer1_size = atoll(dims0);
	iter = atoll(iter0);
	num_threads = atoi(threads);
	window = atoi(window0);
	binary = atoi(binary0);
	classes = atoi(classes0);
	cbow = atoi(cbow0);
	min_count = atoi(min_count0);

	/* Fixed starting learning rate and a fresh progress counter. */
	alpha = 0.025;
	starting_alpha = alpha;
	word_count_actual = 0;

	vocab = (struct vocab_word *)calloc(vocab_max_size, sizeof(struct vocab_word));
	vocab_hash = (int *)calloc(vocab_hash_size, sizeof(int));

	/* The table has EXP_TABLE_SIZE + 1 slots; the loop fills the first EXP_TABLE_SIZE. */
	expTable = (real *)malloc((EXP_TABLE_SIZE + 1) * sizeof(real));
	for (k = 0; k < EXP_TABLE_SIZE; k++) {
		/* Precompute the exp() table ... */
		expTable[k] = exp((k / (real)EXP_TABLE_SIZE * 2 - 1) * MAX_EXP);
		/* ... then turn each entry into f(x) = x / (x + 1). */
		expTable[k] /= (expTable[k] + 1);
	}

	TrainModel();
}
コード例 #3
0
ファイル: word2phrase.c プロジェクト: 1206lyp/word2vec
/*
 * Program entry point for the word2phrase tool.
 *
 * With no command-line arguments the usage text is printed and the program
 * exits with status 0. Otherwise each recognised flag is located with
 * ArgPos(); when ArgPos() reports a positive index, the argument following
 * the flag is stored in the matching file-level setting. The vocabulary and
 * vocabulary hash tables are then allocated zero-initialised and TrainModel()
 * is called.
 */
int main(int argc, char **argv) {
  int pos; /* index of the flag being looked up; only values > 0 are used */

  if (argc == 1) {
    /* Banner and section headers. */
    printf(
        "WORD2PHRASE tool v0.1a\n\n"
        "Options:\n"
        "Parameters for training:\n");
    /* One printf per option: flag line followed by its description. */
    printf(
        "\t-train <file>\n"
        "\t\tUse text data from <file> to train the model\n");
    printf(
        "\t-output <file>\n"
        "\t\tUse <file> to save the resulting word vectors / word clusters / phrases\n");
    printf(
        "\t-min-count <int>\n"
        "\t\tThis will discard words that appear less than <int> times; default is 5\n");
    printf(
        "\t-threshold <float>\n"
        "\t\t The <float> value represents threshold for forming the phrases (higher means less phrases); default 100\n");
    printf(
        "\t-debug <int>\n"
        "\t\tSet the debug mode (default = 2 = more info during training)\n");
    printf(
        "\nExamples:\n"
        "./word2phrase -train text.txt -output phrases.txt -threshold 100 -debug 2\n\n");
    return 0;
  }

  pos = ArgPos((char *)"-train", argc, argv);
  if (pos > 0) {
    strcpy(train_file, argv[pos + 1]);
  }

  pos = ArgPos((char *)"-debug", argc, argv);
  if (pos > 0) {
    debug_mode = atoi(argv[pos + 1]);
  }

  pos = ArgPos((char *)"-output", argc, argv);
  if (pos > 0) {
    strcpy(output_file, argv[pos + 1]);
  }

  pos = ArgPos((char *)"-min-count", argc, argv);
  if (pos > 0) {
    min_count = atoi(argv[pos + 1]);
  }

  pos = ArgPos((char *)"-threshold", argc, argv);
  if (pos > 0) {
    threshold = atof(argv[pos + 1]);
  }

  /* Zero-initialised vocabulary storage used by the training run. */
  vocab = (struct vocab_word *)calloc(vocab_max_size, sizeof(struct vocab_word));
  vocab_hash = (int *)calloc(vocab_hash_size, sizeof(int));

  TrainModel();
  return 0;
}
コード例 #4
0
ファイル: catboostr.cpp プロジェクト: iamnik13/catboost
// Trains a model on the learn pool (plus an optional test pool) and returns it
// to R wrapped in an external pointer.
//
// learnPoolParam        external pointer whose address is read as a TPoolHandle
// testPoolParam         external pointer to the test pool, or R_NilValue for none
// fitParamsAsJsonParam  passed to LoadFitParams() to build the fit parameters
//
// Returns the external pointer holding the trained TFullModel. `result` starts
// out as NULL and is only assigned after training has completed.
SEXP CatBoostFit_R(SEXP learnPoolParam, SEXP testPoolParam, SEXP fitParamsAsJsonParam) {
    SEXP result = NULL;
    // NOTE(review): R_API_BEGIN/R_API_END are macros defined elsewhere; presumably
    // they wrap the enclosed statements in exception handling - confirm.
    R_API_BEGIN();
    TPoolHandle learnPool = reinterpret_cast<TPoolHandle>(R_ExternalPtrAddr(learnPoolParam));
    auto fitParams = LoadFitParams(fitParamsAsJsonParam);
    // Owned by the unique_ptr until ownership is handed over below.
    TFullModelPtr modelPtr = std::make_unique<TFullModel>();
    TEvalResult evalResult;
    if (testPoolParam != R_NilValue) {
        TPoolHandle testPool = reinterpret_cast<TPoolHandle>(R_ExternalPtrAddr(testPoolParam));
        TrainModel(fitParams, Nothing(), Nothing(), *learnPool, false, *testPool, "", modelPtr.get(), &evalResult);
    }
    else {
        // No test pool supplied: train against a default-constructed TPool.
        TrainModel(fitParams, Nothing(), Nothing(), *learnPool, false, TPool(), "", modelPtr.get(), &evalResult);
    }
    // Expose the raw model pointer to R and protect the new object from GC.
    result = PROTECT(R_MakeExternalPtr(modelPtr.get(), R_NilValue, R_NilValue));
    // TRUE requests that the finalizer also runs when the R session exits.
    R_RegisterCFinalizerEx(result, _Finalizer<TFullModelHandle>, TRUE);
    // Give up unique_ptr ownership without deleting, so the model outlives this
    // scope; presumably the registered finalizer frees it - confirm _Finalizer.
    modelPtr.release();
    R_API_END();
    // Balances the single PROTECT above.
    UNPROTECT(1);
    return result;
}
コード例 #5
0
ファイル: word2vec.cpp プロジェクト: jsw883/snap
// Learns one embedding vector per node from the walks stored in WalksVV and
// writes them into EmbeddingsHV keyed by the original node id.
//
// WalksVV is modified in place: every node id is replaced by a consecutive id
// assigned in order of first appearance. Dimensions, WinSize, Iter and Verbose
// are forwarded to the helpers / TrainModel(); TrainModel() is invoked once per
// walk row for each of the Iter passes.
//
// NOTE(review): WordCntAll, Alpha, Rnd, SynNeg and SynPos are handed to
// TrainModel() from inside an OpenMP parallel loop; whether TrainModel()
// synchronises access to them is not visible here - confirm.
void LearnEmbeddings(TVVec<TInt, int64>& WalksVV, int& Dimensions, int& WinSize,
 int& Iter, bool& Verbose, TIntFltVH& EmbeddingsHV) {
  TIntIntH OrigToNewH;   // original node id -> consecutive id
  TIntIntH NewToOrigH;   // consecutive id -> original node id
  int64 NodeCnt = 0;

  // Relabel nodes with consecutive numbers, in order of first appearance.
  for (int WalkN = 0; WalkN < WalksVV.GetXDim(); WalkN++) {
    for (int64 StepN = 0; StepN < WalksVV.GetYDim(); StepN++) {
      if ( !OrigToNewH.IsKey(WalksVV(WalkN, StepN)) ) {
        // First sighting: record both directions, then overwrite the cell.
        OrigToNewH.AddDat(WalksVV(WalkN, StepN), NodeCnt);
        NewToOrigH.AddDat(NodeCnt, WalksVV(WalkN, StepN));
        WalksVV(WalkN, StepN) = NodeCnt++;
      } else {
        WalksVV(WalkN, StepN) = OrigToNewH.GetDat(WalksVV(WalkN, StepN));
      }
    }
  }

  TIntV Vocab(NodeCnt);
  LearnVocab(WalksVV, Vocab);
  TIntV KTable(NodeCnt);
  TFltV UTable(NodeCnt);
  TVVec<TFlt, int64> SynNeg;
  TVVec<TFlt, int64> SynPos;
  TRnd Rnd(time(NULL));
  InitPosEmb(Vocab, Dimensions, Rnd, SynPos);
  InitNegEmb(Vocab, Dimensions, SynNeg);
  InitUnigramTable(Vocab, KTable, UTable);

  TFltV ExpTable(TableSize);
  double Alpha = StartAlpha;   // learning rate

  // Fill the lookup table with e^x for x starting at -MaxExp in steps of
  // 1/ExpTablePrecision.
#pragma omp parallel for schedule(dynamic)
  for (int EntryN = 0; EntryN < TableSize; EntryN++ ) {
    const double Exponent =
     -MaxExp + static_cast<double>(EntryN) / static_cast<double>(ExpTablePrecision);
    ExpTable[EntryN] = TMath::Power(TMath::E, Exponent);
  }

  int64 WordCntAll = 0;
  // RS 2016/09/26: a collapse(2) clause over both loops does not compile on
  // Mac OS X, so only the inner loop over walks is parallelised.
  for (int PassN = 0; PassN < Iter; PassN++) {
#pragma omp parallel for schedule(dynamic)
    for (int64 WalkN = 0; WalkN < WalksVV.GetXDim(); WalkN++) {
      TrainModel(WalksVV, Dimensions, WinSize, Iter, Verbose, KTable, UTable,
       WordCntAll, ExpTable, Alpha, WalkN, Rnd, SynNeg, SynPos);
    }
  }

  if (Verbose) {
    printf("\n");
    fflush(stdout);
  }

  // Copy each row of SynPos out under the node's original id.
  for (int64 NodeN = 0; NodeN < SynPos.GetXDim(); NodeN++) {
    TFltV CurrV(SynPos.GetYDim());
    for (int DimN = 0; DimN < SynPos.GetYDim(); DimN++) {
      CurrV[DimN] = SynPos(NodeN, DimN);
    }
    EmbeddingsHV.AddDat(NewToOrigH.GetDat(NodeN), CurrV);
  }
}
コード例 #6
0
ファイル: hple-corrKB.cpp プロジェクト: shanzhenren/PLE
// Program entry point for the HPLE trainer.
//
// With no command-line arguments the usage text is printed and the program
// exits with status 0. Otherwise each recognised flag is located with
// ArgPos(); when ArgPos() reports a positive index, the argument following
// the flag is stored in the matching file-level setting. The intermediate and
// result directories are derived from `data`, the learning rate is reset to
// its starting value, and TrainModel() is called.
//
// NOTE(review): "-mode" is parsed below (first character only) but is not
// listed in the usage text.
int main(int argc, char **argv) {
    if (argc == 1) {
        // Banner and section headers.
        printf(
            "HPLE\n\n"
            "Options:\n"
            "Parameters for training:\n");
        // One printf per option: flag line followed by its description.
        printf(
            "\t-data <path>\n"
            "\t\tData (FIGER / BBN)\n");
        printf(
            "\t-task <path>\n"
            "\t\tTask (reduce_label_noise / typing)\n");
        printf(
            "\t-binary <int>\n"
            "\t\tSave the resulting vectors in binary moded; default is 0 (off)\n");
        printf(
            "\t-size <int>\n"
            "\t\tSet size of embedding; default is 100\n");
        printf(
            "\t-negative <int>\n"
            "\t\tNumber of negative examples; default is 5, common values are 5 - 10 (0 = not used)\n");
        printf(
            "\t-iters <int>\n"
            "\t\tSet the number of iterations as <int>\n");
        printf(
            "\t-threads <int>\n"
            "\t\tUse <int> threads (default 1)\n");
        printf(
            "\t-alpha <float>\n"
            "\t\tSet the value of weight decay (default 0.0001)\n");
        printf(
            "\t-lr <float>\n"
            "\t\tSet the value of learning rate (default 0.025)\n");
        return 0;
    }

    // Index of the flag currently being looked up; only values > 0 are used.
    int pos;

    pos = ArgPos((char *)"-data", argc, argv);
    if (pos > 0) {
        strcpy(data, argv[pos + 1]);
    }

    pos = ArgPos((char *)"-task", argc, argv);
    if (pos > 0) {
        strcpy(task, argv[pos + 1]);
    }

    pos = ArgPos((char *)"-mode", argc, argv);
    if (pos > 0) {
        mode = argv[pos + 1][0];
    }

    pos = ArgPos((char *)"-binary", argc, argv);
    if (pos > 0) {
        binary = atoi(argv[pos + 1]);
    }

    pos = ArgPos((char *)"-size", argc, argv);
    if (pos > 0) {
        vector_size = atoi(argv[pos + 1]);
    }

    pos = ArgPos((char *)"-negative", argc, argv);
    if (pos > 0) {
        negative = atoi(argv[pos + 1]);
    }

    pos = ArgPos((char *)"-iters", argc, argv);
    if (pos > 0) {
        iters = atoi(argv[pos + 1]);
    }

    pos = ArgPos((char *)"-lr", argc, argv);
    if (pos > 0) {
        starting_lr = atof(argv[pos + 1]);
    }

    pos = ArgPos((char *)"-alpha", argc, argv);
    if (pos > 0) {
        alpha = atof(argv[pos + 1]);
    }

    pos = ArgPos((char *)"-threads", argc, argv);
    if (pos > 0) {
        num_threads = atoi(argv[pos + 1]);
    }

    // Both working directories are keyed by the data set name.
    sprintf(file_path, "Intermediate/%s/", data);
    sprintf(output_path, "Results/%s/", data);

    lr = starting_lr;
    TrainModel();
    return 0;
}
コード例 #7
0
ファイル: rnnlm.c プロジェクト: zweiein/kaldi
/*
 * Program entry point for the RNNLM tool.
 *
 * With no command-line arguments the usage text is printed and the program
 * exits with status 0. Otherwise model_file and test_file are reset to empty
 * strings, and each recognised flag is located with ArgPos(); when ArgPos()
 * reports a positive index, the argument following the flag is stored in the
 * matching file-level setting. The vocabulary and vocabulary hash tables are
 * then allocated zero-initialised and TrainModel() is called.
 */
int main(int argc, char **argv) {
  int pos; /* index of the flag being looked up; only values > 0 are used */

  if (argc == 1) {
    /* Banner and section headers. */
    printf(
        "RNNLM based on WORD VECTOR estimation toolkit v 0.1b\n\n"
        "Options:\n"
        "Parameters for training:\n");
    /* One printf per option: flag line followed by its description. */
    printf(
        "\t-train <file>\n"
        "\t\tUse text data from <file> to train the model\n");
    printf(
        "\t-valid <file>\n"
        "\t\tUse text data from <file> to perform validation and control learning rate\n");
    printf(
        "\t-test <file>\n"
        "\t\tUse text data from <file> to compute logprobs with an existing model\n");
    printf(
        "\t-rnnlm <file>\n"
        "\t\tUse <file> to save the resulting language model\n");
    printf(
        "\t-hidden <int>\n"
        "\t\tSet size of hidden layer; default is 100\n");
    printf(
        "\t-bptt <int>\n"
        "\t\tSet length of BPTT unfolding; default is 3; set to 0 to disable truncation\n");
    printf(
        "\t-bptt-block <int>\n"
        "\t\tSet period of BPTT unfolding; default is 10; BPTT is performed each bptt+bptt_block steps\n");
    printf(
        "\t-gen <int>\n"
        "\t\tSampling mode; number of sentences to sample, default is 0 (off); enter negative number for interactive mode\n");
    printf(
        "\t-threads <int>\n"
        "\t\tUse <int> threads (default 1)\n");
    printf(
        "\t-min-count <int>\n"
        "\t\tThis will discard words that appear less than <int> times; default is 0\n");
    printf(
        "\t-alpha <float>\n"
        "\t\tSet the starting learning rate; default is 0.1\n");
    printf(
        "\t-maxent-alpha <float>\n"
        "\t\tSet the starting learning rate for maxent; default is 0.1\n");
    printf(
        "\t-reject-threshold <float>\n"
        "\t\tReject nnet and reload nnet from previous epoch if the relative entropy improvement on the validation set is below this threshold (default 0.997)\n");
    printf(
        "\t-stop <float>\n"
        "\t\tStop training when the relative entropy improvement on the validation set is below this threshold (default 1.003); see also -retry\n");
    printf(
        "\t-retry <int>\n"
        "\t\tStop training iff N retries with halving learning rate have failed (default 2)\n");
    printf(
        "\t-debug <int>\n"
        "\t\tSet the debug mode (default = 2 = more info during training)\n");
    printf(
        "\t-direct <int>\n"
        "\t\tSet the size of hash for maxent parameters, in millions (default 0 = maxent off)\n");
    printf(
        "\t-direct-order <int>\n"
        "\t\tSet the order of n-gram features to be used in maxent (default 3)\n");
    printf(
        "\t-beta1 <float>\n"
        "\t\tL2 regularisation parameter for RNNLM weights (default 1e-6)\n");
    printf(
        "\t-beta2 <float>\n"
        "\t\tL2 regularisation parameter for maxent weights (default 1e-6)\n");
    printf(
        "\t-recompute-counts <int>\n"
        "\t\tRecompute train words counts, useful for fine-tuning (default = 0 = use counts stored in the vocab file)\n");
    printf(
        "\nExamples:\n"
        "./rnnlm -train data.txt -valid valid.txt -rnnlm result.rnnlm -debug 2 -hidden 200\n\n");
    return 0;
  }

  /* Start with empty model and test paths; the flags below may fill them. */
  model_file[0] = 0;
  test_file[0] = 0;

  pos = ArgPos((char *)"-hidden", argc, argv);
  if (pos > 0) {
    layer1_size = atoi(argv[pos + 1]);
  }

  pos = ArgPos((char *)"-train", argc, argv);
  if (pos > 0) {
    strcpy(train_file, argv[pos + 1]);
  }

  pos = ArgPos((char *)"-valid", argc, argv);
  if (pos > 0) {
    strcpy(valid_file, argv[pos + 1]);
  }

  pos = ArgPos((char *)"-test", argc, argv);
  if (pos > 0) {
    strcpy(test_file, argv[pos + 1]);
  }

  pos = ArgPos((char *)"-debug", argc, argv);
  if (pos > 0) {
    debug_mode = atoi(argv[pos + 1]);
  }

  pos = ArgPos((char *)"-bptt", argc, argv);
  if (pos > 0) {
    bptt = atoi(argv[pos + 1]);
  }

  pos = ArgPos((char *)"-bptt-block", argc, argv);
  if (pos > 0) {
    bptt_block = atoi(argv[pos + 1]);
  }

  pos = ArgPos((char *)"-alpha", argc, argv);
  if (pos > 0) {
    alpha = atof(argv[pos + 1]);
  }

  pos = ArgPos((char *)"-maxent-alpha", argc, argv);
  if (pos > 0) {
    maxent_alpha = atof(argv[pos + 1]);
  }

  pos = ArgPos((char *)"-reject-threshold", argc, argv);
  if (pos > 0) {
    reject_threshold = atof(argv[pos + 1]);
  }

  pos = ArgPos((char *)"-stop", argc, argv);
  if (pos > 0) {
    stop = atof(argv[pos + 1]);
  }

  pos = ArgPos((char *)"-retry", argc, argv);
  if (pos > 0) {
    max_retry = atoi(argv[pos + 1]);
  }

  pos = ArgPos((char *)"-rnnlm", argc, argv);
  if (pos > 0) {
    /* The nnet file name is the model file name with ".nnet" appended. */
    strcpy(model_file, argv[pos + 1]);
    strcpy(model_file_nnet, argv[pos + 1]);
    strcat(model_file_nnet, ".nnet");
  }

  pos = ArgPos((char *)"-threads", argc, argv);
  if (pos > 0) {
    num_threads = atoi(argv[pos + 1]);
  }

  pos = ArgPos((char *)"-min-count", argc, argv);
  if (pos > 0) {
    min_count = atoi(argv[pos + 1]);
  }

  pos = ArgPos((char *)"-direct", argc, argv);
  if (pos > 0) {
    maxent_hash_size = atoi(argv[pos + 1]);
  }

  pos = ArgPos((char *)"-direct-order", argc, argv);
  if (pos > 0) {
    maxent_order = atoi(argv[pos + 1]);
  }

  pos = ArgPos((char *)"-beta1", argc, argv);
  if (pos > 0) {
    beta = atof(argv[pos + 1]);
  }

  pos = ArgPos((char *)"-beta2", argc, argv);
  if (pos > 0) {
    maxent_beta = atof(argv[pos + 1]);
  }

  pos = ArgPos((char *)"-gen", argc, argv);
  if (pos > 0) {
    gen = atoi(argv[pos + 1]);
  }

  pos = ArgPos((char *)"-recompute-counts", argc, argv);
  if (pos > 0) {
    recompute_train_counts = atoi(argv[pos + 1]);
  }

  /* Zero-initialised vocabulary storage used by the training run. */
  vocab = (struct vocab_word *)calloc(vocab_max_size, sizeof(struct vocab_word));
  vocab_hash = (int *)calloc(vocab_hash_size, sizeof(int));

  TrainModel();
  return 0;
}
コード例 #8
0
ファイル: widget.cpp プロジェクト: neopenx/QWord2Vec
// Click handler for pushButton_3: applies the current settings, then starts
// training.
// NOTE(review): the name matches Qt's on_<object>_<signal> auto-connect
// pattern, so this is presumably wired up by the generated UI code - confirm.
void Widget::on_pushButton_3_clicked()
{
    // Settings are applied first, so TrainModel() runs after applySettings().
    applySettings();
    TrainModel();
}