int main(int argc, char **argv) { int i; if (argc == 1) { printf("Concatenate the 1st-order embedding and the 2nd-order embeddings\n\n"); printf("Options:\n"); printf("Parameters for training:\n"); printf("\t-input1 <file>\n"); printf("\t\tThe 1st-order embeddings\n"); printf("\t-input2 <file>\n"); printf("\t\tThe 2nd-order embeddings\n"); printf("\t-output <file>\n"); printf("\t\tUse <file> to save the concatenated embeddings\n"); printf("\t-binary <int>\n"); printf("\t\tSave the learnt embeddings in binary moded; default is 0 (off)\n"); printf("\nExamples:\n"); printf("./concatenate -input1 vec_1st.txt -input2 vec_2nd.txt -output vec_all.txt -binary 1\n\n"); return 0; } if ((i = ArgPos((char *)"-input1", argc, argv)) > 0) strcpy(vector_file1, argv[i + 1]); if ((i = ArgPos((char *)"-input2", argc, argv)) > 0) strcpy(vector_file2, argv[i + 1]); if ((i = ArgPos((char *)"-output", argc, argv)) > 0) strcpy(output_file, argv[i + 1]); if ((i = ArgPos((char *)"-binary", argc, argv)) > 0) binary = atoi(argv[i + 1]); TrainModel(); return 0; }
int main(int argc, char **argv) { int i; if (argc == 1) { printf("WORD2PHRASE tool v0.1a\n\n"); printf("Options:\n"); printf("Parameters for training:\n"); printf("\t-train <file>\n"); printf("\t\tUse text data from <file> to train the model\n"); printf("\t-output <file>\n"); printf("\t\tUse <file> to save the resulting word vectors / word clusters / phrases\n"); printf("\t-min-count <int>\n"); printf("\t\tThis will discard words that appear less than <int> times; default is 5\n"); printf("\t-threshold <float>\n"); printf("\t\t The <float> value represents threshold for forming the phrases (higher means less phrases); default 100\n"); printf("\t-debug <int>\n"); printf("\t\tSet the debug mode (default = 2 = more info during training)\n"); printf("\nExamples:\n"); printf("./word2phrase -train text.txt -output phrases.txt -threshold 100 -debug 2\n\n"); return 0; } if ((i = ArgPos((char *)"-train", argc, argv)) > 0) strcpy(train_file, argv[i + 1]); if ((i = ArgPos((char *)"-debug", argc, argv)) > 0) debug_mode = atoi(argv[i + 1]); if ((i = ArgPos((char *)"-output", argc, argv)) > 0) strcpy(output_file, argv[i + 1]); if ((i = ArgPos((char *)"-min-count", argc, argv)) > 0) min_count = atoi(argv[i + 1]); if ((i = ArgPos((char *)"-threshold", argc, argv)) > 0) threshold = atof(argv[i + 1]); vocab = (struct vocab_word *)calloc(vocab_max_size, sizeof(struct vocab_word)); vocab_hash = (int *)calloc(vocab_hash_size, sizeof(int)); TrainModel(); return 0; }
int main(int argc, char **argv) { int i; if (argc == 1) { printf("Reconstruct the network by using a Breadth-First-Search strategy\n\n"); printf("Options:\n"); printf("Parameters for training:\n"); printf("\t-train <file>\n"); printf("\t\tReconstruct the network from <file>\n"); printf("\t-output <file>\n"); printf("\t\tUse <file> to save the reconstructed network\n"); printf("\t-depth <int>\n"); printf("\t\tThe maximum depth in the Breadth-First-Search; default is 0\n"); printf("\t-threshold <int>\n"); printf("\t\tFor vertex whose degree is less than <int>, we will expand its neighbors until the degree reaches <iny>\n"); printf("\nExamples:\n"); printf("./reconstruct -train net.txt -output net_dense.txt -depth 2 -threshold 1000\n\n"); return 0; } if ((i = ArgPos((char *)"-train", argc, argv)) > 0) strcpy(train_file, argv[i + 1]); if ((i = ArgPos((char *)"-output", argc, argv)) > 0) strcpy(output_file, argv[i + 1]); if ((i = ArgPos((char *)"-depth", argc, argv)) > 0) max_depth = atoi(argv[i + 1]); if ((i = ArgPos((char *)"-threshold", argc, argv)) > 0) max_k = atoi(argv[i + 1]); vertex = (struct ClassVertex *)calloc(max_num_vertices, sizeof(struct ClassVertex)); TrainLINE(); return 0; }
int main(int argc, char *argv[]) { // read arguments int a; if ((a = ArgPos((char *)"-delay", argc, argv)) > 0) delay = atoi(argv[a + 1]); if ((a = ArgPos((char *)"-numline", argc, argv)) > 0) nLine = atoi(argv[a + 1]); if ((a = ArgPos((char *)"-length", argc, argv)) > 0) lineLength = atoi(argv[a + 1]); clear(); pthread_t *pt = (pthread_t *)malloc(nLine * sizeof(pthread_t)); for (int i=0; i<nLine; i++) pthread_create(&pt[i], NULL, matrixLine, (void *)i); for (int i=0; i<nLine; i++) pthread_join(pt[i], NULL); return 0; }
int main(int argc, char **argv) { int i; if (argc == 1) { printf("HPLE\n\n"); printf("Options:\n"); printf("Parameters for training:\n"); printf("\t-data <path>\n"); printf("\t\tData (FIGER / BBN)\n"); printf("\t-task <path>\n"); printf("\t\tTask (reduce_label_noise / typing)\n"); printf("\t-binary <int>\n"); printf("\t\tSave the resulting vectors in binary moded; default is 0 (off)\n"); printf("\t-size <int>\n"); printf("\t\tSet size of embedding; default is 100\n"); printf("\t-negative <int>\n"); printf("\t\tNumber of negative examples; default is 5, common values are 5 - 10 (0 = not used)\n"); printf("\t-iters <int>\n"); printf("\t\tSet the number of iterations as <int>\n"); printf("\t-threads <int>\n"); printf("\t\tUse <int> threads (default 1)\n"); printf("\t-alpha <float>\n"); printf("\t\tSet the value of weight decay (default 0.0001)\n"); printf("\t-lr <float>\n"); printf("\t\tSet the value of learning rate (default 0.025)\n"); return 0; } if ((i = ArgPos((char *)"-data", argc, argv)) > 0) strcpy(data, argv[i + 1]); if ((i = ArgPos((char *)"-task", argc, argv)) > 0) strcpy(task, argv[i + 1]); if ((i = ArgPos((char *)"-mode", argc, argv)) > 0) mode = argv[i + 1][0]; if ((i = ArgPos((char *)"-binary", argc, argv)) > 0) binary = atoi(argv[i + 1]); if ((i = ArgPos((char *)"-size", argc, argv)) > 0) vector_size = atoi(argv[i + 1]); if ((i = ArgPos((char *)"-negative", argc, argv)) > 0) negative = atoi(argv[i + 1]); if ((i = ArgPos((char *)"-iters", argc, argv)) > 0) iters = atoi(argv[i + 1]); if ((i = ArgPos((char *)"-lr", argc, argv)) > 0) starting_lr = atof(argv[i + 1]); if ((i = ArgPos((char *)"-alpha", argc, argv)) > 0) alpha = atof(argv[i + 1]); if ((i = ArgPos((char *)"-threads", argc, argv)) > 0) num_threads = atoi(argv[i + 1]); sprintf(file_path, "Intermediate/%s/", data); sprintf(output_path, "Results/%s/", data); lr = starting_lr; TrainModel(); return 0; }
/* normalize: rescales each vertex embedding to unit L2 norm.
 * With no arguments, prints usage and exits. */
int main(int argc, char **argv) {
  int i;
  if (argc == 1) {
    printf("Normalize vertex embeddings by setting their L2 norm as 1\n\n");
    printf("Options:\n");
    printf("Parameters for training:\n");
    printf("\t-input <file>\n");
    printf("\t\tThe original vertex embeddings\n");
    printf("\t-output <file>\n");
    printf("\t\tUse <file> to save the normalized vertex embeddings\n");
    printf("\t-binary <int>\n");
    /* Fixed user-facing typo: "binary moded" -> "binary mode". */
    printf("\t\tSave the learnt embeddings in binary mode; default is 0 (off)\n");
    printf("\nExamples:\n");
    printf("./normalize -input vec_wo_norm.txt -output vec_norm.txt -binary 1\n\n");
    return 0;
  }
  /* Each flag's value is the argv entry immediately following it. */
  if ((i = ArgPos((char *)"-input", argc, argv)) > 0) strcpy(input_file, argv[i + 1]);
  if ((i = ArgPos((char *)"-output", argc, argv)) > 0) strcpy(output_file, argv[i + 1]);
  if ((i = ArgPos((char *)"-binary", argc, argv)) > 0) binary = atoi(argv[i + 1]);
  Normalize(); /* performs the actual rescaling */
  return 0;
}
int main(int argc, char **argv) { int i; if (argc == 1) { printf("LINE: Large Information Network Embedding\n\n"); printf("Options:\n"); printf("Parameters for training:\n"); printf("\t-train <file>\n"); printf("\t\tUse network data from <file> to train the model\n"); printf("\t-output <file>\n"); printf("\t\tUse <file> to save the learnt embeddings\n"); printf("\t-binary <int>\n"); printf("\t\tSave the learnt embeddings in binary moded; default is 0 (off)\n"); printf("\t-size <int>\n"); printf("\t\tSet dimension of vertex embeddings; default is 100\n"); printf("\t-order <int>\n"); printf("\t\tThe type of the model; 1 for first order, 2 for second order; default is 2\n"); printf("\t-negative <int>\n"); printf("\t\tNumber of negative examples; default is 5\n"); printf("\t-samples <int>\n"); printf("\t\tSet the number of training samples as <int>Million; default is 1\n"); printf("\t-threads <int>\n"); printf("\t\tUse <int> threads (default 1)\n"); printf("\t-rho <float>\n"); printf("\t\tSet the starting learning rate; default is 0.025\n"); printf("\nExamples:\n"); printf("./line -train net.txt -output vec.txt -binary 1 -size 200 -order 2 -negative 5 -samples 100 -rho 0.025 -threads 20\n\n"); return 0; } if ((i = ArgPos((char *)"-train", argc, argv)) > 0) strcpy(network_file, argv[i + 1]); if ((i = ArgPos((char *)"-output", argc, argv)) > 0) strcpy(embedding_file, argv[i + 1]); if ((i = ArgPos((char *)"-binary", argc, argv)) > 0) is_binary = atoi(argv[i + 1]); if ((i = ArgPos((char *)"-size", argc, argv)) > 0) dim = atoi(argv[i + 1]); if ((i = ArgPos((char *)"-order", argc, argv)) > 0) order = atoi(argv[i + 1]); if ((i = ArgPos((char *)"-negative", argc, argv)) > 0) num_negative = atoi(argv[i + 1]); if ((i = ArgPos((char *)"-samples", argc, argv)) > 0) total_samples = atoi(argv[i + 1]); if ((i = ArgPos((char *)"-rho", argc, argv)) > 0) init_rho = atof(argv[i + 1]); if ((i = ArgPos((char *)"-threads", argc, argv)) > 0) num_threads = atoi(argv[i + 1]); total_samples *= 
1000000; rho = init_rho; vertex = (struct ClassVertex *)calloc(max_num_vertices, sizeof(struct ClassVertex)); TrainLINE(); return 0; }
/* rnnlm driver: with no arguments, prints the full option reference and
 * exits; otherwise parses all training/testing options into module-level
 * globals, allocates the vocabulary tables and calls TrainModel(). */
int main(int argc, char **argv) {
  int i;
  if (argc == 1) {
    printf("RNNLM based on WORD VECTOR estimation toolkit v 0.1b\n\n");
    printf("Options:\n");
    printf("Parameters for training:\n");
    printf("\t-train <file>\n");
    printf("\t\tUse text data from <file> to train the model\n");
    printf("\t-valid <file>\n");
    printf("\t\tUse text data from <file> to perform validation and control learning rate\n");
    printf("\t-test <file>\n");
    printf("\t\tUse text data from <file> to compute logprobs with an existing model\n");
    printf("\t-rnnlm <file>\n");
    printf("\t\tUse <file> to save the resulting language model\n");
    printf("\t-hidden <int>\n");
    printf("\t\tSet size of hidden layer; default is 100\n");
    printf("\t-bptt <int>\n");
    printf("\t\tSet length of BPTT unfolding; default is 3; set to 0 to disable truncation\n");
    printf("\t-bptt-block <int>\n");
    printf("\t\tSet period of BPTT unfolding; default is 10; BPTT is performed each bptt+bptt_block steps\n");
    printf("\t-gen <int>\n");
    printf("\t\tSampling mode; number of sentences to sample, default is 0 (off); enter negative number for interactive mode\n");
    printf("\t-threads <int>\n");
    printf("\t\tUse <int> threads (default 1)\n");
    printf("\t-min-count <int>\n");
    printf("\t\tThis will discard words that appear less than <int> times; default is 0\n");
    printf("\t-alpha <float>\n");
    printf("\t\tSet the starting learning rate; default is 0.1\n");
    printf("\t-maxent-alpha <float>\n");
    printf("\t\tSet the starting learning rate for maxent; default is 0.1\n");
    printf("\t-reject-threshold <float>\n");
    printf("\t\tReject nnet and reload nnet from previous epoch if the relative entropy improvement on the validation set is below this threshold (default 0.997)\n");
    printf("\t-stop <float>\n");
    printf("\t\tStop training when the relative entropy improvement on the validation set is below this threshold (default 1.003); see also -retry\n");
    printf("\t-retry <int>\n");
    printf("\t\tStop training iff N retries with halving learning rate have failed (default 2)\n");
    printf("\t-debug <int>\n");
    printf("\t\tSet the debug mode (default = 2 = more info during training)\n");
    printf("\t-direct <int>\n");
    printf("\t\tSet the size of hash for maxent parameters, in millions (default 0 = maxent off)\n");
    printf("\t-direct-order <int>\n");
    printf("\t\tSet the order of n-gram features to be used in maxent (default 3)\n");
    printf("\t-beta1 <float>\n");
    printf("\t\tL2 regularisation parameter for RNNLM weights (default 1e-6)\n");
    printf("\t-beta2 <float>\n");
    printf("\t\tL2 regularisation parameter for maxent weights (default 1e-6)\n");
    printf("\t-recompute-counts <int>\n");
    printf("\t\tRecompute train words counts, useful for fine-tuning (default = 0 = use counts stored in the vocab file)\n");
    printf("\nExamples:\n");
    printf("./rnnlm -train data.txt -valid valid.txt -rnnlm result.rnnlm -debug 2 -hidden 200\n\n");
    return 0;
  }
  /* Mark the model and test file paths as "unset" before parsing. */
  model_file[0] = 0;
  test_file[0] = 0;
  /* Each flag's value is the argv entry immediately following it. */
  if ((i = ArgPos((char *)"-hidden", argc, argv)) > 0) layer1_size = atoi(argv[i + 1]);
  if ((i = ArgPos((char *)"-train", argc, argv)) > 0) strcpy(train_file, argv[i + 1]);
  if ((i = ArgPos((char *)"-valid", argc, argv)) > 0) strcpy(valid_file, argv[i + 1]);
  if ((i = ArgPos((char *)"-test", argc, argv)) > 0) strcpy(test_file, argv[i + 1]);
  if ((i = ArgPos((char *)"-debug", argc, argv)) > 0) debug_mode = atoi(argv[i + 1]);
  if ((i = ArgPos((char *)"-bptt", argc, argv)) > 0) bptt = atoi(argv[i + 1]);
  if ((i = ArgPos((char *)"-bptt-block", argc, argv)) > 0) bptt_block = atoi(argv[i + 1]);
  if ((i = ArgPos((char *)"-alpha", argc, argv)) > 0) alpha = atof(argv[i + 1]);
  if ((i = ArgPos((char *)"-maxent-alpha", argc, argv)) > 0) maxent_alpha = atof(argv[i + 1]);
  if ((i = ArgPos((char *)"-reject-threshold", argc, argv)) > 0) reject_threshold = atof(argv[i + 1]);
  if ((i = ArgPos((char *)"-stop", argc, argv)) > 0) stop = atof(argv[i + 1]);
  if ((i = ArgPos((char *)"-retry", argc, argv)) > 0) max_retry = atoi(argv[i + 1]);
  /* -rnnlm sets both the model path and a companion "<path>.nnet" path. */
  if ((i = ArgPos((char *)"-rnnlm", argc, argv)) > 0) {
    strcpy(model_file, argv[i + 1]);
    strcpy(model_file_nnet, argv[i + 1]);
    strcat(model_file_nnet, ".nnet");
  }
  if ((i = ArgPos((char *)"-threads", argc, argv)) > 0) num_threads = atoi(argv[i + 1]);
  if ((i = ArgPos((char *)"-min-count", argc, argv)) > 0) min_count = atoi(argv[i + 1]);
  if ((i = ArgPos((char *)"-direct", argc, argv)) > 0) maxent_hash_size = atoi(argv[i + 1]);
  if ((i = ArgPos((char *)"-direct-order", argc, argv)) > 0) maxent_order = atoi(argv[i + 1]);
  if ((i = ArgPos((char *)"-beta1", argc, argv)) > 0) beta = atof(argv[i + 1]);
  if ((i = ArgPos((char *)"-beta2", argc, argv)) > 0) maxent_beta = atof(argv[i + 1]);
  if ((i = ArgPos((char *)"-gen", argc, argv)) > 0) gen = atoi(argv[i + 1]);
  if ((i = ArgPos((char *)"-recompute-counts", argc, argv)) > 0) recompute_train_counts = atoi(argv[i + 1]);
  /* Vocabulary storage is allocated up front, then training runs. */
  vocab = (struct vocab_word *)calloc(vocab_max_size, sizeof(struct vocab_word));
  vocab_hash = (int *)calloc(vocab_hash_size, sizeof(int));
  TrainModel();
  return 0;
}
int main(int argc, char* argv[]) { Eigen::initParallel(); int i = 0; if (argc == 1) { help(); return 0; } string input_file = ""; string output_file = "text8-sgns.txt"; string save_vocab_file = ""; string read_vocab_file = ""; string model = "sg"; string train_method = "ns"; int table_size = 100000000; int word_dim = 200; float init_alpha = 0.025f; int window = 5; float subsample_threshold = 0.0001; float min_alpha = init_alpha * 0.0001; bool cbow_mean = true; int negative = 0; int num_threads = 1; int iter = 1; int min_count = 5; if ((i = ArgPos((char *)"-size", argc, argv)) > 0) word_dim = atoi(argv[i + 1]); if ((i = ArgPos((char *)"-train", argc, argv)) > 0) input_file = std::string(argv[i + 1]); if ((i = ArgPos((char *)"-save-vocab", argc, argv)) > 0) save_vocab_file = std::string(argv[i + 1]); if ((i = ArgPos((char *)"-read-vocab", argc, argv)) > 0) read_vocab_file = std::string(argv[i + 1]); //if ((i = ArgPos((char *)"-binary", argc, argv)) > 0) binary = atoi(argv[i + 1]); if ((i = ArgPos((char *)"-model", argc, argv)) > 0) model = std::string(argv[i + 1]); if ((i = ArgPos((char *)"-alpha", argc, argv)) > 0) init_alpha = atof(argv[i + 1]); if ((i = ArgPos((char *)"-output", argc, argv)) > 0) output_file = std::string(argv[i + 1]); if ((i = ArgPos((char *)"-window", argc, argv)) > 0) window = atoi(argv[i + 1]); if ((i = ArgPos((char *)"-subsample", argc, argv)) > 0) subsample_threshold = atof(argv[i + 1]); if ((i = ArgPos((char *)"-train_method", argc, argv)) > 0) train_method = std::string(argv[i + 1]); if ((i = ArgPos((char *)"-negative", argc, argv)) > 0) negative = atoi(argv[i + 1]); if ((i = ArgPos((char *)"-threads", argc, argv)) > 0) num_threads = atoi(argv[i + 1]); if ((i = ArgPos((char *)"-iter", argc, argv)) > 0) iter = atoi(argv[i + 1]); if ((i = ArgPos((char *)"-min-count", argc, argv)) > 0) min_count = atoi(argv[i + 1]); if(model == "") { model = "sg"; cout << "Default use skip gram model" << endl; } if(train_method == "") { train_method = "ns"; 
cout << "Default use negative sampling model" << endl; } if(train_method == "ns" && negative <= 0) { cout << "Please set -negative > 0!" << endl; return 1; } if(train_method == "hs" && negative > 0) { cout << "Do not set -negative under hierarchical softmax!" << endl; return 1; } if(train_method == "hs" && model.find("align") != string::npos) { cout << "Please use negative sampling in aligned skip gram model!" << endl; return 1; } if(cbow_mean) init_alpha = 0.05; Word2Vec w2v(iter, window, min_count, table_size, word_dim, negative, subsample_threshold, init_alpha, min_alpha, cbow_mean, num_threads, train_method, model); omp_set_num_threads(num_threads); //vector<vector<string>> sentences = w2v.line_docs("imdb_train.txt"); vector<vector<string>> sentences = text8_corpus(); w2v.build_vocab(sentences); w2v.init_weights(w2v.vocab.size()); if(save_vocab_file != "") w2v.save_vocab(save_vocab_file); w2v.train(sentences); if(output_file != "") { if(train_method == "hs" && model == "cbow") w2v.save_word2vec(output_file, w2v.C); else w2v.save_word2vec(output_file, w2v.W); } return 0; }
/* Forward-propagation tool: for each sentence in "sentences", substitutes
 * each candidate replacement word (from "synonyms") at the target position
 * (from "indices") and writes the word plus its forward-propagated
 * probability to "wordprobs".  The three input files are line-aligned. */
int main(int argc, char **argv) {
  int i,j,k = 0;//counters (NOTE(review): j is declared but never used)
  if(argc == 1) { //printing instructions
    printf("\n");
    printf("Forward propagation of sentences in a file delimited by \\n\n\n");
    printf("Parameters:\n");
    printf("\tValue for the vocabulary size that resulted from training (first number in the output file of word2vec):\n");
    printf("\t\t-vocab_size <int>\n");
    printf("\tValue for the layer size used in training (second number in the output file of word2vec):\n");
    printf("\t\t-layer_size <int>\n");
    printf("\tValue for the window size:\n");
    printf("\t\t-window <int>\n\n");
    return 0;
  }
  //reading command line arguments; each flag's value follows it in argv
  if ((i = ArgPos((char *)"-layer_size", argc, argv)) > 0) layer1_size = atoi(argv[i + 1]);
  if ((i = ArgPos((char *)"-vocab_size", argc, argv)) > 0) vocab_size = atoi(argv[i + 1]);
  if ((i = ArgPos((char *)"-window", argc, argv)) > 0) window = atoi(argv[i + 1]);
  // allocating memory to store the network elements (sizes must match training)
  syn0 = (real *)calloc(layer1_size*vocab_size,sizeof(real));
  syn1 = (real *)calloc(layer1_size*vocab_size,sizeof(real));
  neu1 = (real *)calloc(layer1_size,sizeof(real));
  index_buff = (char *)calloc(MAX_INDEX_BUFF_SIZE,sizeof(char));
  // reading the network weight matrices from file
  read_syn0();
  read_syn1();
  expTable = (real *)malloc((EXP_TABLE_SIZE + 1) * sizeof(real)); //allocating memory for expTable
  for (i = 0; i < EXP_TABLE_SIZE; i++) {
    expTable[i] = exp((i / (real)EXP_TABLE_SIZE * 2 - 1) * MAX_EXP); // Precompute the exp() table in the same way as in word2vec
    expTable[i] = expTable[i] / (expTable[i] + 1); // Precompute f(x) = x / (x + 1), i.e. the sigmoid
  }
  //building the vocabulary and the vocabulary hash from the files they were stored in
  BuildVocabFromFile();
  BuildVocabHashFromFile();
  int length = 0; //word length of the current sentence
  int syno_length = 0; //how many synonyms/replacements on the current line
  long long * sen; //sentence where words are represented as vocabulary indices
  long long * sen_temp; //temporary copy of sen with the target word swapped out
  sen_temp = (long long *)calloc(MAX_SENTENCE_LENGTH,sizeof(long long)); //allocating memory for sen_temp
  long long * synonym; //replacement words (in vocabulary index form)
  long double prob = 0; //probability returned by forward propagation
  long long ptr = 0, ptr_temp = 0; //file offset into the sentences file (NOTE(review): ptr_temp unused)
  long long syno_ptr = 0, syno_ptr_temp = 0; //file offset into the synonyms file (NOTE(review): syno_ptr_temp unused)
  FILE *sentfile = fopen("sentences","r");
  FILE *indices = fopen("indices","r");
  FILE *synfile = fopen("synonyms","r");
  FILE *fo = fopen("wordprobs","w");
  int lines = 0;
  char line[MAX_SENTENCE_LENGTH]; // buffer to store current sentence
  char synline[MAX_SENTENCE_LENGTH]; // buffer to store synonyms
  lines = Lines(sentfile); // how many lines in the sentences file, used as the outer loop bound
  //(this can be done) since all the files "sentences", "synonyms" and "indices" have the same number of lines delimited by "\n"
  rewind(sentfile);
  rewind(synfile);
  for(i = 0; i<lines; i++) { //outer loop iterating through "sentences", "synonyms" and "indices" line by line
    // read sentence: first scan the line to count its words, then rewind and
    // re-read it as vocabulary indices (hence the ftell/fseek pairing)
    ptr = ftell(sentfile); // store beginning of line
    if (readLine(sentfile,line) < 0) break;
    length = LineWordCount(line);
    //printf("sent words %d\n",length);
    // read word replacements using the same two-pass scheme
    syno_ptr = ftell(synfile); // store beginning of line
    if (readLine(synfile,synline) < 0) break;
    syno_length = LineWordCount(synline);
    printf("synline %s\n",synline);
    fseek(sentfile,ptr,SEEK_SET); // move the pointer back to the beginning of the line
    sen = FileToSen(length,sentfile); //sen is an array of longs with the words of the sentence in vocabulary index format
    fseek(synfile,syno_ptr,SEEK_SET);
    synonym = FileToSen(syno_length,synfile); //synonym holds the replacements from the "synonyms" file in vocabulary index format
    fseek(sentfile,1,SEEK_CUR); // skip past the newline left by the second pass
    fseek(synfile,1,SEEK_CUR);
    ReadIndexFromFile(indices); //reads the index and puts it in the char array "index_buff"
    target_index = GetIndex(); //returns a numerical value from what is in the char array "index_buff"
    for(k=0; k<syno_length; k++) { //repeats forward propagation for each synonym on the line
      // NOTE(review): copies MAX_SENTENCE_LENGTH entries regardless of the
      // sentence length; assumes FileToSen allocates at least that many
      // elements for sen — confirm, otherwise this over-reads.
      memcpy(sen_temp,sen,MAX_SENTENCE_LENGTH*sizeof(long long)); //copying the sentence into sen_temp where synonyms will be substituted
      sen_temp[target_index] = synonym[k]; //replacing the target word with a synonym/replacement
      prob = ForwardPropagate(length,sen_temp); //forward propagation yields the probability
      //prob = prob * 100000; // multiplying the probabilty by 100000 or taking the negative log is done in this line
      fprintf(fo,"%s %Lf\n",vocab[synonym[k]].word,prob); // write the replacement word and its probability
    }
  }
  fclose(fo);
  fclose(sentfile);
  fclose(synfile);
  fclose(indices);
  return 0;
}
int main(int argc, const char *argv[]) { if (argc == 1) // print usage instructions { std::cout << "Usage: \n" << "\t--datadir <dirname> REQUIRED [place to store db shards]\n" << "\t--nodestate <filename> REQUIRED [current node state]\n" << "\t--clustermembers <filename> REQUIRED [list of cluster members]\n" << "\t--ownershipmap <filename> REQUIRED [list of partitions owned by current node]\n" << "\t--partitionmap <filename> REQUIRED [mapping of partitions to nodes]\n" << "\t--log <filename> REQUIRED [log file for recovery]\n" << "\t--join OPTIONAL\n" << "\t--recover OPTIONAL\n" << "\t--debug OPTIONAL [start sending fake data to other cluster members]\n" << "\t--size OPTIONAL [mbytes]" // << "\t--name OPTIONAL [give the node a custom name that can be reached, IP address]" << std::endl; return 0; } std::string logfile; int i; bool join = false, recover = false, debug = false; if ((i = ArgPos("--datadir", argc, argv, true)) > 0) g_db_files_dirname = std::string(argv[i+1]); if ((i = ArgPos("--nodestate", argc, argv, true)) > 0) g_current_node_state_filename = std::string(argv[i+1]); if ((i = ArgPos("--clustermembers", argc, argv, true)) > 0) g_cluster_member_list_filename = std::string(argv[i+1]); if ((i = ArgPos("--ownershipmap", argc, argv, true)) > 0) g_owned_partition_state_filename = std::string(argv[i+1]); if ((i = ArgPos("--partitionmap", argc, argv, true)) > 0) g_cached_partition_map_filename = std::string(argv[i+1]); if ((i = ArgPos("--log", argc, argv, true)) > 0) logfile = std::string(argv[i+1]); if ((i = ArgPos("--join", argc, argv, false)) > 0) join = true; if ((i = ArgPos("--recover", argc, argv, false)) > 0) recover = true; if ((i = ArgPos("--debug", argc, argv, false)) > 0) debug = true; if ((i = ArgPos("--size", argc, argv, true)) > 0) g_local_disk_limit_in_bytes = atoi(argv[i+1]) * 1024 * 1024; else g_local_disk_limit_in_bytes = 512 * 1024 * 1024; std::cout << "DB directory : " << g_db_files_dirname << std::endl; std::cout << "Node state file : " << 
g_current_node_state_filename << std::endl; std::cout << "Cluster members file : " << g_cluster_member_list_filename << std::endl; std::cout << "Ownership map file : " << g_owned_partition_state_filename << std::endl; std::cout << "Partition-node map file : " << g_cached_partition_map_filename << std::endl; std::cout << "Log file : " << logfile << std::endl; if (join && recover) { perror("cannot join and recover\n"); exit(0); } if (g_db_files_dirname == "") { perror("must specify a directory for database files\n"); exit(0); } if (g_current_node_state_filename == "") { perror("must specify a filename for persisting node state\n"); exit(0); } if (g_cluster_member_list_filename == "") { perror("must specify a file for list of cluster members\n"); exit(0); } if (g_owned_partition_state_filename == "") { perror("must specify a file for owned partition state\n"); exit(0); } if (g_cached_partition_map_filename == "") { perror("must specify a file for partition map\n"); exit(0); } if (logfile == "") { perror("must specify a file for log\n"); exit(0); } if (join) { JoinInitState(); } else if (recover) { RecoverInitState(logfile); } else { InitializeState(); } edisense_comms::Member member; Server server; member.start(&server); if (join) { JoinFinishInit(&member); } std::thread async_put_thread(RetryPutDaemon, &member, 15); // 15 seconds std::thread rebalance_thread(LoadBalanceDaemon, &member, 60, logfile, recover); // 1 minutes std::thread gc_thread(GarbageCollectDaemon, 60 * 60 * 12); // 12 hrs std::thread db_transfer_thread(DBTransferServerDaemon); if (debug) // simulate data { for (int j = 1; j <= 50; j++) { std::thread simulate_put_thread(SimulatePutDaemon, &member, 1, j); simulate_put_thread.detach(); } } gc_thread.join(); rebalance_thread.join(); db_transfer_thread.join(); async_put_thread.join(); member.stop(); }
//get arguments from command line int get_optarg(int argc, char **argv) { int i; output_file[0] = 0; if ((i = ArgPos((char *)"-dim", argc, argv)) > 0) dim = atoi(argv[i + 1]); if ((i = ArgPos((char *)"-train", argc, argv)) > 0) strcpy(train_file, argv[i + 1]); if ((i = ArgPos((char *)"-cbow", argc, argv)) > 0) cbow = atoi(argv[i + 1]); if ((i = ArgPos((char *)"-hs", argc, argv)) > 0) hs = atoi(argv[i + 1]); if ((i = ArgPos((char *)"-negtive", argc, argv)) > 0) negtive = atoi(argv[i + 1]); if (cbow) alpha = 0.05; if ((i = ArgPos((char *)"-alpha", argc, argv)) > 0) alpha = atof(argv[i + 1]); if ((i = ArgPos((char *)"-output", argc, argv)) > 0) strcpy(output_file, argv[i + 1]); if ((i = ArgPos((char *)"-window", argc, argv)) > 0) window = atoi(argv[i + 1]); if ((i = ArgPos((char *)"-sample", argc, argv)) > 0) sample = atof(argv[i + 1]); if ((i = ArgPos((char *)"-threads", argc, argv)) > 0) num_threads = atoi(argv[i + 1]); if ((i = ArgPos((char *)"-iter", argc, argv)) > 0) iter = atoi(argv[i + 1]); if ((i = ArgPos((char *)"-min-count", argc, argv)) > 0) min_count = atoi(argv[i + 1]); return output_file[0] == 0 ? -1 : 0; }