/// Entry point for CBOW training.
///
/// When `rank` equals 0, the vocabulary is read, the network is initialised
/// and, if `negative` is positive, the unigram table is built. Afterwards
/// every caller proceeds to `trainModelParallelCBOW()`.
///
/// NOTE(review): `rank` is presumably the process rank in a distributed run,
/// and other ranks presumably receive the initialised state inside
/// `trainModelParallelCBOW()` -- confirm against the helper definitions.
void trainCBOW() {
    const bool is_rank_zero = (rank == 0);

    if (is_rank_zero) {
        // One-time setup, performed only when rank == 0.
        readVocab();
        InitNet();

        // The unigram table is only built when `negative` is positive.
        if (negative > 0) {
            InitUnigramTable();
        }
    }

    // Executed regardless of rank.
    trainModelParallelCBOW();
}
void MonolingualModel::train(const string& training_file) { cout << "MultiVec-mono" << endl; config.print(); cout << "Training file: " << training_file << endl; if (!config.freeze) { // reads vocab and counts words readVocab(training_file); // TODO: incremental training if (config.verbose) cout << "Total number of words: " << training_words << endl; } words_processed = 0; alpha = config.starting_alpha; // read file to find out the beginning of each chunk auto chunks = chunkify(training_file, config.n_threads); // also counts the number of lines if (!config.freeze) initNet(); if (config.sent_vector) initSentWeights(); high_resolution_clock::time_point start = high_resolution_clock::now(); if (config.n_threads == 1) { trainChunk(training_file, chunks, 0); } else { vector<thread> threads; for (int i = 0; i < config.n_threads; ++i) { threads.push_back(thread(&MonolingualModel::trainChunk, this, training_file, chunks, i)); } for (auto it = threads.begin(); it != threads.end(); ++it) { it->join(); } } high_resolution_clock::time_point end = high_resolution_clock::now(); auto duration = duration_cast<microseconds>(end - start).count(); if (config.verbose) cout << endl; cout << "Training time: " << static_cast<float>(duration) / 1000000 << endl; }