예제 #1
0
// Entry point for CBOW training.
// When `rank` is 0, the vocabulary is read and the network is initialised,
// and the unigram table is built if `negative` is positive. Every caller,
// whatever its rank, then proceeds to trainModelParallelCBOW().
// NOTE(review): `rank` is presumably the process rank of a distributed run
// (e.g. MPI) -- confirm against the definition elsewhere in the file.
void trainCBOW(){
	const bool isRoot = (rank == 0);
	if(isRoot){
		readVocab();
		InitNet();
	}
	// `negative` is read only after InitNet(), exactly as in the nested form.
	if(isRoot && negative > 0){
		InitUnigramTable();
	}
	trainModelParallelCBOW();
}
예제 #2
0
void MonolingualModel::train(const string& training_file) {
    cout << "MultiVec-mono" << endl;
    config.print();
    cout << "Training file: " << training_file << endl;

    if (!config.freeze) {
        // reads vocab and counts words
        readVocab(training_file); // TODO: incremental training
        if (config.verbose)
            cout << "Total number of words: " << training_words << endl;
    }

    words_processed = 0;
    alpha = config.starting_alpha;

    // read file to find out the beginning of each chunk
    auto chunks = chunkify(training_file, config.n_threads); // also counts the number of lines
    if (!config.freeze)
        initNet();
    if (config.sent_vector)
        initSentWeights();

    high_resolution_clock::time_point start = high_resolution_clock::now();
    if (config.n_threads == 1) {
        trainChunk(training_file, chunks, 0);
    } else {
        vector<thread> threads;

        for (int i = 0; i < config.n_threads; ++i) {
            threads.push_back(thread(&MonolingualModel::trainChunk, this,
                training_file, chunks, i));
        }

        for (auto it = threads.begin(); it != threads.end(); ++it) {
            it->join();
        }
    }
    high_resolution_clock::time_point end = high_resolution_clock::now();
    auto duration = duration_cast<microseconds>(end - start).count();

    if (config.verbose)
        cout << endl;

    cout << "Training time: " << static_cast<float>(duration) / 1000000 << endl;
}