void trainWithBuiltInRNNOp(const string file, int batch_size, int max_epoch, int start_epoch) { Context device(DeviceType::kGPU, 0); BucketSentenceIter dataIter(file, batch_size, device); string prefix = file.substr(0, file.rfind(".")); dataIter.saveCharIndices(prefix + ".dictionary"); input_dim = static_cast<int>(dataIter.characterSize()); sequence_length_max = dataIter.maxSequenceLength(); auto RNN = LSTMWithBuiltInRNNOp(num_lstm_layer, sequence_length_max, input_dim, num_hidden, num_embed, dropout); map<string, NDArray> args_map; args_map["data"] = NDArray(Shape(batch_size, sequence_length_max), device, false); // Avoiding SwapAxis, batch_size is of second dimension. args_map["LSTM_init_c"] = NDArray(Shape(num_lstm_layer, batch_size, num_hidden), device, false); args_map["LSTM_init_h"] = NDArray(Shape(num_lstm_layer, batch_size, num_hidden), device, false); args_map["softmax_label"] = NDArray(Shape(batch_size, sequence_length_max), device, false); vector<mx_float> zeros(batch_size * num_lstm_layer * num_hidden, 0); Executor* exe = RNN.SimpleBind(device, args_map); if (start_epoch == -1) { RNNXavier xavier = RNNXavier(Xavier::gaussian, Xavier::in, 2.34); for (auto &arg : exe->arg_dict()) xavier(arg.first, &arg.second); } else { LoadCheckpoint(prefix + "-" + to_string(start_epoch) + ".params", exe); } start_epoch++; mx_float learning_rate = 0.0002; mx_float weight_decay = 0.000002; Optimizer* opt = OptimizerRegistry::Find("ccsgd"); // opt->SetParam("momentum", 0.9)->SetParam("rescale_grad", 1.0 / batch_size) // ->SetParam("clip_gradient", 10); for (int epoch = start_epoch; epoch < max_epoch; ++epoch) { dataIter.Reset(); auto tic = chrono::system_clock::now(); while (dataIter.Next()) { auto data_batch = dataIter.GetDataBatch(); data_batch.data.CopyTo(&exe->arg_dict()["data"]); data_batch.label.CopyTo(&exe->arg_dict()["softmax_label"]); exe->arg_dict()["LSTM_init_c"].SyncCopyFromCPU(zeros); exe->arg_dict()["LSTM_init_h"].SyncCopyFromCPU(zeros); NDArray::WaitAll(); exe->Forward(true); exe->Backward(); exe->UpdateAll(opt, learning_rate, weight_decay); NDArray::WaitAll(); } auto toc = chrono::system_clock::now(); cout << "Epoch[" << epoch << "] Time Cost:" << chrono::duration_cast<chrono::seconds>(toc - tic).count() << " seconds "; OutputPerplexity(&exe->arg_dict()["softmax_label"], &exe->outputs[0]); string filepath = prefix + "-" + to_string(epoch) + ".params"; SaveCheckpoint(filepath, RNN, exe); } }
void predictWithBuiltInRNNOp(wstring* ptext, int sequence_length, const string param_file, const string dictionary_file) { Context device(DeviceType::kGPU, 0); auto results = BucketSentenceIter::loadCharIndices(dictionary_file); auto dictionary = get<0>(results); auto charIndices = get<1>(results); input_dim = static_cast<int>(charIndices.size()); auto RNN = LSTMWithBuiltInRNNOp(num_lstm_layer, 1, input_dim, num_hidden, num_embed, 0); map<string, NDArray> args_map; args_map["data"] = NDArray(Shape(1, 1), device, false); args_map["softmax_label"] = NDArray(Shape(1, 1), device, false); vector<mx_float> zeros(1 * num_lstm_layer * num_hidden, 0); // Avoiding SwapAxis, batch_size=1 is of second dimension. args_map["LSTM_init_c"] = NDArray(Shape(num_lstm_layer, 1, num_hidden), device, false); args_map["LSTM_init_h"] = NDArray(Shape(num_lstm_layer, 1, num_hidden), device, false); args_map["LSTM_init_c"].SyncCopyFromCPU(zeros); args_map["LSTM_init_h"].SyncCopyFromCPU(zeros); Executor* exe = RNN.SimpleBind(device, args_map); LoadCheckpoint(param_file, exe); mx_float index; wchar_t next = 0; vector<mx_float> softmax; softmax.resize(input_dim); for (auto c : *ptext) { exe->arg_dict()["data"].SyncCopyFromCPU(&dictionary[c], 1); exe->Forward(false); exe->outputs[0].SyncCopyToCPU(softmax.data(), input_dim); exe->outputs[1].CopyTo(&args_map["LSTM_init_h"]); exe->outputs[2].CopyTo(&args_map["LSTM_init_c"]); size_t n = max_element(softmax.begin(), softmax.end()) - softmax.begin(); index = (mx_float) n; next = charIndices[n]; } ptext->push_back(next); for (int i = 0; i < sequence_length; i++) { exe->arg_dict()["data"].SyncCopyFromCPU(&index, 1); exe->Forward(false); exe->outputs[0].SyncCopyToCPU(softmax.data(), input_dim); exe->outputs[1].CopyTo(&args_map["LSTM_init_h"]); exe->outputs[2].CopyTo(&args_map["LSTM_init_c"]); size_t n = max_element(softmax.begin(), softmax.end()) - softmax.begin(); index = (mx_float) n; next = charIndices[n]; ptext->push_back(next); } }
void predict(wstring* ptext, int sequence_length, const string param_file, const string dictionary_file) { Context device(DeviceType::kGPU, 0); auto results = BucketSentenceIter::loadCharIndices(dictionary_file); auto dictionary = get<0>(results); auto charIndices = get<1>(results); input_dim = static_cast<int>(charIndices.size()); auto RNN = LSTMUnroll(num_lstm_layer, 1, input_dim, num_hidden, num_embed, 0); map<string, NDArray> args_map; args_map["data"] = NDArray(Shape(1, 1), device, false); args_map["softmax_label"] = NDArray(Shape(1, 1), device, false); vector<mx_float> zeros(1 * num_hidden, 0); for (int l = 0; l < num_lstm_layer; l++) { string key = "l" + to_string(l) + "_init_"; args_map[key + "c"] = NDArray(Shape(1, num_hidden), device, false); args_map[key + "h"] = NDArray(Shape(1, num_hidden), device, false); args_map[key + "c"].SyncCopyFromCPU(zeros); args_map[key + "h"].SyncCopyFromCPU(zeros); } Executor* exe = RNN.SimpleBind(device, args_map); LoadCheckpoint(param_file, exe); mx_float index; wchar_t next; vector<mx_float> softmax; softmax.resize(input_dim); for (auto c : *ptext) { exe->arg_dict()["data"].SyncCopyFromCPU(&dictionary[c], 1); exe->Forward(false); exe->outputs[0].SyncCopyToCPU(softmax.data(), input_dim); for (int l = 0; l < num_lstm_layer; l++) { string key = "l" + to_string(l) + "_init_"; exe->outputs[l * 2 + 1].CopyTo(&args_map[key + "c"]); exe->outputs[l * 2 + 2].CopyTo(&args_map[key + "h"]); } size_t n = max_element(softmax.begin(), softmax.end()) - softmax.begin(); index = (mx_float) n; next = charIndices[n]; } ptext->push_back(next); for (int i = 0; i < sequence_length; i++) { exe->arg_dict()["data"].SyncCopyFromCPU(&index, 1); exe->Forward(false); exe->outputs[0].SyncCopyToCPU(softmax.data(), input_dim); for (int l = 0; l < num_lstm_layer; l++) { string key = "l" + to_string(l) + "_init_"; exe->outputs[l * 2 + 1].CopyTo(&args_map[key + "c"]); exe->outputs[l * 2 + 2].CopyTo(&args_map[key + "h"]); } size_t n = max_element(softmax.begin(), softmax.end()) - softmax.begin(); index = (mx_float) n; next = charIndices[n]; ptext->push_back(next); } }