Example #1
0
void trainWithBuiltInRNNOp(const string file, int batch_size, int max_epoch, int start_epoch) {
  Context device(DeviceType::kGPU, 0);
  BucketSentenceIter dataIter(file, batch_size, device);
  string prefix = file.substr(0, file.rfind("."));
  dataIter.saveCharIndices(prefix + ".dictionary");

  input_dim = static_cast<int>(dataIter.characterSize());
  sequence_length_max = dataIter.maxSequenceLength();

  auto RNN = LSTMWithBuiltInRNNOp(num_lstm_layer, sequence_length_max, input_dim, num_hidden,
      num_embed, dropout);
  map<string, NDArray> args_map;
  args_map["data"] = NDArray(Shape(batch_size, sequence_length_max), device, false);
  // Avoiding SwapAxis, batch_size is of second dimension.
  args_map["LSTM_init_c"] = NDArray(Shape(num_lstm_layer, batch_size, num_hidden), device, false);
  args_map["LSTM_init_h"] = NDArray(Shape(num_lstm_layer, batch_size, num_hidden), device, false);
  args_map["softmax_label"] = NDArray(Shape(batch_size, sequence_length_max), device, false);
  vector<mx_float> zeros(batch_size * num_lstm_layer * num_hidden, 0);
  Executor* exe = RNN.SimpleBind(device, args_map);

  if (start_epoch == -1) {
    RNNXavier xavier = RNNXavier(Xavier::gaussian, Xavier::in, 2.34);
    for (auto &arg : exe->arg_dict())
      xavier(arg.first, &arg.second);
  } else {
    LoadCheckpoint(prefix + "-" + to_string(start_epoch) + ".params", exe);
  }
  start_epoch++;

  mx_float learning_rate = 0.0002;
  mx_float weight_decay = 0.000002;
  Optimizer* opt = OptimizerRegistry::Find("ccsgd");
//  opt->SetParam("momentum", 0.9)->SetParam("rescale_grad", 1.0 / batch_size)
//  ->SetParam("clip_gradient", 10);

  for (int epoch = start_epoch; epoch < max_epoch; ++epoch) {
    dataIter.Reset();
    auto tic = chrono::system_clock::now();
    while (dataIter.Next()) {
      auto data_batch = dataIter.GetDataBatch();
      data_batch.data.CopyTo(&exe->arg_dict()["data"]);
      data_batch.label.CopyTo(&exe->arg_dict()["softmax_label"]);
      exe->arg_dict()["LSTM_init_c"].SyncCopyFromCPU(zeros);
      exe->arg_dict()["LSTM_init_h"].SyncCopyFromCPU(zeros);
      NDArray::WaitAll();

      exe->Forward(true);
      exe->Backward();
      exe->UpdateAll(opt, learning_rate, weight_decay);
      NDArray::WaitAll();
    }
    auto toc = chrono::system_clock::now();
    cout << "Epoch[" << epoch << "] Time Cost:" <<
        chrono::duration_cast<chrono::seconds>(toc - tic).count() << " seconds ";
    OutputPerplexity(&exe->arg_dict()["softmax_label"], &exe->outputs[0]);
    string filepath = prefix + "-" + to_string(epoch) + ".params";
    SaveCheckpoint(filepath, RNN, exe);
  }
}
Example #2
0
void predictWithBuiltInRNNOp(wstring* ptext, int sequence_length, const string param_file,
  const string dictionary_file) {
  Context device(DeviceType::kGPU, 0);
  auto results = BucketSentenceIter::loadCharIndices(dictionary_file);
  auto dictionary = get<0>(results);
  auto charIndices = get<1>(results);
  input_dim = static_cast<int>(charIndices.size());
  auto RNN = LSTMWithBuiltInRNNOp(num_lstm_layer, 1, input_dim, num_hidden, num_embed, 0);

  map<string, NDArray> args_map;
  args_map["data"] = NDArray(Shape(1, 1), device, false);
  args_map["softmax_label"] = NDArray(Shape(1, 1), device, false);
  vector<mx_float> zeros(1 * num_lstm_layer * num_hidden, 0);
  // Avoiding SwapAxis, batch_size=1 is of second dimension.
  args_map["LSTM_init_c"] = NDArray(Shape(num_lstm_layer, 1, num_hidden), device, false);
  args_map["LSTM_init_h"] = NDArray(Shape(num_lstm_layer, 1, num_hidden), device, false);
  args_map["LSTM_init_c"].SyncCopyFromCPU(zeros);
  args_map["LSTM_init_h"].SyncCopyFromCPU(zeros);
  Executor* exe = RNN.SimpleBind(device, args_map);
  LoadCheckpoint(param_file, exe);

  mx_float index;
  wchar_t next = 0;
  vector<mx_float> softmax;
  softmax.resize(input_dim);
  for (auto c : *ptext) {
    exe->arg_dict()["data"].SyncCopyFromCPU(&dictionary[c], 1);
    exe->Forward(false);

    exe->outputs[0].SyncCopyToCPU(softmax.data(), input_dim);
    exe->outputs[1].CopyTo(&args_map["LSTM_init_h"]);
    exe->outputs[2].CopyTo(&args_map["LSTM_init_c"]);

    size_t n = max_element(softmax.begin(), softmax.end()) - softmax.begin();
    index = (mx_float) n;
    next = charIndices[n];
  }
  ptext->push_back(next);

  for (int i = 0; i < sequence_length; i++) {
    exe->arg_dict()["data"].SyncCopyFromCPU(&index, 1);
    exe->Forward(false);

    exe->outputs[0].SyncCopyToCPU(softmax.data(), input_dim);
    exe->outputs[1].CopyTo(&args_map["LSTM_init_h"]);
    exe->outputs[2].CopyTo(&args_map["LSTM_init_c"]);

    size_t n = max_element(softmax.begin(), softmax.end()) - softmax.begin();
    index = (mx_float) n;
    next = charIndices[n];
    ptext->push_back(next);
  }
}
Example #3
0
void predict(wstring* ptext, int sequence_length, const string param_file,
    const string dictionary_file) {
  Context device(DeviceType::kGPU, 0);
  auto results = BucketSentenceIter::loadCharIndices(dictionary_file);
  auto dictionary = get<0>(results);
  auto charIndices = get<1>(results);
  input_dim = static_cast<int>(charIndices.size());
  auto RNN = LSTMUnroll(num_lstm_layer, 1, input_dim, num_hidden, num_embed, 0);

  map<string, NDArray> args_map;
  args_map["data"] = NDArray(Shape(1, 1), device, false);
  args_map["softmax_label"] = NDArray(Shape(1, 1), device, false);
  vector<mx_float> zeros(1 * num_hidden, 0);
  for (int l = 0; l < num_lstm_layer; l++) {
    string key = "l" + to_string(l) + "_init_";
    args_map[key + "c"] = NDArray(Shape(1, num_hidden), device, false);
    args_map[key + "h"] = NDArray(Shape(1, num_hidden), device, false);
    args_map[key + "c"].SyncCopyFromCPU(zeros);
    args_map[key + "h"].SyncCopyFromCPU(zeros);
  }
  Executor* exe = RNN.SimpleBind(device, args_map);
  LoadCheckpoint(param_file, exe);

  mx_float index;
  wchar_t next;
  vector<mx_float> softmax;
  softmax.resize(input_dim);
  for (auto c : *ptext) {
    exe->arg_dict()["data"].SyncCopyFromCPU(&dictionary[c], 1);
    exe->Forward(false);

    exe->outputs[0].SyncCopyToCPU(softmax.data(), input_dim);
    for (int l = 0; l < num_lstm_layer; l++) {
      string key = "l" + to_string(l) + "_init_";
      exe->outputs[l * 2 + 1].CopyTo(&args_map[key + "c"]);
      exe->outputs[l * 2 + 2].CopyTo(&args_map[key + "h"]);
    }

    size_t n = max_element(softmax.begin(), softmax.end()) - softmax.begin();
    index = (mx_float) n;
    next = charIndices[n];
  }
  ptext->push_back(next);

  for (int i = 0; i < sequence_length; i++) {
    exe->arg_dict()["data"].SyncCopyFromCPU(&index, 1);
    exe->Forward(false);

    exe->outputs[0].SyncCopyToCPU(softmax.data(), input_dim);
    for (int l = 0; l < num_lstm_layer; l++) {
      string key = "l" + to_string(l) + "_init_";
      exe->outputs[l * 2 + 1].CopyTo(&args_map[key + "c"]);
      exe->outputs[l * 2 + 2].CopyTo(&args_map[key + "h"]);
    }

    size_t n = max_element(softmax.begin(), softmax.end()) - softmax.begin();
    index = (mx_float) n;
    next = charIndices[n];
    ptext->push_back(next);
  }
}