Example no. 1
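Trains an Inception-BN network with 10 output classes on an ImageRecord dataset, using the ccsgd optimizer on a single GPU.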
int main(int argc, char const *argv[]) {
  int batch_size = 40;
  int max_epoch = 100;
  float learning_rate = 1e-4;
  float weight_decay = 1e-4;

  auto inception_bn_net = InceptionSymbol(10);
  std::map<std::string, NDArray> args_map;
  std::map<std::string, NDArray> aux_map;

  args_map["data"] = NDArray(Shape(batch_size, 3, 224, 224), Context::gpu());
  args_map["data_label"] = NDArray(Shape(batch_size), Context::gpu());
  inception_bn_net.InferArgsMap(Context::gpu(), &args_map, args_map);

  auto train_iter = MXDataIter("ImageRecordIter")
      .SetParam("path_imglist", "./train.lst")
      .SetParam("path_imgrec", "./train.rec")
      .SetParam("data_shape", Shape(3, 224, 224))
      .SetParam("batch_size", batch_size)
      .SetParam("shuffle", 1)
      .CreateDataIter();

  auto val_iter = MXDataIter("ImageRecordIter")
      .SetParam("path_imglist", "./val.lst")
      .SetParam("path_imgrec", "./val.rec")
      .SetParam("data_shape", Shape(3, 224, 224))
      .SetParam("batch_size", batch_size)
      .CreateDataIter();

  Optimizer* opt = OptimizerRegistry::Find("ccsgd");
  opt->SetParam("momentum", 0.9)
     ->SetParam("rescale_grad", 1.0 / batch_size)
     ->SetParam("clip_gradient", 10)
     ->SetParam("lr", learning_rate)
     ->SetParam("wd", weight_decay);

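  // Create executor by binding parameters to the model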
  auto *exec = inception_bn_net.SimpleBind(Context::gpu(), args_map);
  auto arg_names = inception_bn_net.ListArguments();

  for (int iter = 0; iter < max_epoch; ++iter) {
    LG << "Epoch: " << iter;
    train_iter.Reset();
    while (train_iter.Next()) {
      auto data_batch = train_iter.GetDataBatch();
      data_batch.data.CopyTo(&args_map["data"]);
      data_batch.label.CopyTo(&args_map["data_label"]);
      NDArray::WaitAll();

      exec->Forward(true);
      exec->Backward();
      // Update parameters
      for (size_t i = 0; i < arg_names.size(); ++i) {
        if (arg_names[i] == "data" || arg_names[i] == "data_label") continue;
        opt->Update(i, exec->arg_arrays[i], exec->grad_arrays[i]);
      }

      NDArray::WaitAll();
    }

    Accuracy acu;
    val_iter.Reset();
    while (val_iter.Next()) {
      auto data_batch = val_iter.GetDataBatch();
      data_batch.data.CopyTo(&args_map["data"]);
      data_batch.label.CopyTo(&args_map["data_label"]);
      NDArray::WaitAll();
      exec->Forward(false);
      NDArray::WaitAll();
      acu.Update(data_batch.label, exec->outputs[0]);
    }
    LG << "Accuracy: " << acu.Get();
  }
  delete exec;
  delete opt;
  MXNotifyShutdown();
  return 0;
}
Example no. 2
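Trains LeNet on MNIST on the GPU, rebinding the executor with SimpleBind on every batch and updating parameters through Executor::UpdateAll.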
int main(int argc, char const *argv[]) {
  /*setup basic configs*/
  int W = 28;
  int H = 28;
  int batch_size = 128;
  int max_epoch = 100;
  float learning_rate = 1e-4;
  float weight_decay = 1e-4;

  auto lenet = LenetSymbol();
  std::map<std::string, NDArray> args_map;

  args_map["data"] = NDArray(Shape(batch_size, 1, W, H), Context::gpu());
  args_map["data_label"] = NDArray(Shape(batch_size), Context::gpu());
  lenet.InferArgsMap(Context::gpu(), &args_map, args_map);

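  // Override the default initialization: Gaussian samples for fc1_w, zeros for fc2_b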
  args_map["fc1_w"] = NDArray(Shape(500, 4 * 4 * 50), Context::gpu());
  NDArray::SampleGaussian(0, 1, &args_map["fc1_w"]);
  args_map["fc2_b"] = NDArray(Shape(10), Context::gpu());
  args_map["fc2_b"] = 0;

  auto train_iter = MXDataIter("MNISTIter")
      .SetParam("image", "./train-images-idx3-ubyte")
      .SetParam("label", "./train-labels-idx1-ubyte")
      .SetParam("batch_size", batch_size)
      .SetParam("shuffle", 1)
      .SetParam("flat", 0)
      .CreateDataIter();
  auto val_iter = MXDataIter("MNISTIter")
      .SetParam("image", "./t10k-images-idx3-ubyte")
      .SetParam("label", "./t10k-labels-idx1-ubyte")
      .CreateDataIter();

  Optimizer opt("ccsgd", learning_rate, weight_decay);
  opt.SetParam("momentum", 0.9)
     .SetParam("rescale_grad", 1.0)
     .SetParam("clip_gradient", 10);

  for (int iter = 0; iter < max_epoch; ++iter) {
    LG << "Epoch: " << iter;
    train_iter.Reset();
    while (train_iter.Next()) {
      auto data_batch = train_iter.GetDataBatch();
      args_map["data"] = data_batch.data.Copy(Context::gpu());
      args_map["data_label"] = data_batch.label.Copy(Context::gpu());
      NDArray::WaitAll();
      auto *exec = lenet.SimpleBind(Context::gpu(), args_map);
      exec->Forward(true);
      exec->Backward();
      exec->UpdateAll(&opt, learning_rate, weight_decay);
      delete exec;
    }

    Accuracy acu;
    val_iter.Reset();
    while (val_iter.Next()) {
      auto data_batch = val_iter.GetDataBatch();
      args_map["data"] = data_batch.data.Copy(Context::gpu());
      args_map["data_label"] = data_batch.label.Copy(Context::gpu());
      NDArray::WaitAll();
      auto *exec = lenet.SimpleBind(Context::gpu(), args_map);
      exec->Forward(false);
      NDArray::WaitAll();
      acu.Update(data_batch.label, exec->outputs[0]);
      delete exec;
    }
    LG << "Accuracy: " << acu.Get();
  }
  MXNotifyShutdown();
  return 0;
}
Example no. 3
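Trains a small MLP (128-64-10) on MNIST on the CPU, timing each epoch and reporting throughput alongside validation accuracy.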
int main(int argc, char** argv) {
  const int image_size = 28;
  const std::vector<int> layers{128, 64, 10};
  const int batch_size = 100;
  const int max_epoch = 10;
  const float learning_rate = 0.1;
  const float weight_decay = 1e-2;

  std::vector<std::string> data_files = { "./data/mnist_data/train-images-idx3-ubyte",
                                          "./data/mnist_data/train-labels-idx1-ubyte",
                                          "./data/mnist_data/t10k-images-idx3-ubyte",
                                          "./data/mnist_data/t10k-labels-idx1-ubyte"
                                        };

  auto train_iter = MXDataIter("MNISTIter");
  if (!setDataIter(&train_iter, "Train", data_files, batch_size)) {
    return 1;
  }

  auto val_iter = MXDataIter("MNISTIter");
  if (!setDataIter(&val_iter, "Label", data_files, batch_size)) {
    return 1;
  }

  TRY
  auto net = mlp(layers);

  Context ctx = Context::cpu();  // Use CPU for training

  std::map<std::string, NDArray> args;
  args["X"] = NDArray(Shape(batch_size, image_size*image_size), ctx);
  args["label"] = NDArray(Shape(batch_size), ctx);
  // Let MXNet infer the shapes of the other parameters, such as weights
  net.InferArgsMap(ctx, &args, args);

  // Initialize all parameters with uniform distribution U(-0.01, 0.01)
  auto initializer = Uniform(0.01);
  for (auto& arg : args) {
    // arg.first is parameter name, and arg.second is the value
    initializer(arg.first, &arg.second);
  }

  // Create sgd optimizer
  Optimizer* opt = OptimizerRegistry::Find("sgd");
  opt->SetParam("rescale_grad", 1.0/batch_size)
     ->SetParam("lr", learning_rate)
     ->SetParam("wd", weight_decay);

  // Create executor by binding parameters to the model
  auto *exec = net.SimpleBind(ctx, args);
  auto arg_names = net.ListArguments();

  // Start training
  for (int iter = 0; iter < max_epoch; ++iter) {
    int samples = 0;
    train_iter.Reset();

    auto tic = std::chrono::system_clock::now();
    while (train_iter.Next()) {
      samples += batch_size;
      auto data_batch = train_iter.GetDataBatch();
      // Set data and label
      data_batch.data.CopyTo(&args["X"]);
      data_batch.label.CopyTo(&args["label"]);

      // Compute gradients
      exec->Forward(true);
      exec->Backward();
      // Update parameters
      for (size_t i = 0; i < arg_names.size(); ++i) {
        if (arg_names[i] == "X" || arg_names[i] == "label") continue;
        opt->Update(i, exec->arg_arrays[i], exec->grad_arrays[i]);
      }
    }
    auto toc = std::chrono::system_clock::now();

    Accuracy acc;
    val_iter.Reset();
    while (val_iter.Next()) {
      auto data_batch = val_iter.GetDataBatch();
      data_batch.data.CopyTo(&args["X"]);
      data_batch.label.CopyTo(&args["label"]);
      // Forward pass is enough as no gradient is needed when evaluating
      exec->Forward(false);
      acc.Update(data_batch.label, exec->outputs[0]);
    }
    float duration = std::chrono::duration_cast<std::chrono::milliseconds>
                     (toc - tic).count() / 1000.0;
    LG << "Epoch: " << iter << " " << samples/duration << " samples/sec Accuracy: " << acc.Get();
  }

  delete exec;
  delete opt;
  MXNotifyShutdown();
  CATCH
  return 0;
}
Example no. 4
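A command-line MLP trainer that reads MNIST in CSV format; each 785-column record is reshaped so the label column can be sliced off before the forward pass.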
int main(int argc, char** argv) {
    const int image_size = 28;
    const int num_mnist_features = image_size * image_size;
    int batch_size = 100;
    int max_epoch = 10;
    const float learning_rate = 0.1;
    const float weight_decay = 1e-2;
    bool isGpu = false;

    std::string training_set;
    std::string test_set;
    std::string hidden_units_string;
    int index = 1;
    while (index < argc) {
        if (strcmp("--train", argv[index]) == 0) {
            index++;
            training_set = argv[index];
        } else if (strcmp("--test", argv[index]) == 0) {
            index++;
            test_set = argv[index];
        } else if (strcmp("--epochs", argv[index]) == 0) {
            index++;
            max_epoch = strtol(argv[index], NULL, 10);
        } else if (strcmp("--batch_size", argv[index]) == 0) {
            index++;
            batch_size = strtol(argv[index], NULL, 10);
        } else if (strcmp("--hidden_units", argv[index]) == 0) {
            index++;
            hidden_units_string = argv[index];
        } else if (strcmp("--gpu", argv[index]) == 0) {
            isGpu = true;
            index++;
        } else if (strcmp("--help", argv[index]) == 0) {
            printUsage();
            return 0;
        }
        index++;
    }

    if (training_set.empty() || test_set.empty() || hidden_units_string.empty()) {
        std::cout << "ERROR: The mandatory arguments such as path to training and test data or "
        << "number of hidden units for mlp are not specified." << std::endl << std::endl;
        printUsage();
        return 1;
    }

    std::vector<int> hidden_units = getLayers(hidden_units_string);

    if (hidden_units.empty()) {
        std::cout << "ERROR: Number of hidden units are not provided in correct format."
        << "The numbers need to be separated by ' '." << std::endl << std::endl;
        printUsage();
        return 1;
    }

    /*
     * The MNIST data in CSV format has 785 columns.
     * The first column is "Label" and rest of the columns contain data.
     * The mnist_train.csv has 60000 records and mnist_test.csv has
     * 10000 records.
     */
    auto train_iter = MXDataIter("CSVIter")
    .SetParam("data_csv", training_set)
    .SetParam("data_shape", Shape(num_mnist_features + 1, 1))
    .SetParam("batch_size", batch_size)
    .SetParam("flat", 1)
    .SetParam("shuffle", 0)
    .CreateDataIter();

    auto val_iter = MXDataIter("CSVIter")
    .SetParam("data_csv", test_set)
    .SetParam("data_shape", Shape(num_mnist_features + 1, 1))
    .SetParam("batch_size", batch_size)
    .SetParam("flat", 1)
    .SetParam("shuffle", 0)
    .CreateDataIter();

    TRY
    auto net = mlp(hidden_units);

    Context ctx = Context::cpu();
    if (isGpu) {
        ctx = Context::gpu();
    }

    std::map<std::string, NDArray> args;
    args["data"] = NDArray(Shape(batch_size, num_mnist_features), ctx);
    args["label"] = NDArray(Shape(batch_size), ctx);
    // Let MXNet infer the shapes of the other parameters, such as weights
    net.InferArgsMap(ctx, &args, args);

    // Initialize all parameters with uniform distribution U(-0.01, 0.01)
    auto initializer = Uniform(0.01);
    for (auto& arg : args) {
        // arg.first is parameter name, and arg.second is the value
        initializer(arg.first, &arg.second);
    }

    // Create sgd optimizer
    Optimizer* opt = OptimizerRegistry::Find("sgd");
    opt->SetParam("rescale_grad", 1.0/batch_size)
    ->SetParam("lr", learning_rate)
    ->SetParam("wd", weight_decay);

    // Create executor by binding parameters to the model
    auto *exec = net.SimpleBind(ctx, args);
    auto arg_names = net.ListArguments();

    // Start training
    for (int iter = 0; iter < max_epoch; ++iter) {
        int samples = 0;
        train_iter.Reset();

        auto tic = std::chrono::system_clock::now();
        while (train_iter.Next()) {
            samples += batch_size;
            auto data_batch = train_iter.GetDataBatch();

            /*
             * The shape of data_batch.data is (batch_size, (num_mnist_features + 1))
             * Need to reshape this data so that label column can be extracted from this data.
             */
            NDArray reshapedData = data_batch.data.Reshape(Shape((num_mnist_features + 1),
                                                                 batch_size));

            /*
             * Extract the label data by slicing the first column of the data and
             * copy it to "label" arg.
             */
            reshapedData.Slice(0, 1).Reshape(Shape(batch_size)).CopyTo(&args["label"]);

            /*
             * Extract the feature data by slicing the columns 1 to 785 of the data and
             * copy it to "data" arg.
             */
            reshapedData.Slice(1, (num_mnist_features + 1)).Reshape(Shape(batch_size,
                                                                         num_mnist_features))
                                                           .CopyTo(&args["data"]);

            exec->Forward(true);

            // Compute gradients
            exec->Backward();
            // Update parameters
            for (size_t i = 0; i < arg_names.size(); ++i) {
                if (arg_names[i] == "data" || arg_names[i] == "label") continue;
                opt->Update(i, exec->arg_arrays[i], exec->grad_arrays[i]);
            }
        }
        auto toc = std::chrono::system_clock::now();

        Accuracy acc;
        val_iter.Reset();
        while (val_iter.Next()) {
            auto data_batch = val_iter.GetDataBatch();

            /*
             * The shape of data_batch.data is (batch_size, (num_mnist_features + 1))
             * Need to reshape this data so that label column can be extracted from this data.
             */
            NDArray reshapedData = data_batch.data.Reshape(Shape((num_mnist_features + 1),
                                                                 batch_size));

            /*
             * Extract the label data by slicing the first column of the data and
             * copy it to "label" arg.
             */
            NDArray labelData = reshapedData.Slice(0, 1).Reshape(Shape(batch_size));
            labelData.CopyTo(&args["label"]);

            /*
             * Extract the feature data by slicing the columns 1 to 785 of the data and
             * copy it to "data" arg.
             */
            reshapedData.Slice(1, (num_mnist_features + 1)).Reshape(Shape(batch_size,
                                                                         num_mnist_features))
                                                                   .CopyTo(&args["data"]);

            // Forward pass is enough as no gradient is needed when evaluating
            exec->Forward(false);
            acc.Update(labelData, exec->outputs[0]);
        }
        float duration = std::chrono::duration_cast<std::chrono::milliseconds>
        (toc - tic).count() / 1000.0;
        LG << "Epoch[" << iter << "]  " << samples/duration << " samples/sec Accuracy: "
        << acc.Get();
    }

    delete exec;
    delete opt;
    MXNotifyShutdown();
    CATCH
    return 0;
}
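Example no. 5
A LeNet/MNIST variant that binds the executor once and tracks separate train and validation accuracy metrics for each epoch.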
int main(int argc, char const *argv[]) {
  /*setup basic configs*/
  int W = 28;
  int H = 28;
  int batch_size = 128;
  int max_epoch = 100;
  float learning_rate = 1e-4;
  float weight_decay = 1e-4;

  auto lenet = LenetSymbol();
  std::map<std::string, NDArray> args_map;

  args_map["data"] = NDArray(Shape(batch_size, 1, W, H), Context::gpu());
  args_map["data_label"] = NDArray(Shape(batch_size), Context::gpu());
  lenet.InferArgsMap(Context::gpu(), &args_map, args_map);

  args_map["fc1_w"] = NDArray(Shape(500, 4 * 4 * 50), Context::gpu());
  NDArray::SampleGaussian(0, 1, &args_map["fc1_w"]);
  args_map["fc2_b"] = NDArray(Shape(10), Context::gpu());
  args_map["fc2_b"] = 0;

  auto train_iter = MXDataIter("MNISTIter")
      .SetParam("image", "./mnist_data/train-images-idx3-ubyte")
      .SetParam("label", "./mnist_data/train-labels-idx1-ubyte")
      .SetParam("batch_size", batch_size)
      .SetParam("shuffle", 1)
      .SetParam("flat", 0)
      .CreateDataIter();
  auto val_iter = MXDataIter("MNISTIter")
      .SetParam("image", "./mnist_data/t10k-images-idx3-ubyte")
      .SetParam("label", "./mnist_data/t10k-labels-idx1-ubyte")
      .CreateDataIter();

  Optimizer* opt = OptimizerRegistry::Find("ccsgd");
  opt->SetParam("momentum", 0.9)
     ->SetParam("rescale_grad", 1.0)
     ->SetParam("clip_gradient", 10)
     ->SetParam("lr", learning_rate)
     ->SetParam("wd", weight_decay);


  auto *exec = lenet.SimpleBind(Context::gpu(), args_map);
  auto arg_names = lenet.ListArguments();

  // Create metrics
  Accuracy train_acc, val_acc;

  for (int iter = 0; iter < max_epoch; ++iter) {
    int samples = 0;
    train_iter.Reset();
    train_acc.Reset();

    auto tic = std::chrono::system_clock::now();

    while (train_iter.Next()) {
      samples += batch_size;
      auto data_batch = train_iter.GetDataBatch();

      data_batch.data.CopyTo(&args_map["data"]);
      data_batch.label.CopyTo(&args_map["data_label"]);
      NDArray::WaitAll();

      // Compute gradients
      exec->Forward(true);
      exec->Backward();

      // Update parameters
      for (size_t i = 0; i < arg_names.size(); ++i) {
        if (arg_names[i] == "data" || arg_names[i] == "data_label") continue;
        opt->Update(i, exec->arg_arrays[i], exec->grad_arrays[i]);
      }

      // Update metric
      train_acc.Update(data_batch.label, exec->outputs[0]);
    }

    // One epoch of training is finished
    auto toc = std::chrono::system_clock::now();
    float duration = std::chrono::duration_cast<std::chrono::milliseconds>(toc - tic).count() / 1000.0;
    LG << "Epoch[" << iter << "] " << samples / duration
       << " samples/sec " << "Train-Accuracy=" << train_acc.Get();

    val_iter.Reset();
    val_acc.Reset();
    while (val_iter.Next()) {
      auto data_batch = val_iter.GetDataBatch();
      data_batch.data.CopyTo(&args_map["data"]);
      data_batch.label.CopyTo(&args_map["data_label"]);
      NDArray::WaitAll();

      // Only the forward pass is needed when evaluating
      exec->Forward(false);
      NDArray::WaitAll();
      val_acc.Update(data_batch.label, exec->outputs[0]);
    }
    LG << "Epoch[" << iter << "] Val-Accuracy=" << val_acc.Get();
  }

  delete exec;
  delete opt;
  MXNotifyShutdown();
  return 0;
}
Example no. 6
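Trains an MLP on flattened MNIST images, rebinding the executor for every batch and applying the SGD update through Executor::UpdateAll.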
int main(int argc, char** argv) {
  const int image_size = 28;
  const std::vector<int> layers{128, 64, 10};
  const int batch_size = 100;
  const int max_epoch = 10;
  const float learning_rate = 0.1;
  const float weight_decay = 1e-2;

  auto train_iter = MXDataIter("MNISTIter")
      .SetParam("image", "./mnist_data/train-images-idx3-ubyte")
      .SetParam("label", "./mnist_data/train-labels-idx1-ubyte")
      .SetParam("batch_size", batch_size)
      .SetParam("flat", 1)
      .CreateDataIter();
  auto val_iter = MXDataIter("MNISTIter")
      .SetParam("image", "./mnist_data/t10k-images-idx3-ubyte")
      .SetParam("label", "./mnist_data/t10k-labels-idx1-ubyte")
      .SetParam("batch_size", batch_size)
      .SetParam("flat", 1)
      .CreateDataIter();

  auto net = mlp(layers);

  Context ctx = Context::cpu();  // Use CPU for training

  std::map<std::string, NDArray> args;
  args["X"] = NDArray(Shape(batch_size, image_size*image_size), ctx);
  args["label"] = NDArray(Shape(batch_size), ctx);
  // Let MXNet infer the shapes of the other parameters, such as weights
  net.InferArgsMap(ctx, &args, args);

  // Initialize all parameters with uniform distribution U(-0.01, 0.01)
  auto initializer = Uniform(0.01);
  for (auto& arg : args) {
    // arg.first is parameter name, and arg.second is the value
    initializer(arg.first, &arg.second);
  }

  // Create sgd optimizer
  Optimizer* opt = OptimizerRegistry::Find("sgd");
  opt->SetParam("rescale_grad", 1.0/batch_size);

  // Start training
  for (int iter = 0; iter < max_epoch; ++iter) {
    int samples = 0;
    train_iter.Reset();

    auto tic = std::chrono::system_clock::now();
    while (train_iter.Next()) {
      samples += batch_size;
      auto data_batch = train_iter.GetDataBatch();
      // Set data and label
      args["X"] = data_batch.data;
      args["label"] = data_batch.label;

      // Create executor by binding parameters to the model
      auto *exec = net.SimpleBind(ctx, args);
      // Compute gradients
      exec->Forward(true);
      exec->Backward();
      // Update parameters
      exec->UpdateAll(opt, learning_rate, weight_decay);
      // Remember to free the memory
      delete exec;
    }
    auto toc = std::chrono::system_clock::now();

    Accuracy acc;
    val_iter.Reset();
    while (val_iter.Next()) {
      auto data_batch = val_iter.GetDataBatch();
      args["X"] = data_batch.data;
      args["label"] = data_batch.label;
      auto *exec = net.SimpleBind(ctx, args);
      // Forward pass is enough as no gradient is needed when evaluating
      exec->Forward(false);
      acc.Update(data_batch.label, exec->outputs[0]);
      delete exec;
    }
    float duration = std::chrono::duration_cast<std::chrono::milliseconds>(toc - tic).count() / 1000.0;
    LG << "Epoch: " << iter << " " << samples/duration << " samples/sec Accuracy: " << acc.Get();
  }

  delete opt;
  MXNotifyShutdown();
  return 0;
}
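Example no. 7
LeNet on MNIST with automatic CPU/GPU context selection and input resizing via ResizeInput; an optional command-line argument sets the number of epochs.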
int main(int argc, char const *argv[]) {
  /*setup basic configs*/
  int W = 28;
  int H = 28;
  int batch_size = 128;
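  // An optional first command-line argument overrides the default number of epochs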
  int max_epoch = argc > 1 ? strtol(argv[1], NULL, 10) : 100;
  float learning_rate = 1e-4;
  float weight_decay = 1e-4;

  auto dev_ctx = Context::cpu();
  int num_gpu;
  MXGetGPUCount(&num_gpu);
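  // Use the GPU only when one is present and MXNet was built with GPU support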
#if !MXNET_USE_CPU
  if (num_gpu > 0) {
    dev_ctx = Context::gpu();
  }
#endif

  auto lenet = LenetSymbol();
  std::map<std::string, NDArray> args_map;

  const Shape data_shape = Shape(batch_size, 1, H, W),
              label_shape = Shape(batch_size);
  args_map["data"] = NDArray(data_shape, dev_ctx);
  args_map["data_label"] = NDArray(label_shape, dev_ctx);
  lenet.InferArgsMap(dev_ctx, &args_map, args_map);

  args_map["fc1_w"] = NDArray(Shape(500, 4 * 4 * 50), dev_ctx);
  NDArray::SampleGaussian(0, 1, &args_map["fc1_w"]);
  args_map["fc2_b"] = NDArray(Shape(10), dev_ctx);
  args_map["fc2_b"] = 0;

  std::vector<std::string> data_files = { "./data/mnist_data/train-images-idx3-ubyte",
                                          "./data/mnist_data/train-labels-idx1-ubyte",
                                          "./data/mnist_data/t10k-images-idx3-ubyte",
                                          "./data/mnist_data/t10k-labels-idx1-ubyte"
                                        };

  auto train_iter = MXDataIter("MNISTIter");
  if (!setDataIter(&train_iter, "Train", data_files, batch_size)) {
    return 1;
  }

  auto val_iter = MXDataIter("MNISTIter");
  if (!setDataIter(&val_iter, "Label", data_files, batch_size)) {
    return 1;
  }

  Optimizer* opt = OptimizerRegistry::Find("sgd");
  opt->SetParam("momentum", 0.9)
     ->SetParam("rescale_grad", 1.0)
     ->SetParam("clip_gradient", 10)
     ->SetParam("lr", learning_rate)
     ->SetParam("wd", weight_decay);


  auto *exec = lenet.SimpleBind(dev_ctx, args_map);
  auto arg_names = lenet.ListArguments();

  // Create metrics
  Accuracy train_acc, val_acc;

  for (int iter = 0; iter < max_epoch; ++iter) {
    int samples = 0;
    train_iter.Reset();
    train_acc.Reset();

    auto tic = std::chrono::system_clock::now();

    while (train_iter.Next()) {
      samples += batch_size;
      auto data_batch = train_iter.GetDataBatch();

      ResizeInput(data_batch.data, data_shape).CopyTo(&args_map["data"]);
      data_batch.label.CopyTo(&args_map["data_label"]);
      NDArray::WaitAll();

      // Compute gradients
      exec->Forward(true);
      exec->Backward();

      // Update parameters
      for (size_t i = 0; i < arg_names.size(); ++i) {
        if (arg_names[i] == "data" || arg_names[i] == "data_label") continue;
        opt->Update(i, exec->arg_arrays[i], exec->grad_arrays[i]);
      }

      // Update metric
      train_acc.Update(data_batch.label, exec->outputs[0]);
    }

    // One epoch of training is finished
    auto toc = std::chrono::system_clock::now();
    float duration = std::chrono::duration_cast<std::chrono::milliseconds>
                     (toc - tic).count() / 1000.0;
    LG << "Epoch[" << iter << "] " << samples / duration
       << " samples/sec " << "Train-Accuracy=" << train_acc.Get();

    val_iter.Reset();
    val_acc.Reset();
    while (val_iter.Next()) {
      auto data_batch = val_iter.GetDataBatch();
      ResizeInput(data_batch.data, data_shape).CopyTo(&args_map["data"]);
      data_batch.label.CopyTo(&args_map["data_label"]);
      NDArray::WaitAll();

      // Only the forward pass is needed when evaluating
      exec->Forward(false);
      NDArray::WaitAll();
      val_acc.Update(data_batch.label, exec->outputs[0]);
    }
    LG << "Epoch[" << iter << "] Val-Accuracy=" << val_acc.Get();
  }

  delete exec;
  delete opt;
  MXNotifyShutdown();
  return 0;
}