int main(int argc, char const *argv[]) {
  int batch_size = 40;
  int max_epoch = 100;
  float learning_rate = 1e-4;
  float weight_decay = 1e-4;

  auto inception_bn_net = InceptionSymbol(10);
  std::map<std::string, NDArray> args_map;
  std::map<std::string, NDArray> aux_map;

  args_map["data"] = NDArray(Shape(batch_size, 3, 224, 224), Context::gpu());
  args_map["data_label"] = NDArray(Shape(batch_size), Context::gpu());
  inception_bn_net.InferArgsMap(Context::gpu(), &args_map, args_map);

  auto train_iter = MXDataIter("ImageRecordIter")
      .SetParam("path_imglist", "./train.lst")
      .SetParam("path_imgrec", "./train.rec")
      .SetParam("data_shape", Shape(3, 224, 224))
      .SetParam("batch_size", batch_size)
      .SetParam("shuffle", 1)
      .CreateDataIter();

  auto val_iter = MXDataIter("ImageRecordIter")
      .SetParam("path_imglist", "./val.lst")
      .SetParam("path_imgrec", "./val.rec")
      .SetParam("data_shape", Shape(3, 224, 224))
      .SetParam("batch_size", batch_size)
      .CreateDataIter();

  Optimizer* opt = OptimizerRegistry::Find("ccsgd");
  opt->SetParam("momentum", 0.9)
     ->SetParam("rescale_grad", 1.0 / batch_size)
     ->SetParam("clip_gradient", 10)
     ->SetParam("lr", learning_rate)
     ->SetParam("wd", weight_decay);

  auto *exec = inception_bn_net.SimpleBind(Context::gpu(), args_map);
  auto arg_names = inception_bn_net.ListArguments();

  for (int iter = 0; iter < max_epoch; ++iter) {
    LG << "Epoch: " << iter;
    train_iter.Reset();
    while (train_iter.Next()) {
      auto data_batch = train_iter.GetDataBatch();
      data_batch.data.CopyTo(&args_map["data"]);
      data_batch.label.CopyTo(&args_map["data_label"]);
      NDArray::WaitAll();

      exec->Forward(true);
      exec->Backward();
      // Update parameters
      for (size_t i = 0; i < arg_names.size(); ++i) {
        if (arg_names[i] == "data" || arg_names[i] == "data_label") continue;
        opt->Update(i, exec->arg_arrays[i], exec->grad_arrays[i]);
      }
      NDArray::WaitAll();
    }

    Accuracy acu;
    val_iter.Reset();
    while (val_iter.Next()) {
      auto data_batch = val_iter.GetDataBatch();
      data_batch.data.CopyTo(&args_map["data"]);
      data_batch.label.CopyTo(&args_map["data_label"]);
      NDArray::WaitAll();
      exec->Forward(false);
      NDArray::WaitAll();
      acu.Update(data_batch.label, exec->outputs[0]);
    }
    LG << "Accuracy: " << acu.Get();
  }

  delete exec;
  MXNotifyShutdown();
  return 0;
}
int main(int argc, char const *argv[]) {
  /* setup basic configs */
  int W = 28;
  int H = 28;
  int batch_size = 128;
  int max_epoch = 100;
  float learning_rate = 1e-4;
  float weight_decay = 1e-4;

  auto lenet = LenetSymbol();
  std::map<string, NDArray> args_map;

  args_map["data"] = NDArray(Shape(batch_size, 1, W, H), Context::gpu());
  args_map["data_label"] = NDArray(Shape(batch_size), Context::gpu());
  lenet.InferArgsMap(Context::gpu(), &args_map, args_map);

  args_map["fc1_w"] = NDArray(Shape(500, 4 * 4 * 50), Context::gpu());
  NDArray::SampleGaussian(0, 1, &args_map["fc1_w"]);
  args_map["fc2_b"] = NDArray(Shape(10), Context::gpu());
  args_map["fc2_b"] = 0;

  auto train_iter = MXDataIter("MNISTIter")
      .SetParam("image", "./train-images-idx3-ubyte")
      .SetParam("label", "./train-labels-idx1-ubyte")
      .SetParam("batch_size", batch_size)
      .SetParam("shuffle", 1)
      .SetParam("flat", 0)
      .CreateDataIter();

  auto val_iter = MXDataIter("MNISTIter")
      .SetParam("image", "./t10k-images-idx3-ubyte")
      .SetParam("label", "./t10k-labels-idx1-ubyte")
      .CreateDataIter();

  Optimizer opt("ccsgd", learning_rate, weight_decay);
  opt.SetParam("momentum", 0.9)
     .SetParam("rescale_grad", 1.0)
     .SetParam("clip_gradient", 10);

  for (int iter = 0; iter < max_epoch; ++iter) {
    LG << "Epoch: " << iter;
    train_iter.Reset();
    while (train_iter.Next()) {
      auto data_batch = train_iter.GetDataBatch();
      args_map["data"] = data_batch.data.Copy(Context::gpu());
      args_map["data_label"] = data_batch.label.Copy(Context::gpu());
      NDArray::WaitAll();

      auto *exec = lenet.SimpleBind(Context::gpu(), args_map);
      exec->Forward(true);
      exec->Backward();
      exec->UpdateAll(&opt, learning_rate, weight_decay);
      delete exec;
    }

    Accuracy acu;
    val_iter.Reset();
    while (val_iter.Next()) {
      auto data_batch = val_iter.GetDataBatch();
      args_map["data"] = data_batch.data.Copy(Context::gpu());
      args_map["data_label"] = data_batch.label.Copy(Context::gpu());
      NDArray::WaitAll();

      auto *exec = lenet.SimpleBind(Context::gpu(), args_map);
      exec->Forward(false);
      NDArray::WaitAll();
      acu.Update(data_batch.label, exec->outputs[0]);
      delete exec;
    }
    LG << "Accuracy: " << acu.Get();
  }
  return 0;
}
int main(int argc, char** argv) {
  const int image_size = 28;
  const std::vector<int> layers{128, 64, 10};
  const int batch_size = 100;
  const int max_epoch = 10;
  const float learning_rate = 0.1;
  const float weight_decay = 1e-2;

  std::vector<std::string> data_files = { "./data/mnist_data/train-images-idx3-ubyte",
                                          "./data/mnist_data/train-labels-idx1-ubyte",
                                          "./data/mnist_data/t10k-images-idx3-ubyte",
                                          "./data/mnist_data/t10k-labels-idx1-ubyte"
                                        };

  auto train_iter = MXDataIter("MNISTIter");
  if (!setDataIter(&train_iter, "Train", data_files, batch_size)) {
    return 1;
  }

  auto val_iter = MXDataIter("MNISTIter");
  if (!setDataIter(&val_iter, "Label", data_files, batch_size)) {
    return 1;
  }

  TRY
  auto net = mlp(layers);

  Context ctx = Context::cpu();  // Use CPU for training

  std::map<std::string, NDArray> args;
  args["X"] = NDArray(Shape(batch_size, image_size*image_size), ctx);
  args["label"] = NDArray(Shape(batch_size), ctx);
  // Let MXNet infer shapes of the other parameters, such as weights
  net.InferArgsMap(ctx, &args, args);

  // Initialize all parameters with uniform distribution U(-0.01, 0.01)
  auto initializer = Uniform(0.01);
  for (auto& arg : args) {
    // arg.first is the parameter name, and arg.second is the value
    initializer(arg.first, &arg.second);
  }

  // Create sgd optimizer
  Optimizer* opt = OptimizerRegistry::Find("sgd");
  opt->SetParam("rescale_grad", 1.0/batch_size)
     ->SetParam("lr", learning_rate)
     ->SetParam("wd", weight_decay);

  // Create executor by binding parameters to the model
  auto *exec = net.SimpleBind(ctx, args);
  auto arg_names = net.ListArguments();

  // Start training
  for (int iter = 0; iter < max_epoch; ++iter) {
    int samples = 0;
    train_iter.Reset();

    auto tic = std::chrono::system_clock::now();
    while (train_iter.Next()) {
      samples += batch_size;
      auto data_batch = train_iter.GetDataBatch();
      // Set data and label
      data_batch.data.CopyTo(&args["X"]);
      data_batch.label.CopyTo(&args["label"]);

      // Compute gradients
      exec->Forward(true);
      exec->Backward();
      // Update parameters
      for (size_t i = 0; i < arg_names.size(); ++i) {
        if (arg_names[i] == "X" || arg_names[i] == "label") continue;
        opt->Update(i, exec->arg_arrays[i], exec->grad_arrays[i]);
      }
    }
    auto toc = std::chrono::system_clock::now();

    Accuracy acc;
    val_iter.Reset();
    while (val_iter.Next()) {
      auto data_batch = val_iter.GetDataBatch();
      data_batch.data.CopyTo(&args["X"]);
      data_batch.label.CopyTo(&args["label"]);
      // Forward pass is enough as no gradient is needed when evaluating
      exec->Forward(false);
      acc.Update(data_batch.label, exec->outputs[0]);
    }
    float duration = std::chrono::duration_cast<std::chrono::milliseconds>
                     (toc - tic).count() / 1000.0;
    LG << "Epoch: " << iter << " " << samples/duration
       << " samples/sec Accuracy: " << acc.Get();
  }

  delete exec;
  delete opt;
  MXNotifyShutdown();
  CATCH
  return 0;
}
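/*
 * The mlp() builder used above is defined outside this section. As a point of
 * reference only, a minimal sketch of such a helper with the cpp-package Symbol
 * API could look like the following. The variable names ("X", "label",
 * "w0"/"b0", ...) are assumptions chosen to match the args used above; this is
 * not the definitive implementation.
 */
Symbol mlp(const std::vector<int> &layers) {
  auto x = Symbol::Variable("X");
  auto label = Symbol::Variable("label");

  std::vector<Symbol> weights(layers.size());
  std::vector<Symbol> biases(layers.size());
  std::vector<Symbol> outputs(layers.size());

  for (size_t i = 0; i < layers.size(); ++i) {
    weights[i] = Symbol::Variable("w" + std::to_string(i));
    biases[i]  = Symbol::Variable("b" + std::to_string(i));
    // Each layer is a FullyConnected op fed by the data or the previous layer
    Symbol fc = FullyConnected(i == 0 ? x : outputs[i - 1],
                               weights[i], biases[i], layers[i]);
    // ReLU on hidden layers; the last layer goes straight into the softmax output
    outputs[i] = (i == layers.size() - 1) ? fc : Activation(fc, ActivationActType::kRelu);
  }
  return SoftmaxOutput(outputs.back(), label);
}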
int main(int argc, char** argv) {
  const int image_size = 28;
  const int num_mnist_features = image_size * image_size;
  int batch_size = 100;
  int max_epoch = 10;
  const float learning_rate = 0.1;
  const float weight_decay = 1e-2;
  bool isGpu = false;

  std::string training_set;
  std::string test_set;
  std::string hidden_units_string;

  int index = 1;
  while (index < argc) {
    if (strcmp("--train", argv[index]) == 0) {
      index++;
      training_set = argv[index];
    } else if (strcmp("--test", argv[index]) == 0) {
      index++;
      test_set = argv[index];
    } else if (strcmp("--epochs", argv[index]) == 0) {
      index++;
      max_epoch = strtol(argv[index], NULL, 10);
    } else if (strcmp("--batch_size", argv[index]) == 0) {
      index++;
      batch_size = strtol(argv[index], NULL, 10);
    } else if (strcmp("--hidden_units", argv[index]) == 0) {
      index++;
      hidden_units_string = argv[index];
    } else if (strcmp("--gpu", argv[index]) == 0) {
      // Flag takes no value; the increment at the end of the loop moves past it.
      isGpu = true;
    } else if (strcmp("--help", argv[index]) == 0) {
      printUsage();
      return 0;
    }
    index++;
  }

  if (training_set.empty() || test_set.empty() || hidden_units_string.empty()) {
    std::cout << "ERROR: The mandatory arguments such as path to training and test data or "
              << "number of hidden units for mlp are not specified."
              << std::endl << std::endl;
    printUsage();
    return 1;
  }

  std::vector<int> hidden_units = getLayers(hidden_units_string);

  if (hidden_units.empty()) {
    std::cout << "ERROR: The number of hidden units is not provided in the correct format. "
              << "The numbers need to be separated by ' '."
              << std::endl << std::endl;
    printUsage();
    return 1;
  }

  /*
   * The MNIST data in CSV format has 785 columns.
   * The first column is the label and the rest of the columns contain pixel data.
   * mnist_train.csv has 60000 records and mnist_test.csv has 10000 records.
   */
  auto train_iter = MXDataIter("CSVIter")
      .SetParam("data_csv", training_set)
      .SetParam("data_shape", Shape(num_mnist_features + 1, 1))
      .SetParam("batch_size", batch_size)
      .SetParam("flat", 1)
      .SetParam("shuffle", 0)
      .CreateDataIter();

  auto val_iter = MXDataIter("CSVIter")
      .SetParam("data_csv", test_set)
      .SetParam("data_shape", Shape(num_mnist_features + 1, 1))
      .SetParam("batch_size", batch_size)
      .SetParam("flat", 1)
      .SetParam("shuffle", 0)
      .CreateDataIter();

  TRY
  auto net = mlp(hidden_units);

  Context ctx = Context::cpu();
  if (isGpu) {
    ctx = Context::gpu();
  }

  std::map<std::string, NDArray> args;
  args["data"] = NDArray(Shape(batch_size, num_mnist_features), ctx);
  args["label"] = NDArray(Shape(batch_size), ctx);
  // Let MXNet infer shapes of the other parameters, such as weights
  net.InferArgsMap(ctx, &args, args);

  // Initialize all parameters with uniform distribution U(-0.01, 0.01)
  auto initializer = Uniform(0.01);
  for (auto& arg : args) {
    // arg.first is the parameter name, and arg.second is the value
    initializer(arg.first, &arg.second);
  }

  // Create sgd optimizer
  Optimizer* opt = OptimizerRegistry::Find("sgd");
  opt->SetParam("rescale_grad", 1.0/batch_size)
     ->SetParam("lr", learning_rate)
     ->SetParam("wd", weight_decay);

  // Create executor by binding parameters to the model
  auto *exec = net.SimpleBind(ctx, args);
  auto arg_names = net.ListArguments();

  // Start training
  for (int iter = 0; iter < max_epoch; ++iter) {
    int samples = 0;
    train_iter.Reset();

    auto tic = std::chrono::system_clock::now();
    while (train_iter.Next()) {
      samples += batch_size;
      auto data_batch = train_iter.GetDataBatch();
      /*
       * The shape of data_batch.data is (batch_size, (num_mnist_features + 1)).
       * Reshape it so that the label column can be extracted.
       */
      NDArray reshapedData = data_batch.data.Reshape(Shape((num_mnist_features + 1), batch_size));

      /*
       * Extract the label data by slicing the first column of the data and
       * copy it to the "label" arg.
       */
      reshapedData.Slice(0, 1).Reshape(Shape(batch_size)).CopyTo(&args["label"]);

      /*
       * Extract the feature data by slicing columns 1 to 785 of the data and
       * copy it to the "data" arg.
       */
      reshapedData.Slice(1, (num_mnist_features + 1)).Reshape(Shape(batch_size, num_mnist_features))
          .CopyTo(&args["data"]);

      // Compute gradients
      exec->Forward(true);
      exec->Backward();

      // Update parameters
      for (size_t i = 0; i < arg_names.size(); ++i) {
        if (arg_names[i] == "data" || arg_names[i] == "label") continue;
        opt->Update(i, exec->arg_arrays[i], exec->grad_arrays[i]);
      }
    }
    auto toc = std::chrono::system_clock::now();

    Accuracy acc;
    val_iter.Reset();
    while (val_iter.Next()) {
      auto data_batch = val_iter.GetDataBatch();
      /*
       * The shape of data_batch.data is (batch_size, (num_mnist_features + 1)).
       * Reshape it so that the label column can be extracted.
       */
      NDArray reshapedData = data_batch.data.Reshape(Shape((num_mnist_features + 1), batch_size));

      /*
       * Extract the label data by slicing the first column of the data and
       * copy it to the "label" arg.
       */
      NDArray labelData = reshapedData.Slice(0, 1).Reshape(Shape(batch_size));
      labelData.CopyTo(&args["label"]);

      /*
       * Extract the feature data by slicing columns 1 to 785 of the data and
       * copy it to the "data" arg.
       */
      reshapedData.Slice(1, (num_mnist_features + 1)).Reshape(Shape(batch_size, num_mnist_features))
          .CopyTo(&args["data"]);

      // Forward pass is enough as no gradient is needed when evaluating
      exec->Forward(false);
      acc.Update(labelData, exec->outputs[0]);
    }
    float duration = std::chrono::duration_cast<std::chrono::milliseconds>
                     (toc - tic).count() / 1000.0;
    LG << "Epoch[" << iter << "] " << samples/duration
       << " samples/sec Accuracy: " << acc.Get();
  }

  delete exec;
  delete opt;
  MXNotifyShutdown();
  CATCH
  return 0;
}
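/*
 * getLayers() and printUsage() are helpers defined outside this section. For
 * illustration only, getLayers() could be a small parser for the space-separated
 * "--hidden_units" string described above (e.g. "128 64 10"). This is an assumed
 * implementation, not the original helper; it needs <sstream> and <vector>.
 */
std::vector<int> getLayers(const std::string &hidden_units_string) {
  std::vector<int> hidden_units;
  std::istringstream in(hidden_units_string);
  int unit;
  // Read one integer per whitespace-separated token; stop at the first bad token.
  while (in >> unit) {
    hidden_units.push_back(unit);
  }
  return hidden_units;
}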
int main(int argc, char const *argv[]) {
  /* setup basic configs */
  int W = 28;
  int H = 28;
  int batch_size = 128;
  int max_epoch = 100;
  float learning_rate = 1e-4;
  float weight_decay = 1e-4;

  auto lenet = LenetSymbol();
  std::map<string, NDArray> args_map;

  args_map["data"] = NDArray(Shape(batch_size, 1, W, H), Context::gpu());
  args_map["data_label"] = NDArray(Shape(batch_size), Context::gpu());
  lenet.InferArgsMap(Context::gpu(), &args_map, args_map);

  args_map["fc1_w"] = NDArray(Shape(500, 4 * 4 * 50), Context::gpu());
  NDArray::SampleGaussian(0, 1, &args_map["fc1_w"]);
  args_map["fc2_b"] = NDArray(Shape(10), Context::gpu());
  args_map["fc2_b"] = 0;

  auto train_iter = MXDataIter("MNISTIter")
      .SetParam("image", "./mnist_data/train-images-idx3-ubyte")
      .SetParam("label", "./mnist_data/train-labels-idx1-ubyte")
      .SetParam("batch_size", batch_size)
      .SetParam("shuffle", 1)
      .SetParam("flat", 0)
      .CreateDataIter();

  auto val_iter = MXDataIter("MNISTIter")
      .SetParam("image", "./mnist_data/t10k-images-idx3-ubyte")
      .SetParam("label", "./mnist_data/t10k-labels-idx1-ubyte")
      .CreateDataIter();

  Optimizer* opt = OptimizerRegistry::Find("ccsgd");
  opt->SetParam("momentum", 0.9)
     ->SetParam("rescale_grad", 1.0)
     ->SetParam("clip_gradient", 10)
     ->SetParam("lr", learning_rate)
     ->SetParam("wd", weight_decay);

  auto *exec = lenet.SimpleBind(Context::gpu(), args_map);
  auto arg_names = lenet.ListArguments();

  // Create metrics
  Accuracy train_acc, val_acc;

  for (int iter = 0; iter < max_epoch; ++iter) {
    int samples = 0;
    train_iter.Reset();
    train_acc.Reset();

    auto tic = chrono::system_clock::now();
    while (train_iter.Next()) {
      samples += batch_size;
      auto data_batch = train_iter.GetDataBatch();
      data_batch.data.CopyTo(&args_map["data"]);
      data_batch.label.CopyTo(&args_map["data_label"]);
      NDArray::WaitAll();

      // Compute gradients
      exec->Forward(true);
      exec->Backward();

      // Update parameters
      for (size_t i = 0; i < arg_names.size(); ++i) {
        if (arg_names[i] == "data" || arg_names[i] == "data_label") continue;
        opt->Update(i, exec->arg_arrays[i], exec->grad_arrays[i]);
      }
      // Update metric
      train_acc.Update(data_batch.label, exec->outputs[0]);
    }

    // one epoch of training is finished
    auto toc = chrono::system_clock::now();
    float duration = chrono::duration_cast<chrono::milliseconds>(toc - tic).count() / 1000.0;
    LG << "Epoch[" << iter << "] " << samples / duration
       << " samples/sec " << "Train-Accuracy=" << train_acc.Get();

    val_iter.Reset();
    val_acc.Reset();
    while (val_iter.Next()) {
      auto data_batch = val_iter.GetDataBatch();
      data_batch.data.CopyTo(&args_map["data"]);
      data_batch.label.CopyTo(&args_map["data_label"]);
      NDArray::WaitAll();

      // Only the forward pass is needed when evaluating
      exec->Forward(false);
      NDArray::WaitAll();
      val_acc.Update(data_batch.label, exec->outputs[0]);
    }
    LG << "Epoch[" << iter << "] Val-Accuracy=" << val_acc.Get();
  }

  delete exec;
  MXNotifyShutdown();
  return 0;
}
int main(int argc, char** argv) {
  const int image_size = 28;
  const vector<int> layers{128, 64, 10};
  const int batch_size = 100;
  const int max_epoch = 10;
  const float learning_rate = 0.1;
  const float weight_decay = 1e-2;

  auto train_iter = MXDataIter("MNISTIter")
      .SetParam("image", "./mnist_data/train-images-idx3-ubyte")
      .SetParam("label", "./mnist_data/train-labels-idx1-ubyte")
      .SetParam("batch_size", batch_size)
      .SetParam("flat", 1)
      .CreateDataIter();

  auto val_iter = MXDataIter("MNISTIter")
      .SetParam("image", "./mnist_data/t10k-images-idx3-ubyte")
      .SetParam("label", "./mnist_data/t10k-labels-idx1-ubyte")
      .SetParam("batch_size", batch_size)
      .SetParam("flat", 1)
      .CreateDataIter();

  auto net = mlp(layers);

  Context ctx = Context::cpu();  // Use CPU for training

  std::map<string, NDArray> args;
  args["X"] = NDArray(Shape(batch_size, image_size*image_size), ctx);
  args["label"] = NDArray(Shape(batch_size), ctx);
  // Let MXNet infer shapes of the other parameters, such as weights
  net.InferArgsMap(ctx, &args, args);

  // Initialize all parameters with uniform distribution U(-0.01, 0.01)
  auto initializer = Uniform(0.01);
  for (auto& arg : args) {
    // arg.first is the parameter name, and arg.second is the value
    initializer(arg.first, &arg.second);
  }

  // Create sgd optimizer
  Optimizer* opt = OptimizerRegistry::Find("sgd");
  opt->SetParam("rescale_grad", 1.0/batch_size);

  // Start training
  for (int iter = 0; iter < max_epoch; ++iter) {
    int samples = 0;
    train_iter.Reset();

    auto tic = chrono::system_clock::now();
    while (train_iter.Next()) {
      samples += batch_size;
      auto data_batch = train_iter.GetDataBatch();
      // Set data and label
      args["X"] = data_batch.data;
      args["label"] = data_batch.label;

      // Create executor by binding parameters to the model
      auto *exec = net.SimpleBind(ctx, args);

      // Compute gradients
      exec->Forward(true);
      exec->Backward();
      // Update parameters
      exec->UpdateAll(opt, learning_rate, weight_decay);
      // Remember to free the memory
      delete exec;
    }
    auto toc = chrono::system_clock::now();

    Accuracy acc;
    val_iter.Reset();
    while (val_iter.Next()) {
      auto data_batch = val_iter.GetDataBatch();
      args["X"] = data_batch.data;
      args["label"] = data_batch.label;
      auto *exec = net.SimpleBind(ctx, args);
      // Forward pass is enough as no gradient is needed when evaluating
      exec->Forward(false);
      acc.Update(data_batch.label, exec->outputs[0]);
      delete exec;
    }
    float duration = chrono::duration_cast<chrono::milliseconds>(toc - tic).count() / 1000.0;
    LG << "Epoch: " << iter << " " << samples/duration
       << " samples/sec Accuracy: " << acc.Get();
  }

  MXNotifyShutdown();
  return 0;
}
int main(int argc, char const *argv[]) {
  /* setup basic configs */
  int W = 28;
  int H = 28;
  int batch_size = 128;
  int max_epoch = argc > 1 ? strtol(argv[1], NULL, 10) : 100;
  float learning_rate = 1e-4;
  float weight_decay = 1e-4;

  auto dev_ctx = Context::cpu();
  int num_gpu;
  MXGetGPUCount(&num_gpu);
#if !MXNET_USE_CPU
  if (num_gpu > 0) {
    dev_ctx = Context::gpu();
  }
#endif

  auto lenet = LenetSymbol();
  std::map<std::string, NDArray> args_map;

  const Shape data_shape = Shape(batch_size, 1, H, W),
              label_shape = Shape(batch_size);
  args_map["data"] = NDArray(data_shape, dev_ctx);
  args_map["data_label"] = NDArray(label_shape, dev_ctx);
  lenet.InferArgsMap(dev_ctx, &args_map, args_map);

  args_map["fc1_w"] = NDArray(Shape(500, 4 * 4 * 50), dev_ctx);
  NDArray::SampleGaussian(0, 1, &args_map["fc1_w"]);
  args_map["fc2_b"] = NDArray(Shape(10), dev_ctx);
  args_map["fc2_b"] = 0;

  std::vector<std::string> data_files = { "./data/mnist_data/train-images-idx3-ubyte",
                                          "./data/mnist_data/train-labels-idx1-ubyte",
                                          "./data/mnist_data/t10k-images-idx3-ubyte",
                                          "./data/mnist_data/t10k-labels-idx1-ubyte"
                                        };

  auto train_iter = MXDataIter("MNISTIter");
  if (!setDataIter(&train_iter, "Train", data_files, batch_size)) {
    return 1;
  }

  auto val_iter = MXDataIter("MNISTIter");
  if (!setDataIter(&val_iter, "Label", data_files, batch_size)) {
    return 1;
  }

  Optimizer* opt = OptimizerRegistry::Find("sgd");
  opt->SetParam("momentum", 0.9)
     ->SetParam("rescale_grad", 1.0)
     ->SetParam("clip_gradient", 10)
     ->SetParam("lr", learning_rate)
     ->SetParam("wd", weight_decay);

  auto *exec = lenet.SimpleBind(dev_ctx, args_map);
  auto arg_names = lenet.ListArguments();

  // Create metrics
  Accuracy train_acc, val_acc;

  for (int iter = 0; iter < max_epoch; ++iter) {
    int samples = 0;
    train_iter.Reset();
    train_acc.Reset();

    auto tic = std::chrono::system_clock::now();
    while (train_iter.Next()) {
      samples += batch_size;
      auto data_batch = train_iter.GetDataBatch();
      ResizeInput(data_batch.data, data_shape).CopyTo(&args_map["data"]);
      data_batch.label.CopyTo(&args_map["data_label"]);
      NDArray::WaitAll();

      // Compute gradients
      exec->Forward(true);
      exec->Backward();

      // Update parameters
      for (size_t i = 0; i < arg_names.size(); ++i) {
        if (arg_names[i] == "data" || arg_names[i] == "data_label") continue;
        opt->Update(i, exec->arg_arrays[i], exec->grad_arrays[i]);
      }
      // Update metric
      train_acc.Update(data_batch.label, exec->outputs[0]);
    }

    // one epoch of training is finished
    auto toc = std::chrono::system_clock::now();
    float duration = std::chrono::duration_cast<std::chrono::milliseconds>
                     (toc - tic).count() / 1000.0;
    LG << "Epoch[" << iter << "] " << samples / duration
       << " samples/sec " << "Train-Accuracy=" << train_acc.Get();

    val_iter.Reset();
    val_acc.Reset();
    while (val_iter.Next()) {
      auto data_batch = val_iter.GetDataBatch();
      ResizeInput(data_batch.data, data_shape).CopyTo(&args_map["data"]);
      data_batch.label.CopyTo(&args_map["data_label"]);
      NDArray::WaitAll();

      // Only the forward pass is needed when evaluating
      exec->Forward(false);
      NDArray::WaitAll();
      val_acc.Update(data_batch.label, exec->outputs[0]);
    }
    LG << "Epoch[" << iter << "] Val-Accuracy=" << val_acc.Get();
  }

  delete exec;
  delete opt;
  MXNotifyShutdown();
  return 0;
}
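/*
 * setDataIter() is a shared helper (defined outside this section) that points an
 * MNISTIter at either the training files (data_files[0]/[1]) or the test files
 * (data_files[2]/[3]), selected by the "Train"/"Label" tag, and fails cleanly when
 * a file is missing. A plausible sketch follows; the file-existence check and the
 * exact parameter choices are assumptions, not the original helper, and it needs
 * <fstream>, <string>, and <vector>.
 */
bool setDataIter(MXDataIter *iter, const std::string &useType,
                 const std::vector<std::string> &data_files, int batch_size) {
  // Assumed check: return false if a required data file cannot be opened.
  auto exists = [](const std::string &path) {
    std::ifstream f(path);
    return f.good();
  };

  if (useType == "Train") {
    if (!exists(data_files[0]) || !exists(data_files[1])) return false;
    iter->SetParam("image", data_files[0]).SetParam("label", data_files[1]);
  } else {  // "Label" selects the validation/test files
    if (!exists(data_files[2]) || !exists(data_files[3])) return false;
    iter->SetParam("image", data_files[2]).SetParam("label", data_files[3]);
  }
  iter->SetParam("batch_size", batch_size)
      .SetParam("shuffle", 1)
      .SetParam("flat", 1)
      .CreateDataIter();
  return true;
}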