Exemplo n.º 1
 /*
  * Subtracts mean_img from slices [0, 1) and [1, 2) of `data`, binds an
  * executor with `data` as the "data" argument, runs an inference-mode forward
  * pass and prints the first 1024 values of output row 0 to stdout as one
  * comma-separated line.
  *
  * data: input batch; it is normalized in place before being bound.
  */
 void Extract(NDArray data) {
   /* Normalize the pictures */
   data.Slice(0, 1) -= mean_img;
   data.Slice(1, 2) -= mean_img;
   args_map["data"] = data;
   /* Bind the executor */
   executor = net.SimpleBind(global_ctx, args_map, map<string, NDArray>(),
                             map<string, OpReqType>(), aux_map);
   /* The outputs are only populated once a forward pass has run. */
   executor->Forward(false);
   /* Print out the features */
   auto array = executor->outputs[0].Copy(Context(kCPU, 0));
   /* MXNet schedules the copy asynchronously; wait before reading elements. */
   NDArray::WaitAll();
   for (int i = 0; i < 1024; ++i) {
     cout << array.At(0, i) << ",";
   }
   cout << endl;
 }
Exemplo n.º 2
int main(int argc, char** argv) {
    const int image_size = 28;
    const int num_mnist_features = image_size * image_size;
    int batch_size = 100;
    int max_epoch = 10;
    const float learning_rate = 0.1;
    const float weight_decay = 1e-2;
    bool isGpu = false;

    std::string training_set;
    std::string test_set;
    std::string hidden_units_string;
    int index = 1;
    while (index < argc) {
        if (strcmp("--train", argv[index]) == 0) {
            training_set = argv[index];
        } else if (strcmp("--test", argv[index]) == 0) {
            test_set = argv[index];
        } else if (strcmp("--epochs", argv[index]) == 0) {
            max_epoch = strtol(argv[index], NULL, 10);
        } else if (strcmp("--batch_size", argv[index]) == 0) {
            batch_size = strtol(argv[index], NULL, 10);
        } else if (strcmp("--hidden_units", argv[index]) == 0) {
            hidden_units_string = argv[index];
        } else if (strcmp("--gpu", argv[index]) == 0) {
            isGpu = true;
        } else if (strcmp("--help", argv[index]) == 0) {
            return 0;

    if (training_set.empty() || test_set.empty() || hidden_units_string.empty()) {
        std::cout << "ERROR: The mandatory arguments such as path to training and test data or "
        << "number of hidden units for mlp are not specified." << std::endl << std::endl;
        return 1;

    std::vector<int> hidden_units = getLayers(hidden_units_string);

    if (hidden_units.empty()) {
        std::cout << "ERROR: Number of hidden units are not provided in correct format."
        << "The numbers need to be separated by ' '." << std::endl << std::endl;
        return 1;

     * The MNIST data in CSV format has 785 columns.
     * The first column is "Label" and rest of the columns contain data.
     * The mnist_train.csv has 60000 records and mnist_test.csv has
     * 10000 records.
    auto train_iter = MXDataIter("CSVIter")
    .SetParam("data_csv", training_set)
    .SetParam("data_shape", Shape(num_mnist_features + 1, 1))
    .SetParam("batch_size", batch_size)
    .SetParam("flat", 1)
    .SetParam("shuffle", 0)

    auto val_iter = MXDataIter("CSVIter")
    .SetParam("data_csv", test_set)
    .SetParam("data_shape", Shape(num_mnist_features + 1, 1))
    .SetParam("batch_size", batch_size)
    .SetParam("flat", 1)
    .SetParam("shuffle", 0)

    auto net = mlp(hidden_units);

    Context ctx = Context::cpu();
    if (isGpu) {
        ctx = Context::gpu();

    std::map<std::string, NDArray> args;
    args["data"] = NDArray(Shape(batch_size, num_mnist_features), ctx);
    args["label"] = NDArray(Shape(batch_size), ctx);
    // Let MXNet infer shapes other parameters such as weights
    net.InferArgsMap(ctx, &args, args);

    // Initialize all parameters with uniform distribution U(-0.01, 0.01)
    auto initializer = Uniform(0.01);
    for (auto& arg : args) {
        // arg.first is parameter name, and arg.second is the value
        initializer(arg.first, &arg.second);

    // Create sgd optimiz er
    Optimizer* opt = OptimizerRegistry::Find("sgd");
    opt->SetParam("rescale_grad", 1.0/batch_size)
    ->SetParam("lr", learning_rate)
    ->SetParam("wd", weight_decay);

    // Create executor by binding parameters to the model
    auto *exec = net.SimpleBind(ctx, args);
    auto arg_names = net.ListArguments();

    // Start training
    for (int iter = 0; iter < max_epoch; ++iter) {
        int samples = 0;

        auto tic = std::chrono::system_clock::now();
        while (train_iter.Next()) {
            samples += batch_size;
            auto data_batch = train_iter.GetDataBatch();

             * The shape of data_batch.data is (batch_size, (num_mnist_features + 1))
             * Need to reshape this data so that label column can be extracted from this data.
            NDArray reshapedData = data_batch.data.Reshape(Shape((num_mnist_features + 1),

             * Extract the label data by slicing the first column of the data and
             * copy it to "label" arg.
            reshapedData.Slice(0, 1).Reshape(Shape(batch_size)).CopyTo(&args["label"]);

             * Extract the feature data by slicing the columns 1 to 785 of the data and
             * copy it to "data" arg.
            reshapedData.Slice(1, (num_mnist_features + 1)).Reshape(Shape(batch_size,


            // Compute gradients
            // Update parameters
            for (size_t i = 0; i < arg_names.size(); ++i) {
                if (arg_names[i] == "data" || arg_names[i] == "label") continue;
                opt->Update(i, exec->arg_arrays[i], exec->grad_arrays[i]);
        auto toc = std::chrono::system_clock::now();

        Accuracy acc;
        while (val_iter.Next()) {
            auto data_batch = val_iter.GetDataBatch();

             * The shape of data_batch.data is (batch_size, (num_mnist_features + 1))
             * Need to reshape this data so that label column can be extracted from this data.
            NDArray reshapedData = data_batch.data.Reshape(Shape((num_mnist_features + 1),

             * Extract the label data by slicing the first column of the data and
             * copy it to "label" arg.
            NDArray labelData = reshapedData.Slice(0, 1).Reshape(Shape(batch_size));

             * Extract the feature data by slicing the columns 1 to 785 of the data and
             * copy it to "data" arg.
            reshapedData.Slice(1, (num_mnist_features + 1)).Reshape(Shape(batch_size,

            // Forward pass is enough as no gradient is needed when evaluating
            acc.Update(labelData, exec->outputs[0]);
        float duration = std::chrono::duration_cast<std::chrono::milliseconds>
        (toc - tic).count() / 1000.0;
        LG << "Epoch[" << iter << "]  " << samples/duration << " samples/sec Accuracy: "
        << acc.Get();

    delete exec;
    delete opt;
    return 0;