void predictWithBuiltInRNNOp(wstring* ptext, int sequence_length, const string param_file,
                             const string dictionary_file) {
  Context device(DeviceType::kGPU, 0);
  auto results = BucketSentenceIter::loadCharIndices(dictionary_file);
  auto dictionary = get<0>(results);
  auto charIndices = get<1>(results);
  input_dim = static_cast<int>(charIndices.size());
  auto RNN = LSTMWithBuiltInRNNOp(num_lstm_layer, 1, input_dim, num_hidden, num_embed, 0);

  map<string, NDArray> args_map;
  args_map["data"] = NDArray(Shape(1, 1), device, false);
  args_map["softmax_label"] = NDArray(Shape(1, 1), device, false);
  vector<mx_float> zeros(1 * num_lstm_layer * num_hidden, 0);
  // Avoiding SwapAxis: with the built-in RNN op, batch_size (= 1) is the second dimension.
  args_map["LSTM_init_c"] = NDArray(Shape(num_lstm_layer, 1, num_hidden), device, false);
  args_map["LSTM_init_h"] = NDArray(Shape(num_lstm_layer, 1, num_hidden), device, false);
  args_map["LSTM_init_c"].SyncCopyFromCPU(zeros);
  args_map["LSTM_init_h"].SyncCopyFromCPU(zeros);
  Executor* exe = RNN.SimpleBind(device, args_map);
  LoadCheckpoint(param_file, exe);

  mx_float index = 0;
  wchar_t next = 0;
  vector<mx_float> softmax;
  softmax.resize(input_dim);
  // Feed the seed text through the network, carrying the LSTM state forward.
  for (auto c : *ptext) {
    exe->arg_dict()["data"].SyncCopyFromCPU(&dictionary[c], 1);
    exe->Forward(false);

    exe->outputs[0].SyncCopyToCPU(softmax.data(), input_dim);
    exe->outputs[1].CopyTo(&args_map["LSTM_init_h"]);
    exe->outputs[2].CopyTo(&args_map["LSTM_init_c"]);

    size_t n = max_element(softmax.begin(), softmax.end()) - softmax.begin();
    index = (mx_float) n;
    next = charIndices[n];
  }
  ptext->push_back(next);

  // Generate sequence_length more characters greedily (argmax decoding).
  for (int i = 0; i < sequence_length; i++) {
    exe->arg_dict()["data"].SyncCopyFromCPU(&index, 1);
    exe->Forward(false);

    exe->outputs[0].SyncCopyToCPU(softmax.data(), input_dim);
    exe->outputs[1].CopyTo(&args_map["LSTM_init_h"]);
    exe->outputs[2].CopyTo(&args_map["LSTM_init_c"]);

    size_t n = max_element(softmax.begin(), softmax.end()) - softmax.begin();
    index = (mx_float) n;
    next = charIndices[n];
    ptext->push_back(next);
  }
}
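// Illustrative variant (an addition, not in the original example): the loops above
// decode greedily with argmax. Sampling the next character from the softmax
// distribution instead usually produces more varied text. Requires <random>.
static size_t SampleNextChar(const std::vector<mx_float>& softmax, std::mt19937* rng) {
  // discrete_distribution normalizes its weights, so the raw softmax outputs can be fed in.
  std::discrete_distribution<size_t> dist(softmax.begin(), softmax.end());
  return dist(*rng);  // replaces: max_element(softmax.begin(), softmax.end()) - softmax.begin()
}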
void trainWithBuiltInRNNOp(const string file, int batch_size, int max_epoch, int start_epoch) {
  Context device(DeviceType::kGPU, 0);
  BucketSentenceIter dataIter(file, batch_size, device);
  string prefix = file.substr(0, file.rfind("."));
  dataIter.saveCharIndices(prefix + ".dictionary");

  input_dim = static_cast<int>(dataIter.characterSize());
  sequence_length_max = dataIter.maxSequenceLength();

  auto RNN = LSTMWithBuiltInRNNOp(num_lstm_layer, sequence_length_max, input_dim,
                                  num_hidden, num_embed, dropout);
  map<string, NDArray> args_map;
  args_map["data"] = NDArray(Shape(batch_size, sequence_length_max), device, false);
  // Avoiding SwapAxis: with the built-in RNN op, batch_size is the second dimension.
  args_map["LSTM_init_c"] = NDArray(Shape(num_lstm_layer, batch_size, num_hidden), device, false);
  args_map["LSTM_init_h"] = NDArray(Shape(num_lstm_layer, batch_size, num_hidden), device, false);
  args_map["softmax_label"] = NDArray(Shape(batch_size, sequence_length_max), device, false);
  vector<mx_float> zeros(batch_size * num_lstm_layer * num_hidden, 0);
  Executor* exe = RNN.SimpleBind(device, args_map);

  if (start_epoch == -1) {
    RNNXavier xavier = RNNXavier(Xavier::gaussian, Xavier::in, 2.34);
    for (auto &arg : exe->arg_dict())
      xavier(arg.first, &arg.second);
  } else {
    LoadCheckpoint(prefix + "-" + to_string(start_epoch) + ".params", exe);
  }
  start_epoch++;

  mx_float learning_rate = 0.0002;
  mx_float weight_decay = 0.000002;
  Optimizer* opt = OptimizerRegistry::Find("ccsgd");
  // opt->SetParam("momentum", 0.9)->SetParam("rescale_grad", 1.0 / batch_size)
  //    ->SetParam("clip_gradient", 10);

  for (int epoch = start_epoch; epoch < max_epoch; ++epoch) {
    dataIter.Reset();
    auto tic = chrono::system_clock::now();
    while (dataIter.Next()) {
      auto data_batch = dataIter.GetDataBatch();
      data_batch.data.CopyTo(&exe->arg_dict()["data"]);
      data_batch.label.CopyTo(&exe->arg_dict()["softmax_label"]);
      // Reset the recurrent state at the start of every batch.
      exe->arg_dict()["LSTM_init_c"].SyncCopyFromCPU(zeros);
      exe->arg_dict()["LSTM_init_h"].SyncCopyFromCPU(zeros);
      NDArray::WaitAll();

      exe->Forward(true);
      exe->Backward();
      exe->UpdateAll(opt, learning_rate, weight_decay);
      NDArray::WaitAll();
    }
    auto toc = chrono::system_clock::now();
    cout << "Epoch[" << epoch << "] Time Cost:"
         << chrono::duration_cast<chrono::seconds>(toc - tic).count() << " seconds ";
    OutputPerplexity(&exe->arg_dict()["softmax_label"], &exe->outputs[0]);
    string filepath = prefix + "-" + to_string(epoch) + ".params";
    SaveCheckpoint(filepath, RNN, exe);
  }
}
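// A minimal driver sketch for the two functions above. The corpus path and checkpoint
// names are placeholders, and the globals they rely on (num_lstm_layer, num_hidden,
// num_embed, dropout, input_dim, sequence_length_max) are assumed to be defined
// elsewhere in the file.
int main() {
  // Train from scratch for 25 epochs; start_epoch == -1 triggers Xavier initialization.
  trainWithBuiltInRNNOp("corpus.txt", 32, 25, -1);
  // Generate text from the last checkpoint and the saved dictionary.
  wstring seed = L"The ";
  predictWithBuiltInRNNOp(&seed, 600, "corpus-24.params", "corpus.dictionary");
  wcout << seed << endl;  // the seed plus the generated continuation
  return 0;
}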
void Extract(NDArray data) {
  /* Normalize the pictures by subtracting the mean image. */
  data.Slice(0, 1) -= mean_img;
  data.Slice(1, 2) -= mean_img;
  args_map["data"] = data;
  /* Bind the executor. */
  executor = net.SimpleBind(global_ctx, args_map, map<string, NDArray>(),
                            map<string, OpReqType>(), aux_map);
  executor->Forward(false);
  /* Print out the features. */
  auto array = executor->outputs[0].Copy(Context(kCPU, 0));
  NDArray::WaitAll();
  for (int i = 0; i < 1024; ++i) {
    cout << array.At(0, i) << ",";
  }
  cout << endl;
}
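// A usage sketch for Extract(). Assumptions: two images are already decoded, resized
// to the network's input resolution, and laid out NCHW as floats; the 2x3x224x224
// shape and the LoadTwoImagesNCHW() helper are hypothetical, not from the original.
std::vector<float> pixels = LoadTwoImagesNCHW();  // hypothetical loader, size 2*3*224*224
NDArray batch(Shape(2, 3, 224, 224), global_ctx, false);
batch.SyncCopyFromCPU(pixels.data(), pixels.size());
NDArray::WaitAll();
Extract(batch);  // prints 1024 comma-separated features for the first image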
void Test(vector<float> &vec, int data_count) {
  args_map["data"] = NDArray(Shape(data_count, 1, W, H), ctx_dev, false);
  args_map["data_label"] = NDArray(Shape(data_count), ctx_dev, false);

  const float *dptr = vec.data();
  args_map["data"].SyncCopyFromCPU(dptr, data_count * W * H);
  NDArray::WaitAll();

  Executor *exe = lenet.SimpleBind(ctx_dev, args_map);
  exe->Forward(false);

  const auto &out = exe->outputs;
  NDArray out_cpu = out[0].Copy(ctx_cpu);
  NDArray::WaitAll();

  const mx_float *dptr_out = out_cpu.GetData();
  // For each sample, print the index of the class with the highest probability.
  for (int i = 0; i < data_count; ++i) {
    int cat_num = out_cpu.GetShape()[1];
    float p_label = 0, max_p = dptr_out[i * cat_num];
    for (int j = 0; j < cat_num; ++j) {
      float p = dptr_out[i * cat_num + j];
      if (max_p < p) {
        p_label = j;
        max_p = p;
      }
    }
    cout << p_label << endl;
  }
  delete exe;
}
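// Test() only prints each sample's argmax label. A small assumed helper (not in the
// original) that turns the same raw probability buffer into a top-1 accuracy when
// ground-truth labels are available:
float Accuracy(const mx_float* probs, const float* labels, int n, int cat_num) {
  int correct = 0;
  for (int i = 0; i < n; ++i) {
    int best = 0;
    for (int j = 1; j < cat_num; ++j)
      if (probs[i * cat_num + j] > probs[i * cat_num + best]) best = j;
    if (static_cast<float>(best) == labels[i]) ++correct;
  }
  return n > 0 ? static_cast<float>(correct) / n : 0.0f;
}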
void MLP() {
  auto sym_x = Symbol::Variable("X");
  auto sym_label = Symbol::Variable("label");

  const int nLayers = 2;
  vector<int> layerSizes({512, 10});
  vector<Symbol> weights(nLayers);
  vector<Symbol> biases(nLayers);
  vector<Symbol> outputs(nLayers);

  for (int i = 0; i < nLayers; i++) {
    string istr = to_string(i);
    weights[i] = Symbol::Variable(string("w") + istr);
    biases[i] = Symbol::Variable(string("b") + istr);
    Symbol fc = FullyConnected(string("fc") + istr,
                               i == 0 ? sym_x : outputs[i - 1],
                               weights[i], biases[i], layerSizes[i]);
    outputs[i] = LeakyReLU(string("act") + istr, fc, LeakyReLUActType::leaky);
  }
  auto sym_out = SoftmaxOutput("softmax", outputs[nLayers - 1], sym_label);

  Context ctx_dev(DeviceType::kCPU, 0);

  NDArray array_x(Shape(128, 28), ctx_dev, false);
  NDArray array_y(Shape(128), ctx_dev, false);

  mx_float* aptr_x = new mx_float[128 * 28];
  mx_float* aptr_y = new mx_float[128];

  // We make the data by hand, in 10 classes, with some pattern.
  for (int i = 0; i < 128; i++) {
    for (int j = 0; j < 28; j++) {
      aptr_x[i * 28 + j] = i % 10 * 1.0f;
    }
    aptr_y[i] = i % 10;
  }
  array_x.SyncCopyFromCPU(aptr_x, 128 * 28);
  array_x.WaitToRead();
  array_y.SyncCopyFromCPU(aptr_y, 128);
  array_y.WaitToRead();

  // Init the parameters.
  NDArray array_w_1(Shape(512, 28), ctx_dev, false);
  NDArray array_b_1(Shape(512), ctx_dev, false);
  NDArray array_w_2(Shape(10, 512), ctx_dev, false);
  NDArray array_b_2(Shape(10), ctx_dev, false);

  // The parameters should be initialized from some distribution so the network
  // learns fast; here we just assign constant values by hand.
  array_w_1 = 0.5f;
  array_b_1 = 0.0f;
  array_w_2 = 0.5f;
  array_b_2 = 0.0f;

  // The grads.
  NDArray array_w_1_g(Shape(512, 28), ctx_dev, false);
  NDArray array_b_1_g(Shape(512), ctx_dev, false);
  NDArray array_w_2_g(Shape(10, 512), ctx_dev, false);
  NDArray array_b_2_g(Shape(10), ctx_dev, false);

  // Bind the symbolic network with the NDArrays.
  // All the input args.
  std::vector<NDArray> in_args;
  in_args.push_back(array_x);
  in_args.push_back(array_w_1);
  in_args.push_back(array_b_1);
  in_args.push_back(array_w_2);
  in_args.push_back(array_b_2);
  in_args.push_back(array_y);
  // All the grads.
  std::vector<NDArray> arg_grad_store;
  arg_grad_store.push_back(NDArray());  // we don't need the grad of the input
  arg_grad_store.push_back(array_w_1_g);
  arg_grad_store.push_back(array_b_1_g);
  arg_grad_store.push_back(array_w_2_g);
  arg_grad_store.push_back(array_b_2_g);
  arg_grad_store.push_back(NDArray());  // neither do we need the grad of the loss
  // How to handle the grads.
  std::vector<OpReqType> grad_req_type;
  grad_req_type.push_back(kNullOp);
  grad_req_type.push_back(kWriteTo);
  grad_req_type.push_back(kWriteTo);
  grad_req_type.push_back(kWriteTo);
  grad_req_type.push_back(kWriteTo);
  grad_req_type.push_back(kNullOp);
  std::vector<NDArray> aux_states;

  cout << "make the Executor" << endl;
  Executor* exe = new Executor(sym_out, ctx_dev, in_args, arg_grad_store,
                               grad_req_type, aux_states);

  cout << "Training" << endl;
  int max_iters = 20000;
  mx_float learning_rate = 0.0001;
  for (int iter = 0; iter < max_iters; ++iter) {
    exe->Forward(true);

    if (iter % 100 == 0) {
      cout << "epoch " << iter << endl;
      std::vector<NDArray>& out = exe->outputs;
      float* cptr = new float[128 * 10];
      out[0].SyncCopyToCPU(cptr, 128 * 10);
      NDArray::WaitAll();
      OutputAccuracy(cptr, aptr_y);
      delete[] cptr;
    }

    // Update the parameters with plain SGD.
    exe->Backward();
    for (int i = 1; i < 5; ++i) {
      in_args[i] -= arg_grad_store[i] * learning_rate;
    }
    NDArray::WaitAll();
  }

  delete exe;
  delete[] aptr_x;
  delete[] aptr_y;
}
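// OutputAccuracy() is called inside MLP() but not defined in this excerpt. A minimal
// sketch consistent with the call site (128 samples, 10 classes, both taken from the
// hand-made data above); in a real file it would have to be declared before MLP():
void OutputAccuracy(mx_float* pred, mx_float* target) {
  int right = 0;
  for (int i = 0; i < 128; ++i) {
    int p_y = 0;
    for (int j = 1; j < 10; ++j)
      if (pred[i * 10 + j] > pred[i * 10 + p_y]) p_y = j;
    if (p_y == static_cast<int>(target[i])) ++right;
  }
  cout << "Accuracy: " << right / 128.0 << endl;
}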
void Run() {
  /*
   * LeCun, Yann, Leon Bottou, Yoshua Bengio, and Patrick Haffner.
   * "Gradient-based learning applied to document recognition."
   * Proceedings of the IEEE (1998)
   */

  /* Define the symbolic net. */
  Symbol data = Symbol::Variable("data");
  Symbol data_label = Symbol::Variable("data_label");
  Symbol conv1_w("conv1_w"), conv1_b("conv1_b");
  Symbol conv2_w("conv2_w"), conv2_b("conv2_b");
  Symbol conv3_w("conv3_w"), conv3_b("conv3_b");
  Symbol fc1_w("fc1_w"), fc1_b("fc1_b");
  Symbol fc2_w("fc2_w"), fc2_b("fc2_b");

  Symbol conv1 = Convolution("conv1", data, conv1_w, conv1_b, Shape(5, 5), 20);
  Symbol tanh1 = Activation("tanh1", conv1, ActivationActType::tanh);
  Symbol pool1 = Pooling("pool1", tanh1, Shape(2, 2), PoolingPoolType::max,
                         false, false, PoolingPoolingConvention::valid, Shape(2, 2));

  Symbol conv2 = Convolution("conv2", pool1, conv2_w, conv2_b, Shape(5, 5), 50);
  Symbol tanh2 = Activation("tanh2", conv2, ActivationActType::tanh);
  Symbol pool2 = Pooling("pool2", tanh2, Shape(2, 2), PoolingPoolType::max,
                         false, false, PoolingPoolingConvention::valid, Shape(2, 2));

  Symbol conv3 = Convolution("conv3", pool2, conv3_w, conv3_b, Shape(2, 2), 500);
  Symbol tanh3 = Activation("tanh3", conv3, ActivationActType::tanh);
  Symbol pool3 = Pooling("pool3", tanh3, Shape(2, 2), PoolingPoolType::max,
                         false, false, PoolingPoolingConvention::valid, Shape(1, 1));

  Symbol flatten = Flatten("flatten", pool3);
  Symbol fc1 = FullyConnected("fc1", flatten, fc1_w, fc1_b, 500);
  Symbol tanh4 = Activation("tanh4", fc1, ActivationActType::tanh);
  Symbol fc2 = FullyConnected("fc2", tanh4, fc2_w, fc2_b, 10);

  Symbol lenet = SoftmaxOutput("softmax", fc2, data_label);

  for (auto s : lenet.ListArguments()) {
    LG << s;
  }

  /* Setup basic configs. */
  int val_fold = 1;
  int W = 28;
  int H = 28;
  int batch_size = 42;
  int max_epoch = 100000;
  float learning_rate = 1e-4;
  float weight_decay = 1e-4;

  /* Prepare the data. */
  vector<float> data_vec, label_vec;
  size_t data_count = GetData(&data_vec, &label_vec);
  const float *dptr = data_vec.data();
  const float *lptr = label_vec.data();
  NDArray data_array = NDArray(Shape(data_count, 1, W, H), ctx_cpu,
                               false);  // store in main memory, and copy to
                                        // device memory while training
  NDArray label_array =
      NDArray(Shape(data_count), ctx_cpu,
              false);  // it's also ok to just store them all in device memory
  data_array.SyncCopyFromCPU(dptr, data_count * W * H);
  label_array.SyncCopyFromCPU(lptr, data_count);
  data_array.WaitToRead();
  label_array.WaitToRead();

  size_t train_num = data_count * (1 - val_fold / 10.0);
  train_data = data_array.Slice(0, train_num);
  train_label = label_array.Slice(0, train_num);
  val_data = data_array.Slice(train_num, data_count);
  val_label = label_array.Slice(train_num, data_count);

  LG << "here read fin";

  /* Init some of the args. */
  args_map["data"] = data_array.Slice(0, batch_size).Copy(ctx_dev);
  args_map["data_label"] = label_array.Slice(0, batch_size).Copy(ctx_dev);
  NDArray::WaitAll();

  LG << "here slice fin";
  /*
   * We can also feed in some of the args other than the input all by
   * ourselves; fc2_w and fc1_b, for example:
   *
   * args_map["fc2_w"] =
   *     NDArray(mshadow::Shape2(500, 4 * 4 * 50), ctx_dev, false);
   * NDArray::SampleGaussian(0, 1, &args_map["fc2_w"]);
   * args_map["fc1_b"] = NDArray(mshadow::Shape1(10), ctx_dev, false);
   * args_map["fc1_b"] = 0;
   */
  lenet.InferArgsMap(ctx_dev, &args_map, args_map);

  Optimizer* opt = OptimizerRegistry::Find("ccsgd");
  opt->SetParam("momentum", 0.9)
      ->SetParam("rescale_grad", 1.0)
      ->SetParam("clip_gradient", 10);

  for (int ITER = 0; ITER < max_epoch; ++ITER) {
    size_t start_index = 0;
    while (start_index < train_num) {
      if (start_index + batch_size > train_num) {
        start_index = train_num - batch_size;
      }
      args_map["data"] =
          train_data.Slice(start_index, start_index + batch_size).Copy(ctx_dev);
      args_map["data_label"] =
          train_label.Slice(start_index, start_index + batch_size).Copy(ctx_dev);
      start_index += batch_size;
      NDArray::WaitAll();

      Executor *exe = lenet.SimpleBind(ctx_dev, args_map);
      exe->Forward(true);
      exe->Backward();
      exe->UpdateAll(opt, learning_rate, weight_decay);
      delete exe;
    }

    LG << "Iter " << ITER
       << ", accuracy: " << ValAccuracy(batch_size * 10, lenet);
  }
}
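// ValAccuracy() is used above but not defined in this excerpt. A minimal sketch under
// stated assumptions: val_data / val_label are the validation slices created in Run(),
// and evaluation reuses the same bind-per-batch pattern as the training loop. The body
// below is an illustration, not necessarily the original implementation.
float ValAccuracy(int batch_size, Symbol lenet) {
  size_t val_num = val_data.GetShape()[0];
  size_t correct_count = 0;
  size_t all_count = 0;
  size_t start_index = 0;
  while (start_index + batch_size <= val_num) {
    args_map["data"] =
        val_data.Slice(start_index, start_index + batch_size).Copy(ctx_dev);
    args_map["data_label"] =
        val_label.Slice(start_index, start_index + batch_size).Copy(ctx_dev);
    NDArray::WaitAll();

    Executor *exe = lenet.SimpleBind(ctx_dev, args_map);
    exe->Forward(false);

    NDArray out_cpu = exe->outputs[0].Copy(ctx_cpu);
    NDArray label_cpu =
        val_label.Slice(start_index, start_index + batch_size).Copy(ctx_cpu);
    NDArray::WaitAll();

    const mx_float *dptr_out = out_cpu.GetData();
    const mx_float *dptr_label = label_cpu.GetData();
    int cat_num = out_cpu.GetShape()[1];
    for (int i = 0; i < batch_size; ++i) {
      int p_label = 0;
      for (int j = 1; j < cat_num; ++j)
        if (dptr_out[i * cat_num + j] > dptr_out[i * cat_num + p_label]) p_label = j;
      if (static_cast<int>(dptr_label[i]) == p_label) ++correct_count;
      ++all_count;
    }
    delete exe;
    start_index += batch_size;
  }
  return all_count == 0 ? 0.0f : static_cast<float>(correct_count) / all_count;
}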
void Run() {
  /*
   * LeCun, Yann, Leon Bottou, Yoshua Bengio, and Patrick Haffner.
   * "Gradient-based learning applied to document recognition."
   * Proceedings of the IEEE (1998)
   */

  /* The symbolic net (lenet) is defined elsewhere; list its arguments. */
  for (auto s : lenet.ListArguments()) {
    LG << s;
  }

  /* Setup basic configs. */
  int val_fold = 3;
  int batch_size = 20;
  int max_epoch = 50;
  float learning_rate = 4e-6;
  float weight_decay = 1e-5;

  /* Prepare the data. */
  vector<float> data_vec, label_vec;
  size_t data_count = GetData(&data_vec, &label_vec);
  const float *dptr = data_vec.data();
  const float *lptr = label_vec.data();
  NDArray data_array = NDArray(Shape(data_count, 1, W, H), ctx_cpu,
                               false);  // store in main memory, and copy to
                                        // device memory while training
  NDArray label_array =
      NDArray(Shape(data_count), ctx_cpu,
              false);  // it's also ok to just store them all in device memory
  data_array.SyncCopyFromCPU(dptr, data_count * W * H);
  label_array.SyncCopyFromCPU(lptr, data_count);
  data_array.WaitToRead();
  label_array.WaitToRead();

  size_t train_num = data_count * (1 - val_fold / 10.0);
  train_data = data_array.Slice(0, train_num);
  train_label = label_array.Slice(0, train_num);
  val_data = data_array.Slice(train_num, data_count);
  val_label = label_array.Slice(train_num, data_count);

  LG << "here read fin";

  /* Init some of the args. */
  args_map["data"] = data_array.Slice(0, batch_size).Copy(ctx_dev);
  // args_map["data_label"] = label_array.Slice(0, batch_size).Copy(ctx_dev);
  // args_map["fc1_weight"] = NDArray(Shape(500, 4 * 4 * 50), ctx_dev, false);
  // args_map["fc1_weight"] = 0.3;
  // args_map["fc2_bias"] = NDArray(Shape(10), ctx_dev, false);
  // args_map["fc2_bias"] = 0;
  NDArray::WaitAll();

  LG << "here slice fin";
  /*
   * We can also feed in some of the args other than the input all by
   * ourselves; fc2_w and fc1_b, for example:
   *
   * args_map["fc2_w"] =
   *     NDArray(mshadow::Shape2(500, 4 * 4 * 50), ctx_dev, false);
   * NDArray::SampleGaussian(0, 1, &args_map["fc2_w"]);
   * args_map["fc1_b"] = NDArray(mshadow::Shape1(10), ctx_dev, false);
   * args_map["fc1_b"] = 0;
   */
  Optimizer opt("ccsgd", learning_rate, weight_decay);
  opt.SetParam("momentum", 0.9)
      .SetParam("rescale_grad", 1.0)
      .SetParam("clip_gradient", 10);

  for (int ITER = 0; ITER < max_epoch; ++ITER) {
    size_t start_index = 0;
    while (start_index < train_num) {
      if (start_index + batch_size > train_num) {
        start_index = train_num - batch_size;
      }
      args_map["data"] =
          train_data.Slice(start_index, start_index + batch_size).Copy(ctx_dev);
      args_map["data_label"] =
          train_label.Slice(start_index, start_index + batch_size).Copy(ctx_dev);
      start_index += batch_size;
      NDArray::WaitAll();

      Executor *exe = lenet.SimpleBind(ctx_dev, args_map);
      exe->Forward(true);
      exe->Backward();
      exe->UpdateAll(&opt, learning_rate, weight_decay);
      delete exe;
    }

    LG << "Iter " << ITER
       << ", accuracy: " << ValAccuracy(batch_size, lenet);
  }
}
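// Design note: both Run() variants above call SimpleBind once per mini-batch, which
// re-creates and re-allocates the executor every iteration. A sketch of an alternative
// inner loop (an assumption about usage, not code from the original; it relies on the
// same globals as Run()): bind once, then copy each batch into the bound arrays
// through arg_dict().
Executor *exe = lenet.SimpleBind(ctx_dev, args_map);
for (size_t s = 0; s + batch_size <= train_num; s += batch_size) {
  train_data.Slice(s, s + batch_size).CopyTo(&exe->arg_dict()["data"]);
  train_label.Slice(s, s + batch_size).CopyTo(&exe->arg_dict()["data_label"]);
  NDArray::WaitAll();
  exe->Forward(true);
  exe->Backward();
  exe->UpdateAll(&opt, learning_rate, weight_decay);
}
delete exe;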
void predict(wstring* ptext, int sequence_length, const string param_file,
             const string dictionary_file) {
  Context device(DeviceType::kGPU, 0);
  auto results = BucketSentenceIter::loadCharIndices(dictionary_file);
  auto dictionary = get<0>(results);
  auto charIndices = get<1>(results);
  input_dim = static_cast<int>(charIndices.size());
  auto RNN = LSTMUnroll(num_lstm_layer, 1, input_dim, num_hidden, num_embed, 0);

  map<string, NDArray> args_map;
  args_map["data"] = NDArray(Shape(1, 1), device, false);
  args_map["softmax_label"] = NDArray(Shape(1, 1), device, false);
  vector<mx_float> zeros(1 * num_hidden, 0);
  // The unrolled network keeps one (c, h) state pair per LSTM layer.
  for (int l = 0; l < num_lstm_layer; l++) {
    string key = "l" + to_string(l) + "_init_";
    args_map[key + "c"] = NDArray(Shape(1, num_hidden), device, false);
    args_map[key + "h"] = NDArray(Shape(1, num_hidden), device, false);
    args_map[key + "c"].SyncCopyFromCPU(zeros);
    args_map[key + "h"].SyncCopyFromCPU(zeros);
  }
  Executor* exe = RNN.SimpleBind(device, args_map);
  LoadCheckpoint(param_file, exe);

  mx_float index = 0;
  wchar_t next = 0;
  vector<mx_float> softmax;
  softmax.resize(input_dim);
  // Feed the seed text through the network, carrying every layer's state forward.
  for (auto c : *ptext) {
    exe->arg_dict()["data"].SyncCopyFromCPU(&dictionary[c], 1);
    exe->Forward(false);

    exe->outputs[0].SyncCopyToCPU(softmax.data(), input_dim);
    for (int l = 0; l < num_lstm_layer; l++) {
      string key = "l" + to_string(l) + "_init_";
      exe->outputs[l * 2 + 1].CopyTo(&args_map[key + "c"]);
      exe->outputs[l * 2 + 2].CopyTo(&args_map[key + "h"]);
    }

    size_t n = max_element(softmax.begin(), softmax.end()) - softmax.begin();
    index = (mx_float) n;
    next = charIndices[n];
  }
  ptext->push_back(next);

  // Generate sequence_length more characters greedily (argmax decoding).
  for (int i = 0; i < sequence_length; i++) {
    exe->arg_dict()["data"].SyncCopyFromCPU(&index, 1);
    exe->Forward(false);

    exe->outputs[0].SyncCopyToCPU(softmax.data(), input_dim);
    for (int l = 0; l < num_lstm_layer; l++) {
      string key = "l" + to_string(l) + "_init_";
      exe->outputs[l * 2 + 1].CopyTo(&args_map[key + "c"]);
      exe->outputs[l * 2 + 2].CopyTo(&args_map[key + "h"]);
    }

    size_t n = max_element(softmax.begin(), softmax.end()) - softmax.begin();
    index = (mx_float) n;
    next = charIndices[n];
    ptext->push_back(next);
  }
}
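// Usage sketch for the unrolled-LSTM predictor. File names are placeholders, and the
// checkpoint/dictionary are assumed to come from a matching unrolled training run.
// Unlike predictWithBuiltInRNNOp, this variant carries one (c, h) pair per layer.
// Printing non-ASCII generated characters through wcout typically needs a locale.
int main() {
  std::wcout.imbue(std::locale(""));  // requires <locale>
  wstring seed = L"The ";
  predict(&seed, 600, "model-24.params", "model.dictionary");
  wcout << seed << endl;  // the seed plus the generated continuation
  return 0;
}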