void convert_to_binary_row(mxnet::NDArray& array) {
  CHECK(array.shape().ndim() >= 2); // second dimension is input depth from prev. layer, needed for next line
  std::cout << "array shape: " << array.shape() << std::endl;
  //if(array.shape()[1] < BITS_PER_BINARY_WORD) return;
  CHECK(array.shape()[1] % BITS_PER_BINARY_WORD == 0); // depth from input has to be divisible by 32 (or 64)

  nnvm::TShape binarized_shape(1);
  size_t size = array.shape().Size();
  binarized_shape[0] = size / BITS_PER_BINARY_WORD;

  mxnet::NDArray temp(binarized_shape, mxnet::Context::CPU(), false,
                      mxnet::op::xnor_cpu::corresponding_dtype());
  mxnet::op::xnor_cpu::get_binary_row((float*) array.data().dptr_,
                                      (BINARY_WORD*) temp.data().dptr_, size);
  array = temp;
}
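// Illustration only: a minimal sketch of the packing convert_to_binary_row relies on,
// assuming get_binary_row stores one bit per weight (1 for non-negative, 0 for negative)
// and packs BITS_PER_BINARY_WORD consecutive floats, LSB-first, into one BINARY_WORD.
// binarize_row_sketch is a hypothetical name and is not called by the converter; it only
// documents the intended layout and may differ from the actual xnor_cpu implementation.
static void binarize_row_sketch(const float* input, BINARY_WORD* output, size_t size) {
  for (size_t i = 0; i < size; i += BITS_PER_BINARY_WORD) {
    BINARY_WORD word = 0;
    for (size_t bit = 0; bit < BITS_PER_BINARY_WORD; ++bit) {
      // set the bit if the corresponding float is non-negative
      if (input[i + bit] >= 0.0f)
        word |= ((BINARY_WORD)1 << bit);
    }
    output[i / BITS_PER_BINARY_WORD] = word;
  }
}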
void transpose(mxnet::NDArray& array) {
  CHECK(array.shape().ndim() == 2);

  nnvm::TShape transposed_shape(2);
  int rows = array.shape()[0];
  int cols = array.shape()[1];
  transposed_shape[0] = cols;
  transposed_shape[1] = rows;

  mxnet::NDArray temp(transposed_shape, mxnet::Context::CPU(), false, array.dtype());
  MSHADOW_REAL_TYPE_SWITCH(array.dtype(), DType, {
    for (int row = 0; row < rows; row++) {
      for (int col = 0; col < cols; col++) {
        ((DType*)temp.data().dptr_)[col * rows + row] =
            ((DType*)array.data().dptr_)[row * cols + col];
      }
    }
  });
  array = temp;
}
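// Illustration only: the index mapping used in transpose() on plain row-major buffers.
// A (rows x cols) matrix stored as src[row * cols + col] is written into a (cols x rows)
// matrix as dst[col * rows + row]. transpose_sketch is a standalone example and is not
// called by the converter.
static void transpose_sketch(const float* src, float* dst, int rows, int cols) {
  for (int row = 0; row < rows; ++row) {
    for (int col = 0; col < cols; ++col) {
      dst[col * rows + row] = src[row * cols + col];
    }
  }
}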
void Train(mxnet::NDArray data_array, mxnet::NDArray label_array, int max_epochs,
           int val_fold, float start_learning_rate, std::vector<mxnet::NDArray> &argsl) {
  /*prepare ndarray*/
  learning_rate = start_learning_rate;
  size_t data_count = data_array.shape()[0];
  size_t val_data_count = data_count * val_fold / 10;
  size_t train_data_count = data_count - val_data_count;

  train_data = data_array.Slice(0, train_data_count);
  train_label = label_array.Slice(0, train_data_count);
  val_data = data_array.Slice(train_data_count, data_count);
  val_label = label_array.Slice(train_data_count, data_count);

  // batch size is taken from the shape of the data argument
  size_t batch_size = in_args[0].shape()[0];

  /*start the training*/
  for (int iter = 0; iter < max_epochs; ++iter) {
    CHECK(optimizer);
    size_t start_index = 0;

    // load the first batch (data is in_args[0], label is the last in_arg)
    in_args[0] = train_data.Slice(start_index, start_index + batch_size).Copy(ctx_dev);
    in_args[in_args.size() - 1] =
        train_label.Slice(start_index, start_index + batch_size).Copy(ctx_dev);
    in_args[0].WaitToRead();
    in_args[in_args.size() - 1].WaitToRead();

    while (start_index < train_data_count) {
      /*rebind the executor*/
      delete exe;
      exe = mxnet::Executor::Bind(net, ctx_dev, g2c, in_args, arg_grad_store,
                                  grad_req_type, aux_states);
      CHECK(exe);
      exe->Forward(true);
      exe->Backward(std::vector<mxnet::NDArray>());

      start_index += batch_size;
      if (start_index < train_data_count) {
        // clamp the last batch so it still contains exactly batch_size samples
        if (start_index + batch_size >= train_data_count)
          start_index = train_data_count - batch_size;
        in_args[0] = train_data.Slice(start_index, start_index + batch_size).Copy(ctx_dev);
        in_args[in_args.size() - 1] =
            train_label.Slice(start_index, start_index + batch_size).Copy(ctx_dev);
      }

      // update the learnable arguments (skip data at index 0 and label at the last index)
      for (size_t i = 1; i < in_args.size() - 1; ++i) {
        optimizer->Update(i, &in_args[i], &arg_grad_store[i], learning_rate);
      }
      for (size_t i = 1; i < in_args.size() - 1; ++i) {
        in_args[i].WaitToRead();
      }
      in_args[0].WaitToRead();
      in_args[in_args.size() - 1].WaitToRead();
    }
    /*call every iter*/
    TrainingCallBack(iter, exe);
  }
  argsl.assign(in_args.begin(), in_args.end());
}
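// Illustration only: the batch scheduling used in Train(). Start indices advance by
// batch_size; when the last batch would run past train_data_count, it is clamped to
// train_data_count - batch_size, so the executor always sees a full batch and the final
// batch may overlap the previous one. batch_starts_sketch is a hypothetical helper that
// just collects the resulting start indices and is not used by the trainer.
static std::vector<size_t> batch_starts_sketch(size_t train_data_count, size_t batch_size) {
  std::vector<size_t> starts;
  size_t start_index = 0;
  while (start_index < train_data_count) {
    starts.push_back(start_index);
    start_index += batch_size;
    if (start_index < train_data_count && start_index + batch_size >= train_data_count)
      start_index = train_data_count - batch_size;
  }
  return starts;  // e.g. train_data_count = 10, batch_size = 4 -> {0, 4, 6}
}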