inline double network::batch_backpropagate(MatrixXd &input_mat, MatrixXd &des_mat)
{
    double er = 0.0;

    // Carry part of the previous batch's update over as momentum. Note that
    // the effective momentum coefficient here is lrate * mom, since the
    // learning rate is applied to the carried-over deltas as well.
    for (size_t l = 0; l < layers.size(); l++)
        layers[l].delta_weights = lrate * mom * layers[l].delta_weights;

    for (int i = 0; i < input_mat.rows(); i++) {
        VectorXd input = input_mat.row(i).transpose();
        VectorXd des = des_mat.row(i).transpose();

        feedforward(input);
        er += error(layers.back().outputs, des);

        // Walk backwards through the layers, accumulating weight deltas.
        VectorXd err = des - layers.back().outputs;
        for (int l = (int)layers.size() - 1; l > 0; l--) {
            VectorXd deltas = (err.array() * derivative(layers[l].outputs).array()).matrix();
            // Propagate the error to the previous layer, dropping the bias row.
            err = (layers[l].weights.transpose() * deltas).topRows(layers[l - 1].outputs.size());
            layers[l].delta_weights.leftCols(layers[l].weights.cols() - 1) += lrate * (deltas * layers[l - 1].outputs.transpose());
            layers[l].delta_weights.rightCols(1) += lrate * deltas * layers[l].bias;
        }

        // The first layer is fed by the raw input rather than another layer.
        VectorXd deltas = (err.array() * derivative(layers[0].outputs).array()).matrix();
        layers[0].delta_weights.leftCols(layers[0].weights.cols() - 1) += lrate * (deltas * input.transpose());
        layers[0].delta_weights.rightCols(1) += lrate * deltas * layers[0].bias;
    }

    // Apply the accumulated updates once per batch.
    for (size_t l = 0; l < layers.size(); l++)
        layers[l].weights += layers[l].delta_weights;
    return er;
}
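// For reference, a minimal self-contained sketch of the classical momentum
// update pattern used above (W, dW, grad, lrate, mom are illustrative names,
// not taken from the code): the previous update is decayed by the momentum
// coefficient and the fresh gradient step is accumulated on top of it.
#include <Eigen/Dense>
#include <iostream>

int main() {
    Eigen::MatrixXd W  = Eigen::MatrixXd::Ones(2, 2);  // weights
    Eigen::MatrixXd dW = Eigen::MatrixXd::Zero(2, 2);  // last update
    Eigen::MatrixXd grad(2, 2);                        // toy gradient
    grad << 0.1, -0.2, 0.3, -0.4;
    const double lrate = 0.5, mom = 0.9;

    for (int step = 0; step < 3; step++) {
        dW = mom * dW + lrate * grad;  // decay old update, add new step
        W += dW;
        std::cout << "step " << step << ":\n" << W << "\n";
    }
}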
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
/* The network API (create_network, feedforward, vprint_float, ...) is
   assumed to come from the project's own header. */

int main() {
    int structure[3] = {2, 2, 1};
    float input[2] = {1, 2};
    float output[1];
    struct network *net = create_network(3, structure);

    /* Run the network once and print its output. */
    feedforward(net, input, output);
    vprint_float(1, output);

    /* Save the weights, scramble them, then reload: the second
       feedforward should print the same value as the first one. */
    network_save_to_file(net, "mynet.net");
    srand(time(NULL));
    network_set_random_weights_biases(net, -1, 1);
    network_load_from_file(net, "mynet.net");
    feedforward(net, input, output);
    vprint_float(1, output);
    return 0;
}
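/* A stripped-down sketch of the round-trip being tested above, using a plain
   float array in place of the network's weights (save_weights, load_weights
   and the file format are illustrative, not the project's actual API): */
#include <stdio.h>
#include <string.h>

static void save_weights(const float *w, int n, const char *path) {
    FILE *f = fopen(path, "wb");
    if (!f) return;
    fwrite(w, sizeof(float), n, f);
    fclose(f);
}

static void load_weights(float *w, int n, const char *path) {
    FILE *f = fopen(path, "rb");
    if (!f) return;
    fread(w, sizeof(float), n, f);
    fclose(f);
}

int main(void) {
    float w[3] = {0.5f, -0.25f, 0.125f};
    save_weights(w, 3, "w.bin");
    memset(w, 0, sizeof(w));   /* scramble, like the random re-init above */
    load_weights(w, 3, "w.bin");
    printf("%f %f %f\n", w[0], w[1], w[2]); /* original values restored */
    return 0;
}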
int ConvNet::predict(dmatrix3 &mat)
{
    // Run the forward pass, then return the index of the maximum element
    // in the output vector: that index is the predicted class.
    dvec thought = feedforward(mat);
    auto maxi = std::max_element(thought.begin(), thought.end());
    return static_cast<int>(std::distance(thought.begin(), maxi));
}
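// A minimal standalone illustration of the argmax idiom used above
// (std::max_element + std::distance), assuming nothing beyond the
// standard library:
#include <algorithm>
#include <iostream>
#include <iterator>
#include <vector>

int main() {
    std::vector<double> scores = {0.1, 0.7, 0.2};
    auto it = std::max_element(scores.begin(), scores.end());
    // Prints 1: the position of the largest score, not its value.
    std::cout << std::distance(scores.begin(), it) << "\n";
}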
void Perception::train(std::vector<double> inputs, int desired)
{
    std::cout << "training point " << inputs.at(0) << " " << inputs.at(1) << std::endl;
    std::cout << "desired " << desired << std::endl;

    int guess = feedforward(inputs);
    std::cout << "guessed " << guess << std::endl;

    // Perceptron learning rule: nudge each weight towards the desired
    // output, scaled by the learning rate c and the matching input.
    double error = desired - guess;
    std::cout << "error " << error << std::endl;
    for (size_t i = 0; i < weights.size(); i++)
        weights.at(i) += c * error * inputs[i];
}
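// A self-contained sketch of one such update step with made-up numbers
// (c, weights, inputs are illustrative): a wrong guess of -1 against a
// desired +1 pushes each weight by c * 2 * input.
#include <iostream>
#include <vector>

int main() {
    double c = 0.01;                       // learning rate
    std::vector<double> weights = {0.5, -0.3};
    std::vector<double> inputs  = {1.0, 2.0};
    int desired = 1, guess = -1;           // misclassified point
    double error = desired - guess;        // = 2
    for (size_t i = 0; i < weights.size(); i++)
        weights[i] += c * error * inputs[i];
    std::cout << weights[0] << " " << weights[1] << "\n";  // 0.52 -0.26
}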
void ConvNet::learn(dmatrix3 &stimulus, dvec &target)
{
    dvec result = feedforward(stimulus);

    // Output-layer deltas: the sigmoid derivative is evaluated at the
    // network's own output, scaled by the raw error.
    real er = 0;
    for (size_t x = 0; x < target.size(); x++) {
        L4.Errors[x] = sigmoid_p(result[x]) * (target[x] - result[x]);
        er += L4.Errors[x];
    }
    std::cout << "Output error: " << er << std::endl;

    // Propagate the error back through the layer stack.
    L3.Errors = L4.backpropagation();
    dvec l2er = L3.backpropagation();
    L2.Errors = fold3(l2er, L2.OutShape);
    L1.Errors = L2.backpropagation();

    // Update the weights; L2 is a PoolLayer, which has no weights.
    L4.weight_update(L3.Activations);
    L3.weight_update(flatten(L2.Activations));
    L1.weight_update(Inputs); // Warning: Inputs is a pointer!
}
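// A small numeric sketch of the output-layer delta rule above, assuming
// sigmoid_p takes the unit's output y and returns the logistic derivative
// y * (1 - y); the values are made up:
#include <iostream>

// Derivative of the logistic sigmoid, expressed in terms of its output y.
double sigmoid_p(double y) { return y * (1.0 - y); }

int main() {
    double output = 0.8, target = 1.0;
    double delta = sigmoid_p(output) * (target - output);
    std::cout << delta << "\n";  // 0.8 * 0.2 * 0.2 = 0.032
}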
void Trainee::train(std::vector<std::pair<InputType, AnswerType>> minibatch, float learning_rate)
{
    // Summed gradients over the minibatch.
    Eigen::MatrixXf dweight3 = Eigen::MatrixXf::Zero(n_outputvec, n_hid2vec);
    Eigen::VectorXf dbias3 = Eigen::VectorXf::Zero(n_outputvec);
    Eigen::MatrixXf dweight2 = Eigen::MatrixXf::Zero(n_hid2vec, n_hid1vec);
    Eigen::VectorXf dbias2 = Eigen::VectorXf::Zero(n_hid2vec);
    Eigen::MatrixXf dweight1 = Eigen::MatrixXf::Zero(n_hid1vec, n_inputvec);
    Eigen::VectorXf dbias1 = Eigen::VectorXf::Zero(n_hid1vec);

    // For AdaGrad: divide each step by the root of the accumulated squared
    // gradients; the zero check avoids 0/0 for untouched parameters.
    auto fn = [](float lhs, float rhs) -> float { return lhs != 0.0f ? lhs / rhs : 0.0f; };

    for (auto sample : minibatch) {
        Eigen::VectorXf inputvec = input2vec(sample.first);
        // Bolted on after the fact, and admittedly wasteful: each call redoes
        // the earlier layers, so z2 really ought to be computed from z1.
        Eigen::VectorXf z1 = feedforward(inputvec, 1);
        Eigen::VectorXf z2 = feedforward(inputvec, 2);

        // Delta of the output layer: the network output minus the one-hot answer.
        Eigen::VectorXf delta3 = feedforward(inputvec, 3);
        delta3(sample.second) -= 1.0f;
        {
            Eigen::ArrayXXf e = (delta3 * z2.transpose()).array();
            gsq_w3 += e * e;
            gsq_b3 += delta3.array() * delta3.array();
            dweight3 += e.matrix();
            dbias3 += delta3;
        }

        // Delta of the 2nd hidden layer (ReLU derivative: 1 where z2 >= 0).
        Eigen::VectorXf delta2 = Eigen::VectorXf::Zero(n_hid2vec);
        for (int j = 0; j < n_hid2vec; j++) {
            for (int k = 0; k < n_outputvec; k++)
                delta2(j) += delta3(k) * weight3(k, j) * (z2(j) >= 0.f ? 1.f : 0.f);
        }
        {
            Eigen::ArrayXXf e = (delta2 * z1.transpose()).array();
            gsq_w2 += e * e;
            gsq_b2 += delta2.array() * delta2.array();
            dweight2 += e.matrix();
            dbias2 += delta2;
        }

        // Delta of the 1st hidden layer.
        Eigen::VectorXf delta1 = Eigen::VectorXf::Zero(n_hid1vec);
        for (int j = 0; j < n_hid1vec; j++) {
            for (int k = 0; k < n_hid2vec; k++)
                delta1(j) += delta2(k) * weight2(k, j) * (z1(j) >= 0.f ? 1.f : 0.f);
        }
        {
            Eigen::ArrayXXf e = (delta1 * inputvec.transpose()).array();
            gsq_w1 += e * e;
            gsq_b1 += delta1.array() * delta1.array();
            dweight1 += e.matrix();
            dbias1 += delta1;
        }
    }

    // AdaGrad update, averaged over the minibatch.
    weight1 -= dweight1.binaryExpr(gsq_w1.sqrt().matrix(), fn) * learning_rate / minibatch.size();
    bias1 -= dbias1.binaryExpr(gsq_b1.sqrt().matrix(), fn) * learning_rate / minibatch.size();
    weight2 -= dweight2.binaryExpr(gsq_w2.sqrt().matrix(), fn) * learning_rate / minibatch.size();
    bias2 -= dbias2.binaryExpr(gsq_b2.sqrt().matrix(), fn) * learning_rate / minibatch.size();
    weight3 -= dweight3.binaryExpr(gsq_w3.sqrt().matrix(), fn) * learning_rate / minibatch.size();
    bias3 -= dbias3.binaryExpr(gsq_b3.sqrt().matrix(), fn) * learning_rate / minibatch.size();
}
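// A minimal self-contained sketch of the AdaGrad rule applied above, for a
// single parameter vector (w, g, gsq are illustrative names; this version
// adds a small epsilon rather than testing for zero):
#include <Eigen/Dense>
#include <iostream>

int main() {
    Eigen::ArrayXf w(3), gsq = Eigen::ArrayXf::Zero(3);
    w << 1.0f, 2.0f, 3.0f;
    const float lr = 0.1f, eps = 1e-8f;

    for (int step = 0; step < 3; step++) {
        Eigen::ArrayXf g(3);
        g << 0.5f, -0.2f, 0.0f;            // toy gradient
        gsq += g * g;                      // accumulate squared gradients
        w -= lr * g / (gsq.sqrt() + eps);  // per-parameter scaled step
        std::cout << w.transpose() << "\n";
    }
}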