/*
 * Backward pass of an RNN layer.
 *
 * Visits the l.steps time steps from the last one down to the first.  The
 * three sub-layers (input, self, output) are copied by value and moved from
 * step to step with increment_layer(); on every step
 * backward_connected_layer() is invoked on the output, self and input
 * sub-layers, in that order.
 *
 * l     - the RNN layer (by value; l.state is only advanced locally).
 * state - supplies the train flag, the layer input and an optional delta
 *         buffer.  When state.delta is null the input sub-layer is given a
 *         null delta as well.
 */
void backward_rnn_layer(layer l, network_state state)
{
    const int hidden_count = l.hidden * l.batch;  /* per-step size of the hidden buffers */
    const int input_count = l.inputs * l.batch;   /* per-step size of the input buffers */
    network_state sub = { 0 };
    layer in_l = *(l.input_layer);
    layer self_l = *(l.self_layer);
    layer out_l = *(l.output_layer);
    int step;

    sub.train = state.train;

    /* Position the sub-layer copies on the final time step. */
    increment_layer(&in_l, l.steps - 1);
    increment_layer(&self_l, l.steps - 1);
    increment_layer(&out_l, l.steps - 1);

    /* Move to the state slot that belongs to the final time step. */
    l.state += hidden_count * l.steps;

    for (step = l.steps - 1; step >= 0; --step) {
        /*
         * Fill the current state slot from the input and self sub-layer
         * outputs (copy, then axpy_cpu with factor 1 -- presumably an
         * accumulate) and back-propagate through the output sub-layer.
         */
        copy_cpu(hidden_count, in_l.output, 1, l.state, 1);
        axpy_cpu(hidden_count, 1, self_l.output, 1, l.state, 1);

        sub.input = l.state;
        sub.delta = self_l.delta;
        backward_connected_layer(out_l, sub);

        /* Step back one state slot; it is the input of the self sub-layer. */
        l.state -= hidden_count;

        /* The first step has no earlier delta to propagate into. */
        sub.input = l.state;
        sub.delta = (step == 0) ? 0 : self_l.delta - hidden_count;
        backward_connected_layer(self_l, sub);

        copy_cpu(hidden_count, self_l.delta, 1, in_l.delta, 1);
        if (step > 0 && l.shortcut) {
            /* Shortcut: also add this step's self delta into the previous step's. */
            axpy_cpu(hidden_count, 1, self_l.delta, 1, self_l.delta - hidden_count, 1);
        }

        sub.input = state.input + step * input_count;
        sub.delta = state.delta ? state.delta + step * input_count : 0;
        backward_connected_layer(in_l, sub);

        /* Move all three sub-layer copies to the previous time step. */
        increment_layer(&in_l, -1);
        increment_layer(&self_l, -1);
        increment_layer(&out_l, -1);
    }
}
/*
 * Backward pass over the whole network.
 *
 * Layers are visited from net.n - 1 down to 0.  Before each layer is
 * processed, state.input / state.delta are pointed at the output / delta of
 * the layer below it; for layer 0 the caller's original input is restored
 * and state.delta is set to 0.  The matching backward_*_layer() routine is
 * then selected by the layer type.  MAXPOOL and SOFTMAX layers are skipped
 * when they sit at index 0, and layer types without a case are ignored.
 *
 * net   - network whose layers are traversed.
 * state - by-value state; its input and delta fields are rewritten per layer.
 */
void backward_network(network net, network_state state)
{
    float *first_input = state.input;
    int idx;

    for (idx = net.n - 1; idx >= 0; --idx) {
        if (idx > 0) {
            state.input = net.layers[idx - 1].output;
            state.delta = net.layers[idx - 1].delta;
        } else {
            state.input = first_input;
            state.delta = 0;
        }

        layer cur = net.layers[idx];

        switch (cur.type) {
        case CONVOLUTIONAL:
            backward_convolutional_layer(cur, state);
            break;
        case DECONVOLUTIONAL:
            backward_deconvolutional_layer(cur, state);
            break;
        case MAXPOOL:
            if (idx != 0) {
                backward_maxpool_layer(cur, state);
            }
            break;
        case DROPOUT:
            backward_dropout_layer(cur, state);
            break;
        case DETECTION:
            backward_detection_layer(cur, state);
            break;
        case SOFTMAX:
            if (idx != 0) {
                backward_softmax_layer(cur, state);
            }
            break;
        case CONNECTED:
            backward_connected_layer(cur, state);
            break;
        case COST:
            backward_cost_layer(cur, state);
            break;
        case ROUTE:
            /* The route layer receives the whole network rather than the state. */
            backward_route_layer(cur, net);
            break;
        default:
            /* No backward routine is dispatched for other layer types. */
            break;
        }
    }
}
/*
 * Backward pass over the whole network.
 *
 * state.workspace is set from net.workspace once, then layers are visited
 * from net.n - 1 down to 0 with state.index holding the current layer index.
 * Before each layer is processed, state.input / state.delta are pointed at
 * the output / delta of the layer below it; for layer 0 the caller's
 * original input and delta are restored instead.  The matching
 * backward_*_layer() routine is then selected by the layer type.  MAXPOOL
 * and SOFTMAX layers are skipped when they sit at index 0, and layer types
 * without a case are ignored.
 *
 * net   - network whose layers are traversed.
 * state - by-value state; workspace, index, input and delta are rewritten.
 */
void backward_network(network net, network_state state)
{
    float *first_input = state.input;
    float *first_delta = state.delta;
    int idx;

    state.workspace = net.workspace;

    for (idx = net.n - 1; idx >= 0; --idx) {
        state.index = idx;

        if (idx > 0) {
            state.input = net.layers[idx - 1].output;
            state.delta = net.layers[idx - 1].delta;
        } else {
            state.input = first_input;
            state.delta = first_delta;
        }

        layer cur = net.layers[idx];

        switch (cur.type) {
        case CONVOLUTIONAL:
            backward_convolutional_layer(cur, state);
            break;
        case DECONVOLUTIONAL:
            backward_deconvolutional_layer(cur, state);
            break;
        case ACTIVE:
            backward_activation_layer(cur, state);
            break;
        case NORMALIZATION:
            backward_normalization_layer(cur, state);
            break;
        case BATCHNORM:
            backward_batchnorm_layer(cur, state);
            break;
        case MAXPOOL:
            if (idx != 0) {
                backward_maxpool_layer(cur, state);
            }
            break;
        case AVGPOOL:
            backward_avgpool_layer(cur, state);
            break;
        case DROPOUT:
            backward_dropout_layer(cur, state);
            break;
        case DETECTION:
            backward_detection_layer(cur, state);
            break;
        case SOFTMAX:
            if (idx != 0) {
                backward_softmax_layer(cur, state);
            }
            break;
        case CONNECTED:
            backward_connected_layer(cur, state);
            break;
        case RNN:
            backward_rnn_layer(cur, state);
            break;
        case GRU:
            backward_gru_layer(cur, state);
            break;
        case CRNN:
            backward_crnn_layer(cur, state);
            break;
        case LOCAL:
            backward_local_layer(cur, state);
            break;
        case COST:
            backward_cost_layer(cur, state);
            break;
        case ROUTE:
            /* The route layer receives the whole network rather than the state. */
            backward_route_layer(cur, net);
            break;
        case SHORTCUT:
            backward_shortcut_layer(cur, state);
            break;
        default:
            /* No backward routine is dispatched for other layer types. */
            break;
        }
    }
}
/*
 * Backward pass of an LSTM layer (CPU buffers).
 *
 * Visits the l.steps time steps from the last one down to the first.  The
 * eight connected sub-layers (wf/wi/wg/wo and uf/ui/ug/uo) are copied by
 * value and moved between steps with increment_layer().  On each step the
 * gate activations are rebuilt from the sub-layer outputs, per-gate deltas
 * are formed in the temp buffers, and backward_connected_layer() is called
 * on the w* sub-layer (input: l.prev_state_cpu, delta: l.dh_cpu) followed by
 * the matching u* sub-layer (input/delta: the current slice of state).
 *
 * The temp buffers are reused in place, so the order of the calls below is
 * significant.
 *
 * NOTE(review): at step 0 l.prev_cell_cpu and l.prev_state_cpu are not
 * refreshed, so they keep whatever they held before -- confirm intended.
 *
 * l     - the LSTM layer (by value; output, cell_cpu, delta and dh_cpu are
 *         only changed on the local copy).
 * state - by-value network; input (and delta, when non-null) are walked
 *         backwards one step per iteration.
 */
void backward_lstm_layer(layer l, network state)
{
    const int out_count = l.outputs * l.batch;  /* per-step size of the output-shaped buffers */
    const int in_count = l.inputs * l.batch;    /* per-step size of the input buffers */
    network sub = { 0 };
    layer wf = *(l.wf);
    layer wi = *(l.wi);
    layer wg = *(l.wg);
    layer wo = *(l.wo);
    layer uf = *(l.uf);
    layer ui = *(l.ui);
    layer ug = *(l.ug);
    layer uo = *(l.uo);
    int step;

    sub.train = state.train;

    /* Position every sub-layer copy on the final time step. */
    increment_layer(&wf, l.steps - 1);
    increment_layer(&wi, l.steps - 1);
    increment_layer(&wg, l.steps - 1);
    increment_layer(&wo, l.steps - 1);
    increment_layer(&uf, l.steps - 1);
    increment_layer(&ui, l.steps - 1);
    increment_layer(&ug, l.steps - 1);
    increment_layer(&uo, l.steps - 1);

    /* Likewise point the per-step buffers at their final-step slices. */
    state.input += in_count * (l.steps - 1);
    if (state.delta) {
        state.delta += in_count * (l.steps - 1);
    }
    l.output += out_count * (l.steps - 1);
    l.cell_cpu += out_count * (l.steps - 1);
    l.delta += out_count * (l.steps - 1);

    for (step = l.steps - 1; step >= 0; --step) {
        /* Snapshot the current and (when one exists) previous cell / output. */
        if (step != 0) {
            copy_cpu(out_count, l.cell_cpu - out_count, 1, l.prev_cell_cpu, 1);
        }
        copy_cpu(out_count, l.cell_cpu, 1, l.c_cpu, 1);
        if (step != 0) {
            copy_cpu(out_count, l.output - out_count, 1, l.prev_state_cpu, 1);
        }
        copy_cpu(out_count, l.output, 1, l.h_cpu, 1);

        /* The previous step's delta receives the w* gradients; none at step 0. */
        if (step == 0) {
            l.dh_cpu = 0;
        } else {
            l.dh_cpu = l.delta - out_count;
        }

        /* Rebuild each gate's pre-activation from its w* and u* outputs. */
        copy_cpu(out_count, wf.output, 1, l.f_cpu, 1);
        axpy_cpu(out_count, 1, uf.output, 1, l.f_cpu, 1);

        copy_cpu(out_count, wi.output, 1, l.i_cpu, 1);
        axpy_cpu(out_count, 1, ui.output, 1, l.i_cpu, 1);

        copy_cpu(out_count, wg.output, 1, l.g_cpu, 1);
        axpy_cpu(out_count, 1, ug.output, 1, l.g_cpu, 1);

        copy_cpu(out_count, wo.output, 1, l.o_cpu, 1);
        axpy_cpu(out_count, 1, uo.output, 1, l.o_cpu, 1);

        activate_array(l.f_cpu, out_count, LOGISTIC);
        activate_array(l.i_cpu, out_count, LOGISTIC);
        activate_array(l.g_cpu, out_count, TANH);
        activate_array(l.o_cpu, out_count, LOGISTIC);

        /* temp3 <- this step's delta. */
        copy_cpu(out_count, l.delta, 1, l.temp3_cpu, 1);

        /* temp <- TANH-activated cell. */
        copy_cpu(out_count, l.c_cpu, 1, l.temp_cpu, 1);
        activate_array(l.temp_cpu, out_count, TANH);

        /*
         * temp2 <- temp3 combined with o, passed through the TANH gradient
         * at temp, then accumulated with the carried dc_cpu.
         */
        copy_cpu(out_count, l.temp3_cpu, 1, l.temp2_cpu, 1);
        mul_cpu(out_count, l.o_cpu, 1, l.temp2_cpu, 1);

        gradient_array(l.temp_cpu, out_count, TANH, l.temp2_cpu);
        axpy_cpu(out_count, 1, l.dc_cpu, 1, l.temp2_cpu, 1);

        /* Output gate: the delta in temp goes to wo, then uo. */
        copy_cpu(out_count, l.c_cpu, 1, l.temp_cpu, 1);
        activate_array(l.temp_cpu, out_count, TANH);
        mul_cpu(out_count, l.temp3_cpu, 1, l.temp_cpu, 1);
        gradient_array(l.o_cpu, out_count, LOGISTIC, l.temp_cpu);
        copy_cpu(out_count, l.temp_cpu, 1, wo.delta, 1);
        sub.input = l.prev_state_cpu;
        sub.delta = l.dh_cpu;
        backward_connected_layer(wo, sub);

        copy_cpu(out_count, l.temp_cpu, 1, uo.delta, 1);
        sub.input = state.input;
        sub.delta = state.delta;
        backward_connected_layer(uo, sub);

        /* g gate: temp2 combined with i, TANH gradient at g; to wg, then ug. */
        copy_cpu(out_count, l.temp2_cpu, 1, l.temp_cpu, 1);
        mul_cpu(out_count, l.i_cpu, 1, l.temp_cpu, 1);
        gradient_array(l.g_cpu, out_count, TANH, l.temp_cpu);
        copy_cpu(out_count, l.temp_cpu, 1, wg.delta, 1);
        sub.input = l.prev_state_cpu;
        sub.delta = l.dh_cpu;
        backward_connected_layer(wg, sub);

        copy_cpu(out_count, l.temp_cpu, 1, ug.delta, 1);
        sub.input = state.input;
        sub.delta = state.delta;
        backward_connected_layer(ug, sub);

        /* i gate: temp2 combined with g, LOGISTIC gradient at i; to wi, then ui. */
        copy_cpu(out_count, l.temp2_cpu, 1, l.temp_cpu, 1);
        mul_cpu(out_count, l.g_cpu, 1, l.temp_cpu, 1);
        gradient_array(l.i_cpu, out_count, LOGISTIC, l.temp_cpu);
        copy_cpu(out_count, l.temp_cpu, 1, wi.delta, 1);
        sub.input = l.prev_state_cpu;
        sub.delta = l.dh_cpu;
        backward_connected_layer(wi, sub);

        copy_cpu(out_count, l.temp_cpu, 1, ui.delta, 1);
        sub.input = state.input;
        sub.delta = state.delta;
        backward_connected_layer(ui, sub);

        /* f gate: temp2 combined with prev_cell, LOGISTIC gradient at f; to wf, then uf. */
        copy_cpu(out_count, l.temp2_cpu, 1, l.temp_cpu, 1);
        mul_cpu(out_count, l.prev_cell_cpu, 1, l.temp_cpu, 1);
        gradient_array(l.f_cpu, out_count, LOGISTIC, l.temp_cpu);
        copy_cpu(out_count, l.temp_cpu, 1, wf.delta, 1);
        sub.input = l.prev_state_cpu;
        sub.delta = l.dh_cpu;
        backward_connected_layer(wf, sub);

        copy_cpu(out_count, l.temp_cpu, 1, uf.delta, 1);
        sub.input = state.input;
        sub.delta = state.delta;
        backward_connected_layer(uf, sub);

        /* dc_cpu <- temp2 combined with f; carried into the next (earlier) step. */
        copy_cpu(out_count, l.temp2_cpu, 1, l.temp_cpu, 1);
        mul_cpu(out_count, l.f_cpu, 1, l.temp_cpu, 1);
        copy_cpu(out_count, l.temp_cpu, 1, l.dc_cpu, 1);

        /* Step every per-step pointer and sub-layer copy back by one. */
        state.input -= in_count;
        if (state.delta) {
            state.delta -= in_count;
        }
        l.output -= out_count;
        l.cell_cpu -= out_count;
        l.delta -= out_count;

        increment_layer(&wf, -1);
        increment_layer(&wi, -1);
        increment_layer(&wg, -1);
        increment_layer(&wo, -1);
        increment_layer(&uf, -1);
        increment_layer(&ui, -1);
        increment_layer(&ug, -1);
        increment_layer(&uo, -1);
    }
}