/*
 * Backward pass of the recurrent layer, walking its l.steps time steps from
 * the last one down to the first.
 *
 * For every step the three connected sub-layers are back-propagated with
 * backward_connected_layer in the order output_layer, self_layer, input_layer.
 *
 * l     - the RNN layer. It is passed by value, so the pointer adjustments
 *         made to l.state below are local to this call.
 * state - supplies the train flag, the input buffer (indexed per step as
 *         state.input + i * l.inputs * l.batch) and an optional delta buffer
 *         indexed the same way; state.delta may be null.
 *
 * NOTE(review): copy_cpu, axpy_cpu and increment_layer are defined elsewhere.
 * The comments below assume copy_cpu(N, X, 1, Y, 1) copies X into Y,
 * axpy_cpu(N, a, X, 1, Y, 1) performs Y += a * X, and increment_layer shifts
 * a layer's per-step buffers by the given number of steps - confirm.
 */
void backward_rnn_layer(layer l, network_state state) {
	network_state s = { 0 };
	s.train = state.train;
	int i;
	layer input_layer = *(l.input_layer);
	layer self_layer = *(l.self_layer);
	layer output_layer = *(l.output_layer);

	/* Position the local sub-layer copies on the last time step. */
	increment_layer(&input_layer, l.steps - 1);
	increment_layer(&self_layer, l.steps - 1);
	increment_layer(&output_layer, l.steps - 1);

	/* Slots in l.state are l.hidden * l.batch elements wide; start at slot
	 * index l.steps and step back one slot per iteration. */
	l.state += l.hidden * l.batch * l.steps;
	for (i = l.steps - 1; i >= 0; --i) {
		/* Rebuild this step's hidden state as
		 * input_layer.output + self_layer.output. */
		copy_cpu(l.hidden * l.batch, input_layer.output, 1, l.state, 1);
		axpy_cpu(l.hidden * l.batch, 1, self_layer.output, 1, l.state, 1);

		/* The output sub-layer reads the hidden state and writes its input
		 * gradient into this step's self_layer.delta. */
		s.input = l.state;
		s.delta = self_layer.delta;
		backward_connected_layer(output_layer, s);

		l.state -= l.hidden * l.batch;

		/* The self sub-layer reads the preceding state slot. Its input
		 * gradient goes to the previous step's self_layer.delta. At the
		 * first step there is no previous step, so the delta is null; the
		 * pointer one step before the current delta is deliberately never
		 * formed in that case. */
		s.input = l.state;
		s.delta = (i > 0) ? self_layer.delta - l.hidden * l.batch : 0;
		backward_connected_layer(self_layer, s);

		/* The input sub-layer receives the same delta as the self sub-layer. */
		copy_cpu(l.hidden * l.batch, self_layer.delta, 1, input_layer.delta, 1);
		/* With the shortcut option, this step's delta is also added onto the
		 * previous step's delta. */
		if (i > 0 && l.shortcut)
			axpy_cpu(l.hidden * l.batch, 1, self_layer.delta, 1,
					self_layer.delta - l.hidden * l.batch, 1);

		/* The input sub-layer reads step i of the caller's input and, when
		 * the caller provided one, writes into step i of its delta buffer. */
		s.input = state.input + i * l.inputs * l.batch;
		if (state.delta)
			s.delta = state.delta + i * l.inputs * l.batch;
		else
			s.delta = 0;
		backward_connected_layer(input_layer, s);

		/* Move all three sub-layer views back by one time step. */
		increment_layer(&input_layer, -1);
		increment_layer(&self_layer, -1);
		increment_layer(&output_layer, -1);
	}
}
/* Example no. 2 */
/* 0 */
/*
 * Runs the backward pass over every layer of net, from the last layer down
 * to the first.
 *
 * For layer idx > 0, state.input and state.delta are pointed at the output
 * and delta buffers of layer idx - 1. For layer 0, state.input is the input
 * the caller passed in and state.delta is null.
 *
 * MAXPOOL and SOFTMAX layers are skipped when they are layer 0. Layer types
 * not listed in the switch are ignored.
 */
void backward_network(network net, network_state state)
{
    float *first_input = state.input;
    int idx;

    for (idx = net.n - 1; idx >= 0; --idx) {
        layer cur = net.layers[idx];

        if (idx > 0) {
            /* Feed from, and propagate into, the preceding layer. */
            state.input = net.layers[idx - 1].output;
            state.delta = net.layers[idx - 1].delta;
        } else {
            /* The first layer reads the network input; nothing precedes it. */
            state.input = first_input;
            state.delta = 0;
        }

        switch (cur.type) {
        case CONVOLUTIONAL:
            backward_convolutional_layer(cur, state);
            break;
        case DECONVOLUTIONAL:
            backward_deconvolutional_layer(cur, state);
            break;
        case MAXPOOL:
            if (idx != 0) backward_maxpool_layer(cur, state);
            break;
        case DROPOUT:
            backward_dropout_layer(cur, state);
            break;
        case DETECTION:
            backward_detection_layer(cur, state);
            break;
        case SOFTMAX:
            if (idx != 0) backward_softmax_layer(cur, state);
            break;
        case CONNECTED:
            backward_connected_layer(cur, state);
            break;
        case COST:
            backward_cost_layer(cur, state);
            break;
        case ROUTE:
            /* The route layer takes the whole network, not the state. */
            backward_route_layer(cur, net);
            break;
        default:
            break;
        }
    }
}
/* Example no. 3 */
/* 0 */
/*
 * Runs the backward pass over every layer of net, from the last layer down
 * to the first.
 *
 * state.workspace is set to net.workspace once, and state.index is set to
 * the index of the layer being processed. For layer idx > 0, state.input and
 * state.delta are pointed at the output and delta buffers of layer idx - 1.
 * For layer 0 they are restored to the values the caller passed in.
 *
 * MAXPOOL and SOFTMAX layers are skipped when they are layer 0. Layer types
 * not listed in the switch are ignored.
 */
void backward_network(network net, network_state state)
{
    float *first_input = state.input;
    float *first_delta = state.delta;
    int idx;

    state.workspace = net.workspace;

    for (idx = net.n - 1; idx >= 0; --idx) {
        layer cur = net.layers[idx];

        state.index = idx;
        if (idx > 0) {
            /* Feed from, and propagate into, the preceding layer. */
            state.input = net.layers[idx - 1].output;
            state.delta = net.layers[idx - 1].delta;
        } else {
            /* The first layer uses the caller-supplied input and delta. */
            state.input = first_input;
            state.delta = first_delta;
        }

        switch (cur.type) {
        case CONVOLUTIONAL:
            backward_convolutional_layer(cur, state);
            break;
        case DECONVOLUTIONAL:
            backward_deconvolutional_layer(cur, state);
            break;
        case ACTIVE:
            backward_activation_layer(cur, state);
            break;
        case NORMALIZATION:
            backward_normalization_layer(cur, state);
            break;
        case BATCHNORM:
            backward_batchnorm_layer(cur, state);
            break;
        case MAXPOOL:
            if (idx != 0) backward_maxpool_layer(cur, state);
            break;
        case AVGPOOL:
            backward_avgpool_layer(cur, state);
            break;
        case DROPOUT:
            backward_dropout_layer(cur, state);
            break;
        case DETECTION:
            backward_detection_layer(cur, state);
            break;
        case SOFTMAX:
            if (idx != 0) backward_softmax_layer(cur, state);
            break;
        case CONNECTED:
            backward_connected_layer(cur, state);
            break;
        case RNN:
            backward_rnn_layer(cur, state);
            break;
        case GRU:
            backward_gru_layer(cur, state);
            break;
        case CRNN:
            backward_crnn_layer(cur, state);
            break;
        case LOCAL:
            backward_local_layer(cur, state);
            break;
        case COST:
            backward_cost_layer(cur, state);
            break;
        case ROUTE:
            /* The route layer takes the whole network, not the state. */
            backward_route_layer(cur, net);
            break;
        case SHORTCUT:
            backward_shortcut_layer(cur, state);
            break;
        default:
            break;
        }
    }
}
/*
 * Backward pass of the LSTM layer over its l.steps time steps, processed from
 * the last step down to the first.
 *
 * For each step the four gate activations are recomputed from the stored
 * outputs of the eight connected sub-layers, a delta is formed for every gate,
 * and backward_connected_layer is called on each sub-layer. The w* sub-layers
 * are back-propagated with l.prev_state_cpu as input and l.dh_cpu as delta;
 * the u* sub-layers with the caller's state.input / state.delta.
 *
 * l     - the LSTM layer. It is passed by value, so the pointer adjustments
 *         to l.output, l.cell_cpu, l.delta and l.dh_cpu are local to this call.
 * state - supplies the train flag, the input buffer and an optional delta
 *         buffer (may be null); both are walked one step at a time.
 *
 * NOTE(review): the helpers used here are defined elsewhere. The comments
 * below assume copy_cpu(N, X, 1, Y, 1) copies X into Y, axpy_cpu(N, a, X, 1,
 * Y, 1) performs Y += a * X, mul_cpu(N, X, 1, Y, 1) performs Y *= X
 * element-wise, activate_array applies the named activation in place,
 * gradient_array scales its last argument by the activation's derivative
 * evaluated from its first argument, and increment_layer shifts a layer's
 * per-step buffers by the given number of steps - confirm.
 */
void backward_lstm_layer(layer l, network state) {
	network s = { 0 };
	s.train = state.train;
	int i;
	/* Local copies of the sub-layers whose backward input is l.prev_state_cpu. */
	layer wf = *(l.wf);
	layer wi = *(l.wi);
	layer wg = *(l.wg);
	layer wo = *(l.wo);

	/* Local copies of the sub-layers whose backward input is state.input. */
	layer uf = *(l.uf);
	layer ui = *(l.ui);
	layer ug = *(l.ug);
	layer uo = *(l.uo);

	/* Position all eight sub-layer copies on the last time step. */
	increment_layer(&wf, l.steps - 1);
	increment_layer(&wi, l.steps - 1);
	increment_layer(&wg, l.steps - 1);
	increment_layer(&wo, l.steps - 1);

	increment_layer(&uf, l.steps - 1);
	increment_layer(&ui, l.steps - 1);
	increment_layer(&ug, l.steps - 1);
	increment_layer(&uo, l.steps - 1);

	/* Point the caller's input (and delta, if any) at the last step. */
	state.input += l.inputs * l.batch * (l.steps - 1);
	if (state.delta)
		state.delta += l.inputs * l.batch * (l.steps - 1);

	/* Point this layer's per-step output, cell and delta buffers at the last step. */
	l.output += l.outputs * l.batch * (l.steps - 1);
	l.cell_cpu += l.outputs * l.batch * (l.steps - 1);
	l.delta += l.outputs * l.batch * (l.steps - 1);

	for (i = l.steps - 1; i >= 0; --i) {
		/* Load the previous step's cell and output, and this step's cell and
		 * output, into the working buffers.
		 * NOTE(review): at i == 0 prev_cell_cpu and prev_state_cpu are not
		 * refreshed and keep whatever they already held - confirm this is
		 * intended rather than zeroing them for the first step. */
		if (i != 0)
			copy_cpu(l.outputs * l.batch, l.cell_cpu - l.outputs * l.batch, 1,
					l.prev_cell_cpu, 1);
		copy_cpu(l.outputs * l.batch, l.cell_cpu, 1, l.c_cpu, 1);
		if (i != 0)
			copy_cpu(l.outputs * l.batch, l.output - l.outputs * l.batch, 1,
					l.prev_state_cpu, 1);
		copy_cpu(l.outputs * l.batch, l.output, 1, l.h_cpu, 1);

		/* Gradients for the previous step's output accumulate in the previous
		 * step's slice of l.delta; there is none for the first step. */
		l.dh_cpu = (i == 0) ? 0 : l.delta - l.outputs * l.batch;

		/* Gate pre-activations: sum of the matching w* and u* outputs. */
		copy_cpu(l.outputs * l.batch, wf.output, 1, l.f_cpu, 1);
		axpy_cpu(l.outputs * l.batch, 1, uf.output, 1, l.f_cpu, 1);

		copy_cpu(l.outputs * l.batch, wi.output, 1, l.i_cpu, 1);
		axpy_cpu(l.outputs * l.batch, 1, ui.output, 1, l.i_cpu, 1);

		copy_cpu(l.outputs * l.batch, wg.output, 1, l.g_cpu, 1);
		axpy_cpu(l.outputs * l.batch, 1, ug.output, 1, l.g_cpu, 1);

		copy_cpu(l.outputs * l.batch, wo.output, 1, l.o_cpu, 1);
		axpy_cpu(l.outputs * l.batch, 1, uo.output, 1, l.o_cpu, 1);

		/* f, i and o use LOGISTIC; g uses TANH. */
		activate_array(l.f_cpu, l.outputs * l.batch, LOGISTIC);
		activate_array(l.i_cpu, l.outputs * l.batch, LOGISTIC);
		activate_array(l.g_cpu, l.outputs * l.batch, TANH);
		activate_array(l.o_cpu, l.outputs * l.batch, LOGISTIC);

		/* temp3 = this step's output delta. */
		copy_cpu(l.outputs * l.batch, l.delta, 1, l.temp3_cpu, 1);

		/* temp = tanh(c). */
		copy_cpu(l.outputs * l.batch, l.c_cpu, 1, l.temp_cpu, 1);
		activate_array(l.temp_cpu, l.outputs * l.batch, TANH);

		/* temp2 = temp3 * o. */
		copy_cpu(l.outputs * l.batch, l.temp3_cpu, 1, l.temp2_cpu, 1);
		mul_cpu(l.outputs * l.batch, l.o_cpu, 1, l.temp2_cpu, 1);

		/* Pass temp2 through the TANH derivative, then add the cell gradient
		 * l.dc_cpu carried over from the previously processed (later) step.
		 * temp2 is reused by every gate below and must not be overwritten. */
		gradient_array(l.temp_cpu, l.outputs * l.batch, TANH, l.temp2_cpu);
		axpy_cpu(l.outputs * l.batch, 1, l.dc_cpu, 1, l.temp2_cpu, 1);

		/* Output gate: temp = tanh(c) * temp3, passed through the LOGISTIC
		 * derivative of o; used as the delta of both wo and uo. */
		copy_cpu(l.outputs * l.batch, l.c_cpu, 1, l.temp_cpu, 1);
		activate_array(l.temp_cpu, l.outputs * l.batch, TANH);
		mul_cpu(l.outputs * l.batch, l.temp3_cpu, 1, l.temp_cpu, 1);
		gradient_array(l.o_cpu, l.outputs * l.batch, LOGISTIC, l.temp_cpu);
		copy_cpu(l.outputs * l.batch, l.temp_cpu, 1, wo.delta, 1);
		s.input = l.prev_state_cpu;
		s.delta = l.dh_cpu;
		backward_connected_layer(wo, s);

		copy_cpu(l.outputs * l.batch, l.temp_cpu, 1, uo.delta, 1);
		s.input = state.input;
		s.delta = state.delta;
		backward_connected_layer(uo, s);

		/* g gate: temp = temp2 * i, passed through the TANH derivative of g;
		 * used as the delta of both wg and ug. */
		copy_cpu(l.outputs * l.batch, l.temp2_cpu, 1, l.temp_cpu, 1);
		mul_cpu(l.outputs * l.batch, l.i_cpu, 1, l.temp_cpu, 1);
		gradient_array(l.g_cpu, l.outputs * l.batch, TANH, l.temp_cpu);
		copy_cpu(l.outputs * l.batch, l.temp_cpu, 1, wg.delta, 1);
		s.input = l.prev_state_cpu;
		s.delta = l.dh_cpu;
		backward_connected_layer(wg, s);

		copy_cpu(l.outputs * l.batch, l.temp_cpu, 1, ug.delta, 1);
		s.input = state.input;
		s.delta = state.delta;
		backward_connected_layer(ug, s);

		/* i gate: temp = temp2 * g, passed through the LOGISTIC derivative of
		 * i; used as the delta of both wi and ui. */
		copy_cpu(l.outputs * l.batch, l.temp2_cpu, 1, l.temp_cpu, 1);
		mul_cpu(l.outputs * l.batch, l.g_cpu, 1, l.temp_cpu, 1);
		gradient_array(l.i_cpu, l.outputs * l.batch, LOGISTIC, l.temp_cpu);
		copy_cpu(l.outputs * l.batch, l.temp_cpu, 1, wi.delta, 1);
		s.input = l.prev_state_cpu;
		s.delta = l.dh_cpu;
		backward_connected_layer(wi, s);

		copy_cpu(l.outputs * l.batch, l.temp_cpu, 1, ui.delta, 1);
		s.input = state.input;
		s.delta = state.delta;
		backward_connected_layer(ui, s);

		/* f gate: temp = temp2 * prev_cell, passed through the LOGISTIC
		 * derivative of f; used as the delta of both wf and uf. */
		copy_cpu(l.outputs * l.batch, l.temp2_cpu, 1, l.temp_cpu, 1);
		mul_cpu(l.outputs * l.batch, l.prev_cell_cpu, 1, l.temp_cpu, 1);
		gradient_array(l.f_cpu, l.outputs * l.batch, LOGISTIC, l.temp_cpu);
		copy_cpu(l.outputs * l.batch, l.temp_cpu, 1, wf.delta, 1);
		s.input = l.prev_state_cpu;
		s.delta = l.dh_cpu;
		backward_connected_layer(wf, s);

		copy_cpu(l.outputs * l.batch, l.temp_cpu, 1, uf.delta, 1);
		s.input = state.input;
		s.delta = state.delta;
		backward_connected_layer(uf, s);

		/* Cell gradient handed to the next iteration (the earlier time step):
		 * dc = temp2 * f. */
		copy_cpu(l.outputs * l.batch, l.temp2_cpu, 1, l.temp_cpu, 1);
		mul_cpu(l.outputs * l.batch, l.f_cpu, 1, l.temp_cpu, 1);
		copy_cpu(l.outputs * l.batch, l.temp_cpu, 1, l.dc_cpu, 1);

		/* Step every per-step pointer back by one time step. */
		state.input -= l.inputs * l.batch;
		if (state.delta)
			state.delta -= l.inputs * l.batch;
		l.output -= l.outputs * l.batch;
		l.cell_cpu -= l.outputs * l.batch;
		l.delta -= l.outputs * l.batch;

		increment_layer(&wf, -1);
		increment_layer(&wi, -1);
		increment_layer(&wg, -1);
		increment_layer(&wo, -1);

		increment_layer(&uf, -1);
		increment_layer(&ui, -1);
		increment_layer(&ug, -1);
		increment_layer(&uo, -1);
	}
}