/*
 * Construct a vanilla RNN layer from three connected sub-layers:
 * input->hidden, hidden->hidden (recurrent), and hidden->output.
 *
 * batch            total batch size; divided by steps to get per-step batch
 * inputs/hidden/outputs  sizes of the three stages
 * steps            number of unrolled timesteps
 * activation       activation for the input and output sub-layers
 * batch_normalize  nonzero to batch-normalize the sub-layers
 * log              2 -> LOGGY, 1 -> LOGISTIC for the recurrent sub-layer,
 *                  otherwise reuse `activation`
 *
 * Returns the assembled layer by value; sub-layers and state buffers are
 * heap-allocated and owned by the returned layer.
 */
layer make_rnn_layer(int batch, int inputs, int hidden, int outputs, int steps, ACTIVATION activation, int batch_normalize, int log)
{
    fprintf(stderr, "RNN Layer: %d inputs, %d outputs\n", inputs, outputs);

    /* Callers pass batch*steps; each sub-layer runs on the per-step batch. */
    batch = batch / steps;

    layer l = {0};
    l.batch = batch;
    l.type = RNN;
    l.steps = steps;
    l.hidden = hidden;
    l.inputs = inputs;

    /* One extra timestep of hidden state holds the initial state. */
    l.state = calloc(batch * hidden * (steps + 1), sizeof(float));

    l.input_layer = malloc(sizeof(layer));
    fprintf(stderr, "\t\t");
    *(l.input_layer) = make_connected_layer(batch * steps, inputs, hidden, activation, batch_normalize);
    l.input_layer->batch = batch;

    /* Recurrent hidden->hidden connection; activation selectable via `log`. */
    ACTIVATION self_activation = activation;
    if (log == 2) {
        self_activation = LOGGY;
    } else if (log == 1) {
        self_activation = LOGISTIC;
    }
    l.self_layer = malloc(sizeof(layer));
    fprintf(stderr, "\t\t");
    *(l.self_layer) = make_connected_layer(batch * steps, hidden, hidden, self_activation, batch_normalize);
    l.self_layer->batch = batch;

    l.output_layer = malloc(sizeof(layer));
    fprintf(stderr, "\t\t");
    *(l.output_layer) = make_connected_layer(batch * steps, hidden, outputs, activation, batch_normalize);
    l.output_layer->batch = batch;

    /* The RNN layer's output/delta alias the output sub-layer's buffers. */
    l.outputs = outputs;
    l.output = l.output_layer->output;
    l.delta = l.output_layer->delta;

#ifdef GPU
    l.state_gpu = cuda_make_array(l.state, batch*hidden*(steps+1));
    l.output_gpu = l.output_layer->output_gpu;
    l.delta_gpu = l.output_layer->delta_gpu;
#endif

    return l;
}
/*
 * Parse a [connected] section from the network config.
 * Reads "output" (default 1), "activation" (default "logistic"), and
 * "batch_normalize" (default 0), then builds the connected layer.
 *
 * NOTE(review): a second parse_connected with a different return type
 * appears later in this file; the two cannot be linked into one
 * translation unit — confirm which definition belongs here.
 */
layer parse_connected(list *options, size_params params)
{
    int output = option_find_int(options, "output", 1);
    char *activation_s = option_find_str(options, "activation", "logistic");
    ACTIVATION activation = get_activation(activation_s);
    int batch_normalize = option_find_int_quiet(options, "batch_normalize", 0);

    return make_connected_layer(params.batch, params.inputs, output, activation, batch_normalize, params.net->adam);
}
/*
 * Parse a [connected] section and optionally load inline weights/biases
 * from the config ("weights"/"biases" as comma-separated value strings).
 *
 * NOTE(review): another parse_connected with return type `layer` appears
 * earlier in this file; only one definition can exist per translation
 * unit — confirm which version is live.
 */
connected_layer parse_connected(list *options, size_params params)
{
    int output = option_find_int(options, "output", 1);
    char *activation_s = option_find_str(options, "activation", "logistic");
    ACTIVATION activation = get_activation(activation_s);

    connected_layer l = make_connected_layer(params.batch, params.inputs, output, activation);

    /* Inline parameter strings; option_find_str returns 0 when absent
       (parse_data is presumably a no-op on NULL — it is called
       unconditionally here). */
    char *weights = option_find_str(options, "weights", 0);
    char *biases = option_find_str(options, "biases", 0);
    parse_data(biases, l.biases, output);
    parse_data(weights, l.weights, params.inputs*output);

#ifdef GPU
    /* Push loaded parameters to the device only if any were supplied. */
    if (weights || biases) push_connected_layer(l);
#endif

    return l;
}
layer make_lstm_layer(int batch, int inputs, int outputs, int steps, int batch_normalize, int adam) { fprintf(stderr, "LSTM Layer: %d inputs, %d outputs\n", inputs, outputs); batch = batch / steps; layer l = { 0 }; l.batch = batch; l.type = LSTM; l.steps = steps; l.inputs = inputs; l.uf = malloc(sizeof(layer)); fprintf(stderr, "\t\t"); *(l.uf) = make_connected_layer(batch * steps, inputs, outputs, LINEAR, batch_normalize, adam); l.uf->batch = batch; l.ui = malloc(sizeof(layer)); fprintf(stderr, "\t\t"); *(l.ui) = make_connected_layer(batch * steps, inputs, outputs, LINEAR, batch_normalize, adam); l.ui->batch = batch; l.ug = malloc(sizeof(layer)); fprintf(stderr, "\t\t"); *(l.ug) = make_connected_layer(batch * steps, inputs, outputs, LINEAR, batch_normalize, adam); l.ug->batch = batch; l.uo = malloc(sizeof(layer)); fprintf(stderr, "\t\t"); *(l.uo) = make_connected_layer(batch * steps, inputs, outputs, LINEAR, batch_normalize, adam); l.uo->batch = batch; l.wf = malloc(sizeof(layer)); fprintf(stderr, "\t\t"); *(l.wf) = make_connected_layer(batch * steps, outputs, outputs, LINEAR, batch_normalize, adam); l.wf->batch = batch; l.wi = malloc(sizeof(layer)); fprintf(stderr, "\t\t"); *(l.wi) = make_connected_layer(batch * steps, outputs, outputs, LINEAR, batch_normalize, adam); l.wi->batch = batch; l.wg = malloc(sizeof(layer)); fprintf(stderr, "\t\t"); *(l.wg) = make_connected_layer(batch * steps, outputs, outputs, LINEAR, batch_normalize, adam); l.wg->batch = batch; l.wo = malloc(sizeof(layer)); fprintf(stderr, "\t\t"); *(l.wo) = make_connected_layer(batch * steps, outputs, outputs, LINEAR, batch_normalize, adam); l.wo->batch = batch; l.batch_normalize = batch_normalize; l.outputs = outputs; l.output = calloc(outputs * batch * steps, sizeof(real_t)); l.state = calloc(outputs * batch, sizeof(real_t)); l.forward = forward_lstm_layer; l.update = update_lstm_layer; l.prev_state_cpu = calloc(batch * outputs, sizeof(real_t)); l.prev_cell_cpu = calloc(batch * outputs, 
sizeof(real_t)); l.cell_cpu = calloc(batch * outputs * steps, sizeof(real_t)); l.f_cpu = calloc(batch * outputs, sizeof(real_t)); l.i_cpu = calloc(batch * outputs, sizeof(real_t)); l.g_cpu = calloc(batch * outputs, sizeof(real_t)); l.o_cpu = calloc(batch * outputs, sizeof(real_t)); l.c_cpu = calloc(batch * outputs, sizeof(real_t)); l.h_cpu = calloc(batch * outputs, sizeof(real_t)); l.temp_cpu = calloc(batch * outputs, sizeof(real_t)); l.temp2_cpu = calloc(batch * outputs, sizeof(real_t)); l.temp3_cpu = calloc(batch * outputs, sizeof(real_t)); l.dc_cpu = calloc(batch * outputs, sizeof(real_t)); l.dh_cpu = calloc(batch * outputs, sizeof(real_t)); #ifdef GPU l.forward_gpu = forward_lstm_layer_gpu; l.backward_gpu = backward_lstm_layer_gpu; l.update_gpu = update_lstm_layer_gpu; l.output_gpu = cuda_make_array(0, batch * outputs * steps); l.delta_gpu = cuda_make_array(0, batch * l.outputs * steps); l.prev_state_gpu = cuda_make_array(0, batch * outputs); l.prev_cell_gpu = cuda_make_array(0, batch * outputs); l.cell_gpu = cuda_make_array(0, batch * outputs * steps); l.f_gpu = cuda_make_array(0, batch * outputs); l.i_gpu = cuda_make_array(0, batch * outputs); l.g_gpu = cuda_make_array(0, batch * outputs); l.o_gpu = cuda_make_array(0, batch * outputs); l.c_gpu = cuda_make_array(0, batch * outputs); l.h_gpu = cuda_make_array(0, batch * outputs); l.temp_gpu = cuda_make_array(0, batch * outputs); l.temp2_gpu = cuda_make_array(0, batch * outputs); l.temp3_gpu = cuda_make_array(0, batch * outputs); l.dc_gpu = cuda_make_array(0, batch * outputs); l.dh_gpu = cuda_make_array(0, batch * outputs); #ifdef CUDNN cudnnSetTensor4dDescriptor(l.wf->dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, batch, l.wf->out_c, l.wf->out_h, l.wf->out_w); cudnnSetTensor4dDescriptor(l.wi->dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, batch, l.wi->out_c, l.wi->out_h, l.wi->out_w); cudnnSetTensor4dDescriptor(l.wg->dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, batch, l.wg->out_c, l.wg->out_h, 
l.wg->out_w); cudnnSetTensor4dDescriptor(l.wo->dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, batch, l.wo->out_c, l.wo->out_h, l.wo->out_w); cudnnSetTensor4dDescriptor(l.uf->dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, batch, l.uf->out_c, l.uf->out_h, l.uf->out_w); cudnnSetTensor4dDescriptor(l.ui->dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, batch, l.ui->out_c, l.ui->out_h, l.ui->out_w); cudnnSetTensor4dDescriptor(l.ug->dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, batch, l.ug->out_c, l.ug->out_h, l.ug->out_w); cudnnSetTensor4dDescriptor(l.uo->dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, batch, l.uo->out_c, l.uo->out_h, l.uo->out_w); #endif #endif return l; }