/*
 * GPU forward for the detection layer. At inference time the layer is an
 * identity: the input is copied straight to the layer output on the device.
 * During training, input (and truth, if present) are pulled to the host,
 * the CPU forward pass computes output and delta, and both are pushed back.
 */
void forward_detection_layer_gpu(const detection_layer l, network_state state)
{
    if(!state.train){
        copy_ongpu(l.batch*l.inputs, state.input, 1, l.output_gpu, 1);
        return;
    }

    float *in_cpu = calloc(l.batch*l.inputs, sizeof(float));
    float *truth_cpu = 0;
    if(state.truth){
        /* One objectness flag, the box coords, and the class scores per grid cell. */
        int num_truth = l.batch*l.side*l.side*(1+l.coords+l.classes);
        truth_cpu = calloc(num_truth, sizeof(float));
        cuda_pull_array(state.truth, truth_cpu, num_truth);
    }
    cuda_pull_array(state.input, in_cpu, l.batch*l.inputs);

    network_state cpu_state = state;  /* struct copy keeps train and the other fields */
    cpu_state.truth = truth_cpu;
    cpu_state.input = in_cpu;
    forward_detection_layer(l, cpu_state);

    cuda_push_array(l.output_gpu, l.output, l.batch*l.outputs);
    cuda_push_array(l.delta_gpu, l.delta, l.batch*l.inputs);
    free(cpu_state.input);
    if(cpu_state.truth) free(cpu_state.truth);
}
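/*
 * The num_truth expression above encodes the ground-truth layout: for each of
 * the side*side grid cells there is one objectness flag, coords box values,
 * and classes class probabilities. A worked example with a hypothetical
 * YOLOv1-style configuration (the 7x7 / 4 / 20 values are illustrative
 * assumptions, not taken from the code above):
 *
 *     int batch = 1, side = 7, coords = 4, classes = 20;
 *     int num_truth = batch * side * side * (1 + coords + classes);
 *     // 1 * 7 * 7 * (1 + 4 + 20) = 1225 floats of truth per image
 */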
/*
 * CPU forward pass: dispatch each layer by type, then chain its output into
 * the next layer's input via state.input.
 */
void forward_network(network net, network_state state)
{
    int i;
    for(i = 0; i < net.n; ++i){
        layer l = net.layers[i];
        if(l.type == CONVOLUTIONAL){
            forward_convolutional_layer(l, state);
        } else if(l.type == DECONVOLUTIONAL){
            forward_deconvolutional_layer(l, state);
        } else if(l.type == DETECTION){
            forward_detection_layer(l, state);
        } else if(l.type == CONNECTED){
            forward_connected_layer(l, state);
        } else if(l.type == CROP){
            forward_crop_layer(l, state);
        } else if(l.type == COST){
            forward_cost_layer(l, state);
        } else if(l.type == SOFTMAX){
            forward_softmax_layer(l, state);
        } else if(l.type == MAXPOOL){
            forward_maxpool_layer(l, state);
        } else if(l.type == DROPOUT){
            forward_dropout_layer(l, state);
        } else if(l.type == ROUTE){
            /* Route layers concatenate other layers' outputs, so they need the net. */
            forward_route_layer(l, net);
        }
        state.input = l.output;  /* this layer's output feeds the next layer */
    }
}
/*
 * Later revision of the forward pass: shares the net-level scratch workspace,
 * records the current layer index in the state, and clears each layer's delta
 * before dispatching, covering many more layer types.
 */
void forward_network(network net, network_state state)
{
    state.workspace = net.workspace;
    int i;
    for(i = 0; i < net.n; ++i){
        state.index = i;
        layer l = net.layers[i];
        if(l.delta){
            /* Zero the gradient buffer so deltas don't accumulate across calls. */
            scal_cpu(l.outputs * l.batch, 0, l.delta, 1);
        }
        if(l.type == CONVOLUTIONAL){
            forward_convolutional_layer(l, state);
        } else if(l.type == DECONVOLUTIONAL){
            forward_deconvolutional_layer(l, state);
        } else if(l.type == ACTIVE){
            forward_activation_layer(l, state);
        } else if(l.type == LOCAL){
            forward_local_layer(l, state);
        } else if(l.type == NORMALIZATION){
            forward_normalization_layer(l, state);
        } else if(l.type == BATCHNORM){
            forward_batchnorm_layer(l, state);
        } else if(l.type == DETECTION){
            forward_detection_layer(l, state);
        } else if(l.type == CONNECTED){
            forward_connected_layer(l, state);
        } else if(l.type == RNN){
            forward_rnn_layer(l, state);
        } else if(l.type == GRU){
            forward_gru_layer(l, state);
        } else if(l.type == CRNN){
            forward_crnn_layer(l, state);
        } else if(l.type == CROP){
            forward_crop_layer(l, state);
        } else if(l.type == COST){
            forward_cost_layer(l, state);
        } else if(l.type == SOFTMAX){
            forward_softmax_layer(l, state);
        } else if(l.type == MAXPOOL){
            forward_maxpool_layer(l, state);
        } else if(l.type == AVGPOOL){
            forward_avgpool_layer(l, state);
        } else if(l.type == DROPOUT){
            forward_dropout_layer(l, state);
        } else if(l.type == ROUTE){
            forward_route_layer(l, net);
        } else if(l.type == SHORTCUT){
            forward_shortcut_layer(l, state);
        }
        state.input = l.output;
    }
}
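/*
 * The delta-zeroing above uses scal_cpu to scale a strided array in place
 * with ALPHA == 0. For reference, a sketch consistent with darknet's blas.c;
 * treat the body as an assumption if your revision differs.
 */
void scal_cpu(int N, float ALPHA, float *X, int INCX)
{
    int i;
    for(i = 0; i < N; ++i) X[i*INCX] *= ALPHA;  /* ALPHA == 0 clears the buffer */
}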
/*
 * Newer-API GPU forward for the detection layer: layer forwards take the
 * network struct directly, and the explicit host-side staging buffers are
 * gone (the old declarations remain commented out below).
 */
void forward_detection_layer_gpu(const detection_layer l, network net)
{
    if(!net.train){
        copy_gpu(l.batch*l.inputs, net.input_gpu, 1, l.output_gpu, 1);
        return;
    }

    //float *in_cpu = calloc(l.batch*l.inputs, sizeof(float));
    //float *truth_cpu = 0;
    forward_detection_layer(l, net);
    cuda_push_array(l.output_gpu, l.output, l.batch*l.outputs);
    cuda_push_array(l.delta_gpu, l.delta, l.batch*l.inputs);
}
/*
 * Earlier variant of the detection-layer GPU forward: sizes buffers via
 * get_detection_layer_output_size and builds a fresh host-side state instead
 * of copying the incoming one.
 */
void forward_detection_layer_gpu(const detection_layer l, network_state state)
{
    int outputs = get_detection_layer_output_size(l);
    float *in_cpu = calloc(l.batch*l.inputs, sizeof(float));
    float *truth_cpu = 0;
    if(state.truth){
        truth_cpu = calloc(l.batch*outputs, sizeof(float));
        cuda_pull_array(state.truth, truth_cpu, l.batch*outputs);
    }
    cuda_pull_array(state.input, in_cpu, l.batch*l.inputs);

    network_state cpu_state = {0};  /* zero-init so unset fields (e.g. delta) aren't garbage */
    cpu_state.train = state.train;
    cpu_state.truth = truth_cpu;
    cpu_state.input = in_cpu;
    forward_detection_layer(l, cpu_state);

    cuda_push_array(l.output_gpu, l.output, l.batch*outputs);
    cuda_push_array(l.delta_gpu, l.delta, l.batch*outputs);
    free(cpu_state.input);
    if(cpu_state.truth) free(cpu_state.truth);
}
/*
 * Intermediate forward-pass revision: adds the delta clearing plus the
 * NORMALIZATION and AVGPOOL dispatch cases, but no workspace or layer index.
 */
void forward_network(network net, network_state state)
{
    int i;
    for(i = 0; i < net.n; ++i){
        layer l = net.layers[i];
        if(l.delta){
            scal_cpu(l.outputs * l.batch, 0, l.delta, 1);
        }
        if(l.type == CONVOLUTIONAL){
            forward_convolutional_layer(l, state);
        } else if(l.type == DECONVOLUTIONAL){
            forward_deconvolutional_layer(l, state);
        } else if(l.type == NORMALIZATION){
            forward_normalization_layer(l, state);
        } else if(l.type == DETECTION){
            forward_detection_layer(l, state);
        } else if(l.type == CONNECTED){
            forward_connected_layer(l, state);
        } else if(l.type == CROP){
            forward_crop_layer(l, state);
        } else if(l.type == COST){
            forward_cost_layer(l, state);
        } else if(l.type == SOFTMAX){
            forward_softmax_layer(l, state);
        } else if(l.type == MAXPOOL){
            forward_maxpool_layer(l, state);
        } else if(l.type == AVGPOOL){
            forward_avgpool_layer(l, state);
        } else if(l.type == DROPOUT){
            forward_dropout_layer(l, state);
        } else if(l.type == ROUTE){
            forward_route_layer(l, net);
        }
        state.input = l.output;
    }
}
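/*
 * For context: a minimal sketch of how a dispatcher like the ones above is
 * driven at inference time, modeled on darknet's network_predict. The exact
 * network_state field set varies between the revisions shown, so treat this
 * as an assumption rather than the canonical driver.
 */
float *network_predict(network net, float *input)
{
    network_state state;
    state.truth = 0;      /* no labels at inference */
    state.input = input;  /* host-side input buffer */
    state.delta = 0;      /* no gradient buffer needed */
    state.train = 0;      /* takes the !state.train fast paths above */
    forward_network(net, state);
    return get_network_output(net);  /* darknet's accessor for the final layer output */
}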