void forward_cost_layer_gpu(cost_layer l, network_state state)
{
    if (!state.truth) return;
    /* MASKED: exclude positions where the truth holds the sentinel value from the cost. */
    if (l.cost_type == MASKED) {
        mask_ongpu(l.batch*l.inputs, state.input, SECRET_NUM, state.truth);
    }
    if (l.cost_type == SMOOTH) {
        smooth_l1_gpu(l.batch*l.inputs, state.input, state.truth, l.delta_gpu);
    } else {
        /* L2 cost: delta = truth - prediction. */
        copy_ongpu(l.batch*l.inputs, state.truth, 1, l.delta_gpu, 1);
        axpy_ongpu(l.batch*l.inputs, -1, state.input, 1, l.delta_gpu, 1);
    }
    /* Pull delta back and compute the cost on the CPU as the squared norm of delta. */
    cuda_pull_array(l.delta_gpu, l.delta, l.batch*l.inputs);
    *(l.output) = dot_cpu(l.batch*l.inputs, l.delta, 1, l.delta, 1);
}
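/* For context: a minimal sketch (an assumption, not part of this file) of the
 * matching backward pass that consumes l.delta_gpu. In upstream darknet the
 * cost layer simply scales its delta into the previous layer's delta buffer;
 * l.scale and axpy_ongpu are assumed to follow the conventions used above. */
void backward_cost_layer_gpu(const cost_layer l, network_state state)
{
    /* state.delta += l.scale * l.delta_gpu */
    axpy_ongpu(l.batch*l.inputs, l.scale, l.delta_gpu, 1, state.delta, 1);
}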
void forward_cost_layer_gpu(cost_layer l, network *net)
{
    if (!net->truth) return;
    /* Label smoothing: mix the one-hot truth with a uniform distribution over the inputs. */
    if (l.smooth) {
        scal_gpu(l.batch*l.inputs, (1-l.smooth), net->truth_gpu, 1);
        add_gpu(l.batch*l.inputs, l.smooth * 1./l.inputs, net->truth_gpu, 1);
    }
    /* Per-element delta and error for the selected cost function. */
    if (l.cost_type == SMOOTH) {
        smooth_l1_gpu(l.batch*l.inputs, net->input_gpu, net->truth_gpu, l.delta_gpu, l.output_gpu);
    } else if (l.cost_type == L1) {
        l1_gpu(l.batch*l.inputs, net->input_gpu, net->truth_gpu, l.delta_gpu, l.output_gpu);
    } else if (l.cost_type == WGAN) {
        wgan_gpu(l.batch*l.inputs, net->input_gpu, net->truth_gpu, l.delta_gpu, l.output_gpu);
    } else {
        l2_gpu(l.batch*l.inputs, net->input_gpu, net->truth_gpu, l.delta_gpu, l.output_gpu);
    }
    /* SEG: scale delta and error by noobject_scale wherever the truth is 0 (background). */
    if (l.cost_type == SEG && l.noobject_scale != 1) {
        scale_mask_gpu(l.batch*l.inputs, l.delta_gpu, 0, net->truth_gpu, l.noobject_scale);
        scale_mask_gpu(l.batch*l.inputs, l.output_gpu, 0, net->truth_gpu, l.noobject_scale);
    }
    /* MASKED: zero out entries of net->delta_gpu wherever the truth equals the sentinel value. */
    if (l.cost_type == MASKED) {
        mask_gpu(l.batch*l.inputs, net->delta_gpu, SECRET_NUM, net->truth_gpu, 0);
    }
    /* Hard-example ratio: sort |delta| to find the threshold below which the smallest
       (1-ratio) fraction of deltas would be suppressed. Note the threshold is then
       overridden to 0 (an upstream debug leftover) before being applied. */
    if (l.ratio) {
        cuda_pull_array(l.delta_gpu, l.delta, l.batch*l.inputs);
        qsort(l.delta, l.batch*l.inputs, sizeof(float), float_abs_compare);
        int n = (1-l.ratio) * l.batch*l.inputs;
        float thresh = l.delta[n];
        thresh = 0;
        printf("%f\n", thresh);
        supp_gpu(l.batch*l.inputs, thresh, l.delta_gpu, 1);
    }
    /* Suppress deltas whose magnitude falls below a fixed, per-input threshold. */
    if (l.thresh) {
        supp_gpu(l.batch*l.inputs, l.thresh*1./l.inputs, l.delta_gpu, 1);
    }
    /* Total cost is the sum of the per-element errors, reduced on the CPU. */
    cuda_pull_array(l.output_gpu, l.output, l.batch*l.inputs);
    l.cost[0] = sum_array(l.output, l.batch*l.inputs);
}
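/* The ratio branch above sorts the pulled-back deltas with qsort and a
 * float_abs_compare comparator. One plausible implementation, ordering by
 * absolute value (a sketch; the exact upstream definition may differ): */
int float_abs_compare(const void *a, const void *b)
{
    float fa = *(const float *)a;
    if (fa < 0) fa = -fa;
    float fb = *(const float *)b;
    if (fb < 0) fb = -fb;
    /* Returns -1, 0, or 1 without risking float-to-int truncation. */
    return (fa > fb) - (fa < fb);
}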
void forward_cost_layer_gpu(cost_layer l, network_state state)
{
    if (!state.truth) return;
    /* Label smoothing: mix the one-hot truth with a uniform distribution over the inputs. */
    if (l.smooth) {
        scal_ongpu(l.batch*l.inputs, (1-l.smooth), state.truth, 1);
        add_ongpu(l.batch*l.inputs, l.smooth * 1./l.inputs, state.truth, 1);
    }
    /* MASKED: exclude positions where the truth holds the sentinel value from the cost. */
    if (l.cost_type == MASKED) {
        mask_ongpu(l.batch*l.inputs, state.input, SECRET_NUM, state.truth);
    }
    /* Per-element delta and error for the selected cost function. */
    if (l.cost_type == SMOOTH) {
        smooth_l1_gpu(l.batch*l.inputs, state.input, state.truth, l.delta_gpu, l.output_gpu);
    } else if (l.cost_type == L1) {
        l1_gpu(l.batch*l.inputs, state.input, state.truth, l.delta_gpu, l.output_gpu);
    } else {
        l2_gpu(l.batch*l.inputs, state.input, state.truth, l.delta_gpu, l.output_gpu);
    }
    /* Hard-example ratio: same debug-disabled suppression as above (thresh is reset to 0). */
    if (l.ratio) {
        cuda_pull_array(l.delta_gpu, l.delta, l.batch*l.inputs);
        qsort(l.delta, l.batch*l.inputs, sizeof(float), float_abs_compare);
        int n = (1-l.ratio) * l.batch*l.inputs;
        float thresh = l.delta[n];
        thresh = 0;
        printf("%f\n", thresh);
        supp_ongpu(l.batch*l.inputs, thresh, l.delta_gpu, 1);
    }
    /* Suppress deltas whose magnitude falls below a fixed, per-input threshold. */
    if (l.thresh) {
        supp_ongpu(l.batch*l.inputs, l.thresh*1./l.inputs, l.delta_gpu, 1);
    }
    /* Total cost is the sum of the per-element errors, reduced on the CPU. */
    cuda_pull_array(l.output_gpu, l.output, l.batch*l.inputs);
    l.cost[0] = sum_array(l.output, l.batch*l.inputs);
}
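/* Sketch of what the l2_gpu helper used above could look like: one thread per
 * element writes both the squared error and the delta (truth - pred). The
 * launch helpers cuda_gridsize, BLOCK and check_error are assumed to follow
 * darknet's usual conventions; treat this as an illustration, not the exact
 * upstream kernel. */
__global__ void l2_kernel(int n, float *pred, float *truth, float *delta, float *error)
{
    int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
    if (i < n) {
        float diff = truth[i] - pred[i];
        error[i] = diff * diff;   /* per-element squared error */
        delta[i] = diff;          /* backprop delta: truth - pred */
    }
}

extern "C" void l2_gpu(int n, float *pred, float *truth, float *delta, float *error)
{
    l2_kernel<<<cuda_gridsize(n), BLOCK>>>(n, pred, truth, delta, error);
    check_error(cudaPeekAtLastError());
}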