/*
 * Compute mag_array() over an array that is reachable through cuda_pull_array().
 *
 * n floats are pulled from x_gpu into a temporary zero-initialised host buffer,
 * the buffer is handed to mag_array(), released, and mag_array()'s result is
 * returned unchanged.
 *
 * x_gpu: source pointer passed to cuda_pull_array() (presumably device memory).
 * n:     number of float elements to pull and measure.
 *
 * Terminates the process with a diagnostic if the host buffer cannot be
 * allocated, instead of passing a NULL pointer on to the copy.
 */
float cuda_mag_array(float *x_gpu, size_t n)
{
    float *temp = (float *)calloc(n, sizeof(float));
    // calloc(0, ...) may legitimately return NULL, so only n > 0 is a failure.
    if (!temp && n > 0) {
        fprintf(stderr, "cuda_mag_array: failed to allocate %lu floats\n", (unsigned long)n);
        exit(EXIT_FAILURE);
    }
    cuda_pull_array(x_gpu, temp, n);
    float m = mag_array(temp, n);
    free(temp);
    return m;
}
/*
 * Create a w x h x c image whose elements are rand_normal() samples, then
 * rescale the whole buffer by the reciprocal of its mag_array() value.
 *
 * Returns the image produced by make_image(); the caller owns it.
 */
image random_unit_vector_image(int w, int h, int c)
{
    image im = make_image(w, h, c);
    const int total = im.w*im.h*im.c;
    int k = 0;

    // Fill every element with an independent random sample.
    while (k < total) {
        im.data[k] = rand_normal();
        ++k;
    }

    // Rescale by 1/magnitude as reported by mag_array().
    float norm = mag_array(im.data, total);
    scale_array(im.data, total, 1./norm);

    return im;
}
/*
 * Spherical linear interpolation between two n-element vectors.
 *
 * start, end: input vectors of n floats; the angle between them is taken from
 *             acos of their dot product, so they are presumably unit length.
 * s:          interpolation parameter (0 weights start fully, 1 weights end fully).
 * n:          number of elements in start, end and out.
 * out:        receives the interpolated vector, rescaled by 1/mag_array(out).
 *
 * Compared with the textbook formula this version is guarded against:
 *  - a dot product that rounding pushed slightly outside [-1, 1]
 *    (acos would return NaN);
 *  - sin(omega) == 0 for (anti)parallel inputs (division by zero); plain
 *    linear weights are used instead;
 *  - a zero-magnitude result (division by zero in the final rescale).
 */
void slerp(float *start, float *end, float s, int n, float *out)
{
    float cos_omega = dot_cpu(n, start, 1, end, 1);
    // Clamp into acos' domain; a NaN dot product passes through unchanged.
    if (cos_omega > 1) cos_omega = 1;
    if (cos_omega < -1) cos_omega = -1;

    float omega = acos(cos_omega);
    float so = sin(omega);

    float w_start;
    float w_end;
    if (so < 1e-6f && so > -1e-6f) {
        // Degenerate angle: the slerp weights are 0/0, fall back to lerp weights.
        w_start = 1 - s;
        w_end = s;
    } else {
        w_start = sin((1-s)*omega)/so;
        w_end = sin(s*omega)/so;
    }

    fill_cpu(n, 0, out, 1);
    axpy_cpu(n, w_start, start, 1, out, 1);
    axpy_cpu(n, w_end, end, 1, out, 1);

    float mag = mag_array(out, n);
    // Skip the rescale when the blend cancelled out entirely.
    if (mag > 0) scale_array(out, n, 1./mag);
}
void test_resize(char *filename) { image im = load_image(filename, 0,0, 3); float mag = mag_array(im.data, im.w*im.h*im.c); printf("L2 Norm: %f\n", mag); image gray = grayscale_image(im); image sat2 = copy_image(im); saturate_image(sat2, 2); image sat5 = copy_image(im); saturate_image(sat5, .5); image exp2 = copy_image(im); exposure_image(exp2, 2); image exp5 = copy_image(im); exposure_image(exp5, .5); #ifdef GPU image r = resize_image(im, im.w, im.h); image black = make_image(im.w*2 + 3, im.h*2 + 3, 9); image black2 = make_image(im.w, im.h, 3); float *r_gpu = cuda_make_array(r.data, r.w*r.h*r.c); float *black_gpu = cuda_make_array(black.data, black.w*black.h*black.c); float *black2_gpu = cuda_make_array(black2.data, black2.w*black2.h*black2.c); shortcut_gpu(3, r.w, r.h, 1, r_gpu, black.w, black.h, 3, black_gpu); //flip_image(r); //shortcut_gpu(3, r.w, r.h, 1, r.data, black.w, black.h, 3, black.data); shortcut_gpu(3, black.w, black.h, 3, black_gpu, black2.w, black2.h, 1, black2_gpu); cuda_pull_array(black_gpu, black.data, black.w*black.h*black.c); cuda_pull_array(black2_gpu, black2.data, black2.w*black2.h*black2.c); show_image_layers(black, "Black"); show_image(black2, "Recreate"); #endif show_image(im, "Original"); show_image(gray, "Gray"); show_image(sat2, "Saturation-2"); show_image(sat5, "Saturation-.5"); show_image(exp2, "Exposure-2"); show_image(exp5, "Exposure-.5"); #ifdef OPENCV cvWaitKey(0); #endif }
/*
 * Forward pass (and, when training, loss/gradient computation) of a region layer.
 *
 * Inference part:
 *   - copies state.input into l.output;
 *   - in non-GPU builds, calls flatten() on the output;
 *   - applies logistic_activate() to the value at offset 4 of every prediction
 *     (each prediction occupies `size` = coords + classes + 1 floats);
 *   - in non-GPU builds, applies softmax_tree()/softmax() to the class scores
 *     that start at offset 5.
 *
 * Training part (only when state.train is set):
 *   - zeroes l.delta and fills it from the ground truth in state.truth, which
 *     is read as up to l.max_boxes records of 5 floats (box + class id) per
 *     batch item, terminated by a record whose x is 0;
 *   - stores the squared magnitude of l.delta in *l.cost;
 *   - prints running statistics.
 *
 * The grid cell derived from a truth box centre is clamped into the
 * l.w x l.h grid so that a centre lying on (or past) the right/bottom edge
 * cannot index outside l.output / l.delta.
 */
void forward_region_layer(const region_layer l, network_state state)
{
    int i,j,b,t,n;
    int size = l.coords + l.classes + 1;
    memcpy(l.output, state.input, l.outputs*l.batch*sizeof(float));
#ifndef GPU
    flatten(l.output, l.w*l.h, size*l.n, l.batch, 1);
#endif
    for (b = 0; b < l.batch; ++b){
        for(i = 0; i < l.h*l.w*l.n; ++i){
            int index = size*i + b*l.outputs;
            l.output[index + 4] = logistic_activate(l.output[index + 4]);
        }
    }

#ifndef GPU
    if (l.softmax_tree){
        for (b = 0; b < l.batch; ++b){
            for(i = 0; i < l.h*l.w*l.n; ++i){
                int index = size*i + b*l.outputs;
                softmax_tree(l.output + index + 5, 1, 0, 1, l.softmax_tree, l.output + index + 5);
            }
        }
    } else if (l.softmax){
        for (b = 0; b < l.batch; ++b){
            for(i = 0; i < l.h*l.w*l.n; ++i){
                int index = size*i + b*l.outputs;
                softmax(l.output + index + 5, l.classes, 1, l.output + index + 5, 1);
            }
        }
    }
#endif
    if(!state.train) return;

    memset(l.delta, 0, l.outputs * l.batch * sizeof(float));
    float avg_iou = 0;
    float recall = 0;
    float avg_cat = 0;
    float avg_obj = 0;
    float avg_anyobj = 0;
    int count = 0;
    int class_count = 0;
    *(l.cost) = 0;
    for (b = 0; b < l.batch; ++b) {
        if(l.softmax_tree){
            // A truth record with x and y above 100000 is handled as a
            // class-only label: only the class delta of the most probable
            // prediction is updated and the rest of this batch item is skipped.
            int onlyclass_id = 0;
            for(t = 0; t < l.max_boxes; ++t){
                box truth = float_to_box(state.truth + t*5 + b*l.truths);
                if(!truth.x) break;
                int class_id = state.truth[t*5 + b*l.truths + 4];
                float maxp = 0;
                int maxi = 0;
                if(truth.x > 100000 && truth.y > 100000){
                    for(n = 0; n < l.n*l.w*l.h; ++n){
                        int index = size*n + b*l.outputs + 5;
                        float scale = l.output[index-1];
                        float p = scale*get_hierarchy_probability(l.output + index, l.softmax_tree, class_id);
                        if(p > maxp){
                            maxp = p;
                            maxi = n;
                        }
                    }
                    int index = size*maxi + b*l.outputs + 5;
                    delta_region_class(l.output, l.delta, index, class_id, l.classes, l.softmax_tree, l.class_scale, &avg_cat, l.focal_loss);
                    ++class_count;
                    onlyclass_id = 1;
                    break;
                }
            }
            if(onlyclass_id) continue;
        }

        // Pass 1: every prediction gets a "no object" delta unless it overlaps
        // some truth box by more than l.thresh.
        for (j = 0; j < l.h; ++j) {
            for (i = 0; i < l.w; ++i) {
                for (n = 0; n < l.n; ++n) {
                    int index = size*(j*l.w*l.n + i*l.n + n) + b*l.outputs;
                    box pred = get_region_box(l.output, l.biases, n, index, i, j, l.w, l.h);
                    float best_iou = 0;
                    int best_class_id = -1;
                    for(t = 0; t < l.max_boxes; ++t){
                        box truth = float_to_box(state.truth + t*5 + b*l.truths);
                        int class_id = state.truth[t * 5 + b*l.truths + 4];
                        if (class_id >= l.classes) continue; // if label contains class_id more than number of classes in the cfg-file
                        if(!truth.x) break;
                        float iou = box_iou(pred, truth);
                        if (iou > best_iou) {
                            best_class_id = state.truth[t*5 + b*l.truths + 4];
                            best_iou = iou;
                        }
                    }
                    avg_anyobj += l.output[index + 4];
                    l.delta[index + 4] = l.noobject_scale * ((0 - l.output[index + 4]) * logistic_gradient(l.output[index + 4]));
                    if(l.classfix == -1) l.delta[index + 4] = l.noobject_scale * ((best_iou - l.output[index + 4]) * logistic_gradient(l.output[index + 4]));
                    else{
                        if (best_iou > l.thresh) {
                            l.delta[index + 4] = 0;
                            if(l.classfix > 0){
                                delta_region_class(l.output, l.delta, index + 5, best_class_id, l.classes, l.softmax_tree, l.class_scale*(l.classfix == 2 ? l.output[index + 4] : 1), &avg_cat, l.focal_loss);
                                ++class_count;
                            }
                        }
                    }

                    // While fewer than 12800 images have been seen, pull every
                    // prediction towards a box centred in its cell with the
                    // size taken from l.biases.
                    if(*(state.net.seen) < 12800){
                        box truth = {0};
                        truth.x = (i + .5)/l.w;
                        truth.y = (j + .5)/l.h;
                        truth.w = l.biases[2*n];
                        truth.h = l.biases[2*n+1];
                        if(DOABS){
                            truth.w = l.biases[2*n]/l.w;
                            truth.h = l.biases[2*n+1]/l.h;
                        }
                        delta_region_box(truth, l.output, l.biases, n, index, i, j, l.w, l.h, l.delta, .01);
                    }
                }
            }
        }

        // Pass 2: for every truth box, pick the best-matching prediction slot
        // in the cell containing the box centre and write box / confidence /
        // class deltas for it.
        for(t = 0; t < l.max_boxes; ++t){
            box truth = float_to_box(state.truth + t*5 + b*l.truths);
            int class_id = state.truth[t * 5 + b*l.truths + 4];
            if (class_id >= l.classes) {
                printf(" Warning: in txt-labels class_id=%d >= classes=%d in cfg-file. In txt-labels class_id should be [from 0 to %d] \n", class_id, l.classes, l.classes-1);
                getchar();
                continue; // if label contains class_id more than number of classes in the cfg-file
            }

            if(!truth.x) break;
            float best_iou = 0;
            int best_index = 0;
            int best_n = 0;
            i = (truth.x * l.w);
            j = (truth.y * l.h);
            // Keep the cell inside the grid: truth.x == 1 (or y == 1) would
            // otherwise yield i == l.w (j == l.h) and index out of bounds.
            if (i < 0) i = 0;
            if (i >= l.w) i = l.w - 1;
            if (j < 0) j = 0;
            if (j >= l.h) j = l.h - 1;

            // Compare shapes only: both boxes are moved to the origin.
            box truth_shift = truth;
            truth_shift.x = 0;
            truth_shift.y = 0;
            for(n = 0; n < l.n; ++n){
                int index = size*(j*l.w*l.n + i*l.n + n) + b*l.outputs;
                box pred = get_region_box(l.output, l.biases, n, index, i, j, l.w, l.h);
                if(l.bias_match){
                    pred.w = l.biases[2*n];
                    pred.h = l.biases[2*n+1];
                    if(DOABS){
                        pred.w = l.biases[2*n]/l.w;
                        pred.h = l.biases[2*n+1]/l.h;
                    }
                }
                pred.x = 0;
                pred.y = 0;
                float iou = box_iou(pred, truth_shift);
                if (iou > best_iou){
                    best_index = index;
                    best_iou = iou;
                    best_n = n;
                }
            }

            float iou = delta_region_box(truth, l.output, l.biases, best_n, best_index, i, j, l.w, l.h, l.delta, l.coord_scale);
            if(iou > .5) recall += 1;
            avg_iou += iou;

            avg_obj += l.output[best_index + 4];
            l.delta[best_index + 4] = l.object_scale * (1 - l.output[best_index + 4]) * logistic_gradient(l.output[best_index + 4]);
            if (l.rescore) {
                l.delta[best_index + 4] = l.object_scale * (iou - l.output[best_index + 4]) * logistic_gradient(l.output[best_index + 4]);
            }

            if (l.map) class_id = l.map[class_id];
            delta_region_class(l.output, l.delta, best_index + 5, class_id, l.classes, l.softmax_tree, l.class_scale, &avg_cat, l.focal_loss);
            ++count;
            ++class_count;
        }
    }
#ifndef GPU
    flatten(l.delta, l.w*l.h, size*l.n, l.batch, 0);
#endif
    *(l.cost) = pow(mag_array(l.delta, l.outputs * l.batch), 2);
    printf("Region Avg IOU: %f, Class: %f, Obj: %f, No Obj: %f, Avg Recall: %f, count: %d\n", avg_iou/count, avg_cat/class_count, avg_obj/count, avg_anyobj/(l.w*l.h*l.n*l.batch), recall/count, count);
}
/*
 * Forward pass (and, when training, loss/gradient computation) of a detection
 * layer over an l.side x l.side grid.
 *
 * Inference part: copies state.input into l.output and, if l.softmax is set,
 * applies softmax_array() to the l.classes scores of every grid location.
 *
 * Training part (only when state.train is set): zeroes l.delta, then for every
 * batch item and grid location
 *   - writes a "no object" delta for each of the l.n confidence values;
 *   - if the truth marks the location as containing an object, writes class
 *     deltas, selects one of the l.n predicted boxes (highest IOU, or lowest
 *     RMSE when no box overlaps; optionally overridden by l.forced /
 *     l.random) and writes confidence and coordinate deltas for it.
 * Finally *l.cost is set to the squared magnitude of l.delta; the running
 * additions to *l.cost made along the way are overwritten by that assignment.
 *
 * The selected box index is validated before use: it stays -1 when no box has
 * a positive IOU and none has RMSE below 20 (for instance when the outputs
 * are NaN), and l.forced may select slot 1 when only one slot exists. In both
 * cases slot 0 is used rather than indexing out of bounds.
 */
void forward_detection_layer(const detection_layer l, network_state state)
{
    int locations = l.side*l.side;
    int i,j;
    memcpy(l.output, state.input, l.outputs*l.batch*sizeof(float));
    int b;
    if (l.softmax){
        for(b = 0; b < l.batch; ++b){
            int index = b*l.inputs;
            for (i = 0; i < locations; ++i) {
                int offset = i*l.classes;
                softmax_array(l.output + index + offset, l.classes, 1, l.output + index + offset);
            }
        }
    }
    if(state.train){
        float avg_iou = 0;
        float avg_cat = 0;
        float avg_allcat = 0;
        float avg_obj = 0;
        float avg_anyobj = 0;
        int count = 0;
        *(l.cost) = 0;
        int size = l.inputs * l.batch;
        memset(l.delta, 0, size * sizeof(float));
        for (b = 0; b < l.batch; ++b){
            int index = b*l.inputs;
            for (i = 0; i < locations; ++i) {
                int truth_index = (b*locations + i)*(1+l.coords+l.classes);
                int is_obj = state.truth[truth_index];

                // Default: every confidence value is pushed towards 0.
                for (j = 0; j < l.n; ++j) {
                    int p_index = index + locations*l.classes + i*l.n + j;
                    l.delta[p_index] = l.noobject_scale*(0 - l.output[p_index]);
                    *(l.cost) += l.noobject_scale*pow(l.output[p_index], 2);
                    avg_anyobj += l.output[p_index];
                }

                int best_index = -1;
                float best_iou = 0;
                float best_rmse = 20;

                if (!is_obj){
                    continue;
                }

                int class_index = index + i*l.classes;
                for(j = 0; j < l.classes; ++j) {
                    l.delta[class_index+j] = l.class_scale * (state.truth[truth_index+1+j] - l.output[class_index+j]);
                    *(l.cost) += l.class_scale * pow(state.truth[truth_index+1+j] - l.output[class_index+j], 2);
                    if(state.truth[truth_index + 1 + j]) avg_cat += l.output[class_index+j];
                    avg_allcat += l.output[class_index+j];
                }

                box truth = float_to_box(state.truth + truth_index + 1 + l.classes);
                truth.x /= l.side;
                truth.y /= l.side;

                // Choose the predicted box responsible for this truth box.
                for(j = 0; j < l.n; ++j){
                    int box_index = index + locations*(l.classes + l.n) + (i*l.n + j) * l.coords;
                    box out = float_to_box(l.output + box_index);
                    out.x /= l.side;
                    out.y /= l.side;

                    if (l.sqrt){
                        out.w = out.w*out.w;
                        out.h = out.h*out.h;
                    }

                    float iou = box_iou(out, truth);
                    float rmse = box_rmse(out, truth);
                    if(best_iou > 0 || iou > 0){
                        if(iou > best_iou){
                            best_iou = iou;
                            best_index = j;
                        }
                    }else{
                        if(rmse < best_rmse){
                            best_rmse = rmse;
                            best_index = j;
                        }
                    }
                }

                if(l.forced){
                    if(truth.w*truth.h < .1){
                        best_index = 1;
                    }else{
                        best_index = 0;
                    }
                }
                if(l.random && *(state.net.seen) < 64000){
                    best_index = rand()%l.n;
                }

                // Guard against "nothing selected" (-1) and against a forced
                // slot that does not exist; both would index out of bounds.
                if (best_index < 0 || best_index >= l.n) {
                    best_index = 0;
                }

                int box_index = index + locations*(l.classes + l.n) + (i*l.n + best_index) * l.coords;
                int tbox_index = truth_index + 1 + l.classes;

                box out = float_to_box(l.output + box_index);
                out.x /= l.side;
                out.y /= l.side;
                if (l.sqrt) {
                    out.w = out.w*out.w;
                    out.h = out.h*out.h;
                }
                float iou = box_iou(out, truth);

                // Replace the "no object" confidence delta with the object one.
                int p_index = index + locations*l.classes + i*l.n + best_index;
                *(l.cost) -= l.noobject_scale * pow(l.output[p_index], 2);
                *(l.cost) += l.object_scale * pow(1-l.output[p_index], 2);
                avg_obj += l.output[p_index];
                l.delta[p_index] = l.object_scale * (1.-l.output[p_index]);

                if(l.rescore){
                    l.delta[p_index] = l.object_scale * (iou - l.output[p_index]);
                }

                l.delta[box_index+0] = l.coord_scale*(state.truth[tbox_index + 0] - l.output[box_index + 0]);
                l.delta[box_index+1] = l.coord_scale*(state.truth[tbox_index + 1] - l.output[box_index + 1]);
                l.delta[box_index+2] = l.coord_scale*(state.truth[tbox_index + 2] - l.output[box_index + 2]);
                l.delta[box_index+3] = l.coord_scale*(state.truth[tbox_index + 3] - l.output[box_index + 3]);
                if(l.sqrt){
                    l.delta[box_index+2] = l.coord_scale*(sqrt(state.truth[tbox_index + 2]) - l.output[box_index + 2]);
                    l.delta[box_index+3] = l.coord_scale*(sqrt(state.truth[tbox_index + 3]) - l.output[box_index + 3]);
                }

                *(l.cost) += pow(1-iou, 2);
                avg_iou += iou;
                ++count;
            }
        }

        *(l.cost) = pow(mag_array(l.delta, l.outputs * l.batch), 2);

        if ( l.b_debug ) {
            printf("Detection Avg IOU: %f, Pos Cat: %f, All Cat: %f, Pos Obj: %f, Any Obj: %f, count: %d\n", avg_iou/count, avg_cat/count, avg_allcat/(count*l.classes), avg_obj/count, avg_anyobj/(l.batch*locations*l.n), count);
        }
    }
}
/*
 * Adversarial training loop for a pair of networks (GPU builds only; in a
 * non-GPU build the function body is empty).
 *
 * cfg, weight:   config / weights passed to load_network() for gnet, whose
 *                output is fed to anet and stored as "gen" samples.
 * acfg, aweight: config / weights passed to load_network() for anet, which is
 *                trained on the merged loaded + generated data.
 * clear:         forwarded to load_network() for both networks.
 * display:       in OPENCV builds, non-zero shows and saves the first
 *                generated and first training image each iteration.
 * train_images:  path handed to get_paths() to obtain the image list.
 * maxbatch:      stop once get_current_batch(gnet) reaches this value;
 *                0 means use gnet->max_batches.
 *
 * Weights are written under backup_directory every 1000 iterations
 * (".backup"), every 10000 iterations (numbered ".weights") and once at the
 * end (gnet only, "_final.weights").
 *
 * Fixes relative to the previous revision:
 *  - free_network() is called inside the GPU section, so non-GPU builds no
 *    longer reference undeclared variables;
 *  - file names are built with snprintf() so a long base name cannot overflow
 *    the 256-byte buffer;
 *  - the loader thread still in flight when the loop ends is joined and its
 *    buffer released before the networks are freed.
 */
void train_dcgan(char *cfg, char *weight, char *acfg, char *aweight, int clear, int display, char *train_images, int maxbatch)
{
#ifdef GPU
    char *backup_directory = "/home/kunle12/backup/";
    srand(time(0));
    char *base = basecfg(cfg);
    char *abase = basecfg(acfg);
    printf("%s\n", base);
    network *gnet = load_network(cfg, weight, clear);
    network *anet = load_network(acfg, aweight, clear);

    int i, j, k;

    // Use the first gnet layer with 3 output channels as the image layer.
    layer imlayer = {0};
    for (i = 0; i < gnet->n; ++i) {
        if (gnet->layers[i].out_c == 3) {
            imlayer = gnet->layers[i];
            break;
        }
    }

    printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", gnet->learning_rate, gnet->momentum, gnet->decay);
    int imgs = gnet->batch*gnet->subdivisions;
    i = *gnet->seen/imgs;
    data train, buffer;

    list *plist = get_paths(train_images);
    char **paths = (char **)list_to_array(plist);

    load_args args= get_base_args(anet);
    args.paths = paths;
    args.n = imgs;
    args.m = plist->size;
    args.d = &buffer;
    args.type = CLASSIFICATION_DATA;
    args.threads=16;
    args.classes = 1;
    char *ls[2] = {"imagenet", "zzzzzzzz"};
    args.labels = ls;

    pthread_t load_thread = load_data_in_thread(args);
    clock_t tick;
    char buff[256];

    gnet->train = 1;
    anet->train = 1;

    int x_size = gnet->inputs*gnet->batch;
    int y_size = gnet->truths*gnet->batch;
    float *imerror = cuda_make_array(0, y_size);

    float aloss_avg = -1;

    if (maxbatch == 0) maxbatch = gnet->max_batches;
    while (get_current_batch(gnet) < maxbatch) {
        i += 1;
        tick=clock();
        pthread_join(load_thread, 0);
        train = buffer;

        // Start loading the next batch while this one is processed.
        load_thread = load_data_in_thread(args);

        printf("Loaded: %lf seconds\n", sec(clock()-tick));

        // Loaded samples are labelled 1, generated samples 0.
        data gen = copy_data(train);
        for (j = 0; j < imgs; ++j) {
            train.y.vals[j][0] = 1;
            gen.y.vals[j][0] = 0;
        }
        tick=clock();

        for(j = 0; j < gnet->subdivisions; ++j){
            get_next_batch(train, gnet->batch, j*gnet->batch, gnet->truth, 0);

            // Random input, rescaled per sample by 1/mag_array().
            int z;
            for(z = 0; z < x_size; ++z){
                gnet->input[z] = rand_normal();
            }
            for(z = 0; z < gnet->batch; ++z){
                float mag = mag_array(gnet->input + z*gnet->inputs, gnet->inputs);
                scale_array(gnet->input + z*gnet->inputs, gnet->inputs, 1./mag);
            }

            *gnet->seen += gnet->batch;
            forward_network(gnet);

            // Run anet on the generated images with an all-ones truth and
            // collect its input gradient in imerror.
            fill_gpu(imlayer.outputs*imlayer.batch, 0, imerror, 1);
            fill_cpu(anet->truths*anet->batch, 1, anet->truth, 1);
            copy_cpu(anet->inputs*anet->batch, imlayer.output, 1, anet->input, 1);
            anet->delta_gpu = imerror;
            forward_network(anet);
            backward_network(anet);

            // gnet's last-layer delta is replaced by imerror before backprop.
            scal_gpu(imlayer.outputs*imlayer.batch, 1, imerror, 1);
            scal_gpu(imlayer.outputs*imlayer.batch, 0, gnet->layers[gnet->n-1].delta_gpu, 1);

            axpy_gpu(imlayer.outputs*imlayer.batch, 1, imerror, 1, gnet->layers[gnet->n-1].delta_gpu, 1);

            backward_network(gnet);

            // Keep the generated outputs as this subdivision's "gen" samples.
            for(k = 0; k < gnet->batch; ++k){
                int index = j*gnet->batch + k;
                copy_cpu(gnet->outputs, gnet->output + k*gnet->outputs, 1, gen.X.vals[index], 1);
            }
        }
        harmless_update_network_gpu(anet);

        data merge = concat_data(train, gen);
        float aloss = train_network(anet, merge);

#ifdef OPENCV
        if(display){
            image im = float_to_image(anet->w, anet->h, anet->c, gen.X.vals[0]);
            image im2 = float_to_image(anet->w, anet->h, anet->c, train.X.vals[0]);
            show_image(im, "gen", 1);
            show_image(im2, "train", 1);
            save_image(im, "gen");
            save_image(im2, "train");
        }
#endif

        update_network_gpu(gnet);

        free_data(merge);
        free_data(train);
        free_data(gen);
        if (aloss_avg < 0) aloss_avg = aloss;
        aloss_avg = aloss_avg*.9 + aloss*.1;

        printf("%d: adv: %f | adv_avg: %f, %f rate, %lf seconds, %d images\n", i, aloss, aloss_avg, get_current_rate(gnet), sec(clock()-tick), i*imgs);
        if(i%10000==0){
            snprintf(buff, sizeof(buff), "%s/%s_%d.weights", backup_directory, base, i);
            save_weights(gnet, buff);
            snprintf(buff, sizeof(buff), "%s/%s_%d.weights", backup_directory, abase, i);
            save_weights(anet, buff);
        }
        if(i%1000==0){
            snprintf(buff, sizeof(buff), "%s/%s.backup", backup_directory, base);
            save_weights(gnet, buff);
            snprintf(buff, sizeof(buff), "%s/%s.backup", backup_directory, abase);
            save_weights(anet, buff);
        }
    }
    snprintf(buff, sizeof(buff), "%s/%s_final.weights", backup_directory, base);
    save_weights(gnet, buff);

    // Exactly one loader thread is outstanding here (started before the loop
    // or at the top of the last iteration); wait for it and drop its data.
    pthread_join(load_thread, 0);
    free_data(buffer);

    free_network(gnet);
    free_network(anet);
#endif
}
/*
 * Forward pass and gradient computation of the instance-segmentation layer.
 *
 * Copies net.input into l.output, zeroes l.delta and (non-GPU builds) applies
 * the LOGISTIC activation to the first l.classes * l.w * l.h values of every
 * batch item. Then, per batch item:
 *   1. class channels get delta = -output, embedding channels (l.extra of
 *      them, following the class channels) get delta = -0.1 * output;
 *   2. net.truth is scanned as up to MAX_INSTANCES records of (l.w*l.h + 1)
 *      floats: a class id (negative ends the list) followed by a per-pixel
 *      mask. For masked pixels the class delta becomes mask - output and the
 *      pixel's embedding is accumulated into l.sums[i] / l.counts[i];
 *   3. a per-instance mean squared distance to the mean embedding is computed
 *      and printed together with the mean embedding (batch item 0 on
 *      gpu_index 0 only);
 *   4. for masked pixels the embedding delta is nudged by +/-0.1 per
 *      component, with one sign for the pixel's own instance mean and the
 *      opposite sign for every other instance mean;
 *   5. embedding deltas are scaled by 0.01.
 * Finally *l.cost is set to the squared magnitude of l.delta and the elapsed
 * time is printed.
 *
 * The per-instance scratch array lives on the stack instead of being
 * calloc'ed (unchecked) and freed once per batch item.
 */
void forward_iseg_layer(const layer l, network net)
{
    // Number of instance slots scanned in net.truth, l.counts and l.sums.
    enum { MAX_INSTANCES = 90 };

    double start = what_time_is_it_now();
    int i, b, j, k;
    int ids = l.extra;
    real_t mse[MAX_INSTANCES];

    memcpy(l.output, net.input, l.outputs * l.batch * sizeof(real_t));
    memset(l.delta, 0, l.outputs * l.batch * sizeof(real_t));

#ifndef GPU
    for (b = 0; b < l.batch; ++b) {
        int index = b * l.outputs;
        activate_array(l.output + index, l.classes * l.w * l.h, LOGISTIC);
    }
#endif

    for (b = 0; b < l.batch; ++b) {
        // a priori, each pixel has no class
        for (i = 0; i < l.classes; ++i) {
            for (k = 0; k < l.w * l.h; ++k) {
                int index = b * l.outputs + i * l.w * l.h + k;
                l.delta[index] = 0 - l.output[index];
            }
        }

        // a priori, embedding should be small magnitude
        for (i = 0; i < ids; ++i) {
            for (k = 0; k < l.w * l.h; ++k) {
                int index = b * l.outputs + (i + l.classes) * l.w * l.h + k;
                l.delta[index] = .1 * (0 - l.output[index]);
            }
        }

        memset(l.counts, 0, MAX_INSTANCES * sizeof(int));
        for (i = 0; i < MAX_INSTANCES; ++i) {
            fill_cpu(ids, 0, l.sums[i], 1);

            int c = net.truth[b * l.truths + i * (l.w * l.h + 1)];
            if (c < 0) break;
            // add up metric embeddings for each instance
            for (k = 0; k < l.w * l.h; ++k) {
                int index = b * l.outputs + c * l.w * l.h + k;
                real_t v = net.truth[b * l.truths + i * (l.w * l.h + 1) + 1 + k];
                if (v) {
                    l.delta[index] = v - l.output[index];
                    axpy_cpu(ids, 1, l.output + b * l.outputs + l.classes * l.w * l.h + k, l.w * l.h, l.sums[i], 1);
                    ++l.counts[i];
                }
            }
        }

        // Mean squared distance of each instance's pixels to its mean embedding.
        memset(mse, 0, sizeof(mse));
        for (i = 0; i < MAX_INSTANCES; ++i) {
            int c = net.truth[b * l.truths + i * (l.w * l.h + 1)];
            if (c < 0) break;
            for (k = 0; k < l.w * l.h; ++k) {
                real_t v = net.truth[b * l.truths + i * (l.w * l.h + 1) + 1 + k];
                if (v) {
                    int z;
                    real_t sum = 0;
                    for (z = 0; z < ids; ++z) {
                        int index = b * l.outputs + (l.classes + z) * l.w * l.h + k;
                        sum += pow(l.sums[i][z] / l.counts[i] - l.output[index], 2);
                    }
                    mse[i] += sum;
                }
            }
            mse[i] /= l.counts[i];
        }

        // Calculate average embedding
        for (i = 0; i < MAX_INSTANCES; ++i) {
            if (!l.counts[i]) continue;
            scal_cpu(ids, 1.f / l.counts[i], l.sums[i], 1);
            if (b == 0 && net.gpu_index == 0) {
                printf("%4d, %6.3f, ", l.counts[i], mse[i]);
                for (j = 0; j < ids; ++j) {
                    printf("%6.3f,", l.sums[i][j]);
                }
                printf("\n");
            }
        }

        // Calculate embedding loss
        for (i = 0; i < MAX_INSTANCES; ++i) {
            if (!l.counts[i]) continue;
            for (k = 0; k < l.w * l.h; ++k) {
                real_t v = net.truth[b * l.truths + i * (l.w * l.h + 1) + 1 + k];
                if (v) {
                    for (j = 0; j < MAX_INSTANCES; ++j) {
                        if (!l.counts[j]) continue;
                        int z;
                        for (z = 0; z < ids; ++z) {
                            int index = b * l.outputs + (l.classes + z) * l.w * l.h + k;
                            real_t diff = l.sums[j][z] - l.output[index];
                            // Own instance (j == i) and other instances get opposite signs.
                            if (j == i) l.delta[index] += diff < 0 ? -.1 : .1;
                            else l.delta[index] += -(diff < 0 ? -.1 : .1);
                        }
                    }
                }
            }
        }

        for (i = 0; i < ids; ++i) {
            for (k = 0; k < l.w * l.h; ++k) {
                int index = b * l.outputs + (i + l.classes) * l.w * l.h + k;
                l.delta[index] *= .01;
            }
        }
    }

    *(l.cost) = pow(mag_array(l.delta, l.outputs * l.batch), 2);
    printf("took %lf sec\n", what_time_is_it_now() - start);
}