void speed(char *cfgfile, int tics)
{
    if (tics == 0) tics = 1000;
    network *net = parse_network_cfg(cfgfile);
    set_batch_network(net, 1);
    int i;
    double time = what_time_is_it_now();
    // Benchmark on a blank image: only the input shape matters for timing.
    image im = make_image(net->w, net->h, net->c*net->batch);
    for(i = 0; i < tics; ++i){
        network_predict(net, im.data);
    }
    double t = what_time_is_it_now() - time;
    long ops = numops(net);
    printf("\n%d evals, %f Seconds\n", tics, t);
    printf("Floating Point Operations: %.2f Bn\n", (float)ops/1000000000.);
    printf("FLOPS: %.2f Bn\n", (float)ops/1000000000.*tics/t);
    printf("Speed: %f sec/eval\n", t/tics);
    printf("Speed: %f Hz\n", tics/t);
}
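/*
 * A minimal usage sketch for speed(), assuming the usual darknet
 * command-line dispatch (the "speed" subcommand); the cfg path below is
 * illustrative only:
 *
 *     // ./darknet speed cfg/net.cfg 1000
 *     speed("cfg/net.cfg", 1000);   // time 1000 forward passes
 *
 * Passing tics == 0 falls back to the default of 1000 evaluations, and the
 * reported sec/eval and Hz figures are averaged over all of them.
 */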
void forward_iseg_layer(const layer l, network net)
{
    double time = what_time_is_it_now();
    int i, b, j, k;
    int ids = l.extra;
    memcpy(l.output, net.input, l.outputs * l.batch * sizeof(real_t));
    memset(l.delta, 0, l.outputs * l.batch * sizeof(real_t));

#ifndef GPU
    for (b = 0; b < l.batch; ++b) {
        int index = b * l.outputs;
        activate_array(l.output + index, l.classes * l.w * l.h, LOGISTIC);
    }
#endif

    for (b = 0; b < l.batch; ++b) {
        // a priori, each pixel has no class
        for (i = 0; i < l.classes; ++i) {
            for (k = 0; k < l.w * l.h; ++k) {
                int index = b * l.outputs + i * l.w * l.h + k;
                l.delta[index] = 0 - l.output[index];
            }
        }

        // a priori, embedding should be small magnitude
        for (i = 0; i < ids; ++i) {
            for (k = 0; k < l.w * l.h; ++k) {
                int index = b * l.outputs + (i + l.classes) * l.w * l.h + k;
                l.delta[index] = .1 * (0 - l.output[index]);
            }
        }

        // Accumulate embedding sums and pixel counts for up to 90 ground-truth instances.
        // Each instance record in net.truth is a class id followed by a w*h mask.
        memset(l.counts, 0, 90 * sizeof(int));
        for (i = 0; i < 90; ++i) {
            fill_cpu(ids, 0, l.sums[i], 1);
            int c = net.truth[b * l.truths + i * (l.w * l.h + 1)];
            if (c < 0) break;
            // add up metric embeddings for each instance
            for (k = 0; k < l.w * l.h; ++k) {
                int index = b * l.outputs + c * l.w * l.h + k;
                real_t v = net.truth[b * l.truths + i * (l.w * l.h + 1) + 1 + k];
                if (v) {
                    l.delta[index] = v - l.output[index];
                    axpy_cpu(ids, 1, l.output + b * l.outputs + l.classes * l.w * l.h + k, l.w * l.h, l.sums[i], 1);
                    ++l.counts[i];
                }
            }
        }

        // Mean squared distance of each pixel embedding from its instance mean.
        real_t *mse = calloc(90, sizeof(real_t));
        for (i = 0; i < 90; ++i) {
            int c = net.truth[b * l.truths + i * (l.w * l.h + 1)];
            if (c < 0) break;
            for (k = 0; k < l.w * l.h; ++k) {
                real_t v = net.truth[b * l.truths + i * (l.w * l.h + 1) + 1 + k];
                if (v) {
                    int z;
                    real_t sum = 0;
                    for (z = 0; z < ids; ++z) {
                        int index = b * l.outputs + (l.classes + z) * l.w * l.h + k;
                        sum += pow(l.sums[i][z] / l.counts[i] - l.output[index], 2);
                    }
                    mse[i] += sum;
                }
            }
            mse[i] /= l.counts[i];
        }

        // Calculate average embedding
        for (i = 0; i < 90; ++i) {
            if (!l.counts[i]) continue;
            scal_cpu(ids, 1.f / l.counts[i], l.sums[i], 1);
            if (b == 0 && net.gpu_index == 0) {
                printf("%4d, %6.3f, ", l.counts[i], mse[i]);
                for (j = 0; j < ids; ++j) {
                    printf("%6.3f,", l.sums[i][j]);
                }
                printf("\n");
            }
        }
        free(mse);

        // Calculate embedding loss: pull pixels toward their own instance mean,
        // push them away from the means of other instances.
        for (i = 0; i < 90; ++i) {
            if (!l.counts[i]) continue;
            for (k = 0; k < l.w * l.h; ++k) {
                real_t v = net.truth[b * l.truths + i * (l.w * l.h + 1) + 1 + k];
                if (v) {
                    for (j = 0; j < 90; ++j) {
                        if (!l.counts[j]) continue;
                        int z;
                        for (z = 0; z < ids; ++z) {
                            int index = b * l.outputs + (l.classes + z) * l.w * l.h + k;
                            real_t diff = l.sums[j][z] - l.output[index];
                            if (j == i) l.delta[index] += diff < 0 ? -.1 : .1;
                            else        l.delta[index] += -(diff < 0 ? -.1 : .1);
                        }
                    }
                }
            }
        }

        // Dampen the embedding gradients.
        for (i = 0; i < ids; ++i) {
            for (k = 0; k < l.w * l.h; ++k) {
                int index = b * l.outputs + (i + l.classes) * l.w * l.h + k;
                l.delta[index] *= .01;
            }
        }
    }

    *(l.cost) = pow(mag_array(l.delta, l.outputs * l.batch), 2);
    printf("took %lf sec\n", what_time_is_it_now() - time);
}
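/*
 * Sketch of the ground-truth layout forward_iseg_layer() indexes into,
 * inferred from the code above (the helpers below are illustrative and not
 * part of darknet): per image, net.truth holds up to 90 instance records,
 * each of size l.w*l.h + 1 -- a class id (a value < 0 marks the end of the
 * list) followed by a binary w*h mask.
 *
 *     // hypothetical helpers, assuming the layout used above
 *     static inline real_t iseg_truth_class(const real_t *truth, int wh, int i)
 *     {
 *         return truth[i * (wh + 1)];
 *     }
 *     static inline real_t iseg_truth_mask(const real_t *truth, int wh, int i, int k)
 *     {
 *         return truth[i * (wh + 1) + 1 + k];
 *     }
 */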
void train_classifier(char *datacfg, char *cfgfile, char *weightfile, int *gpus, int ngpus, int clear)
{
    int i;
    float avg_loss = -1;
    char *base = basecfg(cfgfile);
    printf("%s\n", base);
    printf("%d\n", ngpus);
    network **nets = calloc(ngpus, sizeof(network*));

    srand(time(0));
    int seed = rand();
    for(i = 0; i < ngpus; ++i){
        srand(seed);
#ifdef GPU
        cuda_set_device(gpus[i]);
#endif
        nets[i] = load_network(cfgfile, weightfile, clear);
        nets[i]->learning_rate *= ngpus;
    }
    srand(time(0));
    network *net = nets[0];

    int imgs = net->batch * net->subdivisions * ngpus;

    printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay);
    list *options = read_data_cfg(datacfg);

    char *backup_directory = option_find_str(options, "backup", "/backup/");
    char *label_list = option_find_str(options, "labels", "data/labels.list");
    char *train_list = option_find_str(options, "train", "data/train.list");
    int classes = option_find_int(options, "classes", 2);

    char **labels = get_labels(label_list);
    list *plist = get_paths(train_list);
    char **paths = (char **)list_to_array(plist);
    printf("%d\n", plist->size);
    int N = plist->size;
    double time;

    load_args args = {0};
    args.w = net->w;
    args.h = net->h;
    args.threads = 32;
    args.hierarchy = net->hierarchy;

    args.min = net->min_ratio*net->w;
    args.max = net->max_ratio*net->w;
    printf("%d %d\n", args.min, args.max);
    args.angle = net->angle;
    args.aspect = net->aspect;
    args.exposure = net->exposure;
    args.saturation = net->saturation;
    args.hue = net->hue;
    args.size = net->w;

    args.paths = paths;
    args.classes = classes;
    args.n = imgs;
    args.m = N;
    args.labels = labels;
    args.type = CLASSIFICATION_DATA;

    data train;
    data buffer;
    pthread_t load_thread;
    args.d = &buffer;
    load_thread = load_data(args);

    int count = 0;
    int epoch = (*net->seen)/N;
    while(get_current_batch(net) < net->max_batches || net->max_batches == 0){
        if(net->random && count++%40 == 0){
            printf("Resizing\n");
            int dim = (rand() % 11 + 4) * 32;
            //if (get_current_batch(net)+200 > net->max_batches) dim = 608;
            //int dim = (rand() % 4 + 16) * 32;
            printf("%d\n", dim);
            args.w = dim;
            args.h = dim;
            args.size = dim;
            args.min = net->min_ratio*dim;
            args.max = net->max_ratio*dim;
            printf("%d %d\n", args.min, args.max);

            // Discard the batch loaded at the old size and restart loading at the new one.
            pthread_join(load_thread, 0);
            train = buffer;
            free_data(train);
            load_thread = load_data(args);

            for(i = 0; i < ngpus; ++i){
                resize_network(nets[i], dim, dim);
            }
            net = nets[0];
        }
        time = what_time_is_it_now();

        pthread_join(load_thread, 0);
        train = buffer;
        load_thread = load_data(args);

        printf("Loaded: %lf seconds\n", what_time_is_it_now()-time);
        time = what_time_is_it_now();

        float loss = 0;
#ifdef GPU
        if(ngpus == 1){
            loss = train_network(net, train);
        } else {
            loss = train_networks(nets, ngpus, train, 4);
        }
#else
        loss = train_network(net, train);
#endif
        if(avg_loss == -1) avg_loss = loss;
        avg_loss = avg_loss*.9 + loss*.1;
        printf("%ld, %.3f: %f, %f avg, %f rate, %lf seconds, %ld images\n", get_current_batch(net), (float)(*net->seen)/N, loss, avg_loss, get_current_rate(net), what_time_is_it_now()-time, *net->seen);
        free_data(train);
        if(*net->seen/N > epoch){
            epoch = *net->seen/N;
            char buff[256];
            sprintf(buff, "%s/%s_%d.weights", backup_directory, base, epoch);
            save_weights(net, buff);
        }
        if(get_current_batch(net)%1000 == 0){
            char buff[256];
            sprintf(buff, "%s/%s.backup", backup_directory, base);
            save_weights(net, buff);
        }
    }
    char buff[256];
    sprintf(buff, "%s/%s.weights", backup_directory, base);
    save_weights(net, buff);
    pthread_join(load_thread, 0);

    free_network(net);
    free_ptrs((void**)labels, classes);
    free_ptrs((void**)paths, plist->size);
    free_list(plist);
    free(base);
}
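/*
 * A minimal sketch of how train_classifier() is typically driven, assuming
 * the standard "classifier train" subcommand; the data/cfg paths and GPU
 * list below are illustrative only:
 *
 *     // ./darknet classifier train cfg/imagenet1k.data cfg/alexnet.cfg
 *     int gpus[] = {0};
 *     train_classifier("cfg/imagenet1k.data", "cfg/alexnet.cfg", 0, gpus, 1, 0);
 *
 * With a NULL weightfile training starts from scratch; a nonzero clear
 * resets *net->seen so the learning-rate schedule restarts from batch zero.
 */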
void train_segmenter(char *datacfg, char *cfgfile, char *weightfile, int *gpus, int ngpus, int clear, int display)
{
    int i;
    float avg_loss = -1;
    char *base = basecfg(cfgfile);
    printf("%s\n", base);
    printf("%d\n", ngpus);
    network **nets = calloc(ngpus, sizeof(network*));

    srand(time(0));
    int seed = rand();
    for(i = 0; i < ngpus; ++i){
        srand(seed);
#ifdef GPU
        cuda_set_device(gpus[i]);
#endif
        nets[i] = load_network(cfgfile, weightfile, clear);
        nets[i]->learning_rate *= ngpus;
    }
    srand(time(0));
    network *net = nets[0];
    image pred = get_network_image(net);

    // The network must downsample by an integer factor so masks line up with the input.
    int div = net->w/pred.w;
    assert(pred.w * div == net->w);
    assert(pred.h * div == net->h);

    int imgs = net->batch * net->subdivisions * ngpus;

    printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay);
    list *options = read_data_cfg(datacfg);

    char *backup_directory = option_find_str(options, "backup", "/backup/");
    char *train_list = option_find_str(options, "train", "data/train.list");

    list *plist = get_paths(train_list);
    char **paths = (char **)list_to_array(plist);
    printf("%d\n", plist->size);
    int N = plist->size;

    load_args args = {0};
    args.w = net->w;
    args.h = net->h;
    args.threads = 32;
    args.scale = div;

    args.min = net->min_crop;
    args.max = net->max_crop;
    args.angle = net->angle;
    args.aspect = net->aspect;
    args.exposure = net->exposure;
    args.saturation = net->saturation;
    args.hue = net->hue;
    args.size = net->w;
    args.classes = 80;

    args.paths = paths;
    args.n = imgs;
    args.m = N;
    args.type = SEGMENTATION_DATA;

    data train;
    data buffer;
    pthread_t load_thread;
    args.d = &buffer;
    load_thread = load_data(args);

    int epoch = (*net->seen)/N;
    while(get_current_batch(net) < net->max_batches || net->max_batches == 0){
        double time = what_time_is_it_now();

        pthread_join(load_thread, 0);
        train = buffer;
        load_thread = load_data(args);

        printf("Loaded: %lf seconds\n", what_time_is_it_now()-time);
        time = what_time_is_it_now();

        float loss = 0;
#ifdef GPU
        if(ngpus == 1){
            loss = train_network(net, train);
        } else {
            loss = train_networks(nets, ngpus, train, 4);
        }
#else
        loss = train_network(net, train);
#endif
        if(display){
            image tr = float_to_image(net->w/div, net->h/div, 80, train.y.vals[net->batch*(net->subdivisions-1)]);
            image im = float_to_image(net->w, net->h, net->c, train.X.vals[net->batch*(net->subdivisions-1)]);
            image mask = mask_to_rgb(tr);
            image prmask = mask_to_rgb(pred);
            show_image(im, "input", 1);
            show_image(prmask, "pred", 1);
            show_image(mask, "truth", 100);
            free_image(mask);
            free_image(prmask);
        }

        if(avg_loss == -1) avg_loss = loss;
        avg_loss = avg_loss*.9 + loss*.1;
        printf("%ld, %.3f: %f, %f avg, %f rate, %lf seconds, %ld images\n", get_current_batch(net), (float)(*net->seen)/N, loss, avg_loss, get_current_rate(net), what_time_is_it_now()-time, *net->seen);
        free_data(train);
        if(*net->seen/N > epoch){
            epoch = *net->seen/N;
            char buff[256];
            sprintf(buff, "%s/%s_%d.weights", backup_directory, base, epoch);
            save_weights(net, buff);
        }
        if(get_current_batch(net)%100 == 0){
            char buff[256];
            sprintf(buff, "%s/%s.backup", backup_directory, base);
            save_weights(net, buff);
        }
    }
    char buff[256];
    sprintf(buff, "%s/%s.weights", backup_directory, base);
    save_weights(net, buff);

    for(i = 0; i < ngpus; ++i){
        free_network(nets[i]);
    }
    free(nets);
    free_ptrs((void**)paths, plist->size);
    free_list(plist);
    free(base);
}
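/*
 * A minimal sketch of how train_segmenter() is usually invoked, assuming the
 * "segmenter train" subcommand; the paths and GPU list below are
 * illustrative only:
 *
 *     // ./darknet segmenter train cfg/coco.data cfg/segnet.cfg
 *     int gpus[] = {0};
 *     train_segmenter("cfg/coco.data", "cfg/segnet.cfg", 0, gpus, 1, 0, 1);
 *
 * With display != 0 each iteration shows the input image, the current
 * prediction, and the ground-truth mask (interactive windows require an
 * OpenCV build).
 */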