Esempio n. 1
0
/*
 * Benchmark forward-pass throughput of the network described by cfgfile.
 *
 * cfgfile: path handed to parse_network_cfg().
 * tics:    number of network_predict() calls to time; any non-positive
 *          value selects the default of 1000.
 *
 * The network batch size is forced to 1. Prints the evaluation count, the
 * elapsed seconds, the operation count returned by numops() in billions,
 * the resulting operations per second, and the per-evaluation latency and
 * rate.
 *
 * NOTE(review): neither the network nor the scratch input image is released
 * before returning -- confirm whether callers rely on process exit for that.
 */
void speed(char *cfgfile, int tics)
{
    /* A negative count would run zero evaluations and print meaningless
     * rates, so treat it like "not specified". */
    if (tics <= 0) tics = 1000;

    network *net = parse_network_cfg(cfgfile);
    set_batch_network(net, 1);

    /* Allocate the input before starting the clock so that only the
     * forward passes are measured. */
    image im = make_image(net->w, net->h, net->c*net->batch);

    int i;
    double start = what_time_is_it_now();
    for(i = 0; i < tics; ++i){
        network_predict(net, im.data);
    }
    double t = what_time_is_it_now() - start;

    /* Convert through double: a float cannot represent large op counts
     * exactly. */
    long ops = numops(net);
    double giga_ops = (double)ops/1000000000.;

    printf("\n%d evals, %f Seconds\n", tics, t);
    printf("Floating Point Operations: %.2f Bn\n", giga_ops);
    printf("FLOPS: %.2f Bn\n", giga_ops*tics/t);
    printf("Speed: %f sec/eval\n", t/tics);
    printf("Speed: %f Hz\n", tics/t);
}
/*
 * Forward pass of the instance-segmentation layer.
 *
 * Copies net.input into l.output, then fills l.delta from net.truth and
 * stores the squared magnitude of l.delta in *l.cost. `l` is passed by value
 * but the buffers it points to (output, delta, counts, sums, cost) are
 * written through.
 *
 * Channel layout implied by the indexing below, per batch item (l.outputs
 * values, each channel l.w * l.h values):
 *   channels [0, l.classes)                 per-class masks
 *   channels [l.classes, l.classes + ids)   embedding, with ids = l.extra
 *
 * Truth layout as read below, per batch item (l.truths values): a sequence
 * of records of (l.w * l.h + 1) values -- a class id followed by a
 * l.w * l.h mask. A negative class id terminates the sequence; at most 90
 * records are examined.
 *
 * NOTE(review): assumes l.counts and l.sums hold at least 90 entries and that
 * l.truths covers 90 records -- confirm against the layer constructor.
 */
void forward_iseg_layer(const layer l, network net) {
	/* Maximum number of instance records examined per batch item. */
	enum { ISEG_MAX_INSTANCES = 90 };

	double start = what_time_is_it_now();
	int i, b, j, k;
	int ids = l.extra;
	const int area = l.w * l.h;

	memcpy(l.output, net.input, l.outputs * l.batch * sizeof(real_t));
	memset(l.delta, 0, l.outputs * l.batch * sizeof(real_t));

#ifndef GPU
	/* Non-GPU builds apply the logistic activation to the class channels
	 * here. NOTE(review): presumably the GPU path activates before calling
	 * this function -- confirm. */
	for (b = 0; b < l.batch; ++b) {
		int index = b * l.outputs;
		activate_array(l.output + index, l.classes * area, LOGISTIC);
	}
#endif

	for (b = 0; b < l.batch; ++b) {
		/* Per-instance mean squared distance to the instance's mean
		 * embedding; only used for the diagnostic print below. A small
		 * fixed-size stack array avoids a heap allocation (and its
		 * unchecked failure) on every batch item. */
		real_t mse[ISEG_MAX_INSTANCES] = { 0 };

		// a priori, each pixel has no class
		for (i = 0; i < l.classes; ++i) {
			for (k = 0; k < area; ++k) {
				int index = b * l.outputs + i * area + k;
				l.delta[index] = 0 - l.output[index];
			}
		}

		// a priori, embedding should be small magnitude
		for (i = 0; i < ids; ++i) {
			for (k = 0; k < area; ++k) {
				int index = b * l.outputs + (i + l.classes) * area + k;
				l.delta[index] = .1 * (0 - l.output[index]);
			}
		}

		memset(l.counts, 0, ISEG_MAX_INSTANCES * sizeof(int));
		for (i = 0; i < ISEG_MAX_INSTANCES; ++i) {
			/* Offset of instance record i within net.truth. */
			int rec = b * l.truths + i * (area + 1);

			fill_cpu(ids, 0, l.sums[i], 1);

			int c = net.truth[rec];
			if (c < 0)
				break;
			// add up metric embeddings for each instance
			for (k = 0; k < area; ++k) {
				int index = b * l.outputs + c * area + k;
				real_t v = net.truth[rec + 1 + k];
				if (v) {
					/* Pixel belongs to this instance: class delta
					 * targets the mask value, and the pixel's embedding
					 * (stride = area across channels) is accumulated. */
					l.delta[index] = v - l.output[index];
					axpy_cpu(ids, 1,
							l.output + b * l.outputs + l.classes * area + k,
							area, l.sums[i], 1);
					++l.counts[i];
				}
			}
		}

		/* l.sums still holds un-normalised sums here, hence the division
		 * by l.counts[i] inside the distance term. */
		for (i = 0; i < ISEG_MAX_INSTANCES; ++i) {
			int rec = b * l.truths + i * (area + 1);
			int c = net.truth[rec];
			if (c < 0)
				break;
			for (k = 0; k < area; ++k) {
				real_t v = net.truth[rec + 1 + k];
				if (v) {
					int z;
					real_t sum = 0;
					for (z = 0; z < ids; ++z) {
						int index = b * l.outputs + (l.classes + z) * area
								+ k;
						sum += pow(l.sums[i][z] / l.counts[i] - l.output[index],
								2);
					}
					mse[i] += sum;
				}
			}
			/* An instance record with an all-zero mask has no pixels;
			 * leave its mse at 0 instead of computing 0/0. */
			if (l.counts[i])
				mse[i] /= l.counts[i];
		}

		// Calculate average embedding
		for (i = 0; i < ISEG_MAX_INSTANCES; ++i) {
			if (!l.counts[i])
				continue;
			scal_cpu(ids, 1.f / l.counts[i], l.sums[i], 1);
			/* Diagnostics only for the first batch item on GPU index 0. */
			if (b == 0 && net.gpu_index == 0) {
				printf("%4d, %6.3f, ", l.counts[i], mse[i]);
				for (j = 0; j < ids; ++j) {
					printf("%6.3f,", l.sums[i][j]);
				}
				printf("\n");
			}
		}

		// Calculate embedding loss
		/* For every pixel of instance i and every non-empty instance j, each
		 * embedding channel receives a fixed-size step of 0.1 whose sign
		 * follows (mean_j - output) when j == i and is reversed otherwise. */
		for (i = 0; i < ISEG_MAX_INSTANCES; ++i) {
			if (!l.counts[i])
				continue;
			for (k = 0; k < area; ++k) {
				real_t v = net.truth[b * l.truths + i * (area + 1) + 1 + k];
				if (v) {
					for (j = 0; j < ISEG_MAX_INSTANCES; ++j) {
						if (!l.counts[j])
							continue;
						int z;
						for (z = 0; z < ids; ++z) {
							int index = b * l.outputs
									+ (l.classes + z) * area + k;
							real_t diff = l.sums[j][z] - l.output[index];
							if (j == i)
								l.delta[index] += diff < 0 ? -.1 : .1;
							else
								l.delta[index] += -(diff < 0 ? -.1 : .1);
						}
					}
				}
			}
		}

		/* Scale down all embedding deltas relative to the class deltas. */
		for (i = 0; i < ids; ++i) {
			for (k = 0; k < area; ++k) {
				int index = b * l.outputs + (i + l.classes) * area + k;
				l.delta[index] *= .01;
			}
		}
	}

	*(l.cost) = pow(mag_array(l.delta, l.outputs * l.batch), 2);
	printf("took %lf sec\n", what_time_is_it_now() - start);
}
Esempio n. 3
0
/*
 * Train a classification network.
 *
 * datacfg:    data configuration file; the keys "backup", "labels", "train"
 *             and "classes" are read from it (defaults "/backup/",
 *             "data/labels.list", "data/train.list" and 2).
 * cfgfile:    network configuration; its base name is used for the names of
 *             the saved weight files.
 * weightfile: initial weights passed to load_network().
 * gpus/ngpus: device ids and count; one network replica is loaded per entry
 *             and the learning rate of each is multiplied by ngpus.
 *             Assumes ngpus >= 1 (nets[0] is used unconditionally).
 * clear:      forwarded to load_network().
 *
 * Batches are prefetched on a background loader thread while the previous
 * batch is being trained on. Weights are written to
 *   <backup>/<base>_<epoch>.weights  whenever the epoch counter advances,
 *   <backup>/<base>.backup           every 1000 batches,
 *   <backup>/<base>.weights          when training finishes.
 */
void train_classifier(char *datacfg, char *cfgfile, char *weightfile, int *gpus, int ngpus, int clear)
{
    int i;

    float avg_loss = -1;
    char *base = basecfg(cfgfile);
    printf("%s\n", base);
    printf("%d\n", ngpus);
    network **nets = calloc(ngpus, sizeof(network*));

    /* Every replica is loaded under the same seed; the generator is
     * re-seeded from the clock afterwards. */
    srand(time(0));
    int seed = rand();
    for(i = 0; i < ngpus; ++i){
        srand(seed);
#ifdef GPU
        cuda_set_device(gpus[i]);
#endif
        nets[i] = load_network(cfgfile, weightfile, clear);
        nets[i]->learning_rate *= ngpus;
    }
    srand(time(0));
    network *net = nets[0];

    /* Images consumed per training step across all replicas. */
    int imgs = net->batch * net->subdivisions * ngpus;

    printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay);
    list *options = read_data_cfg(datacfg);

    char *backup_directory = option_find_str(options, "backup", "/backup/");
    char *label_list = option_find_str(options, "labels", "data/labels.list");
    char *train_list = option_find_str(options, "train", "data/train.list");
    int classes = option_find_int(options, "classes", 2);

    char **labels = get_labels(label_list);
    list *plist = get_paths(train_list);
    char **paths = (char **)list_to_array(plist);
    printf("%d\n", plist->size);
    int N = plist->size;

    /* N is used as a divisor for the epoch computation below; bail out
     * before starting the loader if there is nothing to train on. */
    if(N <= 0){
        fprintf(stderr, "No training images found in %s\n", train_list);
        goto cleanup;
    }

    double start;

    load_args args = {0};
    args.w = net->w;
    args.h = net->h;
    args.threads = 32;
    args.hierarchy = net->hierarchy;

    args.min = net->min_ratio*net->w;
    args.max = net->max_ratio*net->w;
    printf("%d %d\n", args.min, args.max);
    args.angle = net->angle;
    args.aspect = net->aspect;
    args.exposure = net->exposure;
    args.saturation = net->saturation;
    args.hue = net->hue;
    args.size = net->w;

    args.paths = paths;
    args.classes = classes;
    args.n = imgs;
    args.m = N;
    args.labels = labels;
    args.type = CLASSIFICATION_DATA;

    data train;
    data buffer;
    pthread_t load_thread;
    args.d = &buffer;
    load_thread = load_data(args);

    int count = 0;
    int epoch = (*net->seen)/N;
    while(get_current_batch(net) < net->max_batches || net->max_batches == 0){
        if(net->random && count++%40 == 0){
            /* Multi-scale training: pick a new square input size, a
             * multiple of 32 between 128 and 448. */
            printf("Resizing\n");
            int dim = (rand() % 11 + 4) * 32;
            printf("%d\n", dim);
            args.w = dim;
            args.h = dim;
            args.size = dim;
            args.min = net->min_ratio*dim;
            args.max = net->max_ratio*dim;
            printf("%d %d\n", args.min, args.max);

            /* The batch already in flight was loaded at the old size:
             * discard it and restart the loader with the new settings. */
            pthread_join(load_thread, 0);
            train = buffer;
            free_data(train);
            load_thread = load_data(args);

            for(i = 0; i < ngpus; ++i){
                resize_network(nets[i], dim, dim);
            }
            net = nets[0];
        }
        start = what_time_is_it_now();

        /* Take the prefetched batch and immediately start loading the next. */
        pthread_join(load_thread, 0);
        train = buffer;
        load_thread = load_data(args);

        printf("Loaded: %lf seconds\n", what_time_is_it_now()-start);
        start = what_time_is_it_now();

        float loss = 0;
#ifdef GPU
        if(ngpus == 1){
            loss = train_network(net, train);
        } else {
            loss = train_networks(nets, ngpus, train, 4);
        }
#else
        loss = train_network(net, train);
#endif
        /* Exponential moving average of the loss, seeded with the first value. */
        if(avg_loss == -1) avg_loss = loss;
        avg_loss = avg_loss*.9 + loss*.1;
        printf("%ld, %.3f: %f, %f avg, %f rate, %lf seconds, %ld images\n", get_current_batch(net), (float)(*net->seen)/N, loss, avg_loss, get_current_rate(net), what_time_is_it_now()-start, *net->seen);
        free_data(train);
        if(*net->seen/N > epoch){
            epoch = *net->seen/N;
            char buff[256];
            /* snprintf: directory and base name come from configuration
             * and must not be able to overflow the fixed-size buffer. */
            snprintf(buff, sizeof(buff), "%s/%s_%d.weights",backup_directory,base, epoch);
            save_weights(net, buff);
        }
        if(get_current_batch(net)%1000 == 0){
            char buff[256];
            snprintf(buff, sizeof(buff), "%s/%s.backup",backup_directory,base);
            save_weights(net, buff);
        }
    }
    char buff[256];
    snprintf(buff, sizeof(buff), "%s/%s.weights", backup_directory, base);
    save_weights(net, buff);

    /* Wait for the prefetch started by the last iteration and release the
     * batch it produced, which will never be trained on. */
    pthread_join(load_thread, 0);
    free_data(buffer);

cleanup:
    /* Release every replica, not just nets[0], and the array holding them. */
    for(i = 0; i < ngpus; ++i){
        free_network(nets[i]);
    }
    free(nets);
    free_ptrs((void**)labels, classes);
    free_ptrs((void**)paths, plist->size);
    free_list(plist);
    free(base);
}
Esempio n. 4
0
/*
 * Train a segmentation network.
 *
 * datacfg:    data configuration file; the keys "backup" and "train" are
 *             read from it (defaults "/backup/" and "data/train.list").
 * cfgfile:    network configuration; its base name is used for the names of
 *             the saved weight files.
 * weightfile: initial weights passed to load_network().
 * gpus/ngpus: device ids and count; one network replica is loaded per entry
 *             and the learning rate of each is multiplied by ngpus.
 *             Assumes ngpus >= 1 (nets[0] is used unconditionally).
 * clear:      forwarded to load_network().
 * display:    when non-zero, shows the input, the predicted mask and the
 *             truth mask after every training step.
 *
 * The network's output image must be an exact integer down-scaling of its
 * input (asserted below); that factor is passed to the loader as args.scale.
 * The number of classes is hard-coded to 80 both for the loader and for the
 * display path.
 *
 * Batches are prefetched on a background loader thread while the previous
 * batch is being trained on. Weights are written to
 *   <backup>/<base>_<epoch>.weights  whenever the epoch counter advances,
 *   <backup>/<base>.backup           every 100 batches,
 *   <backup>/<base>.weights          when training finishes.
 */
void train_segmenter(char *datacfg, char *cfgfile, char *weightfile, int *gpus, int ngpus, int clear, int display)
{
    int i;

    float avg_loss = -1;
    char *base = basecfg(cfgfile);
    printf("%s\n", base);
    printf("%d\n", ngpus);
    network ** nets = calloc(ngpus, sizeof(network*));

    /* Every replica is loaded under the same seed; the generator is
     * re-seeded from the clock afterwards. */
    srand(time(0));
    int seed = rand();
    for(i = 0; i < ngpus; ++i){
        srand(seed);
#ifdef GPU
        cuda_set_device(gpus[i]);
#endif
        nets[i] = load_network(cfgfile, weightfile, clear);
        nets[i]->learning_rate *= ngpus;
    }
    srand(time(0));
    network * net = nets[0];
    image pred = get_network_image(net);

    /* Ratio between input and output resolution; must divide evenly. */
    int div = net->w/pred.w;
    assert(pred.w * div == net->w);
    assert(pred.h * div == net->h);

    /* Images consumed per training step across all replicas. */
    int imgs = net->batch * net->subdivisions * ngpus;

    printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay);
    list *options = read_data_cfg(datacfg);

    char *backup_directory = option_find_str(options, "backup", "/backup/");
    char *train_list = option_find_str(options, "train", "data/train.list");

    list *plist = get_paths(train_list);
    char **paths = (char **)list_to_array(plist);
    printf("%d\n", plist->size);
    int N = plist->size;

    /* N is used as a divisor for the epoch computation below; bail out
     * before starting the loader if there is nothing to train on. */
    if(N <= 0){
        fprintf(stderr, "No training images found in %s\n", train_list);
        goto cleanup;
    }

    load_args args = {0};
    args.w = net->w;
    args.h = net->h;
    args.threads = 32;
    args.scale = div;

    args.min = net->min_crop;
    args.max = net->max_crop;
    args.angle = net->angle;
    args.aspect = net->aspect;
    args.exposure = net->exposure;
    args.saturation = net->saturation;
    args.hue = net->hue;
    args.size = net->w;
    args.classes = 80;

    args.paths = paths;
    args.n = imgs;
    args.m = N;
    args.type = SEGMENTATION_DATA;

    data train;
    data buffer;
    pthread_t load_thread;
    args.d = &buffer;
    load_thread = load_data(args);

    int epoch = (*net->seen)/N;
    while(get_current_batch(net) < net->max_batches || net->max_batches == 0){
        double start = what_time_is_it_now();

        /* Take the prefetched batch and immediately start loading the next. */
        pthread_join(load_thread, 0);
        train = buffer;
        load_thread = load_data(args);

        printf("Loaded: %lf seconds\n", what_time_is_it_now()-start);
        start = what_time_is_it_now();

        float loss = 0;
#ifdef GPU
        if(ngpus == 1){
            loss = train_network(net, train);
        } else {
            loss = train_networks(nets, ngpus, train, 4);
        }
#else
        loss = train_network(net, train);
#endif
        if(display){
            /* Visualise the first sample of the last subdivision. tr and im
             * wrap rows of the training batch and are therefore not freed
             * here; the RGB masks are freshly created and are. */
            image tr = float_to_image(net->w/div, net->h/div, 80, train.y.vals[net->batch*(net->subdivisions-1)]);
            image im = float_to_image(net->w, net->h, net->c, train.X.vals[net->batch*(net->subdivisions-1)]);
            image mask = mask_to_rgb(tr);
            image prmask = mask_to_rgb(pred);
            show_image(im, "input", 1);
            show_image(prmask, "pred", 1);
            show_image(mask, "truth", 100);
            free_image(mask);
            free_image(prmask);
        }
        /* Exponential moving average of the loss, seeded with the first value. */
        if(avg_loss == -1) avg_loss = loss;
        avg_loss = avg_loss*.9 + loss*.1;
        printf("%ld, %.3f: %f, %f avg, %f rate, %lf seconds, %ld images\n", get_current_batch(net), (float)(*net->seen)/N, loss, avg_loss, get_current_rate(net), what_time_is_it_now()-start, *net->seen);
        free_data(train);
        if(*net->seen/N > epoch){
            epoch = *net->seen/N;
            char buff[256];
            /* snprintf: directory and base name come from configuration
             * and must not be able to overflow the fixed-size buffer. */
            snprintf(buff, sizeof(buff), "%s/%s_%d.weights",backup_directory,base, epoch);
            save_weights(net, buff);
        }
        if(get_current_batch(net)%100 == 0){
            char buff[256];
            snprintf(buff, sizeof(buff), "%s/%s.backup",backup_directory,base);
            save_weights(net, buff);
        }
    }
    char buff[256];
    snprintf(buff, sizeof(buff), "%s/%s.weights", backup_directory, base);
    save_weights(net, buff);

    /* The loader started by the last iteration writes into the local
     * `buffer`; it must finish before this frame goes away. The batch it
     * produced will never be trained on, so release it. */
    pthread_join(load_thread, 0);
    free_data(buffer);

cleanup:
    for(i = 0; i < ngpus; ++i){
      free_network(nets[i]);
    }
    free(nets);
    free_ptrs((void**)paths, plist->size);
    free_list(plist);
    free(base);
}