Example #1
void load_convolutional_weights(layer l, FILE *fp)
{
    if(l.binary){
        //load_convolutional_weights_binary(l, fp);
        //return;
    }
    if(l.numload) l.n = l.numload;
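    /* total weight count: (input channels / groups) x filters x kernel area */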
    int num = l.c/l.groups*l.n*l.size*l.size;
    fread(l.biases, sizeof(float), l.n, fp);
    if (l.batch_normalize && (!l.dontloadscales)){
        fread(l.scales, sizeof(float), l.n, fp);
        fread(l.rolling_mean, sizeof(float), l.n, fp);
        fread(l.rolling_variance, sizeof(float), l.n, fp);
        if(0){   /* debug only: dump and reset the rolling statistics */
            int i;
            for(i = 0; i < l.n; ++i) printf("%g, ", l.rolling_mean[i]);
            printf("\n");
            for(i = 0; i < l.n; ++i) printf("%g, ", l.rolling_variance[i]);
            printf("\n");
            fill_cpu(l.n, 0, l.rolling_mean, 1);
            fill_cpu(l.n, 0, l.rolling_variance, 1);
        }
    }
    fread(l.weights, sizeof(float), num, fp);
    //if(l.c == 3) scal_cpu(num, 1./256, l.weights, 1);
    if (l.flipped) {
        transpose_matrix(l.weights, l.c*l.size*l.size, l.n);
    }
    //if (l.binary) binarize_weights(l.weights, l.n, l.c*l.size*l.size, l.weights);
#ifdef GPU
    if(gpu_index >= 0){
        push_convolutional_layer(l);
    }
#endif
}
Example #2
void forward_convolutional_layer(convolutional_layer l, network_state state)
{
    int out_h = convolutional_out_height(l);
    int out_w = convolutional_out_width(l);
    int i;

    fill_cpu(l.outputs*l.batch, 0, l.output, 1);
    /*
       if(l.binary){
       binarize_filters(l.filters, l.n, l.c*l.size*l.size, l.binary_filters);
       binarize_filters2(l.filters, l.n, l.c*l.size*l.size, l.cfilters, l.scales);
       swap_binary(&l);
       }
     */

    if(l.binary){
        int m = l.n;
        int k = l.size*l.size*l.c;
        int n = out_h*out_w;
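
        /* im2col unrolls each receptive field into a column of b (k x n);
           the char-packed binary weights a (m x k) then produce the m x n
           output via gemm_bin */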

        char  *a = l.cfilters;
        float *b = state.workspace;
        float *c = l.output;

        for(i = 0; i < l.batch; ++i){
            im2col_cpu(state.input, l.c, l.h, l.w, 
                    l.size, l.stride, l.pad, b);
            gemm_bin(m,n,k,1,a,k,b,n,c,n);
            c += n*m;
            state.input += l.c*l.h*l.w;
        }
        scale_bias(l.output, l.scales, l.batch, l.n, out_h*out_w);
        add_bias(l.output, l.biases, l.batch, l.n, out_h*out_w);
        activate_array(l.output, m*n*l.batch, l.activation);
        return;
    }

    int m = l.n;
    int k = l.size*l.size*l.c;
    int n = out_h*out_w;

    float *a = l.filters;
    float *b = state.workspace;
    float *c = l.output;

    for(i = 0; i < l.batch; ++i){
        im2col_cpu(state.input, l.c, l.h, l.w, 
                l.size, l.stride, l.pad, b);
        gemm(0,0,m,n,k,1,a,k,b,n,1,c,n);
        c += n*m;
        state.input += l.c*l.h*l.w;
    }

    if(l.batch_normalize){
        forward_batchnorm_layer(l, state);
    }
    add_bias(l.output, l.biases, l.batch, l.n, out_h*out_w);

    activate_array(l.output, m*n*l.batch, l.activation);
}
Example #3
void forward_deconvolutional_layer(const layer l, network net) {
	int i;

	int m = l.size * l.size * l.n;
	int n = l.h * l.w;
	int k = l.c;
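
	/* deconvolution via transposed GEMM: weights^T times the input feature
	   map yields unrolled output columns, which col2im_cpu scatter-adds
	   back into the output image */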

	fill_cpu(l.outputs * l.batch, 0, l.output, 1);

	for (i = 0; i < l.batch; ++i) {
		real_t *a = l.weights;
		real_t *b = net.input + i * l.c * l.h * l.w;
		real_t *c = net.workspace;

		gemm_cpu(1, 0, m, n, k, 1, a, m, b, n, 0, c, n);

		col2im_cpu(net.workspace, l.out_c, l.out_h, l.out_w, l.size, l.stride,
				l.pad, l.output + i * l.outputs);
	}
	if (l.batch_normalize) {
		forward_batchnorm_layer(l, net);
	} else {
		add_bias(l.output, l.biases, l.batch, l.n, l.out_w * l.out_h);
	}
	activate_array(l.output, l.batch * l.n * l.out_w * l.out_h, l.activation);
}
Example #4
void forward_connected_layer(connected_layer l, network_state state)
{
    int i;
    fill_cpu(l.outputs*l.batch, 0, l.output, 1);
    int m = l.batch;
    int k = l.inputs;
    int n = l.outputs;
    float *a = state.input;
    float *b = l.weights;
    float *c = l.output;
    gemm(0,1,m,n,k,1,a,k,b,k,1,c,n);
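    /* gemm(0,1,...): output = input (batch x inputs) * weights^T, i.e. the
       fully connected layer as a single matrix product */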
    if(l.batch_normalize){
        if(state.train){
            mean_cpu(l.output, l.batch, l.outputs, 1, l.mean);
            variance_cpu(l.output, l.mean, l.batch, l.outputs, 1, l.variance);

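            /* exponential moving average of the batch statistics:
               rolling = .95 * rolling + .05 * batch */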
            scal_cpu(l.outputs, .95, l.rolling_mean, 1);
            axpy_cpu(l.outputs, .05, l.mean, 1, l.rolling_mean, 1);
            scal_cpu(l.outputs, .95, l.rolling_variance, 1);
            axpy_cpu(l.outputs, .05, l.variance, 1, l.rolling_variance, 1);

            copy_cpu(l.outputs*l.batch, l.output, 1, l.x, 1);
            normalize_cpu(l.output, l.mean, l.variance, l.batch, l.outputs, 1);   
            copy_cpu(l.outputs*l.batch, l.output, 1, l.x_norm, 1);
        } else {
            normalize_cpu(l.output, l.rolling_mean, l.rolling_variance, l.batch, l.outputs, 1);
        }
        scale_bias(l.output, l.scales, l.batch, l.outputs, 1);
    }
    for(i = 0; i < l.batch; ++i){
        axpy_cpu(l.outputs, 1, l.biases, 1, l.output + i*l.outputs, 1);
    }
    activate_array(l.output, l.outputs*l.batch, l.activation);
}
Example #5
void forward_deconvolutional_layer(const layer l, network_state state)
{
    int i;
    int out_h = l.out_h;
    int out_w = l.out_w;
    int size = out_h*out_w;

    int m = l.size*l.size*l.n;
    int n = l.h*l.w;
    int k = l.c;

    fill_cpu(l.outputs*l.batch, 0, l.output, 1);

    for(i = 0; i < l.batch; ++i){
        float *a = l.weights;
        float *b = state.input + i*l.c*l.h*l.w;
        float *c = state.workspace;

        gemm(1,0,m,n,k,1,a,m,b,n,0,c,n);

        col2im_cpu(c, l.n, out_h, out_w, l.size, l.stride, 0, l.output+i*l.n*size);
    }

    if(l.batch_normalize){
        forward_batchnorm_layer(l, state);
    } else {
        add_bias(l.output, l.biases, l.batch, l.n, l.out_h*l.out_w);
    }
    activate_array(l.output, l.batch*l.n*size, l.activation);
}
Example #6
void slerp(float *start, float *end, float s, int n, float *out)
{
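    /* spherical linear interpolation, assuming start and end are unit
       vectors: out = (sin((1-s)*omega)*start + sin(s*omega)*end) / sin(omega),
       where omega is the angle between them; the final renormalization
       guards against numerical drift */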
    float omega = acos(dot_cpu(n, start, 1, end, 1));
    float so = sin(omega);
    fill_cpu(n, 0, out, 1);
    axpy_cpu(n, sin((1-s)*omega)/so, start, 1, out, 1);
    axpy_cpu(n, sin(s*omega)/so, end, 1, out, 1);

    float mag = mag_array(out, n);
    scale_array(out, n, 1./mag);
}
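
A minimal usage sketch (not part of the listing): interpolating halfway
between two unit vectors. It assumes the slerp above is linked in (hence the
local declaration) and that darknet's blas.h and utils.h provide dot_cpu,
axpy_cpu, fill_cpu, mag_array, and scale_array; the inputs are assumed
unit-length so acos and the sin(omega) division are well defined.

#include <stdio.h>
#include "blas.h"
#include "utils.h"

void slerp(float *start, float *end, float s, int n, float *out);

int main(void)
{
    float start[3] = {1, 0, 0};   /* unit vector along x */
    float end[3]   = {0, 1, 0};   /* unit vector along y */
    float out[3];
    slerp(start, end, .5, 3, out);                 /* halfway along the arc */
    printf("%g %g %g\n", out[0], out[1], out[2]);  /* ~0.707 0.707 0 */
    return 0;
}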
Example #7
void forward_rnn_layer(layer l, network_state state) {
	network_state s = { 0 };
	s.train = state.train;
	int i;
	layer input_layer = *(l.input_layer);
	layer self_layer = *(l.self_layer);
	layer output_layer = *(l.output_layer);
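
	/* vanilla RNN unrolled over l.steps:
	   h_t = input_layer(x_t) + self_layer(h_{t-1}) (+ h_{t-1} with shortcut),
	   y_t = output_layer(h_t); increment_layer advances each sub-layer's
	   output/delta pointers to the next time step */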

	fill_cpu(l.outputs * l.batch * l.steps, 0, output_layer.delta, 1);
	fill_cpu(l.hidden * l.batch * l.steps, 0, self_layer.delta, 1);
	fill_cpu(l.hidden * l.batch * l.steps, 0, input_layer.delta, 1);
	if (state.train)
		fill_cpu(l.hidden * l.batch, 0, l.state, 1);

	for (i = 0; i < l.steps; ++i) {
		s.input = state.input;
		forward_connected_layer(input_layer, s);

		s.input = l.state;
		forward_connected_layer(self_layer, s);

		float *old_state = l.state;
		if (state.train)
			l.state += l.hidden * l.batch;
		if (l.shortcut) {
			copy_cpu(l.hidden * l.batch, old_state, 1, l.state, 1);
		} else {
			fill_cpu(l.hidden * l.batch, 0, l.state, 1);
		}
		axpy_cpu(l.hidden * l.batch, 1, input_layer.output, 1, l.state, 1);
		axpy_cpu(l.hidden * l.batch, 1, self_layer.output, 1, l.state, 1);

		s.input = l.state;
		forward_connected_layer(output_layer, s);

		state.input += l.inputs * l.batch;
		increment_layer(&input_layer, 1);
		increment_layer(&self_layer, 1);
		increment_layer(&output_layer, 1);
	}
}
Example #8
void forward_network(network net, network_state state)
{
    state.workspace = net.workspace;
    int i;
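    /* run the layers in order: zero each layer's delta, call its forward
       hook, and pass its output on as the next layer's input */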
    for(i = 0; i < net.n; ++i){
        state.index = i;
        layer l = net.layers[i];
        if(l.delta){
            fill_cpu(l.outputs * l.batch, 0, l.delta, 1);
        }
        l.forward(l, state);
        state.input = l.output;
    }
}
Example #9
void forward_network(network net)
{
    int i;
    for(i = 0; i < net.n; ++i){
        net.index = i;
        layer l = net.layers[i];
        if(l.delta){
            fill_cpu(l.outputs * l.batch, 0, l.delta, 1);
        }
        l.forward(l, net);
        net.input = l.output;
        if(l.truth) {
            net.truth = l.output;
        }
    }
    calc_network_cost(net);
}
Example #10
void forward_convolutional_layer(convolutional_layer l, network net)
{
    int i, j;

    fill_cpu(l.outputs*l.batch, 0, l.output, 1);

    if(l.xnor){
        binarize_weights(l.weights, l.n, l.c/l.groups*l.size*l.size, l.binary_weights);
        swap_binary(&l);
        binarize_cpu(net.input, l.c*l.h*l.w*l.batch, l.binary_input);
        net.input = l.binary_input;
    }

    int m = l.n/l.groups;
    int k = l.size*l.size*l.c/l.groups;
    int n = l.out_w*l.out_h;
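    /* grouped convolution: each group uses its own slice of the weights and
       of the input channels; per group the GEMM is (l.n/l.groups filters)
       times the im2col columns */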
    for(i = 0; i < l.batch; ++i){
        for(j = 0; j < l.groups; ++j){
            float *a = l.weights + j*l.nweights/l.groups;
            float *b = net.workspace;
            float *c = l.output + (i*l.groups + j)*n*m;

            im2col_cpu(net.input + (i*l.groups + j)*l.c/l.groups*l.h*l.w,
                l.c/l.groups, l.h, l.w, l.size, l.stride, l.pad, b);
            gemm(0,0,m,n,k,1,a,k,b,n,1,c,n);
        }
    }

    if(l.batch_normalize){
        forward_batchnorm_layer(l, net);
    } else {
        add_bias(l.output, l.biases, l.batch, l.n, l.out_h*l.out_w);
    }

    activate_array(l.output, l.outputs*l.batch, l.activation);
    if(l.binary || l.xnor) swap_binary(&l);
}
Example #11
void forward_convolutional_layer(const convolutional_layer l, network_state state)
{
    int out_h = convolutional_out_height(l);
    int out_w = convolutional_out_width(l);
    int i;

    fill_cpu(l.outputs*l.batch, 0, l.output, 1);

    int m = l.n;
    int k = l.size*l.size*l.c;
    int n = out_h*out_w;

    float *a = l.filters;
    float *b = l.col_image;
    float *c = l.output;
    for(i = 0; i < l.batch; ++i){
        im2col_cpu(state.input, l.c, l.h, l.w, 
                l.size, l.stride, l.pad, b);
        gemm(0,0,m,n,k,1,a,k,b,n,1,c,n);
        c += n*m;
        state.input += l.c*l.h*l.w;
    }

//add by fanghao
 /*   int ii,jj,kk,mm,pp,tt;
    int lcc = l.c;
    int lhh = l.h;
    int lww = l.w;
    int kernel = l.size;
    int pad;
    if(l.pad)
         pad = l.size/2;
    else
	 pad = l.pad;
    lhh += 2*pad;
    lww += 2*pad;
    float *dataP;
    dataP = (float *)calloc(lcc*lhh*lww, sizeof(float));

    for(ii=0; ii < lcc; ii++)
        for(jj=pad; jj<lhh-pad; jj++)
             for(kk=pad; kk<lww-pad; kk++)
                dataP[ii*lhh*lww + jj*lww + kk] = state.input[ii*(lhh - 2*pad)*(lww-2*pad) + (jj - pad)*(lww - 2*pad) + kk-pad];

    for(ii=0; ii<m; ii++)
        for(jj=0; jj<out_h; jj++)
            for(kk=0; kk<out_w; kk++) {
                float tempAcc = 0.0;
                    for(mm=0; mm<lcc; mm++)
                        for(pp=0; pp<kernel; pp++)
                            for(tt=0; tt<kernel; tt++)
                                tempAcc += a[ii*lcc*kernel*kernel+mm*kernel*kernel+pp*kernel+tt]*dataP[mm*lhh*lww+(l.stride*jj+pp)*lww+l.stride*kk+tt];
                c[ii*out_h*out_w+jj*out_w+kk] = tempAcc;
                        }
//	c += n*m;
//state.input += l.c*l.h*l.w;
//
*/



    if(l.batch_normalize){
        if(state.train){
            mean_cpu(l.output, l.batch, l.n, l.out_h*l.out_w, l.mean);   
            variance_cpu(l.output, l.mean, l.batch, l.n, l.out_h*l.out_w, l.variance);   
            normalize_cpu(l.output, l.mean, l.variance, l.batch, l.n, l.out_h*l.out_w);   
        } else {
            normalize_cpu(l.output, l.rolling_mean, l.rolling_variance, l.batch, l.n, l.out_h*l.out_w);
        }
        scale_bias(l.output, l.scales, l.batch, l.n, out_h*out_w);
    }
    add_bias(l.output, l.biases, l.batch, l.n, out_h*out_w);

    activate_array(l.output, m*n*l.batch, l.activation);
}
Example #12
void train_dcgan(char *cfg, char *weight, char *acfg, char *aweight, int clear, int display, char *train_images, int maxbatch)
{
#ifdef GPU
    char *backup_directory = "/home/kunle12/backup/";
    srand(time(0));
    char *base = basecfg(cfg);
    char *abase = basecfg(acfg);
    printf("%s\n", base);
    network *gnet = load_network(cfg, weight, clear);
    network *anet = load_network(acfg, aweight, clear);
    //float orig_rate = anet->learning_rate;

    int i, j, k;
    layer imlayer = {0};
    for (i = 0; i < gnet->n; ++i) {
        if (gnet->layers[i].out_c == 3) {
            imlayer = gnet->layers[i];
            break;
        }
    }

    printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", gnet->learning_rate, gnet->momentum, gnet->decay);
    int imgs = gnet->batch*gnet->subdivisions;
    i = *gnet->seen/imgs;
    data train, buffer;


    list *plist = get_paths(train_images);
    //int N = plist->size;
    char **paths = (char **)list_to_array(plist);

    load_args args= get_base_args(anet);
    args.paths = paths;
    args.n = imgs;
    args.m = plist->size;
    args.d = &buffer;
    args.type = CLASSIFICATION_DATA;
    args.threads=16;
    args.classes = 1;
    char *ls[2] = {"imagenet", "zzzzzzzz"};
    args.labels = ls;

    pthread_t load_thread = load_data_in_thread(args);
    clock_t time;

    gnet->train = 1;
    anet->train = 1;

    int x_size = gnet->inputs*gnet->batch;
    int y_size = gnet->truths*gnet->batch;
    float *imerror = cuda_make_array(0, y_size);

    //int ay_size = anet->truths*anet->batch;

    float aloss_avg = -1;

    //data generated = copy_data(train);

    if (maxbatch == 0) maxbatch = gnet->max_batches;
    while (get_current_batch(gnet) < maxbatch) {
        i += 1;
        time=clock();
        pthread_join(load_thread, 0);
        train = buffer;

        //translate_data_rows(train, -.5);
        //scale_data_rows(train, 2);

        load_thread = load_data_in_thread(args);

        printf("Loaded: %lf seconds\n", sec(clock()-time));

        data gen = copy_data(train);
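        /* adversarial labels: real images get y = 1, generated ones y = 0 */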
        for (j = 0; j < imgs; ++j) {
            train.y.vals[j][0] = 1;
            gen.y.vals[j][0] = 0;
        }
        time=clock();

        for(j = 0; j < gnet->subdivisions; ++j){
            get_next_batch(train, gnet->batch, j*gnet->batch, gnet->truth, 0);
            int z;
            for(z = 0; z < x_size; ++z){
                gnet->input[z] = rand_normal();
            }
            for(z = 0; z < gnet->batch; ++z){
                float mag = mag_array(gnet->input + z*gnet->inputs, gnet->inputs);
                scale_array(gnet->input + z*gnet->inputs, gnet->inputs, 1./mag);
            }
            /*
               for(z = 0; z < 100; ++z){
               printf("%f, ", gnet->input[z]);
               }
               printf("\n");
               printf("input: %f %f\n", mean_array(gnet->input, x_size), variance_array(gnet->input, x_size));
             */

            //cuda_push_array(gnet->input_gpu, gnet->input, x_size);
            //cuda_push_array(gnet->truth_gpu, gnet->truth, y_size);
            *gnet->seen += gnet->batch;
            forward_network(gnet);

            fill_gpu(imlayer.outputs*imlayer.batch, 0, imerror, 1);
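            /* present the generated batch to the adversary labeled as real
               (truth = 1); the adversary's input gradient collects in imerror
               through anet->delta_gpu and is added to the generator's output
               delta below */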
            fill_cpu(anet->truths*anet->batch, 1, anet->truth, 1);
            copy_cpu(anet->inputs*anet->batch, imlayer.output, 1, anet->input, 1);
            anet->delta_gpu = imerror;
            forward_network(anet);
            backward_network(anet);

            //float genaloss = *anet->cost / anet->batch;
            //printf("%f\n", genaloss);

            scal_gpu(imlayer.outputs*imlayer.batch, 1, imerror, 1);
            scal_gpu(imlayer.outputs*imlayer.batch, 0, gnet->layers[gnet->n-1].delta_gpu, 1);

            //printf("realness %f\n", cuda_mag_array(imerror, imlayer.outputs*imlayer.batch));
            //printf("features %f\n", cuda_mag_array(gnet->layers[gnet->n-1].delta_gpu, imlayer.outputs*imlayer.batch));

            axpy_gpu(imlayer.outputs*imlayer.batch, 1, imerror, 1, gnet->layers[gnet->n-1].delta_gpu, 1);

            backward_network(gnet);

            /*
               for(k = 0; k < gnet->n; ++k){
               layer l = gnet->layers[k];
               cuda_pull_array(l.output_gpu, l.output, l.outputs*l.batch);
               printf("%d: %f %f\n", k, mean_array(l.output, l.outputs*l.batch), variance_array(l.output, l.outputs*l.batch));
               }
             */

            for(k = 0; k < gnet->batch; ++k){
                int index = j*gnet->batch + k;
                copy_cpu(gnet->outputs, gnet->output + k*gnet->outputs, 1, gen.X.vals[index], 1);
            }
        }
        harmless_update_network_gpu(anet);

        data merge = concat_data(train, gen);
        //randomize_data(merge);
        float aloss = train_network(anet, merge);

        //translate_image(im, 1);
        //scale_image(im, .5);
        //translate_image(im2, 1);
        //scale_image(im2, .5);
#ifdef OPENCV
        if(display){
            image im = float_to_image(anet->w, anet->h, anet->c, gen.X.vals[0]);
            image im2 = float_to_image(anet->w, anet->h, anet->c, train.X.vals[0]);
            show_image(im, "gen", 1);
            show_image(im2, "train", 1);
            save_image(im, "gen");
            save_image(im2, "train");
        }
#endif

        /*
           if(aloss < .1){
           anet->learning_rate = 0;
           } else if (aloss > .3){
           anet->learning_rate = orig_rate;
           }
         */

        update_network_gpu(gnet);

        free_data(merge);
        free_data(train);
        free_data(gen);
        if (aloss_avg < 0) aloss_avg = aloss;
        aloss_avg = aloss_avg*.9 + aloss*.1;

        printf("%d: adv: %f | adv_avg: %f, %f rate, %lf seconds, %d images\n", i, aloss, aloss_avg, get_current_rate(gnet), sec(clock()-time), i*imgs);
        if(i%10000==0){
            char buff[256];
            sprintf(buff, "%s/%s_%d.weights", backup_directory, base, i);
            save_weights(gnet, buff);
            sprintf(buff, "%s/%s_%d.weights", backup_directory, abase, i);
            save_weights(anet, buff);
        }
        if(i%1000==0){
            char buff[256];
            sprintf(buff, "%s/%s.backup", backup_directory, base);
            save_weights(gnet, buff);
            sprintf(buff, "%s/%s.backup", backup_directory, abase);
            save_weights(anet, buff);
        }
    }
    char buff[256];
    sprintf(buff, "%s/%s_final.weights", backup_directory, base);
    save_weights(gnet, buff);
#endif
    free_network(gnet);
    free_network(anet);
}
Example #13
void train_prog(char *cfg, char *weight, char *acfg, char *aweight, int clear, int display, char *train_images, int maxbatch)
{
#ifdef GPU
    char *backup_directory = "/home/kunle12/backup/";
    srand(time(0));
    char *base = basecfg(cfg);
    char *abase = basecfg(acfg);
    printf("%s\n", base);
    network *gnet = load_network(cfg, weight, clear);
    network *anet = load_network(acfg, aweight, clear);

    int i, j, k;
    layer imlayer = gnet->layers[gnet->n-1];

    printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", gnet->learning_rate, gnet->momentum, gnet->decay);
    int imgs = gnet->batch*gnet->subdivisions;
    i = *gnet->seen/imgs;
    data train, buffer;


    list *plist = get_paths(train_images);
    char **paths = (char **)list_to_array(plist);

    load_args args= get_base_args(anet);
    args.paths = paths;
    args.n = imgs;
    args.m = plist->size;
    args.d = &buffer;
    args.type = CLASSIFICATION_DATA;
    args.threads=16;
    args.classes = 1;
    char *ls[2] = {"imagenet", "zzzzzzzz"};
    args.labels = ls;

    pthread_t load_thread = load_data_in_thread(args);
    clock_t time;

    gnet->train = 1;
    anet->train = 1;

    int x_size = gnet->inputs*gnet->batch;
    int y_size = gnet->truths*gnet->batch;
    float *imerror = cuda_make_array(0, y_size);

    float aloss_avg = -1;

    if (maxbatch == 0) maxbatch = gnet->max_batches;
    while (get_current_batch(gnet) < maxbatch) {
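        /* alpha ramps from 0 to 1 over the first half of training
           (beta = 1 - alpha); the generator is set to (alpha, beta) and the
           adversary to the reverse, cross-fading the two as training
           progresses */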
        {
            int cb = get_current_batch(gnet);
            float alpha = (float) cb / (maxbatch/2);
            if(alpha > 1) alpha = 1;
            float beta = 1 - alpha;
            printf("%f %f\n", alpha, beta);
            set_network_alpha_beta(gnet, alpha, beta);
            set_network_alpha_beta(anet, beta, alpha);
        }

        i += 1;
        time=clock();
        pthread_join(load_thread, 0);
        train = buffer;

        load_thread = load_data_in_thread(args);

        printf("Loaded: %lf seconds\n", sec(clock()-time));

        data gen = copy_data(train);
        for (j = 0; j < imgs; ++j) {
            train.y.vals[j][0] = 1;
            gen.y.vals[j][0] = 0;
        }
        time=clock();

        for (j = 0; j < gnet->subdivisions; ++j) {
            get_next_batch(train, gnet->batch, j*gnet->batch, gnet->truth, 0);
            int z;
            for(z = 0; z < x_size; ++z){
                gnet->input[z] = rand_normal();
            }
            /*
               for(z = 0; z < gnet->batch; ++z){
               float mag = mag_array(gnet->input + z*gnet->inputs, gnet->inputs);
               scale_array(gnet->input + z*gnet->inputs, gnet->inputs, 1./mag);
               }
             */
            *gnet->seen += gnet->batch;
            forward_network(gnet);

            fill_gpu(imlayer.outputs*imlayer.batch, 0, imerror, 1);
            fill_cpu(anet->truths*anet->batch, 1, anet->truth, 1);
            copy_cpu(anet->inputs*anet->batch, imlayer.output, 1, anet->input, 1);
            anet->delta_gpu = imerror;
            forward_network(anet);
            backward_network(anet);

            //float genaloss = *anet->cost / anet->batch;

            scal_gpu(imlayer.outputs*imlayer.batch, 1, imerror, 1);
            scal_gpu(imlayer.outputs*imlayer.batch, 0, gnet->layers[gnet->n-1].delta_gpu, 1);

            axpy_gpu(imlayer.outputs*imlayer.batch, 1, imerror, 1, gnet->layers[gnet->n-1].delta_gpu, 1);

            backward_network(gnet);

            for(k = 0; k < gnet->batch; ++k){
                int index = j*gnet->batch + k;
                copy_cpu(gnet->outputs, gnet->output + k*gnet->outputs, 1, gen.X.vals[index], 1);
            }
        }
        harmless_update_network_gpu(anet);

        data merge = concat_data(train, gen);
        float aloss = train_network(anet, merge);

#ifdef OPENCV
        if(display){
            image im = float_to_image(anet->w, anet->h, anet->c, gen.X.vals[0]);
            image im2 = float_to_image(anet->w, anet->h, anet->c, train.X.vals[0]);
            show_image(im, "gen", 1);
            show_image(im2, "train", 1);
            save_image(im, "gen");
            save_image(im2, "train");
        }
#endif

        update_network_gpu(gnet);

        free_data(merge);
        free_data(train);
        free_data(gen);
        if (aloss_avg < 0) aloss_avg = aloss;
        aloss_avg = aloss_avg*.9 + aloss*.1;

        printf("%d: adv: %f | adv_avg: %f, %f rate, %lf seconds, %d images\n", i, aloss, aloss_avg, get_current_rate(gnet), sec(clock()-time), i*imgs);
        if(i%10000==0){
            char buff[256];
            sprintf(buff, "%s/%s_%d.weights", backup_directory, base, i);
            save_weights(gnet, buff);
            sprintf(buff, "%s/%s_%d.weights", backup_directory, abase, i);
            save_weights(anet, buff);
        }
        if(i%1000==0){
            char buff[256];
            sprintf(buff, "%s/%s.backup", backup_directory, base);
            save_weights(gnet, buff);
            sprintf(buff, "%s/%s.backup", backup_directory, abase);
            save_weights(anet, buff);
        }
    }
    char buff[256];
    sprintf(buff, "%s/%s_final.weights", backup_directory, base);
    save_weights(gnet, buff);
#endif
  free_network( gnet );
  free_network( anet );
}
Example #14
/*
 * Routine used to setup a newly inserted CPU in preparation for starting
 * it running code.
 */
int
mp_cpu_configure(int cpuid)
{
	md_t		*mdp;
	mde_cookie_t	rootnode, cpunode = MDE_INVAL_ELEM_COOKIE;
	int		listsz, i;
	mde_cookie_t	*listp = NULL;
	int		num_nodes;
	uint64_t	cpuid_prop;
	cpu_t		*cpu;
	processorid_t	id;

	ASSERT(MUTEX_HELD(&cpu_lock));

	if ((mdp = md_get_handle()) == NULL)
		return (ENODEV);

	rootnode = md_root_node(mdp);

	ASSERT(rootnode != MDE_INVAL_ELEM_COOKIE);

	num_nodes = md_node_count(mdp);

	ASSERT(num_nodes > 0);

	listsz = num_nodes * sizeof (mde_cookie_t);
	listp = kmem_zalloc(listsz, KM_SLEEP);

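	/* gather every "cpu" node reachable from the MD root via "fwd" arcs */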
	num_nodes = md_scan_dag(mdp, rootnode, md_find_name(mdp, "cpu"),
	    md_find_name(mdp, "fwd"), listp);

	if (num_nodes < 0)
		return (ENODEV);

	for (i = 0; i < num_nodes; i++) {
		if (md_get_prop_val(mdp, listp[i], "id", &cpuid_prop))
			break;
		if (cpuid_prop == (uint64_t)cpuid) {
			cpunode = listp[i];
			break;
		}
	}

	if (cpunode == MDE_INVAL_ELEM_COOKIE)
		return (ENODEV);

	kmem_free(listp, listsz);

	mpo_cpu_add(mdp, cpuid);

	/*
	 * Note: uses cpu_lock to protect cpunodes
	 * which will be modified inside of fill_cpu and
	 * setup_exec_unit_mappings.
	 */
	fill_cpu(mdp, cpunode);

	/*
	 * Adding a CPU may cause the execution unit sharing
	 * relationships to change. Update the mappings in
	 * the cpunode structures.
	 */
	setup_chip_mappings(mdp);
	setup_exec_unit_mappings(mdp);

	/* propagate the updated mappings to the CPU structures */
	for (id = 0; id < NCPU; id++) {
		if ((cpu = cpu_get(id)) == NULL)
			continue;

		cpu_map_exec_units(cpu);
	}

	(void) md_fini_handle(mdp);

	if ((i = setup_cpu_common(cpuid)) != 0) {
		(void) cleanup_cpu_common(cpuid);
		return (i);
	}

	return (0);
}
Example #15
/*
 * map_wellknown - map known devices & registers
 */
static void
map_wellknown(pnode_t curnode)
{
    extern int status_okay(int, char *, int);
    char tmp_name[MAXSYSNAME];
    int sok;

#ifdef VPRINTF
    VPRINTF("map_wellknown(%x)\n", curnode);
#endif /* VPRINTF */

    for (curnode = CHILD(curnode); curnode; curnode = NEXT(curnode)) {
        /*
         * prune subtree if status property indicating not okay
         */
        sok = status_okay((int)curnode, (char *)NULL, 0);
        if (!sok) {
            char devtype_buf[OBP_MAXPROPNAME];
            int size;

#ifdef VPRINTF
            VPRINTF("map_wellknown: !okay status property\n");
#endif /* VPRINTF */
            /*
             * a status property indicating bad memory will be
             * associated with a node which has a "device_type"
             * property with a value of "memory-controller"
             */
            if ((size = GETPROPLEN(curnode,
                                   OBP_DEVICETYPE)) == -1)
                continue;
            if (size > OBP_MAXPROPNAME) {
                cmn_err(CE_CONT, "node %x '%s' prop too "
                        "big\n", curnode, OBP_DEVICETYPE);
                continue;
            }
            if (GETPROP(curnode, OBP_DEVICETYPE,
                        devtype_buf) == -1) {
                cmn_err(CE_CONT, "node %x '%s' get failed\n",
                        curnode, OBP_DEVICETYPE);
                continue;
            }
            if (strcmp(devtype_buf, "memory-controller") != 0)
                continue;
            /*
             * ...else fall thru and process the node...
             */
        }
        bzero(tmp_name, MAXSYSNAME);
        if (GETPROP(curnode, OBP_NAME, (caddr_t)tmp_name) != -1)
            fill_address(curnode, tmp_name);
        if (GETPROP(curnode, OBP_DEVICETYPE, tmp_name) != -1 &&
                strcmp(tmp_name, "cpu") == 0) {
            fill_cpu(curnode);
        }
        if (strcmp(tmp_name, "tod") == 0)
            have_tod(curnode);
        if (sok && (strcmp(tmp_name, "memory-controller") == 0) &&
                (&plat_fill_mc != NULL))
            plat_fill_mc(curnode);
        map_wellknown(curnode);
    }
}
Example #16
void forward_lstm_layer(layer l, network state) {
	network s = { 0 };
	s.train = state.train;
	int i;
	layer wf = *(l.wf);
	layer wi = *(l.wi);
	layer wg = *(l.wg);
	layer wo = *(l.wo);

	layer uf = *(l.uf);
	layer ui = *(l.ui);
	layer ug = *(l.ug);
	layer uo = *(l.uo);
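
	/* LSTM gates: f, i, g, o are each the sum of a recurrent connected layer
	   (w*) on h_{t-1} and an input connected layer (u*) on x_t; the cell then
	   updates c_t = f*c_{t-1} + i*g and h_t = o*tanh(c_t) */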

	fill_cpu(l.outputs * l.batch * l.steps, 0, wf.delta, 1);
	fill_cpu(l.outputs * l.batch * l.steps, 0, wi.delta, 1);
	fill_cpu(l.outputs * l.batch * l.steps, 0, wg.delta, 1);
	fill_cpu(l.outputs * l.batch * l.steps, 0, wo.delta, 1);

	fill_cpu(l.outputs * l.batch * l.steps, 0, uf.delta, 1);
	fill_cpu(l.outputs * l.batch * l.steps, 0, ui.delta, 1);
	fill_cpu(l.outputs * l.batch * l.steps, 0, ug.delta, 1);
	fill_cpu(l.outputs * l.batch * l.steps, 0, uo.delta, 1);
	if (state.train) {
		fill_cpu(l.outputs * l.batch * l.steps, 0, l.delta, 1);
	}

	for (i = 0; i < l.steps; ++i) {
		s.input = l.h_cpu;
		forward_connected_layer(wf, s);
		forward_connected_layer(wi, s);
		forward_connected_layer(wg, s);
		forward_connected_layer(wo, s);

		s.input = state.input;
		forward_connected_layer(uf, s);
		forward_connected_layer(ui, s);
		forward_connected_layer(ug, s);
		forward_connected_layer(uo, s);

		copy_cpu(l.outputs * l.batch, wf.output, 1, l.f_cpu, 1);
		axpy_cpu(l.outputs * l.batch, 1, uf.output, 1, l.f_cpu, 1);

		copy_cpu(l.outputs * l.batch, wi.output, 1, l.i_cpu, 1);
		axpy_cpu(l.outputs * l.batch, 1, ui.output, 1, l.i_cpu, 1);

		copy_cpu(l.outputs * l.batch, wg.output, 1, l.g_cpu, 1);
		axpy_cpu(l.outputs * l.batch, 1, ug.output, 1, l.g_cpu, 1);

		copy_cpu(l.outputs * l.batch, wo.output, 1, l.o_cpu, 1);
		axpy_cpu(l.outputs * l.batch, 1, uo.output, 1, l.o_cpu, 1);

		activate_array(l.f_cpu, l.outputs * l.batch, LOGISTIC);
		activate_array(l.i_cpu, l.outputs * l.batch, LOGISTIC);
		activate_array(l.g_cpu, l.outputs * l.batch, TANH);
		activate_array(l.o_cpu, l.outputs * l.batch, LOGISTIC);

		copy_cpu(l.outputs * l.batch, l.i_cpu, 1, l.temp_cpu, 1);
		mul_cpu(l.outputs * l.batch, l.g_cpu, 1, l.temp_cpu, 1);
		mul_cpu(l.outputs * l.batch, l.f_cpu, 1, l.c_cpu, 1);
		axpy_cpu(l.outputs * l.batch, 1, l.temp_cpu, 1, l.c_cpu, 1);

		copy_cpu(l.outputs * l.batch, l.c_cpu, 1, l.h_cpu, 1);
		activate_array(l.h_cpu, l.outputs * l.batch, TANH);
		mul_cpu(l.outputs * l.batch, l.o_cpu, 1, l.h_cpu, 1);

		copy_cpu(l.outputs * l.batch, l.c_cpu, 1, l.cell_cpu, 1);
		copy_cpu(l.outputs * l.batch, l.h_cpu, 1, l.output, 1);

		state.input += l.inputs * l.batch;
		l.output += l.outputs * l.batch;
		l.cell_cpu += l.outputs * l.batch;

		increment_layer(&wf, 1);
		increment_layer(&wi, 1);
		increment_layer(&wg, 1);
		increment_layer(&wo, 1);

		increment_layer(&uf, 1);
		increment_layer(&ui, 1);
		increment_layer(&ug, 1);
		increment_layer(&uo, 1);
	}
}
Example #17
void forward_iseg_layer(const layer l, network net) {

	double time = what_time_is_it_now();
	int i, b, j, k;
	int ids = l.extra;
	memcpy(l.output, net.input, l.outputs * l.batch * sizeof(real_t));
	memset(l.delta, 0, l.outputs * l.batch * sizeof(real_t));
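	/* layout per batch item: l.classes channels of per-pixel class scores
	   followed by ids = l.extra embedding channels */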

#ifndef GPU
	for (b = 0; b < l.batch; ++b) {
		int index = b * l.outputs;
		activate_array(l.output + index, l.classes * l.w * l.h, LOGISTIC);
	}
#endif

	for (b = 0; b < l.batch; ++b) {
		// a priori, each pixel has no class
		for (i = 0; i < l.classes; ++i) {
			for (k = 0; k < l.w * l.h; ++k) {
				int index = b * l.outputs + i * l.w * l.h + k;
				l.delta[index] = 0 - l.output[index];
			}
		}

		// a priori, embedding should be small magnitude
		for (i = 0; i < ids; ++i) {
			for (k = 0; k < l.w * l.h; ++k) {
				int index = b * l.outputs + (i + l.classes) * l.w * l.h + k;
				l.delta[index] = .1 * (0 - l.output[index]);
			}
		}

		memset(l.counts, 0, 90 * sizeof(int));
		for (i = 0; i < 90; ++i) {
			fill_cpu(ids, 0, l.sums[i], 1);

			int c = net.truth[b * l.truths + i * (l.w * l.h + 1)];
			if (c < 0)
				break;
			// add up metric embeddings for each instance
			for (k = 0; k < l.w * l.h; ++k) {
				int index = b * l.outputs + c * l.w * l.h + k;
				real_t v = net.truth[b * l.truths + i * (l.w * l.h + 1) + 1 + k];
				if (v) {
					l.delta[index] = v - l.output[index];
					axpy_cpu(ids, 1,
							l.output + b * l.outputs + l.classes * l.w * l.h
									+ k, l.w * l.h, l.sums[i], 1);
					++l.counts[i];
				}
			}
		}

		real_t *mse = calloc(90, sizeof(real_t));
		for (i = 0; i < 90; ++i) {
			int c = net.truth[b * l.truths + i * (l.w * l.h + 1)];
			if (c < 0)
				break;
			for (k = 0; k < l.w * l.h; ++k) {
				real_t v = net.truth[b * l.truths + i * (l.w * l.h + 1) + 1 + k];
				if (v) {
					int z;
					real_t sum = 0;
					for (z = 0; z < ids; ++z) {
						int index = b * l.outputs + (l.classes + z) * l.w * l.h
								+ k;
						sum += pow(l.sums[i][z] / l.counts[i] - l.output[index],
								2);
					}
					mse[i] += sum;
				}
			}
			mse[i] /= l.counts[i];
		}

		// Calculate average embedding
		for (i = 0; i < 90; ++i) {
			if (!l.counts[i])
				continue;
			scal_cpu(ids, 1.f / l.counts[i], l.sums[i], 1);
			if (b == 0 && net.gpu_index == 0) {
				printf("%4d, %6.3f, ", l.counts[i], mse[i]);
				for (j = 0; j < ids; ++j) {
					printf("%6.3f,", l.sums[i][j]);
				}
				printf("\n");
			}
		}
		free(mse);

		// Calculate embedding loss
		for (i = 0; i < 90; ++i) {
			if (!l.counts[i])
				continue;
			for (k = 0; k < l.w * l.h; ++k) {
				real_t v = net.truth[b * l.truths + i * (l.w * l.h + 1) + 1 + k];
				if (v) {
					for (j = 0; j < 90; ++j) {
						if (!l.counts[j])
							continue;
						int z;
						for (z = 0; z < ids; ++z) {
							int index = b * l.outputs
									+ (l.classes + z) * l.w * l.h + k;
							real_t diff = l.sums[j][z] - l.output[index];
							if (j == i)
								l.delta[index] += diff < 0 ? -.1 : .1;
							else
								l.delta[index] += -(diff < 0 ? -.1 : .1);
						}
					}
				}
			}
		}

		for (i = 0; i < ids; ++i) {
			for (k = 0; k < l.w * l.h; ++k) {
				int index = b * l.outputs + (i + l.classes) * l.w * l.h + k;
				l.delta[index] *= .01;
			}
		}
	}

	*(l.cost) = pow(mag_array(l.delta, l.outputs * l.batch), 2);
	printf("took %lf sec\n", what_time_is_it_now() - time);
}