    // Fills vscale and vshift with values such that the algorithm can
    // subsequently compute vdst = vscale * vsrc + vshift.
    // Register roles, as used by the body below:
    //   vscale, vshift  - outputs; receive the multiplier and the addend.
    //   vmean, vsqrtvar - scratch; both are overwritten by the loads from
    //                     mean_ptr(offt) / var_ptr(offt), and vmean is
    //                     additionally clobbered when scale/shift is off.
    //   offt            - offset forwarded to the *_ptr() address helpers.
    //   need_tail       - when true, every load goes through
    //                     uni_vmovups_tail instead of uni_vmovups.
    // veps, vone and vzero are defined outside this view; presumably they
    // hold epsilon, 1.0 and 0.0 respectively (confirm at their definition).
    void compute_vscaleshift(const Vmm &vscale, const Vmm &vshift,
            const Vmm &vmean, const Vmm &vsqrtvar, size_t offt,
            bool need_tail = false) {
        const bool full_load = !need_tail;

        // Statistics: vmean <- mean, vsqrtvar <- sqrt(var + veps).
        if (full_load) {
            uni_vmovups(vmean, mean_ptr(offt));
            uni_vmovups(vsqrtvar, var_ptr(offt));
        } else {
            uni_vmovups_tail(vmean, mean_ptr(offt));
            uni_vmovups_tail(vsqrtvar, var_ptr(offt));
        }
        uni_vaddps(vsqrtvar, vsqrtvar, veps);
        uni_vsqrtps(vsqrtvar, vsqrtvar);

        // NOTE(review): vdivps is emitted directly while every other
        // instruction here goes through a uni_* wrapper; confirm that ISAs
        // without the three-operand form never reach this routine.
        if (!bdesc_->use_scaleshift()) {
            // No user-provided scale/shift:
            //   vscale = vone / sqrt(var + eps)
            //   vshift = vzero - mean * vscale   (vmean is overwritten)
            vdivps(vscale, vone, vsqrtvar);
            uni_vmulps(vmean, vmean, vscale);
            uni_vsubps(vshift, vzero, vmean);
            return;
        }

        // User-provided scale/shift: load them first, then fold the
        // statistics in.
        if (full_load) {
            uni_vmovups(vscale, scale_ptr(offt));
            uni_vmovups(vshift, shift_ptr(offt));
        } else {
            uni_vmovups_tail(vscale, scale_ptr(offt));
            uni_vmovups_tail(vshift, shift_ptr(offt));
        }
        //   vscale = scale / sqrt(var + eps)
        //   vshift = shift - mean * vscale
        // (the second line assumes uni_vfnmadd231ps follows the x86
        // VFNMADD231PS definition, dst = -(src1 * src2) + dst)
        vdivps(vscale, vscale, vsqrtvar);
        uni_vfnmadd231ps(vshift, vmean, vscale);
    };
Example #2
0
File: sdcg.c Project: berkus/lang-e
/*
 * Program entry point.
 *
 * Command-line options (values are parsed with atoi):
 *   -n N   sets n, the dimension passed to new_matrix() and scale()
 *          (default 3)
 *   -t N   sets trials, the number of timed repetitions (default 1)
 *   -m N   sets mask, applied to random() when filling b (default 1023)
 *   -c N   sets c, the exclusive upper bound of the constant loop
 *          (default 7)
 *   -v     verbose: print the matrices and skip the stopwatch calls
 *   -d     sets the global disass to 1
 *   -p     sets the global pixie to 1
 *   -s     sets the global strength_reduce to 0
 * Unrecognised arguments are ignored.
 *
 * Returns 0 on success, or 1 when a value-taking option is the last
 * argument and therefore has no value.
 */
int main(int argc, char *argv[]) {
	int i, j, trials = 1, verbose = 0, n = 3;
	data_t **a, **b, mask = 1023, c = 7;
	v_vptr scale_ptr;

	/* get options */
	for (i = 1; i < argc; i++) {
		const char *opt = argv[i];

		if (strcmp(opt, "-v") == 0) {
			verbose = 1;
		} else if (strcmp(opt, "-d") == 0) {
			disass = 1;
		} else if (strcmp(opt, "-p") == 0) {
			pixie = 1;
		} else if (strcmp(opt, "-s") == 0) {
			strength_reduce = 0;
		} else if (strcmp(opt, "-n") == 0 || strcmp(opt, "-t") == 0
				|| strcmp(opt, "-m") == 0 || strcmp(opt, "-c") == 0) {
			int value;

			/*
			 * argv[argc] is a null pointer, so reading the value of
			 * a trailing option would hand NULL to atoi().
			 */
			if (i + 1 >= argc) {
				printf("%s: option %s requires a value\n", argv[0], opt);
				return 1;
			}
			value = atoi(argv[++i]);

			/* opt is exactly two characters here; opt[1] selects the target */
			switch (opt[1]) {
			case 'n':
				n = value;
				break;
			case 't':
				trials = value;
				break;
			case 'm':
				mask = value;
				break;
			default: /* 'c' */
				c = value;
				break;
			}
		}
	}

	/* allocate and initialize */
	a = new_matrix(n, n);
	b = new_matrix(n, n);
	for (i = 0; i < n; i++) {
		for (j = 0; j < n; j++) {
			b[i][j] = (random() & mask) + 1;
		}
	}
	if (verbose) {
		/* cast so the argument matches %u whatever integer type data_t is */
		printf("constant = %u\n", (unsigned)c);
		m_print("B", b, n);
	}

	scale_ptr = scale(n, a, b);
	/*
	 * NOTE(review): the message says "1 to c" but the loop below runs
	 * c1 = 0 .. c-1; confirm which of the two is intended.
	 */
	if (verbose) {
		printf("doing constants 1 to %d\n", (int)c);
	}
	for (i = 0; i < trials; i++) {
		int c1;

		/* the stopwatch is only used when not printing verbose output */
		if (!verbose) {
			startwatch(0);
		}
		for (c1 = 0; c1 < c; c1++) {
			scale_ptr(a, b, c1);
		}
		if (!verbose) {
			stopwatch(0);
		}
	}
	if (verbose) {
		m_print("A", a, n);
	}
	return 0;
}