Beispiel #1
0
static void open_ta()
{
	TEEC_Result res;
	TEEC_UUID uuid = TA_SHA_PERF_UUID;
	uint32_t err_origin;

	res = TEEC_InitializeContext(NULL, &ctx);
	check_res(res,"TEEC_InitializeContext");

	res = TEEC_OpenSession(&ctx, &sess, &uuid, TEEC_LOGIN_PUBLIC, NULL,
			       NULL, &err_origin);
	check_res(res,"TEEC_OpenSession");
}
Beispiel #2
0
static void alloc_shm(size_t sz, uint32_t algo)
{
	TEEC_Result res;

	in_shm.buffer = NULL;
	in_shm.size = sz + offset;
	res = TEEC_AllocateSharedMemory(&ctx, &in_shm);
	check_res(res, "TEEC_AllocateSharedMemory");

	out_shm.buffer = NULL;
	out_shm.size = hash_size(algo);
	res = TEEC_AllocateSharedMemory(&ctx, &out_shm);
	check_res(res, "TEEC_AllocateSharedMemory");
}
    void freceive_more_async(boost::asio::io_service & ios, aziomq::socket & socket,
                             const const_buf_vec & expected_bufs, int flags) {
        SYNC_LOG(__PRETTY_FUNCTION__);
         //create vector of raw bufs to fill from length of expected_bufs
        buf_vec_t buf_vec(std::distance(std::begin(expected_bufs),
                                        std::end(expected_bufs)));
        zero(buf_vec);

         //create azio buffer vector
        mutable_buf_vec bufs;
        init(bufs, buf_vec);

        auto it = std::begin(bufs);
        auto e = std::end(bufs);

        std::exception_ptr err;
        socket.async_receive_more(boost::asio::buffer(*it++),
                [&ios, &socket, &bufs, &it, e, &err, expected_bufs](const boost::system::error_code & ec,
                                                                   aziomq::socket::more_result mr) {
                    if (ec) {
                        err = std::make_exception_ptr(boost::system::system_error(ec));
                    } else {
                        for (; mr.second && it != e; ++it) {
                            mr = socket.receive_more(boost::asio::buffer(*it));
                            err = check_res(bufs, expected_bufs);
                        }
                    }
                    ios.stop();
                });
        ios.run();
        if (err != std::exception_ptr())
            std::rethrow_exception(err);
    }
    void freceive_async(boost::asio::io_service & ios, aziomq::socket & socket,
                            const const_buf_vec & expected_bufs, int flags) {
        SYNC_LOG(__PRETTY_FUNCTION__);
         //create vector of raw bufs to fill from length of expected_bufs
        buf_vec_t buf_vec(std::distance(std::begin(expected_bufs),
                                        std::end(expected_bufs)));
        zero(buf_vec);

         //create azio buffer vector
        mutable_buf_vec bufs;
        init(bufs, buf_vec);

        std::exception_ptr err;
        socket.async_receive(bufs,
                [&ios, &bufs, &err, expected_bufs](const boost::system::error_code & ec, size_t bytes_transferred) {
                    if (ec) {
                        err = std::make_exception_ptr(boost::system::system_error(ec));
                    } else {
                        err = check_res(bufs, expected_bufs);
                    }
                    ios.stop();
                });
        ios.run();
        if (err != std::exception_ptr())
            std::rethrow_exception(err);
    }
Beispiel #5
0
static
int little_check_reserved_lock(sqlite3_file *file) {
  little_file *self = (little_file*)file;
  int res = check_res(self->name);
  trace("RESERVED: %d\n", res);
  return res;
}
 std::exception_ptr check_res(const mutable_buf_vec & bufs, const const_buf_vec & expected_bufs) {
     auto expected_len = std::distance(std::begin(expected_bufs), std::end(expected_bufs));
     auto actual_len = std::distance(std::begin(bufs), std::end(bufs));
     if (expected_len != actual_len) {
         std::ostringstream stm;
         stm << boost::format("Expecting %1% buffers, got %2% buffers") % expected_len % actual_len;
         return std::make_exception_ptr(std::runtime_error(stm.str()));
     }
     return check_res(std::begin(bufs), std::end(bufs), std::begin(expected_bufs));
 }
void *th_func_first(void *arg)
{
  int i;
  for (i = 2; i <= LIM; i += 2)
  {
    check_res(sem_wait(&bin_sem), SEM_ERR_WAIT, NULL);
    counter = i;
    printf("TH1: %i\n", counter);
  }
  pthread_exit(NULL);
}
void *th_func_second(void *arg)
{
  int i;
  for (i = 1; i <= LIM; i += 2)
  {
    counter = i;
    printf("TH2: %i\n", counter);
    check_res(sem_post(&bin_sem), SEM_ERR_POST, NULL);
    while (counter % 2) sleep(1);
  }
  pthread_exit(NULL);
}
Beispiel #9
0
static void prepare_op()
{
	TEEC_Result res;
	uint32_t ret_origin;
	TEEC_Operation op;

	memset(&op, 0, sizeof(op));
	op.paramTypes = TEEC_PARAM_TYPES(TEEC_VALUE_INPUT, TEEC_NONE,
					 TEEC_NONE, TEEC_NONE);
	op.params[0].value.a = algo;
	res = TEEC_InvokeCommand(&sess, TA_SHA_PERF_CMD_PREPARE_OP, &op,
				 &ret_origin);
	check_res(res, "TEEC_InvokeCommand");
}
Beispiel #10
0
static uint64_t run_test_once(void *in, size_t size, TEEC_Operation *op,
			  unsigned int l)
{
	struct timespec t0, t1;
	TEEC_Result res;
	uint32_t ret_origin;

	if (random_in)
		read_random(in, size);
	get_current_time(&t0);
	res = TEEC_InvokeCommand(&sess, TA_SHA_PERF_CMD_PROCESS, op,
				 &ret_origin);
	check_res(res, "TEEC_InvokeCommand");
	get_current_time(&t1);

	return timespec_diff_ns(&t0, &t1);
}
Beispiel #11
0
    void freceive_sync(boost::asio::io_service &, aziomq::socket & socket,
                            const const_buf_vec & expected_bufs, int flags) {
        SYNC_LOG(__PRETTY_FUNCTION__);
        // create vector of raw bufs to fill from length of expected_bufs
        buf_vec_t buf_vec(std::distance(std::begin(expected_bufs),
                                        std::end(expected_bufs)));
        zero(buf_vec);

        // create azio buffer vector
        mutable_buf_vec bufs;
        init(bufs, buf_vec);

        socket.receive(bufs, flags);
        auto e = check_res(bufs, expected_bufs);
        if (e != std::exception_ptr())
            std::rethrow_exception(e);
    }
Beispiel #12
0
    void freceive_more_sync(boost::asio::io_service &, aziomq::socket & socket,
                            const const_buf_vec & expected_bufs, int flags) {
        SYNC_LOG(__PRETTY_FUNCTION__);
        // create vector of raw bufs to fill from length of expected_bufs
        buf_vec_t buf_vec(std::distance(std::begin(expected_bufs),
                                        std::end(expected_bufs)));
        zero(buf_vec);

        // create azio buffer vector
        mutable_buf_vec bufs;
        init(bufs, buf_vec);

        aziomq::socket::more_result mr = std::make_pair(0, true);
        for (auto it = std::begin(bufs), e = std::end(bufs); mr.second && it != e; ++it) {
            mr = socket.receive_more(boost::asio::buffer(*it), flags);
        }

        auto e = check_res(bufs, expected_bufs);
        if (e != std::exception_ptr())
            std::rethrow_exception(e);
    }
Beispiel #13
0
int main(int argc, char *argv[])
{
  pthread_t th_first, th_second;

  counter = 0;
  check_res(sem_init(&bin_sem, 0, 0), SEM_ERR_INIT, NULL);

  check_res(
      pthread_create(&th_first, NULL, th_func_first, NULL), TH_ERR_TMPL_CREATION, TH_FIRST);
  check_res(
    pthread_create(&th_second, NULL, th_func_second, NULL), TH_ERR_TMPL_CREATION, TH_SECOND);

  check_res(pthread_join(th_first, NULL), TH_ERR_TMPL_JOIN, TH_FIRST);
  check_res(pthread_join(th_second, NULL), TH_ERR_TMPL_JOIN, TH_SECOND);

  check_res(sem_destroy(&bin_sem), SEM_ERR_DESTROY, NULL);

  return EXIT_SUCCESS;
}
Beispiel #14
0
void init_gl(const opt_data *opt_data, int width, int height)
{
	if(!ogl_LoadFunctions()) {
		fprintf(stderr, "ERROR: Failed to load GL extensions\n");
		exit(EXIT_FAILURE);
	}
	CHECK_GL_ERR;
	if(!(ogl_GetMajorVersion() > 1 || ogl_GetMinorVersion() >= 4)) {
		fprintf(stderr, "ERROR: Your OpenGL Implementation is too old\n");
		exit(EXIT_FAILURE);
	}

	opts = opt_data;
	GLboolean force_fixed = opts->gl_opts != NULL && strstr(opts->gl_opts, "fixed") != NULL;
	GLboolean res_boost = opts->gl_opts != NULL && strstr(opts->gl_opts, "rboost") != NULL;
	packed_intesity_pixels = opts->gl_opts != NULL && strstr(opts->gl_opts, "pintens") != NULL;
	scr_w = width; scr_h = height;
	if(opts->fullscreen) im_w = scr_w, im_h=scr_h;
	else im_w = IMAX(make_pow2(IMAX(scr_w, scr_h)), 128)<<res_boost; im_h = im_w;
	while(!check_res(im_w, im_h)) { // shrink textures until they work
		printf(" %ix%i Too big! Shrink texture\n", im_h, im_w);
		im_w = im_w/2;
		im_h = im_h/2;
	}

	printf("Using internel resolution of %ix%i\n\n", im_h, im_w);

	CHECK_GL_ERR;
	setup_viewport(scr_w, scr_h); CHECK_GL_ERR;
	//glEnable(GL_LINE_SMOOTH); CHECK_GL_ERR;

	glClear(GL_COLOR_BUFFER_BIT); CHECK_GL_ERR;
	glRasterPos2f(-1,1 - 20.0f/(scr_h*0.5f));
	//draw_string("Loading... "); swap_buffers(); CHECK_GL_ERR;

	printf("GL_VENDOR: %s\n", glGetString(GL_VENDOR));
	printf("GL_RENDERER: %s\n", glGetString(GL_RENDERER));
	printf("GL_VERSION: %s\n", glGetString(GL_VERSION));
	if(ogl_ext_ARB_shading_language_100) printf("GL_SL_VERSION: %s\n", glGetString(GL_SHADING_LANGUAGE_VERSION_ARB));
	printf("GL_EXTENSIONS: %s\n", glGetString(GL_EXTENSIONS));
	printf("\n\n");

/*
        void DebugMessageControlARB(enum source,
                                    enum type,
                                    enum severity,
                                    sizei count,
                                    const uint* ids,
                                    boolean enabled);
 */
	if(ogl_ext_ARB_debug_output) {
		glDebugMessageCallbackARB((GLDEBUGPROCARB)gl_debug_callback, NULL);
#if DEBUG
		glDebugMessageControlARB(GL_DONT_CARE,
		                         GL_DONT_CARE,
		                         GL_DONT_CARE,
		                         0, NULL,
		                         GL_TRUE);
		glEnable(GL_DEBUG_OUTPUT_SYNCHRONOUS_ARB);
#else
		glDebugMessageControlARB(GL_DONT_CARE,
		                         GL_DEBUG_TYPE_DEPRECATED_BEHAVIOR_ARB,
		                         GL_DONT_CARE,
		                         0, NULL,
		                         GL_TRUE);
		glDebugMessageControlARB(GL_DONT_CARE,
		                         GL_DEBUG_TYPE_UNDEFINED_BEHAVIOR_ARB,
		                         GL_DONT_CARE,
		                         0, NULL,
		                         GL_TRUE);
#endif
		printf("Have ARB_debug_output: registered callback\n");
	}
	CHECK_GL_ERR;

	// for ES we need:
	//    EXT_blend_minmax
	//    EXT_texture_format_BGRA8888

	// optionally we should use
	//    EXT_texture_rg + OES_texture_float/OES_texture_half_float
	//    OES_mapbuffer

/*	if(!ogl_ext_EXT_blend_minmax) { // can stop checking for this, it's in OpenGL 1.4*/
/*		printf("missing required gl extension EXT_blend_minmax!\n");*/
/*		exit(1);*/
/*	}*/
	if(!ogl_ext_EXT_framebuffer_object && !ogl_ext_ARB_framebuffer_object) {
		printf("missing required gl extension EXT_framebuffer_object!\n");
		exit(1);
	}

	// TODO: this should also pass if we have GL 2.0+
	if(!ogl_ext_ARB_shading_language_100 && !(ogl_ext_ARB_fragment_shader && ogl_ext_ARB_vertex_shader && ogl_ext_ARB_shader_objects)) {
		if(!ogl_ext_ARB_pixel_buffer_object)
			printf("Missing GLSL and no pixel buffer objects, WILL be slow!\n");
		else
			printf("No GLSL using all fixed function! (might be slow)\n");
	}

	if(force_fixed) {
		printf("Fixed function code forced\n");
		packed_intesity_pixels = GL_FALSE;
	}
	CHECK_GL_ERR;

	if(packed_intesity_pixels) printf("Packed intensity enabled\n");

	glEnable(GL_TEXTURE_2D); CHECK_GL_ERR;

	init_mandel(); CHECK_GL_ERR;

	if(!force_fixed) glfract = fractal_glsl_init(opts, im_w, im_h, packed_intesity_pixels);
	if(!glfract) glfract = fractal_fixed_init(opts, im_w, im_h);
	CHECK_GL_ERR;

	if(!force_fixed) glmaxsrc = maxsrc_new_glsl(IMAX(im_w>>res_boost, 256), IMAX(im_h>>res_boost, 256), packed_intesity_pixels);
	if(!glmaxsrc) glmaxsrc = maxsrc_new_fixed(IMAX(im_w>>res_boost, 256), IMAX(im_h>>res_boost, 256));
	CHECK_GL_ERR;

	if(!force_fixed) glpal = pal_init_glsl(packed_intesity_pixels);
	if(!glpal) glpal = pal_init_fixed(im_w, im_h);
	CHECK_GL_ERR;

	pd = new_point_data(opts->rational_julia?4:2);

	memset(frametimes, 0, sizeof(frametimes));
	totframetime = frametimes[0] = MAX(10000000/opts->draw_rate, 1);
	memset(worktimes, 0, sizeof(worktimes));
	totworktime = worktimes[0] = MIN(10000000/opts->draw_rate, 1);
	tick0 = uget_ticks();
}
int main()
{
    int i;

    init_double(old_dst.d, 4, 3.14);	/* Initialize memory and the registers */

    vind.i[0] = 0;
    vind.i[1] = 2;
    vind.i[2] = 3;
    vind.i[3] = 1;
    vind.i[4] = 7;
    vind.i[5] = 11;
    vind.i[6] = 13;
    vind.i[7] = 10;

    mask.i[0] = 0x80ffffff;
    mask.i[1] = 0x00ffffff;
    mask.i[2] = 0x00ffffff;
    mask.i[3] = 0x80ffffff;
    mask.i[4] = 0x80ffffff;
    mask.i[5] = 0x00ffffff;
    mask.i[6] = 0x80ffffff;
    mask.i[7] = 0x80ffffff;

    for (i = 0; i < 8; i++) {
        expect.f[i] = (mask.i[i] & 0x80000000) ? *(float *)(ptr+(vind.i[i] * 4 + 0))
                                               : old_dst.f[i];
    }

    __asm {     /* VGATHERDPS ymm1, [rax + ymm2_vind*4], ymm3_mask */
        lea REG, ADDRPTR [arr-4] /* the memory rewrite of the vgather will add 4 back to the address */
        vmovups YMMa, [old_dst.ms];
        vmovups YMMb, [vind.ms];
        vmovups YMMc, [mask.ms];
        vmovups [YMMDestBefore.ms],  YMMa;
        vmovups [YMMIndexBefore.ms], YMMb;
        vmovups [YMMMaskBefore.ms],  YMMc;
        vgatherdps YMMa, [REG + YMMb*4], YMMc
        vmovups [d.ms], YMMa;
        vmovups [YMMIndexAfter.ms], YMMb;
        vmovups [YMMMaskAfter.ms],  YMMc;
    }
    printVl("YMM dest  before:     ", YMMDestBefore.l);
    printVl("YMM dest  after:      ", d.l);
    printVl("YMM index before:     ", YMMIndexBefore.l);
    printVl("YMM index after:      ", YMMIndexAfter.l);
    printVl("YMM mask  before:     ", YMMMaskBefore.l);
    printVl("YMM mask  after:      ", YMMMaskAfter.l);
    check_res(1);

    for (i = 4; i < 8; i++) {
        expect.f[i] = 0;
    }
    __asm {     /* VGATHERDPS xmm1, [rax + xmm2_vind*4], xmm3_mask */
        lea REG, ADDRPTR [arr-4] /* the memory rewrite of the vgather will add 4 back to the address */
        vmovups YMMa, [old_dst.ms];
        vmovups YMMb, [vind.ms];
        vmovups YMMc, [mask.ms];
        vmovups [YMMDestBefore.ms],  YMMa;
        vmovups [YMMIndexBefore.ms], YMMb;
        vmovups [YMMMaskBefore.ms],  YMMc;
        vgatherdps XMMa, [REG + XMMb*4], XMMc
        vmovups [d.ms], YMMa;
        vmovups [YMMIndexAfter.ms], YMMb;
        vmovups [YMMMaskAfter.ms],  YMMc;
    }
    printVl("YMM dest  before:     ", YMMDestBefore.l);
    printVl("YMM dest  after:      ", d.l);
    printVl("YMM index before:     ", YMMIndexBefore.l);
    printVl("YMM index after:      ", YMMIndexAfter.l);
    printVl("YMM mask  before:     ", YMMMaskBefore.l);
    printVl("YMM mask  after:      ", YMMMaskAfter.l);
    check_res(2);

    for (i = 0; i < 8; i++) {
        expect.f[i] = (mask.i[i] & 0x80000000) ? *(float *)(ptr+(vind.i[i] * 4 + 8))
                                               : old_dst.f[i];
    }
    __asm {     /* VGATHERDPS ymm1, [rax + ymm2_vind*4 + 8], ymm3_mask */
        lea REG, ADDRPTR [arr-4] /* the memory rewrite of the vgather will add 4 back to the address */
        vmovups YMMa, [old_dst.ms];
        vmovups YMMb, [vind.ms];
        vmovups YMMc, [mask.ms];
        vmovups [YMMDestBefore.ms],  YMMa;
        vmovups [YMMIndexBefore.ms], YMMb;
        vmovups [YMMMaskBefore.ms],  YMMc;
        vgatherdps YMMa, [REG + YMMb*4 + 8], YMMc
        vmovups [d.ms], YMMa;
        vmovups [YMMIndexAfter.ms], YMMb;
        vmovups [YMMMaskAfter.ms],  YMMc;
    }
    printVl("YMM dest  before:     ", YMMDestBefore.l);
    printVl("YMM dest  after:      ", d.l);
    printVl("YMM index before:     ", YMMIndexBefore.l);
    printVl("YMM index after:      ", YMMIndexAfter.l);
    printVl("YMM mask  before:     ", YMMMaskBefore.l);
    printVl("YMM mask  after:      ", YMMMaskAfter.l);
    check_res(3);

    for (i = 4; i < 8; i++) {
        expect.f[i] = 0;
    }
    __asm {     /* VGATHERDPS xmm1, [rax + xmm2_vind*4 + 8], xmm3_mask */
        lea REG, ADDRPTR [arr-4] /* the memory rewrite of the vgather will add 4 back to the address */
        vmovups YMMa, [old_dst.ms];
        vmovups YMMb, [vind.ms];
        vmovups YMMc, [mask.ms];
        vmovups [YMMDestBefore.ms],  YMMa;
        vmovups [YMMIndexBefore.ms], YMMb;
        vmovups [YMMMaskBefore.ms],  YMMc;
        vgatherdps XMMa, [REG + XMMb*4 + 8], XMMc
        vmovups [d.ms], YMMa;
        vmovups [YMMIndexAfter.ms], YMMb;
        vmovups [YMMMaskAfter.ms],  YMMc;
    }
    printVl("YMM dest  before:     ", YMMDestBefore.l);
    printVl("YMM dest  after:      ", d.l);
    printVl("YMM index before:     ", YMMIndexBefore.l);
    printVl("YMM index after:      ", YMMIndexAfter.l);
    printVl("YMM mask  before:     ", YMMMaskBefore.l);
    printVl("YMM mask  after:      ", YMMMaskAfter.l);
    check_res(4);

    if(!res) PRINTF("gatherdps passed\n");
    return res;
}
Beispiel #16
0
static int config(uint32_t width, uint32_t height,
                       uint32_t d_width, uint32_t d_height,
                       uint32_t flags, char *title, uint32_t format)
{

    int x_off, y_off;
    int wanted_width, wanted_height;

    static unsigned char *vo_dga_base;
    static int prev_width, prev_height;

#ifdef HAVE_DGA2
    // needed to change DGA video mode
    int mX = VO_DGA_INVALID_RES, mY = VO_DGA_INVALID_RES, mVBI =
        100000, mMaxY = 0, i, j = 0;
    int dga_modenum;
    XDGAMode *modeline;
    XDGADevice *dgadevice;
#else
#ifdef HAVE_XF86VM
    unsigned int vm_event, vm_error;
    unsigned int vm_ver, vm_rev;
    int i, j = 0, have_vm = 0;
    int mX = VO_DGA_INVALID_RES, mY = VO_DGA_INVALID_RES, mVBI =
        100000, mMaxY = 0, dga_modenum;
#endif
    int bank, ram;
#endif

    vo_dga_src_format = format;

    wanted_width = d_width;
    wanted_height = d_height;

    if (!wanted_height)
        wanted_height = height;
    if (!wanted_width)
        wanted_width = width;

    if (!vo_dbpp)
    {
        if ((format & IMGFMT_BGR_MASK) == IMGFMT_BGR)
        {
            vo_dga_src_mode = vd_ModeValid(format & 0xff);
        }
    } else
    {
        vo_dga_src_mode = vd_ModeValid(vo_dbpp);
    }
    vo_dga_hw_mode = SRC_MODE.vdm_hw_mode;

    if (!vo_dga_src_mode)
    {
        mp_msg(MSGT_VO, MSGL_ERR, "vo_dga: unsupported video format!\n");
        return 1;
    }

    vo_dga_vp_width = vo_screenwidth;
    vo_dga_vp_height = vo_screenheight;

    mp_msg(MSGT_VO, MSGL_V, "vo_dga: XServer res: %dx%d\n",
           vo_dga_vp_width, vo_dga_vp_height);

// choose a suitable mode ...

#ifdef HAVE_DGA2
// Code to change the video mode added by Michael Graffam
// [email protected]

    mp_msg(MSGT_VO, MSGL_V, "vo_dga: vo_modelines=%p, vo_modecount=%d\n",
           vo_modelines, vo_modecount);

    if (vo_modelines == NULL)
    {
        mp_msg(MSGT_VO, MSGL_ERR, "vo_dga: can't get modelines\n");
        return 1;
    }

    mp_msg(MSGT_VO, MSGL_INFO,
           "vo_dga: DGA 2.0 available :-) Can switch resolution AND depth!\n");
    for (i = 0; i < vo_modecount; i++)
    {
        if (vd_ModeEqual(vo_modelines[i].depth,
                         vo_modelines[i].bitsPerPixel,
                         vo_modelines[i].redMask,
                         vo_modelines[i].greenMask,
                         vo_modelines[i].blueMask, vo_dga_hw_mode))
        {

            mp_msg(MSGT_VO, MSGL_V, "maxy: %4d, depth: %2d, %4dx%4d, ",
                   vo_modelines[i].maxViewportY, vo_modelines[i].depth,
                   vo_modelines[i].imageWidth,
                   vo_modelines[i].imageHeight);
            if (check_res
                (i, wanted_width, wanted_height, vo_modelines[i].depth,
                 vo_modelines[i].viewportWidth,
                 vo_modelines[i].viewportHeight,
                 (unsigned) vo_modelines[i].verticalRefresh,
                 vo_modelines[i].maxViewportY, &mX, &mY, &mVBI, &mMaxY))
                j = i;
        }
    }
    mp_msg(MSGT_VO, MSGL_INFO,
           "vo_dga: Selected hardware mode %4d x %4d @ %3d Hz @ depth %2d, bitspp %2d.\n",
           mX, mY, mVBI, HW_MODE.vdm_depth, HW_MODE.vdm_bitspp);
    mp_msg(MSGT_VO, MSGL_INFO,
           "vo_dga: Video parameters by codec: %3d x %3d, depth %2d, bitspp %2d.\n",
           width, height, SRC_MODE.vdm_depth, SRC_MODE.vdm_bitspp);
    vo_dga_vp_width = mX;
    vo_dga_vp_height = mY;

    if ((flags & VOFLAG_SWSCALE) || (flags & VOFLAG_FULLSCREEN))
    {                           /* -zoom or -fs */
        scale_dstW = (d_width + 7) & ~7;
        scale_dstH = d_height;
        scale_srcW = width;
        scale_srcH = height;
        aspect_save_screenres(mX, mY);
        aspect_save_orig(scale_srcW, scale_srcH);
        aspect_save_prescale(scale_dstW, scale_dstH);
        if (flags & VOFLAG_FULLSCREEN)       /* -fs */
            aspect(&scale_dstW, &scale_dstH, A_ZOOM);
        else if (flags & VOFLAG_SWSCALE)  /* -fs */
            aspect(&scale_dstW, &scale_dstH, A_NOZOOM);
        mp_msg(MSGT_VO, MSGL_INFO,
               "vo_dga: Aspect corrected size for SwScaler: %4d x %4d.\n",
               scale_dstW, scale_dstH);
        /* XXX this is a hack, but I'm lazy ;-) :: atmos */
        width = scale_dstW;
        height = scale_dstH;
    }

    vo_dga_width = vo_modelines[j].bytesPerScanline / HW_MODE.vdm_bytespp;
    dga_modenum = vo_modelines[j].num;
    modeline = vo_modelines + j;

#else

#ifdef HAVE_XF86VM

    mp_msg(MSGT_VO, MSGL_INFO,
           "vo_dga: DGA 1.0 compatibility code: Using XF86VidMode for mode switching!\n");

    if (XF86VidModeQueryExtension(mDisplay, &vm_event, &vm_error))
    {
        XF86VidModeQueryVersion(mDisplay, &vm_ver, &vm_rev);
        mp_msg(MSGT_VO, MSGL_INFO,
               "vo_dga: XF86VidMode Extension v%i.%i\n", vm_ver, vm_rev);
        have_vm = 1;
    } else
    {
        mp_msg(MSGT_VO, MSGL_ERR,
               "vo_dga: XF86VidMode Extension not available.\n");
    }

#define GET_VREFRESH(dotclk, x, y)( (((dotclk)/(x))*1000)/(y) )

    if (have_vm)
    {
        int modecount;

        XF86VidModeGetAllModeLines(mDisplay, mScreen, &modecount,
                                   &vo_dga_vidmodes);

        if (vo_dga_vidmodes != NULL)
        {
            for (i = 0; i < modecount; i++)
            {
                if (check_res(i, wanted_width, wanted_height,
                              vo_dga_modes[vo_dga_hw_mode].vdm_depth,
                              vo_dga_vidmodes[i]->hdisplay,
                              vo_dga_vidmodes[i]->vdisplay,
                              GET_VREFRESH(vo_dga_vidmodes[i]->dotclock,
                                           vo_dga_vidmodes[i]->htotal,
                                           vo_dga_vidmodes[i]->vtotal),
                              0, &mX, &mY, &mVBI, &mMaxY))
                    j = i;
            }

            mp_msg(MSGT_VO, MSGL_INFO,
                   "vo_dga: Selected video mode %4d x %4d @ %3d Hz @ depth %2d, bitspp %2d, video %3d x %3d.\n",
                   mX, mY, mVBI,
                   vo_dga_modes[vo_dga_hw_mode].vdm_depth,
                   vo_dga_modes[vo_dga_hw_mode].vdm_bitspp, width, height);
        } else
        {
            mp_msg(MSGT_VO, MSGL_INFO,
                   "vo_dga: XF86VidMode returned no screens - using current resolution.\n");
        }
        dga_modenum = j;
        vo_dga_vp_width = mX;
        vo_dga_vp_height = mY;
    }

#else
    mp_msg(MSGT_VO, MSGL_INFO,
           "vo_dga: Only have DGA 1.0 extension and no XF86VidMode :-(\n");
    mp_msg(MSGT_VO, MSGL_INFO,
           "        Thus, resolution switching is NOT possible.\n");

#endif
#endif

    vo_dga_src_width = width;
    vo_dga_src_height = height;

    if (vo_dga_src_width > vo_dga_vp_width ||
        vo_dga_src_height > vo_dga_vp_height)
    {
        mp_msg(MSGT_VO, MSGL_ERR,
               "vo_dga: Sorry, video larger than viewport is not yet supported!\n");
        // ugly, do something nicer in the future ...
#ifndef HAVE_DGA2
#ifdef HAVE_XF86VM
        if (vo_dga_vidmodes)
        {
            XFree(vo_dga_vidmodes);
            vo_dga_vidmodes = NULL;
        }
#endif
#endif
        return 1;
    }

    if (vo_dga_vp_width == VO_DGA_INVALID_RES)
    {
        mp_msg(MSGT_VO, MSGL_ERR,
               "vo_dga: Something is wrong with your DGA. There doesn't seem to be a\n"
               "         single suitable mode!\n"
               "         Please file a bug report (see DOCS/HTML/en/bugreports.html)\n");
#ifndef HAVE_DGA2
#ifdef HAVE_XF86VM
        if (vo_dga_vidmodes)
        {
            XFree(vo_dga_vidmodes);
            vo_dga_vidmodes = NULL;
        }
#endif
#endif
        return 1;
    }
// now let's start the DGA thing 

    if (!vo_config_count || width != prev_width || height != prev_height)
    {
#ifdef HAVE_DGA2

        if (!XDGAOpenFramebuffer(mDisplay, mScreen))
        {
            mp_msg(MSGT_VO, MSGL_ERR,
                   "vo_dga: Framebuffer mapping failed!!!\n");
            return 1;
        }

        dgadevice = XDGASetMode(mDisplay, mScreen, dga_modenum);
        XDGASync(mDisplay, mScreen);

        vo_dga_base = dgadevice->data;
        XFree(dgadevice);

        XDGASetViewport(mDisplay, mScreen, 0, 0, XDGAFlipRetrace);

#else

#ifdef HAVE_XF86VM
        if (have_vm)
        {
            XF86VidModeLockModeSwitch(mDisplay, mScreen, 0);
            // Two calls are needed to switch modes on my ATI Rage 128. Why?
            // for riva128 one call is enough!
            XF86VidModeSwitchToMode(mDisplay, mScreen,
                                    vo_dga_vidmodes[dga_modenum]);
            XF86VidModeSwitchToMode(mDisplay, mScreen,
                                    vo_dga_vidmodes[dga_modenum]);
        }
#endif

        XF86DGAGetViewPortSize(mDisplay, mScreen,
                               &vo_dga_vp_width, &vo_dga_vp_height);

        XF86DGAGetVideo(mDisplay, mScreen,
                        (char **) &vo_dga_base, &vo_dga_width, &bank,
                        &ram);

        XF86DGADirectVideo(mDisplay, mScreen,
                           XF86DGADirectGraphics | XF86DGADirectMouse |
                           XF86DGADirectKeyb);

        XF86DGASetViewPort(mDisplay, mScreen, 0, 0);

#endif
    }
    // do some more checkings here ...

    mp_msg(MSGT_VO, MSGL_V,
           "vo_dga: bytes/line: %d, screen res: %dx%d, depth: %d, base: %p, bpp: %d\n",
           vo_dga_width, vo_dga_vp_width, vo_dga_vp_height,
           HW_MODE.vdm_bytespp, vo_dga_base, HW_MODE.vdm_bitspp);

    x_off = (vo_dga_vp_width - vo_dga_src_width) >> 1;
    y_off = (vo_dga_vp_height - vo_dga_src_height) >> 1;

    vo_dga_bytes_per_line = vo_dga_src_width * HW_MODE.vdm_bytespp;
    vo_dga_lines = vo_dga_src_height;

    vo_dga_src_offset = 0;
    vo_dga_vp_offset =
        (y_off * vo_dga_width + x_off) * HW_MODE.vdm_bytespp;

    vo_dga_vp_skip = (vo_dga_width - vo_dga_src_width) * HW_MODE.vdm_bytespp;   // todo

    mp_msg(MSGT_VO, MSGL_V, "vo_dga: vp_off=%d, vp_skip=%d, bpl=%d\n",
           vo_dga_vp_offset, vo_dga_vp_skip, vo_dga_bytes_per_line);


    XGrabKeyboard(mDisplay, DefaultRootWindow(mDisplay), True,
                  GrabModeAsync, GrabModeAsync, CurrentTime);
    if (vo_grabpointer)
        XGrabPointer(mDisplay, DefaultRootWindow(mDisplay), True,
                     ButtonPressMask, GrabModeAsync, GrabModeAsync,
                     None, None, CurrentTime);

    if (!vo_config_count || width != prev_width || height != prev_height)
    {
        init_video_buffers(vo_dga_base,
                           vo_dga_vp_height,
                           vo_dga_width * HW_MODE.vdm_bytespp,
#ifdef HAVE_DGA2
                           modeline->maxViewportY,
#else
                           vo_dga_vp_height,
#endif
                           vo_doublebuffering);
        prev_width = width;
        prev_height = height;
    }

    mp_msg(MSGT_VO, MSGL_V, "vo_dga: Using %d frame buffer%s.\n",
           vo_dga_nr_video_buffers,
           vo_dga_nr_video_buffers == 1 ? "" : "s");

    vo_dga_is_running = 1;
    return 0;
}
Beispiel #17
0
int main(int argc, char *argv[])
{
  int proc_num, proc_rank;
  MPI_Status status;

  size_t b_muls_num, b_col_num, b_row_num;
  BLOCK *b_muls;

  uint i, j, k, l;

  T *mat_a = (T *)malloc(NN * sizeof(T));
  T *mat_b = (T *)malloc(NN * sizeof(T));
  T *mat_c = (T *)malloc(NN * sizeof(T));

  generate_mat(mat_c, NN, 0);
  
  MPI_Init(&argc, &argv);
  MPI_Comm_size(MPI_COMM_WORLD, &proc_num);
  MPI_Comm_rank(MPI_COMM_WORLD, &proc_rank);

  if (proc_rank == 0)
  {
    generate_mat(mat_a, NN, VAL_A);
    generate_mat(mat_b, NN, VAL_B);
  }

  MPI_Bcast(mat_a, NN, T_MPI, 0, MPI_COMM_WORLD);
  MPI_Bcast(mat_b, NN, T_MPI, 0, MPI_COMM_WORLD);

  b_muls_num = M * K * MAX(M, K);
  b_muls = (BLOCK *)malloc(b_muls_num * sizeof(BLOCK));
  b_row_num = N / K;  //Count of rows in the single block
  b_col_num = N / M;  //Count of cols in the single block
  for (l = i = 0; i < K; ++i)
  {
    uint row_low = i * b_row_num;
    uint row_num = (i == K - 1 ? N : (i + 1) * b_row_num) - row_low;  //Count of rows in the first matrix
    for (j = 0; j < M; ++j)
    {
      uint col_low = j * b_col_num;
      uint col_num = (j == M - 1 ? N : (j + 1) * b_col_num) - col_low;  //Count of cols/rows in the first/second matrix
      for (k = 0; k < K; ++k, ++l)
      {
        b_muls[l].r_low_first = row_low;
        b_muls[l].c_low_first = col_low;

        b_muls[l].r_low_second = col_low;
        b_muls[l].c_low_second = k * b_row_num;

        b_muls[l].r_num_first = row_num;
        b_muls[l].c_num_second = (k == K - 1 ? N : (k + 1) * b_row_num) - b_muls[l].c_low_second;
        b_muls[l].r_c_num = col_num;
      }
    }
  }

  for (l = proc_rank; l < b_muls_num; l += proc_num)
    for (i = b_muls[l].r_low_first; i < b_muls[l].r_low_first + b_muls[l].r_num_first; ++i)
      for (j = b_muls[l].c_low_second; j < b_muls[l].c_low_second + b_muls[l].c_num_second; ++j)
        for (k = 0; k < b_muls[l].r_c_num; ++k)
          mat_c[i * N + j] += 
            mat_a[i * N + k + b_muls[l].c_low_first] * 
            mat_b[(k + b_muls[l].r_low_second) * N + j];
  
  if (proc_rank == 0)
  {
    T *foo = (T *)malloc(NN * sizeof(T));
    for (i = 1; i < proc_num; ++i)
    {
      MPI_Recv(foo, NN, T_MPI, i, 0, MPI_COMM_WORLD, &status);
      for (j = 0; j < N; ++j)
        for (k = 0; k < N; ++k)
          mat_c[j*N+k] += foo[j*N+k];
    }
    free(foo);
  }
  else
    MPI_Send(mat_c, NN, T_MPI, 0, 0, MPI_COMM_WORLD);

  MPI_Finalize();

  if (proc_rank == 0)
    printf("Result: %i\n", check_res(mat_c, NN, VAL_C));

  free(mat_a);
  free(mat_b);
  free(mat_c);

  return EXIT_SUCCESS;
}
int main()
{
    int i;

    init_double(old_dst.d, 4, 3.14);	/* Initialize memory and the registers */

    vind.l[0] = 0;
    vind.l[1] = 6;
    vind.l[2] = 3;
    vind.l[3] = 1;

    mask.l[0] = 0x80ffffff00000000LL;
    mask.l[1] = 0x00ffffff10000000LL;
    mask.l[2] = 0x00ffffff00000000LL;
    mask.l[3] = 0x80ffffff10000000LL;

    init_long(&d, 0xdeadbeef);
    for (i = 0; i < 4; i++) {
        expect.d[i] = (mask.l[i] & 0x8000000000000000LL)
            ? *(double *)(ptr+(vind.l[i] * 8 + 0))
            : old_dst.d[i];
    }

    __asm {     /* VGATHERDPD ymm1, [rax + ymm2_vind*8], ymm3_mask */
        lea REG, ADDRPTR [arr-4] /* the memory rewrite of the vgather will add 4 back to the address */
        vmovupd YMMa, [old_dst.md];
        vmovupd YMMb, [vind.md];
        vmovupd YMMc, [mask.md];
        vmovupd [YMMDestBefore.ms],  YMMa;
        vmovupd [YMMIndexBefore.ms], YMMb;
        vmovupd [YMMMaskBefore.ms],  YMMc;
        vgatherqpd YMMa, [REG + YMMb*8], YMMc
        vmovupd [d.md], YMMa;
        vmovupd [YMMIndexAfter.ms], YMMb;
        vmovupd [YMMMaskAfter.ms],  YMMc;
    }
    printVl("YMM dest  before:     ", YMMDestBefore.l);
    printVl("YMM dest  after:      ", d.l);
    printVl("YMM index before:     ", YMMIndexBefore.l);
    printVl("YMM index after:      ", YMMIndexAfter.l);
    printVl("YMM mask  before:     ", YMMMaskBefore.l);
    printVl("YMM mask  after:      ", YMMMaskAfter.l);
    check_res(1);

    init_long(&d, 0xdeadbeef);
    for (i = 2; i < 4; i++) {
        expect.d[i] = 0;
    }
    __asm {     /* VGATHERDPD xmm1, [rax + xmm2_vind*8], xmm3_mask */
        lea REG, ADDRPTR [arr-4] /* the memory rewrite of the vgather will add 4 back to the address */
        vmovupd YMMa, [old_dst.md];
        vmovupd YMMb, [vind.md];
        vmovupd YMMc, [mask.md];
        vmovupd [YMMDestBefore.ms],  YMMa;
        vmovupd [YMMIndexBefore.ms], YMMb;
        vmovupd [YMMMaskBefore.ms],  YMMc;
        vgatherqpd XMMa, [REG + XMMb*8], XMMc
        vmovupd [d.md], YMMa;
        vmovupd [YMMIndexAfter.ms], YMMb;
        vmovupd [YMMMaskAfter.ms],  YMMc;
    }
    printVl("YMM dest  before:     ", YMMDestBefore.l);
    printVl("YMM dest  after:      ", d.l);
    printVl("YMM index before:     ", YMMIndexBefore.l);
    printVl("YMM index after:      ", YMMIndexAfter.l);
    printVl("YMM mask  before:     ", YMMMaskBefore.l);
    printVl("YMM mask  after:      ", YMMMaskAfter.l);
    check_res(2);

    init_long(&d, 0xdeadbeef);
    for (i = 0; i < 4; i++) {
        expect.d[i] = (mask.l[i] & 0x8000000000000000LL)
            ? *(double *)(ptr+(vind.l[i] * 8 + 8))
            : old_dst.d[i];
    }
    __asm {     /* VGATHERDPD ymm1, [rax + ymm2_vind*8 + 8], ymm3_mask */
        lea REG, ADDRPTR [arr-4] /* the memory rewrite of the vgather will add 4 back to the address */
        vmovupd YMMa, [old_dst.md];
        vmovupd YMMb, [vind.md];
        vmovupd YMMc, [mask.md];
        vmovupd [YMMDestBefore.ms],  YMMa;
        vmovupd [YMMIndexBefore.ms], YMMb;
        vmovupd [YMMMaskBefore.ms],  YMMc;
        vgatherqpd YMMa, [REG + YMMb*8 + 8], YMMc
        vmovupd [d.md], YMMa;
        vmovupd [YMMIndexAfter.ms], YMMb;
        vmovupd [YMMMaskAfter.ms],  YMMc;
    }
    printVl("YMM dest  before:     ", YMMDestBefore.l);
    printVl("YMM dest  after:      ", d.l);
    printVl("YMM index before:     ", YMMIndexBefore.l);
    printVl("YMM index after:      ", YMMIndexAfter.l);
    printVl("YMM mask  before:     ", YMMMaskBefore.l);
    printVl("YMM mask  after:      ", YMMMaskAfter.l);
    check_res(3);

    init_long(&d, 0xdeadbeef);
    for (i = 2; i < 4; i++) {
        expect.d[i] = 0;
    }
    __asm {     /* VGATHERDPD xmm1, [rax + xmm2_vind*8 + 8], xmm3_mask */
        lea REG, ADDRPTR [arr-4] /* the memory rewrite of the vgather will add 4 back to the address */
        vmovupd YMMa, [old_dst.md];
        vmovupd YMMb, [vind.md];
        vmovupd YMMc, [mask.md];
        vmovupd [YMMDestBefore.ms],  YMMa;
        vmovupd [YMMIndexBefore.ms], YMMb;
        vmovupd [YMMMaskBefore.ms],  YMMc;
        vgatherqpd XMMa, [REG + XMMb*8 + 8], XMMc
        vmovupd [d.md], YMMa;
        vmovupd [YMMIndexAfter.ms], YMMb;
        vmovupd [YMMMaskAfter.ms],  YMMc;
    }
    printVl("YMM dest  before:     ", YMMDestBefore.l);
    printVl("YMM dest  after:      ", d.l);
    printVl("YMM index before:     ", YMMIndexBefore.l);
    printVl("YMM index after:      ", YMMIndexAfter.l);
    printVl("YMM mask  before:     ", YMMMaskBefore.l);
    printVl("YMM mask  after:      ", YMMMaskAfter.l);
    check_res(4);

    if(!res) PRINTF("gatherqpd passed\n");
    return res;
}