/*
 * Connect to the TEE and open a session with the SHA performance TA.
 *
 * Initializes the context `ctx` and then opens the session `sess` (both are
 * declared outside this function) against TA_SHA_PERF_UUID using public
 * login. The result of each client-API call is handed to check_res() along
 * with the name of the call that produced it.
 */
static void open_ta()
{
	uint32_t err_origin;
	TEEC_UUID uuid = TA_SHA_PERF_UUID;
	TEEC_Result res = TEEC_InitializeContext(NULL, &ctx);

	check_res(res,"TEEC_InitializeContext");

	res = TEEC_OpenSession(&ctx, &sess, &uuid, TEEC_LOGIN_PUBLIC,
			       NULL, NULL, &err_origin);
	check_res(res,"TEEC_OpenSession");
}
/*
 * Allocate the two shared-memory regions used by the test.
 *
 * sz:   payload size in bytes; the input region is sized sz + offset
 *       (`offset` is declared outside this function).
 * algo: algorithm identifier; the output region is sized hash_size(algo).
 *
 * Both allocations go through TEEC_AllocateSharedMemory() on `ctx`, and each
 * result is passed to check_res().
 */
static void alloc_shm(size_t sz, uint32_t algo)
{
	TEEC_Result res;

	/* Input region: payload plus the configured offset. */
	in_shm.size = sz + offset;
	in_shm.buffer = NULL;
	res = TEEC_AllocateSharedMemory(&ctx, &in_shm);
	check_res(res, "TEEC_AllocateSharedMemory");

	/* Output region: sized for the digest of the selected algorithm. */
	out_shm.size = hash_size(algo);
	out_shm.buffer = NULL;
	res = TEEC_AllocateSharedMemory(&ctx, &out_shm);
	check_res(res, "TEEC_AllocateSharedMemory");
}
// Receive a multi-part message where the first part is read asynchronously
// and any remaining parts are read synchronously from inside the completion
// handler, then compare what was received against expected_bufs.
//
// ios           - io_service that is run until the handler stops it
// socket        - socket to receive from
// expected_bufs - buffers the received parts are compared against; its
//                 length also determines how many receive buffers are created
// flags         - accepted for signature parity with the sibling helpers;
//                 not forwarded to any receive call here
//
// Throws (via std::rethrow_exception) a boost::system::system_error when the
// asynchronous receive reports an error code, or whatever exception
// check_res() produced when the received data does not match.
void freceive_more_async(boost::asio::io_service & ios, aziomq::socket & socket,
                         const const_buf_vec & expected_bufs, int flags) {
    SYNC_LOG(__PRETTY_FUNCTION__);

    // create vector of raw bufs to fill from length of expected_bufs
    buf_vec_t buf_vec(std::distance(std::begin(expected_bufs), std::end(expected_bufs)));
    zero(buf_vec);

    // create azio buffer vector
    mutable_buf_vec bufs;
    init(bufs, buf_vec);

    auto it = std::begin(bufs);
    auto e = std::end(bufs);

    std::exception_ptr err;
    socket.async_receive_more(boost::asio::buffer(*it++),
        [&ios, &socket, &bufs, &it, e, &err, expected_bufs](const boost::system::error_code & ec,
                                                            aziomq::socket::more_result mr) {
            if (ec) {
                err = std::make_exception_ptr(boost::system::system_error(ec));
            } else {
                // Drain the remaining parts of the message first ...
                for (; mr.second && it != e; ++it) {
                    mr = socket.receive_more(boost::asio::buffer(*it));
                }
                // ... then validate exactly once. Validating inside the loop
                // compared partially filled buffers on every iteration and,
                // worse, skipped validation altogether whenever the loop body
                // never ran (single-part message or single expected buffer).
                err = check_res(bufs, expected_bufs);
            }
            ios.stop();
        });

    ios.run();
    if (err != std::exception_ptr())
        std::rethrow_exception(err);
}
// Receive a whole message asynchronously into freshly zeroed buffers and
// compare the result against expected_bufs.
//
// The io_service is run until the completion handler stops it. Any failure
// (a non-zero error code, or the exception returned by check_res()) is
// captured in the handler and rethrown to the caller after run() returns.
// `flags` is part of the common helper signature and is not used here.
void freceive_async(boost::asio::io_service & ios, aziomq::socket & socket,
                    const const_buf_vec & expected_bufs, int flags) {
    SYNC_LOG(__PRETTY_FUNCTION__);

    // One raw buffer per expected buffer, zero-filled before use.
    const auto part_count = std::distance(std::begin(expected_bufs), std::end(expected_bufs));
    buf_vec_t raw_parts(part_count);
    zero(raw_parts);

    // Asio-style mutable views over the raw buffers.
    mutable_buf_vec targets;
    init(targets, raw_parts);

    std::exception_ptr failure;
    socket.async_receive(targets,
        [&ios, &targets, &failure, expected_bufs](const boost::system::error_code & ec,
                                                  size_t bytes_transferred) {
            if (!ec) {
                failure = check_res(targets, expected_bufs);
            } else {
                failure = std::make_exception_ptr(boost::system::system_error(ec));
            }
            ios.stop();
        });

    ios.run();

    if (failure)
        std::rethrow_exception(failure);
}
/*
 * Report the reserved-lock state for a file.
 *
 * The sqlite3_file handle is treated as a little_file, and its `name` member
 * is passed to check_res(). The value obtained is traced and returned as is.
 */
static int little_check_reserved_lock(sqlite3_file *file)
{
    int reserved = check_res(((little_file*)file)->name);

    trace("RESERVED: %d\n", reserved);
    return reserved;
}
std::exception_ptr check_res(const mutable_buf_vec & bufs, const const_buf_vec & expected_bufs) { auto expected_len = std::distance(std::begin(expected_bufs), std::end(expected_bufs)); auto actual_len = std::distance(std::begin(bufs), std::end(bufs)); if (expected_len != actual_len) { std::ostringstream stm; stm << boost::format("Expecting %1% buffers, got %2% buffers") % expected_len % actual_len; return std::make_exception_ptr(std::runtime_error(stm.str())); } return check_res(std::begin(bufs), std::end(bufs), std::begin(expected_bufs)); }
/*
 * Thread body that publishes the even values 2, 4, ... up to LIM.
 *
 * Before each value it waits on bin_sem, then stores the value in the shared
 * `counter` and prints it. `arg` is unused. The thread ends via
 * pthread_exit(NULL).
 */
void *th_func_first(void *arg)
{
    int even = 2;

    while (even <= LIM) {
        /* Wait for the other side to post the semaphore. */
        check_res(sem_wait(&bin_sem), SEM_ERR_WAIT, NULL);
        counter = even;
        printf("TH1: %i\n", counter);
        even += 2;
    }

    pthread_exit(NULL);
}
/*
 * Thread body that publishes the odd values 1, 3, ... up to LIM.
 *
 * Each value is stored in the shared `counter` and printed, then bin_sem is
 * posted. The thread then sleeps in one-second steps for as long as
 * `counter` is still odd. `arg` is unused. The thread ends via
 * pthread_exit(NULL).
 */
void *th_func_second(void *arg)
{
    int odd = 1;

    while (odd <= LIM) {
        counter = odd;
        printf("TH2: %i\n", counter);
        check_res(sem_post(&bin_sem), SEM_ERR_POST, NULL);

        /* Hold here until counter is no longer odd. */
        while ((counter % 2) != 0)
            sleep(1);

        odd += 2;
    }

    pthread_exit(NULL);
}
/*
 * Send TA_SHA_PERF_CMD_PREPARE_OP to the TA on session `sess`.
 *
 * The operation carries a single value-input parameter whose `a` field is
 * the algorithm id `algo` (declared outside this function). The invocation
 * result is passed to check_res().
 */
static void prepare_op()
{
	uint32_t ret_origin;
	TEEC_Result res;
	TEEC_Operation op;

	/* Start from an all-zero operation, then fill in parameter 0. */
	memset(&op, 0, sizeof(op));
	op.params[0].value.a = algo;
	op.paramTypes = TEEC_PARAM_TYPES(TEEC_VALUE_INPUT, TEEC_NONE,
					 TEEC_NONE, TEEC_NONE);

	res = TEEC_InvokeCommand(&sess, TA_SHA_PERF_CMD_PREPARE_OP, &op,
				 &ret_origin);
	check_res(res, "TEEC_InvokeCommand");
}
/*
 * Run one TA_SHA_PERF_CMD_PROCESS invocation and time it.
 *
 * in, size: input buffer; refilled through read_random() first when
 *           `random_in` is set.
 * op:       prepared operation passed to TEEC_InvokeCommand().
 * l:        unused in this function.
 *
 * Returns timespec_diff_ns() of the timestamps taken immediately before the
 * invocation and after its result has been checked.
 */
static uint64_t run_test_once(void *in, size_t size, TEEC_Operation *op,
			      unsigned int l)
{
	uint32_t ret_origin;
	TEEC_Result res;
	struct timespec started, finished;

	if (random_in)
		read_random(in, size);

	get_current_time(&started);
	res = TEEC_InvokeCommand(&sess, TA_SHA_PERF_CMD_PROCESS, op,
				 &ret_origin);
	check_res(res, "TEEC_InvokeCommand");
	get_current_time(&finished);

	return timespec_diff_ns(&started, &finished);
}
// Receive a whole message synchronously into freshly zeroed buffers and
// compare it with expected_bufs.
//
// The io_service argument is unused. `flags` is forwarded to
// socket.receive(). If check_res() reports a mismatch, the exception it
// returned is rethrown to the caller.
void freceive_sync(boost::asio::io_service &, aziomq::socket & socket,
                   const const_buf_vec & expected_bufs, int flags) {
    SYNC_LOG(__PRETTY_FUNCTION__);

    // One raw buffer per expected buffer, zero-filled before use.
    const auto part_count = std::distance(std::begin(expected_bufs), std::end(expected_bufs));
    buf_vec_t raw_parts(part_count);
    zero(raw_parts);

    // Asio-style mutable views over the raw buffers.
    mutable_buf_vec targets;
    init(targets, raw_parts);

    socket.receive(targets, flags);

    const auto failure = check_res(targets, expected_bufs);
    if (failure)
        std::rethrow_exception(failure);
}
// Receive a multi-part message synchronously, one part per buffer, and
// compare the result with expected_bufs.
//
// Parts are read with socket.receive_more(..., flags) until either the
// returned more_result's second member becomes false or every buffer has been
// used. The io_service argument is unused. If check_res() reports a mismatch,
// the exception it returned is rethrown to the caller.
void freceive_more_sync(boost::asio::io_service &, aziomq::socket & socket,
                        const const_buf_vec & expected_bufs, int flags) {
    SYNC_LOG(__PRETTY_FUNCTION__);

    // One raw buffer per expected buffer, zero-filled before use.
    const auto part_count = std::distance(std::begin(expected_bufs), std::end(expected_bufs));
    buf_vec_t raw_parts(part_count);
    zero(raw_parts);

    // Asio-style mutable views over the raw buffers.
    mutable_buf_vec targets;
    init(targets, raw_parts);

    // Seeded with second == true so the first part is always attempted.
    aziomq::socket::more_result mr = std::make_pair(0, true);
    auto cur = std::begin(targets);
    const auto last = std::end(targets);
    while (mr.second && cur != last) {
        mr = socket.receive_more(boost::asio::buffer(*cur), flags);
        ++cur;
    }

    const auto failure = check_res(targets, expected_bufs);
    if (failure)
        std::rethrow_exception(failure);
}
int main(int argc, char *argv[]) { pthread_t th_first, th_second; counter = 0; check_res(sem_init(&bin_sem, 0, 0), SEM_ERR_INIT, NULL); check_res( pthread_create(&th_first, NULL, th_func_first, NULL), TH_ERR_TMPL_CREATION, TH_FIRST); check_res( pthread_create(&th_second, NULL, th_func_second, NULL), TH_ERR_TMPL_CREATION, TH_SECOND); check_res(pthread_join(th_first, NULL), TH_ERR_TMPL_JOIN, TH_FIRST); check_res(pthread_join(th_second, NULL), TH_ERR_TMPL_JOIN, TH_SECOND); check_res(sem_destroy(&bin_sem), SEM_ERR_DESTROY, NULL); return EXIT_SUCCESS; }
/*
 * init_gl - one-time OpenGL setup for the renderer.
 *
 * opt_data: program options; stored in the global `opts`. The gl_opts string
 *           is searched for the substrings "fixed" (force the fixed-function
 *           path), "rboost" (shift the internal resolution left by one) and
 *           "pintens" (packed intensity pixels).
 * width, height: screen size, stored in scr_w / scr_h.
 *
 * Steps, in order:
 *   - load GL entry points with ogl_LoadFunctions(); exit on failure
 *   - reject implementations where neither major > 1 nor minor >= 4
 *   - pick the internal texture size and halve it until check_res() accepts it
 *   - set up the viewport, clear, swap, and print the GL identification strings
 *   - register gl_debug_callback when ARB_debug_output is present
 *   - exit(1) when neither EXT nor ARB framebuffer_object is available
 *   - create the fractal, max-source and palette objects, trying the GLSL
 *     variants first (unless "fixed" was requested) and falling back to the
 *     fixed-function variants when the result is null
 *   - create the point data and seed the frame/work time history
 *
 * NOTE(review): `im_h = im_w;` runs unconditionally after the if/else, so the
 * im_h = scr_h assigned on the fullscreen branch is overwritten - confirm
 * this is intended.
 * NOTE(review): the work-time seed uses MIN(..., 1) while the frame-time seed
 * uses MAX(..., 1) - confirm the MIN is intended.
 */
void init_gl(const opt_data *opt_data, int width, int height) { if(!ogl_LoadFunctions()) { fprintf(stderr, "ERROR: Failed to load GL extensions\n"); exit(EXIT_FAILURE); } CHECK_GL_ERR; if(!(ogl_GetMajorVersion() > 1 || ogl_GetMinorVersion() >= 4)) { fprintf(stderr, "ERROR: Your OpenGL Implementation is too old\n"); exit(EXIT_FAILURE); } opts = opt_data; GLboolean force_fixed = opts->gl_opts != NULL && strstr(opts->gl_opts, "fixed") != NULL; GLboolean res_boost = opts->gl_opts != NULL && strstr(opts->gl_opts, "rboost") != NULL; packed_intesity_pixels = opts->gl_opts != NULL && strstr(opts->gl_opts, "pintens") != NULL; scr_w = width; scr_h = height; if(opts->fullscreen) im_w = scr_w, im_h=scr_h; else im_w = IMAX(make_pow2(IMAX(scr_w, scr_h)), 128)<<res_boost; im_h = im_w; while(!check_res(im_w, im_h)) { // shrink textures until they work printf(" %ix%i Too big! Shrink texture\n", im_h, im_w); im_w = im_w/2; im_h = im_h/2; } printf("Using internel resolution of %ix%i\n\n", im_h, im_w); CHECK_GL_ERR; setup_viewport(scr_w, scr_h); CHECK_GL_ERR; //glEnable(GL_LINE_SMOOTH); CHECK_GL_ERR; glClear(GL_COLOR_BUFFER_BIT); CHECK_GL_ERR; glRasterPos2f(-1,1 - 20.0f/(scr_h*0.5f)); //draw_string("Loading... 
"); swap_buffers(); CHECK_GL_ERR; printf("GL_VENDOR: %s\n", glGetString(GL_VENDOR)); printf("GL_RENDERER: %s\n", glGetString(GL_RENDERER)); printf("GL_VERSION: %s\n", glGetString(GL_VERSION)); if(ogl_ext_ARB_shading_language_100) printf("GL_SL_VERSION: %s\n", glGetString(GL_SHADING_LANGUAGE_VERSION_ARB)); printf("GL_EXTENSIONS: %s\n", glGetString(GL_EXTENSIONS)); printf("\n\n"); /* void DebugMessageControlARB(enum source, enum type, enum severity, sizei count, const uint* ids, boolean enabled); */ if(ogl_ext_ARB_debug_output) { glDebugMessageCallbackARB((GLDEBUGPROCARB)gl_debug_callback, NULL); #if DEBUG glDebugMessageControlARB(GL_DONT_CARE, GL_DONT_CARE, GL_DONT_CARE, 0, NULL, GL_TRUE); glEnable(GL_DEBUG_OUTPUT_SYNCHRONOUS_ARB); #else glDebugMessageControlARB(GL_DONT_CARE, GL_DEBUG_TYPE_DEPRECATED_BEHAVIOR_ARB, GL_DONT_CARE, 0, NULL, GL_TRUE); glDebugMessageControlARB(GL_DONT_CARE, GL_DEBUG_TYPE_UNDEFINED_BEHAVIOR_ARB, GL_DONT_CARE, 0, NULL, GL_TRUE); #endif printf("Have ARB_debug_output: registered callback\n"); } CHECK_GL_ERR; // for ES we need: // EXT_blend_minmax // EXT_texture_format_BGRA8888 // optionally we should use // EXT_texture_rg + OES_texture_float/OES_texture_half_float // OES_mapbuffer /* if(!ogl_ext_EXT_blend_minmax) { // can stop checking for this, it's in OpenGL 1.4*/ /* printf("missing required gl extension EXT_blend_minmax!\n");*/ /* exit(1);*/ /* }*/ if(!ogl_ext_EXT_framebuffer_object && !ogl_ext_ARB_framebuffer_object) { printf("missing required gl extension EXT_framebuffer_object!\n"); exit(1); } // TODO: this should also pass if we have GL 2.0+ if(!ogl_ext_ARB_shading_language_100 && !(ogl_ext_ARB_fragment_shader && ogl_ext_ARB_vertex_shader && ogl_ext_ARB_shader_objects)) { if(!ogl_ext_ARB_pixel_buffer_object) printf("Missing GLSL and no pixel buffer objects, WILL be slow!\n"); else printf("No GLSL using all fixed function! 
(might be slow)\n"); } if(force_fixed) { printf("Fixed function code forced\n"); packed_intesity_pixels = GL_FALSE; } CHECK_GL_ERR; if(packed_intesity_pixels) printf("Packed intensity enabled\n"); glEnable(GL_TEXTURE_2D); CHECK_GL_ERR; init_mandel(); CHECK_GL_ERR; if(!force_fixed) glfract = fractal_glsl_init(opts, im_w, im_h, packed_intesity_pixels); if(!glfract) glfract = fractal_fixed_init(opts, im_w, im_h); CHECK_GL_ERR; if(!force_fixed) glmaxsrc = maxsrc_new_glsl(IMAX(im_w>>res_boost, 256), IMAX(im_h>>res_boost, 256), packed_intesity_pixels); if(!glmaxsrc) glmaxsrc = maxsrc_new_fixed(IMAX(im_w>>res_boost, 256), IMAX(im_h>>res_boost, 256)); CHECK_GL_ERR; if(!force_fixed) glpal = pal_init_glsl(packed_intesity_pixels); if(!glpal) glpal = pal_init_fixed(im_w, im_h); CHECK_GL_ERR; pd = new_point_data(opts->rational_julia?4:2); memset(frametimes, 0, sizeof(frametimes)); totframetime = frametimes[0] = MAX(10000000/opts->draw_rate, 1); memset(worktimes, 0, sizeof(worktimes)); totworktime = worktimes[0] = MIN(10000000/opts->draw_rate, 1); tick0 = uget_ticks(); }
/*
 * Test driver for the vgatherdps instruction.
 *
 * Setup: old_dst is filled through init_double(), vind.i[] receives eight
 * 32-bit indices and mask.i[] eight mask words. A lane is expected to be
 * loaded from memory when bit 31 of its mask word is set and to keep the
 * old destination value otherwise.
 *
 * Four variants are executed, each followed by a dump of the destination,
 * index and mask registers (before and after) and a call to check_res(n):
 *   1. ymm operands, no displacement
 *   2. xmm operands, no displacement (expect.f[4..7] set to 0 beforehand)
 *   3. ymm operands, displacement +8
 *   4. xmm operands, displacement +8 (expect.f[4..7] set to 0 beforehand)
 *
 * Returns `res` (declared outside this function); a pass message is printed
 * when it is zero.
 */
int main() { int i; init_double(old_dst.d, 4, 3.14); /* Initialize memory and the registers */ vind.i[0] = 0; vind.i[1] = 2; vind.i[2] = 3; vind.i[3] = 1; vind.i[4] = 7; vind.i[5] = 11; vind.i[6] = 13; vind.i[7] = 10; mask.i[0] = 0x80ffffff; mask.i[1] = 0x00ffffff; mask.i[2] = 0x00ffffff; mask.i[3] = 0x80ffffff; mask.i[4] = 0x80ffffff; mask.i[5] = 0x00ffffff; mask.i[6] = 0x80ffffff; mask.i[7] = 0x80ffffff; for (i = 0; i < 8; i++) { expect.f[i] = (mask.i[i] & 0x80000000) ? *(float *)(ptr+(vind.i[i] * 4 + 0)) : old_dst.f[i]; } __asm { /* VGATHERDPS ymm1, [rax + ymm2_vind*4], ymm3_mask */ lea REG, ADDRPTR [arr-4] /* the memory rewrite of the vgather will add 4 back to the address */ vmovups YMMa, [old_dst.ms]; vmovups YMMb, [vind.ms]; vmovups YMMc, [mask.ms]; vmovups [YMMDestBefore.ms], YMMa; vmovups [YMMIndexBefore.ms], YMMb; vmovups [YMMMaskBefore.ms], YMMc; vgatherdps YMMa, [REG + YMMb*4], YMMc vmovups [d.ms], YMMa; vmovups [YMMIndexAfter.ms], YMMb; vmovups [YMMMaskAfter.ms], YMMc; } printVl("YMM dest before: ", YMMDestBefore.l); printVl("YMM dest after: ", d.l); printVl("YMM index before: ", YMMIndexBefore.l); printVl("YMM index after: ", YMMIndexAfter.l); printVl("YMM mask before: ", YMMMaskBefore.l); printVl("YMM mask after: ", YMMMaskAfter.l); check_res(1); for (i = 4; i < 8; i++) { expect.f[i] = 0; } __asm { /* VGATHERDPS xmm1, [rax + xmm2_vind*4], xmm3_mask */ lea REG, ADDRPTR [arr-4] /* the memory rewrite of the vgather will add 4 back to the address */ vmovups YMMa, [old_dst.ms]; vmovups YMMb, [vind.ms]; vmovups YMMc, [mask.ms]; vmovups [YMMDestBefore.ms], YMMa; vmovups [YMMIndexBefore.ms], YMMb; vmovups [YMMMaskBefore.ms], YMMc; vgatherdps XMMa, [REG + XMMb*4], XMMc vmovups [d.ms], YMMa; vmovups [YMMIndexAfter.ms], YMMb; vmovups [YMMMaskAfter.ms], YMMc; } printVl("YMM dest before: ", YMMDestBefore.l); printVl("YMM dest after: ", d.l); printVl("YMM index before: ", YMMIndexBefore.l); printVl("YMM index after: ", YMMIndexAfter.l); printVl("YMM mask before: 
", YMMMaskBefore.l); printVl("YMM mask after: ", YMMMaskAfter.l); check_res(2); for (i = 0; i < 8; i++) { expect.f[i] = (mask.i[i] & 0x80000000) ? *(float *)(ptr+(vind.i[i] * 4 + 8)) : old_dst.f[i]; } __asm { /* VGATHERDPS ymm1, [rax + ymm2_vind*4 + 8], ymm3_mask */ lea REG, ADDRPTR [arr-4] /* the memory rewrite of the vgather will add 4 back to the address */ vmovups YMMa, [old_dst.ms]; vmovups YMMb, [vind.ms]; vmovups YMMc, [mask.ms]; vmovups [YMMDestBefore.ms], YMMa; vmovups [YMMIndexBefore.ms], YMMb; vmovups [YMMMaskBefore.ms], YMMc; vgatherdps YMMa, [REG + YMMb*4 + 8], YMMc vmovups [d.ms], YMMa; vmovups [YMMIndexAfter.ms], YMMb; vmovups [YMMMaskAfter.ms], YMMc; } printVl("YMM dest before: ", YMMDestBefore.l); printVl("YMM dest after: ", d.l); printVl("YMM index before: ", YMMIndexBefore.l); printVl("YMM index after: ", YMMIndexAfter.l); printVl("YMM mask before: ", YMMMaskBefore.l); printVl("YMM mask after: ", YMMMaskAfter.l); check_res(3); for (i = 4; i < 8; i++) { expect.f[i] = 0; } __asm { /* VGATHERDPS xmm1, [rax + xmm2_vind*4 + 8], xmm3_mask */ lea REG, ADDRPTR [arr-4] /* the memory rewrite of the vgather will add 4 back to the address */ vmovups YMMa, [old_dst.ms]; vmovups YMMb, [vind.ms]; vmovups YMMc, [mask.ms]; vmovups [YMMDestBefore.ms], YMMa; vmovups [YMMIndexBefore.ms], YMMb; vmovups [YMMMaskBefore.ms], YMMc; vgatherdps XMMa, [REG + XMMb*4 + 8], XMMc vmovups [d.ms], YMMa; vmovups [YMMIndexAfter.ms], YMMb; vmovups [YMMMaskAfter.ms], YMMc; } printVl("YMM dest before: ", YMMDestBefore.l); printVl("YMM dest after: ", d.l); printVl("YMM index before: ", YMMIndexBefore.l); printVl("YMM index after: ", YMMIndexAfter.l); printVl("YMM mask before: ", YMMMaskBefore.l); printVl("YMM mask after: ", YMMMaskAfter.l); check_res(4); if(!res) PRINTF("gatherdps passed\n"); return res; }
/*
 * config - (re)configure the DGA video output for a new video stream.
 *
 * width, height:     source video size
 * d_width, d_height: requested display size; a zero value falls back to the
 *                    corresponding source dimension
 * flags:             VOFLAG_* bits; VOFLAG_SWSCALE / VOFLAG_FULLSCREEN are
 *                    only consulted in the HAVE_DGA2 build
 * title:             not referenced in this function
 * format:            source image format, stored in vo_dga_src_format
 *
 * Flow:
 *   1. Derive vo_dga_src_mode from vo_dbpp when it is set; otherwise, for BGR
 *      formats, from the low byte of `format`.
 *   2. Pick a hardware video mode by feeding every candidate to check_res():
 *      DGA 2.0 modelines when HAVE_DGA2 is defined, XF86VidMode modelines
 *      when only HAVE_XF86VM is defined, otherwise no switching at all.
 *   3. Refuse videos larger than the viewport, and the case where the
 *      viewport width is still VO_DGA_INVALID_RES.
 *   4. On the first configuration, or when width/height changed since the
 *      previous call, open the framebuffer / switch mode and later
 *      (re)initialise the video buffers.
 *   5. Compute the centring offsets and per-line skip, grab the keyboard
 *      (and the pointer when vo_grabpointer is set).
 *
 * Returns 0 on success and 1 on any of the failures above (unsupported
 * format, no modelines, video too large, no suitable mode, framebuffer
 * mapping failure).
 */
static int config(uint32_t width, uint32_t height, uint32_t d_width, uint32_t d_height, uint32_t flags, char *title, uint32_t format) { int x_off, y_off; int wanted_width, wanted_height; static unsigned char *vo_dga_base; static int prev_width, prev_height; #ifdef HAVE_DGA2 // needed to change DGA video mode int mX = VO_DGA_INVALID_RES, mY = VO_DGA_INVALID_RES, mVBI = 100000, mMaxY = 0, i, j = 0; int dga_modenum; XDGAMode *modeline; XDGADevice *dgadevice; #else #ifdef HAVE_XF86VM unsigned int vm_event, vm_error; unsigned int vm_ver, vm_rev; int i, j = 0, have_vm = 0; int mX = VO_DGA_INVALID_RES, mY = VO_DGA_INVALID_RES, mVBI = 100000, mMaxY = 0, dga_modenum; #endif int bank, ram; #endif vo_dga_src_format = format; wanted_width = d_width; wanted_height = d_height; if (!wanted_height) wanted_height = height; if (!wanted_width) wanted_width = width; if (!vo_dbpp) { if ((format & IMGFMT_BGR_MASK) == IMGFMT_BGR) { vo_dga_src_mode = vd_ModeValid(format & 0xff); } } else { vo_dga_src_mode = vd_ModeValid(vo_dbpp); } vo_dga_hw_mode = SRC_MODE.vdm_hw_mode; if (!vo_dga_src_mode) { mp_msg(MSGT_VO, MSGL_ERR, "vo_dga: unsupported video format!\n"); return 1; } vo_dga_vp_width = vo_screenwidth; vo_dga_vp_height = vo_screenheight; mp_msg(MSGT_VO, MSGL_V, "vo_dga: XServer res: %dx%d\n", vo_dga_vp_width, vo_dga_vp_height); // choose a suitable mode ... 
#ifdef HAVE_DGA2 // Code to change the video mode added by Michael Graffam // [email protected] mp_msg(MSGT_VO, MSGL_V, "vo_dga: vo_modelines=%p, vo_modecount=%d\n", vo_modelines, vo_modecount); if (vo_modelines == NULL) { mp_msg(MSGT_VO, MSGL_ERR, "vo_dga: can't get modelines\n"); return 1; } mp_msg(MSGT_VO, MSGL_INFO, "vo_dga: DGA 2.0 available :-) Can switch resolution AND depth!\n"); for (i = 0; i < vo_modecount; i++) { if (vd_ModeEqual(vo_modelines[i].depth, vo_modelines[i].bitsPerPixel, vo_modelines[i].redMask, vo_modelines[i].greenMask, vo_modelines[i].blueMask, vo_dga_hw_mode)) { mp_msg(MSGT_VO, MSGL_V, "maxy: %4d, depth: %2d, %4dx%4d, ", vo_modelines[i].maxViewportY, vo_modelines[i].depth, vo_modelines[i].imageWidth, vo_modelines[i].imageHeight); if (check_res (i, wanted_width, wanted_height, vo_modelines[i].depth, vo_modelines[i].viewportWidth, vo_modelines[i].viewportHeight, (unsigned) vo_modelines[i].verticalRefresh, vo_modelines[i].maxViewportY, &mX, &mY, &mVBI, &mMaxY)) j = i; } } mp_msg(MSGT_VO, MSGL_INFO, "vo_dga: Selected hardware mode %4d x %4d @ %3d Hz @ depth %2d, bitspp %2d.\n", mX, mY, mVBI, HW_MODE.vdm_depth, HW_MODE.vdm_bitspp); mp_msg(MSGT_VO, MSGL_INFO, "vo_dga: Video parameters by codec: %3d x %3d, depth %2d, bitspp %2d.\n", width, height, SRC_MODE.vdm_depth, SRC_MODE.vdm_bitspp); vo_dga_vp_width = mX; vo_dga_vp_height = mY; if ((flags & VOFLAG_SWSCALE) || (flags & VOFLAG_FULLSCREEN)) { /* -zoom or -fs */ scale_dstW = (d_width + 7) & ~7; scale_dstH = d_height; scale_srcW = width; scale_srcH = height; aspect_save_screenres(mX, mY); aspect_save_orig(scale_srcW, scale_srcH); aspect_save_prescale(scale_dstW, scale_dstH); if (flags & VOFLAG_FULLSCREEN) /* -fs */ aspect(&scale_dstW, &scale_dstH, A_ZOOM); else if (flags & VOFLAG_SWSCALE) /* -zoom */ aspect(&scale_dstW, &scale_dstH, A_NOZOOM); mp_msg(MSGT_VO, MSGL_INFO, "vo_dga: Aspect corrected size for SwScaler: %4d x %4d.\n", scale_dstW, scale_dstH); /* XXX this is a hack, but I'm lazy ;-) :: 
atmos */ width = scale_dstW; height = scale_dstH; } vo_dga_width = vo_modelines[j].bytesPerScanline / HW_MODE.vdm_bytespp; dga_modenum = vo_modelines[j].num; modeline = vo_modelines + j; #else #ifdef HAVE_XF86VM mp_msg(MSGT_VO, MSGL_INFO, "vo_dga: DGA 1.0 compatibility code: Using XF86VidMode for mode switching!\n"); if (XF86VidModeQueryExtension(mDisplay, &vm_event, &vm_error)) { XF86VidModeQueryVersion(mDisplay, &vm_ver, &vm_rev); mp_msg(MSGT_VO, MSGL_INFO, "vo_dga: XF86VidMode Extension v%i.%i\n", vm_ver, vm_rev); have_vm = 1; } else { mp_msg(MSGT_VO, MSGL_ERR, "vo_dga: XF86VidMode Extension not available.\n"); } #define GET_VREFRESH(dotclk, x, y)( (((dotclk)/(x))*1000)/(y) ) if (have_vm) { int modecount; XF86VidModeGetAllModeLines(mDisplay, mScreen, &modecount, &vo_dga_vidmodes); if (vo_dga_vidmodes != NULL) { for (i = 0; i < modecount; i++) { if (check_res(i, wanted_width, wanted_height, vo_dga_modes[vo_dga_hw_mode].vdm_depth, vo_dga_vidmodes[i]->hdisplay, vo_dga_vidmodes[i]->vdisplay, GET_VREFRESH(vo_dga_vidmodes[i]->dotclock, vo_dga_vidmodes[i]->htotal, vo_dga_vidmodes[i]->vtotal), 0, &mX, &mY, &mVBI, &mMaxY)) j = i; } mp_msg(MSGT_VO, MSGL_INFO, "vo_dga: Selected video mode %4d x %4d @ %3d Hz @ depth %2d, bitspp %2d, video %3d x %3d.\n", mX, mY, mVBI, vo_dga_modes[vo_dga_hw_mode].vdm_depth, vo_dga_modes[vo_dga_hw_mode].vdm_bitspp, width, height); } else { mp_msg(MSGT_VO, MSGL_INFO, "vo_dga: XF86VidMode returned no screens - using current resolution.\n"); } dga_modenum = j; vo_dga_vp_width = mX; vo_dga_vp_height = mY; } #else mp_msg(MSGT_VO, MSGL_INFO, "vo_dga: Only have DGA 1.0 extension and no XF86VidMode :-(\n"); mp_msg(MSGT_VO, MSGL_INFO, " Thus, resolution switching is NOT possible.\n"); #endif #endif vo_dga_src_width = width; vo_dga_src_height = height; if (vo_dga_src_width > vo_dga_vp_width || vo_dga_src_height > vo_dga_vp_height) { mp_msg(MSGT_VO, MSGL_ERR, "vo_dga: Sorry, video larger than viewport is not yet supported!\n"); // ugly, do something 
nicer in the future ... #ifndef HAVE_DGA2 #ifdef HAVE_XF86VM if (vo_dga_vidmodes) { XFree(vo_dga_vidmodes); vo_dga_vidmodes = NULL; } #endif #endif return 1; } if (vo_dga_vp_width == VO_DGA_INVALID_RES) { mp_msg(MSGT_VO, MSGL_ERR, "vo_dga: Something is wrong with your DGA. There doesn't seem to be a\n" " single suitable mode!\n" " Please file a bug report (see DOCS/HTML/en/bugreports.html)\n"); #ifndef HAVE_DGA2 #ifdef HAVE_XF86VM if (vo_dga_vidmodes) { XFree(vo_dga_vidmodes); vo_dga_vidmodes = NULL; } #endif #endif return 1; } // now let's start the DGA thing if (!vo_config_count || width != prev_width || height != prev_height) { #ifdef HAVE_DGA2 if (!XDGAOpenFramebuffer(mDisplay, mScreen)) { mp_msg(MSGT_VO, MSGL_ERR, "vo_dga: Framebuffer mapping failed!!!\n"); return 1; } dgadevice = XDGASetMode(mDisplay, mScreen, dga_modenum); XDGASync(mDisplay, mScreen); vo_dga_base = dgadevice->data; XFree(dgadevice); XDGASetViewport(mDisplay, mScreen, 0, 0, XDGAFlipRetrace); #else #ifdef HAVE_XF86VM if (have_vm) { XF86VidModeLockModeSwitch(mDisplay, mScreen, 0); // Two calls are needed to switch modes on my ATI Rage 128. Why? // for riva128 one call is enough! XF86VidModeSwitchToMode(mDisplay, mScreen, vo_dga_vidmodes[dga_modenum]); XF86VidModeSwitchToMode(mDisplay, mScreen, vo_dga_vidmodes[dga_modenum]); } #endif XF86DGAGetViewPortSize(mDisplay, mScreen, &vo_dga_vp_width, &vo_dga_vp_height); XF86DGAGetVideo(mDisplay, mScreen, (char **) &vo_dga_base, &vo_dga_width, &bank, &ram); XF86DGADirectVideo(mDisplay, mScreen, XF86DGADirectGraphics | XF86DGADirectMouse | XF86DGADirectKeyb); XF86DGASetViewPort(mDisplay, mScreen, 0, 0); #endif } // do some more checkings here ... 
mp_msg(MSGT_VO, MSGL_V, "vo_dga: bytes/line: %d, screen res: %dx%d, depth: %d, base: %p, bpp: %d\n", vo_dga_width, vo_dga_vp_width, vo_dga_vp_height, HW_MODE.vdm_bytespp, vo_dga_base, HW_MODE.vdm_bitspp); x_off = (vo_dga_vp_width - vo_dga_src_width) >> 1; y_off = (vo_dga_vp_height - vo_dga_src_height) >> 1; vo_dga_bytes_per_line = vo_dga_src_width * HW_MODE.vdm_bytespp; vo_dga_lines = vo_dga_src_height; vo_dga_src_offset = 0; vo_dga_vp_offset = (y_off * vo_dga_width + x_off) * HW_MODE.vdm_bytespp; vo_dga_vp_skip = (vo_dga_width - vo_dga_src_width) * HW_MODE.vdm_bytespp; // todo mp_msg(MSGT_VO, MSGL_V, "vo_dga: vp_off=%d, vp_skip=%d, bpl=%d\n", vo_dga_vp_offset, vo_dga_vp_skip, vo_dga_bytes_per_line); XGrabKeyboard(mDisplay, DefaultRootWindow(mDisplay), True, GrabModeAsync, GrabModeAsync, CurrentTime); if (vo_grabpointer) XGrabPointer(mDisplay, DefaultRootWindow(mDisplay), True, ButtonPressMask, GrabModeAsync, GrabModeAsync, None, None, CurrentTime); if (!vo_config_count || width != prev_width || height != prev_height) { init_video_buffers(vo_dga_base, vo_dga_vp_height, vo_dga_width * HW_MODE.vdm_bytespp, #ifdef HAVE_DGA2 modeline->maxViewportY, #else vo_dga_vp_height, #endif vo_doublebuffering); prev_width = width; prev_height = height; } mp_msg(MSGT_VO, MSGL_V, "vo_dga: Using %d frame buffer%s.\n", vo_dga_nr_video_buffers, vo_dga_nr_video_buffers == 1 ? "" : "s"); vo_dga_is_running = 1; return 0; }
int main(int argc, char *argv[]) { int proc_num, proc_rank; MPI_Status status; size_t b_muls_num, b_col_num, b_row_num; BLOCK *b_muls; uint i, j, k, l; T *mat_a = (T *)malloc(NN * sizeof(T)); T *mat_b = (T *)malloc(NN * sizeof(T)); T *mat_c = (T *)malloc(NN * sizeof(T)); generate_mat(mat_c, NN, 0); MPI_Init(&argc, &argv); MPI_Comm_size(MPI_COMM_WORLD, &proc_num); MPI_Comm_rank(MPI_COMM_WORLD, &proc_rank); if (proc_rank == 0) { generate_mat(mat_a, NN, VAL_A); generate_mat(mat_b, NN, VAL_B); } MPI_Bcast(mat_a, NN, T_MPI, 0, MPI_COMM_WORLD); MPI_Bcast(mat_b, NN, T_MPI, 0, MPI_COMM_WORLD); b_muls_num = M * K * MAX(M, K); b_muls = (BLOCK *)malloc(b_muls_num * sizeof(BLOCK)); b_row_num = N / K; //Count of rows in the single block b_col_num = N / M; //Count of cols in the single block for (l = i = 0; i < K; ++i) { uint row_low = i * b_row_num; uint row_num = (i == K - 1 ? N : (i + 1) * b_row_num) - row_low; //Count of rows in the first matrix for (j = 0; j < M; ++j) { uint col_low = j * b_col_num; uint col_num = (j == M - 1 ? N : (j + 1) * b_col_num) - col_low; //Count of cols/rows in the first/second matrix for (k = 0; k < K; ++k, ++l) { b_muls[l].r_low_first = row_low; b_muls[l].c_low_first = col_low; b_muls[l].r_low_second = col_low; b_muls[l].c_low_second = k * b_row_num; b_muls[l].r_num_first = row_num; b_muls[l].c_num_second = (k == K - 1 ? 
N : (k + 1) * b_row_num) - b_muls[l].c_low_second; b_muls[l].r_c_num = col_num; } } } for (l = proc_rank; l < b_muls_num; l += proc_num) for (i = b_muls[l].r_low_first; i < b_muls[l].r_low_first + b_muls[l].r_num_first; ++i) for (j = b_muls[l].c_low_second; j < b_muls[l].c_low_second + b_muls[l].c_num_second; ++j) for (k = 0; k < b_muls[l].r_c_num; ++k) mat_c[i * N + j] += mat_a[i * N + k + b_muls[l].c_low_first] * mat_b[(k + b_muls[l].r_low_second) * N + j]; if (proc_rank == 0) { T *foo = (T *)malloc(NN * sizeof(T)); for (i = 1; i < proc_num; ++i) { MPI_Recv(foo, NN, T_MPI, i, 0, MPI_COMM_WORLD, &status); for (j = 0; j < N; ++j) for (k = 0; k < N; ++k) mat_c[j*N+k] += foo[j*N+k]; } free(foo); } else MPI_Send(mat_c, NN, T_MPI, 0, 0, MPI_COMM_WORLD); MPI_Finalize(); if (proc_rank == 0) printf("Result: %i\n", check_res(mat_c, NN, VAL_C)); free(mat_a); free(mat_b); free(mat_c); return EXIT_SUCCESS; }
/*
 * Test driver for the vgatherqpd instruction.
 *
 * Setup: old_dst is filled through init_double(), vind.l[] receives four
 * indices and mask.l[] four 64-bit mask words. A lane is expected to be
 * loaded from memory when the top bit (bit 63) of its mask word is set and
 * to keep the old destination value otherwise. `d` is re-seeded with
 * 0xdeadbeef through init_long() before every variant.
 *
 * Four variants are executed, each followed by a dump of the destination,
 * index and mask registers (before and after) and a call to check_res(n):
 *   1. ymm operands, no displacement
 *   2. xmm operands, no displacement (expect.d[2..3] set to 0 beforehand)
 *   3. ymm operands, displacement +8
 *   4. xmm operands, displacement +8 (expect.d[2..3] set to 0 beforehand)
 *
 * Returns `res` (declared outside this function); a pass message is printed
 * when it is zero.
 */
int main() { int i; init_double(old_dst.d, 4, 3.14); /* Initialize memory and the registers */ vind.l[0] = 0; vind.l[1] = 6; vind.l[2] = 3; vind.l[3] = 1; mask.l[0] = 0x80ffffff00000000LL; mask.l[1] = 0x00ffffff10000000LL; mask.l[2] = 0x00ffffff00000000LL; mask.l[3] = 0x80ffffff10000000LL; init_long(&d, 0xdeadbeef); for (i = 0; i < 4; i++) { expect.d[i] = (mask.l[i] & 0x8000000000000000LL) ? *(double *)(ptr+(vind.l[i] * 8 + 0)) : old_dst.d[i]; } __asm { /* VGATHERQPD ymm1, [rax + ymm2_vind*8], ymm3_mask */ lea REG, ADDRPTR [arr-4] /* the memory rewrite of the vgather will add 4 back to the address */ vmovupd YMMa, [old_dst.md]; vmovupd YMMb, [vind.md]; vmovupd YMMc, [mask.md]; vmovupd [YMMDestBefore.ms], YMMa; vmovupd [YMMIndexBefore.ms], YMMb; vmovupd [YMMMaskBefore.ms], YMMc; vgatherqpd YMMa, [REG + YMMb*8], YMMc vmovupd [d.md], YMMa; vmovupd [YMMIndexAfter.ms], YMMb; vmovupd [YMMMaskAfter.ms], YMMc; } printVl("YMM dest before: ", YMMDestBefore.l); printVl("YMM dest after: ", d.l); printVl("YMM index before: ", YMMIndexBefore.l); printVl("YMM index after: ", YMMIndexAfter.l); printVl("YMM mask before: ", YMMMaskBefore.l); printVl("YMM mask after: ", YMMMaskAfter.l); check_res(1); init_long(&d, 0xdeadbeef); for (i = 2; i < 4; i++) { expect.d[i] = 0; } __asm { /* VGATHERQPD xmm1, [rax + xmm2_vind*8], xmm3_mask */ lea REG, ADDRPTR [arr-4] /* the memory rewrite of the vgather will add 4 back to the address */ vmovupd YMMa, [old_dst.md]; vmovupd YMMb, [vind.md]; vmovupd YMMc, [mask.md]; vmovupd [YMMDestBefore.ms], YMMa; vmovupd [YMMIndexBefore.ms], YMMb; vmovupd [YMMMaskBefore.ms], YMMc; vgatherqpd XMMa, [REG + XMMb*8], XMMc vmovupd [d.md], YMMa; vmovupd [YMMIndexAfter.ms], YMMb; vmovupd [YMMMaskAfter.ms], YMMc; } printVl("YMM dest before: ", YMMDestBefore.l); printVl("YMM dest after: ", d.l); printVl("YMM index before: ", YMMIndexBefore.l); printVl("YMM index after: ", YMMIndexAfter.l); printVl("YMM mask before: ", YMMMaskBefore.l); printVl("YMM mask after: ", 
YMMMaskAfter.l); check_res(2); init_long(&d, 0xdeadbeef); for (i = 0; i < 4; i++) { expect.d[i] = (mask.l[i] & 0x8000000000000000LL) ? *(double *)(ptr+(vind.l[i] * 8 + 8)) : old_dst.d[i]; } __asm { /* VGATHERQPD ymm1, [rax + ymm2_vind*8 + 8], ymm3_mask */ lea REG, ADDRPTR [arr-4] /* the memory rewrite of the vgather will add 4 back to the address */ vmovupd YMMa, [old_dst.md]; vmovupd YMMb, [vind.md]; vmovupd YMMc, [mask.md]; vmovupd [YMMDestBefore.ms], YMMa; vmovupd [YMMIndexBefore.ms], YMMb; vmovupd [YMMMaskBefore.ms], YMMc; vgatherqpd YMMa, [REG + YMMb*8 + 8], YMMc vmovupd [d.md], YMMa; vmovupd [YMMIndexAfter.ms], YMMb; vmovupd [YMMMaskAfter.ms], YMMc; } printVl("YMM dest before: ", YMMDestBefore.l); printVl("YMM dest after: ", d.l); printVl("YMM index before: ", YMMIndexBefore.l); printVl("YMM index after: ", YMMIndexAfter.l); printVl("YMM mask before: ", YMMMaskBefore.l); printVl("YMM mask after: ", YMMMaskAfter.l); check_res(3); init_long(&d, 0xdeadbeef); for (i = 2; i < 4; i++) { expect.d[i] = 0; } __asm { /* VGATHERQPD xmm1, [rax + xmm2_vind*8 + 8], xmm3_mask */ lea REG, ADDRPTR [arr-4] /* the memory rewrite of the vgather will add 4 back to the address */ vmovupd YMMa, [old_dst.md]; vmovupd YMMb, [vind.md]; vmovupd YMMc, [mask.md]; vmovupd [YMMDestBefore.ms], YMMa; vmovupd [YMMIndexBefore.ms], YMMb; vmovupd [YMMMaskBefore.ms], YMMc; vgatherqpd XMMa, [REG + XMMb*8 + 8], XMMc vmovupd [d.md], YMMa; vmovupd [YMMIndexAfter.ms], YMMb; vmovupd [YMMMaskAfter.ms], YMMc; } printVl("YMM dest before: ", YMMDestBefore.l); printVl("YMM dest after: ", d.l); printVl("YMM index before: ", YMMIndexBefore.l); printVl("YMM index after: ", YMMIndexAfter.l); printVl("YMM mask before: ", YMMMaskBefore.l); printVl("YMM mask after: ", YMMMaskAfter.l); check_res(4); if(!res) PRINTF("gatherqpd passed\n"); return res; }