void KVServer::run()
{
	while (1) {
		socklen_t remote_addr_len = sizeof(remote_addr);
		printf("Waiting for a packet ...\n");
		fflush(stdout);
		int rc = recvfrom(sock, buffer, MAX_KV_SIZE, 0, (sockaddr*)&remote_addr, &remote_addr_len);
		if (-1 == rc) {
			fprintf(stderr, "(KeyValueServer) Failed to receive packet!\n");
			exit(1);
		}
		lite_assert(rc<MAX_KV_SIZE);	// darn -- MAX_KV_SIZE too small for actual usage.

		lite_assert(rc>=(int)sizeof(KVRequest));
		KVRequest* request = (KVRequest*)buffer;

		switch (request->type) {
			case TYPE_LOOKUP:
				handle_lookup(request);
				break;
			case TYPE_INSERT:
				handle_insert(request);
				break;
			default:
				fprintf(stderr, "(KeyValueServer) Dropping malformed request with type %04x.\n", request->type);
				continue;
		}
	}

	// TODO:Doesn't clean-up after itself yet
	hash_table_free_discard(&db);
}
VdpStatus
softVdpPresentationQueueCreate(VdpDevice device,
                               VdpPresentationQueueTarget presentation_queue_target,
                               VdpPresentationQueue *presentation_queue)
{
    if (!presentation_queue)
        return VDP_STATUS_INVALID_POINTER;
    VdpDeviceData *deviceData = handle_acquire(device, HANDLETYPE_DEVICE);
    if (NULL == deviceData)
        return VDP_STATUS_INVALID_HANDLE;

    VdpPresentationQueueTargetData *targetData =
        handle_acquire(presentation_queue_target, HANDLETYPE_PRESENTATION_QUEUE_TARGET);
    if (NULL == targetData) {
        handle_release(device);
        return VDP_STATUS_INVALID_HANDLE;
    }

    VdpPresentationQueueData *data = calloc(1, sizeof(VdpPresentationQueueData));
    if (NULL == data) {
        handle_release(device);
        handle_release(presentation_queue_target);
        return VDP_STATUS_RESOURCES;
    }

    data->type = HANDLETYPE_PRESENTATION_QUEUE;
    data->device = deviceData;
    data->target = targetData;
    data->bg_color.red = 0.0;
    data->bg_color.green = 0.0;
    data->bg_color.blue = 0.0;
    data->bg_color.alpha = 0.0;

    deviceData->refcount ++;
    targetData->refcount ++;
    *presentation_queue = handle_insert(data);

    // initialize queue
    data->queue.head = -1;
    data->queue.used = 0;
    for (unsigned int k = 0; k < PRESENTATION_QUEUE_LENGTH; k ++) {
        data->queue.item[k].next = -1;
        // other fields are zero due to calloc
    }
    for (unsigned int k = 0; k < PRESENTATION_QUEUE_LENGTH - 1; k ++)
        data->queue.freelist[k] = k + 1;
    data->queue.freelist[PRESENTATION_QUEUE_LENGTH - 1] = -1;
    data->queue.firstfree = 0;

    pthread_mutex_init(&data->queue_mutex, NULL);
    pthread_cond_init(&data->new_work_available, NULL);

    // launch worker thread
    pthread_create(&data->worker_thread, NULL, presentation_thread,
                   (void *)(size_t)(*presentation_queue));

    handle_release(device);
    handle_release(presentation_queue_target);
    return VDP_STATUS_OK;
}
VdpStatus
softVdpPresentationQueueTargetCreateX11(VdpDevice device, Drawable drawable,
                                        VdpPresentationQueueTarget *target)
{
    if (!target)
        return VDP_STATUS_INVALID_POINTER;
    VdpDeviceData *deviceData = handle_acquire(device, HANDLETYPE_DEVICE);
    if (NULL == deviceData)
        return VDP_STATUS_INVALID_HANDLE;

    VdpPresentationQueueTargetData *data = calloc(1, sizeof(VdpPresentationQueueTargetData));
    if (NULL == data) {
        handle_release(device);
        return VDP_STATUS_RESOURCES;
    }

    data->type = HANDLETYPE_PRESENTATION_QUEUE_TARGET;
    data->device = deviceData;
    data->drawable = drawable;
    data->refcount = 0;

    pthread_mutex_lock(&global.glx_ctx_stack_mutex);
    GLint att[] = { GLX_RGBA, GLX_DEPTH_SIZE, 24, GLX_DOUBLEBUFFER, None };
    XVisualInfo *vi;
    vi = glXChooseVisual(deviceData->display, deviceData->screen, att);
    if (NULL == vi) {
        traceError("error (softVdpPresentationQueueTargetCreateX11): glXChooseVisual failed\n");
        free(data);
        pthread_mutex_unlock(&global.glx_ctx_stack_mutex);
        handle_release(device);
        return VDP_STATUS_ERROR;
    }

    // create context for dislaying result (can share display lists with deviceData->glc
    data->glc = glXCreateContext(deviceData->display, vi, deviceData->root_glc, GL_TRUE);
    deviceData->refcount ++;
    *target = handle_insert(data);
    pthread_mutex_unlock(&global.glx_ctx_stack_mutex);

    handle_release(device);
    return VDP_STATUS_OK;
}
VdpStatus
softVdpDecoderCreate(VdpDevice device, VdpDecoderProfile profile, uint32_t width, uint32_t height,
                     uint32_t max_references, VdpDecoder *decoder)
{
    VdpStatus err_code;
    if (!decoder)
        return VDP_STATUS_INVALID_POINTER;
    VdpDeviceData *deviceData = handle_acquire(device, HANDLETYPE_DEVICE);
    if (NULL == deviceData)
        return VDP_STATUS_INVALID_HANDLE;
    if (!deviceData->va_available) {
        err_code = VDP_STATUS_INVALID_DECODER_PROFILE;
        goto quit;
    }
    VADisplay va_dpy = deviceData->va_dpy;

    VdpDecoderData *data = calloc(1, sizeof(VdpDecoderData));
    if (NULL == data) {
        err_code = VDP_STATUS_RESOURCES;
        goto quit;
    }

    data->type = HANDLETYPE_DECODER;
    data->device = deviceData;
    data->profile = profile;
    data->width = width;
    data->height = height;
    data->max_references = max_references;

    // initialize free_list. Initially they all free
    data->free_list_head = -1;
    for (int k = 0; k < MAX_RENDER_TARGETS; k ++) {
        free_list_push(data->free_list, &data->free_list_head, k);
    }

    VAProfile va_profile;
    VAStatus status;
    int final_try = 0;
    VdpDecoderProfile next_profile = profile;

    // Try to create decoder for asked profile. On failure try to create more advanced one
    while (! final_try) {
        profile = next_profile;
        switch (profile) {
        case VDP_DECODER_PROFILE_H264_BASELINE:
            va_profile = VAProfileH264Baseline;
            data->num_render_targets = NUM_RENDER_TARGETS_H264;
            next_profile = VDP_DECODER_PROFILE_H264_MAIN;
            break;
        case VDP_DECODER_PROFILE_H264_MAIN:
            va_profile = VAProfileH264Main;
            data->num_render_targets = NUM_RENDER_TARGETS_H264;
            next_profile = VDP_DECODER_PROFILE_H264_HIGH;
            break;
        case VDP_DECODER_PROFILE_H264_HIGH:
            va_profile = VAProfileH264High;
            data->num_render_targets = NUM_RENDER_TARGETS_H264;
            // there is no more advanced profile, so it's final try
            final_try = 1;
            break;
        default:
            traceError("error (softVdpDecoderCreate): decoder %s not implemented\n",
                       reverse_decoder_profile(profile));
            err_code = VDP_STATUS_INVALID_DECODER_PROFILE;
            goto quit_free_data;
        }

        status = vaCreateConfig(va_dpy, va_profile, VAEntrypointVLD, NULL, 0, &data->config_id);
        if (VA_STATUS_SUCCESS == status)        // break loop if decoder created
            break;
    }

    if (VA_STATUS_SUCCESS != status) {
        err_code = VDP_STATUS_ERROR;
        goto quit_free_data;
    }

    // Create surfaces. All video surfaces created here, rather than in VdpVideoSurfaceCreate.
    // VAAPI requires surfaces to be bound with context on its creation time, while VDPAU allows
    // to do it later. So here is a trick: VDP video surfaces get their va_surf dynamically in
    // DecoderRender.

    // TODO: check format of surfaces created
#if VA_CHECK_VERSION(0, 34, 0)
    status = vaCreateSurfaces(va_dpy, VA_RT_FORMAT_YUV420, width, height,
        data->render_targets, data->num_render_targets, NULL, 0);
#else
    status = vaCreateSurfaces(va_dpy, width, height, VA_RT_FORMAT_YUV420,
        data->num_render_targets, data->render_targets);
#endif
    if (VA_STATUS_SUCCESS != status) {
        err_code = VDP_STATUS_ERROR;
        goto quit_free_data;
    }

    status = vaCreateContext(va_dpy, data->config_id, width, height, VA_PROGRESSIVE,
        data->render_targets, data->num_render_targets, &data->context_id);
    if (VA_STATUS_SUCCESS != status) {
        err_code = VDP_STATUS_ERROR;
        goto quit_free_data;
    }

    deviceData->refcount ++;
    *decoder = handle_insert(data);

    err_code = VDP_STATUS_OK;
    goto quit;

quit_free_data:
    free(data);
quit:
    handle_release(device);
    return err_code;
}
VdpStatus
vdpDeviceCreateX11(Display *display_orig, int screen, VdpDevice *device,
                   VdpGetProcAddress **get_proc_address)
{
    if (!display_orig || !device)
        return VDP_STATUS_INVALID_POINTER;

    // Let's get own connection to the X server
    Display *display = handle_xdpy_ref(display_orig);
    if (NULL == display)
        return VDP_STATUS_ERROR;

    if (global.quirks.buggy_XCloseDisplay) {
        // XCloseDisplay could segfault on fglrx. To avoid calling XCloseDisplay,
        // make one more reference to xdpy copy.
        handle_xdpy_ref(display_orig);
    }

    VdpDeviceData *data = calloc(1, sizeof(VdpDeviceData));
    if (NULL == data)
        return VDP_STATUS_RESOURCES;

    glx_ctx_lock(); // use glx lock to serialize X calls
    data->type = HANDLETYPE_DEVICE;
    data->display = display;
    data->display_orig = display_orig;   // save supplied pointer too
    data->screen = screen;
    data->refcount = 0;
    pthread_mutex_init(&data->refcount_mutex, NULL);
    data->root = DefaultRootWindow(display);

    XWindowAttributes wnd_attrs;
    XGetWindowAttributes(display, data->root, &wnd_attrs);
    data->color_depth = wnd_attrs.depth;

    data->fn.glXBindTexImageEXT =
        (PFNGLXBINDTEXIMAGEEXTPROC)glXGetProcAddress((GLubyte *)"glXBindTexImageEXT");
    data->fn.glXReleaseTexImageEXT =
        (PFNGLXRELEASETEXIMAGEEXTPROC)glXGetProcAddress((GLubyte *)"glXReleaseTexImageEXT");
    glx_ctx_unlock();

    if (!data->fn.glXBindTexImageEXT || !data->fn.glXReleaseTexImageEXT) {
        traceError("error (%s): can't get glXBindTexImageEXT address\n");
        free(data);
        return VDP_STATUS_RESOURCES;
    }

    // create master GLX context to share data between further created ones
    glx_ctx_ref_glc_hash_table(display, screen);
    data->root_glc = glx_ctx_get_root_context();

    glx_ctx_push_thread_local(data);

    glClearColor(0.0f, 0.0f, 0.0f, 0.0f);

    glMatrixMode(GL_PROJECTION);
    glLoadIdentity();

    glMatrixMode(GL_MODELVIEW);
    glLoadIdentity();

    // initialize VAAPI
    if (global.quirks.avoid_va) {
        // pretend there is no VA-API available
        data->va_available = 0;
    } else {
        data->va_dpy = vaGetDisplay(display);
        data->va_available = 0;

        VAStatus status = vaInitialize(data->va_dpy, &data->va_version_major,
                                       &data->va_version_minor);
        if (VA_STATUS_SUCCESS == status) {
            data->va_available = 1;
            traceInfo("libva (version %d.%d) library initialized\n",
                      data->va_version_major, data->va_version_minor);
        } else {
            data->va_available = 0;
            traceInfo("warning: failed to initialize libva. "
                      "No video decode acceleration available.\n");
        }
    }

    compile_shaders(data);

    glGenTextures(1, &data->watermark_tex_id);
    glBindTexture(GL_TEXTURE_2D, data->watermark_tex_id);

    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);

    glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, watermark_width, watermark_height, 0, GL_BGRA,
                 GL_UNSIGNED_BYTE, watermark_data);
    glFinish();

    *device = handle_insert(data);
    if (get_proc_address)
        *get_proc_address = &vdpGetProcAddress;

    GLenum gl_error = glGetError();
    glx_ctx_pop();

    if (GL_NO_ERROR != gl_error) {
        traceError("error (%s): gl error %d\n", __func__, gl_error);
        return VDP_STATUS_ERROR;
    }

    return VDP_STATUS_OK;
}
VdpStatus
vdpVideoSurfaceCreate(VdpDevice device, VdpChromaType chroma_type, uint32_t width, uint32_t height,
                      VdpVideoSurface *surface)
{
    VdpStatus err_code;
    if (!surface)
        return VDP_STATUS_INVALID_POINTER;
    if (chroma_type != VDP_CHROMA_TYPE_420 &&
        chroma_type != VDP_CHROMA_TYPE_422 &&
        chroma_type != VDP_CHROMA_TYPE_444)
    {
        return VDP_STATUS_INVALID_CHROMA_TYPE;
    }

    VdpDeviceData *deviceData = handle_acquire(device, HANDLETYPE_DEVICE);
    if (NULL == deviceData)
        return VDP_STATUS_INVALID_HANDLE;

    VdpVideoSurfaceData *data = calloc(1, sizeof(VdpVideoSurfaceData));
    if (NULL == data) {
        err_code = VDP_STATUS_RESOURCES;
        goto quit;
    }

    data->type = HANDLETYPE_VIDEO_SURFACE;
    data->device = device;
    data->deviceData = deviceData;
    data->chroma_type = chroma_type;
    data->width = width;
    data->height = height;

    switch (chroma_type) {
    case VDP_CHROMA_TYPE_420:
        data->chroma_width = ((width + 1) & (~1u)) / 2;
        data->chroma_height = ((height + 1) & (~1u)) / 2;
        data->stride = (width + 0xfu) & (~0xfu);
        break;
    case VDP_CHROMA_TYPE_422:
        data->chroma_width = ((width + 1) & (~1u)) / 2;
        data->chroma_height = height;
        data->stride = (width + 2 * data->chroma_width + 0xfu) & (~0xfu);
        break;
    case VDP_CHROMA_TYPE_444:
        data->chroma_width = width;
        data->chroma_height = height;
        data->stride = (4 * width + 0xfu) & (~0xfu);
        break;
    }
    data->chroma_stride = (data->chroma_width + 0xfu) & (~0xfu);

    data->va_surf = VA_INVALID_SURFACE;
    data->tex_id = 0;
    data->sync_va_to_glx = 0;
    data->decoder = VDP_INVALID_HANDLE;
    data->y_plane = NULL;
    data->u_plane = NULL;
    data->v_plane = NULL;

    glx_ctx_push_thread_local(deviceData);
    glGenTextures(1, &data->tex_id);
    glBindTexture(GL_TEXTURE_2D, data->tex_id);
    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
    glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, data->width, data->height, 0,
                 GL_BGRA, GL_UNSIGNED_BYTE, NULL);

    glGenFramebuffers(1, &data->fbo_id);
    glBindFramebuffer(GL_FRAMEBUFFER, data->fbo_id);
    glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, data->tex_id, 0);
    GLenum gl_status = glCheckFramebufferStatus(GL_FRAMEBUFFER);
    if (GL_FRAMEBUFFER_COMPLETE != gl_status) {
        traceError("error (%s): framebuffer not ready, %d, %s\n", __func__, gl_status,
                   gluErrorString(gl_status));
        glx_ctx_pop();
        free(data);
        err_code = VDP_STATUS_ERROR;
        goto quit;
    }
    glFinish();

    GLenum gl_error = glGetError();
    glx_ctx_pop();
    if (GL_NO_ERROR != gl_error) {
        traceError("error (%s): gl error %d\n", __func__, gl_error);
        free(data);
        err_code = VDP_STATUS_ERROR;
        goto quit;
    }

    // no VA surface creation here. Actual pool of VA surfaces should be allocated already
    // by VdpDecoderCreate. VdpDecoderCreate will update ->va_surf field as needed.

    ref_device(deviceData);
    *surface = handle_insert(data);

    err_code = VDP_STATUS_OK;
quit:
    handle_release(device);
    return err_code;
}