Exemple #1
0
int
main(int argc, char *argv[])
{
	struct limare_state *state;
	int ret;

	const char *vertex_shader_source =
		"uniform mat4 modelviewMatrix;\n"
		"uniform mat4 modelviewprojectionMatrix;\n"
		"uniform mat3 normalMatrix;\n"
		"\n"
		"attribute vec4 in_position;    \n"
		"attribute vec3 in_normal;      \n"
		"attribute vec2 in_coord;       \n"
		"\n"
		"vec4 lightSource = vec4(10.0, 20.0, 40.0, 0.0);\n"
		"                             \n"
		"varying vec4 vVaryingColor;  \n"
		"varying vec2 coord;          \n"
		"                             \n"
		"void main()                  \n"
		"{                            \n"
		"    gl_Position = modelviewprojectionMatrix * in_position;\n"
		"    vec3 vEyeNormal = normalMatrix * in_normal;\n"
		"    vec4 vPosition4 = modelviewMatrix * in_position;\n"
		"    vec3 vPosition3 = vPosition4.xyz / vPosition4.w;\n"
		"    vec3 vLightDir = normalize(lightSource.xyz - vPosition3);\n"
		"    float diff = max(0.0, dot(vEyeNormal, vLightDir));\n"
		"    vVaryingColor = vec4(diff * vec3(1.0, 1.0, 1.0), 1.0);\n"
		"    coord = in_coord;        \n"
		"}                            \n";
	const char *fragment_shader_source =
		"precision mediump float;     \n"
		"                             \n"
		"varying vec4 vVaryingColor;  \n"
		"varying vec2 coord;          \n"
		"                             \n"
		"uniform sampler2D in_texture; \n"
		"                             \n"
		"void main()                  \n"
		"{                            \n"
		"    gl_FragColor = vVaryingColor * texture2D(in_texture, coord);\n"
		"}                            \n";

	float *vertices_array = companion_vertices_array();
	float *texture_coordinates_array =
		companion_texture_coordinates_array();
	float *normals_array = companion_normals_array();

	state = limare_init();
	if (!state)
		return -1;

	limare_buffer_clear(state);

	ret = limare_state_setup(state, 0, 0, 0xFF505050);
	if (ret)
		return ret;

	int width, height;
	limare_buffer_size(state, &width, &height);
	float aspect = (float) height / width;

	limare_enable(state, GL_DEPTH_TEST);
	limare_enable(state, GL_CULL_FACE);
	limare_depth_mask(state, 1);

	int program = limare_program_new(state);
	vertex_shader_attach(state, program, vertex_shader_source);
	fragment_shader_attach(state, program, fragment_shader_source);

	limare_link(state);

	int vertices_buffer =
		limare_attribute_buffer_upload(state, LIMARE_ATTRIB_FLOAT,
					       3, 0, COMPANION_ARRAY_COUNT,
					       vertices_array);

	int texture_coordinates_buffer =
		limare_attribute_buffer_upload(state, LIMARE_ATTRIB_FLOAT,
					       2, 0, COMPANION_ARRAY_COUNT,
					       texture_coordinates_array);

	int normals_buffer =
		limare_attribute_buffer_upload(state, LIMARE_ATTRIB_FLOAT,
					       3, 0, COMPANION_ARRAY_COUNT,
					       normals_array);

	limare_attribute_buffer_attach(state, "in_position", vertices_buffer);
	limare_attribute_buffer_attach(state, "in_coord",
				       texture_coordinates_buffer);
	limare_attribute_buffer_attach(state, "in_normal", normals_buffer);

	int texture = limare_texture_upload(state, companion_texture,
					    COMPANION_TEXTURE_WIDTH,
					    COMPANION_TEXTURE_HEIGHT,
					    COMPANION_TEXTURE_FORMAT, 0);
	limare_texture_attach(state, "in_texture", texture);

	ESMatrix modelview;
	esMatrixLoadIdentity(&modelview);
	esTranslate(&modelview, 0.0, 0.0, -4.0);
	esRotate(&modelview, -30.0, 1.0, 0.0, 0.0);
	esRotate(&modelview, -45.0, 0.0, 1.0, 0.0);

	ESMatrix projection;
	esMatrixLoadIdentity(&projection);
	esFrustum(&projection, -1.0, +1.0, -1.0 * aspect, +1.0 * aspect,
		  1.0, 10.0);

	ESMatrix modelviewprojection;
	esMatrixLoadIdentity(&modelviewprojection);
	esMatrixMultiply(&modelviewprojection, &modelview, &projection);

	float normal[9];
	normal[0] = modelview.m[0][0];
	normal[1] = modelview.m[0][1];
	normal[2] = modelview.m[0][2];
	normal[3] = modelview.m[1][0];
	normal[4] = modelview.m[1][1];
	normal[5] = modelview.m[1][2];
	normal[6] = modelview.m[2][0];
	normal[7] = modelview.m[2][1];
	normal[8] = modelview.m[2][2];

	limare_uniform_attach(state, "modelviewMatrix", 16,
			      &modelview.m[0][0]);
	limare_uniform_attach(state, "modelviewprojectionMatrix", 16,
			      &modelviewprojection.m[0][0]);
	limare_uniform_attach(state, "normalMatrix", 9, normal);

	limare_frame_new(state);

	ret = limare_draw_arrays(state, GL_TRIANGLES, 0, COMPANION_ARRAY_COUNT);
	if (ret)
		return ret;

	ret = limare_frame_flush(state);
	if (ret)
		return ret;

	limare_buffer_swap(state);

	limare_finish(state);

	return 0;
}
int main(int argc, char **argv)
{
    int rv;
    struct viv_conn *conn = 0;
    rv = viv_open(VIV_HW_3D, &conn);
    if(rv!=0)
    {
        fprintf(stderr, "Error opening device\n");
        exit(1);
    }
    printf("Succesfully opened device\n");
    viv_show_chip_info(conn);
    
    gcsHAL_INTERFACE id = {};
    id.command = gcvHAL_ATTACH;
    if((viv_invoke(conn, &id)) != gcvSTATUS_OK)
    {
        #ifdef DEBUG
        fprintf(stderr, "Error attaching to GPU\n");
        #endif
        exit(1);
    }
    gckCONTEXT context = id.u.Attach.context;

    /* allocate command buffer (blob uses four command buffers, but we don't even fill one) */
    viv_addr_t buf0_physical = 0;
    void *buf0_logical = 0;
    if(viv_alloc_contiguous(conn, 0x20000, &buf0_physical, &buf0_logical, NULL)!=0)
    {
        fprintf(stderr, "Error allocating host memory\n");
        exit(1);
    }
    printf("Allocated buffer: phys=%08x log=%08x\n", (uint32_t)buf0_physical, (uint32_t)buf0_logical);

    /* allocate main render target */
    gcuVIDMEM_NODE_PTR color_surface_node = 0;
    if(viv_alloc_linear_vidmem(conn, 0x1ab000, 0x40, gcvSURF_RENDER_TARGET, gcvPOOL_DEFAULT, &color_surface_node, NULL)!=0)
    {
        fprintf(stderr, "Error allocating render target buffer memory\n");
        exit(1);
    }
    printf("Allocated render target node: node=%08x\n", (uint32_t)color_surface_node);
    viv_addr_t color_surface_physical = 0;
    void *color_surface_logical = 0;
    if(viv_lock_vidmem(conn, color_surface_node, &color_surface_physical, &color_surface_logical)!=0)
    {
        fprintf(stderr, "Error locking render target memory\n");
        exit(1);
    }
    printf("Locked render target: phys=%08x log=%08x\n", (uint32_t)color_surface_physical, (uint32_t)color_surface_logical);

    /* allocate tile status for main render target */
    gcuVIDMEM_NODE_PTR color_status_node = 0;
    if(viv_alloc_linear_vidmem(conn, 0x1b00, 0x40, gcvSURF_TILE_STATUS, gcvPOOL_DEFAULT, &color_status_node, NULL)!=0)
    {
        fprintf(stderr, "Error allocating render target tile status memory\n");
        exit(1);
    }
    printf("Allocated render target tile status node: node=%08x\n", (uint32_t)color_status_node);
    viv_addr_t color_status_physical = 0;
    void *color_status_logical = 0;
    if(viv_lock_vidmem(conn, color_status_node, &color_status_physical, &color_status_logical)!=0)
    {
        fprintf(stderr, "Error locking render target memory\n");
        exit(1);
    }
    printf("Locked render target ts: phys=%08x log=%08x\n", (uint32_t)color_status_physical, (uint32_t)color_status_logical);

    /* allocate depth for main render target */
    gcuVIDMEM_NODE_PTR depth_surface_node = 0;
    if(viv_alloc_linear_vidmem(conn, 0xd1000, 0x40, gcvSURF_DEPTH, gcvPOOL_DEFAULT, &depth_surface_node, NULL)!=0)
    {
        fprintf(stderr, "Error allocating depth memory\n");
        exit(1);
    }
    printf("Allocated depth node: node=%08x\n", (uint32_t)depth_surface_node);
    viv_addr_t depth_surface_physical = 0;
    void *depth_surface_logical = 0;
    if(viv_lock_vidmem(conn, depth_surface_node, &depth_surface_physical, &depth_surface_logical)!=0)
    {
        fprintf(stderr, "Error locking depth target memory\n");
        exit(1);
    }
    printf("Locked depth target: phys=%08x log=%08x\n", (uint32_t)depth_surface_physical, (uint32_t)depth_surface_logical);
    
    /* allocate depth ts for main render target */
    gcuVIDMEM_NODE_PTR depth_status_node = 0;
    if(viv_alloc_linear_vidmem(conn, 0xe00, 0x40, gcvSURF_TILE_STATUS, gcvPOOL_DEFAULT, &depth_status_node, NULL)!=0)
    {
        fprintf(stderr, "Error allocating depth memory\n");
        exit(1);
    }
    printf("Allocated depth ts node: node=%08x\n", (uint32_t)depth_status_node);
    viv_addr_t depth_status_physical = 0;
    void *depth_status_logical = 0;
    if(viv_lock_vidmem(conn, depth_status_node, &depth_status_physical, &depth_status_logical)!=0)
    {
        fprintf(stderr, "Error locking depth target ts memory\n");
        exit(1);
    }
    printf("Locked depth ts target: phys=%08x log=%08x\n", (uint32_t)depth_status_physical, (uint32_t)depth_status_logical);

    /* allocate vertex buffer */
    gcuVIDMEM_NODE_PTR vtx_node = 0;
    if(viv_alloc_linear_vidmem(conn, 0x100000, 0x40, gcvSURF_VERTEX, gcvPOOL_DEFAULT, &vtx_node, NULL)!=0)
    {
        fprintf(stderr, "Error allocating vertex memory\n");
        exit(1);
    }
    printf("Allocated vertex node: node=%08x\n", (uint32_t)vtx_node);
    viv_addr_t vtx_physical = 0;
    void *vtx_logical = 0;
    if(viv_lock_vidmem(conn, vtx_node, &vtx_physical, &vtx_logical)!=0)
    {
        fprintf(stderr, "Error locking vertex memory\n");
        exit(1);
    }
    printf("Locked vertex memory: phys=%08x log=%08x\n", (uint32_t)vtx_physical, (uint32_t)vtx_logical);

    /* allocate tile status for aux render target */
    gcuVIDMEM_NODE_PTR rs_dest_node = 0;
    if(viv_alloc_linear_vidmem(conn, 0x1a0000, 0x40, gcvSURF_BITMAP, gcvPOOL_DEFAULT, &rs_dest_node, NULL)!=0)
    {
        fprintf(stderr, "Error allocating aux render target tile status memory\n");
        exit(1);
    }
    printf("Allocated aux render target tile status node: node=%08x\n", (uint32_t)rs_dest_node);
    viv_addr_t rs_dest_physical = 0;
    void *rs_dest_logical = 0;
    if(viv_lock_vidmem(conn, rs_dest_node, &rs_dest_physical, &rs_dest_logical)!=0)
    {
        fprintf(stderr, "Error locking aux ts render target memory\n");
        exit(1);
    }
    printf("Locked aux render target ts: phys=%08x log=%08x\n", (uint32_t)rs_dest_physical, (uint32_t)rs_dest_logical);
    
    int texture_size[] = {0x100000, 0x040000, 0x010000, 0x004000, 0x001000, 0x000400, 0x000200, 0x000100, 0x000100, 0x000100};
    gcuVIDMEM_NODE_PTR text_lod[10];
    viv_addr_t text_lod_physical[10];
    void* text_lod_logical[10];
    for(int idx=0; idx<10; idx++)
    {
        if (viv_alloc_linear_vidmem(conn, texture_size[idx], 0x40, gcvSURF_TEXTURE, gcvPOOL_DEFAULT, &text_lod[idx], NULL) != 0)
        {
            fprintf(stderr, "Error locking texture nr %d\n", idx);
            exit(1);
        }
        if(viv_lock_vidmem(conn, text_lod[idx], &text_lod_physical[idx], &text_lod_logical[idx]) != 0)
        {
            fprintf(stderr, "Error locking texture memory\n");
            exit(1);
        }
        printf("Locked texture target nr %d: phys=%08x log=%08x\n", idx, (uint32_t)text_lod_physical[idx], (uint32_t)text_lod_logical[idx]);
    }
    
    /* Interleave companion cube vertex data into ADDR_I */
    //memset(vtx_logical, 0, 0x5ef80);
    float *vertices_array = companion_vertices_array();
    float *texture_coordinates_array = companion_texture_coordinates_array();
    float *normals_array = companion_normals_array();
    int dest_idx = 0;
    for(int vert=0; vert<COMPANION_ARRAY_COUNT*3; ++vert)
    {
        ((float*)vtx_logical)[dest_idx] = vertices_array[vert];
        dest_idx++;
    }
    for(int vert=0; vert<COMPANION_ARRAY_COUNT*3; ++vert)
    {
        ((float*)vtx_logical)[dest_idx] = normals_array[vert];
        dest_idx++;
    }
    for(int vert=0; vert<COMPANION_ARRAY_COUNT*2; ++vert)
    {
        ((float*)vtx_logical)[dest_idx] = texture_coordinates_array[vert];
        dest_idx++;
    }
    
    /* Fill in texture (convert from RGB linear to tiled) */
    #if 1
    #define TILE_WIDTH (4)
    #define TILE_HEIGHT (4)
    #define TILE_WORDS (TILE_WIDTH*TILE_HEIGHT)
    unsigned ytiles = COMPANION_TEXTURE_HEIGHT / TILE_HEIGHT;
    unsigned xtiles = COMPANION_TEXTURE_WIDTH / TILE_WIDTH;
    unsigned dst_stride = xtiles * TILE_WORDS;
    
    for(unsigned ty=0; ty<ytiles; ++ty)
    {
        for(unsigned tx=0; tx<xtiles; ++tx)
        {
            unsigned ofs = ty * dst_stride + tx * TILE_WORDS;
            for(unsigned y=0; y<TILE_HEIGHT; ++y)
            {
                for(unsigned x=0; x<TILE_WIDTH; ++x)
                {
                    unsigned srcy = ty*TILE_HEIGHT + y;
                    unsigned srcx = tx*TILE_WIDTH + x;
                    unsigned src_ofs = (srcy*COMPANION_TEXTURE_WIDTH+srcx)*3;
                    unsigned r,g,b,a;
                    r = ((uint8_t*)companion_texture)[src_ofs+0];
                    g = ((uint8_t*)companion_texture)[src_ofs+1];
                    b = ((uint8_t*)companion_texture)[src_ofs+2];
                    a = 255;
                    
                    ((uint32_t*)text_lod_logical[0])[ofs] = ((a&0xFF) << 24) | ((b&0xFF) << 16) | ((g&0xFF) << 8) | (r&0xFF);
                    //((uint32_t*)text_lod_logical[0])[ofs] = 0xff00ff00;
                    ofs += 1;
                }
            }
        }
    }
    #endif
    #if 0
    int texfd = open("/data/mine/texture.raw", O_RDONLY); 
    read(texfd, text_lod_logical[0], 512*512*4);
    close(texfd);
    #endif
    
    struct _gcoCMDBUF commandBuffer = {
        .object = {
            .type = gcvOBJ_COMMANDBUFFER
        },
        .physical = (void*)buf0_physical,
        .logical = (void*)buf0_logical,
        .bytes = 0x20000,
        .startOffset = 0x0,
    };

    commandBuffer.free = commandBuffer.bytes - 0x8; /* Always keep 0x8 at end of buffer for kernel driver */
    /* Set addresses in first command buffer */
    cmdbuf1[31] = cmdbuf1[81] = cmdbuf1[103] = color_status_physical; //H
    cmdbuf1[32] = cmdbuf1[104] = color_surface_physical; //G
    cmdbuf1[41] = cmdbuf1[135] = cmdbuf1[155] = depth_status_physical; //J
    cmdbuf1[42] = cmdbuf1[156] = depth_surface_physical; //I
    cmdbuf1[191] = text_lod_physical[0]; //L - base bitmap
    cmdbuf1[193] = text_lod_physical[1]; //M

    /* Submit first command buffer */
    commandBuffer.startOffset = 0;
    memcpy((void*)((size_t)commandBuffer.logical + commandBuffer.startOffset), cmdbuf1, sizeof(cmdbuf1));
    commandBuffer.offset = commandBuffer.startOffset + sizeof(cmdbuf1);
    commandBuffer.free -= sizeof(cmdbuf1) + 0x08;
    printf("[1] startOffset=%08x, offset=%08x, free=%08x\n", (uint32_t)commandBuffer.startOffset, (uint32_t)commandBuffer.offset, (uint32_t)commandBuffer.free);
    if(viv_commit(conn, &commandBuffer, context) != 0)
    {
        fprintf(stderr, "Error committing first command buffer\n");
        exit(1);
    }

    /* Create signal */
    int sig_id = 0;
    if(viv_user_signal_create(conn, 0, &sig_id) != 0) /* automatic resetting signal */
    {
        fprintf(stderr, "Cannot create user signal\n");
        exit(1);
    }
    printf("Created user signal %i\n", sig_id);
    
    /* Queue and wait for signal */
    if(viv_event_queue_signal(conn, sig_id, gcvKERNEL_PIXEL) != 0)
    {
        fprintf(stderr, "Cannot queue GPU signal\n");
        exit(1);
    }
    if(viv_user_signal_wait(conn, sig_id, VIV_WAIT_INDEFINITE) != 0)
    {
        fprintf(stderr, "Cannot wait for signal\n");
        exit(1);
    }

    //generate LOD
    unsigned stride[] = {0x1000, 0x800, 0x400, 0x200, 0x100, 0x100, 0x100, 0x100, 0x100};
    unsigned height[] = { 256, 128,  64,  32,  16,   8,   8,   8};
    unsigned width[] =  { 256, 128,  64,  32,  32,  32,  32,  32};
    /* 33 - stride src
     35 - stride dst  *
     39 - pad but allways different - garbage?
     45 - source addr
     47 - dest addr
     49 - height width*/
    for (int idx=0; idx<8; idx++)
    {
        /* Submit command buffer 2 */
        cmdbuf2[33] = stride[idx];
        cmdbuf2[35] = stride[idx + 1];
        cmdbuf2[45] = text_lod_physical[idx + 1]; //idx 0 - 1 done in first cmdbuf
        cmdbuf2[47] = text_lod_physical[idx + 2];
        cmdbuf2[49] = (height[idx] << 16) | width[idx];

        commandBuffer.startOffset = commandBuffer.offset + 0x08; /* Make space for LINK */
        memcpy((void*)((size_t)commandBuffer.logical + commandBuffer.startOffset), cmdbuf2, sizeof(cmdbuf2));
        commandBuffer.offset = commandBuffer.startOffset + sizeof(cmdbuf2);
        commandBuffer.free -= sizeof(cmdbuf2) + 0x08;
        printf("[2,%d] startOffset=%08x, offset=%08x, free=%08x\n", idx, (uint32_t)commandBuffer.startOffset, (uint32_t)commandBuffer.offset, (uint32_t)commandBuffer.free);
        if(viv_commit(conn, &commandBuffer, context) != 0)
        {
            fprintf(stderr, "Error committing second command buffer\n");
            exit(1);
        }
    }

    /* Submit command buffer 3 */
    commandBuffer.startOffset = commandBuffer.offset + 0x08;
    memcpy((void*)((size_t)commandBuffer.logical + commandBuffer.startOffset), cmdbuf3, sizeof(cmdbuf3));
    commandBuffer.offset = commandBuffer.startOffset + sizeof(cmdbuf3);
    commandBuffer.free -= sizeof(cmdbuf3) + 0x08;
    printf("[3] startOffset=%08x, offset=%08x, free=%08x\n", (uint32_t)commandBuffer.startOffset, (uint32_t)commandBuffer.offset, (uint32_t)commandBuffer.free);
    if(viv_commit(conn, &commandBuffer, context) != 0)
    {
        fprintf(stderr, "Error committing third command buffer\n");
        exit(1);
    }

    /* Submit command buffer 4 */
    cmdbuf4[67] = vtx_physical; //A
    cmdbuf4[68] = vtx_physical+0x239d0; //A
    cmdbuf4[69] = vtx_physical+(2*0x239d0); //A
    cmdbuf4[87] = text_lod_physical[0];
    cmdbuf4[89] = text_lod_physical[1];
    cmdbuf4[91] = text_lod_physical[2];
    cmdbuf4[93] = text_lod_physical[3];
    cmdbuf4[95] = text_lod_physical[4];
    cmdbuf4[97] = text_lod_physical[5];
    cmdbuf4[99] = text_lod_physical[6];
    cmdbuf4[101] = text_lod_physical[7];
    cmdbuf4[103] = text_lod_physical[8];
    cmdbuf4[105] = text_lod_physical[9];
    cmdbuf4[153] = cmdbuf4[155] = color_surface_physical;//G
    cmdbuf4[165] = cmdbuf4[167] = depth_surface_physical;//I    

    commandBuffer.startOffset = commandBuffer.offset + 0x08;
    memcpy((void*)((size_t)commandBuffer.logical + commandBuffer.startOffset), cmdbuf4, sizeof(cmdbuf4));
    commandBuffer.offset = commandBuffer.startOffset + sizeof(cmdbuf4);
    commandBuffer.free -= sizeof(cmdbuf4) + 0x08;
    printf("[4] startOffset=%08x, offset=%08x, free=%08x\n", (uint32_t)commandBuffer.startOffset, (uint32_t)commandBuffer.offset, (uint32_t)commandBuffer.free);
    if(viv_commit(conn, &commandBuffer, context) != 0)
    {
        fprintf(stderr, "Error committing command buffer 4\n");
        exit(1);
    }

    /* Submit event, and wait */
    if(viv_event_queue_signal(conn, sig_id, gcvKERNEL_PIXEL) != 0)
    {
        fprintf(stderr, "Cannot queue GPU signal\n");
        exit(1);
    }
    if(viv_user_signal_wait(conn, sig_id, VIV_WAIT_INDEFINITE) != 0)
    {
        fprintf(stderr, "Cannot wait for signal\n");
        exit(1);
    }

    /* Submit command buffer 5 */
    cmdbuf5[35] = cmdbuf5[37] = color_surface_physical;
    
    commandBuffer.startOffset = commandBuffer.offset + 0x08;
    memcpy((void*)((size_t)commandBuffer.logical + commandBuffer.startOffset), cmdbuf5, sizeof(cmdbuf5));
    commandBuffer.offset = commandBuffer.startOffset + sizeof(cmdbuf5);
    commandBuffer.free -= sizeof(cmdbuf5) + 0x08;
    printf("[5] startOffset=%08x, offset=%08x, free=%08x\n", (uint32_t)commandBuffer.startOffset, (uint32_t)commandBuffer.offset, (uint32_t)commandBuffer.free);
    if(viv_commit(conn, &commandBuffer, context) != 0)
    {
        fprintf(stderr, "Error committing command buffer 5\n");
        exit(1);
    }
    
    
    
    cmdbuf6[35] = color_surface_physical;
    cmdbuf6[37] = rs_dest_physical;
    
    commandBuffer.startOffset = commandBuffer.offset + 0x08;
    memcpy((void*)((size_t)commandBuffer.logical + commandBuffer.startOffset), cmdbuf6, sizeof(cmdbuf6));
    commandBuffer.offset = commandBuffer.startOffset + sizeof(cmdbuf6);
    commandBuffer.free -= sizeof(cmdbuf6) + 0x08;
    printf("[6] startOffset=%08x, offset=%08x, free=%08x\n", (uint32_t)commandBuffer.startOffset, (uint32_t)commandBuffer.offset, (uint32_t)commandBuffer.free);
    if(viv_commit(conn, &commandBuffer, context) != 0)
    {
        fprintf(stderr, "Error committing command buffer 5\n");
        exit(1);
    }
    
    
    /* Allocate bitmap memory, map */
    gcuVIDMEM_NODE_PTR bmp_node = 0;
    if(viv_alloc_linear_vidmem(conn, 0x177000, 0x40, gcvSURF_BITMAP, gcvPOOL_DEFAULT, &bmp_node, NULL)!=0)
    {
        fprintf(stderr, "Error allocating bitmap status memory\n");
        exit(1);
    }
    printf("Allocated bitmap node: node=%08x\n", (uint32_t)bmp_node);
    viv_addr_t bmp_physical = 0; // ADDR_J 
    void *bmp_logical = 0;
    if(viv_lock_vidmem(conn, bmp_node, &bmp_physical, &bmp_logical)!=0)
    {
        fprintf(stderr, "Error locking bmp memory\n");
        exit(1);
    }
    memset(bmp_logical, 0xff, 0x177000); // clear previous result
    printf("Locked bmp: phys=%08x log=%08x\n", (uint32_t)bmp_physical, (uint32_t)bmp_logical);
     
     
    /* Submit command buffer 7 */
    cmdbuf7[0x19] = rs_dest_physical; //color_surface_physical or rs_dest_physical
    cmdbuf7[0x1b] = bmp_physical;

    commandBuffer.startOffset = commandBuffer.offset + 0x08;
    memcpy((void*)((size_t)commandBuffer.logical + commandBuffer.startOffset), cmdbuf7, sizeof(cmdbuf7));
    commandBuffer.offset = commandBuffer.startOffset + sizeof(cmdbuf7);
    commandBuffer.free -= sizeof(cmdbuf7) + 0x08;
    printf("[7] startOffset=%08x, offset=%08x, free=%08x\n", (uint32_t)commandBuffer.startOffset, (uint32_t)commandBuffer.offset, (uint32_t)commandBuffer.free);
    if(viv_commit(conn, &commandBuffer, context) != 0)
    {
        fprintf(stderr, "Error committing command buffer 5\n");
        exit(1);
    }

    /* Submit event queue with SIGNAL, fromWhere=gcvKERNEL_PIXEL */
    if(viv_event_queue_signal(conn, sig_id, gcvKERNEL_PIXEL) != 0)
    {
        fprintf(stderr, "Cannot queue GPU signal\n");
        exit(1);
    }
    /* Wait for signal */
    if(viv_user_signal_wait(conn, sig_id, VIV_WAIT_INDEFINITE) != 0)
    {
        fprintf(stderr, "Cannot wait for signal\n");
        exit(1);
    }

    bmp_dump32(bmp_logical, 800, 480, false, "/home/linaro/replay.bmp");
    /* Unlock video memory */
    if(viv_unlock_vidmem(conn, bmp_node, gcvSURF_BITMAP, 1) != 0)
    {
        fprintf(stderr, "Cannot unlock vidmem\n");
        exit(1);
    }
    /*
    for(int x=0; x<0x700; ++x)
    {
        uint32_t value = ((uint32_t*)rt_ts_logical)[x];
        printf("Sample ts: %x %08x\n", x*4, value);
    }*/
    //printf("Contextbuffer used %i\n", *contextBuffer.inUse);
    viv_close(conn);
    return 0;
}
Exemple #3
0
int main(int argc, char **argv)
{
    struct fbdemos_scaffold *fbs = 0;
    fbdemo_init(&fbs);
    int width = fbs->width;
    int height = fbs->height;
    struct pipe_context *pipe = fbs->pipe;

    /* Convert and upload embedded texture */
    struct pipe_resource *tex_resource = fbdemo_create_2d(fbs->screen, PIPE_BIND_SAMPLER_VIEW, PIPE_FORMAT_B8G8R8X8_UNORM, 
            COMPANION_TEXTURE_WIDTH, COMPANION_TEXTURE_HEIGHT, 0);
    void *temp = malloc(COMPANION_TEXTURE_WIDTH * COMPANION_TEXTURE_HEIGHT * 4); 
    etna_convert_r8g8b8_to_b8g8r8x8(temp, (const uint8_t*)companion_texture, COMPANION_TEXTURE_WIDTH * COMPANION_TEXTURE_HEIGHT);
    etna_pipe_inline_write(pipe, tex_resource, 0, 0, temp, COMPANION_TEXTURE_WIDTH * COMPANION_TEXTURE_HEIGHT * 4);
    free(temp);

    /* resources */
    struct pipe_resource *rt_resource = fbdemo_create_2d(fbs->screen, PIPE_BIND_RENDER_TARGET, PIPE_FORMAT_B8G8R8X8_UNORM, width, height, 0);
    struct pipe_resource *z_resource = fbdemo_create_2d(fbs->screen, PIPE_BIND_RENDER_TARGET, PIPE_FORMAT_Z16_UNORM, width, height, 0);

    /* bind render target to framebuffer */
    etna_fb_bind_resource(&fbs->fb, rt_resource);

    /* geometry */
    struct pipe_resource *vtx_resource = pipe_buffer_create(fbs->screen, PIPE_BIND_VERTEX_BUFFER, PIPE_USAGE_IMMUTABLE, VERTEX_BUFFER_SIZE);
    struct pipe_resource *idx_resource = pipe_buffer_create(fbs->screen, PIPE_BIND_INDEX_BUFFER, PIPE_USAGE_IMMUTABLE, INDEX_BUFFER_SIZE);

    struct pipe_transfer *vtx_transfer = 0;
    float *vtx_logical = pipe_buffer_map(pipe, vtx_resource, PIPE_TRANSFER_WRITE | PIPE_TRANSFER_UNSYNCHRONIZED, &vtx_transfer);
    assert(vtx_logical);
    struct pipe_transfer *idx_transfer = 0;
    float *idx_logical = pipe_buffer_map(pipe, idx_resource, PIPE_TRANSFER_WRITE | PIPE_TRANSFER_UNSYNCHRONIZED, &idx_transfer);
    assert(idx_logical);
#ifndef INDEXED
    printf("Interleaving vertices...\n");
    float *vertices_array = companion_vertices_array();
    float *texture_coordinates_array =
            companion_texture_coordinates_array();
    float *normals_array = companion_normals_array();
    assert(COMPANION_ARRAY_COUNT*(3+3+2)*sizeof(float) < VERTEX_BUFFER_SIZE);
    for(int vert=0; vert<COMPANION_ARRAY_COUNT; ++vert)
    {
        int dest_idx = vert * (3 + 3 + 2);
        for(int comp=0; comp<3; ++comp)
            ((float*)vtx_logical)[dest_idx+comp+0] = vertices_array[vert*3 + comp]; /* 0 */
        for(int comp=0; comp<3; ++comp)
            ((float*)vtx_logical)[dest_idx+comp+3] = normals_array[vert*3 + comp]; /* 1 */
        for(int comp=0; comp<2; ++comp)
            ((float*)vtx_logical)[dest_idx+comp+6] = texture_coordinates_array[vert*2 + comp]; /* 2 */
    }
#else
    printf("Interleaving vertices and copying index buffer...\n");
    assert(COMPANION_VERTEX_COUNT*(3+3+2)*sizeof(float) < VERTEX_BUFFER_SIZE);
    for(int vert=0; vert<COMPANION_VERTEX_COUNT; ++vert)
    {
        int dest_idx = vert * (3 + 3 + 2);
        for(int comp=0; comp<3; ++comp)
            ((float*)vtx_logical)[dest_idx+comp+0] = companion_vertices[vert][comp]; /* 0 */
        for(int comp=0; comp<3; ++comp)
            ((float*)vtx_logical)[dest_idx+comp+3] = companion_normals[vert][comp]; /* 1 */
        for(int comp=0; comp<2; ++comp)
            ((float*)vtx_logical)[dest_idx+comp+6] = companion_texture_coordinates[vert][comp]; /* 2 */
    }
    assert(COMPANION_TRIANGLE_COUNT*3*sizeof(unsigned short) < INDEX_BUFFER_SIZE);
    memcpy(idx_logical, &companion_triangles[0][0], COMPANION_TRIANGLE_COUNT*3*sizeof(unsigned short));
#endif
    pipe_buffer_unmap(pipe, vtx_transfer);
    pipe_buffer_unmap(pipe, idx_transfer);

    struct pipe_vertex_buffer vertex_buffer_desc = {
            .stride = (3 + 3 + 2)*4,
            .buffer_offset = 0,
            .buffer = vtx_resource,
            .user_buffer = 0
            };
    struct pipe_index_buffer index_buffer_desc = {
            .index_size = sizeof(unsigned short),
            .offset = 0,
            .buffer = idx_resource,
            .user_buffer = 0
            };
    struct pipe_vertex_element pipe_vertex_elements[] = {
        { /* positions */
            .src_offset = 0,
            .instance_divisor = 0,
            .vertex_buffer_index = 0,
            .src_format = PIPE_FORMAT_R32G32B32_FLOAT 
        },
        { /* normals */
            .src_offset = 0xc,
            .instance_divisor = 0,
            .vertex_buffer_index = 0,
            .src_format = PIPE_FORMAT_R32G32B32_FLOAT 
        },
        { /* texture coord */
            .src_offset = 0x18,
            .instance_divisor = 0,
            .vertex_buffer_index = 0,
            .src_format = PIPE_FORMAT_R32G32_FLOAT
        }
    };
Exemple #4
0
int main(int argc, char **argv)
{
    int rv;
    int width, height;
    int width_s, height_s;
    int padded_width, padded_height;
    int backbuffer = 0;
    int supersample_x = 2; // 1 or 2
    int supersample_y = 2; // 1 or 2
    int extra_ps_inputs = 0;
    
    fb_info fb;
    rv = fb_open(0, &fb);
    if(rv!=0)
    {
        exit(1);
    }
    width = fb.fb_var.xres;
    height = fb.fb_var.yres;
    width_s = width * supersample_x;
    height_s = height * supersample_y;
    padded_width = etna_align_up(width_s, 64);
    padded_height = etna_align_up(height_s, 64);
    extra_ps_inputs = (supersample_x > 1 || supersample_y > 1); // in case of supersample, there is an extra input to PS

    printf("padded_width %i padded_height %i\n", padded_width, padded_height);
    rv = viv_open();
    if(rv!=0)
    {
        fprintf(stderr, "Error opening device\n");
        exit(1);
    }
    printf("Succesfully opened device\n");

    etna_vidmem *rt = 0; /* main render target */
    etna_vidmem *rt_ts = 0; /* tile status for main render target */
    etna_vidmem *z = 0; /* depth for main render target */
    etna_vidmem *z_ts = 0; /* depth ts for main render target */
    etna_vidmem *vtx = 0; /* vertex buffer */
    etna_vidmem *idx = 0; /* index buffer */
    etna_vidmem *aux_rt = 0; /* auxilary render target */
    etna_vidmem *aux_rt_ts = 0; /* tile status for auxilary render target */
    etna_vidmem *tex = 0; /* texture */
    etna_vidmem *bmp = 0; /* bitmap */

    /* TODO: anti aliasing (doubles width/height) */
    size_t rt_size = padded_width * padded_height * 4;
    size_t rt_ts_size = etna_align_up((padded_width * padded_height * 4)/0x100, 0x100);
    size_t z_size = padded_width * padded_height * 2;
    size_t z_ts_size = etna_align_up((padded_width * padded_height * 2)/0x100, 0x100);
    size_t bmp_size = width * height * 4;

    if(etna_vidmem_alloc_linear(&rt, rt_size, gcvSURF_RENDER_TARGET, gcvPOOL_DEFAULT, true)!=ETNA_OK ||
       etna_vidmem_alloc_linear(&rt_ts, rt_ts_size, gcvSURF_TILE_STATUS, gcvPOOL_DEFAULT, true)!=ETNA_OK ||
       etna_vidmem_alloc_linear(&z, z_size, gcvSURF_DEPTH, gcvPOOL_DEFAULT, true)!=ETNA_OK ||
       etna_vidmem_alloc_linear(&z_ts, z_ts_size, gcvSURF_TILE_STATUS, gcvPOOL_DEFAULT, true)!=ETNA_OK ||
       etna_vidmem_alloc_linear(&vtx, VERTEX_BUFFER_SIZE, gcvSURF_VERTEX, gcvPOOL_DEFAULT, true)!=ETNA_OK ||
       etna_vidmem_alloc_linear(&idx, INDEX_BUFFER_SIZE, gcvSURF_INDEX, gcvPOOL_DEFAULT, true)!=ETNA_OK ||
       etna_vidmem_alloc_linear(&aux_rt, 0x4000, gcvSURF_RENDER_TARGET, gcvPOOL_SYSTEM, true)!=ETNA_OK ||
       etna_vidmem_alloc_linear(&aux_rt_ts, 0x100, gcvSURF_TILE_STATUS, gcvPOOL_DEFAULT, true)!=ETNA_OK ||
       etna_vidmem_alloc_linear(&tex, 0x100000, gcvSURF_TEXTURE, gcvPOOL_DEFAULT, true)!=ETNA_OK ||
       etna_vidmem_alloc_linear(&bmp, bmp_size, gcvSURF_BITMAP, gcvPOOL_DEFAULT, true)!=ETNA_OK
       )
    {
        fprintf(stderr, "Error allocating video memory\n");
        exit(1);
    }

    /* Phew, now we got all the memory we need.
     * Write interleaved attribute vertex stream.
     * Unlike the GL example we only do this once, not every time glDrawArrays is called, the same would be accomplished
     * from GL by using a vertex buffer object.
     */
    memset(vtx->logical, 0, 0x5ef80);
#ifndef INDEXED
    printf("Interleaving vertices...\n");
    float *vertices_array = companion_vertices_array();
    float *texture_coordinates_array =
            companion_texture_coordinates_array();
    float *normals_array = companion_normals_array();
    assert(COMPANION_ARRAY_COUNT*(3+3+2)*sizeof(float) < VERTEX_BUFFER_SIZE);
    for(int vert=0; vert<COMPANION_ARRAY_COUNT; ++vert)
    {
        int dest_idx = vert * (3 + 3 + 2);
        for(int comp=0; comp<3; ++comp)
            ((float*)vtx->logical)[dest_idx+comp+0] = vertices_array[vert*3 + comp]; /* 0 */
        for(int comp=0; comp<3; ++comp)
            ((float*)vtx->logical)[dest_idx+comp+3] = normals_array[vert*3 + comp]; /* 1 */
        for(int comp=0; comp<2; ++comp)
            ((float*)vtx->logical)[dest_idx+comp+6] = texture_coordinates_array[vert*2 + comp]; /* 2 */
    }
#else
    printf("Interleaving vertices and copying index buffer...\n");
    assert(COMPANION_VERTEX_COUNT*(3+3+2)*sizeof(float) < VERTEX_BUFFER_SIZE);
    for(int vert=0; vert<COMPANION_VERTEX_COUNT; ++vert)
    {
        int dest_idx = vert * (3 + 3 + 2);
        for(int comp=0; comp<3; ++comp)
            ((float*)vtx->logical)[dest_idx+comp+0] = companion_vertices[vert][comp]; /* 0 */
        for(int comp=0; comp<3; ++comp)
            ((float*)vtx->logical)[dest_idx+comp+3] = companion_normals[vert][comp]; /* 1 */
        for(int comp=0; comp<2; ++comp)
            ((float*)vtx->logical)[dest_idx+comp+6] = companion_texture_coordinates[vert][comp]; /* 2 */
    }
    assert(COMPANION_TRIANGLE_COUNT*3*sizeof(unsigned short) < INDEX_BUFFER_SIZE);
    memcpy(idx->logical, &companion_triangles[0][0], COMPANION_TRIANGLE_COUNT*3*sizeof(unsigned short));
#endif
    /* Fill in texture (convert from RGB linear to tiled) */
#define TILE_WIDTH (4)
#define TILE_HEIGHT (4)
#define TILE_WORDS (TILE_WIDTH*TILE_HEIGHT)
    unsigned ytiles = COMPANION_TEXTURE_HEIGHT / TILE_HEIGHT;
    unsigned xtiles = COMPANION_TEXTURE_WIDTH / TILE_WIDTH;
    unsigned dst_stride = xtiles * TILE_WORDS;

    for(unsigned ty=0; ty<ytiles; ++ty)
    {
        for(unsigned tx=0; tx<xtiles; ++tx)
        {
            unsigned ofs = ty * dst_stride + tx * TILE_WORDS;
            for(unsigned y=0; y<TILE_HEIGHT; ++y)
            {
                for(unsigned x=0; x<TILE_WIDTH; ++x)
                {
                    unsigned srcy = ty*TILE_HEIGHT + y;
                    unsigned srcx = tx*TILE_WIDTH + x;
                    unsigned src_ofs = (srcy*COMPANION_TEXTURE_WIDTH+srcx)*3;
                    unsigned r,g,b,a;
#ifndef TEST_PATTERN /* actual texture */
                    r = ((uint8_t*)companion_texture)[src_ofs+0];
                    g = ((uint8_t*)companion_texture)[src_ofs+1];
                    b = ((uint8_t*)companion_texture)[src_ofs+2];
#else /* test pattern */
                    r = srcx; g = srcy; b = 0;
#endif
                    a = 255;

                    ((uint32_t*)tex->logical)[ofs] = ((a&0xFF) << 24) | ((b&0xFF) << 16) | ((g&0xFF) << 8) | (r&0xFF);
                    ofs += 1;
                }
            }
        }
    }

    etna_ctx *ctx = 0;
    if(etna_create(&ctx) != ETNA_OK)
    {
        printf("Unable to create context\n");
        exit(1);
    }

    for(int frame=0; frame<1000; ++frame)
    {
        printf("*** FRAME %i ****\n", frame);
        /* XXX part of this can be put outside the loop, but until we have usable context management
         * this is safest.
         */
        etna_set_state(ctx, VIVS_GL_VERTEX_ELEMENT_CONFIG, 0x1);
        etna_set_state(ctx, VIVS_RA_CONTROL, 0x1);
        etna_set_state(ctx, VIVS_PA_W_CLIP_LIMIT, 0x34000001);
        etna_set_state(ctx, VIVS_PA_SYSTEM_MODE, 0x11);
        etna_set_state(ctx, VIVS_PA_CONFIG, ETNA_MASKED_BIT(VIVS_PA_CONFIG_UNK22, 0));
        etna_set_state(ctx, VIVS_SE_CONFIG, 0x0);
        etna_set_state(ctx, VIVS_GL_FLUSH_CACHE, VIVS_GL_FLUSH_CACHE_COLOR);

        etna_set_state(ctx, VIVS_PE_ALPHA_CONFIG,
                ETNA_MASKED_BIT(VIVS_PE_ALPHA_CONFIG_BLEND_ENABLE_COLOR, 0) &
                ETNA_MASKED_BIT(VIVS_PE_ALPHA_CONFIG_BLEND_ENABLE_ALPHA, 0) &
                ETNA_MASKED(VIVS_PE_ALPHA_CONFIG_SRC_FUNC_COLOR, BLEND_FUNC_ONE) &
                ETNA_MASKED(VIVS_PE_ALPHA_CONFIG_SRC_FUNC_ALPHA, BLEND_FUNC_ONE) &
                ETNA_MASKED(VIVS_PE_ALPHA_CONFIG_DST_FUNC_COLOR, BLEND_FUNC_ZERO) &
                ETNA_MASKED(VIVS_PE_ALPHA_CONFIG_DST_FUNC_ALPHA, BLEND_FUNC_ZERO) &
                ETNA_MASKED(VIVS_PE_ALPHA_CONFIG_EQ_COLOR, BLEND_EQ_ADD) &
                ETNA_MASKED(VIVS_PE_ALPHA_CONFIG_EQ_ALPHA, BLEND_EQ_ADD));
        etna_set_state(ctx, VIVS_PE_ALPHA_BLEND_COLOR, 
                VIVS_PE_ALPHA_BLEND_COLOR_B(0) | 
                VIVS_PE_ALPHA_BLEND_COLOR_G(0) | 
                VIVS_PE_ALPHA_BLEND_COLOR_R(0) | 
                VIVS_PE_ALPHA_BLEND_COLOR_A(0));
        etna_set_state(ctx, VIVS_PE_ALPHA_OP, ETNA_MASKED_BIT(VIVS_PE_ALPHA_OP_ALPHA_TEST, 0));
        etna_set_state(ctx, VIVS_PA_CONFIG, ETNA_MASKED_INL(VIVS_PA_CONFIG_CULL_FACE_MODE, CCW));
        etna_set_state(ctx, VIVS_PE_STENCIL_CONFIG, ETNA_MASKED(VIVS_PE_STENCIL_CONFIG_REF_FRONT, 0) &
                                                    ETNA_MASKED(VIVS_PE_STENCIL_CONFIG_MASK_FRONT, 0xff) & 
                                                    ETNA_MASKED(VIVS_PE_STENCIL_CONFIG_WRITE_MASK, 0xff) &
                                                    ETNA_MASKED_INL(VIVS_PE_STENCIL_CONFIG_MODE, DISABLED));
        etna_set_state(ctx, VIVS_PE_STENCIL_OP, ETNA_MASKED(VIVS_PE_STENCIL_OP_FUNC_FRONT, COMPARE_FUNC_ALWAYS) &
                                                ETNA_MASKED(VIVS_PE_STENCIL_OP_FUNC_BACK, COMPARE_FUNC_ALWAYS) &
                                                ETNA_MASKED(VIVS_PE_STENCIL_OP_FAIL_FRONT, STENCIL_OP_KEEP) & 
                                                ETNA_MASKED(VIVS_PE_STENCIL_OP_FAIL_BACK, STENCIL_OP_KEEP) & 
                                                ETNA_MASKED(VIVS_PE_STENCIL_OP_DEPTH_FAIL_FRONT, STENCIL_OP_KEEP) & 
                                                ETNA_MASKED(VIVS_PE_STENCIL_OP_DEPTH_FAIL_BACK, STENCIL_OP_KEEP) &
                                                ETNA_MASKED(VIVS_PE_STENCIL_OP_PASS_FRONT, STENCIL_OP_KEEP) &
                                                ETNA_MASKED(VIVS_PE_STENCIL_OP_PASS_BACK, STENCIL_OP_KEEP));

        etna_set_state(ctx, VIVS_SE_DEPTH_SCALE, 0x0);
        etna_set_state(ctx, VIVS_SE_DEPTH_BIAS, 0x0);
        
        etna_set_state(ctx, VIVS_PA_CONFIG, ETNA_MASKED_INL(VIVS_PA_CONFIG_FILL_MODE, SOLID) & 
                                            ETNA_MASKED_INL(VIVS_PA_CONFIG_SHADE_MODEL, SMOOTH));
        
        etna_set_state(ctx, VIVS_PE_COLOR_FORMAT, 
                ETNA_MASKED_BIT(VIVS_PE_COLOR_FORMAT_PARTIAL, 0) & 
                ETNA_MASKED(VIVS_PE_COLOR_FORMAT_COMPONENTS, 0xf) &
                ETNA_MASKED(VIVS_PE_COLOR_FORMAT_FORMAT, RS_FORMAT_X8R8G8B8) &
                ETNA_MASKED_BIT(VIVS_PE_COLOR_FORMAT_SUPER_TILED, 1));

        etna_set_state(ctx, VIVS_PE_COLOR_ADDR, rt->address);
        etna_set_state(ctx, VIVS_PE_COLOR_STRIDE, padded_width * 4); 

        uint32_t ts_msaa_config;
        if(supersample_x == 2 && supersample_y == 2)
        {
            // 4X MSAA
            etna_set_state(ctx, VIVS_GL_MULTI_SAMPLE_CONFIG, 
                    ETNA_MASKED_INL(VIVS_GL_MULTI_SAMPLE_CONFIG_MSAA_SAMPLES, 4X) &
                    ETNA_MASKED(VIVS_GL_MULTI_SAMPLE_CONFIG_MSAA_ENABLES, 0xf) &
                    ETNA_MASKED(VIVS_GL_MULTI_SAMPLE_CONFIG_UNK12, 0x0) &
                    ETNA_MASKED(VIVS_GL_MULTI_SAMPLE_CONFIG_UNK16, 0x0)
                    ); 
            etna_set_state(ctx, VIVS_RA_MULTISAMPLE_UNK00E04, 0x0);
            etna_set_state(ctx, VIVS_RA_MULTISAMPLE_UNK00E10(2), 0xaaa22a22);
            etna_set_state(ctx, VIVS_RA_CENTROID_TABLE(8), 0x262a2288);
            etna_set_state(ctx, VIVS_RA_CENTROID_TABLE(9), 0x886688a2);
            etna_set_state(ctx, VIVS_RA_CENTROID_TABLE(10), 0x888866aa);
            etna_set_state(ctx, VIVS_RA_CENTROID_TABLE(11), 0x668888a6);
            etna_set_state(ctx, VIVS_RA_MULTISAMPLE_UNK00E10(1), 0xe6ae622a);
            etna_set_state(ctx, VIVS_RA_CENTROID_TABLE(4), 0x46622a88);
            etna_set_state(ctx, VIVS_RA_CENTROID_TABLE(5), 0x888888ae);
            etna_set_state(ctx, VIVS_RA_CENTROID_TABLE(6), 0x888888e6);
            etna_set_state(ctx, VIVS_RA_CENTROID_TABLE(7), 0x888888ca);
            etna_set_state(ctx, VIVS_RA_MULTISAMPLE_UNK00E10(0), 0xeaa26e26);
            etna_set_state(ctx, VIVS_RA_CENTROID_TABLE(0), 0x4a6e2688);
            etna_set_state(ctx, VIVS_RA_CENTROID_TABLE(1), 0x888888a2);
            etna_set_state(ctx, VIVS_RA_CENTROID_TABLE(2), 0x888888ea);
            etna_set_state(ctx, VIVS_RA_CENTROID_TABLE(3), 0x888888c6);
            ts_msaa_config = VIVS_TS_MEM_CONFIG_MSAA | VIVS_TS_MEM_CONFIG_MSAA_FORMAT_A8R8G8B8;
        } else if(supersample_x == 2 && supersample_y == 1)
        {
            // 2X MSAA
            etna_set_state(ctx, VIVS_GL_MULTI_SAMPLE_CONFIG, 
                    ETNA_MASKED_INL(VIVS_GL_MULTI_SAMPLE_CONFIG_MSAA_SAMPLES, 2X) &
                    ETNA_MASKED(VIVS_GL_MULTI_SAMPLE_CONFIG_MSAA_ENABLES, 0xf) &
                    ETNA_MASKED(VIVS_GL_MULTI_SAMPLE_CONFIG_UNK12, 0x0) &
                    ETNA_MASKED(VIVS_GL_MULTI_SAMPLE_CONFIG_UNK16, 0x0)
                    ); 
            etna_set_state(ctx, VIVS_RA_MULTISAMPLE_UNK00E04, 0x0);
            etna_set_state(ctx, VIVS_RA_MULTISAMPLE_UNK00E10(0), 0xaa22);
            etna_set_state(ctx, VIVS_RA_CENTROID_TABLE(0), 0x66aa2288);
            etna_set_state(ctx, VIVS_RA_CENTROID_TABLE(1), 0x88558800);
            etna_set_state(ctx, VIVS_RA_CENTROID_TABLE(2), 0x88881100);
            etna_set_state(ctx, VIVS_RA_CENTROID_TABLE(3), 0x33888800);
            ts_msaa_config = VIVS_TS_MEM_CONFIG_MSAA | VIVS_TS_MEM_CONFIG_MSAA_FORMAT_A8R8G8B8;
        } else { // No multisampling
            etna_set_state(ctx, VIVS_GL_MULTI_SAMPLE_CONFIG, 
                    ETNA_MASKED_INL(VIVS_GL_MULTI_SAMPLE_CONFIG_MSAA_SAMPLES, NONE) &
                    ETNA_MASKED(VIVS_GL_MULTI_SAMPLE_CONFIG_MSAA_ENABLES, 0xf) &
                    ETNA_MASKED(VIVS_GL_MULTI_SAMPLE_CONFIG_UNK12, 0x0) &
                    ETNA_MASKED(VIVS_GL_MULTI_SAMPLE_CONFIG_UNK16, 0x0)
                    ); 
            ts_msaa_config = 0;
        }

        etna_set_state(ctx, VIVS_GL_FLUSH_CACHE, VIVS_GL_FLUSH_CACHE_COLOR);

        etna_set_state(ctx, VIVS_TS_COLOR_CLEAR_VALUE, 0);
        etna_set_state(ctx, VIVS_TS_COLOR_STATUS_BASE, rt_ts->address);
        etna_set_state(ctx, VIVS_TS_COLOR_SURFACE_BASE, rt->address);

        etna_set_state(ctx, VIVS_PE_DEPTH_CONFIG, 
                ETNA_MASKED_BIT(VIVS_PE_DEPTH_CONFIG_WRITE_ENABLE, 0) &
                ETNA_MASKED_INL(VIVS_PE_DEPTH_CONFIG_DEPTH_FORMAT, D16) &
                ETNA_MASKED_BIT(VIVS_PE_DEPTH_CONFIG_SUPER_TILED, 1) &
                ETNA_MASKED_BIT(VIVS_PE_DEPTH_CONFIG_EARLY_Z, 1));
        etna_set_state(ctx, VIVS_PE_DEPTH_ADDR, z->address);
        etna_set_state(ctx, VIVS_PE_DEPTH_STRIDE, padded_width * 2);
        etna_set_state(ctx, VIVS_PE_HDEPTH_CONTROL, VIVS_PE_HDEPTH_CONTROL_FORMAT_DISABLED);
        etna_set_state_f32(ctx, VIVS_PE_DEPTH_NORMALIZE, 65535.0);
        etna_set_state(ctx, VIVS_GL_FLUSH_CACHE, VIVS_GL_FLUSH_CACHE_DEPTH);

        etna_set_state(ctx, VIVS_TS_DEPTH_CLEAR_VALUE, 0xffffffff);
        etna_set_state(ctx, VIVS_TS_DEPTH_STATUS_BASE, z_ts->address);
        etna_set_state(ctx, VIVS_TS_DEPTH_SURFACE_BASE, z->address);
        etna_set_state(ctx, VIVS_TS_MEM_CONFIG, 
                VIVS_TS_MEM_CONFIG_DEPTH_FAST_CLEAR |
                VIVS_TS_MEM_CONFIG_COLOR_FAST_CLEAR |
                VIVS_TS_MEM_CONFIG_DEPTH_16BPP | 
                VIVS_TS_MEM_CONFIG_DEPTH_COMPRESSION | 
                ts_msaa_config);

#ifdef EXTRA_DELAYS
        /* Warm up RS on aux render target (is this needed?) */
        etna_set_state(ctx, VIVS_GL_FLUSH_CACHE, VIVS_GL_FLUSH_CACHE_COLOR | VIVS_GL_FLUSH_CACHE_DEPTH);
        etna_warm_up_rs(ctx, aux_rt->address, aux_rt_ts->address);

        etna_set_state(ctx, VIVS_TS_COLOR_STATUS_BASE, rt_ts_physical);
        etna_set_state(ctx, VIVS_TS_COLOR_SURFACE_BASE, rt_physical);

        etna_set_state(ctx, VIVS_GL_FLUSH_CACHE, VIVS_GL_FLUSH_CACHE_COLOR | VIVS_GL_FLUSH_CACHE_DEPTH);
        etna_warm_up_rs(ctx, aux_rt_physical, aux_rt_ts_physical);

        etna_set_state(ctx, VIVS_TS_COLOR_STATUS_BASE, rt_ts_physical);
        etna_set_state(ctx, VIVS_TS_COLOR_SURFACE_BASE, rt_physical);
        etna_set_state(ctx, VIVS_GL_FLUSH_CACHE, VIVS_GL_FLUSH_CACHE_COLOR | VIVS_GL_FLUSH_CACHE_DEPTH);
       
        etna_set_state(ctx, VIVS_GL_FLUSH_CACHE, VIVS_GL_FLUSH_CACHE_COLOR | VIVS_GL_FLUSH_CACHE_DEPTH);
        etna_warm_up_rs(ctx, aux_rt_physical, aux_rt_ts_physical);
        etna_set_state(ctx, VIVS_TS_COLOR_STATUS_BASE, rt_ts_physical);
        etna_set_state(ctx, VIVS_TS_COLOR_SURFACE_BASE, rt_physical);
#endif
        /* sync rasterizer to pixel engine after changes to PE config */
        etna_stall(ctx, SYNC_RECIPIENT_RA, SYNC_RECIPIENT_PE);

        /* Set up the resolve to clear tile status for main render target and depth
         * Regard the TS as an image of width 16 with 4 bytes per pixel (64 bytes per row)
         * */
        etna_set_state(ctx, VIVS_RS_CONFIG,
                VIVS_RS_CONFIG_SOURCE_FORMAT(RS_FORMAT_A8R8G8B8) |
                VIVS_RS_CONFIG_DEST_FORMAT(RS_FORMAT_A8R8G8B8)
                );
        etna_set_state_multi(ctx, VIVS_RS_DITHER(0), 2, (uint32_t[]){0xffffffff, 0xffffffff});
        etna_set_state(ctx, VIVS_RS_FILL_VALUE(0), 0x55555555);
        etna_set_state(ctx, VIVS_RS_CLEAR_CONTROL, 
                VIVS_RS_CLEAR_CONTROL_MODE_ENABLED1 |
                (0xffff << VIVS_RS_CLEAR_CONTROL_BITS__SHIFT));
        etna_set_state(ctx, VIVS_RS_EXTRA_CONFIG, 0);
        /*    clear color ts */
        etna_set_state(ctx, VIVS_RS_DEST_ADDR, rt_ts->address);
        etna_set_state(ctx, VIVS_RS_DEST_STRIDE, 0x40);
        etna_set_state(ctx, VIVS_RS_WINDOW_SIZE, 
                ((rt_ts_size/0x40) << VIVS_RS_WINDOW_SIZE_HEIGHT__SHIFT) |
                (16 << VIVS_RS_WINDOW_SIZE_WIDTH__SHIFT));
        etna_set_state(ctx, VIVS_RS_KICKER, 0xbeebbeeb);
        /*    clear depth ts */
        etna_set_state(ctx, VIVS_RS_DEST_ADDR, z_ts->address);
        etna_set_state(ctx, VIVS_RS_DEST_STRIDE, 0x40);
        etna_set_state(ctx, VIVS_RS_WINDOW_SIZE, 
                ((z_ts_size/0x40) << VIVS_RS_WINDOW_SIZE_HEIGHT__SHIFT) |
                (16 << VIVS_RS_WINDOW_SIZE_WIDTH__SHIFT));
        etna_set_state(ctx, VIVS_RS_KICKER, 0xbeebbeeb);
        /** Done */
        
        etna_set_state(ctx, VIVS_TS_COLOR_CLEAR_VALUE, 0xff7f7f7f);
        etna_set_state(ctx, VIVS_GL_FLUSH_CACHE, VIVS_GL_FLUSH_CACHE_COLOR);

        etna_set_state(ctx, VIVS_TS_COLOR_STATUS_BASE, rt_ts->address);
        etna_set_state(ctx, VIVS_TS_COLOR_SURFACE_BASE, rt->address);
        etna_set_state(ctx, VIVS_GL_FLUSH_CACHE, VIVS_GL_FLUSH_CACHE_COLOR | VIVS_GL_FLUSH_CACHE_DEPTH);

        /* depth setup */
        etna_set_state(ctx, VIVS_PE_DEPTH_CONFIG, ETNA_MASKED_BIT(VIVS_PE_DEPTH_CONFIG_WRITE_ENABLE, 1) & 
                                                  ETNA_MASKED(VIVS_PE_DEPTH_CONFIG_DEPTH_FUNC, COMPARE_FUNC_LESS) & 
                                                  ETNA_MASKED_INL(VIVS_PE_DEPTH_CONFIG_DEPTH_MODE, Z) &
                                                  ETNA_MASKED_BIT(VIVS_PE_DEPTH_CONFIG_ONLY_DEPTH, 0));
        etna_set_state_f32(ctx, VIVS_PE_DEPTH_NEAR, 0.0);
        etna_set_state_f32(ctx, VIVS_PE_DEPTH_FAR, 1.0);
        etna_set_state_f32(ctx, VIVS_PE_DEPTH_NORMALIZE, 65535.0);

        /* set up primitive assembly and setup engine */
        etna_set_state_f32(ctx, VIVS_PA_VIEWPORT_OFFSET_Z, 0.0);
        etna_set_state_f32(ctx, VIVS_PA_VIEWPORT_SCALE_Z, 1.0);
        etna_set_state_fixp(ctx, VIVS_PA_VIEWPORT_OFFSET_X, width << 15);
        etna_set_state_fixp(ctx, VIVS_PA_VIEWPORT_OFFSET_Y, height << 15);
        etna_set_state_fixp(ctx, VIVS_PA_VIEWPORT_SCALE_X, width << 15);
        etna_set_state_fixp(ctx, VIVS_PA_VIEWPORT_SCALE_Y, height << 15);
        etna_set_state_fixp(ctx, VIVS_SE_SCISSOR_LEFT, 0);
        etna_set_state_fixp(ctx, VIVS_SE_SCISSOR_TOP, 0);
        etna_set_state_fixp(ctx, VIVS_SE_SCISSOR_RIGHT, (width << 16) | 5);
        etna_set_state_fixp(ctx, VIVS_SE_SCISSOR_BOTTOM, (height << 16) | 5);

        /* set up texture unit */
        etna_set_state(ctx, VIVS_GL_FLUSH_CACHE, VIVS_GL_FLUSH_CACHE_TEXTURE);
        etna_set_state(ctx, VIVS_TE_SAMPLER_SIZE(0), 
                VIVS_TE_SAMPLER_SIZE_WIDTH(512)|VIVS_TE_SAMPLER_SIZE_HEIGHT(512));
        etna_set_state(ctx, VIVS_TE_SAMPLER_LOG_SIZE(0), 
                VIVS_TE_SAMPLER_LOG_SIZE_WIDTH(9<<5) |
                VIVS_TE_SAMPLER_LOG_SIZE_HEIGHT(9<<5));
        etna_set_state(ctx, VIVS_TE_SAMPLER_LOD_ADDR(0,0), tex->address);
        etna_set_state(ctx, VIVS_TE_SAMPLER_CONFIG0(0), 
                VIVS_TE_SAMPLER_CONFIG0_TYPE(TEXTURE_TYPE_2D)|
                VIVS_TE_SAMPLER_CONFIG0_UWRAP(TEXTURE_WRAPMODE_CLAMP_TO_EDGE)|
                VIVS_TE_SAMPLER_CONFIG0_VWRAP(TEXTURE_WRAPMODE_CLAMP_TO_EDGE)|
                VIVS_TE_SAMPLER_CONFIG0_MIN(TEXTURE_FILTER_LINEAR)|
                VIVS_TE_SAMPLER_CONFIG0_MIP(TEXTURE_FILTER_NONE)|
                VIVS_TE_SAMPLER_CONFIG0_MAG(TEXTURE_FILTER_LINEAR)|
                VIVS_TE_SAMPLER_CONFIG0_FORMAT(TEXTURE_FORMAT_X8R8G8B8));
        etna_set_state(ctx, VIVS_TE_SAMPLER_LOD_CONFIG(0), 0x00000000); /*   TE.SAMPLER[0].LOD_CONFIG := 0x0 */

        /* shader setup */
        etna_set_state(ctx, VIVS_VS_END_PC, vs_size/16);
        etna_set_state_multi(ctx, VIVS_VS_INPUT_COUNT, 3, (uint32_t[]){
                /* VIVS_VS_INPUT_COUNT */ VIVS_VS_INPUT_COUNT_UNK8(1) | VIVS_VS_INPUT_COUNT_COUNT(3),
                /* VIVS_VS_TEMP_REGISTER_CONTROL */ VIVS_VS_TEMP_REGISTER_CONTROL_NUM_TEMPS(6),
                /* VIVS_VS_OUTPUT(0) */ 0x10004});
        etna_set_state(ctx, VIVS_VS_START_PC, 0x0);
        etna_set_state_f32(ctx, VIVS_VS_UNIFORMS(45), 0.5); /* u11.y */
        etna_set_state_f32(ctx, VIVS_VS_UNIFORMS(44), 1.0); /* u11.x */
        etna_set_state_f32(ctx, VIVS_VS_UNIFORMS(27), 0.0); /* u6.w */
        etna_set_state_f32(ctx, VIVS_VS_UNIFORMS(23), 20.0); /* u5.w */
        etna_set_state_f32(ctx, VIVS_VS_UNIFORMS(19), 2.0); /* u4.w */

        /* Now load the shader itself */
        etna_set_state_multi(ctx, VIVS_VS_INST_MEM(0), vs_size/4, vs);
        etna_set_state(ctx, VIVS_RA_CONTROL, 0x3);
        etna_set_state_f32(ctx, VIVS_PS_UNIFORMS(0), 1.0); /* u0.x */
        etna_set_state_multi(ctx, VIVS_PS_END_PC, 2, (uint32_t[]){
                /* VIVS_PS_END_PC */ ps_size/16,
                /* VIVS_PS_OUTPUT_REG */ 0x1});
        etna_set_state(ctx, VIVS_PS_START_PC, 0x0);
        etna_set_state(ctx, VIVS_PA_ATTRIBUTE_ELEMENT_COUNT, 0x200);
        etna_set_state(ctx, VIVS_PA_SHADER_ATTRIBUTES(0), 0x200);
        etna_set_state(ctx, VIVS_PA_SHADER_ATTRIBUTES(1), 0x200);
        etna_set_state(ctx, VIVS_GL_VARYING_NUM_COMPONENTS,  
                VIVS_GL_VARYING_NUM_COMPONENTS_VAR0(4)| /* position */
                VIVS_GL_VARYING_NUM_COMPONENTS_VAR1(2)  /* texture coordinate */
                );
        etna_set_state_multi(ctx, VIVS_GL_VARYING_COMPONENT_USE(0), 2, (uint32_t[]){
                VIVS_GL_VARYING_COMPONENT_USE_COMP0(VARYING_COMPONENT_USE_USED) |
                VIVS_GL_VARYING_COMPONENT_USE_COMP1(VARYING_COMPONENT_USE_USED) |
                VIVS_GL_VARYING_COMPONENT_USE_COMP2(VARYING_COMPONENT_USE_USED) |
                VIVS_GL_VARYING_COMPONENT_USE_COMP3(VARYING_COMPONENT_USE_USED) |
                VIVS_GL_VARYING_COMPONENT_USE_COMP4(VARYING_COMPONENT_USE_USED) |
                VIVS_GL_VARYING_COMPONENT_USE_COMP5(VARYING_COMPONENT_USE_USED)
                , 0
                });
        etna_set_state_multi(ctx, VIVS_PS_INST_MEM(0), ps_size/4, ps);
        etna_set_state(ctx, VIVS_PS_INPUT_COUNT, 
                VIVS_PS_INPUT_COUNT_UNK8(31)| VIVS_PS_INPUT_COUNT_COUNT(3 + extra_ps_inputs));
        etna_set_state(ctx, VIVS_PS_TEMP_REGISTER_CONTROL, 
                VIVS_PS_TEMP_REGISTER_CONTROL_NUM_TEMPS(3 + extra_ps_inputs));
        etna_set_state(ctx, VIVS_PS_CONTROL, VIVS_PS_CONTROL_UNK1);
        etna_set_state(ctx, VIVS_GL_VARYING_TOTAL_COMPONENTS,
                VIVS_GL_VARYING_TOTAL_COMPONENTS_NUM(6)); /* 4+2=6 total varying components */
        etna_set_state(ctx, VIVS_VS_LOAD_BALANCING, 0xf3f0542); /* depends on number of inputs/outputs/varyings? XXX how exactly */
        etna_set_state(ctx, VIVS_VS_OUTPUT_COUNT, 3);
        etna_set_state(ctx, VIVS_PA_CONFIG, ETNA_MASKED_BIT(VIVS_PA_CONFIG_POINT_SIZE_ENABLE, 0));
        
        /*   Compute transform matrices in the same way as cube egl demo */ 
        ESMatrix modelview;
        esMatrixLoadIdentity(&modelview);
        esTranslate(&modelview, 0.0f, 0.0f, -9.0f);
        esRotate(&modelview, 45.0f, 1.0f, 0.0f, 0.0f);
        esRotate(&modelview, 45.0f, 0.0f, 1.0f, 0.0f);
        esRotate(&modelview, frame*0.5f, 0.0f, 0.0f, 1.0f);
	esScale(&modelview, 0.475f, 0.475f, 0.475f);
        GLfloat aspect = (GLfloat)(height) / (GLfloat)(width);
        ESMatrix projection;
        esMatrixLoadIdentity(&projection);
        esFrustum(&projection, -2.8f, +2.8f, -2.8f * aspect, +2.8f * aspect, 6.0f, 10.0f);
        ESMatrix modelviewprojection;
        esMatrixLoadIdentity(&modelviewprojection);
        esMatrixMultiply(&modelviewprojection, &modelview, &projection);
        ESMatrix inverse, normal; /* compute inverse transpose normal transformation matrix */
        esMatrixInverse3x3(&inverse, &modelview);
        esMatrixTranspose(&normal, &inverse);
        
        etna_set_state_multi(ctx, VIVS_VS_UNIFORMS(0), 16, (uint32_t*)&modelviewprojection.m[0][0]);
        etna_set_state_multi(ctx, VIVS_VS_UNIFORMS(16), 3, (uint32_t*)&normal.m[0][0]); /* u4.xyz */
        etna_set_state_multi(ctx, VIVS_VS_UNIFORMS(20), 3, (uint32_t*)&normal.m[1][0]); /* u5.xyz */
        etna_set_state_multi(ctx, VIVS_VS_UNIFORMS(24), 3, (uint32_t*)&normal.m[2][0]); /* u6.xyz */
        etna_set_state_multi(ctx, VIVS_VS_UNIFORMS(28), 16, (uint32_t*)&modelview.m[0][0]);
#ifdef INDEXED
        etna_set_state(ctx, VIVS_FE_INDEX_STREAM_BASE_ADDR, idx->address);
        etna_set_state(ctx, VIVS_FE_INDEX_STREAM_CONTROL, VIVS_FE_INDEX_STREAM_CONTROL_TYPE_UNSIGNED_SHORT);
#endif
        etna_set_state(ctx, VIVS_FE_VERTEX_STREAM_BASE_ADDR, vtx->address);
        etna_set_state(ctx, VIVS_FE_VERTEX_STREAM_CONTROL, 
                VIVS_FE_VERTEX_STREAM_CONTROL_VERTEX_STRIDE((3 + 3 + 2)*4));
        etna_set_state(ctx, VIVS_FE_VERTEX_ELEMENT_CONFIG(0), 
                VIVS_FE_VERTEX_ELEMENT_CONFIG_TYPE_FLOAT |
                (ENDIAN_MODE_NO_SWAP << VIVS_FE_VERTEX_ELEMENT_CONFIG_ENDIAN__SHIFT) |
                VIVS_FE_VERTEX_ELEMENT_CONFIG_STREAM(0) |
                VIVS_FE_VERTEX_ELEMENT_CONFIG_NUM_3 |
                VIVS_FE_VERTEX_ELEMENT_CONFIG_NORMALIZE_OFF |
                VIVS_FE_VERTEX_ELEMENT_CONFIG_START(0x0) |
                VIVS_FE_VERTEX_ELEMENT_CONFIG_END(0xc));
        etna_set_state(ctx, VIVS_FE_VERTEX_ELEMENT_CONFIG(1), 
                VIVS_FE_VERTEX_ELEMENT_CONFIG_TYPE_FLOAT |
                (ENDIAN_MODE_NO_SWAP << VIVS_FE_VERTEX_ELEMENT_CONFIG_ENDIAN__SHIFT) |
                VIVS_FE_VERTEX_ELEMENT_CONFIG_STREAM(0) |
                VIVS_FE_VERTEX_ELEMENT_CONFIG_NUM_3 |
                VIVS_FE_VERTEX_ELEMENT_CONFIG_NORMALIZE_OFF |
                VIVS_FE_VERTEX_ELEMENT_CONFIG_START(0xc) |
                VIVS_FE_VERTEX_ELEMENT_CONFIG_END(0x18));
        etna_set_state(ctx, VIVS_FE_VERTEX_ELEMENT_CONFIG(2), 
                VIVS_FE_VERTEX_ELEMENT_CONFIG_TYPE_FLOAT |
                (ENDIAN_MODE_NO_SWAP << VIVS_FE_VERTEX_ELEMENT_CONFIG_ENDIAN__SHIFT) |
                VIVS_FE_VERTEX_ELEMENT_CONFIG_NONCONSECUTIVE |
                VIVS_FE_VERTEX_ELEMENT_CONFIG_STREAM(0) |
                VIVS_FE_VERTEX_ELEMENT_CONFIG_NUM_2 |
                VIVS_FE_VERTEX_ELEMENT_CONFIG_NORMALIZE_OFF |
                VIVS_FE_VERTEX_ELEMENT_CONFIG_START(0x18) |
                VIVS_FE_VERTEX_ELEMENT_CONFIG_END(0x20));
        etna_set_state(ctx, VIVS_VS_INPUT(0), 0x20100);
        etna_set_state(ctx, VIVS_PA_CONFIG, ETNA_MASKED_BIT(VIVS_PA_CONFIG_POINT_SPRITE_ENABLE, 0));

#ifdef INDEXED
        etna_draw_indexed_primitives(ctx, PRIMITIVE_TYPE_TRIANGLES, 0, COMPANION_TRIANGLE_COUNT, 0);
#else
        etna_draw_primitives(ctx, PRIMITIVE_TYPE_TRIANGLES, 0, COMPANION_TRIANGLE_COUNT);
#endif
        etna_set_state(ctx, VIVS_GL_FLUSH_CACHE, VIVS_GL_FLUSH_CACHE_COLOR | VIVS_GL_FLUSH_CACHE_DEPTH);
#ifdef EXTRA_DELAYS
        etna_flush(ctx);
#endif
        etna_set_state(ctx, VIVS_GL_FLUSH_CACHE, VIVS_GL_FLUSH_CACHE_COLOR | VIVS_GL_FLUSH_CACHE_DEPTH);
        etna_set_state(ctx, VIVS_RS_CONFIG,
                VIVS_RS_CONFIG_SOURCE_FORMAT(RS_FORMAT_A8R8G8B8) |
                VIVS_RS_CONFIG_SOURCE_TILED |
                VIVS_RS_CONFIG_DEST_FORMAT(RS_FORMAT_A8R8G8B8) |
                VIVS_RS_CONFIG_DEST_TILED);
        etna_set_state(ctx, VIVS_RS_SOURCE_STRIDE, (padded_width * 4 * 4) | VIVS_RS_SOURCE_STRIDE_TILING);
        etna_set_state(ctx, VIVS_RS_DEST_STRIDE, (padded_width * 4 * 4) | VIVS_RS_DEST_STRIDE_TILING);
        etna_set_state(ctx, VIVS_RS_DITHER(0), 0xffffffff);
        etna_set_state(ctx, VIVS_RS_DITHER(1), 0xffffffff);
        etna_set_state(ctx, VIVS_RS_CLEAR_CONTROL, VIVS_RS_CLEAR_CONTROL_MODE_DISABLED);
        etna_set_state(ctx, VIVS_RS_EXTRA_CONFIG, 0); 
        etna_set_state(ctx, VIVS_RS_SOURCE_ADDR, rt->address);
        etna_set_state(ctx, VIVS_RS_DEST_ADDR, rt->address);
        etna_set_state(ctx, VIVS_RS_WINDOW_SIZE, 
                VIVS_RS_WINDOW_SIZE_HEIGHT(padded_height) |
                VIVS_RS_WINDOW_SIZE_WIDTH(padded_width));
        etna_set_state(ctx, VIVS_RS_KICKER, 0xbeebbeeb);
#ifdef EXTRA_DELAYS
        etna_flush(ctx);

        etna_warm_up_rs(ctx, aux_rt->address, aux_rt_ts->address);
#endif

        etna_set_state(ctx, VIVS_TS_COLOR_STATUS_BASE, rt_ts->address);
        etna_set_state(ctx, VIVS_TS_COLOR_SURFACE_BASE, rt->address);
        etna_set_state(ctx, VIVS_GL_FLUSH_CACHE, VIVS_GL_FLUSH_CACHE_COLOR);
        etna_set_state(ctx, VIVS_TS_MEM_CONFIG, 
                VIVS_TS_MEM_CONFIG_DEPTH_FAST_CLEAR |
                VIVS_TS_MEM_CONFIG_DEPTH_16BPP | 
                VIVS_TS_MEM_CONFIG_DEPTH_COMPRESSION);
        etna_set_state(ctx, VIVS_GL_FLUSH_CACHE, VIVS_GL_FLUSH_CACHE_COLOR);

        /* Wait for pixel engine to finish */
#ifdef EXTRA_DELAYS
        etna_finish(ctx);
#else
        etna_stall(ctx, SYNC_RECIPIENT_FE, SYNC_RECIPIENT_PE);
#endif

        /* Copy result to framebuffer */
        etna_set_state(ctx, VIVS_GL_FLUSH_CACHE, VIVS_GL_FLUSH_CACHE_COLOR | VIVS_GL_FLUSH_CACHE_DEPTH);
        etna_set_state(ctx, VIVS_RS_CONFIG,
                VIVS_RS_CONFIG_SOURCE_FORMAT(RS_FORMAT_A8R8G8B8) |
                VIVS_RS_CONFIG_SOURCE_TILED |
                ((supersample_x>1)?VIVS_RS_CONFIG_DOWNSAMPLE_X:0) |
                ((supersample_y>1)?VIVS_RS_CONFIG_DOWNSAMPLE_Y:0) |
                VIVS_RS_CONFIG_DEST_FORMAT(RS_FORMAT_A8R8G8B8) |
                VIVS_RS_CONFIG_SWAP_RB);
        etna_set_state(ctx, VIVS_RS_SOURCE_STRIDE, (padded_width * 4 * 4) | VIVS_RS_SOURCE_STRIDE_TILING);
        etna_set_state(ctx, VIVS_RS_DEST_STRIDE, fb.fb_fix.line_length);
        etna_set_state(ctx, VIVS_RS_DITHER(0), 0xffffffff);
        etna_set_state(ctx, VIVS_RS_DITHER(1), 0xffffffff);
        etna_set_state(ctx, VIVS_RS_CLEAR_CONTROL, VIVS_RS_CLEAR_CONTROL_MODE_DISABLED);
        etna_set_state(ctx, VIVS_RS_EXTRA_CONFIG, 0);
        etna_set_state(ctx, VIVS_RS_SOURCE_ADDR, rt->address);
        etna_set_state(ctx, VIVS_RS_DEST_ADDR, fb.physical[backbuffer]);
        etna_set_state(ctx, VIVS_RS_WINDOW_SIZE, 
                VIVS_RS_WINDOW_SIZE_HEIGHT(height * supersample_y) |
                VIVS_RS_WINDOW_SIZE_WIDTH(width * supersample_x));
        etna_set_state(ctx, VIVS_RS_KICKER, 0xbeebbeeb);
        etna_finish(ctx);

        /* switch buffers */
        fb_set_buffer(&fb, backbuffer);
        backbuffer = 1-backbuffer;
    }
    
    etna_free(ctx);
    viv_close();
    return 0;
}
int main(int argc, char **argv)
{
    int rv;
    int width = 256;
    int height = 256;
    
    fb_info fb;
    rv = fb_open(0, &fb);
    if(rv!=0)
    {
        exit(1);
    }
    width = fb.fb_var.xres;
    height = fb.fb_var.yres;

    rv = viv_open();
    if(rv!=0)
    {
        fprintf(stderr, "Error opening device\n");
        exit(1);
    }
    printf("Succesfully opened device\n");

    etna_ctx *ctx = 0;
    struct pipe_context *pipe = 0;
    etna_bswap_buffers *buffers = 0;
    if(etna_create(&ctx) != ETNA_OK ||
        etna_bswap_create(ctx, &buffers, (etna_set_buffer_cb_t)&fb_set_buffer, (etna_copy_buffer_cb_t)&etna_fb_copy_buffer, &fb) != ETNA_OK ||
        (pipe = etna_new_pipe_context(ctx)) == NULL)
    {
        printf("Unable to create etna context\n");
        exit(1);
    }

    /* Convert and upload embedded texture */
    struct pipe_resource *tex_resource = etna_pipe_create_2d(pipe, ETNA_IS_TEXTURE, PIPE_FORMAT_B8G8R8X8_UNORM, 
            COMPANION_TEXTURE_WIDTH, COMPANION_TEXTURE_HEIGHT, 0);
    void *temp = malloc(COMPANION_TEXTURE_WIDTH * COMPANION_TEXTURE_HEIGHT * 4); 
    etna_convert_r8g8b8_to_b8g8r8x8(temp, (const uint8_t*)companion_texture, COMPANION_TEXTURE_WIDTH * COMPANION_TEXTURE_HEIGHT);
    etna_pipe_inline_write(pipe, tex_resource, 0, 0, temp, COMPANION_TEXTURE_WIDTH * COMPANION_TEXTURE_HEIGHT * 4);
    free(temp);

    /* resources */
    struct pipe_resource *rt_resource = etna_pipe_create_2d(pipe, ETNA_IS_RENDER_TARGET, PIPE_FORMAT_B8G8R8X8_UNORM, width, height, 0);
    struct pipe_resource *z_resource = etna_pipe_create_2d(pipe, ETNA_IS_RENDER_TARGET, PIPE_FORMAT_Z16_UNORM, width, height, 0);

    /* bind render target to framebuffer */
    etna_fb_bind_resource(&fb, rt_resource);

    /* geometry */
    struct pipe_resource *vtx_resource = etna_pipe_create_buffer(pipe, ETNA_IS_VERTEX, VERTEX_BUFFER_SIZE);
    struct pipe_resource *idx_resource = etna_pipe_create_buffer(pipe, ETNA_IS_INDEX, INDEX_BUFFER_SIZE);

    float *vtx_logical = etna_pipe_get_resource_ptr(pipe, vtx_resource, 0, 0);
    assert(vtx_logical);
    float *idx_logical = etna_pipe_get_resource_ptr(pipe, idx_resource, 0, 0);
    assert(idx_logical);
#ifndef INDEXED
    printf("Interleaving vertices...\n");
    float *vertices_array = companion_vertices_array();
    float *texture_coordinates_array =
            companion_texture_coordinates_array();
    float *normals_array = companion_normals_array();
    assert(COMPANION_ARRAY_COUNT*(3+3+2)*sizeof(float) < VERTEX_BUFFER_SIZE);
    for(int vert=0; vert<COMPANION_ARRAY_COUNT; ++vert)
    {
        int dest_idx = vert * (3 + 3 + 2);
        for(int comp=0; comp<3; ++comp)
            ((float*)vtx_logical)[dest_idx+comp+0] = vertices_array[vert*3 + comp]; /* 0 */
        for(int comp=0; comp<3; ++comp)
            ((float*)vtx_logical)[dest_idx+comp+3] = normals_array[vert*3 + comp]; /* 1 */
        for(int comp=0; comp<2; ++comp)
            ((float*)vtx_logical)[dest_idx+comp+6] = texture_coordinates_array[vert*2 + comp]; /* 2 */
    }
#else
    printf("Interleaving vertices and copying index buffer...\n");
    assert(COMPANION_VERTEX_COUNT*(3+3+2)*sizeof(float) < VERTEX_BUFFER_SIZE);
    for(int vert=0; vert<COMPANION_VERTEX_COUNT; ++vert)
    {
        int dest_idx = vert * (3 + 3 + 2);
        for(int comp=0; comp<3; ++comp)
            ((float*)vtx_logical)[dest_idx+comp+0] = companion_vertices[vert][comp]; /* 0 */
        for(int comp=0; comp<3; ++comp)
            ((float*)vtx_logical)[dest_idx+comp+3] = companion_normals[vert][comp]; /* 1 */
        for(int comp=0; comp<2; ++comp)
            ((float*)vtx_logical)[dest_idx+comp+6] = companion_texture_coordinates[vert][comp]; /* 2 */
    }
    assert(COMPANION_TRIANGLE_COUNT*3*sizeof(unsigned short) < INDEX_BUFFER_SIZE);
    memcpy(idx_logical, &companion_triangles[0][0], COMPANION_TRIANGLE_COUNT*3*sizeof(unsigned short));
#endif
    struct pipe_vertex_buffer vertex_buffer_desc = {
            .stride = (3 + 3 + 2)*4,
            .buffer_offset = 0,
            .buffer = vtx_resource,
            .user_buffer = 0
            };
    struct pipe_index_buffer index_buffer_desc = {
            .index_size = sizeof(unsigned short),
            .offset = 0,
            .buffer = idx_resource,
            .user_buffer = 0
            };
    struct pipe_vertex_element pipe_vertex_elements[] = {
        { /* positions */
            .src_offset = 0,
            .instance_divisor = 0,
            .vertex_buffer_index = 0,
            .src_format = PIPE_FORMAT_R32G32B32_FLOAT 
        },
        { /* normals */
            .src_offset = 0xc,
            .instance_divisor = 0,
            .vertex_buffer_index = 0,
            .src_format = PIPE_FORMAT_R32G32B32_FLOAT 
        },
        { /* texture coord */
            .src_offset = 0x18,
            .instance_divisor = 0,
            .vertex_buffer_index = 0,
            .src_format = PIPE_FORMAT_R32G32_FLOAT
        }
    };