/*
 * Intersect `ray` with the spheres stored in the bounding-box tree rooted
 * at `root`, using an explicit stack of pending nodes.
 *
 *   root - tree root; its box is tested first
 *   ray  - ray being traced
 *   r    - parametric range every box interval is clipped against and that
 *          is handed to each sphere's intersect()
 *   hit  - in/out hit record; hit->t is read to prune boxes whose entry
 *          distance is not below it, and is passed to each sphere's
 *          intersect() (presumably updated there on a closer hit - confirm)
 *
 * Returns true when at least one sphere intersect() call reported a hit.
 *
 * A node with a non-NULL `negative` child is treated as interior; otherwise
 * it is a leaf holding `count` spheres starting at index `start`.
 *
 * The pending-node array has a fixed capacity.  The original code pushed
 * without a bounds check, so a sufficiently deep tree wrote past the end of
 * the array.  When the array is full the subtree is now traversed through a
 * recursive call instead of being pushed.
 */
bool tree_intersect_stack(group *root, const ray& ray, const range& r, surface_hit *hit)
{
    enum { STACK_CAPACITY = 25 };

    group *pending[STACK_CAPACITY];
    int top = -1;
    bool have_hit = false;

    range root_span = box_intersect(root->boxmin, root->boxmax, ray);
    root_span.intersect(r);
    if(!root_span || (root_span.t0 > hit->t))
        return false;

    pending[++top] = root;

    while(top >= 0) {
        group *g = pending[top--];

        if(g->negative == NULL) {
            /* Leaf: test every sphere it owns. */
            for(unsigned int i = 0; i < g->count; i++) {
                if(g->spheres[g->start + i].intersect(ray, r, hit))
                    have_hit = true;
            }
            continue;
        }

        /* Interior node: `first` is the child that gets visited first.
         * It is pushed last so that it is popped before `second`. */
        group *first, *second;
        if(vec3_dot(ray.d, g->D) > 0) {
            first = g->negative;
            second = g->positive;
        } else {
            first = g->positive;
            second = g->negative;
        }

        group *order[2] = { second, first };
        for(int k = 0; k < 2; k++) {
            group *child = order[k];

            range span = box_intersect(child->boxmin, child->boxmax, ray);
            span.intersect(r);
            if(span && (span.t0 < hit->t)) {
                if(top + 1 < STACK_CAPACITY) {
                    pending[++top] = child;
                } else if(tree_intersect_stack(child, ray, r, hit)) {
                    /* Stack full: handle this subtree recursively. */
                    have_hit = true;
                }
            }
        }
    }

    return have_hit;
}
/*
 * sna_read_boxes - read the nbox rectangles in box[] out of src_bo for the
 * pixmap dst.
 *
 * NOTE(review): this definition is cut off in this view (the body does not
 * reach its closing brace), so only the visible part is described here.
 *
 * Visible strategies, in the order they are tried:
 *  - read_boxes_inplace() when download_inplace() returns true, and also as
 *    the "fallback" target when an allocation or copy below fails, or when
 *    src_bo can be mapped and (extents height - 1) * pitch is under 4096.
 *  - When the blitter cannot be used (can_blt is false), the current ring is
 *    KGEM_RENDER, or upload_too_large() is true: sna->render.copy_boxes()
 *    into a buffer from kgem_create_buffer_2d(), followed by memcpy_blt()
 *    calls bracketed by sigtrap_get()/sigtrap_put().  If must_tile() is true
 *    the extents are walked in step x step tiles and the boxes are clipped
 *    to each tile (heap-allocated clip list when more than 64 boxes).
 *    memcpy_blt presumably copies from the buffer mapping `ptr` into
 *    dst->devPrivate.ptr - confirm against its definition.
 *  - Otherwise a linear buffer sized as the sum of PITCH(width, cpp) * height
 *    over all boxes is created and an XY_SRC_COPY_BLT_CMD is prepared; the
 *    remainder of that path is outside this view.
 *
 * With NDEBUG undefined, every box is first checked against the pitch and
 * size of src_bo and FatalError() is raised on an out-of-bounds box.
 */
void sna_read_boxes(struct sna *sna, PixmapPtr dst, struct kgem_bo *src_bo, const BoxRec *box, int nbox) { struct kgem *kgem = &sna->kgem; struct kgem_bo *dst_bo; BoxRec extents; const BoxRec *tmp_box; int tmp_nbox; void *ptr; int src_pitch, cpp, offset; int n, cmd, br13; bool can_blt; DBG(("%s x %d, src=(handle=%d), dst=(size=(%d, %d)\n", __FUNCTION__, nbox, src_bo->handle, dst->drawable.width, dst->drawable.height)); #ifndef NDEBUG for (n = 0; n < nbox; n++) { if (box[n].x1 < 0 || box[n].y1 < 0 || box[n].x2 * dst->drawable.bitsPerPixel/8 > src_bo->pitch || box[n].y2 * src_bo->pitch > kgem_bo_size(src_bo)) { FatalError("source out-of-bounds box[%d]=(%d, %d), (%d, %d), pitch=%d, size=%d\n", n, box[n].x1, box[n].y1, box[n].x2, box[n].y2, src_bo->pitch, kgem_bo_size(src_bo)); } } #endif /* XXX The gpu is faster to perform detiling in bulk, but takes * longer to setup and retrieve the results, with an additional * copy. The long term solution is to use snoopable bo and avoid * this path. */ if (download_inplace(kgem, dst, src_bo, box ,nbox)) { fallback: read_boxes_inplace(kgem, dst, src_bo, box, nbox); return; } can_blt = kgem_bo_can_blt(kgem, src_bo) && (box[0].x2 - box[0].x1) * dst->drawable.bitsPerPixel < 8 * (MAXSHORT - 4); extents = box[0]; for (n = 1; n < nbox; n++) { if (box[n].x1 < extents.x1) extents.x1 = box[n].x1; if (box[n].x2 > extents.x2) extents.x2 = box[n].x2; if (can_blt) can_blt = (box[n].x2 - box[n].x1) * dst->drawable.bitsPerPixel < 8 * (MAXSHORT - 4); if (box[n].y1 < extents.y1) extents.y1 = box[n].y1; if (box[n].y2 > extents.y2) extents.y2 = box[n].y2; } if (kgem_bo_can_map(kgem, src_bo)) { /* Is it worth detiling? */ if ((extents.y2 - extents.y1 - 1) * src_bo->pitch < 4096) goto fallback; } /* Try to avoid switching rings... 
*/ if (!can_blt || kgem->ring == KGEM_RENDER || upload_too_large(sna, extents.x2 - extents.x1, extents.y2 - extents.y1)) { PixmapRec tmp; tmp.drawable.width = extents.x2 - extents.x1; tmp.drawable.height = extents.y2 - extents.y1; tmp.drawable.depth = dst->drawable.depth; tmp.drawable.bitsPerPixel = dst->drawable.bitsPerPixel; tmp.devPrivate.ptr = NULL; assert(tmp.drawable.width); assert(tmp.drawable.height); if (must_tile(sna, tmp.drawable.width, tmp.drawable.height)) { BoxRec tile, stack[64], *clipped, *c; int step; if (n > ARRAY_SIZE(stack)) { clipped = malloc(sizeof(BoxRec) * n); if (clipped == NULL) goto fallback; } else clipped = stack; step = MIN(sna->render.max_3d_size, 8*(MAXSHORT&~63) / dst->drawable.bitsPerPixel); while (step * step * 4 > sna->kgem.max_upload_tile_size) step /= 2; DBG(("%s: tiling download, using %dx%d tiles\n", __FUNCTION__, step, step)); assert(step); for (tile.y1 = extents.y1; tile.y1 < extents.y2; tile.y1 = tile.y2) { int y2 = tile.y1 + step; if (y2 > extents.y2) y2 = extents.y2; tile.y2 = y2; for (tile.x1 = extents.x1; tile.x1 < extents.x2; tile.x1 = tile.x2) { int x2 = tile.x1 + step; if (x2 > extents.x2) x2 = extents.x2; tile.x2 = x2; tmp.drawable.width = tile.x2 - tile.x1; tmp.drawable.height = tile.y2 - tile.y1; c = clipped; for (n = 0; n < nbox; n++) { *c = box[n]; if (!box_intersect(c, &tile)) continue; DBG(("%s: box(%d, %d), (%d, %d),, dst=(%d, %d)\n", __FUNCTION__, c->x1, c->y1, c->x2, c->y2, c->x1 - tile.x1, c->y1 - tile.y1)); c++; } if (c == clipped) continue; dst_bo = kgem_create_buffer_2d(kgem, tmp.drawable.width, tmp.drawable.height, tmp.drawable.bitsPerPixel, KGEM_BUFFER_LAST, &ptr); if (!dst_bo) { if (clipped != stack) free(clipped); goto fallback; } if (!sna->render.copy_boxes(sna, GXcopy, dst, src_bo, 0, 0, &tmp, dst_bo, -tile.x1, -tile.y1, clipped, c-clipped, COPY_LAST)) { kgem_bo_destroy(&sna->kgem, dst_bo); if (clipped != stack) free(clipped); goto fallback; } kgem_bo_submit(&sna->kgem, dst_bo); 
kgem_buffer_read_sync(kgem, dst_bo); if (sigtrap_get() == 0) { while (c-- != clipped) { memcpy_blt(ptr, dst->devPrivate.ptr, tmp.drawable.bitsPerPixel, dst_bo->pitch, dst->devKind, c->x1 - tile.x1, c->y1 - tile.y1, c->x1, c->y1, c->x2 - c->x1, c->y2 - c->y1); } sigtrap_put(); } kgem_bo_destroy(&sna->kgem, dst_bo); } } if (clipped != stack) free(clipped); } else { dst_bo = kgem_create_buffer_2d(kgem, tmp.drawable.width, tmp.drawable.height, tmp.drawable.bitsPerPixel, KGEM_BUFFER_LAST, &ptr); if (!dst_bo) goto fallback; if (!sna->render.copy_boxes(sna, GXcopy, dst, src_bo, 0, 0, &tmp, dst_bo, -extents.x1, -extents.y1, box, nbox, COPY_LAST)) { kgem_bo_destroy(&sna->kgem, dst_bo); goto fallback; } kgem_bo_submit(&sna->kgem, dst_bo); kgem_buffer_read_sync(kgem, dst_bo); if (sigtrap_get() == 0) { for (n = 0; n < nbox; n++) { memcpy_blt(ptr, dst->devPrivate.ptr, tmp.drawable.bitsPerPixel, dst_bo->pitch, dst->devKind, box[n].x1 - extents.x1, box[n].y1 - extents.y1, box[n].x1, box[n].y1, box[n].x2 - box[n].x1, box[n].y2 - box[n].y1); } sigtrap_put(); } kgem_bo_destroy(&sna->kgem, dst_bo); } return; } /* count the total number of bytes to be read and allocate a bo */ cpp = dst->drawable.bitsPerPixel / 8; offset = 0; for (n = 0; n < nbox; n++) { int height = box[n].y2 - box[n].y1; int width = box[n].x2 - box[n].x1; offset += PITCH(width, cpp) * height; } DBG((" read buffer size=%d\n", offset)); dst_bo = kgem_create_buffer(kgem, offset, KGEM_BUFFER_LAST, &ptr); if (!dst_bo) { read_boxes_inplace(kgem, dst, src_bo, box, nbox); return; } cmd = XY_SRC_COPY_BLT_CMD; src_pitch = src_bo->pitch; if (kgem->gen >= 040 && src_bo->tiling) { cmd |= BLT_SRC_TILED; src_pitch >>= 2; }
/* we found some space filler that may intersect this query. * First check if it does intersect, then break it into * overlaping regions that don't intersect this box. */ static int query_one (const BoxType * box, void *cl) { struct query_closure *qc = (struct query_closure *) cl; mtspacebox_t *mtsb = (mtspacebox_t *) box; Coord shrink; assert (box_intersect (qc->cbox, &mtsb->box)); /* we need to satisfy the larger of the two keepaways */ if (qc->keepaway > mtsb->keepaway) shrink = mtsb->keepaway; else shrink = qc->keepaway; /* if we shrink qc->box by this amount and it doesn't intersect * then we didn't actually touch this box */ if (qc->cbox->X1 + shrink >= mtsb->box.X2 || qc->cbox->X2 - shrink <= mtsb->box.X1 || qc->cbox->Y1 + shrink >= mtsb->box.Y2 || qc->cbox->Y2 - shrink <= mtsb->box.Y1) return 0; /* ok, we do touch this box, now create up to 4 boxes that don't */ if (mtsb->box.Y1 > qc->cbox->Y1 + shrink) /* top region exists */ { Coord Y1 = qc->cbox->Y1; Coord Y2 = mtsb->box.Y1 + shrink; if (Y2 - Y1 >= 2 * (qc->radius + qc->keepaway)) { BoxType *newone = (BoxType *) malloc (sizeof (BoxType)); newone->X1 = qc->cbox->X1; newone->X2 = qc->cbox->X2; newone->Y1 = Y1; newone->Y2 = Y2; assert (newone->Y2 < qc->cbox->Y2); append(qc, newone); } } if (mtsb->box.Y2 < qc->cbox->Y2 - shrink) /* bottom region exists */ { Coord Y1 = mtsb->box.Y2 - shrink; Coord Y2 = qc->cbox->Y2; if (Y2 - Y1 >= 2 * (qc->radius + qc->keepaway)) { BoxType *newone = (BoxType *) malloc (sizeof (BoxType)); newone->X1 = qc->cbox->X1; newone->X2 = qc->cbox->X2; newone->Y2 = qc->cbox->Y2; newone->Y1 = Y1; assert (newone->Y1 > qc->cbox->Y1); append (qc, newone); } } if (mtsb->box.X1 > qc->cbox->X1 + shrink) /* left region exists */ { Coord X1 = qc->cbox->X1; Coord X2 = mtsb->box.X1 + shrink; if (X2 - X1 >= 2 * (qc->radius + qc->keepaway)) { BoxType *newone; newone = (BoxType *) malloc (sizeof (BoxType)); newone->Y1 = qc->cbox->Y1; newone->Y2 = qc->cbox->Y2; newone->X1 = qc->cbox->X1; newone->X2 = 
X2; assert (newone->X2 < qc->cbox->X2); append (qc, newone); } } if (mtsb->box.X2 < qc->cbox->X2 - shrink) /* right region exists */ { Coord X1 = mtsb->box.X2 - shrink; Coord X2 = qc->cbox->X2; if (X2 - X1 >= 2 * (qc->radius + qc->keepaway)) { BoxType *newone = (BoxType *) malloc (sizeof (BoxType)); newone->Y1 = qc->cbox->Y1; newone->Y2 = qc->cbox->Y2; newone->X2 = qc->cbox->X2; newone->X1 = X1; assert (newone->X1 > qc->cbox->X1); append (qc, newone); } } if (qc->touching.v) { if (qc->touch_is_vec || !qc->desired) vector_append (qc->touching.v, qc->cbox); else heap_append (qc->touching.h, qc->desired, qc->cbox); } else free (qc->cbox); /* done with this one */ longjmp (qc->env, 1); return 1; /* never reached */ }
/*
 * sna_tiling_copy_boxes - copy the n rectangles in box[] from src/src_bo to
 * dst/dst_bo by walking the bounding extents in square tiles.
 *
 * For every tile the boxes are clipped against it; tiles that no box
 * touches are skipped.  When either bo has a pitch above
 * sna->render.max_3d_pitch each tile goes through a temporary bo
 * (GXcopy src -> temporary, then `alu` temporary -> dst); otherwise a
 * single GXcopy copy_boxes() call from src to dst is issued per tile.
 *
 * Returns true when every tile was copied, false as soon as an allocation
 * or copy fails.
 */
bool
sna_tiling_copy_boxes(struct sna *sna, uint8_t alu,
		      PixmapPtr src, struct kgem_bo *src_bo, int16_t src_dx, int16_t src_dy,
		      PixmapPtr dst, struct kgem_bo *dst_bo, int16_t dst_dx, int16_t dst_dy,
		      const BoxRec *box, int n)
{
	BoxRec extents, tile, stack[64], *clipped, *c;
	PixmapRec p;
	int i, step, tiling;
	bool create;
	bool ok;
	bool ret = false;

	/* Bounding box of all the requested rectangles. */
	extents = box[0];
	for (i = 1; i < n; i++) {
		const BoxRec *b = &box[i];

		if (b->x1 < extents.x1)
			extents.x1 = b->x1;
		if (b->y1 < extents.y1)
			extents.y1 = b->y1;
		if (b->x2 > extents.x2)
			extents.x2 = b->x2;
		if (b->y2 > extents.y2)
			extents.y2 = b->y2;
	}

	/* X tiling only when both bo pass kgem_bo_can_blt(), else Y. */
	tiling = (kgem_bo_can_blt(&sna->kgem, src_bo) &&
		  kgem_bo_can_blt(&sna->kgem, dst_bo)) ?
		I915_TILING_X : I915_TILING_Y;

	create = (src_bo->pitch > sna->render.max_3d_pitch ||
		  dst_bo->pitch > sna->render.max_3d_pitch);

	/* Halve the tile edge until a temporary tile fits the upload limit. */
	step = sna->render.max_3d_size / 2;
	if (create) {
		while (step * step * 4 > sna->kgem.max_upload_tile_size)
			step /= 2;
	}

	DBG(("%s: tiling copy %dx%d, %s %dx%d %c tiles\n", __FUNCTION__,
	     extents.x2-extents.x1, extents.y2-extents.y1,
	     create ? "creating" : "using",
	     step, step, tiling == I915_TILING_X ? 'X' : 'Y'));

	/* Clip list: on the stack for up to 64 boxes, otherwise on the heap. */
	if (n > ARRAY_SIZE(stack)) {
		clipped = malloc(sizeof(BoxRec) * n);
		if (clipped == NULL)
			goto done;
	} else
		clipped = stack;

	p.drawable.depth = src->drawable.depth;
	p.drawable.bitsPerPixel = src->drawable.bitsPerPixel;
	p.devPrivate.ptr = NULL;

	for (tile.y1 = extents.y1; tile.y1 < extents.y2; tile.y1 = tile.y2) {
		int y2 = tile.y1 + step;

		tile.y2 = y2 > extents.y2 ? extents.y2 : y2;

		for (tile.x1 = extents.x1; tile.x1 < extents.x2; tile.x1 = tile.x2) {
			struct kgem_bo *tmp_bo;
			int x2 = tile.x1 + step;

			tile.x2 = x2 > extents.x2 ? extents.x2 : x2;

			/* Gather the boxes that overlap this tile. */
			c = clipped;
			for (i = 0; i < n; i++) {
				*c = box[i];
				if (box_intersect(c, &tile)) {
					DBG(("%s: box(%d, %d), (%d, %d), src=(%d, %d), dst=(%d, %d)\n",
					     __FUNCTION__,
					     c->x1, c->y1,
					     c->x2, c->y2,
					     src_dx, src_dy,
					     c->x1 - tile.x1,
					     c->y1 - tile.y1));
					c++;
				}
			}
			if (c == clipped)
				continue;

			p.drawable.width = tile.x2 - tile.x1;
			p.drawable.height = tile.y2 - tile.y1;

			DBG(("%s: tile (%d, %d), (%d, %d)\n",
			     __FUNCTION__, tile.x1, tile.y1, tile.x2, tile.y2));

			if (create) {
				tmp_bo = kgem_create_2d(&sna->kgem,
							p.drawable.width,
							p.drawable.height,
							p.drawable.bitsPerPixel,
							tiling, CREATE_TEMPORARY);
				if (!tmp_bo)
					goto done;

				/* src -> temporary, then temporary -> dst. */
				ok = sna->render.copy_boxes(sna, GXcopy,
							    src, src_bo, src_dx, src_dy,
							    &p, tmp_bo, -tile.x1, -tile.y1,
							    clipped, c - clipped, 0);
				if (ok)
					ok = sna->render.copy_boxes(sna, alu,
								    &p, tmp_bo, -tile.x1, -tile.y1,
								    dst, dst_bo, dst_dx, dst_dy,
								    clipped, c - clipped, 0);

				kgem_bo_destroy(&sna->kgem, tmp_bo);
			} else {
				ok = sna->render.copy_boxes(sna, GXcopy,
							    src, src_bo, src_dx, src_dy,
							    dst, dst_bo, dst_dx, dst_dy,
							    clipped, c - clipped, 0);
			}

			if (!ok)
				goto done;
		}
	}

	ret = true;

done:
	if (clipped != stack)
		free(clipped);

	return ret;
}
void box_partition_map( const int np , const int my_p , const int gbox[3][2] , const int pbox[][3][2] , const int ghost , int map_use_box[3][2] , int map_local_id[] , int * map_count_interior , int * map_count_owns , int * map_count_uses , int ** map_recv_pc , int ** map_send_pc , int ** map_send_id ) { int * recv_pc = (int *) malloc( ( np + 1 ) * sizeof(int) ); int * send_pc = (int *) malloc( ( np + 1 ) * sizeof(int) ); int id_length = 0 ; int * send_id = NULL ; int send_id_size = 0 ; int own_length , use_length , int_length ; int count_interior , count_parallel ; int iSend ; int g_ix , g_iy , g_iz ; int i ; int my_int_box[3][2] ; global_to_use_box( gbox , pbox[my_p] , ghost , my_int_box , map_use_box ); own_length = ( pbox[my_p][0][1] - pbox[my_p][0][0] ) * ( pbox[my_p][1][1] - pbox[my_p][1][0] ) * ( pbox[my_p][2][1] - pbox[my_p][2][0] ); use_length = ( map_use_box[0][1] - map_use_box[0][0] ) * ( map_use_box[1][1] - map_use_box[1][0] ) * ( map_use_box[2][1] - map_use_box[2][0] ); int_length = ( my_int_box[0][1] - my_int_box[0][0] ) * ( my_int_box[1][1] - my_int_box[1][0] ) * ( my_int_box[2][1] - my_int_box[2][0] ); for ( i = 0 ; i < id_length ; ++i ) { map_local_id[i] = -1 ; } /* Fill in locally owned portion: { interior , parallel } */ count_interior = 0 ; count_parallel = int_length ; for ( g_iz = pbox[my_p][2][0] ; g_iz < pbox[my_p][2][1] ; ++g_iz ) { for ( g_iy = pbox[my_p][1][0] ; g_iy < pbox[my_p][1][1] ; ++g_iy ) { for ( g_ix = pbox[my_p][0][0] ; g_ix < pbox[my_p][0][1] ; ++g_ix ) { const int local = map_global_to_use_box( (BoxInput) map_use_box, g_ix, g_iy, g_iz ); if ( local < 0 ) { abort(); } if ( my_int_box[2][0] <= g_iz && g_iz < my_int_box[2][1] && my_int_box[1][0] <= g_iy && g_iy < my_int_box[1][1] && my_int_box[0][0] <= g_ix && g_ix < my_int_box[0][1] ) { /* Interior */ map_local_id[ local ] = count_interior++ ; } else { /* Parallel */ map_local_id[ local ] = count_parallel++ ; } } } } if ( count_interior != int_length ) { abort(); } if ( 
count_parallel != own_length ) { abort(); } /* Fill in off-process received portion: { ( i + my_p ) % np } */ recv_pc[0] = count_parallel ; recv_pc[1] = count_parallel ; send_pc[0] = 0 ; send_pc[1] = 0 ; iSend = 0 ; for ( i = 1 ; i < np ; ++i ) { const int ip = ( i + my_p ) % np ; int recv_box[3][2] ; int send_box[3][2] ; int other_int_box[3][2] ; int other_use_box[3][2] ; /* Received portions */ if ( box_intersect( (BoxInput) map_use_box , (BoxInput) pbox[ip] , recv_box ) ) { for ( g_iz = recv_box[2][0] ; g_iz < recv_box[2][1] ; ++g_iz ) { for ( g_iy = recv_box[1][0] ; g_iy < recv_box[1][1] ; ++g_iy ) { for ( g_ix = recv_box[0][0] ; g_ix < recv_box[0][1] ; ++g_ix ) { const int local = map_global_to_use_box( (BoxInput) map_use_box, g_ix, g_iy, g_iz ); map_local_id[ local ] = count_parallel++ ; } } } } recv_pc[i+1] = count_parallel ; /* Sent items */ global_to_use_box( gbox, pbox[ip], ghost, other_int_box, other_use_box ); if ( box_intersect( (BoxInput) other_use_box , (BoxInput) pbox[my_p] , send_box ) ) { int nSend = ( send_box[0][1] - send_box[0][0] ) * ( send_box[1][1] - send_box[1][0] ) * ( send_box[2][1] - send_box[2][0] ); resize_int( & send_id , & send_id_size , (iSend + nSend ) ); for ( g_iz = send_box[2][0] ; g_iz < send_box[2][1] ; ++g_iz ) { for ( g_iy = send_box[1][0] ; g_iy < send_box[1][1] ; ++g_iy ) { for ( g_ix = send_box[0][0] ; g_ix < send_box[0][1] ; ++g_ix ) { const int local = map_global_to_use_box( (BoxInput) map_use_box, g_ix, g_iy, g_iz ); if ( map_local_id[ local ] < count_interior ) { abort(); } send_id[ iSend ] = map_local_id[ local ] ; ++iSend ; } } } } send_pc[i+1] = iSend ; } if ( count_parallel != use_length ) { abort(); } *map_count_interior = int_length ; *map_count_owns = own_length ; *map_count_uses = use_length ; *map_recv_pc = recv_pc ; *map_send_pc = send_pc ; *map_send_id = send_id ; }