//float atomic func
__device__ static float atomicMul(float* address, float val)
{
  int* address_as_int = (int*)address;
  int old = *address_as_int, assumed;
  do {
    assumed = old;
    old = atomicCAS(address_as_int, assumed, __float_as_int(__int_as_float(assumed) * val));
  } while (assumed != old);
  return __int_as_float(old);
}
Beispiel #2
0
__device__ __forceinline__ float atomicAdd(float* address, float val)
{
#if CV_CUDEV_ARCH >= 200
    return ::atomicAdd(address, val);
#else
    int* address_as_i = (int*) address;
    int old = *address_as_i, assumed;
    do {
        assumed = old;
        old = ::atomicCAS(address_as_i, assumed,
            __float_as_int(val + __int_as_float(assumed)));
    } while (assumed != old);
    return __int_as_float(old);
#endif
}
Beispiel #3
0
void BVH4::pack_aligned_node(int idx,
                             const BoundBox *bounds,
                             const int *child,
                             const uint visibility,
                             const float time_from,
                             const float time_to,
                             const int num)
{
	float4 data[BVH_QNODE_SIZE];
	memset(data, 0, sizeof(data));

	data[0].x = __uint_as_float(visibility & ~PATH_RAY_NODE_UNALIGNED);
	data[0].y = time_from;
	data[0].z = time_to;

	for(int i = 0; i < num; i++) {
		float3 bb_min = bounds[i].min;
		float3 bb_max = bounds[i].max;

		data[1][i] = bb_min.x;
		data[2][i] = bb_max.x;
		data[3][i] = bb_min.y;
		data[4][i] = bb_max.y;
		data[5][i] = bb_min.z;
		data[6][i] = bb_max.z;

		data[7][i] = __int_as_float(child[i]);
	}

	for(int i = num; i < 4; i++) {
		/* We store BB which would never be recorded as intersection
		 * so kernel might safely assume there are always 4 child nodes.
		 */
		data[1][i] = FLT_MAX;
		data[2][i] = -FLT_MAX;

		data[3][i] = FLT_MAX;
		data[4][i] = -FLT_MAX;

		data[5][i] = FLT_MAX;
		data[6][i] = -FLT_MAX;

		data[7][i] = __int_as_float(0);
	}

	memcpy(&pack.nodes[idx], data, sizeof(float4)*BVH_QNODE_SIZE);
}
Beispiel #4
0
__device__ static float atomicMax(float* address, float val)
{
#if CV_CUDEV_ARCH >= 120
    int* address_as_i = (int*) address;
    int old = *address_as_i, assumed;
    do {
        assumed = old;
        old = ::atomicCAS(address_as_i, assumed,
            __float_as_int(::fmaxf(val, __int_as_float(assumed))));
    } while (assumed != old);
    return __int_as_float(old);
#else
    (void) address;
    (void) val;
    return 0.0f;
#endif
}
Beispiel #5
0
CUGIP_DECL_DEVICE inline float
atomicFloatCAS(float *address, float old, float val)
{
    int i_val = __float_as_int(val);
    int tmp0 = __float_as_int(old);

    return __int_as_float(atomicCAS((int *)address, tmp0, i_val));
}
Beispiel #6
0
void BVH4::pack_leaf(const BVHStackEntry& e, const LeafNode *leaf)
{
	float4 data[BVH_QNODE_LEAF_SIZE];
	memset(data, 0, sizeof(data));
	if(leaf->num_triangles() == 1 && pack.prim_index[leaf->lo] == -1) {
		/* object */
		data[0].x = __int_as_float(~(leaf->lo));
		data[0].y = __int_as_float(0);
	}
	else {
		/* triangle */
		data[0].x = __int_as_float(leaf->lo);
		data[0].y = __int_as_float(leaf->hi);
	}
	data[0].z = __uint_as_float(leaf->visibility);
	if(leaf->num_triangles() != 0) {
		data[0].w = __uint_as_float(pack.prim_type[leaf->lo]);
	}

	memcpy(&pack.leaf_nodes[e.idx], data, sizeof(float4)*BVH_QNODE_LEAF_SIZE);
}
Beispiel #7
0
void BVH4::refit_node(int idx, bool leaf, BoundBox& bbox, uint& visibility)
{
	if(leaf) {
		int4 *data = &pack.leaf_nodes[idx];
		int4 c = data[0];
		/* Refit leaf node. */
		for(int prim = c.x; prim < c.y; prim++) {
			int pidx = pack.prim_index[prim];
			int tob = pack.prim_object[prim];
			Object *ob = objects[tob];

			if(pidx == -1) {
				/* Object instance. */
				bbox.grow(ob->bounds);
			}
			else {
				/* Primitives. */
				const Mesh *mesh = ob->mesh;

				if(pack.prim_type[prim] & PRIMITIVE_ALL_CURVE) {
					/* Curves. */
					int str_offset = (params.top_level)? mesh->curve_offset: 0;
					Mesh::Curve curve = mesh->get_curve(pidx - str_offset);
					int k = PRIMITIVE_UNPACK_SEGMENT(pack.prim_type[prim]);

					curve.bounds_grow(k, &mesh->curve_keys[0], &mesh->curve_radius[0], bbox);

					visibility |= PATH_RAY_CURVE;

					/* Motion curves. */
					if(mesh->use_motion_blur) {
						Attribute *attr = mesh->curve_attributes.find(ATTR_STD_MOTION_VERTEX_POSITION);

						if(attr) {
							size_t mesh_size = mesh->curve_keys.size();
							size_t steps = mesh->motion_steps - 1;
							float3 *key_steps = attr->data_float3();

							for(size_t i = 0; i < steps; i++)
								curve.bounds_grow(k, key_steps + i*mesh_size, &mesh->curve_radius[0], bbox);
						}
					}
				}
				else {
					/* Triangles. */
					int tri_offset = (params.top_level)? mesh->tri_offset: 0;
					Mesh::Triangle triangle = mesh->get_triangle(pidx - tri_offset);
					const float3 *vpos = &mesh->verts[0];

					triangle.bounds_grow(vpos, bbox);

					/* Motion triangles. */
					if(mesh->use_motion_blur) {
						Attribute *attr = mesh->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION);

						if(attr) {
							size_t mesh_size = mesh->verts.size();
							size_t steps = mesh->motion_steps - 1;
							float3 *vert_steps = attr->data_float3();

							for(size_t i = 0; i < steps; i++)
								triangle.bounds_grow(vert_steps + i*mesh_size, bbox);
						}
					}
				}
			}
			visibility |= ob->visibility_for_tracing();
		}
		/* TODO(sergey): This is actually a copy of pack_leaf(),
		 * but this chunk of code only knows actual data and has
		 * no idea about BVHNode.
		 *
		 * Would be nice to de-duplicate code, but trying to make
		 * making code more general ends up in much nastier code
		 * in my opinion so far.
		 *
		 * Same applies to the inner nodes case below.
		 */
		float4 leaf_data[BVH_QNODE_LEAF_SIZE];
		leaf_data[0].x = __int_as_float(c.x);
		leaf_data[0].y = __int_as_float(c.y);
		leaf_data[0].z = __uint_as_float(visibility);
		leaf_data[0].w = __uint_as_float(c.w);
		memcpy(&pack.leaf_nodes[idx], leaf_data, sizeof(float4)*BVH_QNODE_LEAF_SIZE);
	}
	else {
		int4 *data = &pack.nodes[idx];
		bool is_unaligned = (data[0].x & PATH_RAY_NODE_UNALIGNED) != 0;
		int4 c;
		if(is_unaligned) {
			c = data[13];
		}
		else {
			c = data[7];
		}
		/* Refit inner node, set bbox from children. */
		BoundBox child_bbox[4] = {BoundBox::empty,
		                          BoundBox::empty,
		                          BoundBox::empty,
		                          BoundBox::empty};
		uint child_visibility[4] = {0};
		int num_nodes = 0;

		for(int i = 0; i < 4; ++i) {
			if(c[i] != 0) {
				refit_node((c[i] < 0)? -c[i]-1: c[i], (c[i] < 0),
				           child_bbox[i], child_visibility[i]);
				++num_nodes;
				bbox.grow(child_bbox[i]);
				visibility |= child_visibility[i];
			}
		}

		if(is_unaligned) {
			Transform aligned_space[4] = {transform_identity(),
			                              transform_identity(),
			                              transform_identity(),
			                              transform_identity()};
			pack_unaligned_node(idx,
			                    aligned_space,
			                    child_bbox,
			                    &c[0],
			                    visibility,
			                    0.0f,
			                    1.0f,
			                    4);
		}
		else {
			pack_aligned_node(idx,
			                  child_bbox,
			                  &c[0],
			                  visibility,
			                  0.0f,
			                  1.0f,
			                  4);
		}
	}
}
Beispiel #8
0
void BVH4::pack_unaligned_node(int idx,
                               const Transform *aligned_space,
                               const BoundBox *bounds,
                               const int *child,
                               const uint visibility,
                               const float time_from,
                               const float time_to,
                               const int num)
{
	float4 data[BVH_UNALIGNED_QNODE_SIZE];
	memset(data, 0, sizeof(data));

	data[0].x = __uint_as_float(visibility | PATH_RAY_NODE_UNALIGNED);
	data[0].y = time_from;
	data[0].z = time_to;

	for(int i = 0; i < num; i++) {
		Transform space = BVHUnaligned::compute_node_transform(
		        bounds[i],
		        aligned_space[i]);

		data[1][i] = space.x.x;
		data[2][i] = space.x.y;
		data[3][i] = space.x.z;

		data[4][i] = space.y.x;
		data[5][i] = space.y.y;
		data[6][i] = space.y.z;

		data[7][i] = space.z.x;
		data[8][i] = space.z.y;
		data[9][i] = space.z.z;

		data[10][i] = space.x.w;
		data[11][i] = space.y.w;
		data[12][i] = space.z.w;

		data[13][i] = __int_as_float(child[i]);
	}

	for(int i = num; i < 4; i++) {
		/* We store BB which would never be recorded as intersection
		 * so kernel might safely assume there are always 4 child nodes.
		 */

		data[1][i] = NAN;
		data[2][i] = NAN;
		data[3][i] = NAN;

		data[4][i] = NAN;
		data[5][i] = NAN;
		data[6][i] = NAN;

		data[7][i] = NAN;
		data[8][i] = NAN;
		data[9][i] = NAN;

		data[10][i] = NAN;
		data[11][i] = NAN;
		data[12][i] = NAN;

		data[13][i] = __int_as_float(0);
	}

	memcpy(&pack.nodes[idx], data, sizeof(float4)*BVH_UNALIGNED_QNODE_SIZE);
}
Beispiel #9
0
void BVH8::refit_node(int idx, bool leaf, BoundBox &bbox, uint &visibility)
{
  if (leaf) {
    int4 *data = &pack.leaf_nodes[idx];
    int4 c = data[0];
    /* Refit leaf node. */
    for (int prim = c.x; prim < c.y; prim++) {
      int pidx = pack.prim_index[prim];
      int tob = pack.prim_object[prim];
      Object *ob = objects[tob];

      if (pidx == -1) {
        /* Object instance. */
        bbox.grow(ob->bounds);
      }
      else {
        /* Primitives. */
        const Mesh *mesh = ob->mesh;

        if (pack.prim_type[prim] & PRIMITIVE_ALL_CURVE) {
          /* Curves. */
          int str_offset = (params.top_level) ? mesh->curve_offset : 0;
          Mesh::Curve curve = mesh->get_curve(pidx - str_offset);
          int k = PRIMITIVE_UNPACK_SEGMENT(pack.prim_type[prim]);

          curve.bounds_grow(k, &mesh->curve_keys[0], &mesh->curve_radius[0], bbox);

          visibility |= PATH_RAY_CURVE;

          /* Motion curves. */
          if (mesh->use_motion_blur) {
            Attribute *attr = mesh->curve_attributes.find(ATTR_STD_MOTION_VERTEX_POSITION);

            if (attr) {
              size_t mesh_size = mesh->curve_keys.size();
              size_t steps = mesh->motion_steps - 1;
              float3 *key_steps = attr->data_float3();

              for (size_t i = 0; i < steps; i++) {
                curve.bounds_grow(k, key_steps + i * mesh_size, &mesh->curve_radius[0], bbox);
              }
            }
          }
        }
        else {
          /* Triangles. */
          int tri_offset = (params.top_level) ? mesh->tri_offset : 0;
          Mesh::Triangle triangle = mesh->get_triangle(pidx - tri_offset);
          const float3 *vpos = &mesh->verts[0];

          triangle.bounds_grow(vpos, bbox);

          /* Motion triangles. */
          if (mesh->use_motion_blur) {
            Attribute *attr = mesh->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION);

            if (attr) {
              size_t mesh_size = mesh->verts.size();
              size_t steps = mesh->motion_steps - 1;
              float3 *vert_steps = attr->data_float3();

              for (size_t i = 0; i < steps; i++) {
                triangle.bounds_grow(vert_steps + i * mesh_size, bbox);
              }
            }
          }
        }
      }

      visibility |= ob->visibility;
    }

    float4 leaf_data[BVH_ONODE_LEAF_SIZE];
    leaf_data[0].x = __int_as_float(c.x);
    leaf_data[0].y = __int_as_float(c.y);
    leaf_data[0].z = __uint_as_float(visibility);
    leaf_data[0].w = __uint_as_float(c.w);
    memcpy(&pack.leaf_nodes[idx], leaf_data, sizeof(float4) * BVH_ONODE_LEAF_SIZE);
  }
  else {
    float8 *data = (float8 *)&pack.nodes[idx];
    bool is_unaligned = (__float_as_uint(data[0].a) & PATH_RAY_NODE_UNALIGNED) != 0;
    /* Refit inner node, set bbox from children. */
    BoundBox child_bbox[8] = {BoundBox::empty,
                              BoundBox::empty,
                              BoundBox::empty,
                              BoundBox::empty,
                              BoundBox::empty,
                              BoundBox::empty,
                              BoundBox::empty,
                              BoundBox::empty};
    int child[8];
    uint child_visibility[8] = {0};
    int num_nodes = 0;

    for (int i = 0; i < 8; ++i) {
      child[i] = __float_as_int(data[(is_unaligned) ? 13 : 7][i]);

      if (child[i] != 0) {
        refit_node((child[i] < 0) ? -child[i] - 1 : child[i],
                   (child[i] < 0),
                   child_bbox[i],
                   child_visibility[i]);
        ++num_nodes;
        bbox.grow(child_bbox[i]);
        visibility |= child_visibility[i];
      }
    }

    if (is_unaligned) {
      Transform aligned_space[8] = {transform_identity(),
                                    transform_identity(),
                                    transform_identity(),
                                    transform_identity(),
                                    transform_identity(),
                                    transform_identity(),
                                    transform_identity(),
                                    transform_identity()};
      pack_unaligned_node(
          idx, aligned_space, child_bbox, child, visibility, 0.0f, 1.0f, num_nodes);
    }
    else {
      pack_aligned_node(idx, child_bbox, child, visibility, 0.0f, 1.0f, num_nodes);
    }
  }
}