예제 #1
0
  /*! Constructs the builder and runs the complete build of \a bvh from the
   *  given triangles. Node and primitive storage is over-allocated for the
   *  build and shrunk to the number of entries actually used afterwards;
   *  the temporary primitive array is released before returning. */
  BVH2Builder::BVH2Builder(const BuildTriangle* triangles, size_t numTriangles, Ref<BVH2<Triangle4> > bvh)
    : triangles(triangles), numTriangles(numTriangles), bvh(bvh)
  {
    const size_t threadCount = scheduler->getNumThreads();

    /*! Node storage: one entry per triangle plus one allocation block per thread. */
    allocatedNodes = numTriangles+threadCount*allocBlockSize;
    void* nodeStorage = alignedMalloc(allocatedNodes*sizeof(BVH2<Triangle4>::Node));
    bvh->nodes = (BVH2<Triangle4>::Node*)nodeStorage;

    /*! Primitive storage: sized the same way as the node storage. */
    allocatedPrimitives = numTriangles+threadCount*allocBlockSize;
    void* primitiveStorage = alignedMalloc(allocatedPrimitives*sizeof(Triangle4));
    bvh->triangles = (Triangle4*)primitiveStorage;

    /*! Working array for splitting primitive lists. 2*N required for parallel splits. */
    prims = (Box*)alignedMalloc(2*numTriangles*sizeof(Box));

    /*! compute the bounds of all primitives */
    ComputeBoundsTask boundsTask(triangles,numTriangles,prims);
    boundsTask.go();

    /*! kick off the build with a range covering all triangles */
    recurse(bvh->root,1,BuildRange(0,numTriangles,boundsTask.geomBound,boundsTask.centBound));
    scheduler->go();

    /*! rotate top part of tree */
    for (int pass=0; pass<5; pass++) {
      bvh->rotate(bvh->root,4);
    }

    /*! shrink node storage to the number of nodes handed out */
    bvh->nodes = (BVH2<Triangle4>::Node*) alignedRealloc(bvh->nodes,atomicNextNode*sizeof(BVH2<Triangle4>::Node));
    /*! shrink primitive storage to the number of primitives handed out */
    bvh->triangles = (Triangle4*) alignedRealloc(bvh->triangles,atomicNextPrimitive*sizeof(Triangle4));
    bvh->allocatedNodes     = atomicNextNode;
    bvh->allocatedTriangles = atomicNextPrimitive;

    /*! the splitting array is only needed while building */
    alignedFree(prims);
    prims = NULL;
  }
예제 #2
0
파일: LocalFB.cpp 프로젝트: ingowald/OSPRay
  /*! Creates a frame buffer of the given size and allocates the per-pixel
   *  buffers selected by the has*Buffer flags, plus the per-tile accumulation
   *  IDs. If \a colorBufferToUse is non-null it is used as the color buffer
   *  instead of allocating one. Finally the ISPC-side equivalent is created. */
  LocalFrameBuffer::LocalFrameBuffer(const vec2i &size,
                                     ColorBufferFormat colorBufferFormat,
                                     const uint32 channels,
                                     void *colorBufferToUse)
    : FrameBuffer(size, colorBufferFormat, channels)
      , tileErrorRegion(hasVarianceBuffer ? getNumTiles() : vec2i(0))
  {
    Assert(size.x > 0);
    Assert(size.y > 0);

    if (!colorBufferToUse) {
      // no external storage given: allocate according to the pixel format
      switch (colorBufferFormat) {
      case OSP_FB_RGBA32F:
        colorBuffer = (vec4f*)alignedMalloc(sizeof(vec4f)*size.x*size.y);
        break;
      case OSP_FB_RGBA8:
      case OSP_FB_SRGBA:
        colorBuffer = (uint32*)alignedMalloc(sizeof(uint32)*size.x*size.y);
        break;
      case OSP_FB_NONE:
        colorBuffer = nullptr;
        break;
      }
    } else {
      colorBuffer = colorBufferToUse;
    }

    if (hasDepthBuffer)
      depthBuffer = alignedMalloc<float>(size.x*size.y);
    else
      depthBuffer = nullptr;

    if (hasAccumBuffer)
      accumBuffer = alignedMalloc<vec4f>(size.x*size.y);
    else
      accumBuffer = nullptr;

    // per-tile accumulation IDs start out at zero
    const size_t accumIDBytes = sizeof(int32)*getTotalTiles();
    tileAccumID = (int32*)alignedMalloc(accumIDBytes);
    memset(tileAccumID, 0, accumIDBytes);

    if (hasVarianceBuffer)
      varianceBuffer = alignedMalloc<vec4f>(size.x*size.y);
    else
      varianceBuffer = nullptr;

    if (hasNormalBuffer)
      normalBuffer = alignedMalloc<vec3f>(size.x*size.y);
    else
      normalBuffer = nullptr;

    if (hasAlbedoBuffer)
      albedoBuffer = alignedMalloc<vec3f>(size.x*size.y);
    else
      albedoBuffer = nullptr;

    ispcEquivalent = ispc::LocalFrameBuffer_create(this,
                                                   size.x,
                                                   size.y,
                                                   colorBufferFormat,
                                                   colorBuffer,
                                                   depthBuffer,
                                                   accumBuffer,
                                                   varianceBuffer,
                                                   normalBuffer,
                                                   albedoBuffer,
                                                   tileAccumID);
  }
예제 #3
0
    // Resizes the automatically allocated buffer to byte_count bytes.
    // A byte_count of 0 clears the buffer. If alignment < 1 the last specified
    // alignment (or the default one) is used. When size or alignment change, a
    // new chunk is allocated and as much of the old content as fits is copied.
    void resize(size_t byte_count, size_t alignment = 0)
    {
      VL_CHECK( mAllocationMode == AutoAllocatedBuffer );

      if (byte_count == 0)
      {
        clear();
        return;
      }

      const size_t wanted_alignment = alignment >= 1 ? alignment : mAlignment;

      // nothing to do when neither the size nor the alignment changes
      if (byte_count == mByteCount && wanted_alignment == mAlignment)
        return;

      mAlignment = wanted_alignment;

      // byte_count is known to be non-zero here, so a new chunk is always needed
      unsigned char* new_chunk = (unsigned char*)alignedMalloc(byte_count, mAlignment);

      if (mPtr)
      {
        // carry over the part of the old content that fits, then drop the old chunk
        const size_t bytes_to_keep = mByteCount < byte_count ? mByteCount : byte_count;
        memcpy(new_chunk, mPtr, bytes_to_keep);
        alignedFree(mPtr);
      }

      mPtr = new_chunk;
      mByteCount = byte_count;
    }
/* adds a subdivision-surface cube to the scene and returns its geometry ID */
unsigned int addSubdivCube (RTCScene scene_i)
{
  unsigned int geomID = rtcNewSubdivisionMesh(scene_i, RTC_GEOMETRY_STATIC, NUM_QUAD_FACES, NUM_QUAD_INDICES, NUM_VERTICES, 0, 0, 0);

  /* vertex, index and face data are shared from the static cube arrays */
  rtcSetBuffer(scene_i, geomID, RTC_VERTEX_BUFFER, cube_vertices,     0, sizeof(Vec3fa));
  rtcSetBuffer(scene_i, geomID, RTC_INDEX_BUFFER,  cube_quad_indices, 0, sizeof(unsigned int));
  rtcSetBuffer(scene_i, geomID, RTC_FACE_BUFFER,   cube_quad_faces,   0, sizeof(unsigned int));

  /* write the same level (4) into every entry of the level buffer */
  float* levels = (float*) rtcMapBuffer(scene_i, geomID, RTC_LEVEL_BUFFER);
  for (size_t entry=0; entry<NUM_QUAD_INDICES; entry++) {
    levels[entry] = 4;
  }
  rtcUnmapBuffer(scene_i, geomID, RTC_LEVEL_BUFFER);

  /* create face color array, one color per cube side */
  colors = (Vec3fa*) alignedMalloc(6*sizeof(Vec3fa));
  colors[0] = Vec3fa(1,0,0); // left side
  colors[1] = Vec3fa(0,1,0); // right side
  colors[2] = Vec3fa(0.5f);  // bottom side
  colors[3] = Vec3fa(1.0f);  // top side
  colors[4] = Vec3fa(0,0,1); // front side
  colors[5] = Vec3fa(1,1,0); // back side

  /* install the filter callbacks: single-ray versions in normal mode,
     the N-wide versions in every other mode */
  if (g_mode == MODE_NORMAL) {
    rtcSetIntersectionFilterFunction(scene_i,geomID,intersectionFilter);
    rtcSetOcclusionFilterFunction   (scene_i,geomID,occlusionFilter);
  }
  else {
    rtcSetIntersectionFilterFunctionN(scene_i,geomID,intersectionFilterN);
    rtcSetOcclusionFilterFunctionN   (scene_i,geomID,occlusionFilterN);
  }

  return geomID;
}
예제 #5
0
파일: main.c 프로젝트: anirban04/ipc
/* Allocates and frees a 1024-byte block (second argument 32) five times,
   printing the address returned each time. */
int main() {
	int round;
	for (round = 0; round < 5; round++) {
		int* block = (int*)alignedMalloc(1024, 32);
		printf("Got pointer with address %p\n", block);
		alignedFree(block);
	}
}
예제 #6
0
 /* Allocates `size` bytes via alignedMalloc and records the pointer in the
    std::vector<void*> that *taskPtr refers to. The vector is created on the
    first call, when *taskPtr is still null. Returns the new allocation. */
 extern "C" __dllexport void* ISPCAlloc(void** taskPtr, int64_t size, int32_t alignment)
 {
   std::vector<void*>* allocations = (std::vector<void*>*)(*taskPtr);
   if (allocations == nullptr) {
     /* first allocation for this handle: create the bookkeeping list */
     allocations = new std::vector<void*>;
     *taskPtr = allocations;
   }

   void* block = alignedMalloc((size_t)size,alignment);
   allocations->push_back(block);
   return block;
 }
예제 #7
0
 /* Allocates parms->bytes bytes and stores the result in g_data.
    In verbose mode the handle id and the byte count are printed first. */
 extern "C" void rtNewDataStart(uint32_t numBuffers, void** buffers, uint64_t* bufferBytes, parmsNewDataStart* parms, uint16_t parmBytes, void* ret, uint16_t retBytess)
 {
   if (g_verbose)
   {
     printf("handle %06d = rtNewDataStart(%zu)\n", parms->id, parms->bytes);
     fflush(stdout);
   }

   void* storage = alignedMalloc(parms->bytes);
   g_data = (char*) storage;
 }
예제 #8
0
파일: texture.cpp 프로젝트: nyue/embree
  /*! Creates an RGBA8 texture (4 bytes per texel) from an image.
   *  The wrap masks are set to size-1 for power-of-two dimensions and to 0
   *  otherwise. The texel storage is allocated here and filled by
   *  Image::convertToRGBA8. */
  Texture::Texture(Ref<Image> img, const std::string fileName)
    : width(unsigned(img->width)), height(unsigned(img->height)), format(RGBA8), bytesPerTexel(4), width_mask(0), height_mask(0), data(nullptr), fileName(fileName)
  {
    width_mask  = isPowerOf2(width) ? width-1 : 0;
    height_mask = isPowerOf2(height) ? height-1 : 0;

    /* widen before multiplying: 4*width*height evaluated in 32-bit unsigned
       arithmetic wraps around for large images and would under-allocate */
    const size_t numBytes = size_t(4)*size_t(width)*size_t(height);
    data = alignedMalloc(numBytes,64);
    img->convertToRGBA8((unsigned char*)data);
  }
예제 #9
0
/* adds a cube to the scene and returns the ID of the created mesh */
unsigned int addCube (RTCScene scene_i)
{
  /* corner positions of the cube, one row (x,y,z) per vertex */
  static const int cornerCoords[8][3] = {
    {-1,-1,-1}, {-1,-1,+1}, {-1,+1,-1}, {-1,+1,+1},
    {+1,-1,-1}, {+1,-1,+1}, {+1,+1,-1}, {+1,+1,+1}
  };

  /* vertex indices (v0,v1,v2) of the 12 triangles, two rows per cube side */
  static const int triangleCorners[12][3] = {
    {0,2,1}, {1,2,3},  // left side
    {4,5,6}, {5,7,6},  // right side
    {0,1,4}, {1,5,4},  // bottom side
    {2,6,3}, {3,6,7},  // top side
    {0,4,2}, {2,4,6},  // front side
    {1,3,5}, {3,7,5}   // back side
  };

  /* create a triangulated cube with 12 triangles and 8 vertices */
  unsigned int mesh = rtcNewTriangleMesh (scene_i, RTC_GEOMETRY_STATIC, 12, 8);

  /* set vertices */
  Vertex* vertices = (Vertex*) rtcMapBuffer(scene_i,mesh,RTC_VERTEX_BUFFER);
  for (int v = 0; v < 8; v++) {
    vertices[v].x = cornerCoords[v][0];
    vertices[v].y = cornerCoords[v][1];
    vertices[v].z = cornerCoords[v][2];
  }
  rtcUnmapBuffer(scene_i,mesh,RTC_VERTEX_BUFFER);

  /* create triangle color array */
  colors = (Vec3fa*) alignedMalloc(12*sizeof(Vec3fa));

  /* one color per side; both triangles of a side share it */
  const Vec3fa sideColors[6] = {
    Vec3fa(1,0,0),  // left side
    Vec3fa(0,1,0),  // right side
    Vec3fa(0.5f),   // bottom side
    Vec3fa(1.0f),   // top side
    Vec3fa(0,0,1),  // front side
    Vec3fa(1,1,0)   // back side
  };

  /* set triangles and colors */
  Triangle* triangles = (Triangle*) rtcMapBuffer(scene_i,mesh,RTC_INDEX_BUFFER);
  for (int tri = 0; tri < 12; tri++) {
    colors[tri] = sideColors[tri/2];
    triangles[tri].v0 = triangleCorners[tri][0];
    triangles[tri].v1 = triangleCorners[tri][1];
    triangles[tri].v2 = triangleCorners[tri][2];
  }
  rtcUnmapBuffer(scene_i,mesh,RTC_INDEX_BUFFER);

  /* set intersection filter for the cube */
  rtcSetIntersectionFilterFunction(scene_i,mesh,(RTCFilterFunc)&intersectionFilter);
  rtcSetOcclusionFilterFunction   (scene_i,mesh,(RTCFilterFunc)&occlusionFilter);

  return mesh;
}
예제 #10
0
//! Create an ispc-side DirectionalLight object
extern "C" void* DirectionalLight_create()
{
  DirectionalLight* self = (DirectionalLight*) alignedMalloc(sizeof(DirectionalLight));
  Light_Constructor(&self->super);
  self->super.sample = DirectionalLight_sample;
  self->super.eval = DirectionalLight_eval;

  DirectionalLight_set(self, Vec3fa(0.f, 0.f, 1.f), Vec3fa(1.f), 1.f);
  return self;
}
예제 #11
0
  /* Re-allocates the global pixel buffer for a new resolution.
     Does nothing when the resolution is unchanged; otherwise the old buffer
     (if any) is freed and a new one is allocated. */
  void resize(int32 width, int32 height)
  {
    const bool sameResolution = (width == g_width) && (height == g_height);
    if (sameResolution)
      return;

    if (g_pixels) {
      alignedFree(g_pixels);
    }

    g_width  = width;
    g_height = height;
    g_pixels = (int*) alignedMalloc(g_width*g_height*sizeof(int),64);
  }
예제 #12
0
 /* Returns a block while holding the allocator mutex: the last entry of
    `blocks` is reused if there is one, otherwise a fresh block of blockSize
    bytes is allocated with alignedMalloc (second argument 64). */
 void* Alloc::malloc()
 {
   Lock<MutexSys> lock(mutex);

   /* nothing cached: fall back to a fresh allocation */
   if (blocks.size() == 0)
     return alignedMalloc(blockSize,64);

   void* recycled = blocks.back();
   blocks.pop_back();
   return recycled;
 }
예제 #13
0
//! Create an ispc-side PointLight object
extern "C" void* PointLight_create()
{
  PointLight* self = (PointLight*) alignedMalloc(sizeof(PointLight),16);
  Light_Constructor(&self->super);
  self->super.sample = PointLight_sample;
  self->super.eval = PointLight_eval;

  PointLight_set(self, Vec3fa(0.f), Vec3fa(1.f), 0.f);
  return self;
}
예제 #14
0
파일: texture.cpp 프로젝트: nyue/embree
  /*! Creates a texture of the given size and format.
   *  The wrap masks are set to size-1 for power-of-two dimensions and to 0
   *  otherwise. If \a in is non-null, bytesPerTexel*width*height bytes are
   *  copied from it into the newly allocated storage; otherwise the storage
   *  is zero-filled. */
  Texture::Texture (unsigned width, unsigned height, const Format format, const char* in)
    : width(width), height(height), format(format), bytesPerTexel(getFormatBytesPerTexel(format)), width_mask(0), height_mask(0), data(nullptr)
  {
    width_mask  = isPowerOf2(width) ? width-1 : 0;
    height_mask = isPowerOf2(height) ? height-1 : 0;

    /* compute the size once and in size_t, so that the product cannot wrap
       around in 32-bit arithmetic for large textures */
    const size_t numBytes = size_t(bytesPerTexel)*size_t(width)*size_t(height);

    data = alignedMalloc(numBytes,64);
    if (in) {
      memcpy(data,in,numBytes);
    }
    else {
      memset(data,0,numBytes);
    }
  }
예제 #15
0
/**
 * Stores the given dimensions and allocates the storage for all model variables.
 *
 * For bool, int and real variables two arrays of the same length are allocated:
 * the variable array and the matching "pre" array. Arrays of dimension 0 are
 * set to NULL. All allocated arrays are initialised (empty string, false, 0, 0.0),
 * including the pre arrays, so that no array is ever read while uninitialised.
 *
 * @param dim_real       number of real variables
 * @param dim_int        number of integer variables
 * @param dim_bool       number of boolean variables
 * @param dim_string     number of string variables
 * @param dim_pre_vars   number of pre variables; must be at least dim_real + dim_int + dim_bool
 * @param dim_state_vars stored in _dim_z
 * @param state_index    stored in _z_i
 * @throws std::runtime_error if dim_real + dim_int + dim_bool exceeds dim_pre_vars
 */
void SimVars::create(size_t dim_real, size_t dim_int, size_t dim_bool, size_t dim_string, size_t dim_pre_vars, size_t dim_state_vars, size_t state_index)
{
	_dim_real = dim_real;
	_dim_int = dim_int;
	_dim_bool = dim_bool;
	_dim_string = dim_string;
	_dim_pre_vars = dim_pre_vars;
	_dim_z = dim_state_vars;
	_z_i = state_index;

	if (_dim_real + _dim_int + _dim_bool > _dim_pre_vars)
		throw std::runtime_error("Wrong pre variable size");
	//allocate memory for all model variables
	if (dim_string > 0) {
		_string_vars = new string[dim_string];
	}
	else {
		_string_vars = NULL;
	}
	if (dim_bool > 0) {
		_bool_vars = (bool*)alignedMalloc(sizeof(bool) * dim_bool, 64);
		_pre_bool_vars = (bool*)alignedMalloc(sizeof(bool) * dim_bool, 64);
	}
	else {
		_bool_vars = NULL;
		_pre_bool_vars = NULL;
	}
	if (dim_int > 0) {
		_int_vars = (int*)alignedMalloc(sizeof(int) * dim_int, 64);
		_pre_int_vars = (int*)alignedMalloc(sizeof(int) * dim_int, 64);
	}
	else {
		_int_vars = NULL;
		_pre_int_vars = NULL;
	}
	if (dim_real > 0) {
		_real_vars = (double*)alignedMalloc(sizeof(double) * dim_real, 64);
		_pre_real_vars = (double*)alignedMalloc(sizeof(double) * dim_real, 64);
	}
	else {
		_real_vars = NULL;
		_pre_real_vars = NULL;
	}

	//initialize all model variables; the pre arrays come from alignedMalloc
	//as well and would otherwise hold indeterminate values
	if(dim_string > 0)
		std::fill(_string_vars, _string_vars + dim_string, string());
	if (dim_bool > 0) {
		std::fill(_bool_vars, _bool_vars + dim_bool, false);
		std::fill(_pre_bool_vars, _pre_bool_vars + dim_bool, false);
	}
	if (dim_int > 0) {
		std::fill(_int_vars, _int_vars + dim_int, 0);
		std::fill(_pre_int_vars, _pre_int_vars + dim_int, 0);
	}
	if (dim_real > 0) {
		std::fill(_real_vars, _real_vars + dim_real, 0.0);
		std::fill(_pre_real_vars, _pre_real_vars + dim_real, 0.0);
	}
}
예제 #16
0
파일: texture2d.cpp 프로젝트: nyue/embree
// Allocates a Texture2D and fills in its size, the derived float sizes, the
// caller-provided texel pointer and the texel fetch function chosen from
// `type` and the nearest-filter bit of `flags`. Returns the new object.
extern "C" void *Texture2D_create(Vec2i &size, void *data,
    uint32_t type, uint32_t flags)
{
  void *storage = alignedMalloc(sizeof(Texture2D));
  Texture2D *texture = (Texture2D*) storage;

  texture->size = size;

  // Due to float rounding frac(x) can be exactly 1.0f (e.g. for very small
  // negative x), although it should be strictly smaller than 1.0f. We handle
  // this case by having sizef slightly smaller than size, such that
  // frac(x)*sizef is always < size.
  texture->sizef = Vec2f(nextafter((float)size.x, -1.0f),
                         nextafter((float)size.y, -1.0f));

  texture->halfTexel = Vec2f(0.5f/size.x, 0.5f/size.y);
  texture->data      = data;
  texture->get       = Texture2D_get_addr(type, flags & TEXTURE_FILTER_NEAREST);

  return texture;
}
/* adds a cube to the scene: every cube vertex is scaled, rotated by `rotation`
   about the axis (0,1,0) and then translated by `offset`; returns the
   geometry ID of the created mesh */
unsigned int addCube (RTCScene scene_i, const Vec3fa& offset, const Vec3fa& scale, float rotation)
{
  /* create a triangulated cube with NUM_TRI_FACES triangles and NUM_VERTICES vertices */
  unsigned int geomID = rtcNewTriangleMesh (scene_i, RTC_GEOMETRY_STATIC, NUM_TRI_FACES, NUM_VERTICES);

  /* the rotate*scale transform is the same for all vertices, so build it once
     instead of once per vertex; the original expression already evaluated
     (rotate*scale)*vtx, so the resulting vertices are unchanged */
  const LinearSpace3fa xfm = LinearSpace3fa::rotate(Vec3fa(0,1,0),rotation)*LinearSpace3fa::scale(scale);

  //rtcSetBuffer(scene_i, geomID, RTC_VERTEX_BUFFER, cube_vertices,     0, sizeof(Vec3fa  ));
  Vec3fa* ptr = (Vec3fa*) rtcMapBuffer(scene_i, geomID, RTC_VERTEX_BUFFER);
  for (size_t i=0; i<NUM_VERTICES; i++) {
    const float x = cube_vertices[i][0];
    const float y = cube_vertices[i][1];
    const float z = cube_vertices[i][2];
    const Vec3fa vtx = Vec3fa(x,y,z);
    ptr[i] = Vec3fa(offset+xfm*vtx);
  }
  rtcUnmapBuffer(scene_i,geomID,RTC_VERTEX_BUFFER);
  rtcSetBuffer(scene_i, geomID, RTC_INDEX_BUFFER,  cube_tri_indices , 0, 3*sizeof(unsigned int));

  /* create per-triangle color array */
  colors = (Vec3fa*) alignedMalloc(12*sizeof(Vec3fa));
  colors[0] = Vec3fa(1,0,0); // left side
  colors[1] = Vec3fa(1,0,0);
  colors[2] = Vec3fa(0,1,0); // right side
  colors[3] = Vec3fa(0,1,0);
  colors[4] = Vec3fa(0.5f);  // bottom side
  colors[5] = Vec3fa(0.5f);
  colors[6] = Vec3fa(1.0f);  // top side
  colors[7] = Vec3fa(1.0f);
  colors[8] = Vec3fa(0,0,1); // front side
  colors[9] = Vec3fa(0,0,1);
  colors[10] = Vec3fa(1,1,0); // back side
  colors[11] = Vec3fa(1,1,0);

  /* set intersection filter for the cube: the N-wide callbacks in every
     mode except MODE_NORMAL, the single-ray callbacks otherwise */
  if (g_mode != MODE_NORMAL) {
    rtcSetIntersectionFilterFunctionN(scene_i,geomID,intersectionFilterN);
    rtcSetOcclusionFilterFunctionN   (scene_i,geomID,occlusionFilterN);
  }
  else {
    rtcSetIntersectionFilterFunction(scene_i,geomID,intersectionFilter);
    rtcSetOcclusionFilterFunction   (scene_i,geomID,occlusionFilter);
  }

  return geomID;
}
예제 #18
0
/* add hair geometry: creates NUM_CURVES curves with 4 control points each,
   translated by `pos`, attaches a matching color array as user vertex
   buffer 0 and returns the geometry ID */
unsigned int addCurve (RTCScene scene, const Vec3fa& pos)
{
  unsigned int geomID = rtcNewCurveGeometry (scene, RTC_GEOMETRY_STATIC, NUM_CURVES, 4*NUM_CURVES);

  /* translation applied to every control point (fourth component 0) */
  const Vec3fa origin = Vec3fa(pos.x,pos.y,pos.z,0.0f);

  /* converts b-spline to bezier basis; curve c uses input vertices c..c+3 */
  Vec3fa* bezierPoints = (Vec3fa*) rtcMapBuffer(scene, geomID, RTC_VERTEX_BUFFER);
  for (size_t c=0; c<NUM_CURVES; c++)
  {
    const Vec3fa p0 = Vec3fa(hair_vertices[c+0][0],hair_vertices[c+0][1],hair_vertices[c+0][2],hair_vertices[c+0][3]);
    const Vec3fa p1 = Vec3fa(hair_vertices[c+1][0],hair_vertices[c+1][1],hair_vertices[c+1][2],hair_vertices[c+1][3]);
    const Vec3fa p2 = Vec3fa(hair_vertices[c+2][0],hair_vertices[c+2][1],hair_vertices[c+2][2],hair_vertices[c+2][3]);
    const Vec3fa p3 = Vec3fa(hair_vertices[c+3][0],hair_vertices[c+3][1],hair_vertices[c+3][2],hair_vertices[c+3][3]);

    Vec3fa* out = bezierPoints + 4*c;
    out[0] = origin + (1.0f/6.0f)*p0 + (2.0f/3.0f)*p1 + (1.0f/6.0f)*p2;
    out[1] = origin + (2.0f/3.0f)*p1 + (1.0f/3.0f)*p2;
    out[2] = origin + (1.0f/3.0f)*p1 + (2.0f/3.0f)*p2;
    out[3] = origin + (1.0f/6.0f)*p1 + (2.0f/3.0f)*p2 + (1.0f/6.0f)*p3;
  }
  rtcUnmapBuffer(scene, geomID, RTC_VERTEX_BUFFER);

  /* the vertex colors go through the same basis conversion, without translation */
  Vec3fa* curveColors = (Vec3fa*) alignedMalloc(4*NUM_CURVES*sizeof(Vec3fa));
  for (size_t c=0; c<NUM_CURVES; c++)
  {
    const Vec3fa c0 = Vec3fa(hair_vertex_colors[c+0][0],hair_vertex_colors[c+0][1],hair_vertex_colors[c+0][2],hair_vertex_colors[c+0][3]);
    const Vec3fa c1 = Vec3fa(hair_vertex_colors[c+1][0],hair_vertex_colors[c+1][1],hair_vertex_colors[c+1][2],hair_vertex_colors[c+1][3]);
    const Vec3fa c2 = Vec3fa(hair_vertex_colors[c+2][0],hair_vertex_colors[c+2][1],hair_vertex_colors[c+2][2],hair_vertex_colors[c+2][3]);
    const Vec3fa c3 = Vec3fa(hair_vertex_colors[c+3][0],hair_vertex_colors[c+3][1],hair_vertex_colors[c+3][2],hair_vertex_colors[c+3][3]);

    Vec3fa* out = curveColors + 4*c;
    out[0] = (1.0f/6.0f)*c0 + (2.0f/3.0f)*c1 + (1.0f/6.0f)*c2;
    out[1] = (2.0f/3.0f)*c1 + (1.0f/3.0f)*c2;
    out[2] = (1.0f/3.0f)*c1 + (2.0f/3.0f)*c2;
    out[3] = (1.0f/6.0f)*c1 + (2.0f/3.0f)*c2 + (1.0f/6.0f)*c3;
  }

  /* curve k starts at control point 4*k */
  int* firstPoint = (int*) rtcMapBuffer(scene, geomID, RTC_INDEX_BUFFER);
  for (int k=0; k<NUM_CURVES; k++) {
    firstPoint[k] = 4*k;
  }
  rtcUnmapBuffer(scene,geomID,RTC_INDEX_BUFFER);

  rtcSetBuffer(scene, geomID, RTC_USER_VERTEX_BUFFER0, curveColors, 0, sizeof(Vec3fa));
  return geomID;
}
예제 #19
0
/* called by the C++ code to render */
extern "C" void device_render (int* pixels,
                           const int width,
                           const int height,
                           const float time,
                           const Vec3fa& vx, 
                           const Vec3fa& vy, 
                           const Vec3fa& vz, 
                           const Vec3fa& p)
{
  /* create scene */
  if (g_scene == NULL)
    g_scene = convertScene(g_ispc_scene);

  /* create accumulator */
  if (g_accu_width != width || g_accu_height != height) {
	//g_accu = new Vec3fa[width*height];
	g_accu = (Vec3fa*)alignedMalloc(width*height*sizeof(Vec3fa));
    g_accu_width = width;
    g_accu_height = height;
    memset(g_accu,0,width*height*sizeof(Vec3fa));
  }

  /* reset accumulator */
  bool camera_changed = g_changed; g_changed = false;
  camera_changed |= ne(g_accu_vx,vx); g_accu_vx = vx; // FIXME: use != operator
  camera_changed |= ne(g_accu_vy,vy); g_accu_vy = vy; // FIXME: use != operator
  camera_changed |= ne(g_accu_vz,vz); g_accu_vz = vz; // FIXME: use != operator
  camera_changed |= ne(g_accu_p,  p); g_accu_p  = p;  // FIXME: use != operator
  g_accu_count++;
  if (camera_changed) {
    g_accu_count=0;
    memset(g_accu,0,width*height*sizeof(Vec3fa));
  }

  /* render frame */
  const int numTilesX = (width +TILE_SIZE_X-1)/TILE_SIZE_X;
  const int numTilesY = (height+TILE_SIZE_Y-1)/TILE_SIZE_Y;
  enableFilterDispatch = renderPixel == renderPixelStandard; 
  launch_renderTile(numTilesX*numTilesY,pixels,width,height,time,vx,vy,vz,p,numTilesX,numTilesY); 
  enableFilterDispatch = false;
  rtcDebug();
}
예제 #20
0
  void TaskScheduler::createThreads(size_t numThreads_in)
  {
    numThreads = numThreads_in;
#if defined(__MIC__)
    if (numThreads == 0) numThreads = getNumberOfLogicalThreads()-4;
#else
    if (numThreads == 0) numThreads = getNumberOfLogicalThreads();
#endif
    
    /* this mapping is only required as ISPC does not propagate task groups */

    thread2event = (ThreadEvent*) alignedMalloc(numThreads*sizeof(ThreadEvent));

    memset(thread2event,0,numThreads*sizeof(ThreadEvent));

    /* generate all threads */
    for (size_t t=0; t<numThreads; t++) {
      threads.push_back(createThread((thread_func)threadFunction,new Thread(t,numThreads,this),4*1024*1024,t));
    }

    //setAffinity(0);
    TaskLogger::init(numThreads);
  }
예제 #21
0
//! Create an ispc-side AmbientLight object
extern "C" void *AmbientLight_create()
{
    AmbientLight* self = (AmbientLight*) alignedMalloc(sizeof(AmbientLight));
    AmbientLight_Constructor(self, Vec3fa(1.f));
    return self;
}
예제 #22
0
  /*! Precomputes all sample sets for the given iteration.
   *
   *  For every sample set, pixel/time/lens samples are generated per pixel
   *  sample, followed by the requested 1D, 2D and light samples. Pixel samples
   *  are optionally warped by \a filter and scaled by (rcpWidth, rcpHeight).
   *
   *  \throws std::runtime_error if samplesPerPixel is not a power of two. The
   *          check runs before any allocation so nothing is leaked on failure. */
  void SamplerFactory::init(float rcpWidth, float rcpHeight, int iteration, const Ref<Filter> filter)
  {
    this->iteration = iteration;

    /*! Validate first: throwing after the allocation below would leak it,
     *  and __bsf must not be evaluated for a value of zero. */
    if (samplesPerPixel == 0 || samplesPerPixel != (1 << __bsf(samplesPerPixel)))
      throw std::runtime_error("Number of samples per pixel have to be a power of two.");

    samples = new PrecomputedSample*[sampleSets];

    /*! Samples are generated in chunks of at least 64; this iteration uses
     *  the slice of its chunk that starts at 'offset'. */
    int chunkSize = max((int)samplesPerPixel,64);
    int currentChunk = int(iteration*samplesPerPixel) / chunkSize;
    int offset = (iteration*samplesPerPixel) % chunkSize;
    Random rng;
    rng.setSeed(currentChunk * 5897);

    /*! Scratch arrays holding one chunk of generated samples. */
    Vec2f* pixel = new Vec2f[chunkSize];
    float* time = new float[chunkSize];
    Vec2f* lens = new Vec2f[chunkSize];
    float* samples1D = new float[chunkSize];
    Vec2f* samples2D = new Vec2f[chunkSize];

    /*! Flat storage shared by all precomputed samples of all sets. */
    allSamples1D = static_cast<float*>(alignedMalloc(sizeof(float)*SamplerFactory::numSamples1D*sampleSets*samplesPerPixel));
    allSamples2D = static_cast<Vec2f*>(alignedMalloc(sizeof(Vec2f)*SamplerFactory::numSamples2D*sampleSets*samplesPerPixel));
    allLightSamples = new PackedLightSample[SamplerFactory::numLightSamples*sampleSets*samplesPerPixel];

    for (int set = 0; set < sampleSets; set++)
    {
      samples[set] = new PrecomputedSample[samplesPerPixel];

      /*! Generate pixel and lens samples. */
      multiJittered(pixel, chunkSize, rng);
      jittered(time, chunkSize, rng);
      multiJittered(lens, chunkSize, rng);
      for (int s = 0; s < samplesPerPixel; s++) {
        samples[set][s].pixel = pixel[offset + s];
        samples[set][s].time = time[offset + s];
        samples[set][s].lens = lens[offset + s];
        if (filter) {
          samples[set][s].pixel = filter->sample(samples[set][s].pixel) + Vec2f(0.5f, 0.5f);
        }
        samples[set][s].pixel = samples[set][s].pixel * Vec2f(rcpWidth, rcpHeight);

        /*! Point each sample at its slice of the flat storage arrays. */
        samples[set][s].samples1D = allSamples1D + (s + samplesPerPixel*set)*SamplerFactory::numSamples1D;
        samples[set][s].samples2D = allSamples2D + (s + samplesPerPixel*set)*SamplerFactory::numSamples2D;
        samples[set][s].lightSamples = allLightSamples + (s + samplesPerPixel*set)*SamplerFactory::numLightSamples;
      }

      /*! Generate requested 1D samples. */
      for (int d = 0; d < SamplerFactory::numSamples1D; d++) {
        jittered(samples1D, chunkSize, rng);
        for (int s = 0; s < samplesPerPixel; s++) {
          samples[set][s].samples1D[d] = samples1D[offset + s];
        }
      }

      /*! Generate 2D samples. */
      for (int d = 0; d < SamplerFactory::numSamples2D; d++) {
        multiJittered(samples2D, chunkSize, rng);
        for (int s = 0; s < samplesPerPixel; s++) {
          samples[set][s].samples2D[d] = samples2D[offset + s];
        }
      }

      /*! Generate light samples. */
      for (int d = 0; d < SamplerFactory::numLightSamples; d++) {
        for (int s = 0; s < samplesPerPixel; s++) {
          LightSample ls;
          DifferentialGeometry dg;
          ls.L = lights[d]->sample(dg, ls.wi, ls.tMax, samples[set][s].samples2D[lightBaseSamples[d]]);
          samples[set][s].lightSamples[d] = ls;
        }
      }
    }

    /*! Release the scratch arrays. */
    delete[] pixel;
    delete[] time;
    delete[] lens;
    delete[] samples1D;
    delete[] samples2D;
  }
예제 #23
0
/*! Creates a frame buffer of the given size and allocates the buffers selected
 *  by the flags, plus the per-tile accumulation IDs. If \a colorBufferToUse is
 *  non-NULL it is used as the color buffer instead of allocating one.
 *  Finally the ISPC-side equivalent is created.
 *
 *  \throws std::runtime_error if no color buffer is supplied and
 *          \a colorBufferFormat is not one of the handled formats. */
LocalFrameBuffer::LocalFrameBuffer(const vec2i &size,
                                   ColorBufferFormat colorBufferFormat,
                                   bool hasDepthBuffer,
                                   bool hasAccumBuffer,
                                   bool hasVarianceBuffer,
                                   void *colorBufferToUse)
    : FrameBuffer(size, colorBufferFormat, hasDepthBuffer, hasAccumBuffer, hasVarianceBuffer)
{
    Assert(size.x > 0);
    Assert(size.y > 0);
    if (colorBufferToUse)
        colorBuffer = colorBufferToUse;
    else {
        switch (colorBufferFormat) {
        case OSP_FB_NONE:
            colorBuffer = NULL;
            break;
        case OSP_FB_RGBA8:
        case OSP_FB_SRGBA:
            // 8 bits per channel: one packed uint32 per pixel
            colorBuffer = (uint32*)alignedMalloc(sizeof(uint32)*size.x*size.y);
            break;
        case OSP_FB_RGBA32F:
            // 32-bit float per channel: one vec4f per pixel. Allocating only
            // a uint32 per pixel here would make the buffer four times too
            // small for the pixels written into it.
            colorBuffer = (vec4f*)alignedMalloc(sizeof(vec4f)*size.x*size.y);
            break;
        default:
            throw std::runtime_error("color buffer format not supported");
        }
    }

    if (hasDepthBuffer)
        depthBuffer = (float*)alignedMalloc(sizeof(float)*size.x*size.y);
    else
        depthBuffer = NULL;

    if (hasAccumBuffer)
        accumBuffer = (vec4f*)alignedMalloc(sizeof(vec4f)*size.x*size.y);
    else
        accumBuffer = NULL;

    // number of tiles, rounding partial tiles up; accumulation IDs start at 0
    tilesx = divRoundUp(size.x, TILE_SIZE);
    tiles = tilesx * divRoundUp(size.y, TILE_SIZE);
    tileAccumID = new int32[tiles];
    memset(tileAccumID, 0, tiles*sizeof(int32));

    if (hasVarianceBuffer) {
        varianceBuffer = (vec4f*)alignedMalloc(sizeof(vec4f)*size.x*size.y);
        tileErrorBuffer = new float[tiles];
        // maximum number of regions: all regions are of size 3 are split in half
        errorRegion.reserve(divRoundUp(tiles*2, 3));
    } else {
        varianceBuffer = NULL;
        tileErrorBuffer = NULL;
    }

    ispcEquivalent = ispc::LocalFrameBuffer_create(this,size.x,size.y,
                     colorBufferFormat,
                     colorBuffer,
                     depthBuffer,
                     accumBuffer,
                     varianceBuffer,
                     tileAccumID,
                     tileErrorBuffer);
}
예제 #24
0
 /* Allocates `bytes` bytes and points both ptr and ptr_ofs at the new storage. */
 void Buffer::alloc()
 {
   char* storage = (char*) alignedMalloc(bytes);
   ptr_ofs = storage;
   ptr     = storage;
 }
예제 #25
0
파일: buffer.cpp 프로젝트: dkoerner/embree
 /* Reports the allocation size to the device's memory monitor (when a device
    is set), then allocates `bytes` bytes and points both ptr and ptr_ofs at
    the new storage. */
 void Buffer::alloc()
 {
   if (device) {
     device->memoryMonitor(bytes,false);
   }

   char* storage = (char*) alignedMalloc(bytes);
   ptr_ofs = storage;
   ptr     = storage;
 }