/*! Constructs the builder and immediately runs the complete build:
 *  allocates node/primitive storage on the BVH, computes the primitive
 *  bounds, kicks off the recursive build through the scheduler, rotates
 *  the tree from the root and finally shrinks the BVH arrays to the
 *  number of entries that were actually handed out.
 *
 *  \param triangles     input triangles to build the hierarchy over
 *  \param numTriangles  number of entries in \a triangles
 *  \param bvh           hierarchy that receives the nodes and triangles */
BVH2Builder::BVH2Builder(const BuildTriangle* triangles, size_t numTriangles, Ref<BVH2<Triangle4> > bvh)
  : triangles(triangles), numTriangles(numTriangles), bvh(bvh)
{
  size_t numThreads = scheduler->getNumThreads();

  /*! Allocate storage for nodes. Each thread should at least be able to get one block. */
  allocatedNodes = numTriangles+numThreads*allocBlockSize;
  bvh->nodes = (BVH2<Triangle4>::Node*)alignedMalloc(allocatedNodes*sizeof(BVH2<Triangle4>::Node));

  /*! Allocate storage for triangles. Each thread should at least be able to get one block. */
  allocatedPrimitives = numTriangles+numThreads*allocBlockSize;
  bvh->triangles = (Triangle4*)alignedMalloc(allocatedPrimitives*sizeof(Triangle4));

  /*! Allocate array for splitting primitive lists. 2*N required for parallel splits. */
  prims = (Box*)alignedMalloc(2*numTriangles*sizeof(Box));

  /*! initiate parallel computation of bounds */
  ComputeBoundsTask computeBounds(triangles,numTriangles,prims);
  computeBounds.go();

  /*! start build; the root range covers all primitives [0,numTriangles) */
  recurse(bvh->root,1,BuildRange(0,numTriangles,computeBounds.geomBound,computeBounds.centBound));
  scheduler->go();

  /*! rotate top part of tree (five passes, each called with argument 4 --
   *  presumably a depth limit; confirm against BVH2::rotate) */
  for (int i=0; i<5; i++) bvh->rotate(bvh->root,4);

  /*! free temporary memory again: shrink both arrays to the counts recorded
   *  in atomicNextNode / atomicNextPrimitive and release the split array */
  bvh->nodes = (BVH2<Triangle4>::Node*) alignedRealloc(bvh->nodes ,atomicNextNode *sizeof(BVH2<Triangle4>::Node));
  bvh->triangles = (Triangle4* ) alignedRealloc(bvh->triangles,atomicNextPrimitive*sizeof(Triangle4 ));
  bvh->allocatedNodes = atomicNextNode;
  bvh->allocatedTriangles = atomicNextPrimitive;
  alignedFree(prims); prims = NULL;
}
/*! Creates a frame buffer held in local memory.
 *
 *  The color buffer is either adopted from \a colorBufferToUse or allocated
 *  according to \a colorBufferFormat. The depth, accumulation, variance,
 *  normal and albedo buffers are allocated only when the matching has*Buffer
 *  flag is set. The per-tile accumulation IDs are always allocated and
 *  zeroed. Finally the ISPC-side counterpart is created.
 *
 *  \param size               frame size in pixels, both components must be > 0
 *  \param colorBufferFormat  pixel format of the color buffer
 *  \param channels           forwarded to the FrameBuffer base class
 *  \param colorBufferToUse   optional caller-provided color storage */
LocalFrameBuffer::LocalFrameBuffer(const vec2i &size,
                                   ColorBufferFormat colorBufferFormat,
                                   const uint32 channels,
                                   void *colorBufferToUse)
  : FrameBuffer(size, colorBufferFormat, channels)
  , tileErrorRegion(hasVarianceBuffer ? getNumTiles() : vec2i(0))
{
  Assert(size.x > 0);
  Assert(size.y > 0);

  if (!colorBufferToUse) {
    // no external storage supplied: allocate what the format requires
    switch (colorBufferFormat) {
    case OSP_FB_NONE:
      colorBuffer = nullptr;
      break;
    case OSP_FB_RGBA8:
    case OSP_FB_SRGBA:
      colorBuffer = (uint32*)alignedMalloc(sizeof(uint32)*size.x*size.y);
      break;
    case OSP_FB_RGBA32F:
      colorBuffer = (vec4f*)alignedMalloc(sizeof(vec4f)*size.x*size.y);
      break;
    }
  } else {
    colorBuffer = colorBufferToUse;
  }

  if (hasDepthBuffer)
    depthBuffer = alignedMalloc<float>(size.x*size.y);
  else
    depthBuffer = nullptr;

  if (hasAccumBuffer)
    accumBuffer = alignedMalloc<vec4f>(size.x*size.y);
  else
    accumBuffer = nullptr;

  // one accumulation ID per tile, starting at zero
  const size_t tileAccumBytes = sizeof(int32)*getTotalTiles();
  tileAccumID = (int32*)alignedMalloc(tileAccumBytes);
  memset(tileAccumID, 0, tileAccumBytes);

  if (hasVarianceBuffer)
    varianceBuffer = alignedMalloc<vec4f>(size.x*size.y);
  else
    varianceBuffer = nullptr;

  if (hasNormalBuffer)
    normalBuffer = alignedMalloc<vec3f>(size.x*size.y);
  else
    normalBuffer = nullptr;

  if (hasAlbedoBuffer)
    albedoBuffer = alignedMalloc<vec3f>(size.x*size.y);
  else
    albedoBuffer = nullptr;

  ispcEquivalent = ispc::LocalFrameBuffer_create(this,
                                                 size.x, size.y,
                                                 colorBufferFormat,
                                                 colorBuffer,
                                                 depthBuffer,
                                                 accumBuffer,
                                                 varianceBuffer,
                                                 normalBuffer,
                                                 albedoBuffer,
                                                 tileAccumID);
}
// if alignment < 1 uses the last specified alignment or the default one void resize(size_t byte_count, size_t alignment = 0) { VL_CHECK( mAllocationMode == AutoAllocatedBuffer ); if (byte_count == 0) { clear(); return; } alignment = alignment >= 1 ? alignment : mAlignment; if ( byte_count != mByteCount || alignment != mAlignment) { mAlignment = alignment; // allocate the new chunk unsigned char* ptr = NULL; if (byte_count) ptr = (unsigned char*)alignedMalloc(byte_count, mAlignment); if (mPtr) { if (byte_count) { size_t min = mByteCount < byte_count ? mByteCount : byte_count; // copy the old content brutally memcpy(ptr, mPtr, min); } // free the old pointer alignedFree(mPtr); } // set the new pointer mPtr = ptr; // set the new reserved bytes mByteCount = byte_count; } }
/* adds a subdivision-surface cube to the scene and returns its geometry ID;
   also (re)assigns the global per-face color array */
unsigned int addSubdivCube (RTCScene scene_i)
{
  unsigned int geomID = rtcNewSubdivisionMesh(scene_i, RTC_GEOMETRY_STATIC, NUM_QUAD_FACES, NUM_QUAD_INDICES, NUM_VERTICES, 0, 0, 0);

  /* share the static cube data with the mesh */
  rtcSetBuffer(scene_i, geomID, RTC_VERTEX_BUFFER, cube_vertices, 0, sizeof(Vec3fa ));
  rtcSetBuffer(scene_i, geomID, RTC_INDEX_BUFFER, cube_quad_indices , 0, sizeof(unsigned int));
  rtcSetBuffer(scene_i, geomID, RTC_FACE_BUFFER, cube_quad_faces, 0, sizeof(unsigned int));

  /* write the same level value (4) for every quad index */
  float* levels = (float*) rtcMapBuffer(scene_i, geomID, RTC_LEVEL_BUFFER);
  for (size_t edge=0; edge<NUM_QUAD_INDICES; edge++)
    levels[edge] = 4;
  rtcUnmapBuffer(scene_i, geomID, RTC_LEVEL_BUFFER);

  /* create face color array */
  const Vec3fa faceColors[6] = {
    Vec3fa(1,0,0), // left side
    Vec3fa(0,1,0), // right side
    Vec3fa(0.5f),  // bottom side
    Vec3fa(1.0f),  // top side
    Vec3fa(0,0,1), // front side
    Vec3fa(1,1,0)  // back side
  };
  colors = (Vec3fa*) alignedMalloc(6*sizeof(Vec3fa));
  for (size_t face=0; face<6; face++)
    colors[face] = faceColors[face];

  /* set intersection filter for the cube */
  if (g_mode == MODE_NORMAL) {
    rtcSetIntersectionFilterFunction(scene_i,geomID,intersectionFilter);
    rtcSetOcclusionFilterFunction (scene_i,geomID,occlusionFilter);
  }
  else {
    rtcSetIntersectionFilterFunctionN(scene_i,geomID,intersectionFilterN);
    rtcSetOcclusionFilterFunctionN (scene_i,geomID,occlusionFilterN);
  }
  return geomID;
}
// Smoke test for the aligned allocator: allocates and frees a 1024-byte,
// 32-byte-aligned block five times and prints each returned address.
int main()
{
  for (int round = 0; round < 5; ++round)
  {
    int* ptr = (int*)alignedMalloc(1024, 32);
    // %p requires a void* argument; passing an int* directly is undefined
    printf("Got pointer with address %p\n", static_cast<void*>(ptr));
    alignedFree(ptr);
  }
  return 0;
}
// Allocation hook for ISPC tasks. Each allocation is recorded in a
// std::vector<void*> stored behind *taskPtr; the list is created on the
// first call for a given task handle.
//   taskPtr    in/out handle holding the allocation list (may point to null)
//   size       number of bytes to allocate
//   alignment  required alignment in bytes
// Returns the newly allocated memory.
extern "C" __dllexport void* ISPCAlloc(void** taskPtr, int64_t size, int32_t alignment)
{
  typedef std::vector<void*> AllocationList;

  // lazily create the bookkeeping list
  if (*taskPtr == nullptr)
    *taskPtr = new AllocationList;

  AllocationList* allocations = (AllocationList*)(*taskPtr);

  void* memory = alignedMalloc((size_t)size,alignment);
  allocations->push_back(memory);
  return memory;
}
extern "C" void rtNewDataStart(uint32_t numBuffers, void** buffers, uint64_t* bufferBytes, parmsNewDataStart* parms, uint16_t parmBytes, void* ret, uint16_t retBytess) { if (g_verbose) { printf("handle %06d = rtNewDataStart(%zu)\n", parms->id, parms->bytes); fflush(stdout); } g_data = (char*) alignedMalloc(parms->bytes); }
/*! Creates an RGBA8 texture from an image.
 *
 *  The texel storage is allocated with 64-byte alignment and filled by
 *  Image::convertToRGBA8. The width/height masks are set to size-1 for
 *  power-of-two dimensions and to 0 otherwise.
 *
 *  \param img       source image providing the dimensions and pixel data
 *  \param fileName  stored in the texture's fileName member */
Texture::Texture(Ref<Image> img, const std::string fileName)
  : width(unsigned(img->width)), height(unsigned(img->height)), format(RGBA8), bytesPerTexel(4),
    width_mask(0), height_mask(0), data(nullptr), fileName(fileName)
{
  width_mask = isPowerOf2(width) ? width-1 : 0;
  height_mask = isPowerOf2(height) ? height-1 : 0;

  /* compute the byte count in size_t: 4*width*height in 32-bit unsigned
     arithmetic wraps around for large images and would under-allocate */
  const size_t numBytes = size_t(4)*size_t(width)*size_t(height);
  data = alignedMalloc(numBytes,64);
  img->convertToRGBA8((unsigned char*)data);
}
/* adds a cube to the scene: 8 vertices at (+-1,+-1,+-1), 12 triangles,
   one color per triangle stored in the global colors array, and the
   intersection/occlusion filters attached to the mesh */
unsigned int addCube (RTCScene scene_i)
{
  /* create a triangulated cube with 12 triangles and 8 vertices */
  unsigned int mesh = rtcNewTriangleMesh (scene_i, RTC_GEOMETRY_STATIC, 12, 8);

  /* set vertices: bit 2 of the vertex index selects x, bit 1 selects y and
     bit 0 selects z (bit set -> +1, bit clear -> -1) */
  Vertex* vertices = (Vertex*) rtcMapBuffer(scene_i,mesh,RTC_VERTEX_BUFFER);
  for (int corner=0; corner<8; corner++) {
    vertices[corner].x = (corner & 4) ? +1 : -1;
    vertices[corner].y = (corner & 2) ? +1 : -1;
    vertices[corner].z = (corner & 1) ? +1 : -1;
  }
  rtcUnmapBuffer(scene_i,mesh,RTC_VERTEX_BUFFER);

  /* two triangles per side, in the order left, right, bottom, top, front, back */
  static const int cornerIndices[12][3] = {
    {0,2,1}, {1,2,3}, // left side
    {4,5,6}, {5,7,6}, // right side
    {0,1,4}, {1,5,4}, // bottom side
    {2,6,3}, {3,6,7}, // top side
    {0,4,2}, {2,4,6}, // front side
    {1,3,5}, {3,7,5}  // back side
  };
  const Vec3fa sideColors[6] = {
    Vec3fa(1,0,0), // left side
    Vec3fa(0,1,0), // right side
    Vec3fa(0.5f),  // bottom side
    Vec3fa(1.0f),  // top side
    Vec3fa(0,0,1), // front side
    Vec3fa(1,1,0)  // back side
  };

  /* create triangle color array */
  colors = (Vec3fa*) alignedMalloc(12*sizeof(Vec3fa));

  /* set triangles and colors */
  Triangle* triangles = (Triangle*) rtcMapBuffer(scene_i,mesh,RTC_INDEX_BUFFER);
  for (int tri=0; tri<12; tri++) {
    colors[tri] = sideColors[tri/2];
    triangles[tri].v0 = cornerIndices[tri][0];
    triangles[tri].v1 = cornerIndices[tri][1];
    triangles[tri].v2 = cornerIndices[tri][2];
  }
  rtcUnmapBuffer(scene_i,mesh,RTC_INDEX_BUFFER);

  /* set intersection filter for the cube */
  rtcSetIntersectionFilterFunction(scene_i,mesh,(RTCFilterFunc)&intersectionFilter);
  rtcSetOcclusionFilterFunction (scene_i,mesh,(RTCFilterFunc)&occlusionFilter);

  return mesh;
}
//! Create an ispc-side DirectionalLight object extern "C" void* DirectionalLight_create() { DirectionalLight* self = (DirectionalLight*) alignedMalloc(sizeof(DirectionalLight)); Light_Constructor(&self->super); self->super.sample = DirectionalLight_sample; self->super.eval = DirectionalLight_eval; DirectionalLight_set(self, Vec3fa(0.f, 0.f, 1.f), Vec3fa(1.f), 1.f); return self; }
/* Resizes the global pixel buffer to width x height ints (64-byte aligned).
   Does nothing when the size is unchanged; otherwise the old buffer is
   released and a new, uninitialised one is allocated. */
void resize(int32 width, int32 height)
{
  if (width == g_width && height == g_height)
    return;

  if (g_pixels) alignedFree(g_pixels);

  g_width = width;
  g_height = height;

  /* widen before multiplying: g_width*g_height evaluated in 32-bit int
     overflows for very large frames before sizeof(int) is applied */
  const size_t numBytes = size_t(g_width)*size_t(g_height)*sizeof(int);
  g_pixels = (int*) alignedMalloc(numBytes,64);
}
/*! Returns a block of memory. A previously stored block is taken from the
 *  back of the block list when one is available; otherwise a new 64-byte
 *  aligned block of blockSize bytes is allocated. The mutex is held for
 *  the duration of the call. */
void* Alloc::malloc()
{
  Lock<MutexSys> lock(mutex);

  /* no cached block: allocate a new one */
  if (blocks.size() == 0)
    return alignedMalloc(blockSize,64);

  /* reuse the most recently stored block */
  void* recycled = blocks.back();
  blocks.pop_back();
  return recycled;
}
//! Create an ispc-side PointLight object extern "C" void* PointLight_create() { PointLight* self = (PointLight*) alignedMalloc(sizeof(PointLight),16); Light_Constructor(&self->super); self->super.sample = PointLight_sample; self->super.eval = PointLight_eval; PointLight_set(self, Vec3fa(0.f), Vec3fa(1.f), 0.f); return self; }
/*! Creates a texture of the given size and format.
 *
 *  The texel storage is allocated with 64-byte alignment. When \a in is
 *  non-null, bytesPerTexel*width*height bytes are copied from it; otherwise
 *  the storage is zero-filled. The width/height masks are set to size-1 for
 *  power-of-two dimensions and to 0 otherwise.
 *
 *  \param width   texture width in texels
 *  \param height  texture height in texels
 *  \param format  texel format, determines bytesPerTexel
 *  \param in      optional source texel data, may be null */
Texture::Texture (unsigned width, unsigned height, const Format format, const char* in)
  : width(width), height(height), format(format), bytesPerTexel(getFormatBytesPerTexel(format)),
    width_mask(0), height_mask(0), data(nullptr)
{
  width_mask = isPowerOf2(width) ? width-1 : 0;
  height_mask = isPowerOf2(height) ? height-1 : 0;

  /* compute the byte count once and in size_t, so the product cannot wrap
     in narrower arithmetic and allocation and fill agree on the size */
  const size_t numBytes = size_t(bytesPerTexel)*size_t(width)*size_t(height);
  data = alignedMalloc(numBytes,64);

  if (in)
    memcpy(data,in,numBytes);
  else
    memset(data,0,numBytes);
}
void SimVars::create(size_t dim_real, size_t dim_int, size_t dim_bool, size_t dim_string, size_t dim_pre_vars, size_t dim_state_vars, size_t state_index) { _dim_real = dim_real; _dim_int = dim_int; _dim_bool = dim_bool; _dim_string = dim_string; _dim_pre_vars = dim_pre_vars; _dim_z = dim_state_vars; _z_i = state_index; if (_dim_real + _dim_int + _dim_bool > _dim_pre_vars) throw std::runtime_error("Wrong pre variable size"); //allocate memory for all model variables if (dim_string > 0) { _string_vars = new string[dim_string]; } else { _string_vars = NULL; } if (dim_bool > 0) { _bool_vars = (bool*)alignedMalloc(sizeof(bool) * dim_bool, 64); _pre_bool_vars = (bool*)alignedMalloc(sizeof(bool) * dim_bool, 64); } else { _bool_vars = NULL; _pre_bool_vars = NULL; } if (dim_int > 0) { _int_vars = (int*)alignedMalloc(sizeof(int) * dim_int, 64); _pre_int_vars = (int*)alignedMalloc(sizeof(int) * dim_int, 64); } else { _int_vars = NULL; _pre_int_vars = NULL; } if (dim_real > 0) { _real_vars = (double*)alignedMalloc(sizeof(double) * dim_real, 64); _pre_real_vars = (double*)alignedMalloc(sizeof(double) * dim_real, 64); } else { _real_vars = NULL; _pre_real_vars = NULL; } //initialize all model variables if(dim_string > 0) std::fill(_string_vars, _string_vars + dim_string, string()); if (dim_bool > 0) std::fill(_bool_vars, _bool_vars + dim_bool, false); if (dim_int > 0) std::fill(_int_vars, _int_vars + dim_int, 0); if (dim_real > 0) std::fill(_real_vars, _real_vars + dim_real, 0.0); }
// Creates a Texture2D that refers to externally provided texel data.
//   size   texture dimensions in texels
//   data   texel storage; the pointer is stored, the data is not copied
//   type   texel type, used to select the lookup function
//   flags  texture flags; only TEXTURE_FILTER_NEAREST is inspected here
// Returns the newly allocated texture object.
extern "C" void *Texture2D_create(Vec2i &size, void *data, uint32_t type, uint32_t flags)
{
  Texture2D *texture = (Texture2D*) alignedMalloc(sizeof(Texture2D));

  texture->size = size;

  // Due to float rounding frac(x) can be exactly 1.0f (e.g. for very small
  // negative x), although it should be strictly smaller than 1.0f. We handle
  // this case by having sizef slightly smaller than size, such that
  // frac(x)*sizef is always < size.
  texture->sizef = Vec2f(nextafter((float)size.x, -1.0f),
                         nextafter((float)size.y, -1.0f));

  texture->halfTexel = Vec2f(0.5f/size.x, 0.5f/size.y);
  texture->data = data;
  texture->get = Texture2D_get_addr(type, flags & TEXTURE_FILTER_NEAREST);

  return texture;
}
/* adds a cube to the scene; every cube vertex is scaled, rotated around the
   y axis by the given angle and then translated by offset. Also (re)assigns
   the global per-triangle color array. Returns the geometry ID. */
unsigned int addCube (RTCScene scene_i, const Vec3fa& offset, const Vec3fa& scale, float rotation)
{
  /* create a triangulated cube with 12 triangles and 8 vertices */
  unsigned int geomID = rtcNewTriangleMesh (scene_i, RTC_GEOMETRY_STATIC, NUM_TRI_FACES, NUM_VERTICES);

  //rtcSetBuffer(scene_i, geomID, RTC_VERTEX_BUFFER, cube_vertices, 0, sizeof(Vec3fa ));
  Vec3fa* vertexBuffer = (Vec3fa*) rtcMapBuffer(scene_i, geomID, RTC_VERTEX_BUFFER);
  for (size_t v=0; v<NUM_VERTICES; v++)
  {
    float vx = cube_vertices[v][0];
    float vy = cube_vertices[v][1];
    float vz = cube_vertices[v][2];
    Vec3fa localPos = Vec3fa(vx,vy,vz);
    vertexBuffer[v] = Vec3fa(offset+LinearSpace3fa::rotate(Vec3fa(0,1,0),rotation)*LinearSpace3fa::scale(scale)*localPos);
  }
  rtcUnmapBuffer(scene_i,geomID,RTC_VERTEX_BUFFER);

  rtcSetBuffer(scene_i, geomID, RTC_INDEX_BUFFER, cube_tri_indices , 0, 3*sizeof(unsigned int));

  /* create per-triangle color array: both triangles of a side share a color */
  const Vec3fa sideColors[6] = {
    Vec3fa(1,0,0), // left side
    Vec3fa(0,1,0), // right side
    Vec3fa(0.5f),  // bottom side
    Vec3fa(1.0f),  // top side
    Vec3fa(0,0,1), // front side
    Vec3fa(1,1,0)  // back side
  };
  colors = (Vec3fa*) alignedMalloc(12*sizeof(Vec3fa));
  for (size_t tri=0; tri<12; tri++)
    colors[tri] = sideColors[tri/2];

  /* set intersection filter for the cube */
  if (g_mode == MODE_NORMAL) {
    rtcSetIntersectionFilterFunction(scene_i,geomID,intersectionFilter);
    rtcSetOcclusionFilterFunction (scene_i,geomID,occlusionFilter);
  }
  else {
    rtcSetIntersectionFilterFunctionN(scene_i,geomID,intersectionFilterN);
    rtcSetOcclusionFilterFunctionN (scene_i,geomID,occlusionFilterN);
  }
  return geomID;
}
/* add hair geometry: NUM_CURVES curves with four control points each.
   Control points and per-vertex colors are converted from the b-spline
   tables (hair_vertices / hair_vertex_colors) to the bezier basis; the
   positions are additionally translated by pos. The colors are attached as
   user vertex buffer 0. Returns the geometry ID. */
unsigned int addCurve (RTCScene scene, const Vec3fa& pos)
{
  unsigned int geomID = rtcNewCurveGeometry (scene, RTC_GEOMETRY_STATIC, NUM_CURVES, 4*NUM_CURVES);

  /* weights of the b-spline to bezier basis change */
  const float w16 = 1.0f/6.0f;
  const float w13 = 1.0f/3.0f;
  const float w23 = 2.0f/3.0f;

  /* converts b-spline to bezier basis */
  const Vec3fa origin = Vec3fa(pos.x,pos.y,pos.z,0.0f);
  Vec3fa* bezierPoints = (Vec3fa*) rtcMapBuffer(scene, geomID, RTC_VERTEX_BUFFER);
  for (size_t c=0; c<NUM_CURVES; c++)
  {
    const Vec3fa p0 = Vec3fa(hair_vertices[c+0][0],hair_vertices[c+0][1],hair_vertices[c+0][2],hair_vertices[c+0][3]);
    const Vec3fa p1 = Vec3fa(hair_vertices[c+1][0],hair_vertices[c+1][1],hair_vertices[c+1][2],hair_vertices[c+1][3]);
    const Vec3fa p2 = Vec3fa(hair_vertices[c+2][0],hair_vertices[c+2][1],hair_vertices[c+2][2],hair_vertices[c+2][3]);
    const Vec3fa p3 = Vec3fa(hair_vertices[c+3][0],hair_vertices[c+3][1],hair_vertices[c+3][2],hair_vertices[c+3][3]);

    Vec3fa* segment = bezierPoints + 4*c;
    segment[0] = origin + w16*p0 + w23*p1 + w16*p2;
    segment[1] = origin + w23*p1 + w13*p2;
    segment[2] = origin + w13*p1 + w23*p2;
    segment[3] = origin + w16*p1 + w23*p2 + w16*p3;
  }
  rtcUnmapBuffer(scene, geomID, RTC_VERTEX_BUFFER);

  /* same basis change for the vertex colors, without the translation */
  Vec3fa* colors = (Vec3fa*) alignedMalloc(4*NUM_CURVES*sizeof(Vec3fa));
  for (size_t c=0; c<NUM_CURVES; c++)
  {
    const Vec3fa c0 = Vec3fa(hair_vertex_colors[c+0][0],hair_vertex_colors[c+0][1],hair_vertex_colors[c+0][2],hair_vertex_colors[c+0][3]);
    const Vec3fa c1 = Vec3fa(hair_vertex_colors[c+1][0],hair_vertex_colors[c+1][1],hair_vertex_colors[c+1][2],hair_vertex_colors[c+1][3]);
    const Vec3fa c2 = Vec3fa(hair_vertex_colors[c+2][0],hair_vertex_colors[c+2][1],hair_vertex_colors[c+2][2],hair_vertex_colors[c+2][3]);
    const Vec3fa c3 = Vec3fa(hair_vertex_colors[c+3][0],hair_vertex_colors[c+3][1],hair_vertex_colors[c+3][2],hair_vertex_colors[c+3][3]);

    Vec3fa* segment = colors + 4*c;
    segment[0] = w16*c0 + w23*c1 + w16*c2;
    segment[1] = w23*c1 + w13*c2;
    segment[2] = w13*c1 + w23*c2;
    segment[3] = w16*c1 + w23*c2 + w16*c3;
  }

  /* each curve starts at its first control point */
  int* firstVertex = (int*) rtcMapBuffer(scene, geomID, RTC_INDEX_BUFFER);
  for (int curve=0; curve<NUM_CURVES; curve++)
    firstVertex[curve] = 4*curve;
  rtcUnmapBuffer(scene,geomID,RTC_INDEX_BUFFER);

  rtcSetBuffer(scene, geomID, RTC_USER_VERTEX_BUFFER0, colors, 0, sizeof(Vec3fa));

  return geomID;
}
/* called by the C++ code to render */ extern "C" void device_render (int* pixels, const int width, const int height, const float time, const Vec3fa& vx, const Vec3fa& vy, const Vec3fa& vz, const Vec3fa& p) { /* create scene */ if (g_scene == NULL) g_scene = convertScene(g_ispc_scene); /* create accumulator */ if (g_accu_width != width || g_accu_height != height) { //g_accu = new Vec3fa[width*height]; g_accu = (Vec3fa*)alignedMalloc(width*height*sizeof(Vec3fa)); g_accu_width = width; g_accu_height = height; memset(g_accu,0,width*height*sizeof(Vec3fa)); } /* reset accumulator */ bool camera_changed = g_changed; g_changed = false; camera_changed |= ne(g_accu_vx,vx); g_accu_vx = vx; // FIXME: use != operator camera_changed |= ne(g_accu_vy,vy); g_accu_vy = vy; // FIXME: use != operator camera_changed |= ne(g_accu_vz,vz); g_accu_vz = vz; // FIXME: use != operator camera_changed |= ne(g_accu_p, p); g_accu_p = p; // FIXME: use != operator g_accu_count++; if (camera_changed) { g_accu_count=0; memset(g_accu,0,width*height*sizeof(Vec3fa)); } /* render frame */ const int numTilesX = (width +TILE_SIZE_X-1)/TILE_SIZE_X; const int numTilesY = (height+TILE_SIZE_Y-1)/TILE_SIZE_Y; enableFilterDispatch = renderPixel == renderPixelStandard; launch_renderTile(numTilesX*numTilesY,pixels,width,height,time,vx,vy,vz,p,numTilesX,numTilesY); enableFilterDispatch = false; rtcDebug(); }
void TaskScheduler::createThreads(size_t numThreads_in) { numThreads = numThreads_in; #if defined(__MIC__) if (numThreads == 0) numThreads = getNumberOfLogicalThreads()-4; #else if (numThreads == 0) numThreads = getNumberOfLogicalThreads(); #endif /* this mapping is only required as ISPC does not propagate task groups */ thread2event = (ThreadEvent*) alignedMalloc(numThreads*sizeof(ThreadEvent)); memset(thread2event,0,numThreads*sizeof(ThreadEvent)); /* generate all threads */ for (size_t t=0; t<numThreads; t++) { threads.push_back(createThread((thread_func)threadFunction,new Thread(t,numThreads,this),4*1024*1024,t)); } //setAffinity(0); TaskLogger::init(numThreads); }
//! Create an ispc-side AmbientLight object extern "C" void *AmbientLight_create() { AmbientLight* self = (AmbientLight*) alignedMalloc(sizeof(AmbientLight)); AmbientLight_Constructor(self, Vec3fa(1.f)); return self; }
void SamplerFactory::init(float rcpWidth, float rcpHeight, int iteration, const Ref<Filter> filter) { this->iteration = iteration; samples = new PrecomputedSample*[sampleSets]; if (samplesPerPixel != (1 << __bsf(samplesPerPixel))) throw std::runtime_error("Number of samples per pixel have to be a power of two."); int chunkSize = max((int)samplesPerPixel,64); int currentChunk = int(iteration*samplesPerPixel) / chunkSize; int offset = (iteration*samplesPerPixel) % chunkSize; Random rng; rng.setSeed(currentChunk * 5897); Vec2f* pixel = new Vec2f[chunkSize]; float* time = new float[chunkSize]; Vec2f* lens = new Vec2f[chunkSize]; float* samples1D = new float[chunkSize]; Vec2f* samples2D = new Vec2f[chunkSize]; allSamples1D = static_cast<float*>(alignedMalloc(sizeof(float)*SamplerFactory::numSamples1D*sampleSets*samplesPerPixel)); allSamples2D = static_cast<Vec2f*>(alignedMalloc(sizeof(Vec2f)*SamplerFactory::numSamples2D*sampleSets*samplesPerPixel)); allLightSamples = new PackedLightSample[SamplerFactory::numLightSamples*sampleSets*samplesPerPixel]; for (int set = 0; set < sampleSets; set++) { samples[set] = new PrecomputedSample[samplesPerPixel]; /*! Generate pixel and lens samples. */ multiJittered(pixel, chunkSize, rng); jittered(time, chunkSize, rng); multiJittered(lens, chunkSize, rng); for (int s = 0; s < samplesPerPixel; s++) { samples[set][s].pixel = pixel[offset + s]; samples[set][s].time = time[offset + s]; samples[set][s].lens = lens[offset + s]; if (filter) { samples[set][s].pixel = filter->sample(samples[set][s].pixel) + Vec2f(0.5f, 0.5f); } samples[set][s].pixel = samples[set][s].pixel * Vec2f(rcpWidth, rcpHeight); samples[set][s].samples1D = allSamples1D + (s + samplesPerPixel*set)*SamplerFactory::numSamples1D; samples[set][s].samples2D = allSamples2D + (s + samplesPerPixel*set)*SamplerFactory::numSamples2D; samples[set][s].lightSamples = allLightSamples + (s + samplesPerPixel*set)*SamplerFactory::numLightSamples; } /*! Generate requested 1D samples. 
*/ for (int d = 0; d < SamplerFactory::numSamples1D; d++) { jittered(samples1D, chunkSize, rng); for (int s = 0; s < samplesPerPixel; s++) { samples[set][s].samples1D[d] = samples1D[offset + s]; } } /*! Generate 2D samples. */ for (int d = 0; d < SamplerFactory::numSamples2D; d++) { multiJittered(samples2D, chunkSize, rng); for (int s = 0; s < samplesPerPixel; s++) { samples[set][s].samples2D[d] = samples2D[offset + s]; } } /*! Generate light samples. */ for (int d = 0; d < SamplerFactory::numLightSamples; d++) { for (int s = 0; s < samplesPerPixel; s++) { LightSample ls; DifferentialGeometry dg; ls.L = lights[d]->sample(dg, ls.wi, ls.tMax, samples[set][s].samples2D[lightBaseSamples[d]]); samples[set][s].lightSamples[d] = ls; } } } delete[] pixel; delete[] time; delete[] lens; delete[] samples1D; delete[] samples2D; }
/*! Creates a frame buffer held in local memory.
 *
 *  The color buffer is either adopted from \a colorBufferToUse or allocated
 *  according to \a colorBufferFormat: one uint32 per pixel for the 8-bit
 *  formats (RGBA8/SRGBA) and one vec4f per pixel for RGBA32F. Depth,
 *  accumulation and variance buffers are allocated on request. The per-tile
 *  accumulation IDs are always allocated and zeroed; the per-tile error
 *  buffer exists only together with the variance buffer. Finally the
 *  ISPC-side counterpart is created.
 *
 *  \param size               frame size in pixels, both components must be > 0
 *  \param colorBufferFormat  pixel format of the color buffer
 *  \param hasDepthBuffer     allocate a float depth buffer
 *  \param hasAccumBuffer     allocate a vec4f accumulation buffer
 *  \param hasVarianceBuffer  allocate variance and tile error buffers
 *  \param colorBufferToUse   optional caller-provided color storage
 *
 *  \throws std::runtime_error for an unsupported color buffer format */
LocalFrameBuffer::LocalFrameBuffer(const vec2i &size,
                                   ColorBufferFormat colorBufferFormat,
                                   bool hasDepthBuffer,
                                   bool hasAccumBuffer,
                                   bool hasVarianceBuffer,
                                   void *colorBufferToUse)
  : FrameBuffer(size, colorBufferFormat, hasDepthBuffer, hasAccumBuffer, hasVarianceBuffer)
{
  Assert(size.x > 0);
  Assert(size.y > 0);

  if (colorBufferToUse)
    colorBuffer = colorBufferToUse;
  else {
    switch (colorBufferFormat) {
    case OSP_FB_NONE:
      colorBuffer = NULL;
      break;
    case OSP_FB_RGBA8:
    case OSP_FB_SRGBA:
      // 8 bits per channel: one packed 32-bit word per pixel
      colorBuffer = (uint32*)alignedMalloc(sizeof(uint32)*size.x*size.y);
      break;
    case OSP_FB_RGBA32F:
      // 32-bit float per channel: one vec4f per pixel (the element types of
      // this case and the 8-bit case were swapped before, under-allocating
      // the float buffer)
      colorBuffer = (vec4f*)alignedMalloc(sizeof(vec4f)*size.x*size.y);
      break;
    default:
      throw std::runtime_error("color buffer format not supported");
    }
  }

  if (hasDepthBuffer)
    depthBuffer = (float*)alignedMalloc(sizeof(float)*size.x*size.y);
  else
    depthBuffer = NULL;

  if (hasAccumBuffer)
    accumBuffer = (vec4f*)alignedMalloc(sizeof(vec4f)*size.x*size.y);
  else
    accumBuffer = NULL;

  // tile bookkeeping: number of tiles per row and in total
  tilesx = divRoundUp(size.x, TILE_SIZE);
  tiles = tilesx * divRoundUp(size.y, TILE_SIZE);
  tileAccumID = new int32[tiles];
  memset(tileAccumID, 0, tiles*sizeof(int32));

  if (hasVarianceBuffer) {
    varianceBuffer = (vec4f*)alignedMalloc(sizeof(vec4f)*size.x*size.y);
    tileErrorBuffer = new float[tiles];
    // maximum number of regions: all regions are of size 3 are split in half
    errorRegion.reserve(divRoundUp(tiles*2, 3));
  } else {
    varianceBuffer = NULL;
    tileErrorBuffer = NULL;
  }

  ispcEquivalent = ispc::LocalFrameBuffer_create(this,size.x,size.y,
                                                 colorBufferFormat,
                                                 colorBuffer,
                                                 depthBuffer,
                                                 accumBuffer,
                                                 varianceBuffer,
                                                 tileAccumID,
                                                 tileErrorBuffer);
}
/*! Allocates the buffer storage of `bytes` bytes; both ptr and ptr_ofs are
 *  set to the start of the new allocation. */
void Buffer::alloc()
{
  char* const storage = (char*) alignedMalloc(bytes);
  ptr_ofs = storage;
  ptr = ptr_ofs;
}
/*! Allocates the buffer storage of `bytes` bytes; both ptr and ptr_ofs are
 *  set to the start of the new allocation. When a device is attached, its
 *  memory monitor is called with the byte count before allocating. */
void Buffer::alloc()
{
  if (device)
    device->memoryMonitor(bytes,false);

  char* const storage = (char*) alignedMalloc(bytes);
  ptr_ofs = storage;
  ptr = ptr_ofs;
}