Search Issue Tracker
Fixed in 2020.1.X
Fixed in 2019.3.X
Votes
1
Found in
2019.1
2019.2
2019.2.0f1
2019.3
2020.1
Issue ID
1196347
Regression
No
[GPU Lightmapper] LightGrid takes up too much memory on large scenes
Reproduction steps:
1. Open "1196347" project with 2020.1
2. Open "REPROSCENE" scene.
3. Bake Lighting with GPU lightmapper.
Actual Result: The bake falls back to the CPU Lightmapper with this message: "OpenCL Error. Falling back to CPU lightmapper. Error callback from context: Max allocation size supported by this device is 1.50 GB. 13.45 GB requested."
Reproduced with: 2020.1.0a12.
In 2019.3.0b10 and 2019.2.12f1 the Editor crashes instead; that crash is tracked in a separate bug that Dovydas Dainys will create.
The GPU lightmapper's light grid requests 13.53 GB in a large 3x5 km scene. 2 x 606208 light grid cells are generated because the grid is sized from heuristicCellSize(5.0f, 15.0f, 5.0f) with each dimension capped at 128 cells.
The proper fix is to not store actual light structs per grid cell, but indices into the unique light buffers.
If this is not enough, change the size calculation of the grid based on scene size and a more advanced heuristic. If it generates too many cells, recursively divide cell starting size by 2 (128 -> 64 -> 32 ...) and query expected memory footprint using CalculateExpectedMemoryUsage() and compare with OpenCLState::maxAllocationSize.
diff --git a/Editor/Src/GI/Progressive/RadeonRays/RadeonRaysLightGrid.cpp b/Editor/Src/GI/Progressive/RadeonRays/RadeonRaysLightGrid.cpp
--- a/Editor/Src/GI/Progressive/RadeonRays/RadeonRaysLightGrid.cpp
+++ b/Editor/Src/GI/Progressive/RadeonRays/RadeonRaysLightGrid.cpp
@@ -223,14 +223,20 @@
// Release previous buffers
Destroy();
+ // Upload the direct and indirect lights buffers, non-blocking is OK because the map calls below will force a sync.
+ CL_CHECK_ERR(GetBuffer(kRRBuf_directLightsBuffer).CreateBuffer(_openCLState.context, CL_MEM_READ_ONLY, _lightsForDirect.size(), NULL, &_err));
+ CL_CHECK(GetBuffer(kRRBuf_directLightsBuffer).EnqueueWriteBuffer(_openCLState.commandQueue, CL_FALSE, 0, _lightsForDirect.size(), _lightsForDirect.data(), 0, NULL, NULL));
+ CL_CHECK_ERR(GetBuffer(kRRBuf_indirectLightsBuffer).CreateBuffer(_openCLState.context, CL_MEM_READ_ONLY, _lightsForIndirect.size(), NULL, &_err));
+ CL_CHECK(GetBuffer(kRRBuf_indirectLightsBuffer).EnqueueWriteBuffer(_openCLState.commandQueue, CL_FALSE, 0, _lightsForIndirect.size(), _lightsForIndirect.data(), 0, NULL, NULL));
+
// Heuristic: One cell every 5 meters along X & Z and every 15 along Y
const Vector3f heuristicCellSize(5.0f, 15.0f, 5.0f);
// Resize the grid based on scene size and heuristic. If this generates too many cells, recursively divide cell starting size by 2 (128 -> 64 -> 32 ...)
SetDims(
std::max(1u, std::min(128u, static_cast<UInt32>(ceilf(2.0f * _sceneBounds.m_Extent.x / heuristicCellSize.x)))),
std::max(1u, std::min(128u, static_cast<UInt32>(ceilf(2.0f * _sceneBounds.m_Extent.y / heuristicCellSize.y)))),
std::max(1u, std::min(128u, static_cast<UInt32>(ceilf(2.0f * _sceneBounds.m_Extent.z / heuristicCellSize.z)))));
// Compute grid transforms
m_Bounds = _sceneBounds;
@@ -242,15 +248,15 @@
// Allocate temp memory
const UInt32 cellCount = GetCellCount();
- dynamic_array<dynamic_array<UInt32> > directLightsBuffers(cellCount, kMemTempAlloc);
- dynamic_array<dynamic_array<UInt32> > indirectLightsBuffers(cellCount, kMemTempAlloc);
+ dynamic_array<dynamic_array<UInt32> > directLightsBuffers(cellCount, kMemTempAlloc); // TODO remove!
+ dynamic_array<dynamic_array<UInt32> > indirectLightsBuffers(cellCount, kMemTempAlloc); // TODO remove!
for (UInt32 i = 0; i < cellCount; ++i)
{
directLightsBuffers[i].set_memory_label(kMemTempAlloc);
indirectLightsBuffers[i].set_memory_label(kMemTempAlloc);
}
- // Partition the lights
+ // Partition the lights and generate indices into the light arrays.
PartitionLightsIntoCells(directLightsBuffers, _lightsForDirect);
PartitionLightsIntoCells(indirectLightsBuffers, _lightsForIndirect);
@@ -272,6 +278,7 @@
{
for (UInt32 cellIdx = 0; cellIdx < cellCount; ++cellIdx)
{
+ // TODO store offsets into _lightsForDirect and _lightsForIndirect instead.
directLightOffsets[cellIdx] = directLightOffset;
directLightCountPerCells[cellIdx] = directLightsBuffers[cellIdx].size();
indirectLightOffsets[cellIdx] = indirectLightOffset;
@@ -290,54 +297,6 @@
m_MaxDirectLightsPerCell = std::max((int)directLightsBuffers[cellIdx].size(), m_MaxDirectLightsPerCell);
}
- // Upload the direct lights
- CL_CHECK_ERR(GetBuffer(kRRBuf_directLightsBuffer).CreateBuffer(_openCLState.context, CL_MEM_READ_ONLY, std::max(directLightOffset, 1u), NULL, &_err));
- if (directLightOffset > 0)
- {
- LightBuffer *const lightsForDirect = static_cast<LightBuffer *>(clEnqueueMapBuffer(_openCLState.commandQueue, GetBuffer(kRRBuf_directLightsBuffer).m_NativeBuffer, CL_TRUE, CL_MAP_WRITE, 0, directLightOffset * sizeof(LightBuffer), 0, NULL, &mapEvent, NULL));
- DebugAssert(lightsForDirect != NULL);
- CL_CHECK(clWaitForEvents(1, &mapEvent));
- CL_CHECK(clReleaseEvent(mapEvent));
- if (lightsForDirect != NULL)
- {
- for (UInt32 i = 0, cellIdx = 0, localIdx = 0; i < directLightOffset; ++i, ++localIdx)
- {
- while (localIdx >= directLightsBuffers[cellIdx].size())
- {
- localIdx = 0;
- ++cellIdx;
- }
- DebugAssert(cellIdx < cellCount);
- lightsForDirect[i] = _lightsForDirect[directLightsBuffers[cellIdx][localIdx]];
- }
- }
- CL_CHECK(clEnqueueUnmapMemObject(_openCLState.commandQueue, GetBuffer(kRRBuf_directLightsBuffer).m_NativeBuffer, lightsForDirect, 0, NULL, NULL));
- }
-
- // Upload the indirect lights
- CL_CHECK_ERR(GetBuffer(kRRBuf_indirectLightsBuffer).CreateBuffer(_openCLState.context, CL_MEM_READ_ONLY, std::max(indirectLightOffset, 1u), NULL, &_err));
- if (indirectLightOffset > 0)
- {
- LightBuffer *const lightsForIndirect = static_cast<LightBuffer *>(clEnqueueMapBuffer(_openCLState.commandQueue, GetBuffer(kRRBuf_indirectLightsBuffer).m_NativeBuffer, CL_TRUE, CL_MAP_WRITE, 0, indirectLightOffset * sizeof(LightBuffer), 0, NULL, &mapEvent, NULL));
- DebugAssert(lightsForIndirect != NULL);
- CL_CHECK(clWaitForEvents(1, &mapEvent));
- CL_CHECK(clReleaseEvent(mapEvent));
- if (lightsForIndirect != NULL)
- {
- for (UInt32 i = 0, cellIdx = 0, localIdx = 0; i < indirectLightOffset; ++i, ++localIdx)
- {
- while (localIdx >= indirectLightsBuffers[cellIdx].size())
- {
- localIdx = 0;
- ++cellIdx;
- }
- DebugAssert(cellIdx < cellCount);
- lightsForIndirect[i] = _lightsForIndirect[indirectLightsBuffers[cellIdx][localIdx]];
- }
- }
- CL_CHECK(clEnqueueUnmapMemObject(_openCLState.commandQueue, GetBuffer(kRRBuf_indirectLightsBuffer).m_NativeBuffer, lightsForIndirect, 0, NULL, NULL));
- }
-
// Upload indirect distribution offsets
UInt32 indirectDistributionOffset = 0;
CL_CHECK_ERR(GetBuffer(kRRBuf_indirectLightDistributionOffsetBuffer).CreateBuffer(_openCLState.context, CL_MEM_READ_ONLY, cellCount, NULL, &_err));
Add comment
All about bugs
View bugs we have successfully reproduced, and vote for the bugs you want to see fixed most urgently.
Latest issues
- Any small change in UI Builder Inspector refreshes Editor Inspector
- Inaccurate Box Collider boundaries on a rotated child Cube when the parent GameObject Scale is non-uniform
- [Android] "SHADOWS_SCREEN" set as shader Keyword when no "_ShadowMapTexture" is bound leads to freeze on a build on some Mali GPU devices
- The global scene list is overridden in a project built with command line when the Override Global Scene List setting is disabled in the build profile
- Global Scenes are not included in the Build when building multiple Build Profiles at the same time
Resolution Note (fix version 2019.3):
After baking, the Lightmapper remains set to Progressive GPU.