Search Issue Tracker
Fixed in 2020.1.X
Fixed in 2019.3.X
Votes
1
Found in
2019.1
2019.2
2019.2.0f1
2019.3
2020.1
Issue ID
1196347
Regression
No
[GPU Lightmapper] LightGrid takes up too much memory on large scenes
Reproduction steps:
1. Open "1196347" project with 2020.1
2. Open "REPROSCENE" scene.
3. Bake Lighting with GPU lightmapper.
Actual Result: Falling back to CPU Lightmapper with this message "OpenCL Error. Falling back to CPU lightmapper. Error callback from context: Max allocation size supported by this device is 1.50 GB. 13.45 GB requested."
Reproduced with: 2020.1.0a12.
In 2019.3.0b10 and 2019.2.12f1 the Editor crashed instead; that crash is tracked in a separate bug that Dovydas Dainys will create.
GPU lightmapper light grid requests 13.53 GB in a large 3x5 km scene. 2 x 606208 light grid cells are generated because of heuristicCellSize(5.0f, 15.0f, 5.0f) * 128.
The proper fix is to not store actual light structs per grid cell, but indices into the unique light buffers.
If this is not enough, change the size calculation of the grid based on scene size and a more advanced heuristic. If it generates too many cells, recursively divide cell starting size by 2 (128 -> 64 -> 32 ...) and query expected memory footprint using CalculateExpectedMemoryUsage() and compare with OpenCLState::maxAllocationSize.
diff --git a/Editor/Src/GI/Progressive/RadeonRays/RadeonRaysLightGrid.cpp b/Editor/Src/GI/Progressive/RadeonRays/RadeonRaysLightGrid.cpp
--- a/Editor/Src/GI/Progressive/RadeonRays/RadeonRaysLightGrid.cpp
+++ b/Editor/Src/GI/Progressive/RadeonRays/RadeonRaysLightGrid.cpp
@@ -223,14 +223,20 @@
// Release previous buffers
Destroy();
+ // Upload the direct and indirect lights buffers, non-blocking is OK because the map calls below will force a sync.
+ CL_CHECK_ERR(GetBuffer(kRRBuf_directLightsBuffer).CreateBuffer(_openCLState.context, CL_MEM_READ_ONLY, _lightsForDirect.size(), NULL, &_err));
+ CL_CHECK(GetBuffer(kRRBuf_directLightsBuffer).EnqueueWriteBuffer(_openCLState.commandQueue, CL_FALSE, 0, _lightsForDirect.size(), _lightsForDirect.data(), 0, NULL, NULL));
+ CL_CHECK_ERR(GetBuffer(kRRBuf_indirectLightsBuffer).CreateBuffer(_openCLState.context, CL_MEM_READ_ONLY, _lightsForIndirect.size(), NULL, &_err));
+ CL_CHECK(GetBuffer(kRRBuf_indirectLightsBuffer).EnqueueWriteBuffer(_openCLState.commandQueue, CL_FALSE, 0, _lightsForIndirect.size(), _lightsForIndirect.data(), 0, NULL, NULL));
+
// Heuristic: One cell every 5 meters along X & Z and every 15 along Y
const Vector3f heuristicCellSize(5.0f, 15.0f, 5.0f);
// Resize the grid based on scene size and heuristic. If this generates too many cells, recursively divide cell starting size by 2 (128 -> 64 -> 32 ...)
SetDims(
std::max(1u, std::min(128u, static_cast<UInt32>(ceilf(2.0f * _sceneBounds.m_Extent.x / heuristicCellSize.x)))),
std::max(1u, std::min(128u, static_cast<UInt32>(ceilf(2.0f * _sceneBounds.m_Extent.y / heuristicCellSize.y)))),
std::max(1u, std::min(128u, static_cast<UInt32>(ceilf(2.0f * _sceneBounds.m_Extent.z / heuristicCellSize.z)))));
// Compute grid transforms
m_Bounds = _sceneBounds;
@@ -242,15 +248,15 @@
// Allocate temp memory
const UInt32 cellCount = GetCellCount();
- dynamic_array<dynamic_array<UInt32> > directLightsBuffers(cellCount, kMemTempAlloc);
- dynamic_array<dynamic_array<UInt32> > indirectLightsBuffers(cellCount, kMemTempAlloc);
+ dynamic_array<dynamic_array<UInt32> > directLightsBuffers(cellCount, kMemTempAlloc); // TODO remove!
+ dynamic_array<dynamic_array<UInt32> > indirectLightsBuffers(cellCount, kMemTempAlloc); // TODO remove!
for (UInt32 i = 0; i < cellCount; ++i)
{
directLightsBuffers[i].set_memory_label(kMemTempAlloc);
indirectLightsBuffers[i].set_memory_label(kMemTempAlloc);
}
- // Partition the lights
+ // Partition the lights and generate indices into the light arrays.
PartitionLightsIntoCells(directLightsBuffers, _lightsForDirect);
PartitionLightsIntoCells(indirectLightsBuffers, _lightsForIndirect);
@@ -272,6 +278,7 @@
{
for (UInt32 cellIdx = 0; cellIdx < cellCount; ++cellIdx)
{
+ // TODO store offsets into _lightsForDirect and _lightsForIndirect instead.
directLightOffsets[cellIdx] = directLightOffset;
directLightCountPerCells[cellIdx] = directLightsBuffers[cellIdx].size();
indirectLightOffsets[cellIdx] = indirectLightOffset;
@@ -290,54 +297,6 @@
m_MaxDirectLightsPerCell = std::max((int)directLightsBuffers[cellIdx].size(), m_MaxDirectLightsPerCell);
}
- // Upload the direct lights
- CL_CHECK_ERR(GetBuffer(kRRBuf_directLightsBuffer).CreateBuffer(_openCLState.context, CL_MEM_READ_ONLY, std::max(directLightOffset, 1u), NULL, &_err));
- if (directLightOffset > 0)
- {
- LightBuffer *const lightsForDirect = static_cast<LightBuffer *>(clEnqueueMapBuffer(_openCLState.commandQueue, GetBuffer(kRRBuf_directLightsBuffer).m_NativeBuffer, CL_TRUE, CL_MAP_WRITE, 0, directLightOffset * sizeof(LightBuffer), 0, NULL, &mapEvent, NULL));
- DebugAssert(lightsForDirect != NULL);
- CL_CHECK(clWaitForEvents(1, &mapEvent));
- CL_CHECK(clReleaseEvent(mapEvent));
- if (lightsForDirect != NULL)
- {
- for (UInt32 i = 0, cellIdx = 0, localIdx = 0; i < directLightOffset; ++i, ++localIdx)
- {
- while (localIdx >= directLightsBuffers[cellIdx].size())
- {
- localIdx = 0;
- ++cellIdx;
- }
- DebugAssert(cellIdx < cellCount);
- lightsForDirect[i] = _lightsForDirect[directLightsBuffers[cellIdx][localIdx]];
- }
- }
- CL_CHECK(clEnqueueUnmapMemObject(_openCLState.commandQueue, GetBuffer(kRRBuf_directLightsBuffer).m_NativeBuffer, lightsForDirect, 0, NULL, NULL));
- }
-
- // Upload the indirect lights
- CL_CHECK_ERR(GetBuffer(kRRBuf_indirectLightsBuffer).CreateBuffer(_openCLState.context, CL_MEM_READ_ONLY, std::max(indirectLightOffset, 1u), NULL, &_err));
- if (indirectLightOffset > 0)
- {
- LightBuffer *const lightsForIndirect = static_cast<LightBuffer *>(clEnqueueMapBuffer(_openCLState.commandQueue, GetBuffer(kRRBuf_indirectLightsBuffer).m_NativeBuffer, CL_TRUE, CL_MAP_WRITE, 0, indirectLightOffset * sizeof(LightBuffer), 0, NULL, &mapEvent, NULL));
- DebugAssert(lightsForIndirect != NULL);
- CL_CHECK(clWaitForEvents(1, &mapEvent));
- CL_CHECK(clReleaseEvent(mapEvent));
- if (lightsForIndirect != NULL)
- {
- for (UInt32 i = 0, cellIdx = 0, localIdx = 0; i < indirectLightOffset; ++i, ++localIdx)
- {
- while (localIdx >= indirectLightsBuffers[cellIdx].size())
- {
- localIdx = 0;
- ++cellIdx;
- }
- DebugAssert(cellIdx < cellCount);
- lightsForIndirect[i] = _lightsForIndirect[indirectLightsBuffers[cellIdx][localIdx]];
- }
- }
- CL_CHECK(clEnqueueUnmapMemObject(_openCLState.commandQueue, GetBuffer(kRRBuf_indirectLightsBuffer).m_NativeBuffer, lightsForIndirect, 0, NULL, NULL));
- }
-
// Upload indirect distribution offsets
UInt32 indirectDistributionOffset = 0;
CL_CHECK_ERR(GetBuffer(kRRBuf_indirectLightDistributionOffsetBuffer).CreateBuffer(_openCLState.context, CL_MEM_READ_ONLY, cellCount, NULL, &_err));
Add comment
All about bugs
View bugs we have successfully reproduced, and vote for the bugs you want to see fixed most urgently.
Latest issues
- Articulation Body with 'Revolute' Joint Type has erratic behavior when Upper Limit is set to above 360
- WebGL Player fails to render Scene when Terrain with Detail Mesh is added and WebGPU Graphics API is used
- Inconsistent errors are logged when different types are passed into the Query "Q<>" method in UIToolkit and the ancestor VisualElement is null
- Crash on GetMaterialPropertyByIndex when opening a specific Scene
- Discrepancies in the styling are present when using a TSS file instead of a USS file in custom EditorWindow
Resolution Note (fix version 2019.3):
After baking, the Lightmapper still remains set to Progressive GPU.