Search Issue Tracker

Fixed in 2020.1

Fixed in 2019.3

Votes

1

Found in

2019.1

2019.2

2019.2.0f1

2019.3

2020.1

Issue ID

1196347

Regression

No

[GPU Lightmapper] LightGrid takes up too much memory on large scenes

Global Illumination

-

Reproduction steps:
1. Open "1196347" project with 2020.1
2. Open "REPROSCENE" scene.
3. Bake Lighting with GPU lightmapper.

Actual Result: The bake falls back to the CPU Lightmapper with this message: "OpenCL Error. Falling back to CPU lightmapper. Error callback from context: Max allocation size supported by this device is 1.50 GB. 13.45 GB requested."

Reproduced with: 2020.1.0a12.

In 2019.3.0b10 and 2019.2.12f1 the Editor crashes instead; that crash is tracked in a separate bug that Dovydas Dainys will create.

The GPU lightmapper light grid requests 13.53 GB in a large 3x5 km scene. 2 x 606208 light grid cells are generated because the grid dimensions are derived from heuristicCellSize(5.0f, 15.0f, 5.0f), with each axis capped at 128 cells.

The proper fix is to not store actual light structs per grid cell, but indices into the unique light buffers.

If this is not enough, change the grid size calculation to use the scene size and a more advanced heuristic. If it generates too many cells, recursively divide the starting cell size by 2 (128 -> 64 -> 32 ...), query the expected memory footprint using CalculateExpectedMemoryUsage(), and compare it with OpenCLState::maxAllocationSize.

diff --git a/Editor/Src/GI/Progressive/RadeonRays/RadeonRaysLightGrid.cpp b/Editor/Src/GI/Progressive/RadeonRays/RadeonRaysLightGrid.cpp
--- a/Editor/Src/GI/Progressive/RadeonRays/RadeonRaysLightGrid.cpp
+++ b/Editor/Src/GI/Progressive/RadeonRays/RadeonRaysLightGrid.cpp
@@ -223,14 +223,20 @@
// Release previous buffers
Destroy();

+ // Upload the direct and indirect lights buffers, non-blocking is OK because the map calls below will force a sync.
+ CL_CHECK_ERR(GetBuffer(kRRBuf_directLightsBuffer).CreateBuffer(_openCLState.context, CL_MEM_READ_ONLY, _lightsForDirect.size(), NULL, &_err));
+ CL_CHECK(GetBuffer(kRRBuf_directLightsBuffer).EnqueueWriteBuffer(_openCLState.commandQueue, CL_FALSE, 0, _lightsForDirect.size(), _lightsForDirect.data(), 0, NULL, NULL));
+ CL_CHECK_ERR(GetBuffer(kRRBuf_indirectLightsBuffer).CreateBuffer(_openCLState.context, CL_MEM_READ_ONLY, _lightsForIndirect.size(), NULL, &_err));
+ CL_CHECK(GetBuffer(kRRBuf_indirectLightsBuffer).EnqueueWriteBuffer(_openCLState.commandQueue, CL_FALSE, 0, _lightsForIndirect.size(), _lightsForIndirect.data(), 0, NULL, NULL));
+
// Heuristic: One cell every 5 meters along X & Z and every 15 along Y
const Vector3f heuristicCellSize(5.0f, 15.0f, 5.0f);

// Resize the grid based on scene size and heuristic. If this generates too many cells, recursively divide cell starting size by 2 (128 -> 64 -> 32 ...)
SetDims(
std::max(1u, std::min(128u, static_cast<UInt32>(ceilf(2.0f * _sceneBounds.m_Extent.x / heuristicCellSize.x)))),
std::max(1u, std::min(128u, static_cast<UInt32>(ceilf(2.0f * _sceneBounds.m_Extent.y / heuristicCellSize.y)))),
std::max(1u, std::min(128u, static_cast<UInt32>(ceilf(2.0f * _sceneBounds.m_Extent.z / heuristicCellSize.z)))));

// Compute grid transforms
m_Bounds = _sceneBounds;
@@ -242,15 +248,15 @@

// Allocate temp memory
const UInt32 cellCount = GetCellCount();
- dynamic_array<dynamic_array<UInt32> > directLightsBuffers(cellCount, kMemTempAlloc);
- dynamic_array<dynamic_array<UInt32> > indirectLightsBuffers(cellCount, kMemTempAlloc);
+ dynamic_array<dynamic_array<UInt32> > directLightsBuffers(cellCount, kMemTempAlloc); // TODO remove!
+ dynamic_array<dynamic_array<UInt32> > indirectLightsBuffers(cellCount, kMemTempAlloc); // TODO remove!
for (UInt32 i = 0; i < cellCount; ++i)
{
directLightsBuffers[i].set_memory_label(kMemTempAlloc);
indirectLightsBuffers[i].set_memory_label(kMemTempAlloc);
}

- // Partition the lights
+ // Partition the lights and generate indices into the light arrays.
PartitionLightsIntoCells(directLightsBuffers, _lightsForDirect);
PartitionLightsIntoCells(indirectLightsBuffers, _lightsForIndirect);

@@ -272,6 +278,7 @@
{
for (UInt32 cellIdx = 0; cellIdx < cellCount; ++cellIdx)
{
+ // TODO store offsets into _lightsForDirect and _lightsForIndirect instead.
directLightOffsets[cellIdx] = directLightOffset;
directLightCountPerCells[cellIdx] = directLightsBuffers[cellIdx].size();
indirectLightOffsets[cellIdx] = indirectLightOffset;
@@ -290,54 +297,6 @@
m_MaxDirectLightsPerCell = std::max((int)directLightsBuffers[cellIdx].size(), m_MaxDirectLightsPerCell);
}

- // Upload the direct lights
- CL_CHECK_ERR(GetBuffer(kRRBuf_directLightsBuffer).CreateBuffer(_openCLState.context, CL_MEM_READ_ONLY, std::max(directLightOffset, 1u), NULL, &_err));
- if (directLightOffset > 0)
- {
- LightBuffer *const lightsForDirect = static_cast<LightBuffer *>(clEnqueueMapBuffer(_openCLState.commandQueue, GetBuffer(kRRBuf_directLightsBuffer).m_NativeBuffer, CL_TRUE, CL_MAP_WRITE, 0, directLightOffset * sizeof(LightBuffer), 0, NULL, &mapEvent, NULL));
- DebugAssert(lightsForDirect != NULL);
- CL_CHECK(clWaitForEvents(1, &mapEvent));
- CL_CHECK(clReleaseEvent(mapEvent));
- if (lightsForDirect != NULL)
- {
- for (UInt32 i = 0, cellIdx = 0, localIdx = 0; i < directLightOffset; ++i, ++localIdx)
- {
- while (localIdx >= directLightsBuffers[cellIdx].size())
- {
- localIdx = 0;
- ++cellIdx;
- }
- DebugAssert(cellIdx < cellCount);
- lightsForDirect[i] = _lightsForDirect[directLightsBuffers[cellIdx][localIdx]];
- }
- }
- CL_CHECK(clEnqueueUnmapMemObject(_openCLState.commandQueue, GetBuffer(kRRBuf_directLightsBuffer).m_NativeBuffer, lightsForDirect, 0, NULL, NULL));
- }
-
- // Upload the indirect lights
- CL_CHECK_ERR(GetBuffer(kRRBuf_indirectLightsBuffer).CreateBuffer(_openCLState.context, CL_MEM_READ_ONLY, std::max(indirectLightOffset, 1u), NULL, &_err));
- if (indirectLightOffset > 0)
- {
- LightBuffer *const lightsForIndirect = static_cast<LightBuffer *>(clEnqueueMapBuffer(_openCLState.commandQueue, GetBuffer(kRRBuf_indirectLightsBuffer).m_NativeBuffer, CL_TRUE, CL_MAP_WRITE, 0, indirectLightOffset * sizeof(LightBuffer), 0, NULL, &mapEvent, NULL));
- DebugAssert(lightsForIndirect != NULL);
- CL_CHECK(clWaitForEvents(1, &mapEvent));
- CL_CHECK(clReleaseEvent(mapEvent));
- if (lightsForIndirect != NULL)
- {
- for (UInt32 i = 0, cellIdx = 0, localIdx = 0; i < indirectLightOffset; ++i, ++localIdx)
- {
- while (localIdx >= indirectLightsBuffers[cellIdx].size())
- {
- localIdx = 0;
- ++cellIdx;
- }
- DebugAssert(cellIdx < cellCount);
- lightsForIndirect[i] = _lightsForIndirect[indirectLightsBuffers[cellIdx][localIdx]];
- }
- }
- CL_CHECK(clEnqueueUnmapMemObject(_openCLState.commandQueue, GetBuffer(kRRBuf_indirectLightsBuffer).m_NativeBuffer, lightsForIndirect, 0, NULL, NULL));
- }
-
// Upload indirect distribution offsets
UInt32 indirectDistributionOffset = 0;
CL_CHECK_ERR(GetBuffer(kRRBuf_indirectLightDistributionOffsetBuffer).CreateBuffer(_openCLState.context, CL_MEM_READ_ONLY, cellCount, NULL, &_err));

  1. Response avatar

    Resolution Note (fix version 2019.3):

    After baking, the Lightmapper still remains set to Progressive GPU.

Add comment

Log in to post comment

All about bugs

View bugs we have successfully reproduced, and vote for the bugs you want to see fixed most urgently.