Created
December 1, 2022 23:07
-
-
Save Shimmen/d708819d050e3e022a74b2ba20cef943 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| //////////////////////////////////////////////////////////////////////////////// | |
| // Range list logic | |
// Number of slots in the range list ring buffer.
// Must be a power of two: slot indices are wrapped with `& LIST_BUFFER_MASK`,
// which is only equivalent to `% LIST_BUFFER_SIZE` for power-of-two sizes.
#define LIST_BUFFER_SIZE (8192)
// Bit mask that wraps a monotonically increasing head/tail counter to a slot index.
#define LIST_BUFFER_MASK (LIST_BUFFER_SIZE - 1)
// Storage shared by every invocation that pushes to or pops from the range list.
layout(set = 10, binding = 0) coherent restrict buffer WorkingCountsBlock {
    // Range slots; consumeRange() reads .x as the first index of a range and
    // .y as one past its last index. Addressed with `counter & LIST_BUFFER_MASK`.
    uvec2 listBuffer[LIST_BUFFER_SIZE];

    // Atomically incremented to allocate the slot a new range is written to.
    uint listWriteHead;

    // Atomically incremented after a slot has been written; compared against
    // listReadHead to decide whether a range is available.
    uint listWriteTail;

    // Counter of the next slot to pop; advanced with atomicCompSwap.
    uint listReadHead;
};
| // TODO: Make a #define version for it, so we can reuse! | |
// Pops the next pending range from the shared range list and hands out one
// index of that range to each active lane of the calling subgroup.
//
// A range is stored as uvec2(first, onePastLast). If the popped range holds
// more indices than there are active lanes, the unconsumed remainder is pushed
// back onto the list as a new range.
//
// index:   receives `range.x + <rank of this lane among the active lanes>`.
//          Only meaningful when the function returns true; set to 0 when no
//          range could be grabbed.
// returns: false on every lane if no range was available (or the popped range
//          was empty); otherwise true exactly on the lanes whose index lies
//          inside the popped range.
bool consumeRange(out uint index)
{
    // Give the out parameter a defined value on every path, including the early-out.
    index = 0u;

    uvec4 ballot = subgroupBallot(true);
    uint consumeCount = subgroupBallotBitCount(ballot);     // number of active lanes
    uint laneIdx = subgroupBallotExclusiveBitCount(ballot); // rank of this lane among them

    ////////////////////////////////////////////////////////////////////////////
    // 1) Grab the next range if possible (one elected lane does the work)

    uvec2 range = uvec2(0u, 0u);
    if (subgroupElect()) {
        while (true) {
            uint currentReadHead = listReadHead;

            // Nothing published past the read head: leave `range` empty so the
            // whole subgroup takes the early-out below instead of spinning forever.
            if (listWriteTail <= currentReadHead) {
                break;
            }

            // atomicCompSwap returns the value that was stored before the call,
            // so the swap succeeded iff that value equals the one we expected.
            uint oldReadHead = atomicCompSwap(listReadHead, currentReadHead, currentReadHead + 1u);
            if (oldReadHead == currentReadHead) {
                range = listBuffer[currentReadHead & uint(LIST_BUFFER_MASK)];
                break;
            }
            // Another invocation advanced the read head first; retry.
        }
    }

    // Make the popped range uniform across the subgroup.
    range = subgroupBroadcastFirst(range);
    uint rangeCount = range.y - range.x;

    // Early-out if no count (could not grab a range)
    if (rangeCount == 0u) {
        return false;
    }

    ////////////////////////////////////////////////////////////////////////////
    // 2) Push the unconsumed remainder back as a new range if needed

    if (subgroupElect()) {
        if (rangeCount > consumeCount) {
            // 2.1) allocate range slot
            uint newRangeSlot = atomicAdd(listWriteHead, 1u) & uint(LIST_BUFFER_MASK);

            // 2.2) copy data into slot
            listBuffer[newRangeSlot] = uvec2(range.x + consumeCount, range.y);

            // 2.3) push write tail
            // NOTE(review): with several concurrent producers the tail may be
            // pushed past a slot whose write (2.2) by another producer has not
            // landed yet, and no explicit memory barrier orders 2.2 before
            // 2.3 - confirm whether this needs a barrier / in-order publish.
            atomicAdd(listWriteTail, 1u);
        }
    }

    ////////////////////////////////////////////////////////////////////////////
    // 3) Assign consecutive indices of the range to consecutive active lanes.
    //    `range` is already subgroup-uniform, so no further broadcast is needed.

    index = range.x + laneIdx;

    // The out index is only valid if we're within the range
    return laneIdx < rangeCount;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment